问题:
需要在循环或序列中计算 confusionMatrix() 的灵敏度、特异性和准确性,阈值范围为 seq(0.1, 0.9, by = 0.1)。
目标:
要迭代的值:从 0.1 到 0.9,步长为 0.1。通过自定义编写的混淆矩阵函数计算灵敏度、特异性和准确性;当 caret::confusionMatrix 因因子水平(levels)不一致而报错时,该函数负责处理水平不一致的问题。
空记录已被删除。
R Code WIP解决方案
这是正在处理的R代码。函数compute_seq_accuracy.func()执行时没有报错,但是,函数compute_confusion_matrix.func()内的列联表似乎既没有生成,也没有作为返回值返回。下文的数据跟踪来自函数内部的打印语句,显示了为每个阈值评估创建的列联表:
# Accuracy of the loan predictions when the scores are cut at one threshold.
#
# Reads two globals: `loans_predict` (numeric scores) and
# `loans_train_data$statusRank` (reference labels; assumed to be the
# Bad/Good factor shown in the printed tables -- TODO confirm).
#
# @param value Numeric threshold. Scores below it are labelled "Bad", all
#   other scores "Good".
# @return The 'Accuracy' element of the caret confusion matrix, or NULL (after
#   a warning) when the matrix cannot be computed for this threshold.
compute_seq_accuracy.func <- function(value) {
  tryCatch(
    {
      # Use the reference's class names and declare both levels up front so
      # the contingency table stays 2 x 2 even when a threshold puts every
      # score into a single class.
      csa.func.p <- factor(
        ifelse(loans_predict < value, "Bad", "Good"),
        levels = c("Bad", "Good")
      )
      csa.func.confusion_table <- compute_confusion_matrix.func(
        loans_train_data$statusRank,
        csa.func.p
      )
      print(csa.func.confusion_table)
      # confusionMatrix() takes the contingency table on its own; the
      # prediction factor is already folded into that table.
      csa.func.confusion_matrix <-
        caret::confusionMatrix(csa.func.confusion_table)
      csa.func.confusion_matrix$overall['Accuracy']
    },
    error = function(e) {
      # Report why this threshold failed instead of silently yielding NULL.
      warning(
        "compute_seq_accuracy.func failed for threshold ", value, ": ",
        conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )
}
# Cross-tabulate reference labels against predictions and return a square
# table whose rows and columns carry the same labels in the same order.
#
# @param y Reference labels (rows of the table).
# @param p Predicted labels (columns of the table).
# @return A two-dimensional table of counts. When `table(y, p)` is already
#   square it is returned unchanged; otherwise it is padded with zero counts
#   for every label that is missing from either margin.
compute_confusion_matrix.func <- function(y, p) {
  confusion_table <- table(y, p)
  if (nrow(confusion_table) != ncol(confusion_table)) {
    row_labels <- rownames(confusion_table)
    col_labels <- colnames(confusion_table)
    # Prefer the ordering of whichever margin already holds every label;
    # fall back to rows-then-new-columns when neither margin is complete.
    all_labels <- if (all(row_labels %in% col_labels)) {
      col_labels
    } else {
      union(row_labels, col_labels)
    }
    padded <- matrix(
      0L,
      nrow = length(all_labels),
      ncol = length(all_labels),
      dimnames = list(y = all_labels, p = all_labels)
    )
    # Copy counts by label, not by position, so no count can end up under
    # the wrong class name.
    padded[row_labels, col_labels] <- confusion_table
    confusion_table <- as.table(padded)
  }
  confusion_table
}
# Single confusion matrix for the factor predictions in `model_prediction`.
# NOTE(review): the helper tabulates the reference in rows and predictions in
# columns; caret's table method appears to expect predictions in rows.
# Accuracy is unaffected, but confirm the orientation before relying on
# Sensitivity / Specificity.
x <- compute_confusion_matrix.func(loans_train_data$statusRank, model_prediction)
confusion_matrix <- confusionMatrix(x)
confusion_matrix$byClass['Sensitivity']
confusion_matrix$byClass['Specificity']
confusion_matrix$overall['Accuracy']

# Thresholds to evaluate.
compute_for_values <- seq(0.1, 0.9, by = 0.1)
## WIP error in !all.equal(nrow(data, ncol(data)))
computed_accuracies <- lapply(compute_for_values, compute_seq_accuracy.func)
names(computed_accuracies) <- compute_for_values
# Thresholds that failed come back as NULL; drop them, because which.max()
# cannot coerce a list containing NULL to numeric.
computed_accuracies <- Filter(Negate(is.null), computed_accuracies)
# Named entry with the highest accuracy (an empty list when every threshold
# failed).
computed_accuracies[which.max(unlist(computed_accuracies))]
数据跟踪
已设置 tryCatch,没有警告消息。但是,当调用 csa.func.confusion_matrix 所在的行时……
> csa_computed_accuracies <- sapply(compute_for_values, compute_seq_accuracy.func, simplify = FALSE)
p
y 0 1
Bad 4 6009
Good 0 21411
p
y 0 1
Bad 38 5975
Good 15 21396
p
y 0 1
Bad 225 5788
Good 133 21278
p
y 0 1
Bad 702 5311
Good 533 20878
p
y 0 1
Bad 1575 4438
Good 1614 19797
p
y 0 1
Bad 2836 3177
Good 4002 17409
p
y 0 1
Bad 4382 1631
Good 8646 12765
p
y 0 1
Bad 5627 386
Good 15856 5555
>
部分校正
已确定使用了错误的数据集:model_prediction(它是因子,而不是数值概率)。这引起了错误:“Ops.factor(model_prediction, value): ‘<’ not meaningful for factors”
> head(model_prediction, 50)
[1] Good Good Good Good Good Good Good Good Good Good Good Good Good Good Good Good Good Good Good Good Good Good Good Bad Good
[26] Good Good Good Good Bad Good Good Good Good Good Good Good Good Good Good Good Good Good Good Good Good Good Good Good Good
Levels: Bad Good
>
更正的数据集:
head(loans_predict,50)
11413 2561 25337 1643 14264 24191 33989 28193 21129 7895 29007 26622 3065
0.8375821 0.7516343 0.8375704 0.7671279 0.7201578 0.7917037 0.8980501 0.8259884 0.8604232 0.8664207 0.7609676 0.7753622 0.9321958
11423 3953 5789 30150 6070 1486 13195 30344 26721 716 24609 22196 10770
0.8325967 0.9459098 0.5903160 0.5997290 0.9045176 0.6782181 0.7546154 0.8381577 0.7943421 0.7198638 0.4522069 0.7129170 0.8632025
18042 3710 21750 23492 10680 5088 10434 3228 8696 29688 33847 2997 24772
0.8941667 0.6445716 0.7659989 0.2616490 0.7402274 0.7115220 0.8985310 0.7300686 0.8737217 0.6712457 0.7037675 0.6868837 0.7534947
28396 6825 27619 26433 25542 33853 32926 33585 20362 6895 20634
0.7516796 0.7261610 0.8437550 0.8662871 0.8620579 0.9355447 0.6786310 0.6017286 0.9340776 0.9022817 0.7832571
>
> compute_for_values
[1] 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9
考虑将您的方法包装在 tryCatch 中以捕获异常,并在出错时返回 NULL;这样您可以进一步调查是哪个阈值(以 0.1 为步长)导致了错误,最后可以用 Filter 删除这些 NULL 元素。下面还使用了 sapply(lapply 的包装函数),如果以字符向量作为输入,它会返回一个带名称的列表。
# Accuracy of the loan predictions when the scores are cut at one threshold.
#
# Reads two globals: `loans_predict_fcm` (numeric scores -- NOTE(review): not
# defined in the visible code, confirm the name) and
# `loans_train_data$statusRank` (reference labels).
#
# @param value Threshold, numeric or a character string that parses as a
#   number. Scores below it are labelled "Bad", all other scores "Good".
# @return The 'Accuracy' element of the confusion matrix, or NULL (after a
#   warning) when the matrix cannot be computed for this threshold.
compute_seq_accuracy.func <- function(value) {
  tryCatch(
    {
      # Declare both levels so the table stays 2 x 2 even when a threshold
      # puts every score into a single class.
      p <- factor(
        ifelse(loans_predict_fcm < as.numeric(value), 'Bad', 'Good'),
        levels = c('Bad', 'Good')
      )
      confusion_table <- compute_confusion_matrix.func(
        loans_train_data$statusRank,
        p
      )
      c_matrix <- confusionMatrix(confusion_table)
      c_matrix$overall['Accuracy']
    },
    error = function(e) {
      # Report why this threshold failed instead of silently yielding NULL.
      warning(
        "compute_seq_accuracy.func failed for threshold ", value, ": ",
        conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )
}
# Character thresholds make sapply() name each result after its threshold.
compute_for_values <- as.character(seq(0.1, 0.9, by = 0.1))
## WIP error in !all.equal(nrow(data, ncol(data)))
# simplify = FALSE keeps the result a list, so failed thresholds stay as NULL
# entries rather than changing the shape of the result.
computed_accuracies <- sapply(compute_for_values, compute_seq_accuracy.func, simplify = FALSE)
# REMOVE NULLs FROM LIST
# R is case-sensitive and no `LENGTH` object exists; test for NULL directly.
computed_accuracies <- Filter(Negate(is.null), computed_accuracies)