
问题描述 投票:1回答:1

我正在尝试在 caret 中使用 rpart 进行模型选择时最大化灵敏度(sensitivity)。为此,我尝试复制 caret 的 GitHub 页面上给出的方法(向下滚动到使用用户定义函数 FourStat 的示例)。

# create own function so we can use "sensitivity" as our metric to maximise:
#
# Summary function intended for trainControl(summaryFunction = ...).
#   data  - data frame of held-out predictions; its `obs` column supplies the
#           class levels handed to twoClassSummary().
#   lev   - class levels (defaults to levels(data$obs)); not used in the body.
#   model - model name; not used in the body.
# Returns everything twoClassSummary() returns, with its "Sens" entry appended
# once more at the end.
Sensitivity.fc <- function (data, lev = levels(data$obs), model = NULL) {
    out <- c(twoClassSummary(data, lev = levels(data$obs), model = NULL))
    # NOTE: c() prefixes the argument name onto the existing element name, so
    # the appended entry is named "Sensitivity.Sens", not "Sensitivity".
    c(out, Sensitivity = out["Sens"])
}

# Tune an rpart tree through caret::train(), requesting that the custom
# "Sensitivity" metric be maximised when choosing among 20 candidate values.
rpart_caret_fit <- train(
  outcome ~ pred1 + pred2 + pred3 + pred4,
  method = "rpart",
  # maximise sensitivity
  metric = "Sensitivity",
  maximize = TRUE,
  tuneLength = 20,
  na.action = na.pass,
  # forwarded through `...` to the underlying rpart fit
  control = rpart.control(maxdepth = 6),
  trControl = trainControl(
    summaryFunction = Sensitivity.fc,
    classProbs = TRUE
  )
)





678282 samples
 4 predictor
 2 classes: 'yes', 'no' 

No pre-processing
Resampling: Bootstrapped (25 reps) 
Summary of sample sizes: 678282, 678282, 678282, 678282, 678282, 678282, ... 
Resampling results across tuning parameters:

cp              ROC        Sens       Spec       Sensitivity.Sens
0.000001909738  0.7259486  0.4123547  0.8227382  0.4123547       
0.000002864607  0.7259486  0.4123547  0.8227382  0.4123547       
0.000005729214  0.7259489  0.4123622  0.8227353  0.4123622       
0.000006684083  0.7258036  0.4123614  0.8227379  0.4123614       
0.000007638953  0.7258031  0.4123576  0.8227398  0.4123576       
0.000009548691  0.7258028  0.4123539  0.8227416  0.4123539       
0.000010694534  0.7257553  0.4123589  0.8227332  0.4123589       
0.000015277905  0.7257313  0.4123614  0.8227290  0.4123614       
0.000032465548  0.7253456  0.4112838  0.8234272  0.4112838       
0.000038194763  0.7252966  0.4112912  0.8234196  0.4112912       
0.000076389525  0.7248774  0.4102792  0.8240339  0.4102792       
0.000164237480  0.7244847  0.4093688  0.8246372  0.4093688       
0.000194793290  0.7241532  0.4086596  0.8250930  0.4086596       
0.000310650737  0.7237546  0.4087379  0.8250393  0.4087379       
0.001625187154  0.7233805  0.4006570  0.8295729  0.4006570       
0.001726403276  0.7233225  0.3983850  0.8308874  0.3983850       
0.002173282000  0.7230906  0.3915758  0.8348320  0.3915758       
0.002237258227  0.7230906  0.3915758  0.8348320  0.3915758       
0.006140444689  0.7173854  0.4897494  0.7695558  0.4897494       
0.055330843035  0.5730987  0.2710906  0.8545549  0.2710906       

ROC was used to select the optimal model using the largest value.
The final value used for the model was cp = 0.000005729214.


r r-caret rpart



只需在 train 中指定 metric = "Sens",并在 trainControl 中指定 summaryFunction = twoClassSummary:



# Fit rpart on the Sonar data with 5-fold cross-validation, selecting the
# tuning value by the "Sens" column that twoClassSummary reports.
rpart_caret_fit <- train(
  Class ~ .,
  data = Sonar,
  method = "rpart",
  metric = "Sens",
  maximize = TRUE,
  tuneLength = 20,
  trControl = trainControl(
    method = "cv",
    number = 5,
    classProbs = TRUE,
    summaryFunction = twoClassSummary
  )
)


208 samples
 60 predictor
  2 classes: 'M', 'R' 

No pre-processing
Resampling: Cross-Validated (5 fold) 
Summary of sample sizes: 167, 166, 166, 166, 167 
Resampling results across tuning parameters:

  cp         ROC        Sens       Spec     
  0.0000000  0.7088298  0.7023715  0.7210526
  0.0255019  0.7075400  0.7292490  0.6684211
  0.0510038  0.7105388  0.7758893  0.6405263
  0.0765057  0.6904202  0.7841897  0.6294737
  0.1020076  0.7104681  0.8114625  0.6094737
  0.1275095  0.7104681  0.8114625  0.6094737
  0.1530114  0.7104681  0.8114625  0.6094737
  0.1785133  0.7104681  0.8114625  0.6094737
  0.2040152  0.7104681  0.8114625  0.6094737
  0.2295171  0.7104681  0.8114625  0.6094737
  0.2550190  0.7104681  0.8114625  0.6094737
  0.2805209  0.7104681  0.8114625  0.6094737
  0.3060228  0.7104681  0.8114625  0.6094737
  0.3315247  0.7104681  0.8114625  0.6094737
  0.3570266  0.7104681  0.8114625  0.6094737
  0.3825285  0.7104681  0.8114625  0.6094737
  0.4080304  0.7104681  0.8114625  0.6094737
  0.4335323  0.7104681  0.8114625  0.6094737
  0.4590342  0.6500135  0.8205534  0.4794737
  0.4845361  0.6500135  0.8205534  0.4794737

Sens was used to select the optimal model using the largest value.
The final value used for the model was cp = 0.4845361.

另外,我原以为不能把 control = rpart.control(maxdepth = 6) 传给 caret 的 train。这是不正确的——caret 会通过 ... 把任何参数向前传递,所以你几乎可以传递任何参数。


# Custom caret summary function that reports a single metric named "Sens".
#   data  - data frame of held-out predictions; must contain an `obs` factor
#           column and one probability column per class level
#           (so trainControl(classProbs = TRUE) is required).
#   lev   - class levels; not used in the body (levels are read from `obs`).
#   model - model name; not used in the body.
# Returns a length-one numeric vector named "Sens".
Sensitivity.fc <- function (data, lev = NULL, model = NULL) { #every summary function takes these three arguments
  obs <- data[, "obs"] #these are the real values - always in column name "obs" in data
  cls <- levels(obs) #these are the levels - you can also pass this to lev argument
  probs <- data[, cls[2]] #these are the probabilities for the 2nd class - available only if classProbs = TRUE
  # Calculate the classes from a 0.5 probability threshold. The levels are
  # fixed to `cls` so the predicted factor always carries both levels, in the
  # same order as `obs`, even when every row falls on one side of the threshold.
  pred_class <- factor(ifelse(probs > 0.5, cls[2], cls[1]), levels = cls)
  Sensitivity <- caret::sensitivity(pred_class, obs) #do the calculation with the built in function
  names(Sensitivity) <- "Sens" #the name of the output - this is what `metric` in train() must match
  Sensitivity
}


# Same Sonar / rpart tuning run, but scored with the user-defined summary
# function instead of twoClassSummary.
rpart_caret_fit <- train(
  Class ~ .,
  data = Sonar,
  method = "rpart",
  # must match the name the summary function gives its output:
  # names(Sensitivity) <- "Sens"
  metric = "Sens",
  maximize = TRUE,
  tuneLength = 20,
  trControl = trainControl(
    method = "cv",
    number = 5,
    classProbs = TRUE,
    summaryFunction = Sensitivity.fc
  )
)


# Check that the custom summary function and twoClassSummary report the same
# sensitivity. The cross-validation folds are drawn at random, so the seed is
# reset to the same value before each fit; otherwise the two runs would be
# resampled differently and the comparison below would not be meaningful.
set.seed(1)
fit_sens <- train(Class~., 
                  data = Sonar,
                  method = "rpart", 
                  tuneLength = 20, 
                  metric = "Sens", 
                  maximize = TRUE,
                  trControl = trainControl(classProbs = TRUE,
                                           method = "cv",
                                           number = 5,
                                           summaryFunction = Sensitivity.fc))

set.seed(1)
fit_sens2 <- train(Class~., 
                   data = Sonar,
                   method = "rpart", 
                   tuneLength = 20, 
                   metric = "Sens", 
                   maximize = TRUE,
                   trControl = trainControl(classProbs = TRUE,
                                            method = "cv",
                                            number = 5,
                                            summaryFunction = twoClassSummary))

# Compare only the tuning grid and the sensitivity column of both result sets.
all.equal(fit_sens$results[c("cp", "Sens")],
          fit_sens2$results[c("cp", "Sens")])  


© www.soinside.com 2019 - 2024. All rights reserved.