'出事了;所有的 ROC 指标值都丢失了'错误

问题描述 投票:0回答:0

我在 Stack Overflow 中发现了多个关于此的讨论。然而,他们都没有提供解决方案。

使用

caret
包,我想使用 XGBoost 算法作为基础学习器并使用随机森林作为元学习器来训练堆叠集成模型。我也尝试了
mlr
包,但它导致了很多其他问题。

查过资料,没有

NA
值,也没有
inf
。我尝试像在其他一些讨论中看到的那样设置
classProbs = FALSE
,并将目标变量设置为
as.factor
。它没有用。

可能是什么问题?

这是我的代码:

super_train <- function(data_list, n_models = 10, cpus = 32) {
  
  # Set up parallel processing
  parallelStartSocket(cpus = cpus)
  
  # Prepare data
  train <- data_list[[1]]
  test <- data_list[[2]]
  
  train[,1] <- make.names(train[,1])
  test[,1] <- make.names(test[,1])
  
  
  train[,1] <- as.factor(train[,1])
  test[,1] <- as.factor(test[,1])
  train_control <- trainControl(method = "cv", number = 10, classProbs = TRUE , savePredictions = TRUE, summaryFunction = twoClassSummary, verboseIter = FALSE )
  
  # Create base learners
  base_learners <- list()
  for (i in 1:n_models) {
    print(i)
    xgb_grid <- expand.grid(
        booster = c("gbtree", "gblinear"),
        eta = c(0.01, 0.05, 0.1, 0.3),
        max_depth = c(3, 6, 9),
        alpha = c(0, 1, 3, 6),
        lambda = c(0, 1 , 3 , 6),
        gamma = c(0, 1 , 3 , 6),
        min_child_weight = c(1, 3, 5),
        subsample = c(0.5, 0.7, 1),
        colsample_bytree = c(0.5, 0.7, 1),
        scale_pos_weight = c(0.5, 1, 3, 5)
      )
    
    xgb_train <- caret::train(CR ~ .,
                                data = train,
                                method = "xgbTree",
                                params = xgb_grid,
                                trControl = train_control,
                                metric = "ROC")
    
    
    base_learners[[i]] <- xgb_train$finalModel
  }
  
  # Create meta-learner
  meta_lrn <- caret::train(
    CR ~ .,
    data = train,
    method = "rf",
    trControl = train_control,
    metric = "ROC",
    importance = TRUE
  )
  
  # Create stacked ensemble
  stack_control <- trainControl(method = "none")
  stack_learner <- caretStack(
    models = base_learners,
    method = "glm",
    trControl = stack_control
  )
  
  # Train stacked ensemble
  stack_model <- caret::train(
    CR ~ .,
    data = train,
    method = stack_learner,
    trControl = train_control,
    metric = "ROC"
  )
  
  # Evaluate on test set
  test_pred <- predict(stack_model, newdata = test)
  
  # Stop parallel processing
  
  parallelStop()
  
  return(stack_model)
}

这就是错误,我们的宿敌:

Something is wrong; all the ROC metric values are missing:
      ROC           Sens          Spec    
 Min.   : NA   Min.   : NA   Min.   : NA  
 1st Qu.: NA   1st Qu.: NA   1st Qu.: NA  
 Median : NA   Median : NA   Median : NA  
 Mean   :NaN   Mean   :NaN   Mean   :NaN  
 3rd Qu.: NA   3rd Qu.: NA   3rd Qu.: NA  
 Max.   : NA   Max.   : NA   Max.   : NA  
 NA's   :108   NA's   :108   NA's   :108 

这里是训练集的一个子集:

structure(list(CR = structure(c(1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 
1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 
1L), levels = c("0", "1"), class = "factor"), Gender_male = c(1L, 
0L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 
1L, 1L, 1L, 0L, 0L, 0L, 1L, 0L), Feat1 = c(0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), anti1 = c(1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L), Feature2 = c(0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Feat3 = c(0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Feature4 = c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), Feature5 = c(0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Feature6 = c(0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)), row.names = c("Pt1", "Pt2", 
"Pt3", "Pt4", "Pt17", "Pt18", "Pt2", "Pt26", "Pt28", "Pt29", 
"Pt30", "Pt34", "Pt37", "Pt38", "Pt39", "Pt4", "Pt44", "Pt46", 
"Pt47", "Pt48", "Pt5", "Pt52", "Pt59", "Pt62", "Pt65"), class = "data.frame")
r machine-learning xgboost r-caret superlearner
© www.soinside.com 2019 - 2024. All rights reserved.