如何在 R 中对弹性网惩罚 cox 模型进行重复交叉验证?

问题描述 投票:0回答:1

我想知道对 R 中的弹性网惩罚 cox 模型进行 10 倍重复 10 倍交叉验证的最佳方法是什么。我目前正在使用包 glmnet,首先调整 alpha,然后调整 lambda。我想使用 C 索引作为性能指标来选择最佳的 lambda,但我无法找到它在 glmnet 对象中的存储位置。我正在考虑制作一个循环并在循环内设置一个种子以重复 CV 10 次,但没有不同 lambda 值的 C 索引,我不确定如何跨模型选择最佳 lambda。我使用plot(glmnetfit) 可视化不同 lambda 值的 C 指数,但我必须目视检查每次重复的图,这对我来说听起来不太准确,如果重复次数超过 10 次,就会变得很困难。

这是我一直在使用的代码:

library(glmnet)
library(survival)

data("CoxExample")
x <- CoxExample$x
y <- CoxExample$y

alphas <- seq(0, 1, by = 0.1) 

# Fit models with different alpha values
models <- lapply(alphas, function(alpha) {
  glmnet(x = x, y = y, family = "cox", alpha = alpha)
})

# Optimize alpha
set.seed(123)
cv_results <- lapply(seq_along(models), function(i) {
  alpha <- alphas[[i]]
  print(alpha)
  lambda <- models[[i]]$lambda
  if (is.null(alpha)) {
    alpha <- 1  # Default to lasso if alpha is missing
  }
  cv.glmnet(x = x, y = y, family = "cox", 
            alpha = alpha, lambda = lambda)
})

optimal_alpha_index <- sapply(cv_results, function(cv_result) which.min(cv_result$cvm))
optimal_cvm <- vector("list", length = length(cv_results))
for (i in seq_along(cv_results)) {
  optimal_index <- which.min(cv_results[[i]]$cvm)  # Get the index of the optimal model for alpha i
  optimal_cvm[[i]] <- cv_results[[i]]$cvm[optimal_index]  # Extract the corresponding cross-validation error
}


optimal_alpha = alphas[which.min(optimal_cvm)]
optimal_alpha #1

#Optimize lambda
models <- list()

for (i in 1:10) {
  set.seed(i*123)
  models[[i]] <- cv.glmnet(x=x, 
                           y = y, 
                           family = "cox", 
                           alpha = optimal_alpha, 
                           type.measure = "C"
  )
  print(models[[i]]$lambda.min)
}

optimal_lambda_index <- sapply(models, function(models) which.min(models$cvm))

我最终得到了 10 次重复中每一次的最佳 lambda。我想提取每个模型中最佳 lambda 的 C 指数,或者如果不可能的话,听听 CV 重复的最佳方法是什么。任何帮助将不胜感激。

谢谢!

r cross-validation glmnet cox elasticnet
1个回答
0
投票

您在交叉验证过程中将 C 索引的

type.measure
设置为“C”,但可以细化提取和使用。

library(glmnet)
library(survival)
library(foreach)
library(doParallel)

data("CoxExample")
x <- CoxExample$x
y <- CoxExample$y

alphas <- seq(0, 1, by = 0.1) 
n_repeats <- 10

results <- vector("list", length = n_repeats)

num_cores <- detectCores()-1  
registerDoParallel(cores = num_cores)

# parallel execution using foreach package
results <- foreach(r = 1:n_repeats, .packages = 'glmnet') %dopar% {
  set.seed(r * 123)
  
  # Using different alpha values to fit the model
  models <- lapply(alphas, function(alpha) {
    cv.glmnet(x = x, y = y, family = "cox", alpha = alpha, type.measure = "C")
  })
  
  # Identifies the best lambda for each alpha based on minimum C-index
  best_lambda_per_alpha <- sapply(models, function(model) {
    lambda_min <- model$lambda.min
    c_index <- max(model$cvm)  
    return(c(lambda = lambda_min, c_index = c_index))
  })
  
  # Finding the alpha and lambda combination that gives the best C-index
  optimal_alpha_index <- which.max(best_lambda_per_alpha["c_index",])
  optimal_alpha <- alphas[optimal_alpha_index]
  optimal_lambda <- best_lambda_per_alpha["lambda", optimal_alpha_index]
  
  list(alpha = optimal_alpha, lambda = optimal_lambda, c_index = best_lambda_per_alpha["c_index", optimal_alpha_index])
}

do.call(rbind, results)
© www.soinside.com 2019 - 2024. All rights reserved.