如何解决以下错误:Error: `data` and `reference` should be factors with the same levels

问题描述 投票:0回答:0

代码如下:

`# Load required libraries
library(caret)
library(glmnet)
library(pROC)



# Convert response variable to factor with consistent levels in training and testing data
response_levels <- union(levels(TrainingSet$target), levels(TestingSet$target))
TrainingSet$target <- factor(TrainingSet$target, levels = response_levels)
TestingSet$target <- factor(TestingSet$target, levels = response_levels)



# Create a list of formulas
formulas <- list(
  formula1 = as.formula(paste("target~. ")),
  formula2 = as.formula(paste("target ~ sex + chest.pain.type + fasting.blood.sugar + max.heart.rate + 
              exercise.angina + oldpeak + ST.slope")),
  formula3 = as.formula(paste("target~ cholesterol + sex + resting.bp.s + 
              age + fasting.blood.sugar")),
  formula4 = as.formula(paste("target~ cholesterol + sex + age + fasting.blood.sugar")),
  formula5 = as.formula(paste("target~  max.heart.rate + resting.ecg + oldpeak + ST.slope+
              chest.pain.type + exercise.angina")),
  formula6 = as.formula(paste("target~  max.heart.rate + oldpeak + ST.slope+
              chest.pain.type + exercise.angina"))
)

# Create a list of models
model_list <- list(
  logistic = list(method = "glm", family = "binomial"),
  glmnet = list(method = "glmnet", family = "binomial")
)

# Create an empty list to store the model results
results <- list()
confusion_matrices <- list()

# Loop through the models
for (model in model_list) {
  for (formula in formulas) {

   # Train the model with the current formula
    if (model$method == "glmnet") {
      # For glmnet, specify the alpha values and lambda grid
      model_fit <- train(
        as.formula(formula),
        data = TrainingSet,
        method = model$method,
        trControl = trainControl(method = "cv", number = 10),
        preProcess = c("center", "scale"),
        tuneGrid = expand.grid(alpha = 0:1, lambda = c(0.001, 0.01, 0.1, 1)),
        family = model$family
      )
    } else {
      # For other models, use default tuning parameter grid
      model_fit <- train(
        as.formula(formula),
        data = TrainingSet,
        method = model$method,
        trControl = trainControl(method = "cv", number = 10),
        preProcess = c("center", "scale"),
        family = model$family
      )
    }

    # Make predictions on the testing set
    predicted <- predict(model_fit, newdata = TestingSet)
    predicted <- as.numeric(predicted)

    # Evaluate model performance
    cm <- caret::confusionMatrix(predicted, as.factor(TestingSet$target))
    auc <- pROC::auc(roc(response = TestingSet$target, predictor = as.numeric(predicted) ))
    rmse <- sqrt(mean((predicted - as.numeric(TestingSet$target))^2))
    r2 <- cor(predicted, as.numeric(TestingSet$target))^2

   # Store results in the results list
    results[[paste(model$method, "_", names(formula), sep = "")]] <- list(
      Confusion_Matrix = cm,
      Accuracy = cm$overall["Accuracy"],
      Error_Rate = cm$byClass["Error Rate"],
      Sensitivity = cm$byClass["Sensitivity"],
      AUC = auc,
      RMSE = rmse,
      R2 = r2
    )

    # Store confusion matrix in the confusion_matrices list
    confusion_matrices[[paste(model$method, "_", names(formula), sep = "")]] <- cm$table
  }
}

# Convert results list to data frame
results_df <- do.call(rbind, lapply(results, data.frame, stringsAsFactors = FALSE))

# Print the results
print(results_df)

# Access confusion matrices
print(confusion_matrices)`

我已经尝试了一切,但我不明白如何解决这个错误。我已经执行as.numeric来预测了,但是错误不会去,我已经使用了这个levels = response_levels仍然不起作用。请指导我如何解决这个问题。我这样做是通过使用带有 levels 参数的 factor() 函数手动指定训练和测试数据的因子水平,您可以确保它们具有一致的因子水平,这应该可以解决与因子水平不匹配相关的错误。然后更新代码,levels() 函数用于从训练数据和测试数据中检索因子水平,union() 函数用于将这些水平组合成一组一致的因子水平。然后,使用 factor() 函数将响应变量转换为训练数据和测试数据中具有一致因子水平的因子。这应该解决与因子水平不匹配相关的错误。仍然是相同的错误。

r linear-regression data-analysis logistic-regression glmnet
© www.soinside.com 2019 - 2024. All rights reserved.