R doParallel:找不到函数

问题描述 投票:2回答:1

我定义了以下函数:

#' Tune random forests by repeated cross-validation for every combination of
#' data set, target and "a"/"b" subset.
#'
#' Each element of `dat` is split on its `vari` column into an "a" and a "b"
#' subset (the `vari` column itself is dropped). For every target and subset a
#' random forest (25 trees) is tuned over `mtry = 1..length(predictors_name)`
#' with 10-fold cross-validation repeated 10 times, selecting on RMSE.
#'
#' @param dat A list of data frames. Each must contain a `vari` column holding
#'   "a" or "b", plus the target and predictor columns.
#' @param targets Character vector of response column names.
#' @param predictors_name Character vector of predictor column names.
#' @return Invisibly, a named list of the fitted `train` objects, one per
#'   data set / target / subset combination (names look like "1.weight.a").
cv_model <- function(dat, targets, predictors_name){

  library(randomForest)
  library(caret)
  library(MLmetrics)
  library(Metrics)

  # Summary function handed to trainControl(); `data` holds the observed
  # (`obs`) and predicted (`pred`) values of one resample.
  sumfct <- function(data, lev = NULL, model = NULL){
    # NOTE(review): this call is left unqualified because it is the call the
    # surrounding text reports as failing under a PSOCK cluster; writing
    # `MLmetrics::MAPE` is the fix discussed there.
    mape <- MAPE(y_pred = data$pred, y_true = data$obs)
    # `na.rm` is the argument mean() understands; the former `na.omit = TRUE`
    # was swallowed by `...` and had no effect.
    RMSE <- sqrt(mean((data$pred - data$obs)^2, na.rm = TRUE))
    MAE <- mean(abs(data$obs - data$pred))
    BIAS <- mean(data$obs - data$pred)
    Rsquared <- R2(pred = data$pred, obs = data$obs, formula = "corr",
                   na.rm = FALSE)
    c(MAPE = mape, RMSE = RMSE, MAE = MAE, BIAS = BIAS, Rsquared = Rsquared)
  }

  # The resampling scheme, tuning grid and right-hand side of the formula do
  # not depend on the loop variables, so they are built once.
  control <- trainControl(method = "repeatedcv", number = 10, repeats = 10,
                          search = "grid", savePredictions = TRUE,
                          summaryFunction = sumfct)
  tunegrid <- expand.grid(mtry = seq_along(predictors_name))
  rhs <- paste(predictors_name, collapse = " + ")

  models <- list()

  for (k in seq_along(dat)) {
    part <- dat[[k]]
    keep_cols <- names(part) != "vari"
    # %in% treats NA labels as non-matching (== would yield all-NA rows), and
    # drop = FALSE keeps a data frame even if a single column remains.
    subsets <- list(
      a = part[part$vari %in% "a", keep_cols, drop = FALSE],
      b = part[part$vari %in% "b", keep_cols, drop = FALSE]
    )

    for (target in targets) {
      model_formula <- as.formula(paste0(target, " ~ ", rhs))

      for (subset_name in names(subsets)) {
        # Same seed before every fit so each model sees the same resamples.
        set.seed(42)
        fit <- train(model_formula,
                     data = subsets[[subset_name]],
                     method = "rf",
                     ntree = 25,
                     metric = "RMSE",
                     tuneGrid = tunegrid,
                     trControl = control)
        models[[paste(k, target, subset_name, sep = ".")]] <- fit
      }
    }
  }

  # Previously every fit was overwritten and discarded; hand them back.
  invisible(models)
}

根据本教程(https://topepo.github.io/caret/parallel-processing.html),我可以通过调用 library(doParallel); cl <- makePSOCKcluster(2); registerDoParallel(cl) 来并行化我的代码。但是,当我按如下方式配合 doParallel 运行上述函数时:

predictors_name <- c("Time", "Chick")
targets <- "weight"

# Plain data frame copy of ChickWeight with a subset label column:
# rows 1-10 and 320-350 are labelled "a", every other row "b".
dat <- as.data.frame(ChickWeight)
a_rows <- c(1:10, 320:350)
dat$vari <- "b"
dat$vari[a_rows] <- "a"

d <- list(dat[1:300, ], dat[301:500, ])

## use 2 of the cores
library(doParallel)
cl <- makePSOCKcluster(2)
registerDoParallel(cl)

# `finally` runs whether or not cv_model() stops with an error, so the worker
# processes are never left behind.
tryCatch(
  cv_model(dat = d, targets = targets, predictors_name = predictors_name),
  finally = {
    # end parallel computing
    stopCluster(cl)
    # point foreach back at the sequential backend instead of the dead cluster
    registerDoSEQ()
  }
)

会出现错误消息:couldn't find function "MAPE"

如何在不使用foreach语法的情况下解决此问题?

r for-loop r-caret doparallel
1个回答
0
投票

如果在调用函数时以 package::function 的形式显式指定所属的包,代码就可以正常运行。也许还有更优雅的解决方案,但我就是这样让代码无错误运行的:

#' Tune random forests by repeated cross-validation for every combination of
#' data set, target and "a"/"b" subset.
#'
#' Each element of `dat` is split on its `vari` column into an "a" and a "b"
#' subset (the `vari` column itself is dropped). For every target and subset a
#' random forest (25 trees) is tuned over `mtry = 1..length(predictors_name)`
#' with 10-fold cross-validation repeated 10 times, selecting on RMSE.
#'
#' Package functions are called as `pkg::fun()` so that they resolve by
#' namespace rather than through the search path of the calling session.
#'
#' @param dat A list of data frames. Each must contain a `vari` column holding
#'   "a" or "b", plus the target and predictor columns.
#' @param targets Character vector of response column names.
#' @param predictors_name Character vector of predictor column names.
#' @return Invisibly, a named list of the fitted `train` objects, one per
#'   data set / target / subset combination (names look like "1.weight.a").
cv_model <- function(dat, targets, predictors_name){

  library(randomForest)
  library(caret)
  library(MLmetrics)
  library(Metrics)

  # Summary function handed to trainControl(); `data` holds the observed
  # (`obs`) and predicted (`pred`) values of one resample.
  sumfct <- function(data, lev = NULL, model = NULL){
    mape <- MLmetrics::MAPE(y_pred = data$pred, y_true = data$obs)
    # `na.rm` is the argument mean() understands; the former `na.omit = TRUE`
    # was swallowed by `...` and had no effect.
    RMSE <- sqrt(mean((data$pred - data$obs)^2, na.rm = TRUE))
    MAE <- mean(abs(data$obs - data$pred))
    BIAS <- mean(data$obs - data$pred)
    # Qualified like MAPE so it does not depend on caret being attached.
    Rsquared <- caret::R2(pred = data$pred, obs = data$obs, formula = "corr",
                          na.rm = FALSE)
    c(MAPE = mape, RMSE = RMSE, MAE = MAE, BIAS = BIAS, Rsquared = Rsquared)
  }

  # The resampling scheme, tuning grid and right-hand side of the formula do
  # not depend on the loop variables, so they are built once.
  control <- caret::trainControl(method = "repeatedcv", number = 10,
                                 repeats = 10, search = "grid",
                                 savePredictions = TRUE,
                                 summaryFunction = sumfct)
  tunegrid <- expand.grid(mtry = seq_along(predictors_name))
  rhs <- paste(predictors_name, collapse = " + ")

  models <- list()

  for (k in seq_along(dat)) {
    part <- dat[[k]]
    keep_cols <- names(part) != "vari"
    # %in% treats NA labels as non-matching (== would yield all-NA rows), and
    # drop = FALSE keeps a data frame even if a single column remains.
    subsets <- list(
      a = part[part$vari %in% "a", keep_cols, drop = FALSE],
      b = part[part$vari %in% "b", keep_cols, drop = FALSE]
    )

    for (target in targets) {
      model_formula <- as.formula(paste0(target, " ~ ", rhs))

      for (subset_name in names(subsets)) {
        # Same seed before every fit so each model sees the same resamples.
        set.seed(42)
        fit <- caret::train(model_formula,
                            data = subsets[[subset_name]],
                            method = "rf",
                            ntree = 25,
                            metric = "RMSE",
                            tuneGrid = tunegrid,
                            trControl = control)
        models[[paste(k, target, subset_name, sep = ".")]] <- fit
      }
    }
  }

  # Previously every fit was overwritten and discarded; hand them back.
  invisible(models)
}

predictors_name <- c("Time", "Chick", "Diet")
targets <- "weight"

# Plain data frame copy of ChickWeight with a subset label column:
# rows 1-10 and 320-350 are labelled "a", every other row "b".
dat <- as.data.frame(ChickWeight)
a_rows <- c(1:10, 320:350)
dat$vari <- "b"
dat$vari[a_rows] <- "a"

# Second chunk runs to the last row instead of a hard-coded row count.
d <- list(dat[1:300, ], dat[301:nrow(dat), ])

## use 2 of the cores
library(doParallel)
cl <- makePSOCKcluster(2)
registerDoParallel(cl)

# `finally` runs whether or not cv_model() stops with an error, so the worker
# processes are never left behind.
tryCatch(
  cv_model(dat = d, targets = targets, predictors_name = predictors_name),
  finally = {
    # end parallel computing
    stopCluster(cl)
    # point foreach back at the sequential backend instead of the dead cluster
    registerDoSEQ()
  }
)
© www.soinside.com 2019 - 2024. All rights reserved.