我想利用计算机上的全部 8 个核心,通过 R 的并行化功能,在多个插补(imputed)数据集上更高效地运行 apply 系列函数所调用的函数 (my_func2)。每个插补数据集大约有 170 万行,因此在我的计算机上跑完 25 个插补数据集需要很长时间。如何最大限度地缩短计算时间?
以下是一些示例数据:
library(haven)
library(dplyr)
library(mstate)
# Sample data ----
# First imputed dataset: one row per subject.
impute1 <- data.frame(
  unique_ID       = c(1, 2, 3, 4),
  DIED_INDICATOR  = c(0, 1, 1, 1),
  CVD_ANY         = c(0, 1, 1, 0),
  YEARS_CVD_DEATH = c(15.9, 23.6, 22.7, 3.4),
  YEARS_CVD_HOSP  = c(15.9, 11.4, 20.7, 3.4),
  TOBACCO         = c(0, 0, 0, 1),
  MARRIED         = c(1, 0, 1, 0),
  PARITY          = c(2, 1, 1, 2)
)

# Second imputed dataset: same subjects and outcomes as the first one; only
# the columns listed below carry different values.
impute2 <- impute1
impute2$YEARS_CVD_HOSP <- c(15.9, 11.4, 21.7, 3.4)
impute2$TOBACCO <- c(0, 1, 0, 1)
impute2$MARRIED <- c(1, 0, 1, 1)
impute2$PARITY <- c(1, 1, 1, 2)

# All imputed datasets, in the order they will be processed.
test_list <- list(impute1, impute2)

# Covariates carried through msprep() and expanded per transition.
covs <- c("TOBACCO", "MARRIED", "PARITY")
# Transition matrix from mstate's illness-death helper; my_func2() passes it
# to msprep() as `trans` and it is shared by every imputed dataset.
tmat <- trans.illdeath()
#' Fit a transition-stratified Cox model to one imputed dataset
#'
#' Runs `msprep()` on `x` using the file-level transition matrix `tmat` and
#' covariate names `covs`, expands those covariates with `expand.covs()`, and
#' fits a Cox model on the three expanded TOBACCO terms, stratified by
#' transition.
#'
#' @param x A data frame providing the columns `unique_ID`, `YEARS_CVD_HOSP`,
#'   `YEARS_CVD_DEATH`, `CVD_ANY`, `DIED_INDICATOR`, and every column named in
#'   `covs`.
#' @return The `summary()` of the fitted `coxph` model.
my_func2 <- function(x) {
  # The leading NA entries correspond to the first state, which has no
  # time/status column of its own in `x`.
  long_data <- msprep(
    time = c(NA, "YEARS_CVD_HOSP", "YEARS_CVD_DEATH"),
    status = c(NA, "CVD_ANY", "DIED_INDICATOR"),
    data = x,
    trans = tmat,
    id = x$unique_ID,
    keep = covs
  )

  # The name `cohort_expand` is kept on purpose: it is recorded in the call
  # stored on the fitted model and shown in its summary.
  cohort_expand <- expand.covs(
    long_data,
    covs,
    append = TRUE,
    longnames = FALSE
  )

  fit <- coxph(
    Surv(Tstart, Tstop, status) ~
      TOBACCO.1 + TOBACCO.2 + TOBACCO.3 + strata(trans),
    data = cohort_expand,
    method = "breslow"
  )

  summary(fit)
}
对于这个 lapply 调用,您建议如何改写(例如并行化)以加快运行速度?