如何编写函数或循环,对多个数据框(cohort1–cohort25)重复执行下面这个使用 mstate 包的流程?
样本数据:
# First example imputed data set: one row per subject (4 subjects).
impute1 <- data.frame(
  unique_ID       = c(1, 2, 3, 4),
  DIED_INDICATOR  = c(0, 1, 1, 1),
  CVD_ANY         = c(0, 1, 1, 0),
  YEARS_CVD_DEATH = c(15.9, 23.6, 22.7, 3.4),
  YEARS_CVD_HOSP  = c(15.9, 11.4, 22.7, 3.4),
  TOBACCO         = c(0, 0, 0, 1),
  MARRIED         = c(1, 0, 1, 0),
  PARITY          = c(2, 1, 1, 2)
)

# Second example imputed data set: same IDs, status and time columns;
# TOBACCO, MARRIED and PARITY differ from impute1.
impute2 <- data.frame(
  unique_ID       = c(1, 2, 3, 4),
  DIED_INDICATOR  = c(0, 1, 1, 1),
  CVD_ANY         = c(0, 1, 1, 0),
  YEARS_CVD_DEATH = c(15.9, 23.6, 22.7, 3.4),
  YEARS_CVD_HOSP  = c(15.9, 11.4, 22.7, 3.4),
  TOBACCO         = c(0, 1, 0, 1),
  MARRIED         = c(1, 0, 1, 1),
  PARITY          = c(1, 1, 1, 2)
)

# Names of the covariate columns used by the later steps.
covs <- c("TOBACCO", "MARRIED", "PARITY")
在单个数据框上运行模型的代码:
# Single-cohort workflow: reshape impute1 with msprep(), expand the covariates
# with expand.covs(), fit a Cox model stratified by transition, then compute
# and plot the msfit() result.
# NOTE(review): `tmat` is not defined before this point in the file; it must
# already exist in the session for msprep()/msfit() to run.
cohort1 <- msprep(data=impute1,trans=tmat,
time=c(NA,"YEARS_CVD_HOSP","YEARS_CVD_DEATH"),
status=c(NA,"CVD_ANY","DIED_INDICATOR"),
keep=covs,
id = as.vector(impute1$unique_ID))
# The expanded data set is stored under a new name; `cohort1` itself is not
# reassigned here.
cohort_expand<-expand.covs(cohort1, covs, append = TRUE, longnames = FALSE)
# NOTE(review): the model is fitted on `cohort1`, while the expand.covs()
# output lives in `cohort_expand`. The TOBACCO.1/.2/.3 terms presumably come
# from that expansion -- confirm which data set was intended.
c1<-coxph(Surv(Tstart, Tstop, status)~TOBACCO.1 + TOBACCO.2 +
TOBACCO.3 + strata(trans), data=cohort1, method = "breslow")
summary(c1)
# Covariate pattern handed to msfit(): three rows (trans/strata 1-3) with all
# TOBACCO.* columns set to 0.
newdata<-data.frame(trans=1:3, TOBACCO.1 = c(0,0,0), TOBACCO.2 =
c(0,0,0), TOBACCO.3 = c(0,0,0), strata = 1:3)
msf1<-msfit(c1, newdata, trans=tmat)
# rep(1:2, c(8, 4)) yields eight 1s followed by four 2s for `lty`.
plot(msf1, las=1, lty=rep(1:2,c(8,4)), xlab="Years")
我将 25 个数据框加载到一个列表中:
# Load every SAS data set found in `path` into the list `impute` and, as
# before, also bind each one to its own variable impute1, impute2, ...
path <- ''
print(path)
# `pattern` is a regular expression, not a shell glob: match names that end
# in ".sas7bdat".
files <- list.files(path = path, pattern = "\\.sas7bdat$", full.names = FALSE)
print(files)
# Preallocate one slot per file instead of growing the list.
impute <- vector("list", length(files))
# seq_along() yields an empty sequence when no file matches, whereas
# 1:length(files) would iterate over c(1, 0).
for (i in seq_along(files)) {
  # Plain concatenation: `path` must be empty or end in a path separator.
  filename <- paste0(path, files[i])
  print(filename)
  impute[[i]] <- haven::read_sas(data_file = filename)
  print(names(impute[[i]]))
  # Bind the same object to impute<i> without eval(parse()) and without
  # reading the file a second time.
  assign(paste0("impute", i), impute[[i]])
}
我正在尝试使用前两个插补数据集为 msprep 步骤编写一个函数。
# The first two imputed data sets, collected in an unnamed list so they can be
# processed with lapply().
test_list <- list(impute1, impute2)
# Wrapper around mstate::msprep() meant to be applied to one data set at a
# time; its value is the result of the assignment to `cohort`.
# NOTE(review): the parameter `x` is never used. `data` receives the whole
# list `test_list`, and `test_list$unique_ID` evaluates to NULL because the
# list has no element named `unique_ID`, so `id` is not a vector. Presumably
# `data = x` and `id = x$unique_ID` were intended -- confirm.
my_func <- function(x) {
cohort<-mstate::msprep(data=test_list,trans=tmat,
time=c(NA,"YEARS_CVD_HOSP","YEARS_CVD_DEATH"),
status=c(NA,"CVD_ANY","DIED_INDICATOR"),
keep=covs,
id = as.vector(test_list$unique_ID))
}
# Calls my_func once per list element and collects the results in a list.
test<-lapply(test_list, my_func)
我收到错误:
mstate::msprep(data = test_list, trans = tmat, time = c(NA, "YEARS_CVD_HOSP", : 参数“id”不是向量
如何将 unique_ID 指定为向量,即使它是一个列表?
下一个函数:
# NOTE(review): this call runs before my_func2 is defined on the lines below.
test<-lapply(test_list, my_func2)
# Intended to apply expand.covs() with `covs` to a prepared data set.
# NOTE(review): the parameter `x` is unused and `cohort` is not defined inside
# the function. The `mstate::cohort<-` prefix makes the target of the inner
# assignment a `::` call rather than a plain name. Presumably the body was
# meant to be `mstate::expand.covs(x, covs, append = TRUE, longnames = FALSE)`
# applied to msprep() output -- confirm.
my_func2 <- function(x) {
cohort<-mstate::cohort<-expand.covs(cohort, covs, append =
TRUE, longnames = FALSE)
}
我还尝试使用 lapply 函数在数据集上迭代 coxph 模型,但如何编写整个过程的代码?
# Fit the transition-stratified Cox model to every data set in `mydf` and
# collect the fitted models in a list.
# Assumes each element of `mydf` already contains Tstart, Tstop, status, trans
# and the expanded TOBACCO.1-3 columns (i.e. expand.covs() output) -- TODO
# confirm against where `mydf` is built.
# The formula is written directly: the earlier sprintf() call had no
# placeholder for its extra argument, the string literal contained a line
# break that split the formula, and no model was ever fitted.
c1 <- lapply(mydf, function(i) {
  coxph(
    Surv(Tstart, Tstop, status) ~
      TOBACCO.1 + TOBACCO.2 + TOBACCO.3 + strata(trans),
    data = i,
    method = "breslow"
  )
})
我对比例风险回归并不了解,但我还是修改了你的代码,并把它封装成了一个函数。这个函数相当简单,只有一个参数 x,它被传给 msprep() 的 data 和 id 参数。其余部分保持原样,只是把 coxph() 中的 data 改成了 cohort_expand。tmat 和 newdata 被移到了函数之外,因为它们看起来是不变的。
library(mstate)
# First example imputed data set: one row per subject (4 subjects).
impute1 <- data.frame(
  unique_ID       = c(1, 2, 3, 4),
  DIED_INDICATOR  = c(0, 1, 1, 1),
  CVD_ANY         = c(0, 1, 1, 0),
  YEARS_CVD_DEATH = c(15.9, 23.6, 22.7, 3.4),
  YEARS_CVD_HOSP  = c(15.9, 11.4, 20.7, 3.4),
  TOBACCO         = c(0, 0, 0, 1),
  MARRIED         = c(1, 0, 1, 0),
  PARITY          = c(2, 1, 1, 2)
)

# Second example imputed data set with the same columns.
impute2 <- data.frame(
  unique_ID       = c(1, 2, 3, 4),
  DIED_INDICATOR  = c(0, 1, 1, 1),
  CVD_ANY         = c(0, 1, 1, 0),
  YEARS_CVD_DEATH = c(15.9, 23.6, 22.7, 3.4),
  YEARS_CVD_HOSP  = c(15.9, 11.4, 21.7, 3.4),
  TOBACCO         = c(0, 1, 0, 1),
  MARRIED         = c(1, 0, 1, 1),
  PARITY          = c(1, 1, 1, 2)
)

# Data sets to iterate over, in order.
test_list <- list(impute1, impute2)

# Names of the covariate columns kept by msprep() and expanded later.
covs <- c("TOBACCO", "MARRIED", "PARITY")
# Transition matrix returned by trans.illdeath(); shared by msprep() and
# msfit() for every data set.
tmat <- trans.illdeath()

# Covariate pattern passed to msfit(): three rows (trans/strata 1-3) with all
# expanded TOBACCO covariates set to 0.
newdata <- data.frame(
  trans     = 1:3,
  TOBACCO.1 = rep(0, 3),
  TOBACCO.2 = rep(0, 3),
  TOBACCO.3 = rep(0, 3),
  strata    = 1:3
)
#' Run the multi-state pipeline on one data set
#'
#' Reshapes `x` with msprep(), adds the expanded covariates with
#' expand.covs(), fits a Cox model stratified by transition, and returns the
#' msfit() result. `tmat`, `covs` and `newdata` are read from the enclosing
#' environment rather than passed as arguments.
#'
#' @param x A data frame providing `unique_ID`, the time and status columns
#'   named in the msprep() call, and the columns listed in `covs`.
#' @return The value of `msfit()` for the fitted model.
my_func2 <- function(x) {
  ms_long <- msprep(
    data = x,
    trans = tmat,
    time = c(NA, "YEARS_CVD_HOSP", "YEARS_CVD_DEATH"),
    status = c(NA, "CVD_ANY", "DIED_INDICATOR"),
    keep = covs,
    id = x$unique_ID
  )

  # append = TRUE keeps the original columns next to the expanded ones.
  ms_expanded <- expand.covs(ms_long, covs, append = TRUE, longnames = FALSE)

  # The model is fitted on the expanded data, which holds TOBACCO.1-3.
  cox_fit <- coxph(
    Surv(Tstart, Tstop, status) ~
      TOBACCO.1 + TOBACCO.2 + TOBACCO.3 + strata(trans),
    data = ms_expanded,
    method = "breslow"
  )

  msfit(cox_fit, newdata, trans = tmat)
}
# Run the pipeline once per data set; `fits` holds one msfit() result each.
fits <- lapply(test_list, my_func2)

# Two stacked panels (2 rows x 1 column) with tightened margins and axis
# label spacing.
par(
  mfrow = c(2, 1),
  mar = c(3, 3, 1, 1),
  mgp = c(1.5, 0.5, 0)
)

# Draw each result; the return values are captured in `zzz` so they are not
# printed.
zzz <- lapply(fits, plot)