在 R 中使用 coxph 和 mstate 循环多个数据帧

问题描述 投票:0回答:1

如何编写函数或循环,在多个数据帧(cohort1–cohort25)上重复执行下面这个使用 mstate 包的流程?

样本数据:

# Toy data set 1: four subjects (unique_ID 1-4) with status indicators,
# follow-up times in years and three covariates.
impute1 <- data.frame(
  unique_ID       = as.numeric(1:4),
  DIED_INDICATOR  = c(0, 1, 1, 1),
  CVD_ANY         = c(0, 1, 1, 0),
  YEARS_CVD_DEATH = c(15.9, 23.6, 22.7, 3.4),
  YEARS_CVD_HOSP  = c(15.9, 11.4, 22.7, 3.4),
  TOBACCO         = c(0, 0, 0, 1),
  MARRIED         = c(1, 0, 1, 0),
  PARITY          = c(2, 1, 1, 2)
)

# Toy data set 2: same subjects, outcomes and times as impute1; only the
# three covariate columns take different values.
impute2 <- impute1
impute2$TOBACCO <- c(0, 1, 0, 1)
impute2$MARRIED <- c(1, 0, 1, 1)
impute2$PARITY  <- c(1, 1, 1, 2)

# Names of the covariate columns used by the modelling code.
covs <- c("TOBACCO", "MARRIED", "PARITY")

在 1 个数据帧上运行模型的代码:

# Workflow for one data set: msprep() -> expand.covs() -> coxph() -> msfit()
# -> plot().
# NOTE(review): tmat is used below but is not defined anywhere in this snippet.
cohort1 <- msprep(data=impute1,trans=tmat, 
             time=c(NA,"YEARS_CVD_HOSP","YEARS_CVD_DEATH"),
             status=c(NA,"CVD_ANY","DIED_INDICATOR"), 
             keep=covs,
             id = as.vector(impute1$unique_ID))

# Expand the covariates listed in covs; append = TRUE keeps the existing
# columns and adds the expanded ones. The result is stored in cohort_expand,
# cohort1 itself is left unchanged.
cohort_expand<-expand.covs(cohort1, covs, append = TRUE, longnames = FALSE)

# NOTE(review): the formula uses TOBACCO.1-TOBACCO.3, yet the model is fitted
# on cohort1 rather than cohort_expand -- presumably data= should be
# cohort_expand; confirm.
c1<-coxph(Surv(Tstart, Tstop, status)~TOBACCO.1 + TOBACCO.2 + 
TOBACCO.3 + strata(trans),     data=cohort1, method = "breslow")
summary(c1)

# Covariate pattern handed to msfit(): one row per value of trans (1-3), with
# all three TOBACCO terms set to 0.
newdata<-data.frame(trans=1:3, TOBACCO.1 = c(0,0,0), TOBACCO.2 = 
c(0,0,0), TOBACCO.3 = c(0,0,0), strata = 1:3)

msf1<-msfit(c1, newdata, trans=tmat)

# lty is eight 1s followed by four 2s.
plot(msf1, las=1, lty=rep(1:2,c(8,4)), xlab="Years")

我将 25 个数据帧加载到列表中:

# Folder containing the imputed SAS files. It is pasted directly in front of
# each file name below, so a non-empty path must end with "/".
path <- ''
print(path)
# list.files() expects a regular expression, not a shell glob, so match the
# literal ".sas7bdat" extension at the end of the file name.
files <- list.files(path = path, pattern = "\\.sas7bdat$", full.names = FALSE)
print(files)

# Read every file exactly once; impute[[i]] holds the i-th data set.
# seq_along() makes the loop a no-op when no files are found.
impute <- vector("list", length(files))
for (i in seq_along(files)) {
  filename <- paste0(path, files[i])
  print(filename)
  impute[[i]] <- haven::read_sas(data_file = filename)
  print(names(impute[[i]]))
  # Also expose the data set as impute1, impute2, ... for code that refers to
  # those names. assign() does this without eval(parse()) and without reading
  # the file a second time.
  assign(paste0("impute", i), impute[[i]])
}

我正在尝试使用前两个插补数据集,为 msprep 这一步编写一个函数。

# Unnamed list holding the first two data sets.
test_list <- list(impute1, impute2)

# Attempted per-data-set wrapper around mstate::msprep().
# NOTE(review): the body never uses its parameter x. It passes the whole list
# test_list as data, and test_list$unique_ID is NULL because the list has no
# element named unique_ID -- this looks like the source of the "id" error.
# Presumably data= and id= were meant to use x and x$unique_ID; confirm.
my_func <- function(x) {
    cohort<-mstate::msprep(data=test_list,trans=tmat, 
               time=c(NA,"YEARS_CVD_HOSP","YEARS_CVD_DEATH"),
               status=c(NA,"CVD_ANY","DIED_INDICATOR"), 
               keep=covs,
               id = as.vector(test_list$unique_ID))
}


# Apply my_func to each element of test_list.
test<-lapply(test_list, my_func)

我收到错误:

mstate::msprep(data = test_list, trans = tmat, time = c(NA, "YEARS_CVD_HOSP", : 参数“id”不是向量

如何将 unique_ID 指定为向量,即使它是一个列表?

下一个函数:

# NOTE(review): in this snippet lapply() is called before my_func2 is defined.
test<-lapply(test_list, my_func2)

# Attempted wrapper around expand.covs().
# NOTE(review): the parameter x is never used, cohort is not defined inside the
# function, and "mstate::cohort<-" makes mstate::cohort an assignment target --
# presumably the intent was cohort <- mstate::expand.covs(x, ...); confirm.
my_func2 <- function(x) {
             cohort<-mstate::cohort<-expand.covs(cohort, covs, append = 
             TRUE, longnames = FALSE)
}

我还尝试使用 lapply 函数在数据集上迭代 coxph 模型,但如何编写整个过程的代码?

# Attempt to iterate the Cox model over a list of data sets.
# NOTE(review): mydf is not defined in this snippet. The format string given to
# sprintf() has no conversion specification, so i does not appear in the
# formula, and the function only builds a formula -- coxph() is never called.
c1<- lapply(mydf , function(i) {
  
 iformula <- as.formula(sprintf("Surv(Tstart, Tstop, status) 
~TOBACCO.1 + TOBACCO.2 + TOBACCO.3 +   strata(trans)", i))  

})
r for-loop apply lapply survival
1个回答
0
投票

我对比例风险回归并不了解,但我修改了你的代码并把它封装成了一个函数。它很简单,只有一个参数 `x`,该参数会传给 `msprep()` 的 `data` 和 `id` 参数。其余部分保持原样,只是 `coxph()` 中的 `data` 改成了 `cohort_expand`。`tmat` 和 `newdata` 被移到了函数之外,因为它们看起来是常量。

library(mstate)

# Toy data set 1: four subjects (unique_ID 1-4) with status indicators,
# follow-up times in years and three covariates.
impute1 <- data.frame(
  unique_ID       = as.numeric(1:4),
  DIED_INDICATOR  = c(0, 1, 1, 1),
  CVD_ANY         = c(0, 1, 1, 0),
  YEARS_CVD_DEATH = c(15.9, 23.6, 22.7, 3.4),
  YEARS_CVD_HOSP  = c(15.9, 11.4, 20.7, 3.4),
  TOBACCO         = c(0, 0, 0, 1),
  MARRIED         = c(1, 0, 1, 0),
  PARITY          = c(2, 1, 1, 2)
)

# Toy data set 2: starts as a copy of impute1, then overrides the four columns
# whose values differ.
impute2 <- impute1
impute2$YEARS_CVD_HOSP <- c(15.9, 11.4, 21.7, 3.4)
impute2$TOBACCO        <- c(0, 1, 0, 1)
impute2$MARRIED        <- c(1, 0, 1, 1)
impute2$PARITY         <- c(1, 1, 1, 2)

# Data sets to iterate over, in order.
test_list <- list(impute1, impute2)

# Names of the covariate columns used by the modelling code.
covs <- c("TOBACCO", "MARRIED", "PARITY")

# Transition matrix built by trans.illdeath(); it is the same for every data
# set, so it is created once here.
tmat <- trans.illdeath()

# Covariate pattern used for prediction: one row per value of trans (1-3) with
# all three TOBACCO terms set to 0.
newdata <- data.frame(
  trans     = 1:3,
  TOBACCO.1 = rep(0, 3),
  TOBACCO.2 = rep(0, 3),
  TOBACCO.3 = rep(0, 3),
  strata    = 1:3
)


#' Run the multi-state workflow on one data set.
#'
#' Chains msprep() -> expand.covs() -> coxph() -> msfit() for a single data
#' frame. The objects that used to be read silently from the global
#' environment are now explicit arguments whose defaults are those same
#' globals, so existing calls such as lapply(test_list, my_func2) are
#' unaffected.
#'
#' @param x Data frame containing unique_ID, YEARS_CVD_HOSP, YEARS_CVD_DEATH,
#'   CVD_ANY, DIED_INDICATOR and every column named in `keep_covs`.
#' @param trans_mat Transition matrix passed to msprep() and msfit()
#'   (default: the global `tmat`).
#' @param keep_covs Character vector of covariate columns to keep and expand
#'   (default: the global `covs`). The model formula uses the expanded
#'   TOBACCO.1-TOBACCO.3 terms, so "TOBACCO" is expected to be among them.
#' @param pred_data Data frame passed to msfit() as its new data
#'   (default: the global `newdata`).
#' @return The value returned by msfit().
my_func2 <- function(x, trans_mat = tmat, keep_covs = covs,
                     pred_data = newdata) {
  time_cols <- c(NA, "YEARS_CVD_HOSP", "YEARS_CVD_DEATH")
  status_cols <- c(NA, "CVD_ANY", "DIED_INDICATOR")

  # Fail early with a readable message instead of an obscure error from deep
  # inside msprep() when a data set lacks one of the required columns.
  needed <- c(
    "unique_ID",
    time_cols[!is.na(time_cols)],
    status_cols[!is.na(status_cols)],
    keep_covs
  )
  absent <- setdiff(needed, names(x))
  if (length(absent) > 0) {
    stop("x is missing required column(s): ",
         paste(absent, collapse = ", "), call. = FALSE)
  }

  cohort1 <- msprep(
    data = x, trans = trans_mat,
    time = time_cols, status = status_cols,
    keep = keep_covs, id = x[["unique_ID"]]
  )

  cohort_expand <- expand.covs(cohort1, keep_covs, append = TRUE,
                               longnames = FALSE)

  # The model must be fitted on cohort_expand: that is where the
  # TOBACCO.1-TOBACCO.3 columns used in the formula are found.
  c1 <- coxph(
    Surv(Tstart, Tstop, status) ~
      TOBACCO.1 + TOBACCO.2 + TOBACCO.3 + strata(trans),
    data = cohort_expand, method = "breslow"
  )

  msfit(c1, pred_data, trans = trans_mat)
}

# Run the workflow once per data set; fits[[i]] is the result for
# test_list[[i]].
fits <- lapply(test_list, my_func2)

# Stack the plots in two rows. par() returns the previous settings, which are
# kept so the graphics state can be put back afterwards.
old_par <- par(mfrow = c(2, 1), mar = c(3, 3, 1, 1), mgp = c(1.5, 0.5, 0))
# zzz only captures the return values of plot() so they are not printed.
zzz <- lapply(fits, plot)
par(old_par)

© www.soinside.com 2019 - 2024. All rights reserved.