在 R 中使用 coxph 和 mstate 循环多个数据帧

Question

如何编写函数或循环，对多个数据帧（cohort1–cohort25）重复执行下面这个使用 mstate 包的流程？

样本数据：

impute1 <- data.frame(unique_ID = c(1,2,3,4), 
                  DIED_INDICATOR = c(0,1,1,1), 
                  CVD_ANY = c(0,1,1,0), 
                  YEARS_CVD_DEATH = c(15.9, 23.6, 22.7, 3.4), 
                  YEARS_CVD_HOSP = c(15.9, 11.4, 22.7, 3.4), 
                  TOBACCO = c(0, 0, 0, 1), 
                  MARRIED = c(1,0,1,0), 
                  PARITY = c(2,1,1,2)) 

impute2 <- data.frame(unique_ID = c(1,2,3,4), 
                  DIED_INDICATOR = c(0,1,1,1), 
                  CVD_ANY = c(0,1,1,0), 
                  YEARS_CVD_DEATH = c(15.9, 23.6, 22.7, 3.4), 
                  YEARS_CVD_HOSP = c(15.9, 11.4, 22.7, 3.4), 
                  TOBACCO = c(0, 1, 0, 1), 
                  MARRIED = c(1,0,1,1), 
                  PARITY = c(1,1,1,2)) 


covs<-c("TOBACCO", "MARRIED", "PARITY")

在 1 个数据帧上运行模型的代码：

cohort1 <- msprep(data=impute1,trans=tmat, 
             time=c(NA,"YEARS_CVD_HOSP","YEARS_CVD_DEATH"),
             status=c(NA,"CVD_ANY","DIED_INDICATOR"), 
             keep=covs,
             id = as.vector(impute1$unique_ID))

cohort_expand<-expand.covs(cohort1, covs, append = TRUE, longnames = FALSE)

c1<-coxph(Surv(Tstart, Tstop, status)~TOBACCO.1 + TOBACCO.2 + 
TOBACCO.3 + strata(trans),     data=cohort1, method = "breslow")
summary(c1)

newdata<-data.frame(trans=1:3, TOBACCO.1 = c(0,0,0), TOBACCO.2 = 
c(0,0,0), TOBACCO.3 = c(0,0,0), strata = 1:3)

msf1<-msfit(c1, newdata, trans=tmat)

plot(msf1, las=1, lty=rep(1:2,c(8,4)), xlab="Years")

我将 25 个数据帧加载到列表中：

# Directory that holds the imputed SAS data sets. It is pasted directly in
# front of each file name below, so a non-empty path must end with a
# separator (for example "data/").
path <- ''
print(path)
# `pattern` is a regular expression, not a shell glob: escape the dot and
# anchor at the end so that only names ending in ".sas7bdat" are matched.
files <- list.files(path = path, pattern = "\\.sas7bdat$", full.names = FALSE)
print(files)

# Read every file exactly once into a preallocated list.
impute <- vector("list", length(files))
# seq_along() gives an empty sequence when no file was found, whereas
# 1:length(files) would iterate over c(1, 0) and try to read a file "NA".
for (i in seq_along(files)) {
  filename <- paste0(path, files[i])
  print(filename)
  impute[[i]] <- haven::read_sas(data_file = filename)
  print(names(impute[[i]]))
  # Also expose the data set as impute1, impute2, ... because later code
  # refers to those names. assign() reuses the object that was just read
  # instead of parsing a code string and reading the file a second time.
  assign(paste0("impute", i), impute[[i]])
}

我正在尝试使用前两个插补数据集为 msprep 步骤编写一个函数。

test_list <- list(impute1, impute2)

my_func <- function(x) {
    cohort<-mstate::msprep(data=test_list,trans=tmat, 
               time=c(NA,"YEARS_CVD_HOSP","YEARS_CVD_DEATH"),
               status=c(NA,"CVD_ANY","DIED_INDICATOR"), 
               keep=covs,
               id = as.vector(test_list$unique_ID))
}


test<-lapply(test_list, my_func)

我收到错误：

mstate::msprep(data = test_list, trans = tmat, time = c(NA, "YEARS_CVD_HOSP", : 参数“id”不是向量

如何将 unique_ID 指定为向量，即使它是一个列表？

下一个函数：

test<-lapply(test_list, my_func2)

my_func2 <- function(x) {
             cohort<-mstate::cohort<-expand.covs(cohort, covs, append = 
             TRUE, longnames = FALSE)
}

我还尝试使用 lapply 函数在数据集上迭代 coxph 模型，但如何编写整个过程的代码？

c1<- lapply(mydf , function(i) {
  
 iformula <- as.formula(sprintf("Surv(Tstart, Tstop, status) 
~TOBACCO.1 + TOBACCO.2 + TOBACCO.3 +   strata(trans)", i))  

})

Answer 1

我对比例风险回归一无所知，但我修改了你的代码并将其放入一个函数中。它相当简单，只有一个参数 `x`，该参数被传递给 `msprep()` 的 `data` 和 `id` 参数。其余部分保持原样，只是 `coxph()` 中的 `data` 改为 `cohort_expand`。`tmat` 和 `newdata` 被移到函数之外，因为它们看起来是恒定的。

library(mstate)

impute1 <- data.frame(unique_ID = c(1,2,3,4), 
                  DIED_INDICATOR = c(0,1,1,1), 
                  CVD_ANY = c(0,1,1,0), 
                  YEARS_CVD_DEATH = c(15.9, 23.6, 22.7, 3.4), 
                  YEARS_CVD_HOSP = c(15.9, 11.4, 20.7, 3.4), 
                  TOBACCO = c(0, 0, 0, 1), 
                  MARRIED = c(1,0,1,0), 
                  PARITY = c(2,1,1,2)) 

impute2 <- data.frame(unique_ID = c(1,2,3,4), 
                  DIED_INDICATOR = c(0,1,1,1), 
                  CVD_ANY = c(0,1,1,0), 
                  YEARS_CVD_DEATH = c(15.9, 23.6, 22.7, 3.4), 
                  YEARS_CVD_HOSP = c(15.9, 11.4, 21.7, 3.4), 
                  TOBACCO = c(0, 1, 0, 1), 
                  MARRIED = c(1,0,1,1), 
                  PARITY = c(1,1,1,2)) 


test_list <- list(impute1, impute2)

covs <- c("TOBACCO", "MARRIED", "PARITY")

tmat <- trans.illdeath()

newdata <- data.frame(trans=1:3, TOBACCO.1=c(0,0,0), 
  TOBACCO.2=c(0,0,0), TOBACCO.3=c(0,0,0), strata=1:3)


# Run the whole multi-state pipeline (msprep -> expand.covs -> coxph -> msfit)
# on a single data set.
#
# Args:
#   x:         Data frame in wide format with the columns unique_ID,
#              YEARS_CVD_HOSP, YEARS_CVD_DEATH, CVD_ANY, DIED_INDICATOR and
#              the covariates named in `keep_covs`.
#   trans_mat: Transition matrix handed to msprep() and msfit(). Defaults to
#              the global `tmat`, so my_func2(x) behaves as before.
#   keep_covs: Names of the covariates to keep and to expand into
#              transition-specific columns. Defaults to the global `covs`.
#              The model formula below uses TOBACCO.1 to TOBACCO.3, so
#              "TOBACCO" must be among them.
#   pred_data: Data frame passed to msfit() as its `newdata` argument.
#              Defaults to the global `newdata`.
#
# Returns:
#   The object returned by msfit() for the fitted stratified Cox model.
my_func2 <- function(x, trans_mat = tmat, keep_covs = covs,
                     pred_data = newdata) {
  # haven::read_sas() returns tibbles; coerce to a plain data.frame so that
  # base-style `[` subsetting on the data behaves conventionally.
  # NOTE(review): presumably msprep() relies on that behaviour - confirm.
  x <- as.data.frame(x)

  # Wide -> long format: one row per subject and possible transition.
  long <- msprep(
    data = x,
    trans = trans_mat,
    time = c(NA, "YEARS_CVD_HOSP", "YEARS_CVD_DEATH"),
    status = c(NA, "CVD_ANY", "DIED_INDICATOR"),
    keep = keep_covs,
    id = x$unique_ID
  )

  # Append transition-specific covariate columns (TOBACCO.1, TOBACCO.2, ...).
  long_expanded <- expand.covs(long, keep_covs, append = TRUE,
                               longnames = FALSE)

  # Cox model stratified by transition, fitted on the expanded data.
  fit <- coxph(
    Surv(Tstart, Tstop, status) ~
      TOBACCO.1 + TOBACCO.2 + TOBACCO.3 + strata(trans),
    data = long_expanded,
    method = "breslow"
  )

  msfit(fit, pred_data, trans = trans_mat)
}

# Run the pipeline on every data set; `fits` holds one msfit result per set.
fits <- lapply(test_list, my_func2)

# Stack the plots vertically with tight margins. par() returns the previous
# settings, which are restored afterwards so that later plots in the same
# session are not affected.
old_par <- par(mfrow=c(2, 1), mar=c(3, 3, 1, 1), mgp=c(1.5, 0.5, 0))
zzz <- lapply(fits, plot)
par(old_par)

在 R 中使用 coxph 和 mstate 循环多个数据帧

问题描述投票：0回答：1

1个回答

最新问题

在 R 中使用 coxph 和 mstate 循环多个数据帧

问题描述 投票：0回答：1

1个回答

最新问题

问题描述投票：0回答：1