我编写了下面的代码来运行 Cox 回归,并将结果输出到表格中。我想对许多变量分别进行单变量分析。你能帮我用一个循环来让这个过程更快吗?变量有 Grade、DCIS、LVE、Tsize 等等。
我写的代码如下:
# Convert a `summary.coxph` object into a coefficient table.
#
# Arguments:
#   y: an object inheriting from "summary.coxph".
#
# Returns:
#   The `coefficients` matrix of `y` with two extra columns, `lower 0.95` and
#   `upper 0.95`, taken from the "lower .95" and "upper .95" columns of
#   `y[["conf.int"]]`.
#
# Stops with an error when `y` does not inherit from "summary.coxph".
CoxphToDF <- function(y) {
  # inherits() yields a single TRUE/FALSE even when `y` carries several
  # classes; `class(y) == "summary.coxph"` would fail the check in that case.
  stopifnot(inherits(y, "summary.coxph"))
  conf_int <- y[["conf.int"]]
  cbind(
    y[["coefficients"]],
    `lower 0.95` = conf_int[, "lower .95"],
    `upper 0.95` = conf_int[, "upper .95"]
  )
}
# Univariate Cox models for Grade, DCIS, LVE and Tsize. Each model is fitted
# only on the rows where its covariate is recorded (not NA and not "").

# Grade
Grade <- data[!(is.na(data$Grade) | data$Grade == ""), ]
grade_fit <- coxph(Surv(OS, OS.Event) ~ Grade, data = Grade)
df1 <- CoxphToDF(summary(grade_fit))

# DCIS
DCIS <- data[!(is.na(data$DCIS) | data$DCIS == ""), ]
dcis_fit <- coxph(Surv(OS, OS.Event) ~ DCIS, data = DCIS)
df2 <- CoxphToDF(summary(dcis_fit))

# LVE
LVE <- data[!(is.na(data$LVE) | data$LVE == ""), ]
lve_fit <- coxph(Surv(OS, OS.Event) ~ LVE, data = LVE)
df3 <- CoxphToDF(summary(lve_fit))

# Tsize
Tsize <- data[!(is.na(data$Tsize) | data$Tsize == ""), ]
tsize_fit <- coxph(Surv(OS, OS.Event) ~ Tsize, data = Tsize)
df4 <- CoxphToDF(summary(tsize_fit))

# Stack the four coefficient tables and write them as a tab-separated file;
# col.names = NA leaves an empty header cell above the row-name column.
univariate_table <- rbind(df1, df2, df3, df4)
write.table(univariate_table, file = "univariate_results.txt", sep = "\t",
            col.names = NA)
我尝试过以下方法:
# Names of the covariates that should each get their own univariate model
variables_to_test <- c("Grade", "DCIS", "LVE", "Tsize")
# List that collects one coefficient table per covariate, keyed by its name
results_list <- list()
# Fit one model per covariate
for (variable in variables_to_test) {
# Keep only the rows where the current covariate is neither NA nor ""
subset_data <- data[!is.na(data[[variable]]) & data[[variable]] != "", ]
# NOTE(review): `~ .` expands to every column of subset_data that is not part
# of the response, so `variable` only drives the row filter above and never
# restricts the model terms; each fit is a multivariable model.
result <- CoxphToDF(summary(coxph(Surv(OS, OS.Event) ~ ., data = subset_data)))
results_list[[variable]] <- result
}
# Stack the per-covariate tables row-wise into a single object
results_df <- do.call(rbind, results_list)
但它给出了意想不到的结果:模型中包含了数据框中的所有变量,而不只是所选的那个变量,而且结果很奇怪。
我的输入数据是:
# Example input: `dput(data)` followed by the structure() literal that
# presumably is its printed output.
# The data frame has 50 rows and the columns ID, OS, OS.Event, Grade, DCIS,
# LVE, Tsize and LN.
# Points visible in the literal below:
#   - Grade is integer and contains NA entries.
#   - LVE contains one empty string "".
#   - Tsize uses the levels "<=3" and " >3cm" (the latter with a leading space).
#   - OS.Event takes the values 0 and 1; only three rows have 1.
dput(data)
structure(list(ID = c(1393L, 1105L, 1347L, 1390L, 1398L, 1508L,
1043L, 1361L, 1304L, 1101L, 1135L, 1148L, 1171L, 993L, 1147L,
1509L, 1522L, 1523L, 1301L, 1396L, 1113L, 1121L, 1161L, 1323L,
1351L, 1385L, 1504L, 1544L, 1564L, 1165L, 1146L, 1108L, 1354L,
1369L, 1563L, 1330L, 1378L, 1397L, 1338L, 1511L, 1130L, 1137L,
1187L, 1052L, 1059L, 1087L, 1087L, 1303L, 1328L, 1334L), OS = c(65L,
88L, 69L, 65L, 64L, 63L, 94L, 67L, 74L, 90L, 83L, 81L, 78L, 47L,
81L, 63L, 62L, 62L, 75L, 25L, 86L, 84L, 79L, 72L, 69L, 65L, 64L,
60L, 61L, 78L, 81L, 87L, 68L, 66L, 59L, 71L, 66L, 64L, 71L, 63L,
84L, 83L, 83L, 93L, 91L, 84L, 84L, 74L, 72L, 33L), OS.Event = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
1L), Grade = c(2L, 3L, 2L, NA, 3L, 3L, 1L, 3L, 2L, 3L, 2L, 3L,
2L, 3L, 2L, 3L, 3L, 2L, NA, 3L, NA, 2L, 2L, 2L, 1L, 2L, 3L, 2L,
2L, 2L, 3L, 3L, 2L, 3L, 3L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
2L, 2L, 2L, NA, 3L, 3L), DCIS = c("NO", "Present", "NO", "Present",
"Present", "Present", "Present", "NO", "Present", "Present",
"Present", "Present", "Present", "Present", "Present", "Present",
"NO", "Present", "NO", "NO", "Present", "Present", "NO", "NO",
"Present", "Present", "NO", "Present", "Present", "Present",
"NO", "Present", "Present", "Present", "NO", "Present", "NO",
"NO", "Present", "NO", "NO", "NO", "NO", "NO", "Present", "NO",
"NO", "Present", "Present", "NO"), LVE = c("NO", "YES", "NO",
"NO", "YES", "YES", "NO", "NO", "YES", "NO", "NO", "NO", "YES",
"", "NO", "NO", "NO", "YES", "NO", "NO", "NO", "NO", "NO", "NO",
"NO", "NO", "NO", "NO", "NO", "YES", "NO", "NO", "NO", "NO",
"NO", "YES", "YES", "NO", "NO", "NO", "NO", "NO", "NO", "YES",
"YES", "NO", "NO", "NO", "NO", "NO"), Tsize = c("<=3", " >3cm",
"<=3", "<=3", "<=3", "<=3", "<=3", "<=3", "<=3", " >3cm", "<=3",
"<=3", " >3cm", " >3cm", "<=3", "<=3", "<=3", "<=3", " >3cm",
"<=3", "<=3", "<=3", " >3cm", "<=3", " >3cm", "<=3", "<=3", "<=3",
"<=3", "<=3", "<=3", "<=3", "<=3", " >3cm", "<=3", " >3cm", "<=3",
"<=3", " >3cm", "<=3", "<=3", " >3cm", " >3cm", " >3cm", "<=3",
" >3cm", " >3cm", "<=3", "<=3", "<=3"), LN = c("Negative", "Positive",
"Negative", "Negative", "Negative", "Positive", "Negative", "Negative",
"Positive", "Positive", "Negative", "Positive", "Positive", "Positive",
"Positive", "Positive", "Positive", "Negative", "Positive", "Positive",
"Negative", "Negative", "Negative", "Negative", "Negative", "Negative",
"Positive", "Negative", "Positive", "Positive", "Positive", "Negative",
"Negative", "Positive", "Negative", "Positive", "Negative", "Negative",
"Positive", "Negative", "Negative", "Negative", "Negative", "Positive",
"Negative", "Positive", "Positive", "Negative", "Negative", "Positive"
)), class = "data.frame", row.names = c(NA, -50L))
可以将循环更改为如下写法(预先分配结果列表,并按索引迭代):
# Names of the covariates that each get their own univariate Cox model
variables_to_test <- c("Grade", "DCIS", "LVE", "Tsize")

# Pre-allocated list holding one coefficient table per covariate
results_list <- vector(mode = "list", length = length(variables_to_test))

# Fit one univariate model per covariate
for (i in seq_along(variables_to_test)) {
  variable <- variables_to_test[[i]]
  values <- data[[variable]]

  # Keep only the rows where the current covariate is neither NA nor ""
  subset_data <- data[!is.na(values) & values != "", ]

  # Build `Surv(OS, OS.Event) ~ <variable>` so the model contains only the
  # covariate under test. Writing `~ .` instead would add every remaining
  # column of subset_data (ID, LN and the other covariates) to each model,
  # which is what produced the non-convergence warnings on this data.
  model_formula <- reformulate(variable, response = "Surv(OS, OS.Event)")

  fit <- coxph(model_formula, data = subset_data)
  results_list[[i]] <- CoxphToDF(summary(fit))
}

# Stack the per-covariate tables row-wise into a single object
results_df <- do.call(what = "rbind", args = results_list)
请注意,如果公式写成 `~ .`(即把数据框中其余所有列都作为协变量),每个模型拟合都会发出如下警告:
1: In coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
Ran out of iterations and did not converge