我按照 @agdamsbo 的评论中的示例计算了 R 中的平滑 Kaplan Meier 图 link
我已复制粘贴与此处评论中相同的代码:
library(tidyverse)
library(survival)
library(purrr)
library(ggsurvfit)
## Data
df <- survfit(Surv(time, status) ~ surg, data = ggsurvfit::df_colon) |> ggsurvfit::tidy_survfit(type = "survival")
df_split <- split(df,df$strata)
df_smoothed <- purrr::reduce(lapply(c("estimate","conf.low", "conf.high"), function(j) {
do.call(rbind,
lapply(seq_along(df_split), function(i) {
nms <- names(df_split)[i]
y <-
predict(mgcv::gam(as.formula(paste0(
j[[1]], " ~ s(time, bs = 'cs')"
)), data = df_split[[i]]))
df <- data.frame(df_split[[i]]$time, y, nms)
names(df) <- c("time", paste0(j[[1]], ".smooth"), "strata")
df
}))
}),dplyr::full_join) |> full_join(df)
#> Joining with `by = join_by(time, strata)`
#> Joining with `by = join_by(time, strata)`
#> Joining with `by = join_by(time, strata)`
## Plotting
ggplot(data=df_smoothed) +
geom_line(aes(x=time, y=estimate.smooth, color = strata))+
geom_ribbon(aes(x=time, ymin = conf.low.smooth, ymax = conf.high.smooth, fill = strata), alpha = 0.50)
上面的代码运行完美,对于每个层(我有 4 个组),我得到了一个相当平滑的 kaplan Meier 图。我无法使用阶跃函数,因为它们可以揭示微观数据。另外,我无法分享上述代码中平滑的 Kaplan Meier 图。
但是,每个层的曲线在 y 轴上从略高于 1 的位置开始。这是没有意义的,因为我的样本量一开始就应该是 100%。我认为这与由于曲线的平滑部分而编码的方式有关。
我试图解决这个问题: 我尝试将
cobs package
中的代码合并到上面的代码中,“强制”曲线在 1 处变平并从那里开始。它几乎对我有用,但是,我得到的 KM 图仅显示一层(我总共有 4 个)。另外,我在我的 survfit 函数中添加了权重,但我不知道在使用时如何合并权重cobs
。
这正是我尝试用 R 中的
cobs
修复它的方法:
library(tidyverse)
library(survival)
library(purrr)
library(ggsurvfit)
## Data
df <- survfit(Surv(time, outcome) ~ exposure+strata(sex), data = my_data) |> ggsurvfit::tidy_survfit(type = "survival")
df_split <- split(df,df$strata)
df_smoothed <- purrr::reduce(lapply(c("estimate","conf.low", "conf.high"), function(j) {
do.call(rbind,
lapply(seq_along(df_split), function(i) {
nms <- names(df_split)[i]
y <-
predict(mgcv::gam(as.formula(paste0(
j[[1]], " ~ s(time, bs = 'cs')"
)), data = df_split[[i]]))
df <- data.frame(df_split[[i]]$time, y, nms)
names(df) <- c("time", paste0(j[[1]], ".smooth"), "strata")
df
}))
}),dplyr::full_join) |> full_join(df)
#> Joining with `by = join_by(time, strata)`
#> Joining with `by = join_by(time, strata)`
#> Joining with `by = join_by(time, strata)`
library(cobs) ##NEW CODING STARTS FROM HERE
pw <- rbind(c( 1,min(df_smoothed1$time),1),
c(-1,max(df_smoothed$time),0))
x <- df_smoothed$time
y <- df_smoothed$estimate.smooth
ft <- cobs(x,y, constraint="decrease", nknots=4,pointwise= con2,
degree = 2)
fit <- predict(ft, x) [, 'fit']
df_smoothed$x <- x
df_smoothed$y <- y
df_smoothed$fit <- fit
## Plotting
ggplot(data=df_smoothed, aes(x,y, color = strata)) +
geom_line(aes(y=fit))+
geom_ribbon(aes(x=time, ymin = conf.low.smooth, ymax = conf.high.smooth, fill = strata), alpha = 0.50)
认识到上述关于这种类型的平滑对于 KM 曲线是否有意义的重要警告,您可以执行以下操作:
survfit
对象## Data
data = ggsurvfit::df_colon %>% filter(surg=="Limited Time Since Surgery")
# create survfit object
df_surv <- survfit(Surv(time, status) ~ surg, data = data)
# tidy using tidy_survfit
df <- ggsurvfit::tidy_survfit(df_surv, type = "survival")
constrain_0_1_smooth_km <- function(df,y,nknots=30, xinterval=c(0,10)) {
knots <- data.frame(time = seq(xinterval[1],xinterval[2],length=nknots))
sm <- smoothCon(s(time,k=nknots,bs="cr"),df,knots=knots)[[1]]
sm$X[,1] <- 0
sm$S[[1]][1,] <- 0
sm$S[[1]][,1] <- 0
predict(gam(df[[y]] ~ sm$X - 1 + offset(off),paraPen=list(X=list(sm$S[[1]]))))
}
estimate
以及平滑后的 conf.high
和 conf.low
# plot the KM curve
plot(df_surv, conf.int = F)
# add the smoothed version constrained at 0,1
lines(x = df$time, y=constrain_0_1_smooth_km(df, "estimate"),col="red")
lines(x = df$time, y=constrain_0_1_smooth_km(df, "conf.high"),col="blue")
lines(x = df$time, y=constrain_0_1_smooth_km(df, "conf.low"),col="blue")
输出:
当然,您可以使用
ggplot2
包进行绘图,但我将其留给您,因为我认为问题的重点是如何约束平滑。