我有一个数据框(df),其中有一列 `problem`,记录了人们到急诊室就诊时的医疗状况(例如 "sprain"(扭伤)、"cut"(割伤)、"infection"(感染))。
然而,有些人在接诊之前就离开了——在这种情况下,他们的 `problem` 被标记为 "left before being seen"(未接诊即离开)。
我想修改我的数据框,为这些“未接诊即离开”的人插补(impute)他们来急诊室就诊的问题是什么。
我知道每种问题在总体中所占的比例:
sprain = 0.3
cut = 0.5
infection = 0.2.
如何按照这些问题在总体中出现的比例,把 "sprain"、"cut" 和 "infection" 随机分配给数据框中 `problem` 等于 "left before being seen" 的观测?结果应放在一个名为 `imputed_problem` 的新列中,这样我就不会丢失“哪些人未接诊即离开”这一信息。
注意:此数据是总体,而不是样本。
您可以先构造一个由这三种问题组成的向量,其中每种问题的个数按所需比例确定,然后使用 `sample` 将该向量随机打乱顺序,再赋给对应的行。
# Impute a problem for every patient who left before being seen, using fixed
# counts that match the population proportions (sprain 0.3, cut 0.5,
# infection 0.2). Reads `df$problem`; writes `df$imputed_problem`.

# Rows to impute. `%in%` never returns NA, so missing `problem` values cannot
# turn the mask (or the count below) into NA.
left.rows <- df$problem %in% "left before being seen"
n.left <- sum(left.rows)

# Infection takes the remainder so the three counts always sum to n.left.
n.sprain <- round(n.left * 0.3)
n.cut <- round(n.left * 0.5)
n.infection <- n.left - (n.sprain + n.cut)

# Create the column explicitly (NA for rows that are not imputed) so the
# sub-assignment below is well defined even when no row matches.
if (!"imputed_problem" %in% names(df)) {
  df$imputed_problem <- rep(NA_character_, nrow(df))
}

# Shuffle the labels themselves rather than indexing by sample(1:n.left):
# when n.left is 0, 1:0 evaluates to c(1, 0) and would inject a spurious NA.
df$imputed_problem[left.rows] <- sample(c(
  rep("sprain", n.sprain),
  rep("cut", n.cut),
  rep("infection", n.infection)
))
检查
# Share of each imputed label; table() ignores NA entries by default.
prop.table(table(df[["imputed_problem"]]))
cut infection sprain
0.5 0.2 0.3
数据:
structure(list(problem = c("cut", "infection", "cut", "sprain",
"cut", "cut", "sprain", "cut", "cut", "infection", "cut", "sprain",
"sprain", "sprain", "infection", "sprain", "cut", "sprain", "infection",
"sprain", "cut", "infection", "cut", "infection", "infection",
"cut", "cut", "sprain", "infection", "sprain", "infection", "infection",
"infection", "cut", "cut", "sprain", "infection", "sprain", "sprain",
"cut", "infection", "sprain", "infection", "infection", "cut",
"cut", "cut", "cut", "infection", "cut", "cut", "cut", "cut",
"infection", "cut", "cut", "cut", "sprain", "sprain", "infection",
"infection", "sprain", "sprain", "infection", "sprain", "sprain",
"infection", "infection", "cut", "cut", "sprain", "cut", "infection",
"infection", "cut", "cut", "cut", "cut", "cut", "infection",
"cut", "sprain", "cut", "infection", "sprain", "sprain", "sprain",
"cut", "cut", "cut", "infection", "cut", "sprain", "cut", "sprain",
"cut", "sprain", "cut", "infection", "cut", "sprain", "cut",
"cut", "cut", "cut", "cut", "cut", "cut", "sprain", "sprain",
"sprain", "cut", "cut", "cut", "cut", "sprain", "sprain", "sprain",
"sprain", "cut", "infection", "sprain", "infection", "sprain",
"infection", "infection", "cut", "infection", "sprain", "cut",
"infection", "infection", "infection", "sprain", "cut", "sprain",
"cut", "sprain", "cut", "infection", "infection", "infection",
"cut", "sprain", "infection", "sprain", "sprain", "cut", "sprain",
"cut", "cut", "sprain", "sprain", "sprain", "cut", "cut", "cut",
"cut", "cut", "sprain", "cut", "cut", "cut", "cut", "sprain",
"sprain", "infection", "sprain", "cut", "cut", "cut", "sprain",
"sprain", "cut", "cut", "cut", "infection", "sprain", "cut",
"infection", "cut", "cut", "infection", "sprain", "sprain", "cut",
"cut", "sprain", "cut", "sprain", "infection", "cut", "cut",
"cut", "sprain", "cut", "cut", "cut", "infection", "cut", "cut",
"infection", "cut", "sprain", "sprain", "infection", "sprain",
"cut", "cut", "sprain", "infection", "infection", "sprain", "sprain",
"cut", "sprain", "cut", "infection", "cut", "infection", "infection",
"cut", "cut", "cut", "cut", "sprain", "cut", "infection", "cut",
"cut", "sprain", "cut", "cut", "sprain", "cut", "cut", "cut",
"infection", "cut", "sprain", "cut", "infection", "sprain", "sprain",
"sprain", "cut", "cut", "sprain", "sprain", "cut", "cut", "sprain",
"sprain", "cut", "infection", "sprain", "cut", "cut", "cut",
"cut", "cut", "infection", "sprain", "sprain", "infection", "cut",
"cut", "sprain", "cut", "sprain", "cut", "cut", "cut", "sprain",
"sprain", "infection", "cut", "sprain", "cut", "sprain", "cut",
"infection", "cut", "sprain", "cut", "cut", "infection", "infection",
"cut", "cut", "sprain", "cut", "infection", "infection", "infection",
"cut", "cut", "infection", "cut", "sprain", "sprain", "cut",
"cut", "cut", "sprain", "infection", "cut", "cut", "cut", "cut",
"cut", "sprain", "cut", "cut", "sprain", "cut", "infection",
"cut", "sprain", "cut", "sprain", "infection", "sprain", "cut",
"infection", "cut", "sprain", "infection", "sprain", "sprain",
"cut", "infection", "sprain", "cut", "cut", "sprain", "sprain",
"infection", "sprain", "cut", "sprain", "cut", "sprain", "cut",
"cut", "infection", "cut", "cut", "cut", "sprain", "cut", "infection",
"cut", "infection", "cut", "cut", "sprain", "infection", "infection",
"cut", "cut", "sprain", "cut", "cut", "sprain", "infection",
"cut", "cut", "infection", "sprain", "cut", "sprain", "cut",
"cut", "cut", "sprain", "infection", "cut", "cut", "infection",
"cut", "cut", "cut", "cut", "cut", "sprain", "sprain", "sprain",
"infection", "cut", "sprain", "sprain", "sprain", "cut", "cut",
"sprain", "cut", "cut", "cut", "cut", "cut", "cut", "sprain",
"cut", "cut", "infection", "infection", "sprain", "infection",
"cut", "infection", "cut", "cut", "cut", "sprain", "cut", "cut",
"sprain", "sprain", "infection", "sprain", "cut", "cut", "sprain",
"cut", "cut", "cut", "cut", "cut", "sprain", "infection", "infection",
"sprain", "cut", "cut", "sprain", "infection", "sprain", "sprain",
"cut", "cut", "cut", "cut", "cut", "sprain", "cut", "infection",
"sprain", "sprain", "cut", "sprain", "cut", "cut", "sprain",
"cut", "sprain", "cut", "cut", "cut", "sprain", "sprain", "cut",
"sprain", "cut", "infection", "cut", "cut", "cut", "cut", "sprain",
"cut", "cut", "cut", "cut", "sprain", "cut", "cut", "infection",
"infection", "cut", "cut", "cut", "infection", "cut", "sprain",
"cut", "cut", "sprain", "infection", "cut", "sprain", "sprain",
"cut", "sprain", "infection", "infection", "cut", "cut", "cut",
"sprain")), class = "data.frame", row.names = c(NA, -500L))