我正在使用 R,想根据每位外科医生的实际服务年数计算最后一列 `Annual_Cases`。变量 `Years` 以每 3 年为一个时间区间进行报告。有些外科医生在不止一家医院工作。
下面是我需要修改的代码和我的数据:
library(dplyr);library(tidyr)
# Attempted per-surgeon annual case rate: drop rows with missing Cases, group by
# Surgeon, then divide the summed Cases by `n`.
# NOTE(review): this line does not parse as written -- the parentheses are
# unbalanced (`summarise(...)` is already closed before `/n, na.rm=T)`), and
# `n` is not defined anywhere in the visible code.
# NOTE(review): `Annaul_Cases` looks like a typo for `Annual_Cases` -- confirm.
df %>% drop_na(Cases) %>% group_by(Surgeon)%>% summarise(Annaul_Cases= sum(Cases))/n, na.rm=T)-> df # I need to calculate the n column (number of years the surgeon was in service)
# Example data (looks like dput() output): a 24-row tibble with one row per
# Surgeon x Center x 3-year `Years` interval.
#   - Surgeon "A" appears at two centers, surgeon "B" at one.
#   - `Cases` is numeric, with NA where no count is reported.
#   - `Deaths` and `Annual_Cases` are character vectors; their missing values
#     are the literal string "NA", not a true NA.
# NOTE(review): the object is not assigned here; presumably it is meant to be
# stored as `df` (e.g. `df <- structure(...)`) -- confirm.
structure(list(Serial.ID = c(215, 522, 903, 1210, 1591, 1898,
2279, 2586, 2967, 3274, 3655, 3962, 4343, 4650, 5031, 5338, 330,
1018, 1706, 2394, 3082, 3770, 4458, 5146), Surgeon = c("A", "A",
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A",
"A", "B", "B", "B", "B", "B", "B", "B", "B"), Center = c("Maimonides Medical Center",
"St. Vincents Hospital and Medical Center", "Maimonides Medical Center",
"St. Vincents Hospital and Medical Center", "Maimonides Medical Center",
"St. Vincents Hospital and Medical Center", "Maimonides Medical Center",
"St. Vincents Hospital and Medical Center", "Maimonides Medical Center",
"St. Vincents Hospital and Medical Center", "Maimonides Medical Center",
"St. Vincents Hospital and Medical Center", "Maimonides Medical Center",
"St. Vincents Hospital and Medical Center", "Maimonides Medical Center",
"St. Vincents Hospital and Medical Center", "Mount Sinai Hospital",
"Mount Sinai Hospital", "Mount Sinai Hospital", "Mount Sinai Hospital",
"Mount Sinai Hospital", "Mount Sinai Hospital", "Mount Sinai Hospital",
"Mount Sinai Hospital"), Years = c("1996-1998", "1996-1998",
"1999-2001", "1999-2001", "2002-2004", "2002-2004", "2005-2007",
"2005-2007", "2008-2010", "2008-2010", "2011-2013", "2011-2013",
"2014-2016", "2014-2016", "2017-2019", "2017-2019", "1996-1998",
"1999-2001", "2002-2004", "2005-2007", "2008-2010", "2011-2013",
"2014-2016", "2017-2019"), Cases = c(377, 19, 223, NA, 27, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 110, 18, 13,
17, 4, 1), Deaths = c("10", "1", "6", "NA", "0", "NA", "NA",
"NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA",
"1", "0", "0", "0", "0", "0"), Annual_Cases = c("NA", "NA", "NA",
"NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA",
"NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA")), row.names = c(NA,
-24L), class = c("tbl_df", "tbl", "data.frame"))
任何建议将不胜感激
library(tidyverse)
# Average cases per year for each Surgeon/Center pair.
#
# `Years` (e.g. "1996-1998") is split into integer `start_yr` / `end_yr`
# columns while the original column is kept. Each row's interval length is
# counted inclusively (end - start + 1). Within each Surgeon/Center group the
# non-missing `Cases` are summed and divided by the total number of years
# covered by ALL of that group's rows -- rows whose `Cases` is NA still
# contribute their years to the denominator.
df |>
  separate(
    col = Years,
    into = c("start_yr", "end_yr"),
    remove = FALSE,
    convert = TRUE
  ) |>
  mutate(span_yrs = end_yr - start_yr + 1) |>
  summarise(
    annual_cases = sum(Cases, na.rm = TRUE) / sum(span_yrs),
    .by = c(Surgeon, Center)
  )
结果
# A tibble: 3 × 3
Surgeon Center annual_cases
<chr> <chr> <dbl>
1 A Maimonides Medical Center 26.1
2 A St. Vincents Hospital and Medical Center 0.792
3 B Mount Sinai Hospital 6.79