问题描述:我想应用一个公式,排除包含至少一个 NA 值的所有行。对于可重现的示例,我使用了以下数据框:
df <- data.frame(
stringsAsFactors = FALSE,
check.names = FALSE,
Date = c("01/11/1876",
"01/12/1876","01/01/1877","01/02/1877",
"01/03/1877","01/04/1877","01/05/1877",
"01/06/1877","01/07/1877","01/08/1877","01/09/1877",
"01/10/1877","01/11/1877","01/12/1877",
"01/01/1878","01/02/1878"),
`Bissen P [mm]` = c(59.2,139.4,
89.6,46.9,41.1,105.1,45.3,62.3,NA,119.1,
121.2,86.1,42.1,54,37.3,71.8),
`Bissen Q [mm]` = c(46.5,126.5,
111.7,53.9,49.4,28.1,31.9,22.3,19.1,24.9,
74.1,74.1,51.8,47.3,40.8,53.4),
`Ettel P [mm]` = c(54,131.3,
83.3,42.1,NA,98.3,40.6,56.9,44.7,111.8,
113.8,79.9,37.5,49,32.8,66.1),
`Ettel Q [mm]` = c(29.9,84.1,
77.9,39.9,35.1,21.7,24.4,15.7,14.5,20.1,
51.1,58.6,36,41.8,32.7,37.7),
`Aalbach P [mm]` = c(46.6,112.7,
71.7,36.5,31.7,84.4,35.2,49.1,38.7,96,
97.7,68.7,32.6,42.3,28.5,57),
`Aalbach Q [mm]` = c(59.8,132.4,
87.3,48.7,43.4,101.4,47.2,62.6,51.1,114.1,
116,84.1,44.3,55.1,39.9,71.2))
#Where I have 3 sites (Bissen, Ettel and Aalbach) and each site has 2 variables (P and Q)
#And I would like to obtain the runoff coefficient of each site (Q/P),
在黄色矩形中,我已指示每个站点必须排除哪些行,因为一行包含 NA。
#step 2: extract site names
site_names <- sub(" P \\[mm\\]| Q \\[mm\\]", "", names(df)[-1]) |>
unique()
site_names
#step 3: define function based on formula:
#Here I have indicated that NA should be excluded, however, it does not eliminate all rows as I need.
formula <- function(P, Q) {
runoffcoefficient = mean(Q, na.rm = TRUE)/mean(P, na.rm = TRUE)
return(data.frame(runoffcoefficient = runoffcoefficient))
}
# step 4: Loop through all sites
results <- list()
for (site in site_names) {
site_data <- df[, grepl(site, names(df))]
coefficients <- formula(site_data[[paste0(site, " P [mm]")]],
site_data[[paste0(site, " Q [mm]")]])
df$Site = site # Add the Site column to each site's anomalies
results[[site]] <- coefficients
}
> results
$Bissen
runoffcoefficient
1 0.7160308
$Ettel
runoffcoefficient
1 0.5588475
$Aalbach
runoffcoefficient
1 1.246611
如果消除了行,Bissen 将等于 0.7467,而 Ettel = 0.5624
任何帮助将不胜感激
你在追寻
complete.cases
df[complete.cases(df), ]