library(NHANES)
library(plyr)
# Generate example dataset
myvars <- c("ID","Gender", "Age", "Diabetes","BPSysAve", "BPDiaAve")
nhanes <- as.data.frame(NHANESraw[myvars])
nhanes$age.range <- cut(nhanes$Age, breaks = c(-Inf,45, 65, Inf), labels = c("<45Yrs", "45-65Yrs", ">65Yrs"))
# Fitting the GLM model
fit <- glm(BPSysAve ~ Diabetes + age.range, data = nhanes)
# Producing df with predictions
pred <- data.frame(diabetes = fit$model$Diabetes,
sbp = predict.glm(fit, type = "response"),
age.range = fit$model$age.range)
# Summarize prediction
ddply(pred, c("diabetes", "age.range"), summarize,
N = sum(!is.na(sbp)),
sbp.mean = mean(sbp),
sd = sd(sbp)
)
diabetes age.range N sbp.mean sd
1 No <45Yrs 8616 110.6067 0
2 No 45-65Yrs 2942 124.9528 0
3 No >65Yrs 1701 133.2779 0
4 Yes <45Yrs 214 113.6860 0
5 Yes 45-65Yrs 742 128.0321 0
6 Yes >65Yrs 645 136.3572 0
原因是'sbp'中只有一个唯一值,按'糖尿病'和'age.range'分组]
library(dplyr)
pred %>%
group_by(diabetes, age.range) %>%
summarise(n = n_distinct(sbp))
# A tibble: 6 x 3
# Groups: diabetes [2]
# diabetes age.range n
# <fct> <fct> <int>
#1 No <45Yrs 1
#2 No 45-65Yrs 1
#3 No >65Yrs 1
#4 Yes <45Yrs 1
#5 Yes 45-65Yrs 1
#6 Yes >65Yrs 1
sd(c(15, 15, 15))
#[1] 0