我需要应用公式并使用这些新值创建一个新列(“R_CO_GC_D”)。
我需要先按一列(“HOLIDAY”)过滤数据框,再按另外两列(“SEASON”和“TIME_SLOT”)分组计算平均值,然后从每个原始值中减去它所属分组(由这两列分组得到)的平均值。
我一直在尝试通过使用平均值创建数据框,然后使用管道内的平均值作为 mutate 内公式的一部分来实现此目的,但我在新列中得到了 NA。我想用一个循环来自动化一点,但我不确定在 R 中实现我想要的效果的最佳方法。
# Mean of R_CO_GC for each SEASON x TIME_SLOT combination, computed from the
# non-holiday rows (HOLIDAY == "N") only. The result has one row per
# combination, with the mean stored in the `media` column.
Season_time_slot_media <- lr2_analysis_CO_FIL %>%
  filter(HOLIDAY == "N") %>%
  group_by(SEASON, TIME_SLOT) %>%
  summarise(media = mean(R_CO_GC)) %>%
  # summarise() only peels off the last grouping level; ungroup explicitly so
  # the summary table is not left grouped by SEASON.
  ungroup()
# Build the adjusted column R_CO_GC_D for every non-holiday row:
#   R_CO_GC_D = R_CO_GC - (group mean - overall mean)
# "group mean" is the SEASON x TIME_SLOT mean held in
# Season_time_slot_media$media; "overall mean" is mean(R_CO_GC) over all rows
# of lr2_analysis_CO_FIL.
#
# The earlier version chained four mutate() calls, each rebuilding R_CO_GC_D
# with a case_when() that had no default. Every call therefore overwrote the
# rows filled by the previous one with NA, and only the WINTER rows kept a
# value. It also picked the means by hard-coded position (media[1] ...
# media[24]), which silently misaligns if any SEASON/TIME_SLOT combination is
# absent. Joining the means by key avoids both problems.
lr2_analysis_CO_FIL_DE <- lr2_analysis_CO_FIL %>%
  left_join(Season_time_slot_media, by = c("SEASON", "TIME_SLOT")) %>%
  mutate(
    # Rows whose HOLIDAY is not "N" match no case and are left as NA.
    R_CO_GC_D = case_when(
      HOLIDAY == "N" ~ R_CO_GC - (media - mean(R_CO_GC))
    )
  ) %>%
  # Drop the helper column brought in by the join.
  select(-media)
我希望你能帮助我...抱歉,如果我没有很好地解释自己...英语不是我的母语。
合并在这里会有帮助......
首先是一些示例数据:
# Example data: 96 non-holiday rows, 24 per season. Within each run of 12 rows
# the six time slots appear in order, two rows per slot; R_CO_GC is 1..96.
lr2_analysis_CO_FIL <- data.frame(
  HOLIDAY = rep("N", 96),
  SEASON = rep(c("AUTUMN", "SPRING", "SUMMER", "WINTER"), each = 24),
  TIME_SLOT = rep(
    rep(c("07-09", "10-12", "13-15", "16-17", "18-20", "21-06"), each = 2),
    times = 8
  ),
  R_CO_GC = 1:96
)
使用问题中的代码计算各组的平均值:
# Per-group means: average R_CO_GC of the non-holiday rows (HOLIDAY == "N")
# for every SEASON x TIME_SLOT pair, returned in the `media` column.
Season_time_slot_media <- lr2_analysis_CO_FIL %>%
  filter(HOLIDAY == "N") %>%
  group_by(SEASON, TIME_SLOT) %>%
  summarise(media = mean(R_CO_GC)) %>%
  # Remove the SEASON grouping that summarise() leaves behind by default.
  ungroup()
合并两个数据框
# Attach each row's SEASON x TIME_SLOT mean (`media`) to the original data.
# all.x = TRUE keeps every row of lr2_analysis_CO_FIL: merge() defaults to an
# inner join, which would silently drop rows whose SEASON/TIME_SLOT pair has no
# entry in Season_time_slot_media. Such rows now get media = NA instead.
lr2.combined <- merge(
  lr2_analysis_CO_FIL,
  Season_time_slot_media,
  by = c("SEASON", "TIME_SLOT"),
  all.x = TRUE
)
特定于组的平均值现在与 lr2_analysis_CO_FIL 中的每一行相结合:
> head(lr2.combined)
SEASON TIME_SLOT HOLIDAY R_CO_GC media
1 AUTUMN 07-09 N 1 7.5
2 AUTUMN 07-09 N 2 7.5
3 AUTUMN 07-09 N 13 7.5
4 AUTUMN 07-09 N 14 7.5
5 AUTUMN 10-12 N 3 9.5
6 AUTUMN 10-12 N 4 9.5
现在可以进行计算了。我已将
mean(R_CO_GC)
替换为 N.mean
如果这不符合你的需求,可以将 N.mean 设为 0 或其他合适的值。
# Grand mean of R_CO_GC across the non-holiday rows (HOLIDAY == 'N').
# which() is kept so that rows with a missing HOLIDAY are skipped instead of
# being selected as NA rows.
N.mean <- with(
  lr2_analysis_CO_FIL,
  mean(R_CO_GC[which(HOLIDAY == 'N')])
)
# Add R_CO_GC_D: each value minus the offset of its `media` value from the
# grand mean.
lr2_analysis_CO_FIL_DE <- mutate(
  lr2.combined,
  R_CO_GC_D = R_CO_GC - (media - N.mean)
)
如果需要,可以从新数据框中删除 media 列:
# Remove the helper column by name rather than by position: index 5 only
# points at `media` when the data frame has exactly the example's six columns,
# and would drop some other column on real data with a different layout.
lr2_analysis_CO_FIL_DE$media <- NULL