通过应用公式将值添加到新列,同时考虑过滤器和每行所属的组

问题描述 投票:0回答:1

我需要应用公式并使用这些新值创建一个新列(“R_CO_GC_D”)。

我需要按一列(“HOLIDAY”)过滤数据框,并按另外两列(“SEASON”和“TIME_SLOT”)分组计算平均值,然后在相同的过滤条件下,从每一行的原始值中减去该行所属组(由这两列分组得到的组)的平均值。

我尝试先创建一个包含各组平均值的数据框,然后在管道中把这些平均值用作 mutate 内公式的一部分,但新列中得到的是 NA。我想用循环让这个过程更自动化一些,但不确定在 R 中实现这一目标的最佳方法。

# Mean of R_CO_GC for every (SEASON, TIME_SLOT) combination, computed from the
# non-holiday rows only (HOLIDAY == 'N'). The result has one row per
# combination with the mean stored in the column `media`.
# NOTE(review): mean() returns NA for a group that contains an NA in R_CO_GC;
# add na.rm = TRUE if missing values should be skipped - confirm intent.
Season_time_slot_media <- lr2_analysis_CO_FIL %>%
  filter(HOLIDAY == 'N') %>%
  group_by(SEASON, TIME_SLOT) %>%
  summarise(media = mean(R_CO_GC)) %>%
  # summarise() peels off only the last grouping level, so the result would
  # otherwise stay grouped by SEASON; return a plain, ungrouped table.
  ungroup()

# Builds lr2_analysis_CO_FIL_DE by adding the column R_CO_GC_D to
# lr2_analysis_CO_FIL through four chained mutate() calls, one per season.
# Every case_when() branch handles one HOLIDAY == 'N' / SEASON / TIME_SLOT
# combination and evaluates
#   R_CO_GC - (Season_time_slot_media$media[k] - mean(R_CO_GC)).
#
# NOTE(review): all four mutate() calls assign the same column and none of the
# case_when() calls has a fallback branch. case_when() yields NA for rows that
# match no condition, so each call overwrites the values written by the
# previous call with NA; after the last call only the WINTER rows can be
# non-NA. This is the likely source of the NAs in the new column.
#
# NOTE(review): media[k] selects a summary row purely by position. This
# presumably relies on Season_time_slot_media being ordered by SEASON, then
# TIME_SLOT, with all 24 combinations present - TODO confirm.
#
# NOTE(review): there is no group_by() in this pipeline, so mean(R_CO_GC) is
# presumably the mean over every row of the input (holiday rows included) -
# confirm that lr2_analysis_CO_FIL is not already grouped.
lr2_analysis_CO_FIL_DE <- lr2_analysis_CO_FIL %>%
  # AUTUMN combinations: summary rows 1-6.
  mutate(R_CO_GC_D = case_when(HOLIDAY == 'N' & SEASON == "AUTUMN" & TIME_SLOT == "07-09" 
                               ~ R_CO_GC - ((Season_time_slot_media$media[1]) - mean(R_CO_GC)), 
                               HOLIDAY == 'N' & SEASON == "AUTUMN" & TIME_SLOT == "10-12" 
                               ~ R_CO_GC - ((Season_time_slot_media$media[2]) - mean(R_CO_GC)), 
                               HOLIDAY == 'N' & SEASON == "AUTUMN" & TIME_SLOT == "13-15" 
                               ~ R_CO_GC - ((Season_time_slot_media$media[3]) - mean(R_CO_GC)), 
                               HOLIDAY == 'N' & SEASON == "AUTUMN" & TIME_SLOT == "16-17" 
                               ~ R_CO_GC - ((Season_time_slot_media$media[4]) - mean(R_CO_GC)), 
                               HOLIDAY == 'N' & SEASON == "AUTUMN" & TIME_SLOT == "18-20" 
                               ~ R_CO_GC - ((Season_time_slot_media$media[5]) - mean(R_CO_GC)), 
                               HOLIDAY == 'N' & SEASON == "AUTUMN" & TIME_SLOT == "21-06" 
                               ~ R_CO_GC - ((Season_time_slot_media$media[6]) - mean(R_CO_GC)))) %>%
  # SPRING combinations: summary rows 7-12. Reassigns R_CO_GC_D, so the AUTUMN
  # results above become NA here (no branch matches those rows).
  mutate(R_CO_GC_D = case_when(HOLIDAY == 'N' & SEASON == "SPRING" & TIME_SLOT == "07-09" 
                               ~ R_CO_GC - ((Season_time_slot_media$media[7]) - mean(R_CO_GC)), 
                               HOLIDAY == 'N' & SEASON == "SPRING" & TIME_SLOT == "10-12" 
                               ~ R_CO_GC - ((Season_time_slot_media$media[8]) - mean(R_CO_GC)), 
                               HOLIDAY == 'N' & SEASON == "SPRING" & TIME_SLOT == "13-15" 
                               ~ R_CO_GC - ((Season_time_slot_media$media[9]) - mean(R_CO_GC)), 
                               HOLIDAY == 'N' & SEASON == "SPRING" & TIME_SLOT == "16-17" 
                               ~ R_CO_GC - ((Season_time_slot_media$media[10]) - mean(R_CO_GC)), 
                               HOLIDAY == 'N' & SEASON == "SPRING" & TIME_SLOT == "18-20" 
                               ~ R_CO_GC - ((Season_time_slot_media$media[11]) - mean(R_CO_GC)), 
                               HOLIDAY == 'N' & SEASON == "SPRING" & TIME_SLOT == "21-06" 
                               ~ R_CO_GC - ((Season_time_slot_media$media[12]) - mean(R_CO_GC)))) %>%
  # SUMMER combinations: summary rows 13-18. Again replaces the whole column.
  mutate(R_CO_GC_D = case_when(HOLIDAY == 'N' & SEASON == "SUMMER" & TIME_SLOT == "07-09" 
                               ~ R_CO_GC - ((Season_time_slot_media$media[13]) - mean(R_CO_GC)), 
                               HOLIDAY == 'N' & SEASON == "SUMMER" & TIME_SLOT == "10-12" 
                               ~ R_CO_GC - ((Season_time_slot_media$media[14]) - mean(R_CO_GC)), 
                               HOLIDAY == 'N' & SEASON == "SUMMER" & TIME_SLOT == "13-15" 
                               ~ R_CO_GC - ((Season_time_slot_media$media[15]) - mean(R_CO_GC)), 
                               HOLIDAY == 'N' & SEASON == "SUMMER" & TIME_SLOT == "16-17" 
                               ~ R_CO_GC - ((Season_time_slot_media$media[16]) - mean(R_CO_GC)), 
                               HOLIDAY == 'N' & SEASON == "SUMMER" & TIME_SLOT == "18-20" 
                               ~ R_CO_GC - ((Season_time_slot_media$media[17]) - mean(R_CO_GC)), 
                               HOLIDAY == 'N' & SEASON == "SUMMER" & TIME_SLOT == "21-06" 
                               ~ R_CO_GC - ((Season_time_slot_media$media[18]) - mean(R_CO_GC)))) %>%
  # WINTER combinations: summary rows 19-24. Being the last assignment, these
  # are the only values that remain in R_CO_GC_D.
  mutate(R_CO_GC_D = case_when(HOLIDAY == 'N' & SEASON == "WINTER" & TIME_SLOT == "07-09" 
                               ~ R_CO_GC - ((Season_time_slot_media$media[19]) - mean(R_CO_GC)), 
                               HOLIDAY == 'N' & SEASON == "WINTER" & TIME_SLOT == "10-12" 
                               ~ R_CO_GC - ((Season_time_slot_media$media[20]) - mean(R_CO_GC)), 
                               HOLIDAY == 'N' & SEASON == "WINTER" & TIME_SLOT == "13-15" 
                               ~ R_CO_GC - ((Season_time_slot_media$media[21]) - mean(R_CO_GC)), 
                               HOLIDAY == 'N' & SEASON == "WINTER" & TIME_SLOT == "16-17" 
                               ~ R_CO_GC - ((Season_time_slot_media$media[22]) - mean(R_CO_GC)), 
                               HOLIDAY == 'N' & SEASON == "WINTER" & TIME_SLOT == "18-20" 
                               ~ R_CO_GC - ((Season_time_slot_media$media[23]) - mean(R_CO_GC)), 
                               HOLIDAY == 'N' & SEASON == "WINTER" & TIME_SLOT == "21-06" 
                               ~ R_CO_GC - ((Season_time_slot_media$media[24]) - mean(R_CO_GC))))

我希望你能帮助我...抱歉,如果我没有很好地解释自己...英语不是我的母语。

r group-by mutate
1个回答
0
投票

合并在这里会有帮助......

首先是一些示例数据:

# Example input: 96 non-holiday rows, 24 consecutive rows per season. Within
# every run of 12 rows each of the six time slots appears twice in a row, and
# R_CO_GC simply counts from 1 to 96.
lr2_analysis_CO_FIL <- data.frame(
  HOLIDAY = rep("N", 96),
  SEASON = rep(c("AUTUMN", "SPRING", "SUMMER", "WINTER"), each = 24),
  TIME_SLOT = rep(
    rep(c("07-09", "10-12", "13-15", "16-17", "18-20", "21-06"), each = 2),
    times = 8
  ),
  R_CO_GC = 1:96
)

使用问题中的代码计算各组的平均值:

# Group means: average R_CO_GC per (SEASON, TIME_SLOT), using only the rows
# where HOLIDAY == 'N'. One output row per combination, mean in `media`.
# NOTE(review): a group containing NA in R_CO_GC gets an NA mean; pass
# na.rm = TRUE to mean() if that is not wanted - confirm intent.
Season_time_slot_media <- lr2_analysis_CO_FIL %>%
  filter(HOLIDAY == 'N') %>%
  group_by(SEASON, TIME_SLOT) %>%
  summarise(media = mean(R_CO_GC)) %>%
  # Without this the summary is still grouped by SEASON, because summarise()
  # removes only the innermost grouping variable.
  ungroup()

合并两个数据框

# Attach each group's mean (`media`) to the rows of lr2_analysis_CO_FIL that
# share its SEASON and TIME_SLOT.
# all.x = TRUE keeps rows whose (SEASON, TIME_SLOT) pair has no entry in
# Season_time_slot_media (their `media` is NA); the default inner join would
# silently drop them.
# merge() puts the key columns first and, by default, sorts the rows by them.
lr2.combined <- merge(
  lr2_analysis_CO_FIL,
  Season_time_slot_media,
  by = c("SEASON", "TIME_SLOT"),
  all.x = TRUE
)

特定于组的平均值现在与 lr2_analysis_CO_FIL 中的每一行相结合:

> head(lr2.combined)
  SEASON TIME_SLOT HOLIDAY R_CO_GC media
1 AUTUMN     07-09       N       1   7.5
2 AUTUMN     07-09       N       2   7.5
3 AUTUMN     07-09       N      13   7.5
4 AUTUMN     07-09       N      14   7.5
5 AUTUMN     10-12       N       3   9.5
6 AUTUMN     10-12       N       4   9.5

现在可以进行计算了。我已将 mean(R_CO_GC) 替换为 N.mean(见下方代码);如果这不符合你的需求,可以设置 N.mean = 0 或其他合适的值。

# Determine the overall mean of R_CO_GC across the non-holiday rows.
# %in% never returns NA, so rows with a missing HOLIDAY are left out.
N.mean <- with(lr2_analysis_CO_FIL, mean(R_CO_GC[HOLIDAY %in% 'N']))

# Create the new column: the original value minus the deviation of its group
# mean from the overall non-holiday mean.
# Both `media` and N.mean are derived from HOLIDAY == 'N' rows only, so the
# adjustment is applied to those rows only; case_when() has no fallback branch
# here on purpose, which leaves every other row as NA.
lr2_analysis_CO_FIL_DE <- lr2.combined %>%
  mutate(R_CO_GC_D = case_when(HOLIDAY == 'N' ~ R_CO_GC - (media - N.mean)))

如果需要,可以从新数据框中删除 media 列:

# Drop the helper column by name. A positional index such as -5 removes the
# wrong column as soon as the data frame has a different number or order of
# columns.
lr2_analysis_CO_FIL_DE$media <- NULL
© www.soinside.com 2019 - 2024. All rights reserved.