如何使用 tidyr R 包中的填充功能和组

问题描述 投票:0回答:1

这个问题是我的其他问题的后续问题,如何在填充数据行时获得整洁的填充函数以使用组

我想为每个 TIMESTAMP 值获取一行。我想按列填充真实数据,无论是字符、浮点还是其他。如果两个 TIMESTAMP 行都有 NA,那么我需要返回 NA。我想要返回所有列。

我的数据的一小部分:

dupped_records <- structure(list(Station_Number = c("3999", "3999", "3999", "3999"
    ), Station_Name = c("Eagle 3NW Beta", "Eagle 3NW Beta", "Eagle 3NW Beta","Eagle 3NW 
    Beta"), NWSLI = c("EEMN1", "EEMN1", "EEMN1", "EEMN1"), Station_Legacy_Name = c("Rogers 
    Farm Tower", "Rogers Farm Tower", "Rogers Farm Tower", "Rogers Farm Tower"), Lat = 
    c(40.846994, 40.846994, 40.846994, 40.846994), Lon = c(-96.467199, -96.467199, 
    -96.467199, -96.467199), Elev_m = c("368.7", "368.7", "368.7", "368.7"), TIMESTAMP = 
    c("2021-07-28 20:05:00", "2021-07-28 20:05:00", "2021-07-28 20:06:00", "2021-07-28 
    20:06:00"), RECORD = c(18L, 18L, 19L, 19L), Ta_2m_Avg = c(34.3, NA, 34.26, NA), TaMax_2m = c(34.39, 
NA, 34.35, NA), TaMaxTime_2m = c("2021-07-28 20:04:15", NA, "2021-07-28 20:05:05", 
NA), TaMin_2m = c(34.25, NA, 34.16, NA), TaMinTime_2m = c("2021-07-28 20:04:45", 
NA, "2021-07-28 20:05:50", NA), RH_2m_Avg = c(57.35, NA, 56.97, 
NA), RHMax_2m = c(58.68, NA, 57.99, NA), RHMaxTime_2m = c("2021-07-28 20:04:30", 
NA, "2021-07-28 20:05:40", NA), RHMin_2m = c(55.86, NA, 55.41, 
NA), RHMinTime_2m = c("2021-07-28 20:04:45", NA, "2021-07-28 20:05:20", 
NA), Dp_2m_Avg = c(24.68, NA, 24.53, NA), DpMax_2m = c(25.06, 
NA, 24.77, NA), DpMaxTime_2m = c("2021-07-28 20:04:25", NA, "2021-07-28 20:05:40", 
NA), DpMin_2m = c(24.19, NA, 24.06, NA), DpMinTime_2m = c("2021-07-28 20:04:45", 
NA, "2021-07-28 20:05:20", NA), HeatIndex_2m_Avg = c(41.9, NA, 
41.64, NA), HeatIndexMax_2m = c(42.51, NA, 41.95, NA), HeatIndexMaxTime_2m = c("2021-07-28 20:04:25", 
NA, "2021-07-28 20:05:45", NA), WindChill_2m_Avg = c(37.65, NA, 
37.77, NA), WindChillMin_2m = c(37.47, NA, 37.56, NA), WindChillMinTime_2m = c("2021-07-28 20:04:45", 
NA, "2021-07-28 20:05:20", NA), WndAveSpd_3m = c(2.746, NA, 3.796, 
NA), WndVecMagAve_3m = c(2.594, NA, 3.607, NA), WndAveDir_3m = c(166.5, 
NA, 152.6, NA), WndAveDirSD_3m = c(19.03, NA, 18.05, NA), WndMaxSpd5s_3m = c(5.165, 
NA, 5.498, NA), WndMaxSpd5sTime_3m = c("2021-07-28 20:05:00", 
NA, "2021-07-28 20:05:05", NA), WndMax_5sec_Dir_3m = c(158.3, 
NA, 162.8, NA), PresAvg_1pnt5m = c(972.3953, NA, 972.3508, NA
), PresMax_1pnt5m = c(972.4536, NA, 972.42, NA), PresMaxTime_1pnt5m = c("2021-07-28 20:04:25", 
NA, "2021-07-28 20:05:10", NA), PresMin_1pnt5m = c(972.3868, 
NA, 972.2872, NA), PresMinTime_1pnt5m = c("2021-07-28 20:04:35", 
NA, "2021-07-28 20:05:50", NA), Solar_2m_Avg = c(771.1, NA, 776.1, 
NA), Rain_1m_Tot = c(0, NA, 0, NA), Ts_bare_10cm_Avg = c(32.98, 
NA, 33.01, NA), TsMax_bare_10cm = c(33, NA, 33.02, NA), TsMaxTime_bare_10cm = c("2021-07-28 20:05:00", 
NA, "2021-07-28 20:06:00", NA), TsMin_bare_10cm = c(32.97, NA, 
33, NA), TsMinTime_bare_10cm = c("2021-07-28 20:04:10", NA, "2021-07-28 20:05:05", 
NA), BattVolts_Min = c(12.77, NA, 12.77, NA), LithBatt_Min = c(3.56, 
NA, 3.56, NA), MaintMode = c(0L, NA, 0L, NA), WS_ms_S_WVT = c(NA, 
3.082, NA, 3.614), SlrW_Avg = c(NA, 854, NA, 859), SlrW_Max = c(NA, 
858, NA, 862), SlrW_TMx = c(NA, "2021-07-28 20:04:05", NA, "2021-07-28 20:06:00"
), SlrW = c(NA, 856, NA, 862), SlrW_Std = c(NA, 2.334, NA, 2.376
), WS_Dir = c(NA, 164L, NA, 128L), WS_ms_Avg = c(NA, 3.082, NA, 
3.614), WS_ms_Max = c(NA, 5.17, NA, 5.71), WS_ms_TMx = c(NA, 
"2021-07-28 20:05:00", NA, "2021-07-28 20:05:05"), WS_Diag = c(NA, 
0L, NA, 0L), SmplsF = c(NA, 1L, NA, 1L), Diag1F = c(NA, 0L, NA, 
0L), Diag2F = c(NA, 0L, NA, 0L), Diag4F = c(NA, 0L, NA, 0L), 
    Diag8F = c(NA, 0L, NA, 0L), Diag9F = c(NA, 0L, NA, 0L), Diag10F = c(NA, 
    0L, NA, 0L), NNDF = c(NA, 0L, NA, 0L), WindDir_D1_WVT = c(NA, 
    170.6, NA, 150.3), WindDir_SD1_WVT = c(NA, 16.43, NA, 14.95
    ), source = c("E:\\Data_Collected_withDups/3999/3999_1min.csv-20230216_0122", 
    "E:\\Data_Collected_withDups/3999/3999_1min_exp.csv-20230216_0122", 
    "E:\\Data_Collected_withDups/3999/3999_1min.csv-20230216_0122", 
    "E:\\Data_Collected_withDups/3999/3999_1min_exp.csv-20230216_0122"
    )), class = "data.frame", row.names = 1886274:1886277)

我想要的结果是:

no_dupped_records <- structure(list(Station_Number = c(3999L, 3999L), Station_Name = c("Eagle 3NW Beta", 
"Eagle 3NW Beta"), NWSLI = c("EEMN1", "EEMN1"), Station_Legacy_Name = c("Rogers Farm Tower", 
"Rogers Farm Tower"), Lat = c(40.846994, 40.846994), Lon = c(-96.467199, 
-96.467199), Elev_m = c(368.7, 368.7), TIMESTAMP = c("2021-07-28 20:05:00", 
"2021-07-28 20:06:00"), RECORD = 18:19, Ta_2m_Avg = c(34.3, 34.26
), TaMax_2m = c(34.39, 34.35), TaMaxTime_2m = c("2021-07-28 20:04:15", 
"2021-07-28 20:05:05"), TaMin_2m = c(34.25, 34.16), TaMinTime_2m = c("2021-07-28 20:04:45", 
"2021-07-28 20:05:50"), RH_2m_Avg = c(57.35, 56.97), RHMax_2m = c(58.68, 
57.99), RHMaxTime_2m = c("2021-07-28 20:04:30", "2021-07-28 20:05:40"
), RHMin_2m = c(55.86, 55.41), RHMinTime_2m = c("2021-07-28 20:04:45", 
"2021-07-28 20:05:20"), Dp_2m_Avg = c(24.68, 24.53), DpMax_2m = c(25.06, 
24.77), DpMaxTime_2m = c("2021-07-28 20:04:25", "2021-07-28 20:05:40"
), DpMin_2m = c(24.19, 24.06), DpMinTime_2m = c("2021-07-28 20:04:45", 
"2021-07-28 20:05:20"), HeatIndex_2m_Avg = c(41.9, 41.64), HeatIndexMax_2m = c(42.51, 
41.95), HeatIndexMaxTime_2m = c("2021-07-28 20:04:25", "2021-07-28 20:05:45"
), WindChill_2m_Avg = c(37.65, 37.77), WindChillMin_2m = c(37.47, 
37.56), WindChillMinTime_2m = c("2021-07-28 20:04:45", "2021-07-28 20:05:20"
), WndAveSpd_3m = c(2.746, 3.796), WndVecMagAve_3m = c(2.594, 
3.607), WndAveDir_3m = c(166.5, 152.6), WndAveDirSD_3m = c(19.03, 
18.05), WndMaxSpd5s_3m = c(5.165, 5.498), WndMaxSpd5sTime_3m = c("2021-07-28 20:05:00", 
"2021-07-28 20:05:05"), WndMax_5sec_Dir_3m = c(158.3, 162.8), 
    PresAvg_1pnt5m = c(972.3953, 972.3508), PresMax_1pnt5m = c(972.4536, 
    972.42), PresMaxTime_1pnt5m = c("2021-07-28 20:04:25", "2021-07-28 20:05:10"
    ), PresMin_1pnt5m = c(972.3868, 972.2872), PresMinTime_1pnt5m = c("2021-07-28 20:04:35", 
    "2021-07-28 20:05:50"), Solar_2m_Avg = c(771.1, 776.1), Rain_1m_Tot = c(0L, 
    0L), Ts_bare_10cm_Avg = c(32.98, 33.01), TsMax_bare_10cm = c(33, 
    33.02), TsMaxTime_bare_10cm = c("2021-07-28 20:05:00", "2021-07-28 20:06:00"
    ), TsMin_bare_10cm = c(32.97, 33), TsMinTime_bare_10cm = c("2021-07-28 20:04:10", 
    "2021-07-28 20:05:05"), BattVolts_Min = c(12.77, 12.77), 
    LithBatt_Min = c(3.56, 3.56), MaintMode = c(0L, 0L), WS_ms_S_WVT = c(3.082, 
    3.614), SlrW_Avg = c(854L, 859L), SlrW_Max = c(858L, 862L
    ), SlrW_TMx = c("2021-07-28 20:04:05", "2021-07-28 20:06:00"
    ), SlrW = c(856L, 862L), SlrW_Std = c(2.334, 2.376), WS_Dir = c(164L, 
    128L), WS_ms_Avg = c(3.082, 3.614), WS_ms_Max = c(5.17, 5.71
    ), WS_ms_TMx = c("2021-07-28 20:05:00", "2021-07-28 20:05:05"
    ), WS_Diag = c(0L, 0L), SmplsF = c(1L, 1L), Diag1F = c(0L, 
    0L), Diag2F = c(0L, 0L), Diag4F = c(0L, 0L), Diag8F = c(0L, 
    0L), Diag9F = c(0L, 0L), Diag10F = c(0L, 0L), NNDF = c(0L, 
    0L), WindDir_D1_WVT = c(170.6, 150.3), WindDir_SD1_WVT = c(16.43, 
    14.95), source = c("E:\\Data_Collected_withDups/3999/3999_1min.csv-20230216_0122", 
    "E:\\Data_Collected_withDups/3999/3999_1min.csv-20230216_0122"
    )), class = "data.frame", row.names = c(NA, -2L))

根据我上一个问题的答案,我似乎需要以某种方式使用 mutate 来获得我想要的东西。但如何呢?我需要返回所有列。

以下是我认为可能有效的两种说法。我收到错误消息,所有内容都必须在选择函数中使用。

dupped_records |> group_by(TIMESTAMP) |> mutate_all(fill(everything(), .direction = 'up'))

这个声明没有任何作用。为什么?

dupped_records |> group_by(TIMESTAMP) |> mutate(across(fill(everything(), .direction = 'up')))

无论如何,我一如既往地感谢您的帮助。快速编辑:有没有办法用 dplyr::coalesce 来做到这一点?

r dplyr tidyr fill coalesce
1个回答
0
投票
library(tidyverse)

dupped_records |>
  mutate(TIMESTAMP = ymd_hms(TIMESTAMP)) |>
  summarise(across(Station_Number:RECORD, first), across(Ta_2m_Avg:WindDir_SD1_WVT, na.omit), .by = "TIMESTAMP")
            TIMESTAMP Station_Number   Station_Name NWSLI
1 2021-07-28 20:05:00           3999 Eagle 3NW Beta EEMN1
2 2021-07-28 20:06:00           3999 Eagle 3NW Beta EEMN1
      Station_Legacy_Name      Lat      Lon Elev_m RECORD Ta_2m_Avg TaMax_2m
1 Rogers \n    Farm Tower 40.84699 -96.4672  368.7     18     34.30    34.39
2       Rogers Farm Tower 40.84699 -96.4672  368.7     19     34.26    34.35
         TaMaxTime_2m TaMin_2m        TaMinTime_2m RH_2m_Avg RHMax_2m
1 2021-07-28 20:04:15    34.25 2021-07-28 20:04:45     57.35    58.68
2 2021-07-28 20:05:05    34.16 2021-07-28 20:05:50     56.97    57.99
         RHMaxTime_2m RHMin_2m        RHMinTime_2m Dp_2m_Avg DpMax_2m
1 2021-07-28 20:04:30    55.86 2021-07-28 20:04:45     24.68    25.06
2 2021-07-28 20:05:40    55.41 2021-07-28 20:05:20     24.53    24.77
         DpMaxTime_2m DpMin_2m        DpMinTime_2m HeatIndex_2m_Avg
1 2021-07-28 20:04:25    24.19 2021-07-28 20:04:45            41.90
2 2021-07-28 20:05:40    24.06 2021-07-28 20:05:20            41.64
  HeatIndexMax_2m HeatIndexMaxTime_2m WindChill_2m_Avg WindChillMin_2m
1           42.51 2021-07-28 20:04:25            37.65           37.47
2           41.95 2021-07-28 20:05:45            37.77           37.56
  WindChillMinTime_2m WndAveSpd_3m WndVecMagAve_3m WndAveDir_3m WndAveDirSD_3m
1 2021-07-28 20:04:45        2.746           2.594        166.5          19.03
2 2021-07-28 20:05:20        3.796           3.607        152.6          18.05
  WndMaxSpd5s_3m  WndMaxSpd5sTime_3m WndMax_5sec_Dir_3m PresAvg_1pnt5m
1          5.165 2021-07-28 20:05:00              158.3       972.3953
2          5.498 2021-07-28 20:05:05              162.8       972.3508
  PresMax_1pnt5m  PresMaxTime_1pnt5m PresMin_1pnt5m  PresMinTime_1pnt5m
1       972.4536 2021-07-28 20:04:25       972.3868 2021-07-28 20:04:35
2       972.4200 2021-07-28 20:05:10       972.2872 2021-07-28 20:05:50
  Solar_2m_Avg Rain_1m_Tot Ts_bare_10cm_Avg TsMax_bare_10cm TsMaxTime_bare_10cm
1        771.1           0            32.98           33.00 2021-07-28 20:05:00
2        776.1           0            33.01           33.02 2021-07-28 20:06:00
  TsMin_bare_10cm TsMinTime_bare_10cm BattVolts_Min LithBatt_Min MaintMode
1           32.97 2021-07-28 20:04:10         12.77         3.56         0
2           33.00 2021-07-28 20:05:05         12.77         3.56         0
  WS_ms_S_WVT SlrW_Avg SlrW_Max            SlrW_TMx SlrW SlrW_Std WS_Dir
1       3.082      854      858 2021-07-28 20:04:05  856    2.334    164
2       3.614      859      862 2021-07-28 20:06:00  862    2.376    128
  WS_ms_Avg WS_ms_Max           WS_ms_TMx WS_Diag SmplsF Diag1F Diag2F Diag4F
1     3.082      5.17 2021-07-28 20:05:00       0      1      0      0      0
2     3.614      5.71 2021-07-28 20:05:05       0      1      0      0      0
  Diag8F Diag9F Diag10F NNDF WindDir_D1_WVT WindDir_SD1_WVT
1      0      0       0    0          170.6           16.43
2      0      0       0    0          150.3           14.95
© www.soinside.com 2019 - 2024. All rights reserved.