How to add date column to a dataframe based on a common column in r

问题描述 投票:0回答:1

我有两个数据框:

dput(head(catch,70)) 
structure(list(fldFinalLength = c(NA, 260L, NA, NA, 510L, 650L, 
280L, 550L, NA, 550L, 320L, NA, NA, NA, 410L, 260L, NA, NA, NA, 
330L, 670L, 890L, NA, NA, 1690L, NA, 240L, 470L, 420L, NA, 700L, 
NA, 420L, NA, NA, 260L, 300L, 860L, 370L, NA, NA, 510L, 240L, 
930L, NA, 730L, 730L, NA, 270L, NA, 780L, NA, 280L, 590L, 290L, 
NA, NA, 410L, 350L, NA, NA, 1350L, 560L, 460L, 650L, 430L, NA, 
NA, 560L, 360L), fldInitialLength = c(NA, 220L, NA, NA, 160L, 
650L, 280L, 550L, NA, 290L, 320L, NA, NA, NA, 240L, 160L, NA, 
NA, NA, 330L, 340L, 520L, NA, NA, 830L, NA, 240L, 230L, 230L, 
NA, 340L, NA, 310L, NA, NA, 260L, 300L, 860L, 370L, NA, NA, 210L, 
220L, 730L, NA, 730L, 730L, NA, 270L, NA, 170L, NA, 280L, 190L, 
210L, NA, NA, 390L, 300L, NA, NA, 1350L, 220L, 250L, 380L, 430L, 
NA, NA, 170L, 360L), fldCatchWeight = c(0.73, 0.672, 61.3, 0.34, 
1.534, 0.934, 0.244, 2.55, 0.136, 13.784, 0.182, 0.27, 1.538, 
0.116, 5.94, 0.284, 0.06, 1.064, 1.2, 0.342, 13.3, 167.34, 0.834, 
23.08, 16.59, 0.286, 0.072, 1.24, 0.44, 0.052, 14.66, 6.36, 1.96, 
7.48, 1.7, 0.092, 0.146, 3.73, 0.82, 9.54, 0.846, 3.492, 0.248, 
16.13, 0.17, 1.87, 2.31, 0.084, 0.082, 0.344, 29.08, 3.248, 0.148, 
4.916, 0.871, 0.27, 0.404, 1.028, 0.598, 10.27, 0.444, 7.82, 
4.84, 3.712, 2.09, 0.454, 0.13, 4.93, 1.584, 0.568), fldMeasuringInterval = c(NA, 
10L, NA, NA, 10L, 10L, 10L, 10L, NA, 10L, 10L, NA, NA, NA, 10L, 
10L, NA, NA, NA, 10L, 10L, 10L, NA, NA, 10L, NA, 10L, 10L, 10L, 
NA, 10L, NA, 10L, NA, NA, 10L, 10L, 10L, 10L, NA, NA, 10L, 10L, 
10L, NA, 10L, 10L, NA, 10L, NA, 10L, NA, 10L, 10L, 10L, NA, NA, 
10L, 10L, NA, NA, 10L, 10L, 10L, 10L, 10L, NA, NA, 10L, 10L), 
    fldMeasuringOperator = c(NA, "DFE", NA, NA, "DFE", "MSL", 
    "MSL", "MSL", NA, "MSL", "MSL", NA, NA, NA, "MSL", "DFE", 
    NA, NA, NA, "DFE", "DFE", "DFE", NA, NA, "DFE", NA, "DFE", 
    "DFE", "DFE", NA, "MSL", NA, "MSL", NA, NA, "MSL", "MSL", 
    "MSL", "MSL", NA, NA, "MSL", "MSL", "MSL", NA, "MSL", "MSL", 
    NA, "MSL", NA, "MSL", NA, "MSL", "DFE", "DFE", NA, NA, "DFE", 
    "DFE", NA, NA, "MSL", "MSL", "MSL", "MSL", "MSL", NA, NA, 
    "MSL", "MSL"), fldCruiseStationNumber = c(59L, 59L, 59L, 
    59L, 59L, 60L, 60L, 60L, 60L, 60L, 60L, 61L, 61L, 61L, 61L, 
    62L, 62L, 62L, 62L, 62L, 63L, 63L, 63L, 64L, 64L, 64L, 64L, 
    64L, 64L, 65L, 65L, 65L, 65L, 66L, 66L, 66L, 66L, 66L, 66L, 
    66L, 66L, 66L, 66L, 66L, 67L, 67L, 67L, 67L, 67L, 67L, 67L, 
    67L, 67L, 70L, 70L, 70L, 70L, 70L, 70L, 71L, 71L, 71L, 71L, 
    71L, 71L, 71L, 71L, 71L, 71L, 71L), fldMainSpeciesCode = c("BEN", 
    "BIB", "BOF", "LSS", "WAF", "SMH", "TBR", "WAF", "WHB", "WHG", 
    "WIT", "BEN", "BOF", "CDT", "LEM", "MEG", "NEP", "NEP", "PLA", 
    "SOL", "DGS", "DGS", "GFB", "BOF", "COE", "CTC", "CUR", "CUR", 
    "DGS", "NEP", "WAF", "WHB", "WIT", "BEN", "BOF", "CUR", "CUR", 
    "DFL", "JOD", "LSS", "MAC", "MEG", "MEG", "SMH", "PLA", "RDS", 
    "RNS", "SDF", "SHR", "TUB", "WAF", "WHB", "WIT", "HKE", "LBI", 
    "LSS", "WHB", "WHG", "WIT", "BEN", "BOF", "COE", "CUR", "CUR", 
    "DFL", "DFL", "EDC", "GUG", "HKE", "LEM"), fldMainSpeciesCode.1 = c("BEN", 
    "BIB", "BOF", "LSS", "WAF", "SMH", "TBR", "WAF", "WHB", "WHG", 
    "WIT", "BEN", "BOF", "CDT", "LEM", "MEG", "NEP", "NEP", "PLA", 
    "SOL", "DGS", "DGS", "GFB", "BOF", "COE", "CTC", "CUR", "CUR", 
    "DGS", "NEP", "WAF", "WHB", "WIT", "BEN", "BOF", "CUR", "CUR", 
    "DFL", "JOD", "LSS", "MAC", "MEG", "MEG", "SMH", "PLA", "RDS", 
    "RNS", "SDF", "SHR", "TUB", "WAF", "WHB", "WIT", "HKE", "LBI", 
    "LSS", "WHB", "WHG", "WIT", "BEN", "BOF", "COE", "CUR", "CUR", 
    "DFL", "DFL", "EDC", "GUG", "HKE", "LEM")), row.names = c(NA, 
70L), class = "data.frame")
dput(head(days_IAMS2023,13)) 
structure(list(fldDateTimeShot = c("24 February 2023", "24 February 2023", 
"24 February 2023", "24 February 2023", "24 February 2023", "25 February 2023", 
"25 February 2023", "25 February 2023", "25 February 2023", "25 February 2023", 
"26 February 2023", "26 February 2023", "27 February 2023"), 
    fldCruiseStationNumber = 59:71), row.names = c(NA, 13L), class = "data.frame")

两者都有 fldCruiseStationNumber 列。

我想把数据框 days_IAMS2023 中的 fldDateTimeShot 添加到数据框 catch 中,使其按 fldCruiseStationNumber 在 catch 的所有对应行中重复。我的困难在于这两个数据框的行数不同。

非常感谢任何帮助。

r dataframe date multiple-columns
1个回答
0
投票

使用 dplyr 的 left_join,以 fldCruiseStationNumber 为键,把 days_IAMS2023 连接到 catch 上:

library(dplyr)

# Look up each catch record's shot date by its station number.
# A left join keeps every row of `catch`, so a station's date is repeated
# on each catch row that shares its fldCruiseStationNumber; the differing
# row counts of the two tables do not matter.
updated_df <- catch %>%
  left_join(days_IAMS2023, by = "fldCruiseStationNumber")

这是生成的 data.frame 的示例:

> updated_df[ c(10,30,50,70), ]
   fldFinalLength fldInitialLength fldCatchWeight fldMeasuringInterval fldMeasuringOperator fldCruiseStationNumber fldMainSpeciesCode fldMainSpeciesCode.1  fldDateTimeShot
10            550              290         13.784                   10                  MSL                     60                WHG                  WHG 24 February 2023
30             NA               NA          0.052                   NA                 <NA>                     65                NEP                  NEP 25 February 2023
50             NA               NA          0.344                   NA                 <NA>                     67                TUB                  TUB 25 February 2023
70            360              360          0.568                   10                  MSL                     71                LEM                  LEM 27 February 2023
© www.soinside.com 2019 - 2024. All rights reserved.