如何构建一个补充性的 left_join,使其只作用于第一次 left_join 未能匹配的记录

问题描述 投票:0回答:1

我有 3 个数据框:

第一个:

# Five parallel vectors (8 elements each) that become the columns of df_beats.
s_name <- c(
  "john.lennon", "paul.mccartney", "george.harrison", "ringo.starr",
  "mick.jagger", "keith.richards", "charlie.watts", "ron.wood"
)
s_ip_address <- c(
  "192.9.208.161", "170.70.24.32", "180.169.22.12", "170.70.68.56",
  "192.9.208.14", "10.10.10.5", "22.250.32.14", "22.24.9.3"
)
n_port <- c(22, 21, 80, 123, 22, 8080, 8088, 411)
s_protocol <- c("tcp", "tcp", "tcp", "udp", "tcp", "tcp", "tcp", "tcp")
n_severity <- c(4, 2, 5, 1, 3, 1, 2, 2)

# Assemble the data frame; column names are spelled out explicitly and match
# the vector names.
df_beats <- data.frame(
  s_name = s_name,
  s_ip_address = s_ip_address,
  n_port = n_port,
  s_protocol = s_protocol,
  n_severity = n_severity
)

> df_beats
           s_name  s_ip_address n_port s_protocol n_severity
1     john.lennon 192.9.208.161     22        tcp          4
2  paul.mccartney  170.70.24.32     21        tcp          2
3 george.harrison 180.169.22.12     80        tcp          5
4     ringo.starr  170.70.68.56    123        udp          1
5     mick.jagger  192.9.208.14     22        tcp          3
6  keith.richards    10.10.10.5   8080        tcp          1
7   charlie.watts  22.250.32.14   8088        tcp          2
8        ron.wood     22.24.9.3    411        tcp          2

第二个:

# Four parallel vectors (6 elements each) that become the columns of df_cmdb1.
# This table carries the name column s_name1.
s_asset_tag <- c(
  "CMDB1009", "CMDB0618", "CMDB0225", "CMDB0707", "CMDB0919", "CMDB0103"
)
s_name1 <- c(
  "john.lennon", "paul.mccartney", "george.harrison", "ringo.starr",
  "brian.epstein", "george.martin"
)
s_used_for <- c(
  "Production", "Development", "Certification", "Pre-Production",
  "Production", "Development"
)
s_os_model <- c(
  "Windows Server 2012", "Windows Server 2019 Datacenter",
  "Windows Server 2008", "Windows Server 2016",
  "Windows Server 2012", "Windows Server 2012"
)

# Assemble the data frame with explicit column names.
df_cmdb1 <- data.frame(
  s_asset_tag = s_asset_tag,
  s_name1 = s_name1,
  s_used_for = s_used_for,
  s_os_model = s_os_model
)

> df_cmdb1
  s_asset_tag         s_name1     s_used_for                     s_os_model
1    CMDB1009     john.lennon     Production            Windows Server 2012
2    CMDB0618  paul.mccartney    Development Windows Server 2019 Datacenter
3    CMDB0225 george.harrison  Certification            Windows Server 2008
4    CMDB0707     ringo.starr Pre-Production            Windows Server 2016
5    CMDB0919   brian.epstein     Production            Windows Server 2012
6    CMDB0103   george.martin    Development            Windows Server 2012
>

第三个:

# Four parallel vectors (6 elements each) that become the columns of df_cmdb2.
# This table carries the address column s_ip_address2.
s_asset_tag <- c(
  "CMDB0726", "CMDB1218", "CMDB0602", "CMDB0601", "CMDB1024", "CMDB0228"
)
s_ip_address2 <- c(
  "192.9.208.14", "10.10.10.5", "22.250.32.14", "22.24.9.3",
  "180.169.22.8", "180.181.21.25"
)
s_used_for <- c(
  "Production", "Test", "Production", "Pre-Production",
  "Contingency", "Production"
)
s_os_model <- c(
  "Red Hat Linux", "Red Hat Linux", "Oracle Solaris", "Qualys Appliance",
  "IBM AIX", "VMWare vRealize"
)

# Assemble the data frame with explicit column names.
df_cmdb2 <- data.frame(
  s_asset_tag = s_asset_tag,
  s_ip_address2 = s_ip_address2,
  s_used_for = s_used_for,
  s_os_model = s_os_model
)

> df_cmdb2
  s_asset_tag s_ip_address2     s_used_for       s_os_model
1    CMDB0726  192.9.208.14     Production    Red Hat Linux
2    CMDB1218    10.10.10.5           Test    Red Hat Linux
3    CMDB0602  22.250.32.14     Production   Oracle Solaris
4    CMDB0601     22.24.9.3 Pre-Production Qualys Appliance
5    CMDB1024  180.169.22.8    Contingency          IBM AIX
6    CMDB0228 180.181.21.25     Production  VMWare vRealize
>

我可以用 df_beats 和 df_cmdb1 成功构建 left_join,通过 s_name/s_name1 字段关联:

library(dplyr)
# _my_first_instruction_: keep every row of df_beats and attach the df_cmdb1
# columns where df_beats$s_name equals df_cmdb1$s_name1.
left1 <- left_join(
  df_beats,
  df_cmdb1,
  by = c(s_name = "s_name1")
)

> left1
           s_name  s_ip_address n_port s_protocol n_severity s_asset_tag     s_used_for                     s_os_model
1     john.lennon 192.9.208.161     22        tcp          4    CMDB1009     Production            Windows Server 2012
2  paul.mccartney  170.70.24.32     21        tcp          2    CMDB0618    Development Windows Server 2019 Datacenter
3 george.harrison 180.169.22.12     80        tcp          5    CMDB0225  Certification            Windows Server 2008
4     ringo.starr  170.70.68.56    123        udp          1    CMDB0707 Pre-Production            Windows Server 2016
5     mick.jagger  192.9.208.14     22        tcp          3        <NA>           <NA>                           <NA>
6  keith.richards    10.10.10.5   8080        tcp          1        <NA>           <NA>                           <NA>
7   charlie.watts  22.250.32.14   8088        tcp          2        <NA>           <NA>                           <NA>
8        ron.wood     22.24.9.3    411        tcp          2        <NA>           <NA>                           <NA>
>

但我不知道如何编写 _my_second_instruction_,以便使用数据框 df_cmdb2、通过 s_ip_address/s_ip_address2 字段关联,来补全其余的行(第 5、6、7、8 行):

left1 <- _my_second_instruction_

期望得到以下输出。请注意,不得修改已经由 _my_first_instruction_ 匹配好的行(第 1、2、3、4 行):

> left1
           s_name  s_ip_address n_port s_protocol n_severity s_asset_tag     s_used_for                     s_os_model
1     john.lennon 192.9.208.161     22        tcp          4    CMDB1009     Production            Windows Server 2012
2  paul.mccartney  170.70.24.32     21        tcp          2    CMDB0618    Development Windows Server 2019 Datacenter
3 george.harrison 180.169.22.12     80        tcp          5    CMDB0225  Certification            Windows Server 2008
4     ringo.starr  170.70.68.56    123        udp          1    CMDB0707 Pre-Production            Windows Server 2016
5     mick.jagger  192.9.208.14     22        tcp          3    CMDB0726     Production                  Red Hat Linux
6  keith.richards    10.10.10.5   8080        tcp          1    CMDB1218           Test                  Red Hat Linux
7   charlie.watts  22.250.32.14   8088        tcp          2    CMDB0602     Production                 Oracle Solaris
8        ron.wood     22.24.9.3    411        tcp          2    CMDB0601 Pre-Production               Qualys Appliance
>

提前致谢。 我是 R 语言初学者。

r
1个回答
0
投票

你可以再做一次 left_join,然后用 coalesce 在带 .x 与 .y 后缀的同名列中取第一个非 NA 的值:

# Second pass: join df_cmdb2 onto left1 by IP address. Columns that exist in
# both tables come back suffixed with .x (from left1) and .y (from df_cmdb2).
left1 |>
  left_join(df_cmdb2, by = c(s_ip_address = "s_ip_address2")) |>
  mutate(
    # coalesce() takes the .x value when it is not NA and falls back to .y
    # otherwise, so values filled by the first join are left unchanged.
    s_asset_tag = coalesce(s_asset_tag.x, s_asset_tag.y),
    s_used_for = coalesce(s_used_for.x, s_used_for.y),
    s_os_model = coalesce(s_os_model.x, s_os_model.y)
  ) |>
  # Drop the suffixed columns now that their values are merged.
  select(!ends_with(c(".x", ".y")))

           s_name  s_ip_address n_port s_protocol n_severity s_asset_tag     s_used_for                     s_os_model
1     john.lennon 192.9.208.161     22        tcp          4    CMDB1009     Production            Windows Server 2012
2  paul.mccartney  170.70.24.32     21        tcp          2    CMDB0618    Development Windows Server 2019 Datacenter
3 george.harrison 180.169.22.12     80        tcp          5    CMDB0225  Certification            Windows Server 2008
4     ringo.starr  170.70.68.56    123        udp          1    CMDB0707 Pre-Production            Windows Server 2016
5     mick.jagger  192.9.208.14     22        tcp          3    CMDB0726     Production                  Red Hat Linux
6  keith.richards    10.10.10.5   8080        tcp          1    CMDB1218           Test                  Red Hat Linux
7   charlie.watts  22.250.32.14   8088        tcp          2    CMDB0602     Production                 Oracle Solaris
8        ron.wood     22.24.9.3    411        tcp          2    CMDB0601 Pre-Production               Qualys Appliance
© www.soinside.com 2019 - 2024. All rights reserved.