我有 3 个数据框:
第一个:
# Build df_beats: eight rows with name, IP address, port, protocol and
# severity columns. The column vectors are kept as standalone variables.
s_name <- c(
  "john.lennon", "paul.mccartney", "george.harrison", "ringo.starr",
  "mick.jagger", "keith.richards", "charlie.watts", "ron.wood"
)
s_ip_address <- c(
  "192.9.208.161", "170.70.24.32", "180.169.22.12", "170.70.68.56",
  "192.9.208.14", "10.10.10.5", "22.250.32.14", "22.24.9.3"
)
n_port <- c(22, 21, 80, 123, 22, 8080, 8088, 411)
s_protocol <- c("tcp", "tcp", "tcp", "udp", "tcp", "tcp", "tcp", "tcp")
n_severity <- c(4, 2, 5, 1, 3, 1, 2, 2)
df_beats <- data.frame(
  s_name = s_name,
  s_ip_address = s_ip_address,
  n_port = n_port,
  s_protocol = s_protocol,
  n_severity = n_severity
)
> df_beats
s_name s_ip_address n_port s_protocol n_severity
1 john.lennon 192.9.208.161 22 tcp 4
2 paul.mccartney 170.70.24.32 21 tcp 2
3 george.harrison 180.169.22.12 80 tcp 5
4 ringo.starr 170.70.68.56 123 udp 1
5 mick.jagger 192.9.208.14 22 tcp 3
6 keith.richards 10.10.10.5 8080 tcp 1
7 charlie.watts 22.250.32.14 8088 tcp 2
8 ron.wood 22.24.9.3 411 tcp 2
第二个:
# Build df_cmdb1: six inventory rows keyed by host name (s_name1), with
# asset tag, usage and OS model columns.
s_asset_tag <- c(
  "CMDB1009", "CMDB0618", "CMDB0225", "CMDB0707", "CMDB0919", "CMDB0103"
)
s_name1 <- c(
  "john.lennon", "paul.mccartney", "george.harrison", "ringo.starr",
  "brian.epstein", "george.martin"
)
s_used_for <- c(
  "Production", "Development", "Certification", "Pre-Production",
  "Production", "Development"
)
s_os_model <- c(
  "Windows Server 2012", "Windows Server 2019 Datacenter",
  "Windows Server 2008", "Windows Server 2016",
  "Windows Server 2012", "Windows Server 2012"
)
df_cmdb1 <- data.frame(
  s_asset_tag = s_asset_tag,
  s_name1 = s_name1,
  s_used_for = s_used_for,
  s_os_model = s_os_model
)
> df_cmdb1
s_asset_tag s_name1 s_used_for s_os_model
1 CMDB1009 john.lennon Production Windows Server 2012
2 CMDB0618 paul.mccartney Development Windows Server 2019 Datacenter
3 CMDB0225 george.harrison Certification Windows Server 2008
4 CMDB0707 ringo.starr Pre-Production Windows Server 2016
5 CMDB0919 brian.epstein Production Windows Server 2012
6 CMDB0103 george.martin Development Windows Server 2012
>
第三个:
# Build df_cmdb2: six inventory rows keyed by IP address (s_ip_address2).
# s_asset_tag, s_used_for and s_os_model are reassigned here.
s_asset_tag <- c(
  "CMDB0726", "CMDB1218", "CMDB0602", "CMDB0601", "CMDB1024", "CMDB0228"
)
s_ip_address2 <- c(
  "192.9.208.14", "10.10.10.5", "22.250.32.14", "22.24.9.3",
  "180.169.22.8", "180.181.21.25"
)
s_used_for <- c(
  "Production", "Test", "Production", "Pre-Production",
  "Contingency", "Production"
)
s_os_model <- c(
  "Red Hat Linux", "Red Hat Linux", "Oracle Solaris",
  "Qualys Appliance", "IBM AIX", "VMWare vRealize"
)
df_cmdb2 <- data.frame(
  s_asset_tag = s_asset_tag,
  s_ip_address2 = s_ip_address2,
  s_used_for = s_used_for,
  s_os_model = s_os_model
)
> df_cmdb2
s_asset_tag s_ip_address2 s_used_for s_os_model
1 CMDB0726 192.9.208.14 Production Red Hat Linux
2 CMDB1218 10.10.10.5 Test Red Hat Linux
3 CMDB0602 22.250.32.14 Production Oracle Solaris
4 CMDB0601 22.24.9.3 Pre-Production Qualys Appliance
5 CMDB1024 180.169.22.8 Contingency IBM AIX
6 CMDB0228 180.181.21.25 Production VMWare vRealize
>
我可以用 left_join 成功连接 df_beats 和 df_cmdb1,连接键为 s_name/s_name1:
library(dplyr)

# Keep every row of df_beats and attach the df_cmdb1 columns wherever
# s_name equals s_name1; rows without a match get NA in the added columns.
left1 <- df_beats |>
  left_join(df_cmdb1, by = c("s_name" = "s_name1")) ### _my_first_instruction_
> left1
s_name s_ip_address n_port s_protocol n_severity s_asset_tag s_used_for s_os_model
1 john.lennon 192.9.208.161 22 tcp 4 CMDB1009 Production Windows Server 2012
2 paul.mccartney 170.70.24.32 21 tcp 2 CMDB0618 Development Windows Server 2019 Datacenter
3 george.harrison 180.169.22.12 80 tcp 5 CMDB0225 Certification Windows Server 2008
4 ringo.starr 170.70.68.56 123 udp 1 CMDB0707 Pre-Production Windows Server 2016
5 mick.jagger 192.9.208.14 22 tcp 3 <NA> <NA> <NA>
6 keith.richards 10.10.10.5 8080 tcp 1 <NA> <NA> <NA>
7 charlie.watts 22.250.32.14 8088 tcp 2 <NA> <NA> <NA>
8 ron.wood 22.24.9.3 411 tcp 2 <NA> <NA> <NA>
>
但我不知道该如何编写 my_second_instruction:用数据框 df_cmdb2 通过 s_ip_address/s_ip_address2 字段进行连接,把其余的行(第 5、6、7、8 行)补全:
left1 <- _my_second_instruction_
期望得到以下输出。请注意,已经通过 my_first_instruction 连接成功的行(第 1、2、3、4 行)不得被修改:
> left1
s_name s_ip_address n_port s_protocol n_severity s_asset_tag s_used_for s_os_model
1 john.lennon 192.9.208.161 22 tcp 4 CMDB1009 Production Windows Server 2012
2 paul.mccartney 170.70.24.32 21 tcp 2 CMDB0618 Development Windows Server 2019 Datacenter
3 george.harrison 180.169.22.12 80 tcp 5 CMDB0225 Certification Windows Server 2008
4 ringo.starr 170.70.68.56 123 udp 1 CMDB0707 Pre-Production Windows Server 2016
5 mick.jagger 192.9.208.14 22 tcp 3 CMDB0726 Production Red Hat Linux
6 keith.richards 10.10.10.5 8080 tcp 1 CMDB1218 Test Red Hat Linux
7 charlie.watts 22.250.32.14 8088 tcp 2 CMDB0602 Production Oracle Solaris
8 ron.wood 22.24.9.3 411 tcp 2 CMDB0601 Pre-Production Qualys Appliance
>
提前致谢。 我是 R 语言初学者。
你可以再做一次 left_join,然后使用 coalesce(逐行取第一个非 NA 的值)把两次连接得到的同名列合并:
# Second lookup, this time on IP address. Columns present in both tables
# come back suffixed .x (from left1) and .y (from df_cmdb2).
left1 |>
  left_join(df_cmdb2, by = c("s_ip_address" = "s_ip_address2")) |>
  # Take the value already found via s_name when it is not NA; otherwise
  # fall back to the value from the IP-address match.
  mutate(
    s_asset_tag = coalesce(s_asset_tag.x, s_asset_tag.y),
    s_used_for = coalesce(s_used_for.x, s_used_for.y),
    s_os_model = coalesce(s_os_model.x, s_os_model.y)
  ) |>
  # Drop the suffixed source columns now that they have been merged.
  select(-ends_with(c(".x", ".y")))
s_name s_ip_address n_port s_protocol n_severity s_asset_tag s_used_for s_os_model
1 john.lennon 192.9.208.161 22 tcp 4 CMDB1009 Production Windows Server 2012
2 paul.mccartney 170.70.24.32 21 tcp 2 CMDB0618 Development Windows Server 2019 Datacenter
3 george.harrison 180.169.22.12 80 tcp 5 CMDB0225 Certification Windows Server 2008
4 ringo.starr 170.70.68.56 123 udp 1 CMDB0707 Pre-Production Windows Server 2016
5 mick.jagger 192.9.208.14 22 tcp 3 CMDB0726 Production Red Hat Linux
6 keith.richards 10.10.10.5 8080 tcp 1 CMDB1218 Test Red Hat Linux
7 charlie.watts 22.250.32.14 8088 tcp 2 CMDB0602 Production Oracle Solaris
8 ron.wood 22.24.9.3 411 tcp 2 CMDB0601 Pre-Production Qualys Appliance