在 SQL 中,可以用 OR 指定多个列进行连接(任意一列匹配即可)。我需要用这种方式从查找表中引入新列,因为查找表中有多个可能匹配的标识符。
例如,通过列 a 或 b 连接 x 和 y。
# Table to enrich: every row carries both identifiers, `a` and `b`.
x <- tibble::tibble(
  a = c(1, 2, 3, 4, 5),
  b = c(10, 20, 30, 40, 50),
  old_info = c("old1", "old2", "old3", "old4", "old5")
)
# Lookup table: each row is identified by `a`, by `b`, or by neither.
y <- tibble::tibble(
  a = c(1, 2, NA, NA, NA),
  b = c(NA, NA, NA, 40, 50),
  new_info = c("new1", "new2", "new3", "new4", "new5")
)
这将是所需的输出。
#> # A tibble: 5 × 4
#> a b old_info new_info
#> <dbl> <dbl> <chr> <chr>
#> 1 1 10 old1 new1
#> 2 2 20 old2 new2
#> 3 3 30 old3 NA
#> 4 4 40 old4 new4
#> 5 5 50 old5 new5
这满足了我的需求。
连接函数
#' Left join on any of several key columns (an "OR"-style join)
#'
#' Left-joins `y` onto `x` once per key column in `by`, each time bringing in
#' a temporary copy of the `cols_y` columns named `<col>_<key>`. The copies of
#' each column are then coalesced into a single column named `<col>` and the
#' temporary copies are dropped.
#'
#' When a row of `x` matches `y` through more than one key, the value found
#' through the key listed first in `by` wins (coalesce takes the first
#' non-missing value).
#'
#' @param x Data frame to add columns to.
#' @param y Lookup data frame. Must contain every `by` column and every
#'   `cols_y` column.
#' @param by <tidy-select> Key columns, resolved against `x`. Each one is used
#'   for a separate left join, in the order given.
#' @param cols_y <tidy-select> Columns of `y` to bring into `x`.
#' @param na_matches Passed to [dplyr::left_join()]. Defaults to `"never"` so
#'   that missing identifiers never match each other.
#' @param relationship Passed to [dplyr::left_join()].
#' @return `x` with one additional column per `cols_y` column, appended after
#'   the existing columns. An existing column of `x` with the same name as a
#'   `cols_y` column is overwritten.
multi_left_join <- function(x, y, by, cols_y,
                            na_matches = "never", relationship = NULL) {
  # Resolve the tidyselect inputs to plain column names once, up front.
  by_names <- names(dplyr::select(x, {{ by }}))
  cols_y_names <- names(dplyr::select(y, {{ cols_y }}))

  if (length(by_names) == 0L) {
    stop("`by` must select at least one column of `x`.", call. = FALSE)
  }

  # Names of the temporary per-key copies of one looked-up column.
  version_names <- function(col_name) paste0(col_name, "_", by_names)

  # A temporary name that already exists in `x` would be suffixed by the join
  # and then not be found when coalescing, so fail early with a clear message.
  clashes <- intersect(
    unlist(lapply(cols_y_names, version_names), use.names = FALSE),
    names(x)
  )
  if (length(clashes) > 0L) {
    stop(
      "`x` already contains column(s) needed as temporary names: ",
      paste(clashes, collapse = ", "),
      call. = FALSE
    )
  }

  # Joins -------------------------------------------------------------------
  # One left join per key; the looked-up columns get a `_<key>` suffix so the
  # results of the different joins do not collide.
  for (by_name in by_names) {
    y_lookup <- y |>
      dplyr::select(dplyr::all_of(by_name), {{ cols_y }}) |>
      dplyr::rename_with(
        \(nm) paste0(nm, "_", by_name),
        .cols = -dplyr::all_of(by_name)
      )
    x <- dplyr::left_join(
      x,
      y_lookup,
      by = by_name,
      na_matches = na_matches,
      relationship = relationship
    )
  }

  # Coalesce joined columns -------------------------------------------------
  # Collapse the `_<key>` copies of each column into one unsuffixed column,
  # then drop the copies explicitly.
  for (col_name in cols_y_names) {
    versions <- version_names(col_name)
    x <- x |>
      dplyr::mutate(
        "{col_name}" := dplyr::coalesce(!!!rlang::syms(versions))
      ) |>
      dplyr::select(-dplyr::all_of(versions))
  }

  x
}
示例
# Table to enrich: every row carries both identifiers, `a` and `b`.
x <- tibble::tibble(
  a = c(1, 2, 3, 4, 5),
  b = c(10, 20, 30, 40, 50),
  old_info = c("old1", "old2", "old3", "old4", "old5")
)
# Lookup table: each row is identified by `a`, by `b`, or by neither.
y <- tibble::tibble(
  a = c(1, 2, NA, NA, NA),
  b = c(NA, NA, NA, 40, 50),
  new_info = c("new1", "new2", "new3", "new4", "new5")
)
# Bring `new_info` into `x`, matching on `a` first and then on `b`.
multi_left_join(x, y, by = c(a, b), cols_y = new_info)
#> # A tibble: 5 × 4
#> a b old_info new_info
#> <dbl> <dbl> <chr> <chr>
#> 1 1 10 old1 new1
#> 2 2 20 old2 new2
#> 3 3 30 old3 NA
#> 4 4 40 old4 new4
#> 5 5 50 old5 new5