我想用 tidyr::separate 配合否定后行断言(negative lookbehind)来拆分字符串,并保留分隔符。我下面的方案没有保留姓氏开头的大写字母。
下面这个问题有一个不使用否定后行断言(negative lookbehind)的答案,但我不知道该如何修改它来解决我的问题:
如何在 R 中使用 tidyr::separate 拆分字符串并保留分隔符字符串的值?
# Attempt 1: split at an uppercase letter that is not at the start of the
# string. The pattern matches (consumes) the uppercase letter itself, so
# `separate` discards it as the delimiter -- hence "elson" in the output below.
tidyr::tibble(myname = c("HarlanNelson")) |>
tidyr::separate(col = myname, into = c("first", "last"), sep = "(?<!^)[[:upper:]]")
#> # A tibble: 1 × 2
#> first last
#> <chr> <chr>
#> 1 Harlan elson
由 reprex 包 (v2.0.1) 于 2022-10-20 创建
# Attempt 2: pass two patterns to `sep`. As the warning below reports, only the
# first element (" ") is used, so "HarlanNelson" is not split and its `last`
# is filled with NA.
tidyr::tibble(myname = c("HarlanNelson", "Another Person")) |>
tidyr::separate(col = myname, into = c("first", "last"), sep = c(" ", "(?<!^)[[:upper:]]"))
#> Warning in gregexpr(pattern, x, perl = TRUE): argument 'pattern' has length > 1
#> and only the first element will be used
#> Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [1].
#> # A tibble: 2 × 2
#> first last
#> <chr> <chr>
#> 1 HarlanNelson <NA>
#> 2 Another Person
由 reprex 包 (v2.0.1) 于 2022-10-20 创建
# Attempt 3: same two-pattern `sep` as above, with an all-lowercase name added.
# Only the first pattern (" ") is applied (see the warning below), so the
# space-separated names split but "HarlanNelson" still does not.
tidyr::tibble(myname = c("HarlanNelson", "Another Person", "someone else")) |>
tidyr::separate(col = myname, into = c("first", "last"), sep = c(" ", "(?<!^)[[:upper:]]"))
#> Warning in gregexpr(pattern, x, perl = TRUE): argument 'pattern' has length > 1
#> and only the first element will be used
#> Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [1].
#> # A tibble: 3 × 2
#> first last
#> <chr> <chr>
#> 1 HarlanNelson <NA>
#> 2 Another Person
#> 3 someone else
由 reprex 包 (v2.0.1) 于 2022-10-20 创建
这就是我想出来的。
不过,这只是在理解了 @cameron 的答案 https://stackoverflow.com/a/51415101/4629916 之后,
# Working approach: split in two passes so that no letter is lost.
tidyr::tibble(myname = c("HarlanNelson", "Another Person", "someone else")) |>
# Pass 1: split at the zero-width position between a lowercase letter
# (lookbehind) and an uppercase letter (lookahead). Nothing is consumed, so the
# capital is kept. Rows without such a position get `last = NA` (fill = 'right').
tidyr::separate(col = myname, into = c("first", "last"), sep = "(?<=[[:lower:]])(?=[[:upper:]])", extra = 'merge', fill = 'right') |>
# Pass 2: split whatever remains in `first` on a space into `first` / `last2`.
tidyr::separate(col = first, into = c("first", "last2"), sep = " ", fill = 'right', extra = 'merge') |>
# Prefer the last name from pass 1; fall back to the space split when it is NA.
dplyr::mutate(last = dplyr::coalesce(last, last2)) |>
# Drop the helper column.
dplyr::select(-last2)
#> # A tibble: 3 × 2
#> first last
#> <chr> <chr>
#> 1 Harlan Nelson
#> 2 Another Person
#> 3 someone else
# Variant of the two-pass approach that uses a negative lookbehind.
tidyr::tibble(myname = c("HarlanNelson", "Another Person", "someone else")) |>
# Pass 1: split at a zero-width position that is not the start of the string
# ((?<!^)) and is followed by an uppercase letter ((?=[[:upper:]])). The
# lookahead does not consume the capital, so it stays in `last`.
# NOTE(review): for "Another Person" this position is presumably right before
# "P", leaving a trailing space in `first` that pass 2 splits off -- the output
# below is as expected either way.
tidyr::separate(col = myname, into = c("first", "last"), sep = "(?<!^)(?=[[:upper:]])", extra = 'merge', fill = 'right') |>
# Pass 2: split whatever remains in `first` on a space into `first` / `last2`.
tidyr::separate(col = first, into = c("first", "last2"), sep = " ", extra = 'merge', fill = 'right') |>
# Prefer the last name from pass 1; fall back to the space split when it is NA.
dplyr::mutate(last = dplyr::coalesce(last, last2)) |>
# Drop the helper column.
dplyr::select(-last2)
#> # A tibble: 3 × 2
#> first last
#> <chr> <chr>
#> 1 Harlan Nelson
#> 2 Another Person
#> 3 someone else