我的数据类似于以下示例:
数据 |
---|
PRETEND COUNTY JP |
FAKE COUNTY,TX JP 1.1 |
Madeup City,TX |
Not Real County, JP 2.5 |
如何给第一个单元格这类还没有“, TX”的县名补上“, TX”?
我想要一个如下所示的数据集:
数据 |
---|
PRETEND COUNTY, TX JP |
FAKE COUNTY,TX JP 1.1 |
Madeup City,TX |
Not Real County, TX JP 2.5 |
我不确定具体要求,但你可以尝试使用正则表达式。
# Build the example data.
df <-
  data.frame(
    stringsAsFactors = FALSE,
    DATA = c(
      "PRETEND COUNTY JP",
      "FAKE COUNTY,TX JP 1.1",
      "Madeup City,TX",
      "Not Real County, JP 2.5"
    )
  )

# Match "county" in any letter case together with an optional trailing comma.
# Capturing the word lets the replacement keep the original casing, and
# consuming the comma avoids producing "County, TX, JP".
pattern <- stringr::regex("(county),?", ignore_case = TRUE)

# Rows that already mention TX are kept as they are; every other row gets
# ", TX" inserted right after the county word.
df2 <-
  df |>
  dplyr::mutate(
    DATA2 =
      dplyr::case_when(
        stringr::str_detect(DATA, "TX") ~ DATA,
        TRUE ~ stringr::str_replace(DATA, pattern, "\\1, TX")
      )
  )
df2
#>                      DATA                      DATA2
#> 1       PRETEND COUNTY JP      PRETEND COUNTY, TX JP
#> 2   FAKE COUNTY,TX JP 1.1      FAKE COUNTY,TX JP 1.1
#> 3          Madeup City,TX             Madeup City,TX
#> 4 Not Real County, JP 2.5 Not Real County, TX JP 2.5
创建于 2023-09-22,使用 reprex v2.0.2
library(tidyverse)
# Put ", TX " between a COUNTY/CITY word and a following "JP", swallowing the
# optional comma and the single whitespace character that sit between them.
mutate(
  df,
  address = address %>%
    str_replace("(?i)(?<=(COUNTY|CITY)),?\\s(?=JP)", ", TX ")
)
address
1 PRETEND COUNTY, TX JP
2 FAKE COUNTY,TX JP 1.1
3 Madeup City,TX
4 Not Real County, TX JP 2.5
这是如何运作的:
- `(?i)`:不区分大小写的标志
- `(?<=(COUNTY|CITY))`:正向后行断言,仅当匹配位置左侧是 “COUNTY” 或 “CITY” 时才匹配
- `,?\\s`:匹配一个可选的逗号和一个空白字符
- `(?=JP)`:正向先行断言,仅当匹配位置右侧紧跟 “JP” 时才匹配

或者,你也可以这样做:
library(dplyr)
library(stringr)
# Sample data frame
data_df <- data.frame(
  DATA = c(
    "PRETEND COUNTY JP",
    "FAKE COUNTY,TX JP 1.1",
    "Madeup City,TX",
    "Not Real County, JP 2.5"
  ),
  stringsAsFactors = FALSE
)

# Step 1: when COUNTY/CITY is followed only by an optional comma, optional
#         whitespace and an optional "JP ..." suffix, insert ", TX" after the
#         word. The `,?` is what lets "Not Real County, JP 2.5" match; rows
#         that already carry ",TX" do not match and stay untouched.
# Step 2: drop the dangling space the replacement leaves after "TX" when there
#         was no "JP ..." suffix.
# Step 3: rows with no COUNTY/CITY word at all get ", TX" appended; missing
#         values stay missing instead of turning into "NA, TX".
modified_df <- data_df %>%
  mutate(
    DATA = str_replace(
      DATA,
      "(?i)(COUNTY|CITY),?[[:space:]]*(JP.*)?$",
      "\\1, TX \\2"
    ),
    DATA = str_replace(DATA, ", TX $", ", TX"),
    DATA = if_else(
      is.na(DATA) | grepl("(COUNTY|CITY)", DATA, ignore.case = TRUE),
      DATA,
      paste0(DATA, ", TX")
    )
  )
library(dplyr)
# Add ", TX" to every row that lacks it:
#   * missing values are passed through unchanged;
#   * rows containing COUNTY/CITY (any letter case) get ", TX" inserted after
#     that word when it is followed only by an optional comma, optional
#     whitespace and an optional "JP ..." suffix -- the `,?` is what lets
#     "Not Real County, JP 2.5" match; the outer sub() removes the dangling
#     space left after "TX" when there is no "JP ..." suffix;
#   * all remaining rows get ", TX" appended.
modified_df <- data_df %>%
  mutate(DATA = case_when(
    is.na(DATA) ~ NA_character_,
    grepl("(COUNTY|CITY)", DATA, ignore.case = TRUE) ~ sub(
      ", TX $",
      ", TX",
      sub(
        "(COUNTY|CITY),?[[:space:]]*(JP.*)?$",
        "\\1, TX \\2",
        DATA,
        ignore.case = TRUE
      )
    ),
    TRUE ~ paste0(DATA, ", TX")
  ))
或者,也可以把这段逻辑封装成一个函数:
library(dplyr)
# Example input: a single character column holding the raw place strings
data_df <- data.frame(
  DATA = c(
    "PRETEND COUNTY JP",
    "FAKE COUNTY,TX JP 1.1",
    "Madeup City,TX",
    "Not Real County, JP 2.5"
  ),
  stringsAsFactors = FALSE
)
#' Add a ", TX" state suffix to place strings that lack one
#'
#' Strings containing COUNTY or CITY (any letter case) get ", TX" inserted
#' right after that word when it is followed only by an optional comma,
#' optional whitespace and an optional "JP ..." suffix; such strings that
#' already carry a state (e.g. "FAKE COUNTY,TX JP 1.1") do not match and are
#' returned unchanged. Strings with no COUNTY/CITY word get ", TX" appended.
#' Missing values stay missing.
#'
#' @param text A character vector of place strings.
#' @return A character vector the same length as `text`.
modify_text <- function(text) {
  has_place <- grepl("(COUNTY|CITY)", text, ignore.case = TRUE)

  # `,?` lets an existing comma before "JP" be absorbed, so that
  # "Not Real County, JP 2.5" becomes "Not Real County, TX JP 2.5".
  out <- sub(
    "(COUNTY|CITY),?[[:space:]]*(JP.*)?$",
    "\\1, TX \\2",
    text,
    ignore.case = TRUE
  )
  # Without a "JP ..." suffix the replacement ends in "TX "; drop that space.
  out <- sub(", TX $", ", TX", out)

  # Indexed assignment (rather than ifelse) keeps the result a character
  # vector for empty input and leaves NA entries as NA.
  needs_append <- !is.na(text) & !has_place
  out[needs_append] <- paste0(text[needs_append], ", TX")
  out
}
# Rewrite the "DATA" column by running it through modify_text()
modified_df <- mutate(data_df, DATA = modify_text(DATA))
# Show the resulting data frame
print(modified_df)
输出如下:
DATA
1 PRETEND COUNTY, TX JP
2 FAKE COUNTY,TX JP 1.1
3 Madeup City,TX
4 Not Real County, JP 2.5