# Sample input: a single row whose `Info` string packs several fields together.
data <- data.frame(
  ID = 1,
  Info = "Zebra fish (one)--Hello; blah",
  Value = 7
)
# Desired result: the `Info` string broken out into separate columns.
# NOTE(review): CATEGORY is spelled "Zebra Fish" here while `Info` contains
# "Zebra fish" -- confirm whether the change in capitalisation is intended.
new_data <- data.frame(
  ID = 1,
  CATEGORY = "Zebra Fish",
  TYPE1 = "one",
  TYPE2 = "Hello",
  Value = 7
)
有没有函数可以把 `data` 转换成上面 `new_data` 那样的结构？我试过下面的代码，但没有得到想要的结果。
# Parse one `Info` string of the form "CATEGORY (TYPE1)--TYPE2; rest".
#
# info: a single character string.
# Returns a character vector of length 3 holding category, type1 and type2,
# each with surrounding whitespace trimmed. Pieces that cannot be located
# because the parentheses are missing come back as NA.
parse_info <- function(info) {
  # Splitting on "(" and ")" yields: "CATEGORY ", "TYPE1", "--TYPE2; rest"
  parts1 <- unlist(strsplit(info, "\\(|\\)"))
  category <- trimws(parts1[1])
  type1 <- trimws(parts1[2])
  # The third piece begins with "--", so splitting it on "--" would put an
  # empty string first. Strip that leading separator, then discard everything
  # from the next "--" or ";" onwards.
  remainder <- sub("^\\s*--", "", parts1[3])
  type2 <- trimws(sub("(--|;).*$", "", remainder))
  c(category, type1, type2)
}
# Parse every `Info` string; the result has one row per input row and three
# columns (category, type1, type2).
info_parts <- t(vapply(as.character(data$Info), parse_info, character(3),
                       USE.NAMES = FALSE))
# Build the result column by column instead of via apply(): apply() first
# coerces the whole data frame to a character matrix, which would turn ID and
# Value into strings.
# NOTE(review): the target frame spells the last column `Value`; `VALUE` is
# kept here to preserve the column names this script already produced.
new_data <- data.frame(
  ID = data$ID,
  CATEGORY = info_parts[, 1],
  TYPE1 = info_parts[, 2],
  TYPE2 = info_parts[, 3],
  VALUE = data$Value,
  stringsAsFactors = FALSE
)
如果您的字符串始终采用“CATEGORY (TYPE1)--TYPE2;不需要的内容”的格式，则可以使用 `separate_wider_delim`，并以左括号 `(`、右括号 `)` 和分号 `;` 作为分隔符来执行拆分。
library(tidyr)
# Split `Info` into CATEGORY, TYPE1 and TYPE2 at " (", ")--" or ";".
# regex() is exported by stringr, not tidyr, so it is namespace-qualified
# here; a plain string delimiter would be matched as fixed text.
# too_many = "drop" discards any pieces beyond the three named ones
# (for the sample row, the text after the ";").
data %>%
  separate_wider_delim(
    cols = Info,
    delim = stringr::regex(" \\(|\\)--|;"),
    names = c("CATEGORY", "TYPE1", "TYPE2"),
    too_many = "drop"
  )
# A tibble: 1 × 5
     ID CATEGORY   TYPE1 TYPE2 Value
  <dbl> <chr>      <chr> <chr> <dbl>
1     1 Zebra fish one   Hello     7