我有一个数据框,来自一项通过 Qualtrics 进行的调查,其中记录了每位钓鱼者收获的鱼的种类和数量(ID = 钓鱼者个体)。我想整理这些数据,以便用调查数据构建一个收获数据框,同时保留调查 ID。
这是我的数据:
# Example survey export: one row per angler (ID).
#   Q7        - comma-separated labels of the species options that were selected
#   Q7_*_TEXT - species names typed in (presumably for the "Other species" options)
#   Q8_*      - harvest counts, stored as character strings (NA when absent)
# Row names are deliberately 3:5, as in the original export.
df <- data.frame(
  ID = 1:3,
  Q7 = c(
    "Other species (please list species name; i.e. Tarpon),Other species (please list species name; i.e. Amberjack)",
    "Red Drum (a.k.a. Redfish or Red),Other species (please list species name; i.e. Tarpon),Other species (please list species name; i.e. Amberjack)",
    "Red Drum (a.k.a. Redfish or Red),Other species (please list species name; i.e. Tarpon)"
  ),
  Q7_7_TEXT = c("Tink", "Blue", "Blue"),
  Q7_8_TEXT = c("Chii", "Red", NA),
  Q8_1 = c(NA, "2", "7"),
  Q8_7 = c("1", "4", "9"),
  Q8_8 = c("2", "5", NA),
  row.names = 3:5
)
我使用以下代码提取每个物种的名称和收获数量,并将这些信息存储在包含物种和计数的数据框 `harv` 中:
library(tidyverse)
library(stringi)
# Build a long "harvest" table (one row per species per angler) from df.

# Split each respondent's comma-separated Q7 answer into one row per selected option.
harv<-data.frame(spp=unlist(strsplit(df$Q7, ",")))
# Collect the write-in species names from columns 3:4 of df. t() transposes the
# columns so that flattening with c() runs respondent by respondent;
# stri_remove_empty() removes empty strings and na.omit() removes the NA rows.
oth<-na.omit(data.frame(spp=stri_remove_empty(c(unlist(t(df[,3:4]))))))
# Positions in harv whose label contains 'Other'.
idx <- grep('Other', harv$spp)
# NOTE(review): n is computed but never used below; the assignment on the next
# line therefore assumes length(idx) equals nrow(oth).
n <- min(length(idx), nrow(oth))
# Overwrite those labels with the write-in names, in order.
harv[idx, 'spp'] <- oth
# Flatten the count columns (5:7 of df) the same way, convert to numeric and drop NAs.
harv.num<-na.omit(as.numeric(stri_remove_empty(c(unlist(t(df[,5:7]))))))
# Attach the counts purely by position.
# NOTE(review): assumes the non-NA counts come out in the same order as the
# species rows in harv; confirm against the survey layout.
harv$num<-harv.num
# Print the result; the respondent ID from df is not carried over by this approach.
harv
spp num
1 Tink 1
2 Chii 2
3 Red Drum (a.k.a. Redfish or Red) 2
4 Blue 4
5 Red 5
6 Red Drum (a.k.a. Redfish or Red) 7
7 Blue 9
如何从 `df` 中提取这些数据,同时保留原始数据框中的调查 ID,使 `harv` 看起来像这样:
spp num ID
1 Tink 1 1
2 Chii 2 1
3 Red Drum (a.k.a. Redfish or Red) 2 2
4 Blue 4 2
5 Red 5 2
6 Red Drum (a.k.a. Redfish or Red) 7 3
7 Blue 9 3
您可以先对列名做一些重命名,然后使用 `pivot_longer` 来实现您想要的结果,如下所示:
library(dplyr, warn.conflicts = FALSE)
library(tidyr)

# Reshape the survey export to one row per (angler, species) while keeping ID.
df |>
  # Give Q7 the same "<question>_<option>" shape as the other columns, so that
  # every column name splits cleanly on "_".
  rename(Q7_1_TEXT = Q7) |>
  rename_with(~ gsub("_TEXT$", "", .x), ends_with("TEXT")) |>
  # ".value" pairs Q7_<k> (species) with Q8_<k> (count) on the same output row;
  # "name" holds the option number k.
  pivot_longer(-ID, names_to = c(".value", "name"), names_sep = "_") |>
  # Keep only options with a reported count.
  filter(!is.na(Q8)) |>
  mutate(
    # For option 1 the species column is the original multi-select string; keep
    # only its first comma-separated label. Write-in names are left untouched.
    # NOTE(review): assumes the option-1 label is always listed first in Q7
    # whenever Q8_1 is filled in; confirm against the survey export.
    Q7 = if_else(name == "1", sub(",.*$", "", Q7), Q7),
    # Counts arrive as character strings.
    Q8 = as.numeric(Q8)
  ) |>
  select(ID, spp = Q7, num = Q8)
#> # A tibble: 7 × 3
#> ID spp num
#> <int> <chr> <dbl>
#> 1 1 Tink 1
#> 2 1 Chii 2
#> 3 2 Red Drum (a.k.a. Redfish or Red) 2
#> 4 2 Blue 4
#> 5 2 Red 5
#> 6 3 Red Drum (a.k.a. Redfish or Red) 7
#> 7 3 Blue 9