我为此尝试了许多不同的方法(包括这个 Stack Overflow 帖子里的做法),但没有一种能够正常工作。
我的数据框 SiteVisit(文末附有一小部分数据的 dput 输出)由以下几列组成:Date(类 = Date)、TagID(类 = numeric)、SiteVisits(由字符向量组成的列表)和 NumSites(类 = numeric)。每一行列出了单个生物个体(TagID)在某一日期被探测到的所有站点。
我想根据某个标签当天访问过的站点,判断它这一整天是处于“内部”(INSIDE)、“外部”(OUTSIDE)还是“过境”(TRANSITING)状态:只有当天从未访问过任何外部站点时才算“内部”;只有当天从未访问过任何内部站点时才算“外部”。
首先,我想检查某个 TagID 在该日期访问的所有站点是否都包含在下面这个列表中:
# Site codes treated as "inside" locations.
# NOTE(review): list(c(...)) is a length-1 list holding one character vector,
# not a character vector of five codes -- confirm this is what `%in%` should
# be compared against.
inside <- list(c("Release","IC1", "IC2", "IC3","RGD1"))
如果为 TRUE,则 SiteVisit$Location = "INSIDE";
否则,再检查该 TagID 在该日期访问的所有站点是否都包含在下面这个列表中:
# Site codes treated as "outside" locations.
# NOTE(review): list(c(...)) is a length-1 list holding one character vector,
# not a character vector of eleven codes -- confirm this is what `%in%` should
# be compared against.
outside <- list(c("ORS1","WC1","WC2","WC3","RGU1","ORN1","ORN2","ORS3","GL1","CVP1","CLRS"))
如果为 TRUE,则 SiteVisit$Location = "OUTSIDE";
否则 SiteVisit$Location = "TRANSITING"。
我已经尝试了许多不同的 dplyr 和 base R 写法来完成此操作,但似乎没有一种是正确的。我认为这是因为我没有正确地检查 SiteVisit$SiteVisits。
我目前的尝试是:
# Attempt 1: derive Location with nested ifelse() inside mutate(), indexing
# the SiteVisits list column with an empty `[[]]`.
# NOTE(review): all() reduces its argument to a single TRUE/FALSE, so each
# ifelse() call yields one label rather than one label per row.
SiteVisit <- SiteVisit %>%
mutate(Location = ifelse(all(SiteVisits[[]] %in% inside), "INSIDE",
ifelse(all(SiteVisits[[]] %in% outside),"OUTSIDE","TRANSITING")))
这会让所有行都得到 "INSIDE";
以及
# Attempt 2: same nested ifelse() inside mutate(), but indexing the SiteVisits
# list column with an empty `[]` instead of `[[]]`.
# NOTE(review): all() reduces its argument to a single TRUE/FALSE, so each
# ifelse() call yields one label rather than one label per row.
SiteVisit <- SiteVisit %>%
mutate(Location = ifelse(all(SiteVisits[] %in% inside), "INSIDE",
ifelse(all(SiteVisits[] %in% outside),"OUTSIDE","TRANSITING")))
这会让所有行都得到 "TRANSITING"。
此外,尝试在 for 循环中执行同样的检查也不起作用:
# Attempt 3: test each row's site vector against `inside` in a for loop.
# NOTE(review): the assignment targets the whole SiteVisit$Inside column (there
# is no [i] index on the left-hand side), so every iteration overwrites the
# column with a single value.
for (i in 1: nrow(SiteVisit)) {SiteVisit$Inside <-
all(SiteVisit$SiteVisits[[i]] %in% inside)}
得到的结果全部是 FALSE,而单独运行
# Membership check for the second row's site vector only.
all(SiteVisit$SiteVisits[[2]] %in% inside)
的结果却是 TRUE。
下面是我的数据框 SiteVisit 的一小部分数据(dput 输出):
structure(list(Date = structure(c(15828, 15828, 15847, 15847,
15847, 15847, 15847, 15847, 15848, 15848, 15848, 15848, 15848,
15848, 15848, 15848, 15849, 15849, 15849, 15849, 15849, 15849,
15849, 15850, 15850, 15850, 15850, 15850, 15850, 15850, 15851,
15851, 15851, 15851, 15851, 15851, 15851, 15851, 15852, 15852,
15852, 15852, 15852, 15852, 15852, 15853, 15853, 15853, 15853,
15853, 15853, 15853, 15853, 15853, 15854, 15854, 15854, 15854,
15854, 15854, 15854, 15854, 15855, 15855, 15855, 15855, 15855,
15855, 15855, 15855, 15855, 15855, 15855, 15855, 15855, 15855,
15856, 15856, 15856, 15856, 15856, 15856, 15856, 15856, 15856,
15856, 15856, 15856, 15856, 15857, 15857, 15857, 15857, 15857,
15857, 15857, 15857, 15857, 15857, 15857), class = "Date"), TagID = c(5717.06,
6277.06, 5073.06, 5717.06, 11121.1, 11191.1, 11387.1, 11415.1,
5717.06, 6277.06, 11121.1, 11191.1, 11219.1, 11289.1, 11387.1,
11415.1, 5717.06, 11121.1, 11191.1, 11219.1, 11289.1, 11387.1,
11415.1, 5717.06, 11121.1, 11191.1, 11219.1, 11289.1, 11387.1,
11415.1, 5717.06, 11121.1, 11191.1, 11219.1, 11289.1, 11317.1,
11387.1, 11415.1, 5717.06, 6277.06, 11191.1, 11219.1, 11289.1,
11387.1, 11415.1, 5717.06, 6277.06, 9015.01, 9833.06, 11191.1,
11219.1, 11289.1, 11387.1, 11415.1, 5717.06, 6277.06, 9015.01,
11191.1, 11219.1, 11289.1, 11387.1, 11415.1, 5641.22, 5717.06,
6221.06, 6277.06, 7909.22, 9015.01, 9833.06, 11121.1, 11191.1,
11219.1, 11289.1, 11317.1, 11387.1, 11415.1, 5717.06, 6277.06,
6529.06, 8119.01, 8545.06, 9015.01, 9497.06, 9833.06, 11191.1,
11219.1, 11289.1, 11387.1, 11415.1, 5717.06, 6277.06, 6529.06,
9015.01, 9497.06, 9833.06, 11191.1, 11219.1, 11289.1, 11387.1,
11415.1), SiteVisits = list("Release", "Release", c("IC2", "IC1",
"Release"), "IC3", "WC2", "RGD1", c("WC1", "WC3"), "WC3", "IC3",
"IC3", "WC2", "RGD1", "IC2", "IC1", "WC1", "WC3", "IC3",
"WC2", "RGD1", c("IC2", "IC1"), "IC1", "WC1", "WC3", "IC3",
"WC2", "RGD1", "IC2", "IC1", "WC1", "WC3", "IC3", "WC2",
"RGD1", "IC2", "IC1", "WC1", "WC1", "WC3", "IC3", "IC3",
"RGD1", "IC2", "IC1", "WC1", "WC3", "IC3", "IC3", c("IC3",
"Release"), c("IC3", "IC2", "IC1", "Release"), "RGD1", "IC2",
"IC1", "WC1", "WC3", "IC3", "IC3", c("IC3", "IC2"), "RGD1",
"IC2", "IC1", "WC1", "WC3", "Release", "IC3", "Release",
"IC3", c("RGD1", "Release"), c("IC3", "IC2"), c("IC3", "IC1"
), "WC2", "RGD1", "IC2", "IC1", "WC1", "WC1", "WC3", "IC3",
"IC3", c("RGD1", "Release"), c("RGD1", "Release"), "Release",
c("IC3", "IC2", "IC1"), "Release", c("IC3", "IC2", "IC1",
"RGD1"), "RGD1", "IC2", "IC1", "WC1", "WC3", "IC3", "IC3",
"RGD1", c("IC3", "IC2", "IC1"), "RGD1", c("IC3", "IC1", "RGD1"
), "RGD1", "IC2", c("IC2", "IC1"), "WC1", "WC3"), NumSites = c(1L,
1L, 3L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 4L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L,
3L, 1L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 3L, 1L, 1L,
2L, 1L, 1L)), row.names = c(NA, -100L), groups = structure(list(
Date = structure(c(15828, 15847, 15848, 15849, 15850, 15851,
15852, 15853, 15854, 15855, 15856, 15857), class = "Date"),
.rows = list(1:2, 3:8, 9:16, 17:23, 24:30, 31:38, 39:45,
46:54, 55:62, 63:76, 77:89, 90:100)), row.names = c(NA,
-12L), class = c("tbl_df", "tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
只要把 inside 和 outside 存储为普通的字符向量(而不是 list),下面的代码就能正常工作:
# Site codes as plain character vectors (not wrapped in list()), so `%in%`
# compares each visited site against the individual codes.
inside <- c("Release", "IC1", "IC2", "IC3", "RGD1")
outside <- c(
  "ORS1", "WC1", "WC2", "WC3", "RGU1", "ORN1",
  "ORN2", "ORS3", "GL1", "CVP1", "CLRS"
)

# Label every row from its vector of visited site codes:
#   "INSIDE"     - all visited sites are in `inside`
#   "OUTSIDE"    - all visited sites are in `outside`
#   "TRANSITING" - anything else (a mix, or a code in neither set)
# vapply(..., character(1)) returns a plain character vector, so Location is
# an ordinary character column rather than the list column lapply() creates.
# Plain if/else replaces ifelse() because each test is a single TRUE/FALSE.
# Note: an empty site vector is labelled "INSIDE", since all(logical(0)) is
# TRUE.
df1$Location <- vapply(
  df1$SiteVisits,
  function(sites) {
    if (all(sites %in% inside)) {
      "INSIDE"
    } else if (all(sites %in% outside)) {
      "OUTSIDE"
    } else {
      "TRANSITING"
    }
  },
  character(1)
)