我知道这个问题的答案可能很简单,只是我盯着代码看得太久了。我已经查阅过"在 R 中将数据分组到范围内"、"R 数据分组"、"在 R 中分组字符变量"、"R - 数据分组"、"在 R 中分组数据"以及其他多个相关问题。如果这是重复提问,我提前道歉,但我没能把那些答案套用到自己的数据上。
我有来自 20 多个病毒家族的数据。我已经将它编码成二进制,这样每一行都有一个唯一的标识符,并且在每个病毒家族列中都有一个 0 或 1。我正在尝试将站点数据编码为数字组。
数据(样本量)
date Site geometry
<chr> <chr> <S3: sfc_POINT>
8.20.13 NDUFR <S3: sfc_POINT>
8.27.13 UMNP-HQ <S3: sfc_POINT>
8.28.13 UMNP-campsite3 <S3: sfc_POINT>
8.28.13 UMNP-campsite3 <S3: sfc_POINT>
8.28.13 UMNP-hondohondoa <S3: sfc_POINT>
8.28.13 UMNP-hondohondob <S3: sfc_POINT>
8.29.13 UMNP-njokamoni <S3: sfc_POINT>
8.29.13 UMNP-mangabey <S3: sfc_POINT>
8.30.13 UMNP-hondohondoc <S3: sfc_POINT>
8.30.13 UMNP-hondohondod <S3: sfc_POINT>
8.30.13 UMNP-hondohondoe <S3: sfc_POINT>
8.31.13 UMNP-HQ <S3: sfc_POINT>
8.31.13 MamaGoti <S3: sfc_POINT>
9.1.13 UMNP-Sanje1 <S3: sfc_POINT>
9.1.13 UMNP-Sanje2 <S3: sfc_POINT>
9.1.13 UMNP-Sanje3 <S3: sfc_POINT>
9.2.13 Magombera1 <S3: sfc_POINT>
9.2.13 Magombera2 <S3: sfc_POINT>
9.3.13 Sonjo <S3: sfc_POINT>
9.3.13 SonjoRoad <S3: sfc_POINT>
我想把 NDUFR 编码为 1,UMNP 编码为 2,UMNP-campsite3 编码为 3,然后把 UMNP-hondohondoa、UMNP-hondohondob、UMNP-hondohondoc、UMNP-hondohondod 和 UMNP-hondohondoe 归为 4,MamaGoti 编码为 5,UMNP-Sanje1、UMNP-Sanje2 和 UMNP-Sanje3 归为 6,Magombera1 和 Magombera2 归为 7,Sonjo 和 SonjoRoad 归为 8。
我知道这应该相对容易,但我的脑子已经转不动了。我已经尝试了很多基于 dplyr 和 base R 的建议。
非常感谢任何帮助。
dput() 输出
structure(list(date = c("8.20.13", "8.27.13", "8.28.13", "8.28.13",
"8.28.13", "8.28.13", "8.29.13", "8.29.13", "8.30.13", "8.30.13",
"8.30.13", "8.31.13", "8.31.13", "9.1.13", "9.1.13", "9.1.13",
"9.2.13", "9.2.13", "9.3.13", "9.3.13"), forestsite = c("NDUFR",
"UMNP-HQ", "UMNP-campsite3", "UMNP-campsite3", "UMNP-hondohondoa",
"UMNP-hondohondob", "UMNP-njokamoni", "UMNP-mangabey", "UMNP-hondohondoc",
"UMNP-hondohondod", "UMNP-hondohondoe", "UMNP-HQ", "MamaGoti",
"UMNP-Sanje1", "UMNP-Sanje2", "UMNP-Sanje3", "Magombera1", "Magombera2",
"Sonjo", "SonjoRoad"), geometry = structure(list(structure(c(35.908246,
-8.0475655), class = c("XY", "POINT", "sfg")), structure(c(36.883988,
-7.844929), class = c("XY", "POINT", "sfg")), structure(c(36.884545,
-7.849439), class = c("XY", "POINT", "sfg")), structure(c(36.884545,
-7.849439), class = c("XY", "POINT", "sfg")), structure(c(36.887065,
-7.833501), class = c("XY", "POINT", "sfg")), structure(c(36.891369,
-7.832091), class = c("XY", "POINT", "sfg")), structure(c(36.878388,
-7.82738), class = c("XY", "POINT", "sfg")), structure(c(36.87753,
-7.8232), class = c("XY", "POINT", "sfg")), structure(c(36.89072,
-7.829972), class = c("XY", "POINT", "sfg")), structure(c(36.890019,
-7.827783), class = c("XY", "POINT", "sfg")), structure(c(36.887414,
-7.825414), class = c("XY", "POINT", "sfg")), structure(c(36.883988,
-7.844929), class = c("XY", "POINT", "sfg")), structure(c(36.886217,
-7.844622), class = c("XY", "POINT", "sfg")), structure(c(36.904182,
-7.783986), class = c("XY", "POINT", "sfg")), structure(c(36.903943,
-7.783), class = c("XY", "POINT", "sfg")), structure(c(36.902821,
-7.77507), class = c("XY", "POINT", "sfg")), structure(c(36.980875,
-7.832182), class = c("XY", "POINT", "sfg")), structure(c(36.960576,
-7.815916), class = c("XY", "POINT", "sfg")), structure(c(36.896019,
-7.808054), class = c("XY", "POINT", "sfg")), structure(c(36.895821,
-7.81365), class = c("XY", "POINT", "sfg"))), class = c("sfc_POINT",
"sfc"), precision = 0, bbox = structure(c(xmin = 35.908246, ymin = -8.0475655,
xmax = 36.980875, ymax = -7.77507), class = "bbox"), crs = structure(list(
input = "EPSG:4326", wkt = "GEOGCRS[\"WGS 84\",\n ENSEMBLE[\"World Geodetic System 1984 ensemble\",\n MEMBER[\"World Geodetic System 1984 (Transit)\"],\n MEMBER[\"World Geodetic System 1984 (G730)\"],\n MEMBER[\"World Geodetic System 1984 (G873)\"],\n MEMBER[\"World Geodetic System 1984 (G1150)\"],\n MEMBER[\"World Geodetic System 1984 (G1674)\"],\n MEMBER[\"World Geodetic System 1984 (G1762)\"],\n MEMBER[\"World Geodetic System 1984 (G2139)\"],\n ELLIPSOID[\"WGS 84\",6378137,298.257223563,\n LENGTHUNIT[\"metre\",1]],\n ENSEMBLEACCURACY[2.0]],\n PRIMEM[\"Greenwich\",0,\n ANGLEUNIT[\"degree\",0.0174532925199433]],\n CS[ellipsoidal,2],\n AXIS[\"geodetic latitude (Lat)\",north,\n ORDER[1],\n ANGLEUNIT[\"degree\",0.0174532925199433]],\n AXIS[\"geodetic longitude (Lon)\",east,\n ORDER[2],\n ANGLEUNIT[\"degree\",0.0174532925199433]],\n USAGE[\n SCOPE[\"Horizontal component of 3D system.\"],\n AREA[\"World.\"],\n BBOX[-90,-180,90,180]],\n ID[\"EPSG\",4326]]"), class = "crs"), n_empty = 0L)), row.names = c(NA,
-20L), sf_column = "geometry", agr = structure(c(date = NA_integer_,
forestsite = NA_integer_), .Label = c("constant", "aggregate",
"identity"), class = "factor"), class = c("sf", "tbl_df", "tbl",
"data.frame"))
您可以为此使用 case_when。请注意,case_when 内的条件必须按从最具体到最一般的顺序排列,因为每一行只会采用第一个匹配成功的条件。
library(tidyverse)

# Add a numeric group code for every site. case_when() takes the first
# condition that matches, so the exact site names are listed before the
# catch-all "UMNP" pattern.
mutate(
  df,
  code = case_when(
    forestsite == "NDUFR" ~ 1,
    forestsite == "UMNP-campsite3" ~ 3,
    forestsite %in% c(
      "UMNP-hondohondoa", "UMNP-hondohondob", "UMNP-hondohondoc",
      "UMNP-hondohondod", "UMNP-hondohondoe"
    ) ~ 4,
    forestsite == "MamaGoti" ~ 5,
    forestsite %in% c("UMNP-Sanje1", "UMNP-Sanje2", "UMNP-Sanje3") ~ 6,
    forestsite %in% c("Magombera1", "Magombera2") ~ 7,
    forestsite %in% c("Sonjo", "SonjoRoad") ~ 8,
    # Every UMNP site not claimed by a rule above falls into group 2.
    grepl("UMNP", forestsite) ~ 2,
    # Sites matching none of the rules get a missing code.
    TRUE ~ NA_real_
  )
)
# A tibble: 20 × 4
date forestsite geometry code
* <chr> <chr> <POINT [°]> <dbl>
1 8.20.13 NDUFR (35.90825 -8.047565) 1
2 8.27.13 UMNP-HQ (36.88399 -7.844929) 2
3 8.28.13 UMNP-campsite3 (36.88455 -7.849439) 3
4 8.28.13 UMNP-campsite3 (36.88455 -7.849439) 3
5 8.28.13 UMNP-hondohondoa (36.88706 -7.833501) 4
6 8.28.13 UMNP-hondohondob (36.89137 -7.832091) 4
7 8.29.13 UMNP-njokamoni (36.87839 -7.82738) 2
8 8.29.13 UMNP-mangabey (36.87753 -7.8232) 2
9 8.30.13 UMNP-hondohondoc (36.89072 -7.829972) 4
10 8.30.13 UMNP-hondohondod (36.89002 -7.827783) 4
11 8.30.13 UMNP-hondohondoe (36.88741 -7.825414) 4
12 8.31.13 UMNP-HQ (36.88399 -7.844929) 2
13 8.31.13 MamaGoti (36.88622 -7.844622) 5
14 9.1.13 UMNP-Sanje1 (36.90418 -7.783986) 6
15 9.1.13 UMNP-Sanje2 (36.90394 -7.783) 6
16 9.1.13 UMNP-Sanje3 (36.90282 -7.77507) 6
17 9.2.13 Magombera1 (36.98087 -7.832182) 7
18 9.2.13 Magombera2 (36.96058 -7.815916) 7
19 9.3.13 Sonjo (36.89602 -7.808054) 8
20 9.3.13 SonjoRoad (36.89582 -7.81365) 8
case_match() 可能是一个稍微简短一些的替代方案:
# Reset the class vector to c("sf", "data.frame"), i.e. without the
# tibble classes.
df <- structure(df, class = c("sf", "data.frame"))

# Distinct site names; the pattern-based groups below are picked from these.
x <- unique(df$forestsite)

# case_match() compares forestsite against the values on each left-hand
# side, so every pattern is first expanded into the matching site names.
mutate(
  df,
  code = case_match(
    forestsite,
    "NDUFR" ~ 1,
    # UMNP sites that are not campsite3, hondohondo* or Sanje*.
    grep(
      "UMNP-hondohondo|UMNP-Sanje|UMNP-campsite3",
      grep("UMNP", x, value = TRUE),
      value = TRUE,
      invert = TRUE
    ) ~ 2,
    "UMNP-campsite3" ~ 3,
    grep("UMNP-hondohondo", x, value = TRUE) ~ 4,
    "MamaGoti" ~ 5,
    grep("UMNP-Sanje", x, value = TRUE) ~ 6,
    grep("Magombera", x, value = TRUE) ~ 7,
    grep("Sonjo|SonjoRoad", x, value = TRUE) ~ 8
  )
)
date forestsite geometry code
1 8.20.13 NDUFR 35.908246, -8.047565 1
2 8.27.13 UMNP-HQ 36.883988, -7.844929 2
3 8.28.13 UMNP-campsite3 36.884545, -7.849439 3
4 8.28.13 UMNP-campsite3 36.884545, -7.849439 3
5 8.28.13 UMNP-hondohondoa 36.887065, -7.833501 4
6 8.28.13 UMNP-hondohondob 36.891369, -7.832091 4
7 8.29.13 UMNP-njokamoni 36.87839, -7.82738 2
8 8.29.13 UMNP-mangabey 36.87753, -7.82320 2
9 8.30.13 UMNP-hondohondoc 36.890720, -7.829972 4
10 8.30.13 UMNP-hondohondod 36.890019, -7.827783 4
11 8.30.13 UMNP-hondohondoe 36.887414, -7.825414 4
12 8.31.13 UMNP-HQ 36.883988, -7.844929 2
13 8.31.13 MamaGoti 36.886217, -7.844622 5
14 9.1.13 UMNP-Sanje1 36.904182, -7.783986 6
15 9.1.13 UMNP-Sanje2 36.90394, -7.78300 6
16 9.1.13 UMNP-Sanje3 36.90282, -7.77507 6
17 9.2.13 Magombera1 36.980875, -7.832182 7
18 9.2.13 Magombera2 36.960576, -7.815916 7
19 9.3.13 Sonjo 36.896019, -7.808054 8
20 9.3.13 SonjoRoad 36.89582, -7.81365 8