R - 将字符数据分组为数字

问题描述 投票:0回答:2

好吧,我知道这个问题的答案可能很简单,只是我盯着代码看得太久了。无论如何,我已经看过《在 R 中将数据分组到范围内》、《R 数据分组》、《在 R 中分组字符变量》、《R - 数据分组》、《在 R 中分组数据》以及其他多个帖子。如果这是重复问题,我提前道歉,但我一直没能照着那些答案解决自己的问题。

我有来自 20 多个病毒家族的数据。我已经将它编码成二进制,这样每一行都有一个唯一的标识符,并且在每个病毒家族列中都有一个 0 或 1。我正在尝试将站点数据编码为数字组。

数据(样例)

date    Site                geometry
<chr>   <chr>               <S3: sfc_POINT>
8.20.13 NDUFR               <S3: sfc_POINT>     
8.27.13 UMNP-HQ             <S3: sfc_POINT>     
8.28.13 UMNP-campsite3      <S3: sfc_POINT>     
8.28.13 UMNP-campsite3      <S3: sfc_POINT>     
8.28.13 UMNP-hondohondoa    <S3: sfc_POINT>     
8.28.13 UMNP-hondohondob    <S3: sfc_POINT>     
8.29.13 UMNP-njokamoni      <S3: sfc_POINT>     
8.29.13 UMNP-mangabey       <S3: sfc_POINT>     
8.30.13 UMNP-hondohondoc    <S3: sfc_POINT>     
8.30.13 UMNP-hondohondod    <S3: sfc_POINT> 
8.30.13 UMNP-hondohondoe    <S3: sfc_POINT>     
8.31.13 UMNP-HQ             <S3: sfc_POINT>     
8.31.13 MamaGoti            <S3: sfc_POINT>     
9.1.13  UMNP-Sanje1         <S3: sfc_POINT>     
9.1.13  UMNP-Sanje2         <S3: sfc_POINT>     
9.1.13  UMNP-Sanje3         <S3: sfc_POINT>     
9.2.13  Magombera1          <S3: sfc_POINT>     
9.2.13  Magombera2          <S3: sfc_POINT>     
9.3.13  Sonjo               <S3: sfc_POINT>     
9.3.13  SonjoRoad           <S3: sfc_POINT>     

我想把 NDUFR 编码为 1,UMNP 编码为 2,UMNP-campsite3 编码为 3,然后把 UMNP-hondohondoa、UMNP-hondohondob、UMNP-hondohondoc、UMNP-hondohondod 和 UMNP-hondohondoe 归为 4,MamaGoti 编码为 5,UMNP-Sanje1、UMNP-Sanje2 和 UMNP-Sanje3 归为 6,Magombera1 和 Magombera2 归为 7,Sonjo 和 SonjoRoad 归为 8。

我知道这可能相对容易,但我的脑子卡住了。我已经尝试了很多 dplyr 和 base R 的建议。

非常感谢任何帮助。

输出

structure(list(date = c("8.20.13", "8.27.13", "8.28.13", "8.28.13", 
"8.28.13", "8.28.13", "8.29.13", "8.29.13", "8.30.13", "8.30.13", 
"8.30.13", "8.31.13", "8.31.13", "9.1.13", "9.1.13", "9.1.13", 
"9.2.13", "9.2.13", "9.3.13", "9.3.13"), forestsite = c("NDUFR", 
"UMNP-HQ", "UMNP-campsite3", "UMNP-campsite3", "UMNP-hondohondoa", 
"UMNP-hondohondob", "UMNP-njokamoni", "UMNP-mangabey", "UMNP-hondohondoc", 
"UMNP-hondohondod", "UMNP-hondohondoe", "UMNP-HQ", "MamaGoti", 
"UMNP-Sanje1", "UMNP-Sanje2", "UMNP-Sanje3", "Magombera1", "Magombera2", 
"Sonjo", "SonjoRoad"), geometry = structure(list(structure(c(35.908246, 
-8.0475655), class = c("XY", "POINT", "sfg")), structure(c(36.883988, 
-7.844929), class = c("XY", "POINT", "sfg")), structure(c(36.884545, 
-7.849439), class = c("XY", "POINT", "sfg")), structure(c(36.884545, 
-7.849439), class = c("XY", "POINT", "sfg")), structure(c(36.887065, 
-7.833501), class = c("XY", "POINT", "sfg")), structure(c(36.891369, 
-7.832091), class = c("XY", "POINT", "sfg")), structure(c(36.878388, 
-7.82738), class = c("XY", "POINT", "sfg")), structure(c(36.87753, 
-7.8232), class = c("XY", "POINT", "sfg")), structure(c(36.89072, 
-7.829972), class = c("XY", "POINT", "sfg")), structure(c(36.890019, 
-7.827783), class = c("XY", "POINT", "sfg")), structure(c(36.887414, 
-7.825414), class = c("XY", "POINT", "sfg")), structure(c(36.883988, 
-7.844929), class = c("XY", "POINT", "sfg")), structure(c(36.886217, 
-7.844622), class = c("XY", "POINT", "sfg")), structure(c(36.904182, 
-7.783986), class = c("XY", "POINT", "sfg")), structure(c(36.903943, 
-7.783), class = c("XY", "POINT", "sfg")), structure(c(36.902821, 
-7.77507), class = c("XY", "POINT", "sfg")), structure(c(36.980875, 
-7.832182), class = c("XY", "POINT", "sfg")), structure(c(36.960576, 
-7.815916), class = c("XY", "POINT", "sfg")), structure(c(36.896019, 
-7.808054), class = c("XY", "POINT", "sfg")), structure(c(36.895821, 
-7.81365), class = c("XY", "POINT", "sfg"))), class = c("sfc_POINT", 
"sfc"), precision = 0, bbox = structure(c(xmin = 35.908246, ymin = -8.0475655, 
xmax = 36.980875, ymax = -7.77507), class = "bbox"), crs = structure(list(
    input = "EPSG:4326", wkt = "GEOGCRS[\"WGS 84\",\n    ENSEMBLE[\"World Geodetic System 1984 ensemble\",\n        MEMBER[\"World Geodetic System 1984 (Transit)\"],\n        MEMBER[\"World Geodetic System 1984 (G730)\"],\n        MEMBER[\"World Geodetic System 1984 (G873)\"],\n        MEMBER[\"World Geodetic System 1984 (G1150)\"],\n        MEMBER[\"World Geodetic System 1984 (G1674)\"],\n        MEMBER[\"World Geodetic System 1984 (G1762)\"],\n        MEMBER[\"World Geodetic System 1984 (G2139)\"],\n        ELLIPSOID[\"WGS 84\",6378137,298.257223563,\n            LENGTHUNIT[\"metre\",1]],\n        ENSEMBLEACCURACY[2.0]],\n    PRIMEM[\"Greenwich\",0,\n        ANGLEUNIT[\"degree\",0.0174532925199433]],\n    CS[ellipsoidal,2],\n        AXIS[\"geodetic latitude (Lat)\",north,\n            ORDER[1],\n            ANGLEUNIT[\"degree\",0.0174532925199433]],\n        AXIS[\"geodetic longitude (Lon)\",east,\n            ORDER[2],\n            ANGLEUNIT[\"degree\",0.0174532925199433]],\n    USAGE[\n        SCOPE[\"Horizontal component of 3D system.\"],\n        AREA[\"World.\"],\n        BBOX[-90,-180,90,180]],\n    ID[\"EPSG\",4326]]"), class = "crs"), n_empty = 0L)), row.names = c(NA, 
-20L), sf_column = "geometry", agr = structure(c(date = NA_integer_, 
forestsite = NA_integer_), .Label = c("constant", "aggregate", 
"identity"), class = "factor"), class = c("sf", "tbl_df", "tbl", 
"data.frame"))
r database dataframe dplyr grouping
2个回答
2
投票

您可以为此使用 case_when。请注意,case_when 中的条件必须按从最具体到最一般的顺序排列,因为每一行只会采用第一个匹配的条件。
library(tidyverse)

# Derive a numeric `code` column from `forestsite`.
# case_when() uses the first condition that is TRUE for each row, so the
# exact site names come first and the generic "UMNP" pattern comes last.
df %>%
  mutate(
    code = case_when(
      forestsite == "NDUFR" ~ 1,
      forestsite == "UMNP-campsite3" ~ 3,
      forestsite %in% c(
        "UMNP-hondohondoa", "UMNP-hondohondob", "UMNP-hondohondoc",
        "UMNP-hondohondod", "UMNP-hondohondoe"
      ) ~ 4,
      forestsite == "MamaGoti" ~ 5,
      forestsite %in% c("UMNP-Sanje1", "UMNP-Sanje2", "UMNP-Sanje3") ~ 6,
      forestsite %in% c("Magombera1", "Magombera2") ~ 7,
      forestsite %in% c("Sonjo", "SonjoRoad") ~ 8,
      # Any site containing "UMNP" that was not matched by a branch above.
      grepl("UMNP", forestsite) ~ 2,
      # No condition matched.
      TRUE ~ NA_real_
    )
  )

# A tibble: 20 × 4
   date    forestsite                   geometry  code
 * <chr>   <chr>                     <POINT [°]> <dbl>
 1 8.20.13 NDUFR            (35.90825 -8.047565)     1
 2 8.27.13 UMNP-HQ          (36.88399 -7.844929)     2
 3 8.28.13 UMNP-campsite3   (36.88455 -7.849439)     3
 4 8.28.13 UMNP-campsite3   (36.88455 -7.849439)     3
 5 8.28.13 UMNP-hondohondoa (36.88706 -7.833501)     4
 6 8.28.13 UMNP-hondohondob (36.89137 -7.832091)     4
 7 8.29.13 UMNP-njokamoni    (36.87839 -7.82738)     2
 8 8.29.13 UMNP-mangabey      (36.87753 -7.8232)     2
 9 8.30.13 UMNP-hondohondoc (36.89072 -7.829972)     4
10 8.30.13 UMNP-hondohondod (36.89002 -7.827783)     4
11 8.30.13 UMNP-hondohondoe (36.88741 -7.825414)     4
12 8.31.13 UMNP-HQ          (36.88399 -7.844929)     2
13 8.31.13 MamaGoti         (36.88622 -7.844622)     5
14 9.1.13  UMNP-Sanje1      (36.90418 -7.783986)     6
15 9.1.13  UMNP-Sanje2         (36.90394 -7.783)     6
16 9.1.13  UMNP-Sanje3       (36.90282 -7.77507)     6
17 9.2.13  Magombera1       (36.98087 -7.832182)     7
18 9.2.13  Magombera2       (36.96058 -7.815916)     7
19 9.3.13  Sonjo            (36.89602 -7.808054)     8
20 9.3.13  SonjoRoad         (36.89582 -7.81365)     8

0
投票

case_match()
可能是一个稍微短一点的选择:

# Keep only the "sf" and "data.frame" classes on df (drops the tibble classes).
class(df) <- c("sf", "data.frame")
# Distinct site names; the grepl() subsets below pick the names for each code.
x <- unique(df$forestsite)
df %>%
  mutate(
    code = case_match(
      forestsite,
      "NDUFR" ~ 1,
      # UMNP sites that are not hondohondo, Sanje, or campsite3.
      x[grepl("UMNP", x) &
        !grepl("UMNP-hondohondo|UMNP-Sanje|UMNP-campsite3", x)] ~ 2,
      "UMNP-campsite3" ~ 3,
      x[grepl("UMNP-hondohondo", x)] ~ 4,
      "MamaGoti" ~ 5,
      x[grepl("UMNP-Sanje", x)] ~ 6,
      x[grepl("Magombera", x)] ~ 7,
      x[grepl("Sonjo|SonjoRoad", x)] ~ 8
    )
  )

      date       forestsite             geometry code
1  8.20.13            NDUFR 35.908246, -8.047565    1
2  8.27.13          UMNP-HQ 36.883988, -7.844929    2
3  8.28.13   UMNP-campsite3 36.884545, -7.849439    3
4  8.28.13   UMNP-campsite3 36.884545, -7.849439    3
5  8.28.13 UMNP-hondohondoa 36.887065, -7.833501    4
6  8.28.13 UMNP-hondohondob 36.891369, -7.832091    4
7  8.29.13   UMNP-njokamoni   36.87839, -7.82738    2
8  8.29.13    UMNP-mangabey   36.87753, -7.82320    2
9  8.30.13 UMNP-hondohondoc 36.890720, -7.829972    4
10 8.30.13 UMNP-hondohondod 36.890019, -7.827783    4
11 8.30.13 UMNP-hondohondoe 36.887414, -7.825414    4
12 8.31.13          UMNP-HQ 36.883988, -7.844929    2
13 8.31.13         MamaGoti 36.886217, -7.844622    5
14  9.1.13      UMNP-Sanje1 36.904182, -7.783986    6
15  9.1.13      UMNP-Sanje2   36.90394, -7.78300    6
16  9.1.13      UMNP-Sanje3   36.90282, -7.77507    6
17  9.2.13       Magombera1 36.980875, -7.832182    7
18  9.2.13       Magombera2 36.960576, -7.815916    7
19  9.3.13            Sonjo 36.896019, -7.808054    8
20  9.3.13        SonjoRoad   36.89582, -7.81365    8
© www.soinside.com 2019 - 2024. All rights reserved.