tidyverse:一个变量与data.frame中所有其他变量的交叉表

问题描述 投票:3回答:4

我想与data.frame中的所有其他变量建立变量的交叉表。

library(tidyverse)
library(janitor)

humans <- starwars %>%
  filter(species == "Human")

humans %>%
  janitor::tabyl(gender, eye_color)



gender blue blue-gray brown dark hazel yellow
 female    3         0     5    0     1      0
   male    9         1    12    1     1      2

humans %>%
  dplyr::select_if(is.character) %>%
  dplyr::select(-name, -gender) %>%
  purrr::map(.f = ~janitor::tabyl(dat = humans, gender, .x))

Error: Unknown columns `blond`, `none`, `brown`, `brown, grey`, `brown` and ... 
Call `rlang::last_error()` to see a backtrace
r tidyverse crosstab purrr janitor
4个回答
3
投票

tably将名称作为参数,并将向量传递给它。

如果你使用imap,你可以访问列的名称,你可以转换为符号,并且由于janitor支持准引用,你可以写:

humans %>%
  select_if(is.character) %>%
  select(-name, -gender) %>%
  imap(.f = ~janitor::tabyl(dat = humans, !!sym(.y), gender))
#$`hair_color`
#     hair_color female male
#         auburn      1    0
#   auburn, grey      0    1
#  auburn, white      0    1
#          black      1    7
#          blond      0    3
#          brown      6    8
#    brown, grey      0    1
#           grey      0    1
#           none      0    3
#          white      1    1
# 
# $skin_color
#  skin_color female male
#        dark      0    4
#        fair      3   13

有趣的是,tabyl.data.frame调用一个对符号有效的未导出函数,所以通过直接调用它我们可以跳过unquoting并使用基数R.

cols <- setdiff(names(Filter(is.character,humans)), c("name","gender"))
lapply(cols, function(x) janitor:::tabyl_2way(humans, as.name(x), quote(gender)))
# [[1]]
#     hair_color female male
#         auburn      1    0
#   auburn, grey      0    1
#  auburn, white      0    1
#          black      1    7
#          blond      0    3
#          brown      6    8
#    brown, grey      0    1
#           grey      0    1
#           none      0    3
#          white      1    1
# 
# [[2]]
#  skin_color female male
#        dark      0    4

为了使它适用于xtable @ akrun的建议也在这里工作:

humans %>%
  select_if(is.character) %>%
  select(-name, -gender) %>%
  imap(.f = ~tabyl(dat = humans, !!sym(.y), gender) %>% rename_at(1,~"x")) %>%
  xtableList

要么

cols <- setdiff(names(Filter(is.character,humans)), c("name","gender"))
l <- lapply(cols, function(x) {
  res <- janitor:::tabyl_2way(humans, as.name(x), quote(gender))
  names(res)[1] <- "x"
  res
})
xtableList(l)

3
投票

假设我们需要成对表与'性别'

humans %>%
  dplyr::select_if(is.character) %>%
  dplyr::select(-name, -gender) %>%
  imap(~ tibble(!! .y := .x) %>% 
             mutate(gender = humans[['gender']]) %>% 
             janitor::tabyl(!!rlang::sym(names(.)[1]), gender))
#$hair_color
#    hair_color female male
#        auburn      1    0
#  auburn, grey      0    1
# auburn, white      0    1
#         black      1    7
#         blond      0    3
#        brown      6    8
#  brown, grey      0    1
#         grey      0    1
#         none      0    3
#        white      1    1

#$skin_color
# skin_color female male
#       dark      0    4
#       fair      3   13
#      light      6    5
#...

Update

xtable::xtableList要求list元素的名称相同。要实现这一点,请在list元素中更改第一列名称,然后创建标识符列

library(xtable)
humans %>%
 dplyr::select_if(is.character) %>%
 dplyr::select(-name, -gender) %>%
 imap(~ tibble(!! .y := .x) %>% 
         mutate(gender = humans[['gender']]) %>% 
         janitor::tabyl(!!rlang::sym(names(.)[1]), gender) %>%  
         mutate(colNname = .y) %>% 
         rename_at(1, ~ 'Variable')) %>%
 xtableList

0
投票

仅使用data.table(和一个%>%):

library(data.table)
swDT <- data.table(starwars)
setkey(swDT, gender, hair_color)


swDT[species == "Human"
     ][CJ(gender, hair_color, unique =TRUE), .N, .EACHI] %>% 
  dcast(hair_color ~ gender, value.var = "N")


       hair_color female male
 1:        auburn      1    0
 2:  auburn, grey      0    1
 3: auburn, white      0    1
 4:         black      1    7
 5:         blond      0    3
 6:         brown      6    8
 7:   brown, grey      0    1
 8:          grey      0    1
 9:          none      0    3
10:         white      1    1

0
投票

starwars中的列表列增加了复杂性,但这里有一个mtcars示例:crosstab cyl对所有其他变量。

mtcars %>%
  tidyr::gather(var, value, -cyl) %>%
  janitor::tabyl(cyl, value, var, show_missing_levels = FALSE) %>%
  purrr::map2(.x = ., .y = names(.), ~ janitor::adorn_title(.x, col_name = .y))

返回交叉表列表。 cyl x am,cyl x carb等:

$`am`
     am  
 cyl  0 1
   4  3 8
   6  4 3
   8 12 2

$carb
     carb          
 cyl    1 2 3 4 6 8
   4    5 6 0 0 0 0
   6    2 0 0 4 1 0
   8    0 4 3 6 0 1

...

如果您将对这些data.frames进行进一步操作,您可能会发现此标题选项更友好:

purrr::map2(.x = ., .y = names(.), ~ janitor::adorn_title(.x, col_name = .y, placement = "combined"))

哪个给你:

$vs
 cyl/vs  0  1
      4  1 10
      6  3  4
      8 14  0
© www.soinside.com 2019 - 2024. All rights reserved.