极具挑战性的数据集操作:合并投入产出表中的行业

问题描述 投票:0回答:1

我正在尝试按如下方式操作我的输入输出(IO)表。 假设我们有一个包含 2 个国家和 4 个行业的 IO 表:

# Sample input-output (IO) table: 2 countries (DEU, ITA) x 4 industries.
# `industry` labels the rows; the remaining columns carry the same labels.
# NA marks a missing cell.
df <- data.frame(
  industry = c(
    "DEU_10T12", "DEU_13T15", "DEU_16", "DEU_17",
    "ITA_10T12", "ITA_13T15", "ITA_16", "ITA_17"
  ),
  DEU_10T12 = c(20, 24, 26, 20, 10, 0, NA, 1.5),
  DEU_13T15 = c(15, 16, 4.5, NA, 7.5, 5, 3, 0),
  DEU_16    = c(1.5, 6, 4, 0, 0.5, 15, 3, 0.5),
  DEU_17    = c(NA, 20, 10, 2, 0, 0, 0, 7),
  ITA_10T12 = c(0.5, 2, 3, 4, 10, 50, 2, 15),
  ITA_13T15 = c(25, 0, 4.5, NA, 17.5, 5, 13, 0.9),
  ITA_16    = c(2, 3, 40, 20, 0.5, 15, 3, 1),
  ITA_17    = c(1, 9, 0.5, 2, 10, 20, 50, 7)
)

目标是创建以下数据集:每个国家现在只有 3 个行业,其中新行业 16T17 是行业 16 和 17 的总和(行和列都需要合并,缺失值 NA 按 0 处理):

# Desired result: industries 16 and 17 merged into 16T17 for each country,
# along both the rows and the columns of the table.
df2 <- data.frame(
  industry = c(
    "DEU_10T12", "DEU_13T15", "DEU_16T17",
    "ITA_10T12", "ITA_13T15", "ITA_16T17"
  ),
  DEU_10T12 = c(20, 24, 46, 10, 0, 1.5),
  DEU_13T15 = c(15, 16, 4.5, 7.5, 5, 3),
  DEU_16T17 = c(1.5, 26, 16, 0.5, 15, 10.5),
  ITA_10T12 = c(0.5, 2, 7, 10, 50, 17),
  ITA_13T15 = c(25, 0, 4.5, 17.5, 5, 13.9),
  ITA_16T17 = c(3, 12, 62.5, 10.5, 35, 61)
)

在这里,我将行业 16 和 17 聚合为新行业 16T17。 似乎很难用一段代码完成如此复杂的操作。有人能帮忙吗?我不知道如何实现这一点。 谢谢!

*** 更新*** 代码尝试:

# Aggregate sectors 16 and 17 column-wise, treating NA as 0.
# Still naive: the matching rows are not aggregated here, and hard-coding the
# column names is not efficient since I have 100 countries and 10 sectors in
# my original dataset.
df$DEU_16T17 <- rowSums(df[, c("DEU_16", "DEU_17")], na.rm = TRUE)
df$ITA_16T17 <- rowSums(df[, c("ITA_16", "ITA_17")], na.rm = TRUE)
# Remove individual sector columns. The pattern is anchored with ^...$ because
# an unanchored "DEU_16" also matches "DEU_16T17" and would drop the new
# aggregated columns together with the originals.
df2 <- df[, !grepl("^(DEU|ITA)_(16|17)$", names(df))]
r database dataframe
1个回答
0
投票

您可以使用 `dplyr` 进行这些操作。

library(dplyr)

# Collapse sectors 16 and 17 into a single 16T17 sector, first across the
# columns and then across the rows of the IO table. Missing cells (NA) are
# treated as 0 so that the result matches the desired `df2`.
df |>
    # convert any columns stored as text to numbers
    type.convert(as.is = TRUE) |>
    # sum the _16 and _17 columns of each country, ignoring NA
    mutate(
        DEU_16T17 = rowSums(cbind(DEU_16, DEU_17), na.rm = TRUE),
        ITA_16T17 = rowSums(cbind(ITA_16, ITA_17), na.rm = TRUE)
    ) |>
    # Put the columns in the right places
    relocate(DEU_16T17, .before = DEU_16) |>
    relocate(ITA_16T17, .before = ITA_16) |>
    select(-c(DEU_16, DEU_17, ITA_16, ITA_17)) |>
    # Relabel the 16 and 17 rows so they share one 16T17 label per country
    mutate(industry = sub("_1[67]$", "_16T17", industry)) |>
    # Add up the rows that now share a label; na.rm = TRUE keeps a single
    # missing cell from turning the whole total into NA
    summarise(
        across(everything(), \(x) sum(x, na.rm = TRUE)),
        .by = industry
    )

# Expected values (identical to the desired df2; print layout may differ):
#   industry  DEU_10T12 DEU_13T15 DEU_16T17 ITA_10T12 ITA_13T15 ITA_16T17
# 1 DEU_10T12      20        15         1.5       0.5      25         3
# 2 DEU_13T15      24        16        26         2         0        12
# 3 DEU_16T17      46         4.5      16         7         4.5      62.5
# 4 ITA_10T12      10         7.5       0.5      10        17.5      10.5
# 5 ITA_13T15       0         5        15        50         5        35
# 6 ITA_16T17       1.5       3        10.5      17        13.9      61
© www.soinside.com 2019 - 2024. All rights reserved.