从季度到年度数据

问题描述 投票:0回答:1

我有一个如下所示的数据框:

head(df_HPI)

<img src="https://image.soinside.com/eyJ1cmwiOiAiaHR0cHM6Ly9pLnN0YWNrLmltZ3VyLmNvbS84dHhaQi5wbmcifQ==" alt="在此处输入图像描述">

HPI是我要转换为年度的季度指数。我有17个地区(即CCAA),因此我想汇总HPI,以包含每个地区的年度数据。我进行了一些更改,但是代码无法正常工作。

# Convert series to annual data
# Start from a copy so that df_HPI_original itself is not reassigned below.
df_HPI <- df_HPI_original

# Replace period format: rewrite the quarter suffix (T1..T4) as the first day
# of that quarter so that Periodo can be parsed as a date afterwards.
# NOTE(review): sapply() applies gsub() to every column, not only Periodo, and
# gsub() returns character vectors, so numeric columns such as HPI are no
# longer numeric after these calls (before R 4.0, data.frame() stores the
# resulting text as factors).
df_HPI <- data.frame(sapply(df_HPI, function(x) {gsub("T1","-01-01",x)})) # Q1
df_HPI <- data.frame(sapply(df_HPI, function(x) {gsub("T2","-04-01",x)})) # Q2
df_HPI <- data.frame(sapply(df_HPI, function(x) {gsub("T3","-07-01",x)})) # Q3
df_HPI <- data.frame(sapply(df_HPI, function(x) {gsub("T4","-10-01",x)})) # Q4

# Convert column into a date
df_HPI$Periodo <- as.Date(df_HPI$Periodo)

# Aggregate to annual data
# Finyear keeps Q1-Q2 in their calendar year and assigns Q3-Q4 to the next
# year; HPI is then averaged per Finyear and CCAA.
# NOTE(review): mean(HPI) needs HPI to be numeric at this point -- see the
# note on the sapply()/gsub() calls above.
df_HPI %>%
  mutate(Year=year(Periodo),
         Quarter=quarter(Periodo),
         Finyear = ifelse(Quarter <= 2, Year, Year+1)) %>% 
  group_by(Finyear, CCAA) %>%
  summarise(HPIy=mean(HPI))

在最后一步,R 发出警告,提示参数既不是数值型也不是逻辑型(argument is not numeric or logical),并返回 NA。

r dataframe dplyr time-series lubridate
1个回答
1
投票

问题在于,当您通过 gsub 替换期间格式时,HPI 列已转换为因子。因此,您必须将其转换回数字。试试这个:

library(dplyr)
library(lubridate)

set.seed(42)

# Example data: two regions observed over the eight quarters of 2019-2020.
# Periodo uses the "<year>T<quarter>" format, e.g. "2019T1".
quarters <- paste0("T", 1:4)
years <- c("2019", "2020")
dates <- paste0(rep(years, each = length(quarters)), quarters)

df_HPI <- data.frame(
  Periodo = rep(dates, 2),
  CCAA = c(rep("Region1", 8), rep("Region2", 8)),
  HPI = runif(16)
)
head(df_HPI)
#>   Periodo    CCAA       HPI
#> 1  2019T1 Region1 0.9148060
#> 2  2019T2 Region1 0.9370754
#> 3  2019T3 Region1 0.2861395
#> 4  2019T4 Region1 0.8304476
#> 5  2020T1 Region1 0.6417455
#> 6  2020T2 Region1 0.5190959

# Replace period format
# Only the Periodo column is rewritten. Running gsub() over every column with
# sapply() turns the whole data frame into text (factors before R 4.0), which
# is what makes mean(HPI) return NA; restricting the replacement to Periodo
# leaves HPI numeric and its values exact.
# as.character() makes the replacement work whether Periodo is stored as a
# character vector or as a factor.
periodo <- as.character(df_HPI$Periodo)
periodo <- gsub("T1", "-01-01", periodo, fixed = TRUE) # Q1
periodo <- gsub("T2", "-04-01", periodo, fixed = TRUE) # Q2
periodo <- gsub("T3", "-07-01", periodo, fixed = TRUE) # Q3
periodo <- gsub("T4", "-10-01", periodo, fixed = TRUE) # Q4

# Convert column into a date
df_HPI$Periodo <- as.Date(periodo)

# HPI was never touched, so it is still numeric and needs no conversion back
class(df_HPI$HPI)
#> [1] "numeric"

# Aggregate to annual data
# Each observation is assigned to a financial year: Q1-Q2 stay in their
# calendar year, Q3-Q4 count towards the following one. HPI is then averaged
# per financial year and region.
df_HPI %>%
  group_by(
    Finyear = year(Periodo) + (quarter(Periodo) > 2),
    CCAA
  ) %>%
  summarise(HPIy = mean(HPI))
#> # A tibble: 6 x 3
#> # Groups:   Finyear [3]
#>   Finyear CCAA     HPIy
#>     <dbl> <fct>   <dbl>
#> 1    2019 Region1 0.926
#> 2    2019 Region2 0.681
#> 3    2020 Region1 0.569
#> 4    2020 Region2 0.592
#> 5    2021 Region1 0.436
#> 6    2021 Region2 0.701

由 reprex package (v0.3.0) 于 2020-04-04 创建

© www.soinside.com 2019 - 2024. All rights reserved.