我遇到的问题是这样的:我有使用R的经验,但不幸的是,循环并不是我的强项。我想创建一个缩短以下内容的循环:
library(wbstats)
enrg_cons = wb(country = "all", indicator = "EG.USE.PCAP.KG.OE")
gdp = wb(country = "all", indicator = "NY.GDP.PCAP.CD")
#Separating members of the OECD
#Australia
enrg_cons_AUS = enrg_cons[which(enrg_cons$iso3c == "AUS" & enrg_cons$date >=1995 & enrg_cons$date <=2014),
names(enrg_cons) %in% c("date", "value", "country")]
gdp_AUS = gdp[which(gdp$iso3c == "AUS" & gdp$date >=1995 & gdp$date<=2014), names(gdp) %in% c("date", "value", "country")]
#Austria
enrg_cons_AUT = enrg_cons[which(enrg_cons$iso3c == "AUT" & enrg_cons$date >=1995 & enrg_cons$date <=2014),
names(enrg_cons) %in% c("date", "value", "country")]
gdp_AUT = gdp[which(gdp$iso3c == "AUT" & gdp$date >=1995 & gdp$date<=2014), names(gdp) %in% c("date", "value", "country")]
#Belgium
enrg_cons_BEL = enrg_cons[which(enrg_cons$iso3c == "BEL" & enrg_cons$date >=1995 & enrg_cons$date <=2014),
names(enrg_cons) %in% c("date", "value", "country")]
gdp_BEL = gdp[which(gdp$iso3c == "BEL" & gdp$date >=1995 & gdp$date<=2014), names(gdp) %in% c("date", "value", "country")]
#Canada
enrg_cons_CAN = enrg_cons[which(enrg_cons$iso3c == "CAN" & enrg_cons$date >=1995 & enrg_cons$date <=2014),
names(enrg_cons) %in% c("date", "value", "country")]
我想调查约20个OECD国家的GDP和能源消耗,我想创建一个不错的循环,从上面的代码中提取我的值,而无需为每个国家编写。我还想为以下命令创建一个循环:
#Augmented Dickey-Fuller(ADF) test
adf.test(log(gdp_AUS$value), k = 0)$p.value; adf.test(diff(log(enrg_cons_AUS$value)), k = 0)
adf.test(log(gdp_AUT$value), k = 0)$p.value; adf.test(diff(log(enrg_cons_AUT$value)), k = 0)
再次针对我正在研究的所有国家/地区。我希望这些信息是足够的和可复制的,如果不能,请告诉我,我将尽最大努力加以改进!预先谢谢!
有几种方法可以做到这一点。最适合您当前工作流程的一项可能是assign
函数。
首先将数据提取打包为一对函数:
get_country_enrg_cons <- function(country) {
enrg_cons[
enrg_cons$iso3c == country & enrg_cons$date >=1995 & enrg_cons$date <=2014,
c("date", "value", "country")
]
}
get_country_gdp <- function(country) {
gdp[
gdp$iso3c == country & gdp$date >=1995 & gdp$date<=2014,
c("date", "value", "country")
]
}
然后我们可以遍历国家/地区列表,并将结果分配给使用paste
创建的变量:
countries <- c("AUS", "AUT", "BEL", "CAN")
for (country in countries) {
assign(paste0("energ_cons_", country), get_country_enrg_cons(country))
assign(paste0("gdp_", country), get_country_gdp(country))
}
您应该最终获得全局环境中所需的表。
然后像这样应用Dickey-Fuller测试图:
library(tseries)
get_adf_test <- function(country_gdp, country_enrg_cons) {
adf.test(
log(country_gdp$value), k = 0)$p.value;
adf.test(diff(log(country_enrg_cons$value)), k = 0)
}
for (country in countries) {
country_gdp <- eval(parse(text = paste0("gdp_", country)))
country_enrg_cons <- eval(parse(text = paste0("energ_cons_", country)))
assign(paste0("adf_", country), get_adf_test(country_gdp, country_enrg_cons))
}
但是,我将是第一个承认此解决方案并不完美的公司。一种更简单的解决方案是基于表的解决方案,该解决方案将所有变量保留在单个数据结构中。
使用上面定义的功能,我们可以创建一个带有包含输出数据帧以及模型输出的列表列的表。
library(tidyverse)
countries_df <- tibble(country = c("AUS", "AUT", "BEL", "CAN"))
countries_df <- countries_df %>%
rowwise() %>%
mutate(
country_gdp = list(get_country_gdp(country)),
country_enrg_cons = list(get_country_enrg_cons(country)),
country_adf_test = list(get_adf_test(country_gdp, country_enrg_cons))
) %>%
ungroup()
现在可以使用子集操作访问模型输出:
countries_df %>%
filter(country == "AUS") %>%
pull(country_adf_test)
# Augmented Dickey-Fuller Test
#
# data: diff(log(country_enrg_cons$value))
# Dickey-Fuller = -3.1949, Lag order = 0, p-value = 0.1172
# alternative hypothesis: stationary
这主要是按组的汇总。对于每个isoc
,您都对摘要统计感兴趣。除了可以在环境中创建250个以上的对象之外,我们还可以使用data.table
library(data.table)
setDT(gdp)
gdp[between(date, 1995, 2014) & iso3c != 'SOM',
adf.test(log(value), k = 0)$p.value,
by = iso3c]
iso3c V1
<char> <num>
1: ARB 0.75501918
2: CSS 0.49522577
3: CEB 0.73116544
4: EAR 0.92497081
5: EAS 0.99000000
---
253: VIR 0.97525907
254: PSE 0.95374047
255: YEM 0.05792444
256: ZMB 0.95657239
257: ZWE 0.79529898
现在您可能已经注意到了iso3C != 'SOM'
部分。由于似乎没有足够的观测值可以进行计算,因此存在一些错误。因此,一种更通用的data.table方法将是:
library(data.table)
setDT(gdp)
gdp[between(date, 1995, 2014),
if (.N > 5L) adf.test(log(value), k = 0)$p.value,
by = iso3c]
setDT(enrg_cons)
enrg_cons[between(date, 1995, 2014),
if (.N > 5L) adf.test(diff(log(value)), k = 0),
by = iso3c]
数据和设置:
library(wbstats)
library(tseries)
enrg_cons = wb(country = "all", indicator = "EG.USE.PCAP.KG.OE")
gdp = wb(country = "all", indicator = "NY.GDP.PCAP.CD")