我有如下所示的虚拟数据。我尝试修改循环回归代码以处理列名称中的空格,但它仍然报错。此外,我也不知道如何调整
.x
来处理自变量列名称中的空格。
该代码的目的是一次将第一列与随后的每一列进行回归。即 Canada Price ~ Global Price
,然后 Canada Price ~ Canada 2Y Bond Price
等等。
# Attempt to regress the first column of `df` on each column in turn and stack
# the tidied coefficient tables into one data frame.
# NOTE(review): the sample data further down is stored as `df1`; confirm that
# `df` refers to the same data frame.
# NOTE(review): `tidy` is presumably broom::tidy -- confirm which package
# supplies it.
lm.test <- map_dfr(
# Iterate over every column name; this includes the first (response) column.
set_names(names(df)),
# paste() joins its pieces with a space by default, so the backticks end up
# separated from the response name by spaces, and `.x` is inserted with no
# backticks at all -- a predictor name containing spaces cannot be parsed as
# part of a formula.
~ tidy(lm(as.formula(paste("`",colnames(df[1]),"`","~", .x)),
data = df)),
# The name of each list element is recorded in a "predictor" column.
.id = "predictor")
Canada Price | Global Price | Canada 2Y Bond Price | US - Canada Inflation | Oil Price |
---|---|---|---|---|
-0.030661468 | -0.000207995 | 0.084 | -0.075 | 0.002116 |
-0.034269304 | -0.140212141 | 0.363 | -1.062 | -0.06122 |
0.02371464 | 0.019987 | -0.781 | 0.273 | 0.209895 |
-0.007971484 | 0.090597341 | -0.221 | 0.068 | -0.10657 |
0.005342716 | 0.060627149 | 0.151 | 0.234 | -0.22191 |
-0.002210376 | 0.046010712 | -0.106 | 0.13 | 0.073975 |
0.018338299 | 0.019799534 | 0.036 | 0.005 | 0.058091 |
0.000662471 | -0.025356047 | 0.523 | 0.045 | -0.03765 |
0.002124411 | 0.045979342 | -0.433 | 0.282 | 0.365933 |
0.034191555 | 0.043211347 | -0.08 | 0.147 | 0.113365 |
-0.011470069 | -0.033773176 | 0.396 | -0.037 | -0.09753 |
0.007039847 | 0.050327089 | -0.052 | 0.151 | 0.145487 |
-0.028872959 | -0.004501371 | 0.324 | -0.214 | 0.064282 |
0.009244373 | -0.00092413 | 0.166 | -0.066 | 0.076961 |
0.017448201 | -0.010638258 | -0.179 | 0.053 | 0.108548 |
-0.002379819 | 0.050111674 | 0.352 | -0.161 | -0.11261 |
-0.002103406 | 0.03094893 | 0.121 | 0.106 | 0.130575 |
0.019154969 | 0.08307375 | 0.051 | 0.149 | 0.041074 |
-0.00124318 | -0.054526954 | 0.345 | 0.032 | 0.079687 |
-0.001241636 | 0.003319001 | -0.289 | 0.219 | 0.100941 |
数据采用
dput
格式。
# Sample data: 20 rows and five numeric columns. Every column name contains
# spaces, so each one has to be backtick-quoted when written in code.
df1 <-
structure(list(
# First column; used as the response in the regressions.
`Canada Price` = c(-0.030661468, -0.034269304, 0.02371464, -0.007971484,
0.005342716, -0.002210376, 0.018338299, 0.000662471, 0.002124411,
0.034191555, -0.011470069, 0.007039847, -0.028872959, 0.009244373,
0.017448201, -0.002379819, -0.002103406, 0.019154969, -0.00124318, -0.001241636),
# Remaining columns; each is used as a single predictor.
`Global Price` = c(-0.000207995, -0.140212141, 0.019987, 0.090597341,
0.060627149, 0.046010712, 0.019799534, -0.025356047, 0.045979342,
0.043211347, -0.033773176, 0.050327089, -0.004501371, -0.00092413,
-0.010638258, 0.050111674, 0.03094893, 0.08307375, -0.054526954, 0.003319001),
`Canada 2Y Bond Price` = c(0.084, 0.363, -0.781, -0.221, 0.151, -0.106,
0.036, 0.523, -0.433, -0.08, 0.396, -0.052, 0.324, 0.166, -0.179, 0.352,
0.121, 0.051, 0.345, -0.289),
`US - Canada Inflation` = c(-0.075, -1.062, 0.273, 0.068,
0.234, 0.13, 0.005, 0.045, 0.282, 0.147, -0.037, 0.151, -0.214,
-0.066, 0.053, -0.161, 0.106, 0.149, 0.032, 0.219),
`Oil Price` = c(0.002116, -0.06122, 0.209895, -0.10657, -0.22191, 0.073975,
0.058091, -0.03765, 0.365933, 0.113365, -0.09753, 0.145487, 0.064282,
0.076961, 0.108548, -0.11261, 0.130575, 0.041074, 0.079687, 0.100941)),
# Compact row-name form: c(NA, -20L) stands for automatic row names 1..20.
class = "data.frame", row.names = c(NA, -20L))
下面的解决方案将第一列分别对其余每一列进行回归。
该公式是用
reformulate
创建的。
suppressPackageStartupMessages({
library(magrittr)
library(purrr)
})
# Build one formula that serves every regression: the response is the first
# column of df1 (turned into a symbol so the spaces in its name are handled)
# and the regressor is 'value', the column created by pivoting to long format
fmla <- reformulate("value", response = as.name(names(df1)[1L]))
# Stack all predictors into name/value pairs, split the rows by predictor
# name and fit one model per predictor
lm_fit_list <- df1 |>
  tidyr::pivot_longer(cols = -1L) |>
  (\(long_df) split(long_df, long_df$name))() |>
  map(\(df) lm(fmla, data = df))
# Summarise each fit, then print the coefficient table of every model
lm_smry_list <- map(lm_fit_list, summary)
map(lm_smry_list, coef)
#> $`Canada 2Y Bond Price`
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 0.001736073 0.003667346 0.4733866 0.64162781
#> value -0.025789455 0.011690588 -2.2060015 0.04061964
#>
#> $`Global Price`
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -0.001515084 0.003712875 -0.4080622 0.68804100
#> value 0.164831106 0.070018144 2.3541199 0.03012634
#>
#> $`Oil Price`
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -0.001424249 0.004112478 -0.3463238 0.7331147
#> value 0.046411939 0.030404220 1.5264966 0.1442663
#>
#> $`US - Canada Inflation`
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 0.0001923356 0.003176245 0.06055441 0.952381362
#> value 0.0393945114 0.011325922 3.47826091 0.002683133
创建于 2024-02-23,使用 reprex v2.0.2