列名有特殊字符时的循环回归

问题描述 投票:0回答:1

虚拟数据如下所示。我尝试修改循环回归代码以处理列名中的空格,但它仍然报错。此外,我也不知道如何调整

.x
来处理自变量列名中的空格。该代码的目的是将第一列依次对后面的每一列分别进行回归,每次只使用一个自变量。即
Canada Price ~ Global Price
,然后
Canada Price ~ Canada Bond Price
等等。

# Intended: regress the first column on each of the other columns, one at a
# time, and row-bind the tidy() results with a "predictor" id column.
# NOTE(review): as written this cannot work for column names with spaces:
#   - paste() separates its arguments with " " by default, so the backticks
#     end up around " <name> " (padded with spaces) rather than the exact name;
#   - .x is pasted in without backticks, so a name such as `Global Price` is
#     not a valid term once the string is handed to as.formula();
#   - set_names(names(df)) also iterates over the first (response) column.
lm.test <- map_dfr(
  set_names(names(df)),
  ~ tidy(lm(as.formula(paste("`",colnames(df[1]),"`","~", .x)),
            data = df)),
  .id = "predictor") 
Canada Price | Global Price | Canada 2Y Bond Price | US - Canada Inflation | Oil Price
-0.030661468 -0.000207995 0.084 -0.075 0.002116
-0.034269304 -0.140212141 0.363 -1.062 -0.06122
0.02371464 0.019987 -0.781 0.273 0.209895
-0.007971484 0.090597341 -0.221 0.068 -0.10657
0.005342716 0.060627149 0.151 0.234 -0.22191
-0.002210376 0.046010712 -0.106 0.13 0.073975
0.018338299 0.019799534 0.036 0.005 0.058091
0.000662471 -0.025356047 0.523 0.045 -0.03765
0.002124411 0.045979342 -0.433 0.282 0.365933
0.034191555 0.043211347 -0.08 0.147 0.113365
-0.011470069 -0.033773176 0.396 -0.037 -0.09753
0.007039847 0.050327089 -0.052 0.151 0.145487
-0.028872959 -0.004501371 0.324 -0.214 0.064282
0.009244373 -0.00092413 0.166 -0.066 0.076961
0.017448201 -0.010638258 -0.179 0.053 0.108548
-0.002379819 0.050111674 0.352 -0.161 -0.11261
-0.002103406 0.03094893 0.121 0.106 0.130575
0.019154969 0.08307375 0.051 0.149 0.041074
-0.00124318 -0.054526954 0.345 0.032 0.079687
-0.001241636 0.003319001 -0.289 0.219 0.100941

编辑

数据采用

dput
格式。

# Example data in dput() form: a 20-row data.frame with five numeric columns.
# The column names are non-syntactic (they contain spaces and, in one case,
# a hyphen), which is why each one is written inside backticks.
df1 <-
  structure(list(
    `Canada Price` = c(-0.030661468, -0.034269304, 0.02371464, -0.007971484, 
                       0.005342716, -0.002210376, 0.018338299, 0.000662471, 0.002124411, 
                       0.034191555, -0.011470069, 0.007039847, -0.028872959, 0.009244373, 
                       0.017448201, -0.002379819, -0.002103406, 0.019154969, -0.00124318, -0.001241636), 
    `Global Price` = c(-0.000207995, -0.140212141, 0.019987, 0.090597341, 
                       0.060627149, 0.046010712, 0.019799534, -0.025356047, 0.045979342,
                       0.043211347, -0.033773176, 0.050327089, -0.004501371, -0.00092413, 
                       -0.010638258, 0.050111674, 0.03094893, 0.08307375, -0.054526954, 0.003319001), 
    `Canada 2Y Bond Price` = c(0.084, 0.363, -0.781, -0.221, 0.151, -0.106, 
                               0.036, 0.523, -0.433, -0.08, 0.396, -0.052, 0.324, 0.166, -0.179, 0.352,
                               0.121, 0.051, 0.345, -0.289), 
    `US - Canada Inflation` = c(-0.075, -1.062, 0.273, 0.068, 
                                0.234, 0.13, 0.005, 0.045, 0.282, 0.147, -0.037, 0.151, -0.214, 
                                -0.066, 0.053, -0.161, 0.106, 0.149, 0.032, 0.219), 
    `Oil Price` = c(0.002116, -0.06122, 0.209895, -0.10657, -0.22191, 0.073975, 
                    0.058091, -0.03765, 0.365933, 0.113365, -0.09753, 0.145487, 0.064282, 
                    0.076961, 0.108548, -0.11261, 0.130575, 0.041074, 0.079687, 0.100941)), 
    class = "data.frame", row.names = c(NA, -20L))
r loops linear-regression lm r-colnames
1个回答
0
投票

下面的解决方案将第一列分别对其余每一列进行回归。
该公式是用

reformulate
创建的。

suppressPackageStartupMessages({
  library(magrittr)
  library(purrr)
})

# One formula serves every regression: after pivoting to long format each
# predictor's values sit in the 'value' column. The response is supplied as
# a symbol built from the first column name, which contains a space.
fmla <- reformulate("value", response = as.name(names(df1)[1L]))

# Reshape to long format (keeping the first column), split the rows by
# predictor name and fit one linear model per piece
lm_fit_list <- tidyr::pivot_longer(df1, -1L) %>%
  split(.$name) %>%
  map(function(df) lm(fmla, data = df))

# Model summaries, then the coefficient table of each summary
lm_smry_list <- map(lm_fit_list, summary)
map(lm_smry_list, coef)
#> $`Canada 2Y Bond Price`
#>                 Estimate  Std. Error    t value   Pr(>|t|)
#> (Intercept)  0.001736073 0.003667346  0.4733866 0.64162781
#> value       -0.025789455 0.011690588 -2.2060015 0.04061964
#> 
#> $`Global Price`
#>                 Estimate  Std. Error    t value   Pr(>|t|)
#> (Intercept) -0.001515084 0.003712875 -0.4080622 0.68804100
#> value        0.164831106 0.070018144  2.3541199 0.03012634
#> 
#> $`Oil Price`
#>                 Estimate  Std. Error    t value  Pr(>|t|)
#> (Intercept) -0.001424249 0.004112478 -0.3463238 0.7331147
#> value        0.046411939 0.030404220  1.5264966 0.1442663
#> 
#> $`US - Canada Inflation`
#>                 Estimate  Std. Error    t value    Pr(>|t|)
#> (Intercept) 0.0001923356 0.003176245 0.06055441 0.952381362
#> value       0.0393945114 0.011325922 3.47826091 0.002683133

创建于 2024-02-23,使用 reprex v2.0.2

© www.soinside.com 2019 - 2024. All rights reserved.