包含来自具有不同 x 日期尺度的 2 个 dfs 的变量的单图

Question

我有一个看起来像这样的 df（这里只复制了数据集的一部分；完整数据集的日期变量格式为 %Y%m%d，范围从 1996 年 10 月 23 日到 2022 年 12 月 14 日，其中缺少一些日期）。

# Sample of the PSA data: a 101-row tibble with a Date column `date`
# (stored as day counts with class "Date"), a character column `country`
# ("Italy" / "United Kingdom"), and two numeric columns, `propIT_econ` and
# `propUK_econ`.
PSA <- structure(list(date = structure(c(15657, 15665, 15671, 15672, 
15678, 15679, 15685, 15686, 15693, 15714, 15721, 15728, 15735, 
15742, 15749, 15763, 15770, 15777, 15779, 15780, 15784, 15785, 
15789, 15790, 15791, 15797, 15798, 15804, 15805, 15810, 15811, 
15812, 15819, 15819, 15824, 15825, 15831, 15832, 15832, 15833, 
15839, 15839, 15840, 15840, 15841, 15846, 15846, 15847, 15848, 
15853, 15854, 15855, 15859, 15860, 15860, 15861, 15861, 15862, 
15867, 15867, 15868, 15868, 15869, 15874, 15874, 15875, 15875, 
15876, 15880, 15881, 15881, 15882, 15882, 15883, 15888, 15888, 
15889, 15889, 15890, 15894, 15895, 15895, 15896, 15896, 15897, 
15901, 15902, 15902, 15903, 15903, 15904, 15905, 15909, 15909, 
15910, 15911, 15912, 15915, 15916, 15916, 15917), class = "Date"), 
    country = c("Italy", "United Kingdom", "Italy", "United Kingdom", 
    "Italy", "United Kingdom", "Italy", "United Kingdom", "United Kingdom", 
    "United Kingdom", "United Kingdom", "United Kingdom", "United Kingdom", 
    "United Kingdom", "United Kingdom", "United Kingdom", "United Kingdom", 
    "United Kingdom", "Italy", "Italy", "United Kingdom", "Italy", 
    "Italy", "Italy", "Italy", "Italy", "Italy", "Italy", "Italy", 
    "United Kingdom", "Italy", "Italy", "Italy", "United Kingdom", 
    "Italy", "Italy", "Italy", "Italy", "Italy", "Italy", "Italy", 
    "Italy", "Italy", "United Kingdom", "Italy", "Italy", "Italy", 
    "Italy", "Italy", "Italy", "Italy", "Italy", "Italy", "Italy", 
    "Italy", "Italy", "United Kingdom", "Italy", "Italy", "Italy", 
    "Italy", "United Kingdom", "Italy", "Italy", "Italy", "Italy", 
    "United Kingdom", "Italy", "Italy", "Italy", "Italy", "Italy", 
    "United Kingdom", "Italy", "Italy", "Italy", "Italy", "United Kingdom", 
    "Italy", "Italy", "Italy", "Italy", "Italy", "United Kingdom", 
    "Italy", "Italy", "Italy", "Italy", "Italy", "United Kingdom", 
    "Italy", "Italy", "Italy", "Italy", "Italy", "Italy", "Italy", 
    "Italy", "Italy", "Italy", "Italy"), propIT_econ = c(20, 
    0, 12.5, 0, 12.5, 0, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 5.26315789473684, 0, 0, 28.5714285714286, 0, 5.55555555555556, 
    23.5294117647059, 3.50877192982456, 21.0526315789474, 2.72727272727273, 
    0, 2.85714285714286, 0, 0.840336134453782, 0, 9.09090909090909, 
    14.5454545454545, 30.2325581395349, 13.5593220338983, 13.5593220338983, 
    2.85714285714286, 0, 0, 0, 0, 4.54545454545455, 5.61797752808989, 
    5.61797752808989, 0, 0, 0, 2.1505376344086, 4.8780487804878, 
    30, 3.2520325203252, 3.2520325203252, 3.77358490566038, 0, 
    3.06122448979592, 0, 2.01342281879195, 0, 0, 6.66666666666667, 
    4.65116279069767, 0, 0.975609756097561, 0, 1.53846153846154, 
    0, 11.6279069767442, 11.6279069767442, 5.14705882352941, 
    0, 1.61290322580645, 4.1958041958042, 4.1958041958042, 0.271739130434783, 
    0, 1.81818181818182, 7.2463768115942, 7.14285714285714, 0, 
    0, 0, 0, 13.5135135135135, 10.8108108108108, 10.8108108108108, 
    2.21606648199446, 0, 3.92156862745098, 8.57142857142857, 
    2.84090909090909, 2.84090909090909, 3.68421052631579, 9.61538461538462, 
    0, 1.44927536231884, 0.235849056603774, 0, 1.44230769230769
    ), propUK_econ = c(0, 0, 0, 1.51515151515152, 0, 0, 0, 0, 
    0, 0, 0, 0, 1.38888888888889, 1.53846153846154, 1.53846153846154, 
    2.85714285714286, 0, 0, 0, 0, 4.22535211267606, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0.840336134453782, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0)), row.names = c(NA, -101L), class = c("tbl_df", 
"tbl", "data.frame"))

然后我将其转换为长格式：

# Reshape PSA to long format: every column except `date` and `country`
# is stacked into a `variable` (column name) / `value` (cell value) pair.
PSAlong <- gather(
  PSA,
  key = variable,
  value = value,
  -date,
  -country
)

我还有另一个 df，时间范围从 2004 年到 2022 年，它的日期变量采用的则是 %Y%qrt 格式（例如 "2004.3"）。

# Sample of the MIP_nat data: a 6-row tibble. `date` is a character column
# holding strings such as 2004.3, `season` is autumn/spring, `country` is
# Italy in every sample row, and the remaining columns are numeric, one per
# topic (e.g. `Economic_situation`).
MIP_nat <- structure(list(date = c("2004.3", "2005.1", "2005.3", "2006.1", 
"2006.3", "2007.1"), season = c("autumn", "spring", "autumn", 
"spring", "autumn", "spring"), country = c("Italy", "Italy", 
"Italy", "Italy", "Italy", "Italy"), Crime = c(23, 23, 25, 22, 
25, 28), Public_transport = c(4, 5, 5, 0, 0, 0), Economic_situation = c(31, 
37, 31, 33, 33, 28), Rising_prices_inflation = c(29, 32, 31, 
28, 25, 26), Taxation = c(16, 14, 12, 15, 15, 18), Un_employment = c(31, 
36, 31, 36, 30, 28), Terrorism = c(17, 7, 11, 7, 15, 9), Country_external_influence = c(0, 
0, 0, 0, 0, 0), Government_debt = c(0, 0, 0, 0, 0, 0), Defence_Foreign_affairs = c(1, 
1, 1, 1, 2, 1), Cyprus_issue = c(0, 0, 0, 0, 0, 0), Housing = c(1, 
1, 1, 1, 3, 2), Immigration = c(17, 15, 15, 14, 20, 15), Health_social_security = c(5, 
6, 6, 5, 6, 5), Education = c(3, 2, 4, 2, 1, 2), Pensions = c(9, 
7, 10, 6, 9, 11), Environment_climate_energy = c(1, 2, 3, 2, 
6, 9), International_situation = c(0, 0, 0, 0, 0, 0), Other = c(1, 
1, 0, 0, 1, 1), None = c(0, 0, 0, 0, 0, 0), DN = c(0, 0, 1, 4, 
0, 1)), row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame"
))

我尝试使用此代码将日期变量转换为日期，但返回 NA

# Attempt to convert the character `date` column (strings such as 2004.3)
# in place with as.yearqtr(); the post reports that this returns NA.
# NOTE(review): as.yearqtr() presumably comes from the zoo package, and an
# explicit `format` argument may be needed for this layout -- TODO confirm.
MIP_nat$date <- as.yearqtr(MIP_nat$date)

我想要一张图，把第一个 df (PSAlong) 中变量 "propIT_econ" 和 "propUK_econ" 的值与第二个 df (MIP_nat) 中变量 "Economic_situation" 的值画在一起，或者用两个比例不同的 x 轴来绘制这些值。我遇到了困难，因为两个日期变量的格式不同，这使得我只能绘制其中一个 df 的变量，而不能同时绘制两者。例如，下面这段只绘制第一个 df 的代码可以正常工作：

# Keep only the two economy-share series and draw them as step lines,
# one colour per series, with a labelled break for every year on the date axis.
PSAlong %>%
  filter(variable %in% c("propIT_econ", "propUK_econ")) %>%
  ggplot(aes(x = as.Date(date), y = value, colour = variable)) +
  geom_step(linewidth = 0.7, alpha = 0.8) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y")

我尝试过，但它没有产生我想要的结果：

# Attempt to overlay both data frames on one numeric x axis; the post reports
# that it does not produce the desired plot. Code kept exactly as posted.
# NOTE(review): the string literal that starts at `propUK_econ` has no closing
# double quote, so this line does not parse as written.
# NOTE(review): in geom_line() the y aesthetic is a quoted string, i.e. a
# constant label rather than the Economic_situation column.
# NOTE(review): as.numeric() on the Date column of PSAlong gives day counts
# (around 15657 in the sample), which lie outside limits = c(1996, 2023).
ggplot() + geom_step(data = PSAlong %>% filter(variable == "propIT_econ" | variable == "propUK_econ), aes(x = as.numeric(date), y = value, color = country)) + geom_line(data = MIP_nat, aes(x = as.numeric(date), y = "Economic_situation", linetype = country)) + scale_x_continuous(limits = c(1996,2023), breaks = seq(1997,2023, by = 1))

谢谢您的帮助！

Answer 1

在 MIP_nat

中创建一个额外的列，您可以从其

date

列构建，将

year.1

替换为 year/02/15，将 year.2 替换为 year/05/15，将 year.3 替换为 year/08/15，将 year.4 替换为 year/11/15（或者您希望用来代表该季度的任何其他日期）。使用

stringr

包中的

str_extract

和

str_replace

（可能是

str_extract_all

和

str_replace_all

）来实现此目的。不过，它涉及摆弄

正则表达式

（呃！）（在RegExr边做边学）。

包含来自具有不同 x 日期尺度的 2 个 dfs 的变量的单图

问题描述投票：0回答：1

1个回答

最新问题

包含来自具有不同 x 日期尺度的 2 个 dfs 的变量的单图

问题描述 投票：0回答：1

1个回答

最新问题

问题描述投票：0回答：1