ggplot2 报错:Aesthetics must be either length 1 or the same as the data (16): x, y, group(美学映射的长度必须为 1,或与数据的行数 16 相同)

问题描述 投票:0回答:2

下面是我用来绘制一张(我以为很简单的)折线图的代码:

# Line-chart attempt from the question: a single line (group = 1) plus a
# point marker at every value. This is the call that raises the error
# quoted below.
ggplot(top15andAllDatasummary.df,
       aes(years, calculations, group = 1)) +
  geom_line() +
  geom_point()

我收到了这个错误:

Error: Aesthetics must be either length 1 or the same as the data (16): x, y, group(错误:美学映射的长度必须为 1,或与数据的行数 16 相同:x、y、group)

我的数据存放在 R 的数据框中。X 轴应为年份,Y 轴应为我针对每一年计算出的若干统计量(共 16 个)。

编辑添加

# Sample of the summary data frame: one column per year (2001-2015) and one
# row per summary statistic. The list is already named, so no separate
# .Names argument is needed.
structure(
  list(
    `2001` = c(349.315750645518, 217.47436370343, 5.17963850977499, 126.661748432313, 57, 39),
    `2002` = c(703.26693877551, 429.92, 9.32897959183673, 264.017959183673, 161, 108),
    `2003` = c(314.897774687065, 193.792420027816, 4.08936022253129, 117.015994436718, 54, 37),
    `2004` = c(305.988451086957, 190.680027173913, 3.87839673913043, 111.430027173913, 55, 38),
    `2005` = c(118.528015659408, 74.3175923660387, 1.50942011255199, 42.7010031808172, 10, 8),
    `2006` = c(120.531992244304, 73.8279205041202, 1.54362578768783, 45.1604459524964, 10, 8),
    `2007` = c(113.973899988451, 69.7619817530893, 1.44693382607691, 42.7649844092851, 10, 8),
    `2008` = c(110.676242590059, 67.3693570451436, 1.36285909712722, 41.9440264477884, 9, 7),
    `2009` = c(101.965558714192, 63.1446534003936, 1.22982724688388, 37.5910780669145, 9, 7),
    `2010` = c(93.9744360902256, 59.8894736842105, 1.14199785177229, 32.9429645542427, 9, 7),
    `2011` = c(91.8911316298046, 58.5660296328108, 1.15675327464033, 32.1683487223534, 9, 7),
    `2012` = c(91.2302181013592, 58.598356337583, 1.16773785691708, 31.4641239068591, 8, 6),
    `2013` = c(87.1390443392165, 55.0509040034438, 1.10277658200603, 30.9853637537667, 8, 6),
    `2014` = c(85.7812132234942, 56.0456831068792, 1.09725045469134, 28.6382796619236, 8, 6),
    `2015` = c(88.331452900479, 58.526237360298, 1.22362959020756, 28.5815859499734, 8, 6)
  ),
  row.names = c(
    "AllDataMeanByYear",
    "AllDataMeanAggAssault",
    "AllDataMeanMurderManSlaughter",
    "AllDataMeanRobbery",
    "AllDataMedianByYear",
    "AllDataMedianAggAssault"
  ),
  class = "data.frame"
)


All Code:

 ## Total: per-row sum of the three crime-count columns
lwdata$total <- lwdata$murdermanslaughter +
  lwdata$Robbery +
  lwdata$Aggravated_assault
## Data Calculations Top 15
## Threshold = 15th entry of the totals once rows are ordered by Year, then
## total (both descending); keep every row whose total reaches it.
## NOTE(review): the threshold comes from a single ordering over all years,
## not a per-year top 15 -- confirm this is the intended selection.
top15 <- lwdata[
  lwdata$total >=
    lwdata$total[order(lwdata$Year, lwdata$total, decreasing = TRUE)][15],
]
## Top 15 Means (one value per Year)
Top15MeanByYear <- tapply(top15$total, INDEX = top15$Year, FUN = mean)
Top15MeanAggAssault <- tapply(top15$Aggravated_assault, INDEX = top15$Year, FUN = mean)
Top15MeanMurderManSlaughter <- tapply(top15$murdermanslaughter, INDEX = top15$Year, FUN = mean)
Top15MeanRob <- tapply(top15$Robbery, INDEX = top15$Year, FUN = mean)
## All Data Means (one value per Year)
AllDataMeanByYear <- tapply(lwdata$total, INDEX = lwdata$Year, FUN = mean)
AllDataMeanAggAssault <- tapply(lwdata$Aggravated_assault, INDEX = lwdata$Year, FUN = mean)
AllDataMeanMurderManSlaughter <- tapply(lwdata$murdermanslaughter, INDEX = lwdata$Year, FUN = mean)
AllDataMeanRobbery <- tapply(lwdata$Robbery, INDEX = lwdata$Year, FUN = mean)
## Top 15 Medians (one value per Year)
Top15MedianByYear <- tapply(top15$total, INDEX = top15$Year, FUN = median)
Top15MedianAggAssault <- tapply(top15$Aggravated_assault, INDEX = top15$Year, FUN = median)
Top15MedianMurderManSlaughter <- tapply(top15$murdermanslaughter, INDEX = top15$Year, FUN = median)
Top15MedianRob <- tapply(top15$Robbery, INDEX = top15$Year, FUN = median)
## All Data Medians (one value per Year)
AllDataMedianByYear <- tapply(lwdata$total, INDEX = lwdata$Year, FUN = median)
AllDataMedianAggAssault <- tapply(lwdata$Aggravated_assault, INDEX = lwdata$Year, FUN = median)
AllDataMedianMurderManSlaughter <- tapply(lwdata$murdermanslaughter, INDEX = lwdata$Year, FUN = median)
AllDataMedianRobbery <- tapply(lwdata$Robbery, INDEX = lwdata$Year, FUN = median)
## Rounding Data To Two Decimal Points
## Every per-year summary vector is rounded in place, one statement each.
Top15MeanByYear <- round(Top15MeanByYear, digits = 2)
Top15MeanAggAssault <- round(Top15MeanAggAssault, digits = 2)
Top15MeanMurderManSlaughter <- round(Top15MeanMurderManSlaughter, digits = 2)
Top15MeanRob <- round(Top15MeanRob, digits = 2)
AllDataMeanByYear <- round(AllDataMeanByYear, digits = 2)
AllDataMeanAggAssault <- round(AllDataMeanAggAssault, digits = 2)
# Fixed: this line used to round AllDataMeanAggAssault a second time, which
# left AllDataMeanMurderManSlaughter unrounded.
AllDataMeanMurderManSlaughter <- round(AllDataMeanMurderManSlaughter, digits = 2)
AllDataMeanRobbery <- round(AllDataMeanRobbery, digits = 2)
Top15MedianByYear <- round(Top15MedianByYear, digits = 2)
Top15MedianAggAssault <- round(Top15MedianAggAssault, digits = 2)
Top15MedianMurderManSlaughter <- round(Top15MedianMurderManSlaughter, digits = 2)
Top15MedianRob <- round(Top15MedianRob, digits = 2)
AllDataMedianByYear <- round(AllDataMedianByYear, digits = 2)
AllDataMedianAggAssault <- round(AllDataMedianAggAssault, digits = 2)
AllDataMedianMurderManSlaughter <- round(AllDataMedianMurderManSlaughter, digits = 2)
AllDataMedianRobbery <- round(AllDataMedianRobbery, digits = 2)
## Summaries
## Stack the per-year vectors row-wise: one row per statistic, one column
## per year.
AllDataSummary <- rbind(
  AllDataMeanByYear,
  AllDataMeanAggAssault,
  AllDataMeanMurderManSlaughter,
  AllDataMeanRobbery,
  AllDataMedianByYear,
  AllDataMedianAggAssault,
  AllDataMedianMurderManSlaughter,
  AllDataMedianRobbery
)
Top15Summary <- rbind(
  Top15MeanByYear,
  Top15MeanAggAssault,
  Top15MeanMurderManSlaughter,
  Top15MeanRob,
  Top15MedianByYear,
  Top15MedianAggAssault,
  Top15MedianMurderManSlaughter,
  Top15MedianRob
)
Top15andAllDatasummary <- rbind(AllDataSummary, Top15Summary)
## Class of New Items
class(AllDataSummary)
class(Top15Summary)
# Fixed: R is case-sensitive; the object created above is
# `Top15andAllDatasummary`, so the former lowercase-t spelling failed with
# "object not found".
class(Top15andAllDatasummary)
## Converting Matrices to Data Frames
AllDataSummary.df <- as.data.frame(AllDataSummary)
Top15Summary.df <- as.data.frame(Top15Summary)
Top15andAllDatasummary.df <- as.data.frame(Top15andAllDatasummary)
## Checking of New Classes
class(AllDataSummary.df)
class(Top15Summary.df)
class(Top15andAllDatasummary.df)
## Verifications for Names of New Components
colnames(Top15andAllDatasummary.df)
rownames(Top15andAllDatasummary.df)
## New Components
years <- colnames(Top15andAllDatasummary.df)
# NOTE(review): this reads the column names again, so `calculations` is
# identical to `years`; rownames() may have been intended -- confirm.
calculations <- colnames(Top15andAllDatasummary.df)
## Chicago: rows of top15 whose City is "Chicago"
Chicago <- top15[which(top15$City == "Chicago"), ]
## Basic Plots: blue line of total against Year
plot(
  Chicago$Year,
  Chicago$total,
  type = "l",
  col = "blue",
  main = "Chicago-Specific Data",
  xlab = "Year",
  ylab = "Total       Violent Crime (minus rape)"
)
## Data Types for Chicago
str(Chicago)

完整 >100K 数据集的链接位于此处

r ggplot2 linegraph
2个回答
7
投票

您的数据框(我们称之为 df)中,每个年份占一列,每个计算出的变量以行名标识。这是“宽”格式数据,即同一类型的数据被分散存储在多个列中。ggplot 适合处理“长”格式数据,其中每一列只表示数据的一个方面(即变量、年份和数值各占一列)。

Hadley Wickham(ggplot 的作者)开发的 tidyverse 系列软件包可以轻松地在宽格式和长格式之间相互转换数据。从 tidyr 1.0 开始,这通过 pivot_wider 和 pivot_longer 函数完成(它们分别取代了之前的 spread 和 gather)。下面我分别展示新旧两种写法。

library(tidyverse)

# Both versions first copy the row names into a `variable` column and then
# reshape every year column into (year, value) pairs.

# Current approach: pivot_longer() (tidyr >= 1.0)
df.new <- mutate(df, variable = rownames(df)) %>%
  pivot_longer(-variable, names_to = "year", values_to = "value")

# Older approach: gather() (superseded by pivot_longer())
df.new <- mutate(df, variable = rownames(df)) %>%
  gather(year, value, -variable)

# Printed result of the gather() version:
#                         variable year      value
#  1             AllDataMeanByYear 2001 349.315751
#  2         AllDataMeanAggAssault 2001 217.474364
#  3 AllDataMeanMurderManSlaughter 2001   5.179639
#  4            AllDataMeanRobbery 2001 126.661748
#  5           AllDataMedianByYear 2001  57.000000
#  6       AllDataMedianAggAssault 2001  39.000000
#  7             AllDataMeanByYear 2002 703.266939
#  8         AllDataMeanAggAssault 2002 429.920000
#  9 AllDataMeanMurderManSlaughter 2002   9.328980
# 10            AllDataMeanRobbery 2002 264.017959
# 11           AllDataMedianByYear 2002 161.000000
# 12       AllDataMedianAggAssault 2002 108.000000
# 13             AllDataMeanByYear 2003 314.897775
# 14         AllDataMeanAggAssault 2003 193.792420
# 15 AllDataMeanMurderManSlaughter 2003   4.089360
# 16            AllDataMeanRobbery 2003 117.015994
# 17           AllDataMedianByYear 2003  54.000000
# 18       AllDataMedianAggAssault 2003  37.000000
# 19             AllDataMeanByYear 2004 305.988451
# 20         AllDataMeanAggAssault 2004 190.680027
# ... and 70 more rows
这份长格式数据可以直接传给 ggplot。请注意,您最初的尝试使用了一个名为“years”的变量,而数据框中并不存在这个变量。R(以及 ggplot)无从知道您的列名(2001 到 2015)代表的是年份。

# Build the chart from the long data: one coloured line per summary variable
# across the years, then display it.
plot.years <- ggplot(
  data = df.new,
  aes(x = year, y = value, color = variable, group = variable)
) +
  geom_line()

print(plot.years)


3
投票
根据您的数据,我会这样做:

library(tidyr)

# Copy the row names into a regular column so they are kept by the reshape.
top15andAllDatasummary.df$variable <- rownames(top15andAllDatasummary.df)

# Wide -> long: every column except `variable` becomes a
# (years, calculations) pair.
df.long <- gather(
  data = top15andAllDatasummary.df,
  key = years,
  value = calculations,
  -variable
)

gather 调用的作用是把数据重组为如下形式:

head(df.long)
#                        variable years calculations
# 1             AllDataMeanByYear  2001   349.315751
# 2         AllDataMeanAggAssault  2001   217.474364
# 3 AllDataMeanMurderManSlaughter  2001     5.179639
# 4            AllDataMeanRobbery  2001   126.661748
# 5           AllDataMedianByYear  2001    57.000000
# 6       AllDataMedianAggAssault  2001    39.000000

完成后,我们可以继续绘图:

# One coloured line, with a point marker at each value, per summary variable.
ggplot(df.long,
       aes(years, calculations, group = variable, color = variable)) +
  geom_line() +
  geom_point()

这是你想要的结果吗?

© www.soinside.com 2019 - 2024. All rights reserved.