ggplot 直方图中的第一个 bin 的宽度不正确

问题描述 投票:0回答:1

在我创建的直方图中,我希望第一个 bin 包含在第一个月内(即 1.0-2.0 之间的时间)发生该事件的所有人员,这对应于 57/141 (40.4%)。然而,在我生成的直方图中,第一个条代表 48/141 (34.0%)。

Histogram

我发现实际情况是:在第一个条形(标记为第 1 个月)中,直方图显示的其实是事件时间在 1.0-1.5 之间的 48 个人。之后依次是第 2 个月:1.5-2.5,第 3 个月:2.5-3.5,等等。

我尝试过调整 boundary(边界)、breaks(分割点)和 binwidth(bin 宽度),但都没有成功。如何将直方图的 bin 设为 1-2、2-3、3-4...,而不是 1-1.5、1.5-2.5、2.5-3.5...?

代码:

library(ggplot2)
library(scales)

# Example data: one row per subject (`id` runs 1..141) with that subject's
# time to event (`time`); the plotting code labels this axis in months.
data <- data.frame (id= 1:141,
                  time = c(1.328767,33.909589,13.230137,32.331507,6.161644,20.167123,15.794521, 35.750685,3.958904,2.347945,1.756164,3.794521,46.961644,5.668493,14.775342,5.964384,15.268493,17.241096, 4.452055,17.372603,4.123288,28.945205,1.394521,9.515068,1.690411,5.734247,1.065753,5.405479,1.131507,8.39720,1.131507,1.065753,2.084932,13.460274,1.164384,6.786301,39.301370,1.197260,1.263014,1.098630,2.117808,1.230137,1.657534,1.065753,1.131507,13.986301,1.690411,1.065753,1.197260,2.249315,10.665753,1.690411,5.405479,4.452055,1.098630,1.065753,1.197260,1.065753,1.164384,1.493151,1.789041,1.394521,1.263014,17.701370,6.720548,24.605479,1.328767,13.197260,1.065753,4.189041,2.446575,30.852055,19.016438,28.813699,16.715068,29.438356,1.065753,1.230137,7.871233,4.386301,22.698630,1.295890,14.643836,1.131507,13.000000,3.991781,3.400000,17.964384,1.065753,2.512329,1.065753,3.827397,5.800000,21.745205,17.339726,1.164384,11.619178,9.515068,2.282192,4.189041,1.098630,1.065753,19.509589,25.591781,1.230137,28.747945,8.726027,1.065753,11.487671,30.720548,8.232877,19.871233,3.728767,1.986301,10.534247,8.956164,11.356164,11.520548,1.361644,4.484932,15.531507,1.493151,27.432877,10.468493,6.589041,1.065753,1.263014,1.065753,1.098630,35.421918,1.295890,9.810959,1.098630,21.547945,1.657534, 1.394521,1.065753,11.652055,1.164384,1.887671,1.098630))

# Create histogram of event times; bar height is each bin's share of all rows.
ggplot(data = data, aes(x = time)) +
  # Bin width is one month. No `boundary`/`center` is supplied, so ggplot2
  # centres the bins on whole numbers (0.5-1.5, 1.5-2.5, ...). That is the
  # behaviour this question asks about, so it is intentionally left as-is.
  geom_histogram(aes(y = after_stat(count / sum(count))),
                 binwidth = 1, fill = "#ED0000B2", color = "#FFFFFF") +
  # Set y-axis to represent percent, fixed to the 0-35% range
  scale_y_continuous(breaks = seq(from = 0, to = 0.35, by = 0.05),
                     expand = c(0, 0),
                     labels = scales::percent,
                     limits = c(0, 0.35)) +
  # Set axis ticks to 3-month intervals
  scale_x_continuous(breaks = seq(from = 0, to = max(data$time), by = 3),
                     expand = c(0, 0)) +
  # Caption shows the number of distinct ids. Base R is used instead of
  # dplyr::n_distinct() because dplyr is never loaded by this script.
  labs(title = "Time to Event",
       caption = paste0("N=", length(unique(data$id))),
       x = "Months from Initiation",
       y = "(%)") +
  # Update font to Times New Roman; white panel with thin black axis lines
  theme(text = element_text(size = 12, family = "Times New Roman"),
        plot.title = element_text(hjust = 0.5, face = "bold"),
        plot.subtitle = element_text(hjust = 0.5),
        panel.background = element_rect(fill = "#FFFFFF"),
        axis.line = element_line(size = 0.2, colour = "#000000", linetype = 1))

r ggplot2 histogram
1个回答
0
投票

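我认为手动计算百分比并手动设置 breaks(分割点)容易出错。让 R 来完成这些工作,我认为这就是您正在寻找的结果。另外请注意:在只指定 binwidth 时,geom_histogram 默认让各个 bin 以 binwidth 的整数倍为中心(0.5-1.5、1.5-2.5……);若要让 bin 的边界落在整数上(1-2、2-3……),需要在 geom_histogram() 中指定 boundary 参数(例如 boundary = 1)。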

# Histogram of time to event with one bar per whole month: [1, 2), [2, 3), ...
# Bar height is the bin's share of all rows, computed explicitly so it stays a
# proportion even if `binwidth` is later changed (density only equals the
# proportion when the bin width is exactly 1).
ggplot(data = data, aes(x = time, y = after_stat(count / sum(count)))) +
  # Bin width is one month. `boundary = 1` pins the bin edges to whole numbers;
  # without it ggplot2 centres bins on integers (0.5-1.5, 1.5-2.5, ...), which
  # splits the first month across two bars. `closed = "left"` makes each bin
  # include its lower edge, i.e. [n, n + 1).
  geom_histogram(binwidth = 1, boundary = 1, closed = "left",
                 fill = "#ED0000B2", color = "#FFFFFF") +
  # Set y-axis to represent percent; no upper limit is forced, so the tallest
  # bar is never dropped as out of range
  scale_y_continuous(breaks = seq(from = 0, to = 0.50, by = 0.05),
                     labels = scales::percent) +
  # Set axis ticks to 3-month intervals
  scale_x_continuous(breaks = seq(from = 0, to = max(data$time), by = 3),
                     expand = c(0, 0)) +
  # Caption shows the number of distinct ids. Base R is used instead of
  # dplyr::n_distinct() because dplyr is never loaded by this script.
  labs(title = "Time to Event",
       caption = paste0("N=", length(unique(data$id))),
       x = "Months from Initiation",
       y = "(%)") +
  # Update font to Times New Roman; white panel with thin black axis lines
  theme(text = element_text(size = 12, family = "Times New Roman"),
        plot.title = element_text(hjust = 0.5, face = "bold"),
        plot.subtitle = element_text(hjust = 0.5),
        panel.background = element_rect(fill = "#FFFFFF"),
        axis.line = element_line(size = 0.2, colour = "#000000", linetype = 1))

© www.soinside.com 2019 - 2024. All rights reserved.