在R中拟合零膨胀泊松的问题。

问题描述 投票:0回答:1

我有755行数据和约87%的零。我很难对这些数据进行零膨胀泊松或负二项式(或任何)回归拟合。我已经尝试了4种不同的方法,但都无法成功。 我甚至不确定这些是否是我应该使用的回归。任何帮助都将是非常感激的。我也不是很擅长编码,我相信这一点会很明显。

我知道这很长,但这是我的实际数据......

c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0.404749055585537, 0, 0.134916351861846, 
0, 0.149907057624273, 0, 0, 0, 0, 0, 0, 0, 0, 0.269832703723691, 
0.134916351861846, 0.134916351861846, 0, 0, 0.269832703723691, 
0, 0.269832703723691, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.112430293218205, 0, 0, 0, 
0, 0, 0, 0.367953686895943, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0.269832703723691, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0.122651228965314, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.337290879654614, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0.578212936550767, 0, 0, 0.404749055585537, 0, 0, 0, 0.269832703723691, 
0.269832703723691, 0, 0, 0.299814115248546, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0.404749055585537, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.192737645516922, 
0.192737645516922, 0, 0, 0, 0.134916351861846, 0, 0.134916351861846, 
0, 0, 0, 0, 0, 0.404749055585537, 0.134916351861846, 0.134916351861846, 
0.337290879654614, 0, 0, 0, 0, 0.674581759309228, 0, 0.134916351861846, 
0, 0.299814115248546, 0.168645439827307, 0.449721172872819, 0, 
0, 0, 0.269832703723691, 0, 0, 0, 0, 0, 0, 0.134916351861846, 
0, 0, 0, 0.122651228965314, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0.134916351861846, 0, 0.134916351861846, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0.122651228965314, 0, 0, 0.134916351861846, 
0, 0, 0.149907057624273, 0, 0, 0, 0, 0.269832703723691, 0, 0, 
0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0.449721172872819, 0, 0, 0, 0, 0, 0, 0, 0.112430293218205, 
0, 0, 0.134916351861846, 0.539665407447383, 0.134916351861846, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0.134916351861846, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0.134916351861846, 0.134916351861846, 0, 0, 0, 0, 0, 0.134916351861846, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0.269832703723691, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 
0, 0, 0, 0, 0, 0, 0, 0, 0.404749055585537, 0, 0, 0, 0.404749055585537, 
0, 0, 0.674581759309228, 0.269832703723691, 0, 0, 0, 0, 0, 0, 
0.134916351861846, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0.269832703723691, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.404749055585537, 
0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0.269832703723691, 
0.269832703723691, 0.134916351861846, 0, 0.404749055585537, 0.809498111171074, 
0, 0.134916351861846, 0.134916351861846, 1.07933081489477, 0.134916351861846, 
0, 0.269832703723691, 0, 0.94441446303292, 0.245302457930628, 
0, 0, 0, 0, 0, 0.245302457930628, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)

这是我今天尝试的4种方法。

> hog.cpue <- hogA$hog.cpue
> fitg <- fitdist(hog.cpue, "ZIP")
Error in computing default starting values.
Error in manageparam(start.arg = start, fix.arg = fix.arg, obs = data,  : 
  Error in start.arg.default(obs, distname) : 
  Unknown starting values for distribution ZIP.
> fit_zip2 <- fitdist(hogA$hog.cpue, 'nbinom', start = list(mu = 0.293, size = 0.1)) 
<simpleError in optim(par = vstart, fn = fnobj, fix.arg = fix.arg, obs = data,     gr = gradient, ddistnam = ddistname, hessian = TRUE, method = meth,     lower = lower, upper = upper, ...): function cannot be evaluated at initial parameters>
Error in fitdist(hogA$hog.cpue, "nbinom", start = list(mu = 0.293, size = 0.1)) : 
  the function mle failed to estimate the parameters, 
                with the error code 100
> fitzip <- fitdist(hogA$hog.cpue, "ZIP", start = list(mu = 0.293, sigma = 0.1), discrete = TRUE,
+                   optim.method = "L-BFGS-B", lower = c(0, 0), upper = c(Inf, 1))
<simpleError in dZIP(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.404749055585537, 0, 0.134916351861846, 0, 0.149907057624273, 0, 0, 0, 0, 0, 0, 0, 0, 0.269832703723691, 0.134916351861846, 0.134916351861846, 0, 0, 0.269832703723691, 0, 0.269832703723691, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.112430293218205, 0, 0, 0, 0, 0, 0, 0.367953686895943, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.269832703723691, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.122651228965314, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.337290879654614, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.578212936550767, 0, 0, 0.404749055585537, 0, 0, 0, 0.269832703723691, 0.269832703723691, 0, 0, 0.299814115248546, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.404749055585537, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.192737645516922, 0.192737645516922, 0, 0, 0, 0.134916351861846, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0.404749055585537, 0.134916351861846, 0.134916351861846, 0.337290879654614, 0, 0, 0, 0, 0.674581759309228, 0, 0.134916351861846, 0, 0.299814115248546, 0.168645439827307, 0.449721172872819, 0, 0, 0, 0.269832703723691, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0.122651228965314, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.122651228965314, 0, 0, 0.134916351861846, 0, 0, 0.149907057624273, 0, 0, 0, 0, 0.269832703723691, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.449721172872819, 0, 0, 0, 0, 0, 0, 0, 0.112430293218205, 0, 0, 0.134916351861846, 0.539665407447383, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0.134916351861846, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.269832703723691, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0.404749055585537, 0, 0, 0, 0.404749055585537, 0, 0, 0.674581759309228, 0.269832703723691, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0.269832703723691, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.404749055585537, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0.269832703723691, 0.269832703723691, 0.134916351861846, 0, 0.404749055585537, 0.809498111171074, 0, 0.134916351861846, 0.134916351861846, 1.07933081489477, 0.134916351861846, 0, 0.269832703723691, 0, 0.94441446303292, 0.245302457930628, 0, 0, 0, 0, 0, 0.245302457930628, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),     mu = 0, sigma = 1, log = TRUE): mu must be greater than 0 
 >
Error in fitdist(hogA$hog.cpue, "ZIP", start = list(mu = 0.293, sigma = 0.1),  : 
  the function mle failed to estimate the parameters, 
                with the error code 100
In addition: Warning messages:
1: In fitdist(hogA$hog.cpue, "ZIP", start = list(mu = 0.293, sigma = 0.1),  :
  The dZIP function should return a zero-length vector when input has length zero
2: In fitdist(hogA$hog.cpue, "ZIP", start = list(mu = 0.293, sigma = 0.1),  :
  The pZIP function should return a zero-length vector when input has length zero
> fpoisZI <- fitdist(hogA$hog.cpue, "ZIP", start=list(sigma=sum(hogA$hog.cpue == 0)/length(hogA$hog.cpue), mu=mean(hogA$hog.cpue)))
<simpleError in dZIP(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.404749055585537, 0, 0.134916351861846, 0, 0.149907057624273, 0, 0, 0, 0, 0, 0, 0, 0, 0.269832703723691, 0.134916351861846, 0.134916351861846, 0, 0, 0.269832703723691, 0, 0.269832703723691, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.112430293218205, 0, 0, 0, 0, 0, 0, 0.367953686895943, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.269832703723691, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.122651228965314, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.337290879654614, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.578212936550767, 0, 0, 0.404749055585537, 0, 0, 0, 0.269832703723691, 0.269832703723691, 0, 0, 0.299814115248546, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.404749055585537, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.192737645516922, 0.192737645516922, 0, 0, 0, 0.134916351861846, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0.404749055585537, 0.134916351861846, 0.134916351861846, 0.337290879654614, 0, 0, 0, 0, 0.674581759309228, 0, 0.134916351861846, 0, 0.299814115248546, 0.168645439827307, 0.449721172872819, 0, 0, 0, 0.269832703723691, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0.122651228965314, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.122651228965314, 0, 0, 0.134916351861846, 0, 0, 0.149907057624273, 0, 0, 0, 0, 0.269832703723691, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.449721172872819, 0, 0, 0, 0, 0, 0, 0, 0.112430293218205, 0, 0, 0.134916351861846, 0.539665407447383, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0.134916351861846, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.269832703723691, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0.404749055585537, 0, 0, 0, 0.404749055585537, 0, 0, 0.674581759309228, 0.269832703723691, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0.269832703723691, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.404749055585537, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0.269832703723691, 0.269832703723691, 0.134916351861846, 0, 0.404749055585537, 0.809498111171074, 0, 0.134916351861846, 0.134916351861846, 1.07933081489477, 0.134916351861846, 0, 0.269832703723691, 0, 0.94441446303292, 0.245302457930628, 0, 0, 0, 0, 0, 0.245302457930628, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),     sigma = 0.426547699594046, mu = -0.020557328452897, log = TRUE): mu must be greater than 0 
 >
Error in fitdist(hogA$hog.cpue, "ZIP", start = list(sigma = sum(hogA$hog.cpue ==  : 
  the function mle failed to estimate the parameters, 
                with the error code 100
In addition: Warning messages:
1: In fitdist(hogA$hog.cpue, "ZIP", start = list(sigma = sum(hogA$hog.cpue ==  :
  The dZIP function should return a zero-length vector when input has length zero
2: In fitdist(hogA$hog.cpue, "ZIP", start = list(sigma = sum(hogA$hog.cpue ==  :
  The pZIP function should return a zero-length vector when input has length zero
r zip poisson
1个回答
2
投票

对于泊松分布(无论是否零膨胀),取值必须是非负的离散值,即非负整数,不能有小数。我不确定你数据中的小数是否是有意为之。

一旦你整理好了数值,你可以考虑使用基于glm的方法。

library(pscl)

# Simulate 1000 Poisson draws with mean 20, then set 200 randomly chosen
# positions to zero to mimic zero inflation.
x <- rpois(n = 1000, lambda = 20)
x <- replace(x, sample(length(x), 200), 0)

# Intercept-only zero-inflated Poisson model (no covariates).
fit <- zeroinfl(x ~ 1, dist = "poisson")

# Exponentiate the count-part intercept to obtain the estimated mean.
estimated_mean <- exp(coef(fit)["count_(Intercept)"])
count_(Intercept) 
         20.14875

# Pull the intercept of the zero-inflation part out of the fitted model.
estimated_missing <- coefficients(fit)["zero_(Intercept)"]
# The coefficient is a logit; convert it to a probability. plogis() is the
# inverse logit from base R's stats package and, unlike the hand-written
# exp(z) / (1 + exp(z)), does not turn into Inf / Inf = NaN for large logits.
estimated_missing <- plogis(estimated_missing)
zero_(Intercept) 
             0.2 

如果你的数据带小数是因为它是一个比率(rate),那么你需要的是一个偏移量(offset)。假设基本平均率为0.5,缺失率为0.2:

# Exposure sizes: 1000, 2000, ..., 5000, each repeated 100 times.
n <- rep(seq(1000, 5000, by = 1000), each = 100)
# Counts drawn with mean 0.5 * n, then 20% of the positions are set to zero.
x <- rpois(n = length(n), lambda = 0.5 * n)
x <- replace(x, sample(length(x), 0.2 * length(n)), 0)
# Intercept-only zero-inflated Poisson model with log(n) passed as the offset.
fit <- zeroinfl(x ~ 1, dist = "poisson", offset = log(n))

然后重复上述步骤(对计数部分的截距取指数,对零膨胀部分的截距做逆logit变换),即可得到约0.5的平均率和约0.2的缺失率。

© www.soinside.com 2019 - 2024. All rights reserved.