数据排列后的零膨胀 negbin 回归警告

问题描述 投票:0回答:1

我正在对我的数据应用 Zeroinfl negbin 回归。具体来说,我的因变量是计数变量(中心性度量),而我的自变量/控件都是二元且连续的。

我已经在 R 中对数据进行了回归,但现在为了计算 p 值(因为 Y 是中心性度量),我将 Y 随机化 10,000 次,并重新计算 10,000 个 Zeroinfl negbin 以生成系数的分布。然而,在该过程结束时,会弹出一些警告,包括:

  • glm.fit:数值估计速率等于 0 已发生
  • glm.fit:算法不收敛
  • 在 pnbinom(0, mu = mu, size = theta, log.p = TRUE) 中: bgrat(a=21, b=2.04429e-11, x=1, y=1.1354e-315): z=2.32756e-314, b*z == 0 下溢,因此 pbeta() 不准确
  • 在 pnbinom(0, mu = mu, size = theta, log.p = TRUE) 中: pnbinom_mu() -> bratio() 给出错误代码 11。

我的数据集由 177 个观察值组成。

这是我正在使用的代码:

library(pscl)

dput(variables_meetQ1)
structure(list(name = c("N0003", "N0005", "N0007", "N0010", "N0011", 
"N0013", "N0014", "N0015", "N0020", "N0021", "N0024", "N0025", 
"N0033", "N0034", "N0035", "N0039", "N0041", "N0042", "N0043", 
"N0044", "N0045", "N0046", "N0047", "N0048", "N0051", "N0053", 
"N0055", "N0057", "N0058", "N0060", "N0063", "N0064", "N0067", 
"N0069", "N0071", "N0074", "N0075", "N0077", "N0078", "N0079", 
"N0080", "N0082", "N0083", "N0089", "N0095", "N0096", "N0102", 
"N0107", "N0109", "N0110", "N0113", "N0114", "N0115", "N0116", 
"N0119", "N0121", "N0123", "N0125", "N0127", "N0128", "N0133", 
"N0135", "N0136", "N0139", "N0143", "N0146", "N0156", "N0157", 
"N0158", "N0160", "N0161", "N0162", "N0163", "N0177", "N0183", 
"N0186", "N0187", "N0204", "N0230", "N0251", "N0259", "N0263", 
"N0264", "N0284", "N0287", "N0300", "N0327", "N0341", "N0346", 
"N0373", "N0393", "N0399", "N0431", "N0439", "N0441", "N0443", 
"N0447", "N0451", "N0469", "N0480", "N0481", "N0504", "N0537", 
"N0542", "N0552", "N0553", "N0570", "N0571", "N0572", "N0581", 
"N0582", "N0583", "N0609", "N0610", "N0626", "N0629", "N0646", 
"N0663", "N0664", "N0675", "N0767", "N0782", "N0783", "N0792", 
"N0801", "N0811", "N0829", "N0831", "N0833", "N0856", "N0879", 
"N0890", "N0893", "N0900", "N0905", "N0934", "N0939", "N0947", 
"N0955", "N1112", "N1119", "N1123", "N1149", "N1181", "N1190", 
"N1196", "N1366", "N1367", "N0832", "N1325", "N0874", "N1145", 
"N0466", "N0239", "N0768", "N0052", "N0409", "N0619", "N0679", 
"N0295", "N0478", "N0023", "N0076", "N0200", "N0475", "N0242", 
"N0203", "N0270", "N0292", "N1182", "N0054", "N0659", "N0026", 
"N0132", "N0134", "N0322", "N0470"), Leader = c(0, 0, 0, 0, 0, 
0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 
0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 
1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0), Age_centred = c(-22.39415205, 7.605847953, 12.60584795, 
9.605847953, -3.394152047, -0.394152047, 12.60584795, 1.605847953, 
0.605847953, -25.39415205, -22.39415205, 12.60584795, 7.605847953, 
19.60584795, 14.60584795, -16.39415205, -7.394152047, 36.60584795, 
-21.39415205, 14.60584795, -10.39415205, 25.60584795, 2.605847953, 
22.60584795, 6.605847953, 9.605847953, -11.39415205, -3.394152047, 
2.605847953, -24.39415205, 26.60584795, 1.605847953, 3.605847953, 
-2.394152047, -0.394152047, 24.60584795, -0.394152047, 0.605847953, 
30.60584795, -3.394152047, 25.60584795, 0.605847953, 3.605847953, 
0.605847953, 15.60584795, 4.605847953, 21.60584795, -10.39415205, 
25.60584795, 10.60584795, 31.60584795, -8.394152047, -10.39415205, 
-5.394152047, -2.394152047, -16.39415205, -4.394152047, -7.394152047, 
40.60584795, -11.39415205, -8.394152047, 33.60584795, 25.60584795, 
6.605847953, -5.394152047, 11.60584795, -26.39415205, -2.394152047, 
1.605847953, 12.60584795, 2.605847953, -30.39415205, -25.39415205, 
-14.39415205, -1.394152047, 11.60584795, 8.605847953, 11.60584795, 
-8.394152047, -25.39415205, 3.605847953, -15.39415205, -18.39415205, 
-3.394152047, 36.60584795, -6.394152047, 29.60584795, -11.39415205, 
-0.394152047, -10.39415205, -26.39415205, -22.39415205, -8.394152047, 
-14.39415205, -26.39415205, -26.39415205, -26.39415205, -31.39415205, 
-19.39415205, -14.39415205, 10.60584795, -20.39415205, -3.394152047, 
12.60584795, 16.60584795, -0.394152047, 6.605847953, -6.394152047, 
-12.39415205, 7.605847953, -5.394152047, -14.39415205, 13.60584795, 
-26.39415205, -3.394152047, -9.394152047, 12.60584795, -15.39415205, 
-13.39415205, -24.39415205, 0.605847953, -15.39415205, -3.394152047, 
-18.39415205, -1.394152047, 17.60584795, -12.39415205, -14.39415205, 
-20.39415205, -5.394152047, 3.605847953, -20.39415205, -17.39415205, 
0.605847953, -19.39415205, -9.394152047, -27.39415205, -27.39415205, 
-16.39415205, 19.60584795, 24.60584795, 5.605847953, -3.394152047, 
-11.39415205, 7.605847953, 11.60584795, -13.39415205, -13.39415205, 
-16.39415205, -23.39415205, -5.394152047, 4.605847953, -23.39415205, 
-4.394152047, -5.394152047, -0.394152047, 21.60584795, -22.39415205, 
-0.394152047, -5.394152047, 0.605847953, -0.394152047, -5.394152047, 
30.60584795, -10.39415205, 1.605847953, -9.394152047, 25.60584795, 
-0.394152047, -28.39415205, -1.394152047, 28.60584795, -25.39415205, 
-4.394152047, -6.394152047, -9.394152047, -16.39415205), n_kinship = c(1, 
1, 0, 1, 9, 10, 7, 11, 1, 0, 3, 4, 1, 1, 0, 0, 8, 4, 0, 5, 3, 
2, 1, 0, 0, 1, 1, 1, 0, 0, 2, 1, 1, 0, 0, 1, 0, 0, 1, 3, 0, 0, 
3, 1, 0, 2, 0, 0, 0, 1, 11, 7, 7, 6, 1, 1, 1, 1, 0, 0, 0, 0, 
0, 0, 0, 0, 1, 1, 2, 0, 3, 3, 3, 0, 0, 0, 1, 0, 0, 2, 0, 1, 0, 
0, 0, 0, 3, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 3, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 4, 1, 0, 0, 0, 
9, 1, 4, 2, 0, 4, 3, 3, 4, 1, 0, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0, 
3, 0, 2, 0, 4, 0, 1, 0, 7, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 6, 
6, 0, 0, 2, 1, 0, 0, 0, 3, 0), n_call = c(0, 0, 0, 9, 15, 7, 
0, 38, 9, 0, 3, 2, 6, 15, 13, 0, 29, 2, 0, 6, 0, 0, 0, 2, 2, 
5, 0, 0, 0, 1, 23, 2, 0, 2, 0, 0, 1, 1, 6, 63, 0, 0, 0, 0, 0, 
3, 0, 0, 4, 5, 23, 29, 0, 0, 0, 0, 0, 1, 0, 5, 0, 0, 3, 9, 0, 
0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 20, 0, 1, 0, 0, 0, 0, 0, 0, 
0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 7, 0, 0, 9, 0, 6, 0, 0, 0, 0, 
0, 0, 0, 0, 2, 1, 0, 0, 2, 0, 0, 0, 7, 8, 3, 0, 0, 5, 0, 0, 0, 
1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 2, 0, 0, 0, 0), n_met5 = c(0, 0, 1, 1, 1, 1, 0, 1, 0, 
0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 
1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 
1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 
0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
), liaison = c(0, 0, 0, 6, 0, 0, 94, 1232, 0, 0, 0, 160, 0, 154, 
0, 0, 26, 0, 0, 694, 34, 0, 0, 320, 170, 62, 0, 0, 0, 10, 30, 
0, 0, 104, 0, 0, 0, 0, 354, 1658, 0, 8, 0, 0, 0, 0, 0, 0, 72, 
0, 1404, 104, 0, 0, 50, 0, 0, 10, 0, 0, 0, 0, 0, 164, 0, 0, 0, 
72, 578, 8, 2, 0, 0, 0, 0, 0, 6, 0, 92, 0, 0, 4, 4, 50, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0)), row.names = c(NA, 177L), class = "data.frame")

zinb_M4 <- zeroinfl(liaison ~ Leader + Age_centred + n_kinship + n_call
                    |n_met5, data = variables_meetQ1,
                    na.action = na.omit ,dist="negbin")

permutation_matrix <- matrix(0, nrow = length(variables_meetQ1$liaison),ncol = 10000)

for (i in 1:10000)  {
  permutation_matrix[,i] <- sample(variables_meetQ1$liaison, 
                                   size=length(variables_meetQ1$liaison),replace = FALSE)
}

zinb_M4_perm <- list()

for (i in 1:10000){
zinb_M4_perm[[i]] <- zeroinfl(permutation_matrix[,i] ~ Leader + Age_centred + 
                                  n_kinship + n_call | n_met5, data = variables_meetQ1,
                                na.action = na.omit ,dist="negbin")
  
}


coef_M4 <-matrix(0, nrow = 5, ncol = 10000)
for (i in 1:10000) {
  coef_M4[,i] <- as.matrix(unname(zinb_M4_perm[[i]]$coefficients$count)) 
}
r regression permutation glm
1个回答
0
投票

我实际上并不认为这些警告代表了一个严重的问题 - 它们将由在某些方面表现不佳的排列样本引起(例如具有complete分离,即单个数据分区中的所有零,或者具有零-通货膨胀概率估计为零[-Inf on the log-odds scale])。然而,弄清楚到底发生了什么是个好主意。

没有时间深入研究这一点,但这是调试策略的开始。

这是代码的一个稍微精简的版本,它在每次迭代时设置一个已知的种子,并在出现警告时停止,以便您可以查看该特定的排列数据集(最好存储所有排列) ,就像在您的代码中一样,然后查看它们,但是停下来看看发生了什么比标记哪些迭代会引起警告要容易一些......

nperm <- 1e4
coef_M4 <-matrix(0, nrow = 5, ncol = nperm)
options(warn=3) ## warnings to errors
for (i in seq(nperm)) {
    cat(i, "\n")
    set.seed(1 + i)
    permsamp <- sample(variables_meetQ1$liaison, 
                       size=nrow(variables_meetQ1),
                       replace = FALSE)
    pfit <- zeroinfl(permsamp ~ Leader + Age_centred + n_kinship + n_call |n_met5,
             data = variables_meetQ1,
             na.action = na.omit ,dist="negbin")
    coef_M4[,i] <- as.matrix(unname(pfit$coefficients$count))
}
© www.soinside.com 2019 - 2024. All rights reserved.