“错误:未找到有效的系数集:请在拟合 svyglm 二项式('identity')风险差异时提供起始值”

问题描述 投票:0回答:1

我试图将具有身份链接的二项式族的 glm 与调查加权数据进行拟合,以计算风险差异。我能够使用起始值运行 svyglm 模型,但是当我尝试使用似然方法提取不对称置信区间时,我收到了上述错误。

使用 dput 的前 4 个条目的示例数据:

> data <- structure(list(binary_outcome= structure(c(1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("0", "1"), class = "factor"), 
    binary_exposure= structure(c(1L, 1L, NA, 1L, 1L, NA, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, NA, 1L, NA, 1L, 
    NA, NA, NA, NA, 1L, NA, 2L, NA), .Label = c("1", "2"), label = "binary exposure", class = "factor"), 
    wt = c(191217, 89001, 16578, 188901, 7787, 7787, 3876, 
    27569, 7208, 7208, 8594, 170173, 3430, 7208, 53034, 53034, 
    5037, 5037, 786465, 177210, 7849, 9038, 6353, 3837, 89733, 
    188973, 176366, 188736, 3456, 27478), block = c("a", 
    "a", "b", "c", "w", "e", 
    "j", "c", "g", "r", "j", 
    "r", "v", "f", "g", "d", 
    "a", "a", "v", "t", "l", 
    "q", "d", "n", 
    "m", "b", "g", "k", "m", 
    "g"), strata_study = c("C R", "K U", 
    "TN R", "WB R", "K R", "K R", 
    "TN U", "WB U", "TN U", "TN U", 
    "O U", "M U", "K U", "TN U", 
    "WB U", "WB U", "K R", "K R", 
    "D U", "WB R", "TN U", "TN U", 
    "TN U", "K U", "D U", "WB R", "C R", 
    "WB R", "K U", "WB U")), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -30L), groups = structure(list(
    binary_outcome = structure(1:2, .Label = c("0", "1"), class = "factor"), 
    .rows = structure(list(1:15, 16:30), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), row.names = c(NA, -2L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE))

我运行了以下命令来拟合 svyglm 模型来估计风险差异:

> svy_design <- svydesign(ids = ~block, strata = ~strata_study, weights = ~wt, data = data, nest = TRUE, fpc = NULL)

> summary(svyglm(I(binary_outcome==1)~binary_exposure, design=svy_design,family=quasi(link="identity",variance="mu(1-mu)"),  start=c(0.1,0.6)))

这给出了以下结果:

Call:
svyglm(formula = I(binary_outcome == 1) ~ binary_exposure, design = svy_design, 
    family = quasi(link = "identity", variance = "mu(1-mu)"), 
    start = c(0.1, 0.1))

Survey design:
svydesign(ids = ~block, strata = ~strata_study, weights = ~wt, 
    data = data, nest = TRUE, fpc = NULL)

Coefficients:
                 Estimate Std. Error t value Pr(>|t|)  
(Intercept)        0.3575     0.2043   1.750   0.1107  
binary_exposure2   0.6425     0.2043   3.145   0.0104 *
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for quasi family taken to be 0.7467369)

Number of Fisher Scoring iterations: 2

但是,我想使用似然法生成置信区间,因此我运行了以下命令:

> confint(svyglm(I(binary_outcome==1)~binary_exposure, design=svy_design,family=quasi(link="identity",variance="mu(1-mu)"),  start=c(0.1,0.6)), method="likelihood")

并收到以下错误消息:

Error: no valid set of coefficients has been found: please supply starting values

对此的任何帮助将不胜感激。谢谢!

r glm survey
1个回答
0
投票

不知道它是否回答了您的问题,但是使用不带

confint
参数的
method="likelihood"
会给出输出。 抱歉,我不太了解这背后的统计数据来确认结果是否相同..

data <- structure(list(binary_outcome= structure(c(1L, 1L, 1L, 1L, 
                                                   1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 
                                                   2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("0", "1"), class = "factor"), 
                       binary_exposure= structure(c(1L, 1L, NA, 1L, 1L, NA, 1L, 
                                                    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, NA, 1L, NA, 1L, 
                                                    NA, NA, NA, NA, 1L, NA, 2L, NA), .Label = c("1", "2"), label = "binary exposure", class = "factor"), 
                       wt = c(191217, 89001, 16578, 188901, 7787, 7787, 3876, 
                              27569, 7208, 7208, 8594, 170173, 3430, 7208, 53034, 53034, 
                              5037, 5037, 786465, 177210, 7849, 9038, 6353, 3837, 89733, 
                              188973, 176366, 188736, 3456, 27478), block = c("a", 
                                                                              "a", "b", "c", "w", "e", 
                                                                              "j", "c", "g", "r", "j", 
                                                                              "r", "v", "f", "g", "d", 
                                                                              "a", "a", "v", "t", "l", 
                                                                              "q", "d", "n", 
                                                                              "m", "b", "g", "k", "m", 
                                                                              "g"), strata_study = c("C R", "K U", 
                                                                                                     "TN R", "WB R", "K R", "K R", 
                                                                                                     "TN U", "WB U", "TN U", "TN U", 
                                                                                                     "O U", "M U", "K U", "TN U", 
                                                                                                     "WB U", "WB U", "K R", "K R", 
                                                                                                     "D U", "WB R", "TN U", "TN U", 
                                                                                                     "TN U", "K U", "D U", "WB R", "C R", 
                                                                                                     "WB R", "K U", "WB U")), class = c("grouped_df", 
                                                                                                                                        "tbl_df", "tbl", "data.frame"), row.names = c(NA, -30L), groups = structure(list(
                                                                                                                                          binary_outcome = structure(1:2, .Label = c("0", "1"), class = "factor"), 
                                                                                                                                          .rows = structure(list(1:15, 16:30), ptype = integer(0), class = c("vctrs_list_of", 
                                                                                                                                                                                                            "vctrs_vctr", "list"))), row.names = c(NA, -2L), class = c("tbl_df", 
                                                                                                                                                                                                                                                                        "tbl", "data.frame"), .drop = TRUE))
# clean up data : (to prevent error with svyglm)
dat2 <- dplyr::filter(data, !stringr::str_detect(strata_study, 'M U|O U|TN R')) # remove strata with only one incidence

# surveydesign
svy_design <- survey::svydesign(ids = ~block, strata = ~strata_study, weights = ~wt, data = dat2, nest = TRUE, fpc = NULL)

# run glm fit
svyfit <- survey::svyglm(I(binary_outcome==1)~binary_exposure, design=svy_design,family=quasi(link="identity",variance="mu(1-mu)"),  start=c(0.1,0.6))
#> Warning: step size truncated: out of bounds

#> Warning: step size truncated: out of bounds

#> Warning: step size truncated: out of bounds

#> Warning: step size truncated: out of bounds

#> Warning: step size truncated: out of bounds

#> Warning: step size truncated: out of bounds

#> Warning: step size truncated: out of bounds

#> Warning: step size truncated: out of bounds

#> Warning: step size truncated: out of bounds
#> Warning: glm.fit: algorithm stopped at boundary value

# summarize
summary(svyfit)
#> 
#> Call:
#> svyglm(formula = I(binary_outcome == 1) ~ binary_exposure, design = svy_design, 
#>     family = quasi(link = "identity", variance = "mu(1-mu)"), 
#>     start = c(0.1, 0.6))
#> 
#> Survey design:
#> survey::svydesign(ids = ~block, strata = ~strata_study, weights = ~wt, 
#>     data = dat2, nest = TRUE, fpc = NULL)
#> 
#> Coefficients:
#>                  Estimate Std. Error t value Pr(>|t|)  
#> (Intercept)        0.4206     0.2410   1.745    0.112  
#> binary_exposure2   0.5794     0.2410   2.404    0.037 *
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> (Dispersion parameter for quasi family taken to be 0.6920658)
#> 
#> Number of Fisher Scoring iterations: 10

# confidence interval
confint(svyfit)
#>                        2.5 %    97.5 %
#> (Intercept)      -0.11633493 0.9575489
#> binary_exposure2  0.04245107 1.1163349

创建于 2023-08-14,使用 reprex v2.0.2

© www.soinside.com 2019 - 2024. All rights reserved.