使用新的基于 GIS 的数据从 JAGS 模型（R2jags 包）预测新的观测结果

Question

我使用

R2jags

包构建了一个 JAGS 模型，以将二次函数拟合到经验数据并估计函数的参数。我想使用这个模型来预测来自栅格层的新数据帧的新观测值。问题是我不知道该怎么做。

第一次尝试：我尝试应用“第一次尝试”部分中的以下代码，但对我来说似乎不正确（见下图）。

我预计数据中会有更多噪音，这意味着数据中有更多波动。

第二次尝试：然后，我使用新的数据框从

“第二次尝试”部分重新运行模型，其中我创建了带有 NA 的列 y（即响应变量），但我收到此错误消息：

Error in all$sims.array[, , "deviance", drop = FALSE] : 
  subscript out of bounds
In addition: Warning message:
In FUN(X[[i]], ...) : Failed to set trace monitor for deviance
There are no observed stochastic nodes

这是第一次根据 JAGS 模型进行预测，因此我们将不胜感激。

可重现的代码

library(geodata)
library(R2jags)

raw_data <- data.frame(T = c(-24, -22, -20, -19, -18, -16, -15, -12, -10 , -5, -2, -1, seq(1, 34, by = 1), 35, 37, 39, 41, 42, 43, 45, 46, 47), 
                       dev = c(0, 0, 0, 0, 0, 0, 0, 0.06, 0.6, 0.9, 0.9, 1, rep(1, length(seq(1, 34, by = 1))), c(0.8, 0.8, 0.4, 0, 0, 0, 0, 0, 0)))
## plot(raw_data$T, raw_data$dev)

## Define a JAGS model
mod <- function(){
  
  ## Priors
  q ~ dunif(0, 1)
  T0 ~ dunif(-24, -2)
  Tn ~ dunif(35, 47)
  sigma ~ dunif(0, 1000)
  tau <- 1 / (sigma * sigma)
  
  ## Likelihood
  for(i in 1:n){
    mu[i] <- -1 * q * (T[i] - T0) * (T[i] - Tn) * (Tn > T[i]) * (T0 < T[i]) * (-1 * q * (T[i] - T0) * (T[i] - Tn) < 1) + (-1 * q * (T[i] - T0) * (T[i] - Tn) > 1)
    y[i] ~ dnorm(mu[i], tau)
  }
  
}

## Group the data
data <- list(y = raw_data$dev, n = dim(raw_data)[1], T = raw_data$T)

## Define the parameters of interest to be estimated
param <- c("q", "T0", "Tn", "sigma", "mu")

## Define the initial values for the parameters of interest to be estimated
inits <- function(){list(q = 0.01, T0 = -20, Tn = 40, sigma = rlnorm(1))}

## Run the JAGS model
jags_mod <- R2jags::jags(data = data, inits = inits, parameters.to.save = param, n.thin = 8, n.chains = 3, n.burnin = 5000, n.iter = 25000, model.file = mod)

第一次尝试：

## Import the raster layer
bio_r <- worldclim_country(country = "France", var="bio", res = 10, path = tempdir())
## plot(bio_r$wc2.1_30s_bio_1)

## Convert the raster layer to a data fame
T_df <- terra::as.data.frame(bio_r$wc2.1_30s_bio_1, xy = TRUE, na.rm = TRUE)
## summary(T_df)
colnames(T_df) <- c("x", "y", "T")

## Apply the quadratic function to the raster layer
q <- jags_mod$BUGSoutput$summary[row.names(jags_mod$BUGSoutput$summary) %in% "q", "mean"]
T0 <- jags_mod$BUGSoutput$summary[row.names(jags_mod$BUGSoutput$summary) %in% "T0", "mean"]
Tn <- jags_mod$BUGSoutput$summary[row.names(jags_mod$BUGSoutput$summary) %in% "Tn", "mean"]
T_df$dev <- -1 * q * (T_df$T - T0) * (T_df$T - Tn) * (Tn > T_df$T) * (T0 < T_df$T) * (-1 * q * (T_df$T - T0) * (T_df$T - Tn) < 1) + (-1 * q * (T_df$T - T0) * (T_df$T - Tn) > 1)
## summary(T_df)
plot(T_df$T, T_df$dev, pch = 16)

第二次尝试：

T_df$dev <- NA
new_data <- list(y = T_df$dev, n = dim(T_df)[1], T = T_df$T)
jags_mod <- R2jags::jags(data = new_data, inits = inits, parameters.to.save = param, n.thin = 8, n.chains = 3, n.burnin = 5000, n.iter = 25000, model.file = mod)

Answer 1

听起来您想要一些新点的后验预测分布。本质上，您需要做的是计算每个新点的

mu

，然后对于

mu

 的每个值，从相关的正态分布中进行绘制。然后，您可以总结您案例中自变量 (

T

) 的每个新值的绘制情况。我就是这样做的。首先，我们将加载您的数据并运行模型。

library(geodata)
library(R2jags)
library(ggplot2)
raw_data <- data.frame(T = c(-24, -22, -20, -19, -18, -16, -15, -12, -10 , -5, -2, -1, seq(1, 34, by = 1), 35, 37, 39, 41, 42, 43, 45, 46, 47), 
                       dev = c(0, 0, 0, 0, 0, 0, 0, 0.06, 0.6, 0.9, 0.9, 1, rep(1, length(seq(1, 34, by = 1))), c(0.8, 0.8, 0.4, 0, 0, 0, 0, 0, 0)))
## plot(raw_data$T, raw_data$dev)

## Define a JAGS model
mod <- function(){
  
  ## Priors
  q ~ dunif(0, 1)
  T0 ~ dunif(-24, -2)
  Tn ~ dunif(35, 47)
  sigma ~ dunif(0, 1000)
  tau <- 1 / (sigma * sigma)
  
  ## Likelihood
  for(i in 1:n){
    mu[i] <- -1 * q * (T[i] - T0) * (T[i] - Tn) * (Tn > T[i]) * (T0 < T[i]) * (-1 * q * (T[i] - T0) * (T[i] - Tn) < 1) + (-1 * q * (T[i] - T0) * (T[i] - Tn) > 1)
    y[i] ~ dnorm(mu[i], tau)
  }
  
}

## Group the data
data <- list(y = raw_data$dev, n = dim(raw_data)[1], T = raw_data$T)

## Define the parameters of interest to be estimated
param <- c("q", "T0", "Tn", "sigma", "mu")

## Define the initial values for the parameters of interest to be estimated
inits <- function(){list(q = 0.01, T0 = -20, Tn = 40, sigma = rlnorm(1))}

## Run the JAGS model
jags_mod <- R2jags::jags(data = data, inits = inits, parameters.to.save = param, n.thin = 8, n.chains = 3, n.burnin = 5000, n.iter = 25000, model.file = mod)
#> module glm loaded
#> Compiling model graph
#>    Resolving undeclared variables
#>    Allocating nodes
#> Graph information:
#>    Observed stochastic nodes: 55
#>    Unobserved stochastic nodes: 4
#>    Total graph size: 622
#> 
#> Initializing model

现在，获取新数据。

bio_r <- worldclim_country(country = "France", var="bio", res = 10, path = tempdir())

## Convert the raster layer to a data fame
T_df <- terra::as.data.frame(bio_r$wc2.1_30s_bio_1, xy = TRUE, na.rm = TRUE)
colnames(T_df) <- c("x", "y", "T")

T_df$dev <- NA
new_data <- list(y = T_df$dev, n = dim(T_df)[1], T = T_df$T)

导致计算量增大的一件事是新数据中有超过 160 万个点。然而，

T

 仅有大约 11k 个唯一值。因此，我们可以只使用唯一值，然后如果您需要让它们与原始 1.6M 值相对应，您可以将它们合并回

T

。

new_data_small <- list(T = sort(unique(new_data$T)))
new_data_small$n <- length(new_data_small$T)

从模型对象的参数中提取数据并将其转换为数据框。

pars <- as.mcmc(jags_mod)
pars <- do.call(rbind, pars)
pars <- as.data.frame(pars)

接下来，将

pars

 的每一行设为列表中的不同元素，我们将其称为

sp

。

sp <- split(pars[,c("q", "T0", "Tn")], 1:nrow(pars))

现在，我们可以为 new_data 制作

post_mu

mu

 的后验值。

post_mu <- sapply(sp, \(x)with(new_data_small, -1 * x$q * (T - x$T0) * (T - x$Tn) * (x$Tn > T) * (x$T0 < T) * (-1 * x$q * (T - x$T0) * (T - x$Tn) < 1) + (-1 * x$q * (T - x$T0) * (T - x$Tn) > 1)))

接下来，我们使用

mu

和

sigma

 的后验值从每次后验绘制的正态分布中进行绘制。

post_obs <- sapply(1:ncol(post_mu), \(i)rnorm(nrow(post_mu), post_mu[,i], pars$sigma[i]))

接下来，我们总结后验均值和后验预测绘制以获得 95% 可信区间。

post_mu_sum <- apply(post_mu, 1, \(x)c(mean(x), median(x), quantile(x, .025), quantile(x, .975)))
post_mu_sum <- t(post_mu_sum)
colnames(post_mu_sum) <- c("mu_mean", "mu_median", "mu_lwr", "mu_upr")
ppd_sum <- apply(post_obs, 1, \(x)c(mean(x), median(x), quantile(x, .025), quantile(x, .975)))
ppd_sum <- t(ppd_sum)
colnames(ppd_sum) <- c("ppd_mean", "ppd_median", "ppd_lwr", "ppd_upr")
post_mu_sum <- as.data.frame(post_mu_sum)
post_mu_sum$T <- new_data_small$T
post_mu_sum <- cbind(post_mu_sum, ppd_sum)

最后，您可以绘制

T

 与因变量的后验预测绘制之间的关系。

ggplot(post_mu_sum, aes(x=T, y=ppd_mean, ymin=ppd_lwr, ymax=ppd_upr)) + 
  geom_ribbon(alpha=.25) + 
  geom_line() + 
  theme_classic()

创建于 2024-04-27，使用 ^{reprex v2.0.2}

顺便说一句，我不鼓励您使用

T

 作为变量名。它是逻辑值

TRUE

 的缩写，因此可能会导致混乱，特别是当您正在开发或与其他人共享代码时。

使用新的基于 GIS 的数据从 JAGS 模型（R2jags 包）预测新的观测结果

问题描述投票：0回答：1

1个回答

最新问题

使用新的基于 GIS 的数据从 JAGS 模型（R2jags 包）预测新的观测结果

问题描述 投票：0回答：1

1个回答

最新问题

问题描述投票：0回答：1