使用新的基于 GIS 的数据从 JAGS 模型(R2jags 包)预测新的观测结果

问题描述 投票:0回答:1

我使用

R2jags
包构建了一个 JAGS 模型,以将二次函数拟合到经验数据并估计函数的参数。我想使用这个模型来预测来自栅格层的新数据帧的新观测值。问题是我不知道该怎么做。

第一次尝试:我尝试应用“第一次尝试”部分中的以下代码,但对我来说似乎不正确(见下图)。

enter image description here

我预计数据中会有更多噪音,这意味着数据中有更多波动。

第二次尝试:然后,我使用新的数据框从

“第二次尝试”部分重新运行模型,其中我创建了带有 NA 的列 y(即响应变量),但我收到此错误消息:

Error in all$sims.array[, , "deviance", drop = FALSE] : subscript out of bounds In addition: Warning message: In FUN(X[[i]], ...) : Failed to set trace monitor for deviance There are no observed stochastic nodes
这是第一次根据 JAGS 模型进行预测,因此我们将不胜感激。

可重现的代码

library(geodata) library(R2jags) raw_data <- data.frame(T = c(-24, -22, -20, -19, -18, -16, -15, -12, -10 , -5, -2, -1, seq(1, 34, by = 1), 35, 37, 39, 41, 42, 43, 45, 46, 47), dev = c(0, 0, 0, 0, 0, 0, 0, 0.06, 0.6, 0.9, 0.9, 1, rep(1, length(seq(1, 34, by = 1))), c(0.8, 0.8, 0.4, 0, 0, 0, 0, 0, 0))) ## plot(raw_data$T, raw_data$dev) ## Define a JAGS model mod <- function(){ ## Priors q ~ dunif(0, 1) T0 ~ dunif(-24, -2) Tn ~ dunif(35, 47) sigma ~ dunif(0, 1000) tau <- 1 / (sigma * sigma) ## Likelihood for(i in 1:n){ mu[i] <- -1 * q * (T[i] - T0) * (T[i] - Tn) * (Tn > T[i]) * (T0 < T[i]) * (-1 * q * (T[i] - T0) * (T[i] - Tn) < 1) + (-1 * q * (T[i] - T0) * (T[i] - Tn) > 1) y[i] ~ dnorm(mu[i], tau) } } ## Group the data data <- list(y = raw_data$dev, n = dim(raw_data)[1], T = raw_data$T) ## Define the parameters of interest to be estimated param <- c("q", "T0", "Tn", "sigma", "mu") ## Define the initial values for the parameters of interest to be estimated inits <- function(){list(q = 0.01, T0 = -20, Tn = 40, sigma = rlnorm(1))} ## Run the JAGS model jags_mod <- R2jags::jags(data = data, inits = inits, parameters.to.save = param, n.thin = 8, n.chains = 3, n.burnin = 5000, n.iter = 25000, model.file = mod)

第一次尝试:

## Import the raster layer bio_r <- worldclim_country(country = "France", var="bio", res = 10, path = tempdir()) ## plot(bio_r$wc2.1_30s_bio_1) ## Convert the raster layer to a data fame T_df <- terra::as.data.frame(bio_r$wc2.1_30s_bio_1, xy = TRUE, na.rm = TRUE) ## summary(T_df) colnames(T_df) <- c("x", "y", "T") ## Apply the quadratic function to the raster layer q <- jags_mod$BUGSoutput$summary[row.names(jags_mod$BUGSoutput$summary) %in% "q", "mean"] T0 <- jags_mod$BUGSoutput$summary[row.names(jags_mod$BUGSoutput$summary) %in% "T0", "mean"] Tn <- jags_mod$BUGSoutput$summary[row.names(jags_mod$BUGSoutput$summary) %in% "Tn", "mean"] T_df$dev <- -1 * q * (T_df$T - T0) * (T_df$T - Tn) * (Tn > T_df$T) * (T0 < T_df$T) * (-1 * q * (T_df$T - T0) * (T_df$T - Tn) < 1) + (-1 * q * (T_df$T - T0) * (T_df$T - Tn) > 1) ## summary(T_df) plot(T_df$T, T_df$dev, pch = 16)

第二次尝试:

T_df$dev <- NA new_data <- list(y = T_df$dev, n = dim(T_df)[1], T = T_df$T) jags_mod <- R2jags::jags(data = new_data, inits = inits, parameters.to.save = param, n.thin = 8, n.chains = 3, n.burnin = 5000, n.iter = 25000, model.file = mod)
    
r rjags
1个回答
0
投票
听起来您想要一些新点的后验预测分布。本质上,您需要做的是计算每个新点的

mu

,然后对于 
mu
 的每个值,从相关的正态分布中进行绘制。然后,您可以总结您案例中自变量 (
T
) 的每个新值的绘制情况。我就是这样做的。首先,我们将加载您的数据并运行模型。

library(geodata) library(R2jags) library(ggplot2) raw_data <- data.frame(T = c(-24, -22, -20, -19, -18, -16, -15, -12, -10 , -5, -2, -1, seq(1, 34, by = 1), 35, 37, 39, 41, 42, 43, 45, 46, 47), dev = c(0, 0, 0, 0, 0, 0, 0, 0.06, 0.6, 0.9, 0.9, 1, rep(1, length(seq(1, 34, by = 1))), c(0.8, 0.8, 0.4, 0, 0, 0, 0, 0, 0))) ## plot(raw_data$T, raw_data$dev) ## Define a JAGS model mod <- function(){ ## Priors q ~ dunif(0, 1) T0 ~ dunif(-24, -2) Tn ~ dunif(35, 47) sigma ~ dunif(0, 1000) tau <- 1 / (sigma * sigma) ## Likelihood for(i in 1:n){ mu[i] <- -1 * q * (T[i] - T0) * (T[i] - Tn) * (Tn > T[i]) * (T0 < T[i]) * (-1 * q * (T[i] - T0) * (T[i] - Tn) < 1) + (-1 * q * (T[i] - T0) * (T[i] - Tn) > 1) y[i] ~ dnorm(mu[i], tau) } } ## Group the data data <- list(y = raw_data$dev, n = dim(raw_data)[1], T = raw_data$T) ## Define the parameters of interest to be estimated param <- c("q", "T0", "Tn", "sigma", "mu") ## Define the initial values for the parameters of interest to be estimated inits <- function(){list(q = 0.01, T0 = -20, Tn = 40, sigma = rlnorm(1))} ## Run the JAGS model jags_mod <- R2jags::jags(data = data, inits = inits, parameters.to.save = param, n.thin = 8, n.chains = 3, n.burnin = 5000, n.iter = 25000, model.file = mod) #> module glm loaded #> Compiling model graph #> Resolving undeclared variables #> Allocating nodes #> Graph information: #> Observed stochastic nodes: 55 #> Unobserved stochastic nodes: 4 #> Total graph size: 622 #> #> Initializing model
现在,获取新数据。

bio_r <- worldclim_country(country = "France", var="bio", res = 10, path = tempdir()) ## Convert the raster layer to a data fame T_df <- terra::as.data.frame(bio_r$wc2.1_30s_bio_1, xy = TRUE, na.rm = TRUE) colnames(T_df) <- c("x", "y", "T") T_df$dev <- NA new_data <- list(y = T_df$dev, n = dim(T_df)[1], T = T_df$T)
导致计算量增大的一件事是新数据中有超过 160 万个点。然而,

T

 仅有大约 11k 个唯一值。因此,我们可以只使用唯一值,然后如果您需要让它们与原始 1.6M 值相对应,您可以将它们合并回 
T

new_data_small <- list(T = sort(unique(new_data$T))) new_data_small$n <- length(new_data_small$T)
从模型对象的参数中提取数据并将其转换为数据框。

pars <- as.mcmc(jags_mod) pars <- do.call(rbind, pars) pars <- as.data.frame(pars)
接下来,将 

pars

 的每一行设为列表中的不同元素,我们将其称为 
sp

sp <- split(pars[,c("q", "T0", "Tn")], 1:nrow(pars))
现在,我们可以为 new_data 制作 

post_mu

 - 
mu
 的后验值。

post_mu <- sapply(sp, \(x)with(new_data_small, -1 * x$q * (T - x$T0) * (T - x$Tn) * (x$Tn > T) * (x$T0 < T) * (-1 * x$q * (T - x$T0) * (T - x$Tn) < 1) + (-1 * x$q * (T - x$T0) * (T - x$Tn) > 1)))
接下来,我们使用 

mu

sigma
 的后验值从每次后验绘制的正态分布中进行绘制。

post_obs <- sapply(1:ncol(post_mu), \(i)rnorm(nrow(post_mu), post_mu[,i], pars$sigma[i]))
接下来,我们总结后验均值和后验预测绘制以获得 95% 可信区间。

post_mu_sum <- apply(post_mu, 1, \(x)c(mean(x), median(x), quantile(x, .025), quantile(x, .975))) post_mu_sum <- t(post_mu_sum) colnames(post_mu_sum) <- c("mu_mean", "mu_median", "mu_lwr", "mu_upr") ppd_sum <- apply(post_obs, 1, \(x)c(mean(x), median(x), quantile(x, .025), quantile(x, .975))) ppd_sum <- t(ppd_sum) colnames(ppd_sum) <- c("ppd_mean", "ppd_median", "ppd_lwr", "ppd_upr") post_mu_sum <- as.data.frame(post_mu_sum) post_mu_sum$T <- new_data_small$T post_mu_sum <- cbind(post_mu_sum, ppd_sum)
最后,您可以绘制 

T

 与因变量的后验预测绘制之间的关系。

ggplot(post_mu_sum, aes(x=T, y=ppd_mean, ymin=ppd_lwr, ymax=ppd_upr)) + geom_ribbon(alpha=.25) + geom_line() + theme_classic()

创建于 2024-04-27,使用 reprex v2.0.2

顺便说一句,我不鼓励您使用

T

 作为变量名。它是逻辑值 
TRUE
 的缩写,因此可能会导致混乱,特别是当您正在开发或与其他人共享代码时。

© www.soinside.com 2019 - 2024. All rights reserved.