我希望强制执行一个普遍增加的 GAM,它不必是单调的,但是,它应该增加,并且一旦我的示例中的 B 变量超过 15,000,A 就不应该达到 0。我提供了一个用于复制的示例 df 以及我当前的进度。显然,对于两个唯一 XY 配对之一,最大 B 点处的 A 值正在向下(如果绘制),但是,当 B 超过 15,000 时,我不希望 A 值为 0。需要注意的是,当B小于0时,A等于0是可以的(A=0时B可以为负)。这只是两个配对的示例,还有更多配对,因此我需要一个通用函数,这使得为每个配对设置唯一的权重变得困难。
library(mgcv)
library(dplyr)
df <- structure(list(X = c(-121.40613874, -121.40613874, -121.40613874,
-121.40613874, -121.40613874, -121.40613874, -121.40613874, -121.40613874,
-121.40613874, -121.40613874, -121.40613874, -121.40613874, -121.40613874,
-121.40613874, -121.40613874, -121.40613874, -121.40613874, -121.40613874,
-121.40613874, -121.40613874, -121.40613874, -121.40613874, -121.40613874,
-121.40613874, -121.40613874, -121.40613874, -121.40613874, -121.40613874,
-121.40613874, -121.40613874, -121.40613874, -121.40613874, -121.40613874,
-121.40613874, -121.40613874, -121.40613874, -121.37724092, -121.37724092,
-121.37724092, -121.37724092, -121.37724092, -121.37724092, -121.37724092,
-121.37724092, -121.37724092, -121.37724092, -121.37724092, -121.37724092,
-121.37724092, -121.37724092, -121.37724092, -121.37724092, -121.37724092,
-121.37724092, -121.37724092, -121.37724092, -121.37724092, -121.37724092,
-121.37724092, -121.37724092, -121.37724092, -121.37724092, -121.37724092,
-121.37724092, -121.37724092, -121.37724092, -121.37724092, -121.37724092,
-121.37724092, -121.37724092, -121.37724092, -121.37724092),
Y = c(39.21981979, 39.21981979, 39.21981979, 39.21981979,
39.21981979, 39.21981979, 39.21981979, 39.21981979, 39.21981979,
39.21981979, 39.21981979, 39.21981979, 39.21981979, 39.21981979,
39.21981979, 39.21981979, 39.21981979, 39.21981979, 39.21981979,
39.21981979, 39.21981979, 39.21981979, 39.21981979, 39.21981979,
39.21981979, 39.21981979, 39.21981979, 39.21981979, 39.21981979,
39.21981979, 39.21981979, 39.21981979, 39.21981979, 39.21981979,
39.21981979, 39.21981979, 39.21966932, 39.21966932, 39.21966932,
39.21966932, 39.21966932, 39.21966932, 39.21966932, 39.21966932,
39.21966932, 39.21966932, 39.21966932, 39.21966932, 39.21966932,
39.21966932, 39.21966932, 39.21966932, 39.21966932, 39.21966932,
39.21966932, 39.21966932, 39.21966932, 39.21966932, 39.21966932,
39.21966932, 39.21966932, 39.21966932, 39.21966932, 39.21966932,
39.21966932, 39.21966932, 39.21966932, 39.21966932, 39.21966932,
39.21966932, 39.21966932, 39.21966932), grp = c("0", "1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12",
"13", "14", "15", "16", "17", "18", "19", "20", "21", "22",
"23", "24", "25", "26", "27", "28", "29", "30", "31", "32",
"33", "34", "35", "0", "1", "2", "3", "4", "5", "6", "7",
"8", "9", "10", "11", "12", "13", "14", "15", "16", "17",
"18", "19", "20", "21", "22", "23", "24", "25", "26", "27",
"28", "29", "30", "31", "32", "33", "34", "35"), A = c(1.13659381866455,
1.13659477233887, 1.13658058643341, 1.13659477233887, 1.13656425476074,
1.13656532764435, 1.13658249378204, 1.13656723499298, 0.709016442298889,
0.708269774913788, 0.707276940345764, 0.705555200576782,
0.703566908836365, 0.705580234527588, 0.711442351341248,
0.709806680679321, 0.706768035888672, 0.710436880588531,
0.711900770664215, 0.627014517784119, 0.624246716499329,
0.637109100818634, 0.6302330493927, 0.712375164031982, 0.632026135921478,
0.707980036735535, 1.02401030063629, 1.24682903289795, 1.16107773780823,
1.72039294242859, 1.65212118625641, 1.77960109710693, 2.14993286132812,
2.56561064720154, 2.75121307373047, 3.24887132644653, 0,
0.471771329641342, 0.711109101772308, 0.79891711473465, 0.931526362895966,
0.968687295913696, 0.995432734489441, 1.0633533000946, 1.16641199588776,
1.34169447422028, 1.49520266056061, 1.63908088207245, 1.7665741443634,
1.88498985767365, 1.99606990814209, 2.09789752960205, 2.19571089744568,
2.28780961036682, 2.37474989891052, 2.44801378250122, 2.52833318710327,
2.60511207580566, 2.67927670478821, 2.7647979259491, 2.82285761833191,
2.97204279899597, 3.10502099990845, 3.23239874839783, 3.33626580238342,
3.47603249549866, 3.57603311538696, 3.68949151039124, 3.95258760452271,
3.93370676040649, 4.04461479187012, 4.92624521255493), B = c(300,
400, 530, 600, 730, 770, 800, 880, 1000, 1250, 1500, 1750,
2000, 2250, 2500, 2750, 3000, 3250, 3500, 3750, 4000, 4250,
4500, 4750, 5000, 5500, 6000, 6500, 7000, 7500, 8000, 8500,
9000, 9500, 10000, 15000, 300, 400, 530, 600, 730, 770, 800,
880, 1000, 1250, 1500, 1750, 2000, 2250, 2500, 2750, 3000,
3250, 3500, 3750, 4000, 4250, 4500, 4750, 5000, 5500, 6000,
6500, 7000, 7500, 8000, 8500, 9000, 9500, 10000, 15000)), row.names = c(NA,
-72L), class = c("tbl_df", "tbl", "data.frame"))
new_dat <- df %>%
summarise(gam_model=list(gam(B~s(A))), .by = c(X,Y))
pred_df <- data.frame(A= 0)
gam_predict <- function(m) predict(m,pred_df)
new_dat$gam_pred_0 <- sapply(new_dat$gam_model,gam_predict)
在步骤 1 保持不变的情况下,以下两个步骤将增强为以下。
new_dat <- df %>%
summarise(scam_model=list(scam(B~s(A,bs="mpi"))), .by = c(X,Y))
pred_df <- data.frame(A= 0)
scam_predict <- function(m) predict(m,pred_df)
new_dat$scam_pred_0 <- sapply(new_dat$scam_model,scam_predict)