通过按多个变量分组来执行locfit并绘制图表

问题描述 投票:0回答:1

我有一个数据:

library(locfit)   

a<- c("A",  "A",    "A",    "A",    "A","A","A","A","A","A","A","A",    "B",    "B",    "B",    "B",    "B","B",    "C",    "C",    "C",    "C",    "C","C","C","C","C","C","C","C","C","C","C","C","C","C")
b<- c("h3c2",   "h3c2", "h3c2", "h3c2", "h3c2","h3c2","h3c2","h3c2","h3c2","h3c2","h3c2","h3c2",    "h1c1", "h1c1", "h1c1", "h1c1", "h1c1","h1c1",  "h5c2", "h5c2","h5c2","h5c2","h5c2","h5c2","h5c2","h5c2","h5c2","h5c2","h5c2","h5c2","h5c2","h5c2","h5c2","h5c2","h5c2","h5c2")
c<- c(0,    0.090909091,    0.181818182,    0.272727273,    0.363636364,    0.454545455,    0.545454545,    0.636363636,    0.727272727,    0.818181818,    0.909090909,    1,  0,  0.2,    0.4,    0.6,    0.8,    1,  0,  0.058823529,    0.117647059,    0.176470588,    0.235294118,    0.294117647,    0.352941176,    0.411764706,    0.470588235,    0.529411765,    0.588235294,    0.647058824,    0.705882353,    0.764705882,    0.823529412,    0.882352941,    0.941176471,    1)
d<- c(391.064,  499.769,    575.051,    666.537,    774.666,    859.841,    932.585,    971.25, 996.516,    1002.115,   999.499,    940.548,    242.836,    391.662,    507.624,    507.624,    643.627,    779.394,    195.014,    259.007,    299.504,    310.179,    323.274,    346.793,    381.012,    384.662,    382.25, 434.032,    416.324,    382.021,    402.603,    396.873,    377.478,    367.659,    373.589,    382.506)
dt <- data.table(A = a,
             B= b,   
             data1=c,
             data2=d)

我想按 A 和 B 对数据进行分组,并为使用 data2 和 data1 排序的每个 A+B 进行 locfit。

我已经能够对数据进行排序了。

dt2 <- dt[, .SD[c(1:.N)], by = c('A', 'B')]

但我无法将 locfit 与此一起使用。 我已经尝试过,但它不起作用:

dt3<- dt[,
     {.SD[c(1:.N)]
       fit<- locfit(data2 ~ data1, data=dt)
       plot(fit, get.data= TRUE)}, by = c('A', 'B')]

当我这样做时: I get a graph like this

哪里I want the graph to look like this

知道如何让它像这样工作吗?

非常感谢!

编辑:我找到了一种使用 dplyr 中的嵌套函数来做到这一点的方法。根据组嵌套数据,并将 locfit 函数映射到每个嵌套组。

r data.table dplyr curve-fitting tidyr
1个回答
0
投票

有关如何嵌套和取消嵌套数据以适应模型并应用于每个分组值的完整代码。

library(dplyr)
library(purrr)
library(locfit)
library(ggplot2)
library(tidyr)


grouping_var<- c("A",  "A",    "A",    "A",    "A","A","A","A","A","A","A","A",    "B",    "B",    "B",    "B",    "B","B",    "C",    "C",    "C",    "C",    "C","C","C","C","C","C","C","C","C","C","C","C","C","C")
name<- c("h3c2",   "h3c2", "h3c2", "h3c2", "h3c2","h3c2","h3c2","h3c2","h3c2","h3c2","h3c2","h3c2",    "h1c1", "h1c1", "h1c1", "h1c1", "h1c1","h1c1",  "h5c2", "h5c2","h5c2","h5c2","h5c2","h5c2","h5c2","h5c2","h5c2","h5c2","h5c2","h5c2","h5c2","h5c2","h5c2","h5c2","h5c2","h5c2")
height_values<- c(0,    0.090909091,    0.181818182,    0.272727273,    0.363636364,    0.454545455,    0.545454545,    0.636363636,    0.727272727,    0.818181818,    0.909090909,    1,  0,  0.2,    0.4,    0.6,    0.8,    1,  0,  0.058823529,    0.117647059,    0.176470588,    0.235294118,    0.294117647,    0.352941176,    0.411764706,    0.470588235,    0.529411765,    0.588235294,    0.647058824,    0.705882353,    0.764705882,    0.823529412,    0.882352941,    0.941176471,    1)
Mean<- c(391.064,  499.769,    575.051,    666.537,    774.666,    859.841,    932.585,    971.25, 996.516,    1002.115,   999.499,    940.548,    242.836,    391.662,    507.624,    507.624,    643.627,    779.394,    195.014,    259.007,    299.504,    310.179,    323.274,    346.793,    381.012,    384.662,    382.25, 434.032,    416.324,    382.021,    402.603,    396.873,    377.478,    367.659,    373.589,    382.506)
df <- data.frame(grouping_var = grouping_var,
             name= name,   
             height_values=height_values,
             Mean=Mean)
df
#nest the data in df with height to use in locfit for prediction
locfit.input <- df %>%
  group_by(grouping_var, name) %>%
  nest()


# heights chosen to interpolate levels
predictvalues <- c(0.1, 0.3, 0.5, 0.7, 0.9)


# function to fit locfit
locfit_model <- function(df) {
  fit <- locfit(Mean ~ height_values, data = df)
  plot(
    fit,
    get.data = TRUE,
    xlab = "Height",
    ylab = "Levels"
  )
  par(new = TRUE) #To superimpose graphs
  predictions <- predict(fit, predictvalues)
}

#to plot the locfit graphs
#to start a new graph page and close all old ones,
# so that new graphs don't get superimposed on the old ones
plot.new()
dev.off()
plot.new()

#add locfit data to grouped cells and add cell height(on which prediction is made) to each cell
locfit.model <- locfit.input %>% 
  mutate(fitvalues = data %>% map(locfit_model)) %>% #fit locfit model to each cell of the grouped data after nesting
  unnest(fitvalues) %>% # unnest the data to get the fit values
  group_by(grouping_var, name) %>% #group by grouping_var and name
  mutate(Standard_heights = predictvalues) %>% #add the cell height to each cell
  unnest(data) %>% #unnest the data to get the original data
  select(-height_values, -Mean) %>% #remove the original data
  distinct() %>% #remove duplicates
  rename(Precited_values = fitvalues)#rename the fit values to Predicted_values
locfit.model <- locfit.model %>% group_by(grouping_var, name) #group by grouping_var and name
locfit.model 

#to plot line plots for standard height vs locfit data------------------
r <- ggplot(data = locfit.model,
            aes(x = factor(Standard_heights), #x is standard heights
                y = Precited_values, #y is predicted values
                fill = grouping_var, #fill is grouping_var
                color = grouping_var, #color is grouping_var
                group= paste0(grouping_var,"_",name))) + #group by grouping_var and name
  theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 14)) + #theme for plot title and text
  labs(title = "Levels at fixed heights", x = "Height", y = "Levels") + #labels for title, x and y axis
  geom_line()+ #add line plot
  geom_point( #add points
    alpha = 0.4, #alpha value for transparency
    shape = 21 #shape of points
  )
print(r)#print the plot
© www.soinside.com 2019 - 2024. All rights reserved.