我有一个数据:
library(data.table)
library(locfit)
# Example data: three A/B groups (A/h3c2, B/h1c1, C/h5c2) holding 12, 6 and 18
# observations respectively.
a <- rep(c("A", "B", "C"), times = c(12, 6, 18))
b <- rep(c("h3c2", "h1c1", "h5c2"), times = c(12, 6, 18))
# Within each group the data1 values run from 0 to 1.
c <- c(
  # group A (12 values)
  0, 0.090909091, 0.181818182, 0.272727273, 0.363636364, 0.454545455,
  0.545454545, 0.636363636, 0.727272727, 0.818181818, 0.909090909, 1,
  # group B (6 values)
  0, 0.2, 0.4, 0.6, 0.8, 1,
  # group C (18 values)
  0, 0.058823529, 0.117647059, 0.176470588, 0.235294118, 0.294117647,
  0.352941176, 0.411764706, 0.470588235, 0.529411765, 0.588235294,
  0.647058824, 0.705882353, 0.764705882, 0.823529412, 0.882352941,
  0.941176471, 1
)
d <- c(
  # group A (12 values)
  391.064, 499.769, 575.051, 666.537, 774.666, 859.841,
  932.585, 971.25, 996.516, 1002.115, 999.499, 940.548,
  # group B (6 values)
  242.836, 391.662, 507.624, 507.624, 643.627, 779.394,
  # group C (18 values)
  195.014, 259.007, 299.504, 310.179, 323.274, 346.793,
  381.012, 384.662, 382.25, 434.032, 416.324, 382.021,
  402.603, 396.873, 377.478, 367.659, 373.589, 382.506
)
# One row per observation: group keys A and B plus the two numeric columns.
dt <- data.table(A = a, B = b, data1 = c, data2 = d)
我想按 A 和 B 对数据进行分组，并对每个 A+B 组合，用 data2 对 data1 做 locfit 拟合。
我已经能够对数据进行排序了。
# Return the table arranged group by group (keys A and B); every row of each
# group is kept unchanged.
dt2 <- dt[, .SD, by = .(A, B)]
但我无法在此基础上使用 locfit。我尝试了下面的写法，但它不起作用：
# Fit a separate local regression of data2 on data1 for every A/B group, draw
# each fit together with its data points, and collect the fitted values.
#
# The earlier attempt passed `data = dt`, so every group refitted the complete
# table instead of its own rows; it also discarded the `.SD[...]` subset and
# returned the value of plot() from `j`. Here each group's rows are handed to a
# small helper as an ordinary data frame, and the helper returns the fit
# evaluated at the group's own data1 values.
fit_locfit_group <- function(group_data, group_label) {
  fit <- locfit(data2 ~ data1, data = group_data)
  plot(fit, get.data = TRUE, main = group_label)
  predict(fit, newdata = group_data$data1)
}
dt3 <- dt[,
  .(
    data1 = data1,
    # .SD holds only this group's rows; .BY holds the group's A and B values
    fitted = fit_locfit_group(
      as.data.frame(.SD),
      paste(unlist(.BY), collapse = " / ")
    )
  ),
  by = c("A", "B")]
知道如何让它像这样工作吗?
非常感谢!
编辑：我找到了一种方法，使用 tidyr 的 nest() 函数（配合 dplyr 和 purrr）来实现：先按组嵌套数据，再将 locfit 函数映射到每个嵌套组。
下面是完整代码，演示如何嵌套和取消嵌套数据，以便对每个分组拟合模型并进行预测。
library(dplyr)
library(purrr)
library(locfit)
library(ggplot2)
library(tidyr)
# Example data: three groups (A/h3c2, B/h1c1, C/h5c2) holding 12, 6 and 18
# observations respectively.
grouping_var <- rep(c("A", "B", "C"), times = c(12, 6, 18))
name <- rep(c("h3c2", "h1c1", "h5c2"), times = c(12, 6, 18))
# Within each group the height values run from 0 to 1.
height_values <- c(
  # group A (12 values)
  0, 0.090909091, 0.181818182, 0.272727273, 0.363636364, 0.454545455,
  0.545454545, 0.636363636, 0.727272727, 0.818181818, 0.909090909, 1,
  # group B (6 values)
  0, 0.2, 0.4, 0.6, 0.8, 1,
  # group C (18 values)
  0, 0.058823529, 0.117647059, 0.176470588, 0.235294118, 0.294117647,
  0.352941176, 0.411764706, 0.470588235, 0.529411765, 0.588235294,
  0.647058824, 0.705882353, 0.764705882, 0.823529412, 0.882352941,
  0.941176471, 1
)
Mean <- c(
  # group A (12 values)
  391.064, 499.769, 575.051, 666.537, 774.666, 859.841,
  932.585, 971.25, 996.516, 1002.115, 999.499, 940.548,
  # group B (6 values)
  242.836, 391.662, 507.624, 507.624, 643.627, 779.394,
  # group C (18 values)
  195.014, 259.007, 299.504, 310.179, 323.274, 346.793,
  381.012, 384.662, 382.25, 434.032, 416.324, 382.021,
  402.603, 396.873, 377.478, 367.659, 373.589, 382.506
)
# One row per observation: the two grouping columns plus height and level.
df <- data.frame(
  grouping_var = grouping_var,
  name = name,
  height_values = height_values,
  Mean = Mean
)
df
# Collapse df to one row per (grouping_var, name) pair; the remaining columns
# of each group are stored in a nested `data` column that is later handed to
# locfit.
locfit.input <- nest(group_by(df, grouping_var, name))
# Heights at which every fitted curve is evaluated
predictvalues <- c(0.1, 0.3, 0.5, 0.7, 0.9)
# Fit a local regression of Mean on height_values for one group, plot the fit
# together with its data, and return the fit evaluated at the requested
# heights.
#
# Arguments:
#   df          data frame containing the columns `Mean` and `height_values`.
#   predict_at  heights at which the fitted curve is evaluated. Defaults to
#               the script-level `predictvalues`, so existing one-argument
#               calls behave as before.
# Returns:
#   the result of predict() on the fit at `predict_at`.
# Side effects:
#   draws on the current graphics device and leaves par(new = TRUE) set.
locfit_model <- function(df, predict_at = predictvalues) {
  fit <- locfit(Mean ~ height_values, data = df)
  plot(fit, get.data = TRUE, xlab = "Height", ylab = "Levels")
  # Keep par(new = TRUE) active so the next call is drawn on top of this one
  par(new = TRUE)
  predict(fit, newdata = predict_at)
}
# Prepare a clean graphics state before the locfit curves are drawn, so that
# new graphs are not superimposed on figures left over from earlier runs.
# graphics.off() shuts down every open device (dev.off() would close only the
# current one) and does not fail when no device is open; plot.new() then opens
# a fresh device and starts an empty page.
graphics.off()
plot.new()
# Fit locfit to every nested group and build a long table with one row per
# (grouping_var, name, standard height) combination.
#
# The raw observations in `data` are dropped directly rather than being
# unnested, removed and de-duplicated with distinct(), which multiplied every
# prediction row by the number of observations in its group before collapsing
# it again. The resulting rows and columns are the same.
#
# NOTE: the column name `Precited_values` (sic) is kept as-is because the
# plotting code below refers to it by that name.
locfit.model <- locfit.input %>%
  mutate(
    Precited_values = map(data, locfit_model), # predictions for each group
    Standard_heights = list(predictvalues) # heights the predictions refer to
  ) %>%
  select(-data) %>% # the raw observations are no longer needed
  unnest(c(Precited_values, Standard_heights)) %>% # one row per height
  group_by(grouping_var, name) # keep the result grouped by grouping_var, name
locfit.model
# Line plot of the locfit predictions against the standard heights ----------
# One line per (grouping_var, name) pair, coloured and filled by grouping_var;
# the standard heights are treated as discrete x positions.
r <- ggplot(
  locfit.model,
  aes(
    x = factor(Standard_heights),
    y = Precited_values,
    fill = grouping_var,
    color = grouping_var,
    group = paste0(grouping_var, "_", name)
  )
) +
  geom_line() +
  # semi-transparent points drawn with point shape 21
  geom_point(shape = 21, alpha = 0.4) +
  labs(title = "Levels at fixed heights", x = "Height", y = "Levels") +
  # centred title and 14 pt text
  theme(
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 14)
  )
print(r)