我想根据 MSE
、AIC
和 Adjusted R squared
评估基于 R 中不同相关系数(例如 Pearson、Kendall)的各种主成分模型。我创建了以下函数,但是我找不到方法“ force”函数根据给定的相关矩阵执行主成分回归。因此,我最终得到了完全相同的结果。有人可以帮助我吗?
library(caret)
set.seed(123)
df <- data.frame(Y = rnorm(100), X1 = rnorm(100), X2 = rnorm(100), X3 = rnorm(100), X4 = rnorm(100), X5 = rnorm(100))
X <- df[,-1]
Y <- df[,1]
# compute Pearson's and Spearman's correlation matrices
cor1 <- cor(X, method = "pearson")
cor2 <- cor(X, method = "kendall")
# define function to compute PCA with cross-validation and return MSE, AIC, and adjusted R-squared
pca_cv_mse_aic_r2 <- function(X, Y, cor_mat, ncomp, nfolds) {
# create empty vectors to store results
mse <- rep(0, ncomp)
aic <- rep(0, ncomp)
adj_r2 <- rep(0, ncomp)
# loop over the number of components
for (i in 1:ncomp) {
# perform PCA with cross-validation
pca <- caret::train(X, Y, method = "pcr", preProc = c("center", "scale"),
tuneLength = nfolds, trControl = trainControl(method = "cv", number = nfolds),
tuneGrid = data.frame(ncomp = i))
# compute MSE, AIC, and adjusted R-squared
pred <- predict(pca, newdata = X)
mse[i] <- mean((pred - Y)^2)
aic[i] <- AIC(lm(Y ~ pred + 1))
adj_r2[i] <- summary(lm(Y ~ pred))$adj.r.squared
}
# return a list of results
return(list(mse = mse, aic = aic, adj_r2 = adj_r2))
}
# compute the MSE, AIC, and adjusted R-squared of PCA models with different correlation matrices and numbers of components
results1 <- pca_cv_mse_aic_r2(X, Y, cor1, 5, 10)
results2 <- pca_cv_mse_aic_r2(X, Y, cor2, 5, 10)