`$]中的错误>

问题描述 投票:0回答:1

早安

我运行了带有调整的“随机森林”,并将预测添加到“火车”数据中,该数据运行得很好且没有问题。但是,当我尝试在Test数据集上运行随机森林模型时,出现上述错误。关于什么可能导致此的任何想法是我的代码。感谢任何帮助。训练数据集确实有3500行,而测试数据集有1500行,因为数据集由5000行组成。

R代码:

####Clearing the global environmnent
rm(list = ls())

##Setting the working directory
setwd("D:/Great Learning/Module 3 -Machine Learning/Project")


##Packages required to be loaded
install.packages("DataExplorer")
install.packages("xlsx")
##install.packages("magrittr")
install.packages("dplyr")
install.packages("tidyverse")
install.packages("mice")
install.packages("NbClust")

##Reading in the dataset
library(xlsx)
LoanModelRaw = read.xlsx("Thera Bank_Personal_Loan_Modelling-dataset- 1.xlsx",sheetName = "Bank_Personal_Loan_Modelling",header = T)
##LoanModelRaw = read.csv("Thera Bank_Personal_Loan_Modelling-dataset-1.csv", sep = ";",header = T)

##Viewing the dataset in R
View(LoanModelRaw)
dim(LoanModelRaw)
colnames(LoanModelRaw)
str(LoanModelRaw)
summary(LoanModelRaw)
nrow(LoanModelRaw)
attach(LoanModelRaw)

#Correcting column names
names(LoanModelRaw)[2] = "AgeInYears" 
names(LoanModelRaw)[3] = "ExperienceInYears"
names(LoanModelRaw)[4] = "IncomeInKMonth"
names(LoanModelRaw)[5] = "ZIPCode"
names(LoanModelRaw)[6] = "FamilyMembers"
names(LoanModelRaw)[10] = "PersonalLoan"
names(LoanModelRaw)[11] = "SecuritiesAccount"
names(LoanModelRaw)[12] = "CDAccount" 

colnames(LoanModelRaw)

#############################################################1 EDA of the data#######################################################

library(DataExplorer)
##introduce(LoanModelRaw)
plot_intro(LoanModelRaw)
plot_missing(LoanModelRaw)
##plot_bar(LoanModelRaw)
plot_histogram(LoanModelRaw)
create_report(LoanModelRaw)

?plot_boxplot

#Missing Value Treatment
library(mice)
sum(is.na(LoanModelRaw))
md.pattern(LoanModelRaw)
LoanModelRawImpute = mice(LoanModelRaw, m =5, method = 'pmm', seed = 1000)
LoanModelRawNoNa = complete(LoanModelRawImpute, 3)
md.pattern(LoanModelRawNoNa)

#Correcting negative experience
LoanModel = abs(LoanModelRawNoNa[2:14])
attach(LoanModel)
#View(LoanModel)
#summary(LoanModel)
#nrow(LoanModel)
LoanModel$Split = sample.split(LoanModel$PersonalLoan, SplitRatio = 0.7)
View(LoanModel)
LoanModelTrainRaw = subset(LoanModel,LoanModel$Split == TRUE)
LoanModelTestRaw = subset(LoanModel,LoanModel$Split == FALSE)

#Installing the packages for the running random forest
install.packages("randomForest")
install.packages("dplyr")
library(randomForest)
library(dplyr)
attach(LoanModelTrain)
str(LoanModelTrain)

#Need to exclude the split and move columns
LoanModelTrain = LoanModelTrainRaw[1:13]
LoanModelTest = LoanModelTestRaw[1:13]
LoanModelTrain = LoanModelTrain %>% select(IncomeInKMonth,Mortgage,ZIPCode,CCAvg,everything())
LoanModelTest = LoanModelTest %>% select(IncomeInKMonth,Mortgage,ZIPCode,CCAvg, everything())
head(LoanModelTrain)
head(LoanModelTest)

###Converting the data set to a factor variable in order to be read
#Train
fcol = c(5:13)
LoanModelTrain[,fcol] = lapply(LoanModelTrain[,fcol], factor)
str(LoanModelTrain)
nrow(LoanModelTrain)

#Test
fcol = c(5:13)
LoanModelTest[,fcol] = lapply(LoanModelTest[,fcol], factor)
str(LoanModelTest)

##Running the random forest
seed = 1000
set.seed(seed)
LoanModelTrainRF = randomForest(PersonalLoan ~ ., data = LoanModelTrain, ntree = 501, mtry = 10, nodesize = 10, importance = TRUE, do.trace = TRUE)
print(LoanModelTrainRF)
plot(LoanModelTrainRF)
importance(LoanModelTrainRF)
?randomForest

###Tuning the random Forest
set.seed(seed)
LoanModelTrain = LoanModelTrain %>% select(PersonalLoan,everything())
str(LoanModelTrain)
LoanModelTrainRFTuned = tuneRF(x = LoanModelTrain[,-c(1)], 
                               y = PersonalLoan,
                               mtryStart = 10,
                               stepFactor = 1.5,
                               improve = 0.001,
                               trace = TRUE,
                               plot = TRUE,
                               doBest = TRUE,
                               importance = TRUE)

###Running refined random forest
LoanModelTrainRefinedRF = randomForest(PersonalLoan ~ ., data = LoanModelTrain, ntree = 95, mtry = 10, nodesize = 10, importance = TRUE, do.trace = TRUE)
print(LoanModelTrainRefinedRF)
plot(LoanModelTrainRefinedRF)


###Adding the prediction columns and probability columns 
LoanModelTrain$Predict = predict(LoanModelTrainRefinedRF,data= LoanModelTrain, type = "class")
LoanModelTrain$Score = predict(LoanModelTrainRefinedRF,data= LoanModelTrain, type = "prob")
head(LoanModelTrain)

###Check the accuracy of the model
install.packages("caret")
library(caret)

caret::confusionMatrix(LoanModelTrain$PersonalLoan, LoanModelTrain$Predict)


###Run the model against the Test Data
str(LoanModelTest)
LoanModelTest$Predict =  predict(LoanModelTrainRefinedRF,data= LoanModelTest, type = "class")
LoanModelTest$Score  =  predict(LoanModelTrainRefinedRF,data= LoanModelTest, type = "prob")

[Good Day,我运行了一个带有调整的随机森林,并将预测添加到Train数据中,该数据运行得很好,没有问题。但是,当我尝试在...

r random-forest supervised-learning
1个回答
0
投票

此错误意味着您尝试将长度为3500的列向量附加到具有1500行的矩阵中。当然,这不起作用,因为R不会自动为空行创建ǸA(这是一件好事)。

© www.soinside.com 2019 - 2024. All rights reserved.