缺失值 NaN

问题描述 投票:0回答:2

请帮助我,当我尝试使用 10% 的测试数据(训练数据 90)和 1000 次模拟进行输出时出现错误。

#pemanggilan paket yang digunakan
library(caret)
library(KernelKnn)

#perhitungan akurasi prediksi dengan 100 kali simulasi
sim = function(B, p1, k1)
{
  hasileu = matrix(0, B, 4)
  for (i in 1:B)
  {
    #memanggil data boston
    boston = MASS::Boston

    #membagi data train dan data test
    indexes = createDataPartition(boston$medv, p = p1, list = F)
    train = boston[indexes, ]
    test = boston[-indexes, ]
    
    train_x = train[, -14]
    train_x = scale(train_x)[,]
    train_y = train[,14]
    
    test_x = test[, -14]
    test_x = scale(test[,-14])[,]
    test_y = test[,14]
    
    #model prediksi regresi KNN
    pred1 = KernelKnn(train_x, TEST_data = test_x, train_y, k = k1, method = 'euclidean', weights_function = NULL, regression = T)

    #perhitungan akurasi
    mse = mean((test_y - pred1)^2)
    mae = mean(abs(test_y - pred1))
    rmse = sqrt(mse)
    mape = mean(abs((test_y - pred1)/test_y))
    r2 = cor(test_y, pred1)^2
    hasileu[i,1] = rmse
    hasileu[i,2] = mape
    hasileu[i,3] = mae
    hasileu[i,4] = r2
  }
  return(apply(hasileu, 2, mean))
}

#penentuan rentang nilai K
hitung.variasi.k = function(B,p,K)
{
  has11 = matrix(0,K,4)
  for (i in 1:K)
  {
    has11[i,] = sim(B,p,i)
  }
  has11
}

#hasil akurasi berbagai proporsi data uji
K = 10
has11 = hitung.variasi.k(1000, 0.9,K)

Error in KernelKnn(train_x, TEST_data = test_x, train_y, k = k1, method = "euclidean", : 
the TEST_data includes missing values
r nan knn
2个回答
0
投票

以下代码使用 tryCatch 捕获“错误”并继续执行到最后。

library(caret) 
library(KernelKnn)

#perhitungan akurasi prediksi dengan 100 kali simulasi
sim = function(B, p1, k1)
{
  hasileu = matrix(0, B, 4)
  for (i in 1:B)
  {
    #memanggil data boston
    boston = MASS::Boston
    
    #membagi data train dan data test
    indexes = createDataPartition(boston$medv, p = p1, list = F)
    train = boston[indexes, ]
    test = boston[-indexes, ]
    
    train_x = train[, -14]
    train_x = scale(train_x)[,]
    train_y = train[,14]
    
    test_x = test[, -14]
    test_x = scale(test[,-14])[,]
    test_y = test[,14]
    
    errorFlag=0
    #model prediksi regresi KNN
    pred1 =tryCatch(
     KernelKnn(train_x, TEST_data = test_x, train_y, k = k1, method = 'euclidean', weights_function = NULL, regression = T)
    , error=function(e) 
      { print(e); print(paste("error at loop", i));errorFlag<<-1;}
    )

    if (errorFlag==0)
    {
      #perhitungan akurasi
      mse = mean((test_y - pred1)^2)
      mae = mean(abs(test_y - pred1))
      rmse = sqrt(mse)
      mape = mean(abs((test_y - pred1)/test_y))
      r2 = cor(test_y, pred1)^2
      hasileu[i,1] = rmse
      hasileu[i,2] = mape
      hasileu[i,3] = mae
      hasileu[i,4] = r2
    }
  }
  return(apply(hasileu, 2, mean))
}

#penentuan rentang nilai K
hitung.variasi.k = function(B,p,K)
{
  has11 = matrix(0,K,4)
  for (i in 1:K)
  {
    has11[i,] = sim(B,p1=p,k1=i)
  }
  return(has11)
}

#hasil akurasi berbagai proporsi data uji
K = 10
has11 = hitung.variasi.k(B=1000, p=0.9,K=10)

0
投票

要调试此错误,您必须查明“train_x”或“test_x”是否包含任何 NA,以及您是否可以选择跳过此特定迭代。 'KernelKnn' 包不允许缺少 train、test 或 y 值

的值

以下代码片段(根据您的代码进行了调整)跳过了“train_x”或“test_x”包含缺失值的迭代。 while-for 循环考虑了 NA 存在的情况,并且仅当您的矩阵具有“B”行数时才返回

#pemanggilan paket yang digunakan
library(caret)
library(KernelKnn)

#perhitungan akurasi prediksi dengan 100 kali simulasi
sim = function(B, p1, k1)
{
  hasileu = matrix(0, B, 4)
  increment = 1
  
  while(TRUE) {
    
    if (increment == B + 1) break
    
    #memanggil data boston
    boston = MASS::Boston
    
    #membagi data train dan data test
    indexes = createDataPartition(boston$medv, p = p1, list = F)
    train = boston[indexes, ]
    test = boston[-indexes, ]
    
    train_x = train[, -14]
    train_x = scale(train_x)[,]
    train_y = train[,14]
    
    test_x = test[, -14]
    test_x = scale(test[,-14])[,]
    test_y = test[,14]
    
    # checks for NA's and skips in case train_x or test_x include any
    NAs_train = sum(colSums(is.na(train_x)))
    NAs_tess = sum(colSums(is.na(test_x)))
    
    if (any(c(NAs_train > 0, NAs_tess > 0))) {
      next
    }
    else {
      
      #model prediksi regresi KNN
      pred1 = KernelKnn(train_x, TEST_data = test_x, train_y, k = k1, method = 'euclidean', weights_function = NULL, regression = T)
      
      #perhitungan akurasi
      mse = mean((test_y - pred1)^2)
      mae = mean(abs(test_y - pred1))
      rmse = sqrt(mse)
      mape = mean(abs((test_y - pred1)/test_y))
      r2 = cor(test_y, pred1)^2
      hasileu[increment,1] = rmse
      hasileu[increment,2] = mape
      hasileu[increment,3] = mae
      hasileu[increment,4] = r2
      
      increment = increment + 1
    }
  }

  # return(hasileu)                    # verify that you receive B rows by replacing the next line by this line
  return(apply(hasileu, 2, mean))
}

#penentuan rentang nilai K
hitung.variasi.k = function(B,p,K)
{
  has11 = matrix(0,K,4)
  for (i in 1:K)
  {
    has11[i,] = sim(B,p,i)
  }
  has11
}

#hasil akurasi berbagai proporsi data uji
K = 10
has11 = hitung.variasi.k(1000, 0.9,K)
str(has11)

郑重声明,我是 KernelKnn 包的作者。

© www.soinside.com 2019 - 2024. All rights reserved.