`[<-.ts`(`*tmp*`, ri, value = c(135.945603813953, Only element replacements are allowed when try forecast in R

问题描述 投票:0回答:1

这里是我的火车样本的 dput

dput(head(train,10))
train=structure(list(d_event_date = structure(c(19702, 19702, 19702, 
19702, 19702, 19702, 19702, 19702, 19702, 19702), class = "Date"), 
    id_placement = c(20199549L, 20396189L, 20420889L, 16751495L, 
    17220762L, 15387714L, 20355507L, 20385438L, 17752382L, 21253771L
    ), id_zone = c(1632853L, 2836041L, 2919363L, 1709628L, 9937L, 
    1274157L, 1400727L, 2907891L, 1948324L, 1863050L), id_publisher = c(302501L, 
    480683L, 1204726L, 4468L, 5515L, 163955L, 176741L, 421345L, 
    441862L, 467632L), id_advertiser = c(914L, 914L, 914L, 914L, 
    914L, 914L, 914L, 914L, 914L, 914L), id_campaign = c(687314L, 
    687314L, 687314L, 723900L, 723900L, 723900L, 723900L, 723900L, 
    723900L, 723900L), id_banner = c(2032222L, 2032222L, 2032222L, 
    2136674L, 2136674L, 2136674L, 2136674L, 2136674L, 2136674L, 
    2136674L), id_landing = c(4772102L, 4772102L, 4699256L, 4821730L, 
    4821514L, 4821513L, 4821514L, 4823676L, 4821514L, 4821730L
    ), id_ad_unit = c(28L, 28L, 28L, 28L, 28L, 28L, 28L, 28L, 
    28L, 28L), n_landing_pricing_type = c(3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L), n_impression_qd = c(1L, 8L, 1L, 1L, 
    25L, 4L, 35L, 1L, 5L, 1L), n_click_qd = structure(c(0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0), class = "integer64"), n_conversion_qd = c(1L, 
    1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L), n_gross = c(0.0024, 
    0.0024, 0.0024, 0.008, 0.032499998, 0.0054, 0.064999996, 
    0.0076, 0.0129, 0.0239), n_net = c(0.001632, 0.00036, 0.001632, 
    0.0052, 0, 0.0027, 0.05135, 0.0038, 0.00774, 0.016252)), row.names = c(NA, 
10L), class = "data.frame")

我使用 Arima 创建时间序列模型。 这是我的代码

# Function for data normalization
normalize_data <- function(data, method) {
   normalized_data <- NULL
   if (method == "none") {
     normalized_data <- data
   } else if (method == "min-max") {
     min_val <- min(data)
     max_val <- max(data)
     normalized_data <- (data - min_val) / (max_val - min_val)
   } else if (method == "z-score") {
     mean_val <- mean(data)
     std_val <- sd(data)
     normalized_data <- (data - mean_val) / std_val
   } else if (method == "cubic-root") {
     normalized_data <- sign(data) * abs(data)^(1/3)
   } else if (method == "logarithmic") {
     normalized_data <- log(1 + data)
   }
   return(normalized_data)
}

# Function for calculating MAPE
calculate_mape <- function(actual, forecasted) {
   mape <- mean(abs((actual - forecasted) / actual)) * 100
   return(map)
}

# Normalization and model building for each variable
variables <- c("n_impression_qd", "n_conversion_qd", "n_gross")
normalization_methods <- c("none", "min-max", "z-score", "cubic-root", "logarithmic")
mape_results <- data.frame(variable = character(), normalization = character(), mape = numeric(), stringsAsFactors = FALSE)

for (variable in variables) {
   for (method in normalization_methods) {
     normalized_data <- normalize_data(data_filtered[[variable]], method)
    
     # Convert to time series object
     ts_data <- ts(normalized_data)
    
     # Building an ARIMA model
     model <- auto.arima(ts_data)
    
     #Forecasting
     forecast <- forecast(model, h = 10)$mean
    
     # MAPE calculation
     actual <- tail(data_filtered[[variable]], 10)
     mape <- calculate_mape(actual, forecast)
    
     # Saving results
     result <- data.frame(variable = variable, normalization = method, mape = mape)
     mape_results <- rbind(mape_results, result)
   }
}

# Output results
print(map_results)

它有效,但是当我尝试在我的测试样本上应用模型时 例如这样

# Convert 'd_event_date' column to date format
test_data$d_event_date <- as.Date(test_data$d_event_date)

# Function to denormalize data
denormalize_data <- function(data, method, min_val, max_val, mean_val, std_val) {
   denormalized_data <- NULL
   if (method == "none") {
     denormalized_data <- data
   } else if (method == "min-max") {
     denormalized_data <- data * (max_val - min_val) + min_val
   } else if (method == "z-score") {
     denormalized_data <- data * std_val + mean_val
   } else if (method == "cubic-root") {
     denormalized_data <- sign(data) * abs(data)^3
   } else if (method == "logarithmic") {
     denormalized_data <- exp(data) - 1
   }
   return(denormalized_data)
}

# Variables and normalization methods
variables <- c("n_impression_qd", "n_conversion_qd", "n_gross")
normalization_methods <- c("none", "min-max", "z-score", "cubic-root", "logarithmic")

# Result storage
prediction_results <- data.frame(variable = character(), actual = numeric(), predicted = numeric(), stringsAsFactors = FALSE)

# Predict and calculate MAPE for each variable and normalization method
for (variable in variables) {
   for (method in normalization_methods) {
     # Retrieving data and normalization parameters for a variable
     data <- test_data[[variable]]
     min_val <- min(data)
     max_val <- max(data)
     mean_val <- mean(data)
     std_val <- sd(data)
    
     # Data normalization
     normalized_data <- normalize_data(data, method)
    
     # Convert to time series object
     ts_data <- ts(normalized_data)
    
     #Forecasting
     forecast <- forecast(model, h = length(ts_data))$mean
    
     # Denormalization of predicted values
     denormalized_forecast <- denormalize_data(forecast, method, min_val, max_val, mean_val, std_val)
    
     # MAPE calculation
     actual <- data
     mape <- calculate_mape(actual, denormalized_forecast)
    
     # Saving results
     result <- data.frame(variable = variable, actual = actual, predicted = denormalized_forecast)
     prediction_results <- rbind(prediction_results, result)
   }
}

i get the error
Error in `[<-.ts`(`*tmp*`, ri, value = c(135.945603813953, 177.486351819609, :
   Only element replacements are allowed

示例测试样本

 dput(test_data)
    structure(list(d_event_date = structure(c(19702L, 19702L, 19702L, 
    19702L, 19702L, 19702L, 19702L, 19702L, 19702L, 19702L), class = c("IDate", 
    "Date")), id_placement = c(20794491L, 20794491L, 17788422L, 20654348L, 
    21496171L, 18582636L, 19840341L, 17218292L, 18423615L, 10973945L
    ), id_zone = c(2859002L, 2859002L, 1781319L, 1733870L, 2096492L, 
    1933910L, 2717419L, 1850302L, 2245073L, 46564L), id_publisher = c(352465L, 
    352465L, 143258L, 304470L, 627080L, 59862L, 1052120L, 425742L, 
    38L, 34624L), id_advertiser = c(914L, 914L, 914L, 914L, 914L, 
    2834L, 2834L, 8579L, 15862L, 15862L), id_campaign = c(689656L, 
    689656L, 723900L, 723900L, 723900L, 873762L, 882336L, 360584L, 
    872210L, 872213L), id_banner = c(2038567L, 2038567L, 2136674L, 
    2136674L, 2136674L, 2546849L, 2568794L, 1181453L, 2543069L, 2543072L
    ), id_landing = c(3814050L, 4590592L, 4823675L, 4821514L, 4823676L, 
    4601283L, 4811884L, 4801168L, 4595695L, 4595698L), id_ad_unit = c(5L, 
    5L, 28L, 28L, 28L, 27L, 29L, 28L, 28L, 28L), n_landing_pricing_type = c(3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L), n_impression_qd = c(13L, 
    11L, 2L, 1L, 8L, 5L, 349L, 2L, 132L, 7L), n_click_qd = structure(c(0, 
    0, 0, 0, 0, 0, 1.15611361126852e-321, 0, 0, 0), class = "integer64"), 
        n_conversion_qd = c(1L, 2L, 1L, 1L, 3L, 2L, 5L, 1L, 3L, 1L
        ), n_gross = c(0.003, 0.0065, 0.0124, 0.0167, 0.090299998, 
        0.00054504, 0.03256, 0.0263, 0.08052, 0.00404), n_net = c(9e-04, 
        0.00195, 0.00868, 0.012024, 0.0686280016, 0.0003706272, 0.0221408, 
        0.017884, 0.032208, 0.00202)), row.names = c(NA, 10L), class = "data.frame")

为什么会出现这个错误,我该如何修复它? 感谢您的帮助。 预测后,我将数据非规范化回绝对值。 对于测试样本中的每个变量,所需的结果是使用一种或另一种标准化方法构建的每个模型的实际值和预测值

structure(list(n_impression_qd_actual = 12:16, n_impression_qd_pred = 12:16, 
    model.normalize_n_impression_qd = c("none", "min-max", "z-score", 
    "cubic-root", "logarithmic"), n_conversion_qd_actual = 12:16, 
    n_conversion_qd_pred = 12:16, model.normalize_n_n_conversion_qd = c("none", 
    "min-max", "z-score", "cubic-root", "logarithmic"), n_gross_actual = 12:16, 
    n_gross_pred = 12:16, n_gross_model.normalize = c("none", 
    "min-max", "z-score", "cubic-root", "logarithmic")), class = "data.frame", row.names = c(NA, 
-5L))
r time-series forecasting
1个回答
0
投票

这可能是一条评论,但我对规范化/非规范化函数有一个建议,并将其作为答案发布。

问题

问题的解决方案是在使用结果之前将

"ts"
对象强制转换为数字。
rbind

数据标准化

规范化数据不会产生任何问题,除非稍后要对数据进行非规范化,在这种情况下,可以使非规范化代码更简单。如果规范化函数将方法和参数保存为返回的对象属性,那么这些属性可以在以后使用,而无需重新计算它们,也不需要有这么多的非规范化函数参数。

for (variable in variables) { for (method in normalization_methods) { # [rest of code omitted] # Saving results result <- data.frame(variable = variable, actual = actual, predicted = denormalized_forecast) # include this line result$predicted <- as.numeric(result$predicted) # prediction_results <- rbind(prediction_results, result) } }

现在调用对数据进行非规范化

# New function for data normalization normalize_data <- function(data, method) { normalized_data <- NULL params <- NULL if (method == "none") { normalized_data <- data } else if (method == "min-max") { min_val <- min(data) max_val <- max(data) normalized_data <- (data - min_val) / (max_val - min_val) params <- c(min = min_val, max = max_val) } else if (method == "z-score") { mean_val <- mean(data) std_val <- sd(data) normalized_data <- (data - mean_val) / std_val params <- c(mean = mean_val, sd = std_val) } else if (method == "cubic-root") { normalized_data <- sign(data) * abs(data)^(1/3) } else if (method == "logarithmic") { normalized_data <- log1p(data) } attr(normalized_data, "method") <- method if(!is.null(params)) attr(normalized_data, "params") <- params normalized_data } # New denormalization function denormalize_data <- function(data, attrib) { method <- attrib$method denormalized_data <- NULL if (method == "none") { denormalized_data <- data } else if (method == "min-max") { min_val <- attrib$params["min"] max_val <- attrib$params["max"] denormalized_data <- data * (max_val - min_val) + min_val } else if (method == "z-score") { mean_val <- attrib$params["mean"] std_val <- attrib$params["sd"] denormalized_data <- data * std_val + mean_val } else if (method == "cubic-root") { denormalized_data <- sign(data) * abs(data)^3 } else if (method == "logarithmic") { denormalized_data <- expm1(data) } denormalized_data }

© www.soinside.com 2019 - 2024. All rights reserved.