我想在使用查找表的数据框上创建一个新变量。所以我有df1(数据帧)有Amount和Term。我需要创建一个新的变量“Premium”,使用查找表创建其值。
我尝试了ifelse功能,但它太繁琐了。以下是插图/示例
df1 <- data.frame(Amount, Term)
df1
# Amount Term
# 1 2500 23
# 2 3600 30
# 3 7000 45
# 4 12000 50
# 5 16000 38
我需要使用下面的Premium Lookup表创建新的'Premium'变量。
Term
Amount 0-24 Mos 25-36 Mos 37-48 Mos 49-60 Mos
0 - 5,000 133 163 175 186
5,001 - 10,000 191 213 229 249
10,001 - 15,000 229 252 275 306
15,001 - 20,000 600 615 625 719
20,001 - 25,000 635 645 675 786
所以保费的输出应该是。
df1
# Amount Term Premium
# 1 2500 23 133
# 2 3600 30 163
# 3 7000 45 229
# 4 12000 50 306
# 5 16000 38 625
要获得您想要的结果,您需要组织表格并对数据进行分类。我提供了一个处理这种情况的潜在工作流程。希望这有用:
library(tidyverse)
df1 <- data.frame(
Amount = c(2500L, 3600L, 7000L, 12000L, 16000L),
Term = c(23L, 30L, 45L, 50L, 38L)
)
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# functions for analysis ####
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
amount_tier_function <- function(x){
case_when(x <= 5000 ~ "Tier_5000",
x <= 10000 ~ "Tier_10000",
x <= 15000 ~ "Tier_15000",
x <= 20000 ~ "Tier_20000",
TRUE ~ "Tier_25000")
}
month_tier_function <- function(x){
case_when(x <= 24 ~ "Tier_24",
x <= 36 ~ "Tier_36",
x <= 48 ~ "Tier_48",
TRUE ~ "Tier_60")
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Recut lookup table headings ####
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
lookup_df <- data.frame(stringsAsFactors=FALSE,
amount_tier = c("Tier_5000", "Tier_10000", "Tier_15000", "Tier_20000",
"Tier_25000"),
Tier_24 = c(133L, 191L, 229L, 600L, 635L),
Tier_36 = c(163L, 213L, 252L, 615L, 645L),
Tier_48 = c(175L, 229L, 275L, 625L, 675L),
Tier_60 = c(186L, 249L, 306L, 719L, 786L)
)
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Join everything together ####
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
lookup_df_tidy <- lookup_df %>%
gather(mth_tier, Premium, - amount_tier)
df1 %>%
mutate(amount_tier = amount_tier_function(Amount),
mth_tier = month_tier_function(Term)) %>%
left_join(., lookup_df_tidy) %>%
select(-amount_tier, -mth_tier)
数据
df1 <- structure(list(Amount = c(2500L, 3600L, 7000L, 12000L, 16000L),
Term = c(23L, 30L, 45L, 50L, 38L)),
class = "data.frame",
row.names = c(NA, -5L))
lkp <- structure(c(133L, 191L, 229L, 600L, 635L,
163L, 213L, 252L, 615L, 645L,
175L, 229L, 275L, 625L, 675L,
186L, 249L, 306L, 719L, 786L),
.Dim = 5:4,
.Dimnames = list(Amount = c("0 - 5,000", "5,001 - 10,000",
"10,001 - 15,000", "15,001 - 20,000",
"20,001 - 25,000"),
Term = c("0-24 Mos", "25-36 Mos", "37-48 Mos",
"49-60 Mos")))
码
(month <- c(0, as.numeric(sub("\\d+-(\\d+) Mos$",
"\\1",
colnames(lkp)))))
# [1] 0 24 36 48 60
(amt <- c(0, as.numeric(sub("^\\d+,*\\d* - (\\d+),(\\d+)$",
"\\1\\2",
rownames(lkp)))))
# [1] 0 5000 10000 15000 20000 25000
df1
获取findInterval
的每个元素的位置:
(rows <- findInterval(df1$Amount, amt))
# [1] 1 1 2 3 4
(cols <- findInterval(df1$Term, month))
# [1] 1 2 3 4 3
df1$Premium <- lkp[cbind(rows, cols)]
df1
# Amount Term Premium
# 1 2500 23 133
# 2 3600 30 163
# 3 7000 45 229
# 4 12000 50 306
# 5 16000 38 625