# Example observations: 100 rows, each with an `id` letter ("a" to "f"),
# a numeric `class` (1 to 4) and a numeric `score`.
data1 <- data.frame(
  id = c(
    "f", "e", "b", "a", "e", "d", "e", "f", "c", "d",
    "d", "c", "d", "b", "e", "b", "d", "b", "e", "e",
    "b", "a", "b", "e", "a", "d", "a", "d", "b", "f",
    "b", "e", "b", "d", "e", "d", "b", "e", "f", "a",
    "b", "b", "f", "e", "c", "a", "b", "d", "c", "d",
    "e", "e", "f", "e", "a", "b", "b", "c", "b", "a",
    "b", "f", "a", "b", "c", "e", "d", "a", "e", "d",
    "a", "f", "b", "d", "e", "b", "f", "e", "f", "f",
    "c", "b", "f", "c", "b", "e", "e", "f", "e", "b",
    "f", "f", "b", "e", "c", "a", "e", "c", "d", "b"
  ),
  class = c(
    4, 1, 2, 4, 1, 4, 3, 2, 1, 1,
    2, 4, 2, 2, 3, 1, 4, 1, 2, 4,
    2, 2, 1, 1, 1, 3, 4, 4, 4, 3,
    3, 2, 3, 2, 2, 2, 3, 4, 1, 2,
    4, 1, 1, 3, 3, 2, 2, 2, 4, 4,
    3, 3, 1, 1, 4, 2, 3, 2, 4, 1,
    4, 3, 2, 3, 4, 3, 3, 2, 3, 4,
    4, 1, 4, 1, 2, 3, 4, 1, 2, 1,
    3, 4, 4, 3, 1, 2, 4, 2, 3, 2,
    4, 2, 2, 1, 1, 4, 1, 3, 1, 1
  ),
  score = c(
    59, 65, 61, 64, 91, 91, 70, 90, 64, 87,
    51, 54, 92, 76, 75, 78, 55, 99, 66, 57,
    88, 89, 77, 66, 100, 92, 80, 84, 52, 66,
    59, 71, 56, 88, 51, 97, 65, 89, 65, 67,
    52, 57, 51, 63, 67, 79, 51, 90, 79, 54,
    90, 55, 90, 72, 64, 52, 95, 61, 87, 54,
    91, 75, 80, 93, 53, 81, 87, 85, 84, 84,
    81, 93, 100, 51, 70, 64, 51, 54, 83, 96,
    65, 61, 53, 80, 68, 73, 52, 57, 96, 55,
    63, 97, 94, 77, 63, 98, 85, 97, 65, 77
  )
)

# Lookup table pairing each `class` (1 to 4) with a value `s`.
data2 <- data.frame(
  class = c(1, 2, 3, 4),
  s = c(2, 5, 3, 1)
)
我有数据集 `data1`。我想根据 `data2` 中的 `class` 列,从 `data1` 中为每个 class 随机抽取 `s` 行,来创建一个新的数据集。
例如,在 `data2` 中,class 3 对应的 `s` 为 3;
因此,应从 `data1` 中 class 为 3 的各个 `id` 的行里随机抽取 3 行。
data1 = data.frame("id" = c("f","e","b","a","e","d","e","f","c","d","d","c","d","b","e","b","d","b","e","e","b","a","b","e","a","d","a","d","b","f","b","e","b","d","e","d","b","e","f","a","b","b","f", ...
使用 dplyr,一种方法是按 `class` 列连接 `df` 和 `df.1`(分别对应上文的 `data1` 和 `data2`),
再对 `class` 执行 `group_by`,
然后根据 `df.1` 中的计数列(原回答中名为 `count`,在上文数据中为 `s`)对每组的行进行采样。
library(dplyr)

# Fix the RNG seed so the random draw below is reproducible.
set.seed(123)

# For every class, draw at random as many rows of `data1` as `data2`
# requests for that class, and print the sampled rows.
data2 %>%
  # The per-class sample size is the last column of `data2`; give it a
  # name that cannot clash with a column of `data1` after the join.
  rename(n_sample = last_col()) %>%
  # Attach the requested size to every row of `data1`.
  right_join(data1, by = "class") %>%
  group_by(class) %>%
  # `n_sample` is constant within a class, so its first value is the
  # scalar size; `sample.int()` stops with an error if a class has fewer
  # rows than requested.
  slice(sample.int(n(), first(n_sample))) %>%
  ungroup() %>%
  # Keep only the original columns of `data1`, dropping the helper.
  select(all_of(names(data1)))
# Expected result with the data defined above: 11 rows (2 of class 1,
# 5 of class 2, 3 of class 3 and 1 of class 4) with the columns
# id, class and score. Which rows are drawn depends on the seed.