我有下面这6组命令(每组3行),顺序运行时每组大约需要4分钟。在此期间,RStudio在4核8线程的CPU上只使用了约1个线程。
我正在阅读有关parallel包的资料,但不清楚怎样设置才能最好地让这些命令并行运行。
我的其余代码总共只需要几分钟,因此我不太担心;不过,如果有什么办法可以加快dplyr::group_by和dplyr::sum的速度,那就太棒了。
(在这种情况下,我认为提供可重现的示例意义不大,因为小规模的示例都会瞬间运行完毕。)
# Infer a gender for each of the six first-name columns and store the result in
# data$Gender1 .. data$Gender6. Every group repeats the same three steps:
#   1. gender_df() builds a lookup table from one name column and the
#      BirthMin/BirthMax year range (method "ssa").
#   2. A "combo" key is added to that table from its `name` and `year_min`
#      columns.
#   3. expss::vlookup() builds the same key from `data` and pulls the matching
#      "gender" value.
# paste() is used with its default separator (a single space), so keys look like
# "<name> _ <year>"; both sides of the lookup are built identically, so they
# still match. No group explicitly references another group's output.

# Group 1
GuestGender1 <- gender_df(data, name_col = "Names_1_First", year_col = c("BirthMin", "BirthMax"), method = "ssa")
GuestGender1$combo <- paste(GuestGender1$name, "_", GuestGender1$year_min)
data$Gender1 <- expss::vlookup(paste(data$Names_1_First, "_", data$BirthMin), GuestGender1, "gender", "combo")

# Group 2
GuestGender2 <- gender_df(data, name_col = "Names_2_First", year_col = c("BirthMin", "BirthMax"), method = "ssa")
GuestGender2$combo <- paste(GuestGender2$name, "_", GuestGender2$year_min)
data$Gender2 <- expss::vlookup(paste(data$Names_2_First, "_", data$BirthMin), GuestGender2, "gender", "combo")

# Group 3
GuestGender3 <- gender_df(data, name_col = "Names_3_First", year_col = c("BirthMin", "BirthMax"), method = "ssa")
GuestGender3$combo <- paste(GuestGender3$name, "_", GuestGender3$year_min)
data$Gender3 <- expss::vlookup(paste(data$Names_3_First, "_", data$BirthMin), GuestGender3, "gender", "combo")

# Group 4
GuestGender4 <- gender_df(data, name_col = "Names_4_First", year_col = c("BirthMin", "BirthMax"), method = "ssa")
GuestGender4$combo <- paste(GuestGender4$name, "_", GuestGender4$year_min)
data$Gender4 <- expss::vlookup(paste(data$Names_4_First, "_", data$BirthMin), GuestGender4, "gender", "combo")

# Group 5
GuestGender5 <- gender_df(data, name_col = "Names_5_First", year_col = c("BirthMin", "BirthMax"), method = "ssa")
GuestGender5$combo <- paste(GuestGender5$name, "_", GuestGender5$year_min)
data$Gender5 <- expss::vlookup(paste(data$Names_5_First, "_", data$BirthMin), GuestGender5, "gender", "combo")

# Group 6
GuestGender6 <- gender_df(data, name_col = "Names_6_First", year_col = c("BirthMin", "BirthMax"), method = "ssa")
GuestGender6$combo <- paste(GuestGender6$name, "_", GuestGender6$year_min)
data$Gender6 <- expss::vlookup(paste(data$Names_6_First, "_", data$BirthMin), GuestGender6, "gender", "combo")
您可以使用furrr包:
library(furrr)
library(dplyr) # for the pipe
# Resolve futures in background R sessions. The former `multiprocess` strategy
# is deprecated and has been removed from recent releases of the future
# package; `multisession` is its replacement and works on every platform
# (including inside RStudio, where forked workers are not used).
plan(multisession)
# Stand-in for one group's gender lookup, used to demonstrate parallel mapping.
#
# n: group index; selects the column named "Names_<n>_First".
# Returns the string "Result for Names_<n>_First".
LongOperation <- function(n) {
  target_col <- paste0("Names_", n, "_First")

  # The real data is not available here, so burn CPU time instead; the
  # computed mean is deliberately discarded.
  simulated_draws <- rnorm(1e7)
  mean(simulated_draws)

  # Sketch of the real work for group n (left disabled):
  #   GuestGender <- gender_df(data, name_col = target_col,
  #                            year_col = c("BirthMin", "BirthMax"),
  #                            method = "ssa")
  #   GuestGender$combo <- paste(GuestGender$name, "_", GuestGender$year_min)
  #   gender <- expss::vlookup(paste(data[[target_col]], "_", data$BirthMin),
  #                            GuestGender, "gender", "combo")

  paste0("Result for ", target_col)
}
# Run LongOperation for groups 1..6 in parallel; the result is a list with one
# element per group. LongOperation calls rnorm(), so seed = TRUE is passed to
# give every future its own parallel-safe RNG stream; without it furrr warns
# that random numbers were generated unexpectedly.
1:6 %>% future_map(LongOperation, .options = furrr_options(seed = TRUE))
运行期间,处理器使用率应该会上升到100%。