我想先按分组变量 id 求出 value 的最大值,然后用每个分组 id 对应的最大值创建一个新列。
类似:
# NOTE: aggregate() returns a data frame with one row per `id`, not one value
# per row of `df`, so assigning its result to a single column fails with the
# row-count error shown below.
df$maxdvalue <-aggregate(value ~ id, data = df, max)
我收到以下错误:
`$<-.data.frame`(`*tmp*`, maxdvalue, value = list(id = 1:1763, ...) 中的错误:
  替换项有 1763 行,数据项有 74619 行
实现的方法有很多。既然您刚接触 R / Stack Overflow,下面列出几种我希望自己早点知道的方法:
# Group-wise aggregation: attach each row's group maximum as a new column.
# (`max` is only an example; any other summary function can be passed as FUN.)

# Method 1 (base R): ave() yields one value per row, stored directly as a
# column of the existing data frame.
df[["maxmpg"]] <- ave(x = df[["mpg"]], df[["car_brand"]], FUN = max)

# Method 2 (base R): transform() evaluates ave() inside the data frame and
# returns a new data frame, which replaces `df`.
df <- transform(
  df,
  maxmpg = ave(x = mpg, car_brand, FUN = max)
)

# Method 3 (base R): with() evaluates ave() inside the data frame; the
# resulting vector is stored as a column.
df[["maxmpg"]] <- with(
  data = df,
  expr = ave(x = mpg, car_brand, FUN = max)
)

# Method 4 (base R): cbind() appends the ave() result as an extra column and
# returns a new data frame, which replaces `df`.
df <- cbind(
  df,
  maxmpg = ave(x = df[["mpg"]], df[["car_brand"]], FUN = max)
)
# 5th method using tapply, assigned as vector:
# tapply() returns ONE value per group (named by group), not one per row, so
# its result cannot be assigned to a column directly: that raises the same
# "replacement has ... rows" error as in the question, or silently recycles
# when the row count happens to be a multiple of the group count.
# Indexing the per-group maxima by each row's group expands them to one value
# per row; as.vector() drops the names/array attributes before storing.
df$maxmpg <- as.vector(
  tapply(df$mpg, df$car_brand, max)[as.character(df$car_brand)]
)
# Method 6 (base R): split the data frame into one piece per car_brand, add
# the piece's maximum mpg as a column, then row-bind the pieces back into a
# single data frame (which replaces `df`).
df <- do.call(
  rbind,
  lapply(
    split(df, df$car_brand),
    function(brand_rows) {
      brand_rows$maxmpg <- max(brand_rows$mpg)
      brand_rows
    }
  )
)
# Method 7 (base R): compute one maximum per car_brand with aggregate(),
# rename the result columns, and left-join it back onto `df` by car_brand
# (the joined data frame replaces `df`).
df <- merge(
  x = df,
  y = setNames(
    aggregate(mpg ~ car_brand, data = df, FUN = max),
    c("car_brand", "max_mpg")
  ),
  by = "car_brand",
  all.x = TRUE
)
# Using packages:
# Create a vector of the required packages:
necessary_packages <- c("dplyr", "data.table")

# Create a vector containing the names of any packages requiring installation.
# system.file() returns "" for a package that is not installed; this is the
# check recommended by ?installed.packages, which is slow and not meant for
# testing whether a named package is available.
new_packages <- necessary_packages[
  !vapply(
    necessary_packages,
    function(pkg) nzchar(system.file(package = pkg)),
    logical(1),
    USE.NAMES = FALSE
  )
]

# If the vector has more than 0 elements, install the new packages
# (and their associated dependencies):
if (length(new_packages) > 0) {
  install.packages(new_packages, dependencies = TRUE)
}

# Attach the packages in the session. library() stops with an error if a
# package is still unavailable (require() would only return FALSE and let the
# script fail later); invisible() suppresses printing of the returned list.
invisible(lapply(necessary_packages, library, character.only = TRUE))
# Method 8 (dplyr): group by car_brand, add the group maximum of mpg as a
# column, then drop the grouping. The result is assigned back as a data frame
# (tibble), not as a vector.
df <- df %>%
  dplyr::group_by(car_brand) %>%
  dplyr::mutate(maxmpg = max(mpg)) %>%
  dplyr::ungroup()
# Method 9 (data.table): convert to a data.table, then add the per-brand
# maximum of mpg as a new column by reference (`dt` is modified in place).
dt <- data.table::data.table(df)
dt[, `:=`(maxmpg = max(mpg)), by = "car_brand"]
使用的数据:
# Example data built from the built-in mtcars dataset:
#   car_type  - the full model name (taken from the mtcars row names)
#   car_brand - the model name with everything from the first space onwards
#               removed
# followed by all original mtcars columns; row names are reset to 1..n.
df <- local({
  model_names <- rownames(mtcars)
  data.frame(
    car_type = model_names,
    car_brand = sub(" .*", "", model_names),
    mtcars,
    row.names = NULL
  )
})