初始数据:
# Starting data: one row per student, with the class and three scores.
data <- data.frame(
  Student = c(1, 2, 3, 4, 5),
  Class = c("A", "A", "B", "C", "C"),
  Score1 = c(7, 4, 7, 2, 1),
  Score2 = c(2, 2, 0, 10, 4),
  Score3 = c(8, 1, 3, 2, 2)
)
所需的输出:
# Desired result: for Class "A" rows, Score1 and Score2 are folded into
# Score3 and then reset to 0; all other rows are unchanged.
data1 <- data.frame(
  Student = c(1, 2, 3, 4, 5),
  Class = c("A", "A", "B", "C", "C"),
  Score1 = c(0, 0, 7, 2, 1),
  Score2 = c(0, 0, 0, 10, 4),
  Score3 = c(17, 7, 3, 2, 2)
)
对于 Class 为 "A" 的所有行,我想将 Score1 和 Score2 加到 Score3 上,然后将 Score1 和 Score2 设置为 0。
# Logical mask selecting the rows whose Class is "A"
inds <- data[["Class"]] == "A"
# Every column after the first two (Student, Class) holds a score
score_cols <- names(data)[-(1:2)]
# Replace Score3 in the selected rows with the row-wise total of all scores
data[inds, "Score3"] <- rowSums(data[inds, score_cols])
# Score1 and Score2 are now folded into Score3, so reset them to 0
data[inds, c("Score1", "Score2")] <- 0
# Student Class Score1 Score2 Score3
#1 1 A 0 0 17
#2 2 A 0 0 7
#3 3 B 7 0 3
#4 4 C 2 10 2
#5 5 C 1 4 2
使用 data.table 也可以完成:
library(data.table)

# Convert the data.frame to a data.table by reference (no copy is made).
setDT(data)
# `inds` is the logical mask for the Class == "A" rows computed earlier.
# For those rows, Score3 becomes the row-wise sum of every column whose
# name contains "Score".
data[inds, Score3 := rowSums(.SD), .SDcols = grep("Score", names(data))]
# Then reset Score1 and Score2 to 0 in the same rows.
data[inds, c("Score1", "Score2") := 0]
使用 data.table 软件包:
library(data.table)
# Score columns, in the same order as the replacement values supplied below
cols <- c("Score1", "Score2", "Score3")
# Convert to a data.table by reference, then update the Class "A" rows in
# place: Score1 and Score2 become 0 and Score3 receives the row-wise sum of
# .SD (restricted to the columns named in .SDcols).
setDT(data)
data[
  Class == "A",
  (cols) := list(0, 0, rowSums(.SD)),
  .SDcols = cols
]
# Student Class Score1 Score2 Score3
# 1: 1 A 0 0 17
# 2: 2 A 0 0 7
# 3: 3 B 7 0 3
# 4: 4 C 2 10 2
# 5: 5 C 1 4 2
使用 apply 和 dplyr
包的另一个选项:
library(dplyr)

# Only rows with Class == "A" are modified; every other row keeps its values.
data %>%
  mutate(
    # Score3 is updated first, while Score1 and Score2 still hold their
    # original values. `.` is the piped data frame; only the numeric Score
    # columns are selected, so apply()'s matrix coercion is harmless here.
    Score3 = if_else(
      Class == "A",
      apply(select(., starts_with("Score")), 1, sum),
      Score3
    ),
    # With the total stored in Score3, reset the two folded-in columns.
    Score1 = if_else(Class == "A", 0, Score1),
    Score2 = if_else(Class == "A", 0, Score2)
  )
这基本上是一个 if-else 条件,可以使用 dplyr 中的 case_when 来完成。
# Load the packages
library(dplyr)
library(magrittr)

# mutate() evaluates its arguments in order and later arguments see the
# columns changed by earlier ones. Score3 is therefore computed first, while
# Score1 and Score2 still hold their original values; zeroing them first
# would make the sum collapse to 0.
data_output <- data %>%
  mutate(
    # For Class "A", fold Score1 and Score2 into the existing Score3
    Score3 = case_when(
      Class == "A" ~ Score1 + Score2 + Score3,
      TRUE ~ Score3
    ),
    # For Class "A", Score1 is set to 0; all other rows are left unchanged
    Score1 = case_when(
      Class == "A" ~ 0,
      TRUE ~ Score1
    ),
    # Same rule for Score2
    Score2 = case_when(
      Class == "A" ~ 0,
      TRUE ~ Score2
    )
  )
data_output
# Student Class Score1 Score2 Score3
#1 1 A 0 0 17
#2 2 A 0 0 7
#3 3 B 7 0 3
#4 4 C 2 10 2
#5 5 C 1 4 2