我想有条件地替换一列中的值,我想我需要的是一个循环。虽然我知道循环的基本工作原理,但我几乎没有使用它们的经验,所以我很难理解操作的顺序。我使用的数据框如下:
# Example data in dput() form: one row per player (p1-p11), Date (D1-D3)
# and `Period Number` (0 or 1), with the measured `IMA High Total` and Dur.
# The original row names (727, 728, ...) are kept via `row.names`.
df <- structure(list(player = c("p1", "p2", "p3", "p4", "p5", "p6",
"p7", "p8", "p9", "p10", "p11", "p1", "p2", "p3", "p4", "p5",
"p6", "p7", "p8", "p9", "p10", "p11", "p1", "p2", "p3", "p4",
"p5", "p6", "p7", "p8", "p9", "p10", "p11", "p1", "p2", "p3",
"p4", "p5", "p6", "p7", "p8", "p9", "p10", "p11", "p1", "p2",
"p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10", "p11", "p1",
"p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10", "p11"),
Date = c("D1", "D1", "D1", "D1", "D1", "D1", "D1", "D1",
"D1", "D1", "D1", "D1", "D1", "D1", "D1", "D1", "D1", "D1",
"D1", "D1", "D1", "D1", "D2", "D2", "D2", "D2", "D2", "D2",
"D2", "D2", "D2", "D2", "D2", "D2", "D2", "D2", "D2", "D2",
"D2", "D2", "D2", "D2", "D2", "D2", "D3", "D3", "D3", "D3",
"D3", "D3", "D3", "D3", "D3", "D3", "D3", "D3", "D3", "D3",
"D3", "D3", "D3", "D3", "D3", "D3", "D3", "D3"), `Period Number` = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), `IMA High Total` = c(0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 43L, 51L, 13L, 84L, 74L, 12L, 12L, 12L, 40L,
75L, 2L, 12L, 5L, 9L, 26L, 9L, 0L, 12L, 4L, 2L, 10L, 2L,
48L, 53L, 9L, 51L, 53L, 3L, 23L, 21L, 68L, 56L, 1L, 7L, 3L,
9L, 15L, 9L, 0L, 15L, 9L, 4L, 8L, 1L), Dur = c(67.6, 98.3,
50.5, 93.3, 97.5, 51, 42, 61.8, 94.7, 107.2, 42, 41.9, 41.9,
41.9, 41.9, 41.9, 41.9, 41.9, 41.9, 41.9, 41.9, 41.9, 62.5,
90.4, 44.5, 88.9, 87.3, 48.3, 40.2, 55.4, 84.5, 94.5, 40.2,
39.1, 39.1, 39.1, 39.1, 39.1, 39.1, 39.1, 39.1, 39.1, 39.1,
39.1, 75.4, 87.9, 42.2, 90.2, 86, 47.5, 49.8, 60.1, 104.3,
108.1, 42.2, 42.1, 42.1, 42.1, 42.1, 42.1, 42.1, 42.1, 42.1,
42.1, 42.1, 42.1)), row.names = c(727L, 728L, 729L, 730L,
731L, 732L, 733L, 734L, 735L, 736L, 737L, 738L, 739L, 740L, 741L,
742L, 743L, 744L, 745L, 746L, 747L, 748L, 793L, 794L, 795L, 796L,
797L, 798L, 799L, 800L, 801L, 802L, 803L, 804L, 805L, 806L, 807L,
808L, 809L, 810L, 811L, 812L, 813L, 814L, 859L, 860L, 861L, 862L,
863L, 864L, 865L, 866L, 867L, 868L, 869L, 870L, 871L, 872L, 873L,
874L, 875L, 876L, 877L, 878L, 879L, 880L), class = "data.frame")
我想把 D1 的 `IMA High Total` 列中的 0 值,替换为 D2 和 D3 的平均值:按 `player` 列和 `Period Number` 列分别匹配,并且只使用 Dur > 1 的行。以一个子集为例,就是把下面这样的数据
player Date Period Number IMA High Total Dur
727 p1 D1 0 0 67.6
738 p1 D1 1 0 41.9
793 p1 D2 0 43 62.5
804 p1 D2 1 12 39.1
859 p1 D3 0 48 75.4
870 p1 D3 1 7 42.1
转换成这样:
player Date Period Number IMA High Total Dur
727 p1 D1 0 46 67.6
738 p1 D1 1 10 41.9
793 p1 D2 0 43 62.5
804 p1 D2 1 12 39.1
859 p1 D3 0 48 75.4
870 p1 D3 1 7 42.1
如果有比循环更简单的方法,那太好了! 谢谢帮助,stackoverflow社区对我的R学习很有帮助。
我们可以按 `player` 和 `Period Number` 进行 `group_by`,然后用 `replace` 把 `Date == "D1"` 且 `IMA High Total == 0` 的 `IMA High Total` 值,替换为同一组内 `Date != "D1"` 且 `Dur > 1` 的那些行的 `mean`(平均值)。
library(dplyr)
# For every player / `Period Number` combination, overwrite the zero
# `IMA High Total` values recorded on Date "D1" with the mean
# `IMA High Total` of the same group's other dates, using only rows where
# Dur > 1. The column becomes double because the mean is not an integer.
df %>%
  group_by(player, `Period Number`) %>%
  mutate(
    `IMA High Total` = replace(
      `IMA High Total`,
      # Only the D1 rows that are still 0 get overwritten.
      Date == "D1" & `IMA High Total` == 0,
      # Group-wise mean of the non-D1 rows; this is NaN if a group has no
      # non-D1 row with Dur > 1.
      mean(`IMA High Total`[Date != "D1" & Dur > 1])
    )
  ) %>%
  # Drop the grouping so later verbs on the result are not silently
  # evaluated per player / period.
  ungroup() %>%
  arrange(player, `Period Number`)
# player Date `Period Number` `IMA High Total` Dur
# <chr> <chr> <int> <dbl> <dbl>
# 1 p1 D1 0 45.5 67.6
# 2 p1 D2 0 43 62.5
# 3 p1 D3 0 48 75.4
# 4 p1 D1 1 9.5 41.9
# 5 p1 D2 1 12 39.1
# 6 p1 D3 1 7 42.1
# 7 p10 D1 0 65.5 107.
# 8 p10 D2 0 75 94.5
# 9 p10 D3 0 56 108.
#10 p10 D1 1 9 41.9
# … with 56 more rows