我正在寻找有效和/或优雅的方法来计算累积最大值。我对算法本身以及解决该问题的不同方法感兴趣。
set.seed(1)
vec <- sample(1000,10,TRUE)
# vec
# [1] 266 373 573 909 202 899 945 661 630 62
预计:
cummax(vec)
# [1] 266 373 573 909 909 909 945 945 945 945
这里有 2 个解决方案可供您参考:
cummax_1 <- function(x) sapply(seq_along(x),function(i) max(x[1:i]))
cummax_2 <- function(x) {
l <- length(x)
y <- numeric(l)
y[1] <- x[1]
for(i in 2:l) y[i] <- max(x[i],y[i-1])
y
}
library(microbenchmark)
set.seed(1)
vec_bm <- sample(1e3,1e4,TRUE)
microbenchmark(
cummax_1 = cummax_1(vec_bm),
cummax_2 = cummax_2(vec_bm),
times = 10)
# Unit: milliseconds
# expr min lq mean median uq max neval
# cummax_1 337.33007 338.92838 350.93473 341.1705 375.4408 377.56073 10
# cummax_2 13.61659 13.85143 14.30533 14.1797 14.8560 15.18955 10
基于您的
cummax_2
,您实际上可以通过添加if
语句来提高性能,这样仅当前一个值大于当前值时才会发生最大值更新。
cummax_3 <- function(x) {
for (i in 2:length(x)) {
if (x[i] < x[i - 1L]) {
x[i] <- x[i - 1L]
}
}
x
}
给出如下候选人
cummax_baser <- cummax
cummax_1 <- function(x) sapply(seq_along(x), function(i) max(x[1:i]))
cummax_2 <- function(x) {
l <- length(x)
y <- numeric(l)
y[1] <- x[1]
for (i in 2:l) y[i] <- max(x[i], y[i - 1])
y
}
cummax_3 <- function(x) {
for (i in 2:length(x)) {
if (x[i] < x[i - 1L]) {
x[i] <- x[i - 1L]
}
}
x
}
cummax_4 <- function(x) {
Reduce(max, x, accumulate = TRUE)
}
和基准测试
set.seed(1)
vec_bm <- sample(1e4)
bm <- microbenchmark(
cummax_baser = cummax_baser(vec_bm),
cummax_1 = cummax_1(vec_bm),
cummax_2 = cummax_2(vec_bm),
cummax_3 = cummax_3(vec_bm),
cummax_4 = cummax_4(vec_bm),
times = 10,
unit = "relative"
)
我们将会看到
> bm
Unit: relative
expr min lq mean median uq
cummax_baser 1.00000 1.00000 1.00000 1.00000 1.00000
cummax_1 16706.47586 14925.03922 13706.72870 14574.64789 14996.19742
cummax_2 164.40000 132.45588 155.49367 147.35915 167.30043
cummax_3 53.75172 39.78431 59.02883 40.15258 47.55794
cummax_4 362.84828 285.67157 292.80778 308.45305 306.72532
max neval
1.0000 10
8429.4262 10
186.1524 10
127.2738 10
219.0714 10