# 校正时间序列中的连续错误

##### 问题描述投票：1回答：1

``````par(mfrow=c(2,1))

#simulating perfect dataset
dfe<-data.frame(
date=seq(as.Date('2015-07-12'),as.Date('2015-07-12')+49, by = '1 day'),
valueideal=round(sin(seq(1,50,1))+20)
)

#introducing artifacts
dfe\$valuer<-dfe\$valueideal
dfe\$valuer[10:20]<-dfe\$valueideal[10:20]+10
dfe\$valuer[30:35]<-dfe\$valueideal[30:35]-10

#plotting ideal vs real data
plot(dfe\$date, dfe\$valuer, main="real data", ylim=c(8,32))
plot(dfe\$date, dfe\$valueideal, main="ideal data", ylim=c(8,32))
``````

``````#trying to solve it with a loop
dfe\$valuel<-dfe\$valuer
for (i in seq(2,length(dfe\$valuel)-1,1)){
future<-diff(c(dfe\$valuel[i],dfe\$valuel[i+1]))
past<-diff(c(dfe\$valuel[i-1],dfe\$valuel[i]))

if (abs(future)>2*abs(past)){
dfe\$valuel[i:length(dfe\$valuel)]<-dfe\$valuel[i:length(dfe\$valuel)]-future

}
}
plot(dfe\$date, dfe\$valuel, main="loop corrected data", ylim=c(8,32))
``````

``````#trying to solve it with a vectorised function
remove_artifacts <- function(weights, delta_max) {
# calculate deltas, and set first delta to zero
dw <- c(0, diff(x))
# create vector of zeros and abs(observations) > delta_max
# dw * (logical vector) results in either:
# dw * 0 (if FALSE)
# dw * 1 (if TRUE)
dm <- dw * (abs(dw) > delta_max)
# subtract the cumulative sum of observations > delta_max
return(weights - cumsum(dm))
}
dfe\$valuedm<-remove_artifacts(dfe\$valuer, 10)
plot(dfe\$date, dfe\$valuedm, main="remove artifacts function", ylim=c(8,32))
``````

r time-series error-correction
##### 1个回答
0

``````#simulating perfect dataset
dfe<-data.frame(
date=seq(as.Date('2015-07-12'),as.Date('2015-07-12')+49, by = '1 day'),
valueideal=round(sin(seq(1,50,1))+20)
)

#introducing artifacts
dfe\$valuer<-dfe\$valueideal
dfe\$valuer[10:20]<-dfe\$valueideal[10:20]+10
dfe\$valuer[30:35]<-dfe\$valueideal[30:35]-10
``````

``````# Find breakpoints
bp <- strucchange::breakpoints(valuer ~ date, data = dfe)

# Get breakpoints plus start & end of time series
int <- c(1, bp\$breakpoints + 1, nrow(dfe))
``````

``````# Create labels
dfe\$label <- cut(1:nrow(dfe),
breaks = int,
include.lowest = TRUE,
right = FALSE)

# Plot "real" data coloured by label
with(dfe, plot(date, valuer, col = label, main="real data", ylim=c(8,32)))
``````

``````# Load library
library(data.table)

# Convert to data.table
setDT(dfe)
``````

``````# Offset by mean
dfe[, corrected := valuer - mean(valuer), by = label]

# Plot again
with(dfe, plot(date, corrected, col = label, main = "Corrected data", ylim = c(-10, 10)))
``````

<< img src =“ https://image.soinside.com/eyJ1cmwiOiAiaHR0cHM6Ly9pLmltZ3VyLmNvbS90Q0I2ZGZQLnBuZyJ9” alt =“”>

reprex package（v0.2.1.9000）在2019-12-02创建

## TL; DR

``````# Find breakpoints
bp <- strucchange::breakpoints(valuer ~ date, data = dfe)\$breakpoints

# Add start & end points
int <- c(1, bp + 1, nrow(dfe))

# Tag intervals
dfe\$label <- cut(1:nrow(dfe),
breaks = int,
include.lowest = TRUE,
right = FALSE)

# Correct by subtracting mean from each interval
data.table::setDT(dfe)[, corrected := valuer - mean(valuer), by = label]
``````