我正在使用 CNN 自动编码器对自己生成的合成噪声数据进行降噪,目的是测试去噪自动编码器的去噪能力。同时我也想顺便做一些降维,因此把 30 个特征编码到 3 维潜在空间。代码附在下面,我的问题与得到的输出有关:噪声确实被大幅降低了,但得到的各个分量与原始信号不同步(相位不一致)。那么,这种潜在空间表示的含义究竟是什么?
R代码:
# Core Tidyverse
library(tidyverse)
library(glue)
library(forcats)
# Time Series
library(timetk)
library(tidyquant)
library(tibbletime)
# Visualization
library(cowplot)
# Preprocessing
library(recipes)
# Sampling / Accuracy
library(rsample)
library(yardstick)
# Modeling
library(keras)
library(tfruns)
library(abind)
### Equation: y = a*sin(b*t) + c.unif*amp
# Synthetic data: 10 copies of 3 noisy variants of one sine wave -> 30 columns.
# FIX: seed the RNG BEFORE any random draws — the original called runif()/rnorm()
# first and set.seed(1) afterwards, so the noise differed on every run.
set.seed(1)
# variables
n <- 20000                      # number of data points
t <- seq(0, 4 * pi, length.out = n)  # NOTE(review): shadows base::t(); harmless here
a <- 3                          # signal amplitude
b <- 100                        # signal frequency
c.unif <- runif(n)              # uniform noise source (shared by all x1 columns)
c.norm <- rnorm(n)              # Gaussian noise source (shared by all x2 columns)
amp <- 2                        # noise amplitude
# generate data and calculate "x"
# Build all 30 columns in one shot instead of growing x_features with cbind()
# inside a loop (quadratic copying).  x1/x2 reuse the same noise vectors, so
# those columns repeat exactly; only x3 (jitter) differs per copy.
x_features <- do.call(cbind, lapply(1:10, function(i) {
  cbind(
    x1 = a * sin(b * t) + c.unif * amp,         # uniform error
    x2 = a * sin(b * t) + c.norm * amp,         # Gaussian/normal error
    x3 = jitter(a * sin(b * t), amount = 1.25)  # jittered copy
  )
}))
str(x_features)
# plot results
matplot(t[1:400], x_features[1:400, ], t = "l", ylim = range(x_features))
legend("top", legend = c("x_features"), bty = "n")
# functions used
BuildTensor <- function(X, w) {
  # Slide a window of length `w` over the rows of `X` and stack the windows
  # into a 3-D array of shape (n_windows, w, n_features), so that
  # out[i, j, k] == X[i + j - 1, k].
  # Base-R replacement for the original abind/aperm/embed one-liner:
  # same output, no third-party dependency, and readable.
  X <- as.matrix(X)
  n_windows <- nrow(X) - w + 1L
  stopifnot(n_windows >= 1L)  # window must fit in the series
  out <- array(NA_real_, dim = c(n_windows, w, ncol(X)))
  for (j in seq_len(w)) {
    # j-th position of every window = rows j .. j + n_windows - 1
    out[, j, ] <- X[j:(j + n_windows - 1L), , drop = FALSE]
  }
  out
}
build_matrix <- function(tseries, overall_timesteps) {
  # Turn a numeric vector into a (n - w + 1) x w matrix of sliding windows:
  # row i is tseries[i:(i + w - 1)].
  # FIX: the original indexed with a trailing comma (tseries[x:(...), ]),
  # which errors on a vector, and 1:(length(tseries) - w + 1) over-counts
  # on a matrix (length() is rows * cols) — the helper could never run as
  # written.  vapply replaces sapply for a type-stable result.
  n_windows <- length(tseries) - overall_timesteps + 1L
  stopifnot(n_windows >= 1L)
  t(vapply(
    seq_len(n_windows),
    function(i) as.numeric(tseries[i:(i + overall_timesteps - 1L)]),
    numeric(overall_timesteps)
  ))
}
reshape_X_3d <- function(X) {
  # Append a trailing singleton axis: (rows, cols) -> (rows, cols, 1),
  # the shape Keras expects for a single-channel input.
  array(X, dim = c(nrow(X), ncol(X), 1))
}
n_timesteps <- 128  # window length fed to the autoencoder
# Slide a 128-step window over the 30 feature columns -> (n_windows, 128, 30)
X <- BuildTensor(x_features, n_timesteps)
N <- 19000  # train/test split point
x_train <- X[1:N, , ]
# FIX: start the test set at N + 1 — the original X[N:dim(X)[1], , ] put
# window N in both the training and the test set.
x_test <- X[(N + 1):dim(X)[1], , ]
str(x_train)
n_features <- 30  # number of feature columns in x_features

# ---- Encoder: (128, 30) -> 3-d latent code ------------------------------
# NOTE(review): kernel_size = n_features (30) is a kernel length along the
# TIME axis, not the feature axis — probably unintended; confirm the design.
encoder_inputs <- layer_input(shape = list(n_timesteps, n_features))
encoded <- layer_conv_1d(filters = 8, kernel_size = n_features,
                         activation = 'relu', dilation_rate = 2)(encoder_inputs)
encoded <- layer_max_pooling_1d(pool_size = 2)(encoded)
encoded <- layer_conv_1d(filters = 4, kernel_size = n_features,
                         activation = 'relu', dilation_rate = 2)(encoded)
encoded <- layer_max_pooling_1d(pool_size = 2)(encoded)
# Global average pooling already yields a 2-D (batch, filters) tensor; the
# original layer_flatten() after it was a no-op and has been dropped.
encoded <- layer_global_average_pooling_1d()(encoded)
encoded <- layer_dense(units = 3)(encoded)  # 3-d latent space (linear)

# ---- Decoder: 3-d latent code -> (128, 30) reconstruction ---------------
decoded <- layer_dense(units = 64)(encoded)
decoded <- layer_reshape(target_shape = c(16, 4))(decoded)   # (16, 4)
decoded <- layer_conv_1d(filters = 4, kernel_size = 1, strides = 1,
                         activation = 'relu', padding = 'same')(decoded)
decoded <- layer_upsampling_1d(size = 2)(decoded)            # (32, 4)
decoded <- layer_conv_1d(filters = 8, kernel_size = 1, strides = 1,
                         activation = 'relu', padding = 'same')(decoded)
decoded <- layer_upsampling_1d(size = 2)(decoded)            # (64, 8)
decoded <- layer_upsampling_1d(size = 2)(decoded)            # (128, 8)
# FIX: the output layer must be LINEAR.  The original used 'relu' here, which
# clips every negative value — but the target sine wave spans roughly
# [-(a + amp), a + amp], so the reconstruction could never reach the negative
# half of the signal (a plausible cause of the "out of sync" components).
decoded <- layer_conv_1d(filters = n_features, kernel_size = n_features,
                         strides = 1, activation = 'linear',
                         padding = 'same')(decoded)          # (128, 30)

autoencoder <- keras_model(encoder_inputs, decoded)  # full reconstruction model
model_den <- keras_model(encoder_inputs, encoded)    # encoder-only (latent codes)
autoencoder  # print model summary
# Train the autoencoder to reproduce its own (noisy) input under MSE loss.
autoencoder %>% compile(
loss = 'mse',
optimizer = 'adam'
)
autoencoder %>% fit(x_train, x_train, batch_size = 16, epochs = 100)
# Extract the 3-d latent codes for the training windows (N x 3 matrix).
x_train_den <- model_den %>% predict(x_train, batch_size = 16)
str(x_train_den)
# Two stacked panels: latent codes (top) vs. raw signal (bottom).
par(mfrow=c(2,1))
matplot(x_train_den[1:500,],type="l")
# Bottom panel plots the LAST timestep of each training window across features.
# NOTE(review): this compares a 3-d latent code against the 30 raw features —
# the latent components are not reconstructions, so phase need not match.
matplot(x_train[1:500,dim(x_train)[2],],type="l")
# Same comparison on the held-out windows, overlaid in one panel.
x_test_den <- model_den %>% predict(x_test, batch_size = 16)
matplot(x_test_den[1:500,],type="l")
matlines(x_test[1:500,dim(x_test)[2],],type="l")
潜在空间与原始特征:
对于序列或时间序列数据,请使用 LSTM 自动编码器。
您的输入是带噪声的正弦波序列数据,不应对这类序列数据使用卷积自动编码器。