My autoencoder is not learning to predict the target values


I am trying to build a variational autoencoder in Keras, with inputs of shape X = (1, 50) and Y = (1, 20).

There is a single input, and I want the model to learn the relationship between the input and the output (the data is one-dimensional, for a binary case), but the model never learns to predict the target values.

I have tried changing the activations and the loss, but with no improvement.

# %% [code]
from keras.layers import Lambda, Input, Dense, Reshape, RepeatVector, Dropout
from keras.models import Model
from keras.datasets import mnist
from keras.losses import mse, binary_crossentropy
from keras.utils import plot_model
from keras import backend as K
from keras.constraints import unit_norm, max_norm
import tensorflow as tf

from scipy import stats
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import argparse
import os
from sklearn.manifold import MDS
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import mean_squared_error, r2_score
from keras.layers import Flatten, Conv1D, BatchNormalization, MaxPooling1D, Activation

from mpl_toolkits.mplot3d import Axes3D



# reparameterization trick
# instead of sampling from Q(z|X), sample eps = N(0,I)
# z = z_mean + sqrt(var)*eps
def sampling(args):
    """Reparameterization trick by sampling fr an isotropic unit Gaussian.
    # Arguments:
        args (tensor): mean and log of variance of Q(z|X)
    # Returns:
        z (tensor): sampled latent vector
    """

    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    # by default, random_normal has mean=0 and std=1.0
    epsilon = K.random_normal(shape=(batch, dim))
    thre = K.random_uniform(shape=(batch, 1))  # note: generated but never used
    return z_mean + K.exp(0.5 * z_log_var) * epsilon


# Load my Data
training_feature = X
ground_truth_r = Y
np.random.seed(seed=0)
original_dim = 32

# Define VAE model components
input_shape_x = (32, )
input_shape_r = (16, )
intermediate_dim = 32
latent_dim = 32


# Encoder network
inputs_x = Input(shape=input_shape_x, name='encoder_input')
inputs_x_dropout = Dropout(0.25)(inputs_x)
inputs_x_dropout = Dense(1024, activation='relu')(inputs_x_dropout)
inputs_x_dropout = Dense(512, activation='relu')(inputs_x_dropout)
inputs_x_dropout = Dense(224, activation='relu')(inputs_x_dropout)
inter_x1 = Dense(128, activation='relu')(inputs_x_dropout)
inter_x2 = Dense(intermediate_dim, activation='relu')(inter_x1)
z_mean = Dense(latent_dim, name='z_mean')(inter_x2)
z_log_var = Dense(latent_dim, name='z_log_var')(inter_x2)
z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])
encoder = Model(inputs_x, [z_mean, z_log_var, z], name='encoder')

# Decoder network for reconstruction
latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
inter_y1 = Dense(intermediate_dim, activation='relu')(latent_inputs)

inter_y1 = Dense(224, activation='relu')(inter_y1)
inter_y1 = Dense(512, activation='relu')(inter_y1)
inter_y1 = Dense(1024, activation='relu')(inter_y1)

inter_y2 = Dense(128, activation='relu')(inter_y1)
outputs_reconstruction = Dense(original_dim)(inter_y2)
decoder = Model(latent_inputs, outputs_reconstruction, name='decoder')

# Separate network for prediction from latent space
outputs_prediction = Dense(Y.shape[1])(inter_y2)  # Adjust Y.shape[1] as per your data
predictor = Model(latent_inputs, outputs_prediction, name='predictor')

# Instantiate VAE model with two outputs
outputs_vae = [decoder(encoder(inputs_x)[2]), predictor(encoder(inputs_x)[2])]
vae = Model(inputs_x, outputs_vae, name='vae_mlp')
vae.compile(optimizer='adam', loss=['mean_squared_error', 'mean_squared_error'])

# Train the model
#history = vae.fit(X, [X, Y], epochs=100, batch_size=100, shuffle=True,validation_data=(XX,[XX, YY]))
# Compile the decoder with the reconstruction loss
decoder.compile(optimizer='adam', loss='mean_squared_error')

# Compile the predictor with the prediction loss
predictor.compile(optimizer='adam', loss='mean_squared_error')

# Train the decoder
history_decoder = decoder.fit(X, X, epochs=100, batch_size=20, shuffle=True, validation_data=(XX, XX))

# Train the predictor
history_predictor = predictor.fit(X, Y, epochs=100, batch_size=20, shuffle=True, validation_data=(XX, YY))


# Save models and plot training/validation loss
decoder.save("BrmDeco Seperate.h5")
predictor.save("BrmPred Seperate.h5")

plt.figure(figsize=(12, 4))

plt.subplot(1, 2, 1)
plt.plot(history_decoder.history['loss'], label='Decoder Training Loss')
plt.plot(history_decoder.history['val_loss'], label='Decoder Validation Loss')
plt.title('Decoder Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(history_predictor.history['loss'], label='Predictor Training Loss')
plt.plot(history_predictor.history['val_loss'], label='Predictor Validation Loss')
plt.title('Predictor Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend()

plt.show()
Tags: keras, deep-learning, neural-network
1 Answer

I was able to train the model. The main changes I made were to set the predictor output size n_outputs to match the number of classes being predicted, and to assign a sparse_categorical_crossentropy loss to the prediction output (MSE loss is usually used for regression, not classification).

I ran it on some of the digits images from sklearn, so I increased the input shape from 32 to 36 and set the predictor output shape to 10 classes. I standardised the input data and used a smaller batch size.

I did not include validation data, as this was just a test to confirm that the model can learn as expected.
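To make the key change concrete, here is a minimal, self-contained sketch contrasting the two output heads; the 128-unit feature input and the example sizes are placeholders, not the exact values from the full code below.

from keras.layers import Input, Dense
from keras.models import Model

# Stand-in for the decoder's intermediate features (inter_y2 in the full code)
feat_in = Input(shape=(128,))

# Regression head (as in the question): one unit per target value, MSE loss
reg_head = Model(feat_in, Dense(20)(feat_in))
reg_head.compile(optimizer='adam', loss='mean_squared_error')

# Classification head (as in this answer): one unit per class, softmax output,
# with sparse_categorical_crossentropy so integer labels can be used directly
n_classes = 10
clf_head = Model(feat_in, Dense(n_classes, activation='softmax')(feat_in))
clf_head.compile(optimizer='adam', loss='sparse_categorical_crossentropy')

The full code follows.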

from keras.layers import Lambda, Input, Dense, Dropout
from keras.models import Model
from keras import backend as K
import tensorflow as tf

import numpy as np
import matplotlib.pyplot as plt
from keras.layers import Input, Dense, Lambda
from keras.models import Model
import keras.backend as K

#Set the random seed for consistent results
import random
random.seed(0)
tf.random.set_seed(0)
np.random.seed(0)
#clear session for each run
K.clear_session()

#
#Load digits data
#
from sklearn.datasets import load_digits
from sklearn.preprocessing import StandardScaler

digits = load_digits()
X, Y = digits['data'], digits['target']

Y = Y.reshape(-1, 1).astype(int)
X = X.reshape(-1, 8, 8)[:, 1:-1, 1:-1].reshape(-1, 36)
X = StandardScaler().fit_transform(X)

original_dim = X.shape[1]
n_classes = len(np.unique(Y))

#View some samples
f, axs = plt.subplots(5, 5, figsize=(4, 4), layout='tight')
for i, ax in enumerate(axs.flatten()):
    ax.imshow(X[i, :].reshape(6, 6), cmap='binary')
    ax.axis('off')
    ax.set_title(f'"{Y[i][0]}"', fontsize=8)
f.suptitle('Samples from normalised digits data', fontsize=10)
plt.show()

# reparameterization trick
# instead of sampling from Q(z|X), sample eps = N(0,I)
# z = z_mean + sqrt(var)*eps
def sampling(args):
    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    # by default, random_normal has mean=0 and std=1.0
    epsilon = K.random_normal(shape=(batch, dim))
    thre = K.random_uniform(shape=(batch, 1))  # note: generated but never used
    return z_mean + K.exp(0.5 * z_log_var) * epsilon

# Define VAE model components
intermediate_dim = 32 // 2
latent_dim = 32 // 8

# Encoder network
inputs_x = Input(shape=(original_dim,), name='encoder_input')
inputs_x_dropout = Dropout(0.25)(inputs_x)
inputs_x_dropout = Dense(1024, activation='relu')(inputs_x_dropout)
inputs_x_dropout = Dense(512, activation='relu')(inputs_x_dropout)
inputs_x_dropout = Dense(224, activation='relu')(inputs_x_dropout)

inter_x1 = Dense(128, activation='relu')(inputs_x_dropout)
inter_x2 = Dense(intermediate_dim, activation='relu')(inter_x1)

z_mean = Dense(latent_dim, name='z_mean')(inter_x2)
z_log_var = Dense(latent_dim, name='z_log_var')(inter_x2)
z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])
encoder = Model(inputs_x, [z_mean, z_log_var, z], name='encoder')

# Decoder network for reconstruction
latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
inter_y1 = Dense(intermediate_dim, activation='relu')(latent_inputs)
inter_y1 = Dense(224, activation='relu')(inter_y1)
inter_y1 = Dense(512, activation='relu')(inter_y1)
inter_y1 = Dense(1024, activation='relu')(inter_y1)

inter_y2 = Dense(128, activation='relu')(inter_y1)
outputs_reconstruction = Dense(original_dim)(inter_y2)
decoder = Model(latent_inputs, outputs_reconstruction, name='decoder')

# Separate network for class prediction from inter_y2
outputs_prediction = Dense(n_classes, activation='softmax')(inter_y2)
predictor = Model(latent_inputs, outputs_prediction, name='predictor')

# Instantiate VAE model with two outputs
outputs_vae = [decoder(z), predictor(z)]
vae = Model(inputs_x, outputs_vae, name='vae_mlp')
loss_weights = [0.55, 0.45]  # scale the two losses so they're at roughly the same level
vae.compile(
    optimizer='adam',
    loss=['mean_squared_error', 'sparse_categorical_crossentropy'],
    loss_weights=loss_weights
)

# Train the model
history = vae.fit(X, [X, Y], epochs=10, batch_size=32, shuffle=True)

plt.plot(history.history['loss'], label='total')
plt.plot(np.array(history.history['decoder_loss']) * loss_weights[0], label='decoder MSE loss')
plt.plot(np.array(history.history['predictor_loss']) * loss_weights[1], label='predictor CE loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.title('Losses')
plt.legend()
plt.gcf().set_size_inches(5, 3)

recon, preds = vae.predict(X)
f, axs = plt.subplots(5, 5, figsize=(5, 5), layout='tight')
for i, ax in enumerate(axs.flatten()):
    ax.imshow(recon[i, :].reshape(6, 6), cmap='binary')
    ax.axis('off')
    ax.set_title(r'$\hat{y}$=' + f'{preds[i].argmax()} | $y$={Y[i][0]}', fontsize=8)
f.suptitle('Digit reconstructions and predictions', fontsize=10)
plt.show()
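As an optional sanity check (not part of the original run), the arrays produced above can be used to compute an overall classification accuracy and reconstruction error:

# Optional check: accuracy of the predictor head and MSE of the reconstructions,
# using recon, preds, X and Y from the code above
accuracy = (preds.argmax(axis=1) == Y.ravel()).mean()
recon_mse = np.mean((recon - X) ** 2)
print(f'prediction accuracy: {accuracy:.3f} | reconstruction MSE: {recon_mse:.3f}')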