无法保存自定义keras模型

Question

我有一个基于 keras.Model 类的自定义模型，我试图保存该模型以供以后使用。虽然保存模型可以正常工作，但重新加载同一模型会引发以下错误：

ValueError: Cannot assign value to variable ' dense_4/kernel:0': Shape mismatch.The variable shape (24, 512), and the assigned value shape (784, 256) are incompatible.

我正在使用 keras 文件格式以及相应的

model.save

和

keras.saving.load_model

方法。我也尝试了

pickle

和

dill

，都未能保存模型。

class Encoder

之上的所有内容都是损失计算的数学，可以忽略（可能），但需要存在以实现功能。该模型是一种在 MNIST 数据上训练的变分自动编码器。首先训练 BaseNet 类。 HigherNet 从经过训练的 BaseNet 接收编码器和解码器，并使用自己的生成器网络再次进行训练。此 HigherNet 已保存，这是我尝试再次加载的模型。除了在 HigherNet 中使用的部分之外，BaseNet 本身不再相关。

错误重现的工作示例：

import tensorflow as tf
import keras
from keras import layers
import numpy as np
import math


def silverman_rule_of_thumb_normal(N):
    return tf.pow((4 / (3 * N)), 0.4)


def pairwise_distances(x, y=None):
    if y is None:
        y = x
    distances_tf = tf.norm(x[:, None] - y, axis=-1) ** 2
    return tf.cast(distances_tf, dtype=tf.float64)


def cw_normality(X, y=None):
    assert len(X.shape) == 2

    D = tf.cast(tf.shape(X)[1], tf.float64)
    N = tf.cast(tf.shape(X)[0], tf.float64)

    if y is None:
        y = silverman_rule_of_thumb_normal(N)

    # adjusts for dimensionality; D=2 -> K1=1, D>2 -> K1<1
    K1 = 1.0 / (2.0 * D - 3.0)

    A1 = pairwise_distances(X)
    A = tf.reduce_mean(1 / tf.math.sqrt(y + K1 * A1))

    B1 = tf.cast(tf.square(tf.math.reduce_euclidean_norm(X, axis=1)), dtype=tf.float64)
    B = 2 * tf.reduce_mean((1 / tf.math.sqrt(y + 0.5 + K1 * B1)))

    return (1 / tf.sqrt(1 + y)) + A - B


def phi_sampling(s, D):
    return tf.pow(1.0 + 4.0 * s / (2.0 * D - 3), -0.5)


def cw_sampling_lcw(first_sample, second_sample, y):
    shape = first_sample.get_shape().as_list()
    dim = np.prod(shape[1:])
    first_sample = tf.reshape(first_sample, [-1, dim])

    shape = second_sample.get_shape().as_list()
    dim = np.prod(shape[1:])
    second_sample = tf.reshape(second_sample, [-1, dim])

    assert len(first_sample.shape) == 2
    assert first_sample.shape == second_sample.shape

    _, D = first_sample.shape

    T = 1.0 / (2.0 * tf.sqrt(math.pi * y))

    A0 = pairwise_distances(first_sample)
    A = tf.reduce_mean(phi_sampling(A0 / (4 * y), D))

    B0 = pairwise_distances(second_sample)
    B = tf.reduce_mean(phi_sampling(B0 / (4 * y), D))

    C0 = pairwise_distances(first_sample, second_sample)
    C = tf.reduce_mean(phi_sampling(C0 / (4 * y), D))

    return T * (A + B - 2 * C)


def euclidean_norm_squared(X, axis=None):
    return tf.reduce_sum(tf.square(X), axis=axis)


def cw_sampling_silverman(first_sample, second_sample):
    stddev = tf.math.reduce_std(second_sample)
    N = tf.cast(tf.shape(second_sample)[0], tf.float64)
    gamma = silverman_rule_of_thumb_normal(N)
    return cw_sampling_lcw(first_sample, second_sample, gamma)


@tf.keras.saving.register_keras_serializable()
class Encoder(keras.Model):
    def __init__(self, args, **kwargs):
        super().__init__(**kwargs)
        self.activation = layers.Activation("relu")
        self.flatten = layers.Flatten()
        self.dense1 = layers.Dense(256)
        self.dense2 = layers.Dense(args["latent_dim"], name="z")

    def build(self, **kwargs):
        encoder_inputs = keras.Input(shape=(28, 28, 1))
        x = self.flatten(encoder_inputs)
        x = self.dense1(x)
        x = self.activation(x)
        z = self.dense2(x)
        encoder = keras.Model(encoder_inputs, [z], name="encoder")
        return encoder


@tf.keras.saving.register_keras_serializable()
class Decoder(keras.Model):
    def __init__(self, args, **kwargs):
        super().__init__(**kwargs)
        self.latent_dim = args["latent_dim"]
        self.activation = layers.Activation("relu")
        self.dense1 = layers.Dense(256)
        self.dense2 = layers.Dense(28 * 28, activation="sigmoid")
        self.reshape = layers.Reshape([28, 28, 1])

    def build(self, **kwargs):
        latent_inputs = keras.Input(shape=(self.latent_dim,))
        x = self.dense1(latent_inputs)
        x = self.activation(x)
        x = self.dense2(x)
        decoder_outputs = self.reshape(x)
        decoder = keras.Model(latent_inputs, decoder_outputs, name="encoder")
        return decoder


@tf.keras.saving.register_keras_serializable()
class Generator(keras.Model):
    def __init__(self, args, **kwargs):
        super().__init__(**kwargs)
        self.noise_dim = args["noise_dim"]
        self.activation = layers.Activation("relu")
        self.dense1 = layers.Dense(512)
        self.dense2 = layers.Dense(args["latent_dim"], name="z")

    def build(self, **kwargs):
        noise_inputs = keras.Input(shape=(self.noise_dim,))
        x = self.dense1(noise_inputs)
        x = self.activation(x)
        z = self.dense2(x)
        latent_generator = keras.Model(noise_inputs, [z], name="generator")
        return latent_generator


@tf.keras.saving.register_keras_serializable()
class BaseNet(keras.Model):
    def __init__(self, args, **kwargs):
        super(BaseNet, self).__init__(**kwargs)
        self.encoder = Encoder(args).build()
        self.decoder = Decoder(args).build()
        self.args = args
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = keras.metrics.Mean(
            name="cw_reconstruction_loss"
        )
        self.cw_loss_tracker = keras.metrics.Mean(name="cw_loss")

    def get_config(self):
        config = {
            "args": self.args
        }
        base_config = super(BaseNet, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    @property
    def metrics(self):
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.cw_loss_tracker,
        ]

    def train_step(self, data):
        with tf.GradientTape() as tape:
            z = self.encoder(data)
            reconstruction = self.decoder(z)
            # tf.print(reconstruction)
            cw_reconstruction_loss = tf.math.log(
                cw_sampling_silverman(data, reconstruction))
            lambda_val = 1
            cw_loss = lambda_val * tf.math.log(cw_normality(z))
            total_loss = cw_reconstruction_loss + cw_loss
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(cw_reconstruction_loss)
        self.cw_loss_tracker.update_state(cw_loss)
        return {
            "total_loss": self.total_loss_tracker.result(),
            "cw_reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "cw_loss": self.cw_loss_tracker.result(),
        }


@tf.keras.saving.register_keras_serializable()
class HighNet(keras.Model):
    def __init__(self, encoder, decoder, args, **kwargs):
        super(HighNet, self).__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.args = args
        self.generator = Generator(args).build()
        self.reconstruction_loss_tracker = keras.metrics.Mean(
            name="cw_reconstruction_loss"
        )

    def get_config(self):
        config = {
            "encoder": self.encoder,
            "decoder": self.decoder,
            "args": self.args
        }
        base_config = super(HighNet, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def call(self, inputs, **kwargs):
        x = self.encoder(inputs)
        return self.decoder(x)

    @property
    def metrics(self):
        return [
            self.reconstruction_loss_tracker
        ]

    def train_step(self, data):
        with tf.GradientTape() as tape:
            z = self.encoder(data)
            batch_size = tf.shape(z)[0]
            noise_np = np.random.normal(0, 1, size=self.args["noise_dim"])
            noise_tf = tf.expand_dims(tf.convert_to_tensor(noise_np), axis=0)
            noise_tf = tf.repeat(noise_tf, repeats=batch_size, axis=0)
            noise_z = self.generator(noise_tf)
            # tf.print(reconstruction)
            cw_reconstruction_loss = tf.math.log(
                cw_sampling_silverman(z, noise_z))
        grads = tape.gradient(cw_reconstruction_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.reconstruction_loss_tracker.update_state(cw_reconstruction_loss)
        return {
            "cw_reconstruction_loss": self.reconstruction_loss_tracker.result()
        }


def test_saving():
    args = {"sample_amount": 1000,
            "latent_dim": 24,
            "noise_dim": 24,
            "epochs": 1,
            "batch_size": 128,
            "patience": 3,
            "learning_rate": 0.0001}
    (x_train, y_train), (x_test, _) = keras.datasets.mnist.load_data()
    mnist_digits = np.concatenate([x_train, x_test], axis=0)[0:100]
    mnist_digits = np.expand_dims(mnist_digits, -1).astype("float32") / 255
    base_model = BaseNet(args)
    base_model.compile(optimizer=keras.optimizers.Adam(learning_rate=args["learning_rate"]))
    es_callback = keras.callbacks.EarlyStopping(monitor='total_loss', patience=args["patience"], mode="min")
    base_model.fit(mnist_digits, epochs=args["epochs"], batch_size=args["batch_size"], callbacks=[es_callback])

    model = HighNet(base_model.encoder, base_model.decoder, args)
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=args["learning_rate"]))
    es2_callback = keras.callbacks.EarlyStopping(monitor='cw_reconstruction_loss', patience=args["patience"],
                                                 mode="min")
    model.fit(mnist_digits, epochs=args["epochs"], batch_size=args["batch_size"], callbacks=[es2_callback])

    model.save("high_model.keras", save_format="keras")

    loaded_model = keras.saving.load_model("high_model.keras")


if __name__ == "__main__":
    test_saving()

保存后模型.summary的输出：

Model: "encoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_1 (InputLayer)        [(None, 28, 28, 1)]       0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 256)               200960    
                                                                 
 activation (Activation)     (None, 256)               0         
                                                                 
 z (Dense)                   (None, 24)                6168      
                                                                 
=================================================================
Total params: 207128 (809.09 KB)
Trainable params: 207128 (809.09 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


Model: "encoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_2 (InputLayer)        [(None, 24)]              0         
                                                                 
 dense_1 (Dense)             (None, 256)               6400      
                                                                 
 activation_1 (Activation)   (None, 256)               0         
                                                                 
 dense_2 (Dense)             (None, 784)               201488    
                                                                 
 reshape (Reshape)           (None, 28, 28, 1)         0         
                                                                 
=================================================================
Total params: 207888 (812.06 KB)
Trainable params: 207888 (812.06 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


Model: "generator"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_3 (InputLayer)        [(None, 24)]              0         
                                                                 
 dense_3 (Dense)             (None, 512)               12800     
                                                                 
 activation_2 (Activation)   (None, 512)               0         
                                                                 
 z (Dense)                   (None, 24)                12312     
                                                                 
=================================================================
Total params: 25112 (98.09 KB)
Trainable params: 25112 (98.09 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________

我不知道为什么这个摘要看起来与我的原始摘要不同，但在原始摘要中我可以从“dense_4/kernel:0”得出形状不匹配发生在编码器的第一个密集层中。那里的层用它们的运行时标签进行注释，即dense_4等等。我不知道我省略了什么会导致不同风格的摘要，原始模型有更多层，批量归一化，重用激活函数等。错误仍然是一样的。我无法想象简单地保存和加载模型会改变任何形状不兼容的情况。

我正在使用 Tensorflow 2.15、Keras 2.15、Python 3.11，并且正在 Ubuntu 23.10 上使用 PyCharm Professional。

Answer 1

当我尝试调试时，我将错误消息缩小到：

ValueError: Layer 'dense_3' expected 0 variables, but received 2 variables during loading. Expected: []

这很奇怪，因为没有 dend_3 层。我设法构建了以下最小示例：

import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Activation, Input, BatchNormalization, Dropout, Flatten, Identity


args = {"activation": "relu",
        "batch_norm": True}

@keras.saving.register_keras_serializable()
class CustomModel1(Model):
    def __init__(self):
        super().__init__()
        self.dense = Dense(32)

    def call(self, inputs):
        x = self.dense(inputs)
        return x


@keras.saving.register_keras_serializable()
class CustomModel2(Model):
    def __init__(self):
        super().__init__()
        self.dense = Dense(32)

    def call(self, inputs):
        x = self.dense(inputs)
        return x


@keras.saving.register_keras_serializable()
class CustomModel3(Model):
    def __init__(self):
        super().__init__()
        self.net1 = CustomModel1()
        self.net2 = CustomModel2()

    def call(self, inputs):
        z = self.net1(inputs)
        x = self.net2(z)
        return z, x

    def train_step(self, data):
        x, y = data

        with tf.GradientTape() as tape:
            # z, y_pred = self(x)                 # this fixes it instead
            y_pred = self.net2(self.net1(x))      # this line throws the error
            loss = self.compiled_loss(y, y_pred)

        trainable_vars = self.trainable_weights
        gradients = tape.gradient(loss, trainable_vars)
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        self.compiled_metrics.update_state(y, y_pred)
        return {m.name: m.result() for m in self.metrics}


# Instantiate the model
model = CustomModel3()

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Create some dummy data for training
x_train = np.random.random((1000, 32))
y_train = np.random.randint(10, size=(1000,))

# Train the model for one epoch
model.fit(x_train, y_train, epochs=1)

# Save the model
model.save('custom_model.keras', save_format='keras')

# Load the model again
loaded_model = tf.keras.models.load_model('custom_model.keras')

# Generate some sample data for prediction
x_sample = np.random.random((10, 32))  # Assuming 10 samples with 32 features each

# Make predictions using the loaded model
predictions = loaded_model.predict(x_sample)
print(predictions)
# Print the predictions
print(model.summary())

通过 self.net1 和 self.net2 inside train_step 自行调用子模型会引发错误，但通过更高模型的 call 方法调用它们并返回各自的值不会引发错误。我无法理解这如何导致此错误消息。

无法保存自定义keras模型

问题描述投票：0回答：1

1个回答

最新问题

无法保存自定义keras模型

问题描述 投票：0回答：1

1个回答

最新问题

问题描述投票：0回答：1