I ran into a problem on Google Colab. TF version: 2.16.1, Keras version: 3.3.3.
This is the error:
ValueError Traceback (most recent call last)
<ipython-input-46-4d65c8f7b80b> in <cell line: 1>()
----> 1 m3.load_weights("./model_v8.weights.h5")
1 frames
/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py in error_handler(*args, **kwargs)
120 # To get the full stack trace, call:
121 # `keras.config.disable_traceback_filtering()`
--> 122 raise e.with_traceback(filtered_tb) from None
123 finally:
124 del filtered_tb
/usr/local/lib/python3.10/dist-packages/keras/src/saving/saving_lib.py in _raise_loading_failure(error_msgs, warn_only)
293 warnings.warn(msg)
294 else:
--> 295 raise ValueError(msg)
296
297
ValueError: A total of 1 objects could not be loaded. Example error message for object <Embedding name=embedding_10, built=True>:
Layer 'embedding_10' expected 1 variables, but received 0 variables during loading. Expected: ['embeddings']
List of objects that could not be loaded:
[<Embedding name=embedding_10, built=True>]
I'm implementing a seq2seq model with Bahdanau attention. The weights were trained yesterday from this same model. Before loading them I ran a few translations with the translate() method, because the weights don't seem to load if I just instantiate the model and then call load_weights().
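To make that concrete, the flow I currently use looks roughly like this (just a sketch; m3, the tokenizer variables, EMBEDDING_SIZE / HIDDEN_UNITS and the dummy shapes are placeholders for what the notebook actually does):

# Sketch of the current workaround: load_weights() only succeeds after the
# model has been called once, so that every layer's variables exist.
m3 = NMT(input_tokenizer, output_tokenizer, EMBEDDING_SIZE, HIDDEN_UNITS)

# One dummy forward pass (or a translate() call) builds all sub-layers.
dummy_src = tf.ones((1, 10), dtype=tf.int64)   # [batch, time_steps]
dummy_tgt = tf.ones((1, 10), dtype=tf.int64)
_ = m3((dummy_src, dummy_tgt))

m3.load_weights("./model_v8.weights.h5")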
Here is the code for my model.
Encoder:
import numpy as np
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import (Activation, Add, Bidirectional, Dense,
                                     Embedding, Layer, LayerNormalization, LSTM)

# DROPOUT, word_to_idx, expand_contractions and en_contraction_map are defined
# earlier in the notebook.


class Encoder(Layer):
    def __init__(self,
                 tokenizer,
                 embedding_size,
                 hidden_units):
        """
        Encoder Block in seq2seq
        :param tokenizer: tokenizer of the source language
        :param embedding_size: dimensionality of the embedding layer
        :param hidden_units: dimensionality of the output
        """
        super().__init__()
        self.tokenizer = tokenizer
        self.embedding_size = embedding_size
        self.hidden_units = hidden_units
        self.vocab_size = tokenizer.vocabulary_size()
        self.embedding = Embedding(input_dim=self.vocab_size,
                                   output_dim=embedding_size)
        self.rnn = Bidirectional(
            merge_mode="sum",
            layer=LSTM(units=hidden_units,
                       dropout=DROPOUT,
                       return_sequences=True,
                       return_state=True))

    def call(self,
             x,
             training=True):
        """
        :param x: [batch, time_steps]
        :param training: is training or not
        :return:
            encoder_hidden_states: [batch, time_steps, hidden_state_dim]
            state_h: [batch, hidden_state_dim]
            state_c: [batch, hidden_state_dim]
        """
        mask = tf.where(x != 0, True, False)
        x = self.embedding(x)
        x, forward_h, forward_c, backward_h, backward_c = self.rnn(x, mask=mask,
                                                                   training=training)
        return x, forward_h + backward_h, forward_c + backward_c
Bahdanau attention:
class BahdanauAttention(Layer):
    def __init__(self,
                 hidden_units):
        super().__init__()
        self.Va = Dense(1)
        self.Wa = Dense(hidden_units)
        self.Ua = Dense(hidden_units)
        self.norm = LayerNormalization()
        self.tanh = Activation(tf.keras.activations.tanh)
        self.add = Add()

    def call(self,
             context, x):
        """
        Calculate the context vector based on all encoder hidden states and
        the previous decoder state.
        :param context: tensor, all encoder hidden states
        :param x: tensor, previous state from Decoder
        :return:
            context_vector: tensor, the calculated context vector based on the
            input parameters
        """
        # Expand dims to ensure scores shape = [batch, Ty, Tx]
        context = tf.expand_dims(context, axis=1)
        x = tf.expand_dims(x, axis=2)
        scores = self.Va(self.tanh(self.add([self.Wa(context), self.Ua(x)])))
        scores = tf.squeeze(scores, axis=-1)
        attn_weights = tf.nn.softmax(scores, axis=-1)
        # NOTE: context shape = [batch, 1, Tx, feature], so the attention
        # weights are expanded to broadcast against it
        context_vector = tf.expand_dims(attn_weights, axis=-1) * context
        context_vector = tf.reduce_sum(context_vector, axis=-2)
        context_vector = self.norm(context_vector)
        context_vector = self.add([context_vector, tf.squeeze(x, -2)])
        return context_vector
Decoder:
class Decoder(Layer):
    def __init__(self,
                 tokenizer,
                 embedding_size,
                 hidden_units):
        """
        Decoder Block in seq2seq
        :param tokenizer: tokenizer of the target language
        :param embedding_size: dimensionality of the embedding layer
        :param hidden_units: dimensionality of the output
        """
        super().__init__()
        self.tokenizer = tokenizer
        self.embedding_size = embedding_size
        self.hidden_units = hidden_units
        self.vocab = tokenizer.get_vocabulary()
        self.vocab_size = tokenizer.vocabulary_size()
        self.embedding = Embedding(input_dim=self.vocab_size,
                                   output_dim=embedding_size)
        self.rnn = LSTM(units=hidden_units,
                        dropout=DROPOUT,
                        return_sequences=True,
                        return_state=True)
        self.attention = BahdanauAttention(hidden_units)
        self.dense = Dense(self.vocab_size)

    def call(self,
             context, x,
             encoder_state,
             training=True,
             return_state=False):
        """
        :param context: all encoder states
        :param x: decoder input token ids, [batch, time_steps]
        :param encoder_state: last state from encoder
        :param training: is training or not
        :param return_state: whether to also return the LSTM states
        :return:
            logits: [batch, time_steps, vocab_size]
            state_h: hidden state
            state_c: cell state
        """
        mask = tf.where(x != 0, True, False)
        x = self.embedding(x)
        decoder_outputs, state_h, state_c = self.rnn(x, initial_state=encoder_state,
                                                     mask=mask,
                                                     training=training)
        dense_inputs = self.attention(context, decoder_outputs)
        logits = self.dense(dense_inputs)
        if return_state:
            return logits, state_h, state_c
        else:
            return logits
Model:
class NMT(Model):
    @classmethod
    def add_method(cls, fun):
        setattr(cls, fun.__name__, fun)
        return fun

    def __init__(self,
                 input_tokenizer,
                 output_tokenizer,
                 embedding_size,
                 hidden_units):
        """
        Initialize an instance for the Neural Machine Translation task
        :param input_tokenizer: tokenizer of the input language
        :param output_tokenizer: tokenizer of the output language
        :param embedding_size: dimensionality of embedding layer
        :param hidden_units: dimensionality of the output
        """
        super().__init__()
        self.input_tokenizer = input_tokenizer
        self.output_tokenizer = output_tokenizer
        self.embedding_size = embedding_size
        self.hidden_units = hidden_units
        self.encoder = Encoder(input_tokenizer,
                               embedding_size,
                               hidden_units)
        self.decoder = Decoder(output_tokenizer,
                               embedding_size,
                               hidden_units)

    def call(self,
             inputs):
        encoder_inputs, decoder_inputs = inputs
        encoder_outputs, state_h, state_c = self.encoder(encoder_inputs)
        logits = self.decoder(encoder_outputs, decoder_inputs,
                              [state_h, state_c])
        return logits
@NMT.add_method
def translate(self, next_inputs,
              maxlen=40):
    """
    Translate a single source sentence into the target language by sampling
    one token at a time from the decoder.
    """
    def sampling(logits):
        probs = tf.nn.softmax(logits)
        dist = probs.numpy().squeeze()
        idx = np.random.choice(range(self.decoder.vocab_size), p=dist)
        return idx

    translation = []
    next_inputs = expand_contractions(next_inputs.lower(), en_contraction_map)
    next_idx = np.asarray(self.encoder.tokenizer(next_inputs))
    while next_idx.ndim != 2:
        next_idx = tf.expand_dims(next_idx, axis=0)
    encoder_outputs, state_h, state_c = self.encoder(next_idx, training=False)
    next_inputs = "[START]"
    next_idx = np.asarray(word_to_idx[next_inputs])
    for i in range(maxlen):
        while next_idx.ndim != 2:
            next_idx = tf.expand_dims(next_idx, axis=0)
        logits, state_h, state_c = self.decoder(encoder_outputs, next_idx,
                                                [state_h, state_c],
                                                training=False,
                                                return_state=True)
        next_idx = sampling(logits)
        next_inputs = self.decoder.vocab[next_idx]
        if next_inputs == "[END]":
            break
        elif next_inputs == "[UNK]":
            continue
        else:
            translation.append(next_inputs)
    return " ".join(translation)
Here is the link to the notebook if you want to take a look: https://colab.research.google.com/drive/1EKOm7ULFKEusvEFb8thSGOHTfRnRKru7?usp=sharing
I searched for solutions, but they are usually about checking the TF and Keras versions. I inspected the weights file with h5py and it says the Keras version inside is 3.3.3, so that doesn't look like the problem. I also tried saving the whole model with the custom objects serialized via the get_config() and from_config() methods, but I got the same error plus a warning about the build() method of the Decoder class, so I went back to saving just the weights and then loading those.
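For reference, that whole-model attempt was along these lines, i.e. registering the custom layers and serializing the tokenizer in get_config()/from_config() (a rough sketch of the mechanism, not the exact notebook code; the package name and config keys are placeholders):

import keras

@keras.saving.register_keras_serializable(package="nmt")
class Encoder(Layer):
    ...  # __init__ / call as shown above

    def get_config(self):
        # Note: for this to round-trip, __init__ would also have to accept
        # **kwargs and forward them to super().__init__().
        config = super().get_config()
        config.update({
            # the tokenizer (a TextVectorization layer) has to be serialized
            # explicitly; embedding_size and hidden_units are plain ints
            "tokenizer": keras.saving.serialize_keras_object(self.tokenizer),
            "embedding_size": self.embedding_size,
            "hidden_units": self.hidden_units,
        })
        return config

    @classmethod
    def from_config(cls, config):
        config["tokenizer"] = keras.saving.deserialize_keras_object(config["tokenizer"])
        return cls(**config)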
I also noticed that in TF's save-and-load tutorial the weights are loaded directly in the examples. How can I achieve that? By the way, is there any guidance on implementing build() for custom layers? My Decoder is composed of several other TF layers, so I don't know how to write one.
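The closest I got is a sketch like the following, but I'm not sure it's right (assuming build() just has to build each sub-layer from the incoming shape; shown for the Encoder, whose input is a [batch, time_steps] tensor of token ids, while the Decoder would need the shapes of all three of its inputs):

class Encoder(Layer):
    ...  # __init__ / call as shown above

    def build(self, input_shape):
        # input_shape: (batch, time_steps) of token ids
        self.embedding.build(input_shape)
        # after the embedding, the BiLSTM sees (batch, time_steps, embedding_size)
        self.rnn.build((*input_shape, self.embedding_size))
        self.built = True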
It turns out Colab's default TF version was 2.15.1, and it got upgraded to 2.16.1 when I installed tf_text without specifying a version at the top of the notebook. So I pinned tf_text to 2.15.1, and the weights load perfectly.
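In other words, the fix is just pinning the package in the install cell at the top of the notebook, something like this (the exact patch version may differ, check it against the TF build you want to keep):

# Colab cell: pin tensorflow_text to the 2.15 line so installing it does not
# silently upgrade TensorFlow to 2.16 / Keras 3.3.
!pip install -q "tensorflow-text==2.15.*"

After a runtime restart the notebook stays on TF 2.15 and load_weights() succeeds.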