我正在尝试对omniglot数据集实施元学习,但是有些不正确。
这里是代码:
def get_siamese_model(input_shape):
"""
Model architecture based on the one provided in: http://www.cs.utoronto.ca/~gkoch/files/msc-thesis.pdf
"""
# Define the tensors for the two input images
left_input = Input(input_shape)
right_input = Input(input_shape)
# Convolutional Neural Network
model = Sequential()
model.add(Conv2D(64, (10,10), activation='relu', input_shape=input_shape,
kernel_initializer=initialize_weights, kernel_regularizer=l2(2e-4)))
model.add(MaxPooling2D())
model.add(Conv2D(128, (7,7), activation='relu',
kernel_initializer=initialize_weights,
bias_initializer=initialize_bias, kernel_regularizer=l2(2e-4)))
model.add(MaxPooling2D())
model.add(Conv2D(128, (4,4), activation='relu', kernel_initializer=initialize_weights,
bias_initializer=initialize_bias, kernel_regularizer=l2(2e-4)))
model.add(MaxPooling2D())
model.add(Conv2D(256, (4,4), activation='relu', kernel_initializer=initialize_weights,
bias_initializer=initialize_bias, kernel_regularizer=l2(2e-4)))
model.add(Flatten())
model.add(Dense(4096, activation='sigmoid',
kernel_regularizer=l2(1e-3),
kernel_initializer=initialize_weights,bias_initializer=initialize_bias))
# Generate the encodings (feature vectors) for the two images
encoded_l = model(left_input)
encoded_r = model(right_input)
# # Add a customized layer to compute the absolute difference between the encodings
# L1_layer = Lambda(lambda tensors:K.abs(tensors[0] - tensors[1]))
# L1_distance = L1_layer([encoded_l, encoded_r])
# # Add a dense layer with a sigmoid unit to generate the similarity score
# prediction = Dense(1,activation='sigmoid',bias_initializer=initialize_bias)(L1_distance)
#Connect the inputs with the outputs
siamese_net = Model(inputs=[left_input,right_input],outputs=[encoded_l, encoded_r])
# return the model
return siamese_net
def forward(model, x1, x2):
return model.call([x1,x2])
model = get_siamese_model((105, 105, 1))
test_loss = tf.convert_to_tensor(0.0)
with tf.GradientTape() as test_tape:
test_tape.watch(model.trainable_weights)
test_tape.watch(test_loss)
x, y = get_batch(32)
x1 = tf.cast(tf.convert_to_tensor(x[0]), dtype=tf.float32)
x2 = tf.cast(tf.convert_to_tensor(x[1]), dtype=tf.float32)
y1 = tf.cast(tf.convert_to_tensor(y), dtype=tf.float32)
train_loss = tf.convert_to_tensor(0.0)
with tf.GradientTape() as train_tape:
train_tape.watch(model.trainable_weights)
train_tape.watch(train_loss)
train_loss = contrastive_loss(forward(model, x1, x2), y1)
gradients = train_tape.gradient(train_loss, model.trainable_weights)
old_weights = model.get_weights()
model.set_weights([w - 0.01 * g for w, g in zip(model.trainable_weights, gradients)])
test_loss = contrastive_loss(forward(model, x1, x2), y1)
model.set_weights(old_weights)
print(train_loss)
print(test_loss)
结果:
tf.Tensor(8.294627, shape=(), dtype=float32)
tf.Tensor(8.294627, shape=(), dtype=float32)
为什么我会遭受同样的损失?如您所见,权重已更改,但输出相同。改变权重应该导致不同的输出,应该导致不同的损失?也许前进会再次改变权重?
我假设您正在使用交叉熵损失函数。您看到的损失(8.2 ...)本质上是最大可能损失,这意味着损失计算中存在溢出。例如,如果您的预测超出0-1范围,或者如果您的预测恰好为0,则通常会发生这种情况。