我正在尝试用动量实现反向传播,但在训练循环中,我并没有得到更小的损失,而是越来越高
我已经为反向传播编写了这样的函数。我已经多次检查代码,老实说,我没有注意到任何错误。
`
def backpropagation_mini_batch_momentum(self, inputs, targets, learning_rate, batch_size, momentum = 0.9):
for i in range(0, len(inputs), batch_size):
inputs_batch = inputs[i:i+batch_size]
targets_batch = targets[i:i+batch_size]
# Forward propagation
self.forward_propagation(inputs_batch)
# Backward propagation
error = self.activations[-1] - targets_batch
delta = error
self.derivatives_weights[-1] = np.dot(self.activations[-2].T, delta)
self.derivatives_biases[-1] = np.sum(delta, axis=0)
error = np.dot(delta, self.weights[-1].T)
for j in range(len(self.weights) - 2, -1, -1):
delta = error * self._sigmoid_derivative(self.activations[j + 1])
self.derivatives_weights[j] = np.dot(self.activations[j].T, delta)
self.derivatives_biases[j] = np.sum(delta, axis=0)
error = np.dot(delta, self.weights[j].T)
# Update velocities for weights and biases
for j in range(len(self.weights)):
self.velocities_weights[j] = momentum * self.velocities_weights[j] + (learning_rate * self.derivatives_weights[j]) / batch_size
self.velocities_biases[j] = momentum * self.velocities_biases[j] + (learning_rate * self.derivatives_biases[j]) / batch_size
# Update weights and biases
self.weights[j] -= self.velocities_weights[j]
self.biases[j] -= self.velocities_biases[j]
`