class Layers:
class Dense:
def __init__(self, neurons=0, activation=Activations.relu, inputs=0, dropout_rate=1):
# Initialization of weights and biases
self.weights = np.random.randn(neurons, inputs)
self.biases = np.random.randn(neurons, 1) # prev (1, neurons)
self.activation = activation
self.dropout_rate = dropout_rate
def forward(self, inputs):
print(f"FORWARD START. in: {inputs.shape}")
self.inputs = inputs
print(f"Biasses: {self.biases.shape}")
self.outputs = np.add(np.dot(inputs, self.weights.T), self.biases)
self.outputs = self.activation(self.outputs)
self.outputs = self.dropout(self.outputs)
print(f"FORWARD STOP. slf_out: {self.outputs.shape}, slf_in: {self.inputs.shape}, in: {inputs.shape}")
return self.outputs
def backward(self, error, learning_rate):
self.error = error
self.delta = self.error * self.activation(self.outputs, derivative=True)
self.delta = self.dropout(self.delta, derivative=True)
self.weights -= learning_rate * np.dot(self.delta, self.inputs.T)
self.biases -= learning_rate * np.sum(self.delta, axis=0, keepdims=True)
return self.delta
def dropout(self, x, derivative=False):
if derivative:
return self.dropout_rate * (1 - self.dropout_rate) * x
return self.dropout_rate * x
class NeuralNetwork:
def __init__(self, layers, loss=LossFunctions.Categorical_Crossentropy, optimizer=Optimizers.SGD, learning_rate=0.01, momentum=0.0, beta1=0.9, beta2=0.999, epsilon=1e-7):
self.layers = layers
self.loss = loss
self.optimizer = optimizer
self.learning_rate = learning_rate
self.momentum = momentum
self.weights = []
self.biases = []
self.velocities = []
self.beta1 = beta1
self.beta2 = beta2
self.epsilon = epsilon
self.optimizer_kwargs = {'alpha': self.learning_rate, 'beta1': self.beta1, 'beta2': self.beta2}
if self.optimizer == Optimizers.SGD:
self.optimizer_kwargs['momentum'] = self.momentum
def forward(self, inputs):
self.inputs = inputs
self.outputs = self.inputs
for layer in self.layers:
self.output = layer.forward(self.outputs)
return self.outputs
def backward(self, targets):
self.targets = targets
self.error = self.loss(self.outputs, self.targets)
self.delta = self.error
for layer in reversed(self.layers):
self.delta = layer.backward(self.delta, self.optimizer_kwargs)
return self.delta
def update_weights(self):
for layer in self.layers:
layer.update_weights(self.optimizer_kwargs)
def train(self, inputs, targets, epochs=1, batch_size=1, verbose=False):
self.epochs = epochs
self.epoch_errors = []
self.epoch_losses = []
self.epoch_accuracies = []
self.epoch_times = []
start = time.time()
for epoch in range(self.epochs):
epoch_start = time.time()
epoch_error = 0
epoch_loss = 0
epoch_accuracy = 0
print("in epoch loop: " + str(inputs.shape[0]))
for i in range(0, inputs.shape[0], batch_size):
print("in batch loop: " + str(i))
batch_inputs = inputs[i: i+batch_size]
batch_targets = targets[i: i+batch_size]
self.forward(batch_inputs)
self.backward(batch_targets)
self.update_weights()
epoch_error += self.error.sum()
epoch_loss += self.loss(self.outputs, self.targets).sum()
epoch_accuracy += self.accuracy(self.outputs, self.targets)
epoch_time = time.time() - epoch_start
self.epoch_errors.append(epoch_error)
self.epoch_losses.append(epoch_loss)
self.epoch_accuracies.append(epoch_accuracy)
self.epoch_times.append(epoch_time)
if verbose:
print('Epoch: {}, Error: {}, Loss: {}, Accuracy: {}, Time: {}'.format(epoch, epoch_error, epoch_loss, epoch_accuracy, epoch_time))
self.train_time = time.time() - start
return self.epoch_errors, self.epoch_losses, self.epoch_accuracies, self.epoch_times
def accuracy(self, outputs, targets):
return np.sum(np.argmax(outputs, axis=1) == np.argmax(targets, axis=1)) / outputs.shape[0]
def loss(self, outputs, targets):
return self.loss.forward(outputs, targets)
def predict(self, inputs):
return np.argmax(self.forward(inputs), axis=1)
def evaluate(self, inputs, targets):
return self.loss(self.forward(inputs), targets), self.accuracy(self.forward(inputs), targets)
def main:
X_train_flattened = X_train.reshape(X_train.shape[0], -1)
model = NeuralNetwork([
Layers.Dense(neurons=1024, activation=Activations.relu, inputs=X_train_flattened.shape[1]),
Layers.Dense(neurons=128, activation=Activations.relu),
Layers.Dense(neurons=89, activation=Activations.softmax)
], LossFunctions.Categorical_Crossentropy, Optimizers.SGD, learning_rate=0.01)
model.train(X_train_flattened, Y_train_ohe, epochs=100, batch_size=10, verbose=True)
回溯(最近一次调用最后一次): 文件“/Users/31_grudnia/Desktop/Python/Playground/Masters_Degree_Project/backend/MNIST_app/notebooks/test.py”,第 188 行,位于 model.train(X_train_flattened,Y_train_ohe,epochs=100,batch_size=10,verbose=True) 文件“/Users/31_grudnia/Desktop/Python/Playground/Masters_Degree_Project/backend/MNIST_app/notebooks/test.py”,第 131 行,列车中 self.forward(batch_inputs) 文件“/Users/31_grudnia/Desktop/Python/Playground/Masters_Degree_Project/backend/MNIST_app/notebooks/test.py”,第 99 行,向前 self.output = 层.forward(self.outputs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 文件“/Users/31_grudnia/Desktop/Python/Playground/Masters_Degree_Project/backend/MNIST_app/notebooks/test.py”,第 58 行,向前 self.outputs = np.add(np.dot(输入, self.weights.T), self.biases) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^ ValueError:操作数无法与形状一起广播 (10,1024) (1024,1)
为什么我不能将两个形状为 (10,1024) (1024,1) 的矩阵相加?
据我所知,当矩阵具有相同的形状并且第一个矩阵中的列数等于第二个矩阵中的行数时,我应该能够进行矩阵运算。
这些据我所知,当它们具有相同的形状时,我应该能够进行矩阵运算
数组(不是矩阵)的形状是不同的。一个具有形状 (10, 1024)
,另一个具有形状
(1024, 1)
。
第一个的列数等于第二个的行数这允许产生
矩阵积,这就是 np.dot
import numpy as np
A = np.ones((10, 1024))
x = np.ones((1024, 1))
np.dot(A, x).shape # (10, 1)
它不允许像数组加法这样的数组算术,这是按元素执行的。
np.add(A, x)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-4-34d2dc560770> in <cell line: 1>()
----> 1 np.add(A, x)
ValueError: operands could not be broadcast together with shapes (10,1024) (1024,1)
为此,数组需要是“可广播的”。例如,如果它们具有相同的列数,并且其中一个具有 m
行,而另一个具有 m
行、1 行或仅为 1D,则它们将是可广播的。
add
似乎是此处失败的操作。请参阅[广播规则] (https://numpy.org/doc/stable/user/basics.broadcasting.html
)。