ValueError: operands could not be broadcast together with shapes (10,1024) (1024,1)

Question (votes: 0, answers: 1)


class Layers:
    class Dense:

        def __init__(self, neurons=0, activation=Activations.relu, inputs=0, dropout_rate=1):
            # Initialization of weights and biases
            self.weights = np.random.randn(neurons, inputs)
            self.biases = np.random.randn(neurons, 1)  # prev (1, neurons)
            self.activation = activation
            self.dropout_rate = dropout_rate
        
        def forward(self, inputs):
            print(f"FORWARD START. in: {inputs.shape}")
            self.inputs = inputs
            print(f"Biasses: {self.biases.shape}")
            self.outputs = np.add(np.dot(inputs, self.weights.T), self.biases)
            self.outputs = self.activation(self.outputs)
            self.outputs = self.dropout(self.outputs)
            print(f"FORWARD STOP. slf_out: {self.outputs.shape}, slf_in: {self.inputs.shape}, in: {inputs.shape}")
            return self.outputs
        
        def backward(self, error, learning_rate):
            self.error = error
            self.delta = self.error * self.activation(self.outputs, derivative=True)
            self.delta = self.dropout(self.delta, derivative=True)
            self.weights -= learning_rate * np.dot(self.delta, self.inputs.T)
            self.biases -= learning_rate * np.sum(self.delta, axis=0, keepdims=True)
            return self.delta
        
        def dropout(self, x, derivative=False):
            if derivative:
                return self.dropout_rate * (1 - self.dropout_rate) * x
            return self.dropout_rate * x 

class NeuralNetwork:

    def __init__(self, layers, loss=LossFunctions.Categorical_Crossentropy, optimizer=Optimizers.SGD, learning_rate=0.01, momentum=0.0, beta1=0.9, beta2=0.999, epsilon=1e-7):
        self.layers = layers
        self.loss = loss
        self.optimizer = optimizer
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.weights = []
        self.biases = []
        self.velocities = []
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.optimizer_kwargs = {'alpha': self.learning_rate, 'beta1': self.beta1, 'beta2': self.beta2}
        if self.optimizer == Optimizers.SGD:
            self.optimizer_kwargs['momentum'] = self.momentum
        
    def forward(self, inputs):
        self.inputs = inputs
        self.outputs = self.inputs
        for layer in self.layers:
            self.output = layer.forward(self.outputs)
        return self.outputs
    
    def backward(self, targets):
        self.targets = targets
        self.error = self.loss(self.outputs, self.targets)
        self.delta = self.error
        for layer in reversed(self.layers):
            self.delta = layer.backward(self.delta, self.optimizer_kwargs)
        return self.delta
        
    def update_weights(self):
        for layer in self.layers:
            layer.update_weights(self.optimizer_kwargs)
            
    def train(self, inputs, targets, epochs=1, batch_size=1, verbose=False):
        self.epochs = epochs
        self.epoch_errors = []
        self.epoch_losses = []
        self.epoch_accuracies = []
        self.epoch_times = []
        start = time.time()
        for epoch in range(self.epochs):
            epoch_start = time.time()
            epoch_error = 0
            epoch_loss = 0
            epoch_accuracy = 0
            print("in epoch loop: " + str(inputs.shape[0]))
            for i in range(0, inputs.shape[0], batch_size):
                print("in batch loop: " + str(i))
                batch_inputs = inputs[i: i+batch_size]
                batch_targets = targets[i: i+batch_size]
                self.forward(batch_inputs)
                self.backward(batch_targets)
                self.update_weights()
                epoch_error += self.error.sum()
                epoch_loss += self.loss(self.outputs, self.targets).sum()
                epoch_accuracy += self.accuracy(self.outputs, self.targets)
            epoch_time = time.time() - epoch_start
            self.epoch_errors.append(epoch_error)
            self.epoch_losses.append(epoch_loss)
            self.epoch_accuracies.append(epoch_accuracy)
            self.epoch_times.append(epoch_time)
            if verbose:
                print('Epoch: {}, Error: {}, Loss: {}, Accuracy: {}, Time: {}'.format(epoch, epoch_error, epoch_loss, epoch_accuracy, epoch_time))
        self.train_time = time.time() - start
        return self.epoch_errors, self.epoch_losses, self.epoch_accuracies, self.epoch_times
    
    def accuracy(self, outputs, targets):
        return np.sum(np.argmax(outputs, axis=1) == np.argmax(targets, axis=1)) / outputs.shape[0]
    
    def loss(self, outputs, targets):
        return self.loss.forward(outputs, targets)
        
    def predict(self, inputs):
        return np.argmax(self.forward(inputs), axis=1)
        
    def evaluate(self, inputs, targets):
        return self.loss(self.forward(inputs), targets), self.accuracy(self.forward(inputs), targets) 


def main():
    X_train_flattened = X_train.reshape(X_train.shape[0], -1)
    model = NeuralNetwork([
        Layers.Dense(neurons=1024, activation=Activations.relu, inputs=X_train_flattened.shape[1]),
        Layers.Dense(neurons=128, activation=Activations.relu),
        Layers.Dense(neurons=89, activation=Activations.softmax)
    ], LossFunctions.Categorical_Crossentropy, Optimizers.SGD, learning_rate=0.01)

    model.train(X_train_flattened, Y_train_ohe, epochs=100, batch_size=10, verbose=True)

Traceback (most recent call last):
  File "/Users/31_grudnia/Desktop/Python/Playground/Masters_Degree_Project/backend/MNIST_app/notebooks/test.py", line 188, in <module>
    model.train(X_train_flattened, Y_train_ohe, epochs=100, batch_size=10, verbose=True)
  File "/Users/31_grudnia/Desktop/Python/Playground/Masters_Degree_Project/backend/MNIST_app/notebooks/test.py", line 131, in train
    self.forward(batch_inputs)
  File "/Users/31_grudnia/Desktop/Python/Playground/Masters_Degree_Project/backend/MNIST_app/notebooks/test.py", line 99, in forward
    self.output = layer.forward(self.outputs)
  File "/Users/31_grudnia/Desktop/Python/Playground/Masters_Degree_Project/backend/MNIST_app/notebooks/test.py", line 58, in forward
    self.outputs = np.add(np.dot(inputs, self.weights.T), self.biases)
ValueError: operands could not be broadcast together with shapes (10,1024) (1024,1)

Why can't I add two matrices with shapes (10,1024) and (1024,1)?

As far as I know, I should be able to do matrix operations when the matrices have the same shape and when the number of columns in the first matrix equals the number of rows in the second.

python numpy machine-learning neural-network
1 Answer

0 votes

"As far as I know, I should be able to do matrix operations when they have the same shape"

The shapes of these arrays (not matrices) are different. One has shape (10, 1024), the other has shape (1024, 1).

"the number of columns in the first equals the number of rows in the second"

That allows a matrix product, which is what np.dot does when the arrays are 2-D.

import numpy as np

A = np.ones((10, 1024))
x = np.ones((1024, 1))
np.dot(A, x).shape  # (10, 1)
It does not allow array arithmetic such as array addition, which is performed element-wise:

np.add(A, x)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-4-34d2dc560770> in <cell line: 1>()
----> 1 np.add(A, x)

ValueError: operands could not be broadcast together with shapes (10,1024) (1024,1)
For that, the arrays need to be "broadcastable". For example, they would be broadcastable if they had the same number of columns and one of them had m rows while the other had m rows, 1 row, or was simply 1-D.
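For instance, reusing the A and x arrays from the example above, transposing x to shape (1, 1024) gives it the same number of columns as A and a single row, which broadcasts across A's 10 rows (a minimal sketch, nothing here beyond standard NumPy):

# x is (1024, 1); x.T is (1, 1024): same number of columns as A, one row.
np.add(A, x.T).shape        # (10, 1024)

# A 1-D array of length 1024 broadcasts the same way.
np.add(A, x.ravel()).shape  # (10, 1024)

# np.broadcast_shapes applies the rules without building any arrays:
np.broadcast_shapes((10, 1024), (1, 1024))  # (10, 1024)
np.broadcast_shapes((10, 1024), (1024, 1))  # raises ValueError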
add is the operation that appears to fail here. See the broadcasting rules: https://numpy.org/doc/stable/user/basics.broadcasting.html
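Applied to the code in the question: np.dot(inputs, self.weights.T) produces shape (batch_size, neurons), so the biases need a shape that broadcasts against it, e.g. (1, neurons) (as the "prev (1, neurons)" comment in __init__ hints) or a 1-D array of length neurons. A minimal sketch of just the affected line, with the input width of 784 assumed for illustration (e.g. flattened 28x28 images); this is not the author's original code:

import numpy as np

batch_size, neurons, n_inputs = 10, 1024, 784  # n_inputs assumed for the sketch

weights = np.random.randn(neurons, n_inputs)
biases = np.random.randn(1, neurons)           # (1, neurons) rather than (neurons, 1)

inputs = np.random.randn(batch_size, n_inputs)
outputs = np.add(np.dot(inputs, weights.T), biases)  # (10, 1024) + (1, 1024)
print(outputs.shape)                                 # (10, 1024)

Note that backward already reduces the delta with np.sum(self.delta, axis=0, keepdims=True), which yields shape (1, neurons), so the bias update stays consistent with this layout.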
