我正在尝试仅使用 numpy 和 pandas 编写神经网络代码,以对 MNIST 数据集中的手写数字进行分类。这是我的代码:
data = np.array(data)
m, n = data.shape
np.random.shuffle(data) # shuffle before splitting into dev and training sets
data_dev = data[0:1000].T
y_dev = data_dev[0]
x_dev = data_dev[1:n]
x_dev = x_dev / 255.
data_train = data[1000:m].T
y_train = data_train[0]
x_train = data_train[1:n]
x_train = x_train / 255.
_,m_train = x_train.shape
class NeuralNet():
def __init__(self, i_lay, h_lay, o_lay):
self.i_lay = i_lay
self.h_lay = h_lay
self.o_lay = o_lay
self.w1 = np.random.rand(self.i_lay, self.h_lay)/np.sqrt(self.i_lay)
self.b1 = np.zeros((1, self.h_lay))
self.w2 = np.random.rand(self.h_lay, self.o_lay)/np.sqrt(self.h_lay)
self.b2 = np.zeros((1, self.o_lay))
def ReLU(self, z):
return np.maximum(z, 0)
def softmax(self, z):
return np.exp(z)/np.sum(np.exp(z))
def forward(self, weights1, bias1, weights2, bias2, x):
z1 = weights1.dot(x) + bias1
a1 = ReLU(z1)
z2 = weights2.dot(a1) + bias2
a2 = softmax(z2)
return z1, a1, z2, a2
def RelU_deriv(self, z):
return z > 0
def one_hot(self, y):
one_hot_y = np.zeros((y.size, y.max() + 1))
one_hot_y[np.arange(y.size), y] = 1
one_hot_y = one_hot_y.T
return one_hot_y
def backward(self, z1, a1, z2, a2, w1, w2, x, y):
one_hot_y = one_hot(y)
dz2 = a2 - one_hot_y
dw2 = 1 / m * dz2.dot(a1.T)
db2 = 1 / m * np.sum(dz2)
dz1 = w2.T.dot(dz2) * ReLU_deriv(z1)
dw1 = 1 / m * dz1.dot(x.T)
db1 = 1 / m * np.sum(dz1)
return dw1, db1, dw2, db2
def update_parameters(self, w1, b1, w2, b2, dw1, db1, dw2, db2, alpha):
w1 = w1 - alpha * dw1
b1 = b1 - alpha * db1
w2 = w2 - alpha * dw2
b2 = b2 - alpha * db2
return w1, b1, w2, b2
def get_preds(a2):
return np.argmax(a2, 0)
def get_acc(self, preds, y):
print(preds, y)
return np.sum(preds == y) / y.size
def gradient_descent(self, x, y, alpha, iterations):
w1, b1, w2, b2 = __init__()
for i in range(iterations):
z1, a1, z2, a2 = forward_prop(w1, b1, w2, b2, x)
dw1, db1, dw2, db2 = backward_prop(z1, a1, z2, a2, w1, w2, x, y)
w1, b1, w2, b2 = update_params(w1, b1, w2, b2, dw1, db1, dw2, db2, alpha)
if i % 10 == 0:
print("Iteration: ", i)
predictions = get_predictions(a2)
print(get_accuracy(predictions, y))
return w1, b1, w2, b2
nn = NeuralNet(784, 10, 10)
nn.gradient_descent(x_train, y_train, 0.1, 500)
我得到的错误是说“gradient_descent() 需要 4 个位置参数,但给出了 5 个”,这很令人困惑,因为我在最后调用它时显然只为我的方法分配了 4 个参数。