我想用BP算法对神经网络预测心脏疾病。为此,我在这里使用链接UCI心脏疾病数据集:processed cleveland。要做到这一点,我用下面的博客上发现了CDE:Build a flexible Neural Network with Backpropagation in Python并根据我自己的数据集改了一点。我的代码如下:
import numpy as np
import csv
reader = csv.reader(open("cleveland_data.csv"), delimiter=",")
x = list(reader)
result = np.array(x).astype("float")
X = result[:, :13]
y0 = result[:, 13]
y1 = np.array([y0])
y = y1.T
# scale units
X = X / np.amax(X, axis=0) # maximum of X array
class Neural_Network(object):
def __init__(self):
# parameters
self.inputSize = 13
self.outputSize = 1
self.hiddenSize = 13
# weights
self.W1 = np.random.randn(self.inputSize, self.hiddenSize)
self.W2 = np.random.randn(self.hiddenSize, self.outputSize)
def forward(self, X):
# forward propagation through our network
self.z = np.dot(X, self.W1)
self.z2 = self.sigmoid(self.z) # activation function
self.z3 = np.dot(self.z2, self.W2)
o = self.sigmoid(self.z3) # final activation function
return o
def sigmoid(self, s):
# activation function
return 1 / (1 + np.exp(-s))
def sigmoidPrime(self, s):
# derivative of sigmoid
return s * (1 - s)
def backward(self, X, y, o):
# backward propgate through the network
self.o_error = y - o # error in output
self.o_delta = self.o_error * self.sigmoidPrime(o) # applying derivative of sigmoid to error
self.z2_error = self.o_delta.dot(
self.W2.T) # z2 error: how much our hidden layer weights contributed to output error
self.z2_delta = self.z2_error * self.sigmoidPrime(self.z2) # applying derivative of sigmoid to z2 error
self.W1 += X.T.dot(self.z2_delta) # adjusting first set (input --> hidden) weights
self.W2 += self.z2.T.dot(self.o_delta) # adjusting second set (hidden --> output) weights
def train(self, X, y):
o = self.forward(X)
self.backward(X, y, o)
NN = Neural_Network()
for i in range(100): # trains the NN 100 times
print("Input: \n" + str(X))
print("Actual Output: \n" + str(y))
print("Predicted Output: \n" + str(NN.forward(X)))
print("Loss: \n" + str(np.mean(np.square(y - NN.forward(X))))) # mean sum squared loss
print("\n")
NN.train(X, y)
但是,当我运行此代码,我所有的预言输出变为= 1几次迭代后,然后保持不变长达全部100迭代。什么是在代码中的问题?
我注意到几个错误:
[0, 1]
之间的值 - 预测概率西装。但目标似乎是[0, 4]
之间的值。这解释了网络的最大化输出以获得尽可能接近大唱片公司的愿望。但它不能去超过1.0
和卡住。
您应该摆脱最终乙状结肠或预处理标签和规模,以[0, 1]
。这两种方法都将使其更好地学习。1.0
),这可能是有点高,所以这是可能的NN发散。我的实验表明,0.01
是一个很好的学习速度,但你可以玩这一点。除此之外,您backprop似乎工作的权利。