我需要你的帮助。我正在尝试修改 M. Nielsen [http://neuralnetworksanddeeplearning.com/index.html] 开发的用于MNIST数据分类的基于python的神经网络。特别是,我正在使用 network3.py 脚本。它使用Theano库。
此网络中的最后一层是softmax,但将来我想将此程序用于回归目的,因此,我需要对其进行修改,将最后一层更改为S型。
当我简单地更改时
activation_fn=softmax
到
activation_fn=sigmoid
程序无法正常运行。
下面提供了代码的重要部分。
# Build the convolutional network: two conv-pool stages with ReLU,
# one fully connected ReLU layer, and a softmax output over the
# 10 digit classes.
layers = [
    ConvPoolLayer(input_shape=(mini_batch_size, 1, 28, 28),
                  filter_shape=(20, 1, 5, 5),
                  poolsize=(2, 2),
                  activation_fn=ReLU),
    ConvPoolLayer(input_shape=(mini_batch_size, 20, 12, 12),
                  filter_shape=(40, 20, 5, 5),
                  poolsize=(2, 2),
                  activation_fn=ReLU),
    FullyConnectedLayer(n_in=40 * 4 * 4, n_out=100,
                        activation_fn=ReLU, p_dropout=0.0),
    SoftmaxLayer(n_in=100, n_out=10,
                 activation_fn=softmax, p_dropout=0.0),
]
net = Network(layers, mini_batch_size)
...
# Output layer: affine transform followed by a configurable activation
# (softmax in the original network), trained with the log-likelihood cost.
class SoftmaxLayer(object):

    def __init__(self, n_in, n_out, activation_fn, p_dropout):
        """Store hyper-parameters and create the shared weight/bias tensors.

        n_in/n_out are the input and output dimensions, activation_fn is
        applied to the affine output, and p_dropout is the dropout rate
        used on the training path.
        """
        self.n_in = n_in
        self.n_out = n_out
        self.activation_fn = activation_fn
        self.p_dropout = p_dropout
        # Gaussian initialization; weights are drawn first, then biases,
        # so the RNG consumption order matches the reference code.
        w_init = np.random.normal(loc=0.0, scale=np.sqrt(1.0/n_out),
                                  size=(n_in, n_out))
        b_init = np.random.normal(loc=0.0, scale=1.0, size=(n_out,))
        self.w = theano.shared(
            np.asarray(w_init, dtype=theano.config.floatX),
            name='w', borrow=True)
        self.b = theano.shared(
            np.asarray(b_init, dtype=theano.config.floatX),
            name='b', borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        """Wire up this layer's symbolic input/output expressions."""
        flat = inpt.reshape((mini_batch_size, self.n_in))
        self.inpt = flat
        # Inference path: scale activations by (1 - p_dropout) to
        # compensate for units dropped during training.
        self.output = self.activation_fn(
            (1 - self.p_dropout) * T.dot(flat, self.w) + self.b)
        # Predicted class = index of the largest activation per row.
        self.y_out = T.argmax(self.output, axis=1)
        # Training path: apply dropout to the flattened input.
        flat_drop = dropout_layer(
            inpt_dropout.reshape((mini_batch_size, self.n_in)),
            self.p_dropout)
        self.inpt_dropout = flat_drop
        self.output_dropout = self.activation_fn(
            T.dot(flat_drop, self.w) + self.b)

    def cost(self, net):
        """Negative mean log-likelihood of the true labels net.y."""
        rows = T.arange(net.y.shape[0])
        log_probs = T.log(self.output_dropout)
        return -T.mean(log_probs[rows, net.y])

    def accuracy(self, y):
        """Fraction of mini-batch examples whose prediction equals y."""
        return T.mean(T.eq(y, self.y_out))
我进行了以下修改:
1) 改变了目标标签的表示方式(之前它是 0、5、8 等,即待分类图片对应的数字)。现在它是一个包含 10 个元素的 one-hot 向量:0 对应 [1,0,0,...,0],5 对应 [0,0,0,0,0,1,0,...,0],依此类推。相应地,我也针对这一新格式对代码做了较小的修改。
2)更改了图层定义(下面的代码)。主要变化是成本和准确性。
# Fully connected layer with a configurable activation, adapted for
# one-hot vector targets (sigmoid output + binary cross-entropy cost).
class FullyConnectedLayer(object):

    def __init__(self, n_in, n_out, activation_fn, p_dropout):
        """Store hyper-parameters and create the shared weight/bias tensors.

        n_in/n_out are the input and output dimensions, activation_fn is
        applied to the affine output, and p_dropout is the dropout rate
        used on the training path.
        """
        self.n_in = n_in
        self.n_out = n_out
        self.activation_fn = activation_fn
        self.p_dropout = p_dropout
        # Gaussian initialization of weights and biases.
        self.w = theano.shared(
            np.asarray(np.random.normal(loc=0.0, scale=np.sqrt(1.0/n_out),
                                        size=(n_in, n_out)),
                       dtype=theano.config.floatX),
            name='w', borrow=True)
        self.b = theano.shared(
            np.asarray(np.random.normal(loc=0.0, scale=1.0, size=(n_out,)),
                       dtype=theano.config.floatX),
            name='b', borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        """Wire up this layer's symbolic input/output expressions."""
        self.inpt = inpt.reshape((mini_batch_size, self.n_in))
        # Inference path: scale activations by (1 - p_dropout) to
        # compensate for units dropped during training.
        self.output = self.activation_fn(
            (1 - self.p_dropout) * T.dot(self.inpt, self.w) + self.b)
        # Keep the raw activations; accuracy() takes the argmax itself
        # so the same layer also serves regression-style targets.
        self.y_out = self.output
        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape((mini_batch_size, self.n_in)),
            self.p_dropout)
        self.output_dropout = self.activation_fn(
            T.dot(self.inpt_dropout, self.w) + self.b)

    def cost(self, net):
        """Mean binary cross-entropy between one-hot targets net.y and
        the (dropout-path) sigmoid outputs.

        The activations are clipped into (eps, 1-eps) first: a saturated
        sigmoid yields exactly 0.0 or 1.0 in float32, and T.log of that
        produces -inf/NaN gradients, which makes training diverge.
        """
        eps = 1e-7
        p = T.clip(self.output_dropout, eps, 1.0 - eps)
        xent = -net.y * T.log(p) - (1 - net.y) * T.log(1 - p)
        return T.mean(xent)

    def accuracy(self, y):
        """Fraction of examples whose argmax prediction matches the
        argmax of the one-hot target vector y."""
        y_pred = T.argmax(self.y_out, axis=1)
        y_targ = T.argmax(y, axis=1)
        return T.mean(T.eq(y_targ, y_pred))