Convolutional neural network not learning


I'm trying to train a convolutional neural network for image recognition on a training set of 1500 images (15 classes). I was told that with this architecture, with initial weights drawn from a Gaussian distribution with mean 0 and standard deviation 0.01 and initial biases of 0, it should reach an accuracy of around 30% at a suitable learning rate.

However, it isn't learning anything at all: the accuracy is similar to that of a random classifier, and the trained weights still follow a normal distribution. What am I doing wrong?
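For reference, a check like this can show whether the weights have actually moved away from their initialization (a minimal sketch, assuming the model defined below):

for name, param in model.named_parameters():
  if 'weight' in name:
    # With the 0.01 Gaussian init below, a std that stays near 0.01 means the
    # layer was barely updated; run once after init and again after training.
    print(name, param.mean().item(), param.std().item())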

Here is the network:

import numpy as np
import torch
from torch import nn, optim
from datetime import datetime

class simpleCNN(nn.Module):
  def __init__(self):
    super(simpleCNN,self).__init__() #initialize the model

    self.conv1=nn.Conv2d(in_channels=1,out_channels=8,kernel_size=3,stride=1) #Output image size is (size+2*padding-kernel)/stride+1 --> 62*62
    self.relu1=nn.ReLU()
    self.maxpool1=nn.MaxPool2d(kernel_size=2,stride=2) #output image 62/2 --> 31*31

    self.conv2=nn.Conv2d(in_channels=8,out_channels=16,kernel_size=3,stride=1) #output image is 29*29
    self.relu2=nn.ReLU()
    self.maxpool2=nn.MaxPool2d(kernel_size=2,stride=2) #output image is 29/2 --> 14*14 (MaxPool2d rounds the size down)

    self.conv3=nn.Conv2d(in_channels=16,out_channels=32,kernel_size=3,stride=1) #output image is 12*12
    self.relu3=nn.ReLU()

    self.fc1=nn.Linear(32*12*12,15) #32 channels * 12*12 feature map (64*64 input after the convs and 2 maxpools of stride 2), 15 output features = 15 classes
    self.softmax = nn.Softmax(dim=1)

  def forward(self,x):
    x=self.conv1(x)
    x=self.relu1(x)
    x=self.maxpool1(x)

    x=self.conv2(x)
    x=self.relu2(x)
    x=self.maxpool2(x)

    x=self.conv3(x)
    x=self.relu3(x)

    x=x.view(-1,32*12*12)

    x=self.fc1(x)
    x=self.softmax(x)

    return x

The initialization:

def init_weights(m):
  if isinstance(m,nn.Conv2d) or isinstance(m,nn.Linear):
    nn.init.normal_(m.weight,0,0.01)
    nn.init.zeros_(m.bias)

model = simpleCNN()
model.apply(init_weights)

The training function:

loss_function=nn.CrossEntropyLoss()
optimizer=optim.SGD(model.parameters(),lr=0.1,momentum=0.9)

def train_one_epoch(epoch_index,loader):
  running_loss=0

  for i, data in enumerate(loader):

    inputs,labels=data #get the minibatch
    outputs=model(inputs) #forward pass

    loss=loss_function(outputs,labels) #compute loss
    running_loss+=loss.item() #sum up the loss for the minibatches processed so far

    optimizer.zero_grad() #reset gradients
    loss.backward() #compute gradient
    optimizer.step() #update weights

  return running_loss/(i+1) # average loss per minibatch

The training loop:

EPOCHS=20

best_validation_loss=np.inf

for epoch in range(EPOCHS):
  print('EPOCH{}:'.format(epoch+1))

  model.train(True)
  train_loss=train_one_epoch(epoch,train_loader)

  running_validation_loss=0.0

  model.eval()

  with torch.no_grad(): # Disable gradient computation and reduce memory consumption
    for i,vdata in enumerate(validation_loader):
      vinputs,vlabels=vdata
      voutputs=model(vinputs)
      vloss=loss_function(voutputs,vlabels)
      running_validation_loss+=vloss.item()
  validation_loss=running_validation_loss/(i+1)
  print('LOSS train: {} validation: {}'.format(train_loss,validation_loss))

  if validation_loss<best_validation_loss: #save the model if it's the best so far
    timestamp=datetime.now().strftime('%Y%m%d_%H%M%S')
    best_validation_loss=validation_loss
    model_path='model_{}_{}'.format(timestamp,epoch)
    torch.save(model.state_dict(),model_path)

With the default initialization it works somewhat better, but with the Gaussian initialization it should reach 30%. Can you spot anything that might be preventing it from learning? I have already tried different learning rates and momentum values.

python machine-learning deep-learning pytorch conv-neural-network
1 Answer

I was able to get about 90% validation accuracy on the MNIST dataset (1500 samples, 10 classes) using your model. I used the same network you defined, but changed the layer sizes for the image dimensions and output classes (28x28 grayscale input, 10 output classes).
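The new fc1 size follows from the usual convolution/pooling arithmetic. A quick sketch to verify it (conv_out is a helper written here for illustration, not part of the model):

def conv_out(size, kernel, stride, padding=0):
  return (size + 2 * padding - kernel) // stride + 1

# 28x28 MNIST input:
size = 28
size = conv_out(size, 3, 1)  # conv1    -> 26
size = conv_out(size, 2, 2)  # maxpool1 -> 13
size = conv_out(size, 3, 1)  # conv2    -> 11
size = conv_out(size, 2, 2)  # maxpool2 -> 5
size = conv_out(size, 3, 1)  # conv3    -> 3, so fc1 takes 32 * 3 * 3 features
# the same arithmetic gives 64 -> 62 -> 31 -> 29 -> 14 -> 12 for the original 64x64 input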

The main things I did:

  • Normalized the input images (a sketch for image-file data follows this list)
  • Used the default layer initialization
  • Used the Adam optimizer instead of SGD
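
For the original 64x64 grayscale dataset, the normalization step might look like this when loading from image files (a sketch: the mean/std values are placeholders that should be computed from your own training images, and the folder path is hypothetical):

import torchvision
import torchvision.transforms as T

transform = T.Compose([
    T.Grayscale(num_output_channels=1),  # make sure images have a single channel
    T.ToTensor(),                        # converts to a tensor scaled to [0, 1]
    T.Normalize(mean=[0.5], std=[0.5]),  # placeholder statistics, not computed values
])

train_dataset = torchvision.datasets.ImageFolder('path/to/train', transform=transform)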

import torch
from torch import nn, optim
from torch.utils.data import DataLoader

import torchvision

from datetime import datetime
import numpy as np

np.random.seed(0)
torch.manual_seed(0)

#Load data
mnist = torchvision.datasets.MNIST('./torch_mnist', train=True, download=True)

#Get tensors, and to appropriate dtypes
X = mnist.data.float()
y = mnist.targets.long()

#Normalise X
means = X.mean(dim=0)
stds = X.std(dim=0)
X = torchvision.transforms.Normalize(means, stds + 1e-10)(X)
X = torch.unsqueeze(X, dim=1)

#Shuffle. Stratified sampling of 1500 samples.
from sklearn.model_selection import train_test_split
X, _, y, _ = train_test_split(X, y, stratify=y, train_size=1500, shuffle=True, random_state=0)

class simpleCNN(nn.Module):
  def __init__(self):
    super().__init__()

    self.conv1 = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3, stride=1)
    self.relu1 = nn.ReLU()
    self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2)

    self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, stride=1)
    self.relu2 = nn.ReLU()
    self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2)

    self.conv3 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1)
    self.relu3 = nn.ReLU()

    # self.fc1 = nn.Linear(32 * 12 * 12, 15)
    self.fc1 = nn.Linear(32 * 3 * 3, 10)
    self.softmax = nn.Softmax(dim=1)

  def forward(self,x):
    x = self.conv1(x)
    x = self.relu1(x)
    x = self.maxpool1(x)

    x = self.conv2(x)
    x = self.relu2(x)
    x = self.maxpool2(x)

    x = self.conv3(x)
    x = self.relu3(x)

    # x = x.view(-1, 32 * 12 * 12)
    x = x.view(-1, 32 * 3 * 3)

    x = self.fc1(x)
    x = self.softmax(x)

    return x

def init_weights(m):
  if isinstance(m,nn.Conv2d) or isinstance(m,nn.Linear):
    nn.init.normal_(m.weight,0,0.01)
    nn.init.zeros_(m.bias)

model = simpleCNN()
# model.apply(init_weights)

loss_function = nn.CrossEntropyLoss()
# optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
optimizer = optim.Adam(model.parameters())

validation_size = 250
train_loader = DataLoader(list(zip(X[:-validation_size], y[:-validation_size])), shuffle=True, batch_size=32)
validation_loader = DataLoader(list(zip(X[-validation_size:], y[-validation_size:])), batch_size=validation_size)

def train_one_epoch(epoch_index, loader):
  running_loss = 0

  for i, data in enumerate(loader):

    inputs, labels = data #get the minibatch
    outputs = model(inputs) #forward pass

    loss = loss_function(outputs, labels) #compute loss
    running_loss += loss.item() #sum up the loss for the minibatches processed so far

    optimizer.zero_grad() #reset gradients
    loss.backward() #compute gradient
    optimizer.step() #update weights

  return running_loss / (i + 1) # average loss per minibatch

EPOCHS = 16

best_validation_loss = np.inf

train_losses = []
validation_losses = []
validation_accuracies = []

for epoch in range(EPOCHS):
    print('EPOCH{:>2d}'.format(epoch + 1), end='    ')

    model.train()
    train_loss = train_one_epoch(epoch, train_loader)
    
    running_validation_loss = 0.0

    model.eval()

    with torch.no_grad():
        total_correct = 0
        for i, vdata in enumerate(validation_loader):
            vinputs, vlabels = vdata
            voutputs = model(vinputs)
            vloss = loss_function(voutputs, vlabels)
            running_validation_loss += vloss.item()
            
            total_correct += (voutputs.argmax(dim=1) == vlabels).sum()
    validation_loss = running_validation_loss / (i + 1)
    validation_acc = total_correct / len(validation_loader.dataset) * 100
    print('LOSS train: {:1.3f} validation: {:1.3f} | ACC val: {:>5.1f}%'.format(
        train_loss, validation_loss, validation_acc
    ))
  
    if validation_loss < best_validation_loss: #save the model if it's the best so far
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        best_validation_loss = validation_loss
        model_path = 'model_{}_{}'.format(timestamp, epoch)
        torch.save(model.state_dict(), model_path)
    
    train_losses.append(train_loss)
    validation_losses.append(validation_loss)
    validation_accuracies.append(validation_acc)

import matplotlib.pyplot as plt
plt.plot(train_losses, color='tab:red', linewidth=3, label='train loss')
plt.plot(validation_losses, color='tab:green', linewidth=3, label='validation loss')
plt.xlabel('Epoch')
plt.ylabel('CE loss')

ax_right = plt.gca().twinx()
ax_right.plot(validation_accuracies, color='tab:green', linestyle='--', label='validation accuracy')
ax_right.set_ylabel('accuracy (%)')

plt.gcf().legend(ncol=3)
plt.gcf().set_size_inches(6, 3)