我正在尝试在包含 1500 张图像(15 个类别)的训练集上训练用于图像识别的卷积神经网络。有人告诉我,采用这种架构和从均值为 0、标准差为 0.01 的高斯分布得出的初始权重以及初始偏差值为 0 的情况,在适当的学习率下,它的准确度应该达到 30 左右%.
然而,它根本没有学到任何东西:准确率与随机分类器相似,并且训练后的权重仍然遵循正态分布。我做错了什么?
这是NN
class simpleCNN(nn.Module):
def __init__(self):
super(simpleCNN,self).__init__() #initialize the model
self.conv1=nn.Conv2d(in_channels=1,out_channels=8,kernel_size=3,stride=1) #Output image size is (size+2*padding-kernel)/stride -->62*62
self.relu1=nn.ReLU()
self.maxpool1=nn.MaxPool2d(kernel_size=2,stride=2) #outtput image 62/2-->31*31
self.conv2=nn.Conv2d(in_channels=8,out_channels=16,kernel_size=3,stride=1) #output image is 29*29
self.relu2=nn.ReLU()
self.maxpool2=nn.MaxPool2d(kernel_size=2,stride=2) #output image is 29/2-->14*14 (MaxPool2d approximates size with floor)
self.conv3=nn.Conv2d(in_channels=16,out_channels=32,kernel_size=3,stride=1) #output image is 12*12
self.relu3=nn.ReLU()
self.fc1=nn.Linear(32*12*12,15) #16 channels * 16*16 image (64*64 with 2 maxpooling of stride 2), 15 output features=15 classes
self.softmax = nn.Softmax(dim=1)
def forward(self,x):
x=self.conv1(x)
x=self.relu1(x)
x=self.maxpool1(x)
x=self.conv2(x)
x=self.relu2(x)
x=self.maxpool2(x)
x=self.conv3(x)
x=self.relu3(x)
x=x.view(-1,32*12*12)
x=self.fc1(x)
x=self.softmax(x)
return x
初始化:
def init_weights(m):
if isinstance(m,nn.Conv2d) or isinstance(m,nn.Linear):
nn.init.normal_(m.weight,0,0.01)
nn.init.zeros_(m.bias)
model = simpleCNN()
model.apply(init_weights)
训练功能:
loss_function=nn.CrossEntropyLoss()
optimizer=optim.SGD(model.parameters(),lr=0.1,momentum=0.9)
def train_one_epoch(epoch_index,loader):
running_loss=0
for i, data in enumerate(loader):
inputs,labels=data #get the minibatch
outputs=model(inputs) #forward pass
loss=loss_function(outputs,labels) #compute loss
running_loss+=loss.item() #sum up the loss for the minibatches processed so far
optimizer.zero_grad() #reset gradients
loss.backward() #compute gradient
optimizer.step() #update weights
return running_loss/(i+1) # average loss per minibatch
培训:
EPOCHS=20
best_validation_loss=np.inf
for epoch in range(EPOCHS):
print('EPOCH{}:'.format(epoch+1))
model.train(True)
train_loss=train_one_epoch(epoch,train_loader)
running_validation_loss=0.0
model.eval()
with torch.no_grad(): # Disable gradient computation and reduce memory consumption
for i,vdata in enumerate(validation_loader):
vinputs,vlabels=vdata
voutputs=model(vinputs)
vloss=loss_function(voutputs,vlabels)
running_validation_loss+=vloss.item()
validation_loss=running_validation_loss/(i+1)
print('LOSS train: {} validation: {}'.format(train_loss,validation_loss))
if validation_loss<best_validation_loss: #save the model if it's the best so far
timestamp=datetime.now().strftime('%Y%m%d_%H%M%S')
best_validation_loss=validation_loss
model_path='model_{}_{}'.format(timestamp,epoch)
torch.save(model.state_dict(),model_path)
使用默认初始化,效果会好一些,但使用高斯应该可以达到 30%。 您能发现一些可能导致它无法学习的问题吗?我已经尝试过不同的学习率和动力。
我能够使用您的模型在 MNIST 数据集(1500 个样本,10 个类别)上获得约 90% 的验证准确性。我使用了您定义的相同网络,但修改了图像尺寸和输出类的图层大小(28x28 灰度输入,10 类输出)。
我所做的主要事情:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
import torchvision
from datetime import datetime
import numpy as np
np.random.seed(0)
torch.manual_seed(0)
#Load data
mnist = torchvision.datasets.MNIST('./torch_mnist', train=True, download=True)
#Get tensors, and to appropriate dtypes
X = mnist.data.float()
y = mnist.targets.long()
#Normalise X
means = X.mean(dim=0)
stds = X.std(dim=0)
X = torchvision.transforms.Normalize(means, stds + 1e-10)(X)
X = torch.unsqueeze(X, dim=1)
#Shuffle. Stratified sampling of 1500 samples.
from sklearn.model_selection import train_test_split
X, _, y, _ = train_test_split(X, y, stratify=y, train_size=1500, shuffle=True, random_state=0)
class simpleCNN(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3, stride=1)
self.relu1 = nn.ReLU()
self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2)
self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, stride=1)
self.relu2 = nn.ReLU()
self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2)
self.conv3 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1)
self.relu3 = nn.ReLU()
# self.fc1 = nn.Linear(32 * 12 * 12, 15)
self.fc1 = nn.Linear(32 * 3 * 3, 10)
self.softmax = nn.Softmax(dim=1)
def forward(self,x):
x = self.conv1(x)
x = self.relu1(x)
x = self.maxpool1(x)
x = self.conv2(x)
x = self.relu2(x)
x = self.maxpool2(x)
x = self.conv3(x)
x = self.relu3(x)
# x = x.view(-1, 32 * 12 * 12)
x = x.view(-1, 32 * 3 * 3)
x = self.fc1(x)
x = self.softmax(x)
return x
def init_weights(m):
if isinstance(m,nn.Conv2d) or isinstance(m,nn.Linear):
nn.init.normal_(m.weight,0,0.01)
nn.init.zeros_(m.bias)
model = simpleCNN()
# model.apply(init_weights)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
optimizer = optim.Adam(model.parameters())
validation_size = 250
train_loader = DataLoader(list(zip(X[:-validation_size], y[:-validation_size])), shuffle=True, batch_size=32)
validation_loader = DataLoader(list(zip(X[-validation_size:], y[-validation_size:])), batch_size=validation_size)
def train_one_epoch(epoch_index, loader):
running_loss = 0
for i, data in enumerate(loader):
inputs, labels = data #get the minibatch
outputs = model(inputs) #forward pass
loss = loss_function(outputs, labels) #compute loss
running_loss += loss.item() #sum up the loss for the minibatches processed so far
optimizer.zero_grad() #reset gradients
loss.backward() #compute gradient
optimizer.step() #update weights
return running_loss / (i + 1) # average loss per minibatch
EPOCHS = 16
best_validation_loss = np.inf
train_losses = []
validation_losses = []
validation_accuracies = []
for epoch in range(EPOCHS):
print('EPOCH{:>2d}'.format(epoch + 1), end=' ')
model.train()
train_loss = train_one_epoch(epoch, train_loader)
running_validation_loss = 0.0
model.eval()
with torch.no_grad():
total_correct = 0
for i, vdata in enumerate(validation_loader):
vinputs, vlabels = vdata
voutputs = model(vinputs)
vloss = loss_function(voutputs, vlabels)
running_validation_loss += vloss.item()
total_correct += (voutputs.argmax(dim=1) == vlabels).sum()
validation_loss = running_validation_loss / (i + 1)
validation_acc = total_correct / len(validation_loader.dataset) * 100
print('LOSS train: {:1.3f} validation: {:1.3f} | ACC val: {:>5.1f}%'.format(
train_loss, validation_loss, validation_acc
))
if validation_loss < best_validation_loss: #save the model if it's the best so far
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
best_validation_loss = validation_loss
model_path = 'model_{}_{}'.format(timestamp, epoch)
torch.save(model.state_dict(), model_path)
train_losses.append(train_loss)
validation_losses.append(validation_loss)
validation_accuracies.append(validation_acc)
import matplotlib.pyplot as plt
plt.plot(train_losses, color='tab:red', linewidth=3, label='train loss')
plt.plot(validation_losses, color='tab:green', linewidth=3, label='validation loss')
plt.xlabel('Epoch')
plt.ylabel('CE loss')
ax_right = plt.gca().twinx()
ax_right.plot(validation_accuracies, color='tab:green', linestyle='--', label='validation accuracy')
ax_right.set_ylabel('accuracy (%)')
plt.gcf().legend(ncol=3)
plt.gcf().set_size_inches(6, 3)