PyTorch Convolutional Neural Network (CNN) Training Batch Size Mismatch Error

Problem description

I have a dataset folder with 4 subfolders named by scene context (forest, glacier, mountain, sea), each containing roughly 25,000 JPG images of that context. I am training a convolutional neural network (CNN) in PyTorch with stochastic gradient descent, and I'm hitting an "Expected input batch_size to match target batch_size" error in the training loop. How can I resolve this batch size mismatch? What changes should I make to the training loop to fix this error? Thanks in advance for any suggestions!

Here is the dataset:

https://drive.google.com/file/d/1vXbTtm-PlTQPO-zN443Ehr7EsYuIoer_/view?usp=share_link

Screenshot of the exception:

[error screenshot]

Here is my code:

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import os
from PIL import Image
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

class ConvNet(nn.Module):
    def __init__(self, num_classes=4):
        super(ConvNet, self).__init__()

        # Convolutional layers
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=4, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=4, out_channels=8, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, stride=1, padding=1)

        # Max-pooling layers
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Fully connected (linear) layer
        self.fc = nn.Linear(16 * 64 * 64, num_classes)  # Adjust the input size based on your image dimensions

    def forward(self, X):
        # Convolutional layers with ReLU activations and max-pooling
        X = F.relu(self.conv1(X))
        X = self.pool(X)
        X = F.relu(self.conv2(X))
        X = self.pool(X)
        X = F.relu(self.conv3(X))
        X = self.pool(X)

        # Flatten the output for the fully connected layer
        X = X.view(-1, 16 * 64 * 64)  # Adjust the size based on your image dimensions

        # Fully connected layer
        X = self.fc(X)

        return X

class SceneDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_list, self.labels = self.load_dataset()

        # Create a mapping from class names to indices
        self.class_to_index = {class_name: idx for idx, class_name in enumerate(set(self.labels))}

    def load_dataset(self):
        image_list = []
        labels = []

        for class_name in os.listdir(self.root_dir):
            class_path = os.path.join(self.root_dir, class_name)
            if os.path.isdir(class_path):
                label = class_name
                for filename in os.listdir(class_path):
                    if filename.endswith(".jpg"):
                        image_list.append(os.path.join(class_path, filename))
                        labels.append(label)

        return image_list, labels

    def __len__(self):
        return len(self.image_list)

    def __getitem__(self, index):
        img_path = self.image_list[index]
        label = self.labels[index]

        image = Image.open(img_path).convert('RGB')

        if self.transform:
            image = self.transform(image)

        # Get the class index
        label_index = self.class_to_index[label]

        # Convert label to tensor
        label_tensor = torch.tensor(label_index, dtype=torch.long)

        return image, label_tensor

def get_dataloaders(root, train_batchsize, test_batchsize):
    transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
    ])

    dataset = SceneDataset(root, transform=transform)

    # Split the dataset into train, validation, and test sets
    train_size = int(0.7 * len(dataset))
    val_size = int(0.1 * len(dataset))
    test_size = len(dataset) - train_size - val_size

    train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
        dataset, [train_size, val_size, test_size])

    # Create data loaders
    train_dataloader = DataLoader(train_dataset, batch_size=train_batchsize, shuffle=True)
    val_dataloader = DataLoader(val_dataset, batch_size=test_batchsize, shuffle=False)
    test_dataloader = DataLoader(test_dataset, batch_size=test_batchsize, shuffle=False)

    return train_dataloader, val_dataloader, test_dataloader

# Example usage
root_directory = "data"
train_batchsize = 32
test_batchsize = 1
train_dataloader, val_dataloader, test_dataloader = get_dataloaders(root_directory, train_batchsize, test_batchsize)

# Helper for visualization
def img_show(image, label):
    plt.figure()
    plt.title(f'This is a {label}')
    im = np.moveaxis(np.array(image), [0,1,2], [2, 0, 1])
    plt.imshow(im)
    plt.show()

# Visualize first 4 samples
for count, (image, label) in enumerate(train_dataloader):
    img_show(image[0], label[0])
    if count == 3:
        break

max_epoch = 300
train_batch = 32
test_batch = 1
learning_rate = 0.01

# Create train, validation, and test dataset loaders
train_loader, val_loader, test_loader = get_dataloaders(root_directory, train_batch, train_batch)  # Use the same batch size for validation

# Initialize your network
model = ConvNet()

# Define your loss function
criterion = nn.CrossEntropyLoss()

# Initialize optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=5e-04)

# Placeholder for best validation accuracy
best_val_accuracy = 0.0

# Placeholder for the best model state
best_model_state = None

# Placeholder for training and validation statistics
train_losses, val_losses = [], []
train_accuracies, val_accuracies = [], []

# Start training
for epoch in range(max_epoch):
    model = model.train()
    total_train_loss = 0.0
    correct_train = 0
    total_train = 0

    for images, labels in train_loader:
        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)

        # Ensure labels have the correct shape
        if labels.size(0) != outputs.size(0):
            labels = labels[:outputs.size(0)]

        loss = criterion(outputs, labels.squeeze().long())  # Adjusted for label size

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()

        _, predicted = torch.max(outputs.data, 1)
        print(f"Predicted shape: {predicted.shape}, Labels shape: {labels[:predicted.size(0)].squeeze().shape}")
        total_train += labels.size(0)
        batch_size = min(labels.size(0), predicted.size(0))
        correct_train += (predicted[:batch_size] == labels[:batch_size].squeeze()).sum().item()

    # Calculate training accuracy and loss
    train_accuracy = correct_train / total_train
    train_losses.append(total_train_loss / len(train_loader))
    train_accuracies.append(train_accuracy)

    # Validation
    model = model.eval()
    total_val_loss = 0.0
    correct_val = 0
    total_val = 0

    with torch.no_grad():
        for images, labels in val_loader:
            outputs = model(images)
            loss = criterion(outputs, labels.squeeze().long()) # Convert labels to long tensor
            total_val_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            total_train += labels.size(0)
            correct_train += (predicted == labels[:predicted.size(0)].squeeze()).sum().item()

    # Calculate validation accuracy and loss
    val_accuracy = correct_val / total_val
    val_losses.append(total_val_loss / len(val_loader))
    val_accuracies.append(val_accuracy)

    # Save the best model based on validation accuracy
    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        best_model_state = model.state_dict()

# Save the best model state to a file
best_model_path = "best_cnn_sgd.pth"
torch.save(best_model_state, best_model_path)

# Plot losses vs epoch
plt.figure(figsize=(10, 5))
plt.plot(train_losses, label='Training Loss')
plt.plot(val_losses, label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and Validation Loss vs. Epoch')
plt.legend()
plt.show()

# Plot accuracies vs epoch
plt.figure(figsize=(10, 5))
plt.plot(train_accuracies, label='Training Accuracy')
plt.plot(val_accuracies, label='Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Training and Validation Accuracy vs. Epoch')
plt.legend()
plt.show()


I noticed a batch size issue in the training loop that causes a mismatch between the predicted batch size and the target batch size. I tried to investigate and work around it by adjusting the data loading and the model's input dimensions, but the problem persists. I'm looking for guidance on how to handle batch sizes correctly in the training loop so the error goes away.

Tags: python, pytorch, computer-vision, conv-neural-network, training-data
1 Answer

The main problem lies in your model architecture. I suggest replacing your current architecture with the following:

class ConvNet(nn.Module):
    def __init__(self, num_classes=4):
        super(ConvNet, self).__init__()

        # Convolutional layers
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=4, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=4, out_channels=8, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, stride=1, padding=1)

        # Max-pooling layers
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Fully connected (linear) layer
        self.fc = nn.Linear(16 * 32 * 32, num_classes)  # 16 channels x 32x32 spatial after three poolings on a 256x256 input

    def forward(self, X):
        # Convolutional layers with ReLU activations and max-pooling
        X = F.relu(self.conv1(X))
        X = self.pool(X)
        X = F.relu(self.conv2(X))
        X = self.pool(X)
        X = F.relu(self.conv3(X))
        X = self.pool(X)
        # Flatten the output for the fully connected layer
        X = X.view(X.size(0), -1)  # Keep the batch dimension; infer the rest
        # Fully connected layer
        X = self.fc(X)
        return X
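If it helps to verify, here is a quick shape check using the ConvNet above (a minimal sketch, assuming 256x256 RGB inputs as produced by your Resize transform):

import torch

model = ConvNet(num_classes=4)
x = torch.randn(32, 3, 256, 256)  # dummy batch of 32 RGB images at 256x256
out = model(x)
print(out.shape)  # torch.Size([32, 4]) -- one row of logits per sample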

I changed the flattening step to X.view(X.size(0), -1) and adjusted the fully connected layer self.fc to match. The root cause: with 256x256 inputs and three 2x2 max-poolings, the feature map entering the flatten is 16 x 32 x 32 = 16,384 values per sample, not 16 x 64 x 64 = 65,536. Your original X.view(-1, 16 * 64 * 64) therefore packs several samples into each row (a batch of 32 becomes 8 rows of 65,536), so the model's output batch size no longer matches the target batch size, which is exactly the error you see.
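You can reproduce the mismatch in isolation (a minimal sketch, assuming a batch of 32):

import torch

# After three stride-2 poolings, a 256x256 input becomes a 16x32x32 feature map.
features = torch.randn(32, 16, 32, 32)       # 32 * 16384 = 524288 elements in total

wrong = features.view(-1, 16 * 64 * 64)      # 524288 / 65536 = 8 rows
print(wrong.shape)                           # torch.Size([8, 65536]) -- batch collapsed to 8

right = features.view(features.size(0), -1)  # keep the batch dimension explicit
print(right.shape)                           # torch.Size([32, 16384])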

You should also change the loss computation from:

loss = criterion(outputs, labels.squeeze().long())

to:

loss = criterion(outputs, labels.long())
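For reference, nn.CrossEntropyLoss expects raw logits of shape (N, C) and integer class targets of shape (N,). The squeeze() is what makes your version fragile: labels is already 1-D, and for a batch of one, squeeze() turns a (1,) tensor into a 0-d tensor. A small sanity check (assuming 4 classes):

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()
logits = torch.randn(32, 4)               # (batch, num_classes)
targets = torch.randint(0, 4, (32,))      # (batch,) class indices, dtype int64
print(criterion(logits, targets).item())  # shapes agree on the batch dimension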

Finally, in the validation step, make sure you update correct_val and total_val inside the for images, labels in val_loader: loop. Your current code increments correct_train and total_train there instead, which leaves total_val at zero and makes val_accuracy = correct_val / total_val a division by zero.
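A sketch of the corrected validation block, unchanged apart from the counter names and the loss call:

with torch.no_grad():
    for images, labels in val_loader:
        outputs = model(images)
        loss = criterion(outputs, labels.long())
        total_val_loss += loss.item()

        _, predicted = torch.max(outputs, 1)
        total_val += labels.size(0)                        # update the validation counters,
        correct_val += (predicted == labels).sum().item()  # not total_train / correct_train

val_accuracy = correct_val / total_val  # total_val is now nonzero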
