我有一个数据集文件夹,其中有 4 个名为 contexts(森林、冰川、山脉、海洋)的子文件夹,每个文件夹大约有 25000 个关于上下文的 jpg 图像,我正在使用随机梯度下降在 PyTorch 中训练卷积神经网络(CNN),并且我'我在训练循环中遇到“预期输入批次大小与目标批次大小匹配”错误。如何解决批量大小不匹配的问题?我可以进行哪些更改来解决训练循环中的此错误?先谢谢您的建议!
这是数据集
https://drive.google.com/file/d/1vXbTtm-PlTQPO-zN443Ehr7EsYuIoer_/view?usp=share_link\*
异常情况的屏幕截图*
这是我的代码
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import os
from PIL import Image
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
class ConvNet(nn.Module):
def __init__(self, num_classes=4):
super(ConvNet, self).__init__()
# Convolutional layers
self.conv1 = nn.Conv2d(in_channels=3, out_channels=4, kernel_size=3, stride=1, padding=1)
self.conv2 = nn.Conv2d(in_channels=4, out_channels=8, kernel_size=3, stride=1, padding=1)
self.conv3 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, stride=1, padding=1)
# Max-pooling layers
self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
# Fully connected (linear) layer
self.fc = nn.Linear(16 * 64 * 64, num_classes) # Adjust the input size based on your image dimensions
def forward(self, X):
# Convolutional layers with ReLU activations and max-pooling
X = F.relu(self.conv1(X))
X = self.pool(X)
X = F.relu(self.conv2(X))
X = self.pool(X)
X = F.relu(self.conv3(X))
X = self.pool(X)
# Flatten the output for the fully connected layer
X = X.view(-1, 16 * 64 * 64) # Adjust the size based on your image dimensions
# Fully connected layer
X = self.fc(X)
return X
class SceneDataset(Dataset):
def __init__(self, root_dir, transform=None):
self.root_dir = root_dir
self.transform = transform
self.image_list, self.labels = self.load_dataset()
# Create a mapping from class names to indices
self.class_to_index = {class_name: idx for idx, class_name in enumerate(set(self.labels))}
def load_dataset(self):
image_list = []
labels = []
for class_name in os.listdir(self.root_dir):
class_path = os.path.join(self.root_dir, class_name)
if os.path.isdir(class_path):
label = class_name
for filename in os.listdir(class_path):
if filename.endswith(".jpg"):
image_list.append(os.path.join(class_path, filename))
labels.append(label)
return image_list, labels
def __len__(self):
return len(self.image_list)
def __getitem__(self, index):
img_path = self.image_list[index]
label = self.labels[index]
image = Image.open(img_path).convert('RGB')
if self.transform:
image = self.transform(image)
# Get the class index
label_index = self.class_to_index[label]
# Convert label to tensor
label_tensor = torch.tensor(label_index, dtype=torch.long)
return image, label_tensor
def get_dataloaders(root, train_batchsize, test_batchsize):
transform = transforms.Compose([
transforms.Resize((256, 256)),
transforms.ToTensor(),
])
dataset = SceneDataset(root, transform=transform)
# Split the dataset into train, validation, and test sets
train_size = int(0.7 * len(dataset))
val_size = int(0.1 * len(dataset))
test_size = len(dataset) - train_size - val_size
train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
dataset, [train_size, val_size, test_size])
# Create data loaders
train_dataloader = DataLoader(train_dataset, batch_size=train_batchsize, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=test_batchsize, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=test_batchsize, shuffle=False)
return train_dataloader, val_dataloader, test_dataloader
# Example usage
root_directory = "data"
train_batchsize = 32
test_batchsize = 1
train_dataloader, val_dataloader, test_dataloader = get_dataloaders(root_directory, train_batchsize, test_batchsize)
# Helper for visualization
def img_show(image, label):
plt.figure()
plt.title(f'This is a {label}')
im = np.moveaxis(np.array(image), [0,1,2], [2, 0, 1])
plt.imshow(im)
plt.show()
# Visualize first 4 samples
for count, (image, label) in enumerate(train_dataloader):
img_show(image[0], label[0])
if count == 3:
break
max_epoch = 300
train_batch = 32
test_batch = 1
learning_rate = 0.01
# Create train, validation, and test dataset loaders
train_loader, val_loader, test_loader = get_dataloaders(root_directory, train_batch, train_batch) # Use the same batch size for validation
# Initialize your network
model = ConvNet()
# Define your loss function
criterion = nn.CrossEntropyLoss()
# Initialize optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=5e-04)
# Placeholder for best validation accuracy
best_val_accuracy = 0.0
# Placeholder for the best model state
best_model_state = None
# Placeholder for training and validation statistics
train_losses, val_losses = [], []
train_accuracies, val_accuracies = [], []
# Start training
for epoch in range(max_epoch):
model = model.train()
total_train_loss = 0.0
correct_train = 0
total_train = 0
for images, labels in train_loader:
optimizer.zero_grad()
# Forward pass
outputs = model(images)
# Ensure labels have the correct shape
if labels.size(0) != outputs.size(0):
labels = labels[:outputs.size(0)]
loss = criterion(outputs, labels.squeeze().long()) # Adjusted for label size
# Backward pass and optimization
loss.backward()
optimizer.step()
total_train_loss += loss.item()
_, predicted = torch.max(outputs.data, 1)
print(f"Predicted shape: {predicted.shape}, Labels shape: {labels[:predicted.size(0)].squeeze().shape}")
total_train += labels.size(0)
batch_size = min(labels.size(0), predicted.size(0))
correct_train += (predicted[:batch_size] == labels[:batch_size].squeeze()).sum().item()
# Calculate training accuracy and loss
train_accuracy = correct_train / total_train
train_losses.append(total_train_loss / len(train_loader))
train_accuracies.append(train_accuracy)
# Validation
model = model.eval()
total_val_loss = 0.0
correct_val = 0
total_val = 0
with torch.no_grad():
for images, labels in val_loader:
outputs = model(images)
loss = criterion(outputs, labels.squeeze().long()) # Convert labels to long tensor
total_val_loss += loss.item()
_, predicted = torch.max(outputs.data, 1)
total_train += labels.size(0)
correct_train += (predicted == labels[:predicted.size(0)].squeeze()).sum().item()
# Calculate validation accuracy and loss
val_accuracy = correct_val / total_val
val_losses.append(total_val_loss / len(val_loader))
val_accuracies.append(val_accuracy)
# Save the best model based on validation accuracy
if val_accuracy > best_val_accuracy:
best_val_accuracy = val_accuracy
best_model_state = model.state_dict()
# Save the best model state to a file
best_model_path = "best_cnn_sgd.pth"
torch.save(best_model_state, best_model_path)
# Plot losses vs epoch
plt.figure(figsize=(10, 5))
plt.plot(train_losses, label='Training Loss')
plt.plot(val_losses, label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and Validation Loss vs. Epoch')
plt.legend()
plt.show()
# Plot accuracies vs epoch
plt.figure(figsize=(10, 5))
plt.plot(train_accuracies, label='Training Accuracy')
plt.plot(val_accuracies, label='Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Training and Validation Accuracy vs. Epoch')
plt.legend()
plt.show()
我注意到训练循环中的批量大小存在问题,导致预测批量大小与目标批量大小之间不匹配。我尝试通过调整数据加载和模型输入维度来调查和解决问题。但是,问题仍然存在,我正在寻求有关如何在训练循环中正确处理批量大小以解决错误的指导。
主要问题在于您的模型架构。我建议用以下内容替换您当前的架构:
class ConvNet(nn.Module):
def __init__(self, num_classes=4):
super(ConvNet, self).__init__()
# Convolutional layers
self.conv1 = nn.Conv2d(in_channels=3, out_channels=4, kernel_size=3, stride=1, padding=1)
self.conv2 = nn.Conv2d(in_channels=4, out_channels=8, kernel_size=3, stride=1, padding=1)
self.conv3 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, stride=1, padding=1)
# Max-pooling layers
self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
# Fully connected (linear) layer
self.fc = nn.Linear(16*32*32, num_classes) # Adjust the input size based on your image dimensions
def forward(self, X):
# Convolutional layers with ReLU activations and max-pooling
X = F.relu(self.conv1(X))
X = self.pool(X)
X = F.relu(self.conv2(X))
X = self.pool(X)
X = F.relu(self.conv3(X))
X = self.pool(X)
# Flatten the output for the fully connected layer
X = X.view(X.size(0),-1) # Adjust the size based on your image dimensions
# Fully connected layer
X = self.fc(X)
return X
我对扁平化过程
X.view(X.size(0),-1)
和全连接网络self.fc
进行了修改。
此外,您还应该更改损失计算:
loss = criterion(outputs, labels.squeeze().long())
至:
loss = criterion(outputs, labels.long())
此外,在验证步骤中,您应确保在循环
correct_val
内跟踪 total_val
和 for images, labels in val_loader:
,以避免潜在的被零除错误。