pytorch中执行cnn时出现shape错误

问题描述 投票:0回答:1

我是 pytorch 的新手（今天刚开始学）。我在 kaggle 上看到了一些例子，并尝试把我的 CNN 从 TensorFlow 迁移到 torch，以便更好地利用 GPU。然而，我遇到了形状（shape）问题。

我想让训练集和测试集中的数据保持类别平衡，所以使用了 train_test_split。虽然它没有报错，但我不确定自己是否用对了。

有人可以帮忙吗?

我已经在其他Python文件中定义了自定义数据集并将其导入到主模块中


class CustomDataset(Dataset):
    """Image-folder dataset: each sub-folder of *root_folder_path* is a class.

    Collects every ``*.jpg`` under each class folder, maps folder names to
    integer labels (alphabetical folder order), and returns normalized
    grayscale tensors of shape (1, 900, 300) together with the int label.
    """

    def __init__(self, root_folder_path):
        self.root_folder_path = root_folder_path
        self.image_files = []   # absolute path of every image
        self.labels = []        # integer label per image, aligned with image_files

        # Map each class folder name to an integer label; sorting makes the
        # folder -> label assignment deterministic across runs.
        folders = sorted(
            f for f in os.listdir(root_folder_path)
            if os.path.isdir(os.path.join(root_folder_path, f))
        )
        self.label_dict = {folder: i for i, folder in enumerate(folders)}

        # Collect image paths and corresponding labels
        for folder in folders:
            folder_path = os.path.join(root_folder_path, folder)
            image_files = sorted(
                f for f in os.listdir(folder_path)
                if os.path.isfile(os.path.join(folder_path, f)) and f.endswith('.jpg')
            )
            self.image_files.extend(os.path.join(folder_path, img) for img in image_files)
            self.labels.extend([self.label_dict[folder]] * len(image_files))

        # Resize takes (height, width) = (900, 300). Grayscale() is a no-op
        # because the images are already read single-channel, but it is kept
        # as a safety net for any stray color file.
        self.transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((900, 300)),
            transforms.Grayscale(),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5], std=[0.5])
        ])

    def __len__(self):
        """Number of images found under the root folder."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Return ``(image_tensor, int_label)`` for dataset index *idx*."""
        image_path = self.image_files[idx]
        label = self.labels[idx]
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread silently returns None on unreadable/corrupt files;
            # fail loudly here instead of letting the transform raise a
            # confusing TypeError later.
            raise FileNotFoundError(f"Could not read image: {image_path}")
        return self.transform(image), label

这是我的主要脚本


if __name__ == '__main__':

    # Instantiate the custom dataset
    root_folder_path = r'W:\MASTER_BAGCHI_SCHALDACH\THESIS\code and dataset\image_dataset_300_900_10_classes'
    dataset = CustomDataset(root_folder_path)

    print("Labels:", sorted(dataset.label_dict.keys()))
    print("Total number of labels:", len(dataset.label_dict))

    # Display a few random images from each class folder
    n_images_to_display = 4
    n_folders = len(dataset.label_dict)
    fig, ax = plt.subplots(n_images_to_display, n_folders, figsize=(n_folders * 4, n_images_to_display * 4))

    for col, (folder, label) in enumerate(dataset.label_dict.items()):
        # Sample dataset indices BEFORE loading, so only the images actually
        # displayed are read from disk and transformed (the original loaded
        # every image of the class first).
        class_indices = [i for i, lbl in enumerate(dataset.labels) if lbl == label]
        chosen = random.sample(class_indices, min(n_images_to_display, len(class_indices)))
        for row, ds_idx in enumerate(chosen):
            image = dataset[ds_idx][0]
            # squeeze drops the singleton channel dim for grayscale display
            ax[row, col].imshow(image.squeeze(), cmap='gray')
            ax[row, col].axis('off')
        ax[0, col].set_title(folder, fontsize=30)

    # tight_layout must run BEFORE show(); calling it afterwards (as the
    # original did) has no visible effect on the already-rendered figure.
    fig.tight_layout(pad=0, w_pad=0, h_pad=0)
    plt.show()

    from torch.utils.data import DataLoader, Subset
    from sklearn.model_selection import train_test_split

    TEST_SIZE = 0.2
    BATCH_SIZE = 64
    SEED = 42

    # The labels are already stored on the dataset; reading them directly
    # avoids loading and transforming every image just to recover its label.
    labels = np.array(dataset.labels)

    # Stratified split over integer indices (not the data itself) keeps the
    # class distribution balanced between train and test.
    train_indices, test_indices, _, _ = train_test_split(
        range(len(dataset)),
        labels,
        stratify=labels,
        test_size=TEST_SIZE,
        random_state=SEED
    )

    # generate subsets based on indices
    train_split = Subset(dataset, train_indices)
    test_split = Subset(dataset, test_indices)
    print('Length of train_batch:', len(train_split))
    print('Length of test_batch:', len(test_split))

    # create batches
    train_loader = DataLoader(train_split, batch_size=BATCH_SIZE, num_workers=6, shuffle=True, pin_memory=True)
    test_loader = DataLoader(test_split, batch_size=BATCH_SIZE, num_workers=6, pin_memory=True)

    class ImageClassificationBase(nn.Module):
        """Shared training/validation helpers for image classifiers.

        Subclasses only implement ``forward``; these methods supply the
        per-batch loss, per-batch validation metrics, epoch aggregation and
        progress printing used by ``fit``/``evaluate``.
        """

        def training_step(self, batch):
            """Return the cross-entropy loss for one (images, labels) batch."""
            images, labels = batch
            out = self(images)                  # Generate predictions
            loss = F.cross_entropy(out, labels) # Calculate loss
            return loss

        @staticmethod
        def accuracy(outputs, labels):
            """Fraction of samples whose arg-max class matches the label.

            *outputs* is (batch, n_classes) logits, *labels* is (batch,)
            integer class ids. Comparing raw logits against labels is the
            shape mismatch from the question; compare arg-max indices.
            (Originally declared without ``self``/``@staticmethod``, so it
            would break if called on an instance.)
            """
            _, preds = torch.max(outputs, dim=1)
            return torch.tensor(torch.sum(preds == labels).item() / len(preds))

        def validation_step(self, batch):
            """Return detached loss and accuracy for one validation batch."""
            images, labels = batch
            out = self(images)                    # Generate predictions
            loss = F.cross_entropy(out, labels)   # Calculate loss
            # Use the class's own accuracy; the original relied on a
            # module-level `accuracy` defined much later in the script.
            acc = self.accuracy(out, labels)
            return {'val_loss': loss.detach(), 'val_acc': acc}

        def validation_epoch_end(self, outputs):
            """Average the per-batch validation metrics over an epoch."""
            batch_losses = [x['val_loss'] for x in outputs]
            epoch_loss = torch.stack(batch_losses).mean()   # Combine losses
            batch_accs = [x['val_acc'] for x in outputs]
            epoch_acc = torch.stack(batch_accs).mean()      # Combine accuracies
            return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}

        def epoch_end(self, epoch, result):
            """Print a one-line summary of the epoch's metrics."""
            print("Epoch [{}], train_loss: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}".format(
                epoch, result['train_loss'], result['val_loss'], result['val_acc']))
            

    import torch.nn.init as init
    class ImageClassification(ImageClassificationBase):
        """CNN for (1, 900, 300) grayscale inputs producing 10 class logits."""

        def __init__(self):
            super().__init__()
            layers = []
            # Four conv blocks; each halves both spatial dimensions via the
            # 2x2 average pool. Channel progression: 1->32->32->64->64.
            for c_in, c_out in ((1, 32), (32, 32), (32, 64), (64, 64)):
                layers += [
                    nn.Conv2d(c_in, c_out, kernel_size=3, padding=1),
                    nn.LeakyReLU(0.01),
                    nn.BatchNorm2d(c_out),
                    nn.AvgPool2d(kernel_size=2, stride=2),
                ]
            # Classifier head; 900x300 -> 56x18 after four /2 poolings,
            # hence the 64 * 56 * 18 flattened feature size.
            layers += [
                nn.Flatten(),
                nn.Dropout(0.3),
                nn.Linear(64 * 56 * 18, 64),
                nn.LeakyReLU(0.01),
                nn.BatchNorm1d(64),
                nn.Dropout(0.2),
                nn.Linear(64, 64),
                nn.LeakyReLU(0.01),
                nn.BatchNorm1d(64),
                nn.Dropout(0.2),
                nn.Linear(64, 10),  # Output layer
            ]
            self.network = nn.Sequential(*layers)
            # Initialize the weights of convolutional layers
            self._initialize_weights()

        def _initialize_weights(self):
            # Kaiming init matched to the LeakyReLU activations.
            for module in self.modules():
                if isinstance(module, nn.Conv2d):
                    init.kaiming_uniform_(module.weight, mode='fan_in', nonlinearity='leaky_relu')

        def forward(self, xb):
            return self.network(xb)

    def get_default_device():
        """Pick CUDA when available, otherwise fall back to the CPU."""
        use_cuda = torch.cuda.is_available()
        return torch.device('cuda' if use_cuda else 'cpu')
        

    def to_device(data, device):
        """Recursively move a tensor, or a list/tuple of tensors, to *device*."""
        if not isinstance(data, (list, tuple)):
            return data.to(device, non_blocking=True)
        # Note: tuples come back as lists, matching the original behavior.
        return [to_device(item, device) for item in data]

    class DeviceDataLoader():
        """Wrap a DataLoader so every batch it yields lives on *device*."""

        def __init__(self, dl, device):
            self.dl = dl
            self.device = device

        def __len__(self):
            """Number of batches in the wrapped loader."""
            return len(self.dl)

        def __iter__(self):
            """Yield each batch after moving it to the target device."""
            for batch in self.dl:
                yield to_device(batch, self.device)

    # Select the compute device and prepare model/loaders for it.
    # (The original had a bare `device` expression here — a no-op leftover
    # from a notebook cell — which has been removed.)
    device = get_default_device()

    torch.cuda.empty_cache()
    model = ImageClassification()

    random_seed = 99
    torch.manual_seed(random_seed)

    # Wrap the loaders so batches are moved to the device automatically.
    train_loader = DeviceDataLoader(train_loader, device)
    test_loader = DeviceDataLoader(test_loader, device)

    # nn.Module.to mutates in place, so the return value can be ignored here.
    to_device(model, device)

    @torch.no_grad()
    def evaluate(model, val_loader):
        """Run one full validation pass and return the aggregated metrics."""
        model.eval()
        step_results = []
        for batch in val_loader:
            step_results.append(model.validation_step(batch))
        return model.validation_epoch_end(step_results)

    def accuracy(outputs, labels):
        """Fraction of rows in *outputs* whose arg-max equals its label."""
        predicted = outputs.argmax(dim=1)
        hits = (predicted == labels).sum().item()
        return torch.tensor(hits / len(predicted))

    def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.RMSprop):
        """Train *model* for *epochs* epochs; return a list of per-epoch metrics.

        Each history entry is the dict from ``validation_epoch_end`` plus a
        ``train_loss`` key with the mean training loss of that epoch.
        """
        history = []
        optimizer = opt_func(model.parameters(), lr)
        for epoch in range(epochs):
            # Training Phase
            model.train()
            train_losses = []
            for batch in train_loader:
                loss = model.training_step(batch)
                # detach(): keeping the graph for every batch loss would hold
                # all intermediate activations in memory for the whole epoch.
                train_losses.append(loss.detach())
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()
            # Validation phase
            result = evaluate(model, val_loader)
            result['train_loss'] = torch.stack(train_losses).mean().item()
            model.epoch_end(epoch, result)
            history.append(result)
        return history

    # Re-create the model and place it on the selected device (CPU or GPU).
    model=to_device(ImageClassification(),device)

    #initial evaluation of the model
    # Baseline metrics from the untrained network (expect ~10% accuracy
    # for 10 balanced classes).
    evaluate(model,test_loader)

数据看起来像这样,模型中图像的大小是 [1,900,300],其中 1 代表灰度通道,900 - 图像的高度(以像素为单位),300 - 图像的宽度(以像素为单位)

Sample data

错误是

  File "w:\MASTER_BAGCHI_SCHALDACH\THESIS\code and dataset\10 class cropped 300_900 runs\10_class_torch.py", line 236, in <module>
    evaluate(model,test_loader)
  File "C:\Users\smjobagc\AppData\Local\miniconda3\envs\FSV\lib\site-packages\torch\autograd\grad_mode.py", line 28, in decorate_context
    return func(*args, **kwargs)
  File "w:\MASTER_BAGCHI_SCHALDACH\THESIS\code and dataset\10 class cropped 300_900 runs\10_class_torch.py", line 206, in evaluate
    outputs = [model.validation_step(batch) for batch in val_loader]
  File "w:\MASTER_BAGCHI_SCHALDACH\THESIS\code and dataset\10 class cropped 300_900 runs\10_class_torch.py", line 206, in <listcomp>
    outputs = [model.validation_step(batch) for batch in val_loader]
  File "w:\MASTER_BAGCHI_SCHALDACH\THESIS\code and dataset\10 class cropped 300_900 runs\10_class_torch.py", line 92, in validation_step
    acc = accuracy(out, labels)           # Calculate accuracy
  File "w:\MASTER_BAGCHI_SCHALDACH\THESIS\code and dataset\10 class cropped 300_900 runs\10_class_torch.py", line 211, in accuracy
    return torch.tensor(torch.sum(preds == labels).item() / len(preds))
RuntimeError: The size of tensor a (64) must match the size of tensor b (10) at non-singleton dimension 1

这是数据集的驱动器链接:https://drive.google.com/drive/folders/1PsT9_HWX4snfgnhlwC6xM4rNjcoqXdk5?usp=drive_link

python pytorch conv-neural-network
1个回答
0
投票

您收到的错误消息暴露了问题以及触发它的代码:

张量 a (64) 的大小必须与张量 b (10) 在非单一维度 1 上的大小相匹配
（The size of tensor a (64) must match the size of tensor b (10) at non-singleton dimension 1）

退一步,你会发现:

  • 数据加载器在每个批次上输出形状为 (64, 1, 900, 300) 的张量，即 64 张 900 x 300 的灰度图像；
  • 以及形状为 (64,) 的对应真实标签；
  • 模型输出形状为 (64, 10) 的张量，即每个批次元素的 10 个类别 logits：q1、q2、...、q10；
  • 您正在尝试比较两个形状不同的张量：preds == labels，其中 preds 是 (64, 10)，而 labels 是 (64,)。

因此出现了这个错误，您可以把它理解为：

张量 labels (64) 的大小必须与张量 preds (10) 在非单一维度 1 上的大小相匹配

您正在尝试将图像的估计概率与整数类进行比较,您如何期望它能起作用?相反,您应该获得估计 logits 的 arg max,只有这样与真实情况进行比较以测量准确性才有意义。

© www.soinside.com 2019 - 2024. All rights reserved.