RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 128 but got size 256. The mismatched sizes in the error are always off by a factor of 2


I have this code:

import logging
import os
import sys
import tempfile
from glob import glob

import torch
from torch.cuda.amp import autocast, GradScaler
from PIL import Image
from torch.utils.tensorboard import SummaryWriter

import monai
from monai.data import create_test_image_2d, list_data_collate, decollate_batch, DataLoader
from monai.inferers import sliding_window_inference
from monai.metrics import DiceMetric
from monai.transforms import (
    Activations,
    EnsureChannelFirstd,
    Compose,
    ScaleIntensityd,
    ToTensor,
    DivisiblePadd,
    AsDiscrete
)
from monai.visualize import plot_2d_or_3d_image


def main(tempdir):
    monai.config.print_config()
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)

    # Check and convert data format only once
    converted_raw_dict, converted_analyzed_dict = check_and_convert_format(updated_raw_dict, new_analyzed_dict)

    # Check if the dictionaries contain the same length or not, then create train val test:
    if len(converted_raw_dict) != len(converted_analyzed_dict):
        raise ValueError("The lengths of converted_raw_dict and converted_analyzed_dict do not match.")

    num_images = len(converted_raw_dict)

    # Calculate the number of images for training, validation, and test, e.g., using an 80-10-10 split
    raw_image_list = list(converted_raw_dict.items())
    analyzed_image_list = list(converted_analyzed_dict.items())

    # Calculate the number of images for training, validation, and test
    train_percentage = 0.8
    val_percentage = 0.1
    num_images = len(raw_image_list)
    num_train_images = int(num_images * train_percentage)
    num_val_images = int(num_images * val_percentage)

    # Select images for training, validation, and test
    train_files = [{"img": image, "seg": converted_analyzed_dict[name]} for name, image in raw_image_list[:num_train_images]]
    val_files = [{"img": image, "seg": converted_analyzed_dict[name]} for name, image in raw_image_list[num_train_images:num_train_images + num_val_images]]
    test_files = [{"img": image, "seg": converted_analyzed_dict[name]} for name, image in raw_image_list[num_train_images + num_val_images:]]

    # define transforms for image and segmentation
    train_transforms = Compose(
        [
            ToTensor(),
            EnsureChannelFirstd(keys=["img", "seg"], channel_dim=-1),  # Use channel_dim=-1 for NumPy arrays
            ScaleIntensityd(keys=["img", "seg"]),
            DivisiblePadd(keys=["img", "seg"], k=16),
        ]
    )
    val_transforms = Compose(
        [
            ToTensor(),
            EnsureChannelFirstd(keys=["img", "seg"], channel_dim=-1),  # Use channel_dim=-1 for NumPy arrays
            ScaleIntensityd(keys=["img", "seg"]),
            DivisiblePadd(keys=["img", "seg"], k=16),
        ]
    )

    # define dataset, data loader
    check_ds = monai.data.Dataset(data=train_files, transform=train_transforms)
    # load a single transformed sample to sanity-check the tensor shapes
    check_loader = DataLoader(check_ds, batch_size=1, num_workers=1, collate_fn=list_data_collate)
    check_data = monai.utils.misc.first(check_loader)
    print(check_data["img"].shape, check_data["seg"].shape)

    # create a training data loader
    train_ds = monai.data.Dataset(data=train_files, transform=train_transforms)
    train_loader = DataLoader(
        train_ds,
        batch_size=1,
        shuffle=True,
        num_workers=1,
        collate_fn=list_data_collate,
        pin_memory=torch.cuda.is_available(),
    )
    # create a validation data loader
    val_ds = monai.data.Dataset(data=val_files, transform=val_transforms)
    val_loader = DataLoader(val_ds, batch_size=1, num_workers=1, collate_fn=list_data_collate)
    dice_metric = DiceMetric(include_background=True, reduction="mean", get_not_nans=False)
    post_trans = Compose([Activations(sigmoid=True), AsDiscrete(threshold=0.5)])
    # create UNet, DiceLoss and Adam optimizer
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Print the shape of your input image for debugging
    #input_image_shape = train_files[0]["img"].shape
    #print("Input image shape:", input_image_shape)

    model = monai.networks.nets.UNet(
        spatial_dims=2,
        in_channels=1,
        out_channels=1,
        channels=(16, 32, 64, 128, 256),
        strides=(2, 2, 2, 2),
        num_res_units=2,
    ).to(device)
    loss_function = monai.losses.DiceLoss(sigmoid=True)
    optimizer = torch.optim.Adam(model.parameters(), 1e-3)

    # start a typical PyTorch training
    val_interval = 5
    best_metric = -1
    best_metric_epoch = -1
    epoch_loss_values = list()
    metric_values = list()
    writer = SummaryWriter()
    # Define the number of mini-batches to accumulate gradients over
    accumulation_steps = 4  # You can adjust this value based on your GPU memory capacity
    scaler = GradScaler()
    for epoch in range(10):
        print("-" * 10)
        print(f"epoch {epoch + 1}/{10}")
        model.train()
        epoch_loss = 0
        step = 0
        accumulated_loss = 0  # Initialize accumulated loss
        for batch_data in train_loader:
            step += 1
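            # per the shapes reported below the code, these come out as torch.Size([1, 1536, 1152])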
            inputs, labels = batch_data["img"].to(device), batch_data["seg"].to(device)
            optimizer.zero_grad()
            with autocast():
              outputs = model(inputs)
              loss = loss_function(outputs, labels)
              loss /= accumulation_steps

            scaler.scale(loss).backward()

            if step % accumulation_steps == 0:
                scaler.step(optimizer)
                scaler.update()
                optimizer.zero_grad()
                epoch_loss += accumulated_loss.item()  # Accumulated loss for logging
                accumulated_loss = 0  # Reset accumulated loss

            else:
                accumulated_loss += loss  # Accumulate the loss

            epoch_len = len(train_ds) // (train_loader.batch_size * accumulation_steps)
            print(f"{step}/{epoch_len}, train_loss: {loss.item():.4f}")
            writer.add_scalar("train_loss", loss.item(), epoch_len * epoch + step)

        # Handle any remaining accumulated loss
        if step % accumulation_steps != 0:
            optimizer.step()
            epoch_loss += accumulated_loss.item()

        epoch_loss /= step
        epoch_loss_values.append(epoch_loss)
        print(f"epoch {epoch + 1} average loss: {epoch_loss:.4f}")

        if (epoch + 1) % val_interval == 0:
            model.eval()
            with torch.no_grad():
                val_images = None
                val_labels = None
                val_outputs = None
                for val_data in val_loader:
                    val_images, val_labels = val_data["img"].to(device), val_data["seg"].to(device)
                    roi_size = (64, 64)
                    sw_batch_size = 4
                    val_outputs = sliding_window_inference(val_images, roi_size, sw_batch_size, model)
                    val_outputs = [post_trans(i) for i in decollate_batch(val_outputs)]
                    # compute metric for current iteration
                    dice_metric(y_pred=val_outputs, y=val_labels)
                # aggregate the final mean dice result
                metric = dice_metric.aggregate().item()
                # reset the status for next validation round
                dice_metric.reset()
                metric_values.append(metric)
                if metric > best_metric:
                    best_metric = metric
                    best_metric_epoch = epoch + 1
                    torch.save(model.state_dict(), "best_metric_model_segmentation2d_dict.pth")
                    print("saved new best metric model")
                print(
                    "current epoch: {} current mean dice: {:.4f} best mean dice: {:.4f} at epoch {}".format(
                        epoch + 1, metric, best_metric, best_metric_epoch
                    )
                )
                writer.add_scalar("val_mean_dice", metric, epoch + 1)
                
                # Print the shape of inputs, labels, and outputs
                print("Input shape:", inputs.shape)
                print("Label shape:", labels.shape)
                print("Output shape:", outputs.shape)

                # plot the last model output as GIF image in TensorBoard with the corresponding image and label
                plot_2d_or_3d_image(val_images, epoch + 1, writer, index=0, tag="image")
                plot_2d_or_3d_image(val_labels, epoch + 1, writer, index=0, tag="label")
                plot_2d_or_3d_image(val_outputs, epoch + 1, writer, index=0, tag="output")

    print(f"train completed, best_metric: {best_metric:.4f} at epoch: {best_metric_epoch}")
    writer.close()


if __name__ == "__main__":
    with tempfile.TemporaryDirectory() as tempdir:
        main(tempdir)

My network is fed tensors with these shapes: torch.Size([1, 1536, 1152]) and torch.Size([1, 1536, 1152]).

But I always run into this runtime error:

RuntimeError: Sizes of tensors must match except in dimension 1.
Expected size 128 but got size 256 for tensor number 1 in the list.

If I change the channel sizes, say by multiplying them all by 2, I get expected size 256 but size 512 instead. I am fairly sure I am making an obvious mistake, but I cannot find it.

I have been testing a simple UNet with MONAI on Google Colab, but I keep hitting this tensor shape mismatch. I am only trying to overfit the network on small images so I know I am on the right track, but there is some syntax or architecture-related problem I cannot fix. I changed channels=(16, 32, 64, 128, 256) to channels=(32, 64, 128, 256, 512) and other multiples, but I got the same runtime error, just with different expected and actual sizes.

deep-learning pytorch artificial-intelligence image-segmentation unet-neural-network
1 Answer

It is a dimensionality problem with your input data.

The inputs and labels should not have three dimensions; a 2-D UNet expects 4-D tensors in (batch, channel, height, width) layout.

If the batch size is 1, the shapes should be torch.Size([1, 1, 1536, 1152]) and torch.Size([1, 1, 1536, 1152]),

or torch.Size([10, 1, 1536, 1152]) and torch.Size([10, 1, 1536, 1152]) if the batch size is 10.

You are missing one leading dimension: a shape like [1, 1536, 1152] carries either the batch axis or the channel axis up front, but not both.
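That would also explain the persistent factor of 2: MONAI's UNet concatenates encoder and decoder features along dim=1, which is the channel axis only for 4-D tensors. With a 3-D tensor that index lands one axis off, so the concatenation ends up comparing channel counts, and those double at every level of the network, which is consistent with the 128 vs. 256 in your traceback and the 256 vs. 512 after you double the channels. A minimal sketch of the fix, assuming single-channel images: insert the missing axis with unsqueeze before the forward pass.

import torch
import monai

model = monai.networks.nets.UNet(
    spatial_dims=2,
    in_channels=1,
    out_channels=1,
    channels=(16, 32, 64, 128, 256),
    strides=(2, 2, 2, 2),
    num_res_units=2,
)

x = torch.zeros(1, 1536, 1152)  # 3-D, one axis short: feeding this directly reproduces the concat error
x = x.unsqueeze(1)              # 4-D: (batch=1, channel=1, height=1536, width=1152)
print(model(x).shape)           # torch.Size([1, 1, 1536, 1152])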

The code below runs without the error:

import logging
import sys
import tempfile

import torch
from torch.cuda.amp import autocast, GradScaler
from torch.utils.tensorboard import SummaryWriter

import monai

def main(tempdir):
    monai.config.print_config()
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = monai.networks.nets.UNet(
        spatial_dims=2,
        in_channels=1,
        out_channels=1,
        channels=(16, 32, 64, 128, 256),
        strides=(2, 2, 2, 2),
        num_res_units=2,
    ).to(device)
    loss_function = monai.losses.DiceLoss(sigmoid=True)
    optimizer = torch.optim.Adam(model.parameters(), 1e-3)

    # start a typical PyTorch training
    val_interval = 5
    best_metric = -1
    best_metric_epoch = -1
    epoch_loss_values = list()
    metric_values = list()
    writer = SummaryWriter()
    # Define the number of mini-batches to accumulate gradients over
    accumulation_steps = 4  # You can adjust this value based on your GPU memory capacity
    scaler = GradScaler()
    for epoch in range(10):
        print("-" * 10)
        print(f"epoch {epoch + 1}/{10}")
        model.train()
        epoch_loss = 0
        step = 0
        accumulated_loss = 0  # Initialize accumulated loss
        # for batch_data in train_loader:
        if True:
            step += 1
            # inputs, labels = batch_data["img"].to(device), batch_data["seg"].to(device)
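            # dummy 4-D tensors in (batch, channel, height, width) layout, standing in for the loader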
            inputs = torch.zeros((1, 1, 1536, 1152)).to(device)
            labels = torch.zeros((1, 1, 1536, 1152)).to(device)
            optimizer.zero_grad()
            with autocast():
              outputs = model(inputs)
              loss = loss_function(outputs, labels)
              loss /= accumulation_steps
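              # reaching this line means the forward pass and loss computation succeeded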
              exit(0)


if __name__ == "__main__":
    with tempfile.TemporaryDirectory() as tempdir:
        main(tempdir)
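As a follow-up, the question's own pipeline can be made to yield these 4-D batches by letting the dictionary transforms add the channel axis. This is only a sketch, assuming the raw images are 2-D NumPy arrays without a channel axis; it uses the dictionary transform ToTensord rather than the bare ToTensor() from the question, and channel_dim="no_channel" requires a reasonably recent MONAI version:

from monai.transforms import Compose, DivisiblePadd, EnsureChannelFirstd, ScaleIntensityd, ToTensord

train_transforms = Compose(
    [
        # adds a leading channel axis to (H, W) arrays, giving (1, H, W);
        # the DataLoader then prepends the batch axis, giving (B, 1, H, W)
        EnsureChannelFirstd(keys=["img", "seg"], channel_dim="no_channel"),
        ScaleIntensityd(keys=["img", "seg"]),
        DivisiblePadd(keys=["img", "seg"], k=16),
        ToTensord(keys=["img", "seg"]),
    ]
)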