运行时错误：张量 a (524288) 的大小必须与非单一维度 0 处的张量 b (131072) 的大小匹配

我正在尝试使用 Github 代码执行叶病图像分割。这是代码，我面临这个问题“张量 a (524288) 的大小必须与非单维 0 处张量 b (131072) 的大小匹配”。帮我修复这个错误。我无法理解错误实际上出在哪里。

import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from tqdm import tqdm
import cv2
import os

# Dice coefficient and IoU calculation
def dice_coeff(pred, target):
    smooth = 1.
    pred_flat = pred.view(-1)
    target_flat = target.view(-1)
    intersection = (pred_flat * target_flat).sum()
    return (2. * intersection + smooth) / (pred_flat.sum() + target_flat.sum() + smooth)

def iu_acc(y_pred, y_true):
    smooth = 1e-12
    y_pred_pos = torch.round(torch.clamp(y_pred, 0, 1))
    intersection = torch.sum(y_true * y_pred_pos)
    sum_ = torch.sum(y_true) + torch.sum(y_pred_pos)
    jac = (intersection + smooth) / (sum_ - intersection + smooth)
    return torch.mean(jac)

# Loss function
def dice_coef_loss(pred, target):
    return 1 - dice_coeff(pred, target)

# Define your U-Net model
class ConvBlock(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(ConvBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        x = self.relu(self.bn1(self.conv1(x)))
        return self.relu(self.bn2(self.conv2(x)))

class UpConvBlock(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(UpConvBlock, self).__init__()
        self.up = nn.ConvTranspose2d(in_channels, out_channels, kernel_size=2, stride=2)
        self.conv_block = ConvBlock(out_channels * 2, out_channels)

    def forward(self, x1, x2):
        up = self.up(x1)
        return self.conv_block(torch.cat([up, x2], dim=1))

class Unet(nn.Module):
    def __init__(self, in_channels=3, num_classes=1):
        super(Unet, self).__init__()
        self.enc1 = ConvBlock(in_channels, 16)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.enc2 = ConvBlock(16, 32)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.enc3 = ConvBlock(32, 64)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.enc4 = ConvBlock(64, 128)
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.enc5 = ConvBlock(128, 256)

        self.dec6 = UpConvBlock(256, 128)
        self.dec7 = UpConvBlock(128, 64)
        self.dec8 = UpConvBlock(64, 32)
        self.dec9 = UpConvBlock(32, 16)
        self.out = nn.Conv2d(16, num_classes, kernel_size=1)

    def forward(self, x):
        x1 = self.enc1(x)
        x2 = self.pool1(x1)
        x3 = self.enc2(x2)
        x4 = self.pool2(x3)
        x5 = self.enc3(x4)
        x6 = self.pool3(x5)
        x7 = self.enc4(x6)
        x8 = self.pool4(x7)
        x9 = self.enc5(x8)
        y1 = self.dec6(x9, x7)
        y2 = self.dec7(y1, x5)
        y3 = self.dec8(y2, x3)
        y4 = self.dec9(y3, x1)
        out = self.out(y4)
        return out

# Define your dataset class
class CustomDataset(Dataset):
    def __init__(self, image_paths, mask_paths, transform=None):
        self.image_paths = image_paths
        self.mask_paths = mask_paths
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, index):
        image_path = self.image_paths[index]
        mask_path = self.mask_paths[index]

        image = cv2.imread(image_path)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)

        if self.transform:
            image = self.transform(image)
            mask = self.transform(mask)

        image = image.float() / 255.0

        return image, mask

# Define your training and evaluation functions
def train(model, train_loader, optimizer, loss_fn, device):
    model.train()
    epoch_loss = 0.0
    for images, targets in tqdm(train_loader, desc='Training', leave=False):
        images, targets = images.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_fn(outputs, targets)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    return epoch_loss / len(train_loader)

def evaluate(model, val_loader, loss_fn, device):
    model.eval()
    epoch_loss = 0.0
    with torch.no_grad():
        for images, targets in tqdm(val_loader, desc='Validation', leave=False):
            images, targets = images.to(device), targets.to(device)
            outputs = model(images)
            loss = loss_fn(outputs, targets)
            epoch_loss += loss.item()
    return epoch_loss / len(val_loader)

if __name__ == '__main__':
    # Define dataset paths
    train_image_dir = '/content/drive/My Drive/brown/train/images'
    train_mask_dir = '/content/drive/My Drive/brown/train/labels'
    val_image_dir = '/content/drive/My Drive/brown/val/images'
    val_mask_dir = '/content/drive/My Drive/brown/val/labels'

    # Get image and mask paths
    train_image_paths = sorted([os.path.join(train_image_dir, f) for f in os.listdir(train_image_dir) if f.endswith('.png') or f.endswith('.jpg')])
    train_mask_paths = sorted([os.path.join(train_mask_dir, f) for f in os.listdir(train_mask_dir) if f.endswith('.png') or f.endswith('.jpg')])
    val_image_paths = sorted([os.path.join(val_image_dir, f) for f in os.listdir(val_image_dir) if f.endswith('.png') or f.endswith('.jpg')])
    val_mask_paths = sorted([os.path.join(val_mask_dir, f) for f in os.listdir(val_mask_dir) if f.endswith('.png') or f.endswith('.jpg')])

    # Create datasets
    train_dataset = CustomDataset(train_image_paths, train_mask_paths, transform=transforms.ToTensor())
    val_dataset = CustomDataset(val_image_paths, val_mask_paths, transform=transforms.ToTensor())

    # Define data loaders
    batch_size = 2
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Define device, model, optimizer, loss function...
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Unet(in_channels=3, num_classes=1)  # Specify the number of input channels and output classes
model = model.to(device)
optimizer = Adam(model.parameters(), lr=1e-4)

loss_fn = dice_coef_loss  # or your custom loss function

num_epochs = 10
for epoch in range(num_epochs):
    train_loss = train(model, train_loader, optimizer, loss_fn, device)
    val_loss = evaluate(model, val_loader, loss_fn, device)
    print(f'Epoch {epoch + 1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')

我尝试了 Github 上的另一个代码并得到了同样的错误。据我了解，这是我的输入尺寸的问题。但我无法解决它。

图像文件：尺寸：256 x 256，位深度：24，扩展名：.jpg 掩码文件：尺寸：256 x 256，位深度：8，扩展名：.png

更新：这是我的完整错误

    --> 178 train_loss = train(model, train_loader, optimizer, loss_fn, device)
    179     val_loss = evaluate(model, val_loader, loss_fn, device)
    180     print(f'Epoch {epoch + 1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')

train(model, train_loader, optimizer, loss_fn, device)
    127         optimizer.zero_grad()
    128         outputs = model(images)
--> 129         loss = loss_fn(outputs, targets)
    130         loss.backward()
    131         optimizer.step()

dice_coef_loss(pred, target)
     32 # Loss function
     33 def dice_coef_loss(pred, target):
---> 34     return 1 - dice_coeff(pred, target)
# Define your U-Net model
dice_coeff(pred, target)
     16     pred_flat = pred.view(-1)
     17     target_flat = target.view(-1)
---> 18     intersection = (pred_flat * target_flat).sum()
     19     return (2. * intersection + smooth) / (pred_flat.sum() + target_flat.sum() + smooth)

运行时错误：张量 a (524288) 的大小必须与非单维 0 处张量 b (131072) 的大小匹配

问题描述投票：0回答：1

1个回答

最新问题

运行时错误：张量 a (524288) 的大小必须与非单一维度 0 处的张量 b (131072) 的大小匹配

问题描述 投票：0回答：1

1个回答

最新问题

问题描述投票：0回答：1