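我正在尝试使用 GitHub 上的代码进行叶片病害图像分割。下面是我的代码,运行时遇到了这个错误:“The size of tensor a (524288) must match the size of tensor b (131072) at non-singleton dimension 0”(张量 a 的大小 (524288) 必须与张量 b 的大小 (131072) 在非单例维度 0 上一致)。请帮我修复这个错误,我不明白问题究竟出在哪里。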
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from tqdm import tqdm
import cv2
import os
# Dice coefficient and IoU calculation
def dice_coeff(pred, target):
    """Return the smoothed Dice coefficient between two tensors.

    Both tensors are flattened and multiplied element-wise, so they must
    hold the same number of elements (a one-element tensor is still
    broadcast). The value is
    ``(2 * sum(pred * target) + 1) / (sum(pred) + sum(target) + 1)``.

    Args:
        pred: Predicted tensor.
        target: Ground-truth tensor.

    Returns:
        A 0-dim tensor holding the Dice coefficient.

    Raises:
        RuntimeError: If the element counts differ. The message reports the
            original shapes of both tensors so the mismatch can be traced
            back to the model output or the data pipeline.
    """
    smooth = 1.
    # reshape (unlike view) also accepts non-contiguous inputs such as
    # transposed or sliced tensors.
    pred_flat = pred.reshape(-1)
    target_flat = target.reshape(-1)
    n_pred, n_target = pred_flat.numel(), target_flat.numel()
    if n_pred != n_target and 1 not in (n_pred, n_target):
        raise RuntimeError(
            f"dice_coeff: pred has shape {tuple(pred.shape)} "
            f"({n_pred} elements) but target has shape "
            f"{tuple(target.shape)} ({n_target} elements); "
            "the element counts must match"
        )
    intersection = (pred_flat * target_flat).sum()
    return (2. * intersection + smooth) / (pred_flat.sum() + target_flat.sum() + smooth)
def iu_acc(y_pred, y_true):
    """Return the smoothed intersection-over-union (Jaccard index).

    ``y_pred`` is clamped to [0, 1] and rounded to a hard 0/1 prediction
    before it is compared with ``y_true``.

    Args:
        y_pred: Predicted tensor.
        y_true: Ground-truth tensor.

    Returns:
        A 0-dim tensor: ``(I + eps) / (sum(y_true) + sum(pred) - I + eps)``
        where ``I`` is the sum of the element-wise product and ``eps`` is
        ``1e-12``.
    """
    smooth = 1e-12
    y_pred_pos = torch.round(torch.clamp(y_pred, 0, 1))
    intersection = torch.sum(y_true * y_pred_pos)
    sum_ = torch.sum(y_true) + torch.sum(y_pred_pos)
    # The sums above reduce over every element, so the ratio is already a
    # scalar and needs no further averaging.
    return (intersection + smooth) / (sum_ - intersection + smooth)
# Loss function
def dice_coef_loss(pred, target):
    """Return the Dice loss, i.e. ``1 - dice_coeff(pred, target)``.

    Args:
        pred: Predicted tensor.
        target: Ground-truth tensor.

    Returns:
        A 0-dim tensor; lower values mean larger overlap.
    """
    return 1 - dice_coeff(pred, target)
# Define your U-Net model
class ConvBlock(nn.Module):
    """Two 3x3 convolutions, each followed by batch norm and ReLU.

    Both convolutions use ``padding=1``, so height and width are preserved;
    only the channel count changes from ``in_channels`` to ``out_channels``.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        # A single ReLU module is shared by both stages.
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        """Apply conv -> batch norm -> ReLU twice and return the result."""
        x = self.relu(self.bn1(self.conv1(x)))
        return self.relu(self.bn2(self.conv2(x)))
class UpConvBlock(nn.Module):
    """Decoder stage: 2x transposed-conv up-sampling, skip concat, ConvBlock.

    ``in_channels`` is the channel count of the tensor being up-sampled and
    ``out_channels`` the channel count of both the skip tensor and the
    result.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.up = nn.ConvTranspose2d(in_channels, out_channels, kernel_size=2, stride=2)
        # After concatenation the tensor carries out_channels from the
        # up-sampled path plus out_channels from the skip connection.
        self.conv_block = ConvBlock(out_channels * 2, out_channels)

    def forward(self, x1, x2):
        """Up-sample ``x1``, concatenate it with skip tensor ``x2``, convolve.

        If the up-sampled map and ``x2`` differ in height or width (which
        happens when an encoder stage pooled an odd-sized map), the
        up-sampled map is zero-padded to the size of ``x2`` before the
        channel-wise concatenation.
        """
        up = self.up(x1)
        diff_h = x2.size(2) - up.size(2)
        diff_w = x2.size(3) - up.size(3)
        if diff_h or diff_w:
            # Pad order is (left, right, top, bottom) for the last two dims.
            up = nn.functional.pad(
                up,
                [diff_w // 2, diff_w - diff_w // 2,
                 diff_h // 2, diff_h - diff_h // 2],
            )
        return self.conv_block(torch.cat([up, x2], dim=1))
class Unet(nn.Module):
    """U-Net with four pooling stages (16-32-64-128-256 channels).

    The encoder halves the spatial size four times with 2x2 max pooling;
    the decoder up-samples four times and concatenates the matching encoder
    feature map at each stage. A final 1x1 convolution maps the 16 decoder
    channels to ``num_classes`` channels.

    Args:
        in_channels: Number of channels of the input image.
        num_classes: Number of output channels.
    """

    def __init__(self, in_channels=3, num_classes=1):
        super().__init__()
        # Encoder
        self.enc1 = ConvBlock(in_channels, 16)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.enc2 = ConvBlock(16, 32)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.enc3 = ConvBlock(32, 64)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.enc4 = ConvBlock(64, 128)
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Bottleneck
        self.enc5 = ConvBlock(128, 256)
        # Decoder
        self.dec6 = UpConvBlock(256, 128)
        self.dec7 = UpConvBlock(128, 64)
        self.dec8 = UpConvBlock(64, 32)
        self.dec9 = UpConvBlock(32, 16)
        self.out = nn.Conv2d(16, num_classes, kernel_size=1)

    def forward(self, x):
        """Return the output of the final 1x1 convolution for input ``x``.

        No activation is applied after the last convolution, so the result
        is unbounded; apply e.g. ``torch.sigmoid`` before feeding it to a
        loss that expects values in [0, 1].
        """
        x1 = self.enc1(x)
        x2 = self.pool1(x1)
        x3 = self.enc2(x2)
        x4 = self.pool2(x3)
        x5 = self.enc3(x4)
        x6 = self.pool3(x5)
        x7 = self.enc4(x6)
        x8 = self.pool4(x7)
        x9 = self.enc5(x8)
        # Each decoder stage receives the encoder map of the same resolution.
        y1 = self.dec6(x9, x7)
        y2 = self.dec7(y1, x5)
        y3 = self.dec8(y2, x3)
        y4 = self.dec9(y3, x1)
        out = self.out(y4)
        return out
# Define your dataset class
class CustomDataset(Dataset):
    """Dataset of (image, mask) pairs read from two parallel path lists.

    ``image_paths[i]`` and ``mask_paths[i]`` must refer to the same sample.
    Images are read with OpenCV's default colour mode (OpenCV uses BGR
    channel order) and masks as single-channel grayscale.

    Args:
        image_paths: Sequence of image file paths.
        mask_paths: Sequence of mask file paths, aligned with ``image_paths``.
        transform: Optional callable applied to the image array and to the
            mask array. If omitted, both are converted to float tensors in
            [0, 1] with channels first.
    """

    def __init__(self, image_paths, mask_paths, transform=None):
        self.image_paths = image_paths
        self.mask_paths = mask_paths
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    @staticmethod
    def _to_unit_tensor(array):
        """Convert an HxW or HxWxC uint8 array to a float CxHxW tensor in [0, 1]."""
        tensor = torch.from_numpy(array)
        if tensor.ndim == 2:
            tensor = tensor.unsqueeze(0)
        else:
            tensor = tensor.permute(2, 0, 1)
        return tensor.float().div(255.0).contiguous()

    def __getitem__(self, index):
        """Load, align and convert the sample at ``index``.

        Returns:
            A tuple ``(image, mask)``.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image or the mask.
        """
        image_path = self.image_paths[index]
        mask_path = self.mask_paths[index]
        image = cv2.imread(image_path)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        # cv2.imread reports failure by returning None instead of raising.
        if image is None:
            raise FileNotFoundError(f"Could not read image: {image_path}")
        if mask is None:
            raise FileNotFoundError(f"Could not read mask: {mask_path}")
        # The loss compares prediction and mask element by element, so the
        # mask must have the image's height and width. Nearest-neighbour
        # interpolation keeps the original label values.
        if mask.shape[:2] != image.shape[:2]:
            height, width = image.shape[:2]
            mask = cv2.resize(mask, (width, height), interpolation=cv2.INTER_NEAREST)
        if self.transform:
            image = self.transform(image)
            mask = self.transform(mask)
        else:
            image = self._to_unit_tensor(image)
            mask = self._to_unit_tensor(mask)
        # Transforms such as torchvision's ToTensor already return floats in
        # [0, 1]; only rescale when the image is still an integer tensor, so
        # the values are not divided by 255 twice.
        if not torch.is_floating_point(image):
            image = image.float() / 255.0
        return image, mask
# Define your training and evaluation functions
def train(model, train_loader, optimizer, loss_fn, device):
    """Run one training epoch and return the mean per-batch loss.

    Args:
        model: Module to optimise; it is switched to training mode.
        train_loader: Iterable of ``(images, targets)`` batches that also
            supports ``len()``.
        optimizer: Optimizer that updates the parameters of ``model``.
        loss_fn: Callable ``loss_fn(outputs, targets)`` returning a scalar
            tensor.
        device: Device the batches are moved to before the forward pass.

    Returns:
        The sum of the batch losses divided by ``len(train_loader)``.

    Raises:
        ZeroDivisionError: If ``train_loader`` has length zero.
    """
    model.train()
    epoch_loss = 0.0
    for images, targets in tqdm(train_loader, desc='Training', leave=False):
        images, targets = images.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_fn(outputs, targets)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    return epoch_loss / len(train_loader)
def evaluate(model, val_loader, loss_fn, device):
    """Compute the mean per-batch loss over a validation loader.

    The model is switched to evaluation mode and gradients are disabled for
    the whole pass.

    Args:
        model: Module to evaluate.
        val_loader: Iterable of ``(images, targets)`` batches that also
            supports ``len()``.
        loss_fn: Callable ``loss_fn(outputs, targets)`` returning a scalar
            tensor.
        device: Device the batches are moved to before the forward pass.

    Returns:
        The sum of the batch losses divided by ``len(val_loader)``.

    Raises:
        ZeroDivisionError: If ``val_loader`` has length zero.
    """
    model.eval()
    epoch_loss = 0.0
    with torch.no_grad():
        for images, targets in tqdm(val_loader, desc='Validation', leave=False):
            images, targets = images.to(device), targets.to(device)
            outputs = model(images)
            loss = loss_fn(outputs, targets)
            epoch_loss += loss.item()
    return epoch_loss / len(val_loader)
def list_image_files(directory):
    """Return the sorted full paths of the ``.png``/``.jpg`` files in *directory*."""
    return sorted(
        os.path.join(directory, name)
        for name in os.listdir(directory)
        if name.endswith(('.png', '.jpg'))
    )


if __name__ == '__main__':
    # Define dataset paths
    train_image_dir = '/content/drive/My Drive/brown/train/images'
    train_mask_dir = '/content/drive/My Drive/brown/train/labels'
    val_image_dir = '/content/drive/My Drive/brown/val/images'
    val_mask_dir = '/content/drive/My Drive/brown/val/labels'

    # Get image and mask paths
    train_image_paths = list_image_files(train_image_dir)
    train_mask_paths = list_image_files(train_mask_dir)
    val_image_paths = list_image_files(val_image_dir)
    val_mask_paths = list_image_files(val_mask_dir)

    # Images and masks are paired purely by their position in the sorted
    # listings, so each split must contain the same number of both.
    for split, image_files, mask_files in (
        ('train', train_image_paths, train_mask_paths),
        ('val', val_image_paths, val_mask_paths),
    ):
        if len(image_files) != len(mask_files):
            raise ValueError(
                f'{split}: found {len(image_files)} images but '
                f'{len(mask_files)} masks'
            )

    # Create datasets
    train_dataset = CustomDataset(train_image_paths, train_mask_paths, transform=transforms.ToTensor())
    val_dataset = CustomDataset(val_image_paths, val_mask_paths, transform=transforms.ToTensor())

    # Define data loaders
    batch_size = 2
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # Define device, model, optimizer, loss function...
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = Unet(in_channels=3, num_classes=1)  # Specify the number of input channels and output classes
    model = model.to(device)
    optimizer = Adam(model.parameters(), lr=1e-4)
    loss_fn = dice_coef_loss  # or your custom loss function

    num_epochs = 10
    for epoch in range(num_epochs):
        train_loss = train(model, train_loader, optimizer, loss_fn, device)
        val_loss = evaluate(model, val_loader, loss_fn, device)
        print(f'Epoch {epoch + 1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')
我尝试了 Github 上的另一个代码并得到了同样的错误。据我了解,这是我的输入尺寸的问题。但我无法解决它。
图像文件:尺寸 256 x 256,位深度 24,扩展名 .jpg;掩码文件:尺寸 256 x 256,位深度 8,扩展名 .png
更新:这是我的完整错误
--> 178 train_loss = train(model, train_loader, optimizer, loss_fn, device)
179 val_loss = evaluate(model, val_loader, loss_fn, device)
180 print(f'Epoch {epoch + 1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')
train(model, train_loader, optimizer, loss_fn, device)
127 optimizer.zero_grad()
128 outputs = model(images)
--> 129 loss = loss_fn(outputs, targets)
130 loss.backward()
131 optimizer.step()
dice_coef_loss(pred, target)
32 # Loss function
33 def dice_coef_loss(pred, target):
---> 34 return 1 - dice_coeff(pred, target)
# Define your U-Net model
dice_coeff(pred, target)
16 pred_flat = pred.view(-1)
17 target_flat = target.view(-1)
---> 18 intersection = (pred_flat * target_flat).sum()
19 return (2. * intersection + smooth) / (pred_flat.sum() + target_flat.sum() + smooth)
RuntimeError: The size of tensor a (524288) must match the size of tensor b (131072) at non-singleton dimension 0
你是否尝试过在 train 函数的 for 循环中使用 zip(trainloader)?