应用乘法时如何匹配不同张量的大小

Question

我正在尝试使用来自不同 github 存储库的代码，我根据自己的需要进行了调整。但是，我在 res_unit 中得到的乘法张量的大小不同，并导致运行时错误。这是我的代码：

数据集

import torchvision
from random import choice
from torch.utils.data import Dataset
from PIL import Image
class KinDataset(Dataset):
    def __init__(self, relations, person_to_images_map, transform=None):  
        self.relations = relations # 가능한 모든 부모 - 자식 관계를 모은 csv 파일
        self.transform = transform
        self.person_to_images_map = person_to_images_map # (가족 id : 사람데이터) 형태의 데이터
        self.ppl = list(person_to_images_map.keys()) # 사람 리스트

    def __len__(self):
        return len(self.relations)*2
               
    def __getitem__(self, idx):
        
        if (idx%2==0): #Positive samples
            p1, p2 = self.relations[idx//2]
            label = 1
        else:          #Negative samples
            while True:
                p1 = choice(self.ppl) # 사람 리스트에서 하나 뽑기
                p2 = choice(self.ppl) # 사람 리스트에서 하나 뽑기
                if p1 != p2 and (p1, p2) not in self.relations and (p2, p1) not in self.relations:
                    # 만약 둘이 같은 사람이 아니고 가족 관계가 아닐 경우
                    break 
            label = 0
        
        # 고른 사진의 경로에서 이미지 불러오기
        path1, path2 = choice(self.person_to_images_map[p1]), choice(self.person_to_images_map[p2])
        img1, img2 = Image.open(path1).convert("RGB"), Image.open(path2).convert('RGB')
        
        if self.transform:
            img1, img2 = self.transform(img1), self.transform(img2)
        imgs=torch.cat((img1,img2),dim=0)
        
        return imgs, label

数据准备：

print("Prepare data...")
train_file_path = "../input/recognizing-faces-in-the-wild/train_relationships.csv"
train_folders_path = "./"
val_famillies = "F09"

all_images = glob(train_folders_path + "*/*/*.jpg")

train_images = [x for x in all_images if val_famillies not in x]
val_images = [x for x in all_images if val_famillies in x]

train_person_to_images_map = defaultdict(list)

ppl = [x.split("/")[-3] + "/" + x.split("/")[-2] for x in all_images]

for x in train_images:
    train_person_to_images_map[x.split("/")[-3] + "/" + x.split("/")[-2]].append(x)

val_person_to_images_map = defaultdict(list)

for x in val_images:
    val_person_to_images_map[x.split("/")[-3] + "/" + x.split("/")[-2]].append(x)
    
relationships = pd.read_csv(train_file_path)
relationships = list(zip(relationships.p1.values, relationships.p2.values))
relationships = [x for x in relationships if x[0] in ppl and x[1] in ppl]

train_relations = [x for x in relationships if val_famillies not in x[0]]
val_relations  = [x for x in relationships if val_famillies in x[0]]

from torch.utils.data import DataLoader
from torchvision import transforms

train_transform = transforms.Compose([
    transforms.Resize(160),
    transforms.Grayscale(num_output_channels=3),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5],
                         std=[0.5, 0.5, 0.5]) 
])
val_transform = transforms.Compose([
    transforms.Resize(160),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5],
                         std=[0.5, 0.5, 0.5]) 
])

trainset = KinDataset(train_relations, train_person_to_images_map, train_transform)
valset = KinDataset(val_relations, val_person_to_images_map, val_transform)

trainloader = DataLoader(trainset, batch_size=200, shuffle=True)
valloader = DataLoader(valset, batch_size=200, shuffle=False)

型号

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models



class BasicConv2d(nn.Module):
    """
    basic convoluation model
    """

    def __init__(self, in_planes, out_planes, kernel_size, stride, padding=0):
        super().__init__()
        self.conv = nn.Conv2d(
            in_planes, out_planes,
            kernel_size=kernel_size, stride=stride,
            padding=padding, bias=False
        )  # verify bias false
        self.bn = nn.BatchNorm2d(
            out_planes,
            eps=0.001,  # value found in tensorflow
            momentum=0.1,  # default pytorch value
            affine=True
        )
        self.relu = nn.ReLU(inplace=False)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        return x


class res_unit(nn.Module):
    """
    this is the attention module before Residual structure
    """
    def __init__(self,channel,up_size = None):
        """

        :param channel: channels of input feature map
        :param up_size: upsample size
        """
        super(res_unit,self).__init__()
        self.pool = nn.MaxPool2d(2,2)
        self.conv = nn.Conv2d(channel,channel,3,padding=1)
        if up_size == None:
            self.upsample = nn.Upsample(scale_factor=2,mode='bilinear',align_corners=False)
        else:
            self.upsample = nn.Upsample(size=(up_size,up_size), mode='bilinear', align_corners=False)
        self.sigmoid = nn.Sigmoid()
    def forward(self,x):
        identity = x
        x = self.pool(x)
        x = self.conv(x)
        x = self.upsample(x)
        x = self.sigmoid(x)
        print("Identity shape :",identity.shape,"X Shape",x.shape)
        x = identity*x
        return x


class basenet(nn.Module):
    def __init__(self):
        super().__init__()
        self.base = _attenNet()
        self.fea = nn.Sequential(
            nn.Dropout(),
            nn.Linear(128 * 9 * 9, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(512, 128),
        )

    def forward(self,x):
        x = self.base(x)
        x = x.view(-1, 9 * 9 * 128)
        x = self.fea(x)
        return x


class _atten(nn.Module):
    """
    the attention Module in <Learning part-aware attention networks for kinship verification>
    """
    def __init__(self):
        super(_atten,self).__init__()
        self.conv1 = nn.Conv2d(6,32,5)
        self.conv2 = nn.Conv2d(32,64,5)
        self.at1 = res_unit(32)
        self.at2 = res_unit(64)
        self.pool = nn.MaxPool2d(2,2)
        self.bn1 = nn.BatchNorm2d(32)
        self.bn2 = nn.BatchNorm2d(64)
        # self.fc1 = nn.Linear((9*9*128),512)
        # self.dp  = nn.Dropout()
        # self.fc2 = nn.Linear(512,2)

    def forward(self,x):
        """
        :param x: 6x64x64
        :return:
        """
        x = self.conv1(x)
        identity1 = x
        x = self.at1(x)
        x = identity1+x
        x = self.bn1(x)
        x = self.pool(F.relu(x))

        x = self.conv2(x)
        identity2 = x
        x = self.at2(x)
        x = identity2 + x
        x = self.bn2(x)
        x = self.pool(F.relu((x)))


        # x = x.view(-1, 9*9*128)
        # x = F.relu(self.fc1(x))
        # x = self.dp(x)
        # x = self.fc2(x)
        return x


class _attenNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.base = _atten()
        self.conv3 = nn.Conv2d(64, 128, 5)
        self.at3 = res_unit(128,up_size=9)
        self.bn3 = nn.BatchNorm2d(128)

    def forward(self,x):
        x = self.base(x)
        x = self.conv3(x)
        identity3 = x
        x = self.at3(x)
        x = identity3 + x
        x = self.bn3(x)
        x = F.relu(x)
        return x


class res_addNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv3 = nn.Conv2d(64, 128, 5)
        self.at3 = res_unit(128, up_size=9)
        self.bn3 = nn.BatchNorm2d(128)

    def forward(self, x):
        x = self.conv3(x)
        identity3 = x
        x = self.at3(x)
        x = identity3 + x
        x = self.bn3(x)
        x = F.relu(x)
        return x

class each_brach(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv3 = nn.Conv2d(64, 128, 5)
        self.at3 = res_unit(128, up_size=9)
        self.bn3 = nn.BatchNorm2d(128)

        self.fc = nn.Sequential(
            nn.Linear((9 * 9 * 128), 512),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(512, 2)
        )
    def forward(self, x):
        x = self.conv3(x)
        identity3 = x
        x = self.at3(x)
        x = identity3 + x
        x = self.bn3(x)
        x = F.relu(x)
        x = x.view(-1, 9*9*128)
        x = self.fc(x)
        return x



class My_Network(nn.Module):
    """
    concatenate 4x2 output + add loss layer
    """
    def __init__(self):
        super().__init__()
        self.base = _atten()

        self.fd_fc = each_brach()

    def forward(self, x):
        x = self.base(x)
        x = self.fd_fc(x)
        return x
def train():
    net.train() # 학습 시작
    train_loss = 0.0
    running_loss = 0.0
    running_corrects = 0
    
    for i, batch in enumerate(trainloader):
        # 로더에서 하나씩 꺼내오며
        optimizer.zero_grad()
        
        img, label = batch
        img, label = img.to(device), label.float().view(-1,1).to(device)
        output = net(img)
        preds = output>0.5
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
        running_loss += loss.item()
        running_corrects += torch.sum(preds == (label>0.5))
        
        step = 100
        if i % step == step-1:
            print(' [{} - {:.2f}%],\ttrain loss: {:.5}'.format(epoch+1, 100*(i+1)/len(trainloader), running_loss/step/200))
            running_loss = 0
        
    train_loss /= len(trainset)
    running_corrects = running_corrects.item()/len(trainset)
    print('[{}], \ttrain loss: {:.5}\tacc: {:.5}'.format(epoch+1, train_loss, running_corrects))
    return train_loss, running_corrects
def validate():
    net.eval()
    val_loss = 0.0
    running_corrects = 0
    
    for batch in valloader:
        img ,label = batch
        img, label = img.to(device), label.float().view(-1,1).to(device)
        with torch.no_grad():
            output = net(img)
            preds = output>0.5
            loss = criterion(output, label)
            
        val_loss += loss.item()
        running_corrects += torch.sum(preds == (label>0.5))
    
    val_loss /= len(valset)
    running_corrects = running_corrects.item()/len(valset)
    print('[{}], \tval loss: {:.5}\tacc: {:.5}'.format(epoch+1, val_loss, running_corrects))

    return val_loss, running_corrects
print("Initialize network...")
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net=My_Network().to(device)
lr = 1e-3

criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=lr)

from torch.optim.lr_scheduler import ReduceLROnPlateau
scheduler = ReduceLROnPlateau(optimizer, patience=10)
print("Start training...")  
num_epoch = 100

best_val_loss = 1000
best_epoch = 0

history = []
accuracy = []
for epoch in range(num_epoch):
    train_loss, train_acc = train()  
    val_loss, val_acc = validate()
    history.append((train_loss, val_loss))
    accuracy.append((train_acc,val_acc))
    scheduler.step(val_loss)
    
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_epoch = epoch
        torch.torch.save(net.state_dict(), 'net_checkpoint.pth')

torch.save(net.state_dict(),'net_full_training.pth')

这是我使用的一些错误调试：

Start training...
Identity shape : torch.Size([200, 32, 156, 156]) X Shape torch.Size([200, 32, 156, 156])
Identity shape : torch.Size([200, 64, 74, 74]) X Shape torch.Size([200, 64, 74, 74])
Identity shape : torch.Size([200, 128, 33, 33]) X Shape torch.Size([200, 128, 9, 9])
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
/tmp/ipykernel_17/1389268748.py in <module>
      8 accuracy = []
      9 for epoch in range(num_epoch):
---> 10     train_loss, train_acc = train()
     11     val_loss, val_acc = validate()
     12     history.append((train_loss, val_loss))

/tmp/ipykernel_17/902859818.py in train()
     11         img, label = batch
     12         img, label = img.to(device), label.float().view(-1,1).to(device)
---> 13         output = net(img)
     14         preds = output>0.5
     15         loss = criterion(output, label)

/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1108         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1109                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1110             return forward_call(*input, **kwargs)
   1111         # Do not call functions when jit is used
   1112         full_backward_hooks, non_full_backward_hooks = [], []

/tmp/ipykernel_17/1410794548.py in forward(self, x)
    198     def forward(self, x):
    199         x = self.base(x)
--> 200         x = self.fd_fc(x)
    201         return x
    202 

/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1108         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1109                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1110             return forward_call(*input, **kwargs)
   1111         # Do not call functions when jit is used
   1112         full_backward_hooks, non_full_backward_hooks = [], []

/tmp/ipykernel_17/1410794548.py in forward(self, x)
    176         x = self.conv3(x)
    177         identity3 = x
--> 178         x = self.at3(x)
    179         x = identity3 + x
    180         x = self.bn3(x)

/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1108         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1109                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1110             return forward_call(*input, **kwargs)
   1111         # Do not call functions when jit is used
   1112         full_backward_hooks, non_full_backward_hooks = [], []

/tmp/ipykernel_17/1410794548.py in forward(self, x)
     58         x = self.sigmoid(x)
     59         print("Identity shape :",identity.shape,"X Shape",x.shape)
---> 60         x = identity*x
     61         return x
     62 

RuntimeError: The size of tensor a (33) must match the size of tensor b (9) at non-singleton dimension 3

我实现了一个用于亲属关系验证的神经网络，并期待训练能够发挥作用。

Answer 1

您遇到的错误消息是 RuntimeError，它表示代码执行期间出现问题。具体来说，错误发生在 My_Network 类的转发方法中，您将 self.base(x) 的输出传递给 self.fd_fc(x)。

这里修复您的代码：

class My_Network(nn.Module):
"""
Concatenate 4x2 output + add loss layer
"""
def __init__(self):
    super().__init__()
    self.base = _atten()
    self.fc = nn.Sequential(
        nn.Linear(128 * 9 * 9, 512),  # Adjust input size based on the output size of _attenNet
        nn.ReLU(),
        nn.Dropout(),
        nn.Linear(512, 2)
    )

def forward(self, x):
    x = self.base(x)
    x = x.view(-1, 128 * 9 * 9)  # Reshape output to match input size expected by fc
    x = self.fc(x)
    return x

调试步骤：

检查网络的架构并确保正确配置层以处理输入张量大小。验证前向传递的每一步中张量的尺寸是否符合预期。确保各层之间输入大小的一致性。

潜在的解决方案：

调整网络的架构或参数以更有效地处理不同的输入大小。根据需要调整张量的大小或形状，以确保层之间的兼容性。考虑使用自适应池化或全局平均池化来标准化输入大小，然后再将其传递到后续层。

进一步行动：

检查网络的架构，密切关注每层的输入和输出大小。仔细检查任何调整大小或池化操作，以确保它们正确应用。调试网络的前向传递，以准确识别大小不匹配发生的位置以及如何解决它。

应用乘法时如何匹配不同张量的大小

问题描述投票：0回答：1

1个回答

最新问题

应用乘法时如何匹配不同张量的大小

问题描述 投票：0回答：1

1个回答

最新问题

问题描述投票：0回答：1