我正在尝试使用来自不同 github 存储库的代码,我根据自己的需要进行了调整。但是,我在 res_unit 中得到的乘法张量的大小不同,并导致运行时错误。这是我的代码:
数据集
import torchvision
from random import choice
from torch.utils.data import Dataset
from PIL import Image
class KinDataset(Dataset):
def __init__(self, relations, person_to_images_map, transform=None):
self.relations = relations # 가능한 모든 부모 - 자식 관계를 모은 csv 파일
self.transform = transform
self.person_to_images_map = person_to_images_map # (가족 id : 사람데이터) 형태의 데이터
self.ppl = list(person_to_images_map.keys()) # 사람 리스트
def __len__(self):
return len(self.relations)*2
def __getitem__(self, idx):
if (idx%2==0): #Positive samples
p1, p2 = self.relations[idx//2]
label = 1
else: #Negative samples
while True:
p1 = choice(self.ppl) # 사람 리스트에서 하나 뽑기
p2 = choice(self.ppl) # 사람 리스트에서 하나 뽑기
if p1 != p2 and (p1, p2) not in self.relations and (p2, p1) not in self.relations:
# 만약 둘이 같은 사람이 아니고 가족 관계가 아닐 경우
break
label = 0
# 고른 사진의 경로에서 이미지 불러오기
path1, path2 = choice(self.person_to_images_map[p1]), choice(self.person_to_images_map[p2])
img1, img2 = Image.open(path1).convert("RGB"), Image.open(path2).convert('RGB')
if self.transform:
img1, img2 = self.transform(img1), self.transform(img2)
imgs=torch.cat((img1,img2),dim=0)
return imgs, label
数据准备:
print("Prepare data...")
train_file_path = "../input/recognizing-faces-in-the-wild/train_relationships.csv"
train_folders_path = "./"
val_famillies = "F09"
all_images = glob(train_folders_path + "*/*/*.jpg")
train_images = [x for x in all_images if val_famillies not in x]
val_images = [x for x in all_images if val_famillies in x]
train_person_to_images_map = defaultdict(list)
ppl = [x.split("/")[-3] + "/" + x.split("/")[-2] for x in all_images]
for x in train_images:
train_person_to_images_map[x.split("/")[-3] + "/" + x.split("/")[-2]].append(x)
val_person_to_images_map = defaultdict(list)
for x in val_images:
val_person_to_images_map[x.split("/")[-3] + "/" + x.split("/")[-2]].append(x)
relationships = pd.read_csv(train_file_path)
relationships = list(zip(relationships.p1.values, relationships.p2.values))
relationships = [x for x in relationships if x[0] in ppl and x[1] in ppl]
train_relations = [x for x in relationships if val_famillies not in x[0]]
val_relations = [x for x in relationships if val_famillies in x[0]]
from torch.utils.data import DataLoader
from torchvision import transforms
train_transform = transforms.Compose([
transforms.Resize(160),
transforms.Grayscale(num_output_channels=3),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize(mean=[0.5, 0.5, 0.5],
std=[0.5, 0.5, 0.5])
])
val_transform = transforms.Compose([
transforms.Resize(160),
transforms.ToTensor(),
transforms.Normalize(mean=[0.5, 0.5, 0.5],
std=[0.5, 0.5, 0.5])
])
trainset = KinDataset(train_relations, train_person_to_images_map, train_transform)
valset = KinDataset(val_relations, val_person_to_images_map, val_transform)
trainloader = DataLoader(trainset, batch_size=200, shuffle=True)
valloader = DataLoader(valset, batch_size=200, shuffle=False)
型号
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models
class BasicConv2d(nn.Module):
"""
basic convoluation model
"""
def __init__(self, in_planes, out_planes, kernel_size, stride, padding=0):
super().__init__()
self.conv = nn.Conv2d(
in_planes, out_planes,
kernel_size=kernel_size, stride=stride,
padding=padding, bias=False
) # verify bias false
self.bn = nn.BatchNorm2d(
out_planes,
eps=0.001, # value found in tensorflow
momentum=0.1, # default pytorch value
affine=True
)
self.relu = nn.ReLU(inplace=False)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
class res_unit(nn.Module):
"""
this is the attention module before Residual structure
"""
def __init__(self,channel,up_size = None):
"""
:param channel: channels of input feature map
:param up_size: upsample size
"""
super(res_unit,self).__init__()
self.pool = nn.MaxPool2d(2,2)
self.conv = nn.Conv2d(channel,channel,3,padding=1)
if up_size == None:
self.upsample = nn.Upsample(scale_factor=2,mode='bilinear',align_corners=False)
else:
self.upsample = nn.Upsample(size=(up_size,up_size), mode='bilinear', align_corners=False)
self.sigmoid = nn.Sigmoid()
def forward(self,x):
identity = x
x = self.pool(x)
x = self.conv(x)
x = self.upsample(x)
x = self.sigmoid(x)
print("Identity shape :",identity.shape,"X Shape",x.shape)
x = identity*x
return x
class basenet(nn.Module):
def __init__(self):
super().__init__()
self.base = _attenNet()
self.fea = nn.Sequential(
nn.Dropout(),
nn.Linear(128 * 9 * 9, 512),
nn.ReLU(inplace=True),
nn.Dropout(),
nn.Linear(512, 128),
)
def forward(self,x):
x = self.base(x)
x = x.view(-1, 9 * 9 * 128)
x = self.fea(x)
return x
class _atten(nn.Module):
"""
the attention Module in <Learning part-aware attention networks for kinship verification>
"""
def __init__(self):
super(_atten,self).__init__()
self.conv1 = nn.Conv2d(6,32,5)
self.conv2 = nn.Conv2d(32,64,5)
self.at1 = res_unit(32)
self.at2 = res_unit(64)
self.pool = nn.MaxPool2d(2,2)
self.bn1 = nn.BatchNorm2d(32)
self.bn2 = nn.BatchNorm2d(64)
# self.fc1 = nn.Linear((9*9*128),512)
# self.dp = nn.Dropout()
# self.fc2 = nn.Linear(512,2)
def forward(self,x):
"""
:param x: 6x64x64
:return:
"""
x = self.conv1(x)
identity1 = x
x = self.at1(x)
x = identity1+x
x = self.bn1(x)
x = self.pool(F.relu(x))
x = self.conv2(x)
identity2 = x
x = self.at2(x)
x = identity2 + x
x = self.bn2(x)
x = self.pool(F.relu((x)))
# x = x.view(-1, 9*9*128)
# x = F.relu(self.fc1(x))
# x = self.dp(x)
# x = self.fc2(x)
return x
class _attenNet(nn.Module):
def __init__(self):
super().__init__()
self.base = _atten()
self.conv3 = nn.Conv2d(64, 128, 5)
self.at3 = res_unit(128,up_size=9)
self.bn3 = nn.BatchNorm2d(128)
def forward(self,x):
x = self.base(x)
x = self.conv3(x)
identity3 = x
x = self.at3(x)
x = identity3 + x
x = self.bn3(x)
x = F.relu(x)
return x
class res_addNet(nn.Module):
def __init__(self):
super().__init__()
self.conv3 = nn.Conv2d(64, 128, 5)
self.at3 = res_unit(128, up_size=9)
self.bn3 = nn.BatchNorm2d(128)
def forward(self, x):
x = self.conv3(x)
identity3 = x
x = self.at3(x)
x = identity3 + x
x = self.bn3(x)
x = F.relu(x)
return x
class each_brach(nn.Module):
def __init__(self):
super().__init__()
self.conv3 = nn.Conv2d(64, 128, 5)
self.at3 = res_unit(128, up_size=9)
self.bn3 = nn.BatchNorm2d(128)
self.fc = nn.Sequential(
nn.Linear((9 * 9 * 128), 512),
nn.ReLU(),
nn.Dropout(),
nn.Linear(512, 2)
)
def forward(self, x):
x = self.conv3(x)
identity3 = x
x = self.at3(x)
x = identity3 + x
x = self.bn3(x)
x = F.relu(x)
x = x.view(-1, 9*9*128)
x = self.fc(x)
return x
class My_Network(nn.Module):
"""
concatenate 4x2 output + add loss layer
"""
def __init__(self):
super().__init__()
self.base = _atten()
self.fd_fc = each_brach()
def forward(self, x):
x = self.base(x)
x = self.fd_fc(x)
return x
def train():
net.train() # 학습 시작
train_loss = 0.0
running_loss = 0.0
running_corrects = 0
for i, batch in enumerate(trainloader):
# 로더에서 하나씩 꺼내오며
optimizer.zero_grad()
img, label = batch
img, label = img.to(device), label.float().view(-1,1).to(device)
output = net(img)
preds = output>0.5
loss = criterion(output, label)
loss.backward()
optimizer.step()
train_loss += loss.item()
running_loss += loss.item()
running_corrects += torch.sum(preds == (label>0.5))
step = 100
if i % step == step-1:
print(' [{} - {:.2f}%],\ttrain loss: {:.5}'.format(epoch+1, 100*(i+1)/len(trainloader), running_loss/step/200))
running_loss = 0
train_loss /= len(trainset)
running_corrects = running_corrects.item()/len(trainset)
print('[{}], \ttrain loss: {:.5}\tacc: {:.5}'.format(epoch+1, train_loss, running_corrects))
return train_loss, running_corrects
def validate():
net.eval()
val_loss = 0.0
running_corrects = 0
for batch in valloader:
img ,label = batch
img, label = img.to(device), label.float().view(-1,1).to(device)
with torch.no_grad():
output = net(img)
preds = output>0.5
loss = criterion(output, label)
val_loss += loss.item()
running_corrects += torch.sum(preds == (label>0.5))
val_loss /= len(valset)
running_corrects = running_corrects.item()/len(valset)
print('[{}], \tval loss: {:.5}\tacc: {:.5}'.format(epoch+1, val_loss, running_corrects))
return val_loss, running_corrects
print("Initialize network...")
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net=My_Network().to(device)
lr = 1e-3
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
from torch.optim.lr_scheduler import ReduceLROnPlateau
scheduler = ReduceLROnPlateau(optimizer, patience=10)
print("Start training...")
num_epoch = 100
best_val_loss = 1000
best_epoch = 0
history = []
accuracy = []
for epoch in range(num_epoch):
train_loss, train_acc = train()
val_loss, val_acc = validate()
history.append((train_loss, val_loss))
accuracy.append((train_acc,val_acc))
scheduler.step(val_loss)
if val_loss < best_val_loss:
best_val_loss = val_loss
best_epoch = epoch
torch.torch.save(net.state_dict(), 'net_checkpoint.pth')
torch.save(net.state_dict(),'net_full_training.pth')
这是我使用的一些错误调试:
Start training...
Identity shape : torch.Size([200, 32, 156, 156]) X Shape torch.Size([200, 32, 156, 156])
Identity shape : torch.Size([200, 64, 74, 74]) X Shape torch.Size([200, 64, 74, 74])
Identity shape : torch.Size([200, 128, 33, 33]) X Shape torch.Size([200, 128, 9, 9])
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
/tmp/ipykernel_17/1389268748.py in <module>
8 accuracy = []
9 for epoch in range(num_epoch):
---> 10 train_loss, train_acc = train()
11 val_loss, val_acc = validate()
12 history.append((train_loss, val_loss))
/tmp/ipykernel_17/902859818.py in train()
11 img, label = batch
12 img, label = img.to(device), label.float().view(-1,1).to(device)
---> 13 output = net(img)
14 preds = output>0.5
15 loss = criterion(output, label)
/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1108 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1109 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1110 return forward_call(*input, **kwargs)
1111 # Do not call functions when jit is used
1112 full_backward_hooks, non_full_backward_hooks = [], []
/tmp/ipykernel_17/1410794548.py in forward(self, x)
198 def forward(self, x):
199 x = self.base(x)
--> 200 x = self.fd_fc(x)
201 return x
202
/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1108 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1109 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1110 return forward_call(*input, **kwargs)
1111 # Do not call functions when jit is used
1112 full_backward_hooks, non_full_backward_hooks = [], []
/tmp/ipykernel_17/1410794548.py in forward(self, x)
176 x = self.conv3(x)
177 identity3 = x
--> 178 x = self.at3(x)
179 x = identity3 + x
180 x = self.bn3(x)
/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1108 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1109 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1110 return forward_call(*input, **kwargs)
1111 # Do not call functions when jit is used
1112 full_backward_hooks, non_full_backward_hooks = [], []
/tmp/ipykernel_17/1410794548.py in forward(self, x)
58 x = self.sigmoid(x)
59 print("Identity shape :",identity.shape,"X Shape",x.shape)
---> 60 x = identity*x
61 return x
62
RuntimeError: The size of tensor a (33) must match the size of tensor b (9) at non-singleton dimension 3
我实现了一个用于亲属关系验证的神经网络,并期待训练能够发挥作用。
您遇到的错误消息是 RuntimeError,它表示代码执行期间出现问题。具体来说,错误发生在 My_Network 类的转发方法中,您将 self.base(x) 的输出传递给 self.fd_fc(x)。
这里修复您的代码:
class My_Network(nn.Module):
"""
Concatenate 4x2 output + add loss layer
"""
def __init__(self):
super().__init__()
self.base = _atten()
self.fc = nn.Sequential(
nn.Linear(128 * 9 * 9, 512), # Adjust input size based on the output size of _attenNet
nn.ReLU(),
nn.Dropout(),
nn.Linear(512, 2)
)
def forward(self, x):
x = self.base(x)
x = x.view(-1, 128 * 9 * 9) # Reshape output to match input size expected by fc
x = self.fc(x)
return x
调试步骤:
检查网络的架构并确保正确配置层以处理输入张量大小。 验证前向传递的每一步中张量的尺寸是否符合预期。 确保各层之间输入大小的一致性。
潜在的解决方案:
调整网络的架构或参数以更有效地处理不同的输入大小。 根据需要调整张量的大小或形状,以确保层之间的兼容性。 考虑使用自适应池化或全局平均池化来标准化输入大小,然后再将其传递到后续层。
进一步行动:
检查网络的架构,密切关注每层的输入和输出大小。 仔细检查任何调整大小或池化操作,以确保它们正确应用。 调试网络的前向传递,以准确识别大小不匹配发生的位置以及如何解决它。