我已经编写了一个用于分类和回归的自定义架构,例如
import torch.nn as nn
import torch.nn.functional as F
class CustomCNN(nn.Module):
def __init__(self, img_size):
super(CustomCNN, self).__init__()
self.img_size = img_size
self.conv1 = nn.Sequential(
nn.Conv2d(
in_channels=3,
out_channels=16,
kernel_size=3,
stride=1,
padding=1,
),
nn.ReLU(),
nn.BatchNorm2d(16),
nn.MaxPool2d(kernel_size=2),
nn.Dropout(0.1),
)
self.conv2 = nn.Sequential(
nn.Conv2d(16, 32, 3, 1, 1),
nn.ReLU(),
nn.BatchNorm2d(32),
nn.MaxPool2d(2),
nn.Dropout(0.2),
)
# Calculate the size of the fully connected layers' input based on the convolutional layers
fc_input_size = 32 * (img_size // 4) * (img_size // 4)
# Fully connected layers
self.fc1 = nn.Linear(fc_input_size, img_size * 3)
self.fc2 = nn.Linear(img_size * 3, img_size * 2)
self.fc3 = nn.Linear(img_size * 2, img_size * 1)
# Separate branches for gender and age
self.gender_branch = nn.Linear(img_size * 1, 2) # Binary classification for gender (0 or 1)
self.age_branch = nn.Linear(img_size * 1, 1) # Regression for age (between 0 and 1)
self.dropout = nn.Dropout(p=0.3)
def forward(self, x):
x = self.conv1(x)
x = self.conv2(x)
x = x.view(x.size(0), -1)
x = self.dropout(F.relu(self.fc1(x)))
x = self.dropout(F.relu(self.fc2(x)))
x = self.dropout(F.relu(self.fc3(x)))
# Separate branches
output_gender = self.gender_branch(x)
output_age = self.age_branch(x)
return output_gender, output_age
model = CustomCNN(img_size=320)
model = model.to(device)
optimizer_gender = torch.optim.Adam(model.gender_branch.parameters(), lr=0.001)
optimizer_age = torch.optim.Adam(model.age_branch.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
criterion_age = nn.MSELoss()
现在模型使用 torch.save 分别保存两个模型,即年龄和性别
现在如何在给定图像和此架构以及 2 个经过训练的 .pt 模型的情况下构建推理
或者我应该对架构进行任何更改以实现相同的任务,即在同一架构中保存 2 个不同的模型,两者将分别收敛并分别保存模型。
模型训练如下
import time
import copy
epochs = 400
train_losses_gender, train_losses_age = [], []
val_losses_gender, val_losses_age = [], []
train_accu_gender, train_accu_age = [], []
val_accu_gender, val_accu_age = [], []
start_time = time.time()
early_stop_counter = 100
counter = 0
best_val_loss_gender = float('Inf')
best_val_loss_age = float('Inf')
# criterion_age = nn.MSELoss()
for e in range(epochs):
epoch_start_time = time.time()
running_loss_gender, running_loss_age = 0, 0
accuracy_gender, accuracy_age = 0, 0
# training step
model.train()
for images, labels in train_dataloader:
images = images.to(device)
labels_gender = labels[:, 0, 0].long().to(device)
labels_age = labels[:, 0, 1].view(-1, 1).to(device)
optimizer_gender.zero_grad()
optimizer_age.zero_grad()
output_gender, output_age = model(images)
# Gender loss and accuracy
loss_gender = criterion(output_gender, labels_gender)
running_loss_gender += loss_gender.item()
accuracy_gender += torch.sum(torch.argmax(output_gender, dim=1) == labels_gender).item()
# loss_gender.backward()
# optimizer_gender.step()
# Zero the gradients for gender optimizer
# optimizer_gender.zero_grad()
# Age regression loss (MSE loss)
loss_age = criterion_age(output_age, labels_age)
running_loss_age += loss_age.item()
accuracy_age += torch.sum(torch.abs(output_age - labels_age)).item()
accuracy_age += torch.sum(torch.abs(output_age - labels_age) < 0.10).item()
# loss_age.backward()
# optimizer_age.step()
# Zero the gradients for age optimizer
# optimizer_age.zero_grad()
# Detach tensors before backward pass to create separate computation graphs
loss_gender.backward(retain_graph=True)
loss_age.backward()
# Update parameters for each optimizer
optimizer_gender.step()
optimizer_age.step()
# Total loss
total_loss = loss_gender + loss_age
# total_loss.backward()
# optimizer.step()
# record training loss and error, then evaluate using validation data
train_losses_gender.append(running_loss_gender / len(train_dataloader))
train_losses_age.append(running_loss_age / len(train_dataloader))
train_accu_gender.append(accuracy_gender / len(train_dataloader))
train_accu_age.append(accuracy_age / len(train_dataloader))
val_loss_gender, val_loss_age = 0, 0
accuracy_gender, accuracy_age = 0, 0
model.eval()
with torch.no_grad():
for images, labels in val_dataloader:
images = images.to(device)
labels_gender = labels[:, 0, 0].long().to(device) # Assuming the gender label is in the first column
labels_age = labels[:, 0, 1].view(-1, 1).to(device) # Assuming the age label is in the second column
output_gender, output_age = model(images)
# Gender loss and accuracy
val_loss_gender += criterion(output_gender, labels_gender).item()
accuracy_gender += torch.sum(torch.argmax(output_gender, dim=1) == labels_gender).item()
# Age regression loss (MSE loss)
val_loss_age += criterion_age(output_age, labels_age).item()
accuracy_age += torch.sum(torch.abs(output_age - labels_age) < 0.10).item()
val_losses_gender.append(val_loss_gender / len(val_dataloader))
val_losses_age.append(val_loss_age / len(val_dataloader))
val_accu_gender.append(accuracy_gender / len(val_dataloader))
val_accu_age.append(accuracy_age / len(val_dataloader))
print("Epoch: {}/{}.. ".format(e + 1, epochs),
"Time: {:.2f}s..".format(time.time() - epoch_start_time),
# "Training Loss (Gender): {:.3f}.. ".format(train_losses_gender[-1]),
# "Training Accu (Gender): {:.3f}.. ".format(train_accu_gender[-1]),
# "Training Loss (Age): {:.3f}.. ".format(train_losses_age[-1]),
# "Training Accu (Age): {:.3f}.. ".format(train_accu_age[-1]),
"Val Loss (Gender): {:.3f}.. ".format(val_losses_gender[-1]),
"Val Accu (Gender): {:.3f}.. ".format(val_accu_gender[-1]),
"Val Loss (Age): {:.3f}.. ".format(val_losses_age[-1]),
"Val Accu (Age): {:.3f}".format(val_accu_age[-1]))
if val_losses_gender[-1] < best_val_loss_gender:
best_val_loss_gender = val_losses_gender[-1]
counter_gender = 0
# Save the gender model
torch.save(model.gender_branch, f'model/gender_model.pt')
else:
counter_gender += 1
print('Gender Validation loss has not improved since: {:.3f} Count: {}'.format(best_val_loss_gender, counter_gender))
if counter_gender >= early_stop_counter:
print('Early Stopping for Gender Now!!!!')
if val_losses_age[-1] < best_val_loss_age:
best_val_loss_age = val_losses_age[-1]
counter_age = 0
# Save the age model
torch.save(model.age_branch, f'model/age_model.pt')
else:
counter_age += 1
print('Age Validation loss has not improved since: {:.3f} Count: {}'.format(best_val_loss_age, counter_age))
if counter_age >= early_stop_counter:
print('Early Stopping for Age Now!!!!')
训练管道是否正确,或者我是否应该对训练架构进行更改,如果它正确,那么我无法加载两个年龄和性别训练的模型进行推理
一般的培训方法看起来是有效的。我的理解是,你有一个模型,直到最后一个分支都有共享权重,它分为两个输出(分类和回归)。您为每个输出分配一个优化器,并独立地步进优化器。如果您需要对每个单独的任务有更多的控制或者任务要求不同的学习策略,那么使用独立的优化器会很有用。
另一种更简单的方法(通常是一个很好的起点)是合并损失,并使用全局优化器更新权重。通过这种方法,模型可以联合从这两项任务中学习。以下是我训练初始模型的方法:
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
class_loss = nn.CrossEntropyLoss() #classification loss
regr_loss = nn.MSELoss() #regression loss
loss_weights = [1, 1] #tune loss scaling so they're equally relevant
for e in range(epochs):
...
# training step
model.train()
for images, labels in train_dataloader:
images = images.to(device)
labels_class = labels[:, 0, 0].long().to(device)
labels_regr = labels[:, 0, 1].view(-1, 1).to(device)
output_class, output_regr = model(images)
loss_class = ce_loss(output_class, labels_class)
loss_regr = mse_loss(output_regr, labels_regr)
weighted_loss = loss_weights[0] * loss_class + loss_weights[1] * loss_regr
weighted_loss.backward()
optimizer.step()
...