I built this GAN model on the MNIST dataset. My goal is to compute the non-saturating loss; my batch size is 256, latent_dim is 100, and I train for 10 epochs:
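For reference, a minimal sketch of the setup the code below assumes (image_size = 28 and num_channels = 1 are the usual MNIST values and are my assumption here; batch size, latent_dim, and number of epochs are as stated above):

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

batch_size = 256
latent_dim = 100
num_epochs = 10
image_size = 28    # MNIST images are 28x28
num_channels = 1   # grayscale
ngpu = 1
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# ToTensor() scales pixels to [0, 1], which matches the Sigmoid output of the generator
train_dataset = datasets.MNIST(root="./data", train=True, download=True,
                               transform=transforms.ToTensor())
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)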
# Define the generator network
class Generator(nn.Module):
    def __init__(self, latent_dim, image_size, num_channels, ngpu):
        super(Generator, self).__init__()
        self.latent_dim = latent_dim
        self.image_size = image_size
        self.num_channels = num_channels
        self.ngpu = ngpu
        self.fc = nn.Sequential(
            nn.Linear(latent_dim, 128 * (image_size // 4) ** 2),
            nn.BatchNorm1d(128 * (image_size // 4) ** 2),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.conv = nn.Sequential(
            nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.2, inplace=True),
            nn.ConvTranspose2d(64, num_channels, kernel_size=4, stride=2, padding=1),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.fc(x)
        x = x.view(-1, 128, self.image_size // 4, self.image_size // 4)
        x = self.conv(x)
        return x
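As a quick sanity check of the generator in isolation (a sketch assuming image_size = 28): fc maps a flat (batch, latent_dim) vector to 128 * 7 * 7 = 6272 features, which forward reshapes to (batch, 128, 7, 7), and the two transposed convolutions upsample to (batch, 1, 28, 28):

g = Generator(latent_dim=100, image_size=28, num_channels=1, ngpu=1)
z = torch.randn(256, 100)   # flat latent batch: (batch, latent_dim)
print(g(z).shape)           # torch.Size([256, 1, 28, 28])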
# Define the discriminator network
class Discriminator(nn.Module):
    def __init__(self, image_size, num_channels, ngpu):
        super(Discriminator, self).__init__()
        self.image_size = image_size
        self.num_channels = num_channels
        self.ngpu = ngpu
        self.conv = nn.Sequential(
            nn.Conv2d(num_channels, 64, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.fc = nn.Sequential(
            nn.Linear(128 * (image_size // 4) ** 2, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.conv(x)
        x = x.view(-1, 128 * (self.image_size // 4) ** 2)
        x = self.fc(x)
        return x
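And the mirror-image check for the discriminator (same assumptions): the two stride-2 convolutions downsample (batch, 1, 28, 28) to (batch, 128, 7, 7), which is flattened into the 6272-unit linear layer:

d = Discriminator(image_size=28, num_channels=1, ngpu=1)
imgs = torch.randn(256, 1, 28, 28)
print(d(imgs).shape)        # torch.Size([256, 1]); .view(-1) in the loop makes it (256,)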
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)
# Initialize the generator and discriminator networks
generator = Generator(latent_dim, image_size, num_channels, ngpu).to(device)
discriminator = Discriminator(image_size, num_channels, ngpu).to(device)
generator.apply(weights_init)
discriminator.apply(weights_init)
I am training the model to compute the non-saturating loss:
real_label = 1
fake_label = 0

def training_loop(num_epochs=num_epochs, saturating=False):
    # NOTE: the saturating flag is currently unused inside the loop
    # (the non-saturating G loss is hard-coded; see the sketch after this function)
    netG = generator
    netD = discriminator
    # Initialize BCELoss function
    criterion = nn.BCELoss()
    # Set up SGD optimizers for both G and D
    optimizerD = optim.SGD(netD.parameters(), lr=0.0001, momentum=0.9)
    optimizerG = optim.SGD(netG.parameters(), lr=0.0001, momentum=0.9)

    ## Training Loop
    # Lists to keep track of progress
    img_list = []
    G_losses = []
    G_grads_mean = []
    G_grads_std = []
    D_losses = []
    iters = 0

    print("Starting Training Loop...")
    # For each epoch
    for epoch in range(num_epochs):
        # For each batch
        for i, data in enumerate(train_loader):
            ############################
            # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
            ###########################
            ## Train with all-real batch
            netD.zero_grad()
            # Format batch
            real_cpu = data[0].to(device)
            b_size = real_cpu.size(0)
            label = torch.full((b_size,), real_label, device=device)
            # Forward pass real batch through D
            output = netD(real_cpu).view(-1)
            # Calculate loss on all-real batch
            errD_real = criterion(output, label)
            # Calculate gradients for D in backward pass
            errD_real.backward()
            D_x = output.mean().item()

            ## Train with all-fake batch
            # Generate batch of latent vectors
            noise = torch.randn(b_size, latent_dim, 1, 1, device=device)
            # Generate fake image batch with G
            fake = netG(noise)
            label.fill_(fake_label)
            # Classify all fake batch with D
            output = netD(fake.detach()).view(-1)
            # Calculate D's loss on the all-fake batch
            errD_fake = criterion(output, label)
            # Calculate the gradients for this batch
            errD_fake.backward()
            D_G_z1 = output.mean().item()
            # Add the gradients from the all-real and all-fake batches
            errD = errD_real + errD_fake
            # Update D
            optimizerD.step()

            ############################
            # (2) Update G network: maximize log(D(G(z)))
            ###########################
            netG.zero_grad()
            label.fill_(real_label)  # Non-saturating loss: fake labels are real for generator cost
            # Since we just updated D, perform another forward pass of all-fake batch through D
            output = netD(fake).view(-1)
            # Calculate G's loss based on this output
            errG = criterion(output, label)  # Non-saturating loss
            # Calculate gradients for G
            errG.backward()
            D_G_z2 = output.mean().item()
            # Update G
            optimizerG.step()

            # Save gradients
            G_grad = [p.grad.view(-1).cpu().numpy() for p in list(netG.parameters())]
            G_grads_mean.append(np.concatenate(G_grad).mean())
            G_grads_std.append(np.concatenate(G_grad).std())

            # Output training stats
            if i % 50 == 0:
                print('[%d/%d][%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f / %.4f'
                      % (epoch+1, num_epochs, i, len(train_loader),
                         errD.item(), errG.item(), D_x, D_G_z1, D_G_z2))

            # Save Losses for plotting later
            G_losses.append(errG.item())
            D_losses.append(errD.item())

    return G_losses, D_losses
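For completeness, since the saturating flag is unused above, here is a sketch (my own, not the code from my saturating run) of how both generator losses can be expressed with the same BCE criterion. The saturating version minimizes log(1 - D(G(z))), which with BCE against fake labels is just the negated loss:

def generator_loss(netD, fake, criterion, saturating=False):
    output = netD(fake).view(-1)
    if saturating:
        target = torch.zeros_like(output)   # fake labels
        return -criterion(output, target)   # minimizes log(1 - D(G(z)))
    target = torch.ones_like(output)        # real labels for generator cost
    return criterion(output, target)        # non-saturating: minimizes -log(D(G(z)))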
At first I got this error:
# Train with non-saturating G loss
G_losses_nonsat, D_losses_nonsat = training_loop(saturating=False)
Starting Training Loop...
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-36-35b0d037c184> in <cell line: 2>()
1 # Train with non-saturating G loss
----> 2 G_losses_nonsat, D_losses_nonsat = training_loop(saturating=False)
3 frames
<ipython-input-35-65dcc085b783> in training_loop(num_epochs, saturating)
37 output = netD(real_cpu).view(-1)
38 # Calculate loss on all-real batch
---> 39 errD_real = criterion(output, label)
40 # Calculate gradients for D in backward pass
41 errD_real.backward()
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _call_impl(self, *args, **kwargs)
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/loss.py in forward(self, input, target)
617
618 def forward(self, input: Tensor, target: Tensor) -> Tensor:
--> 619 return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)
620
621
/usr/local/lib/python3.10/dist-packages/torch/nn/functional.py in binary_cross_entropy(input, target, weight, size_average, reduce, reduction)
3096 weight = weight.expand(new_size)
3097
-> 3098 return torch._C._nn.binary_cross_entropy(input, target, weight, reduction_enum)
3099
3100
RuntimeError: Found dtype Long but expected Float
I managed to fix it with: label = torch.full((b_size,), real_label, device=device, dtype=torch.float)
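(The cause: real_label = 1 is a Python int, so torch.full infers a Long tensor, while nn.BCELoss requires floating-point targets. Either the explicit dtype above or defining the labels as floats up front works:)

real_label = 1.
fake_label = 0.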
But then I ran into another error:
Starting Training Loop...
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-38-35b0d037c184> in <cell line: 2>()
1 # Train with non-saturating G loss
----> 2 G_losses_nonsat, D_losses_nonsat = training_loop(saturating=False)
6 frames
<ipython-input-37-ab625c3e3adf> in training_loop(num_epochs, saturating)
46 noise = torch.randn(b_size,latent_dim, 1, 1, device=device)
47 # Generate fake image batch with G
---> 48 fake = netG(noise)
49 label.fill_(fake_label)
50 # Classify all fake batch with D
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _call_impl(self, *args, **kwargs)
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []
<ipython-input-19-d0ccbb1d678c> in forward(self, x)
22
23 def forward(self, x):
---> 24 x = self.fc(x)
25 x = x.view(-1, 128, self.image_size // 4, self.image_size // 4)
26 x = self.conv(x)
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _call_impl(self, *args, **kwargs)
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/container.py in forward(self, input)
215 def forward(self, input):
216 for module in self:
--> 217 input = module(input)
218 return input
219
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _call_impl(self, *args, **kwargs)
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/linear.py in forward(self, input)
112
113 def forward(self, input: Tensor) -> Tensor:
--> 114 return F.linear(input, self.weight, self.bias)
115
116 def extra_repr(self) -> str:
RuntimeError: mat1 and mat2 shapes cannot be multiplied (25600x1 and 100x6272)
As far as I can tell there is a problem with the network dimensions, but I can't see where, because when I compute the saturating loss everything works fine.
I'd be glad for any help :)
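To help isolate it, this minimal repro (assuming image_size = 28) triggers the same error outside the training loop, so it does not depend on the loss computation at all:

g = Generator(latent_dim=100, image_size=28, num_channels=1, ngpu=1)
noise = torch.randn(256, 100, 1, 1)   # same shape as in the training loop
g(noise)   # RuntimeError: mat1 and mat2 shapes cannot be multiplied (25600x1 and 100x6272)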