在这里学习pytorch。从书上得到以下代码。它在我的 Nvidia RTX3070 上运行得很快(每个 epoch 需要 3.6 秒),但在 Google Colab 上,即使我把运行时类型选为 A100,每个 epoch 也需要大约 7 秒。
知道为什么吗? 预先感谢!
import torch
from torch import nn, optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
# Fix the RNG seed so runs are reproducible/comparable.
torch.manual_seed(1)

# Hyperparameters.
batch_size = 128
learning_rate = 1e-2
num_epoches = 10

# MNIST train/test splits; ToTensor converts PIL images to float tensors
# scaled into [0, 1].
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    transform=transforms.ToTensor(),
    download=True)
test_dataset = datasets.MNIST(
    root='./data',
    train=False,
    transform=transforms.ToTensor())

# shuffle=True for the training set: SGD on data served in a fixed order
# converges worse (the original had shuffle=False).
# num_workers>0 loads batches in background processes; with the default
# num_workers=0 a tiny model like this leaves a fast GPU (e.g. an A100)
# idle waiting for data, which is the usual cause of "slow on Colab".
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                          num_workers=2, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False,
                         num_workers=2, pin_memory=True)
class Cnn(nn.Module):
    """LeNet-style CNN for 28x28 single-channel images (e.g. MNIST).

    Args:
        in_dim: number of input channels.
        n_class: number of output classes.
    """

    def __init__(self, in_dim, n_class):
        super().__init__()
        # Feature extractor: (in_dim, 28, 28) -> (16, 5, 5).
        self.conv = nn.Sequential(
            nn.Conv2d(in_dim, 6, 3, stride=1, padding=1),  # -> (6, 28, 28)
            nn.ReLU(True),
            nn.MaxPool2d(2, 2),                            # -> (6, 14, 14)
            nn.Conv2d(6, 16, 5, stride=1, padding=0),      # -> (16, 10, 10)
            nn.ReLU(True),
            nn.MaxPool2d(2, 2))                            # -> (16, 5, 5)
        # Classifier head over the 16 * 5 * 5 = 400 flattened features.
        self.fc = nn.Sequential(
            nn.Linear(400, n_class))

    def forward(self, x):
        features = self.conv(x)
        flat = features.view(features.size(0), -1)  # (N, 400)
        return self.fc(flat)
# Run on the GPU when one is present; fall back to CPU so the script still
# works on machines without CUDA (the original hard-coded 'cuda' and crashed
# on CPU-only hosts).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = Cnn(1, 10).to(device)
print(model)

# Loss and optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

import time
start = time.time()
for epoch in range(num_epoches):
    print('epoch {}'.format(epoch + 1))
    print('*' * 10)
    model.train()  # explicit train mode (matters if dropout/BN are ever added)
    running_loss = 0.0
    running_acc = 0.0
    for i, data in enumerate(train_loader, 1):
        img, label = data
        # `Variable` is deprecated since PyTorch 0.4 — tensors carry autograd
        # state directly. non_blocking=True overlaps the host->device copy
        # with compute (effective because the DataLoader pins its memory).
        img = img.to(device, non_blocking=True)
        label = label.to(device, non_blocking=True)

        out = model(img)
        loss = criterion(out, label)
        # Accumulate the *sum* of per-sample losses so the epoch average is
        # exact even when the last batch is smaller than batch_size.
        running_loss += loss.item() * label.size(0)
        _, pred = torch.max(out, 1)
        running_acc += (pred == label).sum().item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print('Train Finish {} epoch, Loss: {:.6f}, Acc: {:.6f}'.format(
        epoch + 1, running_loss / (len(train_dataset)), running_acc / (len(
        train_dataset))))
    print(f"took {time.time() - start}")
    start = time.time()
torch.save(model.state_dict(), './cnn.pth')
代码里其实已经把 batch_size 设成了 128,所以问题不在批大小。更可能的原因是:这个模型非常小,A100 大部分时间在等数据——DataLoader 默认 num_workers=0,数据加载和预处理都在主进程里单线程完成,而 Colab 的 CPU/磁盘往往比你本机慢。可以试着给 DataLoader 传 num_workers=2(或更多),并在 .to('cuda') 时加上 non_blocking=True(配合已有的 pin_memory=True),让数据搬运与 GPU 计算重叠,每个 epoch 的时间应该会明显下降。