我目前正在处理小型结构化网络。尽管 PyTorch 因其高度灵活性而是显而易见的选择,但与 TensorFlow 相比,它的训练速度要慢很多。根据我进行的 10 次训练来看,PyTorch 的训练时间似乎是 TensorFlow 的两倍。难道我做错了什么?有没有办法提高 PyTorch 的计算速度?
我制作了一个 Google Colab,在 PyTorch/TensorFlow 中重现相同的网络,以比较这两个模型。
import torch.nn as nn
import torch
import time
from torch.utils.data import DataLoader,TensorDataset
import numpy as np
## Parameters
learning_rate = 0.0005
num_of_epochs = 300
batch_size = 128

## Create Data
# 640 batches of 60 samples each; features are just a running ramp so the
# benchmark is deterministic and needs no external data.
n_samples = 640 * 60
xx = torch.arange(n_samples * 40, dtype=torch.float32).reshape(n_samples, 40)  # inputs, (N, 40)
ff = torch.arange(n_samples, dtype=torch.float32).reshape(n_samples, 1)        # extra scalar input, (N, 1)
out = torch.arange(n_samples, dtype=torch.float32).reshape(n_samples, 1)       # regression targets, (N, 1)
## Define the model
class Model(nn.Module):
    """Linear map of x plus a learned scalar gain on f: y = W·x + a·f.

    Forward inputs:
        x: float tensor of shape (batch, 40)
        f: float tensor broadcastable with (batch, 1)
    Returns:
        float tensor of shape (batch, 1)
    """

    def __init__(self):
        super().__init__()
        self.lin = nn.Linear(40, 1, bias=False)
        # BUG FIX: the original used nn.Parameter(torch.Tensor(1)), which wraps
        # an *uninitialized* tensor (deprecated constructor) — it may contain
        # NaN/inf and silently corrupt training. Start from an explicit,
        # finite value instead.
        self.par = nn.Parameter(torch.zeros(1))

    def forward(self, x, f):
        # (batch, 40) -> (batch, 1), then add the scaled extra input.
        return torch.add(self.lin(x), self.par * f)
# Instantiate the network, the loss criterion, and its optimizer.
model = Model()
loss_fun = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Wrap the raw tensors in a TensorDataset and batch them without shuffling,
# so the PyTorch run mirrors the TensorFlow comparison exactly.
dataset = TensorDataset(xx, ff, out)
train_dataloader = DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=False,
)
%%time
## Training
model.train(True)
for i in range(n_train):
start = time.time()
for iter in range(num_of_epochs):
train_loss = []
for x, f, y in train_dataloader:
# Zero your gradients for every batch!
optimizer.zero_grad()
# Make predictions for this batch
out = model(x, f)
# Compute the loss and its gradients
loss = loss_fun(out, y)
loss.backward()
# Adjust learning weights
optimizer.step()
train_loss.append(loss.item())
train_loss = np.mean(train_loss)
end = time.time()
exe_time['pytorch'].append(end-start)
完整代码可以在这里找到。
您需要将模型和数据传输到 GPU,才能从 GPU 计算中受益。首先,您应该检查您是否确实有权访问 CUDA 设备。
# Pick the first CUDA device when one is visible; otherwise fall back to CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# Put the model in training mode and move its parameters onto the device.
model.train(True)
model.to(device) # Transfer model to device
# NOTE(review): this loop is truncated in this excerpt — the loss computation,
# backward pass, and optimizer step continue beyond the last visible line.
for i in range(n_train):
start = time.time()
for iter in range(num_of_epochs):
train_loss = []
for x, f, y in train_dataloader:
# Zero your gradients for every batch!
optimizer.zero_grad()
# Transfer data to device
x, f = x.to(device), f.to(device)
# Make predictions for this batch
out = model(x, f)