def training(model, optimiser, scheduler, tn, tf, num_bins, data_loader_train,
             data_loader_val, num_epoch, device='cuda',
             save_dir='/content/drive/MyDrive/NeRF/trained_models/Models/',
             save_every=1, resume_from=None):
    """Train ``model``, validate after every epoch and checkpoint periodically.

    Each batch is sliced column-wise: columns 0-2 and 3-5 are passed to
    ``rendering`` as ``o`` and ``d`` (presumably ray origins and directions --
    confirm against the data loader), and the remaining columns are the
    target the rendered output is compared with.

    Args:
        model: Network passed to ``rendering``; its weights are optimised.
        optimiser: Optimiser stepped once per training batch.
        scheduler: Learning-rate scheduler stepped once per epoch.
        tn, tf: Forwarded unchanged to ``rendering``.
        num_bins: Forwarded to ``rendering`` as ``num_bins``.
        data_loader_train: Iterable of 2-D training batches.
        data_loader_val: Iterable of 2-D validation batches.
        num_epoch: Exclusive upper bound of the epoch index.
        device: Device the batch slices are moved to.
        save_dir: Directory that checkpoints are read from and written to.
        save_every: Write a checkpoint when ``epoch % save_every == 0``;
            a falsy value disables checkpointing.
        resume_from: Epoch index of the checkpoint to resume from, or
            ``None`` to start from epoch 0.

    Returns:
        ``(training_loss, val_loss)``: two lists of Python floats holding the
        per-batch MSE for training and validation respectively.

    Note:
        Only the model weights are restored on resume; the optimiser and
        scheduler continue from whatever state the caller passes in.
    """
    # Mount Google Drive to save the trained models.
    # NOTE(review): `drive` is presumably `google.colab.drive`, imported by the
    # surrounding notebook -- confirm.
    drive.mount('/content/drive')

    training_loss = []
    val_loss = []
    start_epoch = 0

    if resume_from is not None:
        checkpoint_path = os.path.join(save_dir, f"model_epoch_{resume_from}.pt")
        if os.path.exists(checkpoint_path):
            print(f"Resuming training from epoch {resume_from}")
            # Checkpoints written below contain the whole pickled module, which
            # the weights-only loader rejects. Only load files you created
            # yourself: unpickling runs arbitrary code.
            checkpoint = torch.load(checkpoint_path, map_location=device,
                                    weights_only=False)
            # Accept both a full module and a plain state dict.
            if isinstance(checkpoint, torch.nn.Module):
                checkpoint = checkpoint.state_dict()
            model.load_state_dict(checkpoint)
            start_epoch = resume_from + 1
        else:
            print(f"No saved checkpoint found at {checkpoint_path}, starting from epoch 0")

    for epoch in tqdm(range(start_epoch, num_epoch)):
        total = 0
        correct = 0
        for batch in tqdm(data_loader_train):
            o = batch[:, :3].to(device)
            d = batch[:, 3:6].to(device)
            target = batch[:, 6:].to(device)

            prediction = rendering(model, o, d, tn, tf, num_bins=num_bins, device=device)
            squared_error = (prediction - target) ** 2
            loss = squared_error.mean()

            # "Accuracy" is the running share of output values within 0.01 of
            # the target, accumulated over the epoch so far.
            correct += (squared_error < 0.01 ** 2).sum().item()
            total += target.numel()
            accuracy_train = correct / total
            print("accuracy_train", accuracy_train)

            optimiser.zero_grad()
            loss.backward()
            optimiser.step()
            training_loss.append(loss.item())
        scheduler.step()

        # Validation pass, run after every epoch.
        correct_val = 0
        total_val = 0
        with torch.no_grad():
            for batch in tqdm(data_loader_val):
                o = batch[:, :3].to(device)
                d = batch[:, 3:6].to(device)
                target = batch[:, 6:].to(device)

                prediction = rendering(model, o, d, tn, tf, num_bins=num_bins, device=device)
                squared_error = (prediction - target) ** 2
                valid_loss = squared_error.mean()
                correct_val += (squared_error < 0.01 ** 2).sum().item()
                # Store a float, like training_loss, instead of keeping a
                # device tensor alive for every validation batch.
                val_loss.append(valid_loss.item())
                total_val += target.numel()
                accuracy_val = correct_val / total_val
                print("accuracy_val", accuracy_val)

        # Save the model every save_every epochs.
        if save_every and epoch % save_every == 0:
            os.makedirs(save_dir, exist_ok=True)
            save_path = os.path.join(save_dir, f"model_epoch_{epoch}.pt")
            # The whole module is pickled (not just its state dict), so the
            # file can be loaded directly with torch.load(...).
            torch.save(model, save_path)

    return training_loss, val_loss
# Train the model for num_epoch epochs
training_loss, val_loss = training(model, optimiser, scheduler, tn, tf, num_bins, data_loader_train, data_loader_val, num_epoch, device='cuda', save_dir='/content/drive/MyDrive/NeRF/trained_models/Models', save_every=1, resume_from=None)
plt.plot(training_loss)
我已经使用这个训练代码运行了我的模型,并运行了 6 个 epoch。然后我用这段代码加载模型(见下文),
check_point = torch.load('/content/drive/MyDrive/NeRF/trained_models/Models/model_epoch_5.pt').to(device)
check_point.eval()
最后我用下面的代码进行了测试,
def msetopsnr(mse):
    return 20*np.log10(1/np.sqrt(mse))
@torch.no_grad()
def testing(model, o, d, tn, tf, num_bins=100, chunck_size=10, h=400, w=400, target=None):
    """Render one ``h`` x ``w`` image in chunks and optionally score it.

    Args:
        model: Network passed to ``rendering``.
        o, d: Tensors split along dim 0 and rendered chunk by chunk; the
            concatenated result must hold ``h * w`` rows of 3 values.
        tn, tf: Forwarded unchanged to ``rendering``.
        num_bins: Forwarded to ``rendering``.
        chunck_size: Passed to ``Tensor.chunk``, so despite its name it is the
            NUMBER of chunks, not the size of each chunk.
        h, w: Height and width the rendered output is reshaped to.
        target: Optional reference image that broadcasts against an
            ``(h, w, 3)`` array; enables the MSE computation.

    Returns:
        ``(image, mse, psnr)``: the rendered image as an ``(h, w, 3)`` NumPy
        array, its mean squared error against ``target``, and
        ``msetopsnr(mse)``. Without a target ``mse`` is 0 and ``psnr`` is
        ``msetopsnr(0)``.
    """
    o = o.chunk(chunck_size)
    d = d.chunk(chunck_size)

    # Render chunk by chunk, then stitch the pieces into one [h*w, 3] tensor.
    image = torch.cat([
        rendering(model, o_batch.to('cuda'), d_batch.to('cuda'), tn, tf, num_bins)
        for o_batch, d_batch in zip(o, d)
    ])
    image = image.reshape(h, w, 3).cpu().numpy()

    if target is not None:
        mse = ((image - target) ** 2).mean()
    else:
        mse = 0
    psnr = msetopsnr(mse)
    return image, mse, psnr
和
imag, mse, psnr = testing(check_point, torch.from_numpy(o_test[5]).to(device).float(), torch.from_numpy(d_test[5]).to(device).float(), tn, tf, num_bins=100, chunck_size=10, target=target_pixel_values_test[5].reshape(400, 400, 3))
这里的问题是当我使用下面的代码绘制 imag 时,我得到一个 psnr 值为 11.91 的空白图像,
# Our predicted test image
plt.imshow(imag)
print(psnr)
为什么这至少不会产生模糊的图像。我的模型没有从 google colab 正确加载吗?
我用的时候,
imag, mse, psnr = testing(model, torch.from_numpy(o_test[5]).to(device).float(), torch.from_numpy(d_test[5]).to(device).float(), tn, tf, num_bins=100, chunck_size=10, target=target_pixel_values_test[5].reshape(400, 400, 3))
我得到一个 psnr 值为 6.33 的模糊图像,其中我放入测试函数的模型只是我未经训练的模型名称,
model = nerf().to(device)
这里的 nerf() 是我存储网络架构的类,为什么未经训练的模型会产生良好的结果并显示模糊的图像,而经过训练的模型会显示空图像。