我保存在谷歌驱动器中的模型无法正确加载

问题描述 投票:0回答:0

def training(model, optimiser, scheduler, tn, tf, num_bins, data_loader_train,
             data_loader_val, num_epoch, device='cuda',
             save_dir='/content/drive/MyDrive/NeRF/trained_models/Models/',
             save_every=1, resume_from=None):
    """Train ``model``, validate after every epoch and checkpoint to ``save_dir``.

    Each batch is indexed as a 2-D tensor: columns 0-2 are passed to
    ``rendering`` as ``o``, columns 3-5 as ``d``, and the remaining columns
    are the regression target. The loss is the mean squared error between
    the rendered prediction and the target.

    Args:
        model: Network handed to ``rendering``; must support
            ``load_state_dict`` when ``resume_from`` is given.
        optimiser: Optimiser stepped once per training batch.
        scheduler: Learning-rate scheduler stepped once per epoch.
        tn, tf: Forwarded unchanged to ``rendering`` (presumably the near
            and far bounds -- confirm against ``rendering``).
        num_bins: Forwarded to ``rendering`` as ``num_bins``.
        data_loader_train: Iterable of training batches.
        data_loader_val: Iterable of validation batches.
        num_epoch: Exclusive upper bound of the epoch index.
        device: Device the batch slices are moved to.
        save_dir: Directory that receives ``model_epoch_{epoch}.pt`` files.
        save_every: Save a checkpoint whenever ``epoch % save_every == 0``;
            a falsy value (``0``/``None``) disables checkpointing.
        resume_from: Epoch index of the checkpoint to resume from, or
            ``None`` to start from epoch 0.

    Returns:
        ``(training_loss, val_loss)``: two lists of Python floats holding the
        per-batch training losses and per-batch validation losses.
    """
    # Mount Google Drive so the checkpoints written below end up on Drive.
    drive.mount('/content/drive')

    training_loss = []
    val_loss = []
    start_epoch = 0

    if resume_from is not None:
        checkpoint_path = os.path.join(save_dir, f"model_epoch_{resume_from}.pt")
        if os.path.exists(checkpoint_path):
            print(f"Resuming training from epoch {resume_from}")
            # NOTE(review): recent PyTorch releases default torch.load to
            # weights_only=True, which rejects whole-module pickles; pass
            # weights_only=False there if the checkpoint source is trusted.
            checkpoint = torch.load(checkpoint_path, map_location=device)
            # Checkpoints are written below with torch.save(model, ...), i.e.
            # as a whole module, so unwrap it before load_state_dict. A plain
            # state_dict file is accepted as well.
            if isinstance(checkpoint, torch.nn.Module):
                checkpoint = checkpoint.state_dict()
            model.load_state_dict(checkpoint)
            start_epoch = resume_from + 1
        else:
            print(f"No saved checkpoint found at {checkpoint_path}, starting from epoch 0")

    for epoch in tqdm(range(start_epoch, num_epoch)):
        correct = 0
        total = 0
        for batch in tqdm(data_loader_train):
            o = batch[:, :3].to(device)
            d = batch[:, 3:6].to(device)
            target = batch[:, 6:].to(device)

            prediction = rendering(model, o, d, tn, tf, num_bins=num_bins, device=device)

            squared_error = (prediction - target) ** 2
            loss = squared_error.mean()

            # "Accuracy" = running fraction of output elements whose squared
            # error is below 0.01**2 within the current epoch.
            correct += (squared_error < 0.01 ** 2).sum().item()
            total += target.numel()
            accuracy_train = correct / total
            print("accuracy_train", accuracy_train)

            optimiser.zero_grad()
            loss.backward()
            optimiser.step()

            training_loss.append(loss.item())

        scheduler.step()

        # Validation pass after every epoch.
        correct_val = 0
        total_val = 0
        with torch.no_grad():
            for batch in tqdm(data_loader_val):
                o = batch[:, :3].to(device)
                d = batch[:, 3:6].to(device)
                target = batch[:, 6:].to(device)

                prediction = rendering(model, o, d, tn, tf, num_bins=num_bins, device=device)

                squared_error = (prediction - target) ** 2
                # Store a float, like training_loss, so the list can be
                # plotted and does not keep device tensors alive.
                val_loss.append(squared_error.mean().item())

                correct_val += (squared_error < 0.01 ** 2).sum().item()
                # Count every batch; counting only the last one makes the
                # denominator too small and the accuracy meaningless.
                total_val += target.numel()

        if total_val:
            accuracy_val = correct_val / total_val
            print("accuracy_val", accuracy_val)

        # Save the whole model every save_every epochs.
        if save_every and epoch % save_every == 0:
            os.makedirs(save_dir, exist_ok=True)
            save_path = os.path.join(save_dir, f"model_epoch_{epoch}.pt")
            torch.save(model, save_path)

    return training_loss, val_loss

# Train the model for num_epoch epochs, then plot the returned training losses.
training_loss, val_loss = training(
    model, optimiser, scheduler, tn, tf, num_bins,
    data_loader_train, data_loader_val, num_epoch,
    device='cuda',
    save_dir='/content/drive/MyDrive/NeRF/trained_models/Models',
    save_every=1,
    resume_from=None,
)
plt.plot(training_loss)

我用上面的训练代码训练了我的模型,共运行了 6 个 epoch。然后我用下面这段代码加载模型:

# Load the checkpoint file written for epoch 5, move the loaded model to the
# target device and switch it to evaluation mode.
check_point = torch.load('/content/drive/MyDrive/NeRF/trained_models/Models/model_epoch_5.pt').to(device)
check_point.eval()

# Finally, I ran the test with the code below.
def msetopsnr(mse):
    """Convert a mean squared error into PSNR in decibels.

    Uses ``-10 * log10(mse)``, which is algebraically the same as
    ``20 * log10(1 / sqrt(mse))``; the formula implies a peak signal value
    of 1.

    Args:
        mse: Mean squared error (scalar or array-like), expected to be >= 0.

    Returns:
        The PSNR value(s); ``inf`` where ``mse`` is exactly 0.
    """
    mse = np.asarray(mse, dtype=np.float64)
    # log10(0) is -inf, which yields the intended +inf PSNR; silence only the
    # divide-by-zero warning for that case.
    with np.errstate(divide='ignore'):
        return -10.0 * np.log10(mse)

@torch.no_grad()
def testing(model, o, d, tn, tf, num_bins=100, chunck_size=10, h=400, w=400,
            target=None, device='cuda'):
    """Render one ``h`` x ``w`` image in chunks and score it against ``target``.

    Args:
        model: Network handed to ``rendering``.
        o, d: Tensors split along dim 0 and passed to ``rendering`` chunk by
            chunk; together they must yield ``h * w`` output rows.
        tn, tf: Forwarded unchanged to ``rendering``.
        num_bins: Forwarded to ``rendering`` as ``num_bins``.
        chunck_size: Number of chunks the inputs are split into (despite the
            name, ``Tensor.chunk`` takes a chunk count, not a chunk length).
        h, w: Height and width of the output image.
        target: Optional ground-truth image of shape ``(h, w, 3)`` as a numpy
            array or a torch tensor.
        device: Device the chunks are rendered on.

    Returns:
        ``(image, mse, psnr)`` where ``image`` is an ``(h, w, 3)`` numpy
        array. Without a target there is nothing to compare against, so
        ``mse`` is 0 and ``psnr`` is ``inf``.
    """
    image = []  # rendered chunks, concatenated into the final image below

    for o_batch, d_batch in zip(o.chunk(chunck_size), d.chunk(chunck_size)):
        image_batch = rendering(model, o_batch.to(device), d_batch.to(device),
                                tn, tf, num_bins=num_bins, device=device)
        image.append(image_batch)

    image = torch.cat(image)  # [h*w, 3]
    image = image.reshape(h, w, 3).cpu().numpy()

    if target is None:
        # Same values as before, but without pushing mse == 0 through a
        # division by zero inside msetopsnr.
        return image, 0, float('inf')

    if torch.is_tensor(target):
        # The image is a numpy array, so bring a tensor target to numpy too.
        target = target.detach().cpu().numpy()

    mse = ((image - target) ** 2).mean()
    psnr = msetopsnr(mse)

    return image, mse, psnr

# Render test view 5 with the model loaded from the checkpoint and score it
# against the matching ground-truth pixels.
imag, mse, psnr = testing(
    check_point,
    torch.from_numpy(o_test[5]).to(device).float(),
    torch.from_numpy(d_test[5]).to(device).float(),
    tn,
    tf,
    num_bins=100,
    chunck_size=10,
    target=target_pixel_values_test[5].reshape(400, 400, 3),
)

问题是:当我用下面的代码绘制 imag 时,得到的是一张 PSNR 值为 11.91 的空白图像:

# 我们预测的测试图像
plt.imshow(imag)
print(psnr)

为什么连一张模糊的图像都得不到?是我的模型在 Google Colab 中没有被正确加载吗?

而当我改用下面的代码时:

# Same rendering call for test view 5, but passing `model` instead of the
# loaded checkpoint.
imag, mse, psnr = testing(
    model,
    torch.from_numpy(o_test[5]).to(device).float(),
    torch.from_numpy(d_test[5]).to(device).float(),
    tn,
    tf,
    num_bins=100,
    chunck_size=10,
    target=target_pixel_values_test[5].reshape(400, 400, 3),
)

我得到的是一张 PSNR 值为 6.33 的模糊图像。这里传入测试函数的 model 只是我未经训练的模型:

# Construct a new nerf() network and move it to the target device.
# NOTE(review): no checkpoint is loaded here, so this is presumably the
# freshly initialised (untrained) network -- confirm against the notebook state.
model = nerf().to(device)

这里的 nerf() 是我定义网络架构的类。为什么未经训练的模型反而能得到较好的结果并显示出模糊的图像,而训练过的模型却只显示空白图像?

pytorch load
© www.soinside.com 2019 - 2024. All rights reserved.