梯度计算所需的变量之一已通过就地操作进行了修改:[torch.cuda.FloatTensor [16,1]]

问题描述 投票:0回答:1

嗨,我在运行时在loss.backward()中出现此错误。我在Internet上搜索,并且在就地操作中使用和编辑张量时确保了该错误。我没有找到错误,我也不明白是什么张量给了这个错误这里是感兴趣的2类的代码:

class PolicyNetwork():

    def __init__(self, n_state, n_action, n_hidden1=128, n_hidden2 = 128, lr=0.00001):
        self.model = ActorCriticModel(n_state, n_action, n_hidden1, n_hidden2)
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr)

    def predict(self, s):
        return self.model(s)

    def get_action(self, s):



        distXZ, distO, state_value = self.predict(s)

        action1 = distXZ.sample()
        action2 = distO.sample()


        log_prob1 = distXZ.log_prob(action1).view(-1,1)
        log_prob2 = distO.log_prob(action2).view(-1,1)

        return action1, action2, log_prob1, log_prob2, state_value

    def update(self, log_prob1, log_prob2,value, reward):

        #for log_prob1, log_prob2, value, reward in zip(log_probs1, log_probs2, state_values, rewards):


        advantage = reward - value



        policy_loss = ((-log_prob1 * advantage) + (-log_prob2 * advantage)).mean()
        value_loss = F.smooth_l1_loss(value, reward)


        loss = policy_loss + value_loss


        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss, reward, policy_loss, value_loss

这里是调用更新的火车类,在这里我称为loss.backward():

def train_regression(model, estimator, train_loader, test_loader,exp_name='train_regressor', epochs=200):

    # device
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)

    # definiamo un dizionario contenente i loader
    loader = {'train': train_loader, 'test': test_loader}





    for e in range(epochs):



        #Iteriamo in due modalità, training e testing
        for mode in ['train', 'test']:

            model.train() if mode == 'train' else model.eval()
            with torch.set_grad_enabled(mode == 'train'):  # abilitiamo i gradienti solo in training
                for i, batch in enumerate(loader[mode]):



                    state_image = batch['image'].to(device)

                    state = batch['start'][0]
                    action1, action2, log_prob1, log_prob2, state_value = estimator.get_action(state_image)


                    rewards = []
                    for a1, a2 in zip(action1, action2):


                        rewards.append(loader[mode].dataset.get_reward(state.item(), a1[0], a1[1], a2[0], a2[1]))


                    reward= torch.Tensor(rewards).view(-1,1).cuda()



                    if mode == 'train':
                        loss_to_plot, policy_to_plot, reward_to_plot, value_to_plot = estimator.update(log_prob1, log_prob2, state_value, reward)



        torch.save(model.state_dict(), '%s.pth' % exp_name)

    return model


很抱歉,如果我输入了大量代码,但是我没有发现错误(我的操作已修改张量)我也打印了4张量,它们是16X1

tensor([[-1.6174],
        [-1.7020],
        [-1.4557],
        [-1.8915],
        [-5.1338],
        [-4.6408],
        [-2.1750],
        [-1.7954],
        [-2.6198],
        [-2.5692],
        [-2.4872],
        [-2.4471],
        [-2.6823],
        [-1.6389],
        [-2.3344],
        [-2.3923]], device='cuda:0', grad_fn=<ViewBackward>)
tensor([[-3.1944],
        [-2.3279],
        [-1.6919],
        [-2.1784],
        [-2.9512],
        [-3.1380],
        [-2.6455],
        [-1.8039],
        [-2.1176],
        [-3.2955],
        [-2.2406],
        [-1.8591],
        [-4.2133],
        [-3.0645],
        [-5.1651],
        [-1.8645]], device='cuda:0', grad_fn=<ViewBackward>)
tensor([[-0.0178],
        [-0.0075],
        [-0.0216],
        [-0.0035],
        [-0.0750],
        [-0.0397],
        [-0.0263],
        [-0.0412],
        [-0.0118],
        [-0.0376],
        [-0.0511],
        [-0.0626],
        [ 0.0066],
        [-0.0344],
        [-0.0438],
        [-0.0028]], device='cuda:0', grad_fn=<ViewBackward>)
tensor([[0.5000],
        [0.2500],
        [0.7500],
        [0.5000],
        [0.0000],
        [0.5000],
        [0.0000],
        [0.2500],
        [0.5000],
        [0.2500],
        [0.5000],
        [0.2500],
        [0.5000],
        [0.0000],
        [0.2500],
        [0.5000]], device='cuda:0')

这里是班级演员评论家,模特:

    class ActorCriticModel(nn.Module):

        def __init__(self, n_input, n_output, n_hidden1, n_hidden2):
            super(ActorCriticModel, self).__init__()

            #ResNet50 per features sharing
            self.model = resnet50(pretrained=True)

            #Cambiamo il Fully Conncted con un lineare con input di un MLP
            #self.model.fc = nn.Linear(self.model.fc.in_features, n_input)
            out = self.model.fc.in_features
            self.model.fc = nn.Identity()

            #"Spezzettiamo" l' hiidden layer in due (devo dividere il numero delle features in due?

            self.fcN = nn.Linear(out, n_hidden1)
            self.fcV = nn.Linear(out, n_hidden2)


            #self.muX, self.muZ, self.sigma, self.muO1, self.muO2, self.sigmaO = nn.Linear(n_hidden1, n_output)

            #Il primo hidden layer ci ritorna 6 parametri per due distribuzioni Normali
            #6 linear con un solo output per ogni parametro
            self.muX = nn.Linear(n_hidden1, 1)
            self.muZ = nn.Linear(n_hidden1, 1)
            self.sigma = nn.Linear(n_hidden1, 1)
            self.muO1 = nn.Linear(n_hidden1, 1)
            self.muO2 = nn.Linear(n_hidden1, 1)
            self.sigmaO = nn.Linear(n_hidden1, 1)

            #Il secondo hidden layer ci ritorna uno scalare, value
            self.value = nn.Linear(n_hidden2, 1)

            #istanziamo due distribuzioni Normali, relative al movimento....
            self.distributionXZ = torch.distributions.MultivariateNormal
            # e all'orientamento
            self.distributionO = torch.distributions.MultivariateNormal

        def forward(self, x):

            #Passiamo l'immagine sulla ResNet50
            #x = x.cuda()
            x = self.model(x)

            #Attivazioni -> ReLU
            x1 = F.relu(self.fcN(x))
            x2 = F.relu(self.fcV(x))



            #WLS linux terminal on wondows
            #Normale per movimento
            muX = 2 * torch.tanh(self.muX(x1))
            muZ = 5 * torch.sigmoid(self.muZ(x1))
            sigma = F.softplus(self.sigma(x1)) + 1e-5

            #Normale per orientamento
            muO1 =  torch.tanh(self.muO1(x1))
            muO2 =  torch.tanh(self.muO2(x1))
            mod = torch.sqrt(muO1**2+muO2**2)
            mod[mod<1e-5]=1

            muO1=muO1/mod
            muO2=muO2/mod
            sigmaO = F.softplus(self.sigmaO(x1))

            #Creiamo le distribuzioni relativa al movimento...
            cov = torch.eye(2).view(1,2,2).cuda()*sigma.view(-1,1,1)
            distXZ = self.distributionXZ(torch.cat([muX,muZ],1), cov)

            #relativa all'orientamento
            covO = torch.eye(2).view(1,2,2).cuda()*sigmaO.view(-1,1,1)
            distO = self.distributionO(torch.cat([muO1,muO2],1), covO)

            #Tensore, 16x1
            value = self.value(x2)
            value = value.view(-1,1)
            #print(value)

            return distXZ, distO, value
python pytorch reinforcement-learning tensor
1个回答
0
投票

问题出在您的网络中。不在前进中。

.forward中的某处您有类似

    h = foo(x)
    h = bar(h)

或实际的镶嵌物,例如一种“ _”方法(例如unsqueeze_()unsqueeze())。

© www.soinside.com 2019 - 2024. All rights reserved.