python pytorch: Why do a Sequential NN and an identical nn.Module NN give different results?

import numpy as np
import torch
import torch.nn as nn


class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()

        self.bn1 = torch.nn.BatchNorm2d(num_features=3)
        self.conv1 = torch.nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1)
        self.act1 = torch.nn.ReLU()
        self.pool1 = torch.nn.MaxPool2d(kernel_size=2, stride=2)
        # self.dr1 = torch.nn.Dropout2d(0.1)

        self.bn2 = torch.nn.BatchNorm2d(num_features=16)
        self.conv2 = torch.nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.act2 = torch.nn.ReLU()
        self.pool2 = torch.nn.MaxPool2d(kernel_size=2, stride=2)
        # self.dr2 = torch.nn.Dropout2d(0.1)

        self.bn3 = torch.nn.BatchNorm2d(num_features=32)
        self.conv3 = torch.nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.act3 = torch.nn.ReLU()
        self.pool3 = torch.nn.MaxPool2d(kernel_size=2, stride=2)
        # self.dr3 = torch.nn.Dropout2d(0.1)

        self.bn4 = torch.nn.BatchNorm1d(num_features=4 * 4 * 64)
        self.fc4 = torch.nn.Linear(4 * 4 * 64, 256)
        self.act4 = torch.nn.Tanh()
        # self.dr4 = torch.nn.Dropout1d(0.1)

        self.bn5 = torch.nn.BatchNorm1d(num_features=256)
        self.fc5 = torch.nn.Linear(256, 64)
        self.act5 = torch.nn.Tanh()
        # self.dr5 = torch.nn.Dropout1d(0.1)


        self.fc6 = torch.nn.Linear(64, 10)

    def forward(self, x):
        x = self.bn1(x)
        x = self.conv1(x)
        x = self.act1(x)
        x = self.pool1(x)
        # x = self.dr1(x)

        x = self.bn2(x)
        x = self.conv2(x)
        x = self.act2(x)
        x = self.pool2(x)
        # x = self.dr2(x)

        x = self.bn3(x)
        x = self.conv3(x)
        x = self.act3(x)
        x = self.pool3(x)
        # x = self.dr3(x)

        x = x.view(x.size(0), x.size(1) * x.size(2) * x.size(3))
        x = self.bn4(x)
        x = self.fc4(x)
        x = self.act4(x)
        # x = self.dr4(x)

        x = self.bn5(x)
        x = self.fc5(x)
        x = self.act5(x)
        # x = self.dr5(x)

        x = self.fc6(x)
        return x
last_model = Net()
#%%
def conv_block(in_f, out_f, activation='relu', *args, **kwargs):
    activations = nn.ModuleDict([
            ['tanh', nn.Tanh()],
            ['relu', nn.ReLU()]
    ])

    return nn.Sequential(
        nn.BatchNorm2d(in_f),
        nn.Conv2d(in_f, out_f, *args, **kwargs),
        activations[activation],
        nn.MaxPool2d(kernel_size=2, stride=2),
        # nn.Dropout2d(0.1)
    )


class MyEncoder(nn.Module):
    def __init__(self, enc_sizes, *args, **kwargs):
        super().__init__()
        self.conv_blocks = nn.Sequential(*[conv_block(in_f,
            out_f, kernel_size=3, padding=1, *args, **kwargs)
            for in_f, out_f in zip(enc_sizes, enc_sizes[1:])])


    def forward(self, x):
        return self.conv_blocks(x)


def dec_block(in_f, out_f):
    return nn.Sequential(
        nn.BatchNorm1d(in_f),
        nn.Linear(in_f, out_f),
        nn.Tanh(),
        # nn.Dropout1d(0.1)
    )

class MyDecoder(nn.Module):
    def __init__(self, dec_sizes, n_classes):
        super().__init__()
        self.dec_blocks = nn.Sequential(*[dec_block(in_f, out_f)
            for in_f, out_f in zip(dec_sizes, dec_sizes[1:])])
        self.last = nn.Linear(dec_sizes[-1], n_classes)


    def forward(self, x):
        return self.dec_blocks(x)


class MyNET(nn.Module):
    def __init__(self, in_c, enc_sizes, dec_sizes, n_classes, activation='relu'):
        super().__init__()
        self.enc_sizes = [in_c, *enc_sizes]
        l = 32 / (2 ** len(enc_sizes))
        # print(enc_sizes[-1] * l * l)
        self.dec_sizes = [int(enc_sizes[-1] * l * l), *dec_sizes]
        self.encoder = MyEncoder(self.enc_sizes, activation=activation)
        self.decoder = MyDecoder(self.dec_sizes, n_classes)


    def forward(self, x):
        x = self.encoder(x)
        x = x.view(x.size(0), x.size(1) * x.size(2) * x.size(3))
        x = self.decoder(x)
        return x
my_nodel = MyNET(3, [16, 32, 64], [256, 64], 10, activation='relu')

And the results on CIFAR10 after 5 epochs:

tensor(0.6721)
tensor(0.7059)
tensor(0.7359)
tensor(0.7288)
tensor(0.7373)
---------------
tensor(0.4944)
tensor(0.5391)
tensor(0.5898)
tensor(0.6283)
tensor(0.6398)

The training function:

def train(net, X_train, y_train, X_test, y_test):
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    net = net.to(device)
    loss = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=1.0e-3, weight_decay=1e-5)
    batch_size = 100
    test_accuracy_history = []
    test_loss_history = []
    X_test = X_test.to(device)
    y_test = y_test.to(device)
    for epoch in range(5):
        order = np.random.permutation(len(X_train))
        for start_index in range(0, len(X_train), batch_size):
            optimizer.zero_grad()
            net.train()

            batch_indexes = order[start_index:start_index+batch_size]

            X_batch = X_train[batch_indexes].to(device)
            y_batch = y_train[batch_indexes].to(device).view(-1)

            preds = net.forward(X_batch)

            loss_value = loss(preds, y_batch)
            loss_value.backward()

            optimizer.step()

        net.eval()
        test_preds = net.forward(X_test)
        test_loss_history.append(loss(test_preds, y_test.squeeze()).data.cpu())

        accuracy = (test_preds.argmax(dim=1) == y_test).float().mean().data.cpu()
        test_accuracy_history.append(accuracy)

        print(accuracy)
    print('---------------')
    return test_accuracy_history, test_loss_history

I expected these to be the same neural network and to produce the same results. I thought the problem was in the training itself, but if you train the second model first and then the first, the results are the same. In the code I deliberately disabled dropout so that it would not randomly switch off neurons (even though the random seed is the same). Maybe the problem is that the gradients are somehow computed differently from the usual way???
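One way to check whether the two definitions really are equivalent is to push the same dummy CIFAR10-sized batch through both instances defined above and compare the output shapes. A minimal diagnostic sketch, reusing the last_model and my_nodel objects from the code above:

# Diagnostic sketch: compare the output shapes of the two models on a dummy batch.
dummy = torch.randn(4, 3, 32, 32)  # CIFAR10-shaped input: batch of 4 RGB 32x32 images

last_model.eval()   # eval mode so BatchNorm uses running statistics
my_nodel.eval()

with torch.no_grad():
    out_module = last_model(dummy)
    out_sequential = my_nodel(dummy)

print(out_module.shape)      # expected torch.Size([4, 10])
print(out_sequential.shape)  # expected to match if the two networks really are identical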

python pytorch conv-neural-network sequential
1 Answer

The forward method of your MyDecoder module skips the final linear layer (self.last).

As written, the first model produces an output of size (bs, 10), while the second model produces an output of size (bs, 64).
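A minimal sketch of the fix, keeping everything else from the question unchanged: apply the already-constructed self.last layer at the end of MyDecoder.forward, so that the second model also ends in 10 logits per sample.

class MyDecoder(nn.Module):
    def __init__(self, dec_sizes, n_classes):
        super().__init__()
        self.dec_blocks = nn.Sequential(*[dec_block(in_f, out_f)
            for in_f, out_f in zip(dec_sizes, dec_sizes[1:])])
        self.last = nn.Linear(dec_sizes[-1], n_classes)

    def forward(self, x):
        x = self.dec_blocks(x)
        return self.last(x)  # final classification layer that was being skipped

With this change the Sequential-style model ends in a Linear(64, 10) layer, just like Net.fc6, and the two architectures match layer for layer.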
