我的 PyTorch 神经网络只输出 1 和 0,而不是实际的概率,即使我在输出层使用了 sigmoid 函数
# Hold out 30% of the samples for validation; stratify on y so both splits
# keep the same class proportions.
X_train, X_val, y_train, y_val = train_test_split(
    scaled_X, y, stratify=y, test_size=0.3, random_state=42
)

# Features and targets are all converted to float32 tensors. Targets are
# reshaped to a single column so they line up with the network's one-unit
# output. `.values` hands torch the underlying array rather than the pandas
# object, and both target tensors now share the same dtype.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).reshape(-1, 1)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val.values, dtype=torch.float32).reshape(-1, 1)

# NOTE(review): training uses `scaled_X`, but `test` is converted as-is.
# If `test` has not been passed through the same scaler, the network sees
# inputs on a different scale at prediction time, which can push the sigmoid
# output to 0 or 1 -- confirm against the preprocessing code.
test_tensor = torch.tensor(test.values, dtype=torch.float32)
class Network(nn.Module):
    """Fully connected binary classifier ending in a sigmoid.

    The network is three hidden stages, each
    ``Linear -> ReLU -> Dropout -> BatchNorm1d``, followed by a one-unit
    ``Linear`` layer and a sigmoid.

    Args:
        in_features: Number of input features per sample.
        hidden_size: Width of every hidden layer.
        dropout: Drop probability used by each ``Dropout`` layer.
    """

    def __init__(self, in_features: int = 13, hidden_size: int = 2024,
                 dropout: float = 0.3) -> None:
        super().__init__()
        # Attribute names and registration order are kept as-is so that
        # existing state_dict keys ("fc1.weight", ..., "fc5.bias") still load.
        self.fc1 = nn.Linear(in_features, hidden_size)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout)
        self.bn1 = nn.BatchNorm1d(hidden_size)

        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(dropout)
        self.bn2 = nn.BatchNorm1d(hidden_size)

        self.fc3 = nn.Linear(hidden_size, hidden_size)
        self.relu3 = nn.ReLU()
        self.dropout3 = nn.Dropout(dropout)
        self.bn3 = nn.BatchNorm1d(hidden_size)

        # There is no fc4; the output layer keeps its original name.
        self.fc5 = nn.Linear(hidden_size, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid of the final linear layer for input ``x``.

        For a ``(batch, in_features)`` input the result has shape
        ``(batch, 1)`` with values in ``[0, 1]``. When the pre-sigmoid value
        is large in magnitude the result rounds to 0 or 1.
        """
        hidden_stages = (
            (self.fc1, self.relu1, self.dropout1, self.bn1),
            (self.fc2, self.relu2, self.dropout2, self.bn2),
            (self.fc3, self.relu3, self.dropout3, self.bn3),
        )
        for stage in hidden_stages:
            for layer in stage:
                x = layer(x)
        return self.sigmoid(self.fc5(x))
def model_train(model, X_train, y_train, X_val, y_val, *,
                n_epochs=50, batch_size=500, lr=0.0001):
    """Train ``model`` with Adam and BCE loss, keeping the best weights.

    The training data is walked in fixed-order mini-batches each epoch.
    After every epoch the model is scored on the validation set, and the
    weights with the highest validation accuracy are restored before
    returning.

    Args:
        model: Module whose output is a probability in ``[0, 1]`` (as
            ``nn.BCELoss`` requires).
        X_train: Training features.
        y_train: Training targets, same shape as ``model(X_train)``.
        X_val: Validation features.
        y_val: Validation targets, same shape as ``model(X_val)``.
        n_epochs: Number of passes over the training data.
        batch_size: Number of samples per mini-batch.
        lr: Learning rate for the Adam optimizer.

    Returns:
        The best validation accuracy seen, as a float. If ``n_epochs`` is 0
        the model is left untouched and negative infinity is returned.
    """
    loss_fn = nn.BCELoss()  # binary cross entropy on probabilities
    optimizer = optim.Adam(model.parameters(), lr=lr)
    batch_start = torch.arange(0, len(X_train), batch_size)

    # Track the best validation score and a snapshot of the weights.
    best_acc = float("-inf")
    best_weights = None

    for epoch in range(n_epochs):
        model.train()
        with tqdm.tqdm(batch_start, unit="batch", mininterval=0,
                       disable=False) as bar:
            bar.set_description(f"Epoch {epoch}")
            for start in bar:
                # Take a batch.
                X_batch = X_train[start:start + batch_size]
                y_batch = y_train[start:start + batch_size]

                # Forward pass.
                y_pred = model(X_batch)
                loss = loss_fn(y_pred, y_batch)

                # Backward pass and weight update.
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # Report running loss and batch accuracy on the progress bar.
                acc = (y_pred.round() == y_batch).float().mean()
                bar.set_postfix(loss=float(loss), acc=float(acc))

        # Evaluate at the end of each epoch. No gradients are needed here,
        # so skip building the autograd graph for the validation set.
        model.eval()
        with torch.no_grad():
            y_pred = model(X_val)
        acc = float((y_pred.round() == y_val).float().mean())
        if acc > best_acc:
            best_acc = acc
            best_weights = copy.deepcopy(model.state_dict())

    # Restore the best snapshot (absent only when no epoch was run).
    if best_weights is not None:
        model.load_state_dict(best_weights)
    return best_acc
# Build the network that is trained and then used for prediction below.
model = Network()

best_validation_accuracy = model_train(
    model, X_train_tensor, y_train_tensor, X_val_tensor, y_val_tensor
)
print("Best Validation Accuracy:", best_validation_accuracy)

# Predict on the test set in eval mode and without gradient tracking.
# The result is the flattened sigmoid output of the model, one value per row.
model.eval()
with torch.no_grad():
    y_pred_prob = model(test_tensor).numpy().flatten()
问题是:预测结果里只有零,而不是实际的概率,这与我的预期不同。我想弄清楚为什么会这样。我的代码如上所示,很容易理解。
我尝试在 Kaggle 上查看其他 notebook 是否有同样的问题。我尝试过修改神经网络的架构,调整批量大小、训练轮数(epoch)和学习率。我询问了 ChatGPT,也在 Google 上搜索过,但没有找到明确的答案。
输出的 0 或 1 本身就是模型给出的概率:当最后一层线性输出的绝对值很大时,sigmoid 会饱和,浮点结果被舍入为恰好 0 或 1。这通常说明模型过拟合或过度自信。