需要帮助找出 for 循环仅输出 1s 的原因。当我删除循环时,它工作正常,输出合理的数据,但在循环内,创建的 df 的每一行都是 1。为什么呢?
def bootstrap(x, Nboot):
x = np.array(x)
models = []
resampled_stat = []
precision = []
recall = []
f1 = []
numb_rows = len(data)
for k in range(Nboot):
chosen_rows = np.random.choice(numb_rows, replace=True, size=numb_rows)
bootstrap_sample = data.iloc[chosen_rows]
resampled_stat.append(bootstrap_sample)
X_train, X_test, Y_train, Y_test = train_test_split(x, y, test_size = 0.2, random_state = 1)
model = tree.DecisionTreeClassifier().fit(X_train, Y_train)
models.append(model)
y_pred = model.predict(X_test)
precision.append(metrics.precision_score(Y_test, y_pred, average="macro"))
recall.append(metrics.recall_score(Y_test, y_pred, average="macro"))
f1.append(metrics.f1_score(Y_test, y_pred, average="macro"))
pred_df = pd.DataFrame(
{
"Precision": precision,
"Recall": recall,
"F1": f1,
"Models": models,
})
return pred_df
我已经复制了你的代码,它对我有用。因此我建议您检查输入数据。具体
y
传递给函数?data
和bootstrap_sample
的作用是什么?我认为这个逻辑在代码中被部分审查。它们会以某种方式影响代码的其他部分吗?y
数组有任何类吗?也许 y
是一个常数,因此你的预测只有 100% 准确?from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics
import pandas as pd
import numpy as np
def bootstrap(x, y, Nboot):
x = np.array(x)
y = np.array(y)
models = []
resampled_stat = []
precision = []
recall = []
f1 = []
for k in range(Nboot):
X_train, X_test, Y_train, Y_test = train_test_split(x, y, test_size = 0.2, random_state = 1)
model = DecisionTreeClassifier().fit(X_train, Y_train)
models.append(model)
y_pred = model.predict(X_test)
precision.append(metrics.precision_score(Y_test, y_pred, average="macro"))
recall.append(metrics.recall_score(Y_test, y_pred, average="macro"))
f1.append(metrics.f1_score(Y_test, y_pred, average="macro"))
pred_df = pd.DataFrame(
{
"Precision": precision,
"Recall": recall,
"F1": f1,
"Models": models,
})
return pred_df
x, y = make_classification(n_samples=100, n_features=20, n_classes=2)
bootstrap(x,y, 10)