我的逻辑回归模型在 classification_report 中的精确率(precision)很低,但我不知道原因是什么。
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Train a class-weighted logistic regression on the columns of `df` listed
# below and print precision/recall/F1 reports for the train and test splits.
# `df` is expected to be defined earlier in the file.
# NOTE(review): this assumes every feature column is already numeric and free
# of missing values -- confirm that any categorical columns were encoded and
# any gaps imputed before this point.
features = ['gender', 'age', 'hypertension', 'heart_disease', 'avg_glucose_level',
            'bmi', 'smoking_status']
data1 = ['stroke']
X = df[features]
# Index with the column label itself so `y` is a 1-D Series. Indexing with
# the list (`df[data1]`) yields a single-column DataFrame, which scikit-learn
# flags as a column-vector target.
y = df[data1[0]]

# Stratify so both splits keep the same class proportions as the full data;
# with a rare positive class a plain random split can distort the reports.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=42, stratify=y
)

# class_weight='balanced' gives higher weight to the class with fewer samples.
# NOTE: this re-weighting raises recall on the rare class at the cost of more
# false positives, so a low precision for that class is an expected trade-off
# rather than, by itself, a sign of a bug.
#
# The 'sag' solver only converges quickly when features share a similar
# scale, so the inputs are standardised first. Wrapping both steps in a
# pipeline means the scaler is fitted on the training split only and is
# re-applied automatically on every predict() call. L2 regularisation is the
# estimator's default penalty. max_iter is raised above the default to give
# the solver room to converge, and random_state pins its data shuffling so
# runs are reproducible.
# `reg` is now a Pipeline; the fitted classifier itself is reg[-1]
# (e.g. reg[-1].coef_).
reg = make_pipeline(
    StandardScaler(),
    LogisticRegression(
        solver='sag',
        class_weight='balanced',
        max_iter=1000,
        random_state=42,
    ),
).fit(X_train, y_train)

train_pred = reg.predict(X_train)
test_pred = reg.predict(X_test)
# Same test-set predictions under the script's other name (no second predict).
y_pred = test_pred

# train report
print(classification_report(y_train, train_pred))
# test report
print(classification_report(y_test, test_pred))