我正在建立一个模型,可以预测患者被诊断患有慢性肾脏病 以下是我使用的步骤;
这是我的代码;
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import math
import warnings
warnings.filterwarnings(action='ignore')
df = pd.read_csv('kidney_disease.csv')
cat_cols = [col for col in df.columns if df[col].dtype == 'object']
num_cols = [col for col in df.columns if df[col].dtype != 'object']
from sklearn.preprocessing import OrdinalEncoder
enc = OrdinalEncoder()
df_copied = df.copy()
X_enc = enc.fit_transform(df_copied[cat_cols])
df_copied[cat_cols] = pd.DataFrame(X_enc, columns=cat_cols)
x = df_copied.drop(['classification'], axis=1)
y = df_copied['classification']
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.3, random_state = 42)
from imblearn.over_sampling import SMOTE
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train, y_train)
dtc = grid_search_dtc.best_estimator_
# accuracy score, confusion matrix and classification report of decision tree
dtc_acc = accuracy_score(y_test, dtc.predict(X_test))
dtcClassifier = 'dtc_model.pkl'
with open(dtcClassifier, 'wb') as file:
pickle.dump(dtc, file)
我使用streamlit库来实现我的模型
with open('dtc_model.pkl', 'rb') as file:
model = pickle.load(file)
# Define the column names
cols = ["age", "bp", "sg", "al", "su", "rbc", "pc", "pcc", "ba", "bgr", "bu", "sc", "sod", "pot", "hemo", "pcv", "wc", "rc", "htn", "dm", "cad", "appet", "pe", "ane"]
def main():
st.title("Kidney Disease Prediction Using Hybrid Model")
html_temp = """
<div style="background:#025246 ;padding:10px">
<h2 style="color:white;text-align:center;">KD Prediction App </h2>
</div>
"""
st.markdown(html_temp, unsafe_allow_html = True)
# Define input fields
age = st.text_input("Age", 0)
bp = st.text_input("Blood Pressure", 0)
sg = st.selectbox("Specific Gravity", [0, 1.005, 1.010, 1.015, 1.020, 1.025])
al = st.selectbox("Albumin", [0, 1, 2, 3, 4, 5])
su = st.selectbox("Sugar", [0, 1, 2, 3, 4, 5])
rbc = st.selectbox("Red Blood Cells", ["", "normal", "abnormal"])
pc = st.selectbox("Pus Cell", ["", "normal", "abnormal"])
pcc = st.selectbox("Pus Cell clumps", ["", "present", "notpresent"])
ba = st.selectbox("Bacteria", ["", "present", "notpresent"])
bgr = st.text_input("Blood Glucose Random", 0)
bu = st.text_input("Blood Urea", 0)
sc = st.text_input("Serum Creatinine", 0)
sod = st.text_input("Sodium", 0)
pot = st.text_input("Potassium", 0)
hemo = st.text_input("Hemoglobin", 0)
pcv = st.text_input("Packed Cell Volume", 0)
wc = st.text_input("White Blood Cell Count", 0)
rc = st.text_input("Red Blood Cell Count", 0)
htn = st.selectbox("Hypertension", ["", "yes", "no"])
dm = st.selectbox("Diabetes Mellitus", ["", "yes", "no"])
cad = st.selectbox("Coronary Artery Disease", ["", "yes", "no"])
appet = st.selectbox("Appetite", ["", "good", "poor"])
pe = st.selectbox("Pedal Edema", ["", "yes", "no"])
ane = st.selectbox("Anemia", ["", "yes", "no"])
if st.button("Predict"):
# Convert data to DataFrame
data = {
'age': int(age),
'bp': float(bp),
'sg': sg,
'al': al,
'su': su,
'rbc': rbc,
'pc': pc,
'pcc': pcc,
'ba': ba,
'bgr': float(bgr),
'bu': float(bu),
'sc': float(sc),
'sod': float(sod),
'pot': float(pot),
'hemo': float(hemo),
'pcv': float(pcv),
'wc': float(wc),
'rc': float(rc),
'htn': htn,
'dm': dm,
'cad': cad,
'appet': appet,
'pe': pe,
'ane': ane
}
df = pd.DataFrame([data], columns=cols)
# Check if all input fields are filled
# Convert data to DataFrame
cat_cols = [col for col in df.columns if df[col].dtype == 'object']
enc = OrdinalEncoder()
df_copied = df.copy()
df_copied[cat_cols] = enc.fit_transform(df_copied[cat_cols])
print(pd.DataFrame(df_copied))
prediction = model.predict(df_copied)
print(prediction[0])
# Display prediction result
if prediction[0] == 1:
st.write("Positive")
else:
st.write("Negative")
if __name__ == '__main__':
main()
当我以 Dataframe 格式打印 df_copied 时,我注意到 cat_cols 始终为 0
我尝试了 LabelEncoder() 但是一样的
您需要存储训练中的序数编码器,并使用它来转换来自 Streamlit 的传入数据,而不是创建一个新的编码器(它只能看到一个值,因此只能为其分配值 0,无论该值是什么) ).