我正试图确定流失率。如果我尝试用 .size()
它的工作原理。但如果我用 .mean()
它没有。我不明白为什么它不工作,因为我需要找出平均值。
from sklearn.metrics import classification_report,confusion_matrix
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.cluster import KMeans
import warnings
warnings.filterwarnings("ignore")
from pylab import rcParams
%matplotlib inline
import plotly.offline as pyoff
import plotly.graph_objs as go
import xgboost as xgb
from sklearn.model_selection import KFold, cross_val_score, train_test_split
import xgboost as xgb
#initate plotly
pyoff.init_notebook_mode()
#read data from csv and redo the data work we done before
df_data = pd.read_csv(r'C:\Users\aayus\OneDrive\Desktop\College Project\4. Churn Prediction\churn-data.csv', encoding='latin1')
df_data.head(10)
df_data.info()
df_data.loc[df_data.Churn=='No','Churn'] = 0
df_data.loc[df_data.Churn=='Yes','Churn'] = 1
df_plot = df_data.groupby('gender').Churn.size().reset_index()
这工作。但如果我把最后一行改成
df_plot = df_data.groupby('gender').Churn.mean().reset_index()
它给出的是 "没有数字类型可聚合"。
DataError Traceback (most recent call last)
<ipython-input-1-4875501f5fb5> in <module>
26 df_data.loc[df_data.Churn=='No','Churn'] = 0
27 df_data.loc[df_data.Churn=='Yes','Churn'] = 1
---> 28 f_plot = df_data.groupby('gender').Churn.mean().reset_index()
~\AppData\Local\Programs\Python\Python38-32\lib\site-packages\pandas\core\groupby\groupby.py in mean(self, *args, **kwargs)
1222 """
1223 nv.validate_groupby_func("mean", args, kwargs, ["numeric_only"])
-> 1224 return self._cython_agg_general(
1225 "mean", alt=lambda x, axis: Series(x).mean(**kwargs), **kwargs
1226 )
~\AppData\Local\Programs\Python\Python38-32\lib\site-packages\pandas\core\groupby\groupby.py in _cython_agg_general(self, how, alt, numeric_only, min_count)
905
906 if len(output) == 0:
--> 907 raise DataError("No numeric types to aggregate")
908
909 return self._wrap_aggregated_output(output)
DataError: No numeric types to aggregate
DataError Traceback (most recent call last)
<ipython-input-1-4875501f5fb5> in <module>
26 df_data.loc[df_data.Churn=='No','Churn'] = 0
27 df_data.loc[df_data.Churn=='Yes','Churn'] = 1
---> 28 f_plot = df_data.groupby('gender').Churn.mean().reset_index()
~\AppData\Local\Programs\Python\Python38-32\lib\site-packages\pandas\core\groupby\groupby.py in mean(self, *args, **kwargs)
1222 """
1223 nv.validate_groupby_func("mean", args, kwargs, ["numeric_only"])
-> 1224 return self._cython_agg_general(
1225 "mean", alt=lambda x, axis: Series(x).mean(**kwargs), **kwargs
1226 )
~\AppData\Local\Programs\Python\Python38-32\lib\site-packages\pandas\core\groupby\groupby.py in _cython_agg_general(self, how, alt, numeric_only, min_count)
905
906 if len(output) == 0:
--> 907 raise DataError("No numeric types to aggregate")
908
909 return self._wrap_aggregated_output(output)
DataError: No numeric types to aggregate
DataError Traceback (most recent call last)
<ipython-input-1-4875501f5fb5> in <module>
26 df_data.loc[df_data.Churn=='No','Churn'] = 0
27 df_data.loc[df_data.Churn=='Yes','Churn'] = 1
---> 28 f_plot = df_data.groupby('gender').Churn.mean().reset_index()
~\AppData\Local\Programs\Python\Python38-32\lib\site-packages\pandas\core\groupby\groupby.py in mean(self, *args, **kwargs)
1222 """
1223 nv.validate_groupby_func("mean", args, kwargs, ["numeric_only"])
-> 1224 return self._cython_agg_general(
1225 "mean", alt=lambda x, axis: Series(x).mean(**kwargs), **kwargs
1226 )
~\AppData\Local\Programs\Python\Python38-32\lib\site-packages\pandas\core\groupby\groupby.py in _cython_agg_general(self, how, alt, numeric_only, min_count)
905
906 if len(output) == 0:
--> 907 raise DataError("No numeric types to aggregate")
908
909 return self._wrap_aggregated_output(output)
DataError: No numeric types to aggregate
电信数据集
确保你在Churn列中没有任何其他值。使用df_data['Churn'].value_counts().如果你能分享你的数据的前10行,这将是真棒。