# For every feature that contains missing values, draw a bar chart comparing
# the median sale price of rows where the feature is missing (1) against rows
# where it is present (0). One figure is shown per feature.
for feature in features_with_na:
    # 1 if the observation is missing for this feature, 0 otherwise.
    # The Series keeps the feature's name, so the grouped index (and therefore
    # the x-axis label) is still the feature name.
    missing_flag = train[feature].isnull().astype(int)

    # Group the price column by the indicator directly instead of copying the
    # whole DataFrame on every iteration; `train` itself is never modified.
    train['saleprice'].groupby(missing_flag).median().plot.bar()
    plt.title(feature)
    plt.show()
我运行这段代码是为了查看 pandas DataFrame 中各列与销售价格(saleprice,它本身也是其中一列)之间的关系。我把每一列中的缺失值转换为 1、非缺失值转换为 0,希望借此找出销售价格与各列缺失值之间的某种关系。然而,这样会生成很多张图,我必须在同一个单元格的输出中来回滚动才能看完,非常不方便。有没有办法让该单元格的输出不需要滚动,从而可以一次看到所有的图?
你可以对代码做一些修改,把所有结果画在同一张图中,从而得到所需的图。
这里假设销售价格列名为 'Price'(即你数据中的 'saleprice')。
import seaborn as sns

# Draw ONE grouped bar chart that compares, for every feature with missing
# values, the median price of rows where the feature is missing against the
# median price of rows where it is present.

# Name of the sale-price column in `train`. The first snippet in this file
# reads the same information from 'saleprice'; change this value if your
# DataFrame has no 'Price' column.
price_col = 'Price'

# Columns needed for the comparison: every feature with missing values plus
# the price column. Built as a new list so `features_with_na` is not mutated.
feature_data_with_na_and_Price = [*features_with_na, price_col]

# Explicit copy: the indicator columns written below must not touch `train`,
# and assigning into an un-copied selection triggers pandas'
# SettingWithCopyWarning.
data = train[feature_data_with_na_and_Price].copy()

features = []          # feature names, in plotting order
null_medians = []      # median price where the feature is missing
not_null_medians = []  # median price where the feature is present

for feature in features_with_na:
    # Replace the feature by an indicator: 1 if missing, 0 if not missing.
    data[feature] = np.where(data[feature].isnull(), 1, 0)

    # Median price for the rows with / without a missing value.
    null_median = data.loc[data[feature] == 1, price_col].median()
    not_null_median = data.loc[data[feature] == 0, price_col].median()

    null_medians.append(null_median)
    not_null_medians.append(not_null_median)
    features.append(feature)

# One summary frame per group, tagged with the hue used to colour the bars.
df1 = pd.DataFrame({'feature': features, 'Price': null_medians})
df2 = pd.DataFrame({'feature': features, 'Price': not_null_medians})
df1['hue'] = 1  # 1 means missing values
df2['hue'] = 0  # 0 means no missing values

# Stack both groups; ignore_index avoids duplicate index labels in the result.
df = pd.concat([df1, df2], ignore_index=True)

plt.figure(figsize=(20, 10))
sns.barplot(x='feature', y='Price', data=df, hue='hue')
# Feature names are long; rotate them so they do not overlap.
plt.xticks(rotation=90, ha='right')
plt.show()