我有以下数据:
prop_tenure prop_12m prop_6m
0.00 0.00 0.00
0.00 0.00 0.00
0.06 0.06 0.10
0.38 0.38 0.25
0.61 0.61 0.66
0.01 0.01 0.02
0.10 0.10 0.12
0.04 0.04 0.04
0.22 0.22 0.22
我正在做一个配对图,如下所示:
# Draw seaborn's pair plot for `data`.
sns.pairplot(data)
# Display the resulting figure.
plt.show()
但是我想显示变量之间的相关系数,如果可能的话,显示每个变量的偏度和峰度。 你如何在seaborn中做到这一点?
据我所知,seaborn 没有开箱即用的功能可以做到这一点,您需要自己编写一个函数:
from scipy.stats import pearsonr
import matplotlib.pyplot as plt
def corrfunc(x, y, ax=None, **kws):
    """Write Pearson's correlation coefficient in the top-left corner of a plot.

    Args:
        x: First sample of values.
        y: Second sample, the same length as ``x``.
        ax: Axes to annotate; defaults to the current Matplotlib axes.
        **kws: Extra keyword arguments; accepted and ignored.
    """
    r, _ = pearsonr(x, y)
    if ax is None:
        ax = plt.gca()
    # Axes coordinates keep the label in the same spot whatever the data range.
    ax.annotate(f'ρ = {r:.2f}', xy=(.1, .9), xycoords=ax.transAxes)
使用您的输入的示例:
import seaborn as sns
import pandas as pd

# Switch seaborn to its 'white' style before any figure is drawn.
sns.set(style='white')

# The sample proportions from the question, one list per column.
data = dict(
    prop_tenure=[0.0, 0.0, 0.06, 0.38, 0.61, 0.01, 0.10, 0.04, 0.22],
    prop_12m=[0.0, 0.0, 0.06, 0.38, 0.61, 0.01, 0.10, 0.04, 0.22],
    prop_6m=[0.0, 0.0, 0.10, 0.25, 0.66, 0.02, 0.12, 0.04, 0.22],
)
df = pd.DataFrame(data)

# Build the pair plot, then add the correlation label to each lower-triangle panel.
g = sns.pairplot(df)
g.map_lower(corrfunc)
plt.show()
顺便提一下,对于较新版本的 seaborn(>0.11.0),上面的答案不再有效;您需要给函数添加一个 hue=None 参数,才能使其再次工作。
def corrfunc(x, y, hue=None, ax=None, **kws):
    """Write Pearson's correlation coefficient in the top-left corner of a plot.

    Args:
        x: First sample of values.
        y: Second sample, the same length as ``x``.
        hue: Accepted so the function can be called with a ``hue`` keyword;
            it is not used in the computation.
        ax: Axes to annotate; defaults to the current Matplotlib axes.
        **kws: Extra keyword arguments; accepted and ignored.
    """
    r, _ = pearsonr(x, y)
    if ax is None:
        ax = plt.gca()
    # Axes coordinates keep the label in the same spot whatever the data range.
    ax.annotate(f'ρ = {r:.2f}', xy=(.1, .9), xycoords=ax.transAxes)
参考此问题:https://github.com/mwaskom/seaborn/issues/2307#issuecomment-702980853
如果您需要为每个色调(hue)级别分别显示相关系数,可以使用下面这段在上述代码基础上修改的版本。觉得有用就点个赞吧。
def _pearson_r(x, y):
    """Return Pearson's r over the rows where both Series are non-null.

    Returns ``None`` when fewer than two complete pairs remain, because
    ``pearsonr`` needs at least two observations.
    """
    valid = x.notna() & y.notna()
    if valid.sum() < 2:
        return None
    r, _ = pearsonr(x[valid], y[valid])
    return r


def corrfunc(x, y, hue=None, ax=None, **kws):
    """Annotate the top-left corner of a plot with Pearson's r.

    With ``hue=None`` a single grey ``ρ = …`` label is drawn from all
    complete (x, y) pairs; if there are fewer than two such pairs the label
    is empty. With any other ``hue`` value one label per hue level is drawn,
    stacked downwards and coloured from the 'tab10' palette.

    Args:
        x: pandas Series of values for the horizontal variable.
        y: pandas Series of values for the vertical variable, sharing
            ``x``'s index.
        hue: ``None`` for one overall coefficient; anything else switches to
            per-level coefficients. The value itself is not used for
            grouping: the levels are read from the module-level PairGrid
            ``g`` (``g.hue_vals``), so ``g`` must exist when this runs.
        ax: Axes to annotate; defaults to the current Matplotlib axes.
        **kws: Extra keyword arguments; accepted and ignored.
    """
    if ax is None:
        ax = plt.gca()

    if hue is None:
        r = _pearson_r(x, y)
        text = '' if r is None else f'ρ = {r:.2f}'
        ax.annotate(text, xy=(.02, .98), xycoords='axes fraction', ha='left', va='top',
                    color='grey', fontsize=10)
        return

    hue_vals = g.hue_vals
    # Order of first appearance fixes both the colour and the line slot of a level.
    hue_order = pd.unique(hue_vals)
    colors = sns.color_palette('tab10', len(hue_order))

    r_by_level = {}
    for name, group in x.groupby(hue_vals):
        r = _pearson_r(group, y[group.index])
        if r is not None:
            r_by_level[name] = r

    for i, (name, color) in enumerate(zip(hue_order, colors)):
        # A level without a coefficient (missing hue value, too few complete
        # pairs) gets no label but keeps its slot, so the remaining labels
        # stay at the same height and colour in every panel.
        if name not in r_by_level:
            continue
        ax.annotate(f'{name}: ρ = {r_by_level[name]:.2f}', xy=(.02, .98-i*.05),
                    xycoords='axes fraction', ha='left', va='top',
                    color=color, fontsize=10)
# Load the 'penguins' example dataset through seaborn.
penguins = sns.load_dataset('penguins')

# Regression panels with red fit lines off the diagonal, histograms on the
# diagonal, coloured by species.
g = sns.pairplot(
    penguins,
    hue='species',
    diag_kind='hist',
    kind='reg',
    plot_kws=dict(line_kws=dict(color='red')),
)

# Add the per-species correlation labels to the lower-triangle panels.
g.map_lower(corrfunc, hue='species')