我有一个使用嵌套分类 x 轴的 Bokeh 图。下面是一个简化示例的代码;我的真实用例自然是一个更大、更复杂的数据集。
import pandas as pd
from bokeh.io import output_notebook, show, reset_output
from bokeh.models import Band, Span, FactorRange, ColumnDataSource
from bokeh.palettes import Spectral5
from bokeh.plotting import figure
from bokeh.transform import factor_cmap
# Start from a clean Bokeh output state and render inline in the notebook.
reset_output()
output_notebook()

# Wide toy table: one row per fruit, one column per year.
# Note that the year column names are strings.
data = {
    "fruit": ["Apples", "Pears"],
    "2015": [2, 1],
    "2016": [5, 3],
}

# Reshape to long form, then index each row by its (fruit, year) tuple so the
# index can serve as the nested categorical x coordinate.
tidy_df = pd.melt(pd.DataFrame(data), id_vars=["fruit"], var_name="year")
tidy_df["fruit_year"] = list(zip(tidy_df["fruit"], tidy_df["year"]))
tidy_df = tidy_df.set_index("fruit_year")
display(tidy_df)

# Figure whose x-axis factors are the unique (fruit, year) index entries.
p = figure(
    x_range=FactorRange(factors=tidy_df.index.unique()),
    height=300,
    width=300,
)

# One marker per (fruit, year) observation.
cds = ColumnDataSource(tidy_df)
p.circle(x="fruit_year", y="value", size=20, source=cds, line_color=None)

# Author's observation: this call draws nothing and raises no error.
# NOTE(review): the factors above carry the year as a string ("2015", taken
# from the column names), whereas these tuples use ints (2015) -- presumably
# the coordinates match no factor, which would explain the missing line;
# confirm by passing ("Apples", "2015") / ("Apples", "2016").
p.line(
    x=[("Apples", 2015), ("Apples", 2016)],
    y=[3.5, 3.5],
    color="red",
    line_width=2,
)

# Author's observation: numeric x positions do render, but hard-coding them
# does not scale to problems where the location cannot be specified manually,
# and the segment does not appear to line up with the data (open question).
p.line(x=[4, 5], y=[2, 2], color="red", line_width=2)

show(p)
输出:
下面这段代码中的线没有出现在图上,也没有报错:
# Attempt to draw a red horizontal segment at y=3.5 between the two "Apples"
# positions on the nested categorical x-axis.
# NOTE(review): the figure's factors are built from the melted column names,
# so the year level is a string ("2015"); the tuples below use ints (2015) --
# presumably why nothing is rendered. Confirm by using ("Apples", "2015").
p.line(
x=[("Apples", 2015), ("Apples", 2016)],
y=[3.5, 3.5],
color="red",
line_width=2
)
我该如何指定 x 才能让这条线显示出来?我是否可以指定任意一个子级别,例如只针对 ("Apples", 2015)?
一个非常类似的问题的解决方案见此处。(那个问题也是我提出的,这里的做法建立在该答案的基础之上。)
总体思路是:基于初始数据集创建若干个数据框(DataFrame),然后用这些数据框分别建立多个 ColumnDataSource。
下面是完整的代码。
# Scatter of value per (fruit, year) on a nested categorical x-axis, with a
# global median line plus, for every fruit, a dashed median line and a
# +/- one-standard-deviation band spanning that fruit's factors.
# Expects `tidy_df` (indexed by "fruit_year", with "fruit", "year" and "value"
# columns) to exist already.
p = figure(
    x_range=FactorRange(factors=list(tidy_df.index.unique())),
    # `height`/`width` replace the older `plot_height`/`plot_width` spellings,
    # which were removed in Bokeh 3.0.
    height=400,
    width=400,
    tooltips=[
        # The label is free text; the "@name" part must refer to a column.
        ("Fruit", "@fruit"),
        ("Year", "@year"),
        ("Value", "@value"),
    ],
)

cds = ColumnDataSource(tidy_df)

# Colour markers by fruit; the factor list is derived from the dataframe.
index_cmap = factor_cmap(
    "fruit",
    Spectral5[:2],
    factors=sorted(tidy_df["fruit"].unique()),
)

p.circle(
    x="fruit_year",
    y="value",
    size=20,
    source=cds,
    fill_color=index_cmap,
    line_color=None,
)

# Global median over all fruits, drawn as a dashed line across the plot.
median = Span(
    location=tidy_df["value"].median(),
    line_color="orange",
    line_dash="dashed",
    line_width=1.0,
)
p.add_layout(median)

# Per-fruit standard deviation of "value" only. Selecting the column keeps the
# string "year" column out of the aggregation, and iterating the grouped result
# guarantees each deviation is paired with its own fruit. `sort=False` keeps
# the fruits in order of first appearance.
stddev_by_fruit = tidy_df.groupby("fruit", sort=False)["value"].std()

for fruit, stddev in stddev_by_fruit.items():
    fruit_rows = tidy_df[tidy_df["fruit"] == fruit]
    fruit_median = fruit_rows["value"].median()

    # Keep only the (fruit, year) index and attach constant band/median columns.
    b_df = fruit_rows.drop(columns=["fruit", "year", "value"]).assign(
        lower=fruit_median - stddev,
        upper=fruit_median + stddev,
        median=fruit_median,
    )
    display(b_df)

    # A separate data source per fruit restricts the glyphs to that fruit's
    # x-axis factors.
    cds2 = ColumnDataSource(b_df)
    p.add_layout(
        Band(
            base="fruit_year",
            lower="lower",
            upper="upper",
            source=cds2,
        )
    )
    p.line(
        x="fruit_year",
        y="median",
        source=cds2,
        color="red",
        line_width=2,
        line_dash="dashed",
    )

show(p)