导入数据包

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')
# Suppress all warning output
from bokeh.io import output_notebook
output_notebook()
# Configure Bokeh to render its output inside the notebook
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource
# figure / show: create a plot and display it
# ColumnDataSource: container used below to pass named data columns to glyphs

单系列柱状图—vbar

# 1. Single-series bar chart
# vbar: vertical bars
bar_positions = [1, 2, 3]      # x position of each bar; could also be the index of a pandas Series
bar_tops = [1.2, 2.5, 3.7]     # top of each bar; could also be the values of a pandas Series

# Outline styling: a 2px black, semi-transparent, dashed border
outline_style = dict(line_width=2, line_color='black', line_alpha=0.6, line_dash=[10, 5])
# Fill styling: semi-transparent red interior
fill_style = dict(fill_color='red', fill_alpha=0.7)

p = figure(plot_width=600, plot_height=400)
p.vbar(
    x=bar_positions,
    width=0.4,              # bar width
    bottom=[0, 0, 0],       # every bar starts at zero
    top=bar_tops,
    **outline_style,
    **fill_style
)
show(p)

图片.png

单系列柱状图—hbar

# 1. Single-series bar chart
# hbar: horizontal bars
# Draw the random values first and the random colours second, then combine them.
bar_values = np.random.randn(100) * 10
bar_colors = np.random.choice(['red', 'yellow', 'blue'], 100)
df = pd.DataFrame({'value': bar_values, 'color': bar_colors})

p = figure(plot_width=600, plot_height=400)
p.hbar(
    y=df.index,            # one bar per row, positioned at the row index
    height=0.5,            # bar thickness along the y axis
    left=0,                # left edge of every bar
    right=df['value'],     # right edge of each bar is the row's value
    color=df['color'],     # per-bar colour taken from the 'color' column
)
show(p)

图片.png

单系列柱状图—分类设置标签（ColumnDataSource）

from bokeh.palettes import Spectral6
from bokeh.transform import factor_cmap
# Spectral6: a six-colour palette; factor_cmap: maps categorical factors to palette colours
fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
counts = [5, 6, 4, 2, 4, 6]
# Wrap the labels and their counts in a ColumnDataSource so the glyph can
# refer to the columns by name ('fruits', 'counts').
source = ColumnDataSource(data=dict(fruits=fruits, counts=counts))
# The previously defined `colors` list was never used (the palette below is
# Spectral6), so it has been removed.
p = figure(x_range=fruits, y_range=(0, 9), plot_height=350, title='Fruit Counts', tools='')
# Draw the bars; passing the label list as x_range makes the x axis categorical.
# NOTE(review): `legend=` and `plot_height=` are the older Bokeh spellings
# (newer releases use `legend_field=` / `height=`) — confirm the target version.
p.vbar(x='fruits', top='counts', source=source,   # columns looked up in `source`
       width=0.8, alpha=0.8,
       color=factor_cmap('fruits', palette=Spectral6, factors=fruits),   # one colour per fruit
       legend='fruits'
       )
# factor_cmap(field_name, palette, factors, start=0, end=None, nan_color='gray')
# builds a colour-mapping specification:
#   field_name: name of the categorical column
#   palette:    list of colours to draw from
#   factors:    the categories to assign colours to, in palette order
# Reference: https://docs.bokeh.org/en/latest/docs/reference/transform.html
p.xgrid.grid_line_color = None          # hide vertical grid lines
p.legend.orientation = 'horizontal'
p.legend.location = 'top_center'
show(p)

图片.png

多系列柱状图

# Multi-series (grouped) bar chart
# vbar
from bokeh.transform import dodge
from bokeh.core.properties import value
# dodge: offsets a coordinate within its category; value: marks a string as a literal
df = pd.DataFrame(
    {'2015': [2, 1, 4, 3, 2, 4], '2016': [5, 3, 3, 2, 4, 6], '2017': [3, 2, 4, 4, 5, 3]},
    index=['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
)
fruits = df.index.tolist()     # category labels shown on the x axis
years = df.columns.tolist()    # one bar series per column
# Build a plain dict of lists: the labels under 'index', then one column per year.
data = {'index': fruits}
data.update({year: df[year].tolist() for year in years})
source = ColumnDataSource(data=data)

p = figure(x_range=fruits, y_range=(0, 10), plot_height=350, title='Fruit Counts by Year')
# One vbar call per year, each shifted sideways inside its category so the
# three bars sit next to each other instead of overlapping.
for year, offset, bar_color in (('2015', -0.25, 'red'), ('2016', 0, 'yellow'), ('2017', 0.25, 'blue')):
    p.vbar(x=dodge('index', offset, range=p.x_range), top=year, width=0.2,
           source=source, color=bar_color, legend=value(year))
show(p)

图片.png
Bokeh所能识别的数据结构，是Python自身的数据结构，例如list和dict等数据结构，不过新版本也开始兼容DataFrame和Series数据类型。

堆叠图—竖图

from bokeh.core.properties import value
# value(): wraps a string so it is used as a literal rather than a column name
fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
years = ['2015', '2016', '2017']
colors = ["#c9d9d3", "#718dbf", "#e84d60"]
# One column of labels plus one column of values per year
data = {'fruits' : fruits,
        '2015'   : [2, 1, 4, 3, 2, 4],
        '2016'   : [5, 3, 4, 2, 4, 6],
        '2017'   : [3, 2, 4, 4, 5, 3]}
source = ColumnDataSource(data)
p = figure(x_range = fruits, plot_height = 350, title = 'Fruit Counts by Year', tools = '')
# Draw the stacked bars. The first argument must be the list of stacker
# columns (`years`): each name selects one column of `source` as one layer.
# NOTE(review): `legend=` and `plot_height=` are the older Bokeh spellings
# (newer releases use `legend_label=` / `height=`) — confirm the target version.
renderers = p.vbar_stack(years,
                         x = 'fruits',      # column holding the x-axis categories
                         source = source,
                         width = 0.9,
                         color = colors,    # one colour per layer
                         legend = [value(x) for x in years],
                         name = years
                        )
p.xgrid.grid_line_color = None
p.axis.minor_tick_line_color = None
p.outline_line_color = None
p.legend.location = 'top_left'
p.legend.orientation = 'horizontal'
# Other cosmetic settings (this line was a bare identifier without '#',
# which raised NameError before show(p) could run)
show(p)

图片.png

堆叠图—横图

# Stacked chart (horizontal)
# (this heading was a bare identifier without '#', which raised NameError
# as soon as the cell ran)
from bokeh.palettes import GnBu3, OrRd3
# Two three-colour palettes: one for exports, one for imports
fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
years = ["2015", "2016", "2017"]
exports = {'fruits' : fruits,
           '2015'   : [2, 1, 4, 3, 2, 4],
           '2016'   : [5, 3, 4, 2, 4, 6],
           '2017'   : [3, 2, 4, 4, 5, 3]}
# Import figures are stored as negative numbers so they stack to the left of zero
imports = {'fruits' : fruits,
           '2015'   : [-1, 0, -1, -3, -2, -1],
           '2016'   : [-2, -1, -3, -1, -2, -2],
           '2017'   : [-1, -2, -1, 0, -2, -2]}
p = figure(y_range=fruits, plot_height=350, x_range=(-16, 16), title="Fruit import/export, by year")
# Export layers
p.hbar_stack(years, y='fruits', height=0.9, color=GnBu3, source=ColumnDataSource(exports),
             legend=["%s exports" % x for x in years])
# Import layers (negative values)
p.hbar_stack(years, y='fruits', height=0.9, color=OrRd3, source=ColumnDataSource(imports),
             legend=["%s imports" % x for x in years])
p.y_range.range_padding = 0.2     # padding around the categorical y range
p.ygrid.grid_line_color = None
p.legend.location = "top_left"
p.axis.minor_tick_line_color = None
p.outline_line_color = None
# Other cosmetic settings
show(p)

图片.png

直方图

# 4. Histogram
# np.histogram + figure.quad(); no ColumnDataSource is needed here
samples = np.random.randn(1000) * 100
df = pd.DataFrame({'value': samples})
df.index.name = 'index'
print(df.head())

# Bin the data.
# np.histogram parameters:
#   a:       the input data
#   bins:    number of bins
#   range:   (min, max) of the bins; defaults to (a.min(), a.max())
#   weights: optional per-sample weights
#   density: if True, return the probability density instead of counts
# Returns:
#   hist:  the value for each bin (used as the bar tops)
#   edges: the bin boundaries; n bins produce n + 1 edges
hist, edges = np.histogram(df['value'], bins=20)
print(hist[:5])
print(edges)

# Consecutive edges give the left and right side of each bar.
left_edges = edges[:-1]
right_edges = edges[1:]
p = figure(title="HIST", tools="save", background_fill_color="#E8DDCB")
# quad() draws one rectangle per bin from its four sides.
p.quad(top=hist, bottom=0, left=left_edges, right=right_edges,
       fill_color="#036564", line_color="#033649")
show(p)

图片.png

Python交互图表可视化：Bokeh

柱状图、堆叠图、直方图

导入数据包

单系列柱状图—vbar

单系列柱状图—hbar

单系列柱状图—分类设置标签（ColumnDataSource）

多系列柱状图

堆叠图—竖图

堆叠图—横图

直方图