官方地址

  1. https://github.com/pyecharts/pyecharts

文档地址

  1. https://pyecharts.org/#/

实例地址

https://gallery.pyecharts.org/#/

安装包

pip install pyecharts
pip install pandas
from pyecharts import options as opts
from pyecharts.charts import Pie
import pandas as pd
from pyecharts.faker import Faker
# Build a small demo table: one row per month with its sales amount.
months = ["一月份", "二月份", "三月份", "四月份", "五月份"]
sales = [2580, 1300, 500, 900, 1300]
df = pd.DataFrame({"月份": months, "销售额": sales})
print(df)

# pyecharts wants plain Python lists, so pull each column out of the frame.
x_data = df["月份"].tolist()
print(x_data)

y_data = df["销售额"].tolist()
print(y_data)

# Pair each label with its value: [[label, value], ...] is the shape Pie.add is given.
pie_pairs = [list(pair) for pair in zip(x_data, y_data)]

# Basic pie chart; built step by step instead of one chained expression.
c = Pie()
c.add("", pie_pairs)
c.set_global_opts(title_opts=opts.TitleOpts(title="饼图"))  # chart title
c.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))  # slice label template

# In a notebook, c.render_notebook() can be used instead to display inline.

c.render()

image.png

设置饼图位置(左边距和上边距)和颜色

# Build a small demo table: one row per month with its sales amount.
months = ["一月份", "二月份", "三月份", "四月份", "五月份"]
sales = [2580, 1300, 500, 900, 1300]
df = pd.DataFrame({"月份": months, "销售额": sales})
print(df)

# pyecharts wants plain Python lists, so pull each column out of the frame.
x_data = df["月份"].tolist()
print(x_data)

y_data = df["销售额"].tolist()
print(y_data)

# Pie chart shifted to the left of the canvas, drawn with a custom colour palette.
c = (
    Pie()
    .add(
        # Pie.add takes the series name first and the data pairs second.
        # Without the leading "" the data list is bound to series_name and
        # data_pair is missing, so the call fails.
        "",
        [list(z) for z in zip(x_data, y_data)],
        center=["30%", "50%"],  # pie centre: 30% from the left, 50% from the top
    )
    .set_colors(["blue", "green", "purple", "red", "pink"])  # one colour per slice
    .set_global_opts(title_opts=opts.TitleOpts(title="饼图"))  # chart title
    .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))  # slice label template
)

# In a notebook, c.render_notebook() can be used instead to display inline.

c.render()

image.png

改变图例位置

数据生成

上面的图例是水平方向排列的,而且个数比较少。如果我们的图例比较多,需要改成竖直方向,同时实现翻页滚动功能。

在这里我们使用的是 pyecharts 中自带的数据:

1、Faker.choose():用来生成数据标签,每次调用会从内置的多组标签中随机返回一组

Faker.choose()

2、Faker.values() 是用来生成具体的数据,随机生成

Faker.values()

代码


# Pie with many slices: four Faker label lists and four Faker value lists are
# concatenated so the legend has enough entries to need paging.
c = (
    Pie()
    .add(
        "",
        [
            list(z)
            for z in zip(
                Faker.choose() + Faker.choose() + Faker.choose() + Faker.choose(),
                Faker.values() + Faker.values() + Faker.values() + Faker.values(),
            )
        ],
        center=["30%", "50%"],  # pie centre: 30% from the left, 50% from the top
    )
    .set_colors(["blue", "green", "purple", "red", "pink"])
    .set_global_opts(
        title_opts=opts.TitleOpts(title="饼图"),  # chart title
        # This section is about a vertical, scrollable legend, so configure it
        # explicitly: type_="scroll" enables paging, orient="vertical" stacks the
        # entries, and pos_left keeps the legend clear of the pie on the left.
        legend_opts=opts.LegendOpts(type_="scroll", pos_left="80%", orient="vertical"),
    )
    .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))  # slice label template
)

c.render_notebook()

# Outside a notebook, c.render() can be used instead.

image.png
环状饼图主要是通过 add 方法中的 radius 参数来实现的。实现过程如下:

x_data = ["小明", "小红", "张三", "李四", "王五"]
y_data = [335, 310, 234, 135, 548]

# [[name, value], ...] pairs for the ring.
ring_pairs = [list(pair) for pair in zip(x_data, y_data)]

# Ring (doughnut) chart: passing an inner and an outer radius to add() leaves a hole.
c = Pie(init_opts=opts.InitOpts(width="1600px", height="1000px"))  # canvas size
c.add(
    series_name="访问来源",
    data_pair=ring_pairs,
    radius=["15%", "50%"],  # inner and outer radius of the ring
    center=["30%", "40%"],  # ring centre: distance from the left and from the top
    label_opts=opts.LabelOpts(is_show=True),  # show the slice labels
)
# Legend on the left-hand side, entries stacked vertically.
c.set_global_opts(legend_opts=opts.LegendOpts(pos_left="left", orient="vertical"))
# Item-triggered tooltip using the series/name/value/percent template below.
c.set_series_opts(
    tooltip_opts=opts.TooltipOpts(trigger="item", formatter="{a} <br/>{b}: {c} ({d}%)"),
)
c.render_notebook()

image.png
内嵌饼图

import pyecharts.options as opts
from pyecharts.charts import Pie
from pyecharts.globals import ThemeType

# Inner pie: one [label, value] pair per category.
inner_x_data = ["直达", "营销广告", "搜索引擎", "产品"]
inner_y_data = [335, 679, 548, 283]
inner_data_pair = [list(z) for z in zip(inner_x_data, inner_y_data)]
# -> [['直达', 335], ['营销广告', 679], ['搜索引擎', 548], ['产品', 283]]

# Outer ring (nested around the inner pie).
# Labels and values must have the same length: zip() stops at the shorter list,
# so a surplus label would be dropped without any error. The list therefore holds
# exactly the ten labels that have a value.
outer_x_data = [
    "搜索引擎", "邮件营销", "直达", "营销广告", "联盟广告",
    "视频广告", "产品", "百度", "谷歌", "邮件营销",
]
outer_y_data = [335, 135, 147, 102, 220, 310, 234, 135, 648, 251]
outer_data_pair = [list(z) for z in zip(outer_x_data, outer_y_data)]

# Nested pie: a small inner ring plus a larger outer ring sharing one centre.
c = (
    # Chart initialisation
    Pie(init_opts=opts.InitOpts(
        width="900px",  # canvas size
        height="800px",
        theme=ThemeType.SHINE))  # colour theme

    # Inner ring
    .add(
        series_name="版本3.2.1",  # series name
        center=["50%", "35%"],  # ring centre
        data_pair=inner_data_pair,  # series data as [label, value] pairs
        radius=["25%", "40%"],  # [inner radius, outer radius]
        label_opts=opts.LabelOpts(position='inner'),  # draw labels inside the slices
    )

    # Outer ring, nested around the inner one
    .add(
        series_name="版本3.2.9",  # series name
        center=["50%", "35%"],  # same centre as the inner ring
        radius=["40%", "60%"],  # [inner radius, outer radius]
        data_pair=outer_data_pair,  # series data as [label, value] pairs

        # Rich-text label: a header line, a divider, then "name: value  percent".
        label_opts=opts.LabelOpts(
            position="outside",
            formatter="{a|{a}}{abg|}\n{hr|}\n {b|{b}: }{c}  {per|{d}%}  ",
            background_color="#eee",
            border_color="#aaa",
            border_width=1,
            border_radius=4,
            rich={
                # Style for the {a|...} fragment
                "a": {"color": "#999",
                      "lineHeight": 22,
                      "align": "center"},

                # Style for the {abg|} fragment (header background strip)
                "abg": {
                    "backgroundColor": "#e3e3e3",
                    "width": "100%",
                    "align": "right",
                    "height": 22,
                    "borderRadius": [4, 4, 0, 0],
                },

                # Style for the {hr|} fragment (zero-height box drawn as a divider)
                "hr": {
                    "borderColor": "#aaa",
                    "width": "100%",
                    "borderWidth": 0.5,
                    "height": 0,
                },

                # Style for the {b|...} fragment
                "b": {"fontSize": 16, "lineHeight": 33},

                # Style for the {per|...} fragment (percentage badge)
                "per": {
                    "color": "#eee",
                    "backgroundColor": "#334455",
                    "padding": [2, 4],
                    "borderRadius": 2,
                },
            },
        ),
    )

    # Global options
    .set_global_opts(
        # NOTE(review): a pie has no cartesian axes, so these two options
        # presumably have no visible effect -- confirm before relying on them.
        xaxis_opts = opts.AxisOpts(is_show = False),
        yaxis_opts = opts.AxisOpts(is_show = False),
        legend_opts = opts.LegendOpts(is_show = True),  # legend is shown
        title_opts = opts.TitleOpts(title = None),    # no title text
                    )

    # Series options
    .set_series_opts(
        tooltip_opts=opts.TooltipOpts(
            trigger="item",
            formatter="{a} <br/>{b}: {c} ({d}%)"
        ),
        # No label_opts here on purpose: set_series_opts applies label_opts to
        # every series, which would replace the inner-position label and the
        # rich-text label configured in the two add() calls above. Labels are
        # already visible through those per-series settings.
    )
)

c.render_notebook()

image.png
多饼图

# One entry per small ring: (labels, values, centre position on the canvas).
ring_specs = [
    (["剧情", "其他"], [30, 70], ["20%", "30%"]),
    (["奇幻", "其他"], [40, 60], ["55%", "30%"]),
    (["爱情", "其他"], [24, 76], ["20%", "70%"]),
    (["惊悚", "其他"], [11, 89], ["55%", "70%"]),
]

# Several rings on one canvas: each spec becomes its own unnamed series.
c = Pie()
for labels, values, centre in ring_specs:
    c.add(
        "",
        [list(pair) for pair in zip(labels, values)],
        center=centre,  # where this ring sits
        radius=[60, 80],  # inner and outer radius, identical for every ring
    )

c.set_global_opts(
    title_opts=opts.TitleOpts(title="Pie-多饼图基本示例"),
    # Scrollable vertical legend placed to the right of the rings.
    legend_opts=opts.LegendOpts(
        type_="scroll", pos_top="20%", pos_left="80%", orient="vertical"
    ),
)

c.render_notebook()

image.png
玫瑰图

# Both rose charts share one label list; each draws its own Faker values.
v = Faker.choose()

c = Pie()

# Left chart: rosetype="radius", labels hidden on the chart itself.
c.add(
    "",
    [list(pair) for pair in zip(v, Faker.values())],  # [label, value] pairs
    radius=["30%", "60%"],  # inner and outer radius
    center=["25%", "50%"],  # left half of the canvas
    rosetype="radius",
    label_opts=opts.LabelOpts(is_show=False),
)

# Right chart: rosetype="area", label options left at their defaults.
c.add(
    "",
    [list(pair) for pair in zip(v, Faker.values())],
    radius=["30%", "60%"],
    center=["75%", "50%"],  # right half of the canvas
    rosetype="area",
)

c.set_global_opts(title_opts=opts.TitleOpts(title="Pie-玫瑰图示例"))

c.render_notebook()

image.png

案例:淘宝 2020 年 12 月 12 日晚 12 时的圣诞节商品数据

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

christmas_data = pd.read_excel('./christmas.xls')
# describe() displays basic summary statistics (notebook cell output).
christmas_data.describe()

# Take an explicit copy of the selected columns: the assignments below then
# modify an independent frame instead of a slice of christmas_data, which
# avoids pandas' SettingWithCopyWarning.
company_sorted_data = christmas_data[['title','url','nickname','shopname','shopType','originalPrice','discountPrice','coupon','startingPrice','information','favorablePrice','location','daySales','dsr','category','leaf']].copy()
company_sorted_data.head(2)

# Type conversion: originalPrice as float64.
company_sorted_data['originalPrice'] = company_sorted_data['originalPrice'].astype('float64')
company_sorted_data.head(2)

# Split location on whitespace into province and city, then drop location.
# The city is the second token when there is one; a single-token location is
# used for both province and city. Deciding by token count rather than by
# character length avoids an IndexError on long single-token values, and the
# .str accessor leaves missing locations as NaN instead of raising.
location_parts = company_sorted_data['location'].str.split()
province = location_parts.str[0]
company_sorted_data['province'] = province
company_sorted_data['city'] = location_parts.str[1].fillna(province)
company_sorted_data = company_sorted_data.drop(columns='location')

company_sorted_data.head(2)

# Keep only the columns used in the analysis.
company_sorted_data = company_sorted_data[['title','nickname','shopType','discountPrice','startingPrice','information','daySales','dsr','category','leaf','province','city']]
company_sorted_data

数据挖掘与分析

pip install jieba
import jieba   
import pandas as pd
import numpy as np
add_words = pd.read_excel('./christmas.xls')
add_words_list = add_words['title'].tolist()

# Segment every product title into a list of words with jieba.
title_s = [jieba.lcut(title) for title in add_words_list]

# De-duplicate the words inside each title so a word counts at most once per
# title. dict.fromkeys keeps first-seen order. Result is a 2-D list: [[...], [...], ...]
title_clean_dist = [list(dict.fromkeys(words)) for words in title_s]

# Flatten the per-title lists into one 1-D list of words.
allwords_clean_dist = [word for words in title_clean_dist for word in words]

# Wrap the flat word list in a DataFrame.
df_allwords_clean_dist = pd.DataFrame({'allwords': allwords_clean_dist})

# Count how many titles each word appears in.
import numpy as np
word_count = df_allwords_clean_dist.allwords.value_counts().reset_index()
word_count.columns = ['word', 'count']  # name the two columns
train_data = np.array(word_count)  # DataFrame -> array
train_data_list = train_data.tolist()  # array -> list of [word, count] rows
# Keep the rows as a plain list with integer counts. Converting the mixed
# [str, int] rows back into a NumPy array would coerce every count to a string.
word_count = [[word, int(count)] for word, count in train_data_list]
from pyecharts import options as opts
from pyecharts.charts import WordCloud


# Word cloud of the title keywords; word_count supplies the (word, frequency) rows.
keyword_cloud = WordCloud()
keyword_cloud.add("", word_count, word_size_range=[12, 55])  # font size range for the words
keyword_cloud.set_global_opts(title_opts=opts.TitleOpts(title="圣诞节关键词"))

# c is bound to whatever render() returns, exactly as in the chained form.
c = keyword_cloud.render("christmas_key.html")

image.png

效率统计

from collections import defaultdict

word_counts = df_allwords_clean_dist.allwords.value_counts().reset_index()
word_counts.columns = ['word', 'count']  # name the two columns

# For every word, add up daySales over all products whose title contains it.
# title_clean_dist is already de-duplicated per title, so one pass over the
# titles credits each product's sales to each of its words exactly once. This
# replaces a scan of every title for every word.
sales_by_word = defaultdict(int)
for title_words, day_sales in zip(title_clean_dist, add_words.daySales):
    for word in title_words:
        sales_by_word[word] += day_sales
ws_sum = [sales_by_word[w] for w in word_counts.word]

df_sum = pd.DataFrame({'ws_sum': ws_sum})
df_word_sum = pd.concat([word_counts, df_sum], axis=1, ignore_index=True)
df_word_sum.columns = ['word', 'count', 'ws_sum']  # word, occurrences, sales of products containing it

# sort_values returns a new frame unless inplace=True, so keep the result.
# Sorted in descending order of total sales: head() is the best-selling words.
df_word_sum = df_word_sum.sort_values('ws_sum', ascending=False)
df_ws = df_word_sum.tail(40)

index = np.arange(df_ws.word.size)
index
df_word_sum_data = df_word_sum.head(15)
from pyecharts import options as opts
from pyecharts.charts import Bar
from pyecharts.commons.utils import JsCode
from pyecharts.faker import Faker

# JavaScript snippet handed to ECharts as the bar colour: a linear gradient
# between two rgba stops (offset 0 and offset 1).
gradient_fill = JsCode(
    """new echarts.graphic.LinearGradient(0, 0, 0, 1, [{
                offset: 0,
                color: 'rgba(0, 244, 255, 1)'
            }, {
                offset: 1,
                color: 'rgba(0, 77, 167, 1)'
            }], false)"""
)

# Item style shared by every bar: gradient fill, rounded corners, shadow colour.
bar_item_style = {
    "normal": {
        "color": gradient_fill,
        "barBorderRadius": [30, 30, 30, 30],
        "shadowColor": "rgb(0, 160, 221)",
    }
}

# Bar chart of the selected keywords against their summed sales.
keyword_bar = Bar()
keyword_bar.add_xaxis(df_word_sum_data.word.tolist())
keyword_bar.add_yaxis("关键词效率", df_word_sum_data.ws_sum.tolist(), category_gap="60%")
keyword_bar.set_series_opts(itemstyle_opts=bar_item_style)
keyword_bar.set_global_opts(title_opts=opts.TitleOpts(title="圣诞节礼物柱子-渐变圆柱"))

# c is bound to whatever render() returns, exactly as in the chained form.
c = keyword_bar.render("christmas_bar_border_radius.html")

image.png
留下几个练习:

店名分析

价格区间分析

价格和地区关系