官方地址
https://github.com/pyecharts/pyecharts
文档地址
https://pyecharts.org/#/
实例地址
https://gallery.pyecharts.org/#/
安装包
pip install pyecharts
pip install pandas
from pyecharts import options as opts
from pyecharts.charts import Pie
import pandas as pd
from pyecharts.faker import Faker
# Build a small month -> sales table to plot.
monthly_sales = {
    "月份": ["一月份", "二月份", "三月份", "四月份", "五月份"],
    "销售额": [2580, 1300, 500, 900, 1300],
}
df = pd.DataFrame(monthly_sales)
print(df)

# Pull each column out as a plain Python list.
x_data = df["月份"].tolist()
print(x_data)
y_data = df["销售额"].tolist()
print(y_data)

# Pair every label with its value: [[label, value], ...].
pie_pairs = [[label, value] for label, value in zip(x_data, y_data)]

c = Pie()
c.add("", pie_pairs)
c.set_global_opts(title_opts=opts.TitleOpts(title="饼图"))  # chart title
c.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))  # label text: "name: value"

# In a notebook, c.render_notebook() can be used instead of render().
c.render()
设置左右边距和颜色
# Build the month -> sales table used for the chart.
df = pd.DataFrame(
    {
        "月份": ["一月份", "二月份", "三月份", "四月份", "五月份"],
        "销售额": [2580, 1300, 500, 900, 1300],
    }
)
print(df)
x_data = df["月份"].tolist()
print(x_data)
y_data = df["销售额"].tolist()
print(y_data)

c = (
    Pie()
    # Pie.add expects the series name first and the data pairs second. The
    # series name was missing, so the data list was bound to series_name and
    # the call failed because data_pair was never supplied.
    .add(
        "",
        [list(z) for z in zip(x_data, y_data)],
        center=["30%", "50%"],  # pie centre: distance from the left / from the top
    )
    .set_colors(["blue", "green", "purple", "red", "pink"])  # one colour per slice
    .set_global_opts(title_opts=opts.TitleOpts(title="饼图"))  # chart title
    .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))  # label text: "name: value"
)
# In a notebook, c.render_notebook() can be used instead of render().
c.render()
改变图例位置
数据生成
上面的图例是水平方向排列的,而且个数比较少。如果我们的图例比较多,需要改成竖直方向,同时实现翻页滚动功能。
在这里我们使用的是 pyecharts 中自带的数据:
1、Faker.choose():用来生成数据标签,每次调用会从内置的多组标签(如服饰、饮料、手机、水果等)中随机返回一组
Faker.choose()
2、Faker.values() 是用来生成具体的数据,随机生成
Faker.values()
代码
# Concatenate four random label sets and four random value sets so the chart
# has many legend entries.
legend_demo_labels = Faker.choose() + Faker.choose() + Faker.choose() + Faker.choose()
legend_demo_values = Faker.values() + Faker.values() + Faker.values() + Faker.values()

c = (
    Pie()
    .add(
        "",
        [list(z) for z in zip(legend_demo_labels, legend_demo_values)],
        center=["30%", "50%"],  # pie centre: distance from the left / from the top
    )
    .set_colors(["blue", "green", "purple", "red", "pink"])
    .set_global_opts(
        title_opts=opts.TitleOpts(title="饼图"),  # chart title
        # The legend was never configured, so it stayed horizontal. Stack it
        # vertically on the right and make it scrollable (paged).
        legend_opts=opts.LegendOpts(
            type_="scroll", pos_left="80%", orient="vertical"
        ),
    )
    .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))  # label text: "name: value"
)
c.render_notebook()
# Outside a notebook, use c.render() instead.
环状饼图主要是通过 add 方法中的 radius 参数来实现的。实现过程如下:
x_data = ["小明", "小红", "张三", "李四", "王五"]
y_data = [335, 310, 234, 135, 548]

# [[name, value], ...] pairs for the series.
ring_pairs = [[name, value] for name, value in zip(x_data, y_data)]

# Tooltip shown when hovering a slice: series name, slice name, value, percent.
hover_tooltip = opts.TooltipOpts(
    trigger="item", formatter="{a} <br/>{b}: {c} ({d}%)"
)

c = Pie(init_opts=opts.InitOpts(width="1600px", height="1000px"))  # canvas size
c.add(
    series_name="访问来源",
    data_pair=ring_pairs,
    radius=["15%", "50%"],  # inner and outer radius; a non-zero inner radius gives the ring
    center=["30%", "40%"],  # pie centre: distance from the left / from the top
    label_opts=opts.LabelOpts(is_show=True),  # show slice labels
)
# Legend on the left edge, entries stacked vertically.
c.set_global_opts(legend_opts=opts.LegendOpts(pos_left="left", orient="vertical"))
c.set_series_opts(tooltip_opts=hover_tooltip)
c.render_notebook()
内嵌饼图
import pyecharts.options as opts
from pyecharts.charts import Pie
from pyecharts.globals import ThemeType
# Inner pie data.
inner_x_data = ["直达", "营销广告", "搜索引擎", "产品"]
inner_y_data = [335, 679, 548, 283]
inner_data_pair = [list(z) for z in zip(inner_x_data, inner_y_data)]
# -> [['直达', 335], ['营销广告', 679], ['搜索引擎', 548], ['产品', 283]]

# Outer ring (nested) data.
# The label list used to carry an 11th entry ("联盟广告") with no matching
# value; zip() silently dropped it. It is removed so labels and values line up
# one-to-one and the plotted data is unchanged.
# NOTE(review): if a value was missing rather than a label being extra,
# restore the label and add its value.
outer_x_data = ["搜索引擎", "邮件营销", "直达", "营销广告", "联盟广告", "视频广告", "产品", "百度", "谷歌", "邮件营销"]
outer_y_data = [335, 135, 147, 102, 220, 310, 234, 135, 648, 251]
outer_data_pair = [list(z) for z in zip(outer_x_data, outer_y_data)]

c = (
    # Chart initialisation: canvas size and theme.
    Pie(
        init_opts=opts.InitOpts(
            width="900px",
            height="800px",
            theme=ThemeType.SHINE,
        )
    )
    # Inner pie.
    .add(
        series_name="版本3.2.1",  # series name
        center=["50%", "35%"],  # pie centre position
        data_pair=inner_data_pair,  # [[key1, value1], [key2, value2], ...]
        radius=["25%", "40%"],  # [inner radius, outer radius]
        label_opts=opts.LabelOpts(position='inner'),  # labels drawn inside the slices
    )
    # Outer nested ring.
    .add(
        series_name="版本3.2.9",  # series name
        center=["50%", "35%"],  # same centre as the inner pie
        radius=["40%", "60%"],  # [inner radius, outer radius]
        data_pair=outer_data_pair,  # [[key1, value1], [key2, value2], ...]
        # Rich-text label: series name header, a rule, then "name: value percent".
        label_opts=opts.LabelOpts(
            position="outside",
            formatter="{a|{a}}{abg|}\n{hr|}\n {b|{b}: }{c} {per|{d}%} ",
            background_color="#eee",
            border_color="#aaa",
            border_width=1,
            border_radius=4,
            rich={
                "a": {"color": "#999", "lineHeight": 22, "align": "center"},
                "abg": {
                    "backgroundColor": "#e3e3e3",
                    "width": "100%",
                    "align": "right",
                    "height": 22,
                    "borderRadius": [4, 4, 0, 0],
                },
                "hr": {
                    "borderColor": "#aaa",
                    "width": "100%",
                    "borderWidth": 0.5,
                    "height": 0,
                },
                "b": {"fontSize": 16, "lineHeight": 33},
                "per": {
                    "color": "#eee",
                    "backgroundColor": "#334455",
                    "padding": [2, 4],
                    "borderRadius": 2,
                },
            },
        ),
    )
    # Global options.
    .set_global_opts(
        # NOTE(review): a pie chart draws no axes, so these two options look
        # redundant; they are passed through unchanged. Confirm before removing.
        xaxis_opts=opts.AxisOpts(is_show=False),
        yaxis_opts=opts.AxisOpts(is_show=False),
        legend_opts=opts.LegendOpts(is_show=True),  # legend is shown
        title_opts=opts.TitleOpts(title=None),  # no title
    )
    # Series options applied to every series.
    .set_series_opts(
        tooltip_opts=opts.TooltipOpts(
            trigger="item",
            formatter="{a} <br/>{b}: {c} ({d}%)",
        ),
        # No label_opts here: a label_opts passed to set_series_opts is applied
        # to every series and would replace the inner-position and rich-text
        # labels configured in the add() calls above. Labels are shown by
        # default, so nothing else is needed to keep them visible.
    )
)
c.render_notebook()
多饼图
# One small ring per genre: (labels, values, centre position).
genre_rings = [
    (["剧情", "其他"], [30, 70], ["20%", "30%"]),
    (["奇幻", "其他"], [40, 60], ["55%", "30%"]),
    (["爱情", "其他"], [24, 76], ["20%", "70%"]),
    (["惊悚", "其他"], [11, 89], ["55%", "70%"]),
]

c = Pie()
for ring_labels, ring_values, ring_center in genre_rings:
    c.add(
        "",
        [[label, value] for label, value in zip(ring_labels, ring_values)],
        center=ring_center,  # where this ring sits on the canvas
        radius=[60, 80],  # inner and outer radius of every ring
    )

# Title plus a scrollable vertical legend placed on the right.
c.set_global_opts(
    title_opts=opts.TitleOpts(title="Pie-多饼图基本示例"),
    legend_opts=opts.LegendOpts(
        type_="scroll", pos_top="20%", pos_left="80%", orient="vertical"
    ),
)
c.render_notebook()
玫瑰图
# Both rose charts share one random label set but get their own random values.
v = Faker.choose()
radius_rose_pairs = [[label, value] for label, value in zip(v, Faker.values())]
area_rose_pairs = [[label, value] for label, value in zip(v, Faker.values())]

c = Pie()
# Left chart: rosetype "radius", labels hidden on the chart itself.
c.add(
    "",
    radius_rose_pairs,
    radius=["30%", "60%"],  # inner and outer radius
    center=["25%", "50%"],  # position on the canvas
    rosetype="radius",
    label_opts=opts.LabelOpts(is_show=False),
)
# Right chart: rosetype "area", default labels.
c.add(
    "",
    area_rose_pairs,
    radius=["30%", "60%"],
    center=["75%", "50%"],
    rosetype="area",
)
c.set_global_opts(title_opts=opts.TitleOpts(title="Pie-玫瑰图示例"))
c.render_notebook()
案例 淘宝2020年12月12日 晚12时 圣诞节 数据
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
def _province_and_city(location):
    """Split a whitespace-separated location string into (province, city).

    When the string holds a single token, that token is used for both
    province and city. Deciding by token count instead of string length
    avoids an IndexError on a long location that contains no whitespace.
    """
    parts = location.split()
    province = parts[0]
    city = parts[1] if len(parts) > 1 else province
    return province, city


# Load the raw listing data.
christmas_data = pd.read_excel('./christmas.xls')
# describe() reports basic summary statistics.
christmas_data.describe()

# Keep only the columns of interest. Take an explicit copy: assigning new
# columns onto a bare column selection triggers SettingWithCopyWarning.
company_sorted_data = christmas_data[['title','url','nickname','shopname','shopType','originalPrice','discountPrice','coupon','startingPrice','information','favorablePrice','location','daySales','dsr','category','leaf']].copy()
company_sorted_data.head(2)

# Type conversion.
company_sorted_data['originalPrice'] = company_sorted_data['originalPrice'].astype('float64')
company_sorted_data.head(2)

# Derive province and city from the location column, then drop location.
province_city = company_sorted_data['location'].apply(_province_and_city)
company_sorted_data['province'] = province_city.str[0]
company_sorted_data['city'] = province_city.str[1]
company_sorted_data = company_sorted_data.drop(columns='location')
company_sorted_data.head(2)

# Final column selection used by the analysis.
company_sorted_data = company_sorted_data[['title','nickname','shopType','discountPrice','startingPrice','information','daySales','dsr','category','leaf','province','city']]
company_sorted_data
数据挖掘与分析
pip install jieba
import jieba
import pandas as pd
import numpy as np
# Load the listings and segment every title into words with jieba.
add_words = pd.read_excel('./christmas.xls')
add_words_list = add_words['title'].tolist()
title_s = [jieba.lcut(title) for title in add_words_list]

# De-duplicate the words inside each title while keeping first-seen order,
# e.g. a title that segments to [A, B, A] becomes [A, B].
# title_clean_dist stays a 2-D list: one word list per title.
title_clean_dist = [list(dict.fromkeys(words)) for words in title_s]

# Flatten title_clean_dist into a single 1-D list of words.
allwords_clean_dist = [word for words in title_clean_dist for word in words]

# Wrap the flat word list in a DataFrame.
df_allwords_clean_dist = pd.DataFrame({'allwords': allwords_clean_dist})
# 对过滤_去重的词语 进行分类汇总:
import numpy as np
# Count how many titles contain each word (words are unique within a title).
word_count = df_allwords_clean_dist.allwords.value_counts().reset_index()
word_count.columns = ['word', 'count']  # name the two columns
train_data = np.array(word_count)  # DataFrame -> array
train_data_list = train_data.tolist()  # array -> [[word, count], ...]
# Keep the plain list. Converting it back with np.array() would coerce the
# mixed str/int rows to a string array, turning every count into text.
word_count = train_data_list
from pyecharts import options as opts
from pyecharts.charts import WordCloud
# Word cloud of title keywords, sized by how often each word occurs.
keyword_cloud = WordCloud()
keyword_cloud.add("", word_count, word_size_range=[12, 55])
keyword_cloud.set_global_opts(title_opts=opts.TitleOpts(title="圣诞节关键词"))
# c holds whatever render() returns after writing the HTML file.
c = keyword_cloud.render("christmas_key.html")
效率统计
# Count how many titles contain each word.
word_counts = df_allwords_clean_dist.allwords.value_counts().reset_index()
word_counts.columns = ['word', 'count']  # name the two columns

# Total daily sales of all listings whose title contains each word.
# One pass over the titles replaces the former words x titles nested loop.
# Words are unique within a title, so each listing is counted once per word.
# Assumes title_clean_dist rows are in the same order as add_words rows.
sales_by_word = {}
for title_words, day_sales in zip(title_clean_dist, add_words.daySales):
    for word in title_words:
        sales_by_word[word] = sales_by_word.get(word, 0) + day_sales
ws_sum = [sales_by_word.get(word, 0) for word in word_counts.word]

df_sum = pd.DataFrame({'ws_sum': ws_sum})
df_word_sum = pd.concat([word_counts, df_sum], axis=1, ignore_index=True)
df_word_sum.columns = ['word', 'count', 'ws_sum']  # word, occurrences, sales of listings containing it

# sort_values returns a new frame. The result used to be discarded, so the
# head()/tail() selections below were not ordered by sales at all.
df_word_sum = df_word_sum.sort_values('ws_sum', ascending=False)  # descending by sales

df_ws = df_word_sum.tail(40)
index = np.arange(df_ws.word.size)
index
# Top 15 keywords by total sales, used for the bar chart.
df_word_sum_data = df_word_sum.head(15)
from pyecharts import options as opts
from pyecharts.charts import Bar
from pyecharts.commons.utils import JsCode
from pyecharts.faker import Faker
# Vertical gradient fill, passed to ECharts as raw JavaScript.
bar_gradient = JsCode(
    """new echarts.graphic.LinearGradient(0, 0, 0, 1, [{
offset: 0,
color: 'rgba(0, 244, 255, 1)'
}, {
offset: 1,
color: 'rgba(0, 77, 167, 1)'
}], false)"""
)

# Item style: gradient colour, rounded corners and a shadow colour.
rounded_gradient_style = {
    "normal": {
        "color": bar_gradient,
        "barBorderRadius": [30, 30, 30, 30],
        "shadowColor": "rgb(0, 160, 221)",
    }
}

keyword_bar = Bar()
keyword_bar.add_xaxis(df_word_sum_data.word.tolist())
keyword_bar.add_yaxis(
    "关键词效率", df_word_sum_data.ws_sum.tolist(), category_gap="60%"
)
keyword_bar.set_series_opts(itemstyle_opts=rounded_gradient_style)
keyword_bar.set_global_opts(
    title_opts=opts.TitleOpts(title="圣诞节礼物柱子-渐变圆柱")
)
# c holds whatever render() returns after writing the HTML file.
c = keyword_bar.render("christmas_bar_border_radius.html")
留下几个练习题:
店名分析
价格区间分析
价格和地区关系