— 建表

  1. CREATE TABLE o_retailes_trade_user (
  2. user_id INT ( 9 ),
  3. item_id INT ( 9 ),
  4. behavior_type INT ( 1 ),
  5. user_geohash VARCHAR ( 14 ),
  6. item_category INT ( 5 ),
  7. time VARCHAR ( 13 )
  8. ) SELECT
  9. *
  10. FROM
  11. o_retailes_trade_user

— 1)数据预处理

— 增加新列date_time、dates alter table o_retailes_trade_user add column date_time datetime null;— date_time数据来自time update o_retailes_trade_user set date_time =str_to_date(time,’%Y-%m-%d %H’);— H代表24进制,h-12进制 alter table o_retailes_trade_user add column dates char(10) null;— dates数据来自date_time update o_retailes_trade_user set dates=date(date_time);

— 2)重复值处理

— 插入表结构一样的表

create table temp_trade like o_retailes_trade_user; insert into temp_trade select distinct *from o_retailes_trade_user;

— 基础指标体系

/
需求:UV\PV\浏览深度 按日统计
PV:统计behavior_type=1的记录数,按日统计 分组
UV:distinct user_id的数量。 按日统计 分组
浏览深度:pv/uv
/

— pv count运算的时候,使用if语句如果behavior_type=1,返回本身,不等于返回null

SELECT dates, count( DISTINCT user_id ) as ‘uv’, count(if(behavior_type=1,user_id,null)) as ‘pv’ ,count(if(behavior_type=1,user_id,null))/ count( DISTINCT user_id ) as ‘pv/uv’ FROM temp_trade GROUP BY dates;
/
留存率(按日统计)
活跃用户留存()
/

— 自关联的方式

— 计算留存数

select user_id,dates from temp_trade group by user_id,dates; create view user_remain_view as select a.dates,count(distinct b.user_id) as user_count, count(distinct if(datediff(b.dates,a.dates)=1,b.user_id,null))as remain1, — 1日留存数计算 count(distinct if(datediff(b.dates,a.dates)=3,b.user_id,null))as remain3, — 3日留存数计算 count(distinct if(datediff(b.dates,a.dates)=4,b.user_id,null))as remain4, — 4日留存数计算 count(distinct if(datediff(b.dates,a.dates)=5,b.user_id,null))as remain5, — 5日留存数计算 count(distinct if(datediff(b.dates,a.dates)=6,b.user_id,null))as remain6, — 6日留存数计算 count(distinct if(datediff(b.dates,a.dates)=7,b.user_id,null))as remain7, — 7日留存数计算 count(distinct if(datediff(b.dates,a.dates)=15,b.user_id,null))as remain15, — 15日留存数计算 count(distinct if(datediff(b.dates,a.dates)=30,b.user_id,null))as remain30 — 30日留存数计算, count(distinct if(datediff(b.dates,a.dates)=2,b.user_id,null))as remain2 — 2日留存数计算 from (select user_id,dates from temp_trade group by user_id,dates)a left join (select user_id,dates from temp_trade group by user_id,dates)b on a.user_id=b.user_id where b.dates>=a.dates group by a.dates select * from user_remain_view

— 留存率计算 user_remain_view

— cast转换函数 decimal转换类型

select dates,user_count, concat(cast((remain1/user_count)100 as decimal(10,2)),’%’) ‘day_1’, concat(cast((remain2/user_count)100 as decimal(10,2)),’%’) ‘day_2’, concat(cast((remain3/user_count)100 as decimal(10,2)),’%’) ‘day_3’, concat(cast((remain4/user_count)100 as decimal(10,2)),’%’) ‘day_4’, concat(cast((remain5/user_count)100 as decimal(10,2)),’%’) ‘day_5’, concat(cast((remain6/user_count)100 as decimal(10,2)),’%’) ‘day_6’, concat(cast((remain7/user_count)100 as decimal(10,2)),’%’) ‘day_7’, concat(cast((remain15/user_count)100 as decimal(10,2)),’%’) ‘day_15’, concat(cast((remain30/user_count)*100 as decimal(10,2)),’%’) ‘day_30’ from user_remain_view

— RFM模型 本案例只有RF 没有M数据

— R指标分析:根据每个用户最近一次购买时间,给出相应的分数

— 每个用户的最近购买时间

drop view if EXISTS user_recency_view; create view user_recency_view as select user_id,max(dates) recency_buy_time from temp_trade where behavior_type=2 group by user_id; select * from user_recency_view

— 计算每个用户最近购买时间距离 指定时间2019-12-18

— 根据相差天数给予分数

create view r_level as select user_id,recency_buy_time,datediff(‘2019-12-18’,recency_buy_time), — <=2 5分 | <=4 4分 | <=6 3分 | <=8 2分 | 1分 (case when datediff(‘2019-12-18’,recency_buy_time)<=2 then 5 when datediff(‘2019-12-18’,recency_buy_time)<=4 then 4 when datediff(‘2019-12-18’,recency_buy_time)<=6 then 3 when datediff(‘2019-12-18’,recency_buy_time)<=8 then 2 else 1 end ) as r_value — 判断具体某个人的r分值 from user_recency_view;

— F指标计算(一定时间内的消费频率 count)

— 求出每个用户消费次数,得到消费次数后对次数进行评分 与R类似

create view user_buy_fre_view as select user_id, count(user_id) as buy_frequency— 各个用户购买次数 from temp_trade where behavior_type=2 group by user_id;

— 评分

— <=2 1分 | <=4 2分 | <=6 3分 | <=8 4分 | 5分
create view f_level as select user_id,buy_frequency, (case when buy_frequency <=2 then 1 when buy_frequency <=4 then 2 when buy_frequency <=6 then 3 when buy_frequency <=8 then 4 else 5 end) as ‘f_value’ from user_buy_fre_view — r avg select avg(r_value) as ‘r_avg’ from r_level; — 2.7939 — f avg select avg(f_value) as ‘f_avg’ from f_level; — 2.2606

— 用户级别判断

— 重要⾼价值客户:指最近⼀次消费较近⽽且消费频率较⾼的客户;
— 重要唤回客户:指最近⼀次消费较远且消费频率较⾼的客户;
— 重要深耕客户:指最近⼀次消费较近且消费频率较低的客户;
— 重要挽留客户:指最近⼀次消费较远且消费频率较低的客户;
— 拿到每个人的F和R,与均值对比
select r.user_id,r.r_value,f.f_value, (case when r.r_value>2.7939 and f.f_value>2.2606 then ‘重要⾼价值客户’ when r.r_value<2.7939 and f.f_value>2.2606 then ‘重要唤回客户’ when r.r_value>2.7939 and f.f_value<2.2606 then '重要深耕客户' when r.r_value<2.7939 and f.f_value<2.2606 then '重要挽留客户' end) from r_level r,f_level f where r.user_id=f.user_id;

— 商品指标体系

— 按照商品分组统计

select item_id, sum(case when behavior_type=1 then 1 else 0 end) as ‘pv’, — 点击量计算 sum(case when behavior_type=4 then 1 else 0 end) as ‘fav’, — 收藏量计算 sum(case when behavior_type=3 then 1 else 0 end) as ‘cart’, — 加购量计算 sum(case when behavior_type=2 then 1 else 0 end) as ‘pv’, — 购买次数计算 count(distinct case when behavior_type=2 then user_id else null end )/count(distinct user_id) as ‘buy_rate’ from temp_trade group by item_id;

— 对应品类的点击量 收藏量 加购量 购买次数 购买转化(该商品品类的所有用户中有购买转化的用户比;)

select item_category, sum(case when behavior_type=1 then 1 else 0 end) as ‘pv’, — 点击量计算 sum(case when behavior_type=4 then 1 else 0 end) as ‘fav’, — 收藏量计算 sum(case when behavior_type=3 then 1 else 0 end) as ‘cart’, — 加购量计算 sum(case when behavior_type=2 then 1 else 0 end) as ‘buy’, — 购买次数计算 count(distinct case when behavior_type=2 then user_id else null end )/count(distinct user_id) as ‘buy_rate’ from temp_trade group by item_category;

— 平台指标

— 每日的分析(1-4,分别表示点击pv、购买buy、加购物车cart、喜欢fav)

select dates, sum(case when behavior_type=1 then 1 else 0 end) as ‘pv’, — 点击量计算 sum(case when behavior_type=4 then 1 else 0 end) as ‘fav’, — 收藏量计算 sum(case when behavior_type=3 then 1 else 0 end) as ‘cart’, — 加购量计算 sum(case when behavior_type=2 then 1 else 0 end) as ‘buy’, — 购买次数计算 count(distinct case when behavior_type=2 then user_id else null end )/count(distinct user_id) as ‘buy_rate’ from temp_trade group by dates;

— 行为路径分析计算

/
行为路径分析计算
核心:拼接行为路径
偏移函数 lag lead
/

— 用户行为拼接准备

— 窗口函数 s使用窗口函数将用户行为数据并排摆放

CREATE view path_base_view as select a.* from ( select user_id, item_id, lag ( behavior_type, 4 ) over ( partition by user_id, item_id order by date_time ) lag_4, lag ( behavior_type, 3 ) over ( partition by user_id, item_id order by date_time ) lag_3, lag ( behavior_type, 2 ) over ( partition by user_id, item_id order by date_time ) lag_2, lag ( behavior_type, 1 ) over ( partition by user_id, item_id order by date_time ) lag_1, behavior_type, rank () over ( partition by user_id, item_id order by date_time desc ) rank_number from temp_trade ) a where a.rank_number = 1 and a.behavior_type = 2;

— 拼接用户行为路径

select concat(ifnull(lag_4,’空’),’-‘,ifnull(lag_3,’空’),’-‘,ifnull(lag_2,’空’),’-‘,ifnull(lag_1,’空’),’-‘,behavior_type) from path_base_view;

— 针对行为路径进行control统计

select concat(ifnull(lag_4,’空’),’-‘,ifnull(lag_3,’空’),’-‘,ifnull(lag_2,’空’),’-‘,ifnull(lag_1,’空’),’-‘,behavior_type),count(distinct user_id) from path_base_view group by concat(ifnull(lag_4,’空’),’-‘,ifnull(lag_3,’空’),’-‘,ifnull(lag_2,’空’),’-‘,ifnull(lag_1,’空’),’-‘,behavior_type);