数据准备
a,2017-02-05,200
a,2017-02-06,300
a,2017-02-07,200
a,2017-02-08,400
a,2017-02-10,600
b,2017-02-05,200
b,2017-02-06,300
b,2017-02-08,200
b,2017-02-09,400
b,2017-02-10,600
c,2017-01-31,200
c,2017-02-01,300
c,2017-02-02,200
c,2017-02-03,400
c,2017-02-10,600
a,2017-03-01,200
a,2017-03-02,300
a,2017-03-03,200
a,2017-03-04,400
a,2017-03-05,600
求连续N天的销售记录
一 先建一个 ClickHouse 表
create table
tb_shop(name String , ctime Date ,money Float64)
engine=MergeTree ---> 引擎
primary key (name,ctime) ----> 主键
order by (name,ctime) ; ----> 分组
二 导入数据
cat shop.csv | clickhouse-client -q "insert into tb_shop FORMAT CSV"
把shop.csv 下的数据 导入到 tb_shop 表中 --> CSV 数据格式
select * from tb_shop; 查看表数据
三 把数据转化成数组
select
name,
groupArray(ctime) ---> 时间进行排序 转化成数组
from
tb_shop
group by name ;
四 把数组进行索引编号排序
select
name ,
groupArray(ctime) arr , ----> 时间进行排序
arrayEnumerate(arr) arr_index ----> 求出数组的索引进行编号排序
from
tb_shop
group by name ;
五 把数组和后面的索引值进行拼接 使用 array join 方法
弄了一个子查询
select
name ,
ct ,
idx
from
(
select
name ,
groupArray(ctime) arr ,
arrayEnumerate(arr) arr_index
from
tb_shop
group by name)
array join ----> 把数组和索引进行拼接
arr as ct ,
arr_index as idx
order by name ----> 按名字进行排序
;
六 查看相同天数的连续的登录天数
select
name ,
ct ,
idx ,
subtractDays(ct , idx) as diff ---> 时间减去索引的个数
from
(
select
name ,
groupArray(ctime) arr ,
arrayEnumerate(arr) arr_index
from
tb_shop
group by name)
array join
arr as ct ,
arr_index as idx
order by name
;
七 求出最终结果 连续N天的销售记录
select
name ,
count(1) days ---> 登录天数的总和
from
(select
name ,
ct ,
idx ,
subtractDays(ct , idx) as diff
from
(
select
name ,
groupArray(ctime) arr ,
arrayEnumerate(arr) arr_index
from
tb_shop
group by name)t1
array join
arr as ct ,
arr_index as idx
order by name)t2
group by name , diff ----> name 和 diff 进行分组
having days >3 ----> 天数大于3天
order by name , days desc -----> name 和days 进行降序排序
limit 1 by name ; -----> 名字进行分组后取第一个数据