1. 购买OSS存储表
2. 下载ClickHouse数据文件,并存储到OSS
:::info
通过阿里cloudshell(默认只让用1小时,1小时后开启新的实例)下载并传输到oss
:::

wget http://prod.publicdata.landregistry.gov.uk.s3-website-eu-west-1.amazonaws.com/pp-complete.csvaliyun oss cp pp-complete.csv oss://clickhouse-test-sinosoft/pp-complete.csv
:::warning 下载上传时间 1个小时之内 :::
处理完成后一定要删除 cloudshell里面得文件
3. 购买并导入数据
:::info
购买
:::
:::info
创建账户
:::
:::info
开通公网 ,并配置安全组
:::
3.1 创建表
CREATE TABLE uk_price_paid_local on cluster default(price UInt32,date Date,postcode1 LowCardinality(String),postcode2 LowCardinality(String),type Enum8('terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4, 'other' = 0),is_new UInt8,duration Enum8('freehold' = 1, 'leasehold' = 2, 'unknown' = 0),addr1 String,addr2 String,street LowCardinality(String),locality LowCardinality(String),town LowCardinality(String),district LowCardinality(String),county LowCardinality(String),category UInt8) ENGINE = MergeTree ORDER BY (postcode1, postcode2, addr1, addr2);
CREATE TABLE uk_price_paid_distributed on cluster default(price UInt32,date Date,postcode1 LowCardinality(String),postcode2 LowCardinality(String),type Enum8('terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4, 'other' = 0),is_new UInt8,duration Enum8('freehold' = 1, 'leasehold' = 2, 'unknown' = 0),addr1 String,addr2 String,street LowCardinality(String),locality LowCardinality(String),town LowCardinality(String),district LowCardinality(String),county LowCardinality(String),category UInt8) ENGINE = Distributed(default, default, uk_price_paid_local, rand());
3.2 查看OSS在线数据
SELECT * FROM oss('oss-cn-hangzhou-internal.aliyuncs.com', '<AK>', '<SK>', 'oss://clickhouse-test-sinosoft/pp-complete.csv', 'CSV','uuid String,price UInt32,time String,postcode String,a String,b String,c String,addr1 String,addr2 String,street String,locality String,town String,district String,county String,d String,e String');
3.3 导入数据
insert into uk_price_paid_distributed SELECT price,toDate(concat(time,':00')) AS date,splitByChar(' ', postcode)[1] AS postcode1,splitByChar(' ', postcode)[2] AS postcode2,transform(a, ['T', 'S', 'D', 'F', 'O'], ['terraced', 'semi-detached', 'detached', 'flat', 'other']) AS type,b = 'Y' AS is_new,transform(c, ['F', 'L', 'U'], ['freehold', 'leasehold', 'unknown']) AS duration,addr1,addr2,street,locality,town,district,county,d = 'B' AS category FROM oss('oss-cn-hangzhou-internal.aliyuncs.com', '<AK>', '<SK>', 'oss://clickhouse-test-sinosoft/pp-complete.csv', 'CSV','uuid String,price UInt32,time String,postcode String,a String,b String,c String,addr1 String,addr2 String,street String,locality String,town String,district String,county String,d String,e String');

:::info
注意: 1个实例分布式表 导入数据大概 250s 官方案例上面说的 40s (因为官方是单机器)
如果更多实例,这个时间上会有提升
:::
3.4 其他查询
SELECT count() FROM uk_price_paid_distributed;SELECT formatReadableSize(total_bytes) FROM system.tables WHERE name = 'uk_price_paid_local';-- 每年得平均价格 95msSELECT toYear(date) AS year, round(avg(price)) AS price, bar(price, 0, 1000000, 80) FROM uk_price_paid_local GROUP BY year ORDER BY year;SELECT toYear(date) AS year, round(avg(price)) AS price, bar(price, 0, 1000000, 80) FROM uk_price_paid_distributed GROUP BY year ORDER BY year;-- 每年得平均价格 伦敦------------------ 48msSELECT toYear(date) AS year, round(avg(price)) AS price, bar(price, 0, 2000000, 100) FROM uk_price_paid_local WHERE town = 'LONDON' GROUP BY year ORDER BY year;------------------62msSELECT toYear(date) AS year, round(avg(price)) AS price, bar(price, 0, 2000000, 100) FROM uk_price_paid_distributed WHERE town = 'LONDON' GROUP BY year ORDER BY year;-- 查询昂贵的社区前100 148msSELECTtown,district,count() AS c,round(avg(price)) AS price,bar(price, 0, 5000000, 100)FROM uk_price_paid_distributedWHERE date >= '2020-01-01'GROUP BYtown,districtHAVING c >= 100ORDER BY price DESCLIMIT 100;

