分区表的实现
-- 添加分区表-- CREATE TABLE device_10 PARTITION OF history_main FOR VALUES FROM (10) TO (11);-- CREATE INDEX history_idx_device_10 ON device_10 USING btree(device_id);-- 添加模拟数据-- INSERT INTO history_main (device_id, geometry, valid_time, unvalid_time)-- select generate_series(0,99),-- point(random()* (518373-515834)+ 515834, random() * (3690194-3688409)+ 3688409)::geometry,-- generate_series('2022-04-01T12:00:00'::timestamp(6) with time zone, '2022-04-028T12:00:00'::timestamp(6) with time zone, '1 minutes') ,-- generate_series('2022-04-01T12:01:00'::timestamp(6) with time zone, '2022-04-028T12:01:00'::timestamp(6) with time zone, '1 minutes');truncate table history_main;INSERT INTO history_main (device_id, geometry, valid_time, unvalid_time)select * from (select generate_series(0,99), point(random()* (518373-515834) + 515834, random() * (3690194 - 3688409)+ 3688409)::geometry ) AS tb1CROSS JOIN(selectgenerate_series('2020-04-28T12:00:00'::timestamp(6) with time zone, '2022-04-028T12:9:59'::timestamp(6) with time zone, '1 minutes') as t1,generate_series('2020-04-28T12:01:00'::timestamp(6) with time zone, '2022-04-028T12:10:59'::timestamp(6) with time zone, '1 minutes') as t2 ) as t3;-- select generate_series(0,99),floor(random()*100);-- 只查看主表数据-- select * from only history_main;-- 通过主表查询所有数据-- select * from history_main;-- 查询分区表的某个子表信息-- SELECT-- nmsp_parent.nspname AS parent_schema ,-- parent.relname AS parent ,-- nmsp_child.nspname AS child_schema ,-- child.relname AS child-- FROM-- pg_inherits JOIN pg_class parent-- ON pg_inherits.inhparent = parent.oid JOIN pg_class child-- ON pg_inherits.inhrelid = child.oid JOIN pg_namespace nmsp_parent-- ON nmsp_parent.oid = parent.relnamespace JOIN pg_namespace nmsp_child-- ON nmsp_child.oid = child.relnamespace-- WHERE-- parent.relname = 'history_main' and child.relname='device_7'-- 根据范围 创建分区表-- Table: public.history_main-- DROP TABLE IF EXISTS public.history_main;-- -- CREATE SEQUENCE IF NOT EXISTS history_main_id_seq;-- CREATE TABLE IF NOT EXISTS public.history_main-- (-- geometry geometry NOT NULL,-- device_id Integer NOT NULL,-- id serial,-- state character varying(8) DEFAULT 0,-- valid_time timestamp(6) with time zone,-- unvalid_time timestamp(6) with time zone,-- type character varying(8) ,-- meta character varying(200) ,-- CONSTRAINT history_main_pkey PRIMARY KEY (id, device_id)-- ) PARTITION BY RANGE(device_id);-- ALTER TABLE IF EXISTS public.history_main-- OWNER to postgres;-- 根据list 创建分区表-- Table: public.history_main-- DROP TABLE IF EXISTS public.history_main;-- CREATE TABLE IF NOT EXISTS public.history_main-- (-- geometry geometry NOT NULL,-- device_id character varying(100) NOT NULL,-- id serial,-- state character varying(8) DEFAULT 0,-- valid_time timestamp(6) with time zone,-- unvalid_time timestamp(6) with time zone,-- type character varying(8) ,-- meta character varying(200) ,-- name character varying(100),-- CONSTRAINT history_main_pkey PRIMARY KEY (id, device_id)-- ) PARTITION BY LIST(device_id);-- -- 添加分区表-- CREATE TABLE device_0 PARTITION OF history_main FOR VALUES in('0');-- CREATE INDEX history_idx_device_0 ON device_0 USING btree(device_id);
优化目标
- 通过时间点查询每个设备的位置
- 通过时间段和设备id,查询设备在当前时间内的轨迹
- 通过name, 查询对应的设备id和轨迹
- 数据量很大时,对表的分区优化
数据表结构
根据需求主要由以下几个字段
| 字段 | 类型 | 约束和规则 | 备注 |
|---|---|---|---|
| id | serial | 主键 | 自增主键 |
| name | varchar(50) | 普通索引 | 名字,警员的名字或者警车的名字 |
| device_id | varchar(20) | 分区键 | 设备id,可根据设备id进行分区 |
| geom | geometry | epsg:4549投影坐标 | |
| state | enum | enum{0,1,2,3,4,5,6} | 设备状态:0关机;1运行中;2损坏:3未分配;4未知 |
| valid_time | timestamp | 唯一索引 | 生效时间 |
| unvalid_time | timestamp | 唯一索引 | 失效时间 |
| meta | varchar(200) | 可以存储警务通视频源的地址 |
总的数据库优化思路:
创建分区表
根据psql12的新特性,使用声明式分区,创建history表时,将其声明成分区表,并对其进行分区
-- Table: public.history_mainDROP TABLE IF EXISTS public.history_main;CREATE TABLE IF NOT EXISTS public.history_main(id serial,geometry geometry NOT NULL,device_id character varying(100) NOT NULL,state character varying(8) DEFAULT 0,valid_time timestamp(6) with time zone,unvalid_time timestamp(6) with time zone,type character varying(8) ,meta character varying(200) ,CONSTRAINT history_main_pkey PRIMARY KEY (id, device_id)) PARTITION BY LIST(device_id);-- -- 添加分区表CREATE TABLE device_0 PARTITION OF history_main FOR VALUES in('0');
看数据量,如果后续数据量过大,可以进行多级分区优化。但是分区表的子表不能超过1024。如果超过就修改配置文件中 /data/postgresql.conf 中的 max_locks_per_transaction 配置。
创建索引
创建索引https://blog.csdn.net/horses/article/details/85986558
-- 对姓名创建hash索引, 只做相等查询CREATE INDEX history_idx_name ON history_main USING hash(name);-- 对生效时间创建btree索引, 需要做大小比较查询CREATE INDEX history_idx_valid_time ON history_main USING btree(valid_time);-- 对失效时间创建btree索引,需要做大小比较查询CREATE INDEX history_idx_unvalid_time ON history_main USING btree(unvalid_time);
事务优化
select * from users;begin;update users set score = 50 where player = '库里';update users set score = 60 where player = '哈登';commit;select * from users;begin;update users set score = 0 where player = '库里';update users set score = 0 where player = '哈登';rollback;select * from users;
性能测试
添加测试数据
设置数据量:
实验设置:
普通存储,索引优化,分区,分区优化+索引优化
对比操作:初始化,更新,条件查询
| 操作时间 | ||
|---|---|---|
| 普通存储 | 初始化表结构和数据 | 11分19秒 |
| 更新设备点坐标 | ||
| 根据时间段和设备id查询轨迹 | 首次50s,后续30s左右 | |
| 根据时间点查询各个设备的位置 | 首次14s,后续6s | |
| 根据名字和当期时间查询对应的设备位置 | ||
| 分区优化 | 初始化表结构和数据 | 7分22秒 |
| 更新设备点坐标 | ||
| 根据时间段和设备id查询轨迹 | 首次50s,后续的100ms | |
| 根据时间点查询各个设备的位置 | 首次8s,后续8s | |
| 根据名字和当期时间查询对应的设备位置 | ||
| 索引优化 | 初始化表结构和数据 | 12分47秒 |
| 更新设备点坐标 | ||
| 根据时间段和设备id查询轨迹 | ||
| 根据时间点查询各个设备的位置 | ||
| 根据名字和当期时间查询对应的设备位置 | ||
| 分区+索引优化 | 初始化表结构和数据 | 12分44秒 |
| 更新设备点坐标 | ||
| 根据时间段和设备id查询轨迹 | 首次50s,后续100ms | |
| 根据时间点查询各个设备的位置 | 首次7s,后续3s | |
| 根据名字和当期时间查询对应的设备位置 |
| 时间 | 普通存储 | 分区存储 | 索引优化 | 分区+索引优化 |
|---|---|---|---|---|
| 初始化和插入数据 | 11分19秒 | 7分22秒 | 12分47秒 | 12分44秒 |
| 根据时间段和设备id查询轨迹 | 首次50s,后续30s左右 | 首次50s,后续的100ms | 首次50s,后续100ms | |
| 根据时间点查询各个设备的位置 | 首次14s,后续6s | 首次8s,后续8s | 首次7s,后续3s | |
| 测试环境 | MBP M1 PRO 16G, pg14, 数据量1e8左右 |
创建不同类型的数据表
-- 1. history_normal 普通表DROP TABLE IF EXISTS public.history_normal;CREATE TABLE IF NOT EXISTS public.history_normal(id serial NOT NULL,geometry geometry NOT NULL,device_id character varying(100) NOT NULL,state character varying(8) DEFAULT 0,valid_time timestamp(6) with time zone,unvalid_time timestamp(6) with time zone,type character varying(8) ,meta character varying(200) ,name character varying(100),CONSTRAINT history_normal_pkey PRIMARY KEY (id,device_id));-- 2. history_index 索引表DROP TABLE IF EXISTS public.history_index;CREATE TABLE IF NOT EXISTS public.history_index(id serial NOT NULL,geometry geometry NOT NULL,device_id character varying(100) NOT NULL,state character varying(8) DEFAULT 0,valid_time timestamp(6) with time zone,unvalid_time timestamp(6) with time zone,type character varying(8) ,meta character varying(200) ,name character varying(100),CONSTRAINT history_index_pkey PRIMARY KEY (id,device_id));-- 对姓名创建hash索引, 只做相等查询CREATE INDEX history_idx_name ON history_index USING hash(name);-- 对生效时间创建btree索引, 需要做大小比较查询CREATE INDEX history_idx_valid_time ON history_index USING btree(valid_time);-- 对失效时间创建btree索引,需要做大小比较查询CREATE INDEX history_idx_unvalid_time ON history_index USING btree(unvalid_time);-- 3. history_div 分区表DROP TABLE IF EXISTS public.history_div;CREATE TABLE IF NOT EXISTS public.history_div(geometry geometry NOT NULL,device_id character varying(100) NOT NULL,id serial,state character varying(8) DEFAULT 0,valid_time timestamp(6) with time zone,unvalid_time timestamp(6) with time zone,type character varying(8) ,meta character varying(200) ,name character varying(100),CONSTRAINT history_div_pkey PRIMARY KEY (id, device_id)) PARTITION BY LIST(device_id);-- CREATE TABLE device_0 PARTITION OF history_main FOR VALUES in('0');-- 4. history_main 分区+索引DROP TABLE IF EXISTS public.history_main;CREATE TABLE IF NOT EXISTS public.history_main(geometry geometry NOT NULL,device_id character varying(100) NOT NULL,id serial,state character varying(8) DEFAULT 0,valid_time timestamp(6) with time zone,unvalid_time timestamp(6) with time zone,type character varying(8) ,meta character varying(200) ,name character varying(100),CONSTRAINT history_main_pkey PRIMARY KEY (id, device_id)) PARTITION BY LIST(device_id);-- CREATE INDEX history_main_name ON history_main USING hash(name);-- 对生效时间创建btree索引, 需要做大小比较查询CREATE INDEX history_main_valid_time ON history_main USING btree(valid_time);-- 对失效时间创建btree索引,需要做大小比较查询CREATE INDEX history_main_unvalid_time ON history_main USING btree(unvalid_time);
测试插入模拟数据
-- 创建测试数据DROP VIEW IF EXISTS public.test_data;create view test_data asselect device_id::character varying(100) as device_id, point(random() * (518373-515834) + 515834, random() * (3690194 - 3688409)+ 3688409)::geometry as geom, valid_time, unvalid_time,device_id::character varying(100) as name,''::character varying(8) as state,''::character varying(8) as type,''::character varying(200) as metafrom (select generate_series(0,999) as device_id, point(random() * (518373-515834) + 515834, random() * (3690194 - 3688409)+ 3688409)::geometry as geom) AS tb1CROSS JOIN(selectgenerate_series('2020-04-28T12:00:00'::timestamp(6) with time zone, '2022-04-028T12:9:59'::timestamp(6) with time zone, '1 hours') as valid_time,generate_series('2020-04-28T12:10:00'::timestamp(6) with time zone, '2022-04-028T12:19:59'::timestamp(6) with time zone, '1 hours') as unvalid_time ) as t3;truncate table history_main;truncate table history_div;truncate table history_index;truncate table history_normal;-- 测试插入普通表INSERT INTO history_normal(geometry, device_id, state, valid_time, unvalid_time ,type, meta, name)select geom, device_id , state ,valid_time ,unvalid_time , type, meta , namefrom test_data;-- 测试插入索引优化表INSERT INTO history_index(geometry, device_id, state, valid_time, unvalid_time ,type, meta, name)select geom, device_id , state ,valid_time ,unvalid_time , type, meta , namefrom test_data;-- 测试分区优化表INSERT INTO history_div(geometry, device_id, state, valid_time, unvalid_time ,type, meta, name)select geom, device_id , state ,valid_time ,unvalid_time , type, meta , namefrom test_data;-- 测试索引+分区优化表INSERT INTO history_main(geometry, device_id, state, valid_time, unvalid_time ,type, meta, name)select geom, device_id , state ,valid_time ,unvalid_time , type, meta , namefrom test_data;
| 表类型 | 普通表 | 索引优化 | 分区优化 | 分区+索引优化 |
|---|---|---|---|---|
| 测试数据插入用时 | 1’35 | 11’30 | 3’1 | 3’55 |
| 根据时间段和id 查询 | 3’’328 | 2’’781 | 4’’576 | 1’’911 |
| 根据时间点查找设备 | 2’’913 | 3’’353 | 3’’268 | 1’’77 |
| 测试环境 | 数据量1.7e7,pg12, AMD Ryzen 5 3400G,16G内存 |
条件查询
-- 根据时间段和id 查询轨迹select device_id, geometry, valid_time, unvalid_time from public.history_normalwhere device_id = (floor(random() * 1000)::varchar)and valid_time > '2022-1-1T0:0:0'and unvalid_time < '2022-4-28T0:0:0';-- 根据时间点查询所有设备的位置select device_id, geometry, valid_time, unvalid_time from history_normalwhere valid_time <= '2022-1-1T0:0:0'and unvalid_time > '2022-1-1T0:0:0';
