连接到cn psql -h 172.16.0.42 -p 11387 -d postgres -U tbase

一、表

1 创建表

创建shard普通表

create table public.t1(
  f1 int not null, 
  f2 varchar(20), 
  primary key(f1)
) distribute by shard(f1) to group default_group;

shard(f1)表时布键为f1字段,如果没有指定分布键，主键f1做shard key,如果没有指定主键，也没指定分布键,则默认第―个字段为分布键。

distribute by shard(x) 用于指定分布键,数据分布于那个节点就是根据这个字段值来计算分片
to group xxx：指定存储组(每个存储组可以有多个节点)
分布键字段值不能修改，字段长度不能修改，字段类型不能修改。

创建shard分区表

create table public.t1_pt (
  f1 int not null, 
  f2 timestamp not null, 
  f3 varchar(20), 
  primary key(f1) 
) partition by range (f2) 
begin (timestamp without time zone '2019-01-01 0:0:0') 
step (interval '1 month') 
partitions (3) 
distribute by shard(f1) to group default_group;

partition by range (x) 用于指定分区键，支持 timesamp,int 类型，数据分布于那个子表就是根据这个字段值来计算分区。begin( xxx )指定开始分区的时间点。
step(xxx)指定分区有周期。
partions(xx)初始化时建立分区子表个数。
增加分区子表的方法 ALTER TABLE public.t1_pt ADD PARTITIONS 2;

创建 shard 冷热分区表

create table public.t1_cold_hot (
  f1 int not null, 
  f2 timestamp not null, 
  f3 varchar(20), 
  primary key(f1) 
) partition by range (f2) 
begin (timestamp without time zone '2017-01-01 0:0:0') 
step (interval '12 month') 
partitions (4) 
distribute by shard(f1,f2) 
to group default_group cold_group;

Distribute By SHARD(f1,f2) 冷热分区表需要指定两个字段来做路由，分别是分布键和分区键。
to group default_group cold_group 需要指定两个存储组，第一个是热数据存储组，第二个是冷存储组。
创建时间范围冷热分区表需要有两个 group，冷数据的 cold_group 对应的节点需要标识为冷节点
这里连接的是 DN 节点，如果有主备，主备都要连接执行

select pg_set_node_cold_access();

冷热分区表需要在 postgresql.conf 中配置冷热分区时间参数和分区级别

cold_hot_sepration_mode = 'year' 
enable_cold_seperation = true 
manual_hot_date = '2019-01-01'

创建复制表

postgres=# create table public.t1_rep (
  f1 int not null, 
  f2 varchar(20), 
  primary key(f1) 
) distribute by replication 
to group default_group;

经常要跨库 JOIN 的小数据量表可以考虑使用复制表。
复制表是所有节点都有全量数据，对于大数据量的数据表不适合。
复制表更新性能较低。
注意更新值不要推到 DN 上去计算取值，因为这样可能导致各个 DN 的数据不一致。

2 查看表

注意：以\开头的命令只能在psql里面运行，无法在TStudio和sql工具中使用

查看所有表

\dt
\dt+

SELECT schemaname,tablename FROM pg_tables;

查询表结构

输入\d+ 表名，可以显示表结构，分布键信息

\d+ public.t1

或

SELECT a.attnum,
       a.attname AS field,
       t.typname AS type,
       a.attlen AS length,
       a.atttypmod AS lengthvar,
       a.attnotnull AS notnull,
       b.description AS comment
  FROM pg_class c,
       pg_attribute a
       LEFT OUTER JOIN pg_description b ON a.attrelid=b.objoid AND a.attnum = b.objsubid,
       pg_type t
 WHERE c.relname = 't1'  --tablename: t1
       and a.attnum > 0
       and a.attrelid = c.oid
       and a.atttypid = t.oid
 ORDER BY a.attnum

通过relname=表名来查询某表的结构

--查询用户表,获取oid
SELECT a.oid,
       a.relname AS name,
       b.description AS comment
  FROM pg_class a
       LEFT OUTER JOIN pg_description b ON b.objsubid=0 AND a.oid = b.objoid
 WHERE a.relnamespace = (SELECT oid FROM pg_namespace WHERE nspname='public') --用户表一般存储在public模式下
   AND a.relkind='r'
 ORDER BY a.relname
 SELECT a.attname AS field,
       t.typname AS type,
       a.attlen AS length,
       a.atttypmod AS lengthvar,
       a.attnotnull AS notnull,
       b.description AS comment
  FROM pg_attribute a 
       LEFT OUTER JOIN pg_description b ON a.attrelid=b.objoid AND a.attnum = b.objsubid,
       pg_type t
 WHERE a.attnum > 0
       and a.attrelid = 16768 --oid 通过oid查询对应表表结构
       and a.atttypid = t.oid
 ORDER BY a.attnum

3、DML 相关操作

3.1 INSERT

插入多条记录

CREATE TABLE public.t1_insert_mul (
  f1 int not null,
  f2 varchar(20),
  primary key(f1) 
)distribute by shard(f1) 
to group default_group;
INSERT INTO t1_insert_mul VALUES(1,'Tbase'),(2,'pg');

插入更新

create table public.t1_conflict (
  f1 int not null,
  f2 varchar(20),
  primary key(f1) 
) distribute by shard(f1) 
to group default_group; 
# 插入更新
insert into t1_conflict values(1,'tbase') ON CONFLICT (f1) DO UPDATE SET f2 = 'tbase';

create table public.t1_conflict (
  f1 int not null,
  f2 varchar(20) not null,
  f3 int ,
  primary key(f1,f2) 
)distribute by shard(f1) to group default_group;
insert into t1_conflict values(1,'tbase',2) ON CONFLICT (f1,f2) DO UPDATE SET f3 = 2;

插入返回

create table public.t1_insert_return (
  f1 int not null,
  f2 varchar(20) not null default 'tbase',
  primary key(f1) 
) distribute by shard(f1) 
to group default_group; 
insert into t1_insert_return values(1) returning *;

INSERT 更多的使用方法请参考 Postgresql 用法
http://www.postgres.cn/docs/10/sql-insert.html

3.2 UPDATE

基于分布键条件更新

create table public.t1_update_pkey (
  f1 int not null,
  f2 varchar(20) not null default 'tbase',
  f3 varchar(32), 
  primary key(f1) 
)distribute by shard(f1) 
to group default_group;
explain UPDATE t1_update_pkey SET f2='tbase' where f1=1;

性能最优，扩展性好

非分布键更新

explain UPDATE t1_update_pkey SET f2='tbase' where f3='pg';

更新语句发往所有节点

分区表带分区条件更新

create table public.t1_pt_update ( 
  f1 int not null,
  f2 timestamp not null,
  f3 varchar(20),
  primary key(f1) 
) partition by range (f2) 
begin (timestamp without time zone '2019-01-01 0:0:0') 
step (interval '1 month') 
partitions (2) 
distribute by shard(f1) 
to group default_group;
explain update t1_pt_update 
set f3='tbase' 
where 
f1=1 
and 
f2>'2019-01-01' 
and 
f2<'2019-02-01';

带分区条件更新，性能最优，扩展性好

分区表不带分区条件更新

explain update t1_pt_update set f3='tbase' where f1=1;

需要扫描所有分区子表

关联表更新 ```sql create table public.t1_update_join1 ( f1 int not null, f2 varchar(20) not null default ‘tbase’, primary key(f1) )distribute by shard(f1) to group default_group;

create table public.t1_update_join2 ( f1 int not null, f2 varchar(20) not null default ‘tbase’, primary key(f1) )distribute by shard(f1) to group default_group;

update t1_update_join1 set f2=’pg’ from t1_update_join2 where t1_update_join1.f1=t1_update_join2.f1;

表关联更新只能是基于分布键关联
- **分布键，分区键不能更新**
目前的解决办法“删除旧记录，再新增记录”<br />更多update使用方法参考[http://www.postgres.cn/docs/10/sql-update.html](http://www.postgres.cn/docs/10/sql-update.html)
<a name="CKXEA"></a>
#### 3.3 **DELETE**
- 删除返回记录
```sql
create table public.t1_delete_return (
  f1 int not null,
  f2 varchar(20) not null default 'tbase',
  primary key(f1) 
)distribute by shard(f1) 
to group default_group; 
insert into t1_delete_return values(1,'tbase'); 
delete from t1_delete_return where f1=1 returning *;

UPDATE 最优使用方法同样适合于 DELETE

DELETE 更多的使用方法见
http://www.postgres.cn/docs/10/sql-delete.html

3.4 SELECT

基于分布键查询

create table public.t1_select (
  f1 int not null,
  f2 varchar(20) not null default 'tbase',
  f3 varchar(32), primary key(f1) 
)distribute by shard(f1) 
to group default_group; 
explain select * from t1_select where f1=1;

性能最优，扩展性好

非分布键查询

explain select * from t1_select where f1<3;

查询语句发往所有节点，然后在 CN 汇总

分布键 JOIN 查询 ```sql create table public.t1_select_join1 ( f1 int not null, f2 int,primary key(f1) ) distribute by shard(f1) to group default_group;

create index t1_select_join1_f2_idx on t1_select_join1(f2);

create table public.t1_select_join2 ( f1 int not null, f2 int,primary key(f1) ) distribute by shard(f1) to group default_group;

create index t1_select_join2_f2_idx on t1_select_join2(f2);


```sql
explain select t1_select_join1.* from t1_select_join1,t1_select_join2 
where t1_select_join1.f1=t1_select_join2.f1 and t1_select_join1.f1=1;

非分布键 JOIN 查询

explain select * from t1_select_join1,t1_select_join2 
where t1_select_join1.f1=t1_select_join2.f2 and t1_select_join1.f2=1 ;

需要在 DN 做数据重分布

3.5 TRUNCATE

普通表 truncate

create table public.t1_delete_truncate ( 
  f1 int not null,
  f2 varchar(20) not null default 'tbase',
  primary key(f1) 
) distribute by shard(f1) 
to group default_group; 
insert into t1_delete_truncate select t,t::text from generate_series(1,1000000) as t;

truncate table t1_delete_truncate;

分区表 truncate

create table public.t1_pt (
  f1 int not null, 
  f2 timestamp not null, 
  f3 varchar(20), 
  primary key(f1) 
)partition by range (f2) 
begin (timestamp without time zone '2019-01-01 0:0:0') 
step (interval '1 month') 
partitions (3) 
distribute by shard(f1) 
to group default_group;

truncate public.t1_pt partition for ('2019-01-01' ::timestamp without time zone);

数据库

tbase使用

一、表