python
安装 kudu-python 模块
一定要确保 pip 是最新版本,并且 Cython 已经安装好,然后再安装 kudu-python 的 1.2.0 版本。
sudo pip install --upgrade pip
或者依次执行:
python -m ensurepip
python -m pip install --upgrade pip
sudo pip install -i https://pypi.douban.com/simple Cython
sudo pip install -i https://pypi.tuna.tsinghua.edu.cn/simple kudu-python==1.2.0
或者参考下面的官方文档,先通过 RPM 安装 Kudu 客户端库,再安装 kudu-python:
https://kudu.apache.org/docs/developing.html
http://archive.cloudera.com/kudu/redhat/6/x86_64/kudu/5/RPMS/x86_64/
rpm -i http://archive.cloudera.com/kudu/redhat/6/x86_64/kudu/5/RPMS/x86_64/kudu-client-devel-1.4.0+cdh5.12.2+0-1.cdh5.12.2.p0.8.el6.x86_64.rpm
rpm -i http://archive.cloudera.com/kudu/redhat/6/x86_64/kudu/5/RPMS/x86_64/kudu-client0-1.4.0+cdh5.12.2+0-1.cdh5.12.2.p0.8.el6.x86_64.rpm
yum install gcc
yum install gcc-c++
pip install Cython kudu-python==1.2.0
# Example: basic table operations with the kudu-python client.
# Creates a table, applies insert/upsert/update/delete operations through a
# session, flushes them, and then scans the table with a predicate.
from datetime import datetime

import kudu
from kudu.client import Partitioning

# Connect to the Kudu master server.
client = kudu.connect(host='kudu.master', port=7051)

# Define the schema for the new table: an int64 primary key column and a
# non-nullable, LZ4-compressed timestamp column.
builder = kudu.schema_builder()
builder.add_column('key').type(kudu.int64).nullable(False).primary_key()
builder.add_column('ts_val', type_=kudu.unixtime_micros, nullable=False, compression='lz4')
schema = builder.build()

# Define the partitioning schema: hash-partition on 'key' into 3 buckets.
partitioning = Partitioning().add_hash_partitions(column_names=['key'], num_buckets=3)

# Create the new table.
client.create_table('python-example', schema, partitioning)

# Open the table.
table = client.table('python-example')

# Create a new session so that write operations can be applied.
session = client.new_session()

# Insert a row.
op = table.new_insert({'key': 1, 'ts_val': datetime.utcnow()})
session.apply(op)

# Upsert a row; here the timestamp is passed as a string.
op = table.new_upsert({'key': 2, 'ts_val': "2016-01-01T00:00:00.000000"})
session.apply(op)

# Update a row; here the timestamp is passed as a (value, format) tuple.
op = table.new_update({'key': 1, 'ts_val': ("2017-01-01", "%Y-%m-%d")})
session.apply(op)

# Delete a row.
op = table.new_delete({'key': 2})
session.apply(op)

# Flush the write operations; if failures occur, print the pending errors.
try:
    session.flush()
except kudu.KuduBadStatus:
    print(session.get_pending_errors())

# Create a scanner to read data from the table. When the table holds a lot
# of data, add a predicate so that only the matching rows are read.
scanner = table.scanner()
scanner.add_predicate(table['ts_val'] == datetime(2017, 1, 1))

# Open the scanner and read all tuples.
# Note: this doesn't scale for large scans.
result = scanner.open().read_all_tuples()
