Kudu - kuduClient - 《技术水库》

python
Java

python

安装 kudu-python 模块
一定要确保 pip 是最新版本，并且 Cython 已经安装好，然后再安装 kudu-python 的 1.2.0 版本。

sudo pip install --upgrade pip 
或者依次执行：python -m ensurepip，然后 python -m pip install --upgrade pip

sudo pip install -i https://pypi.douban.com/simple Cython
sudo pip install -i https://pypi.tuna.tsinghua.edu.cn/simple kudu-python==1.2.0

或者先通过 rpm 安装 Kudu 客户端库和编译工具，再用 pip 安装（参考以下链接）：

https://kudu.apache.org/docs/developing.html
http://archive.cloudera.com/kudu/redhat/6/x86_64/kudu/5/RPMS/x86_64/

rpm -i http://archive.cloudera.com/kudu/redhat/6/x86_64/kudu/5/RPMS/x86_64/kudu-client-devel-1.4.0+cdh5.12.2+0-1.cdh5.12.2.p0.8.el6.x86_64.rpm
rpm -i http://archive.cloudera.com/kudu/redhat/6/x86_64/kudu/5/RPMS/x86_64/kudu-client0-1.4.0+cdh5.12.2+0-1.cdh5.12.2.p0.8.el6.x86_64.rpm
yum install gcc
yum install gcc-c++
pip install Cython kudu-python==1.2.0

import kudu
from kudu.client import Partitioning
from datetime import datetime
# End-to-end kudu-python example: connect, create a table, apply a few
# write operations, flush them, then scan the table with a predicate.
# NOTE(review): requires a reachable Kudu master at kudu.master:7051.

# Connect to the Kudu master server
client = kudu.connect(host='kudu.master', port=7051)
# Define a schema for a new table
builder = kudu.schema_builder()
# 'key': int64, non-nullable, primary key (fluent builder form)
builder.add_column('key').type(kudu.int64).nullable(False).primary_key()
# 'ts_val': unixtime_micros, non-nullable, lz4-compressed (keyword form)
builder.add_column('ts_val', type_=kudu.unixtime_micros, nullable=False, compression='lz4')
schema = builder.build()
# Define the partitioning scheme: hash-partition on 'key' into 3 buckets
partitioning = Partitioning().add_hash_partitions(column_names=['key'], num_buckets=3)
# Create the new table
# NOTE(review): presumably fails if 'python-example' already exists — confirm
client.create_table('python-example', schema, partitioning)
# Open the table
table = client.table('python-example')
# Create a new session so that we can apply write operations
session = client.new_session()
# Insert a row into the table (timestamp given as a naive UTC datetime)
op = table.new_insert({'key': 1, 'ts_val': datetime.utcnow()})
session.apply(op)
# Upsert a row (timestamp given as a string)
# NOTE(review): presumably parsed with the client's default timestamp
# format — confirm against the kudu-python documentation
op = table.new_upsert({'key': 2, 'ts_val': "2016-01-01T00:00:00.000000"})
session.apply(op)
# Update a row (timestamp given as a (value, format) tuple)
# NOTE(review): the second element looks like a strptime-style format — confirm
op = table.new_update({'key': 1, 'ts_val': ("2017-01-01", "%Y-%m-%d")})
session.apply(op)
# Delete a row
op = table.new_delete({'key': 2})
session.apply(op)
# Flush the write operations; if failures occur, catch the error and print
# the session's pending errors.
try:
    session.flush()
except kudu.KuduBadStatus as e:
    print(session.get_pending_errors())
# Create a scanner first, then read data from the table through it.
# When the table holds a lot of data, add a predicate so that only the
# rows matching the given condition are read.
scanner = table.scanner()
scanner.add_predicate(table['ts_val'] == datetime(2017, 1, 1))
# Open the scanner and read all tuples
# Note: This doesn't scale for large scans
result = scanner.open().read_all_tuples()

Java

https://www.e-learn.cn/topic/3601334