Python

安装 kudu-python 模块
一定要确保 pip 是最新版本,并且已经安装好 Cython,然后再安装 kudu-python 的 1.2.0 版本。

  1. sudo pip install --upgrade pip
  2. [python -m ensurepip && python -m pip install --upgrade pip]
  1. sudo pip install -i https://pypi.douban.com/simple Cython
  2. sudo pip install -i https://pypi.tuna.tsinghua.edu.cn/simple kudu-python==1.2.0

或者先通过 RPM 安装 Kudu 客户端库,再用 pip 安装 kudu-python(参考以下链接):

https://kudu.apache.org/docs/developing.html
http://archive.cloudera.com/kudu/redhat/6/x86_64/kudu/5/RPMS/x86_64/

  1. rpm -i http://archive.cloudera.com/kudu/redhat/6/x86_64/kudu/5/RPMS/x86_64/kudu-client-devel-1.4.0+cdh5.12.2+0-1.cdh5.12.2.p0.8.el6.x86_64.rpm
  2. rpm -i http://archive.cloudera.com/kudu/redhat/6/x86_64/kudu/5/RPMS/x86_64/kudu-client0-1.4.0+cdh5.12.2+0-1.cdh5.12.2.p0.8.el6.x86_64.rpm
  3. yum install gcc
  4. yum install gcc-c++
  5. pip install Cython kudu-python==1.2.0
Python 示例代码:

  1. import kudu
  2. from kudu.client import Partitioning
  3. from datetime import datetime
  4. # Connect to Kudu master server
  5. # 连接 kudu master 服务
  6. client = kudu.connect(host='kudu.master', port=7051)
  7. # Define a schema for a new table
  8. # 为表定义一个模式
  9. builder = kudu.schema_builder()
  10. builder.add_column('key').type(kudu.int64).nullable(False).primary_key()
  11. builder.add_column('ts_val', type_=kudu.unixtime_micros, nullable=False, compression='lz4')
  12. schema = builder.build()
  13. # Define partitioning schema
  14. partitioning = Partitioning().add_hash_partitions(column_names=['key'], num_buckets=3)
  15. # Create new table
  16. client.create_table('python-example', schema, partitioning)
  17. # Open a table
  18. table = client.table('python-example')
  19. # Create a new session so that we can apply write operations
  20. session = client.new_session()
  21. # Insert a row
  22. # 往表里面插入一行数据
  23. op = table.new_insert({'key': 1, 'ts_val': datetime.utcnow()})
  24. session.apply(op)
  25. # Upsert a row
  26. op = table.new_upsert({'key': 2, 'ts_val': "2016-01-01T00:00:00.000000"})
  27. session.apply(op)
  28. # Updating a row
  29. op = table.new_update({'key': 1, 'ts_val': ("2017-01-01", "%Y-%m-%d")})
  30. session.apply(op)
  31. # Delete a row
  32. op = table.new_delete({'key': 2})
  33. session.apply(op)
  34. # Flush write operations; if failures occur, capture and print them.
  35. try:
  36.     session.flush()
  37. except kudu.KuduBadStatus as e:
  38.     print(session.get_pending_errors())
  39. # Create a scanner and add a predicate
  40. # 先创建一个 scanner,然后再读取表中的数据
  41. # 如果表中的数据很多,可以添加谓词(predicate),只读取满足条件的数据
  42. scanner = table.scanner()
  43. scanner.add_predicate(table['ts_val'] == datetime(2017, 1, 1))
  44. # Open Scanner and read all tuples
  45. # Note: This doesn't scale for large scans
  46. result = scanner.open().read_all_tuples()

Java

https://www.e-learn.cn/topic/3601334