注意使用java函数一定要导入相应的类!要不然会报错!对于自定义工具类怎么导入还没有研究

kettle内无论你取出的字段值是为空,还是为null值,若经过JAVA脚本的处理后都会变成NULL 字符串了,java代码中的integer类型无法直接判断,必须用,就算在128字节以内的也判断不了(不会报错) //如果指定的数与参数相等返回0。 //如果指定的数小于参数返回-1。 //如果指定的数大于参数返回1。 Integer1.compareTo(Integer2)

本次测试内容

需要将原来的数据清洗到新的表中!新表中有新的字段需要随机生成数据


流程图

image.png

解析

表输入(由于旧数据中没有字段所以 首先现 在sql中虚拟字段)


image.png

Java 生成随机数据


image.png
image.png

  1. import java.util.*;
  2. import java.text.*;
  3. import java.util.regex.Pattern;
  4. public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
  5. Object[] r = getRow();
  6. if (r == null) {
  7. setOutputDone();
  8. return false;
  9. }
  10. //如果为空着进行随机添加
  11. String book_insert_date = get(Fields.In, "book_insert_date").getString(r);
  12. if(book_insert_date==null || book_insert_date==""){
  13. String str = "";
  14. Date date = new Date();
  15. try {
  16. str = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(date)+".0";
  17. } catch (Exception e) {
  18. str="";
  19. e.printStackTrace();
  20. }
  21. get(Fields.Out, "book_insert_date").setValue(r, str);
  22. }
  23. //设置uuid
  24. get(Fields.Out, "uuid").setValue(r, UUID.randomUUID().toString().replaceAll("-", ""));
  25. //设置推荐指数 随机0-5、
  26. get(Fields.Out, "level").setValue(r, new Random().nextInt(6));
  27. //设置副本数 随机1000
  28. get(Fields.Out, "copy").setValue(r, new Random().nextInt(1000)+1);
  29. //获取中图号 注意sql 判空 这里判断也行
  30. String book_ztype = get(Fields.In, "book_ztype").getString(r);
  31. String chineseType = book_ztype.substring(0,1);
  32. String pattern = "^[a-zA-Z]"; //包含 a-Z
  33. if(!Pattern.matches(pattern, chineseType)){ //如果包含a-Z 就强行置为Z
  34. chineseType = "Z";
  35. }
  36. get(Fields.Out, "chineseType").setValue(r,chineseType.toUpperCase());
  37. putRow(data.outputRowMeta, r);
  38. return true;
  39. }

表输出

image.png

点击mian函数进去java执行主体

image.png
image.png

获取输入值对其进行判断等操作

image.png

对上面的输入值操作之后输出理想的值

image.png

有也可以自定义输出值

image.png

代码参考

  1. <?xml version="1.0" encoding="UTF-8"?>
  2. <transformation-steps>
  3. <steps>
  4. <step>
  5. <name>Java 代码</name>
  6. <type>UserDefinedJavaClass</type>
  7. <description/>
  8. <distribute>Y</distribute>
  9. <custom_distribution/>
  10. <copies>1</copies>
  11. <partitioning>
  12. <method>none</method>
  13. <schema_name/>
  14. </partitioning>
  15. <definitions>
  16. <definition>
  17. <class_type>TRANSFORM_CLASS</class_type>
  18. <class_name>Processor</class_name>
  19. <class_source><![CDATA[import java.util.*;
  20. public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
  21. if (first) {
  22. first = false;
  23. }
  24. Object[] r = getRow();
  25. if (r == null) {
  26. setOutputDone();
  27. return false;
  28. }
  29. // It is always safest to call createOutputRow() to ensure that your output row's Object[] is large
  30. // enough to handle any new fields you are creating in this step.
  31. r = createOutputRow(r, data.outputRowMeta.size());
  32. //设置uuid
  33. get(Fields.Out, "uuid").setValue(r, UUID.randomUUID().toString().replaceAll("-", ""));
  34. putRow(data.outputRowMeta, r);
  35. return true;
  36. }
  37. ]]></class_source>
  38. </definition>
  39. </definitions>
  40. <fields>
  41. <field>
  42. <field_name>uuid</field_name>
  43. <field_type>String</field_type>
  44. <field_length>-1</field_length>
  45. <field_precision>-1</field_precision>
  46. </field>
  47. </fields><clear_result_fields>N</clear_result_fields>
  48. <info_steps></info_steps><target_steps></target_steps><usage_parameters></usage_parameters> <cluster_schema/>
  49. <remotesteps>
  50. <input>
  51. </input>
  52. <output>
  53. </output>
  54. </remotesteps>
  55. <GUI>
  56. <xloc>400</xloc>
  57. <yloc>176</yloc>
  58. <draw>Y</draw>
  59. </GUI>
  60. </step>
  61. <step>
  62. <name>表输入</name>
  63. <type>TableInput</type>
  64. <description/>
  65. <distribute>Y</distribute>
  66. <custom_distribution/>
  67. <copies>1</copies>
  68. <partitioning>
  69. <method>none</method>
  70. <schema_name/>
  71. </partitioning>
  72. <connection>mysql5.6_hadoop</connection>
  73. <sql>SELECT
  74. log_id
  75. , three_name
  76. , book_isbn
  77. , create_by
  78. , create_time
  79. , door_way
  80. , field_info
  81. , jp_record_source
  82. , jp_source_flag
  83. , log_book_type
  84. , log_bookid
  85. , log_details
  86. , log_from
  87. , log_ip
  88. , log_tableid
  89. , log_table_name
  90. , log_time
  91. , log_type
  92. , log_userid
  93. , room_num
  94. , time_length
  95. , update_by
  96. , update_time
  97. , vp_record_source
  98. , vp_source_flag
  99. FROM re_logs where log_from=&#34;门禁管理系统&#34;
  100. </sql>
  101. <limit>0</limit>
  102. <lookup/>
  103. <execute_each_row>N</execute_each_row>
  104. <variables_active>N</variables_active>
  105. <lazy_conversion_active>N</lazy_conversion_active>
  106. <cluster_schema/>
  107. <remotesteps>
  108. <input>
  109. </input>
  110. <output>
  111. </output>
  112. </remotesteps>
  113. <GUI>
  114. <xloc>144</xloc>
  115. <yloc>192</yloc>
  116. <draw>Y</draw>
  117. </GUI>
  118. </step>
  119. <step>
  120. <name>表输出</name>
  121. <type>TableOutput</type>
  122. <description/>
  123. <distribute>Y</distribute>
  124. <custom_distribution/>
  125. <copies>1</copies>
  126. <partitioning>
  127. <method>none</method>
  128. <schema_name/>
  129. </partitioning>
  130. <connection>mysql5.6_hadsource</connection>
  131. <schema/>
  132. <table>sys_doorlog</table>
  133. <commit>1000</commit>
  134. <truncate>N</truncate>
  135. <ignore_errors>N</ignore_errors>
  136. <use_batch>Y</use_batch>
  137. <specify_fields>Y</specify_fields>
  138. <partitioning_enabled>N</partitioning_enabled>
  139. <partitioning_field/>
  140. <partitioning_daily>N</partitioning_daily>
  141. <partitioning_monthly>Y</partitioning_monthly>
  142. <tablename_in_field>N</tablename_in_field>
  143. <tablename_field/>
  144. <tablename_in_table>Y</tablename_in_table>
  145. <return_keys>N</return_keys>
  146. <return_field/>
  147. <fields>
  148. <field>
  149. <column_name>create_by</column_name>
  150. <stream_name>create_by</stream_name>
  151. </field>
  152. <field>
  153. <column_name>create_time</column_name>
  154. <stream_name>create_time</stream_name>
  155. </field>
  156. <field>
  157. <column_name>door_way</column_name>
  158. <stream_name>door_way</stream_name>
  159. </field>
  160. <field>
  161. <column_name>log_details</column_name>
  162. <stream_name>log_details</stream_name>
  163. </field>
  164. <field>
  165. <column_name>log_from</column_name>
  166. <stream_name>log_from</stream_name>
  167. </field>
  168. <field>
  169. <column_name>log_tableid</column_name>
  170. <stream_name>log_tableid</stream_name>
  171. </field>
  172. <field>
  173. <column_name>log_table_name</column_name>
  174. <stream_name>log_table_name</stream_name>
  175. </field>
  176. <field>
  177. <column_name>log_time</column_name>
  178. <stream_name>log_time</stream_name>
  179. </field>
  180. <field>
  181. <column_name>log_type</column_name>
  182. <stream_name>log_type</stream_name>
  183. </field>
  184. <field>
  185. <column_name>log_userid</column_name>
  186. <stream_name>log_userid</stream_name>
  187. </field>
  188. <field>
  189. <column_name>update_by</column_name>
  190. <stream_name>update_by</stream_name>
  191. </field>
  192. <field>
  193. <column_name>update_time</column_name>
  194. <stream_name>update_time</stream_name>
  195. </field>
  196. <field>
  197. <column_name>uuid</column_name>
  198. <stream_name>uuid</stream_name>
  199. </field>
  200. <field>
  201. <column_name>id</column_name>
  202. <stream_name>log_id</stream_name>
  203. </field>
  204. <field>
  205. <column_name>door_aisle_remark</column_name>
  206. <stream_name>log_ip</stream_name>
  207. </field>
  208. </fields>
  209. <cluster_schema/>
  210. <remotesteps>
  211. <input>
  212. </input>
  213. <output>
  214. </output>
  215. </remotesteps>
  216. <GUI>
  217. <xloc>640</xloc>
  218. <yloc>192</yloc>
  219. <draw>Y</draw>
  220. </GUI>
  221. </step>
  222. </steps>
  223. <order>
  224. <hop>
  225. <from>Java 代码</from>
  226. <to>表输出</to>
  227. <enabled>Y</enabled>
  228. </hop>
  229. <hop>
  230. <from>表输入</from>
  231. <to>Java 代码</to>
  232. <enabled>Y</enabled>
  233. </hop>
  234. </order>
  235. <notepads>
  236. </notepads>
  237. <step_error_handling>
  238. </step_error_handling>
  239. </transformation-steps>

🍔注意

数据结构有变化是需要清除下缓存要不然可能找不到 新的结构字段

image.png