hive 实现pivot(行转列)

在网上看多了两种方法,两种方法均需要逐个写下新增字段的取值,不适用于字段值较多的情形。目前使用了其中一种方法,记录如下

  1. DROP TABLE IF EXISTS dev.dev_packing_rebulid_day_report;
  2. CREATE TABLE dev.dev_packing_rebulid_day_report AS
  3. SELECT region_name,
  4. dim_store_name,
  5. warehouse_ord_total,
  6. SUM(IF(reason='正确', ratio, 0)) AS correct,
  7. SUM(IF(reason='推袋用箱', ratio, 0)) AS reco_bag_use_box,
  8. SUM(IF(reason='推箱用袋', ratio, 0)) AS reco_box_use_bag,
  9. SUM(IF(reason='大袋小物', ratio, 0)) AS big_bag_small_goods,
  10. SUM(IF(reason='大箱小物', ratio, 0)) AS big_box_small_bag,
  11. SUM(IF(reason='小袋大物', ratio, 0)) AS small_bag_big_goods,
  12. SUM(IF(reason='小箱大物', ratio, 0)) AS small_box_big_goods,
  13. SUM(IF(reason='原包异常', ratio, 0)) AS original_package_abnormal,
  14. SUM(IF(reason='原订单耗材未知', ratio, 0)) AS unknow_consumer,
  15. SUM(IF(reason='未知原因', ratio, 0)) AS unknown_reason
  16. FROM dev.dev_packing_rebulid_day_report_tmp2
  17. GROUP BY region_name,
  18. dim_store_name,
  19. warehouse_ord_total;

参考链接:https://www.codenong.com/cs106279865/
链接中展示了结果,参考意义更多。展示本文所用代码的目的是说明,可以保留多个字段。

case when 可以实现复杂的if else功能

  1. -- 方法一
  2. select sale_ord_ob_id,
  3. (CASE WHEN consumables_label='箱装' then '异形'
  4. ELSE label_content END) AS label_content,
  5. dim_store_name
  6. FROM app.app_packing_rebulid_order_sku_info_alg
  7. LIMIT 100;
  8. -- 方发二
  9. select sale_ord_ob_id,
  10. (CASE consumables_label
  11. WHEN '箱装' then '异形'
  12. ELSE label_content END) AS label_content,
  13. dim_store_name
  14. FROM app.app_packing_rebulid_order_sku_info_alg
  15. LIMIT 100;

用case when 实现一个Python函数

python函数

  1. # 获得推荐耗材和实际耗材的匹配因素
  2. def get_result_reason(self, result):
  3. result['reason'] = ''
  4. result['reco_con_label'] = result['reco_con_label'].str.replace('京尊达', '箱装')
  5. # 按行赋值
  6. for index, row in result.iterrows():
  7. try:
  8. if row['reco_con_label'] == '异形' or row['order_con_label'] == '二次纸箱' or row['order_con_label'] == '京尊达':
  9. result.at[index, 'reason'] = '正确'
  10. elif row['order_con_label'] == '未知耗材':
  11. result.at[index, 'reason'] = '原订单耗材未知'
  12. # 原包异常: 一个原包,一个非原包
  13. elif (row['order_con_label'] == '原包' and row['reco_con_label'] != '原包') or (
  14. row['order_con_label'] != '原包' and row['reco_con_label'] == '原包'):
  15. result.at[index, 'reason'] = '原包异常'
  16. # 正确
  17. elif (row['order_con_label'] == '原包' and row['reco_con_label'] == '原包'):
  18. result.at[index, 'reason'] = '正确'
  19. # 推袋用箱
  20. elif '箱' in row['order_con_label'] and '袋' in row['reco_con_label']:
  21. result.at[index, 'reason'] = '推袋用箱'
  22. # 推箱用袋
  23. elif '袋' in row['order_con_label'] and '箱' in row['reco_con_label']:
  24. result.at[index, 'reason'] = '推箱用袋'
  25. elif '袋' in row['order_con_label'] and '袋' in row['reco_con_label']:
  26. # 大袋小物
  27. if row['volume_ratio'] > 1.3 and row['volume_ratio'] < 1000:
  28. result.at[index, 'reason'] = '大袋小物'
  29. # 正常
  30. elif row['volume_ratio'] >= 0.7 and row['volume_ratio'] <= 1.3:
  31. result.at[index, 'reason'] = '正确'
  32. # 小袋大物(大物小袋)
  33. elif row['volume_ratio'] < 0.7:
  34. # result.at[index, 'reason'] = '小袋大物'
  35. result.at[index, 'reason'] = '正确'
  36. elif '箱' in row['order_con_label'] and '箱' in row['reco_con_label']:
  37. # 大箱小物
  38. if row['volume_ratio'] > 1.3 and row['volume_ratio'] < 1000:
  39. result.at[index, 'reason'] = '大箱小物'
  40. # 正常
  41. elif row['volume_ratio'] >= 0.7 and row['volume_ratio'] <= 1.3:
  42. result.at[index, 'reason'] = '正确'
  43. # 小箱大物(大物小箱)
  44. elif row['volume_ratio'] < 0.7:
  45. # result.at[index, 'reason'] = '小箱大物'
  46. result.at[index, 'reason'] = '正确'
  47. else:
  48. result.at[index, 'reason'] = '未知原因'
  49. except:
  50. result.at[index, 'reason'] = '未知原因'
  51. result = result[['sale_ord_ob_id', 'reason']].drop_duplicates()
  52. return result

sql脚本

  1. SELECT
  2. a.sale_ord_ob_id,
  3. a.reco_con_code,
  4. a.reco_con_label,
  5. b.volume_ratio,
  6. c.order_con_label,
  7. (
  8. CASE
  9. WHEN a.reco_con_label = '异形'
  10. OR c.order_con_label = '二次纸箱'
  11. OR c.order_con_label = '京尊达'
  12. THEN '正确'
  13. WHEN c.order_con_label = '未知耗材'
  14. THEN '原订单耗材未知'
  15. WHEN c.order_con_label = '原包'
  16. AND a.reco_con_label = '原包'
  17. THEN '正确'
  18. WHEN c.order_con_label = '原包'
  19. OR a.reco_con_label = '原包'
  20. THEN '原包异常'
  21. WHEN c.order_con_label LIKE '%箱%'
  22. AND a.reco_con_label LIKE '%袋%'
  23. THEN '推袋用箱'
  24. WHEN c.order_con_label LIKE '%袋%'
  25. AND a.reco_con_label LIKE '%箱%'
  26. THEN '推箱用袋'
  27. WHEN c.order_con_label LIKE '%袋%'
  28. AND a.reco_con_label LIKE '%袋%'
  29. THEN(
  30. CASE
  31. WHEN b.volume_ratio > 1.3
  32. AND b.volume_ratio < 1000
  33. THEN '大袋小物'
  34. WHEN b.volume_ratio >= 0.7
  35. AND b.volume_ratio <= 1.3
  36. THEN '正确'
  37. --'小袋大物'
  38. ELSE '正确'
  39. END)
  40. WHEN c.order_con_label LIKE '%箱%'
  41. AND a.reco_con_label LIKE '%箱%'
  42. THEN(
  43. CASE
  44. WHEN b.volume_ratio > 1.3
  45. AND b.volume_ratio < 1000
  46. THEN '大箱小物'
  47. WHEN b.volume_ratio >= 0.7
  48. AND b.volume_ratio <= 1.3
  49. THEN '正确'
  50. --'小箱大物'
  51. ELSE '正确'
  52. END)
  53. ELSE '未知原因'
  54. END) AS reason

mysql 创建表插入中文

如果按方式一创建表,会出现插入中文乱码的问题,在网上找的方法都不能应,于是尝试了方法二。
方法二大概只适用于临时建立小文件,如果需要在数据库中传入大文件,可以在云端建表,传入csv文件或txt文件的方法

方法一

  1. -- 建立顺序表
  2. DROP TABLE IF EXISTS dev.dev_packing_rebulid_region_seq;
  3. CREATE EXTERNAL TABLE `dev.dev_packing_rebulid_region_seq`
  4. ( `region_name` VARCHAR(255) COMMENT '区域',
  5. `sequence` String COMMENT '顺序') COMMENT '区域排序'
  6. ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
  7. NULL DEFINED AS ''
  8. STORED AS ORC
  9. tblproperties('orc.compress' = 'SNAPPY') ;
  10. -- 并插入数据
  11. -- 方式1 https://www.cnblogs.com/FocusIN/p/6747715.html
  12. insert into dev.dev_packing_rebulid_region_seq values(N'华北', '1'), (N'华南', '2');
  13. -- 方式2 https://blog.csdn.net/liu82327114/article/details/53812772
  14. insert into dev.dev_packing_rebulid_region_seq select decode(binary('华北'),'utf-8'),'1';
  15. insert into dev.dev_packing_rebulid_region_seq select decode(binary('华南'),'utf-8'),'2';

方法二

  1. DROP TABLE IF EXISTS dev.dev_packing_rebulid_region_seq;
  2. CREATE TABLE dev.dev_packing_rebulid_region_seq AS
  3. SELECT * from(
  4. SELECT '华北' as region_name, 1 as sequence
  5. UNION
  6. SELECT '华东' as region_name, 2 as sequence
  7. UNION
  8. SELECT '华南' as region_name, 3 as sequence
  9. UNION
  10. SELECT '西南' as region_name, 4 as sequence
  11. UNION
  12. SELECT '华中' as region_name, 5 as sequence
  13. UNION
  14. SELECT '东北' as region_name, 6 as sequence
  15. UNION
  16. SELECT '西北' as region_name, 7 as sequence
  17. UNION
  18. SELECT '汇总' as region_name, 8 as sequence
  19. ) a;