hive 实现pivot(行转列)
在网上看多了两种方法,两种方法均需要逐个写下新增字段的取值,不适用于字段值较多的情形。目前使用了其中一种方法,记录如下
DROP TABLE IF EXISTS dev.dev_packing_rebulid_day_report;CREATE TABLE dev.dev_packing_rebulid_day_report ASSELECT region_name,dim_store_name,warehouse_ord_total,SUM(IF(reason='正确', ratio, 0)) AS correct,SUM(IF(reason='推袋用箱', ratio, 0)) AS reco_bag_use_box,SUM(IF(reason='推箱用袋', ratio, 0)) AS reco_box_use_bag,SUM(IF(reason='大袋小物', ratio, 0)) AS big_bag_small_goods,SUM(IF(reason='大箱小物', ratio, 0)) AS big_box_small_bag,SUM(IF(reason='小袋大物', ratio, 0)) AS small_bag_big_goods,SUM(IF(reason='小箱大物', ratio, 0)) AS small_box_big_goods,SUM(IF(reason='原包异常', ratio, 0)) AS original_package_abnormal,SUM(IF(reason='原订单耗材未知', ratio, 0)) AS unknow_consumer,SUM(IF(reason='未知原因', ratio, 0)) AS unknown_reasonFROM dev.dev_packing_rebulid_day_report_tmp2GROUP BY region_name,dim_store_name,warehouse_ord_total;
参考链接:https://www.codenong.com/cs106279865/
链接中展示了结果,参考意义更多。展示本文所用代码的目的是说明,可以保留多个字段。
case when 可以实现复杂的if else功能
-- 方法一select sale_ord_ob_id,(CASE WHEN consumables_label='箱装' then '异形'ELSE label_content END) AS label_content,dim_store_nameFROM app.app_packing_rebulid_order_sku_info_algLIMIT 100;-- 方发二select sale_ord_ob_id,(CASE consumables_labelWHEN '箱装' then '异形'ELSE label_content END) AS label_content,dim_store_nameFROM app.app_packing_rebulid_order_sku_info_algLIMIT 100;
用case when 实现一个Python函数
python函数
# 获得推荐耗材和实际耗材的匹配因素def get_result_reason(self, result):result['reason'] = ''result['reco_con_label'] = result['reco_con_label'].str.replace('京尊达', '箱装')# 按行赋值for index, row in result.iterrows():try:if row['reco_con_label'] == '异形' or row['order_con_label'] == '二次纸箱' or row['order_con_label'] == '京尊达':result.at[index, 'reason'] = '正确'elif row['order_con_label'] == '未知耗材':result.at[index, 'reason'] = '原订单耗材未知'# 原包异常: 一个原包,一个非原包elif (row['order_con_label'] == '原包' and row['reco_con_label'] != '原包') or (row['order_con_label'] != '原包' and row['reco_con_label'] == '原包'):result.at[index, 'reason'] = '原包异常'# 正确elif (row['order_con_label'] == '原包' and row['reco_con_label'] == '原包'):result.at[index, 'reason'] = '正确'# 推袋用箱elif '箱' in row['order_con_label'] and '袋' in row['reco_con_label']:result.at[index, 'reason'] = '推袋用箱'# 推箱用袋elif '袋' in row['order_con_label'] and '箱' in row['reco_con_label']:result.at[index, 'reason'] = '推箱用袋'elif '袋' in row['order_con_label'] and '袋' in row['reco_con_label']:# 大袋小物if row['volume_ratio'] > 1.3 and row['volume_ratio'] < 1000:result.at[index, 'reason'] = '大袋小物'# 正常elif row['volume_ratio'] >= 0.7 and row['volume_ratio'] <= 1.3:result.at[index, 'reason'] = '正确'# 小袋大物(大物小袋)elif row['volume_ratio'] < 0.7:# result.at[index, 'reason'] = '小袋大物'result.at[index, 'reason'] = '正确'elif '箱' in row['order_con_label'] and '箱' in row['reco_con_label']:# 大箱小物if row['volume_ratio'] > 1.3 and row['volume_ratio'] < 1000:result.at[index, 'reason'] = '大箱小物'# 正常elif row['volume_ratio'] >= 0.7 and row['volume_ratio'] <= 1.3:result.at[index, 'reason'] = '正确'# 小箱大物(大物小箱)elif row['volume_ratio'] < 0.7:# result.at[index, 'reason'] = '小箱大物'result.at[index, 'reason'] = '正确'else:result.at[index, 'reason'] = '未知原因'except:result.at[index, 'reason'] = '未知原因'result = result[['sale_ord_ob_id', 'reason']].drop_duplicates()return result
sql脚本
SELECTa.sale_ord_ob_id,a.reco_con_code,a.reco_con_label,b.volume_ratio,c.order_con_label,(CASEWHEN a.reco_con_label = '异形'OR c.order_con_label = '二次纸箱'OR c.order_con_label = '京尊达'THEN '正确'WHEN c.order_con_label = '未知耗材'THEN '原订单耗材未知'WHEN c.order_con_label = '原包'AND a.reco_con_label = '原包'THEN '正确'WHEN c.order_con_label = '原包'OR a.reco_con_label = '原包'THEN '原包异常'WHEN c.order_con_label LIKE '%箱%'AND a.reco_con_label LIKE '%袋%'THEN '推袋用箱'WHEN c.order_con_label LIKE '%袋%'AND a.reco_con_label LIKE '%箱%'THEN '推箱用袋'WHEN c.order_con_label LIKE '%袋%'AND a.reco_con_label LIKE '%袋%'THEN(CASEWHEN b.volume_ratio > 1.3AND b.volume_ratio < 1000THEN '大袋小物'WHEN b.volume_ratio >= 0.7AND b.volume_ratio <= 1.3THEN '正确'--'小袋大物'ELSE '正确'END)WHEN c.order_con_label LIKE '%箱%'AND a.reco_con_label LIKE '%箱%'THEN(CASEWHEN b.volume_ratio > 1.3AND b.volume_ratio < 1000THEN '大箱小物'WHEN b.volume_ratio >= 0.7AND b.volume_ratio <= 1.3THEN '正确'--'小箱大物'ELSE '正确'END)ELSE '未知原因'END) AS reason
mysql 创建表插入中文
如果按方式一创建表,会出现插入中文乱码的问题,在网上找的方法都不能应,于是尝试了方法二。
方法二大概只适用于临时建立小文件,如果需要在数据库中传入大文件,可以在云端建表,传入csv文件或txt文件的方法
方法一
-- 建立顺序表DROP TABLE IF EXISTS dev.dev_packing_rebulid_region_seq;CREATE EXTERNAL TABLE `dev.dev_packing_rebulid_region_seq`( `region_name` VARCHAR(255) COMMENT '区域',`sequence` String COMMENT '顺序') COMMENT '区域排序'ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'NULL DEFINED AS ''STORED AS ORCtblproperties('orc.compress' = 'SNAPPY') ;-- 并插入数据-- 方式1 https://www.cnblogs.com/FocusIN/p/6747715.htmlinsert into dev.dev_packing_rebulid_region_seq values(N'华北', '1'), (N'华南', '2');-- 方式2 https://blog.csdn.net/liu82327114/article/details/53812772insert into dev.dev_packing_rebulid_region_seq select decode(binary('华北'),'utf-8'),'1';insert into dev.dev_packing_rebulid_region_seq select decode(binary('华南'),'utf-8'),'2';
方法二
DROP TABLE IF EXISTS dev.dev_packing_rebulid_region_seq;CREATE TABLE dev.dev_packing_rebulid_region_seq ASSELECT * from(SELECT '华北' as region_name, 1 as sequenceUNIONSELECT '华东' as region_name, 2 as sequenceUNIONSELECT '华南' as region_name, 3 as sequenceUNIONSELECT '西南' as region_name, 4 as sequenceUNIONSELECT '华中' as region_name, 5 as sequenceUNIONSELECT '东北' as region_name, 6 as sequenceUNIONSELECT '西北' as region_name, 7 as sequenceUNIONSELECT '汇总' as region_name, 8 as sequence) a;
