1. person索引
1.1 插入person数据
PUT person
{
"settings":{
"number_of_shards":20,
"number_of_replicas":0
},
"mappings":{
"properties":{
"addr":{
"type":"text",
"analyzer":"ik_max_word",
"fields":{
"keyword":{
"type":"keyword",
"ignore_above":256
}
}
},
"age":{
"type":"long"
},
"birdthday":{
"type":"date",
"format":"[yyyy-MM-dd]"
},
"color":{
"type":"keyword",
"doc_values": true
},
"create_time":{
"type":"date",
"format":"[yyyy-MM-dd HH:mm:ss]"
},
"height":{
"type":"float"
},
"name":{
"type":"text",
"analyzer":"ik_max_word",
"fields":{
"keyword":{
"type":"keyword",
"ignore_above":256
}
}
},
"sex":{
"type":"keyword",
"doc_values": true
},
"tags":{
"type":"keyword",
"doc_values": true
},
"weight":{
"type":"float",
"doc_values": true
}
}
}
}
使用java插入数据
package com.yxyy.yxpay.utils;
import cn.hutool.core.date.DateField;
import cn.hutool.core.date.DateUtil;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.util.RandomUtil;
import cn.hutool.http.HttpUtil;
import cn.hutool.json.JSONObject;
import java.io.UnsupportedEncodingException;
import java.math.BigDecimal;
import java.util.*;
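/**
* Utility that generates random person data (name, address, tags, ...) and
* formats it as Elasticsearch bulk-API lines for the {@code person} index.
*/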
public class RandomName {
// Address pool; populated by the static initializer of this class.
public static List<String> addrList = new ArrayList<>();
//标签列表
public static List<String> tagsList = Arrays.asList("郁郁寡欢", "悲观失意", "好吃懒做", "疑神疑鬼", "患得患失", "异想天开", "多愁善感", "狡猾多变", "贪小便宜", "见异思迁", "情绪多变", "脾气暴躁", "重色轻友", "胆小怕事 好吃懒做", "成熟稳重", "幼稚调皮", "温柔体贴", "诚实坦白", "婆婆妈妈", "活泼可爱", "普普通通", "内向害羞", "外向开朗", "心地善良", "聪明伶俐", "善解人意", "风趣幽默", "思想开放", "积极进取", "小心谨慎", "郁郁寡欢", "正义正直", "悲观失意 好吃懒做", "处事洒脱", "疑神疑鬼", "患得患失", "异想天开", "多愁善感", "淡泊名利", "见利忘义", "瞻前顾后", "循规蹈矩", "热心助人", "快言快语", "少言寡语", "爱管闲事", "追求刺激", "豪放不羁", "狡猾多变", "贪小便宜", "见异思迁", "情绪多变", "水性扬花", "重色轻友", "胆小怕事", "积极负责", "勇敢正义", "聪明好学", "实事求是", "务实", "实际", "老实巴交", "圆滑", "老练", "脾气暴躁", "慢条斯理", "冲动", "任性", "神经质", "暴躁", "善变", "难以琢磨", "患得患失", "浮躁", "见异思迁", "莽撞", "易怒", "犹豫不决", "轻率", "善变", "温柔", "内向", "腼腆", "害羞", "多疑", "直率", "活泼", "开朗", "滑稽", "可笑", "古怪", "怪异", "狭窄", "宽容", "猜忌", "多情", "冷淡", "热情", "放荡", "拘谨", "谨慎", "严格", "严厉", "凶残", "残忍", "无情", "懦弱", "怯弱", "卑鄙", "无耻", "下流", "无赖", "好色", "肮脏", "飘逸", "圣洁", "纯洁", "清纯", "可爱", "贤慧", "慈爱", "仁慈", "老实", "木讷", "慷慨", "大方", "随便", "暴躁", "急躁", "尖酸", "刻薄", "侠义", "忠诚", "开朗", "温柔乐观", "健谈", "冲动", "莽撞", "易怒", "情绪低落","善良", "热情", "好客", "孝顺讲义气", "大公无私", "好心肠", "豪放", "爽朗", "爽快", "爽直", "豪爽直爽", "豁达", "小气", "小心眼多心", "外向", "有人缘", "孤僻", "不合群", "好交际兴趣广泛", "仔细", "严于律已宽以待人", "严守秩序有条理", "执着", "较真", "专注", "文质彬彬", "聪明", "迟钝", "冰雪聪明聪明绝世", "聪颖", "出口成章语惊四座", "娓娓而谈", "口若悬河", "才华横溢", "出类拔萃博大精深", "有成就感", "急功近利", "好大喜功", "勤劳", "勇敢", "自信", "坚强有志气", "懒惰", "胸无大志", "胆小怕事", "果敢", "倔强", "挑衅", "信心受挫", 
"意志坚定","人见人爱","冰山美人","环肥燕瘦","粉装玉琢","兰心慧质","衣冠楚楚","冰雪聪明","装潢门面","红颜知己","婀娜多姿","亭亭玉立","我见犹怜","冰肌玉骨","貌美如花","仙姿玉色","信言不美","美若天仙","梨花带雨","仙姿佚貌","浑金璞玉","花枝招展","娇艳欲滴","仙姿玉貌","倾国倾城","城北徐公","楚楚可人","美目盼兮","琪花瑶草","艳压群芳","绝代佳人","肤如凝脂","姹紫嫣红","宛转蛾眉","小鸟依人","闭月羞花","姑射神人","双瞳剪水","傅粉何郎","人淡如菊","美丽动人","美如冠玉","娇小玲珑","尽善尽美","含苞欲放","一表非凡","惠质兰心","仪态万方","人间尤物","美伦美奂","香草美人","白璧无瑕","风华绝代","明艳动人","秀色可餐","文过饰非","仪态万千","小家碧玉","螓首蛾眉","红粉佳人","钟灵毓秀","妍姿艳质","袅袅婷婷","秀外慧中","月里嫦娥","楚楚动人","齿如瓠犀","语笑嫣然","美艳绝伦","温柔可人","出水芙蓉","千娇百媚","林下风气","巧笑倩兮","淡妆浓抹","风姿绰约","秀外惠中","出尘脱俗","窈窕淑女","捧心西子","左家娇女","绰约多姿","朱唇皓齿","一笑千金","芳泽无加","夭桃秾李","活泼可爱","眉清目秀","掷果潘安","外向","善良","开朗","活泼","好动","轻松","愉快","热情","可亲","豁达","稳重","幽默","真诚","豪爽","耿直","成熟","独立","果断","健谈","机敏","深沉","坚强","兴奋","热情","率直","毅力","友爱","风趣","沉静","谨慎","忠诚","友善","严肃","忠心","乐观","坦率","勇敢","自信","自立","沉著","执著","容忍","体贴","满足","积极","有趣","知足","勤劳","和气","无畏","务实","轻浮","冲动","幼稚","自私","依赖","任性","自负","拜金","暴躁","倔强","虚伪","孤僻","刻薄","武断","浮躁","莽撞","易怒","轻率","善变","狡猾","易怒","多疑","懒惰","专横","顽固","猜疑","挑衅","冷漠","虚荣","冷淡","反覆","跋扈","自负","逆反","怨恨","鲁莽","放任","贫乏","固执内向","脆弱","自卑","害羞","敏感","迟钝","柔弱","畏缩","顺从","胆小","安静","寡言","保守","被动","忍让","抑郁","谨慎","胆怯","温和","老实","平和","顺服","含蓄","迁就","羞涩","忸怩","缓慢","乏味","散漫","迟缓","罗嗦","耐性","悲观","消极","拖延","烦躁","妥协","唠叨","好交际","善组织","有韧性","可依赖","规范型","好心肠","善交际","无异议","竞争性","自控性","受尊重","激励性","重秩序","有条理","聆听者","无拘束","领导者","受欢迎","神经质","糊涂虫","有惰性","易兴奋","好批评","不专注","好争吵","无目标","不宽恕","无热忱","易激动","难预测","不合群","不灵活","喜操纵","情绪化","大嗓门","统治欲","强迫性","好表现","猥琐","小气","恶心","邋遢","懒惰","任性","刻薄","贪吃","贪睡","贪玩","阴险","狡诈","无趣","幼稚","小气","冲动","自以为是","眼高手低","好高骛远","虚荣心强","爱吹牛","无赖","无聊","无知","无情","愚昧","小气","愚蠢","憨","笨","傻","小气","贪财","怕死","爱慕虚荣","邋遢","好吃懒做","不劳而获","信口开河","以讹传讹","拈轻怕重","墨守陈规","顽固不化","固执己见","自私自利","唯利是图","忘恩负义","粗心大意","半途而废");
// Skin-color values; the empty string entry presumably stands for "no value" — TODO confirm.
public static List<String> colors = Arrays.asList("Yellow","White","Black","");
// Gender values; also contains an empty string entry.
public static List<String> sexList = Arrays.asList("MAN","WOMAN","");
/**
* Writes 100 bulk files (file numbers 101..200); each file receives the lines
* produced by one {@code getData(0, 200000)} call.
*/
public static void main(String[] args) {
int fileNo = 101;
while (fileNo <= 200) {
List<String> lines = getData(0, 200000);
String target = "C:\\Users\\1\\Desktop\\json\\100000" + fileNo + ".json";
FileUtil.appendLines(lines, target, "UTF-8");
fileNo++;
}
}
/**
* Builds Elasticsearch bulk-API lines for randomly generated {@code person} documents.
*
* <p>For every index in {@code [start, end)} two strings are appended: the bulk
* {@code index} action line followed by the document source line, so the result
* holds {@code 2 * (end - start)} entries. The index value itself is not stored
* in the document; only the number of iterations matters.
*
* @param start first index (inclusive)
* @param end last index (exclusive); nothing is generated when {@code end <= start}
* @return alternating action/source JSON lines
*/
public static List<String> getData(int start,int end){
List<String> list = new ArrayList<>();
for (int i = start; i < end; i++) {
JSONObject person = new JSONObject();
person.put("name",randomName(true,3));
person.put("age",RandomUtil.randomInt(10,80));
person.put("weight",RandomUtil.randomBigDecimal(new BigDecimal("40"),new BigDecimal("105")).setScale(2, BigDecimal.ROUND_DOWN));
// The upper bound of randomInt is exclusive, so bound it by the list size (as
// getRandomSex() does); the former hard-coded 2 could never select the
// entries after the second one.
person.put("color",colors.get(RandomUtil.randomInt(0,colors.size())));
person.put("height",RandomUtil.randomBigDecimal(new BigDecimal("150"),new BigDecimal("210")).setScale(2,BigDecimal.ROUND_DOWN));
person.put("addr",getRandomAddr());
// NOTE(review): relies on the returned date object's toString() producing
// "yyyy-MM-dd HH:mm:ss", the format declared for create_time — confirm for the
// Hutool version in use.
person.put("create_time",RandomUtil.randomDate(new Date(), DateField.HOUR, -700000, 0).toString());
person.put("birdthday", DateUtil.format(RandomUtil.randomDate(new Date(), DateField.HOUR, -700000, 0),"yyyy-MM-dd"));
person.put("sex",getRandomSex());
// Each person gets between 1 and 9 tags, drawn with replacement from tagsList.
List<String> tags = new ArrayList<>();
int tagsSize = RandomUtil.randomInt(1,10);
for (int j = 0; j < tagsSize; j++) {
tags.add(tagsList.get(RandomUtil.randomInt(0,tagsList.size())));
}
person.put("tags",tags);
// Bulk action line that precedes the document source line.
JSONObject action = new JSONObject().put("index",new JSONObject().put("_index","person").put("_type","_doc"));
list.add(action.toString());
list.add(person.toString());
}
return list;
}
/**
* Picks one entry of {@code sexList} at random.
* @return the randomly chosen gender value
*/
public static String getRandomSex(){
int pick = RandomUtil.randomInt(0,sexList.size());
return sexList.get(pick);
}
/**
* Picks one entry of {@code addrList} at random.
* @return the randomly chosen address
*/
public static String getRandomAddr(){
int pick = RandomUtil.randomInt(0,addrList.size());
return addrList.get(pick);
}
/**
* Method 1: builds a string of {@code len} random Chinese characters.
*
* <p>Each character is decoded as GBK from two random bytes: the first byte is
* taken from 176..214 and the second from 161..253.
*
* @param len number of characters to generate; values {@code <= 0} yield an empty string
* @return the generated string
* @throws IllegalStateException if the runtime does not provide the GBK charset
*/
public static String getRandomJianHan(int len) {
// One generator and one buffer for the whole call instead of one per character.
Random random = new Random();
StringBuilder randomName = new StringBuilder();
byte[] b = new byte[2];
for (int i = 0; i < len; i++) {
b[0] = (byte) (176 + random.nextInt(39)); // high byte
b[1] = (byte) (161 + random.nextInt(93)); // low byte
try {
randomName.append(new String(b, "GBK")); // decode the byte pair to a Chinese character
} catch (UnsupportedEncodingException ex) {
// Fail loudly: silently continuing would put the text "null" into the name.
throw new IllegalStateException("GBK charset is not available", ex);
}
}
return randomName.toString();
}
/**方法2*/
public static String randomName(boolean simple, int len) {
String surName[] = {
"赵","钱","孙","李","周","吴","郑","王","冯","陈","楮","卫","蒋","沈","韩","杨",
"朱","秦","尤","许","何","吕","施","张","孔","曹","严","华","金","魏","陶","姜",
"戚","谢","邹","喻","柏","水","窦","章","云","苏","潘","葛","奚","范","彭","郎",
"鲁","韦","昌","马","苗","凤","花","方","俞","任","袁","柳","酆","鲍","史","唐",
"费","廉","岑","薛","雷","贺","倪","汤","滕","殷","罗","毕","郝","邬","安","常",
"乐","于","时","傅","皮","卞","齐","康","伍","余","元","卜","顾","孟","平","黄",
"和","穆","萧","尹","姚","邵","湛","汪","祁","毛","禹","狄","米","贝","明","臧",
"计","伏","成","戴","谈","宋","茅","庞","熊","纪","舒","屈","项","祝","董","梁",
"杜","阮","蓝","闽","席","季","麻","强","贾","路","娄","危","江","童","颜","郭",
"梅","盛","林","刁","锺","徐","丘","骆","高","夏","蔡","田","樊","胡","凌","霍",
"虞","万","支","柯","昝","管","卢","莫","经","房","裘","缪","干","解","应","宗",
"丁","宣","贲","邓","郁","单","杭","洪","包","诸","左","石","崔","吉","钮","龚",
"程","嵇","邢","滑","裴","陆","荣","翁","荀","羊","於","惠","甄","麹","家","封",
"芮","羿","储","靳","汲","邴","糜","松","井","段","富","巫","乌","焦","巴","弓",
"牧","隗","山","谷","车","侯","宓","蓬","全","郗","班","仰","秋","仲","伊","宫",
"宁","仇","栾","暴","甘","斜","厉","戎","祖","武","符","刘","景","詹","束","龙",
"叶","幸","司","韶","郜","黎","蓟","薄","印","宿","白","怀","蒲","邰","从","鄂",
"索","咸","籍","赖","卓","蔺","屠","蒙","池","乔","阴","郁","胥","能","苍","双",
"闻","莘","党","翟","谭","贡","劳","逄","姬","申","扶","堵","冉","宰","郦","雍",
"郤","璩","桑","桂","濮","牛","寿","通","边","扈","燕","冀","郏","浦","尚","农",
"温","别","庄","晏","柴","瞿","阎","充","慕","连","茹","习","宦","艾","鱼","容",
"向","古","易","慎","戈","廖","庾","终","暨","居","衡","步","都","耿","满","弘",
"匡","国","文","寇","广","禄","阙","东","欧","殳","沃","利","蔚","越","夔","隆",
"师","巩","厍","聂","晁","勾","敖","融","冷","訾","辛","阚","那","简","饶","空",
"曾","毋","沙","乜","养","鞠","须","丰","巢","关","蒯","相","查","后","荆","红",
"游","竺","权","逑","盖","益","桓","公","晋","楚","阎","法","汝","鄢","涂","钦",
"岳","帅","缑","亢","况","后","有","琴","商","牟","佘","佴","伯","赏","墨","哈",
"谯","笪","年","爱","阳","佟"};
String doubleSurName[] = {"万俟","司马","上官","欧阳","夏侯","诸葛","闻人","东方",
"赫连","皇甫","尉迟","公羊","澹台","公冶","宗政","濮阳","淳于","单于","太叔","申屠",
"公孙","仲孙","轩辕","令狐","锺离","宇文","长孙","慕容","鲜于","闾丘","司徒","司空",
"丌官","司寇","仉","督","子车","颛孙","端木","巫马","公西","漆雕","乐正","壤驷","公良",
"拓拔","夹谷","宰父","谷梁","段干","百里","东郭","南门","呼延","归","海","羊舌","微生",
"梁丘","左丘","东门","西门","南宫"};
String[] word = {"一","乙","二","十","丁","厂","七","卜","人","入","八","九","几","儿","了","力","乃","刀","又",
"三","于","干","亏","士","工","土","才","寸","下","大","丈","与","万","上","小","口","巾","山",
"千","乞","川","亿","个","勺","久","凡","及","夕","丸","么","广","亡","门","义","之","尸","弓",
"己","已","子","卫","也","女","飞","刃","习","叉","马","乡","丰","王","井","开","夫","天","无",
"元","专","云","扎","艺","木","五","支","厅","不","太","犬","区","历","尤","友","匹","车","巨",
"牙","屯","比","互","切","瓦","止","少","日","中","冈","贝","内","水","见","午","牛","手","毛",
"气","升","长","仁","什","片","仆","化","仇","币","仍","仅","斤","爪","反","介","父","从","今",
"凶","分","乏","公","仓","月","氏","勿","欠","风","丹","匀","乌","凤","勾","文","六","方","火",
"为","斗","忆","订","计","户","认","心","尺","引","丑","巴","孔","队","办","以","允","予","劝",
"双","书","幻","玉","刊","示","末","未","击","打","巧","正","扑","扒","功","扔","去","甘","世",
"古","节","本","术","可","丙","左","厉","右","石","布","龙","平","灭","轧","东","卡","北","占",
"业","旧","帅","归","且","旦","目","叶","甲","申","叮","电","号","田","由","史","只","央","兄",
"叼","叫","另","叨","叹","四","生","失","禾","丘","付","仗","代","仙","们","仪","白","仔","他",
"斥","瓜","乎","丛","令","用","甩","印","乐","句","匆","册","犯","外","处","冬","鸟","务","包",
"饥","主","市","立","闪","兰","半","汁","汇","头","汉","宁","穴","它","讨","写","让","礼","训",
"必","议","讯","记","永","司","尼","民","出","辽","奶","奴","加","召","皮","边","发","孕","圣",
"对","台","矛","纠","母","幼","丝","式","刑","动","扛","寺","吉","扣","考","托","老","执","巩",
"圾","扩","扫","地","扬","场","耳","共","芒","亚","芝","朽","朴","机","权","过","臣","再","协",
"西","压","厌","在","有","百","存","而","页","匠","夸","夺","灰","达","列","死","成","夹","轨",
"邪","划","迈","毕","至","此","贞","师","尘","尖","劣","光","当","早","吐","吓","虫","曲","团",
"同","吊","吃","因","吸","吗","屿","帆","岁","回","岂","刚","则","肉","网","年","朱","先","丢",
"舌","竹","迁","乔","伟","传","乒","乓","休","伍","伏","优","伐","延","件","任","伤","价","份",
"华","仰","仿","伙","伪","自","血","向","似","后","行","舟","全","会","杀","合","兆","企","众",
"爷","伞","创","肌","朵","杂","危","旬","旨","负","各","名","多","争","色","壮","冲","冰","庄",
"庆","亦","刘","齐","交","次","衣","产","决","充","妄","闭","问","闯","羊","并","关","米","灯",
"州","汗","污","江","池","汤","忙","兴","宇","守","宅","字","安","讲","军","许","论","农","讽",
"设","访","寻","那","迅","尽","导","异","孙","阵","阳","收","阶","阴","防","奸","如","妇","好",
"她","妈","戏","羽","观","欢","买","红","纤","级","约","纪","驰","巡","寿","弄","麦","形","进",
"戒","吞","远","违","运","扶","抚","坛","技","坏","扰","拒","找","批","扯","址","走","抄","坝",
"贡","攻","赤","折","抓","扮","抢","孝","均","抛","投","坟","抗","坑","坊","抖","护","壳","志",
"扭","块","声","把","报","却","劫","芽","花","芹","芬","苍","芳","严","芦","劳","克","苏","杆",
"杠","杜","材","村","杏","极","李","杨","求","更","束","豆","两","丽","医","辰","励","否","还",
"歼","来","连","步","坚","旱","盯","呈","时","吴","助","县","里","呆","园","旷","围","呀","吨",
"足","邮","男","困","吵","串","员","听","吩","吹","呜","吧","吼","别","岗","帐","财","针","钉",
"告","我","乱","利","秃","秀","私","每","兵","估","体","何","但","伸","作","伯","伶","佣","低",
"你","住","位","伴","身","皂","佛","近","彻","役","返","余","希","坐","谷","妥","含","邻","岔",
"肝","肚","肠","龟","免","狂","犹","角","删","条","卵","岛","迎","饭","饮","系","言","冻","状",
"亩","况","床","库","疗","应","冷","这","序","辛","弃","冶","忘","闲","间","闷","判","灶","灿",
"弟","汪","沙","汽","沃","泛","沟","没","沈","沉","怀","忧","快","完","宋","宏","牢","究","穷",
"灾","良","证","启","评","补","初","社","识","诉","诊","词","译","君","灵","即","层","尿","尾",
"迟","局","改","张","忌","际","陆","阿","陈","阻","附","妙","妖","妨","努","忍","劲","鸡","驱",
"纯","纱","纳","纲","驳","纵","纷","纸","纹","纺","驴","纽","奉","玩","环","武","青","责","现",
"表","规","抹","拢","拔","拣","担","坦","押","抽","拐","拖","拍","者","顶","拆","拥","抵","拘",
"势","抱","垃","拉","拦","拌","幸","招","坡","披","拨","择","抬","其","取","苦","若","茂","苹",
"苗","英","范","直","茄","茎","茅","林","枝","杯","柜","析","板","松","枪","构","杰","述","枕",
"丧","或","画","卧","事","刺","枣","雨","卖","矿","码","厕","奔","奇","奋","态","欧","垄","妻",
"轰","顷","转","斩","轮","软","到","非","叔","肯","齿","些","虎","虏","肾","贤","尚","旺","具",
"果","味","昆","国","昌","畅","明","易","昂","典","固","忠","咐","呼","鸣","咏","呢","岸","岩",
"帖","罗","帜","岭","凯","败","贩","购","图","钓","制","知","垂","牧","物","乖","刮","秆","和",
"季","委","佳","侍","供","使","例","版","侄","侦","侧","凭","侨","佩","货","依","的","迫","质",
"欣","征","往","爬","彼","径","所","舍","金","命","斧","爸","采","受","乳","贪","念","贫","肤",
"肺","肢","肿","胀","朋","股","肥","服","胁","周","昏","鱼","兔","狐","忽","狗","备","饰","饱",
"饲","变","京","享","店","夜","庙","府","底","剂","郊","废","净","盲","放","刻","育","闸","闹",
"郑","券","卷","单","炒","炊","炕","炎","炉","沫","浅","法","泄","河","沾","泪","油","泊","沿",
"泡","注","泻","泳","泥","沸","波","泼","泽","治","怖","性","怕","怜","怪","学","宝","宗","定",
"宜","审","宙","官","空","帘","实","试","郎","诗","肩","房","诚","衬","衫","视","话","诞","询",
"该","详","建","肃","录","隶","居","届","刷","屈","弦","承","孟","孤","陕","降","限","妹","姑",
"姐","姓","始","驾","参","艰","线","练","组","细","驶","织","终","驻","驼","绍","经","贯","奏",
"春","帮","珍","玻","毒","型","挂","封","持","项","垮","挎","城","挠","政","赴","赵","挡","挺",
"括","拴","拾","挑","指","垫","挣","挤","拼","挖","按","挥","挪","某","甚","革","荐","巷","带",
"草","茧","茶","荒","茫","荡","荣","故","胡","南","药","标","枯","柄","栋","相","查","柏","柳",
"柱","柿","栏","树","要","咸","威","歪","研","砖","厘","厚","砌","砍","面","耐","耍","牵","残",
"殃","轻","鸦","皆","背","战","点","临","览","竖","省","削","尝","是","盼","眨","哄","显","哑",
"冒","映","星","昨","畏","趴","胃","贵","界","虹","虾","蚁","思","蚂","虽","品","咽","骂","哗",
"咱","响","哈","咬","咳","哪","炭","峡","罚","贱","贴","骨","钞","钟","钢","钥","钩","卸","缸",
"拜","看","矩","怎","牲","选","适","秒","香","种","秋","科","重","复","竿","段","便","俩","贷",
"顺","修","保","促","侮","俭","俗","俘","信","皇","泉","鬼","侵","追","俊","盾","待","律","很",
"须","叙","剑","逃","食","盆","胆","胜","胞","胖","脉","勉","狭","狮","独","狡","狱","狠","贸",
"怨","急","饶","蚀","饺","饼","弯","将","奖","哀","亭","亮","度","迹","庭","疮","疯","疫","疤",
"姿","亲","音","帝","施","闻","阀","阁","差","养","美","姜","叛","送","类","迷","前","首","逆",
"总","炼","炸","炮","烂","剃","洁","洪","洒","浇","浊","洞","测","洗","活","派","洽","染","济",
"洋","洲","浑","浓","津","恒","恢","恰","恼","恨","举","觉","宣","室","宫","宪","突","穿","窃",
"客","冠","语","扁","袄","祖","神","祝","误","诱","说","诵","垦","退","既","屋","昼","费","陡",
"眉","孩","除","险","院","娃","姥","姨","姻","娇","怒","架","贺","盈","勇","怠","柔","垒","绑",
"绒","结","绕","骄","绘","给","络","骆","绝","绞","统","耕","耗","艳","泰","珠","班","素","蚕",
"顽","盏","匪","捞","栽","捕","振","载","赶","起","盐","捎","捏","埋","捉","捆","捐","损","都",
"哲","逝","捡","换","挽","热","恐","壶","挨","耻","耽","恭","莲","莫","荷","获","晋","恶","真",
"框","桂","档","桐","株","桥","桃","格","校","核","样","根","索","哥","速","逗","栗","配","翅",
"辱","唇","夏","础","破","原","套","逐","烈","殊","顾","轿","较","顿","毙","致","柴","桌","虑",
"监","紧","党","晒","眠","晓","鸭","晃","晌","晕","蚊","哨","哭","恩","唤","啊","唉","罢","峰",
"圆","贼","贿","钱","钳","钻","铁","铃","铅","缺","氧","特","牺","造","乘","敌","秤","租","积",
"秧","秩","称","秘","透","笔","笑","笋","债","借","值","倚","倾","倒","倘","俱","倡","候","俯",
"倍","倦","健","臭","射","躬","息","徒","徐","舰","舱","般","航","途","拿","爹","爱","颂","翁",
"脆","脂","胸","胳","脏","胶","脑","狸","狼","逢","留","皱","饿","恋","桨","浆","衰","高","席",
"准","座","脊","症","病","疾","疼","疲","效","离","唐","资","凉","站","剖","竞","部","旁","旅",
"畜","阅","羞","瓶","拳","粉","料","益","兼","烤","烘","烦","烧","烛","烟","递","涛","浙","涝",
"酒","涉","消","浩","海","涂","浴","浮","流","润","浪","浸","涨","烫","涌","悟","悄","悔","悦",
"害","宽","家","宵","宴","宾","窄","容","宰","案","请","朗","诸","读","扇","袜","袖","袍","被",
"祥","课","谁","调","冤","谅","谈","谊","剥","恳","展","剧","屑","弱","陵","陶","陷","陪","娱",
"娘","通","能","难","预","桑","绢","绣","验","继","球","理","捧","堵","描","域","掩","捷","排",
"掉","堆","推","掀","授","教","掏","掠","培","接","控","探","据","掘","职","基","著","勒","黄",
"萌","萝","菌","菜","萄","菊","萍","菠","营","械","梦","梢","梅","检","梳","梯","桶","救","副",
"票","戚","爽","聋","袭","盛","雪","辅","辆","虚","雀","堂","常","匙","晨","睁","眯","眼","悬",
"野","啦","晚","啄","距","跃","略","蛇","累","唱","患","唯","崖","崭","崇","圈","铜","铲","银",
"甜","梨","犁","移","笨","笼","笛","符","第","敏","做","袋","悠","偿","偶","偷","您","售","停",
"偏","假","得","衔","盘","船","斜","盒","鸽","悉","欲","彩","领","脚","脖","脸","脱","象","够",
"猜","猪","猎","猫","猛","馅","馆","凑","减","毫","麻","痒","痕","廊","康","庸","鹿","盗","章",
"竟","商","族","旋","望","率","着","盖","粘","粗","粒","断","剪","兽","清","添","淋","淹","渠",
"渐","混","渔","淘","液","淡","深","婆","梁","渗","情","惜","惭","悼","惧","惕","惊","惨","惯",
"寇","寄","宿","窑","密","谋","谎","祸","谜","逮","敢","屠","弹","随","蛋","隆","隐","婚","婶",
"颈","绩","绪","续","骑","绳","维","绵","绸","绿","琴","斑","替","款","堪","搭","塔","越","趁",
"趋","超","提","堤","博","揭","喜","插","揪","搜","煮","援","裁","搁","搂","搅","握","揉","斯",
"期","欺","联","散","惹","葬","葛","董","葡","敬","葱","落","朝","辜","葵","棒","棋","植","森",
"椅","椒","棵","棍","棉","棚","棕","惠","惑","逼","厨","厦","硬","确","雁","殖","裂","雄","暂",
"雅","辈","悲","紫","辉","敞","赏","掌","晴","暑","最","量","喷","晶","喇","遇","喊","景","践",
"跌","跑","遗","蛙","蛛","蜓","喝","喂","喘","喉","幅","帽","赌","赔","黑","铸","铺","链","销",
"锁","锄","锅","锈","锋","锐","短","智","毯","鹅","剩","稍","程","稀","税","筐","等","筑","策",
"筛","筒","答","筋","筝","傲","傅","牌","堡","集","焦","傍","储","奥","街","惩","御","循","艇",
"舒","番","释","禽","腊","脾","腔","鲁","猾","猴","然","馋","装","蛮","就","痛","童","阔","善",
"羡","普","粪","尊","道","曾","焰","港","湖","渣","湿","温","渴","滑","湾","渡","游","滋","溉",
"愤","慌","惰","愧","愉","慨","割","寒","富","窜","窝","窗","遍","裕","裤","裙","谢","谣","谦",
"属","屡","强","粥","疏","隔","隙","絮","嫂","登","缎","缓","编","骗","缘","瑞","魂","肆","摄",
"摸","填","搏","塌","鼓","摆","携","搬","摇","搞","塘","摊","蒜","勤","鹊","蓝","墓","幕","蓬",
"蓄","蒙","蒸","献","禁","楚","想","槐","榆","楼","概","赖","酬","感","碍","碑","碎","碰","碗",
"碌","雷","零","雾","雹","输","督","龄","鉴","睛","睡","睬","鄙","愚","暖","盟","歇","暗","照",
"跨","跳","跪","路","跟","遣","蛾","蜂","嗓","置","罪","罩","错","锡","锣","锤","锦","键","锯",
"矮","辞","稠","愁","筹","签","简","毁","舅","鼠","催","傻","像","躲","微","愈","遥","腰","腥",
"腹","腾","腿","触","解","酱","痰","廉","新","韵","意","粮","数","煎","塑","慈","煤","煌","满",
"漠","源","滤","滥","滔","溪","溜","滚","滨","粱","滩","慎","誉","塞","谨","福","群","殿","辟",
"障","嫌","嫁","叠","缝","缠","静","碧","璃","墙","撇","嘉","摧","截","誓","境","摘","摔","聚",
"蔽","慕","暮","蔑","模","榴","榜","榨","歌","遭","酷","酿","酸","磁","愿","需","弊","裳","颗",
"嗽","蜻","蜡","蝇","蜘","赚","锹","锻","舞","稳","算","箩","管","僚","鼻","魄","貌","膜","膊",
"膀","鲜","疑","馒","裹","敲","豪","膏","遮","腐","瘦","辣","竭","端","旗","精","歉","熄","熔",
"漆","漂","漫","滴","演","漏","慢","寨","赛","察","蜜","谱","嫩","翠","熊","凳","骡","缩","慧",
"撕","撒","趣","趟","撑","播","撞","撤","增","聪","鞋","蕉","蔬","横","槽","樱","橡","飘","醋",
"醉","震","霉","瞒","题","暴","瞎","影","踢","踏","踩","踪","蝶","蝴","嘱","墨","镇","靠","稻",
"黎","稿","稼","箱","箭","篇","僵","躺","僻","德","艘","膝","膛","熟","摩","颜","毅","糊","遵",
"潜","潮","懂","额","慰","劈","操","燕","薯","薪","薄","颠","橘","整","融","醒","餐","嘴","蹄",
"器","赠","默","镜","赞","篮","邀","衡","膨","雕","磨","凝","辨","辩","糖","糕","燃","澡","激",
"懒","壁","避","缴","戴","擦","鞠","藏","霜","霞","瞧","蹈","螺","穗","繁","辫","赢","糟","糠",
"燥","臂","翼","骤","鞭","覆","蹦","镰","翻","鹰","警","攀","蹲","颤","瓣","爆","疆","壤","耀",
"躁","嚼","嚷","籍","魔","灌","蠢","霸","露","囊","罐"};
// Choose the surname table: surName when simple is set, doubleSurName otherwise.
String[] surnamePool = simple ? surName : doubleSurName;
Random random = new Random();
StringBuilder sb = new StringBuilder(surnamePool[random.nextInt(surnamePool.length)]);
// Pad with random entries of word until the name is len characters long; a
// surname that already has len or more characters is returned unchanged.
int missing = len - sb.length();
while (missing > 0) {
sb.append(word[random.nextInt(word.length)]);
missing--;
}
return sb.toString();
}
// Initializes the address list: downloads a text file and stores one address per line.
static {
String string = HttpUtil.downloadString("https://res.zrbx.com/2021041723095713UY.txt", "UTF-8");
// Accept both CRLF and LF line endings, and skip blank lines so that no empty
// address ends up in the pool.
for (String addr : string.split("\\r?\\n")) {
if (!addr.isEmpty()) {
addrList.add(addr);
}
}
}
}
controller
package com.yxyy.yxpay;
import cn.hutool.http.HttpResponse;
import cn.hutool.http.HttpUtil;
import cn.hutool.json.JSONUtil;
import com.yxyy.yxpay.utils.RandomName;
import lombok.extern.slf4j.Slf4j;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RestController;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.List;
import java.util.StringJoiner;
@RestController
@Slf4j
public class YxpayApplication {
@GetMapping("/test/{size}")
public String index1(@PathVariable Integer size){
for (int i = 0; i < size; i++) {
try {
StringJoiner stringJoiner = new StringJoiner(System.lineSeparator(),"",System.lineSeparator());
List<String> data = RandomName.getData(0, 100000);
for (String datum : data) {
stringJoiner.add(datum);
}
String url = "http://localhost:9201/_bulk";
log.info("=================_bulk================");
log.info("_bulk请求地址:"+url);
log.info("_bulk请求内容長度:"+stringJoiner.toString().length());
HttpResponse response = HttpUtil.createPost(url).header("Content-Type", "application/json").body(stringJoiner.toString()).execute();
log.info("_bulk响应内容:"+JSONUtil.parseObj(response.body()).getBool("errors"));
log.info("");
}catch (Exception e){
StringWriter stringWriter = new StringWriter();
PrintWriter printWriter = new PrintWriter(stringWriter);
e.printStackTrace(printWriter);
log.error(stringWriter.toString());
}
}
return "这里是壹心支付服务";
}
}
1.2 基础统计,平均值、总数、分组和平均值。
#获取2020-05-01的人,并按照tags分组,求每个tag的总数,平均年龄,总体重
GET person/_search?size=0
{
"query": {
"bool": {
"filter": [
{
"range": {
"birdthday": {
"format": "yyyy-MM-dd",
"gte": "2020-05-01",
"lte": "2020-05-01"
}
}
}
]
}
},
"aggs": {
"按照标签分组": {
"terms": {
"field": "tags"
},
"aggs": {
"标签的总数": {
"value_count": {
"field": "tags"
}
},
"平均年龄":{
"avg": {
"field": "age"
}
},
"总体重":{
"sum": {
"field": "weight"
}
}
}
}
}
}
2. histogram
2.1 avg、max、min、value_count、sum、分组
GET person/_search?size=0
{
"aggs": {
"avg_height": { //平均身高
"avg": {
"field": "height"
}
},
"sum_age": { //总年龄
"sum": {
"field": "age"
}
},
"count_id":{ //总数
"value_count": {
"field": "age"
}
},
"max_age":{ //最大年龄
"max": {
"field": "age"
}
},
"min_age":{ //最小年龄
"min": {
"field": "age"
}
},
"aggs_group":{ //按照tags分组并统计每个组的最大体重和总量
"terms": {
"field": "tags"
},
"aggs": {
"group_count": {
"value_count": {
"field": "age"
}
},
"max_age":{
"max": {
"field": "weight"
}
}
}
},
"group_name":{ //按照name分组并取每个组的总量
"terms": {
"field": "name.keyword"
},
"aggs": {
"group_name_": {
"value_count": {
"field": "age"
}
}
}
},
"createtime_group":{ //按照创建时间分组并取每个组的平均年龄
"terms": {
"field": "create_time"
},
"aggs": {
"group_create": {
"date_range": {
"field": "create_time",
"format": "yyyy-MM-dd",
"ranges": [
{ "from": "1900-01-01", "to": "2000-01-01" },
{ "from": "2000-01-01", "to": "now" }
]
},
"aggs": {
"group_create_avg_age": {
"avg": {
"field": "age"
}
}
}
}
}
}
}
}
2.1 range
先使用filter
过滤0-40岁的人,再按照每10岁一组分组,计算每个组的平均年龄
GET person/_search?size=0
{
"query": {
"bool": {
"filter": [
{
"range": {
"age": {
"gte": 0,
"lte": 39
}
}
}
]
}
},
"aggs": {
"age_step10_group": {
"range": {
"field": "age",
"ranges": [
{
"from": 0,
"to": 10
},
{
"from": 10,
"to": 20
},
{
"from": 20,
"to": 30
},
{
"from": 30,
"to": 40
}
]
},
"aggs": {
"aggs_avg":{
"avg": {
"field": "age"
}
}
}
}
}
}
2.2 histogram
先使用filter过滤0-40岁的人,再使用histogram。年龄按每10岁一组分组统计,并计算每组的平均年龄。
GET person/_search?size=0
{
"query": {
"bool": {
"filter": [
{
"range": {
"age": {
"gte": 0,
"lte": 39
}
}
}
]
}
},
"aggs": {
"test_histogram": {
"histogram": {
"field": "age",
"interval": 10,
"min_doc_count": 0
},
"aggs": {
"test_group": {
"avg": {
"field": "age",
"missing": 10
}
}
}
}
}
}
2.3 constant_score
不计算评分,使用filter过滤0-40岁的人,再使用histogram。年龄按每10岁一组分组统计,并计算每组的平均年龄。
GET person/_search?size=0
{
"query": {
"constant_score": {
"filter": {
"range": {
"age": {
"gte": 0,
"lte": 39
}
}
},
"boost": 1.2
}
},
"aggs": {
"test_histogram": {
"histogram": {
"field": "age",
"interval": 10,
"keyed": true, //以桶的key为键、用对象(而不是数组)的形式返回各个桶
"missing": 0,
"min_doc_count": 0
},
"aggs": {
"test_group": {
"avg": {
"field": "age",
"missing": 10
}
}
}
}
}
}
2.4 排序order
将40-45的人按照年龄分组,并按照每组的人数排序。
GET person/_search?size=0
{
"query": {
"bool": {
"filter": [
{
"range": {
"age": {
"gte": 40,
"lte": 45
}
}
}
]
}
},
"aggs": {
"color_group":{
"terms": {
"field": "age",
"order": {
"_count": "asc"
}
}
}
}
}
排序模式的枚举值为:**_count**、**_term**、**_key**。
- **_count**:按文档数排序。对terms、histogram、date_histogram有效。
- **_term**:按词项的字符串值的字母顺序排序。只在terms内使用。
- **_key**:按每个桶的键值数值排序(理论上与_term类似)。只在histogram和date_histogram内使用。
2.4 histogram
过滤生日在 2020-05-01 到 2020-10-01 的人。再按照年龄每10岁一组,求每组的doc数量和平均年龄。使用histogram
#histogram
GET person/_search?size=0
{
"query": {
"bool": {
"filter": [
{
"range": {
"birdthday": {
"format": "yyyy-MM-dd",
"gte": "2020-05-01",
"lte": "2020-10-01"
}
}
}
]
}
},
"aggs": {
"aggs_histogram": {
"histogram": {
"field": "age",
"interval": 10,
"min_doc_count": 0
},
"aggs": {
"sum_doc": {
"value_count": {
"field": "age"
}
},
"avg_age":{
"avg": {
"field": "age"
}
}
}
}
}
}
2.5 date_histogram
过滤生日在 2020-05-01 - 2020-10-01 的人。再按照生日每月分组,求每月的doc数量和平均年龄。
#date_histogram
GET person/_search?size=0
{
"query": {
"bool": {
"filter": [
{
"range": {
"birdthday": {
"format": "yyyy-MM-dd",
"gte": "2020-05-01",
"lte": "2020-10-01"
}
}
}
]
}
},
"aggs": {
"aggs_histogram": {
"date_histogram": {
"field": "birdthday",
"format": "yyyy-MM",
"interval": "month", //可选参数: month/day/hour/minute/quarter/second/week/year/2D(2天)/1H(1小时)
"min_doc_count": 0
},
"aggs": {
"sum_doc": {
"value_count": {
"field": "age"
}
},
"avg_age":{
"avg": {
"field": "age"
}
}
}
}
}
}
不使用filter,改为使用extended_bounds限定统计范围:
GET person/_search?size=0
{
"aggs": {
"aggs_date_histogram": {
"date_histogram": {
"field": "birdthday",
"interval": "month",
"format": "yyyy-MM",
"min_doc_count": 0,
"extended_bounds": {
"min": "2020-05",
"max": "2020-10"
}
},
"aggs": {
"sum_doc": {
"value_count": {
"field": "age"
}
},
"avg_age":{
"avg": {
"field": "age"
}
}
}
}
}
}
date_histogram的字段必须是**date**类型。interval在高版本将会被删除,可替代的参数是:fixed_interval、calendar_interval。需要注意的是,如果使用calendar_interval的话,min_doc_count要设置为0,不然统计会不精准。
2.6auto_date_histogram
过滤生日在 2020-05-01 - 2020-10-01 的人。再按照生日分组,不指定分组规则,只规定分10个组,es会动态判断我们的需求,做出正确的分组。
#auto_date_histogram
GET person/_search?size=0
{
"query": {
"bool": {
"filter": [
{
"range": {
"birdthday": {
"format": "yyyy-MM-dd",
"gte": "2020-05-01",
"lte": "2020-10-01"
}
}
}
]
}
},
"aggs": {
"aggs_auto_date_histogram": {
"auto_date_histogram": {
"field": "birdthday",
"format": "yyyy-MM-dd",
"buckets":10
},
"aggs": {
"sum_doc": {
"value_count": {
"field": "age"
}
},
"avg_age":{
"avg": {
"field": "age"
}
}
}
}
}
}
2.7 cumulative_sum
cumulative_sum
是累计函数,它会把每个结果累计计算。
过滤生日在2020-09-01和2020-10-01之间的人,并以每10天一组分组,计算总数,平均年龄以及累计总数。
GET person/_search?size=0
{
"query": {
"bool": {
"filter": [
{
"range": {
"birdthday": {
"format": "yyyy-MM-dd",
"gte": "2020-09-01",
"lte": "2020-10-01"
}
}
}
]
}
},
"aggs": {
"aggs_date_histogram": {
"date_histogram": {
"field": "birdthday",
"interval": "10D",
"min_doc_count": 0
},
"aggs": {
"总数": {
"value_count": {
"field": "age"
}
},
"avg_age":{
"avg": {
"field": "age"
}
},
"累计总数":{
"cumulative_sum": {
"buckets_path": "总数"
}
}
}
}
}
}
2.8 多层聚合搜索(下钻)
过滤生日在2009-01-01和2010-01-01之间的人,并按照标签分组,每个标签组内再次按照年龄分组,计算每个标签组内的每个年龄组的doc数量。
GET person/_search?size=0
{
"query": {
"bool": {
"filter": [
{
"range": {
"birdthday": {
"format": "yyyy-MM-dd",
"gte": "2009-01-01",
"lte": "2010-01-01"
}
}
}
]
}
},
"aggs": {
"按照标签分组": {
"terms": {
"field": "tags"
},
"aggs": {
"按照年龄分组": {
"terms": {
"field": "age"
},
"aggs":{
"doc总量":{
"value_count": {
"field": "age"
}
}
}
}
}
}
}
}
3. missing
missing会在聚合时把缺少该字段值(值为null)的文档当作具有指定的值来处理。
查询生日在2009-01-01到2010-01-01的人数,按照年份分组,统计每年总数。
GET person/_search?size=0
{
"query": {
"bool": {
"filter": [
{
"range": {
"birdthday": {
"format": "yyyy-MM-dd",
"gte": "2009-01-01",
"lte": "2010-01-01"
}
}
}
]
}
},
"aggs": {
"aggs_date_histogram": {
"date_histogram": {
"field": "birdthday",
"interval": "year",
"min_doc_count": 0,
"missing": "1940-09-09"
},
"aggs": {
"总数": {
"value_count": {
"field": "age"
}
}
}
}
}
}
#结果
[
{
"key_as_string":"2009-01-01",
"key":1230768000000,
"doc_count":510486,
"总数":{
"value":510486
}
},
{
"key_as_string":"2010-01-01",
"key":1262304000000,
"doc_count":1398,
"总数":{
"value":1398
}
}
]
#修改2009年的一个数据
PUT person/_doc/B3T16HgBwlsip8eIiMts
{
//"birdthday" : "2009-11-04",
"color" : "Yellow",
"create_time" : "2011-10-08 22:08:12",
"name" : "锺务张",
"weight" : 90.41,
"addr" : "山东省东营市东营区西四路174号",
"age" : 76,
"height" : 184.85,
"tags" : [
"粗心大意",
"一表非凡",
"拜金",
"易怒",
"喜操纵",
"善变"
]
}
#再次查询的结果
[
{
"key_as_string":"2009-01-01",
"key":1230768000000,
"doc_count":510485,
"总数":{
"value":510485
}
},
{
"key_as_string":"2010-01-01",
"key":1262304000000,
"doc_count":1398,
"总数":{
"value":1398
}
}
]
- 大部分聚合搜索都支持missing参数。
- missing也有自己单独的聚合搜索:
#查询birdthday为空值的_doc
GET person/_search?size=0
{
"aggs": {
"查询birdthday为空值的_doc": {
"missing": {
"field": "birdthday"
}
}
}
}
4. stats、string_stats、extended_stats、熵
stats是最常用的统计的整合查询,看下面的QueryDSL。
按照生日过滤后查询身高最常用的统计数据。
```json
#身高最常用的查询
GET person/_search?size=0
{
"aggs": {
"test_height_stats": {
"stats": {
"field": "height"
}
}
}
}
#结果
{
"took" : 80,
"timed_out" : false,
"_shards" : { "total" : 10, "successful" : 10, "skipped" : 0, "failed" : 0 },
"hits" : { "total" : { "value" : 10000, "relation" : "gte" }, "max_score" : null, "hits" : [ ] },
"aggregations" : {
"test_height_stats" : {
"count" : 511883,
"min" : 150.0,
"max" : 209.99000549316406,
"avg" : 180.0088688679315,
"sum" : 9.214347982272339E7
}
}
}
```
`stats`只支持数值类型的字段,如果想要统计文本(keyword)类型的字段,需要把`stats`替换为`string_stats`。<br />`extended_stats`是`stats`的扩展,除了count、min、max、avg、sum之外,还会返回平方和、方差、标准差等统计值。
```json
#extended_stats查询height字段
GET person/_search?size=0
{
"aggs": {
"test_height_stats": {
"extended_stats": {
"field": "height"
}
}
}
}
#结果
{
"took" : 3497,
"timed_out" : false,
"_shards" : {
"total" : 20,
"successful" : 20,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 10000,
"relation" : "gte"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"test_height_stats" : {
"count" : 49600000,
"min" : 150.0,
"max" : 209.99000549316406,
"avg" : 179.99486968326323,
"sum" : 8.927745536289856E9,
"sum_of_squares" : 1.6218301904987834E12,
"variance" : 300.03621227733487,
"variance_population" : 300.03621227733487,
"variance_sampling" : 300.0362183264522,
"std_deviation" : 17.321553402548364,
"std_deviation_population" : 17.321553402548364,
"std_deviation_sampling" : 17.321553577160802,
"std_deviation_bounds" : {
"upper" : 214.63797648835995,
"lower" : 145.3517628781665,
"upper_population" : 214.63797648835995,
"lower_population" : 145.3517628781665,
"upper_sampling" : 214.63797683758483,
"lower_sampling" : 145.35176252894163
}
}
}
}
“熵(entropy)”是根据字段内每个字符出现的概率计算出来的香农熵;把show_distribution设置为true,可以同时返回每个字符出现的概率分布(distribution)。
#熵show_distribution
GET person/_search?size=0
{
"aggs": {
"test_string_stats": {
"string_stats": {
"field": "name.keyword",
"show_distribution":true
}
}
}
}
#结果
{
"took" : 26168,
"timed_out" : false,
"_shards" : {
"total" : 20,
"successful" : 20,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 10000,
"relation" : "gte"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"test_string_stats" : {
"count" : 49600000,
"min_length" : 3,
"max_length" : 3,
"avg_length" : 3.0,
"entropy" : 11.124187802116122, //熵值
"distribution" : {
"后" : 0.0017931922043010753,
"阎" : 0.0015224731182795698,
"郁" : 0.0015212365591397848,
"从" : 0.0010349932795698925,
"万" : 0.0010343413978494624,
"姜" : 0.0010343077956989247,
"乔" : 0.0010342540322580646,
"富" : 0.0010332123655913978,
"融" : 0.0010330040322580646,
"吉" : 0.0010329233870967742,
"容" : 0.0010323387096774193,
"祝" : 0.0010322983870967742,
"商" : 0.0010321975806451614,
"花" : 0.0010320564516129033,
"权" : 0.0010320362903225807,
"伏" : 0.0010319422043010753,
"相" : 0.0010318817204301076,
"经" : 0.0010318615591397848,
"孔" : 0.0010317674731182796,
"路" : 0.0010317540322580645,
"贡" : 0.001031633064516129,
"丁" : 0.001031606182795699,
"杨" : 0.0010315793010752688,
"唐" : 0.0010315658602150538,
//.....太多了
}
}
}
}
5. 多bucket排序
按照生日过滤后按照标签分组,每个标签组内再过滤出肤色为Yellow的doc,并对这些doc的年龄做extended_stats统计。标签桶按照子聚合路径排序(升序),排序路径为:**group_color>stats.sum**
GET person/_search?size=0
{
"query": {
"bool": {
"filter": [
{
"range": {
"birdthday": {
"format": "yyyy-MM-dd",
"gte": "2009-01-01",
"lte": "2010-01-01"
}
}
}
]
}
},
"aggs": {
"按照标签分组": {
"terms": {
"field": "tags",
"order": {
"group_color>stats.sum": "asc"
}
},
"aggs": {
"group_color": {
"filter": {
"term": {
"color": "Yellow"
}
},
"aggs":{
"stats": {
"extended_stats": {
"field": "age"
}
}
}
}
}
}
}
}
6. cardinality 去重
GET person/_search?size=0
{
"aggs": {
"索引的doc总数量":{
"value_count": {
"field": "age"
}
},
"根据年龄去重后的数量": {
"cardinality": {
"field": "age"
}
},
"根据生日去重后的数量": {
"cardinality": {
"field": "birdthday"
}
},
"根据tags去重后的总数量":{
"cardinality": {
"field": "tags"
}
}
}
}
precision_threshold
是以**牺牲内存**
的方式控制去重的精度,es默认是1%-6%的精度损失,设置precision_threshold
可以有效的控制精度损失,precision_threshold
的控制范围是0-40000,默认为3000, 值越大精度越高,值越小精度越低。
GET person/_search?size=0
{
"aggs": {
"索引的doc总数量":{
"value_count": {
"field": "age"
}
},
"根据年龄去重后的数量": {
"cardinality": {
"field": "age"
}
},
"根据生日去重后的数量": {
"cardinality": {
"field": "birdthday",
"precision_threshold":3000 //精度控制
}
},
"根据tags去重后的总数量":{
"cardinality": {
"field": "tags",
"precision_threshold":40000 //精度控制
}
},
"使用脚本根据年龄和性别组合去重":{
"cardinality": {
"script": {
"lang": "painless",
"source": "doc['age'].value+'-'+doc['sex'].value"
}
}
}
}
}
内存使用的计算为: memory = precision_threshold * 8
;
假设:precision_threshold = 3000; 则使用的memory的大小为 24000Byte ≈ 23.44KB。看似很小,但是如果请求过多的话,内存也是扛不住的。一般这个字段就使用3000的默认值,不用刻意去设置。
何时调整precision_threshold
? 当数据量很大并且重复项很多的时候可以把这个字段调整大一点,这样性能会有显著提升,比如:性别去重、肤色去重、年龄去重等等。
#4600W数据,性别去重,
#不使用precision_threshold耗时
{
"took" : 5679,
}
#使用precision_threshold=40000 耗时
{
"took" : 5690,
}
# What??没提升??
es内部使用HyperLogLog++
算法去重,它会动态地为查询的field的值生成哈希值。所以field的不同值越多,哈希值越多,越占用内存;但是比如性别等字段,不管多少doc,它的值只有2个,所以只会生成2个哈希值。
几乎所有的aggs都支持script
7. top hits aggs
查找年龄是13岁的,按照肤色分组,按照组内元素数量排序取前2组, 组内元素按照体重倒序排序取前2个元素。
GET person/_search?size=0
{
"query": {
"bool": {
"must": [
{
"constant_score": {
"filter": {
"term": {
"age": "13"
}
}
}
}
]
}
},
"aggs": {
"按照肤色分组,按照组内元素数量排序取前2组": {
"terms": {
"field": "color",
"size": 2,
"order": {
"_count": "asc"
}
},
"aggs": {
"组内元素按照体重倒叙排序取前2个元素": {
"top_hits": {
"size": 2,
"sort":[
{
"weight": "desc"
}
]
}
}
}
}
}
}
8. filters
查找年龄是12岁、13岁、14岁的人,按照年龄分组,求每组内的平均体重。
方式一:常规查询
#方法一:常规过滤后统计
GET person/_search?size=0
{
"query": {
"bool": {
"must": [
{
"constant_score": {
"filter": {
"terms": {
"age": [ "12", "13", "14" ]
}
}
}
}
]
}
},
"aggs": {
"按照年龄分组": {
"terms": {
"field": "age"
},
"aggs": {
"每个分组内的平均体重": {
"avg": {
"field": "weight"
}
}
}
}
}
}
#方式二:使用filters
GET person/_search?size=0
{
"aggs": {
"filters分组": {
"filters": {
"other_bucket": true,
"filters": {
"年龄12统计总数": {
"term": {
"age": "12"
}
},
"年龄13统计总数": {
"term": {
"age": "13"
}
},
"年龄13或14统计总数": {
"terms": {
"age": ["13","14"]
}
},
"sex是MAN的统计": {
"terms": {
"sex": ["MAN"]
}
}
}
}
}
}
}
9. median_absolute_deviation
绝对中位差
假设有一列数:[1, 3, 1, 4, 5, 2, 1]。 绝对中位差会对这列数进行如下运算:
- 原始数组:[1, 3, 1, 4, 5, 2, 1]
- 排序:[1, 1, 1, 2, 3, 4, 5]
- 取中间数,如果中间是2个数则取平均值:2
- 把中间数与排序后的数相减,获得数组:[1, 1, 1, 0, -1, -2, -3]
- 取绝对值,获得数组:[1, 1, 1, 0, 1, 2, 3]
- 对绝对值数组重新排序:[0, 1, 1, 1, 1, 2, 3],再取中间数,获得最终结果:1
经过运算后,[1, 3, 1, 4, 5, 2, 1]的绝对中位差是1。
10. meta
meta就是给查询结果添加一些数据。
GET person/_search?size=0
{
"query": {
"bool": {
"must": [
{
"constant_score": {
"filter": {
"terms": {
"age": [ "12", "13", "14" ]
}
}
}
}
]
}
},
"aggs": {
"年龄分组": {
"terms": {
"field": "age"
},
"meta":{
"自定义字段":"自定义字段值"
},
"aggs": {
"取每组的平均值": {
"avg": {
"field": "age"
}
}
}
}
}
}
#结果
{
"took" : 343,
"timed_out" : false,
"_shards" : {
"total" : 20,
"successful" : 20,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 10000,
"relation" : "gte"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"年龄分组" : {
"meta" : {
"自定义字段" : "自定义字段值"
},
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 12,
"doc_count" : 709319,
"取每组的平均值" : {
"value" : 12.0
}
},
{
"key" : 13,
"doc_count" : 708819,
"取每组的平均值" : {
"value" : 13.0
}
},
{
"key" : 14,
"doc_count" : 708080,
"取每组的平均值" : {
"value" : 14.0
}
}
]
}
}
}
11. 深度优先和广度优先
假设我们要按标签分组,每个标签一个桶,按每个标签桶的doc数量排序后找出最大的两个桶,然后再按照性别分组,求出每组的平均年龄。使用聚合是非常简单的:
GET person/_search?size=0
{
"aggs": {
"按照标签分组": {
"terms": {
"field": "tags",
"size": 2,
"order": {
"_key": "desc"
},
"collect_mode": "depth_first"
},
"aggs": {
"按照性别分组":{
"terms": {
"field": "sex",
"size": 2
},
"aggs": {
"平均年龄": {
"avg": {
"field": "age"
}
}
}
}
}
}
}
}
这看起来是一个简单的聚合查询,最终只返回 4条数据!但是, 这个看上去简单的查询可以轻而易举地消耗大量内存,我们可以通过在内存中构建一个树来查看这个 terms 聚合。
“按照标签分组”
的聚合会构建树的第一层,每个标签都有一个桶。然后,内套在第一层的每个节点之下, “按照性别分组”
聚合会构建第二层,每个性别一个桶。这意味着每个标签都会生成2个桶,如果有1W个标签的话,会生成2W个桶。
Elasticsearch 允许我们改变聚合的 集合模式 ,就是为了应对这种状况。 我们之前展示的策略叫做 **深度优先 **
,它是默认设置, 先构建完整的树,然后修剪无用节点。 深度优先 的方式对于大多数聚合都能正常工作,但对于这样例子的情形就不太适用。
为了应对这些特殊的应用场景,我们应该使用另一种集合策略叫做 **广度优先**
。这种策略的工作方式有些不同,它先执行第一层聚合, 在继续下一层聚合之前会先做修剪。
在我们的示例中, “按照标签分组”
聚合会首先执行,在这个时候,我们的树只有一层,但我们已经知道了前 2个最大的桶!这就没有必要保留其他的桶的信息,因为它们无论如何都不会出现在前2位中。因为我们已经知道了前2个最大的标签,我们可以安全的修剪其他节点。修剪后,下一层是基于它的 执行模式读入的,重复执行这个过程直到聚合完成。要使用广度优先,只需简单 的通过参数 collect_mode
开启:
GET person/_search?size=0
{
"aggs": {
"按照标签分组": {
"terms": {
"field": "tags",
"size": 2,
"order": {
"_key": "desc"
},
"collect_mode": "breadth_first"
},
"aggs": {
"按照性别分组":{
"terms": {
"field": "sex",
"size": 2
},
"aggs": {
"平均年龄": {
"avg": {
"field": "age"
}
}
}
}
}
}
}
}
**深度优先**
是先把所有doc构建完整的terms树,再进行聚合等操作。**广度优先**
是先进行第一层数据筛选,筛选完成之后再进行第二层筛选,这样一层一层筛选下去的数据会越来越少,越来越快。collect_mode
默认是"depth_first"
广度优先仅仅适用于每个组的聚合数量远远小于当前总组数的情况下,因为广度优先会在内存中缓存裁剪后的仅仅需要缓存的每个组的所有数据,以便于它的子聚合分组查询可以复用上级聚合的数据。
广度优先的内存使用情况与裁剪后的缓存分组数据量是成线性的。对于很多聚合来说,每个桶内的文档数量是相当大的。
想象一种按月分组的直方图,总组数肯定是固定的,因为每年只有12个月,这个时候每个月下的数据量可能非常大。这使广度优先不是一个好的选择,这也是为什么深度优先作为默认策略的原因。
针对上面的例子,如果数据量越大,那么默认的使用深度优先的聚合模式生成的总分组数就会非常多,但是预估二级的聚合字段分组后的数据量相比总的分组数会小很多所以这种情况下使用广度优先的模式能大大节省内存,从而通过优化聚合模式来大大提高了在某些特定场景下聚合查询的成功率。