1、 Es基本指令

  1. index、type、document:索引、类型、文档
  1. #POST 带 _update 提交时会先对比数据:如果内容一致,_seq_no 和 _version 都不会变(noop)
  2. #如果不带 _update,每次提交都会使 _version 和 _seq_no 加 1
  3. POST /customer/external/1/_update
  4. {
  5. "doc":{
  6. "name":"xuwei"
  7. }
  8. }
  9. #PUT 会直接替换
  10. PUT /customer/external/1/
  11. {
  12. "name": "xuwei"
  13. }

2、导入批量数据

  1. POST /customer/external/_bulk
  2. #https://github.com/elastic/elasticsearch/blob/master/docs/src/test/resources/accounts.json
  3. #https://segmentfault.com/a/1190000037572389

3、 请求的两种方式

  1. 1.Request API URL
  2. 2.Query DSL (常用)
  3. GET /bank/_search
  4. {
  5. "query": {
  6. "match_all": {}
  7. },
  8. "sort": [
  9. {
  10. "account_number": {
  11. "order": "desc"
  12. }
  13. ,
  14. "balance": {
  15. "order": "asc"
  16. }
  17. }
  18. ],
  19. "_source": ["balance","firstname"] #不使用默认是select *
  20. }

3.1 Query

1、 match

  1. #match_phrase 短语匹配(分词后各词须按顺序连续出现);match 会分词,命中任一分词即可匹配
  2. GET bank/_search
  3. {
  4. "query": {
  5. "match_phrase": {
  6. "address": "kings Hwy"
  7. }
  8. }
  9. }
  10. #multi_match 多字段查询:任一字段包含即可匹配,同样支持分词
  11. GET bank/_search
  12. {
  13. "query": {
  14. "multi_match": {
  15. "query": "mill Movico",
  16. "fields": ["address","city"]
  17. }
  18. }
  19. }

2、 复合查询 Bool

  1. //Must
  2. GET bank/_search
  3. {
  4. "query": {
  5. "bool": {
  6. "must": [
  7. {
  8. "match": {
  9. "gender": "F"
  10. }
  11. },
  12. {
  13. "match": {
  14. "address": "mill"
  15. }
  16. }
  17. ]
  18. }
  19. }
  20. }
  21. //must_not
  22. GET bank/_search
  23. {
  24. "query": {
  25. "bool": {
  26. "must": [
  27. {
  28. "match": {
  29. "gender": "F"
  30. }
  31. },
  32. {
  33. "match": {
  34. "address": "mill Street"
  35. }
  36. }
  37. ],
  38. "must_not": [
  39. {
  40. "match": {
  41. "age": "32"
  42. }
  43. }
  44. ]
  45. }
  46. }
  47. }
  48. //should 满足时会提高相关性得分;存在 must 时,不满足 should 也不影响是否命中
  49. GET bank/_search
  50. {
  51. "query": {
  52. "bool": {
  53. "must": [
  54. {
  55. "match": {
  56. "gender": "F"
  57. }
  58. },
  59. {
  60. "match": {
  61. "address": "mill Street"
  62. }
  63. }
  64. ],
  65. "should": [
  66. {
  67. "match": {
  68. "lastname": "Bates"
  69. }
  70. }
  71. ]
  72. }
  73. }
  74. }
  75. // must 会提供相关性得分 filter不会提供相关性得分
  76. GET bank/_search
  77. {
  78. "query": {
  79. "bool": {
  80. "filter": [
  81. {
  82. "range": {
  83. "age": {
  84. "gte": 10,
  85. "lte": 30
  86. }
  87. }
  88. }
  89. ]
  90. }
  91. }
  92. }
  93. GET bank/_search
  94. {
  95. "query": {
  96. "bool": {
  97. "must": [
  98. {
  99. "range": {
  100. "age": {
  101. "gte": 10,
  102. "lte": 30
  103. }
  104. }
  105. }
  106. ]
  107. }
  108. }
  109. }
  110. //term 推荐用于检索精确值(非 text 字段),对 text 字段不要使用 term;match_phrase 用于短语匹配
  111. #全文检索(text 字段)用 match;精确值(非 text 字段)检索用 term;text 字段的整值精确匹配用 字段名.keyword
  112. GET /bank/_search
  113. {
  114. "query": {
  115. "term": {
  116. "age": {
  117. "value": "28"
  118. }
  119. }
  120. }
  121. }
  122. //精确匹配 keyword
  123. GET /bank/_search
  124. {
  125. "query": {
  126. "match": {
  127. "address.keyword": "302 Howard Place"
  128. }
  129. }
  130. }

2、数据分析分组(aggs)

  1. //统计所有文档(match_all)的年龄分布及各年龄段的平均年龄(子聚合),并统计整体的平均余额
  2. GET bank/_search
  3. {
  4. "query": {
  5. "match_all": {
  6. }
  7. },
  8. "aggs": {
  9. "ageFAgg": {
  10. "terms": {
  11. "field": "age",
  12. "size": 10
  13. },
  14. "aggs": {
  15. "ageAgg": {
  16. "avg": {
  17. "field": "age"
  18. }
  19. }
  20. }
  21. },
  22. "banlanceAgg":{
  23. "avg": {
  24. "field": "balance"
  25. }
  26. }
  27. }
  28. }
  29. #搜索所有年龄分布下的性别分布的平均年龄 如:M Count: 24 avg:32,F Count: 24 avg:32
  30. GET bank/_search
  31. {
  32. "query": {
  33. "match_all": {}
  34. },
  35. "aggs": {
  36. "ageFAgg": {
  37. "terms": {
  38. "field": "age",
  39. "size": 10
  40. },
  41. "aggs": {
  42. "genderAgg": {
  43. "terms": {
  44. "field": "gender.keyword"
  45. },
  46. "aggs": {
  47. "avg2Agg": {
  48. "avg": {
  49. "field": "age"
  50. }
  51. }
  52. }
  53. }
  54. }
  55. }
  56. }
  57. }

3、属性映射

  1. #创建索引并指定映射
  2. PUT /my_index
  3. {
  4. "mappings": {
  5. "properties": {
  6. "age":{"type": "integer"}, //普通类型
  7. "email":{"type": "keyword"},//全词精确查找
  8. "name":{"type": "text"}//自动分词检索
  9. }
  10. }
  11. }
  12. #修改映射:只能添加新属性,已存在属性的映射不能修改(要修改需新建索引并迁移数据)
  13. PUT /my_index/_mapping
  14. {
  15. "properties": {
  16. "employee-id": {
  17. "type": "keyword",
  18. "index": false, //不被索引(不能被检索);默认是 true,即可以被检索
  19. "doc_values": false //不用于排序、聚合等;仅作冗余存储的字段可以同时关闭 index 和 doc_values
  20. }
  21. }
  22. }

4、数据迁移/映射修改

  1. #新映射规则
  2. PUT newbank
  3. {
  4. "mappings": {
  5. "properties": {
  6. "account_number": {
  7. "type": "long"
  8. },
  9. "address": {
  10. "type": "text"
  11. },
  12. "age": {
  13. "type": "long"
  14. },
  15. "balance": {
  16. "type": "long"
  17. },
  18. "city": {
  19. "type": "text"
  20. },
  21. "email": {
  22. "type": "keyword"
  23. },
  24. "employer": {
  25. "type": "keyword"
  26. },
  27. "firstname": {
  28. "type": "text"
  29. },
  30. "gender": {
  31. "type": "text"
  32. },
  33. "lastname": {
  34. "type": "text",
  35. "fields": {
  36. "keyword": {
  37. "type": "keyword",
  38. "ignore_above": 256
  39. }
  40. }
  41. },
  42. "state": {
  43. "type": "keyword"
  44. }
  45. }
  46. }
  47. }
  48. #迁移 index/type/doc
  49. POST _reindex
  50. {
  51. "source": {
  52. "index": "bank",
  53. "type": "count" #如果是存在type的源数据 加上type ,否则不需要加
  54. },
  55. "dest": {
  56. "index": "newbank"
  57. }
  58. }

4、分词器

  1. #安装 就是下载一个压缩包解压到plugins
  2. chmod -R 777 /mydata/elasticsearch/plugins/ik
  3. cd /mydata/elasticsearch/plugins/ik
  4. wget https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v7.6.2/elasticsearch-analysis-ik-7.6.2.zip
  5. unzip elasticsearch-analysis-ik-7.6.2.zip
  6. #使用 支持中文分词
  7. POST _analyze
  8. {
  9. "analyzer": "ik_smart",
  10. "text": "我是中国人"
  11. }
  12. POST _analyze
  13. {
  14. "analyzer": "ik_max_word",
  15. "text": "我是中国人"
  16. }
  17. #建立索引的时候指定默认的 IK 分词器
  18. PUT /my_index
  19. {
  20. }

5、 RestHighLevelClient

  1. # es config
  2. @Configuration
  3. public class EsConfig {
  4. public static final RequestOptions COMMON_OPTIONS;
  5. static {
  6. RequestOptions.Builder builder = RequestOptions.DEFAULT.toBuilder();
  7. //后期需要加权限认证等
  8. // builder.addHeader("Authorization", "Bearer " + TOKEN);
  9. // builder.setHttpAsyncResponseConsumerFactory(
  10. // new HttpAsyncResponseConsumerFactory
  11. // .HeapBufferedResponseConsumerFactory(30 * 1024 * 1024 * 1024));
  12. COMMON_OPTIONS = builder.build();
  13. }
  14. @Bean
  15. public RestHighLevelClient restHighLevelClient() {
  16. RestHighLevelClient restHighLevelClient = new RestHighLevelClient(RestClient.builder
  17. (new HttpHost("t.freefish.info", 9200, "http")));
  18. return restHighLevelClient;
  19. }
  20. }
  1. DSL查询语句
  2. GET bank/_search
  3. {
  4. "query": {
  5. "match": {
  6. "address": "mill"
  7. }
  8. },
  9. "aggs": {
  10. "ageAgg": {
  11. "terms": {
  12. "field": "age",
  13. "size": 10
  14. },
  15. "aggs": {
  16. "avgAgg": {
  17. "avg": {
  18. "field": "age"
  19. }
  20. },
  21. "balanceAgg": {
  22. "avg": {
  23. "field": "balance"
  24. }
  25. }
  26. }
  27. }
  28. }
  29. }
  30. DSL 直接查询返回的结果
  31. {
  32. "took" : 1,
  33. "timed_out" : false,
  34. "_shards" : {
  35. "total" : 1,
  36. "successful" : 1,
  37. "skipped" : 0,
  38. "failed" : 0
  39. },
  40. "hits" : {
  41. "total" : {
  42. "value" : 4,
  43. "relation" : "eq"
  44. },
  45. "max_score" : 5.4032025,
  46. "hits" : [
  47. {
  48. "_index" : "bank",
  49. "_type" : "count",
  50. "_id" : "970",
  51. "_score" : 5.4032025,
  52. "_source" : {
  53. "account_number" : 970,
  54. "balance" : 19648,
  55. "firstname" : "Forbes",
  56. "lastname" : "Wallace",
  57. "age" : 28,
  58. "gender" : "M",
  59. "address" : "990 Mill Road",
  60. "employer" : "Pheast",
  61. "email" : "forbeswallace@pheast.com",
  62. "city" : "Lopezo",
  63. "state" : "AK"
  64. }
  65. },
  66. {
  67. "_index" : "bank",
  68. "_type" : "count",
  69. "_id" : "136",
  70. "_score" : 5.4032025,
  71. "_source" : {
  72. "account_number" : 136,
  73. "balance" : 45801,
  74. "firstname" : "Winnie",
  75. "lastname" : "Holland",
  76. "age" : 38,
  77. "gender" : "M",
  78. "address" : "198 Mill Lane",
  79. "employer" : "Neteria",
  80. "email" : "winnieholland@neteria.com",
  81. "city" : "Urie",
  82. "state" : "IL"
  83. }
  84. },
  85. {
  86. "_index" : "bank",
  87. "_type" : "count",
  88. "_id" : "345",
  89. "_score" : 5.4032025,
  90. "_source" : {
  91. "account_number" : 345,
  92. "balance" : 9812,
  93. "firstname" : "Parker",
  94. "lastname" : "Hines",
  95. "age" : 38,
  96. "gender" : "M",
  97. "address" : "715 Mill Avenue",
  98. "employer" : "Baluba",
  99. "email" : "parkerhines@baluba.com",
  100. "city" : "Blackgum",
  101. "state" : "KY"
  102. }
  103. },
  104. {
  105. "_index" : "bank",
  106. "_type" : "count",
  107. "_id" : "472",
  108. "_score" : 5.4032025,
  109. "_source" : {
  110. "account_number" : 472,
  111. "balance" : 25571,
  112. "firstname" : "Lee",
  113. "lastname" : "Long",
  114. "age" : 32,
  115. "gender" : "F",
  116. "address" : "288 Mill Street",
  117. "employer" : "Comverges",
  118. "email" : "leelong@comverges.com",
  119. "city" : "Movico",
  120. "state" : "MT"
  121. }
  122. }
  123. ]
  124. },
  125. "aggregations" : {
  126. "ageAgg" : {
  127. "doc_count_error_upper_bound" : 0,
  128. "sum_other_doc_count" : 0,
  129. "buckets" : [
  130. {
  131. "key" : 38,
  132. "doc_count" : 2,
  133. "balanceAgg" : {
  134. "value" : 27806.5
  135. },
  136. "avgAgg" : {
  137. "value" : 38.0
  138. }
  139. },
  140. {
  141. "key" : 28,
  142. "doc_count" : 1,
  143. "balanceAgg" : {
  144. "value" : 19648.0
  145. },
  146. "avgAgg" : {
  147. "value" : 28.0
  148. }
  149. },
  150. {
  151. "key" : 32,
  152. "doc_count" : 1,
  153. "balanceAgg" : {
  154. "value" : 25571.0
  155. },
  156. "avgAgg" : {
  157. "value" : 32.0
  158. }
  159. }
  160. ]
  161. }
  162. }
  163. }
  164. //API
  165. @Test
  166. public void SearchData() throws IOException {
  167. SearchRequest search = new SearchRequest("bank");
  168. //指定DSL
  169. SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
  170. SearchSourceBuilder query = searchSourceBuilder.query(QueryBuilders.matchQuery("address", "mill"));
  171. //年龄分布聚合
  172. TermsAggregationBuilder ageAgg = AggregationBuilders.terms("ageAgg").field("age").size(5);
  173. //年龄分布子聚合 avgAgg
  174. ageAgg.subAggregation(AggregationBuilders.avg("avgAgg").field("age"));
  175. //年龄分布子聚合 balanceAgg
  176. ageAgg.subAggregation(AggregationBuilders.avg("balanceAgg").field("balance"));
  177. query.aggregation(ageAgg);
  178. search.source(searchSourceBuilder);
  179. log.info("检索条件{}", query);
  180. SearchResponse ret = restHighLevelClient.search(search, RequestOptions.DEFAULT);
  181. log.info("检索结果{}", ret);
  182. SearchHit[] hits = ret.getHits().getHits();
  183. for (int i = 0; i < hits.length; i++) {
  184. System.out.println(JSON.parseObject(hits[i].getSourceAsString(), Acount.class));
  185. }
  186. //获取聚合信息
  187. Aggregations aggregations = ret.getAggregations();
  188. Terms ageAgg_ret = aggregations.get("ageAgg");
  189. //获取buckets
  190. List<? extends Terms.Bucket> buckets = ageAgg_ret.getBuckets();
  191. //每个buckets包含agg信息
  192. buckets.forEach(p -> {
  193. System.out.println("bucket:" + p.getKeyAsString());
  194. Aggregations aggs = p.getAggregations();
  195. Avg avgAgg = aggs.get("avgAgg");
  196. Avg balanceAgg = aggs.get("balanceAgg");
  197. System.out.println("avgAgg.getValue()" + avgAgg.getValue());
  198. System.out.println("balanceAgg.getValue()" + balanceAgg.getValue());
  199. });
  200. }

6、数组扁平处理

  1. #默认 ES 会对对象数组做扁平化处理;要保持数组内各对象相互独立,需要使用 nested(嵌套)类型
  2. PUT my_index/my_type/1
  3. {
  4. "group" : "fans",
  5. "user" : [
  6. {
  7. "first" : "John",
  8. "last" : "Smith"
  9. },
  10. {
  11. "first" : "Alice",
  12. "last" : "White"
  13. }
  14. ]
  15. }
  16. #实际存储结构为
  17. {
  18. "group" : "fans",
  19. "user.first" : [ "alice", "john" ],
  20. "user.last" : [ "smith", "white" ]
  21. }
  22. #查询 first=Alice 且 last=Smith:两者分属不同对象,本不应命中,但扁平化后被查到了
  23. GET my_index/_search
  24. {
  25. "query": {
  26. "bool": {
  27. "must": [
  28. {
  29. "match": {
  30. "user.first": "Alice"
  31. }
  32. },
  33. {
  34. "match": {
  35. "user.last": "Smith"
  36. }
  37. }
  38. ]
  39. }
  40. }
  41. }

6、正确的方式

  1. //创建nested索引
  2. PUT my_index
  3. {
  4. "mappings": {
  5. "properties": {
  6. "user":{
  7. "type": "nested"
  8. }
  9. }
  10. }
  11. }
  12. //存入数据
  13. PUT my_index/my_type/1
  14. {
  15. "group" : "fans",
  16. "user" : [
  17. {
  18. "first" : "John",
  19. "last" : "Smith"
  20. },
  21. {
  22. "first" : "Alice",
  23. "last" : "White"
  24. }
  25. ]
  26. }
  27. //此时用普通 bool 查询查不到数据(符合预期);nested 字段需要使用 nested 查询来检索
  28. GET my_index/_search
  29. {
  30. "query": {
  31. "bool": {
  32. "must": [
  33. {
  34. "match": {
  35. "user.first": "Alice"
  36. }
  37. },
  38. {
  39. "match": {
  40. "user.last": "Smith"
  41. }
  42. }
  43. ]
  44. }
  45. }
  46. }