深入es查询

es主要分为两类查询,term查询和全文查询

term

term是表达语义的最小单位,在搜索的时候基本都要使用到term。 term查询的种类有:Term Query、Range Query等。
在ES中,Term查询不会对输入进行分词处理,而是将输入作为一个整体,在倒排索引中查找准确的词项。 我们也可以使用 Constant Score 将查询转换为一个filter,避免算分,利用缓存,提高查询的效率。

term查询不会做任何的分词处理,match做分词处理后进行查询
term与terms,单条件和多条件

  1. GET /house-*/_search
  2. {
  3. "query": {
  4. "term": {
  5. "house_listing_time": {
  6. "value": 1
  7. }
  8. }
  9. }
  10. }
  11. GET /house-*/_search
  12. {
  13. "query": {
  14. "terms": {
  15. "FIELD": [
  16. 111,
  17. 222
  18. ]
  19. }
  20. }
  21. }

term查询按照时间进行排序

GET /house-*/_search
{
  "query": {
    "term": {
      "house_listing_time": {
        "value": 1
      }
    }
  },"sort": [
    {
      "@timestamp": {
        "order": "desc",
        "unmapped_type": "keyword"
      }
    }
  ]
}

constant_score:不进行相关性算分,filter 的结果可以被缓存,提高查询效率。
constant_score 的 filter 中可以放任意查询(不限于term查询),最常见的用法是包装term查询

GET /house-*/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "term": {
          "house_listing_time": "1"
        }
      }
    }
  }
}


image.png

全文查询

全文查询的种类有: Match Query、Match Phrase Query、Query String Query等
索引和搜索的时候都会进行分词:在查询的时候,会先对输入进行分词,然后每个词项会逐个到底层进行查询,最后将结果进行合并
term查询不会对输入进行分词,全文查询会对输入进行分词

match_phrase 匹配一个短语,把数值作为一个整体,进行匹配

GET /house-*/_search
{
  "query": {
    "match_phrase": {
      "house_elevator": "供暖方式集中供暖"
    }
  }
}

整体匹配:例
image.png
image.png

match_all 全部查询,匹配所有,和GET /house-*/_search是一样的

GET /house-*/_search
{
  "query": {
    "match_all": {}
  }
}

GET /house-*/_search

multi_match 从多个字段中查询匹配的数值

GET /house-*/_search
{
  "query": {
    "multi_match": {
      "query": "北京",
      "fields": ["city","message"]   //从这两个字段中匹配包含北京的数据
    }
  }
}

image.png

query_string 查询,同时包含多个数值或只包含其中一个数值

city字段中包含 北京和天津的数据
GET /house-*/_search
{
  "query": {
    "query_string": {
      "default_field": "city",
      "query": "北京 AND 天津" //city字段中包含 北京和天津的数据
    }
  }
}

GET /house-*/_search
{
  "query": {
    "query_string": {
      "default_field": "city",
      "query": "北京 OR 天津" //city字段中包含 北京或者天津的数据
    }
  }
}
------------------------第二种写法---------------------------
GET /house-*/_search
{
  "query": {
    "query_string": {
      "default_field": "city",
      "query": "北京 天津",
      "default_operator": "OR"
    }
  }
}

image.png
image.png


查询语句示例

设置查询展示字段以及屏蔽字段


"_source": {
    "includes": [],
    "excludes": [
      "introduces",
      "imagelist",
      "tFilteritems",
      "creationtime",
      "tFilteritems"
    ]
  }

must和should条件并列,同时满足

{
  "from": 0,
  "size": 100,
  "query": {
    "bool": {
      "must": [
        {
          "bool": {
            "should": [
              {
                "match": {
                  "names": "口罩"
                }
              },{
                "match": {
                  "brandname": "口罩"
                }
              }
            ]
          }
        },{
          "term": {
            "top": {
              "value": 0
            }
          }
        }
      ], 
      "adjust_pure_negative": true,
      "boost": 1
    }
  },
  "_source": {
    "includes": [],
    "excludes": [
      "introduces",
      "imagelist",
      "tFilteritems",
      "creationtime",
      "tFilteritems"
    ]
  }
}



// Builds a bool query from the optional filters (supplierid, searchPhrase, twoid),
// applies paging, source filtering and sorting, then executes the search.
SearchRequest searchRequest = new SearchRequest(indexName);
BoolQueryBuilder builder = QueryBuilders.boolQuery();

// Exact-match filter on the supplier, only when one was supplied.
if (supplierid != null) {
    builder.must(QueryBuilders.termQuery("supplierid", supplierid));
}

// Disabled alternative (kept for reference):
//        if ("".equals(hotSearch)){
//            searchSourceBuilder.query(QueryBuilders.matchQuery("extension_field",hotSearch));
//        }

// Full-text part: the phrase must match at least one of number / brandname / names.
// The three should-clauses are wrapped in their own bool query and attached with
// must(), so they act as an OR group that is ANDed with the other filters.
// Operator.AND requires every analyzed term of the phrase to be present in the field.
if (searchPhrase != null && !searchPhrase.isEmpty()) {
    BoolQueryBuilder phraseQuery = QueryBuilders.boolQuery()
            .should(QueryBuilders.matchQuery("number", searchPhrase).boost(1).operator(Operator.AND))
            .should(QueryBuilders.matchQuery("brandname", searchPhrase).boost(2).operator(Operator.AND))
            .should(QueryBuilders.matchQuery("names", searchPhrase).boost(3).operator(Operator.AND));
    builder.must(phraseQuery);

    // Disabled alternative (kept for reference): should-clauses added directly
    // to the outer bool query.
//            builder.should(QueryBuilders.matchQuery("number",searchPhrase)).boost(1)
//                    .should(QueryBuilders.matchQuery("brandname",searchPhrase)).boost(2)
//                    .should(QueryBuilders.matchQuery("names",searchPhrase)).boost(3);
}

// The null check must come first: comparing a null boxed value with 0 unboxes it
// and throws NullPointerException before the null test is ever evaluated.
if (twoid != null && twoid != 0) {
    builder.must(QueryBuilders.termQuery("twoid", twoid));
}

SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
searchSourceBuilder.query(builder);
searchSourceBuilder.size(size);
searchSourceBuilder.from(from);
// Empty includes; the listed fields are excluded from the returned _source.
searchSourceBuilder.fetchSource(
        new String[]{},
        new String[]{"introduces", "imagelist", "tFilteritems", "creationtime"});

// Sort by the requested field when a direction is given, otherwise fall back
// to ascending "personSort".
if ("asc".equals(sort)) {
    searchSourceBuilder.sort(sortField, SortOrder.ASC);
} else if ("desc".equals(sort)) {
    searchSourceBuilder.sort(sortField, SortOrder.DESC);
} else {
    searchSourceBuilder.sort("personSort", SortOrder.ASC);
}

searchRequest.source(searchSourceBuilder);
SearchResponse searchResponse = null;
try {
    searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
} catch (IOException e) {
    // NOTE(review): the failure is only printed; searchResponse stays null here,
    // so any code that follows must null-check it. Prefer a logger or rethrowing
    // once the surrounding method's contract is known.
    e.printStackTrace();
}
1. //配置字段值查找
2. GET /index/_search/
3. 
4. {
5. "query": {
6. "bool": {
7. "must": [{
8. "match": {
9. //字段key为IPV4_DST_ADDR,value为192.168.8.216
10. "IPV4_DST_ADDR": "192.168.8.216"
11.                 }
12.             }]
13.         }
14.     }
15. }
16. 
17. 
18. 
19.         {
20.             "query": {
21.                 "bool": {
22.                     "must": [{
23.                         "match": {
24.                             "IPV4_SRC_ADDR": "192.168.8.56"
25.                         }
26.                     }, {
27.                         "match": {
28.                             "L7_PROTO_NAME": "SSH"
29.                         }
30.                     }],
31.                     "must_not": [],
32.                     "should": []
33.                 }
34.             },
35.             "from": 0,
36.             "size": 50,
37.             "sort": [],
38.             "aggs": {}
39.         }


1. //范围查询
2. GET /syslog-20201222/_search/
3. {
4.   "query": {
5.     "range": {
6.       "timestamp": { //timestamp范围查询字段
7.         "gte": "2020-10-16T10:08:06+0800", //大于等于
8.         "lte": "2020-10-16T10:08:06+0800"  //小于等于
9.       }
10.     }
11.   }
12. }
1. //范围查询  (查询结果只显示"id","timestamp","ipv4")
2. GET /syslog-20201222/_search/
3. {
            "_source":["id","timestamp","ipv4"],
4.   "query": {
5.     "range": {
6.       "timestamp": { //timestamp范围查询字段
7.         "gte": "2020-10-16T10:08:06+0800", //大于等于
8.         "lte": "2020-10-16T10:08:06+0800"  //小于等于
9.       }
10.     }
11.   }
12. }
1. //匹配条件and范围查找
2. {
3. "query": {
4. "bool": {
5. "must": [{
6. "match": {
7. "IPV4_DST_ADDR": "192.168.8.216"
8.                 }
9.             }, {
10. "range": {
11. "timestamp": {
12. "gte": "2020-10-17T10:08:06+0800",
13. "lte": "2020-10-18T10:08:06+0800"
14.                     }
15.                 }
16.             }]
17.         }
18. 
19.     }
20. }
1. 范围内,嵌套查询
2.         GET /syslog-20201225/_search/
3.      {      "size": 0,
4.             "query": {
5.                 "range": {
6.                     "timestamp": {
7.                         "lt": "2020-10-19T10:00:06+0800",
8.                         "gte":"2020-10-15T10:09:06+0800"
9.                     }
10.                 }
11.             },
12. 
13.             "aggs": {
14.                 "group_L7_PROTO_NAME": {
15.                     "terms": {
16.                         "field": "L7_PROTO_NAME"
17.                     },
18.                     "aggs": {
19.                         "IPV4_SRC_ADDR": {
20.                             "terms": {
21.                                 "field": "IPV4_SRC_ADDR"
22.                             },
23.                             "aggs": {
24.                                 "IPV4_DST_ADDR": {
25.                                     "terms": {
26.                                         "field": "IPV4_DST_ADDR"
27.                                     }
28.                                 }
29.                             }
30.                         }
31.                     }
32.                 }
33.             }
34.         }
1. es按字段进行统计并排序    
2.     GET /syslog-20201225/_search/
3.      {
4. 
5.             "query": {
6.                 "range": {
7.                     "timestamp": {
8.                         "lt": "2020-10-19T10:00:06+0800",
9.                         "gte":"2020-10-15T10:09:06+0800"
10.                     }
11.                 }
12.             },
13.             "size": 0,
14.             "aggs": {
15.                 "group_L7_PROTO_NAME": {
16.                     "terms": {
17.                         "field": "L7_PROTO_NAME"
18.                     }
19.                 }
20.             }
21. 
22.         }



多条件查询:es的request body中只能包含一个query,当有多条件查询时,使用bool进行组合

GET /house-*/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "house_area": "崇文门"
          }

        },{
          "range": {
            "house_listing_time": {
              "gte": "2019/9/14",
              "lte": "2021/9/14"
            }
          }
        }
      ]
    }
  }
}


踩过的坑。

1. es查询索引时可以使用通配符:索引按照一定规则建立(例如统一使用 appname_ 前缀),使用通配符时会并发查询所有满足条件的索引
2. 索引可以采用通配符,同时在查询语句中利用时间范围进行范围查询
3. 可以多索引查询:如果有按时间周期的查询,可以先在自己的程序中将时间周期转换为固定的索引名,然后通过多索引 appname_1,appname_2,appname_3/_search 的方式查询