1、 Es基本指令
index,document 索引、类型
#POST 带 _update 提交时会先对比数据:如果数据一致,_seq_no 和 _version 都不会变
#如果不带 _update,每次提交 _version 和 _seq_no 都会加1
POST /customer/external/1/_update
{
"doc":{
"name":"xuwei"
}
}
#PUT 会直接替换
PUT /customer/external/1/
{
"name": "xuwei"
}
2、导入批量数据
POST /customer/external/_bulk
#https://github.com/elastic/elasticsearch/blob/master/docs/src/test/resources/accounts.json
#https://segmentfault.com/a/1190000037572389
3、 请求的两种方式
1.Request Api URl
2.Query DSL (常用)
GET /bank/_search
{
"query": {
"match_all": {}
},
"sort": [
{
"account_number": {
"order": "desc"
}
,
"balance": {
"order": "asc"
}
}
],
"_source": ["balance","firstname"] #不使用默认是select *
}
3.1 Query
1、 match
#match_phrase精准匹配 match会分词匹配
GET bank/_search
{
"query": {
"match_phrase": {
"address": "kings Hwy"
}
}
}
#multi_match 多字段匹配查询,同样支持分词
GET bank/_search
{
"query": {
"multi_match": {
"query": "mill Movico",
"fields": ["address","city"]
}
}
}
2、 复合查询 Bool
//Must
GET bank/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"gender": "F"
}
},
{
"match": {
"address": "mill"
}
}
]
}
}
}
//must_not
GET bank/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"gender": "F"
}
},
{
"match": {
"address": "mill Street"
}
}
],
"must_not": [
{
"match": {
"age": "32"
}
}
]
}
}
}
//should 对得分影响
GET bank/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"gender": "F"
}
},
{
"match": {
"address": "mill Street"
}
}
],
"should": [
{
"match": {
"lastname": "Bates"
}
}
]
}
}
}
// must 会提供相关性得分 filter不会提供相关性得分
GET bank/_search
{
"query": {
"bool": {
"filter": [
{
"range": {
"age": {
"gte": 10,
"lte": 30
}
}
}
]
}
}
}
GET bank/_search
{
"query": {
"bool": {
"must": [
{
"range": {
"age": {
"gte": 10,
"lte": 30
}
}
}
]
}
}
}
//term 推荐用于精确值检索,不要对text字段使用;match_phrase 也可以精确查询,支持短语匹配
#全文检索用match(match下有keyword) 非text检索用term
GET /bank/_search
{
"query": {
"term": {
"age": {
"value": "28"
}
}
}
}
//精确匹配 keyword
GET /bank/_search
{
"query": {
"match": {
"address.keyword": "302 Howard Place"
}
}
}
2、数据分析分组(aggs)
//搜索address中所有mill的年龄分布和平均年龄 子聚合
GET bank/_search
{
"query": {
"match_all": {
}
},
"aggs": {
"ageFAgg": {
"terms": {
"field": "age",
"size": 10
},
"aggs": {
"ageAgg": {
"avg": {
"field": "age"
}
}
}
},
"banlanceAgg":{
"avg": {
"field": "balance"
}
}
}
}
#搜索所有年龄分布下的性别分布的平均年龄 如:M Count: 24 avg:32,F Count: 24 avg:32
GET bank/_search
{
"query": {
"match_all": {}
},
"aggs": {
"ageFAgg": {
"terms": {
"field": "age",
"size": 10
},
"aggs": {
"genderAgg": {
"terms": {
"field": "gender.keyword"
},
"aggs": {
"avg2Agg": {
"avg": {
"field": "age"
}
}
}
}
}
}
}
}
3、属性映射
#创建索引并指定映射
PUT /my_index
{
"mappings": {
"properties": {
"age":{"type": "integer"}, //普通类型
"email":{"type": "keyword"},//全词精确查找
"name":{"type": "text"}//自动分词检索
}
}
}
#修改映射 可以添加属性
PUT /my_index/_mapping
{
"properties": {
"employee-id": {
"type": "keyword",
"index": false, //不需要索引 默认是true 可以被索引
"doc_values": false //不会被用作排序啥的 冗余处处、存储可以使用这两个字段
}
}
}
4、数据迁移/映射修改
#新映射规则
PUT newbank
{
"mappings": {
"properties": {
"account_number": {
"type": "long"
},
"address": {
"type": "text"
},
"age": {
"type": "long"
},
"balance": {
"type": "long"
},
"city": {
"type": "text"
},
"email": {
"type": "keyword"
},
"employer": {
"type": "keyword"
},
"firstname": {
"type": "text"
},
"gender": {
"type": "text"
},
"lastname": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"state": {
"type": "keyword"
}
}
}
}
#迁移 index/type/doc
POST _reindex
{
"source": {
"index": "bank",
"type": "count" #如果是存在type的源数据 加上type ,否则不需要加
},
"dest": {
"index": "newbank"
}
}
4、分词器
#安装 就是下载一个压缩包解压到plugins
# Install the IK analyzer: unpack the release archive into the Elasticsearch plugins directory.
mkdir -p /mydata/elasticsearch/plugins/ik
cd /mydata/elasticsearch/plugins/ik
wget https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v7.6.2/elasticsearch-analysis-ik-7.6.2.zip
unzip elasticsearch-analysis-ik-7.6.2.zip
# Recursive flag is -R (not +R); run it after unzip so the extracted files are covered too.
chmod -R 777 /mydata/elasticsearch/plugins/ik
#使用 支持中文分词
POST _analyze
{
"analyzer": "ik_smart",
"text": "我是中国人"
}
POST _analyze
{
"analyzer": "ik_max_word",
"text": "我是中国人"
}
#建立索引的时候指定默认的IK分词器
PUT /my_index
{
}
5、 RestHighLevelClient
# es config
/**
 * Spring configuration that exposes the Elasticsearch high-level REST client
 * together with the request options shared by callers.
 */
@Configuration
public class EsConfig {

    /** Shared request options; currently the library defaults with nothing added. */
    public static final RequestOptions COMMON_OPTIONS;

    static {
        // Start from the defaults so later customisation has a single place to live.
        RequestOptions.Builder optionsBuilder = RequestOptions.DEFAULT.toBuilder();
        // Authentication and response-buffer settings are to be added later, e.g.:
        //   optionsBuilder.addHeader("Authorization", "Bearer " + TOKEN);
        //   optionsBuilder.setHttpAsyncResponseConsumerFactory(
        //       new HttpAsyncResponseConsumerFactory
        //           .HeapBufferedResponseConsumerFactory(30 * 1024 * 1024 * 1024));
        // NOTE(review): 30 * 1024 * 1024 * 1024 overflows int arithmetic; pick a smaller
        // limit before enabling the lines above.
        COMMON_OPTIONS = optionsBuilder.build();
    }

    /**
     * Creates the client bean, pointed at a single host over plain HTTP.
     *
     * @return a new {@link RestHighLevelClient}
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        HttpHost node = new HttpHost("t.freefish.info", 9200, "http");
        return new RestHighLevelClient(RestClient.builder(node));
    }
}
DSL查询语句
GET bank/_search
{
"query": {
"match": {
"address": "mill"
}
},
"aggs": {
"ageAgg": {
"terms": {
"field": "age",
"size": 10
},
"aggs": {
"avgAgg": {
"avg": {
"field": "age"
}
},
"balanceAgg": {
"avg": {
"field": "balance"
}
}
}
}
}
}
DSL直接查询数据
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 4,
"relation" : "eq"
},
"max_score" : 5.4032025,
"hits" : [
{
"_index" : "bank",
"_type" : "count",
"_id" : "970",
"_score" : 5.4032025,
"_source" : {
"account_number" : 970,
"balance" : 19648,
"firstname" : "Forbes",
"lastname" : "Wallace",
"age" : 28,
"gender" : "M",
"address" : "990 Mill Road",
"employer" : "Pheast",
"email" : "forbeswallace@pheast.com",
"city" : "Lopezo",
"state" : "AK"
}
},
{
"_index" : "bank",
"_type" : "count",
"_id" : "136",
"_score" : 5.4032025,
"_source" : {
"account_number" : 136,
"balance" : 45801,
"firstname" : "Winnie",
"lastname" : "Holland",
"age" : 38,
"gender" : "M",
"address" : "198 Mill Lane",
"employer" : "Neteria",
"email" : "winnieholland@neteria.com",
"city" : "Urie",
"state" : "IL"
}
},
{
"_index" : "bank",
"_type" : "count",
"_id" : "345",
"_score" : 5.4032025,
"_source" : {
"account_number" : 345,
"balance" : 9812,
"firstname" : "Parker",
"lastname" : "Hines",
"age" : 38,
"gender" : "M",
"address" : "715 Mill Avenue",
"employer" : "Baluba",
"email" : "parkerhines@baluba.com",
"city" : "Blackgum",
"state" : "KY"
}
},
{
"_index" : "bank",
"_type" : "count",
"_id" : "472",
"_score" : 5.4032025,
"_source" : {
"account_number" : 472,
"balance" : 25571,
"firstname" : "Lee",
"lastname" : "Long",
"age" : 32,
"gender" : "F",
"address" : "288 Mill Street",
"employer" : "Comverges",
"email" : "leelong@comverges.com",
"city" : "Movico",
"state" : "MT"
}
}
]
},
"aggregations" : {
"ageAgg" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 38,
"doc_count" : 2,
"balanceAgg" : {
"value" : 27806.5
},
"avgAgg" : {
"value" : 38.0
}
},
{
"key" : 28,
"doc_count" : 1,
"balanceAgg" : {
"value" : 19648.0
},
"avgAgg" : {
"value" : 28.0
}
},
{
"key" : 32,
"doc_count" : 1,
"balanceAgg" : {
"value" : 25571.0
},
"avgAgg" : {
"value" : 32.0
}
}
]
}
}
}
//API
/**
 * Searches the {@code bank} index for documents whose address matches "mill",
 * aggregates them by age (with average-age and average-balance sub-aggregations),
 * and prints every hit followed by each bucket's averages.
 *
 * @throws IOException propagated from the client's search call
 */
@Test
public void SearchData() throws IOException {
    // Request body: the query first, then the aggregation tree.
    SearchSourceBuilder source = new SearchSourceBuilder();
    source.query(QueryBuilders.matchQuery("address", "mill"));

    // Terms aggregation on age, carrying two avg sub-aggregations.
    TermsAggregationBuilder byAge = AggregationBuilders.terms("ageAgg").field("age").size(5);
    byAge.subAggregation(AggregationBuilders.avg("avgAgg").field("age"));
    byAge.subAggregation(AggregationBuilders.avg("balanceAgg").field("balance"));
    source.aggregation(byAge);

    SearchRequest request = new SearchRequest("bank");
    request.source(source);
    log.info("检索条件{}", source);

    SearchResponse response = restHighLevelClient.search(request, RequestOptions.DEFAULT);
    log.info("检索结果{}", response);

    // Map each hit's _source JSON onto the account bean and print it.
    for (SearchHit hit : response.getHits().getHits()) {
        System.out.println(JSON.parseObject(hit.getSourceAsString(), Acount.class));
    }

    // Walk the age buckets; each bucket carries its own sub-aggregation results.
    Terms ageTerms = response.getAggregations().get("ageAgg");
    for (Terms.Bucket bucket : ageTerms.getBuckets()) {
        System.out.println("bucket:" + bucket.getKeyAsString());
        Aggregations bucketAggs = bucket.getAggregations();
        Avg averageAge = bucketAggs.get("avgAgg");
        Avg averageBalance = bucketAggs.get("balanceAgg");
        System.out.println("avgAgg.getValue()" + averageAge.getValue());
        System.out.println("balanceAgg.getValue()" + averageBalance.getValue());
    }
}
6、数组扁平处理
#默认ES会对对象数组做扁平化处理;需要保持数组内每个对象独立时,使用 nested(嵌套)类型
PUT my_index/my_type/1
{
"group" : "fans",
"user" : [
{
"first" : "John",
"last" : "Smith"
},
{
"first" : "Alice",
"last" : "White"
}
]
}
#实际存储结构为
{
"group" : "fans",
"user.first" : [ "alice", "john" ],
"user.last" : [ "smith", "white" ]
}
#查询 不应该查到的数据被查到了
GET my_index/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"user.first": "Alice"
}
},
{
"match": {
"user.last": "Smith"
}
}
]
}
}
}
6、正确的方式
//创建nested索引
PUT my_index
{
"mappings": {
"properties": {
"user":{
"type": "nested"
}
}
}
}
//存入数据
PUT my_index/my_type/1
{
"group" : "fans",
"user" : [
{
"first" : "John",
"last" : "Smith"
},
{
"first" : "Alice",
"last" : "White"
}
]
}
//此时查询不到数据,符合预期(nested 类型下数组中的每个对象独立索引,不再被扁平化)
GET my_index/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"user.first": "Alice"
}
},
{
"match": {
"user.last": "Smith"
}
}
]
}
}
}