在 ES 中group by 称为分桶,桶聚合 bucketing
指标聚合
max、min、sum、avg
# size只返回聚合指标
POST /book/_search
{
"size": 0,
"aggs": {
"max_price": {
"max": {
"field": "price"
}
}
}
}
# avg
POST /book/_search
{
"size": 0,
"aggs": {
"max_price": {
"avg": {
"field": "price"
}
}
}
}
count
POST /book/_count
{
"query": {
"range": {
"price": {
"gt": 100
}
}
}
}
value_count 统计某字段有值的文档数
例如 统计某字段不为null 的数据量
POST /book/_search
{
"size": 0,
"aggs": {
"price_count": {
"value_count": {
"field": "price"
}
}
}
}
cardinality值去重计数 基数
Cardinality:意为集合的势,或者基数,是指不同数值的个数,类似SQL中的distinct count概念
GET /book/_search
{
"size": 0,
"aggs": {
"_id_count": {
"cardinality": {
"field": "_id"
}
},
"price_count":{
"cardinality": {
"field": "price"
}
}
}
}
**响应结果**
...
"aggregations" : {
"price_count" : {
"value" : 4
},
"_id_count" : {
"value" : 4
}
}
stats统计指标
stats会统计 min、max、avg、sum、count;extended_stats
会统计更多指标: 方差、平方和、标准差
POST /book/_search
{
"size": 0,
"aggs": {
"price_stats": {
"stats": {
"field": "price"
}
}
}
}
**响应结果**
"aggregations" : {
"price_stats" : {
"count" : 4,
"min" : 100.44999694824219,
"max" : 999.989990234375,
"avg" : 455.4699993133545,
"sum" : 1821.879997253418
}
}
桶聚合
类似sql的group by
POST /book/_search
{
"size": 0,
"aggs": {
"group_by_price": {
"range": {
"field": "price",
"ranges": [
{
"from": 0,
"to": 200
},{
"from": 200,
"to": 400
},
{
"from": 400,
"to": 1000
}
]
},
"aggs": {
"avg_price": {
"avg": {
"field": "price"
}
}
}
}
}
}
查询数量
POST /book/_search
{
"size": 0,
"aggs": {
"price_count": {
"value_count": {
"field": "price"
}
}
}
}
组合
sql语句类似 select avg(price),count(price) from table group by price
POST /book/_search
{
"size": 0,
"aggs": {
"group_by_price": {
"range": {
"field": "price",
"ranges": [
{
"from": 0,
"to": 200
},
{
"from": 200,
"to": 400
},
{
"from": 400,
"to": 1000
}
]
},
"aggs": {
"avg_price": {
"avg": {
"field": "price"
}
},
"count_price": {
"value_count": {
"field": "price"
}
}
}
}
}
}
实现having效果
在 aggs中用having来实现sql中的having中的效果
POST /book/_search
{
"size": 0,
"aggs": {
"group_by_price": {
"range": {
"field": "price",
"ranges": [
{
"from": 0,
"to": 200
},
{
"from": 200,
"to": 400
},
{
"from": 400,
"to": 1000
}
]
},
"aggs": {
"avg_price": {
"avg": {
"field": "price"
}
},
"count_price": {
"value_count": {
"field": "price"
}
},
"having": {
"bucket_selector": {
"buckets_path": {
"avg_price_result" :"avg_price"
},
"script": "params.avg_price_result >= 200"
}
}
}
}
}
}
terms 聚合
搜索address中包含mill的所有人的年龄分布以及平均年龄,但不显示这些人的详情
GET bank/_search
{
"query": { # 查询出包含mill的
"match": {
"address": "Mill"
}
},
"aggs": { #基于查询聚合
"ageAgg": { # 聚合的名字,随便起
"terms": { # 看值的可能性分布
"field": "age",
"size": 10
}
},
"ageAvg": {
"avg": { # 看age值的平均
"field": "age"
}
},
"balanceAvg": {
"avg": { # 看balance的平均
"field": "balance"
}
}
},
"size": 0 # 不看详情
}
按照年龄聚合,并且求这些年龄段的这些人的平均薪资
GET bank/_search
{
"query": {
"match_all": {}
},
"aggs": {
"ageAgg": {
"terms": { # 看分布
"field": "age",
"size": 100
},
"aggs": { # 与terms并列
"ageAvg": { #平均
"avg": {
"field": "balance"
}
}
}
}
},
"size": 0
}
复杂子聚合:查出所有年龄分布,并且这些年龄段中M的平均薪资和F的平均薪资以及这个年龄段的总体平均薪资
GET bank/_search
{
"query": {
"match_all": {}
},
"aggs": {
"ageAgg": {
"terms": { # 看age分布
"field": "age",
"size": 100
},
"aggs": { # 子聚合
"genderAgg": {
"terms": { # 看gender分布
"field": "gender.keyword" # 注意这里,文本字段应该用.keyword
},
"aggs": { # 子聚合
"balanceAvg": {
"avg": { # 男性的平均
"field": "balance"
}
}
}
},
"ageBalanceAvg": {
"avg": { #age分布的平均(男女)
"field": "balance"
}
}
}
}
},
"size": 0
}