数据类型

Text: 被 Analyzer 索引的字符串类型
Keyword: 不会被 Analyzer 分词、只能被精确匹配的字符串类型
Date: 日期类型，配合 format
数字类型: long、integer、short、double 等
boolean: true false
Array: 数组
Object: json 嵌套
IP类型
Geo_point: 地理位置

语法

添加

控制分片

// 控制分片
put /employee
{
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 0
  }
}

es 集群状态主要看从分片能不能存在在 n-1 个节点上

暂时存储

GET /_all
delete /employee
# 控制分片
put /employee
{
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 0
  }
}
# 非结构化方式新建索引
# _doc 为 type，但是仅仅作为占位符
PUT /employee/_doc/1
{
  "name": "akarin2",
  "age": 20
}
GET /employee/_doc/1
# 会直接覆盖上面的内容，即默认需要全量更新
PUT /employee/_doc/1
{
  "name": "akarin2"
}
# 获取索引记录
GET /employee/_doc/1
# 指定某一字段更新
POST /employee/_update/1
{
  "doc":{
    "name": "fuck"
  }
}
# 强制指定创建，如果存在，则失败
POST /employee/_create/1
{
  "name": "lee",
  "age": 30
}
# 删除某个文档
DELETE /employee/_doc/1
# 查询全部文档
GET /employee/_search
#/
DELETE /employee
# 使用结构化的方式创建 索引
# 设置 mapping 的结构
PUT /employee
{
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas" : 0
  },
  "mappings":{
    "properties": {
      "name": {
        "type": "text"
      },
      "age":{
        "type": "integer"
      }
    }
  }
}
# 此时进行put 操作
PUT /employee/_doc/1
{
  "name": "baba"
}
# 虽然显示缺少了 age ，但是 /employee 的 mapping 还是保持原结构
# 但是如果 put 了新的不冲突字段，mapping 还是会跟着改
GET /employee/_doc/1
PUT /employee/_doc/1
{
  "name": "demo 1",
  "age": 123
}
PUT /employee/_doc/2
{
  "name": "demo 2",
  "age": 1234
}
# 不带条件查询所有记录
GET /employee/_search
{
  "query": {
    "match_all": {}
  }
}
# 分页查询
# from 从第几个索引开始（索引从0开始）
# size 要几条记录
GET /employee/_search 
{
  "query": {
    "match_all": {}
  },
  "from": 0,
  "size": 1
}
# 条件查询
# 带关键字查询, 只能单一字段
GET /employee/_search
{
  "query": {
    "match": {
      "name": "demo"
    }
  }
}
# 带排序的查询
# 此时返回 _score 为 null，评分标准是 sort字段
GET /employee/_search 
{
  "query": {
    "match": {
      "name": "demo"
    }
  },
  "sort": {
    "age": {
      "order": "asc"
    }
  }
}
# filter
# 作用在 bool 下
# filter 类似 relation db 中的 where
# 打分均为0.0
# term 和 match 区别是后者带有分词 analyze, term 是纯粹的 equals
GET /employee/_search
{
  "query": {
    "bool":{
      "filter" :{
        "term": {
          "name": "demo"
        }
      }
    }
  }
}
# 带聚合的查询
# 对某个字段进行聚合
# 返回的聚合字段中，key 就是对应 field 的值
GET /employee/_search 
{
  "query": {
    "match": {
      "name": "demo"
    }
  },
  "aggs":{
    "随便起个名称":{
      "terms": {
        "field": "age"
      }
    }
  }
}
#
# 分词
#
PUT /movie/_doc/1 
{
  "name": "eating a apple"
}
# 查看分词状态
# 根据对应 field 的 analyzer 对 text 进行分词
# 默认是标准字符处理，以空格和标点符号分割内容
GET /movie/_analyze
{
  "field": "name",
  "text": ["eating a applet"]
}
DELETE /movie
# 修改为 english 分词
# 在结构化索引的时候，不要带_doc和id
PUT /movie
{
  "mappings":{
    "properties":{
      "name":{
        "type": "text",
        "analyzer": "english"
      }
    }
  }
}
# 查看新的分词状态
GET /movie/_analyze
{
  "field": "name",
  "text": ["Eatting this apple"]
}
# 添加一条记录
POST /movie/_doc/1
{
  "name": "Eatting a apple"
}
GET /movie/_doc/1
GET /movie/_search
{
  "query": {
    "match": {
      "name": "Eatting a apple"
    }
  }
}
GET /movie/_search
{
  "query": {
    "bool": {
      "filter": {
        "term" : {
          "name": "Eatting a apple"
        }
      }
    }
  }
}
#/
# tmdb
#/
DELETE /movie
# 时间format可以对应多种，前面语法要求带个8
# object 中 character 和 name 的分词用 standard 精确查询
PUT /movie 
{
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 0
  },
  "mappings": {
    "properties": {
      "title": {
        "type": "text",
        "analyzer": "english"
      },
      "tagline": {
        "type": "text",
        "analyzer": "english"
      },
      "release_date": {
        "type": "date",
        "format": "8yyyy/MM/dd||yyyy/M/dd||yyyy/MM/d||yyyy/M/d"
      },
      "popularity": {
        "type": "double"
      },
      "overview": {
        "type": "text",
        "analyzer": "english"
      },
      "cast": {
        "type": "object",
        "properties": {
          "character": {"type": "text", "analyzer": "standard"},
          "name": {"type": "text", "analyzer": "standard"}
        }
      }
    }
  }
}
# match: 对搜索词进行分词分析，再去索引查询
GET /movie/_search
{
  "query": {
    "match": {
      "title": "steve"
    }
  }
}
# term: 不进行分词分析，直接去索引查询，精确匹配
GET /movie/_search
{
  "query": {
    "term": {
      "title": "steve"
    }
  }
}
# or
# 默认是将分词后的token进行or匹配，只要有一个 token 命中索引即返回数据
GET /movie/_search
{
  "query": {
    "match": {
      "title": "basketball and aliens"
    }
  }
}
# and
# 可以修改为分词后的 token 全部命中才可以返回数据
GET /movie/_search
{
  "query": {
    "match": {
      "title": {
        "query": "basketball and aliens",
        "operator": "and"
      }
    }
  }
}
# 最小词匹配项
# 分词后的 token 要命中指定个数
GET /movie/_search
{
    "query": {
    "match": {
      "title": {
        "query": "basketball and aliens",
        "operator": "or",
        "minimum_should_match": 1
      }
    }
  }
}
# 短语查询
# 同样走倒排索引（利用 token 的位置信息），但要求分词后的 token 按顺序且相邻出现才算命中
GET /movie/_search
{
  "query": {
    "match_phrase": {
      "title": "steve"
    }
  }
}
# 多字段查询
GET /movie/_search
{
  "query": {
    "multi_match": {
      "query": "basketball and aliens",
      "fields": ["title", "overview"]
    }
  }
}
# 查看打分过程
GET /movie/_search
{
  "explain": true, 
  "query": {
    "match": {
      "title": "steve"
    }
  }
}
# 多字段查询打分
# 通过结果来看，会对不同 field 进行打分，然后取最大的一个
GET /movie/_search
{
  "explain": true, 
  "query": {
    "multi_match": {
      "query": "basketball and aliens",
      "fields": ["title", "overview"]
    }
  }
}
# 优化多字段查询优化1
# 对重要字段进行 放大系数的乘积
GET /movie/_search
{
  "explain": true, 
  "query": {
    "multi_match": {
      "query": "basketball and aliens",
      "fields": ["title^10", "overview"]
    }
  }
}
# 优化多字段查询优化2
# tie_breaker: 最终得分 = 得分最高字段的分数 + tie_breaker * 其余命中字段的分数之和
GET /movie/_search
{
  "explain": true, 
  "query": {
    "multi_match": {
      "query": "basketball and aliens",
      "fields": ["title^10", "overview"],
      "tie_breaker": 0.3
    }
  }
}
# 多字段查询 从 max of 改为 sum of
# bool 查询，根据条件，会先过滤不匹配的 document，再进行打分，最后 sum of
#   must: 必须都为 true
#   must_not: 必须都是 false
#   should: 其中有个 true 即可
GET /movie/_search
{
  "explain": true,
  "query": {
    "bool": {
      "should": [
        {"match": {"title": "basketball and aliens"}},
        {"match": {"overview": "basketball and aliens"}}
      ]
    }
  }
}
# 多字段查询
# multi_match 会根据不同的 type 进行打分匹配
# 默认是 best_fields ，最匹配模式
GET /movie/_search
{
  "query": {
    "multi_match": {
      "query": "basketball and aliens",
      "fields": ["title", "overview"],
      "type": "best_fields"
    }
  }
}
# best_fields 等同于 dis_max + queries 模式
GET /movie/_search
{
  "explain": true,
  "query": {
    "dis_max": {
      "queries": [
        {"match": {"title": "basketball and aliens"}},
        {"match": {"overview": "basketball and aliens"}}
      ]
    }
  }
}
# best_fields 模式，最佳字段模式打分，不管 fields 有没有对应分词，只管各个 fields 打分最大值
# "explanation" : "((overview:basketbal overview:alien) | (title:basketbal title:alien))"
GET /movie/_validate/query?explain
{
  "query": {
    "multi_match": {
      "query": "basketball and aliens",
      "fields": ["title", "overview"],
      "type": "best_fields"
    }
  }
}
# most_fields 模式，考虑所有文本的， 类似 bool + should
#  "explanation" : "((overview:basketbal overview:alien) | (title:basketbal title:alien))~1.0"
GET /movie/_validate/query?explain
{
  "query": {
    "multi_match": {
      "query": "basketball and aliens",
      "fields": ["title", "overview"],
      "type": "most_fields"
    }
  }
}
# cross_fields 分词导向模式，以分词为单位计算栏位的总分
# 先算分词在各个field 中的分数，取最大值，然后再和其他分词结果相加
#  "explanation" : "blended(terms:[overview:basketbal, title:basketbal]) blended(terms:[overview:alien, title:alien])"
GET /movie/_validate/query?explain
{
  "query": {
    "multi_match": {
      "query": "basketball and aliens",
      "fields": ["title", "overview"],
      "type": "cross_fields"
    }
  }
}
##########################
# 过滤和排序
##########################
# AND OR 快速使用
# 不用使用 bool + must 之类的
GET /movie/_search
{
  "explain": true, 
  "query": {
    "query_string": {
      "fields": ["title", "overview"], 
      "query": "steve AND jobs"
    }
  }
}
# filter，score为0
# 单条件过滤
GET /movie/_search
{
  "query": {
    "bool": {
      "filter": {
        "term": {"title": "steve"}
      }
    }
  }
}
# filter 多条件过滤 + 排序
# score 为 null
GET /movie/_search
{
  "query": {
    "bool": {
      "filter": [
        {"term": {"title": "steve"}},
        {"term": {"cast.name": "gaspard"}},
        {"range": {"release_date": {"lte": "2015/01/01"}}},
        {"range": {"popularity": {"gte": "25"}}}
        ]
    }
  },
  "sort": [
    {
      "popularity": {
        "order": "desc"
      }
    }
  ]
}
# 带match打分的 filter
GET /movie/_search
{
  "query": {
    "bool": {
      "should": [
        {"match": {"title": "steve"}}
      ], 
      "filter": [
        {"term": {"title": "steve"}},
        {"term": {"cast.name": "gaspard"}},
        {"range": {"release_date": {"lte": "2015/01/01"}}},
        {"range": {"popularity": {"gte": "25"}}}
        ]
    }
  }
}
# function score   自定义打分
GET /movie/_search
{
  "query": {
    "function_score": {
      // 原始查询得到 oldValue
      "query": {
        "multi_match": {
          "query": "steve job",
          "fields": ["title", "overview"],
          "operator": "or",
          "type": "most_fields"
        }
      },
      "functions": [
        {
          "field_value_factor": {
            "field": "popularity",  // 对应要处理的字段
            "modifier": "log2p",  // 字段值+2后，计算对数
            "factor": 1.2   // 字段预处理 * 1.2
          }
        }
      ],
      "score_mode": "sum",  // 多个 function 之间的得分合并模式，默认 multiply 相乘
      "boost_mode": "sum" // 和 oldValue 之间的得分处理模式，默认 multiply 相乘
    }
  }
}

ik 分词器

# 测试 ik
# 宁缺勿滥
GET _analyze?pretty
{
  "analyzer": "ik_smart",
  "text": "中华人民共和国国歌"
}
# 最大努力
GET _analyze?pretty
{
  "analyzer": "ik_max_word",
  "text": "中华人民共和国国歌"
}
# 普通分词
GET _analyze?pretty
{
  "analyzer": "standard",
  "text": "中华人民共和国国歌"
}
# analyzer 指定的是构建索引的时候的分词
# search_analyzer 指定的是搜索关键字时的分词
# 最佳实践
#   索引阶段用 ik_max_word
#   查询阶段用 ik_smart

实践

定义字段逻辑
定义字段类型
定义字段 analyzer

新版后端

ES 语法

数据类型

语法

添加

控制分片

暂时存储

ik 分词器

实践