数据类型
- Text: 被 Analyzer 索引的字符串类型
- Keyword: 不会被 Analyzer 分词、只能被精确匹配的字符串类型
- Date: 日期类型,配合 format
- 数字类型: long、integer、short、double 等
- boolean: true false
- Array: 数组
- Object: json 嵌套
- IP类型
- Geo_point: 地理位置
语法
添加
控制分片
// 控制分片
put /employee
{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0
}
}
- ES 集群状态(green/yellow)主要看副本分片能不能分配到其余 n-1 个节点上(副本分片不能和它的主分片放在同一个节点)
暂时存储
GET /_all
delete /employee
# 控制分片
put /employee
{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0
}
}
# 非结构化方式新建索引
# _doc 为 type,但是仅仅作为占位符
PUT /employee/_doc/1
{
"name": "akarin2",
"age": 20
}
GET /employee/_doc/1
# 会直接覆盖上面的内容,即 PUT 默认是全量更新
PUT /employee/_doc/1
{
"name": "akarin2"
}
# 获取索引记录
GET /employee/_doc/1
# 指定某一字段更新
POST /employee/_update/1
{
"doc":{
"name": "fuck"
}
}
# 强制指定创建,如果存在,则失败
POST /employee/_create/1
{
"name": "lee",
"age": 30
}
# 删除某个文档
DELETE /employee/_doc/1
# 查询全部文档
GET /employee/_search
#/
DELETE /employee
# 使用结构化的方式创建 索引
# 设置 mapping 的结构
PUT /employee
{
"settings": {
"number_of_shards": 1,
"number_of_replicas" : 0
},
"mappings":{
"properties": {
"name": {
"type": "text"
},
"age":{
"type": "integer"
}
}
}
}
# 此时进行put 操作
PUT /employee/_doc/1
{
"name": "baba"
}
# 虽然显示缺少了 age ,但是 /employee 的 mapping 还是保持原结构
# 但是如果 put 了新的不冲突字段,mapping 还是会跟着改
GET /employee/_doc/1
PUT /employee/_doc/1
{
"name": "demo 1",
"age": 123
}
PUT /employee/_doc/2
{
"name": "demo 2",
"age": 1234
}
# 不带条件查询所有记录
GET /employee/_search
{
"query": {
"match_all": {}
}
}
# 分页查询
# from 从第几条记录开始(下标从 0 开始)
# size 要几条记录
GET /employee/_search
{
"query": {
"match_all": {}
},
"from": 0,
"size": 1
}
# 条件查询
# 带关键字查询, 只能单一字段
GET /employee/_search
{
"query": {
"match": {
"name": "demo"
}
}
}
# 带排序的查询
# 此时返回的 _score 为 null,结果按 sort 字段排序而不是按评分排序
GET /employee/_search
{
"query": {
"match": {
"name": "demo"
}
},
"sort":{
"age":{
"order": "asc"
}
}
}
# filter
# 作用在 bool 下
# filter 类似 relation db 中的 where
# 打分均为0.0
# term 和 match 的区别:match 会先对搜索词分词(analyze),term 不分词,是纯粹的 equals 匹配
GET /employee/_search
{
"query": {
"bool":{
"filter" :{
"term": {
"name": "demo"
}
}
}
}
}
# 带聚合的查询
# 对某个字段进行聚合
# 返回的聚合字段中,key 就是对应 field 的值
GET /employee/_search
{
"query": {
"match": {
"name": "demo"
}
},
"aggs":{
"随便起个名称":{
"terms": {
"field": "age"
}
}
}
}
#
# 分词
#
PUT /movie/_doc/1
{
"name": "eating a apple"
}
# 查看分词状态
# 根据对应 field 的 analyzer 对 text 进行分词
# 默认是标准字符处理,以空格和标点符号分割内容
GET /movie/_analyze
{
"field": "name",
"text": ["eating a applet"]
}
DELETE /movie
# 修改为 english 分词
# 在结构化索引的时候,不要带_doc和id
PUT /movie
{
"mappings":{
"properties":{
"name":{
"type": "text",
"analyzer": "english"
}
}
}
}
# 查看新的分词状态
GET /movie/_analyze
{
"field": "name",
"text": ["Eatting this apple"]
}
# 添加一条记录
POST /movie/_doc/1
{
"name": "Eatting a apple"
}
GET /movie/_doc/1
GET /movie/_search
{
"query": {
"match": {
"name": "Eatting a apple"
}
}
}
GET /movie/_search
{
"query": {
"bool": {
"filter": {
"term" : {
"name": "Eatting a apple"
}
}
}
}
}
#/
# tomdb
#/
DELETE /movie
# 时间 format 可以用 || 指定多种格式;开头的 8 表示使用 java.time 的格式语法(6.x 过渡期写法)
# cast(object)中的 character 和 name 使用 standard 分词器(不做词干还原,更接近原词匹配)
PUT /movie
{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0
},
"mappings": {
"properties": {
"title": {
"type": "text",
"analyzer": "english"
},
"tagline": {
"type": "text",
"analyzer": "english"
},
"release_date": {
"type": "date",
"format": "8yyyy/MM/dd||yyyy/M/dd||yyyy/MM/d||yyyy/M/d"
},
"popularity": {
"type": "double"
},
"overview": {
"type": "text",
"analyzer": "english"
},
"cast": {
"type": "object",
"properties": {
"character": {"type": "text", "analyzer": "standard"},
"name": {"type": "text", "analyzer": "standard"}
}
}
}
}
}
# match: 对搜索词进行分词分析,再去索引查询
GET /movie/_search
{
"query": {
"match": {
"title": "steve"
}
}
}
# term: 不进行分词分析,直接去索引查询,精确匹配
GET /movie/_search
{
"query": {
"term": {
"title": "steve"
}
}
}
# or
# 默认是将分词后的token进行or匹配,只要有一个 token 命中索引即返回数据
GET /movie/_search
{
"query": {
"match": {
"title": "basketball and aliens"
}
}
}
# and
# 可以修改为分词后的 token 全部命中才可以返回数据
GET /movie/_search
{
"query": {
"match": {
"title": {
"query": "basketball and aliens",
"operator": "and"
}
}
}
}
# 最小词匹配项
# 分词后的 token 要命中指定个数
GET /movie/_search
{
"query": {
"match": {
"title": {
"query": "basketball and aliens",
"operator": "or",
"minimum_should_match": 1
}
}
}
}
# 短语查询
# 搜索词分词后,要求所有 token 都命中且位置顺序一致(仍然走倒排索引,利用的是词项的位置信息)
GET /movie/_search
{
"query": {
"match_phrase": {
"title": "steve"
}
}
}
# 多字段查询
GET /movie/_search
{
"query": {
"multi_match": {
"query": "basketball and aliens",
"fields": ["title", "overview"]
}
}
}
# 查看打分过程
GET /movie/_search
{
"explain": true,
"query": {
"match": {
"title": "steve"
}
}
}
# 多字段查询打分
# 通过结果来看,会对不同 field 进行打分,然后取最大的一个
GET /movie/_search
{
"explain": true,
"query": {
"multi_match": {
"query": "basketball and aliens",
"fields": ["title", "overview"]
}
}
}
# 多字段查询优化1
# 对重要字段的得分乘以放大系数(boost),例如 title^10
GET /movie/_search
{
"explain": true,
"query": {
"multi_match": {
"query": "basketball and aliens",
"fields": ["title^10", "overview"]
}
}
}
# 多字段查询优化2
# tie_breaker: 最终得分 = 最高字段得分 + tie_breaker * 其余字段得分之和
GET /movie/_search
{
"explain": true,
"query": {
"multi_match": {
"query": "basketball and aliens",
"fields": ["title^10", "overview"],
"tie_breaker": 0.3
}
}
}
# 多字段查询 从 max of 改为 sum of
# bool 查询,根据条件,会先过滤不匹配的 document,再进行打分,最后 sum of
# must: 必须都为 true
# must_not: 必须都是 false
# should: 其中有个 true 即可
GET /movie/_search
{
"explain": true,
"query": {
"bool": {
"should": [
{"match": {"title": "basketball and aliens"}},
{"match": {"overview": "basketball and aliens"}}
]
}
}
}
# 多字段查询
# multi_match 会根据不同的 type 进行打分匹配
# 默认是 best_fields ,最匹配模式
GET /movie/_search
{
"query": {
"multi_match": {
"query": "basketball and aliens",
"fields": ["title", "overview"],
"type": "best_fields"
}
}
}
# best_fields 等同于 dis_max + queries 模式
GET /movie/_search
{
"explain": true,
"query": {
"dis_max": {
"queries": [
{"match": {"title": "basketball and aliens"}},
{"match": {"overview": "basketball and aliens"}}
]
}
}
}
# best_fields 模式,最佳字段打分:不管各个 fields 命中了多少分词,只取各个 fields 打分的最大值
# "explanation" : "((overview:basketbal overview:alien) | (title:basketbal title:alien))"
GET /movie/_validate/query?explain
{
"query": {
"multi_match": {
"query": "basketball and aliens",
"fields": ["title", "overview"],
"type": "best_fields"
}
}
}
# most_fields 模式,综合考虑所有字段的得分(各字段得分相加),类似 bool + should
# "explanation" : "((overview:basketbal overview:alien) | (title:basketbal title:alien))~1.0"
GET /movie/_validate/query?explain
{
"query": {
"multi_match": {
"query": "basketball and aliens",
"fields": ["title", "overview"],
"type": "most_fields"
}
}
}
# cross_fields 分词导向模式,以分词为单位计算栏位的总分
# 先算分词在各个field 中的分数,取最大值,然后再和其他分词结果相加
# "explanation" : "blended(terms:[overview:basketbal, title:basketbal]) blended(terms:[overview:alien, title:alien])"
GET /movie/_validate/query?explain
{
"query": {
"multi_match": {
"query": "basketball and aliens",
"fields": ["title", "overview"],
"type": "cross_fields"
}
}
}
##########################
# 过滤和排序
##########################
# AND OR 快速使用
# 不需要使用 bool + must 之类的写法
GET /movie/_search
{
"explain": true,
"query": {
"query_string": {
"fields": ["title", "overview"],
"query": "steve AND jobs"
}
}
}
# filter,score为0
# 单条件过滤
GET /movie/_search
{
"query": {
"bool": {
"filter": {
"term": {"title": "steve"}
}
}
}
}
# filter 多条件过滤 + 排序
# score 为 null
GET /movie/_search
{
"query": {
"bool": {
"filter": [
{"term": {"title": "steve"}},
{"term": {"cast.name": "gaspard"}},
{"range": {"release_date": {"lte": "2015/01/01"}}},
{"range": {"popularity": {"gte": "25"}}}
]
}
},
"sort": [
{
"popularity": {
"order": "desc"
}
}
]
}
# 带match打分的 filter
GET /movie/_search
{
"query": {
"bool": {
"should": [
{"match": {"title": "steve"}}
],
"filter": [
{"term": {"title": "steve"}},
{"term": {"cast.name": "gaspard"}},
{"range": {"release_date": {"lte": "2015/01/01"}}},
{"range": {"popularity": {"gte": "25"}}}
]
}
}
}
# function score 自定义打分
GET /movie/_search
{
"query": {
"function_score": {
// 原始查询得到 oldValue
"query": {
"multi_match": {
"query": "steve job",
"fields": ["title", "overview"],
"operator": "or",
"type": "most_fields"
}
},
"functions": [
{
"field_value_factor": {
"field": "popularity", // 对应要处理的字段
"modifier": "log2p", // 字段值+2后,计算对数
"factor": 1.2 // 字段预处理:字段值先乘以 factor(此处为 1.2),再应用 modifier
}
}
],
"score_mode": "sum", // 多个 function 之间的得分合并方式,默认 multiply 相乘
"boost_mode": "sum" // 和 oldValue 之间的得分处理模式,默认 multiply 相乘
}
}
}
ik 分词器
# 测试 ik
# 宁缺毋滥(粗粒度切分)
GET _analyze?pretty
{
"analyzer": "ik_smart",
"text": "中华人民共和国国歌"
}
# 最大努力
GET _analyze?pretty
{
"analyzer": "ik_max_word",
"text": "中华人民共和国国歌"
}
# 普通分词
GET _analyze?pretty
{
"analyzer": "standard",
"text": "中华人民共和国国歌"
}
# analyzer 指定的是构建索引时使用的分词器
# search_analyzer 指定的是搜索关键字时的分词
# 最佳实践
# 索引阶段用 ik_max_word
# 查询阶段用 ik_smart
实践
- 定义字段逻辑
- 定义字段类型
- 定义字段 analyzer