问题描述:
在媒资库中搜索 "we",但存储在 ES 中、text 类型的 title 字段值为 "wewe" 的记录,使用 `match: {"title": "we"}` 搜索不出来。
原因:
- 查看了该条记录的title字段分词情况,“wewe”没有被分词;
- 查看该mapping使用的分词器
- 查看查询参数的分词情况,"we" 没有被进一步分词,仍是一个完整的词项;
- match 按分词后的词项做整词匹配,词项 "we" 与词项 "wewe" 不相等,因此匹配不上(后者没有被拆成 "we" + "we")。
解决:
- 使用 wildcard 查询代替 match,利用通配符(如 `*we*`)实现类似 MySQL 中 `LIKE '%we%'` 的模糊匹配效果(注意:wildcard 使用的是通配符而非正则表达式,且以 `*` 开头的模式性能较差);
- 如果产品要求仍保留 ES 打分效果(更智能的查询),可以使用 bool 查询,在 must 中嵌套多个 should 子句,例如 `{"bool": {"must": [{"bool": {"should": [子句1, 子句2]}}]}}`,在保证子句 1 命中的情况下,用子句 2 扩展数据。
使用 match 查询,会对参数进行分词。官网对 match_phrase 的说明也印证了这一点:"Like the match query, the match_phrase query first analyzes the query string to produce a list of terms. It then searches for all the terms, but keeps only documents that contain all of the search terms, in the same positions relative to each other." ——摘录自官网
另外,不需要相关性打分时,多用 filter 代替 match:match 在查询上下文中会计算打分(score),而 filter 不计算打分,且结果可以被缓存。
分词器:ik_max_word、ik_smart
match 查询 keyword 字段:全词匹配。match 查询 text 字段:match 参数和 text 字段值都会分词,只要 match 的分词结果和 text 的分词结果有相同的词项就匹配。
1. 查看分词情况
```json
{ "_index": "cms_index", "_type": "cms_media", "_id": "a751b867857c4eb38df701c7b142f736", "_version": 3, "found": true, "took": 12, "term_vectors": { "searchAll": { "field_statistics": { "sum_doc_freq": 75748, "doc_count": 1984, "sum_ttf": 297568 }, "terms": {
"0": { "term_freq": 1, "tokens": [ { "position": 22, "start_offset": 42, "end_offset": 43 } ] },
"08": { "term_freq": 1, "tokens": [ { "position": 20, "start_offset": 39, "end_offset": 41 } ] },
"4": { "term_freq": 3, "tokens": [ { "position": 9, "start_offset": 19, "end_offset": 20 }, { "position": 12, "start_offset": 26, "end_offset": 27 }, { "position": 18, "start_offset": 36, "end_offset": 37 } ] },
"6": { "term_freq": 2, "tokens": [ { "position": 14, "start_offset": 30, "end_offset": 31 }, { "position": 16, "start_offset": 33, "end_offset": 34 } ] },
"7794": { "term_freq": 1, "tokens": [ { "position": 10, "start_offset": 21, "end_offset": 25 } ] },
"92683": { "term_freq": 1, "tokens": [ { "position": 7, "start_offset": 12, "end_offset": 17 } ] },
"92683fd4a7794e4dbb6df6ed4ee08e0e.jpg": { "term_freq": 1, "tokens": [ { "position": 6, "start_offset": 12, "end_offset": 48 } ] },
"dbb": { "term_freq": 1, "tokens": [ { "position": 13, "start_offset": 27, "end_offset": 30 } ] },
"df": { "term_freq": 1, "tokens": [ { "position": 15, "start_offset": 31, "end_offset": 33 } ] },
"e": { "term_freq": 3, "tokens": [ { "position": 11, "start_offset": 25, "end_offset": 26 }, { "position": 21, "start_offset": 41, "end_offset": 42 }, { "position": 23, "start_offset": 43, "end_offset": 44 } ] },
"ed": { "term_freq": 1, "tokens": [ { "position": 17, "start_offset": 34, "end_offset": 36 } ] },
"ee": { "term_freq": 1, "tokens": [ { "position": 19, "start_offset": 37, "end_offset": 39 } ] },
"fd": { "term_freq": 1, "tokens": [ { "position": 8, "start_offset": 17, "end_offset": 19 } ] },
"jpg": { "term_freq": 1, "tokens": [ { "position": 24, "start_offset": 45, "end_offset": 48 } ] },
"wewe": { "term_freq": 1, "tokens": [ { "position": 0, "start_offset": 0, "end_offset": 4 } ] },
"华佗": { "term_freq": 1, "tokens": [ { "position": 5, "start_offset": 9, "end_offset": 11 } ] },
"原创": { "term_freq": 1, "tokens": [ { "position": 1, "start_offset": 4, "end_offset": 6 } ] },
"李": { "term_freq": 1, "tokens": [ { "position": 2, "start_offset": 6, "end_offset": 7 } ] },
"飞": { "term_freq": 1, "tokens": [ { "position": 4, "start_offset": 8, "end_offset": 9 } ] },
"鸿": { "term_freq": 1, "tokens": [ { "position": 3, "start_offset": 7, "end_offset": 8 } ] }
} } } }
```
2. mapping
```json
{
"cms_index": {
"mappings": {
"cms_media": {
"_all": {
"enabled": false
},
"dynamic_templates": [
{
"stringTemplate": {
"match_mapping_type": "string",
"mapping": {
"type": "keyword"
}
}
}
],
"properties": {
"analyzeId": {
"type": "keyword"
},
"analyzeStatus": {
"type": "integer"
},
"attachList": {
"type": "text",
"analyzer": "ik_max_word"
},
"author": {
"type": "keyword"
},
"bitrate": {
"type": "long"
},
"businessType": {
"type": "long"
},
"category": {
"type": "text",
"analyzer": "comma"
},
"content": {
"type": "text",
"analyzer": "ik_max_word"
},
"contentLength": {
"type": "integer"
},
"createTime": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss||strict_date_optional_time||epoch_millis"
},
"creator": {
"type": "keyword"
},
"creatorId": {
"type": "keyword"
},
"departmentId": {
"type": "keyword"
},
"departmentName": {
"type": "keyword"
},
"description": {
"type": "text",
"analyzer": "ik_max_word"
},
"duration": {
"type": "long"
},
"external": {
"type": "long"
},
"fileSize": {
"type": "long"
},
"framerate": {
"type": "float"
},
"height": {
"type": "integer"
},
"id": {
"type": "keyword"
},
"intoTime": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss||strict_date_optional_time||epoch_millis"
},
"isCategory": {
"type": "integer"
},
"labelAiList": {
"properties": {
"id": {
"type": "keyword"
},
"name": {
"type": "keyword"
},
"type": {
"type": "keyword"
}
}
},
"labelAiText": {
"type": "text",
"analyzer": "ik_max_word"
},
"lesseeId": {
"type": "keyword"
},
"md5": {
"type": "keyword"
},
"name": {
"type": "text",
"analyzer": "ik_max_word"
},
"path": {
"type": "keyword"
},
"personAiText": {
"type": "text",
"analyzer": "ik_max_word"
},
"searchAll": {
"type": "text",
"analyzer": "ik_max_word"
},
"sourceId": {
"type": "keyword"
},
"sourceName": {
"type": "keyword"
},
"sourceSys": {
"type": "long"
},
"sourceUrl": {
"type": "keyword"
},
"subType": {
"type": "long"
},
"syncVersion": {
"type": "keyword"
},
"tags": {
"type": "text",
"analyzer": "ik_max_word"
},
"thirdId": {
"type": "keyword"
},
"thumbnail": {
"type": "keyword"
},
"type": {
"type": "integer"
},
"videoUrl": {
"type": "keyword"
},
"width": {
"type": "integer"
}
}
}
}
}
}