问题描述:
    媒资库搜索"we",但存储在ES中text类型的title字段值为"wewe",使用 match: {"title": "we"} 搜索不出来;
    原因:

    1. 查看了该条记录的title字段分词情况,"wewe"没有被分词(整体作为一个词项"wewe"被索引);
    2. 查看该mapping使用的分词器(见下文mapping,text字段大多使用ik_max_word);
    3. 查看查询的参数分词,"we"没有被分词(仍是单个词项"we");
    4. "we"匹配不上"wewe":match是按分词后的词项做整词比较,而后者没有被拆出"we"这个词项

    解决:

    1. 使用wildcard查询代替match,利用通配符(如 *we*)实现类似MySQL中 LIKE '%we%' 的模糊匹配效果(注意:以通配符开头的模式查询较慢);
    2. 如果产品要求仍保留ES打分效果(更智能的查询),可以使用bool组合查询,例如 {"bool": {"must": [{"bool": {"should": [wildcard子句, match子句]}}]}},在保证1(wildcard能命中)的情况下,扩展2(match打分)的数据

    使用match查询,会对参数进行分词。官网对match_phrase的说明:"Like the match query, the match_phrase query first analyzes the query string to produce a list of terms. It then searches for all the terms, but keeps only documents that contain all of the search terms, in the same positions relative to each other." ——摘录自官网

    另外,不需要打分的条件多用filter代替match:match属于查询上下文,会涉及到打分(score);filter不计算得分,且结果可以被缓存

    分词器:ik_max_word、ik_smart

    match查询keyword字段:全词匹配;match查询text字段:match参数、text都会分词,只要match的分词结果和text的分词结果有相同的词项就匹配

    1. 查看分词情况

    ```json
    { "_index": "cms_index", "_type": "cms_media", "_id": "a751b867857c4eb38df701c7b142f736", "_version": 3, "found": true, "took": 12, "term_vectors": { "searchAll": { "field_statistics": { "sum_doc_freq": 75748, "doc_count": 1984, "sum_ttf": 297568 }, "terms": {
      "0": { "term_freq": 1, "tokens": [ { "position": 22, "start_offset": 42, "end_offset": 43 } ] },
      "08": { "term_freq": 1, "tokens": [ { "position": 20, "start_offset": 39, "end_offset": 41 } ] },
      "4": { "term_freq": 3, "tokens": [ { "position": 9, "start_offset": 19, "end_offset": 20 }, { "position": 12, "start_offset": 26, "end_offset": 27 }, { "position": 18, "start_offset": 36, "end_offset": 37 } ] },
      "6": { "term_freq": 2, "tokens": [ { "position": 14, "start_offset": 30, "end_offset": 31 }, { "position": 16, "start_offset": 33, "end_offset": 34 } ] },
      "7794": { "term_freq": 1, "tokens": [ { "position": 10, "start_offset": 21, "end_offset": 25 } ] },
      "92683": { "term_freq": 1, "tokens": [ { "position": 7, "start_offset": 12, "end_offset": 17 } ] },
      "92683fd4a7794e4dbb6df6ed4ee08e0e.jpg": { "term_freq": 1, "tokens": [ { "position": 6, "start_offset": 12, "end_offset": 48 } ] },
      "dbb": { "term_freq": 1, "tokens": [ { "position": 13, "start_offset": 27, "end_offset": 30 } ] },
      "df": { "term_freq": 1, "tokens": [ { "position": 15, "start_offset": 31, "end_offset": 33 } ] },
      "e": { "term_freq": 3, "tokens": [ { "position": 11, "start_offset": 25, "end_offset": 26 }, { "position": 21, "start_offset": 41, "end_offset": 42 }, { "position": 23, "start_offset": 43, "end_offset": 44 } ] },
      "ed": { "term_freq": 1, "tokens": [ { "position": 17, "start_offset": 34, "end_offset": 36 } ] },
      "ee": { "term_freq": 1, "tokens": [ { "position": 19, "start_offset": 37, "end_offset": 39 } ] },
      "fd": { "term_freq": 1, "tokens": [ { "position": 8, "start_offset": 17, "end_offset": 19 } ] },
      "jpg": { "term_freq": 1, "tokens": [ { "position": 24, "start_offset": 45, "end_offset": 48 } ] },
      "wewe": { "term_freq": 1, "tokens": [ { "position": 0, "start_offset": 0, "end_offset": 4 } ] },
      "华佗": { "term_freq": 1, "tokens": [ { "position": 5, "start_offset": 9, "end_offset": 11 } ] },
      "原创": { "term_freq": 1, "tokens": [ { "position": 1, "start_offset": 4, "end_offset": 6 } ] },
      "李": { "term_freq": 1, "tokens": [ { "position": 2, "start_offset": 6, "end_offset": 7 } ] },
      "飞": { "term_freq": 1, "tokens": [ { "position": 4, "start_offset": 8, "end_offset": 9 } ] },
      "鸿": { "term_freq": 1, "tokens": [ { "position": 3, "start_offset": 7, "end_offset": 8 } ] }
    } } } }
    ```

    2. mapping

    ```json
    {
      "cms_index": {
        "mappings": {
          "cms_media": {
            "_all": {
              "enabled": false
            },
            "dynamic_templates": [
              {
                "stringTemplate": {
                  "match_mapping_type": "string",
                  "mapping": {
                    "type": "keyword"
                  }
                }
              }
            ],
            "properties": {
              "analyzeId": {
                "type": "keyword"
              },
              "analyzeStatus": {
                "type": "integer"
              },
              "attachList": {
                "type": "text",
                "analyzer": "ik_max_word"
              },
              "author": {
                "type": "keyword"
              },
              "bitrate": {
                "type": "long"
              },
              "businessType": {
                "type": "long"
              },
              "category": {
                "type": "text",
                "analyzer": "comma"
              },
              "content": {
                "type": "text",
                "analyzer": "ik_max_word"
              },
              "contentLength": {
                "type": "integer"
              },
              "createTime": {
                "type": "date",
                "format": "yyyy-MM-dd HH:mm:ss||strict_date_optional_time||epoch_millis"
              },
              "creator": {
                "type": "keyword"
              },
              "creatorId": {
                "type": "keyword"
              },
              "departmentId": {
                "type": "keyword"
              },
              "departmentName": {
                "type": "keyword"
              },
              "description": {
                "type": "text",
                "analyzer": "ik_max_word"
              },
              "duration": {
                "type": "long"
              },
              "external": {
                "type": "long"
              },
              "fileSize": {
                "type": "long"
              },
              "framerate": {
                "type": "float"
              },
              "height": {
                "type": "integer"
              },
              "id": {
                "type": "keyword"
              },
              "intoTime": {
                "type": "date",
                "format": "yyyy-MM-dd HH:mm:ss||strict_date_optional_time||epoch_millis"
              },
              "isCategory": {
                "type": "integer"
              },
              "labelAiList": {
                "properties": {
                  "id": {
                    "type": "keyword"
                  },
                  "name": {
                    "type": "keyword"
                  },
                  "type": {
                    "type": "keyword"
                  }
                }
              },
              "labelAiText": {
                "type": "text",
                "analyzer": "ik_max_word"
              },
              "lesseeId": {
                "type": "keyword"
              },
              "md5": {
                "type": "keyword"
              },
              "name": {
                "type": "text",
                "analyzer": "ik_max_word"
              },
              "path": {
                "type": "keyword"
              },
              "personAiText": {
                "type": "text",
                "analyzer": "ik_max_word"
              },
              "searchAll": {
                "type": "text",
                "analyzer": "ik_max_word"
              },
              "sourceId": {
                "type": "keyword"
              },
              "sourceName": {
                "type": "keyword"
              },
              "sourceSys": {
                "type": "long"
              },
              "sourceUrl": {
                "type": "keyword"
              },
              "subType": {
                "type": "long"
              },
              "syncVersion": {
                "type": "keyword"
              },
              "tags": {
                "type": "text",
                "analyzer": "ik_max_word"
              },
              "thirdId": {
                "type": "keyword"
              },
              "thumbnail": {
                "type": "keyword"
              },
              "type": {
                "type": "integer"
              },
              "videoUrl": {
                "type": "keyword"
              },
              "width": {
                "type": "integer"
              }
            }
          }
        }
      }
    }
    ```