_analyze 是 Elasticsearch 中一个非常有用的 API,它可以帮助你查看某个 field 或者某个 analyzer/tokenizer 是如何对一段文字进行分词并建立索引的。
image.pngimage.png
返回结果字段含义:
token是一个实际被存储在索引中的词
position指明词在原文本中是第几个出现的(从0开始计数)
start_offset和end_offset表示词在原文本中占据的字符位置(start_offset为起始位置,end_offset为结束位置且不包含该位置的字符)。


1、默认analyzer


GET /_analyze
{"analyzer" : "standard", "text" : "床前明月光"}

  1. {
  2. "tokens": [
  3. {
  4. "token": "床",
  5. "start_offset": 0,
  6. "end_offset": 1,
  7. "type": "<IDEOGRAPHIC>",
  8. "position": 0
  9. },
  10. {
  11. "token": "前",
  12. "start_offset": 1,
  13. "end_offset": 2,
  14. "type": "<IDEOGRAPHIC>",
  15. "position": 1
  16. },
  17. {
  18. "token": "明",
  19. "start_offset": 2,
  20. "end_offset": 3,
  21. "type": "<IDEOGRAPHIC>",
  22. "position": 2
  23. },
  24. {
  25. "token": "月",
  26. "start_offset": 3,
  27. "end_offset": 4,
  28. "type": "<IDEOGRAPHIC>",
  29. "position": 3
  30. },
  31. {
  32. "token": "光",
  33. "start_offset": 4,
  34. "end_offset": 5,
  35. "type": "<IDEOGRAPHIC>",
  36. "position": 4
  37. }
  38. ]
  39. }

2、whitespace


GET /_analyze
{"analyzer" : "whitespace", "text" : "床前 明月光"}

  1. {
  2. "tokens": [
  3. {
  4. "token": "床前",
  5. "start_offset": 0,
  6. "end_offset": 2,
  7. "type": "word",
  8. "position": 0
  9. },
  10. {
  11. "token": "明月光",
  12. "start_offset": 3,
  13. "end_offset": 6,
  14. "type": "word",
  15. "position": 1
  16. }
  17. ]
  18. }

3、使用ik分析器

GET /_analyze
{"analyzer" : "ik_max_word", "text" : "床前明月光"}

  1. {
  2. "tokens": [
  3. {
  4. "token": "床前明月光",
  5. "start_offset": 0,
  6. "end_offset": 5,
  7. "type": "CN_WORD",
  8. "position": 0
  9. },
  10. {
  11. "token": "床前",
  12. "start_offset": 0,
  13. "end_offset": 2,
  14. "type": "CN_WORD",
  15. "position": 1
  16. },
  17. {
  18. "token": "明月光",
  19. "start_offset": 2,
  20. "end_offset": 5,
  21. "type": "CN_WORD",
  22. "position": 2
  23. },
  24. {
  25. "token": "明月",
  26. "start_offset": 2,
  27. "end_offset": 4,
  28. "type": "CN_WORD",
  29. "position": 3
  30. },
  31. {
  32. "token": "明",
  33. "start_offset": 2,
  34. "end_offset": 3,
  35. "type": "CN_WORD",
  36. "position": 4
  37. },
  38. {
  39. "token": "月光",
  40. "start_offset": 3,
  41. "end_offset": 5,
  42. "type": "CN_WORD",
  43. "position": 5
  44. },
  45. {
  46. "token": "月",
  47. "start_offset": 3,
  48. "end_offset": 4,
  49. "type": "CN_WORD",
  50. "position": 6
  51. },
  52. {
  53. "token": "光",
  54. "start_offset": 4,
  55. "end_offset": 5,
  56. "type": "CN_CHAR",
  57. "position": 7
  58. }
  59. ]
  60. }


4、使用拼音分析器


GET /_analyze
{"analyzer" : "pinyin", "text" : "床前明月光"}

  1. {
  2. "tokens": [
  3. {
  4. "token": "chuang",
  5. "start_offset": 0,
  6. "end_offset": 1,
  7. "type": "word",
  8. "position": 0
  9. },
  10. {
  11. "token": "cqmyg",
  12. "start_offset": 0,
  13. "end_offset": 5,
  14. "type": "word",
  15. "position": 0
  16. },
  17. {
  18. "token": "qian",
  19. "start_offset": 1,
  20. "end_offset": 2,
  21. "type": "word",
  22. "position": 1
  23. },
  24. {
  25. "token": "ming",
  26. "start_offset": 2,
  27. "end_offset": 3,
  28. "type": "word",
  29. "position": 2
  30. },
  31. {
  32. "token": "yue",
  33. "start_offset": 3,
  34. "end_offset": 4,
  35. "type": "word",
  36. "position": 3
  37. },
  38. {
  39. "token": "guang",
  40. "start_offset": 4,
  41. "end_offset": 5,
  42. "type": "word",
  43. "position": 4
  44. }
  45. ]
  46. }

cfd4346a-2386-46f9-83de-0e36e366de01.jpg