数据类型

  1. Text: 被 Analyzer 索引的字符串类型
  2. KeyWord: 不会被 Analyzer 分词、只能被精确匹配的字符串类型
  3. Date: 日期类型,配合 format
  4. 数字类型: long、integer、short、double 等
  5. boolean: true false
  6. Array: 数组
  7. Object: json 嵌套
  8. IP类型
  9. Geo_point: 地理位置

语法

添加

控制分片

  1. // 控制分片
  2. put /employee
  3. {
  4. "settings": {
  5. "number_of_shards": 1,
  6. "number_of_replicas": 0
  7. }
  8. }
  • es 集群状态主要看副本分片(从分片)能否分配到除主分片所在节点之外的 n-1 个节点上

暂时存储

  1. GET /_all
  2. delete /employee
  3. # 控制分片
  4. put /employee
  5. {
  6. "settings": {
  7. "number_of_shards": 1,
  8. "number_of_replicas": 0
  9. }
  10. }
  11. # 非结构化方式新建索引
  12. # _doc type,但是仅仅作为占位符
  13. PUT /employee/_doc/1
  14. {
  15. "name": "akarin2",
  16. "age": 20
  17. }
  18. GET /employee/_doc/1
  19. # 会直接覆盖上面的内容,即默认需要全量更新
  20. PUT /employee/_doc/1
  21. {
  22. "name": "akarin2"
  23. }
  24. # 获取索引记录
  25. GET /employee/_doc/1
  26. # 指定某一字段更新
  27. POST /employee/_update/1
  28. {
  29. "doc":{
  30. "name": "fuck"
  31. }
  32. }
  33. # 强制指定创建,如果存在,则失败
  34. POST /employee/_create/1
  35. {
  36. "name": "lee",
  37. "age": 30
  38. }
  39. # 删除某个文档
  40. DELETE /employee/_doc/1
  41. # 查询全部文档
  42. GET /employee/_search
  43. #/
  44. DELETE /employee
  45. # 使用结构化的方式创建 索引
  46. # 设置 mapping 的结构
  47. PUT /employee
  48. {
  49. "settings": {
  50. "number_of_shards": 1,
  51. "number_of_replicas" : 0
  52. },
  53. "mappings":{
  54. "properties": {
  55. "name": {
  56. "type": "text"
  57. },
  58. "age":{
  59. "type": "integer"
  60. }
  61. }
  62. }
  63. }
  64. # 此时进行put 操作
  65. PUT /employee/_doc/1
  66. {
  67. "name": "baba"
  68. }
  69. # 虽然显示缺少了 age ,但是 /employee mapping 还是保持原结构
  70. # 但是如果 put 了新的不冲突字段,mapping 还是会跟着改
  71. GET /employee/_doc/1
  72. PUT /employee/_doc/1
  73. {
  74. "name": "demo 1",
  75. "age": 123
  76. }
  77. PUT /employee/_doc/2
  78. {
  79. "name": "demo 2",
  80. "age": 1234
  81. }
  82. # 不带条件查询所有记录
  83. GET /employee/_search
  84. {
  85. "query": {
  86. "match_all": {}
  87. }
  88. }
  89. # 分页查询
  90. # from 从第几个索引开始(索引从0开始)
  91. # size 要几条记录
  92. GET /employee/_search
  93. {
  94. "query": {
  95. "match_all": {}
  96. },
  97. "from": 0,
  98. "size": 1
  99. }
  100. # 条件查询
  101. # 带关键字查询, 只能单一字段
  102. GET /employee/_search
  103. {
  104. "query": {
  105. "match": {
  106. "name": "demo"
  107. }
  108. }
  109. }
  110. # 带排序的查询
  111. # 此时返回的 _score 为 null,结果按 sort 字段排序而不是按相关性打分
  112. GET /employee/_search
  113. {
  114. "query": {
  115. "match": {
  116. "name": "demo"
  117. }
  118. },
  119. "sort":{
  120. "age":{
  121. "order": "asc"
  122. }
  123. }
  124. }
  125. #
  126. # filter
  127. # 作用在 bool
  128. # filter 类似 relation db 中的 where
  129. # 打分均为0.0
  130. # term 与 match 的区别: match 会先对搜索词分词(analyze), term 是纯粹的 equals
  131. GET /employee/_search
  132. {
  133. "query": {
  134. "bool":{
  135. "filter" :{
  136. "term": {
  137. "name": "demo"
  138. }
  139. }
  140. }
  141. }
  142. }
  143. # 带聚合的查询
  144. # 对某个字段进行聚合
  145. # 返回的聚合字段中,key 就是对应 field 的值
  146. GET /employee/_search
  147. {
  148. "query": {
  149. "match": {
  150. "name": "demo"
  151. }
  152. },
  153. "aggs":{
  154. "随便起个名称":{
  155. "terms": {
  156. "field": "age"
  157. }
  158. }
  159. }
  160. }
  161. #
  162. # 分词
  163. #
  164. PUT /movie/_doc/1
  165. {
  166. "name": "eating a apple"
  167. }
  168. # 查看分词状态
  169. # 根据对应 field analyzer text 进行分词
  170. # 默认是标准字符处理,以空格和标点符号分割内容
  171. GET /movie/_analyze
  172. {
  173. "field": "name",
  174. "text": ["eating a applet"]
  175. }
  176. DELETE /movie
  177. # 修改为 english 分词
  178. # 用结构化方式创建索引时,路径不要带 _doc/{id}
  179. PUT /movie
  180. {
  181. "mappings":{
  182. "properties":{
  183. "name":{
  184. "type": "text",
  185. "analyzer": "english"
  186. }
  187. }
  188. }
  189. }
  190. # 查看新的分词状态
  191. GET /movie/_analyze
  192. {
  193. "field": "name",
  194. "text": ["Eatting this apple"]
  195. }
  196. # 添加一条记录
  197. POST /movie/_doc/1
  198. {
  199. "name": "Eatting a apple"
  200. }
  201. GET /movie/_doc/1
  202. GET /movie/_search
  203. {
  204. "query": {
  205. "match": {
  206. "name": "Eatting a apple"
  207. }
  208. }
  209. }
  210. GET /movie/_search
  211. {
  212. "query": {
  213. "bool": {
  214. "filter": {
  215. "term" : {
  216. "name": "Eatting a apple"
  217. }
  218. }
  219. }
  220. }
  221. }
  222. #/
  223. # tmdb
  224. #/
  225. DELETE /movie
  226. # 时间format可以对应多种,前面语法要求带个8
  227. # object character name 的分词用 standard 精确查询
  228. PUT /movie
  229. {
  230. "settings": {
  231. "number_of_shards": 1,
  232. "number_of_replicas": 0
  233. },
  234. "mappings": {
  235. "properties": {
  236. "title": {
  237. "type": "text",
  238. "analyzer": "english"
  239. },
  240. "tagline": {
  241. "type": "text",
  242. "analyzer": "english"
  243. },
  244. "release_date": {
  245. "type": "date",
  246. "format": "8yyyy/MM/dd||yyyy/M/dd||yyyy/MM/d||yyyy/M/d"
  247. },
  248. "popularity": {
  249. "type": "double"
  250. },
  251. "overview": {
  252. "type": "text",
  253. "analyzer": "english"
  254. },
  255. "cast": {
  256. "type": "object",
  257. "properties": {
  258. "character": {"type": "text", "analyzer": "standard"},
  259. "name": {"type": "text", "analyzer": "standard"}
  260. }
  261. }
  262. }
  263. }
  264. }
  265. # match: 对搜索词进行分词分析,再去索引查询
  266. GET /movie/_search
  267. {
  268. "query": {
  269. "match": {
  270. "title": "steve"
  271. }
  272. }
  273. }
  274. # term: 不进行分词分析,直接去索引查询,精确匹配
  275. GET /movie/_search
  276. {
  277. "query": {
  278. "term": {
  279. "title": "steve"
  280. }
  281. }
  282. }
  283. # or
  284. # 默认是将分词后的token进行or匹配,只要有一个 token 命中索引即返回数据
  285. GET /movie/_search
  286. {
  287. "query": {
  288. "match": {
  289. "title": "basketball and aliens"
  290. }
  291. }
  292. }
  293. # and
  294. # 可以修改为分词后的 token 全部命中才可以返回数据
  295. GET /movie/_search
  296. {
  297. "query": {
  298. "match": {
  299. "title": {
  300. "query": "basketball and aliens",
  301. "operator": "and"
  302. }
  303. }
  304. }
  305. }
  306. # 最小词匹配项
  307. # 分词后的 token 要命中指定个数
  308. GET /movie/_search
  309. {
  310. "query": {
  311. "match": {
  312. "title": {
  313. "query": "basketball and aliens",
  314. "operator": "or",
  315. "minimum_should_match": 1
  316. }
  317. }
  318. }
  319. }
  320. # 短语查询
  321. # match_phrase 同样走倒排索引: 搜索词分词后,要求所有 token 按相同顺序相邻出现才算命中
  322. GET /movie/_search
  323. {
  324. "query": {
  325. "match_phrase": {
  326. "title": "steve"
  327. }
  328. }
  329. }
  330. # 多字段查询
  331. GET /movie/_search
  332. {
  333. "query": {
  334. "multi_match": {
  335. "query": "basketball and aliens",
  336. "fields": ["title", "overview"]
  337. }
  338. }
  339. }
  340. # 查看打分过程
  341. GET /movie/_search
  342. {
  343. "explain": true,
  344. "query": {
  345. "match": {
  346. "title": "steve"
  347. }
  348. }
  349. }
  350. # 多字段查询打分
  351. # 通过结果来看,会对不同 field 进行打分,然后取最大的一个
  352. GET /movie/_search
  353. {
  354. "explain": true,
  355. "query": {
  356. "multi_match": {
  357. "query": "basketball and aliens",
  358. "fields": ["title", "overview"]
  359. }
  360. }
  361. }
  362. # 优化多字段查询优化1
  363. # 对重要字段进行 放大系数的乘积
  364. GET /movie/_search
  365. {
  366. "explain": true,
  367. "query": {
  368. "multi_match": {
  369. "query": "basketball and aliens",
  370. "fields": ["title^10", "overview"]
  371. }
  372. }
  373. }
  374. # 优化多字段查询优化2
  375. # tie_breaker: 最终得分 = 最高字段得分 + tie_breaker * 其余匹配字段得分之和
  376. GET /movie/_search
  377. {
  378. "explain": true,
  379. "query": {
  380. "multi_match": {
  381. "query": "basketball and aliens",
  382. "fields": ["title^10", "overview"],
  383. "tie_breaker": 0.3
  384. }
  385. }
  386. }
  387. # 多字段查询 max of 改为 sum of
  388. # bool 查询,根据条件,会先过滤不匹配的 document,再进行打分,最后 sum of
  389. # must: 必须都为 true
  390. # must_not: 必须都是 false
  391. # should: 其中有一个为 true 即可
  392. GET /movie/_search
  393. {
  394. "explain": true,
  395. "query": {
  396. "bool": {
  397. "should": [
  398. {"match": {"title": "basketball and aliens"}},
  399. {"match": {"overview": "basketball and aliens"}}
  400. ]
  401. }
  402. }
  403. }
  404. # 多字段查询
  405. # multi_match 会根据不同的 type 进行打分匹配
  406. # 默认是 best_fields ,最匹配模式
  407. GET /movie/_search
  408. {
  409. "query": {
  410. "multi_match": {
  411. "query": "basketball and aliens",
  412. "fields": ["title", "overview"],
  413. "type": "best_fields"
  414. }
  415. }
  416. }
  417. # best_fields 等同于 dis_max + queries 模式
  418. GET /movie/_search
  419. {
  420. "explain": true,
  421. "query": {
  422. "dis_max": {
  423. "queries": [
  424. {"match": {"title": "basketball and aliens"}},
  425. {"match": {"overview": "basketball and aliens"}}
  426. ]
  427. }
  428. }
  429. }
  430. # best_fields 模式,最匹配模式打分,不管 fields 有没有对应分词,只管各个 fields 打分最大值
  431. # "explanation" : "((overview:basketbal overview:alien) | (title:basketbal title:alien))"
  432. GET /movie/_validate/query?explain
  433. {
  434. "query": {
  435. "multi_match": {
  436. "query": "basketball and aliens",
  437. "fields": ["title", "overview"],
  438. "type": "best_fields"
  439. }
  440. }
  441. }
  442. # most_fields 模式,综合所有匹配字段的得分(相加), 类似 bool + should
  443. # "explanation" : "((overview:basketbal overview:alien) | (title:basketbal title:alien))~1.0"
  444. GET /movie/_validate/query?explain
  445. {
  446. "query": {
  447. "multi_match": {
  448. "query": "basketball and aliens",
  449. "fields": ["title", "overview"],
  450. "type": "most_fields"
  451. }
  452. }
  453. }
  454. # cross_fields 分词导向模式,以分词为单位计算栏位的总分
  455. # 先算分词在各个field 中的分数,取最大值,然后再和其他分词结果相加
  456. # "explanation" : "blended(terms:[overview:basketbal, title:basketbal]) blended(terms:[overview:alien, title:alien])"
  457. GET /movie/_validate/query?explain
  458. {
  459. "query": {
  460. "multi_match": {
  461. "query": "basketball and aliens",
  462. "fields": ["title", "overview"],
  463. "type": "cross_fields"
  464. }
  465. }
  466. }
  467. ##########################
  468. # 过滤和排序
  469. ##########################
  470. # AND OR 快速使用
  471. # 不用使用 bool + must 之类的
  472. GET /movie/_search
  473. {
  474. "explain": true,
  475. "query": {
  476. "query_string": {
  477. "fields": ["title", "overview"],
  478. "query": "steve AND jobs"
  479. }
  480. }
  481. }
  482. # filter 的 score 为 0
  483. # 单条件过滤
  484. GET /movie/_search
  485. {
  486. "query": {
  487. "bool": {
  488. "filter": {
  489. "term": {"title": "steve"}
  490. }
  491. }
  492. }
  493. }
  494. # filter 多条件过滤 + 排序
  495. # score null
  496. GET /movie/_search
  497. {
  498. "query": {
  499. "bool": {
  500. "filter": [
  501. {"term": {"title": "steve"}},
  502. {"term": {"cast.name": "gaspard"}},
  503. {"range": {"release_date": {"lte": "2015/01/01"}}},
  504. {"range": {"popularity": {"gte": "25"}}}
  505. ]
  506. }
  507. },
  508. "sort": [
  509. {
  510. "popularity": {
  511. "order": "desc"
  512. }
  513. }
  514. ]
  515. }
  516. # match打分的 filter
  517. GET /movie/_search
  518. {
  519. "query": {
  520. "bool": {
  521. "should": [
  522. {"match": {"title": "steve"}}
  523. ],
  524. "filter": [
  525. {"term": {"title": "steve"}},
  526. {"term": {"cast.name": "gaspard"}},
  527. {"range": {"release_date": {"lte": "2015/01/01"}}},
  528. {"range": {"popularity": {"gte": "25"}}}
  529. ]
  530. }
  531. }
  532. }
  533. # function score 自定义打分
  534. GET /movie/_search
  535. {
  536. "query": {
  537. "function_score": {
  538. // 原始查询得到 oldValue
  539. "query": {
  540. "multi_match": {
  541. "query": "steve job",
  542. "fields": ["title", "overview"],
  543. "operator": "or",
  544. "type": "most_fields"
  545. }
  546. },
  547. "functions": [
  548. {
  549. "field_value_factor": {
  550. "field": "popularity", // 对应要处理的字段
  551. "modifier": "log2p", // 字段值+2后,计算对数
  552. "factor": 1.2 // 字段值预处理: 先乘以 factor(此处为 1.2),再应用 modifier
  553. }
  554. }
  555. ],
  556. "score_mode": "sum", // 不同的 field value 之间的得分处理模式,默认 multiply 相乘
  557. "boost_mode": "sum" // 函数得分与 oldValue(原始查询得分)之间的合并模式,默认 multiply 相乘
  558. }
  559. }
  560. }

ik 分词器

  1. # 测试 ik
  2. # 宁缺勿滥
  3. GET _analyze?pretty
  4. {
  5. "analyzer": "ik_smart",
  6. "text": "中华人民共和国国歌"
  7. }
  8. # 最大努力
  9. GET _analyze?pretty
  10. {
  11. "analyzer": "ik_max_word",
  12. "text": "中华人民共和国国歌"
  13. }
  14. # 普通分词
  15. GET _analyze?pretty
  16. {
  17. "analyzer": "standard",
  18. "text": "中华人民共和国国歌"
  19. }
  20. # analyzer 指定的是构建索引时使用的分词器
  21. # search_analyzer 指定的是搜索关键字时的分词
  22. # 最佳实践
  23. # 索引阶段用 ik_max_word
  24. # 查询阶段用 ik_smart

实践

  1. 定义字段逻辑
  2. 定义字段类型
  3. 定义字段 analyzer