跳到主要内容

查询语法

Elasticsearch 提供了强大的 Query DSL(Domain Specific Language)查询语言。本章介绍各种查询方式。

基本查询结构

GET /articles/_search
{
"query": {
// 查询条件
},
"from": 0, # 分页起始位置
"size": 10, # 返回文档数量
"sort": [ # 排序
{ "created_at": "desc" }
],
"_source": [ # 返回字段
"title",
"author"
]
}

全文查询

match 查询

最常用的全文查询,会对查询词进行分词:

GET /articles/_search
{
"query": {
"match": {
"title": "Elasticsearch 教程"
}
}
}

# 带参数
GET /articles/_search
{
"query": {
"match": {
"title": {
"query": "Elasticsearch 教程",
"operator": "and", # 默认 or,改为 and 要求所有词都匹配
"minimum_should_match": "75%" # 至少匹配 75% 的词
}
}
}
}

解释

  • operator:or 表示任意一个词匹配即可,and 表示所有词都必须匹配
  • minimum_should_match:控制至少匹配多少个词

match_phrase 查询

精确匹配短语,词的顺序也必须一致:

GET /articles/_search
{
"query": {
"match_phrase": {
"title": "Elasticsearch 入门教程"
}
}
}

# 允许词之间的间隔(slop)
GET /articles/_search
{
"query": {
"match_phrase": {
"title": {
"query": "Elasticsearch 教程",
"slop": 2 # 允许中间间隔 2 个词
}
}
}
}

multi_match 查询

在多个字段中搜索:

GET /articles/_search
{
"query": {
"multi_match": {
"query": "Python 教程",
"fields": ["title", "content", "author"],
"type": "best_fields" # 默认,使用最高分的字段
}
}
}

# 不同的匹配类型
GET /articles/_search
{
"query": {
"multi_match": {
"query": "Python 教程",
"fields": ["title^3", "content"], # title 权重 x3
"type": "most_fields" # 将所有匹配字段的分数相加
}
}
}

type 参数说明

| 类型 | 说明 |
| --- | --- |
| best_fields | 默认,使用最高分字段的分数 |
| most_fields | 所有匹配字段的分数相加 |
| cross_fields | 跨字段匹配,适合人名等 |
| phrase | 在每个字段上执行 match_phrase |
| phrase_prefix | 短语前缀匹配 |

query_string 查询

支持 Lucene 查询语法的高级查询:

GET /articles/_search
{
"query": {
"query_string": {
"query": "title:(Elasticsearch OR Python) AND author:张三",
"default_field": "content"
}
}
}

精确查询

term 查询

精确匹配,不分词:

GET /articles/_search
{
"query": {
"term": {
"status": "published"
}
}
}

# 对于数值字段
GET /articles/_search
{
"query": {
"term": {
"views": 1000
}
}
}

注意:term 查询不会对查询词分词,适合 keyword、数值、日期等精确字段。

terms 查询

匹配多个值(类似 SQL 的 IN):

GET /articles/_search
{
"query": {
"terms": {
"category": ["Python", "Java", "Go"]
}
}
}

# 从另一个文档的字段中获取 terms(terms lookup)
GET /articles/_search
{
"query": {
"terms": {
"author": {
"index": "authors",
"id": "1",
"path": "favorite_authors"
}
}
}
}

range 查询

范围查询:

GET /articles/_search
{
"query": {
"range": {
"views": {
"gte": 1000, # 大于等于
"lte": 10000, # 小于等于
"boost": 2.0 # 权重
}
}
}
}

# 日期范围
GET /articles/_search
{
"query": {
"range": {
"created_at": {
"gte": "2024-01-01",
"lt": "2024-02-01",
"time_zone": "+08:00"
}
}
}
}

# 相对日期
GET /articles/_search
{
"query": {
"range": {
"created_at": {
"gte": "now-7d/d", # 7 天前,向下取整到天
"lt": "now/d" # 今天
}
}
}
}

exists 查询

检查字段是否存在:

GET /articles/_search
{
"query": {
"exists": {
"field": "author"
}
}
}

# 查找字段为空的文档
GET /articles/_search
{
"query": {
"bool": {
"must_not": {
"exists": {
"field": "author"
}
}
}
}
}

ids 查询

根据 ID 查询:

GET /articles/_search
{
"query": {
"ids": {
"values": ["1", "2", "3"]
}
}
}

复合查询

bool 查询

组合多个查询条件:

GET /articles/_search
{
"query": {
"bool": {
"must": [ # 必须匹配,参与评分
{ "match": { "title": "Python" } }
],
"must_not": [ # 必须不匹配
{ "term": { "status": "draft" } }
],
"should": [ # 可选匹配,增加评分
{ "term": { "category": "技术" } },
{ "term": { "tags": "教程" } }
],
"filter": [ # 必须匹配,不参与评分
{ "range": { "views": { "gte": 100 } } }
],
"minimum_should_match": 1 # should 至少匹配几个
}
}
}

must vs filter

| 类型 | 是否参与评分 | 是否缓存 | 使用场景 |
| --- | --- | --- | --- |
| must | 是 | 否 | 需要相关性评分 |
| filter | 否 | 是 | 精确过滤,性能更好 |

boosting 查询

降低某些文档的评分:

GET /articles/_search
{
"query": {
"boosting": {
"positive": {
"match": { "title": "Python" }
},
"negative": {
"term": { "category": "广告" }
},
"negative_boost": 0.5 # 匹配 negative 的文档评分乘以 0.5
}
}
}

constant_score 查询

忽略相关性评分:

GET /articles/_search
{
"query": {
"constant_score": {
"filter": {
"term": { "status": "published" }
},
"boost": 1.2
}
}
}

嵌套查询

nested 查询

查询嵌套对象:

# 假设有以下文档结构
{
"title": "文章标题",
"comments": [
{ "user": "张三", "content": "好文章" },
{ "user": "李四", "content": "学习了" }
]
}

# 查询嵌套对象
GET /articles/_search
{
"query": {
"nested": {
"path": "comments",
"query": {
"bool": {
"must": [
{ "match": { "comments.user": "张三" } },
{ "match": { "comments.content": "好文章" } }
]
}
},
"inner_hits": {} # 返回匹配的嵌套文档
}
}
}

has_child / has_parent 查询

查询父子关系的文档:

# 查询有特定评论的文章
GET /articles/_search
{
"query": {
"has_child": {
"type": "comment",
"query": {
"match": { "content": "好文章" }
}
}
}
}

高亮显示

GET /articles/_search
{
"query": {
"match": { "content": "Elasticsearch" }
},
"highlight": {
"fields": {
"content": {
"pre_tags": ["<em>"],
"post_tags": ["</em>"],
"fragment_size": 150, # 片段大小
"number_of_fragments": 3 # 返回片段数量
}
}
}
}

排序

基本排序

GET /articles/_search
{
"query": { "match_all": {} },
"sort": [
{ "views": "desc" },
{ "created_at": "desc" }
]
}

按相关性评分排序

GET /articles/_search
{
"query": {
"match": { "title": "Python" }
},
"sort": [
"_score",
{ "views": "desc" }
]
}

按脚本排序

GET /articles/_search
{
"query": { "match_all": {} },
"sort": {
"_script": {
"type": "number",
"script": {
"source": "doc['likes'].value + doc['views'].value"
},
"order": "desc"
}
}
}

分页

基本分页

GET /articles/_search
{
"query": { "match_all": {} },
"from": 0, # 起始位置(从 0 开始)
"size": 10 # 每页数量
}

深度分页问题

from + size 不能超过 10000(默认),深度分页会消耗大量内存。

search_after 分页

适合深度分页和实时数据:

# 第一页
GET /articles/_search
{
"query": { "match_all": {} },
"size": 10,
"sort": [
{ "created_at": "desc" },
{ "_id": "desc" }
]
}

# 使用上一页最后一条数据的排序值获取下一页
GET /articles/_search
{
"query": { "match_all": {} },
"size": 10,
"sort": [
{ "created_at": "desc" },
{ "_id": "desc" }
],
"search_after": ["2024-01-15T10:30:00", "abc123"]
}

scroll 滚动查询

适合大批量数据导出:

# 创建滚动上下文,保持 1 分钟
GET /articles/_search?scroll=1m
{
"query": { "match_all": {} },
"size": 1000
}

# 使用 scroll_id 继续获取
GET /_search/scroll
{
"scroll": "1m",
"scroll_id": "FGluY2x1ZGVfY29udGV4dF91dWlkDXF1ZXJ5QW5kRmV0Y2gBFl..."
}

# 清除滚动上下文
DELETE /_search/scroll
{
"scroll_id": "FGluY2x1ZGVfY29udGV4dF91dWlkDXF1ZXJ5QW5kRmV0Y2gBFl..."
}

建议器

term suggester(词项建议)

用于拼写纠错:

POST /articles/_search
{
"suggest": {
"my-suggestion": {
"text": "pyton tutoril",
"term": {
"field": "title"
}
}
}
}

completion suggester(自动补全)

# 创建索引时配置
PUT /articles
{
"mappings": {
"properties": {
"title": {
"type": "text"
},
"title_suggest": {
"type": "completion"
}
}
}
}

# 查询建议
POST /articles/_search
{
"suggest": {
"title-suggest": {
"prefix": "pyth",
"completion": {
"field": "title_suggest"
}
}
}
}

实战示例

综合搜索

GET /articles/_search
{
"query": {
"bool": {
"must": [
{
"multi_match": {
"query": "Python 异步编程",
"fields": ["title^2", "content"],
"type": "best_fields"
}
}
],
"filter": [
{ "term": { "status": "published" } },
{ "range": { "views": { "gte": 100 } } }
],
"should": [
{ "term": { "category": { "value": "Python", "boost": 2 } } },
{ "term": { "tags": "教程" } }
]
}
},
"sort": [
"_score",
{ "created_at": "desc" }
],
"from": 0,
"size": 20,
"highlight": {
"fields": {
"title": {},
"content": {
"fragment_size": 150
}
}
},
"_source": ["title", "author", "summary", "created_at", "views"]
}

小结

本章我们学习了:

  1. 全文查询(match、match_phrase、multi_match)
  2. 精确查询(term、terms、range)
  3. 复合查询(bool、boosting)
  4. 嵌套查询
  5. 高亮显示
  6. 排序和分页
  7. 建议器

练习

  1. 实现一个商品搜索,支持关键词搜索和分类过滤
  2. 实现文章搜索并高亮显示关键词
  3. 使用 search_after 实现深度分页
  4. 实现搜索词拼写纠错功能

参考资源