Search Api

语法 范围
/_search 集群上所有的索引
/index1/_search index1
/index1,index2/_search index1,index2
/index*/_search 以index开头的索引

Term :Beautiful Mind 等效于 Beautiful OR Mind。使用括号括起来:(Beautiful Mind)

Phrase:"Beautiful Mind" 等效于 Beautiful AND Mind 。Phrase查询还要求前后顺序保持一致。使用引号

一、Url Search 

在url中使用查询参数

//查询title字段包含2012的
GET movies/_search?q=2012&df=title
{
  "profile": "true"
}
//查询title字段包含2012的,按year倒序,分页返回前10条,超时时间1分钟
GET movies/_search?q=title:2012&sort=year:desc&from=0&size=10&timeout=1m
{
  "profile": "true"
}

//查询所有字段包含2012的
GET movies/_search?q=2012
{
  "profile": "true"
}

//PhraseQuery
GET movies/_search?q=title:"Beautiful Mind"
{
  "profile": "true"
}

//TermQuery。两个Term在一起默认是 OR 的关系
GET movies/_search?q=title:(Beautiful Mind)
{
  "profile": "true"
}

//title 必须包括Beautiful 和 Mind
GET movies/_search?q=title:(Beautiful AND Mind)
{
  "profile": "true"
}
//title 必须包括Mind(%2B 是 + 的URL编码,表示该Term必须出现),Beautiful 为可选匹配项
GET movies/_search?q=title:(Beautiful %2BMind)
{
  "profile": "true"
}

//title 必须包括Beautiful 不能包括Mind
GET movies/_search?q=title:(Beautiful NOT Mind)
{
  "profile": "true"
}

//查询1980年及以后的电影
GET movies/_search?q=year:>=1980
{
  "profile": "true"
}

//title包含b开头的
GET movies/_search?q=title:b*
{
  "profile": "true"
}

//模糊匹配&近似匹配
GET movies/_search?q=title:beautifl~1
{
  "profile": "true"
}
GET movies/_search?q=title:"lord rings"~2
{
  "profile": "true"
}

二、Request Body Search 

使用elasticsearch提供的,基于json格式的更加完备的DSL

// 分页查询第一页,每页1条数据
GET kibana_sample_data_ecommerce/_search
{
  "from": 0,
  "size": 1,
  "query": {
    "match_all": {}
  }
}

//根据order_date倒序
GET kibana_sample_data_ecommerce/_search
{
  "sort":[{"order_date":"desc"}], 
  "query": {
    "match_all": {}
  }
}

//只返回order_date字段
GET kibana_sample_data_ecommerce/_search
{
  "_source": ["order_date"], 
  "query": {"match_all": {}}
}

//脚本字段,新增一个new_field字段
GET kibana_sample_data_ecommerce/_search
{
  "script_fields": {
    "new_field": {
      "script": {
        "lang": "painless",
        "source": "doc['order_date'].value+'_hello'"
      }
    }
  }, 
  "query": {"match_all": {}}
}

//查询包含Last或者包含Christmas
GET movies/_search
{
  "query": {
    "match": {
      "title": "Last Christmas"
    }
  }
  , "profile": "true"
}

//查询既包含Last又包含Christmas
GET movies/_search
{
  "query": {
    "match": {
      "title": {
        "query": "Christmas Last",
        "operator": "and"
      }
    }
  }
}

//slop 指定中间忽略匹配的数量。搜索出的结果:One I Love, The
GET movies/_search
{
  "query": {
    "match_phrase": {
      "title":{
        "query":"one love",
        "slop": 1
      }
    }
  }
  
}

1,Query String&Simple  Query String

//title包含Homeward和Bound
GET movies/_search
{
  "query": {
    "query_string": {
      "default_field": "title",
      "query": "Homeward AND Bound"
    }
  }
}

//title包含Homeward和Bound 或者 包含Lost和in
GET movies/_search
{
  "query": {
    "query_string": {
      "default_field": "title",
      "query": "(Homeward AND Bound) or (Lost and in)"
    }
  }
}

//title包含Homeward和Bound 
GET movies/_search
{
  "query": {
    "simple_query_string": {
      "query": "Homeward Bound",
      "fields": ["title"],
      "default_operator": "and"
    }
  }
}

2,term查询

term查询用于结构化数据的精确匹配查询,全文检索使用match查询。bool属于一种复合查询,可以组合term查询和match查询

GET movies/_search
{
  "query": {
        "term": {
          "title.keyword": {
            "value": "Homeward Bound: The Incredible Journey"
          }
     }
  }
}
//跳过算分,提高性能
GET movies/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "term": {
          "title.keyword": {
            "value": "Homeward Bound: The Incredible Journey"
          }
     }}
    }
  }
}
GET request_audit_logs/_search
{
   "query": {
     "bool": {
       "filter": [
         {"term": {
           "url": "http://localhost:18908/api/User/Login"
         }},
         {
           "match":{
             "request_content":"15607172222"
           }
         },
         {
           "range": {
             "request_time": {
               "gte": "2020-01-01 00:00:00"
             }
           }
         }
       ]
     }
   }
}

3,Query&Filtering与多字符串多字段查询

gte:大于等于
lte:小于等于
gt:大于
lt:小于

must 必须匹配。贡献算分(Query Context)
should 选择性匹配。贡献算分(Query Context)
must_not 查询子句,必须不能匹配。不贡献算分(Filter Context)
filter 必须匹配,但不贡献算分(Filter Context)

GET movies/_search
{
  "query": {
    "bool": {
      "must": {"term": {"year":"1960"}},
      "filter": {"term":{"title.keyword":"Pollyanna"}},
      "must_not":{"range":{"year":{"lte":1961}}},
      "should": [
        {"term":{"genre.keyword": "Children"}},
        {"term":{"genre.keyword": "Comedy"}}
      ]
    }
  }
}
//title中包含The并且不包含Good
GET movies/_search
{
  
  "query": {
    "bool": {
      "must": [{"match": {"title": "The"}}],
      "must_not": [{"match": {"title": "Good"}}]
    }
  }
}
//匹配title中包含The的文档;其中title还包含Grifters的文档,算分会乘以negative_boost而降低,排在靠后
GET movies/_search
{
  "query": {
    "boosting": {
      "positive": {"match": {
        "title": "The"
      }},
      "negative": {"match": {
        "title": "Grifters"
      }},
      "negative_boost": 0.5
    }
    
  }
}

4,单字符串多字段查询:Dis Max Query

//1,获取最佳匹配语句的评分_score
//2,将其他匹配语句的评分与tie_breaker相乘
//3,对以上评分求和并规范化
//tie_breaker是一个介于0-1之间的浮点数。0代表使用最佳匹配;1代表所有语句同等重要
POST blogs/_search
{
    "query": {
        "dis_max": {
            "queries": [
                { "match": { "title": "Quick pets" }},
                { "match": { "body":  "Quick pets" }}
            ],
            "tie_breaker": 0
        }
    }
}

5,单字符串多字段查询:Multi Match

最佳字段(Best Fields):当字段之间相互竞争,有相互关联。例如title和body这样的字段。评分来自最匹配字段

多数字段(Most Fields):处理英文内容时:一种常见的手段是,在主字段(English Analyzer),抽取词干,加入同义词,以匹配更多的文档。相同的文本,加入子字段(Standard Analyzer),以提供更加精确的匹配。其他字段作为匹配文档提高相关度的信号。匹配字段越多则越好

混合字段(Cross Fields):对于某些实体,例如人名、地址、图书信息。需要在多个字段中确定信息,单个字段只能作为整体的一部分。希望在任何这些列出的字段中找到尽可能多的词

POST blogs/_search
{
  "query": {
    "multi_match": {
      "type": "best_fields",
      "query": "Quick pets",
      "fields": ["title","body"],
      "tie_breaker": 0.2,
      "minimum_should_match": "20%"
    }
  }
}
//英文分词器(提取词干)可以匹配更多的文档、提高召回率;标准分词器的子字段可以提供更精确的匹配。注意:应先执行下面的 PUT /titles 创建mapping,再执行 _bulk 写入数据
POST titles/_bulk
{ "index": { "_id": 1 }}
{ "title": "My dog barks" }
{ "index": { "_id": 2 }}
{ "title": "I see a lot of barking dogs on the road " }
PUT /titles
{
  "mappings": {
    "properties": {
      "title": {
        "type": "text",
        "analyzer": "english",
        "fields": {"std": {"type": "text","analyzer": "standard"}}
      }
    }
  } 
}
GET /titles/_search
{
   "query": {
        "multi_match": {
            "query":  "barking dogs",
            "type":   "most_fields",
            "fields": [ "title", "title.std" ]
        }
    }
}
GET /titles/_search
{
   "query": {
        "multi_match": {
            "query":  "barking dogs",
            "type":   "cross_fields",
            "operator": "and", 
            "fields": [ "title", "title.std" ]
        }
    }
}

 6,Search Template与 Index Alias

//删除搜索模版
DELETE _scripts/tmdb
//设置搜索模版
POST _scripts/tmdb
{
  "script":{
    "lang": "mustache",
    "source": {
      "_source":["title"],
    "size":20,
     "query":{
       "bool": {
         "must": [
           {"term": {
              "title.keyword":"{{q}}"
          }}
         ]
       }
     }
    }
  }
  
}
//使用搜索模版
POST movies/_search/template
{
  "id":"tmdb",
  "params": {
    "q":"Lamerica"
  }
}

//删除别名
POST _aliases
{
  "actions": [
    {
      "remove": {
        "index": "movies",
        "alias": "movies2"
      }
    }
  ]
  
}
//设置别名
POST _aliases
{
  "actions": [
    {
      "add": {
        "index": "movies",
        "alias": "movies2"
      }
    }
  ]
}
//使用别名查询
GET movies2/_search

 7,Function Score Query优化算分

//fields算分度 * votes
POST /blogs/_search
{
  "query": {
    "function_score": {
      "query": {
        "multi_match": {
          "query":    "popularity",
          "fields": [ "title", "content" ]
        }
      },
      "field_value_factor": {
        "field": "votes"
      }
    }
  }
}

//fields算分度 * log(1 + votes)
POST /blogs/_search
{
  "query": {
    "function_score": {
      "query": {
        "multi_match": {
          "query":    "popularity",
          "fields": [ "title", "content" ]
        }
      },
      "field_value_factor": {
        "field": "votes",
        "modifier": "log1p"
      }
    }
  }
}

 8,Term Suggester与Phrase Suggester

missing 如果该词在索引中已经存在,就不提供建议

popular 推荐出现频率更加高的词

always 无论是否存在,都提供建议

POST /articles/_search
{
  "size": 1,
  "query": {
    "match": {
      "body": "lucen rock"
    }
  },
  "suggest": {
    "term": {
      "text": "lucen rock",
      "term": {
        "suggest_mode": "missing",
        "field": "body"
      }
    }
  }
}

phrase多增加了几个参数

max_errors 最多可以拼错的terms数

confidence 置信度阈值,用于限制返回的结果:只有得分高于(输入短语得分 × confidence)的候选才会返回,默认为1;设置为0时返回得分最高的前N个候选

POST /articles/_search
{
  "suggest": {
    "my-suggestion": {
      "text": "lucne and elasticsear rock hello world ",
      "phrase": {
        "field": "body",
        "max_errors":2,
        "confidence":0,
        "direct_generator":[{
          "field":"body",
          "suggest_mode":"always"
        }],
        "highlight": {
          "pre_tag": "<em>",
          "post_tag": "</em>"
        }
      }
    }
  }
}

9,自动补全与基于上下文的提示

DELETE articles
//设置mapping
PUT articles
{
  "mappings": {
    "properties": {
      "title_completion":{
        "type": "completion"
      }
    }
  }
}

POST articles/_bulk
{ "index" : { } }
{ "title_completion": "lucene is very cool"}
{ "index" : { } }
{ "title_completion": "Elasticsearch builds on top of lucene"}
{ "index" : { } }
{ "title_completion": "Elasticsearch rocks"}
{ "index" : { } }
{ "title_completion": "elastic is the company behind ELK stack"}
{ "index" : { } }
{ "title_completion": "Elk stack rocks"}
{ "index" : {} }


POST articles/_search?pretty
{
  "size": 0,
  "suggest": {
    "article-suggester": {
      "prefix": "elk",
      "completion": {
        "field": "title_completion"
      }
    }
  }
}

GET comments/_search
DELETE comments
PUT comments
//设置mapping,多了contexts
PUT comments/_mapping
{
  "properties": {
    "comment_autocomplete":{
      "type": "completion",
      "contexts":[{
        "type":"category",
        "name":"comment_category"
      }]
    }
  }
}

POST comments/_doc
{
  "comment":"I love the star war movies",
  "comment_autocomplete":{
    "input":["star wars"],
    "contexts":{
      "comment_category":"movies"
    }
  }
}

POST comments/_doc
{
  "comment":"Where can I find a Starbucks",
  "comment_autocomplete":{
    "input":["starbucks"],
    "contexts":{
      "comment_category":"coffee"
    }
  }
}


POST comments/_search
{
  "suggest": {
    "MY_SUGGESTION": {
      "prefix": "sta",
      "completion":{
        "field":"comment_autocomplete",
        "contexts":{
          "comment_category":"coffee"
        }
      }
    }
  }
}
View Code

10,Search After与Scroll Api解决分页大于10000条数据问题

//search_after:order_id为740002后面下一条数据
GET kibana_sample_data_ecommerce/_search
{
  "size": 1,
  "query": {"match_all": {}},
  "search_after":[740002],
  "sort": [{"order_id":"desc"}]
}

//先创建快照
POST kibana_sample_data_ecommerce/_search?scroll=5m
{
  "size": 1, 
  "query": {"match_all": {}}
}
//根据上一个scroll_id查询下一页数据
POST _search/scroll
{
  "scroll":"1m",
  "scroll_id":"DXF1ZXJ5QW5kRmV0Y2gBAAAAAAAAEuEWVHlEU0NNSFFSd2VQVElQX3Vza2Zfdw=="
}

11,使用乐观锁解决并发写入问题

①内部版本控制: if_seq_no+if_primary_term

②使用外部版本(使用其他数据库作为主要数据存储):version+version_type=external

PUT products/_doc/1?if_seq_no=1&if_primary_term=1
{
  "title":"iphone",
  "count":100
}

PUT products/_doc/1?version=30000&version_type=external
{
  "title":"iphone",
  "count":100
}

三、Mapping

1,设置Dynamic Mapping

  "true" "false" "strict"
新增字段是否可保存 yes yes no
新增字段是否可被搜索 yes no no
Mapping会不会被更新 yes no no
PUT user/_mapping
{
  "dynamic":"true"
}
PUT user/_mapping
{
  "dynamic":"false"
}
PUT user/_mapping
{
  "dynamic":"strict"
}

2,定义mapping

①index控制字段是否可以被搜索

②null_value设置一个默认值"NULL",方便搜索null值字段

③text类型和keyword类型区别:text类型会使用默认分词器分词,当然你也可以为他指定特定的分词器。如果定义成keyword类型,那么默认就不会对其进行分词

//查询user的mapping
GET user/_mapping
PUT employee
{
  "mappings": {
    "properties": {
      "firstName":{
        "type": "text",
        "copy_to": "fullname"
      },
      "lastName":{
        "type": "text",
        "copy_to": "fullname"
      },
      "mobile":{
        "type": "text",
        "index": false
      },
      "age":{
        "type": "integer"
      },
      "cardNo":{
        "type": "keyword",
        "null_value": "NULL"
      }
    }
  }
}

//添加值
POST employee/_doc
{
  "firstName":"zhang",
  "lastName":"san",
  "mobile":"1300000000",
  "age":20,
  "cardNo":null
}
//查询cardNo是null的值
GET employee/_search
{
  "query": {
    "match": {
      "cardNo": "NULL"
    }
  }
}
//搜索报错:mobile字段设置了"index": false,不能被搜索
GET employee/_search
{
  "query": {
    "match": {
      "mobile": "1300000000"
    }
  }
}

四、Index Template与Dynamic Template

1,Index Template

按index_patterns匹配索引名称,在新索引被创建时生效(修改模版不会影响已经创建的索引)。当一个索引被创建的时候

①应用elasticsearch默认的settings和mappings

②应用order数值低的index template中的设定

③应用order高的index template中的设定,之前的设定会被覆盖

④应用创建索引时,用户指定的settings和mappings,并覆盖之前模版中的设定

//order数值:控制“merging”的过程。多个模版会merge在一起
PUT _template/template_default
{
  "index_patterns": ["*"],
  "order": 0,
  "version": 1,
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 1
  }
}

//创建test开头的索引时,主分片设置1,副本分片设置2,关闭日期检测,开启数值检测
PUT _template/template_test
{
  "index_patterns": ["test*"],
  "order": 1,
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 2
  },
  "mappings": {
    "date_detection": false,
    "numeric_detection": true
  }
}

//创建索引时由用户指定settings,会覆盖模版中的设定(number_of_replicas最终为5)
PUT testmy
{
  "settings": {
    "number_of_replicas": 5
  }
}

//查看template信息
GET _template/template_default
GET _template/template_test

//删除
DELETE testmy
DELETE _template/template_default
DELETE _template/template_test
PUT request_audit_logs_v2
{
  "mappings": {
    "properties": {
      "key":{
        "type": "keyword"
      },
      "post_type":{
        "type": "keyword"
      },
      "url":{
        "type": "keyword"
      },
      "api":{
        "type": "keyword"
      },
      "request_content":{
        "type": "object"
      },
      "hander":{
        "type": "text",
        "index": false
      },
      "status":{
        "type": "keyword"
      },
      "response_content":{
        "type": "object"
      },
      "request_time":{
        "type": "date",
        "format": "yyyy-MM-dd HH:mm:ss"
      },
      "response_time":{
        "type": "date",
        "format": "yyyy-MM-dd HH:mm:ss"
      },
      "exception":{
        "type": "text"
      },
      "run_time":{
        "type": "float"
      },
      "thread_num":{
        "type": "integer"
      },
      "create_time":{
        "type": "date",
        "format": "yyyy-MM-dd HH:mm:ss"
      },
      "modify_time":{
        "type": "date",
        "format": "yyyy-MM-dd HH:mm:ss"
      },
      "user_name":{
        "type": "keyword"
      }
    },
    "dynamic":"strict"

  }
}

2,Dynamic Template

设置在具体的index上面

GET myindex/_search?q=full_name:zhang
//将name.fitst和name.last映射到full_name字段上
PUT myindex
{
  "mappings": {
     "dynamic_templates":[
        { 
          "full_name":{
            "path_match":"name.*",
            "path_unmatch":"*.middle",
            "mapping":{
              "type":"text",
              "copy_to":"full_name"
            }
          }
        }
       ]
  }
}

POST myindex/_doc
{
  "name":{
    "fitst":"zhang",
    "middle":"123",
    "last":"san"
  }
}

五、Aggregation

Bucket Aggregation:一系列满足特定条件的文档集合

Metric Aggregation:一些数学运算,可以对文档字段进行统计分析

Pipeline Aggregation:对其他聚合结果进行二次聚合

Matrix Aggregation:支持对多个字段的操作并提供一个结果矩阵

//统计去往目的地的天气情况、价格情况
GET kibana_sample_data_flights/_search
{
  "size": 0,
  "aggs": {
    "flights_dest": {
      "terms": {
        "field": "DestCountry"
      },
      "aggs": {
        "stats_price": {
          "stats": {
            "field": "AvgTicketPrice"
          }
        },
        "wather":{
          "terms": {
            "field": "DestWeather"
          }
        }
      }
    }
  }
}
PUT /employees/
{
  "mappings" : {
      "properties" : {
        "age" : {
          "type" : "integer"
        },
        "gender" : {
          "type" : "keyword"
        },
        "job" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 50
            }
          }
        },
        "name" : {
          "type" : "keyword"
        },
        "salary" : {
          "type" : "integer"
        }
      }
    }
}

PUT /employees/_bulk
{ "index" : {  "_id" : "1" } }
{ "name" : "Emma","age":32,"job":"Product Manager","gender":"female","salary":35000 }
{ "index" : {  "_id" : "2" } }
{ "name" : "Underwood","age":41,"job":"Dev Manager","gender":"male","salary": 50000}
{ "index" : {  "_id" : "3" } }
{ "name" : "Tran","age":25,"job":"Web Designer","gender":"male","salary":18000 }
{ "index" : {  "_id" : "4" } }
{ "name" : "Rivera","age":26,"job":"Web Designer","gender":"female","salary": 22000}
{ "index" : {  "_id" : "5" } }
{ "name" : "Rose","age":25,"job":"QA","gender":"female","salary":18000 }
{ "index" : {  "_id" : "6" } }
{ "name" : "Lucy","age":31,"job":"QA","gender":"female","salary": 25000}
{ "index" : {  "_id" : "7" } }
{ "name" : "Byrd","age":27,"job":"QA","gender":"male","salary":20000 }
{ "index" : {  "_id" : "8" } }
{ "name" : "Foster","age":27,"job":"Java Programmer","gender":"male","salary": 20000}
{ "index" : {  "_id" : "9" } }
{ "name" : "Gregory","age":32,"job":"Java Programmer","gender":"male","salary":22000 }
{ "index" : {  "_id" : "10" } }
{ "name" : "Bryant","age":20,"job":"Java Programmer","gender":"male","salary": 9000}
{ "index" : {  "_id" : "11" } }
{ "name" : "Jenny","age":36,"job":"Java Programmer","gender":"female","salary":38000 }
{ "index" : {  "_id" : "12" } }
{ "name" : "Mcdonald","age":31,"job":"Java Programmer","gender":"male","salary": 32000}
{ "index" : {  "_id" : "13" } }
{ "name" : "Jonthna","age":30,"job":"Java Programmer","gender":"female","salary":30000 }
{ "index" : {  "_id" : "14" } }
{ "name" : "Marshall","age":32,"job":"Javascript Programmer","gender":"male","salary": 25000}
{ "index" : {  "_id" : "15" } }
{ "name" : "King","age":33,"job":"Java Programmer","gender":"male","salary":28000 }
{ "index" : {  "_id" : "16" } }
{ "name" : "Mccarthy","age":21,"job":"Javascript Programmer","gender":"male","salary": 16000}
{ "index" : {  "_id" : "17" } }
{ "name" : "Goodwin","age":25,"job":"Javascript Programmer","gender":"male","salary": 16000}
{ "index" : {  "_id" : "18" } }
{ "name" : "Catherine","age":29,"job":"Javascript Programmer","gender":"female","salary": 20000}
{ "index" : {  "_id" : "19" } }
{ "name" : "Boone","age":30,"job":"DBA","gender":"male","salary": 30000}
{ "index" : {  "_id" : "20" } }
{ "name" : "Kathy","age":29,"job":"DBA","gender":"female","salary": 20000}


# Metric 聚合,找到最低的工资
GET employees/_search
{
  "size": 0,
  "aggs": {
    "min_salary": {
      "min": {
        "field": "salary"
      }
    }
  }
}

# Metric 聚合,找到最高的工资
GET employees/_search
{
  "size": 0,
  "aggs": {
    "max_salary": {
      "max": {
        "field": "salary"
      }
    }
  }
}


# 多个 Metric 聚合,找到最低最高和平均工资
GET employees/_search
{
  "size": 0,
  "aggs": {
    "max_salary": {
      "max": {
        "field": "salary"
      }
    },
    "min_salary": {
      "min": {
        "field": "salary"
      }
    },
    "avg_salary": {
      "avg": {
        "field": "salary"
      }
    }
  }
}

# 一个聚合,输出多值
GET employees/_search
{
  "size": 0,
  "aggs": {
    "stats_salary": {
      "stats": {
        "field": "salary"
      }
    }
  }
}

# 对keyword 进行聚合
GET employees/_search
{
  "size": 0,
  "aggs": {
    "jobs": {
      "terms": {
        "field": "job.keyword"
      }
    }
  }
}

# 对 Text 字段进行 terms 聚合查询
#对 Text 字段打开 fielddata,支持terms aggregation
PUT employees/_mapping
{
  "properties" : {
    "job":{
       "type":     "text",
       "fielddata": true
    }
  }
}
POST employees/_search
{
  "size": 0,
  "aggs": {
    "jobs": {
      "terms": {
        "field":"job"
      }
    }
  }
}

# cardinality 相当于distinct count
POST employees/_search
{
  "size": 0,
  "aggs": {
    "cardinate": {
      "cardinality": {
        "field": "job.keyword"
      }
    }
  }
}


# 对 性别的 keyword 进行聚合
POST employees/_search
{
  "size": 0,
  "aggs": {
    "gender": {
      "terms": {
        "field": "gender"
      }
    }
  }
}

#指定 bucket 的 size
POST employees/_search
{
  "size": 0,
  "aggs": {
    "ages_5": {
      "terms": {
        "field":"age",
        "size":3
      }
    }
  }
}

# 指定size,不同工种中,年纪最大的3个员工的具体信息
POST employees/_search
{
  "size": 0,
  "aggs": {
    "jobs": {
      "terms": {
        "field":"job.keyword"
      },
      "aggs": {
        "old_employee": {
          "top_hits": {
            "size": 3,
            "sort": [{
              "age": "desc"
            }]
          }
        }
      }
    }
  }
}

#自定义工资区间分桶
POST employees/_search
{
  "size": 0,
  "aggs": {
    "salary_range": {
      "range": {
        "field": "salary",
        "ranges": [
          {
            "to": 10000
          },
          {
            "from": 10000, 
            "to": 20000
          }
          ,
          {
            "key": ">=20000", 
            "from": 20000
          }
        ]
      }
    }
  }
}


#Salary Histogram,工资0到10万,以 5000一个区间进行分桶
POST employees/_search
{
  "size": 0,
  "aggs": {
    "salary_histrogram": {
      "histogram": {
        "field": "salary",
        "interval": 5000,
        "extended_bounds": {
          "min": 0,
          "max": 100000
        }
      }
    }
  }
}


# 嵌套聚合1,按照工作类型分桶,并统计工资信息
POST employees/_search
{
  "size": 0,
  "aggs": {
    "Job_salary_stats": {
      "terms": {
        "field": "job.keyword"
      },
      "aggs": {
        "salary": {
          "stats": {
            "field": "salary"
          }
        }
      }
    }
  }
}

# 多次嵌套。根据工作类型分桶,然后按照性别分桶,计算工资的统计信息
POST employees/_search
{
  "size": 0,
  "aggs": {
    "Job_gender_stats": {
      "terms": {
        "field": "job.keyword"
      },
      "aggs": {
        "gender_stats": {
          "terms": {
            "field": "gender"
          },
          "aggs": {
            "salary_stats": {
              "stats": {
                "field": "salary"
              }
            }
          }
        }
      }
    }
  }
}
# 平均工资最低的工作类型
POST employees/_search
{
  "size": 0,
  "aggs": {
    "jobs": {
      "terms": {
        "field": "job.keyword",
        "order": {
          "avg_salary": "desc"
        }
      },
      "aggs": {
        "avg_salary": {
          "avg": {
            "field": "salary"
          }
        }
      }
    },
    "min_salary_by_job":{
      "min_bucket": {
        "buckets_path": "jobs>avg_salary"
      }
    }
  }
}


# 平均工资最高的工作类型
POST employees/_search
{
  "size": 0,
  "aggs": {
    "jobs": {
      "terms": {
        "field": "job.keyword",
        "order": {
          "avg_salary": "desc"
        }
      },
      "aggs": {
        "avg_salary": {
          "avg": {
            "field": "salary"
          }
        }
      }
    },
    "max_salary_by_job":
    {
      "max_bucket": {
        "buckets_path": "jobs>avg_salary"
      }
    }
  }
}

# 平均工资的平均工资
POST employees/_search
{
  "size": 0,
  "aggs": {
    "jobs": {
      "terms": {
        "field": "job.keyword",
        "order": {
          "avg_salary": "desc"
        }
      },
      "aggs": {
        "avg_salary": {
          "avg": {
            "field": "salary"
          }
        }
      }
    },
    "avg_salary_by_job":
    {
      "avg_bucket": {
        "buckets_path": "jobs>avg_salary"
      }
    }
  }
}

# 平均工资的统计分析
POST employees/_search
{
  "size": 0,
  "aggs": {
    "jobs": {
      "terms": {
        "field": "job.keyword",
        "size": 10, 
        "order": {
          "avg_salary": "desc"
        }
      },
      "aggs": {
        "avg_salary": {
          "avg": {
            "field": "salary"
          }
        }
      }
    },
    "stats_salary_by_job":
    {
      "stats_bucket": {
        "buckets_path": "jobs>avg_salary"
      }
    }
  }
}

# 平均工资的百分位数
POST employees/_search
{
  "size": 0,
  "aggs": {
    "jobs": {
      "terms": {
        "field": "job.keyword",
        "size": 10
      },
      "aggs": {
        "avg_salary": {
          "avg": {
            "field": "salary"
          }
        }
      }
    },
    "percentiles_salary_by_job":{
      "percentiles_bucket": {
        "buckets_path": "jobs>avg_salary"
      }
    }
  }
}

#按照年龄对平均工资求导
POST employees/_search
{
  "size": 0,
  "aggs": {
    "age": {
      "histogram": {
        "field": "age",
        "min_doc_count": 1,
        "interval": 1
      },
      "aggs": {
        "avg_salary": {
          "avg": {
            "field": "salary"
          }
        },
        "derivative_avg_salary":{
          "derivative": {
            "buckets_path": "avg_salary"
          }
        }
      }
    }
  }
}

作用范围

# Query 年龄大于等于20岁的员工,根据job分桶
POST employees/_search
{
  "size": 0,
  "query": {
    "range": {
      "age": {
        "gte": 20
      }
    }
  },
  "aggs": {
    "jobs": {
      "terms": {
        "field":"job.keyword"
        
      }
    }
  }
}


#Filter 年长的员工job分桶,和所有的员工job分桶
POST employees/_search
{
  "size": 0, 
  "aggs": {
    "older_person": {
      "filter": {"range": {
        "age": {
          "gte": 35
        }
      }},
      "aggs": {
        "jobs": {
          "terms": {
            "field": "job.keyword"
          }
        }
      }
    },
    "all_jobs":{
      "terms": {
        "field": "job.keyword"
      }
    }
  }
}

#Post Filter. 一条语句,找出所有的job类型。还能找到聚合后符合条件的结果
#post_filter只过滤返回的hits,不影响聚合结果:聚合仍然统计所有的job类型,hits中只显示job为Javascript Programmer的文档
POST employees/_search
{
  "aggs": {
    "jobs": {
      "terms": {
        "field": "job.keyword"
      }
    }
  },
  "post_filter": {
    "match": {
      "job.keyword": "Javascript Programmer"
    }
  }
}

#global
#global忽略query的条件限制
POST employees/_search
{
  "size": 0,
  "query": {
    "range": {
      "age": {
        "gte": 40
      }
    }
  },
  "aggs": {
    "jobs": {
      "terms": {
        "field":"job.keyword"
        
      }
    },
    
    "all":{
      "global":{},
      "aggs":{
        "salary_avg":{
          "avg":{
            "field":"salary"
          }
        }
      }
    }
  }
}

排序

#排序 order 根据分桶之后的数量进行排序
POST employees/_search
{
  "size": 0,
  "query": {
    "range": {
      "age": {
        "gte": 20
      }
    }
  },
  "aggs": {
    "jobs": {
      "terms": {
        "field":"job.keyword",
        "order":[
          {"_count":"asc"},
          {"_key":"desc"}
          ]
        
      }
    }
  }
}


#排序 order 根据子聚合进行排序
POST employees/_search
{
  "size": 0,
  "aggs": {
    "jobs": {
      "terms": {
        "field":"job.keyword",
        "order":[  {
            "avg_salary":"desc"
          }]
        
        
      },
    "aggs": {
      "avg_salary": {
        "avg": {
          "field":"salary"
        }
      }
    }
    }
  }
}

#排序 order 根据子统计min进行排序
POST employees/_search
{
  "size": 0,
  "aggs": {
    "jobs": {
      "terms": {
        "field":"job.keyword",
        "order":[  {
            "stats_salary.min":"desc"
          }]
        
        
      },
    "aggs": {
      "stats_salary": {
        "stats": {
          "field":"salary"
        }
      }
    }
    }
  }
}

聚合分析精准度问题:

doc_count_error_upper_bound:被遗漏的term分桶,包含的文档,有可能的最大值

sum_other_doc_count:除了返回结果bucket的terms以外,其他terms的文档总数(总数-返回的总数)

size和shard_size的区别?
size是最终返回的bucket的数量。
shard_size是从每个shard上取回的bucket的数量。然后,每个shard上的结果,会在coordinating节点上再做一次汇总,返回最终的size个bucket。

①如何解决Terms不准的问题:

  terms聚合分析不准的原因,数据分散在多个分片上,Coordinating Node无法获取数据全貌

  解决方案1:当数据量不大时,设置Primary Shard为1;实现准确性

  解决方案2:在分布式数据上,设置shard_size参数,提高精确度(原理:每次从shard上额外多获取数据,提升准确率)

六、重建索引

一般在以下几种情况下,需要重建索引

  索引的mappings发生变更:字段类型更改,分词器及字典更新

  索引的settings发生更改:索引的主分片数发生改变

  集群内,集群间需要做数据迁移

Elasticsearch的内置提供API

  Update By Query:在现有索引上重建

  Reindex:在其他索引上重建索引

# 修改 Mapping,增加子字段,使用英文分词器
PUT blogs/_mapping
{
      "properties" : {
        "content" : {
          "type" : "text",
          "fields" : {
            "english" : {
              "type" : "text",
              "analyzer":"english"
            }
          }
        }
      }
    }

# Update所有文档
POST blogs/_update_by_query
{

}  
# 创建新的索引并且设定新的Mapping
PUT blogs_fix/
{
  "mappings": {
        "properties" : {
        "content" : {
          "type" : "text",
          "fields" : {
            "english" : {
              "type" : "text",
              "analyzer" : "english"
            }
          }
        },
        "keyword" : {
          "type" : "keyword"
        }
      }    
  }
}

# Reindex API
POST  _reindex
{
  "source": {
    "index": "blogs"
  },
  "dest": {
    "index": "blogs_fix"
  }
}

七、Ingest Pipeline 

1,测试

# 测试split tags
POST _ingest/pipeline/_simulate
{
  "pipeline": {
    "description": "to split blog tags",
    "processors": [
      {
        "split": {
          "field": "tags",
          "separator": ","
        }
      }
    ]
  },
  "docs": [
    {
      "_index": "index",
      "_id": "id",
      "_source": {
        "title": "Introducing big data......",
        "tags": "hadoop,elasticsearch,spark",
        "content": "You konw, for big data"
      }
    },
    {
      "_index": "index",
      "_id": "idxx",
      "_source": {
        "title": "Introducing cloud computering",
        "tags": "openstack,k8s",
        "content": "You konw, for cloud"
      }
    }
  ]
}

2,创建pipeline

# 为ES添加一个 Pipeline
PUT _ingest/pipeline/blog_pipeline
{
  "description": "a blog pipeline",
  "processors": [
      {
        "split": {
          "field": "tags",
          "separator": ","
        }
      },

      {
        "set":{
          "field": "views",
          "value": 0
        }
      }
    ]
}

#查看Pipeline
GET _ingest/pipeline/blog_pipeline


#测试pipeline
POST _ingest/pipeline/blog_pipeline/_simulate
{
  "docs": [
    {
      "_source": {
        "title": "Introducing cloud computering",
        "tags": "openstack,k8s",
        "content": "You konw, for cloud"
      }
    }
  ]
}

3,修复之前的数据

#增加update_by_query的条件
POST tech_blogs/_update_by_query?pipeline=blog_pipeline
{
    "query": {
        "bool": {
            "must_not": {
                "exists": {
                    "field": "views"
                }
            }
        }
    }
}

4,使用pipeline更新添加文档

POST tech_blogs/_doc?pipeline=blog_pipeline
{
  "title":"Introducing big data......",
  "tags":"hadoop,elasticsearch,spark",
  "content":"You konw, for big data"
}
PUT tech_blogs/_doc/2?pipeline=blog_pipeline
{
  "title": "Introducing cloud computering",
  "tags": "openstack,k8s",
  "content": "You konw, for cloud"
}
PUT _ingest/pipeline/stackoverflow_pipeline
{
  "description": "Pipeline for stackoverflow survey",
  "processors": [
    {
      "split": {
        "field": "DatabaseDesireNextYear",
        "separator": ";"
      }
    },
    
    {
      "split": {
        "field": "DatabaseWorkedWith",
        "separator": ";"
      }
    },
    
    {
      "split": {
        "field": "DevEnviron",
        "separator": ";"
      }
    },
    
    {
      "split": {
        "field": "MiscTechDesireNextYear",
        "separator": ";"
      }
    },
    
    {
      "split": {
        "field": "PlatformDesireNextYear",
        "separator": ";"
      }
    },

   {
      "split": {
        "field": "WebFrameDesireNextYear",
        "separator": ";"
      }
    }
    ,

   {
      "split": {
        "field": "Containers",
        "separator": ";"
      }
    }

  ]
}
案例
原文地址:https://www.cnblogs.com/zd1994/p/12650357.html