ES系列十、ES常用查询API

1.term查询

{ 
    "query": {
        "term": {
            "title": "crime"
        }
    }
}

1.1.指定权重

{ 
    "query": {
        "term": {
            "title": {
                "value":"crime",
                "boost":10.0
             }
        }
    }
}

1.2.多term查询：查询tags字段中包含novel或book的文档

{ 
    "query": {
        "terms": {
            "tags": ["novel","book"]
        }
    }
}

2.常用词查询

2.1.cutoff_frequency查询：出现频率低于这个阈值的词将被归为低频词（重要词）并优先用于匹配，高于该阈值的词被归为高频词（常用词），仅用于对已匹配低频词的文档进行评分

{ 
    "query": {
        "common": {
             "title":{
                 "query":"crime and punishment",
                 "cutoff_frequency":0.001
             }
        }
    }
}

2.2.match查询( 不支持lucene查询语法，分词后再查询 )

查询title包含crime或and或punishment的文档

{ 
    "query": {
        "match": {
            "title": "crime and punishment"
        }
    }
}

2.3.operator操作符

指定查询文本分词后各词条之间的匹配关系：and要求全部词条都匹配，or只要求任意一个词条匹配（默认为or）

{ 
    "query": {
        "match": {
            "title": {
                 "query":"crime and punishment",
                 "operator":"and"
            }
        }
    }
}

2.4.短语查询

{ 
    "query": {
        "match_phrase": {
            "title": {
                 "query":"crime  punishment",
                 "slop":1
            }
        }
    }
}

2.5.前缀查询

对查询关键词的最后一个词条做前缀匹配

{ 
    "query": {
        "match_phrase_prefix": {
            "title": {
                 "query":"crime  punish",
                 "slop":1,
                 "max_expansions":20
            }
        }
    }
}

2.6.multi_match( 针对多个字段查询 )

{ 
    "query": {
        "multi_match": {
             "query":"crime  heller",
             "fields":["title","author"]
        }
    }
}

3.query_string查询( 支持lucene的查询语法 )

3.1复合语法查询

title字段包含crime，且权重为10，也要包含punishment，但是otitle不包含cat，同时author字段包含Fyodor和dostoevsky。

{ 
    "query": {
        "query_string": {
             "query":"title:crime^10 +title:punishment -otitle:cat +author:(+Fyodor +dostoevsky)",
             "default_field":"title"
        }
    }
}

3.2.针对多字段查询

use_dis_max表示使用dis_max（最大分）查询：对于给定的关键词，只有得分最高的那个字段的分数会计入文档的最终评分，而不是所有包含该词条的字段分数之和。

{ 
    "query": {
        "query_string": {
             "query":"crime heller",
             "fields":["title","author"],
              "use_dis_max":true
        }
    }
}

常见写法：

{"query":{"query_string":{"query":"name:obama"}}}

name字段为obama

{"query":{"query_string":{"query":"nam\\*:obama"}}}

存在一个nam开头的字段，值为obama

{"query":{"query_string":{"query":"_missing_:name"}}}

name字段值为null的文档

{"query":{"query_string":{"query":"_exists_:name"}}}

name字段值不为null的文档

{"query":{"query_string":{"query":"name:(obama OR xidada)"}}}

name字段为Obama或者xidada的文档

3.3.simple_query_string查询

解析出错时不抛异常，丢弃查询无效的部分

{ 
    "query": {
        "simple_query_string": {
             "query":"title:crime^10 +title:punishment -otitle:cat +author:(+Fyodor +dostoevsky)",
             "default_operator":"or"
        }
    }
}

3.4.标识符查询

{ 
    "query": {
        "ids": {
             "type":"book",
             "values":["1","2","3"]
        }
    }
}

3.4.前缀查询

前缀匹配给定的关键词

{ 
    "query": {
        "prefix": {
             "title":"cri"
        }
    }
}

指定权重

{ 
    "query": {
        "prefix": {
             "title":{
                 "value":"cri",
                 "boost":3.0
             }
        }
    }
}

3.5.fuzzy模糊查询

使用编辑距离的模糊查询，计算量较大，但是对用户拼写错的场景比较有用

{ 
    "query": {
        "fuzzy": {
             "title":"crme"
        }
    }
}

指定最小相似度偏差

{ 
    "query": {
        "fuzzy": {
             "title":{
                 "value":"crme",
                 "min_similarity":1
              }
        }
    }
}

3.6.通配符查询

支持*和?等通配符

{ 
    "query": {
        "wildcard": {
             "title": "cr?me"
        }
    }
}

?：任意单个字符

*：0个或任意多个字符

性能较差：必须扫描整个倒排索引才能完成匹配

3.7.范围查询

只能针对单个字段，可以是数值型的，也可以是基于字符串的。

{ 
    "query": {
        "range": {
             "year": {
                  "gte" :1890,
                  "lte":1900
              }
        }
    }
}

3.8.正则表达式查询

查询性能取决于正则表达式

{ 
    "query": {
        "regexp": {
             "title": {
                  "value" :"cr.m[ae]",
                  "boost":10.0  //配置评分乘以10
              }
        }
    }
}

K[A-Z].+

[0-9]：指定范围内的数字

[a-z]：指定范围内的字母

.：任意一个字符

+：前面的正则表达式可以出现一次或多次

wildcard和regexp，与prefix原理一致，都会扫描整个索引，性能很差

4.布尔查询( 组合查询 )

{
    "query": {
        "bool": {
            "must": {
                "term": {
                    "title": "crime"
                }
            }, 
            "should": {
                "range": {
                    "year": {
                        "from": 1900, 
                        "to": 2000
                    }
                }
            }, 
            "must_not": {
                "term": {
                    "otitle": "nothing"
                }
            }
        }
    }
}

must:必须包含的条件，must_not:必须不包含的条件，should:不强制要求，但包含的话评分更高、会更匹配

搜索多个条件：

GET test*/_search
{
  "size":3,
  "query": {
    "bool":{
      "must": [
          {"match":{"message": "学生"}},
          {"match":{"message": "所有"}}
        ],
      "should": [
          {"match": {"port": "53198"}},
          {"match": {"@timestamp":"2018-09-17T17:49:25.991Z"}}
        ],
      "must_not": [
          {"match": {"port": "64273"}},
          {"match": {"port":"1234"}}
        ]
    }

  }

}

结果：

{
  "took": 25,
  "timed_out": false,
  "_shards": {
    "total": 35,
    "successful": 35,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 14,
    "max_score": 17.026089,
    "hits": [
      {
        "_index": "test-name",
        "_type": "doc",
        "_id": "Ff0g6GUBPXqEl7zCsbQb",
        "_score": 17.026089,
        "_source": {
          "@timestamp": "2018-09-17T15:23:06.878Z",
          "appname": "test-name",
          "level": "INFO",
          "port": 55714,
          "thread_name": "main",
          "level_value": 20000,
          "appName": "test-name",
          "@version": 1,
          "host": "192.168.1.100",
          "logger_name": "com.example.service.StudentService",
          "@metdata": {
            "ip_address": "192.168.1.100"
          },
          "message": "查询所有学生，pageNo1,pageSize1"
        }
      },
      {
        "_index": "test-name",
        "_type": "doc",
        "_id": "WFOm6GUBlATfpgHyvD55",
        "_score": 16.024178,
        "_source": {
          "@timestamp": "2018-09-17T17:49:25.991Z",
          "appname": "test-name",
          "level": "INFO",
          "port": 53198,
          "thread_name": "main",
          "level_value": 20000,
          "appName": "test-name",
          "@version": 1,
          "host": "192.168.1.100",
          "logger_name": "com.example.service.StudentService",
          "@metdata": {
            "ip_address": "192.168.1.100"
          },
          "message": "查询所有学生，pageNo1,pageSize1"
        }
      },
      {
        "_index": "test-name",
        "_type": "doc",
        "_id": "nAMg42UBRHcv2wBhnFDg",
        "_score": 14.024178,
        "_source": {
          "@timestamp": "2018-09-16T16:04:54.948Z",
          "appname": "test-name",
          "level": "INFO",
          "port": 58709,
          "thread_name": "main",
          "level_value": 20000,
          "appName": "test-name",
          "@version": 1,
          "host": "172.20.10.6",
          "logger_name": "com.example.service.StudentService",
          "@metdata": {
            "ip_address": "172.20.10.6"
          },
          "message": "查询所有学生，pageNo1,pageSize1"
        }
      }
    ]
  }
}

还可以这么实现：

GET test*/_search
{
  "size":3,
  "query": {
    "query_string":{"query": "message:学生 +message:所有 -port:55714"}
  }
}