ES搜索管理

ES搜索管理

准备环境
创建索引库

put
http://localhost:9200/xwx_film


{
"settings":{
"number_of_shards":1, //索引库分片数量
"number_of_replicas":0 //每个分片的副本数
}
}


创建mapping
post
http://localhost:9200/xwx_film/doc/_mapping


{
"properties":{
"film_name":{
"type":"text"
},
"description":{
"type":"text"
},
"cost":{
"type":"double"
},
"cinema":{
"type":"text"
}
}
}

插入文档
post
http://localhost:9200/xwx_film/doc

{
"film_name" : "蜘蛛侠:英雄归来",
"description" : "最受关注的漫威超级英雄片《蜘蛛侠:英雄远征》依旧由导演乔沃茨执导,汤姆:赫兰德继续饰演蜘蛛侠彼得帕克。故事承接《复仇者联盟4:终局之战》,此次蜘蛛侠将前往欧洲展开新的征程,并将对抗由杰克:吉伦哈尔加盟饰演的神秘客。赞达亚、雅各布巴特朗、托尼雷沃罗利等原班人马也将悉数回归。本片将于2019年6月28日在中国大陆上映,经历了第一部的成长经历后,小蜘蛛在失去钢铁侠之后又会面临怎样的危机?敬请期待。",
"cost" : 70.7,
"cinema":"春天国际影城中庚漫游城店"
}

搜索
简单搜索

  1. 搜索指定索引库中的所有文档
  1. GET /xwx_film/_search


2. 搜索指定type中的所有文档

  1. GET /xwx_film/doc/_search


3. 按id搜索

  1. GET /xwx_film/doc/id


DSL搜索
DSL(Domain Specific Language)是ES提出的基于json的搜索方式,在搜索时传入特定的json格式的数据来完成不同的搜索需求。

DSL比URI搜索方式功能强大,在项目中建议使用DSL方式来完成搜索。

DSL搜索方式是使用POST提交,URI为以"_search"结尾(在某index或某type范围内搜索),而在JSON请求体中定义搜索条件。
查询所有文档——matchAllQuery
POST /xwx_film/doc/_search

{
"query":{
"match_all":{}
},
"_source":["film_name","cost"]
}

query: 用来定义搜索条件,
_source:用来指定返回的结果集中需要包含哪些字段。这在文档本身数据量较大但我们只想获取其中特定几个字段数据时有用(既可过滤掉不必要字段,又可提高传输效率)。

结果说明:

  • took,本次操作花费的时间,单位毫秒
  • timed_out,请求是否超时(ES不可用或网络故障时会超时)
  • _shards,本次操作共搜索了哪些分片
  • hits,命中的结果
  • hits.total,符合条件的文档数
  • hits.hits,命中的文档集
  • hits.max_score,hits.hits中各文档得分的最高分,文档得分即查询相关度
  • _source,文档源数据
{
"took": 19,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 8,
"max_score": 1,
"hits": [
{
"_index": "xwx_film",
"_type": "doc",
"_id": "m1dwjmsBgExwdZZ41CUQ",
"_score": 1,
"_source": {
"cost": 39.7,
"film_name": "千与千寻"
}
},
{
"_index": "xwx_film",
"_type": "doc",
"_id": "nFdxjmsBgExwdZZ4KCVN",
"_score": 1,
"_source": {
"cost": 31.7,
"film_name": "千与千寻"
}
},
{
"_index": "xwx_film",
"_type": "doc",
"_id": "nVdyjmsBgExwdZZ44iXR",
"_score": 1,
"_source": {
"cost": 34.7,
"film_name": "千与千寻"
}
},
{
"_index": "xwx_film",
"_type": "doc",
"_id": "nldzjmsBgExwdZZ4vSXN",
"_score": 1,
"_source": {
"cost": 51.7,
"film_name": "玩具总动员4"
}
},
{
"_index": "xwx_film",
"_type": "doc",
"_id": "n1d0jmsBgExwdZZ4YCXg",
"_score": 1,
"_source": {
"cost": 60.7,
"film_name": "玩具总动员4"
}
},
{
"_index": "xwx_film",
"_type": "doc",
"_id": "oFd0jmsBgExwdZZ4rCVM",
"_score": 1,
"_source": {
"cost": 60.7,
"film_name": "玩具总动员4"
}
},
{
"_index": "xwx_film",
"_type": "doc",
"_id": "oVd1jmsBgExwdZZ4jSVP",
"_score": 1,
"_source": {
"cost": 60.7,
"film_name": "蜘蛛侠:英雄归来"
}
},
{
"_index": "xwx_film",
"_type": "doc",
"_id": "old1jmsBgExwdZZ46SV7",
"_score": 1,
"_source": {
"cost": 70.7,
"film_name": "蜘蛛侠:英雄归来"
}
}
]
}
}

Java代码实现:

 /**
  * Runs a match_all search against the "xwx_film" index (type "doc"), limits the
  * returned _source to film_name and cost, and prints the raw response.
  *
  * @throws IOException propagated from the client's search call
  */
 @Test
 public void allMatchSearch() throws IOException {
     // Only these fields are included in each hit's _source.
     String[] includedFields = {"film_name", "cost"};

     // Request body: "query": {"match_all": {}} plus the _source filter.
     SearchSourceBuilder sourceBuilder = new SearchSourceBuilder()
             .query(QueryBuilders.matchAllQuery())
             .fetchSource(includedFields, null);

     // Target index and type, with the body attached.
     SearchRequest searchRequest = new SearchRequest("xwx_film")
             .types("doc")
             .source(sourceBuilder);

     System.out.print(restHighLevelClient.search(searchRequest));
 }

DSL核心API

new SearchRequest(index),指定要搜索的索引库
searchRequest.types(type),指定要搜索的type
SearchSourceBuilder,构建DSL请求体
searchSourceBuilder.query(queryBuilder),构造请求体中“query”:{}部分的内容
QueryBuilders,静态工厂类,方便构造queryBuilder,如searchSourceBuilder.query(QueryBuilders.matchAllQuery())就相当于构造了“query”:{ "match_all":{} }
searchRequest.source(),将构造好的请求体设置到请求对象中

分页查询
POST http://localhost:9200/xwx_film/doc/_search

{
"from":0,"size":1,
"query":{
"match_all":{}
},
"_source":["film_name","cost"]
}

其中from的含义是结果集偏移,而size则是从偏移位置开始之后的size条结果。

{
"took": 2,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 8,
"max_score": 1,
"hits": [
{
"_index": "xwx_film",
"_type": "doc",
"_id": "m1dwjmsBgExwdZZ41CUQ",
"_score": 1,
"_source": {
"cost": 39.7,
"film_name": "千与千寻"
}
}
]
}
}


java代码实现:

  /**
   * Paginated match_all search against the "xwx_film" index (type "doc").
   *
   * <p>Prints the raw response, then the _source map of every hit on the requested page.
   * Only film_name and cost are requested in _source.
   *
   * @throws IOException propagated from the client's search call
   */
  @Test
  public void testPaginating() throws IOException {
      // Zero-based page number and number of hits per page.
      int page = 0, size = 1;
      // "from" is a document offset, not a page number, so it has to be derived from
      // the page; passing the page number directly is only correct for page 0 or size 1.
      int from = page * size;

      // Request body: match_all with from/size paging and a _source filter.
      SearchSourceBuilder builder = new SearchSourceBuilder();
      builder.from(from);
      builder.size(size);
      builder.query(QueryBuilders.matchAllQuery());
      builder.fetchSource(new String[]{"film_name", "cost"}, null);

      // Target index and type, with the body attached.
      SearchRequest request = new SearchRequest("xwx_film");
      request.types("doc");
      request.source(builder);

      SearchResponse response = restHighLevelClient.search(request);
      // println (not print) so the first hit below starts on its own line.
      System.out.println(response);

      // Walk hits.hits and print each document's _source.
      SearchHits searchHits = response.getHits();
      if (searchHits != null) {
          for (SearchHit hit : searchHits.getHits()) {
              System.out.println(hit.getSourceAsMap());
          }
      }
  }

词项匹配——termQuery
词项匹配是==精确匹配==,只有当倒排索引表中存在我们指定的词项时才会返回该词项关联的文档集。

需要注意:term 查询不会对查询条件进行分词,查询条件必须与倒排索引(分词结果)中的某个词项完全一致,否则查不到。

 /**
  * Terms query on film_name: a document matches when the field contains any of the
  * listed terms exactly. Prints the raw response.
  *
  * @throws IOException propagated from the client's search call
  */
 @Test
 public void testTermsQueryByFileName() throws IOException {
     // Terms to look up, with the author's observed outcome for each.
     List<String> candidateTerms = new ArrayList<>();
     candidateTerms.add("千与千寻"); // the whole title finds nothing
     candidateTerms.add("千");       // the single character does match

     // Request body: terms query plus a _source filter for film_name and cost.
     String[] includedFields = {"film_name", "cost"};
     SearchSourceBuilder sourceBuilder = new SearchSourceBuilder()
             .query(QueryBuilders.termsQuery("film_name", candidateTerms))
             .fetchSource(includedFields, null);

     // Target index and type, with the body attached.
     SearchRequest searchRequest = new SearchRequest("xwx_film")
             .types("doc")
             .source(sourceBuilder);

     System.out.print(restHighLevelClient.search(searchRequest));
 }

全文检索—— matchQuery
输入的关键词会被 search_analyzer
指定的分词器分词,然后根据所得词项到倒排索引表中查找文档集合,每个词项关联的文档集合都会被查出来.

例:1.如果查找千与千寻,首先会被search_analyzer 分词,"千","与","寻",这时候就可以查到我们之前的三个文档,上面说的termQuery就不能查到,因为它不会被分词器分词,属于精确查找

    /**
     * Full-text match query for "千与千寻" on film_name; the hits are printed by
     * {@code printResult}.
     *
     * @throws IOException declared by the signature
     *         (NOTE(review): nothing in this body appears to throw it — confirm)
     */
    @Test
    public void testMatchQuery() throws IOException {
        // Request body: "query": {"match": {"film_name": "千与千寻"}}.
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder()
                .query(QueryBuilders.matchQuery("film_name", "千与千寻"));

        // Target index and type; printResult attaches the body and runs the search.
        SearchRequest searchRequest = new SearchRequest("xwx_film").types("doc");

        printResult(searchRequest, sourceBuilder);
    }
/**
 * Attaches the query body to the request, runs the search and prints the _source map
 * of every hit.
 *
 * @param request       search request the body is attached to
 * @param sourceBuilder query body to send with the request
 * @throws java.io.UncheckedIOException if the search call fails with an IOException
 */
private void printResult(SearchRequest request, SearchSourceBuilder sourceBuilder) {
    request.source(sourceBuilder);

    SearchResponse response;
    try {
        response = restHighLevelClient.search(request);
    } catch (IOException e) {
        // Fail with the real cause. Carrying on with a null response would only
        // surface as a NullPointerException on the getHits() call below.
        // Unchecked, because callers of this helper need not declare IOException.
        throw new java.io.UncheckedIOException("search request failed", e);
    }

    SearchHits hits = response.getHits();
    if (hits == null) {
        return;
    }
    for (SearchHit hit : hits.getHits()) {
        System.out.println(hit.getSourceAsMap());
    }
}

多域检索——multiMatchQuery
上边学习的termQuery和matchQuery一次只能匹配一个Field
multiMatchQuery一次可以匹配多个字段(即扩大了检索范围,之前一直都是在film_name字段中检索)

/**
 * Multi-field search: looks for the keyword "部" in both film_name and description,
 * with minimum_should_match set to "50%". The hits are printed by {@code printResult}.
 */
@Test
public void testMultiMatchQuery() {
    // Alternative keyword to experiment with: "部寻" (same fields, same "50%" setting).
    SearchSourceBuilder body = new SearchSourceBuilder();
    body.query(
            QueryBuilders.multiMatchQuery("部", "film_name", "description")
                    .minimumShouldMatch("50%"));

    // Target index and type; printResult attaches the body and runs the search.
    SearchRequest searchRequest = new SearchRequest("xwx_film");
    searchRequest.types("doc");

    printResult(searchRequest, body);
}

布尔查询——boolQuery
布尔查询对应于Lucene的BooleanQuery查询,实现将多个查询组合起来。

三个参数

  • must: 文档必须匹配must所包括的查询条件,相当于 “AND”
  • should: 文档应该匹配should所包括的查询条件其中的一个或多个,相当于 "OR"
  • must_not: 文档不能匹配must_not所包括的该查询条件,相当于“NOT”
{
"query": {
"bool":{
"must":[
{
"term":{
"description":"导"
}
},
{
"multi_match":{
"query":"千与千寻",
"fields":["film_name","description"]
}
}
]
}
},
"_source":["film_name"]
}

查询 description 中含有词项“导”,并且 film_name 或 description 中含“千与千寻”(分词后匹配)的文档

  • term精确匹配
  • multi_match:全文检索--分词匹配
/**
 * Bool query with two "must" clauses: a term query for "导" on description and a
 * multi_match for "千与千寻" over film_name and description. The hits are printed
 * by {@code printResult}.
 */
@Test
public void testBoolQuery() {
    // query.bool.must: both clauses have to match.
    BoolQueryBuilder boolQuery = new BoolQueryBuilder()
            .must(QueryBuilders.termQuery("description", "导"))
            .must(QueryBuilders.multiMatchQuery("千与千寻", "film_name", "description"));

    // query: wrap the bool query in the request body.
    SearchSourceBuilder body = new SearchSourceBuilder().query(boolQuery);

    // Target index and type; printResult attaches the body and runs the search.
    SearchRequest searchRequest = new SearchRequest("xwx_film").types("doc");

    printResult(searchRequest, body);
}

过滤器——filter
过滤是针对搜索的结果进行过滤,==过滤器主要判断的是文档是否匹配,不去计算和判断文档的匹配度得分==,所以==过滤器性能比查询要高,且方便缓存==,推荐尽量使用过滤器去实现查询或者过滤器和查询共同使用。过滤器仅能在布尔查询中使用。

查询价格是60.7的蜘蛛侠电影

{
"query": {
"bool":{
"must":[
{
"multi_match":{
"query":"蜘蛛侠",
"fields":["film_name","description"]
}
}
],
"filter": [
{
"term": {
"cost": "60.7"
}
}
]
}
},
"_source":["film_name"]
}

java代码实现:

/**
 * Bool query combining a scored "must" clause with a "filter" clause: multi_match for
 * "蜘蛛侠" over film_name and description, filtered by a term query on cost "60.7".
 * The hits are printed by {@code printResult}.
 */
@Test
public void testBoolQuery3() {
    BoolQueryBuilder boolQuery = new BoolQueryBuilder();
    // query.bool.must: full-text condition.
    boolQuery.must(QueryBuilders.multiMatchQuery("蜘蛛侠", "film_name", "description"));
    // query.bool.filter: exact cost condition.
    boolQuery.filter(QueryBuilders.termQuery("cost", "60.7"));

    SearchSourceBuilder body = new SearchSourceBuilder().query(boolQuery);

    // Target index and type; printResult attaches the body and runs the search.
    SearchRequest searchRequest = new SearchRequest("xwx_film").types("doc");

    printResult(searchRequest, body);
}

排序
查找价格在10到50块之间的电影并按照价格升序排序

{
"query": {
"bool": {
"filter": [
{
"range": {
"cost": {
"gte": "10",
"lte": "50"
}
}
}
]
}
},
"sort": [
{
"cost": "asc"
}
]
}

java代码实现:

    /**
     * Sorting: finds films whose cost is between 10 and 50 (inclusive) using a bool
     * filter with a range query, sorted by cost ascending. The hits are printed by
     * {@code printResult}.
     */
    @Test
    public void testSort() {
        SearchRequest request = new SearchRequest("xwx_film");
        request.types("doc");

        // query.bool.filter.range: 10 <= cost <= 50. The upper bound is 50 so that it
        // agrees with the DSL example ("lte": "50") and the stated 10-50 price range.
        BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder();
        boolQueryBuilder.filter(QueryBuilders.rangeQuery("cost").gte(10).lte(50));

        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        sourceBuilder.query(boolQueryBuilder);
        // sort: [{"cost": "asc"}]
        sourceBuilder.sort("cost", SortOrder.ASC);

        printResult(request, sourceBuilder);
    }
原文地址:https://www.cnblogs.com/xwx20160804/p/11718497.html