初试ElasticSearch做菜谱搜索 整理思路

1.docker部署elasticSearch集群及kibana服务

  借鉴https://blog.csdn.net/ctwy291314/article/details/111313419这位博主的部署方式。

2.学习es的DSL语法

  推荐【慕课】ElasticSearch+Spark 构建高匹配度搜索服务+千人千面推荐系统

3.logstash的logstash-input-jdbc插件对数据初始化全量索引构建

4.阿里canal中间件完成准实时增量索引构建

5.业务功能开发(中文IK分词器插件安装、定制化分词、同义词扩展、相关性重塑)

6.总结一些东西

# Recipe search: full-text match on the query string across several fields,
# narrowed by non-scoring filters, re-scored with popularity counters, and
# accompanied by a terms aggregation over tags.
GET cookbook/_search
{
  "explain": true,
  "query": {
    "function_score": {
      "query": {
        "bool": {
          "must": [
            {
              "multi_match": {
                "query": "国庆佳节",
                "fields": [
                  "name^10",   # boost: matches on name are weighted 10x
                  "introduction",
                  "description",
                  "materials",
                  "seasons",
                  "categories",
                  "platforms",
                  "themes",
                  "tags"
                ],
                "type": "most_fields"  # other multi_match types exist, e.g. best_fields, cross_fields
              } # a term clause placed here under must would also filter, but it would contribute to the score
            }
          ],
          "filter": [ # filter clauses do not contribute to the score; tag/status conditions belong here
            {
              "term": {
                "verified": {
                  "value": true
                }
              }
            },
            {
              "term": {
                "grounding": {
                  "value": true
                }
              }
            },
            {
              "term": {
                "tags": "家常菜"
              }
            },
            {
              "term": {
                "tags": "夜宵"
              }
            }
          ]
        }
      },
      "functions": [ # additional custom scoring
        {
          "field_value_factor": {
            "field": "collect_count"
          },
          "weight": 0.00002  # weight applied to this function's value
        },
        {
          "field_value_factor": {
            "field": "view_count"
          },
          "weight": 0.00002  # weight applied to this function's value
        }
      ],
      "score_mode": "sum",    # the function values are added together
      "boost_mode": "sum"     # added to the query score; use "replace" to let the function score replace the query score
    }
  },
  "sort": [ # sorting by _score keeps relevance scoring; sorting by a regular field returns hits without a computed score
    {
      "_score": {
        "order": "desc"
      }
    }
  ],
  "aggs": {
    "group_by_tags": {
      "terms": {
        "field": "tags"
      }
    }
  }
}


App 内的排序有两种做法:一是使用 sort 按字段排序(此时不计算相关性得分);二是设置 boost_mode = replace,用自定义分数来排序。



# Inspect index-time tokenization: analyze the sample text with the analyzer
# mapped to the tags field of the cookbook index.
GET cookbook/_analyze
{
  "text": [
    "创意菜 甜 西餐 甜品 电烤箱"
  ],
  "field": "tags"
}
# Inspect search-time tokenization: show the tokens ik_max_word produces for a search term.
GET _analyze?pretty
{
  "analyzer": "ik_max_word",
  "text": [
    "ROKI"
  ]
}


# Show the tokens ik_smart produces for a whitespace-separated tag string.
GET _analyze?pretty
{
  "analyzer": "ik_smart",
  "text": [
    "创意菜 甜 西餐 甜品 电烤箱"
  ]
}

2020-12-23 新增:name 字段支持中文拼音搜索

# Create the cookbook index with 10 primary shards and 3 replicas per shard.
# number_of_shards is fixed at creation time; number_of_replicas can be changed later.
PUT /cookbook/
{
  "settings": {
    "number_of_replicas": 3,
    "number_of_shards": 10
  }
}

# Analysis settings can only be changed while the index is closed.
POST cookbook/_close

# Register a custom analyzer, ik_pinyin_analyzer: text is tokenized by
# ik_max_word and each token is then passed through the my_pinyin token filter.
# With keep_original set to false, only the pinyin forms are emitted.
PUT cookbook/_settings
{
  "settings": {
    "index": {
      "analysis": {
        "filter": {
          "my_pinyin": {
            "type": "pinyin",
            "keep_full_pinyin": true,
            "keep_original": false,
            "keep_separate_first_letter": false,
            "limit_first_letter_length": 10,
            "lowercase": true,
            "remove_duplicated_term": true
          }
        },
        "analyzer": {
          "ik_pinyin_analyzer": {
            "type": "custom",
            "tokenizer": "ik_max_word",
            "filter": [
              "my_pinyin"
            ]
          }
        }
      }
    }
  }
}

# Field mappings for the cookbook index.
# - "dynamic": false: fields not listed under "properties" are not added to the mapping.
# - name is analyzed with the custom ik_pinyin_analyzer at both index and search time.
# - Free-text fields (introduction, description, prepare_desc, seasons, categories,
#   platforms, themes) use ik_max_word; materials uses ik_smart.
# - tags and category_tags are text fields split on whitespace, with fielddata enabled
#   so they can be used in aggregations.
# - Counters and type codes are integers, flags are booleans, pub_time is a date.
PUT cookbook/_mappings
{
  "dynamic": false,
  "properties": {
    "id": {
      "type": "integer"
    },
    "name": {
      "type": "text",
      "analyzer": "ik_pinyin_analyzer",
      "search_analyzer": "ik_pinyin_analyzer"
    },
    "introduction": {
      "type": "text",
      "analyzer": "ik_max_word",
      "search_analyzer": "ik_max_word"
    },
    "description": {
      "type": "text",
      "analyzer": "ik_max_word",
      "search_analyzer": "ik_max_word"
    },
    "collect_count": {
      "type": "integer"
    },
    "view_count": {
      "type": "integer"
    },
    "difficulty": {
      "type": "integer"
    },
    "need_time": {
      "type": "integer"
    },
    "prepare_desc": {
      "type": "text",
      "analyzer": "ik_max_word",
      "search_analyzer": "ik_max_word"
    },
    "type": {
      "type": "integer"
    },
    "cookbook_type": {
      "type": "integer"
    },
    "recommend": {
      "type": "boolean"
    },
    "verified": {
      "type": "boolean"
    },
    "grounding": {
      "type": "boolean"
    },
    "allow_distribution": {
      "type": "boolean"
    },
    "tags": {
      "type": "text",
      "analyzer": "whitespace",
      "fielddata": true
    },
    "materials": {
      "type": "text",
      "analyzer": "ik_smart",
      "search_analyzer": "ik_smart"
    },
    "seasons": {
      "type": "text",
      "analyzer": "ik_max_word",
      "search_analyzer": "ik_max_word"
    },
    "category_tags": {
      "type": "text",
      "analyzer": "whitespace",
      "fielddata": true
    },
    "categories": {
      "type": "text",
      "analyzer": "ik_max_word",
      "search_analyzer": "ik_max_word"
    },
    "platforms": {
      "type": "text",
      "analyzer": "ik_max_word",
      "search_analyzer": "ik_max_word"
    },
    "themes": {
      "type": "text",
      "analyzer": "ik_max_word",
      "search_analyzer": "ik_max_word"
    },
    "pub_time": {
      "type": "date"
    }
  }
}

# Reopen the index so it can serve indexing and search requests again.
POST cookbook/_open

 2020-12-24 记elasticsearch-rest-client 下的httpclient jar包冲突。查看7.9.3下引用的是 

直接覆盖同版本的引用

 

2020-12-28

IK 分词可以同时支持拼音和同义词:在自定义 analyzer 的 filter 列表中依次串联多个 filter 即可。

# Define two custom analyzers that chain synonym expansion and pinyin conversion
# after IK tokenization: one based on ik_max_word, one based on ik_smart.
# Token filters run in the listed order: my_synonym_filter first, then my_pinyin_filter.
# synonyms_path points at the synonym file the synonym filter loads.
#
# Analysis settings are static: they cannot be updated on an open index, so the
# index is closed before the update and reopened afterwards.
POST cookbook/_close

PUT cookbook/_settings
{
  "settings": {
    "index": {
      "analysis": {
        "analyzer": {
          "ik_synonym_pinyin_max_word": {
            "type": "custom",
            "tokenizer": "ik_max_word",
            "filter": [
              "my_synonym_filter",
              "my_pinyin_filter"
            ]
          },
          "ik_synonym_pinyin_smart": {
            "type": "custom",
            "tokenizer": "ik_smart",
            "filter": [
              "my_synonym_filter",
              "my_pinyin_filter"
            ]
          }
        },
        "filter": {
          "my_synonym_filter": {
            "type": "synonym",
            "synonyms_path": "analysis-ik/synonyms.txt"
          },
          "my_pinyin_filter": {
            "type": "pinyin",
            "keep_separate_first_letter": false,
            "keep_full_pinyin": true,
            "keep_original": false,
            "limit_first_letter_length": 10,
            "lowercase": true,
            "remove_duplicated_term": true
          }
        }
      }
    }
  }
}

# Reopen the index so the new analyzers become usable.
POST cookbook/_open

  

原文地址:https://www.cnblogs.com/xuetieqi/p/14168694.html