java基础之----elasticsearch(Java客户端搜索实例)

概述

es是使用Java编写的,对Java有较好的原生支持。下面是使用Java写的一个demo:根据关键字进行搜索,并对搜索结果重排序,对部分字段进行高亮处理。

public class EsIndexService { 
    // Per-instance logger named after the runtime class (getClass()).
    protected Logger logger = LoggerFactory.getLogger(getClass());

    // Name of the Elasticsearch index used by every search/index/delete request in this class.
    public static final String SCS_WEB_INDEX = "scs_web";
    // Mapping type used for question documents (search requests and bulkQuestion).
    public static final String QUESTION_TYPE = "question";
    // Mapping type used for question-pair documents (bulkQuestionPairs).
    public static final String QUESTION_PAIR_TYPE = "question_pair";
    // Value of an entity's delFlag that makes bulk() issue a delete instead of an index request.
    public static final String DELETED_FLAG = "1";
    // Document field matched by the full-text query and highlighted in results.
    public static final String FIELD_CONTENT = "content";
    // Document field that is highlighted in results alongside the content.
    public static final String FIELD_ANSWER = "answer";
    // The following field names are not referenced in the visible part of this class;
    // presumably they are used by question-pair queries elsewhere -- TODO confirm.
    public static final String FIELD_HRELATION_TYPE = "hRelation.relationType";
    public static final String FIELD_MRELATION_TYPE = "mRelation.relationType";
    public static final String FIELD_QUESTION1_ID = "question1.id";
    public static final String FIELD_QUESTION2_ID = "question2.id";
    public static final String FIELD_HRELATION_START_DATE = "hRelation.startDate";
    public static final String FIELD_HRELATION_EXPIRE_DATE = "hRelation.expireDate";
    public static final String FIELD_SCORE = "score";
    // Document field used both as the field-value-factor input when re-ranking and as the sort key in topByCount.
    public static final String FIELD_COUNT = "count";
    // Factor applied to the count field inside the field-value-factor score function.
    public static final Float COUNT_FACTOR = 0.1f;

    // DAO used by cronIndex to load un-indexed questions and to update their index flag.
    @Autowired
    private QuestionDao questionDao;

    // DAO used by cronIndex to load un-indexed question pairs and to update their index flag.
    @Autowired
    private QuestionPairDao questionPairDao;

    // Transaction manager used by cronIndex to run each indexing batch in its own transaction.
    @Autowired
    private DataSourceTransactionManager transactionManager;

    // DAO used to record the searched text together with the total hit count.
    @Autowired
    private SearchLogDao searchLogDao;

    /**
     * Runs a fuzzy full-text search on the question content and re-ranks the hits by click count.
     *
     * <p>The relevance part is a match query on {@code content} with automatic fuzziness. It is wrapped
     * in a function-score query whose function is a field-value-factor on {@code count} with factor
     * {@link #COUNT_FACTOR} and the {@code LOG1P} modifier. Paging, highlighting and result mapping are
     * delegated to the four-argument overload.
     *
     * @param question search request; its content is the keyword text to search for
     * @return the page built by the four-argument overload, or {@code emptyResult(question)} when the
     *         content is empty
     * @throws IOException if the search request fails
     */
    public Page<QuestionSearchResult> searchQuestion(Question question) throws IOException {
        String keyword = question.getContent();

        // Nothing to search for: hand back an empty page right away.
        if (StringUtils.isEmpty(keyword)) {
            return emptyResult(question);
        }

        // Plain Elasticsearch match query with fuzzy matching switched on.
        MatchQueryBuilder contentMatch = QueryBuilders.matchQuery(FIELD_CONTENT, keyword)
                .fuzziness(Fuzziness.AUTO);

        // Re-rank the raw hits: newScore = oldScore + log(1 + 0.1 * count), where count is the stored
        // click count, so that frequently clicked questions are ranked first.
        FunctionScoreQueryBuilder rankedQuery = QueryBuilders.functionScoreQuery(
                contentMatch,
                ScoreFunctionBuilders.fieldValueFactorFunction(FIELD_COUNT)
                        .factor(COUNT_FACTOR)
                        .modifier(FieldValueFactorFunction.Modifier.LOG1P));

        // SUM is what makes the formula an addition; the default boost mode multiplies instead.
        rankedQuery.boostMode(CombineFunction.SUM);

        return searchQuestion(question, rankedQuery, null, null);
    }

    /**
     * Lists questions ordered by the {@code count} field, highest first, without any query condition.
     *
     * @param question request object; passed through to the four-argument {@code searchQuestion}
     * @return the page of results built by the four-argument {@code searchQuestion}
     * @throws IOException if the search request fails
     */
    public Page<QuestionSearchResult> topByCount(Question question) throws IOException {
        // No query builder is supplied; only the sort on the count field is applied.
        final QueryBuilder noQuery = null;
        return searchQuestion(question, noQuery, FIELD_COUNT, SortOrder.DESC);
    }

    /**
     * Executes a paged search for question documents in the {@code scs_web} index and maps the hits
     * to {@link QuestionSearchResult} objects.
     *
     * <p>Every result carries the hit's score divided by the maximum score of the response (scale 4,
     * half-even rounding) and the highlight fragments for the {@code content} and {@code answer}
     * fields. The reported total is capped by the {@code max_result_count} dictionary value
     * (default 30). The search is written to the search log only when at least one hit was found.
     *
     * @param question     request object supplying the paging information and the text used for logging
     * @param queryBuilder query to execute; when {@code null} no query is set on the search source
     * @param orderBy      field to sort by; when empty no explicit sort is applied
     * @param order        sort direction used together with {@code orderBy}
     * @return the populated result page; the page returned by {@code emptyResult(question)} with only
     *         its count set when there are no hits
     * @throws IOException if the search request fails
     */
    public Page<QuestionSearchResult> searchQuestion(Question question, QueryBuilder queryBuilder, String orderBy, SortOrder order) throws IOException {

        Page<QuestionSearchResult> resultPage = emptyResult(question);

        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        // Scores are needed for the normalisation below, so keep tracking them even when sorting by a field.
        searchSourceBuilder.trackScores(true);

        // Paging: page size, plus the offset of the first hit for every page after the first.
        int pageSize = question.getPage().getPageSize();
        int pageNo = question.getPage().getPageNo();
        searchSourceBuilder.size(pageSize);
        if (pageNo > 1) {
            searchSourceBuilder.from((pageNo - 1) * pageSize);
        }

        if (queryBuilder != null) {
            searchSourceBuilder.query(queryBuilder);
        }
        if (StringUtils.isNotEmpty(orderBy)) {
            searchSourceBuilder.sort(orderBy, order);
        }

        // Highlighting: matched fragments come back wrapped in <em>...</em>.
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        highlightBuilder.field(FIELD_CONTENT).field(FIELD_ANSWER).requireFieldMatch(false);
        searchSourceBuilder.highlighter(highlightBuilder);

        SearchRequest searchRequest = new SearchRequest(SCS_WEB_INDEX);
        searchRequest.types(QUESTION_TYPE);
        searchRequest.source(searchSourceBuilder);

        SearchResponse response = EsUtil.client.search(searchRequest);
        long totalHits = response.getHits().getTotalHits();
        float maxScoreValue = response.getHits().getMaxScore();
        logger.info("搜索问题[{}],结果:{}条,最高score:{},耗时:{}ms",
                question.getContent(), totalHits,
                maxScoreValue, response.getTookInMillis());

        // Never report more results than the configured maximum.
        int maxResultCount = Integer.parseInt(DictUtils.getDictValue("max_result_count", "scs_config", "30"));
        resultPage.setCount(Math.min(totalHits, maxResultCount));
        if (totalHits < 1) {
            return resultPage;
        }

        // A zero, NaN or infinite maximum score (for example from a query that does not score)
        // cannot be used as a divisor; in that case every hit gets a relative score of 0.0000
        // instead of the request failing with an ArithmeticException / NumberFormatException.
        boolean canNormalise = maxScoreValue != 0f
                && !Float.isNaN(maxScoreValue)
                && !Float.isInfinite(maxScoreValue);
        BigDecimal maxScore = canNormalise ? BigDecimal.valueOf(maxScoreValue) : null;

        for (SearchHit hit : response.getHits()) {
            QuestionSearchResult tmp = new QuestionSearchResult();
            tmp.setSimilarQuestion((Question) JsonMapper.fromJsonString(hit.getSourceAsString(), Question.class));

            // Relative score: this hit's score as a fraction of the best score in the response.
            BigDecimal relativeScore = BigDecimal.ZERO.setScale(4);
            if (maxScore != null) {
                relativeScore = BigDecimal.valueOf(hit.getScore())
                        .divide(maxScore, 4, java.math.RoundingMode.HALF_EVEN);
            }
            tmp.setEsScore(relativeScore);

            logger.debug("搜索结果 score: {}, question: {}, count:{}, id: {}",
                    hit.getScore(), tmp.getSimilarQuestion().getContent(), tmp.getSimilarQuestion().getCount(), hit.getId());

            // Convert every highlighted field of the hit into the project's own representation.
            List<StringHighlightField> highlightList = new ArrayList<>();
            for (String key : hit.getHighlightFields().keySet()) {
                highlightList.add(StringHighlightField.fromHighlightField(hit.getHighlightFields().get(key)));
            }
            tmp.setHighlightList(highlightList);
            resultPage.getList().add(tmp);
        }

        searchLogDao.insert(question.getContent(), totalHits);
        return resultPage;
    }
public void bulkQuestion(List<Question> questions) throws IOException { bulk(questions, QUESTION_TYPE); } public void bulkQuestionPairs(List<QuestionPair> questionPairs) throws IOException { bulk(questionPairs, QUESTION_PAIR_TYPE); } public void bulk(List<?> list, String type) throws IOException { BulkRequest bulkRequest = new BulkRequest(); for (Object obj : list) { DataEntity entity = (DataEntity) obj; if (DELETED_FLAG.equals(entity.getDelFlag())) {
//删除es中的数据,通过id,es的索引是scs_web,id只是type中的一个字段 DeleteRequest request
= new DeleteRequest(SCS_WEB_INDEX, type, entity.getId()); bulkRequest.add(request); } else {
// 设置一个查询的条件,使用id查询,如果查找不到,则添加文档数据
          //这个IndexRequest中有个参数,OpType.INDEX,默认是INDEX,意思就是如果es已经存在这条记录,会强制覆盖,而不是更新 IndexRequest request
= new IndexRequest(SCS_WEB_INDEX, type, entity.getId()); request.source(JsonMapper.toJsonString(entity), XContentType.JSON); bulkRequest.add(request); } } EsUtil.client.bulk(bulkRequest); } @Scheduled(cron = "0/10 * * * * ?") public void cronIndex() throws IOException { logger.info("定时索引更新开始"); boolean continueFlag = true; while (continueFlag) { DefaultTransactionDefinition trans = new DefaultTransactionDefinition(); trans.setTimeout(10); trans.setPropagationBehavior(DefaultTransactionDefinition.PROPAGATION_REQUIRES_NEW); TransactionStatus transStatus = transactionManager.getTransaction(trans); try { List<Question> questionList = questionDao.findUnIndexed(Integer.valueOf(DictUtils.getDictValue("import_size", "scs_config", "100"))); if (!questionList.isEmpty()) { this.bulkQuestion(questionList); questionDao.updateIndexFlag(questionList); } else { continueFlag = false; } transactionManager.commit(transStatus); } catch (Throwable e) { transactionManager.rollback(transStatus); throw e; } } continueFlag = true; while (continueFlag) { DefaultTransactionDefinition trans = new DefaultTransactionDefinition(); trans.setPropagationBehavior(DefaultTransactionDefinition.PROPAGATION_REQUIRES_NEW); trans.setTimeout(10); TransactionStatus transStatus = transactionManager.getTransaction(trans); try { List<QuestionPair> questionPairList = questionPairDao.findUnIndexed( Integer.valueOf(DictUtils.getDictValue("import_size", "scs_config", "100")), new BigDecimal(DictUtils.getDictValue("mark_score_min", "scs_config", "0.6"))); if (!questionPairList.isEmpty()) { this.bulkQuestionPairs(questionPairList); questionPairDao.updateIndexFlag(questionPairList); } else { continueFlag = false; } transactionManager.commit(transStatus); } catch (Throwable e) { transactionManager.rollback(transStatus); throw e; } } logger.info("定时索引更新结束"); }

 参考:https://blog.csdn.net/prestigeding/article/details/83351064

原文地址:https://www.cnblogs.com/gunduzi/p/12507688.html