lucene实践

  之前博客的搜索栏是用 SQL 模糊查询实现的,最近学完了 Lucene,正好学以致用,就用它把 SQL 搜索替换掉。

  中间遇到了一些学习过程中没有提到的问题,所以说,还是实践出真知啊。

Lucene 分开来讲的话,我感觉可以分成两大块:索引维护、索引搜索

索引维护包括:添加索引、删除索引、更新索引

public class BlogIndex {
// The Lucene index path is configured in the bean definition.
    private String lucenePath;

    /**
     * Returns the configured location of the Lucene index.
     *
     * @return the index path as currently set on this object
     */
    public String getLucenePath() {
        return this.lucenePath;
    }

    /**
     * Sets the location of the Lucene index.
     *
     * @param lucenePath the index path to use for subsequent index operations
     */
    public void setLucenePath(final String lucenePath) {
        this.lucenePath = lucenePath;
    }

    /**
     * Opens a new writer on the index stored at {@code lucenePath}.
     *
     * <p>The writer is configured with an {@code IKAnalyzer}. The caller owns the
     * returned writer and is responsible for closing it.
     *
     * @return a newly created index writer
     * @throws Exception if the directory or the writer cannot be opened
     */
    private IndexWriter getWriter() throws Exception {
        File indexLocation = new File(lucenePath);
        Directory indexDirectory = FSDirectory.open(indexLocation.toPath());
        return new IndexWriter(indexDirectory, new IndexWriterConfig(new IKAnalyzer()));
    }
    
    /**
     * Adds a blog to the index.
     *
     * <p>The id is indexed as a single un-tokenized term so that it can later be used
     * as the key for updates and deletes; title, summary and key word are indexed as
     * analyzed text. All four values are also stored so that search can read them back.
     *
     * @param blog the blog whose id, title, summary and key word are indexed
     * @throws Exception if the document cannot be built or the index cannot be written
     */
    public void addIndex(BlogCustom blog) throws Exception {
        // Build the document before opening the writer so that a failure here
        // never leaves a writer open.
        Document doc = new Document();
        doc.add(new StringField("id",String.valueOf(blog.getId()),Field.Store.YES));
        doc.add(new TextField("title",blog.getTitle(),Field.Store.YES));
        doc.add(new TextField("summary",blog.getSummary(),Field.Store.YES));
        doc.add(new TextField("keyWord",blog.getKeyWord(),Field.Store.YES));

        // try-with-resources closes the writer even when addDocument throws;
        // an unclosed writer keeps the index write lock and blocks later writers.
        try (IndexWriter indexWriter = getWriter()) {
            indexWriter.addDocument(doc);
        }
    }
    
    /**
     * Replaces the indexed document of a blog with a freshly built one.
     *
     * <p>The existing document is located by its {@code id} term; the new document
     * carries the blog's current id, title, summary and key word.
     *
     * @param blog the blog whose index entry is rebuilt
     * @throws Exception if the document cannot be built or the index cannot be written
     */
    public void updateIndex(BlogCustom blog) throws Exception {
        // Same textual form of the id for the stored field and for the lookup term.
        String id = String.valueOf(blog.getId());

        // Build the document before opening the writer so that a failure here
        // never leaves a writer open.
        Document doc = new Document();
        doc.add(new StringField("id",id,Field.Store.YES));
        doc.add(new TextField("title",blog.getTitle(),Field.Store.YES));
        doc.add(new TextField("summary",blog.getSummary(),Field.Store.YES));
        doc.add(new TextField("keyWord",blog.getKeyWord(),Field.Store.YES));

        // try-with-resources closes the writer even when updateDocument throws;
        // an unclosed writer keeps the index write lock and blocks later writers.
        try (IndexWriter indexWriter = getWriter()) {
            indexWriter.updateDocument(new Term("id",id),doc);
        }
    }
    
    /**
     * Removes a blog from the index.
     *
     * @param blogId the id of the blog, in the same textual form that was indexed
     *               in the {@code id} field
     * @throws Exception if the index cannot be opened or written
     */
    public void deleteIndex(String blogId) throws Exception {
        // try-with-resources closes the writer even when deleteDocuments throws;
        // an unclosed writer keeps the index write lock and blocks later writers.
        try (IndexWriter indexWriter = getWriter()) {
            indexWriter.deleteDocuments(new Term("id",blogId));
        }
    }

搜索索引就比较复杂一点,因为除了多域查询之外,还要给命中的关键词加上高亮显示

    /**
     * Searches the index and returns the matching blogs with the hits highlighted.
     *
     * <p>The query is parsed against the {@code id}, {@code title}, {@code summary}
     * and {@code keyWord} fields; a document matches when any one of them matches.
     * At most 100 hits are returned. For each stored title, summary and key word the
     * fragment chosen by the highlighter (keywords wrapped in a red {@code <font>}
     * tag) is used; when the highlighter yields nothing, the stored value is used
     * unchanged.
     *
     * <p>The analyzer, directory and index reader are opened for this call and are
     * closed before it returns, including when the search fails.
     *
     * @param q the query string; {@code null} or blank yields an empty list
     * @return the matching blogs in the order the searcher returned them, never {@code null}
     * @throws Exception if the index cannot be opened or read, the query cannot be
     *                   parsed, or highlighting fails
     */
    public List<BlogCustom> searchBlog(String q) throws Exception{
        List<BlogCustom> blogList = new LinkedList<>();
        // Nothing to search for: return an empty result rather than handing a
        // null or blank string to the query parser.
        if (q == null || q.trim().isEmpty()) {
            return blogList;
        }

        // try-with-resources releases the reader, directory and analyzer on every
        // exit path; previously none of them was ever closed.
        try (Analyzer analyzer = new IKAnalyzer();
             Directory dir = FSDirectory.open(new File(lucenePath).toPath());
             IndexReader indexReader = DirectoryReader.open(dir)) {
            IndexSearcher indexSearch = new IndexSearcher(indexReader);

            // Multi-field query: one SHOULD clause per field, so a hit in any single
            // field is enough. The two arrays must have the same length.
            String[] fields = {"id","title","summary","keyWord"};
            BooleanClause.Occur[] clauses = { BooleanClause.Occur.SHOULD,BooleanClause.Occur.SHOULD,
                    BooleanClause.Occur.SHOULD,BooleanClause.Occur.SHOULD };
            Query query = MultiFieldQueryParser.parse(q, fields, clauses, analyzer);
            // Cap the result at 100 documents.
            TopDocs topDocs = indexSearch.search(query, 100);

            // Highlighting: the formatter supplies the markup, the scorer identifies
            // the query terms, and the fragmenter controls the fragment size.
            SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<font style='color:red;'>","</font>");
            QueryScorer scorer = new QueryScorer(query);
            Highlighter highlighter = new Highlighter(formatter,scorer);
            highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));

            for(ScoreDoc scoreDoc : topDocs.scoreDocs){
                Document doc = indexSearch.doc(scoreDoc.doc);
                BlogCustom blog = new BlogCustom();
                blog.setId(Integer.parseInt(doc.get("id")));

                // Only fields that are present in the stored document are set.
                String title = doc.get("title");
                if(title!=null) {
                    blog.setTitle(highlightOrOriginal(highlighter, analyzer, "title", title));
                }
                String summary = doc.get("summary");
                if(summary!=null) {
                    blog.setSummary(highlightOrOriginal(highlighter, analyzer, "summary", summary));
                }
                String keyWord = doc.get("keyWord");
                if(keyWord!=null) {
                    blog.setKeyWord(highlightOrOriginal(highlighter, analyzer, "keyWord", keyWord));
                }
                blogList.add(blog);
            }
        }
        return blogList;
    }

    /**
     * Returns the highlighted fragment of {@code text} for the given field, or
     * {@code text} itself when the highlighter produces no fragment.
     */
    private String highlightOrOriginal(Highlighter highlighter, Analyzer analyzer,
            String fieldName, String text) throws Exception {
        TokenStream tokenStream = analyzer.tokenStream(fieldName, text);
        String fragment = highlighter.getBestFragment(tokenStream, text);
        return StringUtil.isEmpty(fragment) ? text : fragment;
    }
    
}

完成 !

原文地址:https://www.cnblogs.com/jr-xiaojian/p/12312221.html