Lucene——字符串检索

Lucene版本太多,最新的已经到8,而网络教程大都还是3,并且有类似于ES、Solr这样的封装框架,学习收益较低,因此并没有花很多时间研究的打算。

之前双十一购物时,有了个思考:“如何检索一个商品名称?”

用数据库的Like语句,功能实在太单薄,因此,用搜索引擎搞一个吧。

以下代码可以微调,设计成文件检索索引。

        <!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-core -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>3.6.2</version>
        </dependency>
package lucese.test;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

import java.io.File;
import java.io.IOException;

/**
 * Minimal Lucene 3.6 demo that indexes plain strings and prints the ones matching a query.
 *
 * <p>Two indexes are kept: an on-disk index under the path given to {@link #init(String)} and an
 * in-memory copy of it taken at init time. Strings can be added to either one;
 * {@link #synce()} replaces the on-disk index with the current in-memory one.
 *
 * <p>All state is static and unsynchronized, so the class is not safe for concurrent use.
 */
public class StringIndexer {
  /** Name of the single field every indexed string is stored under. */
  private static final String KEY = "string";
  /** Upper bound on the number of hits printed per query. */
  private static final int MAX_HITS = 100;

  private static Directory indexDirectory;
  private static Directory ramDirectory;
  private static Analyzer analyzer;

  /**
   * Opens (creating the folder if necessary) the on-disk index and loads a copy into memory.
   *
   * @param out file-system path of the index folder
   * @throws IOException if the folder cannot be created or the index cannot be read
   */
  public static void init(String out) throws IOException {
    File indexDir = new File(out);
    // The folder must exist before it is copied into the RAMDirectory below; on a fresh
    // path the copy would otherwise fail, so create it up front.
    if (!indexDir.mkdirs() && !indexDir.isDirectory()) {
      throw new IOException("cannot create index directory: " + indexDir.getAbsolutePath());
    }
    indexDirectory = FSDirectory.open(indexDir);
    ramDirectory = new RAMDirectory(indexDirectory);

    // Standard tokenizer/analyzer, shared by indexing and query parsing.
    analyzer = new StandardAnalyzer(Version.LUCENE_36);
  }

  /**
   * Adds {@code str} as a new document to the in-memory index.
   *
   * @param str text to index
   * @throws IllegalStateException if {@link #init(String)} has not been called
   * @throws RuntimeException wrapping the {@link IOException} if writing fails
   */
  public static void createIndex(String str) {
    requireInitialized();
    try (IndexWriter writer =
        new IndexWriter(ramDirectory, newConfig(IndexWriterConfig.OpenMode.CREATE_OR_APPEND))) {
      write(writer, str);
    } catch (IOException e) {
      throw new RuntimeException("create IndexWriter error:", e);
    }
  }

  /**
   * Adds {@code str} as a new document to the on-disk index.
   *
   * <p>The writer appends to an existing index instead of recreating it, so repeated calls
   * accumulate documents. Documents added here are not visible to {@link #query(String)} and
   * are discarded by a later {@link #synce()}, which rewrites the disk index from memory.
   *
   * @param str text to index
   * @throws IllegalStateException if {@link #init(String)} has not been called
   * @throws RuntimeException wrapping the {@link IOException} if writing fails
   */
  public static void createIndexInDisc(String str) {
    requireInitialized();
    try (IndexWriter writer =
        new IndexWriter(indexDirectory, newConfig(IndexWriterConfig.OpenMode.CREATE_OR_APPEND))) {
      write(writer, str);
    } catch (IOException e) {
      throw new RuntimeException("create IndexWriter error:", e);
    }
  }

  /**
   * Wraps {@code str} in a one-field document (stored and analyzed) and adds it via
   * {@code writer}.
   *
   * @param writer open writer that receives the document
   * @param str text to index
   * @throws IOException if the writer fails to add the document
   */
  public static void write(IndexWriter writer, String str) throws IOException {
    Document document = new Document();
    document.add(new Field(KEY, str, Field.Store.YES, Field.Index.ANALYZED));
    writer.addDocument(document);
  }

  /**
   * Searches the in-memory index and prints every matching string (at most 100) to stdout.
   *
   * @param str query in Lucene query-parser syntax
   * @throws IllegalStateException if {@link #init(String)} has not been called
   * @throws RuntimeException wrapping any parse or search failure
   */
  public static void query(String str) {
    requireInitialized();
    search(ramDirectory, str);
  }

  /**
   * Searches the on-disk index and prints every matching string (at most 100) to stdout.
   *
   * @param str query in Lucene query-parser syntax
   * @throws IllegalStateException if {@link #init(String)} has not been called
   * @throws RuntimeException wrapping any parse or search failure
   */
  public static void queryFromDisc(String str) {
    requireInitialized();
    search(indexDirectory, str);
  }

  /**
   * Synchronizes memory to disk: recreates the on-disk index and fills it with the contents
   * of the in-memory index.
   *
   * @throws IllegalStateException if {@link #init(String)} has not been called
   * @throws RuntimeException wrapping the {@link IOException} if writing fails
   */
  public static void synce() {
    requireInitialized();
    // CREATE is deliberate here: the in-memory index was seeded from disk, so appending
    // would duplicate every document that was already persisted.
    try (IndexWriter fsIndexWriter =
        new IndexWriter(indexDirectory, newConfig(IndexWriterConfig.OpenMode.CREATE))) {
      fsIndexWriter.addIndexes(ramDirectory);
    } catch (IOException e) {
      throw new RuntimeException("create IndexWriter error:", e);
    }
  }

  /**
   * Demo entry point: indexes one sentence in memory, persists it and queries the disk index.
   *
   * @param args optional; {@code args[0]} overrides the default index folder
   * @throws IOException if the index cannot be opened
   */
  public static void main(String[] args) throws IOException {
    String indexPath = args.length > 0 ? args[0] : "D:/lucene/index";
    StringIndexer.init(indexPath);
    StringIndexer.createIndex("Mr.css is a teacher!");
    StringIndexer.synce();
    StringIndexer.queryFromDisc("Mr.css");
  }

  /** Builds a fresh writer configuration with the shared analyzer and the given open mode. */
  private static IndexWriterConfig newConfig(IndexWriterConfig.OpenMode mode) {
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer);
    config.setOpenMode(mode);
    return config;
  }

  /** Parses {@code str} against the {@code KEY} field and prints the stored text of each hit. */
  private static void search(Directory directory, String str) {
    try (IndexSearcher indexSearcher = new IndexSearcher(directory)) {
      // QueryParser arguments: match version, default field name, analyzer.
      QueryParser queryParser = new QueryParser(Version.LUCENE_36, KEY, analyzer);
      Query query = queryParser.parse(str);

      // Fetch the best MAX_HITS matches; fewer are returned when fewer exist.
      TopDocs hits = indexSearcher.search(query, MAX_HITS);
      for (ScoreDoc scoreDoc : hits.scoreDocs) {
        Document doc = indexSearcher.doc(scoreDoc.doc);
        System.out.println(doc.get(KEY));
      }
    } catch (Exception e) {
      // Parsing and searching fail with different checked types; both are reported the same way.
      throw new RuntimeException("query error:", e);
    }
  }

  /** Fails fast with a clear message when an operation is attempted before {@code init}. */
  private static void requireInitialized() {
    if (indexDirectory == null || ramDirectory == null || analyzer == null) {
      throw new IllegalStateException("StringIndexer.init(String) must be called first");
    }
  }
}
原文地址:https://www.cnblogs.com/chenss15060100790/p/11954744.html