Lucene学习笔记1(V7.1)

Lucene 是一个全文检索类库，Solr、Nutch 和 Elasticsearch 都是基于 Lucene 构建的。个人认为，在学习这些高级搜索引擎应用之前，有必要先了解 Lucene。

开发环境：IntelliJ IDEA、Maven、Spring Boot

开始贴代码：

maven配置

 <!-- Inherit dependency and plugin management from the Spring Boot parent POM -->
 <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>1.4.3.RELEASE</version>
    </parent>

    <properties>
        <java.version>1.8</java.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-thymeleaf</artifactId>
        </dependency>
        <!-- hot swapping, disable cache for template, enable live reload -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-devtools</artifactId>
            <optional>true</optional>
        </dependency>

            <!-- Lucene dependencies: start (all artifacts pinned to the same 7.1.0 version) -->
            <dependency>
                <groupId>org.apache.lucene</groupId>
                <artifactId>lucene-core</artifactId>
                <version>7.1.0</version>
            </dependency>

            <!-- Chinese analyzer (smartcn); the general-purpose analyzers (common) are suited to English tokenization -->
            <dependency>
                <groupId>org.apache.lucene</groupId>
                <artifactId>lucene-analyzers-smartcn</artifactId>
                <version>7.1.0</version>
            </dependency>
            <dependency>
                <groupId>org.apache.lucene</groupId>
                <artifactId>lucene-queryparser</artifactId>
                <version>7.1.0</version>
            </dependency>

            <!-- Highlighting of matched search keywords -->
            <dependency>
                <groupId>org.apache.lucene</groupId>
                <artifactId>lucene-highlighter</artifactId>
                <version>7.1.0</version>
            </dependency>
            <!-- Lucene dependencies: end -->

            <!-- NOTE(review): no <scope> is declared, so junit ends up on the compile classpath; confirm whether test scope was intended -->
            <dependency>
                <groupId>junit</groupId>
                <artifactId>junit</artifactId>
                <version>4.12</version>
            </dependency>


    </dependencies>

    <build>
        <plugins>
            <!-- Package as an executable jar/war -->
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
            </plugin>
        </plugins>
    </build>

View Code

辅助类

/**
 * Shared constants for the Lucene examples: document field names, the
 * maximum number of hits to fetch, and the file-system locations used
 * for the index and the source data.
 *
 * <p>This is a pure constants holder, so it is {@code final} and cannot be
 * instantiated.
 */
public final class LuceneConstants {
    /** Field name for document contents. */
    public static final String CONTENTS = "contents";
    /** Field name for a file's name. */
    public static final String FILE_NAME = "filename";
    /** Field name for a file's path. */
    public static final String FILE_PATH = "filepath";
    /** Maximum number of hits requested from a search. */
    public static final int MAX_SEARCH = 10;

    // The three path constants keep their original (non-UPPER_SNAKE_CASE) names
    // because other classes reference them by these names.

    /** Directory in which the Lucene index is stored. */
    public static final String IndexDir = "E:\\Lucene\\Index";
    /** Directory holding the raw data files. */
    public static final String DataDir = "E:\\Lucene\\Data";
    /** Path of the text file that is indexed line by line. */
    public static final String ArticleDir = "E:\\Lucene\\Files\\article.txt";

    /** Not instantiable: this class only exposes static constants. */
    private LuceneConstants() {
    }
}

View Code

调用Lucene

public class Indexer {

    public void addEntity() throws IOException {
        Article article = new Article();
        //article.setId(1);
        //article.setTitle("Lucene全文检索");
        //article.setContent("Lucene是apache软件基金会4 jakarta项目组的一个子项目，是一个开放源代码的全文检索引擎工具包，但它不是一个完整的全文检索引擎，而是一个全文检索引擎的架构，提供了完整的查询引擎和索引引擎，部分文本分析引擎（英文与德文两种西方语言）。");
        article.setId(2);
        article.setTitle("Solr搜索引擎");
        article.setContent("Solr是基于Lucene框架的搜索莹莹程序，是一个开放源代码的全文检索引擎。");

        final Path path = Paths.get(LuceneConstants.IndexDir);
        Directory directory = FSDirectory.open(path);//索引存放目录 存在磁盘
        //Directory RAMDirectory= new RAMDirectory();// 存在内存

        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
        //indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.APPEND);

        IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig);//更新或创建索引

        Document document = new Document();
        document.add(new TextField("id", article.getId().toString(), Field.Store.YES));
        document.add(new TextField("title", article.getTitle(), Field.Store.YES));
        document.add(new TextField("content", article.getContent(), Field.Store.YES));

        indexWriter.addDocument(document);
        indexWriter.close();
    }

    public void addFile() throws IOException {
        final Path path = Paths.get(LuceneConstants.IndexDir);

        Directory directory = FSDirectory.open(path);
        Analyzer analyzer=new StandardAnalyzer();

        IndexWriterConfig indexWriterConfig=new IndexWriterConfig(analyzer);
        indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

        IndexWriter indexWriter=new IndexWriter(directory,indexWriterConfig);
        InputStreamReader isr = new InputStreamReader(new FileInputStream(LuceneConstants.ArticleDir), "GBK");//.txt文档,不设置格式会乱码
        BufferedReader bufferedReader=new BufferedReader(isr);

        String content="";
        while ((content=bufferedReader.readLine())!=null){
            Document document=new Document();
            document.add(new TextField("content",content,Field.Store.YES) );
            indexWriter.addDocument(document);
        }
        bufferedReader.close();
        indexWriter.close();
    }

    public List<String> SearchFiles() throws IOException, ParseException {
        String queryString = "Solr";

        final Path path = Paths.get(LuceneConstants.IndexDir);
        Directory directory = FSDirectory.open(path);//索引存储位置
        Analyzer analyzer = new StandardAnalyzer();//分析器

        //单条件
        //关键词解析
        //QueryParser queryParser=new QueryParser("content",analyzer);
        //Query query=queryParser.parse(queryString);

        //多条件
        Query mQuery = MultiFieldQueryParser.parse(new String[]{"Solr"},new String[]{"content"},new StandardAnalyzer());

        IndexReader indexReader = DirectoryReader.open(directory);//索引阅读器
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);//查询

        //TopDocs topDocs=indexSearcher.search(query,3);
        TopDocs topDocs=indexSearcher.search(mQuery,10);
        long count = topDocs.totalHits;
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;


        List<String> list=new ArrayList<String>();
        list.add(String.valueOf(count));

        Integer cnt=0;

        for (ScoreDoc scoreDoc : scoreDocs) {
            Document document = indexSearcher.doc(scoreDoc.doc);

            //list.add(cnt.toString()+"-"+"相关度："+scoreDoc.score+"-----time:"+document.get("time"));
            //list.add("|||");
            //list.add(cnt.toString()+"-"+document.get("content"));

            list.add(document.get("content"));
            cnt++;
        }

        return  list;
    }
}

View Code

查看运行效果

/**
 * Web endpoints that trigger the {@link Indexer} demo operations and render
 * the outcome through the {@code welcome} view, which receives the result
 * text under the {@code message} model key.
 */
@Controller
public class LuceneController {

    /**
     * Handles {@code /add}: indexes the sample article.
     *
     * @param model view model; {@code message} is set to {@code "Success"} or,
     *              when an {@link IOException} occurs, {@code "Failure"}
     * @return the view name {@code "welcome"}
     */
    @RequestMapping("/add")
    public String welcomepage(Map<String, Object> model) {
        String outcome;
        try {
            new Indexer().addEntity();
            outcome = "Success";
        } catch (IOException e) {
            outcome = "Failure";
        }
        model.put("message", outcome);
        return "welcome";
    }

    /**
     * Handles {@code /file}: rebuilds the index from the article text file.
     *
     * @param model view model; {@code message} is set to {@code "SuccessF"} or,
     *              when an {@link IOException} occurs, {@code "FailureF"}
     * @return the view name {@code "welcome"}
     */
    @RequestMapping("/file")
    public String fileindex(Map<String, Object> model) {
        String outcome;
        try {
            new Indexer().addFile();
            outcome = "SuccessF";
        } catch (IOException e) {
            outcome = "FailureF";
        }
        model.put("message", outcome);
        return "welcome";
    }

    /**
     * Handles {@code /search}: runs the demo search and shows every returned
     * entry, each followed by a single space.
     *
     * @param model view model; {@code message} is set to the joined results
     *              or, when any exception occurs, {@code "FailureF"}
     * @return the view name {@code "welcome"}
     */
    @RequestMapping("/search")
    public String searchindex(Map<String, Object> model) {
        try {
            List<String> hits = new Indexer().SearchFiles();
            model.put("message", spaceTerminated(hits));
        } catch (Exception e) {
            model.put("message", "FailureF");
        }
        return "welcome";
    }

    /** Concatenates the entries, appending one space after each of them. */
    private static String spaceTerminated(List<String> entries) {
        StringBuilder joined = new StringBuilder();
        for (String entry : entries) {
            joined.append(entry).append(" ");
        }
        return joined.toString();
    }

}

View Code