Lucene学习笔记（1）初步了解Lucene

Lucene是一个强大的Java搜索库，它能让你很轻易地将搜索功能加入到任何程序中。刚开始学习Lucene，首先要了解Lucene的整体架构，这样就能清晰地理解程序中由Lucene完成的内容，以及其他需要你自行完成的内容。

搜索程序首先需要实现的功能是索引链，这需要按照几个独立的步骤依次来完成：1、检索原始内容；2、根据原始内容来创建对应的文档；3、对创建的文档进行索引。一旦建立起索引，用于搜索的组件也就出来了，这些搜索组件包括：用户接口、构建可编程查询语句的方法、执行查询语句（或者检索匹配文档）、展现查询结果等。

根据以上的说明，我们先来创建一个Lucene的示例程序，通过这个示例来进一步了解Lucene的易用性和强大功能。

1、建立索引

package com.lucene.demo;

import java.io.File;
import java.io.FileFilter;
import java.io.FileReader;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Builds a Lucene index from the files found directly inside one directory.
 *
 * <p>Each accepted file becomes one Lucene document with three fields:
 * {@code contents} (read from the file), {@code filename} and {@code fullpath}.
 */
public class Indexer {

    private IndexWriter writer;

    /**
     * Opens the index directory and creates the {@link IndexWriter} used by this indexer.
     *
     * @param indexDir path of the directory in which the index files are stored
     * @throws IOException if the directory or the writer cannot be opened
     */
    public Indexer(String indexDir) throws IOException {
        Directory dir = FSDirectory.open(new File(indexDir));
        writer = new IndexWriter(
                dir,
                new StandardAnalyzer(Version.LUCENE_30),
                true,
                IndexWriter.MaxFieldLength.UNLIMITED);
    }

    /**
     * Closes the underlying {@link IndexWriter}.
     *
     * @throws IOException if closing the writer fails
     */
    public void close() throws IOException {
        writer.close();
    }

    /**
     * Indexes every regular, visible, readable file directly inside {@code dataDir}
     * that is accepted by {@code filter}. Sub-directories are skipped, not descended into.
     *
     * @param dataDir directory holding the files to index
     * @param filter  decides which files are indexed; {@code null} accepts every file
     * @return the number of documents reported by the writer after indexing
     * @throws IOException if {@code dataDir} cannot be listed (it does not exist,
     *                     is not a directory, or an I/O error occurs)
     * @throws Exception   if reading a file or adding its document fails
     */
    public int index(String dataDir, FileFilter filter) throws Exception {
        File[] files = new File(dataDir).listFiles();
        // listFiles() returns null rather than an empty array when the path is not a
        // listable directory; fail with a clear message instead of an NPE in the loop.
        if (files == null) {
            throw new IOException("Cannot list files in data directory: " + dataDir);
        }

        for (File f : files) {
            boolean indexable = !f.isDirectory()
                    && !f.isHidden()
                    && f.exists()
                    && f.canRead()
                    && (filter == null || filter.accept(f));
            if (indexable) {
                indexFile(f);
            }
        }
        return writer.numDocs();
    }

    /** Accepts only files whose name ends with {@code .txt} (case-insensitive). */
    private static class TextFilesFilter implements FileFilter {

        @Override
        public boolean accept(File pathname) {
            return pathname.getName().toLowerCase().endsWith(".txt");
        }
    }

    /**
     * Builds the Lucene document for one file.
     *
     * @param f file to convert into a document
     * @return a document with the {@code contents}, {@code filename} and {@code fullpath} fields
     * @throws Exception if the file cannot be opened or its canonical path cannot be resolved
     */
    protected Document getDocument(File f) throws Exception {
        Document doc = new Document();
        // NOTE(review): the FileReader is not closed here and decodes with the platform
        // default charset; presumably Lucene consumes and closes it while the document
        // is being added - confirm against the Lucene version in use.
        doc.add(new Field("contents", new FileReader(f)));
        doc.add(new Field("filename", f.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field("fullpath", f.getCanonicalPath(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        return doc;
    }

    /** Logs the file's canonical path and adds its document to the index. */
    private void indexFile(File f) throws Exception {
        System.out.println("Indexing " + f.getCanonicalPath());
        Document doc = getDocument(f);
        writer.addDocument(doc);
    }

    /**
     * Indexes the {@code .txt} files of a fixed data directory into a fixed index
     * directory and prints how many documents were indexed and how long it took.
     */
    public static void main(String[] args) throws Exception {
        // Directory in which the Lucene index is stored
        String indexDir = "E:\\luceneDir\\indexDir";

        // Directory holding the files to be indexed
        String dataDir = "E:\\luceneDir\\dataDir";

        long start = System.currentTimeMillis();
        Indexer indexer = new Indexer(indexDir);
        int numIndexed;
        try {
            numIndexed = indexer.index(dataDir, new TextFilesFilter());
        } finally {
            // Always release the writer, even when indexing fails.
            indexer.close();
        }
        long end = System.currentTimeMillis();

        System.out.println(" Indexing " + numIndexed + " files took " + (end - start) + " milliseconds");
    }
}

2、搜索索引

package com.lucene.demo;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Runs a query against the {@code contents} field of an existing Lucene index
 * and prints the {@code fullpath} field of each returned hit.
 */
public class Searcher {

    /**
     * Searches the index for documents matching a query string.
     *
     * <p>The hit count and elapsed search time are written to standard error;
     * the {@code fullpath} of each of the (at most 10) returned documents is
     * written to standard output.
     *
     * @param indexDir path of the directory in which the index files are stored
     * @param q        query string, parsed against the {@code contents} field
     * @throws Exception if the index cannot be opened, the query cannot be parsed,
     *                   or the search fails
     */
    public static void search(String indexDir, String q) throws Exception {
        // Open the index
        Directory dir = FSDirectory.open(new File(indexDir));
        try {
            IndexSearcher is = new IndexSearcher(dir);
            try {
                // Parse the query string
                QueryParser parser = new QueryParser(
                        Version.LUCENE_30, "contents", new StandardAnalyzer(Version.LUCENE_30));
                Query query = parser.parse(q);

                long start = System.currentTimeMillis();
                // Search the index, keeping at most the top 10 hits
                TopDocs hits = is.search(query, 10);
                long end = System.currentTimeMillis();

                System.err.println("Found "+hits.totalHits+" documnet(s) (in "+(end-start)+" milliseconds) that matched query '"+q+"';" );

                for (ScoreDoc scoreDoc : hits.scoreDocs) {
                    Document doc = is.doc(scoreDoc.doc);
                    System.out.println(doc.get("fullpath"));
                }
            } finally {
                // Release the searcher even when parsing or searching throws.
                is.close();
            }
        } finally {
            dir.close();
        }
    }

    /** Searches a fixed index directory for the term "lucene". */
    public static void main(String[] args) throws Exception {
        String indexDir = "E:\\luceneDir\\indexDir";
        String queryStr = "lucene";
        search(indexDir, queryStr);
    }
}

通过以上代码，我们初步地了解了Lucene的功能，但不要因为这个例子简单就感到满足，Lucene包含的内容还有很多。