IKAnalyzer2012FF_u1.jar + lucene4.0 简单实例 悟寰轩

  1 import java.io.File;
  2 import java.io.IOException;
  3 import java.io.StringReader;
  4 
  5 import org.apache.lucene.analysis.Analyzer;
  6 import org.apache.lucene.analysis.TokenStream;
  7 import org.apache.lucene.document.Document;
  8 import org.apache.lucene.document.TextField;
  9 import org.apache.lucene.document.Field.Store;
 10 import org.apache.lucene.index.IndexReader;
 11 import org.apache.lucene.index.IndexWriter;
 12 import org.apache.lucene.index.IndexWriterConfig;
 13 import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
 14 import org.apache.lucene.queryparser.classic.ParseException;
 15 import org.apache.lucene.queryparser.classic.QueryParser;
 16 import org.apache.lucene.search.IndexSearcher;
 17 import org.apache.lucene.search.Query;
 18 import org.apache.lucene.search.ScoreDoc;
 19 import org.apache.lucene.search.TopDocs;
 20 import org.apache.lucene.search.TopScoreDocCollector;
 21 import org.apache.lucene.search.highlight.Highlighter;
 22 import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
 23 import org.apache.lucene.search.highlight.QueryScorer;
 24 import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
 25 import org.apache.lucene.store.Directory;
 26 import org.apache.lucene.store.FSDirectory;
 27 import org.apache.lucene.util.Version;
 28 import org.wltea.analyzer.lucene.IKAnalyzer;
 29 
 30 public class IndexTools {
 31     /**
 32      * 获得indexwriter对象
 33      * 
 34      * @param dir
 35      * @return
 36      * @throws IOException
 37      * @throws Exception
 38      */
 39     private IndexWriter getIndexWriter(Directory dir, Analyzer analyzer) throws IOException {
 40         IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);
 41         return new IndexWriter(dir, iwc);
 42     }
 43     
 44     /**
 45      * 关闭indexwriter对象
 46      * 
 47      * @throws IOException
 48      * 
 49      * @throws Exception
 50      */
 51     private void closeWriter(IndexWriter indexWriter) throws IOException {
 52         if (indexWriter != null) {
 53             indexWriter.close();
 54         }
 55     }
 56     
 57     /**
 58      * 创建索引
 59      * 
 60      * @throws InvalidTokenOffsetsException
 61      */
 62     public void createIndex() throws InvalidTokenOffsetsException {
 63         String indexPath = "D://luceneindex"; // 建立索引文件的目录
 64         // 默认IKAnalyzer()-false:实现最细粒度切分算法,true:分词器采用智能切分
 65         Analyzer analyzer = new IKAnalyzer(true);
 66         IndexWriter indexWriter = null;
 67         Directory directory = null;
 68         try {
 69             directory = FSDirectory.open(new File(indexPath));
 70             indexWriter = getIndexWriter(directory, analyzer);
 71         } catch (Exception e) {
 72             System.out.println("索引打开异常!");
 73         }
 74         // 添加索引
 75         try {
 76             Document document = new Document();
 77             document.add(new TextField("filename", "标题:起点", Store.YES));
 78             document.add(new TextField("content", "内容:我是一名程序员", Store.YES));
 79             indexWriter.addDocument(document);
 80             Document document1 = new Document();
 81             document1.add(new TextField("filename", "标题:终点", Store.YES));
 82             document1.add(new TextField("content", "内容:我不再只是程序员", Store.YES));
 83             indexWriter.addDocument(document1);
 84             indexWriter.commit();
 85         } catch (IOException e1) {
 86             System.out.println("索引创建异常!");
 87         }
 88         try {
 89             closeWriter(indexWriter);
 90         } catch (Exception e) {
 91             System.out.println("索引关闭异常!");
 92         }
 93     }
 94     
 95     /**
 96      * 搜索
 97      * 
 98      * @throws ParseException
 99      * @throws IOException
100      * @throws InvalidTokenOffsetsException
101      */
102     @SuppressWarnings("deprecation")
103     public void searchIndex() throws ParseException, IOException, InvalidTokenOffsetsException {
104         String indexPath = "D://luceneindex"; // 建立索引文件的目录
105         // 默认IKAnalyzer()-false:实现最细粒度切分算法,true:分词器采用智能切分
106         Analyzer analyzer = new IKAnalyzer(true);
107         Directory directory = null;
108         try {
109             directory = FSDirectory.open(new File(indexPath));
110         } catch (Exception e) {
111             System.out.println("索引打开异常!");
112         }
113         IndexReader ireader = null;
114         IndexSearcher isearcher = null;
115         try {
116             ireader = IndexReader.open(directory);
117         } catch (IOException e) {
118             System.out.println("打开索引文件!");
119         }
120         isearcher = new IndexSearcher(ireader);
121         String keyword = "程序员";
122         // 使用QueryParser查询分析器构造Query对象
123         // eg:单个字段查询
124         // String fieldName = "content";
125         // QueryParser qp = new QueryParser(Version.LUCENE_40, fieldName, analyzer);
126         String[] fields = { "filename", "content" };
127         QueryParser qp = new MultiFieldQueryParser(Version.LUCENE_40, fields, analyzer);
128         qp.setDefaultOperator(QueryParser.AND_OPERATOR);
129         Query query = qp.parse(keyword);
130         // 搜索相似度最高的5条记录
131         TopDocs topDocs = isearcher.search(query, 25);
132         System.out.println("命中:" + topDocs.totalHits);
133         // 输出结果
134         ScoreDoc[] scoreDocs = topDocs.scoreDocs;
135         for (int i = 0; i < topDocs.totalHits; i++) {
136             Document targetDoc = isearcher.doc(scoreDocs[i].doc);
137             System.out.println("内容:" + targetDoc.toString());
138         }
139         // 分页,高亮显示
140         higherIndex(analyzer, isearcher, query, topDocs);
141     }
142     
143     public static void main(String[] args) {
144         IndexTools tool = new IndexTools();
145         try {
146             tool.searchIndex();
147         } catch (ParseException e) {
148             System.out.println("解析错误");
149         } catch (IOException e) {
150             System.out.println("读取文件流错误");
151         } catch (InvalidTokenOffsetsException e) {
152             System.out.println("查询失败");
153         }
154     }
155     
156     /**
157      * 分页,高亮显示
158      * 
159      * @param analyzer
160      * @param isearcher
161      * @param query
162      * @param topDocs
163      * @throws IOException
164      * @throws InvalidTokenOffsetsException
165      */
166     public void higherIndex(Analyzer analyzer, IndexSearcher isearcher, Query query, TopDocs topDocs)
167             throws IOException, InvalidTokenOffsetsException {
168         TopScoreDocCollector results = TopScoreDocCollector.create(topDocs.totalHits, false);
169         isearcher.search(query, results);
170         // 分页取出指定的doc(开始条数, 取几条)
171         ScoreDoc[] docs = results.topDocs(1, 2).scoreDocs;
172         for (int i = 0; i < docs.length; i++) {
173             Document targetDoc = isearcher.doc(docs[i].doc);
174             System.out.println("内容:" + targetDoc.toString());
175         }
176         // 关键字高亮显示的html标签,需要导入lucene-highlighter-3.5.0.jar
177         SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
178         Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));
179         for (int i = 0; i < docs.length; i++) {
180             Document doc = isearcher.doc(docs[i].doc);
181             // 标题增加高亮显示
182             TokenStream tokenStream1 = analyzer.tokenStream("filename", new StringReader(doc.get("filename")));
183             String title = highlighter.getBestFragment(tokenStream1, doc.get("filename"));
184             // 内容增加高亮显示
185             TokenStream tokenStream2 = analyzer.tokenStream("content", new StringReader(doc.get("content")));
186             String content = highlighter.getBestFragment(tokenStream2, doc.get("content"));
187             System.out.println(doc.get("filename") + " : " + title + " : " + content);
188         }
189     }
190 }
原文地址:https://www.cnblogs.com/sunxucool/p/2799805.html