Lucene查询简述

本示例演示Lucene的查询,并对查询结果进行了一些处理(基于Lucene 3.5):

1、支持以通配符开头的查询(后缀匹配),如 *国 可以搜索出中国、美国等以“国”字结尾的词的内容;
        前缀匹配,如 国* 则可以搜索以“国”字开头的词,*:* 可以查询所有索引。需要先开启:
  parser.setAllowLeadingWildcard(true);

  2、搜索时对含有通配符的查询词不自动转换为小写(保留输入的大小写):

  // 有通配符时不转换大小写
  parser.setLowercaseExpandedTerms(false);

  3、结果进行多字段排序,详细见代码排序部分;

  4、结果高亮显示,详细见代码高亮部分。

 package cn.test.gxg.engine.query; 
  
 import java.io.File; 
 import java.io.IOException; 
 import java.io.StringReader; 
  
 import org.apache.lucene.analysis.Analyzer; 
 import org.apache.lucene.analysis.TokenStream; 
 import org.apache.lucene.analysis.standard.StandardAnalyzer; 
 import org.apache.lucene.document.Document; 
 import org.apache.lucene.document.Field; 
 import org.apache.lucene.document.Fieldable; 
 import org.apache.lucene.document.NumericField; 
 import org.apache.lucene.document.Field.Store; 
 import org.apache.lucene.index.CorruptIndexException; 
 import org.apache.lucene.index.IndexReader; 
 import org.apache.lucene.index.IndexWriter; 
 import org.apache.lucene.queryParser.ParseException; 
 import org.apache.lucene.queryParser.QueryParser; 
 import org.apache.lucene.search.IndexSearcher; 
 import org.apache.lucene.search.Query; 
 import org.apache.lucene.search.ScoreDoc; 
 import org.apache.lucene.search.Searcher; 
 import org.apache.lucene.search.Sort; 
 import org.apache.lucene.search.SortField; 
 import org.apache.lucene.search.TopDocs; 
 import org.apache.lucene.search.highlight.Highlighter; 
 import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; 
 import org.apache.lucene.search.highlight.QueryScorer; 
 import org.apache.lucene.search.highlight.SimpleFragmenter; 
 import org.apache.lucene.search.highlight.SimpleHTMLFormatter; 
 import org.apache.lucene.store.FSDirectory; 
 import org.apache.lucene.store.LockObtainFailedException; 
 import org.apache.lucene.util.Version; 
  
 /**
  * Example that builds a small Lucene index and then queries it with
  * multi-field sorting and result highlighting.
  *
  * @createTime: Feb 22, 2010 3:02:28 PM
  * @author:  <a href="mailto:leader1212@sina.com.cn">天涯 </a>
  * @version: 0.1
  * @lastVersion: 0.1
  * @updateTime:
  * @updateAuthor:  <a href="mailto:leader1212@sina.com.cn">天涯 </a>
  * @changesSum:
  *
  */
 public class QueryTest {
     /** Index directory used when no path is given on the command line. */
     private static final String DEFAULT_INDEX_PATH =
             "D:\\workspace\\code\\java\\TestLucene3\\index\\txt\\test";

     /** Names of the stored fields whose values are highlighted before printing. */
     private static final String[] HIGHLIGHT_FIELDS = {"Content", "Name"};

     /**
      * Rebuilds the index and then runs the demo search against it.
      *
      * @param args optional; {@code args[0]} overrides the default index directory
      */
     public static void main(String[] args) {
         String indexPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INDEX_PATH;
         createIndex(indexPath);
         search(indexPath);
     }

     /**
      * Creates (overwriting any existing index) ten sample documents under
      * {@code indexPath}. Failures are reported on stderr and not rethrown.
      *
      * @param indexPath directory in which the index is written
      */
     public static void createIndex(String indexPath) {
         // The same analyzer must be used at search time, otherwise results are unreliable.
         // NOTE(review): Version.LUCENE_CURRENT is deprecated in Lucene 3.x; consider
         // pinning the concrete version the index is built with.
         Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
         IndexWriter writer = null;
         try {
             // "true" = create a fresh index, replacing whatever is already there.
             writer = new IndexWriter(FSDirectory.open(new File(indexPath)),
                     analyzer, true, IndexWriter.MaxFieldLength.LIMITED);

             for (int i = 0; i < 10; i++) {
                 Document doc = new Document();
                 doc.add(new Field("Code", "feinnocdb_App_info" + i, Field.Store.YES,
                         Field.Index.ANALYZED));
                 doc.add(new NumericField("Id", Store.YES, true).setIntValue(i % 3));
                 // The original added the numeric "Id" field a second time here,
                 // so "Name" never reached the index.
                 doc.add(new Field("Name", "国家名字-" + i, Field.Store.YES,
                         Field.Index.ANALYZED));
                 doc.add(new Field("Content", "中国中华人民共和国—" + i, Field.Store.YES,
                         Field.Index.ANALYZED));
                 doc.add(new NumericField("Type", Store.YES, true).setIntValue(i % 10));
                 doc.add(new NumericField("Price", Store.YES, true).setFloatValue(i % 3));
                 doc.add(new NumericField("Sex", Store.YES, true).setIntValue(i % 2));
                 writer.addDocument(doc);
             }
             writer.close();
             writer = null; // closed successfully; nothing left for the finally block
             System.out.println("Indexed success!");
         } catch (Exception e) {
             // Demo boundary: report and let the caller continue.
             e.printStackTrace();
         } finally {
             if (writer != null) {
                 // Only reached on failure: close the writer so the index write lock is released.
                 try {
                     writer.close();
                 } catch (IOException e) {
                     e.printStackTrace();
                 }
             }
         }
     }

     /**
      * Searches the index at {@code indexPath} for {@code Content:国}, sorts the
      * hits by "Id" (descending) then "Sex" (ascending), and prints every stored
      * field of each hit with the "Content" and "Name" values highlighted.
      * Failures are reported on stderr and not rethrown.
      *
      * @param indexPath directory containing the index to search
      */
     public static void search(String indexPath) {
         // Any analyzer works as long as it is the one the index was built with.
         Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
         IndexReader reader = null;
         Searcher searcher = null;
         try {
             reader = IndexReader.open(FSDirectory.open(new File(indexPath)));

             QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "Content", analyzer);
             // Parser options must be set BEFORE parse() to affect the query.
             // Allow a leading wildcard (e.g. *国); *:* matches every document.
             parser.setAllowLeadingWildcard(true);
             // Do not lowercase wildcard/prefix/fuzzy/range terms.
             parser.setLowercaseExpandedTerms(false);

             Query query;
             try {
                 query = parser.parse("Content:国");
             } catch (ParseException e) {
                 // Without a query there is nothing to search for.
                 e.printStackTrace();
                 return;
             }

             // Multi-field sort: earlier entries take precedence.
             // For a single-field sort pass just one SortField to Sort.
             Sort sort = new Sort(new SortField[] {
                 new SortField("Id", SortField.INT, true),
                 new SortField("Sex", SortField.INT, false)
             });

             searcher = new IndexSearcher(reader);
             // Without sorting, searcher.search(query, 100) would be enough.
             TopDocs topDocs = searcher.search(query, null, 1000, sort);

             System.out.println("查询语句为:" + query.toString());
             System.out.println("查询到数据条数为:" + topDocs.totalHits);
             if (topDocs.totalHits == 0) {
                 return;
             }

             // Highlighting normally reuses the search query; a different one is
             // used here purely for demonstration.
             Query hilightQuery;
             try {
                 hilightQuery = parser.parse("Content:中");
             } catch (ParseException e) {
                 e.printStackTrace();
                 hilightQuery = query; // fall back to the usual choice
             }

             // Wrap every highlighted term in the given prefix/suffix markup.
             SimpleHTMLFormatter formatter =
                     new SimpleHTMLFormatter(" <FONT COLOR='RED'>", " </FONT>");
             Highlighter highlighter = new Highlighter(formatter, new QueryScorer(hilightQuery));
             // Maximum number of characters per returned fragment.
             highlighter.setTextFragmenter(new SimpleFragmenter(1000));

             // Load each hit and print all of its stored fields on one line.
             for (ScoreDoc sd : topDocs.scoreDocs) {
                 Document document = searcher.doc(sd.doc);
                 for (Fieldable fa : document.getFields()) {
                     String name = fa.name();
                     String value = document.get(name);
                     if (isHighlightField(name)) {
                         value = highlight(highlighter, analyzer, name, value);
                     }
                     System.out.print(name + ":" + value + "  ");
                 }
                 System.out.println();
             }
         } catch (IOException e) {
             // Also covers CorruptIndexException and LockObtainFailedException.
             e.printStackTrace();
         } catch (InvalidTokenOffsetsException e) {
             e.printStackTrace();
         } finally {
             if (searcher != null) {
                 try {
                     searcher.close();
                 } catch (IOException e) {
                     e.printStackTrace();
                 }
             }
             if (reader != null) {
                 try {
                     reader.close();
                 } catch (IOException e) {
                     e.printStackTrace();
                 }
             }
         }
     }

     /** Returns whether values of the named field should be highlighted. */
     private static boolean isHighlightField(String fieldName) {
         for (String col : HIGHLIGHT_FIELDS) {
             if (col.equals(fieldName)) {
                 return true;
             }
         }
         return false;
     }

     /**
      * Returns {@code text} with query terms highlighted, or {@code text}
      * unchanged when it is null or the highlighter finds nothing to mark.
      */
     private static String highlight(Highlighter highlighter, Analyzer analyzer,
             String fieldName, String text) throws IOException, InvalidTokenOffsetsException {
         if (text == null) {
             return null;
         }
         TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(text));
         String fragment = highlighter.getBestFragment(tokenStream, text);
         // getBestFragment yields null when no term matched; keep the stored value then.
         return fragment != null ? fragment : text;
     }
 }
原文地址:https://www.cnblogs.com/lcuzhanglei/p/2618456.html