Lucene 学习笔记

1,创建索引

  

Directory directory=FSDirectory.open(Paths.get(fileName));
IndexWriterConfig iwc=new IndexWriterConfig(new StandadAnalyzer());
iwc.setOpenMode(OpenMode.CREATE);
IndexWriter writer=new IndexWriter(,directory,iwc);
Document doc=new Document();
doc.add(new TextField(name,value));
writer.addDocument(doc);
directory.close();
directory.close();

  2,简单搜索

Path indexPath = Paths.get(fileName);
Directory dir = FSDirectory.open(indexPath);
IndexReader reader = DirectoryReader.open(dir);
IndexSearcher searcher = new IndexSearcher(reader);
        
Term term = new Term(field, keyword);
Query termQuery = new TermQuery(term);
System.out.println("Query:"+termQuery);
        
// 返回前10条
TopDocs tds = searcher.search(termQuery, 10);
System.out.println(tds.totalHits);
for (ScoreDoc sd : tds.scoreDocs) {
    // Explanation explanation = searcher.explain(query, sd.doc);
    // System.out.println("explain:" + explanation + "
");
    Document doc = searcher.doc(sd.doc);
    System.out.println("DocID:" + sd.doc);
    System.out.println("open_id:" + doc.get("open_id"));
    System.out.println("time:" + doc.get("time"));
}
dir.close();
reader.close();

3,收集器

 实现Collector和LeafCollector

  getLeafCollector中获取reader

  collect中统计

 CountCollector implements Collector,LeafCollector 

/**
 * Remembers the reader of the leaf that is about to be collected and hands
 * back this object as the collector for that leaf.
 *
 * @param context context of the leaf whose documents will be collected next
 * @return this collector
 * @throws IOException declared by the method signature
 */
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
    reader = context.reader();
    return this;
}

 /**
  * Counts one occurrence of the value that {@code field} holds for the given
  * document, accumulating the per-value totals in {@code map}.
  *
  * @param doc id of the document within the current leaf reader
  * @throws IOException if the doc values cannot be read
  */
 public void collect(int doc) throws IOException {
     SortedDocValues docValues = reader.getSortedDocValues(field);
     if (docValues == null) {
         // This leaf has no sorted doc values for the field: nothing to count.
         return;
     }
     // Alternative source for the value: the stored field, i.e.
     // reader.document(doc).get(field).
     String v = docValues.get(doc).utf8ToString();
     // merge() stores 1 for a value seen for the first time and otherwise
     // adds 1 to the count already present.
     map.merge(v, 1, Integer::sum);
 }

4,grouping

  索引字段需设置分词并设置词项向量

// Field type for a tokenized, stored field that also records term vectors.
FieldType type = new FieldType();
type.setTokenized(true);        // tokenize the value
type.setStored(true);           // keep the complete original string in the index
type.setStoreTermVectors(true); // store term vectors
// Index documents, term frequencies, positions and offsets.
type.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);

 

long t1=System.currentTimeMillis();
        GroupingSearch groupingSearch=new GroupingSearch("time");
        groupingSearch.setAllGroups(true);
        groupingSearch.setCachingInMB(4.0,true);
        groupingSearch.setFillSortFields(true);
        IndexSearcher searcher=null;
        try {
            Directory directory= FSDirectory.open(Paths.get("testCollect"));
            IndexReader reader= DirectoryReader.open(directory);
            searcher=new IndexSearcher(reader);
            Query query=new MatchAllDocsQuery();
            TopGroups<BytesRef> result=groupingSearch.search(searcher,query,0,searcher.getIndexReader().maxDoc());
            GroupDocs<BytesRef>[] docs=result.groups;
            int total=0;
            for (GroupDocs<BytesRef> groupDocs : docs){
                System.out.println(new String(groupDocs.groupValue.bytes)+":"+groupDocs.totalHits);
                total+=groupDocs.totalHits;
            }
            System.out.println("time cost:"+(System.currentTimeMillis()-t1));
            System.out.println("总数据量:"+total);
            int totalGroupCount = result.totalGroupCount;
            System.out.println(totalGroupCount);
        }catch (Exception e){
            e.printStackTrace();
        }

5,facet

  维度统计,需设置维度字段FacetField

  第一步构造索引Searcher

  

// Open a reader on the main index directory and wrap it in a searcher.
IndexReader indexReader= DirectoryReader.open(indexDir);
IndexSearcher searcher=new IndexSearcher(indexReader);

  第二步构造facetReader

  

// Open a taxonomy reader on the taxonomy directory (taxoDir).
TaxonomyReader taxoReader=new DirectoryTaxonomyReader(taxoDir);

  第三步,设置收集器

// Create the FacetsCollector that is handed to the search step.
FacetsCollector fc=new FacetsCollector();

  第四步搜索

// Search the "device" field for the term "手机", requesting 10 hits and
// passing the FacetsCollector fc to the search.
Term deviceTerm = new Term("device", "手机");
TermQuery query = new TermQuery(deviceTerm);
FacetsCollector.search(searcher, query, 10, fc);

  第五步获取维度统计结果

// Build the facet counts from the collector and print the result of every dimension.
Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, fc);
List<FacetResult> results = facets.getAllDims(10);
results.forEach(System.out::println);

  DrillDownQuery可以设置多个查询维度

  DrillSideways搜索同类

原文地址:https://www.cnblogs.com/jinjixia/p/8573616.html