lucene的增量更新

对于每天都会更新的索引，可以采用增量更新：以 pid 作为主键，把当天的新字段合并到索引中已有的文档里，而不必每天重建整个索引。例子如下：

第一天:
pid mondayCv
123   23000

第二天(在同一 pid 的文档上追加 tuesdayCv 字段):
pid  mondayCv   tuesdayCv
123   23000        45000
package com.sachie.lucene.test;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
import com.sachie.lucene.model.TestObject;

/**
 * Example of incrementally updating a Lucene 3.6 index.
 *
 * <p>Every regular file in the source directory is read as tab-separated
 * records. The first column is indexed as {@code pid}; the file name is used as
 * a prefix for the two value fields ({@code <name>cvOne}, {@code <name>cvAll}),
 * so each processed file contributes its own pair of fields to the document
 * identified by {@code pid}.
 */
public class CreateTeest {

    /** Writer configuration used by {@link #createIndex()}; built once in the static initializer. */
    static IndexWriterConfig conf = null;
    static {
        Analyzer analysis = new StandardAnalyzer(Version.LUCENE_36);
        conf = new IndexWriterConfig(Version.LUCENE_36, analysis);
        // Append to an existing index when there is one, otherwise create it.
        conf.setOpenMode(OpenMode.CREATE_OR_APPEND);
        conf.setRAMBufferSizeMB(512.00);
    }

    /**
     * Adds a stored, un-analyzed field to the given document.
     *
     * @param doc   document to extend
     * @param name  field name
     * @param value field value
     */
    public void addDoc(Document doc, String name, String value) {
        doc.add(new Field(name, value, Field.Store.YES,
                Field.Index.NOT_ANALYZED));
    }

    /**
     * Reads a tab-separated data file into a list of {@link TestObject}s.
     *
     * <p>The first three columns of each line are passed, in order, to the
     * {@code TestObject} constructor; extra columns are ignored and blank lines
     * are skipped.
     *
     * @param file data file to read
     * @return one object per non-blank line, in file order
     * @throws FileNotFoundException if {@code file} does not exist
     * @throws IOException if the file cannot be read or a line has fewer than
     *         three columns
     */
    public List<TestObject> getIndexObject(File file) throws IOException {
        if (!file.exists()) {
            throw new FileNotFoundException("Data file does not exist: "
                    + file.getPath());
        }
        List<TestObject> objectList = new ArrayList<TestObject>();
        BufferedReader br = new BufferedReader(new FileReader(file));
        try {
            String line;
            int lineNumber = 0;
            while ((line = br.readLine()) != null) {
                lineNumber++;
                if (line.trim().length() == 0) {
                    continue;
                }
                // Limit -1 keeps trailing empty columns, so a row whose last
                // value is empty still yields three columns.
                String[] columns = line.split("\t", -1);
                if (columns.length < 3) {
                    throw new IOException("Expected 3 tab-separated columns but found "
                            + columns.length + " at " + file.getPath() + ":" + lineNumber);
                }
                objectList.add(new TestObject(columns[0], columns[1], columns[2]));
            }
        } finally {
            // Release the file handle even when reading or parsing fails.
            br.close();
        }
        return objectList;
    }

    /**
     * Indexes every regular file found in the source directory into the target
     * index, committing after each file.
     *
     * <p>When the index has no commit yet, the records of the current file are
     * added directly. Otherwise each record is merged into the existing document
     * with the same {@code pid}, which also keeps a re-run against an existing
     * index from creating duplicate documents.
     *
     * @throws FileNotFoundException if the source directory is missing or unreadable
     * @throws IOException if a data file or the index cannot be read or written
     */
    public void createIndex() throws IOException {
        String sourcePath = "d:\\data";
        String target = "d:\\testIndex";

        // List once; listFiles() returns null when the path is not a readable directory.
        File[] dataFiles = new File(sourcePath).listFiles();
        if (dataFiles == null) {
            throw new FileNotFoundException(
                    "Source directory is missing or unreadable: " + sourcePath);
        }

        Directory directory = new SimpleFSDirectory(new File(target));
        try {
            IndexWriter indexWriter = new IndexWriter(directory, conf);
            try {
                for (File file : dataFiles) {
                    if (!file.isFile()) {
                        // Sub-directories cannot be read as data files.
                        continue;
                    }
                    indexFile(indexWriter, directory, file);
                    // Commit so the reader opened for the next file sees these documents.
                    indexWriter.commit();
                    indexWriter.forceMerge(1);
                }
            } finally {
                // Always close the writer so the index write lock is released.
                indexWriter.close();
            }
        } finally {
            directory.close();
        }
    }

    /**
     * Adds or merges all records of one data file; does not commit.
     */
    private void indexFile(IndexWriter indexWriter, Directory directory,
            File file) throws IOException {
        String date = file.getName();
        List<TestObject> list = this.getIndexObject(file);

        // A reader can only be opened once the index has at least one commit.
        IndexReader reader = null;
        IndexSearcher searcher = null;
        if (IndexReader.indexExists(directory)) {
            reader = IndexReader.open(directory);
            searcher = new IndexSearcher(reader);
        }
        try {
            for (TestObject tmp : list) {
                Document doc = new Document();
                addDoc(doc, "pid", tmp.getPid());
                addDoc(doc, date + "cvOne", tmp.getCvOne());
                addDoc(doc, date + "cvAll", tmp.getCvAll());
                if (searcher == null) {
                    indexWriter.addDocument(doc);
                } else {
                    this.searchAndUpdateDocument(indexWriter, searcher,
                            doc, new Term("pid", tmp.getPid()));
                }
            }
        } catch (Exception e) {
            // Best effort: report the failure and continue with the next file;
            // records already handed to the writer are still committed.
            System.err.println("Failed while indexing " + file.getPath()
                    + "; the remaining records of this file are skipped.");
            e.printStackTrace();
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
    }

    /**
     * Merges the fields of {@code updateDoc} into the single indexed document
     * matching {@code term}, or adds {@code updateDoc} when nothing matches.
     *
     * <p>The existing document is rebuilt from what the searcher returns for it,
     * so only stored fields are carried over.
     *
     * @param writer    writer that receives the add or update
     * @param searcher  searcher used to look up the existing document
     * @param updateDoc document holding the new or replacement fields
     * @param term      term that identifies the document to update
     * @throws IOException if searching or writing fails
     * @throws IllegalArgumentException if {@code term} matches more than one document
     */
    public void searchAndUpdateDocument(IndexWriter writer,
            IndexSearcher searcher, Document updateDoc, Term term)
            throws IOException {
        TermQuery query = new TermQuery(term);
        TopDocs hits = searcher.search(query, 10);
        if (hits.scoreDocs.length == 0) {
            writer.addDocument(updateDoc);
        } else if (hits.scoreDocs.length > 1) {
            throw new IllegalArgumentException(
                    "Given Term matches more than 1 document in the index.");
        } else {
            int docId = hits.scoreDocs[0].doc;
            Document doc = searcher.doc(docId);
            List<Fieldable> replacementFields = updateDoc.getFields();
            for (Fieldable field : replacementFields) {
                String name = field.name();
                // Replace an existing value; otherwise the field is simply appended.
                if (doc.get(name) != null) {
                    doc.removeFields(name);
                }
                doc.add(field);
            }
            writer.updateDocument(term, doc);
        }
    }

    /**
     * Runs the indexing example.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if indexing fails
     */
    public static void main(String args[]) throws IOException {
        CreateTeest ic = new CreateTeest();
        ic.createIndex();
    }

}
I believe,I can. Sachie.Dong
原文地址:https://www.cnblogs.com/sachie/p/3108736.html