lucene 总结

lucene总结

公司项目：portal 中期刊文章的内容作为大字段（CLOB）存储在 Oracle 中。首页有一个搜索功能，要求把内容中包含搜索关键字的所有文章的标题列出来。也就是说，需要用 Lucene 对数据库的大字段建立索引（索引由计划任务定时重建），并在此索引上实现搜索。

==================定时建立索引文件:===============

Main方法:

Java代码

package zxt.lucene.index;
import java.util.Timer;
public class IndexerServer {
    /**
     * Starts the periodic index build.
     * <p>
     * Selects the configuration file, then schedules the shared
     * {@link LuceneDBIndexerTask} on a new {@link Timer}: first run with no
     * delay, later runs at a fixed rate taken from
     * {@code Constant.CREATE_INDEX_SLEEP_TIME}.
     *
     * @param args not used
     * @author wulihai
     * @create 2009-06-02
     */
    public static void main(String[] args) {
        Config.setConfigFileName("directory.properties");
        final Timer scheduler = new Timer();
        final LuceneDBIndexerTask indexTask = LuceneDBIndexerTask.getInstance();
        final long period = DataTypeUtil.toLong(Constant.CREATE_INDEX_SLEEP_TIME);
        scheduler.scheduleAtFixedRate(indexTask, 0, period);
    }
}

 package zxt.lucene.index;

import java.util.Timer;
public class IndexerServer {

    /**
     * Program entry point: configures the properties file name and puts the
     * shared index-building task on a fixed-rate {@link Timer} schedule that
     * starts immediately.
     *
     * @param args not used
     * @author wulihai
     * @create 2009-06-02
     */
    public static void main(String[] args) {
        final String configName = "directory.properties";
        Config.setConfigFileName(configName);

        Timer indexTimer = new Timer();
        LuceneDBIndexerTask task = LuceneDBIndexerTask.getInstance();
        long interval = DataTypeUtil.toLong(Constant.CREATE_INDEX_SLEEP_TIME);
        indexTimer.scheduleAtFixedRate(task, 0, interval);
    }

}

定时调用建立索引任务：

Java代码

package zxt.lucene.index;
import java.util.Timer;
public class IndexerServer {
    /**
     * Launches the scheduled index build. The configuration file name is
     * registered first; afterwards the shared {@link LuceneDBIndexerTask} is
     * handed to a new {@link Timer} with zero initial delay and the period
     * read from {@code Constant.CREATE_INDEX_SLEEP_TIME}.
     *
     * @param args not used
     * @author wulihai
     * @create 2009-06-02
     */
    public static void main(String[] args) {
        Config.setConfigFileName("directory.properties");
        Timer timer = new Timer();
        LuceneDBIndexerTask rebuildTask = LuceneDBIndexerTask.getInstance();
        long repeatEvery = DataTypeUtil.toLong(Constant.CREATE_INDEX_SLEEP_TIME);
        timer.scheduleAtFixedRate(rebuildTask, 0, repeatEvery);
    }
}

package zxt.lucene.index;

import java.util.Timer;
public class IndexerServer {

    /**
     * Schedules the index-building task.
     * <p>
     * Steps, in order: register the properties file name with {@link Config},
     * create the {@link Timer}, obtain the shared task, convert the configured
     * interval to a {@code long}, and schedule the task at a fixed rate with no
     * initial delay.
     *
     * @param args not used
     * @author wulihai
     * @create 2009-06-02
     */
    public static void main(String[] args) {
        String propertiesName = "directory.properties";
        Config.setConfigFileName(propertiesName);

        Timer timer = new Timer();
        LuceneDBIndexerTask job = LuceneDBIndexerTask.getInstance();
        long everyN = DataTypeUtil.toLong(Constant.CREATE_INDEX_SLEEP_TIME);
        timer.scheduleAtFixedRate(job, 0, everyN);
    }

}

建立索引的核心实现：

Java代码

package zxt.lucene.index;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.StringWriter;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Date;
import java.util.TimerTask;
import oracle.sql.CLOB;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
/**
 * Timer task that rebuilds the Lucene index from the database.
 * <p>
 * Every run writes a complete new index into a fresh sub-directory of the
 * index parent directory; the sub-directory is named after the current time
 * ({@code yyyyMMddHHmmss}). After a successful build, {@link #checkFiles(File)}
 * trims the parent directory down to three entries.
 *
 * @author wulihai
 * @create 2009-06-02
 */
public class LuceneDBIndexerTask extends TimerTask {
    // Fallback parent directory, used when no index directory is configured.
    private static String DEFAULT_INDEX_DIR="C:\\IndexDB";
    // Parent directory that holds one time-stamped sub-directory per build.
    private File parentDir=null;
    // The single shared instance returned by getInstance().
    private static LuceneDBIndexerTask index=new LuceneDBIndexerTask();

    /**
     * Resolves the parent directory from {@code Constant.INDEX_STORE_DIRECTORY}
     * (falling back to the default) and creates it, including missing
     * ancestors, when it does not exist yet.
     */
    private LuceneDBIndexerTask(){
        String dirStr=Constant.INDEX_STORE_DIRECTORY;
        if(dirStr!=null&&!"".equals(dirStr)){
            this.parentDir=new File(dirStr);
        }else{
            this.parentDir=new File(DEFAULT_INDEX_DIR);
        }
        if(!this.parentDir.exists()){
            // mkdirs, not mkdir: the configured path may be several levels deep.
            this.parentDir.mkdirs();
        }
    }

    /**
     * Returns the shared task instance.
     *
     * @return the singleton
     */
    public static LuceneDBIndexerTask getInstance(){
        return index;
    }

    /**
     * Builds a new index from the rows returned by
     * {@code Constant.DB_QUERY_STRING}. I/O, SQL and driver-loading failures
     * are printed and end the current run; all JDBC resources and the index
     * writer are released in every case.
     */
    public void run() {
        System.out.println("====LuceneDBIndexerTask$run()===============");
        System.out.println("~~~开始建立索引文件~~~~~~~~~~~~~~~");
        Connection conn=null;
        Statement stmt=null;
        ResultSet rs=null;
        IndexWriter writer=null;
        try {
            Class.forName(Constant.DB_DRIVER_STRING);
            conn = DriverManager.getConnection(Constant.DB_URI_STRING, Constant.DB_USERNAME, Constant.DB_PWD);
            stmt = conn.createStatement();
            rs = stmt.executeQuery(Constant.DB_QUERY_STRING);
            File file=new File(parentDir, new SimpleDateFormat("yyyyMMddHHmmss").format(new Date()));
            if(!file.exists()){
                file.mkdirs();
            }
            writer = new IndexWriter(file,new StandardAnalyzer(), true);
            long startTime = System.currentTimeMillis();
            // One formatter per run; it is never shared between threads.
            SimpleDateFormat dayFormat = new SimpleDateFormat("yyyy-MM-dd");
            while (rs.next()) {
                writer.addDocument(toDocument(rs, dayFormat));
            }
            writer.optimize();
            IndexWriter finished = writer;
            writer = null; // prevents a second close() in the finally block
            finished.close();
            // Report how long the build took.
            long endTime = System.currentTimeMillis();
            System.out.println("索引文件"+file.getPath()+"建立成功...");
            System.out.println("这花费了" + (endTime - startTime) + " 毫秒来把文档增加到索引里面去!");
            // Drop the oldest index directories once more than three exist.
            checkFiles(parentDir);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (SQLException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }finally{
            if(writer!=null){
                // Only reached when the build failed part-way.
                try {
                    writer.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            try {
                if(rs!=null){
                    rs.close();
                }
                if(stmt!=null){
                    stmt.close();
                }
                if(conn!=null){
                    conn.close();
                }
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Converts the current row of {@code rs} into a Lucene document. SQL NULL
     * values are indexed as empty strings so that one incomplete row cannot
     * abort the whole build with a NullPointerException.
     */
    private Document toDocument(ResultSet rs, SimpleDateFormat dayFormat) throws SQLException, IOException {
        Document doc = new Document();
        doc.add(new Field("ARTICLEID", nullToEmpty(rs.getString("ARTICLEID")), Field.Store.YES,Field.Index.TOKENIZED));
        doc.add(new Field("TITLE", nullToEmpty(rs.getString("TITLE")), Field.Store.YES,Field.Index.TOKENIZED));
        doc.add(new Field("USERNAME", nullToEmpty(rs.getString("USERNAME")), Field.Store.YES,Field.Index.TOKENIZED));
        doc.add(new Field("USERID", nullToEmpty(rs.getString("USERID")), Field.Store.YES,Field.Index.TOKENIZED));
        // The creation date is indexed as yyyy-MM-dd text.
        Date created = rs.getTimestamp("CREATEDATE");
        String createdate = created == null ? "" : dayFormat.format(created);
        doc.add(new Field("CREATEDATE", createdate, Field.Store.YES,Field.Index.TOKENIZED));
        // The large CONTENT column is read completely into memory.
        doc.add(new Field("CONTENT", readContent(rs), Field.Store.YES, Field.Index.TOKENIZED));
        return doc;
    }

    /** Returns the text of the CONTENT CLOB of the current row, or "" when it is NULL. */
    private String readContent(ResultSet rs) throws SQLException, IOException {
        CLOB clob = (CLOB) rs.getClob("CONTENT");
        if (clob == null) {
            return "";
        }
        BufferedReader in = new BufferedReader(clob.getCharacterStream());
        try {
            StringWriter out = new StringWriter();
            char[] buffer = new char[4096];
            int n;
            while ((n = in.read(buffer)) != -1) {
                out.write(buffer, 0, n);
            }
            return out.toString();
        } finally {
            in.close();
        }
    }

    /** Maps {@code null} to the empty string. */
    private static String nullToEmpty(String value) {
        return value == null ? "" : value;
    }

    /**
     * Keeps at most three entries in {@code dir}. Entries are removed in
     * ascending name order; index directories are named by timestamp, so for
     * them that means oldest first.
     *
     * @param dir parent directory of the index directories
     */
    public void checkFiles(File dir) {
        String[] names=dir.list();
        if(names==null){
            // dir is not a directory or cannot be listed: nothing to clean up.
            return;
        }
        Arrays.sort(names);
        for(int i=0; i<names.length-3; i++){
            deleteDirectory(new File(dir, names[i]));
        }
    }

    /**
     * Recursively deletes {@code path} and everything below it.
     *
     * @param path file or directory to delete
     * @return {@code true} when {@code path} itself was deleted
     */
    public boolean deleteDirectory(File path) {
        if( path.exists() ) {
            File[] files = path.listFiles();
            // listFiles() yields null for plain files and on I/O errors.
            if(files!=null){
                for(int i=0; i<files.length; i++) {
                    if(files[i].isDirectory()) {
                        deleteDirectory(files[i]);
                    }
                    else {
                        files[i].delete();
                    }
                }
            }
        }
        boolean hasdelete=path.delete();
        if(hasdelete){
            System.out.println("删除索引目录"+path);
        }
        return hasdelete;
    }

    /** Runs a single index build, without a timer, for manual testing. */
    public static void main(String[] args) {
        getInstance().run();
    }
}

package zxt.lucene.index;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.StringWriter;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Date;
import java.util.TimerTask;

import oracle.sql.CLOB;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;

  /**
   * Timer task that rebuilds the Lucene index from the database.
   * <p>
   * Every run writes a complete new index into a fresh sub-directory of the
   * index parent directory; the sub-directory is named after the current time
   * ({@code yyyyMMddHHmmss}). After a successful build,
   * {@link #checkFiles(File)} trims the parent directory down to three entries.
   *
   * @author wulihai
   * @create 2009-06-02
   */
public class LuceneDBIndexerTask extends TimerTask {
	// Fallback parent directory, used when no index directory is configured.
	private static String DEFAULT_INDEX_DIR="C:\\IndexDB";
	// Parent directory that holds one time-stamped sub-directory per build.
	private File parentDir=null;
	// The single shared instance returned by getInstance().
	private static LuceneDBIndexerTask index=new LuceneDBIndexerTask();

	/**
	 * Resolves the parent directory from {@code Constant.INDEX_STORE_DIRECTORY}
	 * (falling back to the default) and creates it, including missing
	 * ancestors, when it does not exist yet.
	 */
	private LuceneDBIndexerTask(){
		String dirStr=Constant.INDEX_STORE_DIRECTORY;
		if(dirStr!=null&&!"".equals(dirStr)){
			this.parentDir=new File(dirStr);
		}else{
			this.parentDir=new File(DEFAULT_INDEX_DIR);
		}
		if(!this.parentDir.exists()){
			// mkdirs, not mkdir: the configured path may be several levels deep.
			this.parentDir.mkdirs();
		}
	}

	/**
	 * Returns the shared task instance.
	 *
	 * @return the singleton
	 */
	public static LuceneDBIndexerTask getInstance(){
		return index;
	}

	/**
	 * Builds a new index from the rows returned by
	 * {@code Constant.DB_QUERY_STRING}. I/O, SQL and driver-loading failures
	 * are printed and end the current run; all JDBC resources and the index
	 * writer are released in every case.
	 */
	public void run() {
		System.out.println("====LuceneDBIndexerTask$run()===============");
		System.out.println("~~~开始建立索引文件~~~~~~~~~~~~~~~");
		Connection conn=null;
		Statement stmt=null;
		ResultSet rs=null;
		IndexWriter writer=null;
		try {
			Class.forName(Constant.DB_DRIVER_STRING);
			conn = DriverManager.getConnection(Constant.DB_URI_STRING, Constant.DB_USERNAME, Constant.DB_PWD);
			stmt = conn.createStatement();
			rs = stmt.executeQuery(Constant.DB_QUERY_STRING);
			File file=new File(parentDir, new SimpleDateFormat("yyyyMMddHHmmss").format(new Date()));
			if(!file.exists()){
				file.mkdirs();
			}
			writer = new IndexWriter(file,new StandardAnalyzer(), true);
			long startTime = System.currentTimeMillis();
			// One formatter per run; it is never shared between threads.
			SimpleDateFormat dayFormat = new SimpleDateFormat("yyyy-MM-dd");
			while (rs.next()) {
				writer.addDocument(toDocument(rs, dayFormat));
			}
			writer.optimize();
			IndexWriter finished = writer;
			writer = null; // prevents a second close() in the finally block
			finished.close();
			// Report how long the build took.
			long endTime = System.currentTimeMillis();
			System.out.println("索引文件"+file.getPath()+"建立成功...");
			System.out.println("这花费了" + (endTime - startTime) + " 毫秒来把文档增加到索引里面去!");
			// Drop the oldest index directories once more than three exist.
			checkFiles(parentDir);
		} catch (IOException e) {
			e.printStackTrace();
		} catch (SQLException e) {
			e.printStackTrace();
		} catch (ClassNotFoundException e) {
			e.printStackTrace();
		}finally{
			if(writer!=null){
				// Only reached when the build failed part-way.
				try {
					writer.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
			try {
				if(rs!=null){
					rs.close();
				}
				if(stmt!=null){
					stmt.close();
				}
				if(conn!=null){
					conn.close();
				}
			} catch (SQLException e) {
				e.printStackTrace();
			}
		}
	}

	/**
	 * Converts the current row of {@code rs} into a Lucene document. SQL NULL
	 * values are indexed as empty strings so that one incomplete row cannot
	 * abort the whole build with a NullPointerException.
	 */
	private Document toDocument(ResultSet rs, SimpleDateFormat dayFormat) throws SQLException, IOException {
		Document doc = new Document();
		doc.add(new Field("ARTICLEID", nullToEmpty(rs.getString("ARTICLEID")), Field.Store.YES,Field.Index.TOKENIZED));
		doc.add(new Field("TITLE", nullToEmpty(rs.getString("TITLE")), Field.Store.YES,Field.Index.TOKENIZED));
		doc.add(new Field("USERNAME", nullToEmpty(rs.getString("USERNAME")), Field.Store.YES,Field.Index.TOKENIZED));
		doc.add(new Field("USERID", nullToEmpty(rs.getString("USERID")), Field.Store.YES,Field.Index.TOKENIZED));
		// The creation date is indexed as yyyy-MM-dd text.
		Date created = rs.getTimestamp("CREATEDATE");
		String createdate = created == null ? "" : dayFormat.format(created);
		doc.add(new Field("CREATEDATE", createdate, Field.Store.YES,Field.Index.TOKENIZED));
		// The large CONTENT column is read completely into memory.
		doc.add(new Field("CONTENT", readContent(rs), Field.Store.YES, Field.Index.TOKENIZED));
		return doc;
	}

	/** Returns the text of the CONTENT CLOB of the current row, or "" when it is NULL. */
	private String readContent(ResultSet rs) throws SQLException, IOException {
		CLOB clob = (CLOB) rs.getClob("CONTENT");
		if (clob == null) {
			return "";
		}
		BufferedReader in = new BufferedReader(clob.getCharacterStream());
		try {
			StringWriter out = new StringWriter();
			char[] buffer = new char[4096];
			int n;
			while ((n = in.read(buffer)) != -1) {
				out.write(buffer, 0, n);
			}
			return out.toString();
		} finally {
			in.close();
		}
	}

	/** Maps {@code null} to the empty string. */
	private static String nullToEmpty(String value) {
		return value == null ? "" : value;
	}

	/**
	 * Keeps at most three entries in {@code dir}. Entries are removed in
	 * ascending name order; index directories are named by timestamp, so for
	 * them that means oldest first.
	 *
	 * @param dir parent directory of the index directories
	 */
	public void checkFiles(File dir) {
		String[] names=dir.list();
		if(names==null){
			// dir is not a directory or cannot be listed: nothing to clean up.
			return;
		}
		Arrays.sort(names);
		for(int i=0; i<names.length-3; i++){
			deleteDirectory(new File(dir, names[i]));
		}
	}

	/**
	 * Recursively deletes {@code path} and everything below it.
	 *
	 * @param path file or directory to delete
	 * @return {@code true} when {@code path} itself was deleted
	 */
	public boolean deleteDirectory(File path) {
		if( path.exists() ) {
			File[] files = path.listFiles();
			// listFiles() yields null for plain files and on I/O errors.
			if(files!=null){
				for(int i=0; i<files.length; i++) {
					if(files[i].isDirectory()) {
						deleteDirectory(files[i]);
					}
					else {
						files[i].delete();
					}
				}
			}
		}
		boolean hasdelete=path.delete();
		if(hasdelete){
			System.out.println("删除索引目录"+path);
		}
		return hasdelete;
	}

	/** Runs a single index build, without a timer, for manual testing. */
	public static void main(String[] args) {
		getInstance().run();
	}
}

配置文件管理类：

Java代码

package zxt.lucene.index;

import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

/**
 * Loads settings from a properties file on the classpath and exposes them
 * through a lazily created shared instance.
 * <p>
 * The file name must be registered with {@link #setConfigFileName(String)}
 * before the first call to {@link #getInstance()}; later changes have no
 * effect because the file is read only once.
 *
 * @author wulihai
 * @create 2009-06-02
 */
public class Config {

    private static Config cfg = null;

    private static String configFileName = null;

    private Properties props;

    public Config() {
        props = new java.util.Properties();
    }

    /**
     * Returns the shared instance, creating it and loading the configuration
     * file on first use.
     *
     * @return the shared instance, never {@code null}
     */
    public synchronized static Config getInstance() {
        if (cfg == null) {
            cfg = new Config();
            cfg.loadConfig();
        }
        return cfg;
    }

    /**
     * Reads the registered properties file from the classpath.
     *
     * @return 1 when the file was loaded, 0 when no name is registered, the
     *         resource does not exist, or reading failed
     */
    private int loadConfig() {
        // Both conditions must hold; the former '||' dereferenced a null name.
        if (configFileName == null || configFileName.length() == 0) {
            return 0;
        }
        System.out.println("configFileName=" + configFileName);
        // Use the registered name instead of a hard-coded one.
        InputStream inputStream = Config.class.getClassLoader()
                .getResourceAsStream(configFileName);
        if (inputStream == null) {
            System.err.println("Configuration file not found on classpath: "
                    + configFileName);
            return 0;
        }
        try {
            props.load(inputStream);
            return 1;
        } catch (IOException e) {
            e.printStackTrace();
            return 0;
        } finally {
            try {
                inputStream.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Registers the classpath name of the properties file to load.
     *
     * @param cfg resource name, e.g. {@code directory.properties}
     */
    public static void setConfigFileName(String cfg) {
        configFileName = cfg;
    }

    /**
     * Looks up a configuration value.
     *
     * @param keyName property key
     * @return the value, or {@code null} when the key is not defined
     */
    public String getProperty(String keyName) {
        return props.getProperty(keyName);
    }

}
package zxt.lucene.index; import java.io.IOException; import java.io.InputStream; import java.util.Properties; /** * Holds settings read from a classpath properties file and hands out one lazily created shared instance. * @author wulihai * @create 2009-06-02 * */ public class Config { private static Config cfg = null; private static String configFileName = null; private Properties props; public Config() { props = new java.util.Properties(); } /** * Returns the shared instance; the first call creates it and loads the configuration. * @return the shared instance */ public synchronized static Config getInstance() { if (cfg == null) { cfg = new Config(); cfg.loadConfig(); return cfg; } else { return cfg; } } /* NOTE(review): with '||' the length() call is evaluated only when configFileName is null, which throws NullPointerException; '&&' looks intended. The resource name below is hard-coded and does not use configFileName. */ private int loadConfig() { if (configFileName != null || configFileName.length() > 0) { InputStream inputStream = Config.class.getClassLoader() .getResourceAsStream("directory.properties"); System.out.println("configFileName=" + configFileName); try { props.load(inputStream); } catch (IOException e) { e.printStackTrace(); } return 1; } return 0; } public static void setConfigFileName(String cfg) { configFileName = cfg; } public String getProperty(String keyName) { return props.getProperty(keyName); } }

常量配置

Java代码

package zxt.lucene.index;

/**
* 常量配置类 *
* @author wulihai
* @create 2009-06-02
*/
public class Constant {

    // 隔多长时间建立一次索引
    public static final String CREATE_INDEX_SLEEP_TIME = Config.getInstance()
            .getProperty("create_index_sleep_time");

    // 索引文件存放路径
    public static final String INDEX_STORE_DIRECTORY = Config.getInstance()
            .getProperty("index_store_directory");
    //数据库驱动程序
    public static final String DB_DRIVER_STRING = Config.getInstance()
    .getProperty("db_driver_string");
    //数据库连接URI
    public static final String DB_URI_STRING = Config.getInstance()
    .getProperty("db_uri_string");
    //数据库连接username
    public static final String DB_USERNAME= Config.getInstance()
    .getProperty("db_username");
     //数据库连接pwd
    public static final String DB_PWD= Config.getInstance()
    .getProperty("db_pwd");
    //数据库查询语句db_query_str
    public static final String DB_QUERY_STRING= Config.getInstance()
    .getProperty("db_query_string");

}
package zxt.lucene.index; /** * 常量配置类 * * @author wulihai * @create 2009-06-02 */ public class Constant { // 隔多长时间建立一次索引 public static final String CREATE_INDEX_SLEEP_TIME = Config.getInstance() .getProperty("create_index_sleep_time"); // 索引文件存放路径 public static final String INDEX_STORE_DIRECTORY = Config.getInstance() .getProperty("index_store_directory"); //数据库驱动程序 public static final String DB_DRIVER_STRING = Config.getInstance() .getProperty("db_driver_string"); //数据库连接URI public static final String DB_URI_STRING = Config.getInstance() .getProperty("db_uri_string"); //数据库连接username public static final String DB_USERNAME= Config.getInstance() .getProperty("db_username"); //数据库连接pwd public static final String DB_PWD= Config.getInstance() .getProperty("db_pwd"); //数据库查询语句db_query_str public static final String DB_QUERY_STRING= Config.getInstance() .getProperty("db_query_string"); }

数据类型处理类：

Java代码

package zxt.lucene.index;

/**
 * Data type conversion helpers.
 *
 * @author wulihai
 * @create 2009-06-02
 */
public class DataTypeUtil {
    /**
     * Converts an object to a {@code long} by parsing its string form.
     * Leading and trailing whitespace is ignored, because values read from a
     * properties file keep their trailing blanks.
     *
     * @param o source object
     * @return the parsed value, or {@code Long.MAX_VALUE} when the string form
     *         is not a valid decimal {@code long}
     * @throws IllegalArgumentException when {@code o} is {@code null}
     */
    public static long toLong(Object o) {
        if (o == null) {
            throw new IllegalArgumentException("该对象为空");
        }
        String s = o.toString();
        if (s == null) {
            // A toString() that returns null cannot be parsed either.
            return Long.MAX_VALUE;
        }
        try {
            return Long.parseLong(s.trim());
        } catch (NumberFormatException ex) {
            return Long.MAX_VALUE;
        }
    }
}
package zxt.lucene.index; /** * Data type conversion helpers. * @author wulihai * @create 2009-06-02 */ public class DataTypeUtil { /** * Converts an object to a long by parsing its string form. * @param o source object; null is rejected with IllegalArgumentException * @return the parsed value, or Long.MAX_VALUE when parsing fails */ public static long toLong(Object o) { if (o == null) { throw new IllegalArgumentException("该对象为空"); } String s = o.toString(); try { return Long.parseLong(s); } catch (Exception ex) { return Long.MAX_VALUE; } } }

配置文件：

Properties代码


#== the directory for store lucene-index ========#
index_store_directory=D:/lucene/indexDB/

#======== two hours ========#
#create_index_sleep_time=7200000

#======== two minutes ========#
create_index_sleep_time=120000

db_driver_string=oracle.jdbc.driver.OracleDriver
db_uri_string=jdbc:oracle:thin:@localhost:1521:lportal
db_username=lportal
db_pwd=lportal
db_query_string=SELECT  * from journalarticle
#== the directory for store lucene-index ========# index_store_directory=D:/lucene/indexDB/ #======== two hours ========# #create_index_sleep_time=7200000 #======== two minutes ========# create_index_sleep_time=120000 db_driver_string=oracle.jdbc.driver.OracleDriver db_uri_string=jdbc:oracle:thin:@localhost:1521:lportal db_username=lportal db_pwd=lportal db_query_string=SELECT * from journalarticle

==================搜索类:===============

核心搜索类：

Java代码

package com.liferay.portal.util;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import com.liferay.portlet.journal.model.JournalArticle;

/**
 * Runs full-text searches on the CONTENT field of a Lucene index and maps the
 * hits to {@link JournalArticle} objects.
 */
public class LuceneDBQuery {

    private static LuceneDBQuery search = new LuceneDBQuery();

    // Instances are obtained through getInstance().
    private LuceneDBQuery() {

    }

    /**
     * Returns the shared instance.
     *
     * @return the singleton
     */
    public static LuceneDBQuery getInstance() {
        return search;
    }

    /**
     * Searches the CONTENT field for {@code queryString}.
     * <p>
     * Failures (no usable index directory, unparsable query, I/O errors) are
     * printed and produce an empty or partial result list; they are never
     * propagated to the caller.
     *
     * @param queryString query in Lucene query-parser syntax
     * @return list of {@link JournalArticle}, one per readable hit; never
     *         {@code null}
     */
    public List search(String queryString) {
        long startTime = new Date().getTime();
        List results = new ArrayList();
        if (queryString == null || queryString.trim().length() == 0) {
            return results;
        }

        // Locate the index directory to search.
        String indexDir = LuceneDBQueryUtil.getIndexPath();
        File targetDir = null;
        if (indexDir != null && !"".equals(indexDir)) {
            File searchDir = new File(indexDir);
            if (!searchDir.exists()) {
                searchDir.mkdir();
            }
            targetDir = getTargetDir(searchDir);
        }
        if (targetDir == null) {
            System.out.println("创建索引对象出现异常...");
            return results;
        }

        // Build the query before opening the index, so that an invalid query
        // never leaves an open searcher behind.
        Query query;
        try {
            Analyzer analyzer = new StandardAnalyzer();
            query = new QueryParser("CONTENT", analyzer).parse(queryString);
        } catch (ParseException e1) {
            e1.printStackTrace();
            return results;
        }

        IndexSearcher searcher;
        try {
            Directory dir = FSDirectory.getDirectory(targetDir, false);
            searcher = new IndexSearcher(dir);
        } catch (Exception e1) {
            e1.printStackTrace();
            System.out.println("创建索引对象出现异常...");
            return results;
        }

        try {
            Hits hits = searcher.search(query);
            if (hits.length() > 0) {
                // Must match the pattern used when CREATEDATE was indexed.
                SimpleDateFormat dayFormat = new SimpleDateFormat("yyyy-MM-dd");
                for (int i = 0; i < hits.length(); i++) {
                    JournalArticle article = toArticle(hits, i, dayFormat);
                    if (article != null) {
                        results.add(article);
                    }
                }
                long endTime = new Date().getTime();
                System.out.println("查询过程花费了" + (endTime - startTime) + " 毫秒!");
            } else {
                System.out.println("0个结果!");
            }
        } catch (IOException e1) {
            System.out.println("查询索引库出现异常...");
            e1.printStackTrace();
        } finally {
            // Hits loads documents lazily through the searcher, so the
            // searcher is closed only after every hit has been read.
            try {
                searcher.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        return results;

    }

    /**
     * Converts hit {@code i} into an article.
     *
     * @return the article, or {@code null} when the document cannot be read
     */
    private JournalArticle toArticle(Hits hits, int i, SimpleDateFormat dayFormat) {
        Document document;
        try {
            document = hits.doc(i);
        } catch (Exception e1) {
            System.out.println("返回查询结果集出现异常...");
            e1.printStackTrace();
            return null;
        }
        JournalArticle article = new JournalArticle();
        String createDate = document.get("CREATEDATE");
        if (createDate != null) {
            try {
                Date created = dayFormat.parse(createDate);
                // Separate Date objects: java.util.Date is mutable.
                article.setDisplayDate(new Date(created.getTime()));
                article.setCreateDate(created);
            } catch (java.text.ParseException e) {
                e.printStackTrace();
            }
        }
        article.setTitle(document.get("TITLE"));
        article.setArticleId(document.get("ARTICLEID"));
        article.setUserName(document.get("USERNAME"));
        article.setUserId(document.get("USERID"));
        return article;
    }

    /**
     * Picks the index directory to search inside {@code dir}.
     * <p>
     * With two or more entries the second-to-last one in name order is used
     * (the last one may still be under construction while a build is running);
     * with exactly one entry that entry is used.
     *
     * @param dir parent directory of the index directories
     * @return the directory to search, or {@code null} when {@code dir} is
     *         empty or cannot be listed
     */
    private File getTargetDir(File dir) {
        String[] names = dir.list();
        if (names == null || names.length == 0) {
            return null;
        }
        if (names.length == 1) {
            return new File(dir, names[0]);
        }
        Arrays.sort(names);
        return new File(dir, names[names.length - 2]);
    }

}
package com.liferay.portal.util;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import com.liferay.portlet.journal.model.JournalArticle;

/**
 * Runs full-text searches on the CONTENT field of a Lucene index and maps the
 * hits to {@link JournalArticle} objects.
 */
public class LuceneDBQuery {

    private static LuceneDBQuery search = new LuceneDBQuery();

    // Instances are obtained through getInstance().
    private LuceneDBQuery() {

    }

    /**
     * Returns the shared instance.
     *
     * @return the singleton
     */
    public static LuceneDBQuery getInstance() {
        return search;
    }

    /**
     * Searches the CONTENT field for {@code queryString}.
     * <p>
     * Failures (no usable index directory, unparsable query, I/O errors) are
     * printed and produce an empty or partial result list; they are never
     * propagated to the caller.
     *
     * @param queryString query in Lucene query-parser syntax
     * @return list of {@link JournalArticle}, one per readable hit; never
     *         {@code null}
     */
    public List search(String queryString) {
        long startTime = new Date().getTime();
        List results = new ArrayList();
        if (queryString == null || queryString.trim().length() == 0) {
            return results;
        }

        // Locate the index directory to search.
        String indexDir = LuceneDBQueryUtil.getIndexPath();
        File targetDir = null;
        if (indexDir != null && !"".equals(indexDir)) {
            File searchDir = new File(indexDir);
            if (!searchDir.exists()) {
                searchDir.mkdir();
            }
            targetDir = getTargetDir(searchDir);
        }
        if (targetDir == null) {
            System.out.println("创建索引对象出现异常...");
            return results;
        }

        // Build the query before opening the index, so that an invalid query
        // never leaves an open searcher behind.
        Query query;
        try {
            Analyzer analyzer = new StandardAnalyzer();
            query = new QueryParser("CONTENT", analyzer).parse(queryString);
        } catch (ParseException e1) {
            e1.printStackTrace();
            return results;
        }

        IndexSearcher searcher;
        try {
            Directory dir = FSDirectory.getDirectory(targetDir, false);
            searcher = new IndexSearcher(dir);
        } catch (Exception e1) {
            e1.printStackTrace();
            System.out.println("创建索引对象出现异常...");
            return results;
        }

        try {
            Hits hits = searcher.search(query);
            if (hits.length() > 0) {
                // Must match the pattern used when CREATEDATE was indexed.
                SimpleDateFormat dayFormat = new SimpleDateFormat("yyyy-MM-dd");
                for (int i = 0; i < hits.length(); i++) {
                    JournalArticle article = toArticle(hits, i, dayFormat);
                    if (article != null) {
                        results.add(article);
                    }
                }
                long endTime = new Date().getTime();
                System.out.println("查询过程花费了" + (endTime - startTime) + " 毫秒!");
            } else {
                System.out.println("0个结果!");
            }
        } catch (IOException e1) {
            System.out.println("查询索引库出现异常...");
            e1.printStackTrace();
        } finally {
            // Hits loads documents lazily through the searcher, so the
            // searcher is closed only after every hit has been read.
            try {
                searcher.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        return results;

    }

    /**
     * Converts hit {@code i} into an article.
     *
     * @return the article, or {@code null} when the document cannot be read
     */
    private JournalArticle toArticle(Hits hits, int i, SimpleDateFormat dayFormat) {
        Document document;
        try {
            document = hits.doc(i);
        } catch (Exception e1) {
            System.out.println("返回查询结果集出现异常...");
            e1.printStackTrace();
            return null;
        }
        JournalArticle article = new JournalArticle();
        String createDate = document.get("CREATEDATE");
        if (createDate != null) {
            try {
                Date created = dayFormat.parse(createDate);
                // Separate Date objects: java.util.Date is mutable.
                article.setDisplayDate(new Date(created.getTime()));
                article.setCreateDate(created);
            } catch (java.text.ParseException e) {
                e.printStackTrace();
            }
        }
        article.setTitle(document.get("TITLE"));
        article.setArticleId(document.get("ARTICLEID"));
        article.setUserName(document.get("USERNAME"));
        article.setUserId(document.get("USERID"));
        return article;
    }

    /**
     * Picks the index directory to search inside {@code dir}.
     * <p>
     * With two or more entries the second-to-last one in name order is used
     * (the last one may still be under construction while a build is running);
     * with exactly one entry that entry is used.
     *
     * @param dir parent directory of the index directories
     * @return the directory to search, or {@code null} when {@code dir} is
     *         empty or cannot be listed
     */
    private File getTargetDir(File dir) {
        String[] names = dir.list();
        if (names == null || names.length == 0) {
            return null;
        }
        if (names.length == 1) {
            return new File(dir, names[0]);
        }
        Arrays.sort(names);
        return new File(dir, names[names.length - 2]);
    }

}

配置文件管理类:

Java代码


package com.liferay.portal.util;

import java.io.IOException;

import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;

public class LuceneDBQueryUtil {

    /**
     * Reads the index directory from the classpath resource
     * {@code zxt_index.xml}: the text of the {@code index} child of the root
     * element.
     *
     * @return the configured path with surrounding whitespace removed, or the
     *         empty string when the resource or the element is missing or the
     *         file cannot be parsed
     */
    public static String getIndexPath() {

        String filePath = "zxt_index.xml";
        String indexPath = "";

        // The context class loader may be unset; fall back to our own loader.
        ClassLoader loader = Thread.currentThread().getContextClassLoader();
        if (loader == null) {
            loader = LuceneDBQueryUtil.class.getClassLoader();
        }
        java.net.URL resource = loader.getResource(filePath);
        if (resource == null) {
            System.err.println("Index configuration not found on classpath: " + filePath);
            return indexPath;
        }

        SAXBuilder builder = new SAXBuilder(false);
        try {
            Document doc = builder.build(resource);
            Element rootElement = doc.getRootElement();
            Element index = rootElement.getChild("index");
            if (index != null) {
                indexPath = index.getText().trim();
            }
            System.out.println(indexPath);
        } catch (JDOMException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return indexPath;

    }
}

配置文件：zxt_index.xml

Xml代码

<?xml version="1.0" encoding="UTF-8"?>
<list>
<index>D:\\index\\IndexDB</index>
</list>