Lucene 的关键字高亮(变色)与排序

package com.zte.adc.search.service.impl;

import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryFilter;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.LockObtainFailedException;
import org.wltea.analyzer.lucene.IKAnalyzer;
import org.wltea.analyzer.lucene.IKQueryParser;
import org.wltea.analyzer.lucene.IKSimilarity;

import com.zte.adc.search.dao.SearchStateBeanDAO;
import com.zte.adc.search.dao.impl.IndexBeanDAOImpl;
import com.zte.adc.search.dao.impl.SearchStateBeanDAOImpl;
import com.zte.adc.search.entity.IndexBean;
import com.zte.adc.search.entity.SearchStateBean;
import com.zte.adc.search.entity.XmlBean;
import com.zte.adc.search.service.IndexService;
import com.zte.adc.search.service.XmlParserService;
import com.zte.adc.search.utils.PageCondition;
import com.zte.adc.search.utils.PageDataSet;
import com.zte.adc.search.utils.PropertyManager;
import com.zte.adc.search.utils.SearchStateEnum;
import com.zte.adc.search.utils.StringTools;

public class IndexServiceImpl implements IndexService {

 // Class-wide logger.
 private static final Log log = LogFactory.getLog(IndexServiceImpl.class);
 // Index field names matched when looking up or deleting a single entry.
 private String[] ids = { "id", "type" };
 // Occurrence flag for each entry of ids; both clauses are required (MUST).
 private Occur[] occur = { Occur.MUST, Occur.MUST };
 // Index directory, read from the "index.file.path" property.
 private String indexDir = PropertyManager
   .getPropertyByName("index.file.path");
 // XML configuration reader; assigned in createIndex() and insert().
 private XmlParserService xmlService = null;
 // Beans obtained from the XML reader; replaced in createIndex().
 private List<XmlBean> beans = new ArrayList<XmlBean>();
 // DAO used to read/write index data.
 private IndexBeanDAOImpl dao = new IndexBeanDAOImpl();
 // DAO for the database row-state change records used by updateIndex().
 private SearchStateBeanDAO sdao = new SearchStateBeanDAOImpl();

 // 创建索引
 public void createIndex() {
  log.warn("开始创建索引");
  if (ifIndexExist()) {
   return;
  }
  File file = new File(indexDir);
  if (!file.exists()) {
   log.warn("索引目录不存,创建索引目录");
   file.mkdir();
  }
  xmlService = new SAXXmlParserServiceImpl();
  beans = xmlService.getXmlBean();
  IndexWriter.setDefaultWriteLockTimeout(Integer.MAX_VALUE);
  IndexWriter writer = null;
  List<IndexBean> list;
  try {
   writer = new IndexWriter(file, new IKAnalyzer());
   writeIndexBean(beans, writer);
   log.warn("创建索引成功");
  } catch (CorruptIndexException e) {
   log.error("创建索引失败,file不能写入或读取或不存在,不能创建索引" + e);
  } catch (LockObtainFailedException e) {
   log.error("创建索引失败,file不能写入或读取或不存在,不能创建索引" + e);
  } catch (IOException e) {
   log.error("创建索引失败,file不能写入或读取或不存在,不能创建索引" + e);
  } finally {
   try {
    if (writer != null)
     writer.close();
   } catch (CorruptIndexException e) {
    // TODO Auto-generated catch block
    log.error(e.toString());
   } catch (IOException e) {
    // TODO Auto-generated catch block
    log.error(e.toString());
   }
  }
 }

 // 删除所有索引
 public void deleteAllIndex() {
  log.warn("开始删除所有索引");
  IndexWriter.setDefaultWriteLockTimeout(Integer.MAX_VALUE);
  IndexWriter writer = null;
  try {
   writer = new IndexWriter(indexDir, new IKAnalyzer(), false);
   writer.deleteAll();
   writer.optimize();
   log.warn("删除索引成功");
  } catch (CorruptIndexException e) {
   log.error("删除索引失败,错误的indexDir,不能创建indexWriter" + e);
  } catch (LockObtainFailedException e) {
   log.error("删除索引失败,错误的indexDir,不能创建indexWriter" + e);
  } catch (IOException e) {
   log.error("删除索引失败,错误的indexDir,不能创建indexWriter" + e);
  } finally {
   try {
    if (writer != null)
     writer.close();
   } catch (CorruptIndexException e) {
    // TODO Auto-generated catch block
    log.error(e.toString());
   } catch (IOException e) {
    // TODO Auto-generated catch block
    log.error(e.toString());
   }
  }
 }

 // 根据xmlBean查询数据库得到indexBean
 public List<IndexBean> getIndexBean(List<XmlBean> xmlBean) {
  List<IndexBean> list = new ArrayList<IndexBean>();
  XmlBean bean = null;
  String sql = null;
  List<IndexBean> listBean = null;
  for (int i = 0; i < xmlBean.size(); i++) {
   bean = xmlBean.get(i);
   sql = this.getSqlByXmlBean(bean);
   listBean = dao.getIndexBean(sql, null, bean);
   list.addAll(listBean);
  }
  return list;
 }

 // 根据xmlBean查询数据库得到indexBean
 public void writeIndexBean(List<XmlBean> xmlBean, IndexWriter writer) {
  List<IndexBean> list = new ArrayList<IndexBean>();
  XmlBean bean;
  String sql;
  for (int i = 0; i < xmlBean.size(); i++) {
   bean = xmlBean.get(i);
   sql = this.getSqlByXmlBean(bean);
//   if (bean.getTable().equalsIgnoreCase("nn_faq")) {
//    sql = sql + " where INTSTATUS=1";
//   }
//   if (bean.getTable().equalsIgnoreCase("nn_news")
//     || bean.getTable().equalsIgnoreCase("nn_affichedetail")) {
//    sql = sql + " where strsendto = 'OP'";
//   }
   log.warn("根据xmlBean得到sql语句: " + sql);
   dao.writeIndexBean(sql, null, bean, writer);
  }
 }

 // 取配置文件中的设置的每页行数
 public int getPageSize() {
  return Integer.parseInt(PropertyManager
    .getPropertyByName("index.pageSize"));
 }

 @Override
 // 判断索引是否存在
 public boolean ifIndexExist() {
  File directory = new File(indexDir);
  if (!directory.exists())
   directory.mkdir();
  if (directory.listFiles() != null) {
   if (directory.listFiles().length >= 3) {
    return true;
   } else {
    return false;
   }
  }
  return false;
 }

 // 查询索引
 public List<IndexBean> searchIndex(String keyWord, int currentPage) {
  log.warn("开始查询索引");
  List list = new ArrayList();
  if (!ifIndexExist()) {
   createIndex();
   log.warn("索引不存时创建索引");
  }
  IndexSearcher searcher;
  Query query;
  TopDocs doc;
  SimpleHTMLFormatter sHtmlF;
  Highlighter highlighter;
  try {
   log.warn("根据关键字检索索引文件");
   searcher = new IndexSearcher(this.getIndexDir());
   searcher.setSimilarity(new IKSimilarity());
   query = IKQueryParser.parse("content", keyWord);
   // 设置高亮属性
   sHtmlF = new SimpleHTMLFormatter("<b><font color='red'>",
     "</font></b>");
   highlighter = new Highlighter(sHtmlF, new QueryScorer(query));
   highlighter.setTextFragmenter(new SimpleFragmenter(300));
   doc = searcher.search(query, this.getIndexCount());
   ScoreDoc[] docArray = doc.scoreDocs;
   Document document;
   IndexBean bean;
   SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
   String indexdate;
   Date day;
   for (ScoreDoc d : docArray) {
    document = searcher.doc(d.doc);
    bean = new IndexBean();
    bean.setIndexId(document.getField("id").stringValue());
    bean.setTitle(document.getField("title").stringValue());
    bean.setContent(document.getField("content").stringValue());
    bean.setHigherContent(getHigherStr("content",
      bean.getContent(), highlighter));
    bean.setType(document.getField("type").stringValue());
    bean.setUrl(PropertyManager.getPropertyByName(bean.getType(),
      "urlConfig") == null ? null : PropertyManager
      .getPropertyByName(bean.getType(), "urlConfig")
      + bean.getTargetId());
    bean.setClickCount(Integer.parseInt(document.getField(
      "clickcount").stringValue()));
    bean.setDescription(document.getField("description")
      .stringValue());
    indexdate = document.getField("indexdate").stringValue();
    if (null != indexdate && !"".equals(indexdate)) {
     day = sdf.parse(document.getField("indexdate")
       .stringValue());
     bean.setIndexDate(day);
    } else {
     bean.setIndexDate(new Date());
    }
    list.add(bean);
   }
  } catch (CorruptIndexException e) {
   log.error("检索失败,检索目录出错");
  } catch (IOException e) {
   log.error("检索失败,解析关键字出错");
  } catch (java.text.ParseException e) {
   log.error("时间格式化错误");
  }
  return list;
 }

 // 分页查询索引
 public PageDataSet searchIndex(String keyWord, PageCondition page) {
  log.warn("开始分页查询索引");
  PageDataSet pds = new PageDataSet();
  // 每页行数
  pds.setPageSize(page.getPageSize());
  //List<Document> list = new ArrayList<Document>();
  List<IndexBean> l = new ArrayList<IndexBean>();
  if (!this.ifIndexExist()) {
   return pds;
  }
  IndexSearcher searcher;
  Query query;
  TopDocs doc;
  Sort sort;
  SimpleHTMLFormatter sHtmlF;
  Highlighter highlighter;
  try {
   log.warn("根据关键字取得索引并按每页记录数取得每页数据");
   searcher = new IndexSearcher(getIndexDir());
   searcher.setSimilarity(new IKSimilarity());
   if (null == keyWord) {
    keyWord = "";
   }
   query = IKQueryParser.parse("content", keyWord);
   Filter f = new QueryFilter(query);
   sort = new Sort(new SortField("indexdate", SortField.STRING, true));
   // 设置高亮属性
   sHtmlF = new SimpleHTMLFormatter("<b><font color='red'>",
     "</font></b>");
   highlighter = new Highlighter(sHtmlF, new QueryScorer(query));
   // 设置高亮片段长度
   highlighter.setTextFragmenter(new SimpleFragmenter(PropertyManager
     .getPropertyByName("index.contentSize") == null ? 300
     : Integer.parseInt(PropertyManager
       .getPropertyByName("index.contentSize"))));
   doc = searcher.search(query, f, this.getIndexCount(), sort);
   ScoreDoc[] docArray = doc.scoreDocs;
   // 总记录数
   pds.setTotalCount(docArray.length);
   // 当前页
   pds.setCurrentPage(page.getCurrentPage());
   // 取每页数据从startIndex到endIndex
   int startIndex = (page.getCurrentPage() - 1) * page.getPageSize();
   int endIndex = startIndex + page.getPageSize();
   if (endIndex > pds.getTotalCount()) {
    endIndex = pds.getTotalCount();
   }
   // System.out.println("开始:" + startIndex + ",到" + endIndex);
   // 取出分页所需的document
   // 从startIndex到endIndex-1的数据.
   Document document;
   IndexBean bean;
   Date day;
   SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
   for (int i = startIndex; i < endIndex; i++) {
    document = searcher.doc(docArray[i].doc);
    bean = new IndexBean();
    bean.setIndexId(document.getField("id").stringValue());
    bean.setTitle(document.getField("title").stringValue());
    bean.setContent(document.getField("content").stringValue());
    bean.setHigherContent(getHigherStr("content", StringTools
      .getHtmlContent(bean.getContent()), highlighter));
    bean.setTargetId(document.getField("targetid").stringValue());
    bean.setType(document.getField("type").stringValue());
    bean.setUrl(PropertyManager.getPropertyByName(bean.getType(),
      "urlConfig") == null ? null : PropertyManager
      .getPropertyByName(bean.getType(), "urlConfig")
      + bean.getTargetId());
    bean.setClickCount(Integer.parseInt(document.getField(
      "clickcount").stringValue()));
    bean.setDescription(document.getField("description")
      .stringValue());
    String indexdate = document.getField("indexdate").stringValue();
    if (null != indexdate && !"".equals(indexdate)) {
     day = sdf.parse(document.getField("indexdate")
       .stringValue());
     bean.setIndexDate(day);
    } else {
     bean.setIndexDate(new Date());
    }
    l.add(bean);
   }
   // 分页数据
   pds.setDataset(l);
  } catch (CorruptIndexException e) {
   log.error("分页检索失败,检索目录出错",e);
  } catch (IOException e) {
   log.error("分页检索失败",e);
  } catch (java.text.ParseException e) {
   log.error("时间格式化错误");
  }
  return pds;
 }

 // 更新索引
 public void updateIndex() {
  File file = new File(indexDir);
  if (!file.exists()) {
   file.mkdir();
   createIndex();
  }
  List<SearchStateBean> bean = sdao.getUpdateInfo();
  for (SearchStateBean b : bean) {
   // 根据sate值执行相应操作
   boolean isswitch = true;
   switch (b.getState()) {
   case SearchStateEnum.INSERT: {
    insert(b);
    break;
   }
   case SearchStateEnum.UPDATE: {
    update(b);
    break;
   }
   case SearchStateEnum.DELETE: {
    delete(b);
    break;
   }
   default: {
    System.out.println(b.getId() + "状态出现问题 请更正");
    isswitch = false;
   }
   }
   if (isswitch) {
    sdao.delete(b);
   }
  }
 }

 // 传入xmlBean得到索引sql
 public String getSqlByXmlBean(XmlBean bean) {
  String sql = "select " + bean.getPrimaryKeyName() + ",";
  // 可能title与content 同时取了 同一个字段所以要过滤掉一个
  for (String field : bean.getContents()) {
   sql = sql + field + ",";
  }
  for (String field : bean.getTitles()) {
   boolean ishave = true;
   for (String f : bean.getContents()) {
    if (field.equals(f)) {
     ishave = false;
     break;
    }
   }
   if (ishave == true)
    sql = sql + field + ",";
  }
  if (bean.getUpdateTime() != null && !bean.getUpdateTime().equals("")) {
   sql = sql + bean.getCreateTime() + "," + bean.getUpdateTime() + ",";
  } else {
   sql = sql + bean.getCreateTime() + ",";
  }
  sql = sql.substring(0, sql.length() - 1) + " ";
  sql = sql + "from " + bean.getTable() + " where 1=1";
  for (String field : bean.getConditions()) {
   if (field != null && !field.trim().equals("")) {
    sql = sql + " and " + field;
   }
  }
  return sql;
 }

 public int getIndexCountByCondition(IndexBean bean) {
  int i = 0;
  Query query = null;
  IndexSearcher s = null;
  try {
   query = IKQueryParser.parseMultiField(ids, new String[] {
     bean.getTargetId(), bean.getType() }, occur);
   s = new IndexSearcher(this.indexDir);
   i = s.search(query).length();
  } catch (IOException e) {
   log.error("取出索引个数错误:" + e);
  }
  return i;
 }

 // 添加索引document
 public void addDocument(List<IndexBean> list, IndexWriter writer)
   throws CorruptIndexException, IOException {
  log.warn("开始写入索引字段");
  Document document;
  for (IndexBean bean : list) {
   if (getIndexCountByCondition(bean) == 0) {
    document = new Document();
    Field id = new Field("id", bean.getIndexId() == null ? ""
      : bean.getIndexId(), Field.Store.YES,
      Field.Index.ANALYZED);
    Field title = new Field("title", bean.getTitle() == null ? ""
      : bean.getTitle(), Field.Store.YES,
      Field.Index.ANALYZED);
    Field content = new Field("content",
      bean.getContent() == null ? "" : bean.getContent(),
      Field.Store.YES, Field.Index.ANALYZED);
    Field type = new Field("type", bean.getType() == null ? ""
      : bean.getType(), Field.Store.YES, Field.Index.ANALYZED);
    Field url = new Field("url", bean.getUrl() == null ? "" : bean
      .getUrl(), Field.Store.YES, Field.Index.ANALYZED);
    Field clickCount = new Field("clickcount", bean.getClickCount()
      + "" == null ? "" : bean.getClickCount() + "",
      Field.Store.YES, Field.Index.ANALYZED);
    Field indexDate = new Field("indexdate",
      bean.getIndexDate() == null ? "" : bean.getIndexDate()
        .toString(), Field.Store.YES,
      Field.Index.NOT_ANALYZED);
    Field description = new Field("description", bean
      .getDescription() == null ? "" : bean.getDescription(),
      Field.Store.YES, Field.Index.ANALYZED);
    document.add(new Field("targetid",
      bean.getTargetId() == null ? "" : bean.getTargetId(),
      Field.Store.YES, Field.Index.ANALYZED));
    document.add(id);
    document.add(title);
    document.add(content);
    document.add(type);
    document.add(url);
    document.add(clickCount);
    document.add(indexDate);
    document.add(description);
    writer.addDocument(document);
   }
  }
 }

 //
 public IndexBean getIndexBean(XmlBean xmlBean) {
  List<XmlBean> xmlBeans = new ArrayList<XmlBean>();
  xmlBeans.add(xmlBean);
  List<IndexBean> indexbean = this.getIndexBean(xmlBeans);
  if (indexbean.size() > 0)
   return indexbean.get(0);
  return null;
 }

 // 与数据集库同步更新
 public void update(SearchStateBean bean) {
  log.warn("先删除已更改的索引");
  deleteIndex(ids, new String[] { bean.getTargetId(),
    bean.getTargetTable() }, occur);
  log.warn("再加入更改后的索引");
  insert(bean);

 }

 // 与数据同步插入
 public void insert(SearchStateBean bean) {
  log.warn("开始同步数据");
  xmlService = new SAXXmlParserServiceImpl();
  XmlBean xmlBean = xmlService.getXmlBeanByType(bean.getTargetTable());
  String sql = this.getSqlByXmlBean(xmlBean) + " and "
    + xmlBean.getPrimaryKeyName() + "='" + bean.getTargetId() + "'";
  log.warn("需要更新的sql语句: " + sql);
  List<IndexBean> indexBeans = dao.getIndexBean(sql, null, xmlBean);
  IndexWriter.setDefaultWriteLockTimeout(Integer.MAX_VALUE);
  IndexWriter writer = null;
  try {
   writer = new IndexWriter(indexDir, new IKAnalyzer(), false);
   addDocument(indexBeans, writer);
   log.warn("同步数据成功,关闭writer");
  } catch (CorruptIndexException e) {
   log.error("同步数据失败" + e);
  } catch (LockObtainFailedException e) {
   log.error("同步数据失败" + e);
  } catch (IOException e) {
   log.error("同步数据失败" + e);
  } finally {
   try {
    if (writer != null)
     writer.close();
   } catch (CorruptIndexException e) {
    // TODO Auto-generated catch block
    log.error(e.toString());
   } catch (IOException e) {
    // TODO Auto-generated catch block
    log.error(e.toString());
   }
  }
 }

 // 与数据同步删除
 public void delete(SearchStateBean bean) {
  this.deleteIndex(ids, new String[] { bean.getTargetId(),
    bean.getTargetTable() }, occur);
 }

 // 根据索引条件删除索引
 public void deleteIndex(String[] fields, String[] values, Occur[] occur) {
  log.warn("开始根据条件删除索引");
  IndexWriter.setDefaultWriteLockTimeout(Integer.MAX_VALUE);
  IndexWriter writer = null;
  Query query;
  try {
   writer = new IndexWriter(indexDir, new IKAnalyzer(), false);
   query = IKQueryParser.parseMultiField(fields, values, occur);
   writer.deleteDocuments(query);
   writer.optimize();
   log.warn("删除索引成功,关闭writer");
  } catch (CorruptIndexException e) {
   log.error("删除索引失败" + e);
  } catch (LockObtainFailedException e) {
   log.error("删除索引失败" + e);
  } catch (IOException e) {
   log.error("删除索引失败" + e);
  } finally {
   try {
    if (writer != null)
     writer.close();
   } catch (CorruptIndexException e) {
    // TODO Auto-generated catch block
    log.error(e.toString());
   } catch (IOException e) {
    // TODO Auto-generated catch block
    log.error(e.toString());
   }
  }
 }

 // 高亮字段
 public String getHigherStr(String field, String value,
   Highlighter highlighter) {
  TokenStream stream = new IKAnalyzer().tokenStream(field,
    new StringReader(value));
  String str = null;
  try {
   str = highlighter.getBestFragment(stream, value);
  } catch (IOException e) {
   log.error("获得高亮字段失败" + e);
  }
  return str;
 }

 public String getIndexDir() {
  return indexDir;
 }

 public void setIndexDir(String indexDir) {
  this.indexDir = indexDir;
 }

 // 获得当前数据库中的所有数据的数量
 public int getDataCount() {
  String sql = "select count(*) from ";
  int count = 0;
  for (XmlBean bean : beans) {
   count = count + dao.getUniqueValue(sql + bean.getTable());
  }
  return count;
 }

 // 获得当前索引的数量
 public int getIndexCount() {
  int count = 0;
  if (ifIndexExist()) {
   IndexReader read = null;
   try {
    read = IndexReader.open(this.indexDir);
    count = read.numDocs();
    read.close();
   } catch (CorruptIndexException e) {
    log.error("获得当前索引数量失败" + e);
   } catch (IOException e) {
    log.error("获得当前索引数量失败" + e);
   }
  }
  return count;
 }
}

原文地址:https://www.cnblogs.com/liaomin416100569/p/9331855.html