盘古搜索--实例解析

1.引用

PanGu.dll
Lucene.Net.dll
PanGu.Lucene.Analyzer.dll
PanGu.HighLight.dll--高亮使用
 
2.Dict文件夹
文件夹名称必须为 Dict,并且要把其中字典文件的“复制到输出目录”属性设置为“如果较新则复制”。
 
3.创建索引部分。
思路:把新添加的消息放入 redis 队列,再由一个线程从队列中读取消息并写入索引。
该线程不断读取队列,取出一条消息就写入一条索引。
写入索引的部分通常放在单独的项目中,因为写入索引会非常耗内存。
 /// <summary>
 /// Worker that drains serialized messages from a Redis list and writes each
 /// one into a Lucene.Net index analyzed with <c>PanGuAnalyzer</c>.
 /// </summary>
 public class MessageIndex
 {
     // Redis list that holds the JSON-serialized messages waiting to be indexed.
     private const string QueueName = "QiuShiBaiKe.Message";

     // Index location. The case must match the folder on disk.
     // NOTE(review): the original literal was garbled (a TAB where "\t" had been
     // unescaped); "F:\tmp\index2" is the reconstructed value - confirm it.
     private const string IndexPath = @"F:\tmp\index2";

     // How long to pause once the queue has been drained, in milliseconds.
     private const int IdleDelayMilliseconds = 3000;

     Thread thread;

     /// <summary>
     /// While true, <see cref="RunScan"/> keeps polling the queue. Set to false
     /// to let the scanning loop finish after its current pass.
     /// </summary>
     public bool IsRunning { get; set; }

     /// <summary>
     /// Starts the thread that polls the queue and writes the index.
     /// </summary>
     public void Start()
     {
         IsRunning = true;
         thread = new Thread(RunScan);
         // Foreground thread (IsBackground = false): the process stays alive
         // while this thread runs, so a host whose Main returns right after
         // Start() keeps indexing. Set IsRunning to false to let it end.
         thread.IsBackground = false;
         thread.Start();
     }

     /// <summary>
     /// Thread body: while <see cref="IsRunning"/> is true, obtains a Redis
     /// client and runs one indexing pass with it.
     /// </summary>
     public void RunScan()
     {
         while (IsRunning)
         {
             using (var client = RedisManager.ClientManager.GetClient())
             {
                 StartIndex(client);
             }
         }
     }

     /// <summary>
     /// Runs one indexing pass: opens the index, dequeues messages until the
     /// queue is empty, writes each one, then pauses and returns.
     /// </summary>
     /// <param name="client">Redis client used to dequeue the messages.</param>
     /// <exception cref="Exception">
     /// Thrown when anything in the pass fails; the original failure is
     /// available through <c>InnerException</c>.
     /// </exception>
     public void StartIndex(IRedisClient client)
     {
         FSDirectory directory = null;
         IndexWriter writer = null;
         try
         {
             directory = FSDirectory.Open(new DirectoryInfo(IndexPath), new NativeFSLockFactory());
             bool isExists = IndexReader.IndexExists(directory);

             // Lucene.Net locks the index while writing and unlocks on Close.
             // If an earlier run died mid-write the lock is left behind, so
             // force-release it. This is only valid with a single writer; it
             // must not be used to let several writers run concurrently.
             if (isExists && IndexWriter.IsLocked(directory))
             {
                 IndexWriter.Unlock(directory);
             }

             // Create a new index only when none exists yet.
             writer = new IndexWriter(directory, new PanGuAnalyzer(), !isExists, Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);

             JavaScriptSerializer serializer = new JavaScriptSerializer();
             while (true)
             {
                 string json = client.DequeueItemFromList(QueueName);
                 if (string.IsNullOrEmpty(json))
                 {
                     // Queue drained: pause, then return so the finally block
                     // closes the writer and the caller starts a new pass.
                     Thread.Sleep(IdleDelayMilliseconds);
                     return;
                 }

                 Message message = serializer.Deserialize<Message>(json);
                 WriteIndex(message, writer);
             }
         }
         catch (Exception ex)
         {
             // Keep the original failure as InnerException instead of losing it.
             throw new Exception("写入索引出错", ex);
         }
         finally
         {
             // Either object may still be null if opening the index failed.
             // Closing is required: without it the new documents cannot be found.
             try
             {
                 if (writer != null)
                 {
                     writer.Close();
                 }
             }
             finally
             {
                 if (directory != null)
                 {
                     directory.Close();
                 }
             }
         }
     }

     /// <summary>
     /// Writes one message into the index, replacing any document already
     /// stored under the same id.
     /// </summary>
     /// <param name="message">Message to index; a null message is skipped.</param>
     /// <param name="writer">Open index writer.</param>
     private void WriteIndex(Message message, IndexWriter writer)
     {
         if (message == null)
         {
             // Nothing could be deserialized from the queue item.
             return;
         }

         string id = message.Id.ToString();
         // A Field cannot be built from a null value, so index an empty body.
         string body = message.Msg ?? string.Empty;

         // Remove any earlier document with the same id so re-indexing does not duplicate it.
         writer.DeleteDocuments(new Term("id", id));

         // One Document is one record; field names are free-form and values are strings.
         Document document = new Document();
         // "id" is stored verbatim and not analyzed, so it can be matched exactly.
         document.Add(new Field("id", id, Field.Store.YES, Field.Index.NOT_ANALYZED));
         // "message" is analyzed; term vectors with positions and offsets are stored.
         document.Add(new Field("message", body, Field.Store.YES, Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.WITH_POSITIONS_OFFSETS));
         // Adds the record with its two fields: id and message.
         writer.AddDocument(document);
         Console.WriteLine("id="+message.Id.ToString()+"msg="+message.Msg);
     }
 }

在索引的主程序中,调用

// Create the indexer and start its queue-scanning thread.
var messageIndex = new MessageIndex();
messageIndex.Start();
4.搜索部分--使用MVC框架
1)分词方法:
//分词
        /// <summary>
        /// Segments <paramref name="keywords"/> into words and joins them into a
        /// Lucene query string of the form <c>word^boost.0 word^boost.0</c>,
        /// where the boost is 3 raised to the word's rank.
        /// </summary>
        /// <param name="keywords">Raw text typed by the user.</param>
        /// <param name="ktTokenizer">Tokenizer used to segment the text.</param>
        /// <returns>
        /// The space-separated boosted terms, or an empty string when there is
        /// nothing to search for.
        /// </returns>
        public static string GetKeyWordsSplitBySpace(string keywords, PanGuTokenizer ktTokenizer)
        {
            if (string.IsNullOrEmpty(keywords))
            {
                return string.Empty;
            }

            ICollection<WordInfo> words = ktTokenizer.SegmentToWordInfos(keywords);
            if (words == null)
            {
                return string.Empty;
            }

            StringBuilder result = new StringBuilder();
            foreach (WordInfo word in words)
            {
                if (word == null || string.IsNullOrEmpty(word.Word))
                {
                    continue;
                }

                int boost = (int)Math.Pow(3, word.Rank);
                // The word comes from user input: escape query-syntax characters
                // (+ - : ( ) ^ ...) so they are searched literally instead of
                // being interpreted by the query parser.
                result.AppendFormat(
                    System.Globalization.CultureInfo.InvariantCulture,
                    "{0}^{1}.0 ",
                    QueryParser.Escape(word.Word),
                    boost);
            }

            return result.ToString().Trim();
        }
2)搜索并且对结果高亮显示
 //搜索
        /// <summary>
        /// Searches the "message" field of the index for <paramref name="q"/> and
        /// returns one page of results with the matched keywords highlighted.
        /// </summary>
        /// <param name="indexDir">Directory that holds the Lucene index.</param>
        /// <param name="q">Raw search text typed by the user.</param>
        /// <param name="pageLen">Maximum number of results per page.</param>
        /// <param name="pageNo">1-based page number; values below 1 are treated as 1.</param>
        /// <param name="recCount">Receives the total number of hits across all pages.</param>
        /// <returns>
        /// The results of the requested page; empty when there is nothing to
        /// search for or the page lies beyond the last hit.
        /// </returns>
        public static List<SearchResult> SearchResult(String indexDir, String q, int pageLen, int pageNo, out int recCount)
        {
            recCount = 0;
            List<SearchResult> results = new List<SearchResult>();
            if (string.IsNullOrEmpty(q) || pageLen <= 0)
            {
                return results;
            }

            // Keep the raw text for highlighting; the boosted form is only for the query.
            string keywords = q;
            string queryText = GetKeyWordsSplitBySpace(q, new PanGuTokenizer());
            if (string.IsNullOrEmpty(queryText))
            {
                // Segmentation produced no terms, so there is no query to run.
                return results;
            }

            IndexSearcher search = new IndexSearcher(indexDir);
            try
            {
                QueryParser queryParser = new QueryParser("message", new PanGuAnalyzer(true));
                Query query = queryParser.Parse(queryText);

                // More conditions (e.g. a "title" field) can be added as extra SHOULD clauses.
                BooleanQuery bq = new BooleanQuery();
                bq.Add(query, BooleanClause.Occur.SHOULD);

                Hits hits = search.Search(bq);
                recCount = hits.Length();

                // Compute the offset in 64-bit so a huge page number cannot wrap negative.
                long firstHit = (long)(Math.Max(pageNo, 1) - 1) * pageLen;
                if (firstHit >= recCount)
                {
                    return results;
                }

                PanGu.HighLight.SimpleHTMLFormatter simpleHTMLFormatter =
                    new PanGu.HighLight.SimpleHTMLFormatter("<font color=\"red\">", "</font>");

                int start = (int)firstHit;
                for (int i = start; i < recCount && i - start < pageLen; i++)
                {
                    try
                    {
                        var doc = hits.Doc(i);

                        SearchResult result = new SearchResult();
                        result.Message = doc.Get("message");
                        result.MessageUrl = "/Message/PreviewMessage/" + doc.Get("id");

                        PanGu.HighLight.Highlighter highlighter =
                            new PanGu.HighLight.Highlighter(simpleHTMLFormatter, new Segment());
                        highlighter.FragmentSize = 50;

                        // NOTE(review): the Search view writes this value without HTML
                        // encoding - confirm stored messages cannot contain markup.
                        result.MessageHightLigther = highlighter.GetBestFragment(keywords, result.Message);
                        if (string.IsNullOrEmpty(result.MessageHightLigther))
                        {
                            // No fragment was produced: fall back to the full message.
                            result.MessageHightLigther = result.Message;
                        }

                        // Added only after it is fully built, so a failure never
                        // leaves a half-filled entry in the page.
                        results.Add(result);
                    }
                    catch (Exception e)
                    {
                        // Best effort: report the failing hit and continue with the next.
                        Console.WriteLine(e.Message);
                    }
                }

                return results;
            }
            finally
            {
                // Release the index files even when parsing or searching throws.
                search.Close();
            }
        }

3)搜索结果的类

/// <summary>
/// One entry of a search result page.
/// </summary>
public class SearchResult
{
    /// <summary>The message text.</summary>
    public string Message { get; set; }

    /// <summary>Link to the message's detail page.</summary>
    public string MessageUrl { get; set; }

    /// <summary>The message with highlighting applied.</summary>
    public string MessageHightLigther { get; set; }
}

4)在Controller中的主代码

 /// <summary>
 /// Runs a search for <paramref name="keyword"/> and renders one page of results.
 /// </summary>
 /// <param name="keyword">Search text; a null or blank value yields an empty result page.</param>
 /// <param name="pageIndex">1-based page number; defaults to 1 when omitted.</param>
 /// <returns>The search view, or the "Error" view when the page number is not positive.</returns>
 public ActionResult Search(string keyword, int? pageIndex)
 {
     // Results shown per page; the same value is handed to the pager.
     const int pageSize = 3;
     // NOTE(review): the original literal was garbled (a TAB where "\t" had been
     // unescaped); "F:\tmp\index2" is the reconstructed value - it must match
     // the directory the indexer writes to.
     const string indexDir = @"F:\tmp\index2";

     int page = pageIndex ?? 1;
     if (page <= 0)
     {
         return View("Error", (object)"pageIndex必须大于0");
     }

     int totalCount = 0;
     List<SearchResult> results;
     if (string.IsNullOrWhiteSpace(keyword))
     {
         // The route makes the keyword optional: show an empty page rather
         // than passing null to the tokenizer.
         results = new List<SearchResult>();
     }
     else
     {
         results = SearchResult(indexDir, keyword, pageSize, page, out totalCount);
     }

     ViewBag.KeyWord = keyword;
     ViewBag.SearchResults = results;
     ViewBag.TotalCount = totalCount; // total number of hits across all pages
     ViewBag.PageIndex = page;
     ViewBag.PageSize = pageSize;
     return View();
 }

5)Search View中的代码

 @* Search results page: one link per hit, followed by the pager. *@
 @{
     Layout = "~/Views/DefaultLayout.cshtml";
     ViewBag.Title = "搜索" + ViewBag.KeyWord;
 }
 @foreach (var hit in ViewBag.SearchResults)
 {
     @* HtmlString writes the highlighted text without HTML encoding so the highlight tags render. *@
     <div >
         <a href="@hit.MessageUrl" target="_blank">@(new HtmlString(hit.MessageHightLigther))</a>
     </div>
 }
 @* "{pageIndex}" is passed through literally as part of the URL pattern given to the Pager helper. *@
 @QiuShiBaiKe.Web.WebHelper.Pager("/Message/Search/{pageIndex}/"+ViewBag.KeyWord, ViewBag.TotalCount, ViewBag.PageIndex, ViewBag.PageSize)

6)搜索的路由配置

// Search route: the page number and the keyword are both optional URL segments.
routes.MapRoute(
    "search",
    "{controller}/{action}/{pageIndex}/{keyword}",
    new
    {
        controller = "Message",
        action = "Search",
        pageIndex = UrlParameter.Optional,
        keyword = UrlParameter.Optional
    });
 
原文地址:https://www.cnblogs.com/lucyliang/p/4934515.html