Lucene.net系列六 search 下

本文主要结合测试案例介绍了Lucene下的各种查询语句以及它们的简化方法.

通过本文你将了解Lucene的基本查询语句,并通过学习相关的测试代码以加强了解.

源代码下载

具体的查询语句


在了解了SQL后, 你是否想了解一下查询语法树? 在这里简要介绍一些能被Lucene直接使用的查询语句.

1.         TermQuery
查询某个特定的词,在文章开始的例子中已有介绍.常用于查询关键字.

             [Test]
         
public void Keyword()
         
{
              IndexSearcher searcher 
= new IndexSearcher(directory);
              Term t 
= new Term("isbn""1930110995");
              Query query 
= new TermQuery(t);
              Hits hits 
= searcher.Search(query);
              Assert.AreEqual(
1, hits.Length(), "JUnit in Action");
         }


注意Lucene中的关键字,是需要用户去保证唯一性的.

 TermQuery和QueryParse

只要传给QueryParser的Parse方法的查询字符串中只有一个word,就会自动转换成TermQuery.

2.         RangeQuery
用于查询范围,通常用于时间,还是来看例子:

namespace dotLucene.inAction.BasicSearch
{
    /// <summary>
    /// Exercises RangeQuery over the "pubmonth" field, once with the bounds
    /// included in the range and once with them excluded.
    /// </summary>
    public class RangeQueryTest : LiaTestCase
    {
        // Lower and upper bound of the range; rebuilt before every test by Init().
        private Term begin, end;

        /// <summary>
        /// Creates the range bounds and then runs the base fixture set-up.
        /// </summary>
        [SetUp]
        protected override void Init()
        {
            // Term takes the field name and the term text as two separate arguments.
            begin = new Term("pubmonth", "200004");
            end = new Term("pubmonth", "200206");

            base.Init();
        }

        /// <summary>
        /// With the third RangeQuery argument set to true the bounds are part of
        /// the range; one document is expected.
        /// </summary>
        [Test]
        public void Inclusive()
        {
            RangeQuery query = new RangeQuery(begin, end, true);
            IndexSearcher searcher = new IndexSearcher(directory);

            Hits hits = searcher.Search(query);
            Assert.AreEqual(1, hits.Length());
        }

        /// <summary>
        /// With the third RangeQuery argument set to false the bounds are left
        /// out of the range; no document is expected.
        /// </summary>
        [Test]
        public void Exclusive()
        {
            RangeQuery query = new RangeQuery(begin, end, false);
            IndexSearcher searcher = new IndexSearcher(directory);

            Hits hits = searcher.Search(query);
            Assert.AreEqual(0, hits.Length());
        }
    }
}



RangeQuery的第三个参数用于表示是否包含该起止日期.

RangeQuery和QueryParse

              [Test]
         
public void TestQueryParser()
         
{
              Query query 
= QueryParser.Parse("pubmonth:[200004 TO 200206]""subject"new SimpleAnalyzer());
              Assert.IsTrue(query 
is RangeQuery);
              IndexSearcher searcher 
= new IndexSearcher(directory);
              Hits hits 
= searcher.Search(query);

              query 
= QueryParser.Parse("{200004 TO 200206}""pubmonth"new SimpleAnalyzer());
              hits 
= searcher.Search(query);
              Assert.AreEqual(
0, hits.Length(), "JDwA in 200206");
         }
 

Lucene用[]和{}分别表示包含和不包含起止值.

3.   PrefixQuery

用于搜索是否包含某个特定前缀,常用于Catalog的检索.

           [Test]
         
public  void  TestPrefixQuery()
         
{
              PrefixQuery query 
= new PrefixQuery(new Term("category""/Computers"));

              IndexSearcher searcher 
= new IndexSearcher(directory);
              Hits hits 
= searcher.Search(query);
              Assert.AreEqual(
2, hits.Length());
              
              query 
= new PrefixQuery(new Term("category""/Computers/JUnit"));
              hits 
= searcher.Search(query);
              Assert.AreEqual(
1, hits.Length(), "JUnit in Action");
         }



PrefixQuery和QueryParse

              [Test]
         
public void TestQueryParser()
         
{

              QueryParser qp 
= new QueryParser("category"new SimpleAnalyzer());
              qp.SetLowercaseWildcardTerms(
false);
              Query query 
=qp.Parse("/Computers*");
              Console.Out.WriteLine(
"query = {0}", query.ToString());
              IndexSearcher searcher 
= new IndexSearcher(directory);
              Hits hits 
= searcher.Search(query);
              Assert.AreEqual(
2, hits.Length());
              query 
=qp.Parse("/Computers/JUnit*");
              hits 
= searcher.Search(query);
              Assert.AreEqual(
1, hits.Length(), "JUnit in Action");
         }



这里需要注意的是我们使用了QueryParser对象,而不是QueryParser类的静态方法. 原因在于使用对象可以对QueryParser的一些默认属性进行修改. 比如在上面的例子中我们的category是以大写字母开头的,而QueryParser默认会把所有含*的查询字符串变成小写, 即/computers*. 这样我们就会查不到原文中的/Computers*, 所以我们需要通过设置QueryParser的属性来改变这一默认行为, 这就是qp.SetLowercaseWildcardTerms(false)所做的工作.

4.    BooleanQuery


用于测试满足多个条件.

下面两个例子用于分别测试了满足与条件和或条件的情况.

         [Test]
         
public void And()
         
{
              TermQuery searchingBooks 
=
                   
new TermQuery(new Term("subject""junit"));

              RangeQuery currentBooks 
=
                   
new RangeQuery(new Term("pubmonth""200301"),
                                  
new Term("pubmonth""200312"),
                                  
true);
              BooleanQuery currentSearchingBooks 
= new BooleanQuery();
              currentSearchingBooks.Add(searchingBooks, 
truefalse);
              currentSearchingBooks.Add(currentBooks, 
truefalse);
              IndexSearcher searcher 
= new IndexSearcher(directory);
              Hits hits 
= searcher.Search(currentSearchingBooks);

              AssertHitsIncludeTitle(hits, 
"JUnit in Action");
         }

         [Test]
         
public void Or()
         
{
              TermQuery methodologyBooks 
= new TermQuery(
                   
new Term("category",
                            
"/Computers/JUnit"));
              TermQuery easternPhilosophyBooks 
= new TermQuery(
                   
new Term("category",
                            
"/Computers/Ant"));
              BooleanQuery enlightenmentBooks 
= new BooleanQuery();
              enlightenmentBooks.Add(methodologyBooks, 
falsefalse);
              enlightenmentBooks.Add(easternPhilosophyBooks, 
falsefalse);
              IndexSearcher searcher 
= new IndexSearcher(directory);
              Hits hits 
= searcher.Search(enlightenmentBooks);
              Console.Out.WriteLine(
"or = " + enlightenmentBooks);
              AssertHitsIncludeTitle(hits, 
"Java Development with Ant");
              AssertHitsIncludeTitle(hits, 
"JUnit in Action");

         }


什么时候是与, 什么时候又是或? 关键在于BooleanQuery对象的Add方法的参数.

参数一是待添加的查询条件.

参数二Required表示这个条件是否必须满足: True表示必须满足, False表示可以不满足该条件.

参数三Prohibited表示这个条件是否必须被拒绝: True表示满足这个条件的结果要被排除, False表示可以满足该条件.

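这样会有三种组合情况,如下表所示:

| Required | Prohibited | 含义 |
| --- | --- | --- |
| True | False | 必须满足该条件 (与) |
| False | False | 可以满足该条件 (或) |
| False | True | 满足该条件的结果被排除 (非) |

(Required和Prohibited同时为True是无效的组合.)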



BooleanQuery和QueryParse

         [Test]
         
public void TestQueryParser()
         
{
              Query query 
= QueryParser.Parse("pubmonth:[200301 TO 200312] AND junit""subject"new SimpleAnalyzer());
              IndexSearcher searcher 
= new IndexSearcher(directory);
              Hits hits 
= searcher.Search(query);
              Assert.AreEqual(
1, hits.Length());
              query 
= QueryParser.Parse("/Computers/JUnit OR /Computers/Ant""category"new WhitespaceAnalyzer());
              hits 
= searcher.Search(query);
              Assert.AreEqual(
2, hits.Length());
         }


注意AND和OR必须大写. 如果想要A与非B, 就用 A AND -B 表示, 写成 +A -B 也可以.

默认的情况下QueryParser会把空格认为是或关系,就象google一样.但是你可以通过QueryParser对象修改这一属性.

// Switches the parser's default operator to AND, so that two clauses separated
// only by a space must both match.
[Test]
public void TestQueryParserDefaultAND()
{
    // Arguments are: default field, analyzer.
    QueryParser qp = new QueryParser("subject", new SimpleAnalyzer());
    qp.SetOperator(QueryParser.DEFAULT_OPERATOR_AND);

    Query query = qp.Parse("pubmonth:[200301 TO 200312] junit");
    IndexSearcher searcher = new IndexSearcher(directory);
    Hits hits = searcher.Search(query);
    Assert.AreEqual(1, hits.Length());
}

5.         PhraseQuery
查询短语,这里面主要有一个slop的概念, 也就是各个词之间的位移偏差, 这个值会影响到结果的评分.如果slop为0,当然最匹配.看看下面的例子就比较容易明白了,有关slop的计算用户就不需要理解了,不过slop太大的时候对查询效率是有影响的,所以在实际使用中要把该值设小一点. PhraseQuery对于短语的顺序是不管的,这点在查询时除了提高命中率外,也会对性能产生很大的影响, 利用SpanNearQuery可以对短语的顺序进行控制,提高性能.
        [SetUp]
     
protected void Init()
     
{
         
// set up sample document
         RAMDirectory directory = new RAMDirectory();
         IndexWriter writer 
= new IndexWriter(directory,
                                              
new WhitespaceAnalyzer(), true);
         Document doc 
= new Document();
         doc.Add(Field.Text(
"field",
                            
"the quick brown fox jumped over the lazy dog"));
         writer.AddDocument(doc);
         writer.Close();

         searcher 
= new IndexSearcher(directory);
     }

      
// Builds a PhraseQuery on "field" from the given words with the given slop and
// reports whether the search returns at least one hit.
private bool matched(String[] phrase, int slop)
{
    PhraseQuery phraseQuery = new PhraseQuery();
    phraseQuery.SetSlop(slop);

    // Terms are added in the order they appear in the array.
    foreach (String word in phrase)
    {
        phraseQuery.Add(new Term("field", word));
    }

    Hits found = searcher.Search(phraseQuery);
    return found.Length() > 0;
}


     [Test]
     
public void SlopComparison()
     
{
         String[] phrase 
= new String[]{"quick""fox"};


         Assert.IsFalse(matched(phrase, 
0), "exact phrase not found");

         Assert.IsTrue(matched(phrase, 
1), "close enough");
     }



     [Test]
     
public void Reverse()
     
{
         String[] phrase 
= new String[] {"fox""quick"};

         Assert.IsFalse(matched(phrase, 
2), "exact phrase not found");

         Assert.IsTrue(matched(phrase, 
3), "close enough");
     }


     [Test]
     
public void Multiple()-
     
{
         Assert.IsFalse(matched(
new String[] {"quick""jumped""lazy"}3), "not close enough");
         Assert.IsTrue(matched(
new String[] {"quick""jumped""lazy"}4), "just enough");
         Assert.IsFalse(matched(
new String[] {"lazy""jumped""quick"}7), "almost but not quite");
         Assert.IsTrue(matched(
new String[] {"lazy""jumped""quick"}8), "bingo");
     }


PhraseQuery和QueryParse

利用QueryParser进行短语查询的时候要先设定slop的值,有两种方式,如下所示

// Phrase queries through QueryParser: a quoted phrase has no slop unless one is
// given, either with "~N" in the query text or with SetPhraseSlop on the parser.
[Test]
public void TestQueryParser()
{
    // The phrase is wrapped in escaped double quotes inside the C# string.
    Query q1 = QueryParser.Parse("\"quick fox\"", "field", new SimpleAnalyzer());
    Hits hits1 = searcher.Search(q1);
    Assert.AreEqual(0, hits1.Length());

    // First way: append ~N to the quoted phrase.
    Query q2 = QueryParser.Parse("\"quick fox\"~1", "field", new SimpleAnalyzer());
    Hits hits2 = searcher.Search(q2);
    Assert.AreEqual(1, hits2.Length());

    // Second way: set the slop on the parser instance.
    QueryParser qp = new QueryParser("field", new SimpleAnalyzer());
    qp.SetPhraseSlop(1);
    Query q3 = qp.Parse("\"quick fox\"");
    Assert.AreEqual("\"quick fox\"~1", q3.ToString("field"), "sloppy, implicitly");

    // Search with q3 (not q2) so the parser-level slop is what gets verified.
    Hits hits3 = searcher.Search(q3);
    Assert.AreEqual(1, hits3.Length());
}


6.         WildcardQuery
通配符搜索,需要注意的是child, mildew的分值是一样的. 
         [Test]
         
public void Wildcard()
         
{
              IndexSingleFieldDocs(
new Field[]
                   
{
                       Field.Text(
"contents""wild"),
                       Field.Text(
"contents""child"),
                       Field.Text(
"contents""mild"),
                       Field.Text(
"contents""mildew")
                   }
);
              IndexSearcher searcher 
= new IndexSearcher(directory);
              Query query 
= new WildcardQuery(
                   
new Term("contents""?ild*"));
              Hits hits 
= searcher.Search(query);
              Assert.AreEqual(
3, hits.Length(), "child no match");
              Assert.AreEqual(hits.Score(
0), hits.Score(1), 0.0"score the same");
              Assert.AreEqual(hits.Score(
1), hits.Score(2), 0.0"score the same");
         }

WildcardQuery和QueryParse
需要注意的是, 出于性能的考虑, 使用QueryParser的时候不允许在开头就使用通配符.
同样出于性能考虑, QueryParser会将只在末尾含有*的查询词转换为PrefixQuery.
         [Test, ExpectedException(
typeof (ParseException))]
         
public void TestQueryParserException()
         
{
              Query query 
= QueryParser.Parse("?ild*""contents"new WhitespaceAnalyzer());
         }


         [Test]
         
public void TestQueryParserTailAsterrisk()
         
{
              Query query 
= QueryParser.Parse("mild*""contents"new WhitespaceAnalyzer());
              Assert.IsTrue(query 
is PrefixQuery);
              Assert.IsFalse(query 
is WildcardQuery);

         }


         [Test]
         
public void TestQueryParser()
         
{
              Query query 
= QueryParser.Parse("mi?d*""contents"new WhitespaceAnalyzer());
              Hits hits 
= searcher.Search(query);
              Assert.AreEqual(
2, hits.Length());
         }

7.         FuzzyQuery
模糊查询, 需要注意的是两个匹配项的分值是不同的,这点和WildcardQuery是不同的

         [Test]
         
public void Fuzzy()
         
{
              Query query 
= new FuzzyQuery(new Term("contents""wuzza"));
              Hits hits 
= searcher.Search(query);
              Assert.AreEqual( 
2, hits.Length(),"both close enough");
              Assert.IsTrue(hits.Score(
0!= hits.Score(1),"wuzzy closer than fuzzy");
              Assert.AreEqual(
"wuzzy", hits.Doc(0).Get("contents"),"wuzza bear");
         }



FuzzyQuery和QueryParse

注意和PhraseQuery中表示slop的写法的区别: PhraseQuery的~后要跟数字, 而FuzzyQuery的~直接写在查询词后面.

         [Test]
         
public void TestQueryParser()
         
{
              Query query 
=QueryParser.Parse("wuzza~","contents",new SimpleAnalyzer());
              Hits hits 
= searcher.Search(query);
              Assert.AreEqual( 
2, hits.Length(),"both close enough");
         }



原文地址:https://www.cnblogs.com/kokoliu/p/615343.html