总述 IndexSearch全过程源码分析

  1. 1.总述  IndexSearch全过程源码分析--->生成weight树  
  2. IndexSearcher ---> search(createWeight(query), filter, n, sort)  
  3.     |--createWeight(query)  |实际为生成Weight树  
  4.          |--return query.weight(this);  
  5.       
  6. 2.创建weight树总过程  
  7. query.weight(this)  
  8.     |--Query query = searcher.rewrite(this);  |重新解析Query,将Query生成为单个可供直接查询的Query  
  9.     |--Weight weight = query.createWeight(searcher);  
  10.     |--float sum = weight.sumOfSquaredWeights();  |计算sum分值  
  11.     |--float norm = getSimilarity(searcher).queryNorm(sum); |获取标准因子  
  12.     |--weight.normalize(norm);   |标准化  
  13.     |--return weight;  |返回weight权值树  
  14.   
  15. 3.重写Query对象,生成Query树  
  16. IndexSearcher.rewrite(Query original) |重写Query对象,主要实现拆分  
  17.     |--for (Query rewrittenQuery = query.rewrite(reader); rewrittenQuery != query; rewrittenQuery = query.rewrite(reader)) |重写直至不能再拆分  
  18.          |--query = rewrittenQuery;  
  19.                 |--eg1:BooleanQuery.rewrite(reader) 实现  
  20.                      |--for (int i = 0 ; i < clauses.size(); i++)  
  21.                                         |--Query query = c.getQuery().rewrite(reader);   |重写query对象,重复写的过程,最后都生成BooleanQuery对象  
  22.                                         |--clone.clauses.set(i, new BooleanClause(query, c.getOccur()));  |合成新的BooleanQuery对象  
  23.                 |--eg2:MultiTermQuery.rewrite(reader) 实现  
  24.                      |--rewriteMethod.rewrite(reader, this);  
  25.                                 |--ConstantScoreFilterRewrite.rewrite(reader)   |将所有Term当成一个Term处理  
  26.                                         |--Query result = new ConstantScoreQuery(new MultiTermQueryWrapperFilter(query));  
  27.                                 |--result.setBoost(query.getBoost());  
  28.                         |--ScoringBooleanQueryRewrite.rewrite(reader)   |将Term逐一分离为BooleanQuery子句,风险在于Term数超过BooleanQuery.maxClauseCount(默认1024)时会抛出TooManyClauses异常  
  29.                               |--ConstantScoreBooleanQueryRewrite  
  30.                                     |--result.add(tq, BooleanClause.Occur.SHOULD);  |逐一分离Term,生成Boolean查询  
  31.                       
  32.                                 |--ConstantScoreAutoRewrite.rewrite(reader)     |结合上述二者,自动选择,以term < 350 为界,进行选择  
  33.                                         |--FilteredTermEnum enumerator = query.getEnum(reader);    |根据需要改变的query生成枚举器  
  34.                                         |--Term t = enumerator.term();              |此时含IO操作?  
  35.                                         |--Iterator it = pendingTerms.iterator();  
  36.                                  |--BooleanQuery bq = new BooleanQuery(true);  
  37.                                  |--while(it.hasNext())   |逐个循环,生成term  
  38.                                     |--TermQuery tq = new TermQuery((Term) it.next());  
  39.                                     |--bq.add(tq, BooleanClause.Occur.SHOULD);       
  40.                              |--Query result = new ConstantScoreQuery(new QueryWrapperFilter(bq));  
  41.                              |--result.setBoost(query.getBoost());          |设置分值  
  42.                              |--query.incTotalNumberOfTerms(pendingTerms.size());       |增加Term总数  
  43.   |--return query;  |返回最终生成的Query树  
  44.     
  45. 4.不同的Query查询,重写后生成新的Query                              
  46. |--eg2.1:PrefixQuery.getEnum(reader)  
  47.                 |--return new PrefixTermEnum(reader, prefix);  |返回FilteredTermEnum的子类  
  48.                      |--setEnum(reader.terms(new Term(prefix.field(), prefix.text())));  
  49.                             |--if (term != null && termCompare(term))   |比较前缀  
  50.                              |--currentTerm = term;  
  51.                 |--else next()  
  52.                              |--if (actualEnum.next())         |取下一个term,判断其是否以prefix为前缀  
  53.                                         |-- Term term = actualEnum.term();  
  54.                                         |-- if (termCompare(term)) {  
  55.                                     |--currentTerm = term;  
  56. |--eg2.2:FuzzyQuery.getEnum(reader)  
  57.                 |--return new FuzzyTermEnum(reader, getTerm(), minimumSimilarity, prefixLength);  
  58.                       |--this.text = searchTerm.text().substring(realPrefixLength);    |获取前缀及text文本内容  
  59.                     |--this.prefix = searchTerm.text().substring(0, realPrefixLength);  
  60.                     |--initializeMaxDistances();     |计算初始最大距离  
  61.                     |--setEnum(reader.terms(new Term(searchTerm.field(), prefix))); |计算差距值  
  62.                       |--termCompare(Term term)  
  63.                                     |--final String target = term.text().substring(prefix.length());  
  64.                                     |--this.similarity = similarity(target);  
  65.                                     |--return (similarity > minimumSimilarity);  
  66.   
  67.   
  68. 5.weight.sumOfSquaredWeights() |--计算平方和  
  69.         |--BooleanWeight  |计算后出现二种情况,Boolean及单个weight树,以BooleanWeight为准进行分析  
  70.              |--for (int i = 0 ; i < weights.size(); i++)   |逐一单个的Weight进行计算  
  71.                         |--float s = w.sumOfSquaredWeights()  
  72.                         |--sum += s;  
  73.              |--sum *= getBoost() * getBoost();   
  74.                
  75.         |--TermWeight  |以TermWeight为例  
  76.          |--queryWeight = idf * getBoost();     
  77.          |--return queryWeight * queryWeight;  |平方(返回queryWeight的平方,而非开平方)  
  78.            
  79.   
  80. 6.float norm = getSimilarity(searcher).queryNorm(sum);  |计算标准因子,默认为DefaultSimilarity  
  81.         |-- return (float)(1.0 / Math.sqrt(sumOfSquaredWeights));  
  82.           
  83.           
  84. 7.weight.normalize(norm);  |标准化norm因子,以BooleanWeight为例  
  85.         |--norm *= getBoost();  
  86.         |-- for (Iterator iter = weights.iterator(); iter.hasNext();)   
  87.              |--w.normalize(norm);  |逐个标准化  
  88.           
  89.         |--TermWeight.normalize(norm)    |以TermWeight为例  
  90.                 |--queryWeight *= queryNorm;  
  91.           |--value = queryWeight * idf  
  92.           |--实际值value = (idf * getBoost())*queryNorm*idf;  |queryWeight在sumOfSquaredWeights()中仅被赋值为idf * getBoost(),平方只用于返回值,不会写回queryWeight
原文地址:https://www.cnblogs.com/l1pe1/p/2397442.html