jQuery 源码分析 7: sizzle

　　jQuery使用的是sizzle这个选择器引擎，这个引擎以其高速著称，其实现十分精妙但是也足够复杂，下面先简单分析一下相关的代码。

jQuery的部分API接口直接引用了Sizzle的方法，这些接口如下：

1 jQuery.find = Sizzle;                     // Expose the Sizzle function itself as jQuery.find
2 jQuery.expr = Sizzle.selectors;           // jQuery.expr refers to the same object as Sizzle.selectors
3 jQuery.expr[":"] = jQuery.expr.pseudos;   // The ":" key is an alias for the pseudos table on that object
4 jQuery.unique = Sizzle.uniqueSort;        // jQuery.unique is Sizzle.uniqueSort
5 jQuery.text = Sizzle.getText;             // jQuery.text is Sizzle.getText
6 jQuery.isXMLDoc = Sizzle.isXML;           // jQuery.isXMLDoc is Sizzle.isXML
7 jQuery.contains = Sizzle.contains;        // jQuery.contains is Sizzle.contains

　　jQuery.find 引用的就是Sizzle，下面看看Sizzle的实现

  1  // @param selector 已去掉头尾空白的选择器字符串 
  2  // @param context 执行匹配的最初的上下文（即DOM元素集合）。若context没有赋值，则取document。 
  3  // @param results 已匹配出的部分最终结果。若results没有赋值，则赋予空数组。 
  4  // @param seed 初始集合 
  5 
  6 function Sizzle( selector, context, results, seed ) {
  7      var match, elem, m, nodeType,
  8           // QSA vars
  9           i, groups, old, nid, newContext, newSelector;
 10       if ( ( context ? context.ownerDocument || context : preferredDoc ) !== document ) {
 11 
 12           // 根据不同的浏览器环境,设置合适的Expr方法,构造合适的rbuggy测试
 13           setDocument( context );
 14      }
 15       context = context || document;
 16      results = results || [];
 17      nodeType = context.nodeType;
 18       if ( typeof selector !== "string" || !selector ||
 19           nodeType !== 1 && nodeType !== 9 && nodeType !== 11 ) {
 20            return results;
 21      }
 22       if ( !seed && documentIsHTML ) {
 23           // 尽可能快地找到目标节点, 选择器类型是id,标签和类
 24           // rquickExpr = /^(?:#([w-]+)|(w+)|.([w-]+))$/
 25           // 将selector按 #[id] / [tag] / .[class]的顺序捕获到数组中,数组的第一个元素是原始值
 26           // 捕获结果中'#'和'.'会被移除
 27           if ( nodeType !== 11 && (match = rquickExpr.exec( selector )) ) {
 28                // 加速: Sizzle("#ID")
 29                if ( (m = match[1]) ) {
 30                     if ( nodeType === 9 ) {
 31                          elem = context.getElementById( m );
 32                          // 检查Blackberry 4.6返回的已经不在document中的parentNode
 33                          if ( elem && elem.parentNode ) {
 34                               // IE, Opera, Webkit有时候会返回name == m的元素
 35                               if ( elem.id === m ) {
 36                                    results.push( elem );
 37                                    return results;
 38                               }
 39                          } else {
 40                               return results;
 41                          }
 42                     } else {
 43                          // 上下文不是document
 44                          if ( context.ownerDocument && 
 45 
 46                               (elem = context.ownerDocument.getElementById( m )) &&
 47                               contains( context, elem ) && elem.id === m ) {
 48 
 49                               results.push( elem );
 50                               return results;
 51                          }
 52                     }
 53                 // 加速: Sizzle("TAG")
 54                // 由于返回是一个数组,因此需要让这个数组作为参数数组并利用push.apply调用将其拼接到results后面
 55                } else if ( match[2] ) {
 56                     push.apply( results, context.getElementsByTagName( selector ) );
 57                     return results;
 58                // 加速: Sizzle(".CLASS")
 59                // push.apply的使用原因同上
 60                } else if ( (m = match[3]) && support.getElementsByClassName ) {
 61                     push.apply( results, context.getElementsByClassName( m ) );
 62                     return results;
 63                }
 64           }
 65 
 66           // 使用QSA, QSA: querySelectorAll, 原生的QSA运行速度非常快,因此尽可能使用QSA来对CSS选择器进行查询
 67           // querySelectorAll是原生的选择器,但不支持老的浏览器版本, 主要是IE8及以前的浏览器
 68           // rbuggyQSA 保存了用于解决一些浏览器兼容问题的bug修补的正则表达式
 69           // QSA在不同浏览器上运行的效果有差异，表现得非常奇怪，因此对某些selector不能用QSA
 70           // 为了适应不同的浏览器，就需要首先进行浏览器兼容性测试，然后确定测试正则表达式,用rbuggyQSA来确定selector是否能用QSA
 71 
 72           if ( support.qsa && (!rbuggyQSA || !rbuggyQSA.test( selector )) ) {
 73                nid = old = expando;
 74                newContext = context;
 75                newSelector = nodeType !== 1 && selector;
 76 
 77                // QSA 在以某个根节点ID为基础的查找中(.rootClass span)表现很奇怪，
 78                // 它会忽略某些selector选项，返回不合适的结果
 79                // 一个比较通常的解决方法是为根节点设置一个额外的id，并以此开始查询
 80                // IE 8 doesn't work on object elements 
 81                if ( nodeType === 1 && context.nodeName.toLowerCase() !== "object" ) {
 82                     groups = tokenize( selector );                  // 分析选择器的词法并返回一个词法标记数组
 83                     if ( (old = context.getAttribute("id")) ) {     // 保存并设置新id
 84                          nid = old.replace( rescape, "\$&" );
 85                     } else {
 86                          context.setAttribute( "id", nid );
 87                     }
 88                     nid = "[id='" + nid + "'] ";
 89                      i = groups.length;
 90                     while ( i-- ) {
 91                          groups[i] = nid + toSelector( groups[i] );     // 把新的id添加到选择器标记里
 92                     }
 93                     newContext = rsibling.test( selector ) && testContext( context.parentNode ) || context;
 94                     newSelector = groups.join(",");                     // 构造新的选择器
 95                }
 96                 if ( newSelector ) {                                    // 使用新的选择器通过QSA来查询元素
 97                     try {
 98                          push.apply( results,                          // 将查询结果合并到results上
 99                               newContext.querySelectorAll( newSelector )
100                          );
101                          return results;
102                     } catch(qsaError) {
103                     } finally {
104                          if ( !old ) {
105                               context.removeAttribute("id");          // 如果没有旧id,则移除
106                          }
107                     }
108                }
109           }
110      }
111       // 其他selector,这些selector无法直接使用原生的document查询方法
112      return select( selector.replace( rtrim, "$1" ), context, results, seed );
113 }

rbuggy：测试QSA的Bug

使用assert(function(div){})函数进行浏览器bug测试

 1 /**
 2 * Support testing using an element
 3 * @param {Function} fn Passed the created div and expects a boolean result
 4 * @returns {boolean} fn's result coerced with !!, or false when fn throws */
 5 function assert( fn ) {
 6      var div = document.createElement("div");          // create the scratch node handed to fn
 7      try {
 8           return !!fn( div );                          // coerce fn's return value to a boolean
 9      } catch (e) {
10           return false;                                // a throwing test counts as a failed test
11      } finally {
12           if ( div.parentNode ) {                      // detach the node if fn attached it somewhere
13                div.parentNode.removeChild( div );
14           }
15           div = null;                                  // drop the reference; original note says this releases memory in IE
16      }
17 }

assert函数建立一个div节点，将这个div节点传递给回调函数；
div节点在assert函数结束时会被删除，此时注意要删除由回调函数创建的子节点，并将div赋值null以让GC回收。
回调函数利用新建的div节点作为根节点，在这个根节点上创建一些测试用的节点进行测试；

一个bug测试例子：

 1 assert(function( div ) {
 2 
 3      // 创建一些子节点
 4      docElem.appendChild( div ).innerHTML = "<a id='" + expando + "'></a>" +
 5           "<select id='" + expando + "-f]' msallowcapture=''>" +
 6           "<option selected=''></option></select>";
 7      ... // 其他测试
 8      // 测试document.querySelectorAll()的正确性
 9      if ( div.querySelectorAll("[msallowcapture^='']").length ) {
10           rbuggyQSA.push( "[*^$]=" + whitespace + "*(?:''|"")" );
11 
12           // 确定用于测试selector能否使用QSA的正则表达式
13 
14      }
15      ... // 其他测试 
16 });

select方法:

当无法直接使用document的原生选择器时,就会调用Sizzle.select.

注释里写到"A low-level selection function that works with Sizzle's compiled selector functions"，即这是一个低层的选择函数，与Sizzle.compile编译出的选择器函数协作执行。

 1 // @param selector Selector string, or a function (then treated as a precompiled matcher and its .selector is tokenized)
 2 // @param context  Node the match starts from; may be replaced below when the selector begins with an ID token
 3 // @param results  Collection that matches are pushed onto; defaults to a new array. This is the return value.
 4 // @param seed     Optional initial set; when truthy, tokenization and the shortcuts below are skipped
 5 
 6 select = Sizzle.select = function( selector, context, results, seed ) {
 7      var i, tokens, token, type, find,
 8           compiled = typeof selector === "function" && selector,
 9           match = !seed && tokenize( (selector = compiled.selector || selector) );
10       results = results || [];
11       // Try to minimize work when there is no seed and only a single selector group
12      if ( match.length === 1 ) {
13 
14           // If the leading token is an ID, take a shortcut and narrow the context (work on a copy of the token list)
15           tokens = match[0] = match[0].slice( 0 );
16           if ( tokens.length > 2 && (token = tokens[0]).type === "ID" &&
17                     support.getById && context.nodeType === 9 && documentIsHTML &&
18                     Expr.relative[ tokens[1].type ] ) {
19 
20              // Look the element up through Expr.find["ID"] (defined elsewhere).
21              // Article note (not verifiable from this listing): setDocument builds a browser-specific Expr.find["ID"] around getElementById
22              // The first element it returns becomes the new context
23 
24                context = ( Expr.find["ID"]( token.matches[0].replace(runescape, funescape), context ) || [] )[0];
25                if ( !context ) {
26                     return results;     // the id root was not found: return results as-is
27                 // Precompiled matchers will still verify ancestry, so step up a level
28                } else if ( compiled ) {
29                     context = context.parentNode;
30                }
31                // Drop the leading ID token from both the token list and the selector text
32                selector = selector.slice( tokens.shift().value.length );
33           }
34           // Fetch a seed set for right-to-left matching
35           // Article note: matchExpr["needsContext"] detects positional pseudos (:first, :even, ...) or a leading "> + ~" combinator; TODO confirm against its definition
36 
37           // If it matches, i is 0 and the loop below is skipped; otherwise start from the last token
38           i = matchExpr["needsContext"].test( selector ) ? 0 : tokens.length;
39 
40           // Walk the tokens from right to left
41           while ( i-- ) {
42                token = tokens[i];
43                 // Stop at the first combinator, i.e. a token whose type has an entry in Expr.relative
44 
45                if ( Expr.relative[ (type = token.type) ] ) {
46                     break;
47                }
48 
49                // Pick the finder registered for this token type, if any
50                if ( (find = Expr.find[ type ]) ) {
51                     // Search, expanding context for leading sibling combinators
52                     // Article note: rsibling = /[+~]/, used to detect sibling combinators
53                     if ( (seed = find(
54                          token.matches[0].replace( runescape, funescape ),
55                          rsibling.test( tokens[0].type ) && testContext( context.parentNode ) || context
56                     )) ) {
57 
58                          // If the seed is empty or no tokens remain, return early;
59                          // otherwise continue below with the new seed and the remaining tokens
60                          tokens.splice( i, 1 );
61                          selector = seed.length && toSelector( tokens );
62                          if ( !selector ) {
63                               push.apply( results, seed );
64                               return results;
65                          }
66                          break;
67                     }
68                }
69           }
70      }
71       // Compile and execute a filtering function if one is not provided
72      // Provide `match` to avoid retokenization if we modified the selector above
73      // compile() returns a matcher function, which is invoked immediately with the arguments below
74      ( compiled || compile( selector, match ) )(
75           seed,
76           context,
77           !documentIsHTML,
78           results,
79           rsibling.test( selector ) && testContext( context.parentNode ) || context
80      );
81      return results;
82 };

Compile方法:

 1 compile = Sizzle.compile = function( selector, match /* Internal Use Only */ ) {   // returns the cached or newly built matcher for `selector`
 2      var i,
 3           setMatchers = [],
 4           elementMatchers = [],
 5           cached = compilerCache[ selector + " " ];     // look up an existing matcher; the cache key is selector + " "
 6      // Nothing cached for this selector yet: build a matcher
 7      if ( !cached ) {
 8           // Original note: generate a function of recursive functions that can be used to check each element
 9           if ( !match ) {
10                match = tokenize( selector );          // tokenize only when the caller did not pass token groups
11           }
12           i = match.length;
13           while ( i-- ) {
14                cached = matcherFromTokens( match[i] );     // build one matcher per token group
15                if ( cached[ expando ] ) {                  // matchers carrying the expando property are collected as set matchers
16                     setMatchers.push( cached );
17                } else {
18                     elementMatchers.push( cached );
19                }
20           }
21           // Cache the compiled function
22           // Article note: matcherFromGroupMatchers returns a "superMatcher"
23           // Article note: compilerCache = createCache(), a per-selector matcher cache (neither helper is shown here)
24           cached = compilerCache( selector, matcherFromGroupMatchers( elementMatchers, setMatchers ) );
25            // Keep the selector text on the compiled matcher
26           cached.selector = selector;
27      }
28 
29      // Return the matcher for this selector
30      return cached;
31 };

总结

调用jQuery.find的时候实际上就调用了Sizzle。Sizzle的实现的一个最基本思路是以最快的速度完成选择器匹配，那如何才能够完成呢？对于简单、不含其他关系符的选择器（如#id、tag、.class），就尽可能地直接调用document.getElementById / getElementsByTagName / getElementsByClassName 等原生方法，这些DOM的基本方法的运行速度是最快的。而对于词法更为复杂的选择器（包含关系符、伪类选择器），首选是调用document.querySelectorAll。QSA的速度非常快，但在不同的浏览器上其运行效果不一样，很多时候会有莫名其妙的返回结果，因此使用前需要对浏览器进行测试，只有在确定能够使用QSA的情况下才进行这样的调用，否则就需要调用Sizzle.select进行更低层次的元素匹配；
select的实现相对而言比较复杂，它首先需要对选择器进行词法分析，然后根据所得到的词法标记利用sizzle.compile构造出一系列匹配器，并将这些匹配器组合成一个更大的匹配方法，最后才执行这个匹配方法；关于select的更详细具体的分析，留在日后再看；
由select可以看出来，鉴于浏览器的兼容性问题，尤其是针对IE < 8的兼容性，jQuery在这方面作出了许多努力，使得代码的编写变得臃肿，执行效率也有所下降；
jQuery中使用的assert方法十分精妙，新建DOM节点元素作为测试根节点，然后进行节点匹配测试。要注意的是，测试完成后删除子节点，并对根节点赋值null，否则容易导致内存泄漏（IE）；
jQuery的选择器是使用频率最高的方法之一，因此一切效率至上。从jQuery的源码可学习到，如何在保证运行效率的前提下保证一个方法对浏览器的兼容性。