前端编译原理 parser.js源码解读

前面已经介绍了一个jison的使用，在正常开发中其实已经够用下，下面主要是看了下parser.js代码解读下，作为一些了解。
下面以最简单的文法产生的parser做一些代码注释
下面是一些注释，标示了编译过程，能够了解jison产生的编译器有哪些处理
var a = (function() {
    var o = function(k, v, o, l) {
        for (o = o || {}, l = k.length; l--; o[k[l]] = v);
        return o;
    };
    var parser = {
        trace: function trace() {},
        //编译期共享对象
        yy: {},
        //标示符，$accept: 0, $end: 1，error: 2是默认的符号，E: 3, NAT: 4是分析文法获得的
        symbols_: { error: 2, E: 3, NAT: 4, $accept: 0, $end: 1 },
        //终结符
        terminals_: { 2: 'error', 4: 'NAT' },
        //对应文法规则
        productions_: [
            0,  // e` => E $end
            [
                3, //规约成符号2（E） 
                1 //堆栈移除一个状态即移除NAT，压入E
            ] // E => NAT
        ],
        //规约的时候执行的代码
        performAction: function anonymous(
            yytext,
            yyleng,
            yylineno,
            yy,
            yystate /* action[1] */,
            $$ /* vstack */,
            _$ /* lstack */
        ) {
            var $0 = $$.length - 1;
            switch (yystate) {
                case 1:
                    this.$ = $$[$0] * 1;
                    break;
            }
        },
        //移入规约自动机 lalr(1)分析表，每一值表示一个状态
        table: [
            /*状态0 I0*/{ 3: 1/**goto 遇到符号3(E),跳转到状态1 */, 4: [1, 2] /**移入，遇到符号4（NAT），移入进入状态2 */}, 
            /*状态1 I1*/{ 1: [3] /**接受 遇到符号1($end),结束程序，完成编译 */}, 
            /*状态2 I2*/{ 1: [2, 1] /**规约 遇到符号1($end),按照productions[1],进行规则 */ }
        ],
        defaultActions: { 
            2: [2, 1] //状态2进行规则规约，不用向前看符号是什么(逻辑优化)
        },
        //错误提示
        parseError: function parseError(str, hash) {
            if (hash.recoverable) {
                this.trace(str);
            } else {
                var error = new Error(str);
                error.hash = hash;
                throw error;
            }
        },
        //编译 input为处理的字符串
        parse: function parse(input) {
            var self = this,
                stack = [0],
                tstack = [],
                vstack = [null],
                lstack = [],
                table = this.table,
                yytext = '',
                yylineno = 0,
                yyleng = 0,
                recovering = 0,
                TERROR = 2,
                EOF = 1;
            var args = lstack.slice.call(arguments, 1);
            var lexer = Object.create(this.lexer);
            var sharedState = { yy: {} };
            for (var k in this.yy) {
                if (Object.prototype.hasOwnProperty.call(this.yy, k)) {
                    sharedState.yy[k] = this.yy[k];
                }
            }
            lexer.setInput(input, sharedState.yy);
            sharedState.yy.lexer = lexer;
            sharedState.yy.parser = this;
            if (typeof lexer.yylloc === 'undefined') {
                lexer.yylloc = {};
            }
            var yyloc = lexer.yylloc;
            lstack.push(yyloc);
            //位置信息采用区间的形式显示
            var ranges = lexer.options && lexer.options.ranges;
            if (typeof sharedState.yy.parseError === 'function') {
                this.parseError = sharedState.yy.parseError;
            } else {
                this.parseError = Object.getPrototypeOf(this).parseError;
            }
            //规约的时候需要弹出堆栈
            function popStack(n) {
                //stack [0,标识符,状态,标识符,状态...],所以出栈是2倍
                stack.length = stack.length - 2 * n;
                vstack.length = vstack.length - n;
                lstack.length = lstack.length - n;
            }
            //词法分析,返回token即标示符
            var lex = function() {
                var token;
                token = lexer.lex() || EOF;
                if (typeof token !== 'number') {
                    token = self.symbols_[token] || token;
                }
                return token;
            };
            var symbol,
                preErrorSymbol,
                state,
                action,
                a,
                r,
                yyval = {},
                p,
                len,
                newState,
                expected;
            while (true) {
                state = stack[stack.length - 1];
                if (this.defaultActions[state]) {
                    action = this.defaultActions[state];
                } else {
                    if (symbol === null || typeof symbol === 'undefined') {
                        //通过正则获取下一个标示符
                        symbol = lex();
                    }
                    //action对应的动作
                    action = table[state] && table[state][symbol];
                }
                //动作错误，语法分析错误
                if (typeof action === 'undefined' || !action.length || !action[0]) {
                    var errStr = '';
                    expected = [];
                    for (p in table[state]) {
                        if (this.terminals_[p] && p > TERROR) {
                            expected.push("'" + this.terminals_[p] + "'");
                        }
                    }
                    if (lexer.showPosition) {
                        errStr =
                            'Parse error on line ' +
                            (yylineno + 1) +
                            ':
' +
                            lexer.showPosition() +
                            '
Expecting ' +
                            expected.join(', ') +
                            ", got '" +
                            (this.terminals_[symbol] || symbol) +
                            "'";
                    } else {
                        errStr =
                            'Parse error on line ' +
                            (yylineno + 1) +
                            ': Unexpected ' +
                            (symbol == EOF ? 'end of input' : "'" + (this.terminals_[symbol] || symbol) + "'");
                    }
                    this.parseError(errStr, {
                        text: lexer.match,
                        token: this.terminals_[symbol] || symbol,
                        line: lexer.yylineno,
                        loc: yyloc,
                        expected: expected
                    });
                }
                //规约-规约冲突，移入规约冲突
                if (action[0] instanceof Array && action.length > 1) {
                    throw new Error('Parse Error: multiple actions possible at state: ' + state + ', token: ' + symbol);
                }
                switch (action[0]) {
                    //移入
                    case 1:
                        stack.push(symbol);
                        vstack.push(lexer.yytext);
                        lstack.push(lexer.yylloc);
                        stack.push(action[1]);
                        symbol = null;
                        if (!preErrorSymbol) {
                            yyleng = lexer.yyleng;
                            yytext = lexer.yytext;
                            yylineno = lexer.yylineno;
                            yyloc = lexer.yylloc;
                            if (recovering > 0) {
                                recovering--;
                            }
                        } else {
                            symbol = preErrorSymbol;
                            preErrorSymbol = null;
                        }
                        break;
                    //规约
                    case 2:
                        len = this.productions_[action[1]][1];
                        yyval.$ = vstack[vstack.length - len];
                        yyval._$ = {
                            first_line: lstack[lstack.length - (len || 1)].first_line,
                            last_line: lstack[lstack.length - 1].last_line,
                            first_column: lstack[lstack.length - (len || 1)].first_column,
                            last_column: lstack[lstack.length - 1].last_column
                        };
                        if (ranges) {
                            yyval._$.range = [
                                lstack[lstack.length - (len || 1)].range[0],
                                lstack[lstack.length - 1].range[1]
                            ];
                        }
                        //执行文法定义的代码
                        r = this.performAction.apply(
                            yyval,
                            [yytext, yyleng, yylineno, sharedState.yy, action[1], vstack, lstack].concat(args)
                        );
                        if (typeof r !== 'undefined') {
                            return r;
                        }
                        if (len) {
                            stack = stack.slice(0, -1 * len * 2);
                            vstack = vstack.slice(0, -1 * len);
                            lstack = lstack.slice(0, -1 * len);
                        }
                        stack.push(this.productions_[action[1]][0]);
                        vstack.push(yyval.$);
                        lstack.push(yyval._$);
                        newState = table[stack[stack.length - 2]][stack[stack.length - 1]];
                        stack.push(newState);
                        break;
                    //接受
                    case 3:
                        return true;
                }
            }
            return true;
        }
    };
    /* 词法分析 */
    var lexer = (function() {
        var lexer = {
            EOF: 1,

            parseError: function parseError(str, hash) {
                if (this.yy.parser) {
                    this.yy.parser.parseError(str, hash);
                } else {
                    throw new Error(str);
                }
            },

            // resets the lexer, sets new input
            setInput: function(input, yy) {
                this.yy = yy || this.yy || {};
                this._input = input;
                this._more = this._backtrack = this.done = false;
                this.yylineno = this.yyleng = 0;
                this.yytext = this.matched = this.match = '';
                this.conditionStack = ['INITIAL'];
                this.yylloc = {
                    first_line: 1,
                    first_column: 0,
                    last_line: 1,
                    last_column: 0
                };
                if (this.options.ranges) {
                    this.yylloc.range = [0, 0];
                }
                this.offset = 0;
                return this;
            },
            input: function() {
                var ch = this._input[0];
                this.yytext += ch;
                this.yyleng++;
                this.offset++;
                this.match += ch;
                this.matched += ch;
                var lines = ch.match(/(?:
?|
).*/g);
                if (lines) {
                    this.yylineno++;
                    this.yylloc.last_line++;
                } else {
                    this.yylloc.last_column++;
                }
                if (this.options.ranges) {
                    this.yylloc.range[1]++;
                }

                this._input = this._input.slice(1);
                return ch;
            },
            unput: function(ch) {
                var len = ch.length;
                var lines = ch.split(/(?:
?|
)/g);

                this._input = ch + this._input;
                this.yytext = this.yytext.substr(0, this.yytext.length - len);
                //this.yyleng -= len;
                this.offset -= len;
                var oldLines = this.match.split(/(?:
?|
)/g);
                this.match = this.match.substr(0, this.match.length - 1);
                this.matched = this.matched.substr(0, this.matched.length - 1);

                if (lines.length - 1) {
                    this.yylineno -= lines.length - 1;
                }
                var r = this.yylloc.range;

                this.yylloc = {
                    first_line: this.yylloc.first_line,
                    last_line: this.yylineno + 1,
                    first_column: this.yylloc.first_column,
                    last_column: lines
                        ? (lines.length === oldLines.length ? this.yylloc.first_column : 0) +
                          oldLines[oldLines.length - lines.length].length -
                          lines[0].length
                        : this.yylloc.first_column - len
                };

                if (this.options.ranges) {
                    this.yylloc.range = [r[0], r[0] + this.yyleng - len];
                }
                this.yyleng = this.yytext.length;
                return this;
            },
            more: function() {
                this._more = true;
                return this;
            },
            reject: function() {
                if (this.options.backtrack_lexer) {
                    this._backtrack = true;
                } else {
                    return this.parseError(
                        'Lexical error on line ' +
                            (this.yylineno + 1) +
                            '. You can only invoke reject() in the lexer when the lexer is of the backtracking persuasion (options.backtrack_lexer = true).
' +
                            this.showPosition(),
                        {
                            text: '',
                            token: null,
                            line: this.yylineno
                        }
                    );
                }
                return this;
            },
            less: function(n) {
                this.unput(this.match.slice(n));
            },
            pastInput: function() {
                var past = this.matched.substr(0, this.matched.length - this.match.length);
                return (past.length > 20 ? '...' : '') + past.substr(-20).replace(/
/g, '');
            },
            upcomingInput: function() {
                var next = this.match;
                if (next.length < 20) {
                    next += this._input.substr(0, 20 - next.length);
                }
                return (next.substr(0, 20) + (next.length > 20 ? '...' : '')).replace(/
/g, '');
            },
            showPosition: function() {
                var pre = this.pastInput();
                var c = new Array(pre.length + 1).join('-');
                return pre + this.upcomingInput() + '
' + c + '^';
            },
            test_match: function(match, indexed_rule) {
                var token, lines, backup;

                if (this.options.backtrack_lexer) {
                    // save context
                    backup = {
                        yylineno: this.yylineno,
                        yylloc: {
                            first_line: this.yylloc.first_line,
                            last_line: this.last_line,
                            first_column: this.yylloc.first_column,
                            last_column: this.yylloc.last_column
                        },
                        yytext: this.yytext,
                        match: this.match,
                        matches: this.matches,
                        matched: this.matched,
                        yyleng: this.yyleng,
                        offset: this.offset,
                        _more: this._more,
                        _input: this._input,
                        yy: this.yy,
                        conditionStack: this.conditionStack.slice(0),
                        done: this.done
                    };
                    if (this.options.ranges) {
                        backup.yylloc.range = this.yylloc.range.slice(0);
                    }
                }

                lines = match[0].match(/(?:
?|
).*/g);
                if (lines) {
                    this.yylineno += lines.length;
                }
                this.yylloc = {
                    first_line: this.yylloc.last_line,
                    last_line: this.yylineno + 1,
                    first_column: this.yylloc.last_column,
                    last_column: lines
                        ? lines[lines.length - 1].length - lines[lines.length - 1].match(/
?
?/)[0].length
                        : this.yylloc.last_column + match[0].length
                };
                this.yytext += match[0];
                this.match += match[0];
                this.matches = match;
                this.yyleng = this.yytext.length;
                if (this.options.ranges) {
                    this.yylloc.range = [this.offset, (this.offset += this.yyleng)];
                }
                this._more = false;
                this._backtrack = false;
                this._input = this._input.slice(match[0].length);
                this.matched += match[0];
                token = this.performAction.call(
                    this,
                    this.yy,
                    this,
                    indexed_rule,
                    this.conditionStack[this.conditionStack.length - 1]
                );
                if (this.done && this._input) {
                    this.done = false;
                }
                if (token) {
                    return token;
                } else if (this._backtrack) {
                    for (var k in backup) {
                        this[k] = backup[k];
                    }
                    return false;
                }
                return false;
            },
            next: function() {
                if (this.done) {
                    return this.EOF;
                }
                if (!this._input) {
                    this.done = true;
                }

                var token, match, tempMatch, index;
                if (!this._more) {
                    this.yytext = '';
                    this.match = '';
                }
                //获取词法规则
                //没看出来conditionStack有什么用，可能跟我的文法定义比较简单，没有使用一些内置的begin，pushState, popState之类的有点关系
                var rules = this._currentRules();
                for (var i = 0; i < rules.length; i++) {
                    tempMatch = this._input.match(this.rules[rules[i]]);
                    if (tempMatch && (!match || tempMatch[0].length > match[0].length)) {
                        match = tempMatch;
                        index = i;
                        //backtrack_lexer，会先尝试返回token,如果想尝试下面的规则的话，通过reject()方法
                        if (this.options.backtrack_lexer) {
                            token = this.test_match(tempMatch, rules[i]);
                            if (token !== false) {
                                return token;
                            } else if (this._backtrack) {
                                match = false;
                                continue;
                            } else {
                                return false;
                            }
                        //flex 设置true 会匹配符合正则多的文本
                        //flex 设置false 根据文法顺序返回结果
                        } else if (!this.options.flex) {
                            break;
                        }
                    }
                }
                if (match) {
                    token = this.test_match(match, rules[index]);
                    if (token !== false) {
                        return token;
                    }
                    return false;
                }
                if (this._input === '') {
                    return this.EOF;
                } else {
                    //输入字符串不存在词法对应
                    return this.parseError(
                        'Lexical error on line ' + (this.yylineno + 1) + '. Unrecognized text.
' + this.showPosition(),
                        {
                            text: '',
                            token: null,
                            line: this.yylineno
                        }
                    );
                }
            },
            lex: function lex() {
                var r = this.next();
                if (r) {
                    return r;
                } else {
                    return this.lex();
                }
            },
            begin: function begin(condition) {
                this.conditionStack.push(condition);
            },
            popState: function popState() {
                var n = this.conditionStack.length - 1;
                if (n > 0) {
                    return this.conditionStack.pop();
                } else {
                    return this.conditionStack[0];
                }
            },
            _currentRules: function _currentRules() {
                if (this.conditionStack.length && this.conditionStack[this.conditionStack.length - 1]) {
                    return this.conditions[this.conditionStack[this.conditionStack.length - 1]].rules;
                } else {
                    return this.conditions['INITIAL'].rules;
                }
            },
            topState: function topState(n) {
                n = this.conditionStack.length - 1 - Math.abs(n || 0);
                if (n >= 0) {
                    return this.conditionStack[n];
                } else {
                    return 'INITIAL';
                }
            },
            pushState: function pushState(condition) {
                this.begin(condition);
            },
            stateStackSize: function stateStackSize() {
                return this.conditionStack.length;
            },
            options: {},
            performAction: function anonymous(yy, yy_, $avoiding_name_collisions, YY_START) {
                var YYSTATE = YY_START;
                switch ($avoiding_name_collisions) {
                    case 0 /* skip whitespace */:
                        break;
                    case 1:
                        return 4;
                        break;
                    case 2:
                        return '+';
                        break;
                }
            },
            //词法中的规则
            rules: [
                /^(?:s+)/, 
                /^(?:[0-9]+)/, 
                /^(?:+)/
            ],
            conditions: { 
                INITIAL: { rules: [0, 1, 2], inclusive: true } 
            }
        };
        return lexer;
    })();
    parser.lexer = lexer;
    function Parser() {
        this.yy = {};
    }
    Parser.prototype = parser;
    parser.Parser = Parser;
    return new Parser();
})();