Lua Source Code Study, Part 2: Syntax Analysis

  

  Stepping through with the debugger shows that luaY_parser in lparser.c is the central function of syntax analysis; lexical analysis is also driven from within it. Several data structures are involved in the process and are described in detail below.

  

Proto *luaY_parser (lua_State *L, ZIO *z, Mbuffer *buff, const char *name) {
  struct LexState lexstate;
  struct FuncState funcstate;
  lexstate.buff = buff;
  luaX_setinput(L, &lexstate, z, luaS_new(L, name));
  open_func(&lexstate, &funcstate);  /* initialize funcstate and create the main Proto */
  funcstate.f->is_vararg = VARARG_ISVARARG;  /* main func. is always vararg */
  luaX_next(&lexstate);  /* read the first token */
  chunk(&lexstate);  /* parse the chunk (the whole source) */
  check(&lexstate, TK_EOS);  /* make sure the end of the source has been reached */
  close_func(&lexstate);  /* finish the main function */
  lua_assert(funcstate.prev == NULL);
  lua_assert(funcstate.f->nups == 0);
  lua_assert(lexstate.fs == NULL);
  return funcstate.f;
}
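
For context (this example is my own, not part of the Lua sources shown here): luaY_parser is never called directly by user code; it is reached from the public API when a chunk is loaded, roughly along the chain lua_load -> luaD_protectedparser -> f_parser -> luaY_parser. A minimal Lua 5.1 host program that exercises the parser looks like this:

#include <stdio.h>
#include <lua.h>
#include <lauxlib.h>
#include <lualib.h>

int main(void) {
  lua_State *L = luaL_newstate();          /* create a Lua state */
  luaL_openlibs(L);                        /* open the standard libraries */
  /* compiling this string is what drives luaY_parser / llex */
  if (luaL_loadstring(L, "local x = 1 print(x + 1)") != 0) {
    fprintf(stderr, "parse error: %s\n", lua_tostring(L, -1));
  }
  else if (lua_pcall(L, 0, 0, 0) != 0) {   /* run the compiled chunk */
    fprintf(stderr, "runtime error: %s\n", lua_tostring(L, -1));
  }
  lua_close(L);
  return 0;
}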

  OK, no rush, let's look at it step by step. What exactly are lua_State, LexState and FuncState?

   A lua_State exists for the entire run of a Lua program. Each thread of execution has its own lua_State: a program that never creates coroutines has exactly one, and every coroutine created with lua_newthread gets an additional lua_State that shares the same global state.

struct lua_State {
  CommonHeader;
  lu_byte status;
  StkId top;  /* first free slot in the stack */
  StkId base;  /* base of current function */
  global_State *l_G;  /* pointer to the global state */
  CallInfo *ci;  /* call info for current function */
  const Instruction *savedpc;  /* `savedpc' of current function */
  StkId stack_last;  /* last free slot in the stack */
  StkId stack;  /* stack base */
  CallInfo *end_ci;  /* points after end of ci array (top of the call-info stack) */
  CallInfo *base_ci;  /* array of CallInfo's (bottom of the call-info stack) */
  int stacksize;
  int size_ci;  /* size of array `base_ci' */
  unsigned short nCcalls;  /* number of nested C calls */
  lu_byte hookmask;
  lu_byte allowhook;
  int basehookcount;
  int hookcount;
  lua_Hook hook;
  TValue l_gt;  /* table of globals */
  TValue env;  /* temporary place for environments */
  GCObject *openupval;  /* list of open upvalues in this stack */
  GCObject *gclist;
  struct lua_longjmp *errorJmp;  /* current error recover point */
  ptrdiff_t errfunc;  /* current error handling function (stack index) */
};
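
A quick illustration of the point above (my own sketch, not from the original article): the main state is created with luaL_newstate, and each coroutine created with lua_newthread is another lua_State that shares the same global_State through its l_G field:

#include <lua.h>
#include <lauxlib.h>

int main(void) {
  lua_State *L  = luaL_newstate();   /* main thread, i.e. the main lua_State */
  lua_State *co = lua_newthread(L);  /* a coroutine: a second lua_State */
  /* L and co have separate stacks (top, base, ci, ...) but share the
     same global_State; lua_newthread leaves co anchored on L's stack */
  (void)co;
  lua_close(L);
  return 0;
}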

LexState stores the context data used during lexical analysis.

typedef struct LexState {
  int current;  /* current character (charint), i.e. the next character to be consumed */
  int linenumber;  /* input line counter */
  int lastline;  /* line of last token `consumed' */
  Token t;  /* current token */
  Token lookahead;  /* look-ahead token (read in advance) */
  struct FuncState *fs;  /* `FuncState' is private to the parser */
  struct lua_State *L;
  ZIO *z;  /* input stream */
  Mbuffer *buff;  /* temporary buffer for tokens */
  TString *source;  /* current source name */
  char decpoint;  /* locale decimal point */
} LexState;
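
The t and lookahead fields use the Token type. For reference, in Lua 5.1's llex.h it is defined essentially as follows (quoted from memory, so treat it as approximate):

typedef union {
  lua_Number r;   /* value of a number token */
  TString *ts;    /* value of a name or string token */
} SemInfo;  /* semantic information */

typedef struct Token {
  int token;        /* the token kind: a plain character or a TK_* value */
  SemInfo seminfo;  /* its associated value, if any */
} Token;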

FuncState is the data structure that holds the state of the function currently being compiled.

typedef struct FuncState {
  Proto *f;  /* current function header */
  Table *h;  /* table to find (and reuse) elements in `k' */
  struct FuncState *prev;  /* enclosing function (previous entry in the chain of FuncStates) */
  struct LexState *ls;  /* lexical state */
  struct lua_State *L;  /* copy of the Lua state */
  struct BlockCnt *bl;  /* chain of current blocks */
  int pc;  /* next position to code (equivalent to `ncode') */
  int lasttarget;   /* `pc' of last `jump target' */
  int jpc;  /* list of pending jumps to `pc' */
  int freereg;  /* first free register */
  int nk;  /* number of elements in `k' */
  int np;  /* number of elements in `p' */
  short nlocvars;  /* number of elements in `locvars' (local variables) */
  lu_byte nactvar;  /* number of active local variables */
  upvaldesc upvalues[LUAI_MAXUPVALUES];  /* upvalues */
  unsigned short actvar[LUAI_MAXVARS];  /* declared-variable stack */
} FuncState;
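
open_func, called at the top of luaY_parser, is what wires a FuncState into this picture. A shortened paraphrase of the Lua 5.1 code (not a verbatim copy): it creates the Proto, links the new FuncState to the enclosing one through prev, and resets the code-generation counters:

static void open_func (LexState *ls, FuncState *fs) {
  lua_State *L = ls->L;
  Proto *f = luaF_newproto(L);   /* fresh function prototype */
  fs->f = f;
  fs->prev = ls->fs;             /* chain to the enclosing function's FuncState */
  fs->ls = ls;
  fs->L = L;
  ls->fs = fs;                   /* the new function becomes the current one */
  fs->pc = 0;
  fs->freereg = 0;
  fs->nk = fs->np = 0;
  fs->nlocvars = 0;
  fs->nactvar = 0;
  fs->bl = NULL;
  /* ... jump lists, constant table `h', GC anchoring, etc. omitted ... */
  f->source = ls->source;
  f->maxstacksize = 2;           /* registers 0/1 are always valid */
}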

Once initialization is complete, lexical analysis starts, i.e. the next token is read by calling luaX_next(&lexstate). The code below is from llex.c:

void luaX_next (LexState *ls) {
  ls->lastline = ls->linenumber;
  if (ls->lookahead.token != TK_EOS) {  /* is there a look-ahead token? */
    ls->t = ls->lookahead;  /* use this one */
    ls->lookahead.token = TK_EOS;  /* and discharge it */
  }
  else
    ls->t.token = llex(ls, &ls->t.seminfo);  /* read next token by calling llex */
}
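
The look-ahead slot consumed above is filled by the companion function luaX_lookahead (again quoted from memory of the Lua 5.1 source, so approximate); it simply runs llex once more and parks the result in ls->lookahead:

void luaX_lookahead (LexState *ls) {
  lua_assert(ls->lookahead.token == TK_EOS);  /* at most one token of look-ahead */
  ls->lookahead.token = llex(ls, &ls->lookahead.seminfo);
}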

The llex function: its body is one long switch...case statement handling every possible leading character. Ordinary identifiers and reserved words end up in the default branch, which deals with whitespace, numbers and names separately.

for (;;) {
    switch (ls->current) {
      case '\n':
      case '\r':
      case '-':
      case '[':
      case '=':
      case '<':
      case '>':
      case '~':
      case '"':
      case '\'':
      case '.':
      case EOZ:
        /* (handling of these cases -- newlines, comments, long brackets,
           relational operators, string literals, concat/dots and end of
           stream -- is omitted from this excerpt) */
      default: {
        if (isspace(ls->current)) {
          lua_assert(!currIsNewline(ls));
          next(ls);
          continue;
        }
        else if (isdigit(ls->current)) {
          read_numeral(ls, seminfo);
          return TK_NUMBER;
        }
        else if (isalpha(ls->current) || ls->current == '_') {
          /* identifier or reserved word */
          TString *ts;
          do {
            save_and_next(ls);
          } while (isalnum(ls->current) || ls->current == '_');
          ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
                                  luaZ_bufflen(ls->buff));
          if (ts->tsv.reserved > 0)  /* reserved word? */
            return ts->tsv.reserved - 1 + FIRST_RESERVED;
          else {
            seminfo->ts = ts;
            return TK_NAME;
          }
        }
        else {
          int c = ls->current;
          next(ls);
          return c;  /* single-char tokens (+ - / ...) */
        }
      }
    }
  }
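
The TK_NUMBER, TK_NAME and FIRST_RESERVED values returned above come from the token enumeration in llex.h; abridged, it looks roughly like this (single characters represent themselves, reserved words and multi-character operators get values starting at FIRST_RESERVED):

#define FIRST_RESERVED	257

enum RESERVED {
  /* terminal symbols denoted by reserved words */
  TK_AND = FIRST_RESERVED, TK_BREAK,
  TK_DO, TK_ELSE, TK_ELSEIF, TK_END, TK_FALSE, TK_FOR, TK_FUNCTION,
  TK_IF, TK_IN, TK_LOCAL, TK_NIL, TK_NOT, TK_OR, TK_REPEAT,
  TK_RETURN, TK_THEN, TK_TRUE, TK_UNTIL, TK_WHILE,
  /* other terminal symbols */
  TK_CONCAT, TK_DOTS, TK_EQ, TK_GE, TK_LE, TK_NE, TK_NUMBER,
  TK_NAME, TK_STRING, TK_EOS
};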

luaX_newstring interns the token's text: if the string is not already present in the global string table, a new TString is created for it. Every reserved word is preloaded into that table when the state is initialized, so any identifier that is not a reserved word yields a TK_NAME token. Whether a string is a reserved word is decided by the test if (ts->tsv.reserved > 0); the full set of token kinds is defined in the header file llex.h.
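
That preloading of reserved words happens in luaX_init, which runs once when a state is created. A paraphrase of the Lua 5.1 version, showing where tsv.reserved gets its nonzero value:

void luaX_init (lua_State *L) {
  int i;
  for (i = 0; i < NUM_RESERVED; i++) {
    TString *ts = luaS_new(L, luaX_tokens[i]);  /* intern the keyword text */
    luaS_fix(ts);                               /* reserved words are never collected */
    ts->tsv.reserved = cast(lu_byte, i + 1);    /* nonzero marks a reserved word */
  }
}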

After the token has been obtained, we enter the chunk function:

static void chunk (LexState *ls) {
  /* chunk -> { stat [`;'] } */
  int islast = 0;
  enterlevel(ls);  /* count nested parser calls to guard against overflow */
  while (!islast && !block_follow(ls->t.token)) {  /* until a final statement or a block-ending token */
    islast = statement(ls);  /* parse one statement */
    testnext(ls, ';');
    lua_assert(ls->fs->f->maxstacksize >= ls->fs->freereg &&
               ls->fs->freereg >= ls->fs->nactvar);
    ls->fs->freereg = ls->fs->nactvar;  /* free registers */
  }
  leavelevel(ls);
}
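
block_follow, used as the loop condition in chunk, just checks whether the current token is one that can end a block; again from the Lua 5.1 source (approximately):

static int block_follow (int token) {
  switch (token) {
    case TK_ELSE: case TK_ELSEIF: case TK_END:
    case TK_UNTIL: case TK_EOS:
      return 1;
    default: return 0;
  }
}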

The statement function analyzes a single statement; it too is one big switch...case. Keywords such as if, while, do, for, function and so on each jump to their own handler, while the default branch handles assignments and function calls.

static int statement (LexState *ls) {
  int line = ls->linenumber;  /* may be needed for error messages */
  switch (ls->t.token) {
    case TK_IF: {  /* stat -> ifstat */
      ifstat(ls, line);
      return 0;
    }
    case TK_WHILE: {  /* stat -> whilestat */
      whilestat(ls, line);
      return 0;
    }
    case TK_DO: {  /* stat -> DO block END */
      luaX_next(ls);  /* skip DO */
      block(ls);
      check_match(ls, TK_END, TK_DO, line);
      return 0;
    }
    case TK_FOR: {  /* stat -> forstat */
      forstat(ls, line);
      return 0;
    }
    case TK_REPEAT: {  /* stat -> repeatstat */
      repeatstat(ls, line);
      return 0;
    }
    case TK_FUNCTION: {
      funcstat(ls, line);  /* stat -> funcstat */
      return 0;
    }
    case TK_LOCAL: {  /* stat -> localstat */
      luaX_next(ls);  /* skip LOCAL */
      if (testnext(ls, TK_FUNCTION))  /* local function? */
        localfunc(ls);
      else
        localstat(ls);
      return 0;
    }
    case TK_RETURN: {  /* stat -> retstat */
      retstat(ls);
      return 1;  /* must be last statement */
    }
    case TK_BREAK: {  /* stat -> breakstat */
      luaX_next(ls);  /* skip BREAK */
      breakstat(ls);
      return 1;  /* must be last statement */
    }
    default: {
      exprstat(ls);
      return 0;  /* to avoid warnings */
    }
  }
}

Expressions inside statements are handled by the exprstat(ls) function; that, together with the generation of Lua bytecode instructions, will be written up when I find the time.




Original article: https://www.cnblogs.com/nazhizq/p/6516561.html