1.TValue
struct {
CommonHeader;
lu_byte reserved;
unsigned int hash;
size_t len;
} tsv;
} TString;
typedef union TKey {
struct {
TValuefields;
struct Node *next; /* for chaining */
} nk;
TValue tvk;
} TKey;
typedef struct Node {
TValue i_val;
TKey i_key;
} Node;
if (!ttisnil(gval(mp)) || mp == dummynode) {
Node *othern;
Node *n = getfreepos(t); /* get a free place */
if (n == NULL) { /* cannot find a free place? */
rehash(L, t, key); /* grow table */
return luaH_set(L, t, key); /* re-insert key into grown table */
}
lua_assert(n != dummynode);
othern = mainposition(t, key2tval(mp));
if (othern != mp) { /* is colliding node out of its main position? */
/* yes; move colliding node into free position */
while (gnext(othern) != mp) othern = gnext(othern); /* find previous */
gnext(othern) = n; /* redo the chain with `n' in place of `mp' */
*n = *mp; /* copy colliding node into free pos. (mp->next also goes) */
gnext(mp) = NULL; /* now `mp' is free */
setnilvalue(gval(mp));
}
else { /* colliding node is in its own main position */
/* new node will go into free position */
gnext(n) = gnext(mp); /* chain new position */
gnext(mp) = n;
mp = n;
}
}
gkey(mp)->value = key->value; gkey(mp)->tt = key->tt;
luaC_barriert(L, t, key);
lua_assert(ttisnil(gval(mp)));
return gval(mp);
}
#define ClosureHeader \
CommonHeader; lu_byte isC; lu_byte nupvalues; GCObject *gclist; \
struct Table *env
/*
** Upvalues
*/
typedef struct UpVal {
CommonHeader;
TValue *v; /* points to stack or to its own value */
union {
TValue value; /* the value (when closed) */
struct { /* double linked list (when open) */
struct UpVal *prev;
struct UpVal *next;
} l;
} u;
} UpVal;
typedef struct
{
Value value;
int tt;
} TValue
union Closure cl; // 闭包函数
struct Table h; // 表
struct Proto p; // 函数原型
struct UpVal uv; // upvalue
struct lua_State th; /* thread */
lua所有类型,Value是union,
typedef union
{
// GCObject *gc; 可以gc的类型
GCheader gch; // 头部
union TString ts; // 字符串
union Udata u; // userdataunion Closure cl; // 闭包函数
struct Table h; // 表
struct Proto p; // 函数原型
struct UpVal uv; // upvalue
struct lua_State th; /* thread */
// 不可gc的类型
void *p;
lua_Number n;
int b;
} Value
GCHeader头部信息:
typedef struct
{
GCObject *next; lu_byte tt; lu_byte marked
}GCHeader;
2.字符串类型
typedef union TString {
L_Umaxalign dummy; /* ensures maximum alignment for strings */struct {
CommonHeader;
lu_byte reserved;
unsigned int hash;
size_t len;
} tsv;
} TString;
这个CommonHeader就是GCheader,其实是同一块内存,这样写法,在GCObject和具体类型中都可以访问到头部。
lua里字符串内存布局:字符串类型表现形式是TString指针,其实是一个TString头部包含字符串描述信息,紧接着才是一个(TString->len)长度的字符串内容,并加一个'\0'字符。
lua里保存字符串的方式:global state有一个全局字符串表strt(stringtable类型),其实是一个哈希表,通过一种哈希算法(保存在TSreing->hash)计算出哈希值,保存到对应表项,对于冲突项使用链表解决(TString->tsv->next保存下一个相同哈希值的字符串)。
lua的这种机制,使得相同的字符串公用一块内存,节省了不少内存,同其他GC对象一样,字符串都是引用着存在。看下新创建一个字符串类型的过程:
TString *luaS_newlstr (lua_State *L, const char *str, size_t l) {
76 GCObject *o;
77 unsigned int h = cast(unsigned int, l); /* seed */
78 size_t step = (l>>5)+1; /* if string is too long, don't hash all its chars */
79 size_t l1;
80 for (l1=l; l1>=step; l1-=step) /* compute hash */
81 h = h ^ ((h<<5)+(h>>2)+cast(unsigned char, str[l1-1]));
82 for (o = G(L)->strt.hash[lmod(h, G(L)->strt.size)];
83 o != NULL;
84 o = o->gch.next) {
85 TString *ts = rawgco2ts(o);
86 if (ts->tsv.len == l && (memcmp(str, getstr(ts), l) == 0)) {
87 /* string may be dead */
88 if (isdead(G(L), o)) changewhite(o);
89 return ts;
90 }
91 }
92 return newlstr(L, str, l, h); /* not found */
93 }
94
76 GCObject *o;
77 unsigned int h = cast(unsigned int, l); /* seed */
78 size_t step = (l>>5)+1; /* if string is too long, don't hash all its chars */
79 size_t l1;
80 for (l1=l; l1>=step; l1-=step) /* compute hash */
81 h = h ^ ((h<<5)+(h>>2)+cast(unsigned char, str[l1-1]));
82 for (o = G(L)->strt.hash[lmod(h, G(L)->strt.size)];
83 o != NULL;
84 o = o->gch.next) {
85 TString *ts = rawgco2ts(o);
86 if (ts->tsv.len == l && (memcmp(str, getstr(ts), l) == 0)) {
87 /* string may be dead */
88 if (isdead(G(L), o)) changewhite(o);
89 return ts;
90 }
91 }
92 return newlstr(L, str, l, h); /* not found */
93 }
94
3.table类型
typedef struct Table {
CommonHeader;
lu_byte flags; /* 1<<p means tagmethod(p) is not present */
lu_byte lsizenode; /* log2 of size of `node' array */
struct Table *metatable;
TValue *array; /* array part */
Node *node;
Node *lastfree; /* any free position is before this position */
GCObject *gclist;
int sizearray; /* size of `array' array */
} Table;
CommonHeader;
lu_byte flags; /* 1<<p means tagmethod(p) is not present */
lu_byte lsizenode; /* log2 of size of `node' array */
struct Table *metatable;
TValue *array; /* array part */
Node *node;
Node *lastfree; /* any free position is before this position */
GCObject *gclist;
int sizearray; /* size of `array' array */
} Table;
Table类型被分为两部分了,一部分是array一部分是hash table,这样可以对用户使用数组进行优化,lua_creatable(L, narray, nrec) API :narray代表数组部分大小,nrec代表hash表部分大小。
对于数组部分的实现没啥好说了,看下哈希表的算法。
哈希表数据结构:
typedef union TKey {
struct {
TValuefields;
struct Node *next; /* for chaining */
} nk;
TValue tvk;
} TKey;
typedef struct Node {
TValue i_val;
TKey i_key;
} Node;
其实是一个Node数组,Node分为key,value字段,key其实也可以是任何value,TKey又使用了之前的技巧,nk,tvk是union的字段,公用同样的内存,这样既可TKey->tvk取值,又可TKey->nk->..取值,另外key保存了一个指向冲突表项的指针。
总而言之,这个哈希表是这样的:内存上只是一块Node的连续数组,插入时:依据key算出hash value,mod到表项,如果这一表项是空项,直接插入。如果冲突,先取一块新的空表项(根据Table的lastfree取),之后判断冲突表项的key hash后是否应该放在自己所在的位置,如果不应该放在这,把它挪到刚取出的空表项,腾出的表项放新key;如果确实是放在这,也就是与新key有同样的hash
value,则把新key插入空表项,并设计好链接关系。看下代码吧:
static TValue *newkey (lua_State *L, Table *t, const TValue *key) {
Node *mp = mainposition(t, key);if (!ttisnil(gval(mp)) || mp == dummynode) {
Node *othern;
Node *n = getfreepos(t); /* get a free place */
if (n == NULL) { /* cannot find a free place? */
rehash(L, t, key); /* grow table */
return luaH_set(L, t, key); /* re-insert key into grown table */
}
lua_assert(n != dummynode);
othern = mainposition(t, key2tval(mp));
if (othern != mp) { /* is colliding node out of its main position? */
/* yes; move colliding node into free position */
while (gnext(othern) != mp) othern = gnext(othern); /* find previous */
gnext(othern) = n; /* redo the chain with `n' in place of `mp' */
*n = *mp; /* copy colliding node into free pos. (mp->next also goes) */
gnext(mp) = NULL; /* now `mp' is free */
setnilvalue(gval(mp));
}
else { /* colliding node is in its own main position */
/* new node will go into free position */
gnext(n) = gnext(mp); /* chain new position */
gnext(mp) = n;
mp = n;
}
}
gkey(mp)->value = key->value; gkey(mp)->tt = key->tt;
luaC_barriert(L, t, key);
lua_assert(ttisnil(gval(mp)));
return gval(mp);
}
看下接口设计,每次插入时,luaH_set其实是返回了这个key对应的value指针,所以使用时类似这样 setobj2t(L, luaH_set(L, hvalue(t), L->top-2), L->top-1);
4.函数类型
typedef struct CClosure {
ClosureHeader;
lua_CFunction f;
TValue upvalue[1];
} CClosure;
typedef struct LClosure {
ClosureHeader;
struct Proto *p;
UpVal *upvals[1];
} LClosure;
typedef union Closure {
CClosure c;
LClosure l;
} Closure;
ClosureHeader;
lua_CFunction f;
TValue upvalue[1];
} CClosure;
typedef struct LClosure {
ClosureHeader;
struct Proto *p;
UpVal *upvals[1];
} LClosure;
typedef union Closure {
CClosure c;
LClosure l;
} Closure;
#define ClosureHeader \
CommonHeader; lu_byte isC; lu_byte nupvalues; GCObject *gclist; \
struct Table *env
分为c函数闭包和lua函数闭包,其实都是包含函数原型,upvalues,环境表这三个部分。关于c函数闭包实现比较简单,主要是通过接口lua_pushcclosure创建,函数实现中可以通过lua_getvalue访问upvalue。lua函数闭包和虚拟机相关,后面再描述。
5.upvalue
upvalue不是单独的一个对外可用的类型,他是Closure的一部分,是为了引用在这个函数外部的局部变量而设计的。许多语言限制了这种特性,比如python用作用域限制访问外部的局部变量,pascal的函数类型不是第一类类型,即你必须保证外部的变量一直在栈中,以求引用时,内存仍然存在。c语言两种限制方式都存在。lua里面使用upvalue,以很少的代码实现了访问函数外部局部变量的特性。看定义:
/*
** Upvalues
*/
typedef struct UpVal {
CommonHeader;
TValue *v; /* points to stack or to its own value */
union {
TValue value; /* the value (when closed) */
struct { /* double linked list (when open) */
struct UpVal *prev;
struct UpVal *next;
} l;
} u;
} UpVal;
UpVal有两个状态open和closed:
open状态是指UpVal所引用的值还在外部函数栈上保存,这时候UpVal->v指向栈内存,UpVal->prev和UpVal->next则是用于把所有UpVal链接到一个链表里,链表保存在lua_State的openupval,这样如果有不同函数引用同一个变量,就可以直接使用这个UpVal,而不需要再创建一个新的。
closed状态是指UpVal所引用的值所在的栈已经被释放了,例如:
function add (x)
return function (y) return y + x end
end
add2 = add(2)
print(add2())
当执行add2()时add2引用的x所在的栈已经释放了,这时,lua会复制x到UpVal->l.value,并且UpVal->v指向自己的value。