深入redis内部--字典实现

redis的字典定义和实现在dict.h和dict.c文件中。

1.字典结构

typedef struct dict {
dictType *type;  //定义了字典需要的函数
void *privdata;
dictht ht[2];     //哈希表结构
int rehashidx; //下一个需要扩容的字典编号，若rehashidx == -1 则不会进行重新散列。
int iterators; //当前正在运行的迭代器数目
} dict;

其中涉及到数据结构，如下所示：

1.1 字典类型，包含了一系列字典所需要用到的函数

typedef struct dictType {
unsigned int (*hashFunction)(const void *key);        //hash函数
void *(*keyDup)(void *privdata, const void *key);    //键复制
void *(*valDup)(void *privdata, const void *obj);     //值复制
int (*keyCompare)(void *privdata, const void *key1, const void *key2);  //key比较
void (*keyDestructor)(void *privdata, void *key);    //key析构
void (*valDestructor)(void *privdata, void *obj);     //value析构
} dictType;

1.2 哈希表结构，每个字典有两个哈希表。当哈希表扩容时实现散列。

typedef struct dictht {
dictEntry **table;
unsigned long size;                 //桶的大小，是2的指数
unsigned long sizemask;         //sizemask=size-1，方便取模（i%sizemask 开放链地址法处理hash冲突）。
unsigned long used;               //哈希表中的记录数
} dictht;

1.3 dictEntry为字典的条目，其定义如下：

typedef struct dictEntry {
void *key;                           // 键
union {                              //值的共用体
void *val;
uint64_t u64;
int64_t s64;
} v;
struct dictEntry *next;
} dictEntry;

2. 字典的遍历--字典遍历器

typedef struct dictIterator {
dict *d;
int table, index, safe;
dictEntry *entry, *nextEntry;
long long fingerprint; /* unsafe iterator fingerprint for misuse detection */
} dictIterator;

注意：当safe=1时，该遍历器是安全的，即字典可以在遍历的同时执行dictAdd, dictFind, 和别的函数。否则遍历器是不安全的，遍历时只能执行dictNext()。

迭代器提供了遍历字典中所有元素的方法，通过dicGetIterator()获得迭代器后，使用dictNext(dictIterator *)获得下一个元素。遍历的过程，先从ht[0]开始，依次从第一个桶table[0]开始遍历桶中的元素，然后遍历table[1],'*** ,table[size],若正在扩容，则会继续遍历ht[1]中的桶。遍历桶中元素时，依次访问链表中的每一个元素。

3.宏定义函数

#define dictFreeVal(d, entry) 
if ((d)->type->valDestructor) 
(d)->type->valDestructor((d)->privdata, (entry)->v.val)

#define dictSetVal(d, entry, _val_) do { 
if ((d)->type->valDup) 
entry->v.val = (d)->type->valDup((d)->privdata, _val_); 
else 
entry->v.val = (_val_); 
} while(0)

#define dictSetSignedIntegerVal(entry, _val_) 
do { entry->v.s64 = _val_; } while(0)

#define dictSetUnsignedIntegerVal(entry, _val_) 
do { entry->v.u64 = _val_; } while(0)

#define dictFreeKey(d, entry) 
if ((d)->type->keyDestructor) 
(d)->type->keyDestructor((d)->privdata, (entry)->key)

#define dictSetKey(d, entry, _key_) do { 
if ((d)->type->keyDup) 
entry->key = (d)->type->keyDup((d)->privdata, _key_); 
else 
entry->key = (_key_); 
} while(0)

#define dictCompareKeys(d, key1, key2) 
(((d)->type->keyCompare) ? 
(d)->type->keyCompare((d)->privdata, key1, key2) : 
(key1) == (key2))

#define dictHashKey(d, key) (d)->type->hashFunction(key)
#define dictGetKey(he) ((he)->key)
#define dictGetVal(he) ((he)->v.val)
#define dictGetSignedIntegerVal(he) ((he)->v.s64)
#define dictGetUnsignedIntegerVal(he) ((he)->v.u64)
#define dictSlots(d) ((d)->ht[0].size+(d)->ht[1].size)
#define dictSize(d) ((d)->ht[0].used+(d)->ht[1].used)
#define dictIsRehashing(ht) ((ht)->rehashidx != -1)

4. 字典提供的api，有字典的创建，增加、删除、修改记录，还有迭代器（前面已经介绍）和自动扩容（下面介绍）。

dict *dictCreate(dictType *type, void *privDataPtr);
int dictExpand(dict *d, unsigned long size);
int dictAdd(dict *d, void *key, void *val);
dictEntry *dictAddRaw(dict *d, void *key);
int dictReplace(dict *d, void *key, void *val);
dictEntry *dictReplaceRaw(dict *d, void *key);
int dictDelete(dict *d, const void *key);
int dictDeleteNoFree(dict *d, const void *key);
void dictRelease(dict *d);
dictEntry * dictFind(dict *d, const void *key);
void *dictFetchValue(dict *d, const void *key);
int dictResize(dict *d);
dictIterator *dictGetIterator(dict *d);
dictIterator *dictGetSafeIterator(dict *d);
dictEntry *dictNext(dictIterator *iter);
void dictReleaseIterator(dictIterator *iter);
dictEntry *dictGetRandomKey(dict *d);
void dictPrintStats(dict *d);
unsigned int dictGenHashFunction(const void *key, int len);
unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len);
void dictEmpty(dict *d);
void dictEnableResize(void);
void dictDisableResize(void);
int dictRehash(dict *d, int n);
int dictRehashMilliseconds(dict *d, int ms);
void dictSetHashFunctionSeed(unsigned int initval);
unsigned int dictGetHashFunctionSeed(void);

5.外部定义变量

 /* 哈希表类型*/

extern dictType dictTypeHeapStringCopyKey;
extern dictType dictTypeHeapStrings;
extern dictType dictTypeHeapStringCopyKeyValue;

6. 自动扩容

Redis使用标识dict_can_resize来记录字典是否可以扩容，可以使用dictEnableResize()方法和dictDisableResize()来改变此标识。使用dictResize()来扩容，但需要首先判断是否允许扩容及是否正在扩容。若可以扩容，则调用dictExpand()扩容，然后调用dictRehashMilliseconds()启动扩容，并指定扩容过程中记录的copy速度。请看程序：

6.1 dictResize()

/* Resize the table to the minimal size that contains all the elements,
 * but with the invariant of a USED/BUCKETS ratio near to <= 1 */
int dictResize(dict *d)
{
    int minimal;

    if (!dict_can_resize || dictIsRehashing(d)) return DICT_ERR;
    minimal = d->ht[0].used;
    if (minimal < DICT_HT_INITIAL_SIZE)
        minimal = DICT_HT_INITIAL_SIZE;
    return dictExpand(d, minimal);
}

6.2 dictExpand()

/* Expand or create the hash table */
int dictExpand(dict *d, unsigned long size)
{
    dictht n; /* the new hash table */
    unsigned long realsize = _dictNextPower(size);

    /* the size is invalid if it is smaller than the number of
     * elements already inside the hash table */
    if (dictIsRehashing(d) || d->ht[0].used > size)
        return DICT_ERR;

    /* Allocate the new hash table and initialize all pointers to NULL */
    n.size = realsize;
    n.sizemask = realsize-1;
    n.table = zcalloc(realsize*sizeof(dictEntry*));
    n.used = 0;

    /* Is this the first initialization? If so it's not really a rehashing
     * we just set the first hash table so that it can accept keys. */
    if (d->ht[0].table == NULL) {
        d->ht[0] = n;
        return DICT_OK;
    }

    /* Prepare a second hash table for incremental rehashing */
    d->ht[1] = n;
    d->rehashidx = 0;
    return DICT_OK;
}

6.3

/* Rehash for an amount of time between ms milliseconds and ms+1 milliseconds */
int dictRehashMilliseconds(dict *d, int ms) {
    long long start = timeInMilliseconds();
    int rehashes = 0;

    while(dictRehash(d,100)) {
        rehashes += 100;
        if (timeInMilliseconds()-start > ms) break;
    }
    return rehashes;
}