redis学习笔记(三): dict

dict是redis中比较重要的结构了，不像sds, adlist那样是底层的基础结构。涉及到的相关结构体:

/* dictEntry就是一个key-value对 */
typedef struct dictEntry {
void *key;
union {
void *val;
uint64_t u64;
int64_t s64;
double d;
} v;
struct dictEntry *next;
} dictEntry;

/* 每一个dict都有一个dictType，特定于dict的操作的集合。比如，不同类型的dict，其比较key的方式可能会是不一样的。 */
typedef struct dictType {
unsigned int (*hashFunction)(const void *key);
void *(*keyDup)(void *privdata, const void *key);
void *(*valDup)(void *privdata, const void *obj);
int (*keyCompare)(void *privdata, const void *key1, const void *key2);
void (*keyDestructor)(void *privdata, void *key);
void (*valDestructor)(void *privdata, void *obj);
} dictType;

/* 按照代码中的注释，dictht就是字典的hash表的结构 */
typedef struct dictht {
dictEntry **table;    /* 指针数组，做为hash table的bucket */
unsigned long size; /* table数组的大小，看起来在实现中一直会是2的倍数 */
unsigned long sizemask; /* 为了方便操作记录mask */
unsigned long used; /* 已经使用的bucket个数 */
} dictht;

/* 关键数据结构dict的定义

 * 对于ht[2]这个成员，从实现上看，可能是出于实时的考虑，rehash过程(可能)需要分好几次才全部完成。

 * 那么在rehash过程中，需要一个额外的表做临时存储。

 * rehash的过程，总是从ht[0]往ht[1]迁移数据。迁移过程中，两个表都有数据。 

 */

typedef struct dict {
dictType *type;   /* 特定于dict的操作集合 */
void *privdata;　/* 特定于dict的数据 */
dictht ht[2];　　 /* 两个hash表 */
long rehashidx; /* rehashing not in progress if rehashidx == -1 */
int iterators; /* number of iterators currently running */
} dict;

/* If safe is set to 1 this is a safe iterator, that means, you can call
* dictAdd, dictFind, and other functions against the dictionary even while
* iterating. Otherwise it is a non safe iterator, and only dictNext()
* should be called while iterating. */
typedef struct dictIterator {
dict *d;
long index;
int table, safe;
dictEntry *entry, *nextEntry;
/* unsafe iterator fingerprint for misuse detection. */
long long fingerprint;
} dictIterator;

内部提供的操作也比较丰富，目前看到的主要还是dictCreate, dictAdd, dictReplace, dictDelect这几个基本操作。

当dict的大小发生改变时会开始rehash过程。比如：
1. 往dict添加新key-value数据时，如果size大于4并且used/size的比值超过一定范围，则会调用dictExpand对dict进行扩容。扩容后，dict的table包含的bucket个数为_dictNextPower(used*2)
2. 在dataCron处理过程中，如果dict或者expires表中size大于4、used大于0并且used/size的比值小于10%，则会调用dictExpand对dict进行收缩。收缩后，dict的table包含的bucket个数为_dictNextPower(max(4, used))
dictExpand的处理比较简单，代码如下：

/* Expand or create the hash table */
/* 如果是扩容，size是used*2，如果是收缩，size是max(used, 4) */
int dictExpand(dict *d, unsigned long size)
{
    dictht n; /* the new hash table */
    unsigned long realsize = _dictNextPower(size);

    /* the size is invalid if it is smaller than the number of
     * elements already inside the hash table */
    if (dictIsRehashing(d) || d->ht[0].used > size)
        return DICT_ERR;

    /* Rehashing to the same table size is not useful. */
    if (realsize == d->ht[0].size) return DICT_ERR;

    /* Allocate the new hash table and initialize all pointers to NULL */
    /* 新的dictht */
    n.size = realsize;
    n.sizemask = realsize-1;
    n.table = zcalloc(realsize*sizeof(dictEntry*));
    n.used = 0;

    /* Is this the first initialization? If so it's not really a rehashing
     * we just set the first hash table so that it can accept keys. */
     /* 如果是创建新的hashtable，则n就做为ht[0] */
    if (d->ht[0].table == NULL) {
        d->ht[0] = n;
        return DICT_OK;
    }

    /* 如果是resize，那么n就做为ht[1] */
    /* Prepare a second hash table for incremental rehashing */
    d->ht[1] = n;
    /* 标记rehas的开始 */
    d->rehashidx = 0;
    return DICT_OK;
}

从它的实现来看，如果是创建新的hashtable，则只是给ht[0].table做初始化；如果是resize，则会给ht[1]做初始化，并标记rehash的开始(dict->rehashidx设置为0)。
rehash的真正执行过程是在函数dictRehash当中，其实现代码如下:

/* Performs N steps of incremental rehashing. Returns 1 if there are still
 * keys to move from the old to the new hash table, otherwise 0 is returned.
 *
 * Note that a rehashing step consists in moving a bucket (that may have more
 * than one key as we use chaining) from the old to the new hash table, however
 * since part of the hash table may be composed of empty spaces, it is not
 * guaranteed that this function will rehash even a single bucket, since it
 * will visit at max N*10 empty buckets in total, otherwise the amount of
 * work it does would be unbound and the function may block for a long time. */
 /* n是这一次rehash需要处理的bucket的个数 */
int dictRehash(dict *d, int n) {
    int empty_visits = n*10; /* Max number of empty buckets to visit. */
    if (!dictIsRehashing(d)) return 0;

    while(n-- && d->ht[0].used != 0) {
        dictEntry *de, *nextde;

        /* Note that rehashidx can't overflow as we are sure there are more
         * elements because ht[0].used != 0 */
        assert(d->ht[0].size > (unsigned long)d->rehashidx);
        /* rehashidx记录着下一个待处理的bucket下标 */
        while(d->ht[0].table[d->rehashidx] == NULL) {
            d->rehashidx++;
            if (--empty_visits == 0) return 1;
        }
        de = d->ht[0].table[d->rehashidx];
        /* Move all the keys in this bucket from the old to the new hash HT */
        /* 将当前bucket上的所有元素迁移到ht[1]的某个bucket上 */
        while(de) {
            unsigned int h;

            nextde = de->next;
            /* Get the index in the new hash table */
            h = dictHashKey(d, de->key) & d->ht[1].sizemask;
            de->next = d->ht[1].table[h];
            d->ht[1].table[h] = de;
            d->ht[0].used--;
            d->ht[1].used++;
            de = nextde;
        }
        /* 该bucket上没有元素了 */
        d->ht[0].table[d->rehashidx] = NULL;
        d->rehashidx++;
    }

    /* Check if we already rehashed the whole table... */
    /* 如果ht[0]全部处理完了，就释放ht[0].table，将ht[1]做为新的ht[0]，再清空ht[1]的各个成员的值，最后标记rehash的结束并返回0 */
    if (d->ht[0].used == 0) {
        zfree(d->ht[0].table);
        d->ht[0] = d->ht[1];
        _dictReset(&d->ht[1]);
        d->rehashidx = -1;
        return 0;
    }

    /* More to rehash... */
    /* 还有更多的元素待处理则返回1 */
    return 1;
}

从dictRehash的实现可以看出，rehash的过程可能并不是一次完成的。这个可能是出于实时性的考虑
会执行rehash过程的点有如下几个：
1. dataCron处理流程中，对于所有db中的dict和expires表，如果开始了rehash，则调用dictRehashMilliseconds执行rehash，它会在1ms内尽可能多地处理buckets
2. dictAddRaw，添加新的key-value时。如果需要执行rehash，只处理一个bucket
3. dictGenericDelete，删除特定key对应的条目时。如果需要执行rehash，只处理一个bucket
4. dictFind，查找特定key对应的dictEntry。如果需要执行rehash，只处理一个bucket
5. dictGetRandomKey，随机返回一个key-value对时。如果需要执行rehash，只处理一个bucket
6. dictGetSomeKeys，随机返回一些key-value时。如果需要执行rehash，只处理一个bucket
这里有一个疑惑：在dataCron的处理中，只要dict/expires开始了rehash，则不管其iterators是否为0都执行rehash的过程；而后面的5个执行点中，不仅要判断是否开始了rehash，还要判断iterators是否为0.为什么有这个区别？