redis学习笔记(一): sds

sds(Simple Dynamic String): 它其实就是普通的字符串,只是在每个字符串的前面加了一个管理用的头部,相关类型结构的定义如下

/* An sds is a plain char pointer. sdsnewlen() returns a pointer located
 * sdsHdrSize(type) bytes past the start of the allocation, so the managing
 * header sits immediately before the address an sds holds. */
typedef char *sds;

/* Note: sdshdr5 is never used, we just access the flags byte directly.
 * However is here to document the layout of type 5 SDS strings.
 *
 * Unlike the other header variants, type 5 has no len/alloc fields: the
 * string length shares the single flags byte with the type tag. */
struct __attribute__ ((__packed__)) sdshdr5 {
    unsigned char flags; /* 3 lsb of type, and 5 msb of string length */
    char buf[]; /* string bytes; the flags byte is reachable as buf[-1] */
};
/* Header variant with 8-bit len and alloc fields. The struct is packed so
 * that no padding separates flags from buf: flags is always buf[-1]. */
struct __attribute__ ((__packed__)) sdshdr8 {
    uint8_t len; /* number of bytes currently used by the string */
    uint8_t alloc; /* allocated capacity, excluding the header and null terminator */
    unsigned char flags; /* 3 lsb of type, 5 unused bits */
    char buf[]; /* string bytes */
};
/* Header variant with 16-bit len and alloc fields. The struct is packed so
 * that no padding separates flags from buf: flags is always buf[-1]. */
struct __attribute__ ((__packed__)) sdshdr16 {
    uint16_t len; /* number of bytes currently used by the string */
    uint16_t alloc; /* allocated capacity, excluding the header and null terminator */
    unsigned char flags; /* 3 lsb of type, 5 unused bits */
    char buf[]; /* string bytes */
};
/* Header variant with 32-bit len and alloc fields. The struct is packed so
 * that no padding separates flags from buf: flags is always buf[-1]. */
struct __attribute__ ((__packed__)) sdshdr32 {
    uint32_t len; /* number of bytes currently used by the string */
    uint32_t alloc; /* allocated capacity, excluding the header and null terminator */
    unsigned char flags; /* 3 lsb of type, 5 unused bits */
    char buf[]; /* string bytes */
};
/* Header variant with 64-bit len and alloc fields. The struct is packed so
 * that no padding separates flags from buf: flags is always buf[-1]. */
struct __attribute__ ((__packed__)) sdshdr64 {
    uint64_t len; /* number of bytes currently used by the string */
    uint64_t alloc; /* allocated capacity, excluding the header and null terminator */
    unsigned char flags; /* 3 lsb of type, 5 unused bits */
    char buf[]; /* string bytes */
};

从定义上看,5种定义对应了最大长度不同的字符串,定义这5种不同的类型可能是为了尽量减少sdshdr占用的空间。
为了区分sdshdr是哪一种类型,它在每一种定义中都加上了一个8bit的flags字段,用其中的低3位标识sdshdr的类型。
为了能从buf直接找到flags,定义时必须用__attribute__((__packed__))禁止编译器为了对齐而在成员之间插入填充字节,这样不管是哪种类型的hdr,flags都紧挨在buf之前,都可以用buf[-1]找到对应的flags

相应地,sds内部实现了一些基本的操作,主要看看sdsnewlen和sdsMakeRoomFor这两个函数,大概就能知道sds的内部操作方式以及内存分配的方式。

sdsnewlen是根据指定内容和指定长度创建一个新的sds。这段代码有个bug,见下面的注释

/* Create a new sds string with the content specified by the 'init' pointer
 * and 'initlen'.
 * If NULL is used for 'init' the string is initialized with zero bytes.
 *
 * The string is always null-terminated (all the sds strings are, always) so
 * even if you create an sds string with:
 *
 * mystring = sdsnewlen("abc",3);
 *
 * You can print the string with printf() as there is an implicit \0 at the
 * end of the string. However the string is binary safe and can contain
 * \0 characters in the middle, as the length is stored in the sds header.
 *
 * Returns a pointer to the string bytes (located right after the header),
 * or NULL if the total size would overflow size_t or the allocation fails. */
sds sdsnewlen(const void *init, size_t initlen) {
    void *sh;
    sds s;
    /* Pick the header type that matches the requested length. */
    char type = sdsReqType(initlen);
    /* Empty strings are usually created in order to append. Use type 8
     * since type 5 is not good at this. */
    if (type == SDS_TYPE_5 && initlen == 0) type = SDS_TYPE_8;
    /* The header size depends on the header type. */
    int hdrlen = sdsHdrSize(type);
    unsigned char *fp; /* flags pointer. */
    /* Header + payload + null terminator. */
    size_t total = (size_t)hdrlen + initlen + 1;

    /* Catch size_t wraparound: a wrapped total would yield an allocation
     * smaller than 'initlen' and the memcpy below would write past it. */
    if (total <= initlen) return NULL;

    sh = s_malloc(total);
    /* The allocation must be checked before it is touched. Earlier versions
     * of this code ran the memset below first, dereferencing NULL whenever
     * the allocation failed. */
    if (sh == NULL) return NULL;
    /* No initial content supplied: zero the whole new allocation. */
    if (!init)
        memset(sh, 0, total);
    /* Locate buf; this is the pointer handed back to the caller. */
    s = (char*)sh+hdrlen;
    /* The flags byte is the one immediately before buf. */
    fp = ((unsigned char*)s)-1;
    switch(type) {
        case SDS_TYPE_5: {
            /* Type 5 keeps the length in the flags byte, above the type bits. */
            *fp = type | (initlen << SDS_TYPE_BITS);
            break;
        }
        case SDS_TYPE_8: {
            SDS_HDR_VAR(8,s);
            sh->len = initlen;
            sh->alloc = initlen;
            *fp = type;
            break;
        }
        case SDS_TYPE_16: {
            SDS_HDR_VAR(16,s);
            sh->len = initlen;
            sh->alloc = initlen;
            *fp = type;
            break;
        }
        case SDS_TYPE_32: {
            SDS_HDR_VAR(32,s);
            sh->len = initlen;
            sh->alloc = initlen;
            *fp = type;
            break;
        }
        case SDS_TYPE_64: {
            SDS_HDR_VAR(64,s);
            sh->len = initlen;
            sh->alloc = initlen;
            *fp = type;
            break;
        }
    }
    /* Initial content was supplied and is non-empty: copy it into buf. */
    if (initlen && init)
        memcpy(s, init, initlen);
    s[initlen] = '\0';
    return s;
}

sdsMakeRoomFor是在已存在的sds上动态扩展其空间,可以很清楚地看到扩展时使用的策略。

/* Enlarge the free space at the end of the sds string so that the caller
 * is sure that after calling this function can overwrite up to addlen
 * bytes after the end of the string, plus one more byte for nul term.
 *
 * Note: this does not change the *length* of the sds string as returned
 * by sdslen(), but only the free buffer space we have.
 *
 * Returns the (possibly relocated) sds. Returns NULL if the new size would
 * overflow size_t or if the allocator returns NULL; this function does not
 * free 's' on any of those paths. */
sds sdsMakeRoomFor(sds s, size_t addlen) {
    void *sh, *newsh;
    size_t avail = sdsavail(s);
    size_t len, reqlen, newlen, total;
    char type, oldtype = s[-1] & SDS_TYPE_MASK;
    int hdrlen;

    /* Return ASAP if there is enough space left. */
    if (avail >= addlen) return s;

    len = sdslen(s);
    sh = (char*)s-sdsHdrSize(oldtype);
    reqlen = len+addlen;
    /* Catch size_t wraparound in len+addlen. */
    if (reqlen < len) return NULL;

    /* Preallocation policy: below SDS_MAX_PREALLOC double the required
     * length, otherwise add SDS_MAX_PREALLOC on top of it. */
    newlen = reqlen;
    if (newlen < SDS_MAX_PREALLOC)
        newlen *= 2;
    else
        newlen += SDS_MAX_PREALLOC;
    /* If the growth step wrapped, newlen ends up below what was required. */
    if (newlen < reqlen) return NULL;

    /* Header type needed to represent newlen. */
    type = sdsReqType(newlen);

    /* Don't use type 5: the user is appending to the string and type 5 is
     * not able to remember empty space, so sdsMakeRoomFor() must be called
     * at every appending operation. */
    if (type == SDS_TYPE_5) type = SDS_TYPE_8;

    hdrlen = sdsHdrSize(type);
    /* Header + payload + null terminator; guard against wraparound so the
     * allocation can never be smaller than the data copied into it. */
    total = (size_t)hdrlen+newlen+1;
    if (total <= newlen) return NULL;

    /* Same header type before and after: resize in place with realloc.
     * Otherwise allocate a new block, copy the old content and release the
     * old block. */
    if (oldtype==type) {
        newsh = s_realloc(sh, total);
        if (newsh == NULL) return NULL;
        s = (char*)newsh+hdrlen;
    } else {
        /* Since the header size changes, need to move the string forward,
         * and can't use realloc */
        newsh = s_malloc(total);
        if (newsh == NULL) return NULL;
        /* len+1 so the null terminator is copied along with the content. */
        memcpy((char*)newsh+hdrlen, s, len+1);
        s_free(sh);
        s = (char*)newsh+hdrlen;
        /* The new header starts out blank: write its type and length. */
        s[-1] = type;
        sdssetlen(s, len);
    }
    /* Record the new capacity in the header. */
    sdssetalloc(s, newlen);
    return s;
}
原文地址:https://www.cnblogs.com/flypighhblog/p/7748186.html