数据结构-SDS


底层数据结构-SDS

SDS数据结构
typedef char *sds;  //sds本质也是字符串数组

/* Note: sdshdr5 is never used, we just access the flags byte directly.
 * However is here to document the layout of type 5 SDS strings. */
struct __attribute__ ((__packed__)) sdshdr5 {
    unsigned char flags; /* 3 lsb of type, and 5 msb of string length */
    char buf[];
};
struct __attribute__ ((__packed__)) sdshdr8 {
    uint8_t len; /* used */
    uint8_t alloc; /* excluding the header and null terminator */
    unsigned char flags; /* 3 lsb of type, 5 unused bits */
    char buf[];
};
struct __attribute__ ((__packed__)) sdshdr16 {
    uint16_t len; /* used */
    uint16_t alloc; /* excluding the header and null terminator */
    unsigned char flags; /* 3 lsb of type, 5 unused bits */
    char buf[];
};
struct __attribute__ ((__packed__)) sdshdr32 {
    uint32_t len; /* used */
    uint32_t alloc; /* excluding the header and null terminator */
    unsigned char flags; /* 3 lsb of type, 5 unused bits */
    char buf[];
};
struct __attribute__ ((__packed__)) sdshdr64 {
    uint64_t len; /* used */
    uint64_t alloc; /* excluding the header and null terminator */
    unsigned char flags; /* 3 lsb of type, 5 unused bits */
    char buf[];
};

从上述定义我们可以抽象得出SDS的基础结构如下

struct sdshr{
    T len;      //字符串长度
    T alloc;    //分配的空间大小
    char flags; //sds类型标记
    char buf[]; //存放实际的字符串
};
与Char*字符串的区别
  • 由于sds存储len字段,所以计算字符串长度的时间复杂度为O(1),而char*则需要从头遍历O(n)。
  • 通过空间预分配和惰性空间释放,减少了修改字符串时带来的内存重新分配的问题。
  • 由于sds通过len来标识字符串长度,所以可以存储任何二进制数据,而char*通过\0来标识字符串结束。
  • 由于sds存储len和alloc字段,在函数计算过程中可以有效避免缓冲区溢出问题。
应用场景

在redis中,除字符串字面量,绝大部分可以被修改的字符串值,都是通过sds来标识。

SDS的常用方法
  • 根据字符串长度,获取匹配的sds类型。(注意每个类型所能存储的字符串长度)
static inline char sdsReqType(size_t string_size) {
    if (string_size < 1<<5)
        return SDS_TYPE_5;  //32
    if (string_size < 1<<8)
        return SDS_TYPE_8;  //256
    if (string_size < 1<<16)
        return SDS_TYPE_16; //65536
#if (LONG_MAX == LLONG_MAX)
    if (string_size < 1ll<<32)
        return SDS_TYPE_32; //4,294,967,296
    return SDS_TYPE_64;
#else
    return SDS_TYPE_32;
#endif
}
  • 获取sds header长度。(注意每个sds类型的header长度)
static inline int sdsHdrSize(char type) {
    switch(type&SDS_TYPE_MASK) {
        case SDS_TYPE_5:
            return sizeof(struct sdshdr5); //1
        case SDS_TYPE_8:
            return sizeof(struct sdshdr8); //1+1+1=3
        case SDS_TYPE_16:
            return sizeof(struct sdshdr16); //2+2+1=5
        case SDS_TYPE_32:
            return sizeof(struct sdshdr32); //4+4+1=9
        case SDS_TYPE_64:
            return sizeof(struct sdshdr64); //8+8+1=17
    }
    return 0;
}
  • 创建sds对象
/* Create a new sds string with the content specified by the 'init' pointer and 'initlen'.*/
sds _sdsnewlen(const void *init, size_t initlen, int trymalloc) {
    void *sh;
    sds s;
    char type = sdsReqType(initlen);
    /* Empty strings are usually created in order to append. Use type 8
     * since type 5 is not good at this. */
    if (type == SDS_TYPE_5 && initlen == 0) type = SDS_TYPE_8;
    int hdrlen = sdsHdrSize(type);
    unsigned char *fp; /* flags pointer. */
    size_t usable;

    assert(initlen + hdrlen + 1 > initlen); /* Catch size_t overflow 避免长度溢出的问题*/ 
    sh = trymalloc?
        s_trymalloc_usable(hdrlen+initlen+1, &usable) :
        s_malloc_usable(hdrlen+initlen+1, &usable);
    if (sh == NULL) return NULL;
    if (init==SDS_NOINIT)
        init = NULL;
    else if (!init)
        memset(sh, 0, hdrlen+initlen+1);
    s = (char*)sh+hdrlen; //这里s指向实际内容存储的地方(及buf开始的地方)
    fp = ((unsigned char*)s)-1; //获取sds类型,能这样计算的原因是__packed__进行了字段对齐
    usable = usable-hdrlen-1;   //计算出实际可用的空间(排除sds头和\0的占位符)
    if (usable > sdsTypeMaxSize(type))
        usable = sdsTypeMaxSize(type);
    switch(type) {
        case SDS_TYPE_5: {
            *fp = type | (initlen << SDS_TYPE_BITS);
            break;
        }
        case SDS_TYPE_8: {
            SDS_HDR_VAR(8,s);
            sh->len = initlen;
            sh->alloc = usable;
            *fp = type;
            break;
        }
        case SDS_TYPE_16: {
            SDS_HDR_VAR(16,s);
            sh->len = initlen;
            sh->alloc = usable;
            *fp = type;
            break;
        }
        case SDS_TYPE_32: {
            SDS_HDR_VAR(32,s);
            sh->len = initlen;
            sh->alloc = usable;
            *fp = type;
            break;
        }
        case SDS_TYPE_64: {
            SDS_HDR_VAR(64,s);
            sh->len = initlen;
            sh->alloc = usable;
            *fp = type;
            break;
        }
    }
    if (initlen && init)
        memcpy(s, init, initlen); //字符串赋值
    s[initlen] = '\0';   //添加结束标记
    return s;
}

/*猜测一点:
一般分配的空间为2的次方(取决于选用的内存分配方法),
所以len和alloc的差距:len离最近比它大2次方的数据。
len:10 ->alloc:12
len:13 ->alloc:24
*/
  • 字符串扩容
/*  When greedy is 1, enlarge more than needed, to avoid need for future reallocs
 * on incremental growth.
 * When greedy is 0, enlarge just enough so that there's free space for 'addlen'.*/
sds _sdsMakeRoomFor(sds s, size_t addlen, int greedy) {
    void *sh, *newsh;
    size_t avail = sdsavail(s); //可用空间alloc-len
    size_t len, newlen, reqlen;
    char type, oldtype = s[-1] & SDS_TYPE_MASK;
    int hdrlen;
    size_t usable;

    /* Return ASAP if there is enough space left.有足够空间 */
    if (avail >= addlen) return s;

    len = sdslen(s);
    sh = (char*)s-sdsHdrSize(oldtype);//指向buff内容开始的地方
    reqlen = newlen = (len+addlen);
    assert(newlen > len);   /* Catch size_t overflow */
    
    //小于1M,每次翻倍扩容,大于1M,每次+1M。
    if (greedy == 1) { 
        if (newlen < SDS_MAX_PREALLOC) 
            newlen *= 2;
        else
            newlen += SDS_MAX_PREALLOC;
    }

    type = sdsReqType(newlen);

    /* Don't use type 5: the user is appending to the string and type 5 is
     * not able to remember empty space, so sdsMakeRoomFor() must be called
     * at every appending operation. */
    if (type == SDS_TYPE_5) type = SDS_TYPE_8;

    hdrlen = sdsHdrSize(type);
    assert(hdrlen + newlen + 1 > reqlen);  /* Catch size_t overflow */
    if (oldtype==type) {
        newsh = s_realloc_usable(sh, hdrlen+newlen+1, &usable);
        if (newsh == NULL) return NULL;
        s = (char*)newsh+hdrlen;
    } else {
        /* Since the header size changes, need to move the string forward,
         * and can't use realloc */
        newsh = s_malloc_usable(hdrlen+newlen+1, &usable);
        if (newsh == NULL) return NULL;
        memcpy((char*)newsh+hdrlen, s, len+1);
        s_free(sh);
        s = (char*)newsh+hdrlen;
        s[-1] = type;
        sdssetlen(s, len);
    }
    usable = usable-hdrlen-1;
    if (usable > sdsTypeMaxSize(type))
        usable = sdsTypeMaxSize(type);
    sdssetalloc(s, usable);
    return s;
}
  • 惰性空间释放

惰性空间释放策略,则用于优化SDS字符串缩短操作,当缩短SDS字符串后,并不会立即执行内存重分配来回收多余的空间,如果后续有增长操作,则可直接使用。

触发点:

  • string类型命令触发:APPEND,GETSET,MSET,PSETEX,SET,SETEX,SETNX
void RM_TrimStringAllocation(RedisModuleString *str) {
    if (!str) return;
    trimStringObjectIfNeeded(str);
}

//只有RAW编码且空闲空间>10%
/* Optimize the SDS string inside the string object to require little space,
 * in case there is more than 10% of free space at the end of the SDS
 * string. This happens because SDS strings tend to overallocate to avoid
 * wasting too much time in allocations when appending to the string. */
void trimStringObjectIfNeeded(robj *o) {
    if (o->encoding == OBJ_ENCODING_RAW &&
        sdsavail(o->ptr) > sdslen(o->ptr)/10)
    {
        o->ptr = sdsRemoveFreeSpace(o->ptr);
    }
}
注意事项
  • 学会计算使用不同sds类型时的基础开销
SDS类型 开销 支持的数据长度
SDS_TYPE_5 1 <32
SDS_TYPE_8 3=1+1+1 <256
SDS_TYPE_16 5=2+2+1 <65536
SDS_TYPE_32 9=4+4+1 <4,294,967,296
SDS_TYPE_64 17=8+8+1 more
参考资料

极客时间,蒋德钧《Redis核心技术与实战》

黄建宏,《Redis设计与实现》

钱文品,《Redis深度历险:核心原理与应用实践》