数据结构-SDS
底层数据结构-SDS
SDS数据结构
typedef char *sds; //sds本质也是字符串数组
/* Note: sdshdr5 is never used, we just access the flags byte directly.
* However is here to document the layout of type 5 SDS strings. */
struct __attribute__ ((__packed__)) sdshdr5 {
unsigned char flags; /* 3 lsb of type, and 5 msb of string length */
char buf[];
};
struct __attribute__ ((__packed__)) sdshdr8 {
uint8_t len; /* used */
uint8_t alloc; /* excluding the header and null terminator */
unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[];
};
struct __attribute__ ((__packed__)) sdshdr16 {
uint16_t len; /* used */
uint16_t alloc; /* excluding the header and null terminator */
unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[];
};
struct __attribute__ ((__packed__)) sdshdr32 {
uint32_t len; /* used */
uint32_t alloc; /* excluding the header and null terminator */
unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[];
};
struct __attribute__ ((__packed__)) sdshdr64 {
uint64_t len; /* used */
uint64_t alloc; /* excluding the header and null terminator */
unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[];
};
从上述定义我们可以抽象得出SDS的基础结构如下
struct sdshr{
T len; //字符串长度
T alloc; //分配的空间大小
char flags; //sds类型标记
char buf[]; //存放实际的字符串
};
与Char*字符串的区别
- 由于sds存储len字段,所以计算字符串长度的时间复杂度为O(1),而char*则需要从头遍历O(n)。
- 通过空间预分配和惰性空间释放,减少了修改字符串时带来的内存重新分配的问题。
- 由于sds通过len来标识字符串长度,所以可以存储任何二进制数据,而char*通过\0来标识字符串结束。
- 由于sds存储len和alloc字段,在函数计算过程中可以有效避免缓冲区溢出问题。
应用场景
在redis中,除字符串字面量,绝大部分可以被修改的字符串值,都是通过sds来标识。
SDS的常用方法
- 根据字符串长度,获取匹配的sds类型。(注意每个类型所能存储的字符串长度)
static inline char sdsReqType(size_t string_size) {
if (string_size < 1<<5)
return SDS_TYPE_5; //32
if (string_size < 1<<8)
return SDS_TYPE_8; //256
if (string_size < 1<<16)
return SDS_TYPE_16; //65536
#if (LONG_MAX == LLONG_MAX)
if (string_size < 1ll<<32)
return SDS_TYPE_32; //4,294,967,296
return SDS_TYPE_64;
#else
return SDS_TYPE_32;
#endif
}
- 获取sds header长度。(注意每个sds类型的header长度)
static inline int sdsHdrSize(char type) {
switch(type&SDS_TYPE_MASK) {
case SDS_TYPE_5:
return sizeof(struct sdshdr5); //1
case SDS_TYPE_8:
return sizeof(struct sdshdr8); //1+1+1=3
case SDS_TYPE_16:
return sizeof(struct sdshdr16); //2+2+1=5
case SDS_TYPE_32:
return sizeof(struct sdshdr32); //4+4+1=9
case SDS_TYPE_64:
return sizeof(struct sdshdr64); //8+8+1=17
}
return 0;
}
- 创建sds对象
/* Create a new sds string with the content specified by the 'init' pointer and 'initlen'.*/
sds _sdsnewlen(const void *init, size_t initlen, int trymalloc) {
void *sh;
sds s;
char type = sdsReqType(initlen);
/* Empty strings are usually created in order to append. Use type 8
* since type 5 is not good at this. */
if (type == SDS_TYPE_5 && initlen == 0) type = SDS_TYPE_8;
int hdrlen = sdsHdrSize(type);
unsigned char *fp; /* flags pointer. */
size_t usable;
assert(initlen + hdrlen + 1 > initlen); /* Catch size_t overflow 避免长度溢出的问题*/
sh = trymalloc?
s_trymalloc_usable(hdrlen+initlen+1, &usable) :
s_malloc_usable(hdrlen+initlen+1, &usable);
if (sh == NULL) return NULL;
if (init==SDS_NOINIT)
init = NULL;
else if (!init)
memset(sh, 0, hdrlen+initlen+1);
s = (char*)sh+hdrlen; //这里s指向实际内容存储的地方(及buf开始的地方)
fp = ((unsigned char*)s)-1; //获取sds类型,能这样计算的原因是__packed__进行了字段对齐
usable = usable-hdrlen-1; //计算出实际可用的空间(排除sds头和\0的占位符)
if (usable > sdsTypeMaxSize(type))
usable = sdsTypeMaxSize(type);
switch(type) {
case SDS_TYPE_5: {
*fp = type | (initlen << SDS_TYPE_BITS);
break;
}
case SDS_TYPE_8: {
SDS_HDR_VAR(8,s);
sh->len = initlen;
sh->alloc = usable;
*fp = type;
break;
}
case SDS_TYPE_16: {
SDS_HDR_VAR(16,s);
sh->len = initlen;
sh->alloc = usable;
*fp = type;
break;
}
case SDS_TYPE_32: {
SDS_HDR_VAR(32,s);
sh->len = initlen;
sh->alloc = usable;
*fp = type;
break;
}
case SDS_TYPE_64: {
SDS_HDR_VAR(64,s);
sh->len = initlen;
sh->alloc = usable;
*fp = type;
break;
}
}
if (initlen && init)
memcpy(s, init, initlen); //字符串赋值
s[initlen] = '\0'; //添加结束标记
return s;
}
/*猜测一点:
一般分配的空间为2的次方(取决于选用的内存分配方法),
所以len和alloc的差距:len离最近比它大2次方的数据。
len:10 ->alloc:12
len:13 ->alloc:24
*/
- 字符串扩容
/* When greedy is 1, enlarge more than needed, to avoid need for future reallocs
* on incremental growth.
* When greedy is 0, enlarge just enough so that there's free space for 'addlen'.*/
sds _sdsMakeRoomFor(sds s, size_t addlen, int greedy) {
void *sh, *newsh;
size_t avail = sdsavail(s); //可用空间alloc-len
size_t len, newlen, reqlen;
char type, oldtype = s[-1] & SDS_TYPE_MASK;
int hdrlen;
size_t usable;
/* Return ASAP if there is enough space left.有足够空间 */
if (avail >= addlen) return s;
len = sdslen(s);
sh = (char*)s-sdsHdrSize(oldtype);//指向buff内容开始的地方
reqlen = newlen = (len+addlen);
assert(newlen > len); /* Catch size_t overflow */
//小于1M,每次翻倍扩容,大于1M,每次+1M。
if (greedy == 1) {
if (newlen < SDS_MAX_PREALLOC)
newlen *= 2;
else
newlen += SDS_MAX_PREALLOC;
}
type = sdsReqType(newlen);
/* Don't use type 5: the user is appending to the string and type 5 is
* not able to remember empty space, so sdsMakeRoomFor() must be called
* at every appending operation. */
if (type == SDS_TYPE_5) type = SDS_TYPE_8;
hdrlen = sdsHdrSize(type);
assert(hdrlen + newlen + 1 > reqlen); /* Catch size_t overflow */
if (oldtype==type) {
newsh = s_realloc_usable(sh, hdrlen+newlen+1, &usable);
if (newsh == NULL) return NULL;
s = (char*)newsh+hdrlen;
} else {
/* Since the header size changes, need to move the string forward,
* and can't use realloc */
newsh = s_malloc_usable(hdrlen+newlen+1, &usable);
if (newsh == NULL) return NULL;
memcpy((char*)newsh+hdrlen, s, len+1);
s_free(sh);
s = (char*)newsh+hdrlen;
s[-1] = type;
sdssetlen(s, len);
}
usable = usable-hdrlen-1;
if (usable > sdsTypeMaxSize(type))
usable = sdsTypeMaxSize(type);
sdssetalloc(s, usable);
return s;
}
- 惰性空间释放
惰性空间释放策略,则用于优化SDS字符串缩短操作,当缩短SDS字符串后,并不会立即执行内存重分配来回收多余的空间,如果后续有增长操作,则可直接使用。
触发点:
- string类型命令触发:APPEND,GETSET,MSET,PSETEX,SET,SETEX,SETNX
void RM_TrimStringAllocation(RedisModuleString *str) {
if (!str) return;
trimStringObjectIfNeeded(str);
}
//只有RAW编码且空闲空间>10%
/* Optimize the SDS string inside the string object to require little space,
* in case there is more than 10% of free space at the end of the SDS
* string. This happens because SDS strings tend to overallocate to avoid
* wasting too much time in allocations when appending to the string. */
void trimStringObjectIfNeeded(robj *o) {
if (o->encoding == OBJ_ENCODING_RAW &&
sdsavail(o->ptr) > sdslen(o->ptr)/10)
{
o->ptr = sdsRemoveFreeSpace(o->ptr);
}
}
注意事项
- 学会计算使用不同sds类型时的基础开销
SDS类型 | 开销 | 支持的数据长度 |
---|---|---|
SDS_TYPE_5 | 1 | <32 |
SDS_TYPE_8 | 3=1+1+1 | <256 |
SDS_TYPE_16 | 5=2+2+1 | <65536 |
SDS_TYPE_32 | 9=4+4+1 | <4,294,967,296 |
SDS_TYPE_64 | 17=8+8+1 | more |
参考资料
极客时间,蒋德钧《Redis核心技术与实战》
黄建宏,《Redis设计与实现》
钱文品,《Redis深度历险:核心原理与应用实践》