Redis的Hash结构和Rehash

数据结构

Dict

表示一个字典

1
2
3
4
5
6
7
8
9
10
11
12
typedef struct dict {
//字典类型
dictType *type;
//私有数据
void *privdata;
//两个hash表
dictht ht[2];
//rehash进度,-1表示没有rehash
long rehashidx; /* rehashing not in progress if rehashidx == -1 */
//当前迭代器数
unsigned long iterators; /* number of iterators currently running */
} dict;

每个RedisDB对应着一个Dict,每个Dict声明了两个Dictht,但是只会用到Dictht[0],Dictht[1]是用来Rehash使用的

Dictht

表示一个hash表

1
2
3
4
5
6
7
8
9
10
11
12
/* This is our hash table structure. Every dictionary has two of this as we
* implement incremental rehashing, for the old to the new table. */
typedef struct dictht {
//哈希表数组
dictEntry **table;
//哈希表大小
unsigned long size;
//哈希表大小掩码
unsigned long sizemask;
//哈希表已使用的大小
unsigned long used;
} dictht;

每个Dictht对应着一个DictEntry[],这个DictEntry[]在初始化的时候默认大小为4

DictEntry[]

表示一个hash桶数组,对应到一个Dictht,存储着一组DictEntry指针

DictEntry

表示一个hash桶

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
typedef struct dictEntry {
//键
void *key;
//值
union {
//自定义类型
void *val;
//无符号整型
uint64_t u64;
//有符号整型
int64_t s64;
//浮点类型
double d;
} v;
//下一个节点
struct dictEntry *next;
} dictEntry;

可以看到,redis解决hash冲突的方式也是使用数组+链表的方式,使用*next存储冲突的节点

Rehash

对于Rehash,redis里的注释内容为

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
/* Using dictEnableResize() / dictDisableResize() we make possible to
* enable/disable resizing of the hash table as needed. This is very important
* for Redis, as we use copy-on-write and don't want to move too much memory
* around when there is a child performing saving operations.
*
* Note that even when dict_can_resize is set to 0, not all resizes are
* prevented: a hash table is still allowed to grow if the ratio between
* the number of elements and the buckets > dict_force_resize_ratio. */

/**
* 使用dictEnableResize() / dictDisableResize()我们可以根据自己的需要禁用或者启用哈希表调整
* 这对redis来说非常重要
* 我们在使用copy-on-write的时候不想在写入的同时进行过多的内存移动
* 请注意,即使dict_can_resize设置为0,并非所有调整都被阻止:
* 如果元素数和桶数之间的比率> dict_force_resize_ratio,仍然允许哈希表增长
*/

而默认情况下,是否允许hash以及默认比率的设置为

1
2
3
4
//开启
static int dict_can_resize = 1;
//元素数和桶数的比率
static unsigned int dict_force_resize_ratio = 5;

rehash的源码在dict.c中

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
/* Expand the hash table if needed */
static int _dictExpandIfNeeded(dict *d)
{
//如果正在rehash,返回
/* Incremental rehashing already in progress. Return. */
if (dictIsRehashing(d)) return DICT_OK;

//ht[0]为空,则初始化ht[0]并返回
/* If the hash table is empty expand it to the initial size. */
if (d->ht[0].size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);

//满足条件A:ht[0]的used>=size>1 并且 B:dict_can_resize=1或者ht[0]的used/size>5
/* If we reached the 1:1 ratio, and we are allowed to resize the hash
* table (global setting) or we should avoid it but the ratio between
* elements/buckets is over the "safe" threshold, we resize doubling
* the number of buckets. */
if (d->ht[0].used >= d->ht[0].size &&
(dict_can_resize ||
d->ht[0].used/d->ht[0].size > dict_force_resize_ratio))
{
//扩展后的大小是ht[0]的used两倍
return dictExpand(d, d->ht[0].used*2);
}
return DICT_OK;
}