static void *assoc_maintenance_thread(void *arg) {

    while (do_run_maintenance_thread) {
        int ii = 0;

        /* Lock the cache, and bulk move multiple buckets to the new
         * hash table. */
        item_lock_global();
        mutex_lock(&cache_lock);

        for (ii = 0; ii < hash_bulk_move && expanding; ++ii) {
            item *it, *next;
            int bucket;

            for (it = old_hashtable[expand_bucket]; NULL != it; it = next) {
                next = it->h_next;

                bucket = hash(ITEM_key(it), it->nkey, 0) & hashmask(hashpower);
                it->h_next = primary_hashtable[bucket];
                primary_hashtable[bucket] = it;
            }

            old_hashtable[expand_bucket] = NULL;

            expand_bucket++;
            if (expand_bucket == hashsize(hashpower - 1)) {
                expanding = false;
                free(old_hashtable);
                STATS_LOCK();
                stats.hash_bytes -= hashsize(hashpower - 1) * sizeof(void *);
                stats.hash_is_expanding = 0;
                STATS_UNLOCK();
                if (settings.verbose > 1)
                    fprintf(stderr, "Hash table expansion done\n");
            }
        }

        mutex_unlock(&cache_lock);
        item_unlock_global();

        if (!expanding) {
            /* finished expanding. tell all threads to use fine-grained locks */
            switch_item_lock_type(ITEM_LOCK_GRANULAR);
            slabs_rebalancer_resume();
            /* We are done expanding.. just wait for next invocation */
            mutex_lock(&cache_lock);
            started_expanding = false;
            pthread_cond_wait(&maintenance_cond, &cache_lock);
            /* Before doing anything, tell threads to use a global lock */
            mutex_unlock(&cache_lock);
            slabs_rebalancer_pause();
            switch_item_lock_type(ITEM_LOCK_GLOBAL);
            mutex_lock(&cache_lock);
            assoc_expand();
            mutex_unlock(&cache_lock);
        }
    }
    return NULL;
}
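The migration loop above only does work while expanding is true, and that flag is raised by assoc_expand(), which the thread calls right after being woken at the bottom of the loop. The function itself is not shown in this section; the following is a minimal sketch of what that step does, assuming the same globals used above (primary_hashtable, old_hashtable, hashpower, expand_bucket, expanding). It is not the verbatim source.

/* Sketch: grow the table by one power of two and arm the incremental
 * migration performed by assoc_maintenance_thread(). Not verbatim source. */
static void assoc_expand(void) {
    old_hashtable = primary_hashtable;

    /* hashsize(n) is 1 << n, so this doubles the bucket count. */
    primary_hashtable = calloc(hashsize(hashpower + 1), sizeof(void *));
    if (primary_hashtable) {
        hashpower++;
        expanding = true;       /* the maintenance loop starts moving buckets */
        expand_bucket = 0;      /* begin with bucket 0 of the old table */
        STATS_LOCK();
        stats.hash_bytes += hashsize(hashpower) * sizeof(void *);
        stats.hash_is_expanding = 1;
        STATS_UNLOCK();
    } else {
        /* Allocation failed: keep using the old table and try again later. */
        primary_hashtable = old_hashtable;
    }
}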
/* Must not be called with any deeper locks held */
void pause_threads(enum pause_thread_types type) {
    char buf[1];
    int i;

    buf[0] = 0;
    switch (type) {
        case PAUSE_ALL_THREADS:
            lru_maintainer_pause();
            slabs_rebalancer_pause();
            lru_crawler_pause();
#ifdef EXTSTORE
            storage_compact_pause();
#endif
            /* deliberate fall through: pausing everything also pauses workers */
        case PAUSE_WORKER_THREADS:
            buf[0] = 'p';
            pthread_mutex_lock(&worker_hang_lock);
            break;
        case RESUME_ALL_THREADS:
            lru_maintainer_resume();
            slabs_rebalancer_resume();
            lru_crawler_resume();
#ifdef EXTSTORE
            storage_compact_resume();
#endif
            /* deliberate fall through: resuming everything also resumes workers */
        case RESUME_WORKER_THREADS:
            pthread_mutex_unlock(&worker_hang_lock);
            break;
        default:
            fprintf(stderr, "Unknown lock type: %d\n", type);
            assert(1 == 0);
            break;
    }

    /* Only send a message if we have one. */
    if (buf[0] == 0) {
        return;
    }

    pthread_mutex_lock(&init_lock);
    init_count = 0;
    for (i = 0; i < settings.num_threads; i++) {
        if (write(threads[i].notify_send_fd, buf, 1) != 1) {
            perror("Failed writing to notify pipe");
            /* TODO: This is a fatal problem. Can it ever happen temporarily? */
        }
    }
    wait_for_thread_registration(settings.num_threads);
    pthread_mutex_unlock(&init_lock);
}
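The pause only takes hold once each worker drains the 'p' byte from its notify pipe and reports back. Below is a sketch of that receiving side, assuming the worker's pipe handler calls a register_thread_initialized() helper, and that the init_count counter protected by init_lock is paired with a condition variable named init_cond; those names beyond the ones used above are assumptions, not confirmed by this section.

/* Sketch of the worker-side handshake for the 'p' message. Not verbatim. */
static void register_thread_initialized(void) {
    pthread_mutex_lock(&init_lock);
    init_count++;                      /* counted by wait_for_thread_registration() */
    pthread_cond_signal(&init_cond);
    pthread_mutex_unlock(&init_lock);

    /* Pile up here while pause_threads() holds worker_hang_lock;
     * a RESUME_* call releases it and the worker continues immediately. */
    pthread_mutex_lock(&worker_hang_lock);
    pthread_mutex_unlock(&worker_hang_lock);
}

On the waiting side, wait_for_thread_registration() presumably sleeps on the same condition variable until init_count reaches settings.num_threads, at which point pause_threads() knows every worker is parked.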
/*
 * Dump schema requires assoc expansion to be locked -
 * it walks through its buckets while saving.
 */
static void on_sigdump(int evfd, short ev, void *arg) {
    FILE *f;
    char tmpname[1024];
    bool ok;

    fprintf(stderr, "Dump cache content to %s\n", settings.dump_file);

    if (snprintf(tmpname, sizeof(tmpname), "%s.tmp", settings.dump_file) >= sizeof(tmpname)) {
        return;
    }

    f = fopen(tmpname, "w");
    if (!f) {
        fprintf(stderr, "Failed to open file %s: %s\n", tmpname, strerror(errno));
        return;
    }

    /* Before doing anything, tell threads to use a global lock */
    slabs_rebalancer_pause();
    switch_item_lock_type(ITEM_LOCK_GLOBAL);

    /* dump-to-disk walks through assoc array, so lock expansion */
    pthread_mutex_lock(&assoc_expansion_lock);

    ok = dd_dump(f);
    fclose(f);

    if (ok) {
        fprintf(stderr, "Moving temporary %s -> %s\n", tmpname, settings.dump_file);
        if (rename(tmpname, settings.dump_file) == -1) {
            fprintf(stderr, "Failed to rename %s to %s: %s\n",
                    tmpname, settings.dump_file, strerror(errno));
        }
    } else {
        fprintf(stderr, "Failed to dump file to %s: %s\n", tmpname, strerror(errno));
    }

    switch_item_lock_type(ITEM_LOCK_GRANULAR);
    slabs_rebalancer_resume();
    pthread_mutex_unlock(&assoc_expansion_lock);
}
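dd_dump() itself is not part of this section. The hypothetical sketch below shows the kind of walk it has to perform, which is exactly why assoc_expansion_lock and the global item lock are held: the bucket array and the h_next chains must not change while the file is written. The function name and record format here are invented for illustration only.

/* Hypothetical sketch of a dump walk over the assoc buckets. */
static bool dd_dump_sketch(FILE *f) {
    unsigned int bucket;
    item *it;

    for (bucket = 0; bucket < hashsize(hashpower); bucket++) {
        for (it = primary_hashtable[bucket]; it != NULL; it = it->h_next) {
            /* One record per item; only the key is shown here, value/flags/
             * exptime serialization is elided. */
            if (fwrite(ITEM_key(it), 1, it->nkey, f) != it->nkey) {
                return false;
            }
        }
    }
    /* If an expansion were in flight, a complete dump would also have to
     * walk old_hashtable from expand_bucket onward. */
    return fflush(f) == 0;
}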
// The expansion thread is started from main(). After its first pass it blocks
// on the condition variable maintenance_cond; when the number of inserted
// items exceeds the threshold, the condition variable is signalled and the
// thread runs again.
static void *assoc_maintenance_thread(void *arg) {

    // do_run_maintenance_thread is a global variable with an initial value of 1.
    // stop_assoc_maintenance_thread() sets it to 0 to terminate the migration thread.
    while (do_run_maintenance_thread) {
        int ii = 0;

        /* Lock the cache, and bulk move multiple buckets to the new
         * hash table. */
        item_lock_global();
        mutex_lock(&cache_lock);

        // hash_bulk_move controls how many buckets' worth of items are moved per
        // pass; the default is one. The loop body only runs while expanding is
        // true, so a freshly created migration thread skips it.
        for (ii = 0; ii < hash_bulk_move && expanding; ++ii) {
            item *it, *next;
            int bucket;

            // Walk the bucket of the old hash table selected by expand_bucket and
            // move every item in it into the newly expanded hash table.
            for (it = old_hashtable[expand_bucket]; NULL != it; it = next) {
                next = it->h_next;

                bucket = hash(ITEM_key(it), it->nkey, 0) & hashmask(hashpower);
                it->h_next = primary_hashtable[bucket];
                primary_hashtable[bucket] = it;
            }

            old_hashtable[expand_bucket] = NULL;
            // One bucket done; advance expand_bucket to the next bucket to migrate.
            expand_bucket++;
            // All data has been migrated.
            if (expand_bucket == hashsize(hashpower - 1)) {
                expanding = false;
                free(old_hashtable);
                STATS_LOCK();
                stats.hash_bytes -= hashsize(hashpower - 1) * sizeof(void *);
                stats.hash_is_expanding = 0;
                STATS_UNLOCK();
                if (settings.verbose > 1)
                    fprintf(stderr, "Hash table expansion done\n");
            }
        }

        // Release the locks.
        mutex_unlock(&cache_lock);
        item_unlock_global();

        // No more data to migrate.
        if (!expanding) {
            /* finished expanding. tell all threads to use fine-grained locks */
            switch_item_lock_type(ITEM_LOCK_GRANULAR);
            slabs_rebalancer_resume();
            /* We are done expanding.. just wait for next invocation */
            mutex_lock(&cache_lock);
            started_expanding = false;
            // Suspend the migration thread. When a worker thread inserts an item and
            // finds that the item count has reached 1.5 times the hash table size, it
            // calls assoc_start_expand(), which calls pthread_cond_signal() to wake
            // the migration thread.
            pthread_cond_wait(&maintenance_cond, &cache_lock);
            /* Before doing anything, tell threads to use a global lock */
            mutex_unlock(&cache_lock);
            slabs_rebalancer_pause();
            switch_item_lock_type(ITEM_LOCK_GLOBAL);
            mutex_lock(&cache_lock);
            assoc_expand();  // allocate a larger hash table and set expanding to true
            mutex_unlock(&cache_lock);
        }
    }
    return NULL;
}
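The wake-up path mentioned in the comment before pthread_cond_wait() lives on the insert side. A sketch of it follows, assuming hash_items is the global count of items currently linked into the table; the 1.5x trigger matches the comment above, but this is not the verbatim source.

/* Sketch of the waking side. assoc_insert() calls this after linking a new
 * item, roughly as:
 *
 *     if (!expanding && hash_items > (hashsize(hashpower) * 3) / 2)
 *         assoc_start_expand();
 */
static void assoc_start_expand(void) {
    if (started_expanding)
        return;                              /* a request is already pending */
    started_expanding = true;
    pthread_cond_signal(&maintenance_cond);  /* wake assoc_maintenance_thread */
}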
static void slabs_rebalancer_pause_safe(void) {
    slabs_rebalancer_pause();
    first_call_assoc_maintenance_thread = false;
}
// Callback (entry point) of the data-migration thread.
static void *assoc_maintenance_thread(void *arg) {

    // do_run_maintenance_thread is a global variable with an initial value of 1;
    // stop_assoc_maintenance_thread() sets it to 0 to terminate the migration thread.
    while (do_run_maintenance_thread) {
        int ii = 0;

        /* Lock the cache, and bulk move multiple buckets to the new
         * hash table. */
        // Take the locks.
        item_lock_global();  // Grab the global-level lock: every item is now under its control.
        // Lock the items in the hash table. Otherwise, concurrent inserts and
        // deletes by other threads would leave the data inconsistent; note that
        // do_item_link and do_item_unlink in item.c also take cache_lock internally.
        mutex_lock(&cache_lock);

        // Migrate items.
        for (ii = 0; ii < hash_bulk_move && expanding; ++ii) {
            item *it, *next;
            int bucket;

            for (it = old_hashtable[expand_bucket]; NULL != it; it = next) {
                next = it->h_next;

                bucket = hash(ITEM_key(it), it->nkey) & hashmask(hashpower);
                it->h_next = primary_hashtable[bucket];
                primary_hashtable[bucket] = it;
            }

            old_hashtable[expand_bucket] = NULL;
            expand_bucket++;
            if (expand_bucket == hashsize(hashpower - 1)) {
                expanding = false;
                free(old_hashtable);
                STATS_LOCK();
                stats.hash_bytes -= hashsize(hashpower - 1) * sizeof(void *);
                stats.hash_is_expanding = 0;
                STATS_UNLOCK();
                if (settings.verbose > 1)
                    fprintf(stderr, "Hash table expansion done\n");
            }
        }

        // Release the locks once this pass is done.
        mutex_unlock(&cache_lock);
        item_unlock_global();

        // No more data needs migrating.
        if (!expanding) {
            /* Why does the migration thread take such a roundabout route to switch the
             * worker threads' lock type? Couldn't it simply modify the item_lock_type
             * member of every thread's LIBEVENT_THREAD structure?
             * The problem is that the migration thread has no idea what a worker is
             * doing at that moment. Suppose a worker is accessing an item and holds a
             * segment-level lock; if the migration thread switches that worker over to
             * the global lock, then when the worker unlocks it will release the global
             * lock instead (see the item_lock and item_unlock code), and the program
             * crashes. So the migration thread cannot do the switch itself: it can only
             * notify the workers, and each worker switches its own lock type once it
             * has finished whatever it is doing. After notifying all workers, the
             * migration thread therefore calls wait_for_thread_registration() and
             * sleeps until every worker has switched to the requested lock type.
             */
            /* finished expanding. tell all threads to use fine-grained locks */
            // Reaching this point means no more data needs migrating (expansion has
            // stopped). Tell all worker threads to switch to segment-level locks when
            // accessing items; this blocks until every worker has switched.
            switch_item_lock_type(ITEM_LOCK_GRANULAR);
            slabs_rebalancer_resume();
            /* We are done expanding.. just wait for next invocation */
            mutex_lock(&cache_lock);
            // Reset.
            started_expanding = false;
            // Suspend the migration thread. When a worker thread inserts an item and
            // finds that the item count has reached 1.5 times the hash table size, it
            // calls assoc_start_expand(), which calls pthread_cond_signal() to wake
            // the migration thread.
            pthread_cond_wait(&maintenance_cond, &cache_lock);
            /* Before doing anything, tell threads to use a global lock */
            mutex_unlock(&cache_lock);
            slabs_rebalancer_pause();
            // Waking up from maintenance_cond means it is time to expand the hash
            // table and migrate data again. While migrating a bucket, this thread
            // holds the global-level lock, so worker threads must not use segment-
            // level locks during that time; they have to use the global lock as well.
            // All workers and the migration thread then compete for the global lock,
            // and only the thread that wins it may access items.
            // The next line tells every worker to switch its item lock to the global
            // lock; switch_item_lock_type() sleeps on a condition variable and only
            // returns once every worker has made the switch.
            switch_item_lock_type(ITEM_LOCK_GLOBAL);
            mutex_lock(&cache_lock);
            // Allocate a larger hash table and set expanding to true.
            assoc_expand();
            mutex_unlock(&cache_lock);
        }
    }
    return NULL;
}
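The crash scenario described in the long comment is easiest to see from the shape of item_lock()/item_unlock(). Here is a sketch, assuming the per-thread lock type lives in thread-local storage; item_lock_type_key, item_locks, item_lock_count and item_global_lock are assumed names for this illustration, not necessarily the ones in the source.

/* Sketch of the segment/global lock switch as seen from a worker thread. */
void item_lock(uint32_t hv) {
    uint8_t *lock_type = pthread_getspecific(item_lock_type_key);
    if (*lock_type == ITEM_LOCK_GRANULAR) {
        /* segment level: pick one of item_lock_count small mutexes by hash */
        mutex_lock(&item_locks[hv % item_lock_count]);
    } else {
        mutex_lock(&item_global_lock);
    }
}

void item_unlock(uint32_t hv) {
    uint8_t *lock_type = pthread_getspecific(item_lock_type_key);
    if (*lock_type == ITEM_LOCK_GRANULAR) {
        mutex_unlock(&item_locks[hv % item_lock_count]);
    } else {
        mutex_unlock(&item_global_lock);
    }
}
/* If the migration thread flipped *lock_type while a worker still held a
 * segment lock, item_unlock() would release the global lock instead of the
 * segment lock actually held; hence the notify-and-wait handshake above. */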