/* Loop up to N times: * If too many items are in HOT_LRU, push to COLD_LRU * If too many items are in WARM_LRU, push to COLD_LRU * If too many items are in COLD_LRU, poke COLD_LRU tail * 1000 loops with 1ms min sleep gives us under 1m items shifted/sec. The * locks can't handle much more than that. Leaving a TODO for how to * autoadjust in the future. */ static int lru_maintainer_juggle(const int slabs_clsid) { int i; int did_moves = 0; bool mem_limit_reached = false; uint64_t total_bytes = 0; unsigned int chunks_perslab = 0; unsigned int chunks_free = 0; /* TODO: if free_chunks below high watermark, increase aggressiveness */ chunks_free = slabs_available_chunks(slabs_clsid, &mem_limit_reached, &total_bytes, &chunks_perslab); if (settings.expirezero_does_not_evict) total_bytes -= noexp_lru_size(slabs_clsid); /* If slab automove is enabled on any level, and we have more than 2 pages * worth of chunks free in this class, ask (gently) to reassign a page * from this class back into the global pool (0) */ if (settings.slab_automove > 0 && chunks_free > (chunks_perslab * 2.5)) { slabs_reassign(slabs_clsid, SLAB_GLOBAL_PAGE_POOL); } /* Juggle HOT/WARM up to N times */ for (i = 0; i < 1000; i++) { int do_more = 0; if (lru_pull_tail(slabs_clsid, HOT_LRU, total_bytes, LRU_PULL_CRAWL_BLOCKS) || lru_pull_tail(slabs_clsid, WARM_LRU, total_bytes, LRU_PULL_CRAWL_BLOCKS)) { do_more++; } do_more += lru_pull_tail(slabs_clsid, COLD_LRU, total_bytes, LRU_PULL_CRAWL_BLOCKS); if (do_more == 0) break; did_moves++; } return did_moves; }
static void *storage_write_thread(void *arg) { void *storage = arg; // NOTE: ignoring overflow since that would take years of uptime in a // specific load pattern of never going to sleep. unsigned int backoff[MAX_NUMBER_OF_SLAB_CLASSES] = {0}; unsigned int counter = 0; useconds_t to_sleep = WRITE_SLEEP_MIN; logger *l = logger_create(); if (l == NULL) { fprintf(stderr, "Failed to allocate logger for storage compaction thread\n"); abort(); } pthread_mutex_lock(&storage_write_plock); while (1) { // cache per-loop to avoid calls to the slabs_clsid() search loop int min_class = slabs_clsid(settings.ext_item_size); bool do_sleep = true; counter++; if (to_sleep > WRITE_SLEEP_MAX) to_sleep = WRITE_SLEEP_MAX; for (int x = 0; x < MAX_NUMBER_OF_SLAB_CLASSES; x++) { bool did_move = false; bool mem_limit_reached = false; unsigned int chunks_free; int item_age; int target = settings.ext_free_memchunks[x]; if (min_class > x || (backoff[x] && (counter % backoff[x] != 0))) { // Long sleeps means we should retry classes sooner. if (to_sleep > WRITE_SLEEP_MIN * 10) backoff[x] /= 2; continue; } // Avoid extra slab lock calls during heavy writing. chunks_free = slabs_available_chunks(x, &mem_limit_reached, NULL, NULL); // storage_write() will fail and cut loop after filling write buffer. while (1) { // if we are low on chunks and no spare, push out early. if (chunks_free < target && mem_limit_reached) { item_age = 0; } else { item_age = settings.ext_item_age; } if (storage_write(storage, x, item_age)) { chunks_free++; // Allow stopping if we've done enough this loop did_move = true; do_sleep = false; if (to_sleep > WRITE_SLEEP_MIN) to_sleep /= 2; } else { break; } } if (!did_move) { backoff[x]++; } else if (backoff[x]) { backoff[x] /= 2; } } // flip lock so we can be paused or stopped pthread_mutex_unlock(&storage_write_plock); if (do_sleep) { usleep(to_sleep); to_sleep *= 2; } pthread_mutex_lock(&storage_write_plock); } return NULL; }