/** wrapper around assoc_find which does the lazy expiration logic.
 *
 *  Looks up @key in the hash table and applies two lazy-expiration rules
 *  before handing the item back:
 *    1. flush expiration: if a flush_all deadline (config.oldest_live) has
 *       passed and the item was last touched at or before that deadline,
 *       the item is unlinked ("nuked by flush").
 *    2. per-item TTL: if the item's own exptime has passed, it is unlinked
 *       ("nuked by expire").
 *  An item that survives both checks gets its refcount bumped (caller must
 *  release it) and is promoted in the LRU via do_item_update.
 *
 *  Caller must hold the cache lock (see the MTSAFE notes below).
 *  Returns the live item, or NULL if absent or expired.
 */
hash_item *do_item_get(struct default_engine *engine, const hash_key *key) {
    rel_time_t current_time = engine->server.core->get_current_time();
    /* hash the client key with CRC32C and probe the hash table */
    hash_item *it = assoc_find(engine,
                               crc32c(hash_key_get_key(key),
                                      hash_key_get_key_len(key), 0),
                               key);
    /* was_found tracks (only when verbose logging is on) that the item was
     * initially present, so the "nuked by ..." messages fire exactly once. */
    int was_found = 0;

    if (engine->config.verbose > 2) {
        EXTENSION_LOGGER_DESCRIPTOR *logger;
        logger = (void*)engine->server.extension->get_extension(EXTENSION_LOGGER);
        if (it == NULL) {
            logger->log(EXTENSION_LOG_DEBUG, NULL,
                        "> NOT FOUND in bucket %d, %s",
                        hash_key_get_bucket_index(key),
                        hash_key_get_client_key(key));
        } else {
            logger->log(EXTENSION_LOG_DEBUG, NULL,
                        "> FOUND KEY in bucket %d, %s",
                        hash_key_get_bucket_index(item_get_key(it)),
                        hash_key_get_client_key(item_get_key(it)));
            was_found++;
        }
    }

    /* rule 1: item predates the most recent flush_all deadline */
    if (it != NULL && engine->config.oldest_live != 0 &&
        engine->config.oldest_live <= current_time &&
        it->time <= engine->config.oldest_live) {
        do_item_unlink(engine, it);           /* MTSAFE - items.lock held */
        it = NULL;
    }

    if (it == NULL && was_found) {
        EXTENSION_LOGGER_DESCRIPTOR *logger;
        logger = (void*)engine->server.extension->get_extension(EXTENSION_LOGGER);
        logger->log(EXTENSION_LOG_DEBUG, NULL, " -nuked by flush");
        was_found--;                          /* suppress the expire message below */
    }

    /* rule 2: the item's own TTL has elapsed (exptime 0 means "never") */
    if (it != NULL && it->exptime != 0 && it->exptime <= current_time) {
        do_item_unlink(engine, it);           /* MTSAFE - items.lock held */
        it = NULL;
    }

    if (it == NULL && was_found) {
        EXTENSION_LOGGER_DESCRIPTOR *logger;
        logger = (void*)engine->server.extension->get_extension(EXTENSION_LOGGER);
        logger->log(EXTENSION_LOG_DEBUG, NULL, " -nuked by expire");
        was_found--;
    }

    /* survivor: take a reference for the caller and bump it in the LRU */
    if (it != NULL) {
        it->refcount++;
        DEBUG_REFCNT(it, '+');
        do_item_update(engine, it);
    }

    return it;
}
/*
 * Moves an item to the back (most-recently-used end) of the LRU queue.
 * Serializes against other item operations by taking the per-bucket
 * item lock for the item's hash value.
 */
void item_update(item *item) {
    uint32_t bucket = hash(ITEM_key(item), item->nkey, 0);

    item_lock(bucket);
    do_item_update(item);
    item_unlock(bucket);
}
/*
 * Moves an item to the most-recently-used end of the LRU queue.
 * The update runs under the hash table's per-bucket lock (not a
 * global lock), keyed by the item's hash value.
 */
void LRU_list::item_update(base_item* item) {
    const uint32_t bucket = HashTable::hash(item->data, item->nkey);

    hashtable.hash_lock(bucket);   // fine-grained, per-bucket lock
    do_item_update(item);
    hashtable.hash_unlock(bucket);
}
/* * allocate all memory with small and large chunks. link them such the * allocation of a large object will start evicting small chunks but then stop * because the large chunk LRU has an older item. this covers part of case 3 * and part of case 4 for the small item alloc in flat_storage_lru_evict(..). */ static int mixed_items_release_small_and_large_items_scan_stop_test(int verbose) { typedef struct { item* it; char key[KEY_MAX_LENGTH]; uint8_t klen; } mixed_items_release_one_small_item_t; size_t num_small_objects = (fsi.large_free_list_sz / 2) * SMALL_CHUNKS_PER_LARGE_CHUNK; /* this is not the same as fsi.large_free_list_sz / 2 due to rounding. */ size_t num_large_objects = fsi.large_free_list_sz - (fsi.large_free_list_sz / 2); mixed_items_release_one_small_item_t* large_items = malloc(sizeof(mixed_items_release_one_small_item_t) * num_large_objects); mixed_items_release_one_small_item_t* small_items = malloc(sizeof(mixed_items_release_one_small_item_t) * num_small_objects); item* lru_trigger; size_t max_small_key_size = SMALL_TITLE_CHUNK_DATA_SZ; size_t min_size_for_large_chunk = ( sizeof( ((small_title_chunk_t*) 0)->data ) ) + ( (SMALL_CHUNKS_PER_LARGE_CHUNK - 1) * sizeof( ((small_body_chunk_t*) 0)->data ) ) + 1; size_t i; char key[KEY_MAX_LENGTH]; size_t klen; size_t large_free_list_sz = fsi.large_free_list_sz, small_free_list_sz = fsi.small_free_list_sz; V_PRINTF(1, " * %s\n", __FUNCTION__); TASSERT(fsi.large_free_list_sz != 0); TASSERT(fsi.small_free_list_sz == 0); for (i = 0; i < num_small_objects; i ++) { V_PRINTF(2, "\r * allocating small object %lu", i); V_FLUSH(2); do { small_items[i].klen = make_random_key(small_items[i].key, max_small_key_size, true); } while (assoc_find(small_items[i].key, small_items[i].klen)); small_items[i].it = do_item_alloc(small_items[i].key, small_items[i].klen, FLAGS, 0, 0, addr); TASSERT(small_items[i].it); TASSERT(is_item_large_chunk(small_items[i].it) == 0); do_item_link(small_items[i].it, small_items[i].key); } 
V_PRINTF(2, "\n"); for (i = 0; i < num_large_objects; i ++) { V_PRINTF(2, "\r * allocating large object %lu", i); V_FLUSH(2); do { large_items[i].klen = make_random_key(large_items[i].key, KEY_MAX_LENGTH, true); } while (assoc_find(large_items[i].key, large_items[i].klen)); large_items[i].it = do_item_alloc(large_items[i].key, large_items[i].klen, FLAGS, 0, min_size_for_large_chunk - large_items[i].klen, addr); TASSERT(large_items[i].it); TASSERT(is_item_large_chunk(large_items[i].it)); do_item_link(large_items[i].it, large_items[i].key); } V_PRINTF(2, "\n"); TASSERT(fsi.large_free_list_sz == 0 && fsi.small_free_list_sz == 0); V_LPRINTF(2, "update items\n"); /* update the objects we want to clobber *first*. but since ties go to the * large item, we need to bump the time stamp to ensure the small item is * released first. */ current_time += ITEM_UPDATE_INTERVAL + 1; /* initial bump to ensure that * LRU reordering takes place. */ do_item_update(small_items[0].it); current_time += 1; do_item_update(large_items[0].it); /* bump the timestamp and add the remaining items. 
*/ current_time += 1; for (i = 1; i < num_small_objects; i ++) { do_item_update(small_items[i].it); } for (i = 1; i < num_large_objects; i ++) { do_item_update(large_items[i].it); } V_LPRINTF(2, "dereferencing objects\n"); for (i = 0; i < num_small_objects; i ++) { do_item_deref(small_items[i].it); } for (i = 0; i < num_large_objects; i ++) { do_item_deref(large_items[i].it); } V_LPRINTF(2, "alloc after deref\n"); do { klen = make_random_key(key, max_small_key_size, true); } while (assoc_find(key, klen)); lru_trigger = do_item_alloc(key, klen, FLAGS, 0, LARGE_TITLE_CHUNK_DATA_SZ - klen, addr); TASSERT(lru_trigger != NULL); TASSERT(is_item_large_chunk(lru_trigger)); V_LPRINTF(2, "search for evicted objects\n"); TASSERT(assoc_find(small_items[0].key, small_items[0].klen) == NULL); TASSERT(assoc_find(large_items[0].key, large_items[0].klen) == NULL); V_LPRINTF(2, "ensuring that objects that shouldn't be evicted are still present\n"); for (i = 1; i < num_small_objects; i ++) { TASSERT(assoc_find(small_items[i].key, small_items[i].klen)); } for (i = 1; i < num_large_objects; i ++) { TASSERT(assoc_find(large_items[i].key, large_items[i].klen)); } V_LPRINTF(2, "cleanup objects\n"); for (i = 1; i < num_small_objects; i ++) { do_item_unlink(small_items[i].it, UNLINK_NORMAL, small_items[i].key); } for (i = 1; i < num_large_objects; i ++) { do_item_unlink(large_items[i].it, UNLINK_NORMAL, large_items[i].key); } do_item_deref(lru_trigger); TASSERT(fsi.large_free_list_sz == large_free_list_sz && fsi.small_free_list_sz == small_free_list_sz); return 0; }
/* * allocate all memory with small items. allocate one large object that can be * covered by the release of small items, but also requires the migration of * single chunk items. this covers part of case 1 for the large item alloc in * flat_storage_lru_evict(..). */ static int all_small_items_migrate_small_single_chunk_items_test(int verbose) { typedef struct { item* it; char key[KEY_MAX_LENGTH]; uint8_t klen; } test_keys_t; size_t num_objects = fsi.large_free_list_sz * SMALL_CHUNKS_PER_LARGE_CHUNK; test_keys_t* items = malloc(sizeof(test_keys_t) * num_objects); item* lru_trigger; size_t max_small_key_size = SMALL_TITLE_CHUNK_DATA_SZ; size_t min_size_for_large_chunk = ( sizeof( ((small_title_chunk_t*) 0)->data ) ) + ( (SMALL_CHUNKS_PER_LARGE_CHUNK - 1) * sizeof( ((small_body_chunk_t*) 0)->data ) ) + 1; size_t i; char key[KEY_MAX_LENGTH]; size_t klen; size_t large_free_list_sz = fsi.large_free_list_sz, small_free_list_sz = fsi.small_free_list_sz; V_PRINTF(1, " * %s\n", __FUNCTION__); TASSERT(fsi.large_free_list_sz != 0); TASSERT(fsi.small_free_list_sz == 0); for (i = 0; i < num_objects; i ++) { V_PRINTF(2, "\r * allocating small object %lu", i); V_FLUSH(2); do { items[i].klen = make_random_key(items[i].key, max_small_key_size, true); } while (assoc_find(items[i].key, items[i].klen)); items[i].it = do_item_alloc(items[i].key, items[i].klen, FLAGS, 0, 0, addr); TASSERT(items[i].it); TASSERT(is_item_large_chunk(items[i].it) == 0); do_item_link(items[i].it, items[i].key); } V_PRINTF(2, "\n"); TASSERT(fsi.large_free_list_sz == 0 && fsi.small_free_list_sz == 0); /* access items we don't want to move. */ current_time += ITEM_UPDATE_INTERVAL + 1; /* touch every other item. the ones that are not touched in (0, * SMALL_CHUNKS_PER_LARGE_CHUNK * 2) will be evicted. 
*/ for (i = 0; i < SMALL_CHUNKS_PER_LARGE_CHUNK * 2; i += 2) { do_item_update(items[i].it); } /* touch remaining items */ for (i = SMALL_CHUNKS_PER_LARGE_CHUNK * 2; i < num_objects; i ++) { do_item_update(items[i].it); } V_LPRINTF(2, "dereferencing objects\n"); for (i = 0; i < num_objects; i ++) { do_item_deref(items[i].it); } V_LPRINTF(2, "alloc after deref\n"); do { klen = make_random_key(key, max_small_key_size, true); } while (assoc_find(key, klen)); lru_trigger = do_item_alloc(key, klen, FLAGS, 0, min_size_for_large_chunk - klen, addr); TASSERT(lru_trigger != NULL); V_LPRINTF(2, "search for evicted object\n"); for (i = 1; i < SMALL_CHUNKS_PER_LARGE_CHUNK * 2; i += 2) { TASSERT(assoc_find(items[i].key, items[i].klen) == NULL); } V_LPRINTF(2, "ensuring that objects that shouldn't be evicted are still present\n"); for (i = 0; i < SMALL_CHUNKS_PER_LARGE_CHUNK * 2; i += 2) { /* these may have been moved. */ TASSERT((items[i].it = assoc_find(items[i].key, items[i].klen))); } for (i = SMALL_CHUNKS_PER_LARGE_CHUNK * 2; i < num_objects; i ++) { TASSERT(assoc_find(items[i].key, items[i].klen)); } V_LPRINTF(2, "cleanup objects\n"); for (i = 0; i < SMALL_CHUNKS_PER_LARGE_CHUNK * 2; i += 2) { do_item_unlink(items[i].it, UNLINK_NORMAL, items[i].key); } for (i = SMALL_CHUNKS_PER_LARGE_CHUNK * 2; i < num_objects; i ++) { do_item_unlink(items[i].it, UNLINK_NORMAL, items[i].key); } do_item_deref(lru_trigger); TASSERT(fsi.large_free_list_sz == large_free_list_sz && fsi.small_free_list_sz == small_free_list_sz); return 0; }
/*
 * Stores an item in the cache according to the semantics of one of the set
 * commands (add / replace / append / prepend / set).
 *
 * hv is the precomputed hash of the key; the caller is expected to hold the
 * corresponding lock.  Returns STORED or NOT_STORED.
 *
 * Reference counting: do_item_get takes a reference on old_it; the
 * do_item_remove calls at the bottom release it (and the extra reference on
 * new_it once it has been linked in).
 *
 * NOTE(review): this variant performs no CAS validation, so the inner
 * `stored == NOT_STORED` test before the append/prepend splice is always
 * true here — it appears to be a vestige of the upstream memcached code.
 */
enum store_item_type do_store_item ( item *it, int comm, const uint32_t hv ) {
    char *key = ITEM_key (it);
    item *old_it = do_item_get (key, it->nkey, hv);
    enum store_item_type stored = NOT_STORED;
    item *new_it = NULL;
    int flags;

    if ( old_it != NULL && comm == NREAD_ADD ) {
        /* "add" must not clobber an existing item; just promote it in the LRU */
        do_item_update (old_it);
    } else if ( ! old_it && ( comm == NREAD_REPLACE || comm == NREAD_APPEND ||
                              comm == NREAD_PREPEND ) ) {
        /* replace/append/prepend require an existing value; store nothing */
    } else {
        /* reaching here with APPEND/PREPEND implies old_it != NULL
         * (the branch above catches the missing-item case) */
        if ( comm == NREAD_APPEND || comm == NREAD_PREPEND ) {
            if ( stored == NOT_STORED ) {
                /* preserve the old item's client flags (parsed from its suffix) */
                flags = ( int ) strtol (ITEM_suffix (old_it), ( char ** ) NULL, 10);
                /* combined size drops one of the two "\r\n" terminators */
                new_it = do_item_alloc (key, it->nkey, flags, old_it->exptime,
                                        ITEM_data (it),
                                        it->nbytes + old_it->nbytes - 2, hv);
                if ( ! new_it ) {
                    /* out of memory: release our reference and bail out */
                    if ( old_it )
                        do_item_remove (old_it);
                    return NOT_STORED;
                }
                /* splice the payloads, overwriting the first "\r\n" at the seam */
                if ( comm == NREAD_APPEND ) {
                    memcpy (ITEM_data (new_it), ITEM_data (old_it), old_it->nbytes);
                    memcpy (ITEM_data (new_it) + old_it->nbytes - 2,
                            ITEM_data (it), it->nbytes);
                } else {
                    memcpy (ITEM_data (new_it), ITEM_data (it), it->nbytes);
                    memcpy (ITEM_data (new_it) + it->nbytes - 2,
                            ITEM_data (old_it), old_it->nbytes);
                }
                it = new_it;
            }
        }

        if ( stored == NOT_STORED ) {
            if ( old_it != NULL ) {
                item_replace (old_it, it, hv);
            } else {
                do_item_link (it, hv);
            }
            stored = STORED;
        }
    }

    /* drop the references taken above */
    if ( old_it != NULL ) {
        do_item_remove (old_it);
    }
    if ( new_it != NULL ) {
        do_item_remove (new_it);
    }

    return stored;
}
/*
 * Moves an item to the back (most-recently-used end) of the LRU queue.
 * The whole operation runs under the single global cache lock.
 */
void item_update(item *item) {
    pthread_mutex_lock(&cache_lock);   /* serialize all LRU manipulation */
    do_item_update(item);
    pthread_mutex_unlock(&cache_lock);
}
int LRU_list::do_store_item(base_item* it, Conn* c, uint32_t hv) { char* key = it->data; //获取旧的数据项 base_item* old_it = do_item_get(key, it->nkey, hv); int store_stat = LRU_list::NOT_STORED; base_item* new_it = 0; int flags = 0; //已经有该项item存在 if (old_it != 0 && c->cmd == NREAD_ADD) { /* * 更新当前item目的 * 1.更新时间,重建LRU链 * 2.后面执行do_item_remove,每次remove会把refcount引用计数减一 * 如果引用计数=1则被删除,重建之后refcount为2 * */ do_item_update(old_it); //旧的item不存在 }else if (!old_it && (c->cmd == NREAD_REPLACE || c->cmd == NREAD_APPEND || c->cmd == NREAD_PREPEND)) { //什么也不做,因为只有replace替换已有值 }else if (c->cmd == NREAD_CAS) { //不存在此项 if (old_it == 0) { store_stat = LRU_list::NOT_FOUND; } if (it->cas == old_it->cas) { item_replace(old_it, it, hv); store_stat = LRU_list::STORED; } else { if (mem_setting.verbose > 1) { std::cerr << "CAS: failure: expected " << old_it->get_cas() << " but got " << it->cas; } store_stat = LRU_list::EXISTS; } } else { //与上面第二个判断不同,这里是旧的item存在的replace append prepend set命令 if (c->cmd == NREAD_APPEND || c->cmd == NREAD_PREPEND) { //if (it->cas != 0) { if (it->cas != old_it->cas) { store_stat = LRU_list::EXISTS; } //} if (store_stat == LRU_list::NOT_STORED) { new_it = do_item_alloc(key, it->nkey, flags, old_it->exptime, it->nbytes + old_it->nbytes - 2, hv); //分配失败 if (new_it == 0) { if (old_it != 0) do_item_remove(old_it); return LRU_list::NOT_STORED; } new_it->nkey = old_it->nkey; new_it->item_flag = flags; new_it->exptime = old_it->exptime; new_it->nbytes = it->nbytes + old_it->nbytes - 2; memcpy(new_it->data, old_it->data, old_it->nkey); new_it->data[old_it->nkey] = '0'; //copy数据 if (c->cmd == NREAD_APPEND) { memcpy(new_it->real_data_addr(), old_it->real_data_addr(), old_it->nbytes); memcpy(new_it->real_data_addr() + old_it->nbytes - 2/*\r\n*/, it->real_data_addr(), it->nbytes); } else { //NREAD_PREPEND memcpy(new_it->real_data_addr(), it->real_data_addr(), it->nbytes); memcpy(new_it->real_data_addr() + it->nbytes - 2, old_it->real_data_addr(), old_it->nbytes); } it = new_it; 
} } if (store_stat == LRU_list::NOT_STORED) { it->cas++; if (old_it != 0) item_replace(old_it, it, hv); else //set a new key-value do_item_link(it, hv); store_stat = LRU_list::STORED; } } if (old_it != 0) do_item_remove(old_it); if (new_it != 0) do_item_remove(new_it); if (store_stat == LRU_list::STORED) { c->cas = it->get_cas(); } return store_stat; }
/*
 * Stores an item in the cache according to the semantics of one of the set
 * commands. In threaded mode, this is protected by the cache lock.
 *
 * Reference counting: do_item_get takes a reference on old_it, and a newly
 * allocated append/prepend item carries one as well; both are released at
 * the bottom via do_item_release.
 *
 * On success, writes the stored item's CAS value through *cas.
 * Returns the state of storage (ENGINE_SUCCESS / ENGINE_NOT_STORED /
 * ENGINE_KEY_ENOENT / ENGINE_KEY_EEXISTS).
 */
static ENGINE_ERROR_CODE do_store_item(struct default_engine *engine,
                                       hash_item *it, uint64_t *cas,
                                       ENGINE_STORE_OPERATION operation,
                                       const void *cookie) {
    const char *key = item_get_key(it);
    hash_item *old_it = do_item_get(engine, key, it->nkey);
    ENGINE_ERROR_CODE stored = ENGINE_NOT_STORED;

    hash_item *new_it = NULL;

    if (old_it != NULL && operation == OPERATION_ADD) {
        /* add only adds a nonexistent item, but promote to head of LRU */
        do_item_update(engine, old_it);
    } else if (!old_it && (operation == OPERATION_REPLACE
                           || operation == OPERATION_APPEND
                           || operation == OPERATION_PREPEND)) {
        /* replace only replaces an existing value; don't store */
    } else if (operation == OPERATION_CAS) {
        /* validate cas operation */
        if (old_it == NULL) {
            // LRU expired
            stored = ENGINE_KEY_ENOENT;
        } else if (item_get_cas(it) == item_get_cas(old_it)) {
            // cas validates
            // it and old_it may belong to different classes.
            // I'm updating the stats for the one that's getting pushed out
            do_item_replace(engine, old_it, it);
            stored = ENGINE_SUCCESS;
        } else {
            if (engine->config.verbose > 1) {
                EXTENSION_LOGGER_DESCRIPTOR *logger;
                logger = (void*)engine->server.extension->get_extension(EXTENSION_LOGGER);
                logger->log(EXTENSION_LOG_INFO, NULL,
                            "CAS: failure: expected %"PRIu64", got %"PRIu64"\n",
                            item_get_cas(old_it),
                            item_get_cas(it));
            }
            stored = ENGINE_KEY_EEXISTS;
        }
    } else {
        /*
         * Append - combine new and old record into single one. Here it's
         * atomic and thread-safe.
         */
        if (operation == OPERATION_APPEND || operation == OPERATION_PREPEND) {
            /*
             * Validate CAS
             * (old_it is non-NULL here: the missing-item case was handled
             * by the REPLACE/APPEND/PREPEND branch above.)
             */
            if (item_get_cas(it) != 0) {
                // CAS much be equal
                if (item_get_cas(it) != item_get_cas(old_it)) {
                    stored = ENGINE_KEY_EEXISTS;
                }
            }

            if (stored == ENGINE_NOT_STORED) {
                /* we have it and old_it here - alloc memory to hold both */
                /* note: sizes are summed with no "-2"; this engine's nbytes
                 * presumably excludes the "\r\n" terminator — confirm */
                new_it = do_item_alloc(engine, key, it->nkey,
                                       old_it->flags,
                                       old_it->exptime,
                                       it->nbytes + old_it->nbytes,
                                       cookie);
                if (new_it == NULL) {
                    /* SERVER_ERROR out of memory */
                    if (old_it != NULL) {
                        do_item_release(engine, old_it);
                    }
                    return ENGINE_NOT_STORED;
                }

                /* copy data from it and old_it to new_it */
                if (operation == OPERATION_APPEND) {
                    memcpy(item_get_data(new_it), item_get_data(old_it), old_it->nbytes);
                    memcpy(item_get_data(new_it) + old_it->nbytes, item_get_data(it), it->nbytes);
                } else {
                    /* OPERATION_PREPEND */
                    memcpy(item_get_data(new_it), item_get_data(it), it->nbytes);
                    memcpy(item_get_data(new_it) + it->nbytes, item_get_data(old_it), old_it->nbytes);
                }

                it = new_it;
            }
        }

        if (stored == ENGINE_NOT_STORED) {
            if (old_it != NULL) {
                do_item_replace(engine, old_it, it);
            } else {
                do_item_link(engine, it);
            }

            *cas = item_get_cas(it);
            stored = ENGINE_SUCCESS;
        }
    }

    if (old_it != NULL) {
        do_item_release(engine, old_it);         /* release our reference */
    }

    if (new_it != NULL) {
        do_item_release(engine, new_it);
    }

    if (stored == ENGINE_SUCCESS) {
        /* redundant with the assignment above for the non-CAS path, but it
         * also covers the OPERATION_CAS success path */
        *cas = item_get_cas(it);
    }

    return stored;
}