/* * Allocates a new item. */ item *item_alloc(char *key, size_t nkey, int flags, rel_time_t exptime, char *chksum, int nbytes, int cklen) { #ifdef MOXI_ITEM_MALLOC // Skip past the lock, since we're using malloc. return do_item_alloc(key, nkey, flags, exptime, chksum, nbytes, cklen); #else item *it; pthread_mutex_lock(&cache_lock); it = do_item_alloc(key, nkey, flags, exptime, chksum, nbytes, cklen); pthread_mutex_unlock(&cache_lock); return it; #endif }
/* * adds a delta value to a numeric item. * * c connection requesting the operation * it item to adjust * incr true to increment value, false to decrement * delta amount to adjust value by * @param ritem The resulting item after adding the delta. Only valid if * ENGINE_SUCCESS is returned. Caller is responsible for calling * do_item_release() on this when finished with it. * * returns a response code to send back to the client. */ static ENGINE_ERROR_CODE do_add_delta(struct default_engine *engine, hash_item *it, const bool incr, const int64_t delta, item** ritem, uint64_t *result, const void *cookie) { const char *ptr; uint64_t value; char buf[80]; int res; if (it->nbytes >= (sizeof(buf) - 1)) { return ENGINE_EINVAL; } ptr = item_get_data(it); memcpy(buf, ptr, it->nbytes); buf[it->nbytes] = '\0'; if (!safe_strtoull(buf, &value)) { return ENGINE_EINVAL; } if (incr) { value += delta; } else { if ((uint64_t)delta > value) { value = 0; } else { value -= delta; } } *result = value; res = snprintf(buf, sizeof(buf), "%" PRIu64, value); if (res < 0 || res >= sizeof(buf)) { return ENGINE_EINVAL; } if (it->refcount == 1 && res <= (int)it->nbytes) { /* we can do inline replacement */ memcpy(item_get_data(it), buf, res); memset(item_get_data(it) + res, ' ', it->nbytes - res); item_set_cas(NULL, NULL, it, get_cas_id()); *ritem = it; } else { hash_item *new_it = do_item_alloc(engine, item_get_key(it), it->flags, it->exptime, res, cookie, it->datatype); if (new_it == NULL) { do_item_unlink(engine, it); return ENGINE_ENOMEM; } memcpy(item_get_data(new_it), buf, res); do_item_replace(engine, it, new_it); *ritem = new_it; } return ENGINE_SUCCESS; }
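/*
 * do_add_delta() above relies on safe_strtoull() to reject non-numeric
 * values.  That helper is not shown in this section; the sketch below is an
 * approximation of its behavior (strict base-10 parse of the whole string,
 * with overflow and negative input rejected), not the source's exact
 * implementation.
 */
#include <ctype.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

static bool sketch_safe_strtoull(const char *str, uint64_t *out) {
    char *endptr = NULL;
    errno = 0;
    unsigned long long ull = strtoull(str, &endptr, 10);
    if (errno == ERANGE || endptr == str) {
        return false;                 /* overflow, or no digits at all */
    }
    if (*endptr != '\0' && !isspace((unsigned char)*endptr)) {
        return false;                 /* trailing garbage such as "12abc" */
    }
    if (strchr(str, '-') != NULL) {
        return false;                 /* negative input would wrap around */
    }
    *out = (uint64_t)ull;
    return true;
}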
/* * Allocates a new item. */ item *item_alloc(char *key, size_t nkey, int flags, rel_time_t exptime, int nbytes) { item *it; pthread_mutex_lock(&cache_lock); it = do_item_alloc(key, nkey, flags, exptime, nbytes); pthread_mutex_unlock(&cache_lock); return it; }
/* Allocates a new item. */
item *item_alloc(char *key, size_t nkey, int flags, rel_time_t exptime, int nbytes) {
    item *it;
    /* do_item_alloc handles its own locks */
    /* This variant is special: the locking is done inside do_item_alloc,
     * because the locks are coupled with the allocation logic - some of them
     * are only needed inside certain branches, so taking a single lock out
     * here would be awkward. */
    it = do_item_alloc(key, nkey, flags, exptime, nbytes, 0);
    return it;
}
/* * Allocates a new item. */ hash_item *item_alloc(struct default_engine *engine, const void *key, size_t nkey, int flags, rel_time_t exptime, int nbytes, const void *cookie) { hash_item *it; pthread_mutex_lock(&engine->cache_lock); it = do_item_alloc(engine, key, nkey, flags, exptime, nbytes, cookie); pthread_mutex_unlock(&engine->cache_lock); return it; }
/* * Allocates a new item. */ hash_item *item_alloc(struct default_engine *engine, const void *key, size_t nkey, int flags, rel_time_t exptime, int nbytes, const void *cookie, uint8_t datatype) { hash_item *it; hash_key hkey; if (!hash_key_create(&hkey, key, nkey, engine, cookie)) { return NULL; } cb_mutex_enter(&engine->items.lock); it = do_item_alloc(engine, &hkey, flags, exptime, nbytes, cookie, datatype); cb_mutex_exit(&engine->items.lock); hash_key_destroy(&hkey); return it; }
int item_test() {
    int maxi = 10;
    // test set.
    for (int i = 0; i < 10; i++) {
        char key[1024];
        memset(key, 0, 1024);
        sprintf(key, "charlie_%d", i);
        const size_t nkey = strlen(key) + 1;
        const int flags = 0;
        const time_t exptime = 0;
        const int nbytes = 1024;
        uint32_t cur_hv = jenkins_hash((void *)key, nkey);
        item *it = do_item_alloc((const char *)key, nkey, flags, exptime, nbytes, cur_hv);
        if (it == NULL) {
            fprintf(stderr, "\033[31malloc fail\033[0m\n");
            maxi = i;
            break;
        }
        /* store the value as a decimal string */
        char val[1024];
        sprintf(val, "%d", i);
        memcpy(ITEM_data(it), val, strlen(val) + 1);
    }
    // test get.
    for (int i = 0; i < maxi; ++i) {
        char key[1024];
        memset(key, 0, 1024);
        sprintf(key, "charlie_%d", i);
        const size_t nkey = strlen(key) + 1;
        uint32_t cur_hv = jenkins_hash((void *)key, nkey);
        item *it = assoc_find(key, nkey, cur_hv);
        if (it == NULL) {
            fprintf(stderr, "\033[31mget fail\033[0m\n");
            return -1;
        }
        /* parse the decimal string stored above */
        int val = atoi(ITEM_data(it));
        if (i & 0x1) {
            fprintf(stdout, "del key:%s value:%d\n", ITEM_key(it), val);
            do_item_unlink(it, cur_hv);
            lru_traverse(NULL);
        }
    }
    return 0;
}
static ENGINE_ERROR_CODE do_arithmetic(struct default_engine *engine, const void* cookie, const hash_key* key, const bool increment, const bool create, const uint64_t delta, const uint64_t initial, const rel_time_t exptime, item **result_item, uint8_t datatype, uint64_t *result) { hash_item *item = do_item_get(engine, key); ENGINE_ERROR_CODE ret; if (item == NULL) { if (!create) { return ENGINE_KEY_ENOENT; } else { char buffer[128]; int len = snprintf(buffer, sizeof(buffer), "%"PRIu64, (uint64_t)initial); if (len < 0 || len >= sizeof(buffer)) { return ENGINE_ENOMEM; } item = do_item_alloc(engine, key, 0, exptime, len, cookie, datatype); if (item == NULL) { return ENGINE_ENOMEM; } memcpy((void*)item_get_data(item), buffer, len); if ((ret = do_store_item(engine, item, OPERATION_ADD, cookie, (hash_item**)result_item)) == ENGINE_SUCCESS) { *result = initial; } else { do_item_release(engine, item); } } } else { ret = do_add_delta(engine, item, increment, delta, result_item, result, cookie); } return ret; }
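/*
 * do_arithmetic() above sizes the newly created item from the snprintf()
 * result and bails out if the formatted initial value would not fit.  A
 * minimal standalone sketch of that guard; the helper name is hypothetical,
 * not from the source:
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Returns the number of bytes written, or -1 if buf is too small. */
static int format_initial_value(char *buf, size_t buflen, uint64_t initial) {
    int len = snprintf(buf, buflen, "%" PRIu64, initial);
    if (len < 0 || (size_t)len >= buflen) {
        return -1;   /* treat truncation like an allocation failure */
    }
    return len;      /* caller uses this as the item's nbytes */
}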
/* * allocate nearly all memory with small items (all memory - * SMALL_CHUNKS_PER_LARGE_CHUNK - 1). then set it up such that there is only * one item eligible to be freed (i.e., by removing the remaining items from the * LRU. allocate one large object. this will require the migration of one * single chunk item at the LRU head. this covers part of case 1 for the small * item alloc in flat_storage_lru_evict(..). */ static int all_small_items_migrate_small_single_chunk_item_at_lru_head_test(int verbose) { typedef struct { item* it; char key[KEY_MAX_LENGTH]; uint8_t klen; } test_keys_t; size_t num_objects = fsi.large_free_list_sz * SMALL_CHUNKS_PER_LARGE_CHUNK; test_keys_t* items = malloc(sizeof(test_keys_t) * num_objects); item* lru_trigger; size_t max_small_key_size = SMALL_TITLE_CHUNK_DATA_SZ; size_t min_size_for_large_chunk = ( sizeof( ((small_title_chunk_t*) 0)->data ) ) + ( (SMALL_CHUNKS_PER_LARGE_CHUNK - 1) * sizeof( ((small_body_chunk_t*) 0)->data ) ) + 1; size_t i, count; char key[KEY_MAX_LENGTH]; size_t klen; size_t large_free_list_sz = fsi.large_free_list_sz, small_free_list_sz = fsi.small_free_list_sz; V_PRINTF(1, " * %s\n", __FUNCTION__); TASSERT(fsi.large_free_list_sz != 0); TASSERT(fsi.small_free_list_sz == 0); for (i = 0, count = 0; fsi.large_free_list_sz || fsi.small_free_list_sz > SMALL_CHUNKS_PER_LARGE_CHUNK - 1; i ++, count ++) { V_PRINTF(2, "\r * allocating small object %lu", i); V_FLUSH(2); assert(i < num_objects); do { items[i].klen = make_random_key(items[i].key, max_small_key_size, true); } while (assoc_find(items[i].key, items[i].klen)); items[i].it = do_item_alloc(items[i].key, items[i].klen, FLAGS, 0, 0, addr); TASSERT(items[i].it); TASSERT(is_item_large_chunk(items[i].it) == 0); do_item_link(items[i].it, items[i].key); } V_PRINTF(2, "\n"); TASSERT(fsi.large_free_list_sz == 0); TASSERT(fsi.small_free_list_sz == SMALL_CHUNKS_PER_LARGE_CHUNK - 1); // remove all but one item from the LRU. and release our reference to the // item we don't remove from the LRU. for (i = 0; i < count - 1; i ++) { do_item_unlink(items[i].it, UNLINK_NORMAL, items[i].key); } do_item_deref(items[count - 1].it); TASSERT(fsi.lru_head == items[count - 1].it); TASSERT(fsi.large_free_list_sz == 0); TASSERT(fsi.small_free_list_sz == SMALL_CHUNKS_PER_LARGE_CHUNK - 1); V_LPRINTF(2, "alloc\n"); do { klen = make_random_key(key, max_small_key_size, true); } while (assoc_find(key, klen)); lru_trigger = do_item_alloc(key, klen, FLAGS, 0, min_size_for_large_chunk - klen, addr); TASSERT(lru_trigger != NULL); V_LPRINTF(2, "search for evicted object\n"); TASSERT(assoc_find(items[count - 1].key, items[count - 1].klen) == NULL); V_LPRINTF(2, "cleanup objects\n"); for (i = 0; i < count - 1; i ++) { do_item_deref(items[i].it); } do_item_deref(lru_trigger); TASSERT(fsi.large_free_list_sz == large_free_list_sz && fsi.small_free_list_sz == small_free_list_sz); return 0; }
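/*
 * The flat-storage tests above call make_random_key() to produce a key of at
 * most max_len bytes and retry until assoc_find() reports no collision.  The
 * helper itself is not part of this section; the version below is only a
 * plausible stand-in (rand()-based lowercase keys), not the original.
 */
#include <stdint.h>
#include <stdlib.h>

static uint8_t sketch_make_random_key(char *key, size_t max_len, int printable) {
    size_t len = 1 + ((size_t)rand() % max_len);   /* at least one byte */
    for (size_t i = 0; i < len; i++) {
        key[i] = printable ? (char)('a' + rand() % 26) : (char)(rand() % 256);
    }
    return (uint8_t)len;                            /* length carried separately */
}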
/* * allocate all memory with small items. allocate one large object that can be * covered by the release of small items, but also requires the migration of * single chunk items. this covers part of case 1 for the large item alloc in * flat_storage_lru_evict(..). */ static int all_small_items_migrate_small_single_chunk_items_test(int verbose) { typedef struct { item* it; char key[KEY_MAX_LENGTH]; uint8_t klen; } test_keys_t; size_t num_objects = fsi.large_free_list_sz * SMALL_CHUNKS_PER_LARGE_CHUNK; test_keys_t* items = malloc(sizeof(test_keys_t) * num_objects); item* lru_trigger; size_t max_small_key_size = SMALL_TITLE_CHUNK_DATA_SZ; size_t min_size_for_large_chunk = ( sizeof( ((small_title_chunk_t*) 0)->data ) ) + ( (SMALL_CHUNKS_PER_LARGE_CHUNK - 1) * sizeof( ((small_body_chunk_t*) 0)->data ) ) + 1; size_t i; char key[KEY_MAX_LENGTH]; size_t klen; size_t large_free_list_sz = fsi.large_free_list_sz, small_free_list_sz = fsi.small_free_list_sz; V_PRINTF(1, " * %s\n", __FUNCTION__); TASSERT(fsi.large_free_list_sz != 0); TASSERT(fsi.small_free_list_sz == 0); for (i = 0; i < num_objects; i ++) { V_PRINTF(2, "\r * allocating small object %lu", i); V_FLUSH(2); do { items[i].klen = make_random_key(items[i].key, max_small_key_size, true); } while (assoc_find(items[i].key, items[i].klen)); items[i].it = do_item_alloc(items[i].key, items[i].klen, FLAGS, 0, 0, addr); TASSERT(items[i].it); TASSERT(is_item_large_chunk(items[i].it) == 0); do_item_link(items[i].it, items[i].key); } V_PRINTF(2, "\n"); TASSERT(fsi.large_free_list_sz == 0 && fsi.small_free_list_sz == 0); /* access items we don't want to move. */ current_time += ITEM_UPDATE_INTERVAL + 1; /* touch every other item. the ones that are not touched in (0, * SMALL_CHUNKS_PER_LARGE_CHUNK * 2) will be evicted. */ for (i = 0; i < SMALL_CHUNKS_PER_LARGE_CHUNK * 2; i += 2) { do_item_update(items[i].it); } /* touch remaining items */ for (i = SMALL_CHUNKS_PER_LARGE_CHUNK * 2; i < num_objects; i ++) { do_item_update(items[i].it); } V_LPRINTF(2, "dereferencing objects\n"); for (i = 0; i < num_objects; i ++) { do_item_deref(items[i].it); } V_LPRINTF(2, "alloc after deref\n"); do { klen = make_random_key(key, max_small_key_size, true); } while (assoc_find(key, klen)); lru_trigger = do_item_alloc(key, klen, FLAGS, 0, min_size_for_large_chunk - klen, addr); TASSERT(lru_trigger != NULL); V_LPRINTF(2, "search for evicted object\n"); for (i = 1; i < SMALL_CHUNKS_PER_LARGE_CHUNK * 2; i += 2) { TASSERT(assoc_find(items[i].key, items[i].klen) == NULL); } V_LPRINTF(2, "ensuring that objects that shouldn't be evicted are still present\n"); for (i = 0; i < SMALL_CHUNKS_PER_LARGE_CHUNK * 2; i += 2) { /* these may have been moved. */ TASSERT((items[i].it = assoc_find(items[i].key, items[i].klen))); } for (i = SMALL_CHUNKS_PER_LARGE_CHUNK * 2; i < num_objects; i ++) { TASSERT(assoc_find(items[i].key, items[i].klen)); } V_LPRINTF(2, "cleanup objects\n"); for (i = 0; i < SMALL_CHUNKS_PER_LARGE_CHUNK * 2; i += 2) { do_item_unlink(items[i].it, UNLINK_NORMAL, items[i].key); } for (i = SMALL_CHUNKS_PER_LARGE_CHUNK * 2; i < num_objects; i ++) { do_item_unlink(items[i].it, UNLINK_NORMAL, items[i].key); } do_item_deref(lru_trigger); TASSERT(fsi.large_free_list_sz == large_free_list_sz && fsi.small_free_list_sz == small_free_list_sz); return 0; }
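/*
 * The tests above advance current_time by ITEM_UPDATE_INTERVAL + 1 before
 * touching items.  That matters because memcached-style do_item_update()
 * only re-links an item to the LRU head when its last access is old enough.
 * A minimal sketch of that rate limit; the types, names, and the interval
 * value here are stand-ins, not the source's:
 */
typedef unsigned int sk_rel_time_t;

struct sk_item { sk_rel_time_t time; };

#define SK_ITEM_UPDATE_INTERVAL 60   /* placeholder value */

static sk_rel_time_t sk_current_time;

static void sk_lru_relink_head(struct sk_item *it) {
    (void)it;   /* real code unlinks the item and re-links it at the LRU head */
}

static void sk_do_item_update(struct sk_item *it) {
    if (it->time < sk_current_time - SK_ITEM_UPDATE_INTERVAL) {
        sk_lru_relink_head(it);      /* only reorder when the access is stale */
        it->time = sk_current_time;
    }
}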
enum store_item_type do_store_item(item *it, int comm, const uint32_t hv) {
    char *key = ITEM_key(it);
    item *old_it = do_item_get(key, it->nkey, hv);
    enum store_item_type stored = NOT_STORED;
    item *new_it = NULL;
    int flags;

    if (old_it != NULL && comm == NREAD_ADD) {
        do_item_update(old_it);
    } else if (!old_it && (comm == NREAD_REPLACE || comm == NREAD_APPEND ||
                           comm == NREAD_PREPEND)) {
        /* replace/append/prepend need an existing item; nothing to do */
    } else {
        if (comm == NREAD_APPEND || comm == NREAD_PREPEND) {
            if (stored == NOT_STORED) {
                flags = (int) strtol(ITEM_suffix(old_it), (char **) NULL, 10);
                new_it = do_item_alloc(key, it->nkey, flags, old_it->exptime,
                                       it->nbytes + old_it->nbytes - 2, hv);
                if (!new_it) {
                    if (old_it)
                        do_item_remove(old_it);
                    return NOT_STORED;
                }
                if (comm == NREAD_APPEND) {
                    memcpy(ITEM_data(new_it), ITEM_data(old_it), old_it->nbytes);
                    memcpy(ITEM_data(new_it) + old_it->nbytes - 2,
                           ITEM_data(it), it->nbytes);
                } else {
                    memcpy(ITEM_data(new_it), ITEM_data(it), it->nbytes);
                    memcpy(ITEM_data(new_it) + it->nbytes - 2,
                           ITEM_data(old_it), old_it->nbytes);
                }
                it = new_it;
            }
        }
        if (stored == NOT_STORED) {
            if (old_it != NULL) {
                item_replace(old_it, it, hv);
            } else {
                do_item_link(it, hv);
            }
            stored = STORED;
        }
    }
    if (old_it != NULL) {
        do_item_remove(old_it);
    }
    if (new_it != NULL) {
        do_item_remove(new_it);
    }
    return stored;
}
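/*
 * The append/prepend path above sizes the combined value as
 * it->nbytes + old_it->nbytes - 2 because each stored value ends in "\r\n";
 * the second memcpy starts two bytes early so only one terminator survives.
 * A self-contained illustration of that arithmetic (plain buffers here, not
 * cache items):
 */
#include <stdlib.h>
#include <string.h>

/* Concatenate two "data\r\n" buffers into one "oldnew\r\n" buffer (append). */
static char *sketch_append(const char *old_data, size_t old_nbytes,
                           const char *new_data, size_t new_nbytes,
                           size_t *out_nbytes) {
    *out_nbytes = old_nbytes + new_nbytes - 2;            /* drop one CRLF */
    char *buf = malloc(*out_nbytes);
    if (buf == NULL) return NULL;
    memcpy(buf, old_data, old_nbytes);                     /* old value + CRLF */
    memcpy(buf + old_nbytes - 2, new_data, new_nbytes);    /* overwrite old CRLF */
    return buf;
}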
base_item* LRU_list::item_alloc(const char* key, size_t nkey, int flags, unsigned int exptime, int bytes) { base_item* it; it = do_item_alloc(key, nkey, flags, exptime, bytes, 0); return it; }
int LRU_list::do_store_item(base_item* it, Conn* c, uint32_t hv) {
    char* key = it->data;
    // fetch the existing item, if any
    base_item* old_it = do_item_get(key, it->nkey, hv);
    int store_stat = LRU_list::NOT_STORED;
    base_item* new_it = 0;
    int flags = 0;

    // an item with this key already exists
    if (old_it != 0 && c->cmd == NREAD_ADD) {
        /*
         * Why update the existing item:
         * 1. refresh its timestamp and re-link it in the LRU chain;
         * 2. do_item_remove runs below and decrements the refcount by one each
         *    time; an item whose refcount is 1 would be freed, but after the
         *    re-link the refcount is 2.
         */
        do_item_update(old_it);
    // the old item does not exist
    } else if (!old_it && (c->cmd == NREAD_REPLACE || c->cmd == NREAD_APPEND ||
                           c->cmd == NREAD_PREPEND)) {
        // nothing to do: replace only overwrites an existing value
    } else if (c->cmd == NREAD_CAS) {
        if (old_it == 0) {
            // no such item
            store_stat = LRU_list::NOT_FOUND;
        } else if (it->cas == old_it->cas) {
            item_replace(old_it, it, hv);
            store_stat = LRU_list::STORED;
        } else {
            if (mem_setting.verbose > 1) {
                std::cerr << "CAS: failure: expected " << old_it->get_cas()
                          << " but got " << it->cas << std::endl;
            }
            store_stat = LRU_list::EXISTS;
        }
    } else {
        // unlike the second branch above, here the old item exists and the
        // command is replace, append, prepend or set
        if (c->cmd == NREAD_APPEND || c->cmd == NREAD_PREPEND) {
            // only validate CAS when the client supplied one
            if (it->cas != 0) {
                if (it->cas != old_it->cas) {
                    store_stat = LRU_list::EXISTS;
                }
            }
            if (store_stat == LRU_list::NOT_STORED) {
                new_it = do_item_alloc(key, it->nkey, flags, old_it->exptime,
                                       it->nbytes + old_it->nbytes - 2, hv);
                // allocation failed
                if (new_it == 0) {
                    if (old_it != 0)
                        do_item_remove(old_it);
                    return LRU_list::NOT_STORED;
                }
                new_it->nkey = old_it->nkey;
                new_it->item_flag = flags;
                new_it->exptime = old_it->exptime;
                new_it->nbytes = it->nbytes + old_it->nbytes - 2;
                memcpy(new_it->data, old_it->data, old_it->nkey);
                new_it->data[old_it->nkey] = '\0';
                // copy the data
                if (c->cmd == NREAD_APPEND) {
                    memcpy(new_it->real_data_addr(), old_it->real_data_addr(),
                           old_it->nbytes);
                    memcpy(new_it->real_data_addr() + old_it->nbytes - 2 /*\r\n*/,
                           it->real_data_addr(), it->nbytes);
                } else {
                    // NREAD_PREPEND
                    memcpy(new_it->real_data_addr(), it->real_data_addr(),
                           it->nbytes);
                    memcpy(new_it->real_data_addr() + it->nbytes - 2,
                           old_it->real_data_addr(), old_it->nbytes);
                }
                it = new_it;
            }
        }
        if (store_stat == LRU_list::NOT_STORED) {
            it->cas++;
            if (old_it != 0)
                item_replace(old_it, it, hv);
            else // set a new key-value
                do_item_link(it, hv);
            store_stat = LRU_list::STORED;
        }
    }

    if (old_it != 0)
        do_item_remove(old_it);
    if (new_it != 0)
        do_item_remove(new_it);
    if (store_stat == LRU_list::STORED) {
        c->cas = it->get_cas();
    }
    return store_stat;
}
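/*
 * The CAS branch above (and the engine variants later in this section) all
 * reduce to the same three-way decision: key missing, CAS matches, or CAS
 * mismatches.  An engine-agnostic sketch of that decision; the enum and the
 * helper name are illustrative only:
 */
#include <stdint.h>

enum sketch_cas_result { SK_NOT_FOUND, SK_STORED, SK_EXISTS };

static enum sketch_cas_result sketch_check_cas(int old_item_present,
                                               uint64_t old_cas,
                                               uint64_t requested_cas) {
    if (!old_item_present) {
        return SK_NOT_FOUND;       /* nothing to compare against */
    }
    if (requested_cas == old_cas) {
        return SK_STORED;          /* caller may replace the item */
    }
    return SK_EXISTS;              /* someone changed the value in between */
}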
/* * allocate all memory with small and large chunks. link them such the * allocation of a large object will start evicting small chunks but then stop * because the large chunk LRU has an older item. this covers part of case 3 * and part of case 4 for the small item alloc in flat_storage_lru_evict(..). */ static int mixed_items_release_small_and_large_items_scan_stop_test(int verbose) { typedef struct { item* it; char key[KEY_MAX_LENGTH]; uint8_t klen; } mixed_items_release_one_small_item_t; size_t num_small_objects = (fsi.large_free_list_sz / 2) * SMALL_CHUNKS_PER_LARGE_CHUNK; /* this is not the same as fsi.large_free_list_sz / 2 due to rounding. */ size_t num_large_objects = fsi.large_free_list_sz - (fsi.large_free_list_sz / 2); mixed_items_release_one_small_item_t* large_items = malloc(sizeof(mixed_items_release_one_small_item_t) * num_large_objects); mixed_items_release_one_small_item_t* small_items = malloc(sizeof(mixed_items_release_one_small_item_t) * num_small_objects); item* lru_trigger; size_t max_small_key_size = SMALL_TITLE_CHUNK_DATA_SZ; size_t min_size_for_large_chunk = ( sizeof( ((small_title_chunk_t*) 0)->data ) ) + ( (SMALL_CHUNKS_PER_LARGE_CHUNK - 1) * sizeof( ((small_body_chunk_t*) 0)->data ) ) + 1; size_t i; char key[KEY_MAX_LENGTH]; size_t klen; size_t large_free_list_sz = fsi.large_free_list_sz, small_free_list_sz = fsi.small_free_list_sz; V_PRINTF(1, " * %s\n", __FUNCTION__); TASSERT(fsi.large_free_list_sz != 0); TASSERT(fsi.small_free_list_sz == 0); for (i = 0; i < num_small_objects; i ++) { V_PRINTF(2, "\r * allocating small object %lu", i); V_FLUSH(2); do { small_items[i].klen = make_random_key(small_items[i].key, max_small_key_size, true); } while (assoc_find(small_items[i].key, small_items[i].klen)); small_items[i].it = do_item_alloc(small_items[i].key, small_items[i].klen, FLAGS, 0, 0, addr); TASSERT(small_items[i].it); TASSERT(is_item_large_chunk(small_items[i].it) == 0); do_item_link(small_items[i].it, small_items[i].key); } V_PRINTF(2, "\n"); for (i = 0; i < num_large_objects; i ++) { V_PRINTF(2, "\r * allocating large object %lu", i); V_FLUSH(2); do { large_items[i].klen = make_random_key(large_items[i].key, KEY_MAX_LENGTH, true); } while (assoc_find(large_items[i].key, large_items[i].klen)); large_items[i].it = do_item_alloc(large_items[i].key, large_items[i].klen, FLAGS, 0, min_size_for_large_chunk - large_items[i].klen, addr); TASSERT(large_items[i].it); TASSERT(is_item_large_chunk(large_items[i].it)); do_item_link(large_items[i].it, large_items[i].key); } V_PRINTF(2, "\n"); TASSERT(fsi.large_free_list_sz == 0 && fsi.small_free_list_sz == 0); V_LPRINTF(2, "update items\n"); /* update the objects we want to clobber *first*. but since ties go to the * large item, we need to bump the time stamp to ensure the small item is * released first. */ current_time += ITEM_UPDATE_INTERVAL + 1; /* initial bump to ensure that * LRU reordering takes place. */ do_item_update(small_items[0].it); current_time += 1; do_item_update(large_items[0].it); /* bump the timestamp and add the remaining items. 
*/ current_time += 1; for (i = 1; i < num_small_objects; i ++) { do_item_update(small_items[i].it); } for (i = 1; i < num_large_objects; i ++) { do_item_update(large_items[i].it); } V_LPRINTF(2, "dereferencing objects\n"); for (i = 0; i < num_small_objects; i ++) { do_item_deref(small_items[i].it); } for (i = 0; i < num_large_objects; i ++) { do_item_deref(large_items[i].it); } V_LPRINTF(2, "alloc after deref\n"); do { klen = make_random_key(key, max_small_key_size, true); } while (assoc_find(key, klen)); lru_trigger = do_item_alloc(key, klen, FLAGS, 0, LARGE_TITLE_CHUNK_DATA_SZ - klen, addr); TASSERT(lru_trigger != NULL); TASSERT(is_item_large_chunk(lru_trigger)); V_LPRINTF(2, "search for evicted objects\n"); TASSERT(assoc_find(small_items[0].key, small_items[0].klen) == NULL); TASSERT(assoc_find(large_items[0].key, large_items[0].klen) == NULL); V_LPRINTF(2, "ensuring that objects that shouldn't be evicted are still present\n"); for (i = 1; i < num_small_objects; i ++) { TASSERT(assoc_find(small_items[i].key, small_items[i].klen)); } for (i = 1; i < num_large_objects; i ++) { TASSERT(assoc_find(large_items[i].key, large_items[i].klen)); } V_LPRINTF(2, "cleanup objects\n"); for (i = 1; i < num_small_objects; i ++) { do_item_unlink(small_items[i].it, UNLINK_NORMAL, small_items[i].key); } for (i = 1; i < num_large_objects; i ++) { do_item_unlink(large_items[i].it, UNLINK_NORMAL, large_items[i].key); } do_item_deref(lru_trigger); TASSERT(fsi.large_free_list_sz == large_free_list_sz && fsi.small_free_list_sz == small_free_list_sz); return 0; }
/* * this is a negative test to ensure the proper behavior when we don't have * sufficient resources. in this case, we have sufficient small items on the * LRU, and enough of them have refcount == 0, but all the parent broken chunks * have refcount > 0. */ static int insufficient_available_large_broken_chunks(int verbose) { typedef struct { item* it; char key[KEY_MAX_LENGTH]; uint8_t klen; } all_small_chunks_key_t; size_t num_objects = fsi.large_free_list_sz * SMALL_CHUNKS_PER_LARGE_CHUNK; all_small_chunks_key_t* small_items = malloc(sizeof(all_small_chunks_key_t) * num_objects); item* lru_trigger; size_t max_key_size = SMALL_TITLE_CHUNK_DATA_SZ; size_t min_size_for_large_chunk = ( sizeof( ((small_title_chunk_t*) 0)->data ) ) + ( (SMALL_CHUNKS_PER_LARGE_CHUNK - 1) * sizeof( ((small_body_chunk_t*) 0)->data ) ) + 1; size_t i; char key[KEY_MAX_LENGTH]; size_t klen; size_t large_free_list_sz = fsi.large_free_list_sz, small_free_list_sz = fsi.small_free_list_sz; V_PRINTF(1, " * %s\n", __FUNCTION__); TASSERT(fsi.large_free_list_sz != 0); TASSERT(fsi.small_free_list_sz == 0); for (i = 0; i < num_objects; i ++) { V_PRINTF(2, "\r * allocating object %lu", i); V_FLUSH(2); do { small_items[i].klen = make_random_key(small_items[i].key, max_key_size, true); } while (assoc_find(small_items[i].key, small_items[i].klen)); small_items[i].it = do_item_alloc(small_items[i].key, small_items[i].klen, FLAGS, 0, 0, addr); TASSERT(small_items[i].it); TASSERT(is_item_large_chunk(small_items[i].it) == false); do_item_link(small_items[i].it, small_items[i].key); } V_PRINTF(2, "\n"); TASSERT(fsi.large_free_list_sz == 0 && fsi.small_free_list_sz == 0); V_LPRINTF(2, "alloc before deref\n"); do { klen = make_random_key(key, max_key_size, true); } while (assoc_find(key, klen)); lru_trigger = do_item_alloc(key, klen, FLAGS, 0, min_size_for_large_chunk - klen, addr); TASSERT(lru_trigger == NULL); V_LPRINTF(2, "dereferencing objects\n"); for (i = 0; i < num_objects; i += 2) { do_item_deref(small_items[i].it); } V_LPRINTF(2, "alloc after deref\n"); lru_trigger = do_item_alloc(key, klen, FLAGS, 0, min_size_for_large_chunk - klen, addr); TASSERT(lru_trigger == NULL); V_LPRINTF(2, "ensuring that objects that shouldn't be evicted are still present\n"); for (i = 0; i < num_objects; i ++) { bool should_be_found; /* we free everything we encounter that has no refcount until we hit the * LRU_SEARCH_DEPTH, at which time we cease searching. */ if (i % 2 == 0 && i < (LRU_SEARCH_DEPTH * 2)) { should_be_found = false; } else { should_be_found = true; } TASSERT((assoc_find(small_items[i].key, small_items[i].klen) ? (true) : (false)) == should_be_found); } V_LPRINTF(2, "cleanup objects\n"); for (i = 0; i < num_objects; i ++) { /* we dereference all the odd numbered items */ if ((i % 2) != 0) { do_item_deref(small_items[i].it); } /* we unlink everything that's still in the LRU. */ if (i % 2 == 0 && i < (LRU_SEARCH_DEPTH * 2)) { ; } else { do_item_unlink(small_items[i].it, UNLINK_NORMAL, small_items[i].key); } } TASSERT(fsi.large_free_list_sz == large_free_list_sz && fsi.small_free_list_sz == small_free_list_sz); return 0; }
/* allocate all memory with small chunks. allocate one more object. it should * free up the oldest object. release all objects. this covers case 1 for the * small item alloc in flat_storage_lru_evict(..). */ static int all_small_chunks_test(int verbose) { typedef struct { item* it; char key[KEY_MAX_LENGTH]; uint8_t klen; } all_small_chunks_key_t; size_t num_objects = fsi.large_free_list_sz * SMALL_CHUNKS_PER_LARGE_CHUNK; all_small_chunks_key_t* small_items = malloc(sizeof(all_small_chunks_key_t) * num_objects); item* lru_trigger; size_t max_key_size = SMALL_TITLE_CHUNK_DATA_SZ; size_t i; char key[KEY_MAX_LENGTH]; size_t klen; size_t large_free_list_sz = fsi.large_free_list_sz, small_free_list_sz = fsi.small_free_list_sz; V_PRINTF(1, " * %s\n", __FUNCTION__); TASSERT(fsi.large_free_list_sz != 0); TASSERT(fsi.small_free_list_sz == 0); for (i = 0; i < num_objects; i ++) { V_PRINTF(2, "\r * allocating object %lu", i); V_FLUSH(2); do { small_items[i].klen = make_random_key(small_items[i].key, max_key_size, true); } while (assoc_find(small_items[i].key, small_items[i].klen)); small_items[i].it = do_item_alloc(small_items[i].key, small_items[i].klen, FLAGS, 0, 0, addr); TASSERT(small_items[i].it); TASSERT(is_item_large_chunk(small_items[i].it) == false); do_item_link(small_items[i].it, small_items[i].key); } V_PRINTF(2, "\n"); TASSERT(fsi.large_free_list_sz == 0 && fsi.small_free_list_sz == 0); V_LPRINTF(2, "alloc before deref\n"); do { klen = make_random_key(key, max_key_size, true); } while (assoc_find(key, klen)); lru_trigger = do_item_alloc(key, klen, FLAGS, 0, 0, addr); TASSERT(lru_trigger == NULL); V_LPRINTF(2, "dereferencing objects\n"); for (i = 0; i < num_objects; i ++) { do_item_deref(small_items[i].it); } V_LPRINTF(2, "alloc after deref\n"); lru_trigger = do_item_alloc(key, klen, FLAGS, 0, 0, addr); TASSERT(lru_trigger != NULL); V_LPRINTF(2, "search for evicted object\n"); TASSERT(assoc_find(small_items[0].key, small_items[0].klen) == NULL); V_LPRINTF(2, "ensuring that objects that shouldn't be evicted are still present\n"); for (i = 1; i < num_objects; i ++) { TASSERT(assoc_find(small_items[i].key, small_items[i].klen)); } V_LPRINTF(2, "cleanup objects\n"); for (i = 1; i < num_objects; i ++) { do_item_unlink(small_items[i].it, UNLINK_NORMAL, small_items[i].key); } do_item_deref(lru_trigger); TASSERT(fsi.large_free_list_sz == large_free_list_sz && fsi.small_free_list_sz == small_free_list_sz); return 0; }
static int storage_write(void *storage, const int clsid, const int item_age) { int did_moves = 0; struct lru_pull_tail_return it_info; it_info.it = NULL; lru_pull_tail(clsid, COLD_LRU, 0, LRU_PULL_RETURN_ITEM, 0, &it_info); /* Item is locked, and we have a reference to it. */ if (it_info.it == NULL) { return did_moves; } obj_io io; item *it = it_info.it; /* First, storage for the header object */ size_t orig_ntotal = ITEM_ntotal(it); uint32_t flags; if ((it->it_flags & ITEM_HDR) == 0 && (item_age == 0 || current_time - it->time > item_age)) { FLAGS_CONV(it, flags); item *hdr_it = do_item_alloc(ITEM_key(it), it->nkey, flags, it->exptime, sizeof(item_hdr)); /* Run the storage write understanding the start of the item is dirty. * We will fill it (time/exptime/etc) from the header item on read. */ if (hdr_it != NULL) { int bucket = (it->it_flags & ITEM_CHUNKED) ? PAGE_BUCKET_CHUNKED : PAGE_BUCKET_DEFAULT; // Compress soon to expire items into similar pages. if (it->exptime - current_time < settings.ext_low_ttl) { bucket = PAGE_BUCKET_LOWTTL; } hdr_it->it_flags |= ITEM_HDR; io.len = orig_ntotal; io.mode = OBJ_IO_WRITE; // NOTE: when the item is read back in, the slab mover // may see it. Important to have refcount>=2 or ~ITEM_LINKED assert(it->refcount >= 2); // NOTE: write bucket vs free page bucket will disambiguate once // lowttl feature is better understood. if (extstore_write_request(storage, bucket, bucket, &io) == 0) { // cuddle the hash value into the time field so we don't have // to recalculate it. item *buf_it = (item *) io.buf; buf_it->time = it_info.hv; // copy from past the headers + time headers. // TODO: should be in items.c if (it->it_flags & ITEM_CHUNKED) { // Need to loop through the item and copy item_chunk *sch = (item_chunk *) ITEM_schunk(it); int remain = orig_ntotal; int copied = 0; // copy original header int hdrtotal = ITEM_ntotal(it) - it->nbytes; memcpy((char *)io.buf+STORE_OFFSET, (char *)it+STORE_OFFSET, hdrtotal - STORE_OFFSET); copied = hdrtotal; // copy data in like it were one large object. while (sch && remain) { assert(remain >= sch->used); memcpy((char *)io.buf+copied, sch->data, sch->used); // FIXME: use one variable? remain -= sch->used; copied += sch->used; sch = sch->next; } } else { memcpy((char *)io.buf+STORE_OFFSET, (char *)it+STORE_OFFSET, io.len-STORE_OFFSET); } // crc what we copied so we can do it sequentially. buf_it->it_flags &= ~ITEM_LINKED; buf_it->exptime = crc32c(0, (char*)io.buf+STORE_OFFSET, orig_ntotal-STORE_OFFSET); extstore_write(storage, &io); item_hdr *hdr = (item_hdr *) ITEM_data(hdr_it); hdr->page_version = io.page_version; hdr->page_id = io.page_id; hdr->offset = io.offset; // overload nbytes for the header it hdr_it->nbytes = it->nbytes; /* success! Now we need to fill relevant data into the new * header and replace. Most of this requires the item lock */ /* CAS gets set while linking. Copy post-replace */ item_replace(it, hdr_it, it_info.hv); ITEM_set_cas(hdr_it, ITEM_get_cas(it)); do_item_remove(hdr_it); did_moves = 1; LOGGER_LOG(NULL, LOG_EVICTIONS, LOGGER_EXTSTORE_WRITE, it, bucket); } else { /* Failed to write for some reason, can't continue. */ slabs_free(hdr_it, ITEM_ntotal(hdr_it), ITEM_clsid(hdr_it)); } } } do_item_remove(it); item_unlock(it_info.hv); return did_moves; }
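/*
 * storage_write() above only flushes an item when it is not already a header
 * stub and is old enough (item_age == 0 meaning "any age"), and it diverts
 * soon-to-expire values to the low-TTL page bucket.  A condensed sketch of
 * those two decisions; the types and constants here are stand-ins, not the
 * source's:
 */
#include <stdbool.h>
#include <stdint.h>

#define SK_ITEM_HDR            0x01
#define SK_PAGE_BUCKET_DEFAULT 0
#define SK_PAGE_BUCKET_LOWTTL  1

struct sk_extitem { uint16_t it_flags; uint32_t time; uint32_t exptime; };

static bool sk_should_flush(const struct sk_extitem *it, uint32_t now, int item_age) {
    if (it->it_flags & SK_ITEM_HDR) return false;          /* already flushed */
    return item_age == 0 || now - it->time > (uint32_t)item_age;
}

static int sk_pick_bucket(const struct sk_extitem *it, uint32_t now, uint32_t low_ttl) {
    /* group values that expire soon so their pages can be reclaimed together */
    return (it->exptime - now < low_ttl) ? SK_PAGE_BUCKET_LOWTTL
                                         : SK_PAGE_BUCKET_DEFAULT;
}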
/* * allocate all memory with small and large chunks. link them such that the * small items are the oldest. allocate one large object that can be covered by * the release of one large item. this covers part of case 4 for the large item * alloc in flat_storage_lru_evict(..). */ static int mixed_items_release_one_large_item_test(int verbose) { typedef struct { item* it; char key[KEY_MAX_LENGTH]; uint8_t klen; } mixed_items_release_one_small_item_t; size_t num_small_objects = (fsi.large_free_list_sz / 2) * SMALL_CHUNKS_PER_LARGE_CHUNK; /* this is not the same as fsi.large_free_list_sz / 2 due to rounding. */ size_t num_large_objects = fsi.large_free_list_sz - (fsi.large_free_list_sz / 2); mixed_items_release_one_small_item_t* large_items = malloc(sizeof(mixed_items_release_one_small_item_t) * num_large_objects); mixed_items_release_one_small_item_t* small_items = malloc(sizeof(mixed_items_release_one_small_item_t) * num_small_objects); item* lru_trigger; size_t max_small_key_size = SMALL_TITLE_CHUNK_DATA_SZ; size_t min_size_for_large_chunk = ( sizeof( ((small_title_chunk_t*) 0)->data ) ) + ( (SMALL_CHUNKS_PER_LARGE_CHUNK - 1) * sizeof( ((small_body_chunk_t*) 0)->data ) ) + 1; size_t i; char key[KEY_MAX_LENGTH]; size_t klen; size_t large_free_list_sz = fsi.large_free_list_sz, small_free_list_sz = fsi.small_free_list_sz; V_PRINTF(1, " * %s\n", __FUNCTION__); TASSERT(fsi.large_free_list_sz != 0); TASSERT(fsi.small_free_list_sz == 0); for (i = 0; i < num_large_objects; i ++) { V_PRINTF(2, "\r * allocating large object %lu", i); V_FLUSH(2); do { large_items[i].klen = make_random_key(large_items[i].key, KEY_MAX_LENGTH, true); } while (assoc_find(large_items[i].key, large_items[i].klen)); large_items[i].it = do_item_alloc(large_items[i].key, large_items[i].klen, FLAGS, 0, min_size_for_large_chunk - large_items[i].klen, addr); TASSERT(large_items[i].it); TASSERT(is_item_large_chunk(large_items[i].it)); do_item_link(large_items[i].it, large_items[i].key); } V_PRINTF(2, "\n"); for (i = 0; i < num_small_objects; i ++) { V_PRINTF(2, "\r * allocating small object %lu", i); V_FLUSH(2); do { small_items[i].klen = make_random_key(small_items[i].key, max_small_key_size, true); } while (assoc_find(small_items[i].key, small_items[i].klen)); small_items[i].it = do_item_alloc(small_items[i].key, small_items[i].klen, FLAGS, 0, 0, addr); TASSERT(small_items[i].it); TASSERT(is_item_large_chunk(small_items[i].it) == 0); do_item_link(small_items[i].it, small_items[i].key); } V_PRINTF(2, "\n"); TASSERT(fsi.large_free_list_sz == 0 && fsi.small_free_list_sz == 0); V_LPRINTF(2, "alloc before deref\n"); do { klen = make_random_key(key, max_small_key_size, true); } while (assoc_find(key, klen)); lru_trigger = do_item_alloc(key, klen, FLAGS, 0, 0, addr); TASSERT(lru_trigger == NULL); V_LPRINTF(2, "dereferencing objects\n"); for (i = 0; i < num_small_objects; i ++) { do_item_deref(small_items[i].it); } for (i = 0; i < num_large_objects; i ++) { do_item_deref(large_items[i].it); } V_LPRINTF(2, "alloc after deref\n"); lru_trigger = do_item_alloc(key, klen, FLAGS, 0, min_size_for_large_chunk - klen, addr); TASSERT(lru_trigger != NULL); V_LPRINTF(2, "search for evicted object\n"); TASSERT(assoc_find(large_items[0].key, large_items[0].klen) == NULL); V_LPRINTF(2, "ensuring that objects that shouldn't be evicted are still present\n"); for (i = 0; i < num_small_objects; i ++) { TASSERT(assoc_find(small_items[i].key, small_items[i].klen)); } for (i = 1; i < num_large_objects; i ++) { TASSERT(assoc_find(large_items[i].key, 
large_items[i].klen)); } V_LPRINTF(2, "cleanup objects\n"); for (i = 0; i < num_small_objects; i ++) { do_item_unlink(small_items[i].it, UNLINK_NORMAL, small_items[i].key); } for (i = 1; i < num_large_objects; i ++) { do_item_unlink(large_items[i].it, UNLINK_NORMAL, large_items[i].key); } do_item_deref(lru_trigger); TASSERT(fsi.large_free_list_sz == large_free_list_sz && fsi.small_free_list_sz == small_free_list_sz); return 0; }
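/*
 * Several tests above derive min_size_for_large_chunk as: the small title
 * chunk's data capacity, plus (SMALL_CHUNKS_PER_LARGE_CHUNK - 1) small body
 * capacities, plus one byte - i.e. the first key+value total that no longer
 * fits in a fully chained small-chunk item and therefore forces a large-chunk
 * allocation.  The same arithmetic as a helper; the capacity constants below
 * are placeholders, not the source's values:
 */
#include <stdbool.h>
#include <stddef.h>

#define SK_SMALL_TITLE_DATA_SZ     40   /* placeholder */
#define SK_SMALL_BODY_DATA_SZ      56   /* placeholder */
#define SK_SMALL_CHUNKS_PER_LARGE  16   /* placeholder */

static size_t sk_small_item_capacity(void) {
    return SK_SMALL_TITLE_DATA_SZ +
           (SK_SMALL_CHUNKS_PER_LARGE - 1) * SK_SMALL_BODY_DATA_SZ;
}

/* True when a key of nkey bytes plus nbytes of data needs a large-chunk item. */
static bool sk_needs_large_chunk(size_t nkey, size_t nbytes) {
    return nkey + nbytes > sk_small_item_capacity();
}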
/*
 * Stores an item in the cache according to the semantics of one of the set
 * commands. In threaded mode, this is protected by the cache lock.
 *
 * Returns the state of storage.
 */
static ENGINE_ERROR_CODE do_store_item(struct default_engine *engine,
                                       hash_item *it, uint64_t *cas,
                                       ENGINE_STORE_OPERATION operation,
                                       const void *cookie) {
    const char *key = item_get_key(it);
    hash_item *old_it = do_item_get(engine, key, it->nkey);
    ENGINE_ERROR_CODE stored = ENGINE_NOT_STORED;
    hash_item *new_it = NULL;

    if (old_it != NULL && operation == OPERATION_ADD) {
        /* add only adds a nonexistent item, but promote to head of LRU */
        do_item_update(engine, old_it);
    } else if (!old_it && (operation == OPERATION_REPLACE ||
               operation == OPERATION_APPEND || operation == OPERATION_PREPEND)) {
        /* replace only replaces an existing value; don't store */
    } else if (operation == OPERATION_CAS) {
        /* validate cas operation */
        if (old_it == NULL) {
            // LRU expired
            stored = ENGINE_KEY_ENOENT;
        } else if (item_get_cas(it) == item_get_cas(old_it)) {
            // cas validates
            // it and old_it may belong to different classes.
            // I'm updating the stats for the one that's getting pushed out
            do_item_replace(engine, old_it, it);
            stored = ENGINE_SUCCESS;
        } else {
            if (engine->config.verbose > 1) {
                EXTENSION_LOGGER_DESCRIPTOR *logger;
                logger = (void*)engine->server.extension->get_extension(EXTENSION_LOGGER);
                logger->log(EXTENSION_LOG_INFO, NULL,
                            "CAS: failure: expected %"PRIu64", got %"PRIu64"\n",
                            item_get_cas(old_it), item_get_cas(it));
            }
            stored = ENGINE_KEY_EEXISTS;
        }
    } else {
        /*
         * Append - combine new and old record into single one. Here it's
         * atomic and thread-safe.
         */
        if (operation == OPERATION_APPEND || operation == OPERATION_PREPEND) {
            /*
             * Validate CAS
             */
            if (item_get_cas(it) != 0) {
                // CAS must be equal
                if (item_get_cas(it) != item_get_cas(old_it)) {
                    stored = ENGINE_KEY_EEXISTS;
                }
            }

            if (stored == ENGINE_NOT_STORED) {
                /* we have it and old_it here - alloc memory to hold both */
                new_it = do_item_alloc(engine, key, it->nkey,
                                       old_it->flags, old_it->exptime,
                                       it->nbytes + old_it->nbytes, cookie);
                if (new_it == NULL) {
                    /* SERVER_ERROR out of memory */
                    if (old_it != NULL) {
                        do_item_release(engine, old_it);
                    }
                    return ENGINE_NOT_STORED;
                }

                /* copy data from it and old_it to new_it */
                if (operation == OPERATION_APPEND) {
                    memcpy(item_get_data(new_it), item_get_data(old_it), old_it->nbytes);
                    memcpy(item_get_data(new_it) + old_it->nbytes,
                           item_get_data(it), it->nbytes);
                } else {
                    /* OPERATION_PREPEND */
                    memcpy(item_get_data(new_it), item_get_data(it), it->nbytes);
                    memcpy(item_get_data(new_it) + it->nbytes,
                           item_get_data(old_it), old_it->nbytes);
                }

                it = new_it;
            }
        }

        if (stored == ENGINE_NOT_STORED) {
            if (old_it != NULL) {
                do_item_replace(engine, old_it, it);
            } else {
                do_item_link(engine, it);
            }

            *cas = item_get_cas(it);
            stored = ENGINE_SUCCESS;
        }
    }

    if (old_it != NULL) {
        do_item_release(engine, old_it);   /* release our reference */
    }
    if (new_it != NULL) {
        do_item_release(engine, new_it);
    }
    if (stored == ENGINE_SUCCESS) {
        *cas = item_get_cas(it);
    }
    return stored;
}
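/*
 * The comment on do_store_item() says it runs under the cache lock in
 * threaded mode.  The public entry point is not shown in this section; it
 * presumably follows the same lock-wrap-unlock shape as the item_alloc()
 * wrappers earlier.  A sketch of that wrapper (assumed, not taken from the
 * source):
 */
ENGINE_ERROR_CODE store_item(struct default_engine *engine,
                             hash_item *it, uint64_t *cas,
                             ENGINE_STORE_OPERATION operation,
                             const void *cookie) {
    ENGINE_ERROR_CODE ret;

    pthread_mutex_lock(&engine->cache_lock);   /* serialize with other cache ops */
    ret = do_store_item(engine, it, cas, operation, cookie);
    pthread_mutex_unlock(&engine->cache_lock);
    return ret;
}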
/* * Allocates a new item. */ item *item_alloc(char *key, size_t nkey, int flags, rel_time_t exptime, int nbytes) { item *it; /* do_item_alloc handles its own locks */ it = do_item_alloc(key, nkey, flags, exptime, nbytes); return it; }
/*
 * Stores an item in the cache according to the semantics of one of the set
 * commands. In threaded mode, this is protected by the cache lock.
 *
 * Returns the state of storage.
 */
static ENGINE_ERROR_CODE do_item_store(struct demo_engine *engine,
                                       hash_item *it, uint64_t *cas,
                                       ENGINE_STORE_OPERATION operation,
                                       const void *cookie) {
    const char *key = dm_item_get_key(it);
    hash_item *old_it;
    hash_item *new_it = NULL;
    ENGINE_ERROR_CODE stored;

    old_it = do_item_get(engine, key, it->nkey, true);
    if (old_it != NULL) {
        if (operation == OPERATION_ADD) {
            do_item_release(engine, old_it);
            return ENGINE_NOT_STORED;
        }
    } else {
        if (operation == OPERATION_REPLACE ||
            operation == OPERATION_APPEND || operation == OPERATION_PREPEND) {
            return ENGINE_NOT_STORED;
        }
        if (operation == OPERATION_CAS) {
            return ENGINE_KEY_ENOENT;
        }
    }

    stored = ENGINE_NOT_STORED;

    if (operation == OPERATION_CAS) {
        assert(old_it != NULL);
        if (dm_item_get_cas(it) == dm_item_get_cas(old_it)) {
            // cas validates
            // it and old_it may belong to different classes.
            // I'm updating the stats for the one that's getting pushed out
            do_item_replace(engine, old_it, it);
            stored = ENGINE_SUCCESS;
        } else {
            if (engine->config.verbose > 1) {
                logger->log(EXTENSION_LOG_WARNING, NULL,
                            "CAS: failure: expected %"PRIu64", got %"PRIu64"\n",
                            dm_item_get_cas(old_it), dm_item_get_cas(it));
            }
            stored = ENGINE_KEY_EEXISTS;
        }
    } else {
        /*
         * Append - combine new and old record into single one. Here it's
         * atomic and thread-safe.
         */
        if (operation == OPERATION_APPEND || operation == OPERATION_PREPEND) {
            assert(old_it != NULL);
            /*
             * Validate CAS
             */
            if (dm_item_get_cas(it) != 0) {
                // CAS must be equal
                if (dm_item_get_cas(it) != dm_item_get_cas(old_it)) {
                    stored = ENGINE_KEY_EEXISTS;
                }
            }
            if (stored == ENGINE_NOT_STORED) {
                /* we have it and old_it here - alloc memory to hold both */
                new_it = do_item_alloc(engine, key, it->nkey,
                                       old_it->flags, old_it->exptime,
                                       it->nbytes + old_it->nbytes - 2 /* CRLF */,
                                       cookie);
                if (new_it == NULL) {
                    /* SERVER_ERROR out of memory */
                    if (old_it != NULL)
                        do_item_release(engine, old_it);
                    return ENGINE_NOT_STORED;
                }
                /* copy data from it and old_it to new_it */
                if (operation == OPERATION_APPEND) {
                    memcpy(dm_item_get_data(new_it), dm_item_get_data(old_it), old_it->nbytes);
                    memcpy(dm_item_get_data(new_it) + old_it->nbytes - 2 /* CRLF */,
                           dm_item_get_data(it), it->nbytes);
                } else {
                    /* OPERATION_PREPEND */
                    memcpy(dm_item_get_data(new_it), dm_item_get_data(it), it->nbytes);
                    memcpy(dm_item_get_data(new_it) + it->nbytes - 2 /* CRLF */,
                           dm_item_get_data(old_it), old_it->nbytes);
                }
                it = new_it;
            }
        }
        if (stored == ENGINE_NOT_STORED) {
            if (old_it != NULL) {
                do_item_replace(engine, old_it, it);
                stored = ENGINE_SUCCESS;
            } else {
                stored = do_item_link(engine, it);
            }
            if (stored == ENGINE_SUCCESS) {
                *cas = dm_item_get_cas(it);
            }
        }
    }

    if (old_it != NULL) {
        do_item_release(engine, old_it);   /* release our reference */
    }
    if (new_it != NULL) {
        do_item_release(engine, new_it);
    }
    if (stored == ENGINE_SUCCESS) {
        *cas = dm_item_get_cas(it);
    }
    return stored;
}