/** wrapper around assoc_find which does the lazy expiration logic */ item *do_item_get(const char *key, const size_t nkey, const uint32_t hv) { mutex_lock(&cache_lock); item *it = assoc_find(key, nkey, hv); if (it != NULL) { refcount_incr(&it->refcount); /* Optimization for slab reassignment. prevents popular items from * jamming in busy wait. Can only do this here to satisfy lock order * of item_lock, cache_lock, slabs_lock. */ if (slab_rebalance_signal && ((void *)it >= slab_rebal.slab_start && (void *)it < slab_rebal.slab_end)) { do_item_unlink_nolock(it, hv); do_item_remove(it); it = NULL; } } pthread_mutex_unlock(&cache_lock); int was_found = 0; if (settings.verbose > 2) { if (it == NULL) { fprintf(stderr, "> NOT FOUND %s", key); } else { fprintf(stderr, "> FOUND KEY %s", ITEM_key(it)); was_found++; } } if (it != NULL) { if (settings.oldest_live != 0 && settings.oldest_live <= current_time && it->time <= settings.oldest_live) { do_item_unlink(it, hv); do_item_remove(it); it = NULL; if (was_found) { fprintf(stderr, " -nuked by flush"); } } else if (it->exptime != 0 && it->exptime <= current_time) { if (it->exptime + 10 < current_time) { do_item_unlink(it, hv); do_item_remove(it); if (was_found) { fprintf(stderr, " -nuked by expire"); } } else { /* re-active just expired items, to anti miss-storm */ it->exptime = current_time + 10; } it = NULL; } else { it->it_flags |= ITEM_FETCHED; DEBUG_REFCNT(it, '+'); } } if (settings.verbose > 2) fprintf(stderr, "\n"); return it; }
/* * Decrements the reference count on an item and adds it to the freelist if * needed. */ void item_remove(item *cq_item) { #ifdef MOXI_ITEM_MALLOC // Skip past the lock, since we're using malloc. do_item_remove(cq_item); #else pthread_mutex_lock(&cache_lock); do_item_remove(cq_item); pthread_mutex_unlock(&cache_lock); #endif }
/* I pulled this out to make the main thread clearer, but it reaches into the * main thread's values too much. Should rethink again. */ static void item_crawler_evaluate(item *search, uint32_t hv, int i) { rel_time_t oldest_live = settings.oldest_live; if ((search->exptime != 0 && search->exptime < current_time) || (search->time <= oldest_live && oldest_live <= current_time)) { itemstats[i].crawler_reclaimed++; if (settings.verbose > 1) { int ii; char *key = ITEM_key(search); fprintf(stderr, "LRU crawler found an expired item (flags: %d, slab: %d): ", search->it_flags, search->slabs_clsid); for (ii = 0; ii < search->nkey; ++ii) { fprintf(stderr, "%c", key[ii]); } fprintf(stderr, "\n"); } if ((search->it_flags & ITEM_FETCHED) == 0) { itemstats[i].expired_unfetched++; } do_item_unlink_nolock(search, hv); do_item_remove(search); assert(search->slabs_clsid == 0); } else { refcount_decr(&search->refcount); } }
/* * Decrements the reference count on an item and adds it to the freelist if * needed. */ void item_remove(item *item) { uint32_t hv; hv = hash(ITEM_key(item), item->nkey, 0); item_lock(hv); do_item_remove(item); item_unlock(hv); }
void LRU_list::do_item_unlink_nolock(base_item* it, const uint32_t hv) { if ((it->item_flag & Slab::ITEM_LINKED) != 0){ it->item_flag &= ~(Slab::ITEM_LINKED); //省去状态更迭 hashtable.hash_delete(it->data, it->nkey, hv); item_unlink_q(it); do_item_remove(it); } }
void LRU_list::item_remove(base_item* item) { uint32_t hv; hv = HashTable::hash(item->data, item->nkey); //哈希局部锁 hashtable.hash_lock(hv); do_item_remove(item); hashtable.hash_unlock(hv); }
/* FIXME: Is it necessary to keep this copy/pasted code? */ void do_item_unlink_nolock(item *it, const uint32_t hv) { MEMCACHED_ITEM_UNLINK(ITEM_key(it), it->nkey, it->nbytes); if ((it->it_flags & ITEM_LINKED) != 0) { it->it_flags &= ~ITEM_LINKED; STATS_LOCK(); stats.curr_bytes -= ITEM_ntotal(it); stats.curr_items -= 1; STATS_UNLOCK(); assoc_delete(ITEM_key(it), it->nkey, hv); item_unlink_q(it); do_item_remove(it); } }
/* slawek */ void do_item_unlink_nolock_nostat(item *it, const uint32_t hv, uint64_t *items_removed, uint64_t *bytes_removed) { MEMCACHED_ITEM_UNLINK(ITEM_key(it), it->nkey, it->nbytes); if ((it->it_flags & ITEM_LINKED) != 0) { it->it_flags &= ~ITEM_LINKED; *items_removed += 1; *bytes_removed += ITEM_ntotal(it); assoc_delete(ITEM_key(it), it->nkey, hv); item_unlink_q(it); do_item_remove(it); } }
/* FIXME: Is it necessary to keep this copy/pasted code? */ void do_item_unlink_nolock(item *it, const uint32_t hv) { syslog(LOG_INFO, "[%s:%s:%d]", __FILE__, __func__, __LINE__); MEMCACHED_ITEM_UNLINK(ITEM_key(it), it->nkey, it->nbytes); if ((it->it_flags & ITEM_LINKED) != 0) { it->it_flags &= ~ITEM_LINKED; STATS_LOCK(); stats.curr_bytes -= ITEM_ntotal(it); stats.curr_items -= 1; STATS_UNLOCK(); assoc_delete(ITEM_key(it), it->nkey, hv); item_unlink_q(it); do_item_remove(it); } }
//unlink是把item从hashtable上和LRU上移除 void LRU_list::do_item_unlink(base_item* it, uint32_t hv) { mutex_lock(&cache_lock); if ((it->item_flag & Slab::ITEM_LINKED) != 0) { it->item_flag &= ~Slab::ITEM_LINKED; //stats.stat_lock.lock(); //stats.curr_bytes -= sizeof(base_item); //stats.curr_items -= 1; //stats.stat_lock.unlock(); hashtable.hash_delete(it->data, it->nkey, hv); item_unlink_q(it); do_item_remove(it); } cache_lock.unlock(); }
void do_item_unlink(item *it, const uint32_t hv) { syslog(LOG_INFO, "[%s:%s:%d]", __FILE__, __func__, __LINE__); MEMCACHED_ITEM_UNLINK(ITEM_key(it), it->nkey, it->nbytes); mutex_lock(&cache_lock); if ((it->it_flags & ITEM_LINKED) != 0) { it->it_flags &= ~ITEM_LINKED;//设置为非linked STATS_LOCK(); stats.curr_bytes -= ITEM_ntotal(it); stats.curr_items -= 1; STATS_UNLOCK(); assoc_delete(ITEM_key(it), it->nkey, hv); //从hash表中删除 item_unlink_q(it); //从LRU链中删除 do_item_remove(it); } mutex_unlock(&cache_lock); }
//将item从hashtable和LRU链中移除,而且还释放掉 item 所占的内存 (其实只是把 item 放到空闲链表中),是do_item_link的逆操作 void do_item_unlink(item *it, const uint32_t hv) { MEMCACHED_ITEM_UNLINK(ITEM_key(it), it->nkey, it->nbytes); mutex_lock(&cache_lock);//执行同步 if ((it->it_flags & ITEM_LINKED) != 0) {//判断状态值,保证item还在LRU队列中 it->it_flags &= ~ITEM_LINKED;//修改状态值 STATS_LOCK();//更新统计信息 stats.curr_bytes -= ITEM_ntotal(it); stats.curr_items -= 1; STATS_UNLOCK(); assoc_delete(ITEM_key(it), it->nkey, hv);//从Hash表中删除 item_unlink_q(it);//将item从slabclass对应的LRU队列摘除 do_item_remove(it);//释放 item 所占的内存 } mutex_unlock(&cache_lock); }
//从哈希表和LRU中删除 void do_item_unlink(item *it, const uint32_t hv) { MEMCACHED_ITEM_UNLINK(ITEM_key(it), it->nkey, it->nbytes); mutex_lock(&cache_lock); if ((it->it_flags & ITEM_LINKED) != 0) { it->it_flags &= ~ITEM_LINKED; STATS_LOCK(); stats.curr_bytes -= ITEM_ntotal(it); stats.curr_items -= 1; STATS_UNLOCK(); //从哈希表中删除 assoc_delete(ITEM_key(it), it->nkey, hv); //从链表中删除 item_unlink_q(it); //向slab归还这个item do_item_remove(it); } mutex_unlock(&cache_lock); }
int LRU_list::do_store_item(base_item* it, Conn* c, uint32_t hv) { char* key = it->data; //获取旧的数据项 base_item* old_it = do_item_get(key, it->nkey, hv); int store_stat = LRU_list::NOT_STORED; base_item* new_it = 0; int flags = 0; //已经有该项item存在 if (old_it != 0 && c->cmd == NREAD_ADD) { /* * 更新当前item目的 * 1.更新时间,重建LRU链 * 2.后面执行do_item_remove,每次remove会把refcount引用计数减一 * 如果引用计数=1则被删除,重建之后refcount为2 * */ do_item_update(old_it); //旧的item不存在 }else if (!old_it && (c->cmd == NREAD_REPLACE || c->cmd == NREAD_APPEND || c->cmd == NREAD_PREPEND)) { //什么也不做,因为只有replace替换已有值 }else if (c->cmd == NREAD_CAS) { //不存在此项 if (old_it == 0) { store_stat = LRU_list::NOT_FOUND; } if (it->cas == old_it->cas) { item_replace(old_it, it, hv); store_stat = LRU_list::STORED; } else { if (mem_setting.verbose > 1) { std::cerr << "CAS: failure: expected " << old_it->get_cas() << " but got " << it->cas; } store_stat = LRU_list::EXISTS; } } else { //与上面第二个判断不同,这里是旧的item存在的replace append prepend set命令 if (c->cmd == NREAD_APPEND || c->cmd == NREAD_PREPEND) { //if (it->cas != 0) { if (it->cas != old_it->cas) { store_stat = LRU_list::EXISTS; } //} if (store_stat == LRU_list::NOT_STORED) { new_it = do_item_alloc(key, it->nkey, flags, old_it->exptime, it->nbytes + old_it->nbytes - 2, hv); //分配失败 if (new_it == 0) { if (old_it != 0) do_item_remove(old_it); return LRU_list::NOT_STORED; } new_it->nkey = old_it->nkey; new_it->item_flag = flags; new_it->exptime = old_it->exptime; new_it->nbytes = it->nbytes + old_it->nbytes - 2; memcpy(new_it->data, old_it->data, old_it->nkey); new_it->data[old_it->nkey] = '0'; //copy数据 if (c->cmd == NREAD_APPEND) { memcpy(new_it->real_data_addr(), old_it->real_data_addr(), old_it->nbytes); memcpy(new_it->real_data_addr() + old_it->nbytes - 2/*\r\n*/, it->real_data_addr(), it->nbytes); } else { //NREAD_PREPEND memcpy(new_it->real_data_addr(), it->real_data_addr(), it->nbytes); memcpy(new_it->real_data_addr() + it->nbytes - 2, old_it->real_data_addr(), old_it->nbytes); } it = new_it; } } if (store_stat == LRU_list::NOT_STORED) { it->cas++; if (old_it != 0) item_replace(old_it, it, hv); else //set a new key-value do_item_link(it, hv); store_stat = LRU_list::STORED; } } if (old_it != 0) do_item_remove(old_it); if (new_it != 0) do_item_remove(new_it); if (store_stat == LRU_list::STORED) { c->cas = it->get_cas(); } return store_stat; }
/** wrapper around assoc_find which does the lazy expiration logic */ item *do_item_get(const char *key, const size_t nkey, const uint32_t hv) { //mutex_lock(&cache_lock); // 查找 key 是否存在 item *it = assoc_find(key, nkey, hv); if (it != NULL) { refcount_incr(&it->refcount); /* Optimization for slab reassignment. prevents popular items from * jamming in busy wait. Can only do this here to satisfy lock order * of item_lock, cache_lock, slabs_lock. */ if (slab_rebalance_signal && ((void *)it >= slab_rebal.slab_start && (void *)it < slab_rebal.slab_end)) { do_item_unlink_nolock(it, hv); do_item_remove(it); it = NULL; } } //mutex_unlock(&cache_lock); int was_found = 0; if (settings.verbose > 2) { if (it == NULL) { fprintf(stderr, "> NOT FOUND %s", key); } else { fprintf(stderr, "> FOUND KEY %s", ITEM_key(it)); was_found++; } } if (it != NULL) { //settings.oldest_live初始化值为0 //检测用户是否使用过flush_all命令,删除所有item //it->time <= settings.oldest_live就说明用户在使用flush_all命令的时候 //就已经存在该item了。那么该item是要删除的 if (settings.oldest_live != 0 && settings.oldest_live <= current_time && it->time <= settings.oldest_live) { do_item_unlink(it, hv); do_item_remove(it); it = NULL; if (was_found) { fprintf(stderr, " -nuked by flush"); } } //该item已经过期失效了 else if (it->exptime != 0 && it->exptime <= current_time) { do_item_unlink(it, hv); do_item_remove(it); it = NULL; if (was_found) { fprintf(stderr, " -nuked by expire"); } } else { it->it_flags |= ITEM_FETCHED; DEBUG_REFCNT(it, '+'); } } if (settings.verbose > 2) fprintf(stderr, "\n"); return it; }
enum store_item_type do_store_item ( item *it, int comm, const uint32_t hv ) { char *key = ITEM_key (it); item *old_it = do_item_get (key, it->nkey, hv); enum store_item_type stored = NOT_STORED; item *new_it = NULL; int flags; if ( old_it != NULL && comm == NREAD_ADD ) { do_item_update (old_it); } else if ( ! old_it && ( comm == NREAD_REPLACE || comm == NREAD_APPEND || comm == NREAD_PREPEND ) ) { } else { if ( comm == NREAD_APPEND || comm == NREAD_PREPEND ) { if ( stored == NOT_STORED ) { flags = ( int ) strtol (ITEM_suffix (old_it), ( char ** ) NULL, 10); new_it = do_item_alloc (key, it->nkey, flags, old_it->exptime, ITEM_data (it), it->nbytes + old_it->nbytes - 2, hv); if ( ! new_it ) { if ( old_it ) do_item_remove (old_it); return NOT_STORED; } if ( comm == NREAD_APPEND ) { memcpy (ITEM_data (new_it), ITEM_data (old_it), old_it->nbytes); memcpy (ITEM_data (new_it) + old_it->nbytes - 2, ITEM_data (it), it->nbytes); } else { memcpy (ITEM_data (new_it), ITEM_data (it), it->nbytes); memcpy (ITEM_data (new_it) + it->nbytes - 2, ITEM_data (old_it), old_it->nbytes); } it = new_it; } } if ( stored == NOT_STORED ) { if ( old_it != NULL ) { item_replace (old_it, it, hv); } else { do_item_link (it, hv); } stored = STORED; } } if ( old_it != NULL ) { do_item_remove (old_it); } if ( new_it != NULL ) { do_item_remove (new_it); } return stored; }
/* * Decrements the reference count on an item and adds it to the freelist if * needed. */ void item_remove(item *item) { pthread_mutex_lock(&cache_lock); do_item_remove(item); pthread_mutex_unlock(&cache_lock); }
/* Returns number of items remove, expired, or evicted. * Callable from worker threads or the LRU maintainer thread */ static int lru_pull_tail(const int orig_id, const int cur_lru, const uint64_t total_bytes, uint8_t flags) { item *it = NULL; int id = orig_id; int removed = 0; if (id == 0) return 0; int tries = 5; item *search; item *next_it; void *hold_lock = NULL; unsigned int move_to_lru = 0; uint64_t limit = 0; id |= cur_lru; pthread_mutex_lock(&lru_locks[id]); search = tails[id]; /* We walk up *only* for locked items, and if bottom is expired. */ for (; tries > 0 && search != NULL; tries--, search=next_it) { /* we might relink search mid-loop, so search->prev isn't reliable */ next_it = search->prev; if (search->nbytes == 0 && search->nkey == 0 && search->it_flags == 1) { /* We are a crawler, ignore it. */ if (flags & LRU_PULL_CRAWL_BLOCKS) { pthread_mutex_unlock(&lru_locks[id]); return 0; } tries++; continue; } uint32_t hv = hash(ITEM_key(search), search->nkey); /* Attempt to hash item lock the "search" item. If locked, no * other callers can incr the refcount. Also skip ourselves. */ if ((hold_lock = item_trylock(hv)) == NULL) continue; /* Now see if the item is refcount locked */ if (refcount_incr(&search->refcount) != 2) { /* Note pathological case with ref'ed items in tail. * Can still unlink the item, but it won't be reusable yet */ itemstats[id].lrutail_reflocked++; /* In case of refcount leaks, enable for quick workaround. */ /* WARNING: This can cause terrible corruption */ if (settings.tail_repair_time && search->time + settings.tail_repair_time < current_time) { itemstats[id].tailrepairs++; search->refcount = 1; /* This will call item_remove -> item_free since refcnt is 1 */ do_item_unlink_nolock(search, hv); item_trylock_unlock(hold_lock); continue; } } /* Expired or flushed */ if ((search->exptime != 0 && search->exptime < current_time) || item_is_flushed(search)) { itemstats[id].reclaimed++; if ((search->it_flags & ITEM_FETCHED) == 0) { itemstats[id].expired_unfetched++; } /* refcnt 2 -> 1 */ do_item_unlink_nolock(search, hv); /* refcnt 1 -> 0 -> item_free */ do_item_remove(search); item_trylock_unlock(hold_lock); removed++; /* If all we're finding are expired, can keep going */ continue; } /* If we're HOT_LRU or WARM_LRU and over size limit, send to COLD_LRU. * If we're COLD_LRU, send to WARM_LRU unless we need to evict */ switch (cur_lru) { case HOT_LRU: limit = total_bytes * settings.hot_lru_pct / 100; case WARM_LRU: if (limit == 0) limit = total_bytes * settings.warm_lru_pct / 100; if (sizes_bytes[id] > limit) { itemstats[id].moves_to_cold++; move_to_lru = COLD_LRU; do_item_unlink_q(search); it = search; removed++; break; } else if ((search->it_flags & ITEM_ACTIVE) != 0) { /* Only allow ACTIVE relinking if we're not too large. */ itemstats[id].moves_within_lru++; search->it_flags &= ~ITEM_ACTIVE; do_item_update_nolock(search); do_item_remove(search); item_trylock_unlock(hold_lock); } else { /* Don't want to move to COLD, not active, bail out */ it = search; } break; case COLD_LRU: it = search; /* No matter what, we're stopping */ if (flags & LRU_PULL_EVICT) { if (settings.evict_to_free == 0) { /* Don't think we need a counter for this. It'll OOM. */ break; } itemstats[id].evicted++; itemstats[id].evicted_time = current_time - search->time; if (search->exptime != 0) itemstats[id].evicted_nonzero++; if ((search->it_flags & ITEM_FETCHED) == 0) { itemstats[id].evicted_unfetched++; } LOGGER_LOG(NULL, LOG_EVICTIONS, LOGGER_EVICTION, search); do_item_unlink_nolock(search, hv); removed++; if (settings.slab_automove == 2) { slabs_reassign(-1, orig_id); } } else if ((search->it_flags & ITEM_ACTIVE) != 0 && settings.lru_maintainer_thread) { itemstats[id].moves_to_warm++; search->it_flags &= ~ITEM_ACTIVE; move_to_lru = WARM_LRU; do_item_unlink_q(search); removed++; } break; } if (it != NULL) break; } pthread_mutex_unlock(&lru_locks[id]); if (it != NULL) { if (move_to_lru) { it->slabs_clsid = ITEM_clsid(it); it->slabs_clsid |= move_to_lru; item_link_q(it); } do_item_remove(it); item_trylock_unlock(hold_lock); } return removed; }
/** wrapper around assoc_find which does the lazy expiration logic */ item *do_item_get(const char *key, const size_t nkey, const uint32_t hv, conn *c) { item *it = assoc_find(key, nkey, hv); if (it != NULL) { refcount_incr(&it->refcount); /* Optimization for slab reassignment. prevents popular items from * jamming in busy wait. Can only do this here to satisfy lock order * of item_lock, slabs_lock. */ /* This was made unsafe by removal of the cache_lock: * slab_rebalance_signal and slab_rebal.* are modified in a separate * thread under slabs_lock. If slab_rebalance_signal = 1, slab_start = * NULL (0), but slab_end is still equal to some value, this would end * up unlinking every item fetched. * This is either an acceptable loss, or if slab_rebalance_signal is * true, slab_start/slab_end should be put behind the slabs_lock. * Which would cause a huge potential slowdown. * Could also use a specific lock for slab_rebal.* and * slab_rebalance_signal (shorter lock?) */ /*if (slab_rebalance_signal && ((void *)it >= slab_rebal.slab_start && (void *)it < slab_rebal.slab_end)) { do_item_unlink(it, hv); do_item_remove(it); it = NULL; }*/ } int was_found = 0; if (settings.verbose > 2) { int ii; if (it == NULL) { fprintf(stderr, "> NOT FOUND "); } else { fprintf(stderr, "> FOUND KEY "); } for (ii = 0; ii < nkey; ++ii) { fprintf(stderr, "%c", key[ii]); } } if (it != NULL) { was_found = 1; if (item_is_flushed(it)) { do_item_unlink(it, hv); do_item_remove(it); it = NULL; pthread_mutex_lock(&c->thread->stats.mutex); c->thread->stats.get_flushed++; pthread_mutex_unlock(&c->thread->stats.mutex); if (settings.verbose > 2) { fprintf(stderr, " -nuked by flush"); } was_found = 2; } else if (it->exptime != 0 && it->exptime <= current_time) { do_item_unlink(it, hv); do_item_remove(it); it = NULL; pthread_mutex_lock(&c->thread->stats.mutex); c->thread->stats.get_expired++; pthread_mutex_unlock(&c->thread->stats.mutex); if (settings.verbose > 2) { fprintf(stderr, " -nuked by expire"); } was_found = 3; } else { it->it_flags |= ITEM_FETCHED|ITEM_ACTIVE; DEBUG_REFCNT(it, '+'); } } if (settings.verbose > 2) fprintf(stderr, "\n"); /* For now this is in addition to the above verbose logging. */ LOGGER_LOG(c->thread->l, LOG_FETCHERS, LOGGER_ITEM_GET, NULL, was_found, key, nkey); return it; }
/** wrapper around assoc_find which does the lazy expiration logic */ item *do_item_get(const char *key, const size_t nkey, const uint32_t hv) { //mutex_lock(&cache_lock); item *it = assoc_find(key, nkey, hv); if (it != NULL) { refcount_incr(&it->refcount); /* Optimization for slab reassignment. prevents popular items from * jamming in busy wait. Can only do this here to satisfy lock order * of item_lock, cache_lock, slabs_lock. */ if (slab_rebalance_signal && ((void *)it >= slab_rebal.slab_start && (void *)it < slab_rebal.slab_end)) { do_item_unlink_nolock(it, hv); do_item_remove(it); it = NULL; } } //mutex_unlock(&cache_lock); int was_found = 0; if (settings.verbose > 2) { int ii; if (it == NULL) { fprintf(stderr, "> NOT FOUND "); } else { fprintf(stderr, "> FOUND KEY "); was_found++; } for (ii = 0; ii < nkey; ++ii) { fprintf(stderr, "%c", key[ii]); } } if (it != NULL) { if (settings.oldest_live != 0 && settings.oldest_live <= current_time && it->time <= settings.oldest_live) { do_item_unlink(it, hv); do_item_remove(it); it = NULL; if (was_found) { fprintf(stderr, " -nuked by flush"); } } else if (it->exptime != 0 && it->exptime <= current_time) { do_item_unlink(it, hv); do_item_remove(it); it = NULL; if (was_found) { fprintf(stderr, " -nuked by expire"); } } else { it->it_flags |= ITEM_FETCHED; DEBUG_REFCNT(it, '+'); if (settings.anti_stampede > 0 && !(it->it_flags & ITEM_FAKE_MISSED) && it->exptime != 0 && it->exptime < current_time + settings.anti_stampede && it->exptime <= current_time + rand() % settings.anti_stampede) { it->it_flags |= ITEM_FAKE_MISSED; if (settings.verbose > 2) fprintf(stderr," - FAKE MISS (stampede protection)"); refcount_decr(&it->refcount); it = NULL; } } } if (settings.verbose > 2) fprintf(stderr, "\n"); return it; }
static int storage_write(void *storage, const int clsid, const int item_age) { int did_moves = 0; struct lru_pull_tail_return it_info; it_info.it = NULL; lru_pull_tail(clsid, COLD_LRU, 0, LRU_PULL_RETURN_ITEM, 0, &it_info); /* Item is locked, and we have a reference to it. */ if (it_info.it == NULL) { return did_moves; } obj_io io; item *it = it_info.it; /* First, storage for the header object */ size_t orig_ntotal = ITEM_ntotal(it); uint32_t flags; if ((it->it_flags & ITEM_HDR) == 0 && (item_age == 0 || current_time - it->time > item_age)) { FLAGS_CONV(it, flags); item *hdr_it = do_item_alloc(ITEM_key(it), it->nkey, flags, it->exptime, sizeof(item_hdr)); /* Run the storage write understanding the start of the item is dirty. * We will fill it (time/exptime/etc) from the header item on read. */ if (hdr_it != NULL) { int bucket = (it->it_flags & ITEM_CHUNKED) ? PAGE_BUCKET_CHUNKED : PAGE_BUCKET_DEFAULT; // Compress soon to expire items into similar pages. if (it->exptime - current_time < settings.ext_low_ttl) { bucket = PAGE_BUCKET_LOWTTL; } hdr_it->it_flags |= ITEM_HDR; io.len = orig_ntotal; io.mode = OBJ_IO_WRITE; // NOTE: when the item is read back in, the slab mover // may see it. Important to have refcount>=2 or ~ITEM_LINKED assert(it->refcount >= 2); // NOTE: write bucket vs free page bucket will disambiguate once // lowttl feature is better understood. if (extstore_write_request(storage, bucket, bucket, &io) == 0) { // cuddle the hash value into the time field so we don't have // to recalculate it. item *buf_it = (item *) io.buf; buf_it->time = it_info.hv; // copy from past the headers + time headers. // TODO: should be in items.c if (it->it_flags & ITEM_CHUNKED) { // Need to loop through the item and copy item_chunk *sch = (item_chunk *) ITEM_schunk(it); int remain = orig_ntotal; int copied = 0; // copy original header int hdrtotal = ITEM_ntotal(it) - it->nbytes; memcpy((char *)io.buf+STORE_OFFSET, (char *)it+STORE_OFFSET, hdrtotal - STORE_OFFSET); copied = hdrtotal; // copy data in like it were one large object. while (sch && remain) { assert(remain >= sch->used); memcpy((char *)io.buf+copied, sch->data, sch->used); // FIXME: use one variable? remain -= sch->used; copied += sch->used; sch = sch->next; } } else { memcpy((char *)io.buf+STORE_OFFSET, (char *)it+STORE_OFFSET, io.len-STORE_OFFSET); } // crc what we copied so we can do it sequentially. buf_it->it_flags &= ~ITEM_LINKED; buf_it->exptime = crc32c(0, (char*)io.buf+STORE_OFFSET, orig_ntotal-STORE_OFFSET); extstore_write(storage, &io); item_hdr *hdr = (item_hdr *) ITEM_data(hdr_it); hdr->page_version = io.page_version; hdr->page_id = io.page_id; hdr->offset = io.offset; // overload nbytes for the header it hdr_it->nbytes = it->nbytes; /* success! Now we need to fill relevant data into the new * header and replace. Most of this requires the item lock */ /* CAS gets set while linking. Copy post-replace */ item_replace(it, hdr_it, it_info.hv); ITEM_set_cas(hdr_it, ITEM_get_cas(it)); do_item_remove(hdr_it); did_moves = 1; LOGGER_LOG(NULL, LOG_EVICTIONS, LOGGER_EXTSTORE_WRITE, it, bucket); } else { /* Failed to write for some reason, can't continue. */ slabs_free(hdr_it, ITEM_ntotal(hdr_it), ITEM_clsid(hdr_it)); } } } do_item_remove(it); item_unlock(it_info.hv); return did_moves; }
static void storage_compact_readback(void *storage, logger *l, bool drop_unread, char *readback_buf, uint32_t page_id, uint64_t page_version, uint64_t read_size) { uint64_t offset = 0; unsigned int rescues = 0; unsigned int lost = 0; unsigned int skipped = 0; while (offset < read_size) { item *hdr_it = NULL; item_hdr *hdr = NULL; item *it = (item *)(readback_buf+offset); unsigned int ntotal; // probably zeroed out junk at the end of the wbuf if (it->nkey == 0) { break; } ntotal = ITEM_ntotal(it); uint32_t hv = (uint32_t)it->time; item_lock(hv); // We don't have a conn and don't need to do most of do_item_get hdr_it = assoc_find(ITEM_key(it), it->nkey, hv); if (hdr_it != NULL) { bool do_write = false; refcount_incr(hdr_it); // Check validity but don't bother removing it. if ((hdr_it->it_flags & ITEM_HDR) && !item_is_flushed(hdr_it) && (hdr_it->exptime == 0 || hdr_it->exptime > current_time)) { hdr = (item_hdr *)ITEM_data(hdr_it); if (hdr->page_id == page_id && hdr->page_version == page_version) { // Item header is still completely valid. extstore_delete(storage, page_id, page_version, 1, ntotal); // drop inactive items. if (drop_unread && GET_LRU(hdr_it->slabs_clsid) == COLD_LRU) { do_write = false; skipped++; } else { do_write = true; } } } if (do_write) { bool do_update = false; int tries; obj_io io; io.len = ntotal; io.mode = OBJ_IO_WRITE; for (tries = 10; tries > 0; tries--) { if (extstore_write_request(storage, PAGE_BUCKET_COMPACT, PAGE_BUCKET_COMPACT, &io) == 0) { memcpy(io.buf, it, io.len); extstore_write(storage, &io); do_update = true; break; } else { usleep(1000); } } if (do_update) { if (it->refcount == 2) { hdr->page_version = io.page_version; hdr->page_id = io.page_id; hdr->offset = io.offset; rescues++; } else { lost++; // TODO: re-alloc and replace header. } } else { lost++; } } do_item_remove(hdr_it); } item_unlock(hv); offset += ntotal; if (read_size - offset < sizeof(struct _stritem)) break; } STATS_LOCK(); stats.extstore_compact_lost += lost; stats.extstore_compact_rescues += rescues; stats.extstore_compact_skipped += skipped; STATS_UNLOCK(); LOGGER_LOG(l, LOG_SYSEVENTS, LOGGER_COMPACT_READ_END, NULL, page_id, offset, rescues, lost, skipped); }
base_item* LRU_list::do_item_get(const char* key, const size_t nkey, const uint32_t hv) { base_item* it = hashtable.hash_find(key, nkey, hv); if (it != NULL) { //原子增 std::atomic_fetch_add(&(it->refcount), 1u); /* * 这里有一段相关item忙等的东西,不知道和扩容相关不 if (slab_rebalance_signal && ((void *)it >= slab_rebal.slab_start && (void *)it < slab_rebal.slab_end)) { do_item_unlink_nolock(it, hv); do_item_remove(it); it = NULL; } */ } int was_found = 0; //输出必要信息 if (mem_setting.verbose > 2) { int ii; if (it == 0) { std::cerr << "> NOT FOUND "; } else { std::cerr << "> FOUND KEY "; was_found++; } for( ii = 0; ii < nkey; ++ii) { std::cerr << key[ii]; } } if (it != 0) { //如果存活时间不为0或者小于规定时间,删除(LRU算法) //但此时仍不知何时设置时间的 /* if (mem_setting.oldest_live != 0 && mem_setting.oldest_live <= current_time && it->realtime <= mem_setting.oldest_live) { do_item_unlink(it, hv); do_item_remove(it); it = 0; if (was_found) { std::cerr << "-nuked by flush\n"; } } else */ if (it->exptime != 0 && it->exptime <= current_time()) { do_item_unlink(it, hv); do_item_remove(it); it = 0; if (was_found) { std::cerr << "-nuked by expire\n"; } }else { it->item_flag |= Slab::ITEM_FETCHED; } } if (mem_setting.verbose > 2) { std::cerr << '\n'; } return it; }
/* 根据 key 找对应的 item, 为了加快查找速度, memcached 使用一个哈希表对 key 和 item 所在的内存地址做映射. item_get直接从哈希表中 查找就可以了, 当然找到了还要检查 item 是否超时. 超时了的 item将从哈希表和 LRU 队列中删除掉 */ item *do_item_get(const char *key, const size_t nkey, const uint32_t hv) { item *it = assoc_find(key, nkey, hv);/* 从哈希表中找 item */ if (it != NULL) { refcount_incr(&it->refcount);//item的引用次数+1 /* Optimization for slab reassignment. prevents popular items from * jamming in busy wait. Can only do this here to satisfy lock order * of item_lock, cache_lock, slabs_lock. */ if (slab_rebalance_signal && //如果正在进行slab调整,且该item是调整的对象 ((void *)it >= slab_rebal.slab_start && (void *)it < slab_rebal.slab_end)) { do_item_unlink_nolock(it, hv);//将item从hashtable和LRU链中移除 do_item_remove(it);//删除item it = NULL;//置为空 } } //mutex_unlock(&cache_lock); int was_found = 0; //打印调试信息 if (settings.verbose > 2) { if (it == NULL) { fprintf(stderr, "> NOT FOUND %s", key); } else { fprintf(stderr, "> FOUND KEY %s", ITEM_key(it)); was_found++; } } /* 找到了, 然后检查是否超时 */ if (it != NULL) { //判断Memcached初始化是否开启过期删除机制,如果开启,则执行删除相关操作 if (settings.oldest_live != 0 && settings.oldest_live <= current_time && it->time <= settings.oldest_live) { do_item_unlink(it, hv);//将item从hashtable和LRU链中移除 do_item_remove(it);//删除item it = NULL; if (was_found) { fprintf(stderr, " -nuked by flush"); } } //判断item是否过期 else if (it->exptime != 0 && it->exptime <= current_time) { do_item_unlink(it, hv);//将item从hashtable和LRU链中移除 do_item_remove(it);//删除item it = NULL; if (was_found) { fprintf(stderr, " -nuked by expire"); } } else { it->it_flags |= ITEM_FETCHED;//item的标识修改为已经读取 DEBUG_REFCNT(it, '+'); } } if (settings.verbose > 2)fprintf(stderr, "\n"); return it; }