/**
 * Adds a delta value to a numeric item.
 *
 * @param engine handle to the storage engine
 * @param it item to adjust
 * @param incr true to increment the value, false to decrement it
 * @param delta amount to adjust the value by
 * @param ritem the resulting item after adding the delta. Only valid if
 *              ENGINE_SUCCESS is returned. The caller is responsible for
 *              calling do_item_release() on it when finished.
 * @param result the numeric value after the adjustment
 * @param cookie cookie identifying the connection requesting the operation
 *
 * @return a response code to send back to the client
 */
static ENGINE_ERROR_CODE do_add_delta(struct default_engine *engine,
                                      hash_item *it, const bool incr,
                                      const int64_t delta, item** ritem,
                                      uint64_t *result, const void *cookie) {
    const char *ptr;
    uint64_t value;
    char buf[80];
    int res;

    if (it->nbytes >= (sizeof(buf) - 1)) {
        return ENGINE_EINVAL;
    }

    ptr = item_get_data(it);
    memcpy(buf, ptr, it->nbytes);
    buf[it->nbytes] = '\0';

    if (!safe_strtoull(buf, &value)) {
        return ENGINE_EINVAL;
    }

    if (incr) {
        value += delta;
    } else {
        if ((uint64_t)delta > value) {
            value = 0;
        } else {
            value -= delta;
        }
    }
    *result = value;

    res = snprintf(buf, sizeof(buf), "%" PRIu64, value);
    if (res < 0 || res >= sizeof(buf)) {
        return ENGINE_EINVAL;
    }

    if (it->refcount == 1 && res <= (int)it->nbytes) {
        /* we can do inline replacement */
        memcpy(item_get_data(it), buf, res);
        memset(item_get_data(it) + res, ' ', it->nbytes - res);
        item_set_cas(NULL, NULL, it, get_cas_id());
        *ritem = it;
    } else {
        hash_item *new_it = do_item_alloc(engine, item_get_key(it),
                                          it->flags, it->exptime, res,
                                          cookie, it->datatype);
        if (new_it == NULL) {
            do_item_unlink(engine, it);
            return ENGINE_ENOMEM;
        }
        memcpy(item_get_data(new_it), buf, res);
        do_item_replace(engine, it, new_it);
        *ritem = new_it;
    }

    return ENGINE_SUCCESS;
}
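The arithmetic above only works if the stored value parses as an unsigned integer, which is what safe_strtoull() guards. As a minimal sketch (not the engine's own implementation, which lives in its util code), a parser with the behavior this function relies on could look like the hypothetical parse_uint64 below; note that it tolerates trailing spaces because the inline-replacement path space-pads shorter values in place.

#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

/* Hypothetical sketch of a safe_strtoull-style parser: succeeds only if the
 * whole string is an unsigned decimal number, optionally space-padded. */
static bool parse_uint64(const char *str, uint64_t *out) {
    char *endptr = NULL;
    errno = 0;
    unsigned long long ull = strtoull(str, &endptr, 10);
    if (errno == ERANGE || endptr == str) {
        return false;               /* overflow, or no digits at all */
    }
    while (*endptr == ' ') {
        endptr++;                   /* allow the space padding added above */
    }
    if (*endptr != '\0') {
        return false;               /* trailing garbage */
    }
    *out = (uint64_t)ull;
    return true;
}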
/*
 * Replaces one item with another in the hashtable.
 * Unprotected by a mutex lock since the core server does not require
 * it to be thread-safe.
 */
int item_replace(item *old_it, item *new_it, const uint32_t hv) {
    return do_item_replace(old_it, new_it, hv);
}
/* Regardless of whether other worker threads still hold references to the
 * old item, it is removed from the hash table and the LRU queue directly. */
int item_replace(item *old_it, item *new_it, const uint32_t hv) {
    /* This unlinks old_it and then links new_it. */
    return do_item_replace(old_it, new_it, hv);
    /* Note: old_it->refcount is decremented by 1 and new_it's is incremented by 1. */
}
/*
 * Replaces one item with another in the hashtable.
 * Unprotected by a mutex lock since the core server does not require
 * it to be thread-safe.
 */
int item_replace(item *old_it, item *new_it) {
    return do_item_replace(old_it, new_it);
}
int LRU_list::item_replace(base_item* old_it, base_item* new_it, const uint32_t hv) {
    return do_item_replace(old_it, new_it, hv);
}
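All of these wrappers simply forward to do_item_replace(); the hv-taking variants expect the caller to have hashed the key and to hold the corresponding item lock. A hypothetical caller sketch is shown below; the helper names (hash(), item_lock()/item_unlock(), ITEM_key()) follow stock memcached, but the exact locking sequence varies between versions, so treat this as an illustration rather than the actual call site.

/* Hypothetical caller sketch: replace an item while holding its bucket lock. */
static void replace_under_lock(item *old_it, item *new_it) {
    uint32_t hv = hash(ITEM_key(old_it), old_it->nkey);
    item_lock(hv);                      /* lock the bucket for this hash value */
    item_replace(old_it, new_it, hv);   /* unlink old_it, link new_it */
    item_unlock(hv);
}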
/*
 * Stores an item in the cache according to the semantics of one of the set
 * commands. In threaded mode, this is protected by the cache lock.
 *
 * Returns the state of storage.
 */
static ENGINE_ERROR_CODE do_store_item(struct default_engine *engine,
                                       hash_item *it, uint64_t *cas,
                                       ENGINE_STORE_OPERATION operation,
                                       const void *cookie) {
    const char *key = item_get_key(it);
    hash_item *old_it = do_item_get(engine, key, it->nkey);
    ENGINE_ERROR_CODE stored = ENGINE_NOT_STORED;

    hash_item *new_it = NULL;

    if (old_it != NULL && operation == OPERATION_ADD) {
        /* add only adds a nonexistent item, but promote to head of LRU */
        do_item_update(engine, old_it);
    } else if (!old_it && (operation == OPERATION_REPLACE
        || operation == OPERATION_APPEND || operation == OPERATION_PREPEND)) {
        /* replace only replaces an existing value; don't store */
    } else if (operation == OPERATION_CAS) {
        /* validate cas operation */
        if (old_it == NULL) {
            // LRU expired
            stored = ENGINE_KEY_ENOENT;
        } else if (item_get_cas(it) == item_get_cas(old_it)) {
            // cas validates
            // it and old_it may belong to different classes.
            // I'm updating the stats for the one that's getting pushed out
            do_item_replace(engine, old_it, it);
            stored = ENGINE_SUCCESS;
        } else {
            if (engine->config.verbose > 1) {
                EXTENSION_LOGGER_DESCRIPTOR *logger;
                logger = (void*)engine->server.extension->get_extension(EXTENSION_LOGGER);
                logger->log(EXTENSION_LOG_INFO, NULL,
                            "CAS: failure: expected %"PRIu64", got %"PRIu64"\n",
                            item_get_cas(old_it), item_get_cas(it));
            }
            stored = ENGINE_KEY_EEXISTS;
        }
    } else {
        /*
         * Append - combine new and old record into single one. Here it's
         * atomic and thread-safe.
         */
        if (operation == OPERATION_APPEND || operation == OPERATION_PREPEND) {
            /*
             * Validate CAS
             */
            if (item_get_cas(it) != 0) {
                // CAS must be equal
                if (item_get_cas(it) != item_get_cas(old_it)) {
                    stored = ENGINE_KEY_EEXISTS;
                }
            }

            if (stored == ENGINE_NOT_STORED) {
                /* we have it and old_it here - alloc memory to hold both */
                new_it = do_item_alloc(engine, key, it->nkey,
                                       old_it->flags, old_it->exptime,
                                       it->nbytes + old_it->nbytes,
                                       cookie);
                if (new_it == NULL) {
                    /* SERVER_ERROR out of memory */
                    if (old_it != NULL) {
                        do_item_release(engine, old_it);
                    }
                    return ENGINE_NOT_STORED;
                }

                /* copy data from it and old_it to new_it */
                if (operation == OPERATION_APPEND) {
                    memcpy(item_get_data(new_it), item_get_data(old_it), old_it->nbytes);
                    memcpy(item_get_data(new_it) + old_it->nbytes,
                           item_get_data(it), it->nbytes);
                } else {
                    /* OPERATION_PREPEND */
                    memcpy(item_get_data(new_it), item_get_data(it), it->nbytes);
                    memcpy(item_get_data(new_it) + it->nbytes,
                           item_get_data(old_it), old_it->nbytes);
                }

                it = new_it;
            }
        }

        if (stored == ENGINE_NOT_STORED) {
            if (old_it != NULL) {
                do_item_replace(engine, old_it, it);
            } else {
                do_item_link(engine, it);
            }

            *cas = item_get_cas(it);
            stored = ENGINE_SUCCESS;
        }
    }

    if (old_it != NULL) {
        do_item_release(engine, old_it);    /* release our reference */
    }
    if (new_it != NULL) {
        do_item_release(engine, new_it);
    }
    if (stored == ENGINE_SUCCESS) {
        *cas = item_get_cas(it);
    }
    return stored;
}
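The branching above keys off ENGINE_STORE_OPERATION. The summary below paraphrases the semantics this function implements; consult the engine interface header for the authoritative enum definition.

/* Store semantics handled by do_store_item (paraphrased):
 *   OPERATION_ADD      store only if the key does NOT already exist
 *   OPERATION_SET      store unconditionally
 *   OPERATION_REPLACE  store only if the key already exists
 *   OPERATION_APPEND   concatenate new data after the existing value
 *   OPERATION_PREPEND  concatenate new data before the existing value
 *   OPERATION_CAS      store only if the supplied CAS matches the stored CAS
 */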
/* refcount == 0 is safe since nobody can incr while item_lock is held.
 * refcount != 0 is impossible since flags/etc can be modified in other
 * threads. instead, note we found a busy one and bail. logic in do_item_get
 * will prevent busy items from continuing to be busy
 * NOTE: This is checking it_flags outside of an item lock. I believe this
 * works since it_flags is 8 bits, and we're only ever comparing a single bit
 * regardless. ITEM_SLABBED bit will always be correct since we're holding the
 * lock which modifies that bit. ITEM_LINKED won't exist if we're between an
 * item having ITEM_SLABBED removed, and the key hasn't been added to the item
 * yet. The memory barrier from the slabs lock should order the key write and the
 * flags to the item?
 * If ITEM_LINKED did exist and was just removed, but we still see it, that's
 * still safe since it will have a valid key, which we then lock, and then
 * recheck everything.
 * This may not be safe on all platforms; If not, slabs_alloc() will need to
 * seed the item key while holding slabs_lock.
 */
static int slab_rebalance_move(void) {
    slabclass_t *s_cls;
    int x;
    int was_busy = 0;
    int refcount = 0;
    uint32_t hv;
    void *hold_lock;
    enum move_status status = MOVE_PASS;

    pthread_mutex_lock(&slabs_lock);

    s_cls = &slabclass[slab_rebal.s_clsid];

    for (x = 0; x < slab_bulk_check; x++) {
        hv = 0;
        hold_lock = NULL;
        item *it = slab_rebal.slab_pos;
        status = MOVE_PASS;
        /* ITEM_FETCHED when ITEM_SLABBED is overloaded to mean we've cleared
         * the chunk for move. Only these two flags should exist.
         */
        if (it->it_flags != (ITEM_SLABBED|ITEM_FETCHED)) {
            /* ITEM_SLABBED can only be added/removed under the slabs_lock */
            if (it->it_flags & ITEM_SLABBED) {
                /* remove from slab freelist */
                if (s_cls->slots == it) {
                    s_cls->slots = it->next;
                }
                if (it->next) it->next->prev = it->prev;
                if (it->prev) it->prev->next = it->next;
                s_cls->sl_curr--;
                status = MOVE_FROM_SLAB;
            } else if ((it->it_flags & ITEM_LINKED) != 0) {
                /* If it doesn't have ITEM_SLABBED, the item could be in any
                 * state on its way to being freed or written to. If no
                 * ITEM_SLABBED, but it's had ITEM_LINKED, it must be active
                 * and have the key written to it already.
                 */
                hv = hash(ITEM_key(it), it->nkey);
                if ((hold_lock = item_trylock(hv)) == NULL) {
                    status = MOVE_LOCKED;
                } else {
                    refcount = refcount_incr(&it->refcount);
                    if (refcount == 2) { /* item is linked but not busy */
                        /* Double check ITEM_LINKED flag here, since we're
                         * past a memory barrier from the mutex. */
                        if ((it->it_flags & ITEM_LINKED) != 0) {
                            status = MOVE_FROM_LRU;
                        } else {
                            /* refcount == 1 + !ITEM_LINKED means the item is being
                             * uploaded to, or was just unlinked but hasn't been freed
                             * yet. Let it bleed off on its own and try again later */
                            status = MOVE_BUSY;
                        }
                    } else {
                        if (settings.verbose > 2) {
                            fprintf(stderr, "Slab reassign hit a busy item: refcount: %d (%d -> %d)\n",
                                it->refcount, slab_rebal.s_clsid, slab_rebal.d_clsid);
                        }
                        status = MOVE_BUSY;
                    }
                    /* Item lock must be held while modifying refcount */
                    if (status == MOVE_BUSY) {
                        refcount_decr(&it->refcount);
                        item_trylock_unlock(hold_lock);
                    }
                }
            } else {
                /* See above comment. No ITEM_SLABBED or ITEM_LINKED. Mark
                 * busy and wait for item to complete its upload. */
                status = MOVE_BUSY;
            }
        }

        int save_item = 0;
        item *new_it = NULL;
        size_t ntotal = 0;
        switch (status) {
            case MOVE_FROM_LRU:
                /* Lock order is LRU locks -> slabs_lock. unlink uses LRU lock.
                 * We only need to hold the slabs_lock while initially looking
                 * at an item, and at this point we have an exclusive refcount
                 * (2) + the item is locked. Drop slabs lock, drop item to
                 * refcount 1 (just our own), then fall through and wipe it.
                 */
                /* Check if expired or flushed */
                ntotal = ITEM_ntotal(it);
                /* REQUIRES slabs_lock: CHECK FOR cls->sl_curr > 0 */
                if ((it->exptime != 0 && it->exptime < current_time)
                    || item_is_flushed(it)) {
                    /* TODO: maybe we only want to save if item is in HOT or
                     * WARM LRU?
                     */
                    save_item = 0;
                } else if ((new_it = slab_rebalance_alloc(ntotal, slab_rebal.s_clsid)) == NULL) {
                    save_item = 0;
                    slab_rebal.evictions_nomem++;
                } else {
                    save_item = 1;
                }
                pthread_mutex_unlock(&slabs_lock);
                if (save_item) {
                    /* if free memory, memcpy. clear prev/next/h_bucket */
                    memcpy(new_it, it, ntotal);
                    new_it->prev = 0;
                    new_it->next = 0;
                    new_it->h_next = 0;
                    /* These are definitely required. else fails assert */
                    new_it->it_flags &= ~ITEM_LINKED;
                    new_it->refcount = 0;
                    do_item_replace(it, new_it, hv);
                    slab_rebal.rescues++;
                } else {
                    do_item_unlink(it, hv);
                }
                item_trylock_unlock(hold_lock);
                pthread_mutex_lock(&slabs_lock);
                /* Always remove the ntotal, as we added it in during
                 * do_slabs_alloc() when copying the item.
                 */
                s_cls->requested -= ntotal;
            case MOVE_FROM_SLAB:
                it->refcount = 0;
                it->it_flags = ITEM_SLABBED|ITEM_FETCHED;
#ifdef DEBUG_SLAB_MOVER
                memcpy(ITEM_key(it), "deadbeef", 8);
#endif
                break;
            case MOVE_BUSY:
            case MOVE_LOCKED:
                slab_rebal.busy_items++;
                was_busy++;
                break;
            case MOVE_PASS:
                break;
        }

        slab_rebal.slab_pos = (char *)slab_rebal.slab_pos + s_cls->size;
        if (slab_rebal.slab_pos >= slab_rebal.slab_end)
            break;
    }

    if (slab_rebal.slab_pos >= slab_rebal.slab_end) {
        /* Some items were busy, start again from the top */
        if (slab_rebal.busy_items) {
            slab_rebal.slab_pos = slab_rebal.slab_start;
            STATS_LOCK();
            stats.slab_reassign_busy_items += slab_rebal.busy_items;
            STATS_UNLOCK();
            slab_rebal.busy_items = 0;
        } else {
            slab_rebal.done++;
        }
    }

    pthread_mutex_unlock(&slabs_lock);

    return was_busy;
}
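The busy-item detection above hinges on refcount_incr() returning the post-increment count: a result of exactly 2 means the mover holds the only extra reference. As a minimal sketch (assuming GCC/Clang __sync builtins; the stock build may fall back to a mutex-protected path on platforms without atomics), the helpers behave like the ones below. Note that the newer variant of this function shown further down passes the item pointer itself rather than &it->refcount.

/* Minimal sketch of atomic refcount helpers with the semantics the mover
 * relies on: both return the value *after* the adjustment. */
static inline unsigned short refcount_incr_sketch(unsigned short *refcount) {
    return __sync_add_and_fetch(refcount, 1);
}

static inline unsigned short refcount_decr_sketch(unsigned short *refcount) {
    return __sync_sub_and_fetch(refcount, 1);
}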
/* refcount == 0 is safe since nobody can incr while item_lock is held.
 * refcount != 0 is impossible since flags/etc can be modified in other
 * threads. instead, note we found a busy one and bail. logic in do_item_get
 * will prevent busy items from continuing to be busy
 * NOTE: This is checking it_flags outside of an item lock. I believe this
 * works since it_flags is 8 bits, and we're only ever comparing a single bit
 * regardless. ITEM_SLABBED bit will always be correct since we're holding the
 * lock which modifies that bit. ITEM_LINKED won't exist if we're between an
 * item having ITEM_SLABBED removed, and the key hasn't been added to the item
 * yet. The memory barrier from the slabs lock should order the key write and the
 * flags to the item?
 * If ITEM_LINKED did exist and was just removed, but we still see it, that's
 * still safe since it will have a valid key, which we then lock, and then
 * recheck everything.
 * This may not be safe on all platforms; If not, slabs_alloc() will need to
 * seed the item key while holding slabs_lock.
 */
static int slab_rebalance_move(void) {
    slabclass_t *s_cls;
    int x;
    int was_busy = 0;
    int refcount = 0;
    uint32_t hv;
    void *hold_lock;
    enum move_status status = MOVE_PASS;

    pthread_mutex_lock(&slabs_lock);

    s_cls = &slabclass[slab_rebal.s_clsid];

    for (x = 0; x < slab_bulk_check; x++) {
        hv = 0;
        hold_lock = NULL;
        item *it = slab_rebal.slab_pos;
        item_chunk *ch = NULL;
        status = MOVE_PASS;
        if (it->it_flags & ITEM_CHUNK) {
            /* This chunk is a chained part of a larger item. */
            ch = (item_chunk *) it;
            /* Instead, we use the head chunk to find the item and effectively
             * lock the entire structure. If a chunk has ITEM_CHUNK flag, its
             * head cannot be slabbed, so the normal routine is safe. */
            it = ch->head;
            assert(it->it_flags & ITEM_CHUNKED);
        }

        /* ITEM_FETCHED when ITEM_SLABBED is overloaded to mean we've cleared
         * the chunk for move. Only these two flags should exist.
         */
        if (it->it_flags != (ITEM_SLABBED|ITEM_FETCHED)) {
            /* ITEM_SLABBED can only be added/removed under the slabs_lock */
            if (it->it_flags & ITEM_SLABBED) {
                assert(ch == NULL);
                slab_rebalance_cut_free(s_cls, it);
                status = MOVE_FROM_SLAB;
            } else if ((it->it_flags & ITEM_LINKED) != 0) {
                /* If it doesn't have ITEM_SLABBED, the item could be in any
                 * state on its way to being freed or written to. If no
                 * ITEM_SLABBED, but it's had ITEM_LINKED, it must be active
                 * and have the key written to it already.
                 */
                hv = hash(ITEM_key(it), it->nkey);
                if ((hold_lock = item_trylock(hv)) == NULL) {
                    status = MOVE_LOCKED;
                } else {
                    refcount = refcount_incr(it);
                    if (refcount == 2) { /* item is linked but not busy */
                        /* Double check ITEM_LINKED flag here, since we're
                         * past a memory barrier from the mutex. */
                        if ((it->it_flags & ITEM_LINKED) != 0) {
                            status = MOVE_FROM_LRU;
                        } else {
                            /* refcount == 1 + !ITEM_LINKED means the item is being
                             * uploaded to, or was just unlinked but hasn't been freed
                             * yet. Let it bleed off on its own and try again later */
                            status = MOVE_BUSY;
                        }
                    } else {
                        if (settings.verbose > 2) {
                            fprintf(stderr, "Slab reassign hit a busy item: refcount: %d (%d -> %d)\n",
                                it->refcount, slab_rebal.s_clsid, slab_rebal.d_clsid);
                        }
                        status = MOVE_BUSY;
                    }
                    /* Item lock must be held while modifying refcount */
                    if (status == MOVE_BUSY) {
                        refcount_decr(it);
                        item_trylock_unlock(hold_lock);
                    }
                }
            } else {
                /* See above comment. No ITEM_SLABBED or ITEM_LINKED. Mark
                 * busy and wait for item to complete its upload. */
                status = MOVE_BUSY;
            }
        }

        int save_item = 0;
        item *new_it = NULL;
        size_t ntotal = 0;
        switch (status) {
            case MOVE_FROM_LRU:
                /* Lock order is LRU locks -> slabs_lock. unlink uses LRU lock.
                 * We only need to hold the slabs_lock while initially looking
                 * at an item, and at this point we have an exclusive refcount
                 * (2) + the item is locked. Drop slabs lock, drop item to
                 * refcount 1 (just our own), then fall through and wipe it.
                 */
                /* Check if expired or flushed */
                ntotal = ITEM_ntotal(it);
                /* REQUIRES slabs_lock: CHECK FOR cls->sl_curr > 0 */
                if (ch == NULL && (it->it_flags & ITEM_CHUNKED)) {
                    /* Chunked should be identical to non-chunked, except we need
                     * to swap out ntotal for the head-chunk-total. */
                    ntotal = s_cls->size;
                }
                if ((it->exptime != 0 && it->exptime < current_time)
                    || item_is_flushed(it)) {
                    /* Expired, don't save. */
                    save_item = 0;
                } else if (ch == NULL &&
                        (new_it = slab_rebalance_alloc(ntotal, slab_rebal.s_clsid)) == NULL) {
                    /* Not a chunk of an item, and nomem. */
                    save_item = 0;
                    slab_rebal.evictions_nomem++;
                } else if (ch != NULL &&
                        (new_it = slab_rebalance_alloc(s_cls->size, slab_rebal.s_clsid)) == NULL) {
                    /* Is a chunk of an item, and nomem. */
                    save_item = 0;
                    slab_rebal.evictions_nomem++;
                } else {
                    /* Was whatever it was, and we have memory for it. */
                    save_item = 1;
                }
                pthread_mutex_unlock(&slabs_lock);
                unsigned int requested_adjust = 0;
                if (save_item) {
                    if (ch == NULL) {
                        assert((new_it->it_flags & ITEM_CHUNKED) == 0);
                        /* if free memory, memcpy. clear prev/next/h_bucket */
                        memcpy(new_it, it, ntotal);
                        new_it->prev = 0;
                        new_it->next = 0;
                        new_it->h_next = 0;
                        /* These are definitely required. else fails assert */
                        new_it->it_flags &= ~ITEM_LINKED;
                        new_it->refcount = 0;
                        do_item_replace(it, new_it, hv);
                        /* Need to walk the chunks and repoint head */
                        if (new_it->it_flags & ITEM_CHUNKED) {
                            item_chunk *fch = (item_chunk *) ITEM_data(new_it);
                            fch->next->prev = fch;
                            while (fch) {
                                fch->head = new_it;
                                fch = fch->next;
                            }
                        }
                        it->refcount = 0;
                        it->it_flags = ITEM_SLABBED|ITEM_FETCHED;
#ifdef DEBUG_SLAB_MOVER
                        memcpy(ITEM_key(it), "deadbeef", 8);
#endif
                        slab_rebal.rescues++;
                        requested_adjust = ntotal;
                    } else {
                        item_chunk *nch = (item_chunk *) new_it;
                        /* Chunks always have head chunk (the main it) */
                        ch->prev->next = nch;
                        if (ch->next) ch->next->prev = nch;
                        memcpy(nch, ch, ch->used + sizeof(item_chunk));
                        ch->refcount = 0;
                        ch->it_flags = ITEM_SLABBED|ITEM_FETCHED;
                        slab_rebal.chunk_rescues++;
#ifdef DEBUG_SLAB_MOVER
                        memcpy(ITEM_key((item *)ch), "deadbeef", 8);
#endif
                        refcount_decr(it);
                        requested_adjust = s_cls->size;
                    }
                } else {
                    /* restore ntotal in case we tried saving a head chunk. */
                    ntotal = ITEM_ntotal(it);
                    do_item_unlink(it, hv);
                    slabs_free(it, ntotal, slab_rebal.s_clsid);
                    /* Swing around again later to remove it from the freelist. */
                    slab_rebal.busy_items++;
                    was_busy++;
                }
                item_trylock_unlock(hold_lock);
                pthread_mutex_lock(&slabs_lock);
                /* Always remove the ntotal, as we added it in during
                 * do_slabs_alloc() when copying the item.
                 */
                s_cls->requested -= requested_adjust;
                break;
            case MOVE_FROM_SLAB:
                it->refcount = 0;
                it->it_flags = ITEM_SLABBED|ITEM_FETCHED;
#ifdef DEBUG_SLAB_MOVER
                memcpy(ITEM_key(it), "deadbeef", 8);
#endif
                break;
            case MOVE_BUSY:
            case MOVE_LOCKED:
                slab_rebal.busy_items++;
                was_busy++;
                break;
            case MOVE_PASS:
                break;
        }

        slab_rebal.slab_pos = (char *)slab_rebal.slab_pos + s_cls->size;
        if (slab_rebal.slab_pos >= slab_rebal.slab_end)
            break;
    }

    if (slab_rebal.slab_pos >= slab_rebal.slab_end) {
        /* Some items were busy, start again from the top */
        if (slab_rebal.busy_items) {
            slab_rebal.slab_pos = slab_rebal.slab_start;
            STATS_LOCK();
            stats.slab_reassign_busy_items += slab_rebal.busy_items;
            STATS_UNLOCK();
            slab_rebal.busy_items = 0;
        } else {
            slab_rebal.done++;
        }
    }

    pthread_mutex_unlock(&slabs_lock);

    return was_busy;
}
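Both versions of slab_rebalance_move() dispatch on enum move_status. The sketch below paraphrases the states and summarizes how the mover treats each one, based on the switch bodies above; the authoritative definition lives in slabs.c.

/* Paraphrased sketch of the mover's per-chunk states: */
enum move_status_sketch {
    MOVE_PASS_S,        /* nothing to do: chunk already cleared for the move */
    MOVE_FROM_SLAB_S,   /* item sat on the slab freelist; just wipe/mark it */
    MOVE_FROM_LRU_S,    /* live item: copy it out (or unlink it), then wipe it */
    MOVE_BUSY_S,        /* item is referenced elsewhere; retry on a later pass */
    MOVE_LOCKED_S       /* could not take the item lock; retry on a later pass */
};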
/*
 * Stores an item in the cache according to the semantics of one of the set
 * commands. In threaded mode, this is protected by the cache lock.
 *
 * Returns the state of storage.
 */
static ENGINE_ERROR_CODE do_item_store(struct demo_engine *engine,
                                       hash_item *it, uint64_t *cas,
                                       ENGINE_STORE_OPERATION operation,
                                       const void *cookie) {
    const char *key = dm_item_get_key(it);
    hash_item *old_it;
    hash_item *new_it = NULL;
    ENGINE_ERROR_CODE stored;

    old_it = do_item_get(engine, key, it->nkey, true);
    if (old_it != NULL) {
        if (operation == OPERATION_ADD) {
            do_item_release(engine, old_it);
            return ENGINE_NOT_STORED;
        }
    } else {
        if (operation == OPERATION_REPLACE ||
            operation == OPERATION_APPEND || operation == OPERATION_PREPEND) {
            return ENGINE_NOT_STORED;
        }
        if (operation == OPERATION_CAS) {
            return ENGINE_KEY_ENOENT;
        }
    }

    stored = ENGINE_NOT_STORED;

    if (operation == OPERATION_CAS) {
        assert(old_it != NULL);
        if (dm_item_get_cas(it) == dm_item_get_cas(old_it)) {
            // cas validates
            // it and old_it may belong to different classes.
            // I'm updating the stats for the one that's getting pushed out
            do_item_replace(engine, old_it, it);
            stored = ENGINE_SUCCESS;
        } else {
            if (engine->config.verbose > 1) {
                logger->log(EXTENSION_LOG_WARNING, NULL,
                            "CAS: failure: expected %"PRIu64", got %"PRIu64"\n",
                            dm_item_get_cas(old_it), dm_item_get_cas(it));
            }
            stored = ENGINE_KEY_EEXISTS;
        }
    } else {
        /*
         * Append - combine new and old record into single one. Here it's
         * atomic and thread-safe.
         */
        if (operation == OPERATION_APPEND || operation == OPERATION_PREPEND) {
            assert(old_it != NULL);
            /*
             * Validate CAS
             */
            if (dm_item_get_cas(it) != 0) {
                // CAS must be equal
                if (dm_item_get_cas(it) != dm_item_get_cas(old_it)) {
                    stored = ENGINE_KEY_EEXISTS;
                }
            }

            if (stored == ENGINE_NOT_STORED) {
                /* we have it and old_it here - alloc memory to hold both */
                new_it = do_item_alloc(engine, key, it->nkey,
                                       old_it->flags, old_it->exptime,
                                       it->nbytes + old_it->nbytes - 2 /* CRLF */,
                                       cookie);
                if (new_it == NULL) {
                    /* SERVER_ERROR out of memory */
                    if (old_it != NULL)
                        do_item_release(engine, old_it);
                    return ENGINE_NOT_STORED;
                }

                /* copy data from it and old_it to new_it */
                if (operation == OPERATION_APPEND) {
                    memcpy(dm_item_get_data(new_it), dm_item_get_data(old_it), old_it->nbytes);
                    memcpy(dm_item_get_data(new_it) + old_it->nbytes - 2 /* CRLF */,
                           dm_item_get_data(it), it->nbytes);
                } else {
                    /* OPERATION_PREPEND */
                    memcpy(dm_item_get_data(new_it), dm_item_get_data(it), it->nbytes);
                    memcpy(dm_item_get_data(new_it) + it->nbytes - 2 /* CRLF */,
                           dm_item_get_data(old_it), old_it->nbytes);
                }
                it = new_it;
            }
        }

        if (stored == ENGINE_NOT_STORED) {
            if (old_it != NULL) {
                do_item_replace(engine, old_it, it);
                stored = ENGINE_SUCCESS;
            } else {
                stored = do_item_link(engine, it);
            }
            if (stored == ENGINE_SUCCESS) {
                *cas = dm_item_get_cas(it);
            }
        }
    }

    if (old_it != NULL) {
        do_item_release(engine, old_it);    /* release our reference */
    }
    if (new_it != NULL) {
        do_item_release(engine, new_it);
    }
    if (stored == ENGINE_SUCCESS) {
        *cas = dm_item_get_cas(it);
    }
    return stored;
}
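Unlike the default_engine variant earlier, this demo_engine version stores values with a trailing CRLF counted inside nbytes, which is why the append/prepend path subtracts 2 both when sizing the new item and when positioning the second copy. A worked example with hypothetical sizes:

/* Hypothetical sizes illustrating the CRLF accounting above:
 *   old value = "foo\r\n"  ->  old_it->nbytes = 5
 *   new value = "bar\r\n"  ->  it->nbytes     = 5
 *
 * APPEND should produce "foobar\r\n" (8 bytes), so the allocation is
 *   it->nbytes + old_it->nbytes - 2  =  5 + 5 - 2  =  8
 * and the second memcpy starts at offset old_it->nbytes - 2 = 3, writing
 * "bar\r\n" immediately after "foo" so only one terminating CRLF remains. */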