/* * alloc a item buffer, and init it. */ item *item_alloc1(char *key, const size_t nkey, const int flags, const int nbytes) { uint8_t nsuffix; item *it; char suffix[40]; size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix); if (ntotal > settings.item_buf_size){ it = (item *)malloc(ntotal); if (it == NULL){ return NULL; } memset(it, 0, ntotal); if (settings.verbose > 1) { fprintf(stderr, "alloc a item buffer from malloc.\n"); } }else{ it = item_from_freelist(); if (it == NULL){ return NULL; } if (settings.verbose > 1) { fprintf(stderr, "alloc a item buffer from freelist.\n"); } } it->nkey = nkey; it->nbytes = nbytes; strcpy(ITEM_key(it), key); memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix); it->nsuffix = nsuffix; return it; }
/*
 * Emit a filtered message for item 'tit' as part of a "VALUE ..." response.
 *
 * Returns 0 on success, 2 if queueing the iovecs failed, and 1 if no
 * filtered message was produced.  The message list is always released
 * before returning, regardless of outcome.
 */
int add_filtered_msg(void *flib, void *tc, void ***msglist, int i, void *tit, char *keys_arr, int keys_len, add_iov_ptr add_iov, int numParam, char **params) {
    conn *c = (conn *) tc;
    item *it = (item *) tit;
    void *filtered = NULL;
    int offset = 0;
    int length = 0;
    int rc = 1;

    if (c != NULL && *msglist != NULL && it != NULL && i > 0) {
        get_filtered_msg(flib, *msglist, i, ITEM_suffix(it),
                         &filtered, &offset, &length, numParam, params);
        if (filtered != NULL && offset != 0 && length != 0) {
            c->filteredmsg = filtered;
            int failed = add_iov(c, "VALUE ", 6) != 0
                      || add_iov(c, keys_arr, keys_len) != 0
                      || add_iov(c, (char *) filtered + offset, length) != 0;
            rc = failed ? 2 : 0;
        }
    }

    /* Ownership of the message list ends here in every path. */
    free_msglist(flib, msglist, i);
    return rc;
}
/*
 * Allocate an item from the slab allocator and initialize its header.
 * On memory pressure, probes up to 50 items from the tail of the matching
 * LRU for one with refcount == 0 to unlink, then retries.  Returns NULL
 * (0) on failure.  The returned item is not yet linked into hash/LRU.
 */
item *item_alloc(char *key, int flags, rel_time_t exptime, int nbytes) {
    int nsuffix, ntotal, len;
    item *it;
    unsigned int id;
    char suffix[40];

    /* Builds the " <flags> <nbytes>\r\n" suffix and computes the total
     * allocation size.  NOTE(review): this variant passes the key itself
     * and receives the key length back through 'len' -- confirm against
     * this file's item_make_header prototype. */
    ntotal = item_make_header(key, flags, nbytes, suffix, &nsuffix, &len);

    id = slabs_clsid(ntotal);   /* 0 means no slab class can hold ntotal */
    if (id == 0)
        return 0;

    it = slabs_alloc(ntotal);
    if (it == 0) {
        int tries = 50;
        item *search;

        /* If requested to not push old items out of cache when memory runs
         * out, we're out of luck at this point... */
        if (!settings.evict_to_free)
            return 0;

        /*
         * Try to get one off the right LRU.
         * Don't necessarily unlink the tail because it may be locked
         * (refcount > 0); search up from the tail for an item with
         * refcount == 0 and unlink it; give up after 50 tries.
         */
        if (id > LARGEST_ID)
            return 0;
        if (tails[id]==0)
            return 0;

        for (search = tails[id]; tries>0 && search; tries--, search=search->prev) {
            if (search->refcount==0) {
                item_unlink(search);
                break;
            }
        }

        /* Retry now that (at most) one item has been freed. */
        it = slabs_alloc(ntotal);
        if (it==0)
            return 0;
    }

    assert(it->slabs_clsid == 0);
    it->slabs_clsid = id;
    assert(it != heads[it->slabs_clsid]);

    /* Fresh item: not linked anywhere yet. */
    it->next = it->prev = it->h_next = 0;
    it->refcount = 0;
    it->it_flags = 0;
    it->nkey = len;     /* key length as reported by item_make_header */
    it->nbytes = nbytes;
    /* NOTE(review): strcpy assumes 'key' is NUL-terminated -- verify the
     * callers of this variant always guarantee that. */
    strcpy(ITEM_key(it), key);
    it->exptime = exptime;
    memcpy(ITEM_suffix(it), suffix, nsuffix);
    it->nsuffix = nsuffix;
    return it;
}
// complete the response to a get request. void finish_get_command(conn *c) { item *it; int i; // setup all items for writing out. for (i = 0; i < c->ileft; i++) { it = *(c->ilist + i); if (it) { // Construct the response. Each hit adds three elements to the // outgoing data list: // "VALUE <key> <flags> <data_length>\r\n" // "<data>\r\n" // The <data> element is stored on the connection item list, not on // the iov list. if (!conn_add_iov(c, "VALUE ", 6) || !conn_add_iov(c, ITEM_key(it), it->nkey) || !conn_add_iov(c, ITEM_suffix(it), it->nsuffix + it->nbytes)) { item_remove(it); error_response(c, "SERVER_ERROR out of memory writing get response"); return; } if (config.verbose > 1) { fprintf(stderr, ">%d sending key %s\n", c->sfd, ITEM_key(it)); } } else { fprintf(stderr, "ERROR corrupted ilist!\n"); exit(1); } } if (config.verbose > 1) { fprintf(stderr, ">%d END\n", c->sfd); } if (!conn_add_iov(c, "END\r\n", 5) != 0) { error_response(c, "SERVER_ERROR out of memory writing get response"); } else { conn_set_state(c, conn_mwrite); } }
/**
 * Write an ASCII "VALUE ..." response for 'it' to the upstream conn.
 *
 * @param cas_emit  1: emit CAS.
 *                  0: do not emit CAS.
 *                 -1: data driven (emit only if the item carries a CAS).
 *
 * The item's refcount is bumped whenever it is parked on the connection's
 * item list (add_conn_item), so it survives until the write completes.
 */
void cproxy_upstream_ascii_item_response(item *it, conn *uc, int cas_emit) {
    assert(it != NULL);
    assert(uc != NULL);
    assert(uc->state == conn_pause);
    assert(uc->funcs != NULL);
    assert(IS_ASCII(uc->protocol));
    assert(IS_PROXY(uc->protocol));

    if (settings.verbose > 2) {
        /* Item keys are not NUL-terminated; copy to a local to log them. */
        char key[KEY_MAX_LENGTH + 10];
        assert(it->nkey <= KEY_MAX_LENGTH);
        memcpy(key, ITEM_key(it), it->nkey);
        key[it->nkey] = '\0';
        moxi_log_write("<%d cproxy ascii item response, key %s\n", uc->sfd, key);
    }

    /* Sanity check: the stored data block must end in CRLF. */
    if (strncmp(ITEM_data(it) + it->nbytes - 2, "\r\n", 2) == 0) {
        // TODO: Need to clean up half-written add_iov()'s.
        // Consider closing the upstream_conns?
        //
        uint64_t cas = ITEM_get_cas(it);
        if ((cas_emit == 0) || (cas_emit < 0 && cas == CPROXY_NOT_CAS)) {
            /* No CAS: suffix + data can be emitted as one contiguous span. */
            if (add_conn_item(uc, it)) {
                it->refcount++;   /* conn item list now holds a reference */
                if (add_iov(uc, "VALUE ", 6) == 0 &&
                    add_iov(uc, ITEM_key(it), it->nkey) == 0 &&
                    add_iov(uc, ITEM_suffix(it), it->nsuffix + it->nbytes) == 0) {
                    if (settings.verbose > 2) {
                        moxi_log_write("<%d cproxy ascii item response success\n", uc->sfd);
                    }
                }
            }
        } else {
            /* CAS requested: splice " <cas>\r\n" between suffix and data.
             * nsuffix - 2 trims the suffix's trailing CRLF so the CAS token
             * (with its own CRLF) can take its place. */
            char *suffix = add_conn_suffix(uc);
            if (suffix != NULL) {
                sprintf(suffix, " %llu\r\n", (unsigned long long) cas);
                if (add_conn_item(uc, it)) {
                    it->refcount++;   /* conn item list now holds a reference */
                    if (add_iov(uc, "VALUE ", 6) == 0 &&
                        add_iov(uc, ITEM_key(it), it->nkey) == 0 &&
                        add_iov(uc, ITEM_suffix(it), it->nsuffix - 2) == 0 &&
                        add_iov(uc, suffix, strlen(suffix)) == 0 &&
                        add_iov(uc, ITEM_data(it), it->nbytes) == 0) {
                        if (settings.verbose > 2) {
                            moxi_log_write("<%d cproxy ascii item response ok\n", uc->sfd);
                        }
                    }
                }
            }
        }
    } else {
        if (settings.verbose > 1) {
            moxi_log_write("ERROR: unexpected downstream data block");
        }
    }
}
/*@null@*/
/*
 * CLOCK-eviction variant of do_item_alloc.
 *
 * Tries a plain slab allocation first; on failure it sweeps the clock
 * hand for this slab class, skipping crawler placeholders, recently-used
 * items (recency bit set -- cleared and skipped, second-chance style) and
 * items it cannot lock, evicting the first eligible victim.  Returns NULL
 * on failure; the returned item carries one reference for the caller and
 * is not yet linked.  Expiration is deliberately disabled (exptime = 0).
 */
item *do_item_alloc(char *key, const size_t nkey, const int flags,
                    const rel_time_t exptime, const int nbytes,
                    const uint32_t cur_hv) {
    uint8_t nsuffix;
    ck_spinlock_mcs_context_t second_lock;
    item *it = NULL;
    char suffix[40];
    /* nkey + 1 reserves room for a NUL after the key. */
    size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix);
    if (settings.use_cas) {
        ntotal += sizeof(uint64_t);   /* CAS id is stored inline */
    }

    unsigned int id = slabs_clsid(ntotal);
    if (id == 0)
        return 0;

    LOCK_CLOCK();
    /* Avoid hangs if a slab has nothing but refcounted stuff in it. */
    /* int tries_lrutail_reflocked = 1000; */
    item *search;
    item *next_it;
    void *hold_lock = NULL;

    /* We have no expiration. Try alloc a new one first. */
    if ((it = slabs_alloc(ntotal, id)) == NULL) {
        /* NOTE(review): debug leftovers -- in an assert-enabled build this
         * aborts before the eviction path below ever runs. */
        printf("item slab alloc fails\n");
        assert(0);
        /* doing CLOCK eviction */
        search = hand[id];
        if (!search) { /* no mem from alloc or replace */
            UNLOCK_CLOCK();
            return NULL;
        }
        /* scan loop of the clock, which could be potentially
         * unbounded -- we may want an upper limit for it. */
        for (search = hand[id]; search != NULL; search = next_it) {
            assert(search);
            /* we might relink search mid-loop, so search->prev isn't reliable */
            next_it = search->prev;
            // if (*key == 101) printf("aaa %d\n", sizes[id]);
            if (search->nbytes == 0 && search->nkey == 0 && search->it_flags == 1) {
                /* We are a crawler, ignore it. */
                continue;
            }
            uint32_t hv = hash(ITEM_key(search), search->nkey);
            /* Attempt to hash item lock the "search" item. If locked, no
             * other callers can incr the refcount */
            /* Don't accidentally grab ourselves, or bail if we can't quicklock */
            if (hv == cur_hv || (hold_lock = item_try_mcslock(hv, &second_lock)) == NULL)
                continue;
            /* Now see if the item is refcount locked */
            if (refcount_incr(&search->refcount) != 2) {
                /* Avoid pathological case with ref'ed items in tail */
                do_item_update_nolock(search);
                /* tries_lrutail_reflocked--; */
                refcount_decr(&search->refcount);
                itemstats[id].lrutail_reflocked++;
                /* Old rare bug could cause a refcount leak. We haven't seen
                 * it in years, but we leave this code in to prevent failures
                 * just in case */
                if (settings.tail_repair_time &&
                    search->time + settings.tail_repair_time < current_time) {
                    itemstats[id].tailrepairs++;
                    search->refcount = 1;
                    do_item_unlink_nolock(search, hv);
                }
                if (hold_lock)
                    item_try_mcsunlock(hold_lock, &second_lock);
                /* if (tries_lrutail_reflocked < 1) */
                /*     break; */
                continue;
            }
            if (search->recency) {
                /* recently accessed. clear bit and continue. */
                search->recency = 0;
                continue;
            }
            // printf("aaa %d, %d\n", sizes[id], *key);
            /* Victim found: account the eviction and steal its memory. */
            itemstats[id].evicted++;
            itemstats[id].evicted_time = current_time - search->time;
            if (search->exptime != 0)
                itemstats[id].evicted_nonzero++;
            if ((search->it_flags & ITEM_FETCHED) == 0) {
                itemstats[id].evicted_unfetched++;
            }
            it = search;
            slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
            do_item_unlink_nolock(it, hv);
            /* Initialize the item block: */
            it->slabs_clsid = 0;
            refcount_decr(&search->refcount);
            /* If hash values were equal, we don't grab a second lock */
            if (hold_lock)
                item_try_mcsunlock(hold_lock, &second_lock);
            break;
        } /* end of loop*/
    } /* end of allocation / eviction */

    if (it == NULL) {
        itemstats[id].outofmemory++;
        UNLOCK_CLOCK();
        return NULL;
    }
    assert(it->slabs_clsid == 0);

    /* Item initialization can happen outside of the lock; the item's already
     * been removed from the slab LRU. */
    it->refcount = 1; /* the caller will have a reference */
    UNLOCK_CLOCK();
    it->next = it->prev = it->h_next = 0;
    it->slabs_clsid = id;

    DEBUG_REFCNT(it, '*');
    it->it_flags = settings.use_cas ? ITEM_CAS : 0;
    it->nkey = nkey;
    it->nbytes = nbytes;
    memcpy(ITEM_key(it), key, nkey);
    it->exptime = 0; //exptime; /* disable expiration. */
    memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix);
    it->nsuffix = nsuffix;
    return it;
}
// process a memcached get(s) command. (we don't support CAS).
//
// Tokenizes and looks up every key on the command line, queueing a
// "VALUE <key> <flags> <bytes>\r\n<data>\r\n" block per hit, followed by
// "END\r\n".  Hits are remembered on c->ilist (ownership via refcount,
// released after the write).  This variant also optionally attaches a
// randomly-sized memory blob and a simulated RTT delay to the connection.
void process_get_command(conn *c, token_t *tokens, size_t ntokens, bool return_cas) {
    char *key;
    size_t nkey;
    int i = 0;   /* number of items remembered on c->ilist */
    item *it;
    token_t *key_token = &tokens[KEY_TOKEN];
    char *suffix;   /* NOTE(review): unused in this function */
    assert(c != NULL);

    /* Optional memory-pressure simulation: attach a Gaussian-sized blob
     * to the connection the first time through. */
    if (config.alloc && c->mem_blob == NULL) {
        long size = config.alloc_mean + gsl_ran_gaussian(c->thread->r, config.alloc_stddev);
        size = size <= 0 ? 10 : size;   /* clamp to a small positive floor */
        if (config.verbose > 0) {
            fprintf(stderr, "allocated blob: %ld\n", size);
        }
        c->mem_blob = malloc(sizeof(char) * size);
        c->mem_free_delay = 0;

        /* Optional simulated round-trip delay past a cutoff. */
        if (config.rtt_delay) {
            double r = config.rtt_mean + gsl_ran_gaussian(c->thread->r, config.rtt_stddev);
            if (r >= config.rtt_cutoff) {
                int wait = r / 100;
                if (config.verbose > 0) {
                    fprintf(stderr, "delay: %d\n", wait);
                }
                c->mem_free_delay = wait;
                conn_set_state(c, conn_mwrite);
            }
        }
    }

    // process the whole command line, (only part of it may be tokenized right now)
    do {
        // process all tokenized keys at this stage.
        while(key_token->length != 0) {
            key = key_token->value;
            nkey = key_token->length;

            if(nkey > KEY_MAX_LENGTH) {
                error_response(c, "CLIENT_ERROR bad command line format");
                return;
            }

            // lookup key-value.
            it = item_get(key, nkey);

            // hit.
            if (it) {
                /* Grow the remembered-item list if needed; on failure drop
                 * this hit and bail out of the key loop (out-of-memory). */
                if (i >= c->isize && !conn_expand_items(c)) {
                    item_remove(it);
                    break;
                }

                // Construct the response. Each hit adds three elements to the
                // outgoing data list:
                //   "VALUE <key> <flags> <data_length>\r\n"
                //   "<data>\r\n"
                // The <data> element is stored on the connection item list, not on
                // the iov list.
                // NOTE(review): "!x != 0" parses as "(!x) != 0", i.e. just
                // "!x" -- conn_add_iov returns nonzero on success.
                if (!conn_add_iov(c, "VALUE ", 6) != 0 ||
                    !conn_add_iov(c, ITEM_key(it), it->nkey) != 0 ||
                    !conn_add_iov(c, ITEM_suffix(it), it->nsuffix + it->nbytes) != 0) {
                    item_remove(it);
                    break;
                }

                if (config.verbose > 1) {
                    fprintf(stderr, ">%d sending key %s\n", c->sfd, key);
                }

                // add item to remembered list (i.e., we've taken ownership of them
                // through refcounting and later must release them once we've
                // written out the iov associated with them).
                item_update(it);
                *(c->ilist + i) = it;
                i++;
            }
            key_token++;
        }

        /*
         * If the command string hasn't been fully processed, get the next set
         * of tokens.
         */
        if(key_token->value != NULL) {
            ntokens = tokenize_command(key_token->value, tokens, MAX_TOKENS);
            key_token = tokens;
        }
    } while(key_token->value != NULL);

    c->icurr = c->ilist;
    c->ileft = i;

    if (config.verbose > 1) {
        fprintf(stderr, ">%d END\n", c->sfd);
    }

    // If the loop was terminated because of out-of-memory, it is not reliable
    // to add END\r\n to the buffer, because it might not end in \r\n. So we
    // send SERVER_ERROR instead.
    if (key_token->value != NULL || !conn_add_iov(c, "END\r\n", 5) != 0) {
        error_response(c, "SERVER_ERROR out of memory writing get response");
    } else {
        conn_set_state(c, conn_mwrite);
    }
}
/*@null@*/
/*
 * Allocate an item, evicting from the LRU tail if the slab class is full.
 *
 * Strategy: plain allocation; then up to 50 tail probes for an
 * unreferenced victim (replicating the delete when built with
 * USE_REPLICATION); then a last-ditch "tail repair" pass that forcibly
 * frees items whose refcount appears leaked (locked for > 3 hours).
 * Returns NULL on failure; the returned item carries one caller reference
 * and is not yet linked.
 */
item *do_item_alloc(char *key, const size_t nkey, const int flags,
                    const rel_time_t exptime, const int nbytes) {
    uint8_t nsuffix;
    item *it;
    char suffix[40];
    /* nkey + 1 reserves room for a NUL after the key. */
    size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix);

    unsigned int id = slabs_clsid(ntotal);
    if (id == 0)
        return 0;

    it = slabs_alloc(ntotal, id);
    if (it == 0) {
        int tries = 50;
        item *search;

        /* If requested to not push old items out of cache when memory runs
         * out, we're out of luck at this point... */
        if (settings.evict_to_free == 0) {
            itemstats[id].outofmemory++;
            return NULL;
        }

        /*
         * Try to get one off the right LRU.
         * Don't necessarily unlink the tail because it may be locked
         * (refcount > 0); search up from the tail for an item with
         * refcount == 0 and unlink it; give up after 50 tries.
         */
        if (tails[id] == 0) {
            itemstats[id].outofmemory++;
            return NULL;
        }

        for (search = tails[id]; tries > 0 && search != NULL; tries--, search=search->prev) {
            if (search->refcount == 0) {
                /* Unexpired victim: this is a real eviction, count it. */
                if (search->exptime == 0 || search->exptime > current_time) {
                    itemstats[id].evicted++;
                    itemstats[id].evicted_time = current_time - search->time;
                    STATS_LOCK();
                    stats.evictions++;
                    STATS_UNLOCK();
#ifdef USE_REPLICATION
                    /* Propagate the eviction to the replication peer. */
                    replication_call_del(ITEM_key(search), search->nkey);
#endif /* USE_REPLICATION */
                }
                do_item_unlink(search);
                break;
            }
        }
        it = slabs_alloc(ntotal, id);
        if (it == 0) {
            itemstats[id].outofmemory++;
            /* Last ditch effort. There is a very rare bug which causes
             * refcount leaks. We've fixed most of them, but it still happens,
             * and it may happen in the future.
             * We can reasonably assume no item can stay locked for more than
             * three hours, so if we find one in the tail which is that old,
             * free it anyway.
             */
            tries = 50;
            for (search = tails[id]; tries > 0 && search != NULL; tries--, search=search->prev) {
                /* 10800 s == 3 hours: assume the refcount leaked. */
                if (search->refcount != 0 && search->time + 10800 < current_time) {
                    itemstats[id].tailrepairs++;
                    search->refcount = 0;
                    do_item_unlink(search);
                    break;
                }
            }
            it = slabs_alloc(ntotal, id);
            if (it == 0) {
                return NULL;
            }
        }
    }

    assert(it->slabs_clsid == 0);
    it->slabs_clsid = id;
    assert(it != heads[it->slabs_clsid]);

    /* Fresh item: not linked anywhere yet. */
    it->next = it->prev = it->h_next = 0;
    it->refcount = 1; /* the caller will have a reference */
    DEBUG_REFCNT(it, '*');
    it->it_flags = 0;
    it->nkey = nkey;
    it->nbytes = nbytes;
    /* NOTE(review): strcpy assumes 'key' is NUL-terminated; sibling
     * variants in this file use memcpy(key, nkey) instead -- verify. */
    strcpy(ITEM_key(it), key);
    it->exptime = exptime;
    memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix);
    it->nsuffix = nsuffix;
    return it;
}
/*
 * Allocate an item using the segmented-LRU machinery.
 *
 * Loops up to 10 times alternating slab allocation with lru_pull_tail()
 * reclaim/eviction passes (HOT/WARM/COLD when the LRU maintainer thread
 * is enabled, COLD only in compat mode).  Returns NULL on failure.  The
 * returned item's refcount is already 1 (seeded by slabs_alloc) and it is
 * not yet linked.  For chunked items the first chunk header is threaded
 * into the data area.
 */
item *do_item_alloc(char *key, const size_t nkey, const unsigned int flags,
                    const rel_time_t exptime, const int nbytes) {
    int i;
    uint8_t nsuffix;
    item *it = NULL;
    char suffix[40];
    /* nkey + 1 reserves room for a NUL after the key. */
    size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix);
    if (settings.use_cas) {
        ntotal += sizeof(uint64_t);   /* CAS id is stored inline */
    }

    unsigned int id = slabs_clsid(ntotal);
    if (id == 0)
        return 0;

    /* If no memory is available, attempt a direct LRU juggle/eviction */
    /* This is a race in order to simplify lru_pull_tail; in cases where
     * locked items are on the tail, you want them to fall out and cause
     * occasional OOM's, rather than internally work around them.
     * This also gives one fewer code path for slab alloc/free
     */
    /* TODO: if power_largest, try a lot more times? or a number of times
     * based on how many chunks the new object should take up?
     * or based on the size of an object lru_pull_tail() says it evicted?
     * This is a classical GC problem if "large items" are of too varying of
     * sizes. This is actually okay here since the larger the data, the more
     * bandwidth it takes, the more time we can loop in comparison to serving
     * and replacing small items.
     */
    for (i = 0; i < 10; i++) {
        uint64_t total_bytes;
        /* Try to reclaim memory first */
        if (!settings.lru_maintainer_thread) {
            lru_pull_tail(id, COLD_LRU, 0, 0);
        }
        it = slabs_alloc(ntotal, id, &total_bytes, 0);

        /* NOEXP items are exempt from eviction accounting. */
        if (settings.expirezero_does_not_evict)
            total_bytes -= noexp_lru_size(id);

        if (it == NULL) {
            if (settings.lru_maintainer_thread) {
                /* Shuffle HOT/WARM down, then evict from COLD; give up
                 * when COLD has nothing to yield. */
                lru_pull_tail(id, HOT_LRU, total_bytes, 0);
                lru_pull_tail(id, WARM_LRU, total_bytes, 0);
                if (lru_pull_tail(id, COLD_LRU, total_bytes, LRU_PULL_EVICT) <= 0)
                    break;
            } else {
                if (lru_pull_tail(id, COLD_LRU, 0, LRU_PULL_EVICT) <= 0)
                    break;
            }
        } else {
            break;
        }
    }

    if (i > 0) {
        /* Account how many reclaim passes this allocation needed. */
        pthread_mutex_lock(&lru_locks[id]);
        itemstats[id].direct_reclaims += i;
        pthread_mutex_unlock(&lru_locks[id]);
    }

    if (it == NULL) {
        pthread_mutex_lock(&lru_locks[id]);
        itemstats[id].outofmemory++;
        pthread_mutex_unlock(&lru_locks[id]);
        return NULL;
    }

    assert(it->slabs_clsid == 0);
    //assert(it != heads[id]);

    /* Refcount is seeded to 1 by slabs_alloc() */
    it->next = it->prev = 0;

    /* Items are initially loaded into the HOT_LRU. This is '0' but I want at
     * least a note here. Compiler (hopefully?) optimizes this out. */
    if (settings.lru_maintainer_thread) {
        if (exptime == 0 && settings.expirezero_does_not_evict) {
            id |= NOEXP_LRU;
        } else {
            id |= HOT_LRU;
        }
    } else {
        /* There is only COLD in compat-mode */
        id |= COLD_LRU;
    }
    it->slabs_clsid = id;   /* class id now also encodes the LRU sub-list */

    DEBUG_REFCNT(it, '*');
    it->it_flags |= settings.use_cas ? ITEM_CAS : 0;
    it->nkey = nkey;
    it->nbytes = nbytes;
    memcpy(ITEM_key(it), key, nkey);
    it->exptime = exptime;
    memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix);
    it->nsuffix = nsuffix;

    /* Need to shuffle the pointer stored in h_next into it->data. */
    if (it->it_flags & ITEM_CHUNKED) {
        item_chunk *chunk = (item_chunk *) ITEM_data(it);

        chunk->next = (item_chunk *) it->h_next;
        chunk->prev = 0;
        chunk->head = it;
        /* Need to chain back into the head's chunk */
        chunk->next->prev = chunk;
        /* First chunk's usable size is whatever remains of the slab chunk
         * after the item header/key/suffix. */
        chunk->size = chunk->next->size - ((char *)chunk - (char *)it);
        chunk->used = 0;
        assert(chunk->size > 0);
    }
    it->h_next = 0;
    return it;
}
/*@null@*/
/*
 * Allocate an item, reclaiming expired/flushed tail items or evicting on
 * allocation failure.  On eviction this variant also records the victim
 * as a "shadow item" in a shadow hash/queue (ghost-cache bookkeeping).
 * Returns NULL on failure; the returned item carries one caller reference
 * and is not yet linked.
 */
item *do_item_alloc(char *key, const size_t nkey, const int flags,
                    const rel_time_t exptime, const int nbytes,
                    const uint32_t cur_hv) {
    uint8_t nsuffix;
    item *it = NULL;
    char suffix[40];
    /* nkey + 1 reserves room for a NUL after the key. */
    size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix);
    if (settings.use_cas) {
        ntotal += sizeof(uint64_t);   /* CAS id is stored inline */
    }

    unsigned int id = slabs_clsid(ntotal);
    if (id == 0)
        return 0;

    mutex_lock(&cache_lock);
    /* do a quick check if we have any expired items in the tail.. */
    int tries = 5;
    /* Avoid hangs if a slab has nothing but refcounted stuff in it. */
    int tries_lrutail_reflocked = 1000;
    int tried_alloc = 0;
    item *search;
    item *next_it;
    void *hold_lock = NULL;
    rel_time_t oldest_live = settings.oldest_live;

    search = tails[id];
    /* We walk up *only* for locked items. Never searching for expired.
     * Waste of CPU for almost all deployments */
    for (; tries > 0 && search != NULL; tries--, search=next_it) {
        /* we might relink search mid-loop, so search->prev isn't reliable */
        next_it = search->prev;
        if (search->nbytes == 0 && search->nkey == 0 && search->it_flags == 1) {
            /* We are a crawler, ignore it. */
            tries++;
            continue;
        }
        uint32_t hv = hash(ITEM_key(search), search->nkey);
        /* Attempt to hash item lock the "search" item. If locked, no
         * other callers can incr the refcount */
        /* Don't accidentally grab ourselves, or bail if we can't quicklock */
        if (hv == cur_hv || (hold_lock = item_trylock(hv)) == NULL)
            continue;
        /* Now see if the item is refcount locked */
        if (refcount_incr(&search->refcount) != 2) {
            /* Avoid pathological case with ref'ed items in tail */
            do_item_update_nolock(search);
            tries_lrutail_reflocked--;
            tries++;
            refcount_decr(&search->refcount);
            itemstats[id].lrutail_reflocked++;
            /* Old rare bug could cause a refcount leak. We haven't seen
             * it in years, but we leave this code in to prevent failures
             * just in case */
            if (settings.tail_repair_time &&
                search->time + settings.tail_repair_time < current_time) {
                itemstats[id].tailrepairs++;
                search->refcount = 1;
                do_item_unlink_nolock(search, hv);
            }
            if (hold_lock)
                item_trylock_unlock(hold_lock);
            if (tries_lrutail_reflocked < 1)
                break;
            continue;
        }

        /* Expired or flushed */
        if ((search->exptime != 0 && search->exptime < current_time)
            || (search->time <= oldest_live && oldest_live <= current_time)) {
            itemstats[id].reclaimed++;
            if ((search->it_flags & ITEM_FETCHED) == 0) {
                itemstats[id].expired_unfetched++;
            }
            it = search;
            slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
            do_item_unlink_nolock(it, hv);
            /* Initialize the item block: */
            it->slabs_clsid = 0;
        } else if ((it = slabs_alloc(ntotal, id)) == NULL) {
            tried_alloc = 1;
            if (settings.evict_to_free == 0) {
                itemstats[id].outofmemory++;
            } else {
                itemstats[id].evicted++;
                itemstats[id].evicted_time = current_time - search->time;
                if (search->exptime != 0)
                    itemstats[id].evicted_nonzero++;
                if ((search->it_flags & ITEM_FETCHED) == 0) {
                    itemstats[id].evicted_unfetched++;
                }
                /* Record the victim in the shadow (ghost) cache before
                 * unlinking it.  NOTE(review): 'hv' is overwritten with the
                 * shadow item's hash here and then used to unlink 'search' --
                 * presumably the shadow shares the victim's key so the value
                 * is identical; confirm against create_shadow_item. */
                shadow_item* new_shadow_it = create_shadow_item(search);
                hv = hash(new_shadow_it->key, new_shadow_it->nkey);
                shadow_assoc_insert(new_shadow_it, hv);
                insert_shadowq_item(new_shadow_it,new_shadow_it->slabs_clsid);
                it = search;
                slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
                do_item_unlink_nolock(it, hv);
                /* Initialize the item block: */
                it->slabs_clsid = 0;
                /* If we've just evicted an item, and the automover is set to
                 * angry bird mode, attempt to rip memory into this slab class.
                 * TODO: Move valid object detection into a function, and on a
                 * "successful" memory pull, look behind and see if the next alloc
                 * would be an eviction. Then kick off the slab mover before the
                 * eviction happens.
                 */
                if (settings.slab_automove == 2)
                    slabs_reassign(-1, id);
            }
        }

        refcount_decr(&search->refcount);
        /* If hash values were equal, we don't grab a second lock */
        if (hold_lock)
            item_trylock_unlock(hold_lock);
        break;
    }

    /* Tail probes exhausted without ever calling slabs_alloc: try now. */
    if (!tried_alloc && (tries == 0 || search == NULL))
        it = slabs_alloc(ntotal, id);

    if (it == NULL) {
        itemstats[id].outofmemory++;
        mutex_unlock(&cache_lock);
        return NULL;
    }

    assert(it->slabs_clsid == 0);
    assert(it != heads[id]);

    /* Item initialization can happen outside of the lock; the item's already
     * been removed from the slab LRU. */
    it->refcount = 1; /* the caller will have a reference */
    mutex_unlock(&cache_lock);
    it->next = it->prev = it->h_next = 0;
    it->slabs_clsid = id;

    DEBUG_REFCNT(it, '*');
    it->it_flags = settings.use_cas ? ITEM_CAS : 0;
    it->nkey = nkey;
    it->nbytes = nbytes;
    memcpy(ITEM_key(it), key, nkey);
    it->exptime = exptime;
    memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix);
    it->nsuffix = nsuffix;
    return it;
}
// Allocate an item -- this function contains memcached's core item
// allocation logic: reclaim an expired tail item if possible, otherwise
// allocate from the slab class, otherwise evict the LRU tail.
item *do_item_alloc(char *key, const size_t nkey, const int flags,
                    const rel_time_t exptime, const int nbytes,
                    const uint32_t cur_hv) {
    uint8_t nsuffix;
    item *it = NULL;
    char suffix[40];
    size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix); // total item size
    if (settings.use_cas) {
        ntotal += sizeof(uint64_t); // with CAS enabled the item also stores a uint64_t CAS id
    }

    unsigned int id = slabs_clsid(ntotal); // pick the slab class that fits this item size
    if (id == 0)
        return 0;

    mutex_lock(&cache_lock); // cache lock
    /* do a quick check if we have any expired items in the tail.. */
    int tries = 5;
    int tried_alloc = 0;
    item *search;
    void *hold_lock = NULL;
    rel_time_t oldest_live = settings.oldest_live;

    search = tails[id]; // global: tails[x] is the tail of slab class x's LRU list
    /* We walk up *only* for locked items. Never searching for expired.
     * Waste of CPU for almost all deployments */
    // Probe the LRU tail for an expired item, at most 5 iterations.
    // Note: the search stops at the first item not referenced elsewhere;
    // if that item is expired its space is reused, otherwise we fall
    // through to allocating (or evicting) below.
    for (; tries > 0 && search != NULL; tries--, search=search->prev) {
        if (search->nbytes == 0 && search->nkey == 0 && search->it_flags == 1) {
            /* We are a crawler, ignore it. */
            // Skip crawler placeholders and keep probing.
            tries++;
            continue;
        }
        // Compute the item's hash.  hv serves two purposes: (1) addressing
        // the item in the hash table, and (2) selecting the item-lock that
        // guards this item (several items may share one lock; hash-table
        // collisions are chained).
        uint32_t hv = hash(ITEM_key(search), search->nkey);
        /* Attempt to hash item lock the "search" item. If locked, no
         * other callers can incr the refcount */
        /* Don't accidentally grab ourselves, or bail if we can't quicklock */
        // Lock the candidate item (skip it if it is ourselves or locked).
        if (hv == cur_hv || (hold_lock = item_trylock(hv)) == NULL)
            continue;
        /* Now see if the item is refcount locked */
        // If the item is referenced elsewhere, keep searching backwards.
        if (refcount_incr(&search->refcount) != 2) {
            refcount_decr(&search->refcount);
            /* Old rare bug could cause a refcount leak. We haven't seen
             * it in years, but we leave this code in to prevent failures
             * just in case */
            if (settings.tail_repair_time &&
                search->time + settings.tail_repair_time < current_time) {
                itemstats[id].tailrepairs++;
                search->refcount = 1;
                do_item_unlink_nolock(search, hv);
            }
            if (hold_lock)
                item_trylock_unlock(hold_lock);
            continue;
        }

        /* Expired or flushed */
        // Found an expired (or flush-invalidated) item: reuse its block.
        if ((search->exptime != 0 && search->exptime < current_time)
            || (search->time <= oldest_live && oldest_live <= current_time)) {
            itemstats[id].reclaimed++;
            if ((search->it_flags & ITEM_FETCHED) == 0) {
                itemstats[id].expired_unfetched++;
            }
            it = search;
            // Update memory-accounting statistics.
            slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
            // Remove the old item from the hash table and LRU list.
            do_item_unlink_nolock(it, hv);
            /* Initialize the item block: */
            it->slabs_clsid = 0;
        }
        // No expired item: ask the slab allocator for space.  A NULL
        // return means memory is exhausted and we must evict per LRU.
        else if ((it = slabs_alloc(ntotal, id)) == NULL) {
            tried_alloc = 1; // remember that slabs_alloc was attempted
            // Record eviction details; the often-inspected "evicted_time"
            // statistic is assigned here.
            if (settings.evict_to_free == 0) {
                itemstats[id].outofmemory++;
            } else {
                itemstats[id].evicted++;
                itemstats[id].evicted_time = current_time - search->time; // time since the victim was last touched
                if (search->exptime != 0)
                    itemstats[id].evicted_nonzero++;
                if ((search->it_flags & ITEM_FETCHED) == 0) {
                    itemstats[id].evicted_unfetched++;
                }
                it = search;
                slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal); // update accounting
                do_item_unlink_nolock(it, hv); // drop from hash table and LRU list
                /* Initialize the item block: */
                it->slabs_clsid = 0;
                /* If we've just evicted an item, and the automover is set to
                 * angry bird mode, attempt to rip memory into this slab class.
                 * TODO: Move valid object detection into a function, and on a
                 * "successful" memory pull, look behind and see if the next alloc
                 * would be an eviction. Then kick off the slab mover before the
                 * eviction happens.
                 */
                // Default slab_automove=1 rebalances based on eviction
                // statistics; slab_automove=2 reassigns immediately on any
                // allocation failure.
                if (settings.slab_automove == 2)
                    slabs_reassign(-1, id);
            }
        }

        refcount_decr(&search->refcount);
        /* If hash values were equal, we don't grab a second lock */
        if (hold_lock)
            item_trylock_unlock(hold_lock);
        break;
    }

    // All 5 expired-item probes failed and no usable victim was found:
    // fall back to a plain allocation.
    if (!tried_alloc && (tries == 0 || search == NULL))
        it = slabs_alloc(ntotal, id);

    // Allocation failed: give up and return NULL.
    if (it == NULL) {
        itemstats[id].outofmemory++;
        mutex_unlock(&cache_lock);
        return NULL;
    }

    assert(it->slabs_clsid == 0);
    assert(it != heads[id]);

    /* Item initialization can happen outside of the lock; the item's already
     * been removed from the slab LRU. */
    // Memory secured -- initialize the fresh item.
    it->refcount = 1; /* the caller will have a reference */
    mutex_unlock(&cache_lock);
    it->next = it->prev = it->h_next = 0;
    it->slabs_clsid = id;

    DEBUG_REFCNT(it, '*');
    it->it_flags = settings.use_cas ? ITEM_CAS : 0;
    it->nkey = nkey;
    it->nbytes = nbytes;
    memcpy(ITEM_key(it), key, nkey);
    it->exptime = exptime;
    memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix);
    it->nsuffix = nsuffix;
    return it;
}
/*@null@*/
/*
 * Allocate an item, inspecting only the single LRU tail item: reclaim it
 * if expired/flushed, otherwise allocate from the slab class, otherwise
 * evict it.  Includes a tail-repair pass for leaked refcounts.  Returns
 * NULL on failure; the returned item carries one caller reference and is
 * not yet linked.
 */
item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes) {
    uint8_t nsuffix;
    item *it = NULL;
    char suffix[40];
    /* nkey + 1 reserves room for a NUL after the key. */
    size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix);
    if (settings.use_cas) {
        ntotal += sizeof(uint64_t);   /* CAS id is stored inline */
    }

    unsigned int id = slabs_clsid(ntotal);
    if (id == 0)
        return 0;

    mutex_lock(&cache_lock);
    /* do a quick check if we have any expired items in the tail.. */
    item *search;
    rel_time_t oldest_live = settings.oldest_live;

    search = tails[id];
    /* Only proceed via the tail item if nobody else references it
     * (refcount_incr == 2 means: hash/LRU reference + ours). */
    if (search != NULL && (refcount_incr(&search->refcount) == 2)) {
        if ((search->exptime != 0 && search->exptime < current_time)
            || (search->time <= oldest_live && oldest_live <= current_time)) { // dead by flush
            /* Tail item is expired or flushed: reclaim its block. */
            STATS_LOCK();
            stats.reclaimed++;
            STATS_UNLOCK();
            itemstats[id].reclaimed++;
            if ((search->it_flags & ITEM_FETCHED) == 0) {
                STATS_LOCK();
                stats.expired_unfetched++;
                STATS_UNLOCK();
                itemstats[id].expired_unfetched++;
            }
            it = search;
            slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
            do_item_unlink_nolock(it, hash(ITEM_key(it), it->nkey, 0));
            /* Initialize the item block: */
            it->slabs_clsid = 0;
        } else if ((it = slabs_alloc(ntotal, id)) == NULL) {
            /* Allocator exhausted: evict the tail item (if permitted). */
            if (settings.evict_to_free == 0) {
                itemstats[id].outofmemory++;
                mutex_unlock(&cache_lock);
                return NULL;
            }
            itemstats[id].evicted++;
            itemstats[id].evicted_time = current_time - search->time;
            if (search->exptime != 0)
                itemstats[id].evicted_nonzero++;
            if ((search->it_flags & ITEM_FETCHED) == 0) {
                STATS_LOCK();
                stats.evicted_unfetched++;
                STATS_UNLOCK();
                itemstats[id].evicted_unfetched++;
            }
            STATS_LOCK();
            stats.evictions++;
            STATS_UNLOCK();
            it = search;
            slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
            do_item_unlink_nolock(it, hash(ITEM_key(it), it->nkey, 0));
            /* Initialize the item block: */
            it->slabs_clsid = 0;
            /* If we've just evicted an item, and the automover is set to
             * angry bird mode, attempt to rip memory into this slab class.
             * TODO: Move valid object detection into a function, and on a
             * "successful" memory pull, look behind and see if the next alloc
             * would be an eviction. Then kick off the slab mover before the
             * eviction happens. */
            if (settings.slab_automove == 2)
                slabs_reassign(-1, id, 1);
        } else {
            /* Plain allocation succeeded: release our hold on the tail. */
            refcount_decr(&search->refcount);
        }
    } else {
        /* If the LRU is empty or locked, attempt to allocate memory */
        it = slabs_alloc(ntotal, id);
        if (search != NULL)
            refcount_decr(&search->refcount);
    }

    if (it == NULL) {
        itemstats[id].outofmemory++;
        /* Last ditch effort. There was a very rare bug which caused
         * refcount leaks. We leave this just in case they ever happen again.
         * We can reasonably assume no item can stay locked for more than
         * three hours, so if we find one in the tail which is that old,
         * free it anyway. */
        if (search != NULL && search->refcount != 2 &&
            search->time + TAIL_REPAIR_TIME < current_time) {
            itemstats[id].tailrepairs++;
            search->refcount = 1;
            do_item_unlink_nolock(search, hash(ITEM_key(search), search->nkey, 0));
        }
        mutex_unlock(&cache_lock);
        return NULL;
    }

    assert(it->slabs_clsid == 0);
    assert(it != heads[id]);

    /* Item initialization can happen outside of the lock; the item's already
     * been removed from the slab LRU. */
    it->refcount = 1; /* the caller will have a reference */
    mutex_unlock(&cache_lock);
    it->next = it->prev = it->h_next = 0;
    it->slabs_clsid = id;

    DEBUG_REFCNT(it, '*');
    it->it_flags = settings.use_cas ? ITEM_CAS : 0;
    it->nkey = nkey;
    it->nbytes = nbytes;
    memcpy(ITEM_key(it), key, nkey);
    it->exptime = exptime;
    memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix);
    it->nsuffix = nsuffix;
    return it;
}
/*
 * Serialize a replication command for 'it' into the connection's write
 * buffer: "rep <key> <flags> <abs_exptime> <bytes> <cas>\r\n<data>".
 * Returns 0 on success, -1 if the write buffer could not be grown.
 */
static int replication_rep(conn *c, item *it) {
    int r = 0;   /* NOTE(review): never used */
    int exp = 0;
    int len = 0;
    char *s = "rep ";
    char *n = "\r\n";
    char *p = NULL;
    char flag[40];

    /* Convert the relative expiry to an absolute time for the peer. */
    if(it->exptime)
        exp = it->exptime + stats.started;

    flag[0]=0;
    /* Extract the flags token from the stored suffix " <flags> <nbytes>\r\n":
     * copy it minus the trailing CRLF, strip leading whitespace, then cut
     * at the first space/control character.  (The assignment inside the
     * condition is intentional.) */
    if(p=ITEM_suffix(it)){
        int i;
        memcpy(flag, p, it->nsuffix - 2);
        flag[it->nsuffix - 2] = 0;
        for(i=0;i<strlen(flag);i++){
            if(flag[i] > ' ')
                break;
        }
        memmove(flag,&flag[i],strlen(flag)-i);
        for(p=flag;*p>' ';p++);
        *p=0;
    }

    /* Pre-compute the command length so the buffer is grown exactly once.
     * replication_get_num(NULL, v) appears to return the digit count
     * without writing -- TODO confirm.  NOTE(review): the final strlen(n)
     * is counted but the matching trailing CRLF is never written below;
     * presumably it is slack (nbytes already includes the data's CRLF). */
    len += strlen(s);
    len += it->nkey;
    len += 1;
    len += strlen(flag);
    len += 1;
    len += replication_get_num(NULL, exp);
    len += 1;
    len += replication_get_num(NULL, it->nbytes - 2);
    len += 1;
    len += replication_get_num(NULL, it->cas_id);
    len += strlen(n);
    len += it->nbytes;
    len += strlen(n);

    if(replication_alloc(c,len) == -1){
        fprintf(stderr, "replication: rep alloc error\n");
        return(-1);
    }

    /* Append the command after any bytes already queued on the conn. */
    p = c->wcurr + c->wbytes;
    memcpy(p, s, strlen(s));
    p += strlen(s);
    memcpy(p, ITEM_key(it), it->nkey);
    p += it->nkey;
    *(p++) = ' ';
    memcpy(p, flag, strlen(flag));
    p += strlen(flag);
    *(p++) = ' ';
    p += replication_get_num(p, exp);
    *(p++) = ' ';
    p += replication_get_num(p, it->nbytes - 2);
    *(p++) = ' ';
    p += replication_get_num(p, it->cas_id);
    memcpy(p, n, strlen(n));
    p += strlen(n);
    memcpy(p, ITEM_data(it), it->nbytes);
    p += it->nbytes;
    c->wbytes = p - c->wcurr;
    return(0);
}
/*@null@*/
/*
 * Allocate an item: first try to steal an expired, unreferenced item from
 * the LRU tail (up to 50 probes, avoiding a second slab-mutex trip), then
 * fall back to slab allocation, then eviction, then tail repair.  Returns
 * NULL on failure; the returned item carries one caller reference and is
 * not yet linked.
 */
item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes) {
    uint8_t nsuffix;
    item *it = NULL;
    char suffix[40];
    /* nkey + 1 reserves room for a NUL after the key. */
    size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix);
    if (settings.use_cas) {
        ntotal += sizeof(uint64_t);   /* CAS id is stored inline */
    }

    unsigned int id = slabs_clsid(ntotal);
    if (id == 0)
        return 0;

    /* do a quick check if we have any expired items in the tail.. */
    int tries = 50;
    item *search;

    for (search = tails[id]; tries > 0 && search != NULL; tries--, search=search->prev) {
        if (search->refcount == 0 &&
            (search->exptime != 0 && search->exptime < current_time)) {
            it = search;
            /* I don't want to actually free the object, just steal
             * the item to avoid to grab the slab mutex twice ;-) */
            it->refcount = 1;
            do_item_unlink(it);
            /* Initialize the item block: */
            it->slabs_clsid = 0;
            it->refcount = 0;
            break;
        }
    }

    if (it == NULL && (it = slabs_alloc(ntotal, id)) == NULL) {
        /*
        ** Could not find an expired item at the tail, and memory allocation
        ** failed. Try to evict some items!
        */
        tries = 50;

        /* If requested to not push old items out of cache when memory runs
         * out, we're out of luck at this point... */
        if (settings.evict_to_free == 0) {
            itemstats[id].outofmemory++;
            return NULL;
        }

        /*
         * Try to get one off the right LRU.
         * Don't necessarily unlink the tail because it may be locked
         * (refcount > 0); search up from the tail for an item with
         * refcount == 0 and unlink it; give up after 50 tries.
         */
        if (tails[id] == 0) {
            itemstats[id].outofmemory++;
            return NULL;
        }

        for (search = tails[id]; tries > 0 && search != NULL; tries--, search=search->prev) {
            if (search->refcount == 0) {
                /* Unexpired victim: this is a real eviction, count it. */
                if (search->exptime == 0 || search->exptime > current_time) {
                    itemstats[id].evicted++;
                    itemstats[id].evicted_time = current_time - search->time;
                    STATS_LOCK();
                    stats.evictions++;
                    STATS_UNLOCK();
                }
                do_item_unlink(search);
                break;
            }
        }
        it = slabs_alloc(ntotal, id);
        if (it == 0) {
            itemstats[id].outofmemory++;
            /* Last ditch effort. There is a very rare bug which causes
             * refcount leaks. We've fixed most of them, but it still happens,
             * and it may happen in the future.
             * We can reasonably assume no item can stay locked for more than
             * three hours, so if we find one in the tail which is that old,
             * free it anyway.
             */
            tries = 50;
            for (search = tails[id]; tries > 0 && search != NULL; tries--, search=search->prev) {
                if (search->refcount != 0 && search->time + TAIL_REPAIR_TIME < current_time) {
                    itemstats[id].tailrepairs++;
                    search->refcount = 0;
                    do_item_unlink(search);
                    break;
                }
            }
            it = slabs_alloc(ntotal, id);
            if (it == 0) {
                return NULL;
            }
        }
    }

    assert(it->slabs_clsid == 0);
    it->slabs_clsid = id;
    assert(it != heads[it->slabs_clsid]);

    /* Fresh item: not linked anywhere yet. */
    it->next = it->prev = it->h_next = 0;
    it->refcount = 1; /* the caller will have a reference */
    DEBUG_REFCNT(it, '*');
    it->it_flags = settings.use_cas ? ITEM_CAS : 0;
    it->nkey = nkey;
    it->nbytes = nbytes;
    memcpy(ITEM_key(it), key, nkey);
    it->exptime = exptime;
    memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix);
    it->nsuffix = nsuffix;
    return it;
}
/*
 * Store `it` under its key according to the storage verb `comm`
 * (add/replace/append/prepend/set...).
 *
 *  - ADD with an existing key: no store, but bump the old item's LRU position.
 *  - REPLACE/APPEND/PREPEND with no existing key: no store (NOT_STORED).
 *  - APPEND/PREPEND: allocate a merged item combining old and new data
 *    (the "- 2" terms strip the trailing "\r\n" at each splice point),
 *    then link the merged item in place of the old one.
 *  - Everything else: link/replace unconditionally.
 *
 * `hv` is the precomputed hash of the key. Returns STORED or NOT_STORED.
 * Reference handling: do_item_get() takes a reference on old_it, and
 * do_item_alloc() returns new_it with one; both are released at the end
 * (item_replace/do_item_link take their own references as needed).
 */
enum store_item_type do_store_item ( item *it, int comm, const uint32_t hv ) {
    char *key = ITEM_key (it);
    item *old_it = do_item_get (key, it->nkey, hv);
    enum store_item_type stored = NOT_STORED;
    item *new_it = NULL;
    int flags;

    if ( old_it != NULL && comm == NREAD_ADD ) {
        /* ADD on an existing key: treat as a touch so the item stays warm. */
        do_item_update (old_it);
    } else if ( ! old_it && ( comm == NREAD_REPLACE || comm == NREAD_APPEND || comm == NREAD_PREPEND ) ) {
        /* Nothing to replace/append/prepend to; fall through to NOT_STORED. */
    } else {
        if ( comm == NREAD_APPEND || comm == NREAD_PREPEND ) {
            /* old_it is non-NULL here: the !old_it case was handled above. */
            if ( stored == NOT_STORED ) {
                /* Reuse the old item's flags; its suffix starts with " <flags> ...". */
                flags = ( int ) strtol (ITEM_suffix (old_it), ( char ** ) NULL, 10);
                /* Merged size: both payloads minus one redundant "\r\n". */
                new_it = do_item_alloc (key, it->nkey, flags, old_it->exptime, ITEM_data (it), it->nbytes + old_it->nbytes - 2, hv);
                if ( ! new_it ) {
                    if ( old_it )
                        do_item_remove (old_it);
                    return NOT_STORED;
                }
                if ( comm == NREAD_APPEND ) {
                    memcpy (ITEM_data (new_it), ITEM_data (old_it), old_it->nbytes);
                    /* Overwrite old data's trailing "\r\n" with the new payload. */
                    memcpy (ITEM_data (new_it) + old_it->nbytes - 2, ITEM_data (it), it->nbytes);
                } else {
                    /* NREAD_PREPEND: new payload first, then the old data. */
                    memcpy (ITEM_data (new_it), ITEM_data (it), it->nbytes);
                    memcpy (ITEM_data (new_it) + it->nbytes - 2, ITEM_data (old_it), old_it->nbytes);
                }
                it = new_it;   /* store the merged item below */
            }
        }
        if ( stored == NOT_STORED ) {
            if ( old_it != NULL ) {
                item_replace (old_it, it, hv);
            } else {
                do_item_link (it, hv);
            }
            stored = STORED;
        }
    }
    /* Drop the references taken by do_item_get() / do_item_alloc(). */
    if ( old_it != NULL ) {
        do_item_remove (old_it);
    }
    if ( new_it != NULL ) {
        do_item_remove (new_it);
    }
    return stored;
}
/*
 * Allocate and initialize an item for the given key/flags/data size
 * (older, pre-CAS variant: no use_cas accounting, key copied with strcpy).
 *
 * Tries a direct slabs_alloc() first; on failure, and only if eviction is
 * permitted (settings.evict_to_free), walks up to 50 items from the LRU tail
 * for an unreferenced one to unlink, then retries the allocation once.
 *
 * Returns the item with refcount == 1 (caller's reference), or NULL/0 on
 * failure. NOTE(review): relies on key being NUL-terminated for strcpy —
 * item_make_header(nkey + 1, ...) reserves the terminator's byte.
 */
/*@null@*/ item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes) {
    uint8_t nsuffix;
    item *it;
    char suffix[40];
    size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix);

    unsigned int id = slabs_clsid(ntotal);
    if (id == 0)
        return 0;   /* too large for any slab class */

    it = slabs_alloc(ntotal);
    if (it == 0) {
        int tries = 50;
        item *search;

        /* If requested to not push old items out of cache when memory runs out,
         * we're out of luck at this point...
         */
        if (settings.evict_to_free == 0)
            return NULL;

        /*
         * try to get one off the right LRU
         * don't necessariuly unlink the tail because it may be locked: refcount>0
         * search up from tail an item with refcount==0 and unlink it; give up after 50
         * tries
         */

        if (id > LARGEST_ID)
            return NULL;
        if (tails[id] == 0)
            return NULL;

        for (search = tails[id]; tries > 0 && search != NULL; tries--, search=search->prev) {
            if (search->refcount == 0) {
                /* Count as an eviction only if the item hadn't already expired. */
                if (search->exptime == 0 || search->exptime > current_time) {
                    STATS_LOCK();
                    stats.evictions++;
                    STATS_UNLOCK();
                }
                do_item_unlink(search);
                break;
            }
        }
        it = slabs_alloc(ntotal);
        if (it == 0)
            return NULL;
    }

    assert(it->slabs_clsid == 0);

    it->slabs_clsid = id;

    assert(it != heads[it->slabs_clsid]);

    it->next = it->prev = it->h_next = 0;
    it->refcount = 1;   /* the caller will have a reference */
    DEBUG_REFCNT(it, '*');
    it->it_flags = 0;
    it->nkey = nkey;
    it->nbytes = nbytes;
    strcpy(ITEM_key(it), key);
    it->exptime = exptime;
    memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix);
    it->nsuffix = nsuffix;
    return it;
}
/*
 * Allocate and initialize an item (per-bucket trylock variant).
 *
 * Walks at most 5 items from the LRU tail of the matching slab class under
 * cache_lock, skipping entries whose hash-bucket lock or refcount is held.
 * An expired/flushed tail item is reclaimed in place; otherwise a fresh
 * slabs_alloc() is tried, and on failure (with evict_to_free set) the tail
 * item is evicted and its memory reused.
 *
 * `cur_hv` is the hash of the key the caller already holds the bucket lock
 * for, so we don't deadlock trying to re-lock the same bucket.
 *
 * Returns the item with refcount == 1 (caller's reference), or NULL on
 * out-of-memory; 0 when the item is too large for any slab class.
 */
/*@null@*/ item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes, const uint32_t cur_hv) {
    uint8_t nsuffix;
    item *it = NULL;
    char suffix[40];
    /* Compute the total space this item requires (header + key + suffix + data). */
    size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix);
    if (settings.use_cas) {
        ntotal += sizeof(uint64_t);   /* inline CAS id */
    }

    /* Map the size onto a slab class; 0 means the item is too large to store. */
    unsigned int id = slabs_clsid(ntotal);
    if (id == 0)
        return 0;

    mutex_lock(&cache_lock);
    /* do a quick check if we have any expired items in the tail.. */
    /* Try up to 5 LRU-tail entries before giving up and just allocating. */
    int tries = 5;
    int tried_alloc = 0;
    item *search;
    void *hold_lock = NULL;
    rel_time_t oldest_live = settings.oldest_live;

    search = tails[id];
    /* We walk up *only* for locked items. Never searching for expired.
     * Waste of CPU for almost all deployments */
    for (; tries > 0 && search != NULL; tries--, search=search->prev) {
        uint32_t hv = hash(ITEM_key(search), search->nkey, 0);
        /* Attempt to hash item lock the "search" item. If locked, no
         * other callers can incr the refcount */
        /* FIXME: I think we need to mask the hv here for comparison? */
        if (hv != cur_hv && (hold_lock = item_trylock(hv)) == NULL)
            continue;
        /* Now see if the item is refcount locked */
        if (refcount_incr(&search->refcount) != 2) {
            refcount_decr(&search->refcount);
            /* Old rare bug could cause a refcount leak. We haven't seen
             * it in years, but we leave this code in to prevent failures
             * just in case */
            if (search->time + TAIL_REPAIR_TIME < current_time) {
                itemstats[id].tailrepairs++;
                search->refcount = 1;
                do_item_unlink_nolock(search, hv);
            }
            if (hold_lock)
                item_trylock_unlock(hold_lock);
            continue;
        }

        /* Expired or flushed */
        /* The tail item has expired (or predates a flush): reuse its memory directly. */
        if ((search->exptime != 0 && search->exptime < current_time) || (search->time <= oldest_live && oldest_live <= current_time)) {
            itemstats[id].reclaimed++;
            if ((search->it_flags & ITEM_FETCHED) == 0) {
                itemstats[id].expired_unfetched++;
            }
            it = search;
            slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
            do_item_unlink_nolock(it, hv);
            /* Initialize the item block: */
            it->slabs_clsid = 0;
        }
        /* No expired item found AND a fresh allocation failed: the only
         * option left is to evict this (still-valid) LRU-tail item. */
        else if ((it = slabs_alloc(ntotal, id)) == NULL) {
            tried_alloc = 1;
            if (settings.evict_to_free == 0) {
                itemstats[id].outofmemory++;
            } else {
                itemstats[id].evicted++;
                itemstats[id].evicted_time = current_time - search->time;
                if (search->exptime != 0)
                    itemstats[id].evicted_nonzero++;
                if ((search->it_flags & ITEM_FETCHED) == 0) {
                    itemstats[id].evicted_unfetched++;
                }
                it = search;
                slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
                do_item_unlink_nolock(it, hv);
                /* Initialize the item block: */
                it->slabs_clsid = 0;

                /* If we've just evicted an item, and the automover is set to
                 * angry bird mode, attempt to rip memory into this slab class.
                 * TODO: Move valid object detection into a function, and on a
                 * "successful" memory pull, look behind and see if the next alloc
                 * would be an eviction. Then kick off the slab mover before the
                 * eviction happens.
                 */
                if (settings.slab_automove == 2)
                    slabs_reassign(-1, id);
            }
        }

        refcount_decr(&search->refcount);
        /* If hash values were equal, we don't grab a second lock */
        if (hold_lock)
            item_trylock_unlock(hold_lock);
        break;
    }

    /* Nothing reusable found in the tail walk: allocate from the slab system. */
    if (!tried_alloc && (tries == 0 || search == NULL))
        it = slabs_alloc(ntotal, id);

    if (it == NULL) {
        itemstats[id].outofmemory++;
        mutex_unlock(&cache_lock);
        return NULL;
    }

    assert(it->slabs_clsid == 0);
    assert(it != heads[id]);

    /* Item initialization can happen outside of the lock; the item's already
     * been removed from the slab LRU. */
    it->refcount = 1;     /* the caller will have a reference */
    mutex_unlock(&cache_lock);
    it->next = it->prev = it->h_next = 0;
    it->slabs_clsid = id;

    DEBUG_REFCNT(it, '*');
    it->it_flags = settings.use_cas ? ITEM_CAS : 0;
    it->nkey = nkey;
    it->nbytes = nbytes;
    memcpy(ITEM_key(it), key, nkey);
    it->exptime = exptime;
    memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix);
    it->nsuffix = nsuffix;
    return it;
}
/* Forward an upstream command that came with item data,
 * like set/add/replace/etc.
 *
 * Picks the downstream connection for the item's key, then streams the
 * ascii storage command ("<verb> <key> <flags> <exptime> <bytes>[ <cas>]
 * [ noreply]\r\n<data>\r\n") via iovecs. The flags and length fields are
 * sliced out of the item's existing suffix (" <flags> <length>\r\n");
 * exptime and cas are freshly formatted into per-connection suffix buffers.
 *
 * Returns true when the write was successfully queued (or optimized to
 * self); false on any prep/oom/write-setup failure.
 */
bool cproxy_forward_a2a_item_downstream(downstream *d, short cmd, item *it, conn *uc) {
    assert(d != NULL);
    assert(d->ptd != NULL);
    assert(d->ptd->proxy != NULL);
    assert(d->downstream_conns != NULL);
    assert(it != NULL);
    assert(uc != NULL);
    assert(uc->next == NULL);

    // Assuming we're already connected to downstream.
    //
    bool self = false;

    conn *c = cproxy_find_downstream_conn(d, ITEM_key(it), it->nkey, &self);
    if (c != NULL) {
        if (self) {
            cproxy_optimize_to_self(d, uc, uc->cmd_start);
            complete_nread_ascii(uc);
            return true;
        }

        if (cproxy_prep_conn_for_write(c)) {
            assert(c->state == conn_pause);

            char *verb = nread_text(cmd);
            assert(verb != NULL);

            char *str_flags = ITEM_suffix(it);
            char *str_length = strchr(str_flags + 1, ' ');

            /* BUGFIX: compute the offsets only after the NULL check —
             * subtracting a NULL str_length is undefined behavior. When the
             * separator is missing, len_flags stays 0 and the guard below
             * rejects the item exactly as before. */
            int len_flags = 0;
            int len_length = 0;
            if (str_length != NULL) {
                len_flags = str_length - str_flags;
                len_length = it->nsuffix - len_flags - 2;   /* strip trailing "\r\n" */
            }

            char *str_exptime = add_conn_suffix(c);
            char *str_cas = (cmd == NREAD_CAS ? add_conn_suffix(c) : NULL);

            if (str_flags != NULL &&
                str_length != NULL &&
                len_flags > 1 &&
                len_length > 1 &&
                str_exptime != NULL &&
                (cmd != NREAD_CAS || str_cas != NULL)) {
                /* NOTE(review): exptime is rel_time_t; "%u" assumes it is
                 * unsigned-int sized — confirm against the typedef. */
                sprintf(str_exptime, " %u", it->exptime);

                if (str_cas != NULL)
                    sprintf(str_cas, " %llu", (unsigned long long) ITEM_get_cas(it));

                /* Data is written as ITEM_data - 2 .. +2 to include the
                 * "\r\n" that precedes and terminates the payload block. */
                if (add_iov(c, verb, strlen(verb)) == 0 &&
                    add_iov(c, ITEM_key(it), it->nkey) == 0 &&
                    add_iov(c, str_flags, len_flags) == 0 &&
                    add_iov(c, str_exptime, strlen(str_exptime)) == 0 &&
                    add_iov(c, str_length, len_length) == 0 &&
                    (str_cas == NULL ||
                     add_iov(c, str_cas, strlen(str_cas)) == 0) &&
                    (uc->noreply == false ||
                     add_iov(c, " noreply", 8) == 0) &&
                    add_iov(c, ITEM_data(it) - 2, it->nbytes + 2) == 0) {
                    conn_set_state(c, conn_mwrite);
                    c->write_and_go = conn_new_cmd;

                    if (update_event(c, EV_WRITE | EV_PERSIST)) {
                        d->downstream_used_start = 1;
                        d->downstream_used = 1;

                        if (cproxy_dettach_if_noreply(d, uc) == false) {
                            cproxy_start_downstream_timeout(d, c);

                            // During a synchronous (with-reply) SET,
                            // handle fire-&-forget SET optimization.
                            //
                            if (cmd == NREAD_SET &&
                                cproxy_optimize_set_ascii(d, uc, ITEM_key(it), it->nkey)) {
                                d->ptd->stats.stats.tot_optimize_sets++;
                            }
                        } else {
                            /* noreply path: pause the conn and drop the key
                             * from the front cache so stale data isn't served. */
                            c->write_and_go = conn_pause;

                            mcache_delete(&d->ptd->proxy->front_cache, ITEM_key(it), it->nkey);
                        }

                        return true;
                    }
                }

                /* add_iov or update_event failed: out of memory on the iov path. */
                d->ptd->stats.stats.err_oom++;
                cproxy_close_conn(c);
            } else {
                // TODO: Handle this weird error case.
            }
        } else {
            d->ptd->stats.stats.err_downstream_write_prep++;
            cproxy_close_conn(c);
        }

        if (settings.verbose > 1)
            fprintf(stderr, "Proxy item write out of memory\n");   /* BUGFIX: missing newline */
    }

    return false;
}
/* Allocate a free item from the slab system.
 *
 * Same trylock-based strategy as the upstream 1.4.x allocator: under
 * cache_lock, walk at most 5 entries from the LRU tail of the matching
 * slab class; reclaim an expired/flushed tail item in place, otherwise
 * try slabs_alloc(), and if that fails (and eviction is enabled) evict
 * the still-valid tail item and reuse its memory.
 *
 * `cur_hv` is the bucket hash the caller already holds locked, to avoid
 * self-deadlock. Returns the item with refcount == 1, NULL on OOM, or 0
 * when the item is too large for any slab class.
 */
item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes, const uint32_t cur_hv) {
    uint8_t nsuffix;
    item *it = NULL;
    char suffix[40];
    size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix);
    if (settings.use_cas) {
        ntotal += sizeof(uint64_t);   /* inline CAS id */
    }

    unsigned int id = slabs_clsid(ntotal);
    if (id == 0)
        return 0;

    mutex_lock(&cache_lock);
    /* do a quick check if we have any expired items in the tail.. */
    int tries = 5;
    int tried_alloc = 0;
    item *search;
    void *hold_lock = NULL;
    rel_time_t oldest_live = settings.oldest_live;

    search = tails[id];
    /* We walk up *only* for locked items. Never searching for expired.
     * Waste of CPU for almost all deployments */
    for (; tries > 0 && search != NULL; tries--, search=search->prev) {
        uint32_t hv = hash(ITEM_key(search), search->nkey, 0);
        /* Attempt to hash item lock the "search" item. If locked, no
         * other callers can incr the refcount */
        /* FIXME: I think we need to mask the hv here for comparison? */
        if (hv != cur_hv && (hold_lock = item_trylock(hv)) == NULL)
            continue;
        /* Now see if the item is refcount locked */
        if (refcount_incr(&search->refcount) != 2) {
            refcount_decr(&search->refcount);
            /* Old rare bug could cause a refcount leak. We haven't seen
             * it in years, but we leave this code in to prevent failures
             * just in case */
            if (search->time + TAIL_REPAIR_TIME < current_time) {
                itemstats[id].tailrepairs++;
                search->refcount = 1;
                do_item_unlink_nolock(search, hv);
            }
            if (hold_lock)
                item_trylock_unlock(hold_lock);
            continue;
        }

        /* First check whether the last item in the LRU queue has expired;
         * if so, hand that item's memory straight back to the caller. */
        if ((search->exptime != 0 && search->exptime < current_time) || (search->time <= oldest_live && oldest_live <= current_time)) {
            itemstats[id].reclaimed++;
            if ((search->it_flags & ITEM_FETCHED) == 0) {
                itemstats[id].expired_unfetched++;
            }
            it = search;
            slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
            /* Remove this item from both the LRU queue and the hash table. */
            do_item_unlink_nolock(it, hv);
            /* Initialize the item block: */
            it->slabs_clsid = 0;
        }
        /* No expired item: try the slab class; if that allocation fails,
         * evict the LRU tail item (even though it hasn't expired) and hand
         * its memory to the caller. */
        else if ((it = slabs_alloc(ntotal, id)) == NULL) {
            tried_alloc = 1;
            if (settings.evict_to_free == 0) {
                itemstats[id].outofmemory++;   /* surfaced in stats output */
            } else {
                itemstats[id].evicted++;   /* per-class eviction counter, read by the stats thread */
                itemstats[id].evicted_time = current_time - search->time;   /* age of the evicted item */
                if (search->exptime != 0)
                    itemstats[id].evicted_nonzero++;
                if ((search->it_flags & ITEM_FETCHED) == 0) {
                    itemstats[id].evicted_unfetched++;
                }
                it = search;
                /* Reuse the old block: adjust the requested-memory accounting instead of allocating. */
                slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
                /* Remove the old item from the hash table and the LRU queue. */
                do_item_unlink_nolock(it, hv);
                /* Initialize the item block: */
                it->slabs_clsid = 0;

                /* If we've just evicted an item, and the automover is set to
                 * angry bird mode (slab_automove == 2; default is 1), attempt
                 * to rip memory into this slab class: any allocation failure
                 * immediately moves a freed slab here, without consulting the
                 * usage statistics first.
                 * TODO: Move valid object detection into a function, and on a
                 * "successful" memory pull, look behind and see if the next
                 * alloc would be an eviction. Then kick off the slab mover
                 * before the eviction happens. */
                if (settings.slab_automove == 2)
                    slabs_reassign(-1, id);
            }
        }

        refcount_decr(&search->refcount);
        /* If hash values were equal, we don't grab a second lock */
        if (hold_lock)
            item_trylock_unlock(hold_lock);
        break;
    }

    if (!tried_alloc && (tries == 0 || search == NULL))
        it = slabs_alloc(ntotal, id);

    if (it == NULL) {
        itemstats[id].outofmemory++;
        mutex_unlock(&cache_lock);
        return NULL;
    }

    assert(it->slabs_clsid == 0);
    assert(it != heads[id]);

    /* Item initialization can happen outside of the lock; the item's
     * already been removed from the slab LRU. */
    it->refcount = 1;   /* the caller will have a reference */
    mutex_unlock(&cache_lock);
    it->next = it->prev = it->h_next = 0;
    it->slabs_clsid = id;

    DEBUG_REFCNT(it, '*');
    it->it_flags = settings.use_cas ? ITEM_CAS : 0;
    it->nkey = nkey;
    it->nbytes = nbytes;
    memcpy(ITEM_key(it), key, nkey);
    it->exptime = exptime;
    memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix);
    it->nsuffix = nsuffix;
    return it;
}