Beispiel #1
0
/*
 * alloc a item buffer, and init it.
 */
item *item_alloc1(char *key, const size_t nkey, const int flags, const int nbytes) {
    uint8_t nsuffix;
    item *it;
    char suffix[40];
    size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix);

    if (ntotal > settings.item_buf_size){
        it = (item *)malloc(ntotal);
        if (it == NULL){
            return NULL;
        }
        memset(it, 0, ntotal);
        if (settings.verbose > 1) {
            fprintf(stderr, "alloc a item buffer from malloc.\n");
        }
    }else{
        it = item_from_freelist();
        if (it == NULL){
            return NULL;
        }
        if (settings.verbose > 1) {
            fprintf(stderr, "alloc a item buffer from freelist.\n");
        }
    }

    it->nkey = nkey;
    it->nbytes = nbytes;
    strcpy(ITEM_key(it), key);
    memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix);
    it->nsuffix = nsuffix;
    return it;
}
Beispiel #2
0
int add_filtered_msg(void *flib, void *tc, void ***msglist, int i, void *tit, char *keys_arr, int keys_len, add_iov_ptr add_iov, int numParam, char **params)
{
    void *msg = NULL;
    int mstart = 0;
    int mlen = 0;
    int ret = 1;
    conn *c = (conn *) tc;
    item *it = (item *) tit;

    if (c != NULL && *msglist != NULL && it != NULL && i > 0) {
        get_filtered_msg(flib, *msglist, i, ITEM_suffix(it), &msg, &mstart, &mlen, numParam, params);
        if (msg != NULL && mstart != 0 && mlen != 0) {
            c->filteredmsg = msg;
            if (add_iov(c, "VALUE ", 6) != 0 ||
                add_iov(c, keys_arr, keys_len) != 0 ||
                add_iov(c, (char *) msg + mstart, mlen) != 0)
                    ret = 2;
                else
                    ret = 0;
        }
    }
    free_msglist(flib, msglist, i);

    return ret;
}
Beispiel #3
0
item *item_alloc(char *key, int flags, rel_time_t exptime, int nbytes) {
    int nsuffix, ntotal, len;
    item *it;
    unsigned int id;
    char suffix[40];

    ntotal = item_make_header(key, flags, nbytes, suffix, &nsuffix, &len);

    id = slabs_clsid(ntotal);
    if (id == 0)
        return 0;

    it = slabs_alloc(ntotal);
    if (it == 0) {
        int tries = 50;
        item *search;

        /* If requested to not push old items out of cache when memory runs out,
         * we're out of luck at this point...
         */

        if (!settings.evict_to_free) return 0;

        /*
         * try to get one off the right LRU
         * don't necessariuly unlink the tail because it may be locked: refcount>0
         * search up from tail an item with refcount==0 and unlink it; give up after 50
         * tries
         */

        if (id > LARGEST_ID) return 0;
        if (tails[id]==0) return 0;

        for (search = tails[id]; tries>0 && search; tries--, search=search->prev) {
            if (search->refcount==0) {
                item_unlink(search);
                break;
            }
        }
        it = slabs_alloc(ntotal);
        if (it==0) return 0;
    }

    assert(it->slabs_clsid == 0);

    it->slabs_clsid = id;

    assert(it != heads[it->slabs_clsid]);

    it->next = it->prev = it->h_next = 0;
    it->refcount = 0;
    it->it_flags = 0;
    it->nkey = len;
    it->nbytes = nbytes;
    strcpy(ITEM_key(it), key);
    it->exptime = exptime;
    memcpy(ITEM_suffix(it), suffix, nsuffix);
    it->nsuffix = nsuffix;
    return it;
}
Beispiel #4
0
// complete the response to a get request.
void finish_get_command(conn *c) {
	item *it;
	int i;

	// setup all items for writing out.
	for (i = 0; i < c->ileft; i++) {
		it = *(c->ilist + i);
		if (it) {
			// Construct the response. Each hit adds three elements to the
			// outgoing data list:
			//   "VALUE <key> <flags> <data_length>\r\n"
			//   "<data>\r\n"
			// The <data> element is stored on the connection item list, not on
			// the iov list.
			if (!conn_add_iov(c, "VALUE ", 6) ||
				 !conn_add_iov(c, ITEM_key(it), it->nkey) ||
				 !conn_add_iov(c, ITEM_suffix(it), it->nsuffix + it->nbytes)) {
				item_remove(it);
				error_response(c, "SERVER_ERROR out of memory writing get response");
				return;
			}

			if (config.verbose > 1) {
				fprintf(stderr, ">%d sending key %s\n", c->sfd, ITEM_key(it));
			}
		} else {
			fprintf(stderr, "ERROR corrupted ilist!\n");
			exit(1);
		}
	}

	if (config.verbose > 1) {
		fprintf(stderr, ">%d END\n", c->sfd);
	}

	if (!conn_add_iov(c, "END\r\n", 5) != 0) {
		error_response(c, "SERVER_ERROR out of memory writing get response");
	} else {
		conn_set_state(c, conn_mwrite);
	}
}
Beispiel #5
0
/**
 * @param cas_emit  1: emit CAS.
 *                  0: do not emit CAS.
 *                 -1: data driven.
 */
void cproxy_upstream_ascii_item_response(item *it, conn *uc,
        int cas_emit) {
    assert(it != NULL);
    assert(uc != NULL);
    assert(uc->state == conn_pause);
    assert(uc->funcs != NULL);
    assert(IS_ASCII(uc->protocol));
    assert(IS_PROXY(uc->protocol));

    if (settings.verbose > 2) {
        char key[KEY_MAX_LENGTH + 10];
        assert(it->nkey <= KEY_MAX_LENGTH);
        memcpy(key, ITEM_key(it), it->nkey);
        key[it->nkey] = '\0';

        moxi_log_write("<%d cproxy ascii item response, key %s\n",
                       uc->sfd, key);
    }

    if (strncmp(ITEM_data(it) + it->nbytes - 2, "\r\n", 2) == 0) {
        // TODO: Need to clean up half-written add_iov()'s.
        //       Consider closing the upstream_conns?
        //
        uint64_t cas = ITEM_get_cas(it);
        if ((cas_emit == 0) ||
                (cas_emit < 0 &&
                 cas == CPROXY_NOT_CAS)) {
            if (add_conn_item(uc, it)) {
                it->refcount++;

                if (add_iov(uc, "VALUE ", 6) == 0 &&
                        add_iov(uc, ITEM_key(it), it->nkey) == 0 &&
                        add_iov(uc, ITEM_suffix(it),
                                it->nsuffix + it->nbytes) == 0) {
                    if (settings.verbose > 2) {
                        moxi_log_write("<%d cproxy ascii item response success\n",
                                       uc->sfd);
                    }
                }
            }
        } else {
            char *suffix = add_conn_suffix(uc);
            if (suffix != NULL) {
                sprintf(suffix, " %llu\r\n", (unsigned long long) cas);

                if (add_conn_item(uc, it)) {
                    it->refcount++;

                    if (add_iov(uc, "VALUE ", 6) == 0 &&
                            add_iov(uc, ITEM_key(it), it->nkey) == 0 &&
                            add_iov(uc, ITEM_suffix(it),
                                    it->nsuffix - 2) == 0 &&
                            add_iov(uc, suffix, strlen(suffix)) == 0 &&
                            add_iov(uc, ITEM_data(it), it->nbytes) == 0) {
                        if (settings.verbose > 2) {
                            moxi_log_write("<%d cproxy ascii item response ok\n",
                                           uc->sfd);
                        }
                    }
                }
            }
        }
    } else {
        if (settings.verbose > 1) {
            moxi_log_write("ERROR: unexpected downstream data block");
        }
    }
}
Beispiel #6
0
/*@null@*/
item *do_item_alloc(char *key, const size_t nkey, const int flags,
                    const rel_time_t exptime, const int nbytes,
                    const uint32_t cur_hv) {
    uint8_t nsuffix;
    ck_spinlock_mcs_context_t second_lock;
    item *it = NULL;
    char suffix[40];
    size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix);
    if (settings.use_cas) {
        ntotal += sizeof(uint64_t);
    }

    unsigned int id = slabs_clsid(ntotal);
    if (id == 0)
        return 0;

    LOCK_CLOCK();

    /* Avoid hangs if a slab has nothing but refcounted stuff in it. */
    /* int tries_lrutail_reflocked = 1000; */
    item *search;
    item *next_it;
    void *hold_lock = NULL;

    /* We have no expiration. Try alloc a new one first. */
    if ((it = slabs_alloc(ntotal, id)) == NULL) {
        printf("item slab alloc fails\n");
        assert(0);
        /* doing CLOCK eviction */
        search = hand[id];
        if (!search) {
            /* no mem from alloc or replace */
            UNLOCK_CLOCK();
            return NULL;
        }

        /* scan loop of the clock, which could be potentially
         * unbounded -- we may want an upper limit for it. */
        for (search = hand[id]; search != NULL; search = next_it) {
            assert(search);
            /* we might relink search mid-loop, so search->prev isn't reliable */
            next_it = search->prev;
//            if (*key == 101) printf("aaa %d\n", sizes[id]);
            if (search->nbytes == 0 && search->nkey == 0 && search->it_flags == 1) {
                /* We are a crawler, ignore it. */
                continue;
            }
            uint32_t hv = hash(ITEM_key(search), search->nkey);
            /* Attempt to hash item lock the "search" item. If locked, no
             * other callers can incr the refcount
             */
            /* Don't accidentally grab ourselves, or bail if we can't quicklock */
            if (hv == cur_hv || (hold_lock = item_try_mcslock(hv, &second_lock)) == NULL)
                continue;
            /* Now see if the item is refcount locked */
            if (refcount_incr(&search->refcount) != 2) {
                /* Avoid pathological case with ref'ed items in tail */
                do_item_update_nolock(search);
                /* tries_lrutail_reflocked--; */
                refcount_decr(&search->refcount);
                itemstats[id].lrutail_reflocked++;
                /* Old rare bug could cause a refcount leak. We haven't seen
                 * it in years, but we leave this code in to prevent failures
                 * just in case */
                if (settings.tail_repair_time &&
                    search->time + settings.tail_repair_time < current_time) {
                    itemstats[id].tailrepairs++;
                    search->refcount = 1;
                    do_item_unlink_nolock(search, hv);
                }
                if (hold_lock)
                    item_try_mcsunlock(hold_lock, &second_lock);

                /* if (tries_lrutail_reflocked < 1) */
                /*     break; */

                continue;
            }

            if (search->recency) {
                /* recently accessed. clear bit and continue. */
                search->recency = 0;
                continue;
            }

//            printf("aaa %d, %d\n", sizes[id], *key);

            itemstats[id].evicted++;
            itemstats[id].evicted_time = current_time - search->time;
            if (search->exptime != 0)
                itemstats[id].evicted_nonzero++;
            if ((search->it_flags & ITEM_FETCHED) == 0) {
                itemstats[id].evicted_unfetched++;
            }
            it = search;
            slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
            do_item_unlink_nolock(it, hv);
            /* Initialize the item block: */
            it->slabs_clsid = 0;
            
            refcount_decr(&search->refcount);
            /* If hash values were equal, we don't grab a second lock */
            if (hold_lock)
                item_try_mcsunlock(hold_lock, &second_lock);
            break;
        }
        /* end of loop*/
    }
    /* end of allocation / eviction */

    if (it == NULL) {
        itemstats[id].outofmemory++;
        UNLOCK_CLOCK();
        return NULL;
    }

    assert(it->slabs_clsid == 0);

    /* Item initialization can happen outside of the lock; the item's already
     * been removed from the slab LRU.
     */
    it->refcount = 1;     /* the caller will have a reference */
    UNLOCK_CLOCK();
    it->next = it->prev = it->h_next = 0;
    it->slabs_clsid = id;

    DEBUG_REFCNT(it, '*');
    it->it_flags = settings.use_cas ? ITEM_CAS : 0;
    it->nkey = nkey;
    it->nbytes = nbytes;
    memcpy(ITEM_key(it), key, nkey);
    it->exptime = 0; //exptime; /* disable expiration. */
    memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix);
    it->nsuffix = nsuffix;
    return it;
}
Beispiel #7
0
// process a memcached get(s) command. (we don't support CAS).
void process_get_command(conn *c, token_t *tokens, size_t ntokens,
                                bool return_cas) {
	char *key;
	size_t nkey;
	int i = 0;
	item *it;
	token_t *key_token = &tokens[KEY_TOKEN];
	char *suffix;

	assert(c != NULL);

	if (config.alloc && c->mem_blob == NULL) {
		long size = config.alloc_mean + gsl_ran_gaussian(c->thread->r, config.alloc_stddev);
		size = size <= 0 ? 10 : size;
		if (config.verbose > 0) {
			fprintf(stderr, "allocated blob: %ld\n", size);
		}

		c->mem_blob = malloc(sizeof(char) * size);
		c->mem_free_delay = 0;

		if (config.rtt_delay) {
			double r = config.rtt_mean + gsl_ran_gaussian(c->thread->r, config.rtt_stddev);
			if (r >= config.rtt_cutoff) {
				int wait = r / 100;
				if (config.verbose > 0) {
					fprintf(stderr, "delay: %d\n", wait);
				}
				c->mem_free_delay = wait;
				conn_set_state(c, conn_mwrite);
			}
		}
	}

	// process the whole command line, (only part of it may be tokenized right now)
	do {
		// process all tokenized keys at this stage.
		while(key_token->length != 0) {
			key = key_token->value;
			nkey = key_token->length;

			if(nkey > KEY_MAX_LENGTH) {
				error_response(c, "CLIENT_ERROR bad command line format");
				return;
			}

			// lookup key-value.
			it = item_get(key, nkey);
			
			// hit.
			if (it) {
				if (i >= c->isize && !conn_expand_items(c)) {
					item_remove(it);
					break;
				}

				// Construct the response. Each hit adds three elements to the
				// outgoing data list:
				//   "VALUE <key> <flags> <data_length>\r\n"
				//   "<data>\r\n"
				// The <data> element is stored on the connection item list, not on
				// the iov list.
				if (!conn_add_iov(c, "VALUE ", 6) != 0 ||
				    !conn_add_iov(c, ITEM_key(it), it->nkey) != 0 ||
				    !conn_add_iov(c, ITEM_suffix(it), it->nsuffix + it->nbytes) != 0) {
					item_remove(it);
					break;
				}

				if (config.verbose > 1) {
					fprintf(stderr, ">%d sending key %s\n", c->sfd, key);
				}

				// add item to remembered list (i.e., we've taken ownership of them
				// through refcounting and later must release them once we've
				// written out the iov associated with them).
				item_update(it);
				*(c->ilist + i) = it;
				i++;
			}

			key_token++;
		}

		/*
		 * If the command string hasn't been fully processed, get the next set
		 * of tokens.
		 */
		if(key_token->value != NULL) {
			ntokens = tokenize_command(key_token->value, tokens, MAX_TOKENS);
			key_token = tokens;
		}

	} while(key_token->value != NULL);

	c->icurr = c->ilist;
	c->ileft = i;

	if (config.verbose > 1) {
		fprintf(stderr, ">%d END\n", c->sfd);
	}

	// If the loop was terminated because of out-of-memory, it is not reliable
	// to add END\r\n to the buffer, because it might not end in \r\n. So we
	// send SERVER_ERROR instead.
	if (key_token->value != NULL || !conn_add_iov(c, "END\r\n", 5) != 0) {
		error_response(c, "SERVER_ERROR out of memory writing get response");
	} else {
		conn_set_state(c, conn_mwrite);
	}
}
/*@null@*/
item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes) {
    uint8_t nsuffix;
    item *it;
    char suffix[40];
    size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix);

    unsigned int id = slabs_clsid(ntotal);
    if (id == 0)
        return 0;

    it = slabs_alloc(ntotal, id);
    if (it == 0) {
        int tries = 50;
        item *search;

        /* If requested to not push old items out of cache when memory runs out,
         * we're out of luck at this point...
         */

        if (settings.evict_to_free == 0) {
            itemstats[id].outofmemory++;
            return NULL;
        }

        /*
         * try to get one off the right LRU
         * don't necessariuly unlink the tail because it may be locked: refcount>0
         * search up from tail an item with refcount==0 and unlink it; give up after 50
         * tries
         */

        if (tails[id] == 0) {
            itemstats[id].outofmemory++;
            return NULL;
        }

        for (search = tails[id]; tries > 0 && search != NULL; tries--, search=search->prev) {
            if (search->refcount == 0) {
                if (search->exptime == 0 || search->exptime > current_time) {
                    itemstats[id].evicted++;
                    itemstats[id].evicted_time = current_time - search->time;
                    STATS_LOCK();
                    stats.evictions++;
                    STATS_UNLOCK();
#ifdef USE_REPLICATION
                    replication_call_del(ITEM_key(search), search->nkey);
#endif /* USE_REPLICATION */
                }
                do_item_unlink(search);
                break;
            }
        }
        it = slabs_alloc(ntotal, id);
        if (it == 0) {
            itemstats[id].outofmemory++;
            /* Last ditch effort. There is a very rare bug which causes
             * refcount leaks. We've fixed most of them, but it still happens,
             * and it may happen in the future.
             * We can reasonably assume no item can stay locked for more than
             * three hours, so if we find one in the tail which is that old,
             * free it anyway.
             */
            tries = 50;
            for (search = tails[id]; tries > 0 && search != NULL; tries--, search=search->prev) {
                if (search->refcount != 0 && search->time + 10800 < current_time) {
                    itemstats[id].tailrepairs++;
                    search->refcount = 0;
                    do_item_unlink(search);
                    break;
                }
            }
            it = slabs_alloc(ntotal, id);
            if (it == 0) {
                return NULL;
            }
        }
    }

    assert(it->slabs_clsid == 0);

    it->slabs_clsid = id;

    assert(it != heads[it->slabs_clsid]);

    it->next = it->prev = it->h_next = 0;
    it->refcount = 1;     /* the caller will have a reference */
    DEBUG_REFCNT(it, '*');
    it->it_flags = 0;
    it->nkey = nkey;
    it->nbytes = nbytes;
    strcpy(ITEM_key(it), key);
    it->exptime = exptime;
    memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix);
    it->nsuffix = nsuffix;
    return it;
}
Beispiel #9
0
item *do_item_alloc(char *key, const size_t nkey, const unsigned int flags,
                    const rel_time_t exptime, const int nbytes) {
    int i;
    uint8_t nsuffix;
    item *it = NULL;
    char suffix[40];
    size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix);
    if (settings.use_cas) {
        ntotal += sizeof(uint64_t);
    }

    unsigned int id = slabs_clsid(ntotal);
    if (id == 0)
        return 0;

    /* If no memory is available, attempt a direct LRU juggle/eviction */
    /* This is a race in order to simplify lru_pull_tail; in cases where
     * locked items are on the tail, you want them to fall out and cause
     * occasional OOM's, rather than internally work around them.
     * This also gives one fewer code path for slab alloc/free
     */
    /* TODO: if power_largest, try a lot more times? or a number of times
     * based on how many chunks the new object should take up?
     * or based on the size of an object lru_pull_tail() says it evicted?
     * This is a classical GC problem if "large items" are of too varying of
     * sizes. This is actually okay here since the larger the data, the more
     * bandwidth it takes, the more time we can loop in comparison to serving
     * and replacing small items.
     */
    for (i = 0; i < 10; i++) {
        uint64_t total_bytes;
        /* Try to reclaim memory first */
        if (!settings.lru_maintainer_thread) {
            lru_pull_tail(id, COLD_LRU, 0, 0);
        }
        it = slabs_alloc(ntotal, id, &total_bytes, 0);

        if (settings.expirezero_does_not_evict)
            total_bytes -= noexp_lru_size(id);

        if (it == NULL) {
            if (settings.lru_maintainer_thread) {
                lru_pull_tail(id, HOT_LRU, total_bytes, 0);
                lru_pull_tail(id, WARM_LRU, total_bytes, 0);
                if (lru_pull_tail(id, COLD_LRU, total_bytes, LRU_PULL_EVICT) <= 0)
                    break;
            } else {
                if (lru_pull_tail(id, COLD_LRU, 0, LRU_PULL_EVICT) <= 0)
                    break;
            }
        } else {
            break;
        }
    }

    if (i > 0) {
        pthread_mutex_lock(&lru_locks[id]);
        itemstats[id].direct_reclaims += i;
        pthread_mutex_unlock(&lru_locks[id]);
    }

    if (it == NULL) {
        pthread_mutex_lock(&lru_locks[id]);
        itemstats[id].outofmemory++;
        pthread_mutex_unlock(&lru_locks[id]);
        return NULL;
    }

    assert(it->slabs_clsid == 0);
    //assert(it != heads[id]);

    /* Refcount is seeded to 1 by slabs_alloc() */
    it->next = it->prev = 0;

    /* Items are initially loaded into the HOT_LRU. This is '0' but I want at
     * least a note here. Compiler (hopefully?) optimizes this out.
     */
    if (settings.lru_maintainer_thread) {
        if (exptime == 0 && settings.expirezero_does_not_evict) {
            id |= NOEXP_LRU;
        } else {
            id |= HOT_LRU;
        }
    } else {
        /* There is only COLD in compat-mode */
        id |= COLD_LRU;
    }
    it->slabs_clsid = id;

    DEBUG_REFCNT(it, '*');
    it->it_flags |= settings.use_cas ? ITEM_CAS : 0;
    it->nkey = nkey;
    it->nbytes = nbytes;
    memcpy(ITEM_key(it), key, nkey);
    it->exptime = exptime;
    memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix);
    it->nsuffix = nsuffix;

    /* Need to shuffle the pointer stored in h_next into it->data. */
    if (it->it_flags & ITEM_CHUNKED) {
        item_chunk *chunk = (item_chunk *) ITEM_data(it);

        chunk->next = (item_chunk *) it->h_next;
        chunk->prev = 0;
        chunk->head = it;
        /* Need to chain back into the head's chunk */
        chunk->next->prev = chunk;
        chunk->size = chunk->next->size - ((char *)chunk - (char *)it);
        chunk->used = 0;
        assert(chunk->size > 0);
    }
    it->h_next = 0;

    return it;
}
Beispiel #10
0
/*@null@*/
item *do_item_alloc(char *key, const size_t nkey, const int flags,
                    const rel_time_t exptime, const int nbytes,
                    const uint32_t cur_hv) {
    uint8_t nsuffix;
    item *it = NULL;
    char suffix[40];
    size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix);
    if (settings.use_cas) {
        ntotal += sizeof(uint64_t);
    }

    unsigned int id = slabs_clsid(ntotal);
    if (id == 0)
        return 0;

    mutex_lock(&cache_lock);
    /* do a quick check if we have any expired items in the tail.. */
    int tries = 5;
    /* Avoid hangs if a slab has nothing but refcounted stuff in it. */
    int tries_lrutail_reflocked = 1000;
    int tried_alloc = 0;
    item *search;
    item *next_it;
    void *hold_lock = NULL;
    rel_time_t oldest_live = settings.oldest_live;

    search = tails[id];
    /* We walk up *only* for locked items. Never searching for expired.
     * Waste of CPU for almost all deployments */
    for (; tries > 0 && search != NULL; tries--, search=next_it) {
        /* we might relink search mid-loop, so search->prev isn't reliable */
        next_it = search->prev;
        if (search->nbytes == 0 && search->nkey == 0 && search->it_flags == 1) {
            /* We are a crawler, ignore it. */
            tries++;
            continue;
        }
        uint32_t hv = hash(ITEM_key(search), search->nkey);
        /* Attempt to hash item lock the "search" item. If locked, no
         * other callers can incr the refcount
         */
        /* Don't accidentally grab ourselves, or bail if we can't quicklock */
        if (hv == cur_hv || (hold_lock = item_trylock(hv)) == NULL)
            continue;
        /* Now see if the item is refcount locked */
        if (refcount_incr(&search->refcount) != 2) {
            /* Avoid pathological case with ref'ed items in tail */
            do_item_update_nolock(search);
            tries_lrutail_reflocked--;
            tries++;
            refcount_decr(&search->refcount);
            itemstats[id].lrutail_reflocked++;
            /* Old rare bug could cause a refcount leak. We haven't seen
             * it in years, but we leave this code in to prevent failures
             * just in case */
            if (settings.tail_repair_time &&
                    search->time + settings.tail_repair_time < current_time) {
                itemstats[id].tailrepairs++;
                search->refcount = 1;
                do_item_unlink_nolock(search, hv);
            }
            if (hold_lock)
                item_trylock_unlock(hold_lock);

            if (tries_lrutail_reflocked < 1)
                break;

            continue;
        }

        /* Expired or flushed */
        if ((search->exptime != 0 && search->exptime < current_time)
            || (search->time <= oldest_live && oldest_live <= current_time)) {
            itemstats[id].reclaimed++;
            if ((search->it_flags & ITEM_FETCHED) == 0) {
                itemstats[id].expired_unfetched++;
            }
            it = search;
            slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
            do_item_unlink_nolock(it, hv);
            /* Initialize the item block: */
            it->slabs_clsid = 0;
        } else if ((it = slabs_alloc(ntotal, id)) == NULL) {
            tried_alloc = 1;
            if (settings.evict_to_free == 0) {
                itemstats[id].outofmemory++;
            } else {
                itemstats[id].evicted++;
                itemstats[id].evicted_time = current_time - search->time;
                if (search->exptime != 0)
                    itemstats[id].evicted_nonzero++;
                if ((search->it_flags & ITEM_FETCHED) == 0) {
                    itemstats[id].evicted_unfetched++;
                }

                shadow_item* new_shadow_it = create_shadow_item(search);
                hv = hash(new_shadow_it->key, new_shadow_it->nkey);
                shadow_assoc_insert(new_shadow_it, hv); 
                insert_shadowq_item(new_shadow_it,new_shadow_it->slabs_clsid);

                it = search;
                slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
                do_item_unlink_nolock(it, hv);
                /* Initialize the item block: */
                it->slabs_clsid = 0;

                /* If we've just evicted an item, and the automover is set to
                 * angry bird mode, attempt to rip memory into this slab class.
                 * TODO: Move valid object detection into a function, and on a
                 * "successful" memory pull, look behind and see if the next alloc
                 * would be an eviction. Then kick off the slab mover before the
                 * eviction happens.
                 */
                if (settings.slab_automove == 2)
                    slabs_reassign(-1, id);
            }
        }

        refcount_decr(&search->refcount);
        /* If hash values were equal, we don't grab a second lock */
        if (hold_lock)
            item_trylock_unlock(hold_lock);
        break;
    }

    if (!tried_alloc && (tries == 0 || search == NULL))
        it = slabs_alloc(ntotal, id);

    if (it == NULL) {
        itemstats[id].outofmemory++;
        mutex_unlock(&cache_lock);
        return NULL;
    }

    assert(it->slabs_clsid == 0);
    assert(it != heads[id]);

    /* Item initialization can happen outside of the lock; the item's already
     * been removed from the slab LRU.
     */
    it->refcount = 1;     /* the caller will have a reference */
    mutex_unlock(&cache_lock);
    it->next = it->prev = it->h_next = 0;
    it->slabs_clsid = id;

    DEBUG_REFCNT(it, '*');
    it->it_flags = settings.use_cas ? ITEM_CAS : 0;
    it->nkey = nkey;
    it->nbytes = nbytes;
    memcpy(ITEM_key(it), key, nkey);
    it->exptime = exptime;
    memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix);
    it->nsuffix = nsuffix;
    return it;
}
Beispiel #11
0
//分配一个item,这个函数包含了memcached具体item分配的逻辑
item *do_item_alloc(char *key, const size_t nkey, const int flags,
                    const rel_time_t exptime, const int nbytes,
                    const uint32_t cur_hv) {
    uint8_t nsuffix;
    item *it = NULL;
    char suffix[40];
    size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix); //item总大小
    if (settings.use_cas) {
        ntotal += sizeof(uint64_t);   //如果有用到cas,那么item大小还要加上unit64_t的size
    }

    unsigned int id = slabs_clsid(ntotal); //根据item的大小,找到适合的slabclass
    if (id == 0)
        return 0;

    mutex_lock(&cache_lock); //cache锁
    /* do a quick check if we have any expired items in the tail.. */
	  int tries = 5;
    int tried_alloc = 0;
    item *search;
    void *hold_lock = NULL;
    rel_time_t oldest_live = settings.oldest_live;

    search = tails[id]; //全局变量,tails[x]是id为x的slabclass lru链表的尾部
    /* We walk up *only* for locked items. Never searching for expired.
     * Waste of CPU for almost all deployments */
     //首先从lru链表尾部查找有没有过期的item,tries = 5,最多循环5次
     //注意这里是最多查找5次,只要找到一个没有被其他地方引用的item,那么就不再继续查找,如果这个item过期,就使用这个item的空间,否则创建新的slab
	  for (; tries > 0 && search != NULL; tries--, search=search->prev) {
        if (search->nbytes == 0 && search->nkey == 0 && search->it_flags == 1) {
            /* We are a crawler, ignore it. */
            //这里只是搜索过期的item,对于异常的item,直接忽略继续查找
			      tries++;
            continue;
        }
		    //计算item的hash值,hv有两个作用:1.用于hash表保存item 2.用于item lock表中锁住item,通过hv计算出item_lock中哪个锁对当前item加锁
		    //不同item的hash值可能相同,hash表中用链表的方式解决冲突;item lock中多个item共享一个锁
        uint32_t hv = hash(ITEM_key(search), search->nkey);
        /* Attempt to hash item lock the "search" item. If locked, no
         * other callers can incr the refcount
         */
        /* Don't accidentally grab ourselves, or bail if we can't quicklock */
        //锁住当前item
		    if (hv == cur_hv || (hold_lock = item_trylock(hv)) == NULL)
            continue;
        /* Now see if the item is refcount locked */
        //检查这个指向的这个item是否被其他地方引用,如果是的话,继续向前查找
        if (refcount_incr(&search->refcount) != 2) {
            refcount_decr(&search->refcount);
            /* Old rare bug could cause a refcount leak. We haven't seen
             * it in years, but we leave this code in to prevent failures
             * just in case */
            if (settings.tail_repair_time &&
                    search->time + settings.tail_repair_time < current_time) {
                itemstats[id].tailrepairs++;
                search->refcount = 1;
                do_item_unlink_nolock(search, hv);
            }
            if (hold_lock)
                item_trylock_unlock(hold_lock);
            continue;
        }

        /* Expired or flushed */
		    //如果找到过期的item
        if ((search->exptime != 0 && search->exptime < current_time)
            || (search->time <= oldest_live && oldest_live <= current_time)) {
            itemstats[id].reclaimed++;
            if ((search->it_flags & ITEM_FETCHED) == 0) {
                itemstats[id].expired_unfetched++;
            }
            it = search;
			      //更新统计数据
            slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
            //把旧的item从hash表和LRU链表中移除
			      do_item_unlink_nolock(it, hv);
            /* Initialize the item block: */
            it->slabs_clsid = 0;
        }
		    //如果没有找到过期的item,则调用slabs_alloc分配空间
		    //如果slabs_alloc返回null,表示分配失败,内存空间已满
		    //需要按LRU进行淘汰
		    else if ((it = slabs_alloc(ntotal, id)) == NULL) {
            tried_alloc = 1;  //标记一下,表示有尝试调用slabs_alloc分配空间
            //记录被淘汰item的信息, 使用memcached经常会查看的evicted_time就是在这里赋值的
			      if (settings.evict_to_free == 0) {
                itemstats[id].outofmemory++;
            } else {
                itemstats[id].evicted++;
                itemstats[id].evicted_time = current_time - search->time; //被淘汰item距离上次使用的时间
                if (search->exptime != 0)
                    itemstats[id].evicted_nonzero++;
                if ((search->it_flags & ITEM_FETCHED) == 0) {
                    itemstats[id].evicted_unfetched++;
                }
                it = search;
                slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal); //更新统计数据
                do_item_unlink_nolock(it, hv);  //从hash表和LRU链表中移除
                /* Initialize the item block: */
                it->slabs_clsid = 0;

                /* If we've just evicted an item, and the automover is set to
                 * angry bird mode, attempt to rip memory into this slab class.
                 * TODO: Move valid object detection into a function, and on a
                 * "successful" memory pull, look behind and see if the next alloc
                 * would be an eviction. Then kick off the slab mover before the
                 * eviction happens.
                 */
				        //默认情况下,slab_automove=1,会合理地更具淘汰统计数据来分析怎么进行slabclass空间的分配
				        //如果slab_automove=2,只要分配失败了,马上进行slabclass空间的重分配
                if (settings.slab_automove == 2)
                    slabs_reassign(-1, id);
            }
        }

        refcount_decr(&search->refcount);
        /* If hash values were equal, we don't grab a second lock */
        if (hold_lock)
            item_trylock_unlock(hold_lock);
        break;
    }

    //查找5次过期的item都失败,并且也没有淘汰可用且没有过期的item
    //分配新的内存空间
    if (!tried_alloc && (tries == 0 || search == NULL))
        it = slabs_alloc(ntotal, id);

	  //分配失败,返回null
    if (it == NULL) {
        itemstats[id].outofmemory++;
        mutex_unlock(&cache_lock);
        return NULL;
    }

    assert(it->slabs_clsid == 0);
    assert(it != heads[id]);

    /* Item initialization can happen outside of the lock; the item's already
     * been removed from the slab LRU.
     */
	  //item内存空间分配成功,做一些初始化工作
    it->refcount = 1;     /* the caller will have a reference */
    mutex_unlock(&cache_lock);
    it->next = it->prev = it->h_next = 0;
    it->slabs_clsid = id;

    DEBUG_REFCNT(it, '*');
    it->it_flags = settings.use_cas ? ITEM_CAS : 0;
    it->nkey = nkey;
    it->nbytes = nbytes;
    memcpy(ITEM_key(it), key, nkey);
    it->exptime = exptime;
    memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix);
    it->nsuffix = nsuffix;
    return it;
}
Beispiel #12
0
/*@null@*/
item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes) {
    uint8_t nsuffix;
    item *it = NULL;
    char suffix[40];
    size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix);
    if (settings.use_cas) {
        ntotal += sizeof(uint64_t);
    }

    unsigned int id = slabs_clsid(ntotal);
    if (id == 0)
        return 0;

    mutex_lock(&cache_lock);
    /* do a quick check if we have any expired items in the tail.. */
    item *search;
    rel_time_t oldest_live = settings.oldest_live;

    search = tails[id];
    if (search != NULL && (refcount_incr(&search->refcount) == 2)) {
        if ((search->exptime != 0 && search->exptime < current_time)
            || (search->time <= oldest_live && oldest_live <= current_time)) {  // dead by flush
            STATS_LOCK();
            stats.reclaimed++;
            STATS_UNLOCK();
            itemstats[id].reclaimed++;
            if ((search->it_flags & ITEM_FETCHED) == 0) {
                STATS_LOCK();
                stats.expired_unfetched++;
                STATS_UNLOCK();
                itemstats[id].expired_unfetched++;
            }
            it = search;
            slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
            do_item_unlink_nolock(it, hash(ITEM_key(it), it->nkey, 0));
            /* Initialize the item block: */
            it->slabs_clsid = 0;
        } else if ((it = slabs_alloc(ntotal, id)) == NULL) {
            if (settings.evict_to_free == 0) {
                itemstats[id].outofmemory++;
                mutex_unlock(&cache_lock);
                return NULL;
            }
            itemstats[id].evicted++;
            itemstats[id].evicted_time = current_time - search->time;
            if (search->exptime != 0)
                itemstats[id].evicted_nonzero++;
            if ((search->it_flags & ITEM_FETCHED) == 0) {
                STATS_LOCK();
                stats.evicted_unfetched++;
                STATS_UNLOCK();
                itemstats[id].evicted_unfetched++;
            }
            STATS_LOCK();
            stats.evictions++;
            STATS_UNLOCK();
            it = search;
            slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
            do_item_unlink_nolock(it, hash(ITEM_key(it), it->nkey, 0));
            /* Initialize the item block: */
            it->slabs_clsid = 0;

            /* If we've just evicted an item, and the automover is set to
             * angry bird mode, attempt to rip memory into this slab class.
             * TODO: Move valid object detection into a function, and on a
             * "successful" memory pull, look behind and see if the next alloc
             * would be an eviction. Then kick off the slab mover before the
             * eviction happens.
             */
            if (settings.slab_automove == 2)
                slabs_reassign(-1, id, 1);
        } else {
            refcount_decr(&search->refcount);
        }
    } else {
        /* If the LRU is empty or locked, attempt to allocate memory */
        it = slabs_alloc(ntotal, id);
        if (search != NULL)
            refcount_decr(&search->refcount);
    }

    if (it == NULL) {
        itemstats[id].outofmemory++;
        /* Last ditch effort. There was a very rare bug which caused
         * refcount leaks. We leave this just in case they ever happen again.
         * We can reasonably assume no item can stay locked for more than
         * three hours, so if we find one in the tail which is that old,
         * free it anyway.
         */
        if (search != NULL &&
            search->refcount != 2 &&
            search->time + TAIL_REPAIR_TIME < current_time) {
            itemstats[id].tailrepairs++;
            search->refcount = 1;
            do_item_unlink_nolock(search, hash(ITEM_key(search), search->nkey, 0));
        }
        mutex_unlock(&cache_lock);
        return NULL;
    }

    assert(it->slabs_clsid == 0);
    assert(it != heads[id]);

    /* Item initialization can happen outside of the lock; the item's already
     * been removed from the slab LRU.
     */
    it->refcount = 1;     /* the caller will have a reference */
    mutex_unlock(&cache_lock);
    it->next = it->prev = it->h_next = 0;
    it->slabs_clsid = id;

    DEBUG_REFCNT(it, '*');
    it->it_flags = settings.use_cas ? ITEM_CAS : 0;
    it->nkey = nkey;
    it->nbytes = nbytes;
    memcpy(ITEM_key(it), key, nkey);
    it->exptime = exptime;
    memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix);
    it->nsuffix = nsuffix;
    return it;
}
static int replication_rep(conn *c, item *it)
{
    int   r = 0;
    int exp = 0;
    int len = 0;
    char *s = "rep ";
    char *n = "\r\n";
    char *p = NULL;
    char flag[40];

    if(it->exptime)
        exp = it->exptime + stats.started;
    flag[0]=0;
    if(p=ITEM_suffix(it)){
        int i;
        memcpy(flag, p, it->nsuffix - 2);
        flag[it->nsuffix - 2] = 0;
        for(i=0;i<strlen(flag);i++){
            if(flag[i] > ' ')
                break;
        }
        memmove(flag,&flag[i],strlen(flag)-i);
        for(p=flag;*p>' ';p++);
        *p=0;
    }
    len += strlen(s);
    len += it->nkey;
    len += 1;
    len += strlen(flag);
    len += 1;
    len += replication_get_num(NULL, exp);
    len += 1;
    len += replication_get_num(NULL, it->nbytes - 2);
    len += 1;
    len += replication_get_num(NULL, it->cas_id);
    len += strlen(n);
    len += it->nbytes;
    len += strlen(n);
    if(replication_alloc(c,len) == -1){
        fprintf(stderr, "replication: rep alloc error\n");
        return(-1);
    }
    p = c->wcurr + c->wbytes;
    memcpy(p, s, strlen(s));
    p += strlen(s);
    memcpy(p, ITEM_key(it), it->nkey);
    p += it->nkey;
    *(p++) = ' ';
    memcpy(p, flag, strlen(flag));
    p += strlen(flag);
    *(p++) = ' ';
    p += replication_get_num(p, exp);
    *(p++) = ' ';
    p += replication_get_num(p, it->nbytes - 2);
    *(p++) = ' ';
    p += replication_get_num(p, it->cas_id);
    memcpy(p, n, strlen(n));
    p += strlen(n);
    memcpy(p, ITEM_data(it), it->nbytes);
    p += it->nbytes;
    c->wbytes = p - c->wcurr;
    return(0);
}
Beispiel #14
0
/*@null@*/
item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes) {
    uint8_t nsuffix;
    item *it = NULL;
    char suffix[40];
    size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix);
    if (settings.use_cas) {
        ntotal += sizeof(uint64_t);
    }

    unsigned int id = slabs_clsid(ntotal);
    if (id == 0)
        return 0;

    /* do a quick check if we have any expired items in the tail.. */
    int tries = 50;
    item *search;

    for (search = tails[id];
         tries > 0 && search != NULL;
         tries--, search=search->prev) {
        if (search->refcount == 0 &&
            (search->exptime != 0 && search->exptime < current_time)) {
            it = search;
            /* I don't want to actually free the object, just steal
             * the item to avoid to grab the slab mutex twice ;-)
             */
            it->refcount = 1;
            do_item_unlink(it);
            /* Initialize the item block: */
            it->slabs_clsid = 0;
            it->refcount = 0;
            break;
        }
    }

    if (it == NULL && (it = slabs_alloc(ntotal, id)) == NULL) {
        /*
        ** Could not find an expired item at the tail, and memory allocation
        ** failed. Try to evict some items!
        */
        tries = 50;

        /* If requested to not push old items out of cache when memory runs out,
         * we're out of luck at this point...
         */

        if (settings.evict_to_free == 0) {
            itemstats[id].outofmemory++;
            return NULL;
        }

        /*
         * try to get one off the right LRU
         * don't necessariuly unlink the tail because it may be locked: refcount>0
         * search up from tail an item with refcount==0 and unlink it; give up after 50
         * tries
         */

        if (tails[id] == 0) {
            itemstats[id].outofmemory++;
            return NULL;
        }

        for (search = tails[id]; tries > 0 && search != NULL; tries--, search=search->prev) {
            if (search->refcount == 0) {
                if (search->exptime == 0 || search->exptime > current_time) {
                    itemstats[id].evicted++;
                    itemstats[id].evicted_time = current_time - search->time;
                    STATS_LOCK();
                    stats.evictions++;
                    STATS_UNLOCK();
                }
                do_item_unlink(search);
                break;
            }
        }
        it = slabs_alloc(ntotal, id);
        if (it == 0) {
            itemstats[id].outofmemory++;
            /* Last ditch effort. There is a very rare bug which causes
             * refcount leaks. We've fixed most of them, but it still happens,
             * and it may happen in the future.
             * We can reasonably assume no item can stay locked for more than
             * three hours, so if we find one in the tail which is that old,
             * free it anyway.
             */
            tries = 50;
            for (search = tails[id]; tries > 0 && search != NULL; tries--, search=search->prev) {
                if (search->refcount != 0 && search->time + TAIL_REPAIR_TIME < current_time) {
                    itemstats[id].tailrepairs++;
                    search->refcount = 0;
                    do_item_unlink(search);
                    break;
                }
            }
            it = slabs_alloc(ntotal, id);
            if (it == 0) {
                return NULL;
            }
        }
    }

    assert(it->slabs_clsid == 0);

    it->slabs_clsid = id;

    assert(it != heads[it->slabs_clsid]);

    it->next = it->prev = it->h_next = 0;
    it->refcount = 1;     /* the caller will have a reference */
    DEBUG_REFCNT(it, '*');
    it->it_flags = settings.use_cas ? ITEM_CAS : 0;
    it->nkey = nkey;
    it->nbytes = nbytes;
    memcpy(ITEM_key(it), key, nkey);
    it->exptime = exptime;
    memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix);
    it->nsuffix = nsuffix;
    return it;
}
Beispiel #15
0
enum store_item_type do_store_item ( item *it, int comm, const uint32_t hv )
{
    char *key = ITEM_key (it);
    item *old_it = do_item_get (key, it->nkey, hv);
    enum store_item_type stored = NOT_STORED;

    item *new_it = NULL;
    int flags;

    if ( old_it != NULL && comm == NREAD_ADD )
    {
        do_item_update (old_it);
    }
    else if ( ! old_it && ( comm == NREAD_REPLACE || comm == NREAD_APPEND || comm == NREAD_PREPEND ) )
    {

    }
    else
    {
        if ( comm == NREAD_APPEND || comm == NREAD_PREPEND )
        {
            if ( stored == NOT_STORED )
            {
                flags = ( int ) strtol (ITEM_suffix (old_it), ( char ** ) NULL, 10);
                new_it = do_item_alloc (key, it->nkey, flags, old_it->exptime, ITEM_data (it), it->nbytes + old_it->nbytes - 2, hv);
                if ( ! new_it )
                {
                    if ( old_it )
                        do_item_remove (old_it);
                    return NOT_STORED;
                }
                if ( comm == NREAD_APPEND )
                {
                    memcpy (ITEM_data (new_it), ITEM_data (old_it), old_it->nbytes);
                    memcpy (ITEM_data (new_it) + old_it->nbytes - 2, ITEM_data (it), it->nbytes);
                }
                else
                {
                    memcpy (ITEM_data (new_it), ITEM_data (it), it->nbytes);
                    memcpy (ITEM_data (new_it) + it->nbytes - 2, ITEM_data (old_it), old_it->nbytes);
                }
                it = new_it;
            }
        }
        if ( stored == NOT_STORED )
        {
            if ( old_it != NULL )
            {
                item_replace (old_it, it, hv);
            }
            else
            {
                do_item_link (it, hv);
            }
            stored = STORED;
        }
    }
    if ( old_it != NULL )
    {
        do_item_remove (old_it);
    }
    if ( new_it != NULL )
    {
        do_item_remove (new_it);
    }
    return stored;
}
Beispiel #16
0
/*@null@*/
item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes) {
    uint8_t nsuffix;
    item *it;
    char suffix[40];
    size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix);

    unsigned int id = slabs_clsid(ntotal);
    if (id == 0)
        return 0;

    it = slabs_alloc(ntotal);
    if (it == 0) {
        int tries = 50;
        item *search;

        /* If requested to not push old items out of cache when memory runs out,
         * we're out of luck at this point...
         */

        if (settings.evict_to_free == 0) return NULL;

        /*
         * try to get one off the right LRU
         * don't necessariuly unlink the tail because it may be locked: refcount>0
         * search up from tail an item with refcount==0 and unlink it; give up after 50
         * tries
         */

        if (id > LARGEST_ID) return NULL;
        if (tails[id] == 0) return NULL;

        for (search = tails[id]; tries > 0 && search != NULL; tries--, search=search->prev) {
            if (search->refcount == 0) {
               if (search->exptime == 0 || search->exptime > current_time) {
                       STATS_LOCK();
                       stats.evictions++;
                       STATS_UNLOCK();
                }
                do_item_unlink(search);
                break;
            }
        }
        it = slabs_alloc(ntotal);
        if (it == 0) return NULL;
    }

    assert(it->slabs_clsid == 0);

    it->slabs_clsid = id;

    assert(it != heads[it->slabs_clsid]);

    it->next = it->prev = it->h_next = 0;
    it->refcount = 1;     /* the caller will have a reference */
    DEBUG_REFCNT(it, '*');
    it->it_flags = 0;
    it->nkey = nkey;
    it->nbytes = nbytes;
    strcpy(ITEM_key(it), key);
    it->exptime = exptime;
    memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix);
    it->nsuffix = nsuffix;
    return it;
}
Beispiel #17
0
/*@null@*/
item *do_item_alloc(char *key, const size_t nkey, const int flags,
                    const rel_time_t exptime, const int nbytes,
                    const uint32_t cur_hv) {
    uint8_t nsuffix;
    item *it = NULL;
    char suffix[40];
    //计算这个item的空间
    size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix);
    if (settings.use_cas) {
        ntotal += sizeof(uint64_t);
    }
    //根据大小判断从属于哪个slab
    unsigned int id = slabs_clsid(ntotal);
    if (id == 0)
        return 0;

    mutex_lock(&cache_lock);
    /* do a quick check if we have any expired items in the tail.. */
    //在LRU中尝试5次还没合适的空间,则执行申请空间的操作
    int tries = 5;
    int tried_alloc = 0;
    item *search;
    void *hold_lock = NULL;
    rel_time_t oldest_live = settings.oldest_live;

    search = tails[id];
    /* We walk up *only* for locked items. Never searching for expired.
     * Waste of CPU for almost all deployments */
    for (; tries > 0 && search != NULL; tries--, search=search->prev) {
        uint32_t hv = hash(ITEM_key(search), search->nkey, 0);
        /* Attempt to hash item lock the "search" item. If locked, no
         * other callers can incr the refcount
         */
        /* FIXME: I think we need to mask the hv here for comparison? */
        if (hv != cur_hv && (hold_lock = item_trylock(hv)) == NULL)
            continue;
        /* Now see if the item is refcount locked */
        if (refcount_incr(&search->refcount) != 2) {
            refcount_decr(&search->refcount);
            /* Old rare bug could cause a refcount leak. We haven't seen
             * it in years, but we leave this code in to prevent failures
             * just in case */
            if (search->time + TAIL_REPAIR_TIME < current_time) {
                itemstats[id].tailrepairs++;
                search->refcount = 1;
                do_item_unlink_nolock(search, hv);
            }
            if (hold_lock)
                item_trylock_unlock(hold_lock);
            continue;
        }

        /* Expired or flushed */
        // search指向的item过期了,则直接复用这块内存
        if ((search->exptime != 0 && search->exptime < current_time)
                || (search->time <= oldest_live && oldest_live <= current_time)) {
            itemstats[id].reclaimed++;
            if ((search->it_flags & ITEM_FETCHED) == 0) {
                itemstats[id].expired_unfetched++;
            }
            it = search;
            slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
            do_item_unlink_nolock(it, hv);
            /* Initialize the item block: */
            it->slabs_clsid = 0;
        }
        //此刻,过期失效的item没有找到,申请内存又失败了。看来只能使用
        //LRU淘汰一个item(即使这个item并没有过期失效)
        else if ((it = slabs_alloc(ntotal, id)) == NULL) {
            tried_alloc = 1;
            if (settings.evict_to_free == 0) {
                itemstats[id].outofmemory++;
            } else {
                itemstats[id].evicted++;
                itemstats[id].evicted_time = current_time - search->time;
                if (search->exptime != 0)
                    itemstats[id].evicted_nonzero++;
                if ((search->it_flags & ITEM_FETCHED) == 0) {
                    itemstats[id].evicted_unfetched++;
                }
                it = search;
                slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
                do_item_unlink_nolock(it, hv);
                /* Initialize the item block: */
                it->slabs_clsid = 0;

                /* If we've just evicted an item, and the automover is set to
                 * angry bird mode, attempt to rip memory into this slab class.
                 * TODO: Move valid object detection into a function, and on a
                 * "successful" memory pull, look behind and see if the next alloc
                 * would be an eviction. Then kick off the slab mover before the
                 * eviction happens.
                 */
                if (settings.slab_automove == 2)
                    slabs_reassign(-1, id);
            }
        }

        refcount_decr(&search->refcount);
        /* If hash values were equal, we don't grab a second lock */
        if (hold_lock)
            item_trylock_unlock(hold_lock);
        break;
    }
    //从slab分配器中申请内存
    if (!tried_alloc && (tries == 0 || search == NULL))
        it = slabs_alloc(ntotal, id);

    if (it == NULL) {
        itemstats[id].outofmemory++;
        mutex_unlock(&cache_lock);
        return NULL;
    }

    assert(it->slabs_clsid == 0);
    assert(it != heads[id]);

    /* Item initialization can happen outside of the lock; the item's already
     * been removed from the slab LRU.
     */
    it->refcount = 1;     /* the caller will have a reference */
    mutex_unlock(&cache_lock);
    it->next = it->prev = it->h_next = 0;
    it->slabs_clsid = id;

    DEBUG_REFCNT(it, '*');
    it->it_flags = settings.use_cas ? ITEM_CAS : 0;
    it->nkey = nkey;
    it->nbytes = nbytes;
    memcpy(ITEM_key(it), key, nkey);
    it->exptime = exptime;
    memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix);
    it->nsuffix = nsuffix;
    return it;
}
Beispiel #18
0
/* Forward an upstream command that came with item data,
 * like set/add/replace/etc.
 */
bool cproxy_forward_a2a_item_downstream(downstream *d, short cmd,
                                        item *it, conn *uc) {
    assert(d != NULL);
    assert(d->ptd != NULL);
    assert(d->ptd->proxy != NULL);
    assert(d->downstream_conns != NULL);
    assert(it != NULL);
    assert(uc != NULL);
    assert(uc->next == NULL);

    // Assuming we're already connected to downstream.
    //
    bool self = false;

    conn *c = cproxy_find_downstream_conn(d, ITEM_key(it), it->nkey,
                                          &self);
    if (c != NULL) {
        if (self) {
            cproxy_optimize_to_self(d, uc, uc->cmd_start);
            complete_nread_ascii(uc);
            return true;
        }

        if (cproxy_prep_conn_for_write(c)) {
            assert(c->state == conn_pause);

            char *verb = nread_text(cmd);
            assert(verb != NULL);

            char *str_flags   = ITEM_suffix(it);
            char *str_length  = strchr(str_flags + 1, ' ');
            int   len_flags   = str_length - str_flags;
            int   len_length  = it->nsuffix - len_flags - 2;
            char *str_exptime = add_conn_suffix(c);
            char *str_cas     = (cmd == NREAD_CAS ? add_conn_suffix(c) : NULL);

            if (str_flags != NULL &&
                str_length != NULL &&
                len_flags > 1 &&
                len_length > 1 &&
                str_exptime != NULL &&
                (cmd != NREAD_CAS ||
                 str_cas != NULL)) {
                sprintf(str_exptime, " %u", it->exptime);

                if (str_cas != NULL)
                    sprintf(str_cas, " %llu",
                            (unsigned long long) ITEM_get_cas(it));

                if (add_iov(c, verb, strlen(verb)) == 0 &&
                    add_iov(c, ITEM_key(it), it->nkey) == 0 &&
                    add_iov(c, str_flags, len_flags) == 0 &&
                    add_iov(c, str_exptime, strlen(str_exptime)) == 0 &&
                    add_iov(c, str_length, len_length) == 0 &&
                    (str_cas == NULL ||
                     add_iov(c, str_cas, strlen(str_cas)) == 0) &&
                    (uc->noreply == false ||
                     add_iov(c, " noreply", 8) == 0) &&
                    add_iov(c, ITEM_data(it) - 2, it->nbytes + 2) == 0) {
                    conn_set_state(c, conn_mwrite);
                    c->write_and_go = conn_new_cmd;

                    if (update_event(c, EV_WRITE | EV_PERSIST)) {
                        d->downstream_used_start = 1;
                        d->downstream_used       = 1;

                        if (cproxy_dettach_if_noreply(d, uc) == false) {
                            cproxy_start_downstream_timeout(d, c);

                            // During a synchronous (with-reply) SET,
                            // handle fire-&-forget SET optimization.
                            //
                            if (cmd == NREAD_SET &&
                                cproxy_optimize_set_ascii(d, uc,
                                                          ITEM_key(it),
                                                          it->nkey)) {
                                d->ptd->stats.stats.tot_optimize_sets++;
                            }
                        } else {
                            c->write_and_go = conn_pause;

                            mcache_delete(&d->ptd->proxy->front_cache,
                                          ITEM_key(it), it->nkey);
                        }

                        return true;
                    }
                }

                d->ptd->stats.stats.err_oom++;
                cproxy_close_conn(c);
            } else {
                // TODO: Handle this weird error case.
            }
        } else {
            d->ptd->stats.stats.err_downstream_write_prep++;
            cproxy_close_conn(c);
        }

        if (settings.verbose > 1)
            fprintf(stderr, "Proxy item write out of memory");
    }

    return false;
}
Beispiel #19
0
/* 从 slab 系统分配一个空闲 item */
item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes,
                    												   const uint32_t cur_hv)
{
    uint8_t nsuffix;
    item *it = NULL;
    char suffix[40];
    size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix);
    if (settings.use_cas) {
        ntotal += sizeof(uint64_t);
    }
    unsigned int id = slabs_clsid(ntotal);
    if (id == 0)
        return 0;
    mutex_lock(&cache_lock);
    /* do a quick check if we have any expired items in the tail.. */
    int tries = 5;
    int tried_alloc = 0;
    item *search;
    void *hold_lock = NULL;
    rel_time_t oldest_live = settings.oldest_live;
    search = tails[id];
    /* We walk up *only* for locked items. Never searching for expired.
     * Waste of CPU for almost all deployments */
    for (; tries > 0 && search != NULL; tries--, search=search->prev)
    {
        uint32_t hv = hash(ITEM_key(search), search->nkey, 0);
        /* Attempt to hash item lock the "search" item. If locked, no
         * other callers can incr the refcount
         */
        /* FIXME: I think we need to mask the hv here for comparison? */
        if (hv != cur_hv && (hold_lock = item_trylock(hv)) == NULL)
            continue;
        /* Now see if the item is refcount locked */
        if (refcount_incr(&search->refcount) != 2)
        {
            refcount_decr(&search->refcount);
            /* Old rare bug could cause a refcount leak. We haven't seen
             * it in years, but we leave this code in to prevent failures
             * just in case */
            if (search->time + TAIL_REPAIR_TIME < current_time)
            {
                itemstats[id].tailrepairs++;
                search->refcount = 1;
                do_item_unlink_nolock(search, hv);
            }
            if (hold_lock)
                item_trylock_unlock(hold_lock);
            continue;
        }
        /* 先检查 LRU 队列最后一个 item 是否超时, 超时的话就把这个 item 分配给用户 */
        if ((search->exptime != 0 && search->exptime < current_time)
            || (search->time <= oldest_live && oldest_live <= current_time))
        {
            itemstats[id].reclaimed++;
            if ((search->it_flags & ITEM_FETCHED) == 0)
            {
                itemstats[id].expired_unfetched++;
            }
            it = search;
            slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
            /* 把这个 item 从 LRU 队列和哈希表中移除 */
            do_item_unlink_nolock(it, hv);
            /* Initialize the item block: */
            it->slabs_clsid = 0;
        }
        /* 没有超时的item, 那就尝试从slabclass分配, 运气不好的话, 分配失败, 那就把 LRU 队列最后一个 item 剔除, 然后分配给用户 */
        else if ((it = slabs_alloc(ntotal, id)) == NULL)
        {
            tried_alloc = 1;
            if (settings.evict_to_free == 0)
            {
                itemstats[id].outofmemory++;//显示出的统计信息
            }
            else
            {
                itemstats[id].evicted++;//这个slab的分配失败次数加1,后面的分析统计信息的线程会用到这个统计信息
                itemstats[id].evicted_time = current_time - search->time;//显示的统计信息
                if (search->exptime != 0)
                    itemstats[id].evicted_nonzero++;
                if ((search->it_flags & ITEM_FETCHED) == 0)
                {
                    itemstats[id].evicted_unfetched++;
                }
                it = search;
                slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);//不用请求新的item了,减少相关的统计信息
                /*  把老的item从hash表和lru队列中删除 */
                do_item_unlink_nolock(it, hv);
                /* Initialize the item block: */
                it->slabs_clsid = 0;
                /* If we've just 回收 an item, and the automover is set to angry bird mode, attempt to rip memory into this
                slab class. TODO: Move valid object detection into a function, and on a "successful" memory pull, look
                behind and see if the next alloc would be an eviction. Then kick off the slab mover before the eviction
                happens.可以看到如果slab_automove=2(默认是1),这样会导致angry模式,就是只要分配失败了,马上就选择一个slab(旧的slagclass
                释放的),把这个slab移动到当前slab-class中(不会有通过统计信息有选择的移动slab)*/
                if (settings.slab_automove == 2)
                    slabs_reassign(-1, id);
            }
        }
        refcount_decr(&search->refcount);
        /* If hash values were equal, we don't grab a second lock */
        if (hold_lock)
            item_trylock_unlock(hold_lock);
        break;
    }
    if (!tried_alloc && (tries == 0 || search == NULL)) it = slabs_alloc(ntotal, id);
    if (it == NULL)
    {
        itemstats[id].outofmemory++;
        mutex_unlock(&cache_lock);
        return NULL;
    }
    assert(it->slabs_clsid == 0);
    assert(it != heads[id]);
    /* Item initialization can happen outside of the lock; the item's already been removed from the slab LRU. */
    it->refcount = 1;     /* the caller will have a reference */
    mutex_unlock(&cache_lock);
    it->next = it->prev = it->h_next = 0;
    it->slabs_clsid = id;
    DEBUG_REFCNT(it, '*');
    it->it_flags = settings.use_cas ? ITEM_CAS : 0;
    it->nkey = nkey;
    it->nbytes = nbytes;
    memcpy(ITEM_key(it), key, nkey);
    it->exptime = exptime;
    memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix);
    it->nsuffix = nsuffix;
    return it;
}