/** * Returns true if an item will fit in the cache (its size does not exceed * the maximum for a cache entry.) */ bool item_size_ok(const size_t nkey, const int flags, const int nbytes) { char prefix[40]; uint8_t nsuffix; return slabs_clsid(item_make_header(nkey + 1, flags, nbytes, prefix, &nsuffix)) != 0; }
/* * alloc a item buffer, and init it. */ item *item_alloc1(char *key, const size_t nkey, const int flags, const int nbytes) { uint8_t nsuffix; item *it; char suffix[40]; size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix); if (ntotal > settings.item_buf_size){ it = (item *)malloc(ntotal); if (it == NULL){ return NULL; } memset(it, 0, ntotal); if (settings.verbose > 1) { fprintf(stderr, "alloc a item buffer from malloc.\n"); } }else{ it = item_from_freelist(); if (it == NULL){ return NULL; } if (settings.verbose > 1) { fprintf(stderr, "alloc a item buffer from freelist.\n"); } } it->nkey = nkey; it->nbytes = nbytes; strcpy(ITEM_key(it), key); memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix); it->nsuffix = nsuffix; return it; }
/* * Returns true if an item will fit in the cache (its size does not exceed * the maximum for a cache entry.) */ int item_size_ok(char *key, int flags, int nbytes) { char prefix[40]; int keylen, nsuffix; return slabs_clsid(item_make_header(key, flags, nbytes, prefix, &nsuffix, &keylen)) != 0; }
item *item_alloc(char *key, int flags, rel_time_t exptime, int nbytes) { int nsuffix, ntotal, len; item *it; unsigned int id; char suffix[40]; ntotal = item_make_header(key, flags, nbytes, suffix, &nsuffix, &len); id = slabs_clsid(ntotal); if (id == 0) return 0; it = slabs_alloc(ntotal); if (it == 0) { int tries = 50; item *search; /* If requested to not push old items out of cache when memory runs out, * we're out of luck at this point... */ if (!settings.evict_to_free) return 0; /* * try to get one off the right LRU * don't necessariuly unlink the tail because it may be locked: refcount>0 * search up from tail an item with refcount==0 and unlink it; give up after 50 * tries */ if (id > LARGEST_ID) return 0; if (tails[id]==0) return 0; for (search = tails[id]; tries>0 && search; tries--, search=search->prev) { if (search->refcount==0) { item_unlink(search); break; } } it = slabs_alloc(ntotal); if (it==0) return 0; } assert(it->slabs_clsid == 0); it->slabs_clsid = id; assert(it != heads[it->slabs_clsid]); it->next = it->prev = it->h_next = 0; it->refcount = 0; it->it_flags = 0; it->nkey = len; it->nbytes = nbytes; strcpy(ITEM_key(it), key); it->exptime = exptime; memcpy(ITEM_suffix(it), suffix, nsuffix); it->nsuffix = nsuffix; return it; }
/** * Returns true if an item will fit in the cache (its size does not exceed * the maximum for a cache entry.) */ bool item_size_ok(const size_t nkey, const int flags, const int nbytes) { char prefix[40]; uint8_t nsuffix; size_t ntotal = item_make_header(nkey + 1, flags, nbytes, prefix, &nsuffix); if (settings.use_cas) { ntotal += sizeof(uint64_t); } return slabs_clsid(ntotal) != 0; }
//检验item是否有合适的slab来存储 bool item_size_ok(const size_t nkey, const int flags, const int nbytes) { syslog(LOG_INFO, "[%s:%s:%d]", __FILE__, __func__, __LINE__); char prefix[40]; uint8_t nsuffix; size_t ntotal = item_make_header(nkey + 1, flags, nbytes, prefix, &nsuffix); if (settings.use_cas) { ntotal += sizeof(uint64_t); } return slabs_clsid(ntotal) != 0; }
/*@null@*/ item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes) { uint8_t nsuffix; item *it = NULL; char suffix[40]; size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix); if (settings.use_cas) { ntotal += sizeof(uint64_t); } unsigned int id = slabs_clsid(ntotal); if (id == 0) return 0; mutex_lock(&cache_lock); /* do a quick check if we have any expired items in the tail.. */ item *search; rel_time_t oldest_live = settings.oldest_live; search = tails[id]; if (search != NULL && (refcount_incr(&search->refcount) == 2)) { if ((search->exptime != 0 && search->exptime < current_time) || (search->time <= oldest_live && oldest_live <= current_time)) { // dead by flush STATS_LOCK(); stats.reclaimed++; STATS_UNLOCK(); itemstats[id].reclaimed++; if ((search->it_flags & ITEM_FETCHED) == 0) { STATS_LOCK(); stats.expired_unfetched++; STATS_UNLOCK(); itemstats[id].expired_unfetched++; } it = search; slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal); do_item_unlink_nolock(it, hash(ITEM_key(it), it->nkey, 0)); /* Initialize the item block: */ it->slabs_clsid = 0; } else if ((it = slabs_alloc(ntotal, id)) == NULL) { if (settings.evict_to_free == 0) { itemstats[id].outofmemory++; mutex_unlock(&cache_lock); return NULL; } itemstats[id].evicted++; itemstats[id].evicted_time = current_time - search->time; if (search->exptime != 0) itemstats[id].evicted_nonzero++; if ((search->it_flags & ITEM_FETCHED) == 0) { STATS_LOCK(); stats.evicted_unfetched++; STATS_UNLOCK(); itemstats[id].evicted_unfetched++; } STATS_LOCK(); stats.evictions++; STATS_UNLOCK(); it = search; slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal); do_item_unlink_nolock(it, hash(ITEM_key(it), it->nkey, 0)); /* Initialize the item block: */ it->slabs_clsid = 0; /* If we've just evicted an item, and the automover is set to * angry bird mode, attempt to rip memory into this slab class. * TODO: Move valid object detection into a function, and on a * "successful" memory pull, look behind and see if the next alloc * would be an eviction. Then kick off the slab mover before the * eviction happens. */ if (settings.slab_automove == 2) slabs_reassign(-1, id, 1); } else { refcount_decr(&search->refcount); } } else { /* If the LRU is empty or locked, attempt to allocate memory */ it = slabs_alloc(ntotal, id); if (search != NULL) refcount_decr(&search->refcount); } if (it == NULL) { itemstats[id].outofmemory++; /* Last ditch effort. There was a very rare bug which caused * refcount leaks. We leave this just in case they ever happen again. * We can reasonably assume no item can stay locked for more than * three hours, so if we find one in the tail which is that old, * free it anyway. */ if (search != NULL && search->refcount != 2 && search->time + TAIL_REPAIR_TIME < current_time) { itemstats[id].tailrepairs++; search->refcount = 1; do_item_unlink_nolock(search, hash(ITEM_key(search), search->nkey, 0)); } mutex_unlock(&cache_lock); return NULL; } assert(it->slabs_clsid == 0); assert(it != heads[id]); /* Item initialization can happen outside of the lock; the item's already * been removed from the slab LRU. */ it->refcount = 1; /* the caller will have a reference */ mutex_unlock(&cache_lock); it->next = it->prev = it->h_next = 0; it->slabs_clsid = id; DEBUG_REFCNT(it, '*'); it->it_flags = settings.use_cas ? ITEM_CAS : 0; it->nkey = nkey; it->nbytes = nbytes; memcpy(ITEM_key(it), key, nkey); it->exptime = exptime; memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix); it->nsuffix = nsuffix; return it; }
/*@null@*/ item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes, const uint32_t cur_hv) { uint8_t nsuffix; ck_spinlock_mcs_context_t second_lock; item *it = NULL; char suffix[40]; size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix); if (settings.use_cas) { ntotal += sizeof(uint64_t); } unsigned int id = slabs_clsid(ntotal); if (id == 0) return 0; LOCK_CLOCK(); /* Avoid hangs if a slab has nothing but refcounted stuff in it. */ /* int tries_lrutail_reflocked = 1000; */ item *search; item *next_it; void *hold_lock = NULL; /* We have no expiration. Try alloc a new one first. */ if ((it = slabs_alloc(ntotal, id)) == NULL) { printf("item slab alloc fails\n"); assert(0); /* doing CLOCK eviction */ search = hand[id]; if (!search) { /* no mem from alloc or replace */ UNLOCK_CLOCK(); return NULL; } /* scan loop of the clock, which could be potentially * unbounded -- we may want an upper limit for it. */ for (search = hand[id]; search != NULL; search = next_it) { assert(search); /* we might relink search mid-loop, so search->prev isn't reliable */ next_it = search->prev; // if (*key == 101) printf("aaa %d\n", sizes[id]); if (search->nbytes == 0 && search->nkey == 0 && search->it_flags == 1) { /* We are a crawler, ignore it. */ continue; } uint32_t hv = hash(ITEM_key(search), search->nkey); /* Attempt to hash item lock the "search" item. If locked, no * other callers can incr the refcount */ /* Don't accidentally grab ourselves, or bail if we can't quicklock */ if (hv == cur_hv || (hold_lock = item_try_mcslock(hv, &second_lock)) == NULL) continue; /* Now see if the item is refcount locked */ if (refcount_incr(&search->refcount) != 2) { /* Avoid pathological case with ref'ed items in tail */ do_item_update_nolock(search); /* tries_lrutail_reflocked--; */ refcount_decr(&search->refcount); itemstats[id].lrutail_reflocked++; /* Old rare bug could cause a refcount leak. We haven't seen * it in years, but we leave this code in to prevent failures * just in case */ if (settings.tail_repair_time && search->time + settings.tail_repair_time < current_time) { itemstats[id].tailrepairs++; search->refcount = 1; do_item_unlink_nolock(search, hv); } if (hold_lock) item_try_mcsunlock(hold_lock, &second_lock); /* if (tries_lrutail_reflocked < 1) */ /* break; */ continue; } if (search->recency) { /* recently accessed. clear bit and continue. */ search->recency = 0; continue; } // printf("aaa %d, %d\n", sizes[id], *key); itemstats[id].evicted++; itemstats[id].evicted_time = current_time - search->time; if (search->exptime != 0) itemstats[id].evicted_nonzero++; if ((search->it_flags & ITEM_FETCHED) == 0) { itemstats[id].evicted_unfetched++; } it = search; slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal); do_item_unlink_nolock(it, hv); /* Initialize the item block: */ it->slabs_clsid = 0; refcount_decr(&search->refcount); /* If hash values were equal, we don't grab a second lock */ if (hold_lock) item_try_mcsunlock(hold_lock, &second_lock); break; } /* end of loop*/ } /* end of allocation / eviction */ if (it == NULL) { itemstats[id].outofmemory++; UNLOCK_CLOCK(); return NULL; } assert(it->slabs_clsid == 0); /* Item initialization can happen outside of the lock; the item's already * been removed from the slab LRU. */ it->refcount = 1; /* the caller will have a reference */ UNLOCK_CLOCK(); it->next = it->prev = it->h_next = 0; it->slabs_clsid = id; DEBUG_REFCNT(it, '*'); it->it_flags = settings.use_cas ? ITEM_CAS : 0; it->nkey = nkey; it->nbytes = nbytes; memcpy(ITEM_key(it), key, nkey); it->exptime = 0; //exptime; /* disable expiration. */ memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix); it->nsuffix = nsuffix; return it; }
//分配一个item,这个函数包含了memcached具体item分配的逻辑 item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes, const uint32_t cur_hv) { uint8_t nsuffix; item *it = NULL; char suffix[40]; size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix); //item总大小 if (settings.use_cas) { ntotal += sizeof(uint64_t); //如果有用到cas,那么item大小还要加上unit64_t的size } unsigned int id = slabs_clsid(ntotal); //根据item的大小,找到适合的slabclass if (id == 0) return 0; mutex_lock(&cache_lock); //cache锁 /* do a quick check if we have any expired items in the tail.. */ int tries = 5; int tried_alloc = 0; item *search; void *hold_lock = NULL; rel_time_t oldest_live = settings.oldest_live; search = tails[id]; //全局变量,tails[x]是id为x的slabclass lru链表的尾部 /* We walk up *only* for locked items. Never searching for expired. * Waste of CPU for almost all deployments */ //首先从lru链表尾部查找有没有过期的item,tries = 5,最多循环5次 //注意这里是最多查找5次,只要找到一个没有被其他地方引用的item,那么就不再继续查找,如果这个item过期,就使用这个item的空间,否则创建新的slab for (; tries > 0 && search != NULL; tries--, search=search->prev) { if (search->nbytes == 0 && search->nkey == 0 && search->it_flags == 1) { /* We are a crawler, ignore it. */ //这里只是搜索过期的item,对于异常的item,直接忽略继续查找 tries++; continue; } //计算item的hash值,hv有两个作用:1.用于hash表保存item 2.用于item lock表中锁住item,通过hv计算出item_lock中哪个锁对当前item加锁 //不同item的hash值可能相同,hash表中用链表的方式解决冲突;item lock中多个item共享一个锁 uint32_t hv = hash(ITEM_key(search), search->nkey); /* Attempt to hash item lock the "search" item. If locked, no * other callers can incr the refcount */ /* Don't accidentally grab ourselves, or bail if we can't quicklock */ //锁住当前item if (hv == cur_hv || (hold_lock = item_trylock(hv)) == NULL) continue; /* Now see if the item is refcount locked */ //检查这个指向的这个item是否被其他地方引用,如果是的话,继续向前查找 if (refcount_incr(&search->refcount) != 2) { refcount_decr(&search->refcount); /* Old rare bug could cause a refcount leak. We haven't seen * it in years, but we leave this code in to prevent failures * just in case */ if (settings.tail_repair_time && search->time + settings.tail_repair_time < current_time) { itemstats[id].tailrepairs++; search->refcount = 1; do_item_unlink_nolock(search, hv); } if (hold_lock) item_trylock_unlock(hold_lock); continue; } /* Expired or flushed */ //如果找到过期的item if ((search->exptime != 0 && search->exptime < current_time) || (search->time <= oldest_live && oldest_live <= current_time)) { itemstats[id].reclaimed++; if ((search->it_flags & ITEM_FETCHED) == 0) { itemstats[id].expired_unfetched++; } it = search; //更新统计数据 slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal); //把旧的item从hash表和LRU链表中移除 do_item_unlink_nolock(it, hv); /* Initialize the item block: */ it->slabs_clsid = 0; } //如果没有找到过期的item,则调用slabs_alloc分配空间 //如果slabs_alloc返回null,表示分配失败,内存空间已满 //需要按LRU进行淘汰 else if ((it = slabs_alloc(ntotal, id)) == NULL) { tried_alloc = 1; //标记一下,表示有尝试调用slabs_alloc分配空间 //记录被淘汰item的信息, 使用memcached经常会查看的evicted_time就是在这里赋值的 if (settings.evict_to_free == 0) { itemstats[id].outofmemory++; } else { itemstats[id].evicted++; itemstats[id].evicted_time = current_time - search->time; //被淘汰item距离上次使用的时间 if (search->exptime != 0) itemstats[id].evicted_nonzero++; if ((search->it_flags & ITEM_FETCHED) == 0) { itemstats[id].evicted_unfetched++; } it = search; slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal); //更新统计数据 do_item_unlink_nolock(it, hv); //从hash表和LRU链表中移除 /* Initialize the item block: */ it->slabs_clsid = 0; /* If we've just evicted an item, and the automover is set to * angry bird mode, attempt to rip memory into this slab class. * TODO: Move valid object detection into a function, and on a * "successful" memory pull, look behind and see if the next alloc * would be an eviction. Then kick off the slab mover before the * eviction happens. */ //默认情况下,slab_automove=1,会合理地更具淘汰统计数据来分析怎么进行slabclass空间的分配 //如果slab_automove=2,只要分配失败了,马上进行slabclass空间的重分配 if (settings.slab_automove == 2) slabs_reassign(-1, id); } } refcount_decr(&search->refcount); /* If hash values were equal, we don't grab a second lock */ if (hold_lock) item_trylock_unlock(hold_lock); break; } //查找5次过期的item都失败,并且也没有淘汰可用且没有过期的item //分配新的内存空间 if (!tried_alloc && (tries == 0 || search == NULL)) it = slabs_alloc(ntotal, id); //分配失败,返回null if (it == NULL) { itemstats[id].outofmemory++; mutex_unlock(&cache_lock); return NULL; } assert(it->slabs_clsid == 0); assert(it != heads[id]); /* Item initialization can happen outside of the lock; the item's already * been removed from the slab LRU. */ //item内存空间分配成功,做一些初始化工作 it->refcount = 1; /* the caller will have a reference */ mutex_unlock(&cache_lock); it->next = it->prev = it->h_next = 0; it->slabs_clsid = id; DEBUG_REFCNT(it, '*'); it->it_flags = settings.use_cas ? ITEM_CAS : 0; it->nkey = nkey; it->nbytes = nbytes; memcpy(ITEM_key(it), key, nkey); it->exptime = exptime; memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix); it->nsuffix = nsuffix; return it; }
/*@null@*/ item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes) { uint8_t nsuffix; item *it; char suffix[40]; size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix); unsigned int id = slabs_clsid(ntotal); if (id == 0) return 0; it = slabs_alloc(ntotal); if (it == 0) { int tries = 50; item *search; /* If requested to not push old items out of cache when memory runs out, * we're out of luck at this point... */ if (settings.evict_to_free == 0) return NULL; /* * try to get one off the right LRU * don't necessariuly unlink the tail because it may be locked: refcount>0 * search up from tail an item with refcount==0 and unlink it; give up after 50 * tries */ if (id > LARGEST_ID) return NULL; if (tails[id] == 0) return NULL; for (search = tails[id]; tries > 0 && search != NULL; tries--, search=search->prev) { if (search->refcount == 0) { if (search->exptime == 0 || search->exptime > current_time) { STATS_LOCK(); stats.evictions++; STATS_UNLOCK(); } do_item_unlink(search); break; } } it = slabs_alloc(ntotal); if (it == 0) return NULL; } assert(it->slabs_clsid == 0); it->slabs_clsid = id; assert(it != heads[it->slabs_clsid]); it->next = it->prev = it->h_next = 0; it->refcount = 1; /* the caller will have a reference */ DEBUG_REFCNT(it, '*'); it->it_flags = 0; it->nkey = nkey; it->nbytes = nbytes; strcpy(ITEM_key(it), key); it->exptime = exptime; memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix); it->nsuffix = nsuffix; return it; }
/*@null@*/ item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes) { uint8_t nsuffix; item *it = NULL; char suffix[40]; size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix); if (settings.use_cas) { ntotal += sizeof(uint64_t); } unsigned int id = slabs_clsid(ntotal); if (id == 0) return 0; /* do a quick check if we have any expired items in the tail.. */ int tries = 50; item *search; for (search = tails[id]; tries > 0 && search != NULL; tries--, search=search->prev) { if (search->refcount == 0 && (search->exptime != 0 && search->exptime < current_time)) { it = search; /* I don't want to actually free the object, just steal * the item to avoid to grab the slab mutex twice ;-) */ it->refcount = 1; do_item_unlink(it); /* Initialize the item block: */ it->slabs_clsid = 0; it->refcount = 0; break; } } if (it == NULL && (it = slabs_alloc(ntotal, id)) == NULL) { /* ** Could not find an expired item at the tail, and memory allocation ** failed. Try to evict some items! */ tries = 50; /* If requested to not push old items out of cache when memory runs out, * we're out of luck at this point... */ if (settings.evict_to_free == 0) { itemstats[id].outofmemory++; return NULL; } /* * try to get one off the right LRU * don't necessariuly unlink the tail because it may be locked: refcount>0 * search up from tail an item with refcount==0 and unlink it; give up after 50 * tries */ if (tails[id] == 0) { itemstats[id].outofmemory++; return NULL; } for (search = tails[id]; tries > 0 && search != NULL; tries--, search=search->prev) { if (search->refcount == 0) { if (search->exptime == 0 || search->exptime > current_time) { itemstats[id].evicted++; itemstats[id].evicted_time = current_time - search->time; STATS_LOCK(); stats.evictions++; STATS_UNLOCK(); } do_item_unlink(search); break; } } it = slabs_alloc(ntotal, id); if (it == 0) { itemstats[id].outofmemory++; /* Last ditch effort. There is a very rare bug which causes * refcount leaks. We've fixed most of them, but it still happens, * and it may happen in the future. * We can reasonably assume no item can stay locked for more than * three hours, so if we find one in the tail which is that old, * free it anyway. */ tries = 50; for (search = tails[id]; tries > 0 && search != NULL; tries--, search=search->prev) { if (search->refcount != 0 && search->time + TAIL_REPAIR_TIME < current_time) { itemstats[id].tailrepairs++; search->refcount = 0; do_item_unlink(search); break; } } it = slabs_alloc(ntotal, id); if (it == 0) { return NULL; } } } assert(it->slabs_clsid == 0); it->slabs_clsid = id; assert(it != heads[it->slabs_clsid]); it->next = it->prev = it->h_next = 0; it->refcount = 1; /* the caller will have a reference */ DEBUG_REFCNT(it, '*'); it->it_flags = settings.use_cas ? ITEM_CAS : 0; it->nkey = nkey; it->nbytes = nbytes; memcpy(ITEM_key(it), key, nkey); it->exptime = exptime; memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix); it->nsuffix = nsuffix; return it; }
/* * Returns true if an item will fit in the cache (its size does not exceed * the maximum for a cache entry.) */ int item_size_ok(char *key, int flags, int nbytes) { char prefix[40]; int keylen, nsuffix; return item_make_header(key, flags, nbytes, prefix, &nsuffix, &keylen) <= 1024*1024; }
item *do_item_alloc(char *key, const size_t nkey, const unsigned int flags, const rel_time_t exptime, const int nbytes) { int i; uint8_t nsuffix; item *it = NULL; char suffix[40]; size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix); if (settings.use_cas) { ntotal += sizeof(uint64_t); } unsigned int id = slabs_clsid(ntotal); if (id == 0) return 0; /* If no memory is available, attempt a direct LRU juggle/eviction */ /* This is a race in order to simplify lru_pull_tail; in cases where * locked items are on the tail, you want them to fall out and cause * occasional OOM's, rather than internally work around them. * This also gives one fewer code path for slab alloc/free */ /* TODO: if power_largest, try a lot more times? or a number of times * based on how many chunks the new object should take up? * or based on the size of an object lru_pull_tail() says it evicted? * This is a classical GC problem if "large items" are of too varying of * sizes. This is actually okay here since the larger the data, the more * bandwidth it takes, the more time we can loop in comparison to serving * and replacing small items. */ for (i = 0; i < 10; i++) { uint64_t total_bytes; /* Try to reclaim memory first */ if (!settings.lru_maintainer_thread) { lru_pull_tail(id, COLD_LRU, 0, 0); } it = slabs_alloc(ntotal, id, &total_bytes, 0); if (settings.expirezero_does_not_evict) total_bytes -= noexp_lru_size(id); if (it == NULL) { if (settings.lru_maintainer_thread) { lru_pull_tail(id, HOT_LRU, total_bytes, 0); lru_pull_tail(id, WARM_LRU, total_bytes, 0); if (lru_pull_tail(id, COLD_LRU, total_bytes, LRU_PULL_EVICT) <= 0) break; } else { if (lru_pull_tail(id, COLD_LRU, 0, LRU_PULL_EVICT) <= 0) break; } } else { break; } } if (i > 0) { pthread_mutex_lock(&lru_locks[id]); itemstats[id].direct_reclaims += i; pthread_mutex_unlock(&lru_locks[id]); } if (it == NULL) { pthread_mutex_lock(&lru_locks[id]); itemstats[id].outofmemory++; pthread_mutex_unlock(&lru_locks[id]); return NULL; } assert(it->slabs_clsid == 0); //assert(it != heads[id]); /* Refcount is seeded to 1 by slabs_alloc() */ it->next = it->prev = 0; /* Items are initially loaded into the HOT_LRU. This is '0' but I want at * least a note here. Compiler (hopefully?) optimizes this out. */ if (settings.lru_maintainer_thread) { if (exptime == 0 && settings.expirezero_does_not_evict) { id |= NOEXP_LRU; } else { id |= HOT_LRU; } } else { /* There is only COLD in compat-mode */ id |= COLD_LRU; } it->slabs_clsid = id; DEBUG_REFCNT(it, '*'); it->it_flags |= settings.use_cas ? ITEM_CAS : 0; it->nkey = nkey; it->nbytes = nbytes; memcpy(ITEM_key(it), key, nkey); it->exptime = exptime; memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix); it->nsuffix = nsuffix; /* Need to shuffle the pointer stored in h_next into it->data. */ if (it->it_flags & ITEM_CHUNKED) { item_chunk *chunk = (item_chunk *) ITEM_data(it); chunk->next = (item_chunk *) it->h_next; chunk->prev = 0; chunk->head = it; /* Need to chain back into the head's chunk */ chunk->next->prev = chunk; chunk->size = chunk->next->size - ((char *)chunk - (char *)it); chunk->used = 0; assert(chunk->size > 0); } it->h_next = 0; return it; }
/*@null@*/ item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes, const uint32_t cur_hv) { uint8_t nsuffix; item *it = NULL; char suffix[40]; size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix); if (settings.use_cas) { ntotal += sizeof(uint64_t); } unsigned int id = slabs_clsid(ntotal); if (id == 0) return 0; mutex_lock(&cache_lock); /* do a quick check if we have any expired items in the tail.. */ int tries = 5; /* Avoid hangs if a slab has nothing but refcounted stuff in it. */ int tries_lrutail_reflocked = 1000; int tried_alloc = 0; item *search; item *next_it; void *hold_lock = NULL; rel_time_t oldest_live = settings.oldest_live; search = tails[id]; /* We walk up *only* for locked items. Never searching for expired. * Waste of CPU for almost all deployments */ for (; tries > 0 && search != NULL; tries--, search=next_it) { /* we might relink search mid-loop, so search->prev isn't reliable */ next_it = search->prev; if (search->nbytes == 0 && search->nkey == 0 && search->it_flags == 1) { /* We are a crawler, ignore it. */ tries++; continue; } uint32_t hv = hash(ITEM_key(search), search->nkey); /* Attempt to hash item lock the "search" item. If locked, no * other callers can incr the refcount */ /* Don't accidentally grab ourselves, or bail if we can't quicklock */ if (hv == cur_hv || (hold_lock = item_trylock(hv)) == NULL) continue; /* Now see if the item is refcount locked */ if (refcount_incr(&search->refcount) != 2) { /* Avoid pathological case with ref'ed items in tail */ do_item_update_nolock(search); tries_lrutail_reflocked--; tries++; refcount_decr(&search->refcount); itemstats[id].lrutail_reflocked++; /* Old rare bug could cause a refcount leak. We haven't seen * it in years, but we leave this code in to prevent failures * just in case */ if (settings.tail_repair_time && search->time + settings.tail_repair_time < current_time) { itemstats[id].tailrepairs++; search->refcount = 1; do_item_unlink_nolock(search, hv); } if (hold_lock) item_trylock_unlock(hold_lock); if (tries_lrutail_reflocked < 1) break; continue; } /* Expired or flushed */ if ((search->exptime != 0 && search->exptime < current_time) || (search->time <= oldest_live && oldest_live <= current_time)) { itemstats[id].reclaimed++; if ((search->it_flags & ITEM_FETCHED) == 0) { itemstats[id].expired_unfetched++; } it = search; slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal); do_item_unlink_nolock(it, hv); /* Initialize the item block: */ it->slabs_clsid = 0; } else if ((it = slabs_alloc(ntotal, id)) == NULL) { tried_alloc = 1; if (settings.evict_to_free == 0) { itemstats[id].outofmemory++; } else { itemstats[id].evicted++; itemstats[id].evicted_time = current_time - search->time; if (search->exptime != 0) itemstats[id].evicted_nonzero++; if ((search->it_flags & ITEM_FETCHED) == 0) { itemstats[id].evicted_unfetched++; } shadow_item* new_shadow_it = create_shadow_item(search); hv = hash(new_shadow_it->key, new_shadow_it->nkey); shadow_assoc_insert(new_shadow_it, hv); insert_shadowq_item(new_shadow_it,new_shadow_it->slabs_clsid); it = search; slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal); do_item_unlink_nolock(it, hv); /* Initialize the item block: */ it->slabs_clsid = 0; /* If we've just evicted an item, and the automover is set to * angry bird mode, attempt to rip memory into this slab class. * TODO: Move valid object detection into a function, and on a * "successful" memory pull, look behind and see if the next alloc * would be an eviction. Then kick off the slab mover before the * eviction happens. */ if (settings.slab_automove == 2) slabs_reassign(-1, id); } } refcount_decr(&search->refcount); /* If hash values were equal, we don't grab a second lock */ if (hold_lock) item_trylock_unlock(hold_lock); break; } if (!tried_alloc && (tries == 0 || search == NULL)) it = slabs_alloc(ntotal, id); if (it == NULL) { itemstats[id].outofmemory++; mutex_unlock(&cache_lock); return NULL; } assert(it->slabs_clsid == 0); assert(it != heads[id]); /* Item initialization can happen outside of the lock; the item's already * been removed from the slab LRU. */ it->refcount = 1; /* the caller will have a reference */ mutex_unlock(&cache_lock); it->next = it->prev = it->h_next = 0; it->slabs_clsid = id; DEBUG_REFCNT(it, '*'); it->it_flags = settings.use_cas ? ITEM_CAS : 0; it->nkey = nkey; it->nbytes = nbytes; memcpy(ITEM_key(it), key, nkey); it->exptime = exptime; memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix); it->nsuffix = nsuffix; return it; }
/* 从 slab 系统分配一个空闲 item */ item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes, const uint32_t cur_hv) { uint8_t nsuffix; item *it = NULL; char suffix[40]; size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix); if (settings.use_cas) { ntotal += sizeof(uint64_t); } unsigned int id = slabs_clsid(ntotal); if (id == 0) return 0; mutex_lock(&cache_lock); /* do a quick check if we have any expired items in the tail.. */ int tries = 5; int tried_alloc = 0; item *search; void *hold_lock = NULL; rel_time_t oldest_live = settings.oldest_live; search = tails[id]; /* We walk up *only* for locked items. Never searching for expired. * Waste of CPU for almost all deployments */ for (; tries > 0 && search != NULL; tries--, search=search->prev) { uint32_t hv = hash(ITEM_key(search), search->nkey, 0); /* Attempt to hash item lock the "search" item. If locked, no * other callers can incr the refcount */ /* FIXME: I think we need to mask the hv here for comparison? */ if (hv != cur_hv && (hold_lock = item_trylock(hv)) == NULL) continue; /* Now see if the item is refcount locked */ if (refcount_incr(&search->refcount) != 2) { refcount_decr(&search->refcount); /* Old rare bug could cause a refcount leak. We haven't seen * it in years, but we leave this code in to prevent failures * just in case */ if (search->time + TAIL_REPAIR_TIME < current_time) { itemstats[id].tailrepairs++; search->refcount = 1; do_item_unlink_nolock(search, hv); } if (hold_lock) item_trylock_unlock(hold_lock); continue; } /* 先检查 LRU 队列最后一个 item 是否超时, 超时的话就把这个 item 分配给用户 */ if ((search->exptime != 0 && search->exptime < current_time) || (search->time <= oldest_live && oldest_live <= current_time)) { itemstats[id].reclaimed++; if ((search->it_flags & ITEM_FETCHED) == 0) { itemstats[id].expired_unfetched++; } it = search; slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal); /* 把这个 item 从 LRU 队列和哈希表中移除 */ do_item_unlink_nolock(it, hv); /* Initialize the item block: */ it->slabs_clsid = 0; } /* 没有超时的item, 那就尝试从slabclass分配, 运气不好的话, 分配失败, 那就把 LRU 队列最后一个 item 剔除, 然后分配给用户 */ else if ((it = slabs_alloc(ntotal, id)) == NULL) { tried_alloc = 1; if (settings.evict_to_free == 0) { itemstats[id].outofmemory++;//显示出的统计信息 } else { itemstats[id].evicted++;//这个slab的分配失败次数加1,后面的分析统计信息的线程会用到这个统计信息 itemstats[id].evicted_time = current_time - search->time;//显示的统计信息 if (search->exptime != 0) itemstats[id].evicted_nonzero++; if ((search->it_flags & ITEM_FETCHED) == 0) { itemstats[id].evicted_unfetched++; } it = search; slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);//不用请求新的item了,减少相关的统计信息 /* 把老的item从hash表和lru队列中删除 */ do_item_unlink_nolock(it, hv); /* Initialize the item block: */ it->slabs_clsid = 0; /* If we've just 回收 an item, and the automover is set to angry bird mode, attempt to rip memory into this slab class. TODO: Move valid object detection into a function, and on a "successful" memory pull, look behind and see if the next alloc would be an eviction. Then kick off the slab mover before the eviction happens.可以看到如果slab_automove=2(默认是1),这样会导致angry模式,就是只要分配失败了,马上就选择一个slab(旧的slagclass 释放的),把这个slab移动到当前slab-class中(不会有通过统计信息有选择的移动slab)*/ if (settings.slab_automove == 2) slabs_reassign(-1, id); } } refcount_decr(&search->refcount); /* If hash values were equal, we don't grab a second lock */ if (hold_lock) item_trylock_unlock(hold_lock); break; } if (!tried_alloc && (tries == 0 || search == NULL)) it = slabs_alloc(ntotal, id); if (it == NULL) { itemstats[id].outofmemory++; mutex_unlock(&cache_lock); return NULL; } assert(it->slabs_clsid == 0); assert(it != heads[id]); /* Item initialization can happen outside of the lock; the item's already been removed from the slab LRU. */ it->refcount = 1; /* the caller will have a reference */ mutex_unlock(&cache_lock); it->next = it->prev = it->h_next = 0; it->slabs_clsid = id; DEBUG_REFCNT(it, '*'); it->it_flags = settings.use_cas ? ITEM_CAS : 0; it->nkey = nkey; it->nbytes = nbytes; memcpy(ITEM_key(it), key, nkey); it->exptime = exptime; memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix); it->nsuffix = nsuffix; return it; }
/*@null@*/ item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes, const uint32_t cur_hv) { uint8_t nsuffix; item *it = NULL; char suffix[40]; //计算这个item的空间 size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix); if (settings.use_cas) { ntotal += sizeof(uint64_t); } //根据大小判断从属于哪个slab unsigned int id = slabs_clsid(ntotal); if (id == 0) return 0; mutex_lock(&cache_lock); /* do a quick check if we have any expired items in the tail.. */ //在LRU中尝试5次还没合适的空间,则执行申请空间的操作 int tries = 5; int tried_alloc = 0; item *search; void *hold_lock = NULL; rel_time_t oldest_live = settings.oldest_live; search = tails[id]; /* We walk up *only* for locked items. Never searching for expired. * Waste of CPU for almost all deployments */ for (; tries > 0 && search != NULL; tries--, search=search->prev) { uint32_t hv = hash(ITEM_key(search), search->nkey, 0); /* Attempt to hash item lock the "search" item. If locked, no * other callers can incr the refcount */ /* FIXME: I think we need to mask the hv here for comparison? */ if (hv != cur_hv && (hold_lock = item_trylock(hv)) == NULL) continue; /* Now see if the item is refcount locked */ if (refcount_incr(&search->refcount) != 2) { refcount_decr(&search->refcount); /* Old rare bug could cause a refcount leak. We haven't seen * it in years, but we leave this code in to prevent failures * just in case */ if (search->time + TAIL_REPAIR_TIME < current_time) { itemstats[id].tailrepairs++; search->refcount = 1; do_item_unlink_nolock(search, hv); } if (hold_lock) item_trylock_unlock(hold_lock); continue; } /* Expired or flushed */ // search指向的item过期了,则直接复用这块内存 if ((search->exptime != 0 && search->exptime < current_time) || (search->time <= oldest_live && oldest_live <= current_time)) { itemstats[id].reclaimed++; if ((search->it_flags & ITEM_FETCHED) == 0) { itemstats[id].expired_unfetched++; } it = search; slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal); do_item_unlink_nolock(it, hv); /* Initialize the item block: */ it->slabs_clsid = 0; } //此刻,过期失效的item没有找到,申请内存又失败了。看来只能使用 //LRU淘汰一个item(即使这个item并没有过期失效) else if ((it = slabs_alloc(ntotal, id)) == NULL) { tried_alloc = 1; if (settings.evict_to_free == 0) { itemstats[id].outofmemory++; } else { itemstats[id].evicted++; itemstats[id].evicted_time = current_time - search->time; if (search->exptime != 0) itemstats[id].evicted_nonzero++; if ((search->it_flags & ITEM_FETCHED) == 0) { itemstats[id].evicted_unfetched++; } it = search; slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal); do_item_unlink_nolock(it, hv); /* Initialize the item block: */ it->slabs_clsid = 0; /* If we've just evicted an item, and the automover is set to * angry bird mode, attempt to rip memory into this slab class. * TODO: Move valid object detection into a function, and on a * "successful" memory pull, look behind and see if the next alloc * would be an eviction. Then kick off the slab mover before the * eviction happens. */ if (settings.slab_automove == 2) slabs_reassign(-1, id); } } refcount_decr(&search->refcount); /* If hash values were equal, we don't grab a second lock */ if (hold_lock) item_trylock_unlock(hold_lock); break; } //从slab分配器中申请内存 if (!tried_alloc && (tries == 0 || search == NULL)) it = slabs_alloc(ntotal, id); if (it == NULL) { itemstats[id].outofmemory++; mutex_unlock(&cache_lock); return NULL; } assert(it->slabs_clsid == 0); assert(it != heads[id]); /* Item initialization can happen outside of the lock; the item's already * been removed from the slab LRU. */ it->refcount = 1; /* the caller will have a reference */ mutex_unlock(&cache_lock); it->next = it->prev = it->h_next = 0; it->slabs_clsid = id; DEBUG_REFCNT(it, '*'); it->it_flags = settings.use_cas ? ITEM_CAS : 0; it->nkey = nkey; it->nbytes = nbytes; memcpy(ITEM_key(it), key, nkey); it->exptime = exptime; memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix); it->nsuffix = nsuffix; return it; }
/*@null@*/ item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes) { uint8_t nsuffix; item *it; char suffix[40]; size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix); unsigned int id = slabs_clsid(ntotal); if (id == 0) return 0; it = slabs_alloc(ntotal, id); if (it == 0) { int tries = 50; item *search; /* If requested to not push old items out of cache when memory runs out, * we're out of luck at this point... */ if (settings.evict_to_free == 0) { itemstats[id].outofmemory++; return NULL; } /* * try to get one off the right LRU * don't necessariuly unlink the tail because it may be locked: refcount>0 * search up from tail an item with refcount==0 and unlink it; give up after 50 * tries */ if (tails[id] == 0) { itemstats[id].outofmemory++; return NULL; } for (search = tails[id]; tries > 0 && search != NULL; tries--, search=search->prev) { if (search->refcount == 0) { if (search->exptime == 0 || search->exptime > current_time) { itemstats[id].evicted++; itemstats[id].evicted_time = current_time - search->time; STATS_LOCK(); stats.evictions++; STATS_UNLOCK(); #ifdef USE_REPLICATION replication_call_del(ITEM_key(search), search->nkey); #endif /* USE_REPLICATION */ } do_item_unlink(search); break; } } it = slabs_alloc(ntotal, id); if (it == 0) { itemstats[id].outofmemory++; /* Last ditch effort. There is a very rare bug which causes * refcount leaks. We've fixed most of them, but it still happens, * and it may happen in the future. * We can reasonably assume no item can stay locked for more than * three hours, so if we find one in the tail which is that old, * free it anyway. */ tries = 50; for (search = tails[id]; tries > 0 && search != NULL; tries--, search=search->prev) { if (search->refcount != 0 && search->time + 10800 < current_time) { itemstats[id].tailrepairs++; search->refcount = 0; do_item_unlink(search); break; } } it = slabs_alloc(ntotal, id); if (it == 0) { return NULL; } } } assert(it->slabs_clsid == 0); it->slabs_clsid = id; assert(it != heads[it->slabs_clsid]); it->next = it->prev = it->h_next = 0; it->refcount = 1; /* the caller will have a reference */ DEBUG_REFCNT(it, '*'); it->it_flags = 0; it->nkey = nkey; it->nbytes = nbytes; strcpy(ITEM_key(it), key); it->exptime = exptime; memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix); it->nsuffix = nsuffix; return it; }