void aofRewriteBufferAppend(unsigned char *s, unsigned long len){ /*add string s at the end of AOF buffer, assign a new block if needed*/ listNode *last; aofrwblock *block; unsigned int clen; int counts; last = listLast(server.aof_rewrite_buf_blocks); if(last){ block = last->value; } else{ block = NULL; } while(len > 0){ if(block){ clen = (block->free < len)? block->free: len; memcpy(block->buf, s, clen); block->free -= clen; block->used += clen; s += clen; len -= clen; } if(len){ block = zmalloc(sizeof(aofrwblock)); block->free = AOF_RW_BUF_BLOCK_SIZE; block->used = 0; listAddNodeTail(server.aof_rewrite_buf_blocks, block); /*NOTICE when node number is coming to n*10, WARNING when n*100*/ counts = listLength(server.aof_rewrite_buf_blocks); if(((counts+1) % 100) == 0){ xredisLog(XREDIS_WARNING, "Background AOF buffer size: %lu MB", aofRewriteBufferSize()/(1024*1024)); } else if(((counts+1)%10) == 0){ xredisLog(XREDIS_NOTICE, "Background AOF buffer size: %lu MB", aofRewriteBufferSize()/(1024*1024)); } } } }
/* We don't want to count AOF buffers and slaves output buffers as * used memory: the eviction should use mostly data size. This function * returns the sum of AOF and slaves buffer. */ size_t freeMemoryGetNotCountedMemory(void) { size_t overhead = 0; int slaves = listLength(server.slaves); if (slaves) { listIter li; listNode *ln; listRewind(server.slaves,&li); while((ln = listNext(&li))) { client *slave = listNodeValue(ln); overhead += getClientOutputBufferMemoryUsage(slave); } } if (server.aof_state != AOF_OFF) { overhead += sdslen(server.aof_buf)+aofRewriteBufferSize(); } return overhead; }
/* Append data to the AOF rewrite buffer, allocating new blocks if needed. */ void aofRewriteBufferAppend(unsigned char *s, unsigned long len) { listNode *ln = listLast(server.aof_rewrite_buf_blocks); aofrwblock *block = ln ? ln->value : NULL; while(len) { /* If we already got at least an allocated block, try appending * at least some piece into it. */ if (block) { unsigned long thislen = (block->free < len) ? block->free : len; if (thislen) { /* The current block is not already full. */ memcpy(block->buf+block->used, s, thislen); block->used += thislen; block->free -= thislen; s += thislen; len -= thislen; } } if (len) { /* First block to allocate, or need another block. */ int numblocks; block = zmalloc(sizeof(*block)); block->free = AOF_RW_BUF_BLOCK_SIZE; block->used = 0; listAddNodeTail(server.aof_rewrite_buf_blocks,block); /* Log every time we cross more 10 or 100 blocks, respectively * as a notice or warning. */ numblocks = listLength(server.aof_rewrite_buf_blocks); if (((numblocks+1) % 10) == 0) { int level = ((numblocks+1) % 100) == 0 ? REDIS_WARNING : REDIS_NOTICE; redisLog(level,"Background AOF buffer size: %lu MB", aofRewriteBufferSize()/(1024*1024)); } } } }
/* A background append only file rewriting (BGREWRITEAOF) terminated its work. * Handle this. */ void backgroundRewriteDoneHandler(int exitcode, int bysignal) { if (!bysignal && exitcode == 0) { int newfd, oldfd; char tmpfile[256]; long long now = ustime(); redisLog(REDIS_NOTICE, "Background AOF rewrite terminated with success"); /* Flush the differences accumulated by the parent to the * rewritten AOF. */ snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof", (int)server.aof_child_pid); newfd = open(tmpfile,O_WRONLY|O_APPEND); if (newfd == -1) { redisLog(REDIS_WARNING, "Unable to open the temporary AOF produced by the child: %s", strerror(errno)); goto cleanup; } if (aofRewriteBufferWrite(newfd) == -1) { redisLog(REDIS_WARNING, "Error trying to flush the parent diff to the rewritten AOF: %s", strerror(errno)); close(newfd); goto cleanup; } redisLog(REDIS_NOTICE, "Parent diff successfully flushed to the rewritten AOF (%lu bytes)", aofRewriteBufferSize()); /* The only remaining thing to do is to rename the temporary file to * the configured file and switch the file descriptor used to do AOF * writes. We don't want close(2) or rename(2) calls to block the * server on old file deletion. * * There are two possible scenarios: * * 1) AOF is DISABLED and this was a one time rewrite. The temporary * file will be renamed to the configured file. When this file already * exists, it will be unlinked, which may block the server. * * 2) AOF is ENABLED and the rewritten AOF will immediately start * receiving writes. After the temporary file is renamed to the * configured file, the original AOF file descriptor will be closed. * Since this will be the last reference to that file, closing it * causes the underlying file to be unlinked, which may block the * server. * * To mitigate the blocking effect of the unlink operation (either * caused by rename(2) in scenario 1, or by close(2) in scenario 2), we * use a background thread to take care of this. First, we * make scenario 1 identical to scenario 2 by opening the target file * when it exists. The unlink operation after the rename(2) will then * be executed upon calling close(2) for its descriptor. Everything to * guarantee atomicity for this switch has already happened by then, so * we don't care what the outcome or duration of that close operation * is, as long as the file descriptor is released again. */ if (server.aof_fd == -1) { /* AOF disabled */ /* Don't care if this fails: oldfd will be -1 and we handle that. * One notable case of -1 return is if the old file does * not exist. */ oldfd = open(server.aof_filename,O_RDONLY|O_NONBLOCK); } else { /* AOF enabled */ oldfd = -1; /* We'll set this to the current AOF filedes later. */ } /* Rename the temporary file. This will not unlink the target file if * it exists, because we reference it with "oldfd". */ if (rename(tmpfile,server.aof_filename) == -1) { redisLog(REDIS_WARNING, "Error trying to rename the temporary AOF file: %s", strerror(errno)); close(newfd); if (oldfd != -1) close(oldfd); goto cleanup; } if (server.aof_fd == -1) { /* AOF disabled, we don't need to set the AOF file descriptor * to this new file, so we can close it. */ close(newfd); } else { /* AOF enabled, replace the old fd with the new one. */ oldfd = server.aof_fd; server.aof_fd = newfd; if (server.aof_fsync == AOF_FSYNC_ALWAYS) aof_fsync(newfd); else if (server.aof_fsync == AOF_FSYNC_EVERYSEC) aof_background_fsync(newfd); server.aof_selected_db = -1; /* Make sure SELECT is re-issued */ aofUpdateCurrentSize(); server.aof_rewrite_base_size = server.aof_current_size; /* Clear regular AOF buffer since its contents was just written to * the new AOF from the background rewrite buffer. */ sdsfree(server.aof_buf); server.aof_buf = sdsempty(); } server.aof_lastbgrewrite_status = REDIS_OK; redisLog(REDIS_NOTICE, "Background AOF rewrite finished successfully"); /* Change state from WAIT_REWRITE to ON if needed */ if (server.aof_state == REDIS_AOF_WAIT_REWRITE) server.aof_state = REDIS_AOF_ON; /* Asynchronously close the overwritten AOF. */ if (oldfd != -1) bioCreateBackgroundJob(REDIS_BIO_CLOSE_FILE,(void*)(long)oldfd,NULL,NULL); redisLog(REDIS_VERBOSE, "Background AOF rewrite signal handler took %lldus", ustime()-now); } else if (!bysignal && exitcode != 0) { server.aof_lastbgrewrite_status = REDIS_ERR; redisLog(REDIS_WARNING, "Background AOF rewrite terminated with error"); } else { server.aof_lastbgrewrite_status = REDIS_ERR; redisLog(REDIS_WARNING, "Background AOF rewrite terminated by signal %d", bysignal); } cleanup: aofRewriteBufferReset(); aofRemoveTempFile(server.aof_child_pid); server.aof_child_pid = -1; server.aof_rewrite_time_last = time(NULL)-server.aof_rewrite_time_start; server.aof_rewrite_time_start = -1; /* Schedule a new rewrite if we are waiting for it to switch the AOF ON. */ if (server.aof_state == REDIS_AOF_WAIT_REWRITE) server.aof_rewrite_scheduled = 1; }
int freeMemoryIfNeeded(void) { size_t mem_reported, mem_used, mem_tofree, mem_freed; int slaves = listLength(server.slaves); mstime_t latency, eviction_latency; long long delta; /* Check if we are over the memory usage limit. If we are not, no need * to subtract the slaves output buffers. We can just return ASAP. */ mem_reported = zmalloc_used_memory(); if (mem_reported <= server.maxmemory) return C_OK; /* Remove the size of slaves output buffers and AOF buffer from the * count of used memory. */ mem_used = mem_reported; if (slaves) { listIter li; listNode *ln; listRewind(server.slaves,&li); while((ln = listNext(&li))) { client *slave = listNodeValue(ln); unsigned long obuf_bytes = getClientOutputBufferMemoryUsage(slave); if (obuf_bytes > mem_used) mem_used = 0; else mem_used -= obuf_bytes; } } if (server.aof_state != AOF_OFF) { mem_used -= sdslen(server.aof_buf); mem_used -= aofRewriteBufferSize(); } /* Check if we are still over the memory limit. */ if (mem_used <= server.maxmemory) return C_OK; /* Compute how much memory we need to free. */ mem_tofree = mem_used - server.maxmemory; mem_freed = 0; if (server.maxmemory_policy == MAXMEMORY_NO_EVICTION) goto cant_free; /* We need to free memory, but policy forbids. */ latencyStartMonitor(latency); while (mem_freed < mem_tofree) { int j, k, i, keys_freed = 0; static int next_db = 0; sds bestkey = NULL; int bestdbid; redisDb *db; dict *dict; dictEntry *de; if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU || server.maxmemory_policy == MAXMEMORY_VOLATILE_LRU) { struct evictionPoolEntry *pool = EvictionPoolLRU; while(bestkey == NULL) { unsigned long total_keys = 0, keys; /* We don't want to make local-db choices when expiring keys, * so to start populate the eviction pool sampling keys from * every DB. */ for (i = 0; i < server.dbnum; i++) { db = server.db+i; dict = (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU) ? db->dict : db->expires; if ((keys = dictSize(dict)) != 0) { evictionPoolPopulate(i, dict, db->dict, pool); total_keys += keys; } } if (!total_keys) break; /* No keys to evict. */ /* Go backward from best to worst element to evict. */ for (k = EVPOOL_SIZE-1; k >= 0; k--) { if (pool[k].key == NULL) continue; bestdbid = pool[k].dbid; if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU) { de = dictFind(server.db[pool[k].dbid].dict, pool[k].key); } else { de = dictFind(server.db[pool[k].dbid].expires, pool[k].key); } /* Remove the entry from the pool. */ if (pool[k].key != pool[k].cached) sdsfree(pool[k].key); pool[k].key = NULL; pool[k].idle = 0; /* If the key exists, is our pick. Otherwise it is * a ghost and we need to try the next element. */ if (de) { bestkey = dictGetKey(de); break; } else { /* Ghost... Iterate again. */ } } } } /* volatile-random and allkeys-random policy */ else if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM || server.maxmemory_policy == MAXMEMORY_VOLATILE_RANDOM) { /* When evicting a random key, we try to evict a key for * each DB, so we use the static 'next_db' variable to * incrementally visit all DBs. */ for (i = 0; i < server.dbnum; i++) { j = (++next_db) % server.dbnum; db = server.db+j; dict = (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM) ? db->dict : db->expires; if (dictSize(dict) != 0) { de = dictGetRandomKey(dict); bestkey = dictGetKey(de); bestdbid = j; break; } } } /* volatile-ttl */ else if (server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL) { long bestttl = 0; /* Initialized to avoid warning. */ /* In this policy we scan a single DB per iteration (visiting * a different DB per call), expiring the key with the smallest * TTL among the few sampled. * * Note that this algorithm makes local-DB choices, and should * use a pool and code more similr to the one used in the * LRU eviction policies in the future. */ for (i = 0; i < server.dbnum; i++) { j = (++next_db) % server.dbnum; db = server.db+j; dict = db->expires; if (dictSize(dict) != 0) { for (k = 0; k < server.maxmemory_samples; k++) { sds thiskey; long thisttl; de = dictGetRandomKey(dict); thiskey = dictGetKey(de); thisttl = (long) dictGetVal(de); /* Keys expiring sooner (smaller unix timestamp) are * better candidates for deletion */ if (bestkey == NULL || thisttl < bestttl) { bestkey = thiskey; bestttl = thisttl; bestdbid = j; } } } } } /* Finally remove the selected key. */ if (bestkey) { db = server.db+bestdbid; robj *keyobj = createStringObject(bestkey,sdslen(bestkey)); propagateExpire(db,keyobj,server.lazyfree_lazy_eviction); /* We compute the amount of memory freed by db*Delete() alone. * It is possible that actually the memory needed to propagate * the DEL in AOF and replication link is greater than the one * we are freeing removing the key, but we can't account for * that otherwise we would never exit the loop. * * AOF and Output buffer memory will be freed eventually so * we only care about memory used by the key space. */ delta = (long long) zmalloc_used_memory(); latencyStartMonitor(eviction_latency); if (server.lazyfree_lazy_eviction) dbAsyncDelete(db,keyobj); else dbSyncDelete(db,keyobj); latencyEndMonitor(eviction_latency); latencyAddSampleIfNeeded("eviction-del",eviction_latency); latencyRemoveNestedEvent(latency,eviction_latency); delta -= (long long) zmalloc_used_memory(); mem_freed += delta; server.stat_evictedkeys++; notifyKeyspaceEvent(NOTIFY_EVICTED, "evicted", keyobj, db->id); decrRefCount(keyobj); keys_freed++; /* When the memory to free starts to be big enough, we may * start spending so much time here that is impossible to * deliver data to the slaves fast enough, so we force the * transmission here inside the loop. */ if (slaves) flushSlavesOutputBuffers(); } if (!keys_freed) { latencyEndMonitor(latency); latencyAddSampleIfNeeded("eviction-cycle",latency); goto cant_free; /* nothing to free... */ } } latencyEndMonitor(latency); latencyAddSampleIfNeeded("eviction-cycle",latency); return C_OK; cant_free: /* We are here if we are not able to reclaim memory. There is only one * last thing we can try: check if the lazyfree thread has jobs in queue * and wait... */ while(bioPendingJobsOfType(BIO_LAZY_FREE)) { if (((mem_reported - zmalloc_used_memory()) + mem_freed) >= mem_tofree) break; usleep(1000); } return C_ERR; }