/* This is how rewriting of the append only file in background works: * * 1) The user calls BGREWRITEAOF * 2) Redis calls this function, that forks(): * 2a) the child rewrite the append only file in a temp file. * 2b) the parent accumulates differences in server.aof_rewrite_buf. * 3) When the child finished '2a' exists. * 4) The parent will trap the exit code, if it's OK, will append the * data accumulated into server.aof_rewrite_buf into the temp file, and * finally will rename(2) the temp file in the actual file name. * The the new file is reopened as the new append only file. Profit! */ int rewriteAppendOnlyFileBackground(void) { pid_t childpid; long long start; if (server.aof_child_pid != -1) return REDIS_ERR; start = ustime(); if ((childpid = fork()) == 0) { char tmpfile[256]; /* Child */ closeListeningSockets(0); redisSetProcTitle("redis-aof-rewrite"); snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof", (int) getpid()); if (rewriteAppendOnlyFile(tmpfile) == REDIS_OK) { size_t private_dirty = zmalloc_get_private_dirty(); if (private_dirty) { redisLog(REDIS_NOTICE, "AOF rewrite: %zu MB of memory used by copy-on-write", private_dirty/(1024*1024)); } exitFromChild(0); } else { exitFromChild(1); } } else { /* Parent */ server.stat_fork_time = ustime()-start; server.stat_fork_rate = (double) zmalloc_used_memory() * 1000000 / server.stat_fork_time / (1024*1024*1024); /* GB per second. */ latencyAddSampleIfNeeded("fork",server.stat_fork_time/1000); if (childpid == -1) { redisLog(REDIS_WARNING, "Can't rewrite append only file in background: fork: %s", strerror(errno)); return REDIS_ERR; } redisLog(REDIS_NOTICE, "Background append only file rewriting started by pid %d",childpid); server.aof_rewrite_scheduled = 0; server.aof_rewrite_time_start = time(NULL); server.aof_child_pid = childpid; updateDictResizePolicy(); /* We set appendseldb to -1 in order to force the next call to the * feedAppendOnlyFile() to issue a SELECT command, so the differences * accumulated by the parent into server.aof_rewrite_buf will start * with a SELECT statement and it will be safe to merge. */ server.aof_selected_db = -1; replicationScriptCacheFlush(); return REDIS_OK; } return REDIS_OK; /* unreached */ }
// This function is meant to be an exact replica of the fork() child path in rdbSaveToSlavesSockets int do_socketSave2(int *fds, int numfds, uint64_t *clientids) { #ifndef NO_QFORKIMPL int retval; rio slave_sockets; server.rdb_child_pid = GetCurrentProcessId(); rioInitWithFdset(&slave_sockets,fds,numfds); zfree(fds); // On Windows we haven't duplicated the listening sockets so we shouldn't close them #ifndef _WIN32 closeListeningSockets(0); #endif redisSetProcTitle("redis-rdb-to-slaves"); retval = rdbSaveRioWithEOFMark(&slave_sockets,NULL); if (retval == REDIS_OK && rioFlush(&slave_sockets) == 0) retval = REDIS_ERR; if (retval == REDIS_OK) { size_t private_dirty = zmalloc_get_private_dirty(); if (private_dirty) { redisLog(REDIS_NOTICE, "RDB: %zu MB of memory used by copy-on-write", private_dirty/(1024*1024)); } /* If we are returning OK, at least one slave was served * with the RDB file as expected, so we need to send a report * to the parent via the pipe. The format of the message is: * * <len> <slave[0].id> <slave[0].error> ... * * len, slave IDs, and slave errors, are all uint64_t integers, * so basically the reply is composed of 64 bits for the len field * plus 2 additional 64 bit integers for each entry, for a total * of 'len' entries. * * The 'id' represents the slave's client ID, so that the master * can match the report with a specific slave, and 'error' is * set to 0 if the replication process terminated with a success * or the error code if an error occurred. */ void *msg = zmalloc(sizeof(uint64_t)*(1+2*numfds)); uint64_t *len = msg; uint64_t *ids = len+1; int j, msglen; *len = numfds; for (j = 0; j < numfds; j++) { *ids++ = clientids[j]; *ids++ = slave_sockets.io.fdset.state[j]; } /* Write the message to the parent. If we have no good slaves or * we are unable to transfer the message to the parent, we exit * with an error so that the parent will abort the replication * process with all the childre that were waiting. */ msglen = sizeof(uint64_t)*(1+2*numfds); if (*len == 0 || write(server.rdb_pipe_write_result_to_parent,msg,msglen) != msglen) { retval = REDIS_ERR; } } return retval; #endif return REDIS_OK; }