void *IOThreadEntryPoint(void *arg) { iojob *j; listNode *ln; REDIS_NOTUSED(arg); pthread_detach(pthread_self()); while(1) { /* Get a new job to process */ lockThreadedIO(); if (listLength(server.io_newjobs) == 0) { /* No new jobs in queue, exit. */ redisLog(REDIS_DEBUG,"Thread %ld exiting, nothing to do", (long) pthread_self()); server.io_active_threads--; unlockThreadedIO(); return NULL; } ln = listFirst(server.io_newjobs); j = ln->value; listDelNode(server.io_newjobs,ln); /* Add the job in the processing queue */ j->thread = pthread_self(); listAddNodeTail(server.io_processing,j); ln = listLast(server.io_processing); /* We use ln later to remove it */ unlockThreadedIO(); redisLog(REDIS_DEBUG,"Thread %ld got a new job (type %d): %p about key '%s'", (long) pthread_self(), j->type, (void*)j, (char*)j->key->ptr); /* Process the Job */ if (j->type == REDIS_IOJOB_LOAD) { vmpointer *vp = (vmpointer*)j->id; j->val = vmReadObjectFromSwap(j->page,vp->vtype); } else if (j->type == REDIS_IOJOB_PREPARE_SWAP) { j->pages = rdbSavedObjectPages(j->val); } else if (j->type == REDIS_IOJOB_DO_SWAP) { if (vmWriteObjectOnSwap(j->val,j->page) == REDIS_ERR) j->canceled = 1; } /* Done: insert the job into the processed queue */ redisLog(REDIS_DEBUG,"Thread %ld completed the job: %p (key %s)", (long) pthread_self(), (void*)j, (char*)j->key->ptr); lockThreadedIO(); listDelNode(server.io_processing,ln); listAddNodeTail(server.io_processed,j); unlockThreadedIO(); /* Signal the main thread there is new stuff to process */ redisAssert(write(server.io_ready_pipe_write,"x",1) == 1); } return NULL; /* never reached */ }
/* Preload keys needed to execute the entire MULTI/EXEC block. * * This function is called by blockClientOnSwappedKeys when EXEC is issued, * and will block the client when any command requires a swapped out value. */ void execBlockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd, int argc, robj **argv) { int i, margc; struct redisCommand *mcmd; robj **margv; REDIS_NOTUSED(cmd); REDIS_NOTUSED(argc); REDIS_NOTUSED(argv); if (!(c->flags & REDIS_MULTI)) return; for (i = 0; i < c->mstate.count; i++) { mcmd = c->mstate.commands[i].cmd; margc = c->mstate.commands[i].argc; margv = c->mstate.commands[i].argv; if (mcmd->vm_preload_proc != NULL) { mcmd->vm_preload_proc(c,mcmd,margc,margv); } else { waitForMultipleSwappedKeys(c,mcmd,margc,margv); } } }
int showThroughput(struct aeEventLoop *eventLoop, long long id, void *clientData) { REDIS_NOTUSED(eventLoop); REDIS_NOTUSED(id); REDIS_NOTUSED(clientData); if (config.liveclients == 0) { fprintf(stderr,"All clients disconnected... aborting.\n"); exit(1); } if (config.csv) return 250; if (config.idlemode == 1) { printf("clients: %d\r", config.liveclients); fflush(stdout); return 250; } float dt = (float)(mstime()-config.start)/1000.0; float rps = (float)config.requests_finished/dt; printf("%s: %.2f\r", config.title, rps); fflush(stdout); return 250; /* every 250ms */ }
/* Behaves as posix, works without ifdefs, makes compiler happy */ int sigaction(int sig, struct sigaction *in, struct sigaction *out) { REDIS_NOTUSED(out); /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction * is used. Otherwise, sa_handler is used */ if (in->sa_flags & SA_SIGINFO) { signal(sig, in->sa_sigaction); } else { signal(sig, in->sa_handler); } return 0; }
static void writeHandler(aeEventLoop *el, int fd, void *privdata, int mask) { client c = privdata; REDIS_NOTUSED(el); REDIS_NOTUSED(fd); REDIS_NOTUSED(mask); /* Initialize request when nothing was written. */ if (c->written == 0) { /* Enforce upper bound to number of requests. */ if (config.requests_issued++ >= config.requests) { freeClient(c); return; } /* Really initialize: randomize keys and set start time. */ if (config.randomkeys) randomizeClientKey(c); c->start = ustime(); c->latency = -1; } if (sdslen(c->obuf) > c->written) { void *ptr = c->obuf+c->written; #ifdef _WIN32 int nwritten = send(c->context->fd,ptr,sdslen(c->obuf)-c->written, 0); #else int nwritten = write(c->context->fd,ptr,sdslen(c->obuf)-c->written); #endif if (nwritten == -1) { if (errno != EPIPE) fprintf(stderr, "Writing to socket: %s\n", strerror(errno)); freeClient(c); return; } c->written += nwritten; if (sdslen(c->obuf) == c->written) { aeDeleteFileEvent(config.el,c->context->fd,AE_WRITABLE); aeCreateFileEvent(config.el,c->context->fd,AE_READABLE,readHandler,c); } } }
static void readHandler(aeEventLoop *el, int fd, void *privdata, int mask) { client c = privdata; void *reply = NULL; REDIS_NOTUSED(el); REDIS_NOTUSED(fd); REDIS_NOTUSED(mask); /* Calculate latency only for the first read event. This means that the * server already sent the reply and we need to parse it. Parsing overhead * is not part of the latency, so calculate it only once, here. */ if (c->latency < 0) c->latency = ustime()-(c->start); if (redisBufferRead(c->context) != REDIS_OK) { fprintf(stderr,"Error: %s\n",c->context->errstr); exit(1); } else { while(c->pending) { if (redisGetReply(c->context,&reply) != REDIS_OK) { fprintf(stderr,"Error: %s\n",c->context->errstr); exit(1); } if (reply != NULL) { if (reply == (void*)REDIS_REPLY_ERROR) { fprintf(stderr,"Unexpected error reply, exiting...\n"); exit(1); } freeReplyObject(reply); if (config.requests_finished < config.requests) config.latency[config.requests_finished++] = c->latency; c->pending--; if (c->pending == 0) clientDone(c); } else { break; } } } }
void readFromSlave(aeEventLoop *el, int fd, void *privdata, int mask){ REDIS_NOTUSED(el); REDIS_NOTUSED(mask); char rdata[MAX_READ]; int nread = readData(el, fd, rdata, MAX_READ); if(nread <= 0 ) { return; } if(nread == 6){ rdata[6]= 0; char *sync = "SYNC\r\n"; if(strcmp(sync, rdata) == 0){ log4c_category_log(mycat, LOG4C_PRIORITY_INFO, "sync start"); state = SYNC_START_STATE; } } int wr = writeData(el, master_fd, rdata, nread); if(-1 == wr) { log4c_category_log(mycat, LOG4C_PRIORITY_ERROR, "write to master failed error %d, at line %d in file %s", errno, __LINE__, __FILE__); } }
void luaMaskCountHook(lua_State *lua, lua_Debug *ar) { long long elapsed; REDIS_NOTUSED(ar); REDIS_NOTUSED(lua); elapsed = mstime() - server.lua_time_start; if (elapsed >= server.lua_time_limit && server.lua_timedout == 0) { redisLog(REDIS_WARNING,"Lua slow script detected: still in execution after %lld milliseconds. You can try killing the script using the SCRIPT KILL command.",elapsed); server.lua_timedout = 1; /* Once the script timeouts we reenter the event loop to permit others * to call SCRIPT KILL or SHUTDOWN NOSAVE if needed. For this reason * we need to mask the client executing the script from the event loop. * If we don't do that the client may disconnect and could no longer be * here when the EVAL command will return. */ aeDeleteFileEvent(server.el, server.lua_caller->fd, AE_READABLE); } if (server.lua_timedout) processEventsWhileBlocked(); if (server.lua_kill) { redisLog(REDIS_WARNING,"Lua script killed by user with SCRIPT KILL."); lua_pushstring(lua,"Script killed by user with SCRIPT KILL..."); lua_error(lua); } }
static void ctrlc(int sig) { REDIS_NOTUSED(sig); if (config.idlemode) { exit(1); } else { config.ctrlc++; if (config.ctrlc == 1) { config.done = 1; printf("\nWaiting for pending requests to complete...\n"); } else { printf("\nForcing exit...\n"); exit(1); } } }
int pthread_join(pthread_t thread, void **value_ptr) { int result; HANDLE h = OpenThread(SYNCHRONIZE, FALSE, thread); REDIS_NOTUSED(value_ptr); switch (WaitForSingleObject(h, INFINITE)) { case WAIT_OBJECT_0: result = 0; case WAIT_ABANDONED: result = EINVAL; default: result = GetLastError(); } CloseHandle(h); return result; }
int *getKeysUsingCommandTable(struct redisCommand *cmd,robj **argv, int argc, int *numkeys) { int j, i = 0, last, *keys; REDIS_NOTUSED(argv); if (cmd->firstkey == 0) { *numkeys = 0; return NULL; } last = cmd->lastkey; if (last < 0) last = argc+last; keys = zmalloc(sizeof(int)*((last - cmd->firstkey)+1)); for (j = cmd->firstkey; j <= last; j += cmd->keystep) { redisAssert(j < argc); keys[i++] = j; } *numkeys = i; return keys; }
/* Helper function to extract keys from the following commands: * EVAL <script> <num-keys> <key> <key> ... <key> [more stuff] * EVALSHA <script> <num-keys> <key> <key> ... <key> [more stuff] */ int *evalGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys) { int i, num, *keys; REDIS_NOTUSED(cmd); num = atoi(argv[2]->ptr); /* Sanity check. Don't return any key if the command is going to * reply with syntax error. */ if (num > (argc-3)) { *numkeys = 0; return NULL; } keys = zmalloc(sizeof(int)*num); *numkeys = num; /* Add all key positions for argv[3...n] to keys[] */ for (i = 0; i < num; i++) keys[i] = 3+i; return keys; }
/* Helper function to extract keys from the SORT command. * * SORT <sort-key> ... STORE <store-key> ... * * The first argument of SORT is always a key, however a list of options * follow in SQL-alike style. Here we parse just the minimum in order to * correctly identify keys in the "STORE" option. */ int *sortGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys) { int i, j, num, *keys, found_store = 0; REDIS_NOTUSED(cmd); num = 0; keys = zmalloc(sizeof(int)*2); /* Alloc 2 places for the worst case. */ keys[num++] = 1; /* <sort-key> is always present. */ /* Search for STORE option. By default we consider options to don't * have arguments, so if we find an unknown option name we scan the * next. However there are options with 1 or 2 arguments, so we * provide a list here in order to skip the right number of args. */ struct { char *name; int skip; } skiplist[] = { {"limit", 2}, {"get", 1}, {"by", 1}, {NULL, 0} /* End of elements. */ }; for (i = 2; i < argc; i++) { for (j = 0; skiplist[j].name != NULL; j++) { if (!strcasecmp(argv[i]->ptr,skiplist[j].name)) { i += skiplist[j].skip; break; } else if (!strcasecmp(argv[i]->ptr,"store") && i+1 < argc) { /* Note: we don't increment "num" here and continue the loop * to be sure to process the *last* "STORE" option if multiple * ones are provided. This is same behavior as SORT. */ found_store = 1; keys[num] = i+1; /* <store-key> */ break; } } } *numkeys = num + found_store; return keys; }
int pthread_create(pthread_t *thread, const void *unused, void *(*start_routine)(void*), void *arg) { HANDLE h; thread_params *params = (thread_params *)malloc(sizeof(thread_params)); REDIS_NOTUSED(unused); params->func = start_routine; params->arg = arg; h =(HANDLE) _beginthreadex(NULL, /* Security not used */ REDIS_THREAD_STACK_SIZE, /* Set custom stack size */ win32_proxy_threadproc, /* calls win32 stdcall proxy */ params, /* real threadproc is passed as paremeter */ STACK_SIZE_PARAM_IS_A_RESERVATION, /* reserve stack */ thread /* returned thread id */ ); if (!h) return errno; CloseHandle(h); return 0; }
int pthread_cond_init(pthread_cond_t *cond, const void *unused) { REDIS_NOTUSED(unused); cond->waiters = 0; cond->was_broadcast = 0; InitializeCriticalSection(&cond->waiters_lock); cond->sema = CreateSemaphore(NULL, 0, LONG_MAX, NULL); if (!cond->sema) { errno = GetLastError(); return -1; } cond->continue_broadcast = CreateEvent(NULL, /* security */ FALSE, /* auto-reset */ FALSE, /* not signaled */ NULL); /* name */ if (!cond->continue_broadcast) { errno = GetLastError(); return -1; } return 0; }
void readFromMaster(aeEventLoop *el, int fd, void *privdata, int mask) { REDIS_NOTUSED(el); REDIS_NOTUSED(mask); int nread; char rdata[MAX_READ]; char *p = rdata; switch(state){ case SYNC_START_STATE:{ while(1){ nread = readData(el, fd, p,1); if(nread < 0){ return; } else if(nread == 0){ continue; } int wr = writeData(el, client_fd, p, 1); if(-1 == wr) { return; } if(*p == '\n' && p != rdata) break; if(*p != '\n') p++; } *p = '\0'; if(rdata[0] == '-'){ log4c_category_log(mycat, LOG4C_PRIORITY_ERROR, "sync with master failed at line %d in file %s", __LINE__, __FILE__); exit(1); } bulk_size = strtoull(rdata + 1, 0, 10); log4c_category_log(mycat, LOG4C_PRIORITY_INFO, "bulk size %llu, at line %d in file %s", bulk_size, __LINE__, __FILE__); state = SYNC_STATE; break; } case SYNC_STATE:{ long long int all_read =0; long long int all_write =0; while(bulk_size){ nread = readData(el, fd, rdata, (bulk_size > MAX_READ) ? MAX_READ:bulk_size); if(nread < 0 ) { return; } else if (nread == 0){ continue; } else{ all_read += nread; } int wr = writeData(el, client_fd, rdata,nread); if(-1 == wr) { log4c_category_log(mycat, LOG4C_PRIORITY_ERROR, "write to payload failed error %d, at line %d in file %s", errno, __LINE__, __FILE__); return; } else{ all_write += wr; } bulk_size -= nread; } state = RUNNING_STATE; log4c_category_log(mycat, LOG4C_PRIORITY_INFO, "sync done"); break; } default:{ nread = readData(el, fd, rdata, MAX_READ); if(nread > 0){ struct WData wdata; wdata.len = nread; wdata.data= rdata; if( state == RUNNING_STATE){ processInputBuffer(el, &wdata); int wr = writeData(el, client_fd, wdata.data, wdata.len); if(-1 == wr) { return; } } else{ if(nread > 9 && nread < 100) { if(!strncmp(rdata,"+CONTINUE",9) || !strncmp(rdata,"+FULLRESYNC",11)){ log4c_category_log(mycat, LOG4C_PRIORITY_INFO, "sync start"); state = SYNC_START_STATE; } } int wr = writeData(el, client_fd, wdata.data, wdata.len); if(-1 == wr) { return; } } } break; } } }
/* Noop in windows */ int pthread_detach (pthread_t thread) { REDIS_NOTUSED(thread); return 0; /* noop */ }
void sendReplyToClientWritev(aeEventLoop *el, int fd, void *privdata, int mask) { redisClient *c = privdata; int nwritten = 0, totwritten = 0, objlen, willwrite; robj *o; struct iovec iov[REDIS_WRITEV_IOVEC_COUNT]; int offset, ion = 0; REDIS_NOTUSED(el); REDIS_NOTUSED(mask); listNode *node; while (listLength(c->reply)) { offset = c->sentlen; ion = 0; willwrite = 0; /* fill-in the iov[] array */ for(node = listFirst(c->reply); node; node = listNextNode(node)) { o = listNodeValue(node); objlen = sdslen(o->ptr); if (totwritten + objlen - offset > REDIS_MAX_WRITE_PER_EVENT) break; if(ion == REDIS_WRITEV_IOVEC_COUNT) break; /* no more iovecs */ iov[ion].iov_base = ((char*)o->ptr) + offset; iov[ion].iov_len = objlen - offset; willwrite += objlen - offset; offset = 0; /* just for the first item */ ion++; } if(willwrite == 0) break; /* write all collected blocks at once */ if((nwritten = writev(fd, iov, ion)) < 0) { if (errno != EAGAIN) { redisLog(REDIS_VERBOSE, "Error writing to client: %s", strerror(errno)); freeClient(c); return; } break; } totwritten += nwritten; offset = c->sentlen; /* remove written robjs from c->reply */ while (nwritten && listLength(c->reply)) { o = listNodeValue(listFirst(c->reply)); objlen = sdslen(o->ptr); if(nwritten >= objlen - offset) { listDelNode(c->reply, listFirst(c->reply)); nwritten -= objlen - offset; c->sentlen = 0; } else { /* partial write */ c->sentlen += nwritten; break; } offset = 0; } } if (totwritten > 0) c->lastinteraction = time(NULL); if (listLength(c->reply) == 0) { c->sentlen = 0; aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE); } }
void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) { redisClient *c = privdata; int nwritten = 0, totwritten = 0, objlen; robj *o; REDIS_NOTUSED(el); REDIS_NOTUSED(mask); /* Use writev() if we have enough buffers to send */ if (!server.glueoutputbuf && listLength(c->reply) > REDIS_WRITEV_THRESHOLD && !(c->flags & REDIS_MASTER)) { sendReplyToClientWritev(el, fd, privdata, mask); return; } while(listLength(c->reply)) { if (server.glueoutputbuf && listLength(c->reply) > 1) glueReplyBuffersIfNeeded(c); o = listNodeValue(listFirst(c->reply)); objlen = sdslen(o->ptr); if (objlen == 0) { listDelNode(c->reply,listFirst(c->reply)); continue; } if (c->flags & REDIS_MASTER) { /* Don't reply to a master */ nwritten = objlen - c->sentlen; } else { nwritten = write(fd, ((char*)o->ptr)+c->sentlen, objlen - c->sentlen); if (nwritten <= 0) break; } c->sentlen += nwritten; totwritten += nwritten; /* If we fully sent the object on head go to the next one */ if (c->sentlen == objlen) { listDelNode(c->reply,listFirst(c->reply)); c->sentlen = 0; } /* Note that we avoid to send more thank REDIS_MAX_WRITE_PER_EVENT * bytes, in a single threaded server it's a good idea to serve * other clients as well, even if a very large request comes from * super fast link that is always able to accept data (in real world * scenario think about 'KEYS *' against the loopback interfae) */ if (totwritten > REDIS_MAX_WRITE_PER_EVENT) break; } if (nwritten == -1) { if (errno == EAGAIN) { nwritten = 0; } else { redisLog(REDIS_VERBOSE, "Error writing to client: %s", strerror(errno)); freeClient(c); return; } } if (totwritten > 0) c->lastinteraction = time(NULL); if (listLength(c->reply) == 0) { c->sentlen = 0; aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE); } }
/* Asynchronously read the SYNC payload we receive from a master */ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) { char buf[4096]; ssize_t nread, readlen; REDIS_NOTUSED(el); REDIS_NOTUSED(privdata); REDIS_NOTUSED(mask); /* If repl_transfer_left == -1 we still have to read the bulk length * from the master reply. */ if (server.repl_transfer_left == -1) { if (syncReadLine(fd,buf,1024,3600) == -1) { redisLog(REDIS_WARNING, "I/O error reading bulk count from MASTER: %s", strerror(errno)); replicationAbortSyncTransfer(); return; } if (buf[0] == '-') { redisLog(REDIS_WARNING, "MASTER aborted replication with an error: %s", buf+1); replicationAbortSyncTransfer(); return; } else if (buf[0] == '\0') { /* At this stage just a newline works as a PING in order to take * the connection live. So we refresh our last interaction * timestamp. */ server.repl_transfer_lastio = time(NULL); return; } else if (buf[0] != '$') { redisLog(REDIS_WARNING,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?"); replicationAbortSyncTransfer(); return; } server.repl_transfer_left = strtol(buf+1,NULL,10); redisLog(REDIS_NOTICE, "MASTER <-> SLAVE sync: receiving %ld bytes from master", server.repl_transfer_left); return; } /* Read bulk data */ readlen = (server.repl_transfer_left < (signed)sizeof(buf)) ? server.repl_transfer_left : (signed)sizeof(buf); nread = read(fd,buf,readlen); if (nread <= 0) { redisLog(REDIS_WARNING,"I/O error trying to sync with MASTER: %s", (nread == -1) ? strerror(errno) : "connection lost"); replicationAbortSyncTransfer(); return; } server.repl_transfer_lastio = time(NULL); if (write(server.repl_transfer_fd,buf,nread) != nread) { redisLog(REDIS_WARNING,"Write error or short write writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno)); replicationAbortSyncTransfer(); return; } server.repl_transfer_left -= nread; /* Check if the transfer is now complete */ if (server.repl_transfer_left == 0) { if (rename(server.repl_transfer_tmpfile,server.dbfilename) == -1) { redisLog(REDIS_WARNING,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno)); replicationAbortSyncTransfer(); return; } redisLog(REDIS_NOTICE, "MASTER <-> SLAVE sync: Loading DB in memory"); emptyDb(); /* Before loading the DB into memory we need to delete the readable * handler, otherwise it will get called recursively since * rdbLoad() will call the event loop to process events from time to * time for non blocking loading. */ aeDeleteFileEvent(server.el,server.repl_transfer_s,AE_READABLE); if (rdbLoad(server.dbfilename) != REDIS_OK) { redisLog(REDIS_WARNING,"Failed trying to load the MASTER synchronization DB from disk"); replicationAbortSyncTransfer(); return; } /* Final setup of the connected slave <- master link */ zfree(server.repl_transfer_tmpfile); close(server.repl_transfer_fd); server.master = createClient(server.repl_transfer_s); server.master->flags |= REDIS_MASTER; server.master->authenticated = 1; server.replstate = REDIS_REPL_CONNECTED; redisLog(REDIS_NOTICE, "MASTER <-> SLAVE sync: Finished with success"); } }
// 这其实是一个回调函数,会作为参数传入另外一个函数中 static void readHandler(aeEventLoop *el, int fd, void *privdata, int mask) { client c = privdata; void *reply = NULL; REDIS_NOTUSED(el); REDIS_NOTUSED(fd); REDIS_NOTUSED(mask); /* Calculate latency only for the first read event. This means that the * server already sent the reply and we need to parse it. Parsing overhead * is not part of the latency, so calculate it only once, here. */ // 计算延时,然后比较延时,取得第一个read的event事件 if (c->latency < 0) c->latency = ustime()-(c->start); if (redisBufferRead(c->context) != REDIS_OK) { // 首先判断是否能读 fprintf(stderr,"Error: %s\n",c->context->errstr); exit(1); } else { while(c->pending) { if (redisGetReply(c->context,&reply) != REDIS_OK) { fprintf(stderr,"Error: %s\n",c->context->errstr); exit(1); } if (reply != NULL) { if (reply == (void*)REDIS_REPLY_ERROR) { // 如果获取reply回复出错,也会直接退出 fprintf(stderr,"Unexpected error reply, exiting...\n"); exit(1); } freeReplyObject(reply); /* This is an OK for prefix commands such as auth and select.*/ if (c->prefix_pending > 0) { c->prefix_pending--; // 执行到这里,请求已经执行成功,等待的请求数-1 c->pending--; /* Discard prefix commands on first response.*/ if (c->prefixlen > 0) { size_t j; sdsrange(c->obuf, c->prefixlen, -1); /* We also need to fix the pointers to the strings * we need to randomize. */ for (j = 0; j < c->randlen; j++) c->randptr[j] -= c->prefixlen; c->prefixlen = 0; } continue; } if (config.requests_finished < config.requests) config.latency[config.requests_finished++] = c->latency; c->pending--; // 全部处理完毕,调用客户端done完成后的方法 if (c->pending == 0) { clientDone(c); break; } } else { break; } } } }
void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) { char tmpfile[256], *err; int dfd, maxtries = 5; int sockerr = 0; socklen_t errlen = sizeof(sockerr); REDIS_NOTUSED(el); REDIS_NOTUSED(privdata); REDIS_NOTUSED(mask); /* If this event fired after the user turned the instance into a master * with SLAVEOF NO ONE we must just return ASAP. */ if (server.repl_state == REDIS_REPL_NONE) { close(fd); return; } /* Check for errors in the socket. */ if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &sockerr, &errlen) == -1) sockerr = errno; if (sockerr) { aeDeleteFileEvent(server.el,fd,AE_READABLE|AE_WRITABLE); redisLog(REDIS_WARNING,"Error condition on socket for SYNC: %s", strerror(sockerr)); goto error; } /* If we were connecting, it's time to send a non blocking PING, we want to * make sure the master is able to reply before going into the actual * replication process where we have long timeouts in the order of * seconds (in the meantime the slave would block). */ if (server.repl_state == REDIS_REPL_CONNECTING) { redisLog(REDIS_NOTICE,"Non blocking connect for SYNC fired the event."); /* Delete the writable event so that the readable event remains * registered and we can wait for the PONG reply. */ aeDeleteFileEvent(server.el,fd,AE_WRITABLE); server.repl_state = REDIS_REPL_RECEIVE_PONG; /* Send the PING, don't check for errors at all, we have the timeout * that will take care about this. */ syncWrite(fd,"PING\r\n",6,100); return; } /* Receive the PONG command. */ if (server.repl_state == REDIS_REPL_RECEIVE_PONG) { char buf[1024]; /* Delete the readable event, we no longer need it now that there is * the PING reply to read. */ aeDeleteFileEvent(server.el,fd,AE_READABLE); /* Read the reply with explicit timeout. */ buf[0] = '\0'; if (syncReadLine(fd,buf,sizeof(buf), server.repl_syncio_timeout*1000) == -1) { redisLog(REDIS_WARNING, "I/O error reading PING reply from master: %s", strerror(errno)); goto error; } /* We don't care about the reply, it can be +PONG or an error since * the server requires AUTH. As long as it replies correctly, it's * fine from our point of view. */ if (buf[0] != '-' && buf[0] != '+') { redisLog(REDIS_WARNING,"Unexpected reply to PING from master."); goto error; } else { redisLog(REDIS_NOTICE, "Master replied to PING, replication can continue..."); } } /* AUTH with the master if required. */ if(server.masterauth) { err = sendSynchronousCommand(fd,"AUTH",server.masterauth,NULL); if (err) { redisLog(REDIS_WARNING,"Unable to AUTH to MASTER: %s",err); sdsfree(err); goto error; } } /* Set the slave port, so that Master's INFO command can list the * slave listening port correctly. */ { sds port = sdsfromlonglong(server.port); err = sendSynchronousCommand(fd,"REPLCONF","listening-port",port, NULL); sdsfree(port); /* Ignore the error if any, not all the Redis versions support * REPLCONF listening-port. */ if (err) { redisLog(REDIS_NOTICE,"(non critical): Master does not understand REPLCONF listening-port: %s", err); sdsfree(err); } } /* Issue the SYNC command */ if (syncWrite(fd,"SYNC\r\n",6,server.repl_syncio_timeout*1000) == -1) { redisLog(REDIS_WARNING,"I/O error writing to MASTER: %s", strerror(errno)); goto error; } /* Prepare a suitable temp file for bulk transfer */ while(maxtries--) { snprintf(tmpfile,256, "temp-%d.%ld.rdb",(int)server.unixtime,(long int)getpid()); dfd = open(tmpfile,O_CREAT|O_WRONLY|O_EXCL,0644); if (dfd != -1) break; sleep(1); } if (dfd == -1) { redisLog(REDIS_WARNING,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno)); goto error; } /* Setup the non blocking download of the bulk file. */ if (aeCreateFileEvent(server.el,fd, AE_READABLE,readSyncBulkPayload,NULL) == AE_ERR) { redisLog(REDIS_WARNING, "Can't create readable event for SYNC: %s (fd=%d)", strerror(errno),fd); goto error; } server.repl_state = REDIS_REPL_TRANSFER; server.repl_transfer_size = -1; server.repl_transfer_read = 0; server.repl_transfer_last_fsync_off = 0; server.repl_transfer_fd = dfd; server.repl_transfer_lastio = server.unixtime; server.repl_transfer_tmpfile = zstrdup(tmpfile); return; error: close(fd); server.repl_transfer_s = -1; server.repl_state = REDIS_REPL_CONNECT; return; }
/* Asynchronously read the SYNC payload we receive from a master */ #define REPL_MAX_WRITTEN_BEFORE_FSYNC (1024*1024*8) /* 8 MB */ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) { char buf[4096]; ssize_t nread, readlen; off_t left; REDIS_NOTUSED(el); REDIS_NOTUSED(privdata); REDIS_NOTUSED(mask); /* If repl_transfer_size == -1 we still have to read the bulk length * from the master reply. */ if (server.repl_transfer_size == -1) { if (syncReadLine(fd,buf,1024,server.repl_syncio_timeout*1000) == -1) { redisLog(REDIS_WARNING, "I/O error reading bulk count from MASTER: %s", strerror(errno)); goto error; } if (buf[0] == '-') { redisLog(REDIS_WARNING, "MASTER aborted replication with an error: %s", buf+1); goto error; } else if (buf[0] == '\0') { /* At this stage just a newline works as a PING in order to take * the connection live. So we refresh our last interaction * timestamp. */ server.repl_transfer_lastio = server.unixtime; return; } else if (buf[0] != '$') { redisLog(REDIS_WARNING,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?"); goto error; } server.repl_transfer_size = strtol(buf+1,NULL,10); redisLog(REDIS_NOTICE, "MASTER <-> SLAVE sync: receiving %ld bytes from master", server.repl_transfer_size); return; } /* Read bulk data */ left = server.repl_transfer_size - server.repl_transfer_read; readlen = (left < (signed)sizeof(buf)) ? left : (signed)sizeof(buf); nread = read(fd,buf,readlen); if (nread <= 0) { redisLog(REDIS_WARNING,"I/O error trying to sync with MASTER: %s", (nread == -1) ? strerror(errno) : "connection lost"); replicationAbortSyncTransfer(); return; } server.repl_transfer_lastio = server.unixtime; if (write(server.repl_transfer_fd,buf,nread) != nread) { redisLog(REDIS_WARNING,"Write error or short write writing to the DB dump file needed for MASTER <-> SLAVE synchronization: %s", strerror(errno)); goto error; } server.repl_transfer_read += nread; /* Sync data on disk from time to time, otherwise at the end of the transfer * we may suffer a big delay as the memory buffers are copied into the * actual disk. */ if (server.repl_transfer_read >= server.repl_transfer_last_fsync_off + REPL_MAX_WRITTEN_BEFORE_FSYNC) { off_t sync_size = server.repl_transfer_read - server.repl_transfer_last_fsync_off; rdb_fsync_range(server.repl_transfer_fd, server.repl_transfer_last_fsync_off, sync_size); server.repl_transfer_last_fsync_off += sync_size; } /* Check if the transfer is now complete */ if (server.repl_transfer_read == server.repl_transfer_size) { if (rename(server.repl_transfer_tmpfile,server.rdb_filename) == -1) { redisLog(REDIS_WARNING,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno)); replicationAbortSyncTransfer(); return; } redisLog(REDIS_NOTICE, "MASTER <-> SLAVE sync: Loading DB in memory"); emptyDb(); /* Before loading the DB into memory we need to delete the readable * handler, otherwise it will get called recursively since * rdbLoad() will call the event loop to process events from time to * time for non blocking loading. */ aeDeleteFileEvent(server.el,server.repl_transfer_s,AE_READABLE); if (rdbLoad(server.rdb_filename) != REDIS_OK) { redisLog(REDIS_WARNING,"Failed trying to load the MASTER synchronization DB from disk"); replicationAbortSyncTransfer(); return; } /* Final setup of the connected slave <- master link */ zfree(server.repl_transfer_tmpfile); close(server.repl_transfer_fd); server.master = createClient(server.repl_transfer_s); server.master->flags |= REDIS_MASTER; server.master->authenticated = 1; server.repl_state = REDIS_REPL_CONNECTED; redisLog(REDIS_NOTICE, "MASTER <-> SLAVE sync: Finished with success"); /* Restart the AOF subsystem now that we finished the sync. This * will trigger an AOF rewrite, and when done will start appending * to the new file. */ if (server.aof_state != REDIS_AOF_OFF) { int retry = 10; stopAppendOnly(); while (retry-- && startAppendOnly() == REDIS_ERR) { redisLog(REDIS_WARNING,"Failed enabling the AOF after successful master synchrnization! Trying it again in one second."); sleep(1); } if (!retry) { redisLog(REDIS_WARNING,"FATAL: this slave instance finished the synchronization with its master, but the AOF can't be turned on. Exiting now."); exit(1); } } } return; error: replicationAbortSyncTransfer(); return; }
/* Dictionary type for latency events. */ int dictStringKeyCompare(void *privdata, const void *key1, const void *key2) { REDIS_NOTUSED(privdata); return strcmp(key1,key2) == 0; }
/* Every time a thread finished a Job, it writes a byte into the write side * of an unix pipe in order to "awake" the main thread, and this function * is called. * * Note that this is called both by the event loop, when a I/O thread * sends a byte in the notification pipe, and is also directly called from * waitEmptyIOJobsQueue(). * * In the latter case we don't want to swap more, so we use the * "privdata" argument setting it to a not NULL value to signal this * condition. */ void vmThreadedIOCompletedJob(aeEventLoop *el, int fd, void *privdata, int mask) { char buf[1]; int retval, processed = 0, toprocess = -1, trytoswap = 1; REDIS_NOTUSED(el); REDIS_NOTUSED(mask); REDIS_NOTUSED(privdata); if (privdata != NULL) trytoswap = 0; /* check the comments above... */ /* For every byte we read in the read side of the pipe, there is one * I/O job completed to process. */ #ifndef _WIN32 while((retval = read(fd,buf,1)) == 1) { #else DWORD pipe_is_on = 0; while (1) { retval = 0; /*Windows fix: We need to peek pipe, since read would block. */ if (!PeekNamedPipe((HANDLE) _get_osfhandle(fd), NULL, 0, NULL, &pipe_is_on, NULL)) { redisLog(REDIS_DEBUG,"PeekReadPipe failed %s", strerror(GetLastError())); break; } /* No data on pipe */ if (!pipe_is_on) break; if ((retval = read(fd,buf,1)) != 1) break; #endif iojob *j; listNode *ln; struct dictEntry *de; /* Get the processed element (the oldest one) */ lockThreadedIO(); redisLog(REDIS_DEBUG,"Processing I/O completed job"); redisAssert(listLength(server.io_processed) != 0); if (toprocess == -1) { toprocess = (listLength(server.io_processed)*REDIS_MAX_COMPLETED_JOBS_PROCESSED)/100; if (toprocess <= 0) toprocess = 1; } ln = listFirst(server.io_processed); j = ln->value; listDelNode(server.io_processed,ln); unlockThreadedIO(); /* If this job is marked as canceled, just ignore it */ if (j->canceled) { freeIOJob(j); continue; } /* Post process it in the main thread, as there are things we * can do just here to avoid race conditions and/or invasive locks */ redisLog(REDIS_DEBUG,"COMPLETED Job type: %d, ID %p, key: %s", j->type, (void*)j->id, (unsigned char*)j->key->ptr); de = dictFind(j->db->dict,j->key->ptr); redisAssert(de != NULL); if (j->type == REDIS_IOJOB_LOAD) { redisDb *db; vmpointer *vp = dictGetEntryVal(de); /* Key loaded, bring it at home */ vmMarkPagesFree(vp->page,vp->usedpages); redisLog(REDIS_DEBUG, "VM: object %s loaded from disk (threaded)", (unsigned char*) j->key->ptr); server.vm_stats_swapped_objects--; server.vm_stats_swapins++; dictGetEntryVal(de) = j->val; incrRefCount(j->val); db = j->db; /* Handle clients waiting for this key to be loaded. */ handleClientsBlockedOnSwappedKey(db,j->key); freeIOJob(j); zfree(vp); } else if (j->type == REDIS_IOJOB_PREPARE_SWAP) { /* Now we know the amount of pages required to swap this object. * Let's find some space for it, and queue this task again * rebranded as REDIS_IOJOB_DO_SWAP. */ if (!vmCanSwapOut() || vmFindContiguousPages(&j->page,j->pages) == REDIS_ERR) { /* Ooops... no space or we can't swap as there is * a fork()ed Redis trying to save stuff on disk. */ j->val->storage = REDIS_VM_MEMORY; /* undo operation */ freeIOJob(j); } else { /* Note that we need to mark this pages as used now, * if the job will be canceled, we'll mark them as freed * again. */ vmMarkPagesUsed(j->page,j->pages); j->type = REDIS_IOJOB_DO_SWAP; lockThreadedIO(); queueIOJob(j); unlockThreadedIO(); } } else if (j->type == REDIS_IOJOB_DO_SWAP) { vmpointer *vp; /* Key swapped. We can finally free some memory. */ if (j->val->storage != REDIS_VM_SWAPPING) { vmpointer *vp = (vmpointer*) j->id; printf("storage: %d\n",vp->storage); printf("key->name: %s\n",(char*)j->key->ptr); printf("val: %p\n",(void*)j->val); printf("val->type: %d\n",j->val->type); printf("val->ptr: %s\n",(char*)j->val->ptr); } redisAssert(j->val->storage == REDIS_VM_SWAPPING); vp = createVmPointer(j->val); vp->page = j->page; vp->usedpages = j->pages; dictGetEntryVal(de) = vp; /* Fix the storage otherwise decrRefCount will attempt to * remove the associated I/O job */ j->val->storage = REDIS_VM_MEMORY; decrRefCount(j->val); redisLog(REDIS_DEBUG, "VM: object %s swapped out at %lld (%lld pages) (threaded)", (unsigned char*) j->key->ptr, (unsigned long long) j->page, (unsigned long long) j->pages); server.vm_stats_swapped_objects++; server.vm_stats_swapouts++; freeIOJob(j); /* Put a few more swap requests in queue if we are still * out of memory */ if (trytoswap && vmCanSwapOut() && zmalloc_used_memory() > server.vm_max_memory) { int more = 1; while(more) { lockThreadedIO(); more = listLength(server.io_newjobs) < (unsigned) server.vm_max_threads; unlockThreadedIO(); /* Don't waste CPU time if swappable objects are rare. */ if (vmSwapOneObjectThreaded() == REDIS_ERR) { trytoswap = 0; break; } } } } processed++; if (processed == toprocess) return; } if (retval < 0 && errno != EAGAIN) { redisLog(REDIS_WARNING, "WARNING: read(2) error in vmThreadedIOCompletedJob() %s", strerror(errno)); } } void lockThreadedIO(void) { pthread_mutex_lock(&server.io_mutex); }
/* Missing wait3() implementation */ pid_t wait3(int *stat_loc, int options, void *rusage) { REDIS_NOTUSED(stat_loc); REDIS_NOTUSED(options); REDIS_NOTUSED(rusage); return (pid_t) waitpid((intptr_t) -1, 0, WAIT_FLAGS); }
void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) { char tmpfile[256], *err; int dfd, maxtries = 5; int sockerr = 0; socklen_t errlen = sizeof(sockerr); REDIS_NOTUSED(el); REDIS_NOTUSED(privdata); REDIS_NOTUSED(mask); /* If this event fired after the user turned the instance into a master * with SLAVEOF NO ONE we must just return ASAP. */ if (server.repl_state == REDIS_REPL_NONE) { close(fd); return; } /* Check for errors in the socket. */ if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &sockerr, &errlen) == -1) sockerr = errno; if (sockerr) { aeDeleteFileEvent(server.el,fd,AE_READABLE|AE_WRITABLE); redisLog(REDIS_WARNING,"Error condition on socket for SYNC: %s", strerror(sockerr)); goto error; } /* If we were connecting, it's time to send a non blocking PING, we want to * make sure the master is able to reply before going into the actual * replication process where we have long timeouts in the order of * seconds (in the meantime the slave would block). */ //如果之前正在REDIS_REPL_CONNECTING,现在有可读可写事件了,说明连接成功了,下一步就是需要发送PING请求 if (server.repl_state == REDIS_REPL_CONNECTING) { redisLog(REDIS_NOTICE,"Non blocking connect for SYNC fired the event."); /* Delete the writable event so that the readable event remains * registered and we can wait for the PONG reply. */ aeDeleteFileEvent(server.el,fd,AE_WRITABLE); server.repl_state = REDIS_REPL_RECEIVE_PONG; /* Send the PING, don't check for errors at all, we have the timeout * that will take care about this. */ syncWrite(fd,"PING\r\n",6,100);//同步阻塞发送PING命令,这样服务端会返回"+PONG\r\n"字符串的 //这里实现的比较简单,就是发送完PING后,当连接可读可写时,再进syncWithMaster这个函数的时候 //下面的代码会判断PONG这个动作,然后就会阻塞去读取一行回复,看他是不是成功了。 return; } /* Receive the PONG command. */ if (server.repl_state == REDIS_REPL_RECEIVE_PONG) { char buf[1024]; //上面在REDIS_REPL_CONNECTING状态的时候给master发送了一行PING指令,这样master会返回"+PONG\r\n"的, //现在连接可读或者可写了,所以我们阻塞读取这么多数据,判断返回是否OK。 /* Delete the readable event, we no longer need it now that there is * the PING reply to read. */ aeDeleteFileEvent(server.el,fd,AE_READABLE); /* Read the reply with explicit timeout. */ buf[0] = '\0'; if (syncReadLine(fd,buf,sizeof(buf), server.repl_syncio_timeout*1000) == -1) { redisLog(REDIS_WARNING, "I/O error reading PING reply from master: %s", strerror(errno)); goto error; } /* We accept only two replies as valid, a positive +PONG reply * (we just check for "+") or an authentication error. * Note that older versions of Redis replied with "operation not * permitted" instead of using a proper error code, so we test * both. */ if (buf[0] != '+' && strncmp(buf,"-NOAUTH",7) != 0 && strncmp(buf,"-ERR operation not permitted",28) != 0) { redisLog(REDIS_WARNING,"Error reply to PING from master: '%s'",buf); goto error; } else { redisLog(REDIS_NOTICE, "Master replied to PING, replication can continue..."); } } /* AUTH with the master if required. */ if(server.masterauth) { err = sendSynchronousCommand(fd,"AUTH",server.masterauth,NULL); if (err) { redisLog(REDIS_WARNING,"Unable to AUTH to MASTER: %s",err); sdsfree(err); goto error; } } /* Set the slave port, so that Master's INFO command can list the * slave listening port correctly. */ { sds port = sdsfromlonglong(server.port); err = sendSynchronousCommand(fd,"REPLCONF","listening-port",port, NULL); sdsfree(port); /* Ignore the error if any, not all the Redis versions support * REPLCONF listening-port. */ if (err) { redisLog(REDIS_NOTICE,"(non critical): Master does not understand REPLCONF listening-port: %s", err); sdsfree(err); } } //到这里的话,连接肯定成功了,而且PING指令也都收到了回复。所以果断发送SYNC指令 /* Issue the SYNC command */ if (syncWrite(fd,"SYNC\r\n",6,server.repl_syncio_timeout*1000) == -1) { redisLog(REDIS_WARNING,"I/O error writing to MASTER: %s", strerror(errno)); goto error; } /* Prepare a suitable temp file for bulk transfer */ while(maxtries--) { snprintf(tmpfile,256, "temp-%d.%ld.rdb",(int)server.unixtime,(long int)getpid()); dfd = open(tmpfile,O_CREAT|O_WRONLY|O_EXCL,0644); if (dfd != -1) break; sleep(1); } if (dfd == -1) { redisLog(REDIS_WARNING,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno)); goto error; } //SYNC命令已经发送了,以后的可读可写事件就依靠readSyncBulkPayload来读取解析了。 /* Setup the non blocking download of the bulk file. */ if (aeCreateFileEvent(server.el,fd, AE_READABLE,readSyncBulkPayload,NULL) == AE_ERR) { redisLog(REDIS_WARNING, "Can't create readable event for SYNC: %s (fd=%d)", strerror(errno),fd); goto error; } server.repl_state = REDIS_REPL_TRANSFER; server.repl_transfer_size = -1; server.repl_transfer_read = 0; server.repl_transfer_last_fsync_off = 0; server.repl_transfer_fd = dfd; server.repl_transfer_lastio = server.unixtime; server.repl_transfer_tmpfile = zstrdup(tmpfile); return; error: close(fd); server.repl_transfer_s = -1; server.repl_state = REDIS_REPL_CONNECT; return; }
static int enableRawMode(int fd) { #ifndef _WIN32 struct termios raw; if (!isatty(STDIN_FILENO)) goto fatal; if (!atexit_registered) { atexit(linenoiseAtExit); atexit_registered = 1; } if (tcgetattr(fd,&orig_termios) == -1) goto fatal; raw = orig_termios; /* modify the original mode */ /* input modes: no break, no CR to NL, no parity check, no strip char, * no start/stop output control. */ raw.c_iflag &= ~(BRKINT | ICRNL | INPCK | ISTRIP | IXON); /* output modes - disable post processing */ raw.c_oflag &= ~(OPOST); /* control modes - set 8 bit chars */ raw.c_cflag |= (CS8); /* local modes - choing off, canonical off, no extended functions, * no signal chars (^Z,^C) */ raw.c_lflag &= ~(ECHO | ICANON | IEXTEN | ISIG); /* control chars - set return condition: min number of bytes and timer. * We want read to return every single byte, without timeout. */ raw.c_cc[VMIN] = 1; raw.c_cc[VTIME] = 0; /* 1 byte, no timer */ /* put terminal in raw mode after flushing */ if (tcsetattr(fd,TCSAFLUSH,&raw) < 0) goto fatal; rawmode = 1; #else REDIS_NOTUSED(fd); if (!atexit_registered) { /* Init windows console handles only once */ hOut = GetStdHandle(STD_OUTPUT_HANDLE); if (hOut==INVALID_HANDLE_VALUE) goto fatal; if (!GetConsoleMode(hOut, &consolemode)) { CloseHandle(hOut); errno = ENOTTY; return -1; }; hIn = GetStdHandle(STD_INPUT_HANDLE); if (hIn == INVALID_HANDLE_VALUE) { CloseHandle(hOut); errno = ENOTTY; return -1; } GetConsoleMode(hIn, &consolemode); SetConsoleMode(hIn, ENABLE_PROCESSED_INPUT); /* Cleanup them at exit */ atexit(linenoiseAtExit); atexit_registered = 1; } rawmode = 1; #endif return 0; fatal: errno = ENOTTY; return -1; }