/* Append a human readable, one-line description of 'client' to the sds
 * string 's' and return the resulting sds string.
 *
 * The "flags" field contains 'c' when CLIENT_CLOSE_AFTER_REPLY is set,
 * 'A' when CLIENT_CLOSE_ASAP is set, and 'N' when neither is set. */
sds catClientInfoString(sds s, client *client) {
    char flags[16], events[3];
    size_t nflags = 0, nevents = 0;
    int emask;

    /* Client flags. */
    if (client->flags & CLIENT_CLOSE_AFTER_REPLY) flags[nflags++] = 'c';
    if (client->flags & CLIENT_CLOSE_ASAP) flags[nflags++] = 'A';
    if (nflags == 0) flags[nflags++] = 'N';
    flags[nflags] = '\0';

    /* Event mask of the client socket. Note that 'events' is filled in
     * here but the format string below has no field that prints it. */
    if (client->fd == -1)
        emask = 0;
    else
        emask = aeGetFileEvents(server.el,client->fd);
    if (emask & AE_READABLE) events[nevents++] = 'r';
    if (emask & AE_WRITABLE) events[nevents++] = 'w';
    events[nevents] = '\0';

    return sdscatfmt(s,
        "id=%U fd=%i name=%s age=%I idle=%I flags=%s qbuf=%U qbuf-free=%U obl=%U cmd=%s",
        (unsigned long long) client->id,
        client->fd,
        client->name ? (char*)client->name : "",
        (long long)(server.unixtime - client->ctime),
        (long long)(server.unixtime - client->lastinteraction),
        flags,
        (unsigned long long) sdslen(client->querybuf),
        (unsigned long long) sdsavail(client->querybuf),
        (unsigned long long) client->bufpos,
        client->lastcmd ? client->lastcmd->name : "NULL");
}
/**
 * Make sure at least addlen bytes are free after the end of s.
 *
 * @s      : the sds string to grow
 * @addlen : the number of bytes the caller wants to append
 *
 * Returns s itself when it already has enough free space. Otherwise the
 * header and buffer are reallocated to hold (len + addlen) * 2 bytes
 * (twice what is needed, so that the next append may not have to
 * reallocate again) and the new buffer pointer is returned.
 * On allocation failure sdsOomAbort() is called when SDS_ABORT_ON_OOM is
 * defined, otherwise NULL is returned.
 **/
static sds sdsMakeRoomFor(sds s, size_t addlen) {
    struct sdshdr *oldhdr, *grown;
    size_t curlen, capacity;

    /* Nothing to do if the free space already covers the request. */
    if (sdsavail(s) >= addlen) return s;

    curlen = sdslen(s);
    /* The header sits immediately before the character buffer. */
    oldhdr = (void*) (s - sizeof(struct sdshdr));
    capacity = 2 * (curlen + addlen);

    grown = zrealloc(oldhdr, sizeof(struct sdshdr) + capacity + 1);
    if (grown == NULL) {
#ifdef SDS_ABORT_ON_OOM
        sdsOomAbort();
#else
        return NULL;
#endif
    }

    /* Only the free counter changes; the string length is untouched. */
    grown->free = capacity - curlen;
    return grown->buf;
}
/* Append len bytes from buf to the reader's internal buffer.
 *
 * Returns REDIS_ERR when the reader is already in an error state or when
 * the buffer cannot be grown (the OOM error is set on the reader in that
 * case), REDIS_OK otherwise. A NULL buf or a zero len is a no-op. */
int redisReaderFeed(redisReader *r, const char *buf, size_t len) {
    sds grown;

    /* A reader in an erroneous state accepts no more data. */
    if (r->err) return REDIS_ERR;

    /* Nothing to copy. */
    if (buf == NULL || len < 1) return REDIS_OK;

    /* Drop the internal buffer when it is empty but holds a lot of
     * unused space. */
    if (r->len == 0 && sdsavail(r->buf) > 16*1024) {
        sdsfree(r->buf);
        r->buf = sdsempty();
        r->pos = 0;

        /* r->buf should not be NULL since we just free'd a larger one. */
        assert(r->buf != NULL);
    }

    grown = sdscatlen(r->buf,buf,len);
    if (grown == NULL) {
        __redisReaderSetErrorOOM(r);
        return REDIS_ERR;
    }

    r->buf = grown;
    r->len = sdslen(grown);
    return REDIS_OK;
}
/* Demo driver that exercises the basic sds operations (new, cat, cpy, trim,
 * range, case conversion, compare) and prints the result of each step.
 *
 * sdscat(), sdscpy() and sdstrim() return the string to use from then on;
 * the pointer passed in may no longer be valid, so every result is
 * reassigned before it is used again. */
int main(void) {
    char hello[20] = "hello,world!";
    const char *teststr = "test a string";
    sds str, newstr, cmpstr;

    printf("hello:%s\n", hello);

    printf("str new\n");
    str = sdsnew(hello);
    printf("str:%s\n", str);
    /* Lengths are size_t values: print them with %zu, not %d. */
    printf("str len:%zu\n", (size_t)sdslen(str));
    printf("str avail:%zu\n", (size_t)sdsavail(str));

    printf("str cat\n");
    newstr = sdscat(str, "this is strcat content");
    /* The old pointer may have been invalidated by a reallocation: make
     * 'str' follow the new buffer before printing it. */
    str = newstr;
    printf("str:%s\n", str);
    printf("str:%s\n", newstr);
    printf("str len:%zu\n", (size_t)sdslen(newstr));
    printf("str avail:%zu\n", (size_t)sdsavail(newstr));

    printf("str cpy\n");
    newstr = sdscpy(newstr, teststr);
    printf("str:%s\n", newstr);
    printf("str len:%zu\n", (size_t)sdslen(newstr));
    printf("str avail:%zu\n", (size_t)sdsavail(newstr));

    printf("trim char \n");
    newstr = sdstrim(newstr, "g");
    printf("str:%s\n", newstr);

    printf("sds range \n");
    sdsrange(newstr, 1, 3);
    printf("str:%s\n", newstr);

    printf("sds toupper \n");
    sdstoupper(newstr);
    printf("str:%s\n", newstr);

    printf("sds tolower \n");
    sdstolower(newstr);
    printf("str:%s\n", newstr);

    printf("sds cmp \n");
    cmpstr = sdsnew("est");
    printf("str:%d\n", sdscmp(newstr, cmpstr));

    return 0;
}
int redisReplyReaderGetReply(void *reader, void **reply) { redisReader *r = reader; if (reply != NULL) *reply = NULL; /* When the buffer is empty, there will never be a reply. */ if (r->len == 0) return REDIS_OK; /* Set first item to process when the stack is empty. */ if (r->ridx == -1) { r->rstack[0].type = -1; r->rstack[0].elements = -1; r->rstack[0].idx = -1; r->rstack[0].obj = NULL; r->rstack[0].parent = NULL; r->rstack[0].privdata = r->privdata; r->ridx = 0; } /* Process items in reply. */ while (r->ridx >= 0) if (processItem(r) < 0) break; /* Discard the consumed part of the buffer. */ if (r->pos > 0) { if (r->pos == r->len) { /* sdsrange has a quirck on this edge case. */ sdsfree(r->buf); r->buf = sdsempty(); } else { r->buf = sdsrange(r->buf,r->pos,r->len); } r->pos = 0; r->len = sdslen(r->buf); } /* Emit a reply when there is one. */ if (r->ridx == -1) { void *aux = r->reply; r->reply = NULL; /* Destroy the buffer when it is empty and is quite large. */ if (r->len == 0 && sdsavail(r->buf) > 16*1024) { sdsfree(r->buf); r->buf = sdsempty(); r->pos = 0; } /* Check if there actually *is* a reply. */ if (r->error != NULL) { return REDIS_ERR; } else { if (reply != NULL) *reply = aux; } } return REDIS_OK; }
/* Try to encode a string object in order to save space.
 *
 * Returns the object to use from now on: either 'o' itself (possibly
 * re-encoded in place) or one of the shared integer objects, in which case
 * a reference to 'o' has been released. */
robj *tryObjectEncoding(robj *o) {
    long value;
    sds s = o->ptr;
    size_t len;

    /* Already encoded. */
    if (o->encoding != REDIS_ENCODING_RAW) return o;

    /* It's not safe to encode shared objects: shared objects can be shared
     * everywhere in the "object space" of Redis. Encoded objects can only
     * appear as "values" (and not, for instance, as keys). */
    if (o->refcount > 1) return o;

    /* Currently only strings are encoded. */
    redisAssertWithInfo(NULL,o,o->type == REDIS_STRING);

    len = sdslen(s);
    if (len <= 21 && string2l(s,len,&value)) {
        /* The string is representable as a long integer.
         *
         * A shared object is used only when the value is inside the shared
         * range and maxmemory is not in use, because with maxmemory every
         * object needs a private LRU field for the LRU algorithm to work
         * well. */
        if (server.maxmemory == 0 &&
            value >= 0 && value < REDIS_SHARED_INTEGERS)
        {
            decrRefCount(o);
            incrRefCount(shared.integers[value]);
            return shared.integers[value];
        }

        o->encoding = REDIS_ENCODING_INT;
        sdsfree(o->ptr);
        o->ptr = (void*) value;
        return o;
    }

    /* The object can't be encoded. As a last resort, for strings larger
     * than the arbitrary value of 32 bytes, release the unused space at
     * the end of the SDS string when it exceeds 10% of the length. This
     * code was backported from the unstable branch where this is performed
     * when the object is too large to be encoded as EMBSTR. */
    if (len > 32 &&
        o->encoding == REDIS_ENCODING_RAW &&
        sdsavail(s) > len/10)
    {
        o->ptr = sdsRemoveFreeSpace(o->ptr);
    }

    /* Return the original object. */
    return o;
}
/* Ensure that at least addlen bytes are free after the end of s.
 *
 * Returns s when the free space is already sufficient; otherwise the
 * header and buffer are reallocated to hold (len + addlen) * 2 bytes plus
 * the terminator and the new buffer pointer is returned. sdsOomAbort() is
 * called when realloc() fails. */
static sds sdsMakeRoomFor(sds s, size_t addlen)
{
    struct sdshdr *hdr;
    size_t used, capacity;

    if (sdsavail(s) >= addlen)
        return s;

    used = sdslen(s);
    capacity = 2 * (used + addlen);

    /* The header lives right before the character buffer. */
    hdr = realloc((void*)(s - sizeof(struct sdshdr)),
                  sizeof(struct sdshdr) + capacity + 1);
    if (hdr == NULL)
        sdsOomAbort();

    hdr->free = capacity - used;
    return hdr->buf;
}
/* Grow the free space after the end of s so that at least addlen more
 * bytes fit.
 *
 * The required size (len + addlen) is doubled while it is below
 * SDS_MAX_PREALLOC, and increased by SDS_MAX_PREALLOC otherwise. Returns s
 * when no growth is needed, the new buffer pointer after a reallocation,
 * or NULL when zrealloc() fails. */
sds sdsMakeRoomFor(sds s, size_t addlen) {
    struct sdshdr *hdr, *grown;
    size_t used, target;

    if (sdsavail(s) >= addlen) return s;

    used = sdslen(s);
    hdr = (void*)(s - sizeof(struct sdshdr));

    target = used + addlen;
    target = (target < SDS_MAX_PREALLOC) ? target * 2
                                         : target + SDS_MAX_PREALLOC;

    grown = zrealloc(hdr, sizeof(struct sdshdr) + target + 1);
    if (grown == NULL) return NULL;

    grown->free = target - used;
    return grown->buf;
}
// try encode a string value as integer. value_t *tryValueEncoding(value_t *val) { if (val->encoding == ENCODING_INT) return val; size_t len = sdslen(val->ptr); long v; if (len > 21 || !string2l(val->ptr, len, &v)) { if (len > 32 && sdsavail(val->ptr) > len / 10) { val->ptr = sdsRemoveFreeSpace(val->ptr); } return val; } else { val->encoding = ENCODING_INT; sdsfree(val->ptr); val->ptr = (void*) ((long) v); } }
/* Append len bytes from buf to the reader's internal buffer.
 * A NULL buf or a zero len leaves the reader untouched. */
void redisReplyReaderFeed(void *reader, const char *buf, size_t len) {
    redisReader *r = reader;

    /* Nothing to copy. */
    if (buf == NULL || len < 1) return;

    /* Drop the internal buffer when it is empty but holds a lot of
     * unused space. */
    if (r->len == 0 && sdsavail(r->buf) > 16*1024) {
        sdsfree(r->buf);
        r->buf = sdsempty();
        r->pos = 0;
    }

    /* Copy the provided data and refresh the cached length. */
    r->buf = sdscatlen(r->buf,buf,len);
    r->len = sdslen(r->buf);
}
/* Enlarge the free space at the end of the sds string so that the caller
 * can safely write up to addlen bytes after the end of the string, plus one
 * more byte for the nul terminator.
 *
 * Note: the *length* of the string as returned by sdslen() is not changed,
 * only the amount of allocated space.
 *
 * Returns the (possibly moved) string, or NULL on allocation failure. */
sds sdsMakeRoomFor(sds s, size_t addlen) {
    void *oldhdr, *newhdr;
    size_t curlen, want;
    char oldtype = s[-1] & SDS_TYPE_MASK;
    char newtype;
    int hdrlen;

    /* Return ASAP if there is enough space left. */
    if (sdsavail(s) >= addlen) return s;

    curlen = sdslen(s);
    oldhdr = (char*)s - sdsHdrSize(oldtype);

    /* Over-allocate: double small requests, add a fixed amount to large
     * ones. */
    want = curlen + addlen;
    want = (want < SDS_MAX_PREALLOC) ? want * 2 : want + SDS_MAX_PREALLOC;

    /* Don't use type 5: the user is appending to the string and type 5 is
     * not able to remember empty space, so sdsMakeRoomFor() would have to
     * be called at every appending operation. */
    newtype = sdsReqType(want);
    if (newtype == SDS_TYPE_5) newtype = SDS_TYPE_8;
    hdrlen = sdsHdrSize(newtype);

    if (oldtype != newtype) {
        /* The header size changes, so the string has to be moved to a new
         * allocation: realloc can't be used. */
        newhdr = s_malloc(hdrlen + want + 1);
        if (newhdr == NULL) return NULL;
        memcpy((char*)newhdr + hdrlen, s, curlen + 1);
        s_free(oldhdr);
        s = (char*)newhdr + hdrlen;
        s[-1] = newtype;
        sdssetlen(s, curlen);
    } else {
        newhdr = s_realloc(oldhdr, hdrlen + want + 1);
        if (newhdr == NULL) return NULL;
        s = (char*)newhdr + hdrlen;
    }

    sdssetalloc(s, want);
    return s;
}
/* Test driver: checks that sdsempty() yields a zero-length string and that
 * sdsalloc(NULL, 2) yields a zero-length string with 2 bytes available,
 * then prints the report through test_report(). */
int main(void) {
    {
        sds x = sdsempty();

        test_cond("sdsempty() should be strlen 0",
            strlen(x) == 0 && sdslen(x) == 0 && memcmp(x,"\0",1) == 0);
        sdsfree(x);

        x = sdsalloc(NULL, 2);
        test_cond("Create a NULL string with reserved space 2 bytes",
            sdslen(x) == 0 && sdsavail(x) == 2);
        sdsfree(x);
    }
    /* Terminate the statement explicitly; this is harmless if the macro
     * already ends with a semicolon. */
    test_report();
    return 0;
}
/* Ensure that at least addlen bytes are free after the end of s.
 *
 * Returns s when there is already enough room. Otherwise the header and
 * buffer are reallocated to hold (len + addlen) * 2 bytes plus the
 * terminator and the new buffer pointer is returned. On allocation failure
 * sdsOomAbort() is called when SDS_ABORT_ON_OOM is defined, otherwise NULL
 * is returned. */
static sds sdsMakeRoomFor(sds s, size_t addlen) {
    struct sdshdr *hdr, *resized;
    size_t used, total;

    if (sdsavail(s) >= addlen) return s;

    used = sdslen(s);
    total = (used + addlen) * 2;
    /* The header is stored right before the character buffer. */
    hdr = (void*) (s - sizeof(struct sdshdr));

    resized = zrealloc(hdr, sizeof(struct sdshdr) + total + 1);
    if (resized == NULL) {
#ifdef SDS_ABORT_ON_OOM
        sdsOomAbort();
#else
        return NULL;
#endif
    }

    resized->free = (int)(total - used);
    return resized->buf;
}
/* Enlarge the free space at the end of the sds string so that the caller
 * can overwrite up to addlen bytes after the end of the string, plus one
 * more byte for the nul terminator.
 *
 * Note: the *length* of the string as returned by sdslen() does not change,
 * only the free buffer space does.
 *
 * Returns NULL when s is NULL or when realloc() fails, s itself when it
 * already has enough room, and the new buffer pointer otherwise. */
sds sdsMakeRoomFor(sds s, size_t addlen) {
    sdshdr *grown;
    size_t used, target;

    if (s == NULL) return NULL;
    if (sdsavail(s) >= addlen) return s;

    used = sdslen(s);

    /* Double small requests, add a fixed amount to large ones. */
    target = used + addlen;
    if (target >= SDS_MAX_PREALLOC)
        target += SDS_MAX_PREALLOC;
    else
        target *= 2;

    grown = (sdshdr*) realloc(sds_start(s), sizeof *grown + target + 1);
    if (grown == NULL) return NULL;

    grown->free = target - used;
    return grown->buf;
}
/*
 * Extend the buffer of an sds string so that, once this function returns,
 * at least addlen + 1 bytes are free after the end of the string
 * (the extra byte is reserved for the \0 terminator).
 *
 * Return value
 *  sds : the extended string on success,
 *        NULL when the allocation fails.
 */
sds sdsMakeRoomFor(sds s, size_t addlen) {
    struct sdshdr *hdr, *grown;
    size_t used, target;

    /* The current free space is already enough: nothing to extend. */
    if (sdsavail(s) >= addlen) return s;

    /* Number of bytes currently in use. */
    used = sdslen(s);
    hdr = (void*) (s - sizeof(struct sdshdr));

    /* Minimum length needed, then the preallocation policy: below
     * SDS_MAX_PREALLOC allocate twice what is needed, otherwise allocate
     * SDS_MAX_PREALLOC bytes more than what is needed. */
    target = used + addlen;
    target = (target < SDS_MAX_PREALLOC) ? target * 2
                                         : target + SDS_MAX_PREALLOC;

    grown = zrealloc(hdr, sizeof(struct sdshdr) + target + 1);

    /* Out of memory: report the failure to the caller. */
    if (grown == NULL) return NULL;

    /* Update the free space counter and hand back the string. */
    grown->free = target - used;
    return grown->buf;
}
/* Grow the free space after the end of s so that at least addlen more
 * bytes fit.
 *
 * The required size (len + addlen) is doubled while it is below
 * SDS_MAX_PREALLOC and increased by SDS_MAX_PREALLOC otherwise. Returns s
 * when no growth is needed and the new buffer pointer after a
 * reallocation. When realloc() fails, sdsOomAbort() is called if
 * SDS_ABORT_ON_OOM is defined, otherwise NULL is returned. */
sds sdsMakeRoomFor(sds s, size_t addlen) {
    struct sdshdr *hdr, *resized;
    size_t used, wanted;

    if (sdsavail(s) >= addlen) return s;

    used = sdslen(s);
    wanted = used + addlen;
    if (wanted >= SDS_MAX_PREALLOC)
        wanted += SDS_MAX_PREALLOC;
    else
        wanted *= 2;

    /* The header is stored right before the character buffer. */
    hdr = (void*) (s - sizeof(struct sdshdr));
    resized = realloc(hdr, sizeof(struct sdshdr) + wanted + 1);
    if (resized == NULL) {
#ifdef SDS_ABORT_ON_OOM
        sdsOomAbort();
#else
        return NULL;
#endif
    }

    resized->free = wanted - used;
    return resized->buf;
}
/* Obtain more room in an existing string and return the string with the
 * extended space, or NULL when the reallocation fails. */
sds sdsMakeRoomFor(sds s, size_t addlen) {
    struct sdshdr *cur, *bigger;
    size_t curlen, need;

    /* If the space currently available already covers the request,
     * return the original string directly. */
    if (sdsavail(s) >= addlen) return s;

    curlen = sdslen(s);
    cur = (void*) (s - sizeof(struct sdshdr));

    /* Length required for the new string = current length + addlen,
     * then over-allocate according to SDS_MAX_PREALLOC. */
    need = curlen + addlen;
    if (need < SDS_MAX_PREALLOC) {
        need *= 2;
    } else {
        need += SDS_MAX_PREALLOC;
    }

    bigger = zrealloc(cur, sizeof(struct sdshdr) + need + 1);
    if (bigger == NULL) return NULL;

    /* The free space of the new string is the new length minus the
     * length that was already in use. */
    bigger->free = need - curlen;

    /* Return the character buffer of the new string. */
    return bigger->buf;
}
/* Per-client thread entry point.
 *
 * 'arg' is the client_t to serve. The function initializes the client's
 * query/reply buffers and request state, then loops forever: it waits on
 * the client fd with select() (1 second timeout), reads and processes
 * input, forwards accumulated commands through arc_do_request(), and
 * writes pending reply data back to the client.
 *
 * NOTE(review): several paths call freeClient (c) and then keep using 'c'
 * (the loop has no break/return after them). Presumably freeClient ()
 * terminates the calling thread -- confirm, otherwise this is a
 * use-after-free. */
static void * clientThread (void *arg)
{
  client_t *c = (client_t *) arg;
  fd_set rfds, wfds;
  int ret;

  /* Start with empty buffers that already have room reserved. */
  c->querybuf = sdsMakeRoomFor (sdsempty (), DEFAULT_QUERY_BUF_SIZE);
  c->replybuf = sdsMakeRoomFor (sdsempty (), DEFAULT_QUERY_BUF_SIZE);
  c->argc = 0;
  c->argv = NULL;
  c->argvlen = NULL;
  c->reqtype = 0;
  c->multibulklen = 0;
  c->bulklen = -1;
  c->rqst = arc_create_request ();
  c->flags = 0;
  c->total_append_command = 0;

  FD_ZERO (&rfds);
  FD_ZERO (&wfds);

  while (1)
    {
      struct timeval timeout;

      /* Rebuild the fd sets: read interest unless the client is being
       * closed, write interest only when reply data is pending. */
      FD_CLR (c->fd, &rfds);
      FD_CLR (c->fd, &wfds);
      if (!(c->flags & REDIS_CLOSE_AFTER_REPLY))
        FD_SET (c->fd, &rfds);
      if (sdslen (c->replybuf) > 0)
        FD_SET (c->fd, &wfds);

      timeout.tv_sec = 1;
      timeout.tv_usec = 0;
      ret = select (c->fd + 1, &rfds, &wfds, NULL, &timeout);
      if (ret == -1)
        {
          perror ("select");
          freeClient (c);
        }

      /* On shutdown stop reading and close once the reply is flushed. */
      if (server.shutdown_signal)
        {
          c->flags |= REDIS_CLOSE_AFTER_REPLY;
        }

      /* readable */
      if (FD_ISSET (c->fd, &rfds))
        {
          int pos = sdslen (c->querybuf);
          int avail = sdsavail (c->querybuf);
          ssize_t nread;

          /* Query buffer is full: ask for as much extra room as the
           * current length. */
          if (avail == 0)
            {
              c->querybuf = sdsMakeRoomFor (c->querybuf, sdslen (c->querybuf));
              avail = sdsavail (c->querybuf);
            }

          /* Read directly into the free space after the current data. */
          nread = read (c->fd, c->querybuf + pos, avail);
          if (nread > 0)
            {
              sdsIncrLen (c->querybuf, nread);
              processInputBuffer (c);
              if (c->total_append_command)
                {
                  /* arc_errno / arc_be_errno are only assigned (and only
                   * read) on the ret == -1 paths below. */
                  int arc_errno, arc_be_errno, be_errno;
                  arc_ref_t *arc_ref;

                  arc_ref = acquire_arc_ref ();
                  ret = arc_do_request (arc_ref->arc, c->rqst, server.query_timeout_millis, &be_errno);
                  if (ret == -1)
                    {
                      arc_errno = errno;
                      arc_be_errno = be_errno;
                    }
                  else
                    {
                      ret = processReply (c, &be_errno);
                      if (ret == -1)
                        {
                          arc_errno = errno;
                          arc_be_errno = be_errno;
                        }
                    }

                  /* Release the finished request and prepare a fresh one
                   * for the next batch of commands. */
                  arc_free_request (c->rqst);
                  release_arc_ref (arc_ref);
                  c->rqst = arc_create_request ();

                  /* On failure reply with an error and schedule the
                   * client to be closed after the reply is sent. */
                  if (ret == -1)
                    {
                      if (arc_errno == ARC_ERR_TIMEOUT || (arc_errno == ARC_ERR_BACKEND && arc_be_errno == ARC_ERR_TIMEOUT))
                        {
                          addReplyStr (c, "-ERR Redis Timeout\r\n");
                        }
                      else
                        {
                          addReplyStr (c, "-ERR Internal Error\r\n");
                        }
                      c->flags |= REDIS_CLOSE_AFTER_REPLY;
                    }
                }
            }
          else
            {
              /* nread == 0 or any error other than EAGAIN frees the
               * client. */
              if (nread == -1 && errno == EAGAIN)
                {
                  /* Skip */
                }
              else
                {
                  freeClient (c);
                }
            }
        }

      /* writable */
      if (FD_ISSET (c->fd, &wfds))
        {
          int pos = 0;
          int avail = sdslen (c->replybuf);
          ssize_t nwritten;

          nwritten = write (c->fd, c->replybuf + pos, avail);
          if (nwritten > 0)
            {
              /* Drop the bytes that were written from the front of the
               * reply buffer. */
              avail -= nwritten;
              pos += nwritten;
              sdsrange (c->replybuf, pos, -1);
            }
          else
            {
              if (nwritten == -1 && errno == EAGAIN)
                {
                  /* Skip */
                }
              else
                {
                  freeClient (c);
                }
            }
        }

      /* Everything was flushed and the client is marked for closing. */
      if (sdslen (c->replybuf) == 0 && (c->flags & REDIS_CLOSE_AFTER_REPLY))
        {
          freeClient (c);
        }
    }
  return NULL;
}
/* This function is similar to sdscatprintf, but much faster as it does * not rely on sprintf() family functions implemented by the libc that * are often very slow. Moreover directly handling the sds string as * new data is concatenated provides a performance improvement. * * However this function only handles an incompatible subset of printf-alike * format specifiers: * * %s - C String * %S - SDS string * %i - signed int * %I - 64 bit signed integer (long long, int64_t) * %u - unsigned int * %U - 64 bit unsigned integer (unsigned long long, uint64_t) * %% - Verbatim "%" character. */ sds sdscatfmt(sds s, char const *fmt, ...) { size_t initlen = sdslen(s); const char *f = fmt; int i; va_list ap; va_start(ap,fmt); f = fmt; /* Next format specifier byte to process. */ i = initlen; /* Position of the next byte to write to dest str. */ while(*f) { char next, *str; size_t l; long long num; unsigned long long unum; /* Make sure there is always space for at least 1 char. */ if (sdsavail(s)==0) { s = sdsMakeRoomFor(s,1); } switch(*f) { case '%': next = *(f+1); f++; switch(next) { case 's': case 'S': str = va_arg(ap,char*); l = (next == 's') ? strlen(str) : sdslen(str); if (sdsavail(s) < l) { s = sdsMakeRoomFor(s,l); } memcpy(s+i,str,l); sdsinclen(s,l); i += l; break; case 'i': case 'I': if (next == 'i') num = va_arg(ap,int); else num = va_arg(ap,long long); { char buf[SDS_LLSTR_SIZE]; l = sdsll2str(buf,num); if (sdsavail(s) < l) { s = sdsMakeRoomFor(s,l); } memcpy(s+i,buf,l); sdsinclen(s,l); i += l; } break; case 'u': case 'U': if (next == 'u') unum = va_arg(ap,unsigned int); else unum = va_arg(ap,unsigned long long); { char buf[SDS_LLSTR_SIZE]; l = sdsull2str(buf,unum); if (sdsavail(s) < l) { s = sdsMakeRoomFor(s,l); } memcpy(s+i,buf,l); sdsinclen(s,l); i += l; } break; default: /* Handle %% and generally %<unknown>. */ s[i++] = next; sdsinclen(s,1); break; } break; default: s[i++] = *f; sdsinclen(s,1); break; }
/* Try to read from fd and push the data read onto the Lua stack.
 *
 * 'size' selects the amount to read: a positive value reads until exactly
 * that many bytes were collected (possibly across several calls, using
 * *pcache to keep the partial data), a negative value reads at most -size
 * bytes in one go, and zero reads at most sizeof(sbuf) bytes in one go.
 *
 * Returns 1 when a string was pushed, -1 when the read is incomplete or
 * would block, and the result of se_read_error() on EOF or read failure. */
static int se_try_read(lua_State *L, int fd, int size, sds *pcache) {
    char sbuf[4 << 10];
    char *cache = *pcache;
    char *buf;
    int bufsize;
    int nread;

    if (!cache) {
        /* First attempt: choose the amount to read. */
        if (size > 0)
            bufsize = size;
        else if (size < 0)
            bufsize = -size;
        else
            bufsize = sizeof(sbuf);

        if (bufsize <= sizeof(sbuf)) {
            /* Small enough for the stack buffer. */
            buf = sbuf;
        } else {
            /* Too large for the stack: read straight into a new cache. */
            cache = sdsnewlen(NULL, bufsize);
            oom_check(cache);
            sdsclear(cache);
            *pcache = cache;
            buf = cache;
        }
        printf("try read: %d / %d\n", bufsize, size);
    } else {
        /* Continue a previous partial read after the cached data. */
        bufsize = sdsavail(cache);
        buf = cache + sdslen(cache);
        printf("continue try read: %d / %d\n", bufsize, size);
    }

    nread = read(fd, buf, bufsize);

    if (nread == 0) return se_read_error(L, pcache, "EOF");

    if (nread < 0) {
        if (errno == EAGAIN || errno == EWOULDBLOCK) return -1;
        se_assert(L, errno != EBADF, "read(%d) error", fd);
        return se_read_error(L, pcache, strerror(errno));
    }

    if (size <= 0 || nread == bufsize) {
        /* Done: hand the data over to Lua. */
        if (cache) {
            lua_pushlstring(L, cache, sdslen(cache) + nread);
            sdsfree(cache);
            *pcache = NULL;
        } else {
            lua_pushlstring(L, buf, nread);
        }
        printf("read done: %d / %d / %d\n", nread, bufsize, size);
        return 1;
    }

    /* Partial read: keep what we got in the cache for the next call. */
    if (!cache) {
        cache = sdsnewlen(NULL, bufsize);
        oom_check(cache);
        sdsclear(cache);
        *pcache = cache;
        memcpy(cache, buf, nread);
    }
    sdsIncrLen(cache, nread);
    printf("partial read: %d / %d / %d\n", nread, bufsize, size);
    return -1;
}
/* Write the append only file buffer on disk. * * Since we are required to write the AOF before replying to the client, * and the only way the client socket can get a write is entering when the * the event loop, we accumulate all the AOF writes in a memory * buffer and write it on disk using this function just before entering * the event loop again. * * About the 'force' argument: * * When the fsync policy is set to 'everysec' we may delay the flush if there * is still an fsync() going on in the background thread, since for instance * on Linux write(2) will be blocked by the background fsync anyway. * When this happens we remember that there is some aof buffer to be * flushed ASAP, and will try to do that in the serverCron() function. * * However if force is set to 1 we'll write regardless of the background * fsync. */ #define AOF_WRITE_LOG_ERROR_RATE 30 /* Seconds between errors logging. */ void flushAppendOnlyFile(int force) { ssize_t nwritten; int sync_in_progress = 0; if (sdslen(server.aof_buf) == 0) return; if (server.aof_fsync == AOF_FSYNC_EVERYSEC) sync_in_progress = bioPendingJobsOfType(REDIS_BIO_AOF_FSYNC) != 0; if (server.aof_fsync == AOF_FSYNC_EVERYSEC && !force) { /* With this append fsync policy we do background fsyncing. * If the fsync is still in progress we can try to delay * the write for a couple of seconds. */ if (sync_in_progress) { if (server.aof_flush_postponed_start == 0) { /* No previous write postponinig, remember that we are * postponing the flush and return. */ server.aof_flush_postponed_start = server.unixtime; return; } else if (server.unixtime - server.aof_flush_postponed_start < 2) { /* We were already waiting for fsync to finish, but for less * than two seconds this is still ok. Postpone again. */ return; } /* Otherwise fall trough, and go write since we can't wait * over two seconds. */ server.aof_delayed_fsync++; redisLog(REDIS_NOTICE,"Asynchronous AOF fsync is taking too long (disk is busy?). 
Writing the AOF buffer without waiting for fsync to complete, this may slow down Redis."); } } /* If you are following this code path, then we are going to write so * set reset the postponed flush sentinel to zero. */ server.aof_flush_postponed_start = 0; /* We want to perform a single write. This should be guaranteed atomic * at least if the filesystem we are writing is a real physical one. * While this will save us against the server being killed I don't think * there is much to do about the whole server stopping for power problems * or alike */ nwritten = write(server.aof_fd,server.aof_buf,sdslen(server.aof_buf)); if (nwritten != (signed)sdslen(server.aof_buf)) { static time_t last_write_error_log = 0; int can_log = 0; /* Limit logging rate to 1 line per AOF_WRITE_LOG_ERROR_RATE seconds. */ if ((server.unixtime - last_write_error_log) > AOF_WRITE_LOG_ERROR_RATE) { can_log = 1; last_write_error_log = server.unixtime; } /* Lof the AOF write error and record the error code. */ if (nwritten == -1) { if (can_log) { redisLog(REDIS_WARNING,"Error writing to the AOF file: %s", strerror(errno)); server.aof_last_write_errno = errno; } } else { if (can_log) { redisLog(REDIS_WARNING,"Short write while writing to " "the AOF file: (nwritten=%lld, " "expected=%lld)", (long long)nwritten, (long long)sdslen(server.aof_buf)); } if (ftruncate(server.aof_fd, server.aof_current_size) == -1) { if (can_log) { redisLog(REDIS_WARNING, "Could not remove short write " "from the append-only file. Redis may refuse " "to load the AOF the next time it starts. " "ftruncate: %s", strerror(errno)); } } else { /* If the ftrunacate() succeeded we can set nwritten to * -1 since there is no longer partial data into the AOF. */ nwritten = -1; } server.aof_last_write_errno = ENOSPC; } /* Handle the AOF write error. 
*/ if (server.aof_fsync == AOF_FSYNC_ALWAYS) { /* We can't recover when the fsync policy is ALWAYS since the * reply for the client is already in the output buffers, and we * have the contract with the user that on acknowledged write data * is synched on disk. */ redisLog(REDIS_WARNING,"Can't recover from AOF write error when the AOF fsync policy is 'always'. Exiting..."); exit(1); } else { /* Recover from failed write leaving data into the buffer. However * set an error to stop accepting writes as long as the error * condition is not cleared. */ server.aof_last_write_status = REDIS_ERR; /* Trim the sds buffer if there was a partial write, and there * was no way to undo it with ftruncate(2). */ if (nwritten > 0) { server.aof_current_size += nwritten; sdsrange(server.aof_buf,nwritten,-1); } return; /* We'll try again on the next call... */ } } else { /* Successful write(2). If AOF was in error state, restore the * OK state and log the event. */ if (server.aof_last_write_status == REDIS_ERR) { redisLog(REDIS_WARNING, "AOF write error looks solved, Redis can write again."); server.aof_last_write_status = REDIS_OK; } } server.aof_current_size += nwritten; /* Re-use AOF buffer when it is small enough. The maximum comes from the * arena size of 4k minus some overhead (but is otherwise arbitrary). */ if ((sdslen(server.aof_buf)+sdsavail(server.aof_buf)) < 4000) { sdsclear(server.aof_buf); } else { sdsfree(server.aof_buf); server.aof_buf = sdsempty(); } /* Don't fsync if no-appendfsync-on-rewrite is set to yes and there are * children doing I/O in the background. */ if (server.aof_no_fsync_on_rewrite && (server.aof_child_pid != -1 || server.rdb_child_pid != -1)) return; /* Perform the fsync if needed. */ if (server.aof_fsync == AOF_FSYNC_ALWAYS) { /* aof_fsync is defined as fdatasync() for Linux in order to avoid * flushing metadata. 
*/ aof_fsync(server.aof_fd); /* Let's try to get this data on the disk */ server.aof_last_fsync = server.unixtime; } else if ((server.aof_fsync == AOF_FSYNC_EVERYSEC && server.unixtime > server.aof_last_fsync)) { if (!sync_in_progress) aof_background_fsync(server.aof_fd); server.aof_last_fsync = server.unixtime; } }
// 尝试对字符串对象进行编码,以节约内存。 robj *tryObjectEncoding(robj *o) { long value; sds s = o->ptr; size_t len; /* Make sure this is a string object, the only type we encode * in this function. Other types use encoded memory efficient * representations but are handled by the commands implementing * the type. */ redisAssertWithInfo(NULL,o,o->type == REDIS_STRING); /* We try some specialized encoding only for objects that are * RAW or EMBSTR encoded, in other words objects that are still * in represented by an actually array of chars. */ // 只在字符串的编码为 RAW 或者 EMBSTR 时尝试进行编码 if (!sdsEncodedObject(o)) return o; /* It's not safe to encode shared objects: shared objects can be shared * everywhere in the "object space" of Redis and may end in places where * they are not handled. We handle them only as values in the keyspace. */ // 不对共享对象进行编码 if (o->refcount > 1) return o; /* Check if we can represent this string as a long integer. * Note that we are sure that a string larger than 21 chars is not * representable as a 32 nor 64 bit integer. */ // 对字符串进行检查 // 只对长度小于或等于 21 字节,并且可以被解释为整数的字符串进行编码 len = sdslen(s); if (len <= 21 && string2l(s,len,&value)) { //如果是整数字符串 /* This object is encodable as a long. Try to use a shared object. * Note that we avoid using shared integers when maxmemory is used * because every object needs to have a private LRU field for the LRU * algorithm to work well. */ if (server.maxmemory == 0 && value >= 0 && value < REDIS_SHARED_INTEGERS) { decrRefCount(o); //如果是10000以内的字符串,则直接使用shared.integers[value]标记就行了,增加其引用计数 incrRefCount(shared.integers[value]); return shared.integers[value]; } else { //如果是大于10000的字符串,则直接转换为REDIS_ENCODING_INT编码方式存储, if (o->encoding == REDIS_ENCODING_RAW) sdsfree(o->ptr); o->encoding = REDIS_ENCODING_INT; o->ptr = (void*) value; //直接用ptr存储字符串对应的整数,转换为地址 return o; } } /* If the string is small and is still RAW encoded, * try the EMBSTR encoding which is more efficient. 
* In this representation the object and the SDS string are allocated * in the same chunk of memory to save space and cache misses. */ // 尝试将 RAW 编码的字符串编码为 EMBSTR 编码 if (len <= REDIS_ENCODING_EMBSTR_SIZE_LIMIT) { //如果字符串小于39,并且之前不是REDIS_ENCODING_EMBSTR(obj+sdshdr+data)内存连续的,则转换为REDIS_ENCODING_EMBSTR内存连续编码方式 robj *emb; if (o->encoding == REDIS_ENCODING_EMBSTR) return o; emb = createEmbeddedStringObject(s,sdslen(s)); decrRefCount(o); return emb; } /* We can't encode the object... * * Do the last try, and at least optimize the SDS string inside * the string object to require little space, in case there * is more than 10% of free space at the end of the SDS string. * * We do that only for relatively large strings as this branch * is only entered if the length of the string is greater than * REDIS_ENCODING_EMBSTR_SIZE_LIMIT. */ // 这个对象没办法进行编码,尝试从 SDS 中移除所有空余空间 if (o->encoding == REDIS_ENCODING_RAW && sdsavail(s) > len/10) //剩余空间大于总分配obj空间的十分之一 { o->ptr = sdsRemoveFreeSpace(o->ptr); } /* Return the original object. */ return o; }
/* Attempt to switch a string object to a more compact representation.
 *
 * Possible outcomes: the object becomes integer encoded in place, it is
 * replaced by a shared integer object, it is replaced by a new embedded
 * string object, or the spare room at the end of its SDS buffer is released.
 *
 * The return value is the object to use afterwards. If it differs from 'o',
 * the caller's reference to 'o' has already been dropped via
 * decrRefCount(). */
robj *tryObjectEncoding(robj *o) {
    long num;
    sds str = o->ptr;
    size_t slen;

    /* This function only deals with string objects; other types use their
     * own memory efficient representations, handled by the commands
     * implementing the type. */
    serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);

    /* Re-encoding is attempted only for RAW or EMBSTR objects, i.e. objects
     * still represented by an array of chars, and never for shared objects:
     * those may live anywhere in the object space, including places where
     * encoded objects are not handled. */
    if (!sdsEncodedObject(o) || o->refcount > 1) return o;

    /* Strings longer than 20 chars are never representable as a 32 or
     * 64 bit integer, so skip the conversion attempt for them. */
    slen = sdslen(str);
    if (slen <= 20 && string2l(str,slen,&num)) {
        /* With maxmemory set and an LRU eviction policy, every object needs
         * its own LRU field, so shared integers must not be used. */
        int lru_policy = server.maxmemory != 0 &&
            (server.maxmemory_policy == MAXMEMORY_VOLATILE_LRU ||
             server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU);

        if (lru_policy || num < 0 || num >= OBJ_SHARED_INTEGERS) {
            /* Encode in place: the integer is stored in the pointer field.
             * A RAW object owns a separate SDS buffer, release it first. */
            if (o->encoding == OBJ_ENCODING_RAW) sdsfree(o->ptr);
            o->encoding = OBJ_ENCODING_INT;
            o->ptr = (void*) num;
            return o;
        }

        /* The value is within the shared integers range: drop the private
         * object and hand out the shared one with an extra reference. */
        decrRefCount(o);
        incrRefCount(shared.integers[num]);
        return shared.integers[num];
    }

    /* Short strings that are still RAW encoded are converted to EMBSTR,
     * where the object and the SDS string share a single allocation to save
     * space and cache misses. */
    if (slen <= OBJ_ENCODING_EMBSTR_SIZE_LIMIT) {
        robj *embedded;

        if (o->encoding == OBJ_ENCODING_EMBSTR) return o;
        embedded = createEmbeddedStringObject(str,slen);
        decrRefCount(o);
        return embedded;
    }

    /* The object can't be encoded. This point is reached only by strings
     * longer than OBJ_ENCODING_EMBSTR_SIZE_LIMIT: for those, at least
     * reclaim the free space at the end of the SDS string when it is more
     * than 10% of the string length. */
    if (o->encoding == OBJ_ENCODING_RAW && sdsavail(str) > slen/10)
        o->ptr = sdsRemoveFreeSpace(o->ptr);

    return o;
}
/* Try to encode a string object in order to save space.
 *
 * Depending on its content the object is converted in place to the integer
 * encoding, swapped for a shared integer object, swapped for a newly
 * created embedded string object, or has the free space of its SDS string
 * removed.
 *
 * The caller must continue with the returned object: when it is not 'o',
 * the reference to 'o' was already released with decrRefCount(). */
robj *tryObjectEncoding(robj *o) {
    sds payload = o->ptr;
    size_t nbytes;
    long asint;

    /* Strings are the only type encoded by this function. Other types use
     * memory efficient encodings too, but those are managed by the commands
     * implementing the type. */
    serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);

    /* Specialized encodings are tried only on RAW or EMBSTR objects, that
     * is, objects still represented by an actual array of chars. */
    if (!sdsEncodedObject(o)) return o;

    /* Shared objects are not safe to encode: they can be referenced
     * anywhere in the object space and may end up in places where encoded
     * objects are not handled. */
    if (o->refcount > 1) return o;

    nbytes = sdslen(payload);

    /* More than 20 chars never fit a 32 or 64 bit integer. */
    if (nbytes <= 20 && string2l(payload,nbytes,&asint)) {
        /* Shared integers can't be used together with an LRU eviction
         * policy, since each object then needs a private LRU field. */
        int lru_eviction = server.maxmemory != 0 &&
            (server.maxmemory_policy == MAXMEMORY_VOLATILE_LRU ||
             server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU);
        int in_shared_range = asint >= 0 && asint < OBJ_SHARED_INTEGERS;
        robj *result = o;

        if (!lru_eviction && in_shared_range) {
            /* Replace the private object with the shared one. */
            result = shared.integers[asint];
            decrRefCount(o);
            incrRefCount(result);
        } else {
            /* Store the integer in the pointer field of the same object.
             * The separately allocated SDS string of a RAW object is
             * released first. */
            if (o->encoding == OBJ_ENCODING_RAW) sdsfree(o->ptr);
            o->encoding = OBJ_ENCODING_INT;
            o->ptr = (void*) asint;
        }
        return result;
    }

    /* A small string that is still RAW encoded is moved to the more
     * efficient EMBSTR encoding: object and SDS string live in the same
     * chunk of memory, saving space and cache misses. */
    if (nbytes <= OBJ_ENCODING_EMBSTR_SIZE_LIMIT) {
        if (o->encoding != OBJ_ENCODING_EMBSTR) {
            robj *embedded = createEmbeddedStringObject(payload,nbytes);

            decrRefCount(o);
            return embedded;
        }
        return o;
    }

    /* Nothing better can be done with the encoding. Since only strings
     * longer than OBJ_ENCODING_EMBSTR_SIZE_LIMIT get here, make a last
     * attempt to save memory by removing the free space at the end of the
     * SDS string when it is more than 10% of the string length. */
    if (o->encoding == OBJ_ENCODING_RAW && sdsavail(payload) > nbytes/10) {
        o->ptr = sdsRemoveFreeSpace(o->ptr);
    }

    return o;
}
/* Try to encode a string object in order to save space.
 *
 * The object is either converted in place to the integer encoding, replaced
 * by a shared integer object, replaced by a newly created embedded string
 * object, or has the free space at the end of its SDS string removed.
 *
 * Returns the object the caller must use from now on. When the returned
 * object is not 'o', the reference held on 'o' has already been released
 * with decrRefCount(). */
robj *tryObjectEncoding(robj *o) {
    long value;
    sds s = o->ptr;
    size_t len;

    if (o->encoding == REDIS_ENCODING_INT)
        return o; /* Already encoded */

    /* It's not safe to encode shared objects: shared objects can be shared
     * everywhere in the "object space" of Redis. Encoded objects can only
     * appear as "values" (and not, for instance, as keys) */
    if (o->refcount > 1) return o;

    /* Currently we try to encode only strings */
    redisAssertWithInfo(NULL,o,o->type == REDIS_STRING);

    /* Check if we can represent this string as a long integer.
     * Note that we are sure that a string larger than 21 chars is not
     * representable as a 64 bit integer. */
    len = sdslen(s);
    if (len > 21 || !string2l(s,len,&value)) {
        /* Integer encoding not possible. Check if we can use EMBSTR. */
        if (len <= REDIS_ENCODING_EMBSTR_SIZE_LIMIT) {
            robj *emb;

            /* Only a RAW object needs to be converted. INT encoding was
             * ruled out above, so a non-RAW string object reaching this
             * point is presumably already embedded: rebuilding it would
             * just allocate, copy and free for no gain.
             * NOTE(review): assumes RAW, INT and EMBSTR are the only string
             * encodings in this code base -- confirm. */
            if (o->encoding != REDIS_ENCODING_RAW) return o;

            emb = createEmbeddedStringObject(s,len);
            decrRefCount(o);
            return emb;
        }

        /* We can't encode the object...
         *
         * Do the last try, and at least optimize the SDS string inside
         * the string object to require little space, in case there
         * is more than 10% of free space at the end of the SDS string.
         *
         * We do that only for relatively large strings as this branch
         * is only entered if the length of the string is greater than
         * REDIS_ENCODING_EMBSTR_SIZE_LIMIT. */
        if (o->encoding == REDIS_ENCODING_RAW && sdsavail(s) > len/10) {
            o->ptr = sdsRemoveFreeSpace(o->ptr);
        }
        /* Return the original object. */
        return o;
    }

    /* Ok, this object can be encoded...
     *
     * Can I use a shared object? Only if the object is inside a given range
     *
     * Note that we also avoid using shared integers when maxmemory is used
     * because every object needs to have a private LRU field for the LRU
     * algorithm to work well. */
    if (server.maxmemory == 0 &&
        value >= 0 && value < REDIS_SHARED_INTEGERS)
    {
        decrRefCount(o);
        incrRefCount(shared.integers[value]);
        return shared.integers[value];
    }

    /* Keep the object and store the integer directly in the pointer field.
     * Only a RAW object owns a separately allocated SDS string to free. */
    if (o->encoding == REDIS_ENCODING_RAW) sdsfree(o->ptr);
    o->encoding = REDIS_ENCODING_INT;
    o->ptr = (void*) value;
    return o;
}
/* Write the append only file buffer on disk. * * Since we are required to write the AOF before replying to the client, * and the only way the client socket can get a write is entering when the * the event loop, we accumulate all the AOF writes in a memory * buffer and write it on disk using this function just before entering * the event loop again. * * About the 'force' argument: * * When the fsync policy is set to 'everysec' we may delay the flush if there * is still an fsync() going on in the background thread, since for instance * on Linux write(2) will be blocked by the background fsync anyway. * When this happens we remember that there is some aof buffer to be * flushed ASAP, and will try to do that in the serverCron() function. * * However if force is set to 1 we'll write regardless of the background * fsync. */ void flushAppendOnlyFile(int force) { ssize_t nwritten; int sync_in_progress = 0; if (sdslen(server.aof_buf) == 0) return; if (server.aof_fsync == AOF_FSYNC_EVERYSEC) sync_in_progress = bioPendingJobsOfType(REDIS_BIO_AOF_FSYNC) != 0; if (server.aof_fsync == AOF_FSYNC_EVERYSEC && !force) { /* With this append fsync policy we do background fsyncing. * If the fsync is still in progress we can try to delay * the write for a couple of seconds. */ if (sync_in_progress) { if (server.aof_flush_postponed_start == 0) { /* No previous write postponinig, remember that we are * postponing the flush and return. */ server.aof_flush_postponed_start = server.unixtime; return; } else if (server.unixtime - server.aof_flush_postponed_start < 2) { /* We were already waiting for fsync to finish, but for less * than two seconds this is still ok. Postpone again. */ return; } /* Otherwise fall trough, and go write since we can't wait * over two seconds. */ server.aof_delayed_fsync++; redisLog(REDIS_NOTICE,"Asynchronous AOF fsync is taking too long (disk is busy?). 
Writing the AOF buffer without waiting for fsync to complete, this may slow down Redis."); } } /* If you are following this code path, then we are going to write so * set reset the postponed flush sentinel to zero. */ server.aof_flush_postponed_start = 0; /* We want to perform a single write. This should be guaranteed atomic * at least if the filesystem we are writing is a real physical one. * While this will save us against the server being killed I don't think * there is much to do about the whole server stopping for power problems * or alike */ nwritten = write(server.aof_fd,server.aof_buf,sdslen(server.aof_buf)); if (nwritten != (signed)sdslen(server.aof_buf)) { /* Ooops, we are in troubles. The best thing to do for now is * aborting instead of giving the illusion that everything is * working as expected. */ if (nwritten == -1) { redisLog(REDIS_WARNING,"Exiting on error writing to the append-only file: %s",strerror(errno)); } else { redisLog(REDIS_WARNING,"Exiting on short write while writing to " "the append-only file: %s (nwritten=%ld, " "expected=%ld)", strerror(errno), (long)nwritten, (long)sdslen(server.aof_buf)); if (ftruncate(server.aof_fd, server.aof_current_size) == -1) { redisLog(REDIS_WARNING, "Could not remove short write " "from the append-only file. Redis may refuse " "to load the AOF the next time it starts. " "ftruncate: %s", strerror(errno)); } } exit(1); } server.aof_current_size += nwritten; /* Re-use AOF buffer when it is small enough. The maximum comes from the * arena size of 4k minus some overhead (but is otherwise arbitrary). */ if ((sdslen(server.aof_buf)+sdsavail(server.aof_buf)) < 4000) { sdsclear(server.aof_buf); } else { sdsfree(server.aof_buf); server.aof_buf = sdsempty(); } /* Don't fsync if no-appendfsync-on-rewrite is set to yes and there are * children doing I/O in the background. */ if (server.aof_no_fsync_on_rewrite && (server.aof_child_pid != -1 || server.rdb_child_pid != -1)) return; /* Perform the fsync if needed. 
*/ if (server.aof_fsync == AOF_FSYNC_ALWAYS) { /* aof_fsync is defined as fdatasync() for Linux in order to avoid * flushing metadata. */ aof_fsync(server.aof_fd); /* Let's try to get this data on the disk */ server.aof_last_fsync = server.unixtime; } else if ((server.aof_fsync == AOF_FSYNC_EVERYSEC && server.unixtime > server.aof_last_fsync)) { if (!sync_in_progress) aof_background_fsync(server.aof_fd); server.aof_last_fsync = server.unixtime; } }