int main(void) { unsigned char *zm; zm = zipmapNew(); zm = zipmapSet(zm,(unsigned char*) "name",4, (unsigned char*) "foo",3,NULL); zm = zipmapSet(zm,(unsigned char*) "surname",7, (unsigned char*) "foo",3,NULL); zm = zipmapSet(zm,(unsigned char*) "age",3, (unsigned char*) "foo",3,NULL); zipmapRepr(zm); zm = zipmapSet(zm,(unsigned char*) "hello",5, (unsigned char*) "world!",6,NULL); zm = zipmapSet(zm,(unsigned char*) "foo",3, (unsigned char*) "bar",3,NULL); zm = zipmapSet(zm,(unsigned char*) "foo",3, (unsigned char*) "!",1,NULL); zipmapRepr(zm); zm = zipmapSet(zm,(unsigned char*) "foo",3, (unsigned char*) "12345",5,NULL); zipmapRepr(zm); zm = zipmapSet(zm,(unsigned char*) "new",3, (unsigned char*) "xx",2,NULL); zm = zipmapSet(zm,(unsigned char*) "noval",5, (unsigned char*) "",0,NULL); zipmapRepr(zm); zm = zipmapDel(zm,(unsigned char*) "new",3,NULL); zipmapRepr(zm); printf("\nLook up large key:\n"); { unsigned char buf[512]; unsigned char *value; unsigned int vlen, i; for (i = 0; i < 512; i++) buf[i] = 'a'; zm = zipmapSet(zm,buf,512,(unsigned char*) "long",4,NULL); if (zipmapGet(zm,buf,512,&value,&vlen)) { printf(" <long key> is associated to the %d bytes value: %.*s\n", vlen, vlen, value); } } printf("\nPerform a direct lookup:\n"); { unsigned char *value; unsigned int vlen; if (zipmapGet(zm,(unsigned char*) "foo",3,&value,&vlen)) { printf(" foo is associated to the %d bytes value: %.*s\n", vlen, vlen, value); } } printf("\nIterate through elements:\n"); { unsigned char *i = zipmapRewind(zm); unsigned char *key, *value; unsigned int klen, vlen; while((i = zipmapNext(i,&key,&klen,&value,&vlen)) != NULL) { printf(" %d:%.*s => %d:%.*s\n", klen, klen, key, vlen, vlen, value); } } return 0; }
/* Return the number of entries inside a zipmap */ unsigned int zipmapLen(unsigned char *zm) { unsigned int len = 0; if (zm[0] < ZIPMAP_BIGLEN) { len = zm[0]; } else { unsigned char *p = zipmapRewind(zm); while((p = zipmapNext(p,NULL,NULL,NULL,NULL)) != NULL) len++; /* Re-store length if small enough */ if (len < ZIPMAP_BIGLEN) zm[0] = len; } return len; }
/* load value which hash encoding with zipmap. */ static void *loadHashZipMapObject(unsigned char* zm, unsigned int *rlen) { unsigned char *key, *val, *p; unsigned int klen, vlen, len, i = 0; len = zipmapLen(zm); *rlen = len * 2; sds *results = (sds *) zmalloc(*rlen * sizeof(sds)); p = zipmapRewind(zm); while((p = zipmapNext(p,&key,&klen,&val,&vlen)) != NULL) { results[i] = sdsnewlen(key, klen); results[i+1] = sdsnewlen(val, vlen); i += 2; } return results; }
/* Return the number of entries inside a zipmap * * 返回 zipmap 中包含的节点数 * * T = O(N) */ unsigned int zipmapLen(unsigned char *zm) { unsigned int len = 0; if (zm[0] < ZIPMAP_BIGLEN) { // 长度可以用 1 字节保存 // T = O(1) len = zm[0]; } else { // 长度不能用 1 字节保存,需要遍历整个 zipmap // T = O(N) unsigned char *p = zipmapRewind(zm); while((p = zipmapNext(p,NULL,NULL,NULL,NULL)) != NULL) len++; /* Re-store length if small enough */ // 如果字节数量已经少于 ZIPMAP_BIGLEN ,那么重新将值保存到 len 中 // 这种情况在节点数超过 ZIPMAP_BIGLEN 之后,有节点被删除时会出现 if (len < ZIPMAP_BIGLEN) zm[0] = len; } return len; }
/* Write a sequence of commands able to fully rebuild the dataset into * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */ int rewriteAppendOnlyFile(char *filename) { dictIterator *di = NULL; dictEntry *de; FILE *fp; char tmpfile[256]; int j; time_t now = time(NULL); /* Note that we have to use a different temp name here compared to the * one used by rewriteAppendOnlyFileBackground() function. */ snprintf(tmpfile,256,"temp-rewriteaof-%d.aof", (int) getpid()); fp = fopen(tmpfile,"w"); if (!fp) { redisLog(REDIS_WARNING, "Failed rewriting the append only file: %s", strerror(errno)); return REDIS_ERR; } for (j = 0; j < server.dbnum; j++) { char selectcmd[] = "*2\r\n$6\r\nSELECT\r\n"; redisDb *db = server.db+j; dict *d = db->dict; if (dictSize(d) == 0) continue; di = dictGetSafeIterator(d); if (!di) { fclose(fp); return REDIS_ERR; } /* SELECT the new DB */ if (fwrite(selectcmd,sizeof(selectcmd)-1,1,fp) == 0) goto werr; if (fwriteBulkLongLong(fp,j) == 0) goto werr; /* Iterate this DB writing every entry */ while((de = dictNext(di)) != NULL) { sds keystr = dictGetEntryKey(de); robj key, *o; time_t expiretime; int swapped; keystr = dictGetEntryKey(de); o = dictGetEntryVal(de); initStaticStringObject(key,keystr); /* If the value for this key is swapped, load a preview in memory. * We use a "swapped" flag to remember if we need to free the * value object instead to just increment the ref count anyway * in order to avoid copy-on-write of pages if we are forked() */ if (!server.vm_enabled || o->storage == REDIS_VM_MEMORY || o->storage == REDIS_VM_SWAPPING) { swapped = 0; } else { o = vmPreviewObject(o); swapped = 1; } expiretime = getExpire(db,&key); /* Save the key and associated value */ if (o->type == REDIS_STRING) { /* Emit a SET command */ char cmd[]="*3\r\n$3\r\nSET\r\n"; if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr; /* Key and value */ if (fwriteBulkObject(fp,&key) == 0) goto werr; if (fwriteBulkObject(fp,o) == 0) goto werr; } else if (o->type == REDIS_LIST) { /* Emit the RPUSHes needed to rebuild the list */ char cmd[]="*3\r\n$5\r\nRPUSH\r\n"; if (o->encoding == REDIS_ENCODING_ZIPLIST) { unsigned char *zl = o->ptr; unsigned char *p = ziplistIndex(zl,0); unsigned char *vstr; unsigned int vlen; long long vlong; while(ziplistGet(p,&vstr,&vlen,&vlong)) { if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr; if (fwriteBulkObject(fp,&key) == 0) goto werr; if (vstr) { if (fwriteBulkString(fp,(char*)vstr,vlen) == 0) goto werr; } else { if (fwriteBulkLongLong(fp,vlong) == 0) goto werr; } p = ziplistNext(zl,p); } } else if (o->encoding == REDIS_ENCODING_LINKEDLIST) { list *list = o->ptr; listNode *ln; listIter li; listRewind(list,&li); while((ln = listNext(&li))) { robj *eleobj = listNodeValue(ln); if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr; if (fwriteBulkObject(fp,&key) == 0) goto werr; if (fwriteBulkObject(fp,eleobj) == 0) goto werr; } } else { redisPanic("Unknown list encoding"); } } else if (o->type == REDIS_SET) { char cmd[]="*3\r\n$4\r\nSADD\r\n"; /* Emit the SADDs needed to rebuild the set */ if (o->encoding == REDIS_ENCODING_INTSET) { int ii = 0; int64_t llval; while(intsetGet(o->ptr,ii++,&llval)) { if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr; if (fwriteBulkObject(fp,&key) == 0) goto werr; if (fwriteBulkLongLong(fp,llval) == 0) goto werr; } } else if (o->encoding == REDIS_ENCODING_HT) { dictIterator *di = dictGetIterator(o->ptr); dictEntry *de; while((de = dictNext(di)) != NULL) { robj *eleobj = dictGetEntryKey(de); if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr; if (fwriteBulkObject(fp,&key) == 0) goto werr; if (fwriteBulkObject(fp,eleobj) == 0) goto werr; } dictReleaseIterator(di); } else { redisPanic("Unknown set encoding"); } } else if (o->type == REDIS_ZSET) { /* Emit the ZADDs needed to rebuild the sorted set */ char cmd[]="*4\r\n$4\r\nZADD\r\n"; if (o->encoding == REDIS_ENCODING_ZIPLIST) { unsigned char *zl = o->ptr; unsigned char *eptr, *sptr; unsigned char *vstr; unsigned int vlen; long long vll; double score; eptr = ziplistIndex(zl,0); redisAssert(eptr != NULL); sptr = ziplistNext(zl,eptr); redisAssert(sptr != NULL); while (eptr != NULL) { redisAssert(ziplistGet(eptr,&vstr,&vlen,&vll)); score = zzlGetScore(sptr); if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr; if (fwriteBulkObject(fp,&key) == 0) goto werr; if (fwriteBulkDouble(fp,score) == 0) goto werr; if (vstr != NULL) { if (fwriteBulkString(fp,(char*)vstr,vlen) == 0) goto werr; } else { if (fwriteBulkLongLong(fp,vll) == 0) goto werr; } zzlNext(zl,&eptr,&sptr); } } else if (o->encoding == REDIS_ENCODING_SKIPLIST) { zset *zs = o->ptr; dictIterator *di = dictGetIterator(zs->dict); dictEntry *de; while((de = dictNext(di)) != NULL) { robj *eleobj = dictGetEntryKey(de); double *score = dictGetEntryVal(de); if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr; if (fwriteBulkObject(fp,&key) == 0) goto werr; if (fwriteBulkDouble(fp,*score) == 0) goto werr; if (fwriteBulkObject(fp,eleobj) == 0) goto werr; } dictReleaseIterator(di); } else { redisPanic("Unknown sorted set encoding"); } } else if (o->type == REDIS_HASH) { char cmd[]="*4\r\n$4\r\nHSET\r\n"; /* Emit the HSETs needed to rebuild the hash */ if (o->encoding == REDIS_ENCODING_ZIPMAP) { unsigned char *p = zipmapRewind(o->ptr); unsigned char *field, *val; unsigned int flen, vlen; while((p = zipmapNext(p,&field,&flen,&val,&vlen)) != NULL) { if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr; if (fwriteBulkObject(fp,&key) == 0) goto werr; if (fwriteBulkString(fp,(char*)field,flen) == 0) goto werr; if (fwriteBulkString(fp,(char*)val,vlen) == 0) goto werr; } } else { dictIterator *di = dictGetIterator(o->ptr); dictEntry *de; while((de = dictNext(di)) != NULL) { robj *field = dictGetEntryKey(de); robj *val = dictGetEntryVal(de); if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr; if (fwriteBulkObject(fp,&key) == 0) goto werr; if (fwriteBulkObject(fp,field) == 0) goto werr; if (fwriteBulkObject(fp,val) == 0) goto werr; } dictReleaseIterator(di); } } else { redisPanic("Unknown object type"); } /* Save the expire time */ if (expiretime != -1) { char cmd[]="*3\r\n$8\r\nEXPIREAT\r\n"; /* If this key is already expired skip it */ if (expiretime < now) continue; if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr; if (fwriteBulkObject(fp,&key) == 0) goto werr; if (fwriteBulkLongLong(fp,expiretime) == 0) goto werr; } if (swapped) decrRefCount(o); } dictReleaseIterator(di); } /* Make sure data will not remain on the OS's output buffers */ fflush(fp); aof_fsync(fileno(fp)); fclose(fp); /* Use RENAME to make sure the DB file is changed atomically only * if the generate DB file is ok. */ if (rename(tmpfile,filename) == -1) { redisLog(REDIS_WARNING,"Error moving temp append only file on the final destination: %s", strerror(errno)); unlink(tmpfile); return REDIS_ERR; } redisLog(REDIS_NOTICE,"SYNC append only file rewrite performed"); return REDIS_OK; werr: fclose(fp); unlink(tmpfile); redisLog(REDIS_WARNING,"Write error writing append only file on disk: %s", strerror(errno)); if (di) dictReleaseIterator(di); return REDIS_ERR; }
/* How a good candidate is this object for swapping? * The better candidate it is, the greater the returned value. * * Currently we try to perform a fast estimation of the object size in * memory, and combine it with aging informations. * * Basically swappability = idle-time * log(estimated size) * * Bigger objects are preferred over smaller objects, but not * proportionally, this is why we use the logarithm. This algorithm is * just a first try and will probably be tuned later. */ double computeObjectSwappability(robj *o) { /* actual age can be >= minage, but not < minage. As we use wrapping * 21 bit clocks with minutes resolution for the LRU. */ time_t minage = estimateObjectIdleTime(o); #ifdef _WIN32 ssize_t asize = 0, elesize; #else long asize = 0, elesize; #endif robj *ele; list *l; listNode *ln; dict *d; struct dictEntry *de; if (minage <= 0) return 0; switch(o->type) { case REDIS_STRING: if (o->encoding != REDIS_ENCODING_RAW) { asize = sizeof(*o); } else { #ifdef _WIN32 asize = sdslen(o->ptr)+sizeof(*o)+sizeof(size_t)*2; #else asize = sdslen(o->ptr)+sizeof(*o)+sizeof(long)*2; #endif } break; case REDIS_LIST: if (o->encoding == REDIS_ENCODING_ZIPLIST) { asize = sizeof(*o)+ziplistBlobLen(o->ptr); } else { l = o->ptr; ln = listFirst(l); asize = sizeof(list); if (ln) { ele = ln->value; elesize = (ele->encoding == REDIS_ENCODING_RAW) ? (sizeof(*o)+sdslen(ele->ptr)) : sizeof(*o); asize += (sizeof(listNode)+elesize)*listLength(l); } } break; case REDIS_SET: if (o->encoding == REDIS_ENCODING_INTSET) { intset *is = o->ptr; asize = sizeof(*is)+is->encoding*is->length; } else { d = o->ptr; asize = sizeof(dict)+(sizeof(struct dictEntry*)*dictSlots(d)); if (dictSize(d)) { de = dictGetRandomKey(d); ele = dictGetEntryKey(de); elesize = (ele->encoding == REDIS_ENCODING_RAW) ? (sizeof(*o)+sdslen(ele->ptr)) : sizeof(*o); asize += (sizeof(struct dictEntry)+elesize)*dictSize(d); } } break; case REDIS_ZSET: if (o->encoding == REDIS_ENCODING_ZIPLIST) { asize = sizeof(*o)+(ziplistBlobLen(o->ptr) / 2); } else { d = ((zset*)o->ptr)->dict; asize = sizeof(zset)+(sizeof(struct dictEntry*)*dictSlots(d)); if (dictSize(d)) { de = dictGetRandomKey(d); ele = dictGetEntryKey(de); elesize = (ele->encoding == REDIS_ENCODING_RAW) ? (sizeof(*o)+sdslen(ele->ptr)) : sizeof(*o); asize += (sizeof(struct dictEntry)+elesize)*dictSize(d); asize += sizeof(zskiplistNode)*dictSize(d); } } break; case REDIS_HASH: if (o->encoding == REDIS_ENCODING_ZIPMAP) { unsigned char *p = zipmapRewind((unsigned char*)o->ptr); unsigned int len = zipmapLen((unsigned char*)o->ptr); unsigned int klen, vlen; unsigned char *key, *val; if ((p = zipmapNext(p,&key,&klen,&val,&vlen)) == NULL) { klen = 0; vlen = 0; } asize = len*(klen+vlen+3); } else if (o->encoding == REDIS_ENCODING_HT) { d = o->ptr; asize = sizeof(dict)+(sizeof(struct dictEntry*)*dictSlots(d)); if (dictSize(d)) { de = dictGetRandomKey(d); ele = dictGetEntryKey(de); elesize = (ele->encoding == REDIS_ENCODING_RAW) ? (sizeof(*o)+sdslen(ele->ptr)) : sizeof(*o); ele = dictGetEntryVal(de); elesize = (ele->encoding == REDIS_ENCODING_RAW) ? (sizeof(*o)+sdslen(ele->ptr)) : sizeof(*o); asize += (sizeof(struct dictEntry)+elesize)*dictSize(d); } } break; } return (double)minage*log(1+(double)asize); }
/* Write a sequence of commands able to fully rebuild the dataset into * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */ static int rewriteAppendOnlyFile(char *filename) { dictIterator *di = NULL; dictEntry *de; FILE *fp; char tmpfile[256]; int j; time_t now = time(NULL); /* Note that we have to use a different temp name here compared to the * one used by rewriteAppendOnlyFileBackground() function. */ snprintf(tmpfile,256,"temp-rewriteaof-%d.aof", (int) getpid()); fp = fopen(tmpfile,"w"); for (j = 0; j < server.dbnum; j++) { char selectcmd[] = "*2\r\n$6\r\nSELECT\r\n"; redisDb *db = server.db+j; dict *d = db->dict; if (dictSize(d) == 0) continue; di = dictGetIterator(d); /* SELECT the new DB */ fwrite(selectcmd,sizeof(selectcmd)-1,1,fp); fwriteBulkLong(fp,j); /* Iterate this DB writing every entry */ while((de = dictNext(di)) != NULL) { robj *key, *o; time_t expiretime; int swapped; key = dictGetEntryKey(de); /* If the value for this key is swapped, load a preview in memory. * We use a "swapped" flag to remember if we need to free the * value object instead to just increment the ref count anyway * in order to avoid copy-on-write of pages if we are forked() */ if (!server.vm_enabled || key->storage == REDIS_VM_MEMORY || key->storage == REDIS_VM_SWAPPING) { o = dictGetEntryVal(de); swapped = 0; } else { o = vmPreviewObject(key); swapped = 1; } expiretime = getExpire(db,key); /* Save the key and associated value */ if (o->type == REDIS_STRING) { /* Emit a SET command */ char cmd[]="*3\r\n$3\r\nSET\r\n"; fwrite(cmd, sizeof(cmd)-1,1,fp); /* Key and value */ fwriteBulkObject(fp,key); fwriteBulkObject(fp,o); } else if (o->type == REDIS_LIST) { /* Emit the RPUSHes needed to rebuild the list */ list *list = o->ptr; listNode *ln; listIter li; listRewind(list,&li); while((ln = listNext(&li))) { char cmd[]="*3\r\n$5\r\nRPUSH\r\n"; robj *eleobj = listNodeValue(ln); fwrite(cmd, sizeof(cmd)-1, 1, fp); fwriteBulkObject(fp,key); fwriteBulkObject(fp,eleobj); } } else if (o->type == REDIS_SET) { /* Emit the SADDs needed to rebuild the set */ dict *set = o->ptr; dictIterator *di = dictGetIterator(set); dictEntry *de; while((de = dictNext(di)) != NULL) { char cmd[]="*3\r\n$4\r\nSADD\r\n"; robj *eleobj = dictGetEntryKey(de); fwrite(cmd,sizeof(cmd)-1,1,fp) ; fwriteBulkObject(fp,key) ; fwriteBulkObject(fp,eleobj); } dictReleaseIterator(di); } else if (o->type == REDIS_ZSET) { /* Emit the ZADDs needed to rebuild the sorted set */ zset *zs = o->ptr; dictIterator *di = dictGetIterator(zs->dict); dictEntry *de; while((de = dictNext(di)) != NULL) { char cmd[]="*4\r\n$4\r\nZADD\r\n"; robj *eleobj = dictGetEntryKey(de); double *score = dictGetEntryVal(de); if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr; if (fwriteBulkObject(fp,key) == 0) goto werr; if (fwriteBulkDouble(fp,*score) == 0) goto werr; if (fwriteBulkObject(fp,eleobj) == 0) goto werr; } dictReleaseIterator(di); } else if (o->type == REDIS_HASH) { char cmd[]="*4\r\n$4\r\nHSET\r\n"; /* Emit the HSETs needed to rebuild the hash */ if (o->encoding == REDIS_ENCODING_ZIPMAP) { unsigned char *p = zipmapRewind(o->ptr); unsigned char *field, *val; unsigned int flen, vlen; while((p = zipmapNext(p,&field,&flen,&val,&vlen)) != NULL) { if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr; if (fwriteBulkObject(fp,key) == 0) goto werr; if (fwriteBulkString(fp,(char*)field,flen) == -1) return -1; if (fwriteBulkString(fp,(char*)val,vlen) == -1) return -1; } } else { dictIterator *di = dictGetIterator(o->ptr); dictEntry *de; while((de = dictNext(di)) != NULL) { robj *field = dictGetEntryKey(de); robj *val = dictGetEntryVal(de); if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr; if (fwriteBulkObject(fp,key) == 0) goto werr; if (fwriteBulkObject(fp,field) == -1) return -1; if (fwriteBulkObject(fp,val) == -1) return -1; } dictReleaseIterator(di); } } else { redisAssert(0); } /* Save the expire time */ if (expiretime != -1) { char cmd[]="*3\r\n$8\r\nEXPIREAT\r\n"; /* If this key is already expired skip it */ if (expiretime < now) continue; fwrite(cmd,sizeof(cmd)-1,1,fp) ; fwriteBulkObject(fp,key) ; fwriteBulkLong(fp,expiretime) ; } if (swapped) decrRefCount(o); } dictReleaseIterator(di); } /* Make sure data will not remain on the OS's output buffers */ fflush(fp); fsync(fileno(fp)); fclose(fp); /* Use RENAME to make sure the DB file is changed atomically only * if the generate DB file is ok. */ rename(tmpfile,filename) ; redisLog(REDIS_NOTICE,"SYNC append only file rewrite performed"); return REDIS_OK; }