/* Move to the next entry in the set. Returns the object at the current * position. * * Since set elements can be internally be stored as redis objects or * simple arrays of integers, setTypeNext returns the encoding of the * set object you are iterating, and will populate the appropriate pointer * (eobj) or (llobj) accordingly. * * When there are no longer elements -1 is returned. * Returned objects ref count is not incremented, so this function is * copy on write friendly. */ int setTypeNext(setTypeIterator *si, robj **objele, int64_t *llele) { if (si->encoding == REDIS_ENCODING_HT) { dictEntry *de = dictNext(si->di); if (de == NULL) return -1; *objele = dictGetEntryKey(de); } else if (si->encoding == REDIS_ENCODING_INTSET) { if (!intsetGet(si->subject->ptr,si->ii++,llele)) return -1; } return si->encoding; }
/* Move to the next entry in the set. Returns the object at the current * position. * * Since set elements can be internally be stored as redis objects or * simple arrays of integers, setTypeNext returns the encoding of the * set object you are iterating, and will populate the appropriate pointer * (objele) or (llele) accordingly. * * Note that both the objele and llele pointers should be passed and cannot * be NULL since the function will try to defensively populate the non * used field with values which are easy to trap if misused. * * When there are no longer elements -1 is returned. * Returned objects ref count is not incremented, so this function is * copy on write friendly. */ int setTypeNext(setTypeIterator *si, robj **objele, int64_t *llele) { if (si->encoding == REDIS_ENCODING_HT) { dictEntry *de = dictNext(si->di); if (de == NULL) return -1; *objele = dictGetKey(de); *llele = -123456789; /* Not needed. Defensive. */ } else if (si->encoding == REDIS_ENCODING_INTSET) { if (!intsetGet(si->subject->ptr,si->ii++,llele)) return -1; *objele = NULL; /* Not needed. Defensive. */ } else { redisPanic("Wrong set encoding in setTypeNext"); } return si->encoding; }
/* Emit the commands needed to rebuild a set object. * The function returns 0 on error, 1 on success. */ int rewriteSetObject(rio *r, robj *key, robj *o) { long long count = 0, items = setTypeSize(o); if (o->encoding == REDIS_ENCODING_INTSET) { int ii = 0; int64_t llval; while(intsetGet(o->ptr,ii++,&llval)) { if (count == 0) { int cmd_items = (items > REDIS_AOF_REWRITE_ITEMS_PER_CMD) ? REDIS_AOF_REWRITE_ITEMS_PER_CMD : items; if (rioWriteBulkCount(r,'*',2+cmd_items) == 0) return 0; if (rioWriteBulkString(r,"SADD",4) == 0) return 0; if (rioWriteBulkObject(r,key) == 0) return 0; } if (rioWriteBulkLongLong(r,llval) == 0) return 0; if (++count == REDIS_AOF_REWRITE_ITEMS_PER_CMD) count = 0; items--; } } else if (o->encoding == REDIS_ENCODING_HT) { dictIterator *di = dictGetIterator(o->ptr); dictEntry *de; while((de = dictNext(di)) != NULL) { robj *eleobj = dictGetKey(de); if (count == 0) { int cmd_items = (items > REDIS_AOF_REWRITE_ITEMS_PER_CMD) ? REDIS_AOF_REWRITE_ITEMS_PER_CMD : items; if (rioWriteBulkCount(r,'*',2+cmd_items) == 0) return 0; if (rioWriteBulkString(r,"SADD",4) == 0) return 0; if (rioWriteBulkObject(r,key) == 0) return 0; } if (rioWriteBulkObject(r,eleobj) == 0) return 0; if (++count == REDIS_AOF_REWRITE_ITEMS_PER_CMD) count = 0; items--; } dictReleaseIterator(di); } else { redisPanic("Unknown set encoding"); } return 1; }
/* This command implements SCAN, HSCAN and SSCAN commands. * If object 'o' is passed, then it must be a Hash or Set object, otherwise * if 'o' is NULL the command will operate on the dictionary associated with * the current database. * * When 'o' is not NULL the function assumes that the first argument in * the client arguments vector is a key so it skips it before iterating * in order to parse options. * * In the case of a Hash object the function returns both the field and value * of every element on the Hash. */ void scanGenericCommand(client *c, robj *o, unsigned long cursor) { int i, j; list *keys = listCreate(); listNode *node, *nextnode; long count = 10; sds pat = NULL; int patlen = 0, use_pattern = 0; dict *ht; /* Object must be NULL (to iterate keys names), or the type of the object * must be Set, Sorted Set, or Hash. */ serverAssert(o == NULL || o->type == OBJ_SET || o->type == OBJ_HASH || o->type == OBJ_ZSET); /* Set i to the first option argument. The previous one is the cursor. */ i = (o == NULL) ? 2 : 3; /* Skip the key argument if needed. */ /* Step 1: Parse options. */ while (i < c->argc) { j = c->argc - i; if (!strcasecmp(c->argv[i]->ptr, "count") && j >= 2) { if (getLongFromObjectOrReply(c, c->argv[i+1], &count, NULL) != C_OK) { goto cleanup; } if (count < 1) { addReply(c,shared.syntaxerr); goto cleanup; } i += 2; } else if (!strcasecmp(c->argv[i]->ptr, "match") && j >= 2) { pat = c->argv[i+1]->ptr; patlen = sdslen(pat); /* The pattern always matches if it is exactly "*", so it is * equivalent to disabling it. */ use_pattern = !(pat[0] == '*' && patlen == 1); i += 2; } else { addReply(c,shared.syntaxerr); goto cleanup; } } /* Step 2: Iterate the collection. * * Note that if the object is encoded with a ziplist, intset, or any other * representation that is not a hash table, we are sure that it is also * composed of a small number of elements. So to avoid taking state we * just return everything inside the object in a single call, setting the * cursor to zero to signal the end of the iteration. */ /* Handle the case of a hash table. */ ht = NULL; if (o == NULL) { ht = c->db->dict; } else if (o->type == OBJ_SET && o->encoding == OBJ_ENCODING_HT) { ht = o->ptr; } else if (o->type == OBJ_HASH && o->encoding == OBJ_ENCODING_HT) { ht = o->ptr; count *= 2; /* We return key / value for this type. */ } else if (o->type == OBJ_ZSET && o->encoding == OBJ_ENCODING_SKIPLIST) { zset *zs = o->ptr; ht = zs->dict; count *= 2; /* We return key / value for this type. */ } if (ht) { void *privdata[2]; /* We set the max number of iterations to ten times the specified * COUNT, so if the hash table is in a pathological state (very * sparsely populated) we avoid to block too much time at the cost * of returning no or very few elements. */ long maxiterations = count*10; /* We pass two pointers to the callback: the list to which it will * add new elements, and the object containing the dictionary so that * it is possible to fetch more data in a type-dependent way. */ privdata[0] = keys; privdata[1] = o; do { cursor = dictScan(ht, cursor, scanCallback, privdata); } while (cursor && maxiterations-- && listLength(keys) < (unsigned long)count); } else if (o->type == OBJ_SET) { int pos = 0; int64_t ll; while(intsetGet(o->ptr,pos++,&ll)) listAddNodeTail(keys,createStringObjectFromLongLong(ll)); cursor = 0; } else if (o->type == OBJ_HASH || o->type == OBJ_ZSET) { unsigned char *p = ziplistIndex(o->ptr,0); unsigned char *vstr; unsigned int vlen; long long vll; while(p) { ziplistGet(p,&vstr,&vlen,&vll); listAddNodeTail(keys, (vstr != NULL) ? createStringObject((char*)vstr,vlen) : createStringObjectFromLongLong(vll)); p = ziplistNext(o->ptr,p); } cursor = 0; } else { serverPanic("Not handled encoding in SCAN."); } /* Step 3: Filter elements. */ node = listFirst(keys); while (node) { robj *kobj = listNodeValue(node); nextnode = listNextNode(node); int filter = 0; /* Filter element if it does not match the pattern. */ if (!filter && use_pattern) { if (sdsEncodedObject(kobj)) { if (!stringmatchlen(pat, patlen, kobj->ptr, sdslen(kobj->ptr), 0)) filter = 1; } else { char buf[LONG_STR_SIZE]; int len; serverAssert(kobj->encoding == OBJ_ENCODING_INT); len = ll2string(buf,sizeof(buf),(long)kobj->ptr); if (!stringmatchlen(pat, patlen, buf, len, 0)) filter = 1; } } /* Filter element if it is an expired key. */ if (!filter && o == NULL && expireIfNeeded(c->db, kobj)) filter = 1; /* Remove the element and its associted value if needed. */ if (filter) { decrRefCount(kobj); listDelNode(keys, node); } /* If this is a hash or a sorted set, we have a flat list of * key-value elements, so if this element was filtered, remove the * value, or skip it if it was not filtered: we only match keys. */ if (o && (o->type == OBJ_ZSET || o->type == OBJ_HASH)) { node = nextnode; nextnode = listNextNode(node); if (filter) { kobj = listNodeValue(node); decrRefCount(kobj); listDelNode(keys, node); } } node = nextnode; } /* Step 4: Reply to the client. */ addReplyMultiBulkLen(c, 2); addReplyBulkLongLong(c,cursor); addReplyMultiBulkLen(c, listLength(keys)); while ((node = listFirst(keys)) != NULL) { robj *kobj = listNodeValue(node); addReplyBulk(c, kobj); decrRefCount(kobj); listDelNode(keys, node); } cleanup: listSetFreeMethod(keys,decrRefCountVoid); listRelease(keys); }
/* Write a sequence of commands able to fully rebuild the dataset into * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */ int rewriteAppendOnlyFile(char *filename) { dictIterator *di = NULL; dictEntry *de; FILE *fp; char tmpfile[256]; int j; time_t now = time(NULL); /* Note that we have to use a different temp name here compared to the * one used by rewriteAppendOnlyFileBackground() function. */ snprintf(tmpfile,256,"temp-rewriteaof-%d.aof", (int) getpid()); fp = fopen(tmpfile,"w"); if (!fp) { redisLog(REDIS_WARNING, "Failed rewriting the append only file: %s", strerror(errno)); return REDIS_ERR; } for (j = 0; j < server.dbnum; j++) { char selectcmd[] = "*2\r\n$6\r\nSELECT\r\n"; redisDb *db = server.db+j; dict *d = db->dict; if (dictSize(d) == 0) continue; di = dictGetSafeIterator(d); if (!di) { fclose(fp); return REDIS_ERR; } /* SELECT the new DB */ if (fwrite(selectcmd,sizeof(selectcmd)-1,1,fp) == 0) goto werr; if (fwriteBulkLongLong(fp,j) == 0) goto werr; /* Iterate this DB writing every entry */ while((de = dictNext(di)) != NULL) { sds keystr = dictGetEntryKey(de); robj key, *o; time_t expiretime; int swapped; keystr = dictGetEntryKey(de); o = dictGetEntryVal(de); initStaticStringObject(key,keystr); /* If the value for this key is swapped, load a preview in memory. * We use a "swapped" flag to remember if we need to free the * value object instead to just increment the ref count anyway * in order to avoid copy-on-write of pages if we are forked() */ if (!server.vm_enabled || o->storage == REDIS_VM_MEMORY || o->storage == REDIS_VM_SWAPPING) { swapped = 0; } else { o = vmPreviewObject(o); swapped = 1; } expiretime = getExpire(db,&key); /* Save the key and associated value */ if (o->type == REDIS_STRING) { /* Emit a SET command */ char cmd[]="*3\r\n$3\r\nSET\r\n"; if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr; /* Key and value */ if (fwriteBulkObject(fp,&key) == 0) goto werr; if (fwriteBulkObject(fp,o) == 0) goto werr; } else if (o->type == REDIS_LIST) { /* Emit the RPUSHes needed to rebuild the list */ char cmd[]="*3\r\n$5\r\nRPUSH\r\n"; if (o->encoding == REDIS_ENCODING_ZIPLIST) { unsigned char *zl = o->ptr; unsigned char *p = ziplistIndex(zl,0); unsigned char *vstr; unsigned int vlen; long long vlong; while(ziplistGet(p,&vstr,&vlen,&vlong)) { if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr; if (fwriteBulkObject(fp,&key) == 0) goto werr; if (vstr) { if (fwriteBulkString(fp,(char*)vstr,vlen) == 0) goto werr; } else { if (fwriteBulkLongLong(fp,vlong) == 0) goto werr; } p = ziplistNext(zl,p); } } else if (o->encoding == REDIS_ENCODING_LINKEDLIST) { list *list = o->ptr; listNode *ln; listIter li; listRewind(list,&li); while((ln = listNext(&li))) { robj *eleobj = listNodeValue(ln); if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr; if (fwriteBulkObject(fp,&key) == 0) goto werr; if (fwriteBulkObject(fp,eleobj) == 0) goto werr; } } else { redisPanic("Unknown list encoding"); } } else if (o->type == REDIS_SET) { char cmd[]="*3\r\n$4\r\nSADD\r\n"; /* Emit the SADDs needed to rebuild the set */ if (o->encoding == REDIS_ENCODING_INTSET) { int ii = 0; int64_t llval; while(intsetGet(o->ptr,ii++,&llval)) { if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr; if (fwriteBulkObject(fp,&key) == 0) goto werr; if (fwriteBulkLongLong(fp,llval) == 0) goto werr; } } else if (o->encoding == REDIS_ENCODING_HT) { dictIterator *di = dictGetIterator(o->ptr); dictEntry *de; while((de = dictNext(di)) != NULL) { robj *eleobj = dictGetEntryKey(de); if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr; if (fwriteBulkObject(fp,&key) == 0) goto werr; if (fwriteBulkObject(fp,eleobj) == 0) goto werr; } dictReleaseIterator(di); } else { redisPanic("Unknown set encoding"); } } else if (o->type == REDIS_ZSET) { /* Emit the ZADDs needed to rebuild the sorted set */ char cmd[]="*4\r\n$4\r\nZADD\r\n"; if (o->encoding == REDIS_ENCODING_ZIPLIST) { unsigned char *zl = o->ptr; unsigned char *eptr, *sptr; unsigned char *vstr; unsigned int vlen; long long vll; double score; eptr = ziplistIndex(zl,0); redisAssert(eptr != NULL); sptr = ziplistNext(zl,eptr); redisAssert(sptr != NULL); while (eptr != NULL) { redisAssert(ziplistGet(eptr,&vstr,&vlen,&vll)); score = zzlGetScore(sptr); if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr; if (fwriteBulkObject(fp,&key) == 0) goto werr; if (fwriteBulkDouble(fp,score) == 0) goto werr; if (vstr != NULL) { if (fwriteBulkString(fp,(char*)vstr,vlen) == 0) goto werr; } else { if (fwriteBulkLongLong(fp,vll) == 0) goto werr; } zzlNext(zl,&eptr,&sptr); } } else if (o->encoding == REDIS_ENCODING_SKIPLIST) { zset *zs = o->ptr; dictIterator *di = dictGetIterator(zs->dict); dictEntry *de; while((de = dictNext(di)) != NULL) { robj *eleobj = dictGetEntryKey(de); double *score = dictGetEntryVal(de); if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr; if (fwriteBulkObject(fp,&key) == 0) goto werr; if (fwriteBulkDouble(fp,*score) == 0) goto werr; if (fwriteBulkObject(fp,eleobj) == 0) goto werr; } dictReleaseIterator(di); } else { redisPanic("Unknown sorted set encoding"); } } else if (o->type == REDIS_HASH) { char cmd[]="*4\r\n$4\r\nHSET\r\n"; /* Emit the HSETs needed to rebuild the hash */ if (o->encoding == REDIS_ENCODING_ZIPMAP) { unsigned char *p = zipmapRewind(o->ptr); unsigned char *field, *val; unsigned int flen, vlen; while((p = zipmapNext(p,&field,&flen,&val,&vlen)) != NULL) { if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr; if (fwriteBulkObject(fp,&key) == 0) goto werr; if (fwriteBulkString(fp,(char*)field,flen) == 0) goto werr; if (fwriteBulkString(fp,(char*)val,vlen) == 0) goto werr; } } else { dictIterator *di = dictGetIterator(o->ptr); dictEntry *de; while((de = dictNext(di)) != NULL) { robj *field = dictGetEntryKey(de); robj *val = dictGetEntryVal(de); if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr; if (fwriteBulkObject(fp,&key) == 0) goto werr; if (fwriteBulkObject(fp,field) == 0) goto werr; if (fwriteBulkObject(fp,val) == 0) goto werr; } dictReleaseIterator(di); } } else { redisPanic("Unknown object type"); } /* Save the expire time */ if (expiretime != -1) { char cmd[]="*3\r\n$8\r\nEXPIREAT\r\n"; /* If this key is already expired skip it */ if (expiretime < now) continue; if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr; if (fwriteBulkObject(fp,&key) == 0) goto werr; if (fwriteBulkLongLong(fp,expiretime) == 0) goto werr; } if (swapped) decrRefCount(o); } dictReleaseIterator(di); } /* Make sure data will not remain on the OS's output buffers */ fflush(fp); aof_fsync(fileno(fp)); fclose(fp); /* Use RENAME to make sure the DB file is changed atomically only * if the generate DB file is ok. */ if (rename(tmpfile,filename) == -1) { redisLog(REDIS_WARNING,"Error moving temp append only file on the final destination: %s", strerror(errno)); unlink(tmpfile); return REDIS_ERR; } redisLog(REDIS_NOTICE,"SYNC append only file rewrite performed"); return REDIS_OK; werr: fclose(fp); unlink(tmpfile); redisLog(REDIS_WARNING,"Write error writing append only file on disk: %s", strerror(errno)); if (di) dictReleaseIterator(di); return REDIS_ERR; }