/* Copy a dictionary of redis objects. Assumes the copied dictionary uses
 * COW destructors.
 *
 * Returns 'curdict' unchanged when no background save is in progress;
 * otherwise returns a shallow copy (keys/values shared, no refcount
 * changes) with dict types swapped so entries are not deleted while the
 * save runs. NOTE(review): returns NULL if dictCreate() fails, matching
 * the original behavior -- callers must tolerate that. */
dict *copyonwrite_dictobj(dict *curdict, bkgdDbExt *extDict) {
    dict *copy;
    dictIterator *iter;
    dictEntry *entry;

    /* No copy needed unless a background save is active. */
    if (server.isBackgroundSaving == 0 || server.cowDictCopied == NULL)
        return curdict;

    copy = dictCreate(curdict->type, curdict->privdata);
    if (copy != NULL) {
        /* Shallow-copy every entry: keys and values are shared, not
         * duplicated or refcounted. A memcpy of the whole dict is not
         * possible because entries are allocated individually. */
        iter = dictGetSafeIterator(curdict);
        while ((entry = dictNext(iter)) != NULL)
            dictAdd(copy, entry->key, entry->val);
        dictReleaseIterator(iter);

        if (extDict != NULL) {
            /* Swap dict types so nothing is deleted while saving. */
            extDict->savedType = copy->type;
            copy->type = extDict->cowType;
            curdict->type = extDict->readonlyType;
        }
    }
    return copy;
}
/* Unsubscribe from all the channels. Return the number of channels the
 * client was subscribed to. When 'notify' is set and the client had no
 * subscriptions at all, an unsubscribe reply is still sent so the client
 * always gets an answer. */
int pubsubUnsubscribeAllChannels(redisClient *c, int notify) {
    dictIterator *iter = dictGetSafeIterator(c->pubsub_channels);
    dictEntry *entry;
    int unsubscribed = 0;

    /* Walk every subscribed channel and drop the subscription. */
    while ((entry = dictNext(iter)) != NULL) {
        robj *channel = dictGetKey(entry);
        unsubscribed += pubsubUnsubscribeChannel(c, channel, notify);
    }

    /* We were subscribed to nothing? Still reply to the client. */
    if (notify && unsubscribed == 0) {
        addReply(c, shared.mbulkhdr[3]);
        addReply(c, shared.unsubscribebulk);
        addReply(c, shared.nullbulk);
        addReplyLongLong(c, dictSize(c->pubsub_channels) +
                            listLength(c->pubsub_patterns));
    }
    dictReleaseIterator(iter);
    return unsubscribed;
}
/* KEYS command: reply with every key of the current DB matching the
 * glob-style pattern in argv[1]. Keys found to be expired are lazily
 * deleted and excluded from the reply. */
void keysCommand(redisClient *c) {
    sds pattern = c->argv[1]->ptr;
    int patlen = sdslen(pattern);
    int matchall = (pattern[0] == '*' && pattern[1] == '\0');
    unsigned long replied = 0;
    void *replylen = addDeferredMultiBulkLength(c);
    dictIterator *iter = dictGetSafeIterator(c->db->dict);
    dictEntry *entry;

    while ((entry = dictNext(iter)) != NULL) {
        sds key = dictGetKey(entry);

        if (matchall || stringmatchlen(pattern, patlen, key, sdslen(key), 0)) {
            robj *keyobj = createStringObject(key, sdslen(key));
            /* Skip (and lazily expire) keys whose TTL already elapsed. */
            if (expireIfNeeded(c->db, keyobj) == 0) {
                addReplyBulk(c, keyobj);
                replied++;
            }
            decrRefCount(keyobj);
        }
    }
    dictReleaseIterator(iter);
    setDeferredMultiBulkLength(c, replylen, replied);
}
/* Unsubscribe the client from every channel it is subscribed to and
 * return how many channels were unsubscribed. If 'notify' is set and the
 * client had no subscriptions, a reply is still sent. */
int pubsubUnsubscribeAllChannels(redisClient *c, int notify) {
    int total = 0;
    dictEntry *de;
    dictIterator *di = dictGetSafeIterator(c->pubsub_channels);

    /* Drop every channel subscription, counting as we go. */
    for (de = dictNext(di); de != NULL; de = dictNext(di)) {
        robj *channel = dictGetKey(de);
        total += pubsubUnsubscribeChannel(c, channel, notify);
    }

    /* We were subscribed to nothing? Still reply to the client. */
    if (notify && total == 0) {
        addReply(c, shared.mbulkhdr[3]);
        addReply(c, shared.unsubscribebulk);
        addReply(c, shared.nullbulk);
        addReplyLongLong(c, dictSize(c->pubsub_channels) +
                            listLength(c->pubsub_patterns));
    }
    dictReleaseIterator(di);
    return total;
}
/* Unsubscribe from all the channels. Return the number of channels the
 * client was subscribed to. */
int pubsubUnsubscribeAllChannels(redisClient *c, int notify) {
    int n = 0;
    dictEntry *entry;
    dictIterator *iter = dictGetSafeIterator(c->pubsub_channels);

    while ((entry = dictNext(iter)) != NULL)
        n += pubsubUnsubscribeChannel(c, dictGetKey(entry), notify);
    dictReleaseIterator(iter);
    return n;
}
void slotsmgrt_cleanup() { dictIterator *di = dictGetSafeIterator(server.slotsmgrt_cached_sockfds); dictEntry *de; while((de = dictNext(di)) != NULL) { slotsmgrt_sockfd *pfd = dictGetVal(de); if ((server.unixtime - pfd->lasttime) > 30) { redisLog(REDIS_WARNING, "slotsmgrt: timeout target %s, lasttime = %ld, now = %ld", (char *)dictGetKey(de), pfd->lasttime, server.unixtime); dictDelete(server.slotsmgrt_cached_sockfds, dictGetKey(de)); close(pfd->fd); zfree(pfd); } } dictReleaseIterator(di); }
/* Reset data for the specified event, or all the events data if 'event' is
 * NULL.
 *
 * Note: this is O(N) even when event_to_reset is not NULL because it makes
 * the code simpler and we have a small fixed max number of events.
 *
 * Returns the number of events that were reset. */
int latencyResetEvent(char *event_to_reset) {
    int removed = 0;
    dictEntry *entry;
    dictIterator *iter = dictGetSafeIterator(server.latency_events);

    while ((entry = dictNext(iter)) != NULL) {
        char *name = dictGetKey(entry);
        /* A NULL argument means "reset everything". */
        if (event_to_reset == NULL || strcasecmp(name, event_to_reset) == 0) {
            dictDelete(server.latency_events, name);
            removed++;
        }
    }
    dictReleaseIterator(iter);
    return removed;
}
/* Serialize the trigger-scripts dictionaries into the RDB stream.
 *
 * Layout:
 *   - one 4-byte event id per DB (server.dbnum entries), then
 *   - per DB: a 4-byte entry count followed by, for each entry, the key
 *     string, a 4-byte event id, a 4-byte db id, and the lua script
 *     object (type byte + payload).
 *
 * On any write error the function returns early; the caller is expected
 * to discard the partially written rio stream.
 *
 * BUGFIX: the original leaked the dict iterator on every early return
 * and also re-created 'di' for each DB without releasing the previous
 * one (only the last iterator was released, after the loop). The
 * iterator is now released on every exit path. */
void rdb_save_triggles(rio *rdb) {
    dictIterator *di;
    dictEntry *de;
    int i;

    /* One event id per database. */
    for (i = 0; i < server.dbnum; i++) {
        int eventid = server.bridge_db.bridge_event[i];
        rioWrite(rdb, &eventid, 4);
    }

    for (i = 0; i < server.dbnum; i++) {
        dict *d = server.bridge_db.triggle_scipts[i];
        int mysize = dictSize(d);

        rioWrite(rdb, &mysize, 4);
        if (dictSize(d) == 0) continue;

        di = dictGetSafeIterator(d);
        if (!di) return;

        /* Iterate this DB writing every entry. */
        while ((de = dictNext(di)) != NULL) {
            sds keystr = dictGetKey(de);
            struct bridge_db_triggle_t *tmptrg;
            robj key;
            int event_id, db_id;

            initStaticStringObject(key, keystr);
            if (rdbSaveStringObject(rdb, &key) == -1) {
                dictReleaseIterator(di);
                return;
            }
            tmptrg = dictGetVal(de);
            event_id = tmptrg->event;
            rioWrite(rdb, &event_id, 4);
            db_id = tmptrg->dbid;
            rioWrite(rdb, &db_id, 4);
            if (rdbSaveObjectType(rdb, tmptrg->lua_scripts) == -1 ||
                rdbSaveObject(rdb, tmptrg->lua_scripts) == -1) {
                dictReleaseIterator(di);
                return;
            }
        }
        dictReleaseIterator(di);
    }
}
/* iterator for DB dictionary */ roDictIter *roDBGetIterator(int id) { roDictIter *iter; iter = (roDictIter *)zmalloc(sizeof(roDictIter)); cowLock(); iter->di = dictGetSafeIterator(server.db[id].dict); iter->hdict = server.db[id].dict; iter->ar = NULL; iter->pos = 0; if (server.isBackgroundSaving != 0) { if (server.cowSaveDbExt[id].dictArray != NULL) { iter->ar = server.cowSaveDbExt[id].dictArray; } server.cowCurIters.curDbDictIter = iter; } cowUnlock(); return iter; }
/* Convert a hash dictionary (zset score dict) into a flat copy-on-write
 * array encoding. Entries are copied without refcounting; each array
 * element embeds its score and points its value at that embedded copy.
 * The copied entries are chained through their next pointers so the
 * array can still be walked like a dict bucket chain. */
cowDictZArray *cowConvertDictToZArray(dict *hdict) {
    dictIterator *it;
    dictEntry *src;
    dictZEntry *dst;
    dictZEntry *prev = NULL;
    int copied = 0;
    int capacity;
    cowDictZArray *arr;

    /* Size the array generously: max of used entries and table slots. */
    capacity = dictSize(hdict) > dictSlots(hdict) ?
               dictSize(hdict) : dictSlots(hdict);
    arr = (cowDictZArray *)zmalloc(sizeof(cowDictZArray) +
                                   (capacity * sizeof(dictZEntry)));

    /* Copy all entries without refcounting or duplicating values; the
     * dict cannot be memcpy'd wholesale because entries are allocated
     * individually. */
    it = dictGetSafeIterator(hdict);
    dst = &arr->zde[0];
    while ((src = dictNext(it)) != NULL && copied < capacity) {
        double *score = (double *)dictGetEntryVal(src);

        /* Embed the score in the array and point val at the embedded copy. */
        dst->de.key = src->key;
        dst->score = *score;
        dst->de.val = &dst->score;
        /* Chain the previous element to this one. */
        if (prev != NULL)
            prev->de.next = &dst->de;
        prev = dst;
        dst++;
        copied++;
    }
    if (prev != NULL)
        prev->de.next = NULL;
    arr->numele = copied;
    dictReleaseIterator(it);
    return arr;
}
/* Unsubscribe from all the channels. Return the number of channels the
 * client was subscribed to. If 'notify' is set and there were no
 * subscriptions, still send an unsubscribe reply. */
int smempubsubUnsubscribeAllChannels(client *c, int notify) {
    int total = 0;
    dictEntry *entry;
    dictIterator *iter = dictGetSafeIterator(c->smempubsub_channels);

    while ((entry = dictNext(iter)) != NULL) {
        robj *channel = dictGetKey(entry);
        total += smempubsubUnsubscribeChannel(c, channel, notify);
    }

    /* We were subscribed to nothing? Still reply to the client. */
    if (notify && total == 0) {
        addReply(c, shared.mbulkhdr[3]);
        addReply(c, shared.unsubscribebulk);
        addReply(c, shared.nullbulk);
        addReplyLongLong(c, dictSize(c->smempubsub_channels));
    }
    dictReleaseIterator(iter);
    return total;
}
/* Convert a hash dictionary into a flat copy-on-write array encoding.
 * Key and value pointers are copied as-is (no refcount changes) and the
 * copied entries are chained through their next pointers so the array
 * can still be walked like a dict bucket chain. */
cowDictArray *cowConvertDictToArray(dict *hdict) {
    dictIterator *it;
    dictEntry *src;
    dictEntry *dst;
    dictEntry *prev = NULL;
    int copied = 0;
    int capacity;
    cowDictArray *arr;

    /* Size the array generously: max of used entries and table slots. */
    capacity = dictSize(hdict) > dictSlots(hdict) ?
               dictSize(hdict) : dictSlots(hdict);
    arr = (cowDictArray *)zmalloc(sizeof(cowDictArray) +
                                  (capacity * sizeof(dictEntry)));

    /* Copy all entries without refcounting or duplicating values; the
     * dict cannot be memcpy'd wholesale because entries are allocated
     * individually. */
    it = dictGetSafeIterator(hdict);
    dst = &arr->de[0];
    while ((src = dictNext(it)) != NULL && copied < capacity) {
        /* Copy object value to dict array; do not incr ref count. */
        dst->val = src->val;
        dst->key = src->key;
        /* Chain the previous element to this one. */
        if (prev != NULL)
            prev->next = dst;
        prev = dst;
        dst++;
        copied++;
    }
    if (prev != NULL)
        prev->next = NULL;
    arr->numele = copied;
    dictReleaseIterator(it);
    return arr;
}
/* Write a sequence of commands able to fully rebuild the dataset into * "filename". Used both by REWRITEAOF and BGREWRITEAOF. * * In order to minimize the number of commands needed in the rewritten * log Redis uses variadic commands when possible, such as RPUSH, SADD * and ZADD. However at max REDIS_AOF_REWRITE_ITEMS_PER_CMD items per time * are inserted using a single command. */ int rewriteAppendOnlyFile(char *filename) { dictIterator *di = NULL; dictEntry *de; rio aof; FILE *fp; char tmpfile[256]; int j; long long now = mstime(); /* Note that we have to use a different temp name here compared to the * one used by rewriteAppendOnlyFileBackground() function. */ snprintf(tmpfile,256,"temp-rewriteaof-%d.aof", (int) getpid()); fp = fopen(tmpfile,"w"); if (!fp) { redisLog(REDIS_WARNING, "Opening the temp file for AOF rewrite in rewriteAppendOnlyFile(): %s", strerror(errno)); return REDIS_ERR; } rioInitWithFile(&aof,fp); for (j = 0; j < server.dbnum; j++) { char selectcmd[] = "*2\r\n$6\r\nSELECT\r\n"; redisDb *db = server.db+j; dict *d = db->dict; if (dictSize(d) == 0) continue; di = dictGetSafeIterator(d); if (!di) { fclose(fp); return REDIS_ERR; } /* SELECT the new DB */ if (rioWrite(&aof,selectcmd,sizeof(selectcmd)-1) == 0) goto werr; if (rioWriteBulkLongLong(&aof,j) == 0) goto werr; /* Iterate this DB writing every entry */ while((de = dictNext(di)) != NULL) { sds keystr; robj key, *o; long long expiretime; keystr = dictGetKey(de); o = dictGetVal(de); initStaticStringObject(key,keystr); expiretime = getExpire(db,&key); /* Save the key and associated value */ if (o->type == REDIS_STRING) { /* Emit a SET command */ char cmd[]="*3\r\n$3\r\nSET\r\n"; if (rioWrite(&aof,cmd,sizeof(cmd)-1) == 0) goto werr; /* Key and value */ if (rioWriteBulkObject(&aof,&key) == 0) goto werr; if (rioWriteBulkObject(&aof,o) == 0) goto werr; } else if (o->type == REDIS_LIST) { if (rewriteListObject(&aof,&key,o) == 0) goto werr; } else if (o->type == REDIS_SET) { if 
(rewriteSetObject(&aof,&key,o) == 0) goto werr; } else if (o->type == REDIS_ZSET) { if (rewriteSortedSetObject(&aof,&key,o) == 0) goto werr; } else if (o->type == REDIS_HASH) { if (rewriteHashObject(&aof,&key,o) == 0) goto werr; } else { redisPanic("Unknown object type"); } /* Save the expire time */ if (expiretime != -1) { char cmd[]="*3\r\n$9\r\nPEXPIREAT\r\n"; /* If this key is already expired skip it */ if (expiretime < now) continue; if (rioWrite(&aof,cmd,sizeof(cmd)-1) == 0) goto werr; if (rioWriteBulkObject(&aof,&key) == 0) goto werr; if (rioWriteBulkLongLong(&aof,expiretime) == 0) goto werr; } } dictReleaseIterator(di); } /* Make sure data will not remain on the OS's output buffers */ fflush(fp); aof_fsync(fileno(fp)); fclose(fp); /* Use RENAME to make sure the DB file is changed atomically only * if the generate DB file is ok. */ if (rename(tmpfile,filename) == -1) { redisLog(REDIS_WARNING,"Error moving temp append only file on the final destination: %s", strerror(errno)); unlink(tmpfile); return REDIS_ERR; } redisLog(REDIS_NOTICE,"SYNC append only file rewrite performed"); return REDIS_OK; werr: fclose(fp); unlink(tmpfile); redisLog(REDIS_WARNING,"Write error writing append only file on disk: %s", strerror(errno)); if (di) dictReleaseIterator(di); return REDIS_ERR; }
/* Write a sequence of commands able to fully rebuild the dataset into * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */ int rewriteAppendOnlyFile(char *filename) { dictIterator *di = NULL; dictEntry *de; FILE *fp; char tmpfile[256]; int j; time_t now = time(NULL); /* Note that we have to use a different temp name here compared to the * one used by rewriteAppendOnlyFileBackground() function. */ snprintf(tmpfile,256,"temp-rewriteaof-%d.aof", (int) getpid()); fp = fopen(tmpfile,"w"); if (!fp) { redisLog(REDIS_WARNING, "Failed rewriting the append only file: %s", strerror(errno)); return REDIS_ERR; } for (j = 0; j < server.dbnum; j++) { char selectcmd[] = "*2\r\n$6\r\nSELECT\r\n"; redisDb *db = server.db+j; dict *d = db->dict; if (dictSize(d) == 0) continue; di = dictGetSafeIterator(d); if (!di) { fclose(fp); return REDIS_ERR; } /* SELECT the new DB */ if (fwrite(selectcmd,sizeof(selectcmd)-1,1,fp) == 0) goto werr; if (fwriteBulkLongLong(fp,j) == 0) goto werr; /* Iterate this DB writing every entry */ while((de = dictNext(di)) != NULL) { sds keystr = dictGetEntryKey(de); robj key, *o; time_t expiretime; int swapped; keystr = dictGetEntryKey(de); o = dictGetEntryVal(de); initStaticStringObject(key,keystr); /* If the value for this key is swapped, load a preview in memory. 
* We use a "swapped" flag to remember if we need to free the * value object instead to just increment the ref count anyway * in order to avoid copy-on-write of pages if we are forked() */ if (!server.vm_enabled || o->storage == REDIS_VM_MEMORY || o->storage == REDIS_VM_SWAPPING) { swapped = 0; } else { o = vmPreviewObject(o); swapped = 1; } expiretime = getExpire(db,&key); /* Save the key and associated value */ if (o->type == REDIS_STRING) { /* Emit a SET command */ char cmd[]="*3\r\n$3\r\nSET\r\n"; if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr; /* Key and value */ if (fwriteBulkObject(fp,&key) == 0) goto werr; if (fwriteBulkObject(fp,o) == 0) goto werr; } else if (o->type == REDIS_LIST) { /* Emit the RPUSHes needed to rebuild the list */ char cmd[]="*3\r\n$5\r\nRPUSH\r\n"; if (o->encoding == REDIS_ENCODING_ZIPLIST) { unsigned char *zl = o->ptr; unsigned char *p = ziplistIndex(zl,0); unsigned char *vstr; unsigned int vlen; long long vlong; while(ziplistGet(p,&vstr,&vlen,&vlong)) { if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr; if (fwriteBulkObject(fp,&key) == 0) goto werr; if (vstr) { if (fwriteBulkString(fp,(char*)vstr,vlen) == 0) goto werr; } else { if (fwriteBulkLongLong(fp,vlong) == 0) goto werr; } p = ziplistNext(zl,p); } } else if (o->encoding == REDIS_ENCODING_LINKEDLIST) { list *list = o->ptr; listNode *ln; listIter li; listRewind(list,&li); while((ln = listNext(&li))) { robj *eleobj = listNodeValue(ln); if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr; if (fwriteBulkObject(fp,&key) == 0) goto werr; if (fwriteBulkObject(fp,eleobj) == 0) goto werr; } } else { redisPanic("Unknown list encoding"); } } else if (o->type == REDIS_SET) { char cmd[]="*3\r\n$4\r\nSADD\r\n"; /* Emit the SADDs needed to rebuild the set */ if (o->encoding == REDIS_ENCODING_INTSET) { int ii = 0; int64_t llval; while(intsetGet(o->ptr,ii++,&llval)) { if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr; if (fwriteBulkObject(fp,&key) == 0) goto werr; if 
(fwriteBulkLongLong(fp,llval) == 0) goto werr; } } else if (o->encoding == REDIS_ENCODING_HT) { dictIterator *di = dictGetIterator(o->ptr); dictEntry *de; while((de = dictNext(di)) != NULL) { robj *eleobj = dictGetEntryKey(de); if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr; if (fwriteBulkObject(fp,&key) == 0) goto werr; if (fwriteBulkObject(fp,eleobj) == 0) goto werr; } dictReleaseIterator(di); } else { redisPanic("Unknown set encoding"); } } else if (o->type == REDIS_ZSET) { /* Emit the ZADDs needed to rebuild the sorted set */ char cmd[]="*4\r\n$4\r\nZADD\r\n"; if (o->encoding == REDIS_ENCODING_ZIPLIST) { unsigned char *zl = o->ptr; unsigned char *eptr, *sptr; unsigned char *vstr; unsigned int vlen; long long vll; double score; eptr = ziplistIndex(zl,0); redisAssert(eptr != NULL); sptr = ziplistNext(zl,eptr); redisAssert(sptr != NULL); while (eptr != NULL) { redisAssert(ziplistGet(eptr,&vstr,&vlen,&vll)); score = zzlGetScore(sptr); if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr; if (fwriteBulkObject(fp,&key) == 0) goto werr; if (fwriteBulkDouble(fp,score) == 0) goto werr; if (vstr != NULL) { if (fwriteBulkString(fp,(char*)vstr,vlen) == 0) goto werr; } else { if (fwriteBulkLongLong(fp,vll) == 0) goto werr; } zzlNext(zl,&eptr,&sptr); } } else if (o->encoding == REDIS_ENCODING_SKIPLIST) { zset *zs = o->ptr; dictIterator *di = dictGetIterator(zs->dict); dictEntry *de; while((de = dictNext(di)) != NULL) { robj *eleobj = dictGetEntryKey(de); double *score = dictGetEntryVal(de); if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr; if (fwriteBulkObject(fp,&key) == 0) goto werr; if (fwriteBulkDouble(fp,*score) == 0) goto werr; if (fwriteBulkObject(fp,eleobj) == 0) goto werr; } dictReleaseIterator(di); } else { redisPanic("Unknown sorted set encoding"); } } else if (o->type == REDIS_HASH) { char cmd[]="*4\r\n$4\r\nHSET\r\n"; /* Emit the HSETs needed to rebuild the hash */ if (o->encoding == REDIS_ENCODING_ZIPMAP) { unsigned char *p = zipmapRewind(o->ptr); unsigned 
char *field, *val; unsigned int flen, vlen; while((p = zipmapNext(p,&field,&flen,&val,&vlen)) != NULL) { if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr; if (fwriteBulkObject(fp,&key) == 0) goto werr; if (fwriteBulkString(fp,(char*)field,flen) == 0) goto werr; if (fwriteBulkString(fp,(char*)val,vlen) == 0) goto werr; } } else { dictIterator *di = dictGetIterator(o->ptr); dictEntry *de; while((de = dictNext(di)) != NULL) { robj *field = dictGetEntryKey(de); robj *val = dictGetEntryVal(de); if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr; if (fwriteBulkObject(fp,&key) == 0) goto werr; if (fwriteBulkObject(fp,field) == 0) goto werr; if (fwriteBulkObject(fp,val) == 0) goto werr; } dictReleaseIterator(di); } } else { redisPanic("Unknown object type"); } /* Save the expire time */ if (expiretime != -1) { char cmd[]="*3\r\n$8\r\nEXPIREAT\r\n"; /* If this key is already expired skip it */ if (expiretime < now) continue; if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr; if (fwriteBulkObject(fp,&key) == 0) goto werr; if (fwriteBulkLongLong(fp,expiretime) == 0) goto werr; } if (swapped) decrRefCount(o); } dictReleaseIterator(di); } /* Make sure data will not remain on the OS's output buffers */ fflush(fp); aof_fsync(fileno(fp)); fclose(fp); /* Use RENAME to make sure the DB file is changed atomically only * if the generate DB file is ok. */ if (rename(tmpfile,filename) == -1) { redisLog(REDIS_WARNING,"Error moving temp append only file on the final destination: %s", strerror(errno)); unlink(tmpfile); return REDIS_ERR; } redisLog(REDIS_NOTICE,"SYNC append only file rewrite performed"); return REDIS_OK; werr: fclose(fp); unlink(tmpfile); redisLog(REDIS_WARNING,"Write error writing append only file on disk: %s", strerror(errno)); if (di) dictReleaseIterator(di); return REDIS_ERR; }
/* Create a human readable report of latency events for this Disque instance.
 *
 * The report lists per-event statistics and, based on which events were
 * observed and on the server configuration, appends a set of tuning
 * suggestions. Returns a newly allocated sds string the caller must free. */
sds createLatencyReport(void) {
    sds report = sdsempty();
    /* Advice flags: each is set when the corresponding suggestion should
     * be appended at the end of the report; 'advices' counts them. */
    int advise_better_vm = 0;       /* Better virtual machines. */
    int advise_slowlog_enabled = 0; /* Enable slowlog. */
    int advise_slowlog_tuning = 0;  /* Reconfigure slowlog. */
    int advise_slowlog_inspect = 0; /* Check your slowlog. */
    int advise_disk_contention = 0; /* Try to lower disk contention. */
    int advise_scheduler = 0;       /* Intrinsic latency. */
    int advise_data_writeback = 0;  /* data=writeback. */
    int advise_no_appendfsync = 0;  /* don't fsync during rewrites. */
    int advise_local_disk = 0;      /* Avoid remote disks. */
    int advise_ssd = 0;             /* Use an SSD drive. */
    int advise_write_load_info = 0; /* Print info about AOF and write load. */
    int advise_hz = 0;              /* Use higher HZ. */
    int advise_large_objects = 0;   /* Deletion of large objects. */
    int advise_relax_fsync_policy = 0; /* appendfsync always is slow. */
    int advices = 0;

    /* Return ASAP if the latency engine is disabled and it looks like it
     * was never enabled so far. */
    if (dictSize(server.latency_events) == 0 &&
        server.latency_monitor_threshold == 0) {
        report = sdscat(report,"I'm sorry, Dave, I can't do that. Latency monitoring is disabled in this Disque instance. You may use \"CONFIG SET latency-monitor-threshold <milliseconds>.\" in order to enable it. If we weren't in a deep space mission I'd suggest to take a look at http://disque.io/topics/latency-monitor.\n");
        return report;
    }

    /* Show all the events stats and add for each event some event-related
     * comment depending on the values. */
    dictIterator *di;
    dictEntry *de;
    int eventnum = 0;

    di = dictGetSafeIterator(server.latency_events);
    while((de = dictNext(di)) != NULL) {
        char *event = dictGetKey(de);
        struct latencyTimeSeries *ts = dictGetVal(de);
        struct latencyStats ls;

        if (ts == NULL) continue;
        eventnum++;
        if (eventnum == 1) {
            /* Header emitted once, before the first event entry. */
            report = sdscat(report,"Dave, I have observed latency spikes in this Disque instance. You don't mind talking about it, do you Dave?\n\n");
        }
        analyzeLatencyForEvent(event,&ls);

        report = sdscatprintf(report,
            "%d. %s: %d latency spikes (average %lums, mean deviation %lums, period %.2f sec). Worst all time event %lums.",
            eventnum, event,
            ls.samples,
            (unsigned long) ls.avg,
            (unsigned long) ls.mad,
            (double) ls.period/ls.samples,
            (unsigned long) ts->max);

        /* Fork */
        if (!strcasecmp(event,"fork")) {
            char *fork_quality;
            /* Classify the fork rate (GB/sec) and advise a better VM when
             * it is below 25 GB/sec. */
            if (server.stat_fork_rate < 10) {
                fork_quality = "terrible";
                advise_better_vm = 1;
                advices++;
            } else if (server.stat_fork_rate < 25) {
                fork_quality = "poor";
                advise_better_vm = 1;
                advices++;
            } else if (server.stat_fork_rate < 100) {
                fork_quality = "good";
            } else {
                fork_quality = "excellent";
            }
            report = sdscatprintf(report,
                " Fork rate is %.2f GB/sec (%s).", server.stat_fork_rate,
                fork_quality);
        }

        /* Potentially commands. */
        if (!strcasecmp(event,"command")) {
            if (server.slowlog_log_slower_than == 0) {
                advise_slowlog_enabled = 1;
                advices++;
            } else if (server.slowlog_log_slower_than/1000 >
                       server.latency_monitor_threshold) {
                advise_slowlog_tuning = 1;
                advices++;
            }
            advise_slowlog_inspect = 1;
            advise_large_objects = 1;
            advices += 2;
        }

        /* fast-command. */
        if (!strcasecmp(event,"fast-command")) {
            advise_scheduler = 1;
            advices++;
        }

        /* AOF and I/O. */
        if (!strcasecmp(event,"aof-write-pending-fsync")) {
            advise_local_disk = 1;
            advise_disk_contention = 1;
            advise_ssd = 1;
            advise_data_writeback = 1;
            advices += 4;
        }
        if (!strcasecmp(event,"aof-write-active-child")) {
            advise_no_appendfsync = 1;
            advise_data_writeback = 1;
            advise_ssd = 1;
            advices += 3;
        }
        if (!strcasecmp(event,"aof-write-alone")) {
            advise_local_disk = 1;
            advise_data_writeback = 1;
            advise_ssd = 1;
            advices += 3;
        }
        if (!strcasecmp(event,"aof-fsync-always")) {
            advise_relax_fsync_policy = 1;
            advices++;
        }
        /* NOTE(review): "rdb-unlik-temp-file" looks like a typo of
         * "rdb-unlink-temp-file", but it must match the name used where
         * the event is registered -- confirm before renaming. */
        if (!strcasecmp(event,"aof-fstat") ||
            !strcasecmp(event,"rdb-unlik-temp-file")) {
            advise_disk_contention = 1;
            advise_local_disk = 1;
            advices += 2;
        }
        if (!strcasecmp(event,"aof-rewrite-diff-write") ||
            !strcasecmp(event,"aof-rename")) {
            advise_write_load_info = 1;
            advise_data_writeback = 1;
            advise_ssd = 1;
            advise_local_disk = 1;
            advices += 4;
        }

        /* Expire cycle. */
        if (!strcasecmp(event,"expire-cycle")) {
            advise_hz = 1;
            advise_large_objects = 1;
            advices += 2;
        }

        /* Eviction cycle. */
        if (!strcasecmp(event,"eviction-cycle")) {
            advise_large_objects = 1;
            advices++;
        }

        report = sdscatlen(report,"\n",1);
    }
    dictReleaseIterator(di);

    /* Append either a "nothing observed" note, a "no advice" note, or the
     * list of suggestions accumulated above. */
    if (eventnum == 0) {
        report = sdscat(report,"Dave, no latency spike was observed during the lifetime of this Disque instance, not in the slightest bit. I honestly think you ought to sit down calmly, take a stress pill, and think things over.\n");
    } else if (advices == 0) {
        report = sdscat(report,"\nWhile there are latency events logged, I'm not able to suggest any easy fix. Please use the Disque community to get some help, providing this report in your help request.\n");
    } else {
        /* Add all the suggestions accumulated so far. */

        /* Better VM. */
        report = sdscat(report,"\nI have a few advices for you:\n\n");
        if (advise_better_vm) {
            report = sdscat(report,"- If you are using a virtual machine, consider upgrading it with a faster one using an hypervisior that provides less latency during fork() calls. Xen is known to have poor fork() performance. Even in the context of the same VM provider, certain kinds of instances can execute fork faster than others.\n");
        }

        /* Slow log. */
        if (advise_slowlog_enabled) {
            report = sdscatprintf(report,"- There are latency issues with potentially slow commands you are using. Try to enable the Slow Log Disque feature using the command 'CONFIG SET slowlog-log-slower-than %llu'. If the Slow log is disabled Disque is not able to log slow commands execution for you.\n", (unsigned long long)server.latency_monitor_threshold*1000);
        }
        if (advise_slowlog_tuning) {
            report = sdscatprintf(report,"- Your current Slow Log configuration only logs events that are slower than your configured latency monitor threshold. Please use 'CONFIG SET slowlog-log-slower-than %llu'.\n", (unsigned long long)server.latency_monitor_threshold*1000);
        }
        if (advise_slowlog_inspect) {
            report = sdscat(report,"- Check your Slow Log to understand what are the commands you are running which are too slow to execute. Please check http://disque.io/commands/slowlog for more information.\n");
        }

        /* Intrinsic latency. */
        if (advise_scheduler) {
            report = sdscat(report,"- The system is slow to execute Disque code paths not containing system calls. This usually means the system does not provide Disque CPU time to run for long periods. You should try to:\n"
            " 1) Lower the system load.\n"
            " 2) Use a computer / VM just for Disque if you are running other softawre in the same system.\n"
            " 3) Check if you have a \"noisy neighbour\" problem.\n"
            " 4) Check with 'disque-cli --intrinsic-latency 100' what is the intrinsic latency in your system.\n"
            " 5) Check if the problem is allocator-related by recompiling Disque with MALLOC=libc, if you are using Jemalloc. However this may create fragmentation problems.\n");
        }

        /* AOF / Disk latency. */
        if (advise_local_disk) {
            report = sdscat(report,"- It is strongly advised to use local disks for persistence, especially if you are using AOF. Remote disks provided by platform-as-a-service providers are known to be slow.\n");
        }
        if (advise_ssd) {
            report = sdscat(report,"- SSD disks are able to reduce fsync latency, and total time needed for snapshotting and AOF log rewriting (resulting in smaller memory usage and smaller final AOF rewrite buffer flushes). With extremely high write load SSD disks can be a good option. However Disque should perform reasonably with high load using normal disks. Use this advice as a last resort.\n");
        }
        if (advise_data_writeback) {
            report = sdscat(report,"- Mounting ext3/4 filesystems with data=writeback can provide a performance boost compared to data=ordered, however this mode of operation provides less guarantees, and sometimes it can happen that after a hard crash the AOF file will have an half-written command at the end and will require to be repaired before Disque restarts.\n");
        }
        if (advise_disk_contention) {
            report = sdscat(report,"- Try to lower the disk contention. This is often caused by other disk intensive processes running in the same computer (including other Disque instances).\n");
        }
        if (advise_no_appendfsync) {
            report = sdscat(report,"- Assuming from the point of view of data safety this is viable in your environment, you could try to enable the 'no-appendfsync-on-rewrite' option, so that fsync will not be performed while there is a child rewriting the AOF file or producing an RDB file (the moment where there is high disk contention).\n");
        }
        if (advise_relax_fsync_policy && server.aof_fsync == AOF_FSYNC_ALWAYS) {
            report = sdscat(report,"- Your fsync policy is set to 'always'. It is very hard to get good performances with such a setup, if possible try to relax the fsync policy to 'onesec'.\n");
        }
        if (advise_write_load_info) {
            report = sdscat(report,"- Latency during the AOF atomic rename operation or when the final difference is flushed to the AOF file at the end of the rewrite, sometimes is caused by very high write load, causing the AOF buffer to get very large. If possible try to send less commands to accomplish the same work, or use Lua scripts to group multiple operations into a single EVALSHA call.\n");
        }
        if (advise_hz && server.hz < 100) {
            report = sdscat(report,"- In order to make the Disque keys expiring process more incremental, try to set the 'hz' configuration parameter to 100 using 'CONFIG SET hz 100'.\n");
        }
        if (advise_large_objects) {
            report = sdscat(report,"- Deleting, expiring or evicting (because of maxmemory policy) large objects is a blocking operation. If you have very large objects that are often deleted, expired, or evicted, try to fragment those objects into multiple smaller objects.\n");
        }
    }
    return report;
}