// ================================== READ DATA FROOM RDB FILE. ==================================== // static void rdb_check_crc(int fd, uint64_t real_crc) { uint64_t expected_crc = 0; rdb_crc_read(fd, &expected_crc, 8); memrev64ifbe(expected_crc); if (real_crc != expected_crc) { logger(ERROR, "checksum error, expect %llu, real %llu.\n", expected_crc, real_crc); } }
/* Store value in slot pos of the set's contents array, using the element
 * width selected by is->encoding (64-bit, 32-bit, otherwise 16-bit). The
 * freshly written slot is then passed to the matching memrevNNifbe helper. */
static void _intsetSet(intset *is, int pos, int64_t value) {
    if (is->encoding == INTSET_ENC_INT64) {
        int64_t *slot = (int64_t*)is->contents + pos;
        *slot = value;
        memrev64ifbe(slot);
        return;
    }

    if (is->encoding == INTSET_ENC_INT32) {
        int32_t *slot = (int32_t*)is->contents + pos;
        *slot = (int32_t)value;
        memrev32ifbe(slot);
        return;
    }

    /* Any other encoding is treated as 16-bit elements. */
    {
        int16_t *slot = (int16_t*)is->contents + pos;
        *slot = (int16_t)value;
        memrev16ifbe(slot);
    }
}
/*
 * Write value into element pos of the set, honouring the set's encoding.
 *
 * The encoding field is read through intrev32ifbe(); the contents buffer is
 * then viewed as an array of the matching integer width.
 */
static void _intsetSet(intset *is, int pos, int64_t value) {
    uint32_t enc = intrev32ifbe(is->encoding);

    if (enc == INTSET_ENC_INT64) {
        int64_t *cells = (int64_t*)is->contents;
        cells[pos] = value;            /* elements are laid out back to back */
        memrev64ifbe(&cells[pos]);     /* normalise the stored byte order */
    } else if (enc == INTSET_ENC_INT32) {
        int32_t *cells = (int32_t*)is->contents;
        cells[pos] = value;
        memrev32ifbe(&cells[pos]);
    } else {
        int16_t *cells = (int16_t*)is->contents;
        cells[pos] = value;
        memrev16ifbe(&cells[pos]);
    }
}
//将is中pos位置上的值设为value static void _intsetSet(intset *is, int pos, int64_t value) { uint32_t encoding = intrev32ifbe(is->encoding); //根据编码,将contents转换为对应得数组类型,然后对该位置的元素赋值 if (encoding == INTSET_ENC_INT64) { ((int64_t*)is->contents)[pos] = value; memrev64ifbe(((int64_t*)is->contents)+pos); } else if (encoding == INTSET_ENC_INT32) { ((int32_t*)is->contents)[pos] = value; memrev32ifbe(((int32_t*)is->contents)+pos); } else { ((int16_t*)is->contents)[pos] = value; memrev16ifbe(((int16_t*)is->contents)+pos); } }
/*
 * Return the element stored at index pos, interpreting the contents buffer
 * with the element width given by enc (not necessarily the set's own
 * encoding field, which this function never reads).
 *
 * T = theta(1)
 */
static int64_t _intsetGetEncoded(intset *is, int pos, uint8_t enc) {
    if (enc == INTSET_ENC_INT64) {
        int64_t wide;
        memcpy(&wide, &((int64_t*)is->contents)[pos], sizeof(wide));
        memrev64ifbe(&wide);
        return wide;
    }

    if (enc == INTSET_ENC_INT32) {
        int32_t mid;
        memcpy(&mid, &((int32_t*)is->contents)[pos], sizeof(mid));
        memrev32ifbe(&mid);
        return mid;
    }

    /* Every other enc value is read as a 16-bit element. */
    int16_t narrow;
    memcpy(&narrow, &((int16_t*)is->contents)[pos], sizeof(narrow));
    memrev16ifbe(&narrow);
    return narrow;
}
int rl_write_signal(const char *signal_name, const char *data, size_t datalen) { char header[FIFO_HEADER_SIZE]; put_4bytes((unsigned char *)header, datalen); uint64_t crc = rl_crc64(0, (unsigned char *)data, datalen); memrev64ifbe(&crc); memcpy(&header[4], &crc, 8); int fd = open(signal_name, O_WRONLY | O_NONBLOCK); if (fd == -1) { // fifo may not always exist on our code // it is a way to signal between processes, but it is show and forget return RL_OK; } write(fd, header, FIFO_HEADER_SIZE); write(fd, data, datalen); close(fd); return RL_OK; }
/* Set the value at pos, using the configured encoding.
 *
 * The slot at index pos of the underlying array is overwritten with value,
 * using the element width dictated by the set's encoding.
 *
 * T = O(1) */
static void _intsetSet(intset *is, int pos, int64_t value) {
    /* Fetch the set's encoding. */
    uint32_t encoding = intrev32ifbe(is->encoding);

    /* In each branch:
     *   1. view is->contents as an array of the encoded element type;
     *   2. take the address of element pos;
     *   3. store value there;
     *   4. let memrevNNifbe fix the byte order of that element if needed. */
    if (encoding == INTSET_ENC_INT64) {
        int64_t *elem = (int64_t*)is->contents + pos;
        *elem = value;
        memrev64ifbe(elem);
    } else if (encoding == INTSET_ENC_INT32) {
        int32_t *elem = (int32_t*)is->contents + pos;
        *elem = value;
        memrev32ifbe(elem);
    } else {
        int16_t *elem = (int16_t*)is->contents + pos;
        *elem = value;
        memrev16ifbe(elem);
    }
}
//对于给定的编码enc以及位置pos,返回is中的值 static int64_t _intsetGetEncoded(intset *is, int pos, uint8_t enc) { int64_t v64; int32_t v32; int16_t v16; if (enc == INTSET_ENC_INT64) { //如果是64的编码,将contents转换为指向int64_t的指针,移动到第pos个元素的位置, //将这里起得64位内存复制到v64中。 memcpy(&v64,((int64_t*)is->contents)+pos,sizeof(v64)); memrev64ifbe(&v64); return v64; } else if (enc == INTSET_ENC_INT32) { //同上,只是这时contents相当与是存放int32_t的数组 memcpy(&v32,((int32_t*)is->contents)+pos,sizeof(v32)); memrev32ifbe(&v32); return v32; } else { //同上,只是这时contents相当与是存放int16_t的数组 memcpy(&v16,((int16_t*)is->contents)+pos,sizeof(v16)); memrev16ifbe(&v16); return v16; } }
/* Return the value at pos, given an encoding.
 *
 * The element at index pos of the set's underlying array is read using the
 * element width described by enc and returned widened to int64_t.
 *
 * T = O(1) */
static int64_t _intsetGetEncoded(intset *is, int pos, uint8_t enc) {
    /* For each width: compute the element's address in the typed view of
     * is->contents, memcpy exactly that many bytes into a local, let
     * memrevNNifbe convert the byte order if needed, and return it. */
    if (enc == INTSET_ENC_INT64) {
        const int64_t *src = (int64_t*)is->contents + pos;
        int64_t out;
        memcpy(&out, src, sizeof(out));
        memrev64ifbe(&out);
        return out;
    }
    if (enc == INTSET_ENC_INT32) {
        const int32_t *src = (int32_t*)is->contents + pos;
        int32_t out;
        memcpy(&out, src, sizeof(out));
        memrev32ifbe(&out);
        return out;
    }

    const int16_t *src = (int16_t*)is->contents + pos;
    int16_t out;
    memcpy(&out, src, sizeof(out));
    memrev16ifbe(&out);
    return out;
}
int rl_dump(struct rlite *db, const unsigned char *key, long keylen, unsigned char **data, long *datalen) { int retval; uint64_t crc; unsigned char type; unsigned char *value = NULL, *value2 = NULL; unsigned char *buf = NULL; long buflen; long valuelen, value2len; unsigned char **values = NULL; long i = -1, *valueslen = NULL; uint32_t length; double score; char f[40]; RL_CALL(rl_key_get, RL_FOUND, db, key, keylen, &type, NULL, NULL, NULL, NULL); if (type == RL_TYPE_STRING) { RL_CALL(rl_get, RL_OK, db, key, keylen, &value, &valuelen); RL_MALLOC(buf, sizeof(unsigned char) * (16 + valuelen)); buf[0] = REDIS_RDB_TYPE_STRING; buf[1] = (REDIS_RDB_32BITLEN << 6); length = htonl(valuelen); memcpy(&buf[2], &length, 4); memcpy(&buf[6], value, valuelen); buflen = valuelen + 6; } else if (type == RL_TYPE_LIST) { RL_CALL(rl_lrange, RL_OK, db, key, keylen, 0, -1, &valuelen, &values, &valueslen); buflen = 16; for (i = 0; i < valuelen; i++) { buflen += 5 + valueslen[i]; } RL_MALLOC(buf, sizeof(unsigned char) * buflen); buf[0] = REDIS_RDB_TYPE_LIST; buf[1] = (REDIS_RDB_32BITLEN << 6); length = htonl(valuelen); memcpy(&buf[2], &length, 4); buflen = 6; for (i = 0; i < valuelen; i++) { buf[buflen++] = (REDIS_RDB_32BITLEN << 6); length = htonl(valueslen[i]); memcpy(&buf[buflen], &length, 4); buflen += 4; memcpy(&buf[buflen], values[i], valueslen[i]); buflen += valueslen[i]; } } else if (type == RL_TYPE_SET) { rl_set_iterator *iterator; RL_CALL(rl_smembers, RL_OK, db, &iterator, key, keylen); buflen = 16; length = 0; while ((retval = rl_set_iterator_next(iterator, NULL, &valuelen)) == RL_OK) { buflen += 5 + valuelen; length++; } if (retval != RL_END) { goto cleanup; } RL_MALLOC(buf, sizeof(unsigned char) * buflen); buf[0] = REDIS_RDB_TYPE_SET; buf[1] = (REDIS_RDB_32BITLEN << 6); length = htonl(length); memcpy(&buf[2], &length, 4); buflen = 6; RL_CALL(rl_smembers, RL_OK, db, &iterator, key, keylen); while ((retval = rl_set_iterator_next(iterator, &value, &valuelen)) == RL_OK) { 
buf[buflen++] = (REDIS_RDB_32BITLEN << 6); length = htonl(valuelen); memcpy(&buf[buflen], &length, 4); buflen += 4; memcpy(&buf[buflen], value, valuelen); buflen += valuelen; rl_free(value); value = NULL; } if (retval != RL_END) { goto cleanup; } } else if (type == RL_TYPE_ZSET) { rl_zset_iterator *iterator; RL_CALL(rl_zrange, RL_OK, db, key, keylen, 0, -1, &iterator); buflen = 16; length = 0; while ((retval = rl_zset_iterator_next(iterator, &score, NULL, &valuelen)) == RL_OK) { buflen += 6 + valuelen + snprintf(f, 40, "%lf", score); length++; } if (retval != RL_END) { goto cleanup; } RL_MALLOC(buf, sizeof(unsigned char) * buflen); buf[0] = REDIS_RDB_TYPE_ZSET; buf[1] = (REDIS_RDB_32BITLEN << 6); length = htonl(length); memcpy(&buf[2], &length, 4); buflen = 6; RL_CALL(rl_zrange, RL_OK, db, key, keylen, 0, -1, &iterator); while ((retval = rl_zset_iterator_next(iterator, &score, &value, &valuelen)) == RL_OK) { buf[buflen++] = (REDIS_RDB_32BITLEN << 6); length = htonl(valuelen); memcpy(&buf[buflen], &length, 4); buflen += 4; memcpy(&buf[buflen], value, valuelen); buflen += valuelen; rl_free(value); value = NULL; valuelen = snprintf(f, 40, "%lf", score); buf[buflen++] = valuelen; memcpy(&buf[buflen], f, valuelen); buflen += valuelen; } if (retval != RL_END) { goto cleanup; } } else if (type == RL_TYPE_HASH) { rl_hash_iterator *iterator; RL_CALL(rl_hgetall, RL_OK, db, &iterator, key, keylen); buflen = 16; length = 0; while ((retval = rl_hash_iterator_next(iterator, NULL, &value2len, NULL, &valuelen)) == RL_OK) { buflen += 10 + valuelen + value2len; length++; } if (retval != RL_END) { goto cleanup; } RL_MALLOC(buf, sizeof(unsigned char) * buflen); buf[0] = REDIS_RDB_TYPE_HASH; buf[1] = (REDIS_RDB_32BITLEN << 6); length = htonl(length); memcpy(&buf[2], &length, 4); buflen = 6; RL_CALL(rl_hgetall, RL_OK, db, &iterator, key, keylen); while ((retval = rl_hash_iterator_next(iterator, &value, &valuelen, &value2, &value2len)) == RL_OK) { buf[buflen++] = (REDIS_RDB_32BITLEN << 
6); length = htonl(valuelen); memcpy(&buf[buflen], &length, 4); buflen += 4; memcpy(&buf[buflen], value, valuelen); buflen += valuelen; rl_free(value); value = NULL; buf[buflen++] = (REDIS_RDB_32BITLEN << 6); length = htonl(value2len); memcpy(&buf[buflen], &length, 4); buflen += 4; memcpy(&buf[buflen], value2, value2len); buflen += value2len; rl_free(value2); value2 = NULL; } } else { retval = RL_UNEXPECTED; goto cleanup; } buf[buflen++] = REDIS_RDB_VERSION; buf[buflen++] = REDIS_RDB_VERSION >> 8; crc = rl_crc64(0, buf, buflen); memrev64ifbe(&crc); memcpy(&buf[buflen], &crc, 8); buflen += 8; *data = buf; *datalen = buflen; retval = RL_OK; cleanup: if (values) { for (i = 0; i < valuelen; i++) { rl_free(values[i]); } rl_free(values); } rl_free(valueslen); rl_free(value); rl_free(value2); return retval; }
int rl_read_signal(const char *signal_name, struct timeval *timeout, char **_data, size_t *_datalen) { char header[FIFO_HEADER_SIZE]; uint64_t crc; size_t readbytes; size_t datalen; int fd; int retval; char *data = NULL; fd_set rfds; int oflag = O_RDONLY; if (timeout) { // select will block, we don't want open to block oflag |= O_NONBLOCK; } fd = open(signal_name, oflag); if (fd == -1) { retval = RL_UNEXPECTED; goto cleanup; } if (timeout) { FD_ZERO(&rfds); FD_SET(fd, &rfds); retval = select(fd + 1, &rfds, NULL, NULL, timeout); if (retval == -1) { retval = RL_UNEXPECTED; goto cleanup; } else if (retval != 0) { retval = RL_TIMEOUT; goto cleanup; } } readbytes = read(fd, header, FIFO_HEADER_SIZE); if (readbytes != FIFO_HEADER_SIZE) { retval = RL_UNEXPECTED; goto cleanup; } datalen = (size_t)get_4bytes((unsigned char *)header); RL_MALLOC(data, sizeof(char) * datalen); readbytes = read(fd, data, datalen); if (readbytes != datalen) { retval = RL_UNEXPECTED; goto cleanup; } crc = rl_crc64(0, (unsigned char *)data, datalen); memrev64ifbe(&crc); memcpy(&header[4], &crc, 8); if (memcmp(&crc, &header[4], 8) != 0) { retval = RL_UNEXPECTED; goto cleanup; } if (_data) { *_data = data; } if (_datalen) { *_datalen = datalen; } retval = RL_OK; cleanup: if (fd >= 0) { close(fd); } if (retval != RL_OK || _data == NULL) { rl_free(data); } return retval; }
/* Check the specified RDB file. Return 0 if the RDB looks sane, otherwise * 1 is returned. */ int redis_check_rdb(char *rdbfilename) { uint64_t dbid; int type, rdbver; char buf[1024]; long long expiretime, now = mstime(); FILE *fp; static rio rdb; /* Pointed by global struct riostate. */ if ((fp = fopen(rdbfilename,"r")) == NULL) return 1; rioInitWithFile(&rdb,fp); rdbstate.rio = &rdb; rdb.update_cksum = rdbLoadProgressCallback; if (rioRead(&rdb,buf,9) == 0) goto eoferr; buf[9] = '\0'; if (memcmp(buf,"REDIS",5) != 0) { rdbCheckError("Wrong signature trying to load DB from file"); return 1; } rdbver = atoi(buf+5); if (rdbver < 1 || rdbver > RDB_VERSION) { rdbCheckError("Can't handle RDB format version %d",rdbver); return 1; } startLoading(fp); while(1) { robj *key, *val; expiretime = -1; /* Read type. */ rdbstate.doing = RDB_CHECK_DOING_READ_TYPE; if ((type = rdbLoadType(&rdb)) == -1) goto eoferr; /* Handle special types. */ if (type == RDB_OPCODE_EXPIRETIME) { rdbstate.doing = RDB_CHECK_DOING_READ_EXPIRE; /* EXPIRETIME: load an expire associated with the next key * to load. Note that after loading an expire we need to * load the actual type, and continue. */ if ((expiretime = rdbLoadTime(&rdb)) == -1) goto eoferr; /* We read the time so we need to read the object type again. */ rdbstate.doing = RDB_CHECK_DOING_READ_TYPE; if ((type = rdbLoadType(&rdb)) == -1) goto eoferr; /* the EXPIRETIME opcode specifies time in seconds, so convert * into milliseconds. */ expiretime *= 1000; } else if (type == RDB_OPCODE_EXPIRETIME_MS) { /* EXPIRETIME_MS: milliseconds precision expire times introduced * with RDB v3. Like EXPIRETIME but no with more precision. */ rdbstate.doing = RDB_CHECK_DOING_READ_EXPIRE; if ((expiretime = rdbLoadMillisecondTime(&rdb)) == -1) goto eoferr; /* We read the time so we need to read the object type again. 
*/ rdbstate.doing = RDB_CHECK_DOING_READ_TYPE; if ((type = rdbLoadType(&rdb)) == -1) goto eoferr; } else if (type == RDB_OPCODE_EOF) { /* EOF: End of file, exit the main loop. */ break; } else if (type == RDB_OPCODE_SELECTDB) { /* SELECTDB: Select the specified database. */ rdbstate.doing = RDB_CHECK_DOING_READ_LEN; if ((dbid = rdbLoadLen(&rdb,NULL)) == RDB_LENERR) goto eoferr; rdbCheckInfo("Selecting DB ID %d", dbid); continue; /* Read type again. */ } else if (type == RDB_OPCODE_RESIZEDB) { /* RESIZEDB: Hint about the size of the keys in the currently * selected data base, in order to avoid useless rehashing. */ uint64_t db_size, expires_size; rdbstate.doing = RDB_CHECK_DOING_READ_LEN; if ((db_size = rdbLoadLen(&rdb,NULL)) == RDB_LENERR) goto eoferr; if ((expires_size = rdbLoadLen(&rdb,NULL)) == RDB_LENERR) goto eoferr; continue; /* Read type again. */ } else if (type == RDB_OPCODE_AUX) { /* AUX: generic string-string fields. Use to add state to RDB * which is backward compatible. Implementations of RDB loading * are requierd to skip AUX fields they don't understand. * * An AUX field is composed of two strings: key and value. */ robj *auxkey, *auxval; rdbstate.doing = RDB_CHECK_DOING_READ_AUX; if ((auxkey = rdbLoadStringObject(&rdb)) == NULL) goto eoferr; if ((auxval = rdbLoadStringObject(&rdb)) == NULL) goto eoferr; rdbCheckInfo("AUX FIELD %s = '%s'", (char*)auxkey->ptr, (char*)auxval->ptr); decrRefCount(auxkey); decrRefCount(auxval); continue; /* Read type again. */ } else { if (!rdbIsObjectType(type)) { rdbCheckError("Invalid object type: %d", type); return 1; } rdbstate.key_type = type; } /* Read key */ rdbstate.doing = RDB_CHECK_DOING_READ_KEY; if ((key = rdbLoadStringObject(&rdb)) == NULL) goto eoferr; rdbstate.key = key; rdbstate.keys++; /* Read value */ rdbstate.doing = RDB_CHECK_DOING_READ_OBJECT_VALUE; if ((val = rdbLoadObject(type,&rdb)) == NULL) goto eoferr; /* Check if the key already expired. 
This function is used when loading * an RDB file from disk, either at startup, or when an RDB was * received from the master. In the latter case, the master is * responsible for key expiry. If we would expire keys here, the * snapshot taken by the master may not be reflected on the slave. */ if (server.masterhost == NULL && expiretime != -1 && expiretime < now) rdbstate.already_expired++; if (expiretime != -1) rdbstate.expires++; rdbstate.key = NULL; decrRefCount(key); decrRefCount(val); rdbstate.key_type = -1; } /* Verify the checksum if RDB version is >= 5 */ if (rdbver >= 5 && server.rdb_checksum) { uint64_t cksum, expected = rdb.cksum; rdbstate.doing = RDB_CHECK_DOING_CHECK_SUM; if (rioRead(&rdb,&cksum,8) == 0) goto eoferr; memrev64ifbe(&cksum); if (cksum == 0) { rdbCheckInfo("RDB file was saved with checksum disabled: no check performed."); } else if (cksum != expected) { rdbCheckError("RDB CRC error"); } else { rdbCheckInfo("Checksum OK"); } } fclose(fp); return 0; eoferr: /* unexpected end of file is handled here with a fatal exit */ if (rdbstate.error_set) { rdbCheckError(rdbstate.error); } else { rdbCheckError("Unexpected EOF reading RDB file"); } return 1; }