/**
 * Construct a binlog queue on top of a LevelDB handle.
 *
 * last_seq is taken from find_last() and min_seq from find_next(0, ...);
 * each stays 0 when the corresponding lookup does not return 1.
 *
 * @param db       LevelDB handle, stored in this->db.
 * @param enabled  When false, the summary line is not logged and the
 *                 cleaning thread is not started.
 * @param capacity Stored in this->capacity and reported in the summary line.
 *
 * If the cleaning thread cannot be created the process is terminated.
 */
BinlogQueue::BinlogQueue(leveldb::DB *db, bool enabled, int capacity){
	this->db = db;
	this->min_seq = 0;
	this->last_seq = 0;
	this->tran_seq = 0;
	this->capacity = capacity;
	this->enabled = enabled;

	Binlog log;
	if(this->find_last(&log) == 1){
		this->last_seq = log.seq();
	}
	if(this->find_next(0, &log) == 1){
		this->min_seq = log.seq();
	}
	if(this->enabled){
		log_info("binlogs capacity: %d, min: %" PRIu64 ", max: %" PRIu64 ",",
			this->capacity, this->min_seq, this->last_seq);
	}

	// start cleaning thread
	if(this->enabled){
		thread_quit = false;
		pthread_t tid;
		int err = pthread_create(&tid, NULL, &BinlogQueue::log_clean_thread_func, this);
		if(err != 0){
			log_fatal("can't create thread: %s", strerror(err));
			// Fatal start-up failure: exit with a non-zero status so the
			// caller/supervisor does not mistake it for a clean shutdown.
			exit(1);
		}
	}
}
int Slave::proc_copy(const Binlog &log, const std::vector<Bytes> &req){ switch(log.cmd()){ case BinlogCommand::BEGIN: log_info("copy begin"); log_info("start flushdb..."); this->last_seq = 0; this->last_key = ""; this->save_status(); ssdb->flushdb(); log_info("end flushdb."); break; case BinlogCommand::END: log_info("copy end, copy_count: %" PRIu64 ", last_seq: %" PRIu64 ", seq: %" PRIu64, copy_count, this->last_seq, log.seq()); this->status = SYNC; this->last_key = ""; this->save_status(); break; default: if(++copy_count % 1000 == 1){ log_info("copy_count: %" PRIu64 ", last_seq: %" PRIu64 ", seq: %" PRIu64 "", copy_count, this->last_seq, log.seq()); } return proc_sync(log, req); break; } return 0; }
int Slave::proc_copy(const Binlog &log, const std::vector<Bytes> &req){ switch(log.cmd()){ case BinlogCommand::BEGIN: log_info("copy begin"); break; case BinlogCommand::END: log_info("copy end, copy_count: %" PRIu64 ", last_seq: %" PRIu64 ", seq: %" PRIu64, copy_count, this->last_seq, log.seq()); if (!this->is_mirror && this->last_seq > 0) { // no master ssdb->get_binlogs()->update(this->last_seq, BinlogType::NOOP, BinlogCommand::NONE, "", ""); ssdb->get_binlogs()->set_last_seq(this->last_seq); } this->last_key = ""; this->save_status(); status = SYNC; break; default: if (this->is_mirror) { // if master, don't set no_log return proc_sync(log, req); } else { // unable to support multi replications bool enabled = ssdb->get_binlogs()->is_enabled(); ssdb->get_binlogs()->set_enabled(false); // disabled binlog int result = proc_sync(log, req); ssdb->get_binlogs()->set_enabled(enabled); return result; } } return 0; }
// TESTING, slow, so not used void BinlogQueue::merge(){ std::map<std::string, uint64_t> key_map; uint64_t start = min_seq; uint64_t end = last_seq; int reduce_count = 0; int total = 0; total = end - start + 1; (void)total; // suppresses warning log_trace("merge begin"); for(; start <= end; start++){ Binlog log; if(this->get(start, &log) == 1){ if(log.type() == BinlogType::NOOP){ continue; } std::string key = log.key().String(); std::map<std::string, uint64_t>::iterator it = key_map.find(key); if(it != key_map.end()){ uint64_t seq = it->second; this->update(seq, BinlogType::NOOP, BinlogCommand::NONE, ""); //log_trace("merge update %" PRIu64 " to NOOP", seq); reduce_count ++; } key_map[key] = log.seq(); } } log_trace("merge reduce %d of %d binlogs", reduce_count, total); }
/**
 * Construct a binlog queue on top of a LevelDB handle, using LOG_QUEUE_SIZE
 * as capacity, and start the cleaning thread.
 *
 * last_seq is taken from find_last(). min_seq starts as
 * last_seq - LOG_QUEUE_SIZE (or 0 when last_seq does not exceed the queue
 * size) and is then moved to the sequence returned by find_next() from that
 * position, if any.
 *
 * @param db LevelDB handle, stored in this->db.
 *
 * If the cleaning thread cannot be created the process is terminated.
 */
BinlogQueue::BinlogQueue(leveldb::DB *db){
	this->db = db;
	this->min_seq = 0;
	this->last_seq = 0;
	this->tran_seq = 0;
	this->capacity = LOG_QUEUE_SIZE;
	this->no_log_ = false;

	Binlog log;
	if(this->find_last(&log) == 1){
		this->last_seq = log.seq();
	}
	if(this->last_seq > LOG_QUEUE_SIZE){
		this->min_seq = this->last_seq - LOG_QUEUE_SIZE;
	}else{
		this->min_seq = 0;
	}
	// TODO: use binary search to find out min_seq
	if(this->find_next(this->min_seq, &log) == 1){
		this->min_seq = log.seq();
	}
	log_info("binlogs capacity: %d, min: %" PRIu64 ", max: %" PRIu64 ",",
		capacity, min_seq, last_seq);

	//this->merge();
	/*
	int noops = 0;
	int total = 0;
	uint64_t seq = this->min_seq;
	while(this->find_next(seq, &log) == 1){
		total ++;
		seq = log.seq() + 1;
		if(log.type() != BinlogType::NOOP){
			std::string s = log.dumps();
			//log_trace("%s", s.c_str());
			noops ++;
		}
	}
	log_debug("capacity: %d, min: %" PRIu64 ", max: %" PRIu64 ", noops: %d, total: %d",
		capacity, min_seq, last_seq, noops, total);
	*/

	// start cleaning thread
	thread_quit = false;
	pthread_t tid;
	int err = pthread_create(&tid, NULL, &BinlogQueue::log_clean_thread_func, this);
	if(err != 0){
		log_fatal("can't create thread: %s", strerror(err));
		// Fatal start-up failure: exit with a non-zero status so the
		// caller/supervisor does not mistake it for a clean shutdown.
		exit(1);
	}
}
int Slave::proc_copy(const Binlog &log, const std::vector<Bytes> &req){ switch(log.cmd()){ case BinlogCommand::BEGIN: log_info("copy begin"); break; case BinlogCommand::END: log_info("copy end, copy_count: %" PRIu64 ", last_seq: %" PRIu64 ", seq: %" PRIu64, copy_count, this->last_seq, log.seq()); this->last_key = ""; this->save_status(); break; default: return proc_sync(log, req); break; } return 0; }
/**
 * Handle a NOOP binlog: adopt its sequence number as last_seq and persist
 * the status, but only when it differs from the current last_seq.
 *
 * @param log Decoded binlog entry.
 * @param req Raw message fields (not used here).
 * @return Always 0.
 */
int Slave::proc_noop(const Binlog &log, const std::vector<Bytes> &req){
	const uint64_t incoming_seq = log.seq();
	if(this->last_seq == incoming_seq){
		// Position unchanged: nothing to record.
		return 0;
	}
	log_debug("noop last_seq: %" PRIu64 ", seq: %" PRIu64 "", this->last_seq, incoming_seq);
	this->last_seq = incoming_seq;
	this->save_status();
	return 0;
}
/**
 * Construct a binlog queue on top of a LevelDB handle.
 *
 * last_seq is taken from find_last(). min_seq starts as last_seq - capacity
 * (or 0 when last_seq does not exceed capacity) and is then moved to the
 * sequence returned by find_next() from that position, if any.
 *
 * @param db       LevelDB handle, stored in this->db.
 * @param enabled  When false, the summary line is not logged and the
 *                 cleaning thread is not started.
 * @param capacity Stored in this->capacity; used for the initial min_seq
 *                 estimate.
 *
 * If the cleaning thread cannot be created the process is terminated.
 */
BinlogQueue::BinlogQueue(leveldb::DB *db, bool enabled, int capacity){
	this->db = db;
	this->min_seq = 0;
	this->last_seq = 0;
	this->tran_seq = 0;
	this->capacity = capacity;
	this->enabled = enabled;

	Binlog log;
	if(this->find_last(&log) == 1){
		this->last_seq = log.seq();
	}
	// The code below may perform very badly!
	//if(this->find_next(0, &log) == 1){
	//	this->min_seq = log.seq();
	//}
	if(this->last_seq > this->capacity){
		this->min_seq = this->last_seq - this->capacity;
	}else{
		this->min_seq = 0;
	}
	if(this->find_next(this->min_seq, &log) == 1){
		this->min_seq = log.seq();
	}
	if(this->enabled){
		log_info("binlogs capacity: %d, min: %" PRIu64 ", max: %" PRIu64 ",",
			this->capacity, this->min_seq, this->last_seq);
		// This method has performance problems.
		// However, if the cleanup is not run and capacity is later raised,
		// master/slave synchronisation problems may follow.
		//this->clean_obsolete_binlogs();
	}

	// start cleaning thread
	if(this->enabled){
		thread_quit = false;
		pthread_t tid;
		int err = pthread_create(&tid, NULL, &BinlogQueue::log_clean_thread_func, this);
		if(err != 0){
			log_fatal("can't create thread: %s", strerror(err));
			// Fatal start-up failure: exit with a non-zero status so the
			// caller/supervisor does not mistake it for a clean shutdown.
			exit(1);
		}
	}
}
// 创建一个操作日志队列 BinlogQueue::BinlogQueue(leveldb::DB *db, bool enabled){ this->db = db; this->min_seq = 0; this->last_seq = 0; this->tran_seq = 0; // 队列空间 this->capacity = LOG_QUEUE_SIZE; this->enabled = enabled; Binlog log; // 从leveldb中查找之前最大的序列号 if(this->find_last(&log) == 1){ this->last_seq = log.seq(); } // 超过了队列长度,最小应该是减去队列长度的序列号 if(this->last_seq > LOG_QUEUE_SIZE){ this->min_seq = this->last_seq - LOG_QUEUE_SIZE; }else{ // 否则,最小序列号是0 this->min_seq = 0; } // TODO: use binary search to find out min_seq // 通过leveldb中记录的操作日志更新最小序列号 if(this->find_next(this->min_seq, &log) == 1){ this->min_seq = log.seq(); } if(this->enabled){ log_info("binlogs capacity: %d, min: %" PRIu64 ", max: %" PRIu64 ",", capacity, min_seq, last_seq); } // start cleaning thread // 启动线程进行操作日志的清理 if(this->enabled){ thread_quit = false; pthread_t tid; int err = pthread_create(&tid, NULL, &BinlogQueue::log_clean_thread_func, this); if(err != 0){ log_fatal("can't create thread: %s", strerror(err)); exit(0); } } }
int Slave::proc(const std::vector<Bytes> &req){ Binlog log; if(log.load_format_key(req[0]) == -1){ log_error("invalid binlog!"); return 0; } const char *sync_type = this->is_mirror? "mirror" : "sync"; switch(log.type()){ case BinlogType::NOOP: return this->proc_noop(log, req); break; case BinlogType::COPY:{ status = COPY; if(++copy_count % 1000 == 1){ log_info("copy_count: %" PRIu64 ", last_seq: %" PRIu64 ", seq: %" PRIu64 "", copy_count, this->last_seq, log.seq()); } if(req.size() >= 2){ log_debug("[%s] %s [%d]", sync_type, log.dumps().c_str(), req[1].size()); }else{ log_debug("[%s] %s", sync_type, log.dumps().c_str()); } this->proc_copy(log, req); break; } case BinlogType::SYNC: case BinlogType::MIRROR:{ status = SYNC; if(++sync_count % 1000 == 1){ log_info("sync_count: %" PRIu64 ", last_seq: %" PRIu64 ", seq: %" PRIu64 "", sync_count, this->last_seq, log.seq()); } if(req.size() >= 2){ log_debug("[%s] %s [%d]", sync_type, log.dumps().c_str(), req[1].size()); }else{ log_debug("[%s] %s", sync_type, log.dumps().c_str()); } this->proc_sync(log, req); break; } default: break; } return 0; }
int Slave::proc(const std::vector<Bytes> &req){ Binlog log; if(log.load(req[0].Slice()) == -1){ log_error("invalid binlog!"); return 0; } if(log.type() != BinlogType::NOOP){ if(this->is_mirror){ log_debug("[mirror] %s", log.dumps().c_str()); }else{ log_debug("[sync] %s", log.dumps().c_str()); } } switch(log.type()){ case BinlogType::NOOP: return this->proc_noop(log, req); break; case BinlogType::COPY: return this->proc_copy(log, req); break; case BinlogType::SYNC: case BinlogType::MIRROR: return this->proc_sync(log, req); break; default: break; } return 0; }
int Slave::proc(const std::vector<Bytes> &req){ Binlog log; if(log.load(req[0].Slice()) == -1){ log_error("invalid binlog!"); return 0; } switch(log.type()){ case BinlogType::NOOP: return this->proc_noop(log, req); break; case BinlogType::COPY:{ if(++copy_count % 1000 == 1){ log_info("copy_count: %" PRIu64 ", last_seq: %" PRIu64 ", seq: %" PRIu64 "", copy_count, this->last_seq, log.seq()); } if(this->is_mirror){ log_trace("[mirror] %s", log.dumps().c_str()); }else{ log_trace("[sync] %s", log.dumps().c_str()); } return this->proc_copy(log, req); break; } case BinlogType::SYNC: case BinlogType::MIRROR:{ if(++sync_count % 1000 == 1){ log_info("sync_count: %" PRIu64 ", last_seq: %" PRIu64 ", seq: %" PRIu64 "", sync_count, this->last_seq, log.seq()); } if(this->is_mirror){ log_debug("[mirror] %s", log.dumps().c_str()); }else{ log_debug("[sync] %s", log.dumps().c_str()); } return this->proc_sync(log, req); break; } default: break; } return 0; }
/**
 * Handle one binlog of type COPY.
 *
 * BEGIN is only logged; END clears last_key and persists the status; every
 * other command is applied through proc_sync().
 *
 * @param log Decoded binlog entry.
 * @param req Raw message fields, forwarded to proc_sync().
 * @return 0 for BEGIN/END, otherwise whatever proc_sync() returns.
 */
int Slave::proc_copy(const Binlog &log, const std::vector<Bytes> &req){
	if(log.cmd() == BinlogCommand::BEGIN){
		log_info("copy begin");
		return 0;
	}
	if(log.cmd() == BinlogCommand::END){
		log_info("copy end, step in sync");
		this->last_key = "";
		this->save_status();
		return 0;
	}
	return proc_sync(log, req);
}
/**
 * Select the next binlog for this client and send it over the link.
 *
 * Selection loop:
 *  - looks up the newest binlog (COPY status with last_seq == 0) or the one
 *    at/after last_seq + 1;
 *  - while copying, binlogs whose key sorts after last_key are dropped (only
 *    last_seq advances) and the copy iterator is discarded;
 *  - a gap between the found and the expected sequence marks the client
 *    OUT_OF_SYNC;
 *  - MIRROR binlogs are not sent to a mirror client; after 1000 skipped
 *    sequences since last_noop_seq, noop() is called instead.
 *
 * Set-like commands are sent together with the current stored value (and
 * skipped when the key is not found or raw_get fails); delete/pop commands
 * are sent without a value.
 *
 * @param logs Binlog queue to read from.
 * @return 0 when the queue lookup returns 0; 1 in all other cases.
 */
int BackendSync::Client::sync(BinlogQueue *logs) {
	Binlog log;
	for(;;) {
		const uint64_t expect_seq = this->last_seq + 1;
		const bool lookup_newest = (this->status == Client::COPY && this->last_seq == 0);
		const int found = lookup_newest ? logs->find_last(&log)
		                                : logs->find_next(expect_seq, &log);
		if(found == 0) {
			return 0;
		}

		if(this->status == Client::COPY && log.key() > this->last_key) {
			log_debug("fd: %d, last_key: '%s', drop: %s",
			          link->fd(),
			          hexmem(this->last_key.data(), this->last_key.size()).c_str(),
			          log.dumps().c_str());
			this->last_seq = log.seq();
			// WARN: When there are writes behind last_key, we MUST create
			// a new iterator, because the iterator will not know this key:
			// an iterator ONLY iterates through keys written before
			// the iterator was created.
			if(this->iter) {
				delete this->iter;
				this->iter = NULL;
			}
			continue;
		}

		if(this->last_seq != 0 && log.seq() != expect_seq) {
			log_warn("%s:%d fd: %d, OUT_OF_SYNC! log.seq: %" PRIu64 ", expect_seq: %" PRIu64 "",
			         link->remote_ip, link->remote_port,
			         link->fd(),
			         log.seq(),
			         expect_seq
			        );
			this->status = Client::OUT_OF_SYNC;
			return 1;
		}

		// update last_seq
		this->last_seq = log.seq();

		const char type = log.type();
		if(type == BinlogType::MIRROR && this->is_mirror) {
			if(this->last_seq - this->last_noop_seq < 1000) {
				continue;
			}
			this->noop();
			return 1;
		}
		break;
	}

	const bool carries_value =
		log.cmd() == BinlogCommand::KSET ||
		log.cmd() == BinlogCommand::HSET ||
		log.cmd() == BinlogCommand::ZSET ||
		log.cmd() == BinlogCommand::QSET ||
		log.cmd() == BinlogCommand::QPUSH_BACK ||
		log.cmd() == BinlogCommand::QPUSH_FRONT;
	const bool is_removal =
		log.cmd() == BinlogCommand::KDEL ||
		log.cmd() == BinlogCommand::HDEL ||
		log.cmd() == BinlogCommand::ZDEL ||
		log.cmd() == BinlogCommand::QPOP_BACK ||
		log.cmd() == BinlogCommand::QPOP_FRONT;

	if(carries_value) {
		std::string val;
		const int ret = backend->ssdb->raw_get(log.key(), &val);
		switch(ret) {
			case -1:
				log_error("fd: %d, raw_get error!", link->fd());
				break;
			case 0:
				log_trace("fd: %d, skip not found: %s", link->fd(), log.dumps().c_str());
				break;
			default:
				log_trace("fd: %d, %s", link->fd(), log.dumps().c_str());
				link->send(log.repr(), val);
				break;
		}
	} else if(is_removal) {
		log_trace("fd: %d, %s", link->fd(), log.dumps().c_str());
		link->send(log.repr());
	}
	return 1;
}
int Slave::proc_sync(const Binlog &log, const std::vector<Bytes> &req){ switch(log.cmd()){ case BinlogCommand::KSET: { if(req.size() != 2){ break; } std::string key; if(decode_kv_key(log.key(), &key) == -1){ break; } log_trace("set %s", hexmem(key.data(), key.size()).c_str()); if(ssdb->set(key, req[1], log_type) == -1){ return -1; } } break; case BinlogCommand::KDEL: { std::string key; if(decode_kv_key(log.key(), &key) == -1){ break; } log_trace("del %s", hexmem(key.data(), key.size()).c_str()); if(ssdb->del(key, log_type) == -1){ return -1; } } break; case BinlogCommand::HSET: { if(req.size() != 2){ break; } std::string name, key; if(decode_hash_key(log.key(), &name, &key) == -1){ break; } log_trace("hset %s %s", hexmem(name.data(), name.size()).c_str(), hexmem(key.data(), key.size()).c_str()); if(ssdb->hset(name, key, req[1], log_type) == -1){ return -1; } } break; case BinlogCommand::HDEL: { std::string name, key; if(decode_hash_key(log.key(), &name, &key) == -1){ break; } log_trace("hdel %s %s", hexmem(name.data(), name.size()).c_str(), hexmem(key.data(), key.size()).c_str()); if(ssdb->hdel(name, key, log_type) == -1){ return -1; } } break; case BinlogCommand::ZSET: { if(req.size() != 2){ break; } std::string name, key; if(decode_zset_key(log.key(), &name, &key) == -1){ break; } log_trace("zset %s %s", hexmem(name.data(), name.size()).c_str(), hexmem(key.data(), key.size()).c_str()); if(ssdb->zset(name, key, req[1], log_type) == -1){ return -1; } } break; case BinlogCommand::ZDEL: { std::string name, key; if(decode_zset_key(log.key(), &name, &key) == -1){ break; } log_trace("zdel %s %s", hexmem(name.data(), name.size()).c_str(), hexmem(key.data(), key.size()).c_str()); if(ssdb->zdel(name, key, log_type) == -1){ return -1; } } break; case BinlogCommand::QSET: case BinlogCommand::QPUSH_BACK: case BinlogCommand::QPUSH_FRONT: { if(req.size() != 2){ break; } std::string name; uint64_t seq; if(decode_qitem_key(log.key(), &name, &seq) == -1){ break; } if(seq < 
QITEM_MIN_SEQ || seq > QITEM_MAX_SEQ){ break; } int ret; if(log.cmd() == BinlogCommand::QSET){ log_trace("qset %s %" PRIu64 "", hexmem(name.data(), name.size()).c_str(), seq); ret = ssdb->qset_by_seq(name, seq, req[1], log_type); }else if(log.cmd() == BinlogCommand::QPUSH_BACK){ log_trace("qpush_back %s", hexmem(name.data(), name.size()).c_str()); ret = ssdb->qpush_back(name, req[1], log_type); }else{ log_trace("qpush_front %s", hexmem(name.data(), name.size()).c_str()); ret = ssdb->qpush_front(name, req[1], log_type); } if(ret == -1){ return -1; } } break; case BinlogCommand::QPOP_BACK: case BinlogCommand::QPOP_FRONT: { int ret; const Bytes name = log.key(); std::string tmp; if(log.cmd() == BinlogCommand::QPOP_BACK){ log_trace("qpop_back %s", hexmem(name.data(), name.size()).c_str()); ret = ssdb->qpop_back(name, &tmp, log_type); }else{ log_trace("qpop_front %s", hexmem(name.data(), name.size()).c_str()); ret = ssdb->qpop_front(name, &tmp, log_type); } if(ret == -1){ return -1; } } break; default: log_error("unknown binlog, type=%d, cmd=%d", log.type(), log.cmd()); break; } this->last_seq = log.seq(); if(log.type() == BinlogType::COPY){ this->last_key = log.key().String(); } this->save_status(); return 0; }
/**
 * Select the next binlog for this client and send it over the link.
 *
 * Selection loop:
 *  - looks up the newest binlog (COPY status with last_seq == 0) or the one
 *    at/after last_seq + 1;
 *  - while copying, binlogs whose key sorts after last_key are dropped (only
 *    last_seq advances);
 *  - a gap between the found and the expected sequence marks the client
 *    OUT_OF_SYNC;
 *  - MIRROR binlogs are not sent to a mirror client; after 1000 skipped
 *    sequences since last_noop_seq, noop() is called instead.
 *
 * KSET/HSET/ZSET are sent together with the current stored value (and
 * skipped when the key is not found or raw_get fails); KDEL/HDEL/ZDEL are
 * sent without a value. Other commands are not sent.
 *
 * @param logs Binlog queue to read from.
 * @return 0 when the queue lookup returns 0; 1 in all other cases.
 */
int BackendSync::Client::sync(BinlogQueue *logs){
	Binlog log;
	for(;;){
		const uint64_t expect_seq = this->last_seq + 1;
		const bool lookup_newest = (this->status == Client::COPY && this->last_seq == 0);
		const int found = lookup_newest ? logs->find_last(&log)
		                                : logs->find_next(expect_seq, &log);
		if(found == 0){
			return 0;
		}

		// writes that are out of copied range will be discarded.
		if(this->status == Client::COPY && log.key() > this->last_key){
			log_trace("fd: %d, last_key: '%s', drop: %s",
				link->fd(),
				hexmem(this->last_key.data(), this->last_key.size()).c_str(),
				log.dumps().c_str());
			this->last_seq = log.seq();
			// NOTE(review): resetting the copy iterator here
			// (delete this->iter; this->iter = NULL;) is disabled in this
			// version -- confirm that this is intended.
			//if(this->iter){
			//	delete this->iter;
			//	this->iter = NULL;
			//}
			continue;
		}

		if(this->last_seq != 0 && log.seq() != expect_seq){
			log_warn("fd: %d, OUT_OF_SYNC! log.seq: %" PRIu64 ", expect_seq: %" PRIu64 "",
				link->fd(),
				log.seq(),
				expect_seq
				);
			this->status = Client::OUT_OF_SYNC;
			return 1;
		}

		// update last_seq
		this->last_seq = log.seq();

		const char type = log.type();
		if(type == BinlogType::MIRROR && this->is_mirror){
			if(this->last_seq - this->last_noop_seq < 1000){
				continue;
			}
			this->noop();
			return 1;
		}
		break;
	}

	const bool carries_value =
		log.cmd() == BinlogCommand::KSET ||
		log.cmd() == BinlogCommand::HSET ||
		log.cmd() == BinlogCommand::ZSET;
	const bool is_removal =
		log.cmd() == BinlogCommand::KDEL ||
		log.cmd() == BinlogCommand::HDEL ||
		log.cmd() == BinlogCommand::ZDEL;

	if(carries_value){
		std::string val;
		const int ret = backend->ssdb->raw_get(log.key(), &val);
		switch(ret){
			case -1:
				log_error("fd: %d, raw_get error!", link->fd());
				break;
			case 0:
				log_trace("fd: %d, skip not found: %s", link->fd(), log.dumps().c_str());
				break;
			default:
				log_trace("fd: %d, %s", link->fd(), log.dumps().c_str());
				link->send(log.repr(), val);
				break;
		}
	}else if(is_removal){
		log_trace("fd: %d, %s", link->fd(), log.dumps().c_str());
		link->send(log.repr());
	}
	return 1;
}
int Slave::proc_sync(const Binlog &log, const std::vector<Bytes> &req){ switch(log.cmd()){ case BinlogCommand::KSET: { if(req.size() != 2){ break; } std::string key; if(decode_kv_key(log.key(), &key) == -1){ break; } log_trace("set %s", hexmem(key.data(), key.size()).c_str()); if(ssdb->set(key, req[1], log_type) == -1){ return -1; } } break; case BinlogCommand::KDEL: { std::string key; if(decode_kv_key(log.key(), &key) == -1){ break; } log_trace("del %s", hexmem(key.data(), key.size()).c_str()); if(ssdb->del(key, log_type) == -1){ return -1; } } break; case BinlogCommand::HSET: { if(req.size() != 2){ break; } std::string name, key; if(decode_hash_key(log.key(), &name, &key) == -1){ break; } log_trace("hset %s %s", hexmem(name.data(), name.size()).c_str(), hexmem(key.data(), key.size()).c_str()); if(ssdb->hset(name, key, req[1], log_type) == -1){ return -1; } } break; case BinlogCommand::HDEL: { std::string name, key; if(decode_hash_key(log.key(), &name, &key) == -1){ break; } log_trace("hdel %s %s", hexmem(name.data(), name.size()).c_str(), hexmem(key.data(), key.size()).c_str()); if(ssdb->hdel(name, key, log_type) == -1){ return -1; } } break; case BinlogCommand::ZSET: { if(req.size() != 2){ break; } std::string name, key; if(decode_zset_key(log.key(), &name, &key) == -1){ break; } log_trace("zset %s %s", hexmem(name.data(), name.size()).c_str(), hexmem(key.data(), key.size()).c_str()); if(ssdb->zset(name, key, req[1], log_type) == -1){ return -1; } } break; case BinlogCommand::ZDEL: { std::string name, key; if(decode_zset_key(log.key(), &name, &key) == -1){ break; } log_trace("zdel %s %s", hexmem(name.data(), name.size()).c_str(), hexmem(key.data(), key.size()).c_str()); if(ssdb->zdel(name, key, log_type) == -1){ return -1; } } break; default: log_error("unknown binlog, type=%d, cmd=%d", log.type(), log.cmd()); break; } this->last_seq = log.seq(); if(log.type() == BinlogType::COPY){ this->last_key = log.key().String(); } this->save_status(); return 0; }