// Sanity-checks the ordering invariants between the prepared, committed and
// durable decrees, and verifies that neither the shared log nor (when present)
// the private log has garbage-collected past what is already durable.
void replica::check_state_completeness()
{
    // invariant chain: prepared >= committed >= durable
    dassert(max_prepared_decree() >= last_committed_decree(), "");
    dassert(last_committed_decree() >= last_durable_decree(), "");

    // shared log: the max gc-ed decree must not exceed the durable decree
    const auto shared_start_offset = _app->init_info().init_offset_in_shared_log;
    auto shared_gced_decree = _stub->_log->max_gced_decree(get_gpid(), shared_start_offset);
    dassert(shared_gced_decree <= last_durable_decree(), "");
    _stub->_log->check_valid_start_offset(get_gpid(), shared_start_offset);

    // private log (optional): same checks against its own start offset
    if (_private_log != nullptr)
    {
        const auto private_start_offset = _app->init_info().init_offset_in_private_log;
        auto private_gced_decree =
            _private_log->max_gced_decree(get_gpid(), private_start_offset);
        dassert(private_gced_decree <= last_durable_decree(), "");
        _private_log->check_valid_start_offset(get_gpid(), private_start_offset);
    }
}
void replica::init_checkpoint() { check_hashed_access(); // only applicable to primary and secondary replicas if (status() != PS_PRIMARY && status() != PS_SECONDARY) return; // no need to checkpoint if (_app->is_delta_state_learning_supported()) return; // already running if (_secondary_states.checkpoint_task != nullptr) return; // private log must be enabled to make sure commits // are not lost during checkpinting dassert(nullptr != _private_log, "log_enable_private_prepare must be true for checkpointing"); // TODO: when NOT to checkpoint, but use private log replay to build the state if (last_committed_decree() - last_durable_decree() < 10000) return; // primary is downgraded to secondary for checkpointing as no write can be seen // during checkpointing (i.e., state is freezed) if (PS_PRIMARY == status()) { configuration_update_request proposal; proposal.config = _primary_states.membership; proposal.type = CT_DOWNGRADE_TO_SECONDARY; proposal.node = proposal.config.primary; downgrade_to_secondary_on_primary(proposal); } // secondary can start checkpint in the long running thread pool else { dassert(PS_SECONDARY == status(), ""); _secondary_states.checkpoint_task = tasking::enqueue( LPC_CHECKPOINT_REPLICA, this, &replica::checkpoint, gpid_to_hash(get_gpid()) ); } }
// Writes a full snapshot of the in-memory store to
// "<data_dir>/checkpoint.<version>" and records `version` as the last durable
// decree.
//
// File layout (raw in-memory bytes of each field, in this order):
//   uint64 entry count, int magic (0xdeadbeef),
//   then per entry: uint32 key length, key bytes, uint32 value length, value bytes.
//
// @param version  decree used to name the checkpoint file and stored as the
//                 new last durable decree.
// @return ERR_OK on every path visible in this function.
::dsn::error_code simple_kv_service_impl::checkpoint(int64_t version)
{
    // Build the path with std::string instead of sprintf into a fixed 256-byte
    // buffer: a long data directory overflowed that buffer. This form also
    // works whether data_dir() yields a C string or a std::string.
    const std::string path =
        std::string(data_dir()) + "/checkpoint." + std::to_string(version);
    const char* name = path.c_str();

    zauto_lock l(_lock);

    // Nothing to do when this version is already durable; the file must exist.
    if (version == last_durable_decree())
    {
        dassert(utils::filesystem::file_exists(name),
            "checkpoint file %s is missing!",
            name
            );
        return ERR_OK;
    }

    // TODO: should use async write instead
    // NOTE(review): the stream state is never checked, so a failed open or
    // write still advances the durable decree below — consider returning an
    // error code in that case.
    std::ofstream os(name, std::ios::binary);

    uint64_t count = (uint64_t)_store.size();
    int magic = 0xdeadbeef;

    os.write((const char*)&count, (uint32_t)sizeof(count));
    os.write((const char*)&magic, (uint32_t)sizeof(magic));

    for (const auto& kv : _store)
    {
        const std::string& k = kv.first;
        uint32_t sz = (uint32_t)k.length();
        os.write((const char*)&sz, (uint32_t)sizeof(sz));
        os.write(k.data(), sz);

        const std::string& v = kv.second;
        sz = (uint32_t)v.length();
        os.write((const char*)&sz, (uint32_t)sizeof(sz));
        os.write(v.data(), sz);
    }

    os.close();

    // TODO: gc checkpoints
    set_last_durable_decree(version);
    return ERR_OK;
}
// Opens the application via open(create_new). When that succeeds and an
// existing app is being reopened, loads the app info from "<replica dir>/.info".
// The current last committed decree is added to _app_commit_decree on every
// path, including failures.
//
// @param r           replica whose directory holds the ".info" file.
// @param create_new  true when creating a fresh app; the ".info" load is skipped.
// @return the result of open(), or of loading ".info" when that step ran.
error_code replication_app_base::open_internal(replica* r, bool create_new)
{
    auto result = open(create_new);
    const bool opened = (result == ERR_OK);

    if (opened)
    {
        // a freshly opened app must have no un-flushed commits
        dassert(last_committed_decree() == last_durable_decree(), "");
    }

    if (opened && !create_new)
    {
        const std::string info_file = utils::filesystem::path_combine(r->dir(), ".info");
        result = _info.load(info_file.c_str());
    }

    _app_commit_decree.add(last_committed_decree());
    return result;
}
int rrdb_service_impl::open(bool create_new) { if (_is_open) return ERR_SERVICE_ALREADY_RUNNING; rocksdb::Options opts; opts.create_if_missing = create_new; opts.error_if_exists = create_new; auto status = rocksdb::DB::Open(opts, dir() + "/rdb", &_db); if (status.ok()) { _is_open = true; _last_committed_decree = last_durable_decree(); } return status.code(); }
int simple_kv_service_impl::flush(bool force) { zauto_lock l(_lock); if (last_committed_decree() == last_durable_decree()) { return 0; } // TODO: should use async write instead char name[256]; sprintf(name, "%s/checkpoint.%lld", data_dir().c_str(), static_cast<long long int>(last_committed_decree())); std::ofstream os(name); uint64_t count = (uint64_t)_store.size(); os.write((const char*)&count, (uint32_t)sizeof(count)); for (auto it = _store.begin(); it != _store.end(); it++) { const std::string& k = it->first; uint32_t sz = (uint32_t)k.length(); os.write((const char*)&sz, (uint32_t)sizeof(sz)); os.write((const char*)&k[0], sz); const std::string& v = it->second; sz = (uint32_t)v.length(); os.write((const char*)&sz, (uint32_t)sizeof(sz)); os.write((const char*)&v[0], sz); } _last_durable_decree = last_committed_decree(); return 0; }
// run in replica thread void replica::init_checkpoint() { // only applicable to primary and secondary replicas if (status() != PS_PRIMARY && status() != PS_SECONDARY) return; // no need to checkpoint if (_app->is_delta_state_learning_supported()) return; auto err = _app->checkpoint_async(); if (err != ERR_NOT_IMPLEMENTED) { if (err == ERR_OK) { ddebug("%s: checkpoint_async succeed, app_last_committed_decree=%" PRId64 ", app_last_durable_decree=%" PRId64, name(), _app->last_committed_decree(), _app->last_durable_decree()); } if (err != ERR_OK && err != ERR_WRONG_TIMING && err != ERR_NO_NEED_OPERATE && err != ERR_TRY_AGAIN) { derror("%s: checkpoint_async failed, err = %s", name(), err.to_string()); } return; } // private log must be enabled to make sure commits // are not lost during checkpinting dassert(nullptr != _private_log, "log_enable_private_prepare must be true for checkpointing"); if (last_committed_decree() - last_durable_decree() < _options->checkpoint_min_decree_gap) return; // primary cannot checkpoint (TODO: test if async checkpoint is supported) // therefore we have to copy checkpoints from secondaries if (PS_PRIMARY == status()) { // only one running instance if (nullptr == _primary_states.checkpoint_task) { if (_primary_states.membership.secondaries.size() == 0) return; std::shared_ptr<replica_configuration> rc(new replica_configuration); _primary_states.get_replica_config(PS_SECONDARY, *rc); rpc_address sd = _primary_states.membership.secondaries [dsn_random32(0, (int)_primary_states.membership.secondaries.size() - 1)]; _primary_states.checkpoint_task = rpc::call_typed( sd, RPC_REPLICA_COPY_LAST_CHECKPOINT, rc, this, &replica::on_copy_checkpoint_ack, gpid_to_hash(get_gpid()) ); } } // secondary can start checkpint in the long running thread pool else { dassert(PS_SECONDARY == status(), ""); // only one running instance if (!_secondary_states.checkpoint_is_running) { _secondary_states.checkpoint_is_running = true; tasking::enqueue( 
&_secondary_states.checkpoint_task, LPC_CHECKPOINT_REPLICA, this, &replica::background_checkpoint, gpid_to_hash(get_gpid()) ); } } }