void replica::on_group_check(const group_check_request& request, __out_param group_check_response& response) { ddebug( "%s: on_group_check from %s:%d", name(), request.config.primary.name.c_str(), request.config.primary.port ); if (request.config.ballot < get_ballot()) { response.err = ERR_VERSION_OUTDATED; return; } else if (request.config.ballot > get_ballot()) { update_local_configuration(request.config); } else if (is_same_ballot_status_change_allowed(status(), request.config.status)) { update_local_configuration(request.config, true); } switch (status()) { case PS_INACTIVE: break; case PS_SECONDARY: if (request.last_committed_decree > last_committed_decree()) { _prepare_list->commit(request.last_committed_decree, true); } break; case PS_POTENTIAL_SECONDARY: init_learn(request.learner_signature); break; case PS_ERROR: break; default: dassert (false, ""); } response.gpid = get_gpid(); response.node = primary_address(); response.err = ERR_SUCCESS; if (status() == PS_ERROR) { response.err = ERR_INVALID_STATE; } response.last_committed_decree_in_app = _app->last_committed_decree(); response.last_committed_decree_in_prepare_list = last_committed_decree(); response.learner_status_ = _potential_secondary_states.learning_status; response.learner_signature = _potential_secondary_states.learning_signature; }
// from primary void replica::on_remove(const replica_configuration& request) { if (request.ballot < get_ballot()) return; dassert (request.status == PS_INACTIVE, ""); update_local_configuration(request); }
// Moves the replica to status `s` using a copy of `_config` in which only the
// status field is replaced.
//
// Returns false without touching anything when the replica already has status
// `s`; otherwise returns whatever update_local_configuration() reports.
bool replica::update_local_configuration_with_no_ballot_change(partition_status s)
{
    if (status() != s)
    {
        auto updated = _config;
        updated.status = s;
        return update_local_configuration(updated, true);
    }

    return false;
}
// @ secondary void replica::on_copy_checkpoint(const replica_configuration& request, /*out*/ learn_response& response) { check_hashed_access(); if (request.ballot > get_ballot()) { if (!update_local_configuration(request)) { response.err = ERR_INVALID_STATE; return; } } if (status() != PS_SECONDARY) { response.err = ERR_INVALID_STATE; return; } if (_app->last_durable_decree() == 0) { response.err = ERR_OBJECT_NOT_FOUND; return; } blob placeholder; int err = _app->get_checkpoint(0, placeholder, response.state); if (err != 0) { response.err = ERR_LEARN_FILE_FAILED; } else { response.err = ERR_OK; response.last_committed_decree = last_committed_decree(); response.base_local_dir = _app->data_dir(); response.address = _stub->_primary_address; } }
bool replica::update_configuration(const partition_configuration& config) { dassert (config.ballot >= get_ballot(), ""); replica_configuration rconfig; replica_helper::get_replica_config(config, primary_address(), rconfig); if (rconfig.status == PS_PRIMARY && (rconfig.ballot > get_ballot() || status() != PS_PRIMARY) ) { _primary_states.reset_membership(config, config.primary != primary_address()); } if (config.ballot > get_ballot() || is_same_ballot_status_change_allowed(status(), rconfig.status) ) { return update_local_configuration(rconfig, true); } else return false; }
void replica::on_group_check(const group_check_request& request, /*out*/ group_check_response& response) { check_hashed_access(); ddebug( "%s: process group check, primary = %s, ballot = %" PRId64 ", status = %s, last_committed_decree = %" PRId64, name(), request.config.primary.to_string(), request.config.ballot, enum_to_string(request.config.status), request.last_committed_decree ); if (request.config.ballot < get_ballot()) { response.err = ERR_VERSION_OUTDATED; dwarn("%s: on_group_check reply %s", name(), response.err.to_string()); return; } else if (request.config.ballot > get_ballot()) { if (!update_local_configuration(request.config)) { response.err = ERR_INVALID_STATE; dwarn("%s: on_group_check reply %s", name(), response.err.to_string()); return; } } else if (is_same_ballot_status_change_allowed(status(), request.config.status)) { update_local_configuration(request.config, true); } switch (status()) { case partition_status::PS_INACTIVE: break; case partition_status::PS_SECONDARY: if (request.last_committed_decree > last_committed_decree()) { _prepare_list->commit(request.last_committed_decree, COMMIT_TO_DECREE_HARD); } break; case partition_status::PS_POTENTIAL_SECONDARY: init_learn(request.config.learner_signature); break; case partition_status::PS_ERROR: break; default: dassert (false, ""); } response.pid = get_gpid(); response.node = _stub->_primary_address; response.err = ERR_OK; if (status() == partition_status::PS_ERROR) { response.err = ERR_INVALID_STATE; dwarn("%s: on_group_check reply %s", name(), response.err.to_string()); } response.last_committed_decree_in_app = _app->last_committed_decree(); response.last_committed_decree_in_prepare_list = last_committed_decree(); response.learner_status_ = _potential_secondary_states.learning_status; response.learner_signature = _potential_secondary_states.learning_version; }
// Handles a prepare request: decodes the sender's replica configuration and
// the mutation from `request`, validates them against the local state, adds
// the mutation to the prepare list and appends it to the replication log.
//
// Depending on the outcome the request is acknowledged through
// ack_prepare_message(), silently dropped, or acknowledged later from
// on_append_log_completed() once the log append has finished.
void replica::on_prepare(dsn_message_t request)
{
    check_hashed_access();

    replica_configuration rconfig;
    mutation_ptr mu;
    {
        // the reader only lives for the duration of the decoding
        rpc_read_stream reader(request);
        unmarshall(reader, rconfig);
        mu = mutation::read_from(reader, request);
    }

    const decree mutation_decree = mu->data.header.decree;

    dinfo("%s: mutation %s on_prepare", name(), mu->name());

    dassert(mu->data.header.ballot == rconfig.ballot, "");

    if (mu->data.header.ballot < get_ballot())
    {
        derror("%s: mutation %s on_prepare skipped due to old view", name(), mu->name());
        // no need response because the rpc should have been cancelled on primary in this case
        return;
    }

    // update configuration when necessary
    if (rconfig.ballot > get_ballot() && !update_local_configuration(rconfig))
    {
        derror(
            "%s: mutation %s on_prepare failed as update local configuration failed, state = %s",
            name(), mu->name(),
            enum_to_string(status())
            );
        ack_prepare_message(ERR_INVALID_STATE, mu);
        return;
    }

    const auto current = status();

    if (PS_INACTIVE == current || PS_ERROR == current)
    {
        derror(
            "%s: mutation %s on_prepare failed as invalid replica state, state = %s",
            name(), mu->name(),
            enum_to_string(current)
            );

        if (PS_INACTIVE == current && _inactive_is_transient)
        {
            ack_prepare_message(ERR_INACTIVE_STATE, mu);
        }
        else
        {
            ack_prepare_message(ERR_INVALID_STATE, mu);
        }
        return;
    }

    if (PS_POTENTIAL_SECONDARY == current)
    {
        // new learning process
        if (rconfig.learner_signature != _potential_secondary_states.learning_signature)
        {
            init_learn(rconfig.learner_signature);
            // no need response as rpc is already gone
            return;
        }

        if (_potential_secondary_states.learning_status != LearningWithPrepare
            && _potential_secondary_states.learning_status != LearningSucceeded)
        {
            derror(
                "%s: mutation %s on_prepare skipped as invalid learning status, state = %s, learning_status = %s",
                name(), mu->name(),
                enum_to_string(current),
                enum_to_string(_potential_secondary_states.learning_status)
                );
            // no need response as rpc is already gone
            return;
        }
    }

    dassert (rconfig.status == status(), "");

    // already committed locally: just acknowledge
    if (mutation_decree <= last_committed_decree())
    {
        ack_prepare_message(ERR_OK, mu);
        return;
    }

    // real prepare start
    auto existing = _prepare_list->get_mutation_by_decree(mutation_decree);
    if (existing != nullptr && existing->data.header.ballot == mu->data.header.ballot)
    {
        if (!existing->is_logged())
        {
            derror("%s: mutation %s on_prepare skipped as it is duplicate", name(), mu->name());
            // response will be unnecessary when we add retry logic in rpc engine.
            // the retried rpc will use the same id, therefore it will be considered
            // responded even if the response is for a previous try.
        }
        else
        {
            ack_prepare_message(ERR_OK, mu);
        }
        return;
    }

    error_code err = _prepare_list->prepare(mu, status());
    dassert (err == ERR_OK, "");

    // the accepted decree must stay within the window allowed for the current status
    if (PS_POTENTIAL_SECONDARY != status())
    {
        dassert (PS_SECONDARY == status(), "");
        dassert (mu->data.header.decree <= last_committed_decree() + _options->staleness_for_commit, "");
    }
    else
    {
        dassert (mu->data.header.decree <= last_committed_decree() + _options->max_mutation_count_in_prepare_list, "");
    }

    // write log; completion is reported to on_append_log_completed()
    dassert(mu->log_task() == nullptr, "");
    mu->log_task() = _stub->_log->append(
        mu,
        LPC_WRITE_REPLICATION_LOG,
        this,
        std::bind(&replica::on_append_log_completed, this, mu,
                  std::placeholders::_1,
                  std::placeholders::_2),
        gpid_to_hash(get_gpid())
        );
}
// Handles a prepare request: decodes the sender's replica configuration and
// the mutation from `request`, validates them against the local state, adds
// the mutation to the prepare list and appends it to the replication log.
//
// When _options.prepare_ack_on_secondary_before_logging_allowed is set, the
// prepare is acknowledged before the log append is issued.
void replica::on_prepare(dsn_message_t request)
{
    check_hashed_access();

    replica_configuration rconfig;
    mutation_ptr mu;
    {
        // the reader only lives for the duration of the decoding
        msg_binary_reader reader(request);
        unmarshall(reader, rconfig);
        mu = mutation::read_from(reader, request);
    }

    const decree mutation_decree = mu->data.header.decree;

    ddebug( "%s: mutation %s on_prepare", name(), mu->name());

    dassert (mu->data.header.ballot == rconfig.ballot, "");

    if (mu->data.header.ballot < get_ballot())
    {
        ddebug( "%s: mutation %s on_prepare skipped due to old view", name(), mu->name());
        return;
    }

    // update configuration when necessary
    if (rconfig.ballot > get_ballot() && !update_local_configuration(rconfig))
    {
        ddebug(
            "%s: mutation %s on_prepare to %s failed as update local configuration failed",
            name(), mu->name(),
            enum_to_string(status())
            );
        ack_prepare_message(ERR_INVALID_STATE, mu);
        return;
    }

    const auto current = status();

    if (PS_INACTIVE == current || PS_ERROR == current)
    {
        ddebug(
            "%s: mutation %s on_prepare to %s skipped",
            name(), mu->name(),
            enum_to_string(current)
            );

        if (PS_INACTIVE == current && _inactive_is_transient)
        {
            ack_prepare_message(ERR_INACTIVE_STATE, mu);
        }
        else
        {
            ack_prepare_message(ERR_INVALID_STATE, mu);
        }
        return;
    }

    if (PS_POTENTIAL_SECONDARY == current)
    {
        const bool accepts_prepare =
            _potential_secondary_states.learning_status == LearningWithPrepare
            || _potential_secondary_states.learning_status == LearningSucceeded;
        if (!accepts_prepare)
        {
            ddebug(
                "%s: mutation %s on_prepare to %s skipped, learnings state = %s",
                name(), mu->name(),
                enum_to_string(current),
                enum_to_string(_potential_secondary_states.learning_status)
                );
            // no reply here, as there may be retries later
            return;
        }
    }

    dassert (rconfig.status == status(), "");

    // already committed locally: just acknowledge
    if (mutation_decree <= last_committed_decree())
    {
        ack_prepare_message(ERR_OK, mu);
        return;
    }

    // real prepare start
    auto existing = _prepare_list->get_mutation_by_decree(mutation_decree);
    if (existing != nullptr && existing->data.header.ballot == mu->data.header.ballot)
    {
        ddebug( "%s: mutation %s redundant prepare skipped", name(), mu->name());

        const bool may_ack =
            existing->is_logged() || _options.prepare_ack_on_secondary_before_logging_allowed;
        if (may_ack)
        {
            ack_prepare_message(ERR_OK, mu);
        }
        return;
    }

    error_code err = _prepare_list->prepare(mu, status());
    dassert (err == ERR_OK, "");

    // the accepted decree must stay within the window allowed for the current status
    if (PS_POTENTIAL_SECONDARY != status())
    {
        dassert (PS_SECONDARY == status(), "");
        dassert (mu->data.header.decree <= last_committed_decree() + _options.staleness_for_commit, "");
    }
    else
    {
        dassert (mu->data.header.decree <= last_committed_decree() + _options.staleness_for_start_prepare_for_potential_secondary, "");
    }

    // ack without logging
    if (_options.prepare_ack_on_secondary_before_logging_allowed)
    {
        ack_prepare_message(err, mu);
    }

    // write log; completion is reported to on_append_log_completed()
    dassert (mu->log_task() == nullptr, "");
    mu->log_task() = _stub->_log->append(
        mu,
        LPC_WRITE_REPLICATION_LOG,
        this,
        std::bind(&replica::on_append_log_completed, this, mu,
                  std::placeholders::_1,
                  std::placeholders::_2),
        gpid_to_hash(get_gpid())
        );
    dassert(mu->log_task() != nullptr, "");
}