// Reply handler for a configuration-update request sent to the meta server.
//
//   type     - configuration change kind that was requested
//   app      - application the request refers to; moved into the retry
//              callback or into kill_app() depending on the outcome
//   err      - completion status of the RPC
//   request  - the request message; re-sent as-is when the call timed out
//   response - the reply message; only decoded when err is ERR_OK
void daemon_s_service::on_update_configuration_on_meta_server_reply(
    ::dsn::replication::config_type::type type,
    std::shared_ptr<app_internal> && app,
    error_code err,
    dsn_message_t request,
    dsn_message_t response
    )
{
    // Nothing is processed while the service is not online.
    if (!_online)
    {
        err.end_tracking();
        return;
    }

    configuration_update_response resp;

    if (err == ERR_OK)
    {
        // Transport succeeded: adopt the error code carried in the reply body.
        ::dsn::unmarshall(response, resp);
        err = resp.err;
        return;
    }

    if (err == ERR_TIMEOUT)
    {
        // Timed out: issue the same request again and come back to this
        // handler with the new result.
        rpc::call(
            _fd->get_servers(),
            request,
            this,
            [=, cap_app = std::move(app)](error_code retry_err, dsn_message_t retry_request, dsn_message_t retry_response) mutable
            {
                on_update_configuration_on_meta_server_reply(
                    type, std::move(cap_app), retry_err, retry_request, retry_response);
            }
        );
        return;
    }

    // Any other failure: an app that was being added as secondary is killed.
    if (type == config_type::CT_ADD_SECONDARY)
    {
        kill_app(std::move(app));
    }
}
// Completion handler for a log operation belonging to a configuration update.
//
// Asserts that the operation succeeded and that `buffer` has exactly `size`
// bytes, applies the update described by `req`, and, when a reply message
// `resp` was supplied, writes the response header and the update response
// into it and sends it.
void meta_service::on_log_completed(
    error_code err,
    size_t size,
    blob buffer,
    std::shared_ptr<configuration_update_request> req,
    dsn_message_t resp
    )
{
    dassert(err == ERR_OK, "log operation failed, cannot proceed, err = %s", err.to_string());
    dassert(buffer.length() == size, "log size must equal to the specified buffer size");

    configuration_update_response update_result;
    update_configuration(*req, update_result);

    // No reply message to fill in: just mark the error code as consumed.
    if (nullptr == resp)
    {
        err.end_tracking();
        return;
    }

    meta_response_header header;
    header.err = err;
    header.primary_address = primary_address();

    marshall(resp, header);
    marshall(resp, update_result);

    dsn_rpc_reply(resp);
}
/*callback*/
// Handles the reply to a read/write request sent to a replica.
//
//   err      - transport-level completion status of the RPC
//   request  - the request message (not used here)
//   response - the reply message; its leading error code is decoded when
//              the transport succeeded
//   rc       - context of the client request this reply belongs to
//
// If the request has already been completed the reply is ignored.  When the
// replica answers ERR_OK or ERR_HANDLER_NOT_FOUND the request is finished
// with that code; for every other outcome the cached configuration of the
// partition is dropped and the request is issued again.
void replication_app_client_base::replica_rw_reply(
    error_code err,
    dsn_message_t request,
    dsn_message_t response,
    request_context_ptr& rc
    )
{
    {
        zauto_lock l(rc->lock);
        if (rc->completed)
        {
            // the request was already finished before this reply arrived
            err.end_tracking();
            return;
        }
    }

    if (err == ERR_OK)
    {
        // transport succeeded: replace err with the code sent by the replica
        ::unmarshall(response, err);

        // some error codes do not need retry
        if (err == ERR_OK || err == ERR_HANDLER_NOT_FOUND)
        {
            end_request(rc, err, response);
            return;
        }
    }

    // everything else is retried
    dinfo("%s.client: get error %s from replica with index %d",
        _app_name.c_str(),
        err.to_string(),
        rc->partition_index
        );

    // clear partition configuration as it could be wrong
    {
        zauto_write_lock l(_config_lock);
        _config_cache.erase(rc->partition_index);
    }

    // then retry
    call(rc.get(), false);
}
void replica::response_client_message(dsn_message_t request, error_code error) { if (nullptr == request) { error.end_tracking(); return; } ddebug("%s: reply client read/write, err = %s", name(), error.to_string()); dsn_rpc_reply(dsn_msg_create_response(request), error); }
// Finishes a client request exactly once: hands `err` and `resp` to the
// request's callback task and marks the request as completed.  Calls made
// after the request is already completed are ignored.  All of this happens
// while holding the request's lock.
void replication_app_client_base::end_request(
    request_context_ptr& request,
    error_code err,
    dsn_message_t resp
    )
{
    zauto_lock l(request->lock);

    if (request->completed)
    {
        err.end_tracking();
        return;
    }

    // The timeout timer is cancelled unless the result itself is a timeout.
    if (err != ERR_TIMEOUT)
    {
        if (request->timeout_timer != nullptr)
        {
            request->timeout_timer->cancel(false);
        }
    }

    request->callback_task->enqueue_rpc_response(err, resp);
    request->completed = true;
}
// Finishes a resolve request exactly once: invokes the request's callback
// with the resolve result (error, address, and the {_app_id, partition
// index} pair) and marks the request as completed.  Calls made after the
// request is already completed are ignored.  All of this happens while
// holding the request's lock.
//
//   called_by_timer - when true the timeout timer is left untouched
void partition_resolver_simple::end_request(
    request_context_ptr&& request,
    error_code err,
    rpc_address addr,
    bool called_by_timer
    ) const
{
    zauto_lock l(request->lock);

    if (request->completed)
    {
        err.end_tracking();
        return;
    }

    if (!called_by_timer)
    {
        if (request->timeout_timer != nullptr)
        {
            request->timeout_timer->cancel(false);
        }
    }

    request->callback(resolve_result{ err, addr, {_app_id, request->partition_index} });
    request->completed = true;
}
void replica::handle_remote_failure(partition_status st, ::dsn::rpc_address node, error_code error) { derror( "%s: handle remote failure error %s, status = %s, node = %s", name(), error.to_string(), enum_to_string(st), node.to_string() ); error.end_tracking(); dassert (status() == PS_PRIMARY, ""); dassert (node != _stub->_primary_address, ""); switch (st) { case PS_SECONDARY: dassert (_primary_states.check_exist(node, PS_SECONDARY), ""); { configuration_update_request request; request.node = node; request.type = CT_DOWNGRADE_TO_INACTIVE; request.config = _primary_states.membership; downgrade_to_inactive_on_primary(request); } break; case PS_POTENTIAL_SECONDARY: // potential secondary failure does not lead to ballot change // therefore, it is possible to have multiple exec here _primary_states.learners.erase(node); _primary_states.statuses.erase(node); break; case PS_INACTIVE: case PS_ERROR: break; default: dassert (false, ""); break; } }
// Reply handler for a configuration-update request this replica sent to the
// meta server.
//
//   err      - completion status of the RPC
//   request  - the request message; re-sent when the update has to be retried
//   response - the reply message; only decoded when err is ERR_OK
//   req      - the configuration update that was requested
//
// The reply is ignored unless the replica is PS_INACTIVE and the stub is
// connected.  Any failure other than ERR_INVALID_VERSION re-sends the same
// request.  Otherwise, unless the returned ballot is older than the local
// one, the returned configuration is applied.
void replica::on_update_configuration_on_meta_server_reply(
    error_code err,
    dsn_message_t request,
    dsn_message_t response,
    std::shared_ptr<configuration_update_request> req
    )
{
    check_hashed_access();

    const bool ignore_reply = (status() != PS_INACTIVE) || !_stub->is_connected();
    if (ignore_reply)
    {
        _primary_states.reconfiguration_task = nullptr;
        err.end_tracking();
        return;
    }

    configuration_update_response resp;
    if (err == ERR_OK)
    {
        // Transport succeeded: adopt the error code carried in the reply body.
        ::unmarshall(response, resp);
        err = resp.err;
    }

    if (err != ERR_OK)
    {
        ddebug(
            "%s: update configuration reply with err %s, request ballot %lld",
            name(),
            err.to_string(),
            req->config.ballot
            );

        const bool should_retry = (err != ERR_INVALID_VERSION);
        if (should_retry)
        {
            rpc_address target(_stub->_failure_detector->get_servers());

            // added for another round of rpc::call
            dsn_msg_add_ref(request);

            _primary_states.reconfiguration_task = rpc::call(
                target,
                request,
                this,
                std::bind(
                    &replica::on_update_configuration_on_meta_server_reply,
                    this,
                    std::placeholders::_1,
                    std::placeholders::_2,
                    std::placeholders::_3,
                    req),
                gpid_to_hash(get_gpid())
                );
            return;
        }
    }

    ddebug(
        "%s: update configuration reply with err %s, ballot %lld, local %lld",
        name(),
        resp.err.to_string(),
        resp.config.ballot,
        get_ballot()
        );

    // A configuration with a ballot below the local one is not applied.
    if (resp.config.ballot < get_ballot())
    {
        _primary_states.reconfiguration_task = nullptr;
        return;
    }

    // post-update work items?
    if (resp.err == ERR_OK)
    {
        dassert (req->config.gpid == resp.config.gpid, "");
        dassert (req->config.app_type == resp.config.app_type, "");
        dassert (req->config.primary == resp.config.primary, "");
        dassert (req->config.secondaries == resp.config.secondaries, "");

        switch (req->type)
        {
        case CT_UPGRADE_TO_PRIMARY:
            _primary_states.last_prepare_decree_on_new_primary = _prepare_list->max_decree();
            break;

        case CT_REMOVE:
            // Tell the removed node about it, unless that node is this one.
            if (req->node != primary_address())
            {
                replica_configuration rconfig;
                replica_helper::get_replica_config(resp.config, req->node, rconfig);
                rpc::call_one_way_typed(req->node, RPC_REMOVE_REPLICA, rconfig, gpid_to_hash(get_gpid()));
            }
            break;

        // These change kinds need no extra work here.
        case CT_ASSIGN_PRIMARY:
        case CT_DOWNGRADE_TO_SECONDARY:
        case CT_DOWNGRADE_TO_INACTIVE:
        case CT_UPGRADE_TO_SECONDARY:
            break;

        default:
            dassert (false, "");
        }
    }

    update_configuration(resp.config);
    _primary_states.reconfiguration_task = nullptr;
}