// Builds an RPC_PREPARE request carrying this replica's gpid, the replica
// configuration obtained for `status`, and the serialized mutation `mu`, then
// sends it to `addr`. The resulting RPC task is stored in mu->remote_tasks()
// under `addr`; the reply is routed to on_prepare_reply together with the
// (mutation, target status) pair.
void replica::send_prepare_message(::dsn::rpc_address addr, partition_status status, mutation_ptr& mu, int timeout_milliseconds)
{
    dsn_message_t prepare_msg = dsn_msg_create_request(
        RPC_PREPARE,
        timeout_milliseconds,
        gpid_to_hash(get_gpid()));

    replica_configuration target_config;
    _primary_states.get_replica_config(status, target_config);

    // The writer lives in its own scope so that it is destroyed before the
    // message is handed to rpc::call.
    {
        rpc_write_stream writer(prepare_msg);
        marshall(writer, get_gpid());
        marshall(writer, target_config);
        mu->write_to(writer);
    }

    mu->remote_tasks()[addr] = rpc::call(
        addr,
        prepare_msg,
        this,
        std::bind(&replica::on_prepare_reply,
                  this,
                  std::make_pair(mu, target_config.status),
                  std::placeholders::_1,
                  std::placeholders::_2,
                  std::placeholders::_3),
        gpid_to_hash(get_gpid()));

    ddebug("%s: mutation %s send_prepare_message to %s as %s",
           name(),
           mu->name(),
           addr.to_string(),
           enum_to_string(target_config.status));
}
// Builds an RPC_PREPARE request carrying this replica's gpid, the replica
// configuration obtained for `status` and `learn_signature`, and the
// serialized mutation `mu`, then sends it to `addr`. The resulting RPC task is
// stored in mu->remote_tasks() under `addr`; the reply callback forwards to
// on_prepare_reply together with the (mutation, target status) pair.
void replica::send_prepare_message(
    ::dsn::rpc_address addr,
    partition_status status,
    mutation_ptr& mu,
    int timeout_milliseconds,
    int64_t learn_signature)
{
    dsn_message_t prepare_msg = dsn_msg_create_request(
        RPC_PREPARE,
        timeout_milliseconds,
        gpid_to_hash(get_gpid()));

    replica_configuration target_config;
    _primary_states.get_replica_config(status, target_config, learn_signature);

    // The writer lives in its own scope so that it is destroyed before the
    // message is handed to rpc::call.
    {
        rpc_write_stream writer(prepare_msg);
        marshall(writer, get_gpid());
        marshall(writer, target_config);
        mu->write_to(writer);
    }

    // [=] copies `mu` and `target_config` into the closure (and captures
    // `this`), so the callback does not refer to this function's locals.
    mu->remote_tasks()[addr] = rpc::call(
        addr,
        prepare_msg,
        this,
        [=](error_code err, dsn_message_t request, dsn_message_t reply)
        {
            on_prepare_reply(std::make_pair(mu, target_config.status), err, request, reply);
        },
        gpid_to_hash(get_gpid()));

    ddebug("%s: mutation %s send_prepare_message to %s as %s",
           name(),
           mu->name(),
           addr.to_string(),
           enum_to_string(target_config.status));
}
// Sends a beacon to `node` through the base failure_detector only while
// _send_ping_switch is set; otherwise the beacon is dropped and the drop is
// logged.
virtual void send_beacon(::dsn::rpc_address node, uint64_t time) override
{
    if (_send_ping_switch)
    {
        failure_detector::send_beacon(node, time);
    }
    else
    {
        // `time` is unsigned 64-bit, so it is formatted with PRIu64
        // (PRId64 would mismatch the argument's signedness).
        dinfo("ignore send beacon, to node[%s], time[%" PRIu64 "]", node.to_string(), time);
    }
}
// Removes `node` from _masters while holding _lock.
// Cancels the record's beacon timer before erasing it.
// Returns true if the node was registered, false otherwise.
bool failure_detector::unregister_master(::dsn::rpc_address node)
{
    zauto_lock l(_lock);

    auto found = _masters.find(node);
    if (found == _masters.end())
    {
        ddebug("unregister master[%s] failed, cannot find it in FD", node.to_string());
        return false;
    }

    found->second.send_beacon_timer->cancel(true);
    _masters.erase(found);
    dinfo("unregister master[%s] successfully", node.to_string());
    return true;
}
// Adds a worker record for `target`, stamped with the current time and with
// is_alive set from `is_connected`. An existing record is left untouched.
//
// This function does not take _lock itself: callers should hold fd::_lock.
void failure_detector::register_worker(::dsn::rpc_address target, bool is_connected)
{
    uint64_t now = now_ms();

    worker_record record(target, now);
    record.is_alive = is_connected;

    const bool inserted = _workers.insert(std::make_pair(target, record)).second;
    if (!inserted)
    {
        dinfo("worker[%s] already registered", target.to_string());
        return;
    }

    dinfo("register worker[%s] successfully", target.to_string());
}
// Registers `target` as a master while holding _lock.
// A periodic LPC_BEACON_SEND timer (period _beacon_interval_milliseconds) is
// set up when the record is newly inserted, or when an existing record was
// marked rejected (the flag is cleared first).
void failure_detector::register_master(::dsn::rpc_address target)
{
    uint64_t now = now_ms();
    zauto_lock l(_lock);

    master_record record(target, now);
    auto ret = _masters.insert(std::make_pair(target, record));
    auto& entry = ret.first->second;

    bool need_timer = false;
    if (!ret.second)
    {
        // active the beacon again in case previously local node is not in
        // target's allow list
        if (entry.rejected)
        {
            entry.rejected = false;
            need_timer = true;
        }
        dinfo("master[%s] already registered", target.to_string());
    }
    else
    {
        dinfo("register master[%s] successfully", target.to_string());
        need_timer = true;
    }

    if (!need_timer)
    {
        return;
    }

    entry.send_beacon_timer = tasking::enqueue_timer(
        LPC_BEACON_SEND,
        this,
        [this, target]()
        {
            this->send_beacon(target, now_ms());
        },
        std::chrono::milliseconds(_beacon_interval_milliseconds));
}
// Called when worker `node` (re)connects. Does nothing unless is_primary()
// holds; otherwise logs the event and reports the node as connected
// (node, true) through _state->set_node_state.
void meta_server_failure_detector::on_worker_connected(::dsn::rpc_address node)
{
    if (!is_primary())
    {
        return;
    }

    node_states states;
    states.push_back(std::make_pair(node, true));

    // The format string must be the first argument, as in every other log
    // call; previously "Client reconnected" was taken as the format and the
    // node address was never printed.
    dwarn("Client reconnected, Client %s", node.to_string());

    _state->set_node_state(states, nullptr);
}
// Asks the meta server to upgrade `node` to a secondary: copies the current
// primary membership, appends `node` to its secondaries, and submits the
// result as a CT_UPGRADE_TO_SECONDARY configuration update.
void replica::upgrade_to_secondary_on_primary(::dsn::rpc_address node)
{
    ddebug("%s: upgrade potential secondary %s to secondary",
           name(),
           node.to_string());

    // Work on a copy; _primary_states.membership itself is not modified here.
    partition_configuration proposed = _primary_states.membership;
    proposed.secondaries.push_back(node);

    update_configuration_on_meta_server(CT_UPGRADE_TO_SECONDARY, node, proposed);
}
void replica::handle_remote_failure(partition_status st, ::dsn::rpc_address node, error_code error) { derror( "%s: handle remote failure error %s, status = %s, node = %s", name(), error.to_string(), enum_to_string(st), node.to_string() ); error.end_tracking(); dassert (status() == PS_PRIMARY, ""); dassert (node != _stub->_primary_address, ""); switch (st) { case PS_SECONDARY: dassert (_primary_states.check_exist(node, PS_SECONDARY), ""); { configuration_update_request request; request.node = node; request.type = CT_DOWNGRADE_TO_INACTIVE; request.config = _primary_states.membership; downgrade_to_inactive_on_primary(request); } break; case PS_POTENTIAL_SECONDARY: // potential secondary failure does not lead to ballot change // therefore, it is possible to have multiple exec here _primary_states.learners.erase(node); _primary_states.statuses.erase(node); break; case PS_INACTIVE: case PS_ERROR: break; default: dassert (false, ""); break; } }
// Erases `node` from _workers and logs how many entries were removed.
// Returns true if at least one entry was erased, false otherwise.
//
// This function does not take _lock itself: callers should hold fd::_lock.
bool failure_detector::unregister_worker(::dsn::rpc_address node)
{
    const size_t removed = _workers.erase(node);
    const bool ret = (removed != 0);

    dinfo("unregister worker[%s] successfully, removed entry count is %u",
          node.to_string(),
          (uint32_t)removed);

    return ret;
}
// Re-targets the master record registered under `from` to `to`.
// Fails (returns false) when `from` is not registered or `to` already is.
// On success the old beacon timer is cancelled, a new LPC_BEACON_SEND timer
// aimed at `to` is enqueued with the given initial delay, the record is
// re-inserted under `to`, and the `from` entry is erased.
//
// The caller of switch master should hold the lock protecting _masters.
bool failure_detector::switch_master(::dsn::rpc_address from, ::dsn::rpc_address to, uint32_t delay_milliseconds)
{
    auto from_it = _masters.find(from);
    if (from_it == _masters.end())
    {
        dwarn("switch master failed as from node is not registered yet, from[%s], to[%s]",
              from.to_string(), to.to_string());
        return false;
    }

    if (_masters.find(to) != _masters.end())
    {
        dwarn("switch master failed as both are already registered, from[%s], to[%s]",
              from.to_string(), to.to_string());
        return false;
    }

    auto& rec = from_it->second;
    rec.node = to;
    rec.rejected = false;
    rec.send_beacon_timer->cancel(true);
    rec.send_beacon_timer = tasking::enqueue_timer(
        LPC_BEACON_SEND,
        this,
        [this, to]()
        {
            this->send_beacon(to, now_ms());
        },
        std::chrono::milliseconds(_beacon_interval_milliseconds),
        0,
        std::chrono::milliseconds(delay_milliseconds));

    // Copy the updated record under its new key, then drop the old entry.
    _masters.insert(std::make_pair(to, rec));
    _masters.erase(from);

    dinfo("switch master successfully, from[%s], to[%s]",
          from.to_string(), to.to_string());
    return true;
}
// Logs the intended destination of `request`; nothing is actually sent.
void send_message(const dsn::rpc_address &target, dsn::message_ex *request)
{
    ddebug("send request to %s", target.to_string());

    // Take one reference and immediately drop it.
    // NOTE(review): presumably this lets the message be released when no
    // other holder exists - confirm against message_ex's ref-counting.
    request->add_ref();
    request->release_ref();
}
// Logs a "<master|worker>[<node>] <connected|disconnected>" line for `node`.
void failure_detector::report(::dsn::rpc_address node, bool is_master, bool is_connected)
{
    const char* role = is_master ? "master" : "worker";
    // Prefix glued onto "connected" to form "disconnected" when needed.
    const char* prefix = is_connected ? "" : "dis";

    ddebug("%s[%s] %sconnected", role, node.to_string(), prefix);
}