/// @brief Returns this server's role, asking the agency for one if needed.
///
/// The value from loadRole() is returned directly when it is anything other
/// than ROLE_UNDEFINED, or when clustering is not enabled. Otherwise, if the
/// server has no id yet, it first writes its endpoint (plus its description,
/// when non-empty) to "Current/NewServers/<_localInfo>" in the agency. The
/// role is then obtained from determineRole(), stored via storeRole() and
/// returned.
///
/// @return the stored or newly determined role, or ROLE_UNDEFINED when the
///         announcement could not be written to the agency
ServerState::RoleEnum ServerState::getRole () {
  std::string serverId;
  std::string localInfo;

  {
    auto known = loadRole();

    // fast path: nothing to negotiate
    if (! _clusterEnabled || known != ServerState::ROLE_UNDEFINED) {
      return known;
    }

    localInfo = _localInfo;
    serverId  = _id;
  }

  if (serverId.empty()) {
    // Without an id the agency does not know us yet, so announce ourselves
    // to get a role configured.
    LOG_DEBUG("Announcing our birth in Current/NewServers to the agency...");

    AgencyComm agency;
    AgencyCommResult written;

    Json announcement(Json::Object, 1);
    announcement("endpoint", Json(TRI_UNKNOWN_MEM_ZONE, getAddress()));

    std::string about = getDescription();
    if (! about.empty()) {
      announcement("Description", Json(TRI_UNKNOWN_MEM_ZONE, about));
    }

    written = agency.setValue("Current/NewServers/" + _localInfo,
                              announcement.json(),
                              0.0);

    if (! written.successful()) {
      LOG_ERROR("Could not talk to agency!");
      return ROLE_UNDEFINED;
    }

    std::string serialized = announcement.toString();
    LOG_DEBUG("Have stored %s under Current/NewServers/%s in agency.",
              serialized.c_str(),
              _localInfo.c_str());
  }

  // no role stored so far: determine it, remember it, hand it out
  RoleEnum determined = determineRole(localInfo, serverId);

  std::string roleName = roleToString(determined);
  LOG_DEBUG("Found my role: %s", roleName.c_str());

  storeRole(determined);

  return determined;
}
/// @brief Handles a shutdown request.
///
/// Only DELETE requests are accepted; anything else is answered with
/// METHOD_NOT_ALLOWED (405). Two request values are evaluated:
///  - "remove_from_cluster" == "1": the cluster feature is told to
///    unregister on shutdown.
///  - "shutdown_cluster" == "1": the value `true` is written to "Shutdown"
///    in the agency (SERVER_ERROR / 500 if that write fails), and the server
///    is also unregistered on shutdown.
/// Afterwards the application server's shutdown is started and the string
/// "OK" is sent as the result.
///
/// @return always RestStatus::DONE
RestStatus RestShutdownHandler::execute() {
  if (_request->requestType() != rest::RequestType::DELETE_REQ) {
    generateError(rest::ResponseCode::METHOD_NOT_ALLOWED, 405);
    return RestStatus::DONE;
  }

  // "remove_from_cluster": must be present and equal to "1"
  bool removeFound;
  std::string const& removeValue =
      _request->value("remove_from_cluster", removeFound);
  bool unregister = removeFound && removeValue == "1";

  // "shutdown_cluster": must be present and equal to "1"
  bool shutdownFound;
  std::string const& shutdownValue =
      _request->value("shutdown_cluster", shutdownFound);
  bool wholeCluster = shutdownFound && shutdownValue == "1";

  if (wholeCluster) {
    AgencyComm agency;

    VPackBuilder flag;
    flag.add(VPackValue(true));

    AgencyCommResult written = agency.setValue("Shutdown", flag.slice(), 0.0);
    if (!written.successful()) {
      generateError(rest::ResponseCode::SERVER_ERROR, 500);
      return RestStatus::DONE;
    }

    // shutting down the whole cluster implies unregistering this server
    unregister = true;
  }

  if (unregister) {
    ClusterFeature* cluster =
        ApplicationServer::getFeature<ClusterFeature>("Cluster");
    cluster->setUnregisterOnShutdown(true);
  }

  ApplicationServer::server->beginShutdown();

  try {
    VPackBuilder reply;
    reply.add(VPackValue("OK"));
    generateResult(rest::ResponseCode::OK, reply.slice());
  } catch (...) {
    // Deliberately ignored: shutdown has already been initiated.
  }

  return RestStatus::DONE;
}
void ClusterFeature::start() { // return if cluster is disabled if (!_enableCluster) { return; } ServerState::instance()->setState(ServerState::STATE_STARTUP); // the agency about our state AgencyComm comm; comm.sendServerState(0.0); std::string const version = comm.getVersion(); ServerState::instance()->setInitialized(); std::string const endpoints = AgencyComm::getEndpointsString(); ServerState::RoleEnum role = ServerState::instance()->getRole(); LOG(INFO) << "Cluster feature is turned on. Agency version: " << version << ", Agency endpoints: " << endpoints << ", server id: '" << _myId << "', internal address: " << _myAddress << ", role: " << ServerState::roleToString(role); if (!_disableHeartbeat) { AgencyCommResult result = comm.getValues("Sync/HeartbeatIntervalMs"); if (result.successful()) { velocypack::Slice HeartbeatIntervalMs = result.slice()[0].get(std::vector<std::string>( {AgencyComm::prefix(), "Sync", "HeartbeatIntervalMs"})); if (HeartbeatIntervalMs.isInteger()) { try { _heartbeatInterval = HeartbeatIntervalMs.getUInt(); LOG(INFO) << "using heartbeat interval value '" << _heartbeatInterval << " ms' from agency"; } catch (...) { // Ignore if it is not a small int or uint } } } // no value set in agency. use default if (_heartbeatInterval == 0) { _heartbeatInterval = 5000; // 1/s LOG(WARN) << "unable to read heartbeat interval from agency. 
Using " << "default value '" << _heartbeatInterval << " ms'"; } // start heartbeat thread _heartbeatThread = std::make_shared<HeartbeatThread>( _agencyCallbackRegistry.get(), _heartbeatInterval * 1000, 5, SchedulerFeature::SCHEDULER->ioService()); if (!_heartbeatThread->init() || !_heartbeatThread->start()) { LOG(FATAL) << "heartbeat could not connect to agency endpoints (" << endpoints << ")"; FATAL_ERROR_EXIT(); } while (!_heartbeatThread->isReady()) { // wait until heartbeat is ready usleep(10000); } } AgencyCommResult result; while (true) { VPackBuilder builder; try { VPackObjectBuilder b(&builder); builder.add("endpoint", VPackValue(_myAddress)); } catch (...) { LOG(FATAL) << "out of memory"; FATAL_ERROR_EXIT(); } result = comm.setValue("Current/ServersRegistered/" + _myId, builder.slice(), 0.0); if (!result.successful()) { LOG(FATAL) << "unable to register server in agency: http code: " << result.httpCode() << ", body: " << result.body(); FATAL_ERROR_EXIT(); } else { break; } sleep(1); } if (role == ServerState::ROLE_COORDINATOR) { ServerState::instance()->setState(ServerState::STATE_SERVING); } else if (role == ServerState::ROLE_PRIMARY) { ServerState::instance()->setState(ServerState::STATE_SERVINGASYNC); } else if (role == ServerState::ROLE_SECONDARY) { ServerState::instance()->setState(ServerState::STATE_SYNCING); } }
static void raceForClusterBootstrap() { AgencyComm agency; auto ci = ClusterInfo::instance(); while (true) { AgencyCommResult result = agency.getValues("Bootstrap"); if (!result.successful()) { // Error in communication, note that value not found is not an error LOG_TOPIC(TRACE, Logger::STARTUP) << "raceForClusterBootstrap: no agency communication"; sleep(1); continue; } VPackSlice value = result.slice()[0].get( std::vector<std::string>({agency.prefix(), "Bootstrap"})); if (value.isString()) { // key was found and is a string if (value.copyString().find("done") != std::string::npos) { // all done, let's get out of here: LOG_TOPIC(TRACE, Logger::STARTUP) << "raceForClusterBootstrap: bootstrap already done"; return; } LOG_TOPIC(DEBUG, Logger::STARTUP) << "raceForClusterBootstrap: somebody else does the bootstrap"; sleep(1); continue; } // No value set, we try to do the bootstrap ourselves: VPackBuilder b; b.add(VPackValue(arangodb::ServerState::instance()->getId())); result = agency.casValue("Bootstrap", b.slice(), false, 300, 15); if (!result.successful()) { LOG_TOPIC(DEBUG, Logger::STARTUP) << "raceForClusterBootstrap: lost race, somebody else will bootstrap"; // Cannot get foot into the door, try again later: sleep(1); continue; } // OK, we handle things now, let's see whether a DBserver is there: auto dbservers = ci->getCurrentDBServers(); if (dbservers.size() == 0) { LOG_TOPIC(TRACE, Logger::STARTUP) << "raceForClusterBootstrap: no DBservers, waiting"; agency.removeValues("Bootstrap", false); sleep(1); continue; } LOG_TOPIC(DEBUG, Logger::STARTUP) << "raceForClusterBootstrap: race won, we do the bootstrap"; auto vocbase = DatabaseFeature::DATABASE->systemDatabase(); V8DealerFeature::DEALER->loadJavascriptFiles(vocbase, "server/bootstrap/cluster-bootstrap.js", 0); LOG_TOPIC(DEBUG, Logger::STARTUP) << "raceForClusterBootstrap: bootstrap done"; b.clear(); b.add(VPackValue(arangodb::ServerState::instance()->getId() + ": done")); result = agency.setValue("Bootstrap", 
b.slice(), 0); if (result.successful()) { return; } LOG_TOPIC(TRACE, Logger::STARTUP) << "raceForClusterBootstrap: could not indicate success"; sleep(1); } }
bool ApplicationCluster::open () { if (! enabled()) { return true; } ServerState::RoleEnum role = ServerState::instance()->getRole(); // tell the agency that we are ready { AgencyComm comm; AgencyCommResult result; AgencyCommLocker locker("Current", "WRITE"); if (locker.successful()) { TRI_json_t* ep = TRI_CreateString2CopyJson(TRI_UNKNOWN_MEM_ZONE, _myAddress.c_str(), _myAddress.size()); if (ep == 0) { locker.unlock(); LOG_FATAL_AND_EXIT("out of memory"); } TRI_json_t* json = TRI_CreateArray2Json(TRI_UNKNOWN_MEM_ZONE, 1); if (json == 0) { TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, ep); locker.unlock(); LOG_FATAL_AND_EXIT("out of memory"); } TRI_Insert2ArrayJson(TRI_UNKNOWN_MEM_ZONE, json, "endpoint", ep); result = comm.setValue("Current/ServersRegistered/" + _myId, json, 0.0); TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, json); } if (! result.successful()) { locker.unlock(); LOG_FATAL_AND_EXIT("unable to register server in agency: http code: %d, body: %s", (int) result.httpCode(), result.body().c_str()); } if (role == ServerState::ROLE_COORDINATOR) { TRI_json_t* json = TRI_CreateString2CopyJson(TRI_UNKNOWN_MEM_ZONE, "none", 4); if (json == 0) { locker.unlock(); LOG_FATAL_AND_EXIT("out of memory"); } ServerState::instance()->setState(ServerState::STATE_SERVING); // register coordinator AgencyCommResult result = comm.setValue("Current/Coordinators/" + _myId, json, 0.0); TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, json); if (! result.successful()) { locker.unlock(); LOG_FATAL_AND_EXIT("unable to register coordinator in agency"); } } else if (role == ServerState::ROLE_PRIMARY) { TRI_json_t* json = TRI_CreateString2CopyJson(TRI_UNKNOWN_MEM_ZONE, "none", 4); if (json == 0) { locker.unlock(); LOG_FATAL_AND_EXIT("out of memory"); } ServerState::instance()->setState(ServerState::STATE_SERVINGASYNC); // register server AgencyCommResult result = comm.setValue("Current/DBServers/" + _myId, json, 0.0); TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, json); if (! 
result.successful()) { locker.unlock(); LOG_FATAL_AND_EXIT("unable to register db server in agency"); } } else if (role == ServerState::ROLE_SECONDARY) { locker.unlock(); LOG_FATAL_AND_EXIT("secondary server tasks are currently not implemented"); } } return true; }