/// @brief determine whether the given server id is listed as a DB server
/// (primary or secondary) under Plan/DBServers in the agency
///
/// @param id  the server id to look up
/// @return ROLE_PRIMARY if id is a key of the list; ROLE_SECONDARY if id is the
///         string value of an entry (that entry's key is then stored in
///         _idOfPrimary); ROLE_UNDEFINED if id is not found, or if the agency
///         could not be read or returned a response that could not be parsed
ServerState::RoleEnum ServerState::checkServersList (std::string const& id) {
  // fetch value at Plan/DBServers
  // we need to do this to determine the server's role
  const std::string key = "Plan/DBServers";

  AgencyComm comm;
  AgencyCommResult result;

  {
    // the read lock is only held while fetching the values
    AgencyCommLocker locker("Plan", "READ");

    if (locker.successful()) {
      result = comm.getValues(key, true);
    }
  }

  if (! result.successful()) {
    const std::string endpoints = AgencyComm::getEndpointsString();

    LOG_TRACE("Could not fetch configuration from agency endpoints (%s): "
              "got status code %d, message: %s, key: %s",
              endpoints.c_str(),
              result._statusCode,
              result.errorMessage().c_str(),
              key.c_str());

    return ServerState::ROLE_UNDEFINED;
  }

  // do not inspect the values of a response that could not be parsed
  // (same handling as in checkCoordinatorsList)
  if (! result.parse("Plan/DBServers/", false)) {
    LOG_TRACE("Got an invalid JSON response for Plan/DBServers");

    return ServerState::ROLE_UNDEFINED;
  }

  ServerState::RoleEnum role = ServerState::ROLE_UNDEFINED;

  // check if we can find ourselves in the list returned by the agency
  std::map<std::string, AgencyCommResultEntry>::const_iterator it = result._values.find(id);

  if (it != result._values.end()) {
    // we are in the list. this means we are a primary server
    role = ServerState::ROLE_PRIMARY;
  }
  else {
    // check if we are a secondary: look for an entry whose value is our id
    for (it = result._values.begin(); it != result._values.end(); ++it) {
      const std::string name = triagens::basics::JsonHelper::getStringValue(it->second._json, "");

      if (name == id) {
        role = ServerState::ROLE_SECONDARY;
        // the key of the matching entry is remembered as our primary
        _idOfPrimary = it->first;
        break;
      }
    }
  }

  return role;
}
int ServerState::lookupLocalInfoToId (std::string const& localInfo, std::string& id) { // fetch value at Plan/DBServers // we need to do this to determine the server's role const std::string key = "Target/MapLocalToID"; int count = 0; while (++count <= 600) { AgencyComm comm; AgencyCommResult result; { AgencyCommLocker locker("Target", "READ"); if (locker.successful()) { result = comm.getValues(key, true); } } if (! result.successful()) { const std::string endpoints = AgencyComm::getEndpointsString(); LOG_DEBUG("Could not fetch configuration from agency endpoints (%s): " "got status code %d, message: %s, key: %s", endpoints.c_str(), result._statusCode, result.errorMessage().c_str(), key.c_str()); } else { result.parse("Target/MapLocalToID/", false); std::map<std::string, AgencyCommResultEntry>::const_iterator it = result._values.find(localInfo); if (it != result._values.end()) { TRI_json_t const* json = it->second._json; Json j(TRI_UNKNOWN_MEM_ZONE, json, Json::NOFREE); id = triagens::basics::JsonHelper::getStringValue(json, "ID", ""); if (id.empty()) { LOG_ERROR("ID not set!"); return TRI_ERROR_CLUSTER_COULD_NOT_DETERMINE_ID; } std::string description = triagens::basics::JsonHelper::getStringValue(json, "Description", ""); if (! description.empty()) { setDescription(description); } return TRI_ERROR_NO_ERROR; } } sleep(1); }; return TRI_ERROR_CLUSTER_COULD_NOT_DETERMINE_ID; }
/// @brief determine whether the given server id is listed as a coordinator
/// under Plan/Coordinators in the agency
///
/// @param id  the server id to look up
/// @return ROLE_COORDINATOR if id is a key of the list, ROLE_UNDEFINED if it
///         is not, or if the agency could not be read or its response could
///         not be parsed
ServerState::RoleEnum ServerState::checkCoordinatorsList (std::string const& id) {
  // the role is derived from the contents of Plan/Coordinators
  const std::string key = "Plan/Coordinators";

  AgencyComm comm;
  AgencyCommResult result;

  {
    // hold the read lock only for the duration of the fetch
    AgencyCommLocker locker("Plan", "READ");

    if (locker.successful()) {
      result = comm.getValues(key, true);
    }
  }

  ServerState::RoleEnum role = ServerState::ROLE_UNDEFINED;

  if (result.successful()) {
    if (result.parse("Plan/Coordinators/", false)) {
      // being a key of the returned list means we are a coordinator
      std::map<std::string, AgencyCommResultEntry>::const_iterator found = result._values.find(id);

      if (found != result._values.end()) {
        role = ServerState::ROLE_COORDINATOR;
      }
    }
    else {
      LOG_TRACE("Got an invalid JSON response for Plan/Coordinators");
    }
  }
  else {
    const std::string endpoints = AgencyComm::getEndpointsString();

    LOG_TRACE("Could not fetch configuration from agency endpoints (%s): "
              "got status code %d, message: %s, key: %s",
              endpoints.c_str(),
              result._statusCode,
              result.errorMessage().c_str(),
              key.c_str());
  }

  return role;
}
void ClusterFeature::start() { // return if cluster is disabled if (!_enableCluster) { return; } ServerState::instance()->setState(ServerState::STATE_STARTUP); // the agency about our state AgencyComm comm; comm.sendServerState(0.0); std::string const version = comm.getVersion(); ServerState::instance()->setInitialized(); std::string const endpoints = AgencyComm::getEndpointsString(); ServerState::RoleEnum role = ServerState::instance()->getRole(); LOG(INFO) << "Cluster feature is turned on. Agency version: " << version << ", Agency endpoints: " << endpoints << ", server id: '" << _myId << "', internal address: " << _myAddress << ", role: " << ServerState::roleToString(role); if (!_disableHeartbeat) { AgencyCommResult result = comm.getValues("Sync/HeartbeatIntervalMs"); if (result.successful()) { velocypack::Slice HeartbeatIntervalMs = result.slice()[0].get(std::vector<std::string>( {AgencyComm::prefix(), "Sync", "HeartbeatIntervalMs"})); if (HeartbeatIntervalMs.isInteger()) { try { _heartbeatInterval = HeartbeatIntervalMs.getUInt(); LOG(INFO) << "using heartbeat interval value '" << _heartbeatInterval << " ms' from agency"; } catch (...) { // Ignore if it is not a small int or uint } } } // no value set in agency. use default if (_heartbeatInterval == 0) { _heartbeatInterval = 5000; // 1/s LOG(WARN) << "unable to read heartbeat interval from agency. 
Using " << "default value '" << _heartbeatInterval << " ms'"; } // start heartbeat thread _heartbeatThread = std::make_shared<HeartbeatThread>( _agencyCallbackRegistry.get(), _heartbeatInterval * 1000, 5, SchedulerFeature::SCHEDULER->ioService()); if (!_heartbeatThread->init() || !_heartbeatThread->start()) { LOG(FATAL) << "heartbeat could not connect to agency endpoints (" << endpoints << ")"; FATAL_ERROR_EXIT(); } while (!_heartbeatThread->isReady()) { // wait until heartbeat is ready usleep(10000); } } AgencyCommResult result; while (true) { VPackBuilder builder; try { VPackObjectBuilder b(&builder); builder.add("endpoint", VPackValue(_myAddress)); } catch (...) { LOG(FATAL) << "out of memory"; FATAL_ERROR_EXIT(); } result = comm.setValue("Current/ServersRegistered/" + _myId, builder.slice(), 0.0); if (!result.successful()) { LOG(FATAL) << "unable to register server in agency: http code: " << result.httpCode() << ", body: " << result.body(); FATAL_ERROR_EXIT(); } else { break; } sleep(1); } if (role == ServerState::ROLE_COORDINATOR) { ServerState::instance()->setState(ServerState::STATE_SERVING); } else if (role == ServerState::ROLE_PRIMARY) { ServerState::instance()->setState(ServerState::STATE_SERVINGASYNC); } else if (role == ServerState::ROLE_SECONDARY) { ServerState::instance()->setState(ServerState::STATE_SYNCING); } }
static void raceForClusterBootstrap() { AgencyComm agency; auto ci = ClusterInfo::instance(); while (true) { AgencyCommResult result = agency.getValues("Bootstrap"); if (!result.successful()) { // Error in communication, note that value not found is not an error LOG_TOPIC(TRACE, Logger::STARTUP) << "raceForClusterBootstrap: no agency communication"; sleep(1); continue; } VPackSlice value = result.slice()[0].get( std::vector<std::string>({agency.prefix(), "Bootstrap"})); if (value.isString()) { // key was found and is a string if (value.copyString().find("done") != std::string::npos) { // all done, let's get out of here: LOG_TOPIC(TRACE, Logger::STARTUP) << "raceForClusterBootstrap: bootstrap already done"; return; } LOG_TOPIC(DEBUG, Logger::STARTUP) << "raceForClusterBootstrap: somebody else does the bootstrap"; sleep(1); continue; } // No value set, we try to do the bootstrap ourselves: VPackBuilder b; b.add(VPackValue(arangodb::ServerState::instance()->getId())); result = agency.casValue("Bootstrap", b.slice(), false, 300, 15); if (!result.successful()) { LOG_TOPIC(DEBUG, Logger::STARTUP) << "raceForClusterBootstrap: lost race, somebody else will bootstrap"; // Cannot get foot into the door, try again later: sleep(1); continue; } // OK, we handle things now, let's see whether a DBserver is there: auto dbservers = ci->getCurrentDBServers(); if (dbservers.size() == 0) { LOG_TOPIC(TRACE, Logger::STARTUP) << "raceForClusterBootstrap: no DBservers, waiting"; agency.removeValues("Bootstrap", false); sleep(1); continue; } LOG_TOPIC(DEBUG, Logger::STARTUP) << "raceForClusterBootstrap: race won, we do the bootstrap"; auto vocbase = DatabaseFeature::DATABASE->systemDatabase(); V8DealerFeature::DEALER->loadJavascriptFiles(vocbase, "server/bootstrap/cluster-bootstrap.js", 0); LOG_TOPIC(DEBUG, Logger::STARTUP) << "raceForClusterBootstrap: bootstrap done"; b.clear(); b.add(VPackValue(arangodb::ServerState::instance()->getId() + ": done")); result = agency.setValue("Bootstrap", 
b.slice(), 0); if (result.successful()) { return; } LOG_TOPIC(TRACE, Logger::STARTUP) << "raceForClusterBootstrap: could not indicate success"; sleep(1); } }
bool ApplicationCluster::start () { // set authentication data ServerState::instance()->setAuthentication(_username, _password); // overwrite memory area _username = _password = "******"; ServerState::instance()->setDataPath(_dataPath); ServerState::instance()->setLogPath(_logPath); ServerState::instance()->setAgentPath(_agentPath); ServerState::instance()->setArangodPath(_arangodPath); ServerState::instance()->setDBserverConfig(_dbserverConfig); ServerState::instance()->setCoordinatorConfig(_coordinatorConfig); ServerState::instance()->setDisableDispatcherFrontend(_disableDispatcherFrontend); ServerState::instance()->setDisableDispatcherKickstarter(_disableDispatcherKickstarter); if (! enabled()) { return true; } ServerState::instance()->setId(_myId); // perfom an initial connect to the agency const std::string endpoints = AgencyComm::getEndpointsString(); if (! AgencyComm::tryConnect()) { LOG_FATAL_AND_EXIT("Could not connect to agency endpoints (%s)", endpoints.c_str()); } ServerState::RoleEnum role = ServerState::instance()->getRole(); if (role == ServerState::ROLE_UNDEFINED) { // no role found LOG_FATAL_AND_EXIT("unable to determine unambiguous role for server '%s'. No role configured in agency (%s)", _myId.c_str(), endpoints.c_str()); } // check if my-address is set if (_myAddress.empty()) { // no address given, now ask the agency for out address _myAddress = ServerState::instance()->getAddress(); } else { // register our own address ServerState::instance()->setAddress(_myAddress); } if (_myAddress.empty()) { LOG_FATAL_AND_EXIT("unable to determine internal address for server '%s'. 
" "Please specify --cluster.my-address or configure the address for this server in the agency.", _myId.c_str()); } // now we can validate --cluster.my-address const string unified = triagens::rest::Endpoint::getUnifiedForm(_myAddress); if (unified.empty()) { LOG_FATAL_AND_EXIT("invalid endpoint '%s' specified for --cluster.my-address", _myAddress.c_str()); } ServerState::instance()->setState(ServerState::STATE_STARTUP); // initialise ConnectionManager library httpclient::ConnectionManager::instance()->initialise(); // the agency about our state AgencyComm comm; comm.sendServerState(0.0); const std::string version = comm.getVersion(); ServerState::instance()->setInitialised(); LOG_INFO("Cluster feature is turned on. " "Agency version: %s, Agency endpoints: %s, " "server id: '%s', internal address: %s, role: %s", version.c_str(), endpoints.c_str(), _myId.c_str(), _myAddress.c_str(), ServerState::roleToString(role).c_str()); if (! _disableHeartbeat) { AgencyCommResult result = comm.getValues("Sync/HeartbeatIntervalMs", false); if (result.successful()) { result.parse("", false); std::map<std::string, AgencyCommResultEntry>::const_iterator it = result._values.begin(); if (it != result._values.end()) { _heartbeatInterval = triagens::basics::JsonHelper::stringUInt64((*it).second._json); LOG_INFO("using heartbeat interval value '%llu ms' from agency", (unsigned long long) _heartbeatInterval); } } // no value set in agency. use default if (_heartbeatInterval == 0) { _heartbeatInterval = 1000; // 1/s LOG_WARNING("unable to read heartbeat interval from agency. Using default value '%llu ms'", (unsigned long long) _heartbeatInterval); } // start heartbeat thread _heartbeat = new HeartbeatThread(_server, _dispatcher, _applicationV8, _heartbeatInterval * 1000, 5); if (_heartbeat == 0) { LOG_FATAL_AND_EXIT("unable to start cluster heartbeat thread"); } if (! _heartbeat->init() || ! 
_heartbeat->start()) { LOG_FATAL_AND_EXIT("heartbeat could not connect to agency endpoints (%s)", endpoints.c_str()); } while (! _heartbeat->ready()) { // wait until heartbeat is ready usleep(10000); } } return true; }