Esempio n. 1
0
// Determines this server's role by looking it up in the agency's
// Plan/DBServers list.
//
// The list is read as a map (key -> JSON value):
//   - if `id` appears as a key, the role is ROLE_PRIMARY;
//   - otherwise, if `id` appears as the string value of some entry, the role
//     is ROLE_SECONDARY and the key of that entry is stored in _idOfPrimary;
//   - otherwise (or if the agency could not be read, or its response could
//     not be parsed) the role is ROLE_UNDEFINED.
ServerState::RoleEnum ServerState::checkServersList (std::string const& id) {
  const std::string key = "Plan/DBServers";

  AgencyComm comm;
  AgencyCommResult result;

  {
    // the locker only lives for the duration of the fetch
    AgencyCommLocker locker("Plan", "READ");

    if (locker.successful()) {
      result = comm.getValues(key, true);
    }
  }

  if (! result.successful()) {
    const std::string endpoints = AgencyComm::getEndpointsString();

    LOG_TRACE("Could not fetch configuration from agency endpoints (%s): "
              "got status code %d, message: %s, key: %s",
              endpoints.c_str(),
              result._statusCode,
              result.errorMessage().c_str(),
              key.c_str());

    return ServerState::ROLE_UNDEFINED;
  }

  // do not inspect the values if the response could not be parsed
  // (same handling as in checkCoordinatorsList)
  if (! result.parse("Plan/DBServers/", false)) {
    LOG_TRACE("Got an invalid JSON response for Plan/DBServers");

    return ServerState::ROLE_UNDEFINED;
  }

  // check if we can find ourselves as a key in the list returned by the agency
  std::map<std::string, AgencyCommResultEntry>::const_iterator it = result._values.find(id);

  if (it != result._values.end()) {
    // we are in the list. this means we are a primary server
    return ServerState::ROLE_PRIMARY;
  }

  // check if we are a secondary: our id must then be the value of an entry
  for (it = result._values.begin(); it != result._values.end(); ++it) {
    const std::string name = triagens::basics::JsonHelper::getStringValue(it->second._json, "");

    if (name == id) {
      // remember which primary we belong to
      _idOfPrimary = it->first;
      return ServerState::ROLE_SECONDARY;
    }
  }

  return ServerState::ROLE_UNDEFINED;
}
Esempio n. 2
0
// Looks up the entry for `localInfo` under the agency key Target/MapLocalToID
// and extracts the server ID from it.
//
// The agency is polled up to 600 times, with a one-second sleep after each
// attempt that did not produce an entry.
//
// localInfo: key to look up in the map returned by the agency
// id:        out parameter; receives the "ID" attribute of the entry found
//            (left empty if the entry has no usable "ID")
//
// Returns TRI_ERROR_NO_ERROR if an entry with a non-empty "ID" was found.
// Returns TRI_ERROR_CLUSTER_COULD_NOT_DETERMINE_ID if the entry has no "ID"
// or if no entry showed up within all attempts.
// If the entry carries a non-empty "Description", it is passed to
// setDescription().
int ServerState::lookupLocalInfoToId (std::string const& localInfo,
                                      std::string& id) {
  const std::string key = "Target/MapLocalToID";

  int count = 0;
  while (++count <= 600) {
    AgencyComm comm;
    AgencyCommResult result;

    {
      // the locker only lives for the duration of the fetch
      AgencyCommLocker locker("Target", "READ");

      if (locker.successful()) {
        result = comm.getValues(key, true);
      }
    }

    if (! result.successful()) {
      const std::string endpoints = AgencyComm::getEndpointsString();

      LOG_DEBUG("Could not fetch configuration from agency endpoints (%s): "
                "got status code %d, message: %s, key: %s",
                endpoints.c_str(),
                result._statusCode,
                result.errorMessage().c_str(),
                key.c_str());
    }
    else {
      result.parse("Target/MapLocalToID/", false);
      std::map<std::string, AgencyCommResultEntry>::const_iterator it = result._values.find(localInfo);

      if (it != result._values.end()) {
        TRI_json_t const* json = it->second._json;
        id = triagens::basics::JsonHelper::getStringValue(json, "ID", "");
        if (id.empty()) {
          // an entry without an ID is a hard error, no further retries
          LOG_ERROR("ID not set!");
          return TRI_ERROR_CLUSTER_COULD_NOT_DETERMINE_ID;
        }
        std::string description
          = triagens::basics::JsonHelper::getStringValue(json, "Description", "");
        if (! description.empty()) {
          setDescription(description);
        }
        return TRI_ERROR_NO_ERROR;
      }
    }

    // agency not reachable or no entry for us yet: wait and retry
    sleep(1);
  }

  return TRI_ERROR_CLUSTER_COULD_NOT_DETERMINE_ID;
}
Esempio n. 3
0
// Checks whether this server is listed under Plan/Coordinators in the agency.
//
// Returns ROLE_COORDINATOR if `id` is a key of the list returned by the
// agency. Returns ROLE_UNDEFINED if it is not, if the agency could not be
// read, or if its response could not be parsed.
ServerState::RoleEnum ServerState::checkCoordinatorsList (std::string const& id) {
  const std::string agencyKey = "Plan/Coordinators";

  AgencyComm agency;
  AgencyCommResult response;

  {
    // the locker only lives for the duration of the fetch
    AgencyCommLocker readLock("Plan", "READ");

    if (readLock.successful()) {
      response = agency.getValues(agencyKey, true);
    }
  }

  if (response.successful()) {
    if (response.parse("Plan/Coordinators/", false)) {
      // we are a coordinator exactly if our id is one of the keys
      const bool listed = (response._values.find(id) != response._values.end());

      return listed ? ServerState::ROLE_COORDINATOR : ServerState::ROLE_UNDEFINED;
    }

    LOG_TRACE("Got an invalid JSON response for Plan/Coordinators");
  }
  else {
    const std::string agencyEndpoints = AgencyComm::getEndpointsString();

    LOG_TRACE("Could not fetch configuration from agency endpoints (%s): "
              "got status code %d, message: %s, key: %s",
              agencyEndpoints.c_str(),
              response._statusCode,
              response.errorMessage().c_str(),
              agencyKey.c_str());
  }

  return ServerState::ROLE_UNDEFINED;
}
Esempio n. 4
0
void ClusterFeature::start() {
  // return if cluster is disabled
  if (!_enableCluster) {
    return;
  }

  ServerState::instance()->setState(ServerState::STATE_STARTUP);

  // the agency about our state
  AgencyComm comm;
  comm.sendServerState(0.0);

  std::string const version = comm.getVersion();

  ServerState::instance()->setInitialized();

  std::string const endpoints = AgencyComm::getEndpointsString();

  ServerState::RoleEnum role = ServerState::instance()->getRole();

  LOG(INFO) << "Cluster feature is turned on. Agency version: " << version
            << ", Agency endpoints: " << endpoints << ", server id: '" << _myId
            << "', internal address: " << _myAddress
            << ", role: " << ServerState::roleToString(role);

  if (!_disableHeartbeat) {
    AgencyCommResult result = comm.getValues("Sync/HeartbeatIntervalMs");

    if (result.successful()) {
      velocypack::Slice HeartbeatIntervalMs =
          result.slice()[0].get(std::vector<std::string>(
              {AgencyComm::prefix(), "Sync", "HeartbeatIntervalMs"}));

      if (HeartbeatIntervalMs.isInteger()) {
        try {
          _heartbeatInterval = HeartbeatIntervalMs.getUInt();
          LOG(INFO) << "using heartbeat interval value '" << _heartbeatInterval
                    << " ms' from agency";
        } catch (...) {
          // Ignore if it is not a small int or uint
        }
      }
    }

    // no value set in agency. use default
    if (_heartbeatInterval == 0) {
      _heartbeatInterval = 5000;  // 1/s

      LOG(WARN) << "unable to read heartbeat interval from agency. Using "
                << "default value '" << _heartbeatInterval << " ms'";
    }

    // start heartbeat thread
    _heartbeatThread = std::make_shared<HeartbeatThread>(
        _agencyCallbackRegistry.get(), _heartbeatInterval * 1000, 5,
        SchedulerFeature::SCHEDULER->ioService());

    if (!_heartbeatThread->init() || !_heartbeatThread->start()) {
      LOG(FATAL) << "heartbeat could not connect to agency endpoints ("
                 << endpoints << ")";
      FATAL_ERROR_EXIT();
    }

    while (!_heartbeatThread->isReady()) {
      // wait until heartbeat is ready
      usleep(10000);
    }
  }

  AgencyCommResult result;

  while (true) {
    VPackBuilder builder;
    try {
      VPackObjectBuilder b(&builder);
      builder.add("endpoint", VPackValue(_myAddress));
    } catch (...) {
      LOG(FATAL) << "out of memory";
      FATAL_ERROR_EXIT();
    }

    result = comm.setValue("Current/ServersRegistered/" + _myId,
                           builder.slice(), 0.0);

    if (!result.successful()) {
      LOG(FATAL) << "unable to register server in agency: http code: "
                 << result.httpCode() << ", body: " << result.body();
      FATAL_ERROR_EXIT();
    } else {
      break;
    }

    sleep(1);
  }

  if (role == ServerState::ROLE_COORDINATOR) {
    ServerState::instance()->setState(ServerState::STATE_SERVING);
  } else if (role == ServerState::ROLE_PRIMARY) {
    ServerState::instance()->setState(ServerState::STATE_SERVINGASYNC);
  } else if (role == ServerState::ROLE_SECONDARY) {
    ServerState::instance()->setState(ServerState::STATE_SYNCING);
  }
}
Esempio n. 5
0
static void raceForClusterBootstrap() {
  AgencyComm agency;
  auto ci = ClusterInfo::instance();
  
  while (true) {
    AgencyCommResult result = agency.getValues("Bootstrap");
    if (!result.successful()) {
      // Error in communication, note that value not found is not an error
      LOG_TOPIC(TRACE, Logger::STARTUP)
          << "raceForClusterBootstrap: no agency communication";
      sleep(1);
      continue;
    }
    VPackSlice value = result.slice()[0].get(
        std::vector<std::string>({agency.prefix(), "Bootstrap"}));
    if (value.isString()) {
      // key was found and is a string
      if (value.copyString().find("done") != std::string::npos) {
        // all done, let's get out of here:
        LOG_TOPIC(TRACE, Logger::STARTUP)
            << "raceForClusterBootstrap: bootstrap already done";
        return;
      }
      LOG_TOPIC(DEBUG, Logger::STARTUP)
          << "raceForClusterBootstrap: somebody else does the bootstrap";
      sleep(1);
      continue;
    }

    // No value set, we try to do the bootstrap ourselves:
    VPackBuilder b;
    b.add(VPackValue(arangodb::ServerState::instance()->getId()));
    result = agency.casValue("Bootstrap", b.slice(), false, 300, 15);
    if (!result.successful()) {
      LOG_TOPIC(DEBUG, Logger::STARTUP)
          << "raceForClusterBootstrap: lost race, somebody else will bootstrap";
      // Cannot get foot into the door, try again later:
      sleep(1);
      continue;
    }

    // OK, we handle things now, let's see whether a DBserver is there:
    auto dbservers = ci->getCurrentDBServers();
    if (dbservers.size() == 0) {
      LOG_TOPIC(TRACE, Logger::STARTUP)
          << "raceForClusterBootstrap: no DBservers, waiting";
      agency.removeValues("Bootstrap", false);
      sleep(1);
      continue;
    }

    LOG_TOPIC(DEBUG, Logger::STARTUP)
        << "raceForClusterBootstrap: race won, we do the bootstrap";
    auto vocbase = DatabaseFeature::DATABASE->systemDatabase();
    V8DealerFeature::DEALER->loadJavascriptFiles(vocbase, "server/bootstrap/cluster-bootstrap.js", 0);

    LOG_TOPIC(DEBUG, Logger::STARTUP)
        << "raceForClusterBootstrap: bootstrap done";

    b.clear();
    b.add(VPackValue(arangodb::ServerState::instance()->getId() + ": done"));
    result = agency.setValue("Bootstrap", b.slice(), 0);
    if (result.successful()) {
      return;
    }

    LOG_TOPIC(TRACE, Logger::STARTUP)
        << "raceForClusterBootstrap: could not indicate success";

    sleep(1);
  }
}
Esempio n. 6
0
bool ApplicationCluster::start () {

    // set authentication data
    ServerState::instance()->setAuthentication(_username, _password);

    // overwrite memory area
    _username = _password = "******";

    ServerState::instance()->setDataPath(_dataPath);
    ServerState::instance()->setLogPath(_logPath);
    ServerState::instance()->setAgentPath(_agentPath);
    ServerState::instance()->setArangodPath(_arangodPath);
    ServerState::instance()->setDBserverConfig(_dbserverConfig);
    ServerState::instance()->setCoordinatorConfig(_coordinatorConfig);
    ServerState::instance()->setDisableDispatcherFrontend(_disableDispatcherFrontend);
    ServerState::instance()->setDisableDispatcherKickstarter(_disableDispatcherKickstarter);

    if (! enabled()) {
        return true;
    }

    ServerState::instance()->setId(_myId);

    // perfom an initial connect to the agency
    const std::string endpoints = AgencyComm::getEndpointsString();

    if (! AgencyComm::tryConnect()) {
        LOG_FATAL_AND_EXIT("Could not connect to agency endpoints (%s)",
                           endpoints.c_str());
    }


    ServerState::RoleEnum role = ServerState::instance()->getRole();

    if (role == ServerState::ROLE_UNDEFINED) {
        // no role found
        LOG_FATAL_AND_EXIT("unable to determine unambiguous role for server '%s'. No role configured in agency (%s)",
                           _myId.c_str(),
                           endpoints.c_str());
    }

    // check if my-address is set
    if (_myAddress.empty()) {
        // no address given, now ask the agency for out address
        _myAddress = ServerState::instance()->getAddress();
    }
    else {
        // register our own address
        ServerState::instance()->setAddress(_myAddress);
    }

    if (_myAddress.empty()) {
        LOG_FATAL_AND_EXIT("unable to determine internal address for server '%s'. "
                           "Please specify --cluster.my-address or configure the address for this server in the agency.",
                           _myId.c_str());
    }

    // now we can validate --cluster.my-address
    const string unified = triagens::rest::Endpoint::getUnifiedForm(_myAddress);

    if (unified.empty()) {
        LOG_FATAL_AND_EXIT("invalid endpoint '%s' specified for --cluster.my-address",
                           _myAddress.c_str());
    }

    ServerState::instance()->setState(ServerState::STATE_STARTUP);

    // initialise ConnectionManager library
    httpclient::ConnectionManager::instance()->initialise();

    // the agency about our state
    AgencyComm comm;
    comm.sendServerState(0.0);

    const std::string version = comm.getVersion();

    ServerState::instance()->setInitialised();

    LOG_INFO("Cluster feature is turned on. "
             "Agency version: %s, Agency endpoints: %s, "
             "server id: '%s', internal address: %s, role: %s",
             version.c_str(),
             endpoints.c_str(),
             _myId.c_str(),
             _myAddress.c_str(),
             ServerState::roleToString(role).c_str());

    if (! _disableHeartbeat) {
        AgencyCommResult result = comm.getValues("Sync/HeartbeatIntervalMs", false);

        if (result.successful()) {
            result.parse("", false);

            std::map<std::string, AgencyCommResultEntry>::const_iterator it = result._values.begin();

            if (it != result._values.end()) {
                _heartbeatInterval = triagens::basics::JsonHelper::stringUInt64((*it).second._json);

                LOG_INFO("using heartbeat interval value '%llu ms' from agency",
                         (unsigned long long) _heartbeatInterval);
            }
        }

        // no value set in agency. use default
        if (_heartbeatInterval == 0) {
            _heartbeatInterval = 1000; // 1/s

            LOG_WARNING("unable to read heartbeat interval from agency. Using default value '%llu ms'",
                        (unsigned long long) _heartbeatInterval);
        }


        // start heartbeat thread
        _heartbeat = new HeartbeatThread(_server, _dispatcher, _applicationV8, _heartbeatInterval * 1000, 5);

        if (_heartbeat == 0) {
            LOG_FATAL_AND_EXIT("unable to start cluster heartbeat thread");
        }

        if (! _heartbeat->init() || ! _heartbeat->start()) {
            LOG_FATAL_AND_EXIT("heartbeat could not connect to agency endpoints (%s)",
                               endpoints.c_str());
        }

        while (! _heartbeat->ready()) {
            // wait until heartbeat is ready
            usleep(10000);
        }
    }

    return true;
}