Exemplo n.º 1
0
void ClusterFeature::start() {
  // return if cluster is disabled
  if (!_enableCluster) {
    return;
  }

  ServerState::instance()->setState(ServerState::STATE_STARTUP);

  // the agency about our state
  AgencyComm comm;
  comm.sendServerState(0.0);

  std::string const version = comm.getVersion();

  ServerState::instance()->setInitialized();

  std::string const endpoints = AgencyComm::getEndpointsString();

  ServerState::RoleEnum role = ServerState::instance()->getRole();

  LOG(INFO) << "Cluster feature is turned on. Agency version: " << version
            << ", Agency endpoints: " << endpoints << ", server id: '" << _myId
            << "', internal address: " << _myAddress
            << ", role: " << ServerState::roleToString(role);

  if (!_disableHeartbeat) {
    AgencyCommResult result = comm.getValues("Sync/HeartbeatIntervalMs");

    if (result.successful()) {
      velocypack::Slice HeartbeatIntervalMs =
          result.slice()[0].get(std::vector<std::string>(
              {AgencyComm::prefix(), "Sync", "HeartbeatIntervalMs"}));

      if (HeartbeatIntervalMs.isInteger()) {
        try {
          _heartbeatInterval = HeartbeatIntervalMs.getUInt();
          LOG(INFO) << "using heartbeat interval value '" << _heartbeatInterval
                    << " ms' from agency";
        } catch (...) {
          // Ignore if it is not a small int or uint
        }
      }
    }

    // no value set in agency. use default
    if (_heartbeatInterval == 0) {
      _heartbeatInterval = 5000;  // 1/s

      LOG(WARN) << "unable to read heartbeat interval from agency. Using "
                << "default value '" << _heartbeatInterval << " ms'";
    }

    // start heartbeat thread
    _heartbeatThread = std::make_shared<HeartbeatThread>(
        _agencyCallbackRegistry.get(), _heartbeatInterval * 1000, 5,
        SchedulerFeature::SCHEDULER->ioService());

    if (!_heartbeatThread->init() || !_heartbeatThread->start()) {
      LOG(FATAL) << "heartbeat could not connect to agency endpoints ("
                 << endpoints << ")";
      FATAL_ERROR_EXIT();
    }

    while (!_heartbeatThread->isReady()) {
      // wait until heartbeat is ready
      usleep(10000);
    }
  }

  AgencyCommResult result;

  while (true) {
    VPackBuilder builder;
    try {
      VPackObjectBuilder b(&builder);
      builder.add("endpoint", VPackValue(_myAddress));
    } catch (...) {
      LOG(FATAL) << "out of memory";
      FATAL_ERROR_EXIT();
    }

    result = comm.setValue("Current/ServersRegistered/" + _myId,
                           builder.slice(), 0.0);

    if (!result.successful()) {
      LOG(FATAL) << "unable to register server in agency: http code: "
                 << result.httpCode() << ", body: " << result.body();
      FATAL_ERROR_EXIT();
    } else {
      break;
    }

    sleep(1);
  }

  if (role == ServerState::ROLE_COORDINATOR) {
    ServerState::instance()->setState(ServerState::STATE_SERVING);
  } else if (role == ServerState::ROLE_PRIMARY) {
    ServerState::instance()->setState(ServerState::STATE_SERVINGASYNC);
  } else if (role == ServerState::ROLE_SECONDARY) {
    ServerState::instance()->setState(ServerState::STATE_SYNCING);
  }
}
Exemplo n.º 2
0
bool ApplicationCluster::open () {
    if (! enabled()) {
        return true;
    }

    ServerState::RoleEnum role = ServerState::instance()->getRole();

    // tell the agency that we are ready
    {
        AgencyComm comm;
        AgencyCommResult result;

        AgencyCommLocker locker("Current", "WRITE");

        if (locker.successful()) {
            TRI_json_t* ep = TRI_CreateString2CopyJson(TRI_UNKNOWN_MEM_ZONE, _myAddress.c_str(), _myAddress.size());
            if (ep == 0) {
                locker.unlock();
                LOG_FATAL_AND_EXIT("out of memory");
            }
            TRI_json_t* json = TRI_CreateArray2Json(TRI_UNKNOWN_MEM_ZONE, 1);
            if (json == 0) {
                TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, ep);
                locker.unlock();
                LOG_FATAL_AND_EXIT("out of memory");
            }
            TRI_Insert2ArrayJson(TRI_UNKNOWN_MEM_ZONE, json, "endpoint", ep);

            result = comm.setValue("Current/ServersRegistered/" + _myId, json, 0.0);
            TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, json);
        }

        if (! result.successful()) {
            locker.unlock();
            LOG_FATAL_AND_EXIT("unable to register server in agency: http code: %d, body: %s",
                               (int) result.httpCode(),
                               result.body().c_str());
        }

        if (role == ServerState::ROLE_COORDINATOR) {
            TRI_json_t* json = TRI_CreateString2CopyJson(TRI_UNKNOWN_MEM_ZONE, "none", 4);

            if (json == 0) {
                locker.unlock();
                LOG_FATAL_AND_EXIT("out of memory");
            }

            ServerState::instance()->setState(ServerState::STATE_SERVING);

            // register coordinator
            AgencyCommResult result = comm.setValue("Current/Coordinators/" + _myId, json, 0.0);
            TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, json);

            if (! result.successful()) {
                locker.unlock();
                LOG_FATAL_AND_EXIT("unable to register coordinator in agency");
            }
        }
        else if (role == ServerState::ROLE_PRIMARY) {
            TRI_json_t* json = TRI_CreateString2CopyJson(TRI_UNKNOWN_MEM_ZONE, "none", 4);

            if (json == 0) {
                locker.unlock();
                LOG_FATAL_AND_EXIT("out of memory");
            }

            ServerState::instance()->setState(ServerState::STATE_SERVINGASYNC);

            // register server
            AgencyCommResult result = comm.setValue("Current/DBServers/" + _myId, json, 0.0);
            TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, json);

            if (! result.successful()) {
                locker.unlock();
                LOG_FATAL_AND_EXIT("unable to register db server in agency");
            }
        }
        else if (role == ServerState::ROLE_SECONDARY) {
            locker.unlock();
            LOG_FATAL_AND_EXIT("secondary server tasks are currently not implemented");
        }
    }

    return true;
}