Ejemplo n.º 1
0
ServerState::RoleEnum ServerState::checkServersList (std::string const& id) {
  // fetch value at Plan/DBServers
  // we need to do this to determine the server's role

  const std::string key = "Plan/DBServers";

  AgencyComm comm;
  AgencyCommResult result;

  {
    AgencyCommLocker locker("Plan", "READ");

    if (locker.successful()) {
      result = comm.getValues(key, true);
    }
  }

  if (! result.successful()) {
    const std::string endpoints = AgencyComm::getEndpointsString();

    LOG_TRACE("Could not fetch configuration from agency endpoints (%s): "
              "got status code %d, message: %s, key: %s",
              endpoints.c_str(),
              result._statusCode,
              result.errorMessage().c_str(),
              key.c_str());

    return ServerState::ROLE_UNDEFINED;
  }

  ServerState::RoleEnum role = ServerState::ROLE_UNDEFINED;

  // check if we can find ourselves in the list returned by the agency
  result.parse("Plan/DBServers/", false);
  std::map<std::string, AgencyCommResultEntry>::const_iterator it = result._values.find(id);

  if (it != result._values.end()) {
    // we are in the list. this means we are a primary server
    role = ServerState::ROLE_PRIMARY;
  }
  else {
    // check if we are a secondary...
    it = result._values.begin();

    while (it != result._values.end()) {
      const std::string name = triagens::basics::JsonHelper::getStringValue((*it).second._json, "");

      if (name == id) {
        role = ServerState::ROLE_SECONDARY;
        _idOfPrimary = it->first;
        break;
      }

      ++it;
    }
  }

  return role;
}
Ejemplo n.º 2
0
int ServerState::lookupLocalInfoToId (std::string const& localInfo,
                                      std::string& id) {
  // fetch value at Plan/DBServers
  // we need to do this to determine the server's role

  const std::string key = "Target/MapLocalToID";

  int count = 0;
  while (++count <= 600) {
    AgencyComm comm;
    AgencyCommResult result;

    {
      AgencyCommLocker locker("Target", "READ");

      if (locker.successful()) {
        result = comm.getValues(key, true);
      }
    }

    if (! result.successful()) {
      const std::string endpoints = AgencyComm::getEndpointsString();

      LOG_DEBUG("Could not fetch configuration from agency endpoints (%s): "
                "got status code %d, message: %s, key: %s",
                endpoints.c_str(),
                result._statusCode,
                result.errorMessage().c_str(),
                key.c_str());
    }
    else {
      result.parse("Target/MapLocalToID/", false);
      std::map<std::string, AgencyCommResultEntry>::const_iterator it = result._values.find(localInfo);

      if (it != result._values.end()) {
        TRI_json_t const* json = it->second._json;
        Json j(TRI_UNKNOWN_MEM_ZONE, json, Json::NOFREE);
        id = triagens::basics::JsonHelper::getStringValue(json, "ID", "");
        if (id.empty()) {
          LOG_ERROR("ID not set!");
          return TRI_ERROR_CLUSTER_COULD_NOT_DETERMINE_ID;
        }
        std::string description
          = triagens::basics::JsonHelper::getStringValue(json, "Description", "");
        if (! description.empty()) {
          setDescription(description);
        }
        return TRI_ERROR_NO_ERROR;
      }
    }
    sleep(1);
  };
  return TRI_ERROR_CLUSTER_COULD_NOT_DETERMINE_ID;
}
Ejemplo n.º 3
0
void ApplicationCluster::close () {
    if (! enabled()) {
        return;
    }

    if (_heartbeat != 0) {
        _heartbeat->stop();
    }

    // change into shutdown state
    ServerState::instance()->setState(ServerState::STATE_SHUTDOWN);

    AgencyComm comm;
    comm.sendServerState(0.0);
}
Ejemplo n.º 4
0
ServerState::RoleEnum ServerState::checkCoordinatorsList (std::string const& id) {
  // fetch value at Plan/Coordinators
  // we need to do this to determine the server's role

  const std::string key = "Plan/Coordinators";

  AgencyComm comm;
  AgencyCommResult result;

  {
    AgencyCommLocker locker("Plan", "READ");

    if (locker.successful()) {
      result = comm.getValues(key, true);
    }
  }

  if (! result.successful()) {
    const std::string endpoints = AgencyComm::getEndpointsString();

    LOG_TRACE("Could not fetch configuration from agency endpoints (%s): "
              "got status code %d, message: %s, key: %s",
              endpoints.c_str(),
              result._statusCode,
              result.errorMessage().c_str(),
              key.c_str());

    return ServerState::ROLE_UNDEFINED;
  }

  if (! result.parse("Plan/Coordinators/", false)) {
    LOG_TRACE("Got an invalid JSON response for Plan/Coordinators");

    return ServerState::ROLE_UNDEFINED;
  }

  // check if we can find ourselves in the list returned by the agency
  std::map<std::string, AgencyCommResultEntry>::const_iterator it = result._values.find(id);

  if (it != result._values.end()) {
    // we are in the list. this means we are a primary server
    return ServerState::ROLE_COORDINATOR;
  }

  return ServerState::ROLE_UNDEFINED;
}
Ejemplo n.º 5
0
ServerState::RoleEnum ServerState::getRole () {
  std::string id;
  std::string info;

  {
    auto role = loadRole();
    
    if (role != ServerState::ROLE_UNDEFINED || ! _clusterEnabled) {
      return role;
    }

    info = _localInfo;
    id = _id;
  }

  if (id.empty()) {
    // We need to announce ourselves in the agency to get a role configured:
    LOG_DEBUG("Announcing our birth in Current/NewServers to the agency...");
    AgencyComm comm;
    AgencyCommResult result;
    Json json(Json::Object, 1);
    json("endpoint", Json(TRI_UNKNOWN_MEM_ZONE, getAddress()));
    std::string description = getDescription();
    if (! description.empty()) {
      json("Description", Json(TRI_UNKNOWN_MEM_ZONE, description));
    }
    result = comm.setValue("Current/NewServers/"+_localInfo, json.json(), 0.0);
    if (! result.successful()) {
      LOG_ERROR("Could not talk to agency!");
      return ROLE_UNDEFINED;
    }
    std::string jsonst = json.toString();
    LOG_DEBUG("Have stored %s under Current/NewServers/%s in agency.",
              jsonst.c_str(), _localInfo.c_str());
  }

  // role not yet set
  RoleEnum role = determineRole(info, id);
  std::string roleString = roleToString(role);

  LOG_DEBUG("Found my role: %s", roleString.c_str());

  storeRole(role);

  return role;
}
Ejemplo n.º 6
0
RestStatus RestShutdownHandler::execute() {
  if (_request->requestType() != rest::RequestType::DELETE_REQ) {
    generateError(rest::ResponseCode::METHOD_NOT_ALLOWED, 405);
    return RestStatus::DONE;
  }
  bool removeFromCluster;
  std::string const& remove = _request->value("remove_from_cluster", removeFromCluster);
  removeFromCluster = removeFromCluster && remove == "1";

  bool shutdownClusterFound;
  std::string const& shutdownCluster = _request->value("shutdown_cluster", shutdownClusterFound);
  if (shutdownClusterFound && shutdownCluster == "1") {
    AgencyComm agency;

    VPackBuilder builder;
    builder.add(VPackValue(true));
    AgencyCommResult result = agency.setValue("Shutdown", builder.slice(), 0.0);
    if (!result.successful()) {
      generateError(rest::ResponseCode::SERVER_ERROR, 500);
      return RestStatus::DONE;
    }
    removeFromCluster = true;
  }
  if (removeFromCluster) {
    ClusterFeature* clusterFeature = ApplicationServer::getFeature<ClusterFeature>("Cluster");
    clusterFeature->setUnregisterOnShutdown(true);
  }

  ApplicationServer::server->beginShutdown();

  try {
    VPackBuilder result;
    result.add(VPackValue("OK"));
    generateResult(rest::ResponseCode::OK, result.slice());
  } catch (...) {
    // Ignore the error
  }

  return RestStatus::DONE;
}
Ejemplo n.º 7
0
void ApplicationCluster::stop () {
    if (! enabled()) {
        return;
    }

    // change into shutdown state
    ServerState::instance()->setState(ServerState::STATE_SHUTDOWN);

    AgencyComm comm;
    comm.sendServerState(0.0);

    if (_heartbeat != 0) {
        _heartbeat->stop();
    }

    {
        AgencyCommLocker locker("Current", "WRITE");

        if (locker.successful()) {
            // unregister ourselves
            ServerState::RoleEnum role = ServerState::instance()->getRole();

            if (role == ServerState::ROLE_PRIMARY) {
                comm.removeValues("Current/DBServers/" + _myId, false);
            }
            else if (role == ServerState::ROLE_COORDINATOR) {
                comm.removeValues("Current/Coordinators/" + _myId, false);
            }

            // unregister ourselves
            comm.removeValues("Current/ServersRegistered/" + _myId, false);
        }
    }

    ClusterComm::cleanup();
    ClusterInfo::cleanup();
    AgencyComm::cleanup();
}
Ejemplo n.º 8
0
void ClusterFeature::unprepare() {
  if (_enableCluster) {
    if (_heartbeatThread != nullptr) {
      _heartbeatThread->beginShutdown();
    }

    // change into shutdown state
    ServerState::instance()->setState(ServerState::STATE_SHUTDOWN);

    AgencyComm comm;
    comm.sendServerState(0.0);

    if (_heartbeatThread != nullptr) {
      int counter = 0;
      while (_heartbeatThread->isRunning()) {
        usleep(100000);
        // emit warning after 5 seconds
        if (++counter == 10 * 5) {
          LOG(WARN) << "waiting for heartbeat thread to finish";
        }
      }
    }

    if (_unregisterOnShutdown) {
      ServerState::instance()->unregister();
    }
  }

  if (!_enableCluster) {
    ClusterComm::cleanup();
    return;
  }

  // change into shutdown state
  ServerState::instance()->setState(ServerState::STATE_SHUTDOWN);

  AgencyComm comm;
  comm.sendServerState(0.0);

  // Try only once to unregister because maybe the agencycomm
  // is shutting down as well...

  ServerState::RoleEnum role = ServerState::instance()->getRole();

  AgencyWriteTransaction unreg;

  // Remove from role
  if (role == ServerState::ROLE_PRIMARY) {
    unreg.operations.push_back(AgencyOperation(
        "Current/DBServers/" + _myId, AgencySimpleOperationType::DELETE_OP));
  } else if (role == ServerState::ROLE_COORDINATOR) {
    unreg.operations.push_back(AgencyOperation(
        "Current/Coordinators/" + _myId, AgencySimpleOperationType::DELETE_OP));
  }

  // Unregister
  unreg.operations.push_back(
      AgencyOperation("Current/ServersRegistered/" + _myId,
                      AgencySimpleOperationType::DELETE_OP));

  comm.sendTransactionWithFailover(unreg, 120.0);

  while (_heartbeatThread->isRunning()) {
    usleep(50000);
  }
  
  AgencyComm::cleanup();
  ClusterComm::cleanup();
}
Ejemplo n.º 9
0
void ClusterFeature::start() {
  // return if cluster is disabled
  if (!_enableCluster) {
    return;
  }

  ServerState::instance()->setState(ServerState::STATE_STARTUP);

  // the agency about our state
  AgencyComm comm;
  comm.sendServerState(0.0);

  std::string const version = comm.getVersion();

  ServerState::instance()->setInitialized();

  std::string const endpoints = AgencyComm::getEndpointsString();

  ServerState::RoleEnum role = ServerState::instance()->getRole();

  LOG(INFO) << "Cluster feature is turned on. Agency version: " << version
            << ", Agency endpoints: " << endpoints << ", server id: '" << _myId
            << "', internal address: " << _myAddress
            << ", role: " << ServerState::roleToString(role);

  if (!_disableHeartbeat) {
    AgencyCommResult result = comm.getValues("Sync/HeartbeatIntervalMs");

    if (result.successful()) {
      velocypack::Slice HeartbeatIntervalMs =
          result.slice()[0].get(std::vector<std::string>(
              {AgencyComm::prefix(), "Sync", "HeartbeatIntervalMs"}));

      if (HeartbeatIntervalMs.isInteger()) {
        try {
          _heartbeatInterval = HeartbeatIntervalMs.getUInt();
          LOG(INFO) << "using heartbeat interval value '" << _heartbeatInterval
                    << " ms' from agency";
        } catch (...) {
          // Ignore if it is not a small int or uint
        }
      }
    }

    // no value set in agency. use default
    if (_heartbeatInterval == 0) {
      _heartbeatInterval = 5000;  // 1/s

      LOG(WARN) << "unable to read heartbeat interval from agency. Using "
                << "default value '" << _heartbeatInterval << " ms'";
    }

    // start heartbeat thread
    _heartbeatThread = std::make_shared<HeartbeatThread>(
        _agencyCallbackRegistry.get(), _heartbeatInterval * 1000, 5,
        SchedulerFeature::SCHEDULER->ioService());

    if (!_heartbeatThread->init() || !_heartbeatThread->start()) {
      LOG(FATAL) << "heartbeat could not connect to agency endpoints ("
                 << endpoints << ")";
      FATAL_ERROR_EXIT();
    }

    while (!_heartbeatThread->isReady()) {
      // wait until heartbeat is ready
      usleep(10000);
    }
  }

  AgencyCommResult result;

  while (true) {
    VPackBuilder builder;
    try {
      VPackObjectBuilder b(&builder);
      builder.add("endpoint", VPackValue(_myAddress));
    } catch (...) {
      LOG(FATAL) << "out of memory";
      FATAL_ERROR_EXIT();
    }

    result = comm.setValue("Current/ServersRegistered/" + _myId,
                           builder.slice(), 0.0);

    if (!result.successful()) {
      LOG(FATAL) << "unable to register server in agency: http code: "
                 << result.httpCode() << ", body: " << result.body();
      FATAL_ERROR_EXIT();
    } else {
      break;
    }

    sleep(1);
  }

  if (role == ServerState::ROLE_COORDINATOR) {
    ServerState::instance()->setState(ServerState::STATE_SERVING);
  } else if (role == ServerState::ROLE_PRIMARY) {
    ServerState::instance()->setState(ServerState::STATE_SERVINGASYNC);
  } else if (role == ServerState::ROLE_SECONDARY) {
    ServerState::instance()->setState(ServerState::STATE_SYNCING);
  }
}
Ejemplo n.º 10
0
static void raceForClusterBootstrap() {
  AgencyComm agency;
  auto ci = ClusterInfo::instance();
  
  while (true) {
    AgencyCommResult result = agency.getValues("Bootstrap");
    if (!result.successful()) {
      // Error in communication, note that value not found is not an error
      LOG_TOPIC(TRACE, Logger::STARTUP)
          << "raceForClusterBootstrap: no agency communication";
      sleep(1);
      continue;
    }
    VPackSlice value = result.slice()[0].get(
        std::vector<std::string>({agency.prefix(), "Bootstrap"}));
    if (value.isString()) {
      // key was found and is a string
      if (value.copyString().find("done") != std::string::npos) {
        // all done, let's get out of here:
        LOG_TOPIC(TRACE, Logger::STARTUP)
            << "raceForClusterBootstrap: bootstrap already done";
        return;
      }
      LOG_TOPIC(DEBUG, Logger::STARTUP)
          << "raceForClusterBootstrap: somebody else does the bootstrap";
      sleep(1);
      continue;
    }

    // No value set, we try to do the bootstrap ourselves:
    VPackBuilder b;
    b.add(VPackValue(arangodb::ServerState::instance()->getId()));
    result = agency.casValue("Bootstrap", b.slice(), false, 300, 15);
    if (!result.successful()) {
      LOG_TOPIC(DEBUG, Logger::STARTUP)
          << "raceForClusterBootstrap: lost race, somebody else will bootstrap";
      // Cannot get foot into the door, try again later:
      sleep(1);
      continue;
    }

    // OK, we handle things now, let's see whether a DBserver is there:
    auto dbservers = ci->getCurrentDBServers();
    if (dbservers.size() == 0) {
      LOG_TOPIC(TRACE, Logger::STARTUP)
          << "raceForClusterBootstrap: no DBservers, waiting";
      agency.removeValues("Bootstrap", false);
      sleep(1);
      continue;
    }

    LOG_TOPIC(DEBUG, Logger::STARTUP)
        << "raceForClusterBootstrap: race won, we do the bootstrap";
    auto vocbase = DatabaseFeature::DATABASE->systemDatabase();
    V8DealerFeature::DEALER->loadJavascriptFiles(vocbase, "server/bootstrap/cluster-bootstrap.js", 0);

    LOG_TOPIC(DEBUG, Logger::STARTUP)
        << "raceForClusterBootstrap: bootstrap done";

    b.clear();
    b.add(VPackValue(arangodb::ServerState::instance()->getId() + ": done"));
    result = agency.setValue("Bootstrap", b.slice(), 0);
    if (result.successful()) {
      return;
    }

    LOG_TOPIC(TRACE, Logger::STARTUP)
        << "raceForClusterBootstrap: could not indicate success";

    sleep(1);
  }
}
Ejemplo n.º 11
0
bool ApplicationCluster::open () {
    if (! enabled()) {
        return true;
    }

    ServerState::RoleEnum role = ServerState::instance()->getRole();

    // tell the agency that we are ready
    {
        AgencyComm comm;
        AgencyCommResult result;

        AgencyCommLocker locker("Current", "WRITE");

        if (locker.successful()) {
            TRI_json_t* ep = TRI_CreateString2CopyJson(TRI_UNKNOWN_MEM_ZONE, _myAddress.c_str(), _myAddress.size());
            if (ep == 0) {
                locker.unlock();
                LOG_FATAL_AND_EXIT("out of memory");
            }
            TRI_json_t* json = TRI_CreateArray2Json(TRI_UNKNOWN_MEM_ZONE, 1);
            if (json == 0) {
                TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, ep);
                locker.unlock();
                LOG_FATAL_AND_EXIT("out of memory");
            }
            TRI_Insert2ArrayJson(TRI_UNKNOWN_MEM_ZONE, json, "endpoint", ep);

            result = comm.setValue("Current/ServersRegistered/" + _myId, json, 0.0);
            TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, json);
        }

        if (! result.successful()) {
            locker.unlock();
            LOG_FATAL_AND_EXIT("unable to register server in agency: http code: %d, body: %s",
                               (int) result.httpCode(),
                               result.body().c_str());
        }

        if (role == ServerState::ROLE_COORDINATOR) {
            TRI_json_t* json = TRI_CreateString2CopyJson(TRI_UNKNOWN_MEM_ZONE, "none", 4);

            if (json == 0) {
                locker.unlock();
                LOG_FATAL_AND_EXIT("out of memory");
            }

            ServerState::instance()->setState(ServerState::STATE_SERVING);

            // register coordinator
            AgencyCommResult result = comm.setValue("Current/Coordinators/" + _myId, json, 0.0);
            TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, json);

            if (! result.successful()) {
                locker.unlock();
                LOG_FATAL_AND_EXIT("unable to register coordinator in agency");
            }
        }
        else if (role == ServerState::ROLE_PRIMARY) {
            TRI_json_t* json = TRI_CreateString2CopyJson(TRI_UNKNOWN_MEM_ZONE, "none", 4);

            if (json == 0) {
                locker.unlock();
                LOG_FATAL_AND_EXIT("out of memory");
            }

            ServerState::instance()->setState(ServerState::STATE_SERVINGASYNC);

            // register server
            AgencyCommResult result = comm.setValue("Current/DBServers/" + _myId, json, 0.0);
            TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, json);

            if (! result.successful()) {
                locker.unlock();
                LOG_FATAL_AND_EXIT("unable to register db server in agency");
            }
        }
        else if (role == ServerState::ROLE_SECONDARY) {
            locker.unlock();
            LOG_FATAL_AND_EXIT("secondary server tasks are currently not implemented");
        }
    }

    return true;
}
Ejemplo n.º 12
0
bool ApplicationCluster::start () {

    // set authentication data
    ServerState::instance()->setAuthentication(_username, _password);

    // overwrite memory area
    _username = _password = "******";

    ServerState::instance()->setDataPath(_dataPath);
    ServerState::instance()->setLogPath(_logPath);
    ServerState::instance()->setAgentPath(_agentPath);
    ServerState::instance()->setArangodPath(_arangodPath);
    ServerState::instance()->setDBserverConfig(_dbserverConfig);
    ServerState::instance()->setCoordinatorConfig(_coordinatorConfig);
    ServerState::instance()->setDisableDispatcherFrontend(_disableDispatcherFrontend);
    ServerState::instance()->setDisableDispatcherKickstarter(_disableDispatcherKickstarter);

    if (! enabled()) {
        return true;
    }

    ServerState::instance()->setId(_myId);

    // perfom an initial connect to the agency
    const std::string endpoints = AgencyComm::getEndpointsString();

    if (! AgencyComm::tryConnect()) {
        LOG_FATAL_AND_EXIT("Could not connect to agency endpoints (%s)",
                           endpoints.c_str());
    }


    ServerState::RoleEnum role = ServerState::instance()->getRole();

    if (role == ServerState::ROLE_UNDEFINED) {
        // no role found
        LOG_FATAL_AND_EXIT("unable to determine unambiguous role for server '%s'. No role configured in agency (%s)",
                           _myId.c_str(),
                           endpoints.c_str());
    }

    // check if my-address is set
    if (_myAddress.empty()) {
        // no address given, now ask the agency for out address
        _myAddress = ServerState::instance()->getAddress();
    }
    else {
        // register our own address
        ServerState::instance()->setAddress(_myAddress);
    }

    if (_myAddress.empty()) {
        LOG_FATAL_AND_EXIT("unable to determine internal address for server '%s'. "
                           "Please specify --cluster.my-address or configure the address for this server in the agency.",
                           _myId.c_str());
    }

    // now we can validate --cluster.my-address
    const string unified = triagens::rest::Endpoint::getUnifiedForm(_myAddress);

    if (unified.empty()) {
        LOG_FATAL_AND_EXIT("invalid endpoint '%s' specified for --cluster.my-address",
                           _myAddress.c_str());
    }

    ServerState::instance()->setState(ServerState::STATE_STARTUP);

    // initialise ConnectionManager library
    httpclient::ConnectionManager::instance()->initialise();

    // the agency about our state
    AgencyComm comm;
    comm.sendServerState(0.0);

    const std::string version = comm.getVersion();

    ServerState::instance()->setInitialised();

    LOG_INFO("Cluster feature is turned on. "
             "Agency version: %s, Agency endpoints: %s, "
             "server id: '%s', internal address: %s, role: %s",
             version.c_str(),
             endpoints.c_str(),
             _myId.c_str(),
             _myAddress.c_str(),
             ServerState::roleToString(role).c_str());

    if (! _disableHeartbeat) {
        AgencyCommResult result = comm.getValues("Sync/HeartbeatIntervalMs", false);

        if (result.successful()) {
            result.parse("", false);

            std::map<std::string, AgencyCommResultEntry>::const_iterator it = result._values.begin();

            if (it != result._values.end()) {
                _heartbeatInterval = triagens::basics::JsonHelper::stringUInt64((*it).second._json);

                LOG_INFO("using heartbeat interval value '%llu ms' from agency",
                         (unsigned long long) _heartbeatInterval);
            }
        }

        // no value set in agency. use default
        if (_heartbeatInterval == 0) {
            _heartbeatInterval = 1000; // 1/s

            LOG_WARNING("unable to read heartbeat interval from agency. Using default value '%llu ms'",
                        (unsigned long long) _heartbeatInterval);
        }


        // start heartbeat thread
        _heartbeat = new HeartbeatThread(_server, _dispatcher, _applicationV8, _heartbeatInterval * 1000, 5);

        if (_heartbeat == 0) {
            LOG_FATAL_AND_EXIT("unable to start cluster heartbeat thread");
        }

        if (! _heartbeat->init() || ! _heartbeat->start()) {
            LOG_FATAL_AND_EXIT("heartbeat could not connect to agency endpoints (%s)",
                               endpoints.c_str());
        }

        while (! _heartbeat->ready()) {
            // wait until heartbeat is ready
            usleep(10000);
        }
    }

    return true;
}