Exemple #1
0
/*
 * Start a dmtcp_coordinator process on the local host.
 *
 * Steps performed here:
 *   1. Resolve the coordinator host/port via getCoordHostAndPort() and
 *      refuse to continue unless the host names this machine.
 *   2. Create the listener socket, move it onto PROTECTED_COORD_FD and
 *      record the port it actually bound via setCoordPort().
 *   3. fork(); the child exec's "<program dir>/dmtcp_coordinator" with
 *      --quiet --exit-on-last --daemon, the parent closes its copy of the
 *      listener descriptor and waits for a child to terminate.
 *
 * mode: forwarded to getCoordHostAndPort(); not otherwise inspected here.
 *
 * Every failure (remote host, socket creation, fork, exec, wait) is
 * reported through JASSERT.
 */
void
startNewCoordinator(CoordinatorMode mode)
{
  string host;
  int port;
  getCoordHostAndPort(mode, host, &port);

  JASSERT(strcmp(host.c_str(), "localhost") == 0 ||
          strcmp(host.c_str(), "127.0.0.1") == 0 ||
          jalib::Filesystem::GetCurrentHostname() == host.c_str())
    (host) (jalib::Filesystem::GetCurrentHostname())
  .Text("Won't automatically start coordinator because DMTCP_HOST"
        " is set to a remote host.");

  // Create the listener socket on the requested port.  errno is cleared
  // first so that JASSERT_ERRNO below reflects only this attempt.
  errno = 0;
  jalib::JServerSocket coordinatorListenerSocket(jalib::JSockAddr::ANY,
                                                 port, 128);
  JASSERT(coordinatorListenerSocket.isValid())
    (coordinatorListenerSocket.port()) (JASSERT_ERRNO) (host) (port)
    .Text("Failed to create socket to coordinator port."
          "\nIf msg is \"Address already in use\","
             " this may be an old coordinator."
          "\nEither try again a few seconds or a minute later,"
          "\nOr kill other coordinators on this host and port:"
          "\n    dmtcp_command --coord-host XXX --coord-port XXX"
          "\nOr specify --join-coordinator if joining existing computation.");

  // Move the listener onto the reserved descriptor PROTECTED_COORD_FD
  // (presumably so the exec'd coordinator can find it there -- TODO confirm),
  // and publish the port that was actually bound.
  coordinatorListenerSocket.changeFd(PROTECTED_COORD_FD);
  setCoordPort(coordinatorListenerSocket.port());

  JTRACE("Starting a new coordinator automatically.")
    (coordinatorListenerSocket.port());

  // fork() must be checked explicitly: on failure it returns -1, and falling
  // into the parent branch would close the listener and then fail in wait()
  // with a misleading ECHILD.
  const pid_t childPid = fork();
  JASSERT(childPid != -1) (JASSERT_ERRNO)
    .Text("fork() failed while starting dmtcp_coordinator");

  if (childPid == 0) {
    /* NOTE:  This code assumes that dmtcp_launch (the current program)
     *  and dmtcp_coordinator are in the same directory.  Namely,
     *  GetProgramDir() gets the dir of the current program (dmtcp_launch).
     *  Hence, if dmtcp_coordinator is in a different directory, then
     *     jalib::Filesystem::GetProgramDir() + "/dmtcp_coordinator"
     *  will not exist, and the child will fail.
     */
    // We can't use Util::getPath() here since the SharedData has not been
    // initialized yet.
    string coordinator =
      jalib::Filesystem::GetProgramDir() + "/dmtcp_coordinator";

    char *modeStr = (char *)"--daemon";
    char *args[] = {
      (char *)coordinator.c_str(),
      (char *)"--quiet",

      /* If we wish to also suppress coordinator warnings, call --quiet twice */
      (char *)"--exit-on-last",
      modeStr,
      NULL
    };
    execv(args[0], args);

    // execv() only returns on failure.
    JASSERT(false)(coordinator)(JASSERT_ERRNO).Text(
      "exec(dmtcp_coordinator) failed");
  } else {
    // Parent: the listener now belongs to the child; drop our copy and wait
    // for the launched process to terminate.  Its exit status is collected
    // but intentionally not inspected here.
    int status;
    _real_close(PROTECTED_COORD_FD);
    JASSERT(wait(&status) > 0) (JASSERT_ERRNO);
  }
}
/*
 * Start a dmtcp_coordinator process on the local host.
 *
 * modes:     bit mask.  COORD_BATCH and COORD_FORCE_NEW make this function
 *            create the listener socket itself on a kernel-chosen port and
 *            export that port through ENV_VAR_NAME_PORT; COORD_BATCH also
 *            selects "--batch" instead of "--background" for the child.
 * isRestart: not used by this function.
 *
 * The coordinator host is taken from ENV_VAR_NAME_HOST (DEFAULT_HOST when
 * unset) and must name this machine.  In non-batch mode the function waits
 * for a child to terminate and requires a normal exit with status 0.
 * Failures are reported through JASSERT.
 */
void dmtcp::DmtcpCoordinatorAPI::startNewCoordinator(int modes, int isRestart)
{
  int coordinatorStatus = -1;

  // Get the location of the coordinator.
  const char *coordinatorAddr = getenv ( ENV_VAR_NAME_HOST );
  if(coordinatorAddr == NULL) coordinatorAddr = DEFAULT_HOST;

  dmtcp::string s = coordinatorAddr;
  if(s != "localhost" && s != "127.0.0.1" &&
     s != jalib::Filesystem::GetCurrentHostname()){
    JASSERT(false)(s)(jalib::Filesystem::GetCurrentHostname())
      .Text("Won't automatically start coordinator because DMTCP_HOST"
            " is set to a remote host.");
    _real_exit(DMTCP_FAIL_RC);
  }

  if ( modes & COORD_BATCH || modes & COORD_FORCE_NEW ) {
    // Clear errno BEFORE creating the socket; clearing it afterwards would
    // erase the very error that JASSERT_ERRNO is meant to report.
    errno = 0;
    // Create a socket and bind it to an unused port (port 0 is requested).
    jalib::JServerSocket coordinatorListenerSocket ( jalib::JSockAddr::ANY, 0 );
    JASSERT ( coordinatorListenerSocket.isValid() )
      ( coordinatorListenerSocket.port() ) ( JASSERT_ERRNO )
      .Text ( "Failed to create listen socket."
          "\nIf msg is \"Address already in use\", this may be an old coordinator."
          "\nKill other coordinators and try again in a minute or so." );
    // Move the listener onto the reserved descriptor PROTECTED_COORD_FD and
    // export the port that was actually bound.
    coordinatorListenerSocket.changeFd(PROTECTED_COORD_FD);
    dmtcp::string coordPort= jalib::XToString(coordinatorListenerSocket.port());
    setenv ( ENV_VAR_NAME_PORT, coordPort.c_str(), 1 );
  }

  // Read the port only now: a pointer obtained from getenv() before the
  // setenv() above could be stale and would show the old value.  Substitute
  // a placeholder so a NULL pointer is never handed to the trace macro.
  const char *coordinatorPortStr = getenv ( ENV_VAR_NAME_PORT );
  if(coordinatorPortStr == NULL) coordinatorPortStr = "(unset)";

  JTRACE("Starting a new coordinator automatically.") (coordinatorPortStr);

  // fork() returns -1 on failure; without this check the parent branch would
  // run and the later wait() would fail with a misleading ECHILD.
  const pid_t childPid = fork();
  JASSERT(childPid != -1)(JASSERT_ERRNO)
    .Text("fork() failed while starting dmtcp_coordinator");

  if(childPid == 0){
    dmtcp::string coordinator = jalib::Filesystem::FindHelperUtility("dmtcp_coordinator");
    char *modeStr = (char *)"--background";
    if ( modes & COORD_BATCH ) {
      modeStr = (char *)"--batch";
    }
    char * args[] = {
      (char*)coordinator.c_str(),
      (char*)"--exit-on-last",
      modeStr,
      NULL
    };
    execv(args[0], args);
    // execv() only returns on failure.
    JASSERT(false)(coordinator)(JASSERT_ERRNO).Text("exec(dmtcp_coordinator) failed");
  } else {
    // Parent: drop our copy of the listener descriptor.
    _real_close ( PROTECTED_COORD_FD );
  }

  errno = 0;

  if ( modes & COORD_BATCH ) {
    // FIXME: If running in batch Mode, we sleep here for 5 seconds to let
    // the coordinator get started up.  We need to fix this in future.
    sleep(5);
  } else {
    JASSERT(wait(&coordinatorStatus)>0)(JASSERT_ERRNO);

    // WEXITSTATUS is only meaningful when the child exited normally; for a
    // child killed by a signal it can read as 0 and hide the failure.
    JASSERT(WIFEXITED(coordinatorStatus) &&
            WEXITSTATUS(coordinatorStatus) == 0)
      .Text("Failed to start coordinator, port already in use.  You may use a different port by running with \'-p 12345\'\n");
  }
}