Example #1
0
static void runCoordinatorCmd(char c,
                              int *coordCmdStatus = NULL,
                              int *numPeers = NULL,
                              int *isRunning = NULL)
{
  _dmtcp_lock();
  {
    CoordinatorAPI coordinatorAPI;

    dmtcp_disable_ckpt();
    coordinatorAPI.connectAndSendUserCommand(c, coordCmdStatus, numPeers,
                                             isRunning);
    dmtcp_enable_ckpt();
  }
  _dmtcp_unlock();
}
Example #2
0
/*
 * Command-line client that sends a single request to the coordinator
 * (the error text below refers to the tool as 'dmtcp_command').
 *
 * Argument handling:
 *   --help / -h / h / ?   print usage and exit with status 1
 *   --version             print version/copyright and exit with status 1
 *   -h|--host <host>      exported through ENV_VAR_NAME_HOST
 *   -p|--port <port>      exported through ENV_VAR_NAME_PORT
 *   --quiet               suppress the copyright banner
 *   anything else         treated as a request; leading dashes are ignored
 *                         and the first letter must be one of s,i,c,b,f,k,q
 *                         ('b' must be followed by 'c'; 'i' takes an
 *                         interval either glued on, "-i5", or as the next
 *                         argument, "-i 5").
 *
 * Exit status: 0 on success, 1 for usage/version output, 2 when the
 * coordinator reported an error.
 */
int main ( int argc, char** argv )
{
  bool quiet = false;
  dmtcp::string interval = "";
  dmtcp::string request = "h";

  initializeJalib();

  Util::initializeLogFile();

  //process args
  shift;
  while(argc>0){
    dmtcp::string s = argv[0];
    if((s=="--help" || s=="-h") && argc==1){
      fprintf(stderr, theUsage, "");
      return 1;
    } else if ((s=="--version") && argc==1){
      JASSERT_STDERR << DMTCP_VERSION_AND_COPYRIGHT_INFO;
      return 1;
    }else if(argc>1 && (s == "-h" || s == "--host")){
      setenv(ENV_VAR_NAME_HOST, argv[1], 1);
      shift; shift;
    }else if(argc>1 && (s == "-p" || s == "--port")){
      setenv(ENV_VAR_NAME_PORT, argv[1], 1);
      shift; shift;
    }else if(s == "--quiet"){
      quiet = true;
      shift;
    }else if(s == "h" || s == "-h" || s == "--help" || s == "?"){
      fprintf(stderr, theUsage, "");
      return 1;
    }else{ // else it's a request
      char* cmd = argv[0];
      //ignore leading dashes
      while(*cmd == '-') cmd++;
      s = cmd;

      if(*cmd == 'b' && *(cmd+1) != 'c'){
        // If blocking ckpt, next letter must be 'c'; else print the usage
        fprintf(stderr, theUsage, "");
        return 1;
      } else if (*cmd == 's' || *cmd == 'i' || *cmd == 'c' || *cmd == 'b'
                 || *cmd == 'f' || *cmd == 'k' || *cmd == 'q') {
        request = s;
        if (*cmd == 'i') {
          // <ctype.h> functions require a value representable as
          // unsigned char; a plain (possibly negative) char is not.
          if (isdigit((unsigned char)cmd[1])) { // if -i5, for example
            interval = cmd+1;
          } else { // else -i 5
            if (argc == 1) {
              fprintf(stderr, theUsage, "");
              return 1;
            }
            interval = argv[1];
            shift;
          }
        }
        shift;
      }else{
        fprintf(stderr, theUsage, "");
        return 1;
      }
    }
  }

  if (! quiet)
    printf(  "DMTCP-" PACKAGE_VERSION " (+ MTCP), Copyright (C) 2006-2011"
  "  Jason Ansel, Michael Rieker,\n"
  "                                       Kapil Arya, and Gene Cooperman\n"
           "This program comes with ABSOLUTELY NO WARRANTY.\n"
           "This is free software, and you are welcome to redistribute it\n"
           "under certain conditions; see COPYING file for details.\n"
           "(Use flag \"--quiet\" to hide this message.)\n\n");

  int coordErrorCode = CoordinatorAPI::NOERROR;
  // Only filled in by the status request; initialised so they are never
  // read indeterminate.
  int numPeers = 0;
  int isRunning = 0;
  CoordinatorAPI coordinatorAPI;
  const char *cmd = request.c_str();
  switch (*cmd) {
  case 'h':
    fprintf(stderr, theUsage, "");
    return 1;
  case 'i':
    setenv(ENV_VAR_CKPT_INTR, interval.c_str(), 1);
    coordinatorAPI.connectAndSendUserCommand(*cmd, &coordErrorCode);
    // Success message is printed after the error check below.
    break;
  case 'b':
    // blocking prefix
    coordinatorAPI.connectAndSendUserCommand(*cmd, &coordErrorCode);
    // actual command
    coordinatorAPI.connectAndSendUserCommand(*(cmd+1), &coordErrorCode);
    break;
  case 's':
    coordinatorAPI.connectAndSendUserCommand(*cmd, &coordErrorCode,
                                             &numPeers, &isRunning);
    // Without this break the status request fell through and was sent to
    // the coordinator a second time.
    break;
  case 'c':
  case 'f':
  case 'k':
  case 'q':
    coordinatorAPI.connectAndSendUserCommand(*cmd, &coordErrorCode);
    break;
  }

  //check for error
  if (coordErrorCode != CoordinatorAPI::NOERROR) {
    switch(coordErrorCode){
    case CoordinatorAPI::ERROR_COORDINATOR_NOT_FOUND:
      if (getenv("DMTCP_PORT"))
        fprintf(stderr, "Coordinator not found. Please check port and host.\n");
      else
        fprintf(stderr,
              "Coordinator not found. Try specifying port with \'--port\'.\n");
      break;
    case CoordinatorAPI::ERROR_INVALID_COMMAND:
      fprintf(stderr,
              "Unknown command: %c, try 'dmtcp_command --help'\n", *cmd);
      break;
    case CoordinatorAPI::ERROR_NOT_RUNNING_STATE:
      fprintf(stderr, "Error, computation not in running state."
              "  Either a checkpoint is\n"
              " currently happening or there are no connected processes.\n");
      break;
    default:
      fprintf(stderr, "Unknown error\n");
      break;
    }
    return 2;
  }

  if(*cmd == 'i'){
    // Reported only once the coordinator accepted the request.
    printf("Interval changed to %s\n", interval.c_str());
  }

  if(*cmd == 's'){
    const char *host = getenv(ENV_VAR_NAME_HOST);
    if (host)
      printf("  Host: %s\n", host);
    // getenv() returns NULL when the variable is unset; passing NULL to
    // "%s" is undefined behaviour, so substitute a placeholder.
    const char *port = getenv(ENV_VAR_NAME_PORT);
    printf("  Port: %s\n", (port ? port : "(default port)"));
    printf("Status...\n");
    printf("NUM_PEERS=%d\n", numPeers);
    printf("RUNNING=%s\n", (isRunning?"yes":"no"));
  }

  return 0;
}
Example #3
0
/*
 * Command-line client that sends a single request to the coordinator
 * (the error text below refers to the tool as 'dmtcp_command').
 *
 * Argument handling:
 *   --help / -h / h / ?                print usage and exit with status 1
 *   --version                          print version info, exit with status 1
 *   -h|--coord-host|--host <host>      exported through ENV_VAR_NAME_HOST
 *   -p|--coord-port|--port <port>      exported through ENV_VAR_NAME_PORT
 *   -p<digits>                         same as above, port glued to the flag
 *   anything else                      treated as a request; leading dashes
 *                                      are ignored and the first letter must
 *                                      be one of s,i,c,b,x,k,q ('b' and 'x'
 *                                      must be followed by 'c'; 'i' takes an
 *                                      interval as "-i5" or "-i 5").
 *
 * Exit status: 0 on success, 1 for usage/version output, 2 when the
 * coordinator reported an error.
 */
int main ( int argc, char** argv )
{
  string interval = "";
  string request = "h";

  initializeJalib();

  // No need to initialize the log file.
  // Util::initializeLogFile();

  //process args
  shift;
  while(argc>0){
    string s = argv[0];
    if((s=="--help" || s=="-h") && argc==1){
      // NOTE(review): this path prints theUsage verbatim on stdout, whereas
      // every other usage path uses it as a format string on stderr --
      // confirm which is intended.
      printf("%s", theUsage);
      return 1;
    } else if ((s=="--version") && argc==1){
      printf("%s", DMTCP_VERSION_AND_COPYRIGHT_INFO);
      return 1;
    }else if(argc>1 && (s == "-h" || s == "--coord-host" || s == "--host")){
      setenv(ENV_VAR_NAME_HOST, argv[1], 1);
      shift; shift;
    } else if (argc>1 && (s == "-p" || s == "--coord-port" || s == "--port")) {
      setenv(ENV_VAR_NAME_PORT, argv[1], 1);
      shift; shift;
    } else if (argv[0][0] == '-' && argv[0][1] == 'p' &&
               isdigit((unsigned char)argv[0][2])) { // else if -p0, for example
      setenv(ENV_VAR_NAME_PORT, argv[0]+2, 1);
      shift;
    }else if(s == "h" || s == "-h" || s == "--help" || s == "?"){
      fprintf(stderr, theUsage, "");
      return 1;
    }else{ // else it's a request
      char* cmd = argv[0];
      //ignore leading dashes
      while(*cmd == '-') cmd++;
      s = cmd;

      if((*cmd == 'b' || *cmd == 'x') && *(cmd+1) != 'c'){
        // If blocking ckpt, next letter must be 'c'; else print the usage
        fprintf(stderr, theUsage, "");
        return 1;
      } else if (*cmd == 's' || *cmd == 'i' || *cmd == 'c' || *cmd == 'b' ||
                 *cmd == 'x' || *cmd == 'k' || *cmd == 'q') {
        request = s;
        if (*cmd == 'i') {
          // <ctype.h> functions require a value representable as
          // unsigned char; a plain (possibly negative) char is not.
          if (isdigit((unsigned char)cmd[1])) { // if -i5, for example
            interval = cmd+1;
          } else { // else -i 5
            if (argc == 1) {
              fprintf(stderr, theUsage, "");
              return 1;
            }
            interval = argv[1];
            shift;
          }
        }
        shift;
      }else{
        fprintf(stderr, theUsage, "");
        return 1;
      }
    }
  }

  int coordCmdStatus = CoordCmdStatus::NOERROR;
  // Only filled in by the status request; initialised so they are never
  // read indeterminate.
  int numPeers = 0;
  int isRunning = 0;
  int ckptInterval = 0;
  CoordinatorAPI coordinatorAPI;
  const char *cmd = request.c_str();
  switch (*cmd) {
  case 'h':
    fprintf(stderr, theUsage, "");
    return 1;
  case 'i':
    setenv(ENV_VAR_CKPT_INTR, interval.c_str(), 1);
    coordinatorAPI.connectAndSendUserCommand(*cmd, &coordCmdStatus);
    // Success message is printed after the error check below.
    break;
  case 'b':
  case 'x':
    // blocking prefix
    coordinatorAPI.connectAndSendUserCommand(*cmd, &coordCmdStatus);
    // actual command
    coordinatorAPI.connectAndSendUserCommand(*(cmd+1), &coordCmdStatus);
    break;
  case 's':
    coordinatorAPI.connectAndSendUserCommand(*cmd, &coordCmdStatus,
                                        &numPeers, &isRunning, &ckptInterval);
    // Without this break the status request fell through and was sent to
    // the coordinator a second time.
    break;
  case 'c':
  case 'k':
  case 'q':
    coordinatorAPI.connectAndSendUserCommand(*cmd, &coordCmdStatus);
    break;
  }

  //check for error
  if (coordCmdStatus != CoordCmdStatus::NOERROR) {
    switch(coordCmdStatus){
    case CoordCmdStatus::ERROR_COORDINATOR_NOT_FOUND:
      if (getenv("DMTCP_COORD_PORT") || getenv("DMTCP_PORT"))
        fprintf(stderr, "Coordinator not found. Please check port and host.\n");
      else
        fprintf(stderr,
              "Coordinator not found. Try specifying port with \'--port\'.\n");
      break;
    case CoordCmdStatus::ERROR_INVALID_COMMAND:
      fprintf(stderr,
              "Unknown command: %c, try 'dmtcp_command --help'\n", *cmd);
      break;
    case CoordCmdStatus::ERROR_NOT_RUNNING_STATE:
      fprintf(stderr, "Error, computation not in running state."
              "  Either a checkpoint is\n"
              " currently happening or there are no connected processes.\n");
      break;
    default:
      fprintf(stderr, "Unknown error\n");
      break;
    }
    return 2;
  }

  if(*cmd == 'i'){
    // Reported only once the coordinator accepted the request.
    printf("Interval changed to %s\n", interval.c_str());
  }

#define QUOTE(arg) #arg
#define STRINGIFY(arg) QUOTE(arg)
  if(*cmd == 's'){
    printf("Coordinator:\n");
    const char *host = getenv(ENV_VAR_NAME_HOST);
    if (host == NULL) host = getenv("DMTCP_HOST"); // deprecated
    printf("  Host: %s\n", (host ? host : "localhost"));
    const char *port = getenv(ENV_VAR_NAME_PORT);
    if (port == NULL) port = getenv("DMTCP_PORT"); // deprecated
    printf("  Port: %s\n",
           (port ? port : STRINGIFY(DEFAULT_PORT) " (default port)"));
    printf("Status...\n");
    printf("  NUM_PEERS=%d\n", numPeers);
    printf("  RUNNING=%s\n", (isRunning?"yes":"no"));
    if (ckptInterval) {
      printf("  CKPT_INTERVAL=%d\n", ckptInterval);
    } else {
      printf("  CKPT_INTERVAL=0 (checkpoint manually)\n");
    }
  }

  return 0;
}
Example #4
0
/*
 * Recreate this restore target as a live process, forking its children and
 * dependent roots first, then reconnect to the coordinator and hand control
 * to mtcpRestart().  The function is not expected to return: every path
 * ends in a JASSERT(false).
 *
 *   coordinatorAPI - coordinator connection object; also passed to every
 *                    recursively created child.
 *   slidingFd      - fd table used to look up restored connection fds; also
 *                    passed to restoreGroup(), dupAllSockets() and children.
 *
 * Order of operations:
 *   1. Reset the UniquePid and log file for this target.
 *   2. Restore FILE_PROCFS connections whose path starts with
 *      "/proc/<pid>/" for this target's pid.
 *   3. Non-session-leader: restore group info, then fork every child.
 *      Session leader: fork the children that have no entry for the old
 *      sid, call setsid(), restore group info, fork the remaining children,
 *      and finally double-fork each dependent root.
 *   4. Handshake with the coordinator, dup sockets, call mtcpRestart().
 */
void RestoreTarget::CreateProcess(CoordinatorAPI& coordinatorAPI,
                                  SlidingFdTable& slidingFd)
{
  //change UniquePid
  UniquePid::resetOnFork(upid());
  //UniquePid::ThisProcess(true) = _conToFd.upid();

  Util::initializeLogFile(procname());
  JTRACE("Creating process during restart") (upid()) (procname());

  JTRACE("")(getpid())(getppid())(getsid(0));
  ProcessInfo &pInfo = _processInfo;
  pid_t psid = pInfo.sid();

  JTRACE("Restore /proc/self/* fds");
  ConnectionList& connections = ConnectionList::instance();
  ConnectionList::iterator it;
  for (it = connections.begin(); it != connections.end(); ++it) {
    dmtcp::Connection *con = it->second;
    if (con->subType() == FileConnection::FILE_PROCFS) {
      dmtcp::FileConnection *filecon = (dmtcp::FileConnection*) con;
      char buf[32];
      dmtcp::vector<int> fds;
      fds.push_back(slidingFd.getFdFor(con->id()));
      // Bounded formatting; "/proc/<pid>/" fits comfortably in 32 bytes.
      snprintf(buf, sizeof(buf), "/proc/%d/", pInfo.pid());
      if (dmtcp::Util::strStartsWith(filecon->filePath(), buf)) {
        filecon->restore(fds);
      }
    }
  }


  if (!isSessionLeader()) {

    // Restore Group information
    restoreGroup(slidingFd);

    // If process is not session leader, restore it and all children.
    t_iterator it = _children.begin();
    for (; it != _children.end(); it++) {
      JTRACE ("Forking Child Process") ((*it)->upid());
      pid_t cid = fork();

      if (cid == 0) {
        (*it)->CreateProcess (coordinatorAPI, slidingFd);
        JASSERT (false) . Text ("Unreachable");
      }
      JASSERT (cid > 0);
    }
  } else {
    // Process is session leader.
    // There may be not setsid-ed children.
    for (t_iterator it = _children.begin(); it != _children.end(); it++) {
      s_iterator sit = (*it)->getSmap().find(psid);
      JTRACE("Restore processes that were created before their parent called setsid()");
      if (sit == (*it)->getSmap().end()) {
        JTRACE ("Forking Child Process") ((*it)->upid());
        pid_t cid = fork();
        if (cid == 0) {
          (*it)->CreateProcess (coordinatorAPI, slidingFd);
          JASSERT (false) . Text ("Unreachable");
        }
        JASSERT (cid > 0);
      }
    }

    pid_t nsid = setsid();
    JTRACE("change SID")(nsid);

    // Restore Group information
    restoreGroup(slidingFd);

    for (t_iterator it = _children.begin(); it != _children.end(); it++) {
      JTRACE("Restore processes that was created after their parent called setsid()");
      s_iterator sit = (*it)->getSmap().find(psid);
      if (sit != (*it)->getSmap().end()) {
        JTRACE ("Forking Child Process") ((*it)->upid());
        pid_t cid = fork();
        if (cid == 0) {
          (*it)->CreateProcess (coordinatorAPI, slidingFd);
          JASSERT (false) . Text ("Unreachable");
        }
        JASSERT (cid> 0);
      }
    }

    // Dependent roots are created with a double fork: the intermediate
    // child exits immediately and is reaped here, so the root itself is a
    // grandchild rather than a direct child of this process.
    for (t_iterator it = _roots.begin() ; it != _roots.end(); it++) {
      JTRACE ("Forking Dependent Root Process") ((*it)->upid());
      pid_t cid = fork();
      // A failed fork() returns -1; previously that value was treated as
      // "parent" and handed to waitpid(), which then waits for *any* child.
      JASSERT (cid != -1) . Text ("fork() failed");
      if (cid > 0) {
        waitpid(cid, NULL, 0);
      } else {
        pid_t gcid = fork();
        // Previously a failed inner fork() made the intermediate child
        // exit(0) silently, dropping the dependent root without a trace.
        JASSERT (gcid != -1) . Text ("fork() failed");
        if (gcid > 0)
          exit(0);
        (*it)->CreateProcess(coordinatorAPI, slidingFd);
        JASSERT (false) . Text("Unreachable");
      }
    }
  }

  bool isTheGroupLeader = isGroupLeader(); // Calls JTRACE;avoid recursion
  JTRACE("Child and dependent root processes forked, restoring process")
    (upid())(getpid())(isTheGroupLeader);

  //Reconnect to dmtcp_coordinator
  WorkerState::setCurrentState (WorkerState::RESTARTING);

  coordinatorAPI.connectToCoordinator();
  coordinatorAPI.sendCoordinatorHandshake(procname(), _processInfo.compGroup());
  coordinatorAPI.recvCoordinatorHandshake();

  //restart targets[i]
  dupAllSockets (slidingFd);

  mtcpRestart();

  JASSERT (false).Text ("unreachable");
}