Esempio n. 1
0
int main(int argc, char** argv)
{
  char *tmpdir_arg = NULL;
  char *ckptdir_arg = NULL;

  initializeJalib();

  if (!getenv(ENV_VAR_QUIET)) {
    setenv(ENV_VAR_QUIET, "0", 0);
  }

  if (getenv(ENV_VAR_DISABLE_STRICT_CHECKING)) {
    noStrictChecking = true;
  }

  if (getenv(ENV_VAR_CHECKPOINT_DIR)) {
    ckptdir_arg = getenv(ENV_VAR_CHECKPOINT_DIR);
  }

  if (argc == 1) {
    printf("%s", DMTCP_VERSION_AND_COPYRIGHT_INFO);
    printf("(For help: %s --help)\n\n", argv[0]);
    return DMTCP_FAIL_RC;
  }

  //process args
  shift;
  while (true) {
    string s = argc>0 ? argv[0] : "--help";
    if (s == "--help" && argc == 1) {
      printf("%s", theUsage);
      return DMTCP_FAIL_RC;
    } else if ((s == "--version") && argc == 1) {
      printf("%s", DMTCP_VERSION_AND_COPYRIGHT_INFO);
      return DMTCP_FAIL_RC;
    } else if (s == "-j" || s == "--join") {
      allowedModes = COORD_JOIN;
      shift;
    } else if (s == "--new-coordinator") {
      allowedModes = COORD_NEW;
      shift;
    } else if (s == "--no-strict-checking") {
      noStrictChecking = true;
      shift;
    } else if (s == "-i" || s == "--interval") {
      setenv(ENV_VAR_CKPT_INTR, argv[1], 1);
      shift; shift;
    } else if (argv[0][0] == '-' && argv[0][1] == 'i' &&
               isdigit(argv[0][2])) { // else if -i5, for example
      setenv(ENV_VAR_CKPT_INTR, argv[0]+2, 1);
      shift;
    } else if (argc > 1 && (s == "-h" || s == "--coord-host" || s == "--host")){
      setenv(ENV_VAR_NAME_HOST, argv[1], 1);
      shift; shift;
    } else if (argc>1 && (s == "-p" || s == "--coord-port" || s == "--port")) {
      setenv(ENV_VAR_NAME_PORT, argv[1], 1);
      shift; shift;
    } else if (argv[0][0] == '-' && argv[0][1] == 'p' &&
               isdigit(argv[0][2])) { // else if -p0, for example
      setenv(ENV_VAR_NAME_PORT, argv[0]+2, 1);
      shift;
    } else if (argc>1 && s == "--port-file"){
      thePortFile = argv[1];
      shift; shift;
    } else if (argc > 1 && (s == "-c" || s == "--ckptdir")) {
      ckptdir_arg = argv[1];
      shift; shift;
    } else if (argc > 1 && (s == "-t" || s == "--tmpdir")) {
      tmpdir_arg = argv[1];
      shift; shift;
    } else if (s == "-q" || s == "--quiet") {
      *getenv(ENV_VAR_QUIET) = *getenv(ENV_VAR_QUIET) + 1;
      // Just in case a non-standard version of setenv is being used:
      setenv(ENV_VAR_QUIET, getenv(ENV_VAR_QUIET), 1);
      shift;
    } else if ((s.length() > 2 && s.substr(0, 2) == "--") ||
               (s.length() > 1 && s.substr(0, 1) == "-")) {
      printf("Invalid Argument\n%s", theUsage);
      return DMTCP_FAIL_RC;
    } else if (argc > 1 && s == "--") {
      shift;
      break;
    } else {
      break;
    }
  }

  tmpDir = Util::calcTmpDir(tmpdir_arg);
  if (ckptdir_arg) {
    setNewCkptDir(ckptdir_arg);
  }

  jassert_quiet = *getenv(ENV_VAR_QUIET) - '0';

  //make sure JASSERT initializes now, rather than during restart
  Util::initializeLogFile(tmpDir);

  if (!noStrictChecking && jassert_quiet < 2 &&
      (getuid() == 0 || geteuid() == 0)) {
    JASSERT_STDERR <<
      "WARNING:  Running dmtcp_restart as root can be dangerous.\n"
      "  An unknown checkpoint image or bugs in DMTCP may lead to unforeseen\n"
      "  consequences.  Continuing as root ....\n";
  }

  JTRACE("New dmtcp_restart process; _argc_ ckpt images") (argc);

  bool doAbort = false;
  for (; argc > 0; shift) {
    string restorename(argv[0]);
    struct stat buf;
    int rc = stat(restorename.c_str(), &buf);
    if (Util::strEndsWith(restorename, "_files")) {
      continue;
    } else if (!Util::strEndsWith(restorename, ".dmtcp")) {
      JNOTE("File doesn't have .dmtcp extension. Check Usage.") (restorename);
      // Don't test for --quiet here.  We're aborting.  We need to say why.
      JASSERT_STDERR << theUsage;
      doAbort = true;
    } else if (rc == -1) {
      char error_msg[1024];
      sprintf(error_msg, "\ndmtcp_restart: ckpt image %s", restorename.c_str());
      perror(error_msg);
      doAbort = true;
    } else if (buf.st_uid != getuid() && !noStrictChecking) {
      /*Could also run if geteuid() matches*/
      printf("\nProcess uid (%d) doesn't match uid (%d) of\n" \
             "checkpoint image (%s).\n" \
	     "This is dangerous.  Aborting for security reasons.\n" \
             "If you still want to do this, then re-run dmtcp_restart\n" \
             "  with the --no-strict-checking flag.\n",
             getuid(), buf.st_uid, restorename.c_str());
      doAbort = true;
    }
    if (doAbort) {
      exit(DMTCP_FAIL_RC);
    }

    JTRACE("Will restart ckpt image") (argv[0]);
    RestoreTarget *t = new RestoreTarget(argv[0]);
    targets[t->upid()] = t;
  }

  // Prepare list of independent process tree roots
  RestoreTargetMap::iterator i;
  for (i = targets.begin(); i != targets.end(); i++) {
    RestoreTarget *t1 = i->second;
    if (t1->isRootOfProcessTree()) {
      RestoreTargetMap::iterator j;
      for (j = targets.begin(); j != targets.end(); j++) {
        RestoreTarget *t2 = j->second;
        if (t1 == t2) continue;
        if (t1->sid() == t2->pid()) {
          break;
        }
      }
      if (j == targets.end()) {
        independentProcessTreeRoots[t1->upid()] = t1;
      }
    }
  }
  JASSERT(independentProcessTreeRoots.size() > 0)
    .Text("There must be at least one process tree that doesn't have\n"
          "  a different process as session leader.");

  WorkerState::setCurrentState(WorkerState::RESTARTING);

  /* Try to find non-orphaned process in independent procs list */
  RestoreTarget *t;
  bool foundNonOrphan = false;
  RestoreTargetMap::iterator it;
  int size = independentProcessTreeRoots.size();
  printf("size = %d\n", size);
  for (it = independentProcessTreeRoots.begin();
       it != independentProcessTreeRoots.end();
       it++) {
    t = it->second;
    if ( !t->isOrphan() ) {
      foundNonOrphan = true;
      break;
    }
  }

  JASSERT(t->pid() != 0);
  JASSERT(!t->noCoordinator() || allowedModes == COORD_ANY)
    .Text("Process had no coordinator prior to checkpoint;\n"
          "  but either --join or --new-coordinator was specified.");

  if( foundNonOrphan ){
    t->createProcess(true);
  } else {
      /* we were unable to find any non-orphaned procs.
       * pick the first one and orphan it */
      t = independentProcessTreeRoots.begin()->second;
      t->createOrphanedProcess(true);
  }

  JASSERT(false).Text("unreachable");
  return -1;
}
int main(int argc, char** argv)
{
  bool autoStartCoordinator=true;
  bool isRestart = true;
  dmtcp::CoordinatorAPI::CoordinatorMode allowedModes =
    dmtcp::CoordinatorAPI::COORD_ANY;

  initializeJalib();

  if (!getenv(ENV_VAR_QUIET)) {
    setenv(ENV_VAR_QUIET, "0", 0);
  }

  if (argc == 1) {
    JASSERT_STDERR << DMTCP_VERSION_AND_COPYRIGHT_INFO;
    JASSERT_STDERR << "(For help:  " << argv[0] << " --help)\n\n";
    return DMTCP_FAIL_RC;
  }

  //process args
  shift;
  while (true) {
    dmtcp::string s = argc>0 ? argv[0] : "--help";
    if (s == "--help" && argc == 1) {
      JASSERT_STDERR << theUsage;
      return DMTCP_FAIL_RC;
    } else if ((s == "--version") && argc == 1) {
      JASSERT_STDERR << DMTCP_VERSION_AND_COPYRIGHT_INFO;
      return DMTCP_FAIL_RC;
    } else if (s == "--no-check") {
      autoStartCoordinator = false;
      shift;
    } else if (s == "-j" || s == "--join") {
      allowedModes = dmtcp::CoordinatorAPI::COORD_JOIN;
      shift;
    } else if (s == "-n" || s == "--new") {
      allowedModes = dmtcp::CoordinatorAPI::COORD_NEW;
      shift;
    } else if (s == "--new-coordinator") {
      allowedModes = dmtcp::CoordinatorAPI::COORD_FORCE_NEW;
      shift;
    } else if (s == "-b" || s == "--batch") {
      allowedModes = dmtcp::CoordinatorAPI::COORD_BATCH;
      shift;
    } else if (s == "-i" || s == "--interval" ||
               (s.c_str()[0] == '-' && s.c_str()[1] == 'i' &&
                isdigit(s.c_str()[2]))) {
      if (isdigit(s.c_str()[2])) { // if -i5, for example
        setenv(ENV_VAR_CKPT_INTR, s.c_str()+2, 1);
        shift;
      } else { // else -i 5
        setenv(ENV_VAR_CKPT_INTR, argv[1], 1);
        shift; shift;
      }
    } else if (argc > 1 && (s == "-h" || s == "--host")) {
      setenv(ENV_VAR_NAME_HOST, argv[1], 1);
      shift; shift;
    } else if (argc > 1 && (s == "-p" || s == "--port")) {
      setenv(ENV_VAR_NAME_PORT, argv[1], 1);
      shift; shift;
    } else if (argc > 1 && (s == "-t" || s == "--tmpdir")) {
      setenv(ENV_VAR_TMPDIR, argv[1], 1);
      shift; shift;
    } else if (s == "-q" || s == "--quiet") {
      *getenv(ENV_VAR_QUIET) = *getenv(ENV_VAR_QUIET) + 1;
      // Just in case a non-standard version of setenv is being used:
      setenv(ENV_VAR_QUIET, getenv(ENV_VAR_QUIET), 1);
      shift;
    } else if ((s.length() > 2 && s.substr(0, 2) == "--") ||
               (s.length() > 1 && s.substr(0, 1) == "-")) {
      JASSERT_STDERR << "Invalid Argument\n";
      JASSERT_STDERR << theUsage;
      return DMTCP_FAIL_RC;
    } else if (argc > 1 && s == "--") {
      shift;
      break;
    } else {
      break;
    }
  }

  dmtcp::UniquePid::setTmpDir(getenv(ENV_VAR_TMPDIR));
  dmtcpTmpDir = dmtcp::UniquePid::getTmpDir();

  jassert_quiet = *getenv(ENV_VAR_QUIET) - '0';

  //make sure JASSERT initializes now, rather than during restart
  Util::initializeLogFile();

  if (jassert_quiet == 0)
    JASSERT_STDERR << DMTCP_BANNER;

  JTRACE("New dmtcp_restart process; _argc_ ckpt images") (argc);

  bool doAbort = false;
  for (; argc > 0; shift) {
    dmtcp::string restorename(argv[0]);
    struct stat buf;
    int rc = stat(restorename.c_str(), &buf);
    if (Util::strEndsWith(restorename, "_files")) {
      continue;
    } else if (!Util::strEndsWith(restorename, ".dmtcp")) {
      JNOTE("File doesn't have .dmtcp extension. Check Usage.")
        (restorename);
      JASSERT_STDERR << theUsage;
      doAbort = true;
    } else if (rc == -1) {
      char error_msg[1024];
      sprintf(error_msg, "\ndmtcp_restart: ckpt image %s", restorename.c_str());
      perror(error_msg);
      doAbort = true;
    } else if (buf.st_uid != getuid()) { /*Could also run if geteuid() matches*/
      printf("\nProcess uid (%d) doesn't match uid (%d) of\n" \
             "checkpoint image (%s).\n" \
	     "This is dangerous.  Aborting for security reasons.\n" \
             "If you still want to do this (at your own risk),\n" \
             "  then modify dmtcp/src/%s:%d and re-compile.\n",
             getuid(), buf.st_uid, restorename.c_str(), __FILE__, __LINE__ - 6);
      doAbort = true;
    }
    if (doAbort) {
      exit(DMTCP_FAIL_RC);
    }

    JTRACE("Will restart ckpt image") (argv[0]);
    RestoreTarget *t = new RestoreTarget(argv[0]);
    targets[t->upid()] = t;
  }

  // Prepare list of independent process tree roots
  RestoreTargetMap::iterator i;
  for (i = targets.begin(); i != targets.end(); i++) {
    RestoreTarget *t1 = i->second;
    if (t1->isRootOfProcessTree()) {
      RestoreTargetMap::iterator j;
      for (j = targets.begin(); j != targets.end(); j++) {
        RestoreTarget *t2 = j->second;
        if (t1 == t2) continue;
        if (t1->sid() == t2->pid()) {
          break;
        }
      }
      if (j == targets.end()) {
        independentProcessTreeRoots[t1->upid()] = t1;
      }
    }
  }
  JASSERT(independentProcessTreeRoots.size() > 0)
    .Text("There must atleast one process tree which doesn't have a different "
          "process as session leader.");

  if (autoStartCoordinator) {
    dmtcp::CoordinatorAPI::startCoordinatorIfNeeded(allowedModes,
                                                    isRestart);
  }

  RestoreTarget *t = independentProcessTreeRoots.begin()->second;
  JASSERT(t->pid() != 0);
  t->createProcess(true);
  JASSERT(false).Text("unreachable");
  return -1;
}