int main(int argc, char** argv) { char *tmpdir_arg = NULL; char *ckptdir_arg = NULL; initializeJalib(); if (!getenv(ENV_VAR_QUIET)) { setenv(ENV_VAR_QUIET, "0", 0); } if (getenv(ENV_VAR_DISABLE_STRICT_CHECKING)) { noStrictChecking = true; } if (getenv(ENV_VAR_CHECKPOINT_DIR)) { ckptdir_arg = getenv(ENV_VAR_CHECKPOINT_DIR); } if (argc == 1) { printf("%s", DMTCP_VERSION_AND_COPYRIGHT_INFO); printf("(For help: %s --help)\n\n", argv[0]); return DMTCP_FAIL_RC; } //process args shift; while (true) { string s = argc>0 ? argv[0] : "--help"; if (s == "--help" && argc == 1) { printf("%s", theUsage); return DMTCP_FAIL_RC; } else if ((s == "--version") && argc == 1) { printf("%s", DMTCP_VERSION_AND_COPYRIGHT_INFO); return DMTCP_FAIL_RC; } else if (s == "-j" || s == "--join") { allowedModes = COORD_JOIN; shift; } else if (s == "--new-coordinator") { allowedModes = COORD_NEW; shift; } else if (s == "--no-strict-checking") { noStrictChecking = true; shift; } else if (s == "-i" || s == "--interval") { setenv(ENV_VAR_CKPT_INTR, argv[1], 1); shift; shift; } else if (argv[0][0] == '-' && argv[0][1] == 'i' && isdigit(argv[0][2])) { // else if -i5, for example setenv(ENV_VAR_CKPT_INTR, argv[0]+2, 1); shift; } else if (argc > 1 && (s == "-h" || s == "--coord-host" || s == "--host")){ setenv(ENV_VAR_NAME_HOST, argv[1], 1); shift; shift; } else if (argc>1 && (s == "-p" || s == "--coord-port" || s == "--port")) { setenv(ENV_VAR_NAME_PORT, argv[1], 1); shift; shift; } else if (argv[0][0] == '-' && argv[0][1] == 'p' && isdigit(argv[0][2])) { // else if -p0, for example setenv(ENV_VAR_NAME_PORT, argv[0]+2, 1); shift; } else if (argc>1 && s == "--port-file"){ thePortFile = argv[1]; shift; shift; } else if (argc > 1 && (s == "-c" || s == "--ckptdir")) { ckptdir_arg = argv[1]; shift; shift; } else if (argc > 1 && (s == "-t" || s == "--tmpdir")) { tmpdir_arg = argv[1]; shift; shift; } else if (s == "-q" || s == "--quiet") { *getenv(ENV_VAR_QUIET) = *getenv(ENV_VAR_QUIET) + 1; // Just in case a non-standard version of setenv is being used: setenv(ENV_VAR_QUIET, getenv(ENV_VAR_QUIET), 1); shift; } else if ((s.length() > 2 && s.substr(0, 2) == "--") || (s.length() > 1 && s.substr(0, 1) == "-")) { printf("Invalid Argument\n%s", theUsage); return DMTCP_FAIL_RC; } else if (argc > 1 && s == "--") { shift; break; } else { break; } } tmpDir = Util::calcTmpDir(tmpdir_arg); if (ckptdir_arg) { setNewCkptDir(ckptdir_arg); } jassert_quiet = *getenv(ENV_VAR_QUIET) - '0'; //make sure JASSERT initializes now, rather than during restart Util::initializeLogFile(tmpDir); if (!noStrictChecking && jassert_quiet < 2 && (getuid() == 0 || geteuid() == 0)) { JASSERT_STDERR << "WARNING: Running dmtcp_restart as root can be dangerous.\n" " An unknown checkpoint image or bugs in DMTCP may lead to unforeseen\n" " consequences. Continuing as root ....\n"; } JTRACE("New dmtcp_restart process; _argc_ ckpt images") (argc); bool doAbort = false; for (; argc > 0; shift) { string restorename(argv[0]); struct stat buf; int rc = stat(restorename.c_str(), &buf); if (Util::strEndsWith(restorename, "_files")) { continue; } else if (!Util::strEndsWith(restorename, ".dmtcp")) { JNOTE("File doesn't have .dmtcp extension. Check Usage.") (restorename); // Don't test for --quiet here. We're aborting. We need to say why. JASSERT_STDERR << theUsage; doAbort = true; } else if (rc == -1) { char error_msg[1024]; sprintf(error_msg, "\ndmtcp_restart: ckpt image %s", restorename.c_str()); perror(error_msg); doAbort = true; } else if (buf.st_uid != getuid() && !noStrictChecking) { /*Could also run if geteuid() matches*/ printf("\nProcess uid (%d) doesn't match uid (%d) of\n" \ "checkpoint image (%s).\n" \ "This is dangerous. Aborting for security reasons.\n" \ "If you still want to do this, then re-run dmtcp_restart\n" \ " with the --no-strict-checking flag.\n", getuid(), buf.st_uid, restorename.c_str()); doAbort = true; } if (doAbort) { exit(DMTCP_FAIL_RC); } JTRACE("Will restart ckpt image") (argv[0]); RestoreTarget *t = new RestoreTarget(argv[0]); targets[t->upid()] = t; } // Prepare list of independent process tree roots RestoreTargetMap::iterator i; for (i = targets.begin(); i != targets.end(); i++) { RestoreTarget *t1 = i->second; if (t1->isRootOfProcessTree()) { RestoreTargetMap::iterator j; for (j = targets.begin(); j != targets.end(); j++) { RestoreTarget *t2 = j->second; if (t1 == t2) continue; if (t1->sid() == t2->pid()) { break; } } if (j == targets.end()) { independentProcessTreeRoots[t1->upid()] = t1; } } } JASSERT(independentProcessTreeRoots.size() > 0) .Text("There must be at least one process tree that doesn't have\n" " a different process as session leader."); WorkerState::setCurrentState(WorkerState::RESTARTING); /* Try to find non-orphaned process in independent procs list */ RestoreTarget *t; bool foundNonOrphan = false; RestoreTargetMap::iterator it; int size = independentProcessTreeRoots.size(); printf("size = %d\n", size); for (it = independentProcessTreeRoots.begin(); it != independentProcessTreeRoots.end(); it++) { t = it->second; if ( !t->isOrphan() ) { foundNonOrphan = true; break; } } JASSERT(t->pid() != 0); JASSERT(!t->noCoordinator() || allowedModes == COORD_ANY) .Text("Process had no coordinator prior to checkpoint;\n" " but either --join or --new-coordinator was specified."); if( foundNonOrphan ){ t->createProcess(true); } else { /* we were unable to find any non-orphaned procs. * pick the first one and orphan it */ t = independentProcessTreeRoots.begin()->second; t->createOrphanedProcess(true); } JASSERT(false).Text("unreachable"); return -1; }
int main(int argc, char** argv) { bool autoStartCoordinator=true; bool isRestart = true; dmtcp::CoordinatorAPI::CoordinatorMode allowedModes = dmtcp::CoordinatorAPI::COORD_ANY; initializeJalib(); if (!getenv(ENV_VAR_QUIET)) { setenv(ENV_VAR_QUIET, "0", 0); } if (argc == 1) { JASSERT_STDERR << DMTCP_VERSION_AND_COPYRIGHT_INFO; JASSERT_STDERR << "(For help: " << argv[0] << " --help)\n\n"; return DMTCP_FAIL_RC; } //process args shift; while (true) { dmtcp::string s = argc>0 ? argv[0] : "--help"; if (s == "--help" && argc == 1) { JASSERT_STDERR << theUsage; return DMTCP_FAIL_RC; } else if ((s == "--version") && argc == 1) { JASSERT_STDERR << DMTCP_VERSION_AND_COPYRIGHT_INFO; return DMTCP_FAIL_RC; } else if (s == "--no-check") { autoStartCoordinator = false; shift; } else if (s == "-j" || s == "--join") { allowedModes = dmtcp::CoordinatorAPI::COORD_JOIN; shift; } else if (s == "-n" || s == "--new") { allowedModes = dmtcp::CoordinatorAPI::COORD_NEW; shift; } else if (s == "--new-coordinator") { allowedModes = dmtcp::CoordinatorAPI::COORD_FORCE_NEW; shift; } else if (s == "-b" || s == "--batch") { allowedModes = dmtcp::CoordinatorAPI::COORD_BATCH; shift; } else if (s == "-i" || s == "--interval" || (s.c_str()[0] == '-' && s.c_str()[1] == 'i' && isdigit(s.c_str()[2]))) { if (isdigit(s.c_str()[2])) { // if -i5, for example setenv(ENV_VAR_CKPT_INTR, s.c_str()+2, 1); shift; } else { // else -i 5 setenv(ENV_VAR_CKPT_INTR, argv[1], 1); shift; shift; } } else if (argc > 1 && (s == "-h" || s == "--host")) { setenv(ENV_VAR_NAME_HOST, argv[1], 1); shift; shift; } else if (argc > 1 && (s == "-p" || s == "--port")) { setenv(ENV_VAR_NAME_PORT, argv[1], 1); shift; shift; } else if (argc > 1 && (s == "-t" || s == "--tmpdir")) { setenv(ENV_VAR_TMPDIR, argv[1], 1); shift; shift; } else if (s == "-q" || s == "--quiet") { *getenv(ENV_VAR_QUIET) = *getenv(ENV_VAR_QUIET) + 1; // Just in case a non-standard version of setenv is being used: setenv(ENV_VAR_QUIET, getenv(ENV_VAR_QUIET), 1); shift; } else if ((s.length() > 2 && s.substr(0, 2) == "--") || (s.length() > 1 && s.substr(0, 1) == "-")) { JASSERT_STDERR << "Invalid Argument\n"; JASSERT_STDERR << theUsage; return DMTCP_FAIL_RC; } else if (argc > 1 && s == "--") { shift; break; } else { break; } } dmtcp::UniquePid::setTmpDir(getenv(ENV_VAR_TMPDIR)); dmtcpTmpDir = dmtcp::UniquePid::getTmpDir(); jassert_quiet = *getenv(ENV_VAR_QUIET) - '0'; //make sure JASSERT initializes now, rather than during restart Util::initializeLogFile(); if (jassert_quiet == 0) JASSERT_STDERR << DMTCP_BANNER; JTRACE("New dmtcp_restart process; _argc_ ckpt images") (argc); bool doAbort = false; for (; argc > 0; shift) { dmtcp::string restorename(argv[0]); struct stat buf; int rc = stat(restorename.c_str(), &buf); if (Util::strEndsWith(restorename, "_files")) { continue; } else if (!Util::strEndsWith(restorename, ".dmtcp")) { JNOTE("File doesn't have .dmtcp extension. Check Usage.") (restorename); JASSERT_STDERR << theUsage; doAbort = true; } else if (rc == -1) { char error_msg[1024]; sprintf(error_msg, "\ndmtcp_restart: ckpt image %s", restorename.c_str()); perror(error_msg); doAbort = true; } else if (buf.st_uid != getuid()) { /*Could also run if geteuid() matches*/ printf("\nProcess uid (%d) doesn't match uid (%d) of\n" \ "checkpoint image (%s).\n" \ "This is dangerous. Aborting for security reasons.\n" \ "If you still want to do this (at your own risk),\n" \ " then modify dmtcp/src/%s:%d and re-compile.\n", getuid(), buf.st_uid, restorename.c_str(), __FILE__, __LINE__ - 6); doAbort = true; } if (doAbort) { exit(DMTCP_FAIL_RC); } JTRACE("Will restart ckpt image") (argv[0]); RestoreTarget *t = new RestoreTarget(argv[0]); targets[t->upid()] = t; } // Prepare list of independent process tree roots RestoreTargetMap::iterator i; for (i = targets.begin(); i != targets.end(); i++) { RestoreTarget *t1 = i->second; if (t1->isRootOfProcessTree()) { RestoreTargetMap::iterator j; for (j = targets.begin(); j != targets.end(); j++) { RestoreTarget *t2 = j->second; if (t1 == t2) continue; if (t1->sid() == t2->pid()) { break; } } if (j == targets.end()) { independentProcessTreeRoots[t1->upid()] = t1; } } } JASSERT(independentProcessTreeRoots.size() > 0) .Text("There must atleast one process tree which doesn't have a different " "process as session leader."); if (autoStartCoordinator) { dmtcp::CoordinatorAPI::startCoordinatorIfNeeded(allowedModes, isRestart); } RestoreTarget *t = independentProcessTreeRoots.begin()->second; JASSERT(t->pid() != 0); t->createProcess(true); JASSERT(false).Text("unreachable"); return -1; }