// See comments above for open_ckpt_to_read() int dmtcp::CkptSerializer::openDmtcpCheckpointFile(const dmtcp::string& path, int *offset, int skipBytes) { char buf[1024]; // Function also sets dmtcp::ext_decomp_pid::ConnectionToFds int fd = open_ckpt_to_read(path.c_str()); // The rest of this function is for compatibility with original definition. JASSERT(fd >= 0) (path) .Text("Failed to open file."); const int len = strlen(DMTCP_FILE_HEADER); JASSERT(_real_read(fd, buf, len) == len)(path) .Text("_real_read() failed"); if (strncmp(buf, DMTCP_FILE_HEADER, len) == 0) { JTRACE("opened checkpoint file [uncompressed]")(path); } else { close_ckpt_to_read(fd); fd = open_ckpt_to_read(path.c_str()); /* Re-open from beginning */ JASSERT(fd >= 0) (path) .Text("Failed to open file."); } if (offset != NULL) { *offset = strlen(DMTCP_FILE_HEADER); } skipBytes -= strlen(DMTCP_FILE_HEADER); if (skipBytes > 0) { JASSERT(dmtcp::Util::skipBytes(fd, skipBytes) == skipBytes) (skipBytes); } return fd; }
/**
 * Open a file that several processes may race to create.
 *
 * The file is first opened with O_EXCL|O_CREAT (mode 0600) so that exactly
 * one process creates it.  If it already exists (errno == EEXIST) it is
 * opened again with the caller's flags only.
 *
 * @param name   path of the file to open.
 * @param flags  open(2) flags supplied by the caller (access mode etc.).
 * @return an open file descriptor (>= 0).  If neither attempt succeeds,
 *         JASSERT(false) is raised with the errno text; -1 is returned only
 *         if that assertion returns.
 */
int openSharedFile(dmtcp::string name, int flags)
{
  // try to create, truncate & open file
  int fd = open(name.c_str(), O_EXCL | O_CREAT | O_TRUNC | flags, 0600);
  if (fd >= 0) {
    return fd;
  }

  if (errno == EEXIST) {
    // Someone else created it first: open the existing file.
    // Descriptor 0 is a valid result of open(), so test with >= 0
    // (the previous "> 0" check treated fd 0 as a failure).
    fd = open(name.c_str(), flags, 0600);
    if (fd >= 0) {
      return fd;
    }
  }

  // unable to create & open OR open
  JASSERT(false)(name)(strerror(errno)).Text("Cannot open file");
  return -1;
}
/**
 * Fill in and return the local status record for this process.
 *
 * The record and the two strings it points into are function-local statics,
 * so the returned pointer (and the char pointers inside it) stay valid after
 * the call returns; their contents are overwritten by the next call.
 */
const DmtcpLocalStatus* __real_dmtcpGetLocalStatus()
{
  // Static storage: the caller receives pointers into these objects.
  static dmtcp::string ckptPath;
  static dmtcp::string pidStr;
  static DmtcpLocalStatus localStatus;

  ckptPath.reserve(1024);

  // Refresh the backing strings first ...
  pidStr   = dmtcp::UniquePid::ThisProcess().toString();
  ckptPath = dmtcp::UniquePid::getCkptFilename();

  // ... then publish them, together with the counters.
  localStatus.uniquePidStr       = pidStr.c_str();
  localStatus.checkpointFilename = ckptPath.c_str();
  localStatus.numRestarts        = numRestarts;
  localStatus.numCheckpoints     = numCheckpoints;

  return &localStatus;
}
/**
 * Run "pbs-config <option>" and capture what it prints on stdout.
 *
 * A pipe is created, a child is forked whose stdout is redirected into the
 * pipe and which execs pbs-config, and the parent collects the output after
 * the child has exited successfully.
 *
 * @param option      single command-line option passed to pbs-config.
 * @param pbs_config  receives the captured output (cleared first).
 * @return 0 on success; -1 if the pipe or fork fails, waitpid() fails, or
 *         the child does not exit normally with status 0.
 */
static int queryPbsConfig(dmtcp::string option, dmtcp::string &pbs_config)
{
  int fds[2];
  const char *pbs_config_path = "pbs-config";
  // Must NOT be static: a static array would be initialised only on the
  // first call and would keep a pointer into that call's `option`, which is
  // destroyed when the function returns.
  const char *pbs_config_args[] = { "pbs-config", option.c_str(), NULL };
  int cpid;

  if (pipe(fds) == -1) {
    // just go away - we cannot serve this request
    JTRACE("Cannot create pipe to execute pbs-config to find Torque/PBS library!");
    return -1;
  }

  cpid = _real_fork();

  if (cpid < 0) {
    JTRACE( "ERROR: cannot execute pbs-config. Will not run tm_spawn!");
    // Do not leak the pipe when the fork fails.
    close(fds[0]);
    close(fds[1]);
    return -1;
  }

  if (cpid == 0) {
    JTRACE ( "child process, will exec into pbs-config");
    // NOTE(review): the repeated dup() presumably moves the write end to a
    // descriptor above the standard ones before dup2()/close() -- confirm.
    fds[1] = dup(dup(dup(fds[1])));
    close(fds[0]);
    JASSERT(dup2(fds[1], STDOUT_FILENO) == STDOUT_FILENO);
    close(fds[1]);
    _real_execvp(pbs_config_path, (char **)pbs_config_args);
    /* should not get here */
    JASSERT(false)("ERROR: Failed to exec pbs-config. tm_spawn will fail with TM_BADINIT")(strerror(errno));
    exit(0);
  }

  /* parent process */
  // The parent only reads; drop its copy of the write end so that it is
  // not leaked.
  close(fds[1]);

  JTRACE ( "created child process for pbs-config")(cpid);

  // NOTE(review): the child is reaped before its output is read, so output
  // larger than the pipe capacity would block the child -- confirm that
  // pbs-config output is always small.
  int status;
  if (waitpid(cpid, &status, 0) < 0) {
    close(fds[0]);
    return -1;
  }
  if (!(WIFEXITED(status) && WEXITSTATUS(status) == 0)) {
    close(fds[0]);
    return -1;
  }

  // set descriptor as non-blocking
  int flags = fcntl(fds[0], F_GETFL);
  fcntl(fds[0], F_SETFL, flags | O_NONBLOCK);

  // Read pbs-config output from pipe
  pbs_config = "";
  char buf[256];
  int count = 0;
  while ((count = read(fds[0], buf, 255)) > 0) {
    buf[count] = '\0';
    pbs_config += buf;
  }
  close(fds[0]);

  JTRACE ( "pbs-config output:")(pbs_config);
  return 0;
}
/**
 * In DEBUG builds, append a short banner to the log file at `path` naming
 * the current program and the path of the new JAssert log file.  In
 * non-DEBUG builds this function does nothing.
 *
 * @param path  log file to append to; it is opened without O_CREAT, so it
 *              must already exist.  If it cannot be opened nothing is
 *              written.
 */
static void writeCurrentLogFileNameToPrevLogFile(dmtcp::string& path)
{
#ifdef DEBUG
  dmtcp::ostringstream o;
  o << "========================================\n"
    << "This process exec()'d into a new program\n"
    << "Program Name: " << jalib::Filesystem::GetProgramName() << "\n"
    << "New JAssertLog Path: " << getLogFilePath() << "\n"
    << "========================================\n";

  int fd = open(path.c_str(), O_WRONLY | O_APPEND, 0);
  if (fd != -1) {
    Util::writeAll(fd, o.str().c_str(), o.str().length());
    // Close only a descriptor that was actually opened; previously
    // _real_close() was also invoked with -1 when open() failed.
    _real_close(fd);
  }
#endif
}
/**
 * Open a DMTCP checkpoint image for reading and position the descriptor
 * just past DMTCP_FILE_HEADER.
 *
 * The file is first opened directly.  If its first bytes are not the DMTCP
 * header it is assumed to be gzip-compressed: the direct descriptor is
 * closed and the file is re-read through a "gzip -d" child started with
 * popen(); the header is then verified on the decompressed stream.
 *
 * @param path  checkpoint file to open.
 * @return a readable descriptor positioned after the header (either the
 *         file itself or the read end of the gzip pipe).  All failures are
 *         reported through JASSERT.
 */
int dmtcp::CkptSerializer::openDmtcpCheckpointFile(const dmtcp::string& path)
{
  int fd = open( path.c_str(), O_RDONLY);
  JASSERT(fd>=0)(path).Text("Failed to open file.");

  char buf[512];
  const int len = strlen(DMTCP_FILE_HEADER);
  JASSERT(_real_read(fd, buf, len)==len)(path).Text("_real_read() failed");

  if(strncmp(buf, DMTCP_FILE_HEADER, len)==0){
    JTRACE("opened checkpoint file [uncompressed]")(path);
    return fd;
  }

  close(fd);

  // The path is placed inside single quotes on a shell command line.  A
  // single quote inside the path would end the quoting (breaking the
  // command, or letting the remainder be interpreted by the shell), so
  // rewrite each ' as '\'' (close quote, escaped quote, reopen quote).
  dmtcp::string quotedPath;
  for (size_t i = 0; i < path.length(); i++) {
    if (path[i] == '\'') {
      quotedPath += "'\\''";
    } else {
      quotedPath += path[i];
    }
  }

  dmtcp::string cmd = dmtcp::string()+"exec gzip -d - < '"+quotedPath+"'";
  FILE* t = _real_popen(cmd.c_str(),"r");
  JASSERT(t!=NULL)(path)(cmd).Text("Failed to launch gzip.");
  JTRACE ( "created gzip child process to uncompress checkpoint file");

  // The FILE object is intentionally not closed here: its underlying
  // descriptor is what gets returned to the caller.
  fd = fileno(t);
  JASSERT(_real_read(fd, buf, len)==len)(cmd)(path).Text("Invalid checkpoint file");
  JASSERT(strncmp(buf, DMTCP_FILE_HEADER, len)==0)(path).Text("Invalid checkpoint file");
  JTRACE("opened checkpoint file [compressed]")(path);
  return fd;
}
const char* dmtcp_get_uniquepid_str() { static dmtcp::string uniquepid_str; uniquepid_str = dmtcp::UniquePid::ThisProcess(true).toString(); return uniquepid_str.c_str(); }
const char* dmtcp_get_tmpdir() { static dmtcp::string tmpdir; tmpdir = dmtcp::UniquePid::getTmpDir(); return tmpdir.c_str(); }
/**
 * String-object convenience overload: forwards to the C-string overload of
 * strEndsWith() using the character data of `str`.
 *
 * @param str      string to examine.
 * @param pattern  suffix to look for.
 * @return whatever the C-string overload returns for (str.c_str(), pattern).
 */
bool dmtcp::Util::strEndsWith(const dmtcp::string& str, const char *pattern)
{
  const char *rawStr = str.c_str();
  return strEndsWith(rawStr, pattern);
}
void dmtcp::DmtcpCoordinatorAPI::sendCoordinatorHandshake ( const dmtcp::string& progname, UniquePid compGroup /*= UniquePid()*/, int np /*= -1*/, DmtcpMessageType msgType /*= DMT_HELLO_COORDINATOR*/) { JTRACE("sending coordinator handshake")(UniquePid::ThisProcess()); dmtcp::string hostname = jalib::Filesystem::GetCurrentHostname(); const char *prefixPathEnv = getenv(ENV_VAR_PREFIX_PATH); dmtcp::string prefixDir; DmtcpMessage hello_local; hello_local.type = msgType; hello_local.params[0] = np; hello_local.compGroup = compGroup; hello_local.restorePort = theRestorePort; if (getenv(ENV_VAR_VIRTUAL_PID) == NULL) { hello_local.virtualPid = -1; } else { hello_local.virtualPid = (pid_t) atoi(getenv(ENV_VAR_VIRTUAL_PID)); } const char* interval = getenv ( ENV_VAR_CKPT_INTR ); /* DmtcpMessage constructor default: * hello_local.theCheckpointInterval: DMTCPMESSAGE_SAME_CKPT_INTERVAL */ if ( interval != NULL ) hello_local.theCheckpointInterval = jalib::StringToInt ( interval ); // Tell the coordinator the ckpt interval only once. It can change later. _dmtcp_unsetenv ( ENV_VAR_CKPT_INTR ); hello_local.extraBytes = hostname.length() + 1 + progname.length() + 1; if (prefixPathEnv != NULL) { /* If --prefix was defined then this process is either running on the local * node (the home of first process in the comptation) or a remote node. * * If the process is running on the local node, the prefix-path-env may be * different from the prefix-dir of this binary, in which case, we want to * send the prefix-path of this binary to the coordinator and the * coordinator will save it as the local-prefix. * * However, if this is running on a remote node, the prefix-path-env would * be the same as the prefix-path of this binary and we should send the * prefix-path-env to the coordinator and the coordinator will note this as * the remote-prefix. 
*/ dmtcp::string utilDirPrefix = jalib::Filesystem::DirName(getenv(ENV_VAR_UTILITY_DIR)); if (utilDirPrefix == jalib::Filesystem::ResolveSymlink(prefixPathEnv)) { prefixDir = prefixPathEnv; } else { prefixDir = utilDirPrefix; } hello_local.extraBytes += prefixDir.length() + 1; } _coordinatorSocket << hello_local; _coordinatorSocket.writeAll( hostname.c_str(),hostname.length()+1); _coordinatorSocket.writeAll( progname.c_str(),progname.length()+1); if (!prefixDir.empty()) { _coordinatorSocket.writeAll(prefixDir.c_str(), prefixDir.length()+1); } }
/**
 * Return the value of ProcessInfo::getCkptFilesSubDir() as a C string.
 *
 * The text lives in a function-local static string that is overwritten on
 * every call, so the returned pointer reflects only the most recent call.
 */
EXTERNC const char* dmtcp_get_ckpt_files_subdir(void)
{
  static dmtcp::string cachedSubDir;

  cachedSubDir = dmtcp::ProcessInfo::instance().getCkptFilesSubDir();

  const char *result = cachedSubDir.c_str();
  return result;
}
/**
 * Return the value of ProcessInfo::getCkptFilename() as a C string.
 *
 * The text lives in a function-local static string that is overwritten on
 * every call, so the returned pointer reflects only the most recent call.
 */
EXTERNC const char* dmtcp_get_ckpt_filename(void)
{
  static dmtcp::string cachedFilename;

  cachedFilename = dmtcp::ProcessInfo::instance().getCkptFilename();

  const char *result = cachedFilename.c_str();
  return result;
}
/**
 * Return the value of CoordinatorAPI::getCoordCkptDir() as a C string.
 *
 * The text lives in a function-local static string that is overwritten on
 * every call, so the returned pointer reflects only the most recent call.
 */
EXTERNC const char* dmtcp_get_coord_ckpt_dir(void)
{
  static dmtcp::string cachedCoordDir;

  cachedCoordDir = CoordinatorAPI::instance().getCoordCkptDir();

  const char *result = cachedCoordDir.c_str();
  return result;
}
/**
 * Return the value of ProcessInfo::getCkptDir() as a C string.
 *
 * The text lives in a function-local static string that is overwritten on
 * every call, so the returned pointer reflects only the most recent call.
 */
EXTERNC const char* dmtcp_get_ckpt_dir()
{
  static dmtcp::string cachedCkptDir;

  cachedCkptDir = dmtcp::ProcessInfo::instance().getCkptDir();

  const char *result = cachedCkptDir.c_str();
  return result;
}
/**
 * Replace the current process with mtcp_restart for the given checkpoint.
 *
 * Builds an argument vector for the mtcp_restart helper and a small
 * environment consisting of a padding variable plus (if set) PATH, then
 * calls execve().  The padding variable is sized from the difference
 * between the original argv+env size and the size of the new argv+env.
 *
 * @param path      checkpoint image path.
 * @param offset    byte offset passed as "--offset" (unused when
 *                  USE_MTCP_FD_CALLING is defined).
 * @param argvSize  size of the original argv area, in bytes.
 * @param envSize   size of the original environment area, in bytes.
 *
 * Does not return on success; a failed execve() triggers JASSERT(false).
 */
void runMtcpRestore(const char* path, int offset, size_t argvSize, size_t envSize)
{
  // Located once; later calls reuse the same path.
  static dmtcp::string mtcprestart = jalib::Filesystem::FindHelperUtility ("mtcp_restart");

  // Tell mtcp_restart process to write its debugging information to
  // PROTECTED_STDERR_FD. This way we prevent it from spitting out garbage onto
  // FD_STDERR if it is being used by the user process in a special way.
  char protected_stderr_fd_str[16];
  sprintf(protected_stderr_fd_str, "%d", PROTECTED_STDERR_FD);

#ifdef USE_MTCP_FD_CALLING
  int fd = ConnectionToFds::openMtcpCheckpointFile(path);
  char buf[64];
  char buf2[64];

  sprintf(buf, "%d", fd);
  // gzip_child_pid set by openMtcpCheckpointFile() above.
  sprintf(buf2, "%d", dmtcp::ConnectionToFds::gzip_child_pid);

  char* newArgs[] = {
    (char*) mtcprestart.c_str(),
    (char*) "--stderr-fd",
    protected_stderr_fd_str,
    (char*) "--fd",
    buf,
    (char*) "--gzip-child-pid",
    buf2,
    NULL
  };
  // NOTE(review): index 3 is the "--fd" entry, so this truncates the vector
  // before "--fd", not before "--gzip-child-pid" (index 5) -- confirm intent.
  if (dmtcp::ConnectionToFds::gzip_child_pid == -1) { // If no gzip compression
    newArgs[3] = NULL;
  }
  JTRACE ("launching mtcp_restart --fd")(fd)(path);
#else
  char buf[64];
  sprintf(buf, "%d", offset);

  char* newArgs[] = {
    (char*) mtcprestart.c_str(),
    (char*) "--stderr-fd",
    protected_stderr_fd_str,
    (char*) "--offset",
    buf,
    (char*) path,
    NULL
  };
  JTRACE ("launching mtcp_restart --offset")(path)(offset);
#endif

  // Create the placeholder for "MTCP_OLDPERS" environment.
  // setenv("MTCP_OLDPERS_DUMMY", "XXXXXXXXXXXXXXXX", 1);
  // FIXME: Put an explanation of the logic below.   -- Kapil
  //
  // ENV_PTR(x) steps back from the value returned by getenv(x) by
  // strlen(x) + 1 bytes.  NOTE(review): this presumably yields the start of
  // the "NAME=value" entry and relies on getenv() pointing into such an
  // entry; it is only safe when getenv(x) is non-NULL.
#define ENV_PTR(x) ((char*) (getenv(x) - strlen(x) - 1))
  char* dummyEnviron = NULL;
  const int dummyEnvironIndex = 0; // index in newEnv[]
  const int pathIndex = 1; // index in newEnv[]
  // Eventually, newEnv = {ENV_PTR("MTCP_OLDPERS"), ENV_PTR("PATH"), NULL}
  char* newEnv[3] = {NULL, NULL, NULL};
  // Will put ENV_PTR("MTCP_OLDPERS") here.
  newEnv[dummyEnvironIndex] = (char*) dummyEnviron;
  newEnv[pathIndex] = (getenv("PATH") ? ENV_PTR("PATH") : NULL);

  // Total bytes (including terminators) of the new argument strings.
  size_t newArgsSize = 0;
  for (int i = 0; newArgs[i] != 0; i++) {
    newArgsSize += strlen(newArgs[i]) + 1;
  }
  // NOTE(review): newEnv[0] is still NULL at this point, so this loop stops
  // immediately and newEnvSize stays 0 even when PATH is present -- confirm
  // whether that is intended.
  size_t newEnvSize = 0;
  for (int i = 0; newEnv[i] != 0; i++) {
    newEnvSize += strlen(newEnv[i]) + 1;
  }
  size_t originalArgvEnvSize = argvSize + envSize;
  size_t newArgvEnvSize = newArgsSize + newEnvSize + strlen(newArgs[0]);
  // NOTE(review): unsigned subtraction; wraps around if the new size exceeds
  // the original one, and a result of 0 makes the [argvSizeDiff - 1] store
  // below index out of range.  The malloc() result is not checked.
  size_t argvSizeDiff = originalArgvEnvSize - newArgvEnvSize;
  dummyEnviron = (char*) malloc(argvSizeDiff);
  // Fill with '0' characters, then overwrite the front with "NAME=".
  memset(dummyEnviron, '0', (argvSizeDiff >= 1 ? argvSizeDiff - 1 : 0));
  strncpy(dummyEnviron, ENV_VAR_DMTCP_DUMMY "=0", strlen(ENV_VAR_DMTCP_DUMMY "="));
  dummyEnviron[argvSizeDiff - 1] = '\0';

  newEnv[dummyEnvironIndex] = dummyEnviron;
  JTRACE("Args/Env Sizes") (newArgsSize) (newEnvSize) (argvSize) (envSize) (argvSizeDiff);

  execve (newArgs[0], newArgs, newEnv);
  // Only reached if execve() failed.
  JASSERT (false) (newArgs[0]) (newArgs[1]) (JASSERT_ERRNO) .Text ("exec() failed");
}
/**
 * Entry point of dmtcp_checkpoint.
 *
 * Parses the command line, prepares the environment (temporary directory,
 * checkpoint directory, stderr path, signal and open-file options, hijack
 * libraries in LD_PRELOAD), optionally starts or contacts the coordinator,
 * and finally exec()s the user program.
 *
 * @return -1 if the final execvp() fails; otherwise control never returns
 *         here.  Exits with DMTCP_FAIL_RC if the target executable cannot
 *         be examined by Util::elfType().
 */
int main ( int argc, char** argv )
{
  initializeJalib();

  // Provide a default for the quiet level without overriding the user's.
  if (! getenv(ENV_VAR_QUIET))
    setenv(ENV_VAR_QUIET, "0", 0);

  processArgs(&argc, &argv);

  // If --ssh-slave and --prefix both are present, verify that the prefix-dir
  // of this binary (dmtcp_checkpoint) is same as the one provided with
  // --prefix
  if (isSSHSlave && getenv(ENV_VAR_PREFIX_PATH) != NULL) {
    const char *str = getenv(ENV_VAR_PREFIX_PATH);
    dmtcp::string prefixDir = jalib::Filesystem::ResolveSymlink(str);
    dmtcp::string programPrefixDir =
      jalib::Filesystem::DirName(jalib::Filesystem::GetProgramDir());
    JASSERT(prefixDir == programPrefixDir) (prefixDir) (programPrefixDir);
  }

  dmtcp::UniquePid::setTmpDir(getenv(ENV_VAR_TMPDIR));
  dmtcp::UniquePid::ThisProcess(true);
  dmtcp::Util::initializeLogFile();

#ifdef FORKED_CHECKPOINTING
  /* When this is robust, add --forked-checkpointing option on command-line,
   * with #ifdef FORKED_CHECKPOINTING around the option, change default of
   * configure.ac, dmtcp/configure.ac, to enable, and change them
   * from enable-forked... to disable-...
   */
  setenv(ENV_VAR_FORKED_CKPT, "1", 1);
#endif

  if (jassert_quiet == 0)
    JASSERT_STDERR << DMTCP_BANNER;

  // This code will go away when zero-mapped pages are implemented in MTCP.
  struct rlimit rlim;
  getrlimit(RLIMIT_STACK, &rlim);
  if (rlim.rlim_cur > 256*1024*1024 && rlim.rlim_cur != RLIM_INFINITY)
    JASSERT_STDERR <<
      "*** WARNING: RLIMIT_STACK > 1/4 GB. This causes each thread to"
      "\n*** receive a 1/4 GB stack segment. Checkpoint/restart will be slow,"
      "\n*** and will potentially break if many threads are created."
      "\n*** Suggest setting (sh/bash): ulimit -s 10000"
      "\n*** (csh/tcsh): limit stacksize 10000"
      "\n*** prior to using DMTCP. (This will be fixed in the future, when"
      "\n*** DMTCP supports restoring zero-mapped pages.)\n\n\n" ;
  // Remove this when zero-mapped pages are supported.  For segments with
  // no file backing:  Start with 4096 (page) offset and keep doubling offset
  // until finding region of memory segment with many zeroes.
  // Then mark as CS_ZERO_PAGES in MTCP instead of CS_RESTORE (or mark
  // entire segment as CS_ZERO_PAGES and then overwrite with CS_RESTORE
  // region for portion to be read back from checkpoint image.
  // For CS_ZERO_PAGES region, mmap // on restart, but don't write in zeroes.
  // Also, after checkpointing segment, munmap zero pages, and mmap them again.
  // Don't try to find all pages.  The above strategy may increase
  // the non-zero-mapped mapped pages to no more than double the actual
  // non-zero region (assuming that the zero-mapped pages are contiguous).
  // - Gene

  testMatlab(argv[0]);
  testJava(argv); // Warn that -Xmx flag needed to limit virtual memory size

  // If dmtcphijack.so is in standard search path and _also_ has setgid access,
  // then LD_PRELOAD will work.
  // Otherwise, it will only work if the application does not use setuid and
  // setgid access.  So, we test // if the application does not use
  // setuid/setgid.  (See 'man ld.so')
  // FIXME: ALSO DO THIS FOR execwrappers.cpp:dmtcpPrepareForExec()
  //   Should pass dmtcphijack.so path, and let testSetuid determine
  //   if setgid is set for it.  If so, no problem:  continue.
  //   If not, call testScreen() and adapt 'screen' to run using
  //   Util::patchArgvIfSetuid(argv[0], argv, &newArgv) (which shouldn't
  //   will just modify argv[0] to point to /tmp/dmtcp-USER@HOST/screen
  //   and other modifications:  doesn't need newArgv).
  //   If it's not 'screen' and if no setgid for dmtcphijack.so, then testSetuid
  //   should issue the warning, unset our LD_PRELOAD, and hope for the best.
  //   A program like /usr/libexec/utempter/utempter (Fedora path)
  //   is short-lived and can be safely run.  Ideally, we should
  //   disable checkpoints while utempter is running, and enable checkpoints
  //   when utempter finishes.  See possible model at
  //   execwrappers.cpp:execLibProcessAndExit(), since the same applies
  //   to running /lib/libXXX.so for running libraries as executables.
  if (testSetuid(argv[0])) {
    // NOTE(review): newArgv is uninitialized here; this assumes
    // patchArgvIfSetuid() always stores a value through &newArgv.
    char **newArgv;
    // THIS NEXT LINE IS DANGEROUS.  MOST setuid PROGRAMS CAN'T RUN UNPRIVILEGED
    dmtcp::Util::patchArgvIfSetuid(argv[0], argv, &newArgv);
    argv = newArgv;
  };

  if (argc > 0) {
    JTRACE("dmtcp_checkpoint starting new program:")(argv[0]);
  }

  //set up CHECKPOINT_DIR
  if(getenv(ENV_VAR_CHECKPOINT_DIR) == NULL){
    // Default the checkpoint directory to the current working directory.
    // NOTE(review): the buffer returned by get_current_dir_name() is not
    // freed after being copied.
    const char* ckptDir = get_current_dir_name();
    if(ckptDir != NULL ){
      //copy to private buffer
      static dmtcp::string _buf = ckptDir;
      ckptDir = _buf.c_str();
    }else{
      ckptDir=".";
    }
    setenv ( ENV_VAR_CHECKPOINT_DIR, ckptDir, 0 );
    JTRACE("setting " ENV_VAR_CHECKPOINT_DIR)(ckptDir);
  }

  dmtcp::string stderrDevice =
    jalib::Filesystem::ResolveSymlink ( _stderrProcPath() );

  //TODO:
  // When stderr is a pseudo terminal for IPC between parent/child processes,
  // this logic fails and JASSERT may write data to FD 2 (stderr).
  // This will cause problems in programs that use FD 2 (stderr) for
  // algorithmic things ...
  if ( stderrDevice.length() > 0
       && jalib::Filesystem::FileExists ( stderrDevice ) )
    setenv ( ENV_VAR_STDERR_PATH,stderrDevice.c_str(), 0 );
  else// if( isSSHSlave )
    setenv ( ENV_VAR_STDERR_PATH, "/dev/null", 0 );

  // Forward the checkpoint-signal setting under the name MTCP_SIGCKPT, or
  // make sure no stale MTCP_SIGCKPT is inherited.
  if ( getenv(ENV_VAR_SIGCKPT) != NULL )
    setenv ( "MTCP_SIGCKPT", getenv(ENV_VAR_SIGCKPT), 1);
  else
    unsetenv("MTCP_SIGCKPT");

  if ( checkpointOpenFiles )
    setenv( ENV_VAR_CKPT_OPEN_FILES, "1", 0 );
  else
    unsetenv( ENV_VAR_CKPT_OPEN_FILES);

#ifdef PID_VIRTUALIZATION
  setenv( ENV_VAR_ROOT_PROCESS, "1", 1 );
#endif

  bool isElf, is32bitElf;
  if (dmtcp::Util::elfType(argv[0], &isElf, &is32bitElf) == -1) {
    // Couldn't read argv_buf
    // FIXME:  This could have been a symbolic link.  Don't issue an error,
    //         unless we're sure that the executable is not readable.
    JASSERT_STDERR <<
      "*** ERROR:  Executable to run w/ DMTCP appears not to be readable,\n"
      "***         or no such executable in path.\n\n"
      << argv[0] << "\n";
    exit(DMTCP_FAIL_RC);
  } else {
#if defined(__x86_64__) && !defined(CONFIG_M32)
    if (is32bitElf)
      JASSERT_STDERR << "*** ERROR:  You appear to be checkpointing "
        << "a 32-bit target under 64-bit Linux.\n"
        << "***  If this fails, then please try re-configuring DMTCP:\n"
        << "***  configure --enable-m32 ; make clean ; make\n\n";
#endif

    testStaticallyLinked(argv[0]);
  }

  // UNSET DISPLAY environment variable.
  unsetenv("DISPLAY");

  // FIXME:  Unify this code with code prior to execvp in execwrappers.cpp
  //   Can use argument to dmtcpPrepareForExec() or getenv("DMTCP_...")
  //   from DmtcpWorker constructor, to distinguish the two cases.
  dmtcp::Util::adjustRlimitStack();

  // FIXME: This call should be moved closer to call to execvp().
  dmtcp::Util::prepareDlsymWrapper();

  if (autoStartCoordinator)
    dmtcp::DmtcpCoordinatorAPI::startCoordinatorIfNeeded(allowedModes);

  dmtcp::DmtcpCoordinatorAPI coordinatorAPI;
  pid_t virtualPid = coordinatorAPI.getVirtualPidFromCoordinator();
  if (virtualPid != -1) {
    JTRACE("Got virtual pid from coordinator") (virtualPid);
    dmtcp::Util::setVirtualPidEnvVar(virtualPid, getppid());
  }

  // preloadLibs are to set LD_PRELOAD:
  //   LD_PRELOAD=PLUGIN_LIBS:UTILITY_DIR/dmtcphijack.so:R_LIBSR_UTILITY_DIR/
  dmtcp::string preloadLibs = "";
  // FIXME:  If the colon-separated elements of ENV_VAR_PLUGIN are not
  //     absolute pathnames, then they must be expanded to absolute pathnames.
  //     Warn user if an absolute pathname is not valid.
  if ( getenv(ENV_VAR_PLUGIN) != NULL ) {
    preloadLibs += getenv(ENV_VAR_PLUGIN);
    preloadLibs += ":";
  }
  // FindHelperUtiltiy requires ENV_VAR_UTILITY_DIR to be set
  dmtcp::string searchDir = jalib::Filesystem::GetProgramDir();
  setenv ( ENV_VAR_UTILITY_DIR, searchDir.c_str(), 0 );

#ifdef PTRACE
  preloadLibs += jalib::Filesystem::FindHelperUtility ( "ptracehijack.so" );
  preloadLibs += ":";
#endif

  preloadLibs += jalib::Filesystem::FindHelperUtility ( "dmtcphijack.so" );

#ifdef PID_VIRTUALIZATION
  preloadLibs += ":";
  preloadLibs += jalib::Filesystem::FindHelperUtility ( "pidvirt.so" );
#endif

  // Record the DMTCP-only library list before the user's LD_PRELOAD is
  // appended below.
  setenv(ENV_VAR_HIJACK_LIBS, preloadLibs.c_str(), 1);

  // If dmtcp_checkpoint was called with user LD_PRELOAD, and if
  //   if dmtcp_checkpoint survived the experience, then pass it back to user.
  if (getenv("LD_PRELOAD"))
    preloadLibs = preloadLibs + ":" + getenv("LD_PRELOAD");

  setenv ( "LD_PRELOAD", preloadLibs.c_str(), 1 );
  JTRACE("getting value of LD_PRELOAD")(getenv("LD_PRELOAD"));

  //run the user program
  char **newArgv = NULL;
  if (testScreen(argv, &newArgv))
    execvp ( newArgv[0], newArgv );
  else
    execvp ( argv[0], argv );

  //should be unreachable
  JASSERT_STDERR <<
    "ERROR: Failed to exec(\"" << argv[0] << "\"): " << JASSERT_ERRNO << "\n"
    << "Perhaps it is not in your $PATH?\n"
    << "See `dmtcp_checkpoint --help` for usage.\n";
  //fprintf(stderr, theExecFailedMsg, argv[0], JASSERT_ERRNO);

  return -1;
}
/**
 * Entry point of dmtcp_launch.
 *
 * Closes descriptors in the protected range, parses the command line,
 * prepares the environment (temporary directory, checkpoint directory,
 * open-file option, DISPLAY handling), connects to the coordinator,
 * initializes the shared-data area, sets up the preload libraries, and
 * finally exec()s the user program.
 *
 * @return -1 if the final execvp() fails; otherwise control never returns
 *         here.  Exits with DMTCP_FAIL_RC if the target executable cannot
 *         be examined by Util::elfType().
 */
int main ( int argc, char** argv )
{
  // Close every descriptor in [PROTECTED_FD_START, PROTECTED_FD_END).
  for (size_t fd = PROTECTED_FD_START; fd < PROTECTED_FD_END; fd++) {
    close(fd);
  }

  // Provide a default for the quiet level without overriding the user's.
  if (! getenv(ENV_VAR_QUIET))
    setenv(ENV_VAR_QUIET, "0", 0);

  processArgs(&argc, &argv);

  initializeJalib();

  // If --ssh-slave and --prefix both are present, verify that the prefix-dir
  // of this binary (dmtcp_launch) is same as the one provided with
  // --prefix
  if (isSSHSlave && getenv(ENV_VAR_PREFIX_PATH) != NULL) {
    char buf[PATH_MAX];
    string prefixPath = getenv(ENV_VAR_PREFIX_PATH);
    prefixPath += "/bin/dmtcp_launch";
    // Canonicalize the expected binary location before comparing.
    JASSERT(realpath(prefixPath.c_str(), buf) != NULL) (prefixPath);
    prefixPath = buf;
    string programPath = jalib::Filesystem::GetProgramPath();
    JASSERT(prefixPath == programPath) (prefixPath) (programPath);
  }

  dmtcp::Util::setTmpDir(getenv(ENV_VAR_TMPDIR));
  dmtcp::UniquePid::ThisProcess(true);
  dmtcp::Util::initializeLogFile();

#ifdef FORKED_CHECKPOINTING
  /* When this is robust, add --forked-checkpointing option on command-line,
   * with #ifdef FORKED_CHECKPOINTING around the option, change default of
   * configure.ac, dmtcp/configure.ac, to enable, and change them
   * from enable-forked... to disable-...
   */
  setenv(ENV_VAR_FORKED_CKPT, "1", 1);
#endif

  // This code will go away when zero-mapped pages are implemented in MTCP.
  struct rlimit rlim;
  getrlimit(RLIMIT_STACK, &rlim);
  if (rlim.rlim_cur > 256*1024*1024 && rlim.rlim_cur != RLIM_INFINITY)
    JASSERT_STDERR <<
      "*** WARNING: RLIMIT_STACK > 1/4 GB. This causes each thread to"
      "\n*** receive a 1/4 GB stack segment. Checkpoint/restart will be slow,"
      "\n*** and will potentially break if many threads are created."
      "\n*** Suggest setting (sh/bash): ulimit -s 10000"
      "\n*** (csh/tcsh): limit stacksize 10000"
      "\n*** prior to using DMTCP. (This will be fixed in the future, when"
      "\n*** DMTCP supports restoring zero-mapped pages.)\n\n\n" ;
  // Remove this when zero-mapped pages are supported.  For segments with
  // no file backing:  Start with 4096 (page) offset and keep doubling offset
  // until finding region of memory segment with many zeroes.
  // Then mark as CS_ZERO_PAGES in MTCP instead of CS_RESTORE (or mark
  // entire segment as CS_ZERO_PAGES and then overwrite with CS_RESTORE
  // region for portion to be read back from checkpoint image.
  // For CS_ZERO_PAGES region, mmap // on restart, but don't write in zeroes.
  // Also, after checkpointing segment, munmap zero pages, and mmap them again.
  // Don't try to find all pages.  The above strategy may increase
  // the non-zero-mapped mapped pages to no more than double the actual
  // non-zero region (assuming that the zero-mapped pages are contiguous).
  // - Gene

  testMatlab(argv[0]);
  testJava(argv); // Warn that -Xmx flag needed to limit virtual memory size

  // If libdmtcp.so is in standard search path and _also_ has setgid access,
  // then LD_PRELOAD will work.
  // Otherwise, it will only work if the application does not use setuid and
  // setgid access.  So, we test // if the application does not use
  // setuid/setgid.  (See 'man ld.so')
  // FIXME: ALSO DO THIS FOR execwrappers.cpp:dmtcpPrepareForExec()
  //   Should pass libdmtcp.so path, and let testSetuid determine
  //   if setgid is set for it.  If so, no problem:  continue.
  //   If not, call testScreen() and adapt 'screen' to run using
  //   Util::patchArgvIfSetuid(argv[0], argv, &newArgv) (which shouldn't
  //   will just modify argv[0] to point to /tmp/dmtcp-USER@HOST/screen
  //   and other modifications:  doesn't need newArgv).
  //   If it's not 'screen' and if no setgid for libdmtcp.so, then testSetuid
  //   should issue the warning, unset our LD_PRELOAD, and hope for the best.
  //   A program like /usr/libexec/utempter/utempter (Fedora path)
  //   is short-lived and can be safely run.  Ideally, we should
  //   disable checkpoints while utempter is running, and enable checkpoints
  //   when utempter finishes.  See possible model at
  //   execwrappers.cpp:execLibProcessAndExit(), since the same applies
  //   to running /lib/libXXX.so for running libraries as executables.
  if (testSetuid(argv[0])) {
    // NOTE(review): newArgv is uninitialized here; this assumes
    // patchArgvIfSetuid() always stores a value through &newArgv.
    char **newArgv;
    // THIS NEXT LINE IS DANGEROUS.  MOST setuid PROGRAMS CAN'T RUN UNPRIVILEGED
    dmtcp::Util::patchArgvIfSetuid(argv[0], argv, &newArgv);
    argv = newArgv;
  };

  if (argc > 0) {
    JTRACE("dmtcp_launch starting new program:")(argv[0]);
  }

  //set up CHECKPOINT_DIR
  if(getenv(ENV_VAR_CHECKPOINT_DIR) == NULL){
    // Default the checkpoint directory to the current working directory.
    // NOTE(review): the buffer returned by get_current_dir_name() is not
    // freed after being copied.
    const char* ckptDir = get_current_dir_name();
    if(ckptDir != NULL ){
      //copy to private buffer
      static dmtcp::string _buf = ckptDir;
      ckptDir = _buf.c_str();
    }else{
      ckptDir=".";
    }
    setenv ( ENV_VAR_CHECKPOINT_DIR, ckptDir, 0 );
    JTRACE("setting " ENV_VAR_CHECKPOINT_DIR)(ckptDir);
  }

  if ( checkpointOpenFiles )
    setenv( ENV_VAR_CKPT_OPEN_FILES, "1", 0 );
  else
    unsetenv( ENV_VAR_CKPT_OPEN_FILES);

  bool isElf, is32bitElf;
  if (dmtcp::Util::elfType(argv[0], &isElf, &is32bitElf) == -1) {
    // Couldn't read argv_buf
    // FIXME:  This could have been a symbolic link.  Don't issue an error,
    //         unless we're sure that the executable is not readable.
    JASSERT_STDERR <<
      "*** ERROR:  Executable to run w/ DMTCP appears not to be readable,\n"
      "***         or no such executable in path.\n\n"
      << argv[0] << "\n";
    exit(DMTCP_FAIL_RC);
  } else {
    testStaticallyLinked(argv[0]);
  }

  // Preserve the original DISPLAY value under ORIG_DISPLAY before removing
  // DISPLAY from the environment.
  if (getenv("DISPLAY") != NULL) {
    setenv("ORIG_DISPLAY", getenv("DISPLAY"), 1);
    // UNSET DISPLAY environment variable.
    unsetenv("DISPLAY");
  }

  if( explicitSrun ){
    setenv(ENV_VAR_EXPLICIT_SRUN, "1", 1);
  }

  // FIXME:  Unify this code with code prior to execvp in execwrappers.cpp
  //   Can use argument to dmtcpPrepareForExec() or getenv("DMTCP_...")
  //   from DmtcpWorker constructor, to distinguish the two cases.
  dmtcp::Util::adjustRlimitStack();

  // Set DLSYM_OFFSET env var(s).
  dmtcp::Util::prepareDlsymWrapper();

  // Filled in by connectToCoordOnStartup() and then handed to
  // SharedData::initialize() below.
  DmtcpUniqueProcessId compId;
  CoordinatorInfo coordInfo;
  struct in_addr localIPAddr;

  CoordinatorAPI::instance().connectToCoordOnStartup(allowedModes, argv[0],
                                                     &compId, &coordInfo,
                                                     &localIPAddr);

  Util::writeCoordPortToFile(getenv(ENV_VAR_NAME_PORT), thePortFile.c_str());

  /* We need to initialize SharedData here to make sure that it is
   * initialized with the correct coordinator timestamp.  The coordinator
   * timestamp is updated only during postCkpt callback. However, the
   * SharedData area may be initialized earlier (for example, while
   * recreating threads), causing it to use *older* timestamp.
   */
  SharedData::initialize(Util::getTmpDir().c_str(),
                         &compId,
                         &coordInfo,
                         &localIPAddr);

  setLDPreloadLibs(is32bitElf);

  //run the user program
  char **newArgv = NULL;
  if (testScreen(argv, &newArgv))
    execvp ( newArgv[0], newArgv );
  else
    execvp ( argv[0], argv );

  //should be unreachable
  JASSERT_STDERR <<
    "ERROR: Failed to exec(\"" << argv[0] << "\"): " << JASSERT_ERRNO << "\n"
    << "Perhaps it is not in your $PATH?\n"
    << "See `dmtcp_launch --help` for usage.\n";
  //fprintf(stderr, theExecFailedMsg, argv[0], JASSERT_ERRNO);

  return -1;
}