/**
 * execle() wrapper.
 *
 * Gathers the variadic arguments into a NULL-terminated argv array, reads the
 * environment pointer that follows the terminating NULL, and forwards
 * everything to execve().
 *
 * The argv array starts out in a fixed-size stack buffer and is moved to the
 * heap (doubling in size) once more than INITIAL_ARGV_MAX entries are needed.
 *
 * @param path  Program to execute.
 * @param arg   First argument (argv[0]); followed by further arguments, a
 *              terminating NULL, and finally a `const char *const *` envp.
 * @return -1 if the argument array could not be grown; otherwise whatever
 *         execve() returns (it only returns on failure).
 */
extern "C" int execle(const char *path, const char *arg, ...)
{
  JTRACE("execle() wrapper") (path);

  size_t argv_max = INITIAL_ARGV_MAX;
  const char *initial_argv[INITIAL_ARGV_MAX];
  const char **argv = initial_argv;
  va_list args;

  argv[0] = arg;
  va_start(args, arg);

  unsigned int i = 0;
  while (argv[i++] != NULL) {
    if (i == argv_max) {
      argv_max *= 2;
      // realloc(NULL, n) behaves like malloc(n); the stack buffer must never
      // be handed to realloc().
      const char **nptr =
        (const char **) realloc(argv == initial_argv ? NULL : argv,
                                argv_max * sizeof(const char *));
      if (nptr == NULL) {
        if (argv != initial_argv) {
          free(argv);
        }
        // Every va_start() needs a matching va_end(), also on this path.
        va_end(args);
        return -1;
      }
      if (argv == initial_argv) {
        /* We have to copy the already filled-in data ourselves. */
        memcpy(nptr, argv, i * sizeof(const char *));
      }
      argv = nptr;
    }
    argv[i] = va_arg(args, const char *);
  }

  // The environment pointer follows the NULL that terminates the arguments.
  const char *const *envp = va_arg(args, const char *const *);
  va_end(args);

  int ret = execve(path, (char *const *) argv, (char *const *) envp);

  // Only reached if execve() failed.
  if (argv != initial_argv) {
    free(argv);
  }
  return ret;
}
void dmtcp::VirtualPidTable::writePidMapsToFile() { //size_t numMaps = 0; dmtcp::string mapFile; mapFile = jalib::Filesystem::ResolveSymlink( "/proc/self/fd/" + jalib::XToString(PROTECTED_PIDMAP_FD)); JASSERT (mapFile.length() > 0) (mapFile); JTRACE ("Write PidMaps to file") (mapFile); // Lock fileset before any operations Util::lockFile(PROTECTED_PIDMAP_FD); _do_lock_tbl(); jalib::JBinarySerializeWriterRaw mapwr(mapFile, PROTECTED_PIDMAP_FD); mapwr.serializeMap(_pidMapTable); _do_unlock_tbl(); Util::unlockFile(PROTECTED_PIDMAP_FD); }
void resetOnFork(int sock) { JASSERT(Util::isValidFd(sock)); JASSERT(sock != PROTECTED_COORD_FD); Util::changeFd(sock, PROTECTED_COORD_FD); JASSERT(Util::isValidFd(coordinatorSocket)); JTRACE("Informing coordinator of new process") (UniquePid::ThisProcess()); DmtcpMessage msg(DMT_UPDATE_PROCESS_INFO_AFTER_FORK); if (dmtcp_virtual_to_real_pid) { msg.realPid = dmtcp_virtual_to_real_pid(getpid()); } else { msg.realPid = getpid(); } sendMsgToCoordinator(msg); _real_close(nsSock); nsSock = -1; }
void dmtcp::ConnectionState::preCheckpointDrain() { deleteStaleConnections(); ConnectionList& connections = ConnectionList::instance(); //initialize the drainer ConnectionList::iterator i; for (i= connections.begin(); i != connections.end(); ++i) { if (_conToFds[i->first].size() > 0) { (i->second)->preCheckpoint(_conToFds[i->first], _drain); } } //this will block until draining is complete _drain.monitorSockets(DRAINER_CHECK_FREQ); //handle disconnected sockets const dmtcp::vector<ConnectionIdentifier>& discn = _drain.getDisconnectedSockets(); for (size_t i=0; i<discn.size(); ++i) { const ConnectionIdentifier& id = discn[i]; TcpConnection& con = connections[id].asTcp(); dmtcp::vector<int>& fds = _conToFds[discn[i]]; JASSERT(fds.size()>0); JTRACE("recreating disconnected socket") (fds[0]) (id); //reading from the socket, and taking the error, resulted in an implicit //close(). //we will create a new, broken socket that is not closed con.onError(); con.restore(fds); //restoring a TCP_ERROR connection makes a dead socket KernelDeviceToConnection::instance().redirect(fds[0], id); } //re build fd table without stale connections and with disconnects _conToFds = ConnectionToFds(KernelDeviceToConnection::instance()); //FIXME: Moving the deleteDupFileConnections() near the top of this function //breaks make check-tcsh, why? deleteDupFileConnections(); }
// Invoked via __clone LIB_PRIVATE int clone_start(void *arg) { struct ThreadArg *threadArg = (struct ThreadArg*) arg; int (*fn) (void *) = threadArg->fn; void *thread_arg = threadArg->arg; pid_t virtualTid = threadArg -> virtualTid; if (dmtcp_is_running_state()) { dmtcpResetTid(virtualTid); } // Free memory previously allocated through JALLOC_HELPER_MALLOC in __clone JALLOC_HELPER_FREE(threadArg); dmtcp::VirtualPidTable::instance().updateMapping(virtualTid, _real_gettid()); JTRACE("Calling user function") (virtualTid); return (*fn) ( thread_arg ); }
/**
 * execvp() wrapper.
 *
 * Prepares the filename/argv for exec, refreshes LD_PRELOAD in the
 * environment, and calls the real execvp(). If that call returns, the
 * failed-exec handler is run and the lock is released before returning the
 * result to the caller.
 */
extern "C" int execvp(const char *filename, char *const argv[])
{
  JTRACE ( "execvp() wrapper" ) ( filename );

  /* Acquire the wrapperExeution lock to prevent checkpoint to happen while
   * processing this system call.
   */
  WRAPPER_EXECUTION_GET_EXCL_LOCK();

  char *patchedFilename;
  char **patchedArgv;
  dmtcpPrepareForExec(filename, argv, &patchedFilename, &patchedArgv);

  setenv("LD_PRELOAD", getUpdatedLdPreload().c_str(), 1);

  const int retVal = _real_execvp(patchedFilename, patchedArgv);

  // Only reached when the exec failed.
  dmtcpProcessFailedExec(filename, patchedArgv);

  WRAPPER_EXECUTION_RELEASE_EXCL_LOCK();

  return retVal;
}
/* File local functions */ static void get_and_save_envvars() { const char *signal = getenv("DMTCP_CKPTTIMER_SIGNAL"); const char *action = getenv("DMTCP_CKPTTIMER_ACTION"); const char *interval = getenv("DMTCP_CKPTTIMER_INTERVAL"); if (signal) { g_sig_num = atoi(signal); JTRACE("Using signal for ckpt timer") (g_sig_num); } else { g_sig_num = DEFAULT_SIGNAL; } if (action) { g_action = atoi(action); } if (interval) { g_interval = atol(interval); } }
static void processDevPtmxConnection(int fd) { char ptsName[21]; JASSERT(_real_ptsname_r(fd, ptsName, 21) == 0) (JASSERT_ERRNO); dmtcp::string ptsNameStr = ptsName; dmtcp::string uniquePtsNameStr; // glibc allows only 20 char long ptsname // Check if there is enough room to insert the string "dmtcp_" before the // terminal number, if not then we ASSERT here. JASSERT((strlen(ptsName) + strlen("dmtcp_")) <= 20) .Text("string /dev/pts/<n> too long, can not be virtualized." "Once possible workarong here is to replace the string" "\"dmtcp_\" with something short like \"d_\" or even " "\"d\" and recompile DMTCP"); // Generate new Unique ptsName uniquePtsNameStr = UNIQUE_PTS_PREFIX_STR; uniquePtsNameStr += jalib::XToString(getNextFreeSlavePtyNum()); dmtcp::string ptmxDevice = jalib::Filesystem::ResolveSymlink("/proc/self/fd/" + jalib::XToString(fd)); dmtcp::string deviceName = "ptmx[" + ptsNameStr + "]:" + ptmxDevice; //"/dev/ptmx"; // dmtcp::string deviceName = "ptmx[" + dmtcp::UniquePid::ThisProcess().toString() // + ":" + jalib::XToString(_nextPtmxId()) // + "]:" + device; JTRACE("creating ptmx connection") (deviceName) (ptsNameStr) (uniquePtsNameStr); int type = dmtcp::PtyConnection::PTY_MASTER; dmtcp::Connection * c = new dmtcp::PtyConnection(ptsNameStr, uniquePtsNameStr, type); dmtcp::KernelDeviceToConnection::instance().createPtyDevice(fd, deviceName, c); dmtcp::UniquePtsNameToPtmxConId::instance().add(uniquePtsNameStr, c->id()); }
/*
 * Determine the scheduler state of a process/thread by parsing the "State:"
 * line of /proc/<pid>/status.
 *
 * @param pid  Id whose /proc entry is inspected.
 * @return PTRACE_PROC_INVALID   if the status file cannot be opened,
 *         PTRACE_PROC_STOPPED / PTRACE_PROC_TRACING_STOP /
 *         PTRACE_PROC_SLEEPING / PTRACE_PROC_RUNNING for the matching state,
 *         PTRACE_PROC_UNDEFINED for anything else.
 */
static PtraceProcState procfs_state(int pid)
{
  char buf[512];
  const char *key = "State:";

  // buf first holds the path and is then reused for the file content.
  snprintf (buf, sizeof (buf), "/proc/%d/status", (int) pid);
  int fd = _real_open (buf, O_RDONLY, 0);
  if (fd < 0) {
    JTRACE("open() failed") (buf);
    return PTRACE_PROC_INVALID;
  }

  // The status file is usually larger than buf. Read at most
  // sizeof(buf) - 1 bytes into a zeroed buffer so that the data is always
  // NUL-terminated before it is handed to strstr()/strcasestr().
  memset(buf, 0, sizeof (buf));
  dmtcp::Util::readAll(fd, buf, sizeof (buf) - 1);
  close(fd);

  char *str = strstr(buf, key);
  JASSERT(str != NULL);
  str += strlen(key);

  // Skip the whitespace between the key and the state description.
  while (*str == ' ' || *str == '\t') {
    str++;
  }

  if (strcasestr(str, "T (stopped)") != NULL) {
    return PTRACE_PROC_STOPPED;
  } else if (strcasestr(str, "T (tracing stop)") != NULL) {
    return PTRACE_PROC_TRACING_STOP;
  } else if (strcasestr(str, "S (sleeping)") != NULL) {
    return PTRACE_PROC_SLEEPING;
  } else if (strcasestr(str, "R (running)") != NULL) {
    return PTRACE_PROC_RUNNING;
  }
  return PTRACE_PROC_UNDEFINED;
}
static void callbackPreCheckpoint( char ** ckptFilename ) { // All we want to do is unlock the jassert/jalloc locks, if we reset them, it // serves the purpose without having a callback. // TODO: Check for correctness. JALIB_CKPT_UNLOCK(); dmtcp_process_event(DMTCP_EVENT_START_PRE_CKPT_CB, NULL); //now user threads are stopped dmtcp::userHookTrampoline_preCkpt(); #ifdef EXTERNAL_SOCKET_HANDLING if (dmtcp::DmtcpWorker::instance().waitForStage2Checkpoint() == false) { char *nullDevice = (char *) "/dev/null"; *ckptFilename = nullDevice; delayedCheckpoint = true; } else #else dmtcp::DmtcpWorker::instance().waitForStage2Checkpoint(); #endif *ckptFilename = const_cast<char *>(dmtcp::UniquePid::getCkptFilename()); JTRACE ( "MTCP is about to write checkpoint image." )(*ckptFilename); }
/*
 * Copy every entry of the NULL-terminated envp array for which
 * isImportantEnv() is false into a vector of strings.
 *
 * When dbg is set, a per-entry log (skipped / added) is accumulated and
 * emitted through JTRACE.
 *
 * @param envp  NULL-terminated array of "NAME=value" strings.
 * @return The copied entries, in their original order.
 */
static dmtcp::vector<dmtcp::string> copyUserEnv ( char *const envp[] )
{
  dmtcp::vector<dmtcp::string> strStorage;

  dmtcp::ostringstream out;
  out << "non-DMTCP env vars:\n";

  for (char *const *cur = envp; *cur != NULL; ++cur) {
    if (!isImportantEnv(*cur)) {
      strStorage.push_back(dmtcp::string(*cur));
      if (dbg) {
        out << " addenv[user]:" << strStorage.back() << '\n';
      }
    } else if (dbg) {
      out << " skipping: " << *cur << '\n';
    }
  }

  JTRACE ( "Creating a copy of (non-DMTCP) user env vars..." ) (out.str());

  return strStorage;
}
void petabricks::IterationDefinition::genSplitCode(CodeGenerator& o, Transform& trans, RuleInterface& rule, RuleFlavor rf, unsigned int blockNumber, SpatialCallType spatialCallType) const { //create list of subregion SplitRegionList regions; SplitRegion seed; seed.reserve(dimensions()); fillSplitRegionList(regions, seed, blockNumber); //order them correctly std::sort(regions.begin(), regions.end(), SplitRegionCmp(_order)); //generate code if(rf != RuleFlavor::SEQUENTIAL) { o.write("GroupedDynamicTask<"+jalib::XToString(regions.size())+">* _split_task " "= new GroupedDynamicTask<"+jalib::XToString(regions.size())+">();"); } for(size_t a=0; a<regions.size(); ++a){ SimpleRegionPtr r= new SimpleRegion(regions[a]); rule.generateCallCode("(*_split_task)["+jalib::XToString(a)+"]", trans, o, r, rf, spatialCallType); if(rf != RuleFlavor::SEQUENTIAL){ for(size_t b=0; b<a; ++b){ if(canDependOn(regions[a], regions[b])){ JTRACE("adding dep")(regions[a])(regions[b]); o.write("(*_split_task)["+jalib::XToString(a)+"]->dependsOn((*_split_task)["+jalib::XToString(b)+"]);"); } } } } if(rf!=RuleFlavor::SEQUENTIAL){ o.write("return petabricks::run_task(_split_task);"); }else{ o.write("return NULL;"); } }
static void *start_wrapper(void *arg) { dmtcp::ThreadInfo::postPthreadCreate(); JASSERT(my_clone_id != -1); /* This start function calls the user's start function. We need this so that we gain control immediately after the user's start function terminates, but before control goes back to libpthread. Libpthread will do some cleanup involving a free() call and some low level locks. Since we can't control the low level locks, we must implement our own lock: thread_transition_mutex. */ void *retval; struct create_arg *createArg = (struct create_arg *)arg; void *(*user_fnc) (void *) = createArg->fn; void *thread_arg = createArg->thread_arg; dmtcp::ThreadInfo::updateState(pthread_self(), createArg->attr, createArg->userStack, createArg->userDetachState); JALLOC_HELPER_FREE(arg); retval = (*user_fnc)(thread_arg); JTRACE ( "User start function over." ); return retval; }
void dmtcp::ProcessInfo::refresh() { _pid = getpid(); _ppid = getppid(); _gid = getpgid(0); _sid = getsid(0); _fgid = -1; // Try to open the controlling terminal int tfd = _real_open("/dev/tty", O_RDWR); if (tfd != -1) { _fgid = tcgetpgrp(tfd); _real_close(tfd); } if (_ppid == 1) { _isRootOfProcessTree = true; _uppid = UniquePid(); } else { _uppid = UniquePid::ParentProcess(); } _procname = jalib::Filesystem::GetProgramName(); _hostname = jalib::Filesystem::GetCurrentHostname(); _upid = UniquePid::ThisProcess(); _noCoordinator = dmtcp_no_coordinator(); char buf[PATH_MAX]; JASSERT(getcwd(buf, sizeof buf) != NULL); _ckptCWD = buf; _sessionIds.clear(); refreshChildTable(); JTRACE("CHECK GROUP PID")(_gid)(_fgid)(_ppid)(_pid); }
/*
 * Child-side fork handler.
 *
 * Does nothing unless pthread_atfork_enabled is set (and clears it when it
 * runs). Otherwise it builds the child's UniquePid from the parent's host id,
 * the child's pid and child_time, resets the per-process DMTCP state (jalib,
 * mutexes, syslog, thread-sync locks, unique pid, log file, process info) and
 * finally re-registers with the coordinator.
 */
LIB_PRIVATE void pthread_atfork_child()
{
  if (!pthread_atfork_enabled) {
    return;
  }
  pthread_atfork_enabled = false;

  // Capture the parent's identity before UniquePid is reset below.
  long host = dmtcp::UniquePid::ThisProcess().hostid();
  dmtcp::UniquePid parent = dmtcp::UniquePid::ThisProcess();
  dmtcp::UniquePid child(host, getpid(), child_time);
  dmtcp::string logName = jalib::Filesystem::GetProgramName() + "_(forked)";

  JALIB_RESET_ON_FORK();
  _dmtcp_remutex_on_fork();
  dmtcp::SyslogCheckpointer::resetOnFork();
  dmtcp::ThreadSync::resetLocks();

  dmtcp::UniquePid::resetOnFork(child);
  dmtcp::Util::initializeLogFile(logName);

  dmtcp::ProcessInfo::instance().resetOnFork();

  JTRACE("fork()ed [CHILD]") (child) (parent);
  dmtcp::DmtcpWorker::resetOnFork(coordinatorAPI.coordinatorSocket());
}
/**
 * Completion hook; this implementation only traces the current _flags.
 */
void petabricks::RemoteObject::onComplete()
{
  JTRACE("complete")(_flags);
}
/**
 * execv() wrapper: forwards to execve() with the current `environ`.
 *
 * @return The result of execve(), which only returns on failure.
 */
extern "C" int execv(const char *path, char *const argv[])
{
  JTRACE ( "execv() wrapper, calling execve with environ" ) ( path );

  const int rc = execve(path, argv, environ);
  return rc;
}
/*
 * Build the environment array for an exec'ed program.
 *
 * The result contains, in this order:
 *   1. every entry of `envp` that is neither a DMTCP-important variable nor
 *      LD_PRELOAD,
 *   2. the current values of the variables listed in ourImportantEnvs,
 *   3. an LD_PRELOAD entry computed by getUpdatedLdPreload() from the user's
 *      own LD_PRELOAD value (NULL when the user did not set one),
 *   4. a terminating NULL.
 *
 * @param envp  Storage for the strings. The entries added in steps 2 and 3
 *              are appended to it; the returned pointers refer into it, so
 *              it must outlive the returned vector and must not be modified
 *              while the result is in use.
 * @return NULL-terminated vector of C-string pointers into `envp`.
 */
static dmtcp::vector<const char*> patchUserEnv (dmtcp::vector<dmtcp::string> &envp)
{
  dmtcp::vector<const char*> envVect;
  const char *userPreloadStr = NULL;

  // The pointers collected below refer into elements of `envp`, and more
  // elements are appended later. Reserve room for all of them up front so
  // that no push_back() reallocates the vector and invalidates pointers that
  // were already taken (including userPreloadStr).
  envp.reserve(envp.size() + ourImportantEnvsCnt + 1);

  dmtcp::ostringstream out;
  out << "non-DMTCP env vars:\n";

  for ( size_t i = 0; i < envp.size(); i++) {
    if ( isImportantEnv ( envp[i].c_str() ) ) {
      if (dbg) {
        out << " skipping: " << envp[i] << '\n';
      }
      continue;
    }
    if (dmtcp::Util::strStartsWith(envp[i], "LD_PRELOAD=")) {
      // Remember the user's value; it is merged into our own LD_PRELOAD.
      userPreloadStr = envp[i].c_str() + strlen("LD_PRELOAD=");
      continue;
    }

    envVect.push_back (envp[i].c_str());
    if (dbg) {
      out << " addenv[user]:" << envVect.back() << '\n';
    }
  }
  JTRACE ( "Creating a copy of (non-DMTCP) user env vars..." ) (out.str());

  // Pack up our ENV into the new ENV.
  // str(s) would leave the write position at the start of the buffer and the
  // header would be overwritten, so clear the stream and stream the header.
  out.str("");
  out << "DMTCP env vars:\n";

  for ( size_t i = 0; i < ourImportantEnvsCnt; ++i ) {
    const char *v = getenv ( ourImportantEnvs[i] );
    if ( v == NULL ) {
      continue;
    }
    envp.push_back ( dmtcp::string ( ourImportantEnvs[i] ) + '=' + v );
    envVect.push_back ( envp.back().c_str() );
    if (dbg) {
      out << " addenv[dmtcp]:" << envVect.back() << '\n';
    }
  }

  dmtcp::string ldPreloadStr = "LD_PRELOAD=";
  ldPreloadStr += getUpdatedLdPreload(userPreloadStr);
  envp.push_back(ldPreloadStr);
  envVect.push_back(envp.back().c_str());
  if (dbg) {
    out << " addenv[dmtcp]:" << envVect.back() << '\n';
  }

  JTRACE ( "patching user envp..." ) (out.str());

  envVect.push_back ( NULL );

  JTRACE ( "Done patching environ" );
  return envVect;
}
string writeScript(const string& ckptDir, bool uniqueCkptFilenames, const time_t& ckptTimeStamp, const uint32_t theCheckpointInterval, const int thePort, const UniquePid& compId, const map<string, vector<string> >& restartFilenames) { ostringstream o; string uniqueFilename; o << string(ckptDir) << "/" << RESTART_SCRIPT_BASENAME << "_" << compId; if (uniqueCkptFilenames) { o << "_" << std::setw(5) << std::setfill('0') << compId.computationGeneration(); } o << "." << RESTART_SCRIPT_EXT; uniqueFilename = o.str(); const bool isSingleHost = (restartFilenames.size() == 1); map< string, vector<string> >::const_iterator host; size_t numPeers; for (host = restartFilenames.begin(); host != restartFilenames.end(); host++) { numPeers += host->second.size(); } vector<string>::const_iterator file; char hostname[80]; char timestamp[80]; gethostname ( hostname, 80 ); JTRACE ( "writing restart script" ) ( uniqueFilename ); FILE* fp = fopen ( uniqueFilename.c_str(),"w" ); JASSERT ( fp!=0 )(JASSERT_ERRNO)( uniqueFilename ) .Text ( "failed to open file" ); fprintf ( fp, "%s", header ); fprintf ( fp, "%s", checkLocal ); fprintf ( fp, "%s", slurmHelperContactFunction ); fprintf ( fp, "%s", usage ); ctime_r(&ckptTimeStamp, timestamp); // Remove the trailing '\n' timestamp[strlen(timestamp) - 1] = '\0'; fprintf ( fp, "ckpt_timestamp=\"%s\"\n\n", timestamp ); fprintf ( fp, "coord_host=$" ENV_VAR_NAME_HOST "\n" "if test -z \"$" ENV_VAR_NAME_HOST "\"; then\n" " coord_host=%s\nfi\n\n" "coord_port=$" ENV_VAR_NAME_PORT "\n" "if test -z \"$" ENV_VAR_NAME_PORT "\"; then\n" " coord_port=%d\nfi\n\n" "checkpoint_interval=$" ENV_VAR_CKPT_INTR "\n" "if test -z \"$" ENV_VAR_CKPT_INTR "\"; then\n" " checkpoint_interval=%d\nfi\n" "export DMTCP_CHECKPOINT_INTERVAL=${checkpoint_interval}\n\n", hostname, thePort, theCheckpointInterval ); fprintf ( fp, "%s", cmdlineArgHandler ); fprintf ( fp, "dmt_rstr_cmd=%s/" DMTCP_RESTART_CMD "\n" "which $dmt_rstr_cmd > /dev/null 2>&1" " || dmt_rstr_cmd=" 
DMTCP_RESTART_CMD "\n" "which $dmt_rstr_cmd > /dev/null 2>&1" " || echo \"$0: $dmt_rstr_cmd not found\"\n" "which $dmt_rstr_cmd > /dev/null 2>&1 || exit 1\n\n", jalib::Filesystem::GetProgramDir().c_str()); fprintf ( fp, "# Number of hosts in the computation = %zu\n" "# Number of processes in the computation = %zu\n\n", restartFilenames.size(), numPeers ); if ( isSingleHost ) { JTRACE ( "Single HOST" ); host=restartFilenames.begin(); ostringstream o; for ( file=host->second.begin(); file!=host->second.end(); ++file ) { o << " " << *file; } fprintf ( fp, "given_ckpt_files=\"%s\"\n\n", o.str().c_str()); fprintf ( fp, "%s", singleHostProcessing ); } else { fprintf ( fp, "%s", "# SYNTAX:\n" "# :: <HOST> :<MODE>: <CHECKPOINT_IMAGE> ...\n" "# Host names and filenames must not include \':\'\n" "# At most one fg (foreground) mode allowed; it must be last.\n" "# \'maybexterm\' and \'maybebg\' are set from <MODE>.\n"); fprintf ( fp, "%s", "worker_ckpts=\'" ); for ( host=restartFilenames.begin(); host!=restartFilenames.end(); ++host ) { fprintf ( fp, "\n :: %s :bg:", host->first.c_str() ); for ( file=host->second.begin(); file!=host->second.end(); ++file ) { fprintf ( fp," %s", file->c_str() ); } } fprintf ( fp, "%s", "\n\'\n\n" ); fprintf( fp, "# Check for resource manager\n" "ibrun_path=$(which ibrun 2> /dev/null)\n" "if [ ! 
-n \"$ibrun_path\" ]; then\n" " discover_rm_path=$(which dmtcp_discover_rm)\n" " if [ -n \"$discover_rm_path\" ]; then\n" " eval $(dmtcp_discover_rm -t)\n" " srun_path=$(which srun 2> /dev/null)\n" " llaunch=`which dmtcp_rm_loclaunch`\n" " if [ $RES_MANAGER = \"SLURM\" ] && [ -n \"$srun_path\" ]; then\n" " eval $(dmtcp_discover_rm -n \"$worker_ckpts\")\n" " if [ -n \"$DMTCP_DISCOVER_RM_ERROR\" ]; then\n" " echo \"Restart error: $DMTCP_DISCOVER_RM_ERROR\"\n" " echo \"Allocated resources: $manager_resources\"\n" " exit 0\n" " fi\n" " export DMTCP_REMLAUNCH_NODES=$DMTCP_REMLAUNCH_NODES\n" " bound=$(($DMTCP_REMLAUNCH_NODES - 1))\n" " for i in $(seq 0 $bound); do\n" " eval \"val=\\${DMTCP_REMLAUNCH_${i}_SLOTS}\"\n" " export DMTCP_REMLAUNCH_${i}_SLOTS=\"$val\"\n" " bound2=$(($val - 1))\n" " for j in $(seq 0 $bound2); do\n" " eval \"ckpts=\\${DMTCP_REMLAUNCH_${i}_${j}}\"\n" " export DMTCP_REMLAUNCH_${i}_${j}=\"$ckpts\"\n" " done\n" " done\n" " if [ \"$DMTCP_DISCOVER_PM_TYPE\" = \"HYDRA\" ]; then\n" " export DMTCP_SRUN_HELPER_SYNCFILE=`mktemp ./tmp.XXXXXXXXXX`\n" " rm $DMTCP_SRUN_HELPER_SYNCFILE\n" " dmtcp_srun_helper -r $srun_path \"$llaunch\"\n" " if [ ! -f $DMTCP_SRUN_HELPER_SYNCFILE ]; then\n" " echo \"Error launching application\"\n" " exit 1\n" " fi\n" " # export helper contact info\n" " . 
$DMTCP_SRUN_HELPER_SYNCFILE\n" " pass_slurm_helper_contact \"$DMTCP_LAUNCH_CKPTS\"\n" " rm $DMTCP_SRUN_HELPER_SYNCFILE\n" " dmtcp_restart --join --coord-host $DMTCP_COORD_HOST" " --coord-port $DMTCP_COORD_PORT" " $DMTCP_LAUNCH_CKPTS\n" " else\n" " DMTCP_REMLAUNCH_0_0=\"$DMTCP_REMLAUNCH_0_0" " $DMTCP_LAUNCH_CKPTS\"\n" " $srun_path \"$llaunch\"\n" " fi\n" " exit 0\n" " elif [ $RES_MANAGER = \"TORQUE\" ]; then\n" " #eval $(dmtcp_discover_rm \"$worker_ckpts\")\n" " #if [ -n \"$new_worker_ckpts\" ]; then\n" " # worker_ckpts=\"$new_worker_ckpts\"\n" " #fi\n" " eval $(dmtcp_discover_rm -n \"$worker_ckpts\")\n" " if [ -n \"$DMTCP_DISCOVER_RM_ERROR\" ]; then\n" " echo \"Restart error: $DMTCP_DISCOVER_RM_ERROR\"\n" " echo \"Allocated resources: $manager_resources\"\n" " exit 0\n" " fi\n" " arguments=\"PATH=$PATH DMTCP_COORD_HOST=$DMTCP_COORD_HOST" " DMTCP_COORD_PORT=$DMTCP_COORD_PORT\"\n" " arguments=$arguments\" DMTCP_CHECKPOINT_INTERVAL=$DMTCP_CHECKPOINT_INTERVAL\"\n" " arguments=$arguments\" DMTCP_TMPDIR=$DMTCP_TMPDIR\"\n" " arguments=$arguments\" DMTCP_REMLAUNCH_NODES=$DMTCP_REMLAUNCH_NODES\"\n" " bound=$(($DMTCP_REMLAUNCH_NODES - 1))\n" " for i in $(seq 0 $bound); do\n" " eval \"val=\\${DMTCP_REMLAUNCH_${i}_SLOTS}\"\n" " arguments=$arguments\" DMTCP_REMLAUNCH_${i}_SLOTS=\\\"$val\\\"\"\n" " bound2=$(($val - 1))\n" " for j in $(seq 0 $bound2); do\n" " eval \"ckpts=\\${DMTCP_REMLAUNCH_${i}_${j}}\"\n" " arguments=$arguments\" DMTCP_REMLAUNCH_${i}_${j}=\\\"$ckpts\\\"\"\n" " done\n" " done\n" " pbsdsh -u \"$llaunch\" \"$arguments\"\n" " exit 0\n" " fi\n" " fi\n" "fi\n" "\n\n" ); fprintf ( fp, "%s", multiHostProcessing ); } fclose ( fp ); { string filename = RESTART_SCRIPT_BASENAME "." RESTART_SCRIPT_EXT; string dirname = jalib::Filesystem::DirName(uniqueFilename); int dirfd = open(dirname.c_str(), O_DIRECTORY | O_RDONLY); JASSERT(dirfd != -1) (dirname) (JASSERT_ERRNO); /* Set execute permission for user. 
*/ struct stat buf; JASSERT(::stat(uniqueFilename.c_str(), &buf) == 0); JASSERT(chmod(uniqueFilename.c_str(), buf.st_mode | S_IXUSR) == 0); // Create a symlink from // dmtcp_restart_script.sh -> dmtcp_restart_script_<curCompId>.sh unlink(filename.c_str()); JTRACE("linking \"dmtcp_restart_script.sh\" filename to uniqueFilename") (filename) (dirname) (uniqueFilename); // FIXME: Handle error case of symlink() JWARNING(symlinkat(basename(uniqueFilename.c_str()), dirfd, filename.c_str()) == 0) (JASSERT_ERRNO); JASSERT(close(dirfd) == 0); } return uniqueFilename; }
void waitForCheckpointCommand() { uint32_t ckptInterval = SharedData::getCkptInterval(); struct timeval tmptime = { 0, 0 }; long remaining = ckptInterval; do { struct timeval *timeout = NULL; struct timeval start; if (ckptInterval > 0) { timeout = &tmptime; timeout->tv_sec = remaining; JASSERT(gettimeofday(&start, NULL) == 0) (JASSERT_ERRNO); } // This call to poll() does nothing and returns. // But we want to find address of poll() using dlsym/libc before // allowing the user thread to continue. poll(NULL, 0, 0); if (sem_launch_first_time) { // Release user thread now that we've initialized the checkpoint thread. // This code is reached if the --no-coordinator flag is used. sem_post(&sem_launch); sem_launch_first_time = false; } struct pollfd socketFd = {0}; socketFd.fd = coordinatorSocket; socketFd.events = POLLIN; uint64_t millis = timeout ? ((timeout->tv_sec * (uint64_t)1000) + (timeout->tv_usec / 1000)) : -1; int retval = poll(&socketFd, 1, millis); if (retval == 0) { // timeout expired, time for checkpoint JTRACE("Timeout expired, checkpointing now."); return; } else if (retval > 0) { JASSERT(socketFd.revents & POLLIN); JTRACE("Connect request on virtual coordinator socket."); break; } JASSERT(errno == EINTR) (JASSERT_ERRNO); /* EINTR: a signal was caught */ if (ckptInterval > 0) { struct timeval end; JASSERT(gettimeofday(&end, NULL) == 0) (JASSERT_ERRNO); remaining -= end.tv_sec - start.tv_sec; // If the remaining time is negative, we can checkpoint now if (remaining < 0) { return; } } } while (remaining > 0); jalib::JSocket cmdSock(-1); DmtcpMessage msg; DmtcpMessage reply(DMT_USER_CMD_RESULT); do { cmdSock.close(); jalib::JServerSocket sock(coordinatorSocket); cmdSock = sock.accept(); msg.poison(); JTRACE("Reading from incoming connection..."); cmdSock >> msg; } while (!cmdSock.isValid()); JASSERT(msg.type == DMT_USER_CMD) (msg.type) .Text("Unexpected connection."); reply.coordCmdStatus = CoordCmdStatus::NOERROR; bool exitWhenDone = false; switch 
(msg.coordCmd) { // case 'b': case 'B': // prefix blocking command, prior to checkpoint // command // JTRACE("blocking checkpoint beginning..."); // blockUntilDone = true; // break; case 's': case 'S': JTRACE("Received status command"); reply.numPeers = 1; reply.isRunning = 1; break; case 'c': case 'C': JTRACE("checkpointing..."); break; case 'k': case 'K': case 'q': case 'Q': JTRACE("Received KILL command from user, exiting"); exitWhenDone = true; break; default: JTRACE("unhandled user command") (msg.coordCmd); reply.coordCmdStatus = CoordCmdStatus::ERROR_INVALID_COMMAND; } cmdSock << reply; cmdSock.close(); if (exitWhenDone) { _real_exit(0); } }
/**
 * Start a dmtcp_coordinator on the local host.
 *
 * Refuses (assertion) to do so when the configured coordinator host is not
 * the local machine. Otherwise a listener socket is created on the configured
 * port, moved to PROTECTED_COORD_FD, and a child process executes
 * dmtcp_coordinator from the directory of the current program. The parent
 * closes its copy of the listener fd and waits for the child.
 *
 * @param mode  Coordinator mode used to look up host and port.
 */
void startNewCoordinator(CoordinatorMode mode)
{
  string host;
  int port;
  getCoordHostAndPort(mode, host, &port);

  JASSERT(strcmp(host.c_str(), "localhost") == 0 ||
          strcmp(host.c_str(), "127.0.0.1") == 0 ||
          jalib::Filesystem::GetCurrentHostname() == host.c_str())
    (host) (jalib::Filesystem::GetCurrentHostname())
    .Text("Won't automatically start coordinator because DMTCP_HOST"
          " is set to a remote host.");

  // Create a socket and bind it to an unused port.
  errno = 0;
  jalib::JServerSocket coordinatorListenerSocket(jalib::JSockAddr::ANY,
                                                 port, 128);
  JASSERT(coordinatorListenerSocket.isValid())
    (coordinatorListenerSocket.port()) (JASSERT_ERRNO) (host) (port)
    .Text("Failed to create socket to coordinator port."
          "\nIf msg is \"Address already in use\","
          " this may be an old coordinator."
          "\nEither try again a few seconds or a minute later,"
          "\nOr kill other coordinators on this host and port:"
          "\n dmtcp_command --coord-host XXX --coord-port XXX"
          "\nOr specify --join-coordinator if joining existing computation.");

  // Now move the listener socket to the protected coordinator fd.
  coordinatorListenerSocket.changeFd(PROTECTED_COORD_FD);
  setCoordPort(coordinatorListenerSocket.port());

  JTRACE("Starting a new coordinator automatically.")
    (coordinatorListenerSocket.port());

  const pid_t childPid = fork();
  // Report a failed fork() directly; otherwise it would only surface as a
  // confusing wait() failure in the parent branch below.
  JASSERT(childPid != -1) (JASSERT_ERRNO)
    .Text("fork() failed while starting dmtcp_coordinator");

  if (childPid == 0) {
    /* NOTE: This code assumes that dmtcp_launch (the current program)
     * and dmtcp_coordinator are in the same directory. Namely,
     * GetProgramDir() gets the dir of the current program (dmtcp_launch).
     * Hence, if dmtcp_coordinator is in a different directory, then
     * jalib::Filesystem::GetProgramDir() + "/dmtcp_coordinator"
     * will not exist, and the child will fail.
     */
    // We can't use Util::getPath() here since the SharedData has not been
    // initialized yet.
    string coordinator =
      jalib::Filesystem::GetProgramDir() + "/dmtcp_coordinator";

    char *modeStr = (char *)"--daemon";
    char *args[] = {
      (char *)coordinator.c_str(),
      (char *)"--quiet",
      /* If we wish to also suppress coordinator warnings, call --quiet twice */
      (char *)"--exit-on-last",
      modeStr,
      NULL
    };
    execv(args[0], args);
    JASSERT(false)(coordinator)(JASSERT_ERRNO).Text(
      "exec(dmtcp_coordinator) failed");
  } else {
    int status;
    _real_close(PROTECTED_COORD_FD);
    JASSERT(wait(&status) > 0) (JASSERT_ERRNO);
  }
}
/**
 * pipe() wrapper: pipes are promoted to AF_UNIX stream socketpairs.
 *
 * @param fds  Receives the two descriptors.
 * @return The result of socketpair().
 */
extern "C" int pipe(int fds[2])
{
  JTRACE ( "promoting pipe() to socketpair()" );

  // Just promote pipes to socketpairs.
  const int rc = socketpair(AF_UNIX, SOCK_STREAM, 0, fds);
  return rc;
}
void dmtcp::ProcessInfo::growStack() { /* Grow the stack to the stack limit */ struct rlimit rlim; size_t stackSize; const rlim_t eightMB = 8 * MB; JASSERT(getrlimit(RLIMIT_STACK, &rlim) == 0) (JASSERT_ERRNO); if (rlim.rlim_cur == RLIM_INFINITY) { if (rlim.rlim_max == RLIM_INFINITY) { stackSize = 8 * 1024 * 1024; } else { stackSize = MIN(rlim.rlim_max, eightMB); } } else { stackSize = rlim.rlim_cur; } // Find the current stack area and heap ProcMapsArea area; bool flag = false; size_t allocSize; void *tmpbuf; int fd = _real_open("/proc/self/maps", O_RDONLY); JASSERT(fd != -1) (JASSERT_ERRNO); while (Util::readProcMapsLine(fd, &area)) { if (strcmp(area.name, "[heap]") == 0) { // Record start of heap which will later be used to restore heap _savedHeapStart = (unsigned long) area.addr; } if ((VA) &area >= area.addr && (VA) &area < area.endAddr) { // Stack found flag = true; break; } } _real_close(fd); JTRACE("Original stack area") ((void*)area.addr) (area.size); JASSERT(flag && area.addr != NULL); // Grow the stack { allocSize = stackSize - area.size - 4095; tmpbuf = alloca(allocSize); JASSERT(tmpbuf != NULL) (JASSERT_ERRNO); memset(tmpbuf, 0, allocSize); } #ifdef DEBUG { int fd = _real_open("/proc/self/maps", O_RDONLY); JASSERT(fd != -1) (JASSERT_ERRNO); while (Util::readProcMapsLine(fd, &area)) { if ((VA)&area >= area.addr && (VA)&area < area.endAddr) { // Stack found area = area; break; } } _real_close(fd); JTRACE("New stack size") ((void*)area.addr) (area.size); } #endif }
/*
 * Cancel any pending alarm and remember in timeLeft how many seconds were
 * left on it.
 */
static void checkpoint()
{
  timeLeft = alarm(0);
  JTRACE("*** Alarm stopped. ***") (timeLeft);
}
/**
 * Notification hook; this implementation only traces _flags and the received
 * argument.
 *
 * @param arg  Value delivered with the notification.
 */
void petabricks::RemoteObject::onNotify(int arg)
{
  JTRACE("notify")(_flags)(arg);
}
static void ptrace_single_step_thread(dmtcp::Inferior *inferiorInfo, int isRestart) { struct user_regs_struct regs; long peekdata; long low, upp; int status; unsigned long addr; unsigned long int eflags; pid_t inferior = inferiorInfo->tid(); pid_t superior = GETTID(); int last_command = inferiorInfo->lastCmd(); char inferior_st = inferiorInfo->state(); while(1) { int status; JASSERT(_real_ptrace(PTRACE_SINGLESTEP, inferior, 0, 0) != -1) (superior) (inferior) (JASSERT_ERRNO); if (_real_wait4(inferior, &status, 0, NULL) == -1) { JASSERT(_real_wait4(inferior, &status, __WCLONE, NULL) != -1) (superior) (inferior) (JASSERT_ERRNO); } if (WIFEXITED(status)) { JTRACE("thread is dead") (inferior) (WEXITSTATUS(status)); } else if(WIFSIGNALED(status)) { JTRACE("thread terminated by signal") (inferior); } JASSERT(_real_ptrace(PTRACE_GETREGS, inferior, 0, ®s) != -1) (superior) (inferior) (JASSERT_ERRNO); peekdata = _real_ptrace(PTRACE_PEEKDATA, inferior, (void*) regs.IP_REG, 0); long inst = peekdata & 0xffff; #ifdef __x86_64__ /* For 64 bit architectures. */ if (inst == SIGRETURN_INST_16 && regs.AX_REG == 0xf) { #else /* For 32 bit architectures.*/ if (inst == SIGRETURN_INST_16 && (regs.AX_REG == DMTCP_SYS_sigreturn || regs.AX_REG == DMTCP_SYS_rt_sigreturn)) { #endif if (isRestart) { /* Restart time. 
*/ // FIXME: TODO: if (last_command == PTRACE_SINGLESTEP) { if (regs.AX_REG != DMTCP_SYS_rt_sigreturn) { addr = regs.SP_REG; } else { addr = regs.SP_REG + 8; addr = _real_ptrace(PTRACE_PEEKDATA, inferior, (void*) addr, 0); addr += 20; } addr += EFLAGS_OFFSET; errno = 0; JASSERT ((eflags = _real_ptrace(PTRACE_PEEKDATA, inferior, (void *)addr, 0)) != -1) (superior) (inferior) (JASSERT_ERRNO); eflags |= 0x0100; JASSERT(_real_ptrace(PTRACE_POKEDATA, inferior, (void *)addr, (void*) eflags) != -1) (superior) (inferior) (JASSERT_ERRNO); } else if (inferior_st != PTRACE_PROC_TRACING_STOP) { /* TODO: remove in future as GROUP restore becames stable * - Artem */ JASSERT(_real_ptrace(PTRACE_CONT, inferior, 0, 0) != -1) (superior) (inferior) (JASSERT_ERRNO); } } else { /* Resume time. */ if (inferior_st != PTRACE_PROC_TRACING_STOP) { JASSERT(_real_ptrace(PTRACE_CONT, inferior, 0, 0) != -1) (superior) (inferior) (JASSERT_ERRNO); } } /* In case we have checkpointed at a breakpoint, we don't want to * hit the same breakpoint twice. Thus this code. */ // TODO: FIXME: Replace this code with a raise(SIGTRAP) and see what happens if (inferior_st == PTRACE_PROC_TRACING_STOP) { JASSERT(_real_ptrace(PTRACE_SINGLESTEP, inferior, 0, 0) != -1) (superior) (inferior) (JASSERT_ERRNO); if (_real_wait4(inferior, &status, 0, NULL) == -1) { JASSERT(_real_wait4(inferior, &status, __WCLONE, NULL) != -1) (superior) (inferior) (JASSERT_ERRNO); } } break; } } //while(1) } /* This function detaches the user threads. 
*/ static void ptrace_detach_user_threads () { PtraceProcState pstate; int status; struct rusage rusage; dmtcp::vector<dmtcp::Inferior*> inferiors; inferiors = dmtcp::PtraceInfo::instance().getInferiors(GETTID()); for (size_t i = 0; i < inferiors.size(); i++) { pid_t inferior = inferiors[i]->tid(); void *data = (void*) (unsigned long) dmtcp_get_ckpt_signal(); pstate = procfs_state(inferiors[i]->tid()); if (pstate == PTRACE_PROC_INVALID) { JTRACE("Inferior does not exist.") (inferior); dmtcp::PtraceInfo::instance().eraseInferior(inferior); continue; } inferiors[i]->setState(pstate); inferiors[i]->semInit(); if (inferiors[i]->isCkptThread()) { data = NULL; } int ret = _real_wait4(inferior, &status, __WALL | WNOHANG, &rusage); if (ret > 0) { if (!WIFSTOPPED(status) || WSTOPSIG(status) != dmtcp_get_ckpt_signal()) { inferiors[i]->setWait4Status(&status, &rusage); } } pstate = procfs_state(inferiors[i]->tid()); if (pstate == PTRACE_PROC_RUNNING || pstate == PTRACE_PROC_SLEEPING) { syscall(SYS_tkill, inferior, SIGSTOP); _real_wait4(inferior, &status, __WALL, NULL); JASSERT(_real_wait4(inferior, &status, __WALL | WNOHANG, NULL) == 0) (inferior) (JASSERT_ERRNO); } if (_real_ptrace(PTRACE_DETACH, inferior, 0, data) == -1) { JASSERT(errno == ESRCH) (GETTID()) (inferior) (JASSERT_ERRNO); dmtcp::PtraceInfo::instance().eraseInferior(inferior); continue; } pstate = procfs_state(inferiors[i]->tid()); if (pstate == PTRACE_PROC_STOPPED) { kill(inferior, SIGCONT); } JTRACE("Detached thread") (inferior); } }
bool OpenCLUtil::buildKernel(cl_program& clprog, cl_kernel& clkern, const char* clsrc) { std::string cachefile = srcToCacheFile(clsrc); cl_int err; size_t num_devices = 0; size_t binSize[MAX_DEVICES]; unsigned char* binary[MAX_DEVICES]; memset(binary, 0, sizeof binary); memset(binSize, 0, sizeof binSize); // Source for kernel. cl_context ctx = OpenCLUtil::getContext(); #ifndef MAC if(jalib::Filesystem::FileExists(cachefile + "_0")) { cl_platform_id platform = getPlatform(); JASSERT(platform != NULL); cl_uint device_count; JASSERT( CL_SUCCESS == clGetDeviceIDs( platform, CL_DEVICE_TYPE_ALL, 0, NULL, &device_count ) ).Text("Failed to get device count"); // Get device IDs. cl_device_id* device_ids = new cl_device_id[ device_count ]; JASSERT( CL_SUCCESS == clGetDeviceIDs( platform, CL_DEVICE_TYPE_ALL, device_count, device_ids, &device_count ) ).Text("Failed to get device IDs"); num_devices = device_count; JASSERT(num_devices < MAX_DEVICES); for(int i=0; i<num_devices; ++i) { FILE* binfile = fopen((cachefile+"_"+jalib::XToString(i)).c_str(), "rb"); JASSERT(binfile!=NULL)(cachefile).Text("failed to open file"); JASSERT(fread(&binSize[i], sizeof(size_t), 1, binfile)>0); binary[i] = new unsigned char[binSize[i]]; JASSERT(fread(binary[i], sizeof(char), binSize[i], binfile)>0); fclose(binfile); } //JTRACE("loading cached opencl")(num_devices)(binSize[0])(binSize[1])(cachefile); const unsigned char** binary_c = (const unsigned char**)binary; cl_int binary_status, errcode_ret; clprog = clCreateProgramWithBinary( ctx, num_devices, device_ids, binSize, binary_c, &binary_status, &errcode_ret); //JASSERT( CL_SUCCESS == errcode_ret).Text( "Failed to create program." ); err = clBuildProgram( clprog, 0, NULL, NULL, NULL, NULL); JASSERT( CL_SUCCESS == err ).Text( "Failed to build program." ); clkern = clCreateKernel( clprog, "kernel_main", &err ); JASSERT( CL_SUCCESS == err ).Text( "Failed to create kernel." 
); for(int i=0; i<num_devices; ++i) { delete [] (binary[i]); } //JTRACE("cache hit"); return true; } #endif // Build program. clprog = clCreateProgramWithSource( ctx, 1, (const char **)&clsrc, NULL, &err ); JASSERT( CL_SUCCESS == err ).Text( "Failed to create program." ); err = clBuildProgram( clprog, 0, NULL, NULL, NULL, NULL); JASSERT( CL_SUCCESS == err ).Text( "Failed to build program." ); // Create kernel. clkern = clCreateKernel( clprog, "kernel_main", &err ); JASSERT( CL_SUCCESS == err ).Text( "Failed to create kernel." ); #ifndef MAC err = clGetProgramInfo(clprog, CL_PROGRAM_BINARY_SIZES, sizeof(binSize), binSize, &num_devices); JASSERT( CL_SUCCESS == err ).Text( "Failed to extract binary sizes." ); JASSERT((num_devices % sizeof(size_t)) == 0); num_devices /= sizeof(size_t); JASSERT(num_devices < MAX_DEVICES); for(int i=0; i<num_devices; ++i) { binary[i] = new unsigned char[binSize[i]]; } err = clGetProgramInfo(clprog, CL_PROGRAM_BINARIES, sizeof binary, binary, NULL); JASSERT( CL_SUCCESS == err ).Text( "Failed to extract binaries." ); JTRACE("creating cached opencl")(num_devices)(binSize[0])(binSize[1])(cachefile); for(int i=0; i<num_devices; ++i) { FILE* binfile = fopen((cachefile+"_"+jalib::XToString(i)).c_str(), "wb"); JASSERT(binfile!=NULL)(cachefile).Text("failed to open file"); JASSERT(fwrite(&binSize[i], sizeof(size_t), 1, binfile)>0); JASSERT(fwrite(binary[i], sizeof(char), binSize[i], binfile)>0); fclose(binfile); } for(int i=0; i<num_devices; ++i) { delete [] (binary[i]); } #endif return true; }
/*
 * DMTCP plugin event hook for the performance-counter statistics.
 *
 * Behaviour is selected through environment variables:
 *   - DMTCP_START_CTRS_ON_RESTART_STRATEGY unset: on INIT the handlers are
 *     installed via setup_handlers() and the stats filename is resolved from
 *     STATFILE.
 *   - DMTCP_START_CTRS_ON_RESTART_STRATEGY set: on RESTART the counters are
 *     set up via setup_perf_ctr() and a flag is raised; the next WRITE_CKPT
 *     with that flag raised writes the counters to STATFILE through
 *     read_ctrs() and clears the flag.  RESUME_USER_THREAD re-resolves the
 *     stats filename.
 *   - DMTCP_KILL_ON_RESUME_STRATEGY set: the process exits with status 0 on
 *     RESUME.
 *
 * Every event is then forwarded with DMTCP_NEXT_EVENT_HOOK.
 */
void dmtcp_event_hook(DmtcpEvent_t event, DmtcpEventData_t *data)
{
  static char *filename = NULL;
  static bool restartingFromCkpt = false;
  static FILE *outfp = NULL;

  switch (event) {
    case DMTCP_EVENT_INIT:
      if (getenv("DMTCP_START_CTRS_ON_RESTART_STRATEGY")) {
        break;
      }
      setup_handlers();
      filename = getStatsFilename(getenv("STATFILE"));
      JWARNING(filename != NULL).Text("Could not get the stats filename in the init event.");
      JTRACE("Filename: ")(filename);
      break;

    case DMTCP_EVENT_WRITE_CKPT:
      JTRACE("CHKP");
      if (!getenv("DMTCP_START_CTRS_ON_RESTART_STRATEGY")) {
        break;
      }
      filename = getenv("STATFILE");
      if (!restartingFromCkpt) {
        break;
      }
      /* First checkpoint after a restart: dump the counters. */
      JTRACE("WRITE CHKP");
      JASSERT(filename);
      outfp = fopen(filename, "w+");
      if (!outfp) {
        perror("Error opening stats file in w+ mode");
        JASSERT(false);
      }
      read_ctrs(outfp);
      fclose(outfp);
      restartingFromCkpt = false;
      break;

    case DMTCP_EVENT_RESUME:
      if (getenv("DMTCP_KILL_ON_RESUME_STRATEGY")) {
        exit(0);
      }
      break;

    case DMTCP_EVENT_RESTART:
      if (!getenv("DMTCP_START_CTRS_ON_RESTART_STRATEGY")) {
        break;
      }
      restartingFromCkpt = true;
      filename = getStatsFilename(getenv("STATFILE"));
      JWARNING(filename != NULL).Text("Could not get the stats filename in the restart event.");
      JTRACE("Filename: ")(filename);
      JWARNING(setup_perf_ctr()).Text("Error setting up perf ctrs.");
      break;

    case DMTCP_EVENT_RESUME_USER_THREAD:
      if (!getenv("DMTCP_START_CTRS_ON_RESTART_STRATEGY")) {
        break;
      }
      filename = getStatsFilename(getenv("STATFILE"));
      JWARNING(filename != NULL).Text("Could not get the stats filename in the resume_user_thread event.");
      JTRACE("Filename: ")(filename);
      break;

    default:
      break;
  }

  DMTCP_NEXT_EVENT_HOOK(event, data);
}
int main ( int argc, char** argv ) { initializeJalib(); if (! getenv(ENV_VAR_QUIET)) setenv(ENV_VAR_QUIET, "0", 0); processArgs(&argc, &argv); // If --ssh-slave and --prefix both are present, verify that the prefix-dir // of this binary (dmtcp_checkpoint) is same as the one provided with // --prefix if (isSSHSlave && getenv(ENV_VAR_PREFIX_PATH) != NULL) { const char *str = getenv(ENV_VAR_PREFIX_PATH); dmtcp::string prefixDir = jalib::Filesystem::ResolveSymlink(str); dmtcp::string programPrefixDir = jalib::Filesystem::DirName(jalib::Filesystem::GetProgramDir()); JASSERT(prefixDir == programPrefixDir) (prefixDir) (programPrefixDir); } dmtcp::UniquePid::setTmpDir(getenv(ENV_VAR_TMPDIR)); dmtcp::UniquePid::ThisProcess(true); dmtcp::Util::initializeLogFile(); #ifdef FORKED_CHECKPOINTING /* When this is robust, add --forked-checkpointing option on command-line, * with #ifdef FORKED_CHECKPOINTING around the option, change default of * configure.ac, dmtcp/configure.ac, to enable, and change them * from enable-forked... to disable-... */ setenv(ENV_VAR_FORKED_CKPT, "1", 1); #endif if (jassert_quiet == 0) JASSERT_STDERR << DMTCP_BANNER; // This code will go away when zero-mapped pages are implemented in MTCP. struct rlimit rlim; getrlimit(RLIMIT_STACK, &rlim); if (rlim.rlim_cur > 256*1024*1024 && rlim.rlim_cur != RLIM_INFINITY) JASSERT_STDERR << "*** WARNING: RLIMIT_STACK > 1/4 GB. This causes each thread to" "\n*** receive a 1/4 GB stack segment. Checkpoint/restart will be slow," "\n*** and will potentially break if many threads are created." "\n*** Suggest setting (sh/bash): ulimit -s 10000" "\n*** (csh/tcsh): limit stacksize 10000" "\n*** prior to using DMTCP. (This will be fixed in the future, when" "\n*** DMTCP supports restoring zero-mapped pages.)\n\n\n" ; // Remove this when zero-mapped pages are supported. For segments with // no file backing: Start with 4096 (page) offset and keep doubling offset // until finding region of memory segment with many zeroes. 
// Then mark as CS_ZERO_PAGES in MTCP instead of CS_RESTORE (or mark // entire segment as CS_ZERO_PAGES and then overwrite with CS_RESTORE // region for portion to be read back from checkpoint image. // For CS_ZERO_PAGES region, mmap // on restart, but don't write in zeroes. // Also, after checkpointing segment, munmap zero pages, and mmap them again. // Don't try to find all pages. The above strategy may increase // the non-zero-mapped mapped pages to no more than double the actual // non-zero region (assuming that the zero-mapped pages are contiguous). // - Gene testMatlab(argv[0]); testJava(argv); // Warn that -Xmx flag needed to limit virtual memory size // If dmtcphijack.so is in standard search path and _also_ has setgid access, // then LD_PRELOAD will work. // Otherwise, it will only work if the application does not use setuid and // setgid access. So, we test // if the application does not use // setuid/setgid. (See 'man ld.so') // FIXME: ALSO DO THIS FOR execwrappers.cpp:dmtcpPrepareForExec() // Should pass dmtcphijack.so path, and let testSetuid determine // if setgid is set for it. If so, no problem: continue. // If not, call testScreen() and adapt 'screen' to run using // Util::patchArgvIfSetuid(argv[0], argv, &newArgv) (which shouldn't // will just modify argv[0] to point to /tmp/dmtcp-USER@HOST/screen // and other modifications: doesn't need newArgv). // If it's not 'screen' and if no setgid for dmtcphijack.so, then testSetuid // should issue the warning, unset our LD_PRELOAD, and hope for the best. // A program like /usr/libexec/utempter/utempter (Fedora path) // is short-lived and can be safely run. Ideally, we should // disable checkpoints while utempter is running, and enable checkpoints // when utempter finishes. See possible model at // execwrappers.cpp:execLibProcessAndExit(), since the same applies // to running /lib/libXXX.so for running libraries as executables. if (testSetuid(argv[0])) { char **newArgv; // THIS NEXT LINE IS DANGEROUS. 
MOST setuid PROGRAMS CAN'T RUN UNPRIVILEGED dmtcp::Util::patchArgvIfSetuid(argv[0], argv, &newArgv); argv = newArgv; }; if (argc > 0) { JTRACE("dmtcp_checkpoint starting new program:")(argv[0]); } //set up CHECKPOINT_DIR if(getenv(ENV_VAR_CHECKPOINT_DIR) == NULL){ const char* ckptDir = get_current_dir_name(); if(ckptDir != NULL ){ //copy to private buffer static dmtcp::string _buf = ckptDir; ckptDir = _buf.c_str(); }else{ ckptDir="."; } setenv ( ENV_VAR_CHECKPOINT_DIR, ckptDir, 0 ); JTRACE("setting " ENV_VAR_CHECKPOINT_DIR)(ckptDir); } dmtcp::string stderrDevice = jalib::Filesystem::ResolveSymlink ( _stderrProcPath() ); //TODO: // When stderr is a pseudo terminal for IPC between parent/child processes, // this logic fails and JASSERT may write data to FD 2 (stderr). // This will cause problems in programs that use FD 2 (stderr) for // algorithmic things ... if ( stderrDevice.length() > 0 && jalib::Filesystem::FileExists ( stderrDevice ) ) setenv ( ENV_VAR_STDERR_PATH,stderrDevice.c_str(), 0 ); else// if( isSSHSlave ) setenv ( ENV_VAR_STDERR_PATH, "/dev/null", 0 ); if ( getenv(ENV_VAR_SIGCKPT) != NULL ) setenv ( "MTCP_SIGCKPT", getenv(ENV_VAR_SIGCKPT), 1); else unsetenv("MTCP_SIGCKPT"); if ( checkpointOpenFiles ) setenv( ENV_VAR_CKPT_OPEN_FILES, "1", 0 ); else unsetenv( ENV_VAR_CKPT_OPEN_FILES); #ifdef PID_VIRTUALIZATION setenv( ENV_VAR_ROOT_PROCESS, "1", 1 ); #endif bool isElf, is32bitElf; if (dmtcp::Util::elfType(argv[0], &isElf, &is32bitElf) == -1) { // Couldn't read argv_buf // FIXME: This could have been a symbolic link. Don't issue an error, // unless we're sure that the executable is not readable. 
JASSERT_STDERR << "*** ERROR: Executable to run w/ DMTCP appears not to be readable,\n" "*** or no such executable in path.\n\n" << argv[0] << "\n"; exit(DMTCP_FAIL_RC); } else { #if defined(__x86_64__) && !defined(CONFIG_M32) if (is32bitElf) JASSERT_STDERR << "*** ERROR: You appear to be checkpointing " << "a 32-bit target under 64-bit Linux.\n" << "*** If this fails, then please try re-configuring DMTCP:\n" << "*** configure --enable-m32 ; make clean ; make\n\n"; #endif testStaticallyLinked(argv[0]); } // UNSET DISPLAY environment variable. unsetenv("DISPLAY"); // FIXME: Unify this code with code prior to execvp in execwrappers.cpp // Can use argument to dmtcpPrepareForExec() or getenv("DMTCP_...") // from DmtcpWorker constructor, to distinguish the two cases. dmtcp::Util::adjustRlimitStack(); // FIXME: This call should be moved closer to call to execvp(). dmtcp::Util::prepareDlsymWrapper(); if (autoStartCoordinator) dmtcp::DmtcpCoordinatorAPI::startCoordinatorIfNeeded(allowedModes); dmtcp::DmtcpCoordinatorAPI coordinatorAPI; pid_t virtualPid = coordinatorAPI.getVirtualPidFromCoordinator(); if (virtualPid != -1) { JTRACE("Got virtual pid from coordinator") (virtualPid); dmtcp::Util::setVirtualPidEnvVar(virtualPid, getppid()); } // preloadLibs are to set LD_PRELOAD: // LD_PRELOAD=PLUGIN_LIBS:UTILITY_DIR/dmtcphijack.so:R_LIBSR_UTILITY_DIR/ dmtcp::string preloadLibs = ""; // FIXME: If the colon-separated elements of ENV_VAR_PLUGIN are not // absolute pathnames, then they must be expanded to absolute pathnames. // Warn user if an absolute pathname is not valid. 
if ( getenv(ENV_VAR_PLUGIN) != NULL ) { preloadLibs += getenv(ENV_VAR_PLUGIN); preloadLibs += ":"; } // FindHelperUtiltiy requires ENV_VAR_UTILITY_DIR to be set dmtcp::string searchDir = jalib::Filesystem::GetProgramDir(); setenv ( ENV_VAR_UTILITY_DIR, searchDir.c_str(), 0 ); #ifdef PTRACE preloadLibs += jalib::Filesystem::FindHelperUtility ( "ptracehijack.so" ); preloadLibs += ":"; #endif preloadLibs += jalib::Filesystem::FindHelperUtility ( "dmtcphijack.so" ); #ifdef PID_VIRTUALIZATION preloadLibs += ":"; preloadLibs += jalib::Filesystem::FindHelperUtility ( "pidvirt.so" ); #endif setenv(ENV_VAR_HIJACK_LIBS, preloadLibs.c_str(), 1); // If dmtcp_checkpoint was called with user LD_PRELOAD, and if // if dmtcp_checkpoint survived the experience, then pass it back to user. if (getenv("LD_PRELOAD")) preloadLibs = preloadLibs + ":" + getenv("LD_PRELOAD"); setenv ( "LD_PRELOAD", preloadLibs.c_str(), 1 ); JTRACE("getting value of LD_PRELOAD")(getenv("LD_PRELOAD")); //run the user program char **newArgv = NULL; if (testScreen(argv, &newArgv)) execvp ( newArgv[0], newArgv ); else execvp ( argv[0], argv ); //should be unreachable JASSERT_STDERR << "ERROR: Failed to exec(\"" << argv[0] << "\"): " << JASSERT_ERRNO << "\n" << "Perhaps it is not in your $PATH?\n" << "See `dmtcp_checkpoint --help` for usage.\n"; //fprintf(stderr, theExecFailedMsg, argv[0], JASSERT_ERRNO); return -1; }
static void ptrace_attach_threads(int isRestart) { pid_t inferior; int status; dmtcp::vector<dmtcp::Inferior*> inferiors; inferiors = dmtcp::PtraceInfo::instance().getInferiors(GETTID()); if (inferiors.size() == 0) { return; } JTRACE("Attaching to inferior threads") (GETTID()); // Attach to all inferior user threads. for (size_t i = 0; i < inferiors.size(); i++) { inferior = inferiors[i]->tid(); JASSERT(inferiors[i]->state() != PTRACE_PROC_INVALID) (GETTID()) (inferior); if (!inferiors[i]->isCkptThread()) { JASSERT(_real_ptrace(PTRACE_ATTACH, inferior, 0, 0) != -1) (GETTID()) (inferior) (JASSERT_ERRNO); JASSERT(_real_wait4(inferior, &status, __WALL, NULL) != -1) (inferior) (JASSERT_ERRNO); JASSERT(_real_ptrace(PTRACE_SETOPTIONS, inferior, 0, inferiors[i]->getPtraceOptions()) != -1) (GETTID()) (inferior) (inferiors[i]->getPtraceOptions()) (JASSERT_ERRNO); // Run all user threads until the end of syscall(DMTCP_FAKE_SYSCALL) dmtcp::PtraceInfo::instance().processPreResumeAttach(inferior); ptrace_wait_for_inferior_to_reach_syscall(inferior, DMTCP_FAKE_SYSCALL); } } // Attach to and run all user ckpthreads until the end of syscall(DMTCP_FAKE_SYSCALL) for (size_t i = 0; i < inferiors.size(); i++) { inferior = inferiors[i]->tid(); if (inferiors[i]->isCkptThread()) { JASSERT(_real_ptrace(PTRACE_ATTACH, inferior, 0, 0) != -1) (GETTID()) (inferior) (JASSERT_ERRNO); JASSERT(_real_wait4(inferior, &status, __WALL, NULL) != -1) (inferior) (JASSERT_ERRNO); JASSERT(_real_ptrace(PTRACE_SETOPTIONS, inferior, 0, inferiors[i]->getPtraceOptions()) != -1) (GETTID()) (inferior) (inferiors[i]->getPtraceOptions()) (JASSERT_ERRNO); // Wait for all inferiors to execute dummy syscall 'DMTCP_FAKE_SYSCALL'. 
dmtcp::PtraceInfo::instance().processPreResumeAttach(inferior); ptrace_wait_for_inferior_to_reach_syscall(inferior, DMTCP_FAKE_SYSCALL); } } // Singlestep all user threads out of the signal handler for (size_t i = 0; i < inferiors.size(); i++) { int lastCmd = inferiors[i]->lastCmd(); inferior = inferiors[i]->tid(); if (!inferiors[i]->isCkptThread()) { /* After attach, the superior needs to singlestep the inferior out of * stopthisthread, aka the signal handler. */ ptrace_single_step_thread(inferiors[i], isRestart); if (inferiors[i]->isStopped() && (lastCmd == PTRACE_CONT || lastCmd == PTRACE_SYSCALL)) { JASSERT(_real_ptrace(lastCmd, inferior, 0, 0) != -1) (GETTID()) (inferior) (JASSERT_ERRNO); } } } // Move ckpthreads to next step (depending on state) for (size_t i = 0; i < inferiors.size(); i++) { int lastCmd = inferiors[i]->lastCmd(); inferior = inferiors[i]->tid(); if (inferiors[i]->isCkptThread() && !inferiors[i]->isStopped() && (lastCmd == PTRACE_CONT || lastCmd == PTRACE_SYSCALL)) { JASSERT(_real_ptrace(lastCmd, inferior, 0, 0) != -1) (GETTID()) (inferior) (JASSERT_ERRNO); } } JTRACE("thread done") (GETTID()); }