/*
 * Re-executes the current program image without checkpointing.
 *
 * programName must be one of the DMTCP utilities listed in the assertion
 * below.  args holds the argument strings that are handed to exec().
 * On success this function does not return: the process image is replaced.
 * If exec() fails, the final JASSERT reports the failure.
 */
static void processDmtcpCommands(dmtcp::string programName,
                                 dmtcp::vector<dmtcp::string>& args)
{
  JASSERT (programName == "dmtcp_coordinator" ||
           programName == "dmtcp_checkpoint" ||
           programName == "dmtcp_restart" ||
           programName == "dmtcp_command" ||
           programName == "mtcp_restart");

  // The coordinator connection must not stay open in the re-executed program.
  _real_close ( PROTECTED_COORD_FD );

  /*
   * When running gdb or any shell which does a waitpid() on the child
   * processes, executing dmtcp_command from within gdb session / shell results
   * in process getting hung up because:
   *   gdb shell dmtcp_command -c => hangs because gdb forks off a new process
   *   and it does a waitpid (in which we block signals) ...
   *
   * For dmtcp_command we therefore fork: the parent exits right away and the
   * child carries on to the exec() below.
   */
  if (programName == "dmtcp_command") {
    pid_t cpid = _real_fork();
    JASSERT (cpid != -1);
    if (cpid != 0) {
      _real_exit(0);
    }
  }

  // Repack the arguments as a NULL-terminated array of C strings.  The vector
  // is created with one extra slot, and every slot starts out as NULL, so the
  // terminator is already in place.
  dmtcp::vector<char*> argv(args.size() + 1, static_cast<char*>(NULL));
  for (size_t idx = 0; idx < args.size(); ++idx) {
    argv[idx] = const_cast<char*>(args[idx].c_str());
  }

  JNOTE ( "re-running without checkpointing" ) ( programName );

  // Re-run the command with the user's own LD_PRELOAD restored.
  restoreUserLDPRELOAD();
  _real_execvp ( jalib::Filesystem::GetProgramPath().c_str(), &argv[0] );

  // Only reached when exec() failed.
  JASSERT ( false ) (jalib::Filesystem::GetProgramPath()) ( argv[0] )
    ( JASSERT_ERRNO ) .Text ( "exec() failed" );
}
void SetupSessions() { for (size_t j = 0; j < roots.size(); j++) { roots[j].t->setupSessions(); } for (size_t i = 0; i < roots.size(); i++) { for (size_t j = 0; j < roots.size(); j++) { if (i == j) continue; pid_t sid; if ((sid = (roots[i].t)->checkDependence(roots[j].t)) >= 0) { // it2 depends on it1 JTRACE("Root target j depends on Root target i") (i) (roots[i].t->upid()) (j) (roots[j].t->upid()); (roots[i].t)->addRoot(roots[j].t, sid); roots[j].indep = false; } } } }
/*
 * Sends one DMT_PEER_LOOKUP message over the coordinator socket for every
 * entry of conInfoTable.  Each message carries the entry's local address,
 * remote address, address length and connection id.
 *
 * An empty table results in no messages being sent.
 */
void dmtcp::DmtcpWorker::sendPeerLookupRequest (dmtcp::vector<TcpConnectionInfo>& conInfoTable )
{
  // size_t index: size() is unsigned, so an int index would be a
  // signed/unsigned comparison.
  for (size_t i = 0; i < conInfoTable.size(); ++i) {
    DmtcpMessage msg;
    msg.type = DMT_PEER_LOOKUP;
    msg.localAddr = conInfoTable[i].localAddr();
    msg.remoteAddr = conInfoTable[i].remoteAddr();
    msg.localAddrlen = conInfoTable[i].addrlen();
    msg.conId = conInfoTable[i].conId();
    _coordinatorSocket << msg;
  }
}
void dmtcp::FileConnList::remapShmMaps() { for (size_t i = 0; i < shmAreas.size(); i++) { Util::ProcMapsArea *area = &shmAreas[i]; FileConnection *fileCon = shmAreaConn[i]; int fd = fileCon->getFds()[0]; JTRACE("Restoring shared memory area") (area->name) (area->addr); void *addr = _real_mmap(area->addr, area->size, area->prot, MAP_FIXED | area->flags, fd, area->offset); JASSERT(addr != MAP_FAILED) (area->flags) (area->prot) (JASSERT_ERRNO) .Text("mmap failed"); _real_close(fd); processClose(fd); } shmAreas.clear(); shmAreaConn.clear(); }
/*
 * Builds a NULL-terminated environment array for the user program.
 *
 * The result contains, in order:
 *   1. every entry of envp that isImportantEnv() does not claim and that is
 *      not an LD_PRELOAD setting,
 *   2. "NAME=value" for each name in ourImportantEnvs that is set in the
 *      current environment,
 *   3. an LD_PRELOAD entry produced by getUpdatedLdPreload() from the user's
 *      own LD_PRELOAD value (NULL when the user did not set one),
 *   4. a terminating NULL.
 *
 * envp is modified: the strings for (2) and (3) are appended to it.  The
 * returned pointers refer to the storage of the strings inside envp, so envp
 * must stay alive and unmodified for as long as the result is used.
 */
static dmtcp::vector<const char*> patchUserEnv (dmtcp::vector<dmtcp::string> &envp)
{
  dmtcp::vector<const char*> envVect;
  const char *userPreloadStr = NULL;

  // The pointers collected below point into the strings held by envp.  We
  // later append up to ourImportantEnvsCnt + 1 more strings; if that made the
  // vector reallocate, its strings would be relocated and the pointers taken
  // earlier could dangle.  Reserving the full capacity before taking any
  // pointer guarantees that the later push_back() calls never reallocate.
  envp.reserve(envp.size() + ourImportantEnvsCnt + 1);

  dmtcp::ostringstream out;
  out << "non-DMTCP env vars:\n";
  for ( size_t i = 0; i < envp.size(); i++) {
    if ( isImportantEnv ( envp[i].c_str() ) ) {
      if (dbg) {
        out << " skipping: " << envp[i] << '\n';
      }
      continue;
    }
    if (dmtcp::Util::strStartsWith(envp[i], "LD_PRELOAD=")) {
      // Remember only the value part; it is merged into our own LD_PRELOAD
      // entry further down instead of being passed through unchanged.
      userPreloadStr = envp[i].c_str() + strlen("LD_PRELOAD=");
      continue;
    }
    envVect.push_back (envp[i].c_str());
    if(dbg) {
      out << " addenv[user]:" << envVect.back() << '\n';
    }
  }
  JTRACE ( "Creating a copy of (non-DMTCP) user env vars..." ) (out.str());

  //pack up our ENV into the new ENV
  // Start a fresh log buffer.  Calling str("header") would leave the write
  // position at the beginning of the buffer, so the following output would
  // overwrite the header; clear the buffer and stream the header instead.
  out.str("");
  out << "DMTCP env vars:\n";
  for ( size_t i=0; i<ourImportantEnvsCnt; ++i ) {
    const char* v = getenv ( ourImportantEnvs[i] );
    if ( v != NULL ) {
      envp.push_back ( dmtcp::string ( ourImportantEnvs[i] ) + '=' + v );
      const char *ptr = envp.back().c_str();
      JASSERT(ptr != NULL);
      envVect.push_back(ptr);
      if(dbg) {
        out << " addenv[dmtcp]:" << envVect.back() << '\n';
      }
    }
  }

  dmtcp::string ldPreloadStr = "LD_PRELOAD=";
  ldPreloadStr += getUpdatedLdPreload(userPreloadStr);

  envp.push_back(ldPreloadStr);
  envVect.push_back(envp.back().c_str());
  if(dbg) {
    out << " addenv[dmtcp]:" << envVect.back() << '\n';
  }

  JTRACE ( "patching user envp..." ) (out.str());

  // execve()-style arrays end with a NULL pointer.
  envVect.push_back ( NULL );

  JTRACE ( "Done patching environ" );
  return envVect;
}
void ProcessGroupInfo() { map<pid_t,session> smap; map<pid_t,session>::iterator it; // 1. divide processes into sessions and groups for (size_t j = 0; j < targets.size(); j++) { ProcessInfo& processInfo = targets[j].getProcessInfo(); JTRACE("Process ") (processInfo.pid()) (processInfo.ppid()) (processInfo.sid()) (processInfo.gid()) (processInfo.fgid()) (processInfo.isRootOfProcessTree()); pid_t sid = processInfo.sid(); pid_t gid = processInfo.gid(); //pid_t fgid = processInfo.fgid(); /* // If Group ID doesn't belong to known PIDs, indicate that fact // using -1 value. if (!virtualPidTable.pidExists(gid)) { JTRACE("DROP gid")(gid); virtualPidTable.setgid(-1); gid = -1; } // If foreground Group ID not belongs to known PIDs, // indicate that fact using -1 value. if (!virtualPidTable.pidExists(fgid)) { JTRACE("DROP fgid")(fgid); virtualPidTable.setfgid(-1); fgid = -1; } */ session &s = smap[sid]; // if this is first element of this session if (s.sid == -2) { s.sid = sid; } ProcessGroup &g = smap[sid].groups[gid]; // if this is first element of Group gid if (g.gid == -2) { g.gid = gid; } g.targets.push_back(&targets[j]); } // 2. 
Check if foreground setting is correct it = smap.begin(); for(;it != smap.end();it++) { session &s = it->second; session::group_it g_it = s.groups.begin(); pid_t fgid = -2; if (s.sid == -1) // skip default bash session all processes will join continue; for(; g_it != s.groups.end();g_it++) { ProcessGroup &g = g_it->second; for(size_t k = 0; k < g.targets.size(); k++) { ProcessInfo& processInfo = g.targets[k]->getProcessInfo(); pid_t cfgid = processInfo.fgid(); if (fgid == -2) { fgid = cfgid; } else if (fgid != -1 && cfgid != -1 && fgid != cfgid) { dmtcp::ostringstream o; // DEBUG PRINTOUT: { session::group_it g_it1 = s.groups.begin(); for(; g_it1 != s.groups.end();g_it1++) { ProcessGroup &g1 = g_it1->second; for(size_t m = 0; m < g1.targets.size() ;m++) { ProcessInfo& pInfo = g1.targets[m]->getProcessInfo(); pid_t pid = pInfo.pid(); pid_t ppid = pInfo.ppid(); pid_t sid = pInfo.sid(); pid_t cfgid = pInfo.fgid(); o << "\n\tPID=" << pid << " PPID=" << ppid << ", SID=" << sid << " <--> FGID = " << cfgid; } } } JASSERT (false) (fgid) (cfgid) (o.str()) .Text("processes from same session have different " "foreground Group ID"); } } JTRACE("Checked ") (fgid); } s.fgid = fgid; if (s.groups.find(s.fgid) == s.groups.end()) { // foreground Group is missing, don't need to change foreground Group s.fgid = -1; } { session::group_it g_it1 = s.groups.begin(); for(; g_it1 != s.groups.end();g_it1++) { ProcessGroup &g1 = g_it1->second; for(size_t m = 0; m < g1.targets.size(); m++) { ProcessInfo& processInfo = g1.targets[m]->getProcessInfo(); pid_t pid = processInfo.pid(); pid_t cfgid = processInfo.fgid(); JTRACE("PID=%d <--> FGID = %d") (pid) (cfgid); } } } } // Print out session mapping. 
JTRACE("Session number:") (smap.size()); it = smap.begin(); for(; it != smap.end(); it++) { session &s = it->second; JTRACE("Session printout:") (s.sid) (s.fgid) (s.upid.toString().c_str()); session::group_it g_it = s.groups.begin(); for(; g_it != s.groups.end();g_it++) { ProcessGroup &g = g_it->second; JTRACE("\tGroup ID: ") (g.gid); } } }
void BuildProcessTree() { for (size_t j = 0; j < targets.size(); ++j) { ProcessInfo& processInfo = targets[j].getProcessInfo(); if (processInfo.isRootOfProcessTree() == true) { // If this process is independent (root of process tree RootTarget rt; rt.t = &targets[j]; rt.indep = true; roots.push_back(rt); targets[j].markUsed(); } else if (!targets[j].isMarkedUsed()) { // We set used flag if we use target as somebody's child. // If it is used, then there is no need to check if it is root. // Iterate through all targets and try to find the one who has // this process as their child process. JTRACE("Process is not root of process tree: try to find if it has parent"); bool is_root = true; for (size_t i = 0; i < targets.size(); i++) { if (i == j) continue; ProcessInfo &pInfo = targets[i].getProcessInfo(); ProcessInfo::iterator it; // Search inside the child list of target[j], make sure that i != j for (it = pInfo.begin(); (it != pInfo.end()); it++) { UniquePid& childUniquePid = it->second; JTRACE("Check child") (childUniquePid) (" parent ") (targets[i].upid()) ("checked ") (targets[j].upid()); if (childUniquePid == targets[j].upid()) { is_root = false; break; } } } JTRACE("Root detection:") (is_root) (targets[j].upid()); if (is_root) { RootTarget rt; rt.t = &targets[j]; rt.indep = true; roots.push_back(rt); targets[j].markUsed(); } } // Add all children ProcessInfo::iterator it; for(it = processInfo.begin(); it != processInfo.end(); it++) { // find target bool found = false; pid_t childVirtualPid = it->first; UniquePid& childUniquePid = it->second; for (size_t i = 0; i < targets.size(); i++) { if (childUniquePid == targets[i].upid()) { found = 1; JTRACE ("Add child to current target") (targets[j].upid()) (childUniquePid); targets[i].markUsed(); targets[j].addChild(&targets[i]); } } if (!found) { JTRACE("Child not found")(childVirtualPid); processInfo.eraseChild(childVirtualPid); } } } }
int main(int argc, char** argv) { bool autoStartCoordinator=true; bool isRestart = true; int allowedModes = dmtcp::DmtcpCoordinatorAPI::COORD_ANY; initializeJalib(); if (!getenv(ENV_VAR_QUIET)) { setenv(ENV_VAR_QUIET, "0", 0); } if (argc == 1) { JASSERT_STDERR << DMTCP_VERSION_AND_COPYRIGHT_INFO; JASSERT_STDERR << "(For help: " << argv[0] << " --help)\n\n"; return DMTCP_FAIL_RC; } //process args shift; while (true) { dmtcp::string s = argc>0 ? argv[0] : "--help"; if (s == "--help" && argc == 1) { JASSERT_STDERR << theUsage; return DMTCP_FAIL_RC; } else if ((s == "--version") && argc == 1) { JASSERT_STDERR << DMTCP_VERSION_AND_COPYRIGHT_INFO; return DMTCP_FAIL_RC; } else if (s == "--no-check") { autoStartCoordinator = false; shift; } else if (s == "-j" || s == "--join") { allowedModes = dmtcp::DmtcpCoordinatorAPI::COORD_JOIN; shift; } else if (s == "-n" || s == "--new") { allowedModes = dmtcp::DmtcpCoordinatorAPI::COORD_NEW; shift; } else if (s == "--new-coordinator") { allowedModes = dmtcp::DmtcpCoordinatorAPI::COORD_FORCE_NEW; shift; } else if (s == "-b" || s == "--batch") { allowedModes = dmtcp::DmtcpCoordinatorAPI::COORD_BATCH; shift; } else if (s == "-i" || s == "--interval" || (s.c_str()[0] == '-' && s.c_str()[1] == 'i' && isdigit(s.c_str()[2]))) { if (isdigit(s.c_str()[2])) { // if -i5, for example setenv(ENV_VAR_CKPT_INTR, s.c_str()+2, 1); shift; } else { // else -i 5 setenv(ENV_VAR_CKPT_INTR, argv[1], 1); shift; shift; } } else if (argc > 1 && (s == "-h" || s == "--host")) { setenv(ENV_VAR_NAME_HOST, argv[1], 1); shift; shift; } else if (argc > 1 && (s == "-p" || s == "--port")) { setenv(ENV_VAR_NAME_PORT, argv[1], 1); shift; shift; } else if (argc > 1 && (s == "-t" || s == "--tmpdir")) { setenv(ENV_VAR_TMPDIR, argv[1], 1); shift; shift; } else if (s == "-q" || s == "--quiet") { *getenv(ENV_VAR_QUIET) = *getenv(ENV_VAR_QUIET) + 1; // Just in case a non-standard version of setenv is being used: setenv(ENV_VAR_QUIET, getenv(ENV_VAR_QUIET), 1); shift; } else 
if ((s.length() > 2 && s.substr(0, 2) == "--") || (s.length() > 1 && s.substr(0, 1) == "-")) { JASSERT_STDERR << "Invalid Argument\n"; JASSERT_STDERR << theUsage; return DMTCP_FAIL_RC; } else if (argc > 1 && s == "--") { shift; break; } else { break; } } dmtcp::UniquePid::setTmpDir(getenv(ENV_VAR_TMPDIR)); dmtcpTmpDir = dmtcp::UniquePid::getTmpDir(); jassert_quiet = *getenv(ENV_VAR_QUIET) - '0'; //make sure JASSERT initializes now, rather than during restart Util::initializeLogFile(); if (jassert_quiet == 0) JASSERT_STDERR << DMTCP_BANNER; if (autoStartCoordinator) dmtcp::DmtcpCoordinatorAPI::startCoordinatorIfNeeded(allowedModes, isRestart); JTRACE("New dmtcp_restart process; _argc_ ckpt images") (argc); bool doAbort = false; for (; argc > 0; shift) { dmtcp::string restorename(argv[0]); struct stat buf; int rc = stat(restorename.c_str(), &buf); if (Util::strStartsWith(restorename, "ckpt_") && Util::strEndsWith(restorename, "_files")) { continue; } else if (!Util::strEndsWith(restorename, ".dmtcp")) { JNOTE("File doesn't have .dmtcp extension. Check Usage.") (restorename); JASSERT_STDERR << theUsage; doAbort = true; } else if (rc == -1) { char error_msg[1024]; sprintf(error_msg, "\ndmtcp_restart: ckpt image %s", restorename.c_str()); perror(error_msg); doAbort = true; } else if (buf.st_uid != getuid()) { /*Could also run if geteuid() matches*/ printf("\nProcess uid (%d) doesn't match uid (%lu) of\n" \ "checkpoint image (%s).\n" \ "This is dangerous. Aborting for security reasons.\n" \ "If you still want to do this (at your own risk),\n" \ " then modify dmtcp/src/%s:%d and re-compile.\n", getuid(), buf.st_uid, restorename.c_str(), __FILE__, __LINE__ - 6); doAbort = true; } if (doAbort) { exit(DMTCP_FAIL_RC); } JTRACE("Will restart ckpt image _argv[0]_") (argv[0]); targets.push_back (RestoreTarget (argv[0])); } if (targets.size() <= 0) { JNOTE("ERROR: No DMTCP checkpoint image(s) found. 
Check Usage."); JASSERT_STDERR << theUsage; exit(DMTCP_FAIL_RC); } // Check that all targets belongs to one computation Group // If not - abort compGroup = targets[0].compGroup(); numPeers = targets[0].numPeers(); for(size_t i=0; i<targets.size(); i++) { JTRACE ("Check targets: ") (targets[i].path()) (targets[i].compGroup()) (targets[i].numPeers()); if (compGroup != targets[i].compGroup()) { JASSERT(false)(compGroup)(targets[i].compGroup()) .Text("ERROR: Restored programs belong to different computation IDs"); } else if (numPeers != targets[i].numPeers()) { JASSERT(false)(numPeers)(targets[i].numPeers()) .Text("ERROR: Different number of processes saved in checkpoint images"); } } SlidingFdTable slidingFd; ConnectionToFds conToFd; ostringstream out; out << "will restore:\n"; out << "\tfd -> connection-id\n"; ConnectionList& connections = ConnectionList::instance(); ConnectionList::iterator it; for (it = connections.begin(); it != connections.end(); ++it) { int fd = slidingFd.getFdFor(it->first); conToFd[it->first].push_back(fd); out << "\t" << fd << " -> " << (it->first) << " -> " << (it->second)->str() << "\n"; } JTRACE ("Allocating fds for Connections") (out.str()); //------------------------ WorkerState::setCurrentState(WorkerState::RESTARTING); ConnectionState ckptCoord(conToFd); DmtcpCoordinatorAPI coordinatorAPI; restoreSockets(coordinatorAPI, ckptCoord); /* Create the file to hold the pid/tid maps. 
*/ openOriginalToCurrentMappingFiles(); #ifndef PID_VIRTUALIZATION int i = (int)targets.size(); //fork into targs.size() processes while (--i > 0) { int cid = fork(); if (cid == 0) break; else JASSERT(cid > 0); } RestoreTarget& targ = targets[i]; JTRACE("forked, restoring process") (i) (targets.size()) (targ.upid()) (getpid()); //change UniquePid UniquePid::resetOnFork(targ.upid()); //Reconnect to dmtcp_coordinator WorkerState::setCurrentState (WorkerState::RESTARTING); int tmpCoordFd = dup(PROTECTED_COORD_FD); JASSERT(tmpCoordFd != -1); coordinatorAPI.connectToCoordinator(); coordinatorAPI.sendCoordinatorHandshake(targ.procname(), targ.compGroup()); coordinatorAPI.recvCoordinatorHandshake(); close(tmpCoordFd); //restart targets[i] targets[i].dupAllSockets (slidingFd); targets[i].mtcpRestart(); JASSERT(false).Text("unreachable"); return -1; #endif //size_t i = targets.size(); // Create roots vector, assign children to their parents. // Delete children that don't exist. BuildProcessTree(); // Process all checkpoints to find one of them that can switch // needed Group to foreground. ProcessGroupInfo(); // Create session meta-information in each node of the process tree. // Node contains info about all sessions which exists at lower levels. // Also node is aware of session leader existence at lower levels. SetupSessions(); int pgrp_index=-1; JTRACE("Creating ROOT Processes") (roots.size()); for (size_t j = 0 ; j < roots.size(); ++j) { if (roots[j].indep == false) { // We will restore this process from one of the independent roots. 
continue; } if (pgrp_index == -1 && !roots[j].t->isInitChild()) { pgrp_index = j; continue; } pid_t cid = fork(); if (cid == 0) { JTRACE ("Root of process tree") (getpid()) (getppid()); if (roots[j].t->isInitChild()) { JTRACE ("Create init-child process") (getpid()) (getppid()); if (fork()) _exit(0); } roots[j].t->CreateProcess(coordinatorAPI, slidingFd); JASSERT (false) .Text("Unreachable"); } JASSERT (cid > 0); if (roots[j].t->isInitChild()) { waitpid(cid, NULL, 0); } } JTRACE("Restore processes without corresponding Root Target"); int flat_index = -1; size_t j = 0; if (pgrp_index < 0) { // No root processes at all // Find first flat process that can replace currently running // dmtcp_restart context. for (j = 0; j < targets.size(); ++j) { if (!targets[j].isMarkedUsed()) { // Save first flat-like process to be restored after all others flat_index = j; j++; break; } } } // Use j set to 0 (if at least one root non-init-child process exists), // or else j set to some value if no such process found. for(; j < targets.size(); ++j) { if (!targets[j].isMarkedUsed()) { if (pgrp_index < 0) { // Save first flat-like process to be restored after all others pgrp_index = j; continue; } else { targets[j].CreateProcess(coordinatorAPI, slidingFd); JTRACE("Need in flat-like restore for process") (targets[j].upid()); } } } if (pgrp_index >= 0) { JTRACE("Restore first Root Target")(roots[pgrp_index].t->upid()); roots[pgrp_index].t->CreateProcess(coordinatorAPI, slidingFd); } else if (flat_index >= 0) { JTRACE("Restore first Flat Target")(targets[flat_index].upid()); targets[flat_index].CreateProcess(coordinatorAPI, slidingFd); } else { // FIXME: Under what conditions will this path be exercised? JNOTE ("unknown type of target?") (targets[flat_index].path()); } // #endif }