static void runCoordinatorCmd(char c, int *coordCmdStatus = NULL, int *numPeers = NULL, int *isRunning = NULL) { _dmtcp_lock(); { CoordinatorAPI coordinatorAPI; dmtcp_disable_ckpt(); coordinatorAPI.connectAndSendUserCommand(c, coordCmdStatus, numPeers, isRunning); dmtcp_enable_ckpt(); } _dmtcp_unlock(); }
int main ( int argc, char** argv ) { bool quiet = false; dmtcp::string interval = ""; dmtcp::string request = "h"; initializeJalib(); Util::initializeLogFile(); //process args shift; while(argc>0){ dmtcp::string s = argv[0]; if((s=="--help" || s=="-h") && argc==1){ fprintf(stderr, theUsage, ""); return 1; } else if ((s=="--version") && argc==1){ JASSERT_STDERR << DMTCP_VERSION_AND_COPYRIGHT_INFO; return 1; }else if(argc>1 && (s == "-h" || s == "--host")){ setenv(ENV_VAR_NAME_HOST, argv[1], 1); shift; shift; }else if(argc>1 && (s == "-p" || s == "--port")){ setenv(ENV_VAR_NAME_PORT, argv[1], 1); shift; shift; }else if(s == "--quiet"){ quiet = true; shift; }else if(s == "h" || s == "-h" || s == "--help" || s == "?"){ fprintf(stderr, theUsage, ""); return 1; }else{ // else it's a request char* cmd = argv[0]; //ignore leading dashes while(*cmd == '-') cmd++; s = cmd; if(*cmd == 'b' && *(cmd+1) != 'c'){ // If blocking ckpt, next letter must be 'c'; else print the usage fprintf(stderr, theUsage, ""); return 1; } else if (*cmd == 's' || *cmd == 'i' || *cmd == 'c' || *cmd == 'b' || *cmd == 'f' || *cmd == 'k' || *cmd == 'q') { request = s; if (*cmd == 'i') { if (isdigit(cmd[1])) { // if -i5, for example interval = cmd+1; } else { // else -i 5 if (argc == 1) { fprintf(stderr, theUsage, ""); return 1; } interval = argv[1]; shift; } } shift; }else{ fprintf(stderr, theUsage, ""); return 1; } } } if (! quiet) printf( "DMTCP-" PACKAGE_VERSION " (+ MTCP), Copyright (C) 2006-2011" " Jason Ansel, Michael Rieker,\n" " Kapil Arya, and Gene Cooperman\n" "This program comes with ABSOLUTELY NO WARRANTY.\n" "This is free software, and you are welcome to redistribute it\n" "under certain conditions; see COPYING file for details.\n" "(Use flag \"--quiet\" to hide this message.)\n\n"); int coordErrorCode = CoordinatorAPI::NOERROR; int numPeers; int isRunning; CoordinatorAPI coordinatorAPI; char *cmd = (char *)request.c_str(); switch (*cmd) { case 'h': fprintf(stderr, theUsage, ""); return 1; case 'i': setenv(ENV_VAR_CKPT_INTR, interval.c_str(), 1); coordinatorAPI.connectAndSendUserCommand(*cmd, &coordErrorCode); printf("Interval changed to %s\n", interval.c_str()); break; case 'b': // blocking prefix coordinatorAPI.connectAndSendUserCommand(*cmd, &coordErrorCode); // actual command coordinatorAPI.connectAndSendUserCommand(*(cmd+1), &coordErrorCode); break; case 's': coordinatorAPI.connectAndSendUserCommand(*cmd, &coordErrorCode, &numPeers, &isRunning); case 'c': case 'f': case 'k': case 'q': coordinatorAPI.connectAndSendUserCommand(*cmd, &coordErrorCode); break; } //check for error if (coordErrorCode != CoordinatorAPI::NOERROR) { switch(coordErrorCode){ case CoordinatorAPI::ERROR_COORDINATOR_NOT_FOUND: if (getenv("DMTCP_PORT")) fprintf(stderr, "Coordinator not found. Please check port and host.\n"); else fprintf(stderr, "Coordinator not found. Try specifying port with \'--port\'.\n"); break; case CoordinatorAPI::ERROR_INVALID_COMMAND: fprintf(stderr, "Unknown command: %c, try 'dmtcp_command --help'\n", *cmd); break; case CoordinatorAPI::ERROR_NOT_RUNNING_STATE: fprintf(stderr, "Error, computation not in running state." " Either a checkpoint is\n" " currently happening or there are no connected processes.\n"); break; default: fprintf(stderr, "Unknown error\n"); break; } return 2; } if(*cmd == 's'){ if (getenv(ENV_VAR_NAME_HOST)) printf(" Host: %s\n", getenv(ENV_VAR_NAME_HOST)); printf(" Port: %s\n", getenv(ENV_VAR_NAME_PORT)); printf("Status...\n"); printf("NUM_PEERS=%d\n", numPeers); printf("RUNNING=%s\n", (isRunning?"yes":"no")); } return 0; }
int main ( int argc, char** argv ) { string interval = ""; string request = "h"; initializeJalib(); // No need to initialize the log file. // Util::initializeLogFile(); //process args shift; while(argc>0){ string s = argv[0]; if((s=="--help" || s=="-h") && argc==1){ printf("%s", theUsage); return 1; } else if ((s=="--version") && argc==1){ printf("%s", DMTCP_VERSION_AND_COPYRIGHT_INFO); return 1; }else if(argc>1 && (s == "-h" || s == "--coord-host" || s == "--host")){ setenv(ENV_VAR_NAME_HOST, argv[1], 1); shift; shift; } else if (argc>1 && (s == "-p" || s == "--coord-port" || s == "--port")) { setenv(ENV_VAR_NAME_PORT, argv[1], 1); shift; shift; } else if (argv[0][0] == '-' && argv[0][1] == 'p' && isdigit(argv[0][2])) { // else if -p0, for example setenv(ENV_VAR_NAME_PORT, argv[0]+2, 1); shift; }else if(s == "h" || s == "-h" || s == "--help" || s == "?"){ fprintf(stderr, theUsage, ""); return 1; }else{ // else it's a request char* cmd = argv[0]; //ignore leading dashes while(*cmd == '-') cmd++; s = cmd; if((*cmd == 'b' || *cmd == 'x') && *(cmd+1) != 'c'){ // If blocking ckpt, next letter must be 'c'; else print the usage fprintf(stderr, theUsage, ""); return 1; } else if (*cmd == 's' || *cmd == 'i' || *cmd == 'c' || *cmd == 'b' || *cmd == 'x' || *cmd == 'k' || *cmd == 'q') { request = s; if (*cmd == 'i') { if (isdigit(cmd[1])) { // if -i5, for example interval = cmd+1; } else { // else -i 5 if (argc == 1) { fprintf(stderr, theUsage, ""); return 1; } interval = argv[1]; shift; } } shift; }else{ fprintf(stderr, theUsage, ""); return 1; } } } int coordCmdStatus = CoordCmdStatus::NOERROR; int numPeers; int isRunning; int ckptInterval; CoordinatorAPI coordinatorAPI; char *cmd = (char *)request.c_str(); switch (*cmd) { case 'h': fprintf(stderr, theUsage, ""); return 1; case 'i': setenv(ENV_VAR_CKPT_INTR, interval.c_str(), 1); coordinatorAPI.connectAndSendUserCommand(*cmd, &coordCmdStatus); printf("Interval changed to %s\n", interval.c_str()); break; case 'b': case 'x': // blocking prefix coordinatorAPI.connectAndSendUserCommand(*cmd, &coordCmdStatus); // actual command coordinatorAPI.connectAndSendUserCommand(*(cmd+1), &coordCmdStatus); break; case 's': coordinatorAPI.connectAndSendUserCommand(*cmd, &coordCmdStatus, &numPeers, &isRunning, &ckptInterval); case 'c': case 'k': case 'q': coordinatorAPI.connectAndSendUserCommand(*cmd, &coordCmdStatus); break; } //check for error if (coordCmdStatus != CoordCmdStatus::NOERROR) { switch(coordCmdStatus){ case CoordCmdStatus::ERROR_COORDINATOR_NOT_FOUND: if (getenv("DMTCP_COORD_PORT") || getenv("DMTCP_PORT")) fprintf(stderr, "Coordinator not found. Please check port and host.\n"); else fprintf(stderr, "Coordinator not found. Try specifying port with \'--port\'.\n"); break; case CoordCmdStatus::ERROR_INVALID_COMMAND: fprintf(stderr, "Unknown command: %c, try 'dmtcp_command --help'\n", *cmd); break; case CoordCmdStatus::ERROR_NOT_RUNNING_STATE: fprintf(stderr, "Error, computation not in running state." " Either a checkpoint is\n" " currently happening or there are no connected processes.\n"); break; default: fprintf(stderr, "Unknown error\n"); break; } return 2; } #define QUOTE(arg) #arg #define STRINGIFY(arg) QUOTE(arg) if(*cmd == 's'){ printf("Coordinator:\n"); char *host = getenv(ENV_VAR_NAME_HOST); if (host == NULL) host = getenv("DMTCP_HOST"); // deprecated printf(" Host: %s\n", (host ? host : "localhost")); char *port = getenv(ENV_VAR_NAME_PORT); if (port == NULL) port = getenv("DMTCP_PORT"); // deprecated printf(" Port: %s\n", (port ? port : STRINGIFY(DEFAULT_PORT) " (default port)")); printf("Status...\n"); printf(" NUM_PEERS=%d\n", numPeers); printf(" RUNNING=%s\n", (isRunning?"yes":"no")); if (ckptInterval) { printf(" CKPT_INTERVAL=%d\n", ckptInterval); } else { printf(" CKPT_INTERVAL=0 (checkpoint manually)\n"); } } return 0; }
void RestoreTarget::CreateProcess(CoordinatorAPI& coordinatorAPI, SlidingFdTable& slidingFd) { //change UniquePid UniquePid::resetOnFork(upid()); //UniquePid::ThisProcess(true) = _conToFd.upid(); Util::initializeLogFile(procname()); JTRACE("Creating process during restart") (upid()) (procname()); JTRACE("")(getpid())(getppid())(getsid(0)); ProcessInfo &pInfo = _processInfo; pid_t psid = pInfo.sid(); JTRACE("Restore /proc/self/* fds"); ConnectionList& connections = ConnectionList::instance(); ConnectionList::iterator it; for (it = connections.begin(); it != connections.end(); ++it) { dmtcp::Connection *con = it->second; if (con->subType() == FileConnection::FILE_PROCFS) { dmtcp::FileConnection *filecon = (dmtcp::FileConnection*) con; char buf[32]; dmtcp::vector<int> fds; fds.push_back(slidingFd.getFdFor(con->id())); sprintf(buf, "/proc/%d/", pInfo.pid()); if (dmtcp::Util::strStartsWith(filecon->filePath(), buf)) { filecon->restore(fds); } } } if (!isSessionLeader()) { // Restore Group information restoreGroup(slidingFd); // If process is not session leader, restore it and all children. t_iterator it = _children.begin(); for (; it != _children.end(); it++) { JTRACE ("Forking Child Process") ((*it)->upid()); pid_t cid = fork(); if (cid == 0) { (*it)->CreateProcess (coordinatorAPI, slidingFd); JASSERT (false) . Text ("Unreachable"); } JASSERT (cid > 0); } } else { // Process is session leader. // There may be not setsid-ed children. for (t_iterator it = _children.begin(); it != _children.end(); it++) { s_iterator sit = (*it)->getSmap().find(psid); JTRACE("Restore processes that were created before their parent called setsid()"); if (sit == (*it)->getSmap().end()) { JTRACE ("Forking Child Process") ((*it)->upid()); pid_t cid = fork(); if (cid == 0) { (*it)->CreateProcess (coordinatorAPI, slidingFd); JASSERT (false) . Text ("Unreachable"); } JASSERT (cid > 0); } } pid_t nsid = setsid(); JTRACE("change SID")(nsid); // Restore Group information restoreGroup(slidingFd); for (t_iterator it = _children.begin(); it != _children.end(); it++) { JTRACE("Restore processes that was created after their parent called setsid()"); s_iterator sit = (*it)->getSmap().find(psid); if (sit != (*it)->getSmap().end()) { JTRACE ("Forking Child Process") ((*it)->upid()); pid_t cid = fork(); if (cid == 0) { (*it)->CreateProcess (coordinatorAPI, slidingFd); JASSERT (false) . Text ("Unreachable"); } JASSERT (cid> 0); } } for (t_iterator it = _roots.begin() ; it != _roots.end(); it++) { JTRACE ("Forking Dependent Root Process") ((*it)->upid()); pid_t cid; if ((cid = fork())) { waitpid(cid, NULL, 0); } else { if (fork()) exit(0); (*it)->CreateProcess(coordinatorAPI, slidingFd); JASSERT (false) . Text("Unreachable"); } } } bool isTheGroupLeader = isGroupLeader(); // Calls JTRACE;avoid recursion JTRACE("Child and dependent root processes forked, restoring process") (upid())(getpid())(isTheGroupLeader); //Reconnect to dmtcp_coordinator WorkerState::setCurrentState (WorkerState::RESTARTING); coordinatorAPI.connectToCoordinator(); coordinatorAPI.sendCoordinatorHandshake(procname(), _processInfo.compGroup()); coordinatorAPI.recvCoordinatorHandshake(); //restart targets[i] dupAllSockets (slidingFd); mtcpRestart(); JASSERT (false).Text ("unreachable"); }