예제 #1
0
// Opens the checkpoint image `path` for reading via open_ckpt_to_read()
// (see the comments on that function) and returns the descriptor.
//
//   path      - checkpoint image to open.
//   offset    - if non-NULL, receives the length of DMTCP_FILE_HEADER.
//   skipBytes - total number of bytes the caller wants skipped, counted
//               from the start of the image; the header length is
//               subtracted from it before anything further is skipped.
//
// Every failure is reported through JASSERT.
int dmtcp::CkptSerializer::openDmtcpCheckpointFile(const dmtcp::string& path,
                                                   int *offset,
                                                   int skipBytes)
{
  const int len = strlen(DMTCP_FILE_HEADER);
  char buf[1024];

  // open_ckpt_to_read() also sets dmtcp::ext_decomp_pid::ConnectionToFds
  int fd = open_ckpt_to_read(path.c_str());
  // Everything below exists for compatibility with the original definition.
  JASSERT(fd >= 0) (path) .Text("Failed to open file.");
  JASSERT(_real_read(fd, buf, len) == len)(path) .Text("_real_read() failed");

  if (strncmp(buf, DMTCP_FILE_HEADER, len) != 0) {
    // Header mismatch: start over with a fresh descriptor positioned at the
    // beginning of the stream.
    close_ckpt_to_read(fd);
    fd = open_ckpt_to_read(path.c_str());
    JASSERT(fd >= 0) (path) .Text("Failed to open file.");
  } else {
    JTRACE("opened checkpoint file [uncompressed]")(path);
  }

  if (offset != NULL) {
    *offset = len;
  }

  // The header accounts for the first part of the requested skip.
  skipBytes -= strlen(DMTCP_FILE_HEADER);
  if (skipBytes > 0) {
    JASSERT(dmtcp::Util::skipBytes(fd, skipBytes) == skipBytes) (skipBytes);
  }
  return fd;
}
예제 #2
0
// Returns true when `path` starts with "<torque_home()>/<relpath>".
//
// If the resource manager type is still Empty, probeTorque() is run first;
// any resource manager other than torque yields false, as does an empty
// torque_home().
bool isTorqueFile(dmtcp::string relpath, dmtcp::string &path)
{
  JTRACE("Start");

  // Detect the resource manager lazily, then insist that it is Torque.
  if( rmgr_type == Empty ){
    probeTorque();
  }
  if( rmgr_type != torque ){
    return false;
  }

  if( torque_home().size() == 0 ){
    return false;
  }

  dmtcp::string abspath = torque_home() + "/" + relpath;
  JTRACE("Compare path with")(path)(abspath);

  // Prefix test: path must be at least as long as abspath and begin with it.
  return path.size() >= abspath.size() &&
         path.compare(0, abspath.size(), abspath) == 0;
}
예제 #3
0
// Strips trailing spaces, forward slashes and backslashes from `path`
// in place.  The first character is never removed, so "/" and "///" both
// end up as "/".  An empty string is left untouched.
static void rem_trailing_slash(dmtcp::string &path)
{
    // Guard: size() - 1 would wrap around for an empty string.
    if( path.empty() )
      return;

    size_t i = path.size() - 1;
    // Test the index first, then the character at that index (the backslash
    // test must look at path[i], not at the whole string).
    while( i > 0 && (path[i] == ' ' || path[i] == '/' || path[i] == '\\') )
      i--;
    if( i+1 < path.size() )
      path.erase(i+1);
}
예제 #4
0
// Returns true when the variable name of `str` (everything before the
// first '=', or the whole string when there is no '=') equals one of the
// ourImportantEnvsCnt entries of ourImportantEnvs.
static bool isImportantEnv ( dmtcp::string str )
{
  // Keep only the NAME part of a "NAME=value" entry.
  const dmtcp::string name(str, 0, str.find("="));

  size_t idx = 0;
  while ( idx < ourImportantEnvsCnt ) {
    if ( name == ourImportantEnvs[idx] ) {
      return true;
    }
    ++idx;
  }
  return false;
}
예제 #5
0
// Returns true when `path` is accepted by isTorqueIOFile() and ends with
// the ".ER" suffix.
bool isTorqueStderr(dmtcp::string &path)
{
  if( !isTorqueIOFile(path) )
    return false;

  const dmtcp::string suffix = ".ER";

  // A path shorter than the suffix cannot end with it; checking here avoids
  // the unsigned underflow of path.size() - suffix.size().
  if( path.size() < suffix.size() )
    return false;

  return path.compare(path.size() - suffix.size(), suffix.size(), suffix) == 0;
}
예제 #6
0
// Collapses every run of consecutive path separators ('/' or '\\') in
// `path` down to its first character, in place.  For example "a//b"
// becomes "a/b" and "a/\\/b" becomes "a/b".
static void clear_path(dmtcp::string &path)
{
  for( size_t i = 0; i < path.size(); i++ ){
    if( path[i] != '/' && path[i] != '\\' )
      continue;

    // Find the end of the separator run that starts at i.  The bounds check
    // comes first so path[j] is never read past the last character.
    size_t j = i+1;
    while( j < path.size() && (path[j] == '/' || path[j] == '\\') ){
      j++;
    }
    if( j != i+1 ){
      path.erase(i+1,j-(i+1));
    }
  }
}
// Opens the file `name` with `flags`, creating it (mode 0600) when it does
// not exist yet.
//
// The first attempt creates the file exclusively (O_EXCL|O_CREAT|O_TRUNC).
// If that fails with EEXIST, the existing file is opened with the caller's
// flags.  Returns the descriptor on success; any other failure triggers
// JASSERT.
int openSharedFile(dmtcp::string name, int flags)
{
  // try to create, truncate & open file
  int fd = open(name.c_str(), O_EXCL|O_CREAT|O_TRUNC | flags, 0600);
  if (fd >= 0) {
    return fd;
  }
  if (errno == EEXIST) {
    // The file already exists: open it as is.  Descriptor 0 is a valid
    // result, so the success test must be >= 0.
    fd = open(name.c_str(), flags, 0600);
    if (fd >= 0) {
      return fd;
    }
  }
  // unable to create & open OR open
  JASSERT(false)(name)(strerror(errno)).Text("Cannot open file");
  return -1;
}
예제 #8
0
// Fills a function-static DmtcpLocalStatus with the current checkpoint and
// restart counters, the checkpoint filename and the unique-pid string, and
// returns its address.  The returned pointers refer to function-static
// storage that is overwritten by the next call.
const DmtcpLocalStatus* __real_dmtcpGetLocalStatus(){
  // Static storage: the C strings handed out below must outlive this call.
  static dmtcp::string ckptFilenameBuf;
  static dmtcp::string uniquePidBuf;
  static DmtcpLocalStatus localStatus;
  ckptFilenameBuf.reserve(1024);

  // Refresh the backing strings first ...
  uniquePidBuf = dmtcp::UniquePid::ThisProcess().toString();
  ckptFilenameBuf = dmtcp::UniquePid::getCkptFilename();

  // ... then publish counters and string pointers.
  localStatus.numCheckpoints     = numCheckpoints;
  localStatus.numRestarts        = numRestarts;
  localStatus.checkpointFilename = ckptFilenameBuf.c_str();
  localStatus.uniquePidStr       = uniquePidBuf.c_str();
  return &localStatus;
}
예제 #9
0
// Runs "pbs-config <option>" in a child process and captures its standard
// output.
//
//   option     - single argument passed to pbs-config.
//   pbs_config - receives the captured output; only modified on success.
//
// Returns 0 when the child exited normally with status 0, and -1 when the
// pipe or fork failed, waitpid() failed, or the child did not exit cleanly.
static int queryPbsConfig(dmtcp::string option, dmtcp::string &pbs_config)
{
  int fds[2];
  const char *pbs_config_path = "pbs-config";
  // Deliberately NOT static: a static array would be initialized on the
  // first call only and keep pointing at that call's (destroyed) `option`.
  const char *pbs_config_args[] = { "pbs-config", option.c_str(), NULL };
  int cpid;

  if( pipe(fds) == -1){
    // just go away - we cannot serve this request
    JTRACE("Cannot create pipe to execute pbs-config to find Torque/PBS library!");
    return -1;
  }

  cpid = _real_fork();

  if( cpid < 0 ){
    JTRACE( "ERROR: cannot execute pbs-config. Will not run tm_spawn!");
    close(fds[0]);
    close(fds[1]);
    return -1;
  }
  if( cpid == 0 ){
    JTRACE ( "child process, will exec into pbs-config");
    // NOTE(review): the triple dup() presumably moves the write end above
    // descriptors 0-2 before it is dup2()'ed onto stdout -- confirm.
    fds[1] = dup(dup(dup(fds[1])));
    close(fds[0]);
    JASSERT(dup2(fds[1], STDOUT_FILENO) == STDOUT_FILENO);
    close(fds[1]);
    _real_execvp(pbs_config_path, (char **)pbs_config_args);
    /* should not get here */
    JASSERT(false)("ERROR: Failed to exec pbs-config. tm_spawn will fail with TM_BADINIT")(strerror(errno));
    exit(0);
  }

  /* parent process */
  JTRACE ( "created child process for pbs-config")(cpid);

  // Close our copy of the write end so that read() reports end-of-file once
  // the child is done writing.
  close(fds[1]);

  // Drain the pipe BEFORE waiting for the child: a child that produces more
  // output than the pipe can buffer would otherwise block forever.
  dmtcp::string output;
  char buf[256];
  for(;;){
    ssize_t count = read(fds[0], buf, sizeof(buf) - 1);
    if( count > 0 ){
      buf[count] = '\0';
      output += buf;
      continue;
    }
    if( count < 0 && errno == EINTR ){
      continue;  // interrupted by a signal, retry
    }
    break;       // end-of-file or unrecoverable error
  }
  close(fds[0]);

  int status;
  if( waitpid(cpid,&status,0) < 0 ){
    return -1;
  }
  if( !( WIFEXITED(status) && WEXITSTATUS(status) == 0 ) ){
    return -1;
  }

  pbs_config = output;
  JTRACE ( "pbs-config output:")(pbs_config);
  return 0;
}
예제 #10
0
// Decides whether `path` looks like a Torque/PBS stdout/stderr file stored
// under the user's home directory.
//
// The path must
//   - be longer than $HOME and start with it,
//   - end with ".OU" or ".ER", and
//   - start with either "$HOME/.pbs_spool/<torque_jobid>" or
//     "$HOME/<torque_jobid>".
//
// Returns false when HOME is not set or any of the conditions fails.
bool isTorqueHomeFile(dmtcp::string &path)
{
  // check if file is in home directory
  const char *ptr = getenv("HOME");
  if( ptr == NULL ){
    JTRACE("Cannot determine user HOME directory!");
    return false;
  }

  dmtcp::string hpath = ptr;
  JTRACE("Home directory:")(hpath)(path);

  if( hpath.size() >= path.size() ){
    JTRACE("Length of path is less than home dir");
    return false;
  }

  if( path.substr(0,hpath.size()) != hpath ){
    JTRACE("prefix of path is not home directory")(path)(hpath);
    return false;
  }

  dmtcp::string suffix1 = ".OU", suffix2 = ".ER";

  // Guard the suffix comparisons against paths shorter than the suffix so
  // path.size() - suffix.size() cannot wrap around.
  const bool hasOU = path.size() >= suffix1.size() &&
    path.compare(path.size() - suffix1.size(), suffix1.size(), suffix1) == 0;
  const bool hasER = path.size() >= suffix2.size() &&
    path.compare(path.size() - suffix2.size(), suffix2.size(), suffix2) == 0;
  if( !( hasOU || hasER ) ){
    JTRACE("path has no .OU or .ER suffix")(path);
    return false;
  }

  char jobid[256];
  snprintf(jobid, sizeof(jobid), "%lu", torque_jobid);

  // Make sure exactly one '/' separates $HOME from what follows, whether or
  // not HOME itself ends with a slash.
  dmtcp::string base = hpath;
  if( base.empty() || base[base.size() - 1] != '/' ){
    base += "/";
  }
  dmtcp::string spool_path = base + ".pbs_spool/" + jobid;
  dmtcp::string home_path = base + jobid;

  if( path.substr(0,spool_path.size()) == spool_path ){
    JTRACE("File is located in $HOME/.pbs_spool/. It is Torque/PBS stdio file")(path);
    return true;
  }

  if( path.substr(0,home_path.size()) == home_path ){
    JTRACE("File is located in $HOME/. It is Torque/PBS stdio file")(path);
    return true;
  }

  return false;
}
예제 #11
0
// Scans /proc/self/maps for the first mapping whose name contains
// "libtorque".
//
// Each maps line has the form
//   "<start addr>-<end addr> <mode> <offset> <device> <inode> <libpath>"
// and only the <libpath> part (area.name) is inspected.
//
// Returns 0 with `libpath` set to the matching name, or -1 when the file
// cannot be opened or no mapping matches.  `libpath` is overwritten with
// the name of every line inspected, so on a -1 result it holds whatever
// the last inspected line contained.
int findLibTorque_maps(dmtcp::string &libpath)
{
  dmtcp::Util::ProcMapsArea area;

  int fd = _real_open ( "/proc/self/maps", O_RDONLY);
  if( fd < 0 ){
    JTRACE("Cannot open /proc/self/maps file");
    return -1;
  }

  // Walk the mappings until the first libtorque entry shows up.
  bool found = false;
  while( !found && dmtcp::Util::readProcMapsLine(fd, &area) ){
    libpath = area.name;
    JNOTE("Inspect new /proc/seft/maps line")(libpath);

    if( libpath.size() == 0 ){
      JNOTE("anonymous region, skip");
    }else if( libpath.find("libtorque") == dmtcp::string::npos ){
      JNOTE("Not a libtorque region")(libpath);
    }else{
      // this mapping's name contains libtorque, which is what we need
      JTRACE("Torque PBS libpath")(libpath);
      found = true;
    }
  }

  _real_close(fd);
  return found ? 0 : -1;
}
// In DEBUG builds, appends a short banner to the log file at `path` stating
// that the process exec()'d into a new program, together with the program
// name and the new log file path.  The file is left untouched if it cannot
// be opened.  In non-DEBUG builds this function does nothing.
static void writeCurrentLogFileNameToPrevLogFile(dmtcp::string& path)
{
#ifdef DEBUG
  dmtcp::ostringstream o;
  o << "========================================\n"
    << "This process exec()'d into a new program\n"
    << "Program Name: " << jalib::Filesystem::GetProgramName() << "\n"
    << "New JAssertLog Path: " << getLogFilePath() << "\n"
    << "========================================\n";

  int fd = open(path.c_str(), O_WRONLY | O_APPEND, 0);
  if (fd != -1) {
    // Materialize the message once; o.str() returns a fresh copy each call.
    const dmtcp::string msg = o.str();
    Util::writeAll(fd, msg.c_str(), msg.length());
    // Only close a descriptor that was actually opened.
    _real_close(fd);
  }
#endif
}
예제 #13
0
// Resolves the symlink `path` and, when the result has the form
// "/proc/<pid>/..." and dmtcp_real_to_virtual_pid is available, rewrites
// <pid> through dmtcp_real_to_virtual_pid().  Returns the (possibly
// rewritten) resolved path.
static dmtcp::string _resolveSymlink(dmtcp::string path)
{
  dmtcp::string device = jalib::Filesystem::ResolveSymlink(path);
  if (dmtcp_real_to_virtual_pid && path.length() > 0 &&
      dmtcp::Util::strStartsWith(device, "/proc/")) {
    const size_t index = 6;  // length of "/proc/"
    char *rest;
    char newpath[128];
    JASSERT(device.length() < sizeof newpath);
    // Parse the pid from the resolved name: `device` is the string known to
    // start with "/proc/", whereas `path` may be shorter or unrelated.
    pid_t realPid = strtol(device.c_str() + index, &rest, 0);
    if (realPid > 0 && *rest == '/') {
      pid_t virtualPid = dmtcp_real_to_virtual_pid(realPid);
      // The virtual pid may have more digits than the real one, so bound
      // the write and verify that nothing was truncated.
      int n = snprintf(newpath, sizeof newpath, "/proc/%d%s", virtualPid, rest);
      JASSERT(n > 0 && (size_t) n < sizeof newpath) (device);
      device = newpath;
    }
  }
  return device;
}
예제 #14
0
// Opens the checkpoint image `path` and returns a descriptor positioned
// just past DMTCP_FILE_HEADER.
//
// If the file itself starts with the header it is returned directly.
// Otherwise the file is piped through "gzip -d" (launched via
// _real_popen()) and the descriptor of that pipe is returned after the
// header has been verified on the decompressed stream.  Every failure is
// reported through JASSERT.
int dmtcp::CkptSerializer::openDmtcpCheckpointFile(const dmtcp::string& path){
  int fd = open( path.c_str(), O_RDONLY);
  JASSERT(fd>=0)(path).Text("Failed to open file.");
  char buf[512];
  const int len = strlen(DMTCP_FILE_HEADER);
  JASSERT(_real_read(fd, buf, len)==len)(path).Text("_real_read() failed");
  if(strncmp(buf, DMTCP_FILE_HEADER, len)==0){
    JTRACE("opened checkpoint file [uncompressed]")(path);
    return fd;
  }

  close(fd);

  // The path is embedded in a single-quoted shell word.  Escape embedded
  // single quotes ( ' -> '\'' ) so that an unusual file name cannot end the
  // quoting and be interpreted by the shell.
  dmtcp::string quotedPath;
  for (size_t i = 0; i < path.length(); ++i) {
    if (path[i] == '\'') {
      quotedPath += "'\\''";
    } else {
      quotedPath += path[i];
    }
  }

  dmtcp::string cmd = dmtcp::string()+"exec gzip -d - < '"+quotedPath+"'";
  // The FILE* is intentionally not closed: its descriptor is handed to the
  // caller.
  FILE* t = _real_popen(cmd.c_str(),"r");
  JASSERT(t!=NULL)(path)(cmd).Text("Failed to launch gzip.");
  JTRACE ( "created gzip child process to uncompress checkpoint file");
  fd = fileno(t);
  JASSERT(_real_read(fd, buf, len)==len)(cmd)(path).Text("Invalid checkpoint file");
  JASSERT(strncmp(buf, DMTCP_FILE_HEADER, len)==0)(path).Text("Invalid checkpoint file");
  JTRACE("opened checkpoint file [compressed]")(path);
  return fd;
}
const char* dmtcp_get_uniquepid_str()
{
  static dmtcp::string uniquepid_str;
  uniquepid_str = dmtcp::UniquePid::ThisProcess(true).toString();
  return uniquepid_str.c_str();
}
예제 #16
0
// Program entry point (its own diagnostics call this program
// dmtcp_checkpoint).
//
// Rough sequence, as visible below:
//   1. initialize jalib and hand &argc/&argv to processArgs(); afterwards
//      argv[0] is treated as the program to run;
//   2. set up the tmp dir, this process's UniquePid and the log file;
//   3. print the banner and an RLIMIT_STACK warning where applicable;
//   4. run the testMatlab/testJava/testSetuid checks on the target;
//   5. export a number of environment variables (checkpoint dir, stderr
//      path, checkpoint signal, open-files flag, ...);
//   6. inspect the target with Util::elfType() and exit with DMTCP_FAIL_RC
//      if that fails;
//   7. optionally start the coordinator and request a virtual pid from it;
//   8. build the preload library list and export it through
//      ENV_VAR_HIJACK_LIBS and LD_PRELOAD;
//   9. execvp() the target program.
//
// Returns -1 only when the final execvp() fails.
int main ( int argc, char** argv )
{
  initializeJalib();

  // Default ENV_VAR_QUIET to "0" when it is not set.
  if (! getenv(ENV_VAR_QUIET))
    setenv(ENV_VAR_QUIET, "0", 0);

  processArgs(&argc, &argv);

  // If --ssh-slave and --prefix both are present, verify that the prefix-dir
  // of this binary (dmtcp_checkpoint) is same as the one provided with
  // --prefix
  if (isSSHSlave && getenv(ENV_VAR_PREFIX_PATH) != NULL) {
    const char *str = getenv(ENV_VAR_PREFIX_PATH);
    dmtcp::string prefixDir = jalib::Filesystem::ResolveSymlink(str);
    dmtcp::string programPrefixDir =
      jalib::Filesystem::DirName(jalib::Filesystem::GetProgramDir());
    JASSERT(prefixDir == programPrefixDir)
      (prefixDir) (programPrefixDir);
  }

  dmtcp::UniquePid::setTmpDir(getenv(ENV_VAR_TMPDIR));
  dmtcp::UniquePid::ThisProcess(true);
  dmtcp::Util::initializeLogFile();

#ifdef FORKED_CHECKPOINTING
  /* When this is robust, add --forked-checkpointing option on command-line,
   * with #ifdef FORKED_CHECKPOINTING around the option, change default of
   * configure.ac, dmtcp/configure.ac, to enable, and change them
   * from enable-forked... to disable-...
   */
  setenv(ENV_VAR_FORKED_CKPT, "1", 1);
#endif

  // The banner is printed only when jassert_quiet is 0.
  if (jassert_quiet == 0)
    JASSERT_STDERR << DMTCP_BANNER;

  // This code will go away when zero-mapped pages are implemented in MTCP.
  // Warn when the soft stack limit is finite but larger than 256 MB.
  struct rlimit rlim;
  getrlimit(RLIMIT_STACK, &rlim);
  if (rlim.rlim_cur > 256*1024*1024 && rlim.rlim_cur != RLIM_INFINITY)
    JASSERT_STDERR <<
      "*** WARNING:  RLIMIT_STACK > 1/4 GB.  This causes each thread to"
      "\n***  receive a 1/4 GB stack segment.  Checkpoint/restart will be slow,"
      "\n***  and will potentially break if many threads are created."
      "\n*** Suggest setting (sh/bash):  ulimit -s 10000"
      "\n***                (csh/tcsh):  limit stacksize 10000"
      "\n*** prior to using DMTCP.  (This will be fixed in the future, when"
      "\n*** DMTCP supports restoring zero-mapped pages.)\n\n\n" ;
  // Remove this when zero-mapped pages are supported.  For segments with
  // no file backing:  Start with 4096 (page) offset and keep doubling offset
  // until finding region of memory segment with many zeroes.
  // Then mark as CS_ZERO_PAGES in MTCP instead of CS_RESTORE (or mark
  // entire segment as CS_ZERO_PAGES and then overwrite with CS_RESTORE
  // region for portion to be read back from checkpoint image.
  // For CS_ZERO_PAGES region, mmap // on restart, but don't write in zeroes.
  // Also, after checkpointing segment, munmap zero pages, and mmap them again.
  // Don't try to find all pages.  The above strategy may increase
  // the non-zero-mapped mapped pages to no more than double the actual
  // non-zero region (assuming that the zero-mapped pages are contiguous).
  // - Gene

  testMatlab(argv[0]);
  testJava(argv);  // Warn that -Xmx flag needed to limit virtual memory size

  // If dmtcphijack.so is in standard search path and _also_ has setgid access,
  //   then LD_PRELOAD will work.
  // Otherwise, it will only work if the application does not use setuid and
  //   setgid access.  So, we test //   if the application does not use
  //   setuid/setgid.  (See 'man ld.so')
  // FIXME:  ALSO DO THIS FOR execwrappers.cpp:dmtcpPrepareForExec()
  //   Should pass dmtcphijack.so path, and let testSetuid determine
  //     if setgid is set for it.  If so, no problem:  continue.
  //   If not, call testScreen() and adapt 'screen' to run using
  //     Util::patchArgvIfSetuid(argv[0], argv, &newArgv) (which shouldn't
  //     will just modify argv[0] to point to /tmp/dmtcp-USER@HOST/screen
  //     and other modifications:  doesn't need newArgv).
  //   If it's not 'screen' and if no setgid for dmtcphijack.so, then testSetuid
  //    should issue the warning, unset our LD_PRELOAD, and hope for the best.
  //    A program like /usr/libexec/utempter/utempter (Fedora path)
  //    is short-lived and can be safely run.  Ideally, we should
  //    disable checkpoints while utempter is running, and enable checkpoints
  //    when utempter finishes.  See possible model at
  //    execwrappers.cpp:execLibProcessAndExit(), since the same applies
  //    to running /lib/libXXX.so for running libraries as executables.
  if (testSetuid(argv[0])) {
    char **newArgv;
    // THIS NEXT LINE IS DANGEROUS.  MOST setuid PROGRAMS CAN'T RUN UNPRIVILEGED
    dmtcp::Util::patchArgvIfSetuid(argv[0], argv, &newArgv);
    argv = newArgv;
  };

  if (argc > 0) {
    JTRACE("dmtcp_checkpoint starting new program:")(argv[0]);
  }

  //set up CHECKPOINT_DIR
  // (defaults to the current working directory, or "." if that is unknown)
  if(getenv(ENV_VAR_CHECKPOINT_DIR) == NULL){
    const char* ckptDir = get_current_dir_name();
    if(ckptDir != NULL ){
      //copy to private buffer
      static dmtcp::string _buf = ckptDir;
      ckptDir = _buf.c_str();
    }else{
      ckptDir=".";
    }
    setenv ( ENV_VAR_CHECKPOINT_DIR, ckptDir, 0 );
    JTRACE("setting " ENV_VAR_CHECKPOINT_DIR)(ckptDir);
  }

  dmtcp::string stderrDevice = jalib::Filesystem::ResolveSymlink ( _stderrProcPath() );

  //TODO:
  // When stderr is a pseudo terminal for IPC between parent/child processes,
  //  this logic fails and JASSERT may write data to FD 2 (stderr).
  // This will cause problems in programs that use FD 2 (stderr) for
  //  algorithmic things ...
  // Export the resolved stderr target if it exists as a file; otherwise fall
  // back to /dev/null.  Neither setenv overrides an existing value.
  if ( stderrDevice.length() > 0
          && jalib::Filesystem::FileExists ( stderrDevice ) )
    setenv ( ENV_VAR_STDERR_PATH,stderrDevice.c_str(), 0 );
  else// if( isSSHSlave )
    setenv ( ENV_VAR_STDERR_PATH, "/dev/null", 0 );

  // Mirror ENV_VAR_SIGCKPT into MTCP_SIGCKPT, or clear MTCP_SIGCKPT when
  // ENV_VAR_SIGCKPT is unset.
  if ( getenv(ENV_VAR_SIGCKPT) != NULL )
    setenv ( "MTCP_SIGCKPT", getenv(ENV_VAR_SIGCKPT), 1);
  else
    unsetenv("MTCP_SIGCKPT");

  if ( checkpointOpenFiles )
    setenv( ENV_VAR_CKPT_OPEN_FILES, "1", 0 );
  else
    unsetenv( ENV_VAR_CKPT_OPEN_FILES);

#ifdef PID_VIRTUALIZATION
  setenv( ENV_VAR_ROOT_PROCESS, "1", 1 );
#endif

  bool isElf, is32bitElf;
  if  (dmtcp::Util::elfType(argv[0], &isElf, &is32bitElf) == -1) {
    // Couldn't read argv_buf
    // FIXME:  This could have been a symbolic link.  Don't issue an error,
    //         unless we're sure that the executable is not readable.
    JASSERT_STDERR <<
      "*** ERROR:  Executable to run w/ DMTCP appears not to be readable,\n"
      "***         or no such executable in path.\n\n"
      << argv[0] << "\n";
    exit(DMTCP_FAIL_RC);
  } else {
#if defined(__x86_64__) && !defined(CONFIG_M32)
    if (is32bitElf)
      JASSERT_STDERR << "*** ERROR:  You appear to be checkpointing "
        << "a 32-bit target under 64-bit Linux.\n"
        << "***  If this fails, then please try re-configuring DMTCP:\n"
        << "***  configure --enable-m32 ; make clean ; make\n\n";
#endif

    testStaticallyLinked(argv[0]);
  }

  // UNSET DISPLAY environment variable.
  unsetenv("DISPLAY");

// FIXME:  Unify this code with code prior to execvp in execwrappers.cpp
//   Can use argument to dmtcpPrepareForExec() or getenv("DMTCP_...")
//   from DmtcpWorker constructor, to distinguish the two cases.
  dmtcp::Util::adjustRlimitStack();

  // FIXME: This call should be moved closer to call to execvp().
  dmtcp::Util::prepareDlsymWrapper();

  if (autoStartCoordinator)
     dmtcp::DmtcpCoordinatorAPI::startCoordinatorIfNeeded(allowedModes);
  dmtcp::DmtcpCoordinatorAPI coordinatorAPI;
  pid_t virtualPid = coordinatorAPI.getVirtualPidFromCoordinator();
  if (virtualPid != -1) {
    JTRACE("Got virtual pid from coordinator") (virtualPid);
    dmtcp::Util::setVirtualPidEnvVar(virtualPid, getppid());
  }

  // preloadLibs are to set LD_PRELOAD:
  //   LD_PRELOAD=PLUGIN_LIBS:UTILITY_DIR/dmtcphijack.so:R_LIBSR_UTILITY_DIR/
  dmtcp::string preloadLibs = "";
  // FIXME:  If the colon-separated elements of ENV_VAR_PLUGIN are not
  //     absolute pathnames, then they must be expanded to absolute pathnames.
  //     Warn user if an absolute pathname is not valid.
  if ( getenv(ENV_VAR_PLUGIN) != NULL ) {
    preloadLibs += getenv(ENV_VAR_PLUGIN);
    preloadLibs += ":";
  }
  // FindHelperUtiltiy requires ENV_VAR_UTILITY_DIR to be set
  dmtcp::string searchDir = jalib::Filesystem::GetProgramDir();
  setenv ( ENV_VAR_UTILITY_DIR, searchDir.c_str(), 0 );

#ifdef PTRACE
  preloadLibs += jalib::Filesystem::FindHelperUtility ( "ptracehijack.so" );
  preloadLibs += ":";
#endif

  preloadLibs += jalib::Filesystem::FindHelperUtility ( "dmtcphijack.so" );

#ifdef PID_VIRTUALIZATION
  preloadLibs += ":";
  preloadLibs += jalib::Filesystem::FindHelperUtility ( "pidvirt.so" );
#endif

  // ENV_VAR_HIJACK_LIBS gets the list without the user's own LD_PRELOAD,
  // which is appended only afterwards.
  setenv(ENV_VAR_HIJACK_LIBS, preloadLibs.c_str(), 1);

  // If dmtcp_checkpoint was called with user LD_PRELOAD, and if
  //   if dmtcp_checkpoint survived the experience, then pass it back to user.
  if (getenv("LD_PRELOAD"))
    preloadLibs = preloadLibs + ":" + getenv("LD_PRELOAD");

  setenv ( "LD_PRELOAD", preloadLibs.c_str(), 1 );
  JTRACE("getting value of LD_PRELOAD")(getenv("LD_PRELOAD"));

  //run the user program
  // testScreen() may supply a replacement argv through newArgv.
  char **newArgv = NULL;
  if (testScreen(argv, &newArgv))
    execvp ( newArgv[0], newArgv );
  else
    execvp ( argv[0], argv );

  //should be unreachable
  // (execvp() only returns when it failed)
  JASSERT_STDERR <<
    "ERROR: Failed to exec(\"" << argv[0] << "\"): " << JASSERT_ERRNO << "\n"
    << "Perhaps it is not in your $PATH?\n"
    << "See `dmtcp_checkpoint --help` for usage.\n";
  //fprintf(stderr, theExecFailedMsg, argv[0], JASSERT_ERRNO);

  return -1;
}
// Replaces the current process with the "mtcp_restart" helper so that it
// restores the checkpoint image `path`.
//
//   path     - checkpoint image handed to mtcp_restart.
//   offset   - passed as "--offset <offset>" (unused when
//              USE_MTCP_FD_CALLING is defined; a descriptor is passed
//              instead).
//   argvSize - combined size of the original argv strings.
//   envSize  - combined size of the original environment strings.
//
// The new environment consists of a padding entry (ENV_VAR_DMTCP_DUMMY=000…)
// followed by PATH.  The padding is sized from argvSize + envSize minus the
// measured size of the new arguments, presumably so that the new
// argv/environment occupies as much space as the original one did.
// NOTE(review): confirm that intent against mtcp_restart.
//
// Does not return on success; every failure is reported through JASSERT.
void runMtcpRestore(const char* path, int offset, size_t argvSize,
                    size_t envSize)
{
  static dmtcp::string mtcprestart =
    jalib::Filesystem::FindHelperUtility ("mtcp_restart");

  // Tell mtcp_restart process to write its debugging information to
  // PROTECTED_STDERR_FD. This way we prevent it from spitting out garbage onto
  // FD_STDERR if it is being used by the user process in a special way.
  char protected_stderr_fd_str[16];
  sprintf(protected_stderr_fd_str, "%d", PROTECTED_STDERR_FD);

#ifdef USE_MTCP_FD_CALLING
  int fd = ConnectionToFds::openMtcpCheckpointFile(path);
  char buf[64];
  char buf2[64];

  sprintf(buf, "%d", fd);
  // gzip_child_pid set by openMtcpCheckpointFile() above.
  sprintf(buf2, "%d", dmtcp::ConnectionToFds::gzip_child_pid);
  char* newArgs[] = {
    (char*) mtcprestart.c_str(),
    (char*) "--stderr-fd",
    protected_stderr_fd_str,
    (char*) "--fd",
    buf,
    (char*) "--gzip-child-pid",
    buf2,
    NULL
  };
  if (dmtcp::ConnectionToFds::gzip_child_pid == -1) { // If no gzip compression
    // Cut the argument list right before "--gzip-child-pid" (index 5) so
    // that "--fd <fd>" is still passed on.
    newArgs[5] = NULL;
  }
  JTRACE ("launching mtcp_restart --fd")(fd)(path);
#else
  char buf[64];

  sprintf(buf, "%d", offset);
  char* newArgs[] = {
    (char*) mtcprestart.c_str(),
    (char*) "--stderr-fd",
    protected_stderr_fd_str,
    (char*) "--offset",
    buf,
    (char*) path,
    NULL
  };
  JTRACE ("launching mtcp_restart --offset")(path)(offset);
#endif

  // Create the placeholder for "MTCP_OLDPERS" environment.
  // setenv("MTCP_OLDPERS_DUMMY", "XXXXXXXXXXXXXXXX", 1);
  // FIXME: Put an explanation of the logic below.   -- Kapil
  // ENV_PTR(x) steps back from the value returned by getenv(x) over "x=",
  // i.e. it yields the start of the complete "x=value" environment string.
#define ENV_PTR(x) ((char*) (getenv(x) - strlen(x) - 1))
  char* dummyEnviron = NULL;
  const int dummyEnvironIndex = 0; // index in newEnv[]
  const int pathIndex = 1; // index in newEnv[]
  // Eventually, newEnv = {ENV_PTR("MTCP_OLDPERS"), ENV_PTR("PATH"), NULL}
  char* newEnv[3] = {NULL, NULL, NULL};
  // Will put ENV_PTR("MTCP_OLDPERS") here.
  newEnv[dummyEnvironIndex] = (char*) dummyEnviron;
  newEnv[pathIndex] = (getenv("PATH") ? ENV_PTR("PATH") : NULL);

  size_t newArgsSize = 0;
  for (int i = 0; newArgs[i] != 0; i++) {
    newArgsSize += strlen(newArgs[i]) + 1;
  }
  // NOTE(review): newEnv[0] is still NULL at this point, so this loop stops
  // immediately and newEnvSize stays 0 (PATH is not counted).  Left as is
  // because the padding size below depends on it -- confirm the intent.
  size_t newEnvSize = 0;
  for (int i = 0; newEnv[i] != 0; i++) {
    newEnvSize += strlen(newEnv[i]) + 1;
  }
  size_t originalArgvEnvSize = argvSize + envSize;
  size_t newArgvEnvSize = newArgsSize + newEnvSize + strlen(newArgs[0]);

  // The padding entry needs at least one byte (its terminator); without this
  // check the unsigned subtraction below could yield 0 or wrap around.
  JASSERT(originalArgvEnvSize > newArgvEnvSize)
    (originalArgvEnvSize) (newArgvEnvSize)
    .Text("No room for the padding environment entry");
  size_t argvSizeDiff = originalArgvEnvSize - newArgvEnvSize;

  dummyEnviron = (char*) malloc(argvSizeDiff);
  JASSERT(dummyEnviron != NULL) (argvSizeDiff) .Text("malloc() failed");

  // Fill with '0' characters, then overwrite the front with "<NAME>=" --
  // never writing more than the buffer can hold.
  memset(dummyEnviron, '0', argvSizeDiff - 1);
  size_t prefixLen = strlen(ENV_VAR_DMTCP_DUMMY "=");
  if (prefixLen > argvSizeDiff - 1) {
    prefixLen = argvSizeDiff - 1;
  }
  strncpy(dummyEnviron, ENV_VAR_DMTCP_DUMMY "=0", prefixLen);
  dummyEnviron[argvSizeDiff - 1] = '\0';

  newEnv[dummyEnvironIndex] = dummyEnviron;
  JTRACE("Args/Env Sizes")
    (newArgsSize) (newEnvSize) (argvSize) (envSize) (argvSizeDiff);

  execve (newArgs[0], newArgs, newEnv);
  JASSERT (false) (newArgs[0]) (newArgs[1]) (JASSERT_ERRNO)
          .Text ("exec() failed");
}
예제 #18
0
// Returns the checkpoint directory reported by
// dmtcp::ProcessInfo::instance().getCkptDir().  The pointer refers to
// function-static storage that is overwritten by the next call.
EXTERNC const char* dmtcp_get_ckpt_dir()
{
  static dmtcp::string ckptDirBuf;  // backing store for the returned pointer
  ckptDirBuf = dmtcp::ProcessInfo::instance().getCkptDir();
  const char *result = ckptDirBuf.c_str();
  return result;
}
예제 #19
0
// Returns the coordinator's checkpoint directory as reported by
// CoordinatorAPI::instance().getCoordCkptDir().  The pointer refers to
// function-static storage that is overwritten by the next call.
EXTERNC const char* dmtcp_get_coord_ckpt_dir(void)
{
  static dmtcp::string coordDirBuf;  // backing store for the returned pointer
  coordDirBuf = CoordinatorAPI::instance().getCoordCkptDir();
  const char *result = coordDirBuf.c_str();
  return result;
}
예제 #20
0
// Returns the checkpoint file name reported by
// dmtcp::ProcessInfo::instance().getCkptFilename().  The pointer refers to
// function-static storage that is overwritten by the next call.
EXTERNC const char* dmtcp_get_ckpt_filename(void)
{
  static dmtcp::string ckptNameBuf;  // backing store for the returned pointer
  ckptNameBuf = dmtcp::ProcessInfo::instance().getCkptFilename();
  const char *result = ckptNameBuf.c_str();
  return result;
}
예제 #21
0
// Returns the checkpoint-files subdirectory reported by
// dmtcp::ProcessInfo::instance().getCkptFilesSubDir().  The pointer refers
// to function-static storage that is overwritten by the next call.
EXTERNC const char* dmtcp_get_ckpt_files_subdir(void)
{
  static dmtcp::string subdirBuf;  // backing store for the returned pointer
  subdirBuf = dmtcp::ProcessInfo::instance().getCkptFilesSubDir();
  const char *result = subdirBuf.c_str();
  return result;
}
예제 #22
0
// Program entry point (its own diagnostics call this program dmtcp_launch).
//
// Rough sequence, as visible below:
//   1. close every descriptor in [PROTECTED_FD_START, PROTECTED_FD_END);
//   2. hand &argc/&argv to processArgs() and initialize jalib; afterwards
//      argv[0] is treated as the program to run;
//   3. set up the tmp dir, this process's UniquePid and the log file;
//   4. print an RLIMIT_STACK warning where applicable and run the
//      testMatlab/testJava/testSetuid checks on the target;
//   5. export environment variables (checkpoint dir, open-files flag,
//      ORIG_DISPLAY, explicit-srun flag, ...);
//   6. inspect the target with Util::elfType() and exit with DMTCP_FAIL_RC
//      if that fails;
//   7. connect to the coordinator, write the coordinator port file and
//      initialize SharedData;
//   8. call setLDPreloadLibs() and execvp() the target program.
//
// Returns -1 only when the final execvp() fails.
int main ( int argc, char** argv )
{
  for (size_t fd = PROTECTED_FD_START; fd < PROTECTED_FD_END; fd++) {
    close(fd);
  }

  // Default ENV_VAR_QUIET to "0" when it is not set.
  if (! getenv(ENV_VAR_QUIET))
    setenv(ENV_VAR_QUIET, "0", 0);

  processArgs(&argc, &argv);

  initializeJalib();
  // If --ssh-slave and --prefix both are present, verify that the prefix-dir
  // of this binary (dmtcp_launch) is same as the one provided with
  // --prefix
  if (isSSHSlave && getenv(ENV_VAR_PREFIX_PATH) != NULL) {
    char buf[PATH_MAX];
    string prefixPath = getenv(ENV_VAR_PREFIX_PATH);
    prefixPath += "/bin/dmtcp_launch";
    JASSERT(realpath(prefixPath.c_str(), buf) != NULL) (prefixPath);
    prefixPath = buf;
    string programPath = jalib::Filesystem::GetProgramPath();
    JASSERT(prefixPath == programPath) (prefixPath) (programPath);
  }

  dmtcp::Util::setTmpDir(getenv(ENV_VAR_TMPDIR));
  dmtcp::UniquePid::ThisProcess(true);
  dmtcp::Util::initializeLogFile();

#ifdef FORKED_CHECKPOINTING
  /* When this is robust, add --forked-checkpointing option on command-line,
   * with #ifdef FORKED_CHECKPOINTING around the option, change default of
   * configure.ac, dmtcp/configure.ac, to enable, and change them
   * from enable-forked... to disable-...
   */
  setenv(ENV_VAR_FORKED_CKPT, "1", 1);
#endif

  // This code will go away when zero-mapped pages are implemented in MTCP.
  // Warn when the soft stack limit is finite but larger than 256 MB.
  struct rlimit rlim;
  getrlimit(RLIMIT_STACK, &rlim);
  if (rlim.rlim_cur > 256*1024*1024 && rlim.rlim_cur != RLIM_INFINITY)
    JASSERT_STDERR <<
      "*** WARNING:  RLIMIT_STACK > 1/4 GB.  This causes each thread to"
      "\n***  receive a 1/4 GB stack segment.  Checkpoint/restart will be slow,"
      "\n***  and will potentially break if many threads are created."
      "\n*** Suggest setting (sh/bash):  ulimit -s 10000"
      "\n***                (csh/tcsh):  limit stacksize 10000"
      "\n*** prior to using DMTCP.  (This will be fixed in the future, when"
      "\n*** DMTCP supports restoring zero-mapped pages.)\n\n\n" ;
  // Remove this when zero-mapped pages are supported.  For segments with
  // no file backing:  Start with 4096 (page) offset and keep doubling offset
  // until finding region of memory segment with many zeroes.
  // Then mark as CS_ZERO_PAGES in MTCP instead of CS_RESTORE (or mark
  // entire segment as CS_ZERO_PAGES and then overwrite with CS_RESTORE
  // region for portion to be read back from checkpoint image.
  // For CS_ZERO_PAGES region, mmap // on restart, but don't write in zeroes.
  // Also, after checkpointing segment, munmap zero pages, and mmap them again.
  // Don't try to find all pages.  The above strategy may increase
  // the non-zero-mapped mapped pages to no more than double the actual
  // non-zero region (assuming that the zero-mapped pages are contiguous).
  // - Gene

  testMatlab(argv[0]);
  testJava(argv);  // Warn that -Xmx flag needed to limit virtual memory size

  // If libdmtcp.so is in standard search path and _also_ has setgid access,
  //   then LD_PRELOAD will work.
  // Otherwise, it will only work if the application does not use setuid and
  //   setgid access.  So, we test //   if the application does not use
  //   setuid/setgid.  (See 'man ld.so')
  // FIXME:  ALSO DO THIS FOR execwrappers.cpp:dmtcpPrepareForExec()
  //   Should pass libdmtcp.so path, and let testSetuid determine
  //     if setgid is set for it.  If so, no problem:  continue.
  //   If not, call testScreen() and adapt 'screen' to run using
  //     Util::patchArgvIfSetuid(argv[0], argv, &newArgv) (which shouldn't
  //     will just modify argv[0] to point to /tmp/dmtcp-USER@HOST/screen
  //     and other modifications:  doesn't need newArgv).
  //   If it's not 'screen' and if no setgid for libdmtcp.so, then testSetuid
  //    should issue the warning, unset our LD_PRELOAD, and hope for the best.
  //    A program like /usr/libexec/utempter/utempter (Fedora path)
  //    is short-lived and can be safely run.  Ideally, we should
  //    disable checkpoints while utempter is running, and enable checkpoints
  //    when utempter finishes.  See possible model at
  //    execwrappers.cpp:execLibProcessAndExit(), since the same applies
  //    to running /lib/libXXX.so for running libraries as executables.
  if (testSetuid(argv[0])) {
    char **newArgv;
    // THIS NEXT LINE IS DANGEROUS.  MOST setuid PROGRAMS CAN'T RUN UNPRIVILEGED
    dmtcp::Util::patchArgvIfSetuid(argv[0], argv, &newArgv);
    argv = newArgv;
  };

  if (argc > 0) {
    JTRACE("dmtcp_launch starting new program:")(argv[0]);
  }

  //set up CHECKPOINT_DIR
  // (defaults to the current working directory, or "." if that is unknown)
  if(getenv(ENV_VAR_CHECKPOINT_DIR) == NULL){
    const char* ckptDir = get_current_dir_name();
    if(ckptDir != NULL ){
      //copy to private buffer
      static dmtcp::string _buf = ckptDir;
      ckptDir = _buf.c_str();
    }else{
      ckptDir=".";
    }
    setenv ( ENV_VAR_CHECKPOINT_DIR, ckptDir, 0 );
    JTRACE("setting " ENV_VAR_CHECKPOINT_DIR)(ckptDir);
  }

  if ( checkpointOpenFiles )
    setenv( ENV_VAR_CKPT_OPEN_FILES, "1", 0 );
  else
    unsetenv( ENV_VAR_CKPT_OPEN_FILES);

  bool isElf, is32bitElf;
  if  (dmtcp::Util::elfType(argv[0], &isElf, &is32bitElf) == -1) {
    // Couldn't read argv_buf
    // FIXME:  This could have been a symbolic link.  Don't issue an error,
    //         unless we're sure that the executable is not readable.
    JASSERT_STDERR <<
      "*** ERROR:  Executable to run w/ DMTCP appears not to be readable,\n"
      "***         or no such executable in path.\n\n"
      << argv[0] << "\n";
    exit(DMTCP_FAIL_RC);
  } else {
    testStaticallyLinked(argv[0]);
  }

  // Preserve the original DISPLAY value in ORIG_DISPLAY before clearing it.
  if (getenv("DISPLAY") != NULL) {
    setenv("ORIG_DISPLAY", getenv("DISPLAY"), 1);
    // UNSET DISPLAY environment variable.
    unsetenv("DISPLAY");
  }

  if( explicitSrun ){
      setenv(ENV_VAR_EXPLICIT_SRUN, "1", 1);
  }

// FIXME:  Unify this code with code prior to execvp in execwrappers.cpp
//   Can use argument to dmtcpPrepareForExec() or getenv("DMTCP_...")
//   from DmtcpWorker constructor, to distinguish the two cases.
  dmtcp::Util::adjustRlimitStack();

  // Set DLSYM_OFFSET env var(s).
  dmtcp::Util::prepareDlsymWrapper();

  // compId, coordInfo and localIPAddr are filled in by
  // connectToCoordOnStartup() and then passed on to SharedData::initialize().
  DmtcpUniqueProcessId compId;
  CoordinatorInfo coordInfo;
  struct in_addr localIPAddr;
  CoordinatorAPI::instance().connectToCoordOnStartup(allowedModes, argv[0],
                                                     &compId, &coordInfo,
                                                     &localIPAddr);
  Util::writeCoordPortToFile(getenv(ENV_VAR_NAME_PORT), thePortFile.c_str());
  /* We need to initialize SharedData here to make sure that it is
   * initialized with the correct coordinator timestamp.  The coordinator
   * timestamp is updated only during postCkpt callback. However, the
   * SharedData area may be initialized earlier (for example, while
   * recreating threads), causing it to use *older* timestamp.
   */
  SharedData::initialize(Util::getTmpDir().c_str(), &compId, &coordInfo,
                         &localIPAddr);

  setLDPreloadLibs(is32bitElf);

  //run the user program
  // testScreen() may supply a replacement argv through newArgv.
  char **newArgv = NULL;
  if (testScreen(argv, &newArgv))
    execvp ( newArgv[0], newArgv );
  else
    execvp ( argv[0], argv );

  //should be unreachable
  // (execvp() only returns when it failed)
  JASSERT_STDERR <<
    "ERROR: Failed to exec(\"" << argv[0] << "\"): " << JASSERT_ERRNO << "\n"
    << "Perhaps it is not in your $PATH?\n"
    << "See `dmtcp_launch --help` for usage.\n";
  //fprintf(stderr, theExecFailedMsg, argv[0], JASSERT_ERRNO);

  return -1;
}
예제 #23
0
// dmtcp::string convenience overload: forwards to the C-string overload of
// strEndsWith() with the string's character data.
bool dmtcp::Util::strEndsWith(const dmtcp::string& str, const char *pattern)
{
  const char *cstr = str.c_str();
  return strEndsWith(cstr, pattern);
}
const char* dmtcp_get_tmpdir()
{
  static dmtcp::string tmpdir;
  tmpdir = dmtcp::UniquePid::getTmpDir();
  return tmpdir.c_str();
}
// Sends the initial handshake message to the coordinator over
// _coordinatorSocket.
//
//   progname  - program name, sent after the hostname.
//   compGroup - stored in the message's compGroup field.
//   np        - stored in params[0] of the message.
//   msgType   - message type to send.
//
// Wire format: the DmtcpMessage itself, followed by the NUL-terminated
// hostname, the NUL-terminated program name and -- only when a non-empty
// prefix directory was determined -- the NUL-terminated prefix directory.
// The message's extraBytes field is the total size of those trailing
// strings.
//
// Side effect: ENV_VAR_CKPT_INTR is removed from the environment via
// _dmtcp_unsetenv().
void dmtcp::DmtcpCoordinatorAPI::sendCoordinatorHandshake (
  const dmtcp::string& progname,
  UniquePid compGroup /*= UniquePid()*/,
  int np /*= -1*/,
  DmtcpMessageType msgType /*= DMT_HELLO_COORDINATOR*/)
{
  JTRACE("sending coordinator handshake")(UniquePid::ThisProcess());

  dmtcp::string hostname = jalib::Filesystem::GetCurrentHostname();
  const char *prefixPathEnv = getenv(ENV_VAR_PREFIX_PATH);
  dmtcp::string prefixDir;
  DmtcpMessage hello_local;
  hello_local.type = msgType;
  hello_local.params[0] = np;
  hello_local.compGroup = compGroup;
  hello_local.restorePort = theRestorePort;

  if (getenv(ENV_VAR_VIRTUAL_PID) == NULL) {
    hello_local.virtualPid = -1;
  } else {
    hello_local.virtualPid = (pid_t) atoi(getenv(ENV_VAR_VIRTUAL_PID));
  }

  const char* interval = getenv ( ENV_VAR_CKPT_INTR );
  /* DmtcpMessage constructor default:
   *   hello_local.theCheckpointInterval: DMTCPMESSAGE_SAME_CKPT_INTERVAL
   */
  if ( interval != NULL )
    hello_local.theCheckpointInterval = jalib::StringToInt ( interval );
  // Tell the coordinator the ckpt interval only once.  It can change later.
  _dmtcp_unsetenv ( ENV_VAR_CKPT_INTR );

  hello_local.extraBytes = hostname.length() + 1 + progname.length() + 1;

  if (prefixPathEnv != NULL) {
    /* If --prefix was defined then this process is either running on the local
     * node (the home of first process in the computation) or a remote node.
     *
     * If the process is running on the local node, the prefix-path-env may be
     * different from the prefix-dir of this binary, in which case, we want to
     * send the prefix-path of this binary to the coordinator and the
     * coordinator will save it as the local-prefix.
     *
     * However, if this is running on a remote node, the prefix-path-env would
     * be the same as the prefix-path of this binary and we should send the
     * prefix-path-env to the coordinator and the coordinator will note this as
     * the remote-prefix.
     */
    const char *utilDirEnv = getenv(ENV_VAR_UTILITY_DIR);
    // Building a string from a NULL pointer is undefined; fail loudly instead.
    JASSERT(utilDirEnv != NULL) .Text("utility directory env var is not set");
    dmtcp::string utilDirPrefix = jalib::Filesystem::DirName(utilDirEnv);
    if (utilDirPrefix == jalib::Filesystem::ResolveSymlink(prefixPathEnv)) {
      prefixDir = prefixPathEnv;
    } else {
      prefixDir = utilDirPrefix;
    }
    // Count the prefix only if it is actually transmitted below; otherwise
    // extraBytes would promise bytes that are never written.
    if (!prefixDir.empty()) {
      hello_local.extraBytes += prefixDir.length() + 1;
    }
  }

  _coordinatorSocket << hello_local;
  _coordinatorSocket.writeAll( hostname.c_str(),hostname.length()+1);
  _coordinatorSocket.writeAll( progname.c_str(),progname.length()+1);
  if (!prefixDir.empty()) {
    _coordinatorSocket.writeAll(prefixDir.c_str(), prefixDir.length()+1);
  }
}