Exemple #1
0
// Bookkeeping hook for a connection that is being closed.
//
// Only acts while the worker reports that it is waiting for external sockets
// to close: the first entry of externalTcpConnections equal to conId (if any)
// is removed, and the function then sleeps for four seconds.
static void processClose(dmtcp::ConnectionIdentifier conId)
{
  const bool waiting =
    (dmtcp::DmtcpWorker::waitingForExternalSocketsToClose() == true);
  if (!waiting) {
    return;
  }

  typedef dmtcp::vector<dmtcp::ConnectionIdentifier>::iterator ConnIter;

  // Locate the first matching identifier, then erase it (at most one entry).
  ConnIter pos = externalTcpConnections.begin();
  while (pos != externalTcpConnections.end() && !(conId == *pos)) {
    ++pos;
  }
  if (pos != externalTcpConnections.end()) {
    externalTcpConnections.erase(pos);
  }

  // The list is tested for emptiness here, but no action is taken on it.
  if (externalTcpConnections.empty() == true) {
  }

  sleep(4);
}
// Maps every area recorded in shmAreas back at its saved address, using the
// first file descriptor of the matching entry in shmAreaConn as the backing
// file. Each descriptor is closed (and reported through processClose) once
// its area has been mapped. Both lists are emptied before returning.
void dmtcp::FileConnList::remapShmMaps()
{
  size_t idx = 0;
  while (idx < shmAreas.size()) {
    Util::ProcMapsArea *area = &shmAreas[idx];
    const int fd = shmAreaConn[idx]->getFds()[0];

    JTRACE("Restoring shared memory area") (area->name) (area->addr);

    // MAP_FIXED forces the mapping to be placed exactly at area->addr.
    void *addr = _real_mmap(area->addr, area->size, area->prot,
                            MAP_FIXED | area->flags,
                            fd, area->offset);
    JASSERT(addr != MAP_FAILED) (area->flags) (area->prot) (JASSERT_ERRNO) .Text("mmap failed");

    // The descriptor is not needed once the mapping exists.
    _real_close(fd);
    processClose(fd);

    ++idx;
  }

  shmAreas.clear();
  shmAreaConn.clear();
}
// Re-executes the current program without checkpointing.
//
// programName must be one of the five program names asserted below. The
// coordinator descriptor is closed, args is repacked into a NULL-terminated
// argv array, the user's LD_PRELOAD is restored, and the program path
// reported by jalib::Filesystem::GetProgramPath() is exec'ed. The function
// only continues past the exec call if exec fails, in which case it asserts.
static void processDmtcpCommands(dmtcp::string programName,
                                 dmtcp::vector<dmtcp::string>& args)
{
  JASSERT (programName == "dmtcp_coordinator" ||
           programName == "dmtcp_checkpoint"  ||
           programName == "dmtcp_restart"     ||
           programName == "dmtcp_command"     ||
           programName == "mtcp_restart");

  // The coordinator connection must not leak into the re-executed program.
  _real_close ( PROTECTED_COORD_FD );

  /*
   * Running dmtcp_command from inside gdb (or any shell that does a
   * waitpid() on its children) used to hang:
   *   gdb shell dmtcp_command -c => hangs because gdb forks off a new process
   *   and it does a waitpid  (in which we block signals) ...
   * To avoid that, fork here and let the parent exit right away; only the
   * child goes on to exec.
   */
  if (programName == "dmtcp_command") {
    pid_t cpid = _real_fork();
    JASSERT (cpid != -1);
    const bool isParent = (cpid != 0);
    if (isParent) {
      _real_exit(0);
    }
  }

  // Repack args into a zero-filled array; the extra slot stays NULL and
  // terminates the list.
  const size_t slotCount = args.size() + 1;
  char** argv = new char*[slotCount]();

  size_t slot = 0;
  for (dmtcp::vector<dmtcp::string>::iterator arg = args.begin();
       arg != args.end();
       ++arg, ++slot) {
    argv[slot] = const_cast<char*>(arg->c_str());
  }

  JNOTE ( "re-running without checkpointing" ) ( programName );

  // Re-invoke the command with the user's own LD_PRELOAD setting.
  restoreUserLDPRELOAD();
  _real_execvp ( jalib::Filesystem::GetProgramPath().c_str(), argv );

  // Only reached when exec fails.
  JASSERT ( false ) (jalib::Filesystem::GetProgramPath()) ( argv[0] )
    ( JASSERT_ERRNO ) .Text ( "exec() failed" );
}
// Prepares session information on every root target, then links roots that
// depend on one another.
//
// For every ordered pair (i, j) of distinct roots, checkDependence() is asked
// whether root j depends on root i. A non-negative result (stored in sid)
// causes root j to be attached to root i via addRoot() and to be flagged as
// no longer independent.
void SetupSessions()
{
  for (size_t r = 0; r < roots.size(); ++r) {
    roots[r].t->setupSessions();
  }

  for (size_t i = 0; i < roots.size(); i++) {
    for (size_t j = 0; j < roots.size(); j++) {
      if (i != j) {
        const pid_t sid = roots[i].t->checkDependence(roots[j].t);
        if (sid >= 0) {
          // Root j depends on root i.
          JTRACE("Root target j depends on Root target i")
            (i) (roots[i].t->upid()) (j) (roots[j].t->upid());
          roots[i].t->addRoot(roots[j].t, sid);
          roots[j].indep = false;
        }
      }
    }
  }
}
// Sends one DMT_PEER_LOOKUP message to the coordinator socket for every
// entry of conInfoTable.
//
// Each message carries the entry's local address, remote address, address
// length and connection id. Messages are written in table order.
void dmtcp::DmtcpWorker::sendPeerLookupRequest (dmtcp::vector<TcpConnectionInfo>& conInfoTable )
{
  // size_t index: size() is unsigned, so a signed int index would produce a
  // signed/unsigned comparison.
  for (size_t i = 0; i < conInfoTable.size(); ++i) {
    DmtcpMessage msg;
    msg.type = DMT_PEER_LOOKUP;
    msg.localAddr    = conInfoTable[i].localAddr();
    msg.remoteAddr   = conInfoTable[i].remoteAddr();
    msg.localAddrlen = conInfoTable[i].addrlen();
    msg.conId        = conInfoTable[i].conId();

    _coordinatorSocket << msg;
  }
}
// Returns true when `name` contains one of the substrings identifying shared
// areas that prepareShmList() must leave alone.
static bool isSkippedShmAreaName(const char *name)
{
  static const char *const skippedNames[] = {
    "ptraceSharedInfo",
    "dmtcpPidMap",
    "dmtcpSharedArea",
    "synchronization-log",
    "synchronization-read-log",
  };
  const size_t count = sizeof(skippedNames) / sizeof(skippedNames[0]);
  for (size_t i = 0; i < count; i++) {
    if (strstr(name, skippedNames[i]) != NULL) {
      return true;
    }
  }
  return false;
}

// Scans /proc/self/maps and records every shared, accessible mapping whose
// backing file exists and is writable by this process.
//
// For each such area a FileConnection of type FILE_SHM is created on a freshly
// opened descriptor and registered with add(); the area and its connection
// are appended to shmAreas / shmAreaConn (both cleared first). The area
// itself is then replaced by an anonymous PROT_NONE mapping.
// Areas whose name matches isSkippedShmAreaName() are ignored. Areas whose
// backing file is missing or not writable are not recorded.
void dmtcp::FileConnList::prepareShmList()
{
  Util::ProcMapsArea area;
  int mapsfd = _real_open("/proc/self/maps", O_RDONLY, 0);
  JASSERT(mapsfd != -1) (JASSERT_ERRNO);

  shmAreas.clear();
  shmAreaConn.clear();
  while (Util::readProcMapsLine(mapsfd, &area)) {
    if ((area.flags & MAP_SHARED) && area.prot != 0) {
      if (isSkippedShmAreaName(area.name)) {
        continue;
      }
      if (jalib::Filesystem::FileExists(area.name)) {
        if (_real_access(area.name, W_OK) == 0) {
          JTRACE("Will checkpoint shared memory area") (area.name);
          int flags = Util::memProtToOpenFlags(area.prot);
          int fd = _real_open(area.name, flags, 0);
          JASSERT(fd != -1) (JASSERT_ERRNO) (area.name);
          FileConnection *fileConn =
            new FileConnection(area.name, flags, 0, FileConnection::FILE_SHM);
          add(fd, fileConn);
          shmAreas.push_back(area);
          shmAreaConn.push_back(fileConn);
          /* Instead of unmapping the shared memory area, we make it
           * non-readable. This way mtcp will skip the region while at the same
           * time, we prevent JALLOC arena to grow over it.
           *
           * By munmapping the area, a bug was observed on CCIS linux with
           * 'make check-java'. Once the region was unmapped, the JALLOC arena
           * grew over it. During restart, the JALLOC'd area was reclaimed for
           * remapping the shm file without informing JALLOC. Finally, during
           * the second checkpoint cycle, the area was again unmapped and later
           * JALLOC tried to access it, causing a SIGSEGV.
           */
          JASSERT(_real_mmap(area.addr, area.size, PROT_NONE,
                             MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
                             -1, 0) != MAP_FAILED) (JASSERT_ERRNO);
        } else {
          JTRACE("Will not checkpoint shared memory area") (area.name);
        }
      } else {
        // TODO: Shared memory areas with unlinked backing files.
#if 0
        JASSERT(Util::strEndsWith(area.name, DELETED_FILE_SUFFIX)) (area.name);
        if (Util::strStartsWith(area.name, DEV_ZERO_DELETED_STR) ||
            Util::strStartsWith(area.name, DEV_NULL_DELETED_STR)) {
          JWARNING(false) (area.name)
            .Text("Ckpt/Restart of Anon Shared memory not supported.");
        } else {
          JTRACE("Will recreate shm file on restart.") (area.name);
          //shmAreas[area] = NULL;
        }
#endif
      }
    }
  }
  _real_close(mapsfd);
}
// Builds the NULL-terminated environment pointer list for the user program.
//
// The result contains, in order:
//   1. every entry of envp that is neither an "important" DMTCP variable
//      (per isImportantEnv) nor an LD_PRELOAD= setting;
//   2. NAME=value for each name in ourImportantEnvs that is set in the
//      current process environment;
//   3. an LD_PRELOAD= entry computed by getUpdatedLdPreload() from the user's
//      own LD_PRELOAD value (NULL is passed when the user had none).
// The entries of (2) and (3) are appended to envp. Every returned pointer
// refers to a string stored in envp, so the caller must keep envp alive and
// unmodified for as long as the returned vector is in use.
static dmtcp::vector<const char*> patchUserEnv (dmtcp::vector<dmtcp::string>
                                                  &envp)
{
  // Positions in envp of the entries that form the result. Indices are
  // recorded instead of c_str() pointers because the push_back calls below
  // may reallocate envp, which would leave earlier pointers dangling.
  dmtcp::vector<size_t> selected;

  // Own copy of the user's LD_PRELOAD value, for the same reason.
  dmtcp::string userPreload;
  bool hasUserPreload = false;

  dmtcp::ostringstream out;
  out << "non-DMTCP env vars:\n";

  const size_t userEnvCnt = envp.size();
  for ( size_t i = 0; i < userEnvCnt; i++) {
    if ( isImportantEnv ( envp[i].c_str() ) ) {
      if (dbg) {
        out << "     skipping: " << envp[i] << '\n';
      }
      continue;
    }
    if (dmtcp::Util::strStartsWith(envp[i], "LD_PRELOAD=")) {
      // Remember the value; the LD_PRELOAD entry itself is rebuilt below.
      userPreload = envp[i].c_str() + strlen("LD_PRELOAD=");
      hasUserPreload = true;
      continue;
    }

    selected.push_back (i);
    if(dbg) {
      out << "     addenv[user]:" << envp[i] << '\n';
    }
  }
  JTRACE ( "Creating a copy of (non-DMTCP) user env vars..." ) (out.str());

  //pack up our ENV into the new ENV
  // Reset the stream and then append the header: str("text") alone leaves the
  // write position at the start, so later output would overwrite the header.
  out.str("");
  out << "DMTCP env vars:\n";
  for ( size_t i=0; i<ourImportantEnvsCnt; ++i ) {
    const char* v = getenv ( ourImportantEnvs[i] );
    if ( v != NULL ) {
      envp.push_back ( dmtcp::string ( ourImportantEnvs[i] ) + '=' + v );
      selected.push_back (envp.size() - 1);
      if(dbg) {
        out << "     addenv[dmtcp]:" << envp.back() << '\n';
      }
    }
  }

  dmtcp::string ldPreloadStr = "LD_PRELOAD=";
  ldPreloadStr += getUpdatedLdPreload(hasUserPreload ? userPreload.c_str()
                                                     : NULL);

  envp.push_back(ldPreloadStr);
  selected.push_back (envp.size() - 1);
  if(dbg) {
    out << "     addenv[dmtcp]:" << envp.back() << '\n';
  }

  JTRACE ( "patching user envp..." )  (out.str());

  // envp no longer grows, so the strings' addresses are stable from here on.
  dmtcp::vector<const char*> envVect;
  envVect.reserve (selected.size() + 1);
  for ( size_t k = 0; k < selected.size(); k++ ) {
    envVect.push_back (envp[selected[k]].c_str());
  }
  envVect.push_back ( NULL );

  JTRACE ( "Done patching environ" );
  return envVect;
}
// Runs the peer-lookup stage of a checkpoint.
//
// Waits for the coordinator's DMT_DO_PEER_LOOKUP message, collects the TCP
// connection table and sends one lookup request per entry, then reports
// PEER_LOOKUP_COMPLETE and processes the coordinator's replies:
//   - DMT_KILL_PEER:    the process exits immediately;
//   - DMT_UNKNOWN_PEER: the named connection is marked external and recorded
//                       in externalTcpConnections; the loop keeps reading;
//   - anything else ends the loop and must be DMT_DO_DRAIN or DMT_DO_RESUME.
// Accept/connect TCP connections whose peer type is still unknown afterwards
// are marked internal.
//
// Returns false when the coordinator answered DMT_DO_RESUME (checkpointing is
// skipped) and true when it answered DMT_DO_DRAIN.
bool dmtcp::DmtcpWorker::waitForStage2bCheckpoint()
{
  waitForCoordinatorMsg ( "PEER_LOOKUP", DMT_DO_PEER_LOOKUP );
  JTRACE ( "Looking up Socket Peers..." );
  theTcpConnections.clear();
  theCheckpointState->preCheckpointPeerLookup(theTcpConnections);
  sendPeerLookupRequest(theTcpConnections);
  JTRACE ( "Done Socket Peer Lookup" );


  WorkerState::setCurrentState ( WorkerState::PEER_LOOKUP_COMPLETE );

  {
    dmtcp::DmtcpMessage msg;

    // Tell the coordinator that this worker finished its lookup requests.
    msg.type = DMT_OK;
    msg.state = WorkerState::currentState();
    _coordinatorSocket << msg;

    JTRACE ( "waiting for DRAIN/RESUME message" );

    // Keep reading while the coordinator reports unknown peers; any other
    // message type leaves the loop through the break below.
    do {
      msg.poison();
      _coordinatorSocket >> msg;
      msg.assertValid();

      if ( msg.type == DMT_KILL_PEER ) {
        JTRACE ( "Received KILL message from coordinator, exiting" );
        _exit ( 0 );
      }
      JTRACE ( "received message" ) (msg.type );
      if ( msg.type != DMT_UNKNOWN_PEER )
        break;

      JTRACE ("received DMT_UNKNOWN_PEER message") (msg.conId);

      TcpConnection* con =
        (TcpConnection*) &( ConnectionList::instance() [msg.conId] );
      con->markExternal();
      externalTcpConnections.push_back(msg.conId);
      _waitingForExternalSocketsToClose = true;

    } while ( msg.type == DMT_UNKNOWN_PEER );

    JASSERT ( msg.type == DMT_DO_DRAIN || msg.type == DMT_DO_RESUME )
            ( msg.type );

    ConnectionList& connections = ConnectionList::instance();

    // Tcp Accept and Connect connection with PeerType UNKNOWN should be marked as INTERNAL
    for ( ConnectionList::iterator i = connections.begin()
        ; i!= connections.end()
        ; ++i )
    {
      Connection* con =  i->second;
      if ( con->conType() == Connection::TCP ) {
        TcpConnection* tcpCon = (TcpConnection *) con;
        if ( (tcpCon->tcpType() == TcpConnection::TCP_ACCEPT ||
             tcpCon->tcpType() == TcpConnection::TCP_CONNECT) &&
             tcpCon->peerType() == TcpConnection::PEER_UNKNOWN )
          tcpCon->markInternal();
      }
    }
    if ( msg.type == DMT_DO_RESUME ) {
      JTRACE ( "Peer Lookup not complete, skipping checkpointing \n\n\n\n\n");
      return false;
    }

    JASSERT (msg.type == DMT_DO_DRAIN);
  }

  // DRAIN path. Without this return the function would flow off its end,
  // which is undefined behaviour for a function returning bool.
  return true;
}
void ProcessGroupInfo()
{
  map<pid_t,session> smap;
  map<pid_t,session>::iterator it;

  // 1. divide processes into sessions and groups
  for (size_t j = 0; j < targets.size(); j++)
  {
    ProcessInfo& processInfo = targets[j].getProcessInfo();
    JTRACE("Process ")
      (processInfo.pid()) (processInfo.ppid()) (processInfo.sid())
      (processInfo.gid()) (processInfo.fgid())
      (processInfo.isRootOfProcessTree());

    pid_t sid = processInfo.sid();
    pid_t gid = processInfo.gid();
    //pid_t fgid = processInfo.fgid();

    /*
    // If Group ID doesn't belong to known PIDs, indicate that fact
    //   using -1 value.
    if (!virtualPidTable.pidExists(gid)) {
    JTRACE("DROP gid")(gid);
    virtualPidTable.setgid(-1);
    gid = -1;
    }
    // If foreground Group ID not belongs to known PIDs,
    //   indicate that fact using -1 value.
    if (!virtualPidTable.pidExists(fgid)) {
    JTRACE("DROP fgid")(fgid);
    virtualPidTable.setfgid(-1);
    fgid = -1;
    }
    */

    session &s = smap[sid];
    // if this is first element of this session
    if (s.sid == -2) {
      s.sid = sid;
    }
    ProcessGroup &g = smap[sid].groups[gid];
    // if this is first element of Group gid
    if (g.gid == -2) {
      g.gid = gid;
    }
    g.targets.push_back(&targets[j]);
  }

  // 2. Check if foreground setting is correct
  it = smap.begin();
  for(;it != smap.end();it++) {
    session &s = it->second;
    session::group_it g_it = s.groups.begin();
    pid_t fgid = -2;
    if (s.sid == -1) // skip default bash session all processes will join
      continue;
    for(; g_it != s.groups.end();g_it++) {
      ProcessGroup &g = g_it->second;
      for(size_t k = 0; k < g.targets.size(); k++) {
        ProcessInfo& processInfo = g.targets[k]->getProcessInfo();
        pid_t cfgid = processInfo.fgid();
        if (fgid == -2) {
          fgid = cfgid;
        } else if (fgid != -1 && cfgid != -1 && fgid != cfgid) {
          dmtcp::ostringstream o;
          // DEBUG PRINTOUT:
          {
            session::group_it g_it1 = s.groups.begin();
            for(; g_it1 != s.groups.end();g_it1++) {
              ProcessGroup &g1 = g_it1->second;
              for(size_t m = 0; m < g1.targets.size() ;m++) {
                ProcessInfo& pInfo = g1.targets[m]->getProcessInfo();
                pid_t pid = pInfo.pid();
                pid_t ppid = pInfo.ppid();
                pid_t sid = pInfo.sid();
                pid_t cfgid = pInfo.fgid();
                o << "\n\tPID=" << pid << " PPID=" << ppid
                  << ", SID=" << sid << " <--> FGID = " << cfgid;
              }
            }
          }
          JASSERT (false) (fgid) (cfgid) (o.str())
            .Text("processes from same session have different "
                  "foreground Group ID");
        }
      }
      JTRACE("Checked ") (fgid);
    }
    s.fgid = fgid;
    if (s.groups.find(s.fgid) == s.groups.end()) {
      // foreground Group is missing, don't need to change foreground Group
      s.fgid = -1;
    }

    {
      session::group_it g_it1 = s.groups.begin();
      for(; g_it1 != s.groups.end();g_it1++) {
        ProcessGroup &g1 = g_it1->second;
        for(size_t m = 0; m < g1.targets.size(); m++) {
          ProcessInfo& processInfo = g1.targets[m]->getProcessInfo();
          pid_t pid = processInfo.pid();
          pid_t cfgid = processInfo.fgid();
          JTRACE("PID=%d <--> FGID = %d") (pid) (cfgid);
        }
      }
    }
  }

  // Print out session mapping.
  JTRACE("Session number:") (smap.size());
  it = smap.begin();
  for(; it != smap.end(); it++) {
    session &s = it->second;
    JTRACE("Session printout:") (s.sid) (s.fgid) (s.upid.toString().c_str());
    session::group_it g_it = s.groups.begin();
    for(; g_it != s.groups.end();g_it++) {
      ProcessGroup &g = g_it->second;
      JTRACE("\tGroup ID: ") (g.gid);
    }
  }
}
void BuildProcessTree()
{
  for (size_t j = 0; j < targets.size(); ++j)
  {
    ProcessInfo& processInfo = targets[j].getProcessInfo();
    if (processInfo.isRootOfProcessTree() == true) {
      // If this process is independent (root of process tree
      RootTarget rt;
      rt.t = &targets[j];
      rt.indep = true;
      roots.push_back(rt);
      targets[j].markUsed();
    } else if (!targets[j].isMarkedUsed()) {
      // We set used flag if we use target as somebody's child.
      // If it is used, then there is no need to check if it is root.
      // Iterate through all targets and try to find the one who has
      // this process as their child process.
      JTRACE("Process is not root of process tree: try to find if it has parent");
      bool is_root = true;
      for (size_t i = 0; i < targets.size(); i++) {
        if (i == j) continue;
        ProcessInfo &pInfo = targets[i].getProcessInfo();
        ProcessInfo::iterator it;
        // Search inside the child list of target[j], make sure that i != j
        for (it = pInfo.begin(); (it != pInfo.end()); it++) {
          UniquePid& childUniquePid = it->second;
          JTRACE("Check child") (childUniquePid) (" parent ") (targets[i].upid())
            ("checked ") (targets[j].upid());
          if (childUniquePid == targets[j].upid()) {
            is_root = false;
            break;
          }
        }
      }
      JTRACE("Root detection:") (is_root) (targets[j].upid());
      if (is_root) {
        RootTarget rt;
        rt.t = &targets[j];
        rt.indep = true;
        roots.push_back(rt);
        targets[j].markUsed();
      }
    }

    // Add all children
    ProcessInfo::iterator it;
    for(it = processInfo.begin(); it != processInfo.end(); it++) {
      // find target
      bool found = false;
      pid_t childVirtualPid = it->first;
      UniquePid& childUniquePid = it->second;

      for (size_t i = 0; i < targets.size(); i++) {
        if (childUniquePid == targets[i].upid()) {
          found = 1;
          JTRACE ("Add child to current target") (targets[j].upid()) (childUniquePid);
          targets[i].markUsed();
          targets[j].addChild(&targets[i]);
        }
      }
      if (!found) {
        JTRACE("Child not found")(childVirtualPid);
        processInfo.eraseChild(childVirtualPid);
      }
    }
  }
}
int main(int argc, char** argv)
{
  bool autoStartCoordinator=true;
  bool isRestart = true;
  int allowedModes = dmtcp::DmtcpCoordinatorAPI::COORD_ANY;

  initializeJalib();

  if (!getenv(ENV_VAR_QUIET)) {
    setenv(ENV_VAR_QUIET, "0", 0);
  }

  if (argc == 1) {
    JASSERT_STDERR << DMTCP_VERSION_AND_COPYRIGHT_INFO;
    JASSERT_STDERR << "(For help:  " << argv[0] << " --help)\n\n";
    return DMTCP_FAIL_RC;
  }

  //process args
  shift;
  while (true) {
    dmtcp::string s = argc>0 ? argv[0] : "--help";
    if (s == "--help" && argc == 1) {
      JASSERT_STDERR << theUsage;
      return DMTCP_FAIL_RC;
    } else if ((s == "--version") && argc == 1) {
      JASSERT_STDERR << DMTCP_VERSION_AND_COPYRIGHT_INFO;
      return DMTCP_FAIL_RC;
    } else if (s == "--no-check") {
      autoStartCoordinator = false;
      shift;
    } else if (s == "-j" || s == "--join") {
      allowedModes = dmtcp::DmtcpCoordinatorAPI::COORD_JOIN;
      shift;
    } else if (s == "-n" || s == "--new") {
      allowedModes = dmtcp::DmtcpCoordinatorAPI::COORD_NEW;
      shift;
    } else if (s == "--new-coordinator") {
      allowedModes = dmtcp::DmtcpCoordinatorAPI::COORD_FORCE_NEW;
      shift;
    } else if (s == "-b" || s == "--batch") {
      allowedModes = dmtcp::DmtcpCoordinatorAPI::COORD_BATCH;
      shift;
    } else if (s == "-i" || s == "--interval" ||
               (s.c_str()[0] == '-' && s.c_str()[1] == 'i' &&
                isdigit(s.c_str()[2]))) {
      if (isdigit(s.c_str()[2])) { // if -i5, for example
        setenv(ENV_VAR_CKPT_INTR, s.c_str()+2, 1);
        shift;
      } else { // else -i 5
        setenv(ENV_VAR_CKPT_INTR, argv[1], 1);
        shift; shift;
      }
    } else if (argc > 1 && (s == "-h" || s == "--host")) {
      setenv(ENV_VAR_NAME_HOST, argv[1], 1);
      shift; shift;
    } else if (argc > 1 && (s == "-p" || s == "--port")) {
      setenv(ENV_VAR_NAME_PORT, argv[1], 1);
      shift; shift;
    } else if (argc > 1 && (s == "-t" || s == "--tmpdir")) {
      setenv(ENV_VAR_TMPDIR, argv[1], 1);
      shift; shift;
    } else if (s == "-q" || s == "--quiet") {
      *getenv(ENV_VAR_QUIET) = *getenv(ENV_VAR_QUIET) + 1;
      // Just in case a non-standard version of setenv is being used:
      setenv(ENV_VAR_QUIET, getenv(ENV_VAR_QUIET), 1);
      shift;
    } else if ((s.length() > 2 && s.substr(0, 2) == "--") ||
               (s.length() > 1 && s.substr(0, 1) == "-")) {
      JASSERT_STDERR << "Invalid Argument\n";
      JASSERT_STDERR << theUsage;
      return DMTCP_FAIL_RC;
    } else if (argc > 1 && s == "--") {
      shift;
      break;
    } else {
      break;
    }
  }

  dmtcp::UniquePid::setTmpDir(getenv(ENV_VAR_TMPDIR));
  dmtcpTmpDir = dmtcp::UniquePid::getTmpDir();

  jassert_quiet = *getenv(ENV_VAR_QUIET) - '0';

  //make sure JASSERT initializes now, rather than during restart
  Util::initializeLogFile();

  if (jassert_quiet == 0)
    JASSERT_STDERR << DMTCP_BANNER;

  if (autoStartCoordinator)
    dmtcp::DmtcpCoordinatorAPI::startCoordinatorIfNeeded(allowedModes,
                                                         isRestart);

  JTRACE("New dmtcp_restart process; _argc_ ckpt images") (argc);

  bool doAbort = false;
  for (; argc > 0; shift) {
    dmtcp::string restorename(argv[0]);
    struct stat buf;
    int rc = stat(restorename.c_str(), &buf);
    if (Util::strStartsWith(restorename, "ckpt_") &&
        Util::strEndsWith(restorename, "_files")) {
      continue;
    } else if (!Util::strEndsWith(restorename, ".dmtcp")) {
      JNOTE("File doesn't have .dmtcp extension. Check Usage.")
        (restorename);
      JASSERT_STDERR << theUsage;
      doAbort = true;
    } else if (rc == -1) {
      char error_msg[1024];
      sprintf(error_msg, "\ndmtcp_restart: ckpt image %s", restorename.c_str());
      perror(error_msg);
      doAbort = true;
    } else if (buf.st_uid != getuid()) { /*Could also run if geteuid() matches*/
      printf("\nProcess uid (%d) doesn't match uid (%lu) of\n" \
             "checkpoint image (%s).\n" \
	     "This is dangerous.  Aborting for security reasons.\n" \
             "If you still want to do this (at your own risk),\n" \
             "  then modify dmtcp/src/%s:%d and re-compile.\n",
             getuid(), buf.st_uid, restorename.c_str(), __FILE__, __LINE__ - 6);
      doAbort = true;
    }
    if (doAbort) {
      exit(DMTCP_FAIL_RC);
    }

    JTRACE("Will restart ckpt image _argv[0]_") (argv[0]);
    targets.push_back (RestoreTarget (argv[0]));
  }

  if (targets.size() <= 0) {
    JNOTE("ERROR: No DMTCP checkpoint image(s) found. Check Usage.");
    JASSERT_STDERR << theUsage;
    exit(DMTCP_FAIL_RC);
  }

  // Check that all targets belongs to one computation Group
  // If not - abort
  compGroup = targets[0].compGroup();
  numPeers = targets[0].numPeers();
  for(size_t i=0; i<targets.size(); i++) {
    JTRACE ("Check targets: ")
      (targets[i].path()) (targets[i].compGroup()) (targets[i].numPeers());
    if (compGroup != targets[i].compGroup()) {
      JASSERT(false)(compGroup)(targets[i].compGroup())
	.Text("ERROR: Restored programs belong to different computation IDs");
    } else if (numPeers != targets[i].numPeers()) {
      JASSERT(false)(numPeers)(targets[i].numPeers())
	.Text("ERROR: Different number of processes saved in checkpoint images");
    }
  }

  SlidingFdTable slidingFd;
  ConnectionToFds conToFd;

  ostringstream out;
  out << "will restore:\n";
  out << "\tfd  -> connection-id\n";
  ConnectionList& connections = ConnectionList::instance();
  ConnectionList::iterator it;
  for (it = connections.begin(); it != connections.end(); ++it) {
    int fd = slidingFd.getFdFor(it->first);
    conToFd[it->first].push_back(fd);
    out << "\t" << fd << " -> " << (it->first)
        << " -> " << (it->second)->str() << "\n";
  }
  JTRACE ("Allocating fds for Connections") (out.str());

  //------------------------
  WorkerState::setCurrentState(WorkerState::RESTARTING);
  ConnectionState ckptCoord(conToFd);
  DmtcpCoordinatorAPI coordinatorAPI;
  restoreSockets(coordinatorAPI, ckptCoord);

  /* Create the file to hold the pid/tid maps. */
  openOriginalToCurrentMappingFiles();

#ifndef PID_VIRTUALIZATION
  int i = (int)targets.size();

  //fork into targs.size() processes
  while (--i > 0) {
    int cid = fork();
    if (cid == 0) break;
    else JASSERT(cid > 0);
  }
  RestoreTarget& targ = targets[i];

  JTRACE("forked, restoring process")
    (i) (targets.size()) (targ.upid()) (getpid());

  //change UniquePid
  UniquePid::resetOnFork(targ.upid());

  //Reconnect to dmtcp_coordinator
  WorkerState::setCurrentState (WorkerState::RESTARTING);

  int tmpCoordFd = dup(PROTECTED_COORD_FD);
  JASSERT(tmpCoordFd != -1);
  coordinatorAPI.connectToCoordinator();
  coordinatorAPI.sendCoordinatorHandshake(targ.procname(), targ.compGroup());
  coordinatorAPI.recvCoordinatorHandshake();
  close(tmpCoordFd);

  //restart targets[i]
  targets[i].dupAllSockets (slidingFd);
  targets[i].mtcpRestart();

  JASSERT(false).Text("unreachable");
  return -1;
#endif
  //size_t i = targets.size();

  // Create roots vector, assign children to their parents.
  // Delete children that don't exist.
  BuildProcessTree();

  // Process all checkpoints to find one of them that can switch
  // needed Group to foreground.
  ProcessGroupInfo();
  // Create session meta-information in each node of the process tree.
  // Node contains info about all sessions which exists at lower levels.
  // Also node is aware of session leader existence at lower levels.
  SetupSessions();

  int pgrp_index=-1;
  JTRACE("Creating ROOT Processes") (roots.size());
  for (size_t j = 0 ; j < roots.size(); ++j) {
    if (roots[j].indep == false) {
      // We will restore this process from one of the independent roots.
      continue;
    }
    if (pgrp_index == -1 && !roots[j].t->isInitChild()) {
      pgrp_index = j;
      continue;
    }

    pid_t cid = fork();
    if (cid == 0) {
      JTRACE ("Root of process tree") (getpid()) (getppid());
      if (roots[j].t->isInitChild()) {
        JTRACE ("Create init-child process") (getpid()) (getppid());
        if (fork())
          _exit(0);
      }
      roots[j].t->CreateProcess(coordinatorAPI, slidingFd);
      JASSERT (false) .Text("Unreachable");
    }
    JASSERT (cid > 0);
    if (roots[j].t->isInitChild()) {
      waitpid(cid, NULL, 0);
    }
  }

  JTRACE("Restore processes without corresponding Root Target");
  int flat_index = -1;
  size_t j = 0;
  if (pgrp_index < 0) { // No root processes at all
    // Find first flat process that can replace currently running
    //   dmtcp_restart context.
    for (j = 0; j < targets.size(); ++j) {
      if (!targets[j].isMarkedUsed()) {
        // Save first flat-like process to be restored after all others
        flat_index = j;
        j++;
        break;
      }
    }
  }
  // Use j set to 0 (if at least one root non-init-child process exists),
  // or else j set to some value if no such process found.
  for(; j < targets.size(); ++j) {
    if (!targets[j].isMarkedUsed()) {
      if (pgrp_index < 0) {
        // Save first flat-like process to be restored after all others
        pgrp_index = j;
        continue;
      } else {
        targets[j].CreateProcess(coordinatorAPI, slidingFd);
        JTRACE("Need in flat-like restore for process") (targets[j].upid());
      }
    }
  }

  if (pgrp_index >= 0) {
    JTRACE("Restore first Root Target")(roots[pgrp_index].t->upid());
    roots[pgrp_index].t->CreateProcess(coordinatorAPI, slidingFd);
  } else if (flat_index >= 0) {
    JTRACE("Restore first Flat Target")(targets[flat_index].upid());
    targets[flat_index].CreateProcess(coordinatorAPI, slidingFd);
  } else {
    // FIXME: Under what conditions will this path be exercised?
    JNOTE ("unknown type of target?") (targets[flat_index].path());
  }
// #endif
}