예제 #1
0
void dmtcp::FileConnList::remapShmMaps()
{
  for (size_t i = 0; i < shmAreas.size(); i++) {
    Util::ProcMapsArea *area = &shmAreas[i];
    FileConnection *fileCon = shmAreaConn[i];
    int fd = fileCon->getFds()[0];
    JTRACE("Restoring shared memory area") (area->name) (area->addr);
    void *addr = _real_mmap(area->addr, area->size, area->prot,
                            MAP_FIXED | area->flags,
                            fd, area->offset);
    JASSERT(addr != MAP_FAILED) (area->flags) (area->prot) (JASSERT_ERRNO) .Text("mmap failed");
    _real_close(fd);
    processClose(fd);
  }
  shmAreas.clear();
  shmAreaConn.clear();
}
예제 #2
0
void dmtcp::FileConnList::prepareShmList()
{
  Util::ProcMapsArea area;
  int mapsfd = _real_open("/proc/self/maps", O_RDONLY, 0);
  JASSERT(mapsfd != -1) (JASSERT_ERRNO);

  shmAreas.clear();
  shmAreaConn.clear();
  while (Util::readProcMapsLine(mapsfd, &area)) {
    if ((area.flags & MAP_SHARED) && area.prot != 0) {
      if (strstr(area.name, "ptraceSharedInfo") != NULL ||
          strstr(area.name, "dmtcpPidMap") != NULL ||
          strstr(area.name, "dmtcpSharedArea") != NULL ||
          strstr(area.name, "dmtcpSharedArea") != NULL ||
          strstr(area.name, "synchronization-log") != NULL ||
          strstr(area.name, "synchronization-read-log") != NULL) {
        continue;
      }
      if (jalib::Filesystem::FileExists(area.name)) {
        if (_real_access(area.name, W_OK) == 0) {
          JTRACE("Will checkpoint shared memory area") (area.name);
          int flags = Util::memProtToOpenFlags(area.prot);
          int fd = _real_open(area.name, flags, 0);
          JASSERT(fd != -1) (JASSERT_ERRNO) (area.name);
          FileConnection *fileConn =
            new FileConnection(area.name, flags, 0, FileConnection::FILE_SHM);
          add(fd, fileConn);
          shmAreas.push_back(area);
          shmAreaConn.push_back(fileConn);
          /* Instead of unmapping the shared memory area, we make it
           * non-readable. This way mtcp will skip the region while at the same
           * time, we prevent JALLOC arena to grow over it.
           *
           * By munmapping the area, a bug was observed on CCIS linux with
           * 'make check-java'. Once the region was unmapped, the JALLOC arena
           * grew over it. During restart, the JALLOC'd area was reclaimed for
           * remapping the shm file without informing JALLOC. Finally, during
           * the second checkpoint cycle, the area was again unmapped and later
           * JALLOC tried to access it, causing a SIGSEGV.
           */
          JASSERT(_real_mmap(area.addr, area.size, PROT_NONE,
                             MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
                             -1, 0) != MAP_FAILED) (JASSERT_ERRNO);
        } else {
          JTRACE("Will not checkpoint shared memory area") (area.name);
        }
      } else {
        // TODO: Shared memory areas with unlinked backing files.
#if 0
        JASSERT(Util::strEndsWith(area.name, DELETED_FILE_SUFFIX)) (area.name);
        if (Util::strStartsWith(area.name, DEV_ZERO_DELETED_STR) ||
            Util::strStartsWith(area.name, DEV_NULL_DELETED_STR)) {
          JWARNING(false) (area.name)
            .Text("Ckpt/Restart of Anon Shared memory not supported.");
        } else {
          JTRACE("Will recreate shm file on restart.") (area.name);
          //shmAreas[area] = NULL;
        }
#endif
      }
    }
  }
  _real_close(mapsfd);
}
bool dmtcp::DmtcpWorker::waitForStage2bCheckpoint()
{
  waitForCoordinatorMsg ( "PEER_LOOKUP", DMT_DO_PEER_LOOKUP );
  JTRACE ( "Looking up Socket Peers..." );
  theTcpConnections.clear();
  theCheckpointState->preCheckpointPeerLookup(theTcpConnections);
  sendPeerLookupRequest(theTcpConnections);
  JTRACE ( "Done Socket Peer Lookup" );


  WorkerState::setCurrentState ( WorkerState::PEER_LOOKUP_COMPLETE );

  {
    dmtcp::DmtcpMessage msg;

    msg.type = DMT_OK;
    msg.state = WorkerState::currentState();
    _coordinatorSocket << msg;

    JTRACE ( "waiting for DRAIN/RESUME message" );

    do {
      msg.poison();
      _coordinatorSocket >> msg;
      msg.assertValid();

      if ( msg.type == DMT_KILL_PEER ) {
        JTRACE ( "Received KILL message from coordinator, exiting" );
        _exit ( 0 );
      }
      JTRACE ( "received message" ) (msg.type );
      if ( msg.type != DMT_UNKNOWN_PEER )
        break;

      JTRACE ("received DMT_UNKNOWN_PEER message") (msg.conId);

      TcpConnection* con =
        (TcpConnection*) &( ConnectionList::instance() [msg.conId] );
      con->markExternal();
      externalTcpConnections.push_back(msg.conId);
      _waitingForExternalSocketsToClose = true;

    } while ( msg.type == DMT_UNKNOWN_PEER );

    JASSERT ( msg.type == DMT_DO_DRAIN || msg.type == DMT_DO_RESUME )
            ( msg.type );

    ConnectionList& connections = ConnectionList::instance();

    // Tcp Accept and Connect connection with PeerType UNKNOWN should be marked as INTERNAL
    for ( ConnectionList::iterator i = connections.begin()
        ; i!= connections.end()
        ; ++i )
    {
      Connection* con =  i->second;
      if ( con->conType() == Connection::TCP ) {
        TcpConnection* tcpCon = (TcpConnection *) con;
        if ( (tcpCon->tcpType() == TcpConnection::TCP_ACCEPT ||
             tcpCon->tcpType() == TcpConnection::TCP_CONNECT) &&
             tcpCon->peerType() == TcpConnection::PEER_UNKNOWN )
          tcpCon->markInternal();
      }
    }
    if ( msg.type == DMT_DO_RESUME ) {
      JTRACE ( "Peer Lookup not complete, skipping checkpointing \n\n\n\n\n");
      return false;
    }

    JASSERT (msg.type == DMT_DO_DRAIN);
  }
}