void dmtcp::FileConnList::remapShmMaps() { for (size_t i = 0; i < shmAreas.size(); i++) { Util::ProcMapsArea *area = &shmAreas[i]; FileConnection *fileCon = shmAreaConn[i]; int fd = fileCon->getFds()[0]; JTRACE("Restoring shared memory area") (area->name) (area->addr); void *addr = _real_mmap(area->addr, area->size, area->prot, MAP_FIXED | area->flags, fd, area->offset); JASSERT(addr != MAP_FAILED) (area->flags) (area->prot) (JASSERT_ERRNO) .Text("mmap failed"); _real_close(fd); processClose(fd); } shmAreas.clear(); shmAreaConn.clear(); }
void dmtcp::FileConnList::prepareShmList() { Util::ProcMapsArea area; int mapsfd = _real_open("/proc/self/maps", O_RDONLY, 0); JASSERT(mapsfd != -1) (JASSERT_ERRNO); shmAreas.clear(); shmAreaConn.clear(); while (Util::readProcMapsLine(mapsfd, &area)) { if ((area.flags & MAP_SHARED) && area.prot != 0) { if (strstr(area.name, "ptraceSharedInfo") != NULL || strstr(area.name, "dmtcpPidMap") != NULL || strstr(area.name, "dmtcpSharedArea") != NULL || strstr(area.name, "dmtcpSharedArea") != NULL || strstr(area.name, "synchronization-log") != NULL || strstr(area.name, "synchronization-read-log") != NULL) { continue; } if (jalib::Filesystem::FileExists(area.name)) { if (_real_access(area.name, W_OK) == 0) { JTRACE("Will checkpoint shared memory area") (area.name); int flags = Util::memProtToOpenFlags(area.prot); int fd = _real_open(area.name, flags, 0); JASSERT(fd != -1) (JASSERT_ERRNO) (area.name); FileConnection *fileConn = new FileConnection(area.name, flags, 0, FileConnection::FILE_SHM); add(fd, fileConn); shmAreas.push_back(area); shmAreaConn.push_back(fileConn); /* Instead of unmapping the shared memory area, we make it * non-readable. This way mtcp will skip the region while at the same * time, we prevent JALLOC arena to grow over it. * * By munmapping the area, a bug was observed on CCIS linux with * 'make check-java'. Once the region was unmapped, the JALLOC arena * grew over it. During restart, the JALLOC'd area was reclaimed for * remapping the shm file without informing JALLOC. Finally, during * the second checkpoint cycle, the area was again unmapped and later * JALLOC tried to access it, causing a SIGSEGV. */ JASSERT(_real_mmap(area.addr, area.size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) != MAP_FAILED) (JASSERT_ERRNO); } else { JTRACE("Will not checkpoint shared memory area") (area.name); } } else { // TODO: Shared memory areas with unlinked backing files. #if 0 JASSERT(Util::strEndsWith(area.name, DELETED_FILE_SUFFIX)) (area.name); if (Util::strStartsWith(area.name, DEV_ZERO_DELETED_STR) || Util::strStartsWith(area.name, DEV_NULL_DELETED_STR)) { JWARNING(false) (area.name) .Text("Ckpt/Restart of Anon Shared memory not supported."); } else { JTRACE("Will recreate shm file on restart.") (area.name); //shmAreas[area] = NULL; } #endif } } } _real_close(mapsfd); }
bool dmtcp::DmtcpWorker::waitForStage2bCheckpoint() { waitForCoordinatorMsg ( "PEER_LOOKUP", DMT_DO_PEER_LOOKUP ); JTRACE ( "Looking up Socket Peers..." ); theTcpConnections.clear(); theCheckpointState->preCheckpointPeerLookup(theTcpConnections); sendPeerLookupRequest(theTcpConnections); JTRACE ( "Done Socket Peer Lookup" ); WorkerState::setCurrentState ( WorkerState::PEER_LOOKUP_COMPLETE ); { dmtcp::DmtcpMessage msg; msg.type = DMT_OK; msg.state = WorkerState::currentState(); _coordinatorSocket << msg; JTRACE ( "waiting for DRAIN/RESUME message" ); do { msg.poison(); _coordinatorSocket >> msg; msg.assertValid(); if ( msg.type == DMT_KILL_PEER ) { JTRACE ( "Received KILL message from coordinator, exiting" ); _exit ( 0 ); } JTRACE ( "received message" ) (msg.type ); if ( msg.type != DMT_UNKNOWN_PEER ) break; JTRACE ("received DMT_UNKNOWN_PEER message") (msg.conId); TcpConnection* con = (TcpConnection*) &( ConnectionList::instance() [msg.conId] ); con->markExternal(); externalTcpConnections.push_back(msg.conId); _waitingForExternalSocketsToClose = true; } while ( msg.type == DMT_UNKNOWN_PEER ); JASSERT ( msg.type == DMT_DO_DRAIN || msg.type == DMT_DO_RESUME ) ( msg.type ); ConnectionList& connections = ConnectionList::instance(); // Tcp Accept and Connect connection with PeerType UNKNOWN should be marked as INTERNAL for ( ConnectionList::iterator i = connections.begin() ; i!= connections.end() ; ++i ) { Connection* con = i->second; if ( con->conType() == Connection::TCP ) { TcpConnection* tcpCon = (TcpConnection *) con; if ( (tcpCon->tcpType() == TcpConnection::TCP_ACCEPT || tcpCon->tcpType() == TcpConnection::TCP_CONNECT) && tcpCon->peerType() == TcpConnection::PEER_UNKNOWN ) tcpCon->markInternal(); } } if ( msg.type == DMT_DO_RESUME ) { JTRACE ( "Peer Lookup not complete, skipping checkpointing \n\n\n\n\n"); return false; } JASSERT (msg.type == DMT_DO_DRAIN); } }