/**
 * Tell the coordinator about the process that was just created by fork().
 *
 * The supplied socket is stored as this object's coordinator socket and its
 * descriptor is changed to PROTECTED_COORD_FD. A handshake of type
 * DMT_UPDATE_PROCESS_INFO_AFTER_FORK is then sent, carrying the program name
 * suffixed with "_(forked)" and the current computation id.
 *
 * @param coordSock  Socket to adopt as the coordinator socket. It must be
 *                   valid and must not already sit on PROTECTED_COORD_FD;
 *                   both conditions are asserted.
 */
void dmtcp::DmtcpCoordinatorAPI::informCoordinatorOfNewProcessOnFork
  (jalib::JSocket& coordSock)
{
  // Preconditions on the incoming socket.
  JASSERT(coordSock.isValid());
  JASSERT(coordSock.sockfd() != PROTECTED_COORD_FD);

  // Adopt the socket and relocate it to the reserved coordinator descriptor.
  _coordinatorSocket = coordSock;
  _coordinatorSocket.changeFd(PROTECTED_COORD_FD);

  JTRACE("Informing coordinator of new process") (UniquePid::ThisProcess());

  // The "_(forked)" suffix distinguishes this process from the one that
  // forked it when the name is reported to the coordinator.
  const dmtcp::string forkedName =
    jalib::Filesystem::GetProgramName() + "_(forked)";

  sendCoordinatorHandshake(forkedName,
                           UniquePid::ComputationId(),
                           -1,
                           DMT_UPDATE_PROCESS_INFO_AFTER_FORK);
}
void dmtcp::ConnectionState::doReconnect(jalib::JSocket& coordinator, jalib::JSocket& restoreListen) { _rewirer.addDataSocket(new jalib::JChunkReader(coordinator, sizeof(DmtcpMessage))); _rewirer.addListenSocket(restoreListen); _rewirer.setCoordinatorFd(coordinator.sockfd()); handleDuplicateFilesInSeparateConnections(); ConnectionList& connections = ConnectionList::instance(); // Here we modify the restore algorithm by splitting it in two parts. In the // first part we restore all the connection except the PTY_SLAVE types and in // the second part we restore only PTY_SLAVE connections. This is done to // make sure that by the time we are trying to restore a PTY_SLAVE // connection, its corresponding PTY_MASTER connection has already been // restored. // UPDATE: We also restore the files for which the we didn't have the lock in // second iteration along with PTY_SLAVEs // Part 1: Restore all but Pseudo-terminal slaves and file connection which // were not checkpointed ConnectionList::iterator i; for (i= connections.begin(); i != connections.end(); ++i) { ConnectionIdentifier id = i->first; Connection *con = i->second; JASSERT(_conToFds[id].size() > 0) .Text("stale connections should be gone by now"); if (con->subType() == FileConnection::FILE_PROCFS) { continue; } if (con->conType() == Connection::TCP) { TcpConnection *tcpCon =(TcpConnection *) con; if (tcpCon->peerType() == TcpConnection::PEER_SOCKETPAIR) { ConnectionIdentifier peerId = tcpCon->getSocketpairPeerId(); TcpConnection *peerCon = (TcpConnection*) connections.getConnection(peerId); if (peerCon != NULL) { tcpCon->restoreSocketPair(_conToFds[id], peerCon, _conToFds[peerId]); continue; } } } if (con->restoreInSecondIteration() == false) { con->restore(_conToFds[id], &_rewirer); } } // Part 2: Restore all Pseudo-terminal slaves and file connections that were // not checkpointed. 
for (i = connections.begin(); i != connections.end(); ++i) { Connection *con = i->second; JASSERT(_conToFds[i->first].size() > 0) .Text("stale connections should be gone by now"); if (con->subType() == FileConnection::FILE_PROCFS) { continue; } if (con->restoreInSecondIteration() == true) { con->restore(_conToFds[i->first], &_rewirer); } } _rewirer.doReconnect(); }