/// Hand a packed serializer to the sender thread for delivery to one process.
///
/// @param ser    serializer holding the message; must be non-null and in
///               packing mode (asserted).
/// @param rcver  global id of the receiving process; it is converted to a
///               channel-local id by subtracting m_globalIdShift.
void GenericCommunicator::send_msg( serializer * ser, int rcver )
{
    VT_FUNC_I( "Dist::GenComm::send_msg" );

    // The channel addresses processes by local id, callers use global ids.
    const int localRcver = rcver - m_globalIdShift;

    // The target must be a known, active process other than ourselves
    // (messages to self are not allowed).
    CNC_ASSERT( 0 <= localRcver && localRcver < m_channel->numProcs() );
    CNC_ASSERT( localRcver != m_channel->localId() );
    CNC_ASSERT( m_channel->isActive( localRcver ) );

    // The serializer must still be in packing mode when it is handed over.
    CNC_ASSERT( ser && ser->is_packing() );
    //BufferAccess::finalizePack( *ser );

    // ITAC logging: which ids/tags get traced depends on whether the
    // sender thread itself is traced.
#ifdef WITHOUT_SENDER_THREAD_TRACING
    VT_SEND( localRcver, static_cast< int >( ser->get_total_size() ), ITC_TAG_EXTERNAL );
#else
    VT_SEND( m_channel->localId(), static_cast< int >( ser->get_total_size() ), ITC_TAG_INTERNAL );
#endif

    // Queue the message for the sender thread.
    m_sendThread->pushForSend( ser, localRcver );
}
int MpiChannelInterface::sendBytes( void * data, size_type headerSize, size_type bodySize, int rcverLocalId ) { VT_FUNC_I( "MPI::sendBytes" ); CNC_ASSERT( 0 <= rcverLocalId && rcverLocalId < numProcs() ); if( bodySize + headerSize > INT_MAX ) { std::cerr << "MPI_Get_count doesn't allow a count > " << INT_MAX << ". No workaround implemented yet." << std::endl; MPI_Abort( m_communicator, 1234 ); } char* header_data = static_cast<char*>( data ); MPI_Request request = 0; if( headerSize+bodySize < BUFF_SIZE ) { MPI_Isend( header_data, headerSize+bodySize, MPI_CHAR, rcverLocalId, FIRST_MSG, m_communicator, &request ); } else { CNC_ASSERT( bodySize > 0 ); // header Tag should not be equal to localId() MPI_Send( header_data, headerSize, MPI_CHAR, rcverLocalId, FIRST_MSG, m_communicator ); char * body_data = header_data+headerSize; MPI_Isend( body_data, bodySize, MPI_CHAR, rcverLocalId, SECOND_MSG, m_communicator, &request ); } // { Speaker oss; oss << "sendBytes " << headerSize << " " << bodySize; } #ifdef PRE_SEND_MSGS return request; #else MPI_Wait( &request, MPI_STATUS_IGNORE ); return 0; #endif }
/// Block until the pending receive (m_request) completes, fetch the body if it
/// was sent separately, and re-arm the receive on the other buffer.
///
/// @param[out] senderLocalId  set to the MPI source rank of the message.
/// @return the serializer holding the received message, or NULL when the
///         unpacked header reports a body size of 0.
serializer * MpiChannelInterface::waitForAnyClient( int & senderLocalId )
{
    VT_FUNC_I( "MPI::waitForAnyClient" );

    MPI_Status rcvStatus;
    MPI_Wait( &m_request, &rcvStatus );
    senderLocalId = rcvStatus.MPI_SOURCE;
    CNC_ASSERT( 0 <= senderLocalId && senderLocalId < numProcs() );

    int rcvdBytes;
    MPI_Get_count( &rcvStatus, MPI_CHAR, &rcvdBytes );

    // Original note: unpack_header() throws an exception in case of error.
    size_type bodyBytes = m_ser1->unpack_header();

    // Either header+body arrived together, or only the header did.
    CNC_ASSERT( bodyBytes + m_ser1->get_header_size() == rcvdBytes || m_ser1->get_header_size() == rcvdBytes );

    const bool hasBody = ( bodyBytes != 0 );
    if( hasBody ) {
        CNC_ASSERT( bodyBytes != Buffer::invalid_size );
        // Original note: needed even if everything was received already,
        // because it sets the current pointer in the buffer.
        BufferAccess::acquire( *m_ser1, bodyBytes );
        if( rcvdBytes == m_ser1->get_header_size() ) {
            // Only the header came with FIRST_MSG: pull the body now.
            MPI_Recv( m_ser1->get_body(), bodyBytes, MPI_CHAR, senderLocalId, SECOND_MSG, m_communicator, MPI_STATUS_IGNORE );
        }
    }

    // Double buffer exchange: the message just received ends up in m_ser2,
    // m_ser1 becomes the target of the next receive.
    std::swap( m_ser1, m_ser2 );
    m_ser1->set_mode_unpack();
    MPI_Irecv( m_ser1->get_header(), BufferAccess::capacity( *m_ser1 ), MPI_CHAR, MPI_ANY_SOURCE, FIRST_MSG, m_communicator, &m_request );
    // { Speaker oss; oss << "recvBytes " << _bodySize; }

    if( ! hasBody ) {
        return NULL;
    }
    return m_ser2;
}
void GenericCommunicator::fini() { VT_FUNC_I( "Dist::GenComm::fini" ); // Something to do at all? if ( ! m_hasBeenInitialized ) { return; } else { m_hasBeenInitialized = false; } // Host sends termination requests to the remote clients. // Each client will send a response. if( m_channel->localId() == 0 ) { for ( int client = 1; client < numProcs(); ++client ) { send_termination_request( client ); } } // Stop sender and receiver threads: if ( m_recvThread ) { m_recvThread->stop(); } if ( m_sendThread ) { m_sendThread->stop(); } // Cleanup: delete m_recvThread; m_recvThread = NULL; delete m_sendThread; m_sendThread = NULL; // Cleanup ITAC stuff: VT_FINALIZE(); }
int GenericCommunicator::init( int minId, int /*flag*/ ) { VT_FUNC_I( "Dist::GenComm::init" ); // Already running? if ( m_hasBeenInitialized ) { return 0; } else { m_hasBeenInitialized = true; } // Initialize ids: m_globalIdShift = minId; /* * NOTE: This method will be called from init() of a derived class. * The derived class must have instantiated m_channel * accordingly. Therefore, we can use m_channel here. */ // A process cannot send a message to itself. // Therefore, disable this channel: CNC_ASSERT( m_channel ); m_channel->deactivate( m_channel->localId() ); // Start sender loop (same on host and on client): CNC_ASSERT( m_sendThread == 0 ); m_sendThread = new SendThread( *m_channel ); #ifndef WITHOUT_SENDER_THREAD # ifndef WITHOUT_SENDER_THREAD_TRACING m_sendThread->defineThreadName( "SEND", m_channel->localId() ); # endif m_sendThread->start(); #endif // Start receiver loop: CNC_ASSERT( m_recvThread == 0 ); m_recvThread = new RecvThread( *this ); // needs whole instance, not only the channel if ( ! remote() || distributor::distributed_env() ) { // Host(s) runs the receiver loop in a separate thread. // Prepare receiver thread: m_recvThread->defineThreadName( "RECV" ); m_recvThread->start(); } else { // clients run it in the main thread m_recvThread->runEventLoop(); fini(); // On certain OSes (such as MIC), a call to "exit" is // considered an abnormal termination, regarless of // the argument. if (m_exit0CallOk) exit( 0 ); } return 0; }
/// Queue a packed serializer for broadcast to an explicit list of receivers.
///
/// @param ser      serializer holding the message; must be non-null and in
///                 packing mode (asserted).
/// @param rcvers   array of receiver ids, forwarded together with
///                 m_globalIdShift to the sender thread.
/// @param nrecvrs  number of entries in rcvers.
/// @return whatever SendThread::pushForBcast reports.
bool GenericCommunicator::bcast_msg( serializer * ser, const int * rcvers, int nrecvrs )
{
    VT_FUNC_I( "Dist::GenComm::bcast_msg" );

    // The serializer must still be in packing mode when it is handed over.
    CNC_ASSERT( ser && ser->is_packing() );
    //BufferAccess::finalizePack( *ser );

    const bool pushed = m_sendThread->pushForBcast( ser, rcvers, nrecvrs, m_globalIdShift );
    return pushed;
}
/// Queue a packed serializer for broadcast via the sender thread
/// (overload without an explicit receiver list).
///
/// @param ser  serializer holding the message; must be non-null and in
///             packing mode (asserted).
void GenericCommunicator::bcast_msg( serializer * ser )
{
    VT_FUNC_I( "Dist::GenComm::bcast_msg" );

    // The serializer must still be in packing mode when it is handed over.
    CNC_ASSERT( ser && ser->is_packing() );
    //BufferAccess::finalizePack( *ser );

    // Hand the message to the sender thread.
    m_sendThread->pushForBcast( ser );
}
void GenericCommunicator::send_termination_request( int rcverLocalId, bool isBlocking ) { VT_FUNC_I( "Dist::GenComm::send_termination_request" ); // Send data. No ITAC logging of termination message. if ( ! isBlocking ) { m_sendThread->pushTerminationRequest( rcverLocalId, 0 ); } else { volatile bool myIndicator = false; m_sendThread->pushTerminationRequest( rcverLocalId, &myIndicator ); // Blocking send: wait until the message has been sent successfully: while ( ! myIndicator ) { YIELD(); } } }
void MpiCommunicator::fini() { VT_FUNC_I( "MpiCommunicator::fini" ); // Already running? if ( ! m_hasBeenInitialized ) { return; } // First the generic cleanup: GenericCommunicator::fini(); // Cleanup of mpi specific stuff: delete m_channel; m_channel = NULL; if( ! ( isDistributed() || m_customComm ) ) { MPI_Finalize(); } }
/// Client-side ITAC (VTcs) handshake with the host over the host sockets.
/// Compiles to an empty function unless CNC_WITH_ITAC is defined.
///
/// Sequence: receive {local rank, global rank} from the host, initialize the
/// VT client to obtain a contact string, send that string (length first) to
/// the host, then initialize VT and name the current thread "RECV<localId>".
void SocketClientInitializer::init_itac_comm()
{
#ifdef CNC_WITH_ITAC
    VT_FUNC_I( "Dist::Socket::init_itac_comm" );

    UINT32 nBytes;
    PAL_SockRes_t ret;

    // Receive client ranks: [0] = local, [1] = global.
    int ranks[2];
    ret = PAL_Recv( HERE, m_channel.m_socketCommData[0].m_recvSocket, ranks, sizeof( ranks ), &nBytes, -1, false );
    CNC_ASSERT( ret == PAL_SOCK_OK && nBytes == sizeof( ranks ) );
    int localRank = ranks[0];
    int globalRank = ranks[1];

    // Process name for the ITAC tracefile.
    char clientName[128];
    sprintf( clientName, "sClient%d", localRank );

    // Determine the VTcs contact string.
    const char * vtContact;
    VT_CLIENT_INIT( globalRank, NULL, &vtContact );

    // Send the contact string to the host: first its length (including
    // the terminating NUL), then the characters.
    int contactLen = static_cast< int >( strlen( vtContact ) ) + 1;
    ret = PAL_Send( HERE, m_channel.m_socketCommData[0].m_sendSocket, &contactLen, sizeof( int ), &nBytes, -1 );
    CNC_ASSERT( ret == PAL_SOCK_OK && nBytes == sizeof( contactLen ) );
    ret = PAL_Send( HERE, m_channel.m_socketCommData[0].m_sendSocket, vtContact, contactLen, &nBytes, -1 );
    CNC_ASSERT( ret == PAL_SOCK_OK && nBytes == contactLen );

    // ITAC thread name for this thread.
    char thrName[64];
    sprintf( thrName, "RECV%d", m_channel.localId() );

    // Initialize VTcs.
    VT_INIT();
    VT_THREAD_NAME( thrName );
#endif // CNC_WITH_ITAC
}
int MpiCommunicator::init( int minId, long thecomm_ ) { VT_FUNC_I( "MpiCommunicator::init" ); assert( sizeof(thecomm_) >= sizeof(MPI_Comm) ); MPI_Comm thecomm = (MPI_Comm)thecomm_; // turn wait mode on for intel mpi if possible // this should greatly improve performance for intel mpi PAL_SetEnvVar( "I_MPI_WAIT_MODE", "enable", 0); int flag; MPI_Initialized( &flag ); if ( ! flag ) { int p; //!! FIXME passing NULL ptr breaks mvapich1 mpi implementation MPI_Init_thread( 0, NULL, MPI_THREAD_MULTIPLE, &p ); if( p != MPI_THREAD_MULTIPLE ) { // can't use Speaker yet, need Channels to be inited std::cerr << "[CnC] Warning: not MPI_THREAD_MULTIPLE (" << MPI_THREAD_MULTIPLE << "), but " << p << std::endl; } } else if( thecomm == 0 ) { CNC_ABORT( "Process has already been initialized" ); } MPI_Comm myComm = MPI_COMM_WORLD; int rank; MPI_Comm parentComm; if( thecomm == 0 ) { MPI_Comm_get_parent( &parentComm ); } else { m_customComm = true; m_exit0CallOk = false; myComm = thecomm; } MPI_Comm_rank( myComm, &rank ); // father of all checks if he's requested to spawn processes: if ( rank == 0 && parentComm == MPI_COMM_NULL ) { // Ok, let's spawn the clients. // I need some information for the startup. // 1. Name of the executable (default is the current exe) const char * _tmp = getenv( "CNC_MPI_SPAWN" ); if ( _tmp ) { int nClientsToSpawn = atol( _tmp ); _tmp = getenv( "CNC_MPI_EXECUTABLE" ); std::string clientExe( _tmp ? _tmp : "" ); if( clientExe.empty() ) clientExe = PAL_GetProgname(); CNC_ASSERT( ! clientExe.empty() ); // 3. 
Special setting for MPI_Info: hosts const char * clientHost = getenv( "CNC_MPI_HOSTS" ); // Prepare MPI_Info object: MPI_Info clientInfo = MPI_INFO_NULL; if ( clientHost ) { MPI_Info_create( &clientInfo ); if ( clientHost ) { MPI_Info_set( clientInfo, const_cast< char * >( "host" ), const_cast< char * >( clientHost ) ); // can't use Speaker yet, need Channels to be inited std::cerr << "[CnC " << rank << "] Set MPI_Info_set( \"host\", \"" << clientHost << "\" )\n"; } } // Now spawn the client processes: // can't use Speaker yet, need Channels to be inited std::cerr << "[CnC " << rank << "] Spawning " << nClientsToSpawn << " MPI processes" << std::endl; int* errCodes = new int[nClientsToSpawn]; MPI_Comm interComm; int err = MPI_Comm_spawn( const_cast< char * >( clientExe.c_str() ), MPI_ARGV_NULL, nClientsToSpawn, clientInfo, 0, MPI_COMM_WORLD, &interComm, errCodes ); delete [] errCodes; if ( err ) { // can't use Speaker yet, need Channels to be inited std::cerr << "[CnC " << rank << "] Error in MPI_Comm_spawn. Skipping process spawning"; } else { MPI_Intercomm_merge( interComm, 0, &myComm ); } } // else { // No process spawning // MPI-1 situation: all clients to be started by mpiexec // myComm = MPI_COMM_WORLD; //} } if ( thecomm == 0 && parentComm != MPI_COMM_NULL ) { // I am a child. Build intra-comm to the parent. MPI_Intercomm_merge( parentComm, 1, &myComm ); } MPI_Comm_rank( myComm, &rank ); CNC_ASSERT( m_channel == NULL ); MpiChannelInterface* myChannel = new MpiChannelInterface( use_crc(), myComm ); m_channel = myChannel; int size; MPI_Comm_size( myComm, &size ); // Are we on the host or on the remote side? if ( rank == 0 ) { if( size <= 1 ) { Speaker oss( std::cerr ); oss << "Warning: no clients avabilable. Forgot to set CNC_MPI_SPAWN?"; } // ==> HOST startup: // This initializes the mpi environment in myChannel. 
MpiHostInitializer hostInitializer( *myChannel ); hostInitializer.init_mpi_comm( myComm ); } else { // ==> CLIENT startup: // This initializes the mpi environment in myChannel. MpiClientInitializer clientInitializer( *myChannel ); clientInitializer.init_mpi_comm( myComm ); } { Speaker oss( std::cerr ); oss << "MPI initialization complete (rank " << rank << ")."; } // MPI_Barrier( myComm ); // Now the mpi specific setup is finished. // Do the generic initialization stuff. GenericCommunicator::init( minId ); return 0; }
/// Destructor. Owns nothing at this point: the channel is expected to have
/// been deleted (and reset) by fini() already, which is asserted.
MpiCommunicator::~MpiCommunicator()
{
    VT_FUNC_I( "MpiCommunicator::~MpiCommunicator" );

    // fini() deletes the channel and resets the pointer.
    CNC_ASSERT( m_channel == NULL );
}
/// Block until all given requests have completed (MPI_Waitall, statuses ignored).
///
/// @param requests  array of request handles, passed to MPI_Waitall as-is.
/// @param cnt       number of entries in requests.
void MpiChannelInterface::wait( int * requests, int cnt )
{
    VT_FUNC_I( "MPI::wait" );

    // Completion statuses are not inspected.
    MPI_Waitall( cnt, requests, MPI_STATUSES_IGNORE );
}
/// Blocking receive of a message body (tag SECOND_MSG) from one sender.
///
/// @param body           destination buffer for the body bytes.
/// @param bodySize       number of bytes to receive.
/// @param senderLocalId  channel-local id of the sender; must be a valid
///                       process id (asserted).
void MpiChannelInterface::recvBodyBytes( void * body, size_type bodySize, int senderLocalId )
{
    VT_FUNC_I( "MPI::recvBodyBytes" );

    CNC_ASSERT( 0 <= senderLocalId && senderLocalId < numProcs() );

    // The receive status is not inspected.
    MPI_Recv( body, bodySize, MPI_CHAR,
              senderLocalId, SECOND_MSG, m_communicator,
              MPI_STATUS_IGNORE );
}
void SocketClientInitializer::init_socket_comm() { VT_FUNC_I( "Dist::Socket::init_socket_comm" ); UINT32 nBytes; PAL_SockRes_t ret; // Initialize socket layer (must be done only once ...) PAL_SockInit( HERE ); // Create socket connection with the host. // Resulting communicators are put into m_channel.m_socketCommData[0]. // Socket connections to the other clients will be set up // later (in build_client_connections). PAL_Socket_t newSocket; // Predefined client id? int myClientId = 0; const char* cncSocketClientId = getenv( "CNC_SOCKET_CLIENT_ID" ); // envvar to be set from the client starter script, // don't make it a config setting! if ( cncSocketClientId ) { myClientId = atoi( cncSocketClientId ); CNC_ASSERT( myClientId >= 1 ); } // Create first socket connection to the host: ret = PAL_Connect( HERE, m_hostContactStr, -1, &newSocket ); CNC_ASSERT( ret == PAL_SOCK_OK ); int clientIdArr[2]; clientIdArr[0] = ( ! cncSocketClientId ) ? 0 : 1; // defines 1st stage AND whether client id was set on the client clientIdArr[1] = myClientId; ret = PAL_Send( HERE, newSocket, clientIdArr, 2 * sizeof( int ), &nBytes, -1 ); CNC_ASSERT( ret == PAL_SOCK_OK && nBytes == sizeof( clientIdArr ) ); const int myClientIdOrig = myClientId; int arr[2]; ret = PAL_Recv( HERE, newSocket, arr, 2 * sizeof( int ), &nBytes, -1, false ); CNC_ASSERT( ret == PAL_SOCK_OK && nBytes == sizeof( arr ) ); myClientId = arr[0]; int numProcsTotal = arr[1]; // Check client id: CNC_ASSERT( ! cncSocketClientId || myClientId == myClientIdOrig ); CNC_ASSERT( myClientId >= 0 ); // ==> Now the number of relevant processes is known. // Prepare the data structures accordingly !!! 
m_channel.setLocalId( myClientId ); m_channel.setNumProcs( numProcsTotal ); m_channel.m_socketCommData[0].m_recvSocket = newSocket; // Create second socket connection to the host: ret = PAL_Connect( HERE, m_hostContactStr, -1, &m_channel.m_socketCommData[0].m_sendSocket ); CNC_ASSERT( ret == PAL_SOCK_OK ); clientIdArr[0] = 2; // defines 2nd stage clientIdArr[1] = myClientId; // must now be >= 0 ret = PAL_Send( HERE, m_channel.m_socketCommData[0].m_sendSocket, clientIdArr, 2 * sizeof( int ), &nBytes, -1 ); CNC_ASSERT( ret == PAL_SOCK_OK && nBytes == sizeof( clientIdArr ) ); // Agree with the host on setup data like the number of worker threads. exchange_setup_info(); // Setup connections between clients. Host will be coordinating this. build_client_connections(); // Final step of initialization: setup of ITAC connections (VTcs): init_itac_comm(); }