int MpiNode::relion_MPI_Recv(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Status &status) { int result; MPI_Request request; double current_time = MPI_Wtime(); double start_time = current_time; // First make a non-blocking receive int result_irecv = MPI_Irecv(buf, count, datatype, source, tag, comm, &request); if (result_irecv != MPI_SUCCESS) { report_MPI_ERROR(result_irecv); } // I could do something in between. If not, Irecv == Recv // Wait for it to finish (MPI_Irecv + MPI_Wait == MPI_Recv) result = MPI_Wait(&request, &status); if (result != MPI_SUCCESS) { report_MPI_ERROR(result); } #ifdef VERBOSE_MPISENDRECV if (count > 100) std::cerr <<" relion_MPI_Recv: message from "<<source << " of size "<< count <<" arrived in " << MPI_Wtime() - start_time << " seconds" << std::endl; #endif return result; }
// MPI_TEST will be executed every this many seconds: so this determines the minimum time taken for every send operation!! //#define VERBOSE_MPISENDRECV int MpiNode::relion_MPI_Send(void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm) { int result; double start_time = MPI_Wtime(); #define ONLY_NORMAL_SEND #ifdef ONLY_NORMAL_SEND result = MPI_Send(buf, count, datatype, dest, tag, comm); if (result != MPI_SUCCESS) { report_MPI_ERROR(result); } #else // Only use Bsend for larger messages, otherwise use normal send if (count > 100) { int size; MPI_Pack_size( count, datatype, comm, &size ); char *membuff; // Allocate memory for the package to be sent int attach_result = MPI_Buffer_attach( malloc(size + MPI_BSEND_OVERHEAD ), size + MPI_BSEND_OVERHEAD ); if (attach_result != MPI_SUCCESS) { report_MPI_ERROR(result); } // Actually start sending the message result = MPI_Bsend(buf, count, datatype, dest, tag, comm); if (result != MPI_SUCCESS) { report_MPI_ERROR(result); } // The following will only complete once the message has been successfully sent (i.e. also received on the other side) int deattach_result = MPI_Buffer_detach( &membuff, &size); if (deattach_result != MPI_SUCCESS) { report_MPI_ERROR(result); } } else { result = MPI_Send(buf, count, datatype, dest, tag, comm); if (result != MPI_SUCCESS) { report_MPI_ERROR(result); } } #endif #ifdef VERBOSE_MPISENDRECV if (count > 100) std::cerr <<" relion_MPI_Send: message to " << dest << " of size "<< count << " arrived in " << MPI_Wtime() - start_time << " seconds" << std::endl; #endif return result; }
/**
 * Broadcast a buffer from @p root to every rank in @p comm via MPI_Bcast.
 *
 * @param buffer    Data to broadcast on the root; destination buffer elsewhere.
 * @param count     Number of elements of @p datatype in the buffer.
 * @param datatype  MPI datatype of each element.
 * @param root      Rank that owns the data being broadcast.
 * @param comm      Communicator to broadcast over.
 * @return The return code of MPI_Bcast. A non-success code is also passed to
 *         report_MPI_ERROR().
 */
int MpiNode::relion_MPI_Bcast(void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm)
{
    const int bcast_code = MPI_Bcast(buffer, count, datatype, root, comm);

    // Fast path: nothing to report.
    if (bcast_code == MPI_SUCCESS)
        return bcast_code;

    report_MPI_ERROR(bcast_code);
    return bcast_code;
}
// MPI_TEST will be executed every this many seconds: so this determines the minimum time taken for every send operation!! //#define VERBOSE_MPISENDRECV int MpiNode::relion_MPI_Send(void *buf, std::ptrdiff_t count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm) { int result(0); DOUBLE start_time = MPI_Wtime(); //#define ONLY_NORMAL_SEND //#ifdef ONLY_NORMAL_SEND int unitsize(0); MPI_Type_size(datatype, &unitsize); const std::ptrdiff_t blocksize(512*1024*1024); const std::ptrdiff_t totalsize(count*unitsize); if (totalsize <= blocksize ) { result = MPI_Send(buf, count, datatype, dest, tag, comm); if (result != MPI_SUCCESS) { report_MPI_ERROR(result); } } else { char * const buffer(reinterpret_cast<char*>(buf)); const std::ptrdiff_t ntimes(totalsize/blocksize); const std::ptrdiff_t nremain(totalsize%blocksize); std::ptrdiff_t i(0); for(; i<ntimes; ++i) { result = MPI_Send(buffer+i*blocksize, blocksize, MPI_CHAR, dest, tag, comm); if (result != MPI_SUCCESS) { report_MPI_ERROR(result); } } if(nremain>0) { result = MPI_Send(buffer+i*blocksize, nremain, MPI_CHAR, dest, tag, comm); if (result != MPI_SUCCESS) { report_MPI_ERROR(result); } } } /* #else // Only use Bsend for larger messages, otherwise use normal send if (count > 100) { int size; MPI_Pack_size( count, datatype, comm, &size ); char *membuff; // Allocate memory for the package to be sent int attach_result = MPI_Buffer_attach( malloc(size + MPI_BSEND_OVERHEAD ), size + MPI_BSEND_OVERHEAD ); if (attach_result != MPI_SUCCESS) { report_MPI_ERROR(result); } // Actually start sending the message result = MPI_Bsend(buf, count, datatype, dest, tag, comm); if (result != MPI_SUCCESS) { report_MPI_ERROR(result); } // The following will only complete once the message has been successfully sent (i.e. 
also received on the other side) int deattach_result = MPI_Buffer_detach( &membuff, &size); if (deattach_result != MPI_SUCCESS) { report_MPI_ERROR(result); } } else { result = MPI_Send(buf, count, datatype, dest, tag, comm); if (result != MPI_SUCCESS) { report_MPI_ERROR(result); } } #endif */ #ifdef VERBOSE_MPISENDRECV if (count > 100) std::cerr <<" relion_MPI_Send: message to " << dest << " of size "<< count << " arrived in " << MPI_Wtime() - start_time << " seconds" << std::endl; #endif return result; }
int MpiNode::relion_MPI_Recv(void *buf, std::ptrdiff_t count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Status &status) { int result; MPI_Request request; DOUBLE current_time = MPI_Wtime(); DOUBLE start_time = current_time; int unitsize(0); MPI_Type_size(datatype, &unitsize); const std::ptrdiff_t blocksize(512*1024*1024); const std::ptrdiff_t totalsize(count*unitsize); if (totalsize <= blocksize ) { int result_irecv = MPI_Irecv(buf, count, datatype, source, tag, comm, &request); if (result_irecv != MPI_SUCCESS) { report_MPI_ERROR(result_irecv); } result = MPI_Wait(&request, &status); if (result != MPI_SUCCESS) { report_MPI_ERROR(result); } } else { char * const buffer(reinterpret_cast<char*>(buf)); const std::ptrdiff_t ntimes(totalsize/blocksize); const std::ptrdiff_t nremain(totalsize%blocksize); std::ptrdiff_t i(0); for(; i<ntimes; ++i) { int result_irecv = MPI_Irecv(buffer+i*blocksize, blocksize, MPI_CHAR, source, tag, comm, &request); if (result_irecv != MPI_SUCCESS) { report_MPI_ERROR(result_irecv); } result = MPI_Wait(&request, &status); if (result != MPI_SUCCESS) { report_MPI_ERROR(result); } } if(nremain>0) { int result_irecv = MPI_Irecv(buffer+i*blocksize, nremain, MPI_CHAR, source, tag, comm, &request); if (result_irecv != MPI_SUCCESS) { report_MPI_ERROR(result_irecv); } result = MPI_Wait(&request, &status); if (result != MPI_SUCCESS) { report_MPI_ERROR(result); } } } /* // First make a non-blocking receive int result_irecv = MPI_Irecv(buf, count, datatype, source, tag, comm, &request); if (result_irecv != MPI_SUCCESS) { report_MPI_ERROR(result_irecv); } // I could do something in between. 
If not, Irecv == Recv // Wait for it to finish (MPI_Irecv + MPI_Wait == MPI_Recv) result = MPI_Wait(&request, &status); if (result != MPI_SUCCESS) { report_MPI_ERROR(result); } */ #ifdef VERBOSE_MPISENDRECV if (count > 100) std::cerr <<" relion_MPI_Recv: message from "<<source << " of size "<< count <<" arrived in " << MPI_Wtime() - start_time << " seconds" << std::endl; #endif return result; }