inline void ApplyRowPivots ( DistMatrix<F>& A, const std::vector<int>& image, const std::vector<int>& preimage ) { const int b = image.size(); #ifndef RELEASE PushCallStack("ApplyRowPivots"); if( A.Height() < b || b != (int)preimage.size() ) throw std::logic_error ("image and preimage must be vectors of equal length that are not " "taller than A."); #endif const int localWidth = A.LocalWidth(); if( A.Height() == 0 || A.Width() == 0 ) { #ifndef RELEASE PopCallStack(); #endif return; } // Extract the relevant process grid information const Grid& g = A.Grid(); const int r = g.Height(); const int colAlignment = A.ColAlignment(); const int colShift = A.ColShift(); const int myRow = g.Row(); // Extract the send and recv counts from the image and preimage. // This process's sends may be logically partitioned into two sets: // (a) sends from rows [0,...,b-1] // (b) sends from rows [b,...] // The latter is analyzed with image, the former deduced with preimage. std::vector<int> sendCounts(r,0), recvCounts(r,0); for( int i=colShift; i<b; i+=r ) { const int sendRow = preimage[i]; const int sendTo = (colAlignment+sendRow) % r; sendCounts[sendTo] += localWidth; const int recvRow = image[i]; const int recvFrom = (colAlignment+recvRow) % r; recvCounts[recvFrom] += localWidth; } for( int i=0; i<b; ++i ) { const int sendRow = preimage[i]; if( sendRow >= b ) { const int sendTo = (colAlignment+sendRow) % r; if( sendTo == myRow ) { const int sendFrom = (colAlignment+i) % r; recvCounts[sendFrom] += localWidth; } } const int recvRow = image[i]; if( recvRow >= b ) { const int recvFrom = (colAlignment+recvRow) % r; if( recvFrom == myRow ) { const int recvTo = (colAlignment+i) % r; sendCounts[recvTo] += localWidth; } } } // Construct the send and recv displacements from the counts std::vector<int> sendDispls(r), recvDispls(r); int totalSend=0, totalRecv=0; for( int i=0; i<r; ++i ) { sendDispls[i] = totalSend; recvDispls[i] = totalRecv; totalSend += sendCounts[i]; totalRecv += recvCounts[i]; } #ifndef RELEASE if( totalSend != totalRecv ) { std::ostringstream msg; msg << "Send and recv counts do not match: (send,recv)=" << totalSend << "," << totalRecv; throw std::logic_error( msg.str().c_str() ); } #endif // Fill vectors with the send data const int ALDim = A.LocalLDim(); std::vector<F> sendData(std::max(1,totalSend)); std::vector<int> offsets(r,0); const int localHeight = LocalLength( b, colShift, r ); for( int iLocal=0; iLocal<localHeight; ++iLocal ) { const int sendRow = preimage[colShift+iLocal*r]; const int sendTo = (colAlignment+sendRow) % r; const int offset = sendDispls[sendTo]+offsets[sendTo]; const F* ABuffer = A.LocalBuffer(iLocal,0); for( int jLocal=0; jLocal<localWidth; ++jLocal ) sendData[offset+jLocal] = ABuffer[jLocal*ALDim]; offsets[sendTo] += localWidth; } for( int i=0; i<b; ++i ) { const int recvRow = image[i]; if( recvRow >= b ) { const int recvFrom = (colAlignment+recvRow) % r; if( recvFrom == myRow ) { const int recvTo = (colAlignment+i) % r; const int iLocal = (recvRow-colShift) / r; const int offset = sendDispls[recvTo]+offsets[recvTo]; const F* ABuffer = A.LocalBuffer(iLocal,0); for( int jLocal=0; jLocal<localWidth; ++jLocal ) sendData[offset+jLocal] = ABuffer[jLocal*ALDim]; offsets[recvTo] += localWidth; } } } // Communicate all pivot rows std::vector<F> recvData(std::max(1,totalRecv)); mpi::AllToAll ( &sendData[0], &sendCounts[0], &sendDispls[0], &recvData[0], &recvCounts[0], &recvDispls[0], g.ColComm() ); // Unpack the recv data for( int k=0; k<r; ++k ) { offsets[k] = 0; int thisColShift = Shift( k, colAlignment, r ); for( int i=thisColShift; i<b; i+=r ) { const int sendRow = preimage[i]; const int sendTo = (colAlignment+sendRow) % r; if( sendTo == myRow ) { const int offset = recvDispls[k]+offsets[k]; const int iLocal = (sendRow-colShift) / r; F* ABuffer = A.LocalBuffer(iLocal,0); for( int jLocal=0; jLocal<localWidth; ++jLocal ) ABuffer[jLocal*ALDim] = recvData[offset+jLocal]; offsets[k] += localWidth; } } } for( int i=0; i<b; ++i ) { const int recvRow = image[i]; if( recvRow >= b ) { const int recvTo = (colAlignment+i) % r; if( recvTo == myRow ) { const int recvFrom = (colAlignment+recvRow) % r; const int iLocal = (i-colShift) / r; const int offset = recvDispls[recvFrom]+offsets[recvFrom]; F* ABuffer = A.LocalBuffer(iLocal,0); for( int jLocal=0; jLocal<localWidth; ++jLocal ) ABuffer[jLocal*ALDim] = recvData[offset+jLocal]; offsets[recvFrom] += localWidth; } } } #ifndef RELEASE PopCallStack(); #endif }
inline void ApplyColumnPivots ( DistMatrix<F>& A, const std::vector<int>& image, const std::vector<int>& preimage ) { const int b = image.size(); #ifndef RELEASE PushCallStack("ApplyColumnPivots"); if( A.Width() < b || b != preimage.size() ) throw std::logic_error ("image and preimage must be vectors of equal length that are not " "wider than A."); #endif const int localHeight = A.LocalHeight(); if( A.Height() == 0 || A.Width() == 0 ) { #ifndef RELEASE PopCallStack(); #endif return; } // Extract the relevant process grid information const Grid& g = A.Grid(); const int c = g.Width(); const int rowAlignment = A.RowAlignment(); const int rowShift = A.RowShift(); const int myCol = g.Col(); // Extract the send and recv counts from the image and preimage. // This process's sends may be logically partitioned into two sets: // (a) sends from rows [0,...,b-1] // (b) sends from rows [b,...] // The latter is analyzed with image, the former deduced with preimage. std::vector<int> sendCounts(c,0), recvCounts(c,0); for( int j=rowShift; j<b; j+=c ) { const int sendCol = preimage[j]; const int sendTo = (rowAlignment+sendCol) % c; sendCounts[sendTo] += localHeight; const int recvCol = image[j]; const int recvFrom = (rowAlignment+recvCol) % c; recvCounts[recvFrom] += localHeight; } for( int j=0; j<b; ++j ) { const int sendCol = preimage[j]; if( sendCol >= b ) { const int sendTo = (rowAlignment+sendCol) % c; if( sendTo == myCol ) { const int sendFrom = (rowAlignment+j) % c; recvCounts[sendFrom] += localHeight; } } const int recvCol = image[j]; if( recvCol >= b ) { const int recvFrom = (rowAlignment+recvCol) % c; if( recvFrom == myCol ) { const int recvTo = (rowAlignment+j) % c; sendCounts[recvTo] += localHeight; } } } // Construct the send and recv displacements from the counts std::vector<int> sendDispls(c), recvDispls(c); int totalSend=0, totalRecv=0; for( int i=0; i<c; ++i ) { sendDispls[i] = totalSend; recvDispls[i] = totalRecv; totalSend += sendCounts[i]; totalRecv += recvCounts[i]; } #ifndef RELEASE if( totalSend != totalRecv ) { std::ostringstream msg; msg << "Send and recv counts do not match: (send,recv)=" << totalSend << "," << totalRecv; throw std::logic_error( msg.str().c_str() ); } #endif // Fill vectors with the send data std::vector<F> sendData(std::max(1,totalSend)); std::vector<int> offsets(c,0); const int localWidth = LocalLength( b, rowShift, c ); for( int jLocal=0; jLocal<localWidth; ++jLocal ) { const int sendCol = preimage[rowShift+jLocal*c]; const int sendTo = (rowAlignment+sendCol) % c; const int offset = sendDispls[sendTo]+offsets[sendTo]; MemCopy( &sendData[offset], A.LocalBuffer(0,jLocal), localHeight ); offsets[sendTo] += localHeight; } for( int j=0; j<b; ++j ) { const int recvCol = image[j]; if( recvCol >= b ) { const int recvFrom = (rowAlignment+recvCol) % c; if( recvFrom == myCol ) { const int recvTo = (rowAlignment+j) % c; const int jLocal = (recvCol-rowShift) / c; const int offset = sendDispls[recvTo]+offsets[recvTo]; MemCopy ( &sendData[offset], A.LocalBuffer(0,jLocal), localHeight ); offsets[recvTo] += localHeight; } } } // Communicate all pivot rows std::vector<F> recvData(std::max(1,totalRecv)); mpi::AllToAll ( &sendData[0], &sendCounts[0], &sendDispls[0], &recvData[0], &recvCounts[0], &recvDispls[0], g.RowComm() ); // Unpack the recv data for( int k=0; k<c; ++k ) { offsets[k] = 0; int thisRowShift = Shift( k, rowAlignment, c ); for( int j=thisRowShift; j<b; j+=c ) { const int sendCol = preimage[j]; const int sendTo = (rowAlignment+sendCol) % c; if( sendTo == myCol ) { const int offset = recvDispls[k]+offsets[k]; const int jLocal = (sendCol-rowShift) / c; MemCopy ( A.LocalBuffer(0,jLocal), &recvData[offset], localHeight ); offsets[k] += localHeight; } } } for( int j=0; j<b; ++j ) { const int recvCol = image[j]; if( recvCol >= b ) { const int recvTo = (rowAlignment+j) % c; if( recvTo == myCol ) { const int recvFrom = (rowAlignment+recvCol) % c; const int jLocal = (j-rowShift) / c; const int offset = recvDispls[recvFrom]+offsets[recvFrom]; MemCopy ( A.LocalBuffer(0,jLocal), &recvData[offset], localHeight ); offsets[recvFrom] += localHeight; } } } #ifndef RELEASE PopCallStack(); #endif }
void recursive_bisection_contoller::initialize_serial_partitions( parallel::hypergraph &hgraph, MPI_Comm comm) { int i; int j; int ij; int numTotVertices = hypergraph_->number_of_vertices(); int ijk; int startOffset; int endOffset; int totToSend; ds::dynamic_array<int> hGraphPartitionVector; ds::dynamic_array<int> hGraphPartVectorOffsets; ds::dynamic_array<int> hGraphPartCuts; auto hPartitionVector = hypergraph_->partition_vector(); auto hPartOffsetsVector = hypergraph_->partition_offsets(); auto hPartitionCutsArray = hypergraph_->partition_cuts(); dynamic_array<int> numVperProc(number_of_processors_); dynamic_array<int> procDispls(number_of_processors_); dynamic_array<int> sendLens(number_of_processors_); dynamic_array<int> sendDispls(number_of_processors_); dynamic_array<int> recvLens(number_of_processors_); dynamic_array<int> recvDispls(number_of_processors_); dynamic_array<int> sendArray; hgraph.set_number_of_partitions(number_of_runs_); hGraphPartitionVector = hgraph.partition_vector(); hGraphPartVectorOffsets = hgraph.partition_offsets(); hGraphPartCuts = hgraph.partition_cuts(); // ### // communicate partition vector values // ### j = number_of_processors_ - 1; ij = numTotVertices / number_of_processors_; for (i = 0; i < j; ++i) numVperProc[i] = ij; numVperProc[i] = ij + (numTotVertices % number_of_processors_); j = 0; ij = 0; for (i = 0; i < number_of_processors_; ++i) { sendDispls[i] = j; procDispls[i] = ij; sendLens[i] = numVperProc[i] * number_of_partitions_; j += sendLens[i]; ij += numVperProc[i]; } sendArray.resize(j); totToSend = j; ij = 0; for (ijk = 0; ijk < number_of_processors_; ++ijk) { for (j = 0; j < number_of_partitions_; ++j) { startOffset = hPartOffsetsVector[j] + procDispls[ijk]; endOffset = startOffset + numVperProc[ijk]; for (i = startOffset; i < endOffset; ++i) { sendArray[ij++] = hPartitionVector[i]; } } } #ifdef DEBUG_CONTROLLER assert(ij == totToSend); #endif MPI_Alltoall(sendLens.data(), 1, MPI_INT, recvLens.data(), 1, MPI_INT, comm); ij = 0; for (i = 0; i < number_of_processors_; ++i) { recvDispls[i] = ij; ij += recvLens[i]; } #ifdef DEBUG_CONTROLLER assert(ij == hGraphPartVectorOffsets[numSeqRuns]); #endif MPI_Alltoallv(sendArray.data(), sendLens.data(), sendDispls.data(), MPI_INT, hGraphPartitionVector.data(), recvLens.data(), recvDispls.data(), MPI_INT, comm); // ### // communicate partition cuts // ### MPI_Allgather(&number_of_partitions_, 1, MPI_INT, recvLens.data(), 1, MPI_INT, comm); ij = 0; for (i = 0; i < number_of_processors_; ++i) { recvDispls[i] = ij; ij += recvLens[i]; } MPI_Allgatherv(hPartitionCutsArray.data(), number_of_partitions_, MPI_INT, hGraphPartCuts.data(), recvLens.data(), recvDispls.data(), MPI_INT, comm); for (i = 0; i < number_of_runs_; ++i) { progress("%i ", hGraphPartCuts[i]); } progress("\n"); }