/*
 * Broadcast a buffer.
 */
void Buffer::bcast(MPI::Intracomm& comm, int source)
{
   int comm_size = comm.Get_size();
   int myRank = comm.Get_rank();
   if (source > comm_size - 1 || source < 0) {
      UTIL_THROW("Source rank out of bounds");
   }

   int sendBytes;
   if (myRank == source) {
      sendBytes = sendPtr_ - sendBufferBegin_;
      comm.Bcast(&sendBytes, 1, MPI::INT, source);
      comm.Bcast(sendBufferBegin_, sendBytes, MPI::CHAR, source);
      sendPtr_ = sendBufferBegin_;
      sendType_ = NONE;
   } else {
      comm.Bcast(&sendBytes, 1, MPI::INT, source);
      comm.Bcast(recvBufferBegin_, sendBytes, MPI::CHAR, source);
      recvPtr_ = recvBufferBegin_;
      recvType_ = NONE;
   }
   if (sendBytes > maxSendLocal_) {
      maxSendLocal_ = sendBytes;
   }
}
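/*
 * A minimal, self-contained sketch (not from the original source) of the two-step broadcast
 * convention used by Buffer::bcast above: the root broadcasts the byte count first, then the
 * payload, so every receiver knows how many bytes to expect. The buffer size and payload
 * string are arbitrary assumptions for illustration.
 */
#include <mpi.h>
#include <cstring>

int main(int argc, char* argv[])
{
   MPI::Init(argc, argv);
   const int source = 0;                    // assumed broadcast root
   char data[128];
   int nBytes = 0;
   if (MPI::COMM_WORLD.Get_rank() == source) {
      std::strcpy(data, "broadcast payload");
      nBytes = (int) std::strlen(data) + 1;
   }
   MPI::COMM_WORLD.Bcast(&nBytes, 1, MPI::INT, source);     // everyone learns the size first
   MPI::COMM_WORLD.Bcast(data, nBytes, MPI::CHAR, source);  // then receives that many bytes
   MPI::Finalize();
   return 0;
}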
int getSize() const
{
   int size = Network::INVALID_VALUE;
   if (intraComm != MPI::COMM_NULL) {
      size = intraComm.Get_size();
   } else {
      log.Err() << "getSize() returns invalid\n";
   }
   return size;
}
// not necessary to create a new comm object
MPI::Intracomm init_workers(const MPI::Intracomm& comm_world, int managerid)
{
   // get old group
   MPI::Group world_group = comm_world.Get_group();

   // create new group from old group
   int worker_size = comm_world.Get_size() - 1;
   int* workers = new int[worker_size];
   for (int i = 0, id = 0; i < worker_size; ++i, ++id) {
      if (id == managerid) ++id;   // skip the manager id
      workers[i] = id;
   }
   MPI::Group worker_group = world_group.Incl(worker_size, workers);
   delete [] workers;
   return comm_world.Create(worker_group);
}
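/*
 * A minimal usage sketch for init_workers (assumed driver code, not part of the original):
 * rank 0 acts as the manager and receives MPI::COMM_NULL from Create(), while the remaining
 * ranks obtain a worker-only intracommunicator.
 */
#include <mpi.h>

int main(int argc, char* argv[])
{
   MPI::Init(argc, argv);
   const int managerid = 0;   // assumed manager rank
   MPI::Intracomm workers = init_workers(MPI::COMM_WORLD, managerid);
   if (workers != MPI::COMM_NULL) {
      // Only worker ranks reach this branch; the manager is excluded from the group.
      int wrank = workers.Get_rank();
      int wsize = workers.Get_size();
      (void) wrank;
      (void) wsize;
   }
   MPI::Finalize();
   return 0;
}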
/*
 * Send a block (nonblocking).
 */
void MemoryOArchive::iSend(MPI::Intracomm& comm, MPI::Request& req, int dest)
{
   int comm_size = comm.Get_size();
   int myRank = comm.Get_rank();

   // Preconditions
   if (dest > comm_size - 1 || dest < 0) {
      UTIL_THROW("Destination rank out of bounds");
   }
   if (dest == myRank) {
      UTIL_THROW("Source and destination identical");
   }

   size_t sendBytes = cursor_ - buffer_;
   size_t* sizePtr = (size_t*) buffer_;
   *sizePtr = sendBytes;
   req = comm.Isend(buffer_, sendBytes, MPI::UNSIGNED_CHAR, dest, 5);
}
/*
 * Receive a block.
 */
void PackedData::recv(MPI::Intracomm& comm, int source)
{
   MPI::Request request;
   int myRank = comm.Get_rank();
   int comm_size = comm.Get_size();

   // Preconditions
   if (source > comm_size - 1 || source < 0) {
      UTIL_THROW("Source rank out of bounds");
   }
   if (source == myRank) {
      UTIL_THROW("Source and destination identical");
   }

   request = comm.Irecv(begin_, capacity_, MPI::UNSIGNED_CHAR, source, 5);
   request.Wait();
   cursor_ = begin_;
}
void transfH_MPI(mblock<double>** Ap, mblock<float>** A, unsigned* seq_part)
{
  unsigned rank = COMM_AHMED.Get_rank(), nproc = COMM_AHMED.Get_size();
  unsigned info[8];
  if (rank == 0) {
    copyH(seq_part[1]-seq_part[0], Ap, A);
    for (unsigned j=1; j<nproc; ++j) {
      for (unsigned i=seq_part[j]; i<seq_part[j+1]; ++i) {
        COMM_AHMED.Recv(info, 8, MPI::UNSIGNED, j, 7);
        A[i] = new mblock<float>(info[0], info[1]);
        float* tmp = NULL;
        if (info[7]) {
          tmp = new float[info[7]];
          COMM_AHMED.Recv(tmp, info[7], MPI::FLOAT, j, 8);
        }
        A[i]->cpy_mbl(info+2, tmp);
        delete [] tmp;
      }
    }
  } else {
    for (unsigned i=seq_part[rank]; i<seq_part[rank+1]; ++i) {
      mblock<double>* p = Ap[i-seq_part[rank]];
      info[0] = p->getn1();
      info[1] = p->getn2();
      p->get_prop(info+2);
      info[7] = p->nvals();
      COMM_AHMED.Send(info, 8, MPI::UNSIGNED, 0, 7);
      if (info[7]) {
        float* tmp = new float[info[7]];
        for (unsigned k=0; k<info[7]; ++k) tmp[k] = (float) p->getdata()[k];
        COMM_AHMED.Send(tmp, info[7], MPI::FLOAT, 0, 8);
        delete [] tmp;
      }
    }
  }
}
/*
 * Send and receive buffer.
 */
void Buffer::sendRecv(MPI::Intracomm& comm, int source, int dest)
{
   MPI::Request request[2];
   int sendBytes = 0;
   int myRank = comm.Get_rank();
   int comm_size = comm.Get_size();

   // Preconditions
   if (dest > comm_size - 1 || dest < 0) {
      UTIL_THROW("Destination rank out of bounds");
   }
   if (source > comm_size - 1 || source < 0) {
      UTIL_THROW("Source rank out of bounds");
   }
   if (dest == myRank) {
      UTIL_THROW("Destination and my rank are identical");
   }
   if (source == myRank) {
      UTIL_THROW("Source and my rank are identical");
   }

   // Start nonblocking receive.
   request[0] = comm.Irecv(recvBufferBegin_, bufferCapacity_, MPI::CHAR, source, 5);

   // Start nonblocking send.
   sendBytes = sendPtr_ - sendBufferBegin_;
   request[1] = comm.Isend(sendBufferBegin_, sendBytes, MPI::CHAR, dest, 5);

   // Wait for completion of receive.
   request[0].Wait();
   recvPtr_ = recvBufferBegin_;

   // Wait for completion of send.
   request[1].Wait();

   // Update statistics.
   if (sendBytes > maxSendLocal_) {
      maxSendLocal_ = sendBytes;
   }
}
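/*
 * Minimal standalone sketch (assumed example, not part of the Buffer class) of the same
 * Irecv-before-Isend pattern used by Buffer::sendRecv: posting the receive before the send
 * and waiting on both requests lets every rank shift data around a ring without deadlock.
 * The buffer sizes, tag value, and ring topology are illustrative assumptions.
 */
#include <mpi.h>
#include <cstring>

int main(int argc, char* argv[])
{
   MPI::Init(argc, argv);
   int rank = MPI::COMM_WORLD.Get_rank();
   int size = MPI::COMM_WORLD.Get_size();
   int dest   = (rank + 1) % size;          // right neighbor
   int source = (rank + size - 1) % size;   // left neighbor

   char sendBuf[64];
   char recvBuf[64];
   std::memset(sendBuf, 'a' + (rank % 26), sizeof(sendBuf));

   MPI::Request request[2];
   request[0] = MPI::COMM_WORLD.Irecv(recvBuf, sizeof(recvBuf), MPI::CHAR, source, 5);
   request[1] = MPI::COMM_WORLD.Isend(sendBuf, sizeof(sendBuf), MPI::CHAR, dest, 5);
   request[0].Wait();   // receive complete: recvBuf now holds the left neighbor's data
   request[1].Wait();   // send complete: sendBuf may be reused

   MPI::Finalize();
   return 0;
}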
/*
 * Send a block.
 */
void PackedData::send(MPI::Intracomm& comm, int dest)
{
   MPI::Request request;
   int sendBytes = 0;
   int comm_size = comm.Get_size();
   int myRank = comm.Get_rank();

   // Preconditions
   if (dest > comm_size - 1 || dest < 0) {
      UTIL_THROW("Destination rank out of bounds");
   }
   if (dest == myRank) {
      UTIL_THROW("Source and destination identical");
   }

   sendBytes = cursor_ - begin_;
   request = comm.Isend(begin_, sendBytes, MPI::UNSIGNED_CHAR, dest, 5);
   request.Wait();
}
/*
 * Receive a buffer.
 */
void Buffer::recv(MPI::Intracomm& comm, int source)
{
   MPI::Request request;
   int myRank = comm.Get_rank();
   int comm_size = comm.Get_size();

   // Preconditions
   if (source > comm_size - 1 || source < 0) {
      UTIL_THROW("Source rank out of bounds");
   }
   if (source == myRank) {
      UTIL_THROW("Source and destination identical");
   }

   request = comm.Irecv(recvBufferBegin_, bufferCapacity_, MPI::CHAR, source, 5);
   request.Wait();
   recvType_ = NONE;
   recvPtr_ = recvBufferBegin_;
}
/*
 * Receive a block.
 */
void MemoryIArchive::recv(MPI::Intracomm& comm, int source)
{
   int myRank = comm.Get_rank();
   int comm_size = comm.Get_size();

   // Preconditions
   if (source > comm_size - 1 || source < 0) {
      UTIL_THROW("Source rank out of bounds");
   }
   if (source == myRank) {
      UTIL_THROW("Source and destination identical");
   }

   size_t recvCapacity = capacity_ + sizeof(size_t);
   comm.Recv(buffer_, recvCapacity, MPI::UNSIGNED_CHAR, source, 5);

   begin_ = buffer_ + sizeof(size_t);
   cursor_ = begin_;

   size_t* sizePtr = (size_t*) buffer_;
   size_t size = *sizePtr;
   end_ = buffer_ + size;
}
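/*
 * Minimal standalone sketch (assumed example) of the size-prefix convention shared by
 * MemoryOArchive::iSend and MemoryIArchive::recv above: the first sizeof(size_t) bytes of
 * the message carry the total number of bytes sent, and the receiver posts a receive for
 * the full buffer capacity and then reads the prefix to locate the end of the payload.
 * The capacity, payload, and ranks are illustrative assumptions.
 */
#include <mpi.h>
#include <cstring>

int main(int argc, char* argv[])
{
   MPI::Init(argc, argv);
   int rank = MPI::COMM_WORLD.Get_rank();
   const int capacity = 256;
   unsigned char buffer[capacity + sizeof(size_t)];

   if (rank == 0) {
      const char* payload = "hello";
      size_t payloadBytes = std::strlen(payload) + 1;
      size_t sendBytes = sizeof(size_t) + payloadBytes;
      std::memcpy(buffer, &sendBytes, sizeof(size_t));               // write the size prefix
      std::memcpy(buffer + sizeof(size_t), payload, payloadBytes);   // then the payload
      MPI::Request req = MPI::COMM_WORLD.Isend(buffer, (int) sendBytes, MPI::UNSIGNED_CHAR, 1, 5);
      req.Wait();
   } else if (rank == 1) {
      // Receive up to the full capacity; the prefix says how many bytes actually arrived.
      MPI::COMM_WORLD.Recv(buffer, capacity + (int) sizeof(size_t), MPI::UNSIGNED_CHAR, 0, 5);
      size_t size;
      std::memcpy(&size, buffer, sizeof(size_t));
      unsigned char* begin = buffer + sizeof(size_t);   // start of the payload
      unsigned char* end   = buffer + size;             // one past the last payload byte
      (void) begin;
      (void) end;
   }
   MPI::Finalize();
   return 0;
}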
template <class IT, class NT>
FullyDistSpVec<IT, IT> FullyDistSpVec<IT, NT>::sort()
{
    MPI::Intracomm World = commGrid->GetWorld();
    FullyDistSpVec<IT,IT> temp(commGrid);
    IT nnz = getlocnnz();
    pair<NT,IT> * vecpair = new pair<NT,IT>[nnz];

    int nprocs = World.Get_size();
    int rank = World.Get_rank();

    IT * dist = new IT[nprocs];
    dist[rank] = nnz;
    World.Allgather(MPI::IN_PLACE, 1, MPIType<IT>(), dist, 1, MPIType<IT>());
    IT sizeuntil = accumulate(dist, dist+rank, 0);
    for(IT i=0; i< nnz; ++i)
    {
        vecpair[i].first = num[i];   // we'll sort wrt numerical values
        vecpair[i].second = ind[i] + sizeuntil;
    }
    SpParHelper::MemoryEfficientPSort(vecpair, nnz, dist, World);

    vector< IT > nind(nnz);
    vector< IT > nnum(nnz);
    for(IT i=0; i< nnz; ++i)
    {
        num[i] = vecpair[i].first;      // sorted range (change the object itself)
        nind[i] = ind[i];               // make sure the sparsity distribution is the same
        nnum[i] = vecpair[i].second;    // inverse permutation stored as numerical values
    }
    delete [] vecpair;
    delete [] dist;

    temp.NOT_FOUND = NOT_FOUND;
    temp.glen = glen;
    temp.ind = nind;
    temp.num = nnum;
    return temp;
}
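/*
 * Minimal standalone sketch (assumed example, not CombBLAS code) of the offset computation
 * used in sort() above: every rank contributes its local element count via an in-place
 * Allgather, and the exclusive prefix sum of the counts gives the global index of this
 * rank's first local element. The local count formula is an arbitrary assumption.
 */
#include <mpi.h>
#include <vector>
#include <numeric>

int main(int argc, char* argv[])
{
   MPI::Init(argc, argv);
   int rank = MPI::COMM_WORLD.Get_rank();
   int nprocs = MPI::COMM_WORLD.Get_size();

   long nnz = 10 + rank;                 // assumed local count, different on each rank
   std::vector<long> dist(nprocs);
   dist[rank] = nnz;
   MPI::COMM_WORLD.Allgather(MPI::IN_PLACE, 1, MPI::LONG, &dist[0], 1, MPI::LONG);

   // Global index of this rank's first local element.
   long sizeuntil = std::accumulate(dist.begin(), dist.begin() + rank, 0L);
   (void) sizeuntil;

   MPI::Finalize();
   return 0;
}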
/*
 * Send a buffer.
 */
void Buffer::send(MPI::Intracomm& comm, int dest)
{
   MPI::Request request;
   int sendBytes = 0;
   int comm_size = comm.Get_size();
   int myRank = comm.Get_rank();

   // Preconditions
   if (dest > comm_size - 1 || dest < 0) {
      UTIL_THROW("Destination rank out of bounds");
   }
   if (dest == myRank) {
      UTIL_THROW("Source and destination identical");
   }

   sendBytes = sendPtr_ - sendBufferBegin_;
   request = comm.Isend(sendBufferBegin_, sendBytes, MPI::CHAR, dest, 5);
   request.Wait();

   // Update statistics.
   if (sendBytes > maxSendLocal_) {
      maxSendLocal_ = sendBytes;
   }
}
template <class IT, class NT>
ifstream& FullyDistSpVec<IT,NT>::ReadDistribute(ifstream& infile, int master)
{
    IT total_nnz;
    MPI::Intracomm World = commGrid->GetWorld();
    int neighs = World.Get_size();   // number of neighbors (including oneself)
    int buffperneigh = MEMORYINBYTES / (neighs * (sizeof(IT) + sizeof(NT)));

    int * displs = new int[neighs];
    for (int i=0; i<neighs; ++i)
        displs[i] = i*buffperneigh;

    int * curptrs = NULL;
    int recvcount = 0;
    IT * inds = NULL;
    NT * vals = NULL;
    int rank = World.Get_rank();
    if(rank == master)   // 1 processor only
    {
        inds = new IT [ buffperneigh * neighs ];
        vals = new NT [ buffperneigh * neighs ];
        curptrs = new int[neighs];
        fill_n(curptrs, neighs, 0);   // fill with zero
        if (infile.is_open())
        {
            infile.clear();
            infile.seekg(0);
            infile >> glen >> total_nnz;
            World.Bcast(&glen, 1, MPIType<IT>(), master);

            IT tempind;
            NT tempval;
            double loadval;
            IT cnz = 0;
            while ( (!infile.eof()) && cnz < total_nnz)
            {
                infile >> tempind;
                //infile >> tempval;
                infile >> loadval;
                tempval = static_cast<NT>(loadval);
                tempind--;

                IT locind;
                int rec = Owner(tempind, locind);   // recipient (owner) processor
                inds[ rec * buffperneigh + curptrs[rec] ] = locind;
                vals[ rec * buffperneigh + curptrs[rec] ] = tempval;
                ++ (curptrs[rec]);

                if(curptrs[rec] == buffperneigh || (cnz == (total_nnz-1)) )   // one buffer is full, or file is done !
                {
                    // first, send the receive counts ...
                    World.Scatter(curptrs, 1, MPI::INT, &recvcount, 1, MPI::INT, master);

                    // generate space for own recv data ... (use arrays because vector<bool> is crippled, if NT=bool)
                    IT * tempinds = new IT[recvcount];
                    NT * tempvals = new NT[recvcount];

                    // then, send all buffers to their recipients ...
                    World.Scatterv(inds, curptrs, displs, MPIType<IT>(), tempinds, recvcount, MPIType<IT>(), master);
                    World.Scatterv(vals, curptrs, displs, MPIType<NT>(), tempvals, recvcount, MPIType<NT>(), master);

                    // now push what is ours to tuples
                    for(IT i=0; i< recvcount; ++i)
                    {
                        ind.push_back( tempinds[i] );   // already offset'd by the sender
                        num.push_back( tempvals[i] );
                    }

                    // reset current pointers so that we can reuse {inds,vals} buffers
                    fill_n(curptrs, neighs, 0);
                    DeleteAll(tempinds, tempvals);
                }
                ++ cnz;
            }
            assert (cnz == total_nnz);

            // Signal the end of file to other processors along the diagonal
            fill_n(curptrs, neighs, numeric_limits<int>::max());
            World.Scatter(curptrs, 1, MPI::INT, &recvcount, 1, MPI::INT, master);
        }
template <class IT, class NT>
FullyDistVec<IT,NT> FullyDistSpVec<IT,NT>::operator() (const FullyDistVec<IT,IT> & ri) const
{
    MPI::Intracomm World = commGrid->GetWorld();

    // FullyDistVec ( shared_ptr<CommGrid> grid, IT globallen, NT initval, NT id);
    FullyDistVec<IT,NT> Indexed(ri.commGrid, ri.glen, zero, zero);

    int nprocs = World.Get_size();
    unordered_map<IT, IT> revr_map;   // inverted index that maps indices of *this to indices of output
    vector< vector<IT> > data_req(nprocs);
    IT locnnz = ri.LocArrSize();

    // ABAB: Input sanity check
    int local = 1;
    int whole = 1;
    for(IT i=0; i < locnnz; ++i)
    {
        if(ri.arr[i] >= glen || ri.arr[i] < 0)
        {
            local = 0;
        }
    }
    World.Allreduce( &local, &whole, 1, MPI::INT, MPI::BAND);
    if(whole == 0)
    {
        throw outofrangeexception();
    }

    for(IT i=0; i < locnnz; ++i)
    {
        IT locind;
        int owner = Owner(ri.arr[i], locind);   // numerical values in ri are 0-based
        data_req[owner].push_back(locind);
        revr_map.insert(typename unordered_map<IT, IT>::value_type(locind, i));
    }
    IT * sendbuf = new IT[locnnz];
    int * sendcnt = new int[nprocs];
    int * sdispls = new int[nprocs];
    for(int i=0; i<nprocs; ++i)
        sendcnt[i] = data_req[i].size();

    int * rdispls = new int[nprocs];
    int * recvcnt = new int[nprocs];
    World.Alltoall(sendcnt, 1, MPI::INT, recvcnt, 1, MPI::INT);   // share the request counts

    sdispls[0] = 0;
    rdispls[0] = 0;
    for(int i=0; i<nprocs-1; ++i)
    {
        sdispls[i+1] = sdispls[i] + sendcnt[i];
        rdispls[i+1] = rdispls[i] + recvcnt[i];
    }
    IT totrecv = accumulate(recvcnt, recvcnt+nprocs, 0);
    IT * recvbuf = new IT[totrecv];

    for(int i=0; i<nprocs; ++i)
    {
        copy(data_req[i].begin(), data_req[i].end(), sendbuf+sdispls[i]);
        vector<IT>().swap(data_req[i]);
    }
    World.Alltoallv(sendbuf, sendcnt, sdispls, MPIType<IT>(), recvbuf, recvcnt, rdispls, MPIType<IT>());   // request data

    // We will return the requested data;
    // our return can be at most as big as the request
    // and smaller if we are missing some elements
    IT * indsback = new IT[totrecv];
    NT * databack = new NT[totrecv];

    int * ddispls = new int[nprocs];
    copy(rdispls, rdispls+nprocs, ddispls);
    for(int i=0; i<nprocs; ++i)
    {
        // this is not the most efficient method because it scans the ind vector nprocs = sqrt(p) times
        IT * it = set_intersection(recvbuf+rdispls[i], recvbuf+rdispls[i]+recvcnt[i], ind.begin(), ind.end(), indsback+rdispls[i]);
        recvcnt[i] = (it - (indsback+rdispls[i]));   // update with size of the intersection

        IT vi = 0;
        for(int j = rdispls[i]; j < rdispls[i] + recvcnt[i]; ++j)   // fetch the numerical values
        {
            // indsback is a subset of ind
            while(indsback[j] > ind[vi])
                ++vi;
            databack[j] = num[vi++];
        }
    }
    DeleteAll(recvbuf, ddispls);
    NT * databuf = new NT[ri.LocArrSize()];

    World.Alltoall(recvcnt, 1, MPI::INT, sendcnt, 1, MPI::INT);   // share the response counts, overriding request counts
    World.Alltoallv(indsback, recvcnt, rdispls, MPIType<IT>(), sendbuf, sendcnt, sdispls, MPIType<IT>());   // send indices
    World.Alltoallv(databack, recvcnt, rdispls, MPIType<NT>(), databuf, sendcnt, sdispls, MPIType<NT>());   // send data
    DeleteAll(rdispls, recvcnt, indsback, databack);

    // Now create the output from databuf (holds numerical values) and sendbuf (holds indices)
    // arr is already resized during its construction
    for(int i=0; i<nprocs; ++i)
    {
        // data will come globally sorted from processors,
        // i.e. ind owned by proc_i is always smaller than ind owned by proc_j for j < i
        for(int j=sdispls[i]; j< sdispls[i]+sendcnt[i]; ++j)
        {
            typename unordered_map<IT,IT>::iterator it = revr_map.find(sendbuf[j]);
            Indexed.arr[it->second] = databuf[j];
            // cout << it->second << "(" << sendbuf[j] << "):" << databuf[j] << endl;
        }
    }
    DeleteAll(sdispls, sendcnt, sendbuf, databuf);
    return Indexed;
}
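/*
 * Minimal standalone sketch (assumed example, not CombBLAS code) of the count-exchange
 * pattern used by operator() above: each rank first tells every other rank how many items
 * it will send via Alltoall, builds displacement arrays by prefix sums, and then moves the
 * variable-length data with Alltoallv. The per-rank item counts and payload values are
 * illustrative assumptions.
 */
#include <mpi.h>
#include <vector>
#include <numeric>

int main(int argc, char* argv[])
{
   MPI::Init(argc, argv);
   int rank = MPI::COMM_WORLD.Get_rank();
   int nprocs = MPI::COMM_WORLD.Get_size();

   // Each rank sends (rank + 1) integers to every other rank, just to make the sizes uneven.
   std::vector<int> sendcnt(nprocs, rank + 1);
   std::vector<int> recvcnt(nprocs);
   MPI::COMM_WORLD.Alltoall(&sendcnt[0], 1, MPI::INT, &recvcnt[0], 1, MPI::INT);

   std::vector<int> sdispls(nprocs, 0);
   std::vector<int> rdispls(nprocs, 0);
   for (int i = 0; i < nprocs - 1; ++i) {
      sdispls[i+1] = sdispls[i] + sendcnt[i];
      rdispls[i+1] = rdispls[i] + recvcnt[i];
   }
   int totsend = std::accumulate(sendcnt.begin(), sendcnt.end(), 0);
   int totrecv = std::accumulate(recvcnt.begin(), recvcnt.end(), 0);

   std::vector<int> sendbuf(totsend, rank);   // payload: just the sender's rank
   std::vector<int> recvbuf(totrecv);
   MPI::COMM_WORLD.Alltoallv(&sendbuf[0], &sendcnt[0], &sdispls[0], MPI::INT,
                             &recvbuf[0], &recvcnt[0], &rdispls[0], MPI::INT);

   MPI::Finalize();
   return 0;
}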