template<typename T>
void
all_to_all_impl(const communicator& comm, const T* in_values, int n,
                T* out_values, mpl::false_)
{
  int size = comm.size();
  int rank = comm.rank();

  // The amount of data to be sent to each process.
  std::vector<int> send_sizes(size);

  // The displacements for each outgoing value.
  std::vector<int> send_disps(size);

  // The buffer that will store all of the outgoing values.
  std::vector<char, allocator<char> > outgoing;

  // Pack the buffer with all of the outgoing values.
  for (int dest = 0; dest < size; ++dest) {
    // Keep track of the displacements.
    send_disps[dest] = outgoing.size();

    // Our own value will never be transmitted, so don't pack it.
    if (dest != rank) {
      packed_oarchive oa(comm, outgoing);
      for (int i = 0; i < n; ++i)
        oa << in_values[dest * n + i];
    }

    // Keep track of the sizes.
    send_sizes[dest] = outgoing.size() - send_disps[dest];
  }

  // Determine how much data each process will receive.
  std::vector<int> recv_sizes(size);
  all_to_all(comm, send_sizes, recv_sizes);

  // Prepare a buffer to receive the incoming data.
  std::vector<int> recv_disps(size);
  int sum = 0;
  for (int src = 0; src < size; ++src) {
    recv_disps[src] = sum;
    sum += recv_sizes[src];
  }
  std::vector<char, allocator<char> > incoming(sum > 0 ? sum : 1);

  // Make sure we don't try to reference an empty vector.
  if (outgoing.empty())
    outgoing.push_back(0);

  // Transmit the actual data.
  BOOST_MPI_CHECK_RESULT(MPI_Alltoallv,
                         (&outgoing[0], &send_sizes[0], &send_disps[0],
                          MPI_PACKED,
                          &incoming[0], &recv_sizes[0], &recv_disps[0],
                          MPI_PACKED,
                          comm));

  // Deserialize the received data. Our own values were never sent,
  // so copy them directly from the input buffer.
  for (int src = 0; src < size; ++src) {
    if (src == rank)
      std::copy(in_values + src * n, in_values + (src + 1) * n,
                out_values + src * n);
    else {
      packed_iarchive ia(comm, incoming, boost::archive::no_header,
                         recv_disps[src]);
      for (int i = 0; i < n; ++i)
        ia >> out_values[src * n + i];
    }
  }
}
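For reference, this is the fallback Boost.MPI takes for types it cannot describe as an MPI datatype: each value is serialized into a packed byte buffer, the per-destination byte counts are first exchanged with an integer all_to_all, and the bytes then travel as MPI_PACKED through MPI_Alltoallv. A minimal caller-side sketch, assuming the public boost::mpi API; the std::string payload is just an illustrative choice that forces the serialized (mpl::false_) path:

#include <boost/mpi.hpp>
#include <string>
#include <vector>

int main(int argc, char* argv[])
{
  boost::mpi::environment env(argc, argv);
  boost::mpi::communicator world;

  // One outgoing value per destination rank. Strings are not an MPI
  // datatype, so this exercises the packed MPI_Alltoallv path above.
  std::vector<std::string> out(world.size());
  for (int dest = 0; dest < world.size(); ++dest)
    out[dest] = "from rank " + std::to_string(world.rank());

  std::vector<std::string> in;
  boost::mpi::all_to_all(world, out, in);  // afterwards, in[src] came from rank src
  return 0;
}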
int fei::Vector_core::scatterToOverlap()
{
  if (fei::numProcs(comm_) == 1 || haveFEVector()) {
    return(0);
  }

#ifndef FEI_SER
  if (!overlapAlreadySet_) {
    setOverlap();
  }

  //...and now the overlap is whatever is in our remotelyOwned_ vectors.

  //first find out which procs we'll be receiving from.
  std::vector<int> recvProcs;
  for(unsigned i=0; i<remotelyOwned_.size(); ++i) {
    if (remotelyOwnedProcs_[i] == fei::localProc(comm_)) continue;
    if (remotelyOwned_[i]->size() == 0) continue;

    recvProcs.push_back(remotelyOwnedProcs_[i]);
  }

  //find out the send-procs.
  std::vector<int> sendProcs;
  fei::mirrorProcs(comm_, recvProcs, sendProcs);

  //declare arrays to send from, and corresponding sizes
  std::vector<std::vector<int> > send_ints(sendProcs.size());
  std::vector<std::vector<double> > send_doubles(sendProcs.size());
  std::vector<int> send_sizes(sendProcs.size());

  std::vector<MPI_Request> mpiReqs(sendProcs.size()+recvProcs.size());
  std::vector<MPI_Status> mpiStatuses(sendProcs.size()+recvProcs.size());
  int tag1 = 11111;
  int tag2 = 11112;

  //first, the procs we're going to send to have to let us know how much
  //data we're supposed to send, so we receive sizes (and then indices)
  //from the "send"-procs.
  for(unsigned i=0; i<sendProcs.size(); ++i) {
    MPI_Irecv(&send_sizes[i], 1, MPI_INT, sendProcs[i],
              tag1, comm_, &mpiReqs[i]);
  }

  //now we'll send the sizes of our remotely-owned data to the
  //procs that we will be receiving the data from, and also the
  //indices that we want to receive.
  for(unsigned i=0; i<recvProcs.size(); ++i) {
    int proc = recvProcs[i];

    fei::CSVec* remoteVec = getRemotelyOwned(proc);
    int size = remoteVec->size();

    MPI_Send(&size, 1, MPI_INT, proc, tag1, comm_);
  }

  MPI_Waitall(sendProcs.size(), &mpiReqs[0], &mpiStatuses[0]);

  //now resize our send_ints and send_doubles arrays, and post the recvs
  //for indices that we're supposed to pack.
  for(unsigned i=0; i<sendProcs.size(); ++i) {
    int proc = sendProcs[i];
    int size = send_sizes[i];
    send_ints[i].resize(size);
    MPI_Irecv(&(send_ints[i][0]), size, MPI_INT, proc, tag1,
              comm_, &mpiReqs[i]);
    send_doubles[i].resize(size);
  }

  //now send the indices that we want to receive data for.
  for(unsigned i=0; i<recvProcs.size(); ++i) {
    int proc = recvProcs[i];
    fei::CSVec* remoteVec = getRemotelyOwned(proc);
    int size = remoteVec->size();
    int* indices = &(remoteVec->indices())[0];
    MPI_Send(indices, size, MPI_INT, proc, tag1, comm_);
  }

  MPI_Waitall(sendProcs.size(), &mpiReqs[0], &mpiStatuses[0]);

  //now post our recvs for the coefficients.
  for(unsigned i=0; i<recvProcs.size(); ++i) {
    int proc = recvProcs[i];
    fei::CSVec* remoteVec = getRemotelyOwned(proc);
    int size = remoteVec->size();
    double* coefs = &(remoteVec->coefs())[0];
    MPI_Irecv(coefs, size, MPI_DOUBLE, proc, tag2, comm_, &mpiReqs[i]);
  }

  //now pack and send the coefs that the other procs need from us.
  for(unsigned i=0; i<sendProcs.size(); ++i) {
    int proc = sendProcs[i];
    int num = send_sizes[i];
    int err = copyOutOfUnderlyingVector(num, &(send_ints[i][0]),
                                        &(send_doubles[i][0]), 0);
    if (err != 0) {
      FEI_COUT << "fei::Vector_core::scatterToOverlap ERROR getting data to send." << FEI_ENDL;
      return(err);
    }

    MPI_Send(&(send_doubles[i][0]), num, MPI_DOUBLE, proc, tag2, comm_);
  }

  MPI_Waitall(recvProcs.size(), &mpiReqs[0], &mpiStatuses[0]);
#endif  //#ifndef FEI_SER

  return(0);
}
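The one step above that is easy to miss is fei::mirrorProcs: each rank knows which owners it wants data from (recvProcs), but the owners must discover who will be asking. A minimal sketch of that discovery in plain MPI, assuming a dense flag exchange is acceptable; the name mirror_procs is hypothetical and this is an illustration of the pattern, not FEI's implementation:

#include <mpi.h>
#include <cstddef>
#include <vector>

std::vector<int> mirror_procs(MPI_Comm comm, const std::vector<int>& recvProcs)
{
  int np = 0;
  MPI_Comm_size(comm, &np);

  // flag[p] == 1 means "I will ask rank p for data".
  std::vector<int> flag(np, 0), asked_by(np, 0);
  for (std::size_t i = 0; i < recvProcs.size(); ++i) flag[recvProcs[i]] = 1;

  // After the all-to-all, asked_by[p] == 1 means "rank p will ask me",
  // i.e. the mirror image of the recvProcs relation.
  MPI_Alltoall(&flag[0], 1, MPI_INT, &asked_by[0], 1, MPI_INT, comm);

  std::vector<int> sendProcs;
  for (int p = 0; p < np; ++p)
    if (asked_by[p] == 1) sendProcs.push_back(p);
  return sendProcs;
}

With the send/recv proc lists in hand, the function runs a three-phase handshake on point-to-point messages: sizes, then wanted indices, then the coefficients themselves.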
inline void copy_from_owned(const BulkData& mesh,
                            const std::vector<const FieldBase*>& fields)
{
  if (fields.empty()) {
    return;
  }

  const int parallel_size = mesh.parallel_size();
  const int parallel_rank = mesh.parallel_rank();

  const std::vector<const FieldBase*>::const_iterator fe = fields.end();
  const std::vector<const FieldBase*>::const_iterator fb = fields.begin();
  std::vector<const FieldBase*>::const_iterator fi;

  std::vector<std::vector<unsigned char> > send_data(parallel_size);
  std::vector<std::vector<unsigned char> > recv_data(parallel_size);

  const EntityCommListInfoVector& comm_info_vec = mesh.internal_comm_list();
  size_t comm_info_vec_size = comm_info_vec.size();

  std::vector<unsigned> send_sizes(parallel_size, 0);
  std::vector<unsigned> recv_sizes(parallel_size, 0);

  //this first loop calculates send_sizes and recv_sizes.
  for(fi = fb; fi != fe; ++fi) {
    const FieldBase& f = **fi;

    for(size_t i = 0; i < comm_info_vec_size; ++i) {
      const Bucket* bucket = comm_info_vec[i].bucket;
      int owner = comm_info_vec[i].owner;
      const bool owned = (owner == parallel_rank);

      unsigned e_size = 0;
      if(is_matching_rank(f, *bucket)) {
        const unsigned bucketId = bucket->bucket_id();
        unsigned size = field_bytes_per_entity(f, bucketId);
        e_size += size;
      }

      if(e_size == 0) {
        continue;
      }

      if(owned) {
        const EntityCommInfoVector& infovec = comm_info_vec[i].entity_comm->comm_map;
        size_t infovec_size = infovec.size();
        for(size_t j = 0; j < infovec_size; ++j) {
          int proc = infovec[j].proc;
          send_sizes[proc] += e_size;
        }
      }
      else {
        recv_sizes[owner] += e_size;
      }
    }
  }

  //now size the send_data buffers, and reset send_sizes so the pack loop
  //below can reuse it as a running offset per destination proc.
  size_t max_len = 0;
  for(int p = 0; p < parallel_size; ++p) {
    if (send_sizes[p] > 0) {
      if (send_sizes[p] > max_len) {
        max_len = send_sizes[p];
      }
      send_data[p].resize(send_sizes[p]);
      send_sizes[p] = 0;
    }
  }

  //now pack the send buffers
  std::vector<unsigned char> field_data(max_len);
  unsigned char* field_data_ptr = field_data.data();

  for(fi = fb; fi != fe; ++fi) {
    const FieldBase& f = **fi;

    for(size_t i = 0; i < comm_info_vec_size; ++i) {
      const Bucket* bucket = comm_info_vec[i].bucket;
      int owner = comm_info_vec[i].owner;
      const bool owned = (owner == parallel_rank);

      unsigned e_size = 0;
      if(is_matching_rank(f, *bucket)) {
        const unsigned bucketId = bucket->bucket_id();
        unsigned size = field_bytes_per_entity(f, bucketId);
        if (owned && size > 0) {
          unsigned char* ptr = reinterpret_cast<unsigned char*>(
              stk::mesh::field_data(f, bucketId, comm_info_vec[i].bucket_ordinal, size));
          std::memcpy(field_data_ptr + e_size, ptr, size);
        }
        e_size += size;
      }

      //note: recv_sizes was fully computed in the sizing loop above;
      //accumulating it again here would double-count it and over-size
      //the recv buffers.
      if(e_size == 0 || !owned) {
        continue;
      }

      const EntityCommInfoVector& infovec = comm_info_vec[i].entity_comm->comm_map;
      size_t infovec_size = infovec.size();
      for(size_t j = 0; j < infovec_size; ++j) {
        int proc = infovec[j].proc;
        unsigned char* dest_ptr = send_data[proc].data() + send_sizes[proc];
        std::memcpy(dest_ptr, field_data_ptr, e_size);
        send_sizes[proc] += e_size;
      }
    }
  }

  //now size the recv buffers, and reset recv_sizes so the unpack loop
  //below can reuse it as a running offset per owning proc.
  for(int p = 0; p < parallel_size; ++p) {
    if (recv_sizes[p] > 0) {
      recv_data[p].resize(recv_sizes[p]);
      recv_sizes[p] = 0;
    }
  }

  parallel_data_exchange_nonsym_known_sizes_t(send_data, recv_data, mesh.parallel());

  //now unpack and store the recvd data
  for(fi = fb; fi != fe; ++fi) {
    const FieldBase& f = **fi;

    for(size_t i = 0; i < comm_info_vec_size; ++i) {
      int owner = comm_info_vec[i].owner;
      const bool owned = (owner == parallel_rank);
      if(owned || recv_data[owner].size() == 0) {
        continue;
      }

      const Bucket* bucket = comm_info_vec[i].bucket;
      if(is_matching_rank(f, *bucket)) {
        const unsigned bucketId = bucket->bucket_id();
        unsigned size = field_bytes_per_entity(f, bucketId);
        if (size > 0) {
          unsigned char* ptr = reinterpret_cast<unsigned char*>(
              stk::mesh::field_data(f, bucketId, comm_info_vec[i].bucket_ordinal, size));
          std::memcpy(ptr, &(recv_data[owner][recv_sizes[owner]]), size);
          recv_sizes[owner] += size;
        }
      }
    }
  }
}
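parallel_data_exchange_nonsym_known_sizes_t is used above as a black box: by the time it is called, both sides already know the exact byte counts, so no size handshake is needed. A minimal sketch of what such a helper might look like in plain MPI, assuming the same vector-of-buffers layout; the name exchange_known_sizes and the tag value are illustrative, not STK's code:

#include <mpi.h>
#include <vector>

inline void exchange_known_sizes(std::vector<std::vector<unsigned char> >& send_data,
                                 std::vector<std::vector<unsigned char> >& recv_data,
                                 MPI_Comm comm)
{
  const int tag = 10101;
  int np = 0;
  MPI_Comm_size(comm, &np);

  // Count expected messages and pre-size the request array.
  int num_recvs = 0;
  for (int p = 0; p < np; ++p) {
    if (!recv_data[p].empty()) ++num_recvs;
  }
  std::vector<MPI_Request> reqs(num_recvs);

  // Post one receive per rank we expect data from; recv_data[p] was
  // already resized to the agreed byte count by the caller.
  int r = 0;
  for (int p = 0; p < np; ++p) {
    if (!recv_data[p].empty()) {
      MPI_Irecv(&recv_data[p][0], (int)recv_data[p].size(), MPI_UNSIGNED_CHAR,
                p, tag, comm, &reqs[r++]);
    }
  }

  // Blocking sends cannot deadlock here: every rank posts all of its
  // receives before entering its send loop.
  for (int p = 0; p < np; ++p) {
    if (!send_data[p].empty()) {
      MPI_Send(&send_data[p][0], (int)send_data[p].size(), MPI_UNSIGNED_CHAR,
               p, tag, comm);
    }
  }

  if (num_recvs > 0) {
    MPI_Waitall(num_recvs, &reqs[0], MPI_STATUSES_IGNORE);
  }
}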