Example #1
  template<typename T>
  void
  all_to_all_impl(const communicator& comm, const T* in_values, int n,
                  T* out_values, mpl::false_)
  {
    int size = comm.size();
    int rank = comm.rank();

    // The amount of data to be sent to each process
    std::vector<int> send_sizes(size);

    // The displacements for each outgoing value.
    std::vector<int> send_disps(size);

    // The buffer that will store all of the outgoing values
    std::vector<char, allocator<char> > outgoing;

    // Pack the buffer with all of the outgoing values.
    for (int dest = 0; dest < size; ++dest) {
      // Keep track of the displacements
      send_disps[dest] = outgoing.size();

      // Our own value will never be transmitted, so don't pack it.
      if (dest != rank) {
        packed_oarchive oa(comm, outgoing);
        for (int i = 0; i < n; ++i)
          oa << in_values[dest * n + i];
      }

      // Keep track of the sizes
      send_sizes[dest] = outgoing.size() - send_disps[dest];
    }

    // Determine how much data each process will receive.
    std::vector<int> recv_sizes(size);
    all_to_all(comm, send_sizes, recv_sizes);

    // Prepare a buffer to receive the incoming data.
    std::vector<int> recv_disps(size);
    int sum = 0;
    for (int src = 0; src < size; ++src) {
      recv_disps[src] = sum;
      sum += recv_sizes[src];
    }
    std::vector<char, allocator<char> > incoming(sum > 0 ? sum : 1);

    // Make sure we don't try to reference an empty vector
    if (outgoing.empty())
      outgoing.push_back(0);

    // Transmit the actual data
    BOOST_MPI_CHECK_RESULT(MPI_Alltoallv,
                           (&outgoing[0], &send_sizes[0],
                            &send_disps[0], MPI_PACKED,
                            &incoming[0], &recv_sizes[0],
                            &recv_disps[0], MPI_PACKED,
                            comm));

    // Deserialize data from the iarchive
    for (int src = 0; src < size; ++src) {
      if (src == rank) 
        std::copy(in_values + src * n, in_values + (src + 1) * n, 
                  out_values + src * n);
      else {
        packed_iarchive ia(comm, incoming, boost::archive::no_header,
                           recv_disps[src]);
        for (int i = 0; i < n; ++i)
          ia >> out_values[src * n + i];
      }
    }
  }
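The function above is Boost.MPI's serialization fallback for all_to_all: values with no associated MPI datatype are packed into a packed_oarchive per destination, the per-destination byte counts are exchanged first, and the raw bytes are then moved with MPI_Alltoallv as MPI_PACKED. Callers normally reach it through the public boost::mpi::all_to_all front end; a minimal caller sketch (illustration only, not part of the original source) might look like this:

  // Minimal caller sketch: std::string has no MPI datatype, so
  // boost::mpi::all_to_all dispatches to the serialization path shown
  // above (the mpl::false_ overload).
  #include <boost/mpi.hpp>
  #include <boost/serialization/string.hpp>
  #include <string>
  #include <vector>

  int main(int argc, char* argv[])
  {
    boost::mpi::environment env(argc, argv);
    boost::mpi::communicator world;

    // One outgoing value per destination rank.
    std::vector<std::string> outgoing(world.size());
    for (int dest = 0; dest < world.size(); ++dest)
      outgoing[dest] = "from rank " + std::to_string(world.rank());

    std::vector<std::string> incoming;
    boost::mpi::all_to_all(world, outgoing, incoming);
    // incoming[src] now holds the value that rank 'src' addressed to us.
    return 0;
  }
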
Example #2
int fei::Vector_core::scatterToOverlap()
{
  if (fei::numProcs(comm_) == 1 || haveFEVector()) {
    return(0);
  }

#ifndef FEI_SER
  if (!overlapAlreadySet_) {
    setOverlap();
  }

  //...and now the overlap is whatever is in our remotelyOwned_ vectors.

  //first find out which procs we'll be receiving from.
  std::vector<int> recvProcs;
  for(unsigned i=0; i<remotelyOwned_.size(); ++i) {
    if (remotelyOwnedProcs_[i] == fei::localProc(comm_)) continue;
    if (remotelyOwned_[i]->size() == 0) continue;

    recvProcs.push_back(remotelyOwnedProcs_[i]);
  }

  //find out the send-procs.
  std::vector<int> sendProcs;
  fei::mirrorProcs(comm_, recvProcs, sendProcs);

  //declare arrays to send from, and corresponding sizes
  std::vector<std::vector<int> > send_ints(sendProcs.size());
  std::vector<std::vector<double> > send_doubles(sendProcs.size());
  std::vector<int> send_sizes(sendProcs.size());

  std::vector<MPI_Request> mpiReqs(sendProcs.size()+recvProcs.size());
  std::vector<MPI_Status> mpiStatuses(sendProcs.size()+recvProcs.size());
  int tag1 = 11111;
  int tag2 = 11112;

  //first, the procs we're going to send to have to let us know
  //how much data we're supposed to send. So we have to receive
  //sizes and then indices from the "send"-procs.
  for(unsigned i=0; i<sendProcs.size(); ++i) {
    MPI_Irecv(&send_sizes[i], 1, MPI_INT, sendProcs[i],
              tag1, comm_, &mpiReqs[i]);
  }

  //now we'll send the sizes of our remotely-owned data to the
  //procs that we will be receiving the data from, and also the
  //indices that we want to receive.
  for(unsigned i=0; i<recvProcs.size(); ++i) {
    int proc = recvProcs[i];

    fei::CSVec* remoteVec = getRemotelyOwned(proc);
    int size = remoteVec->size();
    MPI_Send(&size, 1, MPI_INT, proc, tag1, comm_);
  }
 
  MPI_Waitall(sendProcs.size(), &mpiReqs[0], &mpiStatuses[0]);

  //now resize our send_ints and send_doubles arrays, and post the recvs
  //for indices that we're supposed to pack.
  for(unsigned i=0; i<sendProcs.size(); ++i) {
    int proc = sendProcs[i];
    int size = send_sizes[i];
    send_ints[i].resize(size);
    MPI_Irecv(&(send_ints[i][0]), size, MPI_INT, proc, tag1,
              comm_, &mpiReqs[i]);
    send_doubles[i].resize(size);
  }

  //now send the indices that we want to receive data for.
  for(unsigned i=0; i<recvProcs.size(); ++i) {
    int proc = recvProcs[i];
    fei::CSVec* remoteVec = getRemotelyOwned(proc);
    int size = remoteVec->size();
    int* indices = &(remoteVec->indices())[0];
    MPI_Send(indices, size, MPI_INT, proc, tag1, comm_);
  }

  MPI_Waitall(sendProcs.size(), &mpiReqs[0], &mpiStatuses[0]);

  //now post our recvs.
  for(unsigned i=0; i<recvProcs.size(); ++i) {
    int proc = recvProcs[i];
    fei::CSVec* remoteVec = getRemotelyOwned(proc);
    int size = remoteVec->size();
    double* coefs = &(remoteVec->coefs())[0];
    MPI_Irecv(coefs, size, MPI_DOUBLE, proc, tag2, comm_, &mpiReqs[i]);
  }

  //now pack and send the coefs that the other procs need from us.
  for(unsigned i=0; i<sendProcs.size(); ++i) {
    int proc = sendProcs[i];

    int num = send_sizes[i];
    int err = copyOutOfUnderlyingVector(num, &(send_ints[i][0]),
                                        &(send_doubles[i][0]), 0);
    if (err != 0) {
      FEI_COUT << "fei::Vector_core::scatterToOverlap ERROR getting data to send."<<FEI_ENDL;
      return(err);
    }

    MPI_Send(&(send_doubles[i][0]), num, MPI_DOUBLE, proc, tag2, comm_);
  }

  MPI_Waitall(recvProcs.size(), &mpiReqs[0], &mpiStatuses[0]);

#endif  //#ifndef FEI_SER

  return(0);
}
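The handshake above depends on fei::mirrorProcs: given the list of procs this rank will receive from, it yields the list of procs that will expect a message from this rank, so that matching sends and receives can be posted. A minimal way to compute such a mirror with plain MPI (an illustration only, not the FEI implementation) is to exchange 0/1 flags:

  // Sketch of proc-list mirroring (not fei::mirrorProcs itself): mark the
  // ranks we expect to receive from, exchange the flags with MPI_Alltoall,
  // and collect the ranks that marked us.
  #include <mpi.h>
  #include <vector>

  std::vector<int> mirror_procs_sketch(MPI_Comm comm,
                                       const std::vector<int>& recvProcs)
  {
    int numProcs = 0;
    MPI_Comm_size(comm, &numProcs);

    std::vector<int> myFlags(numProcs, 0), theirFlags(numProcs, 0);
    for (size_t i = 0; i < recvProcs.size(); ++i)
      myFlags[recvProcs[i]] = 1;                // "I expect data from you"

    MPI_Alltoall(&myFlags[0], 1, MPI_INT, &theirFlags[0], 1, MPI_INT, comm);

    std::vector<int> sendProcs;
    for (int p = 0; p < numProcs; ++p)
      if (theirFlags[p] == 1)
        sendProcs.push_back(p);                 // proc p expects data from us
    return sendProcs;
  }
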
Example #3
inline void copy_from_owned(
  const BulkData                        & mesh ,
  const std::vector< const FieldBase *> & fields )
{
  if ( fields.empty() ) { return; }

  const int parallel_size = mesh.parallel_size();
  const int parallel_rank = mesh.parallel_rank();

  const std::vector<const FieldBase *>::const_iterator fe = fields.end();
  const std::vector<const FieldBase *>::const_iterator fb = fields.begin();
        std::vector<const FieldBase *>::const_iterator fi ;

  std::vector<std::vector<unsigned char> > send_data(parallel_size);
  std::vector<std::vector<unsigned char> > recv_data(parallel_size);

  const EntityCommListInfoVector &comm_info_vec = mesh.internal_comm_list();
  size_t comm_info_vec_size = comm_info_vec.size();

  std::vector<unsigned> send_sizes(parallel_size, 0);
  std::vector<unsigned> recv_sizes(parallel_size, 0);

  //this first loop calculates send_sizes and recv_sizes.
  for(fi = fb; fi != fe; ++fi)
  {
      const FieldBase & f = **fi;
      for(size_t i = 0; i<comm_info_vec_size; ++i)
      {
          const Bucket* bucket = comm_info_vec[i].bucket;

          int owner = comm_info_vec[i].owner;
          const bool owned = (owner == parallel_rank);

          unsigned e_size = 0;

          if(is_matching_rank(f, *bucket))
          {
              const unsigned bucketId = bucket->bucket_id();
              unsigned size = field_bytes_per_entity(f, bucketId);
              e_size += size;
          }

          if(e_size == 0)
          {
              continue;
          }

          if(owned)
          {
              const EntityCommInfoVector& infovec = comm_info_vec[i].entity_comm->comm_map;
              size_t infovec_size = infovec.size();
              for(size_t j=0; j<infovec_size; ++j)
              {
                  int proc = infovec[j].proc;

                  send_sizes[proc] += e_size;
              }
          }
          else
          {
              recv_sizes[owner] += e_size;
          }
      }
  }

  //now size the send_data buffers
  size_t max_len = 0;
  for(int p=0; p<parallel_size; ++p)
  {
      if (send_sizes[p] > 0)
      {
          if (send_sizes[p] > max_len)
          {
              max_len = send_sizes[p];
          }
          send_data[p].resize(send_sizes[p]);
          send_sizes[p] = 0;
      }
  }

  //now pack the send buffers. recv_sizes is re-accumulated during packing and
  //then used to size the recv buffers, so reset it here; otherwise every
  //incoming contribution would be counted twice.
  std::fill(recv_sizes.begin(), recv_sizes.end(), 0u);
  std::vector<unsigned char> field_data(max_len);
  unsigned char* field_data_ptr = field_data.data();

  for(fi = fb; fi != fe; ++fi)
  {
      const FieldBase & f = **fi;
      for(size_t i = 0; i<comm_info_vec_size; ++i)
      {
          const Bucket* bucket = comm_info_vec[i].bucket;

          int owner = comm_info_vec[i].owner;
          const bool owned = (owner == parallel_rank);

          unsigned e_size = 0;

          if(is_matching_rank(f, *bucket))
          {
              const unsigned bucketId = bucket->bucket_id();
              unsigned size = field_bytes_per_entity(f, bucketId);
              if (owned && size > 0)
              {
                  unsigned char * ptr = reinterpret_cast<unsigned char*>(stk::mesh::field_data(f, bucketId, comm_info_vec[i].bucket_ordinal, size));
                  std::memcpy(field_data_ptr+e_size, ptr, size);
              }
              e_size += size;
          }

          if(e_size == 0)
          {
              continue;
          }

          if(owned)
          {
              const EntityCommInfoVector& infovec = comm_info_vec[i].entity_comm->comm_map;
              size_t infovec_size = infovec.size();
              for(size_t j=0; j<infovec_size; ++j)
              {
                  int proc = infovec[j].proc;

                  unsigned char* dest_ptr = send_data[proc].data()+send_sizes[proc];
                  unsigned char* src_ptr = field_data_ptr;
                  std::memcpy(dest_ptr, src_ptr, e_size);
                  send_sizes[proc] += e_size;
              }
          }
          else
          {
              recv_sizes[owner] += e_size;
          }
      }
  }

  for(int p=0; p<parallel_size; ++p)
  {
      if (recv_sizes[p] > 0)
      {
          recv_data[p].resize(recv_sizes[p]);
          recv_sizes[p] = 0;
      }
  }

  //exchange the packed field bytes; message sizes are known on both sides
  //(send_data[p].size() here, recv_data[owner].size() on the receiving proc).
  parallel_data_exchange_nonsym_known_sizes_t(send_data, recv_data, mesh.parallel());

  //now unpack and store the recvd data
  for(fi = fb; fi != fe; ++fi)
  {
      const FieldBase & f = **fi;

      for(size_t i=0; i<comm_info_vec_size; ++i)
      {
          int owner = comm_info_vec[i].owner;
          const bool owned = (owner == parallel_rank);

          if(owned || recv_data[owner].size() == 0)
          {
              continue;
          }

          const Bucket* bucket = comm_info_vec[i].bucket;

          if(is_matching_rank(f, *bucket))
          {
              const unsigned bucketId = bucket->bucket_id();
              unsigned size = field_bytes_per_entity(f, bucketId);
              if (size > 0)
              {
                  unsigned char * ptr = reinterpret_cast<unsigned char*>(stk::mesh::field_data(f, bucketId, comm_info_vec[i].bucket_ordinal, size));

                  std::memcpy(ptr, &(recv_data[owner][recv_sizes[owner]]), size);
                  recv_sizes[owner] += size;
              }
          }
      }
  }
}
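copy_from_owned delegates the communication itself to parallel_data_exchange_nonsym_known_sizes_t, which delivers each send_data[p] buffer to the matching recv_data slot on rank p when both sides already know the byte counts. For illustration only (the actual stk implementation may differ), such a known-sizes, nonsymmetric exchange can be written with pre-posted nonblocking receives followed by plain sends:

  // Sketch of a known-sizes, nonsymmetric byte exchange (illustration only,
  // not the stk implementation). recv_data[p] must already be sized to the
  // number of bytes rank p will send to this rank.
  #include <mpi.h>
  #include <vector>

  inline void exchange_known_sizes_sketch(
    std::vector<std::vector<unsigned char> >& send_data,
    std::vector<std::vector<unsigned char> >& recv_data,
    MPI_Comm comm)
  {
    const int tag = 10101;
    std::vector<MPI_Request> requests;
    requests.reserve(recv_data.size());

    // Post a receive for every rank we expect bytes from.
    for (size_t p = 0; p < recv_data.size(); ++p) {
      if (recv_data[p].empty()) continue;
      requests.push_back(MPI_Request());
      MPI_Irecv(recv_data[p].data(), (int)recv_data[p].size(),
                MPI_UNSIGNED_CHAR, (int)p, tag, comm, &requests.back());
    }

    // Send our packed bytes to every rank that expects some from us.
    for (size_t p = 0; p < send_data.size(); ++p) {
      if (send_data[p].empty()) continue;
      MPI_Send(send_data[p].data(), (int)send_data[p].size(),
               MPI_UNSIGNED_CHAR, (int)p, tag, comm);
    }

    MPI_Waitall((int)requests.size(), requests.data(), MPI_STATUSES_IGNORE);
  }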