void sort(Master&                   master,               //!< master object
              const Assigner&           assigner,             //!< assigner object
              std::vector<T> Block::*   values,               //!< pointer to the block member holding all values to sort
              std::vector<T> Block::*   samples,              //!< (output) pointer to the block member receiving the boundaries of blocks
              size_t                    num_samples,          //!< desired number of samples
              const Cmp&                cmp,                  //!< comparison function
              int                       k   = 2,              //!< arity of the k-ary reduction
              bool                      samples_only = false) //!< false: values are exchanged via all_to_all; true: only the sampling step runs, nothing is exchanged
    {
        // Remember the caller's immediate setting, switch it off while the
        // sort runs, and put it back at the very end.
        const bool was_immediate = master.immediate();
        master.set_immediate(false);

        // NB: sample_sorter is a local, but sample() and exchange() hand back
        //     functors whose copies are stored inside reduce, so the sorter
        //     going out of scope at the end of this function is fine.
        detail::SampleSort<Block,T,Cmp> sample_sorter(values, samples, cmp, num_samples);

        // Step 1: swap-reduce over all blocks to all-gather the samples.
        RegularDecomposer<DiscreteBounds> block_domain(1, interval(0,assigner.nblocks()), assigner.nblocks());
        RegularSwapPartners               swap_partners(block_domain, k);
        reduce(master, assigner, swap_partners, sample_sorter.sample(), detail::SkipIntermediate(swap_partners.rounds()));

        // Step 2 (optional): all_to_all exchange of the values themselves.
        const bool exchange_values = !samples_only;
        if (exchange_values)
        {
            all_to_all(master, assigner, sample_sorter.exchange(), k);
        }

        master.set_immediate(was_immediate);
    }
Ejemplo n.º 2
0
// Compute y = A * x for the rows held by this process.
//   x : input values; the first m_row_size entries are read, plus the
//       entries named by m_send_map when running on several processes
//   y : output, passed straight through to txblas_cr_mxv
void CR_Matrix::multiply(
  const double * const x ,
        double * const y ) const
{
  // Single process: the column indices address 'x' directly.
  if ( ! ( 1 < m_comm_size ) ) {
    txblas_cr_mxv( m_row_size ,
                   & m_prefix[0] ,
                   & m_coli[0] ,
                   & m_coef[0] ,
                   x , y );
    return ;
  }

  // Several processes: assemble a work vector holding the gathered remote
  // values together with the local values, then multiply against it.
  std::vector<double> work( m_work_disp[ m_comm_size ] );

  { // Remote portion: pack the requested local entries and exchange them.
    std::vector<double> send_buf( m_send_disp[ m_comm_size ] );

    const unsigned send_count = m_send_map.size();
    for ( unsigned k = 0 ; k < send_count ; ++k ) {
      send_buf[k] = x[ m_send_map[k] ];
    }

    // Skips local-to-local
    all_to_all( m_comm , PARALLEL_DATATYPE_DOUBLE , m_sparse ,
                & send_buf[0] , & m_send_disp[0] ,
                & work[0] ,     & m_work_disp[0] );
  }

  { // Local portion: copy x[0 .. m_row_size) into this process' slot.
    double * dst = & work[0] + m_work_disp[ m_comm_rank ];
    const double * const src_end = x + m_row_size ;
    for ( const double * src = x ; src != src_end ; ++src , ++dst ) {
      *dst = *src ;
    }
  }

  txblas_cr_mxv( m_row_size ,
                 & m_prefix[0] ,
                 & m_coli[0] ,
                 & m_coef[0] ,
                 & work[0] , y );
}
Ejemplo n.º 3
0
// Construct the matrix from this process' prefix / column-index /
// coefficient arrays and, when running on more than one process, set up
// the communication plan used by multiply().
//
//   arg_comm      : parallel machine the matrix lives on
//   arg_partition : row partition; must hold comm_size + 1 entries, this
//                   process owning rows
//                   [ arg_partition[rank] , arg_partition[rank+1] )
//   arg_prefix    : per-row offsets into arg_coli / arg_coef; its last entry
//                   must equal arg_coli.size() and arg_coef.size()
//   arg_coli      : global column identifiers
//   arg_coef      : coefficients
//
// arg_prefix, arg_coli and arg_coef are swapped into the matrix, so on
// return they hold the (empty) vectors the members started with.  If
// arg_prefix is empty the matrix is left empty and nothing else happens.
//
// Throws std::invalid_argument on inconsistent argument sizes,
// std::logic_error if an off-processor column falls in this process' own
// partition slot, and std::runtime_error if another process requests a
// row index outside this process' range.
CR_Matrix::CR_Matrix(
 ParallelMachine arg_comm ,
 const std::vector<unsigned> & arg_partition ,
       std::vector<unsigned> & arg_prefix ,
       std::vector<unsigned> & arg_coli ,
       std::vector<double>   & arg_coef )
  : m_comm( arg_comm ),
    m_comm_size( parallel_machine_size( arg_comm ) ),
    m_comm_rank( parallel_machine_rank( arg_comm ) ),
    m_sparse( false ),
    m_work_disp(),
    m_send_disp(),
    m_send_map(),
    m_row_size( 0 ),
    m_prefix(),
    m_coli(),
    m_coef()
{
  static const char method[] = "phdmesh::CR_Matrix::CR_Matrix" ;

  if ( arg_prefix.empty() ) { return ; }

  //------------------------------------
  // The last prefix entry is the total number of stored entries.

  if ( arg_coli.size() != arg_prefix.back() ||
       arg_coef.size() != arg_prefix.back() ) {
    std::ostringstream msg ;
    msg << method << " ERROR" ;
    msg << " arg_coli.size() = " << arg_coli.size() ;
    msg << " arg_coef.size() = " << arg_coef.size() ;
    msg << " !=  arg_prefix.back() = " << arg_prefix.back() ;
    throw std::invalid_argument( msg.str() );
  }

  // Take ownership of the caller's arrays without copying.
  swap( m_prefix , arg_prefix );
  swap( m_coli , arg_coli );
  swap( m_coef , arg_coef );

  m_row_size = m_prefix.size() - 1 ;

  // Single process: column identifiers are already local, nothing to plan.
  if ( 1 == m_comm_size ) { return ; }

  //------------------------------------

  if ( arg_partition.size() != 1 + m_comm_size ) {
    std::ostringstream msg ;
    msg << method << " ERROR" ;
    msg << " comm_size = " << m_comm_size ;
    msg << " + 1  !=  arg_partition.size() = " << arg_partition.size() ;
    throw std::invalid_argument( msg.str() );
  }

  const unsigned row_first = arg_partition[ m_comm_rank ];
  const unsigned row_end   = arg_partition[ m_comm_rank + 1 ] ;

  if ( m_row_size != ( row_end - row_first ) ) {
    std::ostringstream msg ;
    msg << method << " ERROR" ;
    msg << " arg_prefix'row_size = " << m_row_size ;
    msg << " !=  arg_partition'row_size = " << ( row_end - row_first );
    throw std::invalid_argument( msg.str() );
  }

  //------------------------------------

  m_send_disp.resize( m_comm_size + 1 );
  m_work_disp.resize( m_comm_size + 1 );

  // Generate a vector of off-processor column identifiers

  std::vector<unsigned> work_col_ident ;

  {
    const std::vector<unsigned>::iterator j = m_coli.end();
          std::vector<unsigned>::iterator b = m_coli.begin();
          std::vector<unsigned>::iterator i ;

    for ( i = b ; j != i ; ++i ) { 
      const unsigned global_col = *i ;
      if ( global_col < row_first || row_end <= global_col ) {
        ordered_insert( work_col_ident , global_col );
      }
    }
  }

  //------------------------------------
  // Map column global identifiers to local work offsets.
  //
  // multiply() lays its work vector out as
  //   [ lower_row_recv , local_row , upper_row_recv ]
  // so a column maps to:
  //   lower off-processor : its position in work_col_ident
  //   locally owned       : lower_count + ( global_col - row_first )
  //   upper off-processor : its position in work_col_ident + m_row_size
  //
  // The lookups below rely on work_col_ident being sorted.

  {
    const std::vector<unsigned>::iterator b = work_col_ident.begin();
    const std::vector<unsigned>::iterator e = work_col_ident.end();
          std::vector<unsigned>::iterator j ;

    j = std::lower_bound( b , e , row_end );

    // Number of off-processor columns that precede the local row range.
    const unsigned lower_count = j - b ;

    for ( std::vector<unsigned>::iterator
          i = m_coli.begin() ; i != m_coli.end() ; ++i ) {
      const unsigned global_col = *i ;

      unsigned local_col = 0 ;

      if ( row_first <= global_col && global_col < row_end ) {
        // Locally owned column: lives in the 'local_row' segment.
        local_col = lower_count + ( global_col - row_first );
      }
      else {
        j = std::lower_bound( b, e, global_col );

        local_col = j - b ;

        // Upper off-processor columns sit behind the local segment.
        if ( row_end <= global_col ) { local_col += m_row_size ; }
      }

      *i = local_col ;
    }
  }

  //------------------------------------
  // Displacement prefix for work vector:
  // count the off-processor columns owned by each process.

  {
    std::vector<unsigned>::const_iterator i = work_col_ident.begin() ;

    m_work_disp[0] = 0 ;

    for ( unsigned p = 0 ; p < m_comm_size ; ++p ) {
      const unsigned p_row_end = arg_partition[p+1] ;
      unsigned count = 0 ;
      for ( ; i != work_col_ident.end() && *i < p_row_end ; ++i ) {
        ++count ;
      }

      m_work_disp[p+1] = m_work_disp[p] + count ;
    }
  }

  //------------------------------------
  // Set up communications to gather work subvector

  {
    std::vector<unsigned> send_col_size( m_comm_size );
    std::vector<unsigned> recv_col_size( m_comm_size );

    for ( unsigned p = 0 ; p < m_comm_size ; ++p ) {
      send_col_size[p] = m_work_disp[p+1] - m_work_disp[p] ;
    }

    // No off-processor column may fall inside this process' own range.
    if ( send_col_size[ m_comm_rank ] ) {
      std::ostringstream msg ;
      msg << method << " ERROR with communication sizing logic" ;
      throw std::logic_error( msg.str() );
    }

    unsigned num_msg_maximum = 0 ;

    // Presumably fills recv_col_size and num_msg_maximum - confirm against
    // the comm_sizes declaration.
    comm_sizes( m_comm , m_comm_size / 4 , num_msg_maximum ,
                & send_col_size[0] , & recv_col_size[0] );

    m_sparse = num_msg_maximum < ( m_comm_size / 4 );

    m_send_disp[0] = 0 ;
    for ( unsigned p = 0 ; p < m_comm_size ; ++p ) {
      m_send_disp[p+1] = m_send_disp[p] + recv_col_size[p] ;
    }
  }

  const unsigned send_map_size = m_send_disp[ m_comm_size ];

  m_send_map.resize( send_map_size );

  // Exchange the requested global column identifiers: each process sends
  // its off-processor column list and receives into m_send_map.
  // NOTE(review): '& v[0]' is formed even when work_col_ident or m_send_map
  // is empty - confirm all_to_all tolerates that.
  all_to_all( m_comm , PARALLEL_DATATYPE_UNSIGNED , m_sparse ,
              & work_col_ident[0] , & m_work_disp[0],
              & m_send_map[0] ,     & m_send_disp[0] );

  //------------------------------------
  // Remap the 'm_work_disp' for receiving coefficients into the
  // work vector: [ lower_row_recv , local_row , upper_row_recv ]

  for ( unsigned p = m_comm_rank ; p < m_comm_size ; ++p ) {
    m_work_disp[p+1] += m_row_size ;
  }

  //------------------------------------
  // Map the send_map from global to local indices,
  // also sanity check it.

  for ( unsigned i = 0 ; i < send_map_size ; ++i ) {

    if ( m_send_map[i] < (int) row_first ||
                         (int) row_end <= m_send_map[i] ) {
      std::ostringstream msg ;
      msg << method << " ERROR Received index " ;
      msg << m_send_map[i] ;
      msg << " out of range [ " ;
      msg << row_first ;
      msg << " : " ;
      msg << row_end ;
      msg << " )" ;
      throw std::runtime_error( msg.str() );
    }

    m_send_map[i] -= row_first ;
  }
}
Ejemplo n.º 4
0
  void
  all_to_all_impl(const communicator& comm, const T* in_values, int n,
                  T* out_values, mpl::false_)
  {
    int size = comm.size();
    int rank = comm.rank();

    // The amount of data to be sent to each process
    std::vector<int> send_sizes(size);

    // The displacements for each outgoing value.
    std::vector<int> send_disps(size);

    // The buffer that will store all of the outgoing values
    std::vector<char, allocator<char> > outgoing;

    // Pack the buffer with all of the outgoing values.
    for (int dest = 0; dest < size; ++dest) {
      // Keep track of the displacements
      send_disps[dest] = outgoing.size();

      // Our own value will never be transmitted, so don't pack it.
      if (dest != rank) {
        packed_oarchive oa(comm, outgoing);
        for (int i = 0; i < n; ++i)
          oa << in_values[dest * n + i];
      }

      // Keep track of the sizes
      send_sizes[dest] = outgoing.size() - send_disps[dest];
    }

    // Determine how much data each process will receive.
    std::vector<int> recv_sizes(size);
    all_to_all(comm, send_sizes, recv_sizes);

    // Prepare a buffer to receive the incoming data.
    std::vector<int> recv_disps(size);
    int sum = 0;
    for (int src = 0; src < size; ++src) {
      recv_disps[src] = sum;
      sum += recv_sizes[src];
    }
    std::vector<char, allocator<char> > incoming(sum > 0? sum : 1);

    // Make sure we don't try to reference an empty vector
    if (outgoing.empty())
      outgoing.push_back(0);

    // Transmit the actual data
    BOOST_MPI_CHECK_RESULT(MPI_Alltoallv,
                           (&outgoing[0], &send_sizes[0],
                            &send_disps[0], MPI_PACKED,
                            &incoming[0], &recv_sizes[0],
                            &recv_disps[0], MPI_PACKED,
                            comm));

    // Deserialize data from the iarchive
    for (int src = 0; src < size; ++src) {
      if (src == rank) 
        std::copy(in_values + src * n, in_values + (src + 1) * n, 
                  out_values + src * n);
      else {
        packed_iarchive ia(comm, incoming, boost::archive::no_header,
                           recv_disps[src]);
        for (int i = 0; i < n; ++i)
          ia >> out_values[src * n + i];
      }
    }
  }