Example #1
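This operator() implements the communication step of a Kokkos VectorImport: it posts nonblocking MPI receives for the off-process entries, packs the owned entries on the device and sends them with synchronous sends, then waits on each receive and verifies that the source rank and byte count match the expected message. The names it reads (comm, count_owned, count_receive, recv_msg, send_msg, send_nodeid, send_buffer, host_send_buffer, host_recv_buffer, ReceiveInPlace, and the Pack functor) are members of the enclosing class, and the view calls (dimension_0(), dimension_1(), ptr_on_device(), subview< VectorType >) are the legacy Kokkos API.
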
  inline
  void operator()( const VectorType & v ) const
  {
    typedef typename VectorType::value_type  scalar_type ;

    // Extract the raw MPI communicator from the Teuchos communicator wrapper.
    const Teuchos::MpiComm<int> & teuchos_mpi_comm = dynamic_cast< const Teuchos::MpiComm<int> & >( *comm );

    MPI_Comm mpi_comm = * teuchos_mpi_comm.getRawMpiComm();

    const int mpi_tag = 42 ;
    const unsigned vchunk = v.dimension_1();

    // Subvector for receives: the received (off-process) entries are stored
    // immediately after the owned entries.
    const std::pair<unsigned,unsigned> recv_range( count_owned , count_owned + count_receive );
    const VectorType recv_vector = Kokkos::subview< VectorType >( v , recv_range );

    std::vector< MPI_Request > recv_request( recv_msg.dimension_0() , MPI_REQUEST_NULL );

    { // Post receives
      scalar_type * ptr =
        ReceiveInPlace ? recv_vector.ptr_on_device() : host_recv_buffer.ptr_on_device();

      for ( size_t i = 0 ; i < recv_msg.dimension_0() ; ++i ) {
        const int proc  = recv_msg(i,0);
        const int count = recv_msg(i,1) * vchunk ;

        MPI_Irecv( ptr , count * sizeof(scalar_type) , MPI_BYTE ,
                   proc , mpi_tag , mpi_comm , & recv_request[i] );

        ptr += count ;
      }
    }

    // Barrier: once every rank passes this point, all receives are posted,
    // so each synchronous send below will find its matching receive waiting.
    MPI_Barrier( mpi_comm );

    { // Pack and send 
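      // Constructing the Pack functor presumably launches the device kernel
      // that gathers the entries v(send_nodeid(i),*) into send_buffer.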
      const Pack pack( send_nodeid , v , send_buffer );

      // Copy the packed values to the host-side buffer that the MPI sends read.
      Kokkos::deep_copy( host_send_buffer , send_buffer );

      scalar_type * ptr = host_send_buffer.ptr_on_device();

      for ( size_t i = 0 ; i < send_msg.dimension_0() ; ++i ) {
        const int proc  = send_msg(i,0);
        const int count = send_msg(i,1) * vchunk ;

        // MPI_Ssend blocks until
        // (1) a matching receive has been posted for the message and
        // (2) the send buffer can be reused.
        //
        // MPI_Ssend is suggested to give the best performance here:
        // http://www.mcs.anl.gov/research/projects/mpi/sendmode.html .

        MPI_Ssend( ptr ,
                   count * sizeof(scalar_type) , MPI_BYTE ,
                   proc , mpi_tag , mpi_comm );

        ptr += count ;
      }
    }

    // Wait for receives and verify:

    for ( size_t i = 0 ; i < recv_msg.dimension_0() ; ++i ) {
      MPI_Status recv_status ;
      int recv_which = 0 ;
      int recv_size  = 0 ;

      MPI_Waitany( static_cast<int>( recv_msg.dimension_0() ) , & recv_request[0] , & recv_which , & recv_status );

      const int recv_proc = recv_status.MPI_SOURCE ;

      MPI_Get_count( & recv_status , MPI_BYTE , & recv_size );

      // Verify message properly received:

      const int  expected_proc = recv_msg(recv_which,0);
      const int  expected_size = recv_msg(recv_which,1) * vchunk * sizeof(scalar_type);

      if ( ( expected_proc != recv_proc ) ||
           ( expected_size != recv_size ) ) {

        int local_rank  = 0 ;

        MPI_Comm_rank( mpi_comm , & local_rank );

        std::ostringstream msg ;
        msg << "VectorImport error:"
            << " P" << local_rank
            << " received from P" << recv_proc
            << " size "     << recv_size
            << " expected " << expected_size
            << " from P"    << expected_proc ;
        throw std::runtime_error( msg.str() );
      }
    }

    // Copy received data to device memory.

    if ( ! ReceiveInPlace ) { Kokkos::deep_copy( recv_vector , host_recv_buffer ); }
  }
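
The communication pattern above (post nonblocking receives, barrier, synchronous sends, then wait on and verify each message) does not depend on Kokkos or Teuchos. Below is a minimal, self-contained sketch of the same protocol in plain MPI: a ring exchange in which each rank receives one message from its left neighbor and sends one to its right. All names here (tag, count, left, right) are illustrative and not taken from the example above.

#include <mpi.h>
#include <vector>
#include <cstdio>

int main( int argc , char ** argv )
{
  MPI_Init( & argc , & argv );

  int rank = 0 , size = 0 ;
  MPI_Comm_rank( MPI_COMM_WORLD , & rank );
  MPI_Comm_size( MPI_COMM_WORLD , & size );

  const int tag    = 42 ;
  const int count  = 8 ;
  const int nbytes = count * int( sizeof(double) );
  const int left   = ( rank + size - 1 ) % size ;
  const int right  = ( rank + 1 ) % size ;

  std::vector<double> send_buf( count , double(rank) );
  std::vector<double> recv_buf( count , -1.0 );

  // (1) Post the nonblocking receive before any send is issued.
  MPI_Request recv_request = MPI_REQUEST_NULL ;
  MPI_Irecv( recv_buf.data() , nbytes , MPI_BYTE ,
             left , tag , MPI_COMM_WORLD , & recv_request );

  // (2) Barrier: once every rank passes this point, all receives are posted.
  MPI_Barrier( MPI_COMM_WORLD );

  // (3) Synchronous send: returns only after a matching receive is found
  //     and the send buffer can be reused.
  MPI_Ssend( send_buf.data() , nbytes , MPI_BYTE ,
             right , tag , MPI_COMM_WORLD );

  // (4) Wait on the (single) receive and verify source and byte count,
  //     mirroring the MPI_Waitany / MPI_Get_count checks above.
  int recv_which = 0 ;
  MPI_Status recv_status ;
  MPI_Waitany( 1 , & recv_request , & recv_which , & recv_status );

  int recv_size = 0 ;
  MPI_Get_count( & recv_status , MPI_BYTE , & recv_size );

  if ( recv_status.MPI_SOURCE != left || recv_size != nbytes ) {
    std::fprintf( stderr , "P%d: unexpected message\n" , rank );
    MPI_Abort( MPI_COMM_WORLD , 1 );
  }

  MPI_Finalize();
  return 0 ;
}

Compile with an MPI wrapper compiler (e.g. mpicxx) and run under mpiexec. Posting every receive before the barrier is what lets MPI_Ssend complete without waiting for the receiver to catch up, which is presumably why the example above orders its phases this way.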