Multiply( const matrix_type & A , const size_type nrow , const size_type ncol , const vector_type & x , const vector_type & y ) { CudaSparseSingleton & s = CudaSparseSingleton::singleton(); const scalar_type alpha = 1 , beta = 0 ; cusparseStatus_t status = cusparseScsrmv( s.handle , CUSPARSE_OPERATION_NON_TRANSPOSE , nrow , ncol , A.coefficients.dimension_0() , &alpha , s.descra , A.coefficients.ptr_on_device() , A.graph.row_map.ptr_on_device() , A.graph.entries.ptr_on_device() , x.ptr_on_device() , &beta , y.ptr_on_device() ); if ( CUSPARSE_STATUS_SUCCESS != status ) { throw std::runtime_error( std::string("ERROR - cusparseDcsrmv " ) ); } }
static void apply( const matrix_type & A , const vector_type & x , const vector_type & y ) { CudaSparseSingleton & s = CudaSparseSingleton::singleton(); const double alpha = 1 , beta = 0 ; const int n = A.graph.row_map.dimension_0() - 1 ; const int nz = A.graph.entries.dimension_0(); cusparseStatus_t status = cusparseDcsrmv( s.handle , CUSPARSE_OPERATION_NON_TRANSPOSE , n , n , nz , &alpha , s.descra , A.values.ptr_on_device() , A.graph.row_map.ptr_on_device() , A.graph.entries.ptr_on_device() , x.ptr_on_device() , &beta , y.ptr_on_device() ); if ( CUSPARSE_STATUS_SUCCESS != status ) { throw std::runtime_error( std::string("ERROR - cusparseDcsrmv " ) ); } }
void recv( const vector_type & v ) { const size_t recv_msg_count = m_recv_request.size(); const std::pair<unsigned,unsigned> recv_range( m_map.count_owned , m_map.count_owned + m_map.count_receive ); const vector_type vrecv = subview<vector_type>( v , recv_range ); // Wait for receives and verify: for ( size_t i = 0 ; i < recv_msg_count ; ++i ) { MPI_Status recv_status ; int recv_which = 0 ; int recv_size = 0 ; MPI_Waitany( recv_msg_count , & m_recv_request[0] , & recv_which , & recv_status ); const int recv_proc = recv_status.MPI_SOURCE ; MPI_Get_count( & recv_status , MPI_BYTE , & recv_size ); // Verify message properly received: const int expected_proc = m_map.host_recv(recv_which,0); const int expected_size = m_map.host_recv(recv_which,1) * m_chunk * sizeof(scalar_type); if ( ( expected_proc != recv_proc ) || ( expected_size != recv_size ) ) { std::ostringstream msg ; msg << "MatrixMultiply communication error:" << " P" << comm::rank( m_map.machine ) << " received from P" << recv_proc << " size " << recv_size << " expected " << expected_size << " from P" << expected_proc ; throw std::runtime_error( msg.str() ); } } // Copy received data to device memory. Impl::DeepCopy<typename Device::memory_space,HostSpace>( vrecv.ptr_on_device() , m_host_recv_buffer.ptr_on_device() , m_map.count_receive * m_chunk * sizeof(scalar_type) ); }