void mkl_apply( KernelHandle *handle, typename KernelHandle::row_lno_t m, typename KernelHandle::row_lno_t n, typename KernelHandle::row_lno_t k, in_row_index_view_type row_mapA, in_nonzero_index_view_type entriesA, in_nonzero_value_view_type valuesA, bool transposeA, in_row_index_view_type row_mapB, in_nonzero_index_view_type entriesB, in_nonzero_value_view_type valuesB, bool transposeB, typename in_row_index_view_type::non_const_type &row_mapC, typename in_nonzero_index_view_type::non_const_type &entriesC, typename in_nonzero_value_view_type::non_const_type &valuesC){ #ifdef KERNELS_HAVE_MKL typedef typename KernelHandle::row_lno_t idx; typedef in_row_index_view_type idx_array_type; typedef typename KernelHandle::nnz_scalar_t value_type; typedef typename in_row_index_view_type::device_type device1; typedef typename in_nonzero_index_view_type::device_type device2; typedef typename in_nonzero_value_view_type::device_type device3; typedef typename KernelHandle::HandleExecSpace MyExecSpace; std::cout << "RUNNING MKL" << std::endl; #if defined( KOKKOS_HAVE_CUDA ) if (!Kokkos::Impl::is_same<Kokkos::Cuda, device1 >::value){ std::cerr << "MEMORY IS NOT ALLOCATED IN HOST DEVICE for MKL" << std::endl; return; } if (!Kokkos::Impl::is_same<Kokkos::Cuda, device2 >::value){ std::cerr << "MEMORY IS NOT ALLOCATED IN HOST DEVICE for MKL" << std::endl; return; } if (!Kokkos::Impl::is_same<Kokkos::Cuda, device3 >::value){ std::cerr << "MEMORY IS NOT ALLOCATED IN HOST DEVICE for MKL" << std::endl; return; } #endif if (Kokkos::Impl::is_same<idx, int>::value){ int *a_xadj = (int *)row_mapA.ptr_on_device(); int *b_xadj = (int *)row_mapB.ptr_on_device(); int *c_xadj = (int *)row_mapC.ptr_on_device(); int *a_adj = (int *)entriesA.ptr_on_device(); int *b_adj = (int *)entriesB.ptr_on_device(); int *c_adj = (int *)entriesC.ptr_on_device(); int nnzA = entriesA.dimension_0(); int nnzB = entriesB.dimension_0(); value_type *a_ew = valuesA.ptr_on_device(); value_type *b_ew = valuesB.ptr_on_device(); value_type *c_ew = valuesC.ptr_on_device(); sparse_matrix_t A; sparse_matrix_t B; sparse_matrix_t C; if (Kokkos::Impl::is_same<value_type, float>::value){ if (SPARSE_STATUS_SUCCESS != mkl_sparse_s_create_csr (&A, SPARSE_INDEX_BASE_ZERO, m, n, a_xadj, a_xadj + 1, a_adj, (float *)a_ew)){ std::cerr << "CANNOT CREATE mkl_sparse_s_create_csr A" << std::endl; return; } if (SPARSE_STATUS_SUCCESS != mkl_sparse_s_create_csr (&B, SPARSE_INDEX_BASE_ZERO, n, k, b_xadj, b_xadj + 1, b_adj, (float *)b_ew)){ std::cerr << "CANNOT CREATE mkl_sparse_s_create_csr B" << std::endl; return; } sparse_operation_t operation; if (transposeA && transposeB){ operation = SPARSE_OPERATION_TRANSPOSE; } else if (!(transposeA || transposeB)){ operation = SPARSE_OPERATION_NON_TRANSPOSE; } else { std::cerr << "Ask both to transpose or non transpose for MKL SPGEMM" << std::endl; return; } Kokkos::Impl::Timer timer1; bool success = SPARSE_STATUS_SUCCESS != mkl_sparse_spmm (operation, A, B, &C); std::cout << "Actual FLOAT MKL SPMM Time:" << timer1.seconds() << std::endl; if (success){ std::cerr << "CANNOT multiply mkl_sparse_spmm " << std::endl; return; } else{ sparse_index_base_t c_indexing; MKL_INT c_rows, c_cols, *rows_start, *rows_end, *columns; float *values; if (SPARSE_STATUS_SUCCESS != mkl_sparse_s_export_csr (C, &c_indexing, &c_rows, &c_cols, &rows_start, &rows_end, &columns, &values)){ std::cerr << "CANNOT export result matrix " << std::endl; return; } if (SPARSE_INDEX_BASE_ZERO != c_indexing){ std::cerr << "C is not zero based indexed." << std::endl; return; } row_mapC = typename in_row_index_view_type::non_const_type(Kokkos::ViewAllocateWithoutInitializing("rowmapC"), c_rows + 1); entriesC = typename in_nonzero_index_view_type::non_const_type (Kokkos::ViewAllocateWithoutInitializing("EntriesC") , rows_end[m - 1] ); valuesC = typename in_nonzero_value_view_type::non_const_type (Kokkos::ViewAllocateWithoutInitializing("valuesC") , rows_end[m - 1]); KokkosKernels::Experimental::Util::copy_vector<MKL_INT *, typename in_row_index_view_type::non_const_type, MyExecSpace> (m, rows_start, row_mapC); idx nnz = row_mapC(m) = rows_end[m - 1]; KokkosKernels::Experimental::Util::copy_vector<MKL_INT *, typename in_nonzero_index_view_type::non_const_type , MyExecSpace> (nnz, columns, entriesC); KokkosKernels::Experimental::Util::copy_vector<float *, typename in_nonzero_value_view_type::non_const_type, MyExecSpace> (m, values, valuesC); } if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy (A)){ std::cerr << "CANNOT DESTROY mkl_sparse_destroy A" << std::endl; return; } if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy (B)){ std::cerr << "CANNOT DESTROY mkl_sparse_destroy B" << std::endl; return; } if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy (C)){ std::cerr << "CANNOT DESTROY mkl_sparse_destroy C" << std::endl; return; } } else if (Kokkos::Impl::is_same<value_type, double>::value){ /* std::cout << "create a" << std::endl; std::cout << "m:" << m << " n:" << n << std::endl; std::cout << "a_xadj[0]:" << a_xadj[0] << " a_xadj[m]:" << a_xadj[m] << std::endl; std::cout << "a_adj[a_xadj[m] - 1]:" << a_adj[a_xadj[m] - 1] << " a_ew[a_xadj[m] - 1]:" << a_ew[a_xadj[m] - 1] << std::endl; */ if (SPARSE_STATUS_SUCCESS != mkl_sparse_d_create_csr (&A, SPARSE_INDEX_BASE_ZERO, m, n, a_xadj, a_xadj + 1, a_adj, (double *)a_ew)){ std::cerr << "CANNOT CREATE mkl_sparse_d_create_csr A" << std::endl; return; } //std::cout << "create b" << std::endl; if (SPARSE_STATUS_SUCCESS != mkl_sparse_d_create_csr (&B, SPARSE_INDEX_BASE_ZERO, n, k, b_xadj, b_xadj + 1, b_adj, (double *) b_ew)){ std::cerr << "CANNOT CREATE mkl_sparse_d_create_csr B" << std::endl; return; } sparse_operation_t operation; if (transposeA && transposeB){ operation = SPARSE_OPERATION_TRANSPOSE; } else if (!(transposeA || transposeB)){ operation = SPARSE_OPERATION_NON_TRANSPOSE; } else { std::cerr << "Ask both to transpose or non transpose for MKL SPGEMM" << std::endl; return; } Kokkos::Impl::Timer timer1; bool success = SPARSE_STATUS_SUCCESS != mkl_sparse_spmm (operation, A, B, &C); std::cout << "Actual DOUBLE MKL SPMM Time:" << timer1.seconds() << std::endl; if (success){ std::cerr << "CANNOT multiply mkl_sparse_spmm " << std::endl; return; } else{ sparse_index_base_t c_indexing; MKL_INT c_rows, c_cols, *rows_start, *rows_end, *columns; double *values; if (SPARSE_STATUS_SUCCESS != mkl_sparse_d_export_csr (C, &c_indexing, &c_rows, &c_cols, &rows_start, &rows_end, &columns, &values)){ std::cerr << "CANNOT export result matrix " << std::endl; return; } if (SPARSE_INDEX_BASE_ZERO != c_indexing){ std::cerr << "C is not zero based indexed." << std::endl; return; } { Kokkos::Impl::Timer copy_time; row_mapC = typename in_row_index_view_type::non_const_type(Kokkos::ViewAllocateWithoutInitializing("rowmapC"), c_rows + 1); entriesC = typename in_nonzero_index_view_type::non_const_type (Kokkos::ViewAllocateWithoutInitializing("EntriesC") , rows_end[m - 1] ); valuesC = typename in_nonzero_value_view_type::non_const_type (Kokkos::ViewAllocateWithoutInitializing("valuesC") , rows_end[m - 1]); KokkosKernels::Experimental::Util::copy_vector<MKL_INT *, typename in_row_index_view_type::non_const_type, MyExecSpace> (m, rows_start, row_mapC); idx nnz = row_mapC(m) = rows_end[m - 1]; KokkosKernels::Experimental::Util::copy_vector<MKL_INT *, typename in_nonzero_index_view_type::non_const_type, MyExecSpace> (nnz, columns, entriesC); KokkosKernels::Experimental::Util::copy_vector<double *, typename in_nonzero_value_view_type::non_const_type, MyExecSpace> (m, values, valuesC); double copy_time_d = copy_time.seconds(); std::cout << "MKL COPYTIME:" << copy_time_d << std::endl; } } if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy (A)){ std::cerr << "CANNOT DESTROY mkl_sparse_destroy A" << std::endl; return; } if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy (B)){ std::cerr << "CANNOT DESTROY mkl_sparse_destroy B" << std::endl; return; } if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy (C)){ std::cerr << "CANNOT DESTROY mkl_sparse_destroy C" << std::endl; return; } } else { std::cerr << "CUSPARSE requires float or double values. cuComplex and cuDoubleComplex are not implemented yet." << std::endl; return; } } else { //int *a_xadj = row_mapA.ptr_on_device(); std::cerr << "MKL requires integer values" << std::endl; if (Kokkos::Impl::is_same<idx, unsigned int>::value){ std::cerr << "MKL is given unsigned integer" << std::endl; } else if (Kokkos::Impl::is_same<idx, long>::value){ std::cerr << "MKL is given long" << std::endl; } else if (Kokkos::Impl::is_same<idx, const int>::value){ std::cerr << "MKL is given const int" << std::endl; } else if (Kokkos::Impl::is_same<idx, unsigned long>::value){ std::cerr << "MKL is given unsigned long" << std::endl; } else if (Kokkos::Impl::is_same<idx, const unsigned long>::value){ std::cerr << "MKL is given const unsigned long" << std::endl; } else{ std::cerr << "MKL is given something else" << std::endl; } return; } #else std::cerr << "MKL IS NOT DEFINED" << std::endl; return; #endif }
void viennaCL_apply( KernelHandle *handle, typename KernelHandle::nnz_lno_t m, typename KernelHandle::nnz_lno_t n, typename KernelHandle::nnz_lno_t k, in_row_index_view_type row_mapA, in_nonzero_index_view_type entriesA, in_nonzero_value_view_type valuesA, bool transposeA, bin_row_index_view_type row_mapB, bin_nonzero_index_view_type entriesB, bin_nonzero_value_view_type valuesB, bool transposeB, cin_row_index_view_type &row_mapC, cin_nonzero_index_view_type &entriesC, cin_nonzero_value_view_type &valuesC){ #ifdef KERNELS_HAVE_VIENNACL typedef typename KernelHandle::nnz_lno_t idx; typedef in_row_index_view_type idx_array_type; typedef typename KernelHandle::nnz_scalar_t value_type; typedef typename in_row_index_view_type::device_type device1; typedef typename in_nonzero_index_view_type::device_type device2; typedef typename in_nonzero_value_view_type::device_type device3; typedef typename KernelHandle::HandleExecSpace MyExecSpace; std::cout << "RUNNING VIENNACL" << std::endl; typedef typename viennacl::compressed_matrix<value_type>::handle_type it; typedef typename viennacl::compressed_matrix<value_type>::value_type vt; if ((Kokkos::Impl::is_same<idx, int>::value && Kokkos::Impl::is_same<typename KernelHandle::size_type, int>::value )|| (Kokkos::Impl::is_same<idx, unsigned int>::value && Kokkos::Impl::is_same<typename KernelHandle::size_type, unsigned int>::value ) || (Kokkos::Impl::is_same<idx, it>::value && Kokkos::Impl::is_same<typename KernelHandle::size_type, it>::value ) ){ unsigned int * a_xadj = (unsigned int *)row_mapA.ptr_on_device(); unsigned int * b_xadj = (unsigned int * )row_mapB.ptr_on_device(); unsigned int * c_xadj = (unsigned int * )row_mapC.ptr_on_device(); unsigned int * a_adj = (unsigned int * )entriesA.ptr_on_device(); unsigned int * b_adj = (unsigned int * )entriesB.ptr_on_device(); unsigned int * c_adj = (unsigned int * )entriesC.ptr_on_device(); int nnzA = entriesA.dimension_0(); int nnzB = entriesB.dimension_0(); value_type *a_ew = valuesA.ptr_on_device(); value_type *b_ew = valuesB.ptr_on_device(); value_type *c_ew = valuesC.ptr_on_device(); /* std::cout << "create a" << std::endl; std::cout << "m:" << m << " n:" << n << std::endl; std::cout << "a_xadj[0]:" << a_xadj[0] << " a_xadj[m]:" << a_xadj[m] << std::endl; std::cout << "a_adj[a_xadj[m] - 1]:" << a_adj[a_xadj[m] - 1] << " a_ew[a_xadj[m] - 1]:" << a_ew[a_xadj[m] - 1] << std::endl; */ Kokkos::Impl::Timer timerset; viennacl::compressed_matrix<value_type> A; viennacl::compressed_matrix<value_type> B; A.set(a_xadj, a_adj, a_ew, m, n, nnzA); B.set(b_xadj, b_adj, b_ew, n, k, nnzB); std::cout << "compress matrix create:" << timerset.seconds() << std::endl; std::cout << "Now running ViennaCL" << std::endl; Kokkos::Impl::Timer timer1; viennacl::compressed_matrix<value_type> C = viennacl::linalg::prod(A, B); std::cout << "Actual VIENNACL SPMM Time:" << timer1.seconds() << std::endl; { unsigned int c_rows = m, c_cols = k, cnnz = C.nnz(); value_type const * values = viennacl::linalg::host_based::detail::extract_raw_pointer<value_type>(C.handle()); unsigned int const * rows_start = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(C.handle1()); unsigned int const * columns = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(C.handle2()); { Kokkos::Impl::Timer copy_time; row_mapC = typename cin_row_index_view_type::non_const_type(Kokkos::ViewAllocateWithoutInitializing("rowmapC"), c_rows + 1); entriesC = typename cin_nonzero_index_view_type::non_const_type (Kokkos::ViewAllocateWithoutInitializing("EntriesC") , cnnz); valuesC = typename cin_nonzero_value_view_type::non_const_type (Kokkos::ViewAllocateWithoutInitializing("valuesC") , cnnz); KokkosKernels::Experimental::Util::copy_vector<unsigned int const *, typename cin_row_index_view_type::non_const_type, MyExecSpace> (m, rows_start, row_mapC); idx nnz = cnnz; KokkosKernels::Experimental::Util::copy_vector<unsigned int const *, typename cin_nonzero_index_view_type::non_const_type, MyExecSpace> (nnz, columns, entriesC); KokkosKernels::Experimental::Util::copy_vector<value_type const *, typename cin_nonzero_value_view_type::non_const_type, MyExecSpace> (m, values, valuesC); double copy_time_d = copy_time.seconds(); std::cout << "VIENNACL COPYTIME:" << copy_time_d << std::endl; } } } else { //int *a_xadj = row_mapA.ptr_on_device(); std::cerr << "vienna requires (u) integer values" << std::endl; if (Kokkos::Impl::is_same<idx, long>::value){ std::cerr << "MKL is given long" << std::endl; } else if (Kokkos::Impl::is_same<idx, const int>::value){ std::cerr << "MKL is given const int" << std::endl; } else if (Kokkos::Impl::is_same<idx, unsigned long>::value){ std::cerr << "MKL is given unsigned long" << std::endl; } else if (Kokkos::Impl::is_same<idx, const unsigned long>::value){ std::cerr << "MKL is given const unsigned long" << std::endl; } else{ std::cerr << "MKL is given something else" << std::endl; } return; } #else std::cerr << "VIENNACL IS NOT DEFINED" << std::endl; return; #endif }
void cuSPARSE_symbolic( KernelHandle *handle, typename KernelHandle::row_lno_t m, typename KernelHandle::row_lno_t n, typename KernelHandle::row_lno_t k, in_row_index_view_type row_mapA, in_nonzero_index_view_type entriesA, bool transposeA, in_row_index_view_type row_mapB, in_nonzero_index_view_type entriesB, bool transposeB, typename in_row_index_view_type::non_const_type &row_mapC, typename in_nonzero_index_view_type::non_const_type &entriesC){ #ifdef KERNELS_HAVE_CUSPARSE typedef typename in_row_index_view_type::device_type device1; typedef typename in_nonzero_index_view_type::device_type device2; typedef typename KernelHandle::row_lno_t idx; typedef typename in_row_index_view_type::non_const_type idx_array_type; if (Kokkos::Impl::is_same<Kokkos::Cuda, device1 >::value){ std::cerr << "MEMORY IS NOT ALLOCATED IN GPU DEVICE for CUSPARSE" << std::endl; return; } if (Kokkos::Impl::is_same<Kokkos::Cuda, device2 >::value){ std::cerr << "MEMORY IS NOT ALLOCATED IN GPU DEVICE for CUSPARSE" << std::endl; return; } if (Kokkos::Impl::is_same<idx, int>::value){ row_mapC = idx_array_type("rowMapC", m + 1); const idx *a_xadj = row_mapA.ptr_on_device(); const idx *b_xadj = row_mapB.ptr_on_device(); idx *c_xadj = row_mapC.ptr_on_device(); const idx *a_adj = entriesA.ptr_on_device(); const idx *b_adj = entriesB.ptr_on_device(); handle->create_cuSPARSE_Handle(transposeA, transposeB); typename KernelHandle::SPGEMMcuSparseHandleType *h = handle->get_cuSparseHandle(); int nnzA = entriesA.dimension_0(); int nnzB = entriesB.dimension_0(); int baseC, nnzC; int *nnzTotalDevHostPtr = &nnzC; cusparseXcsrgemmNnz(h->handle, h->transA, h->transB, (int)m, (int)n, (int)k, h->a_descr, nnzA, (int *) a_xadj, (int *)a_adj, h->b_descr, nnzB, (int *)b_xadj, (int *)b_adj, h->c_descr, (int *)c_xadj, nnzTotalDevHostPtr ); if (NULL != nnzTotalDevHostPtr){ nnzC = *nnzTotalDevHostPtr; }else{ cudaMemcpy(&nnzC, c_xadj+m, sizeof(int), cudaMemcpyDeviceToHost); cudaMemcpy(&baseC, c_xadj, sizeof(int), cudaMemcpyDeviceToHost); nnzC -= baseC; } entriesC = in_nonzero_index_view_type("entriesC", nnzC); } else { std::cerr << "CUSPARSE requires integer values" << std::endl; return; } #else std::cerr << "CUSPARSE IS NOT DEFINED" << std::endl; return; #endif }
void CUSP_apply( KernelHandle *handle, typename KernelHandle::row_index_type m, typename KernelHandle::row_index_type n, typename KernelHandle::row_index_type k, in_row_index_view_type row_mapA, in_nonzero_index_view_type entriesA, in_nonzero_value_view_type valuesA, bool transposeA, in_row_index_view_type row_mapB, in_nonzero_index_view_type entriesB, in_nonzero_value_view_type valuesB, bool transposeB, in_row_index_view_type &row_mapC, in_nonzero_index_view_type &entriesC, in_nonzero_value_view_type &valuesC){ #ifdef KERNELS_HAVE_CUSP typedef typename KernelHandle::row_index_type idx; typedef typename KernelHandle::nonzero_value_type value_type; typedef typename in_row_index_view_type::device_type device1; typedef typename in_nonzero_index_view_type::device_type device2; typedef typename in_nonzero_value_view_type::device_type device3; std::cout << "RUNNING CUSP" << std::endl; if (Kokkos::Impl::is_same<Kokkos::Cuda, device1 >::value){ std::cerr << "MEMORY IS NOT ALLOCATED IN GPU DEVICE for CUSP" << std::endl; return; } if (Kokkos::Impl::is_same<Kokkos::Cuda, device2 >::value){ std::cerr << "MEMORY IS NOT ALLOCATED IN GPU DEVICE for CUSP" << std::endl; return; } if (Kokkos::Impl::is_same<Kokkos::Cuda, device3 >::value){ std::cerr << "MEMORY IS NOT ALLOCATED IN GPU DEVICE for CUSP" << std::endl; return; } typedef in_row_index_view_type idx_array_type; typedef typename Kokkos::RangePolicy<typename KernelHandle::HandleExecSpace> my_exec_space; idx nnzA = entriesA.dimension_0(); idx nnzB = entriesB.dimension_0(); idx *a_xadj = (idx *)row_mapA.ptr_on_device(); idx *b_xadj = (idx *)row_mapB.ptr_on_device(); idx *a_adj = (idx *)entriesA.ptr_on_device(); idx *b_adj = (idx *)entriesB.ptr_on_device(); value_type *a_ew = valuesA.ptr_on_device(); value_type *b_ew = valuesB.ptr_on_device(); /* thrust::device_ptr<idx> dev_a_xadj(a_xadj); thrust::device_ptr<idx> dev_a_adj(a_adj); thrust::device_ptr<idx> dev_b_xadj(b_xadj); thrust::device_ptr<idx> dev_b_adj(b_adj); thrust::device_ptr<value_type> dev_a_ew(a_ew); thrust::device_ptr<value_type> dev_b_ew(b_ew); */ typedef typename cusp::array1d_view< thrust::device_ptr<idx> > IDXArray1dView; typedef typename cusp::array1d_view< thrust::device_ptr<value_type> > VALUEArray1dView; //typedef typename cusp::array1d<idx, cusp::device_memory> IDXArray1dView; //typedef typename cusp::array1d<value_type, cusp::device_memory> VALUEArray1dView; IDXArray1dView arraya_xadj(thrust::device_pointer_cast(a_xadj), thrust::device_pointer_cast(a_xadj) + m + 1); IDXArray1dView arraya_adj(thrust::device_pointer_cast(a_adj), thrust::device_pointer_cast(a_adj) + nnzA); IDXArray1dView arrayb_xadj(thrust::device_pointer_cast(b_xadj), thrust::device_pointer_cast(b_xadj) + n + 1); IDXArray1dView arrayb_adj(thrust::device_pointer_cast(b_adj), thrust::device_pointer_cast(b_adj) + nnzB); VALUEArray1dView arraya_ew(thrust::device_pointer_cast(a_ew), thrust::device_pointer_cast(a_ew) + nnzA); VALUEArray1dView arrayb_ew(thrust::device_pointer_cast(b_ew), thrust::device_pointer_cast(b_ew)+ nnzB); typedef typename cusp::csr_matrix_view<IDXArray1dView, IDXArray1dView, VALUEArray1dView, idx,value_type,cusp::device_memory> cuspMatrix_View; cuspMatrix_View A(m, n, entriesA.dimension_0(), arraya_xadj, arraya_adj, arraya_ew); cuspMatrix_View B(n, k, entriesB.dimension_0(), arrayb_xadj, arrayb_adj, arrayb_ew); /* CopyArrayToCuspArray<typename cuspMatrix::row_offsets_array_type, typename KernelHandle::idx_array_type> Aforward(A.row_offsets, row_mapA); Kokkos::parallel_for (my_exec_space (0, m + 1) , Aforward); Kokkos::parallel_for (my_exec_space (0, n + 1) , CopyArrayToCuspArray<typename cuspMatrix::row_offsets_array_type, typename KernelHandle::idx_array_type>(B.row_offsets, row_mapB)); Kokkos::parallel_for (my_exec_space (0, entriesA.dimension_0()) , CopyArrayToCuspArray<typename cuspMatrix::column_indices_array_type, typename KernelHandle::idx_edge_array_type>(A.column_indices, entriesA)); Kokkos::parallel_for (my_exec_space (0, entriesB.dimension_0()) , CopyArrayToCuspArray<typename cuspMatrix::column_indices_array_type, typename KernelHandle::idx_edge_array_type>(B.column_indices, entriesB)); Kokkos::parallel_for (my_exec_space (0, valuesA.dimension_0()) , CopyArrayToCuspArray<typename cuspMatrix::values_array_type, typename KernelHandle::value_array_type>(A.values, valuesA)); Kokkos::parallel_for (my_exec_space (0, valuesB.dimension_0()) , CopyArrayToCuspArray<typename cuspMatrix::values_array_type, typename KernelHandle::value_array_type>(B.values, valuesB)); */ typedef typename cusp::csr_matrix<idx,value_type,cusp::device_memory> cuspMatrix; //typedef cuspMatrix_View cuspMatrix; cuspMatrix C; cusp::multiply(A,B,C); std::cout << " C.column_indices.size():" << C.column_indices.size() << std::endl; std::cout << " C.values.size():" << C.values.size() << std::endl; row_mapC = typename in_row_index_view_type::non_const_type("rowmapC", m + 1); entriesC = typename in_nonzero_index_view_type::non_const_type ("EntriesC" , C.column_indices.size()); valuesC = typename in_nonzero_value_view_type::non_const_type ("valuesC" , C.values.size()); Kokkos::parallel_for (my_exec_space (0, m + 1) , CopyArrayToCuspArray<in_row_index_view_type, idx >(row_mapC, (idx *) thrust::raw_pointer_cast(C.row_offsets.data()))); Kokkos::parallel_for (my_exec_space (0, C.column_indices.size()) , CopyArrayToCuspArray<in_nonzero_index_view_type, idx >(entriesC, (idx *) thrust::raw_pointer_cast(C.column_indices.data()))); Kokkos::parallel_for (my_exec_space (0, C.values.size()) , CopyArrayToCuspArray<in_nonzero_value_view_type, value_type>(valuesC, (value_type *) thrust::raw_pointer_cast(C.values.data()))); #else std::cerr << "CUSP IS NOT DEFINED" << std::endl; return; #endif }
void cuSPARSE_apply( KernelHandle *handle, typename KernelHandle::row_lno_t m, typename KernelHandle::row_lno_t n, typename KernelHandle::row_lno_t k, in_row_index_view_type row_mapA, in_nonzero_index_view_type entriesA, in_nonzero_value_view_type valuesA, bool transposeA, in_row_index_view_type row_mapB, in_nonzero_index_view_type entriesB, in_nonzero_value_view_type valuesB, bool transposeB, typename in_row_index_view_type::non_const_type &row_mapC, typename in_nonzero_index_view_type::non_const_type &entriesC, typename in_nonzero_value_view_type::non_const_type &valuesC){ #ifdef KERNELS_HAVE_CUSPARSE typedef typename KernelHandle::row_lno_t idx; typedef in_row_index_view_type idx_array_type; typedef typename KernelHandle::nnz_scalar_t value_type; typedef typename in_row_index_view_type::device_type device1; typedef typename in_nonzero_index_view_type::device_type device2; typedef typename in_nonzero_value_view_type::device_type device3; std::cout << "RUNNING CUSParse" << std::endl; if (Kokkos::Impl::is_same<Kokkos::Cuda, device1 >::value){ std::cerr << "MEMORY IS NOT ALLOCATED IN GPU DEVICE for CUSPARSE" << std::endl; return; } if (Kokkos::Impl::is_same<Kokkos::Cuda, device2 >::value){ std::cerr << "MEMORY IS NOT ALLOCATED IN GPU DEVICE for CUSPARSE" << std::endl; return; } if (Kokkos::Impl::is_same<Kokkos::Cuda, device3 >::value){ std::cerr << "MEMORY IS NOT ALLOCATED IN GPU DEVICE for CUSPARSE" << std::endl; return; } if (Kokkos::Impl::is_same<idx, int>::value){ int *a_xadj = (int *)row_mapA.ptr_on_device(); int *b_xadj = (int *)row_mapB.ptr_on_device(); int *c_xadj = (int *)row_mapC.ptr_on_device(); int *a_adj = (int *)entriesA.ptr_on_device(); int *b_adj = (int *)entriesB.ptr_on_device(); int *c_adj = (int *)entriesC.ptr_on_device(); typename KernelHandle::SPGEMMcuSparseHandleType *h = handle->get_cuSparseHandle(); int nnzA = entriesA.dimension_0(); int nnzB = entriesB.dimension_0(); value_type *a_ew = valuesA.ptr_on_device(); value_type *b_ew = valuesB.ptr_on_device(); value_type *c_ew = valuesC.ptr_on_device(); if (Kokkos::Impl::is_same<value_type, float>::value){ cusparseScsrgemm( h->handle, h->transA, h->transB, m, n, k, h->a_descr, nnzA, (float *)a_ew, a_xadj, a_adj, h->b_descr, nnzB, (float *)b_ew, b_xadj, b_adj, h->c_descr, (float *)c_ew, c_xadj, c_adj); } else if (Kokkos::Impl::is_same<value_type, double>::value){ cusparseDcsrgemm( h->handle, h->transA, h->transB, m, n, k, h->a_descr, nnzA, (double *)a_ew, a_xadj, a_adj, h->b_descr, nnzB, (double *)b_ew, b_xadj, b_adj, h->c_descr, (double *)c_ew, c_xadj, c_adj); } else { std::cerr << "CUSPARSE requires float or double values. cuComplex and cuDoubleComplex are not implemented yet." << std::endl; return; } } else { std::cerr << "CUSPARSE requires integer values" << std::endl; return; } #else std::cerr << "CUSPARSE IS NOT DEFINED" << std::endl; return; #endif }