void mkl_apply( KernelHandle *handle, typename KernelHandle::row_lno_t m, typename KernelHandle::row_lno_t n, typename KernelHandle::row_lno_t k, in_row_index_view_type row_mapA, in_nonzero_index_view_type entriesA, in_nonzero_value_view_type valuesA, bool transposeA, in_row_index_view_type row_mapB, in_nonzero_index_view_type entriesB, in_nonzero_value_view_type valuesB, bool transposeB, typename in_row_index_view_type::non_const_type &row_mapC, typename in_nonzero_index_view_type::non_const_type &entriesC, typename in_nonzero_value_view_type::non_const_type &valuesC){ #ifdef KERNELS_HAVE_MKL typedef typename KernelHandle::row_lno_t idx; typedef in_row_index_view_type idx_array_type; typedef typename KernelHandle::nnz_scalar_t value_type; typedef typename in_row_index_view_type::device_type device1; typedef typename in_nonzero_index_view_type::device_type device2; typedef typename in_nonzero_value_view_type::device_type device3; typedef typename KernelHandle::HandleExecSpace MyExecSpace; std::cout << "RUNNING MKL" << std::endl; #if defined( KOKKOS_HAVE_CUDA ) if (!Kokkos::Impl::is_same<Kokkos::Cuda, device1 >::value){ std::cerr << "MEMORY IS NOT ALLOCATED IN HOST DEVICE for MKL" << std::endl; return; } if (!Kokkos::Impl::is_same<Kokkos::Cuda, device2 >::value){ std::cerr << "MEMORY IS NOT ALLOCATED IN HOST DEVICE for MKL" << std::endl; return; } if (!Kokkos::Impl::is_same<Kokkos::Cuda, device3 >::value){ std::cerr << "MEMORY IS NOT ALLOCATED IN HOST DEVICE for MKL" << std::endl; return; } #endif if (Kokkos::Impl::is_same<idx, int>::value){ int *a_xadj = (int *)row_mapA.ptr_on_device(); int *b_xadj = (int *)row_mapB.ptr_on_device(); int *c_xadj = (int *)row_mapC.ptr_on_device(); int *a_adj = (int *)entriesA.ptr_on_device(); int *b_adj = (int *)entriesB.ptr_on_device(); int *c_adj = (int *)entriesC.ptr_on_device(); int nnzA = entriesA.dimension_0(); int nnzB = entriesB.dimension_0(); value_type *a_ew = valuesA.ptr_on_device(); value_type *b_ew = valuesB.ptr_on_device(); value_type *c_ew = valuesC.ptr_on_device(); sparse_matrix_t A; sparse_matrix_t B; sparse_matrix_t C; if (Kokkos::Impl::is_same<value_type, float>::value){ if (SPARSE_STATUS_SUCCESS != mkl_sparse_s_create_csr (&A, SPARSE_INDEX_BASE_ZERO, m, n, a_xadj, a_xadj + 1, a_adj, (float *)a_ew)){ std::cerr << "CANNOT CREATE mkl_sparse_s_create_csr A" << std::endl; return; } if (SPARSE_STATUS_SUCCESS != mkl_sparse_s_create_csr (&B, SPARSE_INDEX_BASE_ZERO, n, k, b_xadj, b_xadj + 1, b_adj, (float *)b_ew)){ std::cerr << "CANNOT CREATE mkl_sparse_s_create_csr B" << std::endl; return; } sparse_operation_t operation; if (transposeA && transposeB){ operation = SPARSE_OPERATION_TRANSPOSE; } else if (!(transposeA || transposeB)){ operation = SPARSE_OPERATION_NON_TRANSPOSE; } else { std::cerr << "Ask both to transpose or non transpose for MKL SPGEMM" << std::endl; return; } Kokkos::Impl::Timer timer1; bool success = SPARSE_STATUS_SUCCESS != mkl_sparse_spmm (operation, A, B, &C); std::cout << "Actual FLOAT MKL SPMM Time:" << timer1.seconds() << std::endl; if (success){ std::cerr << "CANNOT multiply mkl_sparse_spmm " << std::endl; return; } else{ sparse_index_base_t c_indexing; MKL_INT c_rows, c_cols, *rows_start, *rows_end, *columns; float *values; if (SPARSE_STATUS_SUCCESS != mkl_sparse_s_export_csr (C, &c_indexing, &c_rows, &c_cols, &rows_start, &rows_end, &columns, &values)){ std::cerr << "CANNOT export result matrix " << std::endl; return; } if (SPARSE_INDEX_BASE_ZERO != c_indexing){ std::cerr << "C is not zero based indexed." << std::endl; return; } row_mapC = typename in_row_index_view_type::non_const_type(Kokkos::ViewAllocateWithoutInitializing("rowmapC"), c_rows + 1); entriesC = typename in_nonzero_index_view_type::non_const_type (Kokkos::ViewAllocateWithoutInitializing("EntriesC") , rows_end[m - 1] ); valuesC = typename in_nonzero_value_view_type::non_const_type (Kokkos::ViewAllocateWithoutInitializing("valuesC") , rows_end[m - 1]); KokkosKernels::Experimental::Util::copy_vector<MKL_INT *, typename in_row_index_view_type::non_const_type, MyExecSpace> (m, rows_start, row_mapC); idx nnz = row_mapC(m) = rows_end[m - 1]; KokkosKernels::Experimental::Util::copy_vector<MKL_INT *, typename in_nonzero_index_view_type::non_const_type , MyExecSpace> (nnz, columns, entriesC); KokkosKernels::Experimental::Util::copy_vector<float *, typename in_nonzero_value_view_type::non_const_type, MyExecSpace> (m, values, valuesC); } if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy (A)){ std::cerr << "CANNOT DESTROY mkl_sparse_destroy A" << std::endl; return; } if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy (B)){ std::cerr << "CANNOT DESTROY mkl_sparse_destroy B" << std::endl; return; } if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy (C)){ std::cerr << "CANNOT DESTROY mkl_sparse_destroy C" << std::endl; return; } } else if (Kokkos::Impl::is_same<value_type, double>::value){ /* std::cout << "create a" << std::endl; std::cout << "m:" << m << " n:" << n << std::endl; std::cout << "a_xadj[0]:" << a_xadj[0] << " a_xadj[m]:" << a_xadj[m] << std::endl; std::cout << "a_adj[a_xadj[m] - 1]:" << a_adj[a_xadj[m] - 1] << " a_ew[a_xadj[m] - 1]:" << a_ew[a_xadj[m] - 1] << std::endl; */ if (SPARSE_STATUS_SUCCESS != mkl_sparse_d_create_csr (&A, SPARSE_INDEX_BASE_ZERO, m, n, a_xadj, a_xadj + 1, a_adj, (double *)a_ew)){ std::cerr << "CANNOT CREATE mkl_sparse_d_create_csr A" << std::endl; return; } //std::cout << "create b" << std::endl; if (SPARSE_STATUS_SUCCESS != mkl_sparse_d_create_csr (&B, SPARSE_INDEX_BASE_ZERO, n, k, b_xadj, b_xadj + 1, b_adj, (double *) b_ew)){ std::cerr << "CANNOT CREATE mkl_sparse_d_create_csr B" << std::endl; return; } sparse_operation_t operation; if (transposeA && transposeB){ operation = SPARSE_OPERATION_TRANSPOSE; } else if (!(transposeA || transposeB)){ operation = SPARSE_OPERATION_NON_TRANSPOSE; } else { std::cerr << "Ask both to transpose or non transpose for MKL SPGEMM" << std::endl; return; } Kokkos::Impl::Timer timer1; bool success = SPARSE_STATUS_SUCCESS != mkl_sparse_spmm (operation, A, B, &C); std::cout << "Actual DOUBLE MKL SPMM Time:" << timer1.seconds() << std::endl; if (success){ std::cerr << "CANNOT multiply mkl_sparse_spmm " << std::endl; return; } else{ sparse_index_base_t c_indexing; MKL_INT c_rows, c_cols, *rows_start, *rows_end, *columns; double *values; if (SPARSE_STATUS_SUCCESS != mkl_sparse_d_export_csr (C, &c_indexing, &c_rows, &c_cols, &rows_start, &rows_end, &columns, &values)){ std::cerr << "CANNOT export result matrix " << std::endl; return; } if (SPARSE_INDEX_BASE_ZERO != c_indexing){ std::cerr << "C is not zero based indexed." << std::endl; return; } { Kokkos::Impl::Timer copy_time; row_mapC = typename in_row_index_view_type::non_const_type(Kokkos::ViewAllocateWithoutInitializing("rowmapC"), c_rows + 1); entriesC = typename in_nonzero_index_view_type::non_const_type (Kokkos::ViewAllocateWithoutInitializing("EntriesC") , rows_end[m - 1] ); valuesC = typename in_nonzero_value_view_type::non_const_type (Kokkos::ViewAllocateWithoutInitializing("valuesC") , rows_end[m - 1]); KokkosKernels::Experimental::Util::copy_vector<MKL_INT *, typename in_row_index_view_type::non_const_type, MyExecSpace> (m, rows_start, row_mapC); idx nnz = row_mapC(m) = rows_end[m - 1]; KokkosKernels::Experimental::Util::copy_vector<MKL_INT *, typename in_nonzero_index_view_type::non_const_type, MyExecSpace> (nnz, columns, entriesC); KokkosKernels::Experimental::Util::copy_vector<double *, typename in_nonzero_value_view_type::non_const_type, MyExecSpace> (m, values, valuesC); double copy_time_d = copy_time.seconds(); std::cout << "MKL COPYTIME:" << copy_time_d << std::endl; } } if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy (A)){ std::cerr << "CANNOT DESTROY mkl_sparse_destroy A" << std::endl; return; } if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy (B)){ std::cerr << "CANNOT DESTROY mkl_sparse_destroy B" << std::endl; return; } if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy (C)){ std::cerr << "CANNOT DESTROY mkl_sparse_destroy C" << std::endl; return; } } else { std::cerr << "CUSPARSE requires float or double values. cuComplex and cuDoubleComplex are not implemented yet." << std::endl; return; } } else { //int *a_xadj = row_mapA.ptr_on_device(); std::cerr << "MKL requires integer values" << std::endl; if (Kokkos::Impl::is_same<idx, unsigned int>::value){ std::cerr << "MKL is given unsigned integer" << std::endl; } else if (Kokkos::Impl::is_same<idx, long>::value){ std::cerr << "MKL is given long" << std::endl; } else if (Kokkos::Impl::is_same<idx, const int>::value){ std::cerr << "MKL is given const int" << std::endl; } else if (Kokkos::Impl::is_same<idx, unsigned long>::value){ std::cerr << "MKL is given unsigned long" << std::endl; } else if (Kokkos::Impl::is_same<idx, const unsigned long>::value){ std::cerr << "MKL is given const unsigned long" << std::endl; } else{ std::cerr << "MKL is given something else" << std::endl; } return; } #else std::cerr << "MKL IS NOT DEFINED" << std::endl; return; #endif }
void cuSPARSE_apply( KernelHandle *handle, typename KernelHandle::row_lno_t m, typename KernelHandle::row_lno_t n, typename KernelHandle::row_lno_t k, in_row_index_view_type row_mapA, in_nonzero_index_view_type entriesA, in_nonzero_value_view_type valuesA, bool transposeA, in_row_index_view_type row_mapB, in_nonzero_index_view_type entriesB, in_nonzero_value_view_type valuesB, bool transposeB, typename in_row_index_view_type::non_const_type &row_mapC, typename in_nonzero_index_view_type::non_const_type &entriesC, typename in_nonzero_value_view_type::non_const_type &valuesC){ #ifdef KERNELS_HAVE_CUSPARSE typedef typename KernelHandle::row_lno_t idx; typedef in_row_index_view_type idx_array_type; typedef typename KernelHandle::nnz_scalar_t value_type; typedef typename in_row_index_view_type::device_type device1; typedef typename in_nonzero_index_view_type::device_type device2; typedef typename in_nonzero_value_view_type::device_type device3; std::cout << "RUNNING CUSParse" << std::endl; if (Kokkos::Impl::is_same<Kokkos::Cuda, device1 >::value){ std::cerr << "MEMORY IS NOT ALLOCATED IN GPU DEVICE for CUSPARSE" << std::endl; return; } if (Kokkos::Impl::is_same<Kokkos::Cuda, device2 >::value){ std::cerr << "MEMORY IS NOT ALLOCATED IN GPU DEVICE for CUSPARSE" << std::endl; return; } if (Kokkos::Impl::is_same<Kokkos::Cuda, device3 >::value){ std::cerr << "MEMORY IS NOT ALLOCATED IN GPU DEVICE for CUSPARSE" << std::endl; return; } if (Kokkos::Impl::is_same<idx, int>::value){ int *a_xadj = (int *)row_mapA.ptr_on_device(); int *b_xadj = (int *)row_mapB.ptr_on_device(); int *c_xadj = (int *)row_mapC.ptr_on_device(); int *a_adj = (int *)entriesA.ptr_on_device(); int *b_adj = (int *)entriesB.ptr_on_device(); int *c_adj = (int *)entriesC.ptr_on_device(); typename KernelHandle::SPGEMMcuSparseHandleType *h = handle->get_cuSparseHandle(); int nnzA = entriesA.dimension_0(); int nnzB = entriesB.dimension_0(); value_type *a_ew = valuesA.ptr_on_device(); value_type *b_ew = valuesB.ptr_on_device(); value_type *c_ew = valuesC.ptr_on_device(); if (Kokkos::Impl::is_same<value_type, float>::value){ cusparseScsrgemm( h->handle, h->transA, h->transB, m, n, k, h->a_descr, nnzA, (float *)a_ew, a_xadj, a_adj, h->b_descr, nnzB, (float *)b_ew, b_xadj, b_adj, h->c_descr, (float *)c_ew, c_xadj, c_adj); } else if (Kokkos::Impl::is_same<value_type, double>::value){ cusparseDcsrgemm( h->handle, h->transA, h->transB, m, n, k, h->a_descr, nnzA, (double *)a_ew, a_xadj, a_adj, h->b_descr, nnzB, (double *)b_ew, b_xadj, b_adj, h->c_descr, (double *)c_ew, c_xadj, c_adj); } else { std::cerr << "CUSPARSE requires float or double values. cuComplex and cuDoubleComplex are not implemented yet." << std::endl; return; } } else { std::cerr << "CUSPARSE requires integer values" << std::endl; return; } #else std::cerr << "CUSPARSE IS NOT DEFINED" << std::endl; return; #endif }