extern "C" magma_int_t magma_ccuspmm( magma_c_sparse_matrix A, magma_c_sparse_matrix B, magma_c_sparse_matrix *AB, magma_queue_t queue ) { if ( A.memory_location == Magma_DEV && B.memory_location == Magma_DEV && ( A.storage_type == Magma_CSR || A.storage_type == Magma_CSRCOO ) && ( B.storage_type == Magma_CSR || B.storage_type == Magma_CSRCOO ) ) { magma_c_sparse_matrix C; C.num_rows = A.num_rows; C.num_cols = B.num_cols; C.storage_type = A.storage_type; C.memory_location = A.memory_location; C.fill_mode = Magma_FULL; magma_int_t stat_dev = 0; C.val = NULL; C.col = NULL; C.row = NULL; C.rowidx = NULL; C.blockinfo = NULL; C.diag = NULL; C.dval = NULL; C.dcol = NULL; C.drow = NULL; C.drowidx = NULL; C.ddiag = NULL; // CUSPARSE context // cusparseHandle_t handle; cusparseStatus_t cusparseStatus; cusparseStatus = cusparseCreate(&handle); cusparseSetStream( handle, queue ); if (cusparseStatus != 0) printf("error in Handle.\n"); cusparseMatDescr_t descrA; cusparseMatDescr_t descrB; cusparseMatDescr_t descrC; cusparseStatus = cusparseCreateMatDescr(&descrA); cusparseStatus = cusparseCreateMatDescr(&descrB); cusparseStatus = cusparseCreateMatDescr(&descrC); if (cusparseStatus != 0) printf("error in MatrDescr.\n"); cusparseStatus = cusparseSetMatType(descrA,CUSPARSE_MATRIX_TYPE_GENERAL); cusparseSetMatType(descrB,CUSPARSE_MATRIX_TYPE_GENERAL); cusparseSetMatType(descrC,CUSPARSE_MATRIX_TYPE_GENERAL); if (cusparseStatus != 0) printf("error in MatrType.\n"); cusparseStatus = cusparseSetMatIndexBase(descrA,CUSPARSE_INDEX_BASE_ZERO); cusparseSetMatIndexBase(descrB,CUSPARSE_INDEX_BASE_ZERO); cusparseSetMatIndexBase(descrC,CUSPARSE_INDEX_BASE_ZERO); if (cusparseStatus != 0) printf("error in IndexBase.\n"); // multiply A and B on the device magma_int_t baseC; // nnzTotalDevHostPtr points to host memory magma_index_t *nnzTotalDevHostPtr = (magma_index_t*) &C.nnz; cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_HOST); stat_dev += magma_index_malloc( &C.drow, (A.num_rows + 1) ); cusparseXcsrgemmNnz(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, A.num_rows, A.num_rows, A.num_rows, descrA, A.nnz, A.drow, A.dcol, descrB, B.nnz, B.drow, B.dcol, descrC, C.drow, nnzTotalDevHostPtr ); if (NULL != nnzTotalDevHostPtr) { C.nnz = *nnzTotalDevHostPtr; } else { // workaround as nnz and base C are magma_int_t magma_index_t base_t, nnz_t; magma_index_getvector( 1, C.drow+C.num_rows, 1, &nnz_t, 1 ); magma_index_getvector( 1, C.drow, 1, &base_t, 1 ); C.nnz = (magma_int_t) nnz_t; baseC = (magma_int_t) base_t; C.nnz -= baseC; } stat_dev += magma_index_malloc( &C.dcol, C.nnz ); stat_dev += magma_cmalloc( &C.dval, C.nnz ); if( stat_dev != 0 ){ magma_c_mfree( &C, queue ); return MAGMA_ERR_DEVICE_ALLOC; } cusparseCcsrgemm(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, A.num_rows, A.num_rows, A.num_rows, descrA, A.nnz, A.dval, A.drow, A.dcol, descrB, B.nnz, B.dval, B.drow, B.dcol, descrC, C.dval, C.drow, C.dcol); cusparseDestroyMatDescr( descrA ); cusparseDestroyMatDescr( descrB ); cusparseDestroyMatDescr( descrC ); cusparseDestroy( handle ); // end CUSPARSE context // magma_c_mtransfer( C, AB, Magma_DEV, Magma_DEV, queue ); magma_c_mfree( &C, queue ); return MAGMA_SUCCESS; } else { printf("error: CSRMM only supported on device and CSR format.\n"); return MAGMA_SUCCESS; } }
extern "C" magma_int_t magma_zcuspmm( magma_z_matrix A, magma_z_matrix B, magma_z_matrix *AB, magma_queue_t queue ) { magma_int_t info = 0; magma_z_matrix C={Magma_CSR}; C.num_rows = A.num_rows; C.num_cols = B.num_cols; C.storage_type = A.storage_type; C.memory_location = A.memory_location; C.fill_mode = MagmaFull; C.val = NULL; C.col = NULL; C.row = NULL; C.rowidx = NULL; C.blockinfo = NULL; C.diag = NULL; C.dval = NULL; C.dcol = NULL; C.drow = NULL; C.drowidx = NULL; C.ddiag = NULL; magma_index_t base_t, nnz_t, baseC; cusparseHandle_t handle=NULL; cusparseMatDescr_t descrA=NULL; cusparseMatDescr_t descrB=NULL; cusparseMatDescr_t descrC=NULL; if ( A.memory_location == Magma_DEV && B.memory_location == Magma_DEV && ( A.storage_type == Magma_CSR || A.storage_type == Magma_CSRCOO ) && ( B.storage_type == Magma_CSR || B.storage_type == Magma_CSRCOO ) ) { // CUSPARSE context / CHECK_CUSPARSE( cusparseCreate( &handle )); CHECK_CUSPARSE( cusparseSetStream( handle, queue->cuda_stream() )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descrA )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descrB )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descrC )); CHECK_CUSPARSE( cusparseSetMatType( descrA, CUSPARSE_MATRIX_TYPE_GENERAL )); CHECK_CUSPARSE( cusparseSetMatType( descrB, CUSPARSE_MATRIX_TYPE_GENERAL )); CHECK_CUSPARSE( cusparseSetMatType( descrC, CUSPARSE_MATRIX_TYPE_GENERAL )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrA, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrB, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrC, CUSPARSE_INDEX_BASE_ZERO )); // nnzTotalDevHostPtr points to host memory magma_index_t *nnzTotalDevHostPtr = (magma_index_t*) &C.nnz; CHECK_CUSPARSE( cusparseSetPointerMode( handle, CUSPARSE_POINTER_MODE_HOST )); CHECK( magma_index_malloc( &C.drow, (A.num_rows + 1) )); CHECK_CUSPARSE( cusparseXcsrgemmNnz( handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, A.num_rows, B.num_cols, A.num_cols, descrA, A.nnz, A.drow, A.dcol, descrB, B.nnz, B.drow, B.dcol, descrC, C.drow, nnzTotalDevHostPtr )); if (NULL != nnzTotalDevHostPtr) { C.nnz = *nnzTotalDevHostPtr; } else { // workaround as nnz and base C are magma_int_t magma_index_getvector( 1, C.drow+C.num_rows, 1, &nnz_t, 1, queue ); magma_index_getvector( 1, C.drow, 1, &base_t, 1, queue ); C.nnz = (magma_int_t) nnz_t; baseC = (magma_int_t) base_t; C.nnz -= baseC; } CHECK( magma_index_malloc( &C.dcol, C.nnz )); CHECK( magma_zmalloc( &C.dval, C.nnz )); CHECK_CUSPARSE( cusparseZcsrgemm( handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, A.num_rows, B.num_cols, A.num_cols, descrA, A.nnz, A.dval, A.drow, A.dcol, descrB, B.nnz, B.dval, B.drow, B.dcol, descrC, C.dval, C.drow, C.dcol )); // end CUSPARSE context // //magma_device_sync(); magma_queue_sync( queue ); CHECK( magma_zmtransfer( C, AB, Magma_DEV, Magma_DEV, queue )); } else { info = MAGMA_ERR_NOT_SUPPORTED; } cleanup: cusparseDestroyMatDescr( descrA ); cusparseDestroyMatDescr( descrB ); cusparseDestroyMatDescr( descrC ); cusparseDestroy( handle ); magma_zmfree( &C, queue ); return info; }
template <typename KernelHandle,
          typename in_row_index_view_type,
          typename in_nonzero_index_view_type>
void cuSPARSE_symbolic(
    KernelHandle *handle,
    typename KernelHandle::row_lno_t m,
    typename KernelHandle::row_lno_t n,
    typename KernelHandle::row_lno_t k,
    in_row_index_view_type row_mapA,
    in_nonzero_index_view_type entriesA,
    bool transposeA,
    in_row_index_view_type row_mapB,
    in_nonzero_index_view_type entriesB,
    bool transposeB,
    typename in_row_index_view_type::non_const_type &row_mapC,
    typename in_nonzero_index_view_type::non_const_type &entriesC)
{
#ifdef KERNELS_HAVE_CUSPARSE
  typedef typename in_row_index_view_type::device_type device1;
  typedef typename in_nonzero_index_view_type::device_type device2;
  typedef typename KernelHandle::row_lno_t idx;
  typedef typename in_row_index_view_type::non_const_type idx_array_type;

  // cuSPARSE operates only on device-resident data; the error must fire when
  // a view does NOT live in CUDA memory (the original checks were inverted)
  if (!Kokkos::Impl::is_same<Kokkos::Cuda, device1>::value) {
    std::cerr << "MEMORY IS NOT ALLOCATED IN GPU DEVICE for CUSPARSE" << std::endl;
    return;
  }
  if (!Kokkos::Impl::is_same<Kokkos::Cuda, device2>::value) {
    std::cerr << "MEMORY IS NOT ALLOCATED IN GPU DEVICE for CUSPARSE" << std::endl;
    return;
  }

  if (Kokkos::Impl::is_same<idx, int>::value) {
    row_mapC = idx_array_type("rowMapC", m + 1);

    const idx *a_xadj = row_mapA.ptr_on_device();
    const idx *b_xadj = row_mapB.ptr_on_device();
    idx *c_xadj = row_mapC.ptr_on_device();
    const idx *a_adj = entriesA.ptr_on_device();
    const idx *b_adj = entriesB.ptr_on_device();

    handle->create_cuSPARSE_Handle(transposeA, transposeB);
    typename KernelHandle::SPGEMMcuSparseHandleType *h = handle->get_cuSparseHandle();

    int nnzA = entriesA.dimension_0();
    int nnzB = entriesB.dimension_0();

    int baseC, nnzC;
    int *nnzTotalDevHostPtr = &nnzC;

    // symbolic phase: cuSPARSE interprets (m, n, k) as C being m x n with
    // inner dimension k, and fills c_xadj with the row pointers of C
    cusparseXcsrgemmNnz(h->handle, h->transA, h->transB,
                        (int)m, (int)n, (int)k,
                        h->a_descr, nnzA, (int *)a_xadj, (int *)a_adj,
                        h->b_descr, nnzB, (int *)b_xadj, (int *)b_adj,
                        h->c_descr, (int *)c_xadj, nnzTotalDevHostPtr);

    if (NULL != nnzTotalDevHostPtr) {
      nnzC = *nnzTotalDevHostPtr;
    }
    else {
      cudaMemcpy(&nnzC, c_xadj + m, sizeof(int), cudaMemcpyDeviceToHost);
      cudaMemcpy(&baseC, c_xadj, sizeof(int), cudaMemcpyDeviceToHost);
      nnzC -= baseC;
    }
    // allocate through the non-const view type (a const view cannot be
    // allocated); the numeric phase fills in the column indices of C
    entriesC = typename in_nonzero_index_view_type::non_const_type("entriesC", nnzC);
  }
  else {
    std::cerr << "CUSPARSE requires integer row pointers and column indices" << std::endl;
    return;
  }
#else
  std::cerr << "CUSPARSE IS NOT DEFINED" << std::endl;
  return;
#endif
}
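// ---------------------------------------------------------------------------
// Hypothetical convenience wrapper (added for illustration; not part of the
// original source): runs only the symbolic phase above and reports nnz(C),
// which is exactly the length of the entriesC view that cuSPARSE_symbolic
// allocates. The wrapper name and the no-transpose assumption are mine; the
// template parameters mirror the signature of cuSPARSE_symbolic.
template <typename KernelHandle,
          typename in_row_index_view_type,
          typename in_nonzero_index_view_type>
size_t cuSPARSE_count_nnz(
    KernelHandle *handle,
    typename KernelHandle::row_lno_t m,
    typename KernelHandle::row_lno_t n,
    typename KernelHandle::row_lno_t k,
    in_row_index_view_type row_mapA, in_nonzero_index_view_type entriesA,
    in_row_index_view_type row_mapB, in_nonzero_index_view_type entriesB)
{
  typename in_row_index_view_type::non_const_type row_mapC;
  typename in_nonzero_index_view_type::non_const_type entriesC;
  // symbolic phase only; row_mapC and entriesC are allocated inside
  cuSPARSE_symbolic(handle, m, n, k,
                    row_mapA, entriesA, false,
                    row_mapB, entriesB, false,
                    row_mapC, entriesC);
  return entriesC.dimension_0();  // nnz(C)
}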