示例#1
0
extern "C" magma_int_t
magma_ccuspmm(
    magma_c_sparse_matrix A, magma_c_sparse_matrix B, 
    magma_c_sparse_matrix *AB,
    magma_queue_t queue )
{
    if (    A.memory_location == Magma_DEV 
        && B.memory_location == Magma_DEV
        && ( A.storage_type == Magma_CSR ||
             A.storage_type == Magma_CSRCOO )
        && ( B.storage_type == Magma_CSR ||
             B.storage_type == Magma_CSRCOO ) ) {
            
            magma_c_sparse_matrix C;
            C.num_rows = A.num_rows;
            C.num_cols = B.num_cols;
            C.storage_type = A.storage_type;
            C.memory_location = A.memory_location;
            C.fill_mode = Magma_FULL;
            
            magma_int_t stat_dev = 0;
            C.val = NULL;
            C.col = NULL;
            C.row = NULL;
            C.rowidx = NULL;
            C.blockinfo = NULL;
            C.diag = NULL;
            C.dval = NULL;
            C.dcol = NULL;
            C.drow = NULL;
            C.drowidx = NULL;
            C.ddiag = NULL;

            // CUSPARSE context //
            cusparseHandle_t handle;
            cusparseStatus_t cusparseStatus;
            cusparseStatus = cusparseCreate(&handle);
            cusparseSetStream( handle, queue );
             if (cusparseStatus != 0)    printf("error in Handle.\n");

            cusparseMatDescr_t descrA;
            cusparseMatDescr_t descrB;
            cusparseMatDescr_t descrC;
            cusparseStatus = cusparseCreateMatDescr(&descrA);
            cusparseStatus = cusparseCreateMatDescr(&descrB);
            cusparseStatus = cusparseCreateMatDescr(&descrC);
             if (cusparseStatus != 0)    printf("error in MatrDescr.\n");

            cusparseStatus =
            cusparseSetMatType(descrA,CUSPARSE_MATRIX_TYPE_GENERAL);
            cusparseSetMatType(descrB,CUSPARSE_MATRIX_TYPE_GENERAL);
            cusparseSetMatType(descrC,CUSPARSE_MATRIX_TYPE_GENERAL);
             if (cusparseStatus != 0)    printf("error in MatrType.\n");

            cusparseStatus =
            cusparseSetMatIndexBase(descrA,CUSPARSE_INDEX_BASE_ZERO);
            cusparseSetMatIndexBase(descrB,CUSPARSE_INDEX_BASE_ZERO);
            cusparseSetMatIndexBase(descrC,CUSPARSE_INDEX_BASE_ZERO);
             if (cusparseStatus != 0)    printf("error in IndexBase.\n");

            // multiply A and B on the device
            magma_int_t baseC;
            // nnzTotalDevHostPtr points to host memory
            magma_index_t *nnzTotalDevHostPtr = (magma_index_t*) &C.nnz;
            cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_HOST);
            stat_dev += magma_index_malloc( &C.drow, (A.num_rows + 1) );
            cusparseXcsrgemmNnz(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, 
                                        CUSPARSE_OPERATION_NON_TRANSPOSE, 
                                        A.num_rows, A.num_rows, A.num_rows, 
                                        descrA, A.nnz, A.drow, A.dcol,
                                        descrB, B.nnz, B.drow, B.dcol,
                                        descrC, C.drow, nnzTotalDevHostPtr );
            if (NULL != nnzTotalDevHostPtr) {
                C.nnz = *nnzTotalDevHostPtr;
            } else {
                // workaround as nnz and base C are magma_int_t 
                magma_index_t base_t, nnz_t; 
                magma_index_getvector( 1, C.drow+C.num_rows, 1, &nnz_t, 1 );
                magma_index_getvector( 1, C.drow,   1, &base_t,    1 );
                C.nnz = (magma_int_t) nnz_t;
                baseC = (magma_int_t) base_t;
                C.nnz -= baseC;
            }
            stat_dev += magma_index_malloc( &C.dcol, C.nnz );
            stat_dev += magma_cmalloc( &C.dval, C.nnz );
            if( stat_dev != 0 ){
                magma_c_mfree( &C, queue );
                return MAGMA_ERR_DEVICE_ALLOC;
            }
            
            cusparseCcsrgemm(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, 
                                        CUSPARSE_OPERATION_NON_TRANSPOSE, 
                            A.num_rows, A.num_rows, A.num_rows,
                            descrA, A.nnz,
                            A.dval, A.drow, A.dcol,
                            descrB, B.nnz,
                            B.dval, B.drow, B.dcol,
                            descrC,
                            C.dval, C.drow, C.dcol);



            cusparseDestroyMatDescr( descrA );
            cusparseDestroyMatDescr( descrB );
            cusparseDestroyMatDescr( descrC );
            cusparseDestroy( handle );
            // end CUSPARSE context //

            magma_c_mtransfer( C, AB, Magma_DEV, Magma_DEV, queue );
            magma_c_mfree( &C, queue );

        return MAGMA_SUCCESS; 
    }
    else {

        printf("error: CSRMM only supported on device and CSR format.\n");

        return MAGMA_SUCCESS; 
    }
}
示例#2
0
extern "C" magma_int_t
magma_zcuspmm(
    magma_z_matrix A, magma_z_matrix B,
    magma_z_matrix *AB,
    magma_queue_t queue )
{
    magma_int_t info = 0;
    
    
    magma_z_matrix C={Magma_CSR};
    C.num_rows = A.num_rows;
    C.num_cols = B.num_cols;
    C.storage_type = A.storage_type;
    C.memory_location = A.memory_location;
    C.fill_mode = MagmaFull;
    
    C.val = NULL;
    C.col = NULL;
    C.row = NULL;
    C.rowidx = NULL;
    C.blockinfo = NULL;
    C.diag = NULL;
    C.dval = NULL;
    C.dcol = NULL;
    C.drow = NULL;
    C.drowidx = NULL;
    C.ddiag = NULL;
    
    magma_index_t base_t, nnz_t, baseC;
    
    cusparseHandle_t handle=NULL;
    cusparseMatDescr_t descrA=NULL;
    cusparseMatDescr_t descrB=NULL;
    cusparseMatDescr_t descrC=NULL;
    
    if (    A.memory_location == Magma_DEV
        && B.memory_location == Magma_DEV
        && ( A.storage_type == Magma_CSR ||
             A.storage_type == Magma_CSRCOO )
        && ( B.storage_type == Magma_CSR ||
             B.storage_type == Magma_CSRCOO ) )
    {
        // CUSPARSE context /
        CHECK_CUSPARSE( cusparseCreate( &handle ));
        CHECK_CUSPARSE( cusparseSetStream( handle, queue->cuda_stream() ));
        CHECK_CUSPARSE( cusparseCreateMatDescr( &descrA ));
        CHECK_CUSPARSE( cusparseCreateMatDescr( &descrB ));
        CHECK_CUSPARSE( cusparseCreateMatDescr( &descrC ));
        CHECK_CUSPARSE( cusparseSetMatType( descrA, CUSPARSE_MATRIX_TYPE_GENERAL ));
        CHECK_CUSPARSE( cusparseSetMatType( descrB, CUSPARSE_MATRIX_TYPE_GENERAL ));
        CHECK_CUSPARSE( cusparseSetMatType( descrC, CUSPARSE_MATRIX_TYPE_GENERAL ));
        CHECK_CUSPARSE( cusparseSetMatIndexBase( descrA, CUSPARSE_INDEX_BASE_ZERO ));
        CHECK_CUSPARSE( cusparseSetMatIndexBase( descrB, CUSPARSE_INDEX_BASE_ZERO ));
        CHECK_CUSPARSE( cusparseSetMatIndexBase( descrC, CUSPARSE_INDEX_BASE_ZERO ));

        // nnzTotalDevHostPtr points to host memory
        magma_index_t *nnzTotalDevHostPtr = (magma_index_t*) &C.nnz;
        CHECK_CUSPARSE( cusparseSetPointerMode( handle, CUSPARSE_POINTER_MODE_HOST ));
        CHECK( magma_index_malloc( &C.drow, (A.num_rows + 1) ));
        CHECK_CUSPARSE( cusparseXcsrgemmNnz( handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
                                    CUSPARSE_OPERATION_NON_TRANSPOSE,
                                    A.num_rows, B.num_cols, A.num_cols,
                                    descrA, A.nnz, A.drow, A.dcol,
                                    descrB, B.nnz, B.drow, B.dcol,
                                    descrC, C.drow, nnzTotalDevHostPtr ));
        if (NULL != nnzTotalDevHostPtr) {
            C.nnz = *nnzTotalDevHostPtr;
        } else {
            // workaround as nnz and base C are magma_int_t
            magma_index_getvector( 1, C.drow+C.num_rows, 1, &nnz_t, 1, queue );
            magma_index_getvector( 1, C.drow,   1, &base_t,    1, queue );
            C.nnz = (magma_int_t) nnz_t;
            baseC = (magma_int_t) base_t;
            C.nnz -= baseC;
        }
        CHECK( magma_index_malloc( &C.dcol, C.nnz ));
        CHECK( magma_zmalloc( &C.dval, C.nnz ));
        CHECK_CUSPARSE( cusparseZcsrgemm( handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
                                    CUSPARSE_OPERATION_NON_TRANSPOSE,
                        A.num_rows, B.num_cols, A.num_cols,
                        descrA, A.nnz,
                        A.dval, A.drow, A.dcol,
                        descrB, B.nnz,
                        B.dval, B.drow, B.dcol,
                        descrC,
                        C.dval, C.drow, C.dcol ));
        // end CUSPARSE context //
        //magma_device_sync();
        magma_queue_sync( queue );
        CHECK( magma_zmtransfer( C, AB, Magma_DEV, Magma_DEV, queue ));
    }
    else {
        info = MAGMA_ERR_NOT_SUPPORTED; 
    }
    
cleanup:
    cusparseDestroyMatDescr( descrA );
    cusparseDestroyMatDescr( descrB );
    cusparseDestroyMatDescr( descrC );
    cusparseDestroy( handle );
    magma_zmfree( &C, queue );
    return info;
}
  void cuSPARSE_symbolic(
      KernelHandle *handle,
      typename KernelHandle::row_lno_t m,
      typename KernelHandle::row_lno_t n,
      typename KernelHandle::row_lno_t k,
      in_row_index_view_type row_mapA,
      in_nonzero_index_view_type entriesA,

      bool transposeA,
      in_row_index_view_type row_mapB,
      in_nonzero_index_view_type entriesB,
      bool transposeB,
      typename in_row_index_view_type::non_const_type &row_mapC,
      typename in_nonzero_index_view_type::non_const_type &entriesC){

#ifdef KERNELS_HAVE_CUSPARSE

    typedef typename in_row_index_view_type::device_type device1;
    typedef typename in_nonzero_index_view_type::device_type device2;

    typedef typename KernelHandle::row_lno_t idx;
    typedef typename in_row_index_view_type::non_const_type idx_array_type;


    if (Kokkos::Impl::is_same<Kokkos::Cuda, device1 >::value){
      std::cerr << "MEMORY IS NOT ALLOCATED IN GPU DEVICE for CUSPARSE" << std::endl;
      return;
    }
    if (Kokkos::Impl::is_same<Kokkos::Cuda, device2 >::value){
      std::cerr << "MEMORY IS NOT ALLOCATED IN GPU DEVICE for CUSPARSE" << std::endl;
      return;
    }

    if (Kokkos::Impl::is_same<idx, int>::value){
      row_mapC = idx_array_type("rowMapC", m + 1);
      const idx *a_xadj = row_mapA.ptr_on_device();
      const idx *b_xadj = row_mapB.ptr_on_device();
      idx *c_xadj = row_mapC.ptr_on_device();

      const idx *a_adj = entriesA.ptr_on_device();
      const idx *b_adj = entriesB.ptr_on_device();
      handle->create_cuSPARSE_Handle(transposeA, transposeB);
      typename KernelHandle::SPGEMMcuSparseHandleType *h = handle->get_cuSparseHandle();

      int nnzA = entriesA.dimension_0();
      int nnzB = entriesB.dimension_0();

      int baseC, nnzC;
      int *nnzTotalDevHostPtr = &nnzC;

      cusparseXcsrgemmNnz(h->handle,
                          h->transA,
                          h->transB,
                          (int)m,
                          (int)n,
                          (int)k,
                          h->a_descr,
                          nnzA,
                          (int *) a_xadj,
                          (int *)a_adj,
                          h->b_descr,
                          nnzB,
                          (int *)b_xadj,
                          (int *)b_adj,
                          h->c_descr,
                          (int *)c_xadj,
                          nnzTotalDevHostPtr );

      if (NULL != nnzTotalDevHostPtr){
          nnzC = *nnzTotalDevHostPtr;
      }else{
          cudaMemcpy(&nnzC, c_xadj+m, sizeof(int), cudaMemcpyDeviceToHost);
          cudaMemcpy(&baseC, c_xadj, sizeof(int), cudaMemcpyDeviceToHost);
          nnzC -= baseC;
      }
      entriesC = in_nonzero_index_view_type("entriesC", nnzC);
    }
    else {
      std::cerr << "CUSPARSE requires integer values" << std::endl;
      return;
    }
#else
    std::cerr << "CUSPARSE IS NOT DEFINED" << std::endl;
    return;
#endif

  }