void CudaSparseMatrix<std::complex<double> >::mv(cusparseHandle_t cusparseHandle, cusparseOperation_t trans, const K &alpha, const K *xdevice, const K &beta, K *ydevice) const { cusparseZcsrmv( cusparseHandle, trans, nrow, ncol, nval, (cuDoubleComplex *)&alpha, descr, (cuDoubleComplex *)val, thrust::raw_pointer_cast(rowPtr.data()), thrust::raw_pointer_cast(col.data()), (cuDoubleComplex *)xdevice, (cuDoubleComplex *)&beta, (cuDoubleComplex *)ydevice); }
magma_int_t magma_z_spmv( magmaDoubleComplex alpha, magma_z_sparse_matrix A, magma_z_vector x, magmaDoubleComplex beta, magma_z_vector y ) { if( A.memory_location != x.memory_location || x.memory_location != y.memory_location ){ printf("error: linear algebra objects are not located in same memory!\n"); printf("memory locations are: %d %d %d\n", A.memory_location, x.memory_location, y.memory_location ); return MAGMA_ERR_INVALID_PTR; } // DEV case if( A.memory_location == Magma_DEV ){ if( A.num_cols == x.num_rows ){ if( A.storage_type == Magma_CSR || A.storage_type == Magma_CSRL || A.storage_type == Magma_CSRU ){ //printf("using CSR kernel for SpMV: "); //magma_zgecsrmv( MagmaNoTrans, A.num_rows, A.num_cols, alpha, // A.val, A.row, A.col, x.val, beta, y.val ); //printf("done.\n"); cusparseHandle_t cusparseHandle = 0; cusparseStatus_t cusparseStatus; cusparseStatus = cusparseCreate(&cusparseHandle); cusparseMatDescr_t descr = 0; cusparseStatus = cusparseCreateMatDescr(&descr); cusparseSetMatType(descr,CUSPARSE_MATRIX_TYPE_GENERAL); cusparseSetMatIndexBase(descr,CUSPARSE_INDEX_BASE_ZERO); cusparseZcsrmv( cusparseHandle,CUSPARSE_OPERATION_NON_TRANSPOSE, A.num_rows, A.num_cols, A.nnz, &alpha, descr, A.val, A.row, A.col, x.val, &beta, y.val ); cusparseDestroyMatDescr( descr ); cusparseDestroy( cusparseHandle ); return MAGMA_SUCCESS; } else if( A.storage_type == Magma_ELLPACK ){ //printf("using ELLPACK kernel for SpMV: "); magma_zgeellmv( MagmaNoTrans, A.num_rows, A.num_cols, A.max_nnz_row, alpha, A.val, A.col, x.val, beta, y.val ); //printf("done.\n"); return MAGMA_SUCCESS; } else if( A.storage_type == Magma_ELL ){ //printf("using ELL kernel for SpMV: "); magma_zgeelltmv( MagmaNoTrans, A.num_rows, A.num_cols, A.max_nnz_row, alpha, A.val, A.col, x.val, beta, y.val ); //printf("done.\n"); return MAGMA_SUCCESS; } else if( A.storage_type == Magma_ELLRT ){ //printf("using ELLRT kernel for SpMV: "); magma_zgeellrtmv( MagmaNoTrans, A.num_rows, A.num_cols, A.max_nnz_row, alpha, A.val, A.col, A.row, x.val, beta, y.val, A.alignment, A.blocksize ); //printf("done.\n"); return MAGMA_SUCCESS; } else if( A.storage_type == Magma_SELLC ){ //printf("using SELLC kernel for SpMV: "); magma_zgesellcmv( MagmaNoTrans, A.num_rows, A.num_cols, A.blocksize, A.numblocks, A.alignment, alpha, A.val, A.col, A.row, x.val, beta, y.val ); //printf("done.\n"); return MAGMA_SUCCESS; } else if( A.storage_type == Magma_SELLP ){ //printf("using SELLP kernel for SpMV: "); magma_zgesellpmv( MagmaNoTrans, A.num_rows, A.num_cols, A.blocksize, A.numblocks, A.alignment, alpha, A.val, A.col, A.row, x.val, beta, y.val ); //printf("done.\n"); return MAGMA_SUCCESS; } else if( A.storage_type == Magma_DENSE ){ //printf("using DENSE kernel for SpMV: "); magmablas_zgemv( MagmaNoTrans, A.num_rows, A.num_cols, alpha, A.val, A.num_rows, x.val, 1, beta, y.val, 1 ); //printf("done.\n"); return MAGMA_SUCCESS; } /* else if( A.storage_type == Magma_BCSR ){ //printf("using CUSPARSE BCSR kernel for SpMV: "); // CUSPARSE context // cusparseHandle_t cusparseHandle = 0; cusparseStatus_t cusparseStatus; cusparseStatus = cusparseCreate(&cusparseHandle); cusparseMatDescr_t descr = 0; cusparseStatus = cusparseCreateMatDescr(&descr); // end CUSPARSE context // cusparseDirection_t dirA = CUSPARSE_DIRECTION_ROW; int mb = (A.num_rows + A.blocksize-1)/A.blocksize; int nb = (A.num_cols + A.blocksize-1)/A.blocksize; cusparseZbsrmv( cusparseHandle, dirA, CUSPARSE_OPERATION_NON_TRANSPOSE, mb, nb, A.numblocks, &alpha, descr, A.val, A.row, A.col, A.blocksize, x.val, &beta, y.val ); //printf("done.\n"); return MAGMA_SUCCESS; }*/ else { printf("error: format not supported.\n"); return MAGMA_ERR_NOT_SUPPORTED; } } else if( A.num_cols < x.num_rows ){ magma_int_t num_vecs = x.num_rows / A.num_cols; if( A.storage_type == Magma_CSR ){ //printf("using CSR kernel for SpMV: "); magma_zmgecsrmv( MagmaNoTrans, A.num_rows, A.num_cols, num_vecs, alpha, A.val, A.row, A.col, x.val, beta, y.val ); //printf("done.\n"); return MAGMA_SUCCESS; } else if( A.storage_type == Magma_ELLPACK ){ //printf("using ELLPACK kernel for SpMV: "); magma_zmgeellmv( MagmaNoTrans, A.num_rows, A.num_cols, num_vecs, A.max_nnz_row, alpha, A.val, A.col, x.val, beta, y.val ); //printf("done.\n"); return MAGMA_SUCCESS; } else if( A.storage_type == Magma_ELL ){ //printf("using ELL kernel for SpMV: "); magma_zmgeelltmv( MagmaNoTrans, A.num_rows, A.num_cols, num_vecs, A.max_nnz_row, alpha, A.val, A.col, x.val, beta, y.val ); //printf("done.\n"); return MAGMA_SUCCESS; }else if( A.storage_type == Magma_SELLP ){ //printf("using SELLP kernel for SpMV: "); magma_zmgesellpmv( MagmaNoTrans, A.num_rows, A.num_cols, num_vecs, A.blocksize, A.numblocks, A.alignment, alpha, A.val, A.col, A.row, x.val, beta, y.val ); //printf("done.\n"); return MAGMA_SUCCESS; }/* if( A.storage_type == Magma_DENSE ){ //printf("using DENSE kernel for SpMV: "); magmablas_zmgemv( MagmaNoTrans, A.num_rows, A.num_cols, num_vecs, alpha, A.val, A.num_rows, x.val, 1, beta, y.val, 1 ); //printf("done.\n"); return MAGMA_SUCCESS; }*/ else { printf("error: format not supported.\n"); return MAGMA_ERR_NOT_SUPPORTED; } } } // CPU case missing! else{ printf("error: CPU not yet supported.\n"); return MAGMA_ERR_NOT_SUPPORTED; } return MAGMA_SUCCESS; }
extern "C" magma_int_t magma_z_spmv( magmaDoubleComplex alpha, magma_z_sparse_matrix A, magma_z_vector x, magmaDoubleComplex beta, magma_z_vector y, magma_queue_t queue ) { // set queue for old dense routines magma_queue_t orig_queue; magmablasGetKernelStream( &orig_queue ); if ( A.memory_location != x.memory_location || x.memory_location != y.memory_location ) { printf("error: linear algebra objects are not located in same memory!\n"); printf("memory locations are: %d %d %d\n", A.memory_location, x.memory_location, y.memory_location ); magmablasSetKernelStream( orig_queue ); return MAGMA_ERR_INVALID_PTR; } // DEV case if ( A.memory_location == Magma_DEV ) { if ( A.num_cols == x.num_rows && x.num_cols == 1 ) { if ( A.storage_type == Magma_CSR || A.storage_type == Magma_CSRL || A.storage_type == Magma_CSRU ) { //printf("using CSR kernel for SpMV: "); //magma_zgecsrmv( MagmaNoTrans, A.num_rows, A.num_cols, alpha, // A.dval, A.drow, A.dcol, x.dval, beta, y.dval ); //printf("done.\n"); cusparseHandle_t cusparseHandle = 0; cusparseStatus_t cusparseStatus; cusparseStatus = cusparseCreate(&cusparseHandle); cusparseSetStream( cusparseHandle, queue ); cusparseMatDescr_t descr = 0; cusparseStatus = cusparseCreateMatDescr(&descr); cusparseSetMatType(descr,CUSPARSE_MATRIX_TYPE_GENERAL); cusparseSetMatIndexBase(descr,CUSPARSE_INDEX_BASE_ZERO); cusparseZcsrmv( cusparseHandle,CUSPARSE_OPERATION_NON_TRANSPOSE, A.num_rows, A.num_cols, A.nnz, &alpha, descr, A.dval, A.drow, A.dcol, x.dval, &beta, y.dval ); cusparseDestroyMatDescr( descr ); cusparseDestroy( cusparseHandle ); } else if ( A.storage_type == Magma_ELL ) { //printf("using ELLPACKT kernel for SpMV: "); magma_zgeelltmv( MagmaNoTrans, A.num_rows, A.num_cols, A.max_nnz_row, alpha, A.dval, A.dcol, x.dval, beta, y.dval, queue ); //printf("done.\n"); } else if ( A.storage_type == Magma_ELLPACKT ) { //printf("using ELL kernel for SpMV: "); magma_zgeellmv( MagmaNoTrans, A.num_rows, A.num_cols, A.max_nnz_row, alpha, A.dval, A.dcol, x.dval, beta, y.dval, queue ); //printf("done.\n"); } else if ( A.storage_type == Magma_ELLRT ) { //printf("using ELLRT kernel for SpMV: "); magma_zgeellrtmv( MagmaNoTrans, A.num_rows, A.num_cols, A.max_nnz_row, alpha, A.dval, A.dcol, A.drow, x.dval, beta, y.dval, A.alignment, A.blocksize, queue ); //printf("done.\n"); } else if ( A.storage_type == Magma_SELLP ) { //printf("using SELLP kernel for SpMV: "); magma_zgesellpmv( MagmaNoTrans, A.num_rows, A.num_cols, A.blocksize, A.numblocks, A.alignment, alpha, A.dval, A.dcol, A.drow, x.dval, beta, y.dval, queue ); //printf("done.\n"); } else if ( A.storage_type == Magma_DENSE ) { //printf("using DENSE kernel for SpMV: "); magmablas_zgemv( MagmaNoTrans, A.num_rows, A.num_cols, alpha, A.dval, A.num_rows, x.dval, 1, beta, y.dval, 1 ); //printf("done.\n"); } else if ( A.storage_type == Magma_SPMVFUNCTION ) { //printf("using DENSE kernel for SpMV: "); magma_zcustomspmv( alpha, x, beta, y, queue ); //printf("done.\n"); } /* else if ( A.storage_type == Magma_BCSR ) { //printf("using CUSPARSE BCSR kernel for SpMV: "); // CUSPARSE context // cusparseHandle_t cusparseHandle = 0; cusparseStatus_t cusparseStatus; cusparseStatus = cusparseCreate(&cusparseHandle); cusparseSetStream( cusparseHandle, queue ); cusparseMatDescr_t descr = 0; cusparseStatus = cusparseCreateMatDescr(&descr); // end CUSPARSE context // cusparseDirection_t dirA = CUSPARSE_DIRECTION_ROW; int mb = (A.num_rows + A.blocksize-1)/A.blocksize; int nb = (A.num_cols + A.blocksize-1)/A.blocksize; cusparseZbsrmv( cusparseHandle, dirA, CUSPARSE_OPERATION_NON_TRANSPOSE, mb, nb, A.numblocks, &alpha, descr, A.dval, A.drow, A.dcol, A.blocksize, x.dval, &beta, y.dval ); //printf("done.\n"); magmablasSetKernelStream( orig_queue ); return MAGMA_SUCCESS; }*/ else { printf("error: format not supported.\n"); magmablasSetKernelStream( orig_queue ); return MAGMA_ERR_NOT_SUPPORTED; } } else if ( A.num_cols < x.num_rows || x.num_cols > 1 ) { magma_int_t num_vecs = x.num_rows / A.num_cols * x.num_cols; if ( A.storage_type == Magma_CSR ) { cusparseHandle_t cusparseHandle = 0; cusparseStatus_t cusparseStatus; cusparseStatus = cusparseCreate(&cusparseHandle); cusparseSetStream( cusparseHandle, queue ); cusparseMatDescr_t descr = 0; cusparseStatus = cusparseCreateMatDescr(&descr); cusparseSetMatType(descr,CUSPARSE_MATRIX_TYPE_GENERAL); cusparseSetMatIndexBase(descr,CUSPARSE_INDEX_BASE_ZERO); if ( x.major == MagmaColMajor) { cusparseZcsrmm(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, A.num_rows, num_vecs, A.num_cols, A.nnz, &alpha, descr, A.dval, A.drow, A.dcol, x.dval, A.num_cols, &beta, y.dval, A.num_cols); } else if ( x.major == MagmaRowMajor) { cusparseZcsrmm2(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_TRANSPOSE, A.num_rows, num_vecs, A.num_cols, A.nnz, &alpha, descr, A.dval, A.drow, A.dcol, x.dval, A.num_cols, &beta, y.dval, A.num_cols); } cusparseDestroyMatDescr( descr ); cusparseDestroy( cusparseHandle ); } else if ( A.storage_type == Magma_ELL ) { if ( x.major == MagmaColMajor) { magma_zmgeelltmv( MagmaNoTrans, A.num_rows, A.num_cols, num_vecs, A.max_nnz_row, alpha, A.dval, A.dcol, x.dval, beta, y.dval, queue ); } else if ( x.major == MagmaRowMajor) { // transpose first to col major magma_z_vector x2; magma_zvtranspose( x, &x2, queue ); magma_zmgeellmv( MagmaNoTrans, A.num_rows, A.num_cols, num_vecs, A.max_nnz_row, alpha, A.dval, A.dcol, x.dval, beta, y.dval, queue ); magma_z_vfree(&x2, queue ); } } else if ( A.storage_type == Magma_ELLPACKT ) { if ( x.major == MagmaColMajor) { magma_zmgeellmv( MagmaNoTrans, A.num_rows, A.num_cols, num_vecs, A.max_nnz_row, alpha, A.dval, A.dcol, x.dval, beta, y.dval, queue ); } else if ( x.major == MagmaRowMajor) { // transpose first to col major magma_z_vector x2; magma_zvtranspose( x, &x2, queue ); magma_zmgeelltmv( MagmaNoTrans, A.num_rows, A.num_cols, num_vecs, A.max_nnz_row, alpha, A.dval, A.dcol, x.dval, beta, y.dval, queue ); magma_z_vfree(&x2, queue ); } } else if ( A.storage_type == Magma_SELLP ) { if ( x.major == MagmaRowMajor) { magma_zmgesellpmv( MagmaNoTrans, A.num_rows, A.num_cols, num_vecs, A.blocksize, A.numblocks, A.alignment, alpha, A.dval, A.dcol, A.drow, x.dval, beta, y.dval, queue ); } else if ( x.major == MagmaColMajor) { // transpose first to row major magma_z_vector x2; magma_zvtranspose( x, &x2, queue ); magma_zmgesellpmv( MagmaNoTrans, A.num_rows, A.num_cols, num_vecs, A.blocksize, A.numblocks, A.alignment, alpha, A.dval, A.dcol, A.drow, x2.dval, beta, y.dval, queue ); magma_z_vfree(&x2, queue ); } }/* if ( A.storage_type == Magma_DENSE ) { //printf("using DENSE kernel for SpMV: "); magmablas_zmgemv( MagmaNoTrans, A.num_rows, A.num_cols, num_vecs, alpha, A.dval, A.num_rows, x.dval, 1, beta, y.dval, 1 ); //printf("done.\n"); magmablasSetKernelStream( orig_queue ); return MAGMA_SUCCESS; }*/ else { printf("error: format not supported.\n"); magmablasSetKernelStream( orig_queue ); return MAGMA_ERR_NOT_SUPPORTED; } } } // CPU case missing! else { printf("error: CPU not yet supported.\n"); magmablasSetKernelStream( orig_queue ); return MAGMA_ERR_NOT_SUPPORTED; } magmablasSetKernelStream( orig_queue ); return MAGMA_SUCCESS; }
/* //////////////////////////////////////////////////////////////////////////// -- testing sparse matrix vector product */ int main( int argc, char** argv ) { TESTING_INIT(); magma_queue_t queue; magma_queue_create( /*devices[ opts->device ],*/ &queue ); magma_z_sparse_matrix hA, hA_SELLP, hA_ELL, dA, dA_SELLP, dA_ELL; hA_SELLP.blocksize = 8; hA_SELLP.alignment = 8; real_Double_t start, end, res; magma_int_t *pntre; magmaDoubleComplex c_one = MAGMA_Z_MAKE(1.0, 0.0); magmaDoubleComplex c_zero = MAGMA_Z_MAKE(0.0, 0.0); magma_int_t i, j; for( i = 1; i < argc; ++i ) { if ( strcmp("--blocksize", argv[i]) == 0 ) { hA_SELLP.blocksize = atoi( argv[++i] ); } else if ( strcmp("--alignment", argv[i]) == 0 ) { hA_SELLP.alignment = atoi( argv[++i] ); } else break; } printf( "\n# usage: ./run_zspmv" " [ --blocksize %d --alignment %d (for SELLP) ]" " matrices \n\n", (int) hA_SELLP.blocksize, (int) hA_SELLP.alignment ); while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); magma_zm_5stencil( laplace_size, &hA, queue ); } else { // file-matrix test magma_z_csr_mtx( &hA, argv[i], queue ); } printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n", (int) hA.num_rows,(int) hA.num_cols,(int) hA.nnz ); real_Double_t FLOPS = 2.0*hA.nnz/1e9; magma_z_vector hx, hy, dx, dy, hrefvec, hcheck; // init CPU vectors magma_z_vinit( &hx, Magma_CPU, hA.num_rows, c_zero, queue ); magma_z_vinit( &hy, Magma_CPU, hA.num_rows, c_zero, queue ); // init DEV vectors magma_z_vinit( &dx, Magma_DEV, hA.num_rows, c_one, queue ); magma_z_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue ); #ifdef MAGMA_WITH_MKL // calling MKL with CSR pntre = (magma_int_t*)malloc( (hA.num_rows+1)*sizeof(magma_int_t) ); pntre[0] = 0; for (j=0; j<hA.num_rows; j++ ) { pntre[j] = hA.row[j+1]; } MKL_INT num_rows = hA.num_rows; MKL_INT num_cols = hA.num_cols; MKL_INT nnz = hA.nnz; MKL_INT *col; TESTING_MALLOC_CPU( col, MKL_INT, nnz ); for( magma_int_t t=0; t < hA.nnz; ++t ) { col[ t ] = hA.col[ t ]; } MKL_INT *row; TESTING_MALLOC_CPU( row, MKL_INT, num_rows ); for( magma_int_t t=0; t < hA.num_rows; ++t ) { row[ t ] = hA.col[ t ]; } start = magma_wtime(); for (j=0; j<10; j++ ) { mkl_zcsrmv( "N", &num_rows, &num_cols, MKL_ADDR(&c_one), "GFNC", MKL_ADDR(hA.val), col, row, pntre, MKL_ADDR(hx.val), MKL_ADDR(&c_zero), MKL_ADDR(hy.val) ); } end = magma_wtime(); printf( "\n > MKL : %.2e seconds %.2e GFLOP/s (CSR).\n", (end-start)/10, FLOPS*10/(end-start) ); TESTING_FREE_CPU( row ); TESTING_FREE_CPU( col ); free(pntre); #endif // MAGMA_WITH_MKL // copy matrix to GPU magma_z_mtransfer( hA, &dA, Magma_CPU, Magma_DEV, queue ); // SpMV on GPU (CSR) -- this is the reference! start = magma_sync_wtime( queue ); for (j=0; j<10; j++) magma_z_spmv( c_one, dA, dx, c_zero, dy, queue ); end = magma_sync_wtime( queue ); printf( " > MAGMA: %.2e seconds %.2e GFLOP/s (standard CSR).\n", (end-start)/10, FLOPS*10/(end-start) ); magma_z_mfree(&dA, queue ); magma_z_vtransfer( dy, &hrefvec , Magma_DEV, Magma_CPU, queue ); // convert to ELL and copy to GPU magma_z_mconvert( hA, &hA_ELL, Magma_CSR, Magma_ELL, queue ); magma_z_mtransfer( hA_ELL, &dA_ELL, Magma_CPU, Magma_DEV, queue ); magma_z_mfree(&hA_ELL, queue ); magma_z_vfree( &dy, queue ); magma_z_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue ); // SpMV on GPU (ELL) start = magma_sync_wtime( queue ); for (j=0; j<10; j++) magma_z_spmv( c_one, dA_ELL, dx, c_zero, dy, queue ); end = magma_sync_wtime( queue ); printf( " > MAGMA: %.2e seconds %.2e GFLOP/s (standard ELL).\n", (end-start)/10, FLOPS*10/(end-start) ); magma_z_mfree(&dA_ELL, queue ); magma_z_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue ); res = 0.0; for(magma_int_t k=0; k<hA.num_rows; k++ ) res=res + MAGMA_Z_REAL(hcheck.val[k]) - MAGMA_Z_REAL(hrefvec.val[k]); if ( res < .000001 ) printf("# tester spmv ELL: ok\n"); else printf("# tester spmv ELL: failed\n"); magma_z_vfree( &hcheck, queue ); // convert to SELLP and copy to GPU magma_z_mconvert( hA, &hA_SELLP, Magma_CSR, Magma_SELLP, queue ); magma_z_mtransfer( hA_SELLP, &dA_SELLP, Magma_CPU, Magma_DEV, queue ); magma_z_mfree(&hA_SELLP, queue ); magma_z_vfree( &dy, queue ); magma_z_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue ); // SpMV on GPU (SELLP) start = magma_sync_wtime( queue ); for (j=0; j<10; j++) magma_z_spmv( c_one, dA_SELLP, dx, c_zero, dy, queue ); end = magma_sync_wtime( queue ); printf( " > MAGMA: %.2e seconds %.2e GFLOP/s (SELLP).\n", (end-start)/10, FLOPS*10/(end-start) ); magma_z_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue ); res = 0.0; for(magma_int_t k=0; k<hA.num_rows; k++ ) res=res + MAGMA_Z_REAL(hcheck.val[k]) - MAGMA_Z_REAL(hrefvec.val[k]); printf("# |x-y|_F = %8.2e\n", res); if ( res < .000001 ) printf("# tester spmv SELL-P: ok\n"); else printf("# tester spmv SELL-P: failed\n"); magma_z_vfree( &hcheck, queue ); magma_z_mfree(&dA_SELLP, queue ); // SpMV on GPU (CUSPARSE - CSR) // CUSPARSE context // cusparseHandle_t cusparseHandle = 0; cusparseStatus_t cusparseStatus; cusparseStatus = cusparseCreate(&cusparseHandle); cusparseSetStream( cusparseHandle, queue ); cusparseMatDescr_t descr = 0; cusparseStatus = cusparseCreateMatDescr(&descr); cusparseSetMatType(descr,CUSPARSE_MATRIX_TYPE_GENERAL); cusparseSetMatIndexBase(descr,CUSPARSE_INDEX_BASE_ZERO); magmaDoubleComplex alpha = c_one; magmaDoubleComplex beta = c_zero; magma_z_vfree( &dy, queue ); magma_z_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue ); // copy matrix to GPU magma_z_mtransfer( hA, &dA, Magma_CPU, Magma_DEV, queue ); start = magma_sync_wtime( queue ); for (j=0; j<10; j++) cusparseStatus = cusparseZcsrmv(cusparseHandle,CUSPARSE_OPERATION_NON_TRANSPOSE, hA.num_rows, hA.num_cols, hA.nnz, &alpha, descr, dA.dval, dA.drow, dA.dcol, dx.dval, &beta, dy.dval); end = magma_sync_wtime( queue ); if (cusparseStatus != 0) printf("error in cuSPARSE CSR\n"); printf( " > CUSPARSE: %.2e seconds %.2e GFLOP/s (CSR).\n", (end-start)/10, FLOPS*10/(end-start) ); cusparseMatDescr_t descrA; cusparseStatus = cusparseCreateMatDescr(&descrA); if (cusparseStatus != 0) printf("error\n"); cusparseHybMat_t hybA; cusparseStatus = cusparseCreateHybMat( &hybA ); if (cusparseStatus != 0) printf("error\n"); magma_z_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue ); res = 0.0; for(magma_int_t k=0; k<hA.num_rows; k++ ) res=res + MAGMA_Z_REAL(hcheck.val[k]) - MAGMA_Z_REAL(hrefvec.val[k]); printf("# |x-y|_F = %8.2e\n", res); if ( res < .000001 ) printf("# tester spmv cuSPARSE CSR: ok\n"); else printf("# tester spmv cuSPARSE CSR: failed\n"); magma_z_vfree( &hcheck, queue ); magma_z_vfree( &dy, queue ); magma_z_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue ); cusparseZcsr2hyb(cusparseHandle, hA.num_rows, hA.num_cols, descrA, dA.dval, dA.drow, dA.dcol, hybA, 0, CUSPARSE_HYB_PARTITION_AUTO); start = magma_sync_wtime( queue ); for (j=0; j<10; j++) cusparseStatus = cusparseZhybmv( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, descrA, hybA, dx.dval, &beta, dy.dval); end = magma_sync_wtime( queue ); if (cusparseStatus != 0) printf("error in cuSPARSE HYB\n"); printf( " > CUSPARSE: %.2e seconds %.2e GFLOP/s (HYB).\n", (end-start)/10, FLOPS*10/(end-start) ); magma_z_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue ); res = 0.0; for(magma_int_t k=0; k<hA.num_rows; k++ ) res=res + MAGMA_Z_REAL(hcheck.val[k]) - MAGMA_Z_REAL(hrefvec.val[k]); printf("# |x-y|_F = %8.2e\n", res); if ( res < .000001 ) printf("# tester spmv cuSPARSE HYB: ok\n"); else printf("# tester spmv cuSPARSE HYB: failed\n"); magma_z_vfree( &hcheck, queue ); cusparseDestroyMatDescr( descrA ); cusparseDestroyHybMat( hybA ); cusparseDestroy( cusparseHandle ); magma_z_mfree(&dA, queue ); printf("\n\n"); // free CPU memory magma_z_mfree(&hA, queue ); magma_z_vfree(&hx, queue ); magma_z_vfree(&hy, queue ); magma_z_vfree(&hrefvec, queue ); // free GPU memory magma_z_vfree(&dx, queue ); magma_z_vfree(&dy, queue ); i++; } magma_queue_destroy( queue ); TESTING_FINALIZE(); return 0; }