/* //////////////////////////////////////////////////////////////////////////// -- testing any solver */ int main( int argc, char** argv ) { TESTING_INIT(); magma_dopts zopts; magma_queue_t queue; magma_queue_create( /*devices[ opts->device ],*/ &queue ); int i=1; magma_dparse_opts( argc, argv, &zopts, &i, queue ); real_Double_t res; magma_d_sparse_matrix Z, A, AT, A2, B, B_d; B.blocksize = zopts.blocksize; B.alignment = zopts.alignment; while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); magma_dm_5stencil( laplace_size, &Z, queue ); } else { // file-matrix test magma_d_csr_mtx( &Z, argv[i], queue ); } printf( "# matrix info: %d-by-%d with %d nonzeros\n", (int) Z.num_rows,(int) Z.num_cols,(int) Z.nnz ); // scale matrix magma_dmscale( &Z, zopts.scaling, queue ); // remove nonzeros in matrix magma_dmcsrcompressor( &Z, queue ); // convert to be non-symmetric magma_d_mconvert( Z, &A, Magma_CSR, Magma_CSRL, queue ); // transpose magma_d_mtranspose( A, &AT, queue ); // convert, copy back and forth to check everything works magma_d_mconvert( AT, &B, Magma_CSR, zopts.output_format, queue ); magma_d_mfree(&AT, queue ); magma_d_mtransfer( B, &B_d, Magma_CPU, Magma_DEV, queue ); magma_d_mfree(&B, queue ); magma_dmcsrcompressor_gpu( &B_d, queue ); magma_d_mtransfer( B_d, &B, Magma_DEV, Magma_CPU, queue ); magma_d_mfree(&B_d, queue ); magma_d_mconvert( B, &AT, zopts.output_format,Magma_CSR, queue ); magma_d_mfree(&B, queue ); // transpose back magma_d_mtranspose( AT, &A2, queue ); magma_d_mfree(&AT, queue ); magma_dmdiff( A, A2, &res, queue); printf("# ||A-B||_F = %8.2e\n", res); if ( res < .000001 ) printf("# tester: ok\n"); else printf("# tester: failed\n"); magma_d_mfree(&A, queue ); magma_d_mfree(&A2, queue ); magma_d_mfree(&Z, queue ); i++; } magma_queue_destroy( queue ); TESTING_FINALIZE(); return 0; }
/* //////////////////////////////////////////////////////////////////////////// -- testing sparse matrix vector product */ int main( int argc, char** argv ) { TESTING_INIT(); magma_queue_t queue; magma_queue_create( /*devices[ opts->device ],*/ &queue ); magma_d_sparse_matrix hA, hA_SELLP, hA_ELL, dA, dA_SELLP, dA_ELL; hA_SELLP.blocksize = 8; hA_SELLP.alignment = 8; real_Double_t start, end, res; magma_int_t *pntre; double c_one = MAGMA_D_MAKE(1.0, 0.0); double c_zero = MAGMA_D_MAKE(0.0, 0.0); magma_int_t i, j; for( i = 1; i < argc; ++i ) { if ( strcmp("--blocksize", argv[i]) == 0 ) { hA_SELLP.blocksize = atoi( argv[++i] ); } else if ( strcmp("--alignment", argv[i]) == 0 ) { hA_SELLP.alignment = atoi( argv[++i] ); } else break; } printf( "\n# usage: ./run_dspmv" " [ --blocksize %d --alignment %d (for SELLP) ]" " matrices \n\n", (int) hA_SELLP.blocksize, (int) hA_SELLP.alignment ); while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); magma_dm_5stencil( laplace_size, &hA, queue ); } else { // file-matrix test magma_d_csr_mtx( &hA, argv[i], queue ); } printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n", (int) hA.num_rows,(int) hA.num_cols,(int) hA.nnz ); real_Double_t FLOPS = 2.0*hA.nnz/1e9; magma_d_vector hx, hy, dx, dy, hrefvec, hcheck; // init CPU vectors magma_d_vinit( &hx, Magma_CPU, hA.num_rows, c_zero, queue ); magma_d_vinit( &hy, Magma_CPU, hA.num_rows, c_zero, queue ); // init DEV vectors magma_d_vinit( &dx, Magma_DEV, hA.num_rows, c_one, queue ); magma_d_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue ); #ifdef MAGMA_WITH_MKL // calling MKL with CSR pntre = (magma_int_t*)malloc( (hA.num_rows+1)*sizeof(magma_int_t) ); pntre[0] = 0; for (j=0; j<hA.num_rows; j++ ) { pntre[j] = hA.row[j+1]; } MKL_INT num_rows = hA.num_rows; MKL_INT num_cols = hA.num_cols; MKL_INT nnz = hA.nnz; MKL_INT *col; TESTING_MALLOC_CPU( col, MKL_INT, nnz ); for( magma_int_t t=0; t < hA.nnz; ++t ) { col[ t ] = hA.col[ t ]; } MKL_INT *row; TESTING_MALLOC_CPU( row, MKL_INT, num_rows ); for( magma_int_t t=0; t < hA.num_rows; ++t ) { row[ t ] = hA.col[ t ]; } start = magma_wtime(); for (j=0; j<10; j++ ) { mkl_dcsrmv( "N", &num_rows, &num_cols, MKL_ADDR(&c_one), "GFNC", MKL_ADDR(hA.val), col, row, pntre, MKL_ADDR(hx.val), MKL_ADDR(&c_zero), MKL_ADDR(hy.val) ); } end = magma_wtime(); printf( "\n > MKL : %.2e seconds %.2e GFLOP/s (CSR).\n", (end-start)/10, FLOPS*10/(end-start) ); TESTING_FREE_CPU( row ); TESTING_FREE_CPU( col ); free(pntre); #endif // MAGMA_WITH_MKL // copy matrix to GPU magma_d_mtransfer( hA, &dA, Magma_CPU, Magma_DEV, queue ); // SpMV on GPU (CSR) -- this is the reference! start = magma_sync_wtime( queue ); for (j=0; j<10; j++) magma_d_spmv( c_one, dA, dx, c_zero, dy, queue ); end = magma_sync_wtime( queue ); printf( " > MAGMA: %.2e seconds %.2e GFLOP/s (standard CSR).\n", (end-start)/10, FLOPS*10/(end-start) ); magma_d_mfree(&dA, queue ); magma_d_vtransfer( dy, &hrefvec , Magma_DEV, Magma_CPU, queue ); // convert to ELL and copy to GPU magma_d_mconvert( hA, &hA_ELL, Magma_CSR, Magma_ELL, queue ); magma_d_mtransfer( hA_ELL, &dA_ELL, Magma_CPU, Magma_DEV, queue ); magma_d_mfree(&hA_ELL, queue ); magma_d_vfree( &dy, queue ); magma_d_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue ); // SpMV on GPU (ELL) start = magma_sync_wtime( queue ); for (j=0; j<10; j++) magma_d_spmv( c_one, dA_ELL, dx, c_zero, dy, queue ); end = magma_sync_wtime( queue ); printf( " > MAGMA: %.2e seconds %.2e GFLOP/s (standard ELL).\n", (end-start)/10, FLOPS*10/(end-start) ); magma_d_mfree(&dA_ELL, queue ); magma_d_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue ); res = 0.0; for(magma_int_t k=0; k<hA.num_rows; k++ ) res=res + MAGMA_D_REAL(hcheck.val[k]) - MAGMA_D_REAL(hrefvec.val[k]); if ( res < .000001 ) printf("# tester spmv ELL: ok\n"); else printf("# tester spmv ELL: failed\n"); magma_d_vfree( &hcheck, queue ); // convert to SELLP and copy to GPU magma_d_mconvert( hA, &hA_SELLP, Magma_CSR, Magma_SELLP, queue ); magma_d_mtransfer( hA_SELLP, &dA_SELLP, Magma_CPU, Magma_DEV, queue ); magma_d_mfree(&hA_SELLP, queue ); magma_d_vfree( &dy, queue ); magma_d_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue ); // SpMV on GPU (SELLP) start = magma_sync_wtime( queue ); for (j=0; j<10; j++) magma_d_spmv( c_one, dA_SELLP, dx, c_zero, dy, queue ); end = magma_sync_wtime( queue ); printf( " > MAGMA: %.2e seconds %.2e GFLOP/s (SELLP).\n", (end-start)/10, FLOPS*10/(end-start) ); magma_d_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue ); res = 0.0; for(magma_int_t k=0; k<hA.num_rows; k++ ) res=res + MAGMA_D_REAL(hcheck.val[k]) - MAGMA_D_REAL(hrefvec.val[k]); printf("# |x-y|_F = %8.2e\n", res); if ( res < .000001 ) printf("# tester spmv SELL-P: ok\n"); else printf("# tester spmv SELL-P: failed\n"); magma_d_vfree( &hcheck, queue ); magma_d_mfree(&dA_SELLP, queue ); // SpMV on GPU (CUSPARSE - CSR) // CUSPARSE context // cusparseHandle_t cusparseHandle = 0; cusparseStatus_t cusparseStatus; cusparseStatus = cusparseCreate(&cusparseHandle); cusparseSetStream( cusparseHandle, queue ); cusparseMatDescr_t descr = 0; cusparseStatus = cusparseCreateMatDescr(&descr); cusparseSetMatType(descr,CUSPARSE_MATRIX_TYPE_GENERAL); cusparseSetMatIndexBase(descr,CUSPARSE_INDEX_BASE_ZERO); double alpha = c_one; double beta = c_zero; magma_d_vfree( &dy, queue ); magma_d_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue ); // copy matrix to GPU magma_d_mtransfer( hA, &dA, Magma_CPU, Magma_DEV, queue ); start = magma_sync_wtime( queue ); for (j=0; j<10; j++) cusparseStatus = cusparseDcsrmv(cusparseHandle,CUSPARSE_OPERATION_NON_TRANSPOSE, hA.num_rows, hA.num_cols, hA.nnz, &alpha, descr, dA.dval, dA.drow, dA.dcol, dx.dval, &beta, dy.dval); end = magma_sync_wtime( queue ); if (cusparseStatus != 0) printf("error in cuSPARSE CSR\n"); printf( " > CUSPARSE: %.2e seconds %.2e GFLOP/s (CSR).\n", (end-start)/10, FLOPS*10/(end-start) ); cusparseMatDescr_t descrA; cusparseStatus = cusparseCreateMatDescr(&descrA); if (cusparseStatus != 0) printf("error\n"); cusparseHybMat_t hybA; cusparseStatus = cusparseCreateHybMat( &hybA ); if (cusparseStatus != 0) printf("error\n"); magma_d_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue ); res = 0.0; for(magma_int_t k=0; k<hA.num_rows; k++ ) res=res + MAGMA_D_REAL(hcheck.val[k]) - MAGMA_D_REAL(hrefvec.val[k]); printf("# |x-y|_F = %8.2e\n", res); if ( res < .000001 ) printf("# tester spmv cuSPARSE CSR: ok\n"); else printf("# tester spmv cuSPARSE CSR: failed\n"); magma_d_vfree( &hcheck, queue ); magma_d_vfree( &dy, queue ); magma_d_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue ); cusparseDcsr2hyb(cusparseHandle, hA.num_rows, hA.num_cols, descrA, dA.dval, dA.drow, dA.dcol, hybA, 0, CUSPARSE_HYB_PARTITION_AUTO); start = magma_sync_wtime( queue ); for (j=0; j<10; j++) cusparseStatus = cusparseDhybmv( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, descrA, hybA, dx.dval, &beta, dy.dval); end = magma_sync_wtime( queue ); if (cusparseStatus != 0) printf("error in cuSPARSE HYB\n"); printf( " > CUSPARSE: %.2e seconds %.2e GFLOP/s (HYB).\n", (end-start)/10, FLOPS*10/(end-start) ); magma_d_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue ); res = 0.0; for(magma_int_t k=0; k<hA.num_rows; k++ ) res=res + MAGMA_D_REAL(hcheck.val[k]) - MAGMA_D_REAL(hrefvec.val[k]); printf("# |x-y|_F = %8.2e\n", res); if ( res < .000001 ) printf("# tester spmv cuSPARSE HYB: ok\n"); else printf("# tester spmv cuSPARSE HYB: failed\n"); magma_d_vfree( &hcheck, queue ); cusparseDestroyMatDescr( descrA ); cusparseDestroyHybMat( hybA ); cusparseDestroy( cusparseHandle ); magma_d_mfree(&dA, queue ); printf("\n\n"); // free CPU memory magma_d_mfree(&hA, queue ); magma_d_vfree(&hx, queue ); magma_d_vfree(&hy, queue ); magma_d_vfree(&hrefvec, queue ); // free GPU memory magma_d_vfree(&dx, queue ); magma_d_vfree(&dy, queue ); i++; } magma_queue_destroy( queue ); TESTING_FINALIZE(); return 0; }
/* //////////////////////////////////////////////////////////////////////////// -- testing any solver */ int main( int argc, char** argv ) { TESTING_INIT(); magma_dopts zopts; magma_queue_t queue; magma_queue_create( /*devices[ opts->device ],*/ &queue ); int i=1; magma_dparse_opts( argc, argv, &zopts, &i, queue ); real_Double_t res; magma_d_sparse_matrix A, A2, A3, A4, A5; while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); magma_dm_5stencil( laplace_size, &A, queue ); } else { // file-matrix test magma_d_csr_mtx( &A, argv[i], queue ); } printf( "# matrix info: %d-by-%d with %d nonzeros\n", (int) A.num_rows,(int) A.num_cols,(int) A.nnz ); // filename for temporary matrix storage const char *filename = "testmatrix.mtx"; // write to file write_d_csrtomtx( A, filename, queue ); // read from file magma_d_csr_mtx( &A2, filename, queue ); // delete temporary matrix unlink( filename ); //visualize printf("A2:\n"); magma_d_mvisu( A2, queue ); //visualize magma_d_mconvert(A2, &A4, Magma_CSR, Magma_CSRL, queue ); printf("A4:\n"); magma_d_mvisu( A4, queue ); magma_d_mconvert(A4, &A5, Magma_CSR, Magma_ELL, queue ); printf("A5:\n"); magma_d_mvisu( A5, queue ); // pass it to another application and back magma_int_t m, n; magma_index_t *row, *col; double *val; magma_dcsrget( A2, &m, &n, &row, &col, &val, queue ); magma_dcsrset( m, n, row, col, val, &A3, queue ); magma_dmdiff( A, A2, &res, queue ); printf("# ||A-B||_F = %8.2e\n", res); if ( res < .000001 ) printf("# tester IO: ok\n"); else printf("# tester IO: failed\n"); magma_dmdiff( A, A3, &res, queue ); printf("# ||A-B||_F = %8.2e\n", res); if ( res < .000001 ) printf("# tester matrix interface: ok\n"); else printf("# tester matrix interface: failed\n"); magma_d_mfree(&A, queue ); magma_d_mfree(&A2, queue ); magma_d_mfree(&A4, queue ); magma_d_mfree(&A5, queue ); i++; } magma_queue_destroy( queue ); TESTING_FINALIZE(); return 0; }
/* //////////////////////////////////////////////////////////////////////////// -- testing any solver */ int main( int argc, char** argv ) { magma_int_t info = 0; TESTING_CHECK( magma_init() ); magma_print_environment(); magma_dopts zopts; magma_queue_t queue=NULL; magma_queue_create( 0, &queue ); real_Double_t res; magma_d_matrix A={Magma_CSR}, A2={Magma_CSR}, A3={Magma_CSR}, A4={Magma_CSR}, A5={Magma_CSR}; int i=1; TESTING_CHECK( magma_dparse_opts( argc, argv, &zopts, &i, queue )); while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); TESTING_CHECK( magma_dm_5stencil( laplace_size, &A, queue )); } else { // file-matrix test TESTING_CHECK( magma_d_csr_mtx( &A, argv[i], queue )); } printf("%% matrix info: %lld-by-%lld with %lld nonzeros\n", (long long) A.num_rows, (long long) A.num_cols, (long long) A.nnz ); // filename for temporary matrix storage const char *filename = "testmatrix.mtx"; // write to file TESTING_CHECK( magma_dwrite_csrtomtx( A, filename, queue )); // read from file TESTING_CHECK( magma_d_csr_mtx( &A2, filename, queue )); // delete temporary matrix unlink( filename ); //visualize printf("A2:\n"); TESTING_CHECK( magma_dprint_matrix( A2, queue )); //visualize TESTING_CHECK( magma_dmconvert(A2, &A4, Magma_CSR, Magma_CSRL, queue )); printf("A4:\n"); TESTING_CHECK( magma_dprint_matrix( A4, queue )); TESTING_CHECK( magma_dmconvert(A4, &A5, Magma_CSR, Magma_ELL, queue )); printf("A5:\n"); TESTING_CHECK( magma_dprint_matrix( A5, queue )); // pass it to another application and back magma_int_t m, n; magma_index_t *row, *col; double *val=NULL; TESTING_CHECK( magma_dcsrget( A2, &m, &n, &row, &col, &val, queue )); TESTING_CHECK( magma_dcsrset( m, n, row, col, val, &A3, queue )); TESTING_CHECK( magma_dmdiff( A, A2, &res, queue )); printf("%% ||A-B||_F = %8.2e\n", res); if ( res < .000001 ) printf("%% tester IO: ok\n"); else printf("%% tester IO: failed\n"); TESTING_CHECK( magma_dmdiff( A, A3, &res, queue )); printf("%% ||A-B||_F = %8.2e\n", res); if ( res < .000001 ) printf("%% tester matrix interface: ok\n"); else printf("%% tester matrix interface: failed\n"); magma_dmfree(&A, queue ); magma_dmfree(&A2, queue ); magma_dmfree(&A4, queue ); magma_dmfree(&A5, queue ); i++; } magma_queue_destroy( queue ); TESTING_CHECK( magma_finalize() ); return info; }
/* //////////////////////////////////////////////////////////////////////////// -- testing any solver */ int main( int argc, char** argv ) { TESTING_INIT(); magma_dopts zopts; magma_queue_t queue; magma_queue_create( /*devices[ opts->device ],*/ &queue ); int i=1; magma_dparse_opts( argc, argv, &zopts, &i, queue ); real_Double_t res; magma_d_sparse_matrix Z, Z2, A, A2, AT, AT2, B; B.blocksize = zopts.blocksize; B.alignment = zopts.alignment; while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); magma_dm_5stencil( laplace_size, &Z, queue ); } else { // file-matrix test magma_d_csr_mtx( &Z, argv[i], queue ); } printf( "# matrix info: %d-by-%d with %d nonzeros\n", (int) Z.num_rows,(int) Z.num_cols,(int) Z.nnz ); // convert to be non-symmetric magma_d_mconvert( Z, &A, Magma_CSR, Magma_CSRL, queue ); magma_d_mconvert( Z, &B, Magma_CSR, Magma_CSRU, queue ); // transpose magma_d_mtranspose( A, &AT, queue ); // quite some conversions //ELL magma_d_mconvert( AT, &AT2, Magma_CSR, Magma_ELL, queue ); magma_d_mfree(&AT, queue ); magma_d_mconvert( AT2, &AT, Magma_ELL, Magma_CSR, queue ); magma_d_mfree(&AT2, queue ); //ELLPACKT magma_d_mconvert( AT, &AT2, Magma_CSR, Magma_ELLPACKT, queue ); magma_d_mfree(&AT, queue ); magma_d_mconvert( AT2, &AT, Magma_ELLPACKT, Magma_CSR, queue ); magma_d_mfree(&AT2, queue ); //ELLRT AT2.blocksize = 8; AT2.alignment = 8; magma_d_mconvert( AT, &AT2, Magma_CSR, Magma_ELLRT, queue ); magma_d_mfree(&AT, queue ); magma_d_mconvert( AT2, &AT, Magma_ELLRT, Magma_CSR, queue ); magma_d_mfree(&AT2, queue ); //SELLP AT2.blocksize = 8; AT2.alignment = 8; magma_d_mconvert( AT, &AT2, Magma_CSR, Magma_SELLP, queue ); magma_d_mfree(&AT, queue ); magma_d_mconvert( AT2, &AT, Magma_SELLP, Magma_CSR, queue ); magma_d_mfree(&AT2, queue ); //ELLD magma_d_mconvert( AT, &AT2, Magma_CSR, Magma_ELLD, queue ); magma_d_mfree(&AT, queue ); magma_d_mconvert( AT2, &AT, Magma_ELLD, Magma_CSR, queue ); magma_d_mfree(&AT2, queue ); //CSRCOO magma_d_mconvert( AT, &AT2, Magma_CSR, Magma_CSRCOO, queue ); magma_d_mfree(&AT, queue ); magma_d_mconvert( AT2, &AT, Magma_CSRCOO, Magma_CSR, queue ); magma_d_mfree(&AT2, queue ); //CSRD magma_d_mconvert( AT, &AT2, Magma_CSR, Magma_CSRD, queue ); magma_d_mfree(&AT, queue ); magma_d_mconvert( AT2, &AT, Magma_CSRD, Magma_CSR, queue ); magma_d_mfree(&AT2, queue ); //BCSR magma_d_mconvert( AT, &AT2, Magma_CSR, Magma_BCSR, queue ); magma_d_mfree(&AT, queue ); magma_d_mconvert( AT2, &AT, Magma_BCSR, Magma_CSR, queue ); magma_d_mfree(&AT2, queue ); // transpose magma_d_mtranspose( AT, &A2, queue ); magma_dmdiff( A, A2, &res, queue); printf("# ||A-A2||_F = %8.2e\n", res); if ( res < .000001 ) printf("# conversion tester: ok\n"); else printf("# conversion tester: failed\n"); magma_dmlumerge( A2, B, &Z2, queue ); magma_dmdiff( Z, Z2, &res, queue); printf("# ||Z-Z2||_F = %8.2e\n", res); if ( res < .000001 ) printf("# LUmerge tester: ok\n"); else printf("# LUmerge tester: failed\n"); magma_d_mfree(&A, queue ); magma_d_mfree(&A2, queue ); magma_d_mfree(&AT, queue ); magma_d_mfree(&AT2, queue ); magma_d_mfree(&B, queue ); magma_d_mfree(&Z2, queue ); magma_d_mfree(&Z, queue ); i++; } magma_queue_destroy( queue ); TESTING_FINALIZE(); return 0; }
/* //////////////////////////////////////////////////////////////////////////// -- testing any solver */ int main( int argc, char** argv ) { magma_int_t info = 0; TESTING_INIT(); magma_dopts zopts; magma_queue_t queue=NULL; magma_queue_create( 0, &queue ); real_Double_t res; magma_d_matrix A={Magma_CSR}, AT={Magma_CSR}, A2={Magma_CSR}, B={Magma_CSR}, B_d={Magma_CSR}; int i=1; real_Double_t start, end; CHECK( magma_dparse_opts( argc, argv, &zopts, &i, queue )); B.blocksize = zopts.blocksize; B.alignment = zopts.alignment; while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); CHECK( magma_dm_5stencil( laplace_size, &A, queue )); } else { // file-matrix test CHECK( magma_d_csr_mtx( &A, argv[i], queue )); } printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n", int(A.num_rows), int(A.num_cols), int(A.nnz) ); // scale matrix CHECK( magma_dmscale( &A, zopts.scaling, queue )); // remove nonzeros in matrix start = magma_sync_wtime( queue ); for (int j=0; j<10; j++) CHECK( magma_dmcsrcompressor( &A, queue )); end = magma_sync_wtime( queue ); printf( " > MAGMA CPU: %.2e seconds.\n", (end-start)/10 ); // transpose CHECK( magma_dmtranspose( A, &AT, queue )); // convert, copy back and forth to check everything works CHECK( magma_dmconvert( AT, &B, Magma_CSR, Magma_CSR, queue )); magma_dmfree(&AT, queue ); CHECK( magma_dmtransfer( B, &B_d, Magma_CPU, Magma_DEV, queue )); magma_dmfree(&B, queue ); start = magma_sync_wtime( queue ); for (int j=0; j<10; j++) CHECK( magma_dmcsrcompressor_gpu( &B_d, queue )); end = magma_sync_wtime( queue ); printf( " > MAGMA GPU: %.2e seconds.\n", (end-start)/10 ); CHECK( magma_dmtransfer( B_d, &B, Magma_DEV, Magma_CPU, queue )); magma_dmfree(&B_d, queue ); CHECK( magma_dmconvert( B, &AT, Magma_CSR, Magma_CSR, queue )); magma_dmfree(&B, queue ); // transpose back CHECK( magma_dmtranspose( AT, &A2, queue )); magma_dmfree(&AT, queue ); CHECK( magma_dmdiff( A, A2, &res, queue )); printf("%% ||A-B||_F = %8.2e\n", res); if ( res < .000001 ) printf("%% tester matrix compressor: ok\n"); else printf("%% tester matrix compressor: failed\n"); magma_dmfree(&A, queue ); magma_dmfree(&A2, queue ); i++; } cleanup: magma_dmfree(&AT, queue ); magma_dmfree(&B, queue ); magma_dmfree(&A, queue ); magma_dmfree(&A2, queue ); magma_queue_destroy( queue ); TESTING_FINALIZE(); return info; }
/* //////////////////////////////////////////////////////////////////////////// -- testing any solver */ int main( int argc, char** argv ) { magma_int_t info = 0; TESTING_INIT(); magma_dopts zopts; magma_queue_t queue=NULL; magma_queue_create( &queue ); real_Double_t res; magma_d_matrix Z={Magma_CSR}, Z2={Magma_CSR}, A={Magma_CSR}, A2={Magma_CSR}, AT={Magma_CSR}, AT2={Magma_CSR}, B={Magma_CSR}; int i=1; CHECK( magma_dparse_opts( argc, argv, &zopts, &i, queue )); B.blocksize = zopts.blocksize; B.alignment = zopts.alignment; while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); CHECK( magma_dm_5stencil( laplace_size, &Z, queue )); } else { // file-matrix test CHECK( magma_d_csr_mtx( &Z, argv[i], queue )); } printf("%% matrix info: %d-by-%d with %d nonzeros\n", int(Z.num_rows), int(Z.num_cols), int(Z.nnz) ); // convert to be non-symmetric CHECK( magma_dmconvert( Z, &A, Magma_CSR, Magma_CSRL, queue )); CHECK( magma_dmconvert( Z, &B, Magma_CSR, Magma_CSRU, queue )); // transpose CHECK( magma_dmtranspose( A, &AT, queue )); // quite some conversions //ELL CHECK( magma_dmconvert( AT, &AT2, Magma_CSR, Magma_ELL, queue )); magma_dmfree(&AT, queue ); CHECK( magma_dmconvert( AT2, &AT, Magma_ELL, Magma_CSR, queue )); magma_dmfree(&AT2, queue ); //ELLPACKT CHECK( magma_dmconvert( AT, &AT2, Magma_CSR, Magma_ELLPACKT, queue )); magma_dmfree(&AT, queue ); CHECK( magma_dmconvert( AT2, &AT, Magma_ELLPACKT, Magma_CSR, queue )); magma_dmfree(&AT2, queue ); //ELLRT AT2.blocksize = 8; AT2.alignment = 8; CHECK( magma_dmconvert( AT, &AT2, Magma_CSR, Magma_ELLRT, queue )); magma_dmfree(&AT, queue ); CHECK( magma_dmconvert( AT2, &AT, Magma_ELLRT, Magma_CSR, queue )); magma_dmfree(&AT2, queue ); //SELLP AT2.blocksize = 8; AT2.alignment = 8; CHECK( magma_dmconvert( AT, &AT2, Magma_CSR, Magma_SELLP, queue )); magma_dmfree(&AT, queue ); CHECK( magma_dmconvert( AT2, &AT, Magma_SELLP, Magma_CSR, queue )); magma_dmfree(&AT2, queue ); //ELLD CHECK( magma_dmconvert( AT, &AT2, Magma_CSR, Magma_ELLD, queue )); magma_dmfree(&AT, queue ); CHECK( magma_dmconvert( AT2, &AT, Magma_ELLD, Magma_CSR, queue )); magma_dmfree(&AT2, queue ); //CSRCOO CHECK( magma_dmconvert( AT, &AT2, Magma_CSR, Magma_CSRCOO, queue )); magma_dmfree(&AT, queue ); CHECK( magma_dmconvert( AT2, &AT, Magma_CSRCOO, Magma_CSR, queue )); magma_dmfree(&AT2, queue ); //CSRLIST CHECK( magma_dmconvert( AT, &AT2, Magma_CSR, Magma_CSRLIST, queue )); magma_dmfree(&AT, queue ); CHECK( magma_dmconvert( AT2, &AT, Magma_CSRLIST, Magma_CSR, queue )); magma_dmfree(&AT2, queue ); //CSRD CHECK( magma_dmconvert( AT, &AT2, Magma_CSR, Magma_CSRD, queue )); magma_dmfree(&AT, queue ); CHECK( magma_dmconvert( AT2, &AT, Magma_CSRD, Magma_CSR, queue )); magma_dmfree(&AT2, queue ); // transpose CHECK( magma_dmtranspose( AT, &A2, queue )); CHECK( magma_dmdiff( A, A2, &res, queue)); printf("%% ||A-A2||_F = %8.2e\n", res); if ( res < .000001 ) printf("%% conversion tester: ok\n"); else printf("%% conversion tester: failed\n"); CHECK( magma_dmlumerge( A2, B, &Z2, queue )); CHECK( magma_dmdiff( Z, Z2, &res, queue)); printf("%% ||Z-Z2||_F = %8.2e\n", res); if ( res < .000001 ) printf("%% LUmerge tester: ok\n"); else printf("%% LUmerge tester: failed\n"); magma_dmfree(&A, queue ); magma_dmfree(&A2, queue ); magma_dmfree(&AT, queue ); magma_dmfree(&AT2, queue ); magma_dmfree(&B, queue ); magma_dmfree(&Z2, queue ); magma_dmfree(&Z, queue ); i++; } cleanup: magma_dmfree(&A, queue ); magma_dmfree(&A2, queue ); magma_dmfree(&AT, queue ); magma_dmfree(&AT2, queue ); magma_dmfree(&B, queue ); magma_dmfree(&Z2, queue ); magma_dmfree(&Z, queue ); magma_queue_destroy( queue ); TESTING_FINALIZE(); return info; }
/* //////////////////////////////////////////////////////////////////////////// -- testing any solver */ int main( int argc, char** argv ) { magma_int_t info = 0; TESTING_INIT(); magma_dopts zopts; magma_queue_t queue=NULL; magma_queue_create( 0, &queue ); real_Double_t res; magma_d_matrix Z={Magma_CSR}, A={Magma_CSR}, AT={Magma_CSR}, A2={Magma_CSR}, B={Magma_CSR}, B_d={Magma_CSR}; magma_index_t *comm_i=NULL; double *comm_v=NULL; magma_int_t start, end; int i=1; CHECK( magma_dparse_opts( argc, argv, &zopts, &i, queue )); B.blocksize = zopts.blocksize; B.alignment = zopts.alignment; while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); CHECK( magma_dm_5stencil( laplace_size, &Z, queue )); } else { // file-matrix test CHECK( magma_d_csr_mtx( &Z, argv[i], queue )); } printf("%% matrix info: %d-by-%d with %d nonzeros\n", int(Z.num_rows), int(Z.num_cols), int(Z.nnz) ); // slice matrix CHECK( magma_index_malloc_cpu( &comm_i, Z.num_rows ) ); CHECK( magma_dmalloc_cpu( &comm_v, Z.num_rows ) ); CHECK( magma_dmslice( 1, 0, Z, &A2, &AT, &B, comm_i, comm_v, &start, &end, queue ) ); magma_dprint_matrix( A2, queue ); magma_dprint_matrix( AT, queue ); magma_dprint_matrix( B, queue ); magma_dmfree(&A2, queue ); magma_dmfree(&AT, queue ); magma_dmfree(&B, queue ); CHECK( magma_dmslice( 9, 0, Z, &A2, &AT, &B, comm_i, comm_v, &start, &end, queue ) ); magma_dprint_matrix( A2, queue ); magma_dprint_matrix( AT, queue ); magma_dprint_matrix( B, queue ); magma_dmfree(&A2, queue ); magma_dmfree(&AT, queue ); magma_dmfree(&B, queue ); CHECK( magma_dmslice( 9, 1, Z, &A2, &AT, &B, comm_i, comm_v, &start, &end, queue ) ); magma_dprint_matrix( A2, queue ); magma_dprint_matrix( AT, queue ); magma_dprint_matrix( B, queue ); magma_dmfree(&A2, queue ); magma_dmfree(&AT, queue ); magma_dmfree(&B, queue ); CHECK( magma_dmslice( 9, 8, Z, &A2, &AT, &B, comm_i, comm_v, &start, &end, queue ) ); magma_dprint_matrix( A2, queue ); magma_dprint_matrix( AT, queue ); magma_dprint_matrix( B, queue ); magma_dmfree(&A2, queue ); magma_dmfree(&AT, queue ); magma_dmfree(&B, queue ); // scale matrix CHECK( magma_dmscale( &Z, zopts.scaling, queue )); // remove nonzeros in matrix CHECK( magma_dmcsrcompressor( &Z, queue )); // convert to be non-symmetric CHECK( magma_dmconvert( Z, &A, Magma_CSR, Magma_CSRL, queue )); // transpose CHECK( magma_dmtranspose( A, &AT, queue )); // convert, copy back and forth to check everything works CHECK( magma_dmconvert( AT, &B, Magma_CSR, zopts.output_format, queue )); magma_dmfree(&AT, queue ); CHECK( magma_dmtransfer( B, &B_d, Magma_CPU, Magma_DEV, queue )); magma_dmfree(&B, queue ); CHECK( magma_dmcsrcompressor_gpu( &B_d, queue )); CHECK( magma_dmtransfer( B_d, &B, Magma_DEV, Magma_CPU, queue )); magma_dmfree(&B_d, queue ); CHECK( magma_dmconvert( B, &AT, zopts.output_format,Magma_CSR, queue )); magma_dmfree(&B, queue ); // transpose back CHECK( magma_dmtranspose( AT, &A2, queue )); magma_dmfree(&AT, queue ); CHECK( magma_dmdiff( A, A2, &res, queue)); printf("%% ||A-B||_F = %8.2e\n", res); if ( res < .000001 ) printf("%% tester: ok\n"); else printf("%% tester: failed\n"); magma_free_cpu( comm_i ); magma_free_cpu( comm_v ); comm_i=NULL; comm_v=NULL; magma_dmfree(&A, queue ); magma_dmfree(&A2, queue ); magma_dmfree(&Z, queue ); i++; } cleanup: magma_free_cpu( comm_i ); magma_free_cpu( comm_v ); magma_dmfree(&AT, queue ); magma_dmfree(&A, queue ); magma_dmfree(&B, queue ); magma_dmfree(&B_d, queue ); magma_dmfree(&A2, queue ); magma_dmfree(&Z, queue ); magma_queue_destroy( queue ); TESTING_FINALIZE(); return info; }