/* //////////////////////////////////////////////////////////////////////////// -- testing any solver */ int main( int argc, char** argv ) { magma_int_t info = 0; TESTING_INIT(); magma_copts zopts; magma_queue_t queue=NULL; magma_queue_create( 0, &queue ); magma_c_matrix Z={Magma_CSR}; int i=1; CHECK( magma_cparse_opts( argc, argv, &zopts, &i, queue )); printf("matrixinfo = [ \n"); printf("%% size (n) || nonzeros (nnz) || nnz/n \n"); printf("%%=============================================================%%\n"); while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); CHECK( magma_cm_5stencil( laplace_size, &Z, queue )); } else { // file-matrix test CHECK( magma_c_csr_mtx( &Z, argv[i], queue )); } printf(" %10d %10d %10d\n", int(Z.num_rows), int(Z.nnz), int(Z.nnz/Z.num_rows) ); magma_cmfree(&Z, queue ); i++; } printf("%%=============================================================%%\n"); printf("];\n"); cleanup: magma_cmfree(&Z, queue ); magma_queue_destroy( queue ); TESTING_FINALIZE(); return info; }
/* //////////////////////////////////////////////////////////////////////////// -- testing any solver */ int main( int argc, char** argv ) { TESTING_INIT(); magma_copts zopts; magma_queue_t queue; magma_queue_create( /*devices[ opts->device ],*/ &queue ); int i=1; magma_cparse_opts( argc, argv, &zopts, &i, queue ); real_Double_t res; magma_c_sparse_matrix Z, Z2, A, A2, AT, AT2, B; B.blocksize = zopts.blocksize; B.alignment = zopts.alignment; while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); magma_cm_5stencil( laplace_size, &Z, queue ); } else { // file-matrix test magma_c_csr_mtx( &Z, argv[i], queue ); } printf( "# matrix info: %d-by-%d with %d nonzeros\n", (int) Z.num_rows,(int) Z.num_cols,(int) Z.nnz ); // convert to be non-symmetric magma_c_mconvert( Z, &A, Magma_CSR, Magma_CSRL, queue ); magma_c_mconvert( Z, &B, Magma_CSR, Magma_CSRU, queue ); // transpose magma_c_mtranspose( A, &AT, queue ); // quite some conversions //ELL magma_c_mconvert( AT, &AT2, Magma_CSR, Magma_ELL, queue ); magma_c_mfree(&AT, queue ); magma_c_mconvert( AT2, &AT, Magma_ELL, Magma_CSR, queue ); magma_c_mfree(&AT2, queue ); //ELLPACKT magma_c_mconvert( AT, &AT2, Magma_CSR, Magma_ELLPACKT, queue ); magma_c_mfree(&AT, queue ); magma_c_mconvert( AT2, &AT, Magma_ELLPACKT, Magma_CSR, queue ); magma_c_mfree(&AT2, queue ); //ELLRT AT2.blocksize = 8; AT2.alignment = 8; magma_c_mconvert( AT, &AT2, Magma_CSR, Magma_ELLRT, queue ); magma_c_mfree(&AT, queue ); magma_c_mconvert( AT2, &AT, Magma_ELLRT, Magma_CSR, queue ); magma_c_mfree(&AT2, queue ); //SELLP AT2.blocksize = 8; AT2.alignment = 8; magma_c_mconvert( AT, &AT2, Magma_CSR, Magma_SELLP, queue ); magma_c_mfree(&AT, queue ); magma_c_mconvert( AT2, &AT, Magma_SELLP, Magma_CSR, queue ); magma_c_mfree(&AT2, queue ); //ELLD magma_c_mconvert( AT, &AT2, Magma_CSR, Magma_ELLD, queue ); magma_c_mfree(&AT, queue ); magma_c_mconvert( AT2, &AT, Magma_ELLD, 
Magma_CSR, queue ); magma_c_mfree(&AT2, queue ); //CSRCOO magma_c_mconvert( AT, &AT2, Magma_CSR, Magma_CSRCOO, queue ); magma_c_mfree(&AT, queue ); magma_c_mconvert( AT2, &AT, Magma_CSRCOO, Magma_CSR, queue ); magma_c_mfree(&AT2, queue ); //CSRD magma_c_mconvert( AT, &AT2, Magma_CSR, Magma_CSRD, queue ); magma_c_mfree(&AT, queue ); magma_c_mconvert( AT2, &AT, Magma_CSRD, Magma_CSR, queue ); magma_c_mfree(&AT2, queue ); //BCSR magma_c_mconvert( AT, &AT2, Magma_CSR, Magma_BCSR, queue ); magma_c_mfree(&AT, queue ); magma_c_mconvert( AT2, &AT, Magma_BCSR, Magma_CSR, queue ); magma_c_mfree(&AT2, queue ); // transpose magma_c_mtranspose( AT, &A2, queue ); magma_cmdiff( A, A2, &res, queue); printf("# ||A-A2||_F = %8.2e\n", res); if ( res < .000001 ) printf("# conversion tester: ok\n"); else printf("# conversion tester: failed\n"); magma_cmlumerge( A2, B, &Z2, queue ); magma_cmdiff( Z, Z2, &res, queue); printf("# ||Z-Z2||_F = %8.2e\n", res); if ( res < .000001 ) printf("# LUmerge tester: ok\n"); else printf("# LUmerge tester: failed\n"); magma_c_mfree(&A, queue ); magma_c_mfree(&A2, queue ); magma_c_mfree(&AT, queue ); magma_c_mfree(&AT2, queue ); magma_c_mfree(&B, queue ); magma_c_mfree(&Z2, queue ); magma_c_mfree(&Z, queue ); i++; } magma_queue_destroy( queue ); TESTING_FINALIZE(); return 0; }
/* //////////////////////////////////////////////////////////////////////////// -- testing any solver */ int main( int argc, char** argv ) { TESTING_INIT(); magma_copts zopts; magma_queue_t queue; magma_queue_create( /*devices[ opts->device ],*/ &queue ); int i=1; magma_cparse_opts( argc, argv, &zopts, &i, queue ); magmaFloatComplex one = MAGMA_C_MAKE(1.0, 0.0); magmaFloatComplex zero = MAGMA_C_MAKE(0.0, 0.0); magma_c_sparse_matrix A, B, B_d; magma_c_vector x, b; B.blocksize = zopts.blocksize; B.alignment = zopts.alignment; if ( zopts.solver_par.solver != Magma_PCG && zopts.solver_par.solver != Magma_PGMRES && zopts.solver_par.solver != Magma_PBICGSTAB && zopts.solver_par.solver != Magma_ITERREF ) zopts.precond_par.solver = Magma_NONE; magma_csolverinfo_init( &zopts.solver_par, &zopts.precond_par, queue ); while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); magma_cm_5stencil( laplace_size, &A, queue ); } else { // file-matrix test magma_c_csr_mtx( &A, argv[i], queue ); } printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n", (int) A.num_rows,(int) A.num_cols,(int) A.nnz ); // for the eigensolver case zopts.solver_par.ev_length = A.num_rows; magma_ceigensolverinfo_init( &zopts.solver_par, queue ); // scale matrix magma_cmscale( &A, zopts.scaling, queue ); magma_c_mconvert( A, &B, Magma_CSR, zopts.output_format, queue ); magma_c_mtransfer( B, &B_d, Magma_CPU, Magma_DEV, queue ); // vectors and initial guess magma_c_vinit( &b, Magma_DEV, A.num_cols, one, queue ); magma_c_vinit( &x, Magma_DEV, A.num_cols, one, queue ); magma_c_spmv( one, B_d, x, zero, b, queue ); // b = A x magma_c_vfree(&x, queue ); magma_c_vinit( &x, Magma_DEV, A.num_cols, zero, queue ); magma_c_solver( B_d, b, &x, &zopts, queue ); magma_csolverinfo( &zopts.solver_par, &zopts.precond_par, queue ); magma_c_mfree(&B_d, queue ); magma_c_mfree(&B, queue ); magma_c_mfree(&A, queue ); magma_c_vfree(&x, queue ); 
magma_c_vfree(&b, queue ); i++; } magma_csolverinfo_free( &zopts.solver_par, &zopts.precond_par, queue ); magma_queue_destroy( queue ); TESTING_FINALIZE(); return 0; }
/* //////////////////////////////////////////////////////////////////////////// -- testing any solver */ int main( int argc, char** argv ) { magma_int_t info = 0; TESTING_INIT(); magma_copts zopts; magma_queue_t queue=NULL; magma_queue_create( /*devices[ opts->device ],*/ &queue ); real_Double_t res; magma_c_matrix A={Magma_CSR}, AT={Magma_CSR}, A2={Magma_CSR}, B={Magma_CSR}, B_d={Magma_CSR}; int i=1; real_Double_t start, end; CHECK( magma_cparse_opts( argc, argv, &zopts, &i, queue )); B.blocksize = zopts.blocksize; B.alignment = zopts.alignment; while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); CHECK( magma_cm_5stencil( laplace_size, &A, queue )); } else { // file-matrix test CHECK( magma_c_csr_mtx( &A, argv[i], queue )); } printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n", (int) A.num_rows,(int) A.num_cols,(int) A.nnz ); // scale matrix CHECK( magma_cmscale( &A, zopts.scaling, queue )); // remove nonzeros in matrix start = magma_sync_wtime( queue ); for (int j=0; j<10; j++) CHECK( magma_cmcsrcompressor( &A, queue )); end = magma_sync_wtime( queue ); printf( " > MAGMA CPU: %.2e seconds.\n", (end-start)/10 ); // transpose CHECK( magma_cmtranspose( A, &AT, queue )); // convert, copy back and forth to check everything works CHECK( magma_cmconvert( AT, &B, Magma_CSR, Magma_CSR, queue )); magma_cmfree(&AT, queue ); CHECK( magma_cmtransfer( B, &B_d, Magma_CPU, Magma_DEV, queue )); magma_cmfree(&B, queue ); start = magma_sync_wtime( queue ); for (int j=0; j<10; j++) CHECK( magma_cmcsrcompressor_gpu( &B_d, queue )); end = magma_sync_wtime( queue ); printf( " > MAGMA GPU: %.2e seconds.\n", (end-start)/10 ); CHECK( magma_cmtransfer( B_d, &B, Magma_DEV, Magma_CPU, queue )); magma_cmfree(&B_d, queue ); CHECK( magma_cmconvert( B, &AT, Magma_CSR, Magma_CSR, queue )); magma_cmfree(&B, queue ); // transpose back CHECK( magma_cmtranspose( AT, &A2, queue )); magma_cmfree(&AT, queue ); 
CHECK( magma_cmdiff( A, A2, &res, queue )); printf("# ||A-B||_F = %8.2e\n", res); if ( res < .000001 ) printf("# tester matrix compressor: ok\n"); else printf("# tester matrix compressor: failed\n"); magma_cmfree(&A, queue ); magma_cmfree(&A2, queue ); i++; } cleanup: magma_cmfree(&AT, queue ); magma_cmfree(&B, queue ); magma_cmfree(&A, queue ); magma_cmfree(&A2, queue ); magma_queue_destroy( queue ); TESTING_FINALIZE(); return info; }
/* //////////////////////////////////////////////////////////////////////////// -- testing any solver */ int main( int argc, char** argv ) { magma_int_t info = 0; TESTING_CHECK( magma_init() ); magma_print_environment(); magma_copts zopts; magma_queue_t queue=NULL; magma_queue_create( 0, &queue ); real_Double_t res; magma_c_matrix A={Magma_CSR}, A2={Magma_CSR}, A3={Magma_CSR}, A4={Magma_CSR}, A5={Magma_CSR}; int i=1; TESTING_CHECK( magma_cparse_opts( argc, argv, &zopts, &i, queue )); while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); TESTING_CHECK( magma_cm_5stencil( laplace_size, &A, queue )); } else { // file-matrix test TESTING_CHECK( magma_c_csr_mtx( &A, argv[i], queue )); } printf("%% matrix info: %lld-by-%lld with %lld nonzeros\n", (long long) A.num_rows, (long long) A.num_cols, (long long) A.nnz ); // filename for temporary matrix storage const char *filename = "testmatrix.mtx"; // write to file TESTING_CHECK( magma_cwrite_csrtomtx( A, filename, queue )); // read from file TESTING_CHECK( magma_c_csr_mtx( &A2, filename, queue )); // delete temporary matrix unlink( filename ); //visualize printf("A2:\n"); TESTING_CHECK( magma_cprint_matrix( A2, queue )); //visualize TESTING_CHECK( magma_cmconvert(A2, &A4, Magma_CSR, Magma_CSRL, queue )); printf("A4:\n"); TESTING_CHECK( magma_cprint_matrix( A4, queue )); TESTING_CHECK( magma_cmconvert(A4, &A5, Magma_CSR, Magma_ELL, queue )); printf("A5:\n"); TESTING_CHECK( magma_cprint_matrix( A5, queue )); // pass it to another application and back magma_int_t m, n; magma_index_t *row, *col; magmaFloatComplex *val=NULL; TESTING_CHECK( magma_ccsrget( A2, &m, &n, &row, &col, &val, queue )); TESTING_CHECK( magma_ccsrset( m, n, row, col, val, &A3, queue )); TESTING_CHECK( magma_cmdiff( A, A2, &res, queue )); printf("%% ||A-B||_F = %8.2e\n", res); if ( res < .000001 ) printf("%% tester IO: ok\n"); else printf("%% tester IO: failed\n"); 
TESTING_CHECK( magma_cmdiff( A, A3, &res, queue )); printf("%% ||A-B||_F = %8.2e\n", res); if ( res < .000001 ) printf("%% tester matrix interface: ok\n"); else printf("%% tester matrix interface: failed\n"); magma_cmfree(&A, queue ); magma_cmfree(&A2, queue ); magma_cmfree(&A4, queue ); magma_cmfree(&A5, queue ); i++; } magma_queue_destroy( queue ); TESTING_CHECK( magma_finalize() ); return info; }
/* //////////////////////////////////////////////////////////////////////////// -- testing sparse matrix vector product */ int main( int argc, char** argv ) { TESTING_INIT(); magma_queue_t queue; magma_queue_create( /*devices[ opts->device ],*/ &queue ); magma_c_sparse_matrix hA, hA_SELLP, hA_ELL, dA, dA_SELLP, dA_ELL; hA_SELLP.blocksize = 8; hA_SELLP.alignment = 8; real_Double_t start, end, res; magma_int_t *pntre; magmaFloatComplex c_one = MAGMA_C_MAKE(1.0, 0.0); magmaFloatComplex c_zero = MAGMA_C_MAKE(0.0, 0.0); magma_int_t i, j; for( i = 1; i < argc; ++i ) { if ( strcmp("--blocksize", argv[i]) == 0 ) { hA_SELLP.blocksize = atoi( argv[++i] ); } else if ( strcmp("--alignment", argv[i]) == 0 ) { hA_SELLP.alignment = atoi( argv[++i] ); } else break; } printf( "\n# usage: ./run_cspmv" " [ --blocksize %d --alignment %d (for SELLP) ]" " matrices \n\n", (int) hA_SELLP.blocksize, (int) hA_SELLP.alignment ); while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); magma_cm_5stencil( laplace_size, &hA, queue ); } else { // file-matrix test magma_c_csr_mtx( &hA, argv[i], queue ); } printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n", (int) hA.num_rows,(int) hA.num_cols,(int) hA.nnz ); real_Double_t FLOPS = 2.0*hA.nnz/1e9; magma_c_vector hx, hy, dx, dy, hrefvec, hcheck; // init CPU vectors magma_c_vinit( &hx, Magma_CPU, hA.num_rows, c_zero, queue ); magma_c_vinit( &hy, Magma_CPU, hA.num_rows, c_zero, queue ); // init DEV vectors magma_c_vinit( &dx, Magma_DEV, hA.num_rows, c_one, queue ); magma_c_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue ); #ifdef MAGMA_WITH_MKL // calling MKL with CSR pntre = (magma_int_t*)malloc( (hA.num_rows+1)*sizeof(magma_int_t) ); pntre[0] = 0; for (j=0; j<hA.num_rows; j++ ) { pntre[j] = hA.row[j+1]; } MKL_INT num_rows = hA.num_rows; MKL_INT num_cols = hA.num_cols; MKL_INT nnz = hA.nnz; MKL_INT *col; TESTING_MALLOC_CPU( col, MKL_INT, nnz ); for( 
magma_int_t t=0; t < hA.nnz; ++t ) { col[ t ] = hA.col[ t ]; } MKL_INT *row; TESTING_MALLOC_CPU( row, MKL_INT, num_rows ); for( magma_int_t t=0; t < hA.num_rows; ++t ) { row[ t ] = hA.col[ t ]; } start = magma_wtime(); for (j=0; j<10; j++ ) { mkl_ccsrmv( "N", &num_rows, &num_cols, MKL_ADDR(&c_one), "GFNC", MKL_ADDR(hA.val), col, row, pntre, MKL_ADDR(hx.val), MKL_ADDR(&c_zero), MKL_ADDR(hy.val) ); } end = magma_wtime(); printf( "\n > MKL : %.2e seconds %.2e GFLOP/s (CSR).\n", (end-start)/10, FLOPS*10/(end-start) ); TESTING_FREE_CPU( row ); TESTING_FREE_CPU( col ); free(pntre); #endif // MAGMA_WITH_MKL // copy matrix to GPU magma_c_mtransfer( hA, &dA, Magma_CPU, Magma_DEV, queue ); // SpMV on GPU (CSR) -- this is the reference! start = magma_sync_wtime( queue ); for (j=0; j<10; j++) magma_c_spmv( c_one, dA, dx, c_zero, dy, queue ); end = magma_sync_wtime( queue ); printf( " > MAGMA: %.2e seconds %.2e GFLOP/s (standard CSR).\n", (end-start)/10, FLOPS*10/(end-start) ); magma_c_mfree(&dA, queue ); magma_c_vtransfer( dy, &hrefvec , Magma_DEV, Magma_CPU, queue ); // convert to ELL and copy to GPU magma_c_mconvert( hA, &hA_ELL, Magma_CSR, Magma_ELL, queue ); magma_c_mtransfer( hA_ELL, &dA_ELL, Magma_CPU, Magma_DEV, queue ); magma_c_mfree(&hA_ELL, queue ); magma_c_vfree( &dy, queue ); magma_c_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue ); // SpMV on GPU (ELL) start = magma_sync_wtime( queue ); for (j=0; j<10; j++) magma_c_spmv( c_one, dA_ELL, dx, c_zero, dy, queue ); end = magma_sync_wtime( queue ); printf( " > MAGMA: %.2e seconds %.2e GFLOP/s (standard ELL).\n", (end-start)/10, FLOPS*10/(end-start) ); magma_c_mfree(&dA_ELL, queue ); magma_c_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue ); res = 0.0; for(magma_int_t k=0; k<hA.num_rows; k++ ) res=res + MAGMA_C_REAL(hcheck.val[k]) - MAGMA_C_REAL(hrefvec.val[k]); if ( res < .000001 ) printf("# tester spmv ELL: ok\n"); else printf("# tester spmv ELL: failed\n"); magma_c_vfree( &hcheck, queue ); // convert to SELLP 
and copy to GPU magma_c_mconvert( hA, &hA_SELLP, Magma_CSR, Magma_SELLP, queue ); magma_c_mtransfer( hA_SELLP, &dA_SELLP, Magma_CPU, Magma_DEV, queue ); magma_c_mfree(&hA_SELLP, queue ); magma_c_vfree( &dy, queue ); magma_c_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue ); // SpMV on GPU (SELLP) start = magma_sync_wtime( queue ); for (j=0; j<10; j++) magma_c_spmv( c_one, dA_SELLP, dx, c_zero, dy, queue ); end = magma_sync_wtime( queue ); printf( " > MAGMA: %.2e seconds %.2e GFLOP/s (SELLP).\n", (end-start)/10, FLOPS*10/(end-start) ); magma_c_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue ); res = 0.0; for(magma_int_t k=0; k<hA.num_rows; k++ ) res=res + MAGMA_C_REAL(hcheck.val[k]) - MAGMA_C_REAL(hrefvec.val[k]); printf("# |x-y|_F = %8.2e\n", res); if ( res < .000001 ) printf("# tester spmv SELL-P: ok\n"); else printf("# tester spmv SELL-P: failed\n"); magma_c_vfree( &hcheck, queue ); magma_c_mfree(&dA_SELLP, queue ); // SpMV on GPU (CUSPARSE - CSR) // CUSPARSE context // cusparseHandle_t cusparseHandle = 0; cusparseStatus_t cusparseStatus; cusparseStatus = cusparseCreate(&cusparseHandle); cusparseSetStream( cusparseHandle, queue ); cusparseMatDescr_t descr = 0; cusparseStatus = cusparseCreateMatDescr(&descr); cusparseSetMatType(descr,CUSPARSE_MATRIX_TYPE_GENERAL); cusparseSetMatIndexBase(descr,CUSPARSE_INDEX_BASE_ZERO); magmaFloatComplex alpha = c_one; magmaFloatComplex beta = c_zero; magma_c_vfree( &dy, queue ); magma_c_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue ); // copy matrix to GPU magma_c_mtransfer( hA, &dA, Magma_CPU, Magma_DEV, queue ); start = magma_sync_wtime( queue ); for (j=0; j<10; j++) cusparseStatus = cusparseCcsrmv(cusparseHandle,CUSPARSE_OPERATION_NON_TRANSPOSE, hA.num_rows, hA.num_cols, hA.nnz, &alpha, descr, dA.dval, dA.drow, dA.dcol, dx.dval, &beta, dy.dval); end = magma_sync_wtime( queue ); if (cusparseStatus != 0) printf("error in cuSPARSE CSR\n"); printf( " > CUSPARSE: %.2e seconds %.2e GFLOP/s (CSR).\n", (end-start)/10, 
FLOPS*10/(end-start) ); cusparseMatDescr_t descrA; cusparseStatus = cusparseCreateMatDescr(&descrA); if (cusparseStatus != 0) printf("error\n"); cusparseHybMat_t hybA; cusparseStatus = cusparseCreateHybMat( &hybA ); if (cusparseStatus != 0) printf("error\n"); magma_c_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue ); res = 0.0; for(magma_int_t k=0; k<hA.num_rows; k++ ) res=res + MAGMA_C_REAL(hcheck.val[k]) - MAGMA_C_REAL(hrefvec.val[k]); printf("# |x-y|_F = %8.2e\n", res); if ( res < .000001 ) printf("# tester spmv cuSPARSE CSR: ok\n"); else printf("# tester spmv cuSPARSE CSR: failed\n"); magma_c_vfree( &hcheck, queue ); magma_c_vfree( &dy, queue ); magma_c_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue ); cusparseCcsr2hyb(cusparseHandle, hA.num_rows, hA.num_cols, descrA, dA.dval, dA.drow, dA.dcol, hybA, 0, CUSPARSE_HYB_PARTITION_AUTO); start = magma_sync_wtime( queue ); for (j=0; j<10; j++) cusparseStatus = cusparseChybmv( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, descrA, hybA, dx.dval, &beta, dy.dval); end = magma_sync_wtime( queue ); if (cusparseStatus != 0) printf("error in cuSPARSE HYB\n"); printf( " > CUSPARSE: %.2e seconds %.2e GFLOP/s (HYB).\n", (end-start)/10, FLOPS*10/(end-start) ); magma_c_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue ); res = 0.0; for(magma_int_t k=0; k<hA.num_rows; k++ ) res=res + MAGMA_C_REAL(hcheck.val[k]) - MAGMA_C_REAL(hrefvec.val[k]); printf("# |x-y|_F = %8.2e\n", res); if ( res < .000001 ) printf("# tester spmv cuSPARSE HYB: ok\n"); else printf("# tester spmv cuSPARSE HYB: failed\n"); magma_c_vfree( &hcheck, queue ); cusparseDestroyMatDescr( descrA ); cusparseDestroyHybMat( hybA ); cusparseDestroy( cusparseHandle ); magma_c_mfree(&dA, queue ); printf("\n\n"); // free CPU memory magma_c_mfree(&hA, queue ); magma_c_vfree(&hx, queue ); magma_c_vfree(&hy, queue ); magma_c_vfree(&hrefvec, queue ); // free GPU memory magma_c_vfree(&dx, queue ); magma_c_vfree(&dy, queue ); i++; } magma_queue_destroy( queue 
); TESTING_FINALIZE(); return 0; }
/* //////////////////////////////////////////////////////////////////////////// -- testing any solver */ int main( int argc, char** argv ) { magma_int_t info = 0; TESTING_CHECK( magma_init() ); magma_print_environment(); magma_queue_t queue=NULL; magma_queue_create( 0, &queue ); magmaFloatComplex one = MAGMA_C_MAKE(1.0, 0.0); magmaFloatComplex zero = MAGMA_C_MAKE(0.0, 0.0); magma_c_matrix A={Magma_CSR}, B_d={Magma_CSR}; magma_c_matrix x={Magma_CSR}, b={Magma_CSR}; int i=1; while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); TESTING_CHECK( magma_cm_5stencil( laplace_size, &A, queue )); } else { // file-matrix test TESTING_CHECK( magma_c_csr_mtx( &A, argv[i], queue )); } printf( "\n# matrix info: %lld-by-%lld with %lld nonzeros\n\n", (long long) A.num_rows, (long long) A.num_cols, (long long) A.nnz ); magma_int_t n = A.num_rows; TESTING_CHECK( magma_cmtransfer( A, &B_d, Magma_CPU, Magma_DEV, queue )); // vectors and initial guess TESTING_CHECK( magma_cvinit( &b, Magma_DEV, A.num_cols, 1, zero, queue )); TESTING_CHECK( magma_cvinit( &x, Magma_DEV, A.num_cols, 1, one, queue )); TESTING_CHECK( magma_cprint_vector( b, 90, 10, queue )); TESTING_CHECK( magma_cprint_matrix( A, queue )); printf("\n\n\n"); TESTING_CHECK( magma_cprint_matrix( B_d, queue )); float res; res = magma_scnrm2( n, b.dval, 1, queue ); printf("norm0: %f\n", res); TESTING_CHECK( magma_c_spmv( one, B_d, x, zero, b, queue )); // b = A x TESTING_CHECK( magma_cprint_vector( b, 0, 100, queue )); TESTING_CHECK( magma_cprint_vector( b, b.num_rows-10, 10, queue )); res = magma_scnrm2( n, b.dval, 1, queue ); printf("norm: %f\n", res); TESTING_CHECK( magma_cresidual( B_d, x, b, &res, queue )); printf("res: %f\n", res); magma_cmfree(&B_d, queue ); magma_cmfree(&A, queue ); magma_cmfree(&x, queue ); magma_cmfree(&b, queue ); i++; } magma_queue_destroy( queue ); magma_finalize(); return info; }
/**
    Sets up an incomplete-Cholesky style preconditioner from a factor that is
    read from a Matrix Market file instead of being computed.

    The factor is copied to the device as precond->M and precond->L, its
    transpose becomes precond->U, diagonal scalings go to precond->d and
    precond->d2, and two zero work vectors plus the cuSPARSE triangular-solve
    analysis data (precond->cuinfoL / precond->cuinfoU) are created.

    @param[in]     A        not referenced by this routine
    @param[in]     b        not referenced by this routine
    @param[in,out] precond  preconditioner structure that receives the data
    @param[in]     queue    queue passed to every MAGMA call

    @return 0 on success, otherwise the error code left in info by the
            failing CHECK / CHECK_CUSPARSE step.

    NOTE(review): the factor is loaded from a hard-coded absolute path on a
    developer machine; the routine cannot succeed where that file is absent.
*/
magma_int_t
magma_ccustomicsetup(
    magma_c_matrix A,
    magma_c_matrix b,
    magma_c_preconditioner *precond,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    cusparseHandle_t   cusparseHandle = NULL;
    cusparseMatDescr_t descrL = NULL;
    cusparseMatDescr_t descrU = NULL;

    magma_c_matrix hA = {Magma_CSR};
    const char *factor_file =
        "/Users/hanzt0114cl306/work/matrices/ani/ani7_crop_ichol.mtx";

    // read the precomputed factor on the host
    CHECK( magma_c_csr_mtx( &hA, factor_file, queue ));

    // for CUSPARSE: device copy of the factor
    CHECK( magma_cmtransfer( hA, &precond->M, Magma_CPU, Magma_DEV, queue ));

    // copy the matrix to precond->L and (transposed) to precond->U
    CHECK( magma_cmtransfer( precond->M, &(precond->L), Magma_DEV, Magma_DEV, queue ));
    CHECK( magma_cmtranspose( precond->L, &(precond->U), queue ));

    // diagonal scaling of L into precond->d, plus a zero work vector
    CHECK( magma_cjacobisetup_diagscal( precond->L, &precond->d, queue ));
    CHECK( magma_cvinit( &precond->work1, Magma_DEV, hA.num_rows, 1, MAGMA_C_ZERO, queue ));

    // diagonal scaling of U into precond->d2, plus a zero work vector
    CHECK( magma_cjacobisetup_diagscal( precond->U, &precond->d2, queue ));
    CHECK( magma_cvinit( &precond->work2, Magma_DEV, hA.num_rows, 1, MAGMA_C_ZERO, queue ));

    // CUSPARSE context //
    CHECK_CUSPARSE( cusparseCreate( &cusparseHandle ));

    // analysis for the solve with M (lower, non-unit diagonal, no transpose)
    CHECK_CUSPARSE( cusparseCreateMatDescr( &descrL ));
    CHECK_CUSPARSE( cusparseSetMatType( descrL, CUSPARSE_MATRIX_TYPE_TRIANGULAR ));
    CHECK_CUSPARSE( cusparseSetMatDiagType( descrL, CUSPARSE_DIAG_TYPE_NON_UNIT ));
    CHECK_CUSPARSE( cusparseSetMatIndexBase( descrL, CUSPARSE_INDEX_BASE_ZERO ));
    CHECK_CUSPARSE( cusparseSetMatFillMode( descrL, CUSPARSE_FILL_MODE_LOWER ));
    CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoL ));
    CHECK_CUSPARSE( cusparseCcsrsv_analysis( cusparseHandle,
                        CUSPARSE_OPERATION_NON_TRANSPOSE,
                        precond->M.num_rows, precond->M.nnz, descrL,
                        precond->M.val, precond->M.row, precond->M.col,
                        precond->cuinfoL ));

    // analysis for the solve with the transpose of the same matrix M
    CHECK_CUSPARSE( cusparseCreateMatDescr( &descrU ));
    CHECK_CUSPARSE( cusparseSetMatType( descrU, CUSPARSE_MATRIX_TYPE_TRIANGULAR ));
    CHECK_CUSPARSE( cusparseSetMatDiagType( descrU, CUSPARSE_DIAG_TYPE_NON_UNIT ));
    CHECK_CUSPARSE( cusparseSetMatIndexBase( descrU, CUSPARSE_INDEX_BASE_ZERO ));
    CHECK_CUSPARSE( cusparseSetMatFillMode( descrU, CUSPARSE_FILL_MODE_LOWER ));
    CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoU ));
    CHECK_CUSPARSE( cusparseCcsrsv_analysis( cusparseHandle,
                        CUSPARSE_OPERATION_TRANSPOSE,
                        precond->M.num_rows, precond->M.nnz, descrU,
                        precond->M.val, precond->M.row, precond->M.col,
                        precond->cuinfoU ));

cleanup:
    // temporaries are released on success and on failure alike
    cusparseDestroy( cusparseHandle );
    cusparseDestroyMatDescr( descrL );
    cusparseDestroyMatDescr( descrU );
    magma_cmfree( &hA, queue );

    return info;
}
/* //////////////////////////////////////////////////////////////////////////// -- testing any solver */ int main( int argc, char** argv) { TESTING_INIT(); magma_copts zopts; int i=1; magma_cparse_opts( argc, argv, &zopts, &i); magmaFloatComplex one = MAGMA_C_MAKE(1.0, 0.0); magmaFloatComplex zero = MAGMA_C_MAKE(0.0, 0.0); magma_c_sparse_matrix A, B, B_d; magma_c_vector x, b; B.blocksize = zopts.blocksize; B.alignment = zopts.alignment; if ( zopts.solver_par.solver != Magma_PCG && zopts.solver_par.solver != Magma_PGMRES && zopts.solver_par.solver != Magma_PBICGSTAB && zopts.solver_par.solver != Magma_ITERREF ) zopts.precond_par.solver = Magma_NONE; magma_csolverinfo_init( &zopts.solver_par, &zopts.precond_par ); while( i < argc ){ magma_c_csr_mtx( &A, argv[i] ); printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n", (int) A.num_rows,(int) A.num_cols,(int) A.nnz ); // scale matrix magma_cmscale( &A, zopts.scaling ); magma_c_mconvert( A, &B, Magma_CSR, zopts.output_format ); magma_c_mtransfer( B, &B_d, Magma_CPU, Magma_DEV ); // vectors and initial guess magma_c_vinit( &b, Magma_DEV, A.num_cols, one ); magma_c_vinit( &x, Magma_DEV, A.num_cols, one ); magma_c_spmv( one, B_d, x, zero, b ); // b = A x magma_c_vfree(&x); magma_c_vinit( &x, Magma_DEV, A.num_cols, zero ); magma_c_solver( B_d, b, &x, &zopts ); magma_csolverinfo( &zopts.solver_par, &zopts.precond_par ); magma_c_mfree(&B_d); magma_c_mfree(&B); magma_c_mfree(&A); magma_c_vfree(&x); magma_c_vfree(&b); i++; } magma_csolverinfo_free( &zopts.solver_par, &zopts.precond_par ); TESTING_FINALIZE(); return 0; }
/* //////////////////////////////////////////////////////////////////////////// -- testing sparse matrix vector product */ int main( int argc, char** argv ) { magma_int_t info = 0; TESTING_INIT(); magma_queue_t queue=NULL; magma_queue_create( &queue ); magma_c_matrix hA={Magma_CSR}, hA_SELLP={Magma_CSR}, hA_ELL={Magma_CSR}, dA={Magma_CSR}, dA_SELLP={Magma_CSR}, dA_ELL={Magma_CSR}; magma_c_matrix hx={Magma_CSR}, hy={Magma_CSR}, dx={Magma_CSR}, dy={Magma_CSR}, hrefvec={Magma_CSR}, hcheck={Magma_CSR}; hA_SELLP.blocksize = 8; hA_SELLP.alignment = 8; real_Double_t start, end, res; #ifdef MAGMA_WITH_MKL magma_int_t *pntre=NULL; #endif cusparseHandle_t cusparseHandle = NULL; cusparseMatDescr_t descr = NULL; magmaFloatComplex c_one = MAGMA_C_MAKE(1.0, 0.0); magmaFloatComplex c_zero = MAGMA_C_MAKE(0.0, 0.0); float accuracy = 1e-10; #define PRECISION_c #if defined(PRECISION_c) accuracy = 1e-4; #endif #if defined(PRECISION_s) accuracy = 1e-4; #endif magma_int_t i, j; for( i = 1; i < argc; ++i ) { if ( strcmp("--blocksize", argv[i]) == 0 ) { hA_SELLP.blocksize = atoi( argv[++i] ); } else if ( strcmp("--alignment", argv[i]) == 0 ) { hA_SELLP.alignment = atoi( argv[++i] ); } else break; } printf("\n# usage: ./run_cspmm" " [ --blocksize %d --alignment %d (for SELLP) ]" " matrices \n\n", int(hA_SELLP.blocksize), int(hA_SELLP.alignment) ); while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); CHECK( magma_cm_5stencil( laplace_size, &hA, queue )); } else { // file-matrix test CHECK( magma_c_csr_mtx( &hA, argv[i], queue )); } printf("%% matrix info: %d-by-%d with %d nonzeros\n", int(hA.num_rows), int(hA.num_cols), int(hA.nnz) ); real_Double_t FLOPS = 2.0*hA.nnz/1e9; // m - number of rows for the sparse matrix // n - number of vectors to be multiplied in the SpMM product magma_int_t m, n; m = hA.num_rows; n = 48; // init CPU vectors CHECK( magma_cvinit( &hx, Magma_CPU, m, n, c_one, queue )); CHECK( 
magma_cvinit( &hy, Magma_CPU, m, n, c_zero, queue )); // init DEV vectors CHECK( magma_cvinit( &dx, Magma_DEV, m, n, c_one, queue )); CHECK( magma_cvinit( &dy, Magma_DEV, m, n, c_zero, queue )); // calling MKL with CSR #ifdef MAGMA_WITH_MKL CHECK( magma_imalloc_cpu( &pntre, m + 1 ) ); pntre[0] = 0; for (j=0; j < m; j++ ) { pntre[j] = hA.row[j+1]; } MKL_INT num_rows = hA.num_rows; MKL_INT num_cols = hA.num_cols; MKL_INT nnz = hA.nnz; MKL_INT num_vecs = n; MKL_INT *col; TESTING_MALLOC_CPU( col, MKL_INT, nnz ); for( magma_int_t t=0; t < hA.nnz; ++t ) { col[ t ] = hA.col[ t ]; } MKL_INT *row; TESTING_MALLOC_CPU( row, MKL_INT, num_rows ); for( magma_int_t t=0; t < hA.num_rows; ++t ) { row[ t ] = hA.col[ t ]; } // === Call MKL with consecutive SpMVs, using mkl_ccsrmv === // warmp up mkl_ccsrmv( "N", &num_rows, &num_cols, MKL_ADDR(&c_one), "GFNC", MKL_ADDR(hA.val), col, row, pntre, MKL_ADDR(hx.val), MKL_ADDR(&c_zero), MKL_ADDR(hy.val) ); start = magma_wtime(); for (j=0; j<10; j++ ) mkl_ccsrmv( "N", &num_rows, &num_cols, MKL_ADDR(&c_one), "GFNC", MKL_ADDR(hA.val), col, row, pntre, MKL_ADDR(hx.val), MKL_ADDR(&c_zero), MKL_ADDR(hy.val) ); end = magma_wtime(); printf( "\n > MKL SpMVs : %.2e seconds %.2e GFLOP/s (CSR).\n", (end-start)/10, FLOPS*10/(end-start) ); // === Call MKL with blocked SpMVs, using mkl_ccsrmm === char transa = 'n'; MKL_INT ldb = n, ldc=n; char matdescra[6] = {'g', 'l', 'n', 'c', 'x', 'x'}; // warm up mkl_ccsrmm( &transa, &num_rows, &num_vecs, &num_cols, MKL_ADDR(&c_one), matdescra, MKL_ADDR(hA.val), col, row, pntre, MKL_ADDR(hx.val), &ldb, MKL_ADDR(&c_zero), MKL_ADDR(hy.val), &ldc ); start = magma_wtime(); for (j=0; j<10; j++ ) mkl_ccsrmm( &transa, &num_rows, &num_vecs, &num_cols, MKL_ADDR(&c_one), matdescra, MKL_ADDR(hA.val), col, row, pntre, MKL_ADDR(hx.val), &ldb, MKL_ADDR(&c_zero), MKL_ADDR(hy.val), &ldc ); end = magma_wtime(); printf( "\n > MKL SpMM : %.2e seconds %.2e GFLOP/s (CSR).\n", (end-start)/10, FLOPS*10.*n/(end-start) ); TESTING_FREE_CPU( 
row ); TESTING_FREE_CPU( col ); row = NULL; col = NULL; #endif // MAGMA_WITH_MKL // copy matrix to GPU CHECK( magma_cmtransfer( hA, &dA, Magma_CPU, Magma_DEV, queue )); // SpMV on GPU (CSR) start = magma_sync_wtime( queue ); for (j=0; j<10; j++) CHECK( magma_c_spmv( c_one, dA, dx, c_zero, dy, queue )); end = magma_sync_wtime( queue ); printf( " > MAGMA: %.2e seconds %.2e GFLOP/s (standard CSR).\n", (end-start)/10, FLOPS*10.*n/(end-start) ); CHECK( magma_cmtransfer( dy, &hrefvec , Magma_DEV, Magma_CPU, queue )); magma_cmfree(&dA, queue ); // convert to SELLP and copy to GPU CHECK( magma_cmconvert( hA, &hA_SELLP, Magma_CSR, Magma_SELLP, queue )); CHECK( magma_cmtransfer( hA_SELLP, &dA_SELLP, Magma_CPU, Magma_DEV, queue )); magma_cmfree(&hA_SELLP, queue ); magma_cmfree( &dy, queue ); CHECK( magma_cvinit( &dy, Magma_DEV, dx.num_rows, dx.num_cols, c_zero, queue )); // SpMV on GPU (SELLP) start = magma_sync_wtime( queue ); for (j=0; j<10; j++) CHECK( magma_c_spmv( c_one, dA_SELLP, dx, c_zero, dy, queue )); end = magma_sync_wtime( queue ); printf( " > MAGMA: %.2e seconds %.2e GFLOP/s (SELLP).\n", (end-start)/10, FLOPS*10.*n/(end-start) ); CHECK( magma_cmtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue )); res = 0.0; for(magma_int_t k=0; k<hA.num_rows; k++ ) res=res + MAGMA_C_REAL(hcheck.val[k]) - MAGMA_C_REAL(hrefvec.val[k]); printf("%% |x-y|_F = %8.2e\n", res); if ( res < accuracy ) printf("%% tester spmm SELL-P: ok\n"); else printf("%% tester spmm SELL-P: failed\n"); magma_cmfree( &hcheck, queue ); magma_cmfree(&dA_SELLP, queue ); // SpMV on GPU (CUSPARSE - CSR) // CUSPARSE context // magma_cmfree( &dy, queue ); CHECK( magma_cvinit( &dy, Magma_DEV, dx.num_rows, dx.num_cols, c_zero, queue )); //#ifdef PRECISION_d start = magma_sync_wtime( queue ); CHECK_CUSPARSE( cusparseCreate( &cusparseHandle )); CHECK_CUSPARSE( cusparseSetStream( cusparseHandle, queue->cuda_stream() )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descr )); CHECK_CUSPARSE( cusparseSetMatType( descr, 
CUSPARSE_MATRIX_TYPE_GENERAL )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descr, CUSPARSE_INDEX_BASE_ZERO )); magmaFloatComplex alpha = c_one; magmaFloatComplex beta = c_zero; // copy matrix to GPU CHECK( magma_cmtransfer( hA, &dA, Magma_CPU, Magma_DEV, queue) ); for (j=0; j<10; j++) cusparseCcsrmm(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, dA.num_rows, n, dA.num_cols, dA.nnz, &alpha, descr, dA.dval, dA.drow, dA.dcol, dx.dval, dA.num_cols, &beta, dy.dval, dA.num_cols); end = magma_sync_wtime( queue ); printf( " > CUSPARSE: %.2e seconds %.2e GFLOP/s (CSR).\n", (end-start)/10, FLOPS*10*n/(end-start) ); CHECK( magma_cmtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue )); res = 0.0; for(magma_int_t k=0; k<hA.num_rows; k++ ) res=res + MAGMA_C_REAL(hcheck.val[k]) - MAGMA_C_REAL(hrefvec.val[k]); printf("%% |x-y|_F = %8.2e\n", res); if ( res < accuracy ) printf("%% tester spmm cuSPARSE: ok\n"); else printf("%% tester spmm cuSPARSE: failed\n"); magma_cmfree( &hcheck, queue ); cusparseDestroyMatDescr( descr ); cusparseDestroy( cusparseHandle ); descr = NULL; cusparseHandle = NULL; //#endif printf("\n\n"); // free CPU memory magma_cmfree(&hA, queue ); magma_cmfree(&hx, queue ); magma_cmfree(&hy, queue ); magma_cmfree(&hrefvec, queue ); // free GPU memory magma_cmfree(&dx, queue ); magma_cmfree(&dy, queue ); magma_cmfree(&dA, queue); i++; } cleanup: #ifdef MAGMA_WITH_MKL magma_free_cpu(pntre); #endif cusparseDestroyMatDescr( descr ); cusparseDestroy( cusparseHandle ); magma_cmfree(&hA, queue ); magma_cmfree(&dA, queue ); magma_cmfree(&hA_ELL, queue ); magma_cmfree(&dA_ELL, queue ); magma_cmfree(&hA_SELLP, queue ); magma_cmfree(&dA_SELLP, queue ); magma_queue_destroy( queue ); TESTING_FINALIZE(); return info; }
/* //////////////////////////////////////////////////////////////////////////// -- testing any solver */ int main( int argc, char** argv ) { magma_int_t info = 0; TESTING_CHECK( magma_init() ); magma_print_environment(); magma_copts zopts; magma_queue_t queue; magma_queue_create( 0, &queue ); magmaFloatComplex one = MAGMA_C_MAKE(1.0, 0.0); magmaFloatComplex zero = MAGMA_C_MAKE(0.0, 0.0); magma_c_matrix A={Magma_CSR}, B={Magma_CSR}, B_d={Magma_CSR}; magma_c_matrix x={Magma_CSR}, b={Magma_CSR}; int i=1; TESTING_CHECK( magma_cparse_opts( argc, argv, &zopts, &i, queue )); B.blocksize = zopts.blocksize; B.alignment = zopts.alignment; TESTING_CHECK( magma_csolverinfo_init( &zopts.solver_par, &zopts.precond_par, queue )); while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); TESTING_CHECK( magma_cm_5stencil( laplace_size, &A, queue )); } else { // file-matrix test TESTING_CHECK( magma_c_csr_mtx( &A, argv[i], queue )); } // for the eigensolver case zopts.solver_par.ev_length = A.num_cols; TESTING_CHECK( magma_ceigensolverinfo_init( &zopts.solver_par, queue )); // scale matrix TESTING_CHECK( magma_cmscale( &A, zopts.scaling, queue )); // preconditioner if ( zopts.solver_par.solver != Magma_ITERREF ) { TESTING_CHECK( magma_c_precondsetup( A, b, &zopts.solver_par, &zopts.precond_par, queue ) ); } TESTING_CHECK( magma_cmconvert( A, &B, Magma_CSR, zopts.output_format, queue )); printf( "\n%% matrix info: %lld-by-%lld with %lld nonzeros\n\n", (long long) A.num_rows, (long long) A.num_cols, (long long) A.nnz ); printf("matrixinfo = [\n"); printf("%% size (m x n) || nonzeros (nnz) || nnz/m || stored nnz\n"); printf("%%============================================================================%%\n"); printf(" %8lld %8lld %10lld %4lld %10lld\n", (long long) B.num_rows, (long long) B.num_cols, (long long) B.true_nnz, (long long) (B.true_nnz/B.num_rows), (long long) B.nnz ); 
printf("%%============================================================================%%\n"); printf("];\n"); TESTING_CHECK( magma_cmtransfer( B, &B_d, Magma_CPU, Magma_DEV, queue )); // vectors and initial guess TESTING_CHECK( magma_cvinit( &b, Magma_DEV, A.num_rows, 1, one, queue )); //magma_cvinit( &x, Magma_DEV, A.num_cols, 1, one, queue ); //magma_c_spmv( one, B_d, x, zero, b, queue ); // b = A x //magma_cmfree(&x, queue ); TESTING_CHECK( magma_cvinit( &x, Magma_DEV, A.num_cols, 1, zero, queue )); info = magma_c_solver( B_d, b, &x, &zopts, queue ); if( info != 0 ) { printf("%%error: solver returned: %s (%lld).\n", magma_strerror( info ), (long long) info ); } printf("convergence = [\n"); magma_csolverinfo( &zopts.solver_par, &zopts.precond_par, queue ); printf("];\n\n"); zopts.solver_par.verbose = 0; printf("solverinfo = [\n"); magma_csolverinfo( &zopts.solver_par, &zopts.precond_par, queue ); printf("];\n\n"); printf("precondinfo = [\n"); printf("%% setup runtime\n"); printf(" %.6f %.6f\n", zopts.precond_par.setuptime, zopts.precond_par.runtime ); printf("];\n\n"); magma_cmfree(&B_d, queue ); magma_cmfree(&B, queue ); magma_cmfree(&A, queue ); magma_cmfree(&x, queue ); magma_cmfree(&b, queue ); i++; } magma_queue_destroy( queue ); TESTING_CHECK( magma_finalize() ); return info; }
/* //////////////////////////////////////////////////////////////////////////// -- testing any solver */ int main( int argc, char** argv ) { magma_int_t info = 0; TESTING_INIT(); magma_copts zopts; magma_queue_t queue=NULL; magma_queue_create( 0, &queue ); magmaFloatComplex one = MAGMA_C_MAKE(1.0, 0.0); magmaFloatComplex zero = MAGMA_C_MAKE(0.0, 0.0); magma_c_matrix A={Magma_CSR}, B={Magma_CSR}, B_d={Magma_CSR}; magma_c_matrix x={Magma_CSR}, b={Magma_CSR}, t={Magma_CSR}; magma_c_matrix x1={Magma_CSR}, x2={Magma_CSR}; //Chronometry real_Double_t tempo1, tempo2; int i=1; CHECK( magma_cparse_opts( argc, argv, &zopts, &i, queue )); B.blocksize = zopts.blocksize; B.alignment = zopts.alignment; CHECK( magma_csolverinfo_init( &zopts.solver_par, &zopts.precond_par, queue )); while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); CHECK( magma_cm_5stencil( laplace_size, &A, queue )); } else { // file-matrix test CHECK( magma_c_csr_mtx( &A, argv[i], queue )); } printf( "\n%% matrix info: %d-by-%d with %d nonzeros\n\n", int(A.num_rows), int(A.num_cols), int(A.nnz) ); // for the eigensolver case zopts.solver_par.ev_length = A.num_rows; CHECK( magma_ceigensolverinfo_init( &zopts.solver_par, queue )); // scale matrix CHECK( magma_cmscale( &A, zopts.scaling, queue )); CHECK( magma_cmconvert( A, &B, Magma_CSR, zopts.output_format, queue )); CHECK( magma_cmtransfer( B, &B_d, Magma_CPU, Magma_DEV, queue )); // vectors and initial guess CHECK( magma_cvinit( &b, Magma_DEV, A.num_cols, 1, one, queue )); CHECK( magma_cvinit( &x, Magma_DEV, A.num_cols, 1, zero, queue )); CHECK( magma_cvinit( &t, Magma_DEV, A.num_cols, 1, zero, queue )); CHECK( magma_cvinit( &x1, Magma_DEV, A.num_cols, 1, zero, queue )); CHECK( magma_cvinit( &x2, Magma_DEV, A.num_cols, 1, zero, queue )); //preconditioner CHECK( magma_c_precondsetup( B_d, b, &zopts.solver_par, &zopts.precond_par, queue ) ); float residual; CHECK( 
magma_cresidual( B_d, b, x, &residual, queue )); zopts.solver_par.init_res = residual; printf("data = [\n"); printf("%%runtime left preconditioner:\n"); tempo1 = magma_sync_wtime( queue ); info = magma_c_applyprecond_left( MagmaNoTrans, B_d, b, &x1, &zopts.precond_par, queue ); tempo2 = magma_sync_wtime( queue ); if( info != 0 ){ printf("error: preconditioner returned: %s (%d).\n", magma_strerror( info ), int(info) ); } CHECK( magma_cresidual( B_d, b, x1, &residual, queue )); printf("%.8e %.8e\n", tempo2-tempo1, residual ); printf("%%runtime right preconditioner:\n"); tempo1 = magma_sync_wtime( queue ); info = magma_c_applyprecond_right( MagmaNoTrans, B_d, b, &x2, &zopts.precond_par, queue ); tempo2 = magma_sync_wtime( queue ); if( info != 0 ){ printf("error: preconditioner returned: %s (%d).\n", magma_strerror( info ), int(info) ); } CHECK( magma_cresidual( B_d, b, x2, &residual, queue )); printf("%.8e %.8e\n", tempo2-tempo1, residual ); printf("];\n"); info = magma_c_applyprecond_left( MagmaNoTrans, B_d, b, &t, &zopts.precond_par, queue ); info = magma_c_applyprecond_right( MagmaNoTrans, B_d, t, &x, &zopts.precond_par, queue ); CHECK( magma_cresidual( B_d, b, x, &residual, queue )); zopts.solver_par.final_res = residual; magma_csolverinfo( &zopts.solver_par, &zopts.precond_par, queue ); magma_cmfree(&B_d, queue ); magma_cmfree(&B, queue ); magma_cmfree(&A, queue ); magma_cmfree(&x, queue ); magma_cmfree(&x1, queue ); magma_cmfree(&x2, queue ); magma_cmfree(&b, queue ); magma_cmfree(&t, queue ); i++; } cleanup: magma_cmfree(&B_d, queue ); magma_cmfree(&B, queue ); magma_cmfree(&A, queue ); magma_cmfree(&x, queue ); magma_cmfree(&x1, queue ); magma_cmfree(&x2, queue ); magma_cmfree(&b, queue ); magma_cmfree(&t, queue ); magma_csolverinfo_free( &zopts.solver_par, &zopts.precond_par, queue ); magma_queue_destroy( queue ); TESTING_FINALIZE(); return info; }
/* //////////////////////////////////////////////////////////////////////////// -- running magma_clobpcg */ int main( int argc, char** argv) { TESTING_INIT(); magma_c_solver_par solver_par; solver_par.epsilon = 1e-5; solver_par.maxiter = 1000; solver_par.verbose = 0; solver_par.num_eigenvalues = 32; solver_par.solver = Magma_LOBPCG; magma_c_preconditioner precond_par; precond_par.solver = Magma_JACOBI; int precond = 0; int format = 0; int scale = 0; magma_scale_t scaling = Magma_NOSCALE; magma_c_sparse_matrix A, B, dA; B.blocksize = 8; B.alignment = 8; B.storage_type = Magma_CSR; int i; for( i = 1; i < argc; ++i ) { if ( strcmp("--format", argv[i]) == 0 ) { format = atoi( argv[++i] ); switch( format ) { case 0: B.storage_type = Magma_CSR; break; case 1: B.storage_type = Magma_ELL; break; case 2: B.storage_type = Magma_ELLRT; break; case 3: B.storage_type = Magma_SELLP; break; } }else if ( strcmp("--mscale", argv[i]) == 0 ) { scale = atoi( argv[++i] ); switch( scale ) { case 0: scaling = Magma_NOSCALE; break; case 1: scaling = Magma_UNITDIAG; break; case 2: scaling = Magma_UNITROW; break; } }else if ( strcmp("--precond", argv[i]) == 0 ) { format = atoi( argv[++i] ); switch( precond ) { case 0: precond_par.solver = Magma_JACOBI; break; } }else if ( strcmp("--blocksize", argv[i]) == 0 ) { B.blocksize = atoi( argv[++i] ); }else if ( strcmp("--alignment", argv[i]) == 0 ) { B.alignment = atoi( argv[++i] ); }else if ( strcmp("--verbose", argv[i]) == 0 ) { solver_par.verbose = atoi( argv[++i] ); } else if ( strcmp("--maxiter", argv[i]) == 0 ) { solver_par.maxiter = atoi( argv[++i] ); } else if ( strcmp("--tol", argv[i]) == 0 ) { sscanf( argv[++i], "%f", &solver_par.epsilon ); } else if ( strcmp("--eigenvalues", argv[i]) == 0 ) { solver_par.num_eigenvalues = atoi( argv[++i] ); } else break; } printf( "\n# usage: ./run_clobpcg" " [ --format %d (0=CSR, 1=ELL, 2=ELLRT, 4=SELLP)" " [ --blocksize %d --alignment %d ]" " --mscale %d (0=no, 1=unitdiag, 2=unitrownrm)" " --verbose %d 
(0=summary, k=details every k iterations)" " --maxiter %d --tol %.2e" " --preconditioner %d (0=Jacobi) " " --eigenvalues %d ]" " matrices \n\n", format, (int) B.blocksize, (int) B.alignment, (int) scale, (int) solver_par.verbose, (int) solver_par.maxiter, solver_par.epsilon, precond, (int) solver_par.num_eigenvalues); while( i < argc ){ magma_c_csr_mtx( &A, argv[i] ); printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n", (int) A.num_rows,(int) A.num_cols,(int) A.nnz ); // scale initial guess magma_cmscale( &A, scaling ); solver_par.ev_length = A.num_cols; magma_c_sparse_matrix A2; A2.storage_type = Magma_SELLC; A2.blocksize = 8; A2.alignment = 4; magma_c_mconvert( A, &A2, Magma_CSR, A2.storage_type ); // copy matrix to GPU magma_c_mtransfer( A2, &dA, Magma_CPU, Magma_DEV); magma_csolverinfo_init( &solver_par, &precond_par ); // inside the loop! // as the matrix size has influence on the EV-length real_Double_t gpu_time; // Find the blockSize smallest eigenvalues and corresponding eigen-vectors gpu_time = magma_wtime(); magma_clobpcg( dA, &solver_par ); gpu_time = magma_wtime() - gpu_time; printf("Time (sec) = %7.2f\n", gpu_time); printf("solver runtime (sec) = %7.2f\n", solver_par.runtime ); magma_csolverinfo_free( &solver_par, &precond_par ); magma_c_mfree( &dA ); magma_c_mfree( &A2 ); magma_c_mfree( &A ); i++; } TESTING_FINALIZE(); return 0; }