/* //////////////////////////////////////////////////////////////////////////// -- testing any solver */ int main( int argc, char** argv ) { magma_int_t info = 0; TESTING_INIT(); magma_copts zopts; magma_queue_t queue=NULL; magma_queue_create( /*devices[ opts->device ],*/ &queue ); real_Double_t res; magma_c_matrix A={Magma_CSR}, AT={Magma_CSR}, A2={Magma_CSR}, B={Magma_CSR}, B_d={Magma_CSR}; int i=1; real_Double_t start, end; CHECK( magma_cparse_opts( argc, argv, &zopts, &i, queue )); B.blocksize = zopts.blocksize; B.alignment = zopts.alignment; while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); CHECK( magma_cm_5stencil( laplace_size, &A, queue )); } else { // file-matrix test CHECK( magma_c_csr_mtx( &A, argv[i], queue )); } printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n", (int) A.num_rows,(int) A.num_cols,(int) A.nnz ); // scale matrix CHECK( magma_cmscale( &A, zopts.scaling, queue )); // remove nonzeros in matrix start = magma_sync_wtime( queue ); for (int j=0; j<10; j++) CHECK( magma_cmcsrcompressor( &A, queue )); end = magma_sync_wtime( queue ); printf( " > MAGMA CPU: %.2e seconds.\n", (end-start)/10 ); // transpose CHECK( magma_cmtranspose( A, &AT, queue )); // convert, copy back and forth to check everything works CHECK( magma_cmconvert( AT, &B, Magma_CSR, Magma_CSR, queue )); magma_cmfree(&AT, queue ); CHECK( magma_cmtransfer( B, &B_d, Magma_CPU, Magma_DEV, queue )); magma_cmfree(&B, queue ); start = magma_sync_wtime( queue ); for (int j=0; j<10; j++) CHECK( magma_cmcsrcompressor_gpu( &B_d, queue )); end = magma_sync_wtime( queue ); printf( " > MAGMA GPU: %.2e seconds.\n", (end-start)/10 ); CHECK( magma_cmtransfer( B_d, &B, Magma_DEV, Magma_CPU, queue )); magma_cmfree(&B_d, queue ); CHECK( magma_cmconvert( B, &AT, Magma_CSR, Magma_CSR, queue )); magma_cmfree(&B, queue ); // transpose back CHECK( magma_cmtranspose( AT, &A2, queue )); magma_cmfree(&AT, queue ); 
CHECK( magma_cmdiff( A, A2, &res, queue )); printf("# ||A-B||_F = %8.2e\n", res); if ( res < .000001 ) printf("# tester matrix compressor: ok\n"); else printf("# tester matrix compressor: failed\n"); magma_cmfree(&A, queue ); magma_cmfree(&A2, queue ); i++; } cleanup: magma_cmfree(&AT, queue ); magma_cmfree(&B, queue ); magma_cmfree(&A, queue ); magma_cmfree(&A2, queue ); magma_queue_destroy( queue ); TESTING_FINALIZE(); return info; }
/* //////////////////////////////////////////////////////////////////////////// -- testing any solver */ int main( int argc, char** argv ) { TESTING_INIT(); magma_copts zopts; magma_queue_t queue; magma_queue_create( /*devices[ opts->device ],*/ &queue ); int i=1; magma_cparse_opts( argc, argv, &zopts, &i, queue ); magmaFloatComplex one = MAGMA_C_MAKE(1.0, 0.0); magmaFloatComplex zero = MAGMA_C_MAKE(0.0, 0.0); magma_c_sparse_matrix A, B, B_d; magma_c_vector x, b; B.blocksize = zopts.blocksize; B.alignment = zopts.alignment; if ( zopts.solver_par.solver != Magma_PCG && zopts.solver_par.solver != Magma_PGMRES && zopts.solver_par.solver != Magma_PBICGSTAB && zopts.solver_par.solver != Magma_ITERREF ) zopts.precond_par.solver = Magma_NONE; magma_csolverinfo_init( &zopts.solver_par, &zopts.precond_par, queue ); while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); magma_cm_5stencil( laplace_size, &A, queue ); } else { // file-matrix test magma_c_csr_mtx( &A, argv[i], queue ); } printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n", (int) A.num_rows,(int) A.num_cols,(int) A.nnz ); // for the eigensolver case zopts.solver_par.ev_length = A.num_rows; magma_ceigensolverinfo_init( &zopts.solver_par, queue ); // scale matrix magma_cmscale( &A, zopts.scaling, queue ); magma_c_mconvert( A, &B, Magma_CSR, zopts.output_format, queue ); magma_c_mtransfer( B, &B_d, Magma_CPU, Magma_DEV, queue ); // vectors and initial guess magma_c_vinit( &b, Magma_DEV, A.num_cols, one, queue ); magma_c_vinit( &x, Magma_DEV, A.num_cols, one, queue ); magma_c_spmv( one, B_d, x, zero, b, queue ); // b = A x magma_c_vfree(&x, queue ); magma_c_vinit( &x, Magma_DEV, A.num_cols, zero, queue ); magma_c_solver( B_d, b, &x, &zopts, queue ); magma_csolverinfo( &zopts.solver_par, &zopts.precond_par, queue ); magma_c_mfree(&B_d, queue ); magma_c_mfree(&B, queue ); magma_c_mfree(&A, queue ); magma_c_vfree(&x, queue ); 
magma_c_vfree(&b, queue ); i++; } magma_csolverinfo_free( &zopts.solver_par, &zopts.precond_par, queue ); magma_queue_destroy( queue ); TESTING_FINALIZE(); return 0; }
/**
    Scales a sparse matrix in place.

    For Magma_UNITROW and Magma_UNITDIAG a per-row factor tmp[z] is computed
    and every stored entry a(r,c) is replaced by a(r,c) * tmp[c] * tmp[r]:
      - Magma_UNITROW : tmp[z] = 1 / sqrt( sum over row z of Re(a)^2 )
      - Magma_UNITDIAG: tmp[z] = 1 / sqrt( Re( a(z,z) ) )
    Magma_NOSCALE leaves the matrix untouched.

    The scaling itself is done on a host matrix in Magma_CSRCOO format.  Any
    other input is copied to the host, converted to Magma_CSRCOO, scaled, and
    converted/transferred back to its original format and location.

    Non-square matrices are not scaled (a warning is printed), because the
    factor array has num_rows entries and is also indexed by column.

    @param[in,out] A        matrix to scale
    @param[in]     scaling  Magma_NOSCALE, Magma_UNITROW or Magma_UNITDIAG
    @param[in]     queue    queue handed on to the transfer/convert routines

    @return MAGMA_SUCCESS, or the error code of the first allocation,
            transfer or conversion that failed.  If the final transfer back
            into *A fails, the original content of *A has already been freed.
            An unsupported scaling value only prints a message and still
            returns MAGMA_SUCCESS (unchanged legacy behaviour).
*/
extern "C" magma_int_t
magma_cmscale(
    magma_c_sparse_matrix *A,
    magma_scale_t scaling,
    magma_queue_t queue )
{
    // Nothing to do: avoid the transfer/convert round trip entirely.
    if ( scaling == Magma_NOSCALE ) {
        return MAGMA_SUCCESS;
    }
    if ( scaling != Magma_UNITROW && scaling != Magma_UNITDIAG ) {
        printf( "error: scaling not supported\n" );
        return MAGMA_SUCCESS;
    }
    // tmp[] below has num_rows entries but is indexed by column as well.
    if ( A->num_rows != A->num_cols ) {
        printf( "warning: non-square matrix, no scaling applied.\n" );
        return MAGMA_SUCCESS;
    }

    if ( A->memory_location == Magma_CPU && A->storage_type == Magma_CSRCOO ) {
        magmaFloatComplex *tmp = NULL;
        magma_int_t info = magma_cmalloc_cpu( &tmp, A->num_rows );
        if ( info != MAGMA_SUCCESS ) {
            return info;
        }

        for( magma_int_t z=0; z<A->num_rows; z++ ) {
            magmaFloatComplex s = MAGMA_C_MAKE( 0.0, 0.0 );
            if ( scaling == Magma_UNITROW ) {
                // scale to unit rownorm
                // NOTE(review): only the real part enters the row norm;
                // confirm whether the imaginary part is ignored on purpose.
                for( magma_int_t f=A->row[z]; f<A->row[z+1]; f++ )
                    s += MAGMA_C_REAL(A->val[f])*MAGMA_C_REAL(A->val[f]);
            }
            else {
                // scale to unit diagonal
                for( magma_int_t f=A->row[z]; f<A->row[z+1]; f++ ) {
                    if ( A->col[f] == z ) {
                        s = A->val[f];
                    }
                }
                // NOTE(review): a zero diagonal is only reported; the factor
                // computed below is then not finite.
                if ( s == MAGMA_C_MAKE( 0.0, 0.0 ) )
                    printf("error: zero diagonal element.\n");
            }
            tmp[z] = MAGMA_C_MAKE( 1.0/sqrt( MAGMA_C_REAL( s ) ), 0.0 );
        }

        // symmetric scaling: a(r,c) <- a(r,c) * tmp[c] * tmp[r]
        for( magma_int_t z=0; z<A->nnz; z++ ) {
            A->val[z] = A->val[z] * tmp[A->col[z]] * tmp[A->rowidx[z]];
        }

        magma_free_cpu( tmp );
        return MAGMA_SUCCESS;
    }
    else {
        magma_c_sparse_matrix hA, CSRA;
        magma_storage_t A_storage = A->storage_type;
        magma_location_t A_location = A->memory_location;
        magma_int_t info;

        // host copy in the original storage format
        info = magma_c_mtransfer( *A, &hA, A->memory_location, Magma_CPU, queue );
        if ( info != MAGMA_SUCCESS ) {
            return info;
        }

        // host copy in CSRCOO; the intermediate copy is no longer needed
        info = magma_c_mconvert( hA, &CSRA, hA.storage_type, Magma_CSRCOO, queue );
        magma_c_mfree( &hA, queue );
        if ( info != MAGMA_SUCCESS ) {
            return info;
        }

        info = magma_cmscale( &CSRA, scaling, queue );
        if ( info != MAGMA_SUCCESS ) {
            magma_c_mfree( &CSRA, queue );
            return info;
        }

        // Build the replacement before touching *A, so that a failed
        // conversion leaves the caller's matrix intact.
        info = magma_c_mconvert( CSRA, &hA, Magma_CSRCOO, A_storage, queue );
        magma_c_mfree( &CSRA, queue );
        if ( info != MAGMA_SUCCESS ) {
            return info;
        }

        magma_c_mfree( A, queue );
        info = magma_c_mtransfer( hA, A, Magma_CPU, A_location, queue );
        magma_c_mfree( &hA, queue );

        return info;
    }
}
/* //////////////////////////////////////////////////////////////////////////// -- testing any solver */ int main( int argc, char** argv) { TESTING_INIT(); magma_copts zopts; int i=1; magma_cparse_opts( argc, argv, &zopts, &i); magmaFloatComplex one = MAGMA_C_MAKE(1.0, 0.0); magmaFloatComplex zero = MAGMA_C_MAKE(0.0, 0.0); magma_c_sparse_matrix A, B, B_d; magma_c_vector x, b; B.blocksize = zopts.blocksize; B.alignment = zopts.alignment; if ( zopts.solver_par.solver != Magma_PCG && zopts.solver_par.solver != Magma_PGMRES && zopts.solver_par.solver != Magma_PBICGSTAB && zopts.solver_par.solver != Magma_ITERREF ) zopts.precond_par.solver = Magma_NONE; magma_csolverinfo_init( &zopts.solver_par, &zopts.precond_par ); while( i < argc ){ magma_c_csr_mtx( &A, argv[i] ); printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n", (int) A.num_rows,(int) A.num_cols,(int) A.nnz ); // scale matrix magma_cmscale( &A, zopts.scaling ); magma_c_mconvert( A, &B, Magma_CSR, zopts.output_format ); magma_c_mtransfer( B, &B_d, Magma_CPU, Magma_DEV ); // vectors and initial guess magma_c_vinit( &b, Magma_DEV, A.num_cols, one ); magma_c_vinit( &x, Magma_DEV, A.num_cols, one ); magma_c_spmv( one, B_d, x, zero, b ); // b = A x magma_c_vfree(&x); magma_c_vinit( &x, Magma_DEV, A.num_cols, zero ); magma_c_solver( B_d, b, &x, &zopts ); magma_csolverinfo( &zopts.solver_par, &zopts.precond_par ); magma_c_mfree(&B_d); magma_c_mfree(&B); magma_c_mfree(&A); magma_c_vfree(&x); magma_c_vfree(&b); i++; } magma_csolverinfo_free( &zopts.solver_par, &zopts.precond_par ); TESTING_FINALIZE(); return 0; }
/* //////////////////////////////////////////////////////////////////////////// -- testing any solver */ int main( int argc, char** argv ) { magma_int_t info = 0; TESTING_CHECK( magma_init() ); magma_print_environment(); magma_copts zopts; magma_queue_t queue; magma_queue_create( 0, &queue ); magmaFloatComplex one = MAGMA_C_MAKE(1.0, 0.0); magmaFloatComplex zero = MAGMA_C_MAKE(0.0, 0.0); magma_c_matrix A={Magma_CSR}, B={Magma_CSR}, B_d={Magma_CSR}; magma_c_matrix x={Magma_CSR}, b={Magma_CSR}; int i=1; TESTING_CHECK( magma_cparse_opts( argc, argv, &zopts, &i, queue )); B.blocksize = zopts.blocksize; B.alignment = zopts.alignment; TESTING_CHECK( magma_csolverinfo_init( &zopts.solver_par, &zopts.precond_par, queue )); while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); TESTING_CHECK( magma_cm_5stencil( laplace_size, &A, queue )); } else { // file-matrix test TESTING_CHECK( magma_c_csr_mtx( &A, argv[i], queue )); } // for the eigensolver case zopts.solver_par.ev_length = A.num_cols; TESTING_CHECK( magma_ceigensolverinfo_init( &zopts.solver_par, queue )); // scale matrix TESTING_CHECK( magma_cmscale( &A, zopts.scaling, queue )); // preconditioner if ( zopts.solver_par.solver != Magma_ITERREF ) { TESTING_CHECK( magma_c_precondsetup( A, b, &zopts.solver_par, &zopts.precond_par, queue ) ); } TESTING_CHECK( magma_cmconvert( A, &B, Magma_CSR, zopts.output_format, queue )); printf( "\n%% matrix info: %lld-by-%lld with %lld nonzeros\n\n", (long long) A.num_rows, (long long) A.num_cols, (long long) A.nnz ); printf("matrixinfo = [\n"); printf("%% size (m x n) || nonzeros (nnz) || nnz/m || stored nnz\n"); printf("%%============================================================================%%\n"); printf(" %8lld %8lld %10lld %4lld %10lld\n", (long long) B.num_rows, (long long) B.num_cols, (long long) B.true_nnz, (long long) (B.true_nnz/B.num_rows), (long long) B.nnz ); 
printf("%%============================================================================%%\n"); printf("];\n"); TESTING_CHECK( magma_cmtransfer( B, &B_d, Magma_CPU, Magma_DEV, queue )); // vectors and initial guess TESTING_CHECK( magma_cvinit( &b, Magma_DEV, A.num_rows, 1, one, queue )); //magma_cvinit( &x, Magma_DEV, A.num_cols, 1, one, queue ); //magma_c_spmv( one, B_d, x, zero, b, queue ); // b = A x //magma_cmfree(&x, queue ); TESTING_CHECK( magma_cvinit( &x, Magma_DEV, A.num_cols, 1, zero, queue )); info = magma_c_solver( B_d, b, &x, &zopts, queue ); if( info != 0 ) { printf("%%error: solver returned: %s (%lld).\n", magma_strerror( info ), (long long) info ); } printf("convergence = [\n"); magma_csolverinfo( &zopts.solver_par, &zopts.precond_par, queue ); printf("];\n\n"); zopts.solver_par.verbose = 0; printf("solverinfo = [\n"); magma_csolverinfo( &zopts.solver_par, &zopts.precond_par, queue ); printf("];\n\n"); printf("precondinfo = [\n"); printf("%% setup runtime\n"); printf(" %.6f %.6f\n", zopts.precond_par.setuptime, zopts.precond_par.runtime ); printf("];\n\n"); magma_cmfree(&B_d, queue ); magma_cmfree(&B, queue ); magma_cmfree(&A, queue ); magma_cmfree(&x, queue ); magma_cmfree(&b, queue ); i++; } magma_queue_destroy( queue ); TESTING_CHECK( magma_finalize() ); return info; }
/* //////////////////////////////////////////////////////////////////////////// -- testing any solver */ int main( int argc, char** argv ) { magma_int_t info = 0; TESTING_INIT(); magma_copts zopts; magma_queue_t queue=NULL; magma_queue_create( 0, &queue ); magmaFloatComplex one = MAGMA_C_MAKE(1.0, 0.0); magmaFloatComplex zero = MAGMA_C_MAKE(0.0, 0.0); magma_c_matrix A={Magma_CSR}, B={Magma_CSR}, B_d={Magma_CSR}; magma_c_matrix x={Magma_CSR}, b={Magma_CSR}, t={Magma_CSR}; magma_c_matrix x1={Magma_CSR}, x2={Magma_CSR}; //Chronometry real_Double_t tempo1, tempo2; int i=1; CHECK( magma_cparse_opts( argc, argv, &zopts, &i, queue )); B.blocksize = zopts.blocksize; B.alignment = zopts.alignment; CHECK( magma_csolverinfo_init( &zopts.solver_par, &zopts.precond_par, queue )); while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); CHECK( magma_cm_5stencil( laplace_size, &A, queue )); } else { // file-matrix test CHECK( magma_c_csr_mtx( &A, argv[i], queue )); } printf( "\n%% matrix info: %d-by-%d with %d nonzeros\n\n", int(A.num_rows), int(A.num_cols), int(A.nnz) ); // for the eigensolver case zopts.solver_par.ev_length = A.num_rows; CHECK( magma_ceigensolverinfo_init( &zopts.solver_par, queue )); // scale matrix CHECK( magma_cmscale( &A, zopts.scaling, queue )); CHECK( magma_cmconvert( A, &B, Magma_CSR, zopts.output_format, queue )); CHECK( magma_cmtransfer( B, &B_d, Magma_CPU, Magma_DEV, queue )); // vectors and initial guess CHECK( magma_cvinit( &b, Magma_DEV, A.num_cols, 1, one, queue )); CHECK( magma_cvinit( &x, Magma_DEV, A.num_cols, 1, zero, queue )); CHECK( magma_cvinit( &t, Magma_DEV, A.num_cols, 1, zero, queue )); CHECK( magma_cvinit( &x1, Magma_DEV, A.num_cols, 1, zero, queue )); CHECK( magma_cvinit( &x2, Magma_DEV, A.num_cols, 1, zero, queue )); //preconditioner CHECK( magma_c_precondsetup( B_d, b, &zopts.solver_par, &zopts.precond_par, queue ) ); float residual; CHECK( 
magma_cresidual( B_d, b, x, &residual, queue )); zopts.solver_par.init_res = residual; printf("data = [\n"); printf("%%runtime left preconditioner:\n"); tempo1 = magma_sync_wtime( queue ); info = magma_c_applyprecond_left( MagmaNoTrans, B_d, b, &x1, &zopts.precond_par, queue ); tempo2 = magma_sync_wtime( queue ); if( info != 0 ){ printf("error: preconditioner returned: %s (%d).\n", magma_strerror( info ), int(info) ); } CHECK( magma_cresidual( B_d, b, x1, &residual, queue )); printf("%.8e %.8e\n", tempo2-tempo1, residual ); printf("%%runtime right preconditioner:\n"); tempo1 = magma_sync_wtime( queue ); info = magma_c_applyprecond_right( MagmaNoTrans, B_d, b, &x2, &zopts.precond_par, queue ); tempo2 = magma_sync_wtime( queue ); if( info != 0 ){ printf("error: preconditioner returned: %s (%d).\n", magma_strerror( info ), int(info) ); } CHECK( magma_cresidual( B_d, b, x2, &residual, queue )); printf("%.8e %.8e\n", tempo2-tempo1, residual ); printf("];\n"); info = magma_c_applyprecond_left( MagmaNoTrans, B_d, b, &t, &zopts.precond_par, queue ); info = magma_c_applyprecond_right( MagmaNoTrans, B_d, t, &x, &zopts.precond_par, queue ); CHECK( magma_cresidual( B_d, b, x, &residual, queue )); zopts.solver_par.final_res = residual; magma_csolverinfo( &zopts.solver_par, &zopts.precond_par, queue ); magma_cmfree(&B_d, queue ); magma_cmfree(&B, queue ); magma_cmfree(&A, queue ); magma_cmfree(&x, queue ); magma_cmfree(&x1, queue ); magma_cmfree(&x2, queue ); magma_cmfree(&b, queue ); magma_cmfree(&t, queue ); i++; } cleanup: magma_cmfree(&B_d, queue ); magma_cmfree(&B, queue ); magma_cmfree(&A, queue ); magma_cmfree(&x, queue ); magma_cmfree(&x1, queue ); magma_cmfree(&x2, queue ); magma_cmfree(&b, queue ); magma_cmfree(&t, queue ); magma_csolverinfo_free( &zopts.solver_par, &zopts.precond_par, queue ); magma_queue_destroy( queue ); TESTING_FINALIZE(); return info; }
/* //////////////////////////////////////////////////////////////////////////// -- running magma_clobpcg */ int main( int argc, char** argv) { TESTING_INIT(); magma_c_solver_par solver_par; solver_par.epsilon = 1e-5; solver_par.maxiter = 1000; solver_par.verbose = 0; solver_par.num_eigenvalues = 32; solver_par.solver = Magma_LOBPCG; magma_c_preconditioner precond_par; precond_par.solver = Magma_JACOBI; int precond = 0; int format = 0; int scale = 0; magma_scale_t scaling = Magma_NOSCALE; magma_c_sparse_matrix A, B, dA; B.blocksize = 8; B.alignment = 8; B.storage_type = Magma_CSR; int i; for( i = 1; i < argc; ++i ) { if ( strcmp("--format", argv[i]) == 0 ) { format = atoi( argv[++i] ); switch( format ) { case 0: B.storage_type = Magma_CSR; break; case 1: B.storage_type = Magma_ELL; break; case 2: B.storage_type = Magma_ELLRT; break; case 3: B.storage_type = Magma_SELLP; break; } }else if ( strcmp("--mscale", argv[i]) == 0 ) { scale = atoi( argv[++i] ); switch( scale ) { case 0: scaling = Magma_NOSCALE; break; case 1: scaling = Magma_UNITDIAG; break; case 2: scaling = Magma_UNITROW; break; } }else if ( strcmp("--precond", argv[i]) == 0 ) { format = atoi( argv[++i] ); switch( precond ) { case 0: precond_par.solver = Magma_JACOBI; break; } }else if ( strcmp("--blocksize", argv[i]) == 0 ) { B.blocksize = atoi( argv[++i] ); }else if ( strcmp("--alignment", argv[i]) == 0 ) { B.alignment = atoi( argv[++i] ); }else if ( strcmp("--verbose", argv[i]) == 0 ) { solver_par.verbose = atoi( argv[++i] ); } else if ( strcmp("--maxiter", argv[i]) == 0 ) { solver_par.maxiter = atoi( argv[++i] ); } else if ( strcmp("--tol", argv[i]) == 0 ) { sscanf( argv[++i], "%f", &solver_par.epsilon ); } else if ( strcmp("--eigenvalues", argv[i]) == 0 ) { solver_par.num_eigenvalues = atoi( argv[++i] ); } else break; } printf( "\n# usage: ./run_clobpcg" " [ --format %d (0=CSR, 1=ELL, 2=ELLRT, 4=SELLP)" " [ --blocksize %d --alignment %d ]" " --mscale %d (0=no, 1=unitdiag, 2=unitrownrm)" " --verbose %d 
(0=summary, k=details every k iterations)" " --maxiter %d --tol %.2e" " --preconditioner %d (0=Jacobi) " " --eigenvalues %d ]" " matrices \n\n", format, (int) B.blocksize, (int) B.alignment, (int) scale, (int) solver_par.verbose, (int) solver_par.maxiter, solver_par.epsilon, precond, (int) solver_par.num_eigenvalues); while( i < argc ){ magma_c_csr_mtx( &A, argv[i] ); printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n", (int) A.num_rows,(int) A.num_cols,(int) A.nnz ); // scale initial guess magma_cmscale( &A, scaling ); solver_par.ev_length = A.num_cols; magma_c_sparse_matrix A2; A2.storage_type = Magma_SELLC; A2.blocksize = 8; A2.alignment = 4; magma_c_mconvert( A, &A2, Magma_CSR, A2.storage_type ); // copy matrix to GPU magma_c_mtransfer( A2, &dA, Magma_CPU, Magma_DEV); magma_csolverinfo_init( &solver_par, &precond_par ); // inside the loop! // as the matrix size has influence on the EV-length real_Double_t gpu_time; // Find the blockSize smallest eigenvalues and corresponding eigen-vectors gpu_time = magma_wtime(); magma_clobpcg( dA, &solver_par ); gpu_time = magma_wtime() - gpu_time; printf("Time (sec) = %7.2f\n", gpu_time); printf("solver runtime (sec) = %7.2f\n", solver_par.runtime ); magma_csolverinfo_free( &solver_par, &precond_par ); magma_c_mfree( &dA ); magma_c_mfree( &A2 ); magma_c_mfree( &A ); i++; } TESTING_FINALIZE(); return 0; }