/**
 * Adds the scalar `add` to every stored diagonal entry of the sparse matrix A.
 *
 * If A is stored on the CPU in CSRCOO format the update is done in place on
 * the entries whose column index equals their row index. Otherwise A is
 * transferred to the CPU, converted to CSRCOO, updated by a recursive call,
 * and then converted/transferred back to its original storage type and
 * memory location (A is freed and rebuilt in that process).
 *
 * @param A      matrix to modify in place
 * @param add    value added to each stored diagonal entry
 * @param queue  queue handed to all MAGMA helper calls
 * @return MAGMA_SUCCESS (return codes of the helper calls are not inspected)
 */
extern "C" magma_int_t
magma_cmdiagadd(
    magma_c_sparse_matrix *A,
    magmaFloatComplex add,
    magma_queue_t queue )
{
    if ( A->memory_location != Magma_CPU || A->storage_type != Magma_CSRCOO ) {
        // Remember where and how A was stored so it can be restored afterwards.
        magma_storage_t orig_storage = A->storage_type;
        magma_location_t orig_location = A->memory_location;

        magma_c_sparse_matrix host_tmp, coo_tmp;

        // Bring A to the host and into CSRCOO, then update that copy.
        magma_c_mtransfer( *A, &host_tmp, A->memory_location, Magma_CPU, queue );
        magma_c_mconvert( host_tmp, &coo_tmp, host_tmp.storage_type,
                          Magma_CSRCOO, queue );
        magma_cmdiagadd( &coo_tmp, add, queue );

        // Drop the stale data before rebuilding A from the updated copy.
        magma_c_mfree( &host_tmp, queue );
        magma_c_mfree( A, queue );

        magma_c_mconvert( coo_tmp, &host_tmp, Magma_CSRCOO, orig_storage, queue );
        magma_c_mtransfer( host_tmp, A, Magma_CPU, orig_location, queue );

        magma_c_mfree( &host_tmp, queue );
        magma_c_mfree( &coo_tmp, queue );
        return MAGMA_SUCCESS;
    }

    // Host CSRCOO: rowidx[] holds the row of every nonzero, so a diagonal
    // entry is simply one whose row and column indices coincide.
    for ( magma_int_t k = 0; k < A->nnz; ++k ) {
        if ( A->rowidx[k] == A->col[k] ) {
            A->val[k] = A->val[k] + add;
        }
    }
    return MAGMA_SUCCESS;
}
/**
 * Runs solver_par->maxiter sweeps of the block-asynchronous iteration
 * (magma_cbajac_csr) on the system A x = b and records timing and residuals
 * in solver_par.
 *
 * @param A           system matrix; it is transferred from Magma_CPU to
 *                    Magma_DEV here and split on the host with block size 256
 * @param b           right-hand side
 * @param x           in: initial guess, out: iterate after the last sweep
 * @param solver_par  in: maxiter; out: solver, info, init_res, final_res,
 *                    numiter, runtime (res_vec and timing are set to NULL)
 * @return MAGMA_SUCCESS; solver_par->info is 0 if the final residual is
 *         smaller than the initial one and -1 otherwise
 */
magma_int_t
magma_cbaiter(
    magma_c_sparse_matrix A,
    magma_c_vector b,
    magma_c_vector *x,
    magma_c_solver_par *solver_par )
{
    // prepare solver feedback
    solver_par->solver = Magma_BAITER;
    solver_par->info = 0;

    magma_c_sparse_matrix dev_A;
    magma_c_sparse_matrix diag_h, rest_h, diag_d, rest_d;

    magma_c_mtransfer( A, &dev_A, Magma_CPU, Magma_DEV );

    // initial residual
    float resnorm;
    magma_cresidual( dev_A, b, *x, &resnorm );
    solver_par->init_res = resnorm;
    solver_par->res_vec = NULL;
    solver_par->timing = NULL;

    // setup: split A into the two parts used by the sweeps and upload them
    magma_ccsrsplit( 256, A, &diag_h, &rest_h );
    magma_c_mtransfer( diag_h, &diag_d, Magma_CPU, Magma_DEV );
    magma_c_mtransfer( rest_h, &rest_d, Magma_CPU, Magma_DEV );

    magma_int_t localiter = 1;

    magma_device_sync();
    real_Double_t t_begin = magma_wtime();

    // block-asynchronous iteration iterator
    int sweep = 0;
    while ( sweep < solver_par->maxiter ) {
        magma_cbajac_csr( localiter, diag_d, rest_d, b, x );
        ++sweep;
    }

    magma_device_sync();
    real_Double_t t_end = magma_wtime();
    solver_par->runtime = (real_Double_t) t_end - t_begin;

    // final residual and convergence flag
    magma_cresidual( dev_A, b, *x, &resnorm );
    solver_par->final_res = resnorm;
    solver_par->numiter = solver_par->maxiter;
    solver_par->info = ( solver_par->init_res > solver_par->final_res ) ? 0 : -1;

    magma_c_mfree( &diag_h );
    magma_c_mfree( &rest_h );
    magma_c_mfree( &diag_d );
    magma_c_mfree( &rest_d );
    magma_c_mfree( &dev_A );

    return MAGMA_SUCCESS;
}   /* magma_cbaiter */
/* //////////////////////////////////////////////////////////////////////////// -- testing any solver */ int main( int argc, char** argv ) { TESTING_INIT(); magma_copts zopts; magma_queue_t queue; magma_queue_create( /*devices[ opts->device ],*/ &queue ); int i=1; magma_cparse_opts( argc, argv, &zopts, &i, queue ); real_Double_t res; magma_c_sparse_matrix Z, Z2, A, A2, AT, AT2, B; B.blocksize = zopts.blocksize; B.alignment = zopts.alignment; while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); magma_cm_5stencil( laplace_size, &Z, queue ); } else { // file-matrix test magma_c_csr_mtx( &Z, argv[i], queue ); } printf( "# matrix info: %d-by-%d with %d nonzeros\n", (int) Z.num_rows,(int) Z.num_cols,(int) Z.nnz ); // convert to be non-symmetric magma_c_mconvert( Z, &A, Magma_CSR, Magma_CSRL, queue ); magma_c_mconvert( Z, &B, Magma_CSR, Magma_CSRU, queue ); // transpose magma_c_mtranspose( A, &AT, queue ); // quite some conversions //ELL magma_c_mconvert( AT, &AT2, Magma_CSR, Magma_ELL, queue ); magma_c_mfree(&AT, queue ); magma_c_mconvert( AT2, &AT, Magma_ELL, Magma_CSR, queue ); magma_c_mfree(&AT2, queue ); //ELLPACKT magma_c_mconvert( AT, &AT2, Magma_CSR, Magma_ELLPACKT, queue ); magma_c_mfree(&AT, queue ); magma_c_mconvert( AT2, &AT, Magma_ELLPACKT, Magma_CSR, queue ); magma_c_mfree(&AT2, queue ); //ELLRT AT2.blocksize = 8; AT2.alignment = 8; magma_c_mconvert( AT, &AT2, Magma_CSR, Magma_ELLRT, queue ); magma_c_mfree(&AT, queue ); magma_c_mconvert( AT2, &AT, Magma_ELLRT, Magma_CSR, queue ); magma_c_mfree(&AT2, queue ); //SELLP AT2.blocksize = 8; AT2.alignment = 8; magma_c_mconvert( AT, &AT2, Magma_CSR, Magma_SELLP, queue ); magma_c_mfree(&AT, queue ); magma_c_mconvert( AT2, &AT, Magma_SELLP, Magma_CSR, queue ); magma_c_mfree(&AT2, queue ); //ELLD magma_c_mconvert( AT, &AT2, Magma_CSR, Magma_ELLD, queue ); magma_c_mfree(&AT, queue ); magma_c_mconvert( AT2, &AT, Magma_ELLD, 
Magma_CSR, queue ); magma_c_mfree(&AT2, queue ); //CSRCOO magma_c_mconvert( AT, &AT2, Magma_CSR, Magma_CSRCOO, queue ); magma_c_mfree(&AT, queue ); magma_c_mconvert( AT2, &AT, Magma_CSRCOO, Magma_CSR, queue ); magma_c_mfree(&AT2, queue ); //CSRD magma_c_mconvert( AT, &AT2, Magma_CSR, Magma_CSRD, queue ); magma_c_mfree(&AT, queue ); magma_c_mconvert( AT2, &AT, Magma_CSRD, Magma_CSR, queue ); magma_c_mfree(&AT2, queue ); //BCSR magma_c_mconvert( AT, &AT2, Magma_CSR, Magma_BCSR, queue ); magma_c_mfree(&AT, queue ); magma_c_mconvert( AT2, &AT, Magma_BCSR, Magma_CSR, queue ); magma_c_mfree(&AT2, queue ); // transpose magma_c_mtranspose( AT, &A2, queue ); magma_cmdiff( A, A2, &res, queue); printf("# ||A-A2||_F = %8.2e\n", res); if ( res < .000001 ) printf("# conversion tester: ok\n"); else printf("# conversion tester: failed\n"); magma_cmlumerge( A2, B, &Z2, queue ); magma_cmdiff( Z, Z2, &res, queue); printf("# ||Z-Z2||_F = %8.2e\n", res); if ( res < .000001 ) printf("# LUmerge tester: ok\n"); else printf("# LUmerge tester: failed\n"); magma_c_mfree(&A, queue ); magma_c_mfree(&A2, queue ); magma_c_mfree(&AT, queue ); magma_c_mfree(&AT2, queue ); magma_c_mfree(&B, queue ); magma_c_mfree(&Z2, queue ); magma_c_mfree(&Z, queue ); i++; } magma_queue_destroy( queue ); TESTING_FINALIZE(); return 0; }
/* //////////////////////////////////////////////////////////////////////////// -- testing any solver */ int main( int argc, char** argv ) { TESTING_INIT(); magma_copts zopts; magma_queue_t queue; magma_queue_create( /*devices[ opts->device ],*/ &queue ); int i=1; magma_cparse_opts( argc, argv, &zopts, &i, queue ); magmaFloatComplex one = MAGMA_C_MAKE(1.0, 0.0); magmaFloatComplex zero = MAGMA_C_MAKE(0.0, 0.0); magma_c_sparse_matrix A, B, B_d; magma_c_vector x, b; B.blocksize = zopts.blocksize; B.alignment = zopts.alignment; if ( zopts.solver_par.solver != Magma_PCG && zopts.solver_par.solver != Magma_PGMRES && zopts.solver_par.solver != Magma_PBICGSTAB && zopts.solver_par.solver != Magma_ITERREF ) zopts.precond_par.solver = Magma_NONE; magma_csolverinfo_init( &zopts.solver_par, &zopts.precond_par, queue ); while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); magma_cm_5stencil( laplace_size, &A, queue ); } else { // file-matrix test magma_c_csr_mtx( &A, argv[i], queue ); } printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n", (int) A.num_rows,(int) A.num_cols,(int) A.nnz ); // for the eigensolver case zopts.solver_par.ev_length = A.num_rows; magma_ceigensolverinfo_init( &zopts.solver_par, queue ); // scale matrix magma_cmscale( &A, zopts.scaling, queue ); magma_c_mconvert( A, &B, Magma_CSR, zopts.output_format, queue ); magma_c_mtransfer( B, &B_d, Magma_CPU, Magma_DEV, queue ); // vectors and initial guess magma_c_vinit( &b, Magma_DEV, A.num_cols, one, queue ); magma_c_vinit( &x, Magma_DEV, A.num_cols, one, queue ); magma_c_spmv( one, B_d, x, zero, b, queue ); // b = A x magma_c_vfree(&x, queue ); magma_c_vinit( &x, Magma_DEV, A.num_cols, zero, queue ); magma_c_solver( B_d, b, &x, &zopts, queue ); magma_csolverinfo( &zopts.solver_par, &zopts.precond_par, queue ); magma_c_mfree(&B_d, queue ); magma_c_mfree(&B, queue ); magma_c_mfree(&A, queue ); magma_c_vfree(&x, queue ); 
magma_c_vfree(&b, queue ); i++; } magma_csolverinfo_free( &zopts.solver_par, &zopts.precond_par, queue ); magma_queue_destroy( queue ); TESTING_FINALIZE(); return 0; }
magma_int_t magma_cpastixsetup( magma_c_sparse_matrix A, magma_c_vector b, magma_c_preconditioner *precond ){ #if defined(HAVE_PASTIX) #if defined(PRECISION_d) pastix_data_t *pastix_data = NULL; /* Pointer to a storage structure needed by pastix */ pastix_int_t ncol; /* Size of the matrix */ pastix_int_t *colptr = NULL; /* Indexes of first element of each column in row and values */ pastix_int_t *rows = NULL; /* Row of each element of the matrix */ pastix_float_t *values = NULL; /* Value of each element of the matrix */ pastix_float_t *rhs = NULL; /* right hand side */ pastix_int_t *iparm = NULL; /* integer parameters for pastix */ float *dparm = NULL; /* floating parameters for pastix */ pastix_int_t *perm = NULL; /* Permutation tabular */ pastix_int_t *invp = NULL; /* Reverse permutation tabular */ pastix_int_t mat_type; magma_c_sparse_matrix A_h1, B; magma_c_vector diag, c_t, b_h; magma_c_vinit( &c_t, Magma_CPU, A.num_rows, MAGMA_C_ZERO ); magma_c_vinit( &diag, Magma_CPU, A.num_rows, MAGMA_C_ZERO ); magma_c_vtransfer( b, &b_h, A.memory_location, Magma_CPU); if( A.storage_type != Magma_CSR ){ magma_c_mtransfer( A, &A_h1, A.memory_location, Magma_CPU); magma_c_mconvert( A_h1, &B, A_h1.storage_type, Magma_CSR); } else{ magma_c_mtransfer( A, &B, A.memory_location, Magma_CPU); } rhs = (pastix_float_t*) b_h.val; ncol = B.num_rows; colptr = B.row; rows = B.col; values = (pastix_float_t*) B.val; mat_type = API_SYM_NO; iparm = (pastix_int_t*)malloc(IPARM_SIZE*sizeof(pastix_int_t)); dparm = (pastix_float_t*)malloc(DPARM_SIZE*sizeof(pastix_float_t)); /*******************************************/ /* Initialize parameters to default values */ /*******************************************/ iparm[IPARM_MODIFY_PARAMETER] = API_NO; pastix(&pastix_data, MPI_COMM_WORLD, ncol, colptr, rows, values, perm, invp, rhs, 1, iparm, dparm); iparm[IPARM_THREAD_NBR] = 16; iparm[IPARM_SYM] = mat_type; iparm[IPARM_FACTORIZATION] = API_FACT_LU; iparm[IPARM_VERBOSE] = API_VERBOSE_YES; 
iparm[IPARM_ORDERING] = API_ORDER_SCOTCH; iparm[IPARM_INCOMPLETE] = API_NO; iparm[IPARM_RHS_MAKING] = API_RHS_B; //iparm[IPARM_AMALGAMATION] = 5; iparm[IPARM_LEVEL_OF_FILL] = 0; /* if (incomplete == 1) { dparm[DPARM_EPSILON_REFINEMENT] = 1e-7; } */ /* * Matrix needs : * - to be in fortran numbering * - to have only the lower triangular part in symmetric case * - to have a graph with a symmetric structure in unsymmetric case * If those criteria are not matched, the csc will be reallocated and changed. */ iparm[IPARM_MATRIX_VERIFICATION] = API_YES; perm = (pastix_int_t*)malloc(ncol*sizeof(pastix_int_t)); invp = (pastix_int_t*)malloc(ncol*sizeof(pastix_int_t)); /*******************************************/ /* Step 1 - Ordering / Scotch */ /* Perform it only when the pattern of */ /* matrix change. */ /* eg: mesh refinement */ /* In many cases users can simply go from */ /* API_TASK_ORDERING to API_TASK_ANALYSE */ /* in one call. */ /*******************************************/ /*******************************************/ /* Step 2 - Symbolic factorization */ /* Perform it only when the pattern of */ /* matrix change. */ /*******************************************/ /*******************************************/ /* Step 3 - Mapping and Compute scheduling */ /* Perform it only when the pattern of */ /* matrix change. */ /*******************************************/ /*******************************************/ /* Step 4 - Numerical Factorisation */ /* Perform it each time the values of the */ /* matrix changed. 
*/ /*******************************************/ iparm[IPARM_START_TASK] = API_TASK_ORDERING; iparm[IPARM_END_TASK] = API_TASK_NUMFACT; pastix(&pastix_data, MPI_COMM_WORLD, ncol, colptr, rows, values, perm, invp, NULL, 1, iparm, dparm); precond->int_array_1 = (magma_int_t*) perm; precond->int_array_2 = (magma_int_t*) invp; precond->M.val = (magmaFloatComplex*) values; precond->M.col = (magma_int_t*) colptr; precond->M.row = (magma_int_t*) rows; precond->M.num_rows = A.num_rows; precond->M.num_cols = A.num_cols; precond->M.memory_location = Magma_CPU; precond->pastix_data = pastix_data; precond->iparm = iparm; precond->dparm = dparm; if( A.storage_type != Magma_CSR){ magma_c_mfree( &A_h1 ); } magma_c_vfree( &b_h); magma_c_mfree( &B ); #else printf( "error: only real supported yet.\n"); #endif #else printf( "error: pastix not available.\n"); #endif return MAGMA_SUCCESS; }
/* //////////////////////////////////////////////////////////////////////////// -- testing sparse matrix vector product */ int main( int argc, char** argv ) { TESTING_INIT(); magma_queue_t queue; magma_queue_create( /*devices[ opts->device ],*/ &queue ); magma_c_sparse_matrix hA, hA_SELLP, hA_ELL, dA, dA_SELLP, dA_ELL; hA_SELLP.blocksize = 8; hA_SELLP.alignment = 8; real_Double_t start, end, res; magma_int_t *pntre; magmaFloatComplex c_one = MAGMA_C_MAKE(1.0, 0.0); magmaFloatComplex c_zero = MAGMA_C_MAKE(0.0, 0.0); magma_int_t i, j; for( i = 1; i < argc; ++i ) { if ( strcmp("--blocksize", argv[i]) == 0 ) { hA_SELLP.blocksize = atoi( argv[++i] ); } else if ( strcmp("--alignment", argv[i]) == 0 ) { hA_SELLP.alignment = atoi( argv[++i] ); } else break; } printf( "\n# usage: ./run_cspmv" " [ --blocksize %d --alignment %d (for SELLP) ]" " matrices \n\n", (int) hA_SELLP.blocksize, (int) hA_SELLP.alignment ); while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); magma_cm_5stencil( laplace_size, &hA, queue ); } else { // file-matrix test magma_c_csr_mtx( &hA, argv[i], queue ); } printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n", (int) hA.num_rows,(int) hA.num_cols,(int) hA.nnz ); real_Double_t FLOPS = 2.0*hA.nnz/1e9; magma_c_vector hx, hy, dx, dy, hrefvec, hcheck; // init CPU vectors magma_c_vinit( &hx, Magma_CPU, hA.num_rows, c_zero, queue ); magma_c_vinit( &hy, Magma_CPU, hA.num_rows, c_zero, queue ); // init DEV vectors magma_c_vinit( &dx, Magma_DEV, hA.num_rows, c_one, queue ); magma_c_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue ); #ifdef MAGMA_WITH_MKL // calling MKL with CSR pntre = (magma_int_t*)malloc( (hA.num_rows+1)*sizeof(magma_int_t) ); pntre[0] = 0; for (j=0; j<hA.num_rows; j++ ) { pntre[j] = hA.row[j+1]; } MKL_INT num_rows = hA.num_rows; MKL_INT num_cols = hA.num_cols; MKL_INT nnz = hA.nnz; MKL_INT *col; TESTING_MALLOC_CPU( col, MKL_INT, nnz ); for( 
magma_int_t t=0; t < hA.nnz; ++t ) { col[ t ] = hA.col[ t ]; } MKL_INT *row; TESTING_MALLOC_CPU( row, MKL_INT, num_rows ); for( magma_int_t t=0; t < hA.num_rows; ++t ) { row[ t ] = hA.col[ t ]; } start = magma_wtime(); for (j=0; j<10; j++ ) { mkl_ccsrmv( "N", &num_rows, &num_cols, MKL_ADDR(&c_one), "GFNC", MKL_ADDR(hA.val), col, row, pntre, MKL_ADDR(hx.val), MKL_ADDR(&c_zero), MKL_ADDR(hy.val) ); } end = magma_wtime(); printf( "\n > MKL : %.2e seconds %.2e GFLOP/s (CSR).\n", (end-start)/10, FLOPS*10/(end-start) ); TESTING_FREE_CPU( row ); TESTING_FREE_CPU( col ); free(pntre); #endif // MAGMA_WITH_MKL // copy matrix to GPU magma_c_mtransfer( hA, &dA, Magma_CPU, Magma_DEV, queue ); // SpMV on GPU (CSR) -- this is the reference! start = magma_sync_wtime( queue ); for (j=0; j<10; j++) magma_c_spmv( c_one, dA, dx, c_zero, dy, queue ); end = magma_sync_wtime( queue ); printf( " > MAGMA: %.2e seconds %.2e GFLOP/s (standard CSR).\n", (end-start)/10, FLOPS*10/(end-start) ); magma_c_mfree(&dA, queue ); magma_c_vtransfer( dy, &hrefvec , Magma_DEV, Magma_CPU, queue ); // convert to ELL and copy to GPU magma_c_mconvert( hA, &hA_ELL, Magma_CSR, Magma_ELL, queue ); magma_c_mtransfer( hA_ELL, &dA_ELL, Magma_CPU, Magma_DEV, queue ); magma_c_mfree(&hA_ELL, queue ); magma_c_vfree( &dy, queue ); magma_c_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue ); // SpMV on GPU (ELL) start = magma_sync_wtime( queue ); for (j=0; j<10; j++) magma_c_spmv( c_one, dA_ELL, dx, c_zero, dy, queue ); end = magma_sync_wtime( queue ); printf( " > MAGMA: %.2e seconds %.2e GFLOP/s (standard ELL).\n", (end-start)/10, FLOPS*10/(end-start) ); magma_c_mfree(&dA_ELL, queue ); magma_c_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue ); res = 0.0; for(magma_int_t k=0; k<hA.num_rows; k++ ) res=res + MAGMA_C_REAL(hcheck.val[k]) - MAGMA_C_REAL(hrefvec.val[k]); if ( res < .000001 ) printf("# tester spmv ELL: ok\n"); else printf("# tester spmv ELL: failed\n"); magma_c_vfree( &hcheck, queue ); // convert to SELLP 
and copy to GPU magma_c_mconvert( hA, &hA_SELLP, Magma_CSR, Magma_SELLP, queue ); magma_c_mtransfer( hA_SELLP, &dA_SELLP, Magma_CPU, Magma_DEV, queue ); magma_c_mfree(&hA_SELLP, queue ); magma_c_vfree( &dy, queue ); magma_c_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue ); // SpMV on GPU (SELLP) start = magma_sync_wtime( queue ); for (j=0; j<10; j++) magma_c_spmv( c_one, dA_SELLP, dx, c_zero, dy, queue ); end = magma_sync_wtime( queue ); printf( " > MAGMA: %.2e seconds %.2e GFLOP/s (SELLP).\n", (end-start)/10, FLOPS*10/(end-start) ); magma_c_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue ); res = 0.0; for(magma_int_t k=0; k<hA.num_rows; k++ ) res=res + MAGMA_C_REAL(hcheck.val[k]) - MAGMA_C_REAL(hrefvec.val[k]); printf("# |x-y|_F = %8.2e\n", res); if ( res < .000001 ) printf("# tester spmv SELL-P: ok\n"); else printf("# tester spmv SELL-P: failed\n"); magma_c_vfree( &hcheck, queue ); magma_c_mfree(&dA_SELLP, queue ); // SpMV on GPU (CUSPARSE - CSR) // CUSPARSE context // cusparseHandle_t cusparseHandle = 0; cusparseStatus_t cusparseStatus; cusparseStatus = cusparseCreate(&cusparseHandle); cusparseSetStream( cusparseHandle, queue ); cusparseMatDescr_t descr = 0; cusparseStatus = cusparseCreateMatDescr(&descr); cusparseSetMatType(descr,CUSPARSE_MATRIX_TYPE_GENERAL); cusparseSetMatIndexBase(descr,CUSPARSE_INDEX_BASE_ZERO); magmaFloatComplex alpha = c_one; magmaFloatComplex beta = c_zero; magma_c_vfree( &dy, queue ); magma_c_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue ); // copy matrix to GPU magma_c_mtransfer( hA, &dA, Magma_CPU, Magma_DEV, queue ); start = magma_sync_wtime( queue ); for (j=0; j<10; j++) cusparseStatus = cusparseCcsrmv(cusparseHandle,CUSPARSE_OPERATION_NON_TRANSPOSE, hA.num_rows, hA.num_cols, hA.nnz, &alpha, descr, dA.dval, dA.drow, dA.dcol, dx.dval, &beta, dy.dval); end = magma_sync_wtime( queue ); if (cusparseStatus != 0) printf("error in cuSPARSE CSR\n"); printf( " > CUSPARSE: %.2e seconds %.2e GFLOP/s (CSR).\n", (end-start)/10, 
FLOPS*10/(end-start) ); cusparseMatDescr_t descrA; cusparseStatus = cusparseCreateMatDescr(&descrA); if (cusparseStatus != 0) printf("error\n"); cusparseHybMat_t hybA; cusparseStatus = cusparseCreateHybMat( &hybA ); if (cusparseStatus != 0) printf("error\n"); magma_c_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue ); res = 0.0; for(magma_int_t k=0; k<hA.num_rows; k++ ) res=res + MAGMA_C_REAL(hcheck.val[k]) - MAGMA_C_REAL(hrefvec.val[k]); printf("# |x-y|_F = %8.2e\n", res); if ( res < .000001 ) printf("# tester spmv cuSPARSE CSR: ok\n"); else printf("# tester spmv cuSPARSE CSR: failed\n"); magma_c_vfree( &hcheck, queue ); magma_c_vfree( &dy, queue ); magma_c_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue ); cusparseCcsr2hyb(cusparseHandle, hA.num_rows, hA.num_cols, descrA, dA.dval, dA.drow, dA.dcol, hybA, 0, CUSPARSE_HYB_PARTITION_AUTO); start = magma_sync_wtime( queue ); for (j=0; j<10; j++) cusparseStatus = cusparseChybmv( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, descrA, hybA, dx.dval, &beta, dy.dval); end = magma_sync_wtime( queue ); if (cusparseStatus != 0) printf("error in cuSPARSE HYB\n"); printf( " > CUSPARSE: %.2e seconds %.2e GFLOP/s (HYB).\n", (end-start)/10, FLOPS*10/(end-start) ); magma_c_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue ); res = 0.0; for(magma_int_t k=0; k<hA.num_rows; k++ ) res=res + MAGMA_C_REAL(hcheck.val[k]) - MAGMA_C_REAL(hrefvec.val[k]); printf("# |x-y|_F = %8.2e\n", res); if ( res < .000001 ) printf("# tester spmv cuSPARSE HYB: ok\n"); else printf("# tester spmv cuSPARSE HYB: failed\n"); magma_c_vfree( &hcheck, queue ); cusparseDestroyMatDescr( descrA ); cusparseDestroyHybMat( hybA ); cusparseDestroy( cusparseHandle ); magma_c_mfree(&dA, queue ); printf("\n\n"); // free CPU memory magma_c_mfree(&hA, queue ); magma_c_vfree(&hx, queue ); magma_c_vfree(&hy, queue ); magma_c_vfree(&hrefvec, queue ); // free GPU memory magma_c_vfree(&dx, queue ); magma_c_vfree(&dy, queue ); i++; } magma_queue_destroy( queue 
); TESTING_FINALIZE(); return 0; }
/**
 * Transposes the sparse matrix A into B using cuSPARSE's CSR -> CSC
 * conversion (the CSC arrays of A are the CSR arrays of A^T).
 *
 * Three cases:
 *   - A is CSR on the device: the conversion is done directly,
 *   - A is CSR on the host:   A is transferred to the device, transposed
 *                             there and the result is transferred back,
 *   - anything else:          A is converted to CSR, transposed by a
 *                             recursive call and converted back to
 *                             A.storage_type.
 *
 * B->fill_mode is set to the mirror of A.fill_mode in the device case
 * (FULL stays FULL, LOWER <-> UPPER).
 *
 * NOTE(review): the device path passes A.num_rows for both dimensions of
 * csr2csc and sizes the output like A, so it appears to assume a square
 * matrix - confirm before using it on rectangular input.
 *
 * @param A      input matrix
 * @param B      out: transpose of A
 * @param queue  queue used for the cuSPARSE stream and all MAGMA helpers
 * @return MAGMA_SUCCESS (cuSPARSE errors are only reported via printf)
 */
extern "C" magma_int_t
magma_c_cucsrtranspose(
    magma_c_sparse_matrix A,
    magma_c_sparse_matrix *B,
    magma_queue_t queue )
{
    // for symmetric matrices: convert to csc using cusparse

    if( A.storage_type == Magma_CSR && A.memory_location == Magma_DEV ) {

        // C is a device-side scratch matrix with the same layout as A that
        // receives the csr2csc output.
        magma_c_sparse_matrix C;
        magma_c_mtransfer( A, &C, Magma_DEV, Magma_DEV, queue );

        // CUSPARSE context //
        cusparseHandle_t handle;
        cusparseStatus_t cusparseStatus;
        cusparseStatus = cusparseCreate(&handle);
        cusparseSetStream( handle, queue );
        if (cusparseStatus != 0)
            printf("error in Handle.\n");

        cusparseMatDescr_t descrA;
        cusparseMatDescr_t descrB;
        cusparseStatus = cusparseCreateMatDescr(&descrA);
        cusparseStatus = cusparseCreateMatDescr(&descrB);
        if (cusparseStatus != 0)
            printf("error in MatrDescr.\n");

        cusparseStatus =
        cusparseSetMatType(descrA,CUSPARSE_MATRIX_TYPE_GENERAL);
        cusparseSetMatType(descrB,CUSPARSE_MATRIX_TYPE_GENERAL);
        if (cusparseStatus != 0)
            printf("error in MatrType.\n");

        cusparseStatus =
        cusparseSetMatIndexBase(descrA,CUSPARSE_INDEX_BASE_ZERO);
        cusparseSetMatIndexBase(descrB,CUSPARSE_INDEX_BASE_ZERO);
        if (cusparseStatus != 0)
            printf("error in IndexBase.\n");

        // Note the swapped output arrays: the CSC row indices land in
        // C.dcol and the CSC column pointer in C.drow, which makes C the
        // CSR representation of the transpose.
        cusparseStatus =
        cusparseCcsr2csc( handle, A.num_rows, A.num_rows, A.nnz,
                         A.dval, A.drow, A.dcol, C.dval, C.dcol, C.drow,
                         CUSPARSE_ACTION_NUMERIC,
                         CUSPARSE_INDEX_BASE_ZERO);
        if (cusparseStatus != 0)
            printf("error in transpose: %d.\n", cusparseStatus);

        cusparseDestroyMatDescr( descrA );
        cusparseDestroyMatDescr( descrB );
        cusparseDestroy( handle );

        magma_c_mtransfer( C, B, Magma_DEV, Magma_DEV, queue );
        // fixed: the scratch matrix was never released after being copied
        // into B (magma_ccuspmm frees its scratch after the same transfer).
        magma_c_mfree( &C, queue );

        if( A.fill_mode == Magma_FULL ){
            B->fill_mode = Magma_FULL;
        }
        else if( A.fill_mode == Magma_LOWER ){
            B->fill_mode = Magma_UPPER;
        }
        else if ( A.fill_mode == Magma_UPPER ){
            B->fill_mode = Magma_LOWER;
        }

        // end CUSPARSE context //

        return MAGMA_SUCCESS;

    }else if( A.storage_type == Magma_CSR && A.memory_location == Magma_CPU ){

        // host CSR: do the work on the device and bring the result back
        magma_c_sparse_matrix A_d, B_d;
        magma_c_mtransfer( A, &A_d, A.memory_location, Magma_DEV, queue );
        magma_c_cucsrtranspose( A_d, &B_d, queue );
        magma_c_mtransfer( B_d, B, Magma_DEV, A.memory_location, queue );

        magma_c_mfree( &A_d, queue );
        magma_c_mfree( &B_d, queue );

        return MAGMA_SUCCESS;

    }else {

        // any other format: go through CSR and convert back afterwards
        magma_c_sparse_matrix ACSR, BCSR;
        magma_c_mconvert( A, &ACSR, A.storage_type, Magma_CSR, queue );
        magma_c_cucsrtranspose( ACSR, &BCSR, queue );
        magma_c_mconvert( BCSR, B, Magma_CSR, A.storage_type, queue );

        magma_c_mfree( &ACSR, queue );
        magma_c_mfree( &BCSR, queue );

        return MAGMA_SUCCESS;
    }
}
/**
 * Sparse matrix-matrix product AB = A * B on the device via cuSPARSE
 * csrgemm.
 *
 * Only handled when both A and B live on the device and are stored as
 * Magma_CSR or Magma_CSRCOO; in every other case an error message is
 * printed, AB is left untouched and MAGMA_SUCCESS is still returned
 * (NOTE(review): callers cannot detect that case from the return value).
 *
 * Changes with respect to the previous implementation:
 *   - csrgemm is now called with m = A.num_rows, n = B.num_cols and
 *     k = A.num_cols; before, A.num_rows was passed for all three, which is
 *     only correct for square operands. Results for square input are
 *     unchanged.
 *   - the nonzero count is received in a local magma_index_t instead of
 *     writing through a casted pointer to the magma_int_t field C.nnz.
 *   - the cuSPARSE handle and descriptors are released on the
 *     allocation-failure path as well, and the symbolic phase is skipped if
 *     the row-pointer allocation already failed.
 *
 * @param A      left operand
 * @param B      right operand
 * @param AB     out: product, a device matrix with A's storage_type tag
 * @param queue  queue used for the cuSPARSE stream and all MAGMA helpers
 * @return MAGMA_SUCCESS, or MAGMA_ERR_DEVICE_ALLOC if a device allocation
 *         for the result failed
 */
extern "C" magma_int_t
magma_ccuspmm(
    magma_c_sparse_matrix A,
    magma_c_sparse_matrix B,
    magma_c_sparse_matrix *AB,
    magma_queue_t queue )
{
    if (    A.memory_location == Magma_DEV
        && B.memory_location == Magma_DEV
        && ( A.storage_type == Magma_CSR ||
             A.storage_type == Magma_CSRCOO )
        && ( B.storage_type == Magma_CSR ||
             B.storage_type == Magma_CSRCOO ) )
    {
        // C collects the result before it is copied into AB.
        magma_c_sparse_matrix C;
        C.num_rows = A.num_rows;
        C.num_cols = B.num_cols;
        C.storage_type = A.storage_type;
        C.memory_location = A.memory_location;
        C.fill_mode = Magma_FULL;
        magma_int_t stat_dev = 0;
        C.val = NULL;
        C.col = NULL;
        C.row = NULL;
        C.rowidx = NULL;
        C.blockinfo = NULL;
        C.diag = NULL;
        C.dval = NULL;
        C.dcol = NULL;
        C.drow = NULL;
        C.drowidx = NULL;
        C.ddiag = NULL;

        // CUSPARSE context //
        cusparseHandle_t handle;
        cusparseStatus_t cusparseStatus;
        cusparseStatus = cusparseCreate(&handle);
        cusparseSetStream( handle, queue );
        if (cusparseStatus != 0)
            printf("error in Handle.\n");

        cusparseMatDescr_t descrA;
        cusparseMatDescr_t descrB;
        cusparseMatDescr_t descrC;
        cusparseStatus = cusparseCreateMatDescr(&descrA);
        cusparseStatus = cusparseCreateMatDescr(&descrB);
        cusparseStatus = cusparseCreateMatDescr(&descrC);
        if (cusparseStatus != 0)
            printf("error in MatrDescr.\n");

        cusparseStatus =
        cusparseSetMatType(descrA,CUSPARSE_MATRIX_TYPE_GENERAL);
        cusparseSetMatType(descrB,CUSPARSE_MATRIX_TYPE_GENERAL);
        cusparseSetMatType(descrC,CUSPARSE_MATRIX_TYPE_GENERAL);
        if (cusparseStatus != 0)
            printf("error in MatrType.\n");

        cusparseStatus =
        cusparseSetMatIndexBase(descrA,CUSPARSE_INDEX_BASE_ZERO);
        cusparseSetMatIndexBase(descrB,CUSPARSE_INDEX_BASE_ZERO);
        cusparseSetMatIndexBase(descrC,CUSPARSE_INDEX_BASE_ZERO);
        if (cusparseStatus != 0)
            printf("error in IndexBase.\n");

        // multiply A and B on the device

        // With host pointer mode the symbolic phase writes the total number
        // of nonzeros of C into this host variable.
        magma_index_t nnz_c = 0;
        cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_HOST);

        stat_dev += magma_index_malloc( &C.drow, (A.num_rows + 1) );
        if ( stat_dev == 0 ) {
            // symbolic phase: row pointer of C and its nonzero count
            // (A is m-by-k, B is k-by-n, C is m-by-n)
            cusparseXcsrgemmNnz(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
                                        CUSPARSE_OPERATION_NON_TRANSPOSE,
                                        A.num_rows, B.num_cols, A.num_cols,
                                        descrA, A.nnz, A.drow, A.dcol,
                                        descrB, B.nnz, B.drow, B.dcol,
                                        descrC, C.drow, &nnz_c );
            C.nnz = (magma_int_t) nnz_c;

            stat_dev += magma_index_malloc( &C.dcol, C.nnz );
            stat_dev += magma_cmalloc( &C.dval, C.nnz );
        }
        if( stat_dev != 0 ){
            // release the cuSPARSE objects too, not only the partial result
            cusparseDestroyMatDescr( descrA );
            cusparseDestroyMatDescr( descrB );
            cusparseDestroyMatDescr( descrC );
            cusparseDestroy( handle );
            magma_c_mfree( &C, queue );
            return MAGMA_ERR_DEVICE_ALLOC;
        }

        // numeric phase: fill column indices and values of C
        cusparseCcsrgemm(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
                                 CUSPARSE_OPERATION_NON_TRANSPOSE,
                        A.num_rows, B.num_cols, A.num_cols,
                        descrA, A.nnz,
                        A.dval, A.drow, A.dcol,
                        descrB, B.nnz,
                        B.dval, B.drow, B.dcol,
                        descrC,
                        C.dval, C.drow, C.dcol);

        cusparseDestroyMatDescr( descrA );
        cusparseDestroyMatDescr( descrB );
        cusparseDestroyMatDescr( descrC );
        cusparseDestroy( handle );
        // end CUSPARSE context //

        magma_c_mtransfer( C, AB, Magma_DEV, Magma_DEV, queue );
        magma_c_mfree( &C, queue );

        return MAGMA_SUCCESS;
    }
    else {
        printf("error: CSRMM only supported on device and CSR format.\n");
        return MAGMA_SUCCESS;
    }
}
extern "C" magma_int_t magma_cmscale( magma_c_sparse_matrix *A, magma_scale_t scaling, magma_queue_t queue ) { if ( A->memory_location == Magma_CPU && A->storage_type == Magma_CSRCOO ) { if ( scaling == Magma_NOSCALE ) { // no scale ; } else if ( scaling == Magma_UNITROW ) { // scale to unit rownorm magmaFloatComplex *tmp; magma_cmalloc_cpu( &tmp, A->num_rows ); for( magma_int_t z=0; z<A->num_rows; z++ ) { magmaFloatComplex s = MAGMA_C_MAKE( 0.0, 0.0 ); for( magma_int_t f=A->row[z]; f<A->row[z+1]; f++ ) s+= MAGMA_C_REAL(A->val[f])*MAGMA_C_REAL(A->val[f]); tmp[z] = MAGMA_C_MAKE( 1.0/sqrt( MAGMA_C_REAL( s ) ), 0.0 ); } for( magma_int_t z=0; z<A->nnz; z++ ) { A->val[z] = A->val[z] * tmp[A->col[z]] * tmp[A->rowidx[z]]; } magma_free_cpu( tmp ); } else if (scaling == Magma_UNITDIAG ) { // scale to unit diagonal magmaFloatComplex *tmp; magma_cmalloc_cpu( &tmp, A->num_rows ); for( magma_int_t z=0; z<A->num_rows; z++ ) { magmaFloatComplex s = MAGMA_C_MAKE( 0.0, 0.0 ); for( magma_int_t f=A->row[z]; f<A->row[z+1]; f++ ) { if ( A->col[f]== z ) { // add some identity matrix //A->val[f] = A->val[f] + MAGMA_C_MAKE( 100000.0, 0.0 ); s = A->val[f]; } } if ( s == MAGMA_C_MAKE( 0.0, 0.0 ) ) printf("error: zero diagonal element.\n"); tmp[z] = MAGMA_C_MAKE( 1.0/sqrt( MAGMA_C_REAL( s ) ), 0.0 ); } for( magma_int_t z=0; z<A->nnz; z++ ) { A->val[z] = A->val[z] * tmp[A->col[z]] * tmp[A->rowidx[z]]; } magma_free_cpu( tmp ); } else printf( "error: scaling not supported\n" ); return MAGMA_SUCCESS; } else { magma_c_sparse_matrix hA, CSRA; magma_storage_t A_storage = A->storage_type; magma_location_t A_location = A->memory_location; magma_c_mtransfer( *A, &hA, A->memory_location, Magma_CPU, queue ); magma_c_mconvert( hA, &CSRA, hA.storage_type, Magma_CSRCOO, queue ); magma_cmscale( &CSRA, scaling, queue ); magma_c_mfree( &hA, queue ); magma_c_mfree( A, queue ); magma_c_mconvert( CSRA, &hA, Magma_CSRCOO, A_storage, queue ); magma_c_mtransfer( hA, A, Magma_CPU, A_location, queue ); magma_c_mfree( 
&hA, queue ); magma_c_mfree( &CSRA, queue ); return MAGMA_SUCCESS; } }
/* //////////////////////////////////////////////////////////////////////////// -- testing any solver */ int main( int argc, char** argv) { TESTING_INIT(); magma_copts zopts; int i=1; magma_cparse_opts( argc, argv, &zopts, &i); magmaFloatComplex one = MAGMA_C_MAKE(1.0, 0.0); magmaFloatComplex zero = MAGMA_C_MAKE(0.0, 0.0); magma_c_sparse_matrix A, B, B_d; magma_c_vector x, b; B.blocksize = zopts.blocksize; B.alignment = zopts.alignment; if ( zopts.solver_par.solver != Magma_PCG && zopts.solver_par.solver != Magma_PGMRES && zopts.solver_par.solver != Magma_PBICGSTAB && zopts.solver_par.solver != Magma_ITERREF ) zopts.precond_par.solver = Magma_NONE; magma_csolverinfo_init( &zopts.solver_par, &zopts.precond_par ); while( i < argc ){ magma_c_csr_mtx( &A, argv[i] ); printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n", (int) A.num_rows,(int) A.num_cols,(int) A.nnz ); // scale matrix magma_cmscale( &A, zopts.scaling ); magma_c_mconvert( A, &B, Magma_CSR, zopts.output_format ); magma_c_mtransfer( B, &B_d, Magma_CPU, Magma_DEV ); // vectors and initial guess magma_c_vinit( &b, Magma_DEV, A.num_cols, one ); magma_c_vinit( &x, Magma_DEV, A.num_cols, one ); magma_c_spmv( one, B_d, x, zero, b ); // b = A x magma_c_vfree(&x); magma_c_vinit( &x, Magma_DEV, A.num_cols, zero ); magma_c_solver( B_d, b, &x, &zopts ); magma_csolverinfo( &zopts.solver_par, &zopts.precond_par ); magma_c_mfree(&B_d); magma_c_mfree(&B); magma_c_mfree(&A); magma_c_vfree(&x); magma_c_vfree(&b); i++; } magma_csolverinfo_free( &zopts.solver_par, &zopts.precond_par ); TESTING_FINALIZE(); return 0; }
/**
 * Runs solver_par->maxiter sweeps of the block-asynchronous iteration
 * (magma_cbajac_csr) on the system A x = b and records timing and residuals
 * in solver_par.
 *
 * A is first copied to the host and converted to CSR; that CSR copy is
 * uploaded for the residual computations and split (block size 256) into
 * the two parts used by the sweeps.
 *
 * @param A           system matrix (any location / storage type accepted by
 *                    magma_c_mtransfer and magma_c_mconvert)
 * @param b           right-hand side
 * @param x           in: initial guess, out: iterate after the last sweep
 * @param solver_par  in: maxiter; out: solver, info, init_res, final_res,
 *                    numiter, runtime (res_vec and timing are set to NULL)
 * @param queue       queue handed to all MAGMA helper calls
 * @return MAGMA_SUCCESS; solver_par->info is MAGMA_SUCCESS if the final
 *         residual is smaller than the initial one, MAGMA_DIVERGENCE
 *         otherwise
 */
extern "C" magma_int_t
magma_cbaiter(
    magma_c_sparse_matrix A,
    magma_c_vector b,
    magma_c_vector *x,
    magma_c_solver_par *solver_par,
    magma_queue_t queue )
{
    // prepare solver feedback
    solver_par->solver = Magma_BAITER;
    solver_par->info = MAGMA_SUCCESS;

    magma_c_sparse_matrix host_A, csr_A, dev_A;
    magma_c_sparse_matrix diag_h, rest_h, diag_d, rest_d;

    // host CSR copy of A, plus a device copy for the residuals
    magma_c_mtransfer( A, &host_A, A.memory_location, Magma_CPU, queue );
    magma_c_mconvert( host_A, &csr_A, host_A.storage_type, Magma_CSR, queue );
    magma_c_mtransfer( csr_A, &dev_A, Magma_CPU, Magma_DEV, queue );

    // initial residual
    float resnorm;
    magma_cresidual( dev_A, b, *x, &resnorm, queue );
    solver_par->init_res = resnorm;
    solver_par->res_vec = NULL;
    solver_par->timing = NULL;

    // setup: split the CSR copy and upload both parts
    magma_ccsrsplit( 256, csr_A, &diag_h, &rest_h, queue );
    magma_c_mtransfer( diag_h, &diag_d, Magma_CPU, Magma_DEV, queue );
    magma_c_mtransfer( rest_h, &rest_d, Magma_CPU, Magma_DEV, queue );

    magma_int_t localiter = 1;

    real_Double_t t_begin = magma_sync_wtime( queue );

    // block-asynchronous iteration iterator
    int sweep = 0;
    while ( sweep < solver_par->maxiter ) {
        magma_cbajac_csr( localiter, diag_d, rest_d, b, x, queue );
        ++sweep;
    }

    real_Double_t t_end = magma_sync_wtime( queue );
    solver_par->runtime = (real_Double_t) t_end - t_begin;

    // final residual and convergence flag
    magma_cresidual( dev_A, b, *x, &resnorm, queue );
    solver_par->final_res = resnorm;
    solver_par->numiter = solver_par->maxiter;

    solver_par->info = ( solver_par->init_res > solver_par->final_res )
                     ? MAGMA_SUCCESS
                     : MAGMA_DIVERGENCE;

    magma_c_mfree( &diag_h, queue );
    magma_c_mfree( &rest_h, queue );
    magma_c_mfree( &diag_d, queue );
    magma_c_mfree( &rest_d, queue );
    magma_c_mfree( &dev_A, queue );
    magma_c_mfree( &csr_A, queue );
    magma_c_mfree( &host_A, queue );

    return MAGMA_SUCCESS;
}   /* magma_cbaiter */
/**
 * Splits the sparse matrix A into a block-diagonal part D and the
 * remainder R, using diagonal blocks of `bsize` consecutive rows/columns.
 *
 * For a row k inside the block starting at row i, an entry with column c
 * goes to D if i <= c < i+bsize and to R otherwise. D is tagged Magma_CSRD
 * and R Magma_CSR. Within each row of D the diagonal entry is written to
 * the first slot of the row, the entries left of the diagonal are shifted
 * one slot to the right, and the entries right of the diagonal keep their
 * position.
 * NOTE(review): this placement appears to rely on column indices being
 * sorted within a row and on every row storing its diagonal entry - confirm
 * with the callers.
 *
 * The split itself is implemented for host matrices in CSR/CSRCOO. Any
 * other input is transferred to the host, converted to CSR, split by a
 * recursive call, and both results are converted to A.storage_type and
 * transferred to A.memory_location.
 *
 * @param bsize  size of the diagonal blocks
 * @param A      input matrix
 * @param D      out: block-diagonal part
 * @param R      out: off-block-diagonal part
 * @param queue  queue handed to all MAGMA helper calls
 * @return MAGMA_SUCCESS, or MAGMA_ERR_HOST_ALLOC if a host allocation for
 *         D or R failed
 */
extern "C" magma_int_t
magma_ccsrsplit(
    magma_int_t bsize,
    magma_c_sparse_matrix A,
    magma_c_sparse_matrix *D,
    magma_c_sparse_matrix *R,
    magma_queue_t queue )
{
    const bool host_csr =
        A.memory_location == Magma_CPU
        && ( A.storage_type == Magma_CSR || A.storage_type == Magma_CSRCOO );

    if ( ! host_csr ) {
        // Go through a host CSR copy and convert the two parts back.
        magma_c_sparse_matrix host_A, csr_A, csr_D, csr_R, host_D, host_R;

        magma_c_mtransfer( A, &host_A, A.memory_location, Magma_CPU, queue );
        magma_c_mconvert( host_A, &csr_A, A.storage_type, Magma_CSR, queue );

        magma_ccsrsplit( bsize, csr_A, &csr_D, &csr_R, queue );

        magma_c_mconvert( csr_D, &host_D, Magma_CSR, A.storage_type, queue );
        magma_c_mconvert( csr_R, &host_R, Magma_CSR, A.storage_type, queue );

        magma_c_mtransfer( host_D, D, Magma_CPU, A.memory_location, queue );
        magma_c_mtransfer( host_R, R, Magma_CPU, A.memory_location, queue );

        magma_c_mfree( &host_A, queue );
        magma_c_mfree( &csr_A, queue );
        magma_c_mfree( &host_D, queue );
        magma_c_mfree( &csr_D, queue );
        magma_c_mfree( &host_R, queue );
        magma_c_mfree( &csr_R, queue );

        return MAGMA_SUCCESS;
    }

    magma_int_t i, k, j;
    magma_int_t nnz_diag = 0, nnz_offd = 0;
    magma_int_t stat_cpu = 0;

    // Start from empty outputs so that a failed allocation can be cleaned
    // up with magma_c_mfree.
    D->val = NULL;      R->val = NULL;
    D->col = NULL;      R->col = NULL;
    D->row = NULL;      R->row = NULL;
    D->rowidx = NULL;   R->rowidx = NULL;
    D->blockinfo = NULL; R->blockinfo = NULL;
    D->diag = NULL;     R->diag = NULL;
    D->dval = NULL;     R->dval = NULL;
    D->dcol = NULL;     R->dcol = NULL;
    D->drow = NULL;     R->drow = NULL;
    D->drowidx = NULL;  R->drowidx = NULL;
    D->ddiag = NULL;    R->ddiag = NULL;

    // Pass 1: count the nonzeros that end up in each of the two matrices.
    for( i=0; i<A.num_rows; i+=bsize ) {
        const magma_int_t block_end = min( A.num_rows, i+bsize );
        for( k=i; k<block_end; k++ ) {
            for( j=A.row[k]; j<A.row[k+1]; j++ ) {
                if ( A.col[j] >= i && A.col[j] < i+bsize )
                    nnz_diag++;
                else
                    nnz_offd++;
            }
        }
    }

    // Describe and allocate the two result matrices.
    D->storage_type = Magma_CSRD;
    D->memory_location = A.memory_location;
    D->num_rows = A.num_rows;
    D->num_cols = A.num_cols;
    D->nnz = nnz_diag;

    R->storage_type = Magma_CSR;
    R->memory_location = A.memory_location;
    R->num_rows = A.num_rows;
    R->num_cols = A.num_cols;
    R->nnz = nnz_offd;

    stat_cpu += magma_cmalloc_cpu( &D->val, nnz_diag );
    stat_cpu += magma_index_malloc_cpu( &D->row, A.num_rows+1 );
    stat_cpu += magma_index_malloc_cpu( &D->col, nnz_diag );

    stat_cpu += magma_cmalloc_cpu( &R->val, nnz_offd );
    stat_cpu += magma_index_malloc_cpu( &R->row, A.num_rows+1 );
    stat_cpu += magma_index_malloc_cpu( &R->col, nnz_offd );

    if( stat_cpu != 0 ){
        magma_c_mfree( D, queue );
        magma_c_mfree( R, queue );
        return MAGMA_ERR_HOST_ALLOC;
    }

    // Pass 2: fill up the new sparse matrices. nnz_diag / nnz_offd now act
    // as running write positions.
    D->row[0] = 0;
    R->row[0] = 0;
    nnz_diag = 0;
    nnz_offd = 0;

    for( i=0; i<A.num_rows; i+=bsize ) {
        const magma_int_t block_end = min( A.num_rows, i+bsize );
        for( k=i; k<block_end; k++ ) {
            D->row[k+1] = D->row[k];
            R->row[k+1] = R->row[k];

            for( j=A.row[k]; j<A.row[k+1]; j++ ) {
                if ( A.col[j] < i || A.col[j] >= i+bsize ) {
                    // outside the diagonal block: append to R
                    R->val[nnz_offd] = A.val[j];
                    R->col[nnz_offd] = A.col[j];
                    R->row[k+1]++;
                    nnz_offd++;
                    continue;
                }

                // inside the diagonal block: pick the slot in D
                magma_int_t slot;
                if ( A.col[j] > k ) {
                    // larger than diagonal remain as before
                    slot = nnz_diag;
                }
                else if ( A.col[j] == k ) {
                    // diagonal is written first
                    slot = D->row[k];
                }
                else {
                    // smaller than diagonal are shifted one to the right
                    // to have room for the diagonal
                    slot = nnz_diag+1;
                }
                D->val[slot] = A.val[ j ];
                D->col[slot] = A.col[ j ];
                D->row[k+1]++;
                nnz_diag++;
            }
        }
    }

    return MAGMA_SUCCESS;
}
/* //////////////////////////////////////////////////////////////////////////// -- running magma_clobpcg */ int main( int argc, char** argv) { TESTING_INIT(); magma_c_solver_par solver_par; solver_par.epsilon = 1e-5; solver_par.maxiter = 1000; solver_par.verbose = 0; solver_par.num_eigenvalues = 32; solver_par.solver = Magma_LOBPCG; magma_c_preconditioner precond_par; precond_par.solver = Magma_JACOBI; int precond = 0; int format = 0; int scale = 0; magma_scale_t scaling = Magma_NOSCALE; magma_c_sparse_matrix A, B, dA; B.blocksize = 8; B.alignment = 8; B.storage_type = Magma_CSR; int i; for( i = 1; i < argc; ++i ) { if ( strcmp("--format", argv[i]) == 0 ) { format = atoi( argv[++i] ); switch( format ) { case 0: B.storage_type = Magma_CSR; break; case 1: B.storage_type = Magma_ELL; break; case 2: B.storage_type = Magma_ELLRT; break; case 3: B.storage_type = Magma_SELLP; break; } }else if ( strcmp("--mscale", argv[i]) == 0 ) { scale = atoi( argv[++i] ); switch( scale ) { case 0: scaling = Magma_NOSCALE; break; case 1: scaling = Magma_UNITDIAG; break; case 2: scaling = Magma_UNITROW; break; } }else if ( strcmp("--precond", argv[i]) == 0 ) { format = atoi( argv[++i] ); switch( precond ) { case 0: precond_par.solver = Magma_JACOBI; break; } }else if ( strcmp("--blocksize", argv[i]) == 0 ) { B.blocksize = atoi( argv[++i] ); }else if ( strcmp("--alignment", argv[i]) == 0 ) { B.alignment = atoi( argv[++i] ); }else if ( strcmp("--verbose", argv[i]) == 0 ) { solver_par.verbose = atoi( argv[++i] ); } else if ( strcmp("--maxiter", argv[i]) == 0 ) { solver_par.maxiter = atoi( argv[++i] ); } else if ( strcmp("--tol", argv[i]) == 0 ) { sscanf( argv[++i], "%f", &solver_par.epsilon ); } else if ( strcmp("--eigenvalues", argv[i]) == 0 ) { solver_par.num_eigenvalues = atoi( argv[++i] ); } else break; } printf( "\n# usage: ./run_clobpcg" " [ --format %d (0=CSR, 1=ELL, 2=ELLRT, 4=SELLP)" " [ --blocksize %d --alignment %d ]" " --mscale %d (0=no, 1=unitdiag, 2=unitrownrm)" " --verbose %d 
(0=summary, k=details every k iterations)" " --maxiter %d --tol %.2e" " --preconditioner %d (0=Jacobi) " " --eigenvalues %d ]" " matrices \n\n", format, (int) B.blocksize, (int) B.alignment, (int) scale, (int) solver_par.verbose, (int) solver_par.maxiter, solver_par.epsilon, precond, (int) solver_par.num_eigenvalues); while( i < argc ){ magma_c_csr_mtx( &A, argv[i] ); printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n", (int) A.num_rows,(int) A.num_cols,(int) A.nnz ); // scale initial guess magma_cmscale( &A, scaling ); solver_par.ev_length = A.num_cols; magma_c_sparse_matrix A2; A2.storage_type = Magma_SELLC; A2.blocksize = 8; A2.alignment = 4; magma_c_mconvert( A, &A2, Magma_CSR, A2.storage_type ); // copy matrix to GPU magma_c_mtransfer( A2, &dA, Magma_CPU, Magma_DEV); magma_csolverinfo_init( &solver_par, &precond_par ); // inside the loop! // as the matrix size has influence on the EV-length real_Double_t gpu_time; // Find the blockSize smallest eigenvalues and corresponding eigen-vectors gpu_time = magma_wtime(); magma_clobpcg( dA, &solver_par ); gpu_time = magma_wtime() - gpu_time; printf("Time (sec) = %7.2f\n", gpu_time); printf("solver runtime (sec) = %7.2f\n", solver_par.runtime ); magma_csolverinfo_free( &solver_par, &precond_par ); magma_c_mfree( &dA ); magma_c_mfree( &A2 ); magma_c_mfree( &A ); i++; } TESTING_FINALIZE(); return 0; }