/**
    Purpose
    -------
    Performs solver_par->maxiter Jacobi sweeps of the form

        x <- c - M * x

    using a scratch vector allocated on the device.

    Arguments
    ---------
    @param M           iteration matrix applied in every sweep
    @param c           constant vector added in every sweep
    @param x           in: initial iterate, out: iterate after the last sweep
    @param solver_par  only the field maxiter is read
    @param queue       queue handed to the sparse routines

    @return MAGMA_SUCCESS (always)

    Note: after each sweep the vector structs *x and the scratch vector are
    exchanged by value instead of copying the data.  As a consequence *x may
    end up referring to the buffer allocated in this routine, and the buffer
    released at the end may be the one the caller originally passed in.
*/
extern "C" magma_int_t
magma_zjacobiiter(
    magma_z_sparse_matrix M,
    magma_z_vector c,
    magma_z_vector *x,
    magma_z_solver_par *solver_par,
    magma_queue_t queue )
{
    // remember the active kernel stream so that it can be restored on exit
    magma_queue_t saved_stream;
    magmablasGetKernelStream( &saved_stream );

    const magma_int_t nrows = M.num_rows;

    // scratch vector receiving the new iterate of every sweep
    magma_z_vector work;
    magma_z_vinit( &work, Magma_DEV, nrows, MAGMA_Z_ZERO, queue );

    magma_int_t sweep = 0;
    while ( sweep < solver_par->maxiter ) {
        // work = -M * x
        magma_z_spmv( MAGMA_Z_NEG_ONE, M, *x, MAGMA_Z_ZERO, work, queue );
        // work = work + c
        magma_zaxpy( nrows, MAGMA_Z_ONE, c.dval, 1, work.dval, 1 );

        // exchange the structs: *x holds the new iterate again and
        // work can be overwritten in the next sweep
        magma_z_vector previous = *x;
        *x   = work;
        work = previous;

        ++sweep;
    }

    magma_z_vfree( &work, queue );

    magmablasSetKernelStream( saved_stream );
    return MAGMA_SUCCESS;
}   /* magma_zjacobiiter */
/**
    Purpose
    -------
    Applies the preconditioner selected by precond->solver to b and writes
    the result to x.

    Arguments
    ---------
    @param A        system matrix; only A.num_rows is read here
    @param b        input vector
    @param x        output vector
    @param precond  preconditioner description; the field solver selects
                    the branch
    @param queue    queue handed to the called routines

    @return MAGMA_SUCCESS, or MAGMA_ERR_NOT_SUPPORTED for an unknown
            preconditioner type.

    Note: for Magma_ILU and Magma_ICC this routine only allocates and
    releases a temporary device vector; x is not written in these cases.
*/
extern "C" magma_int_t
magma_z_applyprecond(
    magma_z_sparse_matrix A,
    magma_z_vector b,
    magma_z_vector *x,
    magma_z_preconditioner *precond,
    magma_queue_t queue )
{
    // remember the active kernel stream so that it can be restored on exit
    magma_queue_t saved_stream;
    magmablasGetKernelStream( &saved_stream );

    magma_int_t status = MAGMA_SUCCESS;

    if ( precond->solver == Magma_JACOBI ) {
        magma_zjacobi_diagscal( A.num_rows, precond->d, b, x, queue );
    }
    else if ( precond->solver == Magma_PASTIX ) {
        magma_zapplypastix( b, x, precond, queue );
    }
    else if ( precond->solver == Magma_ILU
           || precond->solver == Magma_ICC ) {
        // placeholder: the temporary is created and destroyed, nothing
        // is applied and x stays untouched
        magma_z_vector scratch;
        magma_z_vinit( &scratch, Magma_DEV, A.num_rows, MAGMA_Z_ZERO, queue );
        magma_z_vfree( &scratch, queue );
    }
    else if ( precond->solver == Magma_NONE ) {
        // no preconditioning: x = b
        magma_zcopy( b.num_rows, b.dval, 1, x->dval, 1 );
    }
    else {
        printf( "error: preconditioner type not yet supported.\n" );
        status = MAGMA_ERR_NOT_SUPPORTED;
    }

    magmablasSetKernelStream( saved_stream );
    return status;
}
/**
    Purpose
    -------
    Prepares the vector c used by the Jacobi iteration from the right-hand
    side b and the vector d.

    If b resides on the host, c is computed entry-wise as b / d on the host
    and then transferred to *c.  If b resides on the device, the work is
    delegated to magma_zjacobisetup_vector_gpu.

    Arguments
    ---------
    @param b      right-hand side; its memory_location selects the code path
    @param d      vector d (see above)
    @param c      output vector
    @param queue  queue handed to the called routines

    @return MAGMA_SUCCESS (also when b.memory_location is neither Magma_CPU
            nor Magma_DEV, in which case nothing is done)

    NOTE(review): the host path creates *c through magma_z_vtransfer while
    the device path passes *c by value to the GPU routine, so the device
    path presumably expects *c to be allocated by the caller - confirm.
*/
extern "C" magma_int_t
magma_zjacobisetup_vector(
    magma_z_vector b,
    magma_z_vector d,
    magma_z_vector *c,
    magma_queue_t queue )
{
    if ( b.memory_location == Magma_CPU ) {
        magma_z_vector diag, c_t, b_h;
        magma_z_vinit( &c_t, Magma_CPU, b.num_rows, MAGMA_Z_ZERO, queue );

        magma_z_vtransfer( b, &b_h, b.memory_location, Magma_CPU, queue );
        // use the location of d itself as transfer source; it used to be
        // assumed that d lives wherever b lives
        magma_z_vtransfer( d, &diag, d.memory_location, Magma_CPU, queue );

        for( magma_int_t rowindex=0; rowindex<b.num_rows; rowindex++ ) {
            c_t.val[rowindex] = b_h.val[rowindex] / diag.val[rowindex];
        }

        magma_z_vtransfer( c_t, c, Magma_CPU, b.memory_location, queue );

        magma_z_vfree( &diag, queue );
        magma_z_vfree( &c_t, queue );
        magma_z_vfree( &b_h, queue );
    }
    else if ( b.memory_location == Magma_DEV ) {
        // temporary device vector required by the GPU routine
        magma_z_vector tmp;
        magma_z_vinit( &tmp, Magma_DEV, b.num_rows, MAGMA_Z_ZERO, queue );
        magma_zjacobisetup_vector_gpu( b.num_rows, b, d, *c, &tmp, queue );
        magma_z_vfree( &tmp, queue );
    }

    return MAGMA_SUCCESS;
}
/**
    Purpose
    -------
    Extracts the inverse of the diagonal of A: for every row the stored
    entry whose column index equals the row index is inverted and written
    to the corresponding position of d.

    The matrix is copied to the host (and converted to CSR if necessary),
    the result is transferred to the memory location of A.

    Arguments
    ---------
    @param A      input matrix
    @param d      output vector holding 1/a_ii
    @param queue  queue handed to the called routines

    @return MAGMA_SUCCESS (always)

    Rows without a stored diagonal entry keep the value the host vector was
    initialised with (zero).
*/
extern "C" magma_int_t
magma_zjacobisetup_diagscal(
    magma_z_sparse_matrix A,
    magma_z_vector *d,
    magma_queue_t queue )
{
    magma_z_sparse_matrix A_h1, B;
    magma_z_vector diag;
    magma_z_vinit( &diag, Magma_CPU, A.num_rows, MAGMA_Z_ZERO, queue );

    // B: host copy of A in CSR
    if ( A.storage_type != Magma_CSR ) {
        magma_z_mtransfer( A, &A_h1, A.memory_location, Magma_CPU, queue );
        magma_z_mconvert( A_h1, &B, A_h1.storage_type, Magma_CSR, queue );
    }
    else {
        magma_z_mtransfer( A, &B, A.memory_location, Magma_CPU, queue );
    }

    for( magma_int_t rowindex=0; rowindex<B.num_rows; rowindex++ ) {
        magma_int_t start = B.drow[rowindex];
        magma_int_t end   = B.drow[rowindex+1];
        for( magma_int_t i=start; i<end; i++ ) {
            if ( B.dcol[i]==rowindex ) {
                // test the matrix entry itself before inverting it: the
                // reciprocal of a zero entry is not zero, so checking the
                // inverted value does not detect a zero diagonal
                if ( MAGMA_Z_REAL( B.val[i] ) == 0 )
                    printf(" error: zero diagonal element in row %d!\n",
                                                            (int) rowindex);
                diag.val[rowindex] = 1.0/B.val[i];
            }
        }
    }

    magma_z_vtransfer( diag, d, Magma_CPU, A.memory_location, queue );

    if ( A.storage_type != Magma_CSR ) {
        magma_z_mfree( &A_h1, queue );
    }
    magma_z_mfree( &B, queue );
    magma_z_vfree( &diag, queue );

    return MAGMA_SUCCESS;
}
/**
    Purpose
    -------
    Iterative refinement: starting from x = 0, the residual r = b - A x is
    repeatedly handed to the inner solver / preconditioner
    (magma_z_precond) and the correction is added to x.

    Arguments
    ---------
    @param A            system matrix
    @param b            right-hand side
    @param x            in: used for the first residual evaluation only,
                        then overwritten; out: approximate solution
    @param solver_par   in: epsilon, maxiter, verbose (and res_vec / timing
                        when verbose > 0); out: solver, numiter, info,
                        init_res, final_res, iter_res, runtime
    @param precond_par  description of the inner solver
    @param queue        queue handed to the called routines

    @return MAGMA_SUCCESS (always); the convergence state is reported in
            solver_par->info (MAGMA_SUCCESS, MAGMA_SLOW_CONVERGENCE or
            MAGMA_DIVERGENCE).
*/
extern "C" magma_int_t
magma_ziterref(
    magma_z_sparse_matrix A,
    magma_z_vector b,
    magma_z_vector *x,
    magma_z_solver_par *solver_par,
    magma_z_preconditioner *precond_par,
    magma_queue_t queue )
{
    // set queue for old dense routines
    magma_queue_t orig_queue;
    magmablasGetKernelStream( &orig_queue );

    // prepare solver feedback
    solver_par->solver = Magma_ITERREF;
    solver_par->numiter = 0;
    solver_par->info = MAGMA_SUCCESS;

    double residual;
    magma_zresidual( A, b, *x, &residual, queue );
    solver_par->init_res = residual;

    // some useful variables
    magmaDoubleComplex c_zero = MAGMA_Z_ZERO, c_one = MAGMA_Z_ONE,
                       c_mone = MAGMA_Z_NEG_ONE;
    magma_int_t dofs = A.num_rows;

    // workspace
    magma_z_vector r, z;
    magma_z_vinit( &r, Magma_DEV, dofs, c_zero, queue );
    magma_z_vinit( &z, Magma_DEV, dofs, c_zero, queue );

    // solver variables
    double nom, nom0, r0;

    // solver setup
    magma_zscal( dofs, c_zero, x->dval, 1 );                    // x = 0
    magma_zcopy( dofs, b.dval, 1, r.dval, 1 );                  // r = b
    nom0 = magma_dznrm2( dofs, r.dval, 1 );                     // nom0 = || r ||
    nom = nom0 * nom0;
    solver_par->init_res = nom0;

    if ( (r0 = nom * solver_par->epsilon) < ATOLERANCE )
        r0 = ATOLERANCE;
    if ( nom < r0 ) {
        // nothing to do; release the workspace before leaving
        // (it used to be leaked on this path)
        magma_z_vfree( &r, queue );
        magma_z_vfree( &z, queue );
        magmablasSetKernelStream( orig_queue );
        return MAGMA_SUCCESS;
    }

    // chronometry
    real_Double_t tempo1, tempo2;
    tempo1 = magma_sync_wtime( queue );
    if ( solver_par->verbose > 0 ) {
        solver_par->res_vec[0] = nom0;
        solver_par->timing[0] = 0.0;
    }

    // start iteration
    for( solver_par->numiter = 1; solver_par->numiter < solver_par->maxiter;
                                                    solver_par->numiter++ ) {
        magma_zscal( dofs, MAGMA_Z_MAKE(1./nom, 0.), r.dval, 1 );   // scale it
        magma_z_precond( A, r, &z, precond_par, queue );  // inner solver: A * z = r
        magma_zscal( dofs, MAGMA_Z_MAKE(nom, 0.), z.dval, 1 );      // scale it
        magma_zaxpy( dofs, c_one, z.dval, 1, x->dval, 1 );          // x = x + z
        magma_z_spmv( c_mone, A, *x, c_zero, r, queue );            // r = - A x
        magma_zaxpy( dofs, c_one, b.dval, 1, r.dval, 1 );           // r = r + b
        nom = magma_dznrm2( dofs, r.dval, 1 );                      // nom = || r ||

        if ( solver_par->verbose > 0 ) {
            tempo2 = magma_sync_wtime( queue );
            if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) nom;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }

        if ( nom < r0 ) {
            break;
        }
    }

    tempo2 = magma_sync_wtime( queue );
    solver_par->runtime = (real_Double_t) tempo2-tempo1;
    magma_zresidual( A, b, *x, &residual, queue );
    solver_par->final_res = residual;
    solver_par->iter_res = nom;

    if ( solver_par->numiter < solver_par->maxiter ) {
        solver_par->info = MAGMA_SUCCESS;
    }
    else {
        // iteration limit reached: record the last residual and classify
        // the run by comparing initial and final residual
        if ( solver_par->verbose > 0
             && (solver_par->numiter)%solver_par->verbose==0 ) {
            solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                    = (real_Double_t) nom;
            solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                    = (real_Double_t) tempo2-tempo1;
        }
        if ( solver_par->init_res > solver_par->final_res ) {
            solver_par->info = MAGMA_SLOW_CONVERGENCE;
        }
        else {
            solver_par->info = MAGMA_DIVERGENCE;
        }
    }

    magma_z_vfree( &r, queue );
    magma_z_vfree( &z, queue );

    magmablasSetKernelStream( orig_queue );
    return MAGMA_SUCCESS;
}   /* magma_ziterref */
extern "C" magma_int_t magma_zpastixsetup( magma_z_sparse_matrix A, magma_z_vector b, magma_z_preconditioner *precond, magma_queue_t queue ) { #if defined(HAVE_PASTIX) #if defined(PRECISION_d) pastix_data_t *pastix_data = NULL; /* Pointer to a storage structure needed by pastix */ pastix_int_t ncol; /* Size of the matrix */ pastix_int_t *colptr = NULL; /* Indexes of first element of each column in row and values */ pastix_int_t *rows = NULL; /* Row of each element of the matrix */ pastix_float_t *values = NULL; /* Value of each element of the matrix */ pastix_float_t *rhs = NULL; /* right hand side */ pastix_int_t *iparm = NULL; /* integer parameters for pastix */ double *dparm = NULL; /* floating parameters for pastix */ pastix_int_t *perm = NULL; /* Permutation tabular */ pastix_int_t *invp = NULL; /* Reverse permutation tabular */ pastix_int_t mat_type; magma_z_sparse_matrix A_h1, B; magma_z_vector diag, c_t, b_h; magma_z_vinit( &c_t, Magma_CPU, A.num_rows, MAGMA_Z_ZERO, queue ); magma_z_vinit( &diag, Magma_CPU, A.num_rows, MAGMA_Z_ZERO, queue ); magma_z_vtransfer( b, &b_h, A.memory_location, Magma_CPU, queue ); if ( A.storage_type != Magma_CSR ) { magma_z_mtransfer( A, &A_h1, A.memory_location, Magma_CPU, queue ); magma_z_mconvert( A_h1, &B, A_h1.storage_type, Magma_CSR, queue ); } else { magma_z_mtransfer( A, &B, A.memory_location, Magma_CPU, queue ); } rhs = (pastix_float_t*) b_h.dval; ncol = B.num_rows; colptr = B.drow; rows = B.dcol; values = (pastix_float_t*) B.dval; mat_type = API_SYM_NO; iparm = (pastix_int_t*)malloc(IPARM_SIZE*sizeof(pastix_int_t)); dparm = (pastix_float_t*)malloc(DPARM_SIZE*sizeof(pastix_float_t)); /*******************************************/ /* Initialize parameters to default values */ /*******************************************/ iparm[IPARM_MODIFY_PARAMETER] = API_NO; pastix(&pastix_data, MPI_COMM_WORLD, ncol, colptr, rows, values, perm, invp, rhs, 1, iparm, dparm); iparm[IPARM_THREAD_NBR] = 16; iparm[IPARM_SYM] = mat_type; 
iparm[IPARM_FACTORIZATION] = API_FACT_LU; iparm[IPARM_VERBOSE] = API_VERBOSE_YES; iparm[IPARM_ORDERING] = API_ORDER_SCOTCH; iparm[IPARM_INCOMPLETE] = API_NO; iparm[IPARM_RHS_MAKING] = API_RHS_B; //iparm[IPARM_AMALGAMATION] = 5; iparm[IPARM_LEVEL_OF_FILL] = 0; /* if (incomplete == 1) { dparm[DPARM_EPSILON_REFINEMENT] = 1e-7; } */ /* * Matrix needs : * - to be in fortran numbering * - to have only the lower triangular part in symmetric case * - to have a graph with a symmetric structure in unsymmetric case * If those criteria are not matched, the csc will be reallocated and changed. */ iparm[IPARM_MATRIX_VERIFICATION] = API_YES; perm = (pastix_int_t*)malloc(ncol*sizeof(pastix_int_t)); invp = (pastix_int_t*)malloc(ncol*sizeof(pastix_int_t)); /*******************************************/ /* Step 1 - Ordering / Scotch */ /* Perform it only when the pattern of */ /* matrix change. */ /* eg: mesh refinement */ /* In many cases users can simply go from */ /* API_TASK_ORDERING to API_TASK_ANALYSE */ /* in one call. */ /*******************************************/ /*******************************************/ /* Step 2 - Symbolic factorization */ /* Perform it only when the pattern of */ /* matrix change. */ /*******************************************/ /*******************************************/ /* Step 3 - Mapping and Compute scheduling */ /* Perform it only when the pattern of */ /* matrix change. */ /*******************************************/ /*******************************************/ /* Step 4 - Numerical Factorisation */ /* Perform it each time the values of the */ /* matrix changed. 
*/ /*******************************************/ iparm[IPARM_START_TASK] = API_TASK_ORDERING; iparm[IPARM_END_TASK] = API_TASK_NUMFACT; pastix(&pastix_data, MPI_COMM_WORLD, ncol, colptr, rows, values, perm, invp, NULL, 1, iparm, dparm); precond->int_array_1 = (magma_int_t*) perm; precond->int_array_2 = (magma_int_t*) invp; precond->M.dval = (magmaDoubleComplex*) values; precond->M.dcol = (magma_int_t*) colptr; precond->M.drow = (magma_int_t*) rows; precond->M.num_rows = A.num_rows; precond->M.num_cols = A.num_cols; precond->M.memory_location = Magma_CPU; precond->pastix_data = pastix_data; precond->iparm = iparm; precond->dparm = dparm; if ( A.storage_type != Magma_CSR) { magma_z_mfree( &A_h1, queue ); } magma_z_vfree( &b_h, queue ); magma_z_mfree( &B, queue ); #else printf( "error: only double precision supported yet.\n"); #endif #else printf( "error: pastix not available.\n"); #endif return MAGMA_SUCCESS; }
/**
    Purpose
    -------
    Jacobi solver driver: builds the iteration matrix M and the vector c
    with magma_zjacobisetup, runs solver_par->maxiter sweeps with
    magma_zjacobiiter and reports residuals and runtime.

    Arguments
    ---------
    @param A           system matrix
    @param b           right-hand side
    @param x           in: initial guess, out: approximate solution
    @param solver_par  in: maxiter; out: solver, info, init_res, final_res,
                       numiter (set to maxiter), runtime.  res_vec and
                       timing are set to NULL.
    @param queue       queue handed to the called routines

    @return MAGMA_SUCCESS (always); solver_par->info is MAGMA_SUCCESS when
            the final residual is smaller than the initial one and
            MAGMA_DIVERGENCE otherwise.
*/
extern "C" magma_int_t
magma_zjacobi(
    magma_z_sparse_matrix A,
    magma_z_vector b,
    magma_z_vector *x,
    magma_z_solver_par *solver_par,
    magma_queue_t queue )
{
    // set queue for old dense routines
    magma_queue_t orig_queue;
    magmablasGetKernelStream( &orig_queue );

    // prepare solver feedback
    solver_par->solver = Magma_JACOBI;
    solver_par->info = MAGMA_SUCCESS;

    real_Double_t tempo1, tempo2;
    double residual;
    magma_zresidual( A, b, *x, &residual, queue );
    solver_par->init_res = residual;
    solver_par->res_vec = NULL;
    solver_par->timing = NULL;

    // The residual norm used to be evaluated a second time here through a
    // temporary device vector, an SpMV, an axpy and a norm; the value was
    // never read, so this dead work has been removed.

    // Jacobi setup
    magma_z_sparse_matrix M;
    magma_z_vector c;
    magma_zjacobisetup( A, b, &M, &c, queue );

    // the iterator only reads maxiter from its parameter struct
    magma_z_solver_par jacobiiter_par;
    jacobiiter_par.maxiter = solver_par->maxiter;

    tempo1 = magma_sync_wtime( queue );

    // Jacobi iterator
    magma_zjacobiiter( M, c, x, &jacobiiter_par, queue );

    tempo2 = magma_sync_wtime( queue );
    solver_par->runtime = (real_Double_t) tempo2-tempo1;

    magma_zresidual( A, b, *x, &residual, queue );
    solver_par->final_res = residual;
    solver_par->numiter = solver_par->maxiter;

    if ( solver_par->init_res > solver_par->final_res )
        solver_par->info = MAGMA_SUCCESS;
    else
        solver_par->info = MAGMA_DIVERGENCE;

    magma_z_mfree( &M, queue );
    magma_z_vfree( &c, queue );

    magmablasSetKernelStream( orig_queue );
    return MAGMA_SUCCESS;
}   /* magma_zjacobi */
/**
    Purpose
    -------
    Builds the data for the Jacobi iteration from A and b:

      - every row of A is divided by its diagonal entry and the diagonal
        entry itself is set to zero; after compressing the CSR structure
        the result is returned as M (in the storage format and memory
        location of A),
      - c is b divided entry-wise by the diagonal of A.

    All computations are done on host copies.

    Arguments
    ---------
    @param A      input matrix
    @param b      right-hand side
    @param M      output: scaled matrix with zeroed diagonal
    @param c      output: scaled right-hand side
    @param queue  queue handed to the called routines

    @return MAGMA_SUCCESS (always)

    A message is printed when the real part of a diagonal entry is zero;
    the division is carried out nevertheless.
*/
extern "C" magma_int_t
magma_zjacobisetup(
    magma_z_sparse_matrix A,
    magma_z_vector b,
    magma_z_sparse_matrix *M,
    magma_z_vector *c,
    magma_queue_t queue )
{
    magma_z_sparse_matrix A_h1, A_h2, B, C;
    magma_z_vector diag, c_t, b_h;

    // host work vectors and host copy of b
    magma_z_vinit( &c_t, Magma_CPU, A.num_rows, MAGMA_Z_ZERO, queue );
    magma_z_vinit( &diag, Magma_CPU, A.num_rows, MAGMA_Z_ZERO, queue );
    magma_z_vtransfer( b, &b_h, A.memory_location, Magma_CPU, queue );

    // B: host copy of A in CSR
    if ( A.storage_type == Magma_CSR ) {
        magma_z_mtransfer( A, &B, A.memory_location, Magma_CPU, queue );
    }
    else {
        magma_z_mtransfer( A, &A_h1, A.memory_location, Magma_CPU, queue );
        magma_z_mconvert( A_h1, &B, A_h1.storage_type, Magma_CSR, queue );
    }

    for( magma_int_t row = 0; row < B.num_rows; ++row ) {
        const magma_int_t first = B.drow[row];
        const magma_int_t last  = B.drow[row+1];

        // pass 1: pick up the diagonal entry of this row
        for( magma_int_t k = first; k < last; ++k ) {
            if ( B.dcol[k] != row ) {
                continue;
            }
            diag.val[row] = B.val[k];
            if ( MAGMA_Z_REAL( diag.val[row] ) == 0 )
                printf(" error: zero diagonal element in row %d!\n",
                                                            (int) row);
        }

        // pass 2: scale the off-diagonal entries, clear the diagonal one
        for( magma_int_t k = first; k < last; ++k ) {
            if ( B.dcol[k] == row ) {
                B.val[k] = MAGMA_Z_MAKE( 0., 0. );
            }
            else {
                B.val[k] = B.val[k] / diag.val[row];
            }
        }

        c_t.val[row] = b_h.val[row] / diag.val[row];
    }

    // drop the explicit zeros: C is the compressed version of B
    magma_z_csr_compressor( &B.val, &B.drow, &B.dcol,
                            &C.val, &C.drow, &C.dcol, &B.num_rows, queue );
    C.num_rows        = B.num_rows;
    C.num_cols        = B.num_cols;
    C.nnz             = C.drow[B.num_rows];
    C.storage_type    = B.storage_type;
    C.memory_location = B.memory_location;

    // bring the result into the format / location of A
    if ( A.storage_type == Magma_CSR ) {
        magma_z_mtransfer( C, M, Magma_CPU, A.memory_location, queue );
    }
    else {
        A_h2.alignment = A.alignment;
        A_h2.blocksize = A.blocksize;
        magma_z_mconvert( C, &A_h2, Magma_CSR, A_h1.storage_type, queue );
        magma_z_mtransfer( A_h2, M, Magma_CPU, A.memory_location, queue );
    }
    magma_z_vtransfer( c_t, c, Magma_CPU, A.memory_location, queue );

    // release all host temporaries
    if ( A.storage_type != Magma_CSR ) {
        magma_z_mfree( &A_h1, queue );
        magma_z_mfree( &A_h2, queue );
    }
    magma_z_mfree( &B, queue );
    magma_z_mfree( &C, queue );
    magma_z_vfree( &diag, queue );
    magma_z_vfree( &c_t, queue );
    magma_z_vfree( &b_h, queue );

    return MAGMA_SUCCESS;
}
/* //////////////////////////////////////////////////////////////////////////// -- running magma_zbaiter */ int main( int argc, char** argv) { TESTING_INIT(); magma_z_solver_par solver_par; magma_z_preconditioner precond_par; solver_par.maxiter = 1000; solver_par.verbose = 0; solver_par.num_eigenvalues = 0; int scale = 0; magma_scale_t scaling = Magma_NOSCALE; magma_z_sparse_matrix A; magma_z_vector x, b; magmaDoubleComplex one = MAGMA_Z_MAKE(1.0, 0.0); magmaDoubleComplex zero = MAGMA_Z_MAKE(0.0, 0.0); int i; for( i = 1; i < argc; ++i ) { if ( strcmp("--maxiter", argv[i]) == 0 ){ solver_par.maxiter = atoi( argv[++i] ); }else if ( strcmp("--mscale", argv[i]) == 0 ) { scale = atoi( argv[++i] ); switch( scale ) { case 0: scaling = Magma_NOSCALE; break; case 1: scaling = Magma_UNITDIAG; break; case 2: scaling = Magma_UNITROW; break; } }else break; } printf( "\n# usage: ./run_zbaiter" " [ " " --mscale %d (0=no, 1=unitdiag, 2=unitrownrm)" " --maxiter %d ]" " matrices \n\n", (int) scale, (int) solver_par.maxiter); magma_zsolverinfo_init( &solver_par, &precond_par ); while( i < argc ){ magma_z_csr_mtx( &A, argv[i] ); printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n", (int) A.num_rows,(int) A.num_cols,(int) A.nnz ); // scale initial guess magma_zmscale( &A, scaling ); magma_z_vinit( &b, Magma_DEV, A.num_cols, one ); magma_z_vinit( &x, Magma_DEV, A.num_cols, zero ); magma_zbaiter( A, b, &x, &solver_par ); magma_zsolverinfo( &solver_par, &precond_par ); magma_z_mfree(&A); magma_z_vfree(&x); magma_z_vfree(&b); i++; } magma_zsolverinfo_free( &solver_par, &precond_par ); TESTING_FINALIZE(); return 0; }
magma_int_t magma_zpcg( magma_z_sparse_matrix A, magma_z_vector b, magma_z_vector *x, magma_z_solver_par *solver_par, magma_z_preconditioner *precond_par ){ // prepare solver feedback solver_par->solver = Magma_PCG; solver_par->numiter = 0; solver_par->info = 0; // local variables magmaDoubleComplex c_zero = MAGMA_Z_ZERO, c_one = MAGMA_Z_ONE; magma_int_t dofs = A.num_rows; // GPU workspace magma_z_vector r, rt, p, q, h; magma_z_vinit( &r, Magma_DEV, dofs, c_zero ); magma_z_vinit( &rt, Magma_DEV, dofs, c_zero ); magma_z_vinit( &p, Magma_DEV, dofs, c_zero ); magma_z_vinit( &q, Magma_DEV, dofs, c_zero ); magma_z_vinit( &h, Magma_DEV, dofs, c_zero ); // solver variables magmaDoubleComplex alpha, beta; double nom, nom0, r0, gammaold, gammanew, den, res; // solver setup magma_zscal( dofs, c_zero, x->val, 1) ; // x = 0 magma_zcopy( dofs, b.val, 1, r.val, 1 ); // r = b // preconditioner magma_z_applyprecond_left( A, r, &rt, precond_par ); magma_z_applyprecond_right( A, rt, &h, precond_par ); magma_zcopy( dofs, h.val, 1, p.val, 1 ); // p = h nom = MAGMA_Z_REAL( magma_zdotc(dofs, r.val, 1, h.val, 1) ); nom0 = magma_dznrm2( dofs, r.val, 1 ); magma_z_spmv( c_one, A, p, c_zero, q ); // q = A p den = MAGMA_Z_REAL( magma_zdotc(dofs, p.val, 1, q.val, 1) );// den = p dot q solver_par->init_res = nom0; if ( (r0 = nom * solver_par->epsilon) < ATOLERANCE ) r0 = ATOLERANCE; if ( nom < r0 ) return MAGMA_SUCCESS; // check positive definite if (den <= 0.0) { printf("Operator A is not postive definite. 
(Ar,r) = %f\n", den); return -100; } //Chronometry real_Double_t tempo1, tempo2; magma_device_sync(); tempo1=magma_wtime(); if( solver_par->verbose > 0 ){ solver_par->res_vec[0] = (real_Double_t)nom0; solver_par->timing[0] = 0.0; } // start iteration for( solver_par->numiter= 1; solver_par->numiter<solver_par->maxiter; solver_par->numiter++ ){ // preconditioner magma_z_applyprecond_left( A, r, &rt, precond_par ); magma_z_applyprecond_right( A, rt, &h, precond_par ); gammanew = MAGMA_Z_REAL( magma_zdotc(dofs, r.val, 1, h.val, 1) ); // gn = < r,h> if( solver_par->numiter==1 ){ magma_zcopy( dofs, h.val, 1, p.val, 1 ); // p = h }else{ beta = MAGMA_Z_MAKE(gammanew/gammaold, 0.); // beta = gn/go magma_zscal(dofs, beta, p.val, 1); // p = beta*p magma_zaxpy(dofs, c_one, h.val, 1, p.val, 1); // p = p + h } magma_z_spmv( c_one, A, p, c_zero, q ); // q = A p den = MAGMA_Z_REAL(magma_zdotc(dofs, p.val, 1, q.val, 1)); // den = p dot q alpha = MAGMA_Z_MAKE(gammanew/den, 0.); magma_zaxpy(dofs, alpha, p.val, 1, x->val, 1); // x = x + alpha p magma_zaxpy(dofs, -alpha, q.val, 1, r.val, 1); // r = r - alpha q gammaold = gammanew; res = magma_dznrm2( dofs, r.val, 1 ); if( solver_par->verbose > 0 ){ magma_device_sync(); tempo2=magma_wtime(); if( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } if ( res/nom0 < solver_par->epsilon ) { break; } } magma_device_sync(); tempo2=magma_wtime(); solver_par->runtime = (real_Double_t) tempo2-tempo1; double residual; magma_zresidual( A, b, *x, &residual ); solver_par->iter_res = res; solver_par->final_res = residual; if( solver_par->numiter < solver_par->maxiter){ solver_par->info = 0; }else if( solver_par->init_res > solver_par->final_res ){ if( solver_par->verbose > 0 ){ if( (solver_par->numiter)%solver_par->verbose==0 ) { 
solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } solver_par->info = -2; } else{ if( solver_par->verbose > 0 ){ if( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } solver_par->info = -1; } magma_z_vfree(&r); magma_z_vfree(&rt); magma_z_vfree(&p); magma_z_vfree(&q); magma_z_vfree(&h); return MAGMA_SUCCESS; } /* magma_zcg */
/* //////////////////////////////////////////////////////////////////////////// -- running magma_zgmres */ int main( int argc, char** argv) { TESTING_INIT(); magma_z_solver_par solver_par; magma_z_preconditioner precond_par; solver_par.epsilon = 10e-16; solver_par.maxiter = 1000; solver_par.restart = 30; solver_par.num_eigenvalues = 0; solver_par.ortho = Magma_CGS; solver_par.verbose = 0; int format = 0; int ortho = 0; int scale = 0; magma_scale_t scaling = Magma_NOSCALE; magma_z_sparse_matrix A, B, B_d; magma_z_vector x, b; B.blocksize = 8; B.alignment = 8; magmaDoubleComplex one = MAGMA_Z_MAKE(1.0, 0.0); magmaDoubleComplex zero = MAGMA_Z_MAKE(0.0, 0.0); B.storage_type = Magma_CSR; int i; for( i = 1; i < argc; ++i ) { if ( strcmp("--format", argv[i]) == 0 ) { format = atoi( argv[++i] ); switch( format ) { case 0: B.storage_type = Magma_CSR; break; case 1: B.storage_type = Magma_ELL; break; case 2: B.storage_type = Magma_ELLRT; break; case 3: B.storage_type = Magma_SELLP; break; } }else if ( strcmp("--mscale", argv[i]) == 0 ) { scale = atoi( argv[++i] ); switch( scale ) { case 0: scaling = Magma_NOSCALE; break; case 1: scaling = Magma_UNITDIAG; break; case 2: scaling = Magma_UNITROW; break; } }else if ( strcmp("--blocksize", argv[i]) == 0 ) { B.blocksize = atoi( argv[++i] ); }else if ( strcmp("--alignment", argv[i]) == 0 ) { B.alignment = atoi( argv[++i] ); }else if ( strcmp("--verbose", argv[i]) == 0 ) { solver_par.verbose = atoi( argv[++i] ); } else if ( strcmp("--ortho", argv[i]) == 0 ) { ortho = atoi( argv[++i] ); switch( ortho ) { case 0: solver_par.ortho = Magma_CGS; break; case 1: solver_par.ortho = Magma_MGS; break; case 2: solver_par.ortho = Magma_FUSED_CGS; break; } } else if ( strcmp("--restart", argv[i]) == 0 ) { solver_par.restart = atoi( argv[++i] ); } else if ( strcmp("--maxiter", argv[i]) == 0 ) { solver_par.maxiter = atoi( argv[++i] ); } else if ( strcmp("--tol", argv[i]) == 0 ) { sscanf( argv[++i], "%lf", &solver_par.epsilon ); } else break; } 
printf( "\n# usage: ./run_zgmres" " [ --format %d (0=CSR, 1=ELL 2=ELLRT, 3=SELLP)" " [ --blocksize %d --alignment %d ]" " --mscale %d (0=no, 1=unitdiag, 2=unitrownrm)" " --verbose %d (0=summary, k=details every k iterations)" " --restart %d --maxiter %d --tol %.2e" " --ortho %d (0=CGS, 1=MGS, 2=FUSED_CGS) ]" " matrices \n\n", format, (int) B.blocksize, (int) B.alignment, (int) scale, (int) solver_par.verbose, (int) solver_par.restart, (int) solver_par.maxiter, solver_par.epsilon, ortho ); magma_zsolverinfo_init( &solver_par, &precond_par ); while( i < argc ){ magma_z_csr_mtx( &A, argv[i] ); printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n", (int) A.num_rows,(int) A.num_cols,(int) A.nnz ); // scale matrix magma_zmscale( &A, scaling ); magma_z_mconvert( A, &B, Magma_CSR, B.storage_type ); magma_z_mtransfer( B, &B_d, Magma_CPU, Magma_DEV ); // vectors and initial guess magma_z_vinit( &b, Magma_DEV, A.num_cols, one ); magma_z_vinit( &x, Magma_DEV, A.num_cols, one ); magma_z_spmv( one, B_d, x, zero, b ); // b = A x magma_z_vfree(&x); magma_z_vinit( &x, Magma_DEV, A.num_cols, zero ); magma_zgmres( B_d, b, &x, &solver_par ); magma_zsolverinfo( &solver_par, &precond_par ); magma_z_mfree(&B_d); magma_z_mfree(&B); magma_z_mfree(&A); magma_z_vfree(&x); magma_z_vfree(&b); i++; } magma_zsolverinfo_free( &solver_par, &precond_par ); TESTING_FINALIZE(); return 0; }
/* //////////////////////////////////////////////////////////////////////////// -- Debugging file */ int main( int argc, char** argv) { TESTING_INIT(); magma_z_solver_par solver_par; magma_z_preconditioner precond_par; solver_par.epsilon = 10e-16; solver_par.maxiter = 1000; solver_par.verbose = 0; solver_par.restart = 30; solver_par.num_eigenvalues = 0; solver_par.ortho = Magma_CGS; magmaDoubleComplex one = MAGMA_Z_MAKE(1.0, 0.0); magmaDoubleComplex zero = MAGMA_Z_MAKE(0.0, 0.0); magma_z_sparse_matrix A, B, B_d; magma_z_vector x, b; // generate matrix of desired structure and size magma_int_t n=100; // size is n*n magma_int_t nn = n*n; magma_int_t offdiags = 2; magma_index_t *diag_offset; magmaDoubleComplex *diag_vals; magma_zmalloc_cpu( &diag_vals, offdiags+1 ); magma_index_malloc_cpu( &diag_offset, offdiags+1 ); diag_offset[0] = 0; diag_offset[1] = 1; diag_offset[2] = n; diag_vals[0] = MAGMA_Z_MAKE( 4.1, 0.0 ); diag_vals[1] = MAGMA_Z_MAKE( -1.0, 0.0 ); diag_vals[2] = MAGMA_Z_MAKE( -1.0, 0.0 ); magma_zmgenerator( nn, offdiags, diag_offset, diag_vals, &A ); // convert marix into desired format B.storage_type = Magma_SELLC; B.blocksize = 8; B.alignment = 8; // scale matrix magma_zmscale( &A, Magma_UNITDIAG ); magma_z_mconvert( A, &B, Magma_CSR, B.storage_type ); magma_z_mtransfer( B, &B_d, Magma_CPU, Magma_DEV ); // test CG #################################### // vectors and initial guess magma_z_vinit( &b, Magma_DEV, A.num_cols, one ); magma_z_vinit( &x, Magma_DEV, A.num_cols, one ); magma_z_spmv( one, B_d, x, zero, b ); // b = A x magma_z_vfree(&x); magma_z_vinit( &x, Magma_DEV, A.num_cols, zero ); magma_zsolverinfo_init( &solver_par, &precond_par ); // solver magma_zcg_res( B_d, b, &x, &solver_par ); // solverinfo magma_zsolverinfo( &solver_par, &precond_par ); if( solver_par.numiter > 150 ){ printf("error: test not passed!\n"); exit(-1); } magma_zsolverinfo_free( &solver_par, &precond_par ); magma_z_vfree(&x); magma_z_vfree(&b); // test PCG Jacobi 
############################ // vectors and initial guess magma_z_vinit( &b, Magma_DEV, A.num_cols, one ); magma_z_vinit( &x, Magma_DEV, A.num_cols, one ); magma_z_spmv( one, B_d, x, zero, b ); // b = A x magma_z_vfree(&x); magma_z_vinit( &x, Magma_DEV, A.num_cols, zero ); magma_zsolverinfo_init( &solver_par, &precond_par ); // Preconditioner precond_par.solver = Magma_JACOBI; magma_z_precondsetup( B_d, b, &precond_par ); // solver magma_zpcg( B_d, b, &x, &solver_par, &precond_par ); // solverinfo magma_zsolverinfo( &solver_par, &precond_par ); if( solver_par.numiter > 150 ){ printf("error: test not passed!\n"); exit(-1); } magma_zsolverinfo_free( &solver_par, &precond_par ); magma_z_vfree(&x); magma_z_vfree(&b); // test PCG IC ################################ // vectors and initial guess magma_z_vinit( &b, Magma_DEV, A.num_cols, one ); magma_z_vinit( &x, Magma_DEV, A.num_cols, one ); magma_z_spmv( one, B_d, x, zero, b ); // b = A x magma_z_vfree(&x); magma_z_vinit( &x, Magma_DEV, A.num_cols, zero ); magma_zsolverinfo_init( &solver_par, &precond_par ); // Preconditioner precond_par.solver = Magma_ICC; magma_z_precondsetup( B_d, b, &precond_par ); // solver magma_zpcg( B_d, b, &x, &solver_par, &precond_par ); // solverinfo magma_zsolverinfo( &solver_par, &precond_par ); if( solver_par.numiter > 150 ){ printf("error: test not passed!\n"); exit(-1); } magma_zsolverinfo_free( &solver_par, &precond_par ); magma_z_vfree(&x); magma_z_vfree(&b); // test PCG IC ################################ // vectors and initial guess magma_z_vinit( &b, Magma_DEV, A.num_cols, one ); magma_z_vinit( &x, Magma_DEV, A.num_cols, one ); magma_z_spmv( one, B_d, x, zero, b ); // b = A x magma_z_vfree(&x); magma_z_vinit( &x, Magma_DEV, A.num_cols, zero ); magma_zsolverinfo_init( &solver_par, &precond_par ); // Preconditioner precond_par.solver = Magma_ICC; magma_z_precondsetup( B_d, b, &precond_par ); // solver magma_zpcg( B_d, b, &x, &solver_par, &precond_par ); // solverinfo magma_zsolverinfo( 
&solver_par, &precond_par ); if( solver_par.numiter > 150 ){ printf("error: test not passed!\n"); exit(-1); } magma_zsolverinfo_free( &solver_par, &precond_par ); magma_z_vfree(&x); magma_z_vfree(&b); // test BICGSTAB #################################### // vectors and initial guess magma_z_vinit( &b, Magma_DEV, A.num_cols, one ); magma_z_vinit( &x, Magma_DEV, A.num_cols, one ); magma_z_spmv( one, B_d, x, zero, b ); // b = A x magma_z_vfree(&x); magma_z_vinit( &x, Magma_DEV, A.num_cols, zero ); magma_zsolverinfo_init( &solver_par, &precond_par ); // solver magma_zbicgstab( B_d, b, &x, &solver_par ); // solverinfo magma_zsolverinfo( &solver_par, &precond_par ); if( solver_par.numiter > 150 ){ printf("error: test not passed!\n"); exit(-1); } magma_zsolverinfo_free( &solver_par, &precond_par ); magma_z_vfree(&x); magma_z_vfree(&b); // test PBICGSTAB Jacobi ############################ // vectors and initial guess magma_z_vinit( &b, Magma_DEV, A.num_cols, one ); magma_z_vinit( &x, Magma_DEV, A.num_cols, one ); magma_z_spmv( one, B_d, x, zero, b ); // b = A x magma_z_vfree(&x); magma_z_vinit( &x, Magma_DEV, A.num_cols, zero ); magma_zsolverinfo_init( &solver_par, &precond_par ); // Preconditioner precond_par.solver = Magma_JACOBI; magma_z_precondsetup( B_d, b, &precond_par ); // solver magma_zpbicgstab( B_d, b, &x, &solver_par, &precond_par ); // solverinfo magma_zsolverinfo( &solver_par, &precond_par ); if( solver_par.numiter > 150 ){ printf("error: test not passed!\n"); exit(-1); } magma_zsolverinfo_free( &solver_par, &precond_par ); magma_z_vfree(&x); magma_z_vfree(&b); /* // test PBICGSTAB ILU ############################### // vectors and initial guess magma_z_vinit( &b, Magma_DEV, A.num_cols, one ); magma_z_vinit( &x, Magma_DEV, A.num_cols, one ); magma_z_spmv( one, B_d, x, zero, b ); // b = A x magma_z_vfree(&x); magma_z_vinit( &x, Magma_DEV, A.num_cols, zero ); magma_zsolverinfo_init( &solver_par, &precond_par ); // Preconditioner precond_par.solver = Magma_ILU; 
magma_z_precondsetup( B_d, b, &precond_par ); // solver magma_zpbicgstab( B_d, b, &x, &solver_par, &precond_par ); // solverinfo magma_zsolverinfo( &solver_par, &precond_par ); if( solver_par.numiter > 150 ){ printf("error: test not passed!\n"); exit(-1); } magma_zsolverinfo_free( &solver_par, &precond_par ); magma_z_vfree(&x); magma_z_vfree(&b); // test PBICGSTAB ILU ############################### // vectors and initial guess magma_z_vinit( &b, Magma_DEV, A.num_cols, one ); magma_z_vinit( &x, Magma_DEV, A.num_cols, one ); magma_z_spmv( one, B_d, x, zero, b ); // b = A x magma_z_vfree(&x);printf("here\n"); magma_z_vinit( &x, Magma_DEV, A.num_cols, zero ); magma_zsolverinfo_init( &solver_par, &precond_par ); // Preconditioner precond_par.solver = Magma_ILU; magma_z_precondsetup( B_d, b, &precond_par ); // solver magma_zpbicgstab( B_d, b, &x, &solver_par, &precond_par ); // solverinfo magma_zsolverinfo( &solver_par, &precond_par ); if( solver_par.numiter > 150 ){ printf("error: test not passed!\n"); exit(-1); } magma_zsolverinfo_free( &solver_par, &precond_par ); magma_z_vfree(&x); magma_z_vfree(&b); // test GMRES #################################### // vectors and initial guess magma_z_vinit( &b, Magma_DEV, A.num_cols, one ); magma_z_vinit( &x, Magma_DEV, A.num_cols, one ); magma_z_spmv( one, B_d, x, zero, b ); // b = A x magma_z_vfree(&x); magma_z_vinit( &x, Magma_DEV, A.num_cols, zero ); magma_zsolverinfo_init( &solver_par, &precond_par ); // solver magma_zgmres( B_d, b, &x, &solver_par ); // solverinfo magma_zsolverinfo( &solver_par, &precond_par ); magma_zsolverinfo_free( &solver_par, &precond_par ); magma_z_vfree(&x); magma_z_vfree(&b); // test PGMRES Jacobi ############################ // vectors and initial guess magma_z_vinit( &b, Magma_DEV, A.num_cols, one ); magma_z_vinit( &x, Magma_DEV, A.num_cols, one ); magma_z_spmv( one, B_d, x, zero, b ); // b = A x magma_z_vfree(&x); magma_z_vinit( &x, Magma_DEV, A.num_cols, zero ); magma_zsolverinfo_init( 
&solver_par, &precond_par ); // Preconditioner precond_par.solver = Magma_JACOBI; magma_z_precondsetup( B_d, b, &precond_par ); // solver magma_zpgmres( B_d, b, &x, &solver_par, &precond_par ); // solverinfo magma_zsolverinfo( &solver_par, &precond_par ); magma_zsolverinfo_free( &solver_par, &precond_par ); magma_z_vfree(&x); magma_z_vfree(&b);*/ // test PGMRES ILU ############################### // vectors and initial guess magma_z_vinit( &b, Magma_DEV, A.num_cols, one ); magma_z_vinit( &x, Magma_DEV, A.num_cols, one ); magma_z_spmv( one, B_d, x, zero, b ); // b = A x magma_z_vfree(&x); magma_z_vinit( &x, Magma_DEV, A.num_cols, zero ); magma_zsolverinfo_init( &solver_par, &precond_par ); // Preconditioner precond_par.solver = Magma_ILU; magma_z_precondsetup( B_d, b, &precond_par ); // solver magma_zpgmres( B_d, b, &x, &solver_par, &precond_par ); // solverinfo magma_zsolverinfo( &solver_par, &precond_par ); if( solver_par.numiter > 150 ){ printf("error: test not passed!\n"); exit(-1); } magma_zsolverinfo_free( &solver_par, &precond_par ); magma_z_vfree(&x); magma_z_vfree(&b); printf("all tests passed.\n"); magma_z_mfree(&B_d); magma_z_mfree(&B); magma_z_mfree(&A); TESTING_FINALIZE(); return 0; }
/* //////////////////////////////////////////////////////////////////////////// -- testing sparse matrix vector product */ int main( int argc, char** argv ) { TESTING_INIT(); magma_queue_t queue; magma_queue_create( /*devices[ opts->device ],*/ &queue ); magma_z_sparse_matrix hA, hA_SELLP, hA_ELL, dA, dA_SELLP, dA_ELL; hA_SELLP.blocksize = 8; hA_SELLP.alignment = 8; real_Double_t start, end, res; magma_int_t *pntre; magmaDoubleComplex c_one = MAGMA_Z_MAKE(1.0, 0.0); magmaDoubleComplex c_zero = MAGMA_Z_MAKE(0.0, 0.0); magma_int_t i, j; for( i = 1; i < argc; ++i ) { if ( strcmp("--blocksize", argv[i]) == 0 ) { hA_SELLP.blocksize = atoi( argv[++i] ); } else if ( strcmp("--alignment", argv[i]) == 0 ) { hA_SELLP.alignment = atoi( argv[++i] ); } else break; } printf( "\n# usage: ./run_zspmv" " [ --blocksize %d --alignment %d (for SELLP) ]" " matrices \n\n", (int) hA_SELLP.blocksize, (int) hA_SELLP.alignment ); while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); magma_zm_5stencil( laplace_size, &hA, queue ); } else { // file-matrix test magma_z_csr_mtx( &hA, argv[i], queue ); } printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n", (int) hA.num_rows,(int) hA.num_cols,(int) hA.nnz ); real_Double_t FLOPS = 2.0*hA.nnz/1e9; magma_z_vector hx, hy, dx, dy, hrefvec, hcheck; // init CPU vectors magma_z_vinit( &hx, Magma_CPU, hA.num_rows, c_zero, queue ); magma_z_vinit( &hy, Magma_CPU, hA.num_rows, c_zero, queue ); // init DEV vectors magma_z_vinit( &dx, Magma_DEV, hA.num_rows, c_one, queue ); magma_z_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue ); #ifdef MAGMA_WITH_MKL // calling MKL with CSR pntre = (magma_int_t*)malloc( (hA.num_rows+1)*sizeof(magma_int_t) ); pntre[0] = 0; for (j=0; j<hA.num_rows; j++ ) { pntre[j] = hA.row[j+1]; } MKL_INT num_rows = hA.num_rows; MKL_INT num_cols = hA.num_cols; MKL_INT nnz = hA.nnz; MKL_INT *col; TESTING_MALLOC_CPU( col, MKL_INT, nnz ); for( 
magma_int_t t=0; t < hA.nnz; ++t ) { col[ t ] = hA.col[ t ]; } MKL_INT *row; TESTING_MALLOC_CPU( row, MKL_INT, num_rows ); for( magma_int_t t=0; t < hA.num_rows; ++t ) { row[ t ] = hA.col[ t ]; } start = magma_wtime(); for (j=0; j<10; j++ ) { mkl_zcsrmv( "N", &num_rows, &num_cols, MKL_ADDR(&c_one), "GFNC", MKL_ADDR(hA.val), col, row, pntre, MKL_ADDR(hx.val), MKL_ADDR(&c_zero), MKL_ADDR(hy.val) ); } end = magma_wtime(); printf( "\n > MKL : %.2e seconds %.2e GFLOP/s (CSR).\n", (end-start)/10, FLOPS*10/(end-start) ); TESTING_FREE_CPU( row ); TESTING_FREE_CPU( col ); free(pntre); #endif // MAGMA_WITH_MKL // copy matrix to GPU magma_z_mtransfer( hA, &dA, Magma_CPU, Magma_DEV, queue ); // SpMV on GPU (CSR) -- this is the reference! start = magma_sync_wtime( queue ); for (j=0; j<10; j++) magma_z_spmv( c_one, dA, dx, c_zero, dy, queue ); end = magma_sync_wtime( queue ); printf( " > MAGMA: %.2e seconds %.2e GFLOP/s (standard CSR).\n", (end-start)/10, FLOPS*10/(end-start) ); magma_z_mfree(&dA, queue ); magma_z_vtransfer( dy, &hrefvec , Magma_DEV, Magma_CPU, queue ); // convert to ELL and copy to GPU magma_z_mconvert( hA, &hA_ELL, Magma_CSR, Magma_ELL, queue ); magma_z_mtransfer( hA_ELL, &dA_ELL, Magma_CPU, Magma_DEV, queue ); magma_z_mfree(&hA_ELL, queue ); magma_z_vfree( &dy, queue ); magma_z_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue ); // SpMV on GPU (ELL) start = magma_sync_wtime( queue ); for (j=0; j<10; j++) magma_z_spmv( c_one, dA_ELL, dx, c_zero, dy, queue ); end = magma_sync_wtime( queue ); printf( " > MAGMA: %.2e seconds %.2e GFLOP/s (standard ELL).\n", (end-start)/10, FLOPS*10/(end-start) ); magma_z_mfree(&dA_ELL, queue ); magma_z_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue ); res = 0.0; for(magma_int_t k=0; k<hA.num_rows; k++ ) res=res + MAGMA_Z_REAL(hcheck.val[k]) - MAGMA_Z_REAL(hrefvec.val[k]); if ( res < .000001 ) printf("# tester spmv ELL: ok\n"); else printf("# tester spmv ELL: failed\n"); magma_z_vfree( &hcheck, queue ); // convert to SELLP 
and copy to GPU magma_z_mconvert( hA, &hA_SELLP, Magma_CSR, Magma_SELLP, queue ); magma_z_mtransfer( hA_SELLP, &dA_SELLP, Magma_CPU, Magma_DEV, queue ); magma_z_mfree(&hA_SELLP, queue ); magma_z_vfree( &dy, queue ); magma_z_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue ); // SpMV on GPU (SELLP) start = magma_sync_wtime( queue ); for (j=0; j<10; j++) magma_z_spmv( c_one, dA_SELLP, dx, c_zero, dy, queue ); end = magma_sync_wtime( queue ); printf( " > MAGMA: %.2e seconds %.2e GFLOP/s (SELLP).\n", (end-start)/10, FLOPS*10/(end-start) ); magma_z_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue ); res = 0.0; for(magma_int_t k=0; k<hA.num_rows; k++ ) res=res + MAGMA_Z_REAL(hcheck.val[k]) - MAGMA_Z_REAL(hrefvec.val[k]); printf("# |x-y|_F = %8.2e\n", res); if ( res < .000001 ) printf("# tester spmv SELL-P: ok\n"); else printf("# tester spmv SELL-P: failed\n"); magma_z_vfree( &hcheck, queue ); magma_z_mfree(&dA_SELLP, queue ); // SpMV on GPU (CUSPARSE - CSR) // CUSPARSE context // cusparseHandle_t cusparseHandle = 0; cusparseStatus_t cusparseStatus; cusparseStatus = cusparseCreate(&cusparseHandle); cusparseSetStream( cusparseHandle, queue ); cusparseMatDescr_t descr = 0; cusparseStatus = cusparseCreateMatDescr(&descr); cusparseSetMatType(descr,CUSPARSE_MATRIX_TYPE_GENERAL); cusparseSetMatIndexBase(descr,CUSPARSE_INDEX_BASE_ZERO); magmaDoubleComplex alpha = c_one; magmaDoubleComplex beta = c_zero; magma_z_vfree( &dy, queue ); magma_z_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue ); // copy matrix to GPU magma_z_mtransfer( hA, &dA, Magma_CPU, Magma_DEV, queue ); start = magma_sync_wtime( queue ); for (j=0; j<10; j++) cusparseStatus = cusparseZcsrmv(cusparseHandle,CUSPARSE_OPERATION_NON_TRANSPOSE, hA.num_rows, hA.num_cols, hA.nnz, &alpha, descr, dA.dval, dA.drow, dA.dcol, dx.dval, &beta, dy.dval); end = magma_sync_wtime( queue ); if (cusparseStatus != 0) printf("error in cuSPARSE CSR\n"); printf( " > CUSPARSE: %.2e seconds %.2e GFLOP/s (CSR).\n", (end-start)/10, 
FLOPS*10/(end-start) ); cusparseMatDescr_t descrA; cusparseStatus = cusparseCreateMatDescr(&descrA); if (cusparseStatus != 0) printf("error\n"); cusparseHybMat_t hybA; cusparseStatus = cusparseCreateHybMat( &hybA ); if (cusparseStatus != 0) printf("error\n"); magma_z_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue ); res = 0.0; for(magma_int_t k=0; k<hA.num_rows; k++ ) res=res + MAGMA_Z_REAL(hcheck.val[k]) - MAGMA_Z_REAL(hrefvec.val[k]); printf("# |x-y|_F = %8.2e\n", res); if ( res < .000001 ) printf("# tester spmv cuSPARSE CSR: ok\n"); else printf("# tester spmv cuSPARSE CSR: failed\n"); magma_z_vfree( &hcheck, queue ); magma_z_vfree( &dy, queue ); magma_z_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue ); cusparseZcsr2hyb(cusparseHandle, hA.num_rows, hA.num_cols, descrA, dA.dval, dA.drow, dA.dcol, hybA, 0, CUSPARSE_HYB_PARTITION_AUTO); start = magma_sync_wtime( queue ); for (j=0; j<10; j++) cusparseStatus = cusparseZhybmv( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, descrA, hybA, dx.dval, &beta, dy.dval); end = magma_sync_wtime( queue ); if (cusparseStatus != 0) printf("error in cuSPARSE HYB\n"); printf( " > CUSPARSE: %.2e seconds %.2e GFLOP/s (HYB).\n", (end-start)/10, FLOPS*10/(end-start) ); magma_z_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue ); res = 0.0; for(magma_int_t k=0; k<hA.num_rows; k++ ) res=res + MAGMA_Z_REAL(hcheck.val[k]) - MAGMA_Z_REAL(hrefvec.val[k]); printf("# |x-y|_F = %8.2e\n", res); if ( res < .000001 ) printf("# tester spmv cuSPARSE HYB: ok\n"); else printf("# tester spmv cuSPARSE HYB: failed\n"); magma_z_vfree( &hcheck, queue ); cusparseDestroyMatDescr( descrA ); cusparseDestroyHybMat( hybA ); cusparseDestroy( cusparseHandle ); magma_z_mfree(&dA, queue ); printf("\n\n"); // free CPU memory magma_z_mfree(&hA, queue ); magma_z_vfree(&hx, queue ); magma_z_vfree(&hy, queue ); magma_z_vfree(&hrefvec, queue ); // free GPU memory magma_z_vfree(&dx, queue ); magma_z_vfree(&dy, queue ); i++; } magma_queue_destroy( queue 
); TESTING_FINALIZE(); return 0; }
extern "C" magma_int_t magma_zcg( magma_z_sparse_matrix A, magma_z_vector b, magma_z_vector *x, magma_z_solver_par *solver_par, magma_queue_t queue ) { // set queue for old dense routines magma_queue_t orig_queue; magmablasGetKernelStream( &orig_queue ); // prepare solver feedback solver_par->solver = Magma_CG; solver_par->numiter = 0; solver_par->info = MAGMA_SUCCESS; // local variables magmaDoubleComplex c_zero = MAGMA_Z_ZERO, c_one = MAGMA_Z_ONE; magma_int_t dofs = A.num_rows; // GPU workspace magma_z_vector r, p, q; magma_z_vinit( &r, Magma_DEV, dofs, c_zero, queue ); magma_z_vinit( &p, Magma_DEV, dofs, c_zero, queue ); magma_z_vinit( &q, Magma_DEV, dofs, c_zero, queue ); // solver variables magmaDoubleComplex alpha, beta; double nom, nom0, r0, betanom, betanomsq, den; // solver setup magma_zscal( dofs, c_zero, x->dval, 1) ; // x = 0 magma_zcopy( dofs, b.dval, 1, r.dval, 1 ); // r = b magma_zcopy( dofs, b.dval, 1, p.dval, 1 ); // p = b nom0 = betanom = magma_dznrm2( dofs, r.dval, 1 ); nom = nom0 * nom0; // nom = r' * r magma_z_spmv( c_one, A, p, c_zero, q, queue ); // q = A p den = MAGMA_Z_REAL( magma_zdotc(dofs, p.dval, 1, q.dval, 1) );// den = p dot q solver_par->init_res = nom0; if ( (r0 = nom * solver_par->epsilon) < ATOLERANCE ) r0 = ATOLERANCE; if ( nom < r0 ) { magmablasSetKernelStream( orig_queue ); return MAGMA_SUCCESS; } // check positive definite if (den <= 0.0) { printf("Operator A is not postive definite. 
(Ar,r) = %f\n", den); magmablasSetKernelStream( orig_queue ); return MAGMA_NONSPD; solver_par->info = MAGMA_NONSPD; } //Chronometry real_Double_t tempo1, tempo2; tempo1 = magma_sync_wtime( queue ); if ( solver_par->verbose > 0 ) { solver_par->res_vec[0] = (real_Double_t)nom0; solver_par->timing[0] = 0.0; } // start iteration for( solver_par->numiter= 1; solver_par->numiter<solver_par->maxiter; solver_par->numiter++ ) { alpha = MAGMA_Z_MAKE(nom/den, 0.); magma_zaxpy(dofs, alpha, p.dval, 1, x->dval, 1); // x = x + alpha p magma_zaxpy(dofs, -alpha, q.dval, 1, r.dval, 1); // r = r - alpha q betanom = magma_dznrm2(dofs, r.dval, 1); // betanom = || r || betanomsq = betanom * betanom; // betanoms = r' * r if ( solver_par->verbose > 0 ) { tempo2 = magma_sync_wtime( queue ); if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) betanom; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } if ( betanom < r0 ) { break; } beta = MAGMA_Z_MAKE(betanomsq/nom, 0.); // beta = betanoms/nom magma_zscal(dofs, beta, p.dval, 1); // p = beta*p magma_zaxpy(dofs, c_one, r.dval, 1, p.dval, 1); // p = p + r magma_z_spmv( c_one, A, p, c_zero, q, queue ); // q = A p den = MAGMA_Z_REAL(magma_zdotc(dofs, p.dval, 1, q.dval, 1)); // den = p dot q nom = betanomsq; } tempo2 = magma_sync_wtime( queue ); solver_par->runtime = (real_Double_t) tempo2-tempo1; double residual; magma_zresidual( A, b, *x, &residual, queue ); solver_par->final_res = residual; if ( solver_par->numiter < solver_par->maxiter) { solver_par->info = MAGMA_SUCCESS; } else if ( solver_par->init_res > solver_par->final_res ) { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) betanom; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } 
solver_par->info = MAGMA_SLOW_CONVERGENCE; } else { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) betanom; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } solver_par->info = MAGMA_DIVERGENCE; } magma_z_vfree(&r, queue ); magma_z_vfree(&p, queue ); magma_z_vfree(&q, queue ); magmablasSetKernelStream( orig_queue ); return MAGMA_SUCCESS; } /* magma_zcg */
/* //////////////////////////////////////////////////////////////////////////// -- testing zdot */ int main( int argc, char** argv) { TESTING_INIT(); printf("#================================================================================================================================================\n"); printf("\n"); printf(" | runtime | GFLOPS\n"); printf("#n num_vecs | CUDOT CUGEMV MAGMAGEMV MDOT MDGM | CUDOT CUGEMV MAGMAGEMV MDOT MDGM \n"); printf("#------------------------------------------------------------------------------------------------------------------------------------------------\n"); printf("\n"); for( magma_int_t num_vecs=5; num_vecs<6; num_vecs+=1 ){ for( magma_int_t n=10000; n<100000001; n=n+10000 ){ magma_z_sparse_matrix A, B, C, D, E, F, G, H, I, J, K, Z; magma_z_vector a,b,c,x, y, z, skp; int iters = 10; double computations = (2.* n * iters * num_vecs); magmaDoubleComplex one = MAGMA_Z_MAKE(1.0, 0.0); magmaDoubleComplex zero = MAGMA_Z_MAKE(0.0, 0.0); magmaDoubleComplex alpha; #define ENABLE_TIMER #ifdef ENABLE_TIMER double mdot1, mdot2, mdgm1, mdgm2, magmagemv1, magmagemv2, cugemv1, cugemv2, cudot1, cudot2; double mdot_time, mdgm_time, magmagemv_time, cugemv_time, cudot_time; #endif magma_z_vinit( &a, Magma_DEV, n*num_vecs, one ); magma_z_vinit( &b, Magma_DEV, num_vecs, one ); int min_ten = min(num_vecs, 15); magma_z_vinit( &x, Magma_DEV, min_ten*n, one ); magma_z_vinit( &y, Magma_DEV, min_ten*n, one ); magma_z_vinit( &skp, Magma_DEV, num_vecs, zero ); // warm up magma_zgemvmdot( n, num_vecs, a.val, b.val, x.val, y.val, skp.val ); // CUDOT #ifdef ENABLE_TIMER magma_device_sync(); cudot1=magma_wtime(); #endif for( int h=0; h<iters; h++){ for( int l=0; l<num_vecs; l++) alpha = magma_zdotc(n, a.val, 1, b.val, 1); } #ifdef ENABLE_TIMER magma_device_sync(); cudot2=magma_wtime(); cudot_time=cudot2-cudot1; #endif // CUGeMV #ifdef ENABLE_TIMER magma_device_sync(); cugemv1=magma_wtime(); #endif for( int h=0; h<iters; h++){ magma_zgemv(MagmaTrans, 
n, num_vecs, one, a.val, n, b.val, 1, zero, skp.val, 1); //h++; } #ifdef ENABLE_TIMER magma_device_sync(); cugemv2=magma_wtime(); cugemv_time=cugemv2-cugemv1; #endif // MAGMAGeMV #ifdef ENABLE_TIMER magma_device_sync(); magmagemv1=magma_wtime(); #endif for( int h=0; h<iters; h++){ magmablas_zgemv(MagmaTrans, n, num_vecs, one, a.val, n, b.val, 1, zero, skp.val, 1); //h++; } #ifdef ENABLE_TIMER magma_device_sync(); magmagemv2=magma_wtime(); magmagemv_time=magmagemv2-magmagemv1; #endif // MDOT #ifdef ENABLE_TIMER magma_device_sync(); mdot1=magma_wtime(); #endif for( int h=0; h<iters; h++){ //magma_zmdotc( n, num_vecs, a.val, b.val, x.val, y.val, skp.val ); magma_zmdotc( n, 2, a.val, b.val, x.val, y.val, skp.val ); magma_zmdotc( n, 2, a.val, b.val, x.val, y.val, skp.val ); magma_zmdotc( n, 1, a.val, b.val, x.val, y.val, skp.val ); //h++; } #ifdef ENABLE_TIMER magma_device_sync(); mdot2=magma_wtime(); mdot_time=mdot2-mdot1; #endif // MDGM #ifdef ENABLE_TIMER magma_device_sync(); mdgm1=magma_wtime(); #endif for( int h=0; h<iters; h++){ magma_zgemvmdot( n, num_vecs, a.val, b.val, x.val, y.val, skp.val ); //h++; } #ifdef ENABLE_TIMER magma_device_sync(); mdgm2=magma_wtime(); mdgm_time=mdgm2-mdgm1; #endif //magma_zprint_gpu(num_vecs,1,skp.val,num_vecs); //Chronometry #ifdef ENABLE_TIMER printf("%d %d %e %e %e %e %e %e %e %e %e %e\n", n, num_vecs, cudot_time/iters, (cugemv_time)/iters, (magmagemv_time)/iters, (mdot_time)/iters, (mdgm_time)/iters, (double)(computations)/(cudot_time*(1.e+09)), (double)(computations)/(cugemv_time*(1.e+09)), (double)(computations)/(magmagemv_time*(1.e+09)), (double)(computations)/(mdot_time*(1.e+09)), (double)(computations)/(mdgm_time*(1.e+09)) ); #endif magma_z_vfree(&a); magma_z_vfree(&b); magma_z_vfree(&x); magma_z_vfree(&y); magma_z_vfree(&skp); } // } printf("#================================================================================================================================================\n"); printf("\n"); printf("\n"); } 
TESTING_FINALIZE(); return 0; }