/**
 * Prepare a PaStiX direct factorization of A and store it in precond.
 *
 * The right-hand side b is copied to the host and A is brought to the host
 * in CSR format (matrix B). B's arrays are then handed to PaStiX: B.row is
 * passed as PaStiX's column pointer and B.col as its row index array. PaStiX
 * is run from the ordering task through the numerical factorization task
 * with LU factorization selected.
 *
 * On return precond holds:
 *   - int_array_1 / int_array_2 : permutation and inverse permutation,
 *   - M.val / M.col / M.row     : the arrays of B (values, B.row, B.col),
 *   - pastix_data, iparm, dparm : the PaStiX handle and parameter arrays.
 * These buffers are owned by precond after the call.
 *
 * @param A        system matrix (any location / storage type).
 * @param b        right-hand side; only used for the PaStiX initialization call.
 * @param precond  preconditioner structure that receives the factorization.
 * @return MAGMA_SUCCESS in every case; when PaStiX support is not compiled in
 *         (or the precision guard is not satisfied) only an error message is
 *         printed.
 */
magma_int_t
magma_spastixsetup(
    magma_s_sparse_matrix A, magma_s_vector b,
    magma_s_preconditioner *precond )
{
#if defined(HAVE_PASTIX)
    #if defined(PRECISION_d)

        pastix_data_t    *pastix_data = NULL; /* storage structure needed by pastix */
        pastix_int_t      ncol;               /* size of the matrix                 */
        pastix_int_t     *colptr      = NULL; /* first element of each column       */
        pastix_int_t     *rows        = NULL; /* row of each element                */
        pastix_float_t   *values      = NULL; /* value of each element              */
        pastix_float_t   *rhs         = NULL; /* right hand side                    */
        pastix_int_t     *iparm       = NULL; /* integer parameters for pastix      */
        /* NOTE(review): declared float* but sized/cast as pastix_float_t below;
           confirm the element type PaStiX expects for dparm. */
        float            *dparm       = NULL; /* floating parameters for pastix     */
        pastix_int_t     *perm        = NULL; /* permutation array                  */
        pastix_int_t     *invp        = NULL; /* inverse permutation array          */
        pastix_int_t      mat_type;

        magma_s_sparse_matrix A_h1, B;
        magma_s_vector b_h;

        /* Host copies of the right-hand side and of A in CSR format. */
        magma_s_vtransfer( b, &b_h, A.memory_location, Magma_CPU);

        if ( A.storage_type != Magma_CSR ) {
            magma_s_mtransfer( A, &A_h1, A.memory_location, Magma_CPU);
            magma_s_mconvert( A_h1, &B, A_h1.storage_type, Magma_CSR);
        }
        else {
            magma_s_mtransfer( A, &B, A.memory_location, Magma_CPU);
        }

        rhs      = (pastix_float_t*) b_h.val;
        ncol     = B.num_rows;
        /* The CSR row pointer / column index arrays are passed to PaStiX as
           column pointer / row index arrays. */
        colptr   = B.row;
        rows     = B.col;
        values   = (pastix_float_t*) B.val;

        mat_type = API_SYM_NO;

        /* NOTE(review): allocation results are not checked. */
        iparm = (pastix_int_t*)malloc(IPARM_SIZE*sizeof(pastix_int_t));
        dparm = (pastix_float_t*)malloc(DPARM_SIZE*sizeof(pastix_float_t));

        /*******************************************/
        /* Initialize parameters to default values */
        /*******************************************/
        iparm[IPARM_MODIFY_PARAMETER]    = API_NO;
        pastix(&pastix_data, MPI_COMM_WORLD,
               ncol, colptr, rows, values,
               perm, invp, rhs, 1, iparm, dparm);

        /* Customize the defaults. */
        iparm[IPARM_THREAD_NBR]          = 16;
        iparm[IPARM_SYM]                 = mat_type;
        iparm[IPARM_FACTORIZATION]       = API_FACT_LU;
        iparm[IPARM_VERBOSE]             = API_VERBOSE_YES;
        iparm[IPARM_ORDERING]            = API_ORDER_SCOTCH;
        iparm[IPARM_INCOMPLETE]          = API_NO;
        iparm[IPARM_RHS_MAKING]          = API_RHS_B;
        iparm[IPARM_LEVEL_OF_FILL]       = 0;

        /*
         * Matrix needs :
         *    - to be in fortran numbering
         *    - to have only the lower triangular part in symmetric case
         *    - to have a graph with a symmetric structure in unsymmetric case
         * If those criteria are not matched, the csc will be reallocated and
         * changed.
         */
        iparm[IPARM_MATRIX_VERIFICATION] = API_YES;

        perm = (pastix_int_t*)malloc(ncol*sizeof(pastix_int_t));
        invp = (pastix_int_t*)malloc(ncol*sizeof(pastix_int_t));

        /*******************************************/
        /* Steps 1-4 in a single call:             */
        /*   1 - ordering (Scotch)                 */
        /*   2 - symbolic factorization            */
        /*   3 - mapping and compute scheduling    */
        /*   4 - numerical factorization           */
        /* Steps 1-3 only depend on the pattern of */
        /* the matrix, step 4 on its values.       */
        /*******************************************/
        iparm[IPARM_START_TASK] = API_TASK_ORDERING;
        iparm[IPARM_END_TASK]   = API_TASK_NUMFACT;

        pastix(&pastix_data, MPI_COMM_WORLD,
               ncol, colptr, rows, values,
               perm, invp, NULL, 1, iparm, dparm);

        /* Hand everything needed for the later solve over to precond. */
        precond->int_array_1       = (magma_int_t*) perm;
        precond->int_array_2       = (magma_int_t*) invp;

        precond->M.val             = (float*) values;
        precond->M.col             = (magma_int_t*) colptr;
        precond->M.row             = (magma_int_t*) rows;
        precond->M.num_rows        = A.num_rows;
        precond->M.num_cols        = A.num_cols;
        precond->M.memory_location = Magma_CPU;
        precond->pastix_data       = pastix_data;
        precond->iparm             = iparm;
        precond->dparm             = dparm;

        if ( A.storage_type != Magma_CSR ) {
            magma_s_mfree( &A_h1 );
        }
        magma_s_vfree( &b_h );
        /* B is intentionally NOT freed: precond->M now references B's
           val/row/col arrays, so releasing B here would leave the
           preconditioner with dangling pointers. */

    #else
        printf( "error: only real supported yet.\n");
    #endif

#else
    printf( "error: pastix not available.\n");
#endif

    return MAGMA_SUCCESS;
}
/**
 * Split a sparse matrix A into a block-diagonal part D and the remainder R.
 *
 * Rows are grouped into consecutive blocks of bsize rows. For a row block
 * starting at row i, every stored entry with column index in [i, i+bsize)
 * is written to D; every other entry is written to R. Within each row of D
 * the diagonal entry is stored first, followed by the remaining entries of
 * that row in their original order.
 *
 * If A lives on the CPU in CSR / CSRCOO format the split is done directly:
 * D is tagged Magma_CSRD and R Magma_CSR, both on A's memory location.
 * Otherwise A is copied to the CPU, converted to CSR, split recursively, and
 * the results are converted / transferred back to A's storage type and
 * memory location.
 *
 * @param bsize  number of rows per diagonal block; must be >= 1.
 * @param A      input matrix.
 * @param D      output: block-diagonal part.
 * @param R      output: everything outside the diagonal blocks.
 * @return MAGMA_SUCCESS, or MAGMA_ERR_ILLEGAL_VALUE if bsize < 1 or if some
 *         row does not contain exactly one stored diagonal entry (the
 *         diagonal-first layout of D cannot be built in that case). D and R
 *         are not allocated when an error is returned.
 */
magma_int_t
magma_scsrsplit(
    magma_int_t bsize,
    magma_s_sparse_matrix A,
    magma_s_sparse_matrix *D,
    magma_s_sparse_matrix *R )
{
    // A non-positive block size would never advance the block loops below.
    if ( bsize < 1 ) {
        return MAGMA_ERR_ILLEGAL_VALUE;
    }

    if ( A.memory_location == Magma_CPU &&
         ( A.storage_type == Magma_CSR || A.storage_type == Magma_CSRCOO ) ) {

        magma_int_t i, k, j;
        magma_int_t nnz_diag = 0, nnz_offd = 0;

        // Pass 1: count the entries of both parts and make sure every row
        // owns exactly one diagonal entry, which the fill pass relies on.
        for ( i = 0; i < A.num_rows; i += bsize ) {
            for ( k = i; k < A.num_rows && k < i+bsize; k++ ) {
                magma_int_t diag_count = 0;
                for ( j = A.row[k]; j < A.row[k+1]; j++ ) {
                    if ( A.col[j] >= i && A.col[j] < i+bsize ) {
                        nnz_diag++;
                        if ( A.col[j] == k ) {
                            diag_count++;
                        }
                    }
                    else {
                        nnz_offd++;
                    }
                }
                if ( diag_count != 1 ) {
                    return MAGMA_ERR_ILLEGAL_VALUE;
                }
            }
        }

        // Describe and allocate the two result matrices.
        D->storage_type    = Magma_CSRD;
        D->memory_location = A.memory_location;
        D->num_rows        = A.num_rows;
        D->num_cols        = A.num_cols;
        D->nnz             = nnz_diag;

        R->storage_type    = Magma_CSR;
        R->memory_location = A.memory_location;
        R->num_rows        = A.num_rows;
        R->num_cols        = A.num_cols;
        R->nnz             = nnz_offd;

        // NOTE(review): allocation results are not checked.
        magma_smalloc_cpu( &D->val, nnz_diag );
        magma_index_malloc_cpu( &D->row, A.num_rows+1 );
        magma_index_malloc_cpu( &D->col, nnz_diag );

        magma_smalloc_cpu( &R->val, nnz_offd );
        magma_index_malloc_cpu( &R->row, A.num_rows+1 );
        magma_index_malloc_cpu( &R->col, nnz_offd );

        // Pass 2: fill. For each row of D the first slot is reserved for the
        // diagonal entry; all other in-block entries are appended behind it
        // through their own cursor, so the result does not depend on where
        // the diagonal appears inside the input row.
        D->row[0] = 0;
        R->row[0] = 0;
        nnz_offd  = 0;

        for ( i = 0; i < A.num_rows; i += bsize ) {
            for ( k = i; k < A.num_rows && k < i+bsize; k++ ) {
                const magma_int_t diag_slot = D->row[k];
                magma_int_t next_slot = diag_slot + 1;

                for ( j = A.row[k]; j < A.row[k+1]; j++ ) {
                    if ( A.col[j] >= i && A.col[j] < i+bsize ) {
                        const magma_int_t dst =
                            ( A.col[j] == k ) ? diag_slot : next_slot++;
                        D->val[dst] = A.val[j];
                        D->col[dst] = A.col[j];
                    }
                    else {
                        R->val[nnz_offd] = A.val[j];
                        R->col[nnz_offd] = A.col[j];
                        nnz_offd++;
                    }
                }
                // One diagonal entry plus everything appended behind it.
                D->row[k+1] = next_slot;
                R->row[k+1] = nnz_offd;
            }
        }
        return MAGMA_SUCCESS;
    }
    else {
        magma_s_sparse_matrix Ah, ACSR, DCSR, RCSR, Dh, Rh;
        magma_int_t info;

        // Bring A to the host in CSR format and split it there.
        magma_s_mtransfer( A, &Ah, A.memory_location, Magma_CPU );
        magma_s_mconvert( Ah, &ACSR, A.storage_type, Magma_CSR );

        info = magma_scsrsplit( bsize, ACSR, &DCSR, &RCSR );

        // DCSR / RCSR only exist when the split succeeded.
        if ( info == MAGMA_SUCCESS ) {
            magma_s_mconvert( DCSR, &Dh, Magma_CSR, A.storage_type );
            magma_s_mconvert( RCSR, &Rh, Magma_CSR, A.storage_type );

            magma_s_mtransfer( Dh, D, Magma_CPU, A.memory_location );
            magma_s_mtransfer( Rh, R, Magma_CPU, A.memory_location );

            magma_s_mfree( &Dh );
            magma_s_mfree( &DCSR );
            magma_s_mfree( &Rh );
            magma_s_mfree( &RCSR );
        }

        magma_s_mfree( &Ah );
        magma_s_mfree( &ACSR );

        return info;
    }
}
/* //////////////////////////////////////////////////////////////////////////// -- running magma_scg magma_scg_merge */ int main( int argc, char** argv) { TESTING_INIT(); magma_s_solver_par solver_par; solver_par.epsilon = 10e-16; solver_par.maxiter = 1000; solver_par.verbose = 0; solver_par.num_eigenvalues = 0; magma_s_preconditioner precond_par; precond_par.solver = Magma_JACOBI; precond_par.levels = 0; precond_par.sweeps = 10; int precond = 0; int format = 0; int version = 0; int scale = 0; magma_scale_t scaling = Magma_NOSCALE; magma_s_sparse_matrix A, B, B_d; magma_s_vector x, b; B.blocksize = 8; B.alignment = 8; float one = MAGMA_S_MAKE(1.0, 0.0); float zero = MAGMA_S_MAKE(0.0, 0.0); B.storage_type = Magma_CSR; int i; for( i = 1; i < argc; ++i ) { if ( strcmp("--format", argv[i]) == 0 ) { format = atoi( argv[++i] ); switch( format ) { case 0: B.storage_type = Magma_CSR; break; case 1: B.storage_type = Magma_ELL; break; case 2: B.storage_type = Magma_ELLRT; break; case 3: B.storage_type = Magma_SELLP; break; } }else if ( strcmp("--mscale", argv[i]) == 0 ) { scale = atoi( argv[++i] ); switch( scale ) { case 0: scaling = Magma_NOSCALE; break; case 1: scaling = Magma_UNITDIAG; break; case 2: scaling = Magma_UNITROW; break; } }else if ( strcmp("--precond", argv[i]) == 0 ) { precond = atoi( argv[++i] ); switch( precond ) { case 0: precond_par.solver = Magma_JACOBI; break; case 1: precond_par.solver = Magma_ICC; break; case 2: precond_par.solver = Magma_AICC; break; } }else if ( strcmp("--version", argv[i]) == 0 ) { version = atoi( argv[++i] ); }else if ( strcmp("--blocksize", argv[i]) == 0 ) { B.blocksize = atoi( argv[++i] ); }else if ( strcmp("--alignment", argv[i]) == 0 ) { B.alignment = atoi( argv[++i] ); }else if ( strcmp("--verbose", argv[i]) == 0 ) { solver_par.verbose = atoi( argv[++i] ); } else if ( strcmp("--maxiter", argv[i]) == 0 ) { solver_par.maxiter = atoi( argv[++i] ); } else if ( strcmp("--tol", argv[i]) == 0 ) { sscanf( argv[++i], "%f", 
&solver_par.epsilon ); } else if ( strcmp("--levels", argv[i]) == 0 ) { precond_par.levels = atoi( argv[++i] ); }else if ( strcmp("--sweeps", argv[i]) == 0 ) { precond_par.sweeps = atoi( argv[++i] ); } else break; } printf( "\n# usage: ./run_spcg" " [ --format %d (0=CSR, 1=ELL 2=ELLRT, 3=SELLP)" " [ --blocksize %d --alignment %d ]" " --mscale %d (0=no, 1=unitdiag, 2=unitrownrm)" " --verbose %d (0=summary, k=details every k iterations)" " --maxiter %d --tol %.2e" " --precond %d (0=Jacobi, 1=IC, 2=AIC [ --levels %d --sweeps %d]) ]" " matrices \n\n", format, (int)B.blocksize, (int)B.alignment, scale, (int)solver_par.verbose, (int)solver_par.maxiter, solver_par.epsilon, precond, (int) precond_par.levels, (int) precond_par.sweeps ); magma_ssolverinfo_init( &solver_par, &precond_par ); while( i < argc ){ magma_s_csr_mtx( &A, argv[i] ); printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n", (int) A.num_rows,(int) A.num_cols,(int) A.nnz ); // scale matrix magma_smscale( &A, scaling ); magma_s_mconvert( A, &B, Magma_CSR, B.storage_type ); magma_s_mtransfer( B, &B_d, Magma_CPU, Magma_DEV ); // vectors and initial guess magma_s_vinit( &b, Magma_DEV, A.num_cols, one ); magma_s_vinit( &x, Magma_DEV, A.num_cols, one ); magma_s_spmv( one, B_d, x, zero, b ); // b = A x magma_s_vfree(&x); magma_s_vinit( &x, Magma_DEV, A.num_cols, zero ); magma_s_precondsetup( B_d, b, &precond_par ); magma_spcg( B_d, b, &x, &solver_par, &precond_par ); magma_ssolverinfo( &solver_par, &precond_par ); magma_s_mfree(&B_d); magma_s_mfree(&B); magma_s_mfree(&A); magma_s_vfree(&x); magma_s_vfree(&b); i++; } magma_ssolverinfo_free( &solver_par, &precond_par ); TESTING_FINALIZE(); return 0; }
/* //////////////////////////////////////////////////////////////////////////// -- testing any solver */ int main( int argc, char** argv ) { TESTING_INIT(); magma_sopts zopts; magma_queue_t queue; magma_queue_create( /*devices[ opts->device ],*/ &queue ); int i=1; magma_sparse_opts( argc, argv, &zopts, &i, queue ); real_Double_t res; magma_s_sparse_matrix Z, A, AT, A2, B, B_d; B.blocksize = zopts.blocksize; B.alignment = zopts.alignment; while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); magma_sm_5stencil( laplace_size, &Z, queue ); } else { // file-matrix test magma_s_csr_mtx( &Z, argv[i], queue ); } printf( "# matrix info: %d-by-%d with %d nonzeros\n", (int) Z.num_rows,(int) Z.num_cols,(int) Z.nnz ); // scale matrix magma_smscale( &Z, zopts.scaling, queue ); // remove nonzeros in matrix magma_smcsrcompressor( &Z, queue ); // convert to be non-symmetric magma_s_mconvert( Z, &A, Magma_CSR, Magma_CSRL, queue ); // transpose magma_s_mtranspose( A, &AT, queue ); // convert, copy back and forth to check everything works printf("here0\n"); magma_s_mconvert( AT, &B, Magma_CSR, zopts.output_format, queue ); magma_s_mfree(&AT, queue ); magma_s_mtransfer( B, &B_d, Magma_CPU, Magma_DEV, queue ); magma_s_mfree(&B, queue ); magma_smcsrcompressor_gpu( &B_d, queue ); magma_s_mtransfer( B_d, &B, Magma_DEV, Magma_CPU, queue ); magma_s_mfree(&B_d, queue ); magma_s_mconvert( B, &AT, zopts.output_format,Magma_CSR, queue ); magma_s_mfree(&B, queue ); // transpose back magma_s_mtranspose( AT, &A2, queue ); magma_s_mfree(&AT, queue ); magma_smdiff( A, A2, &res, queue); printf("# ||A-B||_F = %8.2e\n", res); if ( res < .000001 ) printf("# tester: ok\n"); else printf("# tester: failed\n"); magma_s_mfree(&A, queue ); magma_s_mfree(&A2, queue ); magma_s_mfree(&Z, queue ); i++; } magma_queue_destroy( queue ); TESTING_FINALIZE(); return 0; }