/*
 * Apply the PaStiX factorization held in `precond` to the vector `b`.
 *
 * b        right-hand side; it is copied to the host before the solve.
 * x        output vector; it is freed and then re-created from the host
 *          result, placed in the same memory location as `b`.
 * precond  preconditioner filled by magma_spastixsetup(): matrix arrays in
 *          precond->M, permutation in int_array_1, inverse permutation in
 *          int_array_2, plus pastix_data / iparm / dparm.
 *
 * Always returns MAGMA_SUCCESS. When PaStiX support or the required
 * precision is not compiled in, only an error message is printed.
 */
magma_int_t magma_sapplypastix( magma_s_vector b, magma_s_vector *x, magma_s_preconditioner *precond ){
#if defined(HAVE_PASTIX)
#if defined(PRECISION_d)
    pastix_int_t    ncol;               /* Size of the matrix */
    pastix_int_t   *colptr = NULL;      /* Indexes of first element of each column in row and values */
    pastix_int_t   *rows   = NULL;      /* Row of each element of the matrix */
    pastix_float_t *values = NULL;      /* Value of each element of the matrix */
    pastix_int_t   *iparm;              /* integer parameters for pastix */
    /* NOTE(review): main() in this file passes a `double` dparm array to
       pastix(); confirm that `float` is really intended here. */
    float          *dparm;              /* floating parameters for pastix */
    pastix_int_t   *perm   = NULL;      /* Permutation array */
    pastix_int_t   *invp   = NULL;      /* Inverse permutation array */

    /* Work on a host copy of b; pastix() receives it as its rhs buffer and
       the result is read back from the same buffer below. */
    magma_s_vector b_h;
    magma_s_vtransfer( b, &b_h, b.memory_location, Magma_CPU);

    ncol   = precond->M.num_rows;
    colptr = (pastix_int_t*) precond->M.col;
    rows   = (pastix_int_t*) precond->M.row;
    values = (pastix_float_t*) precond->M.val;
    iparm  = precond->iparm;
    dparm  = precond->dparm;

    /* The setup routine stores perm in int_array_1 and invp in int_array_2;
       reading both from int_array_1 would make invp alias perm. */
    perm = (pastix_int_t*) precond->int_array_1;
    invp = (pastix_int_t*) precond->int_array_2;

    /*******************************************/
    /* Step 5 - Solve                          */
    /* For each one of your right-hand-side    */
    /* members.                                */
    /* Also consider using multiple            */
    /* right-hand-side members.                */
    /*******************************************/
    iparm[IPARM_START_TASK] = API_TASK_SOLVE;
    iparm[IPARM_END_TASK]   = API_TASK_REFINEMENT;

    pastix(&(precond->pastix_data), MPI_COMM_WORLD,
           ncol, colptr, rows, values,
           perm, invp, b_h.val, 1, iparm, dparm);

    /* TODO: avoid re-allocating x on every call. With many iterations it
       might be faster to use
           magma_ssetvector( ncol, b_h.val, 1, x->val, 1 ); */
    magma_s_vfree( x );
    magma_s_vtransfer( b_h, x, Magma_CPU, b.memory_location);

    magma_s_vfree( &b_h);
#else
    printf( "error: only real supported yet.\n");
#endif
#else
    printf( "error: pastix not available.\n");
#endif
    return MAGMA_SUCCESS;
}
/*
 * Set up a PaStiX direct solver for the matrix A and store it in `precond`.
 *
 * A        system matrix; it is copied to the host and, if it is not stored
 *          as Magma_CSR, converted to Magma_CSR first.
 * b        right-hand side; a host copy is only handed to the PaStiX
 *          parameter-initialisation call.
 * precond  receives the factorization handle (pastix_data), the parameter
 *          arrays (iparm, dparm), the permutation (int_array_1), the inverse
 *          permutation (int_array_2) and, in precond->M, the host matrix
 *          arrays that were handed to PaStiX.
 *
 * The arrays referenced by precond->M, int_array_1/2, iparm and dparm are
 * allocated here and not released by this routine.
 *
 * Always returns MAGMA_SUCCESS. When PaStiX support or the required
 * precision is not compiled in, only an error message is printed.
 */
magma_int_t magma_spastixsetup( magma_s_sparse_matrix A, magma_s_vector b, magma_s_preconditioner *precond ){
#if defined(HAVE_PASTIX)
#if defined(PRECISION_d)
    pastix_data_t  *pastix_data = NULL; /* Pointer to a storage structure needed by pastix */
    pastix_int_t    ncol;               /* Size of the matrix */
    pastix_int_t   *colptr = NULL;      /* Indexes of first element of each column in row and values */
    pastix_int_t   *rows   = NULL;      /* Row of each element of the matrix */
    pastix_float_t *values = NULL;      /* Value of each element of the matrix */
    pastix_float_t *rhs    = NULL;      /* right hand side */
    pastix_int_t   *iparm  = NULL;      /* integer parameters for pastix */
    /* NOTE(review): main() in this file passes a `double` dparm array to
       pastix(); confirm that `float` (and the pastix_float_t-sized
       allocation below) is really intended here. */
    float          *dparm  = NULL;      /* floating parameters for pastix */
    pastix_int_t   *perm   = NULL;      /* Permutation array */
    pastix_int_t   *invp   = NULL;      /* Inverse permutation array */
    pastix_int_t    mat_type;

    magma_s_sparse_matrix A_h1, B;
    magma_s_vector b_h;

    /* b is transferred from its own memory location (not A's), matching
       what magma_sapplypastix() does. */
    magma_s_vtransfer( b, &b_h, b.memory_location, Magma_CPU);

    /* B is the host copy of A in Magma_CSR storage. */
    if( A.storage_type != Magma_CSR ){
        magma_s_mtransfer( A, &A_h1, A.memory_location, Magma_CPU);
        magma_s_mconvert( A_h1, &B, A_h1.storage_type, Magma_CSR);
    }
    else{
        magma_s_mtransfer( A, &B, A.memory_location, Magma_CPU);
    }

    /* B's arrays are handed to PaStiX in its colptr / rows / values slots. */
    rhs    = (pastix_float_t*) b_h.val;
    ncol   = B.num_rows;
    colptr = (pastix_int_t*) B.row;
    rows   = (pastix_int_t*) B.col;
    values = (pastix_float_t*) B.val;

    mat_type = API_SYM_NO;

    iparm = (pastix_int_t*)malloc(IPARM_SIZE*sizeof(pastix_int_t));
    dparm = (pastix_float_t*)malloc(DPARM_SIZE*sizeof(pastix_float_t));

    /*******************************************/
    /* Initialize parameters to default values */
    /*******************************************/
    iparm[IPARM_MODIFY_PARAMETER] = API_NO;
    pastix(&pastix_data, MPI_COMM_WORLD,
           ncol, colptr, rows, values,
           perm, invp, rhs, 1, iparm, dparm);

    /*******************************************/
    /* Customize some parameters               */
    /*******************************************/
    iparm[IPARM_THREAD_NBR]    = 16;
    iparm[IPARM_SYM]           = mat_type;
    iparm[IPARM_FACTORIZATION] = API_FACT_LU;
    iparm[IPARM_VERBOSE]       = API_VERBOSE_YES;
    iparm[IPARM_ORDERING]      = API_ORDER_SCOTCH;
    iparm[IPARM_INCOMPLETE]    = API_NO;
    iparm[IPARM_RHS_MAKING]    = API_RHS_B;
    iparm[IPARM_LEVEL_OF_FILL] = 0;

    /*
     * Matrix needs :
     *    - to be in fortran numbering
     *    - to have only the lower triangular part in symmetric case
     *    - to have a graph with a symmetric structure in unsymmetric case
     * If those criteria are not matched, the csc will be reallocated and changed.
     */
    iparm[IPARM_MATRIX_VERIFICATION] = API_YES;

    perm = (pastix_int_t*)malloc(ncol*sizeof(pastix_int_t));
    invp = (pastix_int_t*)malloc(ncol*sizeof(pastix_int_t));

    /*******************************************/
    /* Steps 1 to 4 in a single call:          */
    /*   1 - Ordering / Scotch                 */
    /*   2 - Symbolic factorization            */
    /*   3 - Mapping and compute scheduling    */
    /*   4 - Numerical factorisation           */
    /* Steps 1-3 are only needed when the      */
    /* pattern of the matrix changes, step 4   */
    /* each time its values change.            */
    /*******************************************/
    iparm[IPARM_START_TASK] = API_TASK_ORDERING;
    iparm[IPARM_END_TASK]   = API_TASK_NUMFACT;

    pastix(&pastix_data, MPI_COMM_WORLD,
           ncol, colptr, rows, values,
           perm, invp, NULL, 1, iparm, dparm);

    /* Hand everything the apply routine needs over to the preconditioner. */
    precond->int_array_1 = (magma_int_t*) perm;
    precond->int_array_2 = (magma_int_t*) invp;

    precond->M.val = (float*) values;
    precond->M.col = (magma_int_t*) colptr;
    precond->M.row = (magma_int_t*) rows;
    precond->M.num_rows = A.num_rows;
    precond->M.num_cols = A.num_cols;
    precond->M.memory_location = Magma_CPU;
    precond->pastix_data = pastix_data;
    precond->iparm = iparm;
    precond->dparm = dparm;

    if( A.storage_type != Magma_CSR){
        magma_s_mfree( &A_h1 );
    }
    magma_s_vfree( &b_h);

    /* B is intentionally NOT freed: precond->M.val/col/row point at B's
       arrays, and magma_sapplypastix() passes them to pastix() again.
       Freeing B here would leave those pointers dangling. */
#else
    printf( "error: only real supported yet.\n");
#endif
#else
    printf( "error: pastix not available.\n");
#endif
    return MAGMA_SUCCESS;
}
int main (int argc, char **argv) { pastix_data_t *pastix_data = NULL; /* Pointer to a storage structure needed by pastix */ pastix_int_t ncol; /* Size of the matrix */ pastix_int_t *colptr = NULL; /* Indexes of first element of each column in row and values */ pastix_int_t *rows = NULL; /* Row of each element of the matrix */ pastix_float_t *values = NULL; /* Value of each element of the matrix */ pastix_float_t *rhs = NULL; /* right hand side */ pastix_float_t *rhssaved = NULL; /* right hand side */ pastix_float_t *ax = NULL; /* A times X product */ pastix_int_t iparm[IPARM_SIZE]; /* integer parameters for pastix */ double dparm[DPARM_SIZE]; /* floating parameters for pastix */ pastix_int_t *perm = NULL; /* Permutation tabular */ pastix_int_t *invp = NULL; /* Reverse permutation tabular */ char *type = NULL; /* type of the matrix */ char *rhstype = NULL; /* type of the right hand side */ #ifndef FORCE_NOMPI int required; /* MPI thread level required */ int provided; /* MPI thread level provided */ #endif driver_type_t *driver_type; /* Matrix driver(s) requested by user */ char **filename; /* Filename(s) given by user */ int nbmatrices; /* Number of matrices given by user */ int nbthread; /* Number of thread wanted by user */ int verbosemode; /* Level of verbose mode (0, 1, 2) */ int ordering; /* Ordering to use */ int incomplete; /* Indicate if we want to use incomplete factorisation */ int level_of_fill; /* Level of fill for incomplete factorisation */ int amalgamation; /* Level of amalgamation for Kass */ int ooc; /* OOC limit (Mo/percent depending on compilation options) */ int mpid; /* Global MPI rank */ pastix_int_t mat_type; long i,j; double norme1, norme2; int nfact = 2; int nsolv = 2; int nbrhs = 1; /*******************************************/ /* MPI initialisation */ /*******************************************/ #ifndef FORCE_NOMPI required = MPI_THREAD_MULTIPLE; provided = -1; MPI_Init_thread(&argc, &argv, required, &provided); switch (provided) { case 
MPI_THREAD_SINGLE: printf("MPI_Init_thread level = MPI_THREAD_SINGLE\n"); break; case MPI_THREAD_FUNNELED: printf("MPI_Init_thread level = MPI_THREAD_FUNNELED\n"); break; case MPI_THREAD_SERIALIZED: printf("MPI_Init_thread level = MPI_THREAD_SERIALIZED\n"); break; case MPI_THREAD_MULTIPLE: printf("MPI_Init_thread level = MPI_THREAD_MULTIPLE\n"); break; default: printf("MPI_Init_thread level = ???\n"); } MPI_Comm_rank (MPI_COMM_WORLD, &mpid); #else mpid = 0; #endif /*******************************************/ /* Get options from command line */ /*******************************************/ if (EXIT_FAILURE == get_options(argc, argv, &driver_type, &filename, &nbmatrices, &nbthread, &verbosemode, &ordering, &incomplete, &level_of_fill, &amalgamation, &ooc, &ncol)) return EXIT_FAILURE; if (nbmatrices != 1) { /* Matrices for each iteration must have the same patern, this is why we only authorize one matrix in this exemple. But it could be used with several matrices with same patern and different values. 
*/ fprintf(stderr,"WARNING: should have only one matrix\n"); } /*******************************************/ /* Read Matrice */ /*******************************************/ read_matrix(filename[0], &ncol, &colptr, &rows, &values, &rhs, &type, &rhstype, driver_type[0], MPI_COMM_WORLD); for (i = 0; i < nbmatrices; i++) if (filename[i] != NULL) free(filename[i]); free(filename); free(driver_type); mat_type = API_SYM_NO; if (MTX_ISSYM(type)) mat_type = API_SYM_YES; if (MTX_ISHER(type)) mat_type = API_SYM_HER; /*******************************************/ /* Check Matrix format */ /*******************************************/ /* * Matrix needs : * - to be in fortran numbering * - to have only the lower triangular part in symmetric case * - to have a graph with a symmetric structure in unsymmetric case */ pastix_checkMatrix(MPI_COMM_WORLD, verbosemode, mat_type, API_YES, ncol, &colptr, &rows, &values, NULL, 1); /*******************************************/ /* Initialize parameters to default values */ /*******************************************/ iparm[IPARM_MODIFY_PARAMETER] = API_NO; pastix(&pastix_data, MPI_COMM_WORLD, ncol, colptr, rows, values, perm, invp, rhs, 1, iparm, dparm); /*******************************************/ /* Customize some parameters */ /*******************************************/ iparm[IPARM_THREAD_NBR] = nbthread; iparm[IPARM_SYM] = mat_type; switch (mat_type) { case API_SYM_YES: iparm[IPARM_FACTORIZATION] = API_FACT_LDLT; break; case API_SYM_HER: iparm[IPARM_FACTORIZATION] = API_FACT_LDLH; break; default: iparm[IPARM_FACTORIZATION] = API_FACT_LU; } iparm[IPARM_MATRIX_VERIFICATION] = API_YES; iparm[IPARM_VERBOSE] = verbosemode; iparm[IPARM_ORDERING] = ordering; iparm[IPARM_INCOMPLETE] = incomplete; iparm[IPARM_OOC_LIMIT] = ooc; if (incomplete == 1) { dparm[DPARM_EPSILON_REFINEMENT] = 1e-7; } iparm[IPARM_LEVEL_OF_FILL] = level_of_fill; iparm[IPARM_AMALGAMATION_LEVEL] = amalgamation; iparm[IPARM_RHS_MAKING] = API_RHS_B; /* reread parameters to 
set IPARM/DPARM */ if (EXIT_FAILURE == get_idparm(argc, argv, iparm, dparm)) return EXIT_FAILURE; perm = malloc(ncol*sizeof(pastix_int_t)); invp = malloc(ncol*sizeof(pastix_int_t)); /*******************************************/ /* Step 1 - Ordering / Scotch */ /* Perform it only when the pattern of */ /* matrix change. */ /* eg: mesh refinement */ /* In many cases users can simply go from */ /* API_TASK_ORDERING to API_TASK_ANALYSE */ /* in one call. */ /*******************************************/ iparm[IPARM_START_TASK] = API_TASK_ORDERING; iparm[IPARM_END_TASK] = API_TASK_ORDERING; pastix(&pastix_data, MPI_COMM_WORLD, ncol, colptr, rows, values, perm, invp, rhs, 1, iparm, dparm); /*******************************************/ /* Step 2 - Symbolic factorization */ /* Perform it only when the pattern of */ /* matrix change. */ /*******************************************/ iparm[IPARM_START_TASK] = API_TASK_SYMBFACT; iparm[IPARM_END_TASK] = API_TASK_SYMBFACT; pastix(&pastix_data, MPI_COMM_WORLD, ncol, colptr, rows, values, perm, invp, rhs, 1, iparm, dparm); /*******************************************/ /* Step 3 - Mapping and Compute scheduling */ /* Perform it only when the pattern of */ /* matrix change. */ /*******************************************/ iparm[IPARM_START_TASK] = API_TASK_ANALYSE; iparm[IPARM_END_TASK] = API_TASK_ANALYSE; pastix(&pastix_data, MPI_COMM_WORLD, ncol, colptr, rows, values, perm, invp, rhs, 1, iparm, dparm); /*******************************************/ /* Save the rhs */ /* (it will be replaced by solution) */ /*******************************************/ rhssaved = malloc(nbrhs*ncol*sizeof(pastix_float_t)); for(i=0; i<nbrhs; i++) memcpy(rhssaved+(i*ncol), rhs, ncol*sizeof(pastix_float_t)); /* Do nfact factorization */ for (i = 0; i < nfact; i++) { /*******************************************/ /* Step 4 - Numerical Factorisation */ /* Perform it each time the values of the */ /* matrix changed. 
*/ /*******************************************/ #ifndef FORCE_NOMPI if (mpid == 0) #endif fprintf(stdout, "\t> Factorisation number %ld <\n", (long)(i+1)); iparm[IPARM_START_TASK] = API_TASK_NUMFACT; iparm[IPARM_END_TASK] = API_TASK_NUMFACT; pastix(&pastix_data, MPI_COMM_WORLD, ncol, colptr, rows, values, perm, invp, NULL, 1, iparm, dparm); /* Do two solve */ for (j = 0; j < nsolv; j++) { /* Restore RHS (in this example, we have n times the same rhs ) */ { int iter; for(iter=0; iter<nbrhs; iter++) memcpy(rhssaved+(iter*ncol), rhs, ncol*sizeof(pastix_float_t)); } /*******************************************/ /* Step 5 - Solve and Refinnement */ /* For each one of your Right-hand-side */ /* members. */ /* Also consider using multiple */ /* right-hand-side members. */ /*******************************************/ iparm[IPARM_START_TASK] = API_TASK_SOLVE; iparm[IPARM_END_TASK] = API_TASK_REFINE; PRINT_RHS("RHS", rhssaved, ncol, mpid, iparm[IPARM_VERBOSE]); #ifndef FORCE_NOMPI if (mpid == 0) #endif fprintf(stdout, "\t>> Solve And Refine step number %ld <<\n", (long)(j+1)); pastix(&pastix_data, MPI_COMM_WORLD, ncol, colptr, rows, values, perm, invp, rhssaved, nbrhs, iparm, dparm); PRINT_RHS("SOL", rhssaved, ncol, mpid, iparm[IPARM_VERBOSE]); CHECK_SOL(rhssaved, rhs, ncol, mpid); /* If you want separate Solve and Refinnement Steps */ j++; /* Restore RHS */ { int iter; for(iter=0; iter<nbrhs; iter++) memcpy(rhssaved+(iter*ncol), rhs, ncol*sizeof(pastix_float_t)); } /*******************************************/ /* Step 5.1 - Solve */ /* If you don't need iterative refinement */ /*******************************************/ iparm[IPARM_START_TASK] = API_TASK_SOLVE; iparm[IPARM_END_TASK] = API_TASK_SOLVE; #ifndef FORCE_NOMPI if (mpid == 0) #endif fprintf(stdout, "\t>> Solve step number %ld <<\n", (long)(j+1)); pastix(&pastix_data, MPI_COMM_WORLD, ncol, colptr, rows, values, perm, invp, rhssaved, nbrhs, iparm, dparm); PRINT_RHS("SOL", rhssaved, ncol, mpid, 
iparm[IPARM_VERBOSE]); CHECK_SOL(rhssaved, rhs, ncol, mpid); /* Restore RHS */ { int iter; for(iter=0; iter<nbrhs; iter++) memcpy(rhssaved+(iter*ncol), rhs, ncol*sizeof(pastix_float_t)); } /*******************************************/ /* Step 5.2 - Refinnement */ /*******************************************/ iparm[IPARM_START_TASK] = API_TASK_REFINE; iparm[IPARM_END_TASK] = API_TASK_REFINE; #ifndef FORCE_NOMPI if (mpid == 0) #endif fprintf(stdout, "\t>> Refine step number %ld <<\n", (long)(j+1)); pastix(&pastix_data, MPI_COMM_WORLD, ncol, colptr, rows, values, perm, invp, rhssaved, nbrhs, iparm, dparm); PRINT_RHS("RAF", rhssaved, ncol, mpid, iparm[IPARM_VERBOSE]); CHECK_SOL(rhssaved, rhs, ncol, mpid); } } /*******************************************/ /* Step 6 - Clean structures */ /* When you don't need PaStiX anymore */ /*******************************************/ iparm[IPARM_START_TASK] = API_TASK_CLEAN; iparm[IPARM_END_TASK] = API_TASK_CLEAN; pastix(&pastix_data, MPI_COMM_WORLD, ncol, colptr, rows, values, perm, invp, rhssaved, nbrhs, iparm, dparm); free(rhssaved); free(colptr); free(rows); free(values); free(perm); free(invp); free(rhs); free(type); free(rhstype); #ifndef FORCE_NOMPI MPI_Finalize(); #endif return EXIT_SUCCESS; }