int
c_fortran_pdgssvx_ABglobal_(int *iopt, int_t *n, int_t *nnz, int *nrhs,
                            double *values, int_t *rowind, int_t *colptr,
                            double *b, int *ldb, int grid_handle[HANDLE_SIZE],
                            double *berr, int factors[HANDLE_SIZE], int *info)
{
/*
 * Purpose
 * =======
 *
 * This is a Fortran wrapper to use pdgssvx_ABglobal().
 *
 * Arguments
 * =========
 *
 * iopt (input) int
 *      Specifies the operation to be performed:
 *      = 1, performs LU decomposition for the first time
 *      = 2, performs a subsequent LU decomposition for a new matrix
 *           with the same sparsity pattern
 *      = 3, performs triangular solve
 *      = 4, frees all the storage in the end
 *
 * n    (input) int, order of the matrix A
 *
 * nnz  (input) int, number of nonzeros in matrix A
 *
 * nrhs (input) int, number of right-hand sides in the system AX = B
 *
 * values/rowind/colptr (input) column compressed data structure for A
 *
 * b    (input/output) double
 *      On input, the right-hand side matrix of dimension (ldb, nrhs).
 *      On output, the solution matrix.
 *
 * ldb  (input) int, leading dimension of the matrix B
 *
 * grid_handle (input) int array of size 8, holds a pointer to the process
 *      grid structure, which is created and freed separately.
 *
 * berr (output) double, the backward error of each right-hand side
 *
 * factors (input/output) int array of size 8
 *      If iopt == 1, it is an output and contains the pointer pointing to
 *      the structure of the factored matrices.
 *      Otherwise, it is an input.
 *
 * info (output) int
 *
 */
    superlu_options_t options;
    SuperLUStat_t stat;
    SuperMatrix A;
    ScalePermstruct_t *ScalePermstruct;
    LUstruct_t *LUstruct;
    int_t nprow, npcol;
    int iam;
    int report;
    int i;
    gridinfo_t *grid;
    factors_dist_t *LUfactors;

    /*
     * Set option for printing statistics.
     * report = 0: no reporting
     * report = 1: reporting
     */
    report = 0;

    /* Locate the process grid.  The handle scheme stores a pointer in the
       leading entry of an int array; it assumes a pointer fits in an int,
       as in the original SuperLU Fortran interface. */
    grid = (gridinfo_t *) grid_handle[0];
    iam = (*grid).iam;
    nprow = (int_t) grid->nprow;
    npcol = (int_t) grid->npcol;

    if ( *iopt == 1 ) { /* LU decomposition */

        if ( !iam )
            printf(".. Process grid: %d X %d\n", (int) nprow, (int) npcol);

        /* Initialize the statistics variables. */
        PStatInit(&stat);

        dCreate_CompCol_Matrix_dist(&A, *n, *n, *nnz, values, rowind, colptr,
                                    SLU_NC, SLU_D, SLU_GE);

        /* Set options. */
        set_default_options(&options);

        /* Initialize ScalePermstruct and LUstruct. */
        ScalePermstruct = (ScalePermstruct_t *)
            SUPERLU_MALLOC(sizeof(ScalePermstruct_t));
        ScalePermstructInit(*n, *n, ScalePermstruct);
        LUstruct = (LUstruct_t *) SUPERLU_MALLOC(sizeof(LUstruct_t));
        LUstructInit(*n, *n, LUstruct);

        /* Call the global routine with nrhs=0 to perform the factorization. */
        pdgssvx_ABglobal(&options, &A, ScalePermstruct, NULL, *ldb, 0,
                         grid, LUstruct, berr, &stat, info);

        if ( *info == 0 ) {
            if ( report == 1 ) PStatPrint(&options, &stat, grid);
        } else {
            printf("pdgssvx_ABglobal() error returns INFO= %d\n", *info);
        }

        /* Save the LU factors in the factors handle.  As with grid_handle,
           storing the pointer in an int entry assumes it fits. */
        LUfactors = (factors_dist_t*) SUPERLU_MALLOC(sizeof(factors_dist_t));
        LUfactors->ScalePermstruct = ScalePermstruct;
        LUfactors->LUstruct = LUstruct;
        factors[0] = (int) LUfactors;

        /* Free un-wanted storage */
        Destroy_SuperMatrix_Store_dist(&A);
        PStatFree(&stat);

    } else if ( *iopt == 2 ) {
        /* Factor a modified matrix with the same sparsity pattern using
           existing permutations and L U storage */

        /* Extract the LU factors in the factors handle */
        LUfactors = (factors_dist_t*) factors[0];
        ScalePermstruct = LUfactors->ScalePermstruct;
        LUstruct = LUfactors->LUstruct;

        PStatInit(&stat);

        /* Reset SuperMatrix pointers. */
        dCreate_CompCol_Matrix_dist(&A, *n, *n, *nnz, values, rowind, colptr,
                                    SLU_NC, SLU_D, SLU_GE);

        /* Set options. */
        set_default_options(&options);
        options.Fact = SamePattern_SameRowPerm;

        /* Call the routine with nrhs=0 to perform the factorization. */
        pdgssvx_ABglobal(&options, &A, ScalePermstruct, NULL, *ldb, 0,
                         grid, LUstruct, berr, &stat, info);

        if ( *info == 0 ) {
            if ( report == 1 ) PStatPrint(&options, &stat, grid);
        } else {
            printf("pdgssvx_ABglobal() error returns INFO= %d\n", *info);
        }

        /* Free un-wanted storage */
        Destroy_SuperMatrix_Store_dist(&A);
        PStatFree(&stat);

    } else if ( *iopt == 3 ) { /* Triangular solve */

        /* Extract the LU factors in the factors handle */
        LUfactors = (factors_dist_t*) factors[0];
        ScalePermstruct = LUfactors->ScalePermstruct;
        LUstruct = LUfactors->LUstruct;

        PStatInit(&stat);

        /* Reset SuperMatrix pointers. */
        dCreate_CompCol_Matrix_dist(&A, *n, *n, *nnz, values, rowind, colptr,
                                    SLU_NC, SLU_D, SLU_GE);

        /* Set options. */
        set_default_options(&options);
        options.Fact = FACTORED;

        /* Solve the system A*X=B, overwriting B with X. */
        pdgssvx_ABglobal(&options, &A, ScalePermstruct, b, *ldb, *nrhs,
                         grid, LUstruct, berr, &stat, info);

        /* Free un-wanted storage */
        Destroy_SuperMatrix_Store_dist(&A);
        PStatFree(&stat);

    } else if ( *iopt == 4 ) { /* Free storage */

        /* Free the LU factors in the factors handle */
        LUfactors = (factors_dist_t*) factors[0];
        Destroy_LU(*n, grid, LUfactors->LUstruct);
        LUstructFree(LUfactors->LUstruct);
        ScalePermstructFree(LUfactors->ScalePermstruct);
        SUPERLU_FREE(LUfactors->ScalePermstruct);
        SUPERLU_FREE(LUfactors->LUstruct);
        SUPERLU_FREE(LUfactors);

    } else {
        fprintf(stderr,
                "Invalid iopt=%d passed to c_fortran_pdgssvx_ABglobal()\n",
                *iopt);
        exit(-1);
    }
}
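/*
 * A minimal C-side sketch of the intended call sequence for the wrapper
 * above, shown for illustration only.  It assumes the grid handle has
 * already been filled by a companion grid-creation wrapper (not shown in
 * this listing), and that values/rowind/colptr hold the matrix in global
 * compressed-column form replicated on every process, as
 * pdgssvx_ABglobal() requires.
 */
void example_factor_solve_free(int_t n, int_t nnz, double *values,
                               int_t *rowind, int_t *colptr, double *b,
                               int grid_handle[HANDLE_SIZE])
{
    int iopt, info, nrhs = 1, ldb = (int) n;
    int factors[HANDLE_SIZE];
    double berr[1];

    iopt = 1;   /* factor A for the first time; b is untouched (nrhs=0 inside) */
    c_fortran_pdgssvx_ABglobal_(&iopt, &n, &nnz, &nrhs, values, rowind, colptr,
                                b, &ldb, grid_handle, berr, factors, &info);

    iopt = 3;   /* triangular solve: overwrites b with the solution X */
    c_fortran_pdgssvx_ABglobal_(&iopt, &n, &nnz, &nrhs, values, rowind, colptr,
                                b, &ldb, grid_handle, berr, factors, &info);

    iopt = 4;   /* release the factors saved in the handle */
    c_fortran_pdgssvx_ABglobal_(&iopt, &n, &nnz, &nrhs, values, rowind, colptr,
                                b, &ldb, grid_handle, berr, factors, &info);
}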
int main(int argc, char *argv[])
{
    superlu_dist_options_t options;
    SuperLUStat_t stat;
    SuperMatrix A;
    ScalePermstruct_t ScalePermstruct;
    LUstruct_t LUstruct;
    gridinfo_t grid1, grid2;
    double *berr;
    double *a, *b, *xtrue;
    int_t *asub, *xa;
    int_t i, j, m, n, nnz;
    int_t nprow, npcol, ldumap, p;
    int_t usermap[6];
    int iam, info, ldb, ldx, nprocs;
    int nrhs = 1;   /* Number of right-hand sides. */
    char trans[1];
    char **cpp, c;
    FILE *fp, *fopen();

    /* prototypes */
    extern void LUstructInit(const int_t, LUstruct_t *);
    extern void LUstructFree(LUstruct_t *);
    extern void Destroy_LU(int_t, gridinfo_t *, LUstruct_t *);

    /* ------------------------------------------------------------
       INITIALIZE MPI ENVIRONMENT.
       ------------------------------------------------------------*/
    MPI_Init( &argc, &argv );
    MPI_Comm_size( MPI_COMM_WORLD, &nprocs );
    if ( nprocs < 10 ) {
        fprintf(stderr, "Requires at least 10 processes\n");
        exit(-1);
    }

    nprow = 2; npcol = 3;  /* Defaults reported by -h; the two grids below
                              are hard-coded regardless. */

    /* Parse command line argv[]. */
    for (cpp = argv+1; *cpp; ++cpp) {
        if ( **cpp == '-' ) {
            c = *(*cpp+1);
            ++cpp;
            switch (c) {
              case 'h':
                  printf("Options:\n");
                  printf("\t-r <int>: process rows    (default %d)\n", (int) nprow);
                  printf("\t-c <int>: process columns (default %d)\n", (int) npcol);
                  exit(0);
                  break;
              case 'r': nprow = atoi(*cpp);
                  break;
              case 'c': npcol = atoi(*cpp);
                  break;
            }
        } else { /* Last arg is considered a filename */
            if ( !(fp = fopen(*cpp, "r")) ) {
                ABORT("File does not exist");
            }
            break;
        }
    }

    /* ------------------------------------------------------------
       INITIALIZE THE SUPERLU PROCESS GRID 1.
       ------------------------------------------------------------*/
    nprow = 2; npcol = 3; ldumap = 2;
    p = 0;    /* Grid 1 starts from process 0. */
    for (i = 0; i < nprow; ++i)
        for (j = 0; j < npcol; ++j) usermap[i+j*ldumap] = p++;
    superlu_gridmap(MPI_COMM_WORLD, nprow, npcol, usermap, ldumap, &grid1);

    /* ------------------------------------------------------------
       INITIALIZE THE SUPERLU PROCESS GRID 2.
       ------------------------------------------------------------*/
    nprow = 2; npcol = 2; ldumap = 2;
    p = 6;    /* Grid 2 starts from process 6. */
    for (i = 0; i < nprow; ++i)
        for (j = 0; j < npcol; ++j) usermap[i+j*ldumap] = p++;
    superlu_gridmap(MPI_COMM_WORLD, nprow, npcol, usermap, ldumap, &grid2);

    /* Bail out if I do not belong in any of the 2 grids. */
    MPI_Comm_rank( MPI_COMM_WORLD, &iam );
    if ( iam >= 10 ) goto out;

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(iam, "Enter main()");
#endif

    if ( iam >= 0 && iam < 6 ) { /* I am in grid 1. */
        iam = grid1.iam;  /* Get the logical number in the new grid. */

        /* ------------------------------------------------------------
           PROCESS 0 READS THE MATRIX A, AND THEN BROADCASTS IT TO ALL
           THE OTHER PROCESSES.
           ------------------------------------------------------------*/
        if ( !iam ) {
            /* Read the matrix stored on disk in Harwell-Boeing format. */
            dreadhb_dist(iam, fp, &m, &n, &nnz, &a, &asub, &xa);

            printf("\tDimension\t%dx%d\t # nonzeros %d\n",
                   (int) m, (int) n, (int) nnz);
            printf("\tProcess grid\t%d X %d\n",
                   (int) grid1.nprow, (int) grid1.npcol);

            /* Broadcast matrix A to the other PEs. */
            MPI_Bcast( &m,   1,   mpi_int_t,  0, grid1.comm );
            MPI_Bcast( &n,   1,   mpi_int_t,  0, grid1.comm );
            MPI_Bcast( &nnz, 1,   mpi_int_t,  0, grid1.comm );
            MPI_Bcast( a,    nnz, MPI_DOUBLE, 0, grid1.comm );
            MPI_Bcast( asub, nnz, mpi_int_t,  0, grid1.comm );
            MPI_Bcast( xa,   n+1, mpi_int_t,  0, grid1.comm );
        } else {
            /* Receive matrix A from PE 0. */
            MPI_Bcast( &m,   1,   mpi_int_t,  0, grid1.comm );
            MPI_Bcast( &n,   1,   mpi_int_t,  0, grid1.comm );
            MPI_Bcast( &nnz, 1,   mpi_int_t,  0, grid1.comm );

            /* Allocate storage for compressed column representation. */
            dallocateA_dist(n, nnz, &a, &asub, &xa);

            MPI_Bcast( a,    nnz, MPI_DOUBLE, 0, grid1.comm );
            MPI_Bcast( asub, nnz, mpi_int_t,  0, grid1.comm );
            MPI_Bcast( xa,   n+1, mpi_int_t,  0, grid1.comm );
        }

        /* Create compressed column matrix for A. */
        dCreate_CompCol_Matrix_dist(&A, m, n, nnz, a, asub, xa,
                                    SLU_NC, SLU_D, SLU_GE);

        /* Generate the exact solution and compute the right-hand side. */
        if (!(b=doubleMalloc_dist(m*nrhs))) ABORT("Malloc fails for b[]");
        if (!(xtrue=doubleMalloc_dist(n*nrhs))) ABORT("Malloc fails for xtrue[]");
        *trans = 'N';
        ldx = n;
        ldb = m;
        dGenXtrue_dist(n, nrhs, xtrue, ldx);
        dFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb);

        if ( !(berr = doubleMalloc_dist(nrhs)) )
            ABORT("Malloc fails for berr[].");

        /* ------------------------------------------------------------
           NOW WE SOLVE THE LINEAR SYSTEM.
           ------------------------------------------------------------*/
        /* Set the default input options:
           options.Fact = DOFACT;
           options.Equil = YES;
           options.ColPerm = METIS_AT_PLUS_A;
           options.RowPerm = LargeDiag;
           options.ReplaceTinyPivot = YES;
           options.Trans = NOTRANS;
           options.IterRefine = DOUBLE;
           options.SolveInitialized = NO;
           options.RefineInitialized = NO;
           options.PrintStat = YES;
        */
        set_default_options_dist(&options);

        if (!iam) {
            print_sp_ienv_dist(&options);
            print_options_dist(&options);
        }

        /* Initialize ScalePermstruct and LUstruct. */
        ScalePermstructInit(m, n, &ScalePermstruct);
        LUstructInit(n, &LUstruct);

        /* Initialize the statistics variables. */
        PStatInit(&stat);

        /* Call the linear equation solver: factorize and solve. */
        pdgssvx_ABglobal(&options, &A, &ScalePermstruct, b, ldb, nrhs,
                         &grid1, &LUstruct, berr, &stat, &info);

        /* Check the accuracy of the solution. */
        if ( !iam ) {
            dinf_norm_error_dist(n, nrhs, b, ldb, xtrue, ldx, &grid1);
        }

        /* Print the statistics. */
        PStatPrint(&options, &stat, &grid1);

        /* ------------------------------------------------------------
           DEALLOCATE STORAGE.
           ------------------------------------------------------------*/
        PStatFree(&stat);
        Destroy_CompCol_Matrix_dist(&A);
        Destroy_LU(n, &grid1, &LUstruct);
        ScalePermstructFree(&ScalePermstruct);
        LUstructFree(&LUstruct);
        SUPERLU_FREE(b);
        SUPERLU_FREE(xtrue);
        SUPERLU_FREE(berr);

    } else { /* I am in grid 2. */
        iam = grid2.iam;  /* Get the logical number in the new grid. */

        /* ------------------------------------------------------------
           PROCESS 0 READS THE MATRIX A, AND THEN BROADCASTS IT TO ALL
           THE OTHER PROCESSES.
           ------------------------------------------------------------*/
        if ( !iam ) {
            /* Read the matrix stored on disk in Harwell-Boeing format. */
            dreadhb_dist(iam, fp, &m, &n, &nnz, &a, &asub, &xa);

            printf("\tDimension\t%dx%d\t # nonzeros %d\n",
                   (int) m, (int) n, (int) nnz);
            printf("\tProcess grid\t%d X %d\n",
                   (int) grid2.nprow, (int) grid2.npcol);

            /* Broadcast matrix A to the other PEs. */
            MPI_Bcast( &m,   1,   mpi_int_t,  0, grid2.comm );
            MPI_Bcast( &n,   1,   mpi_int_t,  0, grid2.comm );
            MPI_Bcast( &nnz, 1,   mpi_int_t,  0, grid2.comm );
            MPI_Bcast( a,    nnz, MPI_DOUBLE, 0, grid2.comm );
            MPI_Bcast( asub, nnz, mpi_int_t,  0, grid2.comm );
            MPI_Bcast( xa,   n+1, mpi_int_t,  0, grid2.comm );
        } else {
            /* Receive matrix A from PE 0. */
            MPI_Bcast( &m,   1,   mpi_int_t,  0, grid2.comm );
            MPI_Bcast( &n,   1,   mpi_int_t,  0, grid2.comm );
            MPI_Bcast( &nnz, 1,   mpi_int_t,  0, grid2.comm );

            /* Allocate storage for compressed column representation. */
            dallocateA_dist(n, nnz, &a, &asub, &xa);

            MPI_Bcast( a,    nnz, MPI_DOUBLE, 0, grid2.comm );
            MPI_Bcast( asub, nnz, mpi_int_t,  0, grid2.comm );
            MPI_Bcast( xa,   n+1, mpi_int_t,  0, grid2.comm );
        }

        /* Create compressed column matrix for A. */
        dCreate_CompCol_Matrix_dist(&A, m, n, nnz, a, asub, xa,
                                    SLU_NC, SLU_D, SLU_GE);

        /* Generate the exact solution and compute the right-hand side. */
        if (!(b=doubleMalloc_dist(m*nrhs))) ABORT("Malloc fails for b[]");
        if (!(xtrue=doubleMalloc_dist(n*nrhs))) ABORT("Malloc fails for xtrue[]");
        *trans = 'N';
        ldx = n;
        ldb = m;
        dGenXtrue_dist(n, nrhs, xtrue, ldx);
        dFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb);

        if ( !(berr = doubleMalloc_dist(nrhs)) )
            ABORT("Malloc fails for berr[].");

        /* ------------------------------------------------------------
           NOW WE SOLVE THE LINEAR SYSTEM.
           ------------------------------------------------------------*/
        /* Set the default input options:
           options.Fact = DOFACT;
           options.Equil = YES;
           options.ColPerm = MMD_AT_PLUS_A;
           options.RowPerm = LargeDiag;
           options.ReplaceTinyPivot = YES;
           options.Trans = NOTRANS;
           options.IterRefine = DOUBLE;
           options.SolveInitialized = NO;
           options.RefineInitialized = NO;
           options.PrintStat = YES;
        */
        set_default_options_dist(&options);

        /* Initialize ScalePermstruct and LUstruct. */
        ScalePermstructInit(m, n, &ScalePermstruct);
        LUstructInit(n, &LUstruct);

        /* Initialize the statistics variables. */
        PStatInit(&stat);

        /* Call the linear equation solver: factorize and solve. */
        pdgssvx_ABglobal(&options, &A, &ScalePermstruct, b, ldb, nrhs,
                         &grid2, &LUstruct, berr, &stat, &info);

        /* Check the accuracy of the solution. */
        if ( !iam ) {
            dinf_norm_error_dist(n, nrhs, b, ldb, xtrue, ldx, &grid2);
        }

        /* Print the statistics. */
        PStatPrint(&options, &stat, &grid2);

        /* ------------------------------------------------------------
           DEALLOCATE STORAGE.
           ------------------------------------------------------------*/
        PStatFree(&stat);
        Destroy_CompCol_Matrix_dist(&A);
        Destroy_LU(n, &grid2, &LUstruct);
        ScalePermstructFree(&ScalePermstruct);
        LUstructFree(&LUstruct);
        SUPERLU_FREE(b);
        SUPERLU_FREE(xtrue);
        SUPERLU_FREE(berr);
    }

    /* ------------------------------------------------------------
       RELEASE THE SUPERLU PROCESS GRIDS.
       ------------------------------------------------------------*/
    superlu_gridexit(&grid1);
    superlu_gridexit(&grid2);

out:
    /* ------------------------------------------------------------
       TERMINATE THE MPI EXECUTION ENVIRONMENT.
       ------------------------------------------------------------*/
    MPI_Finalize();

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(iam, "Exit main()");
#endif
}
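/*
 * The usermap passed to superlu_gridmap() above is column-major: logical
 * grid position (i, j) holds MPI rank usermap[i + j*ldumap].  The tiny
 * standalone sketch below (illustrative only, not part of the driver)
 * prints the layout the grid-1 loop produces: row 0 = {0, 1, 2},
 * row 1 = {3, 4, 5}.
 */
#include <stdio.h>

int main(void)
{
    int nprow = 2, npcol = 3, ldumap = 2, p = 0;
    int usermap[6];
    int i, j;

    /* Same fill loop as the driver: visit (i,j) row by row, store column-major. */
    for (i = 0; i < nprow; ++i)
        for (j = 0; j < npcol; ++j) usermap[i + j*ldumap] = p++;

    for (i = 0; i < nprow; ++i) {
        for (j = 0; j < npcol; ++j) printf("%3d", usermap[i + j*ldumap]);
        printf("\n");
    }
    return 0;
}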
int main(int argc, char *argv[])
{
    superlu_options_t options;
    SuperLUStat_t stat;
    SuperMatrix A;
    ScalePermstruct_t ScalePermstruct;
    LUstruct_t LUstruct;
    gridinfo_t grid;
    double *berr;
    double *a, *b, *xtrue;
    int_t *asub, *xa;
    int_t m, n, nnz;
    int_t nprow, npcol;
    int iam, info, ldb, ldx, nrhs;
    char trans[1];
    char **cpp, c;
    FILE *fp, *fopen();
    extern int cpp_defs();

    /* prototypes */
    extern void LUstructInit(const int_t, LUstruct_t *);
    extern void LUstructFree(LUstruct_t *);
    extern void Destroy_LU(int_t, gridinfo_t *, LUstruct_t *);

    nprow = 1;  /* Default process rows.       */
    npcol = 1;  /* Default process columns.    */
    nrhs  = 1;  /* Number of right-hand sides. */

    /* ------------------------------------------------------------
       INITIALIZE MPI ENVIRONMENT.
       ------------------------------------------------------------*/
    MPI_Init( &argc, &argv );

    /* Parse command line argv[]. */
    for (cpp = argv+1; *cpp; ++cpp) {
        if ( **cpp == '-' ) {
            c = *(*cpp+1);
            ++cpp;
            switch (c) {
              case 'h':
                  printf("Options:\n");
                  printf("\t-r <int>: process rows    (default " IFMT ")\n", nprow);
                  printf("\t-c <int>: process columns (default " IFMT ")\n", npcol);
                  exit(0);
                  break;
              case 'r': nprow = atoi(*cpp);
                  break;
              case 'c': npcol = atoi(*cpp);
                  break;
            }
        } else { /* Last arg is considered a filename */
            if ( !(fp = fopen(*cpp, "r")) ) {
                ABORT("File does not exist");
            }
            break;
        }
    }

    /* ------------------------------------------------------------
       INITIALIZE THE SUPERLU PROCESS GRID.
       ------------------------------------------------------------*/
    superlu_gridinit(MPI_COMM_WORLD, nprow, npcol, &grid);

    /* Bail out if I do not belong in the grid. */
    iam = grid.iam;
    if ( iam >= nprow * npcol ) goto out;

#if ( VAMPIR>=1 )
    VT_traceoff();
#endif

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(iam, "Enter main()");
#endif

    /* ------------------------------------------------------------
       PROCESS 0 READS THE MATRIX A, AND THEN BROADCASTS IT TO ALL
       THE OTHER PROCESSES.
       ------------------------------------------------------------*/
    if ( !iam ) {
        /* Print the CPP definitions. */
        cpp_defs();

#if 1
        /* Read the matrix stored on disk in Harwell-Boeing format. */
        dreadhb_dist(iam, fp, &m, &n, &nnz, &a, &asub, &xa);
#else
        /* Read the matrix stored on disk in triplet (coordinate) format. */
        printf(".. reading triplet file\n");
        dreadtriple(fp, &m, &n, &nnz, &a, &asub, &xa);
#endif

        printf("Input matrix file: %s\n", *cpp);
        printf("\tDimension\t" IFMT "x" IFMT "\t # nonzeros " IFMT "\n", m, n, nnz);
        printf("\tProcess grid\t%d X %d\n", (int) grid.nprow, (int) grid.npcol);

        /* Broadcast matrix A to the other PEs. */
        MPI_Bcast( &m,   1,   mpi_int_t,  0, grid.comm );
        MPI_Bcast( &n,   1,   mpi_int_t,  0, grid.comm );
        MPI_Bcast( &nnz, 1,   mpi_int_t,  0, grid.comm );
        MPI_Bcast( a,    nnz, MPI_DOUBLE, 0, grid.comm );
        MPI_Bcast( asub, nnz, mpi_int_t,  0, grid.comm );
        MPI_Bcast( xa,   n+1, mpi_int_t,  0, grid.comm );
    } else {
        /* Receive matrix A from PE 0. */
        MPI_Bcast( &m,   1,   mpi_int_t,  0, grid.comm );
        MPI_Bcast( &n,   1,   mpi_int_t,  0, grid.comm );
        MPI_Bcast( &nnz, 1,   mpi_int_t,  0, grid.comm );

        /* Allocate storage for compressed column representation. */
        dallocateA_dist(n, nnz, &a, &asub, &xa);

        MPI_Bcast( a,    nnz, MPI_DOUBLE, 0, grid.comm );
        MPI_Bcast( asub, nnz, mpi_int_t,  0, grid.comm );
        MPI_Bcast( xa,   n+1, mpi_int_t,  0, grid.comm );
    }

    /* Create compressed column matrix for A. */
    dCreate_CompCol_Matrix_dist(&A, m, n, nnz, a, asub, xa,
                                SLU_NC, SLU_D, SLU_GE);

    /* Generate the exact solution and compute the right-hand side. */
    if (!(b=doubleMalloc_dist(m*nrhs))) ABORT("Malloc fails for b[]");
    if (!(xtrue=doubleMalloc_dist(n*nrhs))) ABORT("Malloc fails for xtrue[]");
    *trans = 'N';
    ldx = n;
    ldb = m;
    dGenXtrue_dist(n, nrhs, xtrue, ldx);
    dFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb);

    if ( !(berr = doubleMalloc_dist(nrhs)) )
        ABORT("Malloc fails for berr[].");

    /* ------------------------------------------------------------
       NOW WE SOLVE THE LINEAR SYSTEM.
       ------------------------------------------------------------*/
    /* Set the default input options:
       options.Fact = DOFACT;
       options.Equil = YES;
       options.ColPerm = METIS_AT_PLUS_A;
       options.RowPerm = LargeDiag;
       options.Trans = NOTRANS;
       options.IterRefine = DOUBLE;
       options.SolveInitialized = NO;
       options.RefineInitialized = NO;
       options.PrintStat = YES;
    */
    set_default_options_dist(&options);

    if (!iam) {
        print_sp_ienv_dist(&options);
        print_options_dist(&options);
    }

    /* Initialize ScalePermstruct and LUstruct. */
    ScalePermstructInit(m, n, &ScalePermstruct);
    LUstructInit(n, &LUstruct);

    /* Initialize the statistics variables. */
    PStatInit(&stat);

    /* Call the linear equation solver. */
    pdgssvx_ABglobal(&options, &A, &ScalePermstruct, b, ldb, nrhs,
                     &grid, &LUstruct, berr, &stat, &info);

    /* Check the accuracy of the solution. */
    if ( !iam ) {
        dinf_norm_error_dist(n, nrhs, b, ldb, xtrue, ldx, &grid);
    }

    PStatPrint(&options, &stat, &grid);  /* Print the statistics. */

    /* ------------------------------------------------------------
       DEALLOCATE STORAGE.
       ------------------------------------------------------------*/
    PStatFree(&stat);
    Destroy_CompCol_Matrix_dist(&A);
    Destroy_LU(n, &grid, &LUstruct);
    ScalePermstructFree(&ScalePermstruct);
    LUstructFree(&LUstruct);
    SUPERLU_FREE(b);
    SUPERLU_FREE(xtrue);
    SUPERLU_FREE(berr);

    /* ------------------------------------------------------------
       RELEASE THE SUPERLU PROCESS GRID.
       ------------------------------------------------------------*/
out:
    superlu_gridexit(&grid);

    /* ------------------------------------------------------------
       TERMINATE THE MPI EXECUTION ENVIRONMENT.
       ------------------------------------------------------------*/
    MPI_Finalize();

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(iam, "Exit main()");
#endif
}
int main(int argc, char *argv[])
{
    superlu_dist_options_t options;
    SuperLUStat_t stat;
    SuperMatrix A;
    ScalePermstruct_t ScalePermstruct;
    LUstruct_t LUstruct;
    gridinfo_t grid;
    double *berr;
    double *a, *a1, *b, *b1, *xtrue;
    int_t *asub, *asub1, *xa, *xa1;
    int_t i, j, m, n, nnz;
    int_t nprow, npcol;
    int iam, info, ldb, ldx, nrhs;
    char trans[1];
    char **cpp, c;
    FILE *fp, *fopen();
    extern int cpp_defs();

    /* prototypes */
    extern void LUstructInit(const int_t, LUstruct_t *);
    extern void LUstructFree(LUstruct_t *);
    extern void Destroy_LU(int_t, gridinfo_t *, LUstruct_t *);

    nprow = 1;  /* Default process rows.       */
    npcol = 1;  /* Default process columns.    */
    nrhs  = 1;  /* Number of right-hand sides. */

    /* ------------------------------------------------------------
       INITIALIZE MPI ENVIRONMENT.
       ------------------------------------------------------------*/
    MPI_Init( &argc, &argv );

    /* Parse command line argv[]. */
    for (cpp = argv+1; *cpp; ++cpp) {
        if ( **cpp == '-' ) {
            c = *(*cpp+1);
            ++cpp;
            switch (c) {
              case 'h':
                  printf("Options:\n");
                  printf("\t-r <int>: process rows    (default %d)\n", (int) nprow);
                  printf("\t-c <int>: process columns (default %d)\n", (int) npcol);
                  exit(0);
                  break;
              case 'r': nprow = atoi(*cpp);
                  break;
              case 'c': npcol = atoi(*cpp);
                  break;
            }
        } else { /* Last arg is considered a filename */
            if ( !(fp = fopen(*cpp, "r")) ) {
                ABORT("File does not exist");
            }
            break;
        }
    }

    /* ------------------------------------------------------------
       INITIALIZE THE SUPERLU PROCESS GRID.
       ------------------------------------------------------------*/
    superlu_gridinit(MPI_COMM_WORLD, nprow, npcol, &grid);

    /* Bail out if I do not belong in the grid. */
    iam = grid.iam;
    if ( iam >= nprow * npcol ) goto out;

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(iam, "Enter main()");
#endif

    /* ------------------------------------------------------------
       Process 0 reads the matrix A, and then broadcasts it to all
       the other processes.
       ------------------------------------------------------------*/
    if ( !iam ) {
        /* Print the CPP definitions. */
        cpp_defs();

        /* Read the matrix stored on disk in Harwell-Boeing format. */
        dreadhb_dist(iam, fp, &m, &n, &nnz, &a, &asub, &xa);

        printf("Input matrix file: %s\n", *cpp);
        printf("\tDimension\t%dx%d\t # nonzeros %d\n",
               (int) m, (int) n, (int) nnz);
        printf("\tProcess grid\t%d X %d\n", (int) grid.nprow, (int) grid.npcol);

        /* Broadcast matrix A to the other PEs. */
        MPI_Bcast( &m,   1,   mpi_int_t,  0, grid.comm );
        MPI_Bcast( &n,   1,   mpi_int_t,  0, grid.comm );
        MPI_Bcast( &nnz, 1,   mpi_int_t,  0, grid.comm );
        MPI_Bcast( a,    nnz, MPI_DOUBLE, 0, grid.comm );
        MPI_Bcast( asub, nnz, mpi_int_t,  0, grid.comm );
        MPI_Bcast( xa,   n+1, mpi_int_t,  0, grid.comm );
    } else {
        /* Receive matrix A from PE 0. */
        MPI_Bcast( &m,   1,   mpi_int_t,  0, grid.comm );
        MPI_Bcast( &n,   1,   mpi_int_t,  0, grid.comm );
        MPI_Bcast( &nnz, 1,   mpi_int_t,  0, grid.comm );

        /* Allocate storage for compressed column representation. */
        dallocateA_dist(n, nnz, &a, &asub, &xa);

        MPI_Bcast( a,    nnz, MPI_DOUBLE, 0, grid.comm );
        MPI_Bcast( asub, nnz, mpi_int_t,  0, grid.comm );
        MPI_Bcast( xa,   n+1, mpi_int_t,  0, grid.comm );
    }

    /* Create compressed column matrix for A. */
    dCreate_CompCol_Matrix_dist(&A, m, n, nnz, a, asub, xa,
                                SLU_NC, SLU_D, SLU_GE);

    /* Generate the exact solution and compute the right-hand side. */
    if (!(b=doubleMalloc_dist(m * nrhs))) ABORT("Malloc fails for b[]");
    if (!(xtrue=doubleMalloc_dist(n*nrhs))) ABORT("Malloc fails for xtrue[]");
    *trans = 'N';
    ldx = n;
    ldb = m;
    dGenXtrue_dist(n, nrhs, xtrue, ldx);
    dFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb);

    /* Save a copy of the right-hand side. */
    if ( !(b1 = doubleMalloc_dist(m * nrhs)) ) ABORT("Malloc fails for b1[]");
    for (j = 0; j < nrhs; ++j)
        for (i = 0; i < m; ++i) b1[i+j*ldb] = b[i+j*ldb];

    if ( !(berr = doubleMalloc_dist(nrhs)) )
        ABORT("Malloc fails for berr[].");

    /* Save a copy of the matrix A.  The copy is needed because the first
       solve may scale/permute the original arrays in place, and
       Destroy_CompCol_Matrix_dist() below frees them. */
    dallocateA_dist(n, nnz, &a1, &asub1, &xa1);
    for (i = 0; i < nnz; ++i) {
        a1[i] = a[i];
        asub1[i] = asub[i];
    }
    for (i = 0; i < n+1; ++i) xa1[i] = xa[i];

    /* ------------------------------------------------------------
       WE SOLVE THE LINEAR SYSTEM FOR THE FIRST TIME.
       ------------------------------------------------------------*/
    /* Set the default input options:
       options.Fact = DOFACT;
       options.Equil = YES;
       options.ColPerm = METIS_AT_PLUS_A;
       options.RowPerm = LargeDiag;
       options.ReplaceTinyPivot = YES;
       options.Trans = NOTRANS;
       options.IterRefine = DOUBLE;
       options.SolveInitialized = NO;
       options.RefineInitialized = NO;
       options.PrintStat = YES;
    */
    set_default_options_dist(&options);

    if (!iam) {
        print_sp_ienv_dist(&options);
        print_options_dist(&options);
    }

    /* Initialize ScalePermstruct and LUstruct. */
    ScalePermstructInit(m, n, &ScalePermstruct);
    LUstructInit(n, &LUstruct);

    /* Initialize the statistics variables. */
    PStatInit(&stat);

    /* Call the linear equation solver: factorize and solve. */
    pdgssvx_ABglobal(&options, &A, &ScalePermstruct, b, ldb, nrhs,
                     &grid, &LUstruct, berr, &stat, &info);

    /* Check the accuracy of the solution. */
    if ( !iam ) {
        dinf_norm_error_dist(n, nrhs, b, ldb, xtrue, ldx, &grid);
    }

    PStatPrint(&options, &stat, &grid);  /* Print the statistics. */
    PStatFree(&stat);
    Destroy_CompCol_Matrix_dist(&A);  /* Deallocate storage of matrix A. */
    Destroy_LU(n, &grid, &LUstruct);  /* Deallocate storage associated with
                                         the L and U matrices.            */
    SUPERLU_FREE(b);                  /* Free storage of right-hand side. */

    /* ------------------------------------------------------------
       NOW WE SOLVE ANOTHER LINEAR SYSTEM.  ONLY THE SPARSITY
       PATTERN OF MATRIX A IS THE SAME.
       ------------------------------------------------------------*/
    options.Fact = SamePattern;
    PStatInit(&stat);  /* Initialize the statistics variables. */

    /* Create compressed column matrix for A. */
    dCreate_CompCol_Matrix_dist(&A, m, n, nnz, a1, asub1, xa1,
                                SLU_NC, SLU_D, SLU_GE);

    /* Solve the linear system. */
    pdgssvx_ABglobal(&options, &A, &ScalePermstruct, b1, ldb, nrhs,
                     &grid, &LUstruct, berr, &stat, &info);

    /* Check the accuracy of the solution. */
    if ( !iam ) {
        printf("Solve the system with the same sparsity pattern.\n");
        dinf_norm_error_dist(n, nrhs, b1, ldb, xtrue, ldx, &grid);
    }

    /* Print the statistics. */
    PStatPrint(&options, &stat, &grid);

    /* ------------------------------------------------------------
       DEALLOCATE STORAGE.
       ------------------------------------------------------------*/
    PStatFree(&stat);
    Destroy_CompCol_Matrix_dist(&A);  /* Deallocate storage of matrix A.  */
    Destroy_LU(n, &grid, &LUstruct);  /* Deallocate storage associated with
                                         the L and U matrices.            */
    ScalePermstructFree(&ScalePermstruct);
    LUstructFree(&LUstruct);          /* Deallocate the structure of L and U. */
    SUPERLU_FREE(b1);                 /* Free storage of right-hand side.     */
    SUPERLU_FREE(xtrue);              /* Free storage of the exact solution.  */
    SUPERLU_FREE(berr);

    /* ------------------------------------------------------------
       RELEASE THE SUPERLU PROCESS GRID.
       ------------------------------------------------------------*/
out:
    superlu_gridexit(&grid);

    /* ------------------------------------------------------------
       TERMINATE THE MPI EXECUTION ENVIRONMENT.
       ------------------------------------------------------------*/
    MPI_Finalize();

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(iam, "Exit main()");
#endif
}
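/*
 * The listings above use four values of options.Fact.  The helper below is
 * an illustrative summary only (it assumes the caller tracks what changed
 * since the last factorization); the enum values are the ones appearing in
 * these listings, and the option struct is named superlu_options_t or
 * superlu_dist_options_t depending on the SuperLU_DIST version.
 */
#include "superlu_ddefs.h"

void choose_fact(superlu_options_t *options, int factored_before,
                 int same_pattern, int same_row_perm)
{
    if (!factored_before || !same_pattern)
        options->Fact = DOFACT;                  /* factor from scratch */
    else if (same_row_perm)
        options->Fact = SamePattern_SameRowPerm; /* reuse permutations and structures */
    else
        options->Fact = SamePattern;             /* reuse the column permutation only */
    /* FACTORED means L and U are already valid and only a triangular solve
       is wanted, as in iopt == 3 of the Fortran wrapper above. */
}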
PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat              *tseq,A_seq = NULL;
  Mat_SeqAIJ       *aa,*bb;
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(F)->spptr;
  PetscErrorCode   ierr;
  PetscInt         M=A->rmap->N,N=A->cmap->N,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
                   m=A->rmap->n,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
  int              sinfo;   /* SuperLU_Dist info flag is always an int even with long long indices */
  PetscMPIInt      size;
  SuperLUStat_t    stat;
  double           *berr=0;
  IS               isrow;
  Mat              F_diag=NULL;
#if defined(PETSC_USE_COMPLEX)
  doublecomplex    *av, *bv;
#else
  double           *av, *bv;
#endif

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);

  if (lu->MatInputMode == GLOBAL) { /* global mat input */
    if (size > 1) { /* convert mpi A to seq mat A */
      ierr  = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
      ierr  = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
      ierr  = ISDestroy(&isrow);CHKERRQ(ierr);
      A_seq = *tseq;
      ierr  = PetscFree(tseq);CHKERRQ(ierr);
      aa    = (Mat_SeqAIJ*)A_seq->data;
    } else {
      PetscBool flg;
      ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
      if (flg) {
        Mat_MPIAIJ *At = (Mat_MPIAIJ*)A->data;
        A = At->A;
      }
      aa = (Mat_SeqAIJ*)A->data;
    }

    /* Convert Petsc NR matrix to SuperLU_DIST NC.
       Note: memory for lu->val, lu->col and lu->row is allocated by
       CompRow_to_CompCol_dist()! */
    if (lu->options.Fact != DOFACT) { /* successive numeric factorization, sparsity pattern is reused. */
      PetscStackCall("SuperLU_DIST:Destroy_CompCol_Matrix_dist",Destroy_CompCol_Matrix_dist(&lu->A_sup));
      if (lu->FactPattern == SamePattern_SameRowPerm) {
        lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
      } else { /* lu->FactPattern == SamePattern */
        PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(N, &lu->grid, &lu->LUstruct));
        lu->options.Fact = SamePattern;
      }
    }
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:zCompRow_to_CompCol_dist",zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,(int_t*)aa->j,(int_t*)aa->i,&lu->val,&lu->col,&lu->row));
#else
    PetscStackCall("SuperLU_DIST:dCompRow_to_CompCol_dist",dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,(int_t*)aa->j,(int_t*)aa->i,&lu->val,&lu->col,&lu->row));
#endif

    /* Create compressed column matrix A_sup. */
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:zCreate_CompCol_Matrix_dist",zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE));
#else
    PetscStackCall("SuperLU_DIST:dCreate_CompCol_Matrix_dist",dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE));
#endif
  } else { /* distributed mat input */
    Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
    aa = (Mat_SeqAIJ*)(mat->A)->data;
    bb = (Mat_SeqAIJ*)(mat->B)->data;
    ai = aa->i; aj = aa->j;
    bi = bb->i; bj = bb->j;
#if defined(PETSC_USE_COMPLEX)
    av = (doublecomplex*)aa->a;
    bv = (doublecomplex*)bb->a;
#else
    av = aa->a;
    bv = bb->a;
#endif
    rstart = A->rmap->rstart;
    nz     = aa->nz + bb->nz;
    garray = mat->garray;

    if (lu->options.Fact == DOFACT) { /* first numeric factorization */
#if defined(PETSC_USE_COMPLEX)
      PetscStackCall("SuperLU_DIST:zallocateA_dist",zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row));
#else
      PetscStackCall("SuperLU_DIST:dallocateA_dist",dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row));
#endif
    } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
      /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); */ /* this leads to crash!
         However, see SuperLU_DIST_2.5/EXAMPLE/pzdrive2.c */
      if (lu->FactPattern == SamePattern_SameRowPerm) {
        lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
      } else {
        PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(N, &lu->grid, &lu->LUstruct)); /* Deallocate storage associated with the L and U matrices. */
        lu->options.Fact = SamePattern;
      }
    }

    /* Merge the diagonal (A) and off-diagonal (B) blocks row by row into a
       single local compressed row structure with global column indices. */
    nz = 0;
    for (i=0; i<m; i++) {
      lu->row[i] = nz;
      countA     = ai[i+1] - ai[i];
      countB     = bi[i+1] - bi[i];
      ajj        = aj + ai[i];  /* ptr to the beginning of this row */
      bjj        = bj + bi[i];

      /* B part, smaller col index */
      colA_start = rstart + ajj[0]; /* the smallest global col index of A */
      jB         = 0;
      for (j=0; j<countB; j++) {
        jcol = garray[bjj[j]];
        if (jcol > colA_start) {
          jB = j;
          break;
        }
        lu->col[nz]   = jcol;
        lu->val[nz++] = *bv++;
        if (j==countB-1) jB = countB;
      }

      /* A part */
      for (j=0; j<countA; j++) {
        lu->col[nz]   = rstart + ajj[j];
        lu->val[nz++] = *av++;
      }

      /* B part, larger col index */
      for (j=jB; j<countB; j++) {
        lu->col[nz]   = garray[bjj[j]];
        lu->val[nz++] = *bv++;
      }
    }
    lu->row[m] = nz;

#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:zCreate_CompRowLoc_Matrix_dist",zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE));
#else
    PetscStackCall("SuperLU_DIST:dCreate_CompRowLoc_Matrix_dist",dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE));
#endif
  }

  /* Factor the matrix. */
  PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat)); /* Initialize the statistics variables. */
  if (lu->MatInputMode == GLOBAL) { /* global mat input */
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:pzgssvx_ABglobal",pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, &lu->LUstruct, berr, &stat, &sinfo));
#else
    PetscStackCall("SuperLU_DIST:pdgssvx_ABglobal",pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, &lu->LUstruct, berr, &stat, &sinfo));
#endif
  } else { /* distributed mat input */
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:pzgssvx",pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, m, 0, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo));
    if (sinfo) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
#else
    PetscStackCall("SuperLU_DIST:pdgssvx",pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, m, 0, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo));
    if (sinfo) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
#endif
  }

  if (lu->MatInputMode == GLOBAL && size > 1) {
    ierr = MatDestroy(&A_seq);CHKERRQ(ierr);
  }

  if (lu->options.PrintStat) {
    PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */
  }
  PStatFree(&stat);

  if (size > 1) {
    F_diag            = ((Mat_MPIAIJ*)(F)->data)->A;
    F_diag->assembled = PETSC_TRUE;
  }
  (F)->assembled    = PETSC_TRUE;
  (F)->preallocated = PETSC_TRUE;

  lu->options.Fact = FACTORED; /* The factored form of A is supplied.
                                  Local option used by this function only. */
  PetscFunctionReturn(0);
}
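/*
 * Sketch of how a PETSc application typically reaches the factorization
 * routine above: select SuperLU_DIST as the LU backend through KSP/PC.
 * This assumes a PETSc build configured with SuperLU_DIST; the API names
 * follow the MatSolverType-era interface (PCFactorSetMatSolverType) and
 * differ in older releases (PCFactorSetMatSolverPackage).  Error codes are
 * left unchecked for brevity.
 */
#include <petscksp.h>

int main(int argc, char **argv)
{
  Mat A; Vec x, b; KSP ksp; PC pc;
  PetscInt i, Istart, Iend, n = 10, col[3], ncols;
  PetscScalar v[3];

  PetscInitialize(&argc, &argv, NULL, NULL);

  /* Assemble a small 1-D Laplacian just to have a system to factor. */
  MatCreate(PETSC_COMM_WORLD, &A);
  MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, n, n);
  MatSetFromOptions(A);
  MatSetUp(A);
  MatGetOwnershipRange(A, &Istart, &Iend);
  for (i = Istart; i < Iend; i++) {
    ncols = 0;
    if (i > 0)   { col[ncols] = i-1; v[ncols++] = -1.0; }
    col[ncols] = i; v[ncols++] = 2.0;
    if (i < n-1) { col[ncols] = i+1; v[ncols++] = -1.0; }
    MatSetValues(A, 1, &i, ncols, col, v, INSERT_VALUES);
  }
  MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);

  MatCreateVecs(A, &x, &b);
  VecSet(b, 1.0);

  KSPCreate(PETSC_COMM_WORLD, &ksp);
  KSPSetOperators(ksp, A, A);
  KSPSetType(ksp, KSPPREONLY);   /* direct solve: no Krylov iterations */
  KSPGetPC(ksp, &pc);
  PCSetType(pc, PCLU);
  PCFactorSetMatSolverType(pc, MATSOLVERSUPERLU_DIST);
  KSPSetFromOptions(ksp);        /* equivalently: -pc_factor_mat_solver_type superlu_dist */
  KSPSolve(ksp, b, x);           /* triggers MatLUFactorNumeric_SuperLU_DIST above */

  KSPDestroy(&ksp); MatDestroy(&A); VecDestroy(&x); VecDestroy(&b);
  PetscFinalize();
  return 0;
}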
PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A,Mat B_mpi,Mat X)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
  PetscErrorCode   ierr;
  PetscMPIInt      size;
  PetscInt         M=A->rmap->N,m=A->rmap->n,nrhs;
  SuperLUStat_t    stat;
  double           berr[1];
  PetscScalar      *bptr;
  int              info;  /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
  PetscBool        flg;

  PetscFunctionBegin;
  if (lu->options.Fact != FACTORED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact must equal FACTORED");
  ierr = PetscObjectTypeCompareAny((PetscObject)B_mpi,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix");
  ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");

  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  if (size > 1 && lu->MatInputMode == GLOBAL) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatInputMode=GLOBAL for nproc %d>1 is not supported",size);
  /* size==1 or distributed mat input */
  if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled) {
    /* The communication pattern in SOLVEstruct was probably not built for
       MatMatSolve; destroy it and create a new SOLVEstruct.  Otherwise it
       may result in memory corruption or an incorrect solution.
       See src/mat/examples/tests/ex125.c */
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:zSolveFinalize",zSolveFinalize(&lu->options, &lu->SOLVEstruct));
#else
    PetscStackCall("SuperLU_DIST:dSolveFinalize",dSolveFinalize(&lu->options, &lu->SOLVEstruct));
#endif
    lu->options.SolveInitialized = NO;
  }
  ierr = MatCopy(B_mpi,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);

  ierr = MatGetSize(B_mpi,NULL,&nrhs);CHKERRQ(ierr);

  PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat)); /* Initialize the statistics variables. */
  ierr = MatDenseGetArray(X,&bptr);CHKERRQ(ierr);
  if (lu->MatInputMode == GLOBAL) { /* size == 1 */
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:pzgssvx_ABglobal",pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, M, nrhs, &lu->grid, &lu->LUstruct, berr, &stat, &info));
#else
    PetscStackCall("SuperLU_DIST:pdgssvx_ABglobal",pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, M, nrhs, &lu->grid, &lu->LUstruct, berr, &stat, &info));
#endif
  } else { /* distributed mat input */
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:pzgssvx",pzgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,m,nrhs,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info));
#else
    PetscStackCall("SuperLU_DIST:pdgssvx",pdgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,bptr,m,nrhs,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info));
#endif
  }
  if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
  ierr = MatDenseRestoreArray(X,&bptr);CHKERRQ(ierr);

  if (lu->options.PrintStat) PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */
  PetscStackCall("SuperLU_DIST:PStatFree",PStatFree(&stat));
  lu->matsolve_iscalled    = PETSC_FALSE;
  lu->matmatsolve_iscalled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
  PetscErrorCode   ierr;
  PetscMPIInt      size;
  PetscInt         m=A->rmap->n,M=A->rmap->N,N=A->cmap->N;
  SuperLUStat_t    stat;
  double           berr[1];
  PetscScalar      *bptr;
  PetscInt         nrhs=1;
  Vec              x_seq;
  IS               iden;
  VecScatter       scat;
  int              info;  /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
  static PetscBool cite = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscCitationsRegister("@article{lidemmel03,\n author = {Xiaoye S. Li and James W. Demmel},\n title = {{SuperLU_DIST}: A Scalable Distributed-Memory Sparse Direct\n Solver for Unsymmetric Linear Systems},\n journal = {ACM Trans. Mathematical Software},\n volume = {29},\n number = {2},\n pages = {110-140},\n year = 2003\n}\n",&cite);CHKERRQ(ierr);

  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);

  if (size > 1 && lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
    ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
    ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
    ierr = ISDestroy(&iden);CHKERRQ(ierr);

    ierr = VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
  } else { /* size==1 || distributed mat input */
    if (lu->options.SolveInitialized && !lu->matsolve_iscalled) {
      /* see comments in MatMatSolve() */
#if defined(PETSC_USE_COMPLEX)
      PetscStackCall("SuperLU_DIST:zSolveFinalize",zSolveFinalize(&lu->options, &lu->SOLVEstruct));
#else
      PetscStackCall("SuperLU_DIST:dSolveFinalize",dSolveFinalize(&lu->options, &lu->SOLVEstruct));
#endif
      lu->options.SolveInitialized = NO;
    }
    ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
    ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
  }

  if (lu->options.Fact != FACTORED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact must equal FACTORED");

  PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat)); /* Initialize the statistics variables. */
  if (lu->MatInputMode == GLOBAL) {
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:pzgssvx_ABglobal",pzgssvx_ABglobal(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,M,nrhs,&lu->grid,&lu->LUstruct,berr,&stat,&info));
#else
    PetscStackCall("SuperLU_DIST:pdgssvx_ABglobal",pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr,M,nrhs,&lu->grid,&lu->LUstruct,berr,&stat,&info));
#endif
  } else { /* distributed mat input */
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:pzgssvx",pzgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,m,nrhs,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info));
#else
    PetscStackCall("SuperLU_DIST:pdgssvx",pdgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,bptr,m,nrhs,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info));
#endif
  }
  if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);

  if (lu->options.PrintStat) PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */
  PetscStackCall("SuperLU_DIST:PStatFree",PStatFree(&stat));

  if (size > 1 && lu->MatInputMode == GLOBAL) { /* convert seq x to mpi x */
    ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
    ierr = VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterDestroy(&scat);CHKERRQ(ierr);
    ierr = VecDestroy(&x_seq);CHKERRQ(ierr);
  } else {
    ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
    lu->matsolve_iscalled    = PETSC_TRUE;
    lu->matmatsolve_iscalled = PETSC_FALSE;
  }
  PetscFunctionReturn(0);
}
PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
  PetscErrorCode   ierr;
  PetscMPIInt      size;
  PetscInt         m=A->rmap->n,M=A->rmap->N,N=A->cmap->N;
  SuperLUStat_t    stat;
  double           berr[1];
  PetscScalar      *bptr;
  PetscInt         nrhs=1;
  Vec              x_seq;
  IS               iden;
  VecScatter       scat;
  int              info;  /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);

  if (size > 1 && lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
    ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
    ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
    ierr = ISDestroy(&iden);CHKERRQ(ierr);

    ierr = VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
  } else { /* size==1 || distributed mat input */
    if (lu->options.SolveInitialized && !lu->matsolve_iscalled) {
      /* see comments in MatMatSolve() */
#if defined(PETSC_USE_COMPLEX)
      zSolveFinalize(&lu->options, &lu->SOLVEstruct);
#else
      dSolveFinalize(&lu->options, &lu->SOLVEstruct);
#endif
      lu->options.SolveInitialized = NO;
    }
    ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
    ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
  }

  if (lu->options.Fact != FACTORED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact must equal FACTORED");

  PStatInit(&stat); /* Initialize the statistics variables. */
  if (lu->MatInputMode == GLOBAL) {
#if defined(PETSC_USE_COMPLEX)
    pzgssvx_ABglobal(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,M,nrhs,
                     &lu->grid,&lu->LUstruct,berr,&stat,&info);
#else
    pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr,M,nrhs,
                     &lu->grid,&lu->LUstruct,berr,&stat,&info);
#endif
  } else { /* distributed mat input */
#if defined(PETSC_USE_COMPLEX)
    pzgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,m,nrhs,&lu->grid,
            &lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info);
#else
    pdgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,bptr,m,nrhs,&lu->grid,
            &lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info);
#endif
  }
  if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);

  if (lu->options.PrintStat) {
    PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */
  }
  PStatFree(&stat);

  if (size > 1 && lu->MatInputMode == GLOBAL) { /* convert seq x to mpi x */
    ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
    ierr = VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterDestroy(&scat);CHKERRQ(ierr);
    ierr = VecDestroy(&x_seq);CHKERRQ(ierr);
  } else {
    ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
    lu->matsolve_iscalled    = PETSC_TRUE;
    lu->matmatsolve_iscalled = PETSC_FALSE;
  }
  PetscFunctionReturn(0);
}
int DistributedSuperLU::solve(void)
{
    if (theSOE == 0) {
	opserr << "WARNING DistributedSuperLU::solve(void)- ";
	opserr << " No LinearSOE object has been set\n";
	return -1;
    }

    // check for quick return
    if (theSOE->size == 0)
	return 0;

    // if subprocess, recv A & B from p0
    if (processID != 0) {
	Channel *theChannel = theChannels[0];
	theChannel->recvVector(0, 0, (*theSOE->vectB));
	Vector vectA(theSOE->A, theSOE->nnz);
	theChannel->recvVector(0, 0, vectA);
    }
    //
    // if main process, send B & A to all, solve and send back X, B & result
    else {
	Vector vectA(theSOE->A, theSOE->nnz);

	// send B & A to p1 through n-1
	for (int j=0; j<numChannels; j++) {
	    Channel *theChannel = theChannels[j];
	    theChannel->sendVector(0, 0, *(theSOE->vectB));
	    theChannel->sendVector(0, 0, vectA);
	}
    }

    int info;
    int iam = grid.iam;
    if (iam < (npRow * npCol)) {
	int n = theSOE->size;
	int nnz = theSOE->nnz;
	int ldb = n;
	int nrhs = 1;
	static double berr[1];

	// first copy B into X
	double *Xptr = theSOE->X;
	double *Bptr = theSOE->B;
	for (int i=0; i<n; i++)
	    *(Xptr++) = *(Bptr++);
	Xptr = theSOE->X;

	//
	// set the Fact option:
	// leave DOFACT if the system has never been factored;
	// otherwise use SamePattern to refactor with the existing
	// column permutation
	//
	if ((options.Fact == FACTORED) && (theSOE->factored == false)) {
	    options.Fact = SamePattern;
	    for (int i=0; i<nnz; i++) rowA[i] = theSOE->rowA[i];
	}

	//
	// solve & mark as factored
	//
	pdgssvx_ABglobal(&options, &A, &ScalePermstruct, Xptr, ldb, nrhs,
			 &grid, &LUstruct, berr, &stat, &info);
	// NOTE: info is not currently propagated back to the caller; the
	// commented-out synchronization below would share it with p0.

	if (theSOE->factored == false) {
	    options.Fact = FACTORED;
	    theSOE->factored = true;
	}
    }

    /*
    // synch processes again
    if (processID != 0) {
	static ID idData(1);
	idData(0) = info;
	Channel *theChannel = theChannels[0];
	theChannel->sendID(0, 0, idData);
    } else {
	for (int j=0; j<numChannels; j++) {
	    static ID idData(1);
	    Channel *theChannel = theChannels[j];
	    theChannel->recvID(0, 0, idData);
	}
    }
    */

    return 0;
}
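/*
 * Illustrative post-solve check (not part of the OpenSees class above).
 * pdgssvx_ABglobal() reports a status code in info, and when iterative
 * refinement is enabled berr[] holds the backward error of each right-hand
 * side; a caller could vet the solve as follows.  The tolerance is an
 * assumption of this sketch.
 */
#include <stdio.h>

int check_solve_status(int info, const double *berr, int nrhs, double tol)
{
    int j;
    if (info != 0) {
        fprintf(stderr, "pdgssvx_ABglobal failed, info = %d\n", info);
        return -1;
    }
    for (j = 0; j < nrhs; ++j) {
        if (berr[j] > tol) {
            fprintf(stderr, "rhs %d: backward error %g exceeds %g\n",
                    j, berr[j], tol);
            return -2;
        }
    }
    return 0;
}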