main(int argc, char *argv[]) { /* * Purpose * ======= * * SDRIVE is the main test program for the FLOAT linear * equation driver routines SGSSV and SGSSVX. * * The program is invoked by a shell script file -- stest.csh. * The output from the tests are written into a file -- stest.out. * * ===================================================================== */ float *a, *a_save; int *asub, *asub_save; int *xa, *xa_save; SuperMatrix A, B, X, L, U; SuperMatrix ASAV, AC; GlobalLU_t Glu; /* Not needed on return. */ mem_usage_t mem_usage; int *perm_r; /* row permutation from partial pivoting */ int *perm_c, *pc_save; /* column permutation */ int *etree; float zero = 0.0; float *R, *C; float *ferr, *berr; float *rwork; float *wwork; void *work; int info, lwork, nrhs, panel_size, relax; int m, n, nnz; float *xact; float *rhsb, *solx, *bsav; int ldb, ldx; float rpg, rcond; int i, j, k1; float rowcnd, colcnd, amax; int maxsuper, rowblk, colblk; int prefact, nofact, equil, iequed; int nt, nrun, nfail, nerrs, imat, fimat, nimat; int nfact, ifact, itran; int kl, ku, mode, lda; int zerot, izero, ioff; double u; float anorm, cndnum; float *Afull; float result[NTESTS]; superlu_options_t options; fact_t fact; trans_t trans; SuperLUStat_t stat; static char matrix_type[8]; static char equed[1], path[4], sym[1], dist[1]; FILE *fp; /* Fixed set of parameters */ int iseed[] = {1988, 1989, 1990, 1991}; static char equeds[] = {'N', 'R', 'C', 'B'}; static fact_t facts[] = {FACTORED, DOFACT, SamePattern, SamePattern_SameRowPerm}; static trans_t transs[] = {NOTRANS, TRANS, CONJ}; /* Some function prototypes */ extern int sgst01(int, int, SuperMatrix *, SuperMatrix *, SuperMatrix *, int *, int *, float *); extern int sgst02(trans_t, int, int, int, SuperMatrix *, float *, int, float *, int, float *resid); extern int sgst04(int, int, float *, int, float *, int, float rcond, float *resid); extern int sgst07(trans_t, int, int, SuperMatrix *, float *, int, float *, int, float *, int, float *, float *, float *); extern int slatb4_slu(char *, int *, int *, int *, char *, int *, int *, float *, int *, float *, char *); extern int slatms_slu(int *, int *, char *, int *, char *, float *d, int *, float *, float *, int *, int *, char *, float *, int *, float *, int *); extern int sp_sconvert(int, int, float *, int, int, int, float *a, int *, int *, int *); /* Executable statements */ strcpy(path, "SGE"); nrun = 0; nfail = 0; nerrs = 0; /* Defaults */ lwork = 0; n = 1; nrhs = 1; panel_size = sp_ienv(1); relax = sp_ienv(2); u = 1.0; strcpy(matrix_type, "LA"); parse_command_line(argc, argv, matrix_type, &n, &panel_size, &relax, &nrhs, &maxsuper, &rowblk, &colblk, &lwork, &u, &fp); if ( lwork > 0 ) { work = SUPERLU_MALLOC(lwork); if ( !work ) { fprintf(stderr, "expert: cannot allocate %d bytes\n", lwork); exit (-1); } } /* Set the default input options. */ set_default_options(&options); options.DiagPivotThresh = u; options.PrintStat = NO; options.PivotGrowth = YES; options.ConditionNumber = YES; options.IterRefine = SLU_SINGLE; if ( strcmp(matrix_type, "LA") == 0 ) { /* Test LAPACK matrix suite. */ m = n; lda = SUPERLU_MAX(n, 1); nnz = n * n; /* upper bound */ fimat = 1; nimat = NTYPES; Afull = floatCalloc(lda * n); sallocateA(n, nnz, &a, &asub, &xa); } else { /* Read a sparse matrix */ fimat = nimat = 0; sreadhb(fp, &m, &n, &nnz, &a, &asub, &xa); } sallocateA(n, nnz, &a_save, &asub_save, &xa_save); rhsb = floatMalloc(m * nrhs); bsav = floatMalloc(m * nrhs); solx = floatMalloc(n * nrhs); ldb = m; ldx = n; sCreate_Dense_Matrix(&B, m, nrhs, rhsb, ldb, SLU_DN, SLU_S, SLU_GE); sCreate_Dense_Matrix(&X, n, nrhs, solx, ldx, SLU_DN, SLU_S, SLU_GE); xact = floatMalloc(n * nrhs); etree = intMalloc(n); perm_r = intMalloc(n); perm_c = intMalloc(n); pc_save = intMalloc(n); R = (float *) SUPERLU_MALLOC(m*sizeof(float)); C = (float *) SUPERLU_MALLOC(n*sizeof(float)); ferr = (float *) SUPERLU_MALLOC(nrhs*sizeof(float)); berr = (float *) SUPERLU_MALLOC(nrhs*sizeof(float)); j = SUPERLU_MAX(m,n) * SUPERLU_MAX(4,nrhs); rwork = (float *) SUPERLU_MALLOC(j*sizeof(float)); for (i = 0; i < j; ++i) rwork[i] = 0.; if ( !R ) ABORT("SUPERLU_MALLOC fails for R"); if ( !C ) ABORT("SUPERLU_MALLOC fails for C"); if ( !ferr ) ABORT("SUPERLU_MALLOC fails for ferr"); if ( !berr ) ABORT("SUPERLU_MALLOC fails for berr"); if ( !rwork ) ABORT("SUPERLU_MALLOC fails for rwork"); wwork = floatCalloc( SUPERLU_MAX(m,n) * SUPERLU_MAX(4,nrhs) ); for (i = 0; i < n; ++i) perm_c[i] = pc_save[i] = i; options.ColPerm = MY_PERMC; for (imat = fimat; imat <= nimat; ++imat) { /* All matrix types */ if ( imat ) { /* Skip types 5, 6, or 7 if the matrix size is too small. */ zerot = (imat >= 5 && imat <= 7); if ( zerot && n < imat-4 ) continue; /* Set up parameters with SLATB4 and generate a test matrix with SLATMS. */ slatb4_slu(path, &imat, &n, &n, sym, &kl, &ku, &anorm, &mode, &cndnum, dist); slatms_slu(&n, &n, dist, iseed, sym, &rwork[0], &mode, &cndnum, &anorm, &kl, &ku, "No packing", Afull, &lda, &wwork[0], &info); if ( info ) { printf(FMT3, "SLATMS", info, izero, n, nrhs, imat, nfail); continue; } /* For types 5-7, zero one or more columns of the matrix to test that INFO is returned correctly. */ if ( zerot ) { if ( imat == 5 ) izero = 1; else if ( imat == 6 ) izero = n; else izero = n / 2 + 1; ioff = (izero - 1) * lda; if ( imat < 7 ) { for (i = 0; i < n; ++i) Afull[ioff + i] = zero; } else { for (j = 0; j < n - izero + 1; ++j) for (i = 0; i < n; ++i) Afull[ioff + i + j*lda] = zero; } } else { izero = 0; } /* Convert to sparse representation. */ sp_sconvert(n, n, Afull, lda, kl, ku, a, asub, xa, &nnz); } else { izero = 0; zerot = 0; } sCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_S, SLU_GE); /* Save a copy of matrix A in ASAV */ sCreate_CompCol_Matrix(&ASAV, m, n, nnz, a_save, asub_save, xa_save, SLU_NC, SLU_S, SLU_GE); sCopy_CompCol_Matrix(&A, &ASAV); /* Form exact solution. */ sGenXtrue(n, nrhs, xact, ldx); StatInit(&stat); for (iequed = 0; iequed < 4; ++iequed) { *equed = equeds[iequed]; if (iequed == 0) nfact = 4; else nfact = 1; /* Only test factored, pre-equilibrated matrix */ for (ifact = 0; ifact < nfact; ++ifact) { fact = facts[ifact]; options.Fact = fact; for (equil = 0; equil < 2; ++equil) { options.Equil = equil; prefact = ( options.Fact == FACTORED || options.Fact == SamePattern_SameRowPerm ); /* Need a first factor */ nofact = (options.Fact != FACTORED); /* Not factored */ /* Restore the matrix A. */ sCopy_CompCol_Matrix(&ASAV, &A); if ( zerot ) { if ( prefact ) continue; } else if ( options.Fact == FACTORED ) { if ( equil || iequed ) { /* Compute row and column scale factors to equilibrate matrix A. */ sgsequ(&A, R, C, &rowcnd, &colcnd, &amax, &info); /* Force equilibration. */ if ( !info && n > 0 ) { if ( strncmp(equed, "R", 1)==0 ) { rowcnd = 0.; colcnd = 1.; } else if ( strncmp(equed, "C", 1)==0 ) { rowcnd = 1.; colcnd = 0.; } else if ( strncmp(equed, "B", 1)==0 ) { rowcnd = 0.; colcnd = 0.; } } /* Equilibrate the matrix. */ slaqgs(&A, R, C, rowcnd, colcnd, amax, equed); } } if ( prefact ) { /* Need a factor for the first time */ /* Save Fact option. */ fact = options.Fact; options.Fact = DOFACT; /* Preorder the matrix, obtain the column etree. */ sp_preorder(&options, &A, perm_c, etree, &AC); /* Factor the matrix AC. */ sgstrf(&options, &AC, relax, panel_size, etree, work, lwork, perm_c, perm_r, &L, &U, &Glu, &stat, &info); if ( info ) { printf("** First factor: info %d, equed %c\n", info, *equed); if ( lwork == -1 ) { printf("** Estimated memory: %d bytes\n", info - n); exit(0); } } Destroy_CompCol_Permuted(&AC); /* Restore Fact option. */ options.Fact = fact; } /* if .. first time factor */ for (itran = 0; itran < NTRAN; ++itran) { trans = transs[itran]; options.Trans = trans; /* Restore the matrix A. */ sCopy_CompCol_Matrix(&ASAV, &A); /* Set the right hand side. */ sFillRHS(trans, nrhs, xact, ldx, &A, &B); sCopy_Dense_Matrix(m, nrhs, rhsb, ldb, bsav, ldb); /*---------------- * Test sgssv *----------------*/ if ( options.Fact == DOFACT && itran == 0) { /* Not yet factored, and untransposed */ sCopy_Dense_Matrix(m, nrhs, rhsb, ldb, solx, ldx); sgssv(&options, &A, perm_c, perm_r, &L, &U, &X, &stat, &info); if ( info && info != izero ) { printf(FMT3, "sgssv", info, izero, n, nrhs, imat, nfail); } else { /* Reconstruct matrix from factors and compute residual. */ sgst01(m, n, &A, &L, &U, perm_c, perm_r, &result[0]); nt = 1; if ( izero == 0 ) { /* Compute residual of the computed solution. */ sCopy_Dense_Matrix(m, nrhs, rhsb, ldb, wwork, ldb); sgst02(trans, m, n, nrhs, &A, solx, ldx, wwork,ldb, &result[1]); nt = 2; } /* Print information about the tests that did not pass the threshold. */ for (i = 0; i < nt; ++i) { if ( result[i] >= THRESH ) { printf(FMT1, "sgssv", n, i, result[i]); ++nfail; } } nrun += nt; } /* else .. info == 0 */ /* Restore perm_c. */ for (i = 0; i < n; ++i) perm_c[i] = pc_save[i]; if (lwork == 0) { Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); } } /* if .. end of testing sgssv */ /*---------------- * Test sgssvx *----------------*/ /* Equilibrate the matrix if fact = FACTORED and equed = 'R', 'C', or 'B'. */ if ( options.Fact == FACTORED && (equil || iequed) && n > 0 ) { slaqgs(&A, R, C, rowcnd, colcnd, amax, equed); } /* Solve the system and compute the condition number and error bounds using sgssvx. */ sgssvx(&options, &A, perm_c, perm_r, etree, equed, R, C, &L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr, &Glu, &mem_usage, &stat, &info); if ( info && info != izero ) { printf(FMT3, "sgssvx", info, izero, n, nrhs, imat, nfail); if ( lwork == -1 ) { printf("** Estimated memory: %.0f bytes\n", mem_usage.total_needed); exit(0); } } else { if ( !prefact ) { /* Reconstruct matrix from factors and compute residual. */ sgst01(m, n, &A, &L, &U, perm_c, perm_r, &result[0]); k1 = 0; } else { k1 = 1; } if ( !info ) { /* Compute residual of the computed solution.*/ sCopy_Dense_Matrix(m, nrhs, bsav, ldb, wwork, ldb); sgst02(trans, m, n, nrhs, &ASAV, solx, ldx, wwork, ldb, &result[1]); /* Check solution from generated exact solution. */ sgst04(n, nrhs, solx, ldx, xact, ldx, rcond, &result[2]); /* Check the error bounds from iterative refinement. */ sgst07(trans, n, nrhs, &ASAV, bsav, ldb, solx, ldx, xact, ldx, ferr, berr, &result[3]); /* Print information about the tests that did not pass the threshold. */ for (i = k1; i < NTESTS; ++i) { if ( result[i] >= THRESH ) { printf(FMT2, "sgssvx", options.Fact, trans, *equed, n, imat, i, result[i]); ++nfail; } } nrun += NTESTS; } /* if .. info == 0 */ } /* else .. end of testing sgssvx */ } /* for itran ... */ if ( lwork == 0 ) { Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); } } /* for equil ... */ } /* for ifact ... */ } /* for iequed ... */ #if 0 if ( !info ) { PrintPerf(&L, &U, &mem_usage, rpg, rcond, ferr, berr, equed); } #endif Destroy_SuperMatrix_Store(&A); Destroy_SuperMatrix_Store(&ASAV); StatFree(&stat); } /* for imat ... */ /* Print a summary of the results. */ PrintSumm("SGE", nfail, nrun, nerrs); if ( strcmp(matrix_type, "LA") == 0 ) SUPERLU_FREE (Afull); SUPERLU_FREE (rhsb); SUPERLU_FREE (bsav); SUPERLU_FREE (solx); SUPERLU_FREE (xact); SUPERLU_FREE (etree); SUPERLU_FREE (perm_r); SUPERLU_FREE (perm_c); SUPERLU_FREE (pc_save); SUPERLU_FREE (R); SUPERLU_FREE (C); SUPERLU_FREE (ferr); SUPERLU_FREE (berr); SUPERLU_FREE (rwork); SUPERLU_FREE (wwork); Destroy_SuperMatrix_Store(&B); Destroy_SuperMatrix_Store(&X); #if 0 Destroy_CompCol_Matrix(&A); Destroy_CompCol_Matrix(&ASAV); #else SUPERLU_FREE(a); SUPERLU_FREE(asub); SUPERLU_FREE(xa); SUPERLU_FREE(a_save); SUPERLU_FREE(asub_save); SUPERLU_FREE(xa_save); #endif if ( lwork > 0 ) { SUPERLU_FREE (work); Destroy_SuperMatrix_Store(&L); Destroy_SuperMatrix_Store(&U); } return 0; }
void sgssv(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U, SuperMatrix *B, SuperLUStat_t *stat, int *info ) { /* * Purpose * ======= * * SGSSV solves the system of linear equations A*X=B, using the * LU factorization from SGSTRF. It performs the following steps: * * 1. If A is stored column-wise (A->Stype = SLU_NC): * * 1.1. Permute the columns of A, forming A*Pc, where Pc * is a permutation matrix. For more details of this step, * see sp_preorder.c. * * 1.2. Factor A as Pr*A*Pc=L*U with the permutation Pr determined * by Gaussian elimination with partial pivoting. * L is unit lower triangular with offdiagonal entries * bounded by 1 in magnitude, and U is upper triangular. * * 1.3. Solve the system of equations A*X=B using the factored * form of A. * * 2. If A is stored row-wise (A->Stype = SLU_NR), apply the * above algorithm to the transpose of A: * * 2.1. Permute columns of transpose(A) (rows of A), * forming transpose(A)*Pc, where Pc is a permutation matrix. * For more details of this step, see sp_preorder.c. * * 2.2. Factor A as Pr*transpose(A)*Pc=L*U with the permutation Pr * determined by Gaussian elimination with partial pivoting. * L is unit lower triangular with offdiagonal entries * bounded by 1 in magnitude, and U is upper triangular. * * 2.3. Solve the system of equations A*X=B using the factored * form of A. * * See supermatrix.h for the definition of 'SuperMatrix' structure. * * Arguments * ========= * * options (input) superlu_options_t* * The structure defines the input parameters to control * how the LU decomposition will be performed and how the * system will be solved. * * A (input) SuperMatrix* * Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number * of linear equations is A->nrow. Currently, the type of A can be: * Stype = SLU_NC or SLU_NR; Dtype = SLU_S; Mtype = SLU_GE. * In the future, more general A may be handled. * * perm_c (input/output) int* * If A->Stype = SLU_NC, column permutation vector of size A->ncol * which defines the permutation matrix Pc; perm_c[i] = j means * column i of A is in position j in A*Pc. * If A->Stype = SLU_NR, column permutation vector of size A->nrow * which describes permutation of columns of transpose(A) * (rows of A) as described above. * * If options->ColPerm = MY_PERMC or options->Fact = SamePattern or * options->Fact = SamePattern_SameRowPerm, it is an input argument. * On exit, perm_c may be overwritten by the product of the input * perm_c and a permutation that postorders the elimination tree * of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree * is already in postorder. * Otherwise, it is an output argument. * * perm_r (input/output) int* * If A->Stype = SLU_NC, row permutation vector of size A->nrow, * which defines the permutation matrix Pr, and is determined * by partial pivoting. perm_r[i] = j means row i of A is in * position j in Pr*A. * If A->Stype = SLU_NR, permutation vector of size A->ncol, which * determines permutation of rows of transpose(A) * (columns of A) as described above. * * If options->RowPerm = MY_PERMR or * options->Fact = SamePattern_SameRowPerm, perm_r is an * input argument. * otherwise it is an output argument. * * L (output) SuperMatrix* * The factor L from the factorization * Pr*A*Pc=L*U (if A->Stype = SLU_NC) or * Pr*transpose(A)*Pc=L*U (if A->Stype = SLU_NR). * Uses compressed row subscripts storage for supernodes, i.e., * L has types: Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU. * * U (output) SuperMatrix* * The factor U from the factorization * Pr*A*Pc=L*U (if A->Stype = SLU_NC) or * Pr*transpose(A)*Pc=L*U (if A->Stype = SLU_NR). * Uses column-wise storage scheme, i.e., U has types: * Stype = SLU_NC, Dtype = SLU_S, Mtype = SLU_TRU. * * B (input/output) SuperMatrix* * B has types: Stype = SLU_DN, Dtype = SLU_S, Mtype = SLU_GE. * On entry, the right hand side matrix. * On exit, the solution matrix if info = 0; * * stat (output) SuperLUStat_t* * Record the statistics on runtime and doubleing-point operation count. * See util.h for the definition of 'SuperLUStat_t'. * * info (output) int* * = 0: successful exit * > 0: if info = i, and i is * <= A->ncol: U(i,i) is exactly zero. The factorization has * been completed, but the factor U is exactly singular, * so the solution could not be computed. * > A->ncol: number of bytes allocated when memory allocation * failure occurred, plus A->ncol. * */ DNformat *Bstore; SuperMatrix *AA = NULL;/* A in SLU_NC format used by the factorization routine.*/ SuperMatrix AC; /* Matrix postmultiplied by Pc */ int lwork = 0, *etree, i; /* Set default values for some parameters */ int panel_size; /* panel size */ int relax; /* no of columns in a relaxed snodes */ int permc_spec; trans_t trans = NOTRANS; double *utime; double t; /* Temporary time */ /* Test the input parameters ... */ *info = 0; Bstore = B->Store; if ( options->Fact != DOFACT ) *info = -1; else if ( A->nrow != A->ncol || A->nrow < 0 || (A->Stype != SLU_NC && A->Stype != SLU_NR) || A->Dtype != SLU_S || A->Mtype != SLU_GE ) *info = -2; else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) || B->Stype != SLU_DN || B->Dtype != SLU_S || B->Mtype != SLU_GE ) *info = -7; if ( *info != 0 ) { i = -(*info); xerbla_("sgssv", &i); return; } utime = stat->utime; /* Convert A to SLU_NC format when necessary. */ if ( A->Stype == SLU_NR ) { NRformat *Astore = A->Store; AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); sCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, Astore->nzval, Astore->colind, Astore->rowptr, SLU_NC, A->Dtype, A->Mtype); trans = TRANS; } else { if ( A->Stype == SLU_NC ) AA = A; } t = SuperLU_timer_(); /* * Get column permutation vector perm_c[], according to permc_spec: * permc_spec = NATURAL: natural ordering * permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A * permc_spec = MMD_ATA: minimum degree on structure of A'*A * permc_spec = COLAMD: approximate minimum degree column ordering * permc_spec = MY_PERMC: the ordering already supplied in perm_c[] */ permc_spec = options->ColPerm; if ( permc_spec != MY_PERMC && options->Fact == DOFACT ) get_perm_c(permc_spec, AA, perm_c); utime[COLPERM] = SuperLU_timer_() - t; etree = intMalloc(A->ncol); t = SuperLU_timer_(); sp_preorder(options, AA, perm_c, etree, &AC); utime[ETREE] = SuperLU_timer_() - t; panel_size = sp_ienv(1); relax = sp_ienv(2); /*printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n", relax, panel_size, sp_ienv(3), sp_ienv(4));*/ t = SuperLU_timer_(); /* Compute the LU factorization of A. */ sgstrf(options, &AC, relax, panel_size, etree, NULL, lwork, perm_c, perm_r, L, U, stat, info); utime[FACT] = SuperLU_timer_() - t; t = SuperLU_timer_(); if ( *info == 0 ) { /* Solve the system A*X=B, overwriting B with X. */ sgstrs (trans, L, U, perm_c, perm_r, B, stat, info); } utime[SOLVE] = SuperLU_timer_() - t; SUPERLU_FREE (etree); Destroy_CompCol_Permuted(&AC); if ( A->Stype == SLU_NR ) { Destroy_SuperMatrix_Store(AA); SUPERLU_FREE(AA); } }
void sgssvx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r, int *etree, char *equed, float *R, float *C, SuperMatrix *L, SuperMatrix *U, void *work, int lwork, SuperMatrix *B, SuperMatrix *X, float *recip_pivot_growth, float *rcond, float *ferr, float *berr, mem_usage_t *mem_usage, SuperLUStat_t *stat, int *info ) { DNformat *Bstore, *Xstore; float *Bmat, *Xmat; int ldb, ldx, nrhs; SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/ SuperMatrix AC; /* Matrix postmultiplied by Pc */ int colequ, equil, nofact, notran, rowequ, permc_spec; trans_t trant; char norm[1]; int i, j, info1; float amax, anorm, bignum, smlnum, colcnd, rowcnd, rcmax, rcmin; int relax, panel_size; float diag_pivot_thresh; double t0; /* temporary time */ double *utime; /* External functions */ extern float slangs(char *, SuperMatrix *); Bstore = B->Store; Xstore = X->Store; Bmat = Bstore->nzval; Xmat = Xstore->nzval; ldb = Bstore->lda; ldx = Xstore->lda; nrhs = B->ncol; *info = 0; nofact = (options->Fact != FACTORED); equil = (options->Equil == YES); notran = (options->Trans == NOTRANS); if ( nofact ) { *(unsigned char *)equed = 'N'; rowequ = FALSE; colequ = FALSE; } else { rowequ = lsame_(equed, "R") || lsame_(equed, "B"); colequ = lsame_(equed, "C") || lsame_(equed, "B"); smlnum = slamch_("Safe minimum"); bignum = 1. / smlnum; } #if 0 printf("dgssvx: Fact=%4d, Trans=%4d, equed=%c\n", options->Fact, options->Trans, *equed); #endif /* Test the input parameters */ if (options->Fact != DOFACT && options->Fact != SamePattern && options->Fact != SamePattern_SameRowPerm && options->Fact != FACTORED && options->Trans != NOTRANS && options->Trans != TRANS && options->Trans != CONJ && options->Equil != NO && options->Equil != YES) *info = -1; else if ( A->nrow != A->ncol || A->nrow < 0 || (A->Stype != SLU_NC && A->Stype != SLU_NR) || A->Dtype != SLU_S || A->Mtype != SLU_GE ) *info = -2; else if (options->Fact == FACTORED && !(rowequ || colequ || lsame_(equed, "N"))) *info = -6; else { if (rowequ) { rcmin = bignum; rcmax = 0.; for (j = 0; j < A->nrow; ++j) { rcmin = SUPERLU_MIN(rcmin, R[j]); rcmax = SUPERLU_MAX(rcmax, R[j]); } if (rcmin <= 0.) *info = -7; else if ( A->nrow > 0) rowcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum); else rowcnd = 1.; } if (colequ && *info == 0) { rcmin = bignum; rcmax = 0.; for (j = 0; j < A->nrow; ++j) { rcmin = SUPERLU_MIN(rcmin, C[j]); rcmax = SUPERLU_MAX(rcmax, C[j]); } if (rcmin <= 0.) *info = -8; else if (A->nrow > 0) colcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum); else colcnd = 1.; } if (*info == 0) { if ( lwork < -1 ) *info = -12; else if ( B->ncol < 0 ) *info = -13; else if ( B->ncol > 0 ) { /* no checking if B->ncol=0 */ if ( Bstore->lda < SUPERLU_MAX(0, A->nrow) || B->Stype != SLU_DN || B->Dtype != SLU_S || B->Mtype != SLU_GE ) *info = -13; } if ( X->ncol < 0 ) *info = -14; else if ( X->ncol > 0 ) { /* no checking if X->ncol=0 */ if ( Xstore->lda < SUPERLU_MAX(0, A->nrow) || (B->ncol != 0 && B->ncol != X->ncol) || X->Stype != SLU_DN || X->Dtype != SLU_S || X->Mtype != SLU_GE ) *info = -14; } } } if (*info != 0) { i = -(*info); xerbla_("sgssvx", &i); return; } /* Initialization for factor parameters */ panel_size = sp_ienv(1); relax = sp_ienv(2); diag_pivot_thresh = options->DiagPivotThresh; utime = stat->utime; /* Convert A to SLU_NC format when necessary. */ if ( A->Stype == SLU_NR ) { NRformat *Astore = A->Store; AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); sCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, Astore->nzval, Astore->colind, Astore->rowptr, SLU_NC, A->Dtype, A->Mtype); if ( notran ) { /* Reverse the transpose argument. */ trant = TRANS; notran = 0; } else { trant = NOTRANS; notran = 1; } } else { /* A->Stype == SLU_NC */ trant = options->Trans; AA = A; } if ( nofact && equil ) { t0 = SuperLU_timer_(); /* Compute row and column scalings to equilibrate the matrix A. */ sgsequ(AA, R, C, &rowcnd, &colcnd, &amax, &info1); if ( info1 == 0 ) { /* Equilibrate matrix A. */ slaqgs(AA, R, C, rowcnd, colcnd, amax, equed); rowequ = lsame_(equed, "R") || lsame_(equed, "B"); colequ = lsame_(equed, "C") || lsame_(equed, "B"); } utime[EQUIL] = SuperLU_timer_() - t0; } if ( nofact ) { t0 = SuperLU_timer_(); /* * Gnet column permutation vector perm_c[], according to permc_spec: * permc_spec = NATURAL: natural ordering * permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A * permc_spec = MMD_ATA: minimum degree on structure of A'*A * permc_spec = COLAMD: approximate minimum degree column ordering * permc_spec = MY_PERMC: the ordering already supplied in perm_c[] */ permc_spec = options->ColPerm; if ( permc_spec != MY_PERMC && options->Fact == DOFACT ) get_perm_c(permc_spec, AA, perm_c); utime[COLPERM] = SuperLU_timer_() - t0; t0 = SuperLU_timer_(); sp_preorder(options, AA, perm_c, etree, &AC); utime[ETREE] = SuperLU_timer_() - t0; /* printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n", relax, panel_size, sp_ienv(3), sp_ienv(4)); fflush(stdout); */ /* Compute the LU factorization of A*Pc. */ t0 = SuperLU_timer_(); sgstrf(options, &AC, relax, panel_size, etree, work, lwork, perm_c, perm_r, L, U, stat, info); utime[FACT] = SuperLU_timer_() - t0; if ( lwork == -1 ) { mem_usage->total_needed = *info - A->ncol; return; } } if ( options->PivotGrowth ) { if ( *info > 0 ) { if ( *info <= A->ncol ) { /* Compute the reciprocal pivot growth factor of the leading rank-deficient *info columns of A. */ *recip_pivot_growth = sPivotGrowth(*info, AA, perm_c, L, U); } return; } /* Compute the reciprocal pivot growth factor *recip_pivot_growth. */ *recip_pivot_growth = sPivotGrowth(A->ncol, AA, perm_c, L, U); } if ( options->ConditionNumber ) { /* Estimate the reciprocal of the condition number of A. */ t0 = SuperLU_timer_(); if ( notran ) { *(unsigned char *)norm = '1'; } else { *(unsigned char *)norm = 'I'; } anorm = slangs(norm, AA); sgscon(norm, L, U, anorm, rcond, stat, info); utime[RCOND] = SuperLU_timer_() - t0; } if ( nrhs > 0 ) { /* Scale the right hand side if equilibration was performed. */ if ( notran ) { if ( rowequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < A->nrow; ++i) Bmat[i + j*ldb] *= R[i]; } } else if ( colequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < A->nrow; ++i) Bmat[i + j*ldb] *= C[i]; } /* Compute the solution matrix X. */ for (j = 0; j < nrhs; j++) /* Save a copy of the right hand sides */ for (i = 0; i < B->nrow; i++) Xmat[i + j*ldx] = Bmat[i + j*ldb]; t0 = SuperLU_timer_(); sgstrs (trant, L, U, perm_c, perm_r, X, stat, info); utime[SOLVE] = SuperLU_timer_() - t0; /* Use iterative refinement to improve the computed solution and compute error bounds and backward error estimates for it. */ t0 = SuperLU_timer_(); if ( options->IterRefine != NOREFINE ) { sgsrfs(trant, AA, L, U, perm_c, perm_r, equed, R, C, B, X, ferr, berr, stat, info); } else { for (j = 0; j < nrhs; ++j) ferr[j] = berr[j] = 1.0; } utime[REFINE] = SuperLU_timer_() - t0; /* Transform the solution matrix X to a solution of the original system. */ if ( notran ) { if ( colequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < A->nrow; ++i) Xmat[i + j*ldx] *= C[i]; } } else if ( rowequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < A->nrow; ++i) Xmat[i + j*ldx] *= R[i]; } } /* end if nrhs > 0 */ if ( options->ConditionNumber ) { /* Set INFO = A->ncol+1 if the matrix is singular to working precision. */ if ( *rcond < slamch_("E") ) *info = A->ncol + 1; } if ( nofact ) { sQuerySpace(L, U, mem_usage); Destroy_CompCol_Permuted(&AC); } if ( A->Stype == SLU_NR ) { Destroy_SuperMatrix_Store(AA); SUPERLU_FREE(AA); } }
/* Here is a driver inspired by A. Sheffer's "cow flattener". */ static NLboolean __nlFactorize_SUPERLU(__NLContext *context, NLint *permutation) { /* OpenNL Context */ __NLSparseMatrix* M = (context->least_squares)? &context->MtM: &context->M; NLuint n = context->n; NLuint nnz = __nlSparseMatrixNNZ(M); /* number of non-zero coeffs */ /* Compressed Row Storage matrix representation */ NLint *xa = __NL_NEW_ARRAY(NLint, n+1); NLfloat *rhs = __NL_NEW_ARRAY(NLfloat, n); NLfloat *a = __NL_NEW_ARRAY(NLfloat, nnz); NLint *asub = __NL_NEW_ARRAY(NLint, nnz); NLint *etree = __NL_NEW_ARRAY(NLint, n); /* SuperLU variables */ SuperMatrix At, AtP; NLint info, panel_size, relax; superlu_options_t options; /* Temporary variables */ NLuint i, jj, count; __nl_assert(!(M->storage & __NL_SYMMETRIC)); __nl_assert(M->storage & __NL_ROWS); __nl_assert(M->m == M->n); /* Convert M to compressed column format */ for(i=0, count=0; i<n; i++) { __NLRowColumn *Ri = M->row + i; xa[i] = count; for(jj=0; jj<Ri->size; jj++, count++) { a[count] = Ri->coeff[jj].value; asub[count] = Ri->coeff[jj].index; } } xa[n] = nnz; /* Free M, don't need it anymore at this point */ __nlSparseMatrixClear(M); /* Create superlu A matrix transposed */ sCreate_CompCol_Matrix( &At, n, n, nnz, a, asub, xa, SLU_NC, /* Colum wise, no supernode */ SLU_S, /* floats */ SLU_GE /* general storage */ ); /* Set superlu options */ set_default_options(&options); options.ColPerm = MY_PERMC; options.Fact = DOFACT; StatInit(&(context->slu.stat)); panel_size = sp_ienv(1); /* sp_ienv give us the defaults */ relax = sp_ienv(2); /* Compute permutation and permuted matrix */ context->slu.perm_r = __NL_NEW_ARRAY(NLint, n); context->slu.perm_c = __NL_NEW_ARRAY(NLint, n); if ((permutation == NULL) || (*permutation == -1)) { get_perm_c(3, &At, context->slu.perm_c); if (permutation) memcpy(permutation, context->slu.perm_c, sizeof(NLint)*n); } else memcpy(context->slu.perm_c, permutation, sizeof(NLint)*n); sp_preorder(&options, &At, context->slu.perm_c, etree, &AtP); /* Decompose into L and U */ sgstrf(&options, &AtP, relax, panel_size, etree, NULL, 0, context->slu.perm_c, context->slu.perm_r, &(context->slu.L), &(context->slu.U), &(context->slu.stat), &info); /* Cleanup */ Destroy_SuperMatrix_Store(&At); Destroy_CompCol_Permuted(&AtP); __NL_DELETE_ARRAY(etree); __NL_DELETE_ARRAY(xa); __NL_DELETE_ARRAY(rhs); __NL_DELETE_ARRAY(a); __NL_DELETE_ARRAY(asub); context->slu.alloc_slu = NL_TRUE; return (info == 0); }