void dgsequ(SuperMatrix *A, double *r, double *c, double *rowcnd, double *colcnd, double *amax, int *info) { /* Purpose ======= DGSEQU computes row and column scalings intended to equilibrate an M-by-N sparse matrix A and reduce its condition number. R returns the row scale factors and C the column scale factors, chosen to try to make the largest element in each row and column of the matrix B with elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. R(i) and C(j) are restricted to be between SMLNUM = smallest safe number and BIGNUM = largest safe number. Use of these scaling factors is not guaranteed to reduce the condition number of A but works well in practice. See supermatrix.h for the definition of 'SuperMatrix' structure. Arguments ========= A (input) SuperMatrix* The matrix of dimension (A->nrow, A->ncol) whose equilibration factors are to be computed. The type of A can be: Stype = SLU_NC; Dtype = SLU_D; Mtype = SLU_GE. R (output) double*, size A->nrow If INFO = 0 or INFO > M, R contains the row scale factors for A. C (output) double*, size A->ncol If INFO = 0, C contains the column scale factors for A. ROWCND (output) double* If INFO = 0 or INFO > M, ROWCND contains the ratio of the smallest R(i) to the largest R(i). If ROWCND >= 0.1 and AMAX is neither too large nor too small, it is not worth scaling by R. COLCND (output) double* If INFO = 0, COLCND contains the ratio of the smallest C(i) to the largest C(i). If COLCND >= 0.1, it is not worth scaling by C. AMAX (output) double* Absolute value of largest matrix element. If AMAX is very close to overflow or very close to underflow, the matrix should be scaled. INFO (output) int* = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, and i is <= A->nrow: the i-th row of A is exactly zero > A->ncol: the (i-M)-th column of A is exactly zero ===================================================================== */ /* Local variables */ NCformat *Astore; double *Aval; int i, j, irow; double rcmin, rcmax; double bignum, smlnum; extern double hypre_F90_NAME_LAPACK(dlamch,DLAMCH)(const char *); /* Test the input parameters. */ *info = 0; if ( A->nrow < 0 || A->ncol < 0 || A->Stype != SLU_NC || A->Dtype != SLU_D || A->Mtype != SLU_GE ) *info = -1; if (*info != 0) { i = -(*info); superlu_xerbla("dgsequ", &i); return; } /* Quick return if possible */ if ( A->nrow == 0 || A->ncol == 0 ) { *rowcnd = 1.; *colcnd = 1.; *amax = 0.; return; } Astore = (NCformat*) A->Store; Aval = (double*) Astore->nzval; /* Get machine constants. */ smlnum = hypre_F90_NAME_LAPACK(dlamch,DLAMCH)("S"); bignum = 1. / smlnum; /* Compute row scale factors. */ for (i = 0; i < A->nrow; ++i) r[i] = 0.; /* Find the maximum element in each row. */ for (j = 0; j < A->ncol; ++j) for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) { irow = Astore->rowind[i]; r[irow] = SUPERLU_MAX( r[irow], fabs(Aval[i]) ); } /* Find the maximum and minimum scale factors. */ rcmin = bignum; rcmax = 0.; for (i = 0; i < A->nrow; ++i) { rcmax = SUPERLU_MAX(rcmax, r[i]); rcmin = SUPERLU_MIN(rcmin, r[i]); } *amax = rcmax; if (rcmin == 0.) { /* Find the first zero scale factor and return an error code. */ for (i = 0; i < A->nrow; ++i) if (r[i] == 0.) { *info = i + 1; return; } } else { /* Invert the scale factors. */ for (i = 0; i < A->nrow; ++i) r[i] = 1. / SUPERLU_MIN( SUPERLU_MAX( r[i], smlnum ), bignum ); /* Compute ROWCND = min(R(I)) / max(R(I)) */ *rowcnd = SUPERLU_MAX( rcmin, smlnum ) / SUPERLU_MIN( rcmax, bignum ); } /* Compute column scale factors */ for (j = 0; j < A->ncol; ++j) c[j] = 0.; /* Find the maximum element in each column, assuming the row scalings computed above. */ for (j = 0; j < A->ncol; ++j) for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) { irow = Astore->rowind[i]; c[j] = SUPERLU_MAX( c[j], fabs(Aval[i]) * r[irow] ); } /* Find the maximum and minimum scale factors. */ rcmin = bignum; rcmax = 0.; for (j = 0; j < A->ncol; ++j) { rcmax = SUPERLU_MAX(rcmax, c[j]); rcmin = SUPERLU_MIN(rcmin, c[j]); } if (rcmin == 0.) { /* Find the first zero scale factor and return an error code. */ for (j = 0; j < A->ncol; ++j) if ( c[j] == 0. ) { *info = A->nrow + j + 1; return; } } else { /* Invert the scale factors. */ for (j = 0; j < A->ncol; ++j) c[j] = 1. / SUPERLU_MIN( SUPERLU_MAX( c[j], smlnum ), bignum); /* Compute COLCND = min(C(J)) / max(C(J)) */ *colcnd = SUPERLU_MAX( rcmin, smlnum ) / SUPERLU_MIN( rcmax, bignum ); } return; } /* dgsequ */
void dgssvx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r, int *etree, char *equed, double *R, double *C, SuperMatrix *L, SuperMatrix *U, void *work, int lwork, SuperMatrix *B, SuperMatrix *X, double *recip_pivot_growth, double *rcond, double *ferr, double *berr, mem_usage_t *mem_usage, SuperLUStat_t *stat, int *info ) { /* * Purpose * ======= * * DGSSVX solves the system of linear equations A*X=B or A'*X=B, using * the LU factorization from dgstrf(). Error bounds on the solution and * a condition estimate are also provided. It performs the following steps: * * 1. If A is stored column-wise (A->Stype = SLU_NC): * * 1.1. If options->Equil = YES, scaling factors are computed to * equilibrate the system: * options->Trans = NOTRANS: * diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B * options->Trans = TRANS: * (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B * options->Trans = CONJ: * (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B * Whether or not the system will be equilibrated depends on the * scaling of the matrix A, but if equilibration is used, A is * overwritten by diag(R)*A*diag(C) and B by diag(R)*B * (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans * = TRANS or CONJ). * * 1.2. Permute columns of A, forming A*Pc, where Pc is a permutation * matrix that usually preserves sparsity. * For more details of this step, see sp_preorder.c. * * 1.3. If options->Fact != FACTORED, the LU decomposition is used to * factor the matrix A (after equilibration if options->Equil = YES) * as Pr*A*Pc = L*U, with Pr determined by partial pivoting. * * 1.4. Compute the reciprocal pivot growth factor. * * 1.5. If some U(i,i) = 0, so that U is exactly singular, then the * routine returns with info = i. Otherwise, the factored form of * A is used to estimate the condition number of the matrix A. If * the reciprocal of the condition number is less than machine * precision, info = A->ncol+1 is returned as a warning, but the * routine still goes on to solve for X and computes error bounds * as described below. * * 1.6. The system of equations is solved for X using the factored form * of A. * * 1.7. If options->IterRefine != NOREFINE, iterative refinement is * applied to improve the computed solution matrix and calculate * error bounds and backward error estimates for it. * * 1.8. If equilibration was used, the matrix X is premultiplied by * diag(C) (if options->Trans = NOTRANS) or diag(R) * (if options->Trans = TRANS or CONJ) so that it solves the * original system before equilibration. * * 2. If A is stored row-wise (A->Stype = SLU_NR), apply the above algorithm * to the transpose of A: * * 2.1. If options->Equil = YES, scaling factors are computed to * equilibrate the system: * options->Trans = NOTRANS: * diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B * options->Trans = TRANS: * (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B * options->Trans = CONJ: * (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B * Whether or not the system will be equilibrated depends on the * scaling of the matrix A, but if equilibration is used, A' is * overwritten by diag(R)*A'*diag(C) and B by diag(R)*B * (if trans='N') or diag(C)*B (if trans = 'T' or 'C'). * * 2.2. Permute columns of transpose(A) (rows of A), * forming transpose(A)*Pc, where Pc is a permutation matrix that * usually preserves sparsity. * For more details of this step, see sp_preorder.c. * * 2.3. If options->Fact != FACTORED, the LU decomposition is used to * factor the transpose(A) (after equilibration if * options->Fact = YES) as Pr*transpose(A)*Pc = L*U with the * permutation Pr determined by partial pivoting. * * 2.4. Compute the reciprocal pivot growth factor. * * 2.5. If some U(i,i) = 0, so that U is exactly singular, then the * routine returns with info = i. Otherwise, the factored form * of transpose(A) is used to estimate the condition number of the * matrix A. If the reciprocal of the condition number * is less than machine precision, info = A->nrow+1 is returned as * a warning, but the routine still goes on to solve for X and * computes error bounds as described below. * * 2.6. The system of equations is solved for X using the factored form * of transpose(A). * * 2.7. If options->IterRefine != NOREFINE, iterative refinement is * applied to improve the computed solution matrix and calculate * error bounds and backward error estimates for it. * * 2.8. If equilibration was used, the matrix X is premultiplied by * diag(C) (if options->Trans = NOTRANS) or diag(R) * (if options->Trans = TRANS or CONJ) so that it solves the * original system before equilibration. * * See supermatrix.h for the definition of 'SuperMatrix' structure. * * Arguments * ========= * * options (input) superlu_options_t* * The structure defines the input parameters to control * how the LU decomposition will be performed and how the * system will be solved. * * A (input/output) SuperMatrix* * Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number * of the linear equations is A->nrow. Currently, the type of A can be: * Stype = SLU_NC or SLU_NR, Dtype = SLU_D, Mtype = SLU_GE. * In the future, more general A may be handled. * * On entry, If options->Fact = FACTORED and equed is not 'N', * then A must have been equilibrated by the scaling factors in * R and/or C. * On exit, A is not modified if options->Equil = NO, or if * options->Equil = YES but equed = 'N' on exit. * Otherwise, if options->Equil = YES and equed is not 'N', * A is scaled as follows: * If A->Stype = SLU_NC: * equed = 'R': A := diag(R) * A * equed = 'C': A := A * diag(C) * equed = 'B': A := diag(R) * A * diag(C). * If A->Stype = SLU_NR: * equed = 'R': transpose(A) := diag(R) * transpose(A) * equed = 'C': transpose(A) := transpose(A) * diag(C) * equed = 'B': transpose(A) := diag(R) * transpose(A) * diag(C). * * perm_c (input/output) int* * If A->Stype = SLU_NC, Column permutation vector of size A->ncol, * which defines the permutation matrix Pc; perm_c[i] = j means * column i of A is in position j in A*Pc. * On exit, perm_c may be overwritten by the product of the input * perm_c and a permutation that postorders the elimination tree * of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree * is already in postorder. * * If A->Stype = SLU_NR, column permutation vector of size A->nrow, * which describes permutation of columns of transpose(A) * (rows of A) as described above. * * perm_r (input/output) int* * If A->Stype = SLU_NC, row permutation vector of size A->nrow, * which defines the permutation matrix Pr, and is determined * by partial pivoting. perm_r[i] = j means row i of A is in * position j in Pr*A. * * If A->Stype = SLU_NR, permutation vector of size A->ncol, which * determines permutation of rows of transpose(A) * (columns of A) as described above. * * If options->Fact = SamePattern_SameRowPerm, the pivoting routine * will try to use the input perm_r, unless a certain threshold * criterion is violated. In that case, perm_r is overwritten by a * new permutation determined by partial pivoting or diagonal * threshold pivoting. * Otherwise, perm_r is output argument. * * etree (input/output) int*, dimension (A->ncol) * Elimination tree of Pc'*A'*A*Pc. * If options->Fact != FACTORED and options->Fact != DOFACT, * etree is an input argument, otherwise it is an output argument. * Note: etree is a vector of parent pointers for a forest whose * vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol. * * equed (input/output) char* * Specifies the form of equilibration that was done. * = 'N': No equilibration. * = 'R': Row equilibration, i.e., A was premultiplied by diag(R). * = 'C': Column equilibration, i.e., A was postmultiplied by diag(C). * = 'B': Both row and column equilibration, i.e., A was replaced * by diag(R)*A*diag(C). * If options->Fact = FACTORED, equed is an input argument, * otherwise it is an output argument. * * R (input/output) double*, dimension (A->nrow) * The row scale factors for A or transpose(A). * If equed = 'R' or 'B', A (if A->Stype = SLU_NC) or transpose(A) * (if A->Stype = SLU_NR) is multiplied on the left by diag(R). * If equed = 'N' or 'C', R is not accessed. * If options->Fact = FACTORED, R is an input argument, * otherwise, R is output. * If options->zFact = FACTORED and equed = 'R' or 'B', each element * of R must be positive. * * C (input/output) double*, dimension (A->ncol) * The column scale factors for A or transpose(A). * If equed = 'C' or 'B', A (if A->Stype = SLU_NC) or transpose(A) * (if A->Stype = SLU_NR) is multiplied on the right by diag(C). * If equed = 'N' or 'R', C is not accessed. * If options->Fact = FACTORED, C is an input argument, * otherwise, C is output. * If options->Fact = FACTORED and equed = 'C' or 'B', each element * of C must be positive. * * L (output) SuperMatrix* * The factor L from the factorization * Pr*A*Pc=L*U (if A->Stype SLU_= NC) or * Pr*transpose(A)*Pc=L*U (if A->Stype = SLU_NR). * Uses compressed row subscripts storage for supernodes, i.e., * L has types: Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU. * * U (output) SuperMatrix* * The factor U from the factorization * Pr*A*Pc=L*U (if A->Stype = SLU_NC) or * Pr*transpose(A)*Pc=L*U (if A->Stype = SLU_NR). * Uses column-wise storage scheme, i.e., U has types: * Stype = SLU_NC, Dtype = SLU_D, Mtype = SLU_TRU. * * work (workspace/output) void*, size (lwork) (in bytes) * User supplied workspace, should be large enough * to hold data structures for factors L and U. * On exit, if fact is not 'F', L and U point to this array. * * lwork (input) int * Specifies the size of work array in bytes. * = 0: allocate space internally by system malloc; * > 0: use user-supplied work array of length lwork in bytes, * returns error if space runs out. * = -1: the routine guesses the amount of space needed without * performing the factorization, and returns it in * mem_usage->total_needed; no other side effects. * * See argument 'mem_usage' for memory usage statistics. * * B (input/output) SuperMatrix* * B has types: Stype = SLU_DN, Dtype = SLU_D, Mtype = SLU_GE. * On entry, the right hand side matrix. * If B->ncol = 0, only LU decomposition is performed, the triangular * solve is skipped. * On exit, * if equed = 'N', B is not modified; otherwise * if A->Stype = SLU_NC: * if options->Trans = NOTRANS and equed = 'R' or 'B', * B is overwritten by diag(R)*B; * if options->Trans = TRANS or CONJ and equed = 'C' of 'B', * B is overwritten by diag(C)*B; * if A->Stype = SLU_NR: * if options->Trans = NOTRANS and equed = 'C' or 'B', * B is overwritten by diag(C)*B; * if options->Trans = TRANS or CONJ and equed = 'R' of 'B', * B is overwritten by diag(R)*B. * * X (output) SuperMatrix* * X has types: Stype = SLU_DN, Dtype = SLU_D, Mtype = SLU_GE. * If info = 0 or info = A->ncol+1, X contains the solution matrix * to the original system of equations. Note that A and B are modified * on exit if equed is not 'N', and the solution to the equilibrated * system is inv(diag(C))*X if options->Trans = NOTRANS and * equed = 'C' or 'B', or inv(diag(R))*X if options->Trans = 'T' or 'C' * and equed = 'R' or 'B'. * * recip_pivot_growth (output) double* * The reciprocal pivot growth factor max_j( norm(A_j)/norm(U_j) ). * The infinity norm is used. If recip_pivot_growth is much less * than 1, the stability of the LU factorization could be poor. * * rcond (output) double* * The estimate of the reciprocal condition number of the matrix A * after equilibration (if done). If rcond is less than the machine * precision (in particular, if rcond = 0), the matrix is singular * to working precision. This condition is indicated by a return * code of info > 0. * * FERR (output) double*, dimension (B->ncol) * The estimated forward error bound for each solution vector * X(j) (the j-th column of the solution matrix X). * If XTRUE is the true solution corresponding to X(j), FERR(j) * is an estimated upper bound for the magnitude of the largest * element in (X(j) - XTRUE) divided by the magnitude of the * largest element in X(j). The estimate is as reliable as * the estimate for RCOND, and is almost always a slight * overestimate of the true error. * If options->IterRefine = NOREFINE, ferr = 1.0. * * BERR (output) double*, dimension (B->ncol) * The componentwise relative backward error of each solution * vector X(j) (i.e., the smallest relative change in * any element of A or B that makes X(j) an exact solution). * If options->IterRefine = NOREFINE, berr = 1.0. * * mem_usage (output) mem_usage_t* * Record the memory usage statistics, consisting of following fields: * - for_lu (float) * The amount of space used in bytes for L\U data structures. * - total_needed (float) * The amount of space needed in bytes to perform factorization. * - expansions (int) * The number of memory expansions during the LU factorization. * * stat (output) SuperLUStat_t* * Record the statistics on runtime and floating-point operation count. * See util.h for the definition of 'SuperLUStat_t'. * * info (output) int* * = 0: successful exit * < 0: if info = -i, the i-th argument had an illegal value * > 0: if info = i, and i is * <= A->ncol: U(i,i) is exactly zero. The factorization has * been completed, but the factor U is exactly * singular, so the solution and error bounds * could not be computed. * = A->ncol+1: U is nonsingular, but RCOND is less than machine * precision, meaning that the matrix is singular to * working precision. Nevertheless, the solution and * error bounds are computed because there are a number * of situations where the computed solution can be more * accurate than the value of RCOND would suggest. * > A->ncol+1: number of bytes allocated when memory allocation * failure occurred, plus A->ncol. * */ DNformat *Bstore, *Xstore; double *Bmat, *Xmat; int ldb, ldx, nrhs; SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/ SuperMatrix AC; /* Matrix postmultiplied by Pc */ int colequ, equil, nofact, notran, rowequ, permc_spec; trans_t trant; char norm[1]; int i, j, info1; double amax, anorm, bignum, smlnum, colcnd, rowcnd, rcmax, rcmin; int relax, panel_size; double drop_tol; double t0; /* temporary time */ double *utime; /* External functions */ extern double dlangs(char *, SuperMatrix *); extern double hypre_F90_NAME_LAPACK(dlamch,DLAMCH)(const char *); Bstore = (DNformat*) B->Store; Xstore = (DNformat*) X->Store; Bmat = ( double*) Bstore->nzval; Xmat = ( double*) Xstore->nzval; ldb = Bstore->lda; ldx = Xstore->lda; nrhs = B->ncol; *info = 0; nofact = (options->Fact != FACTORED); equil = (options->Equil == YES); notran = (options->Trans == NOTRANS); if ( nofact ) { *(unsigned char *)equed = 'N'; rowequ = FALSE; colequ = FALSE; } else { rowequ = superlu_lsame(equed, "R") || superlu_lsame(equed, "B"); colequ = superlu_lsame(equed, "C") || superlu_lsame(equed, "B"); smlnum = hypre_F90_NAME_LAPACK(dlamch,DLAMCH)("Safe minimum"); bignum = 1. / smlnum; } #if 0 printf("dgssvx: Fact=%4d, Trans=%4d, equed=%c\n", options->Fact, options->Trans, *equed); #endif /* Test the input parameters */ if (!nofact && options->Fact != DOFACT && options->Fact != SamePattern && options->Fact != SamePattern_SameRowPerm && !notran && options->Trans != TRANS && options->Trans != CONJ && !equil && options->Equil != NO) *info = -1; else if ( A->nrow != A->ncol || A->nrow < 0 || (A->Stype != SLU_NC && A->Stype != SLU_NR) || A->Dtype != SLU_D || A->Mtype != SLU_GE ) *info = -2; else if (options->Fact == FACTORED && !(rowequ || colequ || superlu_lsame(equed, "N"))) *info = -6; else { if (rowequ) { rcmin = bignum; rcmax = 0.; for (j = 0; j < A->nrow; ++j) { rcmin = SUPERLU_MIN(rcmin, R[j]); rcmax = SUPERLU_MAX(rcmax, R[j]); } if (rcmin <= 0.) *info = -7; else if ( A->nrow > 0) rowcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum); else rowcnd = 1.; } if (colequ && *info == 0) { rcmin = bignum; rcmax = 0.; for (j = 0; j < A->nrow; ++j) { rcmin = SUPERLU_MIN(rcmin, C[j]); rcmax = SUPERLU_MAX(rcmax, C[j]); } if (rcmin <= 0.) *info = -8; else if (A->nrow > 0) colcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum); else colcnd = 1.; } if (*info == 0) { if ( lwork < -1 ) *info = -12; else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) || B->Stype != SLU_DN || B->Dtype != SLU_D || B->Mtype != SLU_GE ) *info = -13; else if ( X->ncol < 0 || Xstore->lda < SUPERLU_MAX(0, A->nrow) || (B->ncol != 0 && B->ncol != X->ncol) || X->Stype != SLU_DN || X->Dtype != SLU_D || X->Mtype != SLU_GE ) *info = -14; } } if (*info != 0) { i = -(*info); superlu_xerbla("dgssvx", &i); return; } /* Initialization for factor parameters */ panel_size = sp_ienv(1); relax = sp_ienv(2); drop_tol = 0.0; utime = stat->utime; /* Convert A to SLU_NC format when necessary. */ if ( A->Stype == SLU_NR ) { NRformat *Astore = (NRformat*) A->Store; AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); dCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, (double*) Astore->nzval, Astore->colind, Astore->rowptr, SLU_NC, A->Dtype, A->Mtype); if ( notran ) { /* Reverse the transpose argument. */ trant = TRANS; notran = 0; } else { trant = NOTRANS; notran = 1; } } else { /* A->Stype == SLU_NC */ trant = options->Trans; AA = A; } if ( nofact && equil ) { t0 = SuperLU_timer_(); /* Compute row and column scalings to equilibrate the matrix A. */ dgsequ(AA, R, C, &rowcnd, &colcnd, &amax, &info1); if ( info1 == 0 ) { /* Equilibrate matrix A. */ dlaqgs(AA, R, C, rowcnd, colcnd, amax, equed); rowequ = superlu_lsame(equed, "R") || superlu_lsame(equed, "B"); colequ = superlu_lsame(equed, "C") || superlu_lsame(equed, "B"); } utime[EQUIL] = SuperLU_timer_() - t0; } if ( nrhs > 0 ) { /* Scale the right hand side if equilibration was performed. */ if ( notran ) { if ( rowequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < A->nrow; ++i) { Bmat[i + j*ldb] *= R[i]; } } } else if ( colequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < A->nrow; ++i) { Bmat[i + j*ldb] *= C[i]; } } } if ( nofact ) { t0 = SuperLU_timer_(); /* * Gnet column permutation vector perm_c[], according to permc_spec: * permc_spec = NATURAL: natural ordering * permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A * permc_spec = MMD_ATA: minimum degree on structure of A'*A * permc_spec = COLAMD: approximate minimum degree column ordering * permc_spec = MY_PERMC: the ordering already supplied in perm_c[] */ permc_spec = options->ColPerm; if ( permc_spec != MY_PERMC && options->Fact == DOFACT ) get_perm_c(permc_spec, AA, perm_c); utime[COLPERM] = SuperLU_timer_() - t0; t0 = SuperLU_timer_(); sp_preorder(options, AA, perm_c, etree, &AC); utime[ETREE] = SuperLU_timer_() - t0; /* printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n", relax, panel_size, sp_ienv(3), sp_ienv(4)); fflush(stdout); */ /* Compute the LU factorization of A*Pc. */ t0 = SuperLU_timer_(); dgstrf(options, &AC, drop_tol, relax, panel_size, etree, work, lwork, perm_c, perm_r, L, U, stat, info); utime[FACT] = SuperLU_timer_() - t0; if ( lwork == -1 ) { mem_usage->total_needed = *info - A->ncol; return; } } if ( options->PivotGrowth ) { if ( *info > 0 ) { if ( *info <= A->ncol ) { /* Compute the reciprocal pivot growth factor of the leading rank-deficient *info columns of A. */ *recip_pivot_growth = dPivotGrowth(*info, AA, perm_c, L, U); } return; } /* Compute the reciprocal pivot growth factor *recip_pivot_growth. */ *recip_pivot_growth = dPivotGrowth(A->ncol, AA, perm_c, L, U); } if ( options->ConditionNumber ) { /* Estimate the reciprocal of the condition number of A. */ t0 = SuperLU_timer_(); if ( notran ) { *(unsigned char *)norm = '1'; } else { *(unsigned char *)norm = 'I'; } anorm = dlangs(norm, AA); dgscon(norm, L, U, anorm, rcond, stat, info); utime[RCOND] = SuperLU_timer_() - t0; } if ( nrhs > 0 ) { /* Compute the solution matrix X. */ for (j = 0; j < nrhs; j++) /* Save a copy of the right hand sides */ for (i = 0; i < B->nrow; i++) Xmat[i + j*ldx] = Bmat[i + j*ldb]; t0 = SuperLU_timer_(); dgstrs (trant, L, U, perm_c, perm_r, X, stat, info); utime[SOLVE] = SuperLU_timer_() - t0; /* Use iterative refinement to improve the computed solution and compute error bounds and backward error estimates for it. */ t0 = SuperLU_timer_(); if ( options->IterRefine != NOREFINE ) { dgsrfs(trant, AA, L, U, perm_c, perm_r, equed, R, C, B, X, ferr, berr, stat, info); } else { for (j = 0; j < nrhs; ++j) ferr[j] = berr[j] = 1.0; } utime[REFINE] = SuperLU_timer_() - t0; /* Transform the solution matrix X to a solution of the original system. */ if ( notran ) { if ( colequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < A->nrow; ++i) { Xmat[i + j*ldx] *= C[i]; } } } else if ( rowequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < A->nrow; ++i) { Xmat[i + j*ldx] *= R[i]; } } } /* end if nrhs > 0 */ if ( options->ConditionNumber ) { /* Set INFO = A->ncol+1 if the matrix is singular to working precision. */ if (*rcond < hypre_F90_NAME_LAPACK(dlamch,DLAMCH)("E")) *info=A->ncol+1; } if ( nofact ) { dQuerySpace(L, U, mem_usage); Destroy_CompCol_Permuted(&AC); } if ( A->Stype == SLU_NR ) { Destroy_SuperMatrix_Store(AA); SUPERLU_FREE(AA); } }
void dgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U, int *perm_c, int *perm_r, SuperMatrix *B, SuperLUStat_t *stat, int *info) { /* * Purpose * ======= * * DGSTRS solves a system of linear equations A*X=B or A'*X=B * with A sparse and B dense, using the LU factorization computed by * DGSTRF. * * See supermatrix.h for the definition of 'SuperMatrix' structure. * * Arguments * ========= * * trans (input) trans_t * Specifies the form of the system of equations: * = NOTRANS: A * X = B (No transpose) * = TRANS: A'* X = B (Transpose) * = CONJ: A**H * X = B (Conjugate transpose) * * L (input) SuperMatrix* * The factor L from the factorization Pr*A*Pc=L*U as computed by * dgstrf(). Use compressed row subscripts storage for supernodes, * i.e., L has types: Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU. * * U (input) SuperMatrix* * The factor U from the factorization Pr*A*Pc=L*U as computed by * dgstrf(). Use column-wise storage scheme, i.e., U has types: * Stype = SLU_NC, Dtype = SLU_D, Mtype = SLU_TRU. * * perm_c (input) int*, dimension (L->ncol) * Column permutation vector, which defines the * permutation matrix Pc; perm_c[i] = j means column i of A is * in position j in A*Pc. * * perm_r (input) int*, dimension (L->nrow) * Row permutation vector, which defines the permutation matrix Pr; * perm_r[i] = j means row i of A is in position j in Pr*A. * * B (input/output) SuperMatrix* * B has types: Stype = SLU_DN, Dtype = SLU_D, Mtype = SLU_GE. * On entry, the right hand side matrix. * On exit, the solution matrix if info = 0; * * stat (output) SuperLUStat_t* * Record the statistics on runtime and floating-point operation count. * See util.h for the definition of 'SuperLUStat_t'. * * info (output) int* * = 0: successful exit * < 0: if info = -i, the i-th argument had an illegal value * */ #ifdef _CRAY _fcd ftcs1, ftcs2, ftcs3, ftcs4; #endif #if 0 int incx = 1, incy = 1; #endif #ifdef USE_VENDOR_BLAS double alpha = 1.0, beta = 1.0; double *work_col; #endif DNformat *Bstore; double *Bmat; SCformat *Lstore; NCformat *Ustore; double *Lval, *Uval; int fsupc, nrow, nsupr, nsupc, luptr, istart, irow; int i, j, k, iptr, jcol, n, ldb, nrhs; double *work, *rhs_work, *soln; flops_t solve_ops; void dprint_soln(); /* Test input parameters ... */ *info = 0; Bstore = B->Store; ldb = Bstore->lda; nrhs = B->ncol; if ( trans != NOTRANS && trans != TRANS && trans != CONJ ) *info = -1; else if ( L->nrow != L->ncol || L->nrow < 0 || L->Stype != SLU_SC || L->Dtype != SLU_D || L->Mtype != SLU_TRLU ) *info = -2; else if ( U->nrow != U->ncol || U->nrow < 0 || U->Stype != SLU_NC || U->Dtype != SLU_D || U->Mtype != SLU_TRU ) *info = -3; else if ( ldb < SUPERLU_MAX(0, L->nrow) || B->Stype != SLU_DN || B->Dtype != SLU_D || B->Mtype != SLU_GE ) *info = -6; if ( *info ) { i = -(*info); superlu_xerbla("dgstrs", &i); return; } n = L->nrow; work = doubleCalloc(n * nrhs); if ( !work ) ABORT("Malloc fails for local work[]."); soln = doubleMalloc(n); if ( !soln ) ABORT("Malloc fails for local soln[]."); Bmat = Bstore->nzval; Lstore = L->Store; Lval = Lstore->nzval; Ustore = U->Store; Uval = Ustore->nzval; solve_ops = 0; if ( trans == NOTRANS ) { /* Permute right hand sides to form Pr*B */ for (i = 0; i < nrhs; i++) { rhs_work = &Bmat[i*ldb]; for (k = 0; k < n; k++) soln[perm_r[k]] = rhs_work[k]; for (k = 0; k < n; k++) rhs_work[k] = soln[k]; } /* Forward solve PLy=Pb. */ for (k = 0; k <= Lstore->nsuper; k++) { fsupc = L_FST_SUPC(k); istart = L_SUB_START(fsupc); nsupr = L_SUB_START(fsupc+1) - istart; nsupc = L_FST_SUPC(k+1) - fsupc; nrow = nsupr - nsupc; solve_ops += nsupc * (nsupc - 1) * nrhs; solve_ops += 2 * nrow * nsupc * nrhs; if ( nsupc == 1 ) { for (j = 0; j < nrhs; j++) { rhs_work = &Bmat[j*ldb]; luptr = L_NZ_START(fsupc); for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); iptr++){ irow = L_SUB(iptr); ++luptr; rhs_work[irow] -= rhs_work[fsupc] * Lval[luptr]; } } } else { luptr = L_NZ_START(fsupc); #ifdef USE_VENDOR_BLAS #ifdef _CRAY ftcs1 = _cptofcd("L", strlen("L")); ftcs2 = _cptofcd("N", strlen("N")); ftcs3 = _cptofcd("U", strlen("U")); STRSM( ftcs1, ftcs1, ftcs2, ftcs3, &nsupc, &nrhs, &alpha, &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb); SGEMM( ftcs2, ftcs2, &nrow, &nrhs, &nsupc, &alpha, &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, &beta, &work[0], &n ); #else hypre_F90_NAME_BLAS(dtrsm,DTRSM)("L","L","N","U",&nsupc, &nrhs, &alpha, &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb); hypre_F90_NAME_BLAS(dgemm,DGEMM)("N","N",&nrow,&nrhs,&nsupc, &alpha, &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, &beta, &work[0], &n ); #endif for (j = 0; j < nrhs; j++) { rhs_work = &Bmat[j*ldb]; work_col = &work[j*n]; iptr = istart + nsupc; for (i = 0; i < nrow; i++) { irow = L_SUB(iptr); rhs_work[irow] -= work_col[i]; /* Scatter */ work_col[i] = 0.0; iptr++; } } #else for (j = 0; j < nrhs; j++) { rhs_work = &Bmat[j*ldb]; sludlsolve (nsupr, nsupc, &Lval[luptr], &rhs_work[fsupc]); sludmatvec (nsupr, nrow, nsupc, &Lval[luptr+nsupc], &rhs_work[fsupc], &work[0] ); iptr = istart + nsupc; for (i = 0; i < nrow; i++) { irow = L_SUB(iptr); rhs_work[irow] -= work[i]; work[i] = 0.0; iptr++; } } #endif } /* else ... */ } /* for L-solve */ #ifdef DEBUG printf("After L-solve: y=\n"); dprint_soln(n, nrhs, Bmat); #endif /* * Back solve Ux=y. */ for (k = Lstore->nsuper; k >= 0; k--) { fsupc = L_FST_SUPC(k); istart = L_SUB_START(fsupc); nsupr = L_SUB_START(fsupc+1) - istart; nsupc = L_FST_SUPC(k+1) - fsupc; luptr = L_NZ_START(fsupc); solve_ops += nsupc * (nsupc + 1) * nrhs; if ( nsupc == 1 ) { rhs_work = &Bmat[0]; for (j = 0; j < nrhs; j++) { rhs_work[fsupc] /= Lval[luptr]; rhs_work += ldb; } } else { #ifdef USE_VENDOR_BLAS #ifdef _CRAY ftcs1 = _cptofcd("L", strlen("L")); ftcs2 = _cptofcd("U", strlen("U")); ftcs3 = _cptofcd("N", strlen("N")); STRSM( ftcs1, ftcs2, ftcs3, ftcs3, &nsupc, &nrhs, &alpha, &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb); #else hypre_F90_NAME_BLAS(dtrsm,DTRSM)("L","U","N","N", &nsupc, &nrhs, &alpha, &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb); #endif #else for (j = 0; j < nrhs; j++) sludusolve ( nsupr, nsupc, &Lval[luptr], &Bmat[fsupc+j*ldb] ); #endif } for (j = 0; j < nrhs; ++j) { rhs_work = &Bmat[j*ldb]; for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) { solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol)); for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++ ){ irow = U_SUB(i); rhs_work[irow] -= rhs_work[jcol] * Uval[i]; } } } } /* for U-solve */ #ifdef DEBUG printf("After U-solve: x=\n"); dprint_soln(n, nrhs, Bmat); #endif /* Compute the final solution X := Pc*X. */ for (i = 0; i < nrhs; i++) { rhs_work = &Bmat[i*ldb]; for (k = 0; k < n; k++) soln[k] = rhs_work[perm_c[k]]; for (k = 0; k < n; k++) rhs_work[k] = soln[k]; } stat->ops[SOLVE] = solve_ops; } else { /* Solve A'*X=B or CONJ(A)*X=B */ /* Permute right hand sides to form Pc'*B. */ for (i = 0; i < nrhs; i++) { rhs_work = &Bmat[i*ldb]; for (k = 0; k < n; k++) soln[perm_c[k]] = rhs_work[k]; for (k = 0; k < n; k++) rhs_work[k] = soln[k]; } stat->ops[SOLVE] = 0; for (k = 0; k < nrhs; ++k) { /* Multiply by inv(U'). */ sp_dtrsv("U", "T", "N", L, U, &Bmat[k*ldb], stat, info); /* Multiply by inv(L'). */ sp_dtrsv("L", "T", "U", L, U, &Bmat[k*ldb], stat, info); } /* Compute the final solution X := Pr'*X (=inv(Pr)*X) */ for (i = 0; i < nrhs; i++) { rhs_work = &Bmat[i*ldb]; for (k = 0; k < n; k++) soln[k] = rhs_work[perm_r[k]]; for (k = 0; k < n; k++) rhs_work[k] = soln[k]; } } SUPERLU_FREE(work); SUPERLU_FREE(soln); }