/*! @file ilu_ccopy_to_ucol.c
 * \brief Copy a computed column of U to the compressed data structure
 * and drop some small entries
 *
 * <pre>
 * -- SuperLU routine (version 4.1) --
 * Lawrence Berkeley National Laboratory
 * November, 2010
 * </pre>
 */

#include "slu_cdefs.h"

#ifdef DEBUG
int num_drop_U;
#endif

extern void ccopy_(int *, complex [], int *, complex [], int *);

#if SCIPY_FIX
extern double dlamch_(char *);
#endif

#if 0
/* Disabled helper, kept for reference only. */
static complex *A;  /* used in _compare_ only */

/*
 * qsort-style comparator over indices into A[]: the index whose entry has
 * the larger c_abs1() value sorts first (descending order).
 */
static int _compare_(const void *a, const void *b)
{
    /* Keep the const qualifier: the comparator only reads the indices. */
    const int *x = (const int *)a;
    const int *y = (const int *)b;
    float xx = c_abs1(&A[*x]);
    float yy = c_abs1(&A[*y]);

    if (xx > yy) return -1;
    else if (xx < yy) return 1;
    else return 0;
}
#endif /* 0 -- closes the disabled block so the code below is compiled */
void cgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U, int *perm_r, int *perm_c, equed_t equed, float *R, float *C, SuperMatrix *B, SuperMatrix *X, float *ferr, float *berr, Gstat_t *Gstat, int *info) { /* * -- SuperLU MT routine (version 2.0) -- * Lawrence Berkeley National Lab, Univ. of California Berkeley, * and Xerox Palo Alto Research Center. * September 10, 2007 * * * Purpose * ======= * * cgsrfs improves the computed solution to a system of linear * equations and provides error bounds and backward error estimates for * the solution. * * See supermatrix.h for the definition of 'SuperMatrix' structure. * * Arguments * ========= * * trans (input) trans_t * Specifies the form of the system of equations: * = NOTRANS: A * X = B (No transpose) * = TRANS: A**T * X = B (Transpose) * = CONJ: A**H * X = B (Conjugate transpose = Transpose) * * A (input) SuperMatrix* * The original matrix A in the system, or the scaled A if * equilibration was done. The type of A can be: * Stype = NC, Dtype = _D, Mtype = GE. * * L (input) SuperMatrix* * The factor L from the factorization Pr*A*Pc=L*U. Use * compressed row subscripts storage for supernodes, * i.e., L has types: Stype = SCP, Dtype = _D, Mtype = TRLU. * * U (input) SuperMatrix* * The factor U from the factorization Pr*A*Pc=L*U as computed by * dgstrf(). Use column-wise storage scheme, * i.e., U has types: Stype = NCP, Dtype = _D, Mtype = TRU. * * perm_r (input) int*, dimension (A->nrow) * Row permutation vector, which defines the permutation matrix Pr; * perm_r[i] = j means row i of A is in position j in Pr*A. * * perm_c (input) int*, dimension (A->ncol) * Column permutation vector, which defines the * permutation matrix Pc; perm_c[i] = j means column i of A is * in position j in A*Pc. * * equed (input) equed_t * Specifies the form of equilibration that was done. * = NOEQUIL: No equilibration. * = ROW: Row equilibration, i.e., A was premultiplied by diag(R). 
* = COL: Column equilibration, i.e., A was postmultiplied by * diag(C). * = BOTH: Both row and column equilibration, i.e., A was replaced * by diag(R)*A*diag(C). * * R (input) double*, dimension (A->nrow) * The row scale factors for A. * If equed = ROW or BOTH, A is premultiplied by diag(R). * If equed = NOEQUIL or COL, R is not accessed. * * C (input) double*, dimension (A->ncol) * The column scale factors for A. * If equed = COL or BOTH, A is postmultiplied by diag(C). * If equed = NOEQUIL or ROW, C is not accessed. * * B (input) SuperMatrix* * B has types: Stype = DN, Dtype = _D, Mtype = GE. * The right hand side matrix B. * * X (input/output) SuperMatrix* * X has types: Stype = DN, Dtype = _D, Mtype = GE. * On entry, the solution matrix X, as computed by dgstrs(). * On exit, the improved solution matrix X. * * FERR (output) double*, dimension (B->ncol) * The estimated forward error bound for each solution vector * X(j) (the j-th column of the solution matrix X). * If XTRUE is the true solution corresponding to X(j), FERR(j) * is an estimated upper bound for the magnitude of the largest * element in (X(j) - XTRUE) divided by the magnitude of the * largest element in X(j). The estimate is as reliable as * the estimate for RCOND, and is almost always a slight * overestimate of the true error. * * BERR (output) double*, dimension (B->ncol) * The componentwise relative backward error of each solution * vector X(j) (i.e., the smallest relative change in * any element of A or B that makes X(j) an exact solution). * * info (output) int* * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * * Internal Parameters * =================== * * ITMAX is the maximum number of steps of iterative refinement. 
* */ #define ITMAX 5 /* Table of constant values */ int ione = 1; complex ndone = {-1., 0.}; complex done = {1., 0.}; /* Local variables */ NCformat *Astore; complex *Aval; SuperMatrix Bjcol; DNformat *Bstore, *Xstore, *Bjcol_store; complex *Bmat, *Xmat, *Bptr, *Xptr; int kase; float safe1, safe2; int i, j, k, irow, nz, count, notran, rowequ, colequ; int ldb, ldx, nrhs; float s, xk, lstres, eps, safmin; char transc[1]; trans_t transt; complex *work; float *rwork; int *iwork; extern double slamch_(char *); extern int clacon_(int *, complex *, complex *, float *, int *); #ifdef _CRAY extern int CCOPY(int *, complex *, int *, complex *, int *); extern int CSAXPY(int *, complex *, complex *, int *, complex *, int *); #else extern int ccopy_(int *, complex *, int *, complex *, int *); extern int caxpy_(int *, complex *, complex *, int *, complex *, int *); #endif Astore = A->Store; Aval = Astore->nzval; Bstore = B->Store; Xstore = X->Store; Bmat = Bstore->nzval; Xmat = Xstore->nzval; ldb = Bstore->lda; ldx = Xstore->lda; nrhs = B->ncol; /* Test the input parameters */ *info = 0; notran = (trans == NOTRANS); if ( !notran && trans != TRANS && trans != CONJ ) *info = -1; else if ( A->nrow != A->ncol || A->nrow < 0 || A->Stype != SLU_NC || A->Dtype != SLU_C || A->Mtype != SLU_GE ) *info = -2; else if ( L->nrow != L->ncol || L->nrow < 0 || L->Stype != SLU_SCP || L->Dtype != SLU_C || L->Mtype != SLU_TRLU ) *info = -3; else if ( U->nrow != U->ncol || U->nrow < 0 || U->Stype != SLU_NCP || U->Dtype != SLU_C || U->Mtype != SLU_TRU ) *info = -4; else if ( ldb < SUPERLU_MAX(0, A->nrow) || B->Stype != SLU_DN || B->Dtype != SLU_C || B->Mtype != SLU_GE ) *info = -10; else if ( ldx < SUPERLU_MAX(0, A->nrow) || X->Stype != SLU_DN || X->Dtype != SLU_C || X->Mtype != SLU_GE ) *info = -11; if (*info != 0) { i = -(*info); xerbla_("cgsrfs", &i); return; } /* Quick return if possible */ if ( A->nrow == 0 || nrhs == 0) { for (j = 0; j < nrhs; ++j) { ferr[j] = 0.; berr[j] = 0.; } return; } 
rowequ = (equed == ROW) || (equed == BOTH); colequ = (equed == COL) || (equed == BOTH); /* Allocate working space */ work = complexMalloc(2*A->nrow); rwork = (float *) SUPERLU_MALLOC( (size_t) A->nrow * sizeof(float) ); iwork = intMalloc(A->nrow); if ( !work || !rwork || !iwork ) SUPERLU_ABORT("Malloc fails for work/rwork/iwork."); if ( notran ) { *(unsigned char *)transc = 'N'; transt = TRANS; } else { *(unsigned char *)transc = 'T'; transt = NOTRANS; } /* NZ = maximum number of nonzero elements in each row of A, plus 1 */ nz = A->ncol + 1; eps = slamch_("Epsilon"); safmin = slamch_("Safe minimum"); /* Set SAFE1 essentially to be the underflow threshold times the number of additions in each row. */ safe1 = nz * safmin; safe2 = safe1 / eps; /* Compute the number of nonzeros in each row (or column) of A */ for (i = 0; i < A->nrow; ++i) iwork[i] = 0; if ( notran ) { for (k = 0; k < A->ncol; ++k) for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) ++iwork[Astore->rowind[i]]; } else { for (k = 0; k < A->ncol; ++k) iwork[k] = Astore->colptr[k+1] - Astore->colptr[k]; } /* Copy one column of RHS B into Bjcol. */ Bjcol.Stype = B->Stype; Bjcol.Dtype = B->Dtype; Bjcol.Mtype = B->Mtype; Bjcol.nrow = B->nrow; Bjcol.ncol = 1; Bjcol.Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) ); if ( !Bjcol.Store ) SUPERLU_ABORT("SUPERLU_MALLOC fails for Bjcol.Store"); Bjcol_store = Bjcol.Store; Bjcol_store->lda = ldb; Bjcol_store->nzval = work; /* address aliasing */ /* Do for each right hand side ... */ for (j = 0; j < nrhs; ++j) { count = 0; lstres = 3.; Bptr = &Bmat[j*ldb]; Xptr = &Xmat[j*ldx]; while (1) { /* Loop until stopping criterion is satisfied. */ /* Compute residual R = B - op(A) * X, where op(A) = A, A**T, or A**H, depending on TRANS. 
*/ #ifdef _CRAY CCOPY(&A->nrow, Bptr, &ione, work, &ione); #else ccopy_(&A->nrow, Bptr, &ione, work, &ione); #endif sp_cgemv(transc, ndone, A, Xptr, ione, done, work, ione); /* Compute componentwise relative backward error from formula max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) where abs(Z) is the componentwise absolute value of the matrix or vector Z. If the i-th component of the denominator is less than SAFE2, then SAFE1 is added to the i-th component of the numerator before dividing. */ for (i = 0; i < A->nrow; ++i) rwork[i] = c_abs1( &Bptr[i] ); /* Compute abs(op(A))*abs(X) + abs(B). */ if (notran) { for (k = 0; k < A->ncol; ++k) { xk = c_abs1( &Xptr[k] ); for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) rwork[Astore->rowind[i]] += c_abs1(&Aval[i]) * xk; } } else { for (k = 0; k < A->ncol; ++k) { s = 0.; for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) { irow = Astore->rowind[i]; s += c_abs1(&Aval[i]) * c_abs1(&Xptr[irow]); } rwork[k] += s; } } s = 0.; for (i = 0; i < A->nrow; ++i) { if (rwork[i] > safe2) { s = SUPERLU_MAX( s, c_abs1(&work[i]) / rwork[i] ); } else if ( rwork[i] != 0.0 ) { s = SUPERLU_MAX( s, (c_abs1(&work[i]) + safe1) / rwork[i] ); } /* If rwork[i] is exactly 0.0, then we know the true residual also must be exactly 0.0. */ } berr[j] = s; /* Test stopping criterion. Continue iterating if 1) The residual BERR(J) is larger than machine epsilon, and 2) BERR(J) decreased by at least a factor of 2 during the last iteration, and 3) At most ITMAX iterations tried. */ if (berr[j] > eps && berr[j] * 2. <= lstres && count < ITMAX) { /* Update solution and try again. */ cgstrs (trans, L, U, perm_r, perm_c, &Bjcol, Gstat, info); #ifdef _CRAY CAXPY(&A->nrow, &done, work, &ione, &Xmat[j*ldx], &ione); #else caxpy_(&A->nrow, &done, work, &ione, &Xmat[j*ldx], &ione); #endif lstres = berr[j]; ++count; } else { break; } } /* end while */ /* Bound error from formula: norm(X - XTRUE) / norm(X) .le. 
FERR = norm( abs(inv(op(A)))* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) where norm(Z) is the magnitude of the largest component of Z inv(op(A)) is the inverse of op(A) abs(Z) is the componentwise absolute value of the matrix or vector Z NZ is the maximum number of nonzeros in any row of A, plus 1 EPS is machine epsilon The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) is incremented by SAFE1 if the i-th component of abs(op(A))*abs(X) + abs(B) is less than SAFE2. Use CLACON to estimate the infinity-norm of the matrix inv(op(A)) * diag(W), where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */ for (i = 0; i < A->nrow; ++i) rwork[i] = c_abs1( &Bptr[i] ); /* Compute abs(op(A))*abs(X) + abs(B). */ if ( notran ) { for (k = 0; k < A->ncol; ++k) { xk = c_abs1( &Xptr[k] ); for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) rwork[Astore->rowind[i]] += c_abs1(&Aval[i]) * xk; } } else { for (k = 0; k < A->ncol; ++k) { s = 0.; for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) { irow = Astore->rowind[i]; xk = c_abs1( &Xptr[irow] ); s += c_abs1(&Aval[i]) * xk; } rwork[k] += s; } } for (i = 0; i < A->nrow; ++i) if (rwork[i] > safe2) rwork[i] = c_abs(&work[i]) + (iwork[i]+1)*eps*rwork[i]; else rwork[i] = c_abs(&work[i])+(iwork[i]+1)*eps*rwork[i]+safe1; kase = 0; do { clacon_(&A->nrow, &work[A->nrow], work, &ferr[j], &kase); if (kase == 0) break; if (kase == 1) { /* Multiply by diag(W)*inv(op(A)**T)*(diag(C) or diag(R)). */ if ( notran && colequ ) for (i = 0; i < A->ncol; ++i) { cs_mult(&work[i], &work[i], C[i]); } else if ( !notran && rowequ ) for (i = 0; i < A->nrow; ++i) { cs_mult(&work[i], &work[i], R[i]); } cgstrs (transt, L, U, perm_r, perm_c, &Bjcol, Gstat, info); for (i = 0; i < A->nrow; ++i) { cs_mult(&work[i], &work[i], rwork[i]); } } else { /* Multiply by (diag(C) or diag(R))*inv(op(A))*diag(W). 
*/ for (i = 0; i < A->nrow; ++i) { cs_mult(&work[i], &work[i], rwork[i]); } cgstrs (trans, L, U, perm_r, perm_c, &Bjcol, Gstat, info); if ( notran && colequ ) for (i = 0; i < A->ncol; ++i) { cs_mult(&work[i], &work[i], C[i]); } else if ( !notran && rowequ ) for (i = 0; i < A->ncol; ++i) { cs_mult(&work[i], &work[i], R[i]); } } } while ( kase != 0 ); /* Normalize error. */ lstres = 0.; if ( notran && colequ ) { for (i = 0; i < A->nrow; ++i) lstres = SUPERLU_MAX( lstres, C[i] * c_abs1( &Xptr[i]) ); } else if ( !notran && rowequ ) { for (i = 0; i < A->nrow; ++i) lstres = SUPERLU_MAX( lstres, R[i] * c_abs1( &Xptr[i]) ); } else { for (i = 0; i < A->nrow; ++i) lstres = SUPERLU_MAX( lstres, c_abs1( &Xptr[i]) ); } if ( lstres != 0. ) ferr[j] /= lstres; } /* for each RHS j ... */ SUPERLU_FREE(work); SUPERLU_FREE(rwork); SUPERLU_FREE(iwork); SUPERLU_FREE(Bjcol.Store); return; } /* cgsrfs */
/*
 * Select the pivot for column jcol of the current supernode, swap it into
 * the diagonal position, and scale the sub-diagonal part of the column by
 * the reciprocal of the pivot.
 *
 * Pivot choice, in order of preference, among the entries whose c_abs1()
 * value is at least u * (largest c_abs1() value in the column):
 *   1. the row given by iperm_r[jcol], when *usepr is set;
 *   2. the diagonal entry (row iperm_c[jcol]), when it is present;
 *   3. the entry of largest c_abs1() value.
 * If (1) is rejected, *usepr is cleared.
 *
 * Return value:
 *   0        a nonzero pivot was found;
 *   jcol+1   every candidate entry has c_abs1() == 0. perm_r and *pivrow
 *            are still set, *usepr is cleared, and neither the row
 *            interchange nor the scaling is performed.
 */
int
cpivotL(
        const int  jcol,     /* in */
        const float u,       /* in - diagonal pivoting threshold */
        int        *usepr,   /* re-use the pivot sequence given by perm_r/iperm_r */
        int        *perm_r,  /* may be modified */
        int        *iperm_r, /* in - inverse of perm_r */
        int        *iperm_c, /* in - used to find diagonal of Pc*A*Pc' */
        int        *pivrow,  /* out */
        GlobalLU_t *Glu,     /* modified - global LU data structures */
        SuperLUStat_t *stat  /* output */
       )
{
    complex one = {1.0, 0.0};
    int          fsupc;  /* first column in the supernode */
    int          nsupc;  /* no of columns in the supernode */
    int          nsupr;  /* no of rows in the supernode */
    int          lptr;   /* points to the starting subscript of the supernode */
    int          pivptr, old_pivptr, diag, diagind;
    float        pivmax, rtemp, thresh;
    complex      temp;
    complex      *lu_sup_ptr;
    complex      *lu_col_ptr;
    int          *lsub_ptr;
    int          isub, icol, k, itemp;
    int          *lsub, *xlsub;
    complex      *lusup;
    int          *xlusup;
    flops_t      *ops = stat->ops;

    /* Initialize pointers */
    lsub       = Glu->lsub;
    xlsub      = Glu->xlsub;
    lusup      = Glu->lusup;
    xlusup     = Glu->xlusup;
    fsupc      = (Glu->xsup)[(Glu->supno)[jcol]];
    nsupc      = jcol - fsupc;          /* excluding jcol; nsupc >= 0 */
    lptr       = xlsub[fsupc];
    nsupr      = xlsub[fsupc+1] - lptr;
    lu_sup_ptr = &lusup[xlusup[fsupc]]; /* start of the current supernode */
    lu_col_ptr = &lusup[xlusup[jcol]];  /* start of jcol in the supernode */
    lsub_ptr   = &lsub[lptr];           /* start of row indices of the supernode */

#ifdef DEBUG
    if ( jcol == MIN_COL ) {
        printf(_T("Before cdiv: col %d\n"), jcol);
        /* A complex value cannot be passed to a single %f; print both parts. */
        for (k = nsupc; k < nsupr; k++)
            printf(_T(" lu[%d] (%f, %f)\n"), lsub_ptr[k],
                   lu_col_ptr[k].r, lu_col_ptr[k].i);
    }
#endif

    /* Determine the largest abs numerical value for partial pivoting;
       Also search for user-specified pivot, and diagonal element. */
    if ( *usepr ) *pivrow = iperm_r[jcol];
    diagind = iperm_c[jcol];
    pivmax = 0.0;
    pivptr = nsupc;
    diag = EMPTY;
    old_pivptr = nsupc;
    for (isub = nsupc; isub < nsupr; ++isub) {
        rtemp = c_abs1 (&lu_col_ptr[isub]);
        if ( rtemp > pivmax ) {
            pivmax = rtemp;
            pivptr = isub;
        }
        if ( *usepr && lsub_ptr[isub] == *pivrow ) old_pivptr = isub;
        if ( lsub_ptr[isub] == diagind ) diag = isub;
    }

    /* Test for singularity */
    if ( pivmax == 0.0 ) {
        *pivrow = lsub_ptr[pivptr];
        perm_r[*pivrow] = jcol;
        *usepr = 0;
        return (jcol+1);
    }

    thresh = u * pivmax;

    /* Choose appropriate pivotal element by our policy. */
    if ( *usepr ) {
        rtemp = c_abs1 (&lu_col_ptr[old_pivptr]);
        if ( rtemp != 0.0 && rtemp >= thresh )
            pivptr = old_pivptr;
        else
            *usepr = 0;
    }
    if ( *usepr == 0 ) {
        /* Use diagonal pivot? */
        if ( diag >= 0 ) { /* diagonal exists */
            rtemp = c_abs1 (&lu_col_ptr[diag]);
            if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = diag;
        }
        *pivrow = lsub_ptr[pivptr];
    }

    /* Record pivot row */
    perm_r[*pivrow] = jcol;

    /* Interchange row subscripts */
    if ( pivptr != nsupc ) {
        itemp = lsub_ptr[pivptr];
        lsub_ptr[pivptr] = lsub_ptr[nsupc];
        lsub_ptr[nsupc] = itemp;

        /* Interchange numerical values as well, for the whole snode, such
         * that L is indexed the same way as A.
         */
        for (icol = 0; icol <= nsupc; icol++) {
            itemp = pivptr + icol * nsupr;
            temp = lu_sup_ptr[itemp];
            lu_sup_ptr[itemp] = lu_sup_ptr[nsupc + icol*nsupr];
            lu_sup_ptr[nsupc + icol*nsupr] = temp;
        }
    } /* if */

    /* cdiv operation */
    ops[FACT] += 10 * (nsupr - nsupc);

    /* Scale the entries below the pivot by 1/pivot. */
    c_div(&temp, &one, &lu_col_ptr[nsupc]);
    for (k = nsupc+1; k < nsupr; k++)
        cc_mult(&lu_col_ptr[k], &lu_col_ptr[k], &temp);

    return 0;
}
/*
 * Incomplete LU factorization driver for a single-precision complex matrix
 * held in column-permuted compressed-column form (A->Store is read as an
 * NCPformat).
 *
 * The columns are processed panel by panel. A panel is either a relaxed
 * supernode (relax_end[jcol] != EMPTY) or up to panel_size contiguous
 * columns. U columns are thinned by ilu_ccopy_to_ucol() and finished L
 * supernodes by ilu_cdrop_row(), using the ILU_* settings read from options.
 *
 * options, A, relax, panel_size, etree, work, lwork, perm_c   inputs
 * perm_r   filled with EMPTY on entry and set during pivoting, unless
 *          options->Fact == SamePattern_SameRowPerm, in which case its
 *          inverse is read first
 * L, U     receive the factors on success
 * stat     panel_histo, ops and expansions are updated
 * info     0 on success; a non-zero value returned by a failing helper
 *          (cLUMemInit, ilu_csnode_dfs, cLUMemXpand, ilu_ccolumn_dfs,
 *          ccolumn_bmod, ilu_ccopy_to_ucol) when the routine stops early;
 *          otherwise the count accumulated in iinfo (one per non-zero
 *          return of ilu_cpivotL, plus -fill_tol whenever ilu_cdrop_row
 *          leaves fill_tol negative).
 *
 * On every early exit after the local work arrays have been allocated, those
 * arrays are released before returning. The factor storage held in Glu is
 * left as it was at the point of failure.
 */
void
cgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size,
        int *etree, void *work, int lwork, int *perm_c, int *perm_r,
        SuperMatrix *L, SuperMatrix *U, SuperLUStat_t *stat, int *info)
{
    /* Local working arrays */
    NCPformat *Astore;
    int       *iperm_r = NULL; /* inverse of perm_r; used when
                                  options->Fact == SamePattern_SameRowPerm */
    int       *iperm_c; /* inverse of perm_c */
    int       *swap, *iswap; /* swap is used to store the row permutation
                                during the factorization. Initially, it is set
                                to iperm_c (row indices of Pc*A*Pc').
                                iswap is the inverse of swap. After the
                                factorization, it is equal to perm_r. */
    int       *iwork;
    complex   *cwork;
    int       *segrep, *repfnz, *parent, *xplore;
    int       *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */
    int       *marker, *marker_relax;
    complex   *dense, *tempv;
    float     *stempv;
    int       *relax_end, *relax_fsupc;
    complex   *a;
    int       *asub;
    int       *xa_begin, *xa_end;
    int       *xsup, *supno;
    int       *xlsub, *xlusup, *xusub;
    int       nzlumax;
    float     *amax;
    complex   drop_sum;
    float     alpha, omega;  /* used in MILU, mimicking DRIC */
    static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */
    float     *swork2;     /* used by the second dropping rule */

    /* Local scalars */
    fact_t    fact = options->Fact;
    double    diag_pivot_thresh = options->DiagPivotThresh;
    double    drop_tol = options->ILU_DropTol; /* tau */
    double    fill_ini = options->ILU_FillTol; /* tau^hat */
    double    gamma = options->ILU_FillFactor;
    int       drop_rule = options->ILU_DropRule;
    milu_t    milu = options->ILU_MILU;
    double    fill_tol;
    int       pivrow;   /* pivotal row number in the original matrix A */
    int       nseg1;    /* no of segments in U-column above panel row jcol */
    int       nseg;     /* no of segments in each U-column */
    register int jcol;
    register int kcol;  /* end column of a relaxed snode */
    register int icol;
    register int i, k, jj, new_next, iinfo;
    int       m, n, min_mn, jsupno, fsupc, nextlu, nextu;
    int       w_def;    /* upper bound on panel width */
    int       usepr, iperm_r_allocated = 0;
    int       nnzL, nnzU;
    int       *panel_histo = stat->panel_histo;
    flops_t   *ops = stat->ops;

    int       last_drop; /* the last column which the dropping rules applied */
    int       quota;
    int       nnzAj;     /* number of nonzeros in A(:,1:j) */
    int       nnzLj, nnzUj;
    double    tol_L = drop_tol, tol_U = drop_tol;
    complex   zero = {0.0, 0.0};

    /* Executable */
    iinfo    = 0;
    m        = A->nrow;
    n        = A->ncol;
    min_mn   = SUPERLU_MIN(m, n);
    Astore   = A->Store;
    a        = Astore->nzval;
    asub     = Astore->rowind;
    xa_begin = Astore->colbeg;
    xa_end   = Astore->colend;

    /* Allocate storage common to the factor routines */
    *info = cLUMemInit(fact, work, lwork, m, n, Astore->nnz, panel_size,
                       gamma, L, U, &Glu, &iwork, &cwork);
    if ( *info ) return;   /* nothing local has been allocated yet */

    xsup    = Glu.xsup;
    supno   = Glu.supno;
    xlsub   = Glu.xlsub;
    xlusup  = Glu.xlusup;
    xusub   = Glu.xusub;

    SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore,
             &repfnz, &panel_lsub, &marker_relax, &marker);
    cSetRWork(m, panel_size, cwork, &dense, &tempv);

    usepr = (fact == SamePattern_SameRowPerm);
    if ( usepr ) {
        /* Compute the inverse of perm_r */
        iperm_r = (int *) intMalloc(m);
        for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k;
        iperm_r_allocated = 1;
    }

    iperm_c = (int *) intMalloc(n);
    for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k;
    swap = (int *)intMalloc(n);
    for (k = 0; k < n; k++) swap[k] = iperm_c[k];
    iswap = (int *)intMalloc(n);
    for (k = 0; k < n; k++) iswap[k] = perm_c[k];
    amax = (float *) floatMalloc(panel_size);
    if (drop_rule & DROP_SECONDARY)
        swork2 = (float *)floatMalloc(n);
    else
        swork2 = NULL;

    nnzAj = 0;
    nnzLj = 0;
    nnzUj = 0;
    last_drop = SUPERLU_MAX(min_mn - 2 * sp_ienv(7), (int)(min_mn * 0.95));
    alpha = pow((double)n, -1.0 / options->ILU_MILU_Dim);

    /* Identify relaxed snodes */
    relax_end = (int *) intMalloc(n);
    relax_fsupc = (int *) intMalloc(n);
    if ( options->SymmetricMode == YES )
        ilu_heap_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc);
    else
        ilu_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc);

    ifill (perm_r, m, EMPTY);
    ifill (marker, m * NO_MARKER, EMPTY);
    supno[0] = -1;
    xsup[0]  = xlsub[0] = xusub[0] = xlusup[0] = 0;
    w_def    = panel_size;

    /* Mark the rows used by relaxed supernodes */
    ifill (marker_relax, m, EMPTY);
    i = mark_relax(m, relax_end, relax_fsupc, xa_begin, xa_end,
                   asub, marker_relax);
#if ( PRNTlevel >= 1)
    printf("%d relaxed supernodes.\n", i);
#endif

    /*
     * Work on one "panel" at a time. A panel is one of the following:
     *     (a) a relaxed supernode at the bottom of the etree, or
     *     (b) panel_size contiguous columns, defined by the user
     */
    for (jcol = 0; jcol < min_mn; ) {

        if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */
            kcol = relax_end[jcol];       /* end of the relaxed snode */
            panel_histo[kcol-jcol+1]++;

            /* Drop small rows in the previous supernode. */
            if (jcol > 0 && jcol < last_drop) {
                int first = xsup[supno[jcol - 1]];
                int last = jcol - 1;
                int quota;

                /* Compute the quota */
                if (drop_rule & DROP_PROWS)
                    quota = gamma * Astore->nnz / m * (m - first) / m
                            * (last - first + 1);
                else if (drop_rule & DROP_COLUMN) {
                    int i;
                    quota = 0;
                    for (i = first; i <= last; i++)
                        quota += xa_end[i] - xa_begin[i];
                    quota = gamma * quota * (m - first) / m;
                } else if (drop_rule & DROP_AREA)
                    quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
                            - nnzLj;
                else
                    quota = m * n;
                fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) / min_mn);

                /* Drop small rows */
                stempv = (float *) tempv;
                i = ilu_cdrop_row(options, first, last, tol_L, quota, &nnzLj,
                                  &fill_tol, &Glu, stempv, swork2, 0);
                /* Reset the parameters */
                if (drop_rule & DROP_DYNAMIC) {
                    if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
                             < nnzLj)
                        tol_L = SUPERLU_MIN(1.0, tol_L * 2.0);
                    else
                        tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5);
                }
                if (fill_tol < 0) iinfo -= (int)fill_tol;
#ifdef DEBUG
                num_drop_L += i * (last - first + 1);
#endif
            }

            /* --------------------------------------
             * Factorize the relaxed supernode(jcol:kcol)
             * -------------------------------------- */
            /* Determine the union of the row structure of the snode */
            if ( (*info = ilu_csnode_dfs(jcol, kcol, asub, xa_begin, xa_end,
                                         marker, &Glu)) != 0 )
                goto free_local;

            nextu    = xusub[jcol];
            nextlu   = xlusup[jcol];
            jsupno   = supno[jcol];
            fsupc    = xsup[jsupno];
            new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1);
            nzlumax = Glu.nzlumax;
            while ( new_next > nzlumax ) {
                if ((*info = cLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, &Glu)))
                    goto free_local;
            }

            for (icol = jcol; icol <= kcol; icol++) {
                xusub[icol+1] = nextu;

                amax[0] = 0.0;
                /* Scatter into SPA dense[*] */
                for (k = xa_begin[icol]; k < xa_end[icol]; k++) {
                    register float tmp = c_abs1 (&a[k]);
                    if (tmp > amax[0]) amax[0] = tmp;
                    dense[asub[k]] = a[k];
                }
                nnzAj += xa_end[icol] - xa_begin[icol];
                if (amax[0] == 0.0) {
                    amax[0] = fill_ini;
#if ( PRNTlevel >= 1)
                    printf("Column %d is entirely zero!\n", icol);
                    fflush(stdout);
#endif
                }

                /* Numeric update within the snode */
                csnode_bmod(icol, jsupno, fsupc, dense, tempv, &Glu, stat);

                if (usepr) pivrow = iperm_r[icol];
                fill_tol = pow(fill_ini, 1.0 - (double)icol / (double)min_mn);
                if ( (*info = ilu_cpivotL(icol, diag_pivot_thresh, &usepr,
                                          perm_r, iperm_c[icol], swap, iswap,
                                          marker_relax, &pivrow,
                                          amax[0] * fill_tol, milu, zero,
                                          &Glu, stat)) ) {
                    iinfo++;
                    marker[pivrow] = kcol;
                }

            }

            jcol = kcol + 1;

        } else { /* Work on one panel of panel_size columns */

            /* Adjust panel_size so that a panel won't overlap with the next
             * relaxed snode.
             */
            panel_size = w_def;
            for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++)
                if ( relax_end[k] != EMPTY ) {
                    panel_size = k - jcol;
                    break;
                }
            if ( k == min_mn ) panel_size = min_mn - jcol;
            panel_histo[panel_size]++;

            /* symbolic factor on a panel of columns */
            ilu_cpanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1,
                           dense, amax, panel_lsub, segrep, repfnz,
                           marker, parent, xplore, &Glu);

            /* numeric sup-panel updates in topological order */
            cpanel_bmod(m, panel_size, jcol, nseg1, dense,
                        tempv, segrep, repfnz, &Glu, stat);

            /* Sparse LU within the panel, and below panel diagonal */
            for (jj = jcol; jj < jcol + panel_size; jj++) {

                k = (jj - jcol) * m; /* column index for w-wide arrays */

                nseg = nseg1; /* Begin after all the panel segments */

                nnzAj += xa_end[jj] - xa_begin[jj];

                if ((*info = ilu_ccolumn_dfs(m, jj, perm_r, &nseg,
                                             &panel_lsub[k], segrep, &repfnz[k],
                                             marker, parent, xplore, &Glu)))
                    goto free_local;

                /* Numeric updates */
                if ((*info = ccolumn_bmod(jj, (nseg - nseg1), &dense[k],
                                          tempv, &segrep[nseg1], &repfnz[k],
                                          jcol, &Glu, stat)) != 0)
                    goto free_local;

                /* Make a fill-in position if the column is entirely zero */
                if (xlsub[jj + 1] == xlsub[jj]) {
                    register int i, row;
                    int nextl;
                    int nzlmax = Glu.nzlmax;
                    int *lsub = Glu.lsub;
                    int *marker2 = marker + 2 * m;

                    /* Allocate memory */
                    nextl = xlsub[jj] + 1;
                    if (nextl >= nzlmax) {
                        int error = cLUMemXpand(jj, nextl, LSUB, &nzlmax, &Glu);
                        if (error) { *info = error; goto free_local; }
                        lsub = Glu.lsub;
                    }
                    xlsub[jj + 1]++;
                    assert(xlusup[jj]==xlusup[jj+1]);
                    xlusup[jj + 1]++;
                    Glu.lusup[xlusup[jj]] = zero;

                    /* Choose a row index (pivrow) for fill-in */
                    /* NOTE(review): if no i in [jj, n) satisfies the test,
                       the loop ends with i == n and swap[n] is read --
                       confirm a match is always guaranteed. */
                    for (i = jj; i < n; i++)
                        if (marker_relax[swap[i]] <= jj) break;
                    row = swap[i];
                    marker2[row] = jj;
                    lsub[xlsub[jj]] = row;
#ifdef DEBUG
                    printf("Fill col %d.\n", jj);
                    fflush(stdout);
#endif
                }

                /* Compute the quota */
                if (drop_rule & DROP_PROWS)
                    quota = gamma * Astore->nnz / m * jj / m;
                else if (drop_rule & DROP_COLUMN)
                    quota = gamma * (xa_end[jj] - xa_begin[jj]) *
                            (jj + 1) / m;
                else if (drop_rule & DROP_AREA)
                    quota = gamma * 0.9 * nnzAj * 0.5 - nnzUj;
                else
                    quota = m;

                /* Copy the U-segments to ucol[*] and drop small entries */
                if ((*info = ilu_ccopy_to_ucol(jj, nseg, segrep, &repfnz[k],
                                               perm_r, &dense[k], drop_rule,
                                               milu, amax[jj - jcol] * tol_U,
                                               quota, &drop_sum, &nnzUj, &Glu,
                                               swork2)) != 0)
                    goto free_local;

                /* Reset the dropping threshold if required */
                /* NOTE(review): this compares against nnzLj although the U
                   quota above is based on nnzUj -- confirm which is meant. */
                if (drop_rule & DROP_DYNAMIC) {
                    if (gamma * 0.9 * nnzAj * 0.5 < nnzLj)
                        tol_U = SUPERLU_MIN(1.0, tol_U * 2.0);
                    else
                        tol_U = SUPERLU_MAX(drop_tol, tol_U * 0.5);
                }

                /* Damp the dropped sum whenever it is nonzero, i.e. when
                   either component is nonzero. Requiring both components to
                   be nonzero skipped the scaling for purely real or purely
                   imaginary sums. */
                if (drop_sum.r != 0.0 || drop_sum.i != 0.0)
                {
                    omega = SUPERLU_MIN(2.0*(1.0-alpha)/c_abs1(&drop_sum), 1.0);
                    cs_mult(&drop_sum, &drop_sum, omega);
                }
                if (usepr) pivrow = iperm_r[jj];
                fill_tol = pow(fill_ini, 1.0 - (double)jj / (double)min_mn);
                if ( (*info = ilu_cpivotL(jj, diag_pivot_thresh, &usepr, perm_r,
                                          iperm_c[jj], swap, iswap,
                                          marker_relax, &pivrow,
                                          amax[jj - jcol] * fill_tol, milu,
                                          drop_sum, &Glu, stat)) ) {
                    iinfo++;
                    marker[m + pivrow] = jj;
                    marker[2 * m + pivrow] = jj;
                }

                /* Reset repfnz[] for this column */
                resetrep_col (nseg, segrep, &repfnz[k]);

                /* Start a new supernode, drop the previous one */
                if (jj > 0 && supno[jj] > supno[jj - 1] && jj < last_drop) {
                    int first = xsup[supno[jj - 1]];
                    int last = jj - 1;
                    int quota;

                    /* Compute the quota */
                    if (drop_rule & DROP_PROWS)
                        quota = gamma * Astore->nnz / m * (m - first) / m
                                * (last - first + 1);
                    else if (drop_rule & DROP_COLUMN) {
                        int i;
                        quota = 0;
                        for (i = first; i <= last; i++)
                            quota += xa_end[i] - xa_begin[i];
                        quota = gamma * quota * (m - first) / m;
                    } else if (drop_rule & DROP_AREA)
                        quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0)
                                / m) - nnzLj;
                    else
                        quota = m * n;
                    fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) /
                                   (double)min_mn);

                    /* Drop small rows */
                    stempv = (float *) tempv;
                    i = ilu_cdrop_row(options, first, last, tol_L, quota,
                                      &nnzLj, &fill_tol, &Glu, stempv, swork2,
                                      1);

                    /* Reset the parameters */
                    if (drop_rule & DROP_DYNAMIC) {
                        if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
                                < nnzLj)
                            tol_L = SUPERLU_MIN(1.0, tol_L * 2.0);
                        else
                            tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5);
                    }
                    if (fill_tol < 0) iinfo -= (int)fill_tol;
#ifdef DEBUG
                    num_drop_L += i * (last - first + 1);
#endif
                } /* if start a new supernode */

            } /* for */

            jcol += panel_size; /* Move to the next panel */

        } /* else */

    } /* for */

    *info = iinfo;

    if ( m > n ) {
        k = 0;
        for (i = 0; i < m; ++i)
            if ( perm_r[i] == EMPTY ) {
                perm_r[i] = n + k;
                ++k;
            }
    }

    ilu_countnz(min_mn, &nnzL, &nnzU, &Glu);
    fixupL(min_mn, perm_r, &Glu);

    cLUWorkFree(iwork, cwork, &Glu); /* Free work space and compress storage */

    if ( fact == SamePattern_SameRowPerm ) {
        /* L and U structures may have changed due to possibly different
           pivoting, even though the storage is available.
           There could also be memory expansions, so the array locations
           may have changed, */
        ((SCformat *)L->Store)->nnz = nnzL;
        ((SCformat *)L->Store)->nsuper = Glu.supno[n];
        ((SCformat *)L->Store)->nzval = Glu.lusup;
        ((SCformat *)L->Store)->nzval_colptr = Glu.xlusup;
        ((SCformat *)L->Store)->rowind = Glu.lsub;
        ((SCformat *)L->Store)->rowind_colptr = Glu.xlsub;
        ((NCformat *)U->Store)->nnz = nnzU;
        ((NCformat *)U->Store)->nzval = Glu.ucol;
        ((NCformat *)U->Store)->rowind = Glu.usub;
        ((NCformat *)U->Store)->colptr = Glu.xusub;
    } else {
        cCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL, Glu.lusup,
                                 Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno,
                                 Glu.xsup, SLU_SC, SLU_C, SLU_TRLU);
        cCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol,
                               Glu.usub, Glu.xusub, SLU_NC, SLU_C, SLU_TRU);
    }

    ops[FACT] += ops[TRSV] + ops[GEMV];
    stat->expansions = --(Glu.num_expansions);

free_local:
    /* Reached on success and on every error exit taken after the local
       arrays were allocated; releases only memory owned by this routine. */
    if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
    SUPERLU_FREE (iperm_c);
    SUPERLU_FREE (relax_end);
    SUPERLU_FREE (swap);
    SUPERLU_FREE (iswap);
    SUPERLU_FREE (relax_fsupc);
    SUPERLU_FREE (amax);
    if ( swork2 ) SUPERLU_FREE (swork2);

}
/*! \brief * * <pre> * Purpose * ======= * * CGSRFS improves the computed solution to a system of linear * equations and provides error bounds and backward error estimates for * the solution. * * If equilibration was performed, the system becomes: * (diag(R)*A_original*diag(C)) * X = diag(R)*B_original. * * See supermatrix.h for the definition of 'SuperMatrix' structure. * * Arguments * ========= * * trans (input) trans_t * Specifies the form of the system of equations: * = NOTRANS: A * X = B (No transpose) * = TRANS: A'* X = B (Transpose) * = CONJ: A**H * X = B (Conjugate transpose) * * A (input) SuperMatrix* * The original matrix A in the system, or the scaled A if * equilibration was done. The type of A can be: * Stype = SLU_NC, Dtype = SLU_C, Mtype = SLU_GE. * * L (input) SuperMatrix* * The factor L from the factorization Pr*A*Pc=L*U. Use * compressed row subscripts storage for supernodes, * i.e., L has types: Stype = SLU_SC, Dtype = SLU_C, Mtype = SLU_TRLU. * * U (input) SuperMatrix* * The factor U from the factorization Pr*A*Pc=L*U as computed by * cgstrf(). Use column-wise storage scheme, * i.e., U has types: Stype = SLU_NC, Dtype = SLU_C, Mtype = SLU_TRU. * * perm_c (input) int*, dimension (A->ncol) * Column permutation vector, which defines the * permutation matrix Pc; perm_c[i] = j means column i of A is * in position j in A*Pc. * * perm_r (input) int*, dimension (A->nrow) * Row permutation vector, which defines the permutation matrix Pr; * perm_r[i] = j means row i of A is in position j in Pr*A. * * equed (input) Specifies the form of equilibration that was done. * = 'N': No equilibration. * = 'R': Row equilibration, i.e., A was premultiplied by diag(R). * = 'C': Column equilibration, i.e., A was postmultiplied by * diag(C). * = 'B': Both row and column equilibration, i.e., A was replaced * by diag(R)*A*diag(C). * * R (input) float*, dimension (A->nrow) * The row scale factors for A. * If equed = 'R' or 'B', A is premultiplied by diag(R). 
* If equed = 'N' or 'C', R is not accessed. * * C (input) float*, dimension (A->ncol) * The column scale factors for A. * If equed = 'C' or 'B', A is postmultiplied by diag(C). * If equed = 'N' or 'R', C is not accessed. * * B (input) SuperMatrix* * B has types: Stype = SLU_DN, Dtype = SLU_C, Mtype = SLU_GE. * The right hand side matrix B. * if equed = 'R' or 'B', B is premultiplied by diag(R). * * X (input/output) SuperMatrix* * X has types: Stype = SLU_DN, Dtype = SLU_C, Mtype = SLU_GE. * On entry, the solution matrix X, as computed by cgstrs(). * On exit, the improved solution matrix X. * if *equed = 'C' or 'B', X should be premultiplied by diag(C) * in order to obtain the solution to the original system. * * FERR (output) float*, dimension (B->ncol) * The estimated forward error bound for each solution vector * X(j) (the j-th column of the solution matrix X). * If XTRUE is the true solution corresponding to X(j), FERR(j) * is an estimated upper bound for the magnitude of the largest * element in (X(j) - XTRUE) divided by the magnitude of the * largest element in X(j). The estimate is as reliable as * the estimate for RCOND, and is almost always a slight * overestimate of the true error. * * BERR (output) float*, dimension (B->ncol) * The componentwise relative backward error of each solution * vector X(j) (i.e., the smallest relative change in * any element of A or B that makes X(j) an exact solution). * * stat (output) SuperLUStat_t* * Record the statistics on runtime and floating-point operation count. * See util.h for the definition of 'SuperLUStat_t'. * * info (output) int* * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * * Internal Parameters * =================== * * ITMAX is the maximum number of steps of iterative refinement. 
* * </pre> */ void cgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U, int *perm_c, int *perm_r, char *equed, float *R, float *C, SuperMatrix *B, SuperMatrix *X, float *ferr, float *berr, SuperLUStat_t *stat, int *info) { #define ITMAX 5 /* Table of constant values */ int ione = 1; complex ndone = {-1., 0.}; complex done = {1., 0.}; /* Local variables */ NCformat *Astore; complex *Aval; SuperMatrix Bjcol; DNformat *Bstore, *Xstore, *Bjcol_store; complex *Bmat, *Xmat, *Bptr, *Xptr; int kase; float safe1, safe2; int i, j, k, irow, nz, count, notran, rowequ, colequ; int ldb, ldx, nrhs; float s, xk, lstres, eps, safmin; char transc[1]; trans_t transt; complex *work; float *rwork; int *iwork; int isave[3]; extern int clacon2_(int *, complex *, complex *, float *, int *, int []); #ifdef _CRAY extern int CCOPY(int *, complex *, int *, complex *, int *); extern int CSAXPY(int *, complex *, complex *, int *, complex *, int *); #else extern int ccopy_(int *, complex *, int *, complex *, int *); extern int caxpy_(int *, complex *, complex *, int *, complex *, int *); #endif Astore = A->Store; Aval = Astore->nzval; Bstore = B->Store; Xstore = X->Store; Bmat = Bstore->nzval; Xmat = Xstore->nzval; ldb = Bstore->lda; ldx = Xstore->lda; nrhs = B->ncol; /* Test the input parameters */ *info = 0; notran = (trans == NOTRANS); if ( !notran && trans != TRANS && trans != CONJ ) *info = -1; else if ( A->nrow != A->ncol || A->nrow < 0 || A->Stype != SLU_NC || A->Dtype != SLU_C || A->Mtype != SLU_GE ) *info = -2; else if ( L->nrow != L->ncol || L->nrow < 0 || L->Stype != SLU_SC || L->Dtype != SLU_C || L->Mtype != SLU_TRLU ) *info = -3; else if ( U->nrow != U->ncol || U->nrow < 0 || U->Stype != SLU_NC || U->Dtype != SLU_C || U->Mtype != SLU_TRU ) *info = -4; else if ( ldb < SUPERLU_MAX(0, A->nrow) || B->Stype != SLU_DN || B->Dtype != SLU_C || B->Mtype != SLU_GE ) *info = -10; else if ( ldx < SUPERLU_MAX(0, A->nrow) || X->Stype != SLU_DN || X->Dtype != SLU_C || X->Mtype != 
SLU_GE ) *info = -11; if (*info != 0) { i = -(*info); input_error("cgsrfs", &i); return; } /* Quick return if possible */ if ( A->nrow == 0 || nrhs == 0) { for (j = 0; j < nrhs; ++j) { ferr[j] = 0.; berr[j] = 0.; } return; } rowequ = lsame_(equed, "R") || lsame_(equed, "B"); colequ = lsame_(equed, "C") || lsame_(equed, "B"); /* Allocate working space */ work = complexMalloc(2*A->nrow); rwork = (float *) SUPERLU_MALLOC( A->nrow * sizeof(float) ); iwork = intMalloc(A->nrow); if ( !work || !rwork || !iwork ) ABORT("Malloc fails for work/rwork/iwork."); if ( notran ) { *(unsigned char *)transc = 'N'; transt = TRANS; } else { *(unsigned char *)transc = 'T'; transt = NOTRANS; } /* NZ = maximum number of nonzero elements in each row of A, plus 1 */ nz = A->ncol + 1; eps = smach("Epsilon"); safmin = smach("Safe minimum"); /* Set SAFE1 essentially to be the underflow threshold times the number of additions in each row. */ safe1 = nz * safmin; safe2 = safe1 / eps; /* Compute the number of nonzeros in each row (or column) of A */ for (i = 0; i < A->nrow; ++i) iwork[i] = 0; if ( notran ) { for (k = 0; k < A->ncol; ++k) for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) ++iwork[Astore->rowind[i]]; } else { for (k = 0; k < A->ncol; ++k) iwork[k] = Astore->colptr[k+1] - Astore->colptr[k]; } /* Copy one column of RHS B into Bjcol. */ Bjcol.Stype = B->Stype; Bjcol.Dtype = B->Dtype; Bjcol.Mtype = B->Mtype; Bjcol.nrow = B->nrow; Bjcol.ncol = 1; Bjcol.Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) ); if ( !Bjcol.Store ) ABORT("SUPERLU_MALLOC fails for Bjcol.Store"); Bjcol_store = Bjcol.Store; Bjcol_store->lda = ldb; Bjcol_store->nzval = work; /* address aliasing */ /* Do for each right hand side ... */ for (j = 0; j < nrhs; ++j) { count = 0; lstres = 3.; Bptr = &Bmat[j*ldb]; Xptr = &Xmat[j*ldx]; while (1) { /* Loop until stopping criterion is satisfied. */ /* Compute residual R = B - op(A) * X, where op(A) = A, A**T, or A**H, depending on TRANS. 
*/ #ifdef _CRAY CCOPY(&A->nrow, Bptr, &ione, work, &ione); #else ccopy_(&A->nrow, Bptr, &ione, work, &ione); #endif sp_cgemv(transc, ndone, A, Xptr, ione, done, work, ione); /* Compute componentwise relative backward error from formula max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) where abs(Z) is the componentwise absolute value of the matrix or vector Z. If the i-th component of the denominator is less than SAFE2, then SAFE1 is added to the i-th component of the numerator before dividing. */ for (i = 0; i < A->nrow; ++i) rwork[i] = c_abs1( &Bptr[i] ); /* Compute abs(op(A))*abs(X) + abs(B). */ if (notran) { for (k = 0; k < A->ncol; ++k) { xk = c_abs1( &Xptr[k] ); for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) rwork[Astore->rowind[i]] += c_abs1(&Aval[i]) * xk; } } else { for (k = 0; k < A->ncol; ++k) { s = 0.; for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) { irow = Astore->rowind[i]; s += c_abs1(&Aval[i]) * c_abs1(&Xptr[irow]); } rwork[k] += s; } } s = 0.; for (i = 0; i < A->nrow; ++i) { if (rwork[i] > safe2) { s = SUPERLU_MAX( s, c_abs1(&work[i]) / rwork[i] ); } else if ( rwork[i] != 0.0 ) { s = SUPERLU_MAX( s, (c_abs1(&work[i]) + safe1) / rwork[i] ); } /* If rwork[i] is exactly 0.0, then we know the true residual also must be exactly 0.0. */ } berr[j] = s; /* Test stopping criterion. Continue iterating if 1) The residual BERR(J) is larger than machine epsilon, and 2) BERR(J) decreased by at least a factor of 2 during the last iteration, and 3) At most ITMAX iterations tried. */ if (berr[j] > eps && berr[j] * 2. <= lstres && count < ITMAX) { /* Update solution and try again. */ cgstrs (trans, L, U, perm_c, perm_r, &Bjcol, stat, info); #ifdef _CRAY CAXPY(&A->nrow, &done, work, &ione, &Xmat[j*ldx], &ione); #else caxpy_(&A->nrow, &done, work, &ione, &Xmat[j*ldx], &ione); #endif lstres = berr[j]; ++count; } else { break; } } /* end while */ stat->RefineSteps = count; /* Bound error from formula: norm(X - XTRUE) / norm(X) .le. 
FERR = norm( abs(inv(op(A)))* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) where norm(Z) is the magnitude of the largest component of Z inv(op(A)) is the inverse of op(A) abs(Z) is the componentwise absolute value of the matrix or vector Z NZ is the maximum number of nonzeros in any row of A, plus 1 EPS is machine epsilon The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) is incremented by SAFE1 if the i-th component of abs(op(A))*abs(X) + abs(B) is less than SAFE2. Use CLACON2 to estimate the infinity-norm of the matrix inv(op(A)) * diag(W), where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */ for (i = 0; i < A->nrow; ++i) rwork[i] = c_abs1( &Bptr[i] ); /* Compute abs(op(A))*abs(X) + abs(B). */ if ( notran ) { for (k = 0; k < A->ncol; ++k) { xk = c_abs1( &Xptr[k] ); for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) rwork[Astore->rowind[i]] += c_abs1(&Aval[i]) * xk; } } else { for (k = 0; k < A->ncol; ++k) { s = 0.; for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) { irow = Astore->rowind[i]; xk = c_abs1( &Xptr[irow] ); s += c_abs1(&Aval[i]) * xk; } rwork[k] += s; } } for (i = 0; i < A->nrow; ++i) if (rwork[i] > safe2) rwork[i] = c_abs(&work[i]) + (iwork[i]+1)*eps*rwork[i]; else rwork[i] = c_abs(&work[i])+(iwork[i]+1)*eps*rwork[i]+safe1; kase = 0; do { clacon2_(&A->nrow, &work[A->nrow], work, &ferr[j], &kase, isave); if (kase == 0) break; if (kase == 1) { /* Multiply by diag(W)*inv(op(A)**T)*(diag(C) or diag(R)). */ if ( notran && colequ ) for (i = 0; i < A->ncol; ++i) { cs_mult(&work[i], &work[i], C[i]); } else if ( !notran && rowequ ) for (i = 0; i < A->nrow; ++i) { cs_mult(&work[i], &work[i], R[i]); } cgstrs (transt, L, U, perm_c, perm_r, &Bjcol, stat, info); for (i = 0; i < A->nrow; ++i) { cs_mult(&work[i], &work[i], rwork[i]); } } else { /* Multiply by (diag(C) or diag(R))*inv(op(A))*diag(W). 
*/ for (i = 0; i < A->nrow; ++i) { cs_mult(&work[i], &work[i], rwork[i]); } cgstrs (trans, L, U, perm_c, perm_r, &Bjcol, stat, info); if ( notran && colequ ) for (i = 0; i < A->ncol; ++i) { cs_mult(&work[i], &work[i], C[i]); } else if ( !notran && rowequ ) for (i = 0; i < A->ncol; ++i) { cs_mult(&work[i], &work[i], R[i]); } } } while ( kase != 0 ); /* Normalize error. */ lstres = 0.; if ( notran && colequ ) { for (i = 0; i < A->nrow; ++i) lstres = SUPERLU_MAX( lstres, C[i] * c_abs1( &Xptr[i]) ); } else if ( !notran && rowequ ) { for (i = 0; i < A->nrow; ++i) lstres = SUPERLU_MAX( lstres, R[i] * c_abs1( &Xptr[i]) ); } else { for (i = 0; i < A->nrow; ++i) lstres = SUPERLU_MAX( lstres, c_abs1( &Xptr[i]) ); } if ( lstres != 0. ) ferr[j] /= lstres; } /* for each RHS j ... */ SUPERLU_FREE(work); SUPERLU_FREE(rwork); SUPERLU_FREE(iwork); SUPERLU_FREE(Bjcol.Store); return; } /* cgsrfs */
/*! \brief * <pre> * Purpose * ======= * ilu_cdrop_row() - Drop some small rows from the previous * supernode (L-part only). * </pre> */ int ilu_cdrop_row( superlu_options_t *options, /* options */ int first, /* index of the first column in the supernode */ int last, /* index of the last column in the supernode */ double drop_tol, /* dropping parameter */ int quota, /* maximum nonzero entries allowed */ int *nnzLj, /* in/out number of nonzeros in L(:, 1:last) */ double *fill_tol, /* in/out - on exit, fill_tol=-num_zero_pivots, * does not change if options->ILU_MILU != SMILU1 */ GlobalLU_t *Glu, /* modified */ float swork[], /* working space * the length of swork[] should be no less than * the number of rows in the supernode */ float swork2[], /* working space with the same size as swork[], * used only by the second dropping rule */ int lastc /* if lastc == 0, there is nothing after the * working supernode [first:last]; * if lastc == 1, there is one more column after * the working supernode. */ ) { register int i, j, k, m1; register int nzlc; /* number of nonzeros in column last+1 */ register int xlusup_first, xlsub_first; int m, n; /* m x n is the size of the supernode */ int r = 0; /* number of dropped rows */ register float *temp; register complex *lusup = (complex *) Glu->lusup; register int *lsub = Glu->lsub; register int *xlsub = Glu->xlsub; register int *xlusup = Glu->xlusup; register float d_max = 0.0, d_min = 1.0; int drop_rule = options->ILU_DropRule; milu_t milu = options->ILU_MILU; norm_t nrm = options->ILU_Norm; complex zero = {0.0, 0.0}; complex one = {1.0, 0.0}; complex none = {-1.0, 0.0}; int i_1 = 1; int inc_diag; /* inc_diag = m + 1 */ int nzp = 0; /* number of zero pivots */ float alpha = pow((double)(Glu->n), -1.0 / options->ILU_MILU_Dim); xlusup_first = xlusup[first]; xlsub_first = xlsub[first]; m = xlusup[first + 1] - xlusup_first; n = last - first + 1; m1 = m - 1; inc_diag = m + 1; nzlc = lastc ? 
(xlusup[last + 2] - xlusup[last + 1]) : 0; temp = swork - n; /* Quick return if nothing to do. */ if (m == 0 || m == n || drop_rule == NODROP) { *nnzLj += m * n; return 0; } /* basic dropping: ILU(tau) */ for (i = n; i <= m1; ) { /* the average abs value of ith row */ switch (nrm) { case ONE_NORM: temp[i] = scasum_(&n, &lusup[xlusup_first + i], &m) / (double)n; break; case TWO_NORM: temp[i] = scnrm2_(&n, &lusup[xlusup_first + i], &m) / sqrt((double)n); break; case INF_NORM: default: k = icamax_(&n, &lusup[xlusup_first + i], &m) - 1; temp[i] = c_abs1(&lusup[xlusup_first + i + m * k]); break; } /* drop small entries due to drop_tol */ if (drop_rule & DROP_BASIC && temp[i] < drop_tol) { r++; /* drop the current row and move the last undropped row here */ if (r > 1) /* add to last row */ { /* accumulate the sum (for MILU) */ switch (milu) { case SMILU_1: case SMILU_2: caxpy_(&n, &one, &lusup[xlusup_first + i], &m, &lusup[xlusup_first + m - 1], &m); break; case SMILU_3: for (j = 0; j < n; j++) lusup[xlusup_first + (m - 1) + j * m].r += c_abs1(&lusup[xlusup_first + i + j * m]); break; case SILU: default: break; } ccopy_(&n, &lusup[xlusup_first + m1], &m, &lusup[xlusup_first + i], &m); } /* if (r > 1) */ else /* move to last row */ { cswap_(&n, &lusup[xlusup_first + m1], &m, &lusup[xlusup_first + i], &m); if (milu == SMILU_3) for (j = 0; j < n; j++) { lusup[xlusup_first + m1 + j * m].r = c_abs1(&lusup[xlusup_first + m1 + j * m]); lusup[xlusup_first + m1 + j * m].i = 0.0; } } lsub[xlsub_first + i] = lsub[xlsub_first + m1]; m1--; continue; } /* if dropping */ else { if (temp[i] > d_max) d_max = temp[i]; if (temp[i] < d_min) d_min = temp[i]; } i++; } /* for */ /* Secondary dropping: drop more rows according to the quota. 
*/ quota = ceil((double)quota / (double)n); if (drop_rule & DROP_SECONDARY && m - r > quota) { register double tol = d_max; /* Calculate the second dropping tolerance */ if (quota > n) { if (drop_rule & DROP_INTERP) /* by interpolation */ { d_max = 1.0 / d_max; d_min = 1.0 / d_min; tol = 1.0 / (d_max + (d_min - d_max) * quota / (m - n - r)); } else /* by quick select */ { int len = m1 - n + 1; scopy_(&len, swork, &i_1, swork2, &i_1); tol = sqselect(len, swork2, quota - n); #if 0 register int *itemp = iwork - n; A = temp; for (i = n; i <= m1; i++) itemp[i] = i; qsort(iwork, m1 - n + 1, sizeof(int), _compare_); tol = temp[itemp[quota]]; #endif } } for (i = n; i <= m1; ) { if (temp[i] <= tol) { register int j; r++; /* drop the current row and move the last undropped row here */ if (r > 1) /* add to last row */ { /* accumulate the sum (for MILU) */ switch (milu) { case SMILU_1: case SMILU_2: caxpy_(&n, &one, &lusup[xlusup_first + i], &m, &lusup[xlusup_first + m - 1], &m); break; case SMILU_3: for (j = 0; j < n; j++) lusup[xlusup_first + (m - 1) + j * m].r += c_abs1(&lusup[xlusup_first + i + j * m]); break; case SILU: default: break; } ccopy_(&n, &lusup[xlusup_first + m1], &m, &lusup[xlusup_first + i], &m); } /* if (r > 1) */ else /* move to last row */ { cswap_(&n, &lusup[xlusup_first + m1], &m, &lusup[xlusup_first + i], &m); if (milu == SMILU_3) for (j = 0; j < n; j++) { lusup[xlusup_first + m1 + j * m].r = c_abs1(&lusup[xlusup_first + m1 + j * m]); lusup[xlusup_first + m1 + j * m].i = 0.0; } } lsub[xlsub_first + i] = lsub[xlsub_first + m1]; m1--; temp[i] = temp[m1]; continue; } i++; } /* for */ } /* if secondary dropping */ for (i = n; i < m; i++) temp[i] = 0.0; if (r == 0) { *nnzLj += m * n; return 0; } /* add dropped entries to the diagnal */ if (milu != SILU) { register int j; complex t; float omega; for (j = 0; j < n; j++) { t = lusup[xlusup_first + (m - 1) + j * m]; if (t.r == 0.0 && t.i == 0.0) continue; omega = SUPERLU_MIN(2.0 * (1.0 - alpha) / c_abs1(&t), 
1.0); cs_mult(&t, &t, omega); switch (milu) { case SMILU_1: if ( !(c_eq(&t, &none)) ) { c_add(&t, &t, &one); cc_mult(&lusup[xlusup_first + j * inc_diag], &lusup[xlusup_first + j * inc_diag], &t); } else { cs_mult( &lusup[xlusup_first + j * inc_diag], &lusup[xlusup_first + j * inc_diag], *fill_tol); #ifdef DEBUG printf("[1] ZERO PIVOT: FILL col %d.\n", first + j); fflush(stdout); #endif nzp++; } break; case SMILU_2: cs_mult(&lusup[xlusup_first + j * inc_diag], &lusup[xlusup_first + j * inc_diag], 1.0 + c_abs1(&t)); break; case SMILU_3: c_add(&t, &t, &one); cc_mult(&lusup[xlusup_first + j * inc_diag], &lusup[xlusup_first + j * inc_diag], &t); break; case SILU: default: break; } } if (nzp > 0) *fill_tol = -nzp; } /* Remove dropped entries from the memory and fix the pointers. */ m1 = m - r; for (j = 1; j < n; j++) { register int tmp1, tmp2; tmp1 = xlusup_first + j * m1; tmp2 = xlusup_first + j * m; for (i = 0; i < m1; i++) lusup[i + tmp1] = lusup[i + tmp2]; } for (i = 0; i < nzlc; i++) lusup[xlusup_first + i + n * m1] = lusup[xlusup_first + i + n * m]; for (i = 0; i < nzlc; i++) lsub[xlsub[last + 1] - r + i] = lsub[xlsub[last + 1] + i]; for (i = first + 1; i <= last + 1; i++) { xlusup[i] -= r * (i - first); xlsub[i] -= r; } if (lastc) { xlusup[last + 2] -= r * n; xlsub[last + 2] -= r; } *nnzLj += (m - r) * n; return r; }
int ilu_cpivotL( const int jcol, /* in */ const double u, /* in - diagonal pivoting threshold */ int *usepr, /* re-use the pivot sequence given by * perm_r/iperm_r */ int *perm_r, /* may be modified */ int diagind, /* diagonal of Pc*A*Pc' */ int *swap, /* in/out record the row permutation */ int *iswap, /* in/out inverse of swap, it is the same as perm_r after the factorization */ int *marker, /* in */ int *pivrow, /* in/out, as an input if *usepr!=0 */ double fill_tol, /* in - fill tolerance of current column * used for a singular column */ milu_t milu, /* in */ complex drop_sum, /* in - computed in ilu_ccopy_to_ucol() (MILU only) */ GlobalLU_t *Glu, /* modified - global LU data structures */ SuperLUStat_t *stat /* output */ ) { int n; /* number of columns */ int fsupc; /* first column in the supernode */ int nsupc; /* no of columns in the supernode */ int nsupr; /* no of rows in the supernode */ int lptr; /* points to the starting subscript of the supernode */ register int pivptr; int old_pivptr, diag, ptr0; register float pivmax, rtemp; float thresh; complex temp; complex *lu_sup_ptr; complex *lu_col_ptr; int *lsub_ptr; register int isub, icol, k, itemp; int *lsub, *xlsub; complex *lusup; int *xlusup; flops_t *ops = stat->ops; int info; complex one = {1.0, 0.0}; /* Initialize pointers */ n = Glu->n; lsub = Glu->lsub; xlsub = Glu->xlsub; lusup = Glu->lusup; xlusup = Glu->xlusup; fsupc = (Glu->xsup)[(Glu->supno)[jcol]]; nsupc = jcol - fsupc; /* excluding jcol; nsupc >= 0 */ lptr = xlsub[fsupc]; nsupr = xlsub[fsupc+1] - lptr; lu_sup_ptr = &lusup[xlusup[fsupc]]; /* start of the current supernode */ lu_col_ptr = &lusup[xlusup[jcol]]; /* start of jcol in the supernode */ lsub_ptr = &lsub[lptr]; /* start of row indices of the supernode */ /* Determine the largest abs numerical value for partial pivoting; Also search for user-specified pivot, and diagonal element. 
*/ pivmax = -1.0; pivptr = nsupc; diag = EMPTY; old_pivptr = nsupc; ptr0 = EMPTY; for (isub = nsupc; isub < nsupr; ++isub) { if (marker[lsub_ptr[isub]] > jcol) continue; /* do not overlap with a later relaxed supernode */ switch (milu) { case SMILU_1: c_add(&temp, &lu_col_ptr[isub], &drop_sum); rtemp = c_abs1(&temp); break; case SMILU_2: case SMILU_3: /* In this case, drop_sum contains the sum of the abs. value */ rtemp = c_abs1(&lu_col_ptr[isub]); break; case SILU: default: rtemp = c_abs1(&lu_col_ptr[isub]); break; } if (rtemp > pivmax) { pivmax = rtemp; pivptr = isub; } if (*usepr && lsub_ptr[isub] == *pivrow) old_pivptr = isub; if (lsub_ptr[isub] == diagind) diag = isub; if (ptr0 == EMPTY) ptr0 = isub; } if (milu == SMILU_2 || milu == SMILU_3) pivmax += drop_sum.r; /* Test for singularity */ if (pivmax < 0.0) { fprintf(stderr, "[0]: jcol=%d, SINGULAR!!!\n", jcol); fflush(stderr); exit(1); } if ( pivmax == 0.0 ) { if (diag != EMPTY) *pivrow = lsub_ptr[pivptr = diag]; else if (ptr0 != EMPTY) *pivrow = lsub_ptr[pivptr = ptr0]; else { /* look for the first row which does not belong to any later supernodes */ for (icol = jcol; icol < n; icol++) if (marker[swap[icol]] <= jcol) break; if (icol >= n) { fprintf(stderr, "[1]: jcol=%d, SINGULAR!!!\n", jcol); fflush(stderr); exit(1); } *pivrow = swap[icol]; /* pick up the pivot row */ for (isub = nsupc; isub < nsupr; ++isub) if ( lsub_ptr[isub] == *pivrow ) { pivptr = isub; break; } } pivmax = fill_tol; lu_col_ptr[pivptr].r = pivmax; lu_col_ptr[pivptr].i = 0.0; *usepr = 0; #ifdef DEBUG printf("[0] ZERO PIVOT: FILL (%d, %d).\n", *pivrow, jcol); fflush(stdout); #endif info =jcol + 1; } /* if (*pivrow == 0.0) */ else { thresh = u * pivmax; /* Choose appropriate pivotal element by our policy. 
*/ if ( *usepr ) { switch (milu) { case SMILU_1: c_add(&temp, &lu_col_ptr[old_pivptr], &drop_sum); rtemp = c_abs1(&temp); break; case SMILU_2: case SMILU_3: rtemp = c_abs1(&lu_col_ptr[old_pivptr]) + drop_sum.r; break; case SILU: default: rtemp = c_abs1(&lu_col_ptr[old_pivptr]); break; } if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = old_pivptr; else *usepr = 0; } if ( *usepr == 0 ) { /* Use diagonal pivot? */ if ( diag >= 0 ) { /* diagonal exists */ switch (milu) { case SMILU_1: c_add(&temp, &lu_col_ptr[diag], &drop_sum); rtemp = c_abs1(&temp); break; case SMILU_2: case SMILU_3: rtemp = c_abs1(&lu_col_ptr[diag]) + drop_sum.r; break; case SILU: default: rtemp = c_abs1(&lu_col_ptr[diag]); break; } if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = diag; } *pivrow = lsub_ptr[pivptr]; } info = 0; /* Reset the diagonal */ switch (milu) { case SMILU_1: c_add(&lu_col_ptr[pivptr], &lu_col_ptr[pivptr], &drop_sum); break; case SMILU_2: case SMILU_3: temp = c_sgn(&lu_col_ptr[pivptr]); cc_mult(&temp, &temp, &drop_sum); c_add(&lu_col_ptr[pivptr], &lu_col_ptr[pivptr], &drop_sum); break; case SILU: default: break; } } /* else */ /* Record pivot row */ perm_r[*pivrow] = jcol; if (jcol < n - 1) { register int t1, t2, t; t1 = iswap[*pivrow]; t2 = jcol; if (t1 != t2) { t = swap[t1]; swap[t1] = swap[t2]; swap[t2] = t; t1 = swap[t1]; t2 = t; t = iswap[t1]; iswap[t1] = iswap[t2]; iswap[t2] = t; } } /* if (jcol < n - 1) */ /* Interchange row subscripts */ if ( pivptr != nsupc ) { itemp = lsub_ptr[pivptr]; lsub_ptr[pivptr] = lsub_ptr[nsupc]; lsub_ptr[nsupc] = itemp; /* Interchange numerical values as well, for the whole snode, such * that L is indexed the same way as A. 
*/ for (icol = 0; icol <= nsupc; icol++) { itemp = pivptr + icol * nsupr; temp = lu_sup_ptr[itemp]; lu_sup_ptr[itemp] = lu_sup_ptr[nsupc + icol*nsupr]; lu_sup_ptr[nsupc + icol*nsupr] = temp; } } /* if */ /* cdiv operation */ ops[FACT] += 10 * (nsupr - nsupc); c_div(&temp, &one, &lu_col_ptr[nsupc]); for (k = nsupc+1; k < nsupr; k++) cc_mult(&lu_col_ptr[k], &lu_col_ptr[k], &temp); return info; }
/*! \brief
 * <pre>
 * Purpose
 * =======
 *   Compute the reciprocal pivot growth factor of the leading ncols
 *   columns of the matrix, i.e. the minimum over those columns j of
 *       max_i |A(i,j)| / max_i |U(i,j)|,
 *   where column j of the factors corresponds to the column of A that
 *   perm_c maps to j.  A column whose largest |U| entry is exactly zero
 *   contributes the ratio 1.  The result never exceeds
 *   1 / slamch_("S").
 *
 *   The entries of U are gathered from two places: the compressed-column
 *   store of U, and the upper triangle held inside each supernode of L
 *   (the first 1, 2, 3, ... entries of its successive columns).
 *
 *   Magnitudes are measured with c_abs1().
 *
 *   The routine aborts if the work array cannot be allocated.
 * </pre>
 */
float
cPivotGrowth(int ncols, SuperMatrix *A, int *perm_c,
             SuperMatrix *L, SuperMatrix *U)
{
    NCformat *Astore;
    SCformat *Lstore;   /* name is referenced by the L_* access macros */
    NCformat *Ustore;
    complex  *Aval, *Lval, *Uval;
    int      fsupc, nsupr, luptr, nz_in_U;
    int      i, j, k, oldcol;
    int      *inv_perm_c;
    float   rpg, maxaj, maxuj;
    extern   double slamch_(char *);
    float   smlnum;
    complex   *luval;

    /* Get machine constants. */
    smlnum = slamch_("S");
    rpg = 1. / smlnum;

    Astore = A->Store;
    Lstore = L->Store;
    Ustore = U->Store;
    Aval = Astore->nzval;
    Lval = Lstore->nzval;
    Uval = Ustore->nzval;

    /* inv_perm_c[j] is the original column of A that ends up in position j */
    inv_perm_c = (int *) SUPERLU_MALLOC(A->ncol*sizeof(int));
    if ( !inv_perm_c ) ABORT("SUPERLU_MALLOC fails for inv_perm_c[].");
    for (j = 0; j < A->ncol; ++j) inv_perm_c[perm_c[j]] = j;

    for (k = 0; k <= Lstore->nsuper; ++k) {
        fsupc = L_FST_SUPC(k);
        nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
        luptr = L_NZ_START(fsupc);
        luval = &Lval[luptr];
        nz_in_U = 1;

        for (j = fsupc; j < L_FST_SUPC(k+1) && j < ncols; ++j) {
            maxaj = 0.;
            oldcol = inv_perm_c[j];
            for (i = Astore->colptr[oldcol]; i < Astore->colptr[oldcol+1]; ++i)
                maxaj = SUPERLU_MAX( maxaj, c_abs1( &Aval[i]) );

            maxuj = 0.;
            for (i = Ustore->colptr[j]; i < Ustore->colptr[j+1]; i++)
                maxuj = SUPERLU_MAX( maxuj, c_abs1( &Uval[i]) );

            /* Supernode: the leading nz_in_U entries of this column
               belong to U */
            for (i = 0; i < nz_in_U; ++i)
                maxuj = SUPERLU_MAX( maxuj, c_abs1( &luval[i]) );

            ++nz_in_U;
            luval += nsupr;

            if ( maxuj == 0. )
                rpg = SUPERLU_MIN( rpg, 1.);
            else
                rpg = SUPERLU_MIN( rpg, maxaj / maxuj );
        }

        if ( j >= ncols ) break;
    }

    SUPERLU_FREE(inv_perm_c);
    return (rpg);
}
/*! \brief
 * <pre>
 * Purpose
 * =======
 *   Gather the nonzero U-segments of column jcol from the dense work
 *   vector dense[] into the compressed storage ucol[]/usub[] of Glu,
 *   dropping small entries on the way.
 *
 *   First rule:  an entry is kept only if quota > 0 and its magnitude
 *                (c_abs1) is at least drop_tol.
 *   Second rule: if DROP_SECONDARY is set and more than quota entries
 *                survive, entries not larger than a second tolerance are
 *                removed as well.  That tolerance comes from interpolation
 *                (DROP_INTERP) or from sqselect().
 *
 *   With drop_rule == NODROP nothing is dropped.
 *
 *   For the MILU variants the dropped entries are accumulated in *sum:
 *   their sum for SMILU_1, the magnitude of their sum for SMILU_2, and the
 *   sum of their magnitudes for SMILU_3.
 *
 *   Every entry of dense[] that is visited is reset to zero.
 *
 * Return value
 * ============
 *   0 on success, otherwise the nonzero error code returned by
 *   cLUMemXpand().  *nnzUj is increased by the number of entries kept.
 * </pre>
 */
int
ilu_ccopy_to_ucol(
	      int	 jcol,	   /* in */
	      int	 nseg,	   /* in */
	      int	 *segrep,  /* in */
	      int	 *repfnz,  /* in */
	      int	 *perm_r,  /* in */
	      complex	 *dense,   /* modified - reset to zero on return */
	      int	 drop_rule,/* in */
	      milu_t	 milu,	   /* in */
	      double	 drop_tol, /* in */
	      int	 quota,    /* maximum nonzero entries allowed */
	      complex	 *sum,	   /* out - the sum of dropped entries */
	      int	 *nnzUj,   /* in - out */
	      GlobalLU_t *Glu,	   /* modified */
	      float	 *work	   /* working space with minimum size n,
				    * used by the second dropping rule */
	      )
{
/*
 * Gather from SPA dense[*] to global ucol[*].
 */
    int       ksub, krep, ksupno;
    int       i, k, kfnz, segsze;
    int       fsupc, isub, irow;
    int       jsupno, nextu;
    int       new_next, mem_error;
    int       *xsup, *supno;
    int       *lsub, *xlsub;
    complex    *ucol;
    int       *usub, *xusub;
    int       nzumax;
    int       m; /* number of entries in the nonzero U-segments */
    register float d_max = 0.0, d_min = 1.0 / dlamch_("Safe minimum");
    register double tmp;
    complex zero = {0.0, 0.0};
    int i_1 = 1;

    xsup    = Glu->xsup;
    supno   = Glu->supno;
    lsub    = Glu->lsub;
    xlsub   = Glu->xlsub;
    ucol    = Glu->ucol;
    usub    = Glu->usub;
    xusub   = Glu->xusub;
    nzumax  = Glu->nzumax;

    *sum = zero;
    if (drop_rule == NODROP) {
	/* make both rules vacuous */
	drop_tol = -1.0, quota = Glu->n;
    }

    jsupno = supno[jcol];
    nextu  = xusub[jcol];
    k = nseg - 1;
    for (ksub = 0; ksub < nseg; ksub++) {
	krep = segrep[k--];
	ksupno = supno[krep];

	if ( ksupno != jsupno ) { /* Should go into ucol[] */
	    kfnz = repfnz[krep];
	    if ( kfnz != EMPTY ) {	/* Nonzero U-segment */

		fsupc = xsup[ksupno];
		isub = xlsub[fsupc] + kfnz - fsupc;
		segsze = krep - kfnz + 1;

		new_next = nextu + segsze;
		while ( new_next > nzumax ) {
		    /* the arrays may move when expanded: refresh the
		       local copies of the pointers */
		    if ((mem_error = cLUMemXpand(jcol, nextu, UCOL, &nzumax,
			    Glu)) != 0)
			return (mem_error);
		    ucol = Glu->ucol;
		    if ((mem_error = cLUMemXpand(jcol, nextu, USUB, &nzumax,
			    Glu)) != 0)
			return (mem_error);
		    usub = Glu->usub;
		    lsub = Glu->lsub;
		}

		for (i = 0; i < segsze; i++) {
		    irow = lsub[isub++];
		    tmp = c_abs1(&dense[irow]);

		    /* first dropping rule */
		    if (quota > 0 && tmp >= drop_tol) {
			if (tmp > d_max) d_max = tmp;
			if (tmp < d_min) d_min = tmp;
			usub[nextu] = perm_r[irow];
			ucol[nextu] = dense[irow];
			nextu++;
		    } else {
			switch (milu) {
			    case SMILU_1:
			    case SMILU_2:
				c_add(sum, sum, &dense[irow]);
				break;
			    case SMILU_3:
				/* *sum += fabs(dense[irow]);*/
				sum->r += tmp;
				break;
			    case SILU:
			    default:
				break;
			}
#ifdef DEBUG
			num_drop_U++;
#endif
		    }
		    dense[irow] = zero;
		}

	    }

	}

    } /* for each segment... */

    xusub[jcol + 1] = nextu;	  /* Close U[*,jcol] */
    m = xusub[jcol + 1] - xusub[jcol];

    /* second dropping rule */
    if (drop_rule & DROP_SECONDARY && m > quota) {
	register double tol = d_max;
	register int m0 = xusub[jcol] + m - 1; /* last live entry */

	if (quota > 0) {
	    if (drop_rule & DROP_INTERP) {
		d_max = 1.0 / d_max; d_min = 1.0 / d_min;
		tol = 1.0 / (d_max + (d_min - d_max) * quota / m);
	    } else {
		i_1 = xusub[jcol];
		for (i = 0; i < m; ++i, ++i_1) work[i] = c_abs1(&ucol[i_1]);
		tol = sqselect(m, work, quota);
	    }
	}
	for (i = xusub[jcol]; i <= m0; ) {
	    if (c_abs1(&ucol[i]) <= tol) {
		switch (milu) {
		    case SMILU_1:
		    case SMILU_2:
			c_add(sum, sum, &ucol[i]);
			break;
		    case SMILU_3:
			/* accumulate the magnitude of the entry being
			   dropped here, not a value left over from the
			   gather loop above */
			sum->r += c_abs1(&ucol[i]);
			break;
		    case SILU:
		    default:
			break;
		}
		/* fill the hole with the last live entry and re-examine
		   slot i */
		ucol[i] = ucol[m0];
		usub[i] = usub[m0];
		m0--;
		m--;
#ifdef DEBUG
		num_drop_U++;
#endif
		xusub[jcol + 1]--;
		continue;
	    }
	    i++;
	}
    }

    if (milu == SMILU_2) {
	sum->r = c_abs1(sum); sum->i = 0.0;
    }
    if (milu == SMILU_3) sum->i = 0.0;

    *nnzUj += m;

    return 0;
}
int main(int argc, char *argv[]) { void cmatvec_mult(complex alpha, complex x[], complex beta, complex y[]); void cpsolve(int n, complex x[], complex y[]); extern int cfgmr( int n, void (*matvec_mult)(complex, complex [], complex, complex []), void (*psolve)(int n, complex [], complex[]), complex *rhs, complex *sol, double tol, int restrt, int *itmax, FILE *fits); extern int cfill_diag(int n, NCformat *Astore); char equed[1] = {'B'}; yes_no_t equil; trans_t trans; SuperMatrix A, L, U; SuperMatrix B, X; NCformat *Astore; NCformat *Ustore; SCformat *Lstore; complex *a; int *asub, *xa; int *etree; int *perm_c; /* column permutation vector */ int *perm_r; /* row permutations from partial pivoting */ int nrhs, ldx, lwork, info, m, n, nnz; complex *rhsb, *rhsx, *xact; complex *work = NULL; float *R, *C; float u, rpg, rcond; complex zero = {0.0, 0.0}; complex one = {1.0, 0.0}; complex none = {-1.0, 0.0}; mem_usage_t mem_usage; superlu_options_t options; SuperLUStat_t stat; int restrt, iter, maxit, i; double resid; complex *x, *b; #ifdef DEBUG extern int num_drop_L, num_drop_U; #endif #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Enter main()"); #endif /* Defaults */ lwork = 0; nrhs = 1; equil = YES; u = 0.1; /* u=1.0 for complete factorization */ trans = NOTRANS; /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = COLAMD; options.DiagPivotThresh = 0.1; //different from complete LU options.Trans = NOTRANS; options.IterRefine = NOREFINE; options.SymmetricMode = NO; options.PivotGrowth = NO; options.ConditionNumber = NO; options.PrintStat = YES; options.RowPerm = LargeDiag; options.ILU_DropTol = 1e-4; options.ILU_FillTol = 1e-2; options.ILU_FillFactor = 10.0; options.ILU_DropRule = DROP_BASIC | DROP_AREA; options.ILU_Norm = INF_NORM; options.ILU_MILU = SMILU_2; */ ilu_set_default_options(&options); /* Modify the defaults. 
*/ options.PivotGrowth = YES; /* Compute reciprocal pivot growth */ options.ConditionNumber = YES;/* Compute reciprocal condition number */ if ( lwork > 0 ) { work = SUPERLU_MALLOC(lwork); if ( !work ) ABORT("Malloc fails for work[]."); } /* Read matrix A from a file in Harwell-Boeing format.*/ if (argc < 2) { printf("Usage:\n%s [OPTION] < [INPUT] > [OUTPUT]\nOPTION:\n" "-h -hb:\n\t[INPUT] is a Harwell-Boeing format matrix.\n" "-r -rb:\n\t[INPUT] is a Rutherford-Boeing format matrix.\n" "-t -triplet:\n\t[INPUT] is a triplet format matrix.\n", argv[0]); return 0; } else { switch (argv[1][1]) { case 'H': case 'h': printf("Input a Harwell-Boeing format matrix:\n"); creadhb(&m, &n, &nnz, &a, &asub, &xa); break; case 'R': case 'r': printf("Input a Rutherford-Boeing format matrix:\n"); creadrb(&m, &n, &nnz, &a, &asub, &xa); break; case 'T': case 't': printf("Input a triplet format matrix:\n"); creadtriple(&m, &n, &nnz, &a, &asub, &xa); break; default: printf("Unrecognized format.\n"); return 0; } } cCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_C, SLU_GE); Astore = A.Store; cfill_diag(n, Astore); printf("Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz); fflush(stdout); if ( !(rhsb = complexMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsb[]."); if ( !(rhsx = complexMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsx[]."); cCreate_Dense_Matrix(&B, m, nrhs, rhsb, m, SLU_DN, SLU_C, SLU_GE); cCreate_Dense_Matrix(&X, m, nrhs, rhsx, m, SLU_DN, SLU_C, SLU_GE); xact = complexMalloc(n * nrhs); ldx = n; cGenXtrue(n, nrhs, xact, ldx); cFillRHS(trans, nrhs, xact, ldx, &A, &B); if ( !(etree = intMalloc(n)) ) ABORT("Malloc fails for etree[]."); if ( !(perm_r = intMalloc(m)) ) ABORT("Malloc fails for perm_r[]."); if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[]."); if ( !(R = (float *) SUPERLU_MALLOC(A.nrow * sizeof(float))) ) ABORT("SUPERLU_MALLOC fails for R[]."); if ( !(C = (float *) SUPERLU_MALLOC(A.ncol * sizeof(float))) ) 
ABORT("SUPERLU_MALLOC fails for C[]."); info = 0; #ifdef DEBUG num_drop_L = 0; num_drop_U = 0; #endif /* Initialize the statistics variables. */ StatInit(&stat); /* Compute the incomplete factorization and compute the condition number and pivot growth using dgsisx. */ cgsisx(&options, &A, perm_c, perm_r, etree, equed, R, C, &L, &U, work, lwork, &B, &X, &rpg, &rcond, &mem_usage, &stat, &info); Lstore = (SCformat *) L.Store; Ustore = (NCformat *) U.Store; printf("cgsisx(): info %d\n", info); if (info > 0 || rcond < 1e-8 || rpg > 1e8) printf("WARNING: This preconditioner might be unstable.\n"); if ( info == 0 || info == n+1 ) { if ( options.PivotGrowth == YES ) printf("Recip. pivot growth = %e\n", rpg); if ( options.ConditionNumber == YES ) printf("Recip. condition number = %e\n", rcond); } else if ( info > 0 && lwork == -1 ) { printf("** Estimated memory: %d bytes\n", info - n); } printf("n(A) = %d, nnz(A) = %d\n", n, Astore->nnz); printf("No of nonzeros in factor L = %d\n", Lstore->nnz); printf("No of nonzeros in factor U = %d\n", Ustore->nnz); printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n); printf("Fill ratio: nnz(F)/nnz(A) = %.3f\n", ((double)(Lstore->nnz) + (double)(Ustore->nnz) - (double)n) / (double)Astore->nnz); printf("L\\U MB %.3f\ttotal MB needed %.3f\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6); fflush(stdout); /* Set the global variables. */ GLOBAL_A = &A; GLOBAL_L = &L; GLOBAL_U = &U; GLOBAL_STAT = &stat; GLOBAL_PERM_C = perm_c; GLOBAL_PERM_R = perm_r; /* Set the variables used by GMRES. 
*/ restrt = SUPERLU_MIN(n / 3 + 1, 50); maxit = 1000; iter = maxit; resid = 1e-8; if (!(b = complexMalloc(m))) ABORT("Malloc fails for b[]."); if (!(x = complexMalloc(n))) ABORT("Malloc fails for x[]."); sp_cgemv("N", one, &A, xact, 1, zero, b, 1); if (info <= n + 1) { int i_1 = 1; double maxferr = 0.0, nrmA, nrmB, res, t; complex temp; extern float scnrm2_(int *, complex [], int *); extern void caxpy_(int *, complex *, complex [], int *, complex [], int *); /* Call GMRES. */ /*double *sol = (double*) ((DNformat*) X.Store)->nzval; for (i = 0; i < n; i++) x[i] = sol[i];*/ for (i = 0; i < n; i++) x[i] = zero; t = SuperLU_timer_(); cfgmr(n, cmatvec_mult, cpsolve, b, x, resid, restrt, &iter, stdout); t = SuperLU_timer_() - t; /* Output the result. */ nrmA = scnrm2_(&(Astore->nnz), (complex *)((DNformat *)A.Store)->nzval, &i_1); nrmB = scnrm2_(&m, b, &i_1); sp_cgemv("N", none, &A, x, 1, one, b, 1); res = scnrm2_(&m, b, &i_1); resid = res / nrmB; printf("||A||_F = %.1e, ||B||_2 = %.1e, ||B-A*X||_2 = %.1e, " "relres = %.1e\n", nrmA, nrmB, res, resid); if (iter >= maxit) { if (resid >= 1.0) iter = -180; else if (resid > 1e-8) iter = -111; } printf("iteration: %d\nresidual: %.1e\nGMRES time: %.2f seconds.\n", iter, resid, t); for (i = 0; i < m; i++) c_sub(&temp, &x[i], &xact[i]); maxferr = SUPERLU_MAX(maxferr, c_abs1(&temp)); printf("||X-X_true||_oo = %.1e\n", maxferr); } #ifdef DEBUG printf("%d entries in L and %d entries in U dropped.\n", num_drop_L, num_drop_U); #endif fflush(stdout); if ( options.PrintStat ) StatPrint(&stat); StatFree(&stat); SUPERLU_FREE (rhsb); SUPERLU_FREE (rhsx); SUPERLU_FREE (xact); SUPERLU_FREE (etree); SUPERLU_FREE (perm_r); SUPERLU_FREE (perm_c); SUPERLU_FREE (R); SUPERLU_FREE (C); Destroy_CompCol_Matrix(&A); Destroy_SuperMatrix_Store(&B); Destroy_SuperMatrix_Store(&X); if ( lwork >= 0 ) { Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); } SUPERLU_FREE(b); SUPERLU_FREE(x); #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Exit main()"); #endif return 
0; }
/*! \brief
 *
 * <pre>
 * Purpose
 * =======
 *
 *   Performs a symbolic factorization on a panel of columns [jcol, jcol+w).
 *
 *   A supernode representative is the last column of a supernode.
 *   The nonzeros in U[*,j] are segments that end at supernodal
 *   representatives.
 *
 *   The routine returns one list of the supernodal representatives
 *   in topological order of the dfs that generates them. This list is
 *   a superset of the topological order of each individual column within
 *   the panel.
 *   The location of the first nonzero in each supernodal segment
 *   (supernodal entry location) is also returned. Each column has a
 *   separate list for this purpose.
 *
 *   Two marker arrays are used for dfs:
 *     marker[i] == jj, if i was visited during dfs of current column jj;
 *     marker1[i] >= jcol, if i was visited by earlier columns in this panel;
 *
 *   marker: A-row --> A-row/col (0/1)
 *   repfnz: SuperA-col --> PA-row
 *   parent: SuperA-col --> SuperA-col
 *   xplore: SuperA-col --> index to L-structure
 *
 * Storage layout used by this routine
 * ===================================
 *
 *   - marker1 is the second half of marker (marker + m), so marker is
 *     accessed at indices in [0, 2*m).
 *   - repfnz and dense are walked in strides of m: panel column jj uses
 *     the slice starting at offset (jj - jcol) * m.
 *   - panel_lsub receives the L-part row indices of panel column jj
 *     starting at offset (jj - jcol) * m.
 *   - amax receives one value per panel column: the largest c_abs1()
 *     value found among the entries of A[*,jj].
 *   - *nseg is reset to 0 on entry and counts the entries written to
 *     segrep for the whole panel.
 *
 *   repfnz (and marker/marker1) are only updated here, never cleared;
 *   presumably the caller initializes them (repfnz to EMPTY) -- confirm
 *   against the caller.
 * </pre>
 */
void
ilu_cpanel_dfs(
   const int  m,           /* in - number of rows in the matrix */
   const int  w,           /* in - number of columns in the panel */
   const int  jcol,        /* in - first column of the panel */
   SuperMatrix *A,         /* in - original matrix */
   int        *perm_r,     /* in - perm_r[row] == EMPTY means row is in L */
   int        *nseg,       /* out - number of entries stored in segrep */
   complex    *dense,      /* out - panel columns scattered by row index */
   float      *amax,       /* out - max. abs. value of each column in panel */
   int        *panel_lsub, /* out */
   int        *segrep,     /* out */
   int        *repfnz,     /* out */
   int        *marker,     /* out */
   int        *parent,     /* working array */
   int        *xplore,     /* working array */
   GlobalLU_t *Glu         /* in - xsup, supno, lsub, xlsub are read */
)
{

    NCPformat *Astore;
    complex   *a;
    int       *asub;
    int       *xa_begin, *xa_end;
    int       krep, chperm, chmark, chrep, oldrep, kchild, myfnz;
    int       k, krow, kmark, kperm;
    int       xdfs, maxdfs, kpar;
    int       jj;          /* index through each column in the panel */
    int       *marker1;    /* marker1[jj] >= jcol if vertex jj was visited
                              by a previous column within this panel. */
    int       *repfnz_col; /* start of each column in the panel */
    complex   *dense_col;  /* start of each column in the panel */
    int       nextl_col;   /* next available position in panel_lsub[*,jj] */
    int       *xsup, *supno;
    int       *lsub, *xlsub;
    float     *amax_col;
    register double tmp;
#ifdef CHK_DFS
    int       i;           /* loop index used only by the CHK_DFS traces;
                              it was previously undeclared, which broke
                              builds with -DCHK_DFS */
#endif

    /* Initialize pointers */
    Astore     = A->Store;
    a          = Astore->nzval;
    asub       = Astore->rowind;
    xa_begin   = Astore->colbeg;
    xa_end     = Astore->colend;
    marker1    = marker + m;
    repfnz_col = repfnz;
    dense_col  = dense;
    amax_col   = amax;
    *nseg      = 0;
    xsup       = Glu->xsup;
    supno      = Glu->supno;
    lsub       = Glu->lsub;
    xlsub      = Glu->xlsub;

    /* For each column in the panel */
    for (jj = jcol; jj < jcol + w; jj++) {
        nextl_col = (jj - jcol) * m;

#ifdef CHK_DFS
        printf("\npanel col %d: ", jj);
#endif

        *amax_col = 0.0;
        /* For each nonz in A[*,jj] do dfs */
        for (k = xa_begin[jj]; k < xa_end[jj]; k++) {
            krow = asub[k];

            /* Track the column maximum and scatter the value into the
             * dense column before any visited-check: this must happen
             * for every stored entry. */
            tmp = c_abs1(&a[k]);
            if (tmp > *amax_col) *amax_col = tmp;
            dense_col[krow] = a[k];
            kmark = marker[krow];
            if ( kmark == jj )
                continue;     /* krow visited before, go to the next nonzero */

            /* For each unmarked nbr krow of jj
             * krow is in L: place it in structure of L[*,jj]
             */
            marker[krow] = jj;
            kperm = perm_r[krow];

            if ( kperm == EMPTY ) {
                panel_lsub[nextl_col++] = krow; /* krow is indexed into A */
            }
            /*
             * krow is in U: if its supernode-rep krep
             * has been explored, update repfnz[*]
             */
            else {

                krep = xsup[supno[kperm]+1] - 1;
                myfnz = repfnz_col[krep];

#ifdef CHK_DFS
                printf("krep %d, myfnz %d, perm_r[%d] %d\n", krep, myfnz, krow, kperm);
#endif
                if ( myfnz != EMPTY ) { /* Representative visited before */
                    if ( myfnz > kperm ) repfnz_col[krep] = kperm;
                    /* continue; */
                }
                else {
                    /* Otherwise, perform dfs starting at krep */
                    oldrep = EMPTY;
                    parent[krep] = oldrep;
                    repfnz_col[krep] = kperm;
                    xdfs = xlsub[xsup[supno[krep]]];
                    maxdfs = xlsub[krep + 1];

#ifdef CHK_DFS
                    printf("  xdfs %d, maxdfs %d: ", xdfs, maxdfs);
                    for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]);
                    printf("\n");
#endif
                    /* Iterative dfs: parent[] holds the return path and
                     * xplore[] the resume position of each suspended
                     * representative. */
                    do {
                        /*
                         * For each unmarked kchild of krep
                         */
                        while ( xdfs < maxdfs ) {

                            kchild = lsub[xdfs];
                            xdfs++;
                            chmark = marker[kchild];

                            if ( chmark != jj ) { /* Not reached yet */
                                marker[kchild] = jj;
                                chperm = perm_r[kchild];

                                /* Case kchild is in L: place it in L[*,j] */
                                if ( chperm == EMPTY ) {
                                    panel_lsub[nextl_col++] = kchild;
                                }
                                /* Case kchild is in U:
                                 *   chrep = its supernode-rep. If its rep has
                                 *   been explored, update its repfnz[*]
                                 */
                                else {

                                    chrep = xsup[supno[chperm]+1] - 1;
                                    myfnz = repfnz_col[chrep];
#ifdef CHK_DFS
                                    printf("chrep %d,myfnz %d,perm_r[%d] %d\n",chrep,myfnz,kchild,chperm);
#endif
                                    if ( myfnz != EMPTY ) { /* Visited before */
                                        if ( myfnz > chperm )
                                            repfnz_col[chrep] = chperm;
                                    }
                                    else {
                                        /* Cont. dfs at snode-rep of kchild */
                                        xplore[krep] = xdfs;
                                        oldrep = krep;
                                        krep = chrep; /* Go deeper down G(L) */
                                        parent[krep] = oldrep;
                                        repfnz_col[krep] = chperm;
                                        xdfs = xlsub[xsup[supno[krep]]];
                                        maxdfs = xlsub[krep + 1];
#ifdef CHK_DFS
                                        printf("  xdfs %d, maxdfs %d: ", xdfs, maxdfs);
                                        for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]);
                                        printf("\n");
#endif
                                    } /* else */

                                } /* else */

                            } /* if... */

                        } /* while xdfs < maxdfs */

                        /* krow has no more unexplored nbrs:
                         *    Place snode-rep krep in postorder DFS, if this
                         *    segment is seen for the first time. (Note that
                         *    "repfnz[krep]" may change later.)
                         *    Backtrack dfs to its parent.
                         */
                        if ( marker1[krep] < jcol ) {
                            segrep[*nseg] = krep;
                            ++(*nseg);
                            marker1[krep] = jj;
                        }

                        kpar = parent[krep]; /* Pop stack, mimic recursion */
                        if ( kpar == EMPTY ) break; /* dfs done */
                        krep = kpar;
                        xdfs = xplore[krep];
                        maxdfs = xlsub[krep + 1];

#ifdef CHK_DFS
                        printf("  pop stack: krep %d,xdfs %d,maxdfs %d: ", krep,xdfs,maxdfs);
                        for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]);
                        printf("\n");
#endif
                    } while ( kpar != EMPTY ); /* do-while - until empty stack */

                } /* else */

            } /* else */

        } /* for each nonz in A[*,jj] */

        repfnz_col += m;    /* Move to next column */
        dense_col += m;
        amax_col++;

    } /* for jj ... */

}
/*
 * Purpose
 * =======
 *
 * Compute the reciprocal pivot growth factor of the leading ncols columns
 * of the matrix, using the formula:
 *     min_j ( max_i(abs(A_ij)) / max_i(abs(U_ij)) )
 * where magnitudes are measured with c_abs1().
 *
 * Arguments
 * =========
 *
 * ncols   (input) int
 *         Number of leading columns to examine.
 *
 * A       (input) SuperMatrix*
 *         Matrix whose Store is read as NCformat (colptr / nzval).
 *
 * perm_c  (input) int*, dimension (A->ncol)
 *         Column permutation. Its inverse is built internally and used to
 *         pick, for factor column j, the column of A that is scanned.
 *
 * L       (input) SuperMatrix*
 *         Factor L; its Store is read as SCPformat through the L_* macros.
 *
 * U       (input) SuperMatrix*
 *         Factor U; its Store is read as NCPformat (colbeg / colend / nzval).
 *
 * Return value
 * ============
 *
 * The smallest ratio max|A(:,j)| / max|U(:,j)| over the examined columns.
 * A column whose U maximum is exactly zero contributes a ratio of 1.
 * The starting value is 1 / slamch_("S"), which is returned unchanged if
 * no column is examined.
 *
 * NOTE(review): the result of SUPERLU_MALLOC is used without a NULL check.
 */
float cPivotGrowth(int ncols, SuperMatrix *A, int *perm_c, SuperMatrix *L, SuperMatrix *U)
{
    extern double slamch_(char *);

    /* Lstore keeps its name: the L_* accessor macros are applied below
       without being handed the store explicitly. */
    NCformat  *Astore = A->Store;
    SCPformat *Lstore = L->Store;
    NCPformat *Ustore = U->Store;
    complex   *Aval = Astore->nzval;
    complex   *Lval = Lstore->nzval;
    complex   *Uval = Ustore->nzval;

    complex   *snode_col;      /* current column inside the supernode block */
    int       *inv_perm_c;     /* inverse of perm_c */
    int       snode, col, p;
    int       first_col;       /* first column of the current supernode */
    int       col_stride;      /* stored entries per supernode column */
    int       upper_len;       /* leading entries of snode_col to scan */
    int       src_col;         /* column of A matching factor column col */
    float     smlnum, rpg, maxaj, maxuj;

    /* Get machine constants; start from the largest admissible value. */
    smlnum = slamch_("S");
    rpg = 1. / smlnum;

    /* Build the inverse column permutation. */
    inv_perm_c = (int *) SUPERLU_MALLOC( (size_t) A->ncol*sizeof(int) );
    for (col = 0; col < A->ncol; ++col) {
        inv_perm_c[perm_c[col]] = col;
    }

    for (snode = 0; snode <= Lstore->nsuper; ++snode) {
        first_col  = L_FST_SUPC(snode);
        col_stride = L_SUB_END(first_col) - L_SUB_START(first_col);
        snode_col  = &Lval[L_NZ_START(first_col)];
        upper_len  = 1;

        for (col = first_col; col < L_LAST_SUPC(snode) && col < ncols; ++col) {
            /* Largest entry of the matching column of A. */
            maxaj = 0.;
            src_col = inv_perm_c[col];
            for (p = Astore->colptr[src_col]; p < Astore->colptr[src_col+1]; p++) {
                maxaj = SUPERLU_MAX( maxaj, c_abs1( &Aval[p]) );
            }

            /* Largest entry of column col of U: first the part held in
               U's own storage ... */
            maxuj = 0.;
            for (p = Ustore->colbeg[col]; p < Ustore->colend[col]; p++) {
                maxuj = SUPERLU_MAX( maxuj, c_abs1( &Uval[p]) );
            }

            /* ... then the leading upper_len entries of this column in
               the supernode block (one more entry per column). */
            for (p = 0; p < upper_len; ++p) {
                maxuj = SUPERLU_MAX( maxuj, c_abs1( &snode_col[p]) );
            }

            ++upper_len;
            snode_col += col_stride;

            if ( maxuj != 0. ) {
                rpg = SUPERLU_MIN( rpg, maxaj / maxuj );
            } else {
                rpg = SUPERLU_MIN( rpg, 1.);
            }
        }

        /* Stop once the requested number of columns has been covered. */
        if ( col >= ncols ) break;
    }

    SUPERLU_FREE(inv_perm_c);
    return (rpg);
}
/*! \brief
 *
 * <pre>
 * Purpose
 * =======
 *
 *   CGSEQU computes row and column scalings intended to equilibrate an
 *   M-by-N sparse matrix A and reduce its condition number. R returns the
 *   row scale factors and C the column scale factors, chosen to try to make
 *   the largest element in each row and column of the matrix B with
 *   elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1.
 *
 *   R(i) and C(j) are restricted to be between SMLNUM = smallest safe
 *   number and BIGNUM = largest safe number. Use of these scaling
 *   factors is not guaranteed to reduce the condition number of A but
 *   works well in practice.
 *
 *   Element magnitudes are measured with c_abs1().
 *
 * Arguments
 * =========
 *
 *   A       (input) SuperMatrix*
 *           The matrix of dimension (A->nrow, A->ncol) whose equilibration
 *           factors are to be computed. A must have
 *           Stype = SLU_NC; Dtype = SLU_C; Mtype = SLU_GE.
 *
 *   r       (output) float*, size A->nrow
 *           If INFO = 0 or INFO > A->nrow, the row scale factors for A.
 *
 *   c       (output) float*, size A->ncol
 *           If INFO = 0, the column scale factors for A.
 *
 *   rowcnd  (output) float*
 *           If INFO = 0 or INFO > A->nrow, the ratio of the smallest R(i)
 *           to the largest R(i).
 *
 *   colcnd  (output) float*
 *           If INFO = 0, the ratio of the smallest C(j) to the largest C(j).
 *
 *   amax    (output) float*
 *           Absolute value of the largest matrix element.
 *
 *   info    (output) int*
 *           = 0:  successful exit
 *           = -1: A has a negative dimension or an unsupported
 *                 Stype/Dtype/Mtype (input_error is called)
 *           > 0:  if INFO = i, and i is
 *                 <= A->nrow: the i-th row of A is exactly zero
 *                 >  A->nrow: the (i - A->nrow)-th column of A is
 *                             exactly zero
 *
 *   When A->nrow == 0 or A->ncol == 0 the routine returns at once with
 *   *rowcnd = 1, *colcnd = 1 and *amax = 0.
 * </pre>
 */
void cgsequ(SuperMatrix *A, float *r, float *c, float *rowcnd, float *colcnd, float *amax, int *info)
{
    extern float smach(char *);

    NCformat *store;
    complex  *vals;
    int      row, col, p;
    int      bad_arg;
    float    lo, hi;             /* running min / max of the scale factors */
    float    safe_min, safe_max;

    /* Test the input parameters. */
    *info = 0;
    if ( A->nrow < 0 || A->ncol < 0 ||
         A->Stype != SLU_NC || A->Dtype != SLU_C || A->Mtype != SLU_GE ) {
        *info = -1;
        bad_arg = 1;
        input_error("cgsequ", &bad_arg);
        return;
    }

    /* Quick return if possible */
    if ( A->nrow == 0 || A->ncol == 0 ) {
        *rowcnd = 1.;
        *colcnd = 1.;
        *amax = 0.;
        return;
    }

    store = A->Store;
    vals  = store->nzval;

    /* Get machine constants. */
    safe_min = smach("S");
    safe_max = 1. / safe_min;

    /* ---- Row scale factors ---- */

    for (row = 0; row < A->nrow; ++row) {
        r[row] = 0.;
    }

    /* r[row] <- largest magnitude found in that row. */
    for (col = 0; col < A->ncol; ++col) {
        for (p = store->colptr[col]; p < store->colptr[col+1]; ++p) {
            row = store->rowind[p];
            r[row] = SUPERLU_MAX( r[row], c_abs1(&vals[p]) );
        }
    }

    /* Largest and smallest row maxima. */
    lo = safe_max;
    hi = 0.;
    for (row = 0; row < A->nrow; ++row) {
        hi = SUPERLU_MAX(hi, r[row]);
        lo = SUPERLU_MIN(lo, r[row]);
    }
    *amax = hi;

    if (lo != 0.) {
        /* Invert the row maxima, clamped to [safe_min, safe_max]. */
        for (row = 0; row < A->nrow; ++row) {
            r[row] = 1. / SUPERLU_MIN( SUPERLU_MAX( r[row], safe_min ), safe_max );
        }
        /* ROWCND = min(R(I)) / max(R(I)) */
        *rowcnd = SUPERLU_MAX( lo, safe_min ) / SUPERLU_MIN( hi, safe_max );
    } else {
        /* Report the first row that is exactly zero. */
        for (row = 0; row < A->nrow; ++row) {
            if (r[row] == 0.) {
                *info = row + 1;
                return;
            }
        }
    }

    /* ---- Column scale factors ---- */

    for (col = 0; col < A->ncol; ++col) {
        c[col] = 0.;
    }

    /* c[col] <- largest magnitude in that column after applying the
       row scalings computed above. */
    for (col = 0; col < A->ncol; ++col) {
        for (p = store->colptr[col]; p < store->colptr[col+1]; ++p) {
            row = store->rowind[p];
            c[col] = SUPERLU_MAX( c[col], c_abs1(&vals[p]) * r[row] );
        }
    }

    /* Largest and smallest column maxima. */
    lo = safe_max;
    hi = 0.;
    for (col = 0; col < A->ncol; ++col) {
        hi = SUPERLU_MAX(hi, c[col]);
        lo = SUPERLU_MIN(lo, c[col]);
    }

    if (lo != 0.) {
        /* Invert the column maxima, clamped to [safe_min, safe_max]. */
        for (col = 0; col < A->ncol; ++col) {
            c[col] = 1. / SUPERLU_MIN( SUPERLU_MAX( c[col], safe_min ), safe_max);
        }
        /* COLCND = min(C(J)) / max(C(J)) */
        *colcnd = SUPERLU_MAX( lo, safe_min ) / SUPERLU_MIN( hi, safe_max );
    } else {
        /* Report the first column that is exactly zero; column codes are
           offset by the number of rows. */
        for (col = 0; col < A->ncol; ++col) {
            if ( c[col] == 0. ) {
                *info = A->nrow + col + 1;
                return;
            }
        }
    }

    return;

} /* cgsequ */