Exemple #1
0
/*! @file ilu_ccopy_to_ucol.c
 * \brief Copy a computed column of U to the compressed data structure
 * and drop some small entries
 *
 * <pre>
 * -- SuperLU routine (version 4.1) --
 * Lawrence Berkeley National Laboratory
 * November, 2010
 * </pre>
 */

#include "slu_cdefs.h"

#ifdef DEBUG
int num_drop_U;
#endif

extern void ccopy_(int *, complex [], int *, complex [], int *);
#if SCIPY_FIX
extern double dlamch_(char *);
#endif

#if 0
static complex *A;  /* used in _compare_ only */
static int _compare_(const void *a, const void *b)
{
    register int *x = (int *)a, *y = (int *)b;
    register float xx = c_abs1(&A[*x]), yy = c_abs1(&A[*y]);
    if (xx > yy) return -1;
    else if (xx < yy) return 1;
    else return 0;
}
Exemple #2
0
void
cgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U,
       int *perm_r, int *perm_c, equed_t equed, float *R, float *C,
       SuperMatrix *B, SuperMatrix *X, float *ferr, float *berr,
       Gstat_t *Gstat, int *info)
{
/*
 * -- SuperLU MT routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley,
 * and Xerox Palo Alto Research Center.
 * September 10, 2007
 *
 *
 * Purpose
 * =======   
 *
 * cgsrfs improves the computed solution to a system of linear
 * equations and provides error bounds and backward error estimates for
 * the solution.
 *
 * See supermatrix.h for the definition of 'SuperMatrix' structure.
 *
 * Arguments
 * =========
 *
 * trans   (input) trans_t
 *         Specifies the form of the system of equations:
 *         = NOTRANS:  A * X = B     (No transpose)
 *         = TRANS:    A**T * X = B  (Transpose)
 *         = CONJ:     A**H * X = B  (Conjugate transpose = Transpose)
 *
 * A       (input) SuperMatrix*
 *         The original matrix A in the system, or the scaled A if
 *         equilibration was done. The type of A can be:
 *         Stype = NC, Dtype = _D, Mtype = GE.
 *
 * L       (input) SuperMatrix*
 *         The factor L from the factorization Pr*A*Pc=L*U. Use
 *         compressed row subscripts storage for supernodes,
 *         i.e., L has types: Stype = SCP, Dtype = _D, Mtype = TRLU.
 *
 * U       (input) SuperMatrix*
 *         The factor U from the factorization Pr*A*Pc=L*U as computed by
 *         dgstrf(). Use column-wise storage scheme,
 *         i.e., U has types: Stype = NCP, Dtype = _D, Mtype = TRU.
 *
 * perm_r  (input) int*, dimension (A->nrow)
 *         Row permutation vector, which defines the permutation matrix Pr;
 *         perm_r[i] = j means row i of A is in position j in Pr*A.
 *
 * perm_c  (input) int*, dimension (A->ncol)
 *         Column permutation vector, which defines the
 *         permutation matrix Pc; perm_c[i] = j means column i of A is 
 *         in position j in A*Pc.
 *
 * equed   (input) equed_t
 *         Specifies the form of equilibration that was done.
 *         = NOEQUIL: No equilibration.
 *         = ROW:  Row equilibration, i.e., A was premultiplied by diag(R).
 *         = COL:  Column equilibration, i.e., A was postmultiplied by
 *                 diag(C).
 *         = BOTH: Both row and column equilibration, i.e., A was replaced
 *                 by diag(R)*A*diag(C).
 *
 * R       (input) double*, dimension (A->nrow)
 *         The row scale factors for A.
 *         If equed = ROW or BOTH, A is premultiplied by diag(R).
 *         If equed = NOEQUIL or COL, R is not accessed.
 *
 * C       (input) double*, dimension (A->ncol)
 *         The column scale factors for A.
 *         If equed = COL or BOTH, A is postmultiplied by diag(C).
 *         If equed = NOEQUIL or ROW, C is not accessed.
 *
 * B       (input) SuperMatrix*
 *         B has types: Stype = DN, Dtype = _D, Mtype = GE.
 *         The right hand side matrix B.
 *
 * X       (input/output) SuperMatrix*
 *         X has types: Stype = DN, Dtype = _D, Mtype = GE.
 *         On entry, the solution matrix X, as computed by dgstrs().
 *         On exit, the improved solution matrix X.
 *
 * FERR    (output) double*, dimension (B->ncol)
 *         The estimated forward error bound for each solution vector
 *         X(j) (the j-th column of the solution matrix X).
 *         If XTRUE is the true solution corresponding to X(j), FERR(j)
 *         is an estimated upper bound for the magnitude of the largest
 *         element in (X(j) - XTRUE) divided by the magnitude of the
 *         largest element in X(j).  The estimate is as reliable as
 *         the estimate for RCOND, and is almost always a slight
 *         overestimate of the true error.
 *
 * BERR    (output) double*, dimension (B->ncol)
 *         The componentwise relative backward error of each solution
 *         vector X(j) (i.e., the smallest relative change in
 *         any element of A or B that makes X(j) an exact solution).
 *
 * info    (output) int*
 *         = 0:  successful exit
 *         < 0:  if INFO = -i, the i-th argument had an illegal value
 *
 * Internal Parameters
 * ===================
 *
 * ITMAX is the maximum number of steps of iterative refinement.
 *
 */

#define ITMAX 5
    
    /* Table of constant values */
    int    ione = 1;
    complex ndone = {-1., 0.};
    complex done = {1., 0.};
    
    /* Local variables */
    NCformat *Astore;
    complex   *Aval;
    SuperMatrix Bjcol;
    DNformat *Bstore, *Xstore, *Bjcol_store;
    complex   *Bmat, *Xmat, *Bptr, *Xptr;
    int      kase;
    float   safe1, safe2;
    int      i, j, k, irow, nz, count, notran, rowequ, colequ;
    int      ldb, ldx, nrhs;
    float   s, xk, lstres, eps, safmin;
    char     transc[1];
    trans_t  transt;
    complex   *work;
    float   *rwork;
    int      *iwork;
    extern double slamch_(char *);
    extern int clacon_(int *, complex *, complex *, float *, int *);
#ifdef _CRAY
    extern int CCOPY(int *, complex *, int *, complex *, int *);
    extern int CSAXPY(int *, complex *, complex *, int *, complex *, int *);
#else
    extern int ccopy_(int *, complex *, int *, complex *, int *);
    extern int caxpy_(int *, complex *, complex *, int *, complex *, int *);
#endif

    Astore = A->Store;
    Aval   = Astore->nzval;
    Bstore = B->Store;
    Xstore = X->Store;
    Bmat   = Bstore->nzval;
    Xmat   = Xstore->nzval;
    ldb    = Bstore->lda;
    ldx    = Xstore->lda;
    nrhs   = B->ncol;
    
    /* Test the input parameters */
    *info = 0;
    notran = (trans == NOTRANS);
    if ( !notran && trans != TRANS && trans != CONJ ) *info = -1;
    else if ( A->nrow != A->ncol || A->nrow < 0 ||
	      A->Stype != SLU_NC || A->Dtype != SLU_C || A->Mtype != SLU_GE )
	*info = -2;
    else if ( L->nrow != L->ncol || L->nrow < 0 ||
 	      L->Stype != SLU_SCP || L->Dtype != SLU_C || L->Mtype != SLU_TRLU )
	*info = -3;
    else if ( U->nrow != U->ncol || U->nrow < 0 ||
 	      U->Stype != SLU_NCP || U->Dtype != SLU_C || U->Mtype != SLU_TRU )
	*info = -4;
    else if ( ldb < SUPERLU_MAX(0, A->nrow) ||
 	      B->Stype != SLU_DN || B->Dtype != SLU_C || B->Mtype != SLU_GE )
        *info = -10;
    else if ( ldx < SUPERLU_MAX(0, A->nrow) ||
 	      X->Stype != SLU_DN || X->Dtype != SLU_C || X->Mtype != SLU_GE )
	*info = -11;
    if (*info != 0) {
	i = -(*info);
	xerbla_("cgsrfs", &i);
	return;
    }

    /* Quick return if possible */
    if ( A->nrow == 0 || nrhs == 0) {
	for (j = 0; j < nrhs; ++j) {
	    ferr[j] = 0.;
	    berr[j] = 0.;
	}
	return;
    }

    rowequ = (equed == ROW) || (equed == BOTH);
    colequ = (equed == COL) || (equed == BOTH);
    
    /* Allocate working space */
    work = complexMalloc(2*A->nrow);
    rwork = (float *) SUPERLU_MALLOC( (size_t) A->nrow * sizeof(float) );
    iwork = intMalloc(A->nrow);
    if ( !work || !rwork || !iwork ) 
        SUPERLU_ABORT("Malloc fails for work/rwork/iwork.");
    
    if ( notran ) {
	*(unsigned char *)transc = 'N';
        transt = TRANS;
    } else {
	*(unsigned char *)transc = 'T';
	transt = NOTRANS;
    }

    /* NZ = maximum number of nonzero elements in each row of A, plus 1 */
    nz     = A->ncol + 1;
    eps    = slamch_("Epsilon");
    safmin = slamch_("Safe minimum");
    /* Set SAFE1 essentially to be the underflow threshold times the
       number of additions in each row. */
    safe1  = nz * safmin;
    safe2  = safe1 / eps;

    /* Compute the number of nonzeros in each row (or column) of A */
    for (i = 0; i < A->nrow; ++i) iwork[i] = 0;
    if ( notran ) {
	for (k = 0; k < A->ncol; ++k)
	    for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) 
		++iwork[Astore->rowind[i]];
    } else {
	for (k = 0; k < A->ncol; ++k)
	    iwork[k] = Astore->colptr[k+1] - Astore->colptr[k];
    }	

    /* Copy one column of RHS B into Bjcol. */
    Bjcol.Stype = B->Stype;
    Bjcol.Dtype = B->Dtype;
    Bjcol.Mtype = B->Mtype;
    Bjcol.nrow  = B->nrow;
    Bjcol.ncol  = 1;
    Bjcol.Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) );
    if ( !Bjcol.Store ) SUPERLU_ABORT("SUPERLU_MALLOC fails for Bjcol.Store");
    Bjcol_store = Bjcol.Store;
    Bjcol_store->lda = ldb;
    Bjcol_store->nzval = work; /* address aliasing */
	
    /* Do for each right hand side ... */
    for (j = 0; j < nrhs; ++j) {
	count = 0;
	lstres = 3.;
	Bptr = &Bmat[j*ldb];
	Xptr = &Xmat[j*ldx];

	while (1) { /* Loop until stopping criterion is satisfied. */

	    /* Compute residual R = B - op(A) * X,   
	       where op(A) = A, A**T, or A**H, depending on TRANS. */
	    
#ifdef _CRAY
	    CCOPY(&A->nrow, Bptr, &ione, work, &ione);
#else
	    ccopy_(&A->nrow, Bptr, &ione, work, &ione);
#endif
	    sp_cgemv(transc, ndone, A, Xptr, ione, done, work, ione);

	    /* Compute componentwise relative backward error from formula 
	       max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )   
	       where abs(Z) is the componentwise absolute value of the matrix
	       or vector Z.  If the i-th component of the denominator is less
	       than SAFE2, then SAFE1 is added to the i-th component of the   
	       numerator before dividing. */

	    for (i = 0; i < A->nrow; ++i) rwork[i] = c_abs1( &Bptr[i] );
	    
	    /* Compute abs(op(A))*abs(X) + abs(B). */
	    if (notran) {
		for (k = 0; k < A->ncol; ++k) {
		    xk = c_abs1( &Xptr[k] );
		    for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
			rwork[Astore->rowind[i]] += c_abs1(&Aval[i]) * xk;
		}
	    } else {
		for (k = 0; k < A->ncol; ++k) {
		    s = 0.;
		    for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
			irow = Astore->rowind[i];
			s += c_abs1(&Aval[i]) * c_abs1(&Xptr[irow]);
		    }
		    rwork[k] += s;
		}
	    }
	    s = 0.;
	    for (i = 0; i < A->nrow; ++i) {
		if (rwork[i] > safe2) {
		    s = SUPERLU_MAX( s, c_abs1(&work[i]) / rwork[i] );
		} else if ( rwork[i] != 0.0 ) {
		    s = SUPERLU_MAX( s, (c_abs1(&work[i]) + safe1) / rwork[i] );
                }
                /* If rwork[i] is exactly 0.0, then we know the true 
                   residual also must be exactly 0.0. */
	    }
	    berr[j] = s;

	    /* Test stopping criterion. Continue iterating if   
	       1) The residual BERR(J) is larger than machine epsilon, and   
	       2) BERR(J) decreased by at least a factor of 2 during the   
	          last iteration, and   
	       3) At most ITMAX iterations tried. */

	    if (berr[j] > eps && berr[j] * 2. <= lstres && count < ITMAX) {
		/* Update solution and try again. */
		cgstrs (trans, L, U, perm_r, perm_c, &Bjcol, Gstat, info);
		
#ifdef _CRAY
		CAXPY(&A->nrow, &done, work, &ione,
		       &Xmat[j*ldx], &ione);
#else
		caxpy_(&A->nrow, &done, work, &ione,
		       &Xmat[j*ldx], &ione);
#endif
		lstres = berr[j];
		++count;
	    } else {
		break;
	    }
        
	} /* end while */

	/* Bound error from formula:
	   norm(X - XTRUE) / norm(X) .le. FERR = norm( abs(inv(op(A)))*   
	   ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)   
          where   
            norm(Z) is the magnitude of the largest component of Z   
            inv(op(A)) is the inverse of op(A)   
            abs(Z) is the componentwise absolute value of the matrix or
	       vector Z   
            NZ is the maximum number of nonzeros in any row of A, plus 1   
            EPS is machine epsilon   

          The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))   
          is incremented by SAFE1 if the i-th component of   
          abs(op(A))*abs(X) + abs(B) is less than SAFE2.   

          Use CLACON to estimate the infinity-norm of the matrix   
             inv(op(A)) * diag(W),   
          where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */
	
	for (i = 0; i < A->nrow; ++i) rwork[i] = c_abs1( &Bptr[i] );
	
	/* Compute abs(op(A))*abs(X) + abs(B). */
	if ( notran ) {
	    for (k = 0; k < A->ncol; ++k) {
		xk = c_abs1( &Xptr[k] );
		for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
		    rwork[Astore->rowind[i]] += c_abs1(&Aval[i]) * xk;
	    }
	} else {
	    for (k = 0; k < A->ncol; ++k) {
		s = 0.;
		for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
		    irow = Astore->rowind[i];
		    xk = c_abs1( &Xptr[irow] );
		    s += c_abs1(&Aval[i]) * xk;
		}
		rwork[k] += s;
	    }
	}
	
	for (i = 0; i < A->nrow; ++i)
	    if (rwork[i] > safe2)
		rwork[i] = c_abs(&work[i]) + (iwork[i]+1)*eps*rwork[i];
	    else
		rwork[i] = c_abs(&work[i])+(iwork[i]+1)*eps*rwork[i]+safe1;
	kase = 0;

	do {
	    clacon_(&A->nrow, &work[A->nrow], work,
		    &ferr[j], &kase);
	    if (kase == 0) break;

	    if (kase == 1) {
		/* Multiply by diag(W)*inv(op(A)**T)*(diag(C) or diag(R)). */
		if ( notran && colequ )
		    for (i = 0; i < A->ncol; ++i) {
		        cs_mult(&work[i], &work[i], C[i]);
	            }
		else if ( !notran && rowequ )
		    for (i = 0; i < A->nrow; ++i) {
		        cs_mult(&work[i], &work[i], R[i]);
                    }

		cgstrs (transt, L, U, perm_r, perm_c, &Bjcol, Gstat, info);
		
		for (i = 0; i < A->nrow; ++i) {
		    cs_mult(&work[i], &work[i], rwork[i]);
	 	}
	    } else {
		/* Multiply by (diag(C) or diag(R))*inv(op(A))*diag(W). */
		for (i = 0; i < A->nrow; ++i) {
		    cs_mult(&work[i], &work[i], rwork[i]);
		}
		
		cgstrs (trans, L, U, perm_r, perm_c, &Bjcol, Gstat, info);
		
		if ( notran && colequ )
		    for (i = 0; i < A->ncol; ++i) {
		        cs_mult(&work[i], &work[i], C[i]);
		    }
		else if ( !notran && rowequ )
		    for (i = 0; i < A->ncol; ++i) {
		        cs_mult(&work[i], &work[i], R[i]);  
		    }
	    }
	    
	} while ( kase != 0 );

	/* Normalize error. */
	lstres = 0.;
 	if ( notran && colequ ) {
	    for (i = 0; i < A->nrow; ++i)
	    	lstres = SUPERLU_MAX( lstres, C[i] * c_abs1( &Xptr[i]) );
  	} else if ( !notran && rowequ ) {
	    for (i = 0; i < A->nrow; ++i)
	    	lstres = SUPERLU_MAX( lstres, R[i] * c_abs1( &Xptr[i]) );
	} else {
	    for (i = 0; i < A->nrow; ++i)
	    	lstres = SUPERLU_MAX( lstres, c_abs1( &Xptr[i]) );
	}
	if ( lstres != 0. )
	    ferr[j] /= lstres;

    } /* for each RHS j ... */
    
    SUPERLU_FREE(work);
    SUPERLU_FREE(rwork);
    SUPERLU_FREE(iwork);
    SUPERLU_FREE(Bjcol.Store);

    return;

} /* cgsrfs */
Exemple #3
0
int
cpivotL(
        const int  jcol,     /* in */
        const float u,      /* in - diagonal pivoting threshold */
        int        *usepr,   /* re-use the pivot sequence given by perm_r/iperm_r */
        int        *perm_r,  /* may be modified */
        int        *iperm_r, /* in - inverse of perm_r */
        int        *iperm_c, /* in - used to find diagonal of Pc*A*Pc' */
        int        *pivrow,  /* out */
        GlobalLU_t *Glu,     /* modified - global LU data structures */
	SuperLUStat_t *stat  /* output */
       )
{

    complex one = {1.0, 0.0};
    int          fsupc;	    /* first column in the supernode */
    int          nsupc;	    /* no of columns in the supernode */
    int          nsupr;     /* no of rows in the supernode */
    int          lptr;	    /* points to the starting subscript of the supernode */
    int          pivptr, old_pivptr, diag, diagind;
    float       pivmax, rtemp, thresh;
    complex       temp;
    complex       *lu_sup_ptr; 
    complex       *lu_col_ptr;
    int          *lsub_ptr;
    int          isub, icol, k, itemp;
    int          *lsub, *xlsub;
    complex       *lusup;
    int          *xlusup;
    flops_t      *ops = stat->ops;

    /* Initialize pointers */
    lsub       = Glu->lsub;
    xlsub      = Glu->xlsub;
    lusup      = Glu->lusup;
    xlusup     = Glu->xlusup;
    fsupc      = (Glu->xsup)[(Glu->supno)[jcol]];
    nsupc      = jcol - fsupc;	        /* excluding jcol; nsupc >= 0 */
    lptr       = xlsub[fsupc];
    nsupr      = xlsub[fsupc+1] - lptr;
    lu_sup_ptr = &lusup[xlusup[fsupc]];	/* start of the current supernode */
    lu_col_ptr = &lusup[xlusup[jcol]];	/* start of jcol in the supernode */
    lsub_ptr   = &lsub[lptr];	/* start of row indices of the supernode */

#ifdef DEBUG
if ( jcol == MIN_COL ) {
    printf(_T("Before cdiv: col %d\n"), jcol);
    for (k = nsupc; k < nsupr; k++) 
	printf(_T("  lu[%d] %f\n"), lsub_ptr[k], lu_col_ptr[k]);
}
#endif
    
    /* Determine the largest abs numerical value for partial pivoting;
       Also search for user-specified pivot, and diagonal element. */
    if ( *usepr ) *pivrow = iperm_r[jcol];
    diagind = iperm_c[jcol];
    pivmax = 0.0;
    pivptr = nsupc;
    diag = EMPTY;
    old_pivptr = nsupc;
    for (isub = nsupc; isub < nsupr; ++isub) {
        rtemp = c_abs1 (&lu_col_ptr[isub]);
	if ( rtemp > pivmax ) {
	    pivmax = rtemp;
	    pivptr = isub;
	}
	if ( *usepr && lsub_ptr[isub] == *pivrow ) old_pivptr = isub;
	if ( lsub_ptr[isub] == diagind ) diag = isub;
    }

    /* Test for singularity */
    if ( pivmax == 0.0 ) {
	*pivrow = lsub_ptr[pivptr];
	perm_r[*pivrow] = jcol;
	*usepr = 0;
	return (jcol+1);
    }

    thresh = u * pivmax;
    
    /* Choose appropriate pivotal element by our policy. */
    if ( *usepr ) {
        rtemp = c_abs1 (&lu_col_ptr[old_pivptr]);
	if ( rtemp != 0.0 && rtemp >= thresh )
	    pivptr = old_pivptr;
	else
	    *usepr = 0;
    }
    if ( *usepr == 0 ) {
	/* Use diagonal pivot? */
	if ( diag >= 0 ) { /* diagonal exists */
            rtemp = c_abs1 (&lu_col_ptr[diag]);
	    if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = diag;
        }
	*pivrow = lsub_ptr[pivptr];
    }
    
    /* Record pivot row */
    perm_r[*pivrow] = jcol;
    
    /* Interchange row subscripts */
    if ( pivptr != nsupc ) {
	itemp = lsub_ptr[pivptr];
	lsub_ptr[pivptr] = lsub_ptr[nsupc];
	lsub_ptr[nsupc] = itemp;

	/* Interchange numerical values as well, for the whole snode, such 
	 * that L is indexed the same way as A.
 	 */
	for (icol = 0; icol <= nsupc; icol++) {
	    itemp = pivptr + icol * nsupr;
	    temp = lu_sup_ptr[itemp];
	    lu_sup_ptr[itemp] = lu_sup_ptr[nsupc + icol*nsupr];
	    lu_sup_ptr[nsupc + icol*nsupr] = temp;
	}
    } /* if */

    /* cdiv operation */
    ops[FACT] += 10 * (nsupr - nsupc);

    c_div(&temp, &one, &lu_col_ptr[nsupc]);
    for (k = nsupc+1; k < nsupr; k++) 
	cc_mult(&lu_col_ptr[k], &lu_col_ptr[k], &temp);

    return 0;
}
Exemple #4
0
void
cgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size,
        int *etree, void *work, int lwork, int *perm_c, int *perm_r,
        SuperMatrix *L, SuperMatrix *U, SuperLUStat_t *stat, int *info)
{
    /* Local working arrays */
    NCPformat *Astore;
    int       *iperm_r = NULL; /* inverse of perm_r; used when
                                  options->Fact == SamePattern_SameRowPerm */
    int       *iperm_c; /* inverse of perm_c */
    int       *swap, *iswap; /* swap is used to store the row permutation
                                during the factorization. Initially, it is set
                                to iperm_c (row indeces of Pc*A*Pc').
                                iswap is the inverse of swap. After the
                                factorization, it is equal to perm_r. */
    int       *iwork;
    complex   *cwork;
    int       *segrep, *repfnz, *parent, *xplore;
    int       *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */
    int       *marker, *marker_relax;
    complex    *dense, *tempv;
    float *stempv;
    int       *relax_end, *relax_fsupc;
    complex    *a;
    int       *asub;
    int       *xa_begin, *xa_end;
    int       *xsup, *supno;
    int       *xlsub, *xlusup, *xusub;
    int       nzlumax;
    float    *amax;
    complex    drop_sum;
    float alpha, omega;  /* used in MILU, mimicing DRIC */
    static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */
    float    *swork2;      /* used by the second dropping rule */

    /* Local scalars */
    fact_t    fact = options->Fact;
    double    diag_pivot_thresh = options->DiagPivotThresh;
    double    drop_tol = options->ILU_DropTol; /* tau */
    double    fill_ini = options->ILU_FillTol; /* tau^hat */
    double    gamma = options->ILU_FillFactor;
    int       drop_rule = options->ILU_DropRule;
    milu_t    milu = options->ILU_MILU;
    double    fill_tol;
    int       pivrow;   /* pivotal row number in the original matrix A */
    int       nseg1;    /* no of segments in U-column above panel row jcol */
    int       nseg;     /* no of segments in each U-column */
    register int jcol;
    register int kcol;  /* end column of a relaxed snode */
    register int icol;
    register int i, k, jj, new_next, iinfo;
    int       m, n, min_mn, jsupno, fsupc, nextlu, nextu;
    int       w_def;    /* upper bound on panel width */
    int       usepr, iperm_r_allocated = 0;
    int       nnzL, nnzU;
    int       *panel_histo = stat->panel_histo;
    flops_t   *ops = stat->ops;

    int       last_drop;/* the last column which the dropping rules applied */
    int       quota;
    int       nnzAj;    /* number of nonzeros in A(:,1:j) */
    int       nnzLj, nnzUj;
    double    tol_L = drop_tol, tol_U = drop_tol;
    complex zero = {0.0, 0.0};
    float one = 1.0;

    /* Executable */
    iinfo    = 0;
    m        = A->nrow;
    n        = A->ncol;
    min_mn   = SUPERLU_MIN(m, n);
    Astore   = A->Store;
    a        = Astore->nzval;
    asub     = Astore->rowind;
    xa_begin = Astore->colbeg;
    xa_end   = Astore->colend;

    /* Allocate storage common to the factor routines */
    *info = cLUMemInit(fact, work, lwork, m, n, Astore->nnz, panel_size,
                       gamma, L, U, &Glu, &iwork, &cwork);
    if ( *info ) return;

    xsup    = Glu.xsup;
    supno   = Glu.supno;
    xlsub   = Glu.xlsub;
    xlusup  = Glu.xlusup;
    xusub   = Glu.xusub;

    SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore,
             &repfnz, &panel_lsub, &marker_relax, &marker);
    cSetRWork(m, panel_size, cwork, &dense, &tempv);

    usepr = (fact == SamePattern_SameRowPerm);
    if ( usepr ) {
        /* Compute the inverse of perm_r */
        iperm_r = (int *) intMalloc(m);
        for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k;
        iperm_r_allocated = 1;
    }

    iperm_c = (int *) intMalloc(n);
    for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k;
    swap = (int *)intMalloc(n);
    for (k = 0; k < n; k++) swap[k] = iperm_c[k];
    iswap = (int *)intMalloc(n);
    for (k = 0; k < n; k++) iswap[k] = perm_c[k];
    amax = (float *) floatMalloc(panel_size);
    if (drop_rule & DROP_SECONDARY)
        swork2 = (float *)floatMalloc(n);
    else
        swork2 = NULL;

    nnzAj = 0;
    nnzLj = 0;
    nnzUj = 0;
    last_drop = SUPERLU_MAX(min_mn - 2 * sp_ienv(7), (int)(min_mn * 0.95));
    alpha = pow((double)n, -1.0 / options->ILU_MILU_Dim);

    /* Identify relaxed snodes */
    relax_end = (int *) intMalloc(n);
    relax_fsupc = (int *) intMalloc(n);
    if ( options->SymmetricMode == YES )
        ilu_heap_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc);
    else
        ilu_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc);

    ifill (perm_r, m, EMPTY);
    ifill (marker, m * NO_MARKER, EMPTY);
    supno[0] = -1;
    xsup[0]  = xlsub[0] = xusub[0] = xlusup[0] = 0;
    w_def    = panel_size;

    /* Mark the rows used by relaxed supernodes */
    ifill (marker_relax, m, EMPTY);
    i = mark_relax(m, relax_end, relax_fsupc, xa_begin, xa_end,
                 asub, marker_relax);
#if ( PRNTlevel >= 1)
    printf("%d relaxed supernodes.\n", i);
#endif

    /*
     * Work on one "panel" at a time. A panel is one of the following:
     *     (a) a relaxed supernode at the bottom of the etree, or
     *     (b) panel_size contiguous columns, defined by the user
     */
    for (jcol = 0; jcol < min_mn; ) {

        if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */
            kcol = relax_end[jcol];       /* end of the relaxed snode */
            panel_histo[kcol-jcol+1]++;

            /* Drop small rows in the previous supernode. */
            if (jcol > 0 && jcol < last_drop) {
                int first = xsup[supno[jcol - 1]];
                int last = jcol - 1;
                int quota;

                /* Compute the quota */
                if (drop_rule & DROP_PROWS)
                    quota = gamma * Astore->nnz / m * (m - first) / m
                            * (last - first + 1);
                else if (drop_rule & DROP_COLUMN) {
                    int i;
                    quota = 0;
                    for (i = first; i <= last; i++)
                        quota += xa_end[i] - xa_begin[i];
                    quota = gamma * quota * (m - first) / m;
                } else if (drop_rule & DROP_AREA)
                    quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
                            - nnzLj;
                else
                    quota = m * n;
                fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) / min_mn);

                /* Drop small rows */
                stempv = (float *) tempv;
                i = ilu_cdrop_row(options, first, last, tol_L, quota, &nnzLj,
                                  &fill_tol, &Glu, stempv, swork2, 0);
                /* Reset the parameters */
                if (drop_rule & DROP_DYNAMIC) {
                    if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
                             < nnzLj)
                        tol_L = SUPERLU_MIN(1.0, tol_L * 2.0);
                    else
                        tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5);
                }
                if (fill_tol < 0) iinfo -= (int)fill_tol;
#ifdef DEBUG
                num_drop_L += i * (last - first + 1);
#endif
            }

            /* --------------------------------------
             * Factorize the relaxed supernode(jcol:kcol)
             * -------------------------------------- */
            /* Determine the union of the row structure of the snode */
            if ( (*info = ilu_csnode_dfs(jcol, kcol, asub, xa_begin, xa_end,
                                         marker, &Glu)) != 0 )
                return;

            nextu    = xusub[jcol];
            nextlu   = xlusup[jcol];
            jsupno   = supno[jcol];
            fsupc    = xsup[jsupno];
            new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1);
            nzlumax = Glu.nzlumax;
            while ( new_next > nzlumax ) {
                if ((*info = cLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, &Glu)))
                    return;
            }

            for (icol = jcol; icol <= kcol; icol++) {
                xusub[icol+1] = nextu;

                amax[0] = 0.0;
                /* Scatter into SPA dense[*] */
                for (k = xa_begin[icol]; k < xa_end[icol]; k++) {
                    register float tmp = c_abs1 (&a[k]);
                    if (tmp > amax[0]) amax[0] = tmp;
                    dense[asub[k]] = a[k];
                }
                nnzAj += xa_end[icol] - xa_begin[icol];
                if (amax[0] == 0.0) {
                    amax[0] = fill_ini;
#if ( PRNTlevel >= 1)
                    printf("Column %d is entirely zero!\n", icol);
                    fflush(stdout);
#endif
                }

                /* Numeric update within the snode */
                csnode_bmod(icol, jsupno, fsupc, dense, tempv, &Glu, stat);

                if (usepr) pivrow = iperm_r[icol];
                fill_tol = pow(fill_ini, 1.0 - (double)icol / (double)min_mn);
                if ( (*info = ilu_cpivotL(icol, diag_pivot_thresh, &usepr,
                                          perm_r, iperm_c[icol], swap, iswap,
                                          marker_relax, &pivrow,
                                          amax[0] * fill_tol, milu, zero,
                                          &Glu, stat)) ) {
                    iinfo++;
                    marker[pivrow] = kcol;
                }

            }

            jcol = kcol + 1;

        } else { /* Work on one panel of panel_size columns */

            /* Adjust panel_size so that a panel won't overlap with the next
             * relaxed snode.
             */
            panel_size = w_def;
            for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++)
                if ( relax_end[k] != EMPTY ) {
                    panel_size = k - jcol;
                    break;
                }
            if ( k == min_mn ) panel_size = min_mn - jcol;
            panel_histo[panel_size]++;

            /* symbolic factor on a panel of columns */
            ilu_cpanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1,
                          dense, amax, panel_lsub, segrep, repfnz,
                          marker, parent, xplore, &Glu);

            /* numeric sup-panel updates in topological order */
            cpanel_bmod(m, panel_size, jcol, nseg1, dense,
                        tempv, segrep, repfnz, &Glu, stat);

            /* Sparse LU within the panel, and below panel diagonal */
            for (jj = jcol; jj < jcol + panel_size; jj++) {

                k = (jj - jcol) * m; /* column index for w-wide arrays */

                nseg = nseg1;   /* Begin after all the panel segments */

                nnzAj += xa_end[jj] - xa_begin[jj];

                if ((*info = ilu_ccolumn_dfs(m, jj, perm_r, &nseg,
                                             &panel_lsub[k], segrep, &repfnz[k],
                                             marker, parent, xplore, &Glu)))
                    return;

                /* Numeric updates */
                if ((*info = ccolumn_bmod(jj, (nseg - nseg1), &dense[k],
                                          tempv, &segrep[nseg1], &repfnz[k],
                                          jcol, &Glu, stat)) != 0) return;

                /* Make a fill-in position if the column is entirely zero */
                if (xlsub[jj + 1] == xlsub[jj]) {
                    register int i, row;
                    int nextl;
                    int nzlmax = Glu.nzlmax;
                    int *lsub = Glu.lsub;
                    int *marker2 = marker + 2 * m;

                    /* Allocate memory */
                    nextl = xlsub[jj] + 1;
                    if (nextl >= nzlmax) {
                        int error = cLUMemXpand(jj, nextl, LSUB, &nzlmax, &Glu);
                        if (error) { *info = error; return; }
                        lsub = Glu.lsub;
                    }
                    xlsub[jj + 1]++;
                    assert(xlusup[jj]==xlusup[jj+1]);
                    xlusup[jj + 1]++;
                    Glu.lusup[xlusup[jj]] = zero;

                    /* Choose a row index (pivrow) for fill-in */
                    for (i = jj; i < n; i++)
                        if (marker_relax[swap[i]] <= jj) break;
                    row = swap[i];
                    marker2[row] = jj;
                    lsub[xlsub[jj]] = row;
#ifdef DEBUG
                    printf("Fill col %d.\n", jj);
                    fflush(stdout);
#endif
                }

                /* Computer the quota */
                if (drop_rule & DROP_PROWS)
                    quota = gamma * Astore->nnz / m * jj / m;
                else if (drop_rule & DROP_COLUMN)
                    quota = gamma * (xa_end[jj] - xa_begin[jj]) *
                            (jj + 1) / m;
                else if (drop_rule & DROP_AREA)
                    quota = gamma * 0.9 * nnzAj * 0.5 - nnzUj;
                else
                    quota = m;

                /* Copy the U-segments to ucol[*] and drop small entries */
                if ((*info = ilu_ccopy_to_ucol(jj, nseg, segrep, &repfnz[k],
                                               perm_r, &dense[k], drop_rule,
                                               milu, amax[jj - jcol] * tol_U,
                                               quota, &drop_sum, &nnzUj, &Glu,
                                               swork2)) != 0)
                    return;

                /* Reset the dropping threshold if required */
                if (drop_rule & DROP_DYNAMIC) {
                    if (gamma * 0.9 * nnzAj * 0.5 < nnzLj)
                        tol_U = SUPERLU_MIN(1.0, tol_U * 2.0);
                    else
                        tol_U = SUPERLU_MAX(drop_tol, tol_U * 0.5);
                }

                if (drop_sum.r != 0.0 && drop_sum.i != 0.0)
                {
                    omega = SUPERLU_MIN(2.0*(1.0-alpha)/c_abs1(&drop_sum), 1.0);
                    cs_mult(&drop_sum, &drop_sum, omega);
                }
                if (usepr) pivrow = iperm_r[jj];
                fill_tol = pow(fill_ini, 1.0 - (double)jj / (double)min_mn);
                if ( (*info = ilu_cpivotL(jj, diag_pivot_thresh, &usepr, perm_r,
                                          iperm_c[jj], swap, iswap,
                                          marker_relax, &pivrow,
                                          amax[jj - jcol] * fill_tol, milu,
                                          drop_sum, &Glu, stat)) ) {
                    iinfo++;
                    marker[m + pivrow] = jj;
                    marker[2 * m + pivrow] = jj;
                }

                /* Reset repfnz[] for this column */
                resetrep_col (nseg, segrep, &repfnz[k]);

                /* Start a new supernode, drop the previous one */
                if (jj > 0 && supno[jj] > supno[jj - 1] && jj < last_drop) {
                    int first = xsup[supno[jj - 1]];
                    int last = jj - 1;
                    int quota;

                    /* Compute the quota */
                    if (drop_rule & DROP_PROWS)
                        quota = gamma * Astore->nnz / m * (m - first) / m
                                * (last - first + 1);
                    else if (drop_rule & DROP_COLUMN) {
                        int i;
                        quota = 0;
                        for (i = first; i <= last; i++)
                            quota += xa_end[i] - xa_begin[i];
                        quota = gamma * quota * (m - first) / m;
                    } else if (drop_rule & DROP_AREA)
                        quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0)
                                / m) - nnzLj;
                    else
                        quota = m * n;
                    fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) /
                            (double)min_mn);

                    /* Drop small rows */
                    stempv = (float *) tempv;
                    i = ilu_cdrop_row(options, first, last, tol_L, quota,
                                      &nnzLj, &fill_tol, &Glu, stempv, swork2,
                                      1);

                    /* Reset the parameters */
                    if (drop_rule & DROP_DYNAMIC) {
                        if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
                                < nnzLj)
                            tol_L = SUPERLU_MIN(1.0, tol_L * 2.0);
                        else
                            tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5);
                    }
                    if (fill_tol < 0) iinfo -= (int)fill_tol;
#ifdef DEBUG
                    num_drop_L += i * (last - first + 1);
#endif
                } /* if start a new supernode */

            } /* for */

            jcol += panel_size; /* Move to the next panel */

        } /* else */

    } /* for */

    *info = iinfo;

    if ( m > n ) {
        k = 0;
        for (i = 0; i < m; ++i)
            if ( perm_r[i] == EMPTY ) {
                perm_r[i] = n + k;
                ++k;
            }
    }

    ilu_countnz(min_mn, &nnzL, &nnzU, &Glu);
    fixupL(min_mn, perm_r, &Glu);

    cLUWorkFree(iwork, cwork, &Glu); /* Free work space and compress storage */

    if ( fact == SamePattern_SameRowPerm ) {
        /* L and U structures may have changed due to possibly different
           pivoting, even though the storage is available.
           There could also be memory expansions, so the array locations
           may have changed, */
        ((SCformat *)L->Store)->nnz = nnzL;
        ((SCformat *)L->Store)->nsuper = Glu.supno[n];
        ((SCformat *)L->Store)->nzval = Glu.lusup;
        ((SCformat *)L->Store)->nzval_colptr = Glu.xlusup;
        ((SCformat *)L->Store)->rowind = Glu.lsub;
        ((SCformat *)L->Store)->rowind_colptr = Glu.xlsub;
        ((NCformat *)U->Store)->nnz = nnzU;
        ((NCformat *)U->Store)->nzval = Glu.ucol;
        ((NCformat *)U->Store)->rowind = Glu.usub;
        ((NCformat *)U->Store)->colptr = Glu.xusub;
    } else {
        cCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL, Glu.lusup,
                                 Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno,
                                 Glu.xsup, SLU_SC, SLU_C, SLU_TRLU);
        cCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol,
                               Glu.usub, Glu.xusub, SLU_NC, SLU_C, SLU_TRU);
    }

    ops[FACT] += ops[TRSV] + ops[GEMV];
    stat->expansions = --(Glu.num_expansions);

    if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
    SUPERLU_FREE (iperm_c);
    SUPERLU_FREE (relax_end);
    SUPERLU_FREE (swap);
    SUPERLU_FREE (iswap);
    SUPERLU_FREE (relax_fsupc);
    SUPERLU_FREE (amax);
    if ( swork2 ) SUPERLU_FREE (swork2);

}
Exemple #5
0
/*! \brief
 *
 * <pre>
 *   Purpose   
 *   =======   
 *
 *   CGSRFS improves the computed solution to a system of linear   
 *   equations and provides error bounds and backward error estimates for 
 *   the solution.   
 *
 *   If equilibration was performed, the system becomes:
 *           (diag(R)*A_original*diag(C)) * X = diag(R)*B_original.
 *
 *   See supermatrix.h for the definition of 'SuperMatrix' structure.
 *
 *   Arguments   
 *   =========   
 *
 * trans   (input) trans_t
 *          Specifies the form of the system of equations:
 *          = NOTRANS: A * X = B  (No transpose)
 *          = TRANS:   A'* X = B  (Transpose)
 *          = CONJ:    A**H * X = B  (Conjugate transpose)
 *   
 *   A       (input) SuperMatrix*
 *           The original matrix A in the system, or the scaled A if
 *           equilibration was done. The type of A can be:
 *           Stype = SLU_NC, Dtype = SLU_C, Mtype = SLU_GE.
 *    
 *   L       (input) SuperMatrix*
 *	     The factor L from the factorization Pr*A*Pc=L*U. Use
 *           compressed row subscripts storage for supernodes, 
 *           i.e., L has types: Stype = SLU_SC, Dtype = SLU_C, Mtype = SLU_TRLU.
 * 
 *   U       (input) SuperMatrix*
 *           The factor U from the factorization Pr*A*Pc=L*U as computed by
 *           cgstrf(). Use column-wise storage scheme, 
 *           i.e., U has types: Stype = SLU_NC, Dtype = SLU_C, Mtype = SLU_TRU.
 *
 *   perm_c  (input) int*, dimension (A->ncol)
 *	     Column permutation vector, which defines the 
 *           permutation matrix Pc; perm_c[i] = j means column i of A is 
 *           in position j in A*Pc.
 *
 *   perm_r  (input) int*, dimension (A->nrow)
 *           Row permutation vector, which defines the permutation matrix Pr;
 *           perm_r[i] = j means row i of A is in position j in Pr*A.
 *
 *   equed   (input) Specifies the form of equilibration that was done.
 *           = 'N': No equilibration.
 *           = 'R': Row equilibration, i.e., A was premultiplied by diag(R).
 *           = 'C': Column equilibration, i.e., A was postmultiplied by
 *                  diag(C).
 *           = 'B': Both row and column equilibration, i.e., A was replaced 
 *                  by diag(R)*A*diag(C).
 *
 *   R       (input) float*, dimension (A->nrow)
 *           The row scale factors for A.
 *           If equed = 'R' or 'B', A is premultiplied by diag(R).
 *           If equed = 'N' or 'C', R is not accessed.
 * 
 *   C       (input) float*, dimension (A->ncol)
 *           The column scale factors for A.
 *           If equed = 'C' or 'B', A is postmultiplied by diag(C).
 *           If equed = 'N' or 'R', C is not accessed.
 *
 *   B       (input) SuperMatrix*
 *           B has types: Stype = SLU_DN, Dtype = SLU_C, Mtype = SLU_GE.
 *           The right hand side matrix B.
 *           if equed = 'R' or 'B', B is premultiplied by diag(R).
 *
 *   X       (input/output) SuperMatrix*
 *           X has types: Stype = SLU_DN, Dtype = SLU_C, Mtype = SLU_GE.
 *           On entry, the solution matrix X, as computed by cgstrs().
 *           On exit, the improved solution matrix X.
 *           if *equed = 'C' or 'B', X should be premultiplied by diag(C)
 *               in order to obtain the solution to the original system.
 *
 *   FERR    (output) float*, dimension (B->ncol)   
 *           The estimated forward error bound for each solution vector   
 *           X(j) (the j-th column of the solution matrix X).   
 *           If XTRUE is the true solution corresponding to X(j), FERR(j) 
 *           is an estimated upper bound for the magnitude of the largest 
 *           element in (X(j) - XTRUE) divided by the magnitude of the   
 *           largest element in X(j).  The estimate is as reliable as   
 *           the estimate for RCOND, and is almost always a slight   
 *           overestimate of the true error.
 *
 *   BERR    (output) float*, dimension (B->ncol)   
 *           The componentwise relative backward error of each solution   
 *           vector X(j) (i.e., the smallest relative change in   
 *           any element of A or B that makes X(j) an exact solution).
 *
 *   stat     (output) SuperLUStat_t*
 *            Record the statistics on runtime and floating-point operation count.
 *            See util.h for the definition of 'SuperLUStat_t'.
 *
 *   info    (output) int*   
 *           = 0:  successful exit   
 *            < 0:  if INFO = -i, the i-th argument had an illegal value   
 *
 *    Internal Parameters   
 *    ===================   
 *
 *    ITMAX is the maximum number of steps of iterative refinement.   
 *
 * </pre>
 */
void
cgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U,
       int *perm_c, int *perm_r, char *equed, float *R, float *C,
       SuperMatrix *B, SuperMatrix *X, float *ferr, float *berr,
       SuperLUStat_t *stat, int *info)
{


#define ITMAX 5
    
    /* Table of constant values */
    int    ione = 1;
    complex ndone = {-1., 0.};
    complex done = {1., 0.};
    
    /* Local variables */
    NCformat *Astore;
    complex   *Aval;
    SuperMatrix Bjcol;
    DNformat *Bstore, *Xstore, *Bjcol_store;
    complex   *Bmat, *Xmat, *Bptr, *Xptr;
    int      kase;
    float   safe1, safe2;
    int      i, j, k, irow, nz, count, notran, rowequ, colequ;
    int      ldb, ldx, nrhs;
    float   s, xk, lstres, eps, safmin;
    char     transc[1];
    trans_t  transt;
    complex   *work;
    float   *rwork;
    int      *iwork;
    int      isave[3];

    extern int clacon2_(int *, complex *, complex *, float *, int *, int []);
#ifdef _CRAY
    extern int CCOPY(int *, complex *, int *, complex *, int *);
    extern int CSAXPY(int *, complex *, complex *, int *, complex *, int *);
#else
    extern int ccopy_(int *, complex *, int *, complex *, int *);
    extern int caxpy_(int *, complex *, complex *, int *, complex *, int *);
#endif

    Astore = A->Store;
    Aval   = Astore->nzval;
    Bstore = B->Store;
    Xstore = X->Store;
    Bmat   = Bstore->nzval;
    Xmat   = Xstore->nzval;
    ldb    = Bstore->lda;
    ldx    = Xstore->lda;
    nrhs   = B->ncol;
    
    /* Test the input parameters */
    *info = 0;
    notran = (trans == NOTRANS);
    if ( !notran && trans != TRANS && trans != CONJ ) *info = -1;
    else if ( A->nrow != A->ncol || A->nrow < 0 ||
	      A->Stype != SLU_NC || A->Dtype != SLU_C || A->Mtype != SLU_GE )
	*info = -2;
    else if ( L->nrow != L->ncol || L->nrow < 0 ||
 	      L->Stype != SLU_SC || L->Dtype != SLU_C || L->Mtype != SLU_TRLU )
	*info = -3;
    else if ( U->nrow != U->ncol || U->nrow < 0 ||
 	      U->Stype != SLU_NC || U->Dtype != SLU_C || U->Mtype != SLU_TRU )
	*info = -4;
    else if ( ldb < SUPERLU_MAX(0, A->nrow) ||
 	      B->Stype != SLU_DN || B->Dtype != SLU_C || B->Mtype != SLU_GE )
        *info = -10;
    else if ( ldx < SUPERLU_MAX(0, A->nrow) ||
 	      X->Stype != SLU_DN || X->Dtype != SLU_C || X->Mtype != SLU_GE )
	*info = -11;
    if (*info != 0) {
	i = -(*info);
	input_error("cgsrfs", &i);
	return;
    }

    /* Quick return if possible */
    if ( A->nrow == 0 || nrhs == 0) {
	for (j = 0; j < nrhs; ++j) {
	    ferr[j] = 0.;
	    berr[j] = 0.;
	}
	return;
    }

    rowequ = lsame_(equed, "R") || lsame_(equed, "B");
    colequ = lsame_(equed, "C") || lsame_(equed, "B");
    
    /* Allocate working space */
    work = complexMalloc(2*A->nrow);
    rwork = (float *) SUPERLU_MALLOC( A->nrow * sizeof(float) );
    iwork = intMalloc(A->nrow);
    if ( !work || !rwork || !iwork ) 
        ABORT("Malloc fails for work/rwork/iwork.");
    
    if ( notran ) {
	*(unsigned char *)transc = 'N';
        transt = TRANS;
    } else {
	*(unsigned char *)transc = 'T';
	transt = NOTRANS;
    }

    /* NZ = maximum number of nonzero elements in each row of A, plus 1 */
    nz     = A->ncol + 1;
    eps    = smach("Epsilon");
    safmin = smach("Safe minimum");

    /* Set SAFE1 essentially to be the underflow threshold times the
       number of additions in each row. */
    safe1  = nz * safmin;
    safe2  = safe1 / eps;

    /* Compute the number of nonzeros in each row (or column) of A */
    for (i = 0; i < A->nrow; ++i) iwork[i] = 0;
    if ( notran ) {
	for (k = 0; k < A->ncol; ++k)
	    for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) 
		++iwork[Astore->rowind[i]];
    } else {
	for (k = 0; k < A->ncol; ++k)
	    iwork[k] = Astore->colptr[k+1] - Astore->colptr[k];
    }	

    /* Copy one column of RHS B into Bjcol. */
    Bjcol.Stype = B->Stype;
    Bjcol.Dtype = B->Dtype;
    Bjcol.Mtype = B->Mtype;
    Bjcol.nrow  = B->nrow;
    Bjcol.ncol  = 1;
    Bjcol.Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) );
    if ( !Bjcol.Store ) ABORT("SUPERLU_MALLOC fails for Bjcol.Store");
    Bjcol_store = Bjcol.Store;
    Bjcol_store->lda = ldb;
    Bjcol_store->nzval = work; /* address aliasing */
	
    /* Do for each right hand side ... */
    for (j = 0; j < nrhs; ++j) {
	count = 0;
	lstres = 3.;
	Bptr = &Bmat[j*ldb];
	Xptr = &Xmat[j*ldx];

	while (1) { /* Loop until stopping criterion is satisfied. */

	    /* Compute residual R = B - op(A) * X,   
	       where op(A) = A, A**T, or A**H, depending on TRANS. */
	    
#ifdef _CRAY
	    CCOPY(&A->nrow, Bptr, &ione, work, &ione);
#else
	    ccopy_(&A->nrow, Bptr, &ione, work, &ione);
#endif
	    sp_cgemv(transc, ndone, A, Xptr, ione, done, work, ione);

	    /* Compute componentwise relative backward error from formula 
	       max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )   
	       where abs(Z) is the componentwise absolute value of the matrix
	       or vector Z.  If the i-th component of the denominator is less
	       than SAFE2, then SAFE1 is added to the i-th component of the   
	       numerator before dividing. */

	    for (i = 0; i < A->nrow; ++i) rwork[i] = c_abs1( &Bptr[i] );
	    
	    /* Compute abs(op(A))*abs(X) + abs(B). */
	    if (notran) {
		for (k = 0; k < A->ncol; ++k) {
		    xk = c_abs1( &Xptr[k] );
		    for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
			rwork[Astore->rowind[i]] += c_abs1(&Aval[i]) * xk;
		}
	    } else {
		for (k = 0; k < A->ncol; ++k) {
		    s = 0.;
		    for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
			irow = Astore->rowind[i];
			s += c_abs1(&Aval[i]) * c_abs1(&Xptr[irow]);
		    }
		    rwork[k] += s;
		}
	    }
	    s = 0.;
	    for (i = 0; i < A->nrow; ++i) {
		if (rwork[i] > safe2) {
		    s = SUPERLU_MAX( s, c_abs1(&work[i]) / rwork[i] );
                } else if ( rwork[i] != 0.0 ) {
		    s = SUPERLU_MAX( s, (c_abs1(&work[i]) + safe1) / rwork[i] );
                }
                /* If rwork[i] is exactly 0.0, then we know the true 
                   residual also must be exactly 0.0. */
	    }
	    berr[j] = s;

	    /* Test stopping criterion. Continue iterating if   
	       1) The residual BERR(J) is larger than machine epsilon, and   
	       2) BERR(J) decreased by at least a factor of 2 during the   
	          last iteration, and   
	       3) At most ITMAX iterations tried. */

	    if (berr[j] > eps && berr[j] * 2. <= lstres && count < ITMAX) {
		/* Update solution and try again. */
		cgstrs (trans, L, U, perm_c, perm_r, &Bjcol, stat, info);
		
#ifdef _CRAY
		CAXPY(&A->nrow, &done, work, &ione,
		       &Xmat[j*ldx], &ione);
#else
		caxpy_(&A->nrow, &done, work, &ione,
		       &Xmat[j*ldx], &ione);
#endif
		lstres = berr[j];
		++count;
	    } else {
		break;
	    }
        
	} /* end while */

	stat->RefineSteps = count;

	/* Bound error from formula:
	   norm(X - XTRUE) / norm(X) .le. FERR = norm( abs(inv(op(A)))*   
	   ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)   
          where   
            norm(Z) is the magnitude of the largest component of Z   
            inv(op(A)) is the inverse of op(A)   
            abs(Z) is the componentwise absolute value of the matrix or
	       vector Z   
            NZ is the maximum number of nonzeros in any row of A, plus 1   
            EPS is machine epsilon   

          The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))   
          is incremented by SAFE1 if the i-th component of   
          abs(op(A))*abs(X) + abs(B) is less than SAFE2.   

          Use CLACON2 to estimate the infinity-norm of the matrix   
             inv(op(A)) * diag(W),   
          where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */
	
	for (i = 0; i < A->nrow; ++i) rwork[i] = c_abs1( &Bptr[i] );
	
	/* Compute abs(op(A))*abs(X) + abs(B). */
	if ( notran ) {
	    for (k = 0; k < A->ncol; ++k) {
		xk = c_abs1( &Xptr[k] );
		for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
		    rwork[Astore->rowind[i]] += c_abs1(&Aval[i]) * xk;
	    }
	} else {
	    for (k = 0; k < A->ncol; ++k) {
		s = 0.;
		for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
		    irow = Astore->rowind[i];
		    xk = c_abs1( &Xptr[irow] );
		    s += c_abs1(&Aval[i]) * xk;
		}
		rwork[k] += s;
	    }
	}
	
	for (i = 0; i < A->nrow; ++i)
	    if (rwork[i] > safe2)
		rwork[i] = c_abs(&work[i]) + (iwork[i]+1)*eps*rwork[i];
	    else
		rwork[i] = c_abs(&work[i])+(iwork[i]+1)*eps*rwork[i]+safe1;
	kase = 0;

	do {
	    clacon2_(&A->nrow, &work[A->nrow], work, &ferr[j], &kase, isave);
	    if (kase == 0) break;

	    if (kase == 1) {
		/* Multiply by diag(W)*inv(op(A)**T)*(diag(C) or diag(R)). */
		if ( notran && colequ )
		    for (i = 0; i < A->ncol; ++i) {
		        cs_mult(&work[i], &work[i], C[i]);
	            }
		else if ( !notran && rowequ )
		    for (i = 0; i < A->nrow; ++i) {
		        cs_mult(&work[i], &work[i], R[i]);
                    }

		cgstrs (transt, L, U, perm_c, perm_r, &Bjcol, stat, info);
		
		for (i = 0; i < A->nrow; ++i) {
		    cs_mult(&work[i], &work[i], rwork[i]);
	 	}
	    } else {
		/* Multiply by (diag(C) or diag(R))*inv(op(A))*diag(W). */
		for (i = 0; i < A->nrow; ++i) {
		    cs_mult(&work[i], &work[i], rwork[i]);
		}
		
		cgstrs (trans, L, U, perm_c, perm_r, &Bjcol, stat, info);
		
		if ( notran && colequ )
		    for (i = 0; i < A->ncol; ++i) {
		        cs_mult(&work[i], &work[i], C[i]);
		    }
		else if ( !notran && rowequ )
		    for (i = 0; i < A->ncol; ++i) {
		        cs_mult(&work[i], &work[i], R[i]);  
		    }
	    }
	    
	} while ( kase != 0 );

	/* Normalize error. */
	lstres = 0.;
 	if ( notran && colequ ) {
	    for (i = 0; i < A->nrow; ++i)
	    	lstres = SUPERLU_MAX( lstres, C[i] * c_abs1( &Xptr[i]) );
  	} else if ( !notran && rowequ ) {
	    for (i = 0; i < A->nrow; ++i)
	    	lstres = SUPERLU_MAX( lstres, R[i] * c_abs1( &Xptr[i]) );
	} else {
	    for (i = 0; i < A->nrow; ++i)
	    	lstres = SUPERLU_MAX( lstres, c_abs1( &Xptr[i]) );
	}
	if ( lstres != 0. )
	    ferr[j] /= lstres;

    } /* for each RHS j ... */
    
    SUPERLU_FREE(work);
    SUPERLU_FREE(rwork);
    SUPERLU_FREE(iwork);
    SUPERLU_FREE(Bjcol.Store);

    return;

} /* cgsrfs */
Exemple #6
0
/*! \brief
 * <pre>
 * Purpose
 * =======
 *    ilu_cdrop_row() - Drop some small rows from the previous 
 *    supernode (L-part only).
 * </pre>
 */
int ilu_cdrop_row(
	superlu_options_t *options, /* options */
	int    first,	    /* index of the first column in the supernode */
	int    last,	    /* index of the last column in the supernode */
	double drop_tol,    /* dropping parameter */
	int    quota,	    /* maximum nonzero entries allowed */
	int    *nnzLj,	    /* in/out number of nonzeros in L(:, 1:last) */
	double *fill_tol,   /* in/out - on exit, fill_tol=-num_zero_pivots,
			     * does not change if options->ILU_MILU != SMILU1 */
	GlobalLU_t *Glu,    /* modified */
	float swork[],   /* working space
	                     * the length of swork[] should be no less than
			     * the number of rows in the supernode */
	float swork2[], /* working space with the same size as swork[],
			     * used only by the second dropping rule */
	int    lastc	    /* if lastc == 0, there is nothing after the
			     * working supernode [first:last];
			     * if lastc == 1, there is one more column after
			     * the working supernode. */ )
{
    register int i, j, k, m1;
    register int nzlc; /* number of nonzeros in column last+1 */
    register int xlusup_first, xlsub_first;
    int m, n; /* m x n is the size of the supernode */
    int r = 0; /* number of dropped rows */
    register float *temp;
    register complex *lusup = (complex *) Glu->lusup;
    register int *lsub = Glu->lsub;
    register int *xlsub = Glu->xlsub;
    register int *xlusup = Glu->xlusup;
    register float d_max = 0.0, d_min = 1.0;
    int    drop_rule = options->ILU_DropRule;
    milu_t milu = options->ILU_MILU;
    norm_t nrm = options->ILU_Norm;
    complex zero = {0.0, 0.0};
    complex one = {1.0, 0.0};
    complex none = {-1.0, 0.0};
    int i_1 = 1;
    int inc_diag; /* inc_diag = m + 1 */
    int nzp = 0;  /* number of zero pivots */
    float alpha = pow((double)(Glu->n), -1.0 / options->ILU_MILU_Dim);

    xlusup_first = xlusup[first];
    xlsub_first = xlsub[first];
    m = xlusup[first + 1] - xlusup_first;
    n = last - first + 1;
    m1 = m - 1;
    inc_diag = m + 1;
    nzlc = lastc ? (xlusup[last + 2] - xlusup[last + 1]) : 0;
    temp = swork - n;

    /* Quick return if nothing to do. */
    if (m == 0 || m == n || drop_rule == NODROP)
    {
	*nnzLj += m * n;
	return 0;
    }

    /* basic dropping: ILU(tau) */
    for (i = n; i <= m1; )
    {
	/* the average abs value of ith row */
	switch (nrm)
	{
	    case ONE_NORM:
		temp[i] = scasum_(&n, &lusup[xlusup_first + i], &m) / (double)n;
		break;
	    case TWO_NORM:
		temp[i] = scnrm2_(&n, &lusup[xlusup_first + i], &m)
		    / sqrt((double)n);
		break;
	    case INF_NORM:
	    default:
		k = icamax_(&n, &lusup[xlusup_first + i], &m) - 1;
		temp[i] = c_abs1(&lusup[xlusup_first + i + m * k]);
		break;
	}

	/* drop small entries due to drop_tol */
	if (drop_rule & DROP_BASIC && temp[i] < drop_tol)
	{
	    r++;
	    /* drop the current row and move the last undropped row here */
	    if (r > 1) /* add to last row */
	    {
		/* accumulate the sum (for MILU) */
		switch (milu)
		{
		    case SMILU_1:
		    case SMILU_2:
			caxpy_(&n, &one, &lusup[xlusup_first + i], &m,
				&lusup[xlusup_first + m - 1], &m);
			break;
		    case SMILU_3:
			for (j = 0; j < n; j++)
			    lusup[xlusup_first + (m - 1) + j * m].r +=
				    c_abs1(&lusup[xlusup_first + i + j * m]);
			break;
		    case SILU:
		    default:
			break;
		}
		ccopy_(&n, &lusup[xlusup_first + m1], &m,
                       &lusup[xlusup_first + i], &m);
	    } /* if (r > 1) */
	    else /* move to last row */
	    {
		cswap_(&n, &lusup[xlusup_first + m1], &m,
			&lusup[xlusup_first + i], &m);
		if (milu == SMILU_3)
		    for (j = 0; j < n; j++) {
			lusup[xlusup_first + m1 + j * m].r =
				c_abs1(&lusup[xlusup_first + m1 + j * m]);
			lusup[xlusup_first + m1 + j * m].i = 0.0;
                    }
	    }
	    lsub[xlsub_first + i] = lsub[xlsub_first + m1];
	    m1--;
	    continue;
	} /* if dropping */
	else
	{
	    if (temp[i] > d_max) d_max = temp[i];
	    if (temp[i] < d_min) d_min = temp[i];
	}
	i++;
    } /* for */

    /* Secondary dropping: drop more rows according to the quota. */
    quota = ceil((double)quota / (double)n);
    if (drop_rule & DROP_SECONDARY && m - r > quota)
    {
	register double tol = d_max;

	/* Calculate the second dropping tolerance */
	if (quota > n)
	{
	    if (drop_rule & DROP_INTERP) /* by interpolation */
	    {
		d_max = 1.0 / d_max; d_min = 1.0 / d_min;
		tol = 1.0 / (d_max + (d_min - d_max) * quota / (m - n - r));
	    }
	    else /* by quick select */
	    {
		int len = m1 - n + 1;
		scopy_(&len, swork, &i_1, swork2, &i_1);
		tol = sqselect(len, swork2, quota - n);
#if 0
		register int *itemp = iwork - n;
		A = temp;
		for (i = n; i <= m1; i++) itemp[i] = i;
		qsort(iwork, m1 - n + 1, sizeof(int), _compare_);
		tol = temp[itemp[quota]];
#endif
	    }
	}

	for (i = n; i <= m1; )
	{
	    if (temp[i] <= tol)
	    {
		register int j;
		r++;
		/* drop the current row and move the last undropped row here */
		if (r > 1) /* add to last row */
		{
		    /* accumulate the sum (for MILU) */
		    switch (milu)
		    {
			case SMILU_1:
			case SMILU_2:
			    caxpy_(&n, &one, &lusup[xlusup_first + i], &m,
				    &lusup[xlusup_first + m - 1], &m);
			    break;
			case SMILU_3:
			    for (j = 0; j < n; j++)
				lusup[xlusup_first + (m - 1) + j * m].r +=
   				  c_abs1(&lusup[xlusup_first + i + j * m]);
			    break;
			case SILU:
			default:
			    break;
		    }
		    ccopy_(&n, &lusup[xlusup_first + m1], &m,
			    &lusup[xlusup_first + i], &m);
		} /* if (r > 1) */
		else /* move to last row */
		{
		    cswap_(&n, &lusup[xlusup_first + m1], &m,
			    &lusup[xlusup_first + i], &m);
		    if (milu == SMILU_3)
			for (j = 0; j < n; j++) {
			    lusup[xlusup_first + m1 + j * m].r =
				    c_abs1(&lusup[xlusup_first + m1 + j * m]);
			    lusup[xlusup_first + m1 + j * m].i = 0.0;
                        }
		}
		lsub[xlsub_first + i] = lsub[xlsub_first + m1];
		m1--;
		temp[i] = temp[m1];

		continue;
	    }
	    i++;

	} /* for */

    } /* if secondary dropping */

    for (i = n; i < m; i++) temp[i] = 0.0;

    if (r == 0)
    {
	*nnzLj += m * n;
	return 0;
    }

    /* add dropped entries to the diagnal */
    if (milu != SILU)
    {
	register int j;
	complex t;
	float omega;
	for (j = 0; j < n; j++)
	{
	    t = lusup[xlusup_first + (m - 1) + j * m];
            if (t.r == 0.0 && t.i == 0.0) continue;
            omega = SUPERLU_MIN(2.0 * (1.0 - alpha) / c_abs1(&t), 1.0);
	    cs_mult(&t, &t, omega);

 	    switch (milu)
	    {
		case SMILU_1:
		    if ( !(c_eq(&t, &none)) ) {
                        c_add(&t, &t, &one);
                        cc_mult(&lusup[xlusup_first + j * inc_diag],
			                  &lusup[xlusup_first + j * inc_diag],
                                          &t);
                    }
		    else
		    {
                        cs_mult(
                                &lusup[xlusup_first + j * inc_diag],
			        &lusup[xlusup_first + j * inc_diag],
                                *fill_tol);
#ifdef DEBUG
			printf("[1] ZERO PIVOT: FILL col %d.\n", first + j);
			fflush(stdout);
#endif
			nzp++;
		    }
		    break;
		case SMILU_2:
                    cs_mult(&lusup[xlusup_first + j * inc_diag],
                                          &lusup[xlusup_first + j * inc_diag],
                                          1.0 + c_abs1(&t));
		    break;
		case SMILU_3:
                    c_add(&t, &t, &one);
                    cc_mult(&lusup[xlusup_first + j * inc_diag],
	                              &lusup[xlusup_first + j * inc_diag],
                                      &t);
		    break;
		case SILU:
		default:
		    break;
	    }
	}
	if (nzp > 0) *fill_tol = -nzp;
    }

    /* Remove dropped entries from the memory and fix the pointers. */
    m1 = m - r;
    for (j = 1; j < n; j++)
    {
	register int tmp1, tmp2;
	tmp1 = xlusup_first + j * m1;
	tmp2 = xlusup_first + j * m;
	for (i = 0; i < m1; i++)
	    lusup[i + tmp1] = lusup[i + tmp2];
    }
    for (i = 0; i < nzlc; i++)
	lusup[xlusup_first + i + n * m1] = lusup[xlusup_first + i + n * m];
    for (i = 0; i < nzlc; i++)
	lsub[xlsub[last + 1] - r + i] = lsub[xlsub[last + 1] + i];
    for (i = first + 1; i <= last + 1; i++)
    {
	xlusup[i] -= r * (i - first);
	xlsub[i] -= r;
    }
    if (lastc)
    {
	xlusup[last + 2] -= r * n;
	xlsub[last + 2] -= r;
    }

    *nnzLj += (m - r) * n;
    return r;
}
Exemple #7
0
int
ilu_cpivotL(
	const int  jcol,     /* in */
	const double u,      /* in - diagonal pivoting threshold */
	int	   *usepr,   /* re-use the pivot sequence given by
			      * perm_r/iperm_r */
	int	   *perm_r,  /* may be modified */
	int	   diagind,  /* diagonal of Pc*A*Pc' */
	int	   *swap,    /* in/out record the row permutation */
	int	   *iswap,   /* in/out inverse of swap, it is the same as
				perm_r after the factorization */
	int	   *marker,  /* in */
	int	   *pivrow,  /* in/out, as an input if *usepr!=0 */
	double	   fill_tol, /* in - fill tolerance of current column
			      * used for a singular column */
	milu_t	   milu,     /* in */
	complex	   drop_sum, /* in - computed in ilu_ccopy_to_ucol()
                                (MILU only) */
	GlobalLU_t *Glu,     /* modified - global LU data structures */
	SuperLUStat_t *stat  /* output */
       )
{

    int		 n;	 /* number of columns */
    int		 fsupc;  /* first column in the supernode */
    int		 nsupc;  /* no of columns in the supernode */
    int		 nsupr;  /* no of rows in the supernode */
    int		 lptr;	 /* points to the starting subscript of the supernode */
    register int	 pivptr;
    int		 old_pivptr, diag, ptr0;
    register float  pivmax, rtemp;
    float	 thresh;
    complex	 temp;
    complex	 *lu_sup_ptr;
    complex	 *lu_col_ptr;
    int		 *lsub_ptr;
    register int	 isub, icol, k, itemp;
    int		 *lsub, *xlsub;
    complex	 *lusup;
    int		 *xlusup;
    flops_t	 *ops = stat->ops;
    int		 info;
    complex one = {1.0, 0.0};

    /* Initialize pointers */
    n	       = Glu->n;
    lsub       = Glu->lsub;
    xlsub      = Glu->xlsub;
    lusup      = Glu->lusup;
    xlusup     = Glu->xlusup;
    fsupc      = (Glu->xsup)[(Glu->supno)[jcol]];
    nsupc      = jcol - fsupc;		/* excluding jcol; nsupc >= 0 */
    lptr       = xlsub[fsupc];
    nsupr      = xlsub[fsupc+1] - lptr;
    lu_sup_ptr = &lusup[xlusup[fsupc]]; /* start of the current supernode */
    lu_col_ptr = &lusup[xlusup[jcol]];	/* start of jcol in the supernode */
    lsub_ptr   = &lsub[lptr];	/* start of row indices of the supernode */

    /* Determine the largest abs numerical value for partial pivoting;
       Also search for user-specified pivot, and diagonal element. */
    pivmax = -1.0;
    pivptr = nsupc;
    diag = EMPTY;
    old_pivptr = nsupc;
    ptr0 = EMPTY;
    for (isub = nsupc; isub < nsupr; ++isub) {
        if (marker[lsub_ptr[isub]] > jcol)
            continue; /* do not overlap with a later relaxed supernode */

	switch (milu) {
	    case SMILU_1:
                c_add(&temp, &lu_col_ptr[isub], &drop_sum);
		rtemp = c_abs1(&temp);
		break;
	    case SMILU_2:
	    case SMILU_3:
                /* In this case, drop_sum contains the sum of the abs. value */
		rtemp = c_abs1(&lu_col_ptr[isub]);
		break;
	    case SILU:
	    default:
		rtemp = c_abs1(&lu_col_ptr[isub]);
		break;
	}
	if (rtemp > pivmax) { pivmax = rtemp; pivptr = isub; }
	if (*usepr && lsub_ptr[isub] == *pivrow) old_pivptr = isub;
	if (lsub_ptr[isub] == diagind) diag = isub;
	if (ptr0 == EMPTY) ptr0 = isub;
    }

    if (milu == SMILU_2 || milu == SMILU_3) pivmax += drop_sum.r;

    /* Test for singularity */
    if (pivmax < 0.0) {
	fprintf(stderr, "[0]: jcol=%d, SINGULAR!!!\n", jcol);
	fflush(stderr);
	exit(1);
    }
    if ( pivmax == 0.0 ) {
	if (diag != EMPTY)
	    *pivrow = lsub_ptr[pivptr = diag];
	else if (ptr0 != EMPTY)
	    *pivrow = lsub_ptr[pivptr = ptr0];
	else {
	    /* look for the first row which does not
	       belong to any later supernodes */
	    for (icol = jcol; icol < n; icol++)
		if (marker[swap[icol]] <= jcol) break;
	    if (icol >= n) {
		fprintf(stderr, "[1]: jcol=%d, SINGULAR!!!\n", jcol);
		fflush(stderr);
		exit(1);
	    }

	    *pivrow = swap[icol];

	    /* pick up the pivot row */
	    for (isub = nsupc; isub < nsupr; ++isub)
		if ( lsub_ptr[isub] == *pivrow ) { pivptr = isub; break; }
	}
	pivmax = fill_tol;
	lu_col_ptr[pivptr].r = pivmax;
	lu_col_ptr[pivptr].i = 0.0;
	*usepr = 0;
#ifdef DEBUG
	printf("[0] ZERO PIVOT: FILL (%d, %d).\n", *pivrow, jcol);
	fflush(stdout);
#endif
	info =jcol + 1;
    } /* if (*pivrow == 0.0) */
    else {
	thresh = u * pivmax;

	/* Choose appropriate pivotal element by our policy. */
	if ( *usepr ) {
	    switch (milu) {
		case SMILU_1:
                    c_add(&temp, &lu_col_ptr[old_pivptr], &drop_sum);
		    rtemp = c_abs1(&temp);
		    break;
		case SMILU_2:
		case SMILU_3:
		    rtemp = c_abs1(&lu_col_ptr[old_pivptr]) + drop_sum.r;
		    break;
		case SILU:
		default:
		    rtemp = c_abs1(&lu_col_ptr[old_pivptr]);
		    break;
	    }
	    if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = old_pivptr;
	    else *usepr = 0;
	}
	if ( *usepr == 0 ) {
	    /* Use diagonal pivot? */
	    if ( diag >= 0 ) { /* diagonal exists */
		switch (milu) {
		    case SMILU_1:
                        c_add(&temp, &lu_col_ptr[diag], &drop_sum);
         	        rtemp = c_abs1(&temp);
			break;
		    case SMILU_2:
		    case SMILU_3:
			rtemp = c_abs1(&lu_col_ptr[diag]) + drop_sum.r;
			break;
		    case SILU:
		    default:
			rtemp = c_abs1(&lu_col_ptr[diag]);
			break;
		}
		if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = diag;
	    }
	    *pivrow = lsub_ptr[pivptr];
	}
	info = 0;

	/* Reset the diagonal */
	switch (milu) {
	    case SMILU_1:
		c_add(&lu_col_ptr[pivptr], &lu_col_ptr[pivptr], &drop_sum);
		break;
	    case SMILU_2:
	    case SMILU_3:
                temp = c_sgn(&lu_col_ptr[pivptr]);
                cc_mult(&temp, &temp, &drop_sum);
                c_add(&lu_col_ptr[pivptr], &lu_col_ptr[pivptr], &drop_sum);
		break;
	    case SILU:
	    default:
		break;
	}

    } /* else */

    /* Record pivot row */
    perm_r[*pivrow] = jcol;
    if (jcol < n - 1) {
	register int t1, t2, t;
	t1 = iswap[*pivrow]; t2 = jcol;
	if (t1 != t2) {
	    t = swap[t1]; swap[t1] = swap[t2]; swap[t2] = t;
	    t1 = swap[t1]; t2 = t;
	    t = iswap[t1]; iswap[t1] = iswap[t2]; iswap[t2] = t;
	}
    } /* if (jcol < n - 1) */

    /* Interchange row subscripts */
    if ( pivptr != nsupc ) {
	itemp = lsub_ptr[pivptr];
	lsub_ptr[pivptr] = lsub_ptr[nsupc];
	lsub_ptr[nsupc] = itemp;

	/* Interchange numerical values as well, for the whole snode, such 
	 * that L is indexed the same way as A.
	 */
	for (icol = 0; icol <= nsupc; icol++) {
	    itemp = pivptr + icol * nsupr;
	    temp = lu_sup_ptr[itemp];
	    lu_sup_ptr[itemp] = lu_sup_ptr[nsupc + icol*nsupr];
	    lu_sup_ptr[nsupc + icol*nsupr] = temp;
	}
    } /* if */

    /* cdiv operation */
    ops[FACT] += 10 * (nsupr - nsupc);
    c_div(&temp, &one, &lu_col_ptr[nsupc]);
    for (k = nsupc+1; k < nsupr; k++) 
	cc_mult(&lu_col_ptr[k], &lu_col_ptr[k], &temp);

    return info;
}
Exemple #8
0
float
cPivotGrowth(int ncols, SuperMatrix *A, int *perm_c, 
             SuperMatrix *L, SuperMatrix *U)
{

    NCformat *Astore;
    SCformat *Lstore;
    NCformat *Ustore;
    complex  *Aval, *Lval, *Uval;
    int      fsupc, nsupr, luptr, nz_in_U;
    int      i, j, k, oldcol;
    int      *inv_perm_c;
    float   rpg, maxaj, maxuj;
    extern   double slamch_(char *);
    float   smlnum;
    complex   *luval;
   
    /* Get machine constants. */
    smlnum = slamch_("S");
    rpg = 1. / smlnum;

    Astore = A->Store;
    Lstore = L->Store;
    Ustore = U->Store;
    Aval = Astore->nzval;
    Lval = Lstore->nzval;
    Uval = Ustore->nzval;
    
    inv_perm_c = (int *) SUPERLU_MALLOC(A->ncol*sizeof(int));
    for (j = 0; j < A->ncol; ++j) inv_perm_c[perm_c[j]] = j;

    for (k = 0; k <= Lstore->nsuper; ++k) {
	fsupc = L_FST_SUPC(k);
	nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
	luptr = L_NZ_START(fsupc);
	luval = &Lval[luptr];
	nz_in_U = 1;
	
	for (j = fsupc; j < L_FST_SUPC(k+1) && j < ncols; ++j) {
	    maxaj = 0.;
            oldcol = inv_perm_c[j];
	    for (i = Astore->colptr[oldcol]; i < Astore->colptr[oldcol+1]; ++i)
		maxaj = SUPERLU_MAX( maxaj, c_abs1( &Aval[i]) );
	
	    maxuj = 0.;
	    for (i = Ustore->colptr[j]; i < Ustore->colptr[j+1]; i++)
		maxuj = SUPERLU_MAX( maxuj, c_abs1( &Uval[i]) );
	    
	    /* Supernode */
	    for (i = 0; i < nz_in_U; ++i)
		maxuj = SUPERLU_MAX( maxuj, c_abs1( &luval[i]) );

	    ++nz_in_U;
	    luval += nsupr;

	    if ( maxuj == 0. )
		rpg = SUPERLU_MIN( rpg, 1.);
	    else
		rpg = SUPERLU_MIN( rpg, maxaj / maxuj );
	}
	
	if ( j >= ncols ) break;
    }

    SUPERLU_FREE(inv_perm_c);
    return (rpg);
}
Exemple #9
0
int
ilu_ccopy_to_ucol(
	      int	 jcol,	   /* in */
	      int	 nseg,	   /* in */
	      int	 *segrep,  /* in */
	      int	 *repfnz,  /* in */
	      int	 *perm_r,  /* in */
	      complex	 *dense,   /* modified - reset to zero on return */
	      int  	 drop_rule,/* in */
	      milu_t	 milu,	   /* in */
	      double	 drop_tol, /* in */
	      int	 quota,    /* maximum nonzero entries allowed */
	      complex	 *sum,	   /* out - the sum of dropped entries */
	      int	 *nnzUj,   /* in - out */
	      GlobalLU_t *Glu,	   /* modified */
	      float	 *work	   /* working space with minimum size n,
				    * used by the second dropping rule */
	      )
{
/*
 * Gather from SPA dense[*] to global ucol[*].
 */
    int       ksub, krep, ksupno;
    int       i, k, kfnz, segsze;
    int       fsupc, isub, irow;
    int       jsupno, nextu;
    int       new_next, mem_error;
    int       *xsup, *supno;
    int       *lsub, *xlsub;
    complex    *ucol;
    int       *usub, *xusub;
    int       nzumax;
    int       m; /* number of entries in the nonzero U-segments */
    register float d_max = 0.0, d_min = 1.0 / dlamch_("Safe minimum");
    register double tmp;
    complex zero = {0.0, 0.0};
    int i_1 = 1;

    xsup    = Glu->xsup;
    supno   = Glu->supno;
    lsub    = Glu->lsub;
    xlsub   = Glu->xlsub;
    ucol    = Glu->ucol;
    usub    = Glu->usub;
    xusub   = Glu->xusub;
    nzumax  = Glu->nzumax;

    *sum = zero;
    if (drop_rule == NODROP) {
	drop_tol = -1.0, quota = Glu->n;
    }

    jsupno = supno[jcol];
    nextu  = xusub[jcol];
    k = nseg - 1;
    for (ksub = 0; ksub < nseg; ksub++) {
	krep = segrep[k--];
	ksupno = supno[krep];

	if ( ksupno != jsupno ) { /* Should go into ucol[] */
	    kfnz = repfnz[krep];
	    if ( kfnz != EMPTY ) {	/* Nonzero U-segment */

		fsupc = xsup[ksupno];
		isub = xlsub[fsupc] + kfnz - fsupc;
		segsze = krep - kfnz + 1;

		new_next = nextu + segsze;
		while ( new_next > nzumax ) {
		    if ((mem_error = cLUMemXpand(jcol, nextu, UCOL, &nzumax,
			    Glu)) != 0)
			return (mem_error);
		    ucol = Glu->ucol;
		    if ((mem_error = cLUMemXpand(jcol, nextu, USUB, &nzumax,
			    Glu)) != 0)
			return (mem_error);
		    usub = Glu->usub;
		    lsub = Glu->lsub;
		}

		for (i = 0; i < segsze; i++) {
		    irow = lsub[isub++];
         	    tmp = c_abs1(&dense[irow]);

		    /* first dropping rule */
		    if (quota > 0 && tmp >= drop_tol) {
			if (tmp > d_max) d_max = tmp;
			if (tmp < d_min) d_min = tmp;
			usub[nextu] = perm_r[irow];
			ucol[nextu] = dense[irow];
			nextu++;
		    } else {
			switch (milu) {
			    case SMILU_1:
			    case SMILU_2:
                                c_add(sum, sum, &dense[irow]);
				break;
			    case SMILU_3:
				/* *sum += fabs(dense[irow]);*/
				sum->r += tmp;
				break;
			    case SILU:
			    default:
				break;
			}
#ifdef DEBUG
			num_drop_U++;
#endif
		    }
		    dense[irow] = zero;
		}

	    }

	}

    } /* for each segment... */

    xusub[jcol + 1] = nextu;	  /* Close U[*,jcol] */
    m = xusub[jcol + 1] - xusub[jcol];

    /* second dropping rule */
    if (drop_rule & DROP_SECONDARY && m > quota) {
	register double tol = d_max;
	register int m0 = xusub[jcol] + m - 1;

	if (quota > 0) {
	    if (drop_rule & DROP_INTERP) {
		d_max = 1.0 / d_max; d_min = 1.0 / d_min;
		tol = 1.0 / (d_max + (d_min - d_max) * quota / m);
	    } else {
                i_1 = xusub[jcol];
                for (i = 0; i < m; ++i, ++i_1) work[i] = c_abs1(&ucol[i_1]);
		tol = sqselect(m, work, quota);
#if 0
		A = &ucol[xusub[jcol]];
		for (i = 0; i < m; i++) work[i] = i;
		qsort(work, m, sizeof(int), _compare_);
		tol = fabs(usub[xusub[jcol] + work[quota]]);
#endif
	    }
	}
	for (i = xusub[jcol]; i <= m0; ) {
	    if (c_abs1(&ucol[i]) <= tol) {
		switch (milu) {
		    case SMILU_1:
		    case SMILU_2:
			c_add(sum, sum, &ucol[i]);
			break;
		    case SMILU_3:
			sum->r += tmp;
			break;
		    case SILU:
		    default:
			break;
		}
		ucol[i] = ucol[m0];
		usub[i] = usub[m0];
		m0--;
		m--;
#ifdef DEBUG
		num_drop_U++;
#endif
		xusub[jcol + 1]--;
		continue;
	    }
	    i++;
	}
    }

    if (milu == SMILU_2) {
        sum->r = c_abs1(sum); sum->i = 0.0;
    }
    if (milu == SMILU_3) sum->i = 0.0;

    *nnzUj += m;

    return 0;
}
Exemple #10
0
int main(int argc, char *argv[])
{
    void cmatvec_mult(complex alpha, complex x[], complex beta, complex y[]);
    void cpsolve(int n, complex x[], complex y[]);
    extern int cfgmr( int n,
	void (*matvec_mult)(complex, complex [], complex, complex []),
	void (*psolve)(int n, complex [], complex[]),
	complex *rhs, complex *sol, double tol, int restrt, int *itmax,
	FILE *fits);
    extern int cfill_diag(int n, NCformat *Astore);

    char     equed[1] = {'B'};
    yes_no_t equil;
    trans_t  trans;
    SuperMatrix A, L, U;
    SuperMatrix B, X;
    NCformat *Astore;
    NCformat *Ustore;
    SCformat *Lstore;
    complex   *a;
    int      *asub, *xa;
    int      *etree;
    int      *perm_c; /* column permutation vector */
    int      *perm_r; /* row permutations from partial pivoting */
    int      nrhs, ldx, lwork, info, m, n, nnz;
    complex   *rhsb, *rhsx, *xact;
    complex   *work = NULL;
    float   *R, *C;
    float   u, rpg, rcond;
    complex zero = {0.0, 0.0};
    complex one = {1.0, 0.0};
    complex none = {-1.0, 0.0};
    mem_usage_t   mem_usage;
    superlu_options_t options;
    SuperLUStat_t stat;

    int restrt, iter, maxit, i;
    double resid;
    complex *x, *b;

#ifdef DEBUG
    extern int num_drop_L, num_drop_U;
#endif

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC("Enter main()");
#endif

    /* Defaults */
    lwork = 0;
    nrhs  = 1;
    equil = YES;
    u	  = 0.1; /* u=1.0 for complete factorization */
    trans = NOTRANS;

    /* Set the default input options:
	options.Fact = DOFACT;
	options.Equil = YES;
	options.ColPerm = COLAMD;
	options.DiagPivotThresh = 0.1; //different from complete LU
	options.Trans = NOTRANS;
	options.IterRefine = NOREFINE;
	options.SymmetricMode = NO;
	options.PivotGrowth = NO;
	options.ConditionNumber = NO;
	options.PrintStat = YES;
	options.RowPerm = LargeDiag;
	options.ILU_DropTol = 1e-4;
	options.ILU_FillTol = 1e-2;
	options.ILU_FillFactor = 10.0;
	options.ILU_DropRule = DROP_BASIC | DROP_AREA;
	options.ILU_Norm = INF_NORM;
	options.ILU_MILU = SMILU_2;
     */
    ilu_set_default_options(&options);

    /* Modify the defaults. */
    options.PivotGrowth = YES;	  /* Compute reciprocal pivot growth */
    options.ConditionNumber = YES;/* Compute reciprocal condition number */

    if ( lwork > 0 ) {
	work = SUPERLU_MALLOC(lwork);
	if ( !work ) ABORT("Malloc fails for work[].");
    }

    /* Read matrix A from a file in Harwell-Boeing format.*/
    if (argc < 2)
    {
	printf("Usage:\n%s [OPTION] < [INPUT] > [OUTPUT]\nOPTION:\n"
		"-h -hb:\n\t[INPUT] is a Harwell-Boeing format matrix.\n"
		"-r -rb:\n\t[INPUT] is a Rutherford-Boeing format matrix.\n"
		"-t -triplet:\n\t[INPUT] is a triplet format matrix.\n",
		argv[0]);
	return 0;
    }
    else
    {
	switch (argv[1][1])
	{
	    case 'H':
	    case 'h':
		printf("Input a Harwell-Boeing format matrix:\n");
		creadhb(&m, &n, &nnz, &a, &asub, &xa);
		break;
	    case 'R':
	    case 'r':
		printf("Input a Rutherford-Boeing format matrix:\n");
		creadrb(&m, &n, &nnz, &a, &asub, &xa);
		break;
	    case 'T':
	    case 't':
		printf("Input a triplet format matrix:\n");
		creadtriple(&m, &n, &nnz, &a, &asub, &xa);
		break;
	    default:
		printf("Unrecognized format.\n");
		return 0;
	}
    }

    cCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_C, SLU_GE);
    Astore = A.Store;
    cfill_diag(n, Astore);
    printf("Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz);
    fflush(stdout);

    if ( !(rhsb = complexMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsb[].");
    if ( !(rhsx = complexMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsx[].");
    cCreate_Dense_Matrix(&B, m, nrhs, rhsb, m, SLU_DN, SLU_C, SLU_GE);
    cCreate_Dense_Matrix(&X, m, nrhs, rhsx, m, SLU_DN, SLU_C, SLU_GE);
    xact = complexMalloc(n * nrhs);
    ldx = n;
    cGenXtrue(n, nrhs, xact, ldx);
    cFillRHS(trans, nrhs, xact, ldx, &A, &B);

    if ( !(etree = intMalloc(n)) ) ABORT("Malloc fails for etree[].");
    if ( !(perm_r = intMalloc(m)) ) ABORT("Malloc fails for perm_r[].");
    if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[].");
    if ( !(R = (float *) SUPERLU_MALLOC(A.nrow * sizeof(float))) )
	ABORT("SUPERLU_MALLOC fails for R[].");
    if ( !(C = (float *) SUPERLU_MALLOC(A.ncol * sizeof(float))) )
	ABORT("SUPERLU_MALLOC fails for C[].");

    info = 0;
#ifdef DEBUG
    num_drop_L = 0;
    num_drop_U = 0;
#endif

    /* Initialize the statistics variables. */
    StatInit(&stat);

    /* Compute the incomplete factorization and compute the condition number
       and pivot growth using dgsisx. */
    cgsisx(&options, &A, perm_c, perm_r, etree, equed, R, C, &L, &U, work,
	   lwork, &B, &X, &rpg, &rcond, &mem_usage, &stat, &info);

    Lstore = (SCformat *) L.Store;
    Ustore = (NCformat *) U.Store;
    printf("cgsisx(): info %d\n", info);
    if (info > 0 || rcond < 1e-8 || rpg > 1e8)
	printf("WARNING: This preconditioner might be unstable.\n");

    if ( info == 0 || info == n+1 ) {

	if ( options.PivotGrowth == YES )
	    printf("Recip. pivot growth = %e\n", rpg);
	if ( options.ConditionNumber == YES )
	    printf("Recip. condition number = %e\n", rcond);

    } else if ( info > 0 && lwork == -1 ) {
	printf("** Estimated memory: %d bytes\n", info - n);
    }
    printf("n(A) = %d, nnz(A) = %d\n", n, Astore->nnz);
    printf("No of nonzeros in factor L = %d\n", Lstore->nnz);
    printf("No of nonzeros in factor U = %d\n", Ustore->nnz);
    printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n);
    printf("Fill ratio: nnz(F)/nnz(A) = %.3f\n",
	    ((double)(Lstore->nnz) + (double)(Ustore->nnz) - (double)n)
	    / (double)Astore->nnz);
    printf("L\\U MB %.3f\ttotal MB needed %.3f\n",
	   mem_usage.for_lu/1e6, mem_usage.total_needed/1e6);
    fflush(stdout);

    /* Set the global variables. */
    GLOBAL_A = &A;
    GLOBAL_L = &L;
    GLOBAL_U = &U;
    GLOBAL_STAT = &stat;
    GLOBAL_PERM_C = perm_c;
    GLOBAL_PERM_R = perm_r;

    /* Set the variables used by GMRES. */
    restrt = SUPERLU_MIN(n / 3 + 1, 50);
    maxit = 1000;
    iter = maxit;
    resid = 1e-8;
    if (!(b = complexMalloc(m))) ABORT("Malloc fails for b[].");
    if (!(x = complexMalloc(n))) ABORT("Malloc fails for x[].");
    sp_cgemv("N", one, &A, xact, 1, zero, b, 1);

    if (info <= n + 1)
    {
	int i_1 = 1;
	double maxferr = 0.0, nrmA, nrmB, res, t;
        complex temp;
	extern float scnrm2_(int *, complex [], int *);
	extern void caxpy_(int *, complex *, complex [], int *, complex [], int *);

	/* Call GMRES. */
	/*double *sol = (double*) ((DNformat*) X.Store)->nzval;
	for (i = 0; i < n; i++) x[i] = sol[i];*/
	for (i = 0; i < n; i++) x[i] = zero;

	t = SuperLU_timer_();

	cfgmr(n, cmatvec_mult, cpsolve, b, x, resid, restrt, &iter, stdout);

	t = SuperLU_timer_() - t;

	/* Output the result. */
	nrmA = scnrm2_(&(Astore->nnz), (complex *)((DNformat *)A.Store)->nzval,
		&i_1);
	nrmB = scnrm2_(&m, b, &i_1);
	sp_cgemv("N", none, &A, x, 1, one, b, 1);
	res = scnrm2_(&m, b, &i_1);
	resid = res / nrmB;
	printf("||A||_F = %.1e, ||B||_2 = %.1e, ||B-A*X||_2 = %.1e, "
		"relres = %.1e\n", nrmA, nrmB, res, resid);

	if (iter >= maxit)
	{
	    if (resid >= 1.0) iter = -180;
	    else if (resid > 1e-8) iter = -111;
	}
	printf("iteration: %d\nresidual: %.1e\nGMRES time: %.2f seconds.\n",
		iter, resid, t);

	for (i = 0; i < m; i++)
            c_sub(&temp, &x[i], &xact[i]);
            maxferr = SUPERLU_MAX(maxferr, c_abs1(&temp));
	printf("||X-X_true||_oo = %.1e\n", maxferr);
    }
#ifdef DEBUG
    printf("%d entries in L and %d entries in U dropped.\n",
	    num_drop_L, num_drop_U);
#endif
    fflush(stdout);

    if ( options.PrintStat ) StatPrint(&stat);
    StatFree(&stat);

    SUPERLU_FREE (rhsb);
    SUPERLU_FREE (rhsx);
    SUPERLU_FREE (xact);
    SUPERLU_FREE (etree);
    SUPERLU_FREE (perm_r);
    SUPERLU_FREE (perm_c);
    SUPERLU_FREE (R);
    SUPERLU_FREE (C);
    Destroy_CompCol_Matrix(&A);
    Destroy_SuperMatrix_Store(&B);
    Destroy_SuperMatrix_Store(&X);
    if ( lwork >= 0 ) {
	Destroy_SuperNode_Matrix(&L);
	Destroy_CompCol_Matrix(&U);
    }
    SUPERLU_FREE(b);
    SUPERLU_FREE(x);

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC("Exit main()");
#endif

    return 0;
}
Exemple #11
0
/*! \brief
 *
 * <pre>
 * Purpose
 * =======
 *
 *   Performs a symbolic factorization on a panel of columns [jcol, jcol+w).
 *
 *   A supernode representative is the last column of a supernode.
 *   The nonzeros in U[*,j] are segments that end at supernodal
 *   representatives.
 *
 *   The routine returns one list of the supernodal representatives
 *   in topological order of the dfs that generates them. This list is
 *   a superset of the topological order of each individual column within
 *   the panel.
 *   The location of the first nonzero in each supernodal segment
 *   (supernodal entry location) is also returned. Each column has a
 *   separate list for this purpose.
 *
 *   Two marker arrays are used for dfs:
 *     marker[i] == jj, if i was visited during dfs of current column jj;
 *     marker1[i] >= jcol, if i was visited by earlier columns in this panel;
 *
 *   marker: A-row --> A-row/col (0/1)
 *   repfnz: SuperA-col --> PA-row
 *   parent: SuperA-col --> SuperA-col
 *   xplore: SuperA-col --> index to L-structure
 * </pre>
 */
void
ilu_cpanel_dfs(
   const int  m,	   /* in - number of rows in the matrix */
   const int  w,	   /* in */
   const int  jcol,	   /* in */
   SuperMatrix *A,	   /* in - original matrix */
   int	      *perm_r,	   /* in */
   int	      *nseg,	   /* out */
   complex     *dense,	   /* out */
   float     *amax,	   /* out - max. abs. value of each column in panel */
   int	      *panel_lsub, /* out */
   int	      *segrep,	   /* out */
   int	      *repfnz,	   /* out */
   int	      *marker,	   /* out */
   int	      *parent,	   /* working array */
   int	      *xplore,	   /* working array */
   GlobalLU_t *Glu	   /* modified */
)
{

    NCPformat *Astore;
    complex    *a;
    int       *asub;
    int       *xa_begin, *xa_end;
    int       krep, chperm, chmark, chrep, oldrep, kchild, myfnz;
    int       k, krow, kmark, kperm;
    int       xdfs, maxdfs, kpar;
    int       jj;	   /* index through each column in the panel */
    int       *marker1;    /* marker1[jj] >= jcol if vertex jj was visited
			      by a previous column within this panel. */
    int       *repfnz_col; /* start of each column in the panel */
    complex    *dense_col;  /* start of each column in the panel */
    int       nextl_col;   /* next available position in panel_lsub[*,jj] */
    int       *xsup, *supno;
    int       *lsub, *xlsub;
    float    *amax_col;
    register double tmp;

    /* Initialize pointers */
    Astore     = A->Store;
    a	       = Astore->nzval;
    asub       = Astore->rowind;
    xa_begin   = Astore->colbeg;
    xa_end     = Astore->colend;
    marker1    = marker + m;
    repfnz_col = repfnz;
    dense_col  = dense;
    amax_col   = amax;
    *nseg      = 0;
    xsup       = Glu->xsup;
    supno      = Glu->supno;
    lsub       = Glu->lsub;
    xlsub      = Glu->xlsub;

    /* For each column in the panel */
    for (jj = jcol; jj < jcol + w; jj++) {
	nextl_col = (jj - jcol) * m;

#ifdef CHK_DFS
	printf("\npanel col %d: ", jj);
#endif

	*amax_col = 0.0;
	/* For each nonz in A[*,jj] do dfs */
	for (k = xa_begin[jj]; k < xa_end[jj]; k++) {
	    krow = asub[k];
	    tmp = c_abs1(&a[k]);
	    if (tmp > *amax_col) *amax_col = tmp;
	    dense_col[krow] = a[k];
	    kmark = marker[krow];
	    if ( kmark == jj )
		continue;     /* krow visited before, go to the next nonzero */

	    /* For each unmarked nbr krow of jj
	     * krow is in L: place it in structure of L[*,jj]
	     */
	    marker[krow] = jj;
	    kperm = perm_r[krow];

	    if ( kperm == EMPTY ) {
		panel_lsub[nextl_col++] = krow; /* krow is indexed into A */
	    }
	    /*
	     * krow is in U: if its supernode-rep krep
	     * has been explored, update repfnz[*]
	     */
	    else {
		
		krep = xsup[supno[kperm]+1] - 1;
		myfnz = repfnz_col[krep];
		
#ifdef CHK_DFS
		printf("krep %d, myfnz %d, perm_r[%d] %d\n", krep, myfnz, krow, kperm);
#endif
		if ( myfnz != EMPTY ) { /* Representative visited before */
		    if ( myfnz > kperm ) repfnz_col[krep] = kperm;
		    /* continue; */
		}
		else {
		    /* Otherwise, perform dfs starting at krep */
		    oldrep = EMPTY;
		    parent[krep] = oldrep;
		    repfnz_col[krep] = kperm;
		    xdfs = xlsub[xsup[supno[krep]]];
		    maxdfs = xlsub[krep + 1];

#ifdef CHK_DFS
		    printf("  xdfs %d, maxdfs %d: ", xdfs, maxdfs);
		    for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]);
		    printf("\n");
#endif
		    do {
			/*
			 * For each unmarked kchild of krep
			 */
			while ( xdfs < maxdfs ) {

			    kchild = lsub[xdfs];
			    xdfs++;
			    chmark = marker[kchild];

			    if ( chmark != jj ) { /* Not reached yet */
				marker[kchild] = jj;
				chperm = perm_r[kchild];

				/* Case kchild is in L: place it in L[*,j] */
				if ( chperm == EMPTY ) {
				    panel_lsub[nextl_col++] = kchild;
				}
				/* Case kchild is in U:
				 *   chrep = its supernode-rep. If its rep has
				 *   been explored, update its repfnz[*]
				 */
				else {
				    
				    chrep = xsup[supno[chperm]+1] - 1;
				    myfnz = repfnz_col[chrep];
#ifdef CHK_DFS
				    printf("chrep %d,myfnz %d,perm_r[%d] %d\n",chrep,myfnz,kchild,chperm);
#endif
				    if ( myfnz != EMPTY ) { /* Visited before */
					if ( myfnz > chperm )
					    repfnz_col[chrep] = chperm;
				    }
				    else {
					/* Cont. dfs at snode-rep of kchild */
					xplore[krep] = xdfs;
					oldrep = krep;
					krep = chrep; /* Go deeper down G(L) */
					parent[krep] = oldrep;
					repfnz_col[krep] = chperm;
					xdfs = xlsub[xsup[supno[krep]]];
					maxdfs = xlsub[krep + 1];
#ifdef CHK_DFS
					printf("  xdfs %d, maxdfs %d: ", xdfs, maxdfs);
					for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]);
					printf("\n");
#endif
				    } /* else */

				} /* else */

			    } /* if... */

			} /* while xdfs < maxdfs */

			/* krow has no more unexplored nbrs:
			 *    Place snode-rep krep in postorder DFS, if this
			 *    segment is seen for the first time. (Note that
			 *    "repfnz[krep]" may change later.)
			 *    Backtrack dfs to its parent.
			 */
			if ( marker1[krep] < jcol ) {
			    segrep[*nseg] = krep;
			    ++(*nseg);
			    marker1[krep] = jj;
			}

			kpar = parent[krep]; /* Pop stack, mimic recursion */
			if ( kpar == EMPTY ) break; /* dfs done */
			krep = kpar;
			xdfs = xplore[krep];
			maxdfs = xlsub[krep + 1];

#ifdef CHK_DFS
			printf("  pop stack: krep %d,xdfs %d,maxdfs %d: ", krep,xdfs,maxdfs);
			for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]);
			printf("\n");
#endif
		    } while ( kpar != EMPTY ); /* do-while - until empty stack */

		} /* else */
		
	    } /* else */

	} /* for each nonz in A[*,jj] */

	repfnz_col += m;    /* Move to next column */
	dense_col += m;
	amax_col++;

    } /* for jj ... */

}
float
cPivotGrowth(int ncols, SuperMatrix *A, int *perm_c, 
             SuperMatrix *L, SuperMatrix *U)
{
/*
 * -- SuperLU MT routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley,
 * and Xerox Palo Alto Research Center.
 * September 10, 2007
 *
 *
 * Purpose
 * =======
 *
 * Compute the reciprocal pivot growth factor of the leading ncols columns
 * of the matrix, using the formula:
 *     min_j ( max_i(abs(A_ij)) / max_i(abs(U_ij)) )
 *
 * Arguments
 * =========
 *
 * ncols    (input) int
 *          The number of columns of matrices A, L and U.
 *
 * A        (input) SuperMatrix*
 *          Original matrix A, permuted by columns, of dimension
 *          (A->nrow, A->ncol). The type of A can be:
 *          Stype = NC; Dtype = _D; Mtype = GE.
 *
 * L        (output) SuperMatrix*
 *          The factor L from the factorization Pr*A=L*U; use compressed row
 *          subscripts storage for supernodes, i.e., L has type:
 *          Stype = SC; Dtype = _D; Mtype = TRLU.
 *
 * U        (output) SuperMatrix*
 *          The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
 *          storage scheme, i.e., U has types: Stype = NC;
 *          Dtype = _D; Mtype = TRU.
 *
 */
    NCformat *Astore;
    SCPformat *Lstore;
    NCPformat *Ustore;
    complex  *Aval, *Lval, *Uval;
    int      fsupc, nsupr, luptr, nz_in_U;
    int      i, j, k, oldcol;
    int      *inv_perm_c;
    float   rpg, maxaj, maxuj;
    extern   double slamch_(char *);
    float   smlnum;
    complex   *luval;
    complex   temp_comp;
   
    /* Get machine constants. */
    smlnum = slamch_("S");
    rpg = 1. / smlnum;

    Astore = A->Store;
    Lstore = L->Store;
    Ustore = U->Store;
    Aval = Astore->nzval;
    Lval = Lstore->nzval;
    Uval = Ustore->nzval;
    
    inv_perm_c = (int *) SUPERLU_MALLOC( (size_t) A->ncol*sizeof(int) );
    for (j = 0; j < A->ncol; ++j) inv_perm_c[perm_c[j]] = j;

    for (k = 0; k <= Lstore->nsuper; ++k) {
	fsupc = L_FST_SUPC(k);
	nsupr = L_SUB_END(fsupc) - L_SUB_START(fsupc);
	luptr = L_NZ_START(fsupc);
	luval = &Lval[luptr];
	nz_in_U = 1;
	
	for (j = fsupc; j < L_LAST_SUPC(k) && j < ncols; ++j) {
	    maxaj = 0.;
            oldcol = inv_perm_c[j];
	    for (i = Astore->colptr[oldcol]; i < Astore->colptr[oldcol+1]; i++)
		maxaj = SUPERLU_MAX( maxaj, c_abs1( &Aval[i]) );
	
	    maxuj = 0.;
	    for (i = Ustore->colbeg[j]; i < Ustore->colend[j]; i++)
		maxuj = SUPERLU_MAX( maxuj, c_abs1( &Uval[i]) );
	    
	    /* Supernode */
	    for (i = 0; i < nz_in_U; ++i)
		maxuj = SUPERLU_MAX( maxuj, c_abs1( &luval[i]) );

	    ++nz_in_U;
	    luval += nsupr;

	    if ( maxuj == 0. )
		rpg = SUPERLU_MIN( rpg, 1.);
	    else
		rpg = SUPERLU_MIN( rpg, maxaj / maxuj );
	}
	
	if ( j >= ncols ) break;
    }

    SUPERLU_FREE(inv_perm_c);
    return (rpg);
}
Exemple #13
0
/*! \brief
 *
 * <pre>
 * Purpose   
 *   =======   
 *
 *   CGSEQU computes row and column scalings intended to equilibrate an   
 *   M-by-N sparse matrix A and reduce its condition number. R returns the row
 *   scale factors and C the column scale factors, chosen to try to make   
 *   the largest element in each row and column of the matrix B with   
 *   elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1.   
 *
 *   R(i) and C(j) are restricted to be between SMLNUM = smallest safe   
 *   number and BIGNUM = largest safe number.  Use of these scaling   
 *   factors is not guaranteed to reduce the condition number of A but   
 *   works well in practice.   
 *
 *   See supermatrix.h for the definition of 'SuperMatrix' structure.
 *
 *   Arguments   
 *   =========   
 *
 *   A       (input) SuperMatrix*
 *           The matrix of dimension (A->nrow, A->ncol) whose equilibration
 *           factors are to be computed. The type of A can be:
 *           Stype = SLU_NC; Dtype = SLU_C; Mtype = SLU_GE.
 *	    
 *   R       (output) float*, size A->nrow
 *           If INFO = 0 or INFO > M, R contains the row scale factors   
 *           for A.
 *	    
 *   C       (output) float*, size A->ncol
 *           If INFO = 0,  C contains the column scale factors for A.
 *	    
 *   ROWCND  (output) float*
 *           If INFO = 0 or INFO > M, ROWCND contains the ratio of the   
 *           smallest R(i) to the largest R(i).  If ROWCND >= 0.1 and   
 *           AMAX is neither too large nor too small, it is not worth   
 *           scaling by R.
 *	    
 *   COLCND  (output) float*
 *           If INFO = 0, COLCND contains the ratio of the smallest   
 *           C(i) to the largest C(i).  If COLCND >= 0.1, it is not   
 *           worth scaling by C.
 *	    
 *   AMAX    (output) float*
 *           Absolute value of largest matrix element.  If AMAX is very   
 *           close to overflow or very close to underflow, the matrix   
 *           should be scaled.
 *	    
 *   INFO    (output) int*
 *           = 0:  successful exit   
 *           < 0:  if INFO = -i, the i-th argument had an illegal value   
 *           > 0:  if INFO = i,  and i is   
 *                 <= A->nrow:  the i-th row of A is exactly zero   
 *                 >  A->ncol:  the (i-M)-th column of A is exactly zero   
 *
 *   ===================================================================== 
 * </pre>
 */
void
cgsequ(SuperMatrix *A, float *r, float *c, float *rowcnd,
	float *colcnd, float *amax, int *info)
{


    /* Local variables */
    NCformat *Astore;
    complex   *Aval;
    int i, j, irow;
    float rcmin, rcmax;
    float bignum, smlnum;
    extern float smach(char *);
    
    /* Test the input parameters. */
    *info = 0;
    if ( A->nrow < 0 || A->ncol < 0 ||
	 A->Stype != SLU_NC || A->Dtype != SLU_C || A->Mtype != SLU_GE )
	*info = -1;
    if (*info != 0) {
	i = -(*info);
	input_error("cgsequ", &i);
	return;
    }

    /* Quick return if possible */
    if ( A->nrow == 0 || A->ncol == 0 ) {
	*rowcnd = 1.;
	*colcnd = 1.;
	*amax = 0.;
	return;
    }

    Astore = A->Store;
    Aval = Astore->nzval;
    
    /* Get machine constants. */
    smlnum = smach("S");  /* slamch_("S"); */
    bignum = 1. / smlnum;

    /* Compute row scale factors. */
    for (i = 0; i < A->nrow; ++i) r[i] = 0.;

    /* Find the maximum element in each row. */
    for (j = 0; j < A->ncol; ++j)
	for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
	    irow = Astore->rowind[i];
	    r[irow] = SUPERLU_MAX( r[irow], c_abs1(&Aval[i]) );
	}

    /* Find the maximum and minimum scale factors. */
    rcmin = bignum;
    rcmax = 0.;
    for (i = 0; i < A->nrow; ++i) {
	rcmax = SUPERLU_MAX(rcmax, r[i]);
	rcmin = SUPERLU_MIN(rcmin, r[i]);
    }
    *amax = rcmax;

    if (rcmin == 0.) {
	/* Find the first zero scale factor and return an error code. */
	for (i = 0; i < A->nrow; ++i)
	    if (r[i] == 0.) {
		*info = i + 1;
		return;
	    }
    } else {
	/* Invert the scale factors. */
	for (i = 0; i < A->nrow; ++i)
	    r[i] = 1. / SUPERLU_MIN( SUPERLU_MAX( r[i], smlnum ), bignum );
	/* Compute ROWCND = min(R(I)) / max(R(I)) */
	*rowcnd = SUPERLU_MAX( rcmin, smlnum ) / SUPERLU_MIN( rcmax, bignum );
    }

    /* Compute column scale factors */
    for (j = 0; j < A->ncol; ++j) c[j] = 0.;

    /* Find the maximum element in each column, assuming the row
       scalings computed above. */
    for (j = 0; j < A->ncol; ++j)
	for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
	    irow = Astore->rowind[i];
	    c[j] = SUPERLU_MAX( c[j], c_abs1(&Aval[i]) * r[irow] );
	}

    /* Find the maximum and minimum scale factors. */
    rcmin = bignum;
    rcmax = 0.;
    for (j = 0; j < A->ncol; ++j) {
	rcmax = SUPERLU_MAX(rcmax, c[j]);
	rcmin = SUPERLU_MIN(rcmin, c[j]);
    }

    if (rcmin == 0.) {
	/* Find the first zero scale factor and return an error code. */
	for (j = 0; j < A->ncol; ++j)
	    if ( c[j] == 0. ) {
		*info = A->nrow + j + 1;
		return;
	    }
    } else {
	/* Invert the scale factors. */
	for (j = 0; j < A->ncol; ++j)
	    c[j] = 1. / SUPERLU_MIN( SUPERLU_MAX( c[j], smlnum ), bignum);
	/* Compute COLCND = min(C(J)) / max(C(J)) */
	*colcnd = SUPERLU_MAX( rcmin, smlnum ) / SUPERLU_MIN( rcmax, bignum );
    }

    return;

} /* cgsequ */