Exemplo n.º 1
0
/*! \brief
 *
 * <pre>
 * Purpose
 * =======
 *
 * PDGSRFS improves the computed solution to a system of linear
 * equations and provides error bounds and backward error estimates
 * for the solution.
 *
 * Arguments
 * =========
 *
 * n      (input) int (global)
 *        The order of the system of linear equations.
 *
 * A      (input) SuperMatrix*
 *	  The original matrix A, or the scaled A if equilibration was done.
 *        A is also permuted into diag(R)*A*diag(C)*Pc'. The type of A can be:
 *        Stype = SLU_NR_loc; Dtype = SLU_D; Mtype = SLU_GE.
 *
 * anorm  (input) double
 *        The norm of the original matrix A, or the scaled A if
 *        equilibration was done.
 *
 * LUstruct (input) LUstruct_t*
 *        The distributed data structures storing L and U factors.
 *        The L and U factors are obtained from pdgstrf for
 *        the possibly scaled and permuted matrix A.
 *        See superlu_ddefs.h for the definition of 'LUstruct_t'.
 *
 * ScalePermstruct (input) ScalePermstruct_t* (global)
 *         The data structure to store the scaling and permutation vectors
 *         describing the transformations performed to the matrix A.
 *
 * grid   (input) gridinfo_t*
 *        The 2D process mesh. It contains the MPI communicator, the number
 *        of process rows (NPROW), the number of process columns (NPCOL),
 *        and my process rank. It is an input argument to all the
 *        parallel routines.
 *        Grid can be initialized by subroutine SUPERLU_GRIDINIT.
 *        See superlu_defs.h for the definition of 'gridinfo_t'.
 *
 * B      (input) double* (local)
 *        The m_loc-by-NRHS right-hand side matrix of the possibly
 *        equilibrated system. That is, B may be overwritten by diag(R)*B.
 *
 * ldb    (input) int (local)
 *        Leading dimension of matrix B.
 *
 * X      (input/output) double* (local)
 *        On entry, the solution matrix Y, as computed by PDGSTRS, of the
 *            transformed system A1*Y = Pc*Pr*B. where
 *            A1 = Pc*Pr*diag(R)*A*diag(C)*Pc' and Y = Pc*diag(C)^(-1)*X.
 *        On exit, the improved solution matrix Y.
 *
 *        In order to obtain the solution X to the original system,
 *        Y should be permutated by Pc^T, and premultiplied by diag(C)
 *        if DiagScale = COL or BOTH.
 *        This must be done after this routine is called.
 *
 * ldx    (input) int (local)
 *        Leading dimension of matrix X.
 *
 * nrhs   (input) int
 *        Number of right-hand sides.
 *
 * SOLVEstruct (output) SOLVEstruct_t* (global)
 *        Contains the information for the communication during the
 *        solution phase.
 *
 * berr   (output) double*, dimension (nrhs)
 *         The componentwise relative backward error of each solution
 *         vector X(j) (i.e., the smallest relative change in
 *         any element of A or B that makes X(j) an exact solution).
 *
 * stat   (output) SuperLUStat_t*
 *        Record the statistics about the refinement steps.
 *        See util.h for the definition of SuperLUStat_t.
 *
 * info   (output) int*
 *        = 0: successful exit
 *        < 0: if info = -i, the i-th argument had an illegal value
 *
 * Internal Parameters
 * ===================
 *
 * ITMAX is the maximum number of steps of iterative refinement.
 * </pre>
 */
void
pdgsrfs(int_t n, SuperMatrix *A, double anorm, LUstruct_t *LUstruct,
        ScalePermstruct_t *ScalePermstruct, gridinfo_t *grid,
        double *B, int_t ldb, double *X, int_t ldx, int nrhs,
        SOLVEstruct_t *SOLVEstruct,
        double *berr, SuperLUStat_t *stat, int *info)
{
#define ITMAX 20

    Glu_persist_t *Glu_persist = LUstruct->Glu_persist;
    LocalLU_t *Llu = LUstruct->Llu;
    double *ax, *R, *dx, *temp, *work, *B_col, *X_col;
    int_t count, i, j, lwork, nz;
    int   iam;
    double eps, lstres;
    double s, safmin, safe1, safe2;

    /* Data structures used by matrix-vector multiply routine. */
    pdgsmv_comm_t *gsmv_comm = SOLVEstruct->gsmv_comm;
    NRformat_loc *Astore;
    int_t        m_loc, fst_row;


    /* Initialization. */
    Astore = (NRformat_loc *) A->Store;
    m_loc = Astore->m_loc;
    fst_row = Astore->fst_row;
    iam = grid->iam;

    /* Test the input parameters. */
    *info = 0;
    if ( n < 0 ) *info = -1;
    else if ( A->nrow != A->ncol || A->nrow < 0 || A->Stype != SLU_NR_loc
              || A->Dtype != SLU_D || A->Mtype != SLU_GE )
        *info = -2;
    else if ( ldb < SUPERLU_MAX(0, m_loc) ) *info = -10;
    else if ( ldx < SUPERLU_MAX(0, m_loc) ) *info = -12;
    else if ( nrhs < 0 ) *info = -13;
    if (*info != 0) {
        i = -(*info);
        pxerbla("PDGSRFS", grid, i);
        return;
    }

    /* Quick return if possible. */
    if ( n == 0 || nrhs == 0 ) {
        return;
    }


#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(iam, "Enter pdgsrfs()");
#endif

    lwork = 2 * m_loc;  /* For ax/R/dx and temp */
    if ( !(work = doubleMalloc_dist(lwork)) )
        ABORT("Malloc fails for work[]");
    ax = R = dx = work;
    temp = ax + m_loc;

    /* NZ = maximum number of nonzero elements in each row of A, plus 1 */
    nz     = A->ncol + 1;
    eps    = dmach("Epsilon");
    safmin = dmach("Safe minimum");

    /* Set SAFE1 essentially to be the underflow threshold times the
       number of additions in each row. */
    safe1  = nz * safmin;
    safe2  = safe1 / eps;

#if ( DEBUGlevel>=1 )
    if ( !iam ) printf(".. eps = %e\tanorm = %e\tsafe1 = %e\tsafe2 = %e\n",
                           eps, anorm, safe1, safe2);
#endif

    /* Do for each right-hand side ... */
    for (j = 0; j < nrhs; ++j) {
        count = 0;
        lstres = 3.;
        B_col = &B[j*ldb];
        X_col = &X[j*ldx];

        while (1) { /* Loop until stopping criterion is satisfied. */

            /* Compute residual R = B - op(A) * X,
               where op(A) = A, A**T, or A**H, depending on TRANS. */

            /* Matrix-vector multiply. */
            pdgsmv(0, A, grid, gsmv_comm, X_col, ax);

            /* Compute residual, stored in R[]. */
            for (i = 0; i < m_loc; ++i) R[i] = B_col[i] - ax[i];

            /* Compute abs(op(A))*abs(X) + abs(B), stored in temp[]. */
            pdgsmv(1, A, grid, gsmv_comm, X_col, temp);
            for (i = 0; i < m_loc; ++i) temp[i] += fabs(B_col[i]);

            s = 0.0;
            for (i = 0; i < m_loc; ++i) {
                if ( temp[i] > safe2 ) {
                    s = SUPERLU_MAX(s, fabs(R[i]) / temp[i]);
                } else if ( temp[i] != 0.0 ) {
                    /* Adding SAFE1 to the numerator guards against
                       spuriously zero residuals (underflow). */
                    s = SUPERLU_MAX(s, (safe1 + fabs(R[i])) /temp[i]);
                }
                /* If temp[i] is exactly 0.0 (computed by PxGSMV), then
                   we know the true residual also must be exactly 0.0. */
            }
            MPI_Allreduce( &s, &berr[j], 1, MPI_DOUBLE, MPI_MAX, grid->comm );

#if ( PRNTlevel>= 1 )
            if ( !iam )
                printf("(%2d) .. Step " IFMT ": berr[j] = %e\n", iam, count, berr[j]);
#endif
            if ( berr[j] > eps && berr[j] * 2 <= lstres && count < ITMAX ) {
                /* Compute new dx. */
                pdgstrs(n, LUstruct, ScalePermstruct, grid,
                        dx, m_loc, fst_row, m_loc, 1,
                        SOLVEstruct, stat, info);

                /* Update solution. */
                for (i = 0; i < m_loc; ++i) X_col[i] += dx[i];

                lstres = berr[j];
                ++count;
            } else {
                break;
            }
        } /* end while */

        stat->RefineSteps = count;

    } /* for j ... */

    /* Deallocate storage. */
    SUPERLU_FREE(work);

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(iam, "Exit pdgsrfs()");
#endif

} /* PDGSRFS */
Exemplo n.º 2
0
int dgst01(int m, int n, SuperMatrix *A, SuperMatrix *L, 
		SuperMatrix *U, int *perm_c, int *perm_r, double *resid)
{
/* 
    Purpose   
    =======   

    DGST01 reconstructs a matrix A from its L*U factorization and   
    computes the residual   
       norm(L*U - A) / ( N * norm(A) * EPS ),   
    where EPS is the machine epsilon.   

    Arguments   
    ==========   

    M       (input) INT   
            The number of rows of the matrix A.  M >= 0.   

    N       (input) INT   
            The number of columns of the matrix A.  N >= 0.   

    A       (input) SuperMatrix *, dimension (A->nrow, A->ncol)
            The original M x N matrix A.   

    L       (input) SuperMatrix *, dimension (L->nrow, L->ncol)
            The factor matrix L.

    U       (input) SuperMatrix *, dimension (U->nrow, U->ncol)
            The factor matrix U.

    perm_c (input) INT array, dimension (N)
            The column permutation from DGSTRF.   

    perm_r  (input) INT array, dimension (M)
            The pivot indices from DGSTRF.   

    RESID   (output) DOUBLE*
            norm(L*U - A) / ( N * norm(A) * EPS )   

    ===================================================================== 
*/  

    /* Local variables */
    double zero = 0.0;
    int i, j, k, arow, lptr,isub,  urow, superno, fsupc, u_part;
    double utemp, comp_temp;
    double anorm, tnorm, cnorm;
    double eps;
    double *work;
    SCformat *Lstore;
    NCformat *Astore, *Ustore;
    double *Aval, *Lval, *Uval;
    int *colbeg, *colend;

    /* Function prototypes */
    extern double dlangs(char *, SuperMatrix *);

    /* Quick exit if M = 0 or N = 0. */

    if (m <= 0 || n <= 0) {
	*resid = 0.f;
	return 0;
    }

    work = (double *)doubleCalloc(m);

    Astore = A->Store;
    Aval = Astore->nzval;
    Lstore = L->Store;
    Lval = Lstore->nzval;
    Ustore = U->Store;
    Uval = Ustore->nzval;

    colbeg = intMalloc(n);
    colend = intMalloc(n);

        for (i = 0; i < n; i++) {
            colbeg[perm_c[i]] = Astore->colptr[i]; 
	    colend[perm_c[i]] = Astore->colptr[i+1];
        }
	
    /* Determine EPS and the norm of A. */
    eps = dmach("Epsilon");
    anorm = dlangs("1", A);
    cnorm = 0.;

    /* Compute the product L*U, one column at a time */
    for (k = 0; k < n; ++k) {

	/* The U part outside the rectangular supernode */
        for (i = U_NZ_START(k); i < U_NZ_START(k+1); ++i) {
	    urow = U_SUB(i);
	    utemp = Uval[i];
            superno = Lstore->col_to_sup[urow];
	    fsupc = L_FST_SUPC(superno);
	    u_part = urow - fsupc + 1;
	    lptr = L_SUB_START(fsupc) + u_part;
            work[L_SUB(lptr-1)] -= utemp;   /* L_ii = 1 */
	    for (j = L_NZ_START(urow) + u_part; j < L_NZ_START(urow+1); ++j) {
                isub = L_SUB(lptr);
	        work[isub] -= Lval[j] * utemp;
	        ++lptr;
	    }
	}

	/* The U part inside the rectangular supernode */
	superno = Lstore->col_to_sup[k];
	fsupc = L_FST_SUPC(superno);
	urow = L_NZ_START(k);
	for (i = fsupc; i <= k; ++i) {
	    utemp = Lval[urow++];
	    u_part = i - fsupc + 1;
	    lptr = L_SUB_START(fsupc) + u_part;
            work[L_SUB(lptr-1)] -= utemp;   /* L_ii = 1 */
	    for (j = L_NZ_START(i)+u_part; j < L_NZ_START(i+1); ++j) {
                isub = L_SUB(lptr);
	        work[isub] -= Lval[j] * utemp;
	        ++lptr;
	    }
	}

	/* Now compute A[k] - (L*U)[k] (Both matrices may be permuted.) */

	for (i = colbeg[k]; i < colend[k]; ++i) {
	    arow = Astore->rowind[i];
	    work[perm_r[arow]] += Aval[i];
        }

	/* Now compute the 1-norm of the column vector work */
        tnorm = 0.;
	for (i = 0; i < m; ++i) {
	    tnorm += fabs(work[i]);
	    work[i] = zero;
	}
	cnorm = SUPERLU_MAX(tnorm, cnorm);
    }

    *resid = cnorm;

    if (anorm <= 0.f) {
	if (*resid != 0.f) {
	    *resid = 1.f / eps;
	}
    } else {
	*resid = *resid / (float) n / anorm / eps;
    }

    SUPERLU_FREE(work);
    SUPERLU_FREE(colbeg);
    SUPERLU_FREE(colend);
    return 0;

/*     End of DGST01 */

} /* dgst01_ */
Exemplo n.º 3
0
/*! \brief

<pre>
    Purpose   
    =======   

    ZLAQGS_DIST equilibrates a general sparse M by N matrix A using the row
    and column scaling factors in the vectors R and C.   

    See supermatrix.h for the definition of 'SuperMatrix' structure.

    Arguments   
    =========   

    A       (input/output) SuperMatrix*
            On exit, the equilibrated matrix.  See EQUED for the form of 
            the equilibrated matrix. The type of A can be:
	    Stype = SLU_NC; Dtype = SLU_Z; Mtype = SLU_GE.
	    
    R       (input) double*, dimension (A->nrow)
            The row scale factors for A.
	    
    C       (input) double*, dimension (A->ncol)
            The column scale factors for A.
	    
    ROWCND  (input) double
            Ratio of the smallest R(i) to the largest R(i).
	    
    COLCND  (input) double
            Ratio of the smallest C(i) to the largest C(i).
	    
    AMAX    (input) double
            Absolute value of largest matrix entry.
	    
    EQUED   (output) char*
            Specifies the form of equilibration that was done.   
            = 'N':  No equilibration   
            = 'R':  Row equilibration, i.e., A has been premultiplied by  
                    diag(R).   
            = 'C':  Column equilibration, i.e., A has been postmultiplied  
                    by diag(C).   
            = 'B':  Both row and column equilibration, i.e., A has been
                    replaced by diag(R) * A * diag(C).   

    Internal Parameters   
    ===================   

    THRESH is a threshold value used to decide if row or column scaling   
    should be done based on the ratio of the row or column scaling   
    factors.  If ROWCND < THRESH, row scaling is done, and if   
    COLCND < THRESH, column scaling is done.   

    LARGE and SMALL are threshold values used to decide if row scaling   
    should be done based on the absolute size of the largest matrix   
    element.  If AMAX > LARGE or AMAX < SMALL, row scaling is done.   

    ===================================================================== 
</pre>
*/
void
zlaqgs_dist(SuperMatrix *A, double *r, double *c, 
	    double rowcnd, double colcnd, double amax, char *equed)
{
#define THRESH    (0.1)
    
    /* Local variables */
    NCformat *Astore;
    doublecomplex   *Aval;
    int i, j, irow;
    double large, small, cj;
    double temp;


    /* Quick return if possible */
    if (A->nrow <= 0 || A->ncol <= 0) {
	*(unsigned char *)equed = 'N';
	return;
    }

    Astore = (NCformat *) A->Store;
    Aval = (doublecomplex *) Astore->nzval;
    
    /* Initialize LARGE and SMALL. */
    small = dmach("Safe minimum") / dmach("Precision");
    large = 1. / small;

    if (rowcnd >= THRESH && amax >= small && amax <= large) {
	if (colcnd >= THRESH)
	    *(unsigned char *)equed = 'N';
	else {
	    /* Column scaling */
	    for (j = 0; j < A->ncol; ++j) {
		cj = c[j];
		for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
		    zd_mult(&Aval[i], &Aval[i], cj);
                }
	    }
	    *(unsigned char *)equed = 'C';
	}
    } else if (colcnd >= THRESH) {
	/* Row scaling, no column scaling */
	for (j = 0; j < A->ncol; ++j)
	    for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
		irow = Astore->rowind[i];
		zd_mult(&Aval[i], &Aval[i], r[irow]);
	    }
	*(unsigned char *)equed = 'R';
    } else {
	/* Row and column scaling */
	for (j = 0; j < A->ncol; ++j) {
	    cj = c[j];
	    for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
		irow = Astore->rowind[i];
		temp = cj * r[irow];
		zd_mult(&Aval[i], &Aval[i], temp);
	    }
	}
	*(unsigned char *)equed = 'B';
    }

    return;

} /* zlaqgs_dist */
Exemplo n.º 4
0
int
ilu_zcopy_to_ucol(
	      int	 jcol,	   /* in */
	      int	 nseg,	   /* in */
	      int	 *segrep,  /* in */
	      int	 *repfnz,  /* in */
	      int	 *perm_r,  /* in */
	      doublecomplex	 *dense,   /* modified - reset to zero on return */
	      int  	 drop_rule,/* in */
	      milu_t	 milu,	   /* in */
	      double	 drop_tol, /* in */
	      int	 quota,    /* maximum nonzero entries allowed */
	      doublecomplex	 *sum,	   /* out - the sum of dropped entries */
	      int	 *nnzUj,   /* in - out */
	      GlobalLU_t *Glu,	   /* modified */
	      double	 *work	   /* working space with minimum size n,
				    * used by the second dropping rule */
	      )
{
/*
 * Gather from SPA dense[*] to global ucol[*].
 */
    int       ksub, krep, ksupno;
    int       i, k, kfnz, segsze;
    int       fsupc, isub, irow;
    int       jsupno, nextu;
    int       new_next, mem_error;
    int       *xsup, *supno;
    int       *lsub, *xlsub;
    doublecomplex    *ucol;
    int       *usub, *xusub;
    int       nzumax;
    int       m; /* number of entries in the nonzero U-segments */
    register double d_max = 0.0, d_min = 1.0 / dmach("Safe minimum");
    register double tmp;
    doublecomplex zero = {0.0, 0.0};
    int i_1 = 1;

    xsup    = Glu->xsup;
    supno   = Glu->supno;
    lsub    = Glu->lsub;
    xlsub   = Glu->xlsub;
    ucol    = (doublecomplex *) Glu->ucol;
    usub    = Glu->usub;
    xusub   = Glu->xusub;
    nzumax  = Glu->nzumax;

    *sum = zero;
    if (drop_rule == NODROP) {
	drop_tol = -1.0, quota = Glu->n;
    }

    jsupno = supno[jcol];
    nextu  = xusub[jcol];
    k = nseg - 1;
    for (ksub = 0; ksub < nseg; ksub++) {
	krep = segrep[k--];
	ksupno = supno[krep];

	if ( ksupno != jsupno ) { /* Should go into ucol[] */
	    kfnz = repfnz[krep];
	    if ( kfnz != EMPTY ) {	/* Nonzero U-segment */

		fsupc = xsup[ksupno];
		isub = xlsub[fsupc] + kfnz - fsupc;
		segsze = krep - kfnz + 1;

		new_next = nextu + segsze;
		while ( new_next > nzumax ) {
		    if ((mem_error = zLUMemXpand(jcol, nextu, UCOL, &nzumax,
			    Glu)) != 0)
			return (mem_error);
		    ucol = Glu->ucol;
		    if ((mem_error = zLUMemXpand(jcol, nextu, USUB, &nzumax,
			    Glu)) != 0)
			return (mem_error);
		    usub = Glu->usub;
		    lsub = Glu->lsub;
		}

		for (i = 0; i < segsze; i++) {
		    irow = lsub[isub++];
         	    tmp = z_abs1(&dense[irow]);

		    /* first dropping rule */
		    if (quota > 0 && tmp >= drop_tol) {
			if (tmp > d_max) d_max = tmp;
			if (tmp < d_min) d_min = tmp;
			usub[nextu] = perm_r[irow];
			ucol[nextu] = dense[irow];
			nextu++;
		    } else {
			switch (milu) {
			    case SMILU_1:
			    case SMILU_2:
                                z_add(sum, sum, &dense[irow]);
				break;
			    case SMILU_3:
				/* *sum += fabs(dense[irow]);*/
				sum->r += tmp;
				break;
			    case SILU:
			    default:
				break;
			}
#ifdef DEBUG
			num_drop_U++;
#endif
		    }
		    dense[irow] = zero;
		}

	    }

	}

    } /* for each segment... */

    xusub[jcol + 1] = nextu;	  /* Close U[*,jcol] */
    m = xusub[jcol + 1] - xusub[jcol];

    /* second dropping rule */
    if (drop_rule & DROP_SECONDARY && m > quota) {
	register double tol = d_max;
	register int m0 = xusub[jcol] + m - 1;

	if (quota > 0) {
	    if (drop_rule & DROP_INTERP) {
		d_max = 1.0 / d_max; d_min = 1.0 / d_min;
		tol = 1.0 / (d_max + (d_min - d_max) * quota / m);
	    } else {
                i_1 = xusub[jcol];
                for (i = 0; i < m; ++i, ++i_1) work[i] = z_abs1(&ucol[i_1]);
		tol = dqselect(m, work, quota);
#if 0
		A = &ucol[xusub[jcol]];
		for (i = 0; i < m; i++) work[i] = i;
		qsort(work, m, sizeof(int), _compare_);
		tol = fabs(usub[xusub[jcol] + work[quota]]);
#endif
	    }
	}
	for (i = xusub[jcol]; i <= m0; ) {
	    if (z_abs1(&ucol[i]) <= tol) {
		switch (milu) {
		    case SMILU_1:
		    case SMILU_2:
			z_add(sum, sum, &ucol[i]);
			break;
		    case SMILU_3:
			sum->r += tmp;
			break;
		    case SILU:
		    default:
			break;
		}
		ucol[i] = ucol[m0];
		usub[i] = usub[m0];
		m0--;
		m--;
#ifdef DEBUG
		num_drop_U++;
#endif
		xusub[jcol + 1]--;
		continue;
	    }
	    i++;
	}
    }

    if (milu == SMILU_2) {
        sum->r = z_abs1(sum); sum->i = 0.0;
    }
    if (milu == SMILU_3) sum->i = 0.0;

    *nnzUj += m;

    return 0;
}
Exemplo n.º 5
0
void
dgsisx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
       int *etree, char *equed, double *R, double *C,
       SuperMatrix *L, SuperMatrix *U, void *work, int lwork,
       SuperMatrix *B, SuperMatrix *X,
       double *recip_pivot_growth, double *rcond,
       GlobalLU_t *Glu, mem_usage_t *mem_usage, SuperLUStat_t *stat, int *info)
{

    DNformat  *Bstore, *Xstore;
    double    *Bmat, *Xmat;
    int       ldb, ldx, nrhs, n;
    SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/
    SuperMatrix AC; /* Matrix postmultiplied by Pc */
    int       colequ, equil, nofact, notran, rowequ, permc_spec, mc64;
    trans_t   trant;
    char      norm[1];
    int       i, j, info1;
    double    amax, anorm, bignum, smlnum, colcnd, rowcnd, rcmax, rcmin;
    int       relax, panel_size;
    double    diag_pivot_thresh;
    double    t0;      /* temporary time */
    double    *utime;

    int *perm = NULL; /* permutation returned from MC64 */

    /* External functions */
    extern double dlangs(char *, SuperMatrix *);

    Bstore = B->Store;
    Xstore = X->Store;
    Bmat   = Bstore->nzval;
    Xmat   = Xstore->nzval;
    ldb    = Bstore->lda;
    ldx    = Xstore->lda;
    nrhs   = B->ncol;
    n      = B->nrow;

    *info = 0;
    nofact = (options->Fact != FACTORED);
    equil = (options->Equil == YES);
    notran = (options->Trans == NOTRANS);
    mc64 = (options->RowPerm == LargeDiag);
    if ( nofact ) {
	*(unsigned char *)equed = 'N';
	rowequ = FALSE;
	colequ = FALSE;
    } else {
	rowequ = strncmp(equed, "R", 1)==0 || strncmp(equed, "B", 1)==0;
	colequ = strncmp(equed, "C", 1)==0 || strncmp(equed, "B", 1)==0;
	smlnum = dmach("Safe minimum");  /* lamch_("Safe minimum"); */
	bignum = 1. / smlnum;
    }

    /* Test the input parameters */
    if (options->Fact != DOFACT && options->Fact != SamePattern &&
	options->Fact != SamePattern_SameRowPerm &&
	options->Fact != FACTORED &&
	options->Trans != NOTRANS && options->Trans != TRANS && 
	options->Trans != CONJ &&
	options->Equil != NO && options->Equil != YES)
	*info = -1;
    else if ( A->nrow != A->ncol || A->nrow < 0 ||
	      (A->Stype != SLU_NC && A->Stype != SLU_NR) ||
	      A->Dtype != SLU_D || A->Mtype != SLU_GE )
	*info = -2;
    else if ( options->Fact == FACTORED &&
	     !(rowequ || colequ || strncmp(equed, "N", 1)==0) )
	*info = -6;
    else {
	if (rowequ) {
	    rcmin = bignum;
	    rcmax = 0.;
	    for (j = 0; j < A->nrow; ++j) {
		rcmin = SUPERLU_MIN(rcmin, R[j]);
		rcmax = SUPERLU_MAX(rcmax, R[j]);
	    }
	    if (rcmin <= 0.) *info = -7;
	    else if ( A->nrow > 0)
		rowcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum);
	    else rowcnd = 1.;
	}
	if (colequ && *info == 0) {
	    rcmin = bignum;
	    rcmax = 0.;
	    for (j = 0; j < A->nrow; ++j) {
		rcmin = SUPERLU_MIN(rcmin, C[j]);
		rcmax = SUPERLU_MAX(rcmax, C[j]);
	    }
	    if (rcmin <= 0.) *info = -8;
	    else if (A->nrow > 0)
		colcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum);
	    else colcnd = 1.;
	}
	if (*info == 0) {
	    if ( lwork < -1 ) *info = -12;
	    else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) ||
		      B->Stype != SLU_DN || B->Dtype != SLU_D || 
		      B->Mtype != SLU_GE )
		*info = -13;
	    else if ( X->ncol < 0 || Xstore->lda < SUPERLU_MAX(0, A->nrow) ||
		      (B->ncol != 0 && B->ncol != X->ncol) ||
		      X->Stype != SLU_DN ||
		      X->Dtype != SLU_D || X->Mtype != SLU_GE )
		*info = -14;
	}
    }
    if (*info != 0) {
	i = -(*info);
	input_error("dgsisx", &i);
	return;
    }

    /* Initialization for factor parameters */
    panel_size = sp_ienv(1);
    relax      = sp_ienv(2);
    diag_pivot_thresh = options->DiagPivotThresh;

    utime = stat->utime;

    /* Convert A to SLU_NC format when necessary. */
    if ( A->Stype == SLU_NR ) {
	NRformat *Astore = A->Store;
	AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
	dCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz,
			       Astore->nzval, Astore->colind, Astore->rowptr,
			       SLU_NC, A->Dtype, A->Mtype);
	if ( notran ) { /* Reverse the transpose argument. */
	    trant = TRANS;
	    notran = 0;
	} else {
	    trant = NOTRANS;
	    notran = 1;
	}
    } else { /* A->Stype == SLU_NC */
	trant = options->Trans;
	AA = A;
    }

    if ( nofact ) {
	register int i, j;
	NCformat *Astore = AA->Store;
	int nnz = Astore->nnz;
	int *colptr = Astore->colptr;
	int *rowind = Astore->rowind;
	double *nzval = (double *)Astore->nzval;

	if ( mc64 ) {
	    t0 = SuperLU_timer_();
	    if ((perm = intMalloc(n)) == NULL)
		ABORT("SUPERLU_MALLOC fails for perm[]");

	    info1 = dldperm(5, n, nnz, colptr, rowind, nzval, perm, R, C);

	    if (info1 != 0) { /* MC64 fails, call dgsequ() later */
		mc64 = 0;
		SUPERLU_FREE(perm);
		perm = NULL;
	    } else {
	        if ( equil ) {
	            rowequ = colequ = 1;
		    for (i = 0; i < n; i++) {
		        R[i] = exp(R[i]);
		        C[i] = exp(C[i]);
		    }
		    /* scale the matrix */
		    for (j = 0; j < n; j++) {
		        for (i = colptr[j]; i < colptr[j + 1]; i++) {
			    nzval[i] *= R[rowind[i]] * C[j];
		        }
		    }
	            *equed = 'B';
                }

                /* permute the matrix */
		for (j = 0; j < n; j++) {
		    for (i = colptr[j]; i < colptr[j + 1]; i++) {
			/*nzval[i] *= R[rowind[i]] * C[j];*/
			rowind[i] = perm[rowind[i]];
		    }
		}
	    }
	    utime[EQUIL] = SuperLU_timer_() - t0;
	}

	if ( !mc64 & equil ) { /* Only perform equilibration, no row perm */
	    t0 = SuperLU_timer_();
	    /* Compute row and column scalings to equilibrate the matrix A. */
	    dgsequ(AA, R, C, &rowcnd, &colcnd, &amax, &info1);

	    if ( info1 == 0 ) {
		/* Equilibrate matrix A. */
		dlaqgs(AA, R, C, rowcnd, colcnd, amax, equed);
		rowequ = strncmp(equed, "R", 1)==0 || strncmp(equed, "B", 1)==0;
		colequ = strncmp(equed, "C", 1)==0 || strncmp(equed, "B", 1)==0;
	    }
	    utime[EQUIL] = SuperLU_timer_() - t0;
	}
    }


    if ( nofact ) {
	
	t0 = SuperLU_timer_();
	/*
	 * Gnet column permutation vector perm_c[], according to permc_spec:
	 *   permc_spec = NATURAL:  natural ordering 
	 *   permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
	 *   permc_spec = MMD_ATA:  minimum degree on structure of A'*A
	 *   permc_spec = COLAMD:   approximate minimum degree column ordering
	 *   permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
	 */
	permc_spec = options->ColPerm;
	if ( permc_spec != MY_PERMC && options->Fact == DOFACT )
	    get_perm_c(permc_spec, AA, perm_c);
	utime[COLPERM] = SuperLU_timer_() - t0;

	t0 = SuperLU_timer_();
	sp_preorder(options, AA, perm_c, etree, &AC);
	utime[ETREE] = SuperLU_timer_() - t0;

	/* Compute the LU factorization of A*Pc. */
	t0 = SuperLU_timer_();
	dgsitrf(options, &AC, relax, panel_size, etree, work, lwork,
                perm_c, perm_r, L, U, Glu, stat, info);
	utime[FACT] = SuperLU_timer_() - t0;

	if ( lwork == -1 ) {
	    mem_usage->total_needed = *info - A->ncol;
	    return;
	}

	if ( mc64 ) { /* Fold MC64's perm[] into perm_r[]. */
	    NCformat *Astore = AA->Store;
	    int nnz = Astore->nnz, *rowind = Astore->rowind;
	    int *perm_tmp, *iperm;
	    if ((perm_tmp = intMalloc(2*n)) == NULL)
		ABORT("SUPERLU_MALLOC fails for perm_tmp[]");
	    iperm = perm_tmp + n;
	    for (i = 0; i < n; ++i) perm_tmp[i] = perm_r[perm[i]];
	    for (i = 0; i < n; ++i) {
		perm_r[i] = perm_tmp[i];
		iperm[perm[i]] = i;
	    }

	    /* Restore A's original row indices. */
	    for (i = 0; i < nnz; ++i) rowind[i] = iperm[rowind[i]];

	    SUPERLU_FREE(perm); /* MC64 permutation */
	    SUPERLU_FREE(perm_tmp);
	}
    }

    if ( options->PivotGrowth ) {
	if ( *info > 0 ) return;

	/* Compute the reciprocal pivot growth factor *recip_pivot_growth. */
	*recip_pivot_growth = dPivotGrowth(A->ncol, AA, perm_c, L, U);
    }

    if ( options->ConditionNumber ) {
	/* Estimate the reciprocal of the condition number of A. */
	t0 = SuperLU_timer_();
	if ( notran ) {
	    *(unsigned char *)norm = '1';
	} else {
	    *(unsigned char *)norm = 'I';
	}
	anorm = dlangs(norm, AA);
	dgscon(norm, L, U, anorm, rcond, stat, &info1);
	utime[RCOND] = SuperLU_timer_() - t0;
    }

    if ( nrhs > 0 ) { /* Solve the system */
        double *rhs_work;

	/* Scale and permute the right-hand side if equilibration
           and permutation from MC64 were performed. */
	if ( notran ) {
	    if ( rowequ ) {
		for (j = 0; j < nrhs; ++j)
		    for (i = 0; i < n; ++i)
		        Bmat[i + j*ldb] *= R[i];
	    }
	} else if ( colequ ) {
	    for (j = 0; j < nrhs; ++j)
		for (i = 0; i < n; ++i) {
	            Bmat[i + j*ldb] *= C[i];
		}
	}

	/* Compute the solution matrix X. */
	for (j = 0; j < nrhs; j++)  /* Save a copy of the right hand sides */
	    for (i = 0; i < B->nrow; i++)
		Xmat[i + j*ldx] = Bmat[i + j*ldb];

	t0 = SuperLU_timer_();
	dgstrs (trant, L, U, perm_c, perm_r, X, stat, &info1);
	utime[SOLVE] = SuperLU_timer_() - t0;

	/* Transform the solution matrix X to a solution of the original
	   system. */
	if ( notran ) {
	    if ( colequ ) {
		for (j = 0; j < nrhs; ++j)
		    for (i = 0; i < n; ++i) {
                        Xmat[i + j*ldx] *= C[i];
                    }
	    }
	} else { /* transposed system */
	    if ( rowequ ) {
	        for (j = 0; j < nrhs; ++j)
		    for (i = 0; i < A->nrow; ++i) {
              	        Xmat[i + j*ldx] *= R[i];
                    }
	    }
	}

    } /* end if nrhs > 0 */

    if ( options->ConditionNumber ) {
	/* The matrix is singular to working precision. */
	/* if ( *rcond < dlamch_("E") && *info == 0) *info = A->ncol + 1; */
	if ( *rcond < dmach("E") && *info == 0) *info = A->ncol + 1;
    }

    if ( nofact ) {
	ilu_dQuerySpace(L, U, mem_usage);
	Destroy_CompCol_Permuted(&AC);
    }
    if ( A->Stype == SLU_NR ) {
	Destroy_SuperMatrix_Store(AA);
	SUPERLU_FREE(AA);
    }

}
Exemplo n.º 6
0
/*! \brief
 *
 * <pre>
 *   Purpose   
 *   =======   
 *
 *   DGSRFS improves the computed solution to a system of linear   
 *   equations and provides error bounds and backward error estimates for 
 *   the solution.   
 *
 *   If equilibration was performed, the system becomes:
 *           (diag(R)*A_original*diag(C)) * X = diag(R)*B_original.
 *
 *   See supermatrix.h for the definition of 'SuperMatrix' structure.
 *
 *   Arguments   
 *   =========   
 *
 * trans   (input) trans_t
 *          Specifies the form of the system of equations:
 *          = NOTRANS: A * X = B  (No transpose)
 *          = TRANS:   A'* X = B  (Transpose)
 *          = CONJ:    A**H * X = B  (Conjugate transpose)
 *   
 *   A       (input) SuperMatrix*
 *           The original matrix A in the system, or the scaled A if
 *           equilibration was done. The type of A can be:
 *           Stype = SLU_NC, Dtype = SLU_D, Mtype = SLU_GE.
 *    
 *   L       (input) SuperMatrix*
 *	     The factor L from the factorization Pr*A*Pc=L*U. Use
 *           compressed row subscripts storage for supernodes, 
 *           i.e., L has types: Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU.
 * 
 *   U       (input) SuperMatrix*
 *           The factor U from the factorization Pr*A*Pc=L*U as computed by
 *           dgstrf(). Use column-wise storage scheme, 
 *           i.e., U has types: Stype = SLU_NC, Dtype = SLU_D, Mtype = SLU_TRU.
 *
 *   perm_c  (input) int*, dimension (A->ncol)
 *	     Column permutation vector, which defines the 
 *           permutation matrix Pc; perm_c[i] = j means column i of A is 
 *           in position j in A*Pc.
 *
 *   perm_r  (input) int*, dimension (A->nrow)
 *           Row permutation vector, which defines the permutation matrix Pr;
 *           perm_r[i] = j means row i of A is in position j in Pr*A.
 *
 *   equed   (input) Specifies the form of equilibration that was done.
 *           = 'N': No equilibration.
 *           = 'R': Row equilibration, i.e., A was premultiplied by diag(R).
 *           = 'C': Column equilibration, i.e., A was postmultiplied by
 *                  diag(C).
 *           = 'B': Both row and column equilibration, i.e., A was replaced 
 *                  by diag(R)*A*diag(C).
 *
 *   R       (input) double*, dimension (A->nrow)
 *           The row scale factors for A.
 *           If equed = 'R' or 'B', A is premultiplied by diag(R).
 *           If equed = 'N' or 'C', R is not accessed.
 * 
 *   C       (input) double*, dimension (A->ncol)
 *           The column scale factors for A.
 *           If equed = 'C' or 'B', A is postmultiplied by diag(C).
 *           If equed = 'N' or 'R', C is not accessed.
 *
 *   B       (input) SuperMatrix*
 *           B has types: Stype = SLU_DN, Dtype = SLU_D, Mtype = SLU_GE.
 *           The right hand side matrix B.
 *           if equed = 'R' or 'B', B is premultiplied by diag(R).
 *
 *   X       (input/output) SuperMatrix*
 *           X has types: Stype = SLU_DN, Dtype = SLU_D, Mtype = SLU_GE.
 *           On entry, the solution matrix X, as computed by dgstrs().
 *           On exit, the improved solution matrix X.
 *           if *equed = 'C' or 'B', X should be premultiplied by diag(C)
 *               in order to obtain the solution to the original system.
 *
 *   FERR    (output) double*, dimension (B->ncol)   
 *           The estimated forward error bound for each solution vector   
 *           X(j) (the j-th column of the solution matrix X).   
 *           If XTRUE is the true solution corresponding to X(j), FERR(j) 
 *           is an estimated upper bound for the magnitude of the largest 
 *           element in (X(j) - XTRUE) divided by the magnitude of the   
 *           largest element in X(j).  The estimate is as reliable as   
 *           the estimate for RCOND, and is almost always a slight   
 *           overestimate of the true error.
 *
 *   BERR    (output) double*, dimension (B->ncol)   
 *           The componentwise relative backward error of each solution   
 *           vector X(j) (i.e., the smallest relative change in   
 *           any element of A or B that makes X(j) an exact solution).
 *
 *   stat     (output) SuperLUStat_t*
 *            Record the statistics on runtime and floating-point operation count.
 *            See util.h for the definition of 'SuperLUStat_t'.
 *
 *   info    (output) int*   
 *           = 0:  successful exit   
 *            < 0:  if INFO = -i, the i-th argument had an illegal value   
 *
 *    Internal Parameters   
 *    ===================   
 *
 *    ITMAX is the maximum number of steps of iterative refinement.   
 *
 * </pre>
 */
void
dgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U,
       int *perm_c, int *perm_r, char *equed, double *R, double *C,
       SuperMatrix *B, SuperMatrix *X, double *ferr, double *berr,
       SuperLUStat_t *stat, int *info)
{


#define ITMAX 5
    
    /* Table of constant values */
    int    ione = 1;
    double ndone = -1.;
    double done = 1.;
    
    /* Local variables */
    NCformat *Astore;
    double   *Aval;
    SuperMatrix Bjcol;
    DNformat *Bstore, *Xstore, *Bjcol_store;
    double   *Bmat, *Xmat, *Bptr, *Xptr;
    int      kase;
    double   safe1, safe2;
    int      i, j, k, irow, nz, count, notran, rowequ, colequ;
    int      ldb, ldx, nrhs;
    double   s, xk, lstres, eps, safmin;
    char     transc[1];
    trans_t  transt;
    double   *work;
    double   *rwork;
    int      *iwork;
    int      isave[3];

    extern int dlacon2_(int *, double *, double *, int *, double *, int *, int []);
#ifdef _CRAY
    extern int SCOPY(int *, double *, int *, double *, int *);
    extern int SSAXPY(int *, double *, double *, int *, double *, int *);
#else
    extern int dcopy_(int *, double *, int *, double *, int *);
    extern int daxpy_(int *, double *, double *, int *, double *, int *);
#endif

    Astore = A->Store;
    Aval   = Astore->nzval;
    Bstore = B->Store;
    Xstore = X->Store;
    Bmat   = Bstore->nzval;
    Xmat   = Xstore->nzval;
    ldb    = Bstore->lda;
    ldx    = Xstore->lda;
    nrhs   = B->ncol;
    
    /* Test the input parameters */
    *info = 0;
    notran = (trans == NOTRANS);
    if ( !notran && trans != TRANS && trans != CONJ ) *info = -1;
    else if ( A->nrow != A->ncol || A->nrow < 0 ||
	      A->Stype != SLU_NC || A->Dtype != SLU_D || A->Mtype != SLU_GE )
	*info = -2;
    else if ( L->nrow != L->ncol || L->nrow < 0 ||
 	      L->Stype != SLU_SC || L->Dtype != SLU_D || L->Mtype != SLU_TRLU )
	*info = -3;
    else if ( U->nrow != U->ncol || U->nrow < 0 ||
 	      U->Stype != SLU_NC || U->Dtype != SLU_D || U->Mtype != SLU_TRU )
	*info = -4;
    else if ( ldb < SUPERLU_MAX(0, A->nrow) ||
 	      B->Stype != SLU_DN || B->Dtype != SLU_D || B->Mtype != SLU_GE )
        *info = -10;
    else if ( ldx < SUPERLU_MAX(0, A->nrow) ||
 	      X->Stype != SLU_DN || X->Dtype != SLU_D || X->Mtype != SLU_GE )
	*info = -11;
    if (*info != 0) {
	i = -(*info);
	input_error("dgsrfs", &i);
	return;
    }

    /* Quick return if possible */
    if ( A->nrow == 0 || nrhs == 0) {
	for (j = 0; j < nrhs; ++j) {
	    ferr[j] = 0.;
	    berr[j] = 0.;
	}
	return;
    }

    rowequ = strncmp(equed, "R", 1)==0 || strncmp(equed, "B", 1)==0;
    colequ = strncmp(equed, "C", 1)==0 || strncmp(equed, "B", 1)==0;
    
    /* Allocate working space */
    work = doubleMalloc(2*A->nrow);
    rwork = (double *) SUPERLU_MALLOC( A->nrow * sizeof(double) );
    iwork = intMalloc(2*A->nrow);
    if ( !work || !rwork || !iwork ) 
        ABORT("Malloc fails for work/rwork/iwork.");
    
    if ( notran ) {
	*(unsigned char *)transc = 'N';
        transt = TRANS;
    } else if ( trans == TRANS ) {
	*(unsigned char *)transc = 'T';
	transt = NOTRANS;
    } else if ( trans == CONJ ) {
	*(unsigned char *)transc = 'C';
	transt = NOTRANS;
    }    

    /* NZ = maximum number of nonzero elements in each row of A, plus 1 */
    nz     = A->ncol + 1;
    eps    = dmach("Epsilon");
    safmin = dmach("Safe minimum");

    /* Set SAFE1 essentially to be the underflow threshold times the
       number of additions in each row. */
    safe1  = nz * safmin;
    safe2  = safe1 / eps;

    /* Compute the number of nonzeros in each row (or column) of A */
    for (i = 0; i < A->nrow; ++i) iwork[i] = 0;
    if ( notran ) {
	for (k = 0; k < A->ncol; ++k)
	    for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) 
		++iwork[Astore->rowind[i]];
    } else {
	for (k = 0; k < A->ncol; ++k)
	    iwork[k] = Astore->colptr[k+1] - Astore->colptr[k];
    }	

    /* Copy one column of RHS B into Bjcol. */
    Bjcol.Stype = B->Stype;
    Bjcol.Dtype = B->Dtype;
    Bjcol.Mtype = B->Mtype;
    Bjcol.nrow  = B->nrow;
    Bjcol.ncol  = 1;
    Bjcol.Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) );
    if ( !Bjcol.Store ) ABORT("SUPERLU_MALLOC fails for Bjcol.Store");
    Bjcol_store = Bjcol.Store;
    Bjcol_store->lda = ldb;
    Bjcol_store->nzval = work; /* address aliasing */
	
    /* Do for each right hand side ... */
    for (j = 0; j < nrhs; ++j) {
	count = 0;
	lstres = 3.;
	Bptr = &Bmat[j*ldb];
	Xptr = &Xmat[j*ldx];

	while (1) { /* Loop until stopping criterion is satisfied. */

	    /* Compute residual R = B - op(A) * X,   
	       where op(A) = A, A**T, or A**H, depending on TRANS. */
	    
#ifdef _CRAY
	    SCOPY(&A->nrow, Bptr, &ione, work, &ione);
#else
	    dcopy_(&A->nrow, Bptr, &ione, work, &ione);
#endif
	    sp_dgemv(transc, ndone, A, Xptr, ione, done, work, ione);

	    /* Compute componentwise relative backward error from formula 
	       max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )   
	       where abs(Z) is the componentwise absolute value of the matrix
	       or vector Z.  If the i-th component of the denominator is less
	       than SAFE2, then SAFE1 is added to the i-th component of the   
	       numerator before dividing. */

	    for (i = 0; i < A->nrow; ++i) rwork[i] = fabs( Bptr[i] );
	    
	    /* Compute abs(op(A))*abs(X) + abs(B). */
	    if ( notran ) {
		for (k = 0; k < A->ncol; ++k) {
		    xk = fabs( Xptr[k] );
		    for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
			rwork[Astore->rowind[i]] += fabs(Aval[i]) * xk;
		}
	    } else {  /* trans = TRANS or CONJ */
		for (k = 0; k < A->ncol; ++k) {
		    s = 0.;
		    for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
			irow = Astore->rowind[i];
			s += fabs(Aval[i]) * fabs(Xptr[irow]);
		    }
		    rwork[k] += s;
		}
	    }
	    s = 0.;
	    for (i = 0; i < A->nrow; ++i) {
		if (rwork[i] > safe2) {
		    s = SUPERLU_MAX( s, fabs(work[i]) / rwork[i] );
		} else if ( rwork[i] != 0.0 ) {
                    /* Adding SAFE1 to the numerator guards against
                       spuriously zero residuals (underflow). */
		    s = SUPERLU_MAX( s, (safe1 + fabs(work[i])) / rwork[i] );
                }
                /* If rwork[i] is exactly 0.0, then we know the true 
                   residual also must be exactly 0.0. */
	    }
	    berr[j] = s;

	    /* Test stopping criterion. Continue iterating if   
	       1) The residual BERR(J) is larger than machine epsilon, and   
	       2) BERR(J) decreased by at least a factor of 2 during the   
	          last iteration, and   
	       3) At most ITMAX iterations tried. */

	    if (berr[j] > eps && berr[j] * 2. <= lstres && count < ITMAX) {
		/* Update solution and try again. */
		dgstrs (trans, L, U, perm_c, perm_r, &Bjcol, stat, info);
		
#ifdef _CRAY
		SAXPY(&A->nrow, &done, work, &ione,
		       &Xmat[j*ldx], &ione);
#else
		daxpy_(&A->nrow, &done, work, &ione,
		       &Xmat[j*ldx], &ione);
#endif
		lstres = berr[j];
		++count;
	    } else {
		break;
	    }
        
	} /* end while */

	stat->RefineSteps = count;

	/* Bound error from formula:
	   norm(X - XTRUE) / norm(X) .le. FERR = norm( abs(inv(op(A)))*   
	   ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)   
          where   
            norm(Z) is the magnitude of the largest component of Z   
            inv(op(A)) is the inverse of op(A)   
            abs(Z) is the componentwise absolute value of the matrix or
	       vector Z   
            NZ is the maximum number of nonzeros in any row of A, plus 1   
            EPS is machine epsilon   

          The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))   
          is incremented by SAFE1 if the i-th component of   
          abs(op(A))*abs(X) + abs(B) is less than SAFE2.   

          Use DLACON2 to estimate the infinity-norm of the matrix   
             inv(op(A)) * diag(W),   
          where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */
	
	for (i = 0; i < A->nrow; ++i) rwork[i] = fabs( Bptr[i] );
	
	/* Compute abs(op(A))*abs(X) + abs(B). */
	if ( notran ) {
	    for (k = 0; k < A->ncol; ++k) {
		xk = fabs( Xptr[k] );
		for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
		    rwork[Astore->rowind[i]] += fabs(Aval[i]) * xk;
	    }
	} else {  /* trans == TRANS or CONJ */
	    for (k = 0; k < A->ncol; ++k) {
		s = 0.;
		for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
		    irow = Astore->rowind[i];
		    xk = fabs( Xptr[irow] );
		    s += fabs(Aval[i]) * xk;
		}
		rwork[k] += s;
	    }
	}
	
	for (i = 0; i < A->nrow; ++i)
	    if (rwork[i] > safe2)
		rwork[i] = fabs(work[i]) + (iwork[i]+1)*eps*rwork[i];
	    else
		rwork[i] = fabs(work[i])+(iwork[i]+1)*eps*rwork[i]+safe1;

	kase = 0;

	do {
	    dlacon2_(&A->nrow, &work[A->nrow], work,
		    &iwork[A->nrow], &ferr[j], &kase, isave);
	    if (kase == 0) break;

	    if (kase == 1) {
		/* Multiply by diag(W)*inv(op(A)**T)*(diag(C) or diag(R)). */
		if ( notran && colequ )
		    for (i = 0; i < A->ncol; ++i) work[i] *= C[i];
		else if ( !notran && rowequ )
		    for (i = 0; i < A->nrow; ++i) work[i] *= R[i];
		
		dgstrs (transt, L, U, perm_c, perm_r, &Bjcol, stat, info);
		
		for (i = 0; i < A->nrow; ++i) work[i] *= rwork[i];
	    } else {
		/* Multiply by (diag(C) or diag(R))*inv(op(A))*diag(W). */
		for (i = 0; i < A->nrow; ++i) work[i] *= rwork[i];
		
		dgstrs (trans, L, U, perm_c, perm_r, &Bjcol, stat, info);
		
		if ( notran && colequ )
		    for (i = 0; i < A->ncol; ++i) work[i] *= C[i];
		else if ( !notran && rowequ )
		    for (i = 0; i < A->ncol; ++i) work[i] *= R[i];  
	    }
	    
	} while ( kase != 0 );


	/* Normalize error. */
	lstres = 0.;
 	if ( notran && colequ ) {
	    for (i = 0; i < A->nrow; ++i)
	    	lstres = SUPERLU_MAX( lstres, C[i] * fabs( Xptr[i]) );
  	} else if ( !notran && rowequ ) {
	    for (i = 0; i < A->nrow; ++i)
	    	lstres = SUPERLU_MAX( lstres, R[i] * fabs( Xptr[i]) );
	} else {
	    for (i = 0; i < A->nrow; ++i)
	    	lstres = SUPERLU_MAX( lstres, fabs( Xptr[i]) );
	}
	if ( lstres != 0. )
	    ferr[j] /= lstres;

    } /* for each RHS j ... */
    
    SUPERLU_FREE(work);
    SUPERLU_FREE(rwork);
    SUPERLU_FREE(iwork);
    SUPERLU_FREE(Bjcol.Store);

    return;

} /* dgsrfs */
Exemplo n.º 7
0
int dgst04(int n, int nrhs, double *x, int ldx, double *xact,
	      int ldxact, double rcond, double *resid)
{
/*
    Purpose   
    =======   

    DGST04 computes the difference between a computed solution and the   
    true solution to a system of linear equations.   
    RESID =  ( norm(X-XACT) * RCOND ) / ( norm(XACT) * EPS ),   
    where RCOND is the reciprocal of the condition number and EPS is the 
    machine epsilon.   

    Arguments   
    =========   

    N       (input) INT   
            The number of rows of the matrices X and XACT.  N >= 0.   

    NRHS    (input) INT   
            The number of columns of the matrices X and XACT.  NRHS >= 0. 

    X       (input) DOUBLE PRECISION array, dimension (LDX,NRHS)   
            The computed solution vectors.  Each vector is stored as a   
            column of the matrix X.   

    LDX     (input) INT   
            The leading dimension of the array X.  LDX >= max(1,N).   

    XACT    (input) DOUBLE PRECISION array, dimension( LDX, NRHS )   
            The exact solution vectors.  Each vector is stored as a   
            column of the matrix XACT.   

    LDXACT  (input) INT   
            The leading dimension of the array XACT.  LDXACT >= max(1,N). 

    RCOND   (input) DOUBLE PRECISION   
            The reciprocal of the condition number of the coefficient   
            matrix in the system of equations.   

    RESID   (output) DOUBLE PRECISION   
            The maximum over the NRHS solution vectors of   
            ( norm(X-XACT) * RCOND ) / ( norm(XACT) * EPS )   

    ===================================================================== 
*/
    /* Table of constant values */
    int c__1 = 1;

    /* System generated locals */
    double d__1, d__2, d__3, d__4;

    /* Local variables */
    int    i, j, n__1;
    int    ix;
    double xnorm;
    double eps;
    double diffnm;

    /* Function prototypes */
    extern int idamax_(int *, double *, int *);

    /* Quick exit if N = 0 or NRHS = 0. */
   if ( n <= 0 || nrhs <= 0 ) {
	*resid = 0.;
	return 0;
    }

    /* Exit with RESID = 1/EPS if RCOND is invalid. */
    eps = dmach("Epsilon");
    if ( rcond < 0. ) {
	*resid = 1. / eps;
	return 0;
    }

    /* Compute the maximum of norm(X - XACT) / ( norm(XACT) * EPS )   
       over all the vectors X and XACT . */

    *resid = 0.;
    for (j = 0; j < nrhs; ++j) {
	n__1 = n;
	ix = idamax_(&n__1, &xact[j*ldxact], &c__1);
	xnorm = (d__1 = xact[ix-1 + j*ldxact], fabs(d__1));

	diffnm = 0.;
	for (i = 0; i < n; ++i) {
	    /* Computing MAX */
	    d__3 = diffnm;
	    d__4 = (d__1 = x[i+j*ldx]-xact[i+j*ldxact], fabs(d__1));
	    diffnm = SUPERLU_MAX(d__3,d__4);
	}
	if (xnorm <= 0.) {
	    if (diffnm > 0.) {
		*resid = 1. / eps;
	    }
	} else {
	    /* Computing MAX */
	    d__1 = *resid, d__2 = diffnm / xnorm * rcond;
	    *resid = SUPERLU_MAX(d__1,d__2);
	}
    }
    if (*resid * eps < 1.) {
	*resid /= eps;
    }

    return 0;

} /* dgst04_ */
Exemplo n.º 8
0
/* Subroutine */ int zlatb4_(char *path, integer *imat, integer *m, integer *
	n, char *type, integer *kl, integer *ku, doublereal *anorm, integer *
	mode, doublereal *cndnum, char *dist)
{
    /* Initialized data */

    static logical first = TRUE_;

    /* System generated locals */
    integer i__1;

    /* Builtin functions */
    double sqrt(doublereal);


    /* Local variables */
    static doublereal badc1, badc2, large, small;
    static char c2[2];
    extern /* Subroutine */ int dlabad_(doublereal *, doublereal *);
    extern doublereal dmach(char *);
    extern logical lsamen_(integer *, char *, char *);
    static integer mat;
    static doublereal eps;


/*  -- LAPACK test routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       February 29, 1992   


    Purpose   
    =======   

    ZLATB4 sets parameters for the matrix generator based on the type of 
  
    matrix to be generated.   

    Arguments   
    =========   

    PATH    (input) CHARACTER*3   
            The LAPACK path name.   

    IMAT    (input) INTEGER   
            An integer key describing which matrix to generate for this   
            path.   

    M       (input) INTEGER   
            The number of rows in the matrix to be generated.   

    N       (input) INTEGER   
            The number of columns in the matrix to be generated.   

    TYPE    (output) CHARACTER*1   
            The type of the matrix to be generated:   
            = 'S':  symmetric matrix   
            = 'P':  symmetric positive (semi)definite matrix   
            = 'N':  nonsymmetric matrix   

    KL      (output) INTEGER   
            The lower band width of the matrix to be generated.   

    KU      (output) INTEGER   
            The upper band width of the matrix to be generated.   

    ANORM   (output) DOUBLE PRECISION   
            The desired norm of the matrix to be generated.  The diagonal 
  
            matrix of singular values or eigenvalues is scaled by this   
            value.   

    MODE    (output) INTEGER   
            A key indicating how to choose the vector of eigenvalues.   

    CNDNUM  (output) DOUBLE PRECISION   
            The desired condition number.   

    DIST    (output) CHARACTER*1   
            The type of distribution to be used by the random number   
            generator.   

    ===================================================================== 
  


       Set some constants for use in the subroutine. */

    if (first) {
	first = FALSE_;
	eps = dmach("Precision");
	badc2 = .1 / eps;
	badc1 = sqrt(badc2);
	small = dmach("Safe minimum");
	large = 1. / small;

/*        If it looks like we're on a Cray, take the square root of   
          SMALL and LARGE to avoid overflow and underflow problems. */

	dlabad_(&small, &large);
	small = small / eps * .25;
	large = 1. / small;
    }

/*    s_copy(c2, path + 1, 2L, 2L);*/
    strncpy(c2, path + 1, 2);

/*     Set some parameters we don't plan to change. */

    *(unsigned char *)dist = 'S';
    *mode = 3;

/*     xQR, xLQ, xQL, xRQ:  Set parameters to generate a general   
                            M x N matrix. */

    if (lsamen_(&c__2, c2, "QR") || lsamen_(&c__2, c2, "LQ") 
	    || lsamen_(&c__2, c2, "QL") || lsamen_(&c__2, c2, "RQ")) {

/*        Set TYPE, the type of matrix to be generated. */

	*(unsigned char *)type = 'N';

/*        Set the lower and upper bandwidths. */

	if (*imat == 1) {
	    *kl = 0;
	    *ku = 0;
	} else if (*imat == 2) {
	    *kl = 0;
/* Computing MAX */
	    i__1 = *n - 1;
	    *ku = max(i__1,0);
	} else if (*imat == 3) {
/* Computing MAX */
	    i__1 = *m - 1;
	    *kl = max(i__1,0);
	    *ku = 0;
	} else {
/* Computing MAX */
	    i__1 = *m - 1;
	    *kl = max(i__1,0);
/* Computing MAX */
	    i__1 = *n - 1;
	    *ku = max(i__1,0);
	}

/*        Set the condition number and norm. */

	if (*imat == 5) {
	    *cndnum = badc1;
	} else if (*imat == 6) {
	    *cndnum = badc2;
	} else {
	    *cndnum = 2.;
	}

	if (*imat == 7) {
	    *anorm = small;
	} else if (*imat == 8) {
	    *anorm = large;
	} else {
	    *anorm = 1.;
	}

    } else if (lsamen_(&c__2, c2, "GE")) {

/*        xGE:  Set parameters to generate a general M x N matrix.   

          Set TYPE, the type of matrix to be generated. */

	*(unsigned char *)type = 'N';

/*        Set the lower and upper bandwidths. */

	if (*imat == 1) {
	    *kl = 0;
	    *ku = 0;
	} else if (*imat == 2) {
	    *kl = 0;
/* Computing MAX */
	    i__1 = *n - 1;
	    *ku = max(i__1,0);
	} else if (*imat == 3) {
/* Computing MAX */
	    i__1 = *m - 1;
	    *kl = max(i__1,0);
	    *ku = 0;
	} else {
/* Computing MAX */
	    i__1 = *m - 1;
	    *kl = max(i__1,0);
/* Computing MAX */
	    i__1 = *n - 1;
	    *ku = max(i__1,0);
	}

/*        Set the condition number and norm. */

	if (*imat == 8) {
	    *cndnum = badc1;
	} else if (*imat == 9) {
	    *cndnum = badc2;
	} else {
	    *cndnum = 2.;
	}

	if (*imat == 10) {
	    *anorm = small;
	} else if (*imat == 11) {
	    *anorm = large;
	} else {
	    *anorm = 1.;
	}

    } else if (lsamen_(&c__2, c2, "GB")) {

/*        xGB:  Set parameters to generate a general banded matrix.   

          Set TYPE, the type of matrix to be generated. */

	*(unsigned char *)type = 'N';

/*        Set the condition number and norm. */

	if (*imat == 5) {
	    *cndnum = badc1;
	} else if (*imat == 6) {
	    *cndnum = badc2 * .1;
	} else {
	    *cndnum = 2.;
	}

	if (*imat == 7) {
	    *anorm = small;
	} else if (*imat == 8) {
	    *anorm = large;
	} else {
	    *anorm = 1.;
	}

    } else if (lsamen_(&c__2, c2, "GT")) {

/*        xGT:  Set parameters to generate a general tridiagonal matri
x.   

          Set TYPE, the type of matrix to be generated. */

	*(unsigned char *)type = 'N';

/*        Set the lower and upper bandwidths. */

	if (*imat == 1) {
	    *kl = 0;
	} else {
	    *kl = 1;
	}
	*ku = *kl;

/*        Set the condition number and norm. */

	if (*imat == 3) {
	    *cndnum = badc1;
	} else if (*imat == 4) {
	    *cndnum = badc2;
	} else {
	    *cndnum = 2.;
	}

	if (*imat == 5 || *imat == 11) {
	    *anorm = small;
	} else if (*imat == 6 || *imat == 12) {
	    *anorm = large;
	} else {
	    *anorm = 1.;
	}

    } else if (lsamen_(&c__2, c2, "PO") || lsamen_(&c__2, c2, "PP") || lsamen_(&c__2, c2, "HE") || lsamen_(&c__2, c2, 
	    "HP") || lsamen_(&c__2, c2, "SY") || lsamen_(&
	    c__2, c2, "SP")) {

/*        xPO, xPP, xHE, xHP, xSY, xSP: Set parameters to generate a 
  
          symmetric or Hermitian matrix.   

          Set TYPE, the type of matrix to be generated. */

	*(unsigned char *)type = *(unsigned char *)c2;

/*        Set the lower and upper bandwidths. */

	if (*imat == 1) {
	    *kl = 0;
	} else {
/* Computing MAX */
	    i__1 = *n - 1;
	    *kl = max(i__1,0);
	}
	*ku = *kl;

/*        Set the condition number and norm. */

	if (*imat == 6) {
	    *cndnum = badc1;
	} else if (*imat == 7) {
	    *cndnum = badc2;
	} else {
	    *cndnum = 2.;
	}

	if (*imat == 8) {
	    *anorm = small;
	} else if (*imat == 9) {
	    *anorm = large;
	} else {
	    *anorm = 1.;
	}

    } else if (lsamen_(&c__2, c2, "PB")) {

/*        xPB:  Set parameters to generate a symmetric band matrix.   

          Set TYPE, the type of matrix to be generated. */

	*(unsigned char *)type = 'P';

/*        Set the norm and condition number. */

	if (*imat == 5) {
	    *cndnum = badc1;
	} else if (*imat == 6) {
	    *cndnum = badc2;
	} else {
	    *cndnum = 2.;
	}

	if (*imat == 7) {
	    *anorm = small;
	} else if (*imat == 8) {
	    *anorm = large;
	} else {
	    *anorm = 1.;
	}

    } else if (lsamen_(&c__2, c2, "PT")) {

/*        xPT:  Set parameters to generate a symmetric positive defini
te   
          tridiagonal matrix. */

	*(unsigned char *)type = 'P';
	if (*imat == 1) {
	    *kl = 0;
	} else {
	    *kl = 1;
	}
	*ku = *kl;

/*        Set the condition number and norm. */

	if (*imat == 3) {
	    *cndnum = badc1;
	} else if (*imat == 4) {
	    *cndnum = badc2;
	} else {
	    *cndnum = 2.;
	}

	if (*imat == 5 || *imat == 11) {
	    *anorm = small;
	} else if (*imat == 6 || *imat == 12) {
	    *anorm = large;
	} else {
	    *anorm = 1.;
	}

    } else if (lsamen_(&c__2, c2, "TR") || lsamen_(&c__2, c2, "TP")) {

/*        xTR, xTP:  Set parameters to generate a triangular matrix   

          Set TYPE, the type of matrix to be generated. */

	*(unsigned char *)type = 'N';

/*        Set the lower and upper bandwidths. */

	mat = abs(*imat);
	if (mat == 1 || mat == 7) {
	    *kl = 0;
	    *ku = 0;
	} else if (*imat < 0) {
/* Computing MAX */
	    i__1 = *n - 1;
	    *kl = max(i__1,0);
	    *ku = 0;
	} else {
	    *kl = 0;
/* Computing MAX */
	    i__1 = *n - 1;
	    *ku = max(i__1,0);
	}

/*        Set the condition number and norm. */

	if (mat == 3 || mat == 9) {
	    *cndnum = badc1;
	} else if (mat == 4 || mat == 10) {
	    *cndnum = badc2;
	} else {
	    *cndnum = 2.;
	}

	if (mat == 5) {
	    *anorm = small;
	} else if (mat == 6) {
	    *anorm = large;
	} else {
	    *anorm = 1.;
	}

    } else if (lsamen_(&c__2, c2, "TB")) {

/*        xTB:  Set parameters to generate a triangular band matrix. 
  

          Set TYPE, the type of matrix to be generated. */

	*(unsigned char *)type = 'N';

/*        Set the norm and condition number. */

	if (*imat == 2 || *imat == 8) {
	    *cndnum = badc1;
	} else if (*imat == 3 || *imat == 9) {
	    *cndnum = badc2;
	} else {
	    *cndnum = 2.;
	}

	if (*imat == 4) {
	    *anorm = small;
	} else if (*imat == 5) {
	    *anorm = large;
	} else {
	    *anorm = 1.;
	}
    }
    if (*n <= 1) {
	*cndnum = 1.;
    }

    return 0;

/*     End of ZLATB4 */

} /* zlatb4_ */
Exemplo n.º 9
0
void
pdlaqgs(SuperMatrix *A, double *r, double *c, 
       double rowcnd, double colcnd, double amax, char *equed)
{

#define THRESH    (0.1)
    
    /* Local variables */
    NRformat_loc *Astore;
    double *Aval;
    int_t i, j, irow, jcol, m_loc;
    double large, small;

    /* Quick return if possible */
    if (A->nrow <= 0 || A->ncol <= 0) {
	*(unsigned char *)equed = 'N';
	return;
    }

    Astore = A->Store;
    Aval = Astore->nzval;
    m_loc = Astore->m_loc;
    
    /* Initialize LARGE and SMALL. */
    small = dmach("Safe minimum") / dmach("Precision");
    large = 1. / small;

    if (rowcnd >= THRESH && amax >= small && amax <= large) {
	if (colcnd >= THRESH)
	    *(unsigned char *)equed = 'N';
	else {
	    /* Column scaling */
	    irow = Astore->fst_row;
	    for (i = 0; i < m_loc; ++i) {
	        for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; ++j) {
		    jcol = Astore->colind[j];
		    Aval[j] *= c[jcol];
	      }
	      ++irow;
	    }
	    *(unsigned char *)equed = 'C';
	}
    } else if (colcnd >= THRESH) {
	/* Row scaling, no column scaling */
	irow = Astore->fst_row;
	for (i = 0; i < m_loc; ++i) {
	    for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; ++j)
	        Aval[j] *= r[irow];
	    ++irow;
	}
	*(unsigned char *)equed = 'R';
    } else {
	/* Both row and column scaling */
	irow = Astore->fst_row;
	for (i = 0; i < m_loc; ++i) {
	    for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; ++j) {
	        jcol = Astore->colind[j];
	        Aval[j] = Aval[j] * r[irow] * c[jcol];
	    }
	    ++irow;
	}
	*(unsigned char *)equed = 'B';
    }

    return;

} /* pdlaqgs */
Exemplo n.º 10
0
double
dPivotGrowth(int ncols, SuperMatrix *A, int *perm_c, 
             SuperMatrix *L, SuperMatrix *U)
{

    NCformat *Astore;
    SCformat *Lstore;
    NCformat *Ustore;
    double  *Aval, *Lval, *Uval;
    int      fsupc, nsupr, luptr, nz_in_U;
    int      i, j, k, oldcol;
    int      *inv_perm_c;
    double   rpg, maxaj, maxuj;
    double   smlnum;
    double   *luval;
   
    /* Get machine constants. */
    smlnum = dmach("S");
    rpg = 1. / smlnum;

    Astore = A->Store;
    Lstore = L->Store;
    Ustore = U->Store;
    Aval = Astore->nzval;
    Lval = Lstore->nzval;
    Uval = Ustore->nzval;
    
    inv_perm_c = (int *) SUPERLU_MALLOC(A->ncol*sizeof(int));
    for (j = 0; j < A->ncol; ++j) inv_perm_c[perm_c[j]] = j;

    for (k = 0; k <= Lstore->nsuper; ++k) {
	fsupc = L_FST_SUPC(k);
	nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
	luptr = L_NZ_START(fsupc);
	luval = &Lval[luptr];
	nz_in_U = 1;
	
	for (j = fsupc; j < L_FST_SUPC(k+1) && j < ncols; ++j) {
	    maxaj = 0.;
            oldcol = inv_perm_c[j];
	    for (i = Astore->colptr[oldcol]; i < Astore->colptr[oldcol+1]; ++i)
		maxaj = SUPERLU_MAX( maxaj, fabs(Aval[i]) );
	
	    maxuj = 0.;
	    for (i = Ustore->colptr[j]; i < Ustore->colptr[j+1]; i++)
		maxuj = SUPERLU_MAX( maxuj, fabs(Uval[i]) );
	    
	    /* Supernode */
	    for (i = 0; i < nz_in_U; ++i)
		maxuj = SUPERLU_MAX( maxuj, fabs(luval[i]) );

	    ++nz_in_U;
	    luval += nsupr;

	    if ( maxuj == 0. )
		rpg = SUPERLU_MIN( rpg, 1.);
	    else
		rpg = SUPERLU_MIN( rpg, maxaj / maxuj );
	}
	
	if ( j >= ncols ) break;
    }

    SUPERLU_FREE(inv_perm_c);
    return (rpg);
}
Exemplo n.º 11
0
/* Subroutine */ int dlartg_(doublereal *f, doublereal *g, doublereal *cs, 
	doublereal *sn, doublereal *r)
{
/*  -- LAPACK auxiliary routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    DLARTG generate a plane rotation so that   

       [  CS  SN  ]  .  [ F ]  =  [ R ]   where CS**2 + SN**2 = 1.   
       [ -SN  CS  ]     [ G ]     [ 0 ]   

    This is a slower, more accurate version of the BLAS1 routine DROTG,   
    with the following other differences:   
       F and G are unchanged on return.   
       If G=0, then CS=1 and SN=0.   
       If F=0 and (G .ne. 0), then CS=0 and SN=1 without doing any   
          floating point operations (saves work in DBDSQR when   
          there are zeros on the diagonal).   

    If F exceeds G in magnitude, CS will be positive.   

    Arguments   
    =========   

    F       (input) DOUBLE PRECISION   
            The first component of vector to be rotated.   

    G       (input) DOUBLE PRECISION   
            The second component of vector to be rotated.   

    CS      (output) DOUBLE PRECISION   
            The cosine of the rotation.   

    SN      (output) DOUBLE PRECISION   
            The sine of the rotation.   

    R       (output) DOUBLE PRECISION   
            The nonzero component of the rotated vector.   

    ===================================================================== 
*/
    /* Initialized data */
    static logical first = TRUE_;
    /* System generated locals */
    integer i__1;
    doublereal d__1, d__2;
    /* Builtin functions */
    double log(doublereal), pow_di(doublereal *, integer *), sqrt(doublereal);
    /* Local variables */
    static integer i;
    static doublereal scale;
    static integer count;
    static doublereal f1, g1, safmn2, safmx2;
    extern doublereal dmach(char *);
    static doublereal safmin, eps;



    if (first) {
	first = FALSE_;
	safmin = dmach("S");
	eps = dmach("E");
	d__1 = dmach("B");
	i__1 = (integer) (log(safmin / eps) / log(dmach("B")) / 2.);
	safmn2 = pow_di(&d__1, &i__1);
	safmx2 = 1. / safmn2;
    }
    if (*g == 0.) {
	*cs = 1.;
	*sn = 0.;
	*r = *f;
    } else if (*f == 0.) {
	*cs = 0.;
	*sn = 1.;
	*r = *g;
    } else {
	f1 = *f;
	g1 = *g;
/* Computing MAX */
	d__1 = abs(f1), d__2 = abs(g1);
	scale = max(d__1,d__2);
	if (scale >= safmx2) {
	    count = 0;
L10:
	    ++count;
	    f1 *= safmn2;
	    g1 *= safmn2;
/* Computing MAX */
	    d__1 = abs(f1), d__2 = abs(g1);
	    scale = max(d__1,d__2);
	    if (scale >= safmx2) {
		goto L10;
	    }
/* Computing 2nd power */
	    d__1 = f1;
/* Computing 2nd power */
	    d__2 = g1;
	    *r = sqrt(d__1 * d__1 + d__2 * d__2);
	    *cs = f1 / *r;
	    *sn = g1 / *r;
	    i__1 = count;
	    for (i = 1; i <= count; ++i) {
		*r *= safmx2;
/* L20: */
	    }
	} else if (scale <= safmn2) {
	    count = 0;
L30:
	    ++count;
	    f1 *= safmx2;
	    g1 *= safmx2;
/* Computing MAX */
	    d__1 = abs(f1), d__2 = abs(g1);
	    scale = max(d__1,d__2);
	    if (scale <= safmn2) {
		goto L30;
	    }
/* Computing 2nd power */
	    d__1 = f1;
/* Computing 2nd power */
	    d__2 = g1;
	    *r = sqrt(d__1 * d__1 + d__2 * d__2);
	    *cs = f1 / *r;
	    *sn = g1 / *r;
	    i__1 = count;
	    for (i = 1; i <= count; ++i) {
		*r *= safmn2;
/* L40: */
	    }
	} else {
/* Computing 2nd power */
	    d__1 = f1;
/* Computing 2nd power */
	    d__2 = g1;
	    *r = sqrt(d__1 * d__1 + d__2 * d__2);
	    *cs = f1 / *r;
	    *sn = g1 / *r;
	}
	if (abs(*f) > abs(*g) && *cs < 0.) {
	    *cs = -(*cs);
	    *sn = -(*sn);
	    *r = -(*r);
	}
    }
    return 0;

/*     End of DLARTG */

} /* dlartg_ */
Exemplo n.º 12
0
void
pzgsrfs_ABXglobal(int_t n, SuperMatrix *A, double anorm, LUstruct_t *LUstruct,
		  gridinfo_t *grid, doublecomplex *B, int_t ldb, doublecomplex *X, int_t ldx,
		  int nrhs, double *berr, SuperLUStat_t *stat, int *info)
{


#define ITMAX 20
    
    Glu_persist_t *Glu_persist = LUstruct->Glu_persist;
    LocalLU_t *Llu = LUstruct->Llu;
    /* 
     * Data structures used by matrix-vector multiply routine.
     */
    int_t  N_update; /* Number of variables updated on this process */
    int_t  *update;  /* vector elements (global index) updated 
			on this processor.                     */
    int_t  *bindx;
    doublecomplex *val;
    int_t *mv_sup_to_proc;  /* Supernode to process mapping in
			       matrix-vector multiply.  */
    /*-- end data structures for matrix-vector multiply --*/
    doublecomplex *b, *ax, *R, *B_col, *temp, *work, *X_col,
           *x_trs, *dx_trs;
    double *rwork;
    int_t notran;
    int_t count, ii, j, jj, k, knsupc, lk, lwork,
          nprow, nsupers, nz, p;
    int   i, iam, pkk;
    int_t *ilsum, *xsup;
    double eps, lstres;
    double s, safmin, safe1, safe2;

    /* NEW STUFF */
    int_t num_diag_procs, *diag_procs; /* Record diagonal process numbers. */
    int_t *diag_len; /* Length of the X vector on diagonal processes. */

    /*-- Function prototypes --*/
    extern void pzgstrs1(int_t, LUstruct_t *, gridinfo_t *,
			 doublecomplex *, int, SuperLUStat_t *, int *);
    
    /* Test the input parameters. */
    *info = 0;
    if ( n < 0 ) *info = -1;
    else if ( A->nrow != A->ncol || A->nrow < 0 ||
	      A->Stype != SLU_NCP || A->Dtype != SLU_Z || A->Mtype != SLU_GE )
	*info = -2;
    else if ( ldb < SUPERLU_MAX(0, n) ) *info = -10;
    else if ( ldx < SUPERLU_MAX(0, n) )	*info = -12;
    else if ( nrhs < 0 ) *info = -13;
    if (*info != 0) {
	i = -(*info);
	xerbla_("pzgsrfs_ABXglobal", &i);
	return;
    }

    /* Quick return if possible. */
    if ( n == 0 || nrhs == 0 ) {
	return;
    }

    /* Initialization. */
    iam = grid->iam;
    nprow = grid->nprow;
    nsupers = Glu_persist->supno[n-1] + 1;
    xsup = Glu_persist->xsup;
    ilsum = Llu->ilsum;
    notran = 1;

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(iam, "Enter pzgsrfs_ABXglobal()");
#endif

    get_diag_procs(n, Glu_persist, grid, &num_diag_procs,
		   &diag_procs, &diag_len);
#if ( PRNTlevel>=1 )
    if ( !iam ) {
	printf(".. number of diag processes = " IFMT "\n", num_diag_procs);
	PrintInt10("diag_procs", num_diag_procs, diag_procs);
	PrintInt10("diag_len", num_diag_procs, diag_len);
    }
#endif

    if ( !(mv_sup_to_proc = intCalloc_dist(nsupers)) )
	ABORT("Calloc fails for mv_sup_to_proc[]");

    pzgsmv_AXglobal_setup(A, Glu_persist, grid, &N_update, &update,
		          &val, &bindx, mv_sup_to_proc);

    i = CEILING( nsupers, nprow ); /* Number of local block rows */
    ii = Llu->ldalsum + i * XK_H;
    k = SUPERLU_MAX(N_update, sp_ienv_dist(3));
    jj = diag_len[0];
    for (j = 1; j < num_diag_procs; ++j) jj = SUPERLU_MAX( jj, diag_len[j] );
    jj = SUPERLU_MAX( jj, N_update );
    lwork = N_update         /* For ax and R */
	  + ii               /* For dx_trs */
	  + ii               /* For x_trs */
          + k                /* For b */
	  + jj;              /* for temp */
    if ( !(work = doublecomplexMalloc_dist(lwork)) )
	ABORT("Malloc fails for work[]");
    ax = R = work;
    dx_trs = work + N_update;
    x_trs  = dx_trs + ii;
    b      = x_trs + ii;
    temp   = b + k;
    if ( !(rwork = SUPERLU_MALLOC(N_update * sizeof(double))) )
	ABORT("Malloc fails for rwork[]");

#if ( DEBUGlevel>=2 )
    {
	doublecomplex *dwork = doublecomplexMalloc_dist(n);
	for (i = 0; i < n; ++i) {
	    if ( i & 1 ) dwork[i].r = 1.;
	    else dwork[i].r = 2.;
	    dwork[i].i = 0.;
        }
	/* Check correctness of matrix-vector multiply. */
	pzgsmv_AXglobal(N_update, update, val, bindx, dwork, ax);
	PrintDouble5("Mult A*x", N_update, ax);
	SUPERLU_FREE(dwork);
    }
#endif


    /* NZ = maximum number of nonzero elements in each row of A, plus 1 */
    nz     = A->ncol + 1;
    eps    = dmach("Epsilon");
    safmin = dmach("Safe minimum");

    /* Set SAFE1 essentially to be the underflow threshold times the
       number of additions in each row. */
    safe1  = nz * safmin;
    safe2  = safe1 / eps;

#if ( DEBUGlevel>=1 )
    if ( !iam ) printf(".. eps = %e\tanorm = %e\tsafe1 = %e\tsafe2 = %e\n",
		       eps, anorm, safe1, safe2);
#endif

    /* Do for each right-hand side ... */
    for (j = 0; j < nrhs; ++j) {
	count = 0;
	lstres = 3.;

	/* Copy X into x on the diagonal processes. */
	B_col = &B[j*ldb];
	X_col = &X[j*ldx];
	for (p = 0; p < num_diag_procs; ++p) {
	    pkk = diag_procs[p];
	    if ( iam == pkk ) {
		for (k = p; k < nsupers; k += num_diag_procs) {
		    knsupc = SuperSize( k );
		    lk = LBi( k, grid );
		    ii = ilsum[lk] + (lk+1)*XK_H;
		    jj = FstBlockC( k );
		    for (i = 0; i < knsupc; ++i) x_trs[i+ii] = X_col[i+jj];
		    dx_trs[ii-XK_H].r = k;/* Block number prepended in header. */
		}
	    }
	}
	/* Copy B into b distributed the same way as matrix-vector product. */
        if ( N_update ) ii = update[0];
	for (i = 0; i < N_update; ++i) b[i] = B_col[i + ii];

	while (1) { /* Loop until stopping criterion is satisfied. */

	    /* Compute residual R = B - op(A) * X,   
	       where op(A) = A, A**T, or A**H, depending on TRANS. */

	    /* Matrix-vector multiply. */
	    pzgsmv_AXglobal(N_update, update, val, bindx, X_col, ax);
	    
	    /* Compute residual. */
	    for (i = 0; i < N_update; ++i) z_sub(&R[i], &b[i], &ax[i]);

	    /* Compute abs(op(A))*abs(X) + abs(B). */
	    pzgsmv_AXglobal_abs(N_update, update, val, bindx, X_col, rwork);
	    for (i = 0; i < N_update; ++i) rwork[i] += slud_z_abs1(&b[i]);
	    
	    s = 0.0;
	    for (i = 0; i < N_update; ++i) {
		if ( rwork[i] > safe2 ) {
		    s = SUPERLU_MAX(s, slud_z_abs1(&R[i]) / rwork[i]);
		} else if ( rwork[i] != 0.0 ) {
		    s = SUPERLU_MAX(s, (safe1 + slud_z_abs1(&R[i])) / rwork[i]);
                }
                /* If temp[i] is exactly 0.0 (computed by PxGSMV), then
                   we know the true residual also must be exactly 0.0. */
	    }
	    MPI_Allreduce( &s, &berr[j], 1, MPI_DOUBLE, MPI_MAX, grid->comm );
		
#if ( PRNTlevel>= 1 )
	    if ( !iam )
		printf("(%2d) .. Step " IFMT ": berr[j] = %e\n", iam, count, berr[j]);
#endif
	    if ( berr[j] > eps && berr[j] * 2 <= lstres && count < ITMAX ) {
		/* Compute new dx. */
		redist_all_to_diag(n, R, Glu_persist, Llu, grid,
				   mv_sup_to_proc, dx_trs);
		pzgstrs1(n, LUstruct, grid, dx_trs, 1, stat, info);

		/* Update solution. */
		for (p = 0; p < num_diag_procs; ++p) 
		    if ( iam == diag_procs[p] )
			for (k = p; k < nsupers; k += num_diag_procs) {
			    lk = LBi( k, grid );
			    ii = ilsum[lk] + (lk+1)*XK_H;
			    knsupc = SuperSize( k );
			    for (i = 0; i < knsupc; ++i)
				z_add(&x_trs[i + ii], &x_trs[i + ii], 
				      &dx_trs[i + ii]);
			}
		lstres = berr[j];
		++count;
		/* Transfer x_trs (on diagonal processes) into X
		   (on all processes). */
		gather_1rhs_diag_to_all(n, x_trs, Glu_persist, Llu, grid, 
					num_diag_procs, diag_procs, diag_len,
					X_col, temp);
	    } else {
		break;
	    }
	} /* end while */

	stat->RefineSteps = count;

    } /* for j ... */


    /* Deallocate storage used by matrix-vector multiplication. */
    SUPERLU_FREE(diag_procs);
    SUPERLU_FREE(diag_len);
    if ( N_update ) {
	SUPERLU_FREE(update);
	SUPERLU_FREE(bindx);
	SUPERLU_FREE(val);
    }
    SUPERLU_FREE(mv_sup_to_proc);
    SUPERLU_FREE(work);
    SUPERLU_FREE(rwork);

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(iam, "Exit pzgsrfs_ABXglobal()");
#endif

} /* PZGSRFS_ABXGLOBAL */
Exemplo n.º 13
0
int zgst07(trans_t trans, int n, int nrhs, SuperMatrix *A, doublecomplex *b, 
	      int ldb, doublecomplex *x, int ldx, doublecomplex *xact, 
              int ldxact, double *ferr, double *berr, double *reslts)
{
/*
    Purpose   
    =======   

    ZGST07 tests the error bounds from iterative refinement for the   
    computed solution to a system of equations op(A)*X = B, where A is a 
    general n by n matrix and op(A) = A or A**T, depending on TRANS.
    
    RESLTS(1) = test of the error bound   
              = norm(X - XACT) / ( norm(X) * FERR )   
    A large value is returned if this ratio is not less than one.   

    RESLTS(2) = residual from the iterative refinement routine   
              = the maximum of BERR / ( (n+1)*EPS + (*) ), where   
                (*) = (n+1)*UNFL / (min_i (abs(op(A))*abs(X) +abs(b))_i ) 

    Arguments   
    =========   

    TRANS   (input) trans_t
            Specifies the form of the system of equations.   
            = NOTRANS:  A *x = b   
            = TRANS  :  A'*x = b, where A' is the transpose of A   
            = CONJ   :  A'*x = b, where A' is the transpose of A   

    N       (input) INT
            The number of rows of the matrices X and XACT.  N >= 0.   

    NRHS    (input) INT   
            The number of columns of the matrices X and XACT.  NRHS >= 0. 
  

    A       (input) SuperMatrix *, dimension (A->nrow, A->ncol)
            The original n by n matrix A.   

    B       (input) DOUBLE COMPLEX PRECISION array, dimension (LDB,NRHS)   
            The right hand side vectors for the system of linear   
            equations.   

    LDB     (input) INT   
            The leading dimension of the array B.  LDB >= max(1,N).   

    X       (input) DOUBLE COMPLEX PRECISION array, dimension (LDX,NRHS)   
            The computed solution vectors.  Each vector is stored as a   
            column of the matrix X.   

    LDX     (input) INT   
            The leading dimension of the array X.  LDX >= max(1,N).   

    XACT    (input) DOUBLE COMPLEX PRECISION array, dimension (LDX,NRHS)   
            The exact solution vectors.  Each vector is stored as a   
            column of the matrix XACT.   

    LDXACT  (input) INT   
            The leading dimension of the array XACT.  LDXACT >= max(1,N). 
  

    FERR    (input) DOUBLE COMPLEX PRECISION array, dimension (NRHS)   
            The estimated forward error bounds for each solution vector   
            X.  If XTRUE is the true solution, FERR bounds the magnitude 
            of the largest entry in (X - XTRUE) divided by the magnitude 
            of the largest entry in X.   

    BERR    (input) DOUBLE COMPLEX PRECISION array, dimension (NRHS)   
            The componentwise relative backward error of each solution   
            vector (i.e., the smallest relative change in any entry of A 
  
            or B that makes X an exact solution).   

    RESLTS  (output) DOUBLE PRECISION array, dimension (2)   
            The maximum over the NRHS solution vectors of the ratios:   
            RESLTS(1) = norm(X - XACT) / ( norm(X) * FERR )   
            RESLTS(2) = BERR / ( (n+1)*EPS + (*) )   

    ===================================================================== 
*/
    
    /* Table of constant values */
    int c__1 = 1;

    /* System generated locals */
    double d__1, d__2;
    double d__3, d__4;

    /* Local variables */
    double diff, axbi;
    int    imax, irow, n__1;
    int    i, j, k;
    double unfl, ovfl;
    double xnorm;
    double errbnd;
    int    notran;
    double eps, tmp;
    double *rwork;
    doublecomplex *Aval;
    NCformat *Astore;

    /* Function prototypes */
    extern int    lsame_(char *, char *);
    extern int    izamax_(int *, doublecomplex *, int *);


    /* Quick exit if N = 0 or NRHS = 0. */
    if ( n <= 0 || nrhs <= 0 ) {
	reslts[0] = 0.;
	reslts[1] = 0.;
	return 0;
    }

    eps = dmach("Epsilon");
    unfl = dmach("Safe minimum");
    ovfl   = 1. / unfl;
    notran = (trans == NOTRANS);

    rwork  = (double *) SUPERLU_MALLOC(n*sizeof(double));
    if ( !rwork ) ABORT("SUPERLU_MALLOC fails for rwork");
    Astore = A->Store;
    Aval   = (doublecomplex *) Astore->nzval;
    
    /* Test 1:  Compute the maximum of   
       norm(X - XACT) / ( norm(X) * FERR )   
       over all the vectors X and XACT using the infinity-norm. */

    errbnd = 0.;
    for (j = 0; j < nrhs; ++j) {
	n__1 = n;
	imax = izamax_(&n__1, &x[j*ldx], &c__1);
	d__1 = (d__2 = x[imax-1 + j*ldx].r, fabs(d__2)) + 
               (d__3 = x[imax-1 + j*ldx].i, fabs(d__3));
	xnorm = SUPERLU_MAX(d__1,unfl);
	diff = 0.;
	for (i = 0; i < n; ++i) {
	    d__1 = (d__2 = x[i+j*ldx].r - xact[i+j*ldxact].r, fabs(d__2)) +
                   (d__3 = x[i+j*ldx].i - xact[i+j*ldxact].i, fabs(d__3));
	    diff = SUPERLU_MAX(diff, d__1);
	}

	if (xnorm > 1.) {
	    goto L20;
	} else if (diff <= ovfl * xnorm) {
	    goto L20;
	} else {
	    errbnd = 1. / eps;
	    goto L30;
	}

L20:
#if 0	
	if (diff / xnorm <= ferr[j]) {
	    d__1 = diff / xnorm / ferr[j];
	    errbnd = SUPERLU_MAX(errbnd,d__1);
	} else {
	    errbnd = 1. / eps;
	}
#endif
	d__1 = diff / xnorm / ferr[j];
	errbnd = SUPERLU_MAX(errbnd,d__1);
	/*printf("Ferr: %f\n", errbnd);*/
L30:
	;
    }
    reslts[0] = errbnd;

    /* Test 2: Compute the maximum of BERR / ( (n+1)*EPS + (*) ), where 
       (*) = (n+1)*UNFL / (min_i (abs(op(A))*abs(X) + abs(b))_i ) */

    for (k = 0; k < nrhs; ++k) {
	for (i = 0; i < n; ++i) 
            rwork[i] = (d__1 = b[i + k*ldb].r, fabs(d__1)) +
                       (d__2 = b[i + k*ldb].i, fabs(d__2));
	if ( notran ) {
	    for (j = 0; j < n; ++j) {
		tmp = (d__1 = x[j + k*ldx].r, fabs(d__1)) +
                      (d__2 = x[j + k*ldx].i, fabs(d__2));
		for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
		    d__1 = (d__2 = Aval[i].r, fabs(d__2)) +
                           (d__3 = Aval[i].i, fabs(d__3));
		    rwork[Astore->rowind[i]] += d__1 * tmp;
                }
	    }
	} else {
	    for (j = 0; j < n; ++j) {
		tmp = 0.;
		for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
		    irow = Astore->rowind[i];
		    d__1 = (d__2 = x[irow + k*ldx].r, fabs(d__2)) +
                           (d__3 = x[irow + k*ldx].i, fabs(d__3));
                    d__2 = (d__3 = Aval[i].r, fabs(d__3)) +
                           (d__4 = Aval[i].i, fabs(d__4));
		    tmp += d__2 * d__1;
		}
		rwork[j] += tmp;
	    }
	}

	axbi = rwork[0];
	for (i = 1; i < n; ++i) axbi = SUPERLU_MIN(axbi, rwork[i]);
	
	/* Computing MAX */
	d__1 = axbi, d__2 = (n + 1) * unfl;
	tmp = berr[k] / ((n + 1) * eps + (n + 1) * unfl / SUPERLU_MAX(d__1,d__2));
	
	if (k == 0) {
	    reslts[1] = tmp;
	} else {
	    reslts[1] = SUPERLU_MAX(reslts[1],tmp);
	}
    }

    SUPERLU_FREE(rwork);
    return 0;

} /* zgst07 */