Esempio n. 1
0
/* 
 * Check the inf-norm of the error vector 
 */
void pdinf_norm_error(int iam, int_t n, int_t nrhs, double x[], int_t ldx,
		      double xtrue[], int_t ldxtrue, gridinfo_t *grid) 
{
    double err, xnorm, temperr, tempxnorm;
    double *x_work, *xtrue_work;
    int i, j;

    for (j = 0; j < nrhs; j++) {
      x_work = &x[j*ldx];
      xtrue_work = &xtrue[j*ldxtrue];
      err = xnorm = 0.0;
      for (i = 0; i < n; i++) {
	err = SUPERLU_MAX(err, fabs(x_work[i] - xtrue_work[i]));
	xnorm = SUPERLU_MAX(xnorm, fabs(x_work[i]));
      }

      /* get the golbal max err & xnrom */
      temperr = err;
      tempxnorm = xnorm;
      MPI_Allreduce( &temperr, &err, 1, MPI_DOUBLE, MPI_MAX, grid->comm);
      MPI_Allreduce( &tempxnorm, &xnorm, 1, MPI_DOUBLE, MPI_MAX, grid->comm);

      err = err / xnorm;
      if ( !iam ) printf("\tSol %2d: ||X-Xtrue||/||X|| = %e\n", j, err);
    }
}
Esempio n. 2
0
/*! \brief

<pre> 
    Purpose   
    =======   

    DLANGS_dist returns the value of the one norm, or the Frobenius norm, or 
    the infinity norm, or the element of largest absolute value of a 
    real matrix A.   

    Description   
    ===========   

    DLANGE returns the value   

       DLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm'   
                (   
                ( norm1(A),         NORM = '1', 'O' or 'o'   
                (   
                ( normI(A),         NORM = 'I' or 'i'   
                (   
                ( normF(A),         NORM = 'F', 'f', 'E' or 'e'   

    where  norm1  denotes the  one norm of a matrix (maximum column sum), 
    normI  denotes the  infinity norm  of a matrix  (maximum row sum) and 
    normF  denotes the  Frobenius norm of a matrix (square root of sum of 
    squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.   

    Arguments   
    =========   

    NORM    (input) CHARACTER*1   
            Specifies the value to be returned in DLANGE as described above.   
    A       (input) SuperMatrix*
            The M by N sparse matrix A. 

   ===================================================================== 
</pre>
*/
double dlangs_dist(char *norm, SuperMatrix *A)
{

    
    /* Local variables */
    NCformat *Astore;
    double   *Aval;
    int_t    i, j, irow;
    double   value=0., sum;
    double   *rwork;

    Astore = (NCformat *) A->Store;
    Aval   = (double *) Astore->nzval;
    
    if ( SUPERLU_MIN(A->nrow, A->ncol) == 0) {
	value = 0.;
	
    } else if ( strncmp(norm, "M", 1)==0 ) {
	/* Find max(abs(A(i,j))). */
	value = 0.;
	for (j = 0; j < A->ncol; ++j)
	    for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++)
		value = SUPERLU_MAX( value, fabs( Aval[i]) );
	
    } else if ( strncmp(norm, "O", 1)==0 || *(unsigned char *)norm == '1') {
	/* Find norm1(A). */
	value = 0.;
	for (j = 0; j < A->ncol; ++j) {
	    sum = 0.;
	    for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++) 
		sum += fabs(Aval[i]);
	    value = SUPERLU_MAX(value, sum);
	}
	
    } else if ( strncmp(norm, "I", 1)==0 ) {
	/* Find normI(A). */
	if ( !(rwork = (double *) SUPERLU_MALLOC(A->nrow * sizeof(double))) )
	    ABORT("SUPERLU_MALLOC fails for rwork.");
	for (i = 0; i < A->nrow; ++i) rwork[i] = 0.;
	for (j = 0; j < A->ncol; ++j)
	    for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++) {
		irow = Astore->rowind[i];
		rwork[irow] += fabs(Aval[i]);
	    }
	value = 0.;
	for (i = 0; i < A->nrow; ++i)
	    value = SUPERLU_MAX(value, rwork[i]);
	
	SUPERLU_FREE (rwork);
	
    } else if ( strncmp(norm, "F", 1)==0 || strncmp(norm, "E", 1)==0 ) {
	/* Find normF(A). */
	ABORT("Not implemented.");
    } else
	ABORT("Illegal norm specified.");

    return (value);

} /* dlangs_dist */
/*
 * Convert a full matrix into a sparse matrix format. 
 */
int
sp_dconvert(int m, int n, double *A, int lda, int kl, int ku,
	   double *a, int *asub, int *xa, int *nnz)
{
    int     lasta = 0;
    int     i, j, ilow, ihigh;
    int     *row;
    double  *val;

    for (j = 0; j < n; ++j) {
	xa[j] = lasta;
	val = &a[xa[j]];
	row = &asub[xa[j]];

	ilow = SUPERLU_MAX(0, j - ku);
	ihigh = SUPERLU_MIN(n-1, j + kl);
	for (i = ilow; i <= ihigh; ++i) {
	    val[i-ilow] = A[i + j*lda];
	    row[i-ilow] = i;
	}
	lasta += ihigh - ilow + 1;
    }

    xa[n] = *nnz = lasta;
    return 0;
}
Esempio n. 4
0
void log_memory(long long cur_bytes, SuperLUStat_t *stat) {
    stat->current_buffer += (float) cur_bytes;
    if (cur_bytes > 0) {
	stat->peak_buffer = 
	    SUPERLU_MAX(stat->peak_buffer, stat->current_buffer);
    }
}
Esempio n. 5
0
float sqselect(int n, float A[], int k)
{
    register int i, j, p;
    register float val;

    k = SUPERLU_MAX(k, 0);
    k = SUPERLU_MIN(k, n - 1);
    while (n > 1)
    {
	i = 0; j = n-1;
	p = j; val = A[p];
	while (i < j)
	{
	    for (; A[i] >= val && i < p; i++);
	    if (A[i] < val) { A[p] = A[i]; p = i; }
	    for (; A[j] <= val && j > p; j--);
	    if (A[j] > val) { A[p] = A[j]; p = j; }
	}
	A[p] = val;
	if (p == k) return val;
	else if (p > k) n = p;
	else
	{
	    p++;
	    n -= p; A += p; k -= p;
	}
    }

    return A[0];
}
Esempio n. 6
0
doublecomplex *doublecomplexMalloc_dist(int_t n)
{
    doublecomplex *buf;
    buf = (doublecomplex *) 
	SUPERLU_MALLOC(SUPERLU_MAX(1, n) * sizeof(doublecomplex)); 
    return (buf);
}
Esempio n. 7
0
/*
 * Generate a banded square matrix A, with dimension n and semi-bandwidth b.
 */
void
zband(int n, int b, int nonz, doublecomplex **nzval, int **rowind, int **colptr)
{
    int iseed[] = {1992,1993,1994,1995};    
    register int i, j, ub, lb, ilow, ihigh, lasta = 0;
    doublecomplex *a;
    int    *asub, *xa;
    doublecomplex *val;
    int    *row;
    extern double dlaran_();
    
    printf("A banded matrix.");
    zallocateA(n, nonz, nzval, rowind, colptr); /* Allocate storage */
    a    = *nzval;
    asub = *rowind;
    xa   = *colptr;
    ub = lb = b;
    
    for (i = 0; i < 4; ++i) iseed[i] = abs( iseed[i] ) % 4096;
    if ( iseed[3] % 2 != 1 ) ++iseed[3];

    for (j = 0; j < n; ++j) {
	xa[j] = lasta;
	val = &a[lasta];
	row = &asub[lasta];
	ilow = SUPERLU_MAX(0, j - ub);
	ihigh = SUPERLU_MIN(n-1, j + lb);
	for (i = ilow; i <= ihigh; ++i) {
	    val[i-ilow].r = dlaran_(iseed);
	    row[i-ilow] = i;
	}
	lasta += ihigh - ilow + 1;
    } /* for j ... */
    xa[n] = lasta;
}
Esempio n. 8
0
/*! \brief
 *
 * <pre>
 * Purpose
 * =======
 *
 * GetDiagU extracts the main diagonal of matrix U of the LU factorization.
 *
 * Arguments
 * =========
 *
 * n        (input) int
 *          Dimension of the matrix.
 *
 * LUstruct (input) LUstruct_t*
 *          The data structures to store the distributed L and U factors.
 *          see superlu_ddefs.h for its definition.
 *
 * grid     (input) gridinfo_t*
 *          The 2D process mesh. It contains the MPI communicator, the number
 *          of process rows (NPROW), the number of process columns (NPCOL),
 *          and my process rank. It is an input argument to all the
 *          parallel routines.
 *
 * diagU    (output) double*, dimension (n)
 *          The main diagonal of matrix U.
 *          On exit, it is available on all processes.
 *
 *
 * Note
 * ====
 *
 * The diagonal blocks of the L and U matrices are stored in the L
 * data structures, and are on the diagonal processes of the
 * 2D process grid.
 *
 * This routine is modified from gather_diag_to_all() in pzgstrs_Bglobal.c.
 * </pre>
 */
void pzGetDiagU(int_t n, LUstruct_t *LUstruct, gridinfo_t *grid,
                  doublecomplex *diagU)
{

    int_t *xsup;
    int iam, knsupc, pkk;
    int nsupr; /* number of rows in the block L(:,k) (LDA) */
    int_t i, j, jj, k, lk, lwork, nsupers, p;
    int_t num_diag_procs, *diag_procs, *diag_len;
    Glu_persist_t *Glu_persist = LUstruct->Glu_persist;
    LocalLU_t *Llu = LUstruct->Llu;
    doublecomplex *zblock, *zwork, *lusup;

    iam = grid->iam;
    nsupers = Glu_persist->supno[n-1] + 1;
    xsup = Glu_persist->xsup;

    get_diag_procs(n, Glu_persist, grid, &num_diag_procs,
		   &diag_procs, &diag_len);
    jj = diag_len[0];
    for (j = 1; j < num_diag_procs; ++j) jj = SUPERLU_MAX( jj, diag_len[j] );
    if ( !(zwork = doublecomplexMalloc_dist(jj)) ) ABORT("Malloc fails for zwork[]");

    for (p = 0; p < num_diag_procs; ++p) {
	pkk = diag_procs[p];
	if ( iam == pkk ) {
	    /* Copy diagonal into buffer dwork[]. */
	    lwork = 0;
	    for (k = p; k < nsupers; k += num_diag_procs) {
		knsupc = SuperSize( k );
		lk = LBj( k, grid );
		nsupr = Llu->Lrowind_bc_ptr[lk][1]; /* LDA of lusup[] */
		lusup = Llu->Lnzval_bc_ptr[lk];
		for (i = 0; i < knsupc; ++i) /* Copy the diagonal. */
		    zwork[lwork+i] = lusup[i*(nsupr+1)];
		lwork += knsupc;
	    }
	    MPI_Bcast( zwork, lwork, SuperLU_MPI_DOUBLE_COMPLEX, pkk, grid->comm );
	} else {
	    MPI_Bcast( zwork, diag_len[p], SuperLU_MPI_DOUBLE_COMPLEX, pkk, grid->comm );
	}

	/* Scatter zwork[] into global diagU vector. */
	lwork = 0;
	for (k = p; k < nsupers; k += num_diag_procs) {
	    knsupc = SuperSize( k );
	    zblock = &diagU[FstBlockC( k )];
	    for (i = 0; i < knsupc; ++i) zblock[i] = zwork[lwork+i];
	    lwork += knsupc;
	}
    } /* for p = ... */

    SUPERLU_FREE(diag_procs);
    SUPERLU_FREE(diag_len);
    SUPERLU_FREE(zwork);
}
Esempio n. 9
0
double *doubleCalloc_dist(int_t n)
{
    double *buf;
    register int_t i;
    double zero = 0.0;
    buf = (double *) SUPERLU_MALLOC( SUPERLU_MAX(1, n) * sizeof(double));
    if ( !buf ) return (buf);
    for (i = 0; i < n; ++i) buf[i] = zero;
    return (buf);
}
Esempio n. 10
0
/* 
 * Check the inf-norm of the error vector 
 */
void dinf_norm_error_dist(int_t n, int_t nrhs, double *x, int_t ldx,
			  double *xtrue, int_t ldxtrue,
                          gridinfo_t *grid)
{
    double err, xnorm;
    double *x_work, *xtrue_work;
    int i, j;

    for (j = 0; j < nrhs; j++) {
      x_work = &x[j*ldx];
      xtrue_work = &xtrue[j*ldxtrue];
      err = xnorm = 0.0;
      for (i = 0; i < n; i++) {
	err = SUPERLU_MAX(err, fabs(x_work[i] - xtrue_work[i]));
	xnorm = SUPERLU_MAX(xnorm, fabs(x_work[i]));
      }
      err = err / xnorm;
      printf("(%d) .. ||X-Xtrue||/||X|| = %e\n", grid->iam, err);
    }
}
Esempio n. 11
0
doublecomplex *doublecomplexCalloc_dist(int_t n)
{
    doublecomplex *buf;
    register int_t i;
    doublecomplex zero = {0.0, 0.0};
    buf = (doublecomplex *) 
	SUPERLU_MALLOC(SUPERLU_MAX(1, n) * sizeof(doublecomplex));
    if ( !buf ) return (buf);
    for (i = 0; i < n; ++i) buf[i] = zero;
    return (buf);
}
Esempio n. 12
0
/* 
 * Check the inf-norm of the error vector 
 */
void sinf_norm_error(int nrhs, SuperMatrix *X, float *xtrue)
{
    DNformat *Xstore;
    float err, xnorm;
    float *Xmat, *soln_work;
    int i, j;

    Xstore = X->Store;
    Xmat = Xstore->nzval;

    for (j = 0; j < nrhs; j++) {
      soln_work = &Xmat[j*Xstore->lda];
      err = xnorm = 0.0;
      for (i = 0; i < X->nrow; i++) {
	err = SUPERLU_MAX(err, fabs(soln_work[i] - xtrue[i]));
	xnorm = SUPERLU_MAX(xnorm, fabs(soln_work[i]));
      }
      err = err / xnorm;
      printf("||X - Xtrue||/||X|| = %e\n", err);
    }
}
Esempio n. 13
0
/*! \brief Set up pointers for real working arrays.
 */
void
sSetRWork(int m, int panel_size, float *dworkptr,
	 float **dense, float **tempv)
{
    float zero = 0.0;

    int maxsuper = SUPERLU_MAX( sp_ienv(3), sp_ienv(7) ),
        rowblk   = sp_ienv(4);
    *dense = dworkptr;
    *tempv = *dense + panel_size*m;
    sfill (*dense, m * panel_size, zero);
    sfill (*tempv, NUM_TEMPV(m,panel_size,maxsuper,rowblk), zero);     
}
Esempio n. 14
0
/*! \brief Set up pointers for real working arrays.
 */
void
zSetRWork(int m, int panel_size, doublecomplex *dworkptr,
	 doublecomplex **dense, doublecomplex **tempv)
{
    doublecomplex zero = {0.0, 0.0};

    int maxsuper = SUPERLU_MAX( sp_ienv(3), sp_ienv(7) ),
        rowblk   = sp_ienv(4);
    *dense = dworkptr;
    *tempv = *dense + panel_size*m;
    zfill (*dense, m * panel_size, zero);
    zfill (*tempv, NUM_TEMPV(m,panel_size,maxsuper,rowblk), zero);     
}
Esempio n. 15
0
/* 
 * Check the inf-norm of the error vector 
 */
void zinf_norm_error_dist(int_t n, int_t nrhs, doublecomplex *x, int_t ldx,
			  doublecomplex *xtrue, int_t ldxtrue,
                          gridinfo_t *grid)
{
    double err, xnorm;
    doublecomplex *x_work, *xtrue_work;
    doublecomplex temp;
    int i, j;

    for (j = 0; j < nrhs; j++) {
      x_work = &x[j*ldx];
      xtrue_work = &xtrue[j*ldxtrue];
      err = xnorm = 0.0;
      for (i = 0; i < n; i++) {
        z_sub(&temp, &x_work[i], &xtrue_work[i]);
	err = SUPERLU_MAX(err, z_abs(&temp));
	xnorm = SUPERLU_MAX(xnorm, z_abs(&x_work[i]));
      }
      err = err / xnorm;
      printf("\tRHS %2d: ||X-Xtrue||/||X|| = %e\n", j, err);
    }
}
Esempio n. 16
0
/*! \brief Check the inf-norm of the error vector 
 */
void cinf_norm_error(int nrhs, SuperMatrix *X, complex *xtrue)
{
    DNformat *Xstore;
    float err, xnorm;
    complex *Xmat, *soln_work;
    complex temp;
    int i, j;

    Xstore = X->Store;
    Xmat = Xstore->nzval;

    for (j = 0; j < nrhs; j++) {
      soln_work = &Xmat[j*Xstore->lda];
      err = xnorm = 0.0;
      for (i = 0; i < X->nrow; i++) {
        c_sub(&temp, &soln_work[i], &xtrue[i]);
	err = SUPERLU_MAX(err, c_abs(&temp));
	xnorm = SUPERLU_MAX(xnorm, c_abs(&soln_work[i]));
      }
      err = err / xnorm;
      printf("||X - Xtrue||/||X|| = %e\n", err);
    }
}
Esempio n. 17
0
/*! \brief Allocate known working storage. Returns 0 if success, otherwise
   returns the number of bytes allocated so far when failure occurred. */
int
sLUWorkInit(int m, int n, int panel_size, int **iworkptr, 
            float **dworkptr, GlobalLU_t *Glu)
{
    int    isize, dsize, extra;
    float *old_ptr;
    int    maxsuper = SUPERLU_MAX( sp_ienv(3), sp_ienv(7) ),
           rowblk   = sp_ienv(4);

    isize = ( (2 * panel_size + 3 + NO_MARKER ) * m + n ) * sizeof(int);
    dsize = (m * panel_size +
	     NUM_TEMPV(m,panel_size,maxsuper,rowblk)) * sizeof(float);
    
    if ( Glu->MemModel == SYSTEM ) 
	*iworkptr = (int *) intCalloc(isize/sizeof(int));
    else
	*iworkptr = (int *) suser_malloc(isize, TAIL, Glu);
    if ( ! *iworkptr ) {
	fprintf(stderr, "sLUWorkInit: malloc fails for local iworkptr[]\n");
	return (isize + n);
    }

    if ( Glu->MemModel == SYSTEM )
	*dworkptr = (float *) SUPERLU_MALLOC(dsize);
    else {
	*dworkptr = (float *) suser_malloc(dsize, TAIL, Glu);
	if ( NotDoubleAlign(*dworkptr) ) {
	    old_ptr = *dworkptr;
	    *dworkptr = (float*) DoubleAlign(*dworkptr);
	    *dworkptr = (float*) ((double*)*dworkptr - 1);
	    extra = (char*)old_ptr - (char*)*dworkptr;
#ifdef DEBUG	    
	    printf("sLUWorkInit: not aligned, extra %d\n", extra);
#endif	    
	    Glu->stack.top2 -= extra;
	    Glu->stack.used += extra;
	}
    }
    if ( ! *dworkptr ) {
	fprintf(stderr, "malloc fails for local dworkptr[].");
	return (isize + dsize + n);
    }
	
    return 0;
}
Esempio n. 18
0
void
StatInit(SuperLUStat_t *stat)
{
    register int i, w, panel_size, relax;

    panel_size = sp_ienv(1);
    relax = sp_ienv(2);
    w = SUPERLU_MAX(panel_size, relax);
    stat->panel_histo = intCalloc(w+1);
    stat->utime = (double *) SUPERLU_MALLOC(NPHASES * sizeof(double));
    if (!stat->utime) ABORT("SUPERLU_MALLOC fails for stat->utime");
    stat->ops = (flops_t *) SUPERLU_MALLOC(NPHASES * sizeof(flops_t));
    if (!stat->ops) ABORT("SUPERLU_MALLOC fails for stat->ops");
    for (i = 0; i < NPHASES; ++i) {
        stat->utime[i] = 0.;
        stat->ops[i] = 0.;
    }
}
Esempio n. 19
0
/*
 * Allocate storage for various statistics.
 */
void
StatAlloc(const int n, const int nprocs, const int panel_size, 
	  const int relax, Gstat_t *Gstat)
{
    register int w;

    w = SUPERLU_MAX( panel_size, relax ) + 1;
    Gstat->panel_histo = intCalloc(w);
    Gstat->utime = (double *) doubleMalloc(NPHASES);
    Gstat->ops   = (flops_t *) SUPERLU_MALLOC(NPHASES * sizeof(flops_t));
    
    if ( !(Gstat->procstat =
	   (procstat_t *) SUPERLU_MALLOC(nprocs*sizeof(procstat_t))) )
	SUPERLU_ABORT( "SUPERLU_MALLOC failed for procstat[]" );

#if (PRNTlevel==1)
    printf(".. StatAlloc(): n %d, nprocs %d, panel_size %d, relax %d\n",
		n, nprocs, panel_size, relax);
#endif
#ifdef PROFILE    
    if ( !(Gstat->panstat =
	   (panstat_t*) SUPERLU_MALLOC(n * sizeof(panstat_t))) )
	SUPERLU_ABORT( "SUPERLU_MALLOC failed for panstat[]" );
    Gstat->panhows = intCalloc(3);
    Gstat->height = intCalloc(n+1);
    if ( !(Gstat->flops_by_height =
	   (float *) SUPERLU_MALLOC(n * sizeof(float))) )
	SUPERLU_ABORT("SUPERLU_MALLOC failed for flops_by_height[]");
    
#endif
    
#ifdef PREDICT_OPT
    if ( !(cp_panel = (cp_panel_t *) SUPERLU_MALLOC(n * sizeof(cp_panel_t))) )
	SUPERLU_ABORT( "SUPERLU_MALLOC failed for cp_panel[]" );
    if ( !(desc_eft = (desc_eft_t *) SUPERLU_MALLOC(n * sizeof(desc_eft_t))) )
	SUPERLU_ABORT( "SUPERLU_MALLOC failed for desc_eft[]" );
    cp_firstkid = intMalloc(n+1);
    cp_nextkid = intMalloc(n+1);
#endif
    
}
int
pcgstrf_column_bmod(
		    const int  pnum,   /* process number */
		    const int  jcol,   /* current column in the panel */
		    const int  fpanelc,/* first column in the panel */
		    const int  nseg,   /* number of s-nodes to update jcol */
		    int        *segrep,/* in */
		    int        *repfnz,/* in */
		    complex     *dense, /* modified */
		    complex     *tempv, /* working array */
		    pxgstrf_shared_t *pxgstrf_shared, /* modified */
		    Gstat_t *Gstat     /* modified */
		    )
{
/*
 * -- SuperLU MT routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley,
 * and Xerox Palo Alto Research Center.
 * September 10, 2007
 *
 * Purpose:
 * ========
 *    Performs numeric block updates (sup-col) in topological order.
 *    It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
 *    Special processing on the supernodal portion of L\U[*,j].
 *
 * Return value:
 * =============
 *      0 - successful return
 *    > 0 - number of bytes allocated when run out of space
 *
 */
#if ( MACH==CRAY_PVP )
    _fcd ftcs1 = _cptofcd("L", strlen("L")),
         ftcs2 = _cptofcd("N", strlen("N")),
         ftcs3 = _cptofcd("U", strlen("U"));
#endif
    
#ifdef USE_VENDOR_BLAS    
    int         incx = 1, incy = 1;
    complex      alpha, beta;
#endif
    GlobalLU_t *Glu = pxgstrf_shared->Glu;   /* modified */
    
    /* krep = representative of current k-th supernode
     * fsupc = first supernodal column
     * nsupc = no of columns in supernode
     * nsupr = no of rows in supernode (used as leading dimension)
     * luptr = location of supernodal LU-block in storage
     * kfnz = first nonz in the k-th supernodal segment
     * no_zeros = no of leading zeros in a supernodal U-segment
     */
    complex	  ukj, ukj1, ukj2;
    register int lptr, kfnz, isub, irow, i, no_zeros;
    register int luptr, luptr1, luptr2;
    int          fsupc, nsupc, nsupr, segsze;
    int          nrow;	  /* No of rows in the matrix of matrix-vector */
    int          jsupno, k, ksub, krep, krep_ind, ksupno;
    int          ufirst, nextlu;
    int          fst_col; /* First column within small LU update */
    int          d_fsupc; /* Distance between the first column of the current
			     panel and the first column of the current snode.*/
    int          *xsup, *supno;
    int          *lsub, *xlsub, *xlsub_end;
    complex       *lusup;
    int          *xlusup, *xlusup_end;
    complex       *tempv1;
    int          mem_error;
    register float flopcnt;

    complex      zero = {0.0, 0.0};
    complex      one = {1.0, 0.0};
    complex      none = {-1.0, 0.0};
    complex      comp_temp, comp_temp1;

    xsup       = Glu->xsup;
    supno      = Glu->supno;
    lsub       = Glu->lsub;
    xlsub      = Glu->xlsub;
    xlsub_end  = Glu->xlsub_end;
    lusup      = Glu->lusup;
    xlusup     = Glu->xlusup;
    xlusup_end = Glu->xlusup_end;
    jsupno     = supno[jcol];

    /* 
     * For each nonz supernode segment of U[*,j] in topological order 
     */
    k = nseg - 1;
    for (ksub = 0; ksub < nseg; ksub++) {

	krep = segrep[k];
	k--;
	ksupno = supno[krep];
#if ( DEBUGlvel>=2 )
if (jcol==BADCOL)
printf("(%d) pcgstrf_column_bmod[1]: %d, nseg %d, krep %d, jsupno %d, ksupno %d\n",
       pnum, jcol, nseg, krep, jsupno, ksupno);
#endif    
	if ( jsupno != ksupno ) { /* Outside the rectangular supernode */

	    fsupc = xsup[ksupno];
	    fst_col = SUPERLU_MAX ( fsupc, fpanelc );

  	    /* Distance from the current supernode to the current panel; 
	       d_fsupc=0 if fsupc >= fpanelc. */
  	    d_fsupc = fst_col - fsupc; 

	    luptr = xlusup[fst_col] + d_fsupc;
	    lptr = xlsub[fsupc] + d_fsupc;
	    kfnz = repfnz[krep];
	    kfnz = SUPERLU_MAX ( kfnz, fpanelc );
	    segsze = krep - kfnz + 1;
	    nsupc = krep - fst_col + 1;
	    nsupr = xlsub_end[fsupc] - xlsub[fsupc]; /* Leading dimension */
	    nrow = nsupr - d_fsupc - nsupc;
	    krep_ind = lptr + nsupc - 1;

	flopcnt = segsze * (segsze - 1) + 2 * nrow * segsze;//sj
		Gstat->procstat[pnum].fcops += flopcnt;

#if ( DEBUGlevel>=2 )
if (jcol==BADCOL)	    
printf("(%d) pcgstrf_column_bmod[2]: %d, krep %d, kfnz %d, segsze %d, d_fsupc %d,\
fsupc %d, nsupr %d, nsupc %d\n",
       pnum, jcol, krep, kfnz, segsze, d_fsupc, fsupc, nsupr, nsupc);

#endif



            /*
             * Case 1: Update U-segment of size 1 -- col-col update
             */
            if ( segsze == 1 ) {
                ukj = dense[lsub[krep_ind]];
                luptr += nsupr*(nsupc-1) + nsupc;

                for (i = lptr + nsupc; i < xlsub_end[fsupc]; ++i) {
                    irow = lsub[i];
                    cc_mult(&comp_temp, &ukj, &lusup[luptr]);
                    c_sub(&dense[irow], &dense[irow], &comp_temp);
                    luptr++;
                }

            } else if ( segsze <= 3 ) {
                ukj = dense[lsub[krep_ind]];
                luptr += nsupr*(nsupc-1) + nsupc-1;
                ukj1 = dense[lsub[krep_ind - 1]];
                luptr1 = luptr - nsupr;

                if ( segsze == 2 ) { /* Case 2: 2cols-col update */
                    cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
                    c_sub(&ukj, &ukj, &comp_temp);
                    dense[lsub[krep_ind]] = ukj;
                    for (i = lptr + nsupc; i < xlsub_end[fsupc]; ++i) {
                        irow = lsub[i];
                        luptr++;
                        luptr1++;
                        cc_mult(&comp_temp, &ukj, &lusup[luptr]);
                        cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
                        c_add(&comp_temp, &comp_temp, &comp_temp1);
                        c_sub(&dense[irow], &dense[irow], &comp_temp);
                    }
                } else { /* Case 3: 3cols-col update */
                    ukj2 = dense[lsub[krep_ind - 2]];
                    luptr2 = luptr1 - nsupr;
                    cc_mult(&comp_temp, &ukj2, &lusup[luptr2-1]);
                    c_sub(&ukj1, &ukj1, &comp_temp);

                    cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
                    cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
                    c_add(&comp_temp, &comp_temp, &comp_temp1);
                    c_sub(&ukj, &ukj, &comp_temp);

                    dense[lsub[krep_ind]] = ukj;
                    dense[lsub[krep_ind-1]] = ukj1;
                    for (i = lptr + nsupc; i < xlsub_end[fsupc]; ++i) {
                        irow = lsub[i];
                        luptr++;
                        luptr1++;
                        luptr2++;
                        cc_mult(&comp_temp, &ukj, &lusup[luptr]);
                        cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
                        c_add(&comp_temp, &comp_temp, &comp_temp1);
                        cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
                        c_add(&comp_temp, &comp_temp, &comp_temp1);
                        c_sub(&dense[irow], &dense[irow], &comp_temp);
                    }
                }


	    } else {
	  	/*
		 * Case: sup-col update
		 * Perform a triangular solve and block update,
		 * then scatter the result of sup-col update to dense
		 */
		no_zeros = kfnz - fst_col;

	        /* Copy U[*,j] segment from dense[*] to tempv[*] */
	        isub = lptr + no_zeros;
	        for (i = 0; i < segsze; i++) {
	  	    irow = lsub[isub];
		    tempv[i] = dense[irow];
		    ++isub; 
	        }

	        /* Dense triangular solve -- start effective triangle */
		luptr += nsupr * no_zeros + no_zeros; 
#ifdef USE_VENDOR_BLAS
#if ( MACH==CRAY_PVP )
		CTRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
		       &nsupr, tempv, &incx );
#else
		ctrsv_( "L", "N", "U", &segsze, &lusup[luptr], 
		       &nsupr, tempv, &incx );
#endif
		
 		luptr += segsze;  /* Dense matrix-vector */
		tempv1 = &tempv[segsze];
		alpha = one;
		beta = zero;
#if ( MACH==CRAY_PVP )
		CGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr], 
		       &nsupr, tempv, &incx, &beta, tempv1, &incy );
#else
		cgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr], 
		       &nsupr, tempv, &incx, &beta, tempv1, &incy );
#endif
#else
		clsolve ( nsupr, segsze, &lusup[luptr], tempv );

 		luptr += segsze;  /* Dense matrix-vector */
		tempv1 = &tempv[segsze];
		cmatvec (nsupr, nrow , segsze, &lusup[luptr], tempv, tempv1);
#endif
                /* Scatter tempv[] into SPA dense[*] */
                isub = lptr + no_zeros;
                for (i = 0; i < segsze; i++) {
                    irow = lsub[isub];
                    dense[irow] = tempv[i]; /* Scatter */
                    tempv[i] = zero;
                    isub++;
                }

		/* Scatter tempv1[] into SPA dense[*] */
		for (i = 0; i < nrow; i++) {
		    irow = lsub[isub];
                    c_sub(&dense[irow], &dense[irow], &tempv1[i]);
		    tempv1[i] = zero;
		    ++isub;
		}
	    } /* else segsze >= 4 */
	    
	} /* if jsupno ... */

    } /* for each segment... */

    
    /* ------------------------------------------
       Process the supernodal portion of L\U[*,j]
       ------------------------------------------ */
    
    fsupc = SUPER_FSUPC (jsupno);
    nsupr = xlsub_end[fsupc] - xlsub[fsupc];
    if ( (mem_error = Glu_alloc(pnum, jcol, nsupr, LUSUP, &nextlu, 
			       pxgstrf_shared)) )
	return mem_error;
    xlusup[jcol] = nextlu;
    lusup = Glu->lusup;
    
    /* Gather the nonzeros from SPA dense[*,j] into L\U[*,j] */
    for (isub = xlsub[fsupc]; isub < xlsub_end[fsupc]; ++isub) {
  	irow = lsub[isub];
	lusup[nextlu] = dense[irow];
	dense[irow] = zero;
#ifdef DEBUG
if (jcol == -1)
    printf("(%d) pcgstrf_column_bmod[lusup] jcol %d, irow %d, lusup %.10e\n",
	   pnum, jcol, irow, lusup[nextlu]);
#endif	
	++nextlu;
    }
    xlusup_end[jcol] = nextlu; /* close L\U[*,jcol] */

#if ( DEBUGlevel>=2 )
if (jcol == -1) {
    nrow = xlusup_end[jcol] - xlusup[jcol];
    print_double_vec("before sup-col update", nrow, &lsub[xlsub[fsupc]],
		     &lusup[xlusup[jcol]]);
}
#endif    
    
    /*
     * For more updates within the panel (also within the current supernode), 
     * should start from the first column of the panel, or the first column 
     * of the supernode, whichever is bigger. There are 2 cases:
     *    (1) fsupc < fpanelc,  then fst_col := fpanelc
     *    (2) fsupc >= fpanelc, then fst_col := fsupc
     */
    fst_col = SUPERLU_MAX ( fsupc, fpanelc );

    if ( fst_col < jcol ) {

  	/* distance between the current supernode and the current panel;
	   d_fsupc=0 if fsupc >= fpanelc. */
  	d_fsupc = fst_col - fsupc;

	lptr = xlsub[fsupc] + d_fsupc;
	luptr = xlusup[fst_col] + d_fsupc;
	nsupr = xlsub_end[fsupc] - xlsub[fsupc]; /* Leading dimension */
	nsupc = jcol - fst_col;	/* Excluding jcol */
	nrow = nsupr - d_fsupc - nsupc;

	/* points to the beginning of jcol in supernode L\U[*,jsupno] */
	ufirst = xlusup[jcol] + d_fsupc;	

#if ( DEBUGlevel>=2 )
if (jcol==BADCOL)
printf("(%d) pcgstrf_column_bmod[3] jcol %d, fsupc %d, nsupr %d, nsupc %d, nrow %d\n",
       pnum, jcol, fsupc, nsupr, nsupc, nrow);
#endif    

	flopcnt = nsupc * (nsupc - 1) + 2 * nrow * nsupc; //sj
	Gstat->procstat[pnum].fcops += flopcnt;

/*	ops[TRSV] += nsupc * (nsupc - 1);
	ops[GEMV] += 2 * nrow * nsupc;    */
	
#ifdef USE_VENDOR_BLAS
	alpha = none; beta = one; /* y := beta*y + alpha*A*x */
#if ( MACH==CRAY_PVP )
	CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], 
	       &nsupr, &lusup[ufirst], &incx );
	CGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
	       &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
#else
	ctrsv_( "L", "N", "U", &nsupc, &lusup[luptr], 
	       &nsupr, &lusup[ufirst], &incx );
	cgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
	       &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
#endif
#else
	clsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );

	cmatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc],
		 &lusup[ufirst], tempv );
	
        /* Copy updates from tempv[*] into lusup[*] */
	isub = ufirst + nsupc;
	for (i = 0; i < nrow; i++) {
            c_sub(&lusup[isub], &lusup[isub], &tempv[i]);
            tempv[i] = zero;
	    ++isub;
	}
#endif
    } /* if fst_col < jcol ... */ 

    return 0;
}
Esempio n. 21
0
int cgst02(trans_t trans, int m, int n, int nrhs, SuperMatrix *A,
	      complex *x, int ldx, complex *b, int ldb, float *resid)
{
/*  
    Purpose   
    =======   

    CGST02 computes the residual for a solution of a system of linear   
    equations  A*x = b  or  A'*x = b:   
       RESID = norm(B - A*X) / ( norm(A) * norm(X) * EPS ),   
    where EPS is the machine epsilon.   

    Arguments   
    =========   

    TRANS   (input) trans_t
            Specifies the form of the system of equations:   
            = NOTRANS:  A *x = b   
            = TRANS  :  A'*x = b, where A' is the transpose of A   
            = CONJ   :  A'*x = b, where A' is the transpose of A   

    M       (input) INTEGER   
            The number of rows of the matrix A.  M >= 0.   

    N       (input) INTEGER   
            The number of columns of the matrix A.  N >= 0.   

    NRHS    (input) INTEGER   
            The number of columns of B, the matrix of right hand sides.   
            NRHS >= 0.
	    
    A       (input) SuperMatrix*, dimension (LDA,N)   
            The original M x N sparse matrix A.   

    X       (input) COMPLEX PRECISION array, dimension (LDX,NRHS)   
            The computed solution vectors for the system of linear   
            equations.   

    LDX     (input) INTEGER   
            The leading dimension of the array X.  If TRANS = NOTRANS,   
            LDX >= max(1,N); if TRANS = TRANS or CONJ, LDX >= max(1,M).   

    B       (input/output) COMPLEX PRECISION array, dimension (LDB,NRHS)   
            On entry, the right hand side vectors for the system of   
            linear equations.   
            On exit, B is overwritten with the difference B - A*X.   

    LDB     (input) INTEGER   
            The leading dimension of the array B.  IF TRANS = NOTRANS,
            LDB >= max(1,M); if TRANS = TRANS or CONJ, LDB >= max(1,N).
	    
    RESID   (output) FLOAT PRECISION   
            The maximum over the number of right hand sides of   
            norm(B - A*X) / ( norm(A) * norm(X) * EPS ).   

    =====================================================================
*/

    /* Table of constant values */
    complex alpha = {-1., 0.0};
    complex beta  = {1., 0.0};
    int    c__1  = 1;
    
    /* System generated locals */
    float d__1, d__2;

    /* Local variables */
    int j;
    int n1, n2;
    float anorm, bnorm;
    float xnorm;
    float eps;
    char transc[1];

    /* Function prototypes */
    extern int lsame_(char *, char *);
    extern float clangs(char *, SuperMatrix *);
    extern float scasum_(int *, complex *, int *);
    
    /* Function Body */
    if ( m <= 0 || n <= 0 || nrhs == 0) {
	*resid = 0.;
	return 0;
    }

    if ( (trans == TRANS) || (trans == CONJ) ) {
	n1 = n;
	n2 = m;
        *transc = 'T';
    } else {
	n1 = m;
	n2 = n;
	*transc = 'N';
    }

    /* Exit with RESID = 1/EPS if ANORM = 0. */

    eps = slamch_("Epsilon");
    anorm = clangs("1", A);
    if (anorm <= 0.) {
	*resid = 1. / eps;
	return 0;
    }

    /* Compute  B - A*X  (or  B - A'*X ) and store in B. */

    sp_cgemm(transc, "N", n1, nrhs, n2, alpha, A, x, ldx, beta, b, ldb);

    /* Compute the maximum over the number of right hand sides of   
       norm(B - A*X) / ( norm(A) * norm(X) * EPS ) . */

    *resid = 0.;
    for (j = 0; j < nrhs; ++j) {
	bnorm = scasum_(&n1, &b[j*ldb], &c__1);
	xnorm = scasum_(&n2, &x[j*ldx], &c__1);
	if (xnorm <= 0.) {
	    *resid = 1. / eps;
	} else {
	    /* Computing MAX */
	    d__1 = *resid, d__2 = bnorm / anorm / xnorm / eps;
	    *resid = SUPERLU_MAX(d__1, d__2);
	}
    }

    return 0;

} /* cgst02 */
Esempio n. 22
0
int main(int argc, char *argv[])
{
    void smatvec_mult(float alpha, float x[], float beta, float y[]);
    void spsolve(int n, float x[], float y[]);
    extern int sfgmr( int n,
	void (*matvec_mult)(float, float [], float, float []),
	void (*psolve)(int n, float [], float[]),
	float *rhs, float *sol, double tol, int restrt, int *itmax,
	FILE *fits);
    extern int sfill_diag(int n, NCformat *Astore);

    char     equed[1] = {'B'};
    yes_no_t equil;
    trans_t  trans;
    SuperMatrix A, L, U;
    SuperMatrix B, X;
    NCformat *Astore;
    NCformat *Ustore;
    SCformat *Lstore;
    GlobalLU_t	   Glu; /* facilitate multiple factorizations with 
                           SamePattern_SameRowPerm                  */
    float   *a;
    int      *asub, *xa;
    int      *etree;
    int      *perm_c; /* column permutation vector */
    int      *perm_r; /* row permutations from partial pivoting */
    int      nrhs, ldx, lwork, info, m, n, nnz;
    float   *rhsb, *rhsx, *xact;
    float   *work = NULL;
    float   *R, *C;
    float   u, rpg, rcond;
    float zero = 0.0;
    float one = 1.0;
    mem_usage_t   mem_usage;
    superlu_options_t options;
    SuperLUStat_t stat;
    FILE 	  *fp = stdin;

    int restrt, iter, maxit, i;
    double resid;
    float *x, *b;

#ifdef DEBUG
    extern int num_drop_L, num_drop_U;
#endif

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC("Enter main()");
#endif

    /* Defaults */
    lwork = 0;
    nrhs  = 1;
    trans = NOTRANS;

    /* Set the default input options:
	options.Fact = DOFACT;
	options.Equil = YES;
	options.ColPerm = COLAMD;
	options.DiagPivotThresh = 0.1; //different from complete LU
	options.Trans = NOTRANS;
	options.IterRefine = NOREFINE;
	options.SymmetricMode = NO;
	options.PivotGrowth = NO;
	options.ConditionNumber = NO;
	options.PrintStat = YES;
	options.RowPerm = LargeDiag;
	options.ILU_DropTol = 1e-4;
	options.ILU_FillTol = 1e-2;
	options.ILU_FillFactor = 10.0;
	options.ILU_DropRule = DROP_BASIC | DROP_AREA;
	options.ILU_Norm = INF_NORM;
	options.ILU_MILU = SILU;
     */
    ilu_set_default_options(&options);

    /* Modify the defaults. */
    options.PivotGrowth = YES;	  /* Compute reciprocal pivot growth */
    options.ConditionNumber = YES;/* Compute reciprocal condition number */

    if ( lwork > 0 ) {
	work = SUPERLU_MALLOC(lwork);
	if ( !work ) ABORT("Malloc fails for work[].");
    }

    /* Read matrix A from a file in Harwell-Boeing format.*/
    if (argc < 2)
    {
	printf("Usage:\n%s [OPTION] < [INPUT] > [OUTPUT]\nOPTION:\n"
		"-h -hb:\n\t[INPUT] is a Harwell-Boeing format matrix.\n"
		"-r -rb:\n\t[INPUT] is a Rutherford-Boeing format matrix.\n"
		"-t -triplet:\n\t[INPUT] is a triplet format matrix.\n",
		argv[0]);
	return 0;
    }
    else
    {
	switch (argv[1][1])
	{
	    case 'H':
	    case 'h':
		printf("Input a Harwell-Boeing format matrix:\n");
		sreadhb(fp, &m, &n, &nnz, &a, &asub, &xa);
		break;
	    case 'R':
	    case 'r':
		printf("Input a Rutherford-Boeing format matrix:\n");
		sreadrb(&m, &n, &nnz, &a, &asub, &xa);
		break;
	    case 'T':
	    case 't':
		printf("Input a triplet format matrix:\n");
		sreadtriple(&m, &n, &nnz, &a, &asub, &xa);
		break;
	    default:
		printf("Unrecognized format.\n");
		return 0;
	}
    }

    sCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa,
                                SLU_NC, SLU_S, SLU_GE);
    Astore = A.Store;
    sfill_diag(n, Astore);
    printf("Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz);
    fflush(stdout);

    /* Generate the right-hand side */
    if ( !(rhsb = floatMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsb[].");
    if ( !(rhsx = floatMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsx[].");
    sCreate_Dense_Matrix(&B, m, nrhs, rhsb, m, SLU_DN, SLU_S, SLU_GE);
    sCreate_Dense_Matrix(&X, m, nrhs, rhsx, m, SLU_DN, SLU_S, SLU_GE);
    xact = floatMalloc(n * nrhs);
    ldx = n;
    sGenXtrue(n, nrhs, xact, ldx);
    sFillRHS(trans, nrhs, xact, ldx, &A, &B);

    if ( !(etree = intMalloc(n)) ) ABORT("Malloc fails for etree[].");
    if ( !(perm_r = intMalloc(m)) ) ABORT("Malloc fails for perm_r[].");
    if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[].");
    if ( !(R = (float *) SUPERLU_MALLOC(A.nrow * sizeof(float))) )
	ABORT("SUPERLU_MALLOC fails for R[].");
    if ( !(C = (float *) SUPERLU_MALLOC(A.ncol * sizeof(float))) )
	ABORT("SUPERLU_MALLOC fails for C[].");

    info = 0;
#ifdef DEBUG
    num_drop_L = 0;
    num_drop_U = 0;
#endif

    /* Initialize the statistics variables. */
    StatInit(&stat);

    /* Compute the incomplete factorization and compute the condition number
       and pivot growth using dgsisx. */
    B.ncol = 0;  /* not to perform triangular solution */
    sgsisx(&options, &A, perm_c, perm_r, etree, equed, R, C, &L, &U, work,
	   lwork, &B, &X, &rpg, &rcond, &Glu, &mem_usage, &stat, &info);

    /* Set RHS for GMRES. */
    if (!(b = floatMalloc(m))) ABORT("Malloc fails for b[].");
    if (*equed == 'R' || *equed == 'B') {
	for (i = 0; i < n; ++i) b[i] = rhsb[i] * R[i];
    } else {
	for (i = 0; i < m; i++) b[i] = rhsb[i];
    }

    printf("sgsisx(): info %d, equed %c\n", info, equed[0]);
    if (info > 0 || rcond < 1e-8 || rpg > 1e8)
	printf("WARNING: This preconditioner might be unstable.\n");

    if ( info == 0 || info == n+1 ) {
	if ( options.PivotGrowth == YES )
	    printf("Recip. pivot growth = %e\n", rpg);
	if ( options.ConditionNumber == YES )
	    printf("Recip. condition number = %e\n", rcond);
    } else if ( info > 0 && lwork == -1 ) {
	printf("** Estimated memory: %d bytes\n", info - n);
    }

    Lstore = (SCformat *) L.Store;
    Ustore = (NCformat *) U.Store;
    printf("n(A) = %d, nnz(A) = %d\n", n, Astore->nnz);
    printf("No of nonzeros in factor L = %d\n", Lstore->nnz);
    printf("No of nonzeros in factor U = %d\n", Ustore->nnz);
    printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n);
    printf("Fill ratio: nnz(F)/nnz(A) = %.3f\n",
	    ((double)(Lstore->nnz) + (double)(Ustore->nnz) - (double)n)
	    / (double)Astore->nnz);
    printf("L\\U MB %.3f\ttotal MB needed %.3f\n",
	   mem_usage.for_lu/1e6, mem_usage.total_needed/1e6);
    fflush(stdout);

    /* Set the global variables. */
    GLOBAL_A = &A;
    GLOBAL_L = &L;
    GLOBAL_U = &U;
    GLOBAL_STAT = &stat;
    GLOBAL_PERM_C = perm_c;
    GLOBAL_PERM_R = perm_r;
    GLOBAL_OPTIONS = &options;
    GLOBAL_R = R;
    GLOBAL_C = C;
    GLOBAL_MEM_USAGE = &mem_usage;

    /* Set the options to do solve-only. */
    options.Fact = FACTORED;
    options.PivotGrowth = NO;
    options.ConditionNumber = NO;

    /* Set the variables used by GMRES. */
    restrt = SUPERLU_MIN(n / 3 + 1, 50);
    maxit = 1000;
    iter = maxit;
    resid = 1e-8;
    if (!(x = floatMalloc(n))) ABORT("Malloc fails for x[].");

    if (info <= n + 1)
    {
	int i_1 = 1;
	double maxferr = 0.0, nrmA, nrmB, res, t;
        float temp;
	extern float snrm2_(int *, float [], int *);
	extern void saxpy_(int *, float *, float [], int *, float [], int *);

	/* Initial guess */
	for (i = 0; i < n; i++) x[i] = zero;

	t = SuperLU_timer_();

	/* Call GMRES */
	sfgmr(n, smatvec_mult, spsolve, b, x, resid, restrt, &iter, stdout);

	t = SuperLU_timer_() - t;

	/* Output the result. */
	nrmA = snrm2_(&(Astore->nnz), (float *)((DNformat *)A.Store)->nzval,
		&i_1);
	nrmB = snrm2_(&m, b, &i_1);
	sp_sgemv("N", -1.0, &A, x, 1, 1.0, b, 1);
	res = snrm2_(&m, b, &i_1);
	resid = res / nrmB;
	printf("||A||_F = %.1e, ||B||_2 = %.1e, ||B-A*X||_2 = %.1e, "
		"relres = %.1e\n", nrmA, nrmB, res, resid);

	if (iter >= maxit)
	{
	    if (resid >= 1.0) iter = -180;
	    else if (resid > 1e-8) iter = -111;
	}
	printf("iteration: %d\nresidual: %.1e\nGMRES time: %.2f seconds.\n",
		iter, resid, t);

	/* Scale the solution back if equilibration was performed. */
	if (*equed == 'C' || *equed == 'B') 
	    for (i = 0; i < n; i++) x[i] *= C[i];

	for (i = 0; i < m; i++) {
	    maxferr = SUPERLU_MAX(maxferr, fabs(x[i] - xact[i]));
        }
	printf("||X-X_true||_oo = %.1e\n", maxferr);
    }
#ifdef DEBUG
    printf("%d entries in L and %d entries in U dropped.\n",
	    num_drop_L, num_drop_U);
#endif
    fflush(stdout);

    if ( options.PrintStat ) StatPrint(&stat);
    StatFree(&stat);

    SUPERLU_FREE (rhsb);
    SUPERLU_FREE (rhsx);
    SUPERLU_FREE (xact);
    SUPERLU_FREE (etree);
    SUPERLU_FREE (perm_r);
    SUPERLU_FREE (perm_c);
    SUPERLU_FREE (R);
    SUPERLU_FREE (C);
    Destroy_CompCol_Matrix(&A);
    Destroy_SuperMatrix_Store(&B);
    Destroy_SuperMatrix_Store(&X);
    if ( lwork >= 0 ) {
	Destroy_SuperNode_Matrix(&L);
	Destroy_CompCol_Matrix(&U);
    }
    SUPERLU_FREE(b);
    SUPERLU_FREE(x);

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC("Exit main()");
#endif

    return 0;
}
Esempio n. 23
0
float
cPivotGrowth(int ncols, SuperMatrix *A, int *perm_c, 
             SuperMatrix *L, SuperMatrix *U)
{
/*
 * Purpose
 * =======
 *
 * Compute the reciprocal pivot growth factor of the leading ncols columns
 * of the matrix, using the formula:
 *     min_j ( max_i(abs(A_ij)) / max_i(abs(U_ij)) )
 *
 * Arguments
 * =========
 *
 * ncols    (input) int
 *          The number of columns of matrices A, L and U.
 *
 * A        (input) SuperMatrix*
 *	    Original matrix A, permuted by columns, of dimension
 *          (A->nrow, A->ncol). The type of A can be:
 *          Stype = NC; Dtype = SLU_C; Mtype = GE.
 *
 * L        (output) SuperMatrix*
 *          The factor L from the factorization Pr*A=L*U; use compressed row 
 *          subscripts storage for supernodes, i.e., L has type: 
 *          Stype = SC; Dtype = SLU_C; Mtype = TRLU.
 *
 * U        (output) SuperMatrix*
 *	    The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
 *          storage scheme, i.e., U has types: Stype = NC;
 *          Dtype = SLU_C; Mtype = TRU.
 *
 */
    NCformat *Astore;
    SCformat *Lstore;
    NCformat *Ustore;
    complex  *Aval, *Lval, *Uval;
    int      fsupc, nsupr, luptr, nz_in_U;
    int      i, j, k, oldcol;
    int      *inv_perm_c;
    float   rpg, maxaj, maxuj;
    extern   double slamch_(char *);
    float   smlnum;
    complex   *luval;
    complex   temp_comp;
   
    /* Get machine constants. */
    smlnum = slamch_("S");
    rpg = 1. / smlnum;

    Astore = A->Store;
    Lstore = L->Store;
    Ustore = U->Store;
    Aval = Astore->nzval;
    Lval = Lstore->nzval;
    Uval = Ustore->nzval;
    
    inv_perm_c = (int *) SUPERLU_MALLOC(A->ncol*sizeof(int));
    for (j = 0; j < A->ncol; ++j) inv_perm_c[perm_c[j]] = j;

    for (k = 0; k <= Lstore->nsuper; ++k) {
	fsupc = L_FST_SUPC(k);
	nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
	luptr = L_NZ_START(fsupc);
	luval = &Lval[luptr];
	nz_in_U = 1;
	
	for (j = fsupc; j < L_FST_SUPC(k+1) && j < ncols; ++j) {
	    maxaj = 0.;
            oldcol = inv_perm_c[j];
	    for (i = Astore->colptr[oldcol]; i < Astore->colptr[oldcol+1]; ++i)
		maxaj = SUPERLU_MAX( maxaj, slu_c_abs1( &Aval[i]) );
	
	    maxuj = 0.;
	    for (i = Ustore->colptr[j]; i < Ustore->colptr[j+1]; i++)
		maxuj = SUPERLU_MAX( maxuj, slu_c_abs1( &Uval[i]) );
	    
	    /* Supernode */
	    for (i = 0; i < nz_in_U; ++i)
		maxuj = SUPERLU_MAX( maxuj, slu_c_abs1( &luval[i]) );

	    ++nz_in_U;
	    luval += nsupr;

	    if ( maxuj == 0. )
		rpg = SUPERLU_MIN( rpg, 1.);
	    else
		rpg = SUPERLU_MIN( rpg, maxaj / maxuj );
	}
	
	if ( j >= ncols ) break;
    }

    SUPERLU_FREE(inv_perm_c);
    return (rpg);
}
Esempio n. 24
0
void
dgsequ(SuperMatrix *A, double *r, double *c, double *rowcnd,
        double *colcnd, double *amax, int *info)
{
/*    
    Purpose   
    =======   

    dgsequ() computes row and column scalings intended to equilibrate an   
    M-by-N sparse matrix A and reduce its condition number. R returns the row
    scale factors and C the column scale factors, chosen to try to make   
    the largest element in each row and column of the matrix B with   
    elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1.   

    R(i) and C(j) are restricted to be between SMLNUM = smallest safe   
    number and BIGNUM = largest safe number.  Use of these scaling   
    factors is not guaranteed to reduce the condition number of A but   
    works well in practice.   

    See supermatrix.h for the definition of 'SuperMatrix' structure.
 
    Arguments   
    =========   

    A       (input) SuperMatrix*
            The matrix of dimension (A->nrow, A->ncol) whose equilibration
            factors are to be computed. The type of A can be:
            Stype = SLU_NC; Dtype = SLU_D; Mtype = SLU_GE.
	    
    R       (output) double*, size A->nrow
            If INFO = 0 or INFO > M, R contains the row scale factors   
            for A.
	    
    C       (output) double*, size A->ncol
            If INFO = 0,  C contains the column scale factors for A.
	    
    ROWCND  (output) double*
            If INFO = 0 or INFO > M, ROWCND contains the ratio of the   
            smallest R(i) to the largest R(i).  If ROWCND >= 0.1 and   
            AMAX is neither too large nor too small, it is not worth   
            scaling by R.
	    
    COLCND  (output) double*
            If INFO = 0, COLCND contains the ratio of the smallest   
            C(i) to the largest C(i).  If COLCND >= 0.1, it is not   
            worth scaling by C.
	    
    AMAX    (output) double*
            Absolute value of largest matrix element.  If AMAX is very   
            close to overflow or very close to underflow, the matrix   
            should be scaled.
	    
    INFO    (output) int*
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   
            > 0:  if INFO = i,  and i is   
                  <= M:  the i-th row of A is exactly zero   
                  >  M:  the (i-M)-th column of A is exactly zero   

    ===================================================================== 
*/

    /* Local variables */
    NCformat *Astore;
    double   *Aval;
    int i, j, irow;
    double rcmin, rcmax;
    double bignum, smlnum;
    extern double dlamch_(char *);
    
    /* Test the input parameters. */
    *info = 0;
    if ( A->nrow < 0 || A->ncol < 0 ||
	 A->Stype != SLU_NC || A->Dtype != SLU_D || A->Mtype != SLU_GE )
	*info = -1;
    if (*info != 0) {
	i = -(*info);
	xerbla_("dgsequ", &i);
	return;
    }

    /* Quick return if possible */
    if ( A->nrow == 0 || A->ncol == 0 ) {
	*rowcnd = 1.;
	*colcnd = 1.;
	*amax = 0.;
	return;
    }

    Astore = A->Store;
    Aval = Astore->nzval;
    
    /* Get machine constants. */
    smlnum = dlamch_("S");
    bignum = 1. / smlnum;

    /* Compute row scale factors. */
    for (i = 0; i < A->nrow; ++i) r[i] = 0.;

    /* Find the maximum element in each row. */
    for (j = 0; j < A->ncol; ++j)
        for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
            irow = Astore->rowind[i];
            r[irow] = SUPERLU_MAX( r[irow], fabs(Aval[i]) );
	}

    /* Find the maximum and minimum scale factors. */
    rcmin = bignum;
    rcmax = 0.;
    for (i = 0; i < A->nrow; ++i) {
	rcmax = SUPERLU_MAX(rcmax, r[i]);
	rcmin = SUPERLU_MIN(rcmin, r[i]);
    }
    *amax = rcmax;

    if (rcmin == 0.) {
	/* Find the first zero scale factor and return an error code. */
	for (i = 0; i < A->nrow; ++i)
	    if (r[i] == 0.) {
		*info = i + 1;
		return;
	    }
    } else {
	/* Invert the scale factors. */
	for (i = 0; i < A->nrow; ++i)
	    r[i] = 1. / SUPERLU_MIN( SUPERLU_MAX( r[i], smlnum ), bignum );
	/* Compute ROWCND = min(R(I)) / max(R(I)) */
	*rowcnd = SUPERLU_MAX( rcmin, smlnum ) / SUPERLU_MIN( rcmax, bignum );
    }

    /* Compute column scale factors */
    for (j = 0; j < A->ncol; ++j) c[j] = 0.;

    /* Find the maximum element in each column, assuming the row
       scalings computed above. */
    for (j = 0; j < A->ncol; ++j)
	for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
	    irow = Astore->rowind[i];
            c[j] = SUPERLU_MAX( c[j], fabs(Aval[i]) * r[irow] );
	}

    /* Find the maximum and minimum scale factors. */
    rcmin = bignum;
    rcmax = 0.;
    for (j = 0; j < A->ncol; ++j) {
	rcmax = SUPERLU_MAX(rcmax, c[j]);
	rcmin = SUPERLU_MIN(rcmin, c[j]);
    }

    if (rcmin == 0.) {
	/* Find the first zero scale factor and return an error code. */
	for (j = 0; j < A->ncol; ++j)
	    if ( c[j] == 0. ) {
		*info = A->nrow + j + 1;
		return;
	    }
    } else {
	/* Invert the scale factors. */
	for (j = 0; j < A->ncol; ++j)
	    c[j] = 1. / SUPERLU_MIN( SUPERLU_MAX( c[j], smlnum ), bignum);
	/* Compute COLCND = min(C(J)) / max(C(J)) */
	*colcnd = SUPERLU_MAX( rcmin, smlnum ) / SUPERLU_MIN( rcmax, bignum );
    }

    return;

} /* dgsequ */
Esempio n. 25
0
void
cpanel_bmod (
            const int  m,          /* in - number of rows in the matrix */
            const int  w,          /* in */
            const int  jcol,       /* in */
            const int  nseg,       /* in */
            complex     *dense,     /* out, of size n by w */
            complex     *tempv,     /* working array */
            int        *segrep,    /* in */
            int        *repfnz,    /* in, of size n by w */
            GlobalLU_t *Glu,       /* modified */
            SuperLUStat_t *stat    /* output */
            )
{


#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
    _fcd ftcs1 = _cptofcd("L", strlen("L")),
         ftcs2 = _cptofcd("N", strlen("N")),
         ftcs3 = _cptofcd("U", strlen("U"));
#endif
    int          incx = 1, incy = 1;
    complex       alpha, beta;
#endif

    register int k, ksub;
    int          fsupc, nsupc, nsupr, nrow;
    int          krep, krep_ind;
    complex       ukj, ukj1, ukj2;
    int          luptr, luptr1, luptr2;
    int          segsze;
    int          block_nrow;  /* no of rows in a block row */
    register int lptr;        /* Points to the row subscripts of a supernode */
    int          kfnz, irow, no_zeros;
    register int isub, isub1, i;
    register int jj;          /* Index through each column in the panel */
    int          *xsup, *supno;
    int          *lsub, *xlsub;
    complex       *lusup;
    int          *xlusup;
    int          *repfnz_col; /* repfnz[] for a column in the panel */
    complex       *dense_col;  /* dense[] for a column in the panel */
    complex       *tempv1;             /* Used in 1-D update */
    complex       *TriTmp, *MatvecTmp; /* used in 2-D update */
    complex      zero = {0.0, 0.0};
    complex      one = {1.0, 0.0};
    complex      comp_temp, comp_temp1;
    register int ldaTmp;
    register int r_ind, r_hi;
    static   int first = 1, maxsuper, rowblk, colblk;
    flops_t  *ops = stat->ops;

    xsup    = Glu->xsup;
    supno   = Glu->supno;
    lsub    = Glu->lsub;
    xlsub   = Glu->xlsub;
    lusup   = Glu->lusup;
    xlusup  = Glu->xlusup;

    if ( first ) {
        maxsuper = SUPERLU_MAX( sp_ienv(3), sp_ienv(7) );
        rowblk   = sp_ienv(4);
        colblk   = sp_ienv(5);
        first = 0;
    }
    ldaTmp = maxsuper + rowblk;

    /*
     * For each nonz supernode segment of U[*,j] in topological order
     */
    k = nseg - 1;
    for (ksub = 0; ksub < nseg; ksub++) { /* for each updating supernode */

        /* krep = representative of current k-th supernode
         * fsupc = first supernodal column
         * nsupc = no of columns in a supernode
         * nsupr = no of rows in a supernode
         */
        krep = segrep[k--];
        fsupc = xsup[supno[krep]];
        nsupc = krep - fsupc + 1;
        nsupr = xlsub[fsupc+1] - xlsub[fsupc];
        nrow = nsupr - nsupc;
        lptr = xlsub[fsupc];
        krep_ind = lptr + nsupc - 1;

        repfnz_col = repfnz;
        dense_col = dense;

        if ( nsupc >= colblk && nrow > rowblk ) { /* 2-D block update */

            TriTmp = tempv;

            /* Sequence through each column in panel -- triangular solves */
            for (jj = jcol; jj < jcol + w; jj++,
                 repfnz_col += m, dense_col += m, TriTmp += ldaTmp ) {

                kfnz = repfnz_col[krep];
                if ( kfnz == EMPTY ) continue;  /* Skip any zero segment */

                segsze = krep - kfnz + 1;
                luptr = xlusup[fsupc];

                ops[TRSV] += 4 * segsze * (segsze - 1);
                ops[GEMV] += 8 * nrow * segsze;

                /* Case 1: Update U-segment of size 1 -- col-col update */
                if ( segsze == 1 ) {
                    ukj = dense_col[lsub[krep_ind]];
                    luptr += nsupr*(nsupc-1) + nsupc;

                    for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) {
                        irow = lsub[i];
                        cc_mult(&comp_temp, &ukj, &lusup[luptr]);
                        c_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
                        ++luptr;
                    }

                } else if ( segsze <= 3 ) {
                    ukj = dense_col[lsub[krep_ind]];
                    ukj1 = dense_col[lsub[krep_ind - 1]];
                    luptr += nsupr*(nsupc-1) + nsupc-1;
                    luptr1 = luptr - nsupr;

                    if ( segsze == 2 ) {
                        cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
                        c_sub(&ukj, &ukj, &comp_temp);
                        dense_col[lsub[krep_ind]] = ukj;
                        for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
                            irow = lsub[i];
                            luptr++; luptr1++;
                            cc_mult(&comp_temp, &ukj, &lusup[luptr]);
                            cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
                            c_add(&comp_temp, &comp_temp, &comp_temp1);
                            c_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
                        }
                    } else {
                        ukj2 = dense_col[lsub[krep_ind - 2]];
                        luptr2 = luptr1 - nsupr;
                        cc_mult(&comp_temp, &ukj2, &lusup[luptr2-1]);
                        c_sub(&ukj1, &ukj1, &comp_temp);

                        cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
                        cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
                        c_add(&comp_temp, &comp_temp, &comp_temp1);
                        c_sub(&ukj, &ukj, &comp_temp);
                        dense_col[lsub[krep_ind]] = ukj;
                        dense_col[lsub[krep_ind-1]] = ukj1;
                        for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
                            irow = lsub[i];
                            luptr++; luptr1++; luptr2++;
                            cc_mult(&comp_temp, &ukj, &lusup[luptr]);
                            cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
                            c_add(&comp_temp, &comp_temp, &comp_temp1);
                            cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
                            c_add(&comp_temp, &comp_temp, &comp_temp1);
                            c_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
                        }
                    }

                } else  {       /* segsze >= 4 */

                    /* Copy U[*,j] segment from dense[*] to TriTmp[*], which
                       holds the result of triangular solves.    */
                    no_zeros = kfnz - fsupc;
                    isub = lptr + no_zeros;
                    for (i = 0; i < segsze; ++i) {
                        irow = lsub[isub];
                        TriTmp[i] = dense_col[irow]; /* Gather */
                        ++isub;
                    }

                    /* start effective triangle */
                    luptr += nsupr * no_zeros + no_zeros;

#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
                    CTRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr],
                           &nsupr, TriTmp, &incx );
#else
                    ctrsv_( "L", "N", "U", &segsze, &lusup[luptr],
                           &nsupr, TriTmp, &incx );
#endif
#else
                    clsolve ( nsupr, segsze, &lusup[luptr], TriTmp );
#endif


                } /* else ... */

            }  /* for jj ... end tri-solves */

            /* Block row updates; push all the way into dense[*] block */
            for ( r_ind = 0; r_ind < nrow; r_ind += rowblk ) {

                r_hi = SUPERLU_MIN(nrow, r_ind + rowblk);
                block_nrow = SUPERLU_MIN(rowblk, r_hi - r_ind);
                luptr = xlusup[fsupc] + nsupc + r_ind;
                isub1 = lptr + nsupc + r_ind;

                repfnz_col = repfnz;
                TriTmp = tempv;
                dense_col = dense;

                /* Sequence through each column in panel -- matrix-vector */
                for (jj = jcol; jj < jcol + w; jj++,
                     repfnz_col += m, dense_col += m, TriTmp += ldaTmp) {

                    kfnz = repfnz_col[krep];
                    if ( kfnz == EMPTY ) continue; /* Skip any zero segment */

                    segsze = krep - kfnz + 1;
                    if ( segsze <= 3 ) continue;   /* skip unrolled cases */

                    /* Perform a block update, and scatter the result of
                       matrix-vector to dense[].                 */
                    no_zeros = kfnz - fsupc;
                    luptr1 = luptr + nsupr * no_zeros;
                    MatvecTmp = &TriTmp[maxsuper];

#ifdef USE_VENDOR_BLAS
                    alpha = one;
                    beta = zero;
#ifdef _CRAY
                    CGEMV(ftcs2, &block_nrow, &segsze, &alpha, &lusup[luptr1],
                           &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy);
#else
                    cgemv_("N", &block_nrow, &segsze, &alpha, &lusup[luptr1],
                           &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy);
#endif
#else
                    cmatvec(nsupr, block_nrow, segsze, &lusup[luptr1],
                           TriTmp, MatvecTmp);
#endif

                    /* Scatter MatvecTmp[*] into SPA dense[*] temporarily
                     * such that MatvecTmp[*] can be re-used for the
                     * the next blok row update. dense[] will be copied into
                     * global store after the whole panel has been finished.
                     */
                    isub = isub1;
                    for (i = 0; i < block_nrow; i++) {
                        irow = lsub[isub];
                        c_sub(&dense_col[irow], &dense_col[irow],
                              &MatvecTmp[i]);
                        MatvecTmp[i] = zero;
                        ++isub;
                    }

                } /* for jj ... */

            } /* for each block row ... */

            /* Scatter the triangular solves into SPA dense[*] */
            repfnz_col = repfnz;
            TriTmp = tempv;
            dense_col = dense;

            for (jj = jcol; jj < jcol + w; jj++,
                 repfnz_col += m, dense_col += m, TriTmp += ldaTmp) {
                kfnz = repfnz_col[krep];
                if ( kfnz == EMPTY ) continue; /* Skip any zero segment */

                segsze = krep - kfnz + 1;
                if ( segsze <= 3 ) continue; /* skip unrolled cases */

                no_zeros = kfnz - fsupc;
                isub = lptr + no_zeros;
                for (i = 0; i < segsze; i++) {
                    irow = lsub[isub];
                    dense_col[irow] = TriTmp[i];
                    TriTmp[i] = zero;
                    ++isub;
                }

            } /* for jj ... */

        } else { /* 1-D block modification */


            /* Sequence through each column in the panel */
            for (jj = jcol; jj < jcol + w; jj++,
                 repfnz_col += m, dense_col += m) {

                kfnz = repfnz_col[krep];
                if ( kfnz == EMPTY ) continue;  /* Skip any zero segment */

                segsze = krep - kfnz + 1;
                luptr = xlusup[fsupc];

                ops[TRSV] += 4 * segsze * (segsze - 1);
                ops[GEMV] += 8 * nrow * segsze;

                /* Case 1: Update U-segment of size 1 -- col-col update */
                if ( segsze == 1 ) {
                    ukj = dense_col[lsub[krep_ind]];
                    luptr += nsupr*(nsupc-1) + nsupc;

                    for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) {
                        irow = lsub[i];
                        cc_mult(&comp_temp, &ukj, &lusup[luptr]);
                        c_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
                        ++luptr;
                    }

                } else if ( segsze <= 3 ) {
                    ukj = dense_col[lsub[krep_ind]];
                    luptr += nsupr*(nsupc-1) + nsupc-1;
                    ukj1 = dense_col[lsub[krep_ind - 1]];
                    luptr1 = luptr - nsupr;

                    if ( segsze == 2 ) {
                        cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
                        c_sub(&ukj, &ukj, &comp_temp);
                        dense_col[lsub[krep_ind]] = ukj;
                        for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
                            irow = lsub[i];
                            ++luptr;  ++luptr1;
                            cc_mult(&comp_temp, &ukj, &lusup[luptr]);
                            cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
                            c_add(&comp_temp, &comp_temp, &comp_temp1);
                            c_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
                        }
                    } else {
                        ukj2 = dense_col[lsub[krep_ind - 2]];
                        luptr2 = luptr1 - nsupr;
                        cc_mult(&comp_temp, &ukj2, &lusup[luptr2-1]);
                        c_sub(&ukj1, &ukj1, &comp_temp);

                        cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
                        cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
                        c_add(&comp_temp, &comp_temp, &comp_temp1);
                        c_sub(&ukj, &ukj, &comp_temp);
                        dense_col[lsub[krep_ind]] = ukj;
                        dense_col[lsub[krep_ind-1]] = ukj1;
                        for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
                            irow = lsub[i];
                            ++luptr; ++luptr1; ++luptr2;
                            cc_mult(&comp_temp, &ukj, &lusup[luptr]);
                            cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
                            c_add(&comp_temp, &comp_temp, &comp_temp1);
                            cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
                            c_add(&comp_temp, &comp_temp, &comp_temp1);
                            c_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
                        }
                    }

                } else  { /* segsze >= 4 */
                    /*
                     * Perform a triangular solve and block update,
                     * then scatter the result of sup-col update to dense[].
                     */
                    no_zeros = kfnz - fsupc;

                    /* Copy U[*,j] segment from dense[*] to tempv[*]:
                     *    The result of triangular solve is in tempv[*];
                     *    The result of matrix vector update is in dense_col[*]
                     */
                    isub = lptr + no_zeros;
                    for (i = 0; i < segsze; ++i) {
                        irow = lsub[isub];
                        tempv[i] = dense_col[irow]; /* Gather */
                        ++isub;
                    }

                    /* start effective triangle */
                    luptr += nsupr * no_zeros + no_zeros;

#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
                    CTRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr],
                           &nsupr, tempv, &incx );
#else
                    ctrsv_( "L", "N", "U", &segsze, &lusup[luptr],
                           &nsupr, tempv, &incx );
#endif

                    luptr += segsze;    /* Dense matrix-vector */
                    tempv1 = &tempv[segsze];
                    alpha = one;
                    beta = zero;
#ifdef _CRAY
                    CGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr],
                           &nsupr, tempv, &incx, &beta, tempv1, &incy );
#else
                    cgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr],
                           &nsupr, tempv, &incx, &beta, tempv1, &incy );
#endif
#else
                    clsolve ( nsupr, segsze, &lusup[luptr], tempv );

                    luptr += segsze;        /* Dense matrix-vector */
                    tempv1 = &tempv[segsze];
                    cmatvec (nsupr, nrow, segsze, &lusup[luptr], tempv, tempv1);
#endif

                    /* Scatter tempv[*] into SPA dense[*] temporarily, such
                     * that tempv[*] can be used for the triangular solve of
                     * the next column of the panel. They will be copied into
                     * ucol[*] after the whole panel has been finished.
                     */
                    isub = lptr + no_zeros;
                    for (i = 0; i < segsze; i++) {
                        irow = lsub[isub];
                        dense_col[irow] = tempv[i];
                        tempv[i] = zero;
                        isub++;
                    }

                    /* Scatter the update from tempv1[*] into SPA dense[*] */
                    /* Start dense rectangular L */
                    for (i = 0; i < nrow; i++) {
                        irow = lsub[isub];
                        c_sub(&dense_col[irow], &dense_col[irow], &tempv1[i]);
                        tempv1[i] = zero;
                        ++isub;
                    }

                } /* else segsze>=4 ... */

            } /* for each column in the panel... */

        } /* else 1-D update ... */

    } /* for each updating supernode ... */

}
void
psgstrf_panel_bmod(
		   const int  pnum, /* process number */
		   const int  m,    /* number of rows in the matrix */
		   const int  w,    /* current panel width */
		   const int  jcol, /* leading column of the current panel */
		   const int  bcol, /* first column of the farthest busy snode*/
		   int   *inv_perm_r,/* in; inverse of the row pivoting */
		   int   *etree,     /* in */
		   int   *nseg,      /* modified */
		   int   *segrep,    /* modified */
		   int   *repfnz,    /* modified, size n-by-w */
		   int   *panel_lsub,/* modified */
		   int   *w_lsub_end,/* modified */
		   int   *spa_marker,/* modified; size n-by-w */
		   float *dense, /* modified, size n-by-w */
		   float *tempv, /* working array - zeros on input/output */
		   pxgstrf_shared_t *pxgstrf_shared /* modified */
		   )
{
/*
 * -- SuperLU MT routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley,
 * and Xerox Palo Alto Research Center.
 * September 10, 2007
 *
 * Purpose
 * =======
 *
 *    Performs numeric block updates (sup-panel) in topological order.
 *    It features combined 1D and 2D blocking of the source updating s-node.
 *    It consists of two steps:
 *       (1) accumulates updates from "done" s-nodes.
 *       (2) accumulates updates from "busy" s-nodes.
 *
 *    Before entering this routine, the nonzeros of the original A in
 *    this panel were already copied into the SPA dense[n,w].
 *
 * Updated/Output arguments
 * ========================
 *    L[*,j:j+w-1] and U[*,j:j+w-1] are returned collectively in the
 *    m-by-w vector dense[*,w]. The locations of nonzeros in L[*,j:j+w-1]
 *    are given by lsub[*] and U[*,j:j+w-1] by (nseg,segrep,repfnz).
 *
 */
    GlobalLU_t *Glu = pxgstrf_shared->Glu;  /* modified */
    Gstat_t *Gstat = pxgstrf_shared->Gstat; /* modified */
    register int j, k, ksub;
    register int fsupc, nsupc, nsupr, nrow;
    register int kcol, krep, ksupno, dadsupno;
    register int jj;	      /* index through each column in the panel */
    int          *xsup, *xsup_end, *supno;
    int          *lsub, *xlsub, *xlsub_end;
    int          *repfnz_col; /* repfnz[] for a column in the panel */
    float       *dense_col;  /* dense[] for a column in the panel */
    int          *col_marker; /* each column of the spa_marker[*,w] */
    int          *col_lsub;   /* each column of the panel_lsub[*,w] */
    static   int first = 1, rowblk, colblk;

#ifdef PROFILE
    double   t1, t2; /* temporary time */
#endif
    
#ifdef PREDICT_OPT    
    register float pmod, max_child_eft = 0, sum_pmod = 0, min_desc_eft = 0;
    register float pmod_eft;
    register int   kid, ndesc = 0;
#endif
    
#if ( DEBUGlevel>=2 )
    int dbg_addr = 0*m;
#endif
    
    if ( first ) {
	rowblk   = sp_ienv(4);
	colblk   = sp_ienv(5);
	first = 0;
    }
    
    xsup      = Glu->xsup;
    xsup_end  = Glu->xsup_end;
    supno     = Glu->supno;
    lsub      = Glu->lsub;
    xlsub     = Glu->xlsub;
    xlsub_end = Glu->xlsub_end;

#if ( DEBUGlevel>=2 )
    /*if (jcol >= LOCOL && jcol <= HICOL)
    check_panel_dfs_list(pnum, "begin", jcol, *nseg, segrep);*/
if (jcol == BADPAN)
    printf("(%d) Enter psgstrf_panel_bmod() jcol %d,BADCOL %d,dense_col[%d] %.10f\n",
	   pnum, jcol, BADCOL, BADROW, dense[dbg_addr+BADROW]);
#endif    

    /* --------------------------------------------------------------------
       For each non-busy supernode segment of U[*,jcol] in topological order,
       perform sup-panel update.
       -------------------------------------------------------------------- */
    k = *nseg - 1;
    for (ksub = 0; ksub < *nseg; ++ksub) {
	/*
	 * krep = representative of current k-th supernode
	 * fsupc = first supernodal column
	 * nsupc = no of columns in a supernode
	 * nsupr = no of rows in a supernode
	 */
        krep = segrep[k--];
	fsupc = xsup[supno[krep]];
	nsupc = krep - fsupc + 1;
	nsupr = xlsub_end[fsupc] - xlsub[fsupc];
	nrow = nsupr - nsupc;

#ifdef PREDICT_OPT
	pmod = Gstat->procstat[pnum].fcops;
#endif
	    
	if ( nsupc >= colblk && nrow >= rowblk ) {
	    /* 2-D block update */
#ifdef GEMV2
	    psgstrf_bmod2D_mv2(pnum, m, w, jcol, fsupc, krep, nsupc, nsupr, 
			       nrow, repfnz, panel_lsub, w_lsub_end, 
			       spa_marker, dense, tempv, Glu, Gstat);
#else
	    psgstrf_bmod2D(pnum, m, w, jcol, fsupc, krep, nsupc, nsupr, nrow,
			   repfnz, panel_lsub, w_lsub_end, spa_marker,
			   dense, tempv, Glu, Gstat);
#endif
	} else {
	    /* 1-D block update */
#ifdef GEMV2
	    psgstrf_bmod1D_mv2(pnum, m, w, jcol, fsupc, krep, nsupc, nsupr,
			       nrow, repfnz, panel_lsub, w_lsub_end, 
			       spa_marker, dense, tempv, Glu, Gstat);
#else
	    psgstrf_bmod1D(pnum, m, w, jcol, fsupc, krep, nsupc, nsupr, nrow,
			   repfnz, panel_lsub, w_lsub_end, spa_marker,
			   dense, tempv, Glu, Gstat);
#endif
	}
	
#ifdef PREDICT_OPT
	pmod = Gstat->procstat[pnum].fcops - pmod;
	kid = (Glu->pan_status[krep].size > 0) ?
	    krep : (krep + Glu->pan_status[krep].size);
	desc_eft[ndesc].eft = cp_panel[kid].est + cp_panel[kid].pdiv;
	desc_eft[ndesc++].pmod = pmod;
#endif
	
#if ( DEBUGlevel>=2 )
if (jcol == BADPAN)
    printf("(%d) non-busy update: krep %d, repfnz %d, dense_col[%d] %.10e\n",
	   pnum, krep, repfnz[dbg_addr+krep], BADROW, dense[dbg_addr+BADROW]);
#endif

    } /* for each updating supernode ... */
    
#if ( DEBUGlevel>=2 )
if (jcol == BADPAN)
    printf("(%d) After non-busy update: dense_col[%d] %.10e\n",
	   pnum, BADROW, dense[dbg_addr+BADROW]);
#endif
    
    /* ---------------------------------------------------------------------
     * Now wait for the "busy" s-nodes to become "done" -- this amounts to
     * climbing up the e-tree along the path starting from "bcol".
     * Several points are worth noting:
     *
     *  (1) There are two possible relations between supernodes and panels
     *      along the path of the e-tree:
     *      o |s-node| < |panel|
     *        want to climb up the e-tree one column at a time in order
     *        to achieve more concurrency
     *      o |s-node| > |panel|
     *        want to climb up the e-tree one panel at a time; this
     *        processor is stalled anyway while waiting for the panel.
     *
     *  (2) Need to accommodate new fills, append them in panel_lsub[*,w].
     *      o use an n-by-w marker array, as part of the SPA (not scalable!)
     *
     *  (3) Symbolically, need to find out repfnz[S, w], for each (busy)
     *      supernode S.
     *      o use dense[inv_perm_r[kcol]], filter all zeros
     *      o detect the first nonzero in each segment
     *        (at this moment, the boundary of the busy supernode/segment
     *         S has already been identified)
     *
     * --------------------------------------------------------------------- */

    kcol = bcol;
    while ( kcol < jcol ) {
        /* Pointers to each column of the w-wide arrays. */
	repfnz_col = repfnz;
	dense_col = dense;
	col_marker = spa_marker;
	col_lsub = panel_lsub;

	/* Wait for the supernode, and collect wait-time statistics. */
	if ( pxgstrf_shared->spin_locks[kcol] ) {
#ifdef PROFILE
	    TIC(t1);
#endif
	    await( &pxgstrf_shared->spin_locks[kcol] );

#ifdef PROFILE
	    TOC(t2, t1);
	    Gstat->panstat[jcol].pipewaits++;
	    Gstat->panstat[jcol].spintime += t2;
	    Gstat->procstat[pnum].spintime += t2;
#ifdef DOPRINT
	    PRINT_SPIN_TIME(1);
#endif
#endif		
	}
	
        /* Find leading column "fsupc" in the supernode that
           contains column "kcol" */
	ksupno = supno[kcol];
	fsupc = kcol;

#if ( DEBUGlevel>=2 )
	/*if (jcol >= LOCOL && jcol <= HICOL)    */
  if ( jcol==BADCOL )
    printf("(%d) psgstrf_panel_bmod[1] kcol %d, ksupno %d, fsupc %d\n",
	   pnum, kcol, ksupno, fsupc);
#endif
	
	/* Wait for the whole supernode to become "done" --
	   climb up e-tree one column at a time */
	do {
	    krep = SUPER_REP( ksupno );
	    kcol = etree[kcol];
	    if ( kcol >= jcol ) break;
	    if ( pxgstrf_shared->spin_locks[kcol] ) {
#ifdef PROFILE
		TIC(t1);
#endif
		await ( &pxgstrf_shared->spin_locks[kcol] );

#ifdef PROFILE
		TOC(t2, t1);
		Gstat->panstat[jcol].pipewaits++;
		Gstat->panstat[jcol].spintime += t2;
		Gstat->procstat[pnum].spintime += t2;
#ifdef DOPRINT
		PRINT_SPIN_TIME(2);
#endif
#endif		
	    }

	    dadsupno = supno[kcol];

#if ( DEBUGlevel>=2 )
	    /*if (jcol >= LOCOL && jcol <= HICOL)*/
if ( jcol==BADCOL )
    printf("(%d) psgstrf_panel_bmod[2] krep %d, dad=kcol %d, dadsupno %d\n",
	   pnum, krep, kcol, dadsupno);
#endif	    

	} while ( dadsupno == ksupno );

	/* Append the new segment into segrep[*]. After column_bmod(),
	   copy_to_ucol() will use them. */
	segrep[*nseg] = krep;
        ++(*nseg);
        
	/* Determine repfnz[krep, w] for each column in the panel */
	for (jj = jcol; jj < jcol + w; ++jj, dense_col += m, 
	       repfnz_col += m, col_marker += m, col_lsub += m) {
	    /*
	     * Note: relaxed supernode may not form a path on the e-tree,
	     *       but its column numbers are contiguous.
	     */
#ifdef SCATTER_FOUND
 	    for (kcol = fsupc; kcol <= krep; ++kcol) {
		if ( col_marker[inv_perm_r[kcol]] == jj ) {
		    repfnz_col[krep] = kcol;

 		    /* Append new fills in panel_lsub[*,jj]. */
		    j = w_lsub_end[jj - jcol];
/*#pragma ivdep*/
		    for (k = xlsub[krep]; k < xlsub_end[krep]; ++k) {
			ksub = lsub[k];
			if ( col_marker[ksub] != jj ) {
			    col_marker[ksub] = jj;
			    col_lsub[j++] = ksub;
			}
		    }
		    w_lsub_end[jj - jcol] = j;

		    break; /* found the leading nonzero in the segment */
		}
	    }

#else
	    for (kcol = fsupc; kcol <= krep; ++kcol) {
                if ( dense_col[inv_perm_r[kcol]] != 0.0 ) {
		    repfnz_col[krep] = kcol;
		    break; /* Found the leading nonzero in the U-segment */
		}
	    }

	    /* In this case, we always treat the L-subscripts of the 
	       busy s-node [kcol : krep] as the new fills, even if the
	       corresponding U-segment may be all zero. */

	    /* Append new fills in panel_lsub[*,jj]. */
	    j = w_lsub_end[jj - jcol];
/*#pragma ivdep*/
	    for (k = xlsub[krep]; k < xlsub_end[krep]; ++k) {
	        ksub = lsub[k];
		if ( col_marker[ksub] != jj ) {
		    col_marker[ksub] = jj;
		    col_lsub[j++] = ksub;
		}
	    }
	    w_lsub_end[jj - jcol] = j;
#endif

#if ( DEBUGlevel>=2 )
if (jj == BADCOL) {
printf("(%d) psgstrf_panel_bmod[fills]: jj %d, repfnz_col[%d] %d, inv_pr[%d] %d\n",
	   pnum, jj, krep, repfnz_col[krep], fsupc, inv_perm_r[fsupc]);
printf("(%d) psgstrf_panel_bmod[fills] xlsub %d, xlsub_end %d, #lsub[%d] %d\n",
       pnum,xlsub[krep],xlsub_end[krep],krep, xlsub_end[krep]-xlsub[krep]);
}
#endif	   
	} /* for jj ... */

#ifdef PREDICT_OPT
	pmod = Gstat->procstat[pnum].fcops;
#endif
	
	/* Perform sup-panel updates - use combined 1D + 2D updates. */
	nsupc = krep - fsupc + 1;
	nsupr = xlsub_end[fsupc] - xlsub[fsupc];
	nrow = nsupr - nsupc;
	if ( nsupc >= colblk && nrow >= rowblk ) {
	    /* 2-D block update */
#ifdef GEMV2
	    psgstrf_bmod2D_mv2(pnum, m, w, jcol, fsupc, krep, nsupc, nsupr,
			       nrow, repfnz, panel_lsub, w_lsub_end, 
			       spa_marker, dense, tempv, Glu, Gstat);
#else
	    psgstrf_bmod2D(pnum, m, w, jcol, fsupc, krep, nsupc, nsupr, nrow,
			   repfnz, panel_lsub, w_lsub_end, spa_marker,
			   dense, tempv, Glu, Gstat);
#endif
	} else {
	    /* 1-D block update */
#ifdef GEMV2
	    psgstrf_bmod1D_mv2(pnum, m, w, jcol, fsupc, krep, nsupc, nsupr,
			       nrow, repfnz, panel_lsub, w_lsub_end, 
			       spa_marker, dense, tempv, Glu, Gstat);
#else
	    psgstrf_bmod1D(pnum, m, w, jcol, fsupc, krep, nsupc, nsupr, nrow,
			   repfnz, panel_lsub, w_lsub_end, spa_marker,
			   dense, tempv, Glu, Gstat);
#endif
	}

#ifdef PREDICT_OPT
	pmod = Gstat->procstat[pnum].fcops - pmod;
	kid = (pxgstrf_shared->pan_status[krep].size > 0) ?
	       krep : (krep + pxgstrf_shared->pan_status[krep].size);
	desc_eft[ndesc].eft = cp_panel[kid].est + cp_panel[kid].pdiv;
	desc_eft[ndesc++].pmod = pmod;
#endif
	
#if ( DEBUGlevel>=2 )
if (jcol == BADPAN)
    printf("(%d) After busy update: dense_col[%d] %.10f\n",
	   pnum, BADROW, dense[dbg_addr+BADROW]);
#endif
	
	/* Go to the parent of "krep" */
	kcol = etree[krep];
	
    } /* while kcol < jcol ... */
    
#if ( DEBUGlevel>=2 )
    /*if (jcol >= LOCOL && jcol <= HICOL)*/
if ( jcol==BADCOL )
    check_panel_dfs_list(pnum, "after-busy", jcol, *nseg, segrep);
#endif

#ifdef PREDICT_OPT
    qsort(desc_eft, ndesc, sizeof(desc_eft_t), (int(*)())numcomp);
    pmod_eft = 0;
    for (j = 0; j < ndesc; ++j) {
	pmod_eft = SUPERLU_MAX( pmod_eft, desc_eft[j].eft ) + desc_eft[j].pmod;
    }

    if ( ndesc == 0 ) {
	/* No modifications from descendants */
	pmod_eft = 0;
	for (j = cp_firstkid[jcol]; j != EMPTY; j = cp_nextkid[j]) {
	    kid = (pxgstrf_shared->pan_status[j].size > 0) ? 
			j : (j + pxgstrf_shared->pan_status[j].size);
	    pmod_eft = SUPERLU_MAX( pmod_eft,
			   	cp_panel[kid].est + cp_panel[kid].pdiv );
	}
    }
    
    cp_panel[jcol].est = pmod_eft;
    
#endif

}
Esempio n. 27
0
int dgst01(int m, int n, SuperMatrix *A, SuperMatrix *L, 
		SuperMatrix *U, int *perm_c, int *perm_r, double *resid)
{
/* 
    Purpose   
    =======   

    DGST01 reconstructs a matrix A from its L*U factorization and   
    computes the residual   
       norm(L*U - A) / ( N * norm(A) * EPS ),   
    where EPS is the machine epsilon.   

    Arguments   
    ==========   

    M       (input) INT   
            The number of rows of the matrix A.  M >= 0.   

    N       (input) INT   
            The number of columns of the matrix A.  N >= 0.   

    A       (input) SuperMatrix *, dimension (A->nrow, A->ncol)
            The original M x N matrix A.   

    L       (input) SuperMatrix *, dimension (L->nrow, L->ncol)
            The factor matrix L.

    U       (input) SuperMatrix *, dimension (U->nrow, U->ncol)
            The factor matrix U.

    perm_c (input) INT array, dimension (N)
            The column permutation from DGSTRF.   

    perm_r  (input) INT array, dimension (M)
            The pivot indices from DGSTRF.   

    RESID   (output) DOUBLE*
            norm(L*U - A) / ( N * norm(A) * EPS )   

    ===================================================================== 
*/  

    /* Local variables */
    double zero = 0.0;
    int i, j, k, arow, lptr,isub,  urow, superno, fsupc, u_part;
    double utemp, comp_temp;
    double anorm, tnorm, cnorm;
    double eps;
    double *work;
    SCformat *Lstore;
    NCformat *Astore, *Ustore;
    double *Aval, *Lval, *Uval;
    int *colbeg, *colend;

    /* Function prototypes */
    extern double dlangs(char *, SuperMatrix *);

    /* Quick exit if M = 0 or N = 0. */

    if (m <= 0 || n <= 0) {
	*resid = 0.f;
	return 0;
    }

    work = (double *)doubleCalloc(m);

    Astore = A->Store;
    Aval = Astore->nzval;
    Lstore = L->Store;
    Lval = Lstore->nzval;
    Ustore = U->Store;
    Uval = Ustore->nzval;

    colbeg = intMalloc(n);
    colend = intMalloc(n);

        for (i = 0; i < n; i++) {
            colbeg[perm_c[i]] = Astore->colptr[i]; 
	    colend[perm_c[i]] = Astore->colptr[i+1];
        }
	
    /* Determine EPS and the norm of A. */
    eps = dmach("Epsilon");
    anorm = dlangs("1", A);
    cnorm = 0.;

    /* Compute the product L*U, one column at a time */
    for (k = 0; k < n; ++k) {

	/* The U part outside the rectangular supernode */
        for (i = U_NZ_START(k); i < U_NZ_START(k+1); ++i) {
	    urow = U_SUB(i);
	    utemp = Uval[i];
            superno = Lstore->col_to_sup[urow];
	    fsupc = L_FST_SUPC(superno);
	    u_part = urow - fsupc + 1;
	    lptr = L_SUB_START(fsupc) + u_part;
            work[L_SUB(lptr-1)] -= utemp;   /* L_ii = 1 */
	    for (j = L_NZ_START(urow) + u_part; j < L_NZ_START(urow+1); ++j) {
                isub = L_SUB(lptr);
	        work[isub] -= Lval[j] * utemp;
	        ++lptr;
	    }
	}

	/* The U part inside the rectangular supernode */
	superno = Lstore->col_to_sup[k];
	fsupc = L_FST_SUPC(superno);
	urow = L_NZ_START(k);
	for (i = fsupc; i <= k; ++i) {
	    utemp = Lval[urow++];
	    u_part = i - fsupc + 1;
	    lptr = L_SUB_START(fsupc) + u_part;
            work[L_SUB(lptr-1)] -= utemp;   /* L_ii = 1 */
	    for (j = L_NZ_START(i)+u_part; j < L_NZ_START(i+1); ++j) {
                isub = L_SUB(lptr);
	        work[isub] -= Lval[j] * utemp;
	        ++lptr;
	    }
	}

	/* Now compute A[k] - (L*U)[k] (Both matrices may be permuted.) */

	for (i = colbeg[k]; i < colend[k]; ++i) {
	    arow = Astore->rowind[i];
	    work[perm_r[arow]] += Aval[i];
        }

	/* Now compute the 1-norm of the column vector work */
        tnorm = 0.;
	for (i = 0; i < m; ++i) {
	    tnorm += fabs(work[i]);
	    work[i] = zero;
	}
	cnorm = SUPERLU_MAX(tnorm, cnorm);
    }

    *resid = cnorm;

    if (anorm <= 0.f) {
	if (*resid != 0.f) {
	    *resid = 1.f / eps;
	}
    } else {
	*resid = *resid / (float) n / anorm / eps;
    }

    SUPERLU_FREE(work);
    SUPERLU_FREE(colbeg);
    SUPERLU_FREE(colend);
    return 0;

/*     End of DGST01 */

} /* dgst01_ */
Esempio n. 28
0
/*! \brief
 *
 * <pre>
 * Purpose
 * =======
 *
 * PDGSRFS improves the computed solution to a system of linear
 * equations and provides error bounds and backward error estimates
 * for the solution.
 *
 * Arguments
 * =========
 *
 * n      (input) int (global)
 *        The order of the system of linear equations.
 *
 * A      (input) SuperMatrix*
 *	  The original matrix A, or the scaled A if equilibration was done.
 *        A is also permuted into diag(R)*A*diag(C)*Pc'. The type of A can be:
 *        Stype = SLU_NR_loc; Dtype = SLU_D; Mtype = SLU_GE.
 *
 * anorm  (input) double
 *        The norm of the original matrix A, or the scaled A if
 *        equilibration was done.
 *
 * LUstruct (input) LUstruct_t*
 *        The distributed data structures storing L and U factors.
 *        The L and U factors are obtained from pdgstrf for
 *        the possibly scaled and permuted matrix A.
 *        See superlu_ddefs.h for the definition of 'LUstruct_t'.
 *
 * ScalePermstruct (input) ScalePermstruct_t* (global)
 *         The data structure to store the scaling and permutation vectors
 *         describing the transformations performed to the matrix A.
 *
 * grid   (input) gridinfo_t*
 *        The 2D process mesh. It contains the MPI communicator, the number
 *        of process rows (NPROW), the number of process columns (NPCOL),
 *        and my process rank. It is an input argument to all the
 *        parallel routines.
 *        Grid can be initialized by subroutine SUPERLU_GRIDINIT.
 *        See superlu_defs.h for the definition of 'gridinfo_t'.
 *
 * B      (input) double* (local)
 *        The m_loc-by-NRHS right-hand side matrix of the possibly
 *        equilibrated system. That is, B may be overwritten by diag(R)*B.
 *
 * ldb    (input) int (local)
 *        Leading dimension of matrix B.
 *
 * X      (input/output) double* (local)
 *        On entry, the solution matrix Y, as computed by PDGSTRS, of the
 *            transformed system A1*Y = Pc*Pr*B. where
 *            A1 = Pc*Pr*diag(R)*A*diag(C)*Pc' and Y = Pc*diag(C)^(-1)*X.
 *        On exit, the improved solution matrix Y.
 *
 *        In order to obtain the solution X to the original system,
 *        Y should be permutated by Pc^T, and premultiplied by diag(C)
 *        if DiagScale = COL or BOTH.
 *        This must be done after this routine is called.
 *
 * ldx    (input) int (local)
 *        Leading dimension of matrix X.
 *
 * nrhs   (input) int
 *        Number of right-hand sides.
 *
 * SOLVEstruct (output) SOLVEstruct_t* (global)
 *        Contains the information for the communication during the
 *        solution phase.
 *
 * berr   (output) double*, dimension (nrhs)
 *         The componentwise relative backward error of each solution
 *         vector X(j) (i.e., the smallest relative change in
 *         any element of A or B that makes X(j) an exact solution).
 *
 * stat   (output) SuperLUStat_t*
 *        Record the statistics about the refinement steps.
 *        See util.h for the definition of SuperLUStat_t.
 *
 * info   (output) int*
 *        = 0: successful exit
 *        < 0: if info = -i, the i-th argument had an illegal value
 *
 * Internal Parameters
 * ===================
 *
 * ITMAX is the maximum number of steps of iterative refinement.
 * </pre>
 */
void
pdgsrfs(int_t n, SuperMatrix *A, double anorm, LUstruct_t *LUstruct,
        ScalePermstruct_t *ScalePermstruct, gridinfo_t *grid,
        double *B, int_t ldb, double *X, int_t ldx, int nrhs,
        SOLVEstruct_t *SOLVEstruct,
        double *berr, SuperLUStat_t *stat, int *info)
{
#define ITMAX 20

    Glu_persist_t *Glu_persist = LUstruct->Glu_persist;
    LocalLU_t *Llu = LUstruct->Llu;
    double *ax, *R, *dx, *temp, *work, *B_col, *X_col;
    int_t count, i, j, lwork, nz;
    int   iam;
    double eps, lstres;
    double s, safmin, safe1, safe2;

    /* Data structures used by matrix-vector multiply routine. */
    pdgsmv_comm_t *gsmv_comm = SOLVEstruct->gsmv_comm;
    NRformat_loc *Astore;
    int_t        m_loc, fst_row;


    /* Initialization. */
    Astore = (NRformat_loc *) A->Store;
    m_loc = Astore->m_loc;
    fst_row = Astore->fst_row;
    iam = grid->iam;

    /* Test the input parameters. */
    *info = 0;
    if ( n < 0 ) *info = -1;
    else if ( A->nrow != A->ncol || A->nrow < 0 || A->Stype != SLU_NR_loc
              || A->Dtype != SLU_D || A->Mtype != SLU_GE )
        *info = -2;
    else if ( ldb < SUPERLU_MAX(0, m_loc) ) *info = -10;
    else if ( ldx < SUPERLU_MAX(0, m_loc) ) *info = -12;
    else if ( nrhs < 0 ) *info = -13;
    if (*info != 0) {
        i = -(*info);
        pxerbla("PDGSRFS", grid, i);
        return;
    }

    /* Quick return if possible. */
    if ( n == 0 || nrhs == 0 ) {
        return;
    }


#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(iam, "Enter pdgsrfs()");
#endif

    lwork = 2 * m_loc;  /* For ax/R/dx and temp */
    if ( !(work = doubleMalloc_dist(lwork)) )
        ABORT("Malloc fails for work[]");
    ax = R = dx = work;
    temp = ax + m_loc;

    /* NZ = maximum number of nonzero elements in each row of A, plus 1 */
    nz     = A->ncol + 1;
    eps    = dmach("Epsilon");
    safmin = dmach("Safe minimum");

    /* Set SAFE1 essentially to be the underflow threshold times the
       number of additions in each row. */
    safe1  = nz * safmin;
    safe2  = safe1 / eps;

#if ( DEBUGlevel>=1 )
    if ( !iam ) printf(".. eps = %e\tanorm = %e\tsafe1 = %e\tsafe2 = %e\n",
                           eps, anorm, safe1, safe2);
#endif

    /* Do for each right-hand side ... */
    for (j = 0; j < nrhs; ++j) {
        count = 0;
        lstres = 3.;
        B_col = &B[j*ldb];
        X_col = &X[j*ldx];

        while (1) { /* Loop until stopping criterion is satisfied. */

            /* Compute residual R = B - op(A) * X,
               where op(A) = A, A**T, or A**H, depending on TRANS. */

            /* Matrix-vector multiply. */
            pdgsmv(0, A, grid, gsmv_comm, X_col, ax);

            /* Compute residual, stored in R[]. */
            for (i = 0; i < m_loc; ++i) R[i] = B_col[i] - ax[i];

            /* Compute abs(op(A))*abs(X) + abs(B), stored in temp[]. */
            pdgsmv(1, A, grid, gsmv_comm, X_col, temp);
            for (i = 0; i < m_loc; ++i) temp[i] += fabs(B_col[i]);

            s = 0.0;
            for (i = 0; i < m_loc; ++i) {
                if ( temp[i] > safe2 ) {
                    s = SUPERLU_MAX(s, fabs(R[i]) / temp[i]);
                } else if ( temp[i] != 0.0 ) {
                    /* Adding SAFE1 to the numerator guards against
                       spuriously zero residuals (underflow). */
                    s = SUPERLU_MAX(s, (safe1 + fabs(R[i])) /temp[i]);
                }
                /* If temp[i] is exactly 0.0 (computed by PxGSMV), then
                   we know the true residual also must be exactly 0.0. */
            }
            MPI_Allreduce( &s, &berr[j], 1, MPI_DOUBLE, MPI_MAX, grid->comm );

#if ( PRNTlevel>= 1 )
            if ( !iam )
                printf("(%2d) .. Step " IFMT ": berr[j] = %e\n", iam, count, berr[j]);
#endif
            if ( berr[j] > eps && berr[j] * 2 <= lstres && count < ITMAX ) {
                /* Compute new dx. */
                pdgstrs(n, LUstruct, ScalePermstruct, grid,
                        dx, m_loc, fst_row, m_loc, 1,
                        SOLVEstruct, stat, info);

                /* Update solution. */
                for (i = 0; i < m_loc; ++i) X_col[i] += dx[i];

                lstres = berr[j];
                ++count;
            } else {
                break;
            }
        } /* end while */

        stat->RefineSteps = count;

    } /* for j ... */

    /* Deallocate storage. */
    SUPERLU_FREE(work);

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(iam, "Exit pdgsrfs()");
#endif

} /* PDGSRFS */
Esempio n. 29
0
void
cgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U,
       int *perm_r, int *perm_c, equed_t equed, float *R, float *C,
       SuperMatrix *B, SuperMatrix *X, float *ferr, float *berr,
       Gstat_t *Gstat, int *info)
{
/*
 * -- SuperLU MT routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley,
 * and Xerox Palo Alto Research Center.
 * September 10, 2007
 *
 *
 * Purpose
 * =======   
 *
 * cgsrfs improves the computed solution to a system of linear
 * equations and provides error bounds and backward error estimates for
 * the solution.
 *
 * See supermatrix.h for the definition of 'SuperMatrix' structure.
 *
 * Arguments
 * =========
 *
 * trans   (input) trans_t
 *         Specifies the form of the system of equations:
 *         = NOTRANS:  A * X = B     (No transpose)
 *         = TRANS:    A**T * X = B  (Transpose)
 *         = CONJ:     A**H * X = B  (Conjugate transpose = Transpose)
 *
 * A       (input) SuperMatrix*
 *         The original matrix A in the system, or the scaled A if
 *         equilibration was done. The type of A can be:
 *         Stype = NC, Dtype = _D, Mtype = GE.
 *
 * L       (input) SuperMatrix*
 *         The factor L from the factorization Pr*A*Pc=L*U. Use
 *         compressed row subscripts storage for supernodes,
 *         i.e., L has types: Stype = SCP, Dtype = _D, Mtype = TRLU.
 *
 * U       (input) SuperMatrix*
 *         The factor U from the factorization Pr*A*Pc=L*U as computed by
 *         dgstrf(). Use column-wise storage scheme,
 *         i.e., U has types: Stype = NCP, Dtype = _D, Mtype = TRU.
 *
 * perm_r  (input) int*, dimension (A->nrow)
 *         Row permutation vector, which defines the permutation matrix Pr;
 *         perm_r[i] = j means row i of A is in position j in Pr*A.
 *
 * perm_c  (input) int*, dimension (A->ncol)
 *         Column permutation vector, which defines the
 *         permutation matrix Pc; perm_c[i] = j means column i of A is 
 *         in position j in A*Pc.
 *
 * equed   (input) equed_t
 *         Specifies the form of equilibration that was done.
 *         = NOEQUIL: No equilibration.
 *         = ROW:  Row equilibration, i.e., A was premultiplied by diag(R).
 *         = COL:  Column equilibration, i.e., A was postmultiplied by
 *                 diag(C).
 *         = BOTH: Both row and column equilibration, i.e., A was replaced
 *                 by diag(R)*A*diag(C).
 *
 * R       (input) double*, dimension (A->nrow)
 *         The row scale factors for A.
 *         If equed = ROW or BOTH, A is premultiplied by diag(R).
 *         If equed = NOEQUIL or COL, R is not accessed.
 *
 * C       (input) double*, dimension (A->ncol)
 *         The column scale factors for A.
 *         If equed = COL or BOTH, A is postmultiplied by diag(C).
 *         If equed = NOEQUIL or ROW, C is not accessed.
 *
 * B       (input) SuperMatrix*
 *         B has types: Stype = DN, Dtype = _D, Mtype = GE.
 *         The right hand side matrix B.
 *
 * X       (input/output) SuperMatrix*
 *         X has types: Stype = DN, Dtype = _D, Mtype = GE.
 *         On entry, the solution matrix X, as computed by dgstrs().
 *         On exit, the improved solution matrix X.
 *
 * FERR    (output) double*, dimension (B->ncol)
 *         The estimated forward error bound for each solution vector
 *         X(j) (the j-th column of the solution matrix X).
 *         If XTRUE is the true solution corresponding to X(j), FERR(j)
 *         is an estimated upper bound for the magnitude of the largest
 *         element in (X(j) - XTRUE) divided by the magnitude of the
 *         largest element in X(j).  The estimate is as reliable as
 *         the estimate for RCOND, and is almost always a slight
 *         overestimate of the true error.
 *
 * BERR    (output) double*, dimension (B->ncol)
 *         The componentwise relative backward error of each solution
 *         vector X(j) (i.e., the smallest relative change in
 *         any element of A or B that makes X(j) an exact solution).
 *
 * info    (output) int*
 *         = 0:  successful exit
 *         < 0:  if INFO = -i, the i-th argument had an illegal value
 *
 * Internal Parameters
 * ===================
 *
 * ITMAX is the maximum number of steps of iterative refinement.
 *
 */

#define ITMAX 5
    
    /* Table of constant values */
    int    ione = 1;
    complex ndone = {-1., 0.};
    complex done = {1., 0.};
    
    /* Local variables */
    NCformat *Astore;
    complex   *Aval;
    SuperMatrix Bjcol;
    DNformat *Bstore, *Xstore, *Bjcol_store;
    complex   *Bmat, *Xmat, *Bptr, *Xptr;
    int      kase;
    float   safe1, safe2;
    int      i, j, k, irow, nz, count, notran, rowequ, colequ;
    int      ldb, ldx, nrhs;
    float   s, xk, lstres, eps, safmin;
    char     transc[1];
    trans_t  transt;
    complex   *work;
    float   *rwork;
    int      *iwork;
    extern double slamch_(char *);
    extern int clacon_(int *, complex *, complex *, float *, int *);
#ifdef _CRAY
    extern int CCOPY(int *, complex *, int *, complex *, int *);
    extern int CSAXPY(int *, complex *, complex *, int *, complex *, int *);
#else
    extern int ccopy_(int *, complex *, int *, complex *, int *);
    extern int caxpy_(int *, complex *, complex *, int *, complex *, int *);
#endif

    Astore = A->Store;
    Aval   = Astore->nzval;
    Bstore = B->Store;
    Xstore = X->Store;
    Bmat   = Bstore->nzval;
    Xmat   = Xstore->nzval;
    ldb    = Bstore->lda;
    ldx    = Xstore->lda;
    nrhs   = B->ncol;
    
    /* Test the input parameters */
    *info = 0;
    notran = (trans == NOTRANS);
    if ( !notran && trans != TRANS && trans != CONJ ) *info = -1;
    else if ( A->nrow != A->ncol || A->nrow < 0 ||
	      A->Stype != SLU_NC || A->Dtype != SLU_C || A->Mtype != SLU_GE )
	*info = -2;
    else if ( L->nrow != L->ncol || L->nrow < 0 ||
 	      L->Stype != SLU_SCP || L->Dtype != SLU_C || L->Mtype != SLU_TRLU )
	*info = -3;
    else if ( U->nrow != U->ncol || U->nrow < 0 ||
 	      U->Stype != SLU_NCP || U->Dtype != SLU_C || U->Mtype != SLU_TRU )
	*info = -4;
    else if ( ldb < SUPERLU_MAX(0, A->nrow) ||
 	      B->Stype != SLU_DN || B->Dtype != SLU_C || B->Mtype != SLU_GE )
        *info = -10;
    else if ( ldx < SUPERLU_MAX(0, A->nrow) ||
 	      X->Stype != SLU_DN || X->Dtype != SLU_C || X->Mtype != SLU_GE )
	*info = -11;
    if (*info != 0) {
	i = -(*info);
	xerbla_("cgsrfs", &i);
	return;
    }

    /* Quick return if possible */
    if ( A->nrow == 0 || nrhs == 0) {
	for (j = 0; j < nrhs; ++j) {
	    ferr[j] = 0.;
	    berr[j] = 0.;
	}
	return;
    }

    rowequ = (equed == ROW) || (equed == BOTH);
    colequ = (equed == COL) || (equed == BOTH);
    
    /* Allocate working space */
    work = complexMalloc(2*A->nrow);
    rwork = (float *) SUPERLU_MALLOC( (size_t) A->nrow * sizeof(float) );
    iwork = intMalloc(A->nrow);
    if ( !work || !rwork || !iwork ) 
        SUPERLU_ABORT("Malloc fails for work/rwork/iwork.");
    
    if ( notran ) {
	*(unsigned char *)transc = 'N';
        transt = TRANS;
    } else {
	*(unsigned char *)transc = 'T';
	transt = NOTRANS;
    }

    /* NZ = maximum number of nonzero elements in each row of A, plus 1 */
    nz     = A->ncol + 1;
    eps    = slamch_("Epsilon");
    safmin = slamch_("Safe minimum");
    /* Set SAFE1 essentially to be the underflow threshold times the
       number of additions in each row. */
    safe1  = nz * safmin;
    safe2  = safe1 / eps;

    /* Compute the number of nonzeros in each row (or column) of A */
    for (i = 0; i < A->nrow; ++i) iwork[i] = 0;
    if ( notran ) {
	for (k = 0; k < A->ncol; ++k)
	    for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) 
		++iwork[Astore->rowind[i]];
    } else {
	for (k = 0; k < A->ncol; ++k)
	    iwork[k] = Astore->colptr[k+1] - Astore->colptr[k];
    }	

    /* Copy one column of RHS B into Bjcol. */
    Bjcol.Stype = B->Stype;
    Bjcol.Dtype = B->Dtype;
    Bjcol.Mtype = B->Mtype;
    Bjcol.nrow  = B->nrow;
    Bjcol.ncol  = 1;
    Bjcol.Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) );
    if ( !Bjcol.Store ) SUPERLU_ABORT("SUPERLU_MALLOC fails for Bjcol.Store");
    Bjcol_store = Bjcol.Store;
    Bjcol_store->lda = ldb;
    Bjcol_store->nzval = work; /* address aliasing */
	
    /* Do for each right hand side ... */
    for (j = 0; j < nrhs; ++j) {
	count = 0;
	lstres = 3.;
	Bptr = &Bmat[j*ldb];
	Xptr = &Xmat[j*ldx];

	while (1) { /* Loop until stopping criterion is satisfied. */

	    /* Compute residual R = B - op(A) * X,   
	       where op(A) = A, A**T, or A**H, depending on TRANS. */
	    
#ifdef _CRAY
	    CCOPY(&A->nrow, Bptr, &ione, work, &ione);
#else
	    ccopy_(&A->nrow, Bptr, &ione, work, &ione);
#endif
	    sp_cgemv(transc, ndone, A, Xptr, ione, done, work, ione);

	    /* Compute componentwise relative backward error from formula 
	       max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )   
	       where abs(Z) is the componentwise absolute value of the matrix
	       or vector Z.  If the i-th component of the denominator is less
	       than SAFE2, then SAFE1 is added to the i-th component of the   
	       numerator before dividing. */

	    for (i = 0; i < A->nrow; ++i) rwork[i] = c_abs1( &Bptr[i] );
	    
	    /* Compute abs(op(A))*abs(X) + abs(B). */
	    if (notran) {
		for (k = 0; k < A->ncol; ++k) {
		    xk = c_abs1( &Xptr[k] );
		    for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
			rwork[Astore->rowind[i]] += c_abs1(&Aval[i]) * xk;
		}
	    } else {
		for (k = 0; k < A->ncol; ++k) {
		    s = 0.;
		    for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
			irow = Astore->rowind[i];
			s += c_abs1(&Aval[i]) * c_abs1(&Xptr[irow]);
		    }
		    rwork[k] += s;
		}
	    }
	    s = 0.;
	    for (i = 0; i < A->nrow; ++i) {
		if (rwork[i] > safe2) {
		    s = SUPERLU_MAX( s, c_abs1(&work[i]) / rwork[i] );
		} else if ( rwork[i] != 0.0 ) {
		    s = SUPERLU_MAX( s, (c_abs1(&work[i]) + safe1) / rwork[i] );
                }
                /* If rwork[i] is exactly 0.0, then we know the true 
                   residual also must be exactly 0.0. */
	    }
	    berr[j] = s;

	    /* Test stopping criterion. Continue iterating if   
	       1) The residual BERR(J) is larger than machine epsilon, and   
	       2) BERR(J) decreased by at least a factor of 2 during the   
	          last iteration, and   
	       3) At most ITMAX iterations tried. */

	    if (berr[j] > eps && berr[j] * 2. <= lstres && count < ITMAX) {
		/* Update solution and try again. */
		cgstrs (trans, L, U, perm_r, perm_c, &Bjcol, Gstat, info);
		
#ifdef _CRAY
		CAXPY(&A->nrow, &done, work, &ione,
		       &Xmat[j*ldx], &ione);
#else
		caxpy_(&A->nrow, &done, work, &ione,
		       &Xmat[j*ldx], &ione);
#endif
		lstres = berr[j];
		++count;
	    } else {
		break;
	    }
        
	} /* end while */

	/* Bound error from formula:
	   norm(X - XTRUE) / norm(X) .le. FERR = norm( abs(inv(op(A)))*   
	   ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)   
          where   
            norm(Z) is the magnitude of the largest component of Z   
            inv(op(A)) is the inverse of op(A)   
            abs(Z) is the componentwise absolute value of the matrix or
	       vector Z   
            NZ is the maximum number of nonzeros in any row of A, plus 1   
            EPS is machine epsilon   

          The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))   
          is incremented by SAFE1 if the i-th component of   
          abs(op(A))*abs(X) + abs(B) is less than SAFE2.   

          Use CLACON to estimate the infinity-norm of the matrix   
             inv(op(A)) * diag(W),   
          where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */
	
	for (i = 0; i < A->nrow; ++i) rwork[i] = c_abs1( &Bptr[i] );
	
	/* Compute abs(op(A))*abs(X) + abs(B). */
	if ( notran ) {
	    for (k = 0; k < A->ncol; ++k) {
		xk = c_abs1( &Xptr[k] );
		for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
		    rwork[Astore->rowind[i]] += c_abs1(&Aval[i]) * xk;
	    }
	} else {
	    for (k = 0; k < A->ncol; ++k) {
		s = 0.;
		for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
		    irow = Astore->rowind[i];
		    xk = c_abs1( &Xptr[irow] );
		    s += c_abs1(&Aval[i]) * xk;
		}
		rwork[k] += s;
	    }
	}
	
	for (i = 0; i < A->nrow; ++i)
	    if (rwork[i] > safe2)
		rwork[i] = c_abs(&work[i]) + (iwork[i]+1)*eps*rwork[i];
	    else
		rwork[i] = c_abs(&work[i])+(iwork[i]+1)*eps*rwork[i]+safe1;
	kase = 0;

	do {
	    clacon_(&A->nrow, &work[A->nrow], work,
		    &ferr[j], &kase);
	    if (kase == 0) break;

	    if (kase == 1) {
		/* Multiply by diag(W)*inv(op(A)**T)*(diag(C) or diag(R)). */
		if ( notran && colequ )
		    for (i = 0; i < A->ncol; ++i) {
		        cs_mult(&work[i], &work[i], C[i]);
	            }
		else if ( !notran && rowequ )
		    for (i = 0; i < A->nrow; ++i) {
		        cs_mult(&work[i], &work[i], R[i]);
                    }

		cgstrs (transt, L, U, perm_r, perm_c, &Bjcol, Gstat, info);
		
		for (i = 0; i < A->nrow; ++i) {
		    cs_mult(&work[i], &work[i], rwork[i]);
	 	}
	    } else {
		/* Multiply by (diag(C) or diag(R))*inv(op(A))*diag(W). */
		for (i = 0; i < A->nrow; ++i) {
		    cs_mult(&work[i], &work[i], rwork[i]);
		}
		
		cgstrs (trans, L, U, perm_r, perm_c, &Bjcol, Gstat, info);
		
		if ( notran && colequ )
		    for (i = 0; i < A->ncol; ++i) {
		        cs_mult(&work[i], &work[i], C[i]);
		    }
		else if ( !notran && rowequ )
		    for (i = 0; i < A->ncol; ++i) {
		        cs_mult(&work[i], &work[i], R[i]);  
		    }
	    }
	    
	} while ( kase != 0 );

	/* Normalize error. */
	lstres = 0.;
 	if ( notran && colequ ) {
	    for (i = 0; i < A->nrow; ++i)
	    	lstres = SUPERLU_MAX( lstres, C[i] * c_abs1( &Xptr[i]) );
  	} else if ( !notran && rowequ ) {
	    for (i = 0; i < A->nrow; ++i)
	    	lstres = SUPERLU_MAX( lstres, R[i] * c_abs1( &Xptr[i]) );
	} else {
	    for (i = 0; i < A->nrow; ++i)
	    	lstres = SUPERLU_MAX( lstres, c_abs1( &Xptr[i]) );
	}
	if ( lstres != 0. )
	    ferr[j] /= lstres;

    } /* for each RHS j ... */
    
    SUPERLU_FREE(work);
    SUPERLU_FREE(rwork);
    SUPERLU_FREE(iwork);
    SUPERLU_FREE(Bjcol.Store);

    return;

} /* cgsrfs */
Esempio n. 30
0
float clangs(char *norm, SuperMatrix *A)
{
/* 
    Purpose   
    =======   

    CLANGS returns the value of the one norm, or the Frobenius norm, or 
    the infinity norm, or the element of largest absolute value of a 
    real matrix A.   

    Description   
    ===========   

    CLANGE returns the value   

       CLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm'   
                (   
                ( norm1(A),         NORM = '1', 'O' or 'o'   
                (   
                ( normI(A),         NORM = 'I' or 'i'   
                (   
                ( normF(A),         NORM = 'F', 'f', 'E' or 'e'   

    where  norm1  denotes the  one norm of a matrix (maximum column sum), 
    normI  denotes the  infinity norm  of a matrix  (maximum row sum) and 
    normF  denotes the  Frobenius norm of a matrix (square root of sum of 
    squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.   

    Arguments   
    =========   

    NORM    (input) CHARACTER*1   
            Specifies the value to be returned in CLANGE as described above.   
    A       (input) SuperMatrix*
            The M by N sparse matrix A. 

   ===================================================================== 
*/
    
    /* Local variables */
    NCformat *Astore;
    complex   *Aval;
    int      i, j, irow;
    float   value, sum;
    float   *rwork;

    Astore = A->Store;
    Aval   = Astore->nzval;
    
    if ( SUPERLU_MIN(A->nrow, A->ncol) == 0) {
	value = 0.;
	
    } else if (lsame_(norm, "M")) {
	/* Find max(abs(A(i,j))). */
	value = 0.;
	for (j = 0; j < A->ncol; ++j)
	    for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++)
		value = SUPERLU_MAX( value, slu_c_abs( &Aval[i]) );
	
    } else if (lsame_(norm, "O") || *(unsigned char *)norm == '1') {
	/* Find norm1(A). */
	value = 0.;
	for (j = 0; j < A->ncol; ++j) {
	    sum = 0.;
	    for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++) 
		sum += slu_c_abs( &Aval[i] );
	    value = SUPERLU_MAX(value,sum);
	}
	
    } else if (lsame_(norm, "I")) {
	/* Find normI(A). */
	if ( !(rwork = (float *) SUPERLU_MALLOC(A->nrow * sizeof(float))) )
	    ABORT("SUPERLU_MALLOC fails for rwork.");
	for (i = 0; i < A->nrow; ++i) rwork[i] = 0.;
	for (j = 0; j < A->ncol; ++j)
	    for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++) {
		irow = Astore->rowind[i];
		rwork[irow] += slu_c_abs( &Aval[i] );
	    }
	value = 0.;
	for (i = 0; i < A->nrow; ++i)
	    value = SUPERLU_MAX(value, rwork[i]);
	
	SUPERLU_FREE (rwork);
	
    } else if (lsame_(norm, "F") || lsame_(norm, "E")) {
	/* Find normF(A). */
	ABORT("Not implemented.");
    } else
	ABORT("Illegal norm specified.");

    return (value);

} /* clangs */