/*! \brief * * <pre> * Purpose * ======= * * PDGSRFS improves the computed solution to a system of linear * equations and provides error bounds and backward error estimates * for the solution. * * Arguments * ========= * * n (input) int (global) * The order of the system of linear equations. * * A (input) SuperMatrix* * The original matrix A, or the scaled A if equilibration was done. * A is also permuted into diag(R)*A*diag(C)*Pc'. The type of A can be: * Stype = SLU_NR_loc; Dtype = SLU_D; Mtype = SLU_GE. * * anorm (input) double * The norm of the original matrix A, or the scaled A if * equilibration was done. * * LUstruct (input) LUstruct_t* * The distributed data structures storing L and U factors. * The L and U factors are obtained from pdgstrf for * the possibly scaled and permuted matrix A. * See superlu_ddefs.h for the definition of 'LUstruct_t'. * * ScalePermstruct (input) ScalePermstruct_t* (global) * The data structure to store the scaling and permutation vectors * describing the transformations performed to the matrix A. * * grid (input) gridinfo_t* * The 2D process mesh. It contains the MPI communicator, the number * of process rows (NPROW), the number of process columns (NPCOL), * and my process rank. It is an input argument to all the * parallel routines. * Grid can be initialized by subroutine SUPERLU_GRIDINIT. * See superlu_defs.h for the definition of 'gridinfo_t'. * * B (input) double* (local) * The m_loc-by-NRHS right-hand side matrix of the possibly * equilibrated system. That is, B may be overwritten by diag(R)*B. * * ldb (input) int (local) * Leading dimension of matrix B. * * X (input/output) double* (local) * On entry, the solution matrix Y, as computed by PDGSTRS, of the * transformed system A1*Y = Pc*Pr*B. where * A1 = Pc*Pr*diag(R)*A*diag(C)*Pc' and Y = Pc*diag(C)^(-1)*X. * On exit, the improved solution matrix Y. * * In order to obtain the solution X to the original system, * Y should be permutated by Pc^T, and premultiplied by diag(C) * if DiagScale = COL or BOTH. * This must be done after this routine is called. * * ldx (input) int (local) * Leading dimension of matrix X. * * nrhs (input) int * Number of right-hand sides. * * SOLVEstruct (output) SOLVEstruct_t* (global) * Contains the information for the communication during the * solution phase. * * berr (output) double*, dimension (nrhs) * The componentwise relative backward error of each solution * vector X(j) (i.e., the smallest relative change in * any element of A or B that makes X(j) an exact solution). * * stat (output) SuperLUStat_t* * Record the statistics about the refinement steps. * See util.h for the definition of SuperLUStat_t. * * info (output) int* * = 0: successful exit * < 0: if info = -i, the i-th argument had an illegal value * * Internal Parameters * =================== * * ITMAX is the maximum number of steps of iterative refinement. * </pre> */ void pdgsrfs(int_t n, SuperMatrix *A, double anorm, LUstruct_t *LUstruct, ScalePermstruct_t *ScalePermstruct, gridinfo_t *grid, double *B, int_t ldb, double *X, int_t ldx, int nrhs, SOLVEstruct_t *SOLVEstruct, double *berr, SuperLUStat_t *stat, int *info) { #define ITMAX 20 Glu_persist_t *Glu_persist = LUstruct->Glu_persist; LocalLU_t *Llu = LUstruct->Llu; double *ax, *R, *dx, *temp, *work, *B_col, *X_col; int_t count, i, j, lwork, nz; int iam; double eps, lstres; double s, safmin, safe1, safe2; /* Data structures used by matrix-vector multiply routine. */ pdgsmv_comm_t *gsmv_comm = SOLVEstruct->gsmv_comm; NRformat_loc *Astore; int_t m_loc, fst_row; /* Initialization. */ Astore = (NRformat_loc *) A->Store; m_loc = Astore->m_loc; fst_row = Astore->fst_row; iam = grid->iam; /* Test the input parameters. */ *info = 0; if ( n < 0 ) *info = -1; else if ( A->nrow != A->ncol || A->nrow < 0 || A->Stype != SLU_NR_loc || A->Dtype != SLU_D || A->Mtype != SLU_GE ) *info = -2; else if ( ldb < SUPERLU_MAX(0, m_loc) ) *info = -10; else if ( ldx < SUPERLU_MAX(0, m_loc) ) *info = -12; else if ( nrhs < 0 ) *info = -13; if (*info != 0) { i = -(*info); pxerbla("PDGSRFS", grid, i); return; } /* Quick return if possible. */ if ( n == 0 || nrhs == 0 ) { return; } #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter pdgsrfs()"); #endif lwork = 2 * m_loc; /* For ax/R/dx and temp */ if ( !(work = doubleMalloc_dist(lwork)) ) ABORT("Malloc fails for work[]"); ax = R = dx = work; temp = ax + m_loc; /* NZ = maximum number of nonzero elements in each row of A, plus 1 */ nz = A->ncol + 1; eps = dmach("Epsilon"); safmin = dmach("Safe minimum"); /* Set SAFE1 essentially to be the underflow threshold times the number of additions in each row. */ safe1 = nz * safmin; safe2 = safe1 / eps; #if ( DEBUGlevel>=1 ) if ( !iam ) printf(".. eps = %e\tanorm = %e\tsafe1 = %e\tsafe2 = %e\n", eps, anorm, safe1, safe2); #endif /* Do for each right-hand side ... */ for (j = 0; j < nrhs; ++j) { count = 0; lstres = 3.; B_col = &B[j*ldb]; X_col = &X[j*ldx]; while (1) { /* Loop until stopping criterion is satisfied. */ /* Compute residual R = B - op(A) * X, where op(A) = A, A**T, or A**H, depending on TRANS. */ /* Matrix-vector multiply. */ pdgsmv(0, A, grid, gsmv_comm, X_col, ax); /* Compute residual, stored in R[]. */ for (i = 0; i < m_loc; ++i) R[i] = B_col[i] - ax[i]; /* Compute abs(op(A))*abs(X) + abs(B), stored in temp[]. */ pdgsmv(1, A, grid, gsmv_comm, X_col, temp); for (i = 0; i < m_loc; ++i) temp[i] += fabs(B_col[i]); s = 0.0; for (i = 0; i < m_loc; ++i) { if ( temp[i] > safe2 ) { s = SUPERLU_MAX(s, fabs(R[i]) / temp[i]); } else if ( temp[i] != 0.0 ) { /* Adding SAFE1 to the numerator guards against spuriously zero residuals (underflow). */ s = SUPERLU_MAX(s, (safe1 + fabs(R[i])) /temp[i]); } /* If temp[i] is exactly 0.0 (computed by PxGSMV), then we know the true residual also must be exactly 0.0. */ } MPI_Allreduce( &s, &berr[j], 1, MPI_DOUBLE, MPI_MAX, grid->comm ); #if ( PRNTlevel>= 1 ) if ( !iam ) printf("(%2d) .. Step " IFMT ": berr[j] = %e\n", iam, count, berr[j]); #endif if ( berr[j] > eps && berr[j] * 2 <= lstres && count < ITMAX ) { /* Compute new dx. */ pdgstrs(n, LUstruct, ScalePermstruct, grid, dx, m_loc, fst_row, m_loc, 1, SOLVEstruct, stat, info); /* Update solution. */ for (i = 0; i < m_loc; ++i) X_col[i] += dx[i]; lstres = berr[j]; ++count; } else { break; } } /* end while */ stat->RefineSteps = count; } /* for j ... */ /* Deallocate storage. */ SUPERLU_FREE(work); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit pdgsrfs()"); #endif } /* PDGSRFS */
int dgst01(int m, int n, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U, int *perm_c, int *perm_r, double *resid) { /* Purpose ======= DGST01 reconstructs a matrix A from its L*U factorization and computes the residual norm(L*U - A) / ( N * norm(A) * EPS ), where EPS is the machine epsilon. Arguments ========== M (input) INT The number of rows of the matrix A. M >= 0. N (input) INT The number of columns of the matrix A. N >= 0. A (input) SuperMatrix *, dimension (A->nrow, A->ncol) The original M x N matrix A. L (input) SuperMatrix *, dimension (L->nrow, L->ncol) The factor matrix L. U (input) SuperMatrix *, dimension (U->nrow, U->ncol) The factor matrix U. perm_c (input) INT array, dimension (N) The column permutation from DGSTRF. perm_r (input) INT array, dimension (M) The pivot indices from DGSTRF. RESID (output) DOUBLE* norm(L*U - A) / ( N * norm(A) * EPS ) ===================================================================== */ /* Local variables */ double zero = 0.0; int i, j, k, arow, lptr,isub, urow, superno, fsupc, u_part; double utemp, comp_temp; double anorm, tnorm, cnorm; double eps; double *work; SCformat *Lstore; NCformat *Astore, *Ustore; double *Aval, *Lval, *Uval; int *colbeg, *colend; /* Function prototypes */ extern double dlangs(char *, SuperMatrix *); /* Quick exit if M = 0 or N = 0. */ if (m <= 0 || n <= 0) { *resid = 0.f; return 0; } work = (double *)doubleCalloc(m); Astore = A->Store; Aval = Astore->nzval; Lstore = L->Store; Lval = Lstore->nzval; Ustore = U->Store; Uval = Ustore->nzval; colbeg = intMalloc(n); colend = intMalloc(n); for (i = 0; i < n; i++) { colbeg[perm_c[i]] = Astore->colptr[i]; colend[perm_c[i]] = Astore->colptr[i+1]; } /* Determine EPS and the norm of A. */ eps = dmach("Epsilon"); anorm = dlangs("1", A); cnorm = 0.; /* Compute the product L*U, one column at a time */ for (k = 0; k < n; ++k) { /* The U part outside the rectangular supernode */ for (i = U_NZ_START(k); i < U_NZ_START(k+1); ++i) { urow = U_SUB(i); utemp = Uval[i]; superno = Lstore->col_to_sup[urow]; fsupc = L_FST_SUPC(superno); u_part = urow - fsupc + 1; lptr = L_SUB_START(fsupc) + u_part; work[L_SUB(lptr-1)] -= utemp; /* L_ii = 1 */ for (j = L_NZ_START(urow) + u_part; j < L_NZ_START(urow+1); ++j) { isub = L_SUB(lptr); work[isub] -= Lval[j] * utemp; ++lptr; } } /* The U part inside the rectangular supernode */ superno = Lstore->col_to_sup[k]; fsupc = L_FST_SUPC(superno); urow = L_NZ_START(k); for (i = fsupc; i <= k; ++i) { utemp = Lval[urow++]; u_part = i - fsupc + 1; lptr = L_SUB_START(fsupc) + u_part; work[L_SUB(lptr-1)] -= utemp; /* L_ii = 1 */ for (j = L_NZ_START(i)+u_part; j < L_NZ_START(i+1); ++j) { isub = L_SUB(lptr); work[isub] -= Lval[j] * utemp; ++lptr; } } /* Now compute A[k] - (L*U)[k] (Both matrices may be permuted.) */ for (i = colbeg[k]; i < colend[k]; ++i) { arow = Astore->rowind[i]; work[perm_r[arow]] += Aval[i]; } /* Now compute the 1-norm of the column vector work */ tnorm = 0.; for (i = 0; i < m; ++i) { tnorm += fabs(work[i]); work[i] = zero; } cnorm = SUPERLU_MAX(tnorm, cnorm); } *resid = cnorm; if (anorm <= 0.f) { if (*resid != 0.f) { *resid = 1.f / eps; } } else { *resid = *resid / (float) n / anorm / eps; } SUPERLU_FREE(work); SUPERLU_FREE(colbeg); SUPERLU_FREE(colend); return 0; /* End of DGST01 */ } /* dgst01_ */
/*! \brief <pre> Purpose ======= ZLAQGS_DIST equilibrates a general sparse M by N matrix A using the row and column scaling factors in the vectors R and C. See supermatrix.h for the definition of 'SuperMatrix' structure. Arguments ========= A (input/output) SuperMatrix* On exit, the equilibrated matrix. See EQUED for the form of the equilibrated matrix. The type of A can be: Stype = SLU_NC; Dtype = SLU_Z; Mtype = SLU_GE. R (input) double*, dimension (A->nrow) The row scale factors for A. C (input) double*, dimension (A->ncol) The column scale factors for A. ROWCND (input) double Ratio of the smallest R(i) to the largest R(i). COLCND (input) double Ratio of the smallest C(i) to the largest C(i). AMAX (input) double Absolute value of largest matrix entry. EQUED (output) char* Specifies the form of equilibration that was done. = 'N': No equilibration = 'R': Row equilibration, i.e., A has been premultiplied by diag(R). = 'C': Column equilibration, i.e., A has been postmultiplied by diag(C). = 'B': Both row and column equilibration, i.e., A has been replaced by diag(R) * A * diag(C). Internal Parameters =================== THRESH is a threshold value used to decide if row or column scaling should be done based on the ratio of the row or column scaling factors. If ROWCND < THRESH, row scaling is done, and if COLCND < THRESH, column scaling is done. LARGE and SMALL are threshold values used to decide if row scaling should be done based on the absolute size of the largest matrix element. If AMAX > LARGE or AMAX < SMALL, row scaling is done. ===================================================================== </pre> */ void zlaqgs_dist(SuperMatrix *A, double *r, double *c, double rowcnd, double colcnd, double amax, char *equed) { #define THRESH (0.1) /* Local variables */ NCformat *Astore; doublecomplex *Aval; int i, j, irow; double large, small, cj; double temp; /* Quick return if possible */ if (A->nrow <= 0 || A->ncol <= 0) { *(unsigned char *)equed = 'N'; return; } Astore = (NCformat *) A->Store; Aval = (doublecomplex *) Astore->nzval; /* Initialize LARGE and SMALL. */ small = dmach("Safe minimum") / dmach("Precision"); large = 1. / small; if (rowcnd >= THRESH && amax >= small && amax <= large) { if (colcnd >= THRESH) *(unsigned char *)equed = 'N'; else { /* Column scaling */ for (j = 0; j < A->ncol; ++j) { cj = c[j]; for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) { zd_mult(&Aval[i], &Aval[i], cj); } } *(unsigned char *)equed = 'C'; } } else if (colcnd >= THRESH) { /* Row scaling, no column scaling */ for (j = 0; j < A->ncol; ++j) for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) { irow = Astore->rowind[i]; zd_mult(&Aval[i], &Aval[i], r[irow]); } *(unsigned char *)equed = 'R'; } else { /* Row and column scaling */ for (j = 0; j < A->ncol; ++j) { cj = c[j]; for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) { irow = Astore->rowind[i]; temp = cj * r[irow]; zd_mult(&Aval[i], &Aval[i], temp); } } *(unsigned char *)equed = 'B'; } return; } /* zlaqgs_dist */
int ilu_zcopy_to_ucol( int jcol, /* in */ int nseg, /* in */ int *segrep, /* in */ int *repfnz, /* in */ int *perm_r, /* in */ doublecomplex *dense, /* modified - reset to zero on return */ int drop_rule,/* in */ milu_t milu, /* in */ double drop_tol, /* in */ int quota, /* maximum nonzero entries allowed */ doublecomplex *sum, /* out - the sum of dropped entries */ int *nnzUj, /* in - out */ GlobalLU_t *Glu, /* modified */ double *work /* working space with minimum size n, * used by the second dropping rule */ ) { /* * Gather from SPA dense[*] to global ucol[*]. */ int ksub, krep, ksupno; int i, k, kfnz, segsze; int fsupc, isub, irow; int jsupno, nextu; int new_next, mem_error; int *xsup, *supno; int *lsub, *xlsub; doublecomplex *ucol; int *usub, *xusub; int nzumax; int m; /* number of entries in the nonzero U-segments */ register double d_max = 0.0, d_min = 1.0 / dmach("Safe minimum"); register double tmp; doublecomplex zero = {0.0, 0.0}; int i_1 = 1; xsup = Glu->xsup; supno = Glu->supno; lsub = Glu->lsub; xlsub = Glu->xlsub; ucol = (doublecomplex *) Glu->ucol; usub = Glu->usub; xusub = Glu->xusub; nzumax = Glu->nzumax; *sum = zero; if (drop_rule == NODROP) { drop_tol = -1.0, quota = Glu->n; } jsupno = supno[jcol]; nextu = xusub[jcol]; k = nseg - 1; for (ksub = 0; ksub < nseg; ksub++) { krep = segrep[k--]; ksupno = supno[krep]; if ( ksupno != jsupno ) { /* Should go into ucol[] */ kfnz = repfnz[krep]; if ( kfnz != EMPTY ) { /* Nonzero U-segment */ fsupc = xsup[ksupno]; isub = xlsub[fsupc] + kfnz - fsupc; segsze = krep - kfnz + 1; new_next = nextu + segsze; while ( new_next > nzumax ) { if ((mem_error = zLUMemXpand(jcol, nextu, UCOL, &nzumax, Glu)) != 0) return (mem_error); ucol = Glu->ucol; if ((mem_error = zLUMemXpand(jcol, nextu, USUB, &nzumax, Glu)) != 0) return (mem_error); usub = Glu->usub; lsub = Glu->lsub; } for (i = 0; i < segsze; i++) { irow = lsub[isub++]; tmp = z_abs1(&dense[irow]); /* first dropping rule */ if (quota > 0 && tmp >= drop_tol) { if (tmp > d_max) d_max = tmp; if (tmp < d_min) d_min = tmp; usub[nextu] = perm_r[irow]; ucol[nextu] = dense[irow]; nextu++; } else { switch (milu) { case SMILU_1: case SMILU_2: z_add(sum, sum, &dense[irow]); break; case SMILU_3: /* *sum += fabs(dense[irow]);*/ sum->r += tmp; break; case SILU: default: break; } #ifdef DEBUG num_drop_U++; #endif } dense[irow] = zero; } } } } /* for each segment... */ xusub[jcol + 1] = nextu; /* Close U[*,jcol] */ m = xusub[jcol + 1] - xusub[jcol]; /* second dropping rule */ if (drop_rule & DROP_SECONDARY && m > quota) { register double tol = d_max; register int m0 = xusub[jcol] + m - 1; if (quota > 0) { if (drop_rule & DROP_INTERP) { d_max = 1.0 / d_max; d_min = 1.0 / d_min; tol = 1.0 / (d_max + (d_min - d_max) * quota / m); } else { i_1 = xusub[jcol]; for (i = 0; i < m; ++i, ++i_1) work[i] = z_abs1(&ucol[i_1]); tol = dqselect(m, work, quota); #if 0 A = &ucol[xusub[jcol]]; for (i = 0; i < m; i++) work[i] = i; qsort(work, m, sizeof(int), _compare_); tol = fabs(usub[xusub[jcol] + work[quota]]); #endif } } for (i = xusub[jcol]; i <= m0; ) { if (z_abs1(&ucol[i]) <= tol) { switch (milu) { case SMILU_1: case SMILU_2: z_add(sum, sum, &ucol[i]); break; case SMILU_3: sum->r += tmp; break; case SILU: default: break; } ucol[i] = ucol[m0]; usub[i] = usub[m0]; m0--; m--; #ifdef DEBUG num_drop_U++; #endif xusub[jcol + 1]--; continue; } i++; } } if (milu == SMILU_2) { sum->r = z_abs1(sum); sum->i = 0.0; } if (milu == SMILU_3) sum->i = 0.0; *nnzUj += m; return 0; }
void dgsisx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r, int *etree, char *equed, double *R, double *C, SuperMatrix *L, SuperMatrix *U, void *work, int lwork, SuperMatrix *B, SuperMatrix *X, double *recip_pivot_growth, double *rcond, GlobalLU_t *Glu, mem_usage_t *mem_usage, SuperLUStat_t *stat, int *info) { DNformat *Bstore, *Xstore; double *Bmat, *Xmat; int ldb, ldx, nrhs, n; SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/ SuperMatrix AC; /* Matrix postmultiplied by Pc */ int colequ, equil, nofact, notran, rowequ, permc_spec, mc64; trans_t trant; char norm[1]; int i, j, info1; double amax, anorm, bignum, smlnum, colcnd, rowcnd, rcmax, rcmin; int relax, panel_size; double diag_pivot_thresh; double t0; /* temporary time */ double *utime; int *perm = NULL; /* permutation returned from MC64 */ /* External functions */ extern double dlangs(char *, SuperMatrix *); Bstore = B->Store; Xstore = X->Store; Bmat = Bstore->nzval; Xmat = Xstore->nzval; ldb = Bstore->lda; ldx = Xstore->lda; nrhs = B->ncol; n = B->nrow; *info = 0; nofact = (options->Fact != FACTORED); equil = (options->Equil == YES); notran = (options->Trans == NOTRANS); mc64 = (options->RowPerm == LargeDiag); if ( nofact ) { *(unsigned char *)equed = 'N'; rowequ = FALSE; colequ = FALSE; } else { rowequ = strncmp(equed, "R", 1)==0 || strncmp(equed, "B", 1)==0; colequ = strncmp(equed, "C", 1)==0 || strncmp(equed, "B", 1)==0; smlnum = dmach("Safe minimum"); /* lamch_("Safe minimum"); */ bignum = 1. / smlnum; } /* Test the input parameters */ if (options->Fact != DOFACT && options->Fact != SamePattern && options->Fact != SamePattern_SameRowPerm && options->Fact != FACTORED && options->Trans != NOTRANS && options->Trans != TRANS && options->Trans != CONJ && options->Equil != NO && options->Equil != YES) *info = -1; else if ( A->nrow != A->ncol || A->nrow < 0 || (A->Stype != SLU_NC && A->Stype != SLU_NR) || A->Dtype != SLU_D || A->Mtype != SLU_GE ) *info = -2; else if ( options->Fact == FACTORED && !(rowequ || colequ || strncmp(equed, "N", 1)==0) ) *info = -6; else { if (rowequ) { rcmin = bignum; rcmax = 0.; for (j = 0; j < A->nrow; ++j) { rcmin = SUPERLU_MIN(rcmin, R[j]); rcmax = SUPERLU_MAX(rcmax, R[j]); } if (rcmin <= 0.) *info = -7; else if ( A->nrow > 0) rowcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum); else rowcnd = 1.; } if (colequ && *info == 0) { rcmin = bignum; rcmax = 0.; for (j = 0; j < A->nrow; ++j) { rcmin = SUPERLU_MIN(rcmin, C[j]); rcmax = SUPERLU_MAX(rcmax, C[j]); } if (rcmin <= 0.) *info = -8; else if (A->nrow > 0) colcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum); else colcnd = 1.; } if (*info == 0) { if ( lwork < -1 ) *info = -12; else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) || B->Stype != SLU_DN || B->Dtype != SLU_D || B->Mtype != SLU_GE ) *info = -13; else if ( X->ncol < 0 || Xstore->lda < SUPERLU_MAX(0, A->nrow) || (B->ncol != 0 && B->ncol != X->ncol) || X->Stype != SLU_DN || X->Dtype != SLU_D || X->Mtype != SLU_GE ) *info = -14; } } if (*info != 0) { i = -(*info); input_error("dgsisx", &i); return; } /* Initialization for factor parameters */ panel_size = sp_ienv(1); relax = sp_ienv(2); diag_pivot_thresh = options->DiagPivotThresh; utime = stat->utime; /* Convert A to SLU_NC format when necessary. */ if ( A->Stype == SLU_NR ) { NRformat *Astore = A->Store; AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); dCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, Astore->nzval, Astore->colind, Astore->rowptr, SLU_NC, A->Dtype, A->Mtype); if ( notran ) { /* Reverse the transpose argument. */ trant = TRANS; notran = 0; } else { trant = NOTRANS; notran = 1; } } else { /* A->Stype == SLU_NC */ trant = options->Trans; AA = A; } if ( nofact ) { register int i, j; NCformat *Astore = AA->Store; int nnz = Astore->nnz; int *colptr = Astore->colptr; int *rowind = Astore->rowind; double *nzval = (double *)Astore->nzval; if ( mc64 ) { t0 = SuperLU_timer_(); if ((perm = intMalloc(n)) == NULL) ABORT("SUPERLU_MALLOC fails for perm[]"); info1 = dldperm(5, n, nnz, colptr, rowind, nzval, perm, R, C); if (info1 != 0) { /* MC64 fails, call dgsequ() later */ mc64 = 0; SUPERLU_FREE(perm); perm = NULL; } else { if ( equil ) { rowequ = colequ = 1; for (i = 0; i < n; i++) { R[i] = exp(R[i]); C[i] = exp(C[i]); } /* scale the matrix */ for (j = 0; j < n; j++) { for (i = colptr[j]; i < colptr[j + 1]; i++) { nzval[i] *= R[rowind[i]] * C[j]; } } *equed = 'B'; } /* permute the matrix */ for (j = 0; j < n; j++) { for (i = colptr[j]; i < colptr[j + 1]; i++) { /*nzval[i] *= R[rowind[i]] * C[j];*/ rowind[i] = perm[rowind[i]]; } } } utime[EQUIL] = SuperLU_timer_() - t0; } if ( !mc64 & equil ) { /* Only perform equilibration, no row perm */ t0 = SuperLU_timer_(); /* Compute row and column scalings to equilibrate the matrix A. */ dgsequ(AA, R, C, &rowcnd, &colcnd, &amax, &info1); if ( info1 == 0 ) { /* Equilibrate matrix A. */ dlaqgs(AA, R, C, rowcnd, colcnd, amax, equed); rowequ = strncmp(equed, "R", 1)==0 || strncmp(equed, "B", 1)==0; colequ = strncmp(equed, "C", 1)==0 || strncmp(equed, "B", 1)==0; } utime[EQUIL] = SuperLU_timer_() - t0; } } if ( nofact ) { t0 = SuperLU_timer_(); /* * Gnet column permutation vector perm_c[], according to permc_spec: * permc_spec = NATURAL: natural ordering * permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A * permc_spec = MMD_ATA: minimum degree on structure of A'*A * permc_spec = COLAMD: approximate minimum degree column ordering * permc_spec = MY_PERMC: the ordering already supplied in perm_c[] */ permc_spec = options->ColPerm; if ( permc_spec != MY_PERMC && options->Fact == DOFACT ) get_perm_c(permc_spec, AA, perm_c); utime[COLPERM] = SuperLU_timer_() - t0; t0 = SuperLU_timer_(); sp_preorder(options, AA, perm_c, etree, &AC); utime[ETREE] = SuperLU_timer_() - t0; /* Compute the LU factorization of A*Pc. */ t0 = SuperLU_timer_(); dgsitrf(options, &AC, relax, panel_size, etree, work, lwork, perm_c, perm_r, L, U, Glu, stat, info); utime[FACT] = SuperLU_timer_() - t0; if ( lwork == -1 ) { mem_usage->total_needed = *info - A->ncol; return; } if ( mc64 ) { /* Fold MC64's perm[] into perm_r[]. */ NCformat *Astore = AA->Store; int nnz = Astore->nnz, *rowind = Astore->rowind; int *perm_tmp, *iperm; if ((perm_tmp = intMalloc(2*n)) == NULL) ABORT("SUPERLU_MALLOC fails for perm_tmp[]"); iperm = perm_tmp + n; for (i = 0; i < n; ++i) perm_tmp[i] = perm_r[perm[i]]; for (i = 0; i < n; ++i) { perm_r[i] = perm_tmp[i]; iperm[perm[i]] = i; } /* Restore A's original row indices. */ for (i = 0; i < nnz; ++i) rowind[i] = iperm[rowind[i]]; SUPERLU_FREE(perm); /* MC64 permutation */ SUPERLU_FREE(perm_tmp); } } if ( options->PivotGrowth ) { if ( *info > 0 ) return; /* Compute the reciprocal pivot growth factor *recip_pivot_growth. */ *recip_pivot_growth = dPivotGrowth(A->ncol, AA, perm_c, L, U); } if ( options->ConditionNumber ) { /* Estimate the reciprocal of the condition number of A. */ t0 = SuperLU_timer_(); if ( notran ) { *(unsigned char *)norm = '1'; } else { *(unsigned char *)norm = 'I'; } anorm = dlangs(norm, AA); dgscon(norm, L, U, anorm, rcond, stat, &info1); utime[RCOND] = SuperLU_timer_() - t0; } if ( nrhs > 0 ) { /* Solve the system */ double *rhs_work; /* Scale and permute the right-hand side if equilibration and permutation from MC64 were performed. */ if ( notran ) { if ( rowequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < n; ++i) Bmat[i + j*ldb] *= R[i]; } } else if ( colequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < n; ++i) { Bmat[i + j*ldb] *= C[i]; } } /* Compute the solution matrix X. */ for (j = 0; j < nrhs; j++) /* Save a copy of the right hand sides */ for (i = 0; i < B->nrow; i++) Xmat[i + j*ldx] = Bmat[i + j*ldb]; t0 = SuperLU_timer_(); dgstrs (trant, L, U, perm_c, perm_r, X, stat, &info1); utime[SOLVE] = SuperLU_timer_() - t0; /* Transform the solution matrix X to a solution of the original system. */ if ( notran ) { if ( colequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < n; ++i) { Xmat[i + j*ldx] *= C[i]; } } } else { /* transposed system */ if ( rowequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < A->nrow; ++i) { Xmat[i + j*ldx] *= R[i]; } } } } /* end if nrhs > 0 */ if ( options->ConditionNumber ) { /* The matrix is singular to working precision. */ /* if ( *rcond < dlamch_("E") && *info == 0) *info = A->ncol + 1; */ if ( *rcond < dmach("E") && *info == 0) *info = A->ncol + 1; } if ( nofact ) { ilu_dQuerySpace(L, U, mem_usage); Destroy_CompCol_Permuted(&AC); } if ( A->Stype == SLU_NR ) { Destroy_SuperMatrix_Store(AA); SUPERLU_FREE(AA); } }
/*! \brief * * <pre> * Purpose * ======= * * DGSRFS improves the computed solution to a system of linear * equations and provides error bounds and backward error estimates for * the solution. * * If equilibration was performed, the system becomes: * (diag(R)*A_original*diag(C)) * X = diag(R)*B_original. * * See supermatrix.h for the definition of 'SuperMatrix' structure. * * Arguments * ========= * * trans (input) trans_t * Specifies the form of the system of equations: * = NOTRANS: A * X = B (No transpose) * = TRANS: A'* X = B (Transpose) * = CONJ: A**H * X = B (Conjugate transpose) * * A (input) SuperMatrix* * The original matrix A in the system, or the scaled A if * equilibration was done. The type of A can be: * Stype = SLU_NC, Dtype = SLU_D, Mtype = SLU_GE. * * L (input) SuperMatrix* * The factor L from the factorization Pr*A*Pc=L*U. Use * compressed row subscripts storage for supernodes, * i.e., L has types: Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU. * * U (input) SuperMatrix* * The factor U from the factorization Pr*A*Pc=L*U as computed by * dgstrf(). Use column-wise storage scheme, * i.e., U has types: Stype = SLU_NC, Dtype = SLU_D, Mtype = SLU_TRU. * * perm_c (input) int*, dimension (A->ncol) * Column permutation vector, which defines the * permutation matrix Pc; perm_c[i] = j means column i of A is * in position j in A*Pc. * * perm_r (input) int*, dimension (A->nrow) * Row permutation vector, which defines the permutation matrix Pr; * perm_r[i] = j means row i of A is in position j in Pr*A. * * equed (input) Specifies the form of equilibration that was done. * = 'N': No equilibration. * = 'R': Row equilibration, i.e., A was premultiplied by diag(R). * = 'C': Column equilibration, i.e., A was postmultiplied by * diag(C). * = 'B': Both row and column equilibration, i.e., A was replaced * by diag(R)*A*diag(C). * * R (input) double*, dimension (A->nrow) * The row scale factors for A. * If equed = 'R' or 'B', A is premultiplied by diag(R). * If equed = 'N' or 'C', R is not accessed. * * C (input) double*, dimension (A->ncol) * The column scale factors for A. * If equed = 'C' or 'B', A is postmultiplied by diag(C). * If equed = 'N' or 'R', C is not accessed. * * B (input) SuperMatrix* * B has types: Stype = SLU_DN, Dtype = SLU_D, Mtype = SLU_GE. * The right hand side matrix B. * if equed = 'R' or 'B', B is premultiplied by diag(R). * * X (input/output) SuperMatrix* * X has types: Stype = SLU_DN, Dtype = SLU_D, Mtype = SLU_GE. * On entry, the solution matrix X, as computed by dgstrs(). * On exit, the improved solution matrix X. * if *equed = 'C' or 'B', X should be premultiplied by diag(C) * in order to obtain the solution to the original system. * * FERR (output) double*, dimension (B->ncol) * The estimated forward error bound for each solution vector * X(j) (the j-th column of the solution matrix X). * If XTRUE is the true solution corresponding to X(j), FERR(j) * is an estimated upper bound for the magnitude of the largest * element in (X(j) - XTRUE) divided by the magnitude of the * largest element in X(j). The estimate is as reliable as * the estimate for RCOND, and is almost always a slight * overestimate of the true error. * * BERR (output) double*, dimension (B->ncol) * The componentwise relative backward error of each solution * vector X(j) (i.e., the smallest relative change in * any element of A or B that makes X(j) an exact solution). * * stat (output) SuperLUStat_t* * Record the statistics on runtime and floating-point operation count. * See util.h for the definition of 'SuperLUStat_t'. * * info (output) int* * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * * Internal Parameters * =================== * * ITMAX is the maximum number of steps of iterative refinement. * * </pre> */ void dgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U, int *perm_c, int *perm_r, char *equed, double *R, double *C, SuperMatrix *B, SuperMatrix *X, double *ferr, double *berr, SuperLUStat_t *stat, int *info) { #define ITMAX 5 /* Table of constant values */ int ione = 1; double ndone = -1.; double done = 1.; /* Local variables */ NCformat *Astore; double *Aval; SuperMatrix Bjcol; DNformat *Bstore, *Xstore, *Bjcol_store; double *Bmat, *Xmat, *Bptr, *Xptr; int kase; double safe1, safe2; int i, j, k, irow, nz, count, notran, rowequ, colequ; int ldb, ldx, nrhs; double s, xk, lstres, eps, safmin; char transc[1]; trans_t transt; double *work; double *rwork; int *iwork; int isave[3]; extern int dlacon2_(int *, double *, double *, int *, double *, int *, int []); #ifdef _CRAY extern int SCOPY(int *, double *, int *, double *, int *); extern int SSAXPY(int *, double *, double *, int *, double *, int *); #else extern int dcopy_(int *, double *, int *, double *, int *); extern int daxpy_(int *, double *, double *, int *, double *, int *); #endif Astore = A->Store; Aval = Astore->nzval; Bstore = B->Store; Xstore = X->Store; Bmat = Bstore->nzval; Xmat = Xstore->nzval; ldb = Bstore->lda; ldx = Xstore->lda; nrhs = B->ncol; /* Test the input parameters */ *info = 0; notran = (trans == NOTRANS); if ( !notran && trans != TRANS && trans != CONJ ) *info = -1; else if ( A->nrow != A->ncol || A->nrow < 0 || A->Stype != SLU_NC || A->Dtype != SLU_D || A->Mtype != SLU_GE ) *info = -2; else if ( L->nrow != L->ncol || L->nrow < 0 || L->Stype != SLU_SC || L->Dtype != SLU_D || L->Mtype != SLU_TRLU ) *info = -3; else if ( U->nrow != U->ncol || U->nrow < 0 || U->Stype != SLU_NC || U->Dtype != SLU_D || U->Mtype != SLU_TRU ) *info = -4; else if ( ldb < SUPERLU_MAX(0, A->nrow) || B->Stype != SLU_DN || B->Dtype != SLU_D || B->Mtype != SLU_GE ) *info = -10; else if ( ldx < SUPERLU_MAX(0, A->nrow) || X->Stype != SLU_DN || X->Dtype != SLU_D || X->Mtype != SLU_GE ) *info = -11; if (*info != 0) { i = -(*info); input_error("dgsrfs", &i); return; } /* Quick return if possible */ if ( A->nrow == 0 || nrhs == 0) { for (j = 0; j < nrhs; ++j) { ferr[j] = 0.; berr[j] = 0.; } return; } rowequ = strncmp(equed, "R", 1)==0 || strncmp(equed, "B", 1)==0; colequ = strncmp(equed, "C", 1)==0 || strncmp(equed, "B", 1)==0; /* Allocate working space */ work = doubleMalloc(2*A->nrow); rwork = (double *) SUPERLU_MALLOC( A->nrow * sizeof(double) ); iwork = intMalloc(2*A->nrow); if ( !work || !rwork || !iwork ) ABORT("Malloc fails for work/rwork/iwork."); if ( notran ) { *(unsigned char *)transc = 'N'; transt = TRANS; } else if ( trans == TRANS ) { *(unsigned char *)transc = 'T'; transt = NOTRANS; } else if ( trans == CONJ ) { *(unsigned char *)transc = 'C'; transt = NOTRANS; } /* NZ = maximum number of nonzero elements in each row of A, plus 1 */ nz = A->ncol + 1; eps = dmach("Epsilon"); safmin = dmach("Safe minimum"); /* Set SAFE1 essentially to be the underflow threshold times the number of additions in each row. */ safe1 = nz * safmin; safe2 = safe1 / eps; /* Compute the number of nonzeros in each row (or column) of A */ for (i = 0; i < A->nrow; ++i) iwork[i] = 0; if ( notran ) { for (k = 0; k < A->ncol; ++k) for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) ++iwork[Astore->rowind[i]]; } else { for (k = 0; k < A->ncol; ++k) iwork[k] = Astore->colptr[k+1] - Astore->colptr[k]; } /* Copy one column of RHS B into Bjcol. */ Bjcol.Stype = B->Stype; Bjcol.Dtype = B->Dtype; Bjcol.Mtype = B->Mtype; Bjcol.nrow = B->nrow; Bjcol.ncol = 1; Bjcol.Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) ); if ( !Bjcol.Store ) ABORT("SUPERLU_MALLOC fails for Bjcol.Store"); Bjcol_store = Bjcol.Store; Bjcol_store->lda = ldb; Bjcol_store->nzval = work; /* address aliasing */ /* Do for each right hand side ... */ for (j = 0; j < nrhs; ++j) { count = 0; lstres = 3.; Bptr = &Bmat[j*ldb]; Xptr = &Xmat[j*ldx]; while (1) { /* Loop until stopping criterion is satisfied. */ /* Compute residual R = B - op(A) * X, where op(A) = A, A**T, or A**H, depending on TRANS. */ #ifdef _CRAY SCOPY(&A->nrow, Bptr, &ione, work, &ione); #else dcopy_(&A->nrow, Bptr, &ione, work, &ione); #endif sp_dgemv(transc, ndone, A, Xptr, ione, done, work, ione); /* Compute componentwise relative backward error from formula max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) where abs(Z) is the componentwise absolute value of the matrix or vector Z. If the i-th component of the denominator is less than SAFE2, then SAFE1 is added to the i-th component of the numerator before dividing. */ for (i = 0; i < A->nrow; ++i) rwork[i] = fabs( Bptr[i] ); /* Compute abs(op(A))*abs(X) + abs(B). */ if ( notran ) { for (k = 0; k < A->ncol; ++k) { xk = fabs( Xptr[k] ); for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) rwork[Astore->rowind[i]] += fabs(Aval[i]) * xk; } } else { /* trans = TRANS or CONJ */ for (k = 0; k < A->ncol; ++k) { s = 0.; for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) { irow = Astore->rowind[i]; s += fabs(Aval[i]) * fabs(Xptr[irow]); } rwork[k] += s; } } s = 0.; for (i = 0; i < A->nrow; ++i) { if (rwork[i] > safe2) { s = SUPERLU_MAX( s, fabs(work[i]) / rwork[i] ); } else if ( rwork[i] != 0.0 ) { /* Adding SAFE1 to the numerator guards against spuriously zero residuals (underflow). */ s = SUPERLU_MAX( s, (safe1 + fabs(work[i])) / rwork[i] ); } /* If rwork[i] is exactly 0.0, then we know the true residual also must be exactly 0.0. */ } berr[j] = s; /* Test stopping criterion. Continue iterating if 1) The residual BERR(J) is larger than machine epsilon, and 2) BERR(J) decreased by at least a factor of 2 during the last iteration, and 3) At most ITMAX iterations tried. */ if (berr[j] > eps && berr[j] * 2. <= lstres && count < ITMAX) { /* Update solution and try again. */ dgstrs (trans, L, U, perm_c, perm_r, &Bjcol, stat, info); #ifdef _CRAY SAXPY(&A->nrow, &done, work, &ione, &Xmat[j*ldx], &ione); #else daxpy_(&A->nrow, &done, work, &ione, &Xmat[j*ldx], &ione); #endif lstres = berr[j]; ++count; } else { break; } } /* end while */ stat->RefineSteps = count; /* Bound error from formula: norm(X - XTRUE) / norm(X) .le. FERR = norm( abs(inv(op(A)))* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) where norm(Z) is the magnitude of the largest component of Z inv(op(A)) is the inverse of op(A) abs(Z) is the componentwise absolute value of the matrix or vector Z NZ is the maximum number of nonzeros in any row of A, plus 1 EPS is machine epsilon The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) is incremented by SAFE1 if the i-th component of abs(op(A))*abs(X) + abs(B) is less than SAFE2. Use DLACON2 to estimate the infinity-norm of the matrix inv(op(A)) * diag(W), where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */ for (i = 0; i < A->nrow; ++i) rwork[i] = fabs( Bptr[i] ); /* Compute abs(op(A))*abs(X) + abs(B). */ if ( notran ) { for (k = 0; k < A->ncol; ++k) { xk = fabs( Xptr[k] ); for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) rwork[Astore->rowind[i]] += fabs(Aval[i]) * xk; } } else { /* trans == TRANS or CONJ */ for (k = 0; k < A->ncol; ++k) { s = 0.; for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) { irow = Astore->rowind[i]; xk = fabs( Xptr[irow] ); s += fabs(Aval[i]) * xk; } rwork[k] += s; } } for (i = 0; i < A->nrow; ++i) if (rwork[i] > safe2) rwork[i] = fabs(work[i]) + (iwork[i]+1)*eps*rwork[i]; else rwork[i] = fabs(work[i])+(iwork[i]+1)*eps*rwork[i]+safe1; kase = 0; do { dlacon2_(&A->nrow, &work[A->nrow], work, &iwork[A->nrow], &ferr[j], &kase, isave); if (kase == 0) break; if (kase == 1) { /* Multiply by diag(W)*inv(op(A)**T)*(diag(C) or diag(R)). */ if ( notran && colequ ) for (i = 0; i < A->ncol; ++i) work[i] *= C[i]; else if ( !notran && rowequ ) for (i = 0; i < A->nrow; ++i) work[i] *= R[i]; dgstrs (transt, L, U, perm_c, perm_r, &Bjcol, stat, info); for (i = 0; i < A->nrow; ++i) work[i] *= rwork[i]; } else { /* Multiply by (diag(C) or diag(R))*inv(op(A))*diag(W). */ for (i = 0; i < A->nrow; ++i) work[i] *= rwork[i]; dgstrs (trans, L, U, perm_c, perm_r, &Bjcol, stat, info); if ( notran && colequ ) for (i = 0; i < A->ncol; ++i) work[i] *= C[i]; else if ( !notran && rowequ ) for (i = 0; i < A->ncol; ++i) work[i] *= R[i]; } } while ( kase != 0 ); /* Normalize error. */ lstres = 0.; if ( notran && colequ ) { for (i = 0; i < A->nrow; ++i) lstres = SUPERLU_MAX( lstres, C[i] * fabs( Xptr[i]) ); } else if ( !notran && rowequ ) { for (i = 0; i < A->nrow; ++i) lstres = SUPERLU_MAX( lstres, R[i] * fabs( Xptr[i]) ); } else { for (i = 0; i < A->nrow; ++i) lstres = SUPERLU_MAX( lstres, fabs( Xptr[i]) ); } if ( lstres != 0. ) ferr[j] /= lstres; } /* for each RHS j ... */ SUPERLU_FREE(work); SUPERLU_FREE(rwork); SUPERLU_FREE(iwork); SUPERLU_FREE(Bjcol.Store); return; } /* dgsrfs */
int dgst04(int n, int nrhs, double *x, int ldx, double *xact, int ldxact, double rcond, double *resid) { /* Purpose ======= DGST04 computes the difference between a computed solution and the true solution to a system of linear equations. RESID = ( norm(X-XACT) * RCOND ) / ( norm(XACT) * EPS ), where RCOND is the reciprocal of the condition number and EPS is the machine epsilon. Arguments ========= N (input) INT The number of rows of the matrices X and XACT. N >= 0. NRHS (input) INT The number of columns of the matrices X and XACT. NRHS >= 0. X (input) DOUBLE PRECISION array, dimension (LDX,NRHS) The computed solution vectors. Each vector is stored as a column of the matrix X. LDX (input) INT The leading dimension of the array X. LDX >= max(1,N). XACT (input) DOUBLE PRECISION array, dimension( LDX, NRHS ) The exact solution vectors. Each vector is stored as a column of the matrix XACT. LDXACT (input) INT The leading dimension of the array XACT. LDXACT >= max(1,N). RCOND (input) DOUBLE PRECISION The reciprocal of the condition number of the coefficient matrix in the system of equations. RESID (output) DOUBLE PRECISION The maximum over the NRHS solution vectors of ( norm(X-XACT) * RCOND ) / ( norm(XACT) * EPS ) ===================================================================== */ /* Table of constant values */ int c__1 = 1; /* System generated locals */ double d__1, d__2, d__3, d__4; /* Local variables */ int i, j, n__1; int ix; double xnorm; double eps; double diffnm; /* Function prototypes */ extern int idamax_(int *, double *, int *); /* Quick exit if N = 0 or NRHS = 0. */ if ( n <= 0 || nrhs <= 0 ) { *resid = 0.; return 0; } /* Exit with RESID = 1/EPS if RCOND is invalid. */ eps = dmach("Epsilon"); if ( rcond < 0. ) { *resid = 1. / eps; return 0; } /* Compute the maximum of norm(X - XACT) / ( norm(XACT) * EPS ) over all the vectors X and XACT . */ *resid = 0.; for (j = 0; j < nrhs; ++j) { n__1 = n; ix = idamax_(&n__1, &xact[j*ldxact], &c__1); xnorm = (d__1 = xact[ix-1 + j*ldxact], fabs(d__1)); diffnm = 0.; for (i = 0; i < n; ++i) { /* Computing MAX */ d__3 = diffnm; d__4 = (d__1 = x[i+j*ldx]-xact[i+j*ldxact], fabs(d__1)); diffnm = SUPERLU_MAX(d__3,d__4); } if (xnorm <= 0.) { if (diffnm > 0.) { *resid = 1. / eps; } } else { /* Computing MAX */ d__1 = *resid, d__2 = diffnm / xnorm * rcond; *resid = SUPERLU_MAX(d__1,d__2); } } if (*resid * eps < 1.) { *resid /= eps; } return 0; } /* dgst04_ */
/* Subroutine */ int zlatb4_(char *path, integer *imat, integer *m, integer * n, char *type, integer *kl, integer *ku, doublereal *anorm, integer * mode, doublereal *cndnum, char *dist) { /* Initialized data */ static logical first = TRUE_; /* System generated locals */ integer i__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static doublereal badc1, badc2, large, small; static char c2[2]; extern /* Subroutine */ int dlabad_(doublereal *, doublereal *); extern doublereal dmach(char *); extern logical lsamen_(integer *, char *, char *); static integer mat; static doublereal eps; /* -- LAPACK test routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= ZLATB4 sets parameters for the matrix generator based on the type of matrix to be generated. Arguments ========= PATH (input) CHARACTER*3 The LAPACK path name. IMAT (input) INTEGER An integer key describing which matrix to generate for this path. M (input) INTEGER The number of rows in the matrix to be generated. N (input) INTEGER The number of columns in the matrix to be generated. TYPE (output) CHARACTER*1 The type of the matrix to be generated: = 'S': symmetric matrix = 'P': symmetric positive (semi)definite matrix = 'N': nonsymmetric matrix KL (output) INTEGER The lower band width of the matrix to be generated. KU (output) INTEGER The upper band width of the matrix to be generated. ANORM (output) DOUBLE PRECISION The desired norm of the matrix to be generated. The diagonal matrix of singular values or eigenvalues is scaled by this value. MODE (output) INTEGER A key indicating how to choose the vector of eigenvalues. CNDNUM (output) DOUBLE PRECISION The desired condition number. DIST (output) CHARACTER*1 The type of distribution to be used by the random number generator. ===================================================================== Set some constants for use in the subroutine. */ if (first) { first = FALSE_; eps = dmach("Precision"); badc2 = .1 / eps; badc1 = sqrt(badc2); small = dmach("Safe minimum"); large = 1. / small; /* If it looks like we're on a Cray, take the square root of SMALL and LARGE to avoid overflow and underflow problems. */ dlabad_(&small, &large); small = small / eps * .25; large = 1. / small; } /* s_copy(c2, path + 1, 2L, 2L);*/ strncpy(c2, path + 1, 2); /* Set some parameters we don't plan to change. */ *(unsigned char *)dist = 'S'; *mode = 3; /* xQR, xLQ, xQL, xRQ: Set parameters to generate a general M x N matrix. */ if (lsamen_(&c__2, c2, "QR") || lsamen_(&c__2, c2, "LQ") || lsamen_(&c__2, c2, "QL") || lsamen_(&c__2, c2, "RQ")) { /* Set TYPE, the type of matrix to be generated. */ *(unsigned char *)type = 'N'; /* Set the lower and upper bandwidths. */ if (*imat == 1) { *kl = 0; *ku = 0; } else if (*imat == 2) { *kl = 0; /* Computing MAX */ i__1 = *n - 1; *ku = max(i__1,0); } else if (*imat == 3) { /* Computing MAX */ i__1 = *m - 1; *kl = max(i__1,0); *ku = 0; } else { /* Computing MAX */ i__1 = *m - 1; *kl = max(i__1,0); /* Computing MAX */ i__1 = *n - 1; *ku = max(i__1,0); } /* Set the condition number and norm. */ if (*imat == 5) { *cndnum = badc1; } else if (*imat == 6) { *cndnum = badc2; } else { *cndnum = 2.; } if (*imat == 7) { *anorm = small; } else if (*imat == 8) { *anorm = large; } else { *anorm = 1.; } } else if (lsamen_(&c__2, c2, "GE")) { /* xGE: Set parameters to generate a general M x N matrix. Set TYPE, the type of matrix to be generated. */ *(unsigned char *)type = 'N'; /* Set the lower and upper bandwidths. */ if (*imat == 1) { *kl = 0; *ku = 0; } else if (*imat == 2) { *kl = 0; /* Computing MAX */ i__1 = *n - 1; *ku = max(i__1,0); } else if (*imat == 3) { /* Computing MAX */ i__1 = *m - 1; *kl = max(i__1,0); *ku = 0; } else { /* Computing MAX */ i__1 = *m - 1; *kl = max(i__1,0); /* Computing MAX */ i__1 = *n - 1; *ku = max(i__1,0); } /* Set the condition number and norm. */ if (*imat == 8) { *cndnum = badc1; } else if (*imat == 9) { *cndnum = badc2; } else { *cndnum = 2.; } if (*imat == 10) { *anorm = small; } else if (*imat == 11) { *anorm = large; } else { *anorm = 1.; } } else if (lsamen_(&c__2, c2, "GB")) { /* xGB: Set parameters to generate a general banded matrix. Set TYPE, the type of matrix to be generated. */ *(unsigned char *)type = 'N'; /* Set the condition number and norm. */ if (*imat == 5) { *cndnum = badc1; } else if (*imat == 6) { *cndnum = badc2 * .1; } else { *cndnum = 2.; } if (*imat == 7) { *anorm = small; } else if (*imat == 8) { *anorm = large; } else { *anorm = 1.; } } else if (lsamen_(&c__2, c2, "GT")) { /* xGT: Set parameters to generate a general tridiagonal matri x. Set TYPE, the type of matrix to be generated. */ *(unsigned char *)type = 'N'; /* Set the lower and upper bandwidths. */ if (*imat == 1) { *kl = 0; } else { *kl = 1; } *ku = *kl; /* Set the condition number and norm. */ if (*imat == 3) { *cndnum = badc1; } else if (*imat == 4) { *cndnum = badc2; } else { *cndnum = 2.; } if (*imat == 5 || *imat == 11) { *anorm = small; } else if (*imat == 6 || *imat == 12) { *anorm = large; } else { *anorm = 1.; } } else if (lsamen_(&c__2, c2, "PO") || lsamen_(&c__2, c2, "PP") || lsamen_(&c__2, c2, "HE") || lsamen_(&c__2, c2, "HP") || lsamen_(&c__2, c2, "SY") || lsamen_(& c__2, c2, "SP")) { /* xPO, xPP, xHE, xHP, xSY, xSP: Set parameters to generate a symmetric or Hermitian matrix. Set TYPE, the type of matrix to be generated. */ *(unsigned char *)type = *(unsigned char *)c2; /* Set the lower and upper bandwidths. */ if (*imat == 1) { *kl = 0; } else { /* Computing MAX */ i__1 = *n - 1; *kl = max(i__1,0); } *ku = *kl; /* Set the condition number and norm. */ if (*imat == 6) { *cndnum = badc1; } else if (*imat == 7) { *cndnum = badc2; } else { *cndnum = 2.; } if (*imat == 8) { *anorm = small; } else if (*imat == 9) { *anorm = large; } else { *anorm = 1.; } } else if (lsamen_(&c__2, c2, "PB")) { /* xPB: Set parameters to generate a symmetric band matrix. Set TYPE, the type of matrix to be generated. */ *(unsigned char *)type = 'P'; /* Set the norm and condition number. */ if (*imat == 5) { *cndnum = badc1; } else if (*imat == 6) { *cndnum = badc2; } else { *cndnum = 2.; } if (*imat == 7) { *anorm = small; } else if (*imat == 8) { *anorm = large; } else { *anorm = 1.; } } else if (lsamen_(&c__2, c2, "PT")) { /* xPT: Set parameters to generate a symmetric positive defini te tridiagonal matrix. */ *(unsigned char *)type = 'P'; if (*imat == 1) { *kl = 0; } else { *kl = 1; } *ku = *kl; /* Set the condition number and norm. */ if (*imat == 3) { *cndnum = badc1; } else if (*imat == 4) { *cndnum = badc2; } else { *cndnum = 2.; } if (*imat == 5 || *imat == 11) { *anorm = small; } else if (*imat == 6 || *imat == 12) { *anorm = large; } else { *anorm = 1.; } } else if (lsamen_(&c__2, c2, "TR") || lsamen_(&c__2, c2, "TP")) { /* xTR, xTP: Set parameters to generate a triangular matrix Set TYPE, the type of matrix to be generated. */ *(unsigned char *)type = 'N'; /* Set the lower and upper bandwidths. */ mat = abs(*imat); if (mat == 1 || mat == 7) { *kl = 0; *ku = 0; } else if (*imat < 0) { /* Computing MAX */ i__1 = *n - 1; *kl = max(i__1,0); *ku = 0; } else { *kl = 0; /* Computing MAX */ i__1 = *n - 1; *ku = max(i__1,0); } /* Set the condition number and norm. */ if (mat == 3 || mat == 9) { *cndnum = badc1; } else if (mat == 4 || mat == 10) { *cndnum = badc2; } else { *cndnum = 2.; } if (mat == 5) { *anorm = small; } else if (mat == 6) { *anorm = large; } else { *anorm = 1.; } } else if (lsamen_(&c__2, c2, "TB")) { /* xTB: Set parameters to generate a triangular band matrix. Set TYPE, the type of matrix to be generated. */ *(unsigned char *)type = 'N'; /* Set the norm and condition number. */ if (*imat == 2 || *imat == 8) { *cndnum = badc1; } else if (*imat == 3 || *imat == 9) { *cndnum = badc2; } else { *cndnum = 2.; } if (*imat == 4) { *anorm = small; } else if (*imat == 5) { *anorm = large; } else { *anorm = 1.; } } if (*n <= 1) { *cndnum = 1.; } return 0; /* End of ZLATB4 */ } /* zlatb4_ */
void pdlaqgs(SuperMatrix *A, double *r, double *c, double rowcnd, double colcnd, double amax, char *equed) { #define THRESH (0.1) /* Local variables */ NRformat_loc *Astore; double *Aval; int_t i, j, irow, jcol, m_loc; double large, small; /* Quick return if possible */ if (A->nrow <= 0 || A->ncol <= 0) { *(unsigned char *)equed = 'N'; return; } Astore = A->Store; Aval = Astore->nzval; m_loc = Astore->m_loc; /* Initialize LARGE and SMALL. */ small = dmach("Safe minimum") / dmach("Precision"); large = 1. / small; if (rowcnd >= THRESH && amax >= small && amax <= large) { if (colcnd >= THRESH) *(unsigned char *)equed = 'N'; else { /* Column scaling */ irow = Astore->fst_row; for (i = 0; i < m_loc; ++i) { for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; ++j) { jcol = Astore->colind[j]; Aval[j] *= c[jcol]; } ++irow; } *(unsigned char *)equed = 'C'; } } else if (colcnd >= THRESH) { /* Row scaling, no column scaling */ irow = Astore->fst_row; for (i = 0; i < m_loc; ++i) { for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; ++j) Aval[j] *= r[irow]; ++irow; } *(unsigned char *)equed = 'R'; } else { /* Both row and column scaling */ irow = Astore->fst_row; for (i = 0; i < m_loc; ++i) { for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; ++j) { jcol = Astore->colind[j]; Aval[j] = Aval[j] * r[irow] * c[jcol]; } ++irow; } *(unsigned char *)equed = 'B'; } return; } /* pdlaqgs */
double dPivotGrowth(int ncols, SuperMatrix *A, int *perm_c, SuperMatrix *L, SuperMatrix *U) { NCformat *Astore; SCformat *Lstore; NCformat *Ustore; double *Aval, *Lval, *Uval; int fsupc, nsupr, luptr, nz_in_U; int i, j, k, oldcol; int *inv_perm_c; double rpg, maxaj, maxuj; double smlnum; double *luval; /* Get machine constants. */ smlnum = dmach("S"); rpg = 1. / smlnum; Astore = A->Store; Lstore = L->Store; Ustore = U->Store; Aval = Astore->nzval; Lval = Lstore->nzval; Uval = Ustore->nzval; inv_perm_c = (int *) SUPERLU_MALLOC(A->ncol*sizeof(int)); for (j = 0; j < A->ncol; ++j) inv_perm_c[perm_c[j]] = j; for (k = 0; k <= Lstore->nsuper; ++k) { fsupc = L_FST_SUPC(k); nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc); luptr = L_NZ_START(fsupc); luval = &Lval[luptr]; nz_in_U = 1; for (j = fsupc; j < L_FST_SUPC(k+1) && j < ncols; ++j) { maxaj = 0.; oldcol = inv_perm_c[j]; for (i = Astore->colptr[oldcol]; i < Astore->colptr[oldcol+1]; ++i) maxaj = SUPERLU_MAX( maxaj, fabs(Aval[i]) ); maxuj = 0.; for (i = Ustore->colptr[j]; i < Ustore->colptr[j+1]; i++) maxuj = SUPERLU_MAX( maxuj, fabs(Uval[i]) ); /* Supernode */ for (i = 0; i < nz_in_U; ++i) maxuj = SUPERLU_MAX( maxuj, fabs(luval[i]) ); ++nz_in_U; luval += nsupr; if ( maxuj == 0. ) rpg = SUPERLU_MIN( rpg, 1.); else rpg = SUPERLU_MIN( rpg, maxaj / maxuj ); } if ( j >= ncols ) break; } SUPERLU_FREE(inv_perm_c); return (rpg); }
/* Subroutine */ int dlartg_(doublereal *f, doublereal *g, doublereal *cs, doublereal *sn, doublereal *r) { /* -- LAPACK auxiliary routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= DLARTG generate a plane rotation so that [ CS SN ] . [ F ] = [ R ] where CS**2 + SN**2 = 1. [ -SN CS ] [ G ] [ 0 ] This is a slower, more accurate version of the BLAS1 routine DROTG, with the following other differences: F and G are unchanged on return. If G=0, then CS=1 and SN=0. If F=0 and (G .ne. 0), then CS=0 and SN=1 without doing any floating point operations (saves work in DBDSQR when there are zeros on the diagonal). If F exceeds G in magnitude, CS will be positive. Arguments ========= F (input) DOUBLE PRECISION The first component of vector to be rotated. G (input) DOUBLE PRECISION The second component of vector to be rotated. CS (output) DOUBLE PRECISION The cosine of the rotation. SN (output) DOUBLE PRECISION The sine of the rotation. R (output) DOUBLE PRECISION The nonzero component of the rotated vector. ===================================================================== */ /* Initialized data */ static logical first = TRUE_; /* System generated locals */ integer i__1; doublereal d__1, d__2; /* Builtin functions */ double log(doublereal), pow_di(doublereal *, integer *), sqrt(doublereal); /* Local variables */ static integer i; static doublereal scale; static integer count; static doublereal f1, g1, safmn2, safmx2; extern doublereal dmach(char *); static doublereal safmin, eps; if (first) { first = FALSE_; safmin = dmach("S"); eps = dmach("E"); d__1 = dmach("B"); i__1 = (integer) (log(safmin / eps) / log(dmach("B")) / 2.); safmn2 = pow_di(&d__1, &i__1); safmx2 = 1. / safmn2; } if (*g == 0.) { *cs = 1.; *sn = 0.; *r = *f; } else if (*f == 0.) { *cs = 0.; *sn = 1.; *r = *g; } else { f1 = *f; g1 = *g; /* Computing MAX */ d__1 = abs(f1), d__2 = abs(g1); scale = max(d__1,d__2); if (scale >= safmx2) { count = 0; L10: ++count; f1 *= safmn2; g1 *= safmn2; /* Computing MAX */ d__1 = abs(f1), d__2 = abs(g1); scale = max(d__1,d__2); if (scale >= safmx2) { goto L10; } /* Computing 2nd power */ d__1 = f1; /* Computing 2nd power */ d__2 = g1; *r = sqrt(d__1 * d__1 + d__2 * d__2); *cs = f1 / *r; *sn = g1 / *r; i__1 = count; for (i = 1; i <= count; ++i) { *r *= safmx2; /* L20: */ } } else if (scale <= safmn2) { count = 0; L30: ++count; f1 *= safmx2; g1 *= safmx2; /* Computing MAX */ d__1 = abs(f1), d__2 = abs(g1); scale = max(d__1,d__2); if (scale <= safmn2) { goto L30; } /* Computing 2nd power */ d__1 = f1; /* Computing 2nd power */ d__2 = g1; *r = sqrt(d__1 * d__1 + d__2 * d__2); *cs = f1 / *r; *sn = g1 / *r; i__1 = count; for (i = 1; i <= count; ++i) { *r *= safmn2; /* L40: */ } } else { /* Computing 2nd power */ d__1 = f1; /* Computing 2nd power */ d__2 = g1; *r = sqrt(d__1 * d__1 + d__2 * d__2); *cs = f1 / *r; *sn = g1 / *r; } if (abs(*f) > abs(*g) && *cs < 0.) { *cs = -(*cs); *sn = -(*sn); *r = -(*r); } } return 0; /* End of DLARTG */ } /* dlartg_ */
void pzgsrfs_ABXglobal(int_t n, SuperMatrix *A, double anorm, LUstruct_t *LUstruct, gridinfo_t *grid, doublecomplex *B, int_t ldb, doublecomplex *X, int_t ldx, int nrhs, double *berr, SuperLUStat_t *stat, int *info) { #define ITMAX 20 Glu_persist_t *Glu_persist = LUstruct->Glu_persist; LocalLU_t *Llu = LUstruct->Llu; /* * Data structures used by matrix-vector multiply routine. */ int_t N_update; /* Number of variables updated on this process */ int_t *update; /* vector elements (global index) updated on this processor. */ int_t *bindx; doublecomplex *val; int_t *mv_sup_to_proc; /* Supernode to process mapping in matrix-vector multiply. */ /*-- end data structures for matrix-vector multiply --*/ doublecomplex *b, *ax, *R, *B_col, *temp, *work, *X_col, *x_trs, *dx_trs; double *rwork; int_t notran; int_t count, ii, j, jj, k, knsupc, lk, lwork, nprow, nsupers, nz, p; int i, iam, pkk; int_t *ilsum, *xsup; double eps, lstres; double s, safmin, safe1, safe2; /* NEW STUFF */ int_t num_diag_procs, *diag_procs; /* Record diagonal process numbers. */ int_t *diag_len; /* Length of the X vector on diagonal processes. */ /*-- Function prototypes --*/ extern void pzgstrs1(int_t, LUstruct_t *, gridinfo_t *, doublecomplex *, int, SuperLUStat_t *, int *); /* Test the input parameters. */ *info = 0; if ( n < 0 ) *info = -1; else if ( A->nrow != A->ncol || A->nrow < 0 || A->Stype != SLU_NCP || A->Dtype != SLU_Z || A->Mtype != SLU_GE ) *info = -2; else if ( ldb < SUPERLU_MAX(0, n) ) *info = -10; else if ( ldx < SUPERLU_MAX(0, n) ) *info = -12; else if ( nrhs < 0 ) *info = -13; if (*info != 0) { i = -(*info); xerbla_("pzgsrfs_ABXglobal", &i); return; } /* Quick return if possible. */ if ( n == 0 || nrhs == 0 ) { return; } /* Initialization. */ iam = grid->iam; nprow = grid->nprow; nsupers = Glu_persist->supno[n-1] + 1; xsup = Glu_persist->xsup; ilsum = Llu->ilsum; notran = 1; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter pzgsrfs_ABXglobal()"); #endif get_diag_procs(n, Glu_persist, grid, &num_diag_procs, &diag_procs, &diag_len); #if ( PRNTlevel>=1 ) if ( !iam ) { printf(".. number of diag processes = " IFMT "\n", num_diag_procs); PrintInt10("diag_procs", num_diag_procs, diag_procs); PrintInt10("diag_len", num_diag_procs, diag_len); } #endif if ( !(mv_sup_to_proc = intCalloc_dist(nsupers)) ) ABORT("Calloc fails for mv_sup_to_proc[]"); pzgsmv_AXglobal_setup(A, Glu_persist, grid, &N_update, &update, &val, &bindx, mv_sup_to_proc); i = CEILING( nsupers, nprow ); /* Number of local block rows */ ii = Llu->ldalsum + i * XK_H; k = SUPERLU_MAX(N_update, sp_ienv_dist(3)); jj = diag_len[0]; for (j = 1; j < num_diag_procs; ++j) jj = SUPERLU_MAX( jj, diag_len[j] ); jj = SUPERLU_MAX( jj, N_update ); lwork = N_update /* For ax and R */ + ii /* For dx_trs */ + ii /* For x_trs */ + k /* For b */ + jj; /* for temp */ if ( !(work = doublecomplexMalloc_dist(lwork)) ) ABORT("Malloc fails for work[]"); ax = R = work; dx_trs = work + N_update; x_trs = dx_trs + ii; b = x_trs + ii; temp = b + k; if ( !(rwork = SUPERLU_MALLOC(N_update * sizeof(double))) ) ABORT("Malloc fails for rwork[]"); #if ( DEBUGlevel>=2 ) { doublecomplex *dwork = doublecomplexMalloc_dist(n); for (i = 0; i < n; ++i) { if ( i & 1 ) dwork[i].r = 1.; else dwork[i].r = 2.; dwork[i].i = 0.; } /* Check correctness of matrix-vector multiply. */ pzgsmv_AXglobal(N_update, update, val, bindx, dwork, ax); PrintDouble5("Mult A*x", N_update, ax); SUPERLU_FREE(dwork); } #endif /* NZ = maximum number of nonzero elements in each row of A, plus 1 */ nz = A->ncol + 1; eps = dmach("Epsilon"); safmin = dmach("Safe minimum"); /* Set SAFE1 essentially to be the underflow threshold times the number of additions in each row. */ safe1 = nz * safmin; safe2 = safe1 / eps; #if ( DEBUGlevel>=1 ) if ( !iam ) printf(".. eps = %e\tanorm = %e\tsafe1 = %e\tsafe2 = %e\n", eps, anorm, safe1, safe2); #endif /* Do for each right-hand side ... */ for (j = 0; j < nrhs; ++j) { count = 0; lstres = 3.; /* Copy X into x on the diagonal processes. */ B_col = &B[j*ldb]; X_col = &X[j*ldx]; for (p = 0; p < num_diag_procs; ++p) { pkk = diag_procs[p]; if ( iam == pkk ) { for (k = p; k < nsupers; k += num_diag_procs) { knsupc = SuperSize( k ); lk = LBi( k, grid ); ii = ilsum[lk] + (lk+1)*XK_H; jj = FstBlockC( k ); for (i = 0; i < knsupc; ++i) x_trs[i+ii] = X_col[i+jj]; dx_trs[ii-XK_H].r = k;/* Block number prepended in header. */ } } } /* Copy B into b distributed the same way as matrix-vector product. */ if ( N_update ) ii = update[0]; for (i = 0; i < N_update; ++i) b[i] = B_col[i + ii]; while (1) { /* Loop until stopping criterion is satisfied. */ /* Compute residual R = B - op(A) * X, where op(A) = A, A**T, or A**H, depending on TRANS. */ /* Matrix-vector multiply. */ pzgsmv_AXglobal(N_update, update, val, bindx, X_col, ax); /* Compute residual. */ for (i = 0; i < N_update; ++i) z_sub(&R[i], &b[i], &ax[i]); /* Compute abs(op(A))*abs(X) + abs(B). */ pzgsmv_AXglobal_abs(N_update, update, val, bindx, X_col, rwork); for (i = 0; i < N_update; ++i) rwork[i] += slud_z_abs1(&b[i]); s = 0.0; for (i = 0; i < N_update; ++i) { if ( rwork[i] > safe2 ) { s = SUPERLU_MAX(s, slud_z_abs1(&R[i]) / rwork[i]); } else if ( rwork[i] != 0.0 ) { s = SUPERLU_MAX(s, (safe1 + slud_z_abs1(&R[i])) / rwork[i]); } /* If temp[i] is exactly 0.0 (computed by PxGSMV), then we know the true residual also must be exactly 0.0. */ } MPI_Allreduce( &s, &berr[j], 1, MPI_DOUBLE, MPI_MAX, grid->comm ); #if ( PRNTlevel>= 1 ) if ( !iam ) printf("(%2d) .. Step " IFMT ": berr[j] = %e\n", iam, count, berr[j]); #endif if ( berr[j] > eps && berr[j] * 2 <= lstres && count < ITMAX ) { /* Compute new dx. */ redist_all_to_diag(n, R, Glu_persist, Llu, grid, mv_sup_to_proc, dx_trs); pzgstrs1(n, LUstruct, grid, dx_trs, 1, stat, info); /* Update solution. */ for (p = 0; p < num_diag_procs; ++p) if ( iam == diag_procs[p] ) for (k = p; k < nsupers; k += num_diag_procs) { lk = LBi( k, grid ); ii = ilsum[lk] + (lk+1)*XK_H; knsupc = SuperSize( k ); for (i = 0; i < knsupc; ++i) z_add(&x_trs[i + ii], &x_trs[i + ii], &dx_trs[i + ii]); } lstres = berr[j]; ++count; /* Transfer x_trs (on diagonal processes) into X (on all processes). */ gather_1rhs_diag_to_all(n, x_trs, Glu_persist, Llu, grid, num_diag_procs, diag_procs, diag_len, X_col, temp); } else { break; } } /* end while */ stat->RefineSteps = count; } /* for j ... */ /* Deallocate storage used by matrix-vector multiplication. */ SUPERLU_FREE(diag_procs); SUPERLU_FREE(diag_len); if ( N_update ) { SUPERLU_FREE(update); SUPERLU_FREE(bindx); SUPERLU_FREE(val); } SUPERLU_FREE(mv_sup_to_proc); SUPERLU_FREE(work); SUPERLU_FREE(rwork); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit pzgsrfs_ABXglobal()"); #endif } /* PZGSRFS_ABXGLOBAL */
int zgst07(trans_t trans, int n, int nrhs, SuperMatrix *A, doublecomplex *b, int ldb, doublecomplex *x, int ldx, doublecomplex *xact, int ldxact, double *ferr, double *berr, double *reslts) { /* Purpose ======= ZGST07 tests the error bounds from iterative refinement for the computed solution to a system of equations op(A)*X = B, where A is a general n by n matrix and op(A) = A or A**T, depending on TRANS. RESLTS(1) = test of the error bound = norm(X - XACT) / ( norm(X) * FERR ) A large value is returned if this ratio is not less than one. RESLTS(2) = residual from the iterative refinement routine = the maximum of BERR / ( (n+1)*EPS + (*) ), where (*) = (n+1)*UNFL / (min_i (abs(op(A))*abs(X) +abs(b))_i ) Arguments ========= TRANS (input) trans_t Specifies the form of the system of equations. = NOTRANS: A *x = b = TRANS : A'*x = b, where A' is the transpose of A = CONJ : A'*x = b, where A' is the transpose of A N (input) INT The number of rows of the matrices X and XACT. N >= 0. NRHS (input) INT The number of columns of the matrices X and XACT. NRHS >= 0. A (input) SuperMatrix *, dimension (A->nrow, A->ncol) The original n by n matrix A. B (input) DOUBLE COMPLEX PRECISION array, dimension (LDB,NRHS) The right hand side vectors for the system of linear equations. LDB (input) INT The leading dimension of the array B. LDB >= max(1,N). X (input) DOUBLE COMPLEX PRECISION array, dimension (LDX,NRHS) The computed solution vectors. Each vector is stored as a column of the matrix X. LDX (input) INT The leading dimension of the array X. LDX >= max(1,N). XACT (input) DOUBLE COMPLEX PRECISION array, dimension (LDX,NRHS) The exact solution vectors. Each vector is stored as a column of the matrix XACT. LDXACT (input) INT The leading dimension of the array XACT. LDXACT >= max(1,N). FERR (input) DOUBLE COMPLEX PRECISION array, dimension (NRHS) The estimated forward error bounds for each solution vector X. If XTRUE is the true solution, FERR bounds the magnitude of the largest entry in (X - XTRUE) divided by the magnitude of the largest entry in X. BERR (input) DOUBLE COMPLEX PRECISION array, dimension (NRHS) The componentwise relative backward error of each solution vector (i.e., the smallest relative change in any entry of A or B that makes X an exact solution). RESLTS (output) DOUBLE PRECISION array, dimension (2) The maximum over the NRHS solution vectors of the ratios: RESLTS(1) = norm(X - XACT) / ( norm(X) * FERR ) RESLTS(2) = BERR / ( (n+1)*EPS + (*) ) ===================================================================== */ /* Table of constant values */ int c__1 = 1; /* System generated locals */ double d__1, d__2; double d__3, d__4; /* Local variables */ double diff, axbi; int imax, irow, n__1; int i, j, k; double unfl, ovfl; double xnorm; double errbnd; int notran; double eps, tmp; double *rwork; doublecomplex *Aval; NCformat *Astore; /* Function prototypes */ extern int lsame_(char *, char *); extern int izamax_(int *, doublecomplex *, int *); /* Quick exit if N = 0 or NRHS = 0. */ if ( n <= 0 || nrhs <= 0 ) { reslts[0] = 0.; reslts[1] = 0.; return 0; } eps = dmach("Epsilon"); unfl = dmach("Safe minimum"); ovfl = 1. / unfl; notran = (trans == NOTRANS); rwork = (double *) SUPERLU_MALLOC(n*sizeof(double)); if ( !rwork ) ABORT("SUPERLU_MALLOC fails for rwork"); Astore = A->Store; Aval = (doublecomplex *) Astore->nzval; /* Test 1: Compute the maximum of norm(X - XACT) / ( norm(X) * FERR ) over all the vectors X and XACT using the infinity-norm. */ errbnd = 0.; for (j = 0; j < nrhs; ++j) { n__1 = n; imax = izamax_(&n__1, &x[j*ldx], &c__1); d__1 = (d__2 = x[imax-1 + j*ldx].r, fabs(d__2)) + (d__3 = x[imax-1 + j*ldx].i, fabs(d__3)); xnorm = SUPERLU_MAX(d__1,unfl); diff = 0.; for (i = 0; i < n; ++i) { d__1 = (d__2 = x[i+j*ldx].r - xact[i+j*ldxact].r, fabs(d__2)) + (d__3 = x[i+j*ldx].i - xact[i+j*ldxact].i, fabs(d__3)); diff = SUPERLU_MAX(diff, d__1); } if (xnorm > 1.) { goto L20; } else if (diff <= ovfl * xnorm) { goto L20; } else { errbnd = 1. / eps; goto L30; } L20: #if 0 if (diff / xnorm <= ferr[j]) { d__1 = diff / xnorm / ferr[j]; errbnd = SUPERLU_MAX(errbnd,d__1); } else { errbnd = 1. / eps; } #endif d__1 = diff / xnorm / ferr[j]; errbnd = SUPERLU_MAX(errbnd,d__1); /*printf("Ferr: %f\n", errbnd);*/ L30: ; } reslts[0] = errbnd; /* Test 2: Compute the maximum of BERR / ( (n+1)*EPS + (*) ), where (*) = (n+1)*UNFL / (min_i (abs(op(A))*abs(X) + abs(b))_i ) */ for (k = 0; k < nrhs; ++k) { for (i = 0; i < n; ++i) rwork[i] = (d__1 = b[i + k*ldb].r, fabs(d__1)) + (d__2 = b[i + k*ldb].i, fabs(d__2)); if ( notran ) { for (j = 0; j < n; ++j) { tmp = (d__1 = x[j + k*ldx].r, fabs(d__1)) + (d__2 = x[j + k*ldx].i, fabs(d__2)); for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) { d__1 = (d__2 = Aval[i].r, fabs(d__2)) + (d__3 = Aval[i].i, fabs(d__3)); rwork[Astore->rowind[i]] += d__1 * tmp; } } } else { for (j = 0; j < n; ++j) { tmp = 0.; for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) { irow = Astore->rowind[i]; d__1 = (d__2 = x[irow + k*ldx].r, fabs(d__2)) + (d__3 = x[irow + k*ldx].i, fabs(d__3)); d__2 = (d__3 = Aval[i].r, fabs(d__3)) + (d__4 = Aval[i].i, fabs(d__4)); tmp += d__2 * d__1; } rwork[j] += tmp; } } axbi = rwork[0]; for (i = 1; i < n; ++i) axbi = SUPERLU_MIN(axbi, rwork[i]); /* Computing MAX */ d__1 = axbi, d__2 = (n + 1) * unfl; tmp = berr[k] / ((n + 1) * eps + (n + 1) * unfl / SUPERLU_MAX(d__1,d__2)); if (k == 0) { reslts[1] = tmp; } else { reslts[1] = SUPERLU_MAX(reslts[1],tmp); } } SUPERLU_FREE(rwork); return 0; } /* zgst07 */