void cgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U, int *perm_r, int *perm_c, equed_t equed, float *R, float *C, SuperMatrix *B, SuperMatrix *X, float *ferr, float *berr, Gstat_t *Gstat, int *info) { /* * -- SuperLU MT routine (version 2.0) -- * Lawrence Berkeley National Lab, Univ. of California Berkeley, * and Xerox Palo Alto Research Center. * September 10, 2007 * * * Purpose * ======= * * cgsrfs improves the computed solution to a system of linear * equations and provides error bounds and backward error estimates for * the solution. * * See supermatrix.h for the definition of 'SuperMatrix' structure. * * Arguments * ========= * * trans (input) trans_t * Specifies the form of the system of equations: * = NOTRANS: A * X = B (No transpose) * = TRANS: A**T * X = B (Transpose) * = CONJ: A**H * X = B (Conjugate transpose = Transpose) * * A (input) SuperMatrix* * The original matrix A in the system, or the scaled A if * equilibration was done. The type of A can be: * Stype = NC, Dtype = _D, Mtype = GE. * * L (input) SuperMatrix* * The factor L from the factorization Pr*A*Pc=L*U. Use * compressed row subscripts storage for supernodes, * i.e., L has types: Stype = SCP, Dtype = _D, Mtype = TRLU. * * U (input) SuperMatrix* * The factor U from the factorization Pr*A*Pc=L*U as computed by * dgstrf(). Use column-wise storage scheme, * i.e., U has types: Stype = NCP, Dtype = _D, Mtype = TRU. * * perm_r (input) int*, dimension (A->nrow) * Row permutation vector, which defines the permutation matrix Pr; * perm_r[i] = j means row i of A is in position j in Pr*A. * * perm_c (input) int*, dimension (A->ncol) * Column permutation vector, which defines the * permutation matrix Pc; perm_c[i] = j means column i of A is * in position j in A*Pc. * * equed (input) equed_t * Specifies the form of equilibration that was done. * = NOEQUIL: No equilibration. * = ROW: Row equilibration, i.e., A was premultiplied by diag(R). * = COL: Column equilibration, i.e., A was postmultiplied by * diag(C). * = BOTH: Both row and column equilibration, i.e., A was replaced * by diag(R)*A*diag(C). * * R (input) double*, dimension (A->nrow) * The row scale factors for A. * If equed = ROW or BOTH, A is premultiplied by diag(R). * If equed = NOEQUIL or COL, R is not accessed. * * C (input) double*, dimension (A->ncol) * The column scale factors for A. * If equed = COL or BOTH, A is postmultiplied by diag(C). * If equed = NOEQUIL or ROW, C is not accessed. * * B (input) SuperMatrix* * B has types: Stype = DN, Dtype = _D, Mtype = GE. * The right hand side matrix B. * * X (input/output) SuperMatrix* * X has types: Stype = DN, Dtype = _D, Mtype = GE. * On entry, the solution matrix X, as computed by dgstrs(). * On exit, the improved solution matrix X. * * FERR (output) double*, dimension (B->ncol) * The estimated forward error bound for each solution vector * X(j) (the j-th column of the solution matrix X). * If XTRUE is the true solution corresponding to X(j), FERR(j) * is an estimated upper bound for the magnitude of the largest * element in (X(j) - XTRUE) divided by the magnitude of the * largest element in X(j). The estimate is as reliable as * the estimate for RCOND, and is almost always a slight * overestimate of the true error. * * BERR (output) double*, dimension (B->ncol) * The componentwise relative backward error of each solution * vector X(j) (i.e., the smallest relative change in * any element of A or B that makes X(j) an exact solution). * * info (output) int* * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * * Internal Parameters * =================== * * ITMAX is the maximum number of steps of iterative refinement. * */ #define ITMAX 5 /* Table of constant values */ int ione = 1; complex ndone = {-1., 0.}; complex done = {1., 0.}; /* Local variables */ NCformat *Astore; complex *Aval; SuperMatrix Bjcol; DNformat *Bstore, *Xstore, *Bjcol_store; complex *Bmat, *Xmat, *Bptr, *Xptr; int kase; float safe1, safe2; int i, j, k, irow, nz, count, notran, rowequ, colequ; int ldb, ldx, nrhs; float s, xk, lstres, eps, safmin; char transc[1]; trans_t transt; complex *work; float *rwork; int *iwork; extern double slamch_(char *); extern int clacon_(int *, complex *, complex *, float *, int *); #ifdef _CRAY extern int CCOPY(int *, complex *, int *, complex *, int *); extern int CSAXPY(int *, complex *, complex *, int *, complex *, int *); #else extern int ccopy_(int *, complex *, int *, complex *, int *); extern int caxpy_(int *, complex *, complex *, int *, complex *, int *); #endif Astore = A->Store; Aval = Astore->nzval; Bstore = B->Store; Xstore = X->Store; Bmat = Bstore->nzval; Xmat = Xstore->nzval; ldb = Bstore->lda; ldx = Xstore->lda; nrhs = B->ncol; /* Test the input parameters */ *info = 0; notran = (trans == NOTRANS); if ( !notran && trans != TRANS && trans != CONJ ) *info = -1; else if ( A->nrow != A->ncol || A->nrow < 0 || A->Stype != SLU_NC || A->Dtype != SLU_C || A->Mtype != SLU_GE ) *info = -2; else if ( L->nrow != L->ncol || L->nrow < 0 || L->Stype != SLU_SCP || L->Dtype != SLU_C || L->Mtype != SLU_TRLU ) *info = -3; else if ( U->nrow != U->ncol || U->nrow < 0 || U->Stype != SLU_NCP || U->Dtype != SLU_C || U->Mtype != SLU_TRU ) *info = -4; else if ( ldb < SUPERLU_MAX(0, A->nrow) || B->Stype != SLU_DN || B->Dtype != SLU_C || B->Mtype != SLU_GE ) *info = -10; else if ( ldx < SUPERLU_MAX(0, A->nrow) || X->Stype != SLU_DN || X->Dtype != SLU_C || X->Mtype != SLU_GE ) *info = -11; if (*info != 0) { i = -(*info); xerbla_("cgsrfs", &i); return; } /* Quick return if possible */ if ( A->nrow == 0 || nrhs == 0) { for (j = 0; j < nrhs; ++j) { ferr[j] = 0.; berr[j] = 0.; } return; } rowequ = (equed == ROW) || (equed == BOTH); colequ = (equed == COL) || (equed == BOTH); /* Allocate working space */ work = complexMalloc(2*A->nrow); rwork = (float *) SUPERLU_MALLOC( (size_t) A->nrow * sizeof(float) ); iwork = intMalloc(A->nrow); if ( !work || !rwork || !iwork ) SUPERLU_ABORT("Malloc fails for work/rwork/iwork."); if ( notran ) { *(unsigned char *)transc = 'N'; transt = TRANS; } else { *(unsigned char *)transc = 'T'; transt = NOTRANS; } /* NZ = maximum number of nonzero elements in each row of A, plus 1 */ nz = A->ncol + 1; eps = slamch_("Epsilon"); safmin = slamch_("Safe minimum"); /* Set SAFE1 essentially to be the underflow threshold times the number of additions in each row. */ safe1 = nz * safmin; safe2 = safe1 / eps; /* Compute the number of nonzeros in each row (or column) of A */ for (i = 0; i < A->nrow; ++i) iwork[i] = 0; if ( notran ) { for (k = 0; k < A->ncol; ++k) for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) ++iwork[Astore->rowind[i]]; } else { for (k = 0; k < A->ncol; ++k) iwork[k] = Astore->colptr[k+1] - Astore->colptr[k]; } /* Copy one column of RHS B into Bjcol. */ Bjcol.Stype = B->Stype; Bjcol.Dtype = B->Dtype; Bjcol.Mtype = B->Mtype; Bjcol.nrow = B->nrow; Bjcol.ncol = 1; Bjcol.Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) ); if ( !Bjcol.Store ) SUPERLU_ABORT("SUPERLU_MALLOC fails for Bjcol.Store"); Bjcol_store = Bjcol.Store; Bjcol_store->lda = ldb; Bjcol_store->nzval = work; /* address aliasing */ /* Do for each right hand side ... */ for (j = 0; j < nrhs; ++j) { count = 0; lstres = 3.; Bptr = &Bmat[j*ldb]; Xptr = &Xmat[j*ldx]; while (1) { /* Loop until stopping criterion is satisfied. */ /* Compute residual R = B - op(A) * X, where op(A) = A, A**T, or A**H, depending on TRANS. */ #ifdef _CRAY CCOPY(&A->nrow, Bptr, &ione, work, &ione); #else ccopy_(&A->nrow, Bptr, &ione, work, &ione); #endif sp_cgemv(transc, ndone, A, Xptr, ione, done, work, ione); /* Compute componentwise relative backward error from formula max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) where abs(Z) is the componentwise absolute value of the matrix or vector Z. If the i-th component of the denominator is less than SAFE2, then SAFE1 is added to the i-th component of the numerator before dividing. */ for (i = 0; i < A->nrow; ++i) rwork[i] = c_abs1( &Bptr[i] ); /* Compute abs(op(A))*abs(X) + abs(B). */ if (notran) { for (k = 0; k < A->ncol; ++k) { xk = c_abs1( &Xptr[k] ); for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) rwork[Astore->rowind[i]] += c_abs1(&Aval[i]) * xk; } } else { for (k = 0; k < A->ncol; ++k) { s = 0.; for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) { irow = Astore->rowind[i]; s += c_abs1(&Aval[i]) * c_abs1(&Xptr[irow]); } rwork[k] += s; } } s = 0.; for (i = 0; i < A->nrow; ++i) { if (rwork[i] > safe2) { s = SUPERLU_MAX( s, c_abs1(&work[i]) / rwork[i] ); } else if ( rwork[i] != 0.0 ) { s = SUPERLU_MAX( s, (c_abs1(&work[i]) + safe1) / rwork[i] ); } /* If rwork[i] is exactly 0.0, then we know the true residual also must be exactly 0.0. */ } berr[j] = s; /* Test stopping criterion. Continue iterating if 1) The residual BERR(J) is larger than machine epsilon, and 2) BERR(J) decreased by at least a factor of 2 during the last iteration, and 3) At most ITMAX iterations tried. */ if (berr[j] > eps && berr[j] * 2. <= lstres && count < ITMAX) { /* Update solution and try again. */ cgstrs (trans, L, U, perm_r, perm_c, &Bjcol, Gstat, info); #ifdef _CRAY CAXPY(&A->nrow, &done, work, &ione, &Xmat[j*ldx], &ione); #else caxpy_(&A->nrow, &done, work, &ione, &Xmat[j*ldx], &ione); #endif lstres = berr[j]; ++count; } else { break; } } /* end while */ /* Bound error from formula: norm(X - XTRUE) / norm(X) .le. FERR = norm( abs(inv(op(A)))* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) where norm(Z) is the magnitude of the largest component of Z inv(op(A)) is the inverse of op(A) abs(Z) is the componentwise absolute value of the matrix or vector Z NZ is the maximum number of nonzeros in any row of A, plus 1 EPS is machine epsilon The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) is incremented by SAFE1 if the i-th component of abs(op(A))*abs(X) + abs(B) is less than SAFE2. Use CLACON to estimate the infinity-norm of the matrix inv(op(A)) * diag(W), where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */ for (i = 0; i < A->nrow; ++i) rwork[i] = c_abs1( &Bptr[i] ); /* Compute abs(op(A))*abs(X) + abs(B). */ if ( notran ) { for (k = 0; k < A->ncol; ++k) { xk = c_abs1( &Xptr[k] ); for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) rwork[Astore->rowind[i]] += c_abs1(&Aval[i]) * xk; } } else { for (k = 0; k < A->ncol; ++k) { s = 0.; for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) { irow = Astore->rowind[i]; xk = c_abs1( &Xptr[irow] ); s += c_abs1(&Aval[i]) * xk; } rwork[k] += s; } } for (i = 0; i < A->nrow; ++i) if (rwork[i] > safe2) rwork[i] = c_abs(&work[i]) + (iwork[i]+1)*eps*rwork[i]; else rwork[i] = c_abs(&work[i])+(iwork[i]+1)*eps*rwork[i]+safe1; kase = 0; do { clacon_(&A->nrow, &work[A->nrow], work, &ferr[j], &kase); if (kase == 0) break; if (kase == 1) { /* Multiply by diag(W)*inv(op(A)**T)*(diag(C) or diag(R)). */ if ( notran && colequ ) for (i = 0; i < A->ncol; ++i) { cs_mult(&work[i], &work[i], C[i]); } else if ( !notran && rowequ ) for (i = 0; i < A->nrow; ++i) { cs_mult(&work[i], &work[i], R[i]); } cgstrs (transt, L, U, perm_r, perm_c, &Bjcol, Gstat, info); for (i = 0; i < A->nrow; ++i) { cs_mult(&work[i], &work[i], rwork[i]); } } else { /* Multiply by (diag(C) or diag(R))*inv(op(A))*diag(W). */ for (i = 0; i < A->nrow; ++i) { cs_mult(&work[i], &work[i], rwork[i]); } cgstrs (trans, L, U, perm_r, perm_c, &Bjcol, Gstat, info); if ( notran && colequ ) for (i = 0; i < A->ncol; ++i) { cs_mult(&work[i], &work[i], C[i]); } else if ( !notran && rowequ ) for (i = 0; i < A->ncol; ++i) { cs_mult(&work[i], &work[i], R[i]); } } } while ( kase != 0 ); /* Normalize error. */ lstres = 0.; if ( notran && colequ ) { for (i = 0; i < A->nrow; ++i) lstres = SUPERLU_MAX( lstres, C[i] * c_abs1( &Xptr[i]) ); } else if ( !notran && rowequ ) { for (i = 0; i < A->nrow; ++i) lstres = SUPERLU_MAX( lstres, R[i] * c_abs1( &Xptr[i]) ); } else { for (i = 0; i < A->nrow; ++i) lstres = SUPERLU_MAX( lstres, c_abs1( &Xptr[i]) ); } if ( lstres != 0. ) ferr[j] /= lstres; } /* for each RHS j ... */ SUPERLU_FREE(work); SUPERLU_FREE(rwork); SUPERLU_FREE(iwork); SUPERLU_FREE(Bjcol.Store); return; } /* cgsrfs */
/*! \brief * * <pre> * Purpose * ======= * * CGSRFS improves the computed solution to a system of linear * equations and provides error bounds and backward error estimates for * the solution. * * If equilibration was performed, the system becomes: * (diag(R)*A_original*diag(C)) * X = diag(R)*B_original. * * See supermatrix.h for the definition of 'SuperMatrix' structure. * * Arguments * ========= * * trans (input) trans_t * Specifies the form of the system of equations: * = NOTRANS: A * X = B (No transpose) * = TRANS: A'* X = B (Transpose) * = CONJ: A**H * X = B (Conjugate transpose) * * A (input) SuperMatrix* * The original matrix A in the system, or the scaled A if * equilibration was done. The type of A can be: * Stype = SLU_NC, Dtype = SLU_C, Mtype = SLU_GE. * * L (input) SuperMatrix* * The factor L from the factorization Pr*A*Pc=L*U. Use * compressed row subscripts storage for supernodes, * i.e., L has types: Stype = SLU_SC, Dtype = SLU_C, Mtype = SLU_TRLU. * * U (input) SuperMatrix* * The factor U from the factorization Pr*A*Pc=L*U as computed by * cgstrf(). Use column-wise storage scheme, * i.e., U has types: Stype = SLU_NC, Dtype = SLU_C, Mtype = SLU_TRU. * * perm_c (input) int*, dimension (A->ncol) * Column permutation vector, which defines the * permutation matrix Pc; perm_c[i] = j means column i of A is * in position j in A*Pc. * * perm_r (input) int*, dimension (A->nrow) * Row permutation vector, which defines the permutation matrix Pr; * perm_r[i] = j means row i of A is in position j in Pr*A. * * equed (input) Specifies the form of equilibration that was done. * = 'N': No equilibration. * = 'R': Row equilibration, i.e., A was premultiplied by diag(R). * = 'C': Column equilibration, i.e., A was postmultiplied by * diag(C). * = 'B': Both row and column equilibration, i.e., A was replaced * by diag(R)*A*diag(C). * * R (input) float*, dimension (A->nrow) * The row scale factors for A. * If equed = 'R' or 'B', A is premultiplied by diag(R). * If equed = 'N' or 'C', R is not accessed. * * C (input) float*, dimension (A->ncol) * The column scale factors for A. * If equed = 'C' or 'B', A is postmultiplied by diag(C). * If equed = 'N' or 'R', C is not accessed. * * B (input) SuperMatrix* * B has types: Stype = SLU_DN, Dtype = SLU_C, Mtype = SLU_GE. * The right hand side matrix B. * if equed = 'R' or 'B', B is premultiplied by diag(R). * * X (input/output) SuperMatrix* * X has types: Stype = SLU_DN, Dtype = SLU_C, Mtype = SLU_GE. * On entry, the solution matrix X, as computed by cgstrs(). * On exit, the improved solution matrix X. * if *equed = 'C' or 'B', X should be premultiplied by diag(C) * in order to obtain the solution to the original system. * * FERR (output) float*, dimension (B->ncol) * The estimated forward error bound for each solution vector * X(j) (the j-th column of the solution matrix X). * If XTRUE is the true solution corresponding to X(j), FERR(j) * is an estimated upper bound for the magnitude of the largest * element in (X(j) - XTRUE) divided by the magnitude of the * largest element in X(j). The estimate is as reliable as * the estimate for RCOND, and is almost always a slight * overestimate of the true error. * * BERR (output) float*, dimension (B->ncol) * The componentwise relative backward error of each solution * vector X(j) (i.e., the smallest relative change in * any element of A or B that makes X(j) an exact solution). * * stat (output) SuperLUStat_t* * Record the statistics on runtime and floating-point operation count. * See util.h for the definition of 'SuperLUStat_t'. * * info (output) int* * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * * Internal Parameters * =================== * * ITMAX is the maximum number of steps of iterative refinement. * * </pre> */ void cgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U, int *perm_c, int *perm_r, char *equed, float *R, float *C, SuperMatrix *B, SuperMatrix *X, float *ferr, float *berr, SuperLUStat_t *stat, int *info) { #define ITMAX 5 /* Table of constant values */ int ione = 1; complex ndone = {-1., 0.}; complex done = {1., 0.}; /* Local variables */ NCformat *Astore; complex *Aval; SuperMatrix Bjcol; DNformat *Bstore, *Xstore, *Bjcol_store; complex *Bmat, *Xmat, *Bptr, *Xptr; int kase; float safe1, safe2; int i, j, k, irow, nz, count, notran, rowequ, colequ; int ldb, ldx, nrhs; float s, xk, lstres, eps, safmin; char transc[1]; trans_t transt; complex *work; float *rwork; int *iwork; int isave[3]; extern int clacon2_(int *, complex *, complex *, float *, int *, int []); #ifdef _CRAY extern int CCOPY(int *, complex *, int *, complex *, int *); extern int CSAXPY(int *, complex *, complex *, int *, complex *, int *); #else extern int ccopy_(int *, complex *, int *, complex *, int *); extern int caxpy_(int *, complex *, complex *, int *, complex *, int *); #endif Astore = A->Store; Aval = Astore->nzval; Bstore = B->Store; Xstore = X->Store; Bmat = Bstore->nzval; Xmat = Xstore->nzval; ldb = Bstore->lda; ldx = Xstore->lda; nrhs = B->ncol; /* Test the input parameters */ *info = 0; notran = (trans == NOTRANS); if ( !notran && trans != TRANS && trans != CONJ ) *info = -1; else if ( A->nrow != A->ncol || A->nrow < 0 || A->Stype != SLU_NC || A->Dtype != SLU_C || A->Mtype != SLU_GE ) *info = -2; else if ( L->nrow != L->ncol || L->nrow < 0 || L->Stype != SLU_SC || L->Dtype != SLU_C || L->Mtype != SLU_TRLU ) *info = -3; else if ( U->nrow != U->ncol || U->nrow < 0 || U->Stype != SLU_NC || U->Dtype != SLU_C || U->Mtype != SLU_TRU ) *info = -4; else if ( ldb < SUPERLU_MAX(0, A->nrow) || B->Stype != SLU_DN || B->Dtype != SLU_C || B->Mtype != SLU_GE ) *info = -10; else if ( ldx < SUPERLU_MAX(0, A->nrow) || X->Stype != SLU_DN || X->Dtype != SLU_C || X->Mtype != SLU_GE ) *info = -11; if (*info != 0) { i = -(*info); input_error("cgsrfs", &i); return; } /* Quick return if possible */ if ( A->nrow == 0 || nrhs == 0) { for (j = 0; j < nrhs; ++j) { ferr[j] = 0.; berr[j] = 0.; } return; } rowequ = lsame_(equed, "R") || lsame_(equed, "B"); colequ = lsame_(equed, "C") || lsame_(equed, "B"); /* Allocate working space */ work = complexMalloc(2*A->nrow); rwork = (float *) SUPERLU_MALLOC( A->nrow * sizeof(float) ); iwork = intMalloc(A->nrow); if ( !work || !rwork || !iwork ) ABORT("Malloc fails for work/rwork/iwork."); if ( notran ) { *(unsigned char *)transc = 'N'; transt = TRANS; } else { *(unsigned char *)transc = 'T'; transt = NOTRANS; } /* NZ = maximum number of nonzero elements in each row of A, plus 1 */ nz = A->ncol + 1; eps = smach("Epsilon"); safmin = smach("Safe minimum"); /* Set SAFE1 essentially to be the underflow threshold times the number of additions in each row. */ safe1 = nz * safmin; safe2 = safe1 / eps; /* Compute the number of nonzeros in each row (or column) of A */ for (i = 0; i < A->nrow; ++i) iwork[i] = 0; if ( notran ) { for (k = 0; k < A->ncol; ++k) for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) ++iwork[Astore->rowind[i]]; } else { for (k = 0; k < A->ncol; ++k) iwork[k] = Astore->colptr[k+1] - Astore->colptr[k]; } /* Copy one column of RHS B into Bjcol. */ Bjcol.Stype = B->Stype; Bjcol.Dtype = B->Dtype; Bjcol.Mtype = B->Mtype; Bjcol.nrow = B->nrow; Bjcol.ncol = 1; Bjcol.Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) ); if ( !Bjcol.Store ) ABORT("SUPERLU_MALLOC fails for Bjcol.Store"); Bjcol_store = Bjcol.Store; Bjcol_store->lda = ldb; Bjcol_store->nzval = work; /* address aliasing */ /* Do for each right hand side ... */ for (j = 0; j < nrhs; ++j) { count = 0; lstres = 3.; Bptr = &Bmat[j*ldb]; Xptr = &Xmat[j*ldx]; while (1) { /* Loop until stopping criterion is satisfied. */ /* Compute residual R = B - op(A) * X, where op(A) = A, A**T, or A**H, depending on TRANS. */ #ifdef _CRAY CCOPY(&A->nrow, Bptr, &ione, work, &ione); #else ccopy_(&A->nrow, Bptr, &ione, work, &ione); #endif sp_cgemv(transc, ndone, A, Xptr, ione, done, work, ione); /* Compute componentwise relative backward error from formula max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) where abs(Z) is the componentwise absolute value of the matrix or vector Z. If the i-th component of the denominator is less than SAFE2, then SAFE1 is added to the i-th component of the numerator before dividing. */ for (i = 0; i < A->nrow; ++i) rwork[i] = c_abs1( &Bptr[i] ); /* Compute abs(op(A))*abs(X) + abs(B). */ if (notran) { for (k = 0; k < A->ncol; ++k) { xk = c_abs1( &Xptr[k] ); for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) rwork[Astore->rowind[i]] += c_abs1(&Aval[i]) * xk; } } else { for (k = 0; k < A->ncol; ++k) { s = 0.; for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) { irow = Astore->rowind[i]; s += c_abs1(&Aval[i]) * c_abs1(&Xptr[irow]); } rwork[k] += s; } } s = 0.; for (i = 0; i < A->nrow; ++i) { if (rwork[i] > safe2) { s = SUPERLU_MAX( s, c_abs1(&work[i]) / rwork[i] ); } else if ( rwork[i] != 0.0 ) { s = SUPERLU_MAX( s, (c_abs1(&work[i]) + safe1) / rwork[i] ); } /* If rwork[i] is exactly 0.0, then we know the true residual also must be exactly 0.0. */ } berr[j] = s; /* Test stopping criterion. Continue iterating if 1) The residual BERR(J) is larger than machine epsilon, and 2) BERR(J) decreased by at least a factor of 2 during the last iteration, and 3) At most ITMAX iterations tried. */ if (berr[j] > eps && berr[j] * 2. <= lstres && count < ITMAX) { /* Update solution and try again. */ cgstrs (trans, L, U, perm_c, perm_r, &Bjcol, stat, info); #ifdef _CRAY CAXPY(&A->nrow, &done, work, &ione, &Xmat[j*ldx], &ione); #else caxpy_(&A->nrow, &done, work, &ione, &Xmat[j*ldx], &ione); #endif lstres = berr[j]; ++count; } else { break; } } /* end while */ stat->RefineSteps = count; /* Bound error from formula: norm(X - XTRUE) / norm(X) .le. FERR = norm( abs(inv(op(A)))* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) where norm(Z) is the magnitude of the largest component of Z inv(op(A)) is the inverse of op(A) abs(Z) is the componentwise absolute value of the matrix or vector Z NZ is the maximum number of nonzeros in any row of A, plus 1 EPS is machine epsilon The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) is incremented by SAFE1 if the i-th component of abs(op(A))*abs(X) + abs(B) is less than SAFE2. Use CLACON2 to estimate the infinity-norm of the matrix inv(op(A)) * diag(W), where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */ for (i = 0; i < A->nrow; ++i) rwork[i] = c_abs1( &Bptr[i] ); /* Compute abs(op(A))*abs(X) + abs(B). */ if ( notran ) { for (k = 0; k < A->ncol; ++k) { xk = c_abs1( &Xptr[k] ); for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) rwork[Astore->rowind[i]] += c_abs1(&Aval[i]) * xk; } } else { for (k = 0; k < A->ncol; ++k) { s = 0.; for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) { irow = Astore->rowind[i]; xk = c_abs1( &Xptr[irow] ); s += c_abs1(&Aval[i]) * xk; } rwork[k] += s; } } for (i = 0; i < A->nrow; ++i) if (rwork[i] > safe2) rwork[i] = c_abs(&work[i]) + (iwork[i]+1)*eps*rwork[i]; else rwork[i] = c_abs(&work[i])+(iwork[i]+1)*eps*rwork[i]+safe1; kase = 0; do { clacon2_(&A->nrow, &work[A->nrow], work, &ferr[j], &kase, isave); if (kase == 0) break; if (kase == 1) { /* Multiply by diag(W)*inv(op(A)**T)*(diag(C) or diag(R)). */ if ( notran && colequ ) for (i = 0; i < A->ncol; ++i) { cs_mult(&work[i], &work[i], C[i]); } else if ( !notran && rowequ ) for (i = 0; i < A->nrow; ++i) { cs_mult(&work[i], &work[i], R[i]); } cgstrs (transt, L, U, perm_c, perm_r, &Bjcol, stat, info); for (i = 0; i < A->nrow; ++i) { cs_mult(&work[i], &work[i], rwork[i]); } } else { /* Multiply by (diag(C) or diag(R))*inv(op(A))*diag(W). */ for (i = 0; i < A->nrow; ++i) { cs_mult(&work[i], &work[i], rwork[i]); } cgstrs (trans, L, U, perm_c, perm_r, &Bjcol, stat, info); if ( notran && colequ ) for (i = 0; i < A->ncol; ++i) { cs_mult(&work[i], &work[i], C[i]); } else if ( !notran && rowequ ) for (i = 0; i < A->ncol; ++i) { cs_mult(&work[i], &work[i], R[i]); } } } while ( kase != 0 ); /* Normalize error. */ lstres = 0.; if ( notran && colequ ) { for (i = 0; i < A->nrow; ++i) lstres = SUPERLU_MAX( lstres, C[i] * c_abs1( &Xptr[i]) ); } else if ( !notran && rowequ ) { for (i = 0; i < A->nrow; ++i) lstres = SUPERLU_MAX( lstres, R[i] * c_abs1( &Xptr[i]) ); } else { for (i = 0; i < A->nrow; ++i) lstres = SUPERLU_MAX( lstres, c_abs1( &Xptr[i]) ); } if ( lstres != 0. ) ferr[j] /= lstres; } /* for each RHS j ... */ SUPERLU_FREE(work); SUPERLU_FREE(rwork); SUPERLU_FREE(iwork); SUPERLU_FREE(Bjcol.Store); return; } /* cgsrfs */
int main(int argc, char *argv[]) { void cmatvec_mult(complex alpha, complex x[], complex beta, complex y[]); void cpsolve(int n, complex x[], complex y[]); extern int cfgmr( int n, void (*matvec_mult)(complex, complex [], complex, complex []), void (*psolve)(int n, complex [], complex[]), complex *rhs, complex *sol, double tol, int restrt, int *itmax, FILE *fits); extern int cfill_diag(int n, NCformat *Astore); char equed[1] = {'B'}; yes_no_t equil; trans_t trans; SuperMatrix A, L, U; SuperMatrix B, X; NCformat *Astore; NCformat *Ustore; SCformat *Lstore; complex *a; int *asub, *xa; int *etree; int *perm_c; /* column permutation vector */ int *perm_r; /* row permutations from partial pivoting */ int nrhs, ldx, lwork, info, m, n, nnz; complex *rhsb, *rhsx, *xact; complex *work = NULL; float *R, *C; float u, rpg, rcond; complex zero = {0.0, 0.0}; complex one = {1.0, 0.0}; complex none = {-1.0, 0.0}; mem_usage_t mem_usage; superlu_options_t options; SuperLUStat_t stat; int restrt, iter, maxit, i; double resid; complex *x, *b; #ifdef DEBUG extern int num_drop_L, num_drop_U; #endif #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Enter main()"); #endif /* Defaults */ lwork = 0; nrhs = 1; equil = YES; u = 0.1; /* u=1.0 for complete factorization */ trans = NOTRANS; /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = COLAMD; options.DiagPivotThresh = 0.1; //different from complete LU options.Trans = NOTRANS; options.IterRefine = NOREFINE; options.SymmetricMode = NO; options.PivotGrowth = NO; options.ConditionNumber = NO; options.PrintStat = YES; options.RowPerm = LargeDiag; options.ILU_DropTol = 1e-4; options.ILU_FillTol = 1e-2; options.ILU_FillFactor = 10.0; options.ILU_DropRule = DROP_BASIC | DROP_AREA; options.ILU_Norm = INF_NORM; options.ILU_MILU = SMILU_2; */ ilu_set_default_options(&options); /* Modify the defaults. */ options.PivotGrowth = YES; /* Compute reciprocal pivot growth */ options.ConditionNumber = YES;/* Compute reciprocal condition number */ if ( lwork > 0 ) { work = SUPERLU_MALLOC(lwork); if ( !work ) ABORT("Malloc fails for work[]."); } /* Read matrix A from a file in Harwell-Boeing format.*/ if (argc < 2) { printf("Usage:\n%s [OPTION] < [INPUT] > [OUTPUT]\nOPTION:\n" "-h -hb:\n\t[INPUT] is a Harwell-Boeing format matrix.\n" "-r -rb:\n\t[INPUT] is a Rutherford-Boeing format matrix.\n" "-t -triplet:\n\t[INPUT] is a triplet format matrix.\n", argv[0]); return 0; } else { switch (argv[1][1]) { case 'H': case 'h': printf("Input a Harwell-Boeing format matrix:\n"); creadhb(&m, &n, &nnz, &a, &asub, &xa); break; case 'R': case 'r': printf("Input a Rutherford-Boeing format matrix:\n"); creadrb(&m, &n, &nnz, &a, &asub, &xa); break; case 'T': case 't': printf("Input a triplet format matrix:\n"); creadtriple(&m, &n, &nnz, &a, &asub, &xa); break; default: printf("Unrecognized format.\n"); return 0; } } cCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_C, SLU_GE); Astore = A.Store; cfill_diag(n, Astore); printf("Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz); fflush(stdout); if ( !(rhsb = complexMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsb[]."); if ( !(rhsx = complexMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsx[]."); cCreate_Dense_Matrix(&B, m, nrhs, rhsb, m, SLU_DN, SLU_C, SLU_GE); cCreate_Dense_Matrix(&X, m, nrhs, rhsx, m, SLU_DN, SLU_C, SLU_GE); xact = complexMalloc(n * nrhs); ldx = n; cGenXtrue(n, nrhs, xact, ldx); cFillRHS(trans, nrhs, xact, ldx, &A, &B); if ( !(etree = intMalloc(n)) ) ABORT("Malloc fails for etree[]."); if ( !(perm_r = intMalloc(m)) ) ABORT("Malloc fails for perm_r[]."); if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[]."); if ( !(R = (float *) SUPERLU_MALLOC(A.nrow * sizeof(float))) ) ABORT("SUPERLU_MALLOC fails for R[]."); if ( !(C = (float *) SUPERLU_MALLOC(A.ncol * sizeof(float))) ) ABORT("SUPERLU_MALLOC fails for C[]."); info = 0; #ifdef DEBUG num_drop_L = 0; num_drop_U = 0; #endif /* Initialize the statistics variables. */ StatInit(&stat); /* Compute the incomplete factorization and compute the condition number and pivot growth using dgsisx. */ cgsisx(&options, &A, perm_c, perm_r, etree, equed, R, C, &L, &U, work, lwork, &B, &X, &rpg, &rcond, &mem_usage, &stat, &info); Lstore = (SCformat *) L.Store; Ustore = (NCformat *) U.Store; printf("cgsisx(): info %d\n", info); if (info > 0 || rcond < 1e-8 || rpg > 1e8) printf("WARNING: This preconditioner might be unstable.\n"); if ( info == 0 || info == n+1 ) { if ( options.PivotGrowth == YES ) printf("Recip. pivot growth = %e\n", rpg); if ( options.ConditionNumber == YES ) printf("Recip. condition number = %e\n", rcond); } else if ( info > 0 && lwork == -1 ) { printf("** Estimated memory: %d bytes\n", info - n); } printf("n(A) = %d, nnz(A) = %d\n", n, Astore->nnz); printf("No of nonzeros in factor L = %d\n", Lstore->nnz); printf("No of nonzeros in factor U = %d\n", Ustore->nnz); printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n); printf("Fill ratio: nnz(F)/nnz(A) = %.3f\n", ((double)(Lstore->nnz) + (double)(Ustore->nnz) - (double)n) / (double)Astore->nnz); printf("L\\U MB %.3f\ttotal MB needed %.3f\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6); fflush(stdout); /* Set the global variables. */ GLOBAL_A = &A; GLOBAL_L = &L; GLOBAL_U = &U; GLOBAL_STAT = &stat; GLOBAL_PERM_C = perm_c; GLOBAL_PERM_R = perm_r; /* Set the variables used by GMRES. */ restrt = SUPERLU_MIN(n / 3 + 1, 50); maxit = 1000; iter = maxit; resid = 1e-8; if (!(b = complexMalloc(m))) ABORT("Malloc fails for b[]."); if (!(x = complexMalloc(n))) ABORT("Malloc fails for x[]."); sp_cgemv("N", one, &A, xact, 1, zero, b, 1); if (info <= n + 1) { int i_1 = 1; double maxferr = 0.0, nrmA, nrmB, res, t; complex temp; extern float scnrm2_(int *, complex [], int *); extern void caxpy_(int *, complex *, complex [], int *, complex [], int *); /* Call GMRES. */ /*double *sol = (double*) ((DNformat*) X.Store)->nzval; for (i = 0; i < n; i++) x[i] = sol[i];*/ for (i = 0; i < n; i++) x[i] = zero; t = SuperLU_timer_(); cfgmr(n, cmatvec_mult, cpsolve, b, x, resid, restrt, &iter, stdout); t = SuperLU_timer_() - t; /* Output the result. */ nrmA = scnrm2_(&(Astore->nnz), (complex *)((DNformat *)A.Store)->nzval, &i_1); nrmB = scnrm2_(&m, b, &i_1); sp_cgemv("N", none, &A, x, 1, one, b, 1); res = scnrm2_(&m, b, &i_1); resid = res / nrmB; printf("||A||_F = %.1e, ||B||_2 = %.1e, ||B-A*X||_2 = %.1e, " "relres = %.1e\n", nrmA, nrmB, res, resid); if (iter >= maxit) { if (resid >= 1.0) iter = -180; else if (resid > 1e-8) iter = -111; } printf("iteration: %d\nresidual: %.1e\nGMRES time: %.2f seconds.\n", iter, resid, t); for (i = 0; i < m; i++) c_sub(&temp, &x[i], &xact[i]); maxferr = SUPERLU_MAX(maxferr, c_abs1(&temp)); printf("||X-X_true||_oo = %.1e\n", maxferr); } #ifdef DEBUG printf("%d entries in L and %d entries in U dropped.\n", num_drop_L, num_drop_U); #endif fflush(stdout); if ( options.PrintStat ) StatPrint(&stat); StatFree(&stat); SUPERLU_FREE (rhsb); SUPERLU_FREE (rhsx); SUPERLU_FREE (xact); SUPERLU_FREE (etree); SUPERLU_FREE (perm_r); SUPERLU_FREE (perm_c); SUPERLU_FREE (R); SUPERLU_FREE (C); Destroy_CompCol_Matrix(&A); Destroy_SuperMatrix_Store(&B); Destroy_SuperMatrix_Store(&X); if ( lwork >= 0 ) { Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); } SUPERLU_FREE(b); SUPERLU_FREE(x); #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Exit main()"); #endif return 0; }
int sp_cgemm(char *trans, int m, int n, int k, complex alpha, SuperMatrix *A, complex *b, int ldb, complex beta, complex *c, int ldc) { /* Purpose ======= sp_c performs one of the matrix-matrix operations C := alpha*op( A )*op( B ) + beta*C, where op( X ) is one of op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ), alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. Parameters ========== TRANS - (input) char* On entry, TRANS specifies the operation to be performed as follows: TRANS = 'N' or 'n' y := alpha*A*x + beta*y. TRANS = 'T' or 't' y := alpha*A'*x + beta*y. TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. M - (input) int On entry, M specifies the number of rows of the matrix op( A ) and of the matrix C. M must be at least zero. Unchanged on exit. N - (input) int On entry, N specifies the number of columns of the matrix op( B ) and the number of columns of the matrix C. N must be at least zero. Unchanged on exit. K - (input) int On entry, K specifies the number of columns of the matrix op( A ) and the number of rows of the matrix op( B ). K must be at least zero. Unchanged on exit. ALPHA - (input) complex On entry, ALPHA specifies the scalar alpha. A - (input) SuperMatrix* Matrix A with a sparse format, of dimension (A->nrow, A->ncol). Currently, the type of A can be: Stype = NC or NCP; Dtype = SLU_C; Mtype = GE. In the future, more general A can be handled. B - COMPLEX PRECISION array of DIMENSION ( LDB, kb ), where kb is n when TRANSB = 'N' or 'n', and is k otherwise. Before entry with TRANSB = 'N' or 'n', the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B. Unchanged on exit. LDB - (input) int On entry, LDB specifies the first dimension of B as declared in the calling (sub) program. LDB must be at least max( 1, n ). Unchanged on exit. BETA - (input) complex On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input. C - COMPLEX PRECISION array of DIMENSION ( LDC, n ). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the m by n matrix ( alpha*op( A )*B + beta*C ). LDC - (input) int On entry, LDC specifies the first dimension of C as declared in the calling (sub)program. LDC must be at least max(1,m). Unchanged on exit. ==== Sparse Level 3 Blas routine. */ int incx = 1, incy = 1; int j; for (j = 0; j < n; ++j) { sp_cgemv(trans, alpha, A, &b[ldb*j], incx, beta, &c[ldc*j], incy); } return 0; }
void cmatvec_mult(complex alpha, complex x[], complex beta, complex y[]) { SuperMatrix *A = GLOBAL_A; sp_cgemv("N", alpha, A, x, 1, beta, y, 1); }