GLOBAL void UMF_kernel_wrapup ( NumericType *Numeric, SymbolicType *Symbolic, WorkType *Work ) { /* ---------------------------------------------------------------------- */ /* local variables */ /* ---------------------------------------------------------------------- */ Entry pivot_value ; double d ; Entry *D ; Int i, k, col, row, llen, ulen, *ip, *Rperm, *Cperm, *Lilen, npiv, lp, *Uilen, *Lip, *Uip, *Cperm_init, up, pivrow, pivcol, *Lpos, *Upos, *Wr, *Wc, *Wp, *Frpos, *Fcpos, *Row_degree, *Col_degree, *Rperm_init, n_row, n_col, n_inner, zero_pivot, nan_pivot, n1 ; #ifndef NDEBUG UMF_dump_matrix (Numeric, Work, FALSE) ; #endif DEBUG0 (("Kernel complete, Starting Kernel wrapup\n")) ; n_row = Symbolic->n_row ; n_col = Symbolic->n_col ; n_inner = MIN (n_row, n_col) ; Rperm = Numeric->Rperm ; Cperm = Numeric->Cperm ; Lilen = Numeric->Lilen ; Uilen = Numeric->Uilen ; Upos = Numeric->Upos ; Lpos = Numeric->Lpos ; Lip = Numeric->Lip ; Uip = Numeric->Uip ; D = Numeric->D ; npiv = Work->npiv ; Numeric->npiv = npiv ; Numeric->ulen = Work->ulen ; ASSERT (n_row == Numeric->n_row) ; ASSERT (n_col == Symbolic->n_col) ; DEBUG0 (("Wrap-up: npiv "ID" ulen "ID"\n", npiv, Numeric->ulen)) ; ASSERT (npiv <= n_inner) ; /* this will be nonzero only if matrix is singular or rectangular */ ASSERT (IMPLIES (npiv == n_col, Work->ulen == 0)) ; /* ---------------------------------------------------------------------- */ /* find the smallest and largest entries in D */ /* ---------------------------------------------------------------------- */ for (k = 0 ; k < npiv ; k++) { pivot_value = D [k] ; ABS (d, pivot_value) ; zero_pivot = SCALAR_IS_ZERO (d) ; nan_pivot = SCALAR_IS_NAN (d) ; if (!zero_pivot) { /* the pivot is nonzero, but might be Inf or NaN */ Numeric->nnzpiv++ ; } if (k == 0) { Numeric->min_udiag = d ; Numeric->max_udiag = d ; } else { /* min (abs (diag (U))) behaves as follows: If any entry is zero, then the result is zero (regardless of the presence of NaN's). Otherwise, if any entry is NaN, then the result is NaN. Otherwise, the result is the smallest absolute value on the diagonal of U. */ if (SCALAR_IS_NONZERO (Numeric->min_udiag)) { if (zero_pivot || nan_pivot) { Numeric->min_udiag = d ; } else if (!SCALAR_IS_NAN (Numeric->min_udiag)) { /* d and min_udiag are both non-NaN */ Numeric->min_udiag = MIN (Numeric->min_udiag, d) ; } } /* max (abs (diag (U))) behaves as follows: If any entry is NaN then the result is NaN. Otherise, the result is the largest absolute value on the diagonal of U. */ if (nan_pivot) { Numeric->max_udiag = d ; } else if (!SCALAR_IS_NAN (Numeric->max_udiag)) { /* d and max_udiag are both non-NaN */ Numeric->max_udiag = MAX (Numeric->max_udiag, d) ; } } } /* ---------------------------------------------------------------------- */ /* check if matrix is singular or rectangular */ /* ---------------------------------------------------------------------- */ Col_degree = Cperm ; /* for NON_PIVOTAL_COL macro */ Row_degree = Rperm ; /* for NON_PIVOTAL_ROW macro */ if (npiv < n_row) { /* finalize the row permutation */ k = npiv ; DEBUGm3 (("Singular pivot rows "ID" to "ID"\n", k, n_row-1)) ; for (row = 0 ; row < n_row ; row++) { if (NON_PIVOTAL_ROW (row)) { Rperm [row] = ONES_COMPLEMENT (k) ; DEBUGm3 (("Singular row "ID" is k: "ID" pivot row\n", row, k)) ; ASSERT (!NON_PIVOTAL_ROW (row)) ; Lpos [row] = EMPTY ; Uip [row] = EMPTY ; Uilen [row] = 0 ; k++ ; } } ASSERT (k == n_row) ; } if (npiv < n_col) { /* finalize the col permutation */ k = npiv ; DEBUGm3 (("Singular pivot cols "ID" to "ID"\n", k, n_col-1)) ; for (col = 0 ; col < n_col ; col++) { if (NON_PIVOTAL_COL (col)) { Cperm [col] = ONES_COMPLEMENT (k) ; DEBUGm3 (("Singular col "ID" is k: "ID" pivot row\n", col, k)) ; ASSERT (!NON_PIVOTAL_COL (col)) ; Upos [col] = EMPTY ; Lip [col] = EMPTY ; Lilen [col] = 0 ; k++ ; } } ASSERT (k == n_col) ; } if (npiv < n_inner) { /* finalize the diagonal of U */ DEBUGm3 (("Diag of U is zero, "ID" to "ID"\n", npiv, n_inner-1)) ; for (k = npiv ; k < n_inner ; k++) { CLEAR (D [k]) ; } } /* save the pattern of the last row of U */ if (Numeric->ulen > 0) { DEBUGm3 (("Last row of U is not empty\n")) ; Numeric->Upattern = Work->Upattern ; Work->Upattern = (Int *) NULL ; } DEBUG2 (("Nnzpiv: "ID" npiv "ID"\n", Numeric->nnzpiv, npiv)) ; ASSERT (Numeric->nnzpiv <= npiv) ; if (Numeric->nnzpiv < n_inner && !SCALAR_IS_NAN (Numeric->min_udiag)) { /* the rest of the diagonal is zero, so min_udiag becomes 0, * unless it is already NaN. */ Numeric->min_udiag = 0.0 ; } /* ---------------------------------------------------------------------- */ /* size n_row, n_col workspaces that can be used here: */ /* ---------------------------------------------------------------------- */ Frpos = Work->Frpos ; /* of size n_row+1 */ Fcpos = Work->Fcpos ; /* of size n_col+1 */ Wp = Work->Wp ; /* of size MAX(n_row,n_col)+1 */ /* Work->Upattern ; cannot be used (in Numeric) */ Wr = Work->Lpattern ; /* of size n_row+1 */ Wc = Work->Wrp ; /* of size n_col+1 or bigger */ /* ---------------------------------------------------------------------- */ /* construct Rperm from inverse permutations */ /* ---------------------------------------------------------------------- */ /* use Frpos for temporary copy of inverse row permutation [ */ for (pivrow = 0 ; pivrow < n_row ; pivrow++) { k = Rperm [pivrow] ; ASSERT (k < 0) ; k = ONES_COMPLEMENT (k) ; ASSERT (k >= 0 && k < n_row) ; Wp [k] = pivrow ; Frpos [pivrow] = k ; } for (k = 0 ; k < n_row ; k++) { Rperm [k] = Wp [k] ; } /* ---------------------------------------------------------------------- */ /* construct Cperm from inverse permutation */ /* ---------------------------------------------------------------------- */ /* use Fcpos for temporary copy of inverse column permutation [ */ for (pivcol = 0 ; pivcol < n_col ; pivcol++) { k = Cperm [pivcol] ; ASSERT (k < 0) ; k = ONES_COMPLEMENT (k) ; ASSERT (k >= 0 && k < n_col) ; Wp [k] = pivcol ; /* save a copy of the inverse column permutation in Fcpos */ Fcpos [pivcol] = k ; } for (k = 0 ; k < n_col ; k++) { Cperm [k] = Wp [k] ; } #ifndef NDEBUG for (k = 0 ; k < n_col ; k++) { col = Cperm [k] ; ASSERT (col >= 0 && col < n_col) ; ASSERT (Fcpos [col] == k) ; /* col is the kth pivot */ } for (k = 0 ; k < n_row ; k++) { row = Rperm [k] ; ASSERT (row >= 0 && row < n_row) ; ASSERT (Frpos [row] == k) ; /* row is the kth pivot */ } #endif #ifndef NDEBUG UMF_dump_lu (Numeric) ; #endif /* ---------------------------------------------------------------------- */ /* permute Lpos, Upos, Lilen, Lip, Uilen, and Uip */ /* ---------------------------------------------------------------------- */ for (k = 0 ; k < npiv ; k++) { pivrow = Rperm [k] ; Wr [k] = Uilen [pivrow] ; Wp [k] = Uip [pivrow] ; } for (k = 0 ; k < npiv ; k++) { Uilen [k] = Wr [k] ; Uip [k] = Wp [k] ; } for (k = 0 ; k < npiv ; k++) { pivrow = Rperm [k] ; Wp [k] = Lpos [pivrow] ; } for (k = 0 ; k < npiv ; k++) { Lpos [k] = Wp [k] ; } for (k = 0 ; k < npiv ; k++) { pivcol = Cperm [k] ; Wc [k] = Lilen [pivcol] ; Wp [k] = Lip [pivcol] ; } for (k = 0 ; k < npiv ; k++) { Lilen [k] = Wc [k] ; Lip [k] = Wp [k] ; } for (k = 0 ; k < npiv ; k++) { pivcol = Cperm [k] ; Wp [k] = Upos [pivcol] ; } for (k = 0 ; k < npiv ; k++) { Upos [k] = Wp [k] ; } /* ---------------------------------------------------------------------- */ /* terminate the last Uchain and last Lchain */ /* ---------------------------------------------------------------------- */ Upos [npiv] = EMPTY ; Lpos [npiv] = EMPTY ; Uip [npiv] = EMPTY ; Lip [npiv] = EMPTY ; Uilen [npiv] = 0 ; Lilen [npiv] = 0 ; /* ---------------------------------------------------------------------- */ /* convert U to the new pivot order */ /* ---------------------------------------------------------------------- */ n1 = Symbolic->n1 ; for (k = 0 ; k < n1 ; k++) { /* this is a singleton row of U */ ulen = Uilen [k] ; DEBUG4 (("K "ID" New U. ulen "ID" Singleton 1\n", k, ulen)) ; if (ulen > 0) { up = Uip [k] ; ip = (Int *) (Numeric->Memory + up) ; for (i = 0 ; i < ulen ; i++) { col = *ip ; DEBUG4 ((" old col "ID" new col "ID"\n", col, Fcpos [col])); ASSERT (col >= 0 && col < n_col) ; *ip++ = Fcpos [col] ; } } } for (k = n1 ; k < npiv ; k++) { up = Uip [k] ; if (up < 0) { /* this is the start of a new Uchain (with a pattern) */ ulen = Uilen [k] ; DEBUG4 (("K "ID" New U. ulen "ID" End_Uchain 1\n", k, ulen)) ; if (ulen > 0) { up = -up ; ip = (Int *) (Numeric->Memory + up) ; for (i = 0 ; i < ulen ; i++) { col = *ip ; DEBUG4 ((" old col "ID" new col "ID"\n", col, Fcpos [col])); ASSERT (col >= 0 && col < n_col) ; *ip++ = Fcpos [col] ; } } } } ulen = Numeric->ulen ; if (ulen > 0) { /* convert last pivot row of U to the new pivot order */ DEBUG4 (("K "ID" (last)\n", k)) ; for (i = 0 ; i < ulen ; i++) { col = Numeric->Upattern [i] ; DEBUG4 ((" old col "ID" new col "ID"\n", col, Fcpos [col])) ; Numeric->Upattern [i] = Fcpos [col] ; } } /* Fcpos no longer needed ] */ /* ---------------------------------------------------------------------- */ /* convert L to the new pivot order */ /* ---------------------------------------------------------------------- */ for (k = 0 ; k < n1 ; k++) { llen = Lilen [k] ; DEBUG4 (("K "ID" New L. llen "ID" Singleton col\n", k, llen)) ; if (llen > 0) { lp = Lip [k] ; ip = (Int *) (Numeric->Memory + lp) ; for (i = 0 ; i < llen ; i++) { row = *ip ; DEBUG4 ((" old row "ID" new row "ID"\n", row, Frpos [row])) ; ASSERT (row >= 0 && row < n_row) ; *ip++ = Frpos [row] ; } } } for (k = n1 ; k < npiv ; k++) { llen = Lilen [k] ; DEBUG4 (("K "ID" New L. llen "ID" \n", k, llen)) ; if (llen > 0) { lp = Lip [k] ; if (lp < 0) { /* this starts a new Lchain */ lp = -lp ; } ip = (Int *) (Numeric->Memory + lp) ; for (i = 0 ; i < llen ; i++) { row = *ip ; DEBUG4 ((" old row "ID" new row "ID"\n", row, Frpos [row])) ; ASSERT (row >= 0 && row < n_row) ; *ip++ = Frpos [row] ; } } } /* Frpos no longer needed ] */ /* ---------------------------------------------------------------------- */ /* combine symbolic and numeric permutations */ /* ---------------------------------------------------------------------- */ Cperm_init = Symbolic->Cperm_init ; Rperm_init = Symbolic->Rperm_init ; for (k = 0 ; k < n_row ; k++) { Rperm [k] = Rperm_init [Rperm [k]] ; } for (k = 0 ; k < n_col ; k++) { Cperm [k] = Cperm_init [Cperm [k]] ; } /* Work object will be freed immediately upon return (to UMF_kernel */ /* and then to UMFPACK_numeric). */ }
GLOBAL Int UMF_solve ( Int sys, const Int Ap [ ], const Int Ai [ ], const double Ax [ ], double Xx [ ], const double Bx [ ], #ifdef COMPLEX const double Az [ ], double Xz [ ], const double Bz [ ], #endif NumericType *Numeric, Int irstep, double Info [UMFPACK_INFO], Int Pattern [ ], /* size n */ double SolveWork [ ] /* if irstep>0 real: size 5*n. complex:10*n */ /* otherwise real: size n. complex: 4*n */ ) { /* ---------------------------------------------------------------------- */ /* local variables */ /* ---------------------------------------------------------------------- */ Entry axx, wi, xj, zi, xi, aij, bi ; double omega [3], d, z2i, yi, flops ; Entry *W, *Z, *S, *X ; double *Z2, *Y, *B2, *Rs ; Int *Rperm, *Cperm, i, n, p, step, j, nz, status, p2, do_scale ; #ifdef COMPLEX Int AXsplit ; Int Bsplit ; #endif #ifndef NRECIPROCAL Int do_recip = Numeric->do_recip ; #endif /* ---------------------------------------------------------------------- */ /* initializations */ /* ---------------------------------------------------------------------- */ #ifndef NDEBUG UMF_dump_lu (Numeric) ; ASSERT (Numeric && Xx && Bx && Pattern && SolveWork && Info) ; #endif nz = 0 ; omega [0] = 0. ; omega [1] = 0. ; omega [2] = 0. ; Rperm = Numeric->Rperm ; Cperm = Numeric->Cperm ; Rs = Numeric->Rs ; /* row scale factors */ do_scale = (Rs != (double *) NULL) ; flops = 0 ; Info [UMFPACK_SOLVE_FLOPS] = 0 ; Info [UMFPACK_IR_TAKEN] = 0 ; Info [UMFPACK_IR_ATTEMPTED] = 0 ; /* UMFPACK_solve does not call this routine if A is rectangular */ ASSERT (Numeric->n_row == Numeric->n_col) ; n = Numeric->n_row ; if (Numeric->nnzpiv < n || SCALAR_IS_ZERO (Numeric->rcond) || SCALAR_IS_NAN (Numeric->rcond)) { /* Note that systems involving just L return UMFPACK_OK, even if */ /* A is singular (L is always has a unit diagonal). */ DEBUGm4 (("Note, matrix is singular in umf_solve\n")) ; status = UMFPACK_WARNING_singular_matrix ; irstep = 0 ; } else { status = UMFPACK_OK ; } irstep = MAX (0, irstep) ; /* make sure irstep is >= 0 */ W = (Entry *) SolveWork ; /* Entry W [0..n-1] */ Z = (Entry *) NULL ; /* unused if no iterative refinement */ S = (Entry *) NULL ; Y = (double *) NULL ; Z2 = (double *) NULL ; B2 = (double *) NULL ; #ifdef COMPLEX if (irstep > 0) { if (!Ap || !Ai || !Ax) { return (UMFPACK_ERROR_argument_missing) ; } /* A, B, and X in split format if Az, Bz, and Xz present */ AXsplit = SPLIT (Az) || SPLIT(Xz); Z = (Entry *) (SolveWork + 4*n) ; /* Entry Z [0..n-1] */ S = (Entry *) (SolveWork + 6*n) ; /* Entry S [0..n-1] */ Y = (double *) (SolveWork + 8*n) ; /* double Y [0..n-1] */ B2 = (double *) (SolveWork + 9*n) ; /* double B2 [0..n-1] */ Z2 = (double *) Z ; /* double Z2 [0..n-1], equiv. to Z */ } else { /* A is ignored, only look at X for split/packed cases */ AXsplit = SPLIT(Xz); } Bsplit = SPLIT (Bz); if (AXsplit) { X = (Entry *) (SolveWork + 2*n) ; /* Entry X [0..n-1] */ } else { X = (Entry *) Xx ; /* Entry X [0..n-1] */ } #else X = (Entry *) Xx ; /* Entry X [0..n-1] */ if (irstep > 0) { if (!Ap || !Ai || !Ax) { return (UMFPACK_ERROR_argument_missing) ; } Z = (Entry *) (SolveWork + n) ; /* Entry Z [0..n-1] */ S = (Entry *) (SolveWork + 2*n) ; /* Entry S [0..n-1] */ Y = (double *) (SolveWork + 3*n) ; /* double Y [0..n-1] */ B2 = (double *) (SolveWork + 4*n) ; /* double B2 [0..n-1] */ Z2 = (double *) Z ; /* double Z2 [0..n-1], equiv. to Z */ } #endif /* ---------------------------------------------------------------------- */ /* determine which system to solve */ /* ---------------------------------------------------------------------- */ if (sys == UMFPACK_A) { /* ------------------------------------------------------------------ */ /* solve A x = b with optional iterative refinement */ /* ------------------------------------------------------------------ */ if (irstep > 0) { /* -------------------------------------------------------------- */ /* using iterative refinement: compute Y and B2 */ /* -------------------------------------------------------------- */ nz = Ap [n] ; Info [UMFPACK_NZ] = nz ; /* A is stored by column */ /* Y (i) = ||R A_i||, 1-norm of row i of R A */ for (i = 0 ; i < n ; i++) { Y [i] = 0. ; } flops += (ABS_FLOPS + 1) * nz ; p2 = Ap [n] ; for (p = 0 ; p < p2 ; p++) { /* Y [Ai [p]] += ABS (Ax [p]) ; */ ASSIGN (aij, Ax, Az, p, AXsplit) ; ABS (d, aij) ; Y [Ai [p]] += d ; } /* B2 = abs (B) */ flops += ABS_FLOPS * n ; for (i = 0 ; i < n ; i++) { /* B2 [i] = ABS (B [i]) ; */ ASSIGN (bi, Bx, Bz, i, Bsplit) ; ABS (B2 [i], bi) ; } /* scale Y and B2. */ if (do_scale) { /* Y = R Y */ /* B2 = R B2 */ #ifndef NRECIPROCAL if (do_recip) { /* multiply by the scale factors */ for (i = 0 ; i < n ; i++) { Y [i] *= Rs [i] ; B2 [i] *= Rs [i] ; } } else #endif { /* divide by the scale factors */ for (i = 0 ; i < n ; i++) { Y [i] /= Rs [i] ; B2 [i] /= Rs [i] ; } } flops += 2 * n ; } } for (step = 0 ; step <= irstep ; step++) { /* -------------------------------------------------------------- */ /* Solve A x = b (step 0): */ /* x = Q (U \ (L \ (P R b))) */ /* and then perform iterative refinement (step > 0): */ /* x = x + Q (U \ (L \ (P R (b - A x)))) */ /* -------------------------------------------------------------- */ if (step == 0) { if (do_scale) { /* W = P R b, using X as workspace, since Z is not * allocated if irstep = 0. */ #ifndef NRECIPROCAL if (do_recip) { /* multiply by the scale factors */ for (i = 0 ; i < n ; i++) { ASSIGN (X [i], Bx, Bz, i, Bsplit) ; SCALE (X [i], Rs [i]) ; } } else #endif { /* divide by the scale factors */ for (i = 0 ; i < n ; i++) { ASSIGN (X [i], Bx, Bz, i, Bsplit) ; SCALE_DIV (X [i], Rs [i]) ; } } flops += SCALE_FLOPS * n ; for (i = 0 ; i < n ; i++) { W [i] = X [Rperm [i]] ; } } else { /* W = P b, since the row scaling R = I */ for (i = 0 ; i < n ; i++) { /* W [i] = B [Rperm [i]] ; */ ASSIGN (W [i], Bx, Bz, Rperm [i], Bsplit) ; } } } else { for (i = 0 ; i < n ; i++) { /* Z [i] = B [i] ; */ ASSIGN (Z [i], Bx, Bz, i, Bsplit) ; } flops += MULTSUB_FLOPS * nz ; for (i = 0 ; i < n ; i++) { xi = X [i] ; p2 = Ap [i+1] ; for (p = Ap [i] ; p < p2 ; p++) { /* Z [Ai [p]] -= Ax [p] * xi ; */ ASSIGN (aij, Ax, Az, p, AXsplit) ; MULT_SUB (Z [Ai [p]], aij, xi) ; } } /* scale, Z = R Z */ if (do_scale) { #ifndef NRECIPROCAL if (do_recip) { /* multiply by the scale factors */ for (i = 0 ; i < n ; i++) { SCALE (Z [i], Rs [i]) ; } } else #endif { /* divide by the scale factors */ for (i = 0 ; i < n ; i++) { SCALE_DIV (Z [i], Rs [i]) ; } } flops += SCALE_FLOPS * n ; } for (i = 0 ; i < n ; i++) { W [i] = Z [Rperm [i]] ; } } flops += UMF_lsolve (Numeric, W, Pattern) ; flops += UMF_usolve (Numeric, W, Pattern) ; if (step == 0) { for (i = 0 ; i < n ; i++) { X [Cperm [i]] = W [i] ; } } else { flops += ASSEMBLE_FLOPS * n ; for (i = 0 ; i < n ; i++) { /* X [Cperm [i]] += W [i] ; */ ASSEMBLE (X [Cperm [i]], W [i]) ; } } /* -------------------------------------------------------------- */ /* sparse backward error estimate */ /* -------------------------------------------------------------- */ if (irstep > 0) { /* ---------------------------------------------------------- */ /* A is stored by column */ /* W (i) = R (b - A x)_i, residual */ /* Z2 (i) = R (|A||x|)_i */ /* ---------------------------------------------------------- */ for (i = 0 ; i < n ; i++) { /* W [i] = B [i] ; */ ASSIGN (W [i], Bx, Bz, i, Bsplit) ; Z2 [i] = 0. ; } flops += (MULT_FLOPS + DECREMENT_FLOPS + ABS_FLOPS + 1) * nz ; for (j = 0 ; j < n ; j++) { xj = X [j] ; p2 = Ap [j+1] ; for (p = Ap [j] ; p < p2 ; p++) { i = Ai [p] ; /* axx = Ax [p] * xj ; */ ASSIGN (aij, Ax, Az, p, AXsplit) ; MULT (axx, aij, xj) ; /* W [i] -= axx ; */ DECREMENT (W [i], axx) ; /* Z2 [i] += ABS (axx) ; */ ABS (d, axx) ; Z2 [i] += d ; } } /* scale W and Z2 */ if (do_scale) { /* Z2 = R Z2 */ /* W = R W */ #ifndef NRECIPROCAL if (do_recip) { /* multiply by the scale factors */ for (i = 0 ; i < n ; i++) { SCALE (W [i], Rs [i]) ; Z2 [i] *= Rs [i] ; } } else #endif { /* divide by the scale factors */ for (i = 0 ; i < n ; i++) { SCALE_DIV (W [i], Rs [i]) ; Z2 [i] /= Rs [i] ; } } flops += (SCALE_FLOPS + 1) * n ; } flops += (2*ABS_FLOPS + 5) * n ; if (do_step (omega, step, B2, X, W, Y, Z2, S, n, Info)) { /* iterative refinement is done */ break ; } } } } else if (sys == UMFPACK_At) { /* ------------------------------------------------------------------ */ /* solve A' x = b with optional iterative refinement */ /* ------------------------------------------------------------------ */ /* A' is the complex conjugate transpose */ if (irstep > 0) { /* -------------------------------------------------------------- */ /* using iterative refinement: compute Y */ /* -------------------------------------------------------------- */ nz = Ap [n] ; Info [UMFPACK_NZ] = nz ; /* A' is stored by row */ /* Y (i) = ||(A' R)_i||, 1-norm of row i of A' R */ if (do_scale) { flops += (ABS_FLOPS + 2) * nz ; #ifndef NRECIPROCAL if (do_recip) { /* multiply by the scale factors */ for (i = 0 ; i < n ; i++) { yi = 0. ; p2 = Ap [i+1] ; for (p = Ap [i] ; p < p2 ; p++) { /* yi += ABS (Ax [p]) * Rs [Ai [p]] ; */ /* note that abs (aij) is the same as * abs (conj (aij)) */ ASSIGN (aij, Ax, Az, p, AXsplit) ; ABS (d, aij) ; yi += (d * Rs [Ai [p]]) ; } Y [i] = yi ; } } else #endif { /* divide by the scale factors */ for (i = 0 ; i < n ; i++) { yi = 0. ; p2 = Ap [i+1] ; for (p = Ap [i] ; p < p2 ; p++) { /* yi += ABS (Ax [p]) / Rs [Ai [p]] ; */ /* note that abs (aij) is the same as * abs (conj (aij)) */ ASSIGN (aij, Ax, Az, p, AXsplit) ; ABS (d, aij) ; yi += (d / Rs [Ai [p]]) ; } Y [i] = yi ; } } } else { /* no scaling */ flops += (ABS_FLOPS + 1) * nz ; for (i = 0 ; i < n ; i++) { yi = 0. ; p2 = Ap [i+1] ; for (p = Ap [i] ; p < p2 ; p++) { /* yi += ABS (Ax [p]) ; */ /* note that abs (aij) is the same as * abs (conj (aij)) */ ASSIGN (aij, Ax, Az, p, AXsplit) ; ABS (d, aij) ; yi += d ; } Y [i] = yi ; } } /* B2 = abs (B) */ for (i = 0 ; i < n ; i++) { /* B2 [i] = ABS (B [i]) ; */ ASSIGN (bi, Bx, Bz, i, Bsplit) ; ABS (B2 [i], bi) ; } } for (step = 0 ; step <= irstep ; step++) { /* -------------------------------------------------------------- */ /* Solve A' x = b (step 0): */ /* x = R P' (L' \ (U' \ (Q' b))) */ /* and then perform iterative refinement (step > 0): */ /* x = x + R P' (L' \ (U' \ (Q' (b - A' x)))) */ /* -------------------------------------------------------------- */ if (step == 0) { /* W = Q' b */ for (i = 0 ; i < n ; i++) { /* W [i] = B [Cperm [i]] ; */ ASSIGN (W [i], Bx, Bz, Cperm [i], Bsplit) ; } } else { /* Z = b - A' x */ for (i = 0 ; i < n ; i++) { /* Z [i] = B [i] ; */ ASSIGN (Z [i], Bx, Bz, i, Bsplit) ; } flops += MULTSUB_FLOPS * nz ; for (i = 0 ; i < n ; i++) { zi = Z [i] ; p2 = Ap [i+1] ; for (p = Ap [i] ; p < p2 ; p++) { /* zi -= conjugate (Ax [p]) * X [Ai [p]] ; */ ASSIGN (aij, Ax, Az, p, Bsplit) ; MULT_SUB_CONJ (zi, X [Ai [p]], aij) ; } Z [i] = zi ; } /* W = Q' Z */ for (i = 0 ; i < n ; i++) { W [i] = Z [Cperm [i]] ; } } flops += UMF_uhsolve (Numeric, W, Pattern) ; flops += UMF_lhsolve (Numeric, W, Pattern) ; if (step == 0) { /* X = R P' W */ /* do not use Z, since it isn't allocated if irstep = 0 */ /* X = P' W */ for (i = 0 ; i < n ; i++) { X [Rperm [i]] = W [i] ; } if (do_scale) { /* X = R X */ #ifndef NRECIPROCAL if (do_recip) { /* multiply by the scale factors */ for (i = 0 ; i < n ; i++) { SCALE (X [i], Rs [i]) ; } } else #endif { /* divide by the scale factors */ for (i = 0 ; i < n ; i++) { SCALE_DIV (X [i], Rs [i]) ; } } flops += SCALE_FLOPS * n ; } } else { /* Z = P' W */ for (i = 0 ; i < n ; i++) { Z [Rperm [i]] = W [i] ; } if (do_scale) { /* Z = R Z */ #ifndef NRECIPROCAL if (do_recip) { /* multiply by the scale factors */ for (i = 0 ; i < n ; i++) { SCALE (Z [i], Rs [i]) ; } } else #endif { /* divide by the scale factors */ for (i = 0 ; i < n ; i++) { SCALE_DIV (Z [i], Rs [i]) ; } } flops += SCALE_FLOPS * n ; } flops += ASSEMBLE_FLOPS * n ; /* X += Z */ for (i = 0 ; i < n ; i++) { /* X [i] += Z [i] ; was +=W[i] in v4.3, which is wrong */ ASSEMBLE (X [i], Z [i]) ; /* bug fix, v4.3.1 */ } } /* -------------------------------------------------------------- */ /* sparse backward error estimate */ /* -------------------------------------------------------------- */ if (irstep > 0) { /* ---------------------------------------------------------- */ /* A' is stored by row */ /* W (i) = (b - A' x)_i, residual */ /* Z2 (i) = (|A'||x|)_i */ /* ---------------------------------------------------------- */ flops += (MULT_FLOPS + DECREMENT_FLOPS + ABS_FLOPS + 1) * nz ; for (i = 0 ; i < n ; i++) { /* wi = B [i] ; */ ASSIGN (wi, Bx, Bz, i, Bsplit) ; z2i = 0. ; p2 = Ap [i+1] ; for (p = Ap [i] ; p < p2 ; p++) { /* axx = conjugate (Ax [p]) * X [Ai [p]] ; */ ASSIGN (aij, Ax, Az, p, AXsplit) ; MULT_CONJ (axx, X [Ai [p]], aij) ; /* wi -= axx ; */ DECREMENT (wi, axx) ; /* z2i += ABS (axx) ; */ ABS (d, axx) ; z2i += d ; } W [i] = wi ; Z2 [i] = z2i ; } flops += (2*ABS_FLOPS + 5) * n ; if (do_step (omega, step, B2, X, W, Y, Z2, S, n, Info)) { /* iterative refinement is done */ break ; } } } } else if (sys == UMFPACK_Aat) { /* ------------------------------------------------------------------ */ /* solve A.' x = b with optional iterative refinement */ /* ------------------------------------------------------------------ */ /* A' is the array transpose */ if (irstep > 0) { /* -------------------------------------------------------------- */ /* using iterative refinement: compute Y */ /* -------------------------------------------------------------- */ nz = Ap [n] ; Info [UMFPACK_NZ] = nz ; /* A.' is stored by row */ /* Y (i) = ||(A.' R)_i||, 1-norm of row i of A.' R */ if (do_scale) { flops += (ABS_FLOPS + 2) * nz ; #ifndef NRECIPROCAL if (do_recip) { /* multiply by the scale factors */ for (i = 0 ; i < n ; i++) { yi = 0. ; p2 = Ap [i+1] ; for (p = Ap [i] ; p < p2 ; p++) { /* yi += ABS (Ax [p]) * Rs [Ai [p]] ; */ /* note that A.' is the array transpose, * so no conjugate */ ASSIGN (aij, Ax, Az, p, AXsplit) ; ABS (d, aij) ; yi += (d * Rs [Ai [p]]) ; } Y [i] = yi ; } } else #endif { /* divide by the scale factors */ for (i = 0 ; i < n ; i++) { yi = 0. ; p2 = Ap [i+1] ; for (p = Ap [i] ; p < p2 ; p++) { /* yi += ABS (Ax [p]) / Rs [Ai [p]] ; */ /* note that A.' is the array transpose, * so no conjugate */ ASSIGN (aij, Ax, Az, p, AXsplit) ; ABS (d, aij) ; yi += (d / Rs [Ai [p]]) ; } Y [i] = yi ; } } } else { /* no scaling */ flops += (ABS_FLOPS + 1) * nz ; for (i = 0 ; i < n ; i++) { yi = 0. ; p2 = Ap [i+1] ; for (p = Ap [i] ; p < p2 ; p++) { /* yi += ABS (Ax [p]) */ /* note that A.' is the array transpose, * so no conjugate */ ASSIGN (aij, Ax, Az, p, AXsplit) ; ABS (d, aij) ; yi += d ; } Y [i] = yi ; } } /* B2 = abs (B) */ for (i = 0 ; i < n ; i++) { /* B2 [i] = ABS (B [i]) ; */ ASSIGN (bi, Bx, Bz, i, Bsplit) ; ABS (B2 [i], bi) ; } } for (step = 0 ; step <= irstep ; step++) { /* -------------------------------------------------------------- */ /* Solve A.' x = b (step 0): */ /* x = R P' (L.' \ (U.' \ (Q' b))) */ /* and then perform iterative refinement (step > 0): */ /* x = x + R P' (L.' \ (U.' \ (Q' (b - A.' x)))) */ /* -------------------------------------------------------------- */ if (step == 0) { /* W = Q' b */ for (i = 0 ; i < n ; i++) { /* W [i] = B [Cperm [i]] ; */ ASSIGN (W [i], Bx, Bz, Cperm [i], Bsplit) ; } } else { /* Z = b - A.' x */ for (i = 0 ; i < n ; i++) { /* Z [i] = B [i] ; */ ASSIGN (Z [i], Bx, Bz, i, Bsplit) ; } flops += MULTSUB_FLOPS * nz ; for (i = 0 ; i < n ; i++) { zi = Z [i] ; p2 = Ap [i+1] ; for (p = Ap [i] ; p < p2 ; p++) { /* zi -= Ax [p] * X [Ai [p]] ; */ ASSIGN (aij, Ax, Az, p, AXsplit) ; MULT_SUB (zi, aij, X [Ai [p]]) ; } Z [i] = zi ; } /* W = Q' Z */ for (i = 0 ; i < n ; i++) { W [i] = Z [Cperm [i]] ; } } flops += UMF_utsolve (Numeric, W, Pattern) ; flops += UMF_ltsolve (Numeric, W, Pattern) ; if (step == 0) { /* X = R P' W */ /* do not use Z, since it isn't allocated if irstep = 0 */ /* X = P' W */ for (i = 0 ; i < n ; i++) { X [Rperm [i]] = W [i] ; } if (do_scale) { /* X = R X */ #ifndef NRECIPROCAL if (do_recip) { /* multiply by the scale factors */ for (i = 0 ; i < n ; i++) { SCALE (X [i], Rs [i]) ; } } else #endif { /* divide by the scale factors */ for (i = 0 ; i < n ; i++) { SCALE_DIV (X [i], Rs [i]) ; } } flops += SCALE_FLOPS * n ; } } else { /* Z = P' W */ for (i = 0 ; i < n ; i++) { Z [Rperm [i]] = W [i] ; } if (do_scale) { /* Z = R Z */ #ifndef NRECIPROCAL if (do_recip) { /* multiply by the scale factors */ for (i = 0 ; i < n ; i++) { SCALE (Z [i], Rs [i]) ; } } else #endif { /* divide by the scale factors */ for (i = 0 ; i < n ; i++) { SCALE_DIV (Z [i], Rs [i]) ; } } flops += SCALE_FLOPS * n ; } flops += ASSEMBLE_FLOPS * n ; /* X += Z */ for (i = 0 ; i < n ; i++) { /* X [i] += Z [i] ; was +=W[i] in v4.3, which is wrong */ ASSEMBLE (X [i], Z [i]) ; /* bug fix, v4.3.1 */ } } /* -------------------------------------------------------------- */ /* sparse backward error estimate */ /* -------------------------------------------------------------- */ if (irstep > 0) { /* ---------------------------------------------------------- */ /* A.' is stored by row */ /* W (i) = (b - A.' x)_i, residual */ /* Z (i) = (|A.'||x|)_i */ /* ---------------------------------------------------------- */ flops += (MULT_FLOPS + DECREMENT_FLOPS + ABS_FLOPS + 1) * nz ; for (i = 0 ; i < n ; i++) { /* wi = B [i] ; */ ASSIGN (wi, Bx, Bz, i, Bsplit) ; z2i = 0. ; p2 = Ap [i+1] ; for (p = Ap [i] ; p < p2 ; p++) { /* axx = Ax [p] * X [Ai [p]] ; */ ASSIGN (aij, Ax, Az, p, AXsplit) ; MULT (axx, aij, X [Ai [p]]) ; /* wi -= axx ; */ DECREMENT (wi, axx) ; /* z2i += ABS (axx) ; */ ABS (d, axx) ; z2i += d ; } W [i] = wi ; Z2 [i] = z2i ; } flops += (2*ABS_FLOPS + 5) * n ; if (do_step (omega, step, B2, X, W, Y, Z2, S, n, Info)) { /* iterative refinement is done */ break ; } } } } else if (sys == UMFPACK_Pt_L) { /* ------------------------------------------------------------------ */ /* Solve P'Lx=b: x = L \ Pb */ /* ------------------------------------------------------------------ */ for (i = 0 ; i < n ; i++) { /* X [i] = B [Rperm [i]] ; */ ASSIGN (X [i], Bx, Bz, Rperm [i], Bsplit) ; } flops = UMF_lsolve (Numeric, X, Pattern) ; status = UMFPACK_OK ; } else if (sys == UMFPACK_L) { /* ------------------------------------------------------------------ */ /* Solve Lx=b: x = L \ b */ /* ------------------------------------------------------------------ */ for (i = 0 ; i < n ; i++) { /* X [i] = B [i] ; */ ASSIGN (X [i], Bx, Bz, i, Bsplit) ; } flops = UMF_lsolve (Numeric, X, Pattern) ; status = UMFPACK_OK ; } else if (sys == UMFPACK_Lt_P) { /* ------------------------------------------------------------------ */ /* Solve L'Px=b: x = P' (L' \ b) */ /* ------------------------------------------------------------------ */ for (i = 0 ; i < n ; i++) { /* W [i] = B [i] ; */ ASSIGN (W [i], Bx, Bz, i, Bsplit) ; } flops = UMF_lhsolve (Numeric, W, Pattern) ; for (i = 0 ; i < n ; i++) { X [Rperm [i]] = W [i] ; } status = UMFPACK_OK ; } else if (sys == UMFPACK_Lat_P) { /* ------------------------------------------------------------------ */ /* Solve L.'Px=b: x = P' (L.' \ b) */ /* ------------------------------------------------------------------ */ for (i = 0 ; i < n ; i++) { /* W [i] = B [i] ; */ ASSIGN (W [i], Bx, Bz, i, Bsplit) ; } flops = UMF_ltsolve (Numeric, W, Pattern) ; for (i = 0 ; i < n ; i++) { X [Rperm [i]] = W [i] ; } status = UMFPACK_OK ; } else if (sys == UMFPACK_Lt) { /* ------------------------------------------------------------------ */ /* Solve L'x=b: x = L' \ b */ /* ------------------------------------------------------------------ */ for (i = 0 ; i < n ; i++) { /* X [i] = B [i] ; */ ASSIGN (X [i], Bx, Bz, i, Bsplit) ; } flops = UMF_lhsolve (Numeric, X, Pattern) ; status = UMFPACK_OK ; } else if (sys == UMFPACK_Lat) { /* ------------------------------------------------------------------ */ /* Solve L.'x=b: x = L.' \ b */ /* ------------------------------------------------------------------ */ for (i = 0 ; i < n ; i++) { /* X [i] = B [i] ; */ ASSIGN (X [i], Bx, Bz, i, Bsplit) ; } flops = UMF_ltsolve (Numeric, X, Pattern) ; status = UMFPACK_OK ; } else if (sys == UMFPACK_U_Qt) { /* ------------------------------------------------------------------ */ /* Solve UQ'x=b: x = Q (U \ b) */ /* ------------------------------------------------------------------ */ for (i = 0 ; i < n ; i++) { /* W [i] = B [i] ; */ ASSIGN (W [i], Bx, Bz, i, Bsplit) ; } flops = UMF_usolve (Numeric, W, Pattern) ; for (i = 0 ; i < n ; i++) { X [Cperm [i]] = W [i] ; } } else if (sys == UMFPACK_U) { /* ------------------------------------------------------------------ */ /* Solve Ux=b: x = U \ b */ /* ------------------------------------------------------------------ */ for (i = 0 ; i < n ; i++) { /* X [i] = B [i] ; */ ASSIGN (X [i], Bx, Bz, i, Bsplit) ; } flops = UMF_usolve (Numeric, X, Pattern) ; } else if (sys == UMFPACK_Q_Ut) { /* ------------------------------------------------------------------ */ /* Solve QU'x=b: x = U' \ Q'b */ /* ------------------------------------------------------------------ */ for (i = 0 ; i < n ; i++) { /* X [i] = B [Cperm [i]] ; */ ASSIGN (X [i], Bx, Bz, Cperm [i], Bsplit) ; } flops = UMF_uhsolve (Numeric, X, Pattern) ; } else if (sys == UMFPACK_Q_Uat) { /* ------------------------------------------------------------------ */ /* Solve QU.'x=b: x = U.' \ Q'b */ /* ------------------------------------------------------------------ */ for (i = 0 ; i < n ; i++) { /* X [i] = B [Cperm [i]] ; */ ASSIGN (X [i], Bx, Bz, Cperm [i], Bsplit) ; } flops = UMF_utsolve (Numeric, X, Pattern) ; } else if (sys == UMFPACK_Ut) { /* ------------------------------------------------------------------ */ /* Solve U'x=b: x = U' \ b */ /* ------------------------------------------------------------------ */ for (i = 0 ; i < n ; i++) { /* X [i] = B [i] ; */ ASSIGN (X [i], Bx, Bz, i, Bsplit) ; } flops = UMF_uhsolve (Numeric, X, Pattern) ; } else if (sys == UMFPACK_Uat) { /* ------------------------------------------------------------------ */ /* Solve U'x=b: x = U' \ b */ /* ------------------------------------------------------------------ */ for (i = 0 ; i < n ; i++) { /* X [i] = B [i] ; */ ASSIGN (X [i], Bx, Bz, i, Bsplit) ; } flops = UMF_utsolve (Numeric, X, Pattern) ; } else { return (UMFPACK_ERROR_invalid_system) ; } #ifdef COMPLEX /* copy the solution back, from Entry X [ ] to double Xx [ ] and Xz [ ] */ if (AXsplit) { for (i = 0 ; i < n ; i++) { Xx [i] = REAL_COMPONENT (X [i]) ; Xz [i] = IMAG_COMPONENT (X [i]) ; } } #endif /* return UMFPACK_OK, or UMFPACK_WARNING_singular_matrix */ /* Note that systems involving just L will return UMFPACK_OK */ Info [UMFPACK_SOLVE_FLOPS] = flops ; return (status) ; }