static void lsolve_numeric ( /* input, not modified on output: */ Int Pinv [ ], /* Pinv [i] = k if i is kth pivot row, or EMPTY if row i * is not yet pivotal. */ Unit *LU, /* LU factors (pattern and values) */ Int Stack [ ], /* stack for dfs */ Int Lip [ ], /* size n, Lip [k] is position in LU of column k of L */ Int top, /* top of stack on input */ Int n, /* A is n-by-n */ Int Llen [ ], /* size n, Llen [k] = # nonzeros in column k of L */ /* output, must be zero on input: */ Entry X [ ] /* size n, initially zero. On output, * X [Ui [up1..up-1]] and X [Li [lp1..lp-1]] * contains the solution. */ ) { Entry xj ; Entry *Lx ; Int *Li ; Int p, s, j, jnew, len ; /* solve Lx=b */ for (s = top ; s < n ; s++) { /* forward solve with column j of L */ j = Stack [s] ; jnew = Pinv [j] ; ASSERT (jnew >= 0) ; xj = X [j] ; GET_POINTER (LU, Lip, Llen, Li, Lx, jnew, len) ; ASSERT (Lip [jnew] <= Lip [jnew+1]) ; for (p = 0 ; p < len ; p++) { /*X [Li [p]] -= Lx [p] * xj ; */ MULT_SUB (X [Li [p]], Lx [p], xj) ; } } }
GLOBAL double #ifdef CONJUGATE_SOLVE UMF_lhsolve /* solve L'x=b (complex conjugate transpose) */ #else UMF_ltsolve /* solve L.'x=b (array transpose) */ #endif ( NumericType *Numeric, Entry X [ ], /* b on input, solution x on output */ Int Pattern [ ] /* a work array of size n */ ) { Entry xk ; Entry *xp, *Lval ; Int k, deg, *ip, j, row, *Lpos, *Lilen, kstart, kend, *Lip, llen, lp, pos, npiv, n1, *Li ; /* ---------------------------------------------------------------------- */ if (Numeric->n_row != Numeric->n_col) return (0.) ; npiv = Numeric->npiv ; Lpos = Numeric->Lpos ; Lilen = Numeric->Lilen ; Lip = Numeric->Lip ; kstart = npiv ; n1 = Numeric->n1 ; #ifndef NDEBUG DEBUG4 (("Ltsolve start:\n")) ; for (j = 0 ; j < Numeric->n_row ; j++) { DEBUG4 (("Ltsolve start "ID": ", j)) ; EDEBUG4 (X [j]) ; DEBUG4 (("\n")) ; } #endif /* ---------------------------------------------------------------------- */ /* non-singletons */ /* ---------------------------------------------------------------------- */ for (kend = npiv-1 ; kend >= n1 ; kend = kstart-1) { /* ------------------------------------------------------------------ */ /* find the start of this Lchain */ /* ------------------------------------------------------------------ */ /* for (kstart = kend ; kstart >= 0 && Lip [kstart] > 0 ; kstart--) ; */ kstart = kend ; while (kstart >= 0 && Lip [kstart] > 0) { kstart-- ; } /* the Lchain goes from kstart to kend */ /* ------------------------------------------------------------------ */ /* scan the whole chain to find the pattern of the last column of L */ /* ------------------------------------------------------------------ */ deg = 0 ; DEBUG4 (("start of chain for column of L\n")) ; for (k = kstart ; k <= kend ; k++) { ASSERT (k >= 0 && k < npiv) ; /* -------------------------------------------------------------- */ /* make column k of L in Pattern [0..deg-1] */ /* -------------------------------------------------------------- */ /* remove pivot row */ pos = Lpos [k] ; if (pos != EMPTY) { DEBUG4 ((" k "ID" removing row "ID" at position "ID"\n", k, Pattern [pos], pos)) ; ASSERT (k != kstart) ; ASSERT (deg > 0) ; ASSERT (pos >= 0 && pos < deg) ; ASSERT (Pattern [pos] == k) ; Pattern [pos] = Pattern [--deg] ; } /* concatenate the pattern */ lp = Lip [k] ; if (k == kstart) { lp = -lp ; } ASSERT (lp > 0) ; ip = (Int *) (Numeric->Memory + lp) ; llen = Lilen [k] ; for (j = 0 ; j < llen ; j++) { row = *ip++ ; DEBUG4 ((" row "ID" k "ID"\n", row, k)) ; ASSERT (row > k) ; Pattern [deg++] = row ; } } /* Pattern [0..deg-1] is now the pattern of column kend */ /* ------------------------------------------------------------------ */ /* solve using this chain, in reverse order */ /* ------------------------------------------------------------------ */ DEBUG4 (("Unwinding Lchain\n")) ; for (k = kend ; k >= kstart ; k--) { /* -------------------------------------------------------------- */ /* use column k of L */ /* -------------------------------------------------------------- */ ASSERT (k >= 0 && k < npiv) ; lp = Lip [k] ; if (k == kstart) { lp = -lp ; } ASSERT (lp > 0) ; llen = Lilen [k] ; xp = (Entry *) (Numeric->Memory + lp + UNITS (Int, llen)) ; xk = X [k] ; for (j = 0 ; j < deg ; j++) { DEBUG4 ((" row "ID" k "ID" value", Pattern [j], k)) ; EDEBUG4 (*xp) ; DEBUG4 (("\n")) ; #ifdef CONJUGATE_SOLVE /* xk -= X [Pattern [j]] * conjugate (*xp) ; */ MULT_SUB_CONJ (xk, X [Pattern [j]], *xp) ; #else /* xk -= X [Pattern [j]] * (*xp) ; */ MULT_SUB (xk, X [Pattern [j]], *xp) ; #endif xp++ ; } X [k] = xk ; /* -------------------------------------------------------------- */ /* construct column k-1 of L */ /* -------------------------------------------------------------- */ /* un-concatenate the pattern */ deg -= llen ; /* add pivot row */ pos = Lpos [k] ; if (pos != EMPTY) { DEBUG4 ((" k "ID" adding row "ID" at position "ID"\n", k, k, pos)) ; ASSERT (k != kstart) ; ASSERT (pos >= 0 && pos <= deg) ; Pattern [deg++] = Pattern [pos] ; Pattern [pos] = k ; } } } /* ---------------------------------------------------------------------- */ /* singletons */ /* ---------------------------------------------------------------------- */ for (k = n1 - 1 ; k >= 0 ; k--) { DEBUG4 (("Singleton k "ID"\n", k)) ; deg = Lilen [k] ; if (deg > 0) { xk = X [k] ; lp = Lip [k] ; Li = (Int *) (Numeric->Memory + lp) ; lp += UNITS (Int, deg) ; Lval = (Entry *) (Numeric->Memory + lp) ; for (j = 0 ; j < deg ; j++) { DEBUG4 ((" row "ID" k "ID" value", Li [j], k)) ; EDEBUG4 (Lval [j]) ; DEBUG4 (("\n")) ; #ifdef CONJUGATE_SOLVE /* xk -= X [Li [j]] * conjugate (Lval [j]) ; */ MULT_SUB_CONJ (xk, X [Li [j]], Lval [j]) ; #else /* xk -= X [Li [j]] * Lval [j] ; */ MULT_SUB (xk, X [Li [j]], Lval [j]) ; #endif } X [k] = xk ; } } #ifndef NDEBUG for (j = 0 ; j < Numeric->n_row ; j++) { DEBUG4 (("Ltsolve done "ID": ", j)) ; EDEBUG4 (X [j]) ; DEBUG4 (("\n")) ; } DEBUG4 (("Ltsolve done.\n")) ; #endif return (MULTSUB_FLOPS * ((double) Numeric->lnz)) ; }
GLOBAL void UMF_blas3_update ( WorkType *Work ) { /* ---------------------------------------------------------------------- */ /* local variables */ /* ---------------------------------------------------------------------- */ Entry *L, *U, *C, *LU ; Int i, j, s, k, m, n, d, nb, dc ; #ifndef NBLAS Int blas_ok = TRUE ; #else #define blas_ok FALSE #endif DEBUG5 (("In UMF_blas3_update "ID" "ID" "ID"\n", Work->fnpiv, Work->fnrows, Work->fncols)) ; k = Work->fnpiv ; if (k == 0) { /* no work to do */ return ; } m = Work->fnrows ; n = Work->fncols ; d = Work->fnr_curr ; dc = Work->fnc_curr ; nb = Work->nb ; ASSERT (d >= 0 && (d % 2) == 1) ; C = Work->Fcblock ; /* ldc is fnr_curr */ L = Work->Flblock ; /* ldl is fnr_curr */ U = Work->Fublock ; /* ldu is fnc_curr, stored by rows */ LU = Work->Flublock ; /* nb-by-nb */ #ifndef NDEBUG DEBUG5 (("DO RANK-NB UPDATE of frontal:\n")) ; DEBUG5 (("DGEMM : "ID" "ID" "ID"\n", k, m, n)) ; DEBUG7 (("C block: ")) ; UMF_dump_dense (C, d, m, n) ; DEBUG7 (("A block: ")) ; UMF_dump_dense (L, d, m, k) ; DEBUG7 (("B' block: ")) ; UMF_dump_dense (U, dc, n, k) ; DEBUG7 (("LU block: ")) ; UMF_dump_dense (LU, nb, k, k) ; #endif if (k == 1) { #ifndef NBLAS BLAS_GER (m, n, L, U, C, d) ; #endif if (!blas_ok) { /* rank-1 outer product to update the C block */ for (j = 0 ; j < n ; j++) { Entry u_j = U [j] ; if (IS_NONZERO (u_j)) { Entry *c_ij, *l_is ; c_ij = & C [j*d] ; l_is = & L [0] ; #pragma ivdep for (i = 0 ; i < m ; i++) { /* C [i+j*d]-= L [i] * U [j] */ MULT_SUB (*c_ij, *l_is, u_j) ; c_ij++ ; l_is++ ; } } } } } else { /* triangular solve to update the U block */ #ifndef NBLAS BLAS_TRSM_RIGHT (n, k, LU, nb, U, dc) ; #endif if (!blas_ok) { /* use plain C code if no BLAS at compile time, or if integer * overflow has occurred */ for (s = 0 ; s < k ; s++) { for (i = s+1 ; i < k ; i++) { Entry l_is = LU [i+s*nb] ; if (IS_NONZERO (l_is)) { Entry *u_ij, *u_sj ; u_ij = & U [i*dc] ; u_sj = & U [s*dc] ; #pragma ivdep for (j = 0 ; j < n ; j++) { /* U [i*dc+j] -= LU [i+s*nb] * U [s*dc+j] ; */ MULT_SUB (*u_ij, l_is, *u_sj) ; u_ij++ ; u_sj++ ; } } } } } /* rank-k outer product to update the C block */ /* C = C - L*U' (U is stored by rows, not columns) */ #ifndef NBLAS BLAS_GEMM (m, n, k, L, U, dc, C, d) ; #endif if (!blas_ok) { /* use plain C code if no BLAS at compile time, or if integer * overflow has occurred */ for (s = 0 ; s < k ; s++) { for (j = 0 ; j < n ; j++) { Entry u_sj = U [j+s*dc] ; if (IS_NONZERO (u_sj)) { Entry *c_ij, *l_is ; c_ij = & C [j*d] ; l_is = & L [s*d] ; #pragma ivdep for (i = 0 ; i < m ; i++) { /* C [i+j*d]-= L [i+s*d] * U [s*dc+j] */ MULT_SUB (*c_ij, *l_is, u_sj) ; c_ij++ ; l_is++ ; } } } } } } #ifndef NDEBUG DEBUG5 (("RANK-NB UPDATE of frontal done:\n")) ; DEBUG5 (("DGEMM : "ID" "ID" "ID"\n", k, m, n)) ; DEBUG7 (("C block: ")) ; UMF_dump_dense (C, d, m, n) ; DEBUG7 (("A block: ")) ; UMF_dump_dense (L, d, m, k) ; DEBUG7 (("B' block: ")) ; UMF_dump_dense (U, dc, n, k) ; DEBUG7 (("LU block: ")) ; UMF_dump_dense (LU, nb, k, k) ; #endif DEBUG2 (("blas3 "ID" "ID" "ID"\n", k, Work->fnrows, Work->fncols)) ; }
Int KLU_solve ( /* inputs, not modified */ KLU_symbolic *Symbolic, KLU_numeric *Numeric, Int d, /* leading dimension of B */ Int nrhs, /* number of right-hand-sides */ /* right-hand-side on input, overwritten with solution to Ax=b on output */ double B [ ], /* size n*nrhs, in column-oriented form, with * leading dimension d. */ /* --------------- */ KLU_common *Common ) { Entry x [4], offik, s ; double rs, *Rs ; Entry *Offx, *X, *Bz, *Udiag ; Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ; Unit **LUbx ; Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ; /* ---------------------------------------------------------------------- */ /* check inputs */ /* ---------------------------------------------------------------------- */ if (Common == NULL) { return (FALSE) ; } if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 || B == NULL) { Common->status = KLU_INVALID ; return (FALSE) ; } Common->status = KLU_OK ; /* ---------------------------------------------------------------------- */ /* get the contents of the Symbolic object */ /* ---------------------------------------------------------------------- */ Bz = (Entry *) B ; n = Symbolic->n ; nblocks = Symbolic->nblocks ; Q = Symbolic->Q ; R = Symbolic->R ; /* ---------------------------------------------------------------------- */ /* get the contents of the Numeric object */ /* ---------------------------------------------------------------------- */ ASSERT (nblocks == Numeric->nblocks) ; Pnum = Numeric->Pnum ; Offp = Numeric->Offp ; Offi = Numeric->Offi ; Offx = (Entry *) Numeric->Offx ; Lip = Numeric->Lip ; Llen = Numeric->Llen ; Uip = Numeric->Uip ; Ulen = Numeric->Ulen ; LUbx = (Unit **) Numeric->LUbx ; Udiag = Numeric->Udiag ; Rs = Numeric->Rs ; X = (Entry *) Numeric->Xwork ; ASSERT (KLU_valid (n, Offp, Offi, Offx)) ; /* ---------------------------------------------------------------------- */ /* solve in chunks of 4 columns at a time */ /* ---------------------------------------------------------------------- */ for (chunk = 0 ; chunk < nrhs ; chunk += 4) { /* ------------------------------------------------------------------ */ /* get the size of the current chunk */ /* ------------------------------------------------------------------ */ nr = MIN (nrhs - chunk, 4) ; /* ------------------------------------------------------------------ */ /* scale and permute the right hand side, X = P*(R\B) */ /* ------------------------------------------------------------------ */ if (Rs == NULL) { /* no scaling */ switch (nr) { case 1: for (k = 0 ; k < n ; k++) { X [k] = Bz [Pnum [k]] ; } break ; case 2: for (k = 0 ; k < n ; k++) { i = Pnum [k] ; X [2*k ] = Bz [i ] ; X [2*k + 1] = Bz [i + d ] ; } break ; case 3: for (k = 0 ; k < n ; k++) { i = Pnum [k] ; X [3*k ] = Bz [i ] ; X [3*k + 1] = Bz [i + d ] ; X [3*k + 2] = Bz [i + d*2] ; } break ; case 4: for (k = 0 ; k < n ; k++) { i = Pnum [k] ; X [4*k ] = Bz [i ] ; X [4*k + 1] = Bz [i + d ] ; X [4*k + 2] = Bz [i + d*2] ; X [4*k + 3] = Bz [i + d*3] ; } break ; } } else { switch (nr) { case 1: for (k = 0 ; k < n ; k++) { SCALE_DIV_ASSIGN (X [k], Bz [Pnum [k]], Rs [k]) ; } break ; case 2: for (k = 0 ; k < n ; k++) { i = Pnum [k] ; rs = Rs [k] ; SCALE_DIV_ASSIGN (X [2*k], Bz [i], rs) ; SCALE_DIV_ASSIGN (X [2*k + 1], Bz [i + d], rs) ; } break ; case 3: for (k = 0 ; k < n ; k++) { i = Pnum [k] ; rs = Rs [k] ; SCALE_DIV_ASSIGN (X [3*k], Bz [i], rs) ; SCALE_DIV_ASSIGN (X [3*k + 1], Bz [i + d], rs) ; SCALE_DIV_ASSIGN (X [3*k + 2], Bz [i + d*2], rs) ; } break ; case 4: for (k = 0 ; k < n ; k++) { i = Pnum [k] ; rs = Rs [k] ; SCALE_DIV_ASSIGN (X [4*k], Bz [i], rs) ; SCALE_DIV_ASSIGN (X [4*k + 1], Bz [i + d], rs) ; SCALE_DIV_ASSIGN (X [4*k + 2], Bz [i + d*2], rs) ; SCALE_DIV_ASSIGN (X [4*k + 3], Bz [i + d*3], rs) ; } break ; } } /* ------------------------------------------------------------------ */ /* solve X = (L*U + Off)\X */ /* ------------------------------------------------------------------ */ for (block = nblocks-1 ; block >= 0 ; block--) { /* -------------------------------------------------------------- */ /* the block of size nk is from rows/columns k1 to k2-1 */ /* -------------------------------------------------------------- */ k1 = R [block] ; k2 = R [block+1] ; nk = k2 - k1 ; PRINTF (("solve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ; /* solve the block system */ if (nk == 1) { s = Udiag [k1] ; switch (nr) { case 1: DIV (X [k1], X [k1], s) ; break ; case 2: DIV (X [2*k1], X [2*k1], s) ; DIV (X [2*k1 + 1], X [2*k1 + 1], s) ; break ; case 3: DIV (X [3*k1], X [3*k1], s) ; DIV (X [3*k1 + 1], X [3*k1 + 1], s) ; DIV (X [3*k1 + 2], X [3*k1 + 2], s) ; break ; case 4: DIV (X [4*k1], X [4*k1], s) ; DIV (X [4*k1 + 1], X [4*k1 + 1], s) ; DIV (X [4*k1 + 2], X [4*k1 + 2], s) ; DIV (X [4*k1 + 3], X [4*k1 + 3], s) ; break ; } } else { KLU_lsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr, X + nr*k1) ; KLU_usolve (nk, Uip + k1, Ulen + k1, LUbx [block], Udiag + k1, nr, X + nr*k1) ; } /* -------------------------------------------------------------- */ /* block back-substitution for the off-diagonal-block entries */ /* -------------------------------------------------------------- */ if (block > 0) { switch (nr) { case 1: for (k = k1 ; k < k2 ; k++) { pend = Offp [k+1] ; x [0] = X [k] ; for (p = Offp [k] ; p < pend ; p++) { MULT_SUB (X [Offi [p]], Offx [p], x [0]) ; } } break ; case 2: for (k = k1 ; k < k2 ; k++) { pend = Offp [k+1] ; x [0] = X [2*k ] ; x [1] = X [2*k + 1] ; for (p = Offp [k] ; p < pend ; p++) { i = Offi [p] ; offik = Offx [p] ; MULT_SUB (X [2*i], offik, x [0]) ; MULT_SUB (X [2*i + 1], offik, x [1]) ; } } break ; case 3: for (k = k1 ; k < k2 ; k++) { pend = Offp [k+1] ; x [0] = X [3*k ] ; x [1] = X [3*k + 1] ; x [2] = X [3*k + 2] ; for (p = Offp [k] ; p < pend ; p++) { i = Offi [p] ; offik = Offx [p] ; MULT_SUB (X [3*i], offik, x [0]) ; MULT_SUB (X [3*i + 1], offik, x [1]) ; MULT_SUB (X [3*i + 2], offik, x [2]) ; } } break ; case 4: for (k = k1 ; k < k2 ; k++) { pend = Offp [k+1] ; x [0] = X [4*k ] ; x [1] = X [4*k + 1] ; x [2] = X [4*k + 2] ; x [3] = X [4*k + 3] ; for (p = Offp [k] ; p < pend ; p++) { i = Offi [p] ; offik = Offx [p] ; MULT_SUB (X [4*i], offik, x [0]) ; MULT_SUB (X [4*i + 1], offik, x [1]) ; MULT_SUB (X [4*i + 2], offik, x [2]) ; MULT_SUB (X [4*i + 3], offik, x [3]) ; } } break ; } } } /* ------------------------------------------------------------------ */ /* permute the result, Bz = Q*X */ /* ------------------------------------------------------------------ */ switch (nr) { case 1: for (k = 0 ; k < n ; k++) { Bz [Q [k]] = X [k] ; } break ; case 2: for (k = 0 ; k < n ; k++) { i = Q [k] ; Bz [i ] = X [2*k ] ; Bz [i + d ] = X [2*k + 1] ; } break ; case 3: for (k = 0 ; k < n ; k++) { i = Q [k] ; Bz [i ] = X [3*k ] ; Bz [i + d ] = X [3*k + 1] ; Bz [i + d*2] = X [3*k + 2] ; } break ; case 4: for (k = 0 ; k < n ; k++) { i = Q [k] ; Bz [i ] = X [4*k ] ; Bz [i + d ] = X [4*k + 1] ; Bz [i + d*2] = X [4*k + 2] ; Bz [i + d*3] = X [4*k + 3] ; } break ; } /* ------------------------------------------------------------------ */ /* go to the next chunk of B */ /* ------------------------------------------------------------------ */ Bz += d*4 ; } return (TRUE) ; }
void KLU_utsolve ( /* inputs, not modified: */ Int n, Int Uip [ ], Int Ulen [ ], Unit LU [ ], Entry Udiag [ ], Int nrhs, #ifdef COMPLEX Int conj_solve, #endif /* right-hand-side on input, solution to Ux=b on output */ Entry X [ ] ) { Entry x [4], uik, ukk ; Int k, p, len, i ; Int *Ui ; Entry *Ux ; switch (nrhs) { case 1: for (k = 0 ; k < n ; k++) { GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ; x [0] = X [k] ; for (p = 0 ; p < len ; p++) { #ifdef COMPLEX if (conj_solve) { /* x [0] -= CONJ (Ux [p]) * X [Ui [p]] ; */ MULT_SUB_CONJ (x [0], X [Ui [p]], Ux [p]) ; } else #endif { /* x [0] -= Ux [p] * X [Ui [p]] ; */ MULT_SUB (x [0], Ux [p], X [Ui [p]]) ; } } #ifdef COMPLEX if (conj_solve) { CONJ (ukk, Udiag [k]) ; } else #endif { ukk = Udiag [k] ; } DIV (X [k], x [0], ukk) ; } break ; case 2: for (k = 0 ; k < n ; k++) { GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ; x [0] = X [2*k ] ; x [1] = X [2*k + 1] ; for (p = 0 ; p < len ; p++) { i = Ui [p] ; #ifdef COMPLEX if (conj_solve) { CONJ (uik, Ux [p]) ; } else #endif { uik = Ux [p] ; } MULT_SUB (x [0], uik, X [2*i]) ; MULT_SUB (x [1], uik, X [2*i + 1]) ; } #ifdef COMPLEX if (conj_solve) { CONJ (ukk, Udiag [k]) ; } else #endif { ukk = Udiag [k] ; } DIV (X [2*k], x [0], ukk) ; DIV (X [2*k + 1], x [1], ukk) ; } break ; case 3: for (k = 0 ; k < n ; k++) { GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ; x [0] = X [3*k ] ; x [1] = X [3*k + 1] ; x [2] = X [3*k + 2] ; for (p = 0 ; p < len ; p++) { i = Ui [p] ; #ifdef COMPLEX if (conj_solve) { CONJ (uik, Ux [p]) ; } else #endif { uik = Ux [p] ; } MULT_SUB (x [0], uik, X [3*i]) ; MULT_SUB (x [1], uik, X [3*i + 1]) ; MULT_SUB (x [2], uik, X [3*i + 2]) ; } #ifdef COMPLEX if (conj_solve) { CONJ (ukk, Udiag [k]) ; } else #endif { ukk = Udiag [k] ; } DIV (X [3*k], x [0], ukk) ; DIV (X [3*k + 1], x [1], ukk) ; DIV (X [3*k + 2], x [2], ukk) ; } break ; case 4: for (k = 0 ; k < n ; k++) { GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ; x [0] = X [4*k ] ; x [1] = X [4*k + 1] ; x [2] = X [4*k + 2] ; x [3] = X [4*k + 3] ; for (p = 0 ; p < len ; p++) { i = Ui [p] ; #ifdef COMPLEX if (conj_solve) { CONJ (uik, Ux [p]) ; } else #endif { uik = Ux [p] ; } MULT_SUB (x [0], uik, X [4*i]) ; MULT_SUB (x [1], uik, X [4*i + 1]) ; MULT_SUB (x [2], uik, X [4*i + 2]) ; MULT_SUB (x [3], uik, X [4*i + 3]) ; } #ifdef COMPLEX if (conj_solve) { CONJ (ukk, Udiag [k]) ; } else #endif { ukk = Udiag [k] ; } DIV (X [4*k], x [0], ukk) ; DIV (X [4*k + 1], x [1], ukk) ; DIV (X [4*k + 2], x [2], ukk) ; DIV (X [4*k + 3], x [3], ukk) ; } break ; } }
void KLU_ltsolve ( /* inputs, not modified: */ Int n, Int Lip [ ], Int Llen [ ], Unit LU [ ], Int nrhs, #ifdef COMPLEX Int conj_solve, #endif /* right-hand-side on input, solution to L'x=b on output */ Entry X [ ] ) { Entry x [4], lik ; Int *Li ; Entry *Lx ; Int k, p, len, i ; switch (nrhs) { case 1: for (k = n-1 ; k >= 0 ; k--) { GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ; x [0] = X [k] ; for (p = 0 ; p < len ; p++) { #ifdef COMPLEX if (conj_solve) { /* x [0] -= CONJ (Lx [p]) * X [Li [p]] ; */ MULT_SUB_CONJ (x [0], X [Li [p]], Lx [p]) ; } else #endif { /*x [0] -= Lx [p] * X [Li [p]] ;*/ MULT_SUB (x [0], Lx [p], X [Li [p]]) ; } } X [k] = x [0] ; } break ; case 2: for (k = n-1 ; k >= 0 ; k--) { x [0] = X [2*k ] ; x [1] = X [2*k + 1] ; GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ; for (p = 0 ; p < len ; p++) { i = Li [p] ; #ifdef COMPLEX if (conj_solve) { CONJ (lik, Lx [p]) ; } else #endif { lik = Lx [p] ; } MULT_SUB (x [0], lik, X [2*i]) ; MULT_SUB (x [1], lik, X [2*i + 1]) ; } X [2*k ] = x [0] ; X [2*k + 1] = x [1] ; } break ; case 3: for (k = n-1 ; k >= 0 ; k--) { x [0] = X [3*k ] ; x [1] = X [3*k + 1] ; x [2] = X [3*k + 2] ; GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ; for (p = 0 ; p < len ; p++) { i = Li [p] ; #ifdef COMPLEX if (conj_solve) { CONJ (lik, Lx [p]) ; } else #endif { lik = Lx [p] ; } MULT_SUB (x [0], lik, X [3*i]) ; MULT_SUB (x [1], lik, X [3*i + 1]) ; MULT_SUB (x [2], lik, X [3*i + 2]) ; } X [3*k ] = x [0] ; X [3*k + 1] = x [1] ; X [3*k + 2] = x [2] ; } break ; case 4: for (k = n-1 ; k >= 0 ; k--) { x [0] = X [4*k ] ; x [1] = X [4*k + 1] ; x [2] = X [4*k + 2] ; x [3] = X [4*k + 3] ; GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ; for (p = 0 ; p < len ; p++) { i = Li [p] ; #ifdef COMPLEX if (conj_solve) { CONJ (lik, Lx [p]) ; } else #endif { lik = Lx [p] ; } MULT_SUB (x [0], lik, X [4*i]) ; MULT_SUB (x [1], lik, X [4*i + 1]) ; MULT_SUB (x [2], lik, X [4*i + 2]) ; MULT_SUB (x [3], lik, X [4*i + 3]) ; } X [4*k ] = x [0] ; X [4*k + 1] = x [1] ; X [4*k + 2] = x [2] ; X [4*k + 3] = x [3] ; } break ; } }
void KLU_usolve ( /* inputs, not modified: */ Int n, Int Uip [ ], Int Ulen [ ], Unit LU [ ], Entry Udiag [ ], Int nrhs, /* right-hand-side on input, solution to Ux=b on output */ Entry X [ ] ) { Entry x [4], uik, ukk ; Int *Ui ; Entry *Ux ; Int k, p, len, i ; switch (nrhs) { case 1: for (k = n-1 ; k >= 0 ; k--) { GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ; /* x [0] = X [k] / Udiag [k] ; */ DIV (x [0], X [k], Udiag [k]) ; X [k] = x [0] ; for (p = 0 ; p < len ; p++) { /* X [Ui [p]] -= Ux [p] * x [0] ; */ MULT_SUB (X [Ui [p]], Ux [p], x [0]) ; } } break ; case 2: for (k = n-1 ; k >= 0 ; k--) { GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ; ukk = Udiag [k] ; /* x [0] = X [2*k ] / ukk ; x [1] = X [2*k + 1] / ukk ; */ DIV (x [0], X [2*k], ukk) ; DIV (x [1], X [2*k + 1], ukk) ; X [2*k ] = x [0] ; X [2*k + 1] = x [1] ; for (p = 0 ; p < len ; p++) { i = Ui [p] ; uik = Ux [p] ; /* X [2*i ] -= uik * x [0] ; X [2*i + 1] -= uik * x [1] ; */ MULT_SUB (X [2*i], uik, x [0]) ; MULT_SUB (X [2*i + 1], uik, x [1]) ; } } break ; case 3: for (k = n-1 ; k >= 0 ; k--) { GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ; ukk = Udiag [k] ; DIV (x [0], X [3*k], ukk) ; DIV (x [1], X [3*k + 1], ukk) ; DIV (x [2], X [3*k + 2], ukk) ; X [3*k ] = x [0] ; X [3*k + 1] = x [1] ; X [3*k + 2] = x [2] ; for (p = 0 ; p < len ; p++) { i = Ui [p] ; uik = Ux [p] ; MULT_SUB (X [3*i], uik, x [0]) ; MULT_SUB (X [3*i + 1], uik, x [1]) ; MULT_SUB (X [3*i + 2], uik, x [2]) ; } } break ; case 4: for (k = n-1 ; k >= 0 ; k--) { GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ; ukk = Udiag [k] ; DIV (x [0], X [4*k], ukk) ; DIV (x [1], X [4*k + 1], ukk) ; DIV (x [2], X [4*k + 2], ukk) ; DIV (x [3], X [4*k + 3], ukk) ; X [4*k ] = x [0] ; X [4*k + 1] = x [1] ; X [4*k + 2] = x [2] ; X [4*k + 3] = x [3] ; for (p = 0 ; p < len ; p++) { i = Ui [p] ; uik = Ux [p] ; MULT_SUB (X [4*i], uik, x [0]) ; MULT_SUB (X [4*i + 1], uik, x [1]) ; MULT_SUB (X [4*i + 2], uik, x [2]) ; MULT_SUB (X [4*i + 3], uik, x [3]) ; } } break ; } }
void KLU_lsolve ( /* inputs, not modified: */ Int n, Int Lip [ ], Int Llen [ ], Unit LU [ ], Int nrhs, /* right-hand-side on input, solution to Lx=b on output */ Entry X [ ] ) { Entry x [4], lik ; Int *Li ; Entry *Lx ; Int k, p, len, i ; switch (nrhs) { case 1: for (k = 0 ; k < n ; k++) { x [0] = X [k] ; GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ; /* unit diagonal of L is not stored*/ for (p = 0 ; p < len ; p++) { /* X [Li [p]] -= Lx [p] * x [0] ; */ MULT_SUB (X [Li [p]], Lx [p], x [0]) ; } } break ; case 2: for (k = 0 ; k < n ; k++) { x [0] = X [2*k ] ; x [1] = X [2*k + 1] ; GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ; for (p = 0 ; p < len ; p++) { i = Li [p] ; lik = Lx [p] ; MULT_SUB (X [2*i], lik, x [0]) ; MULT_SUB (X [2*i + 1], lik, x [1]) ; } } break ; case 3: for (k = 0 ; k < n ; k++) { x [0] = X [3*k ] ; x [1] = X [3*k + 1] ; x [2] = X [3*k + 2] ; GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ; for (p = 0 ; p < len ; p++) { i = Li [p] ; lik = Lx [p] ; MULT_SUB (X [3*i], lik, x [0]) ; MULT_SUB (X [3*i + 1], lik, x [1]) ; MULT_SUB (X [3*i + 2], lik, x [2]) ; } } break ; case 4: for (k = 0 ; k < n ; k++) { x [0] = X [4*k ] ; x [1] = X [4*k + 1] ; x [2] = X [4*k + 2] ; x [3] = X [4*k + 3] ; GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ; for (p = 0 ; p < len ; p++) { i = Li [p] ; lik = Lx [p] ; MULT_SUB (X [4*i], lik, x [0]) ; MULT_SUB (X [4*i + 1], lik, x [1]) ; MULT_SUB (X [4*i + 2], lik, x [2]) ; MULT_SUB (X [4*i + 3], lik, x [3]) ; } } break ; } }
GLOBAL double UMF_usolve ( NumericType *Numeric, Entry X [ ], /* b on input, solution x on output */ Int Pattern [ ] /* a work array of size n */ ) { /* ---------------------------------------------------------------------- */ /* local variables */ /* ---------------------------------------------------------------------- */ Int k, deg, j, *ip, col, *Upos, *Uilen, pos, *Uip, n, ulen, up, newUchain, npiv, n1, *Ui ; Entry *xp, xk, *D, *Uval ; /* ---------------------------------------------------------------------- */ /* get parameters */ /* ---------------------------------------------------------------------- */ if (Numeric->n_row != Numeric->n_col) return (0.) ; n = Numeric->n_row ; npiv = Numeric->npiv ; Upos = Numeric->Upos ; Uilen = Numeric->Uilen ; Uip = Numeric->Uip ; D = Numeric->D ; n1 = Numeric->n1 ; #ifndef NDEBUG DEBUG4 (("Usolve start: npiv = "ID" n = "ID"\n", npiv, n)) ; for (j = 0 ; j < n ; j++) { DEBUG4 (("Usolve start "ID": ", j)) ; EDEBUG4 (X [j]) ; DEBUG4 (("\n")) ; } #endif /* ---------------------------------------------------------------------- */ /* singular case */ /* ---------------------------------------------------------------------- */ /* handle the singular part of D, up to just before the last pivot */ for (k = n-1 ; k >= npiv ; k--) { /* This is an *** intentional *** divide-by-zero, to get Inf or Nan, * as appropriate. It is not a bug. */ ASSERT (IS_ZERO (D [k])) ; xk = X [k] ; /* X [k] = xk / D [k] ; */ DIV (X [k], xk, D [k]) ; } deg = Numeric->ulen ; if (deg > 0) { /* :: make last pivot row of U (singular matrices only) :: */ for (j = 0 ; j < deg ; j++) { DEBUG1 (("Last row of U: j="ID"\n", j)) ; DEBUG1 (("Last row of U: Upattern[j]="ID"\n", Numeric->Upattern [j]) ); Pattern [j] = Numeric->Upattern [j] ; } } /* ---------------------------------------------------------------------- */ /* nonsingletons */ /* ---------------------------------------------------------------------- */ for (k = npiv-1 ; k >= n1 ; k--) { /* ------------------------------------------------------------------ */ /* use row k of U */ /* ------------------------------------------------------------------ */ up = Uip [k] ; ulen = Uilen [k] ; newUchain = (up < 0) ; if (newUchain) { up = -up ; xp = (Entry *) (Numeric->Memory + up + UNITS (Int, ulen)) ; } else { xp = (Entry *) (Numeric->Memory + up) ; } xk = X [k] ; for (j = 0 ; j < deg ; j++) { DEBUG4 ((" k "ID" col "ID" value", k, Pattern [j])) ; EDEBUG4 (*xp) ; DEBUG4 (("\n")) ; /* xk -= X [Pattern [j]] * (*xp) ; */ MULT_SUB (xk, X [Pattern [j]], *xp) ; xp++ ; } /* Go ahead and divide by zero if D [k] is zero */ /* X [k] = xk / D [k] ; */ DIV (X [k], xk, D [k]) ; /* ------------------------------------------------------------------ */ /* make row k-1 of U in Pattern [0..deg-1] */ /* ------------------------------------------------------------------ */ if (k == n1) break ; if (newUchain) { /* next row is a new Uchain */ deg = ulen ; ASSERT (IMPLIES (k == 0, deg == 0)) ; DEBUG4 (("end of chain for row of U "ID" deg "ID"\n", k-1, deg)) ; ip = (Int *) (Numeric->Memory + up) ; for (j = 0 ; j < deg ; j++) { col = *ip++ ; DEBUG4 ((" k "ID" col "ID"\n", k-1, col)) ; ASSERT (k <= col) ; Pattern [j] = col ; } } else { deg -= ulen ; DEBUG4 (("middle of chain for row of U "ID" deg "ID"\n", k, deg)) ; ASSERT (deg >= 0) ; pos = Upos [k] ; if (pos != EMPTY) { /* add the pivot column */ DEBUG4 (("k "ID" add pivot entry at pos "ID"\n", k, pos)) ; ASSERT (pos >= 0 && pos <= deg) ; Pattern [deg++] = Pattern [pos] ; Pattern [pos] = k ; } } } /* ---------------------------------------------------------------------- */ /* singletons */ /* ---------------------------------------------------------------------- */ for (k = n1 - 1 ; k >= 0 ; k--) { deg = Uilen [k] ; xk = X [k] ; DEBUG4 (("Singleton k "ID"\n", k)) ; if (deg > 0) { up = Uip [k] ; Ui = (Int *) (Numeric->Memory + up) ; up += UNITS (Int, deg) ; Uval = (Entry *) (Numeric->Memory + up) ; for (j = 0 ; j < deg ; j++) { DEBUG4 ((" k "ID" col "ID" value", k, Ui [j])) ; EDEBUG4 (Uval [j]) ; DEBUG4 (("\n")) ; /* xk -= X [Ui [j]] * Uval [j] ; */ ASSERT (Ui [j] >= 0 && Ui [j] < n) ; MULT_SUB (xk, X [Ui [j]], Uval [j]) ; } } /* Go ahead and divide by zero if D [k] is zero */ /* X [k] = xk / D [k] ; */ DIV (X [k], xk, D [k]) ; } #ifndef NDEBUG for (j = 0 ; j < n ; j++) { DEBUG4 (("Usolve done "ID": ", j)) ; EDEBUG4 (X [j]) ; DEBUG4 (("\n")) ; } DEBUG4 (("Usolve done.\n")) ; #endif return (DIV_FLOPS * ((double) n) + MULTSUB_FLOPS * ((double) Numeric->unz)); }
GLOBAL double #ifdef CONJUGATE_SOLVE UMF_uhsolve /* solve U'x=b (complex conjugate transpose) */ #else UMF_utsolve /* solve U.'x=b (array transpose) */ #endif ( NumericType *Numeric, Entry X [ ], /* b on input, solution x on output */ Int Pattern [ ] /* a work array of size n */ ) { /* ---------------------------------------------------------------------- */ /* local variables */ /* ---------------------------------------------------------------------- */ Int k, deg, j, *ip, col, *Upos, *Uilen, kstart, kend, up, *Uip, n, uhead, ulen, pos, npiv, n1, *Ui ; Entry *xp, xk, *D, *Uval ; /* ---------------------------------------------------------------------- */ /* get parameters */ /* ---------------------------------------------------------------------- */ if (Numeric->n_row != Numeric->n_col) return (0.) ; n = Numeric->n_row ; npiv = Numeric->npiv ; Upos = Numeric->Upos ; Uilen = Numeric->Uilen ; Uip = Numeric->Uip ; D = Numeric->D ; kend = 0 ; n1 = Numeric->n1 ; #ifndef NDEBUG DEBUG4 (("Utsolve start: npiv "ID" n "ID"\n", npiv, n)) ; for (j = 0 ; j < n ; j++) { DEBUG4 (("Utsolve start "ID": ", j)) ; EDEBUG4 (X [j]) ; DEBUG4 (("\n")) ; } #endif /* ---------------------------------------------------------------------- */ /* singletons */ /* ---------------------------------------------------------------------- */ for (k = 0 ; k < n1 ; k++) { DEBUG4 (("Singleton k "ID"\n", k)) ; /* Go ahead and divide by zero if D [k] is zero. */ #ifdef CONJUGATE_SOLVE /* xk = X [k] / conjugate (D [k]) ; */ DIV_CONJ (xk, X [k], D [k]) ; #else /* xk = X [k] / D [k] ; */ DIV (xk, X [k], D [k]) ; #endif X [k] = xk ; deg = Uilen [k] ; if (deg > 0 && IS_NONZERO (xk)) { up = Uip [k] ; Ui = (Int *) (Numeric->Memory + up) ; up += UNITS (Int, deg) ; Uval = (Entry *) (Numeric->Memory + up) ; for (j = 0 ; j < deg ; j++) { DEBUG4 ((" k "ID" col "ID" value", k, Ui [j])) ; EDEBUG4 (Uval [j]) ; DEBUG4 (("\n")) ; #ifdef CONJUGATE_SOLVE /* X [Ui [j]] -= xk * conjugate (Uval [j]) ; */ MULT_SUB_CONJ (X [Ui [j]], xk, Uval [j]) ; #else /* X [Ui [j]] -= xk * Uval [j] ; */ MULT_SUB (X [Ui [j]], xk, Uval [j]) ; #endif } } } /* ---------------------------------------------------------------------- */ /* nonsingletons */ /* ---------------------------------------------------------------------- */ for (kstart = n1 ; kstart < npiv ; kstart = kend + 1) { /* ------------------------------------------------------------------ */ /* find the end of this Uchain */ /* ------------------------------------------------------------------ */ DEBUG4 (("kstart "ID" kend "ID"\n", kstart, kend)) ; /* for (kend = kstart ; kend < npiv && Uip [kend+1] > 0 ; kend++) ; */ kend = kstart ; while (kend < npiv && Uip [kend+1] > 0) { kend++ ; } /* ------------------------------------------------------------------ */ /* scan the whole Uchain to find the pattern of the first row of U */ /* ------------------------------------------------------------------ */ k = kend+1 ; DEBUG4 (("\nKend "ID" K "ID"\n", kend, k)) ; /* ------------------------------------------------------------------ */ /* start with last row in Uchain of U in Pattern [0..deg-1] */ /* ------------------------------------------------------------------ */ if (k == npiv) { deg = Numeric->ulen ; if (deg > 0) { /* :: make last pivot row of U (singular matrices only) :: */ for (j = 0 ; j < deg ; j++) { Pattern [j] = Numeric->Upattern [j] ; } } } else { ASSERT (k >= 0 && k < npiv) ; up = -Uip [k] ; ASSERT (up > 0) ; deg = Uilen [k] ; DEBUG4 (("end of chain for row of U "ID" deg "ID"\n", k-1, deg)) ; ip = (Int *) (Numeric->Memory + up) ; for (j = 0 ; j < deg ; j++) { col = *ip++ ; DEBUG4 ((" k "ID" col "ID"\n", k-1, col)) ; ASSERT (k <= col) ; Pattern [j] = col ; } } /* empty the stack at the bottom of Pattern */ uhead = n ; for (k = kend ; k > kstart ; k--) { /* Pattern [0..deg-1] is the pattern of row k of U */ /* -------------------------------------------------------------- */ /* make row k-1 of U in Pattern [0..deg-1] */ /* -------------------------------------------------------------- */ ASSERT (k >= 0 && k < npiv) ; ulen = Uilen [k] ; /* delete, and push on the stack */ for (j = 0 ; j < ulen ; j++) { ASSERT (uhead >= deg) ; Pattern [--uhead] = Pattern [--deg] ; } DEBUG4 (("middle of chain for row of U "ID" deg "ID"\n", k, deg)) ; ASSERT (deg >= 0) ; pos = Upos [k] ; if (pos != EMPTY) { /* add the pivot column */ DEBUG4 (("k "ID" add pivot entry at position "ID"\n", k, pos)) ; ASSERT (pos >= 0 && pos <= deg) ; Pattern [deg++] = Pattern [pos] ; Pattern [pos] = k ; } } /* Pattern [0..deg-1] is now the pattern of the first row in Uchain */ /* ------------------------------------------------------------------ */ /* solve using this Uchain, in reverse order */ /* ------------------------------------------------------------------ */ DEBUG4 (("Unwinding Uchain\n")) ; for (k = kstart ; k <= kend ; k++) { /* -------------------------------------------------------------- */ /* construct row k */ /* -------------------------------------------------------------- */ ASSERT (k >= 0 && k < npiv) ; pos = Upos [k] ; if (pos != EMPTY) { /* remove the pivot column */ DEBUG4 (("k "ID" add pivot entry at position "ID"\n", k, pos)) ; ASSERT (k > kstart) ; ASSERT (pos >= 0 && pos < deg) ; ASSERT (Pattern [pos] == k) ; Pattern [pos] = Pattern [--deg] ; } up = Uip [k] ; ulen = Uilen [k] ; if (k > kstart) { /* concatenate the deleted pattern; pop from the stack */ for (j = 0 ; j < ulen ; j++) { ASSERT (deg <= uhead && uhead < n) ; Pattern [deg++] = Pattern [uhead++] ; } DEBUG4 (("middle of chain, row of U "ID" deg "ID"\n", k, deg)) ; ASSERT (deg >= 0) ; } /* -------------------------------------------------------------- */ /* use row k of U */ /* -------------------------------------------------------------- */ /* Go ahead and divide by zero if D [k] is zero. */ #ifdef CONJUGATE_SOLVE /* xk = X [k] / conjugate (D [k]) ; */ DIV_CONJ (xk, X [k], D [k]) ; #else /* xk = X [k] / D [k] ; */ DIV (xk, X [k], D [k]) ; #endif X [k] = xk ; if (IS_NONZERO (xk)) { if (k == kstart) { up = -up ; xp = (Entry *) (Numeric->Memory + up + UNITS (Int, ulen)) ; } else { xp = (Entry *) (Numeric->Memory + up) ; } for (j = 0 ; j < deg ; j++) { DEBUG4 ((" k "ID" col "ID" value", k, Pattern [j])) ; EDEBUG4 (*xp) ; DEBUG4 (("\n")) ; #ifdef CONJUGATE_SOLVE /* X [Pattern [j]] -= xk * conjugate (*xp) ; */ MULT_SUB_CONJ (X [Pattern [j]], xk, *xp) ; #else /* X [Pattern [j]] -= xk * (*xp) ; */ MULT_SUB (X [Pattern [j]], xk, *xp) ; #endif xp++ ; } } } ASSERT (uhead == n) ; } for (k = npiv ; k < n ; k++) { /* This is an *** intentional *** divide-by-zero, to get Inf or Nan, * as appropriate. It is not a bug. */ ASSERT (IS_ZERO (D [k])) ; /* For conjugate solve, D [k] == conjugate (D [k]), in this case */ /* xk = X [k] / D [k] ; */ DIV (xk, X [k], D [k]) ; X [k] = xk ; } #ifndef NDEBUG for (j = 0 ; j < n ; j++) { DEBUG4 (("Utsolve done "ID": ", j)) ; EDEBUG4 (X [j]) ; DEBUG4 (("\n")) ; } DEBUG4 (("Utsolve done.\n")) ; #endif return (DIV_FLOPS * ((double) n) + MULTSUB_FLOPS * ((double) Numeric->unz)); }
GLOBAL double UMF_lsolve ( NumericType *Numeric, Entry X [ ], /* b on input, solution x on output */ Int Pattern [ ] /* a work array of size n */ ) { Entry xk ; Entry *xp, *Lval ; Int k, deg, *ip, j, row, *Lpos, *Lilen, *Lip, llen, lp, newLchain, pos, npiv, n1, *Li ; /* ---------------------------------------------------------------------- */ if (Numeric->n_row != Numeric->n_col) return (0.) ; npiv = Numeric->npiv ; Lpos = Numeric->Lpos ; Lilen = Numeric->Lilen ; Lip = Numeric->Lip ; n1 = Numeric->n1 ; #ifndef NDEBUG DEBUG4 (("Lsolve start:\n")) ; for (j = 0 ; j < Numeric->n_row ; j++) { DEBUG4 (("Lsolve start "ID": ", j)) ; EDEBUG4 (X [j]) ; DEBUG4 (("\n")) ; } #endif /* ---------------------------------------------------------------------- */ /* singletons */ /* ---------------------------------------------------------------------- */ for (k = 0 ; k < n1 ; k++) { DEBUG4 (("Singleton k "ID"\n", k)) ; xk = X [k] ; deg = Lilen [k] ; if (deg > 0 && IS_NONZERO (xk)) { lp = Lip [k] ; Li = (Int *) (Numeric->Memory + lp) ; lp += UNITS (Int, deg) ; Lval = (Entry *) (Numeric->Memory + lp) ; for (j = 0 ; j < deg ; j++) { DEBUG4 ((" row "ID" k "ID" value", Li [j], k)) ; EDEBUG4 (Lval [j]) ; DEBUG4 (("\n")) ; /* X [Li [j]] -= xk * Lval [j] ; */ MULT_SUB (X [Li [j]], xk, Lval [j]) ; } } } /* ---------------------------------------------------------------------- */ /* rest of L */ /* ---------------------------------------------------------------------- */ deg = 0 ; for (k = n1 ; k < npiv ; k++) { /* ------------------------------------------------------------------ */ /* make column of L in Pattern [0..deg-1] */ /* ------------------------------------------------------------------ */ lp = Lip [k] ; newLchain = (lp < 0) ; if (newLchain) { lp = -lp ; deg = 0 ; DEBUG4 (("start of chain for column of L\n")) ; } /* remove pivot row */ pos = Lpos [k] ; if (pos != EMPTY) { DEBUG4 ((" k "ID" removing row "ID" at position "ID"\n", k, Pattern [pos], pos)) ; ASSERT (!newLchain) ; ASSERT (deg > 0) ; ASSERT (pos >= 0 && pos < deg) ; ASSERT (Pattern [pos] == k) ; Pattern [pos] = Pattern [--deg] ; } /* concatenate the pattern */ ip = (Int *) (Numeric->Memory + lp) ; llen = Lilen [k] ; for (j = 0 ; j < llen ; j++) { row = *ip++ ; DEBUG4 ((" row "ID" k "ID"\n", row, k)) ; ASSERT (row > k) ; Pattern [deg++] = row ; } /* ------------------------------------------------------------------ */ /* use column k of L */ /* ------------------------------------------------------------------ */ xk = X [k] ; if (IS_NONZERO (xk)) { xp = (Entry *) (Numeric->Memory + lp + UNITS (Int, llen)) ; for (j = 0 ; j < deg ; j++) { DEBUG4 ((" row "ID" k "ID" value", Pattern [j], k)) ; EDEBUG4 (*xp) ; DEBUG4 (("\n")) ; /* X [Pattern [j]] -= xk * (*xp) ; */ MULT_SUB (X [Pattern [j]], xk, *xp) ; xp++ ; } } } #ifndef NDEBUG for (j = 0 ; j < Numeric->n_row ; j++) { DEBUG4 (("Lsolve done "ID": ", j)) ; EDEBUG4 (X [j]) ; DEBUG4 (("\n")) ; } DEBUG4 (("Lsolve done.\n")) ; #endif return (MULTSUB_FLOPS * ((double) Numeric->lnz)) ; }
GLOBAL Int UMF_solve ( Int sys, const Int Ap [ ], const Int Ai [ ], const double Ax [ ], double Xx [ ], const double Bx [ ], #ifdef COMPLEX const double Az [ ], double Xz [ ], const double Bz [ ], #endif NumericType *Numeric, Int irstep, double Info [UMFPACK_INFO], Int Pattern [ ], /* size n */ double SolveWork [ ] /* if irstep>0 real: size 5*n. complex:10*n */ /* otherwise real: size n. complex: 4*n */ ) { /* ---------------------------------------------------------------------- */ /* local variables */ /* ---------------------------------------------------------------------- */ Entry axx, wi, xj, zi, xi, aij, bi ; double omega [3], d, z2i, yi, flops ; Entry *W, *Z, *S, *X ; double *Z2, *Y, *B2, *Rs ; Int *Rperm, *Cperm, i, n, p, step, j, nz, status, p2, do_scale ; #ifdef COMPLEX Int AXsplit ; Int Bsplit ; #endif #ifndef NRECIPROCAL Int do_recip = Numeric->do_recip ; #endif /* ---------------------------------------------------------------------- */ /* initializations */ /* ---------------------------------------------------------------------- */ #ifndef NDEBUG UMF_dump_lu (Numeric) ; ASSERT (Numeric && Xx && Bx && Pattern && SolveWork && Info) ; #endif nz = 0 ; omega [0] = 0. ; omega [1] = 0. ; omega [2] = 0. ; Rperm = Numeric->Rperm ; Cperm = Numeric->Cperm ; Rs = Numeric->Rs ; /* row scale factors */ do_scale = (Rs != (double *) NULL) ; flops = 0 ; Info [UMFPACK_SOLVE_FLOPS] = 0 ; Info [UMFPACK_IR_TAKEN] = 0 ; Info [UMFPACK_IR_ATTEMPTED] = 0 ; /* UMFPACK_solve does not call this routine if A is rectangular */ ASSERT (Numeric->n_row == Numeric->n_col) ; n = Numeric->n_row ; if (Numeric->nnzpiv < n || SCALAR_IS_ZERO (Numeric->rcond) || SCALAR_IS_NAN (Numeric->rcond)) { /* Note that systems involving just L return UMFPACK_OK, even if */ /* A is singular (L is always has a unit diagonal). */ DEBUGm4 (("Note, matrix is singular in umf_solve\n")) ; status = UMFPACK_WARNING_singular_matrix ; irstep = 0 ; } else { status = UMFPACK_OK ; } irstep = MAX (0, irstep) ; /* make sure irstep is >= 0 */ W = (Entry *) SolveWork ; /* Entry W [0..n-1] */ Z = (Entry *) NULL ; /* unused if no iterative refinement */ S = (Entry *) NULL ; Y = (double *) NULL ; Z2 = (double *) NULL ; B2 = (double *) NULL ; #ifdef COMPLEX if (irstep > 0) { if (!Ap || !Ai || !Ax) { return (UMFPACK_ERROR_argument_missing) ; } /* A, B, and X in split format if Az, Bz, and Xz present */ AXsplit = SPLIT (Az) || SPLIT(Xz); Z = (Entry *) (SolveWork + 4*n) ; /* Entry Z [0..n-1] */ S = (Entry *) (SolveWork + 6*n) ; /* Entry S [0..n-1] */ Y = (double *) (SolveWork + 8*n) ; /* double Y [0..n-1] */ B2 = (double *) (SolveWork + 9*n) ; /* double B2 [0..n-1] */ Z2 = (double *) Z ; /* double Z2 [0..n-1], equiv. to Z */ } else { /* A is ignored, only look at X for split/packed cases */ AXsplit = SPLIT(Xz); } Bsplit = SPLIT (Bz); if (AXsplit) { X = (Entry *) (SolveWork + 2*n) ; /* Entry X [0..n-1] */ } else { X = (Entry *) Xx ; /* Entry X [0..n-1] */ } #else X = (Entry *) Xx ; /* Entry X [0..n-1] */ if (irstep > 0) { if (!Ap || !Ai || !Ax) { return (UMFPACK_ERROR_argument_missing) ; } Z = (Entry *) (SolveWork + n) ; /* Entry Z [0..n-1] */ S = (Entry *) (SolveWork + 2*n) ; /* Entry S [0..n-1] */ Y = (double *) (SolveWork + 3*n) ; /* double Y [0..n-1] */ B2 = (double *) (SolveWork + 4*n) ; /* double B2 [0..n-1] */ Z2 = (double *) Z ; /* double Z2 [0..n-1], equiv. to Z */ } #endif /* ---------------------------------------------------------------------- */ /* determine which system to solve */ /* ---------------------------------------------------------------------- */ if (sys == UMFPACK_A) { /* ------------------------------------------------------------------ */ /* solve A x = b with optional iterative refinement */ /* ------------------------------------------------------------------ */ if (irstep > 0) { /* -------------------------------------------------------------- */ /* using iterative refinement: compute Y and B2 */ /* -------------------------------------------------------------- */ nz = Ap [n] ; Info [UMFPACK_NZ] = nz ; /* A is stored by column */ /* Y (i) = ||R A_i||, 1-norm of row i of R A */ for (i = 0 ; i < n ; i++) { Y [i] = 0. ; } flops += (ABS_FLOPS + 1) * nz ; p2 = Ap [n] ; for (p = 0 ; p < p2 ; p++) { /* Y [Ai [p]] += ABS (Ax [p]) ; */ ASSIGN (aij, Ax, Az, p, AXsplit) ; ABS (d, aij) ; Y [Ai [p]] += d ; } /* B2 = abs (B) */ flops += ABS_FLOPS * n ; for (i = 0 ; i < n ; i++) { /* B2 [i] = ABS (B [i]) ; */ ASSIGN (bi, Bx, Bz, i, Bsplit) ; ABS (B2 [i], bi) ; } /* scale Y and B2. */ if (do_scale) { /* Y = R Y */ /* B2 = R B2 */ #ifndef NRECIPROCAL if (do_recip) { /* multiply by the scale factors */ for (i = 0 ; i < n ; i++) { Y [i] *= Rs [i] ; B2 [i] *= Rs [i] ; } } else #endif { /* divide by the scale factors */ for (i = 0 ; i < n ; i++) { Y [i] /= Rs [i] ; B2 [i] /= Rs [i] ; } } flops += 2 * n ; } } for (step = 0 ; step <= irstep ; step++) { /* -------------------------------------------------------------- */ /* Solve A x = b (step 0): */ /* x = Q (U \ (L \ (P R b))) */ /* and then perform iterative refinement (step > 0): */ /* x = x + Q (U \ (L \ (P R (b - A x)))) */ /* -------------------------------------------------------------- */ if (step == 0) { if (do_scale) { /* W = P R b, using X as workspace, since Z is not * allocated if irstep = 0. */ #ifndef NRECIPROCAL if (do_recip) { /* multiply by the scale factors */ for (i = 0 ; i < n ; i++) { ASSIGN (X [i], Bx, Bz, i, Bsplit) ; SCALE (X [i], Rs [i]) ; } } else #endif { /* divide by the scale factors */ for (i = 0 ; i < n ; i++) { ASSIGN (X [i], Bx, Bz, i, Bsplit) ; SCALE_DIV (X [i], Rs [i]) ; } } flops += SCALE_FLOPS * n ; for (i = 0 ; i < n ; i++) { W [i] = X [Rperm [i]] ; } } else { /* W = P b, since the row scaling R = I */ for (i = 0 ; i < n ; i++) { /* W [i] = B [Rperm [i]] ; */ ASSIGN (W [i], Bx, Bz, Rperm [i], Bsplit) ; } } } else { for (i = 0 ; i < n ; i++) { /* Z [i] = B [i] ; */ ASSIGN (Z [i], Bx, Bz, i, Bsplit) ; } flops += MULTSUB_FLOPS * nz ; for (i = 0 ; i < n ; i++) { xi = X [i] ; p2 = Ap [i+1] ; for (p = Ap [i] ; p < p2 ; p++) { /* Z [Ai [p]] -= Ax [p] * xi ; */ ASSIGN (aij, Ax, Az, p, AXsplit) ; MULT_SUB (Z [Ai [p]], aij, xi) ; } } /* scale, Z = R Z */ if (do_scale) { #ifndef NRECIPROCAL if (do_recip) { /* multiply by the scale factors */ for (i = 0 ; i < n ; i++) { SCALE (Z [i], Rs [i]) ; } } else #endif { /* divide by the scale factors */ for (i = 0 ; i < n ; i++) { SCALE_DIV (Z [i], Rs [i]) ; } } flops += SCALE_FLOPS * n ; } for (i = 0 ; i < n ; i++) { W [i] = Z [Rperm [i]] ; } } flops += UMF_lsolve (Numeric, W, Pattern) ; flops += UMF_usolve (Numeric, W, Pattern) ; if (step == 0) { for (i = 0 ; i < n ; i++) { X [Cperm [i]] = W [i] ; } } else { flops += ASSEMBLE_FLOPS * n ; for (i = 0 ; i < n ; i++) { /* X [Cperm [i]] += W [i] ; */ ASSEMBLE (X [Cperm [i]], W [i]) ; } } /* -------------------------------------------------------------- */ /* sparse backward error estimate */ /* -------------------------------------------------------------- */ if (irstep > 0) { /* ---------------------------------------------------------- */ /* A is stored by column */ /* W (i) = R (b - A x)_i, residual */ /* Z2 (i) = R (|A||x|)_i */ /* ---------------------------------------------------------- */ for (i = 0 ; i < n ; i++) { /* W [i] = B [i] ; */ ASSIGN (W [i], Bx, Bz, i, Bsplit) ; Z2 [i] = 0. ; } flops += (MULT_FLOPS + DECREMENT_FLOPS + ABS_FLOPS + 1) * nz ; for (j = 0 ; j < n ; j++) { xj = X [j] ; p2 = Ap [j+1] ; for (p = Ap [j] ; p < p2 ; p++) { i = Ai [p] ; /* axx = Ax [p] * xj ; */ ASSIGN (aij, Ax, Az, p, AXsplit) ; MULT (axx, aij, xj) ; /* W [i] -= axx ; */ DECREMENT (W [i], axx) ; /* Z2 [i] += ABS (axx) ; */ ABS (d, axx) ; Z2 [i] += d ; } } /* scale W and Z2 */ if (do_scale) { /* Z2 = R Z2 */ /* W = R W */ #ifndef NRECIPROCAL if (do_recip) { /* multiply by the scale factors */ for (i = 0 ; i < n ; i++) { SCALE (W [i], Rs [i]) ; Z2 [i] *= Rs [i] ; } } else #endif { /* divide by the scale factors */ for (i = 0 ; i < n ; i++) { SCALE_DIV (W [i], Rs [i]) ; Z2 [i] /= Rs [i] ; } } flops += (SCALE_FLOPS + 1) * n ; } flops += (2*ABS_FLOPS + 5) * n ; if (do_step (omega, step, B2, X, W, Y, Z2, S, n, Info)) { /* iterative refinement is done */ break ; } } } } else if (sys == UMFPACK_At) { /* ------------------------------------------------------------------ */ /* solve A' x = b with optional iterative refinement */ /* ------------------------------------------------------------------ */ /* A' is the complex conjugate transpose */ if (irstep > 0) { /* -------------------------------------------------------------- */ /* using iterative refinement: compute Y */ /* -------------------------------------------------------------- */ nz = Ap [n] ; Info [UMFPACK_NZ] = nz ; /* A' is stored by row */ /* Y (i) = ||(A' R)_i||, 1-norm of row i of A' R */ if (do_scale) { flops += (ABS_FLOPS + 2) * nz ; #ifndef NRECIPROCAL if (do_recip) { /* multiply by the scale factors */ for (i = 0 ; i < n ; i++) { yi = 0. ; p2 = Ap [i+1] ; for (p = Ap [i] ; p < p2 ; p++) { /* yi += ABS (Ax [p]) * Rs [Ai [p]] ; */ /* note that abs (aij) is the same as * abs (conj (aij)) */ ASSIGN (aij, Ax, Az, p, AXsplit) ; ABS (d, aij) ; yi += (d * Rs [Ai [p]]) ; } Y [i] = yi ; } } else #endif { /* divide by the scale factors */ for (i = 0 ; i < n ; i++) { yi = 0. ; p2 = Ap [i+1] ; for (p = Ap [i] ; p < p2 ; p++) { /* yi += ABS (Ax [p]) / Rs [Ai [p]] ; */ /* note that abs (aij) is the same as * abs (conj (aij)) */ ASSIGN (aij, Ax, Az, p, AXsplit) ; ABS (d, aij) ; yi += (d / Rs [Ai [p]]) ; } Y [i] = yi ; } } } else { /* no scaling */ flops += (ABS_FLOPS + 1) * nz ; for (i = 0 ; i < n ; i++) { yi = 0. ; p2 = Ap [i+1] ; for (p = Ap [i] ; p < p2 ; p++) { /* yi += ABS (Ax [p]) ; */ /* note that abs (aij) is the same as * abs (conj (aij)) */ ASSIGN (aij, Ax, Az, p, AXsplit) ; ABS (d, aij) ; yi += d ; } Y [i] = yi ; } } /* B2 = abs (B) */ for (i = 0 ; i < n ; i++) { /* B2 [i] = ABS (B [i]) ; */ ASSIGN (bi, Bx, Bz, i, Bsplit) ; ABS (B2 [i], bi) ; } } for (step = 0 ; step <= irstep ; step++) { /* -------------------------------------------------------------- */ /* Solve A' x = b (step 0): */ /* x = R P' (L' \ (U' \ (Q' b))) */ /* and then perform iterative refinement (step > 0): */ /* x = x + R P' (L' \ (U' \ (Q' (b - A' x)))) */ /* -------------------------------------------------------------- */ if (step == 0) { /* W = Q' b */ for (i = 0 ; i < n ; i++) { /* W [i] = B [Cperm [i]] ; */ ASSIGN (W [i], Bx, Bz, Cperm [i], Bsplit) ; } } else { /* Z = b - A' x */ for (i = 0 ; i < n ; i++) { /* Z [i] = B [i] ; */ ASSIGN (Z [i], Bx, Bz, i, Bsplit) ; } flops += MULTSUB_FLOPS * nz ; for (i = 0 ; i < n ; i++) { zi = Z [i] ; p2 = Ap [i+1] ; for (p = Ap [i] ; p < p2 ; p++) { /* zi -= conjugate (Ax [p]) * X [Ai [p]] ; */ ASSIGN (aij, Ax, Az, p, Bsplit) ; MULT_SUB_CONJ (zi, X [Ai [p]], aij) ; } Z [i] = zi ; } /* W = Q' Z */ for (i = 0 ; i < n ; i++) { W [i] = Z [Cperm [i]] ; } } flops += UMF_uhsolve (Numeric, W, Pattern) ; flops += UMF_lhsolve (Numeric, W, Pattern) ; if (step == 0) { /* X = R P' W */ /* do not use Z, since it isn't allocated if irstep = 0 */ /* X = P' W */ for (i = 0 ; i < n ; i++) { X [Rperm [i]] = W [i] ; } if (do_scale) { /* X = R X */ #ifndef NRECIPROCAL if (do_recip) { /* multiply by the scale factors */ for (i = 0 ; i < n ; i++) { SCALE (X [i], Rs [i]) ; } } else #endif { /* divide by the scale factors */ for (i = 0 ; i < n ; i++) { SCALE_DIV (X [i], Rs [i]) ; } } flops += SCALE_FLOPS * n ; } } else { /* Z = P' W */ for (i = 0 ; i < n ; i++) { Z [Rperm [i]] = W [i] ; } if (do_scale) { /* Z = R Z */ #ifndef NRECIPROCAL if (do_recip) { /* multiply by the scale factors */ for (i = 0 ; i < n ; i++) { SCALE (Z [i], Rs [i]) ; } } else #endif { /* divide by the scale factors */ for (i = 0 ; i < n ; i++) { SCALE_DIV (Z [i], Rs [i]) ; } } flops += SCALE_FLOPS * n ; } flops += ASSEMBLE_FLOPS * n ; /* X += Z */ for (i = 0 ; i < n ; i++) { /* X [i] += Z [i] ; was +=W[i] in v4.3, which is wrong */ ASSEMBLE (X [i], Z [i]) ; /* bug fix, v4.3.1 */ } } /* -------------------------------------------------------------- */ /* sparse backward error estimate */ /* -------------------------------------------------------------- */ if (irstep > 0) { /* ---------------------------------------------------------- */ /* A' is stored by row */ /* W (i) = (b - A' x)_i, residual */ /* Z2 (i) = (|A'||x|)_i */ /* ---------------------------------------------------------- */ flops += (MULT_FLOPS + DECREMENT_FLOPS + ABS_FLOPS + 1) * nz ; for (i = 0 ; i < n ; i++) { /* wi = B [i] ; */ ASSIGN (wi, Bx, Bz, i, Bsplit) ; z2i = 0. ; p2 = Ap [i+1] ; for (p = Ap [i] ; p < p2 ; p++) { /* axx = conjugate (Ax [p]) * X [Ai [p]] ; */ ASSIGN (aij, Ax, Az, p, AXsplit) ; MULT_CONJ (axx, X [Ai [p]], aij) ; /* wi -= axx ; */ DECREMENT (wi, axx) ; /* z2i += ABS (axx) ; */ ABS (d, axx) ; z2i += d ; } W [i] = wi ; Z2 [i] = z2i ; } flops += (2*ABS_FLOPS + 5) * n ; if (do_step (omega, step, B2, X, W, Y, Z2, S, n, Info)) { /* iterative refinement is done */ break ; } } } } else if (sys == UMFPACK_Aat) { /* ------------------------------------------------------------------ */ /* solve A.' x = b with optional iterative refinement */ /* ------------------------------------------------------------------ */ /* A' is the array transpose */ if (irstep > 0) { /* -------------------------------------------------------------- */ /* using iterative refinement: compute Y */ /* -------------------------------------------------------------- */ nz = Ap [n] ; Info [UMFPACK_NZ] = nz ; /* A.' is stored by row */ /* Y (i) = ||(A.' R)_i||, 1-norm of row i of A.' R */ if (do_scale) { flops += (ABS_FLOPS + 2) * nz ; #ifndef NRECIPROCAL if (do_recip) { /* multiply by the scale factors */ for (i = 0 ; i < n ; i++) { yi = 0. ; p2 = Ap [i+1] ; for (p = Ap [i] ; p < p2 ; p++) { /* yi += ABS (Ax [p]) * Rs [Ai [p]] ; */ /* note that A.' is the array transpose, * so no conjugate */ ASSIGN (aij, Ax, Az, p, AXsplit) ; ABS (d, aij) ; yi += (d * Rs [Ai [p]]) ; } Y [i] = yi ; } } else #endif { /* divide by the scale factors */ for (i = 0 ; i < n ; i++) { yi = 0. ; p2 = Ap [i+1] ; for (p = Ap [i] ; p < p2 ; p++) { /* yi += ABS (Ax [p]) / Rs [Ai [p]] ; */ /* note that A.' is the array transpose, * so no conjugate */ ASSIGN (aij, Ax, Az, p, AXsplit) ; ABS (d, aij) ; yi += (d / Rs [Ai [p]]) ; } Y [i] = yi ; } } } else { /* no scaling */ flops += (ABS_FLOPS + 1) * nz ; for (i = 0 ; i < n ; i++) { yi = 0. ; p2 = Ap [i+1] ; for (p = Ap [i] ; p < p2 ; p++) { /* yi += ABS (Ax [p]) */ /* note that A.' is the array transpose, * so no conjugate */ ASSIGN (aij, Ax, Az, p, AXsplit) ; ABS (d, aij) ; yi += d ; } Y [i] = yi ; } } /* B2 = abs (B) */ for (i = 0 ; i < n ; i++) { /* B2 [i] = ABS (B [i]) ; */ ASSIGN (bi, Bx, Bz, i, Bsplit) ; ABS (B2 [i], bi) ; } } for (step = 0 ; step <= irstep ; step++) { /* -------------------------------------------------------------- */ /* Solve A.' x = b (step 0): */ /* x = R P' (L.' \ (U.' \ (Q' b))) */ /* and then perform iterative refinement (step > 0): */ /* x = x + R P' (L.' \ (U.' \ (Q' (b - A.' x)))) */ /* -------------------------------------------------------------- */ if (step == 0) { /* W = Q' b */ for (i = 0 ; i < n ; i++) { /* W [i] = B [Cperm [i]] ; */ ASSIGN (W [i], Bx, Bz, Cperm [i], Bsplit) ; } } else { /* Z = b - A.' x */ for (i = 0 ; i < n ; i++) { /* Z [i] = B [i] ; */ ASSIGN (Z [i], Bx, Bz, i, Bsplit) ; } flops += MULTSUB_FLOPS * nz ; for (i = 0 ; i < n ; i++) { zi = Z [i] ; p2 = Ap [i+1] ; for (p = Ap [i] ; p < p2 ; p++) { /* zi -= Ax [p] * X [Ai [p]] ; */ ASSIGN (aij, Ax, Az, p, AXsplit) ; MULT_SUB (zi, aij, X [Ai [p]]) ; } Z [i] = zi ; } /* W = Q' Z */ for (i = 0 ; i < n ; i++) { W [i] = Z [Cperm [i]] ; } } flops += UMF_utsolve (Numeric, W, Pattern) ; flops += UMF_ltsolve (Numeric, W, Pattern) ; if (step == 0) { /* X = R P' W */ /* do not use Z, since it isn't allocated if irstep = 0 */ /* X = P' W */ for (i = 0 ; i < n ; i++) { X [Rperm [i]] = W [i] ; } if (do_scale) { /* X = R X */ #ifndef NRECIPROCAL if (do_recip) { /* multiply by the scale factors */ for (i = 0 ; i < n ; i++) { SCALE (X [i], Rs [i]) ; } } else #endif { /* divide by the scale factors */ for (i = 0 ; i < n ; i++) { SCALE_DIV (X [i], Rs [i]) ; } } flops += SCALE_FLOPS * n ; } } else { /* Z = P' W */ for (i = 0 ; i < n ; i++) { Z [Rperm [i]] = W [i] ; } if (do_scale) { /* Z = R Z */ #ifndef NRECIPROCAL if (do_recip) { /* multiply by the scale factors */ for (i = 0 ; i < n ; i++) { SCALE (Z [i], Rs [i]) ; } } else #endif { /* divide by the scale factors */ for (i = 0 ; i < n ; i++) { SCALE_DIV (Z [i], Rs [i]) ; } } flops += SCALE_FLOPS * n ; } flops += ASSEMBLE_FLOPS * n ; /* X += Z */ for (i = 0 ; i < n ; i++) { /* X [i] += Z [i] ; was +=W[i] in v4.3, which is wrong */ ASSEMBLE (X [i], Z [i]) ; /* bug fix, v4.3.1 */ } } /* -------------------------------------------------------------- */ /* sparse backward error estimate */ /* -------------------------------------------------------------- */ if (irstep > 0) { /* ---------------------------------------------------------- */ /* A.' is stored by row */ /* W (i) = (b - A.' x)_i, residual */ /* Z (i) = (|A.'||x|)_i */ /* ---------------------------------------------------------- */ flops += (MULT_FLOPS + DECREMENT_FLOPS + ABS_FLOPS + 1) * nz ; for (i = 0 ; i < n ; i++) { /* wi = B [i] ; */ ASSIGN (wi, Bx, Bz, i, Bsplit) ; z2i = 0. ; p2 = Ap [i+1] ; for (p = Ap [i] ; p < p2 ; p++) { /* axx = Ax [p] * X [Ai [p]] ; */ ASSIGN (aij, Ax, Az, p, AXsplit) ; MULT (axx, aij, X [Ai [p]]) ; /* wi -= axx ; */ DECREMENT (wi, axx) ; /* z2i += ABS (axx) ; */ ABS (d, axx) ; z2i += d ; } W [i] = wi ; Z2 [i] = z2i ; } flops += (2*ABS_FLOPS + 5) * n ; if (do_step (omega, step, B2, X, W, Y, Z2, S, n, Info)) { /* iterative refinement is done */ break ; } } } } else if (sys == UMFPACK_Pt_L) { /* ------------------------------------------------------------------ */ /* Solve P'Lx=b: x = L \ Pb */ /* ------------------------------------------------------------------ */ for (i = 0 ; i < n ; i++) { /* X [i] = B [Rperm [i]] ; */ ASSIGN (X [i], Bx, Bz, Rperm [i], Bsplit) ; } flops = UMF_lsolve (Numeric, X, Pattern) ; status = UMFPACK_OK ; } else if (sys == UMFPACK_L) { /* ------------------------------------------------------------------ */ /* Solve Lx=b: x = L \ b */ /* ------------------------------------------------------------------ */ for (i = 0 ; i < n ; i++) { /* X [i] = B [i] ; */ ASSIGN (X [i], Bx, Bz, i, Bsplit) ; } flops = UMF_lsolve (Numeric, X, Pattern) ; status = UMFPACK_OK ; } else if (sys == UMFPACK_Lt_P) { /* ------------------------------------------------------------------ */ /* Solve L'Px=b: x = P' (L' \ b) */ /* ------------------------------------------------------------------ */ for (i = 0 ; i < n ; i++) { /* W [i] = B [i] ; */ ASSIGN (W [i], Bx, Bz, i, Bsplit) ; } flops = UMF_lhsolve (Numeric, W, Pattern) ; for (i = 0 ; i < n ; i++) { X [Rperm [i]] = W [i] ; } status = UMFPACK_OK ; } else if (sys == UMFPACK_Lat_P) { /* ------------------------------------------------------------------ */ /* Solve L.'Px=b: x = P' (L.' \ b) */ /* ------------------------------------------------------------------ */ for (i = 0 ; i < n ; i++) { /* W [i] = B [i] ; */ ASSIGN (W [i], Bx, Bz, i, Bsplit) ; } flops = UMF_ltsolve (Numeric, W, Pattern) ; for (i = 0 ; i < n ; i++) { X [Rperm [i]] = W [i] ; } status = UMFPACK_OK ; } else if (sys == UMFPACK_Lt) { /* ------------------------------------------------------------------ */ /* Solve L'x=b: x = L' \ b */ /* ------------------------------------------------------------------ */ for (i = 0 ; i < n ; i++) { /* X [i] = B [i] ; */ ASSIGN (X [i], Bx, Bz, i, Bsplit) ; } flops = UMF_lhsolve (Numeric, X, Pattern) ; status = UMFPACK_OK ; } else if (sys == UMFPACK_Lat) { /* ------------------------------------------------------------------ */ /* Solve L.'x=b: x = L.' \ b */ /* ------------------------------------------------------------------ */ for (i = 0 ; i < n ; i++) { /* X [i] = B [i] ; */ ASSIGN (X [i], Bx, Bz, i, Bsplit) ; } flops = UMF_ltsolve (Numeric, X, Pattern) ; status = UMFPACK_OK ; } else if (sys == UMFPACK_U_Qt) { /* ------------------------------------------------------------------ */ /* Solve UQ'x=b: x = Q (U \ b) */ /* ------------------------------------------------------------------ */ for (i = 0 ; i < n ; i++) { /* W [i] = B [i] ; */ ASSIGN (W [i], Bx, Bz, i, Bsplit) ; } flops = UMF_usolve (Numeric, W, Pattern) ; for (i = 0 ; i < n ; i++) { X [Cperm [i]] = W [i] ; } } else if (sys == UMFPACK_U) { /* ------------------------------------------------------------------ */ /* Solve Ux=b: x = U \ b */ /* ------------------------------------------------------------------ */ for (i = 0 ; i < n ; i++) { /* X [i] = B [i] ; */ ASSIGN (X [i], Bx, Bz, i, Bsplit) ; } flops = UMF_usolve (Numeric, X, Pattern) ; } else if (sys == UMFPACK_Q_Ut) { /* ------------------------------------------------------------------ */ /* Solve QU'x=b: x = U' \ Q'b */ /* ------------------------------------------------------------------ */ for (i = 0 ; i < n ; i++) { /* X [i] = B [Cperm [i]] ; */ ASSIGN (X [i], Bx, Bz, Cperm [i], Bsplit) ; } flops = UMF_uhsolve (Numeric, X, Pattern) ; } else if (sys == UMFPACK_Q_Uat) { /* ------------------------------------------------------------------ */ /* Solve QU.'x=b: x = U.' \ Q'b */ /* ------------------------------------------------------------------ */ for (i = 0 ; i < n ; i++) { /* X [i] = B [Cperm [i]] ; */ ASSIGN (X [i], Bx, Bz, Cperm [i], Bsplit) ; } flops = UMF_utsolve (Numeric, X, Pattern) ; } else if (sys == UMFPACK_Ut) { /* ------------------------------------------------------------------ */ /* Solve U'x=b: x = U' \ b */ /* ------------------------------------------------------------------ */ for (i = 0 ; i < n ; i++) { /* X [i] = B [i] ; */ ASSIGN (X [i], Bx, Bz, i, Bsplit) ; } flops = UMF_uhsolve (Numeric, X, Pattern) ; } else if (sys == UMFPACK_Uat) { /* ------------------------------------------------------------------ */ /* Solve U'x=b: x = U' \ b */ /* ------------------------------------------------------------------ */ for (i = 0 ; i < n ; i++) { /* X [i] = B [i] ; */ ASSIGN (X [i], Bx, Bz, i, Bsplit) ; } flops = UMF_utsolve (Numeric, X, Pattern) ; } else { return (UMFPACK_ERROR_invalid_system) ; } #ifdef COMPLEX /* copy the solution back, from Entry X [ ] to double Xx [ ] and Xz [ ] */ if (AXsplit) { for (i = 0 ; i < n ; i++) { Xx [i] = REAL_COMPONENT (X [i]) ; Xz [i] = IMAG_COMPONENT (X [i]) ; } } #endif /* return UMFPACK_OK, or UMFPACK_WARNING_singular_matrix */ /* Note that systems involving just L will return UMFPACK_OK */ Info [UMFPACK_SOLVE_FLOPS] = flops ; return (status) ; }
Int KLU_refactor /* returns TRUE if successful, FALSE otherwise */ ( /* inputs, not modified */ Int Ap [ ], /* size n+1, column pointers */ Int Ai [ ], /* size nz, row indices */ double Ax [ ], KLU_symbolic<Entry, Int> *Symbolic, /* input/output */ KLU_numeric<Entry, Int> *Numeric, KLU_common<Entry, Int> *Common ) { Entry ukk, ujk, s ; Entry *Offx, *Lx, *Ux, *X, *Az, *Udiag ; double *Rs ; Int *P, *Q, *R, *Pnum, *Offp, *Offi, *Ui, *Li, *Pinv, *Lip, *Uip, *Llen, *Ulen ; Unit **LUbx ; Unit *LU ; Int k1, k2, nk, k, block, oldcol, pend, oldrow, n, p, newrow, scale, nblocks, poff, i, j, up, ulen, llen, maxblock, nzoff ; /* ---------------------------------------------------------------------- */ /* check inputs */ /* ---------------------------------------------------------------------- */ if (Common == NULL) { return (FALSE) ; } Common->status = KLU_OK ; if (Numeric == NULL) { /* invalid Numeric object */ Common->status = KLU_INVALID ; return (FALSE) ; } Common->numerical_rank = EMPTY ; Common->singular_col = EMPTY ; Az = (Entry *) Ax ; /* ---------------------------------------------------------------------- */ /* get the contents of the Symbolic object */ /* ---------------------------------------------------------------------- */ n = Symbolic->n ; P = Symbolic->P ; Q = Symbolic->Q ; R = Symbolic->R ; nblocks = Symbolic->nblocks ; maxblock = Symbolic->maxblock ; /* ---------------------------------------------------------------------- */ /* get the contents of the Numeric object */ /* ---------------------------------------------------------------------- */ Pnum = Numeric->Pnum ; Offp = Numeric->Offp ; Offi = Numeric->Offi ; Offx = (Entry *) Numeric->Offx ; LUbx = (Unit **) Numeric->LUbx ; scale = Common->scale ; if (scale > 0) { /* factorization was not scaled, but refactorization is scaled */ if (Numeric->Rs == NULL) { Numeric->Rs = (double *)KLU_malloc (n, sizeof (double), Common) ; if (Common->status < KLU_OK) { Common->status = KLU_OUT_OF_MEMORY ; return (FALSE) ; } } } else { /* no scaling for refactorization; ensure Numeric->Rs is freed. This * does nothing if Numeric->Rs is already NULL. */ Numeric->Rs = (double *) KLU_free (Numeric->Rs, n, sizeof (double), Common) ; } Rs = Numeric->Rs ; Pinv = Numeric->Pinv ; X = (Entry *) Numeric->Xwork ; Common->nrealloc = 0 ; Udiag = (Entry *) Numeric->Udiag ; nzoff = Symbolic->nzoff ; /* ---------------------------------------------------------------------- */ /* check the input matrix compute the row scale factors, Rs */ /* ---------------------------------------------------------------------- */ /* do no scale, or check the input matrix, if scale < 0 */ if (scale >= 0) { /* check for out-of-range indices, but do not check for duplicates */ if (!KLU_scale (scale, n, Ap, Ai, Ax, Rs, NULL, Common)) { return (FALSE) ; } } /* ---------------------------------------------------------------------- */ /* clear workspace X */ /* ---------------------------------------------------------------------- */ for (k = 0 ; k < maxblock ; k++) { /* X [k] = 0 */ CLEAR (X [k]) ; } poff = 0 ; /* ---------------------------------------------------------------------- */ /* factor each block */ /* ---------------------------------------------------------------------- */ if (scale <= 0) { /* ------------------------------------------------------------------ */ /* no scaling */ /* ------------------------------------------------------------------ */ for (block = 0 ; block < nblocks ; block++) { /* -------------------------------------------------------------- */ /* the block is from rows/columns k1 to k2-1 */ /* -------------------------------------------------------------- */ k1 = R [block] ; k2 = R [block+1] ; nk = k2 - k1 ; if (nk == 1) { /* ---------------------------------------------------------- */ /* singleton case */ /* ---------------------------------------------------------- */ oldcol = Q [k1] ; pend = Ap [oldcol+1] ; CLEAR (s) ; for (p = Ap [oldcol] ; p < pend ; p++) { newrow = Pinv [Ai [p]] - k1 ; if (newrow < 0 && poff < nzoff) { /* entry in off-diagonal block */ Offx [poff] = Az [p] ; poff++ ; } else { /* singleton */ s = Az [p] ; } } Udiag [k1] = s ; } else { /* ---------------------------------------------------------- */ /* construct and factor the kth block */ /* ---------------------------------------------------------- */ Lip = Numeric->Lip + k1 ; Llen = Numeric->Llen + k1 ; Uip = Numeric->Uip + k1 ; Ulen = Numeric->Ulen + k1 ; LU = LUbx [block] ; for (k = 0 ; k < nk ; k++) { /* ------------------------------------------------------ */ /* scatter kth column of the block into workspace X */ /* ------------------------------------------------------ */ oldcol = Q [k+k1] ; pend = Ap [oldcol+1] ; for (p = Ap [oldcol] ; p < pend ; p++) { newrow = Pinv [Ai [p]] - k1 ; if (newrow < 0 && poff < nzoff) { /* entry in off-diagonal block */ Offx [poff] = Az [p] ; poff++ ; } else { /* (newrow,k) is an entry in the block */ X [newrow] = Az [p] ; } } /* ------------------------------------------------------ */ /* compute kth column of U, and update kth column of A */ /* ------------------------------------------------------ */ GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, ulen) ; for (up = 0 ; up < ulen ; up++) { j = Ui [up] ; ujk = X [j] ; /* X [j] = 0 */ CLEAR (X [j]) ; Ux [up] = ujk ; GET_POINTER (LU, Lip, Llen, Li, Lx, j, llen) ; for (p = 0 ; p < llen ; p++) { /* X [Li [p]] -= Lx [p] * ujk */ MULT_SUB (X [Li [p]], Lx [p], ujk) ; } } /* get the diagonal entry of U */ ukk = X [k] ; /* X [k] = 0 */ CLEAR (X [k]) ; /* singular case */ if (IS_ZERO (ukk)) { /* matrix is numerically singular */ Common->status = KLU_SINGULAR ; if (Common->numerical_rank == EMPTY) { Common->numerical_rank = k+k1 ; Common->singular_col = Q [k+k1] ; } if (Common->halt_if_singular) { /* do not continue the factorization */ return (FALSE) ; } } Udiag [k+k1] = ukk ; /* gather and divide by pivot to get kth column of L */ GET_POINTER (LU, Lip, Llen, Li, Lx, k, llen) ; for (p = 0 ; p < llen ; p++) { i = Li [p] ; DIV (Lx [p], X [i], ukk) ; CLEAR (X [i]) ; } } } } } else { /* ------------------------------------------------------------------ */ /* scaling */ /* ------------------------------------------------------------------ */ for (block = 0 ; block < nblocks ; block++) { /* -------------------------------------------------------------- */ /* the block is from rows/columns k1 to k2-1 */ /* -------------------------------------------------------------- */ k1 = R [block] ; k2 = R [block+1] ; nk = k2 - k1 ; if (nk == 1) { /* ---------------------------------------------------------- */ /* singleton case */ /* ---------------------------------------------------------- */ oldcol = Q [k1] ; pend = Ap [oldcol+1] ; CLEAR (s) ; for (p = Ap [oldcol] ; p < pend ; p++) { oldrow = Ai [p] ; newrow = Pinv [oldrow] - k1 ; if (newrow < 0 && poff < nzoff) { /* entry in off-diagonal block */ /* Offx [poff] = Az [p] / Rs [oldrow] */ SCALE_DIV_ASSIGN (Offx [poff], Az [p], Rs [oldrow]) ; poff++ ; } else { /* singleton */ /* s = Az [p] / Rs [oldrow] */ SCALE_DIV_ASSIGN (s, Az [p], Rs [oldrow]) ; } } Udiag [k1] = s ; } else { /* ---------------------------------------------------------- */ /* construct and factor the kth block */ /* ---------------------------------------------------------- */ Lip = Numeric->Lip + k1 ; Llen = Numeric->Llen + k1 ; Uip = Numeric->Uip + k1 ; Ulen = Numeric->Ulen + k1 ; LU = LUbx [block] ; for (k = 0 ; k < nk ; k++) { /* ------------------------------------------------------ */ /* scatter kth column of the block into workspace X */ /* ------------------------------------------------------ */ oldcol = Q [k+k1] ; pend = Ap [oldcol+1] ; for (p = Ap [oldcol] ; p < pend ; p++) { oldrow = Ai [p] ; newrow = Pinv [oldrow] - k1 ; if (newrow < 0 && poff < nzoff) { /* entry in off-diagonal part */ /* Offx [poff] = Az [p] / Rs [oldrow] */ SCALE_DIV_ASSIGN (Offx [poff], Az [p], Rs [oldrow]); poff++ ; } else { /* (newrow,k) is an entry in the block */ /* X [newrow] = Az [p] / Rs [oldrow] */ SCALE_DIV_ASSIGN (X [newrow], Az [p], Rs [oldrow]) ; } } /* ------------------------------------------------------ */ /* compute kth column of U, and update kth column of A */ /* ------------------------------------------------------ */ GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, ulen) ; for (up = 0 ; up < ulen ; up++) { j = Ui [up] ; ujk = X [j] ; /* X [j] = 0 */ CLEAR (X [j]) ; Ux [up] = ujk ; GET_POINTER (LU, Lip, Llen, Li, Lx, j, llen) ; for (p = 0 ; p < llen ; p++) { /* X [Li [p]] -= Lx [p] * ujk */ MULT_SUB (X [Li [p]], Lx [p], ujk) ; } } /* get the diagonal entry of U */ ukk = X [k] ; /* X [k] = 0 */ CLEAR (X [k]) ; /* singular case */ if (IS_ZERO (ukk)) { /* matrix is numerically singular */ Common->status = KLU_SINGULAR ; if (Common->numerical_rank == EMPTY) { Common->numerical_rank = k+k1 ; Common->singular_col = Q [k+k1] ; } if (Common->halt_if_singular) { /* do not continue the factorization */ return (FALSE) ; } } Udiag [k+k1] = ukk ; /* gather and divide by pivot to get kth column of L */ GET_POINTER (LU, Lip, Llen, Li, Lx, k, llen) ; for (p = 0 ; p < llen ; p++) { i = Li [p] ; DIV (Lx [p], X [i], ukk) ; CLEAR (X [i]) ; } } } } } /* ---------------------------------------------------------------------- */ /* permute scale factors Rs according to pivotal row order */ /* ---------------------------------------------------------------------- */ if (scale > 0) { for (k = 0 ; k < n ; k++) { /* TODO : Check. REAL(X[k]) Can be just X[k] */ /* REAL (X [k]) = Rs [Pnum [k]] ; */ X [k] = Rs [Pnum [k]] ; } for (k = 0 ; k < n ; k++) { Rs [k] = REAL (X [k]) ; } } #ifndef NDEBUGKLU2 ASSERT (Offp [n] == poff) ; ASSERT (Symbolic->nzoff == poff) ; PRINTF (("\n------------------- Off diagonal entries, new:\n")) ; ASSERT (KLU_valid (n, Offp, Offi, Offx)) ; if (Common->status == KLU_OK) { PRINTF (("\n ########### KLU_BTF_REFACTOR done, nblocks %d\n",nblocks)); for (block = 0 ; block < nblocks ; block++) { k1 = R [block] ; k2 = R [block+1] ; nk = k2 - k1 ; PRINTF (( "\n================KLU_refactor output: k1 %d k2 %d nk %d\n", k1, k2, nk)) ; if (nk == 1) { PRINTF (("singleton ")) ; PRINT_ENTRY (Udiag [k1]) ; } else { Lip = Numeric->Lip + k1 ; Llen = Numeric->Llen + k1 ; LU = (Unit *) Numeric->LUbx [block] ; PRINTF (("\n---- L block %d\n", block)) ; ASSERT (KLU_valid_LU (nk, TRUE, Lip, Llen, LU)) ; Uip = Numeric->Uip + k1 ; Ulen = Numeric->Ulen + k1 ; PRINTF (("\n---- U block %d\n", block)) ; ASSERT (KLU_valid_LU (nk, FALSE, Uip, Ulen, LU)) ; } } } #endif return (TRUE) ; }