예제 #1
0
static void lsolve_numeric
(
    /* input, not modified on output: */
    Int Pinv [ ],	/* Pinv [i] = k if i is kth pivot row, or EMPTY if row i
			 * is not yet pivotal.  */
    Unit *LU,		/* LU factors (pattern and values) */
    Int Stack [ ],	/* stack for dfs */
    Int Lip [ ],	/* size n, Lip [k] is position in LU of column k of L */
    Int top,		/* top of stack on input */
    Int n,		/* A is n-by-n */
    Int Llen [ ],	/* size n, Llen [k] = # nonzeros in column k of L */

    /* output, must be zero on input: */
    Entry X [ ]	/* size n, initially zero.  On output,
		 * X [Ui [up1..up-1]] and X [Li [lp1..lp-1]]
		 * contains the solution. */

)
{
    Entry xj ;
    Entry *Lx ;
    Int *Li ;
    Int p, s, j, jnew, len ;

    /* solve Lx=b */
    for (s = top ; s < n ; s++)
    {
	/* forward solve with column j of L */
 	j = Stack [s] ;
	jnew = Pinv [j] ;
	ASSERT (jnew >= 0) ;
	xj = X [j] ;
	GET_POINTER (LU, Lip, Llen, Li, Lx, jnew, len) ;
	ASSERT (Lip [jnew] <= Lip [jnew+1]) ;
	for (p = 0 ; p < len ; p++)
	{
	    /*X [Li [p]] -= Lx [p] * xj ; */
	    MULT_SUB (X [Li [p]], Lx [p], xj) ;
	}
    }
}
예제 #2
0
파일: umf_ltsolve.c 프로젝트: rforge/matrix
GLOBAL double
#ifdef CONJUGATE_SOLVE
UMF_lhsolve			/* solve L'x=b  (complex conjugate transpose) */
#else
UMF_ltsolve			/* solve L.'x=b (array transpose) */
#endif
(
    NumericType *Numeric,
    Entry X [ ],		/* b on input, solution x on output */
    Int Pattern [ ]		/* a work array of size n */
)
{
    Entry xk ;
    Entry *xp, *Lval ;
    Int k, deg, *ip, j, row, *Lpos, *Lilen, kstart, kend, *Lip, llen,
	lp, pos, npiv, n1, *Li ;

    /* ---------------------------------------------------------------------- */

    if (Numeric->n_row != Numeric->n_col) return (0.) ;
    npiv = Numeric->npiv ;
    Lpos = Numeric->Lpos ;
    Lilen = Numeric->Lilen ;
    Lip = Numeric->Lip ;
    kstart = npiv ;
    n1 = Numeric->n1 ;

#ifndef NDEBUG
    DEBUG4 (("Ltsolve start:\n")) ;
    for (j = 0 ; j < Numeric->n_row ; j++)
    {
	DEBUG4 (("Ltsolve start "ID": ", j)) ;
	EDEBUG4 (X [j]) ;
	DEBUG4 (("\n")) ;
    }
#endif

    /* ---------------------------------------------------------------------- */
    /* non-singletons */
    /* ---------------------------------------------------------------------- */

    for (kend = npiv-1 ; kend >= n1 ; kend = kstart-1)
    {

	/* ------------------------------------------------------------------ */
	/* find the start of this Lchain */
	/* ------------------------------------------------------------------ */

	/* for (kstart = kend ; kstart >= 0 && Lip [kstart] > 0 ; kstart--) ; */
	kstart = kend ;
	while (kstart >= 0 && Lip [kstart] > 0)
	{
	    kstart-- ;
	}

	/* the Lchain goes from kstart to kend */

	/* ------------------------------------------------------------------ */
	/* scan the whole chain to find the pattern of the last column of L */
	/* ------------------------------------------------------------------ */

	deg = 0 ;
	DEBUG4 (("start of chain for column of L\n")) ;
	for (k = kstart ; k <= kend ; k++)
	{
	    ASSERT (k >= 0 && k < npiv) ;

	    /* -------------------------------------------------------------- */
	    /* make column k of L in Pattern [0..deg-1] */
	    /* -------------------------------------------------------------- */

	    /* remove pivot row */
	    pos = Lpos [k] ;
	    if (pos != EMPTY)
	    {
		DEBUG4 (("  k "ID" removing row "ID" at position "ID"\n",
		k, Pattern [pos], pos)) ;
		ASSERT (k != kstart) ;
		ASSERT (deg > 0) ;
		ASSERT (pos >= 0 && pos < deg) ;
		ASSERT (Pattern [pos] == k) ;
		Pattern [pos] = Pattern [--deg] ;
	    }

	    /* concatenate the pattern */
	    lp = Lip [k] ;
	    if (k == kstart)
	    {
		lp = -lp ;
	    }
	    ASSERT (lp > 0) ;
	    ip = (Int *) (Numeric->Memory + lp) ;
	    llen = Lilen [k] ;
	    for (j = 0 ; j < llen ; j++)
	    {
		row = *ip++ ;
		DEBUG4 (("  row "ID"  k "ID"\n", row, k)) ;
		ASSERT (row > k) ;
		Pattern [deg++] = row ;
	    }

	}
	/* Pattern [0..deg-1] is now the pattern of column kend */

	/* ------------------------------------------------------------------ */
	/* solve using this chain, in reverse order */
	/* ------------------------------------------------------------------ */

	DEBUG4 (("Unwinding Lchain\n")) ;
	for (k = kend ; k >= kstart ; k--)
	{

	    /* -------------------------------------------------------------- */
	    /* use column k of L */
	    /* -------------------------------------------------------------- */

	    ASSERT (k >= 0 && k < npiv) ;
	    lp = Lip [k] ;
	    if (k == kstart)
	    {
		lp = -lp ;
	    }
	    ASSERT (lp > 0) ;
	    llen = Lilen [k] ;
	    xp = (Entry *) (Numeric->Memory + lp + UNITS (Int, llen)) ;
	    xk = X [k] ;
	    for (j = 0 ; j < deg ; j++)
	    {
		DEBUG4 (("  row "ID"  k "ID" value", Pattern [j], k)) ;
		EDEBUG4 (*xp) ;
		DEBUG4 (("\n")) ;

#ifdef CONJUGATE_SOLVE
		/* xk -= X [Pattern [j]] * conjugate (*xp) ; */
		MULT_SUB_CONJ (xk, X [Pattern [j]], *xp) ;
#else
		/* xk -= X [Pattern [j]] * (*xp) ; */
		MULT_SUB (xk, X [Pattern [j]], *xp) ;
#endif

		xp++ ;
	    }
	    X [k] = xk ;

	    /* -------------------------------------------------------------- */
	    /* construct column k-1 of L */
	    /* -------------------------------------------------------------- */

	    /* un-concatenate the pattern */
	    deg -= llen ;

	    /* add pivot row */
	    pos = Lpos [k] ;
	    if (pos != EMPTY)
	    {
		DEBUG4 (("  k "ID" adding row "ID" at position "ID"\n",
		k, k, pos)) ;
		ASSERT (k != kstart) ;
		ASSERT (pos >= 0 && pos <= deg) ;
		Pattern [deg++] = Pattern [pos] ;
		Pattern [pos] = k ;
	    }
	}
    }

    /* ---------------------------------------------------------------------- */
    /* singletons */
    /* ---------------------------------------------------------------------- */

    for (k = n1 - 1 ; k >= 0 ; k--)
    {
	DEBUG4 (("Singleton k "ID"\n", k)) ;
	deg = Lilen [k] ;
	if (deg > 0)
	{
	    xk = X [k] ;
	    lp = Lip [k] ;
	    Li = (Int *) (Numeric->Memory + lp) ;
	    lp += UNITS (Int, deg) ;
	    Lval = (Entry *) (Numeric->Memory + lp) ;
	    for (j = 0 ; j < deg ; j++)
	    {
		DEBUG4 (("  row "ID"  k "ID" value", Li [j], k)) ;
		EDEBUG4 (Lval [j]) ;
		DEBUG4 (("\n")) ;
#ifdef CONJUGATE_SOLVE
		/* xk -= X [Li [j]] * conjugate (Lval [j]) ; */
		MULT_SUB_CONJ (xk, X [Li [j]], Lval [j]) ;
#else
		/* xk -= X [Li [j]] * Lval [j] ; */
		MULT_SUB (xk, X [Li [j]], Lval [j]) ;
#endif
	    }
	    X [k] = xk ;
	}
    }

#ifndef NDEBUG
    for (j = 0 ; j < Numeric->n_row ; j++)
    {
	DEBUG4 (("Ltsolve done "ID": ", j)) ;
	EDEBUG4 (X [j]) ;
	DEBUG4 (("\n")) ;
    }
    DEBUG4 (("Ltsolve done.\n")) ;
#endif

    return (MULTSUB_FLOPS * ((double) Numeric->lnz)) ;
}
예제 #3
0
GLOBAL void UMF_blas3_update
(
    WorkType *Work
)
{
    /* ---------------------------------------------------------------------- */
    /* local variables */
    /* ---------------------------------------------------------------------- */

    Entry *L, *U, *C, *LU ;
    Int i, j, s, k, m, n, d, nb, dc ;
    
#ifndef NBLAS
    Int blas_ok = TRUE ;
#else
#define blas_ok FALSE
#endif

    DEBUG5 (("In UMF_blas3_update "ID" "ID" "ID"\n",
	Work->fnpiv, Work->fnrows, Work->fncols)) ;

    k = Work->fnpiv ;
    if (k == 0)
    {
	/* no work to do */
	return ;
    }

    m = Work->fnrows ;
    n = Work->fncols ;

    d = Work->fnr_curr ;
    dc = Work->fnc_curr ;
    nb = Work->nb ;
    ASSERT (d >= 0 && (d % 2) == 1) ;
    C = Work->Fcblock ;	    /* ldc is fnr_curr */
    L =	Work->Flblock ;	    /* ldl is fnr_curr */
    U = Work->Fublock ;	    /* ldu is fnc_curr, stored by rows */
    LU = Work->Flublock ;   /* nb-by-nb */

#ifndef NDEBUG
    DEBUG5 (("DO RANK-NB UPDATE of frontal:\n")) ;
    DEBUG5 (("DGEMM : "ID" "ID" "ID"\n", k, m, n)) ;
    DEBUG7 (("C  block: ")) ; UMF_dump_dense (C,  d, m, n) ;
    DEBUG7 (("A  block: ")) ; UMF_dump_dense (L,  d, m, k) ;
    DEBUG7 (("B' block: ")) ; UMF_dump_dense (U, dc, n, k) ;
    DEBUG7 (("LU block: ")) ; UMF_dump_dense (LU, nb, k, k) ;
#endif

    if (k == 1)
    {

#ifndef NBLAS
	BLAS_GER (m, n, L, U, C, d) ;
#endif

	if (!blas_ok)
	{
	    /* rank-1 outer product to update the C block */
	    for (j = 0 ; j < n ; j++)
	    {
		Entry u_j = U [j] ;
		if (IS_NONZERO (u_j))
		{
		    Entry *c_ij, *l_is ;
		    c_ij = & C [j*d] ;
		    l_is = & L [0] ;
#pragma ivdep
		    for (i = 0 ; i < m ; i++)
		    {
			/* C [i+j*d]-= L [i] * U [j] */
			MULT_SUB (*c_ij, *l_is, u_j) ;
			c_ij++ ;
			l_is++ ;
		    }
		}
	    }
	}

    }
    else
    {

	/* triangular solve to update the U block */

#ifndef NBLAS
	BLAS_TRSM_RIGHT (n, k, LU, nb, U, dc) ;
#endif

	if (!blas_ok)
	{
	    /* use plain C code if no BLAS at compile time, or if integer
	     * overflow has occurred */
	    for (s = 0 ; s < k ; s++)
	    {
		for (i = s+1 ; i < k ; i++)
		{
		    Entry l_is = LU [i+s*nb] ;
		    if (IS_NONZERO (l_is))
		    {
			Entry *u_ij, *u_sj ;
			u_ij = & U [i*dc] ;
			u_sj = & U [s*dc] ;
#pragma ivdep
			for (j = 0 ; j < n ; j++)
			{
			    /* U [i*dc+j] -= LU [i+s*nb] * U [s*dc+j] ; */
			    MULT_SUB (*u_ij, l_is, *u_sj) ;
			    u_ij++ ;
			    u_sj++ ;
			}
		    }
		}
	    }
	}

	/* rank-k outer product to update the C block */
	/* C = C - L*U' (U is stored by rows, not columns) */

#ifndef NBLAS
	BLAS_GEMM (m, n, k, L, U, dc, C, d) ;
#endif

	if (!blas_ok)
	{
	    /* use plain C code if no BLAS at compile time, or if integer
	     * overflow has occurred */

	    for (s = 0 ; s < k ; s++)
	    {
		for (j = 0 ; j < n ; j++)
		{
		    Entry u_sj = U [j+s*dc] ;
		    if (IS_NONZERO (u_sj))
		    {
			Entry *c_ij, *l_is ;
			c_ij = & C [j*d] ;
			l_is = & L [s*d] ;
#pragma ivdep
			for (i = 0 ; i < m ; i++)
			{
			    /* C [i+j*d]-= L [i+s*d] * U [s*dc+j] */
			    MULT_SUB (*c_ij, *l_is, u_sj) ;
			    c_ij++ ;
			    l_is++ ;
			}
		    }
		}
	    }
	}
    }

#ifndef NDEBUG
    DEBUG5 (("RANK-NB UPDATE of frontal done:\n")) ;
    DEBUG5 (("DGEMM : "ID" "ID" "ID"\n", k, m, n)) ;
    DEBUG7 (("C  block: ")) ; UMF_dump_dense (C,  d, m, n) ;
    DEBUG7 (("A  block: ")) ; UMF_dump_dense (L,  d, m, k) ;
    DEBUG7 (("B' block: ")) ; UMF_dump_dense (U, dc, n, k) ;
    DEBUG7 (("LU block: ")) ; UMF_dump_dense (LU, nb, k, k) ;
#endif

    DEBUG2 (("blas3 "ID" "ID" "ID"\n", k, Work->fnrows, Work->fncols)) ;
}
예제 #4
0
Int KLU_solve
(
    /* inputs, not modified */
    KLU_symbolic *Symbolic,
    KLU_numeric *Numeric,
    Int d,		    /* leading dimension of B */
    Int nrhs,		    /* number of right-hand-sides */

    /* right-hand-side on input, overwritten with solution to Ax=b on output */
    double B [ ],	    /* size n*nrhs, in column-oriented form, with
			     * leading dimension d. */
    /* --------------- */
    KLU_common *Common
)
{
    Entry x [4], offik, s ;
    double rs, *Rs ;
    Entry *Offx, *X, *Bz, *Udiag ;
    Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ;
    Unit **LUbx ;
    Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ;

    /* ---------------------------------------------------------------------- */
    /* check inputs */
    /* ---------------------------------------------------------------------- */

    if (Common == NULL)
    {
	return (FALSE) ;
    }
    if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 ||
	B == NULL)
    {
	Common->status = KLU_INVALID ;
	return (FALSE) ;
    }
    Common->status = KLU_OK ;

    /* ---------------------------------------------------------------------- */
    /* get the contents of the Symbolic object */
    /* ---------------------------------------------------------------------- */

    Bz = (Entry *) B ;
    n = Symbolic->n ;
    nblocks = Symbolic->nblocks ;
    Q = Symbolic->Q ;
    R = Symbolic->R ;

    /* ---------------------------------------------------------------------- */
    /* get the contents of the Numeric object */
    /* ---------------------------------------------------------------------- */

    ASSERT (nblocks == Numeric->nblocks) ;
    Pnum = Numeric->Pnum ;
    Offp = Numeric->Offp ;
    Offi = Numeric->Offi ;
    Offx = (Entry *) Numeric->Offx ;

    Lip  = Numeric->Lip ;
    Llen = Numeric->Llen ;
    Uip  = Numeric->Uip ;
    Ulen = Numeric->Ulen ;
    LUbx = (Unit **) Numeric->LUbx ;
    Udiag = Numeric->Udiag ;

    Rs = Numeric->Rs ;
    X = (Entry *) Numeric->Xwork ;

    ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;

    /* ---------------------------------------------------------------------- */
    /* solve in chunks of 4 columns at a time */
    /* ---------------------------------------------------------------------- */

    for (chunk = 0 ; chunk < nrhs ; chunk += 4)
    {

	/* ------------------------------------------------------------------ */
	/* get the size of the current chunk */
	/* ------------------------------------------------------------------ */

	nr = MIN (nrhs - chunk, 4) ;

	/* ------------------------------------------------------------------ */
	/* scale and permute the right hand side, X = P*(R\B) */
	/* ------------------------------------------------------------------ */

	if (Rs == NULL)
	{

	    /* no scaling */
	    switch (nr)
	    {

		case 1:

		    for (k = 0 ; k < n ; k++)
		    {
			X [k] = Bz [Pnum [k]] ;
		    }
		    break ;

		case 2:

		    for (k = 0 ; k < n ; k++)
		    {
			i = Pnum [k] ;
			X [2*k    ] = Bz [i      ] ;
			X [2*k + 1] = Bz  [i + d  ] ;
		    }
		    break ;

		case 3:

		    for (k = 0 ; k < n ; k++)
		    {
			i = Pnum [k] ;
			X [3*k    ] = Bz [i      ] ;
			X [3*k + 1] = Bz [i + d  ] ;
			X [3*k + 2] = Bz [i + d*2] ;
		    }
		    break ;

		case 4:

		    for (k = 0 ; k < n ; k++)
		    {
			i = Pnum [k] ;
			X [4*k    ] = Bz [i      ] ;
			X [4*k + 1] = Bz [i + d  ] ;
			X [4*k + 2] = Bz [i + d*2] ;
			X [4*k + 3] = Bz [i + d*3] ;
		    }
		    break ;
	    }

	}
	else
	{

	    switch (nr)
	    {

		case 1:

		    for (k = 0 ; k < n ; k++)
		    {
			SCALE_DIV_ASSIGN (X [k], Bz  [Pnum [k]], Rs [k]) ;
		    }
		    break ;

		case 2:

		    for (k = 0 ; k < n ; k++)
		    {
			i = Pnum [k] ;
			rs = Rs [k] ;
			SCALE_DIV_ASSIGN (X [2*k], Bz [i], rs) ;
			SCALE_DIV_ASSIGN (X [2*k + 1], Bz [i + d], rs) ;
		    }
		    break ;

		case 3:

		    for (k = 0 ; k < n ; k++)
		    {
			i = Pnum [k] ;
			rs = Rs [k] ;
			SCALE_DIV_ASSIGN (X [3*k], Bz [i], rs) ;
			SCALE_DIV_ASSIGN (X [3*k + 1], Bz [i + d], rs) ;
			SCALE_DIV_ASSIGN (X [3*k + 2], Bz [i + d*2], rs) ;
		    }
		    break ;

		case 4:

		    for (k = 0 ; k < n ; k++)
		    {
			i = Pnum [k] ;
			rs = Rs [k] ;
			SCALE_DIV_ASSIGN (X [4*k], Bz [i], rs) ;
			SCALE_DIV_ASSIGN (X [4*k + 1], Bz [i + d], rs) ;
			SCALE_DIV_ASSIGN (X [4*k + 2], Bz [i + d*2], rs) ;
			SCALE_DIV_ASSIGN (X [4*k + 3], Bz [i + d*3], rs) ;
		    }
		    break ;
	    }
	}

	/* ------------------------------------------------------------------ */
	/* solve X = (L*U + Off)\X */
	/* ------------------------------------------------------------------ */

	for (block = nblocks-1 ; block >= 0 ; block--)
	{

	    /* -------------------------------------------------------------- */
	    /* the block of size nk is from rows/columns k1 to k2-1 */
	    /* -------------------------------------------------------------- */

	    k1 = R [block] ;
	    k2 = R [block+1] ;
	    nk = k2 - k1 ;
	    PRINTF (("solve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;

	    /* solve the block system */
	    if (nk == 1)
	    {
		s = Udiag [k1] ;
		switch (nr)
		{

		    case 1:
			DIV (X [k1], X [k1], s) ;
			break ;

		    case 2:
			DIV (X [2*k1], X [2*k1], s) ;
			DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
			break ;

		    case 3:
			DIV (X [3*k1], X [3*k1], s) ;
			DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
			DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
			break ;

		    case 4:
			DIV (X [4*k1], X [4*k1], s) ;
			DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
			DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
			DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
			break ;

		}
	    }
	    else
	    {
		KLU_lsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr,
			X + nr*k1) ;
		KLU_usolve (nk, Uip + k1, Ulen + k1, LUbx [block],
			Udiag + k1, nr, X + nr*k1) ;
	    }

	    /* -------------------------------------------------------------- */
	    /* block back-substitution for the off-diagonal-block entries */
	    /* -------------------------------------------------------------- */

	    if (block > 0)
	    {
		switch (nr)
		{

		    case 1:

			for (k = k1 ; k < k2 ; k++)
			{
			    pend = Offp [k+1] ;
			    x [0] = X [k] ;
			    for (p = Offp [k] ; p < pend ; p++)
			    {
				MULT_SUB (X [Offi [p]], Offx [p], x [0]) ;
			    }
			}
			break ;

		    case 2:

			for (k = k1 ; k < k2 ; k++)
			{
			    pend = Offp [k+1] ;
			    x [0] = X [2*k    ] ;
			    x [1] = X [2*k + 1] ;
			    for (p = Offp [k] ; p < pend ; p++)
			    {
				i = Offi [p] ;
				offik = Offx [p] ;
				MULT_SUB (X [2*i], offik, x [0]) ;
				MULT_SUB (X [2*i + 1], offik, x [1]) ;
			    }
			}
			break ;

		    case 3:

			for (k = k1 ; k < k2 ; k++)
			{
			    pend = Offp [k+1] ;
			    x [0] = X [3*k    ] ;
			    x [1] = X [3*k + 1] ;
			    x [2] = X [3*k + 2] ;
			    for (p = Offp [k] ; p < pend ; p++)
			    {
				i = Offi [p] ;
				offik = Offx [p] ;
				MULT_SUB (X [3*i], offik, x [0]) ;
				MULT_SUB (X [3*i + 1], offik, x [1]) ;
				MULT_SUB (X [3*i + 2], offik, x [2]) ;
			    }
			}
			break ;

		    case 4:

			for (k = k1 ; k < k2 ; k++)
			{
			    pend = Offp [k+1] ;
			    x [0] = X [4*k    ] ;
			    x [1] = X [4*k + 1] ;
			    x [2] = X [4*k + 2] ;
			    x [3] = X [4*k + 3] ;
			    for (p = Offp [k] ; p < pend ; p++)
			    {
				i = Offi [p] ;
				offik = Offx [p] ;
				MULT_SUB (X [4*i], offik, x [0]) ;
				MULT_SUB (X [4*i + 1], offik, x [1]) ;
				MULT_SUB (X [4*i + 2], offik, x [2]) ;
				MULT_SUB (X [4*i + 3], offik, x [3]) ;
			    }
			}
			break ;
		}
	    }
	}

	/* ------------------------------------------------------------------ */
	/* permute the result, Bz  = Q*X */
	/* ------------------------------------------------------------------ */

	switch (nr)
	{

	    case 1:

		for (k = 0 ; k < n ; k++)
		{
		    Bz  [Q [k]] = X [k] ;
		}
		break ;

	    case 2:

		for (k = 0 ; k < n ; k++)
		{
		    i = Q [k] ;
		    Bz  [i      ] = X [2*k    ] ;
		    Bz  [i + d  ] = X [2*k + 1] ;
		}
		break ;

	    case 3:

		for (k = 0 ; k < n ; k++)
		{
		    i = Q [k] ;
		    Bz  [i      ] = X [3*k    ] ;
		    Bz  [i + d  ] = X [3*k + 1] ;
		    Bz  [i + d*2] = X [3*k + 2] ;
		}
		break ;

	    case 4:

		for (k = 0 ; k < n ; k++)
		{
		    i = Q [k] ;
		    Bz  [i      ] = X [4*k    ] ;
		    Bz  [i + d  ] = X [4*k + 1] ;
		    Bz  [i + d*2] = X [4*k + 2] ;
		    Bz  [i + d*3] = X [4*k + 3] ;
		}
		break ;
	}

	/* ------------------------------------------------------------------ */
	/* go to the next chunk of B */
	/* ------------------------------------------------------------------ */

	Bz  += d*4 ;
    }
    return (TRUE) ;
}
예제 #5
0
파일: klu.c 프로젝트: Ascronia/fieldtrip
void KLU_utsolve
(
    /* inputs, not modified: */
    Int n,
    Int Uip [ ],
    Int Ulen [ ],
    Unit LU [ ],
    Entry Udiag [ ],
    Int nrhs,
#ifdef COMPLEX
    Int conj_solve,
#endif
    /* right-hand-side on input, solution to Ux=b on output */
    Entry X [ ]
)
{
    Entry x [4], uik, ukk ;
    Int k, p, len, i ;
    Int *Ui ;
    Entry *Ux ;

    switch (nrhs)
    {

        case 1:

            for (k = 0 ; k < n ; k++)
            {
                GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
                x [0] = X [k] ;
                for (p = 0 ; p < len ; p++)
                {
#ifdef COMPLEX
                    if (conj_solve)
                    {
                        /* x [0] -= CONJ (Ux [p]) * X [Ui [p]] ; */
                        MULT_SUB_CONJ (x [0], X [Ui [p]], Ux [p]) ;
                    }
                    else
#endif
                    {
                        /* x [0] -= Ux [p] * X [Ui [p]] ; */
                        MULT_SUB (x [0], Ux [p], X [Ui [p]]) ;
                    }
                }
#ifdef COMPLEX
                if (conj_solve)
                {
                    CONJ (ukk, Udiag [k]) ;
                }
                else
#endif
                {
                    ukk = Udiag [k] ;
                }
                DIV (X [k], x [0], ukk) ;
            }
            break ;

        case 2:

            for (k = 0 ; k < n ; k++)
            {
                GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
                x [0] = X [2*k    ] ;
                x [1] = X [2*k + 1] ;
                for (p = 0 ; p < len ; p++)
                {
                    i = Ui [p] ;
#ifdef COMPLEX
                    if (conj_solve)
                    {
                        CONJ (uik, Ux [p]) ;
                    }
                    else
#endif
                    {
                        uik = Ux [p] ;
                    }
                    MULT_SUB (x [0], uik, X [2*i]) ;
                    MULT_SUB (x [1], uik, X [2*i + 1]) ;
                }
#ifdef COMPLEX
                if (conj_solve)
                {
                    CONJ (ukk, Udiag [k]) ;
                }
                else
#endif
                {
                    ukk = Udiag [k] ;
                }
                DIV (X [2*k], x [0], ukk) ;
                DIV (X [2*k + 1], x [1], ukk) ;
            }
            break ;

        case 3:

            for (k = 0 ; k < n ; k++)
            {
                GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
                x [0] = X [3*k    ] ;
                x [1] = X [3*k + 1] ;
                x [2] = X [3*k + 2] ;
                for (p = 0 ; p < len ; p++)
                {
                    i = Ui [p] ;
#ifdef COMPLEX
                    if (conj_solve)
                    {
                        CONJ (uik, Ux [p]) ;
                    }
                    else
#endif
                    {
                        uik = Ux [p] ;
                    }
                    MULT_SUB (x [0], uik, X [3*i]) ;
                    MULT_SUB (x [1], uik, X [3*i + 1]) ;
                    MULT_SUB (x [2], uik, X [3*i + 2]) ;
                }
#ifdef COMPLEX
                if (conj_solve)
                {
                    CONJ (ukk, Udiag [k]) ;
                }
                else
#endif
                {
                    ukk = Udiag [k] ;
                }
                DIV (X [3*k], x [0], ukk) ;
                DIV (X [3*k + 1], x [1], ukk) ;
                DIV (X [3*k + 2], x [2], ukk) ;
            }
            break ;

        case 4:

            for (k = 0 ; k < n ; k++)
            {
                GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
                x [0] = X [4*k    ] ;
                x [1] = X [4*k + 1] ;
                x [2] = X [4*k + 2] ;
                x [3] = X [4*k + 3] ;
                for (p = 0 ; p < len ; p++)
                {
                    i = Ui [p] ;
#ifdef COMPLEX
                    if (conj_solve)
                    {
                        CONJ (uik, Ux [p]) ;
                    }
                    else
#endif
                    {
                        uik = Ux [p] ;
                    }
                    MULT_SUB (x [0], uik, X [4*i]) ;
                    MULT_SUB (x [1], uik, X [4*i + 1]) ;
                    MULT_SUB (x [2], uik, X [4*i + 2]) ;
                    MULT_SUB (x [3], uik, X [4*i + 3]) ;
                }
#ifdef COMPLEX
                if (conj_solve)
                {
                    CONJ (ukk, Udiag [k]) ;
                }
                else
#endif
                {
                    ukk = Udiag [k] ;
                }
                DIV (X [4*k], x [0], ukk) ;
                DIV (X [4*k + 1], x [1], ukk) ;
                DIV (X [4*k + 2], x [2], ukk) ;
                DIV (X [4*k + 3], x [3], ukk) ;
            }
            break ;
    }
}
예제 #6
0
파일: klu.c 프로젝트: Ascronia/fieldtrip
void KLU_ltsolve
(
    /* inputs, not modified: */
    Int n,
    Int Lip [ ],
    Int Llen [ ],
    Unit LU [ ],
    Int nrhs,
#ifdef COMPLEX
    Int conj_solve,
#endif
    /* right-hand-side on input, solution to L'x=b on output */
    Entry X [ ]
)
{
    Entry x [4], lik ;
    Int *Li ;
    Entry *Lx ;
    Int k, p, len, i ;

    switch (nrhs)
    {

        case 1:

            for (k = n-1 ; k >= 0 ; k--)
            {
                GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
                x [0] = X [k] ;
                for (p = 0 ; p < len ; p++)
                {
#ifdef COMPLEX
                    if (conj_solve)
                    {
                        /* x [0] -= CONJ (Lx [p]) * X [Li [p]] ; */
                        MULT_SUB_CONJ (x [0], X [Li [p]], Lx [p]) ;
                    }
                    else
#endif
                    {
                        /*x [0] -= Lx [p] * X [Li [p]] ;*/
                        MULT_SUB (x [0], Lx [p], X [Li [p]]) ;
                    }
                }
                X [k] = x [0] ;
            }
            break ;

        case 2:

            for (k = n-1 ; k >= 0 ; k--)
            {
                x [0] = X [2*k    ] ;
                x [1] = X [2*k + 1] ;
                GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
                for (p = 0 ; p < len ; p++)
                {
                    i = Li [p] ;
#ifdef COMPLEX
                    if (conj_solve)
                    {
                        CONJ (lik, Lx [p]) ;
                    }
                    else
#endif
                    {
                        lik = Lx [p] ;
                    }
                    MULT_SUB (x [0], lik, X [2*i]) ;
                    MULT_SUB (x [1], lik, X [2*i + 1]) ;
                }
                X [2*k    ] = x [0] ;
                X [2*k + 1] = x [1] ;
            }
            break ;

        case 3:

            for (k = n-1 ; k >= 0 ; k--)
            {
                x [0] = X [3*k    ] ;
                x [1] = X [3*k + 1] ;
                x [2] = X [3*k + 2] ;
                GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
                for (p = 0 ; p < len ; p++)
                {
                    i = Li [p] ;
#ifdef COMPLEX
                    if (conj_solve)
                    {
                        CONJ (lik, Lx [p]) ;
                    }
                    else
#endif
                    {
                        lik = Lx [p] ;
                    }
                    MULT_SUB (x [0], lik, X [3*i]) ;
                    MULT_SUB (x [1], lik, X [3*i + 1]) ;
                    MULT_SUB (x [2], lik, X [3*i + 2]) ;
                }
                X [3*k    ] = x [0] ;
                X [3*k + 1] = x [1] ;
                X [3*k + 2] = x [2] ;
            }
            break ;

        case 4:

            for (k = n-1 ; k >= 0 ; k--)
            {
                x [0] = X [4*k    ] ;
                x [1] = X [4*k + 1] ;
                x [2] = X [4*k + 2] ;
                x [3] = X [4*k + 3] ;
                GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
                for (p = 0 ; p < len ; p++)
                {
                    i = Li [p] ;
#ifdef COMPLEX
                    if (conj_solve)
                    {
                        CONJ (lik, Lx [p]) ;
                    }
                    else
#endif
                    {
                        lik = Lx [p] ;
                    }
                    MULT_SUB (x [0], lik, X [4*i]) ;
                    MULT_SUB (x [1], lik, X [4*i + 1]) ;
                    MULT_SUB (x [2], lik, X [4*i + 2]) ;
                    MULT_SUB (x [3], lik, X [4*i + 3]) ;
                }
                X [4*k    ] = x [0] ;
                X [4*k + 1] = x [1] ;
                X [4*k + 2] = x [2] ;
                X [4*k + 3] = x [3] ;
            }
            break ;
    }
}
예제 #7
0
파일: klu.c 프로젝트: Ascronia/fieldtrip
void KLU_usolve
(
    /* inputs, not modified: */
    Int n,
    Int Uip [ ],
    Int Ulen [ ],
    Unit LU [ ],
    Entry Udiag [ ],
    Int nrhs,
    /* right-hand-side on input, solution to Ux=b on output */
    Entry X [ ]
)
{
    Entry x [4], uik, ukk ;
    Int *Ui ;
    Entry *Ux ;
    Int k, p, len, i ;

    switch (nrhs)
    {

        case 1:

            for (k = n-1 ; k >= 0 ; k--)
            {
                GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
                /* x [0] = X [k] / Udiag [k] ; */
                DIV (x [0], X [k], Udiag [k]) ;
                X [k] = x [0] ;
                for (p = 0 ; p < len ; p++)
                {
                    /* X [Ui [p]] -= Ux [p] * x [0] ; */
                    MULT_SUB (X [Ui [p]], Ux [p], x [0]) ;

                }
            }

            break ;

        case 2:

            for (k = n-1 ; k >= 0 ; k--)
            {
                GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
                ukk = Udiag [k] ;
                /* x [0] = X [2*k    ] / ukk ;
                x [1] = X [2*k + 1] / ukk ; */
                DIV (x [0], X [2*k], ukk) ;
                DIV (x [1], X [2*k + 1], ukk) ;

                X [2*k    ] = x [0] ;
                X [2*k + 1] = x [1] ;
                for (p = 0 ; p < len ; p++)
                {
                    i = Ui [p] ;
                    uik = Ux [p] ;
                    /* X [2*i    ] -= uik * x [0] ;
                    X [2*i + 1] -= uik * x [1] ; */
                    MULT_SUB (X [2*i], uik, x [0]) ;
                    MULT_SUB (X [2*i + 1], uik, x [1]) ;
                }
            }

            break ;

        case 3:

            for (k = n-1 ; k >= 0 ; k--)
            {
                GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
                ukk = Udiag [k] ;

                DIV (x [0], X [3*k], ukk) ;
                DIV (x [1], X [3*k + 1], ukk) ;
                DIV (x [2], X [3*k + 2], ukk) ;

                X [3*k    ] = x [0] ;
                X [3*k + 1] = x [1] ;
                X [3*k + 2] = x [2] ;
                for (p = 0 ; p < len ; p++)
                {
                    i = Ui [p] ;
                    uik = Ux [p] ;
                    MULT_SUB (X [3*i], uik, x [0]) ;
                    MULT_SUB (X [3*i + 1], uik, x [1]) ;
                    MULT_SUB (X [3*i + 2], uik, x [2]) ;
                }
            }

            break ;

        case 4:

            for (k = n-1 ; k >= 0 ; k--)
            {
                GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
                ukk = Udiag [k] ;

                DIV (x [0], X [4*k], ukk) ;
                DIV (x [1], X [4*k + 1], ukk) ;
                DIV (x [2], X [4*k + 2], ukk) ;
                DIV (x [3], X [4*k + 3], ukk) ;

                X [4*k    ] = x [0] ;
                X [4*k + 1] = x [1] ;
                X [4*k + 2] = x [2] ;
                X [4*k + 3] = x [3] ;
                for (p = 0 ; p < len ; p++)
                {
                    i = Ui [p] ;
                    uik = Ux [p] ;

                    MULT_SUB (X [4*i], uik, x [0]) ;
                    MULT_SUB (X [4*i + 1], uik, x [1]) ;
                    MULT_SUB (X [4*i + 2], uik, x [2]) ;
                    MULT_SUB (X [4*i + 3], uik, x [3]) ;
                }
            }

            break ;

    }
}
예제 #8
0
파일: klu.c 프로젝트: Ascronia/fieldtrip
void KLU_lsolve
(
    /* inputs, not modified: */
    Int n,
    Int Lip [ ],
    Int Llen [ ],
    Unit LU [ ],
    Int nrhs,
    /* right-hand-side on input, solution to Lx=b on output */
    Entry X [ ]
)
{
    Entry x [4], lik ;
    Int *Li ;
    Entry *Lx ;
    Int k, p, len, i ;

    switch (nrhs)
    {

        case 1:
            for (k = 0 ; k < n ; k++)
            {
                x [0] = X [k] ;
                GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
                /* unit diagonal of L is not stored*/
                for (p = 0 ; p < len ; p++)
                {
                    /* X [Li [p]] -= Lx [p] * x [0] ; */
                    MULT_SUB (X [Li [p]], Lx [p], x [0]) ;
                }
            }
            break ;

        case 2:

            for (k = 0 ; k < n ; k++)
            {
                x [0] = X [2*k    ] ;
                x [1] = X [2*k + 1] ;
                GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
                for (p = 0 ; p < len ; p++)
                {
                    i = Li [p] ;
                    lik = Lx [p] ;
                    MULT_SUB (X [2*i], lik, x [0]) ;
                    MULT_SUB (X [2*i + 1], lik, x [1]) ;
                }
            }
            break ;

        case 3:

            for (k = 0 ; k < n ; k++)
            {
                x [0] = X [3*k    ] ;
                x [1] = X [3*k + 1] ;
                x [2] = X [3*k + 2] ;
                GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
                for (p = 0 ; p < len ; p++)
                {
                    i = Li [p] ;
                    lik = Lx [p] ;
                    MULT_SUB (X [3*i], lik, x [0]) ;
                    MULT_SUB (X [3*i + 1], lik, x [1]) ;
                    MULT_SUB (X [3*i + 2], lik, x [2]) ;
                }
            }
            break ;

        case 4:

            for (k = 0 ; k < n ; k++)
            {
                x [0] = X [4*k    ] ;
                x [1] = X [4*k + 1] ;
                x [2] = X [4*k + 2] ;
                x [3] = X [4*k + 3] ;
                GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
                for (p = 0 ; p < len ; p++)
                {
                    i = Li [p] ;
                    lik = Lx [p] ;
                    MULT_SUB (X [4*i], lik, x [0]) ;
                    MULT_SUB (X [4*i + 1], lik, x [1]) ;
                    MULT_SUB (X [4*i + 2], lik, x [2]) ;
                    MULT_SUB (X [4*i + 3], lik, x [3]) ;
                }
            }
            break ;

    }
}
예제 #9
0
GLOBAL double UMF_usolve
(
    NumericType *Numeric,
    Entry X [ ],		/* b on input, solution x on output */
    Int Pattern [ ]		/* a work array of size n */
)
{
    /* ---------------------------------------------------------------------- */
    /* local variables */
    /* ---------------------------------------------------------------------- */

    Int k, deg, j, *ip, col, *Upos, *Uilen, pos,
	*Uip, n, ulen, up, newUchain, npiv, n1, *Ui ;
    Entry *xp, xk, *D, *Uval ;

    /* ---------------------------------------------------------------------- */
    /* get parameters */
    /* ---------------------------------------------------------------------- */

    if (Numeric->n_row != Numeric->n_col) return (0.) ;
    n = Numeric->n_row ;
    npiv = Numeric->npiv ;
    Upos = Numeric->Upos ;
    Uilen = Numeric->Uilen ;
    Uip = Numeric->Uip ;
    D = Numeric->D ;
    n1 = Numeric->n1 ;

#ifndef NDEBUG
    DEBUG4 (("Usolve start:  npiv = "ID" n = "ID"\n", npiv, n)) ;
    for (j = 0 ; j < n ; j++)
    {
	DEBUG4 (("Usolve start "ID": ", j)) ;
	EDEBUG4 (X [j]) ;
	DEBUG4 (("\n")) ;
    }
#endif

    /* ---------------------------------------------------------------------- */
    /* singular case */
    /* ---------------------------------------------------------------------- */

    /* handle the singular part of D, up to just before the last pivot */
    for (k = n-1 ; k >= npiv ; k--)
    {
	/* This is an *** intentional *** divide-by-zero, to get Inf or Nan,
	 * as appropriate.  It is not a bug. */
	ASSERT (IS_ZERO (D [k])) ;
	xk = X [k] ;
	/* X [k] = xk / D [k] ; */
	DIV (X [k], xk, D [k]) ;
    }

    deg = Numeric->ulen ;
    if (deg > 0)
    {
	/* :: make last pivot row of U (singular matrices only) :: */
	for (j = 0 ; j < deg ; j++)
	{
	    DEBUG1 (("Last row of U: j="ID"\n", j)) ;
	    DEBUG1 (("Last row of U: Upattern[j]="ID"\n",
		Numeric->Upattern [j]) );
	    Pattern [j] = Numeric->Upattern [j] ;
	}
    }

    /* ---------------------------------------------------------------------- */
    /* nonsingletons */
    /* ---------------------------------------------------------------------- */

    for (k = npiv-1 ; k >= n1 ; k--)
    {

	/* ------------------------------------------------------------------ */
	/* use row k of U */
	/* ------------------------------------------------------------------ */

	up = Uip [k] ;
	ulen = Uilen [k] ;
	newUchain = (up < 0) ;
	if (newUchain)
	{
	    up = -up ;
	    xp = (Entry *) (Numeric->Memory + up + UNITS (Int, ulen)) ;
	}
	else
	{
	    xp = (Entry *) (Numeric->Memory + up) ;
	}

	xk = X [k] ;
	for (j = 0 ; j < deg ; j++)
	{
	    DEBUG4 (("  k "ID" col "ID" value", k, Pattern [j])) ;
	    EDEBUG4 (*xp) ;
	    DEBUG4 (("\n")) ;
	    /* xk -= X [Pattern [j]] * (*xp) ; */
	    MULT_SUB (xk, X [Pattern [j]], *xp) ;
	    xp++ ;
	}

	/* Go ahead and divide by zero if D [k] is zero */
	/* X [k] = xk / D [k] ; */
	DIV (X [k], xk, D [k]) ;

	/* ------------------------------------------------------------------ */
	/* make row k-1 of U in Pattern [0..deg-1] */
	/* ------------------------------------------------------------------ */

	if (k == n1) break ;

	if (newUchain)
	{
	    /* next row is a new Uchain */
	    deg = ulen ;
	    ASSERT (IMPLIES (k == 0, deg == 0)) ;
	    DEBUG4 (("end of chain for row of U "ID" deg "ID"\n", k-1, deg)) ;
	    ip = (Int *) (Numeric->Memory + up) ;
	    for (j = 0 ; j < deg ; j++)
	    {
		col = *ip++ ;
		DEBUG4 (("  k "ID" col "ID"\n", k-1, col)) ;
		ASSERT (k <= col) ;
		Pattern [j] = col ;
	    }
	}
	else
	{
	    deg -= ulen ;
	    DEBUG4 (("middle of chain for row of U "ID" deg "ID"\n", k, deg)) ;
	    ASSERT (deg >= 0) ;
	    pos = Upos [k] ;
	    if (pos != EMPTY)
	    {
		/* add the pivot column */
		DEBUG4 (("k "ID" add pivot entry at pos "ID"\n", k, pos)) ;
		ASSERT (pos >= 0 && pos <= deg) ;
		Pattern [deg++] = Pattern [pos] ;
		Pattern [pos] = k ;
	    }
	}
    }

    /* ---------------------------------------------------------------------- */
    /* singletons */
    /* ---------------------------------------------------------------------- */

    for (k = n1 - 1 ; k >= 0 ; k--)
    {
	deg = Uilen [k] ;
	xk = X [k] ;
	DEBUG4 (("Singleton k "ID"\n", k)) ;
	if (deg > 0)
	{
	    up = Uip [k] ;
	    Ui = (Int *) (Numeric->Memory + up) ;
	    up += UNITS (Int, deg) ;
	    Uval = (Entry *) (Numeric->Memory + up) ;
	    for (j = 0 ; j < deg ; j++)
	    {
		DEBUG4 (("  k "ID" col "ID" value", k, Ui [j])) ;
		EDEBUG4 (Uval [j]) ;
		DEBUG4 (("\n")) ;
		/* xk -= X [Ui [j]] * Uval [j] ; */
		ASSERT (Ui [j] >= 0 && Ui [j] < n) ;
		MULT_SUB (xk, X [Ui [j]], Uval [j]) ;
	    }
	}
	/* Go ahead and divide by zero if D [k] is zero */
	/* X [k] = xk / D [k] ; */
	DIV (X [k], xk, D [k]) ;
    }

#ifndef NDEBUG
    for (j = 0 ; j < n ; j++)
    {
	DEBUG4 (("Usolve done "ID": ", j)) ;
	EDEBUG4 (X [j]) ;
	DEBUG4 (("\n")) ;
    }
    DEBUG4 (("Usolve done.\n")) ;
#endif

    return (DIV_FLOPS * ((double) n) + MULTSUB_FLOPS * ((double) Numeric->unz));
}
예제 #10
0
GLOBAL double
#ifdef CONJUGATE_SOLVE
UMF_uhsolve			/* solve U'x=b  (complex conjugate transpose) */
#else
UMF_utsolve			/* solve U.'x=b (array transpose) */
#endif
(
    NumericType *Numeric,
    Entry X [ ],		/* b on input, solution x on output */
    Int Pattern [ ]		/* a work array of size n */
)
{
    /* ---------------------------------------------------------------------- */
    /* local variables */
    /* ---------------------------------------------------------------------- */

    Int k, deg, j, *ip, col, *Upos, *Uilen, kstart, kend, up,
	*Uip, n, uhead, ulen, pos, npiv, n1, *Ui ;
    Entry *xp, xk, *D, *Uval ;

    /* ---------------------------------------------------------------------- */
    /* get parameters */
    /* ---------------------------------------------------------------------- */

    if (Numeric->n_row != Numeric->n_col) return (0.) ;
    n = Numeric->n_row ;
    npiv = Numeric->npiv ;
    Upos = Numeric->Upos ;
    Uilen = Numeric->Uilen ;
    Uip = Numeric->Uip ;
    D = Numeric->D ;
    kend = 0 ;
    n1 = Numeric->n1 ;

#ifndef NDEBUG
    DEBUG4 (("Utsolve start: npiv "ID" n "ID"\n", npiv, n)) ;
    for (j = 0 ; j < n ; j++)
    {
	DEBUG4 (("Utsolve start "ID": ", j)) ;
	EDEBUG4 (X [j]) ;
	DEBUG4 (("\n")) ;
    }
#endif

    /* ---------------------------------------------------------------------- */
    /* singletons */
    /* ---------------------------------------------------------------------- */

    for (k = 0 ; k < n1 ; k++)
    {
	DEBUG4 (("Singleton k "ID"\n", k)) ;
	/* Go ahead and divide by zero if D [k] is zero. */
#ifdef CONJUGATE_SOLVE
	/* xk = X [k] / conjugate (D [k]) ; */
	DIV_CONJ (xk, X [k], D [k]) ;
#else
	/* xk = X [k] / D [k] ; */
	DIV (xk, X [k], D [k]) ;
#endif
	X [k] = xk ;
	deg = Uilen [k] ;
	if (deg > 0 && IS_NONZERO (xk))
	{
	    up = Uip [k] ;
	    Ui = (Int *) (Numeric->Memory + up) ;
	    up += UNITS (Int, deg) ;
	    Uval = (Entry *) (Numeric->Memory + up) ;
	    for (j = 0 ; j < deg ; j++)
	    {
		DEBUG4 (("  k "ID" col "ID" value", k, Ui [j])) ;
		EDEBUG4 (Uval [j]) ;
		DEBUG4 (("\n")) ;
#ifdef CONJUGATE_SOLVE
		/* X [Ui [j]] -= xk * conjugate (Uval [j]) ; */
		MULT_SUB_CONJ (X [Ui [j]], xk, Uval [j]) ;
#else
		/* X [Ui [j]] -= xk * Uval [j] ; */
		MULT_SUB (X [Ui [j]], xk, Uval [j]) ;
#endif
	    }
	}
    }

    /* ---------------------------------------------------------------------- */
    /* nonsingletons */
    /* ---------------------------------------------------------------------- */

    for (kstart = n1 ; kstart < npiv ; kstart = kend + 1)
    {

	/* ------------------------------------------------------------------ */
	/* find the end of this Uchain */
	/* ------------------------------------------------------------------ */

	DEBUG4 (("kstart "ID" kend "ID"\n", kstart, kend)) ;
	/* for (kend = kstart ; kend < npiv && Uip [kend+1] > 0 ; kend++) ; */
	kend = kstart ;
	while (kend < npiv && Uip [kend+1] > 0)
	{
	    kend++ ;
	}

	/* ------------------------------------------------------------------ */
	/* scan the whole Uchain to find the pattern of the first row of U */
	/* ------------------------------------------------------------------ */

	k = kend+1 ;
	DEBUG4 (("\nKend "ID" K "ID"\n", kend, k)) ;

	/* ------------------------------------------------------------------ */
	/* start with last row in Uchain of U in Pattern [0..deg-1] */
	/* ------------------------------------------------------------------ */

	if (k == npiv)
	{
	    deg = Numeric->ulen ;
	    if (deg > 0)
	    {
		/* :: make last pivot row of U (singular matrices only) :: */
		for (j = 0 ; j < deg ; j++)
		{
		    Pattern [j] = Numeric->Upattern [j] ;
		}
	    }
	}
	else
	{
	    ASSERT (k >= 0 && k < npiv) ;
	    up = -Uip [k] ;
	    ASSERT (up > 0) ;
	    deg = Uilen [k] ;
	    DEBUG4 (("end of chain for row of U "ID" deg "ID"\n", k-1, deg)) ;
	    ip = (Int *) (Numeric->Memory + up) ;
	    for (j = 0 ; j < deg ; j++)
	    {
		col = *ip++ ;
		DEBUG4 (("  k "ID" col "ID"\n", k-1, col)) ;
		ASSERT (k <= col) ;
		Pattern [j] = col ;
	    }
	}

	/* empty the stack at the bottom of Pattern */
	uhead = n ;

	for (k = kend ; k > kstart ; k--)
	{
	    /* Pattern [0..deg-1] is the pattern of row k of U */

	    /* -------------------------------------------------------------- */
	    /* make row k-1 of U in Pattern [0..deg-1] */
	    /* -------------------------------------------------------------- */

	    ASSERT (k >= 0 && k < npiv) ;
	    ulen = Uilen [k] ;
	    /* delete, and push on the stack */
	    for (j = 0 ; j < ulen ; j++)
	    {
		ASSERT (uhead >= deg) ;
		Pattern [--uhead] = Pattern [--deg] ;
	    }
	    DEBUG4 (("middle of chain for row of U "ID" deg "ID"\n", k, deg)) ;
	    ASSERT (deg >= 0) ;

	    pos = Upos [k] ;
	    if (pos != EMPTY)
	    {
		/* add the pivot column */
		DEBUG4 (("k "ID" add pivot entry at position "ID"\n", k, pos)) ;
		ASSERT (pos >= 0 && pos <= deg) ;
		Pattern [deg++] = Pattern [pos] ;
		Pattern [pos] = k ;
	    }
	}

	/* Pattern [0..deg-1] is now the pattern of the first row in Uchain */

	/* ------------------------------------------------------------------ */
	/* solve using this Uchain, in reverse order */
	/* ------------------------------------------------------------------ */

	DEBUG4 (("Unwinding Uchain\n")) ;
	for (k = kstart ; k <= kend ; k++)
	{

	    /* -------------------------------------------------------------- */
	    /* construct row k */
	    /* -------------------------------------------------------------- */

	    ASSERT (k >= 0 && k < npiv) ;
	    pos = Upos [k] ;
	    if (pos != EMPTY)
	    {
		/* remove the pivot column */
		DEBUG4 (("k "ID" add pivot entry at position "ID"\n", k, pos)) ;
		ASSERT (k > kstart) ;
		ASSERT (pos >= 0 && pos < deg) ;
		ASSERT (Pattern [pos] == k) ;
		Pattern [pos] = Pattern [--deg] ;
	    }

	    up = Uip [k] ;
	    ulen = Uilen [k] ;
	    if (k > kstart)
	    {
		/* concatenate the deleted pattern; pop from the stack */
		for (j = 0 ; j < ulen ; j++)
		{
		    ASSERT (deg <= uhead && uhead < n) ;
		    Pattern [deg++] = Pattern [uhead++] ;
		}
		DEBUG4 (("middle of chain, row of U "ID" deg "ID"\n", k, deg)) ;
		ASSERT (deg >= 0) ;
	    }

	    /* -------------------------------------------------------------- */
	    /* use row k of U */
	    /* -------------------------------------------------------------- */

	    /* Go ahead and divide by zero if D [k] is zero. */
#ifdef CONJUGATE_SOLVE
	    /* xk = X [k] / conjugate (D [k]) ; */
	    DIV_CONJ (xk, X [k], D [k]) ;
#else
	    /* xk = X [k] / D [k] ; */
	    DIV (xk, X [k], D [k]) ;
#endif
	    X [k] = xk ;
	    if (IS_NONZERO (xk))
	    {
		if (k == kstart)
		{
		    up = -up ;
		    xp = (Entry *) (Numeric->Memory + up + UNITS (Int, ulen)) ;
		}
		else
		{
		    xp = (Entry *) (Numeric->Memory + up) ;
		}
		for (j = 0 ; j < deg ; j++)
		{
		    DEBUG4 (("  k "ID" col "ID" value", k, Pattern [j])) ;
		    EDEBUG4 (*xp) ;
		    DEBUG4 (("\n")) ;
#ifdef CONJUGATE_SOLVE
		    /* X [Pattern [j]] -= xk * conjugate (*xp) ; */
		    MULT_SUB_CONJ (X [Pattern [j]], xk, *xp) ;
#else
		    /* X [Pattern [j]] -= xk * (*xp) ; */
		    MULT_SUB (X [Pattern [j]], xk, *xp) ;
#endif
		    xp++ ;
		}
	    }
	}
	ASSERT (uhead == n) ;
    }

    for (k = npiv ; k < n ; k++)
    {
	/* This is an *** intentional *** divide-by-zero, to get Inf or Nan,
	 * as appropriate.  It is not a bug. */
	ASSERT (IS_ZERO (D [k])) ;
	/* For conjugate solve, D [k] == conjugate (D [k]), in this case */
	/* xk = X [k] / D [k] ; */
	DIV (xk, X [k], D [k]) ;
	X [k] = xk ;
    }

#ifndef NDEBUG
    for (j = 0 ; j < n ; j++)
    {
	DEBUG4 (("Utsolve done "ID": ", j)) ;
	EDEBUG4 (X [j]) ;
	DEBUG4 (("\n")) ;
    }
    DEBUG4 (("Utsolve done.\n")) ;
#endif

    return (DIV_FLOPS * ((double) n) + MULTSUB_FLOPS * ((double) Numeric->unz));
}
예제 #11
0
GLOBAL double UMF_lsolve
(
    NumericType *Numeric,
    Entry X [ ],		/* b on input, solution x on output */
    Int Pattern [ ]		/* a work array of size n */
)
{
    Entry xk ;
    Entry *xp, *Lval ;
    Int k, deg, *ip, j, row, *Lpos, *Lilen, *Lip, llen, lp, newLchain,
	pos, npiv, n1, *Li ;

    /* ---------------------------------------------------------------------- */

    if (Numeric->n_row != Numeric->n_col) return (0.) ;
    npiv = Numeric->npiv ;
    Lpos = Numeric->Lpos ;
    Lilen = Numeric->Lilen ;
    Lip = Numeric->Lip ;
    n1 = Numeric->n1 ;

#ifndef NDEBUG
    DEBUG4 (("Lsolve start:\n")) ;
    for (j = 0 ; j < Numeric->n_row ; j++)
    {
	DEBUG4 (("Lsolve start "ID": ", j)) ;
	EDEBUG4 (X [j]) ;
	DEBUG4 (("\n")) ;
    }
#endif

    /* ---------------------------------------------------------------------- */
    /* singletons */
    /* ---------------------------------------------------------------------- */

    for (k = 0 ; k < n1 ; k++)
    {
	DEBUG4 (("Singleton k "ID"\n", k)) ;
	xk = X [k] ;
	deg = Lilen [k] ;
	if (deg > 0 && IS_NONZERO (xk))
	{
	    lp = Lip [k] ;
	    Li = (Int *) (Numeric->Memory + lp) ;
	    lp += UNITS (Int, deg) ;
	    Lval = (Entry *) (Numeric->Memory + lp) ;
	    for (j = 0 ; j < deg ; j++)
	    {
		DEBUG4 (("  row "ID"  k "ID" value", Li [j], k)) ;
		EDEBUG4 (Lval [j]) ;
		DEBUG4 (("\n")) ;
		/* X [Li [j]] -= xk * Lval [j] ; */
		MULT_SUB (X [Li [j]], xk, Lval [j]) ;
	    }
	}
    }

    /* ---------------------------------------------------------------------- */
    /* rest of L */
    /* ---------------------------------------------------------------------- */

    deg = 0 ;

    for (k = n1 ; k < npiv ; k++)
    {

	/* ------------------------------------------------------------------ */
	/* make column of L in Pattern [0..deg-1] */
	/* ------------------------------------------------------------------ */

	lp = Lip [k] ;
	newLchain = (lp < 0) ;
	if (newLchain)
	{
	    lp = -lp ;
	    deg = 0 ;
	    DEBUG4 (("start of chain for column of L\n")) ;
	}

	/* remove pivot row */
	pos = Lpos [k] ;
	if (pos != EMPTY)
	{
	    DEBUG4 (("  k "ID" removing row "ID" at position "ID"\n",
	    k, Pattern [pos], pos)) ;
	    ASSERT (!newLchain) ;
	    ASSERT (deg > 0) ;
	    ASSERT (pos >= 0 && pos < deg) ;
	    ASSERT (Pattern [pos] == k) ;
	    Pattern [pos] = Pattern [--deg] ;
	}

	/* concatenate the pattern */
	ip = (Int *) (Numeric->Memory + lp) ;
	llen = Lilen [k] ;
	for (j = 0 ; j < llen ; j++)
	{
	    row = *ip++ ;
	    DEBUG4 (("  row "ID"  k "ID"\n", row, k)) ;
	    ASSERT (row > k) ;
	    Pattern [deg++] = row ;
	}

	/* ------------------------------------------------------------------ */
	/* use column k of L */
	/* ------------------------------------------------------------------ */

	xk = X [k] ;
	if (IS_NONZERO (xk))
	{
	    xp = (Entry *) (Numeric->Memory + lp + UNITS (Int, llen)) ;
	    for (j = 0 ; j < deg ; j++)
	    {
		DEBUG4 (("  row "ID"  k "ID" value", Pattern [j], k)) ;
		EDEBUG4 (*xp) ;
		DEBUG4 (("\n")) ;
		/* X [Pattern [j]] -= xk * (*xp) ; */
		MULT_SUB (X [Pattern [j]], xk, *xp) ;
		xp++ ;
	    }
	}
    }

#ifndef NDEBUG
    for (j = 0 ; j < Numeric->n_row ; j++)
    {
	DEBUG4 (("Lsolve done "ID": ", j)) ;
	EDEBUG4 (X [j]) ;
	DEBUG4 (("\n")) ;
    }
    DEBUG4 (("Lsolve done.\n")) ;
#endif

    return (MULTSUB_FLOPS * ((double) Numeric->lnz)) ;
}
예제 #12
0
GLOBAL Int UMF_solve
(
    Int sys,
    const Int Ap [ ],
    const Int Ai [ ],
    const double Ax [ ],
    double Xx [ ],
    const double Bx [ ],
#ifdef COMPLEX
    const double Az [ ],
    double Xz [ ],
    const double Bz [ ],
#endif
    NumericType *Numeric,
    Int irstep,
    double Info [UMFPACK_INFO],
    Int Pattern [ ],		/* size n */
    double SolveWork [ ]	/* if irstep>0 real:  size 5*n.  complex:10*n */
				/* otherwise   real:  size   n.  complex: 4*n */
)
{
    /* ---------------------------------------------------------------------- */
    /* local variables */
    /* ---------------------------------------------------------------------- */

    Entry axx, wi, xj, zi, xi, aij, bi ;
    double omega [3], d, z2i, yi, flops ;
    Entry *W, *Z, *S, *X ;
    double *Z2, *Y, *B2, *Rs ;
    Int *Rperm, *Cperm, i, n, p, step, j, nz, status, p2, do_scale ;
#ifdef COMPLEX
    Int AXsplit ;
    Int Bsplit ;
#endif
#ifndef NRECIPROCAL
    Int do_recip = Numeric->do_recip ;
#endif

    /* ---------------------------------------------------------------------- */
    /* initializations */
    /* ---------------------------------------------------------------------- */

#ifndef NDEBUG
    UMF_dump_lu (Numeric) ;
    ASSERT (Numeric && Xx && Bx && Pattern && SolveWork && Info) ;
#endif

    nz = 0 ;
    omega [0] = 0. ;
    omega [1] = 0. ;
    omega [2] = 0. ;
    Rperm = Numeric->Rperm ;
    Cperm = Numeric->Cperm ;
    Rs = Numeric->Rs ;		/* row scale factors */
    do_scale = (Rs != (double *) NULL) ;
    flops = 0 ;
    Info [UMFPACK_SOLVE_FLOPS] = 0 ;
    Info [UMFPACK_IR_TAKEN] = 0 ;
    Info [UMFPACK_IR_ATTEMPTED] = 0 ;

    /* UMFPACK_solve does not call this routine if A is rectangular */
    ASSERT (Numeric->n_row == Numeric->n_col) ;
    n = Numeric->n_row ;
    if (Numeric->nnzpiv < n
	|| SCALAR_IS_ZERO (Numeric->rcond) || SCALAR_IS_NAN (Numeric->rcond))
    {
	/* Note that systems involving just L return UMFPACK_OK, even if */
	/* A is singular (L is always has a unit diagonal). */
	DEBUGm4 (("Note, matrix is singular in umf_solve\n")) ;
	status = UMFPACK_WARNING_singular_matrix ;
	irstep = 0 ;
    }
    else
    {
	status = UMFPACK_OK ;
    }
    irstep = MAX (0, irstep) ;			/* make sure irstep is >= 0 */

    W = (Entry *) SolveWork ;			/* Entry W [0..n-1] */

    Z = (Entry *) NULL ;	/* unused if no iterative refinement */
    S = (Entry *) NULL ;
    Y = (double *) NULL ;
    Z2 = (double *) NULL ;
    B2 = (double *) NULL ;

#ifdef COMPLEX
    if (irstep > 0)
    {
	if (!Ap || !Ai || !Ax)
	{
	    return (UMFPACK_ERROR_argument_missing) ;
	}
	/* A, B, and X in split format if Az, Bz, and Xz present */
	AXsplit = SPLIT (Az) || SPLIT(Xz);
	Z = (Entry *) (SolveWork + 4*n) ;	/* Entry Z [0..n-1] */
	S = (Entry *) (SolveWork + 6*n) ;	/* Entry S [0..n-1] */
	Y = (double *) (SolveWork + 8*n) ;	/* double Y [0..n-1] */
	B2 = (double *) (SolveWork + 9*n) ;	/* double B2 [0..n-1] */
	Z2 = (double *) Z ;		/* double Z2 [0..n-1], equiv. to Z */
    }
    else
    {
      /* A is ignored, only  look at X for split/packed cases */
      AXsplit = SPLIT(Xz);
    }
    Bsplit = SPLIT (Bz);

    if (AXsplit)
    {
	X = (Entry *) (SolveWork + 2*n) ;	/* Entry X [0..n-1] */
    }
    else
    {
	X = (Entry *) Xx ;			/* Entry X [0..n-1] */
    }
#else
    X = (Entry *) Xx ;				/* Entry X [0..n-1] */
    if (irstep > 0)
    {
	if (!Ap || !Ai || !Ax)
	{
	    return (UMFPACK_ERROR_argument_missing) ;
	}
	Z = (Entry *) (SolveWork + n) ;		/* Entry Z [0..n-1] */
	S = (Entry *) (SolveWork + 2*n) ;	/* Entry S [0..n-1] */
	Y = (double *) (SolveWork + 3*n) ;	/* double Y [0..n-1] */
	B2 = (double *) (SolveWork + 4*n) ;	/* double B2 [0..n-1] */
	Z2 = (double *) Z ;		/* double Z2 [0..n-1], equiv. to Z */
    }
#endif

    /* ---------------------------------------------------------------------- */
    /* determine which system to solve */
    /* ---------------------------------------------------------------------- */

    if (sys == UMFPACK_A)
    {

	/* ------------------------------------------------------------------ */
	/* solve A x = b with optional iterative refinement */
	/* ------------------------------------------------------------------ */

	if (irstep > 0)
	{

	    /* -------------------------------------------------------------- */
	    /* using iterative refinement:  compute Y and B2 */
	    /* -------------------------------------------------------------- */

	    nz = Ap [n] ;
	    Info [UMFPACK_NZ] = nz ;

	    /* A is stored by column */
	    /* Y (i) = ||R A_i||, 1-norm of row i of R A */
	    for (i = 0 ; i < n ; i++)
	    {
		Y [i] = 0. ;
	    }
	    flops += (ABS_FLOPS + 1) * nz ;
	    p2 = Ap [n] ;
	    for (p = 0 ; p < p2 ; p++)
	    {
		/* Y [Ai [p]] += ABS (Ax [p]) ; */
	        ASSIGN (aij, Ax, Az, p, AXsplit) ;
		ABS (d, aij) ;
		Y [Ai [p]] += d ;
	    }

	    /* B2 = abs (B) */
	    flops += ABS_FLOPS * n ;
	    for (i = 0 ; i < n ; i++)
	    {
		/* B2 [i] = ABS (B [i]) ; */
		ASSIGN (bi, Bx, Bz, i, Bsplit) ;
		ABS (B2 [i], bi) ;
	    }

	    /* scale Y and B2. */
	    if (do_scale)
	    {
		/* Y = R Y */
		/* B2 = R B2 */
#ifndef NRECIPROCAL
		if (do_recip)
		{
		    /* multiply by the scale factors */
		    for (i = 0 ; i < n ; i++)
		    {
			Y [i]  *= Rs [i] ;
			B2 [i] *= Rs [i] ;
		    }
		}
		else
#endif
		{
		    /* divide by the scale factors */
		    for (i = 0 ; i < n ; i++)
		    {
			Y [i]  /= Rs [i] ;
			B2 [i] /= Rs [i] ;
		    }
		}

		flops += 2 * n ;
	    }

	}

	for (step = 0 ; step <= irstep ; step++)
	{

	    /* -------------------------------------------------------------- */
	    /* Solve A x = b (step 0): */
	    /*  x = Q (U \ (L \ (P R b))) */
	    /* and then perform iterative refinement (step > 0): */
	    /*  x = x + Q (U \ (L \ (P R (b - A x)))) */
	    /* -------------------------------------------------------------- */

	    if (step == 0)
	    {
		if (do_scale)
		{
		    /* W = P R b, using X as workspace, since Z is not
		     * allocated if irstep = 0. */
#ifndef NRECIPROCAL
		    if (do_recip)
		    {
			/* multiply by the scale factors */
			for (i = 0 ; i < n ; i++)
			{
			    ASSIGN (X [i], Bx, Bz, i, Bsplit) ;
			    SCALE (X [i], Rs [i]) ;
			}
		    }
		    else
#endif
		    {
			/* divide by the scale factors */
			for (i = 0 ; i < n ; i++)
			{
			    ASSIGN (X [i], Bx, Bz, i, Bsplit) ;
			    SCALE_DIV (X [i], Rs [i]) ;
			}
		    }
		    flops += SCALE_FLOPS * n ;
		    for (i = 0 ; i < n ; i++)
		    {
			W [i] = X [Rperm [i]] ;
		    }
		}
		else
		{
		    /* W = P b, since the row scaling R = I */
		    for (i = 0 ; i < n ; i++)
		    {
			/* W [i] = B [Rperm [i]] ; */
			ASSIGN (W [i], Bx, Bz, Rperm [i], Bsplit) ;
		    }
		}
	    }
	    else
	    {
		for (i = 0 ; i < n ; i++)
		{
		    /* Z [i] = B [i] ; */
		    ASSIGN (Z [i], Bx, Bz, i, Bsplit) ;
		}
		flops += MULTSUB_FLOPS * nz ;
		for (i = 0 ; i < n ; i++)
		{
		    xi = X [i] ;
		    p2 = Ap [i+1] ;
		    for (p = Ap [i] ; p < p2 ; p++)
		    {
			/* Z [Ai [p]] -= Ax [p] * xi ; */
			ASSIGN (aij, Ax, Az, p, AXsplit) ;
			MULT_SUB (Z [Ai [p]], aij, xi) ;
		    }
		}
		/* scale, Z = R Z */
		if (do_scale)
		{
#ifndef NRECIPROCAL
		    if (do_recip)
		    {
			/* multiply by the scale factors */
			for (i = 0 ; i < n ; i++)
			{
			    SCALE (Z [i], Rs [i]) ;
			}
		    }
		    else
#endif
		    {
			/* divide by the scale factors */
			for (i = 0 ; i < n ; i++)
			{
			    SCALE_DIV (Z [i], Rs [i]) ;
			}
		    }
		    flops += SCALE_FLOPS * n ;
		}
		for (i = 0 ; i < n ; i++)
		{
		    W [i] = Z [Rperm [i]] ;
		}
	    }

	    flops += UMF_lsolve (Numeric, W, Pattern) ;
	    flops += UMF_usolve (Numeric, W, Pattern) ;

	    if (step == 0)
	    {
		for (i = 0 ; i < n ; i++)
		{
		    X [Cperm [i]] = W [i] ;
		}
	    }
	    else
	    {
		flops += ASSEMBLE_FLOPS * n ;
		for (i = 0 ; i < n ; i++)
		{
		    /* X [Cperm [i]] += W [i] ; */
		    ASSEMBLE (X [Cperm [i]], W [i]) ;
		}
	    }

	    /* -------------------------------------------------------------- */
	    /* sparse backward error estimate */
	    /* -------------------------------------------------------------- */

	    if (irstep > 0)
	    {

		/* ---------------------------------------------------------- */
		/* A is stored by column */
		/* W (i) = R (b - A x)_i, residual */
		/* Z2 (i) = R (|A||x|)_i */
		/* ---------------------------------------------------------- */

		for (i = 0 ; i < n ; i++)
		{
		    /* W [i] = B [i] ; */
		    ASSIGN (W [i], Bx, Bz, i, Bsplit) ;
		    Z2 [i] = 0. ;
		}
		flops += (MULT_FLOPS + DECREMENT_FLOPS + ABS_FLOPS + 1) * nz ;
		for (j = 0 ; j < n ; j++)
		{
		    xj = X [j] ;
		    p2 = Ap [j+1] ;
		    for (p = Ap [j] ; p < p2 ; p++)
		    {
			i = Ai [p] ;

			/* axx = Ax [p] * xj ; */
			ASSIGN (aij, Ax, Az, p, AXsplit) ;
			MULT (axx, aij, xj) ;

			/* W [i] -= axx ; */
			DECREMENT (W [i], axx) ;

			/* Z2 [i] += ABS (axx) ; */
			ABS (d, axx) ;
			Z2 [i] += d ;
		    }
		}

		/* scale W and Z2 */
		if (do_scale)
		{
		    /* Z2 = R Z2 */
		    /* W = R W */
#ifndef NRECIPROCAL
		    if (do_recip)
		    {
			/* multiply by the scale factors */
			for (i = 0 ; i < n ; i++)
			{
			    SCALE (W [i], Rs [i]) ;
			    Z2 [i] *= Rs [i] ;
			}
		    }
		    else
#endif
		    {
			/* divide by the scale factors */
			for (i = 0 ; i < n ; i++)
			{
			    SCALE_DIV (W [i], Rs [i]) ;
			    Z2 [i] /= Rs [i] ;
			}
		    }
		    flops += (SCALE_FLOPS + 1) * n ;
		}

		flops += (2*ABS_FLOPS + 5) * n ;
		if (do_step (omega, step, B2, X, W, Y, Z2, S, n, Info))
		{
		    /* iterative refinement is done */
		    break ;
		}

	    }

	}

    }
    else if (sys == UMFPACK_At)
    {

	/* ------------------------------------------------------------------ */
	/* solve A' x = b with optional iterative refinement */
	/* ------------------------------------------------------------------ */

	/* A' is the complex conjugate transpose */

	if (irstep > 0)
	{

	    /* -------------------------------------------------------------- */
	    /* using iterative refinement:  compute Y */
	    /* -------------------------------------------------------------- */

	    nz = Ap [n] ;
	    Info [UMFPACK_NZ] = nz ;

	    /* A' is stored by row */
	    /* Y (i) = ||(A' R)_i||, 1-norm of row i of A' R */

	    if (do_scale)
	    {
		flops += (ABS_FLOPS + 2) * nz ;
#ifndef NRECIPROCAL
		if (do_recip)
		{
		    /* multiply by the scale factors */
		    for (i = 0 ; i < n ; i++)
		    {
			yi = 0. ;
			p2 = Ap [i+1] ;
			for (p = Ap [i] ; p < p2 ; p++)
			{
			    /* yi += ABS (Ax [p]) * Rs [Ai [p]] ; */
			    /* note that abs (aij) is the same as
			     * abs (conj (aij)) */
			    ASSIGN (aij, Ax, Az, p, AXsplit) ;
			    ABS (d, aij) ;
			    yi += (d * Rs [Ai [p]]) ;
			}
			Y [i] = yi ;
		    }
		}
		else
#endif
		{
		    /* divide by the scale factors */
		    for (i = 0 ; i < n ; i++)
		    {
			yi = 0. ;
			p2 = Ap [i+1] ;
			for (p = Ap [i] ; p < p2 ; p++)
			{
			    /* yi += ABS (Ax [p]) / Rs [Ai [p]] ; */
			    /* note that abs (aij) is the same as
			     * abs (conj (aij)) */
			    ASSIGN (aij, Ax, Az, p, AXsplit) ;
			    ABS (d, aij) ;
			    yi += (d / Rs [Ai [p]]) ;
			}
			Y [i] = yi ;
		    }
		}
	    }
	    else
	    {
		/* no scaling */
		flops += (ABS_FLOPS + 1) * nz ;
		for (i = 0 ; i < n ; i++)
		{
		    yi = 0. ;
		    p2 = Ap [i+1] ;
		    for (p = Ap [i] ; p < p2 ; p++)
		    {
			/* yi += ABS (Ax [p]) ; */
			/* note that abs (aij) is the same as
			 * abs (conj (aij)) */
			ASSIGN (aij, Ax, Az, p, AXsplit) ;
			ABS (d, aij) ;
			yi += d ;
		    }
		    Y [i] = yi ;
		}
	    }

	    /* B2 = abs (B) */
	    for (i = 0 ; i < n ; i++)
	    {
		/* B2 [i] = ABS (B [i]) ; */
		ASSIGN (bi, Bx, Bz, i, Bsplit) ;
		ABS (B2 [i], bi) ;
	    }

	}

	for (step = 0 ; step <= irstep ; step++)
	{

	    /* -------------------------------------------------------------- */
	    /* Solve A' x = b (step 0): */
	    /*	x = R P' (L' \ (U' \ (Q' b))) */
	    /* and then perform iterative refinement (step > 0): */
	    /*	x = x + R P' (L' \ (U' \ (Q' (b - A' x)))) */
	    /* -------------------------------------------------------------- */

	    if (step == 0)
	    {
		/* W = Q' b */
		for (i = 0 ; i < n ; i++)
		{
		    /* W [i] = B [Cperm [i]] ; */
		    ASSIGN (W [i], Bx, Bz, Cperm [i], Bsplit) ;
		}
	    }
	    else
	    {
		/* Z = b - A' x */
		for (i = 0 ; i < n ; i++)
		{
		    /* Z [i] = B [i] ; */
		    ASSIGN (Z [i], Bx, Bz, i, Bsplit) ;
		}
		flops += MULTSUB_FLOPS * nz ;
		for (i = 0 ; i < n ; i++)
		{
		    zi = Z [i] ;
		    p2 = Ap [i+1] ;
		    for (p = Ap [i] ; p < p2 ; p++)
		    {
			/* zi -= conjugate (Ax [p]) * X [Ai [p]] ; */
			ASSIGN (aij, Ax, Az, p, Bsplit) ;
			MULT_SUB_CONJ (zi, X [Ai [p]], aij) ;
		    }
		    Z [i] = zi ;
		}
		/* W = Q' Z */
		for (i = 0 ; i < n ; i++)
		{
		    W [i] = Z [Cperm [i]] ;
		}
	    }

	    flops += UMF_uhsolve (Numeric, W, Pattern) ;
	    flops += UMF_lhsolve (Numeric, W, Pattern) ;

	    if (step == 0)
	    {

		/* X = R P' W */
		/* do not use Z, since it isn't allocated if irstep = 0 */

		/* X = P' W */
		for (i = 0 ; i < n ; i++)
		{
		    X [Rperm [i]] = W [i] ;
		}
		if (do_scale)
		{
		    /* X = R X */
#ifndef NRECIPROCAL
		    if (do_recip)
		    {
			/* multiply by the scale factors */
			for (i = 0 ; i < n ; i++)
			{
			    SCALE (X [i], Rs [i]) ;
			}
		    }
		    else
#endif
		    {
			/* divide by the scale factors */
			for (i = 0 ; i < n ; i++)
			{
			    SCALE_DIV (X [i], Rs [i]) ;
			}
		    }
		    flops += SCALE_FLOPS * n ;
		}

	    }
	    else
	    {

		/* Z = P' W */
		for (i = 0 ; i < n ; i++)
		{
		    Z [Rperm [i]] = W [i] ;
		}
		if (do_scale)
		{
		    /* Z = R Z */
#ifndef NRECIPROCAL
		    if (do_recip)
		    {
			/* multiply by the scale factors */
			for (i = 0 ; i < n ; i++)
			{
			    SCALE (Z [i], Rs [i]) ;
			}
		    }
		    else
#endif
		    {
			/* divide by the scale factors */
			for (i = 0 ; i < n ; i++)
			{
			    SCALE_DIV (Z [i], Rs [i]) ;
			}
		    }
		    flops += SCALE_FLOPS * n ;
		}

		flops += ASSEMBLE_FLOPS * n ;
		/* X += Z */
		for (i = 0 ; i < n ; i++)
		{
		    /* X [i] += Z [i] ; was +=W[i] in v4.3, which is wrong */
		    ASSEMBLE (X [i], Z [i]) ;	/* bug fix, v4.3.1 */
		}
	    }

	    /* -------------------------------------------------------------- */
	    /* sparse backward error estimate */
	    /* -------------------------------------------------------------- */

	    if (irstep > 0)
	    {

		/* ---------------------------------------------------------- */
		/* A' is stored by row */
		/* W (i) = (b - A' x)_i, residual */
		/* Z2 (i) = (|A'||x|)_i */
		/* ---------------------------------------------------------- */

		flops += (MULT_FLOPS + DECREMENT_FLOPS + ABS_FLOPS + 1) * nz ;
		for (i = 0 ; i < n ; i++)
		{
		    /* wi = B [i] ; */
		    ASSIGN (wi, Bx, Bz, i, Bsplit) ;
		    z2i = 0. ;
		    p2 = Ap [i+1] ;
		    for (p = Ap [i] ; p < p2 ; p++)
		    {
			/* axx = conjugate (Ax [p]) * X [Ai [p]] ; */
			ASSIGN (aij, Ax, Az, p, AXsplit) ;
			MULT_CONJ (axx, X [Ai [p]], aij) ;

			/* wi -= axx ; */
			DECREMENT (wi, axx) ;

			/* z2i += ABS (axx) ; */
			ABS (d, axx) ;
			z2i += d ;
		    }
		    W [i] = wi ;
		    Z2 [i] = z2i ;
		}

		flops += (2*ABS_FLOPS + 5) * n ;
		if (do_step (omega, step, B2, X, W, Y, Z2, S, n, Info))
		{
		    /* iterative refinement is done */
		    break ;
		}

	    }

	}

    }
    else if (sys == UMFPACK_Aat)
    {

	/* ------------------------------------------------------------------ */
	/* solve A.' x = b with optional iterative refinement */
	/* ------------------------------------------------------------------ */

	/* A' is the array transpose */

	if (irstep > 0)
	{

	    /* -------------------------------------------------------------- */
	    /* using iterative refinement:  compute Y */
	    /* -------------------------------------------------------------- */

	    nz = Ap [n] ;
	    Info [UMFPACK_NZ] = nz ;

	    /* A.' is stored by row */
	    /* Y (i) = ||(A.' R)_i||, 1-norm of row i of A.' R */

	    if (do_scale)
	    {
		flops += (ABS_FLOPS + 2) * nz ;
#ifndef NRECIPROCAL
		if (do_recip)
		{
		    /* multiply by the scale factors */
		    for (i = 0 ; i < n ; i++)
		    {
			yi = 0. ;
			p2 = Ap [i+1] ;
			for (p = Ap [i] ; p < p2 ; p++)
			{
			    /* yi += ABS (Ax [p]) * Rs [Ai [p]] ; */
			    /* note that A.' is the array transpose,
			     * so no conjugate */
			    ASSIGN (aij, Ax, Az, p, AXsplit) ;
			    ABS (d, aij) ;
			    yi += (d * Rs [Ai [p]]) ;
			}
			Y [i] = yi ;
		    }
		}
		else
#endif
		{
		    /* divide by the scale factors */
		    for (i = 0 ; i < n ; i++)
		    {
			yi = 0. ;
			p2 = Ap [i+1] ;
			for (p = Ap [i] ; p < p2 ; p++)
			{
			    /* yi += ABS (Ax [p]) / Rs [Ai [p]] ; */
			    /* note that A.' is the array transpose,
			     * so no conjugate */
			    ASSIGN (aij, Ax, Az, p, AXsplit) ;
			    ABS (d, aij) ;
			    yi += (d / Rs [Ai [p]]) ;
			}
			Y [i] = yi ;
		    }
		}
	    }
	    else
	    {
		/* no scaling */
		flops += (ABS_FLOPS + 1) * nz ;
		for (i = 0 ; i < n ; i++)
		{
		    yi = 0. ;
		    p2 = Ap [i+1] ;
		    for (p = Ap [i] ; p < p2 ; p++)
		    {
			/* yi += ABS (Ax [p]) */
			/* note that A.' is the array transpose,
			 * so no conjugate */
			ASSIGN (aij, Ax, Az, p, AXsplit) ;
			ABS (d, aij) ;
			yi += d ;
		    }
		    Y [i] = yi ;
		}
	    }

	    /* B2 = abs (B) */
	    for (i = 0 ; i < n ; i++)
	    {
		/* B2 [i] = ABS (B [i]) ; */
		ASSIGN (bi, Bx, Bz, i, Bsplit) ;
		ABS (B2 [i], bi) ;
	    }

	}

	for (step = 0 ; step <= irstep ; step++)
	{

	    /* -------------------------------------------------------------- */
	    /* Solve A.' x = b (step 0): */
	    /*	x = R P' (L.' \ (U.' \ (Q' b))) */
	    /* and then perform iterative refinement (step > 0): */
	    /*	x = x + R P' (L.' \ (U.' \ (Q' (b - A.' x)))) */
	    /* -------------------------------------------------------------- */

	    if (step == 0)
	    {
		/* W = Q' b */
		for (i = 0 ; i < n ; i++)
		{
		    /* W [i] = B [Cperm [i]] ; */
		    ASSIGN (W [i], Bx, Bz, Cperm [i], Bsplit) ;
		}
	    }
	    else
	    {
		/* Z = b - A.' x */
		for (i = 0 ; i < n ; i++)
		{
		    /* Z [i] = B [i] ; */
		    ASSIGN (Z [i], Bx, Bz, i, Bsplit) ;
		}
		flops += MULTSUB_FLOPS * nz ;
		for (i = 0 ; i < n ; i++)
		{
		    zi = Z [i] ;
		    p2 = Ap [i+1] ;
		    for (p = Ap [i] ; p < p2 ; p++)
		    {
			/* zi -= Ax [p] * X [Ai [p]] ; */
			ASSIGN (aij, Ax, Az, p, AXsplit) ;
			MULT_SUB (zi, aij, X [Ai [p]]) ;
		    }
		    Z [i] = zi ;
		}
		/* W = Q' Z */
		for (i = 0 ; i < n ; i++)
		{
		    W [i] = Z [Cperm [i]] ;
		}
	    }

	    flops += UMF_utsolve (Numeric, W, Pattern) ;
	    flops += UMF_ltsolve (Numeric, W, Pattern) ;

	    if (step == 0)
	    {

		/* X = R P' W */
		/* do not use Z, since it isn't allocated if irstep = 0 */

		/* X = P' W */
		for (i = 0 ; i < n ; i++)
		{
		    X [Rperm [i]] = W [i] ;
		}
		if (do_scale)
		{
		    /* X = R X */
#ifndef NRECIPROCAL
		    if (do_recip)
		    {
			/* multiply by the scale factors */
			for (i = 0 ; i < n ; i++)
			{
			    SCALE (X [i], Rs [i]) ;
			}
		    }
		    else
#endif
		    {
			/* divide by the scale factors */
			for (i = 0 ; i < n ; i++)
			{
			    SCALE_DIV (X [i], Rs [i]) ;
			}
		    }
		    flops += SCALE_FLOPS * n ;
		}

	    }
	    else
	    {

		/* Z = P' W */
		for (i = 0 ; i < n ; i++)
		{
		    Z [Rperm [i]] = W [i] ;
		}
		if (do_scale)
		{
		    /* Z = R Z */
#ifndef NRECIPROCAL
		    if (do_recip)
		    {
			/* multiply by the scale factors */
			for (i = 0 ; i < n ; i++)
			{
			    SCALE (Z [i], Rs [i]) ;
			}
		    }
		    else
#endif
		    {
			/* divide by the scale factors */
			for (i = 0 ; i < n ; i++)
			{
			    SCALE_DIV (Z [i], Rs [i]) ;
			}
		    }
		    flops += SCALE_FLOPS * n ;
		}

		flops += ASSEMBLE_FLOPS * n ;
		/* X += Z */
		for (i = 0 ; i < n ; i++)
		{
		    /* X [i] += Z [i] ; was +=W[i] in v4.3, which is wrong */
		    ASSEMBLE (X [i], Z [i]) ;	/* bug fix, v4.3.1 */
		}
	    }

	    /* -------------------------------------------------------------- */
	    /* sparse backward error estimate */
	    /* -------------------------------------------------------------- */

	    if (irstep > 0)
	    {

		/* ---------------------------------------------------------- */
		/* A.' is stored by row */
		/* W (i) = (b - A.' x)_i, residual */
		/* Z (i) = (|A.'||x|)_i */
		/* ---------------------------------------------------------- */

		flops += (MULT_FLOPS + DECREMENT_FLOPS + ABS_FLOPS + 1) * nz ;
		for (i = 0 ; i < n ; i++)
		{
		    /* wi = B [i] ; */
		    ASSIGN (wi, Bx, Bz, i, Bsplit) ;
		    z2i = 0. ;
		    p2 = Ap [i+1] ;
		    for (p = Ap [i] ; p < p2 ; p++)
		    {
			/* axx = Ax [p] * X [Ai [p]] ; */
			ASSIGN (aij, Ax, Az, p, AXsplit) ;
			MULT (axx, aij, X [Ai [p]]) ;

			/* wi -= axx ; */
			DECREMENT (wi, axx) ;

			/* z2i += ABS (axx) ; */
			ABS (d, axx) ;
			z2i += d ;
		    }
		    W [i] = wi ;
		    Z2 [i] = z2i ;
		}

		flops += (2*ABS_FLOPS + 5) * n ;
		if (do_step (omega, step, B2, X, W, Y, Z2, S, n, Info))
		{
		    /* iterative refinement is done */
		    break ;
		}

	    }

	}

    }
    else if (sys == UMFPACK_Pt_L)
    {

	/* ------------------------------------------------------------------ */
	/* Solve P'Lx=b:  x = L \ Pb */
	/* ------------------------------------------------------------------ */

	for (i = 0 ; i < n ; i++)
	{
	    /* X [i] = B [Rperm [i]] ; */
	    ASSIGN (X [i], Bx, Bz, Rperm [i], Bsplit) ;
	}
	flops = UMF_lsolve (Numeric, X, Pattern) ;
	status = UMFPACK_OK ;

    }
    else if (sys == UMFPACK_L)
    {

	/* ------------------------------------------------------------------ */
	/* Solve Lx=b:  x = L \ b */
	/* ------------------------------------------------------------------ */

	for (i = 0 ; i < n ; i++)
	{
	    /* X [i] = B [i] ; */
	    ASSIGN (X [i], Bx, Bz, i, Bsplit) ;
	}
	flops = UMF_lsolve (Numeric, X, Pattern) ;
	status = UMFPACK_OK ;

    }
    else if (sys == UMFPACK_Lt_P)
    {

	/* ------------------------------------------------------------------ */
	/* Solve L'Px=b:  x = P' (L' \ b) */
	/* ------------------------------------------------------------------ */

	for (i = 0 ; i < n ; i++)
	{
	    /* W [i] = B [i] ; */
	    ASSIGN (W [i], Bx, Bz, i, Bsplit) ;
	}
	flops = UMF_lhsolve (Numeric, W, Pattern) ;
	for (i = 0 ; i < n ; i++)
	{
	    X [Rperm [i]] = W [i] ;
	}
	status = UMFPACK_OK ;

    }
    else if (sys == UMFPACK_Lat_P)
    {

	/* ------------------------------------------------------------------ */
	/* Solve L.'Px=b:  x = P' (L.' \ b) */
	/* ------------------------------------------------------------------ */

	for (i = 0 ; i < n ; i++)
	{
	    /* W [i] = B [i] ; */
	    ASSIGN (W [i], Bx, Bz, i, Bsplit) ;
	}
	flops = UMF_ltsolve (Numeric, W, Pattern) ;
	for (i = 0 ; i < n ; i++)
	{
	    X [Rperm [i]] = W [i] ;
	}
	status = UMFPACK_OK ;

    }
    else if (sys == UMFPACK_Lt)
    {

	/* ------------------------------------------------------------------ */
	/* Solve L'x=b:  x = L' \ b */
	/* ------------------------------------------------------------------ */

	for (i = 0 ; i < n ; i++)
	{
	    /* X [i] = B [i] ; */
	    ASSIGN (X [i], Bx, Bz, i, Bsplit) ;
	}
	flops = UMF_lhsolve (Numeric, X, Pattern) ;
	status = UMFPACK_OK ;

    }
    else if (sys == UMFPACK_Lat)
    {

	/* ------------------------------------------------------------------ */
	/* Solve L.'x=b:  x = L.' \ b */
	/* ------------------------------------------------------------------ */

	for (i = 0 ; i < n ; i++)
	{
	    /* X [i] = B [i] ; */
	    ASSIGN (X [i], Bx, Bz, i, Bsplit) ;
	}
	flops = UMF_ltsolve (Numeric, X, Pattern) ;
	status = UMFPACK_OK ;

    }
    else if (sys == UMFPACK_U_Qt)
    {

	/* ------------------------------------------------------------------ */
	/* Solve UQ'x=b:  x = Q (U \ b) */
	/* ------------------------------------------------------------------ */

	for (i = 0 ; i < n ; i++)
	{
	    /* W [i] = B [i] ; */
	    ASSIGN (W [i], Bx, Bz, i, Bsplit) ;
	}
	flops = UMF_usolve (Numeric, W, Pattern) ;
	for (i = 0 ; i < n ; i++)
	{
	    X [Cperm [i]] = W [i] ;
	}

    }
    else if (sys == UMFPACK_U)
    {

	/* ------------------------------------------------------------------ */
	/* Solve Ux=b:  x = U \ b */
	/* ------------------------------------------------------------------ */

	for (i = 0 ; i < n ; i++)
	{
	    /* X [i] = B [i] ; */
	    ASSIGN (X [i], Bx, Bz, i, Bsplit) ;
	}
	flops = UMF_usolve (Numeric, X, Pattern) ;

    }
    else if (sys == UMFPACK_Q_Ut)
    {

	/* ------------------------------------------------------------------ */
	/* Solve QU'x=b:  x = U' \ Q'b */
	/* ------------------------------------------------------------------ */

	for (i = 0 ; i < n ; i++)
	{
	    /* X [i] = B [Cperm [i]] ; */
	    ASSIGN (X [i], Bx, Bz, Cperm [i], Bsplit) ;
	}
	flops = UMF_uhsolve (Numeric, X, Pattern) ;

    }
    else if (sys == UMFPACK_Q_Uat)
    {

	/* ------------------------------------------------------------------ */
	/* Solve QU.'x=b:  x = U.' \ Q'b */
	/* ------------------------------------------------------------------ */

	for (i = 0 ; i < n ; i++)
	{
	    /* X [i] = B [Cperm [i]] ; */
	    ASSIGN (X [i], Bx, Bz, Cperm [i], Bsplit) ;
	}
	flops = UMF_utsolve (Numeric, X, Pattern) ;

    }
    else if (sys == UMFPACK_Ut)
    {

	/* ------------------------------------------------------------------ */
	/* Solve U'x=b:  x = U' \ b */
	/* ------------------------------------------------------------------ */

	for (i = 0 ; i < n ; i++)
	{
	    /* X [i] = B [i] ; */
	  ASSIGN (X [i], Bx, Bz, i, Bsplit) ;
	}
	flops = UMF_uhsolve (Numeric, X, Pattern) ;

    }
    else if (sys == UMFPACK_Uat)
    {

	/* ------------------------------------------------------------------ */
	/* Solve U'x=b:  x = U' \ b */
	/* ------------------------------------------------------------------ */

	for (i = 0 ; i < n ; i++)
	{
	    /* X [i] = B [i] ; */
	    ASSIGN (X [i], Bx, Bz, i, Bsplit) ;
	}
	flops = UMF_utsolve (Numeric, X, Pattern) ;

    }
    else
    {
	return (UMFPACK_ERROR_invalid_system) ;
    }

#ifdef COMPLEX
    /* copy the solution back, from Entry X [ ] to double Xx [ ] and Xz [ ] */
    if (AXsplit)
    {
	for (i = 0 ; i < n ; i++)
	{
	    Xx [i] = REAL_COMPONENT (X [i]) ;
	    Xz [i] = IMAG_COMPONENT (X [i]) ;
	}
    }
#endif

    /* return UMFPACK_OK, or UMFPACK_WARNING_singular_matrix */
    /* Note that systems involving just L will return UMFPACK_OK */
    Info [UMFPACK_SOLVE_FLOPS] = flops ;
    return (status) ;
}
예제 #13
0
Int KLU_refactor        /* returns TRUE if successful, FALSE otherwise */
(
    /* inputs, not modified */
    Int Ap [ ],         /* size n+1, column pointers */
    Int Ai [ ],         /* size nz, row indices */
    double Ax [ ],
    KLU_symbolic<Entry, Int> *Symbolic,

    /* input/output */
    KLU_numeric<Entry, Int> *Numeric,
    KLU_common<Entry, Int>  *Common
)
{
    Entry ukk, ujk, s ;
    Entry *Offx, *Lx, *Ux, *X, *Az, *Udiag ;
    double *Rs ;
    Int *P, *Q, *R, *Pnum, *Offp, *Offi, *Ui, *Li, *Pinv, *Lip, *Uip, *Llen,
        *Ulen ;
    Unit **LUbx ;
    Unit *LU ;
    Int k1, k2, nk, k, block, oldcol, pend, oldrow, n, p, newrow, scale,
        nblocks, poff, i, j, up, ulen, llen, maxblock, nzoff ;

    /* ---------------------------------------------------------------------- */
    /* check inputs */
    /* ---------------------------------------------------------------------- */

    if (Common == NULL)
    {
        return (FALSE) ;
    }
    Common->status = KLU_OK ;

    if (Numeric == NULL)
    {
        /* invalid Numeric object */
        Common->status = KLU_INVALID ;
        return (FALSE) ;
    }

    Common->numerical_rank = EMPTY ;
    Common->singular_col = EMPTY ;

    Az = (Entry *) Ax ;

    /* ---------------------------------------------------------------------- */
    /* get the contents of the Symbolic object */
    /* ---------------------------------------------------------------------- */

    n = Symbolic->n ;
    P = Symbolic->P ;
    Q = Symbolic->Q ;
    R = Symbolic->R ;
    nblocks = Symbolic->nblocks ;
    maxblock = Symbolic->maxblock ;

    /* ---------------------------------------------------------------------- */
    /* get the contents of the Numeric object */
    /* ---------------------------------------------------------------------- */

    Pnum = Numeric->Pnum ;
    Offp = Numeric->Offp ;
    Offi = Numeric->Offi ;
    Offx = (Entry *) Numeric->Offx ;

    LUbx = (Unit **) Numeric->LUbx ;

    scale = Common->scale ;
    if (scale > 0)
    {
        /* factorization was not scaled, but refactorization is scaled */
        if (Numeric->Rs == NULL)
        {
            Numeric->Rs = (double *)KLU_malloc (n, sizeof (double), Common) ;
            if (Common->status < KLU_OK)
            {
                Common->status = KLU_OUT_OF_MEMORY ;
                return (FALSE) ;
            }
        }
    }
    else
    {
        /* no scaling for refactorization; ensure Numeric->Rs is freed.  This
         * does nothing if Numeric->Rs is already NULL. */
        Numeric->Rs = (double *) KLU_free (Numeric->Rs, n, sizeof (double), Common) ;
    }
    Rs = Numeric->Rs ;

    Pinv = Numeric->Pinv ;
    X = (Entry *) Numeric->Xwork ;
    Common->nrealloc = 0 ;
    Udiag = (Entry *) Numeric->Udiag ;
    nzoff = Symbolic->nzoff ;

    /* ---------------------------------------------------------------------- */
    /* check the input matrix compute the row scale factors, Rs */
    /* ---------------------------------------------------------------------- */

    /* do no scale, or check the input matrix, if scale < 0 */
    if (scale >= 0)
    {
        /* check for out-of-range indices, but do not check for duplicates */
        if (!KLU_scale (scale, n, Ap, Ai, Ax, Rs, NULL, Common))
        {
            return (FALSE) ;
        }
    }

    /* ---------------------------------------------------------------------- */
    /* clear workspace X */
    /* ---------------------------------------------------------------------- */

    for (k = 0 ; k < maxblock ; k++)
    {
        /* X [k] = 0 */
        CLEAR (X [k]) ;
    }

    poff = 0 ;

    /* ---------------------------------------------------------------------- */
    /* factor each block */
    /* ---------------------------------------------------------------------- */

    if (scale <= 0)
    {

        /* ------------------------------------------------------------------ */
        /* no scaling */
        /* ------------------------------------------------------------------ */

        for (block = 0 ; block < nblocks ; block++)
        {

            /* -------------------------------------------------------------- */
            /* the block is from rows/columns k1 to k2-1 */
            /* -------------------------------------------------------------- */

            k1 = R [block] ;
            k2 = R [block+1] ;
            nk = k2 - k1 ;

            if (nk == 1)
            {

                /* ---------------------------------------------------------- */
                /* singleton case */
                /* ---------------------------------------------------------- */

                oldcol = Q [k1] ;
                pend = Ap [oldcol+1] ;
                CLEAR (s) ;
                for (p = Ap [oldcol] ; p < pend ; p++)
                {
                    newrow = Pinv [Ai [p]] - k1 ;
                    if (newrow < 0 && poff < nzoff)
                    {
                        /* entry in off-diagonal block */
                        Offx [poff] = Az [p] ;
                        poff++ ;
                    }
                    else
                    {
                        /* singleton */
                        s = Az [p] ;
                    }
                }
                Udiag [k1] = s ;

            }
            else
            {

                /* ---------------------------------------------------------- */
                /* construct and factor the kth block */
                /* ---------------------------------------------------------- */

                Lip  = Numeric->Lip  + k1 ;
                Llen = Numeric->Llen + k1 ;
                Uip  = Numeric->Uip  + k1 ;
                Ulen = Numeric->Ulen + k1 ;
                LU = LUbx [block] ;

                for (k = 0 ; k < nk ; k++)
                {

                    /* ------------------------------------------------------ */
                    /* scatter kth column of the block into workspace X */
                    /* ------------------------------------------------------ */

                    oldcol = Q [k+k1] ;
                    pend = Ap [oldcol+1] ;
                    for (p = Ap [oldcol] ; p < pend ; p++)
                    {
                        newrow = Pinv [Ai [p]] - k1 ;
                        if (newrow < 0 && poff < nzoff)
                        {
                            /* entry in off-diagonal block */
                            Offx [poff] = Az [p] ;
                            poff++ ;
                        }
                        else
                        {
                            /* (newrow,k) is an entry in the block */
                            X [newrow] = Az [p] ;
                        }
                    }

                    /* ------------------------------------------------------ */
                    /* compute kth column of U, and update kth column of A */
                    /* ------------------------------------------------------ */

                    GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, ulen) ;
                    for (up = 0 ; up < ulen ; up++)
                    {
                        j = Ui [up] ;
                        ujk = X [j] ;
                        /* X [j] = 0 */
                        CLEAR (X [j]) ;
                        Ux [up] = ujk ;
                        GET_POINTER (LU, Lip, Llen, Li, Lx, j, llen) ;
                        for (p = 0 ; p < llen ; p++)
                        {
                            /* X [Li [p]] -= Lx [p] * ujk */
                            MULT_SUB (X [Li [p]], Lx [p], ujk) ;
                        }
                    }
                    /* get the diagonal entry of U */
                    ukk = X [k] ;
                    /* X [k] = 0 */
                    CLEAR (X [k]) ;
                    /* singular case */
                    if (IS_ZERO (ukk))
                    {
                        /* matrix is numerically singular */
                        Common->status = KLU_SINGULAR ;
                        if (Common->numerical_rank == EMPTY)
                        {
                            Common->numerical_rank = k+k1 ;
                            Common->singular_col = Q [k+k1] ;
                        }
                        if (Common->halt_if_singular)
                        {
                            /* do not continue the factorization */
                            return (FALSE) ;
                        }
                    }
                    Udiag [k+k1] = ukk ;
                    /* gather and divide by pivot to get kth column of L */
                    GET_POINTER (LU, Lip, Llen, Li, Lx, k, llen) ;
                    for (p = 0 ; p < llen ; p++)
                    {
                        i = Li [p] ;
                        DIV (Lx [p], X [i], ukk) ;
                        CLEAR (X [i]) ;
                    }

                }
            }
        }

    }
    else
    {

        /* ------------------------------------------------------------------ */
        /* scaling */
        /* ------------------------------------------------------------------ */

        for (block = 0 ; block < nblocks ; block++)
        {

            /* -------------------------------------------------------------- */
            /* the block is from rows/columns k1 to k2-1 */
            /* -------------------------------------------------------------- */

            k1 = R [block] ;
            k2 = R [block+1] ;
            nk = k2 - k1 ;

            if (nk == 1)
            {

                /* ---------------------------------------------------------- */
                /* singleton case */
                /* ---------------------------------------------------------- */

                oldcol = Q [k1] ;
                pend = Ap [oldcol+1] ;
                CLEAR (s) ;
                for (p = Ap [oldcol] ; p < pend ; p++)
                {
                    oldrow = Ai [p] ;
                    newrow = Pinv [oldrow] - k1 ;
                    if (newrow < 0 && poff < nzoff)
                    {
                        /* entry in off-diagonal block */
                        /* Offx [poff] = Az [p] / Rs [oldrow] */
                        SCALE_DIV_ASSIGN (Offx [poff], Az [p], Rs [oldrow]) ;
                        poff++ ;
                    }
                    else
                    {
                        /* singleton */
                        /* s = Az [p] / Rs [oldrow] */
                        SCALE_DIV_ASSIGN (s, Az [p], Rs [oldrow]) ;
                    }
                }
                Udiag [k1] = s ;

            }
            else
            {

                /* ---------------------------------------------------------- */
                /* construct and factor the kth block */
                /* ---------------------------------------------------------- */

                Lip  = Numeric->Lip  + k1 ;
                Llen = Numeric->Llen + k1 ;
                Uip  = Numeric->Uip  + k1 ;
                Ulen = Numeric->Ulen + k1 ;
                LU = LUbx [block] ;

                for (k = 0 ; k < nk ; k++)
                {

                    /* ------------------------------------------------------ */
                    /* scatter kth column of the block into workspace X */
                    /* ------------------------------------------------------ */

                    oldcol = Q [k+k1] ;
                    pend = Ap [oldcol+1] ;
                    for (p = Ap [oldcol] ; p < pend ; p++)
                    {
                        oldrow = Ai [p] ;
                        newrow = Pinv [oldrow] - k1 ;
                        if (newrow < 0 && poff < nzoff)
                        {
                            /* entry in off-diagonal part */
                            /* Offx [poff] = Az [p] / Rs [oldrow] */
                            SCALE_DIV_ASSIGN (Offx [poff], Az [p], Rs [oldrow]);
                            poff++ ;
                        }
                        else
                        {
                            /* (newrow,k) is an entry in the block */
                            /* X [newrow] = Az [p] / Rs [oldrow] */
                            SCALE_DIV_ASSIGN (X [newrow], Az [p], Rs [oldrow]) ;
                        }
                    }

                    /* ------------------------------------------------------ */
                    /* compute kth column of U, and update kth column of A */
                    /* ------------------------------------------------------ */

                    GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, ulen) ;
                    for (up = 0 ; up < ulen ; up++)
                    {
                        j = Ui [up] ;
                        ujk = X [j] ;
                        /* X [j] = 0 */
                        CLEAR (X [j]) ;
                        Ux [up] = ujk ;
                        GET_POINTER (LU, Lip, Llen, Li, Lx, j, llen) ;
                        for (p = 0 ; p < llen ; p++)
                        {
                            /* X [Li [p]] -= Lx [p] * ujk */
                            MULT_SUB (X [Li [p]], Lx [p], ujk) ;
                        }
                    }
                    /* get the diagonal entry of U */
                    ukk = X [k] ;
                    /* X [k] = 0 */
                    CLEAR (X [k]) ;
                    /* singular case */
                    if (IS_ZERO (ukk))
                    {
                        /* matrix is numerically singular */
                        Common->status = KLU_SINGULAR ;
                        if (Common->numerical_rank == EMPTY)
                        {
                            Common->numerical_rank = k+k1 ;
                            Common->singular_col = Q [k+k1] ;
                        }
                        if (Common->halt_if_singular)
                        {
                            /* do not continue the factorization */
                            return (FALSE) ;
                        }
                    }
                    Udiag [k+k1] = ukk ;
                    /* gather and divide by pivot to get kth column of L */
                    GET_POINTER (LU, Lip, Llen, Li, Lx, k, llen) ;
                    for (p = 0 ; p < llen ; p++)
                    {
                        i = Li [p] ;
                        DIV (Lx [p], X [i], ukk) ;
                        CLEAR (X [i]) ;
                    }
                }
            }
        }
    }

    /* ---------------------------------------------------------------------- */
    /* permute scale factors Rs according to pivotal row order */
    /* ---------------------------------------------------------------------- */

    if (scale > 0)
    {
        for (k = 0 ; k < n ; k++)
        {
            /* TODO : Check. REAL(X[k]) Can be just X[k] */
            /* REAL (X [k]) = Rs [Pnum [k]] ; */
            X [k] = Rs [Pnum [k]] ;
        }
        for (k = 0 ; k < n ; k++)
        {
            Rs [k] = REAL (X [k]) ;
        }
    }

#ifndef NDEBUGKLU2
    ASSERT (Offp [n] == poff) ;
    ASSERT (Symbolic->nzoff == poff) ;
    PRINTF (("\n------------------- Off diagonal entries, new:\n")) ;
    ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
    if (Common->status == KLU_OK)
    {
        PRINTF (("\n ########### KLU_BTF_REFACTOR done, nblocks %d\n",nblocks));
        for (block = 0 ; block < nblocks ; block++)
        {
            k1 = R [block] ;
            k2 = R [block+1] ;
            nk = k2 - k1 ;
            PRINTF ((
                "\n================KLU_refactor output: k1 %d k2 %d nk %d\n",
                k1, k2, nk)) ;
            if (nk == 1)
            {
                PRINTF (("singleton  ")) ;
                PRINT_ENTRY (Udiag [k1]) ;
            }
            else
            {
                Lip = Numeric->Lip + k1 ;
                Llen = Numeric->Llen + k1 ;
                LU = (Unit *) Numeric->LUbx [block] ;
                PRINTF (("\n---- L block %d\n", block)) ;
                ASSERT (KLU_valid_LU (nk, TRUE, Lip, Llen, LU)) ;
                Uip = Numeric->Uip + k1 ;
                Ulen = Numeric->Ulen + k1 ;
                PRINTF (("\n---- U block %d\n", block)) ;
                ASSERT (KLU_valid_LU (nk, FALSE, Uip, Ulen, LU)) ;
            }
        }
    }
#endif

    return (TRUE) ;
}