Ejemplo n.º 1
0
int CHOLMOD(super_numeric)
(
    /* ---- input ---- */
    cholmod_sparse *A,	/* matrix to factorize */
    cholmod_sparse *F,	/* F = A' or A(:,f)' */
    double beta [2],	/* beta*I is added to diagonal of matrix to factorize */
    /* ---- in/out --- */
    cholmod_factor *L,	/* factorization */
    /* --------------- */
    cholmod_common *Common
)
{
    cholmod_dense *C ;
    Int *Super, *Map, *SuperMap ;
    size_t maxcsize ;
    Int nsuper, n, i, k, s, stype, nrow ;
    int ok = TRUE, symbolic ;
    size_t t, w ;

    /* ---------------------------------------------------------------------- */
    /* check inputs */
    /* ---------------------------------------------------------------------- */

    RETURN_IF_NULL_COMMON (FALSE) ;
    RETURN_IF_NULL (L, FALSE) ;
    RETURN_IF_NULL (A, FALSE) ;
    RETURN_IF_XTYPE_INVALID (A, CHOLMOD_REAL, CHOLMOD_ZOMPLEX, FALSE) ;
    RETURN_IF_XTYPE_INVALID (L, CHOLMOD_PATTERN, CHOLMOD_COMPLEX, FALSE) ;
    stype = A->stype ;
    if (stype < 0)
    {
        if (A->nrow != A->ncol || A->nrow != L->n)
        {
            ERROR (CHOLMOD_INVALID, "invalid dimensions") ;
            return (FALSE) ;
        }
    }
    else if (stype == 0)
    {
        if (A->nrow != L->n)
        {
            ERROR (CHOLMOD_INVALID, "invalid dimensions") ;
            return (FALSE) ;
        }
        RETURN_IF_NULL (F, FALSE) ;
        RETURN_IF_XTYPE_INVALID (F, CHOLMOD_REAL, CHOLMOD_ZOMPLEX, FALSE) ;
        if (A->nrow != F->ncol || A->ncol != F->nrow || F->stype != 0)
        {
            ERROR (CHOLMOD_INVALID, "F invalid") ;
            return (FALSE) ;
        }
        if (A->xtype != F->xtype)
        {
            ERROR (CHOLMOD_INVALID, "A and F must have same xtype") ;
            return (FALSE) ;
        }
    }
    else
    {
        /* symmetric upper case not suppored */
        ERROR (CHOLMOD_INVALID, "symmetric upper case not supported") ;
        return (FALSE) ;
    }
    if (!(L->is_super))
    {
        ERROR (CHOLMOD_INVALID, "L not supernodal") ;
        return (FALSE) ;
    }
    if (L->xtype != CHOLMOD_PATTERN)
    {
        if (! ((A->xtype == CHOLMOD_REAL    && L->xtype == CHOLMOD_REAL)
                || (A->xtype == CHOLMOD_COMPLEX && L->xtype == CHOLMOD_COMPLEX)
                || (A->xtype == CHOLMOD_ZOMPLEX && L->xtype == CHOLMOD_COMPLEX)))
        {
            ERROR (CHOLMOD_INVALID, "complex type mismatch") ;
            return (FALSE) ;
        }
    }
    Common->status = CHOLMOD_OK ;

    /* ---------------------------------------------------------------------- */
    /* allocate workspace in Common */
    /* ---------------------------------------------------------------------- */

    nsuper = L->nsuper ;
    maxcsize = L->maxcsize ;
    nrow = A->nrow ;
    n = nrow ;

    PRINT1 (("nsuper "ID" maxcsize %g\n", nsuper, (double) maxcsize)) ;
    ASSERT (nsuper >= 0 && maxcsize > 0) ;

    /* w = 2*n + 4*nsuper */
    w = CHOLMOD(mult_size_t) (n, 2, &ok) ;
    t = CHOLMOD(mult_size_t) (nsuper, 4, &ok) ;
    w = CHOLMOD(add_size_t) (w, t, &ok) ;
    if (!ok)
    {
        ERROR (CHOLMOD_TOO_LARGE, "problem too large") ;
        return (FALSE) ;
    }

    CHOLMOD(allocate_work) (n, w, 0, Common) ;
    if (Common->status < CHOLMOD_OK)
    {
        return (FALSE) ;
    }
    ASSERT (CHOLMOD(dump_work) (TRUE, TRUE, 0, Common)) ;

    /* ---------------------------------------------------------------------- */
    /* get the current factor L and allocate numerical part, if needed */
    /* ---------------------------------------------------------------------- */

    Super = L->super ;
    symbolic = (L->xtype == CHOLMOD_PATTERN) ;
    if (symbolic)
    {
        /* convert to supernodal numeric by allocating L->x */
        CHOLMOD(change_factor) (
            (A->xtype == CHOLMOD_REAL) ? CHOLMOD_REAL : CHOLMOD_COMPLEX,
            TRUE, TRUE, TRUE, TRUE, L, Common) ;
        if (Common->status < CHOLMOD_OK)
        {
            /* the factor L remains in symbolic supernodal form */
            return (FALSE) ;
        }
    }
    ASSERT (L->dtype == DTYPE) ;
    ASSERT (L->xtype == CHOLMOD_REAL || L->xtype == CHOLMOD_COMPLEX) ;

    /* supernodal LDL' is not supported */
    L->is_ll = TRUE ;

    /* ---------------------------------------------------------------------- */
    /* get more workspace */
    /* ---------------------------------------------------------------------- */

    C = CHOLMOD(allocate_dense) (maxcsize, 1, maxcsize, L->xtype, Common) ;
    if (Common->status < CHOLMOD_OK)
    {
        int status = Common->status ;
        if (symbolic)
        {
            /* Change L back to symbolic, since the numeric values are not
             * initialized.  This cannot fail. */
            CHOLMOD(change_factor) (CHOLMOD_PATTERN, TRUE, TRUE, TRUE, TRUE,
                                    L, Common) ;
        }
        /* the factor L is now back to the form it had on input */
        Common->status = status ;
        return (FALSE) ;
    }

    /* ---------------------------------------------------------------------- */
    /* get workspace */
    /* ---------------------------------------------------------------------- */

    SuperMap = Common->Iwork ;		/* size n (i/i/l) */
    Map = Common->Flag ;    /* size n, use Flag as workspace for Map array */
    for (i = 0 ; i < n ; i++)
    {
        Map [i] = EMPTY ;
    }

    /* ---------------------------------------------------------------------- */
    /* find the mapping of nodes to relaxed supernodes */
    /* ---------------------------------------------------------------------- */

    /* SuperMap [k] = s if column k is contained in supernode s */
    for (s = 0 ; s < nsuper ; s++)
    {
        PRINT1 (("Super ["ID"] "ID" ncols "ID"\n",
                 s, Super[s], Super[s+1]-Super[s]));
        for (k = Super [s] ; k < Super [s+1] ; k++)
        {
            SuperMap [k] = s ;
            PRINT2 (("relaxed SuperMap ["ID"] = "ID"\n", k, SuperMap [k])) ;
        }
    }

    /* ---------------------------------------------------------------------- */
    /* supernodal numerical factorization, using template routine */
    /* ---------------------------------------------------------------------- */

    switch (A->xtype)
    {
    case CHOLMOD_REAL:
        ok = r_cholmod_super_numeric (A, F, beta, L, C, Common) ;
        break ;

    case CHOLMOD_COMPLEX:
        ok = c_cholmod_super_numeric (A, F, beta, L, C, Common) ;
        break ;

    case CHOLMOD_ZOMPLEX:
        /* This operates on complex L, not zomplex */
        ok = z_cholmod_super_numeric (A, F, beta, L, C, Common) ;
        break ;
    }

    /* ---------------------------------------------------------------------- */
    /* clear Common workspace, free temp workspace C, and return */
    /* ---------------------------------------------------------------------- */

    /* Flag array was used as workspace, clear it */
    Common->mark = EMPTY ;
    /* CHOLMOD(clear_flag) (Common) ; */
    CHOLMOD_CLEAR_FLAG (Common) ;
    ASSERT (CHOLMOD(dump_work) (TRUE, TRUE, 0, Common)) ;
    CHOLMOD(free_dense) (&C, Common) ;
    return (ok) ;
}
Ejemplo n.º 2
0
int CHOLMOD(row_subtree)
(
    /* ---- input ---- */
    cholmod_sparse *A,	/* matrix to analyze */
    cholmod_sparse *F,	/* used for A*A' case only. F=A' or A(:,f)' */
    size_t krow,	/* row k of L */
    Int *Parent,	/* elimination tree */
    /* ---- output --- */
    cholmod_sparse *R,	/* pattern of L(k,:), 1-by-n with R->nzmax >= n */
    /* --------------- */
    cholmod_common *Common
)
{
    Int *Rp, *Stack, *Flag, *Ap, *Ai, *Anz, *Fp, *Fi, *Fnz ;
    Int p, pend, parent, t, stype, nrow, k, pf, pfend, Fpacked, packed,
	sorted, top, len, i, mark ;

    /* ---------------------------------------------------------------------- */
    /* check inputs */
    /* ---------------------------------------------------------------------- */

    RETURN_IF_NULL_COMMON (FALSE) ;
    RETURN_IF_NULL (A, FALSE) ;
    RETURN_IF_NULL (R, FALSE) ;
    RETURN_IF_NULL (Parent, FALSE) ;
    RETURN_IF_XTYPE_INVALID (A, CHOLMOD_PATTERN, CHOLMOD_ZOMPLEX, FALSE) ;
    RETURN_IF_XTYPE_INVALID (R, CHOLMOD_PATTERN, CHOLMOD_ZOMPLEX, FALSE) ;
    stype = A->stype ;
    if (stype == 0)
    {
	RETURN_IF_NULL (F, FALSE) ;
	RETURN_IF_XTYPE_INVALID (F, CHOLMOD_PATTERN, CHOLMOD_ZOMPLEX, FALSE) ;
    }
    if (krow >= A->nrow)
    {
	ERROR (CHOLMOD_INVALID, "subtree: k invalid") ;
	return (FALSE) ;
    }
    if (R->ncol != 1 || A->nrow != R->nrow || A->nrow > R->nzmax)
    {
	ERROR (CHOLMOD_INVALID, "subtree: R invalid") ;
	return (FALSE) ;
    }
    Common->status = CHOLMOD_OK ;

    /* ---------------------------------------------------------------------- */
    /* allocate workspace */
    /* ---------------------------------------------------------------------- */

    nrow = A->nrow ;
    CHOLMOD(allocate_work) (nrow, 0, 0, Common) ;
    if (Common->status < CHOLMOD_OK)
    {
	return (FALSE) ;
    }
    ASSERT (CHOLMOD(dump_work) (TRUE, TRUE, 0, Common)) ;

    /* ---------------------------------------------------------------------- */
    /* get inputs */
    /* ---------------------------------------------------------------------- */

    if (stype > 0)
    {
	/* symmetric upper case: F is not needed.  It may be NULL */
	Fp = NULL ;
	Fi = NULL ;
	Fnz = NULL ;
	Fpacked = TRUE ;
    }
    else if (stype == 0)
    {
	/* unsymmetric case: F is required. */
	Fp = F->p ;
	Fi = F->i ;
	Fnz = F->nz ;
	Fpacked = F->packed ;
    }
    else
    {
	/* symmetric lower triangular form not supported */
	ERROR (CHOLMOD_INVALID, "symmetric lower not supported") ;
	return (FALSE) ;
    }

    Ap = A->p ;
    Ai = A->i ;
    Anz = A->nz ;
    packed = A->packed ;
    sorted = A->sorted ;

    k = krow ;
    Stack = R->i ;

    /* ---------------------------------------------------------------------- */
    /* get workspace */
    /* ---------------------------------------------------------------------- */

    Flag = Common->Flag ;	/* size nrow, Flag [i] < mark must hold */
    /* mark = CHOLMOD(clear_flag) (Common) ; */
    CHOLMOD_CLEAR_FLAG (Common) ;
    mark = Common->mark ;

    /* ---------------------------------------------------------------------- */
    /* compute the pattern of L(k,:) */
    /* ---------------------------------------------------------------------- */

    top = nrow ;		/* Stack is empty */
    Flag [k] = mark ;		/* do not include diagonal entry in Stack */

#define SCATTER			/* do not scatter numerical values */
#define PARENT(i) Parent [i]	/* use Parent for etree */

    if (stype != 0)
    {
	/* scatter kth col of triu (A), get pattern L(k,:) */
	p = Ap [k] ;
	pend = (packed) ? (Ap [k+1]) : (p + Anz [k]) ;
	SUBTREE ;
    }
    else
    {
	/* scatter kth col of triu (beta*I+AA'), get pattern L(k,:) */
	pf = Fp [k] ;
	pfend = (Fpacked) ? (Fp [k+1]) : (pf + Fnz [k]) ;
	for ( ; pf < pfend ; pf++)
	{
	    /* get nonzero entry F (t,k) */
	    t = Fi [pf] ;
	    p = Ap [t] ;
	    pend = (packed) ? (Ap [t+1]) : (p + Anz [t]) ;
	    SUBTREE ;
	}
    }

#undef SCATTER
#undef PARENT

    /* shift the stack upwards, to the first part of R */
    len = nrow - top ;
    for (i = 0 ; i < len ; i++)
    {
	Stack [i] = Stack [top + i] ;
    }

    Rp = R->p ;
    Rp [0] = 0 ;
    Rp [1] = len ;
    R->sorted = FALSE ;

    CHOLMOD(clear_flag) (Common) ;
    ASSERT (CHOLMOD(dump_work) (TRUE, TRUE, 0, Common)) ;
    return (TRUE) ;
}
Ejemplo n.º 3
0
int CHOLMOD(super_symbolic2)
(
    /* ---- input ---- */
    int for_whom,       /* FOR_SPQR     (0): for SPQR but not GPU-accelerated
                           FOR_CHOLESKY (1): for Cholesky (GPU or not)
                           FOR_SPQRGPU  (2): for SPQR with GPU acceleration */
    cholmod_sparse *A,	/* matrix to analyze */
    cholmod_sparse *F,	/* F = A' or A(:,f)' */
    Int *Parent,	/* elimination tree */
    /* ---- in/out --- */
    cholmod_factor *L,	/* simplicial symbolic on input,
			 * supernodal symbolic on output */
    /* --------------- */
    cholmod_common *Common
)
{
    double zrelax0, zrelax1, zrelax2, xxsize ;
    Int *Wi, *Wj, *Super, *Snz, *Ap, *Ai, *Flag, *Head, *Ls, *Lpi, *Lpx, *Fnz,
	*Sparent, *Anz, *SuperMap, *Merged, *Nscol, *Zeros, *Fp, *Fj,
	*ColCount, *Lpi2, *Lsuper, *Iwork ;
    Int nsuper, d, n, j, k, s, mark, parent, p, pend, k1, k2, packed, nscol,
	nsrow, ndrow1, ndrow2, stype, ssize, xsize, sparent, plast, slast,
	csize, maxcsize, ss, nscol0, nscol1, ns, nfsuper, newzeros, totzeros,
	merge, snext, esize, maxesize, nrelax0, nrelax1, nrelax2, Asorted ;
    size_t w ;
    int ok = TRUE, find_xsize ;
    const char* env_use_gpu;
    const char* env_max_bytes;
    size_t max_bytes;
    const char* env_max_fraction;
    double max_fraction;

    /* ---------------------------------------------------------------------- */
    /* check inputs */
    /* ---------------------------------------------------------------------- */

    RETURN_IF_NULL_COMMON (FALSE) ;
    RETURN_IF_NULL (A, FALSE) ;
    RETURN_IF_NULL (L, FALSE) ;
    RETURN_IF_NULL (Parent, FALSE) ;
    RETURN_IF_XTYPE_INVALID (A, CHOLMOD_PATTERN, CHOLMOD_ZOMPLEX, FALSE) ;
    RETURN_IF_XTYPE_INVALID (L, CHOLMOD_PATTERN, CHOLMOD_PATTERN, FALSE) ;
    stype = A->stype ;
    if (stype < 0)
    {
	/* invalid symmetry; symmetric lower form not supported */
	ERROR (CHOLMOD_INVALID, "symmetric lower not supported") ;
	return (FALSE) ;
    }
    if (stype == 0)
    {
	/* F must be present in the unsymmetric case */
	RETURN_IF_NULL (F, FALSE) ;
    }
    if (L->is_super)
    {
	/* L must be a simplicial symbolic factor */
	ERROR (CHOLMOD_INVALID, "L must be symbolic on input") ;
	return (FALSE) ;
    }
    Common->status = CHOLMOD_OK ;

    /* ---------------------------------------------------------------------- */
    /* allocate workspace */
    /* ---------------------------------------------------------------------- */

    n = A->nrow ;

    /* w = 5*n */
    w = CHOLMOD(mult_size_t) (n, 5, &ok) ;
    if (!ok)
    {
	ERROR (CHOLMOD_TOO_LARGE, "problem too large") ;
	return (FALSE) ;
    }

    CHOLMOD(allocate_work) (n, w, 0, Common) ;
    if (Common->status < CHOLMOD_OK)
    {
	/* out of memory */
	return (FALSE) ;
    }
    ASSERT (CHOLMOD(dump_work) (TRUE, TRUE, 0, Common)) ;

    /* ---------------------------------------------------------------------- */
    /* allocate GPU workspace */
    /* ---------------------------------------------------------------------- */

    L->useGPU = 0 ;     /* only used for Cholesky factorization, not QR */

#ifdef GPU_BLAS

    /* GPU module is installed */
    if ( for_whom == CHOLMOD_ANALYZE_FOR_CHOLESKY )
    {
        /* only allocate GPU workspace for supernodal Cholesky, and only when
           the GPU is requested and available. */

        max_bytes = 0;
        max_fraction = 0;

#ifdef DLONG
        if ( Common->useGPU == EMPTY )
        {
            /* useGPU not explicity requested by the user, but not explicitly
             * prohibited either.  Query OS environment variables for request.*/
            env_use_gpu  = getenv("CHOLMOD_USE_GPU");

            if ( env_use_gpu )
            {
                /* CHOLMOD_USE_GPU environment variable is set to something */
                if ( atoi ( env_use_gpu ) == 0 )
                {
                    Common->useGPU = 0; /* don't use the gpu */
                }
                else
                {
                    Common->useGPU = 1; /* use the gpu */
                    env_max_bytes = getenv("CHOLMOD_GPU_MEM_BYTES");
                    env_max_fraction = getenv("CHOLMOD_GPU_MEM_FRACTION");
                    if ( env_max_bytes )
                    {
                        max_bytes = atol(env_max_bytes);
                        Common->maxGpuMemBytes = max_bytes;
                    }
                    if ( env_max_fraction )
                    {
                        max_fraction = atof (env_max_fraction);
                        if ( max_fraction < 0 ) max_fraction = 0;
                        if ( max_fraction > 1 ) max_fraction = 1;
                        Common->maxGpuMemFraction = max_fraction;
                    }	  
                }
            }
            else
            {
                /* CHOLMOD_USE_GPU environment variable not set, so no GPU
                 * acceleration will be used */
                Common->useGPU = 0;
            }
            /* fprintf (stderr, "useGPU queried: %d\n", Common->useGPU) ; */
        }

        /* Ensure that a GPU is present */
        if ( Common->useGPU == 1 )
        {
            /* fprintf (stderr, "\nprobe GPU:\n") ; */
            Common->useGPU = CHOLMOD(gpu_probe) (Common); 
            /* fprintf (stderr, "\nprobe GPU: result %d\n", Common->useGPU) ; */
        }

        if ( Common->useGPU == 1 )
        {
            /* Cholesky + GPU, so allocate space */
            /* fprintf (stderr, "allocate GPU:\n") ; */
            CHOLMOD(gpu_allocate) ( Common );
            /* fprintf (stderr, "allocate GPU done\n") ; */
        }
#else
        /* GPU acceleration is only supported for long int version */
        Common->useGPU = 0;
#endif

        /* Cache the fact that the symbolic factorization supports 
         * GPU acceleration */
        L->useGPU = Common->useGPU;

    }

#else
    /* GPU module is not installed */
    Common->useGPU = 0 ;
#endif

    /* ---------------------------------------------------------------------- */
    /* get inputs */
    /* ---------------------------------------------------------------------- */

    /* A is now either A or triu(A(p,p)) for the symmetric case.  It is either
     * A or A(p,f) for the unsymmetric case (both in column form).  It can be
     * either packed or unpacked, and either sorted or unsorted.  Entries in
     * the lower triangular part may be present if A is symmetric, but these
     * are ignored. */

    Ap = A->p ;
    Ai = A->i ;
    Anz = A->nz ;

    if (stype != 0)
    {
	/* F not accessed */
	Fp = NULL ;
	Fj = NULL ;
	Fnz = NULL ;
	packed = TRUE ;
    }
    else
    {
	/* F = A(:,f) or A(p,f) in packed row form, either sorted or unsorted */
	Fp = F->p ;
	Fj = F->i ;
	Fnz = F->nz ;
	packed = F->packed ;
    }

    ColCount = L->ColCount ;

    nrelax0 = Common->nrelax [0] ;
    nrelax1 = Common->nrelax [1] ;
    nrelax2 = Common->nrelax [2] ;

    zrelax0 = Common->zrelax [0] ;
    zrelax1 = Common->zrelax [1] ;
    zrelax2 = Common->zrelax [2] ;

    zrelax0 = IS_NAN (zrelax0) ? 0 : zrelax0 ;
    zrelax1 = IS_NAN (zrelax1) ? 0 : zrelax1 ;
    zrelax2 = IS_NAN (zrelax2) ? 0 : zrelax2 ;

    ASSERT (CHOLMOD(dump_parent) (Parent, n, "Parent", Common)) ;

    /* ---------------------------------------------------------------------- */
    /* get workspace */
    /* ---------------------------------------------------------------------- */

    /* Sparent, Snz, and Merged could be allocated later, of size nfsuper */

    Iwork = Common->Iwork ;
    Wi      = Iwork ;	    /* size n (i/l/l).  Lpi2 is i/l/l */
    Wj      = Iwork + n ;   /* size n (i/l/l).  Zeros is i/l/l */
    Sparent = Iwork + 2*((size_t) n) ; /* size nfsuper <= n [ */
    Snz     = Iwork + 3*((size_t) n) ; /* size nfsuper <= n [ */
    Merged  = Iwork + 4*((size_t) n) ; /* size nfsuper <= n [ */

    Flag = Common->Flag ;   /* size n */
    Head = Common->Head ;   /* size n+1 */

    /* ---------------------------------------------------------------------- */
    /* find the fundamental supernodes */
    /* ---------------------------------------------------------------------- */

    /* count the number of children of each node, using Wi [ */
    for (j = 0 ; j < n ; j++)
    {
	Wi [j] = 0 ;
    }
    for (j = 0 ; j < n ; j++)
    {
	parent = Parent [j] ;
	if (parent != EMPTY)
	{
	    Wi [parent]++ ;
	}
    }

    Super = Head ;  /* use Head [0..nfsuper] as workspace for Super list ( */

    /* column 0 always starts a new supernode */
    nfsuper = (n == 0) ? 0 : 1 ;	/* number of fundamental supernodes */
    Super [0] = 0 ;

    for (j = 1 ; j < n ; j++)
    {
	/* check if j starts new supernode, or in the same supernode as j-1 */
	if (Parent [j-1] != j	    /* parent of j-1 is not j */
	    || (ColCount [j-1] != ColCount [j] + 1) /* j-1 not subset of j*/
	    || Wi [j] > 1	    /* j has more than one child */
#ifdef GPU_BLAS
	    /* Ensure that the supernode will fit in the GPU buffers */
	    /* Data size of 16 bytes must be assumed for case of PATTERN */
	    || (for_whom == CHOLMOD_ANALYZE_FOR_CHOLESKY && L->useGPU && 
		 (j-Super[nfsuper-1]+1) * 
		 ColCount[Super[nfsuper-1]] * sizeof(double) * 2 >= 
		 Common->devBuffSize)
#endif
	    )
	{
	    /* j is the leading node of a supernode */
	    Super [nfsuper++] = j ;
	}
    }
    Super [nfsuper] = n ;

    /* contents of Wi no longer needed for child count ] */

    Nscol = Wi ; /* use Wi as size-nfsuper workspace for Nscol [ */

    /* ---------------------------------------------------------------------- */
    /* find the mapping of fundamental nodes to supernodes */
    /* ---------------------------------------------------------------------- */

    SuperMap = Wj ;	/* use Wj as workspace for SuperMap [ */

    /* SuperMap [k] = s if column k is contained in supernode s */
    for (s = 0 ; s < nfsuper ; s++)
    {
	for (k = Super [s] ; k < Super [s+1] ; k++)
	{
	    SuperMap [k] = s ;
	}
    }

    /* ---------------------------------------------------------------------- */
    /* construct the fundamental supernodal etree */
    /* ---------------------------------------------------------------------- */

    for (s = 0 ; s < nfsuper ; s++)
    {
	j = Super [s+1] - 1 ;	/* last node in supernode s */
	parent = Parent [j] ;	/* parent of last node */
	Sparent [s] = (parent == EMPTY) ? EMPTY : SuperMap [parent] ;
	PRINT1 (("Sparent ["ID"] = "ID"\n", s, Sparent [s])) ;
    }

    /* contents of Wj no longer needed as workspace for SuperMap ]
     * SuperMap will be recomputed below, for the relaxed supernodes. */

    Zeros = Wj ;   /* use Wj for Zeros, workspace of size nfsuper [ */

    /* ---------------------------------------------------------------------- */
    /* relaxed amalgamation */
    /* ---------------------------------------------------------------------- */

    for (s = 0 ; s < nfsuper ; s++)
    {
	Merged [s] = EMPTY ;			/* s not merged into another */
	Nscol [s] = Super [s+1] - Super [s] ;	/* # of columns in s */
	Zeros [s] = 0 ;				/* # of zero entries in s */
	ASSERT (s <= Super [s]) ;
	Snz [s] = ColCount [Super [s]] ;  /* # of entries in leading col of s */
	PRINT2 (("lnz ["ID"] "ID"\n", s, Snz [s])) ;
    }

    for (s = nfsuper-2 ; s >= 0 ; s--)
    {
        double lnz1 ;

	/* should supernodes s and s+1 merge into a new node s? */
	PRINT1 (("\n========= Check relax of s "ID" and s+1 "ID"\n", s, s+1)) ;

	ss = Sparent [s] ;
	if (ss == EMPTY)
	{
	    PRINT1 (("s "ID" is a root, no merge with s+1 = "ID"\n", s, s+1)) ;
	    continue ;
	}

	/* find the current parent of s (perform path compression as needed) */
	for (ss = Sparent [s] ; Merged [ss] != EMPTY ; ss = Merged [ss]) ;
	sparent = ss ;
	PRINT2 (("Current sparent of s "ID" is "ID"\n", s, sparent)) ;

	/* ss is the current parent of s */
	for (ss = Sparent [s] ; Merged [ss] != EMPTY ; ss = snext)
	{
	    snext = Merged [ss] ;
	    PRINT2 (("ss "ID" is dead, merged into snext "ID"\n", ss, snext)) ;
	    Merged [ss] = sparent ;
	}

	/* if s+1 is not the current parent of s, do not merge */
	if (sparent != s+1)
	{
	    continue ;
	}

	nscol0 = Nscol [s] ;	/* # of columns in s */
	nscol1 = Nscol [s+1] ;	/* # of columns in s+1 */
	ns = nscol0 + nscol1 ;
	PRINT2 (("ns "ID" nscol0 "ID" nscol1 "ID"\n", ns, nscol0, nscol1)) ;

	totzeros = Zeros [s+1] ;	/* current # of zeros in s+1 */
	lnz1 = (double) (Snz [s+1]) ;	/* # entries in leading column of s+1 */

	/* determine if supernodes s and s+1 should merge */
	if (ns <= nrelax0)
	{
	    PRINT2 (("ns is tiny ("ID"), so go ahead and merge\n", ns)) ;
	    merge = TRUE ;
	}
	else
	{
	    /* use double to avoid integer overflow */
	    double lnz0 = Snz [s] ;	/* # entries in leading column of s */
	    double xnewzeros = nscol0 * (lnz1 + nscol0 - lnz0) ;

	    /* use Int for the final update of Zeros [s] below */
	    newzeros = nscol0 * (Snz [s+1] + nscol0 - Snz [s]) ;
	    ASSERT (newzeros == xnewzeros) ;

	    PRINT2 (("lnz0 %g lnz1 %g xnewzeros %g\n", lnz0, lnz1, xnewzeros)) ;
	    if (xnewzeros == 0)
	    {
		/* no new zeros, so go ahead and merge */
		PRINT2 (("no new fillin, so go ahead and merge\n")) ;
		merge = TRUE ;
	    }
	    else
	    {
		/* # of zeros if merged */
		double xtotzeros = ((double) totzeros) + xnewzeros ;

		/* xtotsize: total size of merged supernode, if merged: */
		double xns = (double) ns ;
		double xtotsize  = (xns * (xns+1) / 2) + xns * (lnz1 - nscol1) ;
		double z = xtotzeros / xtotsize ;

		Int totsize ;
		totsize  = (ns * (ns+1) / 2) + ns * (Snz [s+1] - nscol1) ;

		PRINT2 (("oldzeros "ID" newzeros "ID" xtotsize %g z %g\n",
			    Zeros [s+1], newzeros, xtotsize, z)) ;

		/* use Int for the final update of Zeros [s] below */
		totzeros += newzeros ;

		/* do not merge if supernode would become too big
		 * (Int overflow).  Continue computing; not (yet) an error. */
		/* fl.pt. compare, but no NaN's can occur here */
		merge = ((ns <= nrelax1 && z < zrelax0) ||
			 (ns <= nrelax2 && z < zrelax1) ||
					  (z < zrelax2)) &&
			(xtotsize < Int_max / sizeof (double)) ;

	    }
	}

#ifdef GPU_BLAS
	if ( for_whom == CHOLMOD_ANALYZE_FOR_CHOLESKY && L->useGPU ) {
	  /* Ensure that the aggregated supernode fits in the device 
	     supernode buffers */
	  double xns = (double) ns;
	  if ( ((xns * xns) + xns * (lnz1 - nscol1))*sizeof(double)*2  >= 
	       Common->devBuffSize ) {
	    merge = FALSE;
	  }
	}
#endif

	if (merge)
	{
	    PRINT1 (("Merge node s ("ID") and s+1 ("ID")\n", s, s+1)) ;
	    Zeros [s] = totzeros ;
	    Merged [s+1] = s ;
	    Snz [s] = nscol0 + Snz [s+1] ;
	    Nscol [s] += Nscol [s+1] ;
	}
    }

    /* contents of Wj no longer needed for Zeros ] */
    /* contents of Wi no longer needed for Nscol ] */
    /* contents of Sparent no longer needed (recomputed below) */

    /* ---------------------------------------------------------------------- */
    /* construct the relaxed supernode list */
    /* ---------------------------------------------------------------------- */

    nsuper = 0 ;
    for (s = 0 ; s < nfsuper ; s++)
    {
	if (Merged [s] == EMPTY)
	{
	    PRINT1 (("live supernode: "ID" snz "ID"\n", s, Snz [s])) ;
	    Super [nsuper] = Super [s] ;
	    Snz [nsuper] = Snz [s] ;
	    nsuper++ ;
	}
    }
    Super [nsuper] = n ;
    PRINT1 (("Fundamental supernodes: "ID"  relaxed "ID"\n", nfsuper, nsuper)) ;

    /* Merged no longer needed ] */

    /* ---------------------------------------------------------------------- */
    /* find the mapping of relaxed nodes to supernodes */
    /* ---------------------------------------------------------------------- */

    /* use Wj as workspace for SuperMap { */

    /* SuperMap [k] = s if column k is contained in supernode s */
    for (s = 0 ; s < nsuper ; s++)
    {
	for (k = Super [s] ; k < Super [s+1] ; k++)
	{
	    SuperMap [k] = s ;
	}
    }

    /* ---------------------------------------------------------------------- */
    /* construct the relaxed supernodal etree */
    /* ---------------------------------------------------------------------- */

    for (s = 0 ; s < nsuper ; s++)
    {
	j = Super [s+1] - 1 ;	/* last node in supernode s */
	parent = Parent [j] ;	/* parent of last node */
	Sparent [s] = (parent == EMPTY) ? EMPTY : SuperMap [parent] ;
	PRINT1 (("new Sparent ["ID"] = "ID"\n", s, Sparent [s])) ;
    }

    /* ---------------------------------------------------------------------- */
    /* determine the size of L->s and L->x */
    /* ---------------------------------------------------------------------- */

    ssize = 0 ;
    xsize = 0 ;
    xxsize = 0 ;
    find_xsize = for_whom == CHOLMOD_ANALYZE_FOR_CHOLESKY ||
                 for_whom == CHOLMOD_ANALYZE_FOR_SPQRGPU ;
    for (s = 0 ; s < nsuper ; s++)
    {
	nscol = Super [s+1] - Super [s] ;
	nsrow = Snz [s] ;
	ASSERT (nscol > 0) ;
	ssize += nsrow ;
        if (find_xsize)
        {
            xsize += nscol * nsrow ;
            /* also compute xsize in double to guard against Int overflow */
            xxsize += ((double) nscol) * ((double) nsrow) ;
        }
	if (ssize < 0 ||(find_xsize && xxsize > Int_max))
	{
	    /* Int overflow, clear workspace and return.
               QR factorization will not use xxsize, so that error is ignored.
               For Cholesky factorization, however, memory of space xxsize
               will be allocated, so this is a failure.  Both QR and Cholesky
               fail if ssize overflows. */
	    ERROR (CHOLMOD_TOO_LARGE, "problem too large") ;
	    FREE_WORKSPACE ;
	    return (FALSE) ;
	}
	ASSERT (ssize > 0) ;
        ASSERT (IMPLIES (find_xsize, xsize > 0)) ;
    }
    xsize = MAX (1, xsize) ;
    ssize = MAX (1, ssize) ;
    PRINT1 (("ix sizes: "ID" "ID" nsuper "ID"\n", ssize, xsize, nsuper)) ;

    /* ---------------------------------------------------------------------- */
    /* allocate L (all except real part L->x) */
    /* ---------------------------------------------------------------------- */

    L->ssize = ssize ;
    L->xsize = xsize ;
    L->nsuper = nsuper ;

    CHOLMOD(change_factor) (CHOLMOD_PATTERN, TRUE, TRUE, TRUE, TRUE, L, Common);

    if (Common->status < CHOLMOD_OK)
    {
	/* out of memory; L is still a valid simplicial symbolic factor */
	FREE_WORKSPACE ;
	return (FALSE) ;
    }

    DEBUG (CHOLMOD(dump_factor) (L, "L to symbolic super", Common)) ;
    ASSERT (L->is_ll && L->xtype == CHOLMOD_PATTERN && L->is_super) ;

    Lpi = L->pi ;
    Lpx = L->px ;
    Ls = L->s ;
    Ls [0] = 0 ;    /* flag for cholmod_check_factor; supernodes are defined */
    Lsuper = L->super ;

    /* copy the list of relaxed supernodes into the final list in L */
    for (s = 0 ; s <= nsuper ; s++)
    {
	Lsuper [s] = Super [s] ;
    }

    /* Head no longer needed as workspace for fundamental Super list ) */

    Super = Lsuper ;	    /* Super is now the list of relaxed supernodes */

    /* ---------------------------------------------------------------------- */
    /* construct column pointers of relaxed supernodal pattern (L->pi) */
    /* ---------------------------------------------------------------------- */

    p = 0 ;
    for (s = 0 ; s < nsuper ; s++)
    {
	Lpi [s] = p ;
	p += Snz [s] ;
	PRINT1 (("Snz ["ID"] = "ID", Super ["ID"] = "ID"\n",
		    s, Snz [s], s, Super[s])) ;
    }
    Lpi [nsuper] = p ;
    ASSERT ((Int) (L->ssize) == MAX (1,p)) ;

    /* ---------------------------------------------------------------------- */
    /* construct pointers for supernodal values (L->px) */
    /* ---------------------------------------------------------------------- */

    if (for_whom == CHOLMOD_ANALYZE_FOR_CHOLESKY ||
        for_whom == CHOLMOD_ANALYZE_FOR_SPQRGPU)
    {
        Lpx [0] = 0 ;
        p = 0 ;
        for (s = 0 ; s < nsuper ; s++)
        {
            nscol = Super [s+1] - Super [s] ;   /* number of columns in s */
            nsrow = Snz [s] ;           /* # of rows, incl triangular part*/
            Lpx [s] = p ;               /* pointer to numerical part of s */
            p += nscol * nsrow ;
        }
        Lpx [s] = p ;
        ASSERT ((Int) (L->xsize) == MAX (1,p)) ;
    }
    else
    {
        /* L->px is not needed for non-GPU accelerated QR factorization (it may
         * lead to Int overflow, anyway, if xsize caused Int overflow above).
         * Use a magic number to tell cholmod_check_factor to ignore Lpx. */
        Lpx [0] = 123456 ;
    }

    /* Snz no longer needed ] */

    /* ---------------------------------------------------------------------- */
    /* symbolic analysis to construct the relaxed supernodal pattern (L->s) */
    /* ---------------------------------------------------------------------- */

    Lpi2 = Wi ;	    /* copy Lpi into Lpi2, using Wi as workspace for Lpi2 [ */
    for (s = 0 ; s < nsuper ; s++)
    {
	Lpi2 [s] = Lpi [s] ;
    }

    Asorted = A->sorted ;

    for (s = 0 ; s < nsuper ; s++)
    {
	/* sth supernode is in columns k1 to k2-1.
	 * compute nonzero pattern of L (k1:k2-1,:). */

	/* place rows k1 to k2-1 in leading column of supernode s */
	k1 = Super [s] ;
	k2 = Super [s+1] ;
	PRINT1 (("=========>>> Supernode "ID" k1 "ID" k2-1 "ID"\n",
		    s, k1, k2-1)) ;
	for (k = k1 ; k < k2 ; k++)
	{
	    Ls [Lpi2 [s]++] = k ;
	}

	/* compute nonzero pattern each row k1 to k2-1 */
	for (k = k1 ; k < k2 ; k++)
	{
	    /* compute row k of L.  In the symmetric case, the pattern of L(k,:)
	     * is the set of nodes reachable in the supernodal etree from any
	     * row i in the nonzero pattern of A(0:k,k).  In the unsymmetric
	     * case, the pattern of the kth column of A*A' is the set union
	     * of all columns A(0:k,j) for each nonzero F(j,k). */

	    /* clear the Flag array and mark the current supernode */
	    /* mark = CHOLMOD(clear_flag) (Common) ; */
	    CHOLMOD_CLEAR_FLAG (Common) ;
	    mark = Common->mark ;
	    Flag [s] = mark ;
	    ASSERT (s == SuperMap [k]) ;

	    /* traverse the row subtree for each nonzero in A or AA' */
	    if (stype != 0)
	    {
		subtree (k, k, Ap, Ai, Anz, SuperMap, Sparent, mark,
                        Asorted, k1, Flag, Ls, Lpi2) ;
	    }
	    else
	    {
		/* for each j nonzero in F (:,k) do */
		p = Fp [k] ;
		pend = (packed) ? (Fp [k+1]) : (p + Fnz [k]) ;
		for ( ; p < pend ; p++)
		{
		    subtree (Fj [p], k, Ap, Ai, Anz, SuperMap, Sparent, mark,
			    Asorted, k1, Flag, Ls, Lpi2) ;
		}
	    }
	}
    }
#ifndef NDEBUG
    for (s = 0 ; s < nsuper ; s++)
    {
	PRINT1 (("Lpi2[s] "ID" Lpi[s+1] "ID"\n", Lpi2 [s], Lpi [s+1])) ;
	ASSERT (Lpi2 [s] == Lpi [s+1]) ;
	CHOLMOD(dump_super) (s, Super, Lpi, Ls, NULL, NULL, 0, Common) ;
    }
#endif

    /* contents of Wi no longer needed for Lpi2 ] */
    /* Sparent no longer needed ] */

    /* ---------------------------------------------------------------------- */
    /* determine the largest update matrix (L->maxcsize) */
    /* ---------------------------------------------------------------------- */

    /* maxcsize could be determined before L->s is allocated and defined, which
     * would mean that all memory requirements for both the symbolic and numeric
     * factorizations could be computed using O(nnz(A)+O(n)) space.  However, it
     * would require a lot of extra work.  The analysis phase, above, would need
     * to be duplicated, but with Ls not kept; instead, the algorithm would keep
     * track of the current s and slast for each supernode d, and update them
     * when a new row index appears in supernode d.  An alternative would be to
     * do this computation only if the allocation of L->s failed, in which case
     * the following code would be skipped.
     *
     * The csize for a supernode is the size of its largest contribution to
     * a subsequent ancestor supernode.  For example, suppose the rows of #'s
     * in the figure below correspond to the columns of a subsequent supernode,
     * and the dots are the entries in that ancestore.
     *
     *	    c
     *	    c c
     *	    c c c
     *	    x x x
     *	    x x x
     *	    # # #   .
     *	    # # #   . .
     *	    * * *   . .
     *	    * * *   . .
     *	    * * *   . .
     *	            . .
     *
     * Then for this update, the csize is 3-by-2, or 6, because there are 3
     * rows of *'s which is the number of rows in the update, and there are
     * 2 rows of #'s, which is the number columns in the update.  The csize
     * of a supernode is the largest such contribution for any ancestor
     * supernode.  maxcsize, for the whole matrix, has a rough upper bound of
     * the maximum size of any supernode.  This bound is loose, because the
     * the contribution must be less than the size of the ancestor supernodal
     * that it's updating.  maxcsize of a completely dense matrix, with one
     * supernode, is zero.
     *
     * maxesize is the column dimension for the workspace E needed for the
     * solve.  E is of size nrhs-by-maxesize, where the nrhs is the number of
     * columns in the right-hand-side.  The maxesize is the largest esize of
     * any supernode.  The esize of a supernode is the number of row indices
     * it contains, excluding the column indices of the supernode itself.
     * For the following example, esize is 4:
     *
     *	    c
     *	    c c
     *	    c c c
     *	    x x x
     *	    x x x
     *	    x x x
     *	    x x x
     *
     * maxesize can be no bigger than n.
     */

    maxcsize = 1 ;
    maxesize = 1 ;

    /* Do not need to guard csize against Int overflow since xsize is OK. */

    if (for_whom == CHOLMOD_ANALYZE_FOR_CHOLESKY ||
        for_whom == CHOLMOD_ANALYZE_FOR_SPQRGPU)
    {
        /* this is not needed for non-GPU accelerated QR factorization */
        for (d = 0 ; d < nsuper ; d++)
        {
            nscol = Super [d+1] - Super [d] ;
            p = Lpi [d] + nscol ;
            plast = p ;
            pend = Lpi [d+1] ;
            esize = pend - p ;
            maxesize = MAX (maxesize, esize) ;
            slast = (p == pend) ? (EMPTY) : (SuperMap [Ls [p]]) ;
            for ( ; p <= pend ; p++)
            {
                s = (p == pend) ? (EMPTY) : (SuperMap [Ls [p]]) ;
                if (s != slast)
                {
                    /* row i is the start of a new supernode */
                    ndrow1 = p - plast ;
                    ndrow2 = pend - plast ;
                    csize = ndrow2 * ndrow1 ;
                    PRINT1 (("Supernode "ID" ancestor "ID" C: "ID"-by-"ID
                        "  csize "ID"\n", d, slast, ndrow1, ndrow2, csize)) ;
                    maxcsize = MAX (maxcsize, csize) ;
                    plast = p ;
                    slast = s ;
                }
            }
        }
        PRINT1 (("max csize "ID"\n", maxcsize)) ;
    }

    /* Wj no longer needed for SuperMap } */

    L->maxcsize = maxcsize ;
    L->maxesize = maxesize ;
    L->is_super = TRUE ;
    ASSERT (L->xtype == CHOLMOD_PATTERN && L->is_ll) ;

    /* ---------------------------------------------------------------------- */
    /* supernodal symbolic factorization is complete */
    /* ---------------------------------------------------------------------- */

    FREE_WORKSPACE ;
    return (TRUE) ;
}
Ejemplo n.º 4
0
int CHOLMOD(resymbol_noperm)
(
    /* ---- input ---- */
    cholmod_sparse *A,	/* matrix to analyze */
    Int *fset,		/* subset of 0:(A->ncol)-1 */
    size_t fsize,	/* size of fset */
    int pack,		/* if TRUE, pack the columns of L */
    /* ---- in/out --- */
    cholmod_factor *L,	/* factorization, entries pruned on output */
    /* --------------- */
    cholmod_common *Common
)
{
    double *Lx, *Lz ;
    Int i, j, k, row, parent, p, pend, pdest, ncol, apacked, sorted, nrow, nf,
        use_fset, mark, jj, stype, xtype ;
    Int *Ap, *Ai, *Anz, *Li, *Lp, *Lnz, *Flag, *Head, *Link, *Anext, *Iwork ;
    size_t s ;
    int ok = TRUE ;

    /* ---------------------------------------------------------------------- */
    /* check inputs */
    /* ---------------------------------------------------------------------- */

    RETURN_IF_NULL_COMMON (FALSE) ;
    RETURN_IF_NULL (A, FALSE) ;
    RETURN_IF_NULL (L, FALSE) ;
    RETURN_IF_XTYPE_INVALID (A, CHOLMOD_PATTERN, CHOLMOD_ZOMPLEX, FALSE) ;
    RETURN_IF_XTYPE_INVALID (L, CHOLMOD_REAL, CHOLMOD_ZOMPLEX, FALSE) ;
    ncol = A->ncol ;
    nrow = A->nrow ;
    stype = A->stype ;
    ASSERT (IMPLIES (stype != 0, nrow == ncol)) ;
    if (stype > 0)
    {
        /* symmetric, with upper triangular part, not supported */
        ERROR (CHOLMOD_INVALID, "symmetric upper not supported ") ;
        return (FALSE) ;
    }
    if (L->is_super)
    {
        /* cannot operate on a supernodal or symbolic factorization */
        ERROR (CHOLMOD_INVALID, "cannot operate on supernodal L") ;
        return (FALSE) ;
    }
    if (L->n != A->nrow)
    {
        /* dimensions must agree */
        ERROR (CHOLMOD_INVALID, "A and L dimensions do not match") ;
        return (FALSE) ;
    }
    Common->status = CHOLMOD_OK ;

    /* ---------------------------------------------------------------------- */
    /* allocate workspace */
    /* ---------------------------------------------------------------------- */

    /* s = 2*nrow + (stype ? 0 : ncol) */
    s = CHOLMOD(mult_size_t) (nrow, 2, &ok) ;
    if (stype != 0)
    {
        s = CHOLMOD(add_size_t) (s, ncol, &ok) ;
    }
    if (!ok)
    {
        ERROR (CHOLMOD_TOO_LARGE, "problem too large") ;
        return (FALSE) ;
    }

    CHOLMOD(allocate_work) (nrow, s, 0, Common) ;
    if (Common->status < CHOLMOD_OK)
    {
        return (FALSE) ;	/* out of memory */
    }
    ASSERT (CHOLMOD(dump_work) (TRUE, TRUE, 0, Common)) ;

    /* ---------------------------------------------------------------------- */
    /* get inputs */
    /* ---------------------------------------------------------------------- */

    Ai = A->i ;
    Ap = A->p ;
    Anz = A->nz ;
    apacked = A->packed ;
    sorted = A->sorted ;

    Li = L->i ;
    Lx = L->x ;
    Lz = L->z ;
    Lp = L->p ;
    Lnz = L->nz ;
    xtype = L->xtype ;

    /* If L is monotonic on input, then it can be packed or
     * unpacked on output, depending on the pack input parameter. */

    /* cannot pack a non-monotonic matrix */
    if (!(L->is_monotonic))
    {
        pack = FALSE ;
    }

    ASSERT (L->nzmax >= (size_t) (Lp [L->n])) ;

    pdest = 0 ;

    PRINT1 (("\n\n===================== Resymbol pack %d Apacked %d\n",
             pack, A->packed)) ;
    ASSERT (CHOLMOD(dump_sparse) (A, "ReSymbol A:", Common) >= 0) ;
    DEBUG (CHOLMOD(dump_factor) (L, "ReSymbol initial L (i, x):", Common)) ;

    /* ---------------------------------------------------------------------- */
    /* get workspace */
    /* ---------------------------------------------------------------------- */

    Flag  = Common->Flag ;	/* size nrow */
    Head  = Common->Head ;	/* size nrow+1 */
    Iwork = Common->Iwork ;
    Link  = Iwork ;		/* size nrow (i/i/l) [ */
    Lnz   = Iwork + nrow ;	/* size nrow (i/i/l), if L not packed */
    Anext = Iwork + 2*((size_t) nrow) ;	/* size ncol (i/i/l), unsym. only */
    for (j = 0 ; j < nrow ; j++)
    {
        Link [j] = EMPTY ;
    }

    /* use Lnz in L itself */
    Lnz = L->nz ;
    ASSERT (Lnz != NULL) ;

    /* ---------------------------------------------------------------------- */
    /* for the unsymmetric case, queue each column of A (:,f) */
    /* ---------------------------------------------------------------------- */

    /* place each column of the basis set on the link list corresponding to */
    /* the smallest row index in that column */

    if (stype == 0)
    {
        use_fset = (fset != NULL) ;
        if (use_fset)
        {
            nf = fsize ;
            /* This is the only O(ncol) loop in cholmod_resymbol.
             * It is required only to check the fset. */
            for (j = 0 ; j < ncol ; j++)
            {
                Anext [j] = -2 ;
            }
            for (jj = 0 ; jj < nf ; jj++)
            {
                j = fset [jj] ;
                if (j < 0 || j > ncol || Anext [j] != -2)
                {
                    /* out-of-range or duplicate entry in fset */
                    ERROR (CHOLMOD_INVALID, "fset invalid") ;
                    ASSERT (CHOLMOD(dump_work) (TRUE, TRUE, 0, Common)) ;
                    return (FALSE) ;
                }
                /* flag column j as having been seen */
                Anext [j] = EMPTY ;
            }
            /* the fset is now valid */
            ASSERT (CHOLMOD(dump_perm) (fset, nf, ncol, "fset", Common)) ;
        }
        else
        {
            nf = ncol ;
        }
        for (jj = 0 ; jj < nf ; jj++)
        {
            j = (use_fset) ? (fset [jj]) : jj ;
            /* column j is the fset; find the smallest row (if any) */
            p = Ap [j] ;
            pend = (apacked) ? (Ap [j+1]) : (p + Anz [j]) ;
            if (pend > p)
            {
                k = Ai [p] ;
                if (!sorted)
                {
                    for ( ; p < pend ; p++)
                    {
                        k = MIN (k, Ai [p]) ;
                    }
                }
                /* place column j on link list k */
                ASSERT (k >= 0 && k < nrow) ;
                Anext [j] = Head [k] ;
                Head [k] = j ;
            }
        }
    }

    /* ---------------------------------------------------------------------- */
    /* recompute symbolic LDL' factorization */
    /* ---------------------------------------------------------------------- */

    for (k = 0 ; k < nrow ; k++)
    {

#ifndef NDEBUG
        PRINT1 (("\n\n================== Initial column k = "ID"\n", k)) ;
        for (p = Lp [k] ; p < Lp [k] + Lnz [k] ; p++)
        {
            PRINT1 ((" row: "ID"  value: ", Li [p])) ;
            PRINT1 (("\n")) ;
        }
        PRINT1 (("Recomputing LDL, column k = "ID"\n", k)) ;
#endif

        /* ------------------------------------------------------------------ */
        /* compute column k of I+F*F' or I+A */
        /* ------------------------------------------------------------------ */

        /* flag the diagonal entry */
        /* mark = CHOLMOD(clear_flag) (Common) ; */
        CHOLMOD_CLEAR_FLAG (Common) ;
        mark = Common->mark ;

        Flag [k] = mark ;
        PRINT1 (("	row: "ID" (diagonal)\n", k)) ;

        if (stype != 0)
        {
            /* merge column k of A into Flag (lower triangular part only) */
            p = Ap [k] ;
            pend = (apacked) ? (Ap [k+1]) : (p + Anz [k]) ;
            for ( ; p < pend ; p++)
            {
                i = Ai [p] ;
                if (i > k)
                {
                    Flag [i] = mark ;
                }
            }
        }
        else
        {
            /* for each column j whos first row index is in row k */
            for (j = Head [k] ; j != EMPTY ; j = Anext [j])
            {
                /* merge column j of A into Flag */
                PRINT1 (("	---- A column "ID"\n", j)) ;
                p = Ap [j] ;
                pend = (apacked) ? (Ap [j+1]) : (p + Anz [j]) ;
                PRINT1 (("  length "ID"  adding\n", pend-p)) ;
                for ( ; p < pend ; p++)
                {
#ifndef NDEBUG
                    ASSERT (Ai [p] >= k && Ai [p] < nrow) ;
                    if (Flag [Ai [p]] < mark) PRINT1 ((" row "ID"\n", Ai [p])) ;
#endif
                    Flag [Ai [p]] = mark ;
                }
            }
            /* clear the kth link list */
            Head [k] = EMPTY ;
        }

        /* ------------------------------------------------------------------ */
        /* compute pruned pattern of kth column of L = union of children */
        /* ------------------------------------------------------------------ */

        /* for each column j of L whose parent is k */
        for (j = Link [k] ; j != EMPTY ; j = Link [j])
        {
            /* merge column j of L into Flag */
            PRINT1 (("	---- L column "ID"\n", k)) ;
            ASSERT (j < k) ;
            ASSERT (Lnz [j] > 0) ;
            p = Lp [j] ;
            pend = p + Lnz [j] ;
            ASSERT (Li [p] == j && Li [p+1] == k) ;
            p++ ;	    /* skip past the diagonal entry */
            for ( ; p < pend ; p++)
            {
                /* add to pattern */
                ASSERT (Li [p] >= k && Li [p] < nrow) ;
                Flag [Li [p]] = mark ;
            }
        }

        /* ------------------------------------------------------------------ */
        /* prune the kth column of L */
        /* ------------------------------------------------------------------ */

        PRINT1 (("Final column of L:\n")) ;
        p = Lp [k] ;
        pend = p + Lnz [k] ;

        if (pack)
        {
            /* shift column k upwards */
            Lp [k] = pdest ;
        }
        else
        {
            /* leave column k in place, just reduce Lnz [k] */
            pdest = p ;
        }

        for ( ; p < pend ; p++)
        {
            ASSERT (pdest < pend) ;
            ASSERT (pdest <= p) ;
            row = Li [p] ;
            ASSERT (row >= k && row < nrow) ;
            if (Flag [row] == mark)
            {
                /* keep this entry */
                Li [pdest] = row ;
                if (xtype == CHOLMOD_REAL)
                {
                    Lx [pdest] = Lx [p] ;
                }
                else if (xtype == CHOLMOD_COMPLEX)
                {
                    Lx [2*pdest  ] = Lx [2*p  ] ;
                    Lx [2*pdest+1] = Lx [2*p+1] ;
                }
                else if (xtype == CHOLMOD_ZOMPLEX)
                {
                    Lx [pdest] = Lx [p] ;
                    Lz [pdest] = Lz [p] ;
                }
                pdest++ ;
            }
        }

        /* ------------------------------------------------------------------ */
        /* prepare this column for its parent */
        /* ------------------------------------------------------------------ */

        Lnz [k] = pdest - Lp [k] ;

        PRINT1 ((" L("ID") length "ID"\n", k, Lnz [k])) ;
        ASSERT (Lnz [k] > 0) ;

        /* parent is the first entry in the column after the diagonal */
        parent = (Lnz [k] > 1) ? (Li [Lp [k] + 1]) : EMPTY ;

        PRINT1 (("parent ("ID") = "ID"\n", k, parent)) ;
        ASSERT ((parent > k && parent < nrow) || (parent == EMPTY)) ;

        if (parent != EMPTY)
        {
            Link [k] = Link [parent] ;
            Link [parent] = k ;
        }
    }

    /* done using Iwork for Link, Lnz (if needed), and Anext ] */

    /* ---------------------------------------------------------------------- */
    /* convert L to packed, if requested */
    /* ---------------------------------------------------------------------- */

    if (pack)
    {
        /* finalize Lp */
        Lp [nrow] = pdest ;
        /* Shrink L to be just large enough.  It cannot fail. */
        /* workspace: none */
        ASSERT ((size_t) (Lp [nrow]) <= L->nzmax) ;
        CHOLMOD(reallocate_factor) (Lp [nrow], L, Common) ;
        ASSERT (Common->status >= CHOLMOD_OK) ;
    }

    /* ---------------------------------------------------------------------- */
    /* clear workspace */
    /* ---------------------------------------------------------------------- */

    /* CHOLMOD(clear_flag) (Common) ; */
    CHOLMOD_CLEAR_FLAG (Common) ;

    DEBUG (CHOLMOD(dump_factor) (L, "ReSymbol final L (i, x):", Common)) ;
    ASSERT (CHOLMOD(dump_work) (TRUE, TRUE, 0, Common)) ;
    return (TRUE) ;
}
Ejemplo n.º 5
0
cholmod_sparse *CHOLMOD(add)
(
    /* ---- input ---- */
    cholmod_sparse *A,	    /* matrix to add */
    cholmod_sparse *B,	    /* matrix to add */
    double alpha [2],	    /* scale factor for A */
    double beta [2],	    /* scale factor for B */
    int values,		    /* if TRUE compute the numerical values of C */
    int sorted,		    /* if TRUE, sort columns of C */
    /* --------------- */
    cholmod_common *Common
)
{
    double *Ax, *Bx, *Cx, *W ;
    Int apacked, up, lo, nrow, ncol, bpacked, nzmax, pa, paend, pb, pbend, i,
	j, p, mark, nz ;
    Int *Ap, *Ai, *Anz, *Bp, *Bi, *Bnz, *Flag, *Cp, *Ci ;
    cholmod_sparse *A2, *B2, *C ;

    /* ---------------------------------------------------------------------- */
    /* check inputs */
    /* ---------------------------------------------------------------------- */

    RETURN_IF_NULL_COMMON (NULL) ;
    RETURN_IF_NULL (A, NULL) ;
    RETURN_IF_NULL (B, NULL) ;
    values = values &&
	(A->xtype != CHOLMOD_PATTERN) && (B->xtype != CHOLMOD_PATTERN) ;
    RETURN_IF_XTYPE_INVALID (A, CHOLMOD_PATTERN,
	    values ? CHOLMOD_REAL : CHOLMOD_ZOMPLEX, NULL) ;
    RETURN_IF_XTYPE_INVALID (B, CHOLMOD_PATTERN,
	    values ? CHOLMOD_REAL : CHOLMOD_ZOMPLEX, NULL) ;
    if (A->nrow != B->nrow || A->ncol != B->ncol)
    {
	/* A and B must have the same dimensions */
	ERROR (CHOLMOD_INVALID, "A and B dimesions do not match") ;
	return (NULL) ;
    }
    /* A and B must have the same numerical type if values is TRUE (both must
     * be CHOLMOD_REAL, this is implicitly checked above) */

    Common->status = CHOLMOD_OK ;

    /* ---------------------------------------------------------------------- */
    /* allocate workspace */
    /* ---------------------------------------------------------------------- */

    nrow = A->nrow ;
    ncol = A->ncol ;
    CHOLMOD(allocate_work) (nrow, MAX (nrow,ncol), values ? nrow : 0, Common) ;
    if (Common->status < CHOLMOD_OK)
    {
	return (NULL) ;	    /* out of memory */
    }

    /* ---------------------------------------------------------------------- */
    /* get inputs */
    /* ---------------------------------------------------------------------- */

    if (nrow <= 1)
    {
	/* C will be implicitly sorted, so no need to sort it here */
	sorted = FALSE ;
    }

    /* convert A or B to unsymmetric, if necessary */
    A2 = NULL ;
    B2 = NULL ;

    if (A->stype != B->stype)
    {
	if (A->stype)
	{
	    /* workspace: Iwork (max (nrow,ncol)) */
	    A2 = CHOLMOD(copy) (A, 0, values, Common) ;
	    if (Common->status < CHOLMOD_OK)
	    {
		return (NULL) ;	    /* out of memory */
	    }
	    A = A2 ;
	}
	if (B->stype)
	{
	    /* workspace: Iwork (max (nrow,ncol)) */
	    B2 = CHOLMOD(copy) (B, 0, values, Common) ;
	    if (Common->status < CHOLMOD_OK)
	    {
		CHOLMOD(free_sparse) (&A2, Common) ;
		return (NULL) ;	    /* out of memory */
	    }
	    B = B2 ;
	}
    }

    /* get the A matrix */
    ASSERT (A->stype == B->stype) ;
    up = (A->stype > 0) ;
    lo = (A->stype < 0) ;

    Ap  = A->p ;
    Anz = A->nz ;
    Ai  = A->i ;
    Ax  = A->x ;
    apacked = A->packed ;

    /* get the B matrix */
    Bp  = B->p ;
    Bnz = B->nz ;
    Bi  = B->i ;
    Bx  = B->x ;
    bpacked = B->packed ;

    /* get workspace */
    W = Common->Xwork ;	    /* size nrow, used if values is TRUE */
    Flag = Common->Flag ;   /* size nrow, Flag [0..nrow-1] < mark on input */

    /* ---------------------------------------------------------------------- */
    /* allocate the result C */
    /* ---------------------------------------------------------------------- */

    /* If integer overflow occurs, nzmax < 0 and the allocate fails properly
     * (likewise in most other matrix manipulation routines). */

    nzmax = CHOLMOD(nnz) (A, Common) + CHOLMOD(nnz) (B, Common) ;

    C = CHOLMOD(allocate_sparse) (nrow, ncol, nzmax, FALSE, TRUE,
	    SIGN (A->stype), values ? A->xtype : CHOLMOD_PATTERN, Common) ;
    if (Common->status < CHOLMOD_OK)
    {
	CHOLMOD(free_sparse) (&A2, Common) ;
	CHOLMOD(free_sparse) (&B2, Common) ;
	return (NULL) ;	    /* out of memory */
    }

    Cp = C->p ;
    Ci = C->i ;
    Cx = C->x ;

    /* ---------------------------------------------------------------------- */
    /* compute C = alpha*A + beta*B */
    /* ---------------------------------------------------------------------- */

    nz = 0 ;
    for (j = 0 ; j < ncol ; j++)
    {
	Cp [j] = nz ;

	/* clear the Flag array */
	/* mark = CHOLMOD(clear_flag) (Common) ; */
	CHOLMOD_CLEAR_FLAG (Common) ;
	mark = Common->mark ;

	/* scatter B into W */
	pb = Bp [j] ;
	pbend = (bpacked) ? (Bp [j+1]) : (pb + Bnz [j]) ;
	for (p = pb ; p < pbend ; p++)
	{
	    i = Bi [p] ;
	    if ((up && i > j) || (lo && i < j))
	    {
		continue ;
	    }
	    Flag [i] = mark ;
	    if (values)
	    {
		W [i] = beta [0] * Bx [p] ;
	    }
	}

	/* add A and gather from W into C(:,j) */
	pa = Ap [j] ;
	paend = (apacked) ? (Ap [j+1]) : (pa + Anz [j]) ;
	for (p = pa ; p < paend ; p++)
	{
	    i = Ai [p] ;
	    if ((up && i > j) || (lo && i < j))
	    {
		continue ;
	    }
	    Flag [i] = EMPTY ;
	    Ci [nz] = i ;
	    if (values)
	    {
		Cx [nz] = W [i] + alpha [0] * Ax [p] ;
		W [i] = 0 ;
	    }
	    nz++ ;
	}

	/* gather remaining entries into C(:,j), using pattern of B */
	for (p = pb ; p < pbend ; p++)
	{
	    i = Bi [p] ;
	    if ((up && i > j) || (lo && i < j))
	    {
		continue ;
	    }
	    if (Flag [i] == mark)
	    {
		Ci [nz] = i ;
		if (values)
		{
		    Cx [nz] = W [i] ;
		    W [i] = 0 ;
		}
		nz++ ;
	    }
	}
    }

    Cp [ncol] = nz ;

    /* ---------------------------------------------------------------------- */
    /* reduce C in size and free temporary matrices */
    /* ---------------------------------------------------------------------- */

    ASSERT (MAX (1,nz) <= C->nzmax) ;
    CHOLMOD(reallocate_sparse) (nz, C, Common) ;
    ASSERT (Common->status >= CHOLMOD_OK) ;

    /* clear the Flag array */
    mark = CHOLMOD(clear_flag) (Common) ;

    CHOLMOD(free_sparse) (&A2, Common) ;
    CHOLMOD(free_sparse) (&B2, Common) ;

    /* ---------------------------------------------------------------------- */
    /* sort C, if requested */
    /* ---------------------------------------------------------------------- */

    if (sorted)
    {
	/* workspace: Iwork (max (nrow,ncol)) */
	if (!CHOLMOD(sort) (C, Common))
	{
	    CHOLMOD(free_sparse) (&C, Common) ;
	    if (Common->status < CHOLMOD_OK)
	    {
		return (NULL) ;		/* out of memory */
	    }
	}
    }

    /* ---------------------------------------------------------------------- */
    /* return result */
    /* ---------------------------------------------------------------------- */

    ASSERT (CHOLMOD(dump_sparse) (C, "add", Common) >= 0) ;
    return (C) ;
}
Ejemplo n.º 6
0
cholmod_sparse *CHOLMOD(ssmult)
(
    /* ---- input ---- */
    cholmod_sparse *A,	/* left matrix to multiply */
    cholmod_sparse *B,	/* right matrix to multiply */
    int stype,		/* requested stype of C */
    int values,		/* TRUE: do numerical values, FALSE: pattern only */
    int sorted,		/* if TRUE then return C with sorted columns */
    /* --------------- */
    cholmod_common *Common
)
{
    double bjt ;
    double *Ax, *Bx, *Cx, *W ;
    Int *Ap, *Anz, *Ai, *Bp, *Bnz, *Bi, *Cp, *Ci, *Flag ;
    cholmod_sparse *C, *A2, *B2, *A3, *B3, *C2 ;
    Int apacked, bpacked, j, i, pa, paend, pb, pbend, ncol, mark, cnz, t, p,
        nrow, anz, bnz, do_swap_and_transpose, n1, n2 ;

    /* ---------------------------------------------------------------------- */
    /* check inputs */
    /* ---------------------------------------------------------------------- */

    RETURN_IF_NULL_COMMON (NULL) ;
    RETURN_IF_NULL (A, NULL) ;
    RETURN_IF_NULL (B, NULL) ;
    values = values &&
             (A->xtype != CHOLMOD_PATTERN) && (B->xtype != CHOLMOD_PATTERN) ;
    RETURN_IF_XTYPE_INVALID (A, CHOLMOD_PATTERN,
                             values ? CHOLMOD_REAL : CHOLMOD_ZOMPLEX, NULL) ;
    RETURN_IF_XTYPE_INVALID (B, CHOLMOD_PATTERN,
                             values ? CHOLMOD_REAL : CHOLMOD_ZOMPLEX, NULL) ;
    if (A->ncol != B->nrow)
    {
        /* inner dimensions must agree */
        ERROR (CHOLMOD_INVALID, "A and B inner dimensions must match") ;
        return (NULL) ;
    }
    /* A and B must have the same numerical type if values is TRUE (both must
     * be CHOLMOD_REAL, this is implicitly checked above) */
    Common->status = CHOLMOD_OK ;

    /* ---------------------------------------------------------------------- */
    /* allocate workspace */
    /* ---------------------------------------------------------------------- */

    if (A->nrow <= 1)
    {
        /* C will be implicitly sorted, so no need to sort it here */
        sorted = FALSE ;
    }
    if (sorted)
    {
        n1 = MAX (A->nrow, B->ncol) ;
    }
    else
    {
        n1 = A->nrow ;
    }
    n2 = MAX4 (A->ncol, A->nrow, B->nrow, B->ncol) ;
    CHOLMOD(allocate_work) (n1, n2, values ? n1 : 0, Common) ;
    if (Common->status < CHOLMOD_OK)
    {
        /* out of memory */
        return (NULL) ;
    }
    ASSERT (CHOLMOD(dump_work) (TRUE, TRUE, values ? n1 : 0, Common)) ;

    /* ---------------------------------------------------------------------- */
    /* get inputs */
    /* ---------------------------------------------------------------------- */

    /* convert A to unsymmetric, if necessary */
    A2 = NULL ;
    B2 = NULL ;
    if (A->stype)
    {
        /* workspace: Iwork (max (A->nrow,A->ncol)) */
        A2 = CHOLMOD(copy) (A, 0, values, Common) ;
        if (Common->status < CHOLMOD_OK)
        {
            /* out of memory */
            ASSERT (CHOLMOD(dump_work) (TRUE, TRUE, values ? n1:0, Common)) ;
            return (NULL) ;
        }
        A = A2 ;
    }

    /* convert B to unsymmetric, if necessary */
    if (B->stype)
    {
        /* workspace: Iwork (max (B->nrow,B->ncol)) */
        B2 = CHOLMOD(copy) (B, 0, values, Common) ;
        if (Common->status < CHOLMOD_OK)
        {
            /* out of memory */
            CHOLMOD(free_sparse) (&A2, Common) ;
            ASSERT (CHOLMOD(dump_work) (TRUE, TRUE, values ? n1:0, Common)) ;
            return (NULL) ;
        }
        B = B2 ;
    }

    ASSERT (CHOLMOD(dump_sparse) (A, "A", Common) >= 0) ;
    ASSERT (CHOLMOD(dump_sparse) (B, "B", Common) >= 0) ;

    /* get the A matrix */
    Ap  = A->p ;
    Anz = A->nz ;
    Ai  = A->i ;
    Ax  = A->x ;
    apacked = A->packed ;

    /* get the B matrix */
    Bp  = B->p ;
    Bnz = B->nz ;
    Bi  = B->i ;
    Bx  = B->x ;
    bpacked = B->packed ;

    /* get the size of C */
    nrow = A->nrow ;
    ncol = B->ncol ;

    /* get workspace */
    W = Common->Xwork ;		/* size nrow, unused if values is FALSE */
    Flag = Common->Flag ;	/* size nrow, Flag [0..nrow-1] < mark on input*/

    /* ---------------------------------------------------------------------- */
    /* count the number of entries in the result C */
    /* ---------------------------------------------------------------------- */

    cnz = 0 ;
    for (j = 0 ; j < ncol ; j++)
    {
        /* clear the Flag array */
        /* mark = CHOLMOD(clear_flag) (Common) ; */
        CHOLMOD_CLEAR_FLAG (Common) ;
        mark = Common->mark ;

        /* for each nonzero B(t,j) in column j, do: */
        pb = Bp [j] ;
        pbend = (bpacked) ? (Bp [j+1]) : (pb + Bnz [j]) ;
        for ( ; pb < pbend ; pb++)
        {
            /* B(t,j) is nonzero */
            t = Bi [pb] ;

            /* add the nonzero pattern of A(:,t) to the pattern of C(:,j) */
            pa = Ap [t] ;
            paend = (apacked) ? (Ap [t+1]) : (pa + Anz [t]) ;
            for ( ; pa < paend ; pa++)
            {
                i = Ai [pa] ;
                if (Flag [i] != mark)
                {
                    Flag [i] = mark ;
                    cnz++ ;
                }
            }
        }
        if (cnz < 0)
        {
            break ;	    /* integer overflow case */
        }
    }

    /* mark = CHOLMOD(clear_flag) (Common) ; */
    CHOLMOD_CLEAR_FLAG (Common) ;
    mark = Common->mark ;

    /* ---------------------------------------------------------------------- */
    /* check for integer overflow */
    /* ---------------------------------------------------------------------- */

    if (cnz < 0)
    {
        ERROR (CHOLMOD_TOO_LARGE, "problem too large") ;
        CHOLMOD(free_sparse) (&A2, Common) ;
        CHOLMOD(free_sparse) (&B2, Common) ;
        ASSERT (CHOLMOD(dump_work) (TRUE, TRUE, values ? n1:0, Common)) ;
        return (NULL) ;
    }

    /* ---------------------------------------------------------------------- */
    /* Determine how to return C sorted (if requested) */
    /* ---------------------------------------------------------------------- */

    do_swap_and_transpose = FALSE ;

    if (sorted)
    {
        /* Determine the best way to return C with sorted columns.  Computing
         * C = (B'*A')' takes cnz + anz + bnz time (ignoring O(n) terms).
         * Sorting C when done, C = (A*B)'', takes 2*cnz time.  Pick the one
         * with the least amount of work. */

        anz = CHOLMOD(nnz) (A, Common) ;
        bnz = CHOLMOD(nnz) (B, Common) ;

        do_swap_and_transpose = (anz + bnz < cnz) ;

        if (do_swap_and_transpose)
        {

            /* -------------------------------------------------------------- */
            /* C = (B'*A')' */
            /* -------------------------------------------------------------- */

            /* workspace: Iwork (A->nrow) */
            A3 = CHOLMOD(ptranspose) (A, values, NULL, NULL, 0, Common) ;
            CHOLMOD(free_sparse) (&A2, Common) ;
            A2 = A3 ;
            if (Common->status < CHOLMOD_OK)
            {
                /* out of memory */
                CHOLMOD(free_sparse) (&A2, Common) ;
                CHOLMOD(free_sparse) (&B2, Common) ;
                ASSERT (CHOLMOD(dump_work) (TRUE, TRUE, values ? n1:0, Common));
                return (NULL) ;
            }
            /* workspace: Iwork (B->nrow) */
            B3 = CHOLMOD(ptranspose) (B, values, NULL, NULL, 0, Common) ;
            CHOLMOD(free_sparse) (&B2, Common) ;
            B2 = B3 ;
            if (Common->status < CHOLMOD_OK)
            {
                /* out of memory */
                CHOLMOD(free_sparse) (&A2, Common) ;
                CHOLMOD(free_sparse) (&B2, Common) ;
                ASSERT (CHOLMOD(dump_work) (TRUE, TRUE, values ? n1:0, Common));
                return (NULL) ;
            }
            A = B2 ;
            B = A2 ;

            /* get the new A matrix */
            Ap  = A->p ;
            Anz = A->nz ;
            Ai  = A->i ;
            Ax  = A->x ;
            apacked = A->packed ;

            /* get the new B matrix */
            Bp  = B->p ;
            Bnz = B->nz ;
            Bi  = B->i ;
            Bx  = B->x ;
            bpacked = B->packed ;

            /* get the size of C' */
            nrow = A->nrow ;
            ncol = B->ncol ;
        }
    }

    /* ---------------------------------------------------------------------- */
    /* allocate C */
    /* ---------------------------------------------------------------------- */

    C = CHOLMOD(allocate_sparse) (nrow, ncol, cnz, FALSE, TRUE, 0,
                                  values ? A->xtype : CHOLMOD_PATTERN, Common) ;
    if (Common->status < CHOLMOD_OK)
    {
        /* out of memory */
        CHOLMOD(free_sparse) (&A2, Common) ;
        CHOLMOD(free_sparse) (&B2, Common) ;
        ASSERT (CHOLMOD(dump_work) (TRUE, TRUE, values ? n1:0, Common)) ;
        return (NULL) ;
    }

    Cp = C->p ;
    Ci = C->i ;
    Cx = C->x ;

    /* ---------------------------------------------------------------------- */
    /* C = A*B */
    /* ---------------------------------------------------------------------- */

    cnz = 0 ;

    if (values)
    {

        /* pattern and values */
        for (j = 0 ; j < ncol ; j++)
        {
            /* clear the Flag array */
            /* mark = CHOLMOD(clear_flag (Common)) ; */
            CHOLMOD_CLEAR_FLAG (Common) ;
            mark = Common->mark ;

            /* start column j of C */
            Cp [j] = cnz ;

            /* for each nonzero B(t,j) in column j, do: */
            pb = Bp [j] ;
            pbend = (bpacked) ? (Bp [j+1]) : (pb + Bnz [j]) ;
            for ( ; pb < pbend ; pb++)
            {
                /* B(t,j) is nonzero */
                t = Bi [pb] ;
                bjt = Bx [pb] ;

                /* add the nonzero pattern of A(:,t) to the pattern of C(:,j)
                 * and scatter the values into W */
                pa = Ap [t] ;
                paend = (apacked) ? (Ap [t+1]) : (pa + Anz [t]) ;
                for ( ; pa < paend ; pa++)
                {
                    i = Ai [pa] ;
                    if (Flag [i] != mark)
                    {
                        Flag [i] = mark ;
                        Ci [cnz++] = i ;
                    }
                    W [i] += Ax [pa] * bjt ;
                }
            }

            /* gather the values into C(:,j) */
            for (p = Cp [j] ; p < cnz ; p++)
            {
                i = Ci [p] ;
                Cx [p] = W [i] ;
                W [i] = 0 ;
            }
        }

    }
    else
    {

        /* pattern only */
        for (j = 0 ; j < ncol ; j++)
        {
            /* clear the Flag array */
            /* mark = CHOLMOD(clear_flag) (Common) ; */
            CHOLMOD_CLEAR_FLAG (Common) ;
            mark = Common->mark ;

            /* start column j of C */
            Cp [j] = cnz ;

            /* for each nonzero B(t,j) in column j, do: */
            pb = Bp [j] ;
            pbend = (bpacked) ? (Bp [j+1]) : (pb + Bnz [j]) ;
            for ( ; pb < pbend ; pb++)
            {
                /* B(t,j) is nonzero */
                t = Bi [pb] ;

                /* add the nonzero pattern of A(:,t) to the pattern of C(:,j) */
                pa = Ap [t] ;
                paend = (apacked) ? (Ap [t+1]) : (pa + Anz [t]) ;
                for ( ; pa < paend ; pa++)
                {
                    i = Ai [pa] ;
                    if (Flag [i] != mark)
                    {
                        Flag [i] = mark ;
                        Ci [cnz++] = i ;
                    }
                }
            }
        }
    }

    Cp [ncol] = cnz ;
    ASSERT (MAX (1,cnz) == C->nzmax) ;

    /* ---------------------------------------------------------------------- */
    /* clear workspace and free temporary matrices */
    /* ---------------------------------------------------------------------- */

    CHOLMOD(free_sparse) (&A2, Common) ;
    CHOLMOD(free_sparse) (&B2, Common) ;
    /* CHOLMOD(clear_flag) (Common) ; */
    CHOLMOD_CLEAR_FLAG (Common) ;
    ASSERT (CHOLMOD(dump_work) (TRUE, TRUE, values ? n1:0, Common)) ;

    /* ---------------------------------------------------------------------- */
    /* convert C to a symmetric upper/lower matrix if requested */
    /* ---------------------------------------------------------------------- */

    /* convert C in place, which cannot fail since no memory is allocated */
    if (stype > 0)
    {
        /* C = triu (C), in place */
        (void) CHOLMOD(band_inplace) (0, ncol, values, C, Common) ;
        C->stype = 1 ;
    }
    else if (stype < 0)
    {
        /* C = tril (C), in place */
        (void) CHOLMOD(band_inplace) (-nrow, 0, values, C, Common) ;
        C->stype = -1 ;
    }
    ASSERT (Common->status >= CHOLMOD_OK) ;

    /* ---------------------------------------------------------------------- */
    /* sort C, if requested */
    /* ---------------------------------------------------------------------- */

    if (sorted)
    {
        if (do_swap_and_transpose)
        {
            /* workspace: Iwork (C->ncol), which is A->nrow since C=(B'*A') */
            C2 = CHOLMOD(ptranspose) (C, values, NULL, NULL, 0, Common) ;
            CHOLMOD(free_sparse) (&C, Common) ;
            if (Common->status < CHOLMOD_OK)
            {
                /* out of memory */
                ASSERT (CHOLMOD(dump_work) (TRUE, TRUE, values ? n1:0, Common));
                return (NULL) ;
            }
            C = C2 ;
        }
        else
        {
            /* workspace: Iwork (max (C->nrow,C->ncol)) */
            if (!CHOLMOD(sort) (C, Common))
            {
                /* out of memory */
                CHOLMOD(free_sparse) (&C, Common) ;
                ASSERT (CHOLMOD(dump_work) (TRUE, TRUE, values ? n1:0, Common));
                return (NULL) ;
            }
        }
    }

    /* ---------------------------------------------------------------------- */
    /* return result */
    /* ---------------------------------------------------------------------- */

    DEBUG (CHOLMOD(dump_sparse) (C, "ssmult", Common) >= 0) ;
    ASSERT (CHOLMOD(dump_work) (TRUE, TRUE, values ? n1:0, Common)) ;
    return (C) ;
}
Ejemplo n.º 7
0
int CHOLMOD(rowadd_mark)
(
    /* ---- input ---- */
    size_t kadd,	/* row/column index to add */
    cholmod_sparse *R,	/* row/column of matrix to factorize (n-by-1) */
    double bk [2],	/* kth entry of the right hand side, b */
    Int *colmark,	/* Int array of size 1.  See cholmod_updown.c */
    /* ---- in/out --- */
    cholmod_factor *L,	/* factor to modify */
    cholmod_dense *X,	/* solution to Lx=b (size n-by-1) */
    cholmod_dense *DeltaB,  /* change in b, zero on output */
    /* --------------- */
    cholmod_common *Common
)
{
    double dk, yj, l_kj, lx, l_ij, sqrt_dk, dj, xk, rnz, fl ;
    double *Lx, *W, *Cx, *Rx, *Xx, *Nx ;
    Int *Li, *Lp, *Lnz, *Flag, *Stack, *Ci, *Rj, *Rp, *Lnext, *Iwork, *Rnz ;
    cholmod_sparse *C, Cmatrix ;
    Int i, j, p, pend, top, len, kk, li, lnz, mark, k, n, parent, Cp [2],
	do_solve, do_update ;
    size_t s ;
    int ok = TRUE ;
    DEBUG (Int lastrow) ;

    /* ---------------------------------------------------------------------- */
    /* check inputs */
    /* ---------------------------------------------------------------------- */

    RETURN_IF_NULL_COMMON (FALSE) ;
    RETURN_IF_NULL (L, FALSE) ;
    RETURN_IF_NULL (R, FALSE) ;
    RETURN_IF_XTYPE_INVALID (L, CHOLMOD_PATTERN, CHOLMOD_REAL, FALSE) ;
    RETURN_IF_XTYPE_INVALID (R, CHOLMOD_REAL, CHOLMOD_REAL, FALSE) ;
    n = L->n ;
    k = kadd ;
    if (kadd >= L->n || k < 0)
    {
	ERROR (CHOLMOD_INVALID, "k invalid") ;
	return (FALSE) ;
    }
    if (R->ncol != 1 || R->nrow != L->n)
    {
	ERROR (CHOLMOD_INVALID, "R invalid") ;
	return (FALSE) ;
    }
    Rj = R->i ;
    Rx = R->x ;
    Rp = R->p ;
    Rnz = R->nz ;
    rnz = (R->packed) ? (Rp [1]) : (Rnz [0]) ;
    do_solve = (X != NULL) && (DeltaB != NULL) ;
    if (do_solve)
    {
	RETURN_IF_XTYPE_INVALID (X, CHOLMOD_REAL, CHOLMOD_REAL, FALSE) ;
	RETURN_IF_XTYPE_INVALID (DeltaB, CHOLMOD_REAL, CHOLMOD_REAL, FALSE) ;
	Xx = X->x ;
	Nx = DeltaB->x ;
	if (X->nrow != L->n || X->ncol != 1 || DeltaB->nrow != L->n ||
		DeltaB->ncol != 1 || Xx == NULL || Nx == NULL)
	{
	    ERROR (CHOLMOD_INVALID, "X and/or DeltaB invalid") ;
	    return (FALSE) ;
	}
    }
    else
    {
	Xx = NULL ;
	Nx = NULL ;
    }
    Common->status = CHOLMOD_OK ;

    /* ---------------------------------------------------------------------- */
    /* allocate workspace */
    /* ---------------------------------------------------------------------- */

    /* s = 2*n */
    s = CHOLMOD(mult_size_t) (n, 2, &ok) ;
    if (!ok)
    {
	ERROR (CHOLMOD_TOO_LARGE, "problem too large") ;
	return (FALSE) ;
    }

    CHOLMOD(allocate_work) (n, s, s, Common) ;
    if (Common->status < CHOLMOD_OK)
    {
	return (FALSE) ;
    }
    ASSERT (CHOLMOD(dump_work) (TRUE, TRUE, s, Common)) ;

    /* ---------------------------------------------------------------------- */
    /* convert to simplicial numeric LDL' factor, if not already */
    /* ---------------------------------------------------------------------- */

    if (L->xtype == CHOLMOD_PATTERN || L->is_super || L->is_ll) 
    {
	/* can only update/downdate a simplicial LDL' factorization */
	CHOLMOD(change_factor) (CHOLMOD_REAL, FALSE, FALSE, FALSE, FALSE, L,
		Common) ;
	if (Common->status < CHOLMOD_OK)
	{
	    /* out of memory, L is returned unchanged */
	    return (FALSE) ;
	}
    }

    /* ---------------------------------------------------------------------- */
    /* get inputs */
    /* ---------------------------------------------------------------------- */

    /* inputs, not modified on output: */
    Lp = L->p ;		/* size n+1.  input, not modified on output */

    /* outputs, contents defined on input for incremental case only: */
    Lnz = L->nz ;	/* size n */
    Li = L->i ;		/* size L->nzmax.  Can change in size. */
    Lx = L->x ;		/* size L->nzmax.  Can change in size. */
    Lnext = L->next ;	/* size n+2 */

    ASSERT (L->nz != NULL) ;

    PRINT1 (("rowadd:\n")) ;
    fl = 0 ;

#if 0
#ifndef NDEBUG
    /* column k of L should be zero, except for the diagonal.  This test is
     * overly cautious. */
    for (p = Lp [k] + 1 ; p < Lp [k] + Lnz [k] ; p++) ASSERT (Lx [p] == 0) ;
#endif
#endif

    /* ---------------------------------------------------------------------- */
    /* get workspace */
    /* ---------------------------------------------------------------------- */

    Flag = Common->Flag ;   /* size n */
    W = Common->Xwork ;     /* size n */
    Cx = W + n ;	    /* size n (use 2nd column of Xwork for C) */
    Iwork = Common->Iwork ;
    Stack = Iwork ;	    /* size n (i/i/l), also in cholmod_updown */
    Ci = Iwork + n ;	    /* size n (i/i/l) */
    /* NOTE: cholmod_updown uses Iwork [0..n-1] (i/i/l) as Stack as well */

    mark = Common->mark ;

    /* copy Rj/Rx into W/Ci */
    for (p = 0 ; p < rnz ; p++)
    {
	i = Rj [p] ;
	ASSERT (i >= 0 && i < n) ;
	W [i] = Rx [p] ;
	Ci [p] = i ;
    }

    /* At this point, W [Ci [0..rnz-1]] holds the sparse vector to add */
    /* The nonzero pattern of column W is held in Ci (it may be unsorted). */

    /* ---------------------------------------------------------------------- */
    /* symbolic factorization to get pattern of kth row of L */
    /* ---------------------------------------------------------------------- */

    DEBUG (for (p = 0 ; p < rnz ; p++)
	    PRINT1 (("C ("ID",%g)\n", Ci [p], W [Ci [p]]))) ;
    ASSERT (CHOLMOD(dump_work) (TRUE, TRUE, 0, Common)) ;

    /* flag the diagonal */
    Flag [k] = mark ;

    /* find the union of all the paths */
    top = n ;
    lnz = 0 ;	/* # of nonzeros in column k of L, excluding diagonal */
    for (p = 0 ; p < rnz ; p++)
    {
	i = Ci [p] ;

	if (i < k)
	{

	    /* walk from i = entry in Ci to root (and stop if i marked)*/
	    PRINT2 (("\nwalk from i = "ID" towards k = "ID"\n", i, k)) ;
	    len = 0 ;

	    /* walk up tree, but stop if we go below the diagonal */
	    while (i < k && i != EMPTY && Flag [i] < mark)
	    {
		PRINT2 (("   Add "ID" to path\n", i)) ;
		ASSERT (i >= 0 && i < k) ;
		Stack [len++] = i ;	/* place i on the stack */
		Flag [i] = mark ;		/* mark i as visited */
		/* parent is the first entry in the column after the diagonal */
		ASSERT (Lnz [i] > 0) ;
		parent = (Lnz [i] > 1) ? (Li [Lp [i] + 1]) : EMPTY ;
		PRINT2 (("                      parent: "ID"\n", parent)) ;
		i = parent ;	/* go up the tree */
	    }
	    ASSERT (len <= top) ;

	    /* move the path down to the bottom of the stack */
	    /* this shifts Stack [0..len-1] down to [ ... oldtop-1] */
	    while (len > 0)
	    {
		Stack [--top] = Stack [--len] ;
	    }
	}
	else if (i > k)
	{
	    /* prune the diagonal and upper triangular entries from Ci */
	    Ci [lnz++] = i ;
	    Flag [i] = mark ;
	}
    }

#ifndef NDEBUG
    PRINT1 (("length of S after prune: "ID"\n", lnz)) ;
    for (p = 0 ; p < lnz ; p++)
    {
	PRINT1 (("After prune Ci ["ID"] = "ID"\n", p, Ci [p])) ;
	ASSERT (Ci [p] > k) ;
    }
#endif

    /* ---------------------------------------------------------------------- */
    /* ensure each column of L has enough space to grow */
    /* ---------------------------------------------------------------------- */

    for (kk = top ; kk < n ; kk++)
    {
	/* could skip this if we knew column j already included row k */
	j = Stack [kk] ;
	if (Lp [j] + Lnz [j] >= Lp [Lnext [j]])
	{
	    PRINT1 (("Col "ID" realloc, old Lnz "ID"\n", j, Lnz [j])) ;
	    if (!CHOLMOD(reallocate_column) (j, Lnz [j] + 1, L, Common))
	    {
		/* out of memory, L is now simplicial symbolic */
		/* CHOLMOD(clear_flag) (Common) ; */
		CHOLMOD_CLEAR_FLAG (Common) ;
		for (i = 0 ; i < n ; i++)
		{
		    W [i] = 0 ;
		}
		return (FALSE) ;
	    }
	    /* L->i and L->x may have moved */
	    Li = L->i ;
	    Lx = L->x ;
	}
	ASSERT (Lp [j] + Lnz [j] < Lp [Lnext [j]]
	    || (Lp [Lnext [j]] - Lp [j] == n-j)) ;
    }

    /* ---------------------------------------------------------------------- */
    /* compute kth row of L and store in column form */
    /* ---------------------------------------------------------------------- */

    /* solve L (1:k-1, 1:k-1) * y (1:k-1) = b (1:k-1) */
    /* where b (1:k) is in W and Ci */

    /* L (k, 1:k-1) = y (1:k-1) ./ D (1:k-1) */
    /* D (k) = B (k,k) - L (k, 1:k-1) * y (1:k-1) */

    PRINT2 (("\nForward solve: "ID" to "ID"\n", top, n)) ;
    ASSERT (Lnz [k] >= 1 && Li [Lp [k]] == k) ;
    DEBUG (for (i = top ; i < n ; i++) PRINT2 ((" Path: "ID"\n", Stack [i]))) ;

    dk = W [k] ;
    W [k] = 0.0 ;

    /* if do_solve: compute x (k) = b (k) - L (k, 1:k-1) * x (1:k-1) */
    xk = bk [0] ;
    PRINT2 (("B [k] = %g\n", xk)) ;

    for (kk = top ; kk < n ; kk++)
    {
	j = Stack [kk] ;
	i = j ;
	PRINT2 (("Forward solve col j = "ID":\n", j)) ;
	ASSERT (j >= 0 && j < k) ;

	/* forward solve using L (j+1:k-1,j) */
	yj = W [j] ;
	W [j] = 0.0 ;
	p = Lp [j] ;
	pend = p + Lnz [j] ;
	ASSERT (Lnz [j] > 0) ;
	dj = Lx [p++] ;
	for ( ; p < pend ; p++)
	{
	    i = Li [p] ;
	    PRINT2 (("    row "ID"\n", i)) ;
	    ASSERT (i > j) ;
	    ASSERT (i < n) ;
	    /* stop at row k */
	    if (i >= k)
	    {
		break ;
	    }
	    W [i] -= Lx [p] * yj ;
	}

	/* each iteration of the above for loop did 2 flops, and 3 flops
	 * are done below.  so: fl += 2 * (Lp [j] - p - 1) + 3 becomes: */
	fl += 2 * (Lp [j] - p) + 1 ;

	/* scale L (k,1:k-1) and compute dot product for D (k,k) */
	l_kj = yj / dj ;
	dk -= l_kj * yj ;

	/* compute dot product for X(k) */
	if (do_solve)
	{
	    xk -= l_kj * Xx [j] ;
	}

	/* store l_kj in the jth column of L */
	/* and shift the rest of the column down */

	li = k ;
	lx = l_kj ;

	if (i == k)
	{
	    /* no need to modify the nonzero pattern of L, since it already
	     * contains row index k. */
	    ASSERT (Li [p] == k) ;
	    Lx [p] = l_kj ;

	    for (p++ ; p < pend ; p++)
	    {
		i    = Li [p] ;
		l_ij = Lx [p] ;
		ASSERT (i > k && i < n) ;
		PRINT2 (("   apply to row "ID" of column k of L\n", i)) ;

		/* add to the pattern of the kth column of L */
		if (Flag [i] < mark)
		{
		    PRINT2 (("   add Ci["ID"] = "ID"\n", lnz, i)) ;
		    ASSERT (i > k) ;
		    Ci [lnz++] = i ;
		    Flag [i] = mark ;
		}

		/* apply the update to the kth column of L */
		/* yj is equal to l_kj * d_j */

		W [i] -= l_ij * yj ;
	    }

	}
	else
	{

	    PRINT2 (("Shift col j = "ID", apply saxpy to col k of L\n", j)) ;
	    for ( ; p < pend ; p++)
	    {
		/* swap (Li [p],Lx [p]) with (li,lx) */
		i    = Li [p] ;
		l_ij = Lx [p] ;
		Li [p] = li ;
		Lx [p] = lx ;
		li = i ;
		lx = l_ij ;
		ASSERT (i > k && i < n) ;
		PRINT2 (("   apply to row "ID" of column k of L\n", i)) ;

		/* add to the pattern of the kth column of L */
		if (Flag [i] < mark)
		{
		    PRINT2 (("   add Ci["ID"] = "ID"\n", lnz, i)) ;
		    ASSERT (i > k) ;
		    Ci [lnz++] = i ;
		    Flag [i] = mark ;
		}

		/* apply the update to the kth column of L */
		/* yj is equal to l_kj * d_j */

		W [i] -= l_ij * yj ;
	    }

	    /* store the last value in the jth column of L */
	    Li [p] = li ;
	    Lx [p] = lx ;
	    Lnz [j]++ ;

	}
    }

    /* ---------------------------------------------------------------------- */
    /* merge C with the pattern of the existing column of L */
    /* ---------------------------------------------------------------------- */

    /* This column should be zero, but it may contain explicit zero entries.
     * These entries should be kept, not dropped. */
    p = Lp [k] ;
    pend = p + Lnz [k] ;
    for (p++ ; p < pend ; p++)
    {
	i = Li [p] ;
	/* add to the pattern of the kth column of L */
	if (Flag [i] < mark)
	{
	    PRINT2 (("   add Ci["ID"] = "ID" from existing col k\n", lnz, i)) ;
	    ASSERT (i > k) ;
	    Ci [lnz++] = i ;
	    Flag [i] = mark ;
	}
    }

    /* ---------------------------------------------------------------------- */

    if (do_solve)
    {
	Xx [k] = xk ;
	PRINT2 (("Xx [k] = %g\n", Xx [k])) ;
    }

    /* ---------------------------------------------------------------------- */
    /* ensure abs (dk) >= dbound, if dbound is given */
    /* ---------------------------------------------------------------------- */

    dk = (IS_GT_ZERO (Common->dbound)) ? (CHOLMOD(dbound) (dk, Common)) : dk ;

    PRINT2 (("D [k = "ID"] = %g\n", k, dk)) ;

    /* ---------------------------------------------------------------------- */
    /* store the kth column of L */
    /* ---------------------------------------------------------------------- */

    /* ensure the new column of L has enough space */
    if (Lp [k] + lnz + 1 > Lp [Lnext [k]])
    {
	PRINT1 (("New Col "ID" realloc, old Lnz "ID"\n", k, Lnz [k])) ;
	if (!CHOLMOD(reallocate_column) (k, lnz + 1, L, Common))
	{
	    /* out of memory, L is now simplicial symbolic */
	    CHOLMOD(clear_flag) (Common) ;
	    for (i = 0 ; i < n ; i++)
	    {
		W [i] = 0 ;
	    }
	    return (FALSE) ;
	}
	/* L->i and L->x may have moved */
	Li = L->i ;
	Lx = L->x ;
    }
    ASSERT (Lp [k] + lnz + 1 <= Lp [Lnext [k]]) ;

#ifndef NDEBUG
    PRINT2 (("\nPrior to sort: lnz "ID" (excluding diagonal)\n", lnz)) ;
    for (kk = 0 ; kk < lnz ; kk++)
    {
	i = Ci [kk] ;
	PRINT2 (("L ["ID"] kept: "ID" %e\n", kk, i, W [i] / dk)) ;
    }
#endif

    /* sort Ci */
    qsort (Ci, lnz, sizeof (Int), (int (*) (const void *, const void *)) icomp);

    /* store the kth column of L */
    DEBUG (lastrow = k) ;
    p = Lp [k] ;
    Lx [p++] = dk ;
    Lnz [k] = lnz + 1 ;
    fl += lnz ;
    for (kk = 0 ; kk < lnz ; kk++, p++)
    {
	i = Ci [kk] ;
	PRINT2 (("L ["ID"] after sort: "ID", %e\n", kk, i, W [i] / dk)) ;
	ASSERT (i > lastrow) ;
	Li [p] = i ;
	Lx [p] = W [i] / dk ;
	W [i] = 0.0 ;
	DEBUG (lastrow = i) ;
    }

    /* compute DeltaB for updown (in DeltaB) */
    if (do_solve)
    {
	p = Lp [k] ;
	pend = p + Lnz [k] ;
	for (p++ ; p < pend ; p++)
	{
	    ASSERT (Li [p] > k) ;
	    Nx [Li [p]] -= Lx [p] * xk ;
	}
    }

    /* clear the flag for the update */
    mark = CHOLMOD(clear_flag) (Common) ;

    /* workspaces are now cleared */
    ASSERT (CHOLMOD(dump_work) (TRUE, TRUE, 2*n, Common)) ;

    /* ---------------------------------------------------------------------- */
    /* update/downdate */
    /* ---------------------------------------------------------------------- */

    /* update or downdate L (k+1:n, k+1:n) with the vector
     * C = L (:,k) * sqrt (abs (D [k])).
     * Do a numeric update if D[k] < 0, numeric downdate otherwise.
     */

    ok = TRUE ;
    Common->modfl = 0 ;

    PRINT1 (("rowadd update lnz = "ID"\n", lnz)) ;
    if (lnz > 0)
    {
	do_update = IS_LT_ZERO (dk) ;
	if (do_update)
	{
	    dk = -dk ;
	}
	sqrt_dk = sqrt (dk) ;
	p = Lp [k] + 1 ;
	for (kk = 0 ; kk < lnz ; kk++, p++)
	{
	    Cx [kk] = Lx [p] * sqrt_dk ;
	}
	fl += lnz + 1 ;

	/* create a n-by-1 sparse matrix to hold the single column */
	C = &Cmatrix ;
	C->nrow = n ;
	C->ncol = 1 ;
	C->nzmax = lnz ;
	C->sorted = TRUE ;
	C->packed = TRUE ;
	C->p = Cp ;
	C->i = Ci ;
	C->x = Cx ;
	C->nz = NULL ;
	C->itype = L->itype ;
	C->xtype = L->xtype ;
	C->dtype = L->dtype ;
	C->z = NULL ;
	C->stype = 0 ;

	Cp [0] = 0 ;
	Cp [1] = lnz ;

	/* numeric downdate if dk > 0, and optional Lx=b change */
	/* workspace: Flag (nrow), Head (nrow+1), W (nrow), Iwork (2*nrow) */
	ok = CHOLMOD(updown_mark) (do_update ? (1) : (0), C, colmark,
		L, X, DeltaB, Common) ;

	/* clear workspace */
	for (kk = 0 ; kk < lnz ; kk++)
	{
	    Cx [kk] = 0 ;
	}
    }

    Common->modfl += fl ;

    DEBUG (CHOLMOD(dump_factor) (L, "LDL factorization, L:", Common)) ;
    ASSERT (CHOLMOD(dump_work) (TRUE, TRUE, 2*n, Common)) ;
    return (ok) ;
}
int CHOLMOD(rowcolcounts)
(
    /* ---- input ---- */
    cholmod_sparse *A,	/* matrix to analyze */
    Int *fset,		/* subset of 0:(A->ncol)-1 */
    size_t fsize,	/* size of fset */
    Int *Parent,	/* size nrow.  Parent [i] = p if p is the parent of i */
    Int *Post,		/* size nrow.  Post [k] = i if i is the kth node in
			 * the postordered etree. */
    /* ---- output --- */
    Int *RowCount,	/* size nrow. RowCount [i] = # entries in the ith row of
			 * L, including the diagonal. */
    Int *ColCount,	/* size nrow. ColCount [i] = # entries in the ith
			 * column of L, including the diagonal. */
    Int *First,		/* size nrow.  First [i] = k is the least postordering
			 * of any descendant of i. */
    Int *Level,		/* size nrow.  Level [i] is the length of the path from
			 * i to the root, with Level [root] = 0. */
    /* --------------- */
    cholmod_common *Common
)
{
    double fl, ff ;
    Int *Ap, *Ai, *Anz, *PrevNbr, *SetParent, *Head, *PrevLeaf, *Anext, *Ipost,
	*Iwork ;
    Int i, j, r, k, len, s, p, pend, inew, stype, nf, anz, inode, parent,
	nrow, ncol, packed, use_fset, jj ;
    size_t w ;
    int ok = TRUE ;

    /* ---------------------------------------------------------------------- */
    /* check inputs */
    /* ---------------------------------------------------------------------- */

    RETURN_IF_NULL_COMMON (FALSE) ;
    RETURN_IF_NULL (A, FALSE) ;
    RETURN_IF_NULL (Parent, FALSE) ;
    RETURN_IF_NULL (Post, FALSE) ;
    RETURN_IF_NULL (ColCount, FALSE) ;
    RETURN_IF_NULL (First, FALSE) ;
    RETURN_IF_NULL (Level, FALSE) ;
    RETURN_IF_XTYPE_INVALID (A, CHOLMOD_PATTERN, CHOLMOD_ZOMPLEX, FALSE) ;
    stype = A->stype ;
    if (stype > 0)
    {
	/* symmetric with upper triangular part not supported */
	ERROR (CHOLMOD_INVALID, "symmetric upper not supported") ;
	return (FALSE) ;
    }
    Common->status = CHOLMOD_OK ;

    /* ---------------------------------------------------------------------- */
    /* allocate workspace */
    /* ---------------------------------------------------------------------- */

    nrow = A->nrow ;	/* the number of rows of A */
    ncol = A->ncol ;	/* the number of columns of A */

    /* w = 2*nrow + (stype ? 0 : ncol) */
    w = CHOLMOD(mult_size_t) (nrow, 2, &ok) ;
    w = CHOLMOD(add_size_t) (w, (stype ? 0 : ncol), &ok) ;
    if (!ok)
    {
	ERROR (CHOLMOD_TOO_LARGE, "problem too large") ;
	return (FALSE) ;
    }

    CHOLMOD(allocate_work) (nrow, w, 0, Common) ;
    if (Common->status < CHOLMOD_OK)
    {
	return (FALSE) ;
    }

    ASSERT (CHOLMOD(dump_perm) (Post, nrow, nrow, "Post", Common)) ;
    ASSERT (CHOLMOD(dump_parent) (Parent, nrow, "Parent", Common)) ;

    /* ---------------------------------------------------------------------- */
    /* get inputs */
    /* ---------------------------------------------------------------------- */

    Ap = A->p ;	/* size ncol+1, column pointers for A */
    Ai = A->i ;	/* the row indices of A, of size nz=Ap[ncol+1] */
    Anz = A->nz ;
    packed = A->packed ;
    ASSERT (IMPLIES (!packed, Anz != NULL)) ;

    /* ---------------------------------------------------------------------- */
    /* get workspace */
    /* ---------------------------------------------------------------------- */

    Iwork = Common->Iwork ;
    SetParent = Iwork ;		    /* size nrow (i/i/l) */
    PrevNbr   = Iwork + nrow ;	    /* size nrow (i/i/l) */
    Anext     = Iwork + 2*((size_t) nrow) ;    /* size ncol (i/i/l) (unsym only) */
    PrevLeaf  = Common->Flag ;	    /* size nrow */
    Head      = Common->Head ;	    /* size nrow+1 (unsym only)*/

    /* ---------------------------------------------------------------------- */
    /* find the first descendant and level of each node in the tree */
    /* ---------------------------------------------------------------------- */

    /* First [i] = k if the postordering of first descendent of node i is k */
    /* Level [i] = length of path from node i to the root (Level [root] = 0) */

    for (i = 0 ; i < nrow ; i++)
    {
	First [i] = EMPTY ;
    }

    /* postorder traversal of the etree */
    for (k = 0 ; k < nrow ; k++)
    {
	/* node i of the etree is the kth node in the postordered etree */
	i = Post [k] ;

	/* i is a leaf if First [i] is still EMPTY */
	/* ColCount [i] starts at 1 if i is a leaf, zero otherwise */
	ColCount [i] = (First [i] == EMPTY) ? 1 : 0 ;

	/* traverse the path from node i to the root, stopping if we find a
	 * node r whose First [r] is already defined. */
	len = 0 ;
	for (r = i ; (r != EMPTY) && (First [r] == EMPTY) ; r = Parent [r])
	{
	    First [r] = k ;
	    len++ ;
	}
	if (r == EMPTY)
	{
	    /* we hit a root node, the level of which is zero */
	    len-- ;
	}
	else
	{
	    /* we stopped at node r, where Level [r] is already defined */
	    len += Level [r] ;
	}
	/* re-traverse the path from node i to r; set the level of each node */
	for (s = i ; s != r ; s = Parent [s])
	{
	    Level [s] = len-- ;
	}
    }

    /* ---------------------------------------------------------------------- */
    /* AA' case: sort columns of A according to first postordered row index */
    /* ---------------------------------------------------------------------- */

    fl = 0.0 ;
    if (stype == 0)
    {
	/* [ use PrevNbr [0..nrow-1] as workspace for Ipost */
	Ipost = PrevNbr ;
	/* Ipost [i] = k if i is the kth node in the postordered etree. */
	for (k = 0 ; k < nrow ; k++)
	{
	    Ipost [Post [k]] = k ;
	}
	use_fset = (fset != NULL) ;
	if (use_fset)
	{
	    nf = fsize ;
	    /* clear Anext to check fset */
	    for (j = 0 ; j < ncol ; j++)
	    {
		Anext [j] = -2 ;
	    }
	    /* find the first postordered row in each column of A (post,f)
	     * and place the column in the corresponding link list */
	    for (jj = 0 ; jj < nf ; jj++)
	    {
		j = fset [jj] ;
		if (j < 0 || j > ncol || Anext [j] != -2)
		{
		    /* out-of-range or duplicate entry in fset */
		    ERROR (CHOLMOD_INVALID, "fset invalid") ;
		    return (FALSE) ;
		}
		/* flag column j as having been seen */
		Anext [j] = EMPTY ;
	    }
	    /* fset is now valid */
	    ASSERT (CHOLMOD(dump_perm) (fset, nf, ncol, "fset", Common)) ;
	}
	else
	{
	    nf = ncol ;
	}
	for (jj = 0 ; jj < nf ; jj++)
	{
	    j = (use_fset) ? (fset [jj]) : jj ;
	    /* column j is in the fset; find the smallest row (if any) */
	    p = Ap [j] ;
	    pend = (packed) ? (Ap [j+1]) : (p + Anz [j]) ;
	    ff = (double) MAX (0, pend - p) ;
	    fl += ff*ff + ff ;
	    if (pend > p)
	    {
		k = Ipost [Ai [p]] ;
		for ( ; p < pend ; p++)
		{
		    inew = Ipost [Ai [p]] ;
		    k = MIN (k, inew) ;
		}
		/* place column j in link list k */
		ASSERT (k >= 0 && k < nrow) ;
		Anext [j] = Head [k] ;
		Head [k] = j ;
	    }
	}
	/* Ipost no longer needed for inverse postordering ]
	 * Head [k] contains a link list of all columns whose first
	 * postordered row index is equal to k, for k = 0 to nrow-1. */
    }

    /* ---------------------------------------------------------------------- */
    /* compute the row counts and node weights */
    /* ---------------------------------------------------------------------- */

    if (RowCount != NULL)
    {
	for (i = 0 ; i < nrow ; i++)
	{
	    RowCount [i] = 1 ;
	}
    }
    for (i = 0 ; i < nrow ; i++)
    {
	PrevLeaf [i] = EMPTY ;
	PrevNbr [i] = EMPTY ;
	SetParent [i] = i ;	/* every node is in its own set, by itself */
    }

    if (stype != 0)
    {

	/* ------------------------------------------------------------------ */
	/* symmetric case: LL' = A */
	/* ------------------------------------------------------------------ */

	/* also determine the number of entries in triu(A) */
	anz = nrow ;
	for (k = 0 ; k < nrow ; k++)
	{
	    /* j is the kth node in the postordered etree */
	    j = initialize_node (k, Post, Parent, ColCount, PrevNbr) ;

	    /* for all nonzeros A(i,j) below the diagonal, in column j of A */
	    p = Ap [j] ;
	    pend = (packed) ? (Ap [j+1]) : (p + Anz [j]) ;
	    for ( ; p < pend ; p++)
	    {
		i = Ai [p] ;
		if (i > j)
		{
		    /* j is a descendant of i in etree(A) */
		    anz++ ;
		    process_edge (j, i, k, First, PrevNbr, ColCount,
			    PrevLeaf, RowCount, SetParent, Level) ;
		}
	    }
	    /* update SetParent: UNION (j, Parent [j]) */
	    finalize_node (j, Parent, SetParent) ;
	}
	Common->anz = anz ;
    }
    else
    {

	/* ------------------------------------------------------------------ */
	/* unsymmetric case: LL' = AA' */
	/* ------------------------------------------------------------------ */

	for (k = 0 ; k < nrow ; k++)
	{
	    /* inode is the kth node in the postordered etree */
	    inode = initialize_node (k, Post, Parent, ColCount, PrevNbr) ;

	    /* for all cols j whose first postordered row is k: */
	    for (j = Head [k] ; j != EMPTY ; j = Anext [j])
	    {
		/* k is the first postordered row in column j of A */
		/* for all rows i in column j: */
		p = Ap [j] ;
		pend = (packed) ? (Ap [j+1]) : (p + Anz [j]) ;
		for ( ; p < pend ; p++)
		{
		    i = Ai [p] ;
		    /* has i already been considered at this step k */
		    if (PrevNbr [i] < k)
		    {
			/* inode is a descendant of i in etree(AA') */
			/* process edge (inode,i) and set PrevNbr[i] to k */
			process_edge (inode, i, k, First, PrevNbr, ColCount,
				PrevLeaf, RowCount, SetParent, Level) ;
		    }
		}
	    }
	    /* clear link list k */
	    Head [k] = EMPTY ;
	    /* update SetParent: UNION (inode, Parent [inode]) */
	    finalize_node (inode, Parent, SetParent) ;
	}
    }

    /* ---------------------------------------------------------------------- */
    /* finish computing the column counts */
    /* ---------------------------------------------------------------------- */

    for (j = 0 ; j < nrow ; j++)
    {
	parent = Parent [j] ;
	if (parent != EMPTY)
	{
	    /* add the ColCount of j to its parent */
	    ColCount [parent] += ColCount [j] ;
	}
    }

    /* ---------------------------------------------------------------------- */
    /* clear workspace */
    /* ---------------------------------------------------------------------- */

    Common->mark = EMPTY ;
    /* CHOLMOD(clear_flag) (Common) ; */
    CHOLMOD_CLEAR_FLAG (Common) ;

    ASSERT (CHOLMOD(dump_work) (TRUE, TRUE, 0, Common)) ;

    /* ---------------------------------------------------------------------- */
    /* flop count and nnz(L) for subsequent LL' numerical factorization */
    /* ---------------------------------------------------------------------- */

    /* use double to avoid integer overflow.  lnz cannot be NaN. */
    Common->aatfl = fl ;
    Common->lnz = 0. ;
    fl = 0 ;
    for (j = 0 ; j < nrow ; j++)
    {
	ff = (double) (ColCount [j]) ;
	Common->lnz += ff ;
	fl += ff*ff ;
    }

    Common->fl = fl ;
    PRINT1 (("rowcol fl %g lnz %g\n", Common->fl, Common->lnz)) ;

    return (TRUE) ;
}
Ejemplo n.º 9
0
int CHOLMOD(updown_mask)
(
    /* ---- input ---- */
    int update,		/* TRUE for update, FALSE for downdate */
    cholmod_sparse *C,	/* the incoming sparse update */
    Int *colmark,	/* Int array of size n.  See cholmod_updown.c */
    Int *mask,		/* size n */
    /* ---- in/out --- */
    cholmod_factor *L,	/* factor to modify */
    cholmod_dense *X,	/* solution to Lx=b (size n-by-1) */
    cholmod_dense *DeltaB,  /* change in b, zero on output */
    /* --------------- */
    cholmod_common *Common
)
{
    double xj, fl ;
    double *Lx, *W, *Xx, *Nx ;
    Int *Li, *Lp, *Lnz, *Cp, *Ci, *Cnz, *Head, *Flag, *Stack, *Lnext, *Iwork,
	*Set_ps1 [32], *Set_ps2 [32], *ps1, *ps2 ;
    size_t maxrank ;
    Path_type OrderedPath [32], Path [32] ;
    Int n, wdim, k1, k2, npaths, i, j, row, packed, ccol, p, cncol, do_solve,
	mark, jj, j2, kk, nextj, p1, p2, c, use_colmark, newlnz,
	k, newpath, path_order, w_order, scattered, path, newparent, pp1, pp2,
	smax, maxrow, row1, nsets, s, p3, newlnz1, Set [32], top, len, lnz, m,
	botrow ;
    size_t w ;
    int ok = TRUE ;
    DEBUG (Int oldparent) ;

    /* ---------------------------------------------------------------------- */
    /* check inputs */
    /* ---------------------------------------------------------------------- */

    RETURN_IF_NULL_COMMON (FALSE) ;
    RETURN_IF_NULL (C, FALSE) ;
    RETURN_IF_NULL (L, FALSE) ;
    RETURN_IF_XTYPE_INVALID (L, CHOLMOD_PATTERN, CHOLMOD_REAL, FALSE) ;
    RETURN_IF_XTYPE_INVALID (C, CHOLMOD_REAL, CHOLMOD_REAL, FALSE) ;
    n = L->n ;
    cncol = C->ncol ;
    if (!(C->sorted))
    {
	ERROR (CHOLMOD_INVALID, "C must have sorted columns") ;
	return (FALSE) ;
    }
    if (n != (Int) (C->nrow))
    {
	ERROR (CHOLMOD_INVALID, "C and L dimensions do not match") ;
	return (FALSE) ;
    }
    do_solve = (X != NULL) && (DeltaB != NULL) ;
    if (do_solve)
    {
	RETURN_IF_XTYPE_INVALID (X, CHOLMOD_REAL, CHOLMOD_REAL, FALSE) ;
	RETURN_IF_XTYPE_INVALID (DeltaB, CHOLMOD_REAL, CHOLMOD_REAL, FALSE) ;
	Xx = X->x ;
	Nx = DeltaB->x ;
	if (X->nrow != L->n || X->ncol != 1 || DeltaB->nrow != L->n ||
		DeltaB->ncol != 1 || Xx == NULL || Nx == NULL)
	{
	    ERROR (CHOLMOD_INVALID, "X and/or DeltaB invalid") ;
	    return (FALSE) ;
	}
    }
    else
    {
	Xx = NULL ;
	Nx = NULL ;
    }
    Common->status = CHOLMOD_OK ;
    Common->modfl = 0 ;

    fl = 0 ;
    use_colmark = (colmark != NULL) ;

    /* ---------------------------------------------------------------------- */
    /* allocate workspace */
    /* ---------------------------------------------------------------------- */

    /* Note: cholmod_rowadd and cholmod_rowdel use the second n doubles in
     * Common->Xwork for Cx, and then perform a rank-1 update here, which uses
     * the first n doubles in Common->Xwork.   Both the rowadd and rowdel
     * routines allocate enough workspace so that Common->Xwork isn't destroyed
     * below.  Also, both cholmod_rowadd and cholmod_rowdel use the second n
     * ints in Common->Iwork for Ci.
     */

    /* make sure maxrank is in the proper range */
    maxrank = CHOLMOD(maxrank) (n, Common) ;
    k = MIN (cncol, (Int) maxrank) ;	/* maximum k is wdim */
    wdim = Power2 [k] ;		/* number of columns needed in W */
    ASSERT (wdim <= (Int) maxrank) ;
    PRINT1 (("updown wdim final "ID" k "ID"\n", wdim, k)) ;

    /* w = wdim * n */
    w = CHOLMOD(mult_size_t) (n, wdim, &ok) ;
    if (!ok)
    {
	ERROR (CHOLMOD_TOO_LARGE, "problem too large") ;
	return (FALSE) ;
    }

    CHOLMOD(allocate_work) (n, n, w, Common) ;
    if (Common->status < CHOLMOD_OK || maxrank == 0)
    {
	/* out of memory, L is returned unchanged */
	return (FALSE) ;
    }

    /* ---------------------------------------------------------------------- */
    /* convert to simplicial numeric LDL' factor, if not already */
    /* ---------------------------------------------------------------------- */

    if (L->xtype == CHOLMOD_PATTERN || L->is_super || L->is_ll) 
    {
	/* can only update/downdate a simplicial LDL' factorization */
	CHOLMOD(change_factor) (CHOLMOD_REAL, FALSE, FALSE, FALSE, FALSE, L,
		Common) ;
	if (Common->status < CHOLMOD_OK)
	{
	    /* out of memory, L is returned unchanged */
	    return (FALSE) ;
	}
    }

    /* ---------------------------------------------------------------------- */
    /* get inputs */
    /* ---------------------------------------------------------------------- */

    /* mark = CHOLMOD(clear_flag) (Common) ; */
    CHOLMOD_CLEAR_FLAG (Common) ;
    mark = Common->mark ;

    PRINT1 (("updown, rank %g update %d\n", (double) C->ncol, update)) ;
    DEBUG (CHOLMOD(dump_factor) (L, "input L for updown", Common)) ;
    ASSERT (CHOLMOD(dump_sparse) (C, "input C for updown", Common) >= 0) ;

    Ci = C->i ;
    Cp = C->p ;
    Cnz = C->nz ;
    packed = C->packed ;
    ASSERT (IMPLIES (!packed, Cnz != NULL)) ;

    /* ---------------------------------------------------------------------- */
    /* quick return */
    /* ---------------------------------------------------------------------- */

    if (cncol <= 0 || n == 0)
    {
	/* nothing to do */
	return (TRUE) ;
    }

    /* ---------------------------------------------------------------------- */
    /* get L */
    /* ---------------------------------------------------------------------- */

    Li = L->i ;
    Lx = L->x ;
    Lp = L->p ;
    Lnz = L->nz ;
    Lnext = L->next ;
    ASSERT (Lnz != NULL) ;

    /* ---------------------------------------------------------------------- */
    /* get workspace */
    /* ---------------------------------------------------------------------- */

    Flag = Common->Flag ;	/* size n, Flag [i] <= mark must hold */
    Head = Common->Head ;	/* size n, Head [i] == EMPTY must hold */
    W = Common->Xwork ;		/* size n-by-wdim, zero on input and output*/

    /* note that Iwork [n .. 2*n-1] (i/i/l) may be in use in rowadd/rowdel: */
    Iwork = Common->Iwork ;
    Stack = Iwork ;		/* size n, uninitialized (i/i/l) */

    /* ---------------------------------------------------------------------- */
    /* entire rank-cncol update, done as a sequence of rank-k updates */
    /* ---------------------------------------------------------------------- */

    ps1 = NULL ;
    ps2 = NULL ;

    for (k1 = 0 ; k1 < cncol ; k1 += k)
    {

	/* ------------------------------------------------------------------ */
	/* get the next k columns of C for the update/downdate */
	/* ------------------------------------------------------------------ */

	/* the last update/downdate might be less than rank-k */
	if (k > cncol - k1)
	{
	    k = cncol - k1 ;
	    wdim = Power2 [k] ;
	}
	k2 = k1 + k - 1 ;

	/* workspaces are in the following state, on input and output */
	ASSERT (CHOLMOD(dump_work) (TRUE, TRUE, wdim, Common)) ;

	/* ------------------------------------------------------------------ */
	/* create a zero-length path for each column of W */
	/* ------------------------------------------------------------------ */

	nextj = n ;
	path = 0 ;
	for (ccol = k1 ; ccol <= k2 ; ccol++)
	{
	    PRINT1 (("Column ["ID"]: "ID"\n", path, ccol)) ;
	    ASSERT (ccol >= 0 && ccol <= cncol) ;
	    pp1 = Cp [ccol] ;
	    pp2 = (packed) ? (Cp [ccol+1]) : (pp1 + Cnz [ccol]) ;
	    /* get the row index j of the first entry in C (:,ccol) */
	    if (pp2 > pp1)
	    {
		/* Column ccol of C has at least one entry. */
		j = Ci [pp1] ;
	    }
	    else
	    {
		/* Column ccol of C is empty.  Pretend it has one entry in
		 * the last column with numerical value of zero. */
		j = n-1 ;
	    }
	    ASSERT (j >= 0 && j < n) ;

	    /* find first column to work on */
	    nextj = MIN (nextj, j) ;

	    Path [path].ccol = ccol ;	/* which column of C this path is for */
	    Path [path].start = EMPTY ;	/* paths for C have zero length */
	    Path [path].end = EMPTY ;
	    Path [path].parent = EMPTY ;    /* no parent yet */
	    Path [path].rank = 1 ;	    /* one column of W */
	    Path [path].c = EMPTY ;	    /* no child of this path (case A) */
	    Path [path].next = Head [j] ;   /* this path is pending at col j */
	    Path [path].pending = j ;	    /* this path is pending at col j */
	    Head [j] = path ;		    /* this path is pending at col j */
	    PRINT1(("Path "ID" starts: start "ID" end "ID" parent "ID" c "ID""
		    "j "ID" ccol "ID"\n", path, Path [path].start,
		    Path [path].end, Path [path].parent,
		    Path [path].c, j, ccol)) ;

	    /* initialize botrow for this path */
	    Path [path].botrow = (use_colmark) ? colmark [ccol] : n ;

	    path++ ;
	}

	/* we start with paths 0 to k-1.  Next one (now unused) is npaths */
	npaths = k ;

	j = nextj ;
	ASSERT (j < n) ;
	scattered = FALSE ;

	/* ------------------------------------------------------------------ */
	/* symbolic update of columns of L */
	/* ------------------------------------------------------------------ */

	while (j < n)
	{
	    ASSERT (j >= 0 && j < n && Lnz [j] > 0) ;

	    /* the old column, Li [p1..p2-1].  D (j,j) is stored in Lx [p1] */
	    p1 = Lp [j] ;
	    newlnz = Lnz [j] ;
	    p2 = p1 + newlnz  ;

#ifndef NDEBUG
	    PRINT1 (("\n=========Column j="ID" p1 "ID" p2 "ID" lnz "ID" \n",
			j, p1, p2, newlnz)) ;
	    dump_col ("Old", j, p1, p2, Li, Lx, n, Common) ;
	    oldparent = (Lnz [j] > 1) ? (Li [p1 + 1]) : EMPTY ;
	    ASSERT (CHOLMOD(dump_work) (TRUE, FALSE, 0, Common)) ;
	    ASSERT (!scattered) ;
	    PRINT1 (("Col "ID": Checking paths, npaths: "ID"\n", j, npaths)) ;
	    for (kk = 0 ; kk < npaths ; kk++)
	    {
		Int kk2, found, j3 = Path [kk].pending ;
		PRINT2 (("Path "ID" pending at "ID".\n", kk, j3)) ;
		if (j3 != EMPTY)
		{
		    /* Path kk must be somewhere in link list for column j3 */
		    ASSERT (Head [j3] != EMPTY) ;
		    PRINT3 (("    List at "ID": ", j3)) ;
		    found = FALSE ;
		    for (kk2 = Head [j3] ; kk2 != EMPTY ; kk2 = Path [kk2].next)
		    {
			PRINT3 ((""ID" ", kk2)) ;
			ASSERT (Path [kk2].pending == j3) ;
			found = found || (kk2 == kk) ;
		    }
		    PRINT3 (("\n")) ;
		    ASSERT (found) ;
		}
	    }
	    PRINT1 (("\nCol "ID": Paths at this column, head "ID"\n",
			j, Head [j]));
	    ASSERT (Head [j] != EMPTY) ;
	    for (kk = Head [j] ; kk != EMPTY ; kk = Path [kk].next)
	    {
		PRINT1 (("path "ID": (c="ID" j="ID") npaths "ID"\n",
			    kk, Path[kk].c, j, npaths)) ;
		ASSERT (kk >= 0 && kk < npaths) ;
		ASSERT (Path [kk].pending == j) ;
	    }
#endif

	    /* -------------------------------------------------------------- */
	    /* determine the path we're on */
	    /* -------------------------------------------------------------- */

	    /* get the first old path at column j */
	    path = Head [j] ;

	    /* -------------------------------------------------------------- */
	    /* update/downdate of forward solve, Lx=b */
	    /* -------------------------------------------------------------- */

	    if (do_solve)
	    {
		xj = Xx [j] ;
		if (IS_NONZERO (xj))
		{
		    xj = Xx [j] ;
		    /* This is first time column j has been seen for entire */
		    /* rank-k update/downdate. */

		    /* DeltaB += Lold (j:botrow-1,j) * X (j) */
		    Nx [j] += xj ;			/* diagonal of L */

		    /* find the botrow for this column */
		    botrow = (use_colmark) ? Path [path].botrow : n ;

		    for (p = p1 + 1 ; p < p2 ; p++)
		    {
			i = Li [p] ;
			if (i >= botrow)
			{
			    break ;
			}
			Nx [i] += Lx [p] * xj ;
		    }

		    /* clear X[j] to flag col j of Lold as having been seen.  If
		     * X (j) was initially zero, then the above code is never
		     * executed for column j.  This is safe, since if xj=0 the
		     * code above does not do anything anyway.  */
		    Xx [j] = 0.0 ;
		}
	    }

	    /* -------------------------------------------------------------- */
	    /* start a new path at this column if two or more paths merge */
	    /* -------------------------------------------------------------- */

	    newpath =
		/* start a new path if paths have merged */
		(Path [path].next != EMPTY)
		/* or if j is the first node on a path (case A). */
		|| (Path [path].c == EMPTY) ;

	    if (newpath)
	    {
		/* get the botrow of the first path at column j */
		botrow = (use_colmark) ? Path [path].botrow : n ;

		path = npaths++ ;
		ASSERT (npaths <= 3*k) ;
		Path [path].ccol = EMPTY ; /* no single col of C for this path*/
		Path [path].start = j ;	   /* path starts at this column j */
		Path [path].end = EMPTY ;  /* don't know yet where it ends */
		Path [path].parent = EMPTY ;/* don't know parent path yet */
		Path [path].rank = 0 ;	/* rank is sum of child path ranks */
		PRINT1 (("Path "ID" starts: start "ID" end "ID" parent "ID"\n",
		path, Path [path].start, Path [path].end, Path [path].parent)) ;

		/* set the botrow of the new path */
		Path [path].botrow = (use_colmark) ? botrow : n ;
	    }

	    /* -------------------------------------------------------------- */
	    /* for each path kk pending at column j */
	    /* -------------------------------------------------------------- */

	    /* make a list of the sets that need to be merged into column j */
	    nsets = 0 ;

	    for (kk = Head [j] ; kk != EMPTY ; kk = Path [kk].next)
	    {

		/* ---------------------------------------------------------- */
		/* path kk is at (c,j) */
		/* ---------------------------------------------------------- */

		c = Path [kk].c ;
		ASSERT (c < j) ;
		PRINT1 (("TUPLE on path "ID" (c="ID" j="ID")\n", kk, c, j)) ;
		ASSERT (Path [kk].pending == j) ;

		if (newpath)
		{
		    /* finalize path kk and find rank of this path */
		    Path [kk].end = c ;	/* end of old path is previous node c */
		    Path [kk].parent = path ;	/* parent is this path */
		    Path [path].rank += Path [kk].rank ;    /* sum up ranks */
		    Path [kk].pending = EMPTY ;
		    PRINT1 (("Path "ID" done:start "ID" end "ID" parent "ID"\n",
		    kk, Path [kk].start, Path [kk].end, Path [kk].parent)) ;
		}

		if (c == EMPTY)
		{

		    /* ------------------------------------------------------ */
		    /* CASE A: first node in path */
		    /* ------------------------------------------------------ */

		    /* update:  add pattern of incoming column */

		    /* Column ccol of C is in Ci [pp1 ... pp2-1] */
		    ccol = Path [kk].ccol ;
		    pp1 = Cp [ccol] ;
		    pp2 = (packed) ? (Cp [ccol+1]) : (pp1 + Cnz [ccol]) ;
		    PRINT1 (("Case A, ccol = "ID" len "ID"\n", ccol, pp2-pp1)) ;
		    ASSERT (IMPLIES (pp2 > pp1, Ci [pp1] == j)) ;

		    if (!scattered)
		    {
			/* scatter the original pattern of column j of L */
			for (p = p1 ; p < p2 ; p++)
			{
			    Flag [Li [p]] = mark ;
			}
			scattered = TRUE ;
		    }

		    /* scatter column ccol of C (skip first entry, j) */
		    newlnz1 = newlnz ;
		    for (p = pp1 + 1 ; p < pp2 ; p++)
		    {
			row = Ci [p] ;
			if (Flag [row] < mark)
			{
			    /* this is a new entry in Lj' */
			    Flag [row] = mark ;
			    newlnz++ ;
			}
		    }
		    if (newlnz1 != newlnz)
		    {
			/* column ccol of C adds something to column j of L */
			Set [nsets++] = FLIP (ccol) ;
		    }

		}
		else if (Head [c] == 1)
		{

		    /* ------------------------------------------------------ */
		    /* CASE B: c is old, but changed, child of j */
		    /* CASE C: new child of j */
		    /* ------------------------------------------------------ */

		    /* Head [c] is 1 if col c of L has new entries,
		     * EMPTY otherwise */
		    Flag [c] = 0 ;
		    Head [c] = EMPTY ;

		    /* update: add Lc' */

		    /* column c of L is in Li [pp1 .. pp2-1] */
		    pp1 = Lp [c] ;
		    pp2 = pp1 + Lnz [c] ;
		    PRINT1 (("Case B/C: c = "ID"\n", c)) ;
		    DEBUG (dump_col ("Child", c, pp1, pp2, Li, Lx, n, Common)) ;
		    ASSERT (j == Li [pp1 + 1]) ; /* j is new parent of c */

		    if (!scattered)
		    {
			/* scatter the original pattern of column j of L */
			for (p = p1 ; p < p2 ; p++)
			{
			    Flag [Li [p]] = mark ;
			}
			scattered = TRUE ;
		    }

		    /* scatter column c of L (skip first two entries, c and j)*/
		    newlnz1 = newlnz ;
		    for (p = pp1 + 2 ; p < pp2 ; p++)
		    {
			row = Li [p] ;
			if (Flag [row] < mark)
			{
			    /* this is a new entry in Lj' */
			    Flag [row] = mark ;
			    newlnz++ ;
			}
		    }
		    PRINT2 (("\n")) ;

		    if (newlnz1 != newlnz)
		    {
			/* column c of L adds something to column j of L */
			Set [nsets++] = c ;
		    }
		}
	    }

	    /* -------------------------------------------------------------- */
	    /* update the pattern of column j of L */
	    /* -------------------------------------------------------------- */

	    /* Column j of L will be in Li/Lx [p1 .. p3-1] */
	    p3 = p1 + newlnz ;
	    ASSERT (IMPLIES (nsets == 0, newlnz == Lnz [j])) ;
	    PRINT1 (("p1 "ID" p2 "ID" p3 "ID" nsets "ID"\n", p1, p2, p3,nsets));

	    /* -------------------------------------------------------------- */
	    /* ensure we have enough space for the longer column */
	    /* -------------------------------------------------------------- */

	    if (nsets > 0 && p3 > Lp [Lnext [j]])
	    {
		PRINT1 (("Col realloc: j "ID" newlnz "ID"\n", j, newlnz)) ;
		if (!CHOLMOD(reallocate_column) (j, newlnz, L, Common))
		{
		    /* out of memory, L is now simplicial symbolic */
		    CHOLMOD(clear_flag) (Common) ;
		    for (j = 0 ; j <= n ; j++)
		    {
			Head [j] = EMPTY ;
		    }
		    ASSERT (CHOLMOD(dump_work) (TRUE, TRUE, wdim, Common)) ;
		    return (FALSE) ;
		}
		/* L->i and L->x may have moved.  Column j has moved too */
		Li = L->i ;
		Lx = L->x ;
		p1 = Lp [j] ;
		p2 = p1 + Lnz [j] ;
		p3 = p1 + newlnz ;
	    }

	    /* -------------------------------------------------------------- */
	    /* create set pointers */
	    /* -------------------------------------------------------------- */

	    for (s = 0 ; s < nsets ; s++)
	    {
		/* Pattern of Set s is *(Set_ps1 [s] ... Set_ps2 [s]-1) */
		c = Set [s] ;
		if (c < EMPTY)
		{
		    /* column ccol of C, skip first entry (j) */
		    ccol = FLIP (c) ;
		    pp1 = Cp [ccol] ;
		    pp2 = (packed) ? (Cp [ccol+1]) : (pp1 + Cnz [ccol]) ;
		    ASSERT (pp2 - pp1 > 1) ;
		    Set_ps1 [s] = &(Ci [pp1 + 1]) ;
		    Set_ps2 [s] = &(Ci [pp2]) ;
		    PRINT1 (("set "ID" is ccol "ID"\n", s, ccol)) ;
		}
		else
		{
		    /* column c of L, skip first two entries (c and j)  */
		    pp1 = Lp [c] ;
		    pp2 = pp1 + Lnz [c]  ;
		    ASSERT (Lnz [c] > 2) ;
		    Set_ps1 [s] = &(Li [pp1 + 2]) ;
		    Set_ps2 [s] = &(Li [pp2]) ;
		    PRINT1 (("set "ID" is L "ID"\n", s, c)) ;
		}
		DEBUG (dump_set (s, Set_ps1, Set_ps2, j, n, Common)) ;
	    }

	    /* -------------------------------------------------------------- */
	    /* multiset merge */
	    /* -------------------------------------------------------------- */

	    /* Merge the sets into a single sorted set, Lj'.  Before the merge
	     * starts, column j is located in Li/Lx [p1 ... p2-1] and the
	     * space Li/Lx [p2 ... p3-1] is empty.  p1 is Lp [j], p2 is
	     * Lp [j] + Lnz [j] (the old length of the column), and p3 is
	     * Lp [j] + newlnz (the new and longer length of the column).
	     *
	     * The sets 0 to nsets-1 are defined by the Set_ps1 and Set_ps2
	     * pointers.  Set s is located in *(Set_ps1 [s] ... Set_ps2 [s]-1).
	     * It may be a column of C, or a column of L.  All row indices i in
	     * the sets are in the range i > j and i < n.  All sets are sorted.
	     *
	     * The merge into column j of L is done in place.
	     *
	     * During the merge, p2 and p3 are updated.  Li/Lx [p1..p2-1]
	     * reflects the indices of the old column j of L that are yet to
	     * be merged into the new column.  Entries in their proper place in
	     * the new column j of L are located in Li/Lx [p3 ... p1+newlnz-1].
	     * The merge finishes when p2 == p3.
	     *
	     * During the merge, set s consumed as it is merged into column j of
	     * L.  Its unconsumed contents are *(Set_ps1 [s] ... Set_ps2 [s]-1).
	     * When a set is completely consumed, it is removed from the set of
	     * sets, and nsets is decremented.
	     *
	     * The multiset merge and 2-set merge finishes when p2 == p3.
	     */

	    PRINT1 (("Multiset merge p3 "ID" p2 "ID" nsets "ID"\n",
			p3, p2, nsets)) ;

	    while (p3 > p2 && nsets > 1)
	    {

#ifndef NDEBUG
		PRINT2 (("\nMultiset merge.  nsets = "ID"\n", nsets)) ;
		PRINT2 (("Source col p1 = "ID", p2 = "ID", p3= "ID"\n",
			    p1, p2, p3)) ;
		for (p = p1 + 1 ; p < p2 ; p++)
		{
		    PRINT2 (("    p: "ID" source row "ID" %g\n",
				p, Li[p], Lx[p])) ;
		    ASSERT (Li [p] > j && Li [p] < n) ;
		}
		PRINT2 (("---\n")) ;
		for (p = p3 ; p < p1 + newlnz ; p++)
		{
		    PRINT2 (("    p: "ID" target row "ID" %g\n",
				p, Li[p], Lx[p])) ;
		    ASSERT (Li [p] > j && Li [p] <  n) ;
		}
		for (s = 0 ; s < nsets ; s++)
		{
		    dump_set (s, Set_ps1, Set_ps2, j, n, Common) ;
		}
#endif

		/* get the entry at the tail end of source column Lj */
		row1 = Li [p2 - 1] ;
		ASSERT (row1 >= j && p2 >= p1) ;

		/* find the largest row in all the sets */
		maxrow = row1 ;
		smax = EMPTY ;
		for (s = nsets-1 ; s >= 0 ; s--)
		{
		    ASSERT (Set_ps1 [s] < Set_ps2 [s]) ;
		    row = *(Set_ps2 [s] - 1) ;
		    if (row == maxrow)
		    {
			/* skip past this entry in set s (it is a duplicate) */
			Set_ps2 [s]-- ;
			if (Set_ps1 [s] == Set_ps2 [s])
			{
			    /* nothing more in this set */
			    nsets-- ;
			    Set_ps1 [s] = Set_ps1 [nsets] ;
			    Set_ps2 [s] = Set_ps2 [nsets] ;
			    if (smax == nsets)
			    {
				/* Set smax redefined; it is now this set */
				smax = s ;
			    }
			}
		    }
		    else if (row > maxrow)
		    {
			maxrow = row ;
			smax = s ;
		    }
		}
		ASSERT (maxrow > j) ;

		/* move the row onto the stack of the target column */
		if (maxrow == row1)
		{
		    /* next entry is in Lj, move to the bottom of Lj' */
		    ASSERT (smax == EMPTY) ;
		    p2-- ;
		    p3-- ;
		    Li [p3] = maxrow ;
		    Lx [p3] = Lx [p2] ;
		}
		else
		{
		    /* new entry in Lj' */
		    ASSERT (smax >= 0 && smax < nsets) ;
		    Set_ps2 [smax]-- ;
		    p3-- ;
		    Li [p3] = maxrow ;
		    Lx [p3] = 0.0 ;
		    if (Set_ps1 [smax] == Set_ps2 [smax])
		    {
			/* nothing more in this set */
			nsets-- ;
			Set_ps1 [smax] = Set_ps1 [nsets] ;
			Set_ps2 [smax] = Set_ps2 [nsets] ;
			PRINT1 (("Set "ID" now empty\n", smax)) ;
		    }
		}
	    }

	    /* -------------------------------------------------------------- */
	    /* 2-set merge: */
	    /* -------------------------------------------------------------- */

	    /* This the same as the multi-set merge, except there is only one
	     * set s = 0 left.  The source column j and the set 0 are being
	     * merged into the target column j. */

	    if (nsets > 0)
	    {
		ps1 = Set_ps1 [0] ;
		ps2 = Set_ps2 [0] ;
	    }

	    while (p3 > p2)
	    {

#ifndef NDEBUG
		PRINT2 (("\n2-set merge.\n")) ;
		ASSERT (nsets == 1) ;
		PRINT2 (("Source col p1 = "ID", p2 = "ID", p3= "ID"\n",
			    p1, p2, p3)) ;
		for (p = p1 + 1 ; p < p2 ; p++)
		{
		    PRINT2 (("    p: "ID" source row "ID" %g\n",
				p, Li[p], Lx[p])) ;
		    ASSERT (Li [p] > j && Li [p] < n) ;
		}
		PRINT2 (("---\n")) ;
		for (p = p3 ; p < p1 + newlnz ; p++)
		{
		    PRINT2 (("    p: "ID" target row "ID" %g\n",
				p, Li[p], Lx[p])) ;
		    ASSERT (Li [p] > j && Li [p] <  n) ;
		}
		dump_set (0, Set_ps1, Set_ps2, j, n, Common) ;
#endif

		if (p2 == p1 + 1)
		{
		    /* the top of Lj is empty; copy the set and quit */
		    while (p3 > p2)
		    {
			/* new entry in Lj' */
			row = *(--ps2) ;
			p3-- ;
			Li [p3] = row ;
			Lx [p3] = 0.0 ;
		    }
		}
		else
		{
		    /* get the entry at the tail end of Lj */
		    row1 = Li [p2 - 1] ;
		    ASSERT (row1 > j && row1 < n) ;
		    /* get the entry at the tail end of the incoming set */
		    ASSERT (ps1 < ps2) ;
		    row = *(ps2-1) ;
		    ASSERT (row > j && row1 < n) ;
		    /* move the larger of the two entries to the target set */
		    if (row1 >= row)
		    {
			/* next entry is in Lj, move to the bottom */
			if (row1 == row)
			{
			    /* skip past this entry in the set */
			    ps2-- ;
			}
			p2-- ;
			p3-- ;
			Li [p3] = row1 ;
			Lx [p3] = Lx [p2] ;
		    }
		    else
		    {
			/* new entry in Lj' */
			ps2-- ;
			p3-- ;
			Li [p3] = row ;
			Lx [p3] = 0.0 ;
		    }
		}
	    }

	    /* -------------------------------------------------------------- */
	    /* The new column j of L is now in Li/Lx [p1 ... p2-1] */
	    /* -------------------------------------------------------------- */

	    p2 = p1 + newlnz ;
	    DEBUG (dump_col ("After merge: ", j, p1, p2, Li, Lx, n, Common)) ;

	    fl += Path [path].rank * (6 + 4 * (double) newlnz) ;

	    /* -------------------------------------------------------------- */
	    /* clear Flag; original pattern of column j L no longer marked */
	    /* -------------------------------------------------------------- */

	    mark = CHOLMOD(clear_flag) (Common) ;
	    scattered = FALSE ;

	    /* -------------------------------------------------------------- */
	    /* find the new parent */
	    /* -------------------------------------------------------------- */

	    newparent = (newlnz > 1) ? (Li [p1 + 1]) : EMPTY ;
	    PRINT1 (("\nNew parent, Lnz: "ID": "ID" "ID"\n",
			j, newparent,newlnz));
	    ASSERT (oldparent == EMPTY || newparent <= oldparent) ;

	    /* -------------------------------------------------------------- */
	    /* go to the next node in the path */
	    /* -------------------------------------------------------------- */

	    /* path moves to (j,nextj) unless j is a root */
	    nextj = (newparent == EMPTY) ? n : newparent ;

	    /* place path at head of list for nextj, or terminate the path */
	    PRINT1 (("\n j = "ID" nextj = "ID"\n\n", j, nextj)) ;
	    Path [path].c = j ;
	    if (nextj < n)
	    {
		/* put path on link list of pending paths at column nextj */
		Path [path].next = Head [nextj] ;
		Path [path].pending = nextj ;
		Head [nextj] = path ;
		PRINT1 (("Path "ID" continues to ("ID","ID").  Rank "ID"\n",
		    path, Path [path].c, nextj, Path [path].rank)) ;
	    }
	    else
	    {
		/* path has ended here, at a root */
		Path [path].next = EMPTY ;
		Path [path].pending = EMPTY ;
		Path [path].end = j ;
		PRINT1 (("Path "ID" ends at root ("ID").  Rank "ID"\n",
		    path, Path [path].end, Path [path].rank)) ;
	    }

	    /* The link list Head [j] can now be emptied.  Set Head [j] to 1
	     * if column j has changed (it is no longer used as a link list). */
	    PRINT1 (("column "ID", oldlnz = "ID"\n", j, Lnz [j])) ;
	    Head [j] = (Lnz [j] != newlnz) ? 1 : EMPTY ;
	    Lnz [j] = newlnz ;
	    PRINT1 (("column "ID", newlnz = "ID"\n", j, newlnz)) ;
	    DEBUG (dump_col ("New", j, p1, p2, Li, Lx, n, Common)) ;

	    /* move to the next column */
	    if (k == Path [path].rank)
	    {
		/* only one path left */
		j = nextj ;
	    }
	    else
	    {
		/* The current path is moving from column j to column nextj
		 * (nextj is n if the path has ended).  However, there may be
		 * other paths pending in columns j+1 to nextj-1.  There are
		 * two methods for looking for the next column with a pending
		 * update.  The first one looks at all columns j+1 to nextj-1
		 * for a non-empty link list.  This can be costly if j and
		 * nextj differ by a large amount (it can be O(n), but this
		 * entire routine may take Omega(1) time).  The second method
		 * looks at all paths and finds the smallest column at which any
		 * path is pending.  It takes O(# of paths), which is bounded
		 * by 23: one for each column of C (up to 8), and then 15 for a
		 * balanced binary tree with 8 leaves.  However, if j and
		 * nextj differ by a tiny amount (nextj is often j+1 near
		 * the end of the matrix), looking at columns j+1 to nextj
		 * would be faster.  Both methods give the same answer. */

		if (nextj - j < npaths)
		{
		    /* there are fewer columns to search than paths */
		    PRINT1 (("check j="ID" to nextj="ID"\n", j, nextj)) ;
		    for (j2 = j + 1 ; j2 < nextj ; j2++)
		    {
			PRINT1 (("check j="ID" "ID"\n", j2, Head [j2])) ;
			if (Head [j2] != EMPTY)
			{
			    PRINT1 (("found, j="ID"\n", j2)) ;
			    ASSERT (Path [Head [j2]].pending == j2) ;
			    break ;
			}
		    }
		}
		else
		{
		    /* there are fewer paths than columns to search */
		    j2 = nextj ;
		    for (kk = 0 ; kk < npaths ; kk++)
		    {
			jj = Path [kk].pending ;
			PRINT2 (("Path "ID" pending at "ID"\n", kk, jj)) ;
			if (jj != EMPTY) j2 = MIN (j2, jj) ;
		    }
		}
		j = j2 ;
	    }
	}

	/* ensure workspaces are back to the values required on input */
	ASSERT (CHOLMOD(dump_work) (TRUE, TRUE, TRUE, Common)) ;

	/* ------------------------------------------------------------------ */
	/* depth-first-search of tree to order the paths */
	/* ------------------------------------------------------------------ */

	/* create lists of child paths */
	PRINT1 (("\n\nDFS search:\n\n")) ;
	for (path = 0 ; path < npaths ; path++)
	{
	    Path [path].c = EMPTY ;	    /* first child of path */
	    Path [path].next = EMPTY ;	    /* next sibling of path */
	    Path [path].order = EMPTY ;	    /* path is not ordered yet */
	    Path [path].wfirst = EMPTY ;    /* 1st column of W not found yet */

#ifndef NDEBUG
	    j = Path [path].start ;
	    PRINT1 (("Path "ID" : start "ID" end "ID" parent "ID" ccol "ID"\n", 
	    path, j, Path [path].end, Path [path].parent, Path [path].ccol)) ;
	    for ( ; ; )
	    {
		PRINT1 (("	column "ID"\n", j)) ;
		ASSERT (j == EMPTY || (j >= 0 && j < n)) ;
		if (j == Path [path].end)
		{
		    break ;
		}
		ASSERT (j >= 0 && j < n) ;
		j = (Lnz [j] > 1) ? (Li [Lp [j] + 1]) : EMPTY ;
	    }
#endif
	}

	for (path = 0 ; path < npaths ; path++)
	{
	    p = Path [path].parent ;	/* add path to child list of parent */
	    if (p != EMPTY)
	    {
		ASSERT (p < npaths) ;
		Path [path].next = Path [p].c ;
		Path [p].c = path ;
	    }
	}

	path_order = k ;
	w_order = 0 ;
	for (path = npaths-1 ; path >= 0 ; path--)
	{
	    if (Path [path].order == EMPTY)
	    {
		/* this path is the root of a subtree of Tbar */
		PRINT1 (("Root path "ID"\n", path)) ;
		ASSERT (path >= k) ;
		dfs (Path, k, path, &path_order, &w_order, 0, npaths) ;
	    }
	}
	ASSERT (path_order == npaths) ;
	ASSERT (w_order == k) ;

	/* reorder the paths */
	for (path = 0 ; path < npaths ; path++)
	{
	    /* old order is path, new order is Path [path].order */
	    OrderedPath [Path [path].order] = Path [path] ;
	}

#ifndef NDEBUG
	for (path = 0 ; path < npaths ; path++)
	{
	    PRINT1 (("Ordered Path "ID": start "ID" end "ID" wfirst "ID" rank "
		    ""ID" ccol "ID"\n", path, OrderedPath [path].start,
		    OrderedPath [path].end, OrderedPath [path].wfirst,
		    OrderedPath [path].rank, OrderedPath [path].ccol)) ;
	    if (path < k)
	    {
		ASSERT (OrderedPath [path].ccol >= 0) ;
	    }
	    else
	    {
		ASSERT (OrderedPath [path].ccol == EMPTY) ;
	    }
	}
#endif

	/* ------------------------------------------------------------------ */
	/* numeric update/downdate for all paths */
	/* ------------------------------------------------------------------ */

	ASSERT (CHOLMOD(dump_work) (TRUE, TRUE, wdim, Common)) ;

	switch (wdim)
	{
	    case 1:
		updown_1_r (update, C, k, L, W, OrderedPath, npaths, mask,
		    Common) ;
		break ;
	    case 2:
		updown_2_r (update, C, k, L, W, OrderedPath, npaths, mask,
		    Common) ;
		break ;
	    case 4:
		updown_4_r (update, C, k, L, W, OrderedPath, npaths, mask,
		    Common) ;
		break ;
	    case 8:
		updown_8_r (update, C, k, L, W, OrderedPath, npaths, mask,
		    Common) ;
		break ;
	}

	ASSERT (CHOLMOD(dump_work) (TRUE, TRUE, wdim, Common)) ;
    }

    /* ---------------------------------------------------------------------- */
    /* update/downdate the forward solve */
    /* ---------------------------------------------------------------------- */

    if (do_solve)
    {
	/* We now have DeltaB += Lold (:,j) * X (j) for all columns j in union
	 * of all paths seen during the entire rank-cncol update/downdate. For
	 * each j in path, do DeltaB -= Lnew (:,j)*DeltaB(j) 
	 * in topological order. */

#ifndef NDEBUG
	PRINT1 (("\ndo_solve, DeltaB + Lold(:,Path)*X(Path):\n")) ;
	for (i = 0 ; i < n ; i++)
	{
	    PRINT1 (("do_solve: "ID" %30.20e\n", i, Nx [i])) ;
	}
#endif

	/* Note that the downdate, if it deleted entries, would need to compute
	 * the Stack prior to doing any downdates. */

	/* find the union of all the paths in the new L */
	top = n ;	/* "top" is stack pointer, not a row or column index */
	for (ccol = 0 ; ccol < cncol ; ccol++)
	{

	    /* -------------------------------------------------------------- */
	    /* j = first row index of C (:,ccol) */
	    /* -------------------------------------------------------------- */

	    pp1 = Cp [ccol] ;
	    pp2 = (packed) ? (Cp [ccol+1]) : (pp1 + Cnz [ccol]) ;
	    if (pp2 > pp1)
	    {
		/* Column ccol of C has at least one entry. */
		j = Ci [pp1] ;
	    }
	    else
	    {
		/* Column ccol of C is empty */
		j = n-1 ;
	    }
	    PRINT1 (("\ndo_solve:      ccol= "ID"\n", ccol)) ;
	    ASSERT (j >= 0 && j < n) ;
	    len = 0 ;

	    /* -------------------------------------------------------------- */
	    /* find the new rowmark */
	    /* -------------------------------------------------------------- */

	    /* Each column of C can redefine the region of L that takes part in
	     * the update/downdate of the triangular solve Lx=b.  If
	     * i = colmark [ccol] for column C(:,ccol), then i = rowmark [j] is
	     * redefined for all columns along the path modified by C(:,ccol).
	     * If more than one column modifies any given column j of L, then
	     * the rowmark of j is determined by the colmark of the least-
	     * numbered column that affects column j.  That is, if both
	     * C(:,ccol1) and C(:,ccol2) affect column j of L, then
	     * rowmark [j] = colmark [MIN (ccol1, ccol2)].
	     *
	     * rowmark [j] is not modified if rowmark or colmark are NULL,
	     * or if colmark [ccol] is EMPTY.
	     */

	    botrow = (use_colmark) ? (colmark [ccol]) : EMPTY ;

	    /* -------------------------------------------------------------- */
	    /* traverse from j towards root, stopping if node already visited */
	    /* -------------------------------------------------------------- */

	    while (j != EMPTY && Flag [j] < mark)
	    {
		PRINT1 (("do_solve: subpath j= "ID"\n", j)) ;
		ASSERT (j >= 0 && j < n) ;
		Stack [len++] = j ;		/* place j on the stack */
		Flag [j] = mark ;		/* flag j as visited */

		/* if using colmark, mark column j with botrow */
		ASSERT (Li [Lp [j]] == j) ;	/* diagonal is always present */
		if (use_colmark)
		{
		    Li [Lp [j]] = botrow ;	/* use the space for botrow */
		}

		/* go up the tree, to the parent of j */
		j = (Lnz [j] > 1) ? (Li [Lp [j] + 1]) : EMPTY ;
	    }

	    /* -------------------------------------------------------------- */
	    /* move the path down to the bottom of the stack */
	    /* -------------------------------------------------------------- */

	    ASSERT (len <= top) ;
	    while (len > 0)
	    {
		Stack [--top] = Stack [--len] ;
	    }
	}

#ifndef NDEBUG
	/* Union of paths now in Stack [top..n-1] in topological order */
	PRINT1 (("\nTopological order:\n")) ;
	for (i = top ; i < n ; i++)
	{
	    PRINT1 (("column "ID" in full path\n", Stack [i])) ;
	}
#endif

	/* Do the forward solve for the full path part of L */
	for (m = top ; m < n ; m++)
	{
	    j = Stack [m] ;
	    ASSERT (j >= 0 && j < n) ;
	    PRINT1 (("do_solve: path j= "ID"\n", j)) ;
	    p1 = Lp [j] ;
	    lnz = Lnz [j] ;
	    p2 = p1 + lnz ;
	    xj = Nx [j] ;

	    /* copy new solution onto old one, for all cols in full path */
	    Xx [j] = xj ;
	    Nx [j] = 0. ;

	    /* DeltaB -= Lnew (j+1:botrow-1,j) * deltab(j) */
	    if (use_colmark)
	    {
		botrow = Li [p1] ;	/* get botrow */
		Li [p1] = j ;		/* restore diagonal entry */
		for (p = p1 + 1 ; p < p2 ; p++)
		{
		    i = Li [p] ;
		    if (i >= botrow) break ;
		    Nx [i] -= Lx [p] * xj ;
		}
	    }
	    else
	    {
		for (p = p1 + 1 ; p < p2 ; p++)
		{
		    Nx [Li [p]] -= Lx [p] * xj ;
		}
	    }
	}

	/* clear the Flag */
	mark = CHOLMOD(clear_flag) (Common) ;
    }

    /* ---------------------------------------------------------------------- */
    /* successful update/downdate */
    /* ---------------------------------------------------------------------- */

    Common->modfl = fl ;
    DEBUG (for (j = 0 ; j < n ; j++) ASSERT (IMPLIES (do_solve, Nx[j] == 0.))) ;
    ASSERT (CHOLMOD(dump_work) (TRUE, TRUE, TRUE, Common)) ;
    DEBUG (CHOLMOD(dump_factor) (L, "output L for updown", Common)) ;
    return (TRUE) ;
}