Esempio n. 1
0
/* UMF_fsize: for every node j with Npiv [j] > 0 (a frontal matrix), compute
 * in Fsize [j] the largest front size (Fnrows * Fncols) found among j and the
 * fronts that have already propagated their size up to j.  A front size that
 * does not fit in an Int is recorded as Int_MAX.  Nodes that are never
 * touched keep the value EMPTY.
 *
 *	nn	number of nodes; all arrays are indexed 0..nn-1
 *	Fsize	output, overwritten
 *	Fnrows	number of rows of each front
 *	Fncols	number of columns of each front
 *	Parent	parent of each node, or EMPTY for a root; the propagation
 *		relies on parent > j (asserted below)
 *	Npiv	only nodes with Npiv [j] > 0 are treated as fronts
 */
GLOBAL void UMF_fsize
(
    Int nn,
    Int Fsize [ ],
    Int Fnrows [ ],
    Int Fncols [ ],
    Int Parent [ ],
    Int Npiv [ ]
)
{
    Int j, parent, frsize, r, c ;

    for (j = 0 ; j < nn ; j++)
    {
	Fsize [j] = EMPTY ;
    }

    /* ---------------------------------------------------------------------- */
    /* find max front size for tree rooted at node j, for each front j */
    /* ---------------------------------------------------------------------- */

    DEBUG1 (("\n\n========================================FRONTS:\n")) ;
    for (j = 0 ; j < nn ; j++)
    {
	if (Npiv [j] > 0)
	{
	    /* this is a frontal matrix */
	    parent = Parent [j] ;
	    r = Fnrows [j] ;
	    c = Fncols [j] ;
	    /* Check the product in double BEFORE forming it in Int: a signed
	     * Int multiplication that overflows is undefined behaviour, so
	     * r*c may only be evaluated once it is known to fit. */
	    if (INT_OVERFLOW (((double) r) * ((double) c)))
	    {
		/* :: frsize int overflow :: */
		frsize = Int_MAX ;
	    }
	    else
	    {
		frsize = r * c ;
	    }
	    DEBUG1 ((""ID" : npiv "ID" size "ID" parent "ID" ",
		j, Npiv [j], frsize, parent)) ;
	    Fsize [j] = MAX (Fsize [j], frsize) ;
	    DEBUG1 (("Fsize [j = "ID"] = "ID"\n", j, Fsize [j])) ;
	    if (parent != EMPTY)
	    {
		/* find the maximum frontsize of self and children */
		ASSERT (Npiv [parent] > 0) ;
		ASSERT (parent > j) ;
		Fsize [parent] = MAX (Fsize [parent], Fsize [j]) ;
		DEBUG1 (("Fsize [parent = "ID"] = "ID"\n",
		    parent, Fsize [parent]));
	    }
	}
    }
}
Esempio n. 2
0
/* UMF_mem_alloc_element: allocate an nrows-by-ncols element via
 * UMF_mem_alloc_tail_block and initialize its header.
 *
 * On success, returns the (nonzero) offset of the element within
 * Numeric->Memory; *epout points to the element header, *Cols and *Rows to
 * its column and row index lists, *C to its numerical values, and *size holds
 * GET_ELEMENT_SIZE (nrows, ncols) + 1.
 *
 * Returns 0 if the element size would overflow an Int, or if the tail block
 * could not be allocated.  The pointer outputs are not written in that case.
 */
GLOBAL Int UMF_mem_alloc_element
(
    NumericType *Numeric,
    Int nrows,
    Int ncols,
    Int **Rows,
    Int **Cols,
    Entry **C,
    Int *size,
    Element **epout
)
{
    Element *elem ;
    Unit *base, *index_start, *value_start ;
    Int offset ;

    ASSERT (Numeric != (NumericType *) NULL) ;
    ASSERT (Numeric->Memory != (Unit *) NULL) ;

    /* ---------------------------------------------------------------------- */
    /* determine the size of the element, and give up if it is too large */
    /* ---------------------------------------------------------------------- */

    *size = GET_ELEMENT_SIZE (nrows, ncols) ;
    if (INT_OVERFLOW (DGET_ELEMENT_SIZE (nrows, ncols) + 1))
    {
	/* :: allocate element, int overflow :: */
	/* the problem is too large */
	return (0) ;
    }

    /* ---------------------------------------------------------------------- */
    /* get the block from the tail of Numeric->Memory */
    /* ---------------------------------------------------------------------- */

    offset = UMF_mem_alloc_tail_block (Numeric, *size) ;
    /* the reported size is one more than the size requested */
    *size += 1 ;
    if (offset == 0)
    {
	/* out of memory */
	DEBUG0 (("alloc element failed - out of memory\n")) ;
	return (0) ;
    }

    base = Numeric->Memory + offset ;
    elem = (Element *) base ;

    DEBUG2 (("alloc_element done ("ID" x "ID"): p: "ID" i "ID"\n",
	nrows, ncols, (Int) (base-Numeric->Memory), offset)) ;

    /* ---------------------------------------------------------------------- */
    /* lay out the element: header, column indices, row indices, values */
    /* ---------------------------------------------------------------------- */

    index_start = base + UNITS (Element, 1) ;
    value_start = index_start + UNITS (Int, ncols + nrows) ;

    *Cols = (Int *) index_start ;	/* col [0..ncols-1] indices */
    *Rows = *Cols + ncols ;		/* row [0..nrows-1] indices */
    *C = (Entry *) value_start ;	/* C [0..nrows-1, 0..ncols-1] */

    /* ---------------------------------------------------------------------- */
    /* initialize the header: nothing removed yet, not linked to anything */
    /* ---------------------------------------------------------------------- */

    elem->next = EMPTY ;
    elem->cdeg = 0 ;
    elem->rdeg = 0 ;
    elem->nrows = nrows ;
    elem->nrowsleft = nrows ;
    elem->ncols = ncols ;
    elem->ncolsleft = ncols ;

    DEBUG2 (("new block size: "ID" ",
	GET_BLOCK_SIZE (Numeric->Memory + offset))) ;
    DEBUG2 (("Element size needed "ID"\n", GET_ELEMENT_SIZE (nrows, ncols))) ;

    /* hand back the header, and the offset into Numeric->Memory */
    *epout = elem ;
    return (offset) ;
}
/* UMFPACK_numeric: driver for the numerical factorization.
 *
 * Reads the control settings, validates the Symbolic object and the input
 * arguments, allocates the Work and Numeric objects, calls UMF_kernel to
 * factorize, frees Work, shrinks the Numeric arrays to their final sizes,
 * and fills in the Info array.
 *
 *	Ap, Ai, Ax (and Az when COMPLEX is defined)
 *		the input matrix; passed unchanged to UMF_kernel.
 *		Ap [n_col] is reported as Info [UMFPACK_NZ].
 *	SymbolicHandle
 *		cast to SymbolicType * and checked with UMF_valid_symbolic.
 *	NumericHandle
 *		on success, *NumericHandle receives the new Numeric object.
 *		It is set to NULL after the argument check; the error returns
 *		that precede that point (invalid Symbolic, memory-size
 *		overflow, missing argument) leave it untouched.
 *	Control
 *		presumably read by the GET_CONTROL macro, with the UMFPACK
 *		defaults as fallback -- the macro is not visible here.
 *	User_Info
 *		optional output statistics; when NULL, a local array is used.
 *
 * Returns UMFPACK_ERROR_invalid_Symbolic_object, UMFPACK_ERROR_out_of_memory,
 * UMFPACK_ERROR_argument_missing, or any status < UMFPACK_OK reported by
 * UMF_kernel.  Otherwise returns the kernel status, replaced by
 * UMFPACK_WARNING_singular_matrix if the factorization is found singular.
 */
GLOBAL Int UMFPACK_numeric
(
    const Int Ap [ ],
    const Int Ai [ ],
    const double Ax [ ],
#ifdef COMPLEX
    const double Az [ ],
#endif
    void *SymbolicHandle,
    void **NumericHandle,
    const double Control [UMFPACK_CONTROL],
    double User_Info [UMFPACK_INFO]
)
{

    /* ---------------------------------------------------------------------- */
    /* local variables */
    /* ---------------------------------------------------------------------- */

    double Info2 [UMFPACK_INFO], alloc_init, relpt, relpt2, droptol,
	front_alloc_init, stats [2] ;
    double *Info ;
    WorkType WorkSpace, *Work ;
    NumericType *Numeric ;
    SymbolicType *Symbolic ;
    Int n_row, n_col, n_inner, newsize, i, status, *inew, npiv, ulen, scale ;
    Unit *mnew ;

    /* ---------------------------------------------------------------------- */
    /* get the amount of time used by the process so far */
    /* ---------------------------------------------------------------------- */

    umfpack_tic (stats) ;

    /* ---------------------------------------------------------------------- */
    /* initialize and check inputs */
    /* ---------------------------------------------------------------------- */

#ifndef NDEBUG
    /* init_count is not declared in this function; it records the malloc
     * count on entry so the ASSERTs below can check for leaks */
    UMF_dump_start ( ) ;
    init_count = UMF_malloc_count ;
    DEBUGm4 (("\nUMFPACK numeric: U transpose version\n")) ;
#endif

    /* If front_alloc_init negative then allocate that size of front in
     * UMF_start_front.  If alloc_init negative, then allocate that initial
     * size of Numeric->Memory. */

    relpt = GET_CONTROL (UMFPACK_PIVOT_TOLERANCE,
	UMFPACK_DEFAULT_PIVOT_TOLERANCE) ;
    relpt2 = GET_CONTROL (UMFPACK_SYM_PIVOT_TOLERANCE,
	UMFPACK_DEFAULT_SYM_PIVOT_TOLERANCE) ;
    alloc_init = GET_CONTROL (UMFPACK_ALLOC_INIT, UMFPACK_DEFAULT_ALLOC_INIT) ;
    front_alloc_init = GET_CONTROL (UMFPACK_FRONT_ALLOC_INIT,
	UMFPACK_DEFAULT_FRONT_ALLOC_INIT) ;
    scale = GET_CONTROL (UMFPACK_SCALE, UMFPACK_DEFAULT_SCALE) ;
    droptol = GET_CONTROL (UMFPACK_DROPTOL, UMFPACK_DEFAULT_DROPTOL) ;

    /* clamp the tolerances to [0,1], droptol to >= 0, and front_alloc_init
     * to <= 1 (negative front_alloc_init values are kept; see above) */
    relpt   = MAX (0.0, MIN (relpt,  1.0)) ;
    relpt2  = MAX (0.0, MIN (relpt2, 1.0)) ;
    droptol = MAX (0.0, droptol) ;
    front_alloc_init = MIN (1.0, front_alloc_init) ;

    /* any scale setting other than NONE or MAX falls back to the default */
    if (scale != UMFPACK_SCALE_NONE && scale != UMFPACK_SCALE_MAX)
    {
	scale = UMFPACK_DEFAULT_SCALE ;
    }

    if (User_Info != (double *) NULL)
    {
	/* return Info in user's array */
	Info = User_Info ;
	/* clear the parts of Info that are set by UMFPACK_numeric */
	for (i = UMFPACK_NUMERIC_SIZE ; i <= UMFPACK_MAX_FRONT_NCOLS ; i++)
	{
	    Info [i] = EMPTY ;
	}
	for (i = UMFPACK_NUMERIC_DEFRAG ; i < UMFPACK_IR_TAKEN ; i++)
	{
	    Info [i] = EMPTY ;
	}
    }
    else
    {
	/* no Info array passed - use local one instead */
	Info = Info2 ;
	for (i = 0 ; i < UMFPACK_INFO ; i++)
	{
	    Info [i] = EMPTY ;
	}
    }

    Symbolic = (SymbolicType *) SymbolicHandle ;
    Numeric = (NumericType *) NULL ;
    if (!UMF_valid_symbolic (Symbolic))
    {
	Info [UMFPACK_STATUS] = UMFPACK_ERROR_invalid_Symbolic_object ;
	return (UMFPACK_ERROR_invalid_Symbolic_object) ;
    }

    /* compute alloc_init automatically for AMD or other symmetric ordering */
    if (/* Symbolic->ordering == UMFPACK_ORDERING_AMD */ alloc_init >= 0
        && Symbolic->amd_lunz > 0)
    {
	alloc_init = (Symbolic->nz + Symbolic->amd_lunz) / Symbolic->lunz_bound;
	alloc_init = MIN (1.0, alloc_init) ;
	alloc_init *= UMF_REALLOC_INCREASE ;
    }

    n_row = Symbolic->n_row ;
    n_col = Symbolic->n_col ;
    n_inner = MIN (n_row, n_col) ;

    /* check for integer overflow in Numeric->Memory minimum size */
    if (INT_OVERFLOW (Symbolic->dnum_mem_init_usage * sizeof (Unit)))
    {
	/* :: int overflow, initial Numeric->Memory size :: */
	/* There's no hope to allocate a Numeric object big enough simply to
	 * hold the initial matrix, so return an out-of-memory condition */
	DEBUGm4 (("out of memory: numeric int overflow\n")) ;
	Info [UMFPACK_STATUS] = UMFPACK_ERROR_out_of_memory ;
	return (UMFPACK_ERROR_out_of_memory) ;
    }

    Info [UMFPACK_STATUS] = UMFPACK_OK ;
    Info [UMFPACK_NROW] = n_row ;
    Info [UMFPACK_NCOL] = n_col ;
    Info [UMFPACK_SIZE_OF_UNIT] = (double) (sizeof (Unit)) ;

    /* Az is not checked here, even when COMPLEX is defined */
    if (!Ap || !Ai || !Ax || !NumericHandle)
    {
	Info [UMFPACK_STATUS] = UMFPACK_ERROR_argument_missing ;
	return (UMFPACK_ERROR_argument_missing) ;
    }

    Info [UMFPACK_NZ] = Ap [n_col] ;
    /* from here on, every error return leaves *NumericHandle NULL */
    *NumericHandle = (void *) NULL ;

    /* ---------------------------------------------------------------------- */
    /* allocate the Work object */
    /* ---------------------------------------------------------------------- */

    /* (1) calls UMF_malloc 15 or 17 times, to obtain temporary workspace of
     * size c+1 Entry's and 2*(n_row+1) + 3*(n_col+1) + (n_col+n_inner+1) +
     * (nn+1) + 3*(c+1) + 2*(r+1) + max(r,c) + (nfr+1) integers plus 2*nn
     * more integers if diagonal pivoting is to be done.  r is the maximum
     * number of rows in any frontal matrix, c is the maximum number of columns
     * in any frontal matrix, n_inner is min (n_row,n_col), nn is
     * max (n_row,n_col), and nfr is the number of frontal matrices.  For a
     * square matrix, this is c+1 Entry's and about 8n + 3c + 2r + max(r,c) +
     * nfr integers, plus 2n more for diagonal pivoting.
     *
     * NOTE(review): the ASSERT after work_alloc expects 16 or 18 allocations
     * (16 + 2*prefer_diagonal), not 15 or 17 -- confirm which is current.
     */

    Work = &WorkSpace ;
    Work->n_row = n_row ;
    Work->n_col = n_col ;
    Work->nfr = Symbolic->nfr ;
    Work->nb = Symbolic->nb ;
    Work->n1 = Symbolic->n1 ;

    if (!work_alloc (Work, Symbolic))
    {
	DEBUGm4 (("out of memory: numeric work\n")) ;
	Info [UMFPACK_STATUS] = UMFPACK_ERROR_out_of_memory ;
	error (&Numeric, Work) ;
	return (UMFPACK_ERROR_out_of_memory) ;
    }
    ASSERT (UMF_malloc_count == init_count + 16 + 2*Symbolic->prefer_diagonal) ;

    /* ---------------------------------------------------------------------- */
    /* allocate Numeric object */
    /* ---------------------------------------------------------------------- */

    /* (2) calls UMF_malloc 10 or 11 times, for a total space of
     * sizeof (NumericType) bytes, 4*(n_row+1) + 4*(n_row+1) integers, and
     * (n_inner+1) Entry's, plus n_row Entry's if row scaling is to be done.
     * sizeof (NumericType) is a small constant.  Next, it calls UMF_malloc
     * once, for the variable-sized part of the Numeric object
     * (Numeric->Memory).  The size of this object is the larger of
     * (Control [UMFPACK_ALLOC_INIT]) *  (the approximate upper bound computed
     * by UMFPACK_symbolic), and the minimum required to start the numerical
     * factorization.  This request is reduced if it fails.
     */

    if (!numeric_alloc (&Numeric, Symbolic, alloc_init, scale))
    {
	DEBUGm4 (("out of memory: initial numeric\n")) ;
	Info [UMFPACK_STATUS] = UMFPACK_ERROR_out_of_memory ;
	error (&Numeric, Work) ;
	return (UMFPACK_ERROR_out_of_memory) ;
    }
    DEBUG0 (("malloc: init_count "ID" UMF_malloc_count "ID"\n",
	init_count, UMF_malloc_count)) ;
    ASSERT (UMF_malloc_count == init_count
	+ (16 + 2*Symbolic->prefer_diagonal)
	+ (11 + (scale != UMFPACK_SCALE_NONE))) ;

    /* set control parameters */
    Numeric->relpt = relpt ;
    Numeric->relpt2 = relpt2 ;
    Numeric->droptol = droptol ;
    Numeric->alloc_init = alloc_init ;
    Numeric->front_alloc_init = front_alloc_init ;
    Numeric->scale = scale ;

    DEBUG0 (("umf relpt %g %g init %g %g inc %g red %g\n",
	relpt, relpt2, alloc_init, front_alloc_init,
	UMF_REALLOC_INCREASE, UMF_REALLOC_REDUCTION)) ;

    /* ---------------------------------------------------------------------- */
    /* scale and factorize */
    /* ---------------------------------------------------------------------- */

    /* (3) During numerical factorization (inside UMF_kernel), the variable-size
     * block of memory is increased in size via a call to UMF_realloc if it is
     * found to be too small.  During factorization, this block holds the
     * pattern and values of L and U at the top end, and the elements
     * (contribution blocks) and the current frontal matrix (Work->F*) at the
     * bottom end.  The peak size of the variable-sized object is estimated in
     * UMFPACK_*symbolic (Info [UMFPACK_VARIABLE_PEAK_ESTIMATE]), although this
     * upper bound can be very loose.  The size of the Symbolic object
     * (which is currently allocated) is in Info [UMFPACK_SYMBOLIC_SIZE], and
     * is between 2*n and 13*n integers.
     */

    DEBUG0 (("Calling umf_kernel\n")) ;
    status = UMF_kernel (Ap, Ai, Ax,
#ifdef COMPLEX
	Az,
#endif
	Numeric, Work, Symbolic) ;

    Info [UMFPACK_STATUS] = status ;
    if (status < UMFPACK_OK)
    {
	/* out of memory, or pattern has changed */
	error (&Numeric, Work) ;
	return (status) ;
    }

    Info [UMFPACK_FORCED_UPDATES] = Work->nforced ;
    Info [UMFPACK_VARIABLE_INIT] = Numeric->init_usage ;
    if (Symbolic->prefer_diagonal)
    {
	Info [UMFPACK_NOFF_DIAG] = Work->noff_diagonal ;
    }

    DEBUG0 (("malloc: init_count "ID" UMF_malloc_count "ID"\n",
	init_count, UMF_malloc_count)) ;

    npiv = Numeric->npiv ;	/* = n_inner for nonsingular matrices */
    ulen = Numeric->ulen ;	/* = 0 for square nonsingular matrices */

    /* ---------------------------------------------------------------------- */
    /* free Work object */
    /* ---------------------------------------------------------------------- */

    /* (4) After numerical factorization all of the objects allocated in step
     * (1) are freed via UMF_free, except that one object of size n_col+1 is
     * kept if there are off-diagonal nonzeros in the last pivot row (can only
     * occur for singular or rectangular matrices).  This is Work->Upattern,
     * which is transferred to Numeric->Upattern if ulen > 0.
     */

    DEBUG0 (("malloc: init_count "ID" UMF_malloc_count "ID"\n",
	init_count, UMF_malloc_count)) ;

    free_work (Work) ;

    DEBUG0 (("malloc: init_count "ID" UMF_malloc_count "ID"\n",
	init_count, UMF_malloc_count)) ;
    DEBUG0 (("Numeric->ulen: "ID" scale: "ID"\n", ulen, scale)) ;
    ASSERT (UMF_malloc_count == init_count + (ulen > 0) +
	(11 + (scale != UMFPACK_SCALE_NONE))) ;

    /* ---------------------------------------------------------------------- */
    /* reduce Lpos, Lilen, Lip, Upos, Uilen and Uip to size npiv+1 */
    /* ---------------------------------------------------------------------- */

    /* (5) Six components of the Numeric object are reduced in size if the
     * matrix is singular or rectangular.   The original size is 3*(n_row+1) +
     * 3*(n_col+1) integers.  The new size is 6*(npiv+1) integers.  For
     * square non-singular matrices, these two sizes are the same.
     *
     * In every shrink below, a failed UMF_realloc (NULL result) is tolerated:
     * the old pointer is simply kept.
     */

    if (npiv < n_row)
    {
	/* reduce Lpos, Uilen, and Uip from size n_row+1 to size npiv+1 */
	inew = (Int *) UMF_realloc (Numeric->Lpos, npiv+1, sizeof (Int)) ;
	if (inew)
	{
	    Numeric->Lpos = inew ;
	}
	inew = (Int *) UMF_realloc (Numeric->Uilen, npiv+1, sizeof (Int)) ;
	if (inew)
	{
	    Numeric->Uilen = inew ;
	}
	inew = (Int *) UMF_realloc (Numeric->Uip, npiv+1, sizeof (Int)) ;
	if (inew)
	{
	    Numeric->Uip = inew ;
	}
    }

    if (npiv < n_col)
    {
	/* reduce Upos, Lilen, and Lip from size n_col+1 to size npiv+1 */
	inew = (Int *) UMF_realloc (Numeric->Upos, npiv+1, sizeof (Int)) ;
	if (inew)
	{
	    Numeric->Upos = inew ;
	}
	inew = (Int *) UMF_realloc (Numeric->Lilen, npiv+1, sizeof (Int)) ;
	if (inew)
	{
	    Numeric->Lilen = inew ;
	}
	inew = (Int *) UMF_realloc (Numeric->Lip, npiv+1, sizeof (Int)) ;
	if (inew)
	{
	    Numeric->Lip = inew ;
	}
    }

    /* ---------------------------------------------------------------------- */
    /* reduce Numeric->Upattern from size n_col+1 to size ulen+1 */
    /* ---------------------------------------------------------------------- */

    /* (6) The size of Numeric->Upattern (formerly Work->Upattern) is reduced
     * from size n_col+1 to size ulen + 1.  If ulen is zero, the object does
     * not exist. */

    DEBUG4 (("ulen: "ID" Upattern "ID"\n", ulen, (Int) Numeric->Upattern)) ;
    ASSERT (IMPLIES (ulen == 0, Numeric->Upattern == (Int *) NULL)) ;
    if (ulen > 0 && ulen < n_col)
    {
	inew = (Int *) UMF_realloc (Numeric->Upattern, ulen+1, sizeof (Int)) ;
	if (inew)
	{
	    Numeric->Upattern = inew ;
	}
    }

    /* ---------------------------------------------------------------------- */
    /* reduce Numeric->Memory to hold just the LU factors at the head */
    /* ---------------------------------------------------------------------- */

    /* (7) The variable-sized block (Numeric->Memory) is reduced to hold just L
     * and U, via a call to UMF_realloc, since the frontal matrices are no
     * longer needed.
     */

    newsize = Numeric->ihead ;
    if (newsize < Numeric->size)
    {
	mnew = (Unit *) UMF_realloc (Numeric->Memory, newsize, sizeof (Unit)) ;
	if (mnew)
	{
	    /* realloc succeeded (how can it fail since the size is reduced?) */
	    Numeric->Memory = mnew ;
	    Numeric->size = newsize ;
	}
    }
    /* if the realloc was skipped or failed, Numeric->size is still the old
     * size, and ihead/itail are moved to the end of that block */
    Numeric->ihead = Numeric->size ;
    Numeric->itail = Numeric->ihead ;
    Numeric->tail_usage = 0 ;
    Numeric->ibig = EMPTY ;
    /* UMF_mem_alloc_tail_block can no longer be called (no tail marker) */

    /* ---------------------------------------------------------------------- */
    /* report the results and return the Numeric object */
    /* ---------------------------------------------------------------------- */

    UMF_set_stats (
	Info,
	Symbolic,
	(double) Numeric->max_usage,	/* actual peak Numeric->Memory */
	(double) Numeric->size,		/* actual final Numeric->Memory */
	Numeric->flops,			/* actual "true flops" */
	(double) Numeric->lnz + n_inner,		/* actual nz in L */
	(double) Numeric->unz + Numeric->nnzpiv,	/* actual nz in U */
	(double) Numeric->maxfrsize,	/* actual largest front size */
	(double) ulen,			/* actual Numeric->Upattern size */
	(double) npiv,			/* actual # pivots found */
	(double) Numeric->maxnrows,	/* actual largest #rows in front */
	(double) Numeric->maxncols,	/* actual largest #cols in front */
	scale != UMFPACK_SCALE_NONE,
	Symbolic->prefer_diagonal,
	ACTUAL) ;

    Info [UMFPACK_ALLOC_INIT_USED] = Numeric->alloc_init ;
    Info [UMFPACK_NUMERIC_DEFRAG] = Numeric->ngarbage ;
    Info [UMFPACK_NUMERIC_REALLOC] = Numeric->nrealloc ;
    Info [UMFPACK_NUMERIC_COSTLY_REALLOC] = Numeric->ncostly ;
    Info [UMFPACK_COMPRESSED_PATTERN] = Numeric->isize ;
    Info [UMFPACK_LU_ENTRIES] = Numeric->nLentries + Numeric->nUentries +
	    Numeric->npiv ;
    Info [UMFPACK_UDIAG_NZ] = Numeric->nnzpiv ;
    Info [UMFPACK_RSMIN] = Numeric->rsmin ;
    Info [UMFPACK_RSMAX] = Numeric->rsmax ;
    Info [UMFPACK_WAS_SCALED] = Numeric->scale ;

    /* nz in L and U with no dropping of small entries */
    Info [UMFPACK_ALL_LNZ] = Numeric->all_lnz + n_inner ;
    Info [UMFPACK_ALL_UNZ] = Numeric->all_unz + Numeric->nnzpiv ;
    Info [UMFPACK_NZDROPPED] =
	  (Numeric->all_lnz - Numeric->lnz)
	+ (Numeric->all_unz - Numeric->unz) ;

    /* estimate of the reciprocal of the condition number. */
    if (SCALAR_IS_ZERO (Numeric->min_udiag)
     || SCALAR_IS_ZERO (Numeric->max_udiag)
     ||	SCALAR_IS_NAN (Numeric->min_udiag)
     ||	SCALAR_IS_NAN (Numeric->max_udiag))
    {
	/* rcond is zero if there is any zero or NaN on the diagonal */
	Numeric->rcond = 0.0 ;
    }
    else
    {
	/* estimate of the reciprocal of the condition number. */
	/* This is NaN if diagonal is zero-free, but has one or more NaN's. */
	/* NOTE(review): the guard above already sends a NaN min_udiag or
	 * max_udiag to the rcond = 0 branch, so the NaN remark looks stale
	 * -- confirm. */
	Numeric->rcond = Numeric->min_udiag / Numeric->max_udiag ;
    }
    Info [UMFPACK_UMIN]  = Numeric->min_udiag ;
    Info [UMFPACK_UMAX]  = Numeric->max_udiag ;
    Info [UMFPACK_RCOND] = Numeric->rcond ;

    if (Numeric->nnzpiv < n_inner
    || SCALAR_IS_ZERO (Numeric->rcond) || SCALAR_IS_NAN (Numeric->rcond))
    {
	/* there are zeros and/or NaN's on the diagonal of U */
	DEBUG0 (("Warning, matrix is singular in umfpack_numeric\n")) ;
	DEBUG0 (("nnzpiv "ID" n_inner "ID" rcond %g\n", Numeric->nnzpiv,
	    n_inner, Numeric->rcond)) ;
	status = UMFPACK_WARNING_singular_matrix ;
	Info [UMFPACK_STATUS] = status ;
    }

    /* a singular factorization is still returned to the caller as valid */
    Numeric->valid = NUMERIC_VALID ;
    *NumericHandle = (void *) Numeric ;

    /* Numeric has 11 to 13 objects */
    ASSERT (UMF_malloc_count == init_count + 11 +
	+ (ulen > 0)			    /* Numeric->Upattern */
	+ (scale != UMFPACK_SCALE_NONE)) ;  /* Numeric->Rs */

    /* ---------------------------------------------------------------------- */
    /* get the time used by UMFPACK_numeric */
    /* ---------------------------------------------------------------------- */

    umfpack_toc (stats) ;
    Info [UMFPACK_NUMERIC_WALLTIME] = stats [0] ;
    Info [UMFPACK_NUMERIC_TIME] = stats [1] ;

    /* return UMFPACK_OK or UMFPACK_WARNING_singular_matrix */
    return (status) ;

}
Esempio n. 4
0
/* KLU_kernel_factor: size and allocate the LU workspace for one diagonal
 * block of A, then factorize the block by calling KLU_kernel.
 *
 * The initial sizes of L and U are derived from Lsize:
 *   Lsize <= 0:  max (-Lsize, 1) * anz + n, where anz is the number of
 *                entries in columns k1..k1+n-1 of A
 *   Lsize >  0:  Lsize itself
 * and then limited to the range [n+1, min ((n*n+n)/2, INT_MAX)], with the
 * upper limit taking precedence.  The estimate is clamped in double before it
 * is converted to Int, so an out-of-range (or NaN) estimate cannot produce an
 * undefined double-to-integer conversion.
 *
 * Returns the size of LU as reported by KLU_kernel, or 0 on failure.  When
 * the workspace cannot be allocated (or its size overflows), Common->status
 * is set to KLU_OUT_OF_MEMORY.  *p_LU receives the LU workspace; it receives
 * whatever KLU_free returns if Common->status < KLU_OK after the kernel.
 */
size_t KLU_kernel_factor            /* 0 if failure, size of LU if OK */
(
    /* inputs, not modified */
    Int n,          /* A is n-by-n. n must be > 0. */
    Int Ap [ ],     /* size n+1, column pointers for A */
    Int Ai [ ],     /* size nz = Ap [n], row indices for A */
    Entry Ax [ ],   /* size nz, values of A */
    Int Q [ ],      /* size n, optional column permutation */
    double Lsize,   /* estimate of number of nonzeros in L */

    /* outputs, not defined on input */
    Unit **p_LU,        /* row indices and values of L and U */
    Entry Udiag [ ],    /* size n, diagonal of U */
    Int Llen [ ],       /* size n, column length of L */
    Int Ulen [ ],       /* size n, column length of U */
    Int Lip [ ],        /* size n, column pointers for L */
    Int Uip [ ],        /* size n, column pointers for U */
    Int P [ ],          /* row permutation, size n */
    Int *lnz,           /* size of L */
    Int *unz,           /* size of U */

    /* workspace, undefined on input */
    Entry *X,       /* size n double's, zero on output */
    Int *Work,      /* size 5n Int's */

    /* inputs, not modified on output */
    Int k1,             /* the block of A is from k1 to k2-1 */
    Int PSinv [ ],      /* inverse of P from symbolic factorization */
    double Rs [ ],      /* scale factors for A */

    /* inputs, modified on output */
    Int Offp [ ],   /* off-diagonal matrix (modified by this routine) */
    Int Offi [ ],
    Entry Offx [ ],
    /* --------------- */
    KLU_common *Common
)
{
    double maxlnz, dunits, dlsize ;
    Unit *LU ;
    Int *Pinv, *Lpend, *Stack, *Flag, *Ap_pos, *W ;
    Int lsize, usize, anz, ok ;
    size_t lusize ;
    ASSERT (Common != NULL) ;

    /* ---------------------------------------------------------------------- */
    /* get control parameters, or use defaults */
    /* ---------------------------------------------------------------------- */

    n = MAX (1, n) ;
    anz = Ap [n+k1] - Ap [k1] ;

    /* the estimate is kept in double until it has been clamped */
    if (Lsize <= 0)
    {
        Lsize = -Lsize ;
        Lsize = MAX (Lsize, 1.0) ;
        dlsize = Lsize * anz + n ;
    }
    else
    {
        dlsize = Lsize ;
    }

    /* upper limit: a dense triangle, but no more than INT_MAX entries */
    maxlnz = (((double) n) * ((double) n) + ((double) n)) / 2. ;
    maxlnz = MIN (maxlnz, ((double) INT_MAX)) ;

    /* Clamp to [n+1, maxlnz]; the upper limit is applied last so that it
     * wins when maxlnz < n+1 (n == 1).  Both limits are whole numbers, so
     * clamping before the truncating conversion gives the same result as
     * truncating first, whenever the latter is well-defined.  The negated
     * comparison also sends a NaN estimate to the lower limit. */
    if (!(dlsize >= (double) (n+1)))
    {
        dlsize = (double) (n+1) ;
    }
    dlsize = MIN (maxlnz, dlsize) ;

    lsize = (Int) dlsize ;
    usize = lsize ;

    PRINTF (("Welcome to klu: n %d anz %d k1 %d lsize %d usize %d maxlnz %g\n",
        n, anz, k1, lsize, usize, maxlnz)) ;

    /* ---------------------------------------------------------------------- */
    /* allocate workspace and outputs */
    /* ---------------------------------------------------------------------- */

    /* return arguments are not yet assigned */
    *p_LU = (Unit *) NULL ;

    /* carve the five size-n work arrays out of Work */
    /* these computations are safe from size_t overflow */
    W = Work ;
    Pinv = (Int *) W ;      W += n ;
    Stack = (Int *) W ;     W += n ;
    Flag = (Int *) W ;      W += n ;
    Lpend = (Int *) W ;     W += n ;
    Ap_pos = (Int *) W ;    W += n ;

    dunits = DUNITS (Int, lsize) + DUNITS (Entry, lsize) +
             DUNITS (Int, usize) + DUNITS (Entry, usize) ;
    /* convert to size_t only once dunits is known to be in range */
    ok = !INT_OVERFLOW (dunits) ;
    lusize = ok ? (size_t) dunits : 0 ;
    LU = ok ? KLU_malloc (lusize, sizeof (Unit), Common) : NULL ;
    if (LU == NULL)
    {
        /* out of memory, or problem too large */
        Common->status = KLU_OUT_OF_MEMORY ;
        lusize = 0 ;
        return (lusize) ;
    }

    /* ---------------------------------------------------------------------- */
    /* factorize */
    /* ---------------------------------------------------------------------- */

    /* with pruning, and non-recursive depth-first-search */
    /* LU is passed by address; lusize is replaced by the kernel's result */
    lusize = KLU_kernel (n, Ap, Ai, Ax, Q, lusize,
            Pinv, P, &LU, Udiag, Llen, Ulen, Lip, Uip, lnz, unz,
            X, Stack, Flag, Ap_pos, Lpend,
            k1, PSinv, Rs, Offp, Offi, Offx, Common) ;

    /* ---------------------------------------------------------------------- */
    /* return LU factors, or return nothing if an error occurred */
    /* ---------------------------------------------------------------------- */

    if (Common->status < KLU_OK)
    {
        LU = KLU_free (LU, lusize, sizeof (Unit), Common) ;
        lusize = 0 ;
    }
    *p_LU = LU ;
    PRINTF ((" in klu noffdiag %d\n", Common->noffdiag)) ;
    return (lusize) ;
}
Esempio n. 5
0
GLOBAL Int UMF_grow_front
(
    NumericType *Numeric,
    Int fnr2,		/* desired size is fnr2-by-fnc2 */
    Int fnc2,
    WorkType *Work,
    Int do_what		/* -1: UMF_start_front
			 * 0:  UMF_init_front, do not recompute Fcpos
			 * 1:  UMF_extend_front
			 * 2:  UMF_init_front, recompute Fcpos */
)
{
    /* ---------------------------------------------------------------------- */
    /* local variables */
    /* ---------------------------------------------------------------------- */

    double s ;
    Entry *Fcold, *Fcnew ;
    Int j, i, col, *Fcpos, *Fcols, fnrows_max, fncols_max, fnr_curr, nb,
	fnrows_new, fncols_new, fnr_min, fnc_min, minsize,
	newsize, fnrows, fncols, *E, eloc ;

    /* ---------------------------------------------------------------------- */
    /* get parameters */
    /* ---------------------------------------------------------------------- */

#ifndef NDEBUG
    if (do_what != -1) UMF_debug++ ;
    DEBUG0 (("\n\n====================GROW FRONT: do_what: "ID"\n", do_what)) ;
    if (do_what != -1) UMF_debug-- ;
    ASSERT (Work->do_grow) ;
    ASSERT (Work->fnpiv == 0) ;
#endif

    Fcols = Work->Fcols ;
    Fcpos = Work->Fcpos ;
    E = Work->E ;

    /* ---------------------------------------------------------------------- */
    /* The current front is too small, find the new size */
    /* ---------------------------------------------------------------------- */

    /* maximum size of frontal matrix for this chain */
    nb = Work->nb ;
    fnrows_max = Work->fnrows_max + nb ;
    fncols_max = Work->fncols_max + nb ;
    ASSERT (fnrows_max >= 0 && (fnrows_max % 2) == 1) ;
    DEBUG0 (("Max     size: "ID"-by-"ID" (incl. "ID" pivot block\n",
	fnrows_max, fncols_max, nb)) ;

    /* current dimensions of frontal matrix: fnr-by-fnc */
    DEBUG0 (("Current : "ID"-by-"ID" (excl "ID" pivot blocks)\n",
		Work->fnr_curr, Work->fnc_curr, nb)) ;
    ASSERT (Work->fnr_curr >= 0) ;
    ASSERT ((Work->fnr_curr % 2 == 1) || Work->fnr_curr == 0) ;

    /* required dimensions of frontal matrix: fnr_min-by-fnc_min */
    fnrows_new = Work->fnrows_new + 1 ;
    fncols_new = Work->fncols_new + 1 ;
    ASSERT (fnrows_new >= 0) ;
    if (fnrows_new % 2 == 0) fnrows_new++ ;
    fnrows_new += nb ;
    fncols_new += nb ;
    fnr_min = MIN (fnrows_new, fnrows_max) ;
    fnc_min = MIN (fncols_new, fncols_max) ;
    minsize = fnr_min * fnc_min ;
    if (INT_OVERFLOW ((double) fnr_min * (double) fnc_min * sizeof (Entry)))
    {
	/* :: the minimum front size is bigger than the integer maximum :: */
	return (FALSE) ;
    }
    ASSERT (fnr_min >= 0) ;
    ASSERT (fnr_min % 2 == 1) ;

    DEBUG0 (("Min     : "ID"-by-"ID"\n", fnr_min, fnc_min)) ;

    /* grow the front to fnr2-by-fnc2, but no bigger than the maximum,
     * and no smaller than the minumum. */
    DEBUG0 (("Desired : ("ID"+"ID")-by-("ID"+"ID")\n", fnr2, nb, fnc2, nb)) ;
    fnr2 += nb ;
    fnc2 += nb ;
    ASSERT (fnr2 >= 0) ;
    if (fnr2 % 2 == 0) fnr2++ ;
    fnr2 = MAX (fnr2, fnr_min) ;
    fnc2 = MAX (fnc2, fnc_min) ;
    fnr2 = MIN (fnr2, fnrows_max) ;
    fnc2 = MIN (fnc2, fncols_max) ;
    DEBUG0 (("Try     : "ID"-by-"ID"\n", fnr2, fnc2)) ;
    ASSERT (fnr2 >= 0) ;
    ASSERT (fnr2 % 2 == 1) ;

    s = ((double) fnr2) * ((double) fnc2) ;
    if (INT_OVERFLOW (s * sizeof (Entry)))
    {
	/* :: frontal matrix size int overflow :: */
	/* the desired front size is bigger than the integer maximum */
	/* compute a such that a*a*s < Int_MAX / sizeof (Entry) */
	double a = 0.9 * sqrt ((Int_MAX / sizeof (Entry)) / s) ;
	fnr2 = MAX (fnr_min, a * fnr2) ;
	fnc2 = MAX (fnc_min, a * fnc2) ;
	/* the new frontal size is a*r*a*c = a*a*s */
	newsize = fnr2 * fnc2 ;
	ASSERT (fnr2 >= 0) ;
	if (fnr2 % 2 == 0) fnr2++ ;
	fnc2 = newsize / fnr2 ;
    }

    fnr2 = MAX (fnr2, fnr_min) ;
    fnc2 = MAX (fnc2, fnc_min) ;
    newsize = fnr2 * fnc2 ;

    ASSERT (fnr2 >= 0) ;
    ASSERT (fnr2 % 2 == 1) ;
    ASSERT (fnr2 >= fnr_min) ;
    ASSERT (fnc2 >= fnc_min) ;
    ASSERT (newsize >= minsize) ;

    /* ---------------------------------------------------------------------- */
    /* free the current front if it is empty of any numerical values */
    /* ---------------------------------------------------------------------- */

    if (E [0] && do_what != 1)
    {
	/* free the current front, if it exists and has nothing in it */
	DEBUG0 (("Freeing empty front\n")) ;
	UMF_mem_free_tail_block (Numeric, E [0]) ;
	E [0] = 0 ;
	Work->Flublock = (Entry *) NULL ;
	Work->Flblock  = (Entry *) NULL ;
	Work->Fublock  = (Entry *) NULL ;
	Work->Fcblock  = (Entry *) NULL ;
    }

    /* ---------------------------------------------------------------------- */
    /* allocate the new front, doing garbage collection if necessary */
    /* ---------------------------------------------------------------------- */

#ifndef NDEBUG
    UMF_allocfail = FALSE ;
    if (UMF_gprob > 0)  /* a double relop, but ignore NaN case */
    {
	double rrr = ((double) (rand ( ))) / (((double) RAND_MAX) + 1) ;
	DEBUG1 (("Check random %e %e\n", rrr, UMF_gprob)) ;
	UMF_allocfail = rrr < UMF_gprob ;
	if (UMF_allocfail) DEBUGm2 (("Random garbage collection (grow)\n")) ;
    }
#endif

    DEBUG0 (("Attempt size: "ID"-by-"ID"\n", fnr2, fnc2)) ;
    eloc = UMF_mem_alloc_tail_block (Numeric, UNITS (Entry, newsize)) ;

    if (!eloc)
    {
	/* Do garbage collection, realloc, and try again. Compact the current
	 * contribution block in the front to fnrows-by-fncols.  Note that
	 * there are no pivot rows/columns in current front.  Do not recompute
	 * Fcpos in UMF_garbage_collection. */
	DEBUGm3 (("get_memory from umf_grow_front\n")) ;
	if (!UMF_get_memory (Numeric, Work, 1 + UNITS (Entry, newsize),
	    Work->fnrows, Work->fncols, FALSE))
	{
	    /* :: out of memory in umf_grow_front :: */
	    return (FALSE) ;	/* out of memory */
	}
	DEBUG0 (("Attempt size: "ID"-by-"ID" again\n", fnr2, fnc2)) ;
	eloc = UMF_mem_alloc_tail_block (Numeric, UNITS (Entry, newsize)) ;
    }

    /* try again with something smaller */
    while ((fnr2 != fnr_min || fnc2 != fnc_min) && !eloc)
    {
	fnr2 = MIN (fnr2 - 2, fnr2 * UMF_REALLOC_REDUCTION) ;
	fnc2 = MIN (fnc2 - 2, fnc2 * UMF_REALLOC_REDUCTION) ;
	ASSERT (fnr_min >= 0) ;
	ASSERT (fnr_min % 2 == 1) ;
	fnr2 = MAX (fnr_min, fnr2) ;
	fnc2 = MAX (fnc_min, fnc2) ;
	ASSERT (fnr2 >= 0) ;
	if (fnr2 % 2 == 0) fnr2++ ;
	newsize = fnr2 * fnc2 ;
	DEBUGm3 (("Attempt smaller size: "ID"-by-"ID" minsize "ID"-by-"ID"\n",
	    fnr2, fnc2, fnr_min, fnc_min)) ;
	eloc = UMF_mem_alloc_tail_block (Numeric, UNITS (Entry, newsize)) ;
    }

    /* try again with the smallest possible size */
    if (!eloc)
    {
	fnr2 = fnr_min ;
	fnc2 = fnc_min ;
	newsize = minsize ;
	DEBUG0 (("Attempt minsize: "ID"-by-"ID"\n", fnr2, fnc2)) ;
	eloc = UMF_mem_alloc_tail_block (Numeric, UNITS (Entry, newsize)) ;
    }

    if (!eloc)
    {
	/* out of memory */
	return (FALSE) ;
    }

    ASSERT (fnr2 >= 0) ;
    ASSERT (fnr2 % 2 == 1) ;
    ASSERT (fnr2 >= fnr_min && fnc2 >= fnc_min) ;

    /* ---------------------------------------------------------------------- */
    /* copy the old frontal matrix into the new one */
    /* ---------------------------------------------------------------------- */

    /* old contribution block (if any) */
    fnr_curr = Work->fnr_curr ;	    /* garbage collection can change fn*_curr */
    ASSERT (fnr_curr >= 0) ;
    ASSERT ((fnr_curr % 2 == 1) || fnr_curr == 0) ;
    fnrows = Work->fnrows ;
    fncols = Work->fncols ;
    Fcold = Work->Fcblock ;

    /* remove nb from the sizes */
    fnr2 -= nb ;
    fnc2 -= nb ;

    /* new frontal matrix */
    Work->Flublock = (Entry *) (Numeric->Memory + eloc) ;
    Work->Flblock  = Work->Flublock + nb * nb ;
    Work->Fublock  = Work->Flblock  + nb * fnr2 ;
    Work->Fcblock  = Work->Fublock  + nb * fnc2 ;
    Fcnew = Work->Fcblock ;

    if (E [0])
    {
	/* copy the old contribution block into the new one */
	for (j = 0 ; j < fncols ; j++)
	{
	    col = Fcols [j] ;
	    DEBUG1 (("copy col "ID" \n",col)) ;
	    ASSERT (col >= 0 && col < Work->n_col) ;
	    for (i = 0 ; i < fnrows ; i++)
	    {
		Fcnew [i] = Fcold [i] ;
	    }
	    Fcnew += fnr2 ;
	    Fcold += fnr_curr ;
	    DEBUG1 (("new offset col "ID" "ID"\n",col, j * fnr2)) ;
	    Fcpos [col] = j * fnr2 ;
	}
    }
    else if (do_what == 2)
    {
	/* just find the new column offsets */
	for (j = 0 ; j < fncols ; j++)
	{
	    col = Fcols [j] ;
	    DEBUG1 (("new offset col "ID" "ID"\n",col, j * fnr2)) ;
	    Fcpos [col] = j * fnr2 ;
	}
    }

    /* free the old frontal matrix */
    UMF_mem_free_tail_block (Numeric, E [0]) ;

    /* ---------------------------------------------------------------------- */
    /* new frontal matrix size */
    /* ---------------------------------------------------------------------- */

    E [0] = eloc ;
    Work->fnr_curr = fnr2 ;	    /* C block is fnr2-by-fnc2 */
    Work->fnc_curr = fnc2 ;
    Work->fcurr_size = newsize ;    /* including LU, L, U, and C blocks */
    Work->do_grow = FALSE ;	    /* the front has just been grown */

    ASSERT (Work->fnr_curr >= 0) ;
    ASSERT (Work->fnr_curr % 2 == 1) ;
    DEBUG0 (("Newly grown front: "ID"+"ID" by "ID"+"ID"\n", Work->fnr_curr,
	nb, Work->fnc_curr, nb)) ;
    return (TRUE) ;
}
Esempio n. 6
0
GLOBAL Int UMF_start_front    /* returns TRUE if successful, FALSE otherwise */
(
    Int chain,
    NumericType *Numeric,
    WorkType *Work,
    SymbolicType *Symbolic
)
{
    /* Choose the dimensions of the working array for the first frontal
     * matrix of the given chain.  If the wanted size exceeds what is
     * currently allocated (Work->fcurr_size) the front is grown via
     * UMF_grow_front; otherwise the existing allocation is re-partitioned
     * into its LU, L, U and C blocks. */

    Int fnrows_max, fncols_max, fnr2, fnc2, fsize, maxfrsize, nb, cdeg ;
    Int have_size, too_big ;
    double bytes_needed ;

    /* ---------------------------------------------------------------------- */
    /* limits for this chain */
    /* ---------------------------------------------------------------------- */

    nb = Symbolic->nb ;
    fnrows_max = Symbolic->Chain_maxrows [chain] ;
    fncols_max = Symbolic->Chain_maxcols [chain] ;
    have_size = Work->fcurr_size ;

    DEBUGm2 (("Start Front for chain "ID".  fnrows_max "ID" fncols_max "ID"\n",
	chain, fnrows_max, fncols_max)) ;

    Work->fnrows_max = fnrows_max ;
    Work->fncols_max = fncols_max ;
    Work->any_skip = FALSE ;

    /* size in bytes of the largest possible front, computed in double so
     * that the product itself cannot overflow an Int */
    bytes_needed = sizeof (Entry) *
	(double) (fnrows_max + nb) * (double) (fncols_max + nb) ;

    /* ---------------------------------------------------------------------- */
    /* estimate the degree of the first pivot column (diagonal pivoting only) */
    /* ---------------------------------------------------------------------- */

    /* cdeg stays zero when diagonal pivoting is not preferred */
    cdeg = 0 ;

    if (Symbolic->prefer_diagonal)
    {
	/* Col_degree is not maintained when diagonal pivoting is preferred,
	 * so build a rough upper bound from the tuple list of the next
	 * candidate column: sum the remaining row counts of every live
	 * element that still contains the column. */
	Int *Elem_loc = Work->E ;
	Int *Tuple_start = Numeric->Lip ;
	Int *Tuple_count = Numeric->Lilen ;
	Unit *Mem = Numeric->Memory ;
	Int pivcol = Work->nextcand ;
	Tuple *t = (Tuple *) Mem + Tuple_start [pivcol] ;
	Tuple *t_end = t + Tuple_count [pivcol] ;

	DEBUGm3 (("\n=============== start front: col "ID" tlen "ID"\n",
		pivcol, Tuple_count [pivcol])) ;

	for ( ; t < t_end ; t++)
	{
	    Int e, f, *Cols ;
	    Element *ep ;

	    DEBUG1 (("Tuple ("ID","ID")\n", t->e, t->f)) ;
	    e = t->e ;
	    if (Elem_loc [e])
	    {
		/* the element still exists; its column list follows the
		 * Element header in Memory */
		f = t->f ;
		ep = (Element *) (Mem + Elem_loc [e]) ;
		Cols = (Int *) (Mem + Elem_loc [e] + UNITS (Element, 1)) ;
		if (Cols [f] != EMPTY)
		{
		    DEBUG1 (("  nrowsleft "ID"\n", ep->nrowsleft)) ;
		    cdeg += ep->nrowsleft ;
		}
	    }
	}

#ifndef NDEBUG
	DEBUGm3 (("start front cdeg: "ID" col "ID"\n", cdeg, pivcol)) ;
	UMF_dump_rowcol (1, Numeric, Work, pivcol, FALSE) ;
#endif

	/* when amd_dmax is positive it caps the estimate */
	if (Symbolic->amd_dmax > 0)
	{
	    cdeg = MIN (cdeg, Symbolic->amd_dmax) ;
	}

	/* leave room for slightly larger columns later on; this also makes
	 * the estimate strictly positive */
	cdeg += 2 ;

	/* never more than the tallest front in the chain */
	cdeg = MIN (cdeg, fnrows_max) ;
    }

    DEBUGm2 (("fnrows max "ID" fncols_max "ID"\n", fnrows_max, fncols_max)) ;

    /* the current frontal matrix is empty */
    ASSERT (Work->fnrows == 0 && Work->fncols == 0 && Work->fnpiv == 0) ;

    /* maximum row dimension is always odd, to avoid bad cache effects */
    ASSERT (fnrows_max >= 0) ;
    ASSERT (fnrows_max % 2 == 1) ;

    /* ----------------------------------------------------------------------
     * Size of the working array for the current frontal matrix.
     *
     *   largest size: (fnrows_max+nb)-by-(fncols_max+nb)
     *
     *   front_alloc_init not negative:
     *	    front_alloc_init times the largest size; when cdeg was computed
     *	    (diagonal pivoting), additionally limited by
     *	    MAX (cdeg*cdeg, current front size) with nb added to cdeg.
     *
     *   front_alloc_init negative:
     *	    a front of -front_alloc_init entries.
     *
     * The request is never less than 2*nb^2 entries, and the leading
     * dimension of the front is kept odd.  The sizes include the nb-by-nb
     * LU block and the L and U blocks.
     * ---------------------------------------------------------------------- */

    /* largest front size, clamped so it is safe from integer overflow */
    too_big = INT_OVERFLOW (bytes_needed) ;
    if (!too_big)
    {
	maxfrsize = (fnrows_max + nb) * (fncols_max + nb) ;
    }
    else
    {
	/* :: int overflow, max front size :: */
	maxfrsize = Int_MAX / sizeof (Entry) ;
    }
    ASSERT (!INT_OVERFLOW ((double) maxfrsize * sizeof (Entry))) ;

    /* written as a negated "< 0" test on purpose: a NaN setting must take
     * the same branch as a non-negative one */
    if (!(Numeric->front_alloc_init < 0))
    {
	/* a fraction (or multiple) of the largest front */
	if (!INT_OVERFLOW (Numeric->front_alloc_init * bytes_needed))
	{
	    fsize = Numeric->front_alloc_init * maxfrsize ;
	}
	else
	{
	    /* :: int overflow, requested front size :: */
	    fsize = Int_MAX / sizeof (Entry) ;
	}

	if (cdeg > 0)
	{
	    /* diagonal pivoting is in use; limit by the cdeg estimate */
	    Int fsize2 ;

	    /* add the L and U blocks */
	    cdeg += nb ;

	    if (!INT_OVERFLOW (((double) cdeg * (double) cdeg) * sizeof (Entry)))
	    {
		fsize2 = MAX (cdeg * cdeg, have_size) ;
	    }
	    else
	    {
		/* :: int overflow, symmetric front size :: */
		fsize2 = Int_MAX / sizeof (Entry) ;
	    }
	    fsize = MIN (fsize, fsize2) ;
	}
    }
    else
    {
	/* an explicit number of entries was requested */
	fsize = -Numeric->front_alloc_init ;
	fsize = MAX (1, fsize) ;
    }

    fsize = MAX (fsize, 2*nb*nb) ;

    /* fsize and maxfrsize are now safe from integer overflow.  They both
     * include the size of the pivot blocks. */
    ASSERT (!INT_OVERFLOW ((double) fsize * sizeof (Entry))) ;

    Work->fnrows_new = 0 ;
    Work->fncols_new = 0 ;

    /* ---------------------------------------------------------------------- */
    /* turn the entry count into fnr2-by-fnc2 (includes L and U blocks) */
    /* ---------------------------------------------------------------------- */

    DEBUGm2 (("    fsize "ID"  fcurr_size "ID"\n", fsize, have_size)) ;
    DEBUGm2 (("    maxfrsize "ID"  fnr_curr "ID" fnc_curr "ID"\n", maxfrsize,
	Work->fnr_curr, Work->fnc_curr)) ;

    if (too_big || fsize < maxfrsize)
    {
	/* allocate a smaller working array, roughly square, sized from the
	 * shorter of the two maximum dimensions */
	if (fnrows_max > fncols_max)
	{
	    fnc2 = (Int) sqrt ((double) fsize) ;
	    fnc2 = MIN (fnc2, fncols_max + nb) ;
	    fnr2 = fsize / fnc2 ;
	    /* make sure fnr2 is odd */
	    fnr2 = MAX (fnr2, 1) ;
	    if (fnr2 % 2 == 0)
	    {
		fnr2++ ;
		fnc2 = fsize / fnr2 ;
	    }
	}
	else
	{
	    fnr2 = (Int) sqrt ((double) fsize) ;
	    /* make sure fnr2 is odd */
	    fnr2 = MAX (fnr2, 1) ;
	    if (fnr2 % 2 == 0) fnr2++ ;
	    fnr2 = MIN (fnr2, fnrows_max + nb) ;
	    fnc2 = fsize / fnr2 ;
	}
	DEBUGm1 (("   smaller "ID"-by-"ID"\n", fnr2, fnc2)) ;
    }
    else
    {
	/* max working array is small, allocate all of it */
	fnr2 = fnrows_max + nb ;
	fnc2 = fncols_max + nb ;
	fsize = maxfrsize ;
	DEBUGm1 (("   sufficient for ("ID"+"ID")-by-("ID"+"ID")\n",
	    fnrows_max, nb, fncols_max, nb)) ;
    }
    fnr2 = MIN (fnr2, fnrows_max + nb) ;
    fnc2 = MIN (fnc2, fncols_max + nb) ;
    ASSERT (fnr2 % 2 == 1) ;
    ASSERT (fnr2 * fnc2 <= fsize) ;

    /* from here on fnr2 and fnc2 describe the C block only */
    fnr2 -= nb ;
    fnc2 -= nb ;
    ASSERT (fnr2 >= 0) ;
    ASSERT (fnc2 >= 0) ;

    /* ---------------------------------------------------------------------- */
    /* reuse the current allocation, or grow it */
    /* ---------------------------------------------------------------------- */

    if (fsize <= have_size)
    {
	/* use the existing front */
	DEBUGm1 (("   existing front ok\n")) ;
	Work->fnr_curr = fnr2 ;
	Work->fnc_curr = fnc2 ;
	Work->Flblock  = Work->Flublock + nb * nb ;
	Work->Fublock  = Work->Flblock  + nb * fnr2 ;
	Work->Fcblock  = Work->Fublock  + nb * fnc2 ;
    }
    else
    {
	DEBUGm1 (("   Grow front \n")) ;
	Work->do_grow = TRUE ;
	if (!UMF_grow_front (Numeric, fnr2, fnc2, Work, -1))
	{
	    /* since the minimum front size is 1-by-1, it would be nearly
	     * impossible to run out of memory here. */
	    DEBUGm4 (("out of memory: start front\n")) ;
	    return (FALSE) ;
	}
    }
    ASSERT (Work->Flblock  == Work->Flublock + Work->nb*Work->nb) ;
    ASSERT (Work->Fublock  == Work->Flblock  + Work->fnr_curr*Work->nb) ;
    ASSERT (Work->Fcblock  == Work->Fublock  + Work->nb*Work->fnc_curr) ;
    return (TRUE) ;
}
/* Numerically factorize a single n-by-n block of A into L and U, one column
 * at a time, using partial pivoting with a preference for the diagonal entry.
 * The patterns and values of L and U are packed into the LU workspace, which
 * is reallocated (growing by Common->memgrow) whenever the next column might
 * not fit, and trimmed to the space actually used once all columns are done.
 *
 * Returns the final size of LU, in Units.  On an early exit (size overflow,
 * realloc failure, or a singular column while Common->halt_if_singular is
 * set) the current lusize is returned instead and Common->status identifies
 * the reason.  *p_LU always reflects the latest LU pointer. */
size_t TRILINOS_KLU_kernel   /* final size of LU on output */
(
    /* input, not modified */
    Int n,	    /* A is n-by-n */
    Int Ap [ ],	    /* size n+1, column pointers for A */
    Int Ai [ ],	    /* size nz = Ap [n], row indices for A */
    Entry Ax [ ],   /* size nz, values of A */
    Int Q [ ],	    /* size n, optional input permutation */
    size_t lusize,  /* initial size of LU on input */

    /* output, not defined on input */
    Int Pinv [ ],   /* size n, inverse row permutation, where Pinv [i] = k if
		     * row i is the kth pivot row */
    Int P [ ],	    /* size n, row permutation, where P [k] = i if row i is the
		     * kth pivot row. */
    Unit **p_LU,	/* LU array, size lusize on input */
    Entry Udiag [ ],	/* size n, diagonal of U */
    Int Llen [ ],       /* size n, column length of L */
    Int Ulen [ ],	/* size n, column length of U */
    Int Lip [ ],	/* size n, column pointers for L */
    Int Uip [ ],	/* size n, column pointers for U */
    Int *lnz,		/* size of L*/
    Int *unz,		/* size of U*/
    /* workspace, not defined on input */
    Entry X [ ],    /* size n, undefined on input, zero on output */

    /* workspace, not defined on input or output */
    Int Stack [ ],  /* size n */
    Int Flag [ ],   /* size n */
    Int Ap_pos [ ],	/* size n */

    /* other workspace: */
    Int Lpend [ ],		    /* size n workspace, for pruning only */

    /* inputs, not modified on output */
    Int k1,	    	/* the block of A is from k1 to k2-1 */
    Int PSinv [ ],  	/* inverse of P from symbolic factorization */
    double Rs [ ],  	/* scale factors for A */

    /* inputs, modified on output */
    Int Offp [ ],   /* off-diagonal matrix (modified by this routine) */
    Int Offi [ ],
    Entry Offx [ ],
    /* --------------- */
    TRILINOS_KLU_common *Common
)
{
    Entry pivot ;
    double abs_pivot, xsize, nunits, tol, memgrow ;
    Entry *Ux ;
    Int *Li, *Ui ;
    Unit *LU ;		/* LU factors (pattern and values) */
    Int k, p, i, j, pivrow, kbar, diagrow, firstrow, lup, top, scale, len ;
    size_t newlusize ;

#ifndef NDEBUG
    Entry *Lx ;
#endif

    ASSERT (Common != NULL) ;
    scale = Common->scale ;
    tol = Common->tol ;
    memgrow = Common->memgrow ;
    *lnz = 0 ;
    *unz = 0 ;

    /* ---------------------------------------------------------------------- */
    /* get initial Li, Lx, Ui, and Ux */
    /* ---------------------------------------------------------------------- */

    /* lusize is a size_t: print it as a double, as done elsewhere below */
    PRINTF (("input: lusize %g \n", (double) lusize)) ;
    ASSERT (lusize > 0) ;
    LU = *p_LU ;

    /* ---------------------------------------------------------------------- */
    /* initializations */
    /* ---------------------------------------------------------------------- */

    firstrow = 0 ;
    lup = 0 ;		/* next free position in LU, in Units */

    for (k = 0 ; k < n ; k++)
    {
	/* X [k] = 0 ; */
	CLEAR (X [k]) ;
	Flag [k] = EMPTY ;
	Lpend [k] = EMPTY ;	/* flag k as not pruned */
    }

    /* ---------------------------------------------------------------------- */
    /* mark all rows as non-pivotal and determine initial diagonal mapping */
    /* ---------------------------------------------------------------------- */

    /* PSinv does the symmetric permutation, so don't do it here */
    for (k = 0 ; k < n ; k++)
    {
	P [k] = k ;
	Pinv [k] = FLIP (k) ;	/* mark all rows as non-pivotal */
    }
    /* initialize the construction of the off-diagonal matrix */
    Offp [0] = 0 ;

    /* P [k] = row means that UNFLIP (Pinv [row]) = k, and vice versa.
     * If row is pivotal, then Pinv [row] >= 0.  A row is initially "flipped"
     * (Pinv [k] < EMPTY), and then marked "unflipped" when it becomes
     * pivotal. */

#ifndef NDEBUG
    for (k = 0 ; k < n ; k++)
    {
	PRINTF (("Initial P [%d] = %d\n", k, P [k])) ;
    }
#endif

    /* ---------------------------------------------------------------------- */
    /* factorize */
    /* ---------------------------------------------------------------------- */

    for (k = 0 ; k < n ; k++)
    {

	PRINTF (("\n\n==================================== k: %d\n", k)) ;

	/* ------------------------------------------------------------------ */
	/* determine if LU factors have grown too big */
	/* ------------------------------------------------------------------ */

	/* (n - k) entries for L and k entries for U */
	nunits = DUNITS (Int, n - k) + DUNITS (Int, k) +
		 DUNITS (Entry, n - k) + DUNITS (Entry, k) ;

	/* LU can grow by at most 'nunits' entries if the column is dense */
	PRINTF (("lup %d lusize %g lup+nunits: %g\n", lup, (double) lusize,
	    lup+nunits));
	xsize = ((double) lup) + nunits ;
	if (xsize > (double) lusize)
	{
	    /* Check here how much to grow.  The sizes are computed in double
	     * so that the 4*n and 2*n terms cannot overflow an Int before
	     * INT_OVERFLOW has had a chance to reject the request. */
	    /* NOTE(review): the overflow test uses a 4*n+1 margin while the
	     * actual request below uses 2*n+1; confirm that 2*n+1 always
	     * covers nunits for every Int/Entry configuration. */
	    xsize = memgrow * ((double) lusize) + 4 * ((double) n) + 1 ;
	    if (INT_OVERFLOW (xsize))
	    {
		PRINTF (("Matrix is too large (Int overflow)\n")) ;
		Common->status = TRILINOS_KLU_TOO_LARGE ;
		return (lusize) ;
	    }
	    newlusize = (size_t)
		(memgrow * ((double) lusize) + 2 * ((double) n) + 1) ;
	    /* Future work: retry mechanism in case of malloc failure */
	    LU = (Unit*) TRILINOS_KLU_realloc (newlusize, lusize, sizeof (Unit), LU, Common) ;
	    Common->nrealloc++ ;
	    *p_LU = LU ;
	    if (Common->status == TRILINOS_KLU_OUT_OF_MEMORY)
	    {
		/* lusize is left unchanged, so the caller still sees the
		 * size of the block that LU refers to */
		PRINTF (("Matrix is too large (LU)\n")) ;
		return (lusize) ;
	    }
	    lusize = newlusize ;
	    PRINTF (("inc LU to %g done\n", (double) lusize)) ;
	}

	/* ------------------------------------------------------------------ */
	/* start the kth column of L and U */
	/* ------------------------------------------------------------------ */

	Lip [k] = lup ;

	/* ------------------------------------------------------------------ */
	/* compute the nonzero pattern of the kth column of L and U */
	/* ------------------------------------------------------------------ */

#ifndef NDEBUG
	for (i = 0 ; i < n ; i++)
	{
	    ASSERT (Flag [i] < k) ;
	    /* ASSERT (X [i] == 0) ; */
	    ASSERT (IS_ZERO (X [i])) ;
	}
#endif

	top = lsolve_symbolic (n, k, Ap, Ai, Q, Pinv, Stack, Flag,
		    Lpend, Ap_pos, LU, lup, Llen, Lip, k1, PSinv) ;

#ifndef NDEBUG
	PRINTF (("--- in U:\n")) ;
	for (p = top ; p < n ; p++)
	{
	    PRINTF (("pattern of X for U: %d : %d pivot row: %d\n",
		p, Stack [p], Pinv [Stack [p]])) ;
	    ASSERT (Flag [Stack [p]] == k) ;
	}
	PRINTF (("--- in L:\n")) ;
	Li = (Int *) (LU + Lip [k]);
	for (p = 0 ; p < Llen [k] ; p++)
	{
	    PRINTF (("pattern of X in L: %d : %d pivot row: %d\n",
		p, Li [p], Pinv [Li [p]])) ;
	    ASSERT (Flag [Li [p]] == k) ;
	}
	p = 0 ;
	for (i = 0 ; i < n ; i++)
	{
	    ASSERT (Flag [i] <= k) ;
	    if (Flag [i] == k) p++ ;
	}
#endif

	/* ------------------------------------------------------------------ */
	/* get the column of the matrix to factorize and scatter into X */
	/* ------------------------------------------------------------------ */

	construct_column (k, Ap, Ai, Ax, Q, X,
	    k1, PSinv, Rs, scale, Offp, Offi, Offx) ;

	/* ------------------------------------------------------------------ */
	/* compute the numerical values of the kth column (s = L \ A (:,k)) */
	/* ------------------------------------------------------------------ */

	lsolve_numeric (Pinv, LU, Stack, Lip, top, n, Llen, X) ;

#ifndef NDEBUG
	for (p = top ; p < n ; p++)
	{
	    PRINTF (("X for U %d : ",  Stack [p])) ;
	    PRINT_ENTRY (X [Stack [p]]) ;
	}
	Li = (Int *) (LU + Lip [k]) ;
	for (p = 0 ; p < Llen [k] ; p++)
	{
	    PRINTF (("X for L %d : ", Li [p])) ;
	    PRINT_ENTRY (X [Li [p]]) ;
	}
#endif

	/* ------------------------------------------------------------------ */
	/* partial pivoting with diagonal preference */
	/* ------------------------------------------------------------------ */

	/* determine what the "diagonal" is */
	diagrow = P [k] ;   /* might already be pivotal */
	PRINTF (("k %d, diagrow = %d, UNFLIP (diagrow) = %d\n",
	    k, diagrow, UNFLIP (diagrow))) ;

	/* find a pivot and scale the pivot column */
	if (!lpivot (diagrow, &pivrow, &pivot, &abs_pivot, tol, X, LU, Lip,
		    Llen, k, n, Pinv, &firstrow, Common))
	{
	    /* matrix is structurally or numerically singular */
	    Common->status = TRILINOS_KLU_SINGULAR ;
	    if (Common->numerical_rank == EMPTY)
	    {
		/* record only the first singular column encountered */
		Common->numerical_rank = k+k1 ;
		Common->singular_col = Q [k+k1] ;
	    }
	    if (Common->halt_if_singular)
	    {
		/* do not continue the factorization */
		return (lusize) ;
	    }
	}

	/* we now have a valid pivot row, even if the column has NaN's or
	 * has no entries on or below the diagonal at all. */
	PRINTF (("\nk %d : Pivot row %d : ", k, pivrow)) ;
	PRINT_ENTRY (pivot) ;
	ASSERT (pivrow >= 0 && pivrow < n) ;
	ASSERT (Pinv [pivrow] < 0) ;

	/* set the Uip pointer */
	Uip [k] = Lip [k] + UNITS (Int, Llen [k]) + UNITS (Entry, Llen [k]) ;

	/* move the lup pointer to the position where indices of U
	 * should be stored */
	lup += UNITS (Int, Llen [k]) + UNITS (Entry, Llen [k]) ;

	Ulen [k] = n - top ;

	/* extract Stack [top..n-1] to Ui and the values to Ux and clear X */
	GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
	for (p = top, i = 0 ; p < n ; p++, i++)
	{
	    j = Stack [p] ;
	    Ui [i] = Pinv [j] ;
	    Ux [i] = X [j] ;
	    CLEAR (X [j]) ;
	}

	/* position the lu index at the starting point for next column */
	lup += UNITS (Int, Ulen [k]) + UNITS (Entry, Ulen [k]) ;

	/* U(k,k) = pivot */
	Udiag [k] = pivot ;

	/* ------------------------------------------------------------------ */
	/* log the pivot permutation */
	/* ------------------------------------------------------------------ */

	ASSERT (UNFLIP (Pinv [diagrow]) < n) ;
	ASSERT (P [UNFLIP (Pinv [diagrow])] == diagrow) ;

	if (pivrow != diagrow)
	{
	    /* an off-diagonal pivot has been chosen */
	    Common->noffdiag++ ;
	    PRINTF ((">>>>>>>>>>>>>>>>> pivrow %d k %d off-diagonal\n",
			pivrow, k)) ;
	    if (Pinv [diagrow] < 0)
	    {
		/* the former diagonal row index, diagrow, has not yet been
		 * chosen as a pivot row.  Log this diagrow as the "diagonal"
		 * entry in the column kbar for which the chosen pivot row,
		 * pivrow, was originally logged as the "diagonal" */
		kbar = FLIP (Pinv [pivrow]) ;
		P [kbar] = diagrow ;
		Pinv [diagrow] = FLIP (kbar) ;
	    }
	}
	P [k] = pivrow ;
	Pinv [pivrow] = k ;

#ifndef NDEBUG
	for (i = 0 ; i < n ; i++) { ASSERT (IS_ZERO (X [i])) ;}
	GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
	for (p = 0 ; p < len ; p++)
	{
	    PRINTF (("Column %d of U: %d : ", k, Ui [p])) ;
	    PRINT_ENTRY (Ux [p]) ;
	}
	GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
	for (p = 0 ; p < len ; p++)
	{
	    PRINTF (("Column %d of L: %d : ", k, Li [p])) ;
	    PRINT_ENTRY (Lx [p]) ;
	}
#endif

	/* ------------------------------------------------------------------ */
	/* symmetric pruning */
	/* ------------------------------------------------------------------ */

	prune (Lpend, Pinv, k, pivrow, LU, Uip, Lip, Ulen, Llen) ;

	*lnz += Llen [k] + 1 ; /* 1 added to lnz for diagonal */
	*unz += Ulen [k] + 1 ; /* 1 added to unz for diagonal */
    }

    /* ---------------------------------------------------------------------- */
    /* finalize column pointers for L and U, and put L in the pivotal order */
    /* ---------------------------------------------------------------------- */

    /* the row indices stored in L are original row numbers; map each one
     * through Pinv now that every row has been assigned a pivot position */
    for (p = 0 ; p < n ; p++)
    {
	Li = (Int *) (LU + Lip [p]) ;
	for (i = 0 ; i < Llen [p] ; i++)
	{
	    Li [i] = Pinv [Li [i]] ;
	}
    }

#ifndef NDEBUG
    for (i = 0 ; i < n ; i++)
    {
	PRINTF (("P [%d] = %d   Pinv [%d] = %d\n", i, P [i], i, Pinv [i])) ;
    }
    for (i = 0 ; i < n ; i++)
    {
	ASSERT (Pinv [i] >= 0 && Pinv [i] < n) ;
	ASSERT (P [i] >= 0 && P [i] < n) ;
	ASSERT (P [Pinv [i]] == i) ;
	ASSERT (IS_ZERO (X [i])) ;
    }
#endif

    /* ---------------------------------------------------------------------- */
    /* shrink the LU factors to just the required size */
    /* ---------------------------------------------------------------------- */

    newlusize = lup ;
    ASSERT ((size_t) newlusize <= lusize) ;

    /* this cannot fail, since the block is decreasing in size */
    LU = (Unit*) TRILINOS_KLU_realloc (newlusize, lusize, sizeof (Unit), LU, Common) ;
    *p_LU = LU ;
    return (newlusize) ;
}