/* UMF_fsize:  for every frontal matrix j (a node with Npiv [j] > 0), compute
 * Fsize [j], the largest front size (rows * columns) seen in the subtree
 * rooted at j.  Nodes that are not frontal matrices keep Fsize [j] = EMPTY
 * unless a child propagates a size into them.
 *
 *  nn      number of nodes
 *  Fsize   output, size nn
 *  Fnrows  input, size nn: row dimension of each front
 *  Fncols  input, size nn: column dimension of each front
 *  Parent  input, size nn: parent of each node, or EMPTY for a root
 *  Npiv    input, size nn: Npiv [j] > 0 marks node j as a frontal matrix
 *
 * The single forward sweep is sufficient because a parent always has a
 * larger index than its children (see the ASSERT below), so Fsize [j] has
 * already received the contribution of all children when node j is visited.
 */
GLOBAL void UMF_fsize
(
    Int nn,
    Int Fsize [ ],
    Int Fnrows [ ],
    Int Fncols [ ],
    Int Parent [ ],
    Int Npiv [ ]
)
{
    Int j, parent, frsize, r, c ;

    for (j = 0 ; j < nn ; j++)
    {
        Fsize [j] = EMPTY ;
    }

    /* ---------------------------------------------------------------------- */
    /* find max front size for tree rooted at node j, for each front j */
    /* ---------------------------------------------------------------------- */

    DEBUG1 (("\n\n========================================FRONTS:\n")) ;
    for (j = 0 ; j < nn ; j++)
    {
        if (Npiv [j] > 0)
        {
            /* this is a frontal matrix */
            parent = Parent [j] ;
            r = Fnrows [j] ;
            c = Fncols [j] ;

            /* Test for overflow in double precision BEFORE forming the Int
             * product: signed integer overflow is undefined behaviour, so
             * r * c must only be evaluated when it is known to fit. */
            if (INT_OVERFLOW (((double) r) * ((double) c)))
            {
                /* :: frsize int overflow :: */
                frsize = Int_MAX ;
            }
            else
            {
                frsize = r * c ;
            }

            DEBUG1 ((""ID" : npiv "ID" size "ID" parent "ID" ",
                j, Npiv [j], frsize, parent)) ;
            Fsize [j] = MAX (Fsize [j], frsize) ;
            DEBUG1 (("Fsize [j = "ID"] = "ID"\n", j, Fsize [j])) ;

            if (parent != EMPTY)
            {
                /* find the maximum frontsize of self and children */
                ASSERT (Npiv [parent] > 0) ;
                ASSERT (parent > j) ;
                Fsize [parent] = MAX (Fsize [parent], Fsize [j]) ;
                DEBUG1 (("Fsize [parent = "ID"] = "ID"\n",
                    parent, Fsize [parent])) ;
            }
        }
    }
}
/* UMF_mem_alloc_element:  carve an nrows-by-ncols element out of the tail of
 * Numeric->Memory and initialize its header.
 *
 * On success the function returns the (nonzero) offset of the element in
 * Numeric->Memory and sets:
 *      *Cols   -> column index array, ncols entries
 *      *Rows   -> row index array, nrows entries (directly after *Cols)
 *      *C      -> numerical block that follows the index arrays
 *      *epout  -> the element header
 *
 * It returns 0 if the size computation would overflow an Int, or if
 * UMF_mem_alloc_tail_block cannot supply the block.
 *
 * *size is always written: it holds GET_ELEMENT_SIZE (nrows, ncols) when the
 * overflow test fails, and that value plus one once the allocation has been
 * attempted (whether or not the attempt succeeded).
 */
GLOBAL Int UMF_mem_alloc_element
(
    NumericType *Numeric,
    Int nrows,
    Int ncols,
    Int **Rows,
    Int **Cols,
    Entry **C,
    Int *size,
    Element **epout
)
{
    Unit *base, *cursor ;
    Element *header ;
    Int *colidx ;
    Int offset ;

    ASSERT (Numeric != (NumericType *) NULL) ;
    ASSERT (Numeric->Memory != (Unit *) NULL) ;

    /* units needed for the element itself */
    *size = GET_ELEMENT_SIZE (nrows, ncols) ;
    if (INT_OVERFLOW (DGET_ELEMENT_SIZE (nrows, ncols) + 1))
    {
        /* :: allocate element, int overflow :: */
        /* problem is too large */
        return (0) ;
    }

    /* request the block, then count one extra unit in the reported size */
    offset = UMF_mem_alloc_tail_block (Numeric, *size) ;
    *size += 1 ;
    if (offset == 0)
    {
        DEBUG0 (("alloc element failed - out of memory\n")) ;
        /* out of memory */
        return (0) ;
    }

    base = Numeric->Memory + offset ;
    header = (Element *) base ;
    DEBUG2 (("alloc_element done ("ID" x "ID"): p: "ID" i "ID"\n",
        nrows, ncols, (Int) (base-Numeric->Memory), offset)) ;

    /* Layout of an element, in order:
     *   (1) Element header
     *   (2) col [0..ncols-1] indices
     *   (3) row [0..nrows-1] indices
     *   (4) C [0..nrows-1, 0..ncols-1]
     */
    cursor = base + UNITS (Element, 1) ;
    colidx = (Int *) cursor ;
    *Cols = colidx ;
    *Rows = colidx + ncols ;
    cursor += UNITS (Int, ncols + nrows) ;
    *C = (Entry *) cursor ;

    /* fresh header: nothing has been removed from the element yet */
    header->next = EMPTY ;
    header->rdeg = 0 ;
    header->cdeg = 0 ;
    header->ncolsleft = ncols ;
    header->nrowsleft = nrows ;
    header->ncols = ncols ;
    header->nrows = nrows ;

    DEBUG2 (("new block size: "ID" ", GET_BLOCK_SIZE (Numeric->Memory + offset))) ;
    DEBUG2 (("Element size needed "ID"\n", GET_ELEMENT_SIZE (nrows, ncols))) ;

    *epout = header ;

    /* the caller gets the offset into Numeric->Memory */
    return (offset) ;
}
/* UMFPACK_numeric:  numerical LU factorization driver.
 *
 * Reads the control parameters, validates the Symbolic object, allocates the
 * temporary Work object and the Numeric object, calls UMF_kernel to do the
 * factorization, frees the workspace, shrinks the Numeric arrays to their
 * final sizes, fills in the Info statistics, and hands the Numeric object
 * back through *NumericHandle.
 *
 *  Ap, Ai, Ax      matrix in compressed-column form; Ap [n_col] is reported
 *                  as the number of entries.  (Az is present only when
 *                  COMPLEX is defined.)
 *  SymbolicHandle  Symbolic object; checked with UMF_valid_symbolic
 *  NumericHandle   output; set to NULL before any allocation is attempted
 *                  and to the new Numeric object on success
 *  Control         control parameters, read through GET_CONTROL
 *  User_Info       optional statistics array; a local array is used if NULL
 *
 * Return value (also stored in Info [UMFPACK_STATUS]):
 *  UMFPACK_ERROR_invalid_Symbolic_object   Symbolic failed validation
 *  UMFPACK_ERROR_out_of_memory             initial size overflows an Int, or
 *                                          work_alloc / numeric_alloc failed
 *  UMFPACK_ERROR_argument_missing          Ap, Ai, Ax or NumericHandle NULL
 *  any status < UMFPACK_OK from UMF_kernel
 *  UMFPACK_OK or UMFPACK_WARNING_singular_matrix on success
 */
GLOBAL Int UMFPACK_numeric
(
    const Int Ap [ ],
    const Int Ai [ ],
    const double Ax [ ],
#ifdef COMPLEX
    const double Az [ ],
#endif
    void *SymbolicHandle,
    void **NumericHandle,
    const double Control [UMFPACK_CONTROL],
    double User_Info [UMFPACK_INFO]
)
{

    /* ---------------------------------------------------------------------- */
    /* local variables */
    /* ---------------------------------------------------------------------- */

    double Info2 [UMFPACK_INFO], alloc_init, relpt, relpt2, droptol,
        front_alloc_init, stats [2] ;
    double *Info ;
    WorkType WorkSpace, *Work ;
    NumericType *Numeric ;
    SymbolicType *Symbolic ;
    Int n_row, n_col, n_inner, newsize, i, status, *inew, npiv, ulen, scale ;
    Unit *mnew ;

    /* ---------------------------------------------------------------------- */
    /* get the amount of time used by the process so far */
    /* ---------------------------------------------------------------------- */

    umfpack_tic (stats) ;

    /* ---------------------------------------------------------------------- */
    /* initialize and check inputs */
    /* ---------------------------------------------------------------------- */

#ifndef NDEBUG
    UMF_dump_start ( ) ;
    /* remember the allocation count so the ASSERTs below can track leaks */
    init_count = UMF_malloc_count ;
    DEBUGm4 (("\nUMFPACK numeric: U transpose version\n")) ;
#endif

    /* If front_alloc_init negative then allocate that size of front in
     * UMF_start_front.  If alloc_init negative, then allocate that initial
     * size of Numeric->Memory. */

    relpt = GET_CONTROL (UMFPACK_PIVOT_TOLERANCE,
        UMFPACK_DEFAULT_PIVOT_TOLERANCE) ;
    relpt2 = GET_CONTROL (UMFPACK_SYM_PIVOT_TOLERANCE,
        UMFPACK_DEFAULT_SYM_PIVOT_TOLERANCE) ;
    alloc_init = GET_CONTROL (UMFPACK_ALLOC_INIT, UMFPACK_DEFAULT_ALLOC_INIT) ;
    front_alloc_init = GET_CONTROL (UMFPACK_FRONT_ALLOC_INIT,
        UMFPACK_DEFAULT_FRONT_ALLOC_INIT) ;
    scale = GET_CONTROL (UMFPACK_SCALE, UMFPACK_DEFAULT_SCALE) ;
    droptol = GET_CONTROL (UMFPACK_DROPTOL, UMFPACK_DEFAULT_DROPTOL) ;

    /* clamp the controls: tolerances to [0,1], droptol to >= 0, and the
     * front allocation ratio to <= 1 (negative values are kept as-is) */
    relpt = MAX (0.0, MIN (relpt, 1.0)) ;
    relpt2 = MAX (0.0, MIN (relpt2, 1.0)) ;
    droptol = MAX (0.0, droptol) ;
    front_alloc_init = MIN (1.0, front_alloc_init) ;

    /* any scale option other than NONE or MAX falls back to the default */
    if (scale != UMFPACK_SCALE_NONE && scale != UMFPACK_SCALE_MAX)
    {
        scale = UMFPACK_DEFAULT_SCALE ;
    }

    if (User_Info != (double *) NULL)
    {
        /* return Info in user's array */
        Info = User_Info ;
        /* clear the parts of Info that are set by UMFPACK_numeric */
        for (i = UMFPACK_NUMERIC_SIZE ; i <= UMFPACK_MAX_FRONT_NCOLS ; i++)
        {
            Info [i] = EMPTY ;
        }
        for (i = UMFPACK_NUMERIC_DEFRAG ; i < UMFPACK_IR_TAKEN ; i++)
        {
            Info [i] = EMPTY ;
        }
    }
    else
    {
        /* no Info array passed - use local one instead */
        Info = Info2 ;
        for (i = 0 ; i < UMFPACK_INFO ; i++)
        {
            Info [i] = EMPTY ;
        }
    }

    Symbolic = (SymbolicType *) SymbolicHandle ;
    Numeric = (NumericType *) NULL ;
    if (!UMF_valid_symbolic (Symbolic))
    {
        Info [UMFPACK_STATUS] = UMFPACK_ERROR_invalid_Symbolic_object ;
        return (UMFPACK_ERROR_invalid_Symbolic_object) ;
    }

    /* compute alloc_init automatically for AMD or other symmetric ordering */
    if (/* Symbolic->ordering == UMFPACK_ORDERING_AMD */ alloc_init >= 0
        && Symbolic->amd_lunz > 0)
    {
        /* ratio of (nz + amd_lunz) to lunz_bound, capped at 1 and then
         * scaled by UMF_REALLOC_INCREASE */
        alloc_init = (Symbolic->nz + Symbolic->amd_lunz) / Symbolic->lunz_bound;
        alloc_init = MIN (1.0, alloc_init) ;
        alloc_init *= UMF_REALLOC_INCREASE ;
    }

    n_row = Symbolic->n_row ;
    n_col = Symbolic->n_col ;
    n_inner = MIN (n_row, n_col) ;

    /* check for integer overflow in Numeric->Memory minimum size */
    if (INT_OVERFLOW (Symbolic->dnum_mem_init_usage * sizeof (Unit)))
    {
        /* :: int overflow, initial Numeric->Memory size :: */
        /* There's no hope to allocate a Numeric object big enough simply to
         * hold the initial matrix, so return an out-of-memory condition */
        DEBUGm4 (("out of memory: numeric int overflow\n")) ;
        Info [UMFPACK_STATUS] = UMFPACK_ERROR_out_of_memory ;
        return (UMFPACK_ERROR_out_of_memory) ;
    }

    Info [UMFPACK_STATUS] = UMFPACK_OK ;
    Info [UMFPACK_NROW] = n_row ;
    Info [UMFPACK_NCOL] = n_col ;
    Info [UMFPACK_SIZE_OF_UNIT] = (double) (sizeof (Unit)) ;

    if (!Ap || !Ai || !Ax || !NumericHandle)
    {
        Info [UMFPACK_STATUS] = UMFPACK_ERROR_argument_missing ;
        return (UMFPACK_ERROR_argument_missing) ;
    }

    Info [UMFPACK_NZ] = Ap [n_col] ;
    /* no Numeric object is returned unless the factorization succeeds */
    *NumericHandle = (void *) NULL ;

    /* ---------------------------------------------------------------------- */
    /* allocate the Work object */
    /* ---------------------------------------------------------------------- */

    /* (1) calls UMF_malloc 15 or 17 times, to obtain temporary workspace of
     * size c+1 Entry's and 2*(n_row+1) + 3*(n_col+1) + (n_col+n_inner+1) +
     * (nn+1) + * 3*(c+1) + 2*(r+1) + max(r,c) + (nfr+1) integers plus 2*nn
     * more integers if diagonal pivoting is to be done.  r is the maximum
     * number of rows in any frontal matrix, c is the maximum number of columns
     * in any frontal matrix, n_inner is min (n_row,n_col), nn is
     * max (n_row,n_col), and nfr is the number of frontal matrices.  For a
     * square matrix, this is c+1 Entry's and about 8n + 3c + 2r + max(r,c) +
     * nfr integers, plus 2n more for diagonal pivoting.
     *
     * NOTE(review): the ASSERT after work_alloc expects
     * 16 + 2*prefer_diagonal allocations, which does not match the
     * "15 or 17" stated above -- confirm which count is current. */

    Work = &WorkSpace ;
    Work->n_row = n_row ;
    Work->n_col = n_col ;
    Work->nfr = Symbolic->nfr ;
    Work->nb = Symbolic->nb ;
    Work->n1 = Symbolic->n1 ;
    if (!work_alloc (Work, Symbolic))
    {
        DEBUGm4 (("out of memory: numeric work\n")) ;
        Info [UMFPACK_STATUS] = UMFPACK_ERROR_out_of_memory ;
        /* error() is defined elsewhere; presumably it releases Numeric and
         * Work -- verify against its definition */
        error (&Numeric, Work) ;
        return (UMFPACK_ERROR_out_of_memory) ;
    }
    ASSERT (UMF_malloc_count == init_count + 16 + 2*Symbolic->prefer_diagonal) ;

    /* ---------------------------------------------------------------------- */
    /* allocate Numeric object */
    /* ---------------------------------------------------------------------- */

    /* (2) calls UMF_malloc 10 or 11 times, for a total space of
     * sizeof (NumericType) bytes, 4*(n_row+1) + 4*(n_row+1) integers, and
     * (n_inner+1) Entry's, plus n_row Entry's if row scaling is to be done.
     * sizeof (NumericType) is a small constant.  Next, it calls UMF_malloc
     * once, for the variable-sized part of the Numeric object
     * (Numeric->Memory).  The size of this object is the larger of
     * (Control [UMFPACK_ALLOC_INIT]) * (the approximate upper bound computed
     * by UMFPACK_symbolic), and the minimum required to start the numerical
     * factorization.  * This request is reduced if it fails. */

    if (!numeric_alloc (&Numeric, Symbolic, alloc_init, scale))
    {
        DEBUGm4 (("out of memory: initial numeric\n")) ;
        Info [UMFPACK_STATUS] = UMFPACK_ERROR_out_of_memory ;
        error (&Numeric, Work) ;
        return (UMFPACK_ERROR_out_of_memory) ;
    }
    DEBUG0 (("malloc: init_count "ID" UMF_malloc_count "ID"\n",
        init_count, UMF_malloc_count)) ;
    ASSERT (UMF_malloc_count == init_count
        + (16 + 2*Symbolic->prefer_diagonal)
        + (11 + (scale != UMFPACK_SCALE_NONE))) ;

    /* set control parameters */
    Numeric->relpt = relpt ;
    Numeric->relpt2 = relpt2 ;
    Numeric->droptol = droptol ;
    Numeric->alloc_init = alloc_init ;
    Numeric->front_alloc_init = front_alloc_init ;
    Numeric->scale = scale ;

    DEBUG0 (("umf relpt %g %g init %g %g inc %g red %g\n",
        relpt, relpt2, alloc_init, front_alloc_init,
        UMF_REALLOC_INCREASE, UMF_REALLOC_REDUCTION)) ;

    /* ---------------------------------------------------------------------- */
    /* scale and factorize */
    /* ---------------------------------------------------------------------- */

    /* (3) During numerical factorization (inside UMF_kernel), the variable-size
     * block of memory is increased in size via a call to UMF_realloc if it is
     * found to be too small.  During factorization, this block holds the
     * pattern and values of L and U at the top end, and the elements
     * (contribution blocks) and the current frontal matrix (Work->F*) at the
     * bottom end.  The peak size of the variable-sized object is estimated in
     * UMFPACK_*symbolic (Info [UMFPACK_VARIABLE_PEAK_ESTIMATE]), although this
     * upper bound can be very loose.  The size of the Symbolic object
     * (which is currently allocated) is in Info [UMFPACK_SYMBOLIC_SIZE], and
     * is between 2*n and 13*n integers. */

    DEBUG0 (("Calling umf_kernel\n")) ;
    status = UMF_kernel (Ap, Ai, Ax,
#ifdef COMPLEX
        Az,
#endif
        Numeric, Work, Symbolic) ;

    Info [UMFPACK_STATUS] = status ;
    if (status < UMFPACK_OK)
    {
        /* out of memory, or pattern has changed */
        error (&Numeric, Work) ;
        return (status) ;
    }

    Info [UMFPACK_FORCED_UPDATES] = Work->nforced ;
    Info [UMFPACK_VARIABLE_INIT] = Numeric->init_usage ;
    if (Symbolic->prefer_diagonal)
    {
        Info [UMFPACK_NOFF_DIAG] = Work->noff_diagonal ;
    }

    DEBUG0 (("malloc: init_count "ID" UMF_malloc_count "ID"\n",
        init_count, UMF_malloc_count)) ;

    npiv = Numeric->npiv ;      /* = n_inner for nonsingular matrices */
    ulen = Numeric->ulen ;      /* = 0 for square nonsingular matrices */

    /* ---------------------------------------------------------------------- */
    /* free Work object */
    /* ---------------------------------------------------------------------- */

    /* (4) After numerical factorization all of the objects allocated in step
     * (1) are freed via UMF_free, except that one object of size n_col+1 is
     * kept if there are off-diagonal nonzeros in the last pivot row (can only
     * occur for singular or rectangular matrices).  This is Work->Upattern,
     * which is transferred to Numeric->Upattern if ulen > 0. */

    DEBUG0 (("malloc: init_count "ID" UMF_malloc_count "ID"\n",
        init_count, UMF_malloc_count)) ;

    free_work (Work) ;

    DEBUG0 (("malloc: init_count "ID" UMF_malloc_count "ID"\n",
        init_count, UMF_malloc_count)) ;
    DEBUG0 (("Numeric->ulen: "ID" scale: "ID"\n", ulen, scale)) ;
    ASSERT (UMF_malloc_count == init_count + (ulen > 0) +
        (11 + (scale != UMFPACK_SCALE_NONE))) ;

    /* ---------------------------------------------------------------------- */
    /* reduce Lpos, Lilen, Lip, Upos, Uilen and Uip to size npiv+1 */
    /* ---------------------------------------------------------------------- */

    /* (5) Six components of the Numeric object are reduced in size if the
     * matrix is singular or rectangular.   The original size is 3*(n_row+1) +
     * 3*(n_col+1) integers.  The new size is 6*(npiv+1) integers.  For
     * square non-singular matrices, these two sizes are the same.
     *
     * A failed UMF_realloc is not an error here: the old, larger array is
     * simply kept. */

    if (npiv < n_row)
    {
        /* reduce Lpos, Uilen, and Uip from size n_row+1 to size npiv+1 */
        inew = (Int *) UMF_realloc (Numeric->Lpos, npiv+1, sizeof (Int)) ;
        if (inew)
        {
            Numeric->Lpos = inew ;
        }
        inew = (Int *) UMF_realloc (Numeric->Uilen, npiv+1, sizeof (Int)) ;
        if (inew)
        {
            Numeric->Uilen = inew ;
        }
        inew = (Int *) UMF_realloc (Numeric->Uip, npiv+1, sizeof (Int)) ;
        if (inew)
        {
            Numeric->Uip = inew ;
        }
    }

    if (npiv < n_col)
    {
        /* reduce Upos, Lilen, and Lip from size n_col+1 to size npiv+1 */
        inew = (Int *) UMF_realloc (Numeric->Upos, npiv+1, sizeof (Int)) ;
        if (inew)
        {
            Numeric->Upos = inew ;
        }
        inew = (Int *) UMF_realloc (Numeric->Lilen, npiv+1, sizeof (Int)) ;
        if (inew)
        {
            Numeric->Lilen = inew ;
        }
        inew = (Int *) UMF_realloc (Numeric->Lip, npiv+1, sizeof (Int)) ;
        if (inew)
        {
            Numeric->Lip = inew ;
        }
    }

    /* ---------------------------------------------------------------------- */
    /* reduce Numeric->Upattern from size n_col+1 to size ulen+1 */
    /* ---------------------------------------------------------------------- */

    /* (6) The size of Numeric->Upattern (formerly Work->Upattern) is reduced
     * from size n_col+1 to size ulen + 1.  If ulen is zero, the object does
     * not exist. */

    DEBUG4 (("ulen: "ID" Upattern "ID"\n", ulen, (Int) Numeric->Upattern)) ;
    ASSERT (IMPLIES (ulen == 0, Numeric->Upattern == (Int *) NULL)) ;
    if (ulen > 0 && ulen < n_col)
    {
        inew = (Int *) UMF_realloc (Numeric->Upattern, ulen+1, sizeof (Int)) ;
        if (inew)
        {
            Numeric->Upattern = inew ;
        }
    }

    /* ---------------------------------------------------------------------- */
    /* reduce Numeric->Memory to hold just the LU factors at the head */
    /* ---------------------------------------------------------------------- */

    /* (7) The variable-sized block (Numeric->Memory) is reduced to hold just L
     * and U, via a call to UMF_realloc, since the frontal matrices are no
     * longer needed. */

    newsize = Numeric->ihead ;
    if (newsize < Numeric->size)
    {
        mnew = (Unit *) UMF_realloc (Numeric->Memory, newsize, sizeof (Unit)) ;
        if (mnew)
        {
            /* realloc succeeded (how can it fail since the size is reduced?) */
            Numeric->Memory = mnew ;
            Numeric->size = newsize ;
        }
    }
    /* head and tail now coincide at the end of the block: the tail is empty */
    Numeric->ihead = Numeric->size ;
    Numeric->itail = Numeric->ihead ;
    Numeric->tail_usage = 0 ;
    Numeric->ibig = EMPTY ;
    /* UMF_mem_alloc_tail_block can no longer be called (no tail marker) */

    /* ---------------------------------------------------------------------- */
    /* report the results and return the Numeric object */
    /* ---------------------------------------------------------------------- */

    UMF_set_stats (
        Info,
        Symbolic,
        (double) Numeric->max_usage,    /* actual peak Numeric->Memory */
        (double) Numeric->size,         /* actual final Numeric->Memory */
        Numeric->flops,                 /* actual "true flops" */
        (double) Numeric->lnz + n_inner,        /* actual nz in L */
        (double) Numeric->unz + Numeric->nnzpiv,        /* actual nz in U */
        (double) Numeric->maxfrsize,    /* actual largest front size */
        (double) ulen,                  /* actual Numeric->Upattern size */
        (double) npiv,                  /* actual # pivots found */
        (double) Numeric->maxnrows,     /* actual largest #rows in front */
        (double) Numeric->maxncols,     /* actual largest #cols in front */
        scale != UMFPACK_SCALE_NONE,
        Symbolic->prefer_diagonal,
        ACTUAL) ;

    Info [UMFPACK_ALLOC_INIT_USED] = Numeric->alloc_init ;
    Info [UMFPACK_NUMERIC_DEFRAG] = Numeric->ngarbage ;
    Info [UMFPACK_NUMERIC_REALLOC] = Numeric->nrealloc ;
    Info [UMFPACK_NUMERIC_COSTLY_REALLOC] = Numeric->ncostly ;
    Info [UMFPACK_COMPRESSED_PATTERN] = Numeric->isize ;
    Info [UMFPACK_LU_ENTRIES] = Numeric->nLentries + Numeric->nUentries +
        Numeric->npiv ;
    Info [UMFPACK_UDIAG_NZ] = Numeric->nnzpiv ;
    Info [UMFPACK_RSMIN] = Numeric->rsmin ;
    Info [UMFPACK_RSMAX] = Numeric->rsmax ;
    Info [UMFPACK_WAS_SCALED] = Numeric->scale ;

    /* nz in L and U with no dropping of small entries */
    Info [UMFPACK_ALL_LNZ] = Numeric->all_lnz + n_inner ;
    Info [UMFPACK_ALL_UNZ] = Numeric->all_unz + Numeric->nnzpiv ;
    Info [UMFPACK_NZDROPPED] =
        (Numeric->all_lnz - Numeric->lnz)
        + (Numeric->all_unz - Numeric->unz) ;

    /* estimate of the reciprocal of the condition number. */
    if (SCALAR_IS_ZERO (Numeric->min_udiag)
        || SCALAR_IS_ZERO (Numeric->max_udiag)
        || SCALAR_IS_NAN (Numeric->min_udiag)
        || SCALAR_IS_NAN (Numeric->max_udiag))
    {
        /* rcond is zero if there is any zero or NaN on the diagonal */
        Numeric->rcond = 0.0 ;
    }
    else
    {
        /* estimate of the reciprocal of the condition number. */
        /* min_udiag and max_udiag both passed the zero and NaN tests above,
         * so the ratio is taken directly. */
        Numeric->rcond = Numeric->min_udiag / Numeric->max_udiag ;
    }
    Info [UMFPACK_UMIN] = Numeric->min_udiag ;
    Info [UMFPACK_UMAX] = Numeric->max_udiag ;
    Info [UMFPACK_RCOND] = Numeric->rcond ;

    if (Numeric->nnzpiv < n_inner
        || SCALAR_IS_ZERO (Numeric->rcond) || SCALAR_IS_NAN (Numeric->rcond))
    {
        /* there are zeros and/or NaN's on the diagonal of U */
        DEBUG0 (("Warning, matrix is singular in umfpack_numeric\n")) ;
        DEBUG0 (("nnzpiv "ID" n_inner "ID" rcond %g\n", Numeric->nnzpiv,
            n_inner, Numeric->rcond)) ;
        /* a singular matrix is a warning, not an error: the Numeric object
         * is still returned to the caller below */
        status = UMFPACK_WARNING_singular_matrix ;
        Info [UMFPACK_STATUS] = status ;
    }

    Numeric->valid = NUMERIC_VALID ;
    *NumericHandle = (void *) Numeric ;

    /* Numeric has 11 to 13 objects */
    ASSERT (UMF_malloc_count == init_count + 11 +
        + (ulen > 0)                        /* Numeric->Upattern */
        + (scale != UMFPACK_SCALE_NONE)) ;  /* Numeric->Rs */

    /* ---------------------------------------------------------------------- */
    /* get the time used by UMFPACK_numeric */
    /* ---------------------------------------------------------------------- */

    umfpack_toc (stats) ;
    Info [UMFPACK_NUMERIC_WALLTIME] = stats [0] ;
    Info [UMFPACK_NUMERIC_TIME] = stats [1] ;

    /* return UMFPACK_OK or UMFPACK_WARNING_singular_matrix */
    return (status) ;
}
size_t KLU_kernel_factor /* 0 if failure, size of LU if OK */ ( /* inputs, not modified */ Int n, /* A is n-by-n. n must be > 0. */ Int Ap [ ], /* size n+1, column pointers for A */ Int Ai [ ], /* size nz = Ap [n], row indices for A */ Entry Ax [ ], /* size nz, values of A */ Int Q [ ], /* size n, optional column permutation */ double Lsize, /* estimate of number of nonzeros in L */ /* outputs, not defined on input */ Unit **p_LU, /* row indices and values of L and U */ Entry Udiag [ ], /* size n, diagonal of U */ Int Llen [ ], /* size n, column length of L */ Int Ulen [ ], /* size n, column length of U */ Int Lip [ ], /* size n, column pointers for L */ Int Uip [ ], /* size n, column pointers for U */ Int P [ ], /* row permutation, size n */ Int *lnz, /* size of L */ Int *unz, /* size of U */ /* workspace, undefined on input */ Entry *X, /* size n double's, zero on output */ Int *Work, /* size 5n Int's */ /* inputs, not modified on output */ Int k1, /* the block of A is from k1 to k2-1 */ Int PSinv [ ], /* inverse of P from symbolic factorization */ double Rs [ ], /* scale factors for A */ /* inputs, modified on output */ Int Offp [ ], /* off-diagonal matrix (modified by this routine) */ Int Offi [ ], Entry Offx [ ], /* --------------- */ KLU_common *Common ) { double maxlnz, dunits ; Unit *LU ; Int *Pinv, *Lpend, *Stack, *Flag, *Ap_pos, *W ; Int lsize, usize, anz, ok ; size_t lusize ; ASSERT (Common != NULL) ; /* ---------------------------------------------------------------------- */ /* get control parameters, or use defaults */ /* ---------------------------------------------------------------------- */ n = MAX (1, n) ; anz = Ap [n+k1] - Ap [k1] ; if (Lsize <= 0) { Lsize = -Lsize ; Lsize = MAX (Lsize, 1.0) ; lsize = Lsize * anz + n ; } else { lsize = Lsize ; } usize = lsize ; lsize = MAX (n+1, lsize) ; usize = MAX (n+1, usize) ; maxlnz = (((double) n) * ((double) n) + ((double) n)) / 2. 
; maxlnz = MIN (maxlnz, ((double) INT_MAX)) ; lsize = MIN (maxlnz, lsize) ; usize = MIN (maxlnz, usize) ; PRINTF (("Welcome to klu: n %d anz %d k1 %d lsize %d usize %d maxlnz %g\n", n, anz, k1, lsize, usize, maxlnz)) ; /* ---------------------------------------------------------------------- */ /* allocate workspace and outputs */ /* ---------------------------------------------------------------------- */ /* return arguments are not yet assigned */ *p_LU = (Unit *) NULL ; /* these computations are safe from size_t overflow */ W = Work ; Pinv = (Int *) W ; W += n ; Stack = (Int *) W ; W += n ; Flag = (Int *) W ; W += n ; Lpend = (Int *) W ; W += n ; Ap_pos = (Int *) W ; W += n ; dunits = DUNITS (Int, lsize) + DUNITS (Entry, lsize) + DUNITS (Int, usize) + DUNITS (Entry, usize) ; lusize = (size_t) dunits ; ok = !INT_OVERFLOW (dunits) ; LU = ok ? KLU_malloc (lusize, sizeof (Unit), Common) : NULL ; if (LU == NULL) { /* out of memory, or problem too large */ Common->status = KLU_OUT_OF_MEMORY ; lusize = 0 ; return (lusize) ; } /* ---------------------------------------------------------------------- */ /* factorize */ /* ---------------------------------------------------------------------- */ /* with pruning, and non-recursive depth-first-search */ lusize = KLU_kernel (n, Ap, Ai, Ax, Q, lusize, Pinv, P, &LU, Udiag, Llen, Ulen, Lip, Uip, lnz, unz, X, Stack, Flag, Ap_pos, Lpend, k1, PSinv, Rs, Offp, Offi, Offx, Common) ; /* ---------------------------------------------------------------------- */ /* return LU factors, or return nothing if an error occurred */ /* ---------------------------------------------------------------------- */ if (Common->status < KLU_OK) { LU = KLU_free (LU, lusize, sizeof (Unit), Common) ; lusize = 0 ; } *p_LU = LU ; PRINTF ((" in klu noffdiag %d\n", Common->noffdiag)) ; return (lusize) ; }
/* UMF_grow_front:  allocate a larger working array for the current frontal
 * matrix and move the existing contribution block (if any) into it.
 *
 * The new front is sized as close to the requested fnr2-by-fnc2 (plus the
 * nb-wide pivot blocks) as memory allows, but never smaller than what
 * Work->fnrows_new / Work->fncols_new require and never larger than the
 * maximum for the chain (Work->fnrows_max / Work->fncols_max).  The row
 * dimension is always kept odd.
 *
 * If the first allocation fails, UMF_get_memory is called (garbage
 * collection / realloc) and the request is retried; after that the request
 * is shrunk repeatedly by UMF_REALLOC_REDUCTION down to the minimum size.
 *
 * On success Work->Flublock, Flblock, Fublock, Fcblock, fnr_curr, fnc_curr
 * and fcurr_size describe the new front, E [0] holds its location, and
 * Work->do_grow is cleared.
 *
 * Returns TRUE on success, FALSE if the minimum front size overflows an Int
 * or if memory cannot be obtained.
 */
GLOBAL Int UMF_grow_front
(
    NumericType *Numeric,
    Int fnr2,           /* desired size is fnr2-by-fnc2 */
    Int fnc2,
    WorkType *Work,
    Int do_what         /* -1: UMF_start_front
                         * 0:  UMF_init_front, do not recompute Fcpos
                         * 1:  UMF_extend_front
                         * 2:  UMF_init_front, recompute Fcpos */
)
{

    /* ---------------------------------------------------------------------- */
    /* local variables */
    /* ---------------------------------------------------------------------- */

    double s ;
    Entry *Fcold, *Fcnew ;
    Int j, i, col, *Fcpos, *Fcols, fnrows_max, fncols_max, fnr_curr, nb,
        fnrows_new, fncols_new, fnr_min, fnc_min, minsize,
        newsize, fnrows, fncols, *E, eloc ;

    /* ---------------------------------------------------------------------- */
    /* get parameters */
    /* ---------------------------------------------------------------------- */

#ifndef NDEBUG
    if (do_what != -1) UMF_debug++ ;
    DEBUG0 (("\n\n====================GROW FRONT: do_what: "ID"\n", do_what)) ;
    if (do_what != -1) UMF_debug-- ;
    ASSERT (Work->do_grow) ;
    ASSERT (Work->fnpiv == 0) ;
#endif

    Fcols = Work->Fcols ;
    Fcpos = Work->Fcpos ;
    E = Work->E ;

    /* ---------------------------------------------------------------------- */
    /* The current front is too small, find the new size */
    /* ---------------------------------------------------------------------- */

    /* maximum size of frontal matrix for this chain */
    nb = Work->nb ;
    fnrows_max = Work->fnrows_max + nb ;
    fncols_max = Work->fncols_max + nb ;
    ASSERT (fnrows_max >= 0 && (fnrows_max % 2) == 1) ;
    DEBUG0 (("Max size: "ID"-by-"ID" (incl. "ID" pivot block\n",
        fnrows_max, fncols_max, nb)) ;

    /* current dimensions of frontal matrix: fnr-by-fnc */
    DEBUG0 (("Current : "ID"-by-"ID" (excl "ID" pivot blocks)\n",
        Work->fnr_curr, Work->fnc_curr, nb)) ;
    ASSERT (Work->fnr_curr >= 0) ;
    ASSERT ((Work->fnr_curr % 2 == 1) || Work->fnr_curr == 0) ;

    /* required dimensions of frontal matrix: fnr_min-by-fnc_min */
    fnrows_new = Work->fnrows_new + 1 ;
    fncols_new = Work->fncols_new + 1 ;
    ASSERT (fnrows_new >= 0) ;
    if (fnrows_new % 2 == 0) fnrows_new++ ;
    fnrows_new += nb ;
    fncols_new += nb ;
    fnr_min = MIN (fnrows_new, fnrows_max) ;
    fnc_min = MIN (fncols_new, fncols_max) ;
    if (INT_OVERFLOW ((double) fnr_min * (double) fnc_min * sizeof (Entry)))
    {
        /* :: the minimum front size is bigger than the integer maximum :: */
        return (FALSE) ;
    }
    /* The Int product is formed only after the check above has shown that
     * it fits; computing it earlier would risk signed integer overflow,
     * which is undefined behaviour. */
    minsize = fnr_min * fnc_min ;
    ASSERT (fnr_min >= 0) ;
    ASSERT (fnr_min % 2 == 1) ;

    DEBUG0 (("Min : "ID"-by-"ID"\n", fnr_min, fnc_min)) ;

    /* grow the front to fnr2-by-fnc2, but no bigger than the maximum,
     * and no smaller than the minimum. */
    DEBUG0 (("Desired : ("ID"+"ID")-by-("ID"+"ID")\n", fnr2, nb, fnc2, nb)) ;
    fnr2 += nb ;
    fnc2 += nb ;
    ASSERT (fnr2 >= 0) ;
    if (fnr2 % 2 == 0) fnr2++ ;
    fnr2 = MAX (fnr2, fnr_min) ;
    fnc2 = MAX (fnc2, fnc_min) ;
    fnr2 = MIN (fnr2, fnrows_max) ;
    fnc2 = MIN (fnc2, fncols_max) ;
    DEBUG0 (("Try : "ID"-by-"ID"\n", fnr2, fnc2)) ;
    ASSERT (fnr2 >= 0) ;
    ASSERT (fnr2 % 2 == 1) ;

    s = ((double) fnr2) * ((double) fnc2) ;
    if (INT_OVERFLOW (s * sizeof (Entry)))
    {
        /* :: frontal matrix size int overflow :: */
        /* the desired front size is bigger than the integer maximum */
        /* compute a such that a*a*s < Int_MAX / sizeof (Entry) */
        double a = 0.9 * sqrt ((Int_MAX / sizeof (Entry)) / s) ;
        fnr2 = MAX (fnr_min, a * fnr2) ;
        fnc2 = MAX (fnc_min, a * fnc2) ;
        /* the new frontal size is a*r*a*c = a*a*s */
        newsize = fnr2 * fnc2 ;
        ASSERT (fnr2 >= 0) ;
        /* keep the row dimension odd, and shrink the column dimension so
         * that the total does not exceed newsize */
        if (fnr2 % 2 == 0) fnr2++ ;
        fnc2 = newsize / fnr2 ;
    }

    fnr2 = MAX (fnr2, fnr_min) ;
    fnc2 = MAX (fnc2, fnc_min) ;
    newsize = fnr2 * fnc2 ;

    ASSERT (fnr2 >= 0) ;
    ASSERT (fnr2 % 2 == 1) ;
    ASSERT (fnr2 >= fnr_min) ;
    ASSERT (fnc2 >= fnc_min) ;
    ASSERT (newsize >= minsize) ;

    /* ---------------------------------------------------------------------- */
    /* free the current front if it is empty of any numerical values */
    /* ---------------------------------------------------------------------- */

    if (E [0] && do_what != 1)
    {
        /* free the current front, if it exists and has nothing in it */
        DEBUG0 (("Freeing empty front\n")) ;
        UMF_mem_free_tail_block (Numeric, E [0]) ;
        E [0] = 0 ;
        Work->Flublock = (Entry *) NULL ;
        Work->Flblock = (Entry *) NULL ;
        Work->Fublock = (Entry *) NULL ;
        Work->Fcblock = (Entry *) NULL ;
    }

    /* ---------------------------------------------------------------------- */
    /* allocate the new front, doing garbage collection if necessary */
    /* ---------------------------------------------------------------------- */

#ifndef NDEBUG
    /* debug builds only: randomly request an allocation failure with
     * probability UMF_gprob */
    UMF_allocfail = FALSE ;
    if (UMF_gprob > 0)  /* a double relop, but ignore NaN case */
    {
        double rrr = ((double) (rand ( ))) / (((double) RAND_MAX) + 1) ;
        DEBUG1 (("Check random %e %e\n", rrr, UMF_gprob)) ;
        UMF_allocfail = rrr < UMF_gprob ;
        if (UMF_allocfail) DEBUGm2 (("Random garbage collection (grow)\n")) ;
    }
#endif

    DEBUG0 (("Attempt size: "ID"-by-"ID"\n", fnr2, fnc2)) ;
    eloc = UMF_mem_alloc_tail_block (Numeric, UNITS (Entry, newsize)) ;

    if (!eloc)
    {
        /* Do garbage collection, realloc, and try again. Compact the current
         * contribution block in the front to fnrows-by-fncols.  Note that
         * there are no pivot rows/columns in current front.  Do not recompute
         * Fcpos in UMF_garbage_collection. */
        DEBUGm3 (("get_memory from umf_grow_front\n")) ;
        if (!UMF_get_memory (Numeric, Work, 1 + UNITS (Entry, newsize),
            Work->fnrows, Work->fncols, FALSE))
        {
            /* :: out of memory in umf_grow_front :: */
            return (FALSE) ;    /* out of memory */
        }
        DEBUG0 (("Attempt size: "ID"-by-"ID" again\n", fnr2, fnc2)) ;
        eloc = UMF_mem_alloc_tail_block (Numeric, UNITS (Entry, newsize)) ;
    }

    /* try again with something smaller */
    while ((fnr2 != fnr_min || fnc2 != fnc_min) && !eloc)
    {
        fnr2 = MIN (fnr2 - 2, fnr2 * UMF_REALLOC_REDUCTION) ;
        fnc2 = MIN (fnc2 - 2, fnc2 * UMF_REALLOC_REDUCTION) ;
        ASSERT (fnr_min >= 0) ;
        ASSERT (fnr_min % 2 == 1) ;
        fnr2 = MAX (fnr_min, fnr2) ;
        fnc2 = MAX (fnc_min, fnc2) ;
        ASSERT (fnr2 >= 0) ;
        if (fnr2 % 2 == 0) fnr2++ ;
        newsize = fnr2 * fnc2 ;
        DEBUGm3 (("Attempt smaller size: "ID"-by-"ID" minsize "ID"-by-"ID"\n",
            fnr2, fnc2, fnr_min, fnc_min)) ;
        eloc = UMF_mem_alloc_tail_block (Numeric, UNITS (Entry, newsize)) ;
    }

    /* try again with the smallest possible size */
    if (!eloc)
    {
        fnr2 = fnr_min ;
        fnc2 = fnc_min ;
        newsize = minsize ;
        DEBUG0 (("Attempt minsize: "ID"-by-"ID"\n", fnr2, fnc2)) ;
        eloc = UMF_mem_alloc_tail_block (Numeric, UNITS (Entry, newsize)) ;
    }

    if (!eloc)
    {
        /* out of memory */
        return (FALSE) ;
    }

    ASSERT (fnr2 >= 0) ;
    ASSERT (fnr2 % 2 == 1) ;
    ASSERT (fnr2 >= fnr_min && fnc2 >= fnc_min) ;

    /* ---------------------------------------------------------------------- */
    /* copy the old frontal matrix into the new one */
    /* ---------------------------------------------------------------------- */

    /* old contribution block (if any) */
    fnr_curr = Work->fnr_curr ;     /* garbage collection can change fn*_curr */
    ASSERT (fnr_curr >= 0) ;
    ASSERT ((fnr_curr % 2 == 1) || fnr_curr == 0) ;
    fnrows = Work->fnrows ;
    fncols = Work->fncols ;
    Fcold = Work->Fcblock ;

    /* remove nb from the sizes */
    fnr2 -= nb ;
    fnc2 -= nb ;

    /* new frontal matrix: LU block (nb-by-nb), L block (fnr2-by-nb),
     * U block (nb-by-fnc2), then the contribution block C */
    Work->Flublock = (Entry *) (Numeric->Memory + eloc) ;
    Work->Flblock = Work->Flublock + nb * nb ;
    Work->Fublock = Work->Flblock + nb * fnr2 ;
    Work->Fcblock = Work->Fublock + nb * fnc2 ;
    Fcnew = Work->Fcblock ;

    if (E [0])
    {
        /* copy the old contribution block into the new one */
        for (j = 0 ; j < fncols ; j++)
        {
            col = Fcols [j] ;
            DEBUG1 (("copy col "ID" \n",col)) ;
            ASSERT (col >= 0 && col < Work->n_col) ;
            for (i = 0 ; i < fnrows ; i++)
            {
                Fcnew [i] = Fcold [i] ;
            }
            /* the column stride changes from fnr_curr to fnr2 */
            Fcnew += fnr2 ;
            Fcold += fnr_curr ;
            DEBUG1 (("new offset col "ID" "ID"\n",col, j * fnr2)) ;
            Fcpos [col] = j * fnr2 ;
        }
    }
    else if (do_what == 2)
    {
        /* just find the new column offsets */
        for (j = 0 ; j < fncols ; j++)
        {
            col = Fcols [j] ;
            DEBUG1 (("new offset col "ID" "ID"\n",col, j * fnr2)) ;
            Fcpos [col] = j * fnr2 ;
        }
    }

    /* free the old frontal matrix */
    UMF_mem_free_tail_block (Numeric, E [0]) ;

    /* ---------------------------------------------------------------------- */
    /* new frontal matrix size */
    /* ---------------------------------------------------------------------- */

    E [0] = eloc ;
    Work->fnr_curr = fnr2 ;         /* C block is fnr2-by-fnc2 */
    Work->fnc_curr = fnc2 ;
    Work->fcurr_size = newsize ;    /* including LU, L, U, and C blocks */
    Work->do_grow = FALSE ;         /* the front has just been grown */

    ASSERT (Work->fnr_curr >= 0) ;
    ASSERT (Work->fnr_curr % 2 == 1) ;
    DEBUG0 (("Newly grown front: "ID"+"ID" by "ID"+"ID"\n", Work->fnr_curr,
        nb, Work->fnc_curr, nb)) ;
    return (TRUE) ;
}
/*
 * UMF_start_front: allocate (or reuse) the working array for the first
 * frontal matrix of a new chain.
 *
 * Reads the chain's maximum front dimensions from Symbolic, picks a desired
 * front size (fsize) based on Numeric->front_alloc_init and, when diagonal
 * pivoting is preferred, on a rough bound of the first pivot column degree.
 * If the desired size exceeds the current front size (Work->fcurr_size), the
 * front is grown via UMF_grow_front; otherwise the existing allocation is
 * re-partitioned in place.
 *
 * Returns TRUE if successful, FALSE if UMF_grow_front fails.
 */
GLOBAL Int UMF_start_front    /* returns TRUE if successful, FALSE otherwise */
(
    Int chain,
    NumericType *Numeric,
    WorkType *Work,
    SymbolicType *Symbolic
)
{
    Int fnrows_max, fncols_max, fnr2, fnc2, fsize, fcurr_size, maxfrsize,
        overflow, nb, f, cdeg ;
    double maxbytes ;

    nb = Symbolic->nb ;
    fnrows_max = Symbolic->Chain_maxrows [chain] ;
    fncols_max = Symbolic->Chain_maxcols [chain] ;

    DEBUGm2 (("Start Front for chain "ID". fnrows_max "ID" fncols_max "ID"\n",
        chain, fnrows_max, fncols_max)) ;

    Work->fnrows_max = fnrows_max ;
    Work->fncols_max = fncols_max ;
    Work->any_skip = FALSE ;

    /* size in bytes of the largest possible front, computed in double so
     * that it can be tested for integer overflow below */
    maxbytes = sizeof (Entry) *
        (double) (fnrows_max + nb) * (double) (fncols_max + nb) ;
    fcurr_size = Work->fcurr_size ;

    if (Symbolic->prefer_diagonal)
    {
        /* Get a rough upper bound on the degree of the first pivot column in
         * this front.  Note that Col_degree is not maintained if diagonal
         * pivoting is preferred.  For most matrices, the first pivot column
         * of the first frontal matrix of a new chain has only one tuple in
         * it anyway, so this bound is exact in that case. */
        Int col, tpi, e, *E, *Col_tuples, *Col_tlen, *Cols ;
        Tuple *tp, *tpend ;
        Unit *Memory, *p ;
        Element *ep ;

        E = Work->E ;
        Memory = Numeric->Memory ;
        Col_tuples = Numeric->Lip ;
        Col_tlen = Numeric->Lilen ;
        col = Work->nextcand ;
        tpi = Col_tuples [col] ;
        /* tpi is an offset into Memory, so add it in Unit steps BEFORE
         * casting (same convention as the element lookup below).  The
         * unparenthesized form "(Tuple *) Memory + tpi" would scale the
         * offset by sizeof (Tuple) instead. */
        tp = (Tuple *) (Memory + tpi) ;
        tpend = tp + Col_tlen [col] ;
        cdeg = 0 ;

        DEBUGm3 (("\n=============== start front: col "ID" tlen "ID"\n",
            col, Col_tlen [col])) ;

        for ( ; tp < tpend ; tp++)
        {
            DEBUG1 (("Tuple ("ID","ID")\n", tp->e, tp->f)) ;
            e = tp->e ;
            if (!E [e]) continue ;              /* E [e] is zero: skip */
            f = tp->f ;
            p = Memory + E [e] ;
            ep = (Element *) p ;
            p += UNITS (Element, 1) ;
            Cols = (Int *) p ;
            if (Cols [f] == EMPTY) continue ;   /* entry marked EMPTY: skip */
            DEBUG1 ((" nrowsleft "ID"\n", ep->nrowsleft)) ;
            cdeg += ep->nrowsleft ;
        }

#ifndef NDEBUG
        DEBUGm3 (("start front cdeg: "ID" col "ID"\n", cdeg, col)) ;
        UMF_dump_rowcol (1, Numeric, Work, col, FALSE) ;
#endif

        /* cdeg is now the rough upper bound on the degree of the next pivot
         * column. */

        /* If AMD was called, we know the maximum number of nonzeros in any
         * column of L.  Use this as an upper bound for cdeg, but add 2 to
         * account for a small amount of off-diagonal pivoting. */
        if (Symbolic->amd_dmax > 0)
        {
            cdeg = MIN (cdeg, Symbolic->amd_dmax) ;
        }

        /* Increase it to account for larger columns later on.
         * Also ensure that it's larger than zero. */
        cdeg += 2 ;

        /* cdeg cannot be larger than fnrows_max */
        cdeg = MIN (cdeg, fnrows_max) ;
    }
    else
    {
        /* don't do the above cdeg computation */
        cdeg = 0 ;
    }

    DEBUGm2 (("fnrows max "ID" fncols_max "ID"\n", fnrows_max, fncols_max)) ;

    /* the current frontal matrix is empty */
    ASSERT (Work->fnrows == 0 && Work->fncols == 0 && Work->fnpiv == 0) ;

    /* maximum row dimension is always odd, to avoid bad cache effects */
    ASSERT (fnrows_max >= 0) ;
    ASSERT (fnrows_max % 2 == 1) ;

    /* ----------------------------------------------------------------------
     * allocate working array for current frontal matrix:
     *      minimum size: 1-by-1
     *      maximum size: fnrows_max-by-fncols_max
     *      desired size:
     *
     *          if Numeric->front_alloc_init >= 0:
     *
     *              for unsymmetric matrices:
     *              Numeric->front_alloc_init * (fnrows_max-by-fncols_max)
     *
     *              for symmetric matrices (diagonal pivoting preference,
     *              actually):
     *              Numeric->front_alloc_init * (fnrows_max-by-fncols_max), or
     *              cdeg*cdeg, whichever is smaller.
     *
     *          if Numeric->front_alloc_init < 0:
     *              allocate a front of size -Numeric->front_alloc_init.
     *
     * Allocate the whole thing if it's small (less than 2*nb^2).  Make sure
     * the leading dimension of the frontal matrix is odd.
     *
     * Also allocate the nb-by-nb LU block, the dr-by-nb L block, and the
     * nb-by-dc U block.
     * ---------------------------------------------------------------------- */

    /* get the maximum front size, avoiding integer overflow */
    overflow = INT_OVERFLOW (maxbytes) ;
    if (overflow)
    {
        /* :: int overflow, max front size :: */
        maxfrsize = Int_MAX / sizeof (Entry) ;
    }
    else
    {
        maxfrsize = (fnrows_max + nb) * (fncols_max + nb) ;
    }
    ASSERT (!INT_OVERFLOW ((double) maxfrsize * sizeof (Entry))) ;

    if (Numeric->front_alloc_init < 0)
    {
        /* allocate a front of -Numeric->front_alloc_init entries */
        fsize = -Numeric->front_alloc_init ;
        fsize = MAX (1, fsize) ;
    }
    else
    {
        if (INT_OVERFLOW (Numeric->front_alloc_init * maxbytes))
        {
            /* :: int overflow, requested front size :: */
            fsize = Int_MAX / sizeof (Entry) ;
        }
        else
        {
            fsize = Numeric->front_alloc_init * maxfrsize ;
        }

        if (cdeg > 0)
        {
            /* diagonal pivoting is in use.  cdeg was computed above */
            Int fsize2 ;

            /* add the L and U blocks */
            cdeg += nb ;

            if (INT_OVERFLOW (((double) cdeg * (double) cdeg) * sizeof (Entry)))
            {
                /* :: int overflow, symmetric front size :: */
                fsize2 = Int_MAX / sizeof (Entry) ;
            }
            else
            {
                fsize2 = MAX (cdeg * cdeg, fcurr_size) ;
            }
            fsize = MIN (fsize, fsize2) ;
        }
    }

    fsize = MAX (fsize, 2*nb*nb) ;

    /* fsize and maxfrsize are now safe from integer overflow.  They both
     * include the size of the pivot blocks. */
    ASSERT (!INT_OVERFLOW ((double) fsize * sizeof (Entry))) ;

    Work->fnrows_new = 0 ;
    Work->fncols_new = 0 ;

    /* desired size is fnr2-by-fnc2 (includes L and U blocks): */
    DEBUGm2 ((" fsize "ID" fcurr_size "ID"\n", fsize, fcurr_size)) ;
    DEBUGm2 ((" maxfrsize "ID" fnr_curr "ID" fnc_curr "ID"\n", maxfrsize,
        Work->fnr_curr, Work->fnc_curr)) ;

    if (fsize >= maxfrsize && !overflow)
    {
        /* max working array is small, allocate all of it */
        fnr2 = fnrows_max + nb ;
        fnc2 = fncols_max + nb ;
        fsize = maxfrsize ;
        DEBUGm1 ((" sufficient for ("ID"+"ID")-by-("ID"+"ID")\n",
            fnrows_max, nb, fncols_max, nb)) ;
    }
    else
    {
        /* allocate a smaller working array */
        if (fnrows_max <= fncols_max)
        {
            fnr2 = (Int) sqrt ((double) fsize) ;
            /* make sure fnr2 is odd */
            fnr2 = MAX (fnr2, 1) ;
            if (fnr2 % 2 == 0) fnr2++ ;
            fnr2 = MIN (fnr2, fnrows_max + nb) ;
            fnc2 = fsize / fnr2 ;
        }
        else
        {
            fnc2 = (Int) sqrt ((double) fsize) ;
            fnc2 = MIN (fnc2, fncols_max + nb) ;
            fnr2 = fsize / fnc2 ;
            /* make sure fnr2 is odd */
            fnr2 = MAX (fnr2, 1) ;
            if (fnr2 % 2 == 0)
            {
                fnr2++ ;
                fnc2 = fsize / fnr2 ;
            }
        }
        DEBUGm1 ((" smaller "ID"-by-"ID"\n", fnr2, fnc2)) ;
    }

    fnr2 = MIN (fnr2, fnrows_max + nb) ;
    fnc2 = MIN (fnc2, fncols_max + nb) ;
    ASSERT (fnr2 % 2 == 1) ;
    ASSERT (fnr2 * fnc2 <= fsize) ;

    /* remove the pivot blocks: the C block is fnr2-by-fnc2 from here on */
    fnr2 -= nb ;
    fnc2 -= nb ;
    ASSERT (fnr2 >= 0) ;
    ASSERT (fnc2 >= 0) ;

    if (fsize > fcurr_size)
    {
        DEBUGm1 ((" Grow front \n")) ;
        Work->do_grow = TRUE ;
        if (!UMF_grow_front (Numeric, fnr2, fnc2, Work, -1))
        {
            /* since the minimum front size is 1-by-1, it would be nearly
             * impossible to run out of memory here. */
            DEBUGm4 (("out of memory: start front\n")) ;
            return (FALSE) ;
        }
    }
    else
    {
        /* use the existing front */
        DEBUGm1 ((" existing front ok\n")) ;
        Work->fnr_curr = fnr2 ;
        Work->fnc_curr = fnc2 ;
        Work->Flblock = Work->Flublock + nb * nb ;
        Work->Fublock = Work->Flblock + nb * fnr2 ;
        Work->Fcblock = Work->Fublock + nb * fnc2 ;
    }

    ASSERT (Work->Flblock == Work->Flublock + Work->nb*Work->nb) ;
    ASSERT (Work->Fublock == Work->Flblock + Work->fnr_curr*Work->nb) ;
    ASSERT (Work->Fcblock == Work->Fublock + Work->nb*Work->fnc_curr) ;

    return (TRUE) ;
}
size_t TRILINOS_KLU_kernel /* final size of LU on output */ ( /* input, not modified */ Int n, /* A is n-by-n */ Int Ap [ ], /* size n+1, column pointers for A */ Int Ai [ ], /* size nz = Ap [n], row indices for A */ Entry Ax [ ], /* size nz, values of A */ Int Q [ ], /* size n, optional input permutation */ size_t lusize, /* initial size of LU on input */ /* output, not defined on input */ Int Pinv [ ], /* size n, inverse row permutation, where Pinv [i] = k if * row i is the kth pivot row */ Int P [ ], /* size n, row permutation, where P [k] = i if row i is the * kth pivot row. */ Unit **p_LU, /* LU array, size lusize on input */ Entry Udiag [ ], /* size n, diagonal of U */ Int Llen [ ], /* size n, column length of L */ Int Ulen [ ], /* size n, column length of U */ Int Lip [ ], /* size n, column pointers for L */ Int Uip [ ], /* size n, column pointers for U */ Int *lnz, /* size of L*/ Int *unz, /* size of U*/ /* workspace, not defined on input */ Entry X [ ], /* size n, undefined on input, zero on output */ /* workspace, not defined on input or output */ Int Stack [ ], /* size n */ Int Flag [ ], /* size n */ Int Ap_pos [ ], /* size n */ /* other workspace: */ Int Lpend [ ], /* size n workspace, for pruning only */ /* inputs, not modified on output */ Int k1, /* the block of A is from k1 to k2-1 */ Int PSinv [ ], /* inverse of P from symbolic factorization */ double Rs [ ], /* scale factors for A */ /* inputs, modified on output */ Int Offp [ ], /* off-diagonal matrix (modified by this routine) */ Int Offi [ ], Entry Offx [ ], /* --------------- */ TRILINOS_KLU_common *Common ) { Entry pivot ; double abs_pivot, xsize, nunits, tol, memgrow ; Entry *Ux ; Int *Li, *Ui ; Unit *LU ; /* LU factors (pattern and values) */ Int k, p, i, j, pivrow, kbar, diagrow, firstrow, lup, top, scale, len ; size_t newlusize ; #ifndef NDEBUG Entry *Lx ; #endif ASSERT (Common != NULL) ; scale = Common->scale ; tol = Common->tol ; memgrow = Common->memgrow ; *lnz = 0 ; *unz = 0 ; /* 
---------------------------------------------------------------------- */ /* get initial Li, Lx, Ui, and Ux */ /* ---------------------------------------------------------------------- */ PRINTF (("input: lusize %d \n", lusize)) ; ASSERT (lusize > 0) ; LU = *p_LU ; /* ---------------------------------------------------------------------- */ /* initializations */ /* ---------------------------------------------------------------------- */ firstrow = 0 ; lup = 0 ; for (k = 0 ; k < n ; k++) { /* X [k] = 0 ; */ CLEAR (X [k]) ; Flag [k] = EMPTY ; Lpend [k] = EMPTY ; /* flag k as not pruned */ } /* ---------------------------------------------------------------------- */ /* mark all rows as non-pivotal and determine initial diagonal mapping */ /* ---------------------------------------------------------------------- */ /* PSinv does the symmetric permutation, so don't do it here */ for (k = 0 ; k < n ; k++) { P [k] = k ; Pinv [k] = FLIP (k) ; /* mark all rows as non-pivotal */ } /* initialize the construction of the off-diagonal matrix */ Offp [0] = 0 ; /* P [k] = row means that UNFLIP (Pinv [row]) = k, and visa versa. * If row is pivotal, then Pinv [row] >= 0. A row is initially "flipped" * (Pinv [k] < EMPTY), and then marked "unflipped" when it becomes * pivotal. 
*/ #ifndef NDEBUG for (k = 0 ; k < n ; k++) { PRINTF (("Initial P [%d] = %d\n", k, P [k])) ; } #endif /* ---------------------------------------------------------------------- */ /* factorize */ /* ---------------------------------------------------------------------- */ for (k = 0 ; k < n ; k++) { PRINTF (("\n\n==================================== k: %d\n", k)) ; /* ------------------------------------------------------------------ */ /* determine if LU factors have grown too big */ /* ------------------------------------------------------------------ */ /* (n - k) entries for L and k entries for U */ nunits = DUNITS (Int, n - k) + DUNITS (Int, k) + DUNITS (Entry, n - k) + DUNITS (Entry, k) ; /* LU can grow by at most 'nunits' entries if the column is dense */ PRINTF (("lup %d lusize %g lup+nunits: %g\n", lup, (double) lusize, lup+nunits)); xsize = ((double) lup) + nunits ; if (xsize > (double) lusize) { /* check here how much to grow */ xsize = (memgrow * ((double) lusize) + 4*n + 1) ; if (INT_OVERFLOW (xsize)) { PRINTF (("Matrix is too large (Int overflow)\n")) ; Common->status = TRILINOS_KLU_TOO_LARGE ; return (lusize) ; } newlusize = memgrow * lusize + 2*n + 1 ; /* Future work: retry mechanism in case of malloc failure */ LU = (Unit*) TRILINOS_KLU_realloc (newlusize, lusize, sizeof (Unit), LU, Common) ; Common->nrealloc++ ; *p_LU = LU ; if (Common->status == TRILINOS_KLU_OUT_OF_MEMORY) { PRINTF (("Matrix is too large (LU)\n")) ; return (lusize) ; } lusize = newlusize ; PRINTF (("inc LU to %d done\n", lusize)) ; } /* ------------------------------------------------------------------ */ /* start the kth column of L and U */ /* ------------------------------------------------------------------ */ Lip [k] = lup ; /* ------------------------------------------------------------------ */ /* compute the nonzero pattern of the kth column of L and U */ /* ------------------------------------------------------------------ */ #ifndef NDEBUG for (i = 0 ; i < n ; i++) { 
ASSERT (Flag [i] < k) ; /* ASSERT (X [i] == 0) ; */ ASSERT (IS_ZERO (X [i])) ; } #endif top = lsolve_symbolic (n, k, Ap, Ai, Q, Pinv, Stack, Flag, Lpend, Ap_pos, LU, lup, Llen, Lip, k1, PSinv) ; #ifndef NDEBUG PRINTF (("--- in U:\n")) ; for (p = top ; p < n ; p++) { PRINTF (("pattern of X for U: %d : %d pivot row: %d\n", p, Stack [p], Pinv [Stack [p]])) ; ASSERT (Flag [Stack [p]] == k) ; } PRINTF (("--- in L:\n")) ; Li = (Int *) (LU + Lip [k]); for (p = 0 ; p < Llen [k] ; p++) { PRINTF (("pattern of X in L: %d : %d pivot row: %d\n", p, Li [p], Pinv [Li [p]])) ; ASSERT (Flag [Li [p]] == k) ; } p = 0 ; for (i = 0 ; i < n ; i++) { ASSERT (Flag [i] <= k) ; if (Flag [i] == k) p++ ; } #endif /* ------------------------------------------------------------------ */ /* get the column of the matrix to factorize and scatter into X */ /* ------------------------------------------------------------------ */ construct_column (k, Ap, Ai, Ax, Q, X, k1, PSinv, Rs, scale, Offp, Offi, Offx) ; /* ------------------------------------------------------------------ */ /* compute the numerical values of the kth column (s = L \ A (:,k)) */ /* ------------------------------------------------------------------ */ lsolve_numeric (Pinv, LU, Stack, Lip, top, n, Llen, X) ; #ifndef NDEBUG for (p = top ; p < n ; p++) { PRINTF (("X for U %d : ", Stack [p])) ; PRINT_ENTRY (X [Stack [p]]) ; } Li = (Int *) (LU + Lip [k]) ; for (p = 0 ; p < Llen [k] ; p++) { PRINTF (("X for L %d : ", Li [p])) ; PRINT_ENTRY (X [Li [p]]) ; } #endif /* ------------------------------------------------------------------ */ /* partial pivoting with diagonal preference */ /* ------------------------------------------------------------------ */ /* determine what the "diagonal" is */ diagrow = P [k] ; /* might already be pivotal */ PRINTF (("k %d, diagrow = %d, UNFLIP (diagrow) = %d\n", k, diagrow, UNFLIP (diagrow))) ; /* find a pivot and scale the pivot column */ if (!lpivot (diagrow, &pivrow, &pivot, &abs_pivot, tol, X, LU, 
Lip, Llen, k, n, Pinv, &firstrow, Common)) { /* matrix is structurally or numerically singular */ Common->status = TRILINOS_KLU_SINGULAR ; if (Common->numerical_rank == EMPTY) { Common->numerical_rank = k+k1 ; Common->singular_col = Q [k+k1] ; } if (Common->halt_if_singular) { /* do not continue the factorization */ return (lusize) ; } } /* we now have a valid pivot row, even if the column has NaN's or * has no entries on or below the diagonal at all. */ PRINTF (("\nk %d : Pivot row %d : ", k, pivrow)) ; PRINT_ENTRY (pivot) ; ASSERT (pivrow >= 0 && pivrow < n) ; ASSERT (Pinv [pivrow] < 0) ; /* set the Uip pointer */ Uip [k] = Lip [k] + UNITS (Int, Llen [k]) + UNITS (Entry, Llen [k]) ; /* move the lup pointer to the position where indices of U * should be stored */ lup += UNITS (Int, Llen [k]) + UNITS (Entry, Llen [k]) ; Ulen [k] = n - top ; /* extract Stack [top..n-1] to Ui and the values to Ux and clear X */ GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ; for (p = top, i = 0 ; p < n ; p++, i++) { j = Stack [p] ; Ui [i] = Pinv [j] ; Ux [i] = X [j] ; CLEAR (X [j]) ; } /* position the lu index at the starting point for next column */ lup += UNITS (Int, Ulen [k]) + UNITS (Entry, Ulen [k]) ; /* U(k,k) = pivot */ Udiag [k] = pivot ; /* ------------------------------------------------------------------ */ /* log the pivot permutation */ /* ------------------------------------------------------------------ */ ASSERT (UNFLIP (Pinv [diagrow]) < n) ; ASSERT (P [UNFLIP (Pinv [diagrow])] == diagrow) ; if (pivrow != diagrow) { /* an off-diagonal pivot has been chosen */ Common->noffdiag++ ; PRINTF ((">>>>>>>>>>>>>>>>> pivrow %d k %d off-diagonal\n", pivrow, k)) ; if (Pinv [diagrow] < 0) { /* the former diagonal row index, diagrow, has not yet been * chosen as a pivot row. 
Log this diagrow as the "diagonal" * entry in the column kbar for which the chosen pivot row, * pivrow, was originally logged as the "diagonal" */ kbar = FLIP (Pinv [pivrow]) ; P [kbar] = diagrow ; Pinv [diagrow] = FLIP (kbar) ; } } P [k] = pivrow ; Pinv [pivrow] = k ; #ifndef NDEBUG for (i = 0 ; i < n ; i++) { ASSERT (IS_ZERO (X [i])) ;} GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ; for (p = 0 ; p < len ; p++) { PRINTF (("Column %d of U: %d : ", k, Ui [p])) ; PRINT_ENTRY (Ux [p]) ; } GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ; for (p = 0 ; p < len ; p++) { PRINTF (("Column %d of L: %d : ", k, Li [p])) ; PRINT_ENTRY (Lx [p]) ; } #endif /* ------------------------------------------------------------------ */ /* symmetric pruning */ /* ------------------------------------------------------------------ */ prune (Lpend, Pinv, k, pivrow, LU, Uip, Lip, Ulen, Llen) ; *lnz += Llen [k] + 1 ; /* 1 added to lnz for diagonal */ *unz += Ulen [k] + 1 ; /* 1 added to unz for diagonal */ } /* ---------------------------------------------------------------------- */ /* finalize column pointers for L and U, and put L in the pivotal order */ /* ---------------------------------------------------------------------- */ for (p = 0 ; p < n ; p++) { Li = (Int *) (LU + Lip [p]) ; for (i = 0 ; i < Llen [p] ; i++) { Li [i] = Pinv [Li [i]] ; } } #ifndef NDEBUG for (i = 0 ; i < n ; i++) { PRINTF (("P [%d] = %d Pinv [%d] = %d\n", i, P [i], i, Pinv [i])) ; } for (i = 0 ; i < n ; i++) { ASSERT (Pinv [i] >= 0 && Pinv [i] < n) ; ASSERT (P [i] >= 0 && P [i] < n) ; ASSERT (P [Pinv [i]] == i) ; ASSERT (IS_ZERO (X [i])) ; } #endif /* ---------------------------------------------------------------------- */ /* shrink the LU factors to just the required size */ /* ---------------------------------------------------------------------- */ newlusize = lup ; ASSERT ((size_t) newlusize <= lusize) ; /* this cannot fail, since the block is descreasing in size */ LU = (Unit*) TRILINOS_KLU_realloc (newlusize, 
lusize, sizeof (Unit), LU, Common) ; *p_LU = LU ; return (newlusize) ; }