Пример #1
0
template <typename Entry> void spqr_rsolve
(
    // inputs
    SuiteSparseQR_factorization <Entry> *QR,
    int use_Q1fill,         // if TRUE, do X=E*(R\B), otherwise do X=R\B

    Int nrhs,               // number of columns of B
    Int ldb,                // leading dimension of B
    Entry *B,               // size m-by-nrhs with leading dimesion ldb

    // output
    Entry *X,               // size n-by-nrhs with leading dimension n

    // workspace
    Entry **Rcolp,          // size QRnum->maxfrank
    Int *Rlive,             // size QRnum->maxfrank
    Entry *W,               // size QRnum->maxfrank * nrhs

    cholmod_common *cc
)
{
    spqr_symbolic *QRsym ;
    spqr_numeric <Entry> *QRnum ;
    Int n1rows, n1cols, n ;
    Int *Q1fill, *R1p, *R1j ;
    Entry *R1x ;

    Entry xi ;
    Entry **Rblock, *R, *W1, *B1, *X1 ;
    Int *Rp, *Rj, *Super, *HStair, *Hm, *Stair ;
    char *Rdead ;
    Int nf, m, rank, j, f, col1, col2, fp, pr, fn, rm, k, i, row1, row2, ii,
        keepH, fm, h, t, live, kk ;

    // -------------------------------------------------------------------------
    // get the contents of the QR object
    // -------------------------------------------------------------------------

    QRsym = QR->QRsym ;
    QRnum = QR->QRnum ;
    n1rows = QR->n1rows ;
    n1cols = QR->n1cols ;
    n = QR->nacols ;
    Q1fill = use_Q1fill ? QR->Q1fill : NULL ;
    R1p = QR->R1p ;
    R1j = QR->R1j ;
    R1x = QR->R1x ;

    keepH = QRnum->keepH ;
    PR (("rsolve keepH %ld\n", keepH)) ;
    nf = QRsym->nf ;
    m = QRsym->m ;
    Rblock = QRnum->Rblock ;
    Rp = QRsym->Rp ;
    Rj = QRsym->Rj ;
    Super = QRsym->Super ;
    Rdead = QRnum->Rdead ;
    rank = QR->rank ;   // R22 is R(n1rows:rank-1,n1cols:n-1) of
                        // the global R.
    HStair = QRnum->HStair ;
    Hm = QRnum->Hm ;

    // -------------------------------------------------------------------------
    // X = 0
    // -------------------------------------------------------------------------

    X1 = X ;
    for (kk = 0 ; kk < nrhs ; kk++)
    {
        for (i = 0 ; i < n ; i++)
        {
            X1 [i] = 0 ;
        }
        X1 += n ;
    }

    // =========================================================================
    // === solve with the multifrontal rows of R ===============================
    // =========================================================================

    Stair = NULL ;
    fm = 0 ;
    h = 0 ;
    t = 0 ;

    // start with row2 = QR-num->rank + n1rows, the last row of the combined R
    // factor of [A Binput]

    row2 = QRnum->rank + n1rows ;
    for (f = nf-1 ; f >= 0 ; f--)
    {

        // ---------------------------------------------------------------------
        // get the R block for front F
        // ---------------------------------------------------------------------

        R = Rblock [f] ;
        col1 = Super [f] ;                  // first pivot column in front F
        col2 = Super [f+1] ;                // col2-1 is last pivot col
        fp = col2 - col1 ;                  // number of pivots in front F
        pr = Rp [f] ;                       // pointer to row indices for F
        fn = Rp [f+1] - pr ;                // # of columns in front F

        if (keepH)
        {
            Stair = HStair + pr ;           // staircase of front F
            fm = Hm [f] ;                   // # of rows in front F
            h = 0 ;                         // H vector starts in row h
        }

        // ---------------------------------------------------------------------
        // find the live pivot columns in this R or RH block
        // ---------------------------------------------------------------------

        rm = 0 ;                            // number of rows in R block
        for (k = 0 ; k < fp ; k++)
        {
            j = col1 + k ;
            ASSERT (Rj [pr + k] == j) ;
            if (keepH)
            {
                t = Stair [k] ;             // length of R+H vector
                ASSERT (t >= 0 && t <= fm) ;
                if (t == 0)
                {
                    live = FALSE ;          // column k is dead
                    t = rm ;                // dead col, R only, no H
                    h = rm ;
                }
                else
                {
                    live = (rm < fm) ;      // k is live, unless we hit the wall
                    h = rm + 1 ;            // H vector starts in row h
                }
                ASSERT (t >= h) ;
            }
            else
            {
                live = (!Rdead [j])  ;
            }

            if (live)
            {
                // R (rm,k) is a "diagonal"; rm and k are local indices.
                // Keep track of a pointer to the first entry R(0,k)
                Rcolp [rm] = R ;
                Rlive [rm] = j ;
                rm++ ;
            }
            else
            {
                // compute the basic solution; dead columns are zero
                ii = Q1fill ? Q1fill [j+n1cols] : j+n1cols ;
                if (ii < n)
                {
                    for (kk = 0 ; kk < nrhs ; kk++)
                    {
                        // X (ii,kk) = 0; note this is stride n
                        X [INDEX (ii,kk,n)] = 0 ;
                    }
                }
            }

            // advance to the next column of R in the R block
            R += rm + (keepH ? (t-h) : 0) ;
        }

        // There are rm rows in this R block, corresponding to the rm live
        // columns in the range col1:col2-1.  The list of live global column
        // indices is given in Rlive [0:rm-1].  Pointers to the numerical
        // entries for each of these columns in this R block are given in
        // Rcolp [0:rm-1].   The rm rows in this R block correspond to
        // row1:row2-1 of R and b.

        row1 = row2 - rm ;

        // ---------------------------------------------------------------------
        // get the right-hand sides for these rm equations
        // ---------------------------------------------------------------------

        // W = B (row1:row2-1,:)
        ASSERT (rm <= QRnum->maxfrank) ;
        W1 = W ;
        B1 = B ;
        for (kk = 0 ; kk < nrhs ; kk++)
        {
            for (i = 0 ; i < rm ; i++)
            {
                ii = row1 + i ;
                ASSERT (ii >= n1rows) ;
                W1 [i] = (ii < rank) ? B1 [ii] : 0 ;
            }
            W1 += rm ;
            B1 += ldb ;
        }

        // ---------------------------------------------------------------------
        // solve with the rectangular part of R (W = W - R2*x2)
        // ---------------------------------------------------------------------

        for ( ; k < fn ; k++)
        {
            j = Rj [pr + k] ;
            ASSERT (j >= col2 && j < QRsym->n) ;
            ii = Q1fill ? Q1fill [j+n1cols] : j+n1cols ;
            ASSERT ((ii < n) == (j+n1cols < n)) ;
            // break if past the last column of A in QR of [A Binput]
            if (ii >= n) break ;

            if (!Rdead [j])
            {
                // global column j is live
                W1 = W ;
                for (kk = 0 ; kk < nrhs ; kk++)
                {
                    xi = X [INDEX (ii,kk,n)] ;        // xi = X (ii,kk)
                    if (xi != (Entry) 0)
                    {
                        FLOP_COUNT (2*rm) ;
                        for (i = 0 ; i < rm ; i++)
                        {
                            W1 [i] -= R [i] * xi ;
                        }
                    }
                    W1 += rm ;
                }
            }

            // go to the next column of R
            R += rm ;
            if (keepH)
            {
                t = Stair [k] ;             // length of R+H vector
                ASSERT (t >= 0 && t <= fm) ;
                h = MIN (h+1, fm) ;         // H vector starts in row h
                ASSERT (t >= h) ;
                R += (t-h) ;
            }
        }

        // ---------------------------------------------------------------------
        // solve with the squeezed upper triangular part of R
        // ---------------------------------------------------------------------

        for (k = rm-1 ; k >= 0 ; k--)
        {
            R = Rcolp [k] ;                 // kth live pivot column
            j = Rlive [k] ;                 // is jth global column
            ii = Q1fill ? Q1fill [j+n1cols] : j+n1cols ;
            ASSERT ((ii < n) == (j+n1cols < n)) ;
            if (ii < n)
            {
                W1 = W ;
                for (kk = 0 ; kk < nrhs ; kk++)
                {
                    // divide by the "diagonal"
                    // xi = W1 [k] / R [k] ;
                    xi = spqr_divide (W1 [k], R [k], cc) ;
                    FLOP_COUNT (1) ;
                    X [INDEX(ii,kk,n)] = xi ;
                    if (xi != (Entry) 0)
                    {
                        FLOP_COUNT (2*k) ;
                        for (i = 0 ; i < k ; i++)
                        {
                            W1 [i] -= R [i] * xi ;
                        }
                    }
                    W1 += rm ;
                }
            }
        }

        // ---------------------------------------------------------------------
        // prepare for the R block for front f-1
        // ---------------------------------------------------------------------

        row2 = row1 ;
    }
    ASSERT (row2 == n1rows) ;

    // =========================================================================
    // === solve with the singleton rows of R ==================================
    // =========================================================================

    FLOP_COUNT ((n1rows <= 0) ? 0 :
        nrhs * (n1rows + (2 * (R1p [n1rows] - n1rows)))) ;

    for (kk = 0 ; kk < nrhs ; kk++)
    {
        for (i = n1rows-1 ; i >= 0 ; i--)
        {
            // get the right-hand side for this ith singleton row
            Entry x = B [i] ;
            // solve with the "off-diagonal" entries, x = x-R(i,:)*x2
            for (Int p = R1p [i] + 1 ; p < R1p [i+1] ; p++)
            {
                Int jnew = R1j [p] ;
                ASSERT (jnew >= i && jnew < n) ;
                Int jold = Q1fill ? Q1fill [jnew] : jnew ;
                ASSERT (jold >= 0 && jold < n) ;
                x -= R1x [p] * X [jold] ;
            }
            // divide by the "diagonal" (the singleton entry itself)
            Int p = R1p [i] ;
            Int jnew = R1j [p] ;
            Int jold = Q1fill ? Q1fill [jnew] : jnew ;
            ASSERT (jold >= 0 && jold < n) ;
            // X [jold] = x / R1x [p] ; using cc->complex_divide
            X [jold] = spqr_divide (x, R1x [p], cc) ;
        }
        B += ldb ;
        X += n ;
    }
}
Пример #2
0
template <typename Entry> Long spqr_front
(
    // input, not modified
    Long m,             // F is m-by-n with leading dimension m
    Long n,
    Long npiv,          // number of pivot columns
    double tol,         // a column is flagged as dead if its norm is <= tol
    Long ntol,          // apply tol only to first ntol pivot columns
    Long fchunk,        // block size for compact WY Householder reflections,
    // treated as 1 if fchunk <= 1

    // input/output
    Entry *F,           // frontal matrix F of size m-by-n
    Long *Stair,        // size n, entries F (Stair[k]:m-1, k) are all zero,
    // for each k = 0:n-1, and remain zero on output.
    char *Rdead,        // size npiv; all zero on input.  If k is dead,
    // Rdead [k] is set to 1

    // output, not defined on input
    Entry *Tau,         // size n, Householder coefficients

    // workspace, undefined on input and output
    Entry *W,           // size b*n, where b = min (fchunk,n,m)

    // input/output
    double *wscale,
    double *wssq,

    cholmod_common *cc  // for cc->hypotenuse function
)
{
    Entry tau ;
    double wk ;
    Entry *V ;
    Long k, t, g, g1, nv, k1, k2, i, t0, vzeros, mleft, nleft, vsize, minchunk,
         rank ;

    // NOTE: inputs are not checked for NULL (except if debugging enabled)

    ASSERT (F != NULL) ;
    ASSERT (Stair != NULL) ;
    ASSERT (Rdead != NULL) ;
    ASSERT (Tau != NULL) ;
    ASSERT (W != NULL) ;
    ASSERT (m >= 0 && n >= 0) ;

    npiv = MAX (0, npiv) ;  // npiv must be between 0 and n
    npiv = MIN (n, npiv) ;

    g1 = 0 ;                // row index of first queued-up Householder
    k1 = 0 ;                // pending Householders are in F (g1:t, k1:k2-1)
    k2 = 0 ;
    V = F ;                 // Householder vectors start here
    g = 0 ;                 // number of good Householders found
    nv = 0 ;                // number of Householder reflections queued up
    vzeros = 0 ;            // number of explicit zeros in queued-up H's
    t = 0 ;                 // staircase of current column
    fchunk = MAX (fchunk, 1) ;
    minchunk = MAX (MINCHUNK, fchunk/MINCHUNK_RATIO) ;
    rank = MIN (m,npiv) ;  // F (rank,npiv) is the first entry in C.  rank
    // is also the number of rows in the R block,
    // and the number of good pivot columns found.

    ntol = MIN (ntol, npiv) ;   // Note ntol can be negative, which means to
    // not use tol at all.

    PR (("Front %ld by %ld with %ld pivots\n", m, n, npiv)) ;
    for (k = 0 ; k < n ; k++)
    {

        // ---------------------------------------------------------------------
        // reduce the kth column of F to eliminate all but "diagonal" F (g,k)
        // ---------------------------------------------------------------------

        // get the staircase for the kth column, and operate on the "diagonal"
        // F (g,k); eliminate F (g+1:t-1, k) to zero
        t0 = t ;            // t0 = staircase of column k-1
        t = Stair [k] ;     // t = staircase of this column k

        PR (("k %ld g %ld m %ld n %ld npiv %ld\n", k, g, m, n, npiv)) ;
        if (g >= m)
        {
            // F (g,k) is outside the matrix, so we're done.  If this happens
            // when k < npiv, then there is no contribution block.
            PR (("hit the wall, npiv: %ld k %ld rank %ld\n", npiv, k, rank)) ;
            for ( ; k < npiv ; k++)
            {
                Rdead [k] = 1 ;
                Stair [k] = 0 ;         // remaining pivot columns all dead
                Tau [k] = 0 ;
            }
            for ( ; k < n ; k++)
            {
                Stair [k] = m ;         // non-pivotal columns
                Tau [k] = 0 ;
            }
            ASSERT (nv == 0) ;          // there can be no pending updates
            return (rank) ;
        }

        // if t < g+1, then this column is all zero; fix staircase so that R is
        // always inside the staircase.
        t = MAX (g+1,t) ;
        Stair [k] = t ;

        // ---------------------------------------------------------------------
        // If t just grew a lot since the last t, apply H now to all of F
        // ---------------------------------------------------------------------

        // vzeros is the number of zero entries in V after including the next
        // Householder vector.  If it would exceed 50% of the size of V,
        // apply the pending Householder reflections now, but only if
        // enough vectors have accumulated.

        vzeros += nv * (t - t0) ;
        if (nv >= minchunk)
        {
            vsize = (nv*(nv+1))/2 + nv*(t-g1-nv) ;
            if (vzeros > MAX (16, vsize/2))
            {
                // apply pending block of Householder reflections
                PR (("(1) apply k1 %ld k2 %ld\n", k1, k2)) ;
                spqr_larftb (
                    0,                          // method 0: Left, Transpose
                    t0-g1, n-k2, nv, m, m,
                    V,                          // F (g1:t-1, k1:k1+nv-1)
                    &Tau [k1],                  // Tau (k1:k-1)
                    &F [INDEX (g1,k2,m)],       // F (g1:t-1, k2:n-1)
                    W, cc) ;                    // size nv*nv + nv*(n-k2)
                nv = 0 ;        // clear queued-up Householder reflections
                vzeros = 0 ;
            }
        }

        // ---------------------------------------------------------------------
        // find a Householder reflection that reduces column k
        // ---------------------------------------------------------------------

        tau = spqr_private_house (t-g, &F [INDEX (g,k,m)], cc) ;

        // ---------------------------------------------------------------------
        // check to see if the kth column is OK
        // ---------------------------------------------------------------------

        if (k < ntol && (wk = spqr_abs (F [INDEX (g,k,m)], cc)) <= tol)
        {

            // -----------------------------------------------------------------
            // norm (F (g:t-1, k)) is too tiny; the kth pivot column is dead
            // -----------------------------------------------------------------

            // keep track of the 2-norm of w, the dead column 2-norms
            if (wk != 0)
            {
                // see also LAPACK's dnrm2 function
                if ((*wscale) == 0)
                {
                    // this is the nonzero first entry in w
                    (*wssq) = 1 ;
                }
                if ((*wscale) < wk)
                {
                    double rr = (*wscale) / wk ;
                    (*wssq) = 1 + (*wssq) * rr * rr ;
                    (*wscale) = wk ;
                }
                else
                {
                    double rr = wk / (*wscale) ;
                    (*wssq) += rr * rr ;
                }
            }

            // zero out F (g:m-1,k) and flag it as dead
            for (i = g ; i < m ; i++)
            {
                // This is not strictly necessary.  On output, entries outside
                // the staircase are ignored.
                F [INDEX (i,k,m)] = 0 ;
            }
            Stair [k] = 0 ;
            Tau [k] = 0 ;
            Rdead [k] = 1 ;

            if (nv > 0)
            {
                // apply pending block of Householder reflections
                PR (("(2) apply k1 %ld k2 %ld\n", k1, k2)) ;
                spqr_larftb (
                    0,                          // method 0: Left, Transpose
                    t0-g1, n-k2, nv, m, m,
                    V,                          // F (g1:t-1, k1:k1+nv-1)
                    &Tau [k1],                  // Tau (k1:k-1)
                    &F [INDEX (g1,k2,m)],       // F (g1:t-1, k2:n-1)
                    W, cc) ;                    // size nv*nv + nv*(n-k2)
                nv = 0 ;        // clear queued-up Householder reflections
                vzeros = 0 ;
            }

        }
        else
        {

            // -----------------------------------------------------------------
            // one more good pivot column found
            // -----------------------------------------------------------------

            Tau [k] = tau ;             // save the Householder coefficient
            if (nv == 0)
            {
                // start the queue of pending Householder updates, and define
                // the current panel as k1:k2-1
                g1 = g ;                        // first row of V
                k1 = k ;                        // first column of V
                k2 = MIN (n, k+fchunk) ;        // k2-1 is last col in panel
                V = &F [INDEX (g1,k1,m)] ;      // pending V starts here

                // check for switch to unblocked code
                mleft = m-g1 ;                  // number of rows left
                nleft = n-k1 ;                  // number of columns left
                if (mleft * (nleft-(fchunk+4)) < SMALL || mleft <= fchunk/2
                        || fchunk <= 1)
                {
                    // remaining matrix is small; switch to unblocked code by
                    // including the rest of the matrix in the panel.  Always
                    // use unblocked code if fchunk <= 1.
                    k2 = n ;
                }
            }
            nv++ ;  // one more pending update; V is F (g1:t-1, k1:k1+nv-1)

            // -----------------------------------------------------------------
            // keep track of "pure" flops, for performance testing only
            // -----------------------------------------------------------------

            // The Householder vector is of length t-g, including the unit
            // diagonal, and takes 3*(t-g) flops to compute.  It will be
            // applied as a block, but compute the "pure" flops by assuming
            // that this single Householder vector is computed and then applied
            // just by itself to the rest of the frontal matrix (columns
            // k+1:n-1, or n-k-1 columns).  Applying the Householder reflection
            // to just one column takes 4*(t-g) flops.  This computation only
            // works if TBB is disabled, merely because it uses a global
            // variable to keep track of the flop count.  If TBB is used, this
            // computation may result in a race condition; it is disabled in
            // that case.

            FLOP_COUNT ((t-g) * (3 + 4 * (n-k-1))) ;

            // -----------------------------------------------------------------
            // apply the kth Householder reflection to the current panel
            // -----------------------------------------------------------------

            // F (g:t-1, k+1:k2-1) -= v * tau * v' * F (g:t-1, k+1:k2-1), where
            // v is stored in F (g:t-1,k).  This applies just one reflection
            // to the current panel.
            PR (("apply 1: k %ld\n", k)) ;
            spqr_private_apply1 (t-g, k2-k-1, m, &F [INDEX (g,k,m)], tau,
                                 &F [INDEX (g,k+1,m)], W, cc) ;

            g++ ;   // one more pivot found

            // -----------------------------------------------------------------
            // apply the Householder reflections if end of panel reached
            // -----------------------------------------------------------------

            if (k == k2-1 || g == m)            // or if last pivot is found
            {
                // apply pending block of Householder reflections
                PR (("(3) apply k1 %ld k2 %ld\n", k1, k2)) ;
                spqr_larftb (
                    0,                          // method 0: Left, Transpose
                    t-g1, n-k2, nv, m, m,
                    V,                          // F (g1:t-1, k1:k1+nv-1)
                    &Tau [k1],                  // Tau (k1:k2-1)
                    &F [INDEX (g1,k2,m)],       // F (g1:t-1, k2:n-1)
                    W, cc) ;                    // size nv*nv + nv*(n-k2)
                nv = 0 ;        // clear queued-up Householder reflections
                vzeros = 0 ;
            }
        }

        // ---------------------------------------------------------------------
        // determine the rank of the pivot columns
        // ---------------------------------------------------------------------

        if (k == npiv-1)
        {
            // the rank is the number of good columns found in the first
            // npiv columns.  It is also the number of rows in the R block.
            // F (rank,npiv) is first entry in the C block.
            rank = g ;
            PR (("rank of Front pivcols: %ld\n", rank)) ;
        }
    }

    if (CHECK_BLAS_INT && !cc->blas_ok)
    {
        // This cannot occur if the BLAS_INT and the Long are the same integer.
        // In that case, CHECK_BLAS_INT is FALSE at compile-time, and the
        // compiler will then remove this as dead code.
        ERROR (CHOLMOD_INVALID, "problem too large for the BLAS") ;
        return (0) ;
    }

    return (rank) ;
}