template <typename Entry> void spqr_rsolve ( // inputs SuiteSparseQR_factorization <Entry> *QR, int use_Q1fill, // if TRUE, do X=E*(R\B), otherwise do X=R\B Int nrhs, // number of columns of B Int ldb, // leading dimension of B Entry *B, // size m-by-nrhs with leading dimesion ldb // output Entry *X, // size n-by-nrhs with leading dimension n // workspace Entry **Rcolp, // size QRnum->maxfrank Int *Rlive, // size QRnum->maxfrank Entry *W, // size QRnum->maxfrank * nrhs cholmod_common *cc ) { spqr_symbolic *QRsym ; spqr_numeric <Entry> *QRnum ; Int n1rows, n1cols, n ; Int *Q1fill, *R1p, *R1j ; Entry *R1x ; Entry xi ; Entry **Rblock, *R, *W1, *B1, *X1 ; Int *Rp, *Rj, *Super, *HStair, *Hm, *Stair ; char *Rdead ; Int nf, m, rank, j, f, col1, col2, fp, pr, fn, rm, k, i, row1, row2, ii, keepH, fm, h, t, live, kk ; // ------------------------------------------------------------------------- // get the contents of the QR object // ------------------------------------------------------------------------- QRsym = QR->QRsym ; QRnum = QR->QRnum ; n1rows = QR->n1rows ; n1cols = QR->n1cols ; n = QR->nacols ; Q1fill = use_Q1fill ? QR->Q1fill : NULL ; R1p = QR->R1p ; R1j = QR->R1j ; R1x = QR->R1x ; keepH = QRnum->keepH ; PR (("rsolve keepH %ld\n", keepH)) ; nf = QRsym->nf ; m = QRsym->m ; Rblock = QRnum->Rblock ; Rp = QRsym->Rp ; Rj = QRsym->Rj ; Super = QRsym->Super ; Rdead = QRnum->Rdead ; rank = QR->rank ; // R22 is R(n1rows:rank-1,n1cols:n-1) of // the global R. HStair = QRnum->HStair ; Hm = QRnum->Hm ; // ------------------------------------------------------------------------- // X = 0 // ------------------------------------------------------------------------- X1 = X ; for (kk = 0 ; kk < nrhs ; kk++) { for (i = 0 ; i < n ; i++) { X1 [i] = 0 ; } X1 += n ; } // ========================================================================= // === solve with the multifrontal rows of R =============================== // ========================================================================= Stair = NULL ; fm = 0 ; h = 0 ; t = 0 ; // start with row2 = QR-num->rank + n1rows, the last row of the combined R // factor of [A Binput] row2 = QRnum->rank + n1rows ; for (f = nf-1 ; f >= 0 ; f--) { // --------------------------------------------------------------------- // get the R block for front F // --------------------------------------------------------------------- R = Rblock [f] ; col1 = Super [f] ; // first pivot column in front F col2 = Super [f+1] ; // col2-1 is last pivot col fp = col2 - col1 ; // number of pivots in front F pr = Rp [f] ; // pointer to row indices for F fn = Rp [f+1] - pr ; // # of columns in front F if (keepH) { Stair = HStair + pr ; // staircase of front F fm = Hm [f] ; // # of rows in front F h = 0 ; // H vector starts in row h } // --------------------------------------------------------------------- // find the live pivot columns in this R or RH block // --------------------------------------------------------------------- rm = 0 ; // number of rows in R block for (k = 0 ; k < fp ; k++) { j = col1 + k ; ASSERT (Rj [pr + k] == j) ; if (keepH) { t = Stair [k] ; // length of R+H vector ASSERT (t >= 0 && t <= fm) ; if (t == 0) { live = FALSE ; // column k is dead t = rm ; // dead col, R only, no H h = rm ; } else { live = (rm < fm) ; // k is live, unless we hit the wall h = rm + 1 ; // H vector starts in row h } ASSERT (t >= h) ; } else { live = (!Rdead [j]) ; } if (live) { // R (rm,k) is a "diagonal"; rm and k are local indices. // Keep track of a pointer to the first entry R(0,k) Rcolp [rm] = R ; Rlive [rm] = j ; rm++ ; } else { // compute the basic solution; dead columns are zero ii = Q1fill ? Q1fill [j+n1cols] : j+n1cols ; if (ii < n) { for (kk = 0 ; kk < nrhs ; kk++) { // X (ii,kk) = 0; note this is stride n X [INDEX (ii,kk,n)] = 0 ; } } } // advance to the next column of R in the R block R += rm + (keepH ? (t-h) : 0) ; } // There are rm rows in this R block, corresponding to the rm live // columns in the range col1:col2-1. The list of live global column // indices is given in Rlive [0:rm-1]. Pointers to the numerical // entries for each of these columns in this R block are given in // Rcolp [0:rm-1]. The rm rows in this R block correspond to // row1:row2-1 of R and b. row1 = row2 - rm ; // --------------------------------------------------------------------- // get the right-hand sides for these rm equations // --------------------------------------------------------------------- // W = B (row1:row2-1,:) ASSERT (rm <= QRnum->maxfrank) ; W1 = W ; B1 = B ; for (kk = 0 ; kk < nrhs ; kk++) { for (i = 0 ; i < rm ; i++) { ii = row1 + i ; ASSERT (ii >= n1rows) ; W1 [i] = (ii < rank) ? B1 [ii] : 0 ; } W1 += rm ; B1 += ldb ; } // --------------------------------------------------------------------- // solve with the rectangular part of R (W = W - R2*x2) // --------------------------------------------------------------------- for ( ; k < fn ; k++) { j = Rj [pr + k] ; ASSERT (j >= col2 && j < QRsym->n) ; ii = Q1fill ? Q1fill [j+n1cols] : j+n1cols ; ASSERT ((ii < n) == (j+n1cols < n)) ; // break if past the last column of A in QR of [A Binput] if (ii >= n) break ; if (!Rdead [j]) { // global column j is live W1 = W ; for (kk = 0 ; kk < nrhs ; kk++) { xi = X [INDEX (ii,kk,n)] ; // xi = X (ii,kk) if (xi != (Entry) 0) { FLOP_COUNT (2*rm) ; for (i = 0 ; i < rm ; i++) { W1 [i] -= R [i] * xi ; } } W1 += rm ; } } // go to the next column of R R += rm ; if (keepH) { t = Stair [k] ; // length of R+H vector ASSERT (t >= 0 && t <= fm) ; h = MIN (h+1, fm) ; // H vector starts in row h ASSERT (t >= h) ; R += (t-h) ; } } // --------------------------------------------------------------------- // solve with the squeezed upper triangular part of R // --------------------------------------------------------------------- for (k = rm-1 ; k >= 0 ; k--) { R = Rcolp [k] ; // kth live pivot column j = Rlive [k] ; // is jth global column ii = Q1fill ? Q1fill [j+n1cols] : j+n1cols ; ASSERT ((ii < n) == (j+n1cols < n)) ; if (ii < n) { W1 = W ; for (kk = 0 ; kk < nrhs ; kk++) { // divide by the "diagonal" // xi = W1 [k] / R [k] ; xi = spqr_divide (W1 [k], R [k], cc) ; FLOP_COUNT (1) ; X [INDEX(ii,kk,n)] = xi ; if (xi != (Entry) 0) { FLOP_COUNT (2*k) ; for (i = 0 ; i < k ; i++) { W1 [i] -= R [i] * xi ; } } W1 += rm ; } } } // --------------------------------------------------------------------- // prepare for the R block for front f-1 // --------------------------------------------------------------------- row2 = row1 ; } ASSERT (row2 == n1rows) ; // ========================================================================= // === solve with the singleton rows of R ================================== // ========================================================================= FLOP_COUNT ((n1rows <= 0) ? 0 : nrhs * (n1rows + (2 * (R1p [n1rows] - n1rows)))) ; for (kk = 0 ; kk < nrhs ; kk++) { for (i = n1rows-1 ; i >= 0 ; i--) { // get the right-hand side for this ith singleton row Entry x = B [i] ; // solve with the "off-diagonal" entries, x = x-R(i,:)*x2 for (Int p = R1p [i] + 1 ; p < R1p [i+1] ; p++) { Int jnew = R1j [p] ; ASSERT (jnew >= i && jnew < n) ; Int jold = Q1fill ? Q1fill [jnew] : jnew ; ASSERT (jold >= 0 && jold < n) ; x -= R1x [p] * X [jold] ; } // divide by the "diagonal" (the singleton entry itself) Int p = R1p [i] ; Int jnew = R1j [p] ; Int jold = Q1fill ? Q1fill [jnew] : jnew ; ASSERT (jold >= 0 && jold < n) ; // X [jold] = x / R1x [p] ; using cc->complex_divide X [jold] = spqr_divide (x, R1x [p], cc) ; } B += ldb ; X += n ; } }
template <typename Entry> Long spqr_front ( // input, not modified Long m, // F is m-by-n with leading dimension m Long n, Long npiv, // number of pivot columns double tol, // a column is flagged as dead if its norm is <= tol Long ntol, // apply tol only to first ntol pivot columns Long fchunk, // block size for compact WY Householder reflections, // treated as 1 if fchunk <= 1 // input/output Entry *F, // frontal matrix F of size m-by-n Long *Stair, // size n, entries F (Stair[k]:m-1, k) are all zero, // for each k = 0:n-1, and remain zero on output. char *Rdead, // size npiv; all zero on input. If k is dead, // Rdead [k] is set to 1 // output, not defined on input Entry *Tau, // size n, Householder coefficients // workspace, undefined on input and output Entry *W, // size b*n, where b = min (fchunk,n,m) // input/output double *wscale, double *wssq, cholmod_common *cc // for cc->hypotenuse function ) { Entry tau ; double wk ; Entry *V ; Long k, t, g, g1, nv, k1, k2, i, t0, vzeros, mleft, nleft, vsize, minchunk, rank ; // NOTE: inputs are not checked for NULL (except if debugging enabled) ASSERT (F != NULL) ; ASSERT (Stair != NULL) ; ASSERT (Rdead != NULL) ; ASSERT (Tau != NULL) ; ASSERT (W != NULL) ; ASSERT (m >= 0 && n >= 0) ; npiv = MAX (0, npiv) ; // npiv must be between 0 and n npiv = MIN (n, npiv) ; g1 = 0 ; // row index of first queued-up Householder k1 = 0 ; // pending Householders are in F (g1:t, k1:k2-1) k2 = 0 ; V = F ; // Householder vectors start here g = 0 ; // number of good Householders found nv = 0 ; // number of Householder reflections queued up vzeros = 0 ; // number of explicit zeros in queued-up H's t = 0 ; // staircase of current column fchunk = MAX (fchunk, 1) ; minchunk = MAX (MINCHUNK, fchunk/MINCHUNK_RATIO) ; rank = MIN (m,npiv) ; // F (rank,npiv) is the first entry in C. rank // is also the number of rows in the R block, // and the number of good pivot columns found. ntol = MIN (ntol, npiv) ; // Note ntol can be negative, which means to // not use tol at all. PR (("Front %ld by %ld with %ld pivots\n", m, n, npiv)) ; for (k = 0 ; k < n ; k++) { // --------------------------------------------------------------------- // reduce the kth column of F to eliminate all but "diagonal" F (g,k) // --------------------------------------------------------------------- // get the staircase for the kth column, and operate on the "diagonal" // F (g,k); eliminate F (g+1:t-1, k) to zero t0 = t ; // t0 = staircase of column k-1 t = Stair [k] ; // t = staircase of this column k PR (("k %ld g %ld m %ld n %ld npiv %ld\n", k, g, m, n, npiv)) ; if (g >= m) { // F (g,k) is outside the matrix, so we're done. If this happens // when k < npiv, then there is no contribution block. PR (("hit the wall, npiv: %ld k %ld rank %ld\n", npiv, k, rank)) ; for ( ; k < npiv ; k++) { Rdead [k] = 1 ; Stair [k] = 0 ; // remaining pivot columns all dead Tau [k] = 0 ; } for ( ; k < n ; k++) { Stair [k] = m ; // non-pivotal columns Tau [k] = 0 ; } ASSERT (nv == 0) ; // there can be no pending updates return (rank) ; } // if t < g+1, then this column is all zero; fix staircase so that R is // always inside the staircase. t = MAX (g+1,t) ; Stair [k] = t ; // --------------------------------------------------------------------- // If t just grew a lot since the last t, apply H now to all of F // --------------------------------------------------------------------- // vzeros is the number of zero entries in V after including the next // Householder vector. If it would exceed 50% of the size of V, // apply the pending Householder reflections now, but only if // enough vectors have accumulated. vzeros += nv * (t - t0) ; if (nv >= minchunk) { vsize = (nv*(nv+1))/2 + nv*(t-g1-nv) ; if (vzeros > MAX (16, vsize/2)) { // apply pending block of Householder reflections PR (("(1) apply k1 %ld k2 %ld\n", k1, k2)) ; spqr_larftb ( 0, // method 0: Left, Transpose t0-g1, n-k2, nv, m, m, V, // F (g1:t-1, k1:k1+nv-1) &Tau [k1], // Tau (k1:k-1) &F [INDEX (g1,k2,m)], // F (g1:t-1, k2:n-1) W, cc) ; // size nv*nv + nv*(n-k2) nv = 0 ; // clear queued-up Householder reflections vzeros = 0 ; } } // --------------------------------------------------------------------- // find a Householder reflection that reduces column k // --------------------------------------------------------------------- tau = spqr_private_house (t-g, &F [INDEX (g,k,m)], cc) ; // --------------------------------------------------------------------- // check to see if the kth column is OK // --------------------------------------------------------------------- if (k < ntol && (wk = spqr_abs (F [INDEX (g,k,m)], cc)) <= tol) { // ----------------------------------------------------------------- // norm (F (g:t-1, k)) is too tiny; the kth pivot column is dead // ----------------------------------------------------------------- // keep track of the 2-norm of w, the dead column 2-norms if (wk != 0) { // see also LAPACK's dnrm2 function if ((*wscale) == 0) { // this is the nonzero first entry in w (*wssq) = 1 ; } if ((*wscale) < wk) { double rr = (*wscale) / wk ; (*wssq) = 1 + (*wssq) * rr * rr ; (*wscale) = wk ; } else { double rr = wk / (*wscale) ; (*wssq) += rr * rr ; } } // zero out F (g:m-1,k) and flag it as dead for (i = g ; i < m ; i++) { // This is not strictly necessary. On output, entries outside // the staircase are ignored. F [INDEX (i,k,m)] = 0 ; } Stair [k] = 0 ; Tau [k] = 0 ; Rdead [k] = 1 ; if (nv > 0) { // apply pending block of Householder reflections PR (("(2) apply k1 %ld k2 %ld\n", k1, k2)) ; spqr_larftb ( 0, // method 0: Left, Transpose t0-g1, n-k2, nv, m, m, V, // F (g1:t-1, k1:k1+nv-1) &Tau [k1], // Tau (k1:k-1) &F [INDEX (g1,k2,m)], // F (g1:t-1, k2:n-1) W, cc) ; // size nv*nv + nv*(n-k2) nv = 0 ; // clear queued-up Householder reflections vzeros = 0 ; } } else { // ----------------------------------------------------------------- // one more good pivot column found // ----------------------------------------------------------------- Tau [k] = tau ; // save the Householder coefficient if (nv == 0) { // start the queue of pending Householder updates, and define // the current panel as k1:k2-1 g1 = g ; // first row of V k1 = k ; // first column of V k2 = MIN (n, k+fchunk) ; // k2-1 is last col in panel V = &F [INDEX (g1,k1,m)] ; // pending V starts here // check for switch to unblocked code mleft = m-g1 ; // number of rows left nleft = n-k1 ; // number of columns left if (mleft * (nleft-(fchunk+4)) < SMALL || mleft <= fchunk/2 || fchunk <= 1) { // remaining matrix is small; switch to unblocked code by // including the rest of the matrix in the panel. Always // use unblocked code if fchunk <= 1. k2 = n ; } } nv++ ; // one more pending update; V is F (g1:t-1, k1:k1+nv-1) // ----------------------------------------------------------------- // keep track of "pure" flops, for performance testing only // ----------------------------------------------------------------- // The Householder vector is of length t-g, including the unit // diagonal, and takes 3*(t-g) flops to compute. It will be // applied as a block, but compute the "pure" flops by assuming // that this single Householder vector is computed and then applied // just by itself to the rest of the frontal matrix (columns // k+1:n-1, or n-k-1 columns). Applying the Householder reflection // to just one column takes 4*(t-g) flops. This computation only // works if TBB is disabled, merely because it uses a global // variable to keep track of the flop count. If TBB is used, this // computation may result in a race condition; it is disabled in // that case. FLOP_COUNT ((t-g) * (3 + 4 * (n-k-1))) ; // ----------------------------------------------------------------- // apply the kth Householder reflection to the current panel // ----------------------------------------------------------------- // F (g:t-1, k+1:k2-1) -= v * tau * v' * F (g:t-1, k+1:k2-1), where // v is stored in F (g:t-1,k). This applies just one reflection // to the current panel. PR (("apply 1: k %ld\n", k)) ; spqr_private_apply1 (t-g, k2-k-1, m, &F [INDEX (g,k,m)], tau, &F [INDEX (g,k+1,m)], W, cc) ; g++ ; // one more pivot found // ----------------------------------------------------------------- // apply the Householder reflections if end of panel reached // ----------------------------------------------------------------- if (k == k2-1 || g == m) // or if last pivot is found { // apply pending block of Householder reflections PR (("(3) apply k1 %ld k2 %ld\n", k1, k2)) ; spqr_larftb ( 0, // method 0: Left, Transpose t-g1, n-k2, nv, m, m, V, // F (g1:t-1, k1:k1+nv-1) &Tau [k1], // Tau (k1:k2-1) &F [INDEX (g1,k2,m)], // F (g1:t-1, k2:n-1) W, cc) ; // size nv*nv + nv*(n-k2) nv = 0 ; // clear queued-up Householder reflections vzeros = 0 ; } } // --------------------------------------------------------------------- // determine the rank of the pivot columns // --------------------------------------------------------------------- if (k == npiv-1) { // the rank is the number of good columns found in the first // npiv columns. It is also the number of rows in the R block. // F (rank,npiv) is first entry in the C block. rank = g ; PR (("rank of Front pivcols: %ld\n", rank)) ; } } if (CHECK_BLAS_INT && !cc->blas_ok) { // This cannot occur if the BLAS_INT and the Long are the same integer. // In that case, CHECK_BLAS_INT is FALSE at compile-time, and the // compiler will then remove this as dead code. ERROR (CHOLMOD_INVALID, "problem too large for the BLAS") ; return (0) ; } return (rank) ; }