Esempio n. 1
0
template <typename Entry> spqr_numeric <Entry> *spqr_factorize
(
    // input, optionally freed on output
    cholmod_sparse **Ahandle,

    // inputs, not modified
    Long freeA,                     // if TRUE, free A on output
    double tol,                     // for rank detection
    Long ntol,                      // apply tol only to first ntol columns
    spqr_symbolic *QRsym,

    // workspace and parameters
    cholmod_common *cc
)
{
    Long *Wi, *Qfill, *PLinv, *Cm, *Sp, *Stack_size,
        *TaskFront, *TaskFrontp, *TaskStack, *Stack_maxstack ;
    Entry *Sx, **Rblock, **Cblock, **Stacks ;
    spqr_numeric <Entry> *QRnum ;
    Long nf, m, n, anz, fchunk, maxfn, rank, maxfrank, rjsize, rank1,
        maxstack,j, wtsize, stack, ns, ntasks, keepH, hisize ;
    char *Rdead ;
    cholmod_sparse *A ;
    spqr_work <Entry> *Work ;

    // -------------------------------------------------------------------------
    // get inputs and contents of symbolic object
    // -------------------------------------------------------------------------

    if (QRsym == NULL)
    {
        // out of memory in caller
        if (freeA)
        {
            // if freeA is true, A must always be freed, even on error
            cholmod_l_free_sparse (Ahandle, cc) ;
        }
        return (NULL) ;
    }

    A = *Ahandle ;

    nf = QRsym->nf ;                // number of frontal matrices
    m = QRsym->m ;                  // A is m-by-n
    n = QRsym->n ;
    anz = QRsym->anz ;              // nnz (A)

    keepH = QRsym->keepH ;

    rjsize = QRsym->rjsize ;

    Sp = QRsym->Sp ;                // size m+1, row pointers for S
    Qfill = QRsym->Qfill ;          // fill-reducing ordering
    PLinv = QRsym->PLinv ;          // size m, leftmost column sort

    ns = QRsym->ns ;                // number of stacks
    ntasks = QRsym->ntasks ;        // number of tasks

    // FUTURE: compute a unique maxfn for each stack.  Current maxfn is OK, but
    // it's a global max of the fn of all fronts, and need only be max fn of
    // the fronts in any given stack.

    maxfn  = QRsym->maxfn ;         // max # of columns in any front
    ASSERT (maxfn <= n) ;
    hisize = QRsym->hisize ;        // # of integers in Hii, Householder vectors

    TaskFrontp = QRsym->TaskFrontp ;
    TaskFront  = QRsym->TaskFront ;
    TaskStack  = QRsym->TaskStack ;

    maxstack = QRsym->maxstack ;
    Stack_maxstack = QRsym->Stack_maxstack ;

    if (!(QRsym->do_rank_detection))
    {
        // disable rank detection if not accounted for in analysis
        tol = -1 ;
    }

    // If there is one task, there is only one stack, and visa versa
    ASSERT ((ns == 1) == (ntasks == 1)) ;

    PR (("factorize with ns %ld ntasks %ld\n", ns, ntasks)) ;

    // -------------------------------------------------------------------------
    // allocate workspace
    // -------------------------------------------------------------------------

    cholmod_l_allocate_work (0, MAX (m,nf), 0, cc) ;

    // shared Long workspace
    Wi = (Long *) cc->Iwork ;   // size m, aliased with the rest of Iwork
    Cm = Wi ;                   // size nf

    // Cblock is workspace shared by all threads
    Cblock = (Entry **) cholmod_l_malloc (nf+1, sizeof (Entry *), cc) ;

    Work = NULL ;               // Work and its contents not yet allocated
    fchunk = MIN (m, FCHUNK) ;
    wtsize = 0 ;

    // -------------------------------------------------------------------------
    // create S
    // -------------------------------------------------------------------------

    // create numeric values of S = A(p,q) in row-form in Sx
    Sx = (Entry *) cholmod_l_malloc (anz, sizeof (Entry), cc) ;

    if (cc->status == CHOLMOD_OK)
    {
        // use Wi as workspace (Iwork (0:m-1)) [
        spqr_stranspose2 (A, Qfill, Sp, PLinv, Sx, Wi) ;
        // Wi no longer needed ]
    }

    PR (("status after creating Sx: %d\n", cc->status)) ;

    // -------------------------------------------------------------------------
    // input matrix A no longer needed; free it if the user doesn't need it
    // -------------------------------------------------------------------------

    if (freeA)
    {
        // this is done even if out of memory, above
        cholmod_l_free_sparse (Ahandle, cc) ;
        ASSERT (*Ahandle == NULL) ;
    }

    if (cc->status < CHOLMOD_OK)
    {
        // out of memory
        FREE_WORK ;
        return (NULL) ;
    }

    // -------------------------------------------------------------------------
    // allocate numeric object
    // -------------------------------------------------------------------------

    QRnum = (spqr_numeric<Entry> *)
        cholmod_l_malloc (1, sizeof (spqr_numeric<Entry>), cc) ;

    if (cc->status < CHOLMOD_OK)
    {
        // out of memory
        FREE_WORK ;
        return (NULL) ;
    }

    Rblock     = (Entry **) cholmod_l_malloc (nf, sizeof (Entry *), cc) ;
    Rdead      = (char *)   cholmod_l_calloc (n,  sizeof (char),    cc) ;

    // these may be revised (with ns=1) if we run out of memory
    Stacks     = (Entry **) cholmod_l_calloc (ns, sizeof (Entry *), cc) ;
    Stack_size = (Long *)   cholmod_l_calloc (ns, sizeof (Long),    cc) ;

    QRnum->Rblock     = Rblock ;
    QRnum->Rdead      = Rdead ;
    QRnum->Stacks     = Stacks ;
    QRnum->Stack_size = Stack_size ;

    if (keepH)
    {
        // allocate permanent space for Stair, Tau, Hii for each front
        QRnum->HStair= (Long *)  cholmod_l_malloc (rjsize, sizeof (Long),  cc) ;
        QRnum->HTau  = (Entry *) cholmod_l_malloc (rjsize, sizeof (Entry), cc) ;
        QRnum->Hii   = (Long *)  cholmod_l_malloc (hisize, sizeof (Long),  cc) ;
        QRnum->Hm    = (Long *)  cholmod_l_malloc (nf,     sizeof (Long),  cc) ;
        QRnum->Hr    = (Long *)  cholmod_l_malloc (nf,     sizeof (Long),  cc) ;
        QRnum->HPinv = (Long *)  cholmod_l_malloc (m,      sizeof (Long),  cc) ;
    }
    else
    {
        // H is not kept; this part of the numeric object is not used
        QRnum->HStair = NULL ;
        QRnum->HTau = NULL ;
        QRnum->Hii = NULL ;
        QRnum->Hm = NULL ;
        QRnum->Hr = NULL ;
        QRnum->HPinv = NULL ;
    }

    QRnum->n = n ;
    QRnum->m = m ;
    QRnum->nf = nf ;
    QRnum->rjsize = rjsize ;
    QRnum->hisize = hisize ;
    QRnum->keepH = keepH ;
    QRnum->maxstack = maxstack ;
    QRnum->ns = ns ;
    QRnum->ntasks = ntasks ;
    QRnum->maxfm = EMPTY ;      // max (Hm [0:nf-1]), computed only if H is kept

    if (cc->status < CHOLMOD_OK)
    {
        // out of memory
        spqr_freenum (&QRnum, cc) ;
        FREE_WORK ;
        return (NULL) ;
    }

    // -------------------------------------------------------------------------
    // allocate workspace
    // -------------------------------------------------------------------------

    Work = get_Work <Entry> (ns, n, maxfn, keepH, fchunk, &wtsize, cc) ;

    // -------------------------------------------------------------------------
    // allocate and initialize each Stack
    // -------------------------------------------------------------------------

    if (cc->status == CHOLMOD_OK)
    {
        for (stack = 0 ; stack < ns ; stack++)
        {
            Entry *Stack ;
            size_t stacksize = (ntasks == 1) ?
                maxstack : Stack_maxstack [stack] ;
            Stack_size [stack] = stacksize ;
            Stack = (Entry *) cholmod_l_malloc (stacksize, sizeof (Entry), cc) ;
            Stacks [stack] = Stack ;
            Work [stack].Stack_head = Stack ;
            Work [stack].Stack_top  = Stack + stacksize ;
        }
    }

    // -------------------------------------------------------------------------
    // punt to sequential case and fchunk = 1 if out of memory
    // -------------------------------------------------------------------------

    if (cc->status < CHOLMOD_OK)
    {
        // PUNT: ran out of memory; try again with smaller workspace
        // out of memory; free any stacks that were successfully allocated
        if (Stacks != NULL)
        {
            for (stack = 0 ; stack < ns ; stack++)
            {
                size_t stacksize = (ntasks == 1) ?
                    maxstack : Stack_maxstack [stack] ;
                cholmod_l_free (stacksize, sizeof (Entry), Stacks [stack], cc) ;
            }
        }
        cholmod_l_free (ns, sizeof (Entry *), Stacks,     cc) ;
        cholmod_l_free (ns, sizeof (Long),    Stack_size, cc) ;

        // free the contents of Work, and the Work array itself
        free_Work <Entry> (Work, ns, n, maxfn, wtsize, cc) ;
        cholmod_l_free (ns, sizeof (spqr_work <Entry>), Work, cc) ;

        // punt to a single stack, a single task, and fchunk of 1
        ns = 1 ;
        ntasks = 1 ;
        fchunk = 1 ;
        cc->status = CHOLMOD_OK ;
        Work = get_Work <Entry> (ns, n, maxfn, keepH, fchunk, &wtsize, cc) ;
        Stacks     = (Entry **) cholmod_l_calloc (ns, sizeof (Entry *), cc) ;
        Stack_size = (Long *)   cholmod_l_calloc (ns, sizeof (Long),    cc) ;
        QRnum->Stacks     = Stacks ;
        QRnum->Stack_size = Stack_size ;
        if (cc->status == CHOLMOD_OK)
        {
            Entry *Stack ;
            Stack_size [0] = maxstack ;
            Stack = (Entry *) cholmod_l_malloc (maxstack, sizeof (Entry), cc) ;
            Stacks [0] = Stack ;
            Work [0].Stack_head = Stack ;
            Work [0].Stack_top  = Stack + maxstack ;
        }
    }

    // actual # of stacks and tasks used
    QRnum->ns = ns ;
    QRnum->ntasks = ntasks ;

    // -------------------------------------------------------------------------
    // check if everything was allocated OK
    // -------------------------------------------------------------------------

    if (cc->status < CHOLMOD_OK)
    {
        spqr_freenum (&QRnum, cc) ;
        FREE_WORK ;
        return (NULL) ;
    }

    // At this point, the factorization is guaranteed to succeed, unless
    // sizeof (BLAS_INT) < sizeof (Long), in which case, you really should get
    // a 64-bit BLAS.

    // -------------------------------------------------------------------------
    // create the Blob : everything the numeric factorization kernel needs
    // -------------------------------------------------------------------------

    spqr_blob <Entry> Blob ;
    Blob.QRsym = QRsym ;
    Blob.QRnum = QRnum ;
    Blob.tol = tol ;
    Blob.Work = Work ;
    Blob.Cm = Cm ;
    Blob.Cblock = Cblock ;
    Blob.Sx = Sx ;
    Blob.ntol = ntol ;
    Blob.fchunk = fchunk ;
    Blob.cc = cc ;

    // -------------------------------------------------------------------------
    // initialize the "pure" flop count (for performance testing only)
    // -------------------------------------------------------------------------

    cc->other1 [0] = 0 ;

    // -------------------------------------------------------------------------
    // numeric QR factorization
    // -------------------------------------------------------------------------

    if (ntasks == 1)
    {
        // Just one task, with or without TBB installed: don't use TBB
        spqr_kernel (0, &Blob) ;        // sequential case
    }
    else
    {
#ifdef HAVE_TBB
        // parallel case: TBB is installed, and there is more than one task
        int nthreads = MAX (0, cc->SPQR_nthreads) ;
        spqr_parallel (ntasks, nthreads, &Blob) ;
#else
        // TBB not installed, but the work is still split into multiple tasks.
        // do tasks 0 to ntasks-2 (skip the placeholder root task id = ntasks-1)
        for (Long id = 0 ; id < ntasks-1 ; id++)
        {
            spqr_kernel (id, &Blob) ;
        }
#endif
    }

    // -------------------------------------------------------------------------
    // check for BLAS Long overflow
    // -------------------------------------------------------------------------

    if (CHECK_BLAS_INT && cc->status < CHOLMOD_OK)
    {
        // problem too large for the BLAS.  This can only occur if, for example
        // you're on a 64-bit platform (with sizeof (Long) = 8) and using a
        // 32-bit BLAS (with sizeof (BLAS_INT) = 4).  If sizeof (BLAS_INT) is
        // equal to sizeof (Long), then CHECK_BLAS_INT is FALSE at
        // compile-time, and this entire code is removed as dead code by the
        // compiler.
        spqr_freenum (&QRnum, cc) ;
        FREE_WORK ;
        return (NULL) ;
    }

    // -------------------------------------------------------------------------
    // finalize the rank
    // -------------------------------------------------------------------------

    rank = 0 ;
    maxfrank = 1 ;
    for (stack = 0 ; stack < ns ; stack++)
    {
        rank += Work [stack].sumfrank ;
        maxfrank = MAX (maxfrank, Work [stack].maxfrank) ;
    }
    QRnum->rank = rank ;                    // required by spqr_hpinv
    QRnum->maxfrank = maxfrank ;
    PR (("m %ld n %ld my QR rank %ld\n", m, n, rank)) ;

    // -------------------------------------------------------------------------
    // finalize norm(w) for the dead column 2-norms
    // -------------------------------------------------------------------------

    double wscale = 0 ;
    double wssq = 1 ;
    for (stack = 0 ; stack < ns ; stack++)
    {
        // norm_E_fro = norm (s.*sqrt(q)) ; see also LAPACK's dnrm2
        double ws = Work [stack].wscale ;
        double wq = Work [stack].wssq ;
        if (wq != 0)
        {
            double wk = ws * sqrt (wq) ;
            if (wscale < wk)
            {
                double rr = wscale / wk ;
                wssq = 1 + wssq * rr * rr ;
                wscale = wk ;
            }
            else
            {
                double rr = wk / wscale ;
                wssq += rr * rr ;
            }
        }
    }
    QRnum->norm_E_fro = wscale * sqrt (wssq) ;
    cc->SPQR_xstat [2] = QRnum->norm_E_fro ;

    // -------------------------------------------------------------------------
    // free all workspace, except Cblock and Work
    // -------------------------------------------------------------------------

    FREE_WORK_PART1 ;

    // -------------------------------------------------------------------------
    // shrink the Stacks to hold just R (and H, if H kept)
    // -------------------------------------------------------------------------

    // If shrink is <= 0, then the Stacks are not modified.
    // If shrink is 1, each Stack is realloc'ed to the right size (default)
    // If shrink is > 1, then each Stack is forcibly moved and shrunk.
    // This option is mainly meant for testing, not production use.
    // It should be left at 1 for production use.

    Long any_moved = FALSE ;

    int shrink = cc->SPQR_shrink ;

    if (shrink > 0)
    {
        for (stack = 0 ; stack < ns ; stack++)
        {
            // stacksize is the current size of the this Stack
            size_t stacksize = Stack_size [stack] ;
            Entry *Stack = Stacks [stack] ;
            // Work [stack].Stack_head points to the first empty slot in stack,
            // so newstacksize is the size of the space in use by R and H.
            size_t newstacksize = Work [stack].Stack_head - Stack ;
            ASSERT (newstacksize <= stacksize) ;
            // Reduce the size of this stack.  Cblock [0:nf-1] is no longer
            // needed for holding pointers to the C blocks of each frontal
            // matrix.  Reuse it to hold the reallocated stacks. 
            if (shrink > 1)
            {
                // force the block to move by malloc'ing a new one;
                // this option is mainly for testing only.
                Cblock [stack] = (Entry *) cholmod_l_malloc (newstacksize,
                    sizeof (Entry), cc) ;
                if (Cblock [stack] == NULL)
                {
                    // oops, the malloc failed; just use the old block
                    cc->status = CHOLMOD_OK ;
                    Cblock [stack] = Stack ;
                    // update the memory usage statistics, however
		    cc->memory_inuse +=
                        ((newstacksize-stacksize) * sizeof (Entry)) ;
                }
                else
                {
                    // malloc is OK; copy the block over and free the old one
                    memcpy (Cblock [stack], Stack, newstacksize*sizeof(Entry)) ;
                    cholmod_l_free (stacksize, sizeof (Entry), Stack, cc) ;
                }
                // the Stack has been shrunk to the new size
                stacksize = newstacksize ;
            }
            else
            {
                // normal method; just realloc the block
                Cblock [stack] =    // pointer to the new Stack
                    (Entry *) cholmod_l_realloc (
                    newstacksize,   // requested size of Stack, in # of Entries
                    sizeof (Entry), // size of each Entry in the Stack
                    Stack,          // pointer to the old Stack
                    &stacksize,     // input: old stack size; output: new size
                    cc) ;
            }
            Stack_size [stack] = stacksize ;
            any_moved = any_moved || (Cblock [stack] != Stack) ;
            // reducing the size of a block of memory always succeeds
            ASSERT (cc->status == CHOLMOD_OK) ;
        }
    }

    // -------------------------------------------------------------------------
    // adjust the Rblock pointers if the Stacks have been moved
    // -------------------------------------------------------------------------

    if (any_moved)
    {
        // at least one Stack has moved; check all fronts and adjust them
        for (Long task = 0 ; task < ntasks ; task++)
        {
            Long kfirst, klast ;
            if (ntasks == 1)
            {
                // sequential case
                kfirst = 0 ;
                klast = nf ;
                stack = 0 ;
            }
            else
            {
                kfirst = TaskFrontp [task] ;
                klast  = TaskFrontp [task+1] ;
                stack  = TaskStack [task] ;
            }
            ASSERT (stack >= 0 && stack < ns) ;
            Entry *Old_Stack = Stacks [stack] ;
            Entry *New_Stack = Cblock [stack] ;
            if (New_Stack != Old_Stack)
            {
                for (Long kf = kfirst ; kf < klast ; kf++)
                {
                    Long f = (ntasks == 1) ? kf : TaskFront [kf] ;
                    Rblock [f] = New_Stack + (Rblock [f] - Old_Stack) ;
                }
            }
        }
        // finalize the Stacks
        for (stack = 0 ; stack < ns ; stack++)
        {
            Stacks [stack] = Cblock [stack] ;
        }
    }

    // -------------------------------------------------------------------------
    // free the rest of the workspace
    // -------------------------------------------------------------------------
    
    FREE_WORK_PART2 ;

    // -------------------------------------------------------------------------
    // extract the implicit row permutation for H
    // -------------------------------------------------------------------------

    // this must be done sequentially, when all threads are finished
    if (keepH)
    {
        // use Wi as workspace (Iwork (0:m-1)) [
        spqr_hpinv (QRsym, QRnum, Wi) ;
        // Wi no longer needed ]
    }

    // -------------------------------------------------------------------------
    // find the rank and return the result
    // -------------------------------------------------------------------------

    // find the rank of the first ntol columns of A
    if (ntol >= n)
    {
        rank1 = rank ;
    }
    else
    {
        rank1 = 0 ;
        for (j = 0 ; j < ntol ; j++)
        {
            if (!Rdead [j])
            {
                rank1++ ;
            }
        }
    }
    QRnum->rank1 = rank1 ;
    return (QRnum) ;
}
Esempio n. 2
0
template <typename Entry> int spqr_1fixed
(
    // inputs, not modified
    double tol,             // only accept singletons above tol
    Long bncols,            // number of columns of B
    cholmod_sparse *A,      // m-by-n sparse matrix

    // output arrays, neither allocated nor defined on input.

    Long **p_R1p,           // size n1rows+1, R1p [k] = # of nonzeros in kth
                            // row of R1.  NULL if n1cols == 0.
    Long **p_P1inv,         // size m, singleton row inverse permutation.
                            // If row i of A is the kth singleton row, then
                            // P1inv [i] = k.  NULL if n1cols is zero.

    cholmod_sparse **p_Y,   // on output, only the first n-n1cols+1 entries of
                            // Y->p are defined (if Y is not NULL), where
                            // Y = [A B] or Y = [A2 B2].  If B is empty and
                            // there are no column singletons, Y is NULL

    Long *p_n1cols,         // number of column singletons found
    Long *p_n1rows,         // number of corresponding rows found

    // workspace and parameters
    cholmod_common *cc
)
{
    cholmod_sparse *Y ;
    Long *P1inv, *R1p, *Yp, *Qrows, *Ap, *Ai ;
    char *Mark ;
    Entry *Ax ;
    Long i, j, k, p, d, row, n1rows, n1cols, ynz, iold, inew, kk, m, n, xtype ;

    // -------------------------------------------------------------------------
    // get inputs
    // -------------------------------------------------------------------------

    xtype = spqr_type <Entry> ( ) ;

    m = A->nrow ;
    n = A->ncol ;
    Ap = (Long *) A->p ;
    Ai = (Long *) A->i ;
    Ax = (Entry *) A->x ;

    // set outputs to NULL in case of early return
    *p_R1p    = NULL ;
    *p_P1inv  = NULL ;
    *p_Y      = NULL ;
    *p_n1cols = EMPTY ;
    *p_n1rows = EMPTY ;

    // -------------------------------------------------------------------------
    // allocate workspace
    // -------------------------------------------------------------------------

    Mark = (char *) cholmod_l_calloc (m, sizeof (char), cc) ;
    Qrows = (Long *) cholmod_l_malloc (n, sizeof (Long), cc) ;

    if (cc->status < CHOLMOD_OK)
    {
        // out of memory
        cholmod_l_free (m, sizeof (char), Mark, cc) ;
        cholmod_l_free (n, sizeof (Long), Qrows, cc) ;
        return (FALSE) ;
    }

    // -------------------------------------------------------------------------
    // find singletons; no column permutations allowed
    // -------------------------------------------------------------------------

    n1cols = 0 ;        // number of column singletons found
    n1rows = 0 ;        // number of corresponding singleton rows

    for (j = 0 ; j < n ; j++)
    {
        // count the number of unmarked rows in column j
        Entry aij = 0 ;
        d = 0 ;
        row = EMPTY ;
        for (p = Ap [j] ; d < 2 && p < Ap [j+1] ; p++)
        {
            i = Ai [p] ;
            if (!Mark [i])
            {
                // row i is not taken by a prior column singleton.  If this
                // is the only unflagged row and the value is large enough,
                // it will become the row for this column singleton. 
                aij = Ax [p] ;
                row = i ;
                d++ ;
            }
        }
        if (d == 0)
        {
            // j is a dead column singleton
            Qrows [n1cols++] = EMPTY ;
        }
        else if (d == 1 && spqr_abs (aij, cc) > tol)
        {
            // j is a live column singleton
            Qrows [n1cols++] = row ;
            // flag row i as taken
            Mark [row] = TRUE ;
            n1rows++ ;
        }
        else
        {
            // j is not a singleton; quit searching
            break ;
        }
    }

    // -------------------------------------------------------------------------
    // construct P1inv permutation, row counts R1p, and col pointers Yp
    // -------------------------------------------------------------------------

    if (n1cols == 0 && bncols == 0)
    {

        // ---------------------------------------------------------------------
        // no singletons, and B empty; Y=A will be done via pointer alias
        // ---------------------------------------------------------------------

        Y = NULL ;
        Yp = NULL ;
        P1inv = NULL ;
        R1p = NULL ;

    }
    else if (n1cols == 0)
    {

        // ---------------------------------------------------------------------
        // no singletons in the matrix; no R1 matrix, no P1inv permutation
        // ---------------------------------------------------------------------

        // Y has no entries yet; nnz(Y) will be determined later
        Y = cholmod_l_allocate_sparse (m, n+bncols, 0,
            FALSE, TRUE, 0, xtype, cc) ;

        if (cc->status < CHOLMOD_OK)
        {
            // out of memory
            cholmod_l_free (m, sizeof (char), Mark, cc) ;
            cholmod_l_free (n, sizeof (Long), Qrows, cc) ;
            return (FALSE) ;
        }

        Yp = (Long *) Y->p ;

        ASSERT (n1rows == 0) ;
        P1inv = NULL ;
        R1p = NULL ;

        // ---------------------------------------------------------------------
        // copy the column pointers of A for the first part of Y = [A B]
        // ---------------------------------------------------------------------

        ynz = Ap [n] ;
        for (k = 0 ; k <= n ; k++)
        {
            Yp [k] = Ap [k] ;
        }

    }
    else
    {

        // ---------------------------------------------------------------------
        // construct the row singleton permutation
        // ---------------------------------------------------------------------

        // Y has no entries yet; nnz(Y) will be determined later
        Y = cholmod_l_allocate_sparse (m-n1rows, n-n1cols+bncols, 0,
            TRUE, TRUE, 0, xtype, cc) ;
        P1inv = (Long *) cholmod_l_malloc (m, sizeof (Long), cc) ;
        R1p   = (Long *) cholmod_l_calloc (n1rows+1, sizeof (Long), cc) ;

        if (cc->status < CHOLMOD_OK)
        {
            // out of memory
            cholmod_l_free_sparse (&Y, cc) ;
            cholmod_l_free (m, sizeof (Long), P1inv, cc) ;
            cholmod_l_free (n1rows+1, sizeof (Long), R1p, cc) ;
            cholmod_l_free (m, sizeof (char), Mark, cc) ;
            cholmod_l_free (n, sizeof (Long), Qrows, cc) ;
            return (FALSE) ;
        }

        Yp = (Long *) Y->p ;

#ifndef NDEBUG
        for (i = 0 ; i < m ; i++) P1inv [i] = EMPTY ;
#endif

        kk = 0 ;
        for (k = 0 ; k < n1cols ; k++)
        {
            i = Qrows [k] ;
            if (i != EMPTY)
            {
                // row i is the kk-th singleton row
                ASSERT (Mark [i]) ;
                ASSERT (P1inv [i] == EMPTY) ;
                P1inv [i] = kk ;
                kk++ ;
            }
        }
        for (i = 0 ; i < m ; i++)
        {
            if (!Mark [i])
            {
                // row i is not a singleton row
                ASSERT (P1inv [i] == EMPTY) ;
                P1inv [i] = kk ;
                kk++ ;
            }
        }
        ASSERT (kk == m) ;

        // ---------------------------------------------------------------------
        // find row counts for R11
        // ---------------------------------------------------------------------

        for (k = 0 ; k < n1cols ; k++)
        {
            for (p = Ap [k] ; p < Ap [k+1] ; p++)
            {
                iold = Ai [p] ;
                inew = P1inv [iold] ;
                ASSERT (inew < n1rows) ;
                R1p [inew]++ ;              // a singleton row; in R1
            }
        }

        // ---------------------------------------------------------------------
        // find row counts for R12 and column pointers for A2 part of Y
        // ---------------------------------------------------------------------

        ynz = 0 ;
        for ( ; k < n ; k++)
        {
            Yp [k-n1cols] = ynz ;
            for (p = Ap [k] ; p < Ap [k+1] ; p++)
            {
                iold = Ai [p] ;
                inew = P1inv [iold] ;
                if (inew < n1rows)
                {
                    R1p [inew]++ ;          // a singleton row; in R1
                }
                else
                {
                    ynz++ ;                 // not a singleton row; in A2
                }
            }
        }
        Yp [n-n1cols] = ynz ;

#ifndef NDEBUG
        PR (("n1cols: %ld\n", n1cols)) ;
        for (i = 0 ; i < n1rows ; i++)
        {
            PR (("R1p [%ld] is %ld\n", i, R1p [i])) ;
            ASSERT (R1p [i] > 0) ;
        }
#endif
    }

    // -------------------------------------------------------------------------
    // free workspace and return results
    // -------------------------------------------------------------------------

    cholmod_l_free (n, sizeof (Long), Qrows, cc) ;
    cholmod_l_free (m, sizeof (char), Mark, cc) ;

    *p_R1p    = R1p ;
    *p_P1inv  = P1inv ;
    *p_Y      = Y ;
    *p_n1cols = n1cols ;
    *p_n1rows = n1rows ;

    return (TRUE) ;
}