Пример #1
0
   cholmod_sparse* Sparse :: operator*( void )
   {
      if( A )
      {
         cholmod_l_free_sparse( &A, common );
         A = NULL;
      }

      int nzmax = data.size();
      int sorted = true;
      int packed = true;
      A = cholmod_l_allocate_sparse( m, n, nzmax, sorted, packed, stype, xtype, common );

      // build compressed matrix (note that EntryMap stores entries in column-major order)
      double* pr = (double*) A->x;
      double* pi = (double*) A->z;
      UF_long* ir = (UF_long*) A->i;
      UF_long* jc = (UF_long*) A->p;
      int i = 0;
      int j = -1;
      for( EntryMap::const_iterator e = data.begin(); e != data.end(); e++ )
      {
         int c = e->first.first;
         if( c != j )
         {
            for( int k = j+1; k <= c; k++ )
            {
               jc[k] = i;
            }
            j = c;
         }

         ir[i] = e->first.second;
         pr[i] = e->second.first;
         if( xtype == CHOLMOD_ZOMPLEX )
         {
            pi[i] = e->second.second;
         }
         i++;
      }
      for( int k = j+1; k <= n; k++ )
      {
         jc[k] = i;
      }

      return A;
   }
Пример #2
0
template <typename Entry> int spqr_1colamd  // TRUE if OK, FALSE otherwise
(
    // inputs, not modified
    int ordering,           // all available, except 0:fixed and 3:given
                            // treated as 1:natural
    double tol,             // only accept singletons above tol
    Long bncols,            // number of columns of B
    cholmod_sparse *A,      // m-by-n sparse matrix

    // outputs, neither allocated nor defined on input

    Long **p_Q1fill,        // size n+bncols, fill-reducing
                            // or natural ordering

    Long **p_R1p,           // size n1rows+1, R1p [k] = # of nonzeros in kth
                            // row of R1.  NULL if n1cols == 0.
    Long **p_P1inv,         // size m, singleton row inverse permutation.
                            // If row i of A is the kth singleton row, then
                            // P1inv [i] = k.  NULL if n1cols is zero.

    cholmod_sparse **p_Y,   // on output, only the first n-n1cols+1 entries of
                            // Y->p are defined (if Y is not NULL), where
                            // Y = [A B] or Y = [A2 B2].  If B is empty and
                            // there are no column singletons, Y is NULL

    Long *p_n1cols,         // number of column singletons found
    Long *p_n1rows,         // number of corresponding rows found

    // workspace and parameters
    cholmod_common *cc
)
{
    Long *Q1fill, *Degree, *Qrows, *W, *Winv, *ATp, *ATj, *R1p, *P1inv, *Yp,
        *Ap, *Ai, *Work ;
    Entry *Ax ;
    Long p, d, j, i, k, n1cols, n1rows, row, col, pend, n2rows, n2cols = EMPTY,
        nz2, kk, p2, col2, ynz, fill_reducing_ordering, m, n, xtype, worksize ;
    cholmod_sparse *AT, *Y ;

    // -------------------------------------------------------------------------
    // get inputs
    // -------------------------------------------------------------------------

    xtype = spqr_type <Entry> ( ) ;

    m = A->nrow ;
    n = A->ncol ;
    Ap = (Long *) A->p ;
    Ai = (Long *) A->i ;
    Ax = (Entry *) A->x ;

    // set outputs to NULL in case of early return
    *p_Q1fill = NULL ;
    *p_R1p = NULL ;
    *p_P1inv = NULL ;
    *p_Y = NULL ;
    *p_n1cols = EMPTY ;
    *p_n1rows = EMPTY ;

    // -------------------------------------------------------------------------
    // allocate result Q1fill (Y, R1p, P1inv allocated later)
    // -------------------------------------------------------------------------

    Q1fill = (Long *) cholmod_l_malloc (n+bncols, sizeof (Long), cc) ;

    // -------------------------------------------------------------------------
    // allocate workspace
    // -------------------------------------------------------------------------

    fill_reducing_ordering = !
        ((ordering == SPQR_ORDERING_FIXED) ||
         (ordering == SPQR_ORDERING_GIVEN) ||
         (ordering == SPQR_ORDERING_NATURAL)) ;

    worksize = ((fill_reducing_ordering) ? 3:2) * n ;

    Work = (Long *) cholmod_l_malloc (worksize, sizeof (Long), cc) ;
    Degree = Work ;         // size n
    Qrows  = Work + n ;     // size n
    Winv   = Qrows ;        // Winv and Qrows not needed at the same time
    W      = Qrows + n ;    // size n if fill-reducing ordering, else size 0

    if (cc->status < CHOLMOD_OK)
    {
        // out of memory; free everything and return
        cholmod_l_free (worksize, sizeof (Long), Work, cc) ;
        cholmod_l_free (n+bncols, sizeof (Long), Q1fill, cc) ;
        return (FALSE) ;
    }

    // -------------------------------------------------------------------------
    // initialze queue with empty columns, and columns with just one entry
    // -------------------------------------------------------------------------

    n1cols = 0 ;
    n1rows = 0 ;

    for (j = 0 ; j < n ; j++)
    {
        p = Ap [j] ;
        d = Ap [j+1] - p ;
        if (d == 0)
        {
            // j is a dead column singleton
            PR (("initial dead %ld\n", j)) ;
            Q1fill [n1cols] = j ;
            Qrows [n1cols] = EMPTY ;
            n1cols++ ;
            Degree [j] = EMPTY ;
        }
        else if (d == 1 && spqr_abs (Ax [p], cc) > tol)
        {
            // j is a column singleton, live or dead
            PR (("initial live %ld %ld\n", j, Ai [p])) ;
            Q1fill [n1cols] = j ;
            Qrows [n1cols] = Ai [p] ;       // this might be a duplicate
            n1cols++ ;
            Degree [j] = EMPTY ;
        }
        else
        {
            // j has degree > 1, it is not (yet) a singleton
            Degree [j] = d ;
        }
    }

    // Degree [j] = EMPTY if j is in the singleton queue, or the Degree [j] > 1
    // is the degree of column j otherwise

    // -------------------------------------------------------------------------
    // create AT = spones (A')
    // -------------------------------------------------------------------------

    AT = cholmod_l_transpose (A, 0, cc) ;       // [

    if (cc->status < CHOLMOD_OK)
    {
        // out of memory; free everything and return
        cholmod_l_free (worksize, sizeof (Long), Work, cc) ;
        cholmod_l_free (n+bncols, sizeof (Long), Q1fill, cc) ;
        return (FALSE) ;
    }

    ATp = (Long *) AT->p ;
    ATj = (Long *) AT->i ;

    // -------------------------------------------------------------------------
    // remove column singletons via breadth-first-search
    // -------------------------------------------------------------------------

    for (k = 0 ; k < n1cols ; k++)
    {

        // ---------------------------------------------------------------------
        // get a new singleton from the queue
        // ---------------------------------------------------------------------

        col = Q1fill [k] ;
        row = Qrows [k] ;
        PR (("\n---- singleton col %ld row %ld\n", col, row)) ;
        ASSERT (Degree [col] == EMPTY) ;

        if (row == EMPTY || ATp [row] < 0)
        {

            // -----------------------------------------------------------------
            // col is a dead column singleton; remove duplicate row index
            // -----------------------------------------------------------------

            Qrows [k] = EMPTY ;
            row = EMPTY ;
            PR (("dead: %ld\n", col)) ;

        }
        else
        {

            // -----------------------------------------------------------------
            // col is a live col singleton; remove its row from matrix
            // -----------------------------------------------------------------

            n1rows++ ;
            p = ATp [row] ;
            ATp [row] = FLIP (p) ;          // flag the singleton row
            pend = UNFLIP (ATp [row+1]) ;
            PR (("live: %ld row %ld\n", col, row)) ;
            for ( ; p < pend ; p++)
            {
                // look for new column singletons after row is removed
                j = ATj [p] ;
                d = Degree [j] ;
                if (d == EMPTY)
                {
                    // j is already in the singleton queue
                    continue ;
                }
                ASSERT (d >= 1) ;
                ASSERT2 (spqrDebug_listcount (j, Q1fill, n1cols, 0) == 0) ;
                d-- ;
                Degree [j] = d ;
                if (d == 0)
                {
                    // a new dead col singleton
                    PR (("newly dead %ld\n", j)) ;
                    Q1fill [n1cols] = j ;
                    Qrows [n1cols] = EMPTY ;
                    n1cols++ ;
                    Degree [j] = EMPTY ;
                }
                else if (d == 1)
                {
                    // a new live col singleton; find its single live row
                    for (p2 = Ap [j] ; p2 < Ap [j+1] ; p2++)
                    {
                        i = Ai [p2] ;
                        if (ATp [i] >= 0 && spqr_abs (Ax [p2], cc) > tol)
                        {
                            // i might appear in Qrows [k+1:n1cols-1]
                            PR (("newly live %ld\n", j)) ;
                            ASSERT2 (spqrDebug_listcount (i,Qrows,k+1,1) == 0) ;
                            Q1fill [n1cols] = j ;
                            Qrows [n1cols] = i ;
                            n1cols++ ;
                            Degree [j] = EMPTY ;
                            break ;
                        }
                    }
                }
            }
        }
        // Q1fill [0:k] and Qrows [0:k] have no duplicates
        ASSERT2 (spqrDebug_listcount (col, Q1fill, n1cols, 0) == 1) ; 
        ASSERT2 (IMPLIES (row >= 0, spqrDebug_listcount 
            (row, Qrows, k+1, 1) == 1)) ; 
    }

    // -------------------------------------------------------------------------
    // Degree flags the column singletons, ATp flags their rows
    // -------------------------------------------------------------------------

#ifndef NDEBUG
    k = 0 ;
    for (j = 0 ; j < n ; j++)
    {
        PR (("j %ld Degree[j] %ld\n", j, Degree [j])) ;
        if (Degree [j] > 0) k++ ;        // j is not a column singleton
    }
    PR (("k %ld n %ld n1cols %ld\n", k, n, n1cols)) ;
    ASSERT (k == n - n1cols) ;
    for (k = 0 ; k < n1cols ; k++)
    {
        col = Q1fill [k] ;
        ASSERT (Degree [col] <= 0) ;
    }
    k = 0 ;
    for (i = 0 ; i < m ; i++)
    {
        if (ATp [i] >= 0) k++ ;     // i is not a row of a col singleton
    }
    ASSERT (k == m - n1rows) ;
    for (k = 0 ; k < n1cols ; k++)
    {
        row = Qrows [k] ;
        ASSERT (IMPLIES (row != EMPTY, ATp [row] < 0)) ;
    }
#endif

    // -------------------------------------------------------------------------
    // find the row ordering
    // -------------------------------------------------------------------------

    if (n1cols == 0)
    {

        // ---------------------------------------------------------------------
        // no singletons in the matrix; no R1 matrix, no P1inv permutation
        // ---------------------------------------------------------------------

        ASSERT (n1rows == 0) ;
        R1p = NULL ;
        P1inv = NULL ;

    }
    else
    {

        // ---------------------------------------------------------------------
        // construct the row singleton permutation
        // ---------------------------------------------------------------------

        // allocate result arrays R1p and P1inv
        R1p   = (Long *) cholmod_l_malloc (n1rows+1, sizeof (Long), cc) ;
        P1inv = (Long *) cholmod_l_malloc (m,        sizeof (Long), cc) ;

        if (cc->status < CHOLMOD_OK)
        {
            // out of memory; free everything and return
            cholmod_l_free_sparse (&AT, cc) ;
            cholmod_l_free (worksize, sizeof (Long), Work, cc) ;
            cholmod_l_free (n+bncols, sizeof (Long), Q1fill, cc) ;
            cholmod_l_free (n1rows+1, sizeof (Long), R1p, cc) ;
            cholmod_l_free (m,        sizeof (Long), P1inv, cc) ;
            return (FALSE) ;
        }

#ifndef NDEBUG
        for (i = 0 ; i < m ; i++) P1inv [i] = EMPTY ;
#endif

        kk = 0 ;
        for (k = 0 ; k < n1cols ; k++)
        {
            i = Qrows [k] ;
            PR (("singleton col %ld row %ld\n", Q1fill [k], i)) ;
            if (i != EMPTY)
            {
                // row i is the kk-th singleton row
                ASSERT (ATp [i] < 0) ;
                ASSERT (P1inv [i] == EMPTY) ;
                P1inv [i] = kk ;
                // also find # of entries in row kk of R1
                R1p [kk] = UNFLIP (ATp [i+1]) - UNFLIP (ATp [i]) ;
                kk++ ;
            }
        }
        ASSERT (kk == n1rows) ;
        for (i = 0 ; i < m ; i++)
        {
            if (ATp [i] >= 0)
            {
                // row i is not a singleton row
                ASSERT (P1inv [i] == EMPTY) ;
                P1inv [i] = kk ;
                kk++ ;
            }
        }
        ASSERT (kk == m) ;

    }

    // Qrows is no longer needed.

    // -------------------------------------------------------------------------
    // complete the column ordering
    // -------------------------------------------------------------------------

    if (!fill_reducing_ordering)
    {

        // ---------------------------------------------------------------------
        // natural ordering
        // ---------------------------------------------------------------------

        if (n1cols == 0)
        {

            // no singletons, so natural ordering is 0:n-1 for now
            for (k = 0 ; k < n ; k++)
            {
                Q1fill [k] = k ;
            }

        }
        else
        {

            // singleton columns appear first, then non column singletons
            k = n1cols ;
            for (j = 0 ; j < n ; j++)
            {
                if (Degree [j] > 0)
                {
                    // column j is not a column singleton
                    Q1fill [k++] = j ;
                }
            }
            ASSERT (k == n) ;
        }

    }
    else
    {

        // ---------------------------------------------------------------------
        // fill-reducing ordering of pruned submatrix
        // ---------------------------------------------------------------------

        if (n1cols == 0)
        {

            // -----------------------------------------------------------------
            // no singletons found; do fill-reducing on entire matrix
            // -----------------------------------------------------------------

            n2cols = n ;
            n2rows = m ;

        }
        else
        {

            // -----------------------------------------------------------------
            // create the pruned matrix for fill-reducing by removing singletons
            // -----------------------------------------------------------------

            // find the mapping of original columns to pruned columns
            n2cols = 0 ;
            for (j = 0 ; j < n ; j++)
            {
                if (Degree [j] > 0)
                {
                    // column j is not a column singleton
                    W [j] = n2cols++ ;
                    PR (("W [%ld] = %ld\n", j, W [j])) ;
                }
                else
                {
                    // column j is a column singleton
                    W [j] = EMPTY ;
                    PR (("W [%ld] = %ld (j is col singleton)\n", j, W [j])) ;
                }
            }
            ASSERT (n2cols == n - n1cols) ;

            // W is now a mapping of the original columns to the columns in the
            // pruned matrix.  W [col] == EMPTY if col is a column singleton.
            // Otherwise col2 = W [j] is a column of the pruned matrix.

            // -----------------------------------------------------------------
            // delete row and column singletons from A'
            // -----------------------------------------------------------------

            // compact A' by removing row and column singletons
            nz2 = 0 ;
            n2rows = 0 ;
            for (i = 0 ; i < m ; i++)
            {
                p = ATp [i] ;
                if (p >= 0)
                {
                    // row i is not a row of a column singleton
                    ATp [n2rows++] = nz2 ;
                    pend = UNFLIP (ATp [i+1]) ;
                    for (p = ATp [i] ; p < pend ; p++)
                    {
                        j = ATj [p] ;
                        ASSERT (W [j] >= 0 && W [j] < n-n1cols) ;
                        ATj [nz2++] = W [j] ;
                    }
                }
            }
            ATp [n2rows] = nz2 ;
            ASSERT (n2rows == m - n1rows) ;
        }

        // ---------------------------------------------------------------------
        // fill-reducing ordering of the transpose of the pruned A' matrix
        // ---------------------------------------------------------------------

        PR (("n1cols %ld n1rows %ld n2cols %ld n2rows %ld\n",
            n1cols, n1rows, n2cols, n2rows)) ;
        ASSERT ((Long) AT->nrow == n) ;
        ASSERT ((Long) AT->ncol == m) ;

        AT->nrow = n2cols ;
        AT->ncol = n2rows ;

        // save the current CHOLMOD settings
        Long save [6] ;
        save [0] = cc->supernodal ;
        save [1] = cc->nmethods ;
        save [2] = cc->postorder ;
        save [3] = cc->method [0].ordering ;
        save [4] = cc->method [1].ordering ;
        save [5] = cc->method [2].ordering ;

        // follow the ordering with a postordering of the column etree
        cc->postorder = TRUE ;

        // 8:best: best of COLAMD(A), AMD(A'A), and METIS (if available)
        if (ordering == SPQR_ORDERING_BEST)
        {
            ordering = SPQR_ORDERING_CHOLMOD ;
            cc->nmethods = 2 ;
            cc->method [0].ordering = CHOLMOD_COLAMD ;
            cc->method [1].ordering = CHOLMOD_AMD ;
#ifndef NPARTITION
            cc->nmethods = 3 ;
            cc->method [2].ordering = CHOLMOD_METIS ;
#endif
        }

        // 9:bestamd: best of COLAMD(A) and AMD(A'A)
        if (ordering == SPQR_ORDERING_BESTAMD)
        {
            // if METIS is not installed, this option is the same as 8:best
            ordering = SPQR_ORDERING_CHOLMOD ;
            cc->nmethods = 2 ;
            cc->method [0].ordering = CHOLMOD_COLAMD ;
            cc->method [1].ordering = CHOLMOD_AMD ;
        }

#ifdef NPARTITION
        if (ordering == SPQR_ORDERING_METIS)
        {
            // METIS not installed; use default ordering
            ordering = SPQR_ORDERING_DEFAULT ;
        }
#endif

        if (ordering == SPQR_ORDERING_DEFAULT)
        {
            // Version 1.2.0:  just use COLAMD
            ordering = SPQR_ORDERING_COLAMD ;

#if 0
            // Version 1.1.2 and earlier:
            if (n2rows <= 2*n2cols)
            {
                // just use COLAMD; do not try AMD or METIS
                ordering = SPQR_ORDERING_COLAMD ;
            }
            else
            {
#ifndef NPARTITION
                // use CHOLMOD's default ordering: try AMD and then METIS
                // if AMD gives high fill-in, and take the best ordering found
                ordering = SPQR_ORDERING_CHOLMOD ;
                cc->nmethods = 0 ;
#else
                // METIS is not installed, so just use AMD
                ordering = SPQR_ORDERING_AMD ;
#endif
            }
#endif

        }

        if (ordering == SPQR_ORDERING_AMD)
        {
            // use CHOLMOD's interface to AMD to order A'*A
            cholmod_l_amd (AT, NULL, 0, (Long *) (Q1fill + n1cols), cc) ;
        }
#ifndef NPARTITION
        else if (ordering == SPQR_ORDERING_METIS)
        {
            // use CHOLMOD's interface to METIS to order A'*A (if installed)
            cholmod_l_metis (AT, NULL, 0, TRUE,
                (Long *) (Q1fill + n1cols), cc) ;
        }
#endif
        else if (ordering == SPQR_ORDERING_CHOLMOD)
        {
            // use CHOLMOD's internal ordering (defined by cc) to order AT
            PR (("Using CHOLMOD, nmethods %d\n", cc->nmethods)) ;
            cc->supernodal = CHOLMOD_SIMPLICIAL ;
            cc->postorder = TRUE ;
            cholmod_factor *Sc ;
            Sc = cholmod_l_analyze_p2 (FALSE, AT, NULL, NULL, 0, cc) ;
            if (Sc != NULL)
            {
                // copy perm from Sc->Perm [0:n2cols-1] to Q1fill (n1cols:n)
                Long *Sc_perm = (Long *) Sc->Perm ;
                for (k = 0 ; k < n2cols ; k++)
                {
                    Q1fill [k + n1cols] = Sc_perm [k] ;
                }
                // CHOLMOD selected an ordering; determine the ordering used
                switch (Sc->ordering)
                {
                    case CHOLMOD_AMD:    ordering = SPQR_ORDERING_AMD    ;break;
                    case CHOLMOD_COLAMD: ordering = SPQR_ORDERING_COLAMD ;break;
                    case CHOLMOD_METIS:  ordering = SPQR_ORDERING_METIS  ;break;
                }
            }
            cholmod_l_free_factor (&Sc, cc) ;
            PR (("CHOLMOD used method %d : ordering: %d\n", cc->selected,
                cc->method [cc->selected].ordering)) ;
        }
        else // SPQR_ORDERING_DEFAULT or SPQR_ORDERING_COLAMD
        {
            // use CHOLMOD's interface to COLAMD to order AT
            ordering = SPQR_ORDERING_COLAMD ;
            cholmod_l_colamd (AT, NULL, 0, TRUE,
                (Long *) (Q1fill + n1cols), cc) ;
        }

        cc->SPQR_istat [7] = ordering ;

        // restore the CHOLMOD settings
        cc->supernodal              = save [0] ;
        cc->nmethods                = save [1] ;
        cc->postorder               = save [2] ;
        cc->method [0].ordering     = save [3] ;
        cc->method [1].ordering     = save [4] ;
        cc->method [2].ordering     = save [5] ;

        AT->nrow = n ;
        AT->ncol = m ;
    }

    // -------------------------------------------------------------------------
    // free AT
    // -------------------------------------------------------------------------

    cholmod_l_free_sparse (&AT, cc) ;   // ]

    // -------------------------------------------------------------------------
    // check if the method succeeded
    // -------------------------------------------------------------------------

    if (cc->status < CHOLMOD_OK)
    {
        // out of memory; free everything and return
        cholmod_l_free (worksize, sizeof (Long), Work, cc) ;
        cholmod_l_free (n+bncols, sizeof (Long), Q1fill, cc) ;
        cholmod_l_free (n1rows+1, sizeof (Long), R1p, cc) ;
        cholmod_l_free (m,        sizeof (Long), P1inv, cc) ;
        return (FALSE) ;
    }

    // -------------------------------------------------------------------------
    // map the fill-reducing ordering ordering back to A
    // -------------------------------------------------------------------------

    if (n1cols > 0 && fill_reducing_ordering)
    {
        // Winv is workspace of size n2cols <= n

        #ifndef NDEBUG
        for (j = 0 ; j < n2cols ; j++) Winv [j] = EMPTY ;
        #endif

        for (j = 0 ; j < n ; j++)
        {
            // j is a column of A.  col2 = W [j] is either EMPTY, or it is 
            // the corresponding column of the pruned matrix
            col2 = W [j] ;
            if (col2 != EMPTY)
            {
                ASSERT (col2 >= 0 && col2 < n2cols) ;
                Winv [col2] = j ;
            }
        }

        for (k = n1cols ; k < n ; k++)
        {
            // col2 is a column of the pruned matrix
            col2 = Q1fill [k] ;
            // j is the corresonding column of the A matrix
            j = Winv [col2] ;
            ASSERT (j >= 0 && j < n) ;
            Q1fill [k] = j ;
        }
    }

    // -------------------------------------------------------------------------
    // identity permutation of the columns of B
    // -------------------------------------------------------------------------

    for (k = n ; k < n+bncols ; k++)
    {
        // tack on the identity permutation for columns of B
        Q1fill [k] = k ;
    }

    // -------------------------------------------------------------------------
    // find column pointers for Y = [A2 B2]; columns of A2
    // -------------------------------------------------------------------------

    if (n1cols == 0 && bncols == 0)
    {
        // A will be factorized instead of Y
        Y = NULL ;
    }
    else
    {
        // Y has no entries yet; nnz(Y) will be determined later
        Y = cholmod_l_allocate_sparse (m-n1rows, n-n1cols+bncols, 0,
            FALSE, TRUE, 0, xtype, cc) ;

        if (cc->status < CHOLMOD_OK)
        {
            // out of memory; free everything and return
            cholmod_l_free (worksize, sizeof (Long), Work, cc) ;
            cholmod_l_free (n+bncols, sizeof (Long), Q1fill, cc) ;
            cholmod_l_free (n1rows+1, sizeof (Long), R1p, cc) ;
            cholmod_l_free (m,        sizeof (Long), P1inv, cc) ;
            return (FALSE) ;
        }

        Yp = (Long *) Y->p ; 

        ynz = 0 ;
        PR (("1c wrapup: n1cols %ld n %ld\n", n1cols, n)) ;
        for (k = n1cols ; k < n ; k++)
        {
            j = Q1fill [k] ;
            d = Degree [j] ;
            ASSERT (d >= 1 && d <= m) ;
            Yp [k-n1cols] = ynz ;
            ynz += d ;
        }
        Yp [n-n1cols] = ynz ;
    }

    // -------------------------------------------------------------------------
    // free workspace and return results
    // -------------------------------------------------------------------------

    cholmod_l_free (worksize, sizeof (Long), Work, cc) ;

    *p_Q1fill = Q1fill ;
    *p_R1p    = R1p ;
    *p_P1inv  = P1inv ;
    *p_Y      = Y ;
    *p_n1cols = n1cols ;
    *p_n1rows = n1rows ;
    return (TRUE) ;
}
Пример #3
0
void mexFunction
(
    int nargout,
    mxArray *pargout [ ],
    int nargin,
    const mxArray *pargin [ ]
)
{
    double dummy = 0 ;
    double *Lx, *px ;
    Int *Parent, *Post, *ColCount, *First, *Level, *Rp, *Ri, *Lp, *Li, *W ;
    cholmod_sparse *A, Amatrix, *F, *Aup, *Alo, *R, *A1, *A2, *L, *S ;
    cholmod_common Common, *cm ;
    Int n, i, coletree, j, lnz, p, k, height, c ;
    char buf [LEN] ;

    /* ---------------------------------------------------------------------- */
    /* start CHOLMOD and set defaults */
    /* ---------------------------------------------------------------------- */

    cm = &Common ;
    cholmod_l_start (cm) ;
    sputil_config (SPUMONI, cm) ;

    /* ---------------------------------------------------------------------- */
    /* get inputs */
    /* ---------------------------------------------------------------------- */

    if (nargout > 5 || nargin < 1 || nargin > 3)
    {
	mexErrMsgTxt (
	    "Usage: [count h parent post R] = symbfact2 (A, mode, Lmode)") ;
    }

    /* ---------------------------------------------------------------------- */
    /* get input matrix A */
    /* ---------------------------------------------------------------------- */

    A = sputil_get_sparse_pattern (pargin [0], &Amatrix, &dummy, cm) ;
    S = (A == &Amatrix) ? NULL : A ;

    /* ---------------------------------------------------------------------- */
    /* get A->stype, default is to use triu(A) */
    /* ---------------------------------------------------------------------- */

    A->stype = 1 ;
    n = A->nrow ;
    coletree = FALSE ;
    if (nargin > 1)
    {
	buf [0] = '\0' ;
	if (mxIsChar (pargin [1]))
	{
	    mxGetString (pargin [1], buf, LEN) ;
	}
	c = buf [0] ;
	if (tolower (c) == 'r')
	{
	    /* unsymmetric case (A*A') if string starts with 'r' */
	    A->stype = 0 ;
	}
	else if (tolower (c) == 'c')
	{
	    /* unsymmetric case (A'*A) if string starts with 'c' */
	    n = A->ncol ;
	    coletree = TRUE ;
	    A->stype = 0 ;
	}
	else if (tolower (c) == 's')
	{
	    /* symmetric upper case (A) if string starts with 's' */
	    A->stype = 1 ;
	}
	else if (tolower (c) == 'l')
	{
	    /* symmetric lower case (A) if string starts with 'l' */
	    A->stype = -1 ;
	}
	else
	{
	    mexErrMsgTxt ("symbfact2: unrecognized mode") ;
	}
    }

    if (A->stype && A->nrow != A->ncol)
    {
	mexErrMsgTxt ("symbfact2: A must be square") ;
    }

    /* ---------------------------------------------------------------------- */
    /* compute the etree, its postorder, and the row/column counts */
    /* ---------------------------------------------------------------------- */

    Parent = cholmod_l_malloc (n, sizeof (Int), cm) ;
    Post = cholmod_l_malloc (n, sizeof (Int), cm) ;
    ColCount = cholmod_l_malloc (n, sizeof (Int), cm) ;
    First = cholmod_l_malloc (n, sizeof (Int), cm) ;
    Level = cholmod_l_malloc (n, sizeof (Int), cm) ;

    /* F = A' */
    F = cholmod_l_transpose (A, 0, cm) ;

    if (A->stype == 1 || coletree)
    {
	/* symmetric upper case: find etree of A, using triu(A) */
	/* column case: find column etree of A, which is etree of A'*A */
	Aup = A ;
	Alo = F ;
    }
    else
    {
	/* symmetric lower case: find etree of A, using tril(A) */
	/* row case: find row etree of A, which is etree of A*A' */
	Aup = F ;
	Alo = A ;
    }

    cholmod_l_etree (Aup, Parent, cm) ;

    if (cm->status < CHOLMOD_OK)
    {
	/* out of memory or matrix invalid */
	mexErrMsgTxt ("symbfact2 failed: matrix corrupted!") ;
    }

    if (cholmod_l_postorder (Parent, n, NULL, Post, cm) != n)
    {
	/* out of memory or Parent invalid */
	mexErrMsgTxt ("symbfact2 postorder failed!") ;
    }

    /* symmetric upper case: analyze tril(F), which is triu(A) */
    /* column case: analyze F*F', which is A'*A */
    /* symmetric lower case: analyze tril(A) */
    /* row case: analyze A*A' */
    cholmod_l_rowcolcounts (Alo, NULL, 0, Parent, Post, NULL, ColCount,
		First, Level, cm) ;

    if (cm->status < CHOLMOD_OK)
    {
	/* out of memory or matrix invalid */
	mexErrMsgTxt ("symbfact2 failed: matrix corrupted!") ;
    }

    /* ---------------------------------------------------------------------- */
    /* return results to MATLAB: count, h, parent, and post */
    /* ---------------------------------------------------------------------- */

    pargout [0] = sputil_put_int (ColCount, n, 0) ;
    if (nargout > 1)
    {
	/* compute the elimination tree height */
	height = 0 ;
	for (i = 0 ; i < n ; i++)
	{
	    height = MAX (height, Level [i]) ;
	}
	height++ ;
	pargout [1] = mxCreateDoubleMatrix (1, 1, mxREAL) ;
	px = mxGetPr (pargout [1]) ;
	px [0] = height ;
    }
    if (nargout > 2)
    {
	pargout [2] = sputil_put_int (Parent, n, 1) ;
    }
    if (nargout > 3)
    {
	pargout [3] = sputil_put_int (Post, n, 1) ;
    }

    /* ---------------------------------------------------------------------- */
    /* construct L, if requested */
    /* ---------------------------------------------------------------------- */

    if (nargout > 4)
    {

	if (A->stype == 1)
	{
	    /* symmetric upper case: use triu(A) only, A2 not needed */
	    A1 = A ;
	    A2 = NULL ;
	}
	else if (A->stype == -1)
	{
	    /* symmetric lower case: use tril(A) only, A2 not needed */
	    A1 = F ;
	    A2 = NULL ;
	}
	else if (coletree)
	{
	    /* column case: analyze F*F' */
	    A1 = F ;
	    A2 = A ;
	}
	else
	{
	    /* row case: analyze A*A' */
	    A1 = A ;
	    A2 = F ;
	}

	/* count the total number of entries in L */
	lnz = 0 ;
	for (j = 0 ; j < n ; j++)
	{
	    lnz += ColCount [j] ;
	}

	/* allocate the output matrix L (pattern-only) */
	L = cholmod_l_allocate_sparse (n, n, lnz, TRUE, TRUE, 0,
	    CHOLMOD_PATTERN, cm) ;
	Lp = L->p ;
	Li = L->i ;

	/* initialize column pointers */
	lnz = 0 ;
	for (j = 0 ; j < n ; j++)
	{
	    Lp [j] = lnz ;
	    lnz += ColCount [j] ;
	}
	Lp [j] = lnz ;

	/* create a copy of the column pointers */
	W = First ;
	for (j = 0 ; j < n ; j++)
	{
	    W [j] = Lp [j] ;
	}

	/* get workspace for computing one row of L */
	R = cholmod_l_allocate_sparse (n, 1, n, FALSE, TRUE, 0, CHOLMOD_PATTERN,
		cm) ;
	Rp = R->p ;
	Ri = R->i ;

	/* compute L one row at a time */
	for (k = 0 ; k < n ; k++)
	{
	    /* get the kth row of L and store in the columns of L */
	    cholmod_l_row_subtree (A1, A2, k, Parent, R, cm) ;
	    for (p = 0 ; p < Rp [1] ; p++)
	    {
		Li [W [Ri [p]]++] = k ;
	    }
	    /* add the diagonal entry */
	    Li [W [k]++] = k ;
	}

	/* free workspace */
	cholmod_l_free_sparse (&R, cm) ;

	/* transpose L to get R, or leave as is */
	if (nargin < 3)
	{
	    /* R = L' */
	    R = cholmod_l_transpose (L, 0, cm) ;
	    cholmod_l_free_sparse (&L, cm) ;
	    L = R ;
	}

	/* fill numerical values of L with one's (only MATLAB needs this...) */
	L->x = cholmod_l_malloc (lnz, sizeof (double), cm) ;
	Lx = L->x ;
	for (p = 0 ; p < lnz ; p++)
	{
	    Lx [p] = 1 ;
	}
	L->xtype = CHOLMOD_REAL ;

	/* return L (or R) to MATLAB */
	pargout [4] = sputil_put_sparse (&L, cm) ;
    }

    /* ---------------------------------------------------------------------- */
    /* free workspace */
    /* ---------------------------------------------------------------------- */

    cholmod_l_free (n, sizeof (Int), Parent, cm) ;
    cholmod_l_free (n, sizeof (Int), Post, cm) ;
    cholmod_l_free (n, sizeof (Int), ColCount, cm) ;
    cholmod_l_free (n, sizeof (Int), First, cm) ;
    cholmod_l_free (n, sizeof (Int), Level, cm) ;
    cholmod_l_free_sparse (&F, cm) ;
    cholmod_l_free_sparse (&S, cm) ;
    cholmod_l_finish (cm) ;
    cholmod_l_print_common (" ", cm) ;
    /*
    if (cm->malloc_count != ((nargout == 5) ? 3:0)) mexErrMsgTxt ("!") ;
    */
}
Пример #4
0
int main() {
  /* Time Recording Variables */
  int now_s, now_u;
  struct rusage usage;
  double time_now, time_past;
  
  /* x, y, z and global indices */
  int i, j, k, m, n, nodes;
  /* Connectivity enties */
  int m_above, m_below, m_left, m_right, m_front, m_back;
  double c_above, c_below, c_left, c_right, c_front, c_back, c_self;
  
  /* Creat cholmod object */
  cholmod_common Common, *cc;
  /* Compress column form sparse matrix */
  cholmod_sparse *S, *ST;
  /* forcing function on input; voltages after solving */
  cholmod_dense *b, *v0, *r;  
  size_t *Si, *Sp, *Snz, *bi, *bj;
  double *Sx, *bx, *v0x;
 
  /* store the sparse matrix */
  FILE *store;
  char filename[100];
  
  /* start CHOLMOD */
  cc = &Common;
  cholmod_l_start(cc);
  
  /* initialize timing variables*/ 
  getrusage(0, &usage);
  now_s = usage.ru_utime.tv_sec;
  now_u = usage.ru_utime.tv_usec;
  time_past = now_s + now_u  / 1.e6;
  time_now = time_past;
  
  /* total nodes in the grid */
  nodes = IMAX * JMAX * KMAX;
  
  /* Allocate space for connectivity matrix and forcing vector. */
  S = cholmod_l_allocate_sparse(nodes, nodes, 7 * nodes, 
				0, 0, 0, CHOLMOD_REAL, cc);

  b = cholmod_l_allocate_dense(nodes, 1, nodes, CHOLMOD_REAL, cc);
  bx = b->x;

  v0 = cholmod_l_allocate_dense(nodes, 1, nodes, CHOLMOD_REAL, cc);
  v0x = v0->x;



  /*================================================================*/
  /*===============  make connectivity matrix ======================*/
  /*================================================================*/
  Si = (size_t *) S->i;
  Sp = (size_t *) S->p;
  Sx = (double *) S->x;
  Snz = (size_t *) S->nz;
  n = 0;
  Sp[0] = 0;
  for (k = 0; k < KMAX; k++) {
    for (j = 0; j < JMAX; j++) {
      for (i = 0; i < IMAX; i++) {
	/* Global index in x-fastest order*/
	m = k * IMAX * JMAX + j * IMAX + i;
	Sp[m + 1] = Sp[m];
	v0x[m] = (k + 1.0) / (KMAX + 1.0);

	/* Six coef.s of every node */
	c_below = (mepr(i - 0.5, j - 0.5, k - 0.5) + 
		   mepr(i + 0.5, j - 0.5, k - 0.5) + 
		   mepr(i - 0.5, j + 0.5, k - 0.5) +
		   mepr(i + 0.5, j + 0.5, k - 0.5)) / 4.0;

	c_front = (mepr(i - 0.5, j - 0.5, k - 0.5) + 
		   mepr(i + 0.5, j - 0.5, k - 0.5) + 
		   mepr(i - 0.5, j - 0.5, k + 0.5) +
		   mepr(i + 0.5, j - 0.5, k + 0.5)) / 4.0;

	c_left = (mepr(i - 0.5, j + 0.5, k - 0.5) + 
		  mepr(i - 0.5, j - 0.5, k - 0.5) + 
		  mepr(i - 0.5, j + 0.5, k + 0.5) +
		  mepr(i - 0.5, j - 0.5, k + 0.5)) / 4.0;

	c_right = (mepr(i + 0.5, j + 0.5, k - 0.5) + 
		   mepr(i + 0.5, j - 0.5, k - 0.5) + 
		   mepr(i + 0.5, j + 0.5, k + 0.5) +
		   mepr(i + 0.5, j - 0.5, k + 0.5)) / 4.0;

	c_back = (mepr(i - 0.5, j + 0.5, k - 0.5) + 
		  mepr(i + 0.5, j + 0.5, k - 0.5) + 
		  mepr(i - 0.5, j + 0.5, k + 0.5) +
		  mepr(i + 0.5, j + 0.5, k + 0.5)) / 4.0;

	c_above = (mepr(i - 0.5, j - 0.5, k + 0.5) + 
		   mepr(i + 0.5, j - 0.5, k + 0.5) + 
		   mepr(i - 0.5, j + 0.5, k + 0.5) +
		   mepr(i + 0.5, j + 0.5, k + 0.5))/4.0;

	/* Self term. */
	c_self = -(c_above + c_below + c_left + c_right + c_front + c_back);
	Si[n] = m; Sx[n] = c_self; n++; Sp[m + 1]++;

	/* Node below. Ensure not on bottom face */
	if (k != 0) {
	  m_below = m - IMAX * JMAX;
 	  Si[n] = m_below; Sx[n] = c_below; n++; Sp[m + 1]++;
	} else {
	  bx[m] = -c_below * VBOT;
	}

	/* Node front.  Ensure not on front face. */
	if (j != 0) {
	  m_front = m - IMAX;
	} else {
	  m_front = m + (JMAX - 1) * IMAX;
	}
	Si[n] = m_front; Sx[n] = c_front; n++; Sp[m + 1]++;

	/* Node to left.  Ensure not on left face. */
	if (i != 0) {
	  m_left = m - 1;
	} else {
	  m_left = m + IMAX -1;
	}
	Si[n] = m_left; Sx[n] = c_left; n++; Sp[m + 1]++;	   

	/* Node to right.  Ensure not on right face. */
	if (i != IMAX - 1) {
	  m_right = m + 1;
	} else{
	  m_right = m - IMAX + 1;
	}
	Si[n] = m_right; Sx[n] = c_right; n++; Sp[m + 1]++;

	/* Node back.  Ensure not on back face. */
	if (j != JMAX - 1) {
	  m_back = m + IMAX;
	} else {
	  m_back = m - (JMAX - 1) * IMAX;
	}
	Si[n] = m_back; Sx[n] = c_back; n++; Sp[m + 1]++;

	/* Node top. Ensure not on top face */
	if (k != KMAX - 1) {
	  m_above = m + IMAX * JMAX;
 	  Si[n] = m_above; Sx[n] = c_above; n++; Sp[m + 1]++;
	} else {
	  bx[m] = -c_above * VTOP;
	}

	Snz[m] = Sp[m + 1] - Sp[m];
      }
    }
  }
  /*====================================================================*/
  /*===================Done creating connectivity matrix.===============*/
  /*====================================================================*/

  /* report time*/
  getrusage(0, &usage);
  now_s = usage.ru_utime.tv_sec;
  now_u = usage.ru_utime.tv_usec;
  time_now = now_s + now_u / 1.e6;
  printf("\nFinished creating connectivity matrix\n"
	 "  Incremental time %f\n"
	 "  Running time %f\n", time_now - time_past, time_now);
  time_past = time_now;

  /* Print all three matrixes */
  cholmod_l_print_sparse(S, "S", cc);
  cholmod_l_print_dense(b, "b", cc);
  cholmod_l_print_dense(v0, "v0", cc);

  /* Allocate residual vector */
  r = cholmod_l_zeros(nodes, 1, CHOLMOD_REAL, cc);

  /* The Preconditioned Conjugate Gradient Method */
  /* Calculate the first residual value */
  double one[2], zero[2], minusone[2];
  zero[0] = 0.0;
  zero[1] = 0.0;
  one[0] = 1.0;
  one[1] = 0.0;
  minusone[0] = -1.0;
  minusone[1] = 0.0;

  cholmod_l_copy_dense2(b, r, cc);
  cholmod_l_sdmult(S, 0, minusone, one, v0, r, cc);
  printf("Initial 2-norm = %f\n", cholmod_l_norm_dense(r, 2, cc));
  printf("Initial 1-norm = %f\n", cholmod_l_norm_dense(r, 1, cc));
  printf("Initial 0-norm = %f\n", cholmod_l_norm_dense(r, 0, cc));
  

  /* The iteration */
  double rho1, rho0, beta = 0.0, alpha = 0.0;
  cholmod_dense *p1, *p0, *q;
  double *p1x, *p0x, *qx, *rx;
  /*
  p1 = cholmod_l_allocate_dense(nodes, 1, nodes, CHOLMOD_REAL, cc);
  p0 = cholmod_l_allocate_dense(nodes, 1, nodes, CHOLMOD_REAL, cc);
  q = cholmod_l_allocate_dense(nodes, 1, nodes, CHOLMOD_REAL, cc);  
  */
  p1 = cholmod_l_zeros(nodes, 1, CHOLMOD_REAL, cc);
  p0 = cholmod_l_zeros(nodes, 1, CHOLMOD_REAL, cc);
  q = cholmod_l_zeros(nodes, 1, CHOLMOD_REAL, cc);
  p1x = (double *) p1->x;
  p0x = (double *) p0->x;
  qx = (double *) q->x;
  rx = (double *) r->x;

  int iter;
  for (iter = 0; iter < MAX_ITER; iter++) {
    if (cholmod_l_norm_dense(r, 0, cc) < 1e-10)
      break;
    
    rho1 = cholmod_l_norm_dense(r, 2, cc);
    rho1 = rho1 * rho1;
    
    if (iter == 0) {
      cholmod_l_copy_dense2(r, p1, cc);
    } else {
      beta = rho1 / rho0;
      for (i = 0; i < nodes; i++) {
	p1x[i] = rx[i] + beta * p0x[i];
      }
    }
   
    cholmod_l_sdmult(S, 0, one, zero, p1, q, cc);
    alpha = 0;
    for (i = 0; i < nodes; i++) {
      alpha += p1x[i] * qx[i];
    }
    alpha = rho1 / alpha;

    for (i = 0; i < nodes; i++) {
      v0x[i] += alpha * p1x[i];
      rx[i] -= alpha * qx[i];
    }
    /*
    printf("iter = %d\n: rho1 = %f, rho0 = %f, alpha = %f, beta = %f\n", 
	   iter, rho1, rho0, alpha, beta);
    */
    cholmod_l_copy_dense2(p1, p0, cc);
    rho0 = rho1;
  }
  
  cholmod_l_copy_dense2(b, r, cc);
  cholmod_l_sdmult(S, 0, minusone, one, v0, r, cc);
  printf("After %d iterations:\n", iter);
  printf("Final 2-norm: %f\n", cholmod_l_norm_dense(r, 2, cc));
  printf("Final 1-norm: %f\n", cholmod_l_norm_dense(r, 1, cc));
  printf("Final 0-norm: %f\n", cholmod_l_norm_dense(r, 0, cc));
  /* sort sparse matrix and store it 
  cholmod_l_sort(S, cc);
  S->stype = 0;
  */

  /* Check if S is symmetric
  ST = cholmod_l_transpose(S, 2, cc);
  store = fopen("LpT.dat", "w");
  cholmod_l_write_sparse(store, ST, NULL, NULL, cc);
  fclose(store);
  cholmod_l_free_sparse(&ST, cc);
  */
  
  /* store the sparse matrix   */
  /*
  store = fopen("Laplace.dat", "w");
  cholmod_l_write_sparse(store, S, NULL, NULL, cc);
  fclose(store);
  cholmod_l_free_sparse(&S, cc);
  */

  /* store the force vector  */
  /*
  store = fopen("Force.dat", "w");
  cholmod_l_write_dense(store, b, NULL, cc);
  fclose(store);
  cholmod_l_free_dense(&b,cc);
  */

  /* store the guess vector   */
  store = fopen("Voltage.dat", "w");
  cholmod_l_write_dense(store, v0, NULL, cc);
  fclose(store);

  cholmod_l_free_dense(&v0, cc);

  /* store the residual vector  */
  store = fopen("Residual.dat", "w");
  cholmod_l_write_dense(store, r, NULL, cc);
  fclose(store);

  cholmod_l_free_dense(&r, cc);

  /* report time*/
  getrusage(0, &usage);
  now_s = usage.ru_utime.tv_sec;
  now_u = usage.ru_utime.tv_usec;
  time_now = now_s + now_u / 1.e6;
  printf("\nFinished writing matrix\n"
	 "  Incremental time %f\n"
	 "  Running time %f\n", time_now - time_past, time_now);

  cholmod_l_finish(cc);
  return 0;
}
int main(int argc, char* argv[])
{
	const int bufsize = 512;
    	char buffer[bufsize];
	int m,n,S;
	double time_st,time_end,time_avg;
	//omp_set_num_threads(2);
//	printf("\n-----------------\nnumber of threads fired = %d\n-----------------\n",(int)omp_get_num_threads());
	if(argc!=2)
	{
		cout<<"Insufficient arguments"<<endl;
		return 1;
	}
	
	graph G;

	cerr<<"Start reading                    ";
//	time_st=dsecnd();
	G.create_graph(argv[1]);
//	time_end=dsecnd();
//	time_avg = (time_end-time_st);
//	cout<<"Success              "<<endl;
//	cerr<<"Reading time                     "<<time_avg<<endl;

	cerr<<"Constructing Matrices            ";
//	time_st=dsecnd();
	G.construct_MNA();
	G.construct_NA();
//	time_end=dsecnd();
//	time_avg = (time_end-time_st);
//	cerr<<"Done                 "<<time_avg<<endl;

//	G.construct_sparse_MNA();
	m=G.node_array.size()-1;
	n=G.voltage_edge_id.size();
	
	cout<<endl;
	cout<<"MATRIX STAT:"<<endl;
	cout<<"Nonzero elements:               "<<G.nonzero<<endl;
	cout<<"Number of Rows:                 "<<m+n<<endl;
	cout<<"Nonzero in G:			"<<G.Gnonzero<<endl;
	cout<<"Number of rows in G:		"<<m<<endl;
	cout<<"Nonzero in P: 			"<<G.Pnonzero<<endl;
	cout<<"Number of rows in P:		"<<m<<endl;


//	printf("\n Nonzero = %d", G.nonzero);
//	printf("\n Rows = %d", m+n);

	cout<<"MAT val:		       "<<endl;
	int i,j;

	G.Mat_val[0] += 100;
	G.Gmat[0] +=100;
/*
	for(i=0;i<G.Gnonzero;i++)
		cout<<" "<<G.Gmat[i];
	cout<<endl;
	for(i=0;i<G.Gnonzero;i++)
		cout<<" "<<G.Gcolumns[i];
	cout<<endl;	
	for(i=0;i<m+1;i++)
		cout<<" "<<G.GrowIndex[i];
	cout<<endl;
	for(i=0;i<m;i++)
		printf(" %.8f", G.b1[i]);
	cout<<endl;
	for(i=0;i<m;i++)
		printf(" %.8f", G.x1[i]);
	cout<<endl;	
	
	
*/	SuiteSparse_long *Gnz = (SuiteSparse_long*)calloc(m,sizeof(SuiteSparse_long));
	for(i=0;i<m;i++)
	{
	//	cout<<endl;
		SuiteSparse_long startindex=G.GrowIndex[i];
		SuiteSparse_long endindex=G.GrowIndex[i+1];
		Gnz[i] = endindex - startindex;

//		for(j=startindex;j<endindex;j++)
//			cout<<" "<<G.Gmat[j];
//		cout<<endl;
		
	}


/*	for(i=0;i<G.Pnonzero;i++)
		cout<<" "<<G.Pmat[i];
	cout<<endl;
	for(i=0;i<G.Pnonzero;i++)
		cout<<" "<<G.Pcolumns[i];
	cout<<endl;	
	for(i=0;i<m+1;i++)
		cout<<" "<<G.ProwIndex[i];
	cout<<endl;
/*	for(i=0;i<m;i++)
		printf(" %.8f", G.b1[i]);
	cout<<endl;
	for(i=0;i<m;i++)
		printf(" %.8f", G.x1[i]);
	cout<<endl;	
	
	
	for(i=0;i<m;i++)
	{
		cout<<endl;
		int startindex=G.ProwIndex[i];
		int endindex=G.ProwIndex[i+1];
		for(j=startindex;j<endindex;j++)
			cout<<" "<<G.Pmat[j];
		cout<<endl;
		
	}
	
/*	for(i=0;i<G.nonzero;i++)
		cout<<" "<<G.Mat_val[i];
	cout<<endl;
	for(i=0;i<G.nonzero;i++)
		cout<<" "<<G.columns[i];
	cout<<endl;	
	for(i=0;i<m+n+1;i++)
		cout<<" "<<G.rowIndex[i];
	cout<<endl;
	for(i=0;i<m+n;i++)
		printf(" %.8f", G.b[i]);
	cout<<endl;
	for(i=0;i<m+n;i++)
		printf(" %.8f", G.x[i]);
	cout<<endl;	
	
	
	for(i=0;i<m+n;i++)
	{
		cout<<endl;
		int startindex=G.rowIndex[i];
		int endindex=G.rowIndex[i+1];
		for(j=startindex;j<endindex;j++)
			cout<<" "<<G.Mat_val[j];
		cout<<endl;
		
	}
*/
/*	for (i=0;i<m+n+1;i++)
	{
		//cout<<endl;
		if(G.rowIndex[i]==G.rowIndex[i+1])
			break;
		
		for(j=G.rowIndex[i];j<G.rowIndex[i+1];j++)
		{
			if(G.Mat_val[j]>10)
				cout<<G.Mat_val[j]<<"\t";
		}
		//cout<<endl;
		/*for(j=G.rowIndex[i];j<G.rowIndex[i+1];j++)
		{
			cout<<G.columns[j]<<"\t";
		}
		//cout<<endl;
	}
	cout<<endl;
*/

//printing the matrix
	printf("\n Fine till here");
	printf("\n");
//	int* rowmIndex=(int*)calloc(m+1,sizeof(int));
	printf("\n Fine till here");
	printf("\n");
	//int rowmIndex[5]={1,2,3,4,5};
/*	for(i=0;i<m+1;i++)
	{
		rowmIndex[i]=G.rowIndex[i];
		printf(" %d", rowmIndex[i]);
	}
*/
	printf("\n Allocating GPU memory\n");
	cudaDeviceReset();
	size_t free, total;
	cudaMemGetInfo(&free, &total);
	printf("\n Free Mem = %lf MB, Total mem = %lf MB\n", (double)(free)/(1024*1024), (double)(total)/(1024*1024));


	double *dev_csrValA, *dev_b, *dev_x;
	int *dev_csrRowIdxA, *dev_csrColA;

	double *dev_GcsrVal, *dev_b1, *dev_x1;
	double *dev_PcsrVal, *dev_b2, *dev_x2;
	
	int *dev_GcsrRowIdx, *dev_PcsrRowIdx, *dev_GcsrCol, *dev_PcsrCol;
	

	
	cudaMalloc((void**)&dev_PcsrVal, G.Pnonzero*sizeof(double));
	cudaMalloc((void**)&dev_PcsrRowIdx, (m+1)*sizeof(int));
	cudaMalloc((void**)&dev_PcsrCol, G.Pnonzero*sizeof(int));


	cudaMalloc((void**)&dev_b1, (m)*sizeof(double));
	cudaMalloc((void**)&dev_b2, n*sizeof(double));
	cudaMalloc((void**)&dev_x1, m*sizeof(double));
	cudaMalloc((void**)&dev_x2, n*sizeof(double));

	cudaMemcpy(dev_b1, G.b1, (m)*sizeof(double), cudaMemcpyHostToDevice);
	cudaMemcpy(dev_x1, G.x1, (m)*sizeof(double), cudaMemcpyHostToDevice);

	cudaMemcpy(dev_PcsrVal, G.Pmat, G.Pnonzero*sizeof(double), cudaMemcpyHostToDevice);
	cudaMemcpy(dev_b2, G.b2, (n)*sizeof(double), cudaMemcpyHostToDevice);
	cudaMemcpy(dev_x2, G.x2, (n)*sizeof(double), cudaMemcpyHostToDevice);
	cudaMemcpy(dev_PcsrRowIdx, G.ProwIndex, (m+1)*sizeof(int), cudaMemcpyHostToDevice);
	cudaMemcpy(dev_PcsrCol, G.Pcolumns, (G.Pnonzero)*sizeof(int), cudaMemcpyHostToDevice);


	/* Matrix has been created and stored in CSR format.
	However, CHOLMOD requires CSC format. Since our matrix is symmetric positive definite, we can simply swap 
	csrColA with csrRowIdx and vice versa
	*/

	/* Starting the CHOLMOD routine now*/
	printf("\n Initiating CHOLMOD\n");
	cholmod_sparse *A, *P;
	cholmod_dense *x, *b, *r, *midvec;
	cholmod_factor *L;
	cholmod_common *Common, cm;
	Common = &cm;
	cholmod_l_start(Common);
//	&Common->useGPU=1;
	printf("\n m = %d, G.Gnonzero = %d\n", m, G.Gnonzero);	


	
	cholmod_sparse *C = cholmod_l_allocate_sparse((size_t)(m), (size_t)(m), (size_t)(G.Gnonzero), 1, 0, 1, 1, Common); 
//	P = cholmod_l_allocate_sparse((size_t)(m), (size_t)(n), (size_t)(G.Pnonzero), 1, 0, 0, 1, Common); 
//	printf("\n Allocated \n");

	C->itype = CHOLMOD_LONG;	
//	printf("\n Itype \n");
	C->p = &G.GrowIndex[0];
//	printf("\n Columns \n");
	C->nz = &Gnz[0];
//	printf("\n Rows \n");
	C->i = &G.Gcolumns[0];
	C->dtype = 0;
	C->x = &G.Gmat[0];
	 
/*	P->itype = CHOLMOD_LONG;
	P->p = &G.ProwIndex[0];
	P->nz = &Pnz[0];
	P->i = &G.Pcolumns[0];
	P->dtype = 0;
	P->x = &G.Pmat[0]; 
*/	 

	b = cholmod_l_allocate_dense((size_t)(m), 1, (size_t)(m), 1, Common);
	b->dtype=0;
	b->x = &G.b1[0];
	b->xtype = 1;

	printf("\n CHOLMOD manually set\n");
	cholmod_l_print_sparse(C, "A", Common);
	cholmod_l_print_dense(b, "b", Common);


	cudaEvent_t start, stop;
	cudaEventCreate(&start);
	cudaEventCreate(&stop);

	cudaEventRecord(start, 0);

	L = cholmod_l_analyze(C, Common);
	printf("\n Analysis: Flops: %g \t lnz: %g\n", Common->fl, Common->lnz);
	cholmod_l_factorize(C, L, Common);
	x = cholmod_l_solve(CHOLMOD_A, L, b, Common);
	
	cudaEventRecord(stop, 0);
	cudaEventSynchronize(stop);
	
	

	float elapsedTime;
	cudaEventElapsedTime(&elapsedTime, start, stop);
	printf("\n Time : %.6f secs :\n", elapsedTime);
	
	cholmod_l_print_dense(x, "X", Common);
	
	double *x1_mod = (double*)x->x;
	
	cudaMemcpy(dev_x1, x1_mod, m*sizeof(double), cudaMemcpyHostToDevice);
	
	cusparseStatus_t cuSparseStatus;
	cusparseHandle_t cuspHandle;
	cuSparseStatus = cusparseCreate(&cuspHandle);

	cusparseMatDescr_t descrP;
	cusparseCreateMatDescr(&descrP);

	cusparseSetMatType(descrP, CUSPARSE_MATRIX_TYPE_GENERAL);	
	cusparseSetMatIndexBase(descrP, CUSPARSE_INDEX_BASE_ZERO);
	
	
	double *dev_res1, *dev_simple;
	double *res1 = (double*)calloc(n,sizeof(double));
	cudaMalloc((void**)&dev_res1, n*sizeof(double));
	cudaMalloc((void**)&dev_simple, n*sizeof(double));
	
	const double alpha = 1.0, beta=0.0;
	//alpha = 1.0;
	//beta = 0.0;
	
	//solving P^T * G^-1 * b1 Result stored in dev_res1
	
	cuSparseStatus = cusparseDcsrmv(cuspHandle, CUSPARSE_OPERATION_TRANSPOSE, m, n, G.Pnonzero, &alpha, descrP, dev_PcsrVal, dev_PcsrRowIdx, dev_PcsrCol, dev_x1, &beta, dev_res1);
		
	if(cuSparseStatus == CUSPARSE_STATUS_SUCCESS)
	{
/*		cudaMemcpy(res1, dev_res1, n*sizeof(double), cudaMemcpyDeviceToHost);
		for(i=0;i<n;i++)
		{
			printf("\nres1[%d] = %.8f", i, res1[i]);
		}
		printf("\n P^T * G^-1 * b1 done! Vector stored in res1");
*/	}
	else
	{
		printf("\n P^T * G^-1 * b1 failed\n");
		exit(1);
	}
	
	const double alphaneg = -1.0;
		
		//Solving P^T * G^-1 * b1 - b2 ; Result stored in dev_res1
	
		
	cublasStatus_t cuBlasStatus;
	cublasHandle_t cubHandle;
	cuBlasStatus = cublasCreate(&cubHandle);
		
	cuBlasStatus = cublasDaxpy(cubHandle, n, &alphaneg, dev_b2, 1, dev_res1, 1);
	if(cuBlasStatus == CUBLAS_STATUS_SUCCESS)
	{
//		cudaMemcpy(res1, dev_res1, n*sizeof(double), cudaMemcpyDeviceToHost);
//		for(i=0;i<n;i++)
//		{
//			printf("\nres1[%d] = %.8f", i, res1[i]);
//		}
		printf("\n res1 = res1 - b2 done\n");
		 
	}
	else
	{
		printf("\n res1 = res1 - b2 failed\n");
	}
		
	
	
	
	///NOW COMPUTING G^-1 * P
	
	
	int k = 0;
	int breakloop=0;
	
	double **midMat = (double**)malloc(m*sizeof(double*));
	for(i=0;i<m;i++)
	{
		midMat[i] = (double*)calloc(n,sizeof(double));
	}
	
	cudaEventRecord(start, 0);
	
	for(i=0;i<n;i++)
	{
		breakloop = 0;
		double *vect = (double*)calloc(m,sizeof(double*));

		for(j=0;j<m;j++)
		{
			int startin = G.ProwIndex[j];
			int endin = G.ProwIndex[j+1];
			if(startin == endin)
				continue;
				
			k = startin;

			while(k<endin)
			{

				if(G.Pcolumns[k] == i)
				{	
					vect[j] = G.Pmat[k];
					breakloop=1;
					break;
				
				}
				k++;
			}
			if(breakloop == 1)
			{
				break;
			}
		}
		
		midvec = cholmod_l_allocate_dense((size_t)(m), 1, (size_t)(m), 1, Common);
		midvec->dtype=0;
		midvec->x=&vect[0];
		midvec->xtype = 1;
		
		cholmod_dense *res2;
		
		res2 = cholmod_l_solve(CHOLMOD_A, L, midvec, Common);

		
		double *re = (double*)res2->x;
		
//		printf("\n vector %d is:\n", i);
		int i1, j1, k1;
//		for(j1=0;j1<m;j1++)
//		{
//			midmat2flat[i+j1*n] = re[j1];
//			printf(" %lf", re[j1]);
//		}
//		printf("\n");
		for(i1=0;i1<m;i1++)
		{
			midMat[i1][i] = re[i1];
		}
		
		cholmod_l_free_dense(&midvec, Common);
			
	}
	
/*	printf("\n Midmat = \n");
	for(i=0;i<m;i++)
	{
		for(j=0;j<n;j++)
		{
			printf(" %lf", midMat[i][j]);
		}
		printf("\n");
	} 
*/
	double *midMatflat = (double*)calloc((m*n),sizeof(double));
	double *dev_midMat;
	double *dev_solut;
	int counter = 0;
	for(i=0;i<n;i++)
	{
		for(j=0;j<m;j++)
		{
			midMatflat[counter] = midMat[j][i];
			counter++; 
		}
	}
	
	cudaMalloc((void**)&dev_midMat, m*n*sizeof(double));
	cudaMalloc((void**)&dev_solut, n*n*sizeof(double));
	
	cudaMemcpy(dev_midMat, midMatflat, m*n*sizeof(double), cudaMemcpyHostToDevice);
		
	//Solving P^T * midMat; Result stored in dev_solut
		
	cuSparseStatus = cusparseDcsrmm(cuspHandle, CUSPARSE_OPERATION_TRANSPOSE, m, n, n, G.Pnonzero, &alpha, descrP, dev_PcsrVal, dev_PcsrRowIdx, dev_PcsrCol, dev_midMat, m, &beta, dev_solut, n);
	
	if(cuSparseStatus == CUSPARSE_STATUS_SUCCESS)
	{
		printf("\n Solved P^T * G^-1 * P. Result stored in solut\n");
		
	}  
	else
	{
		printf("\n  Failed to Solve P^T * G^-1 * P \n");
		exit(1);
	}

/*	double *matGflat = (double*)calloc(n*n,sizeof(double));
	cudaMemcpy(matGflat, dev_solut, n*n*sizeof(double), cudaMemcpyDeviceToHost);
	counter = 0;
	printf("\nBefore LU starts\n");
	for(i=0;i<n;i++)
	{
		for(j=0;j<n;j++)
		{
			printf(" %lf ", matGflat[counter]);
			counter++;
		}
		printf("\n");
	}
	printf("\n");
*/	
	cusolverStatus_t cuSolverStatus;
	
	
	cusolverDnHandle_t cudenHandle;
	cuSolverStatus = cusolverDnCreate(&cudenHandle);
	int Lwork = 0;
	cuSolverStatus = cusolverDnDgetrf_bufferSize(cudenHandle, n, n, dev_solut, n, &Lwork);
	
	if(cuSolverStatus == CUSOLVER_STATUS_SUCCESS)
	{
		printf("\n Buffer works\n Lwork = %d\n", Lwork);
	}
	else
	{
		exit(1);	
	}
	double *dev_Workspace;
	int *dev_Ipiv, *dev_Info;
	
	cudaMalloc((void**)&dev_Workspace, Lwork*sizeof(double));
	cudaMalloc((void**)&dev_Ipiv, n*sizeof(int));
	cudaMalloc((void**)&dev_Info, sizeof(int));
	
	//Calculating LU for dev_solut
//	double *nnmat = (double*)calloc(n*n,sizeof(double));
//	cudaMemcpy(nnmat, dev_solut, n*n*sizeof(double), cudaMemcpyDeviceToHost);
//	cuSolverStatus = cusolverDnDgetrfHost(cudenHandle, n, n, 

	cuSolverStatus = cusolverDnDgetrf(cudenHandle, n, n, dev_solut, n, dev_Workspace, dev_Ipiv, dev_Info);
	
	if(cuSolverStatus == CUSOLVER_STATUS_SUCCESS)
	{
		printf("\n solut has be defactorized into L and U. dev_Ipiv * solut = L * U\n");
	}
	else
	{
	
		printf("\n Unable to defactorize solut into LU\n");
		exit(1);	
	}
	
	//solving dev_solut * x = dev_res1. Result stored in dev_res1
	
	cuSolverStatus = cusolverDnDgetrs(cudenHandle, CUBLAS_OP_N, n, 1, dev_solut, n, dev_Ipiv, dev_res1, n, dev_Info);
	if(cuSolverStatus == CUSOLVER_STATUS_SUCCESS)
	{
		printf("\n Solution obtained for x2 \n");
	}
	else
	{
		printf("\n LU decomposition obtained by LU solver failed\n");
	}

/*	cudaMemcpy(G.x2, dev_res1, n*sizeof(double), cudaMemcpyDeviceToHost);
	printf("\n x2 = \n");
	for(i=0;i<n;i++)
	{
		printf("\n x2[%d] = %lf", i, G.x2[i]); 
	}
*/	
	
	double *dev_dummy;
	cudaMalloc((void**)&dev_dummy, m*sizeof(double));
	cudaMemset(dev_dummy, 0.0, m*sizeof(double));
	
	printf("\n Starting solving for x1 \n");
	//Solving for x1
	
		//Solving G^-1 * P * x2;  G^-1 * P is stored in midMat
	cuBlasStatus = cublasDgemv(cubHandle, CUBLAS_OP_N, m, n, &alpha, dev_midMat, m, dev_res1, 1, &beta, dev_dummy, 1);
	if(cuBlasStatus == CUBLAS_STATUS_SUCCESS)
	{
/*		double *toprint = (double*)calloc(m,sizeof(double));
		cudaMemcpy(toprint, dev_dummy, m*sizeof(double), cudaMemcpyDeviceToHost);
		printf("\n Intermediate vector :\n");
		for(i=0;i<m;i++)
		{
			printf("\ndummy[%d] = %lf", i, toprint[i]);
		}
*/		printf("\n midmat * x2 obtained. Stored in dummy\n");
	}
	else
	{
		printf("\n Failed to obtain midmat * x2\n");
	}
	
	cuBlasStatus = cublasDaxpy(cubHandle, m, &alphaneg, dev_dummy, 1, dev_x1, 1);
	if(cuBlasStatus == CUBLAS_STATUS_SUCCESS)
	{
/*		cudaMemcpy(G.x1, dev_x1, m*sizeof(double), cudaMemcpyDeviceToHost);
		printf("\n x1 = \n");
		for(i=0;i<m;i++)
		{
			printf("\n x1[%d] = %.15f", i, G.x1[i]);
		}
*/		printf("\n x1 obtained");
	}
	else
	{
		printf("\n Failed to obtain x1");
	}

	printf("\n Solver finished its work\n");

/*		cudaEventRecord(stop, 0);
		cudaEventSynchronize(stop);

		cudaEventElapsedTime(&elapsedTime, start, stop);
		printf("\n Time: %.6f msecs :\n", elapsedTime);
*/








	
	
	
	
	

	cholmod_l_finish(Common);
	return 0;

} 
Пример #6
0
template <typename Entry> int spqr_1fixed
(
    // inputs, not modified
    double tol,             // only accept singletons above tol
    Long bncols,            // number of columns of B
    cholmod_sparse *A,      // m-by-n sparse matrix

    // output arrays, neither allocated nor defined on input.

    Long **p_R1p,           // size n1rows+1, R1p [k] = # of nonzeros in kth
                            // row of R1.  NULL if n1cols == 0.
    Long **p_P1inv,         // size m, singleton row inverse permutation.
                            // If row i of A is the kth singleton row, then
                            // P1inv [i] = k.  NULL if n1cols is zero.

    cholmod_sparse **p_Y,   // on output, only the first n-n1cols+1 entries of
                            // Y->p are defined (if Y is not NULL), where
                            // Y = [A B] or Y = [A2 B2].  If B is empty and
                            // there are no column singletons, Y is NULL

    Long *p_n1cols,         // number of column singletons found
    Long *p_n1rows,         // number of corresponding rows found

    // workspace and parameters
    cholmod_common *cc
)
{
    cholmod_sparse *Y ;
    Long *P1inv, *R1p, *Yp, *Qrows, *Ap, *Ai ;
    char *Mark ;
    Entry *Ax ;
    Long i, j, k, p, d, row, n1rows, n1cols, ynz, iold, inew, kk, m, n, xtype ;

    // -------------------------------------------------------------------------
    // get inputs
    // -------------------------------------------------------------------------

    xtype = spqr_type <Entry> ( ) ;

    m = A->nrow ;
    n = A->ncol ;
    Ap = (Long *) A->p ;
    Ai = (Long *) A->i ;
    Ax = (Entry *) A->x ;

    // set outputs to NULL in case of early return
    *p_R1p    = NULL ;
    *p_P1inv  = NULL ;
    *p_Y      = NULL ;
    *p_n1cols = EMPTY ;
    *p_n1rows = EMPTY ;

    // -------------------------------------------------------------------------
    // allocate workspace
    // -------------------------------------------------------------------------

    Mark = (char *) cholmod_l_calloc (m, sizeof (char), cc) ;
    Qrows = (Long *) cholmod_l_malloc (n, sizeof (Long), cc) ;

    if (cc->status < CHOLMOD_OK)
    {
        // out of memory
        cholmod_l_free (m, sizeof (char), Mark, cc) ;
        cholmod_l_free (n, sizeof (Long), Qrows, cc) ;
        return (FALSE) ;
    }

    // -------------------------------------------------------------------------
    // find singletons; no column permutations allowed
    // -------------------------------------------------------------------------

    n1cols = 0 ;        // number of column singletons found
    n1rows = 0 ;        // number of corresponding singleton rows

    for (j = 0 ; j < n ; j++)
    {
        // count the number of unmarked rows in column j
        Entry aij = 0 ;
        d = 0 ;
        row = EMPTY ;
        for (p = Ap [j] ; d < 2 && p < Ap [j+1] ; p++)
        {
            i = Ai [p] ;
            if (!Mark [i])
            {
                // row i is not taken by a prior column singleton.  If this
                // is the only unflagged row and the value is large enough,
                // it will become the row for this column singleton. 
                aij = Ax [p] ;
                row = i ;
                d++ ;
            }
        }
        if (d == 0)
        {
            // j is a dead column singleton
            Qrows [n1cols++] = EMPTY ;
        }
        else if (d == 1 && spqr_abs (aij, cc) > tol)
        {
            // j is a live column singleton
            Qrows [n1cols++] = row ;
            // flag row i as taken
            Mark [row] = TRUE ;
            n1rows++ ;
        }
        else
        {
            // j is not a singleton; quit searching
            break ;
        }
    }

    // -------------------------------------------------------------------------
    // construct P1inv permutation, row counts R1p, and col pointers Yp
    // -------------------------------------------------------------------------

    if (n1cols == 0 && bncols == 0)
    {

        // ---------------------------------------------------------------------
        // no singletons, and B empty; Y=A will be done via pointer alias
        // ---------------------------------------------------------------------

        Y = NULL ;
        Yp = NULL ;
        P1inv = NULL ;
        R1p = NULL ;

    }
    else if (n1cols == 0)
    {

        // ---------------------------------------------------------------------
        // no singletons in the matrix; no R1 matrix, no P1inv permutation
        // ---------------------------------------------------------------------

        // Y has no entries yet; nnz(Y) will be determined later
        Y = cholmod_l_allocate_sparse (m, n+bncols, 0,
            FALSE, TRUE, 0, xtype, cc) ;

        if (cc->status < CHOLMOD_OK)
        {
            // out of memory
            cholmod_l_free (m, sizeof (char), Mark, cc) ;
            cholmod_l_free (n, sizeof (Long), Qrows, cc) ;
            return (FALSE) ;
        }

        Yp = (Long *) Y->p ;

        ASSERT (n1rows == 0) ;
        P1inv = NULL ;
        R1p = NULL ;

        // ---------------------------------------------------------------------
        // copy the column pointers of A for the first part of Y = [A B]
        // ---------------------------------------------------------------------

        ynz = Ap [n] ;
        for (k = 0 ; k <= n ; k++)
        {
            Yp [k] = Ap [k] ;
        }

    }
    else
    {

        // ---------------------------------------------------------------------
        // construct the row singleton permutation
        // ---------------------------------------------------------------------

        // Y has no entries yet; nnz(Y) will be determined later
        Y = cholmod_l_allocate_sparse (m-n1rows, n-n1cols+bncols, 0,
            TRUE, TRUE, 0, xtype, cc) ;
        P1inv = (Long *) cholmod_l_malloc (m, sizeof (Long), cc) ;
        R1p   = (Long *) cholmod_l_calloc (n1rows+1, sizeof (Long), cc) ;

        if (cc->status < CHOLMOD_OK)
        {
            // out of memory
            cholmod_l_free_sparse (&Y, cc) ;
            cholmod_l_free (m, sizeof (Long), P1inv, cc) ;
            cholmod_l_free (n1rows+1, sizeof (Long), R1p, cc) ;
            cholmod_l_free (m, sizeof (char), Mark, cc) ;
            cholmod_l_free (n, sizeof (Long), Qrows, cc) ;
            return (FALSE) ;
        }

        Yp = (Long *) Y->p ;

#ifndef NDEBUG
        for (i = 0 ; i < m ; i++) P1inv [i] = EMPTY ;
#endif

        kk = 0 ;
        for (k = 0 ; k < n1cols ; k++)
        {
            i = Qrows [k] ;
            if (i != EMPTY)
            {
                // row i is the kk-th singleton row
                ASSERT (Mark [i]) ;
                ASSERT (P1inv [i] == EMPTY) ;
                P1inv [i] = kk ;
                kk++ ;
            }
        }
        for (i = 0 ; i < m ; i++)
        {
            if (!Mark [i])
            {
                // row i is not a singleton row
                ASSERT (P1inv [i] == EMPTY) ;
                P1inv [i] = kk ;
                kk++ ;
            }
        }
        ASSERT (kk == m) ;

        // ---------------------------------------------------------------------
        // find row counts for R11
        // ---------------------------------------------------------------------

        for (k = 0 ; k < n1cols ; k++)
        {
            for (p = Ap [k] ; p < Ap [k+1] ; p++)
            {
                iold = Ai [p] ;
                inew = P1inv [iold] ;
                ASSERT (inew < n1rows) ;
                R1p [inew]++ ;              // a singleton row; in R1
            }
        }

        // ---------------------------------------------------------------------
        // find row counts for R12 and column pointers for A2 part of Y
        // ---------------------------------------------------------------------

        ynz = 0 ;
        for ( ; k < n ; k++)
        {
            Yp [k-n1cols] = ynz ;
            for (p = Ap [k] ; p < Ap [k+1] ; p++)
            {
                iold = Ai [p] ;
                inew = P1inv [iold] ;
                if (inew < n1rows)
                {
                    R1p [inew]++ ;          // a singleton row; in R1
                }
                else
                {
                    ynz++ ;                 // not a singleton row; in A2
                }
            }
        }
        Yp [n-n1cols] = ynz ;

#ifndef NDEBUG
        PR (("n1cols: %ld\n", n1cols)) ;
        for (i = 0 ; i < n1rows ; i++)
        {
            PR (("R1p [%ld] is %ld\n", i, R1p [i])) ;
            ASSERT (R1p [i] > 0) ;
        }
#endif
    }

    // -------------------------------------------------------------------------
    // free workspace and return results
    // -------------------------------------------------------------------------

    cholmod_l_free (n, sizeof (Long), Qrows, cc) ;
    cholmod_l_free (m, sizeof (char), Mark, cc) ;

    *p_R1p    = R1p ;
    *p_P1inv  = P1inv ;
    *p_Y      = Y ;
    *p_n1cols = n1cols ;
    *p_n1rows = n1rows ;

    return (TRUE) ;
}