// Builds the CHOLMOD compressed-column copy of this matrix and returns it.
//
// Any matrix built by a previous call is freed first, so the returned pointer
// is the member A and is replaced by the next call.  The entries are read
// from `data`, whose keys are (column, row) pairs and whose values are
// (real, imaginary) pairs; the imaginary part is only stored when xtype is
// CHOLMOD_ZOMPLEX.
//
// Returns NULL if CHOLMOD could not allocate the matrix.
cholmod_sparse* Sparse :: operator*( void )
{
   // discard the matrix built by an earlier call, if any
   if( A )
   {
      cholmod_l_free_sparse( &A, common );
      A = NULL;
   }

   int nzmax = data.size();
   int sorted = true;
   int packed = true;
   A = cholmod_l_allocate_sparse( m, n, nzmax, sorted, packed, stype, xtype, common );
   if( A == NULL )
   {
      // allocation failed; there are no arrays to fill in, so report the
      // failure to the caller instead of dereferencing a null pointer
      return NULL;
   }

   // build compressed matrix (note that EntryMap stores entries in column-major order)
   double* pr = (double*) A->x;    // real parts
   double* pi = (double*) A->z;    // imaginary parts (zomplex only)
   UF_long* ir = (UF_long*) A->i;  // row index of each entry
   UF_long* jc = (UF_long*) A->p;  // start of each column, n+1 entries

   int i = 0;    // number of entries written so far
   int j = -1;   // last column whose pointer has been set

   for( EntryMap::const_iterator e = data.begin(); e != data.end(); ++e )
   {
      const int c = e->first.first;
      if( c != j )
      {
         // entering a new column: it, and every empty column skipped since
         // the previous entry, starts at position i
         for( int k = j+1; k <= c; k++ )
         {
            jc[k] = i;
         }
         j = c;
      }

      ir[i] = e->first.second;
      pr[i] = e->second.first;
      if( xtype == CHOLMOD_ZOMPLEX )
      {
         pi[i] = e->second.second;
      }

      i++;
   }

   // trailing empty columns and the final end-of-matrix pointer jc[n]
   for( int k = j+1; k <= n; k++ )
   {
      jc[k] = i;
   }

   return A;
}
// spqr_1colamd: find column singletons of A and a column ordering.
//
// The function works in these stages:
//  1. Queue every column of A that is empty or has a single entry whose
//     magnitude exceeds tol, then repeatedly remove the row of each live
//     singleton and queue any column that becomes a singleton as a result.
//  2. If singletons were found, build R1p (entry counts of the singleton rows)
//     and P1inv (singleton rows first, then all remaining rows).
//  3. Complete the column ordering Q1fill: singleton columns first, then the
//     remaining columns in natural order or in a fill-reducing order computed
//     by CHOLMOD on the pruned matrix.  The columns of B are appended with the
//     identity permutation.
//  4. Allocate Y (unless there are no singletons and B is empty) and define
//     the column pointers of its leading n-n1cols columns.
//
// When a fill-reducing ordering is computed, the ordering actually used is
// recorded in cc->SPQR_istat [7].  On failure (cc->status < CHOLMOD_OK) all
// arrays allocated here are freed, the outputs keep the NULL / EMPTY values
// assigned on entry, and FALSE is returned.
template <typename Entry> int spqr_1colamd  // TRUE if OK, FALSE otherwise
(
    // inputs, not modified
    int ordering,           // all available, except 0:fixed and 3:given
                            // treated as 1:natural
    double tol,             // only accept singletons above tol
    Long bncols,            // number of columns of B
    cholmod_sparse *A,      // m-by-n sparse matrix

    // outputs, neither allocated nor defined on input

    Long **p_Q1fill,        // size n+bncols, fill-reducing
                            // or natural ordering

    Long **p_R1p,           // size n1rows+1, R1p [k] = # of nonzeros in kth
                            // row of R1.  NULL if n1cols == 0.
    Long **p_P1inv,         // size m, singleton row inverse permutation.
                            // If row i of A is the kth singleton row, then
                            // P1inv [i] = k.  NULL if n1cols is zero.

    cholmod_sparse **p_Y,   // on output, only the first n-n1cols+1 entries of
                            // Y->p are defined (if Y is not NULL), where
                            // Y = [A B] or Y = [A2 B2].  If B is empty and
                            // there are no column singletons, Y is NULL

    Long *p_n1cols,         // number of column singletons found
    Long *p_n1rows,         // number of corresponding rows found

    // workspace and parameters
    cholmod_common *cc
)
{
    Long *Q1fill, *Degree, *Qrows, *W, *Winv, *ATp, *ATj, *R1p, *P1inv, *Yp,
        *Ap, *Ai, *Work ;
    Entry *Ax ;
    Long p, d, j, i, k, n1cols, n1rows, row, col, pend, n2rows, n2cols = EMPTY,
        nz2, kk, p2, col2, ynz, fill_reducing_ordering, m, n, xtype, worksize ;
    cholmod_sparse *AT, *Y ;

    // -------------------------------------------------------------------------
    // get inputs
    // -------------------------------------------------------------------------

    xtype = spqr_type <Entry> ( ) ;

    m = A->nrow ;
    n = A->ncol ;
    Ap = (Long *) A->p ;
    Ai = (Long *) A->i ;
    Ax = (Entry *) A->x ;

    // set outputs to NULL in case of early return
    *p_Q1fill = NULL ;
    *p_R1p = NULL ;
    *p_P1inv = NULL ;
    *p_Y = NULL ;
    *p_n1cols = EMPTY ;
    *p_n1rows = EMPTY ;

    // -------------------------------------------------------------------------
    // allocate result Q1fill (Y, R1p, P1inv allocated later)
    // -------------------------------------------------------------------------

    Q1fill = (Long *) cholmod_l_malloc (n+bncols, sizeof (Long), cc) ;

    // -------------------------------------------------------------------------
    // allocate workspace
    // -------------------------------------------------------------------------

    // fixed, given, and natural orderings all skip the fill-reducing step
    fill_reducing_ordering = !
        ((ordering == SPQR_ORDERING_FIXED) ||
         (ordering == SPQR_ORDERING_GIVEN) ||
         (ordering == SPQR_ORDERING_NATURAL)) ;

    // one block of memory holds Degree, Qrows/Winv, and (if needed) W
    worksize = ((fill_reducing_ordering) ? 3:2) * n ;

    Work = (Long *) cholmod_l_malloc (worksize, sizeof (Long), cc) ;
    Degree = Work ;         // size n
    Qrows  = Work + n ;     // size n
    Winv   = Qrows ;        // Winv and Qrows not needed at the same time
    W      = Qrows + n ;    // size n if fill-reducing ordering, else size 0

    // a single status check covers both the Q1fill and Work allocations
    if (cc->status < CHOLMOD_OK)
    {
        // out of memory; free everything and return
        cholmod_l_free (worksize, sizeof (Long), Work, cc) ;
        cholmod_l_free (n+bncols, sizeof (Long), Q1fill, cc) ;
        return (FALSE) ;
    }

    // -------------------------------------------------------------------------
    // initialize queue with empty columns, and columns with just one entry
    // -------------------------------------------------------------------------

    // The queue is stored in Q1fill [0..n1cols-1] (columns) with the matching
    // row of each column in Qrows [0..n1cols-1] (EMPTY if there is none).

    n1cols = 0 ;
    n1rows = 0 ;

    for (j = 0 ; j < n ; j++)
    {
        p = Ap [j] ;
        d = Ap [j+1] - p ;
        if (d == 0)
        {
            // j is a dead column singleton
            PR (("initial dead %ld\n", j)) ;
            Q1fill [n1cols] = j ;
            Qrows [n1cols] = EMPTY ;
            n1cols++ ;
            Degree [j] = EMPTY ;
        }
        else if (d == 1 && spqr_abs (Ax [p], cc) > tol)
        {
            // j is a column singleton, live or dead
            PR (("initial live %ld %ld\n", j, Ai [p])) ;
            Q1fill [n1cols] = j ;
            Qrows [n1cols] = Ai [p] ;       // this might be a duplicate
            n1cols++ ;
            Degree [j] = EMPTY ;
        }
        else
        {
            // j has degree > 1, it is not (yet) a singleton
            // (this branch is also taken when d == 1 but the single entry
            // does not exceed tol)
            Degree [j] = d ;
        }
    }

    // Degree [j] = EMPTY if j is in the singleton queue, or the Degree [j] > 1
    // is the degree of column j otherwise

    // -------------------------------------------------------------------------
    // create AT = spones (A')
    // -------------------------------------------------------------------------

    AT = cholmod_l_transpose (A, 0, cc) ;       // [

    if (cc->status < CHOLMOD_OK)
    {
        // out of memory; free everything and return
        cholmod_l_free (worksize, sizeof (Long), Work, cc) ;
        cholmod_l_free (n+bncols, sizeof (Long), Q1fill, cc) ;
        return (FALSE) ;
    }

    ATp = (Long *) AT->p ;
    ATj = (Long *) AT->i ;

    // -------------------------------------------------------------------------
    // remove column singletons via breadth-first-search
    // -------------------------------------------------------------------------

    // n1cols grows inside the loop as new singletons are appended to the queue

    for (k = 0 ; k < n1cols ; k++)
    {

        // ---------------------------------------------------------------------
        // get a new singleton from the queue
        // ---------------------------------------------------------------------

        col = Q1fill [k] ;
        row = Qrows [k] ;
        PR (("\n---- singleton col %ld row %ld\n", col, row)) ;
        ASSERT (Degree [col] == EMPTY) ;

        // a negative ATp [row] means the row was already flagged (removed)
        // by an earlier singleton
        if (row == EMPTY || ATp [row] < 0)
        {

            // -----------------------------------------------------------------
            // col is a dead column singleton; remove duplicate row index
            // -----------------------------------------------------------------

            Qrows [k] = EMPTY ;
            row = EMPTY ;
            PR (("dead: %ld\n", col)) ;

        }
        else
        {

            // -----------------------------------------------------------------
            // col is a live col singleton; remove its row from matrix
            // -----------------------------------------------------------------

            n1rows++ ;
            p = ATp [row] ;
            ATp [row] = FLIP (p) ;          // flag the singleton row
            // ATp [row+1] may itself be flagged, so unflip it before use
            pend = UNFLIP (ATp [row+1]) ;
            PR (("live: %ld row %ld\n", col, row)) ;
            for ( ; p < pend ; p++)
            {
                // look for new column singletons after row is removed
                j = ATj [p] ;
                d = Degree [j] ;
                if (d == EMPTY)
                {
                    // j is already in the singleton queue
                    continue ;
                }
                ASSERT (d >= 1) ;
                ASSERT2 (spqrDebug_listcount (j, Q1fill, n1cols, 0) == 0) ;
                d-- ;
                Degree [j] = d ;
                if (d == 0)
                {
                    // a new dead col singleton
                    PR (("newly dead %ld\n", j)) ;
                    Q1fill [n1cols] = j ;
                    Qrows [n1cols] = EMPTY ;
                    n1cols++ ;
                    Degree [j] = EMPTY ;
                }
                else if (d == 1)
                {
                    // a new live col singleton; find its single live row
                    // (if no unflagged row has an entry above tol, column j
                    // is left out of the queue with Degree [j] == 1)
                    for (p2 = Ap [j] ; p2 < Ap [j+1] ; p2++)
                    {
                        i = Ai [p2] ;
                        if (ATp [i] >= 0 && spqr_abs (Ax [p2], cc) > tol)
                        {
                            // i might appear in Qrows [k+1:n1cols-1]
                            PR (("newly live %ld\n", j)) ;
                            ASSERT2 (spqrDebug_listcount (i,Qrows,k+1,1) == 0) ;
                            Q1fill [n1cols] = j ;
                            Qrows [n1cols] = i ;
                            n1cols++ ;
                            Degree [j] = EMPTY ;
                            break ;
                        }
                    }
                }
            }
        }

        // Q1fill [0:k] and Qrows [0:k] have no duplicates
        ASSERT2 (spqrDebug_listcount (col, Q1fill, n1cols, 0) == 1) ;
        ASSERT2 (IMPLIES (row >= 0, spqrDebug_listcount
            (row, Qrows, k+1, 1) == 1)) ;
    }

    // -------------------------------------------------------------------------
    // Degree flags the column singletons, ATp flags their rows
    // -------------------------------------------------------------------------

#ifndef NDEBUG
    // debug-only consistency checks of the flags set above
    k = 0 ;
    for (j = 0 ; j < n ; j++)
    {
        PR (("j %ld Degree[j] %ld\n", j, Degree [j])) ;
        if (Degree [j] > 0) k++ ;    // j is not a column singleton
    }
    PR (("k %ld n %ld n1cols %ld\n", k, n, n1cols)) ;
    ASSERT (k == n - n1cols) ;
    for (k = 0 ; k < n1cols ; k++)
    {
        col = Q1fill [k] ;
        ASSERT (Degree [col] <= 0) ;
    }
    k = 0 ;
    for (i = 0 ; i < m ; i++)
    {
        if (ATp [i] >= 0) k++ ;     // i is not a row of a col singleton
    }
    ASSERT (k == m - n1rows) ;
    for (k = 0 ; k < n1cols ; k++)
    {
        row = Qrows [k] ;
        ASSERT (IMPLIES (row != EMPTY, ATp [row] < 0)) ;
    }
#endif

    // -------------------------------------------------------------------------
    // find the row ordering
    // -------------------------------------------------------------------------

    if (n1cols == 0)
    {

        // ---------------------------------------------------------------------
        // no singletons in the matrix; no R1 matrix, no P1inv permutation
        // ---------------------------------------------------------------------

        ASSERT (n1rows == 0) ;
        R1p = NULL ;
        P1inv = NULL ;

    }
    else
    {

        // ---------------------------------------------------------------------
        // construct the row singleton permutation
        // ---------------------------------------------------------------------

        // allocate result arrays R1p and P1inv
        R1p   = (Long *) cholmod_l_malloc (n1rows+1, sizeof (Long), cc) ;
        P1inv = (Long *) cholmod_l_malloc (m,        sizeof (Long), cc) ;

        if (cc->status < CHOLMOD_OK)
        {
            // out of memory; free everything and return
            cholmod_l_free_sparse (&AT, cc) ;
            cholmod_l_free (worksize, sizeof (Long), Work, cc) ;
            cholmod_l_free (n+bncols, sizeof (Long), Q1fill, cc) ;
            cholmod_l_free (n1rows+1, sizeof (Long), R1p, cc) ;
            cholmod_l_free (m,        sizeof (Long), P1inv, cc) ;
            return (FALSE) ;
        }

#ifndef NDEBUG
        for (i = 0 ; i < m ; i++) P1inv [i] = EMPTY ;
#endif

        // singleton rows are numbered first, in queue order
        kk = 0 ;
        for (k = 0 ; k < n1cols ; k++)
        {
            i = Qrows [k] ;
            PR (("singleton col %ld row %ld\n", Q1fill [k], i)) ;
            if (i != EMPTY)
            {
                // row i is the kk-th singleton row
                ASSERT (ATp [i] < 0) ;
                ASSERT (P1inv [i] == EMPTY) ;
                P1inv [i] = kk ;
                // also find # of entries in row kk of R1
                R1p [kk] = UNFLIP (ATp [i+1]) - UNFLIP (ATp [i]) ;
                kk++ ;
            }
        }
        ASSERT (kk == n1rows) ;
        // the remaining rows follow, in their original relative order
        for (i = 0 ; i < m ; i++)
        {
            if (ATp [i] >= 0)
            {
                // row i is not a singleton row
                ASSERT (P1inv [i] == EMPTY) ;
                P1inv [i] = kk ;
                kk++ ;
            }
        }
        ASSERT (kk == m) ;

    }

    // Qrows is no longer needed.

    // -------------------------------------------------------------------------
    // complete the column ordering
    // -------------------------------------------------------------------------

    if (!fill_reducing_ordering)
    {

        // ---------------------------------------------------------------------
        // natural ordering
        // ---------------------------------------------------------------------

        if (n1cols == 0)
        {

            // no singletons, so natural ordering is 0:n-1 for now
            for (k = 0 ; k < n ; k++)
            {
                Q1fill [k] = k ;
            }

        }
        else
        {

            // singleton columns appear first, then non column singletons
            k = n1cols ;
            for (j = 0 ; j < n ; j++)
            {
                if (Degree [j] > 0)
                {
                    // column j is not a column singleton
                    Q1fill [k++] = j ;
                }
            }
            ASSERT (k == n) ;
        }

    }
    else
    {

        // ---------------------------------------------------------------------
        // fill-reducing ordering of pruned submatrix
        // ---------------------------------------------------------------------

        if (n1cols == 0)
        {

            // -----------------------------------------------------------------
            // no singletons found; do fill-reducing on entire matrix
            // -----------------------------------------------------------------

            n2cols = n ;
            n2rows = m ;

        }
        else
        {

            // -----------------------------------------------------------------
            // create the pruned matrix for fill-reducing by removing singletons
            // -----------------------------------------------------------------

            // find the mapping of original columns to pruned columns
            n2cols = 0 ;
            for (j = 0 ; j < n ; j++)
            {
                if (Degree [j] > 0)
                {
                    // column j is not a column singleton
                    W [j] = n2cols++ ;
                    PR (("W [%ld] = %ld\n", j, W [j])) ;
                }
                else
                {
                    // column j is a column singleton
                    W [j] = EMPTY ;
                    PR (("W [%ld] = %ld (j is col singleton)\n", j, W [j])) ;
                }
            }
            ASSERT (n2cols == n - n1cols) ;

            // W is now a mapping of the original columns to the columns in the
            // pruned matrix.  W [col] == EMPTY if col is a column singleton.
            // Otherwise col2 = W [j] is a column of the pruned matrix.

            // -----------------------------------------------------------------
            // delete row and column singletons from A'
            // -----------------------------------------------------------------

            // compact A' by removing row and column singletons
            nz2 = 0 ;
            n2rows = 0 ;
            for (i = 0 ; i < m ; i++)
            {
                p = ATp [i] ;
                if (p >= 0)
                {
                    // row i is not a row of a column singleton
                    ATp [n2rows++] = nz2 ;
                    pend = UNFLIP (ATp [i+1]) ;
                    // NOTE(review): the store above writes ATp [i] itself when
                    // no earlier row was removed; the re-read of ATp [i] below
                    // then relies on nz2 being equal to the old ATp [i] in
                    // that case -- TODO confirm
                    for (p = ATp [i] ; p < pend ; p++)
                    {
                        j = ATj [p] ;
                        ASSERT (W [j] >= 0 && W [j] < n-n1cols) ;
                        ATj [nz2++] = W [j] ;
                    }
                }
            }
            ATp [n2rows] = nz2 ;
            ASSERT (n2rows == m - n1rows) ;
        }

        // ---------------------------------------------------------------------
        // fill-reducing ordering of the transpose of the pruned A' matrix
        // ---------------------------------------------------------------------

        PR (("n1cols %ld n1rows %ld n2cols %ld n2rows %ld\n",
            n1cols, n1rows, n2cols, n2rows)) ;
        ASSERT ((Long) AT->nrow == n) ;
        ASSERT ((Long) AT->ncol == m) ;

        // temporarily shrink AT to the pruned dimensions; the original
        // dimensions are restored after the ordering has been computed
        AT->nrow = n2cols ;
        AT->ncol = n2rows ;

        // save the current CHOLMOD settings
        Long save [6] ;
        save [0] = cc->supernodal ;
        save [1] = cc->nmethods ;
        save [2] = cc->postorder ;
        save [3] = cc->method [0].ordering ;
        save [4] = cc->method [1].ordering ;
        save [5] = cc->method [2].ordering ;

        // follow the ordering with a postordering of the column etree
        cc->postorder = TRUE ;

        // 8:best: best of COLAMD(A), AMD(A'A), and METIS (if available)
        if (ordering == SPQR_ORDERING_BEST)
        {
            ordering = SPQR_ORDERING_CHOLMOD ;
            cc->nmethods = 2 ;
            cc->method [0].ordering = CHOLMOD_COLAMD ;
            cc->method [1].ordering = CHOLMOD_AMD ;
#ifndef NPARTITION
            cc->nmethods = 3 ;
            cc->method [2].ordering = CHOLMOD_METIS ;
#endif
        }

        // 9:bestamd: best of COLAMD(A) and AMD(A'A)
        if (ordering == SPQR_ORDERING_BESTAMD)
        {
            // if METIS is not installed, this option is the same as 8:best
            ordering = SPQR_ORDERING_CHOLMOD ;
            cc->nmethods = 2 ;
            cc->method [0].ordering = CHOLMOD_COLAMD ;
            cc->method [1].ordering = CHOLMOD_AMD ;
        }

#ifdef NPARTITION
        if (ordering == SPQR_ORDERING_METIS)
        {
            // METIS not installed; use default ordering
            ordering = SPQR_ORDERING_DEFAULT ;
        }
#endif

        if (ordering == SPQR_ORDERING_DEFAULT)
        {
            // Version 1.2.0:  just use COLAMD
            ordering = SPQR_ORDERING_COLAMD ;

#if 0
            // Version 1.1.2 and earlier:
            if (n2rows <= 2*n2cols)
            {
                // just use COLAMD; do not try AMD or METIS
                ordering = SPQR_ORDERING_COLAMD ;
            }
            else
            {
#ifndef NPARTITION
                // use CHOLMOD's default ordering: try AMD and then METIS
                // if AMD gives high fill-in, and take the best ordering found
                ordering = SPQR_ORDERING_CHOLMOD ;
                cc->nmethods = 0 ;
#else
                // METIS is not installed, so just use AMD
                ordering = SPQR_ORDERING_AMD ;
#endif
            }
#endif

        }

        // each ordering writes its result into Q1fill [n1cols ... ], directly
        // after the singleton columns already stored in Q1fill [0..n1cols-1]

        if (ordering == SPQR_ORDERING_AMD)
        {
            // use CHOLMOD's interface to AMD to order A'*A
            cholmod_l_amd (AT, NULL, 0, (Long *) (Q1fill + n1cols), cc) ;
        }
#ifndef NPARTITION
        else if (ordering == SPQR_ORDERING_METIS)
        {
            // use CHOLMOD's interface to METIS to order A'*A (if installed)
            cholmod_l_metis (AT, NULL, 0, TRUE,
                (Long *) (Q1fill + n1cols), cc) ;
        }
#endif
        else if (ordering == SPQR_ORDERING_CHOLMOD)
        {
            // use CHOLMOD's internal ordering (defined by cc) to order AT
            PR (("Using CHOLMOD, nmethods %d\n", cc->nmethods)) ;
            cc->supernodal = CHOLMOD_SIMPLICIAL ;
            cc->postorder = TRUE ;
            cholmod_factor *Sc ;
            Sc = cholmod_l_analyze_p2 (FALSE, AT, NULL, NULL, 0, cc) ;
            if (Sc != NULL)
            {
                // copy perm from Sc->Perm [0:n2cols-1] to Q1fill (n1cols:n)
                Long *Sc_perm = (Long *) Sc->Perm ;
                for (k = 0 ; k < n2cols ; k++)
                {
                    Q1fill [k + n1cols] = Sc_perm [k] ;
                }
                // CHOLMOD selected an ordering; determine the ordering used
                switch (Sc->ordering)
                {
                    case CHOLMOD_AMD:    ordering = SPQR_ORDERING_AMD    ;break;
                    case CHOLMOD_COLAMD: ordering = SPQR_ORDERING_COLAMD ;break;
                    case CHOLMOD_METIS:  ordering = SPQR_ORDERING_METIS  ;break;
                }
            }
            cholmod_l_free_factor (&Sc, cc) ;
            PR (("CHOLMOD used method %d : ordering: %d\n", cc->selected,
                cc->method [cc->selected].ordering)) ;
        }
        else // SPQR_ORDERING_DEFAULT or SPQR_ORDERING_COLAMD
        {
            // use CHOLMOD's interface to COLAMD to order AT
            ordering = SPQR_ORDERING_COLAMD ;
            cholmod_l_colamd (AT, NULL, 0, TRUE,
                (Long *) (Q1fill + n1cols), cc) ;
        }

        // record the ordering that was used
        cc->SPQR_istat [7] = ordering ;

        // restore the CHOLMOD settings
        cc->supernodal              = save [0] ;
        cc->nmethods                = save [1] ;
        cc->postorder               = save [2] ;
        cc->method [0].ordering     = save [3] ;
        cc->method [1].ordering     = save [4] ;
        cc->method [2].ordering     = save [5] ;

        AT->nrow = n ;
        AT->ncol = m ;
    }

    // -------------------------------------------------------------------------
    // free AT
    // -------------------------------------------------------------------------

    cholmod_l_free_sparse (&AT, cc) ;   // ]

    // -------------------------------------------------------------------------
    // check if the method succeeded
    // -------------------------------------------------------------------------

    if (cc->status < CHOLMOD_OK)
    {
        // out of memory; free everything and return
        cholmod_l_free (worksize, sizeof (Long), Work, cc) ;
        cholmod_l_free (n+bncols, sizeof (Long), Q1fill, cc) ;
        cholmod_l_free (n1rows+1, sizeof (Long), R1p, cc) ;
        cholmod_l_free (m,        sizeof (Long), P1inv, cc) ;
        return (FALSE) ;
    }

    // -------------------------------------------------------------------------
    // map the fill-reducing ordering back to A
    // -------------------------------------------------------------------------

    if (n1cols > 0 && fill_reducing_ordering)
    {
        // Winv is workspace of size n2cols <= n
        // (it occupies the same memory as Qrows, which is no longer needed)

        #ifndef NDEBUG
        for (j = 0 ; j < n2cols ; j++) Winv [j] = EMPTY ;
        #endif

        for (j = 0 ; j < n ; j++)
        {
            // j is a column of A.  col2 = W [j] is either EMPTY, or it is
            // the corresponding column of the pruned matrix
            col2 = W [j] ;
            if (col2 != EMPTY)
            {
                ASSERT (col2 >= 0 && col2 < n2cols) ;
                Winv [col2] = j ;
            }
        }

        for (k = n1cols ; k < n ; k++)
        {
            // col2 is a column of the pruned matrix
            col2 = Q1fill [k] ;
            // j is the corresponding column of the A matrix
            j = Winv [col2] ;
            ASSERT (j >= 0 && j < n) ;
            Q1fill [k] = j ;
        }
    }

    // -------------------------------------------------------------------------
    // identity permutation of the columns of B
    // -------------------------------------------------------------------------

    for (k = n ; k < n+bncols ; k++)
    {
        // tack on the identity permutation for columns of B
        Q1fill [k] = k ;
    }

    // -------------------------------------------------------------------------
    // find column pointers for Y = [A2 B2]; columns of A2
    // -------------------------------------------------------------------------

    if (n1cols == 0 && bncols == 0)
    {
        // A will be factorized instead of Y
        Y = NULL ;
    }
    else
    {
        // Y has no entries yet; nnz(Y) will be determined later
        Y = cholmod_l_allocate_sparse (m-n1rows, n-n1cols+bncols, 0,
            FALSE, TRUE, 0, xtype, cc) ;

        if (cc->status < CHOLMOD_OK)
        {
            // out of memory; free everything and return
            cholmod_l_free (worksize, sizeof (Long), Work, cc) ;
            cholmod_l_free (n+bncols, sizeof (Long), Q1fill, cc) ;
            cholmod_l_free (n1rows+1, sizeof (Long), R1p, cc) ;
            cholmod_l_free (m,        sizeof (Long), P1inv, cc) ;
            return (FALSE) ;
        }

        Yp = (Long *) Y->p ;

        // the k-th ordered non-singleton column contributes Degree [j] entries
        ynz = 0 ;
        PR (("1c wrapup: n1cols %ld n %ld\n", n1cols, n)) ;
        for (k = n1cols ; k < n ; k++)
        {
            j = Q1fill [k] ;
            d = Degree [j] ;
            ASSERT (d >= 1 && d <= m) ;
            Yp [k-n1cols] = ynz ;
            ynz += d ;
        }
        Yp [n-n1cols] = ynz ;
    }

    // -------------------------------------------------------------------------
    // free workspace and return results
    // -------------------------------------------------------------------------

    cholmod_l_free (worksize, sizeof (Long), Work, cc) ;

    *p_Q1fill = Q1fill ;
    *p_R1p    = R1p ;
    *p_P1inv  = P1inv ;
    *p_Y      = Y ;
    *p_n1cols = n1cols ;
    *p_n1rows = n1rows ;
    return (TRUE) ;
}
/* MATLAB gateway for [count h parent post R] = symbfact2 (A, mode, Lmode).
 *
 * pargin [0] is the sparse matrix A.  The optional pargin [1] is a string
 * whose first letter selects the case: 'r' (A*A'), 'c' (A'*A), 's' (symmetric,
 * upper part, the default) or 'l' (symmetric, lower part).  Supplying a third
 * argument returns the fifth output as L instead of its transpose.
 *
 * Outputs, each built only when requested (the first is always built):
 * the column counts, the elimination tree height, the tree's parent array,
 * its postordering, and the pattern of the factor with all values set to 1.
 */
void mexFunction
(
    int nargout,
    mxArray *pargout [ ],
    int nargin,
    const mxArray *pargin [ ]
)
{
    cholmod_common Common, *cm ;
    cholmod_sparse Amatrix ;
    cholmod_sparse *A, *S, *F, *Aup, *Alo, *A1, *A2, *L, *R ;
    Int *Parent, *Post, *ColCount, *First, *Level ;
    Int *Lp, *Li, *Rp, *Ri, *Next ;
    double *Lx, *hx ;
    double dummy = 0 ;
    Int n, coletree, mode, height, lnz, node, col, row, pos, rnz ;
    char buf [LEN] ;

    /* ---------------------------------------------------------------------- */
    /* start CHOLMOD and set defaults */
    /* ---------------------------------------------------------------------- */

    cm = &Common ;
    cholmod_l_start (cm) ;
    sputil_config (SPUMONI, cm) ;

    /* ---------------------------------------------------------------------- */
    /* check the argument counts */
    /* ---------------------------------------------------------------------- */

    if (!(nargout <= 5 && nargin >= 1 && nargin <= 3))
    {
        mexErrMsgTxt (
            "Usage: [count h parent post R] = symbfact2 (A, mode, Lmode)") ;
    }

    /* ---------------------------------------------------------------------- */
    /* fetch the pattern of A */
    /* ---------------------------------------------------------------------- */

    A = sputil_get_sparse_pattern (pargin [0], &Amatrix, &dummy, cm) ;

    /* S is what gets freed at the end: nothing when A is the local header */
    S = NULL ;
    if (A != &Amatrix)
    {
        S = A ;
    }

    /* ---------------------------------------------------------------------- */
    /* decide how A is interpreted; the default is triu(A) */
    /* ---------------------------------------------------------------------- */

    A->stype = 1 ;
    n = A->nrow ;
    coletree = FALSE ;

    if (nargin > 1)
    {
        /* a non-string mode leaves buf empty and is rejected below */
        buf [0] = '\0' ;
        if (mxIsChar (pargin [1]))
        {
            mxGetString (pargin [1], buf, LEN) ;
        }
        mode = buf [0] ;

        switch (tolower (mode))
        {
            case 'r':
                /* unsymmetric, row case: A*A' */
                A->stype = 0 ;
                break ;

            case 'c':
                /* unsymmetric, column case: A'*A */
                n = A->ncol ;
                coletree = TRUE ;
                A->stype = 0 ;
                break ;

            case 's':
                /* symmetric, upper triangular part of A */
                A->stype = 1 ;
                break ;

            case 'l':
                /* symmetric, lower triangular part of A */
                A->stype = -1 ;
                break ;

            default:
                mexErrMsgTxt ("symbfact2: unrecognized mode") ;
        }
    }

    if (A->stype != 0 && A->nrow != A->ncol)
    {
        mexErrMsgTxt ("symbfact2: A must be square") ;
    }

    /* ---------------------------------------------------------------------- */
    /* elimination tree, postordering, and row/column counts */
    /* ---------------------------------------------------------------------- */

    Parent   = (Int *) cholmod_l_malloc (n, sizeof (Int), cm) ;
    Post     = (Int *) cholmod_l_malloc (n, sizeof (Int), cm) ;
    ColCount = (Int *) cholmod_l_malloc (n, sizeof (Int), cm) ;
    First    = (Int *) cholmod_l_malloc (n, sizeof (Int), cm) ;
    Level    = (Int *) cholmod_l_malloc (n, sizeof (Int), cm) ;

    /* F = A' */
    F = cholmod_l_transpose (A, 0, cm) ;

    if (A->stype != 1 && !coletree)
    {
        /* symmetric lower case: etree of A, using tril(A) */
        /* row case: row etree of A, which is the etree of A*A' */
        Aup = F ;
        Alo = A ;
    }
    else
    {
        /* symmetric upper case: etree of A, using triu(A) */
        /* column case: column etree of A, which is the etree of A'*A */
        Aup = A ;
        Alo = F ;
    }

    cholmod_l_etree (Aup, Parent, cm) ;
    if (cm->status < CHOLMOD_OK)
    {
        /* out of memory or matrix invalid */
        mexErrMsgTxt ("symbfact2 failed: matrix corrupted!") ;
    }

    if (cholmod_l_postorder (Parent, n, NULL, Post, cm) != n)
    {
        /* out of memory or Parent invalid */
        mexErrMsgTxt ("symbfact2 postorder failed!") ;
    }

    /* symmetric upper case: analyze tril(F), which is triu(A) */
    /* column case:          analyze F*F', which is A'*A */
    /* symmetric lower case: analyze tril(A) */
    /* row case:             analyze A*A' */
    cholmod_l_rowcolcounts (Alo, NULL, 0, Parent, Post, NULL, ColCount,
        First, Level, cm) ;
    if (cm->status < CHOLMOD_OK)
    {
        /* out of memory or matrix invalid */
        mexErrMsgTxt ("symbfact2 failed: matrix corrupted!") ;
    }

    /* ---------------------------------------------------------------------- */
    /* hand count, h, parent, and post back to MATLAB */
    /* ---------------------------------------------------------------------- */

    pargout [0] = sputil_put_int (ColCount, n, 0) ;

    if (nargout > 1)
    {
        /* tree height = 1 + the largest level of any node */
        height = 0 ;
        for (node = 0 ; node < n ; node++)
        {
            height = MAX (height, Level [node]) ;
        }
        height++ ;
        pargout [1] = mxCreateDoubleMatrix (1, 1, mxREAL) ;
        hx = mxGetPr (pargout [1]) ;
        hx [0] = height ;
    }

    if (nargout > 2)
    {
        pargout [2] = sputil_put_int (Parent, n, 1) ;
    }

    if (nargout > 3)
    {
        pargout [3] = sputil_put_int (Post, n, 1) ;
    }

    /* ---------------------------------------------------------------------- */
    /* build the pattern of L, if the fifth output is requested */
    /* ---------------------------------------------------------------------- */

    if (nargout > 4)
    {
        /* choose the operands of the row-subtree computation */
        if (A->stype == 1 || A->stype == -1)
        {
            /* symmetric cases: triu(A) or tril(A) only; A2 is not needed */
            A1 = (A->stype == 1) ? A : F ;
            A2 = NULL ;
        }
        else
        {
            /* column case analyzes F*F'; row case analyzes A*A' */
            A1 = coletree ? F : A ;
            A2 = coletree ? A : F ;
        }

        /* total number of entries in L */
        lnz = 0 ;
        for (col = 0 ; col < n ; col++)
        {
            lnz += ColCount [col] ;
        }

        /* allocate the output matrix L (pattern-only) */
        L = cholmod_l_allocate_sparse (n, n, lnz, TRUE, TRUE, 0,
            CHOLMOD_PATTERN, cm) ;
        Lp = (Int *) L->p ;
        Li = (Int *) L->i ;

        /* column pointers of L, with a running copy kept in Next.  Next  */
        /* reuses the First array, whose contents are not needed any more. */
        Next = First ;
        pos = 0 ;
        for (col = 0 ; col < n ; col++)
        {
            Lp [col] = pos ;
            Next [col] = pos ;
            pos += ColCount [col] ;
        }
        Lp [n] = pos ;

        /* workspace that holds one row of L at a time */
        R = cholmod_l_allocate_sparse (n, 1, n, FALSE, TRUE, 0,
            CHOLMOD_PATTERN, cm) ;
        Rp = (Int *) R->p ;
        Ri = (Int *) R->i ;

        /* compute L one row at a time, scattering each row into the columns */
        for (row = 0 ; row < n ; row++)
        {
            cholmod_l_row_subtree (A1, A2, row, Parent, R, cm) ;
            rnz = Rp [1] ;
            for (pos = 0 ; pos < rnz ; pos++)
            {
                col = Ri [pos] ;
                Li [Next [col]] = row ;
                Next [col]++ ;
            }
            /* add the diagonal entry */
            Li [Next [row]] = row ;
            Next [row]++ ;
        }

        /* free workspace */
        cholmod_l_free_sparse (&R, cm) ;

        /* transpose L to get R, or leave as is */
        if (nargin < 3)
        {
            /* R = L' */
            R = cholmod_l_transpose (L, 0, cm) ;
            cholmod_l_free_sparse (&L, cm) ;
            L = R ;
        }

        /* fill numerical values of L with one's (only MATLAB needs this...) */
        L->x = cholmod_l_malloc (lnz, sizeof (double), cm) ;
        Lx = (double *) L->x ;
        for (pos = 0 ; pos < lnz ; pos++)
        {
            Lx [pos] = 1 ;
        }
        L->xtype = CHOLMOD_REAL ;

        /* return L (or R) to MATLAB */
        pargout [4] = sputil_put_sparse (&L, cm) ;
    }

    /* ---------------------------------------------------------------------- */
    /* release everything that is still held */
    /* ---------------------------------------------------------------------- */

    cholmod_l_free (n, sizeof (Int), Parent, cm) ;
    cholmod_l_free (n, sizeof (Int), Post, cm) ;
    cholmod_l_free (n, sizeof (Int), ColCount, cm) ;
    cholmod_l_free (n, sizeof (Int), First, cm) ;
    cholmod_l_free (n, sizeof (Int), Level, cm) ;
    cholmod_l_free_sparse (&F, cm) ;
    cholmod_l_free_sparse (&S, cm) ;
    cholmod_l_finish (cm) ;
    cholmod_l_print_common (" ", cm) ;
    /*
    if (cm->malloc_count != ((nargout == 5) ? 3:0)) mexErrMsgTxt ("!") ;
    */
}
int main() { /* Time Recording Variables */ int now_s, now_u; struct rusage usage; double time_now, time_past; /* x, y, z and global indices */ int i, j, k, m, n, nodes; /* Connectivity enties */ int m_above, m_below, m_left, m_right, m_front, m_back; double c_above, c_below, c_left, c_right, c_front, c_back, c_self; /* Creat cholmod object */ cholmod_common Common, *cc; /* Compress column form sparse matrix */ cholmod_sparse *S, *ST; /* forcing function on input; voltages after solving */ cholmod_dense *b, *v0, *r; size_t *Si, *Sp, *Snz, *bi, *bj; double *Sx, *bx, *v0x; /* store the sparse matrix */ FILE *store; char filename[100]; /* start CHOLMOD */ cc = &Common; cholmod_l_start(cc); /* initialize timing variables*/ getrusage(0, &usage); now_s = usage.ru_utime.tv_sec; now_u = usage.ru_utime.tv_usec; time_past = now_s + now_u / 1.e6; time_now = time_past; /* total nodes in the grid */ nodes = IMAX * JMAX * KMAX; /* Allocate space for connectivity matrix and forcing vector. */ S = cholmod_l_allocate_sparse(nodes, nodes, 7 * nodes, 0, 0, 0, CHOLMOD_REAL, cc); b = cholmod_l_allocate_dense(nodes, 1, nodes, CHOLMOD_REAL, cc); bx = b->x; v0 = cholmod_l_allocate_dense(nodes, 1, nodes, CHOLMOD_REAL, cc); v0x = v0->x; /*================================================================*/ /*=============== make connectivity matrix ======================*/ /*================================================================*/ Si = (size_t *) S->i; Sp = (size_t *) S->p; Sx = (double *) S->x; Snz = (size_t *) S->nz; n = 0; Sp[0] = 0; for (k = 0; k < KMAX; k++) { for (j = 0; j < JMAX; j++) { for (i = 0; i < IMAX; i++) { /* Global index in x-fastest order*/ m = k * IMAX * JMAX + j * IMAX + i; Sp[m + 1] = Sp[m]; v0x[m] = (k + 1.0) / (KMAX + 1.0); /* Six coef.s of every node */ c_below = (mepr(i - 0.5, j - 0.5, k - 0.5) + mepr(i + 0.5, j - 0.5, k - 0.5) + mepr(i - 0.5, j + 0.5, k - 0.5) + mepr(i + 0.5, j + 0.5, k - 0.5)) / 4.0; c_front = (mepr(i - 0.5, j - 0.5, k - 0.5) + mepr(i + 
0.5, j - 0.5, k - 0.5) + mepr(i - 0.5, j - 0.5, k + 0.5) + mepr(i + 0.5, j - 0.5, k + 0.5)) / 4.0; c_left = (mepr(i - 0.5, j + 0.5, k - 0.5) + mepr(i - 0.5, j - 0.5, k - 0.5) + mepr(i - 0.5, j + 0.5, k + 0.5) + mepr(i - 0.5, j - 0.5, k + 0.5)) / 4.0; c_right = (mepr(i + 0.5, j + 0.5, k - 0.5) + mepr(i + 0.5, j - 0.5, k - 0.5) + mepr(i + 0.5, j + 0.5, k + 0.5) + mepr(i + 0.5, j - 0.5, k + 0.5)) / 4.0; c_back = (mepr(i - 0.5, j + 0.5, k - 0.5) + mepr(i + 0.5, j + 0.5, k - 0.5) + mepr(i - 0.5, j + 0.5, k + 0.5) + mepr(i + 0.5, j + 0.5, k + 0.5)) / 4.0; c_above = (mepr(i - 0.5, j - 0.5, k + 0.5) + mepr(i + 0.5, j - 0.5, k + 0.5) + mepr(i - 0.5, j + 0.5, k + 0.5) + mepr(i + 0.5, j + 0.5, k + 0.5))/4.0; /* Self term. */ c_self = -(c_above + c_below + c_left + c_right + c_front + c_back); Si[n] = m; Sx[n] = c_self; n++; Sp[m + 1]++; /* Node below. Ensure not on bottom face */ if (k != 0) { m_below = m - IMAX * JMAX; Si[n] = m_below; Sx[n] = c_below; n++; Sp[m + 1]++; } else { bx[m] = -c_below * VBOT; } /* Node front. Ensure not on front face. */ if (j != 0) { m_front = m - IMAX; } else { m_front = m + (JMAX - 1) * IMAX; } Si[n] = m_front; Sx[n] = c_front; n++; Sp[m + 1]++; /* Node to left. Ensure not on left face. */ if (i != 0) { m_left = m - 1; } else { m_left = m + IMAX -1; } Si[n] = m_left; Sx[n] = c_left; n++; Sp[m + 1]++; /* Node to right. Ensure not on right face. */ if (i != IMAX - 1) { m_right = m + 1; } else{ m_right = m - IMAX + 1; } Si[n] = m_right; Sx[n] = c_right; n++; Sp[m + 1]++; /* Node back. Ensure not on back face. */ if (j != JMAX - 1) { m_back = m + IMAX; } else { m_back = m - (JMAX - 1) * IMAX; } Si[n] = m_back; Sx[n] = c_back; n++; Sp[m + 1]++; /* Node top. 
Ensure not on top face */ if (k != KMAX - 1) { m_above = m + IMAX * JMAX; Si[n] = m_above; Sx[n] = c_above; n++; Sp[m + 1]++; } else { bx[m] = -c_above * VTOP; } Snz[m] = Sp[m + 1] - Sp[m]; } } } /*====================================================================*/ /*===================Done creating connectivity matrix.===============*/ /*====================================================================*/ /* report time*/ getrusage(0, &usage); now_s = usage.ru_utime.tv_sec; now_u = usage.ru_utime.tv_usec; time_now = now_s + now_u / 1.e6; printf("\nFinished creating connectivity matrix\n" " Incremental time %f\n" " Running time %f\n", time_now - time_past, time_now); time_past = time_now; /* Print all three matrixes */ cholmod_l_print_sparse(S, "S", cc); cholmod_l_print_dense(b, "b", cc); cholmod_l_print_dense(v0, "v0", cc); /* Allocate residual vector */ r = cholmod_l_zeros(nodes, 1, CHOLMOD_REAL, cc); /* The Preconditioned Conjugate Gradient Method */ /* Calculate the first residual value */ double one[2], zero[2], minusone[2]; zero[0] = 0.0; zero[1] = 0.0; one[0] = 1.0; one[1] = 0.0; minusone[0] = -1.0; minusone[1] = 0.0; cholmod_l_copy_dense2(b, r, cc); cholmod_l_sdmult(S, 0, minusone, one, v0, r, cc); printf("Initial 2-norm = %f\n", cholmod_l_norm_dense(r, 2, cc)); printf("Initial 1-norm = %f\n", cholmod_l_norm_dense(r, 1, cc)); printf("Initial 0-norm = %f\n", cholmod_l_norm_dense(r, 0, cc)); /* The iteration */ double rho1, rho0, beta = 0.0, alpha = 0.0; cholmod_dense *p1, *p0, *q; double *p1x, *p0x, *qx, *rx; /* p1 = cholmod_l_allocate_dense(nodes, 1, nodes, CHOLMOD_REAL, cc); p0 = cholmod_l_allocate_dense(nodes, 1, nodes, CHOLMOD_REAL, cc); q = cholmod_l_allocate_dense(nodes, 1, nodes, CHOLMOD_REAL, cc); */ p1 = cholmod_l_zeros(nodes, 1, CHOLMOD_REAL, cc); p0 = cholmod_l_zeros(nodes, 1, CHOLMOD_REAL, cc); q = cholmod_l_zeros(nodes, 1, CHOLMOD_REAL, cc); p1x = (double *) p1->x; p0x = (double *) p0->x; qx = (double *) q->x; rx = (double *) r->x; int 
iter; for (iter = 0; iter < MAX_ITER; iter++) { if (cholmod_l_norm_dense(r, 0, cc) < 1e-10) break; rho1 = cholmod_l_norm_dense(r, 2, cc); rho1 = rho1 * rho1; if (iter == 0) { cholmod_l_copy_dense2(r, p1, cc); } else { beta = rho1 / rho0; for (i = 0; i < nodes; i++) { p1x[i] = rx[i] + beta * p0x[i]; } } cholmod_l_sdmult(S, 0, one, zero, p1, q, cc); alpha = 0; for (i = 0; i < nodes; i++) { alpha += p1x[i] * qx[i]; } alpha = rho1 / alpha; for (i = 0; i < nodes; i++) { v0x[i] += alpha * p1x[i]; rx[i] -= alpha * qx[i]; } /* printf("iter = %d\n: rho1 = %f, rho0 = %f, alpha = %f, beta = %f\n", iter, rho1, rho0, alpha, beta); */ cholmod_l_copy_dense2(p1, p0, cc); rho0 = rho1; } cholmod_l_copy_dense2(b, r, cc); cholmod_l_sdmult(S, 0, minusone, one, v0, r, cc); printf("After %d iterations:\n", iter); printf("Final 2-norm: %f\n", cholmod_l_norm_dense(r, 2, cc)); printf("Final 1-norm: %f\n", cholmod_l_norm_dense(r, 1, cc)); printf("Final 0-norm: %f\n", cholmod_l_norm_dense(r, 0, cc)); /* sort sparse matrix and store it cholmod_l_sort(S, cc); S->stype = 0; */ /* Check if S is symmetric ST = cholmod_l_transpose(S, 2, cc); store = fopen("LpT.dat", "w"); cholmod_l_write_sparse(store, ST, NULL, NULL, cc); fclose(store); cholmod_l_free_sparse(&ST, cc); */ /* store the sparse matrix */ /* store = fopen("Laplace.dat", "w"); cholmod_l_write_sparse(store, S, NULL, NULL, cc); fclose(store); cholmod_l_free_sparse(&S, cc); */ /* store the force vector */ /* store = fopen("Force.dat", "w"); cholmod_l_write_dense(store, b, NULL, cc); fclose(store); cholmod_l_free_dense(&b,cc); */ /* store the guess vector */ store = fopen("Voltage.dat", "w"); cholmod_l_write_dense(store, v0, NULL, cc); fclose(store); cholmod_l_free_dense(&v0, cc); /* store the residual vector */ store = fopen("Residual.dat", "w"); cholmod_l_write_dense(store, r, NULL, cc); fclose(store); cholmod_l_free_dense(&r, cc); /* report time*/ getrusage(0, &usage); now_s = usage.ru_utime.tv_sec; now_u = usage.ru_utime.tv_usec; 
time_now = now_s + now_u / 1.e6; printf("\nFinished writing matrix\n" " Incremental time %f\n" " Running time %f\n", time_now - time_past, time_now); cholmod_l_finish(cc); return 0; }
int main(int argc, char* argv[]) { const int bufsize = 512; char buffer[bufsize]; int m,n,S; double time_st,time_end,time_avg; //omp_set_num_threads(2); // printf("\n-----------------\nnumber of threads fired = %d\n-----------------\n",(int)omp_get_num_threads()); if(argc!=2) { cout<<"Insufficient arguments"<<endl; return 1; } graph G; cerr<<"Start reading "; // time_st=dsecnd(); G.create_graph(argv[1]); // time_end=dsecnd(); // time_avg = (time_end-time_st); // cout<<"Success "<<endl; // cerr<<"Reading time "<<time_avg<<endl; cerr<<"Constructing Matrices "; // time_st=dsecnd(); G.construct_MNA(); G.construct_NA(); // time_end=dsecnd(); // time_avg = (time_end-time_st); // cerr<<"Done "<<time_avg<<endl; // G.construct_sparse_MNA(); m=G.node_array.size()-1; n=G.voltage_edge_id.size(); cout<<endl; cout<<"MATRIX STAT:"<<endl; cout<<"Nonzero elements: "<<G.nonzero<<endl; cout<<"Number of Rows: "<<m+n<<endl; cout<<"Nonzero in G: "<<G.Gnonzero<<endl; cout<<"Number of rows in G: "<<m<<endl; cout<<"Nonzero in P: "<<G.Pnonzero<<endl; cout<<"Number of rows in P: "<<m<<endl; // printf("\n Nonzero = %d", G.nonzero); // printf("\n Rows = %d", m+n); cout<<"MAT val: "<<endl; int i,j; G.Mat_val[0] += 100; G.Gmat[0] +=100; /* for(i=0;i<G.Gnonzero;i++) cout<<" "<<G.Gmat[i]; cout<<endl; for(i=0;i<G.Gnonzero;i++) cout<<" "<<G.Gcolumns[i]; cout<<endl; for(i=0;i<m+1;i++) cout<<" "<<G.GrowIndex[i]; cout<<endl; for(i=0;i<m;i++) printf(" %.8f", G.b1[i]); cout<<endl; for(i=0;i<m;i++) printf(" %.8f", G.x1[i]); cout<<endl; */ SuiteSparse_long *Gnz = (SuiteSparse_long*)calloc(m,sizeof(SuiteSparse_long)); for(i=0;i<m;i++) { // cout<<endl; SuiteSparse_long startindex=G.GrowIndex[i]; SuiteSparse_long endindex=G.GrowIndex[i+1]; Gnz[i] = endindex - startindex; // for(j=startindex;j<endindex;j++) // cout<<" "<<G.Gmat[j]; // cout<<endl; } /* for(i=0;i<G.Pnonzero;i++) cout<<" "<<G.Pmat[i]; cout<<endl; for(i=0;i<G.Pnonzero;i++) cout<<" "<<G.Pcolumns[i]; cout<<endl; for(i=0;i<m+1;i++) cout<<" 
"<<G.ProwIndex[i]; cout<<endl; /* for(i=0;i<m;i++) printf(" %.8f", G.b1[i]); cout<<endl; for(i=0;i<m;i++) printf(" %.8f", G.x1[i]); cout<<endl; for(i=0;i<m;i++) { cout<<endl; int startindex=G.ProwIndex[i]; int endindex=G.ProwIndex[i+1]; for(j=startindex;j<endindex;j++) cout<<" "<<G.Pmat[j]; cout<<endl; } /* for(i=0;i<G.nonzero;i++) cout<<" "<<G.Mat_val[i]; cout<<endl; for(i=0;i<G.nonzero;i++) cout<<" "<<G.columns[i]; cout<<endl; for(i=0;i<m+n+1;i++) cout<<" "<<G.rowIndex[i]; cout<<endl; for(i=0;i<m+n;i++) printf(" %.8f", G.b[i]); cout<<endl; for(i=0;i<m+n;i++) printf(" %.8f", G.x[i]); cout<<endl; for(i=0;i<m+n;i++) { cout<<endl; int startindex=G.rowIndex[i]; int endindex=G.rowIndex[i+1]; for(j=startindex;j<endindex;j++) cout<<" "<<G.Mat_val[j]; cout<<endl; } */ /* for (i=0;i<m+n+1;i++) { //cout<<endl; if(G.rowIndex[i]==G.rowIndex[i+1]) break; for(j=G.rowIndex[i];j<G.rowIndex[i+1];j++) { if(G.Mat_val[j]>10) cout<<G.Mat_val[j]<<"\t"; } //cout<<endl; /*for(j=G.rowIndex[i];j<G.rowIndex[i+1];j++) { cout<<G.columns[j]<<"\t"; } //cout<<endl; } cout<<endl; */ //printing the matrix printf("\n Fine till here"); printf("\n"); // int* rowmIndex=(int*)calloc(m+1,sizeof(int)); printf("\n Fine till here"); printf("\n"); //int rowmIndex[5]={1,2,3,4,5}; /* for(i=0;i<m+1;i++) { rowmIndex[i]=G.rowIndex[i]; printf(" %d", rowmIndex[i]); } */ printf("\n Allocating GPU memory\n"); cudaDeviceReset(); size_t free, total; cudaMemGetInfo(&free, &total); printf("\n Free Mem = %lf MB, Total mem = %lf MB\n", (double)(free)/(1024*1024), (double)(total)/(1024*1024)); double *dev_csrValA, *dev_b, *dev_x; int *dev_csrRowIdxA, *dev_csrColA; double *dev_GcsrVal, *dev_b1, *dev_x1; double *dev_PcsrVal, *dev_b2, *dev_x2; int *dev_GcsrRowIdx, *dev_PcsrRowIdx, *dev_GcsrCol, *dev_PcsrCol; cudaMalloc((void**)&dev_PcsrVal, G.Pnonzero*sizeof(double)); cudaMalloc((void**)&dev_PcsrRowIdx, (m+1)*sizeof(int)); cudaMalloc((void**)&dev_PcsrCol, G.Pnonzero*sizeof(int)); cudaMalloc((void**)&dev_b1, 
(m)*sizeof(double)); cudaMalloc((void**)&dev_b2, n*sizeof(double)); cudaMalloc((void**)&dev_x1, m*sizeof(double)); cudaMalloc((void**)&dev_x2, n*sizeof(double)); cudaMemcpy(dev_b1, G.b1, (m)*sizeof(double), cudaMemcpyHostToDevice); cudaMemcpy(dev_x1, G.x1, (m)*sizeof(double), cudaMemcpyHostToDevice); cudaMemcpy(dev_PcsrVal, G.Pmat, G.Pnonzero*sizeof(double), cudaMemcpyHostToDevice); cudaMemcpy(dev_b2, G.b2, (n)*sizeof(double), cudaMemcpyHostToDevice); cudaMemcpy(dev_x2, G.x2, (n)*sizeof(double), cudaMemcpyHostToDevice); cudaMemcpy(dev_PcsrRowIdx, G.ProwIndex, (m+1)*sizeof(int), cudaMemcpyHostToDevice); cudaMemcpy(dev_PcsrCol, G.Pcolumns, (G.Pnonzero)*sizeof(int), cudaMemcpyHostToDevice); /* Matrix has been created and stored in CSR format. However, CHOLMOD requires CSC format. Since our matrix is symmetric positive definite, we can simply swap csrColA with csrRowIdx and vice versa */ /* Starting the CHOLMOD routine now*/ printf("\n Initiating CHOLMOD\n"); cholmod_sparse *A, *P; cholmod_dense *x, *b, *r, *midvec; cholmod_factor *L; cholmod_common *Common, cm; Common = &cm; cholmod_l_start(Common); // &Common->useGPU=1; printf("\n m = %d, G.Gnonzero = %d\n", m, G.Gnonzero); cholmod_sparse *C = cholmod_l_allocate_sparse((size_t)(m), (size_t)(m), (size_t)(G.Gnonzero), 1, 0, 1, 1, Common); // P = cholmod_l_allocate_sparse((size_t)(m), (size_t)(n), (size_t)(G.Pnonzero), 1, 0, 0, 1, Common); // printf("\n Allocated \n"); C->itype = CHOLMOD_LONG; // printf("\n Itype \n"); C->p = &G.GrowIndex[0]; // printf("\n Columns \n"); C->nz = &Gnz[0]; // printf("\n Rows \n"); C->i = &G.Gcolumns[0]; C->dtype = 0; C->x = &G.Gmat[0]; /* P->itype = CHOLMOD_LONG; P->p = &G.ProwIndex[0]; P->nz = &Pnz[0]; P->i = &G.Pcolumns[0]; P->dtype = 0; P->x = &G.Pmat[0]; */ b = cholmod_l_allocate_dense((size_t)(m), 1, (size_t)(m), 1, Common); b->dtype=0; b->x = &G.b1[0]; b->xtype = 1; printf("\n CHOLMOD manually set\n"); cholmod_l_print_sparse(C, "A", Common); cholmod_l_print_dense(b, "b", Common); 
cudaEvent_t start, stop; cudaEventCreate(&start); cudaEventCreate(&stop); cudaEventRecord(start, 0); L = cholmod_l_analyze(C, Common); printf("\n Analysis: Flops: %g \t lnz: %g\n", Common->fl, Common->lnz); cholmod_l_factorize(C, L, Common); x = cholmod_l_solve(CHOLMOD_A, L, b, Common); cudaEventRecord(stop, 0); cudaEventSynchronize(stop); float elapsedTime; cudaEventElapsedTime(&elapsedTime, start, stop); printf("\n Time : %.6f secs :\n", elapsedTime); cholmod_l_print_dense(x, "X", Common); double *x1_mod = (double*)x->x; cudaMemcpy(dev_x1, x1_mod, m*sizeof(double), cudaMemcpyHostToDevice); cusparseStatus_t cuSparseStatus; cusparseHandle_t cuspHandle; cuSparseStatus = cusparseCreate(&cuspHandle); cusparseMatDescr_t descrP; cusparseCreateMatDescr(&descrP); cusparseSetMatType(descrP, CUSPARSE_MATRIX_TYPE_GENERAL); cusparseSetMatIndexBase(descrP, CUSPARSE_INDEX_BASE_ZERO); double *dev_res1, *dev_simple; double *res1 = (double*)calloc(n,sizeof(double)); cudaMalloc((void**)&dev_res1, n*sizeof(double)); cudaMalloc((void**)&dev_simple, n*sizeof(double)); const double alpha = 1.0, beta=0.0; //alpha = 1.0; //beta = 0.0; //solving P^T * G^-1 * b1 Result stored in dev_res1 cuSparseStatus = cusparseDcsrmv(cuspHandle, CUSPARSE_OPERATION_TRANSPOSE, m, n, G.Pnonzero, &alpha, descrP, dev_PcsrVal, dev_PcsrRowIdx, dev_PcsrCol, dev_x1, &beta, dev_res1); if(cuSparseStatus == CUSPARSE_STATUS_SUCCESS) { /* cudaMemcpy(res1, dev_res1, n*sizeof(double), cudaMemcpyDeviceToHost); for(i=0;i<n;i++) { printf("\nres1[%d] = %.8f", i, res1[i]); } printf("\n P^T * G^-1 * b1 done! 
Vector stored in res1"); */ } else { printf("\n P^T * G^-1 * b1 failed\n"); exit(1); } const double alphaneg = -1.0; //Solving P^T * G^-1 * b1 - b2 ; Result stored in dev_res1 cublasStatus_t cuBlasStatus; cublasHandle_t cubHandle; cuBlasStatus = cublasCreate(&cubHandle); cuBlasStatus = cublasDaxpy(cubHandle, n, &alphaneg, dev_b2, 1, dev_res1, 1); if(cuBlasStatus == CUBLAS_STATUS_SUCCESS) { // cudaMemcpy(res1, dev_res1, n*sizeof(double), cudaMemcpyDeviceToHost); // for(i=0;i<n;i++) // { // printf("\nres1[%d] = %.8f", i, res1[i]); // } printf("\n res1 = res1 - b2 done\n"); } else { printf("\n res1 = res1 - b2 failed\n"); } ///NOW COMPUTING G^-1 * P int k = 0; int breakloop=0; double **midMat = (double**)malloc(m*sizeof(double*)); for(i=0;i<m;i++) { midMat[i] = (double*)calloc(n,sizeof(double)); } cudaEventRecord(start, 0); for(i=0;i<n;i++) { breakloop = 0; double *vect = (double*)calloc(m,sizeof(double*)); for(j=0;j<m;j++) { int startin = G.ProwIndex[j]; int endin = G.ProwIndex[j+1]; if(startin == endin) continue; k = startin; while(k<endin) { if(G.Pcolumns[k] == i) { vect[j] = G.Pmat[k]; breakloop=1; break; } k++; } if(breakloop == 1) { break; } } midvec = cholmod_l_allocate_dense((size_t)(m), 1, (size_t)(m), 1, Common); midvec->dtype=0; midvec->x=&vect[0]; midvec->xtype = 1; cholmod_dense *res2; res2 = cholmod_l_solve(CHOLMOD_A, L, midvec, Common); double *re = (double*)res2->x; // printf("\n vector %d is:\n", i); int i1, j1, k1; // for(j1=0;j1<m;j1++) // { // midmat2flat[i+j1*n] = re[j1]; // printf(" %lf", re[j1]); // } // printf("\n"); for(i1=0;i1<m;i1++) { midMat[i1][i] = re[i1]; } cholmod_l_free_dense(&midvec, Common); } /* printf("\n Midmat = \n"); for(i=0;i<m;i++) { for(j=0;j<n;j++) { printf(" %lf", midMat[i][j]); } printf("\n"); } */ double *midMatflat = (double*)calloc((m*n),sizeof(double)); double *dev_midMat; double *dev_solut; int counter = 0; for(i=0;i<n;i++) { for(j=0;j<m;j++) { midMatflat[counter] = midMat[j][i]; counter++; } } 
cudaMalloc((void**)&dev_midMat, m*n*sizeof(double)); cudaMalloc((void**)&dev_solut, n*n*sizeof(double)); cudaMemcpy(dev_midMat, midMatflat, m*n*sizeof(double), cudaMemcpyHostToDevice); //Solving P^T * midMat; Result stored in dev_solut cuSparseStatus = cusparseDcsrmm(cuspHandle, CUSPARSE_OPERATION_TRANSPOSE, m, n, n, G.Pnonzero, &alpha, descrP, dev_PcsrVal, dev_PcsrRowIdx, dev_PcsrCol, dev_midMat, m, &beta, dev_solut, n); if(cuSparseStatus == CUSPARSE_STATUS_SUCCESS) { printf("\n Solved P^T * G^-1 * P. Result stored in solut\n"); } else { printf("\n Failed to Solve P^T * G^-1 * P \n"); exit(1); } /* double *matGflat = (double*)calloc(n*n,sizeof(double)); cudaMemcpy(matGflat, dev_solut, n*n*sizeof(double), cudaMemcpyDeviceToHost); counter = 0; printf("\nBefore LU starts\n"); for(i=0;i<n;i++) { for(j=0;j<n;j++) { printf(" %lf ", matGflat[counter]); counter++; } printf("\n"); } printf("\n"); */ cusolverStatus_t cuSolverStatus; cusolverDnHandle_t cudenHandle; cuSolverStatus = cusolverDnCreate(&cudenHandle); int Lwork = 0; cuSolverStatus = cusolverDnDgetrf_bufferSize(cudenHandle, n, n, dev_solut, n, &Lwork); if(cuSolverStatus == CUSOLVER_STATUS_SUCCESS) { printf("\n Buffer works\n Lwork = %d\n", Lwork); } else { exit(1); } double *dev_Workspace; int *dev_Ipiv, *dev_Info; cudaMalloc((void**)&dev_Workspace, Lwork*sizeof(double)); cudaMalloc((void**)&dev_Ipiv, n*sizeof(int)); cudaMalloc((void**)&dev_Info, sizeof(int)); //Calculating LU for dev_solut // double *nnmat = (double*)calloc(n*n,sizeof(double)); // cudaMemcpy(nnmat, dev_solut, n*n*sizeof(double), cudaMemcpyDeviceToHost); // cuSolverStatus = cusolverDnDgetrfHost(cudenHandle, n, n, cuSolverStatus = cusolverDnDgetrf(cudenHandle, n, n, dev_solut, n, dev_Workspace, dev_Ipiv, dev_Info); if(cuSolverStatus == CUSOLVER_STATUS_SUCCESS) { printf("\n solut has be defactorized into L and U. dev_Ipiv * solut = L * U\n"); } else { printf("\n Unable to defactorize solut into LU\n"); exit(1); } //solving dev_solut * x = dev_res1. 
Result stored in dev_res1 cuSolverStatus = cusolverDnDgetrs(cudenHandle, CUBLAS_OP_N, n, 1, dev_solut, n, dev_Ipiv, dev_res1, n, dev_Info); if(cuSolverStatus == CUSOLVER_STATUS_SUCCESS) { printf("\n Solution obtained for x2 \n"); } else { printf("\n LU decomposition obtained by LU solver failed\n"); } /* cudaMemcpy(G.x2, dev_res1, n*sizeof(double), cudaMemcpyDeviceToHost); printf("\n x2 = \n"); for(i=0;i<n;i++) { printf("\n x2[%d] = %lf", i, G.x2[i]); } */ double *dev_dummy; cudaMalloc((void**)&dev_dummy, m*sizeof(double)); cudaMemset(dev_dummy, 0.0, m*sizeof(double)); printf("\n Starting solving for x1 \n"); //Solving for x1 //Solving G^-1 * P * x2; G^-1 * P is stored in midMat cuBlasStatus = cublasDgemv(cubHandle, CUBLAS_OP_N, m, n, &alpha, dev_midMat, m, dev_res1, 1, &beta, dev_dummy, 1); if(cuBlasStatus == CUBLAS_STATUS_SUCCESS) { /* double *toprint = (double*)calloc(m,sizeof(double)); cudaMemcpy(toprint, dev_dummy, m*sizeof(double), cudaMemcpyDeviceToHost); printf("\n Intermediate vector :\n"); for(i=0;i<m;i++) { printf("\ndummy[%d] = %lf", i, toprint[i]); } */ printf("\n midmat * x2 obtained. Stored in dummy\n"); } else { printf("\n Failed to obtain midmat * x2\n"); } cuBlasStatus = cublasDaxpy(cubHandle, m, &alphaneg, dev_dummy, 1, dev_x1, 1); if(cuBlasStatus == CUBLAS_STATUS_SUCCESS) { /* cudaMemcpy(G.x1, dev_x1, m*sizeof(double), cudaMemcpyDeviceToHost); printf("\n x1 = \n"); for(i=0;i<m;i++) { printf("\n x1[%d] = %.15f", i, G.x1[i]); } */ printf("\n x1 obtained"); } else { printf("\n Failed to obtain x1"); } printf("\n Solver finished its work\n"); /* cudaEventRecord(stop, 0); cudaEventSynchronize(stop); cudaEventElapsedTime(&elapsedTime, start, stop); printf("\n Time: %.6f msecs :\n", elapsedTime); */ cholmod_l_finish(Common); return 0; }
template <typename Entry> int spqr_1fixed ( // inputs, not modified double tol, // only accept singletons above tol Long bncols, // number of columns of B cholmod_sparse *A, // m-by-n sparse matrix // output arrays, neither allocated nor defined on input. Long **p_R1p, // size n1rows+1, R1p [k] = # of nonzeros in kth // row of R1. NULL if n1cols == 0. Long **p_P1inv, // size m, singleton row inverse permutation. // If row i of A is the kth singleton row, then // P1inv [i] = k. NULL if n1cols is zero. cholmod_sparse **p_Y, // on output, only the first n-n1cols+1 entries of // Y->p are defined (if Y is not NULL), where // Y = [A B] or Y = [A2 B2]. If B is empty and // there are no column singletons, Y is NULL Long *p_n1cols, // number of column singletons found Long *p_n1rows, // number of corresponding rows found // workspace and parameters cholmod_common *cc ) { cholmod_sparse *Y ; Long *P1inv, *R1p, *Yp, *Qrows, *Ap, *Ai ; char *Mark ; Entry *Ax ; Long i, j, k, p, d, row, n1rows, n1cols, ynz, iold, inew, kk, m, n, xtype ; // ------------------------------------------------------------------------- // get inputs // ------------------------------------------------------------------------- xtype = spqr_type <Entry> ( ) ; m = A->nrow ; n = A->ncol ; Ap = (Long *) A->p ; Ai = (Long *) A->i ; Ax = (Entry *) A->x ; // set outputs to NULL in case of early return *p_R1p = NULL ; *p_P1inv = NULL ; *p_Y = NULL ; *p_n1cols = EMPTY ; *p_n1rows = EMPTY ; // ------------------------------------------------------------------------- // allocate workspace // ------------------------------------------------------------------------- Mark = (char *) cholmod_l_calloc (m, sizeof (char), cc) ; Qrows = (Long *) cholmod_l_malloc (n, sizeof (Long), cc) ; if (cc->status < CHOLMOD_OK) { // out of memory cholmod_l_free (m, sizeof (char), Mark, cc) ; cholmod_l_free (n, sizeof (Long), Qrows, cc) ; return (FALSE) ; } // ------------------------------------------------------------------------- // 
find singletons; no column permutations allowed // ------------------------------------------------------------------------- n1cols = 0 ; // number of column singletons found n1rows = 0 ; // number of corresponding singleton rows for (j = 0 ; j < n ; j++) { // count the number of unmarked rows in column j Entry aij = 0 ; d = 0 ; row = EMPTY ; for (p = Ap [j] ; d < 2 && p < Ap [j+1] ; p++) { i = Ai [p] ; if (!Mark [i]) { // row i is not taken by a prior column singleton. If this // is the only unflagged row and the value is large enough, // it will become the row for this column singleton. aij = Ax [p] ; row = i ; d++ ; } } if (d == 0) { // j is a dead column singleton Qrows [n1cols++] = EMPTY ; } else if (d == 1 && spqr_abs (aij, cc) > tol) { // j is a live column singleton Qrows [n1cols++] = row ; // flag row i as taken Mark [row] = TRUE ; n1rows++ ; } else { // j is not a singleton; quit searching break ; } } // ------------------------------------------------------------------------- // construct P1inv permutation, row counts R1p, and col pointers Yp // ------------------------------------------------------------------------- if (n1cols == 0 && bncols == 0) { // --------------------------------------------------------------------- // no singletons, and B empty; Y=A will be done via pointer alias // --------------------------------------------------------------------- Y = NULL ; Yp = NULL ; P1inv = NULL ; R1p = NULL ; } else if (n1cols == 0) { // --------------------------------------------------------------------- // no singletons in the matrix; no R1 matrix, no P1inv permutation // --------------------------------------------------------------------- // Y has no entries yet; nnz(Y) will be determined later Y = cholmod_l_allocate_sparse (m, n+bncols, 0, FALSE, TRUE, 0, xtype, cc) ; if (cc->status < CHOLMOD_OK) { // out of memory cholmod_l_free (m, sizeof (char), Mark, cc) ; cholmod_l_free (n, sizeof (Long), Qrows, cc) ; return (FALSE) ; } Yp = (Long *) Y->p ; 
ASSERT (n1rows == 0) ; P1inv = NULL ; R1p = NULL ; // --------------------------------------------------------------------- // copy the column pointers of A for the first part of Y = [A B] // --------------------------------------------------------------------- ynz = Ap [n] ; for (k = 0 ; k <= n ; k++) { Yp [k] = Ap [k] ; } } else { // --------------------------------------------------------------------- // construct the row singleton permutation // --------------------------------------------------------------------- // Y has no entries yet; nnz(Y) will be determined later Y = cholmod_l_allocate_sparse (m-n1rows, n-n1cols+bncols, 0, TRUE, TRUE, 0, xtype, cc) ; P1inv = (Long *) cholmod_l_malloc (m, sizeof (Long), cc) ; R1p = (Long *) cholmod_l_calloc (n1rows+1, sizeof (Long), cc) ; if (cc->status < CHOLMOD_OK) { // out of memory cholmod_l_free_sparse (&Y, cc) ; cholmod_l_free (m, sizeof (Long), P1inv, cc) ; cholmod_l_free (n1rows+1, sizeof (Long), R1p, cc) ; cholmod_l_free (m, sizeof (char), Mark, cc) ; cholmod_l_free (n, sizeof (Long), Qrows, cc) ; return (FALSE) ; } Yp = (Long *) Y->p ; #ifndef NDEBUG for (i = 0 ; i < m ; i++) P1inv [i] = EMPTY ; #endif kk = 0 ; for (k = 0 ; k < n1cols ; k++) { i = Qrows [k] ; if (i != EMPTY) { // row i is the kk-th singleton row ASSERT (Mark [i]) ; ASSERT (P1inv [i] == EMPTY) ; P1inv [i] = kk ; kk++ ; } } for (i = 0 ; i < m ; i++) { if (!Mark [i]) { // row i is not a singleton row ASSERT (P1inv [i] == EMPTY) ; P1inv [i] = kk ; kk++ ; } } ASSERT (kk == m) ; // --------------------------------------------------------------------- // find row counts for R11 // --------------------------------------------------------------------- for (k = 0 ; k < n1cols ; k++) { for (p = Ap [k] ; p < Ap [k+1] ; p++) { iold = Ai [p] ; inew = P1inv [iold] ; ASSERT (inew < n1rows) ; R1p [inew]++ ; // a singleton row; in R1 } } // --------------------------------------------------------------------- // find row counts for R12 and column pointers for A2 
part of Y // --------------------------------------------------------------------- ynz = 0 ; for ( ; k < n ; k++) { Yp [k-n1cols] = ynz ; for (p = Ap [k] ; p < Ap [k+1] ; p++) { iold = Ai [p] ; inew = P1inv [iold] ; if (inew < n1rows) { R1p [inew]++ ; // a singleton row; in R1 } else { ynz++ ; // not a singleton row; in A2 } } } Yp [n-n1cols] = ynz ; #ifndef NDEBUG PR (("n1cols: %ld\n", n1cols)) ; for (i = 0 ; i < n1rows ; i++) { PR (("R1p [%ld] is %ld\n", i, R1p [i])) ; ASSERT (R1p [i] > 0) ; } #endif } // ------------------------------------------------------------------------- // free workspace and return results // ------------------------------------------------------------------------- cholmod_l_free (n, sizeof (Long), Qrows, cc) ; cholmod_l_free (m, sizeof (char), Mark, cc) ; *p_R1p = R1p ; *p_P1inv = P1inv ; *p_Y = Y ; *p_n1cols = n1cols ; *p_n1rows = n1rows ; return (TRUE) ; }