AmesosBTF_CrsMatrix::NewTypeRef AmesosBTF_CrsMatrix:: operator()( OriginalTypeRef orig ) { origObj_ = &orig; const Epetra_BlockMap & OldRowMap = orig.RowMap(); const Epetra_BlockMap & OldColMap = orig.ColMap(); // Check if the matrix is on one processor. int myMatProc = -1, matProc = -1; int myPID = orig.Comm().MyPID(); for (int proc=0; proc<orig.Comm().NumProc(); proc++) { if (orig.NumGlobalNonzeros() == orig.NumMyNonzeros()) myMatProc = myPID; } orig.Comm().MaxAll( &myMatProc, &matProc, 1 ); if( orig.RowMap().DistributedGlobal() && matProc == -1) { cout << "FAIL for Global!\n"; abort(); } if( orig.IndicesAreGlobal() && matProc == -1) { cout << "FAIL for Global Indices!\n"; abort(); } int nGlobal = orig.NumGlobalRows(); int n = orig.NumMyRows(); int nnz = orig.NumMyNonzeros(); if( debug_ ) { cout << "Orig Matrix:\n"; cout << orig << endl; } // Create std CRS format (without elements above the threshold) vector<int> ia(n+1,0); int maxEntries = orig.MaxNumEntries(); vector<int> ja(nnz), ja_tmp(nnz); vector<double> jva_tmp(maxEntries); int cnt; Epetra_CrsGraph strippedGraph( Copy, OldRowMap, OldColMap, 0 ); for( int i = 0; i < n; ++i ) { orig.ExtractMyRowCopy( i, maxEntries, cnt, &jva_tmp[0], &ja_tmp[0] ); ia[i+1] = ia[i]; for( int j = 0; j < cnt; ++j ) if( fabs(jva_tmp[j]) > threshold_ ) ja[ ia[i+1]++ ] = ja_tmp[j]; int new_cnt = ia[i+1] - ia[i]; strippedGraph.InsertMyIndices( i, new_cnt, &ja[ ia[i] ] ); } nnz = ia[n]; strippedGraph.FillComplete(); if( debug_ ) { cout << "Stripped Graph\n"; cout << strippedGraph; } // Compute the BTF permutation only on the processor that has the graph. if ( matProc == myPID ) { if( debug_ ) { cout << "-----------------------------------------\n"; cout << "CRS Format Graph (stripped) \n"; cout << "-----------------------------------------\n"; for( int i = 0; i < n; ++i ) { cout << ia[i] << " - " << ia[i+1] << " : "; for( int j = ia[i]; j<ia[i+1]; ++j ) cout << " " << ja[j]; cout << endl; } cout << "-----------------------------------------\n"; } // Transpose the graph, not the values int j=0, next=0; vector<int> ia_tmp(n+1,0); // Compute row lengths for (int i = 0; i < n; i++) for (int k = ia[i]; k < ia[i+1]; k++) ++ia_tmp[ ja[k]+1 ]; // Compute pointers from row lengths ia_tmp[0] = 0; for (int i = 0; i < n; i++) ia_tmp[i+1] += ia_tmp[i]; // Copy over indices for (int i = 0; i < n; i++) { for (int k = ia[i]; k < ia[i+1]; k++) { j = ja[k]; next = ia_tmp[j]; ja_tmp[next] = i; ia_tmp[j] = next + 1; } } // Reshift ia_tmp for (int i=n-1; i >= 0; i--) ia_tmp[i+1] = ia_tmp[i]; ia_tmp[0] = 0; // Transformation information int numMatch = 0; // number of nonzeros on diagonal after permutation. double maxWork = 0.0; // no limit on how much work to perform in max-trans. double workPerf = 0.0; // how much work was performed in max-trans. // Create a work vector for the BTF code. vector<int> work(5*n); // Storage for the row and column permutations. vector<int> rowperm(n); vector<int> colperm(n); vector<int> blockptr(n+1); // NOTE: The permutations are sent in backwards since the matrix is transposed. // On output, rowperm and colperm are the row and column permutations of A, where // i = BTF_UNFLIP(rowperm[k]) if row i of A is the kth row of P*A*Q, and j = colperm[k] // if column j of A is the kth column of P*A*Q. If rowperm[k] < 0, then the // (k,k)th entry in P*A*Q is structurally zero. numBlocks_ = amesos_btf_order( n, &ia_tmp[0], &ja_tmp[0], maxWork, &workPerf, &rowperm[0], &colperm[0], &blockptr[0], &numMatch, &work[0] ); // Reverse ordering of permutation to get upper triangular form, if necessary. rowPerm_.resize( n ); colPerm_.resize( n ); blockptr.resize( numBlocks_+1 ); blockPtr_.resize( numBlocks_+1 ); if (!upperTri_) { for( int i = 0; i < n; ++i ) { rowPerm_[i] = BTF_UNFLIP(rowperm[(n-1)-i]); colPerm_[i] = colperm[(n-1)-i]; } for( int i = 0; i < numBlocks_+1; ++i ) { blockPtr_[i] = n - blockptr[numBlocks_-i]; } } else { colPerm_ = colperm; blockPtr_ = blockptr; for( int i = 0; i < n; ++i ) { rowPerm_[i] = BTF_UNFLIP(rowperm[i]); } } if( debug_ ) { cout << "-----------------------------------------\n"; cout << "BTF Output (n = " << n << ")\n"; cout << "-----------------------------------------\n"; cout << "Num Blocks: " << numBlocks_ << endl; cout << "Num NNZ Diags: " << numMatch << endl; cout << "RowPerm and ColPerm \n"; for( int i = 0; i<n; ++i ) cout << rowPerm_[i] << "\t" << colPerm_[i] << endl; cout << "-----------------------------------------\n"; } } // Broadcast the BTF permutation information to all processors. rowPerm_.resize( nGlobal ); colPerm_.resize( nGlobal ); orig.Comm().Broadcast(&rowPerm_[0], nGlobal, matProc); orig.Comm().Broadcast(&colPerm_[0], nGlobal, matProc); orig.Comm().Broadcast(&numBlocks_, 1, matProc); blockPtr_.resize( numBlocks_+1 ); orig.Comm().Broadcast(&blockPtr_[0], numBlocks_+1, matProc); //Generate New Domain and Range Maps //for now, assume they start out as identical vector<int> myElements( n ); OldRowMap.MyGlobalElements( &myElements[0] ); vector<int> newDomainElements( n ); vector<int> newRangeElements( n ); for( int i = 0; i < n; ++i ) { newRangeElements[ i ] = myElements[ rowPerm_[i] ]; newDomainElements[ i ] = myElements[ colPerm_[i] ]; } NewRowMap_ = Teuchos::rcp( new Epetra_Map( nGlobal, n, &newRangeElements[0], OldRowMap.IndexBase(), OldRowMap.Comm() ) ); NewColMap_ = Teuchos::rcp( new Epetra_Map( nGlobal, n, &newDomainElements[0], OldColMap.IndexBase(), OldColMap.Comm() ) ); if( debug_ ) { cout << "New Row Map\n"; cout << *NewRowMap_ << endl; cout << "New Col Map\n"; cout << *NewColMap_ << endl; } //Generate New Graph NewGraph_ = Teuchos::rcp( new Epetra_CrsGraph( Copy, *NewRowMap_, *NewColMap_, 0 ) ); Importer_ = Teuchos::rcp( new Epetra_Import( *NewRowMap_, OldRowMap ) ); NewGraph_->Import( strippedGraph, *Importer_, Insert ); NewGraph_->FillComplete(); if( debug_ ) { cout << "NewGraph\n"; cout << *NewGraph_; } NewMatrix_ = Teuchos::rcp( new Epetra_CrsMatrix( Copy, *NewGraph_ ) ); NewMatrix_->Import( orig, *Importer_, Insert ); NewMatrix_->FillComplete(); if( debug_ ) { cout << "New CrsMatrix\n"; cout << *NewMatrix_ << endl; } newObj_ = &*NewMatrix_; return *NewMatrix_; }
static KLU_symbolic *order_and_analyze /* returns NULL if error, or a valid KLU_symbolic object if successful */ ( /* inputs, not modified */ Int n, /* A is n-by-n */ Int Ap [ ], /* size n+1, column pointers */ Int Ai [ ], /* size nz, row indices */ /* --------------------- */ KLU_common *Common ) { double work ; KLU_symbolic *Symbolic ; double *Lnz ; Int *Qbtf, *Cp, *Ci, *Pinv, *Pblk, *Pbtf, *P, *Q, *R ; Int nblocks, nz, block, maxblock, k1, k2, nk, do_btf, ordering, k, Cilen, *Work ; /* ---------------------------------------------------------------------- */ /* allocate the Symbolic object, and check input matrix */ /* ---------------------------------------------------------------------- */ Symbolic = KLU_alloc_symbolic (n, Ap, Ai, Common) ; if (Symbolic == NULL) { return (NULL) ; } P = Symbolic->P ; Q = Symbolic->Q ; R = Symbolic->R ; Lnz = Symbolic->Lnz ; nz = Symbolic->nz ; ordering = Common->ordering ; if (ordering == 1) { /* COLAMD */ Cilen = COLAMD_recommended (nz, n, n) ; } else if (ordering == 0 || (ordering == 3 && Common->user_order != NULL)) { /* AMD or user ordering function */ Cilen = nz+1 ; } else { /* invalid ordering */ Common->status = KLU_INVALID ; KLU_free_symbolic (&Symbolic, Common) ; return (NULL) ; } /* AMD memory management routines */ amd_malloc = Common->malloc_memory ; amd_free = Common->free_memory ; amd_calloc = Common->calloc_memory ; amd_realloc = Common->realloc_memory ; /* ---------------------------------------------------------------------- */ /* allocate workspace for BTF permutation */ /* ---------------------------------------------------------------------- */ Pbtf = KLU_malloc (n, sizeof (Int), Common) ; Qbtf = KLU_malloc (n, sizeof (Int), Common) ; if (Common->status < KLU_OK) { KLU_free (Pbtf, n, sizeof (Int), Common) ; KLU_free (Qbtf, n, sizeof (Int), Common) ; KLU_free_symbolic (&Symbolic, Common) ; return (NULL) ; } /* ---------------------------------------------------------------------- */ /* get the common parameters for BTF and ordering method */ /* ---------------------------------------------------------------------- */ do_btf = Common->btf ; do_btf = (do_btf) ? TRUE : FALSE ; Symbolic->ordering = ordering ; Symbolic->do_btf = do_btf ; Symbolic->structural_rank = EMPTY ; /* ---------------------------------------------------------------------- */ /* find the block triangular form (if requested) */ /* ---------------------------------------------------------------------- */ Common->work = 0 ; if (do_btf) { Work = KLU_malloc (5*n, sizeof (Int), Common) ; if (Common->status < KLU_OK) { /* out of memory */ KLU_free (Pbtf, n, sizeof (Int), Common) ; KLU_free (Qbtf, n, sizeof (Int), Common) ; KLU_free_symbolic (&Symbolic, Common) ; return (NULL) ; } nblocks = BTF_order (n, Ap, Ai, Common->maxwork, &work, Pbtf, Qbtf, R, &(Symbolic->structural_rank), Work) ; Common->structural_rank = Symbolic->structural_rank ; Common->work += work ; KLU_free (Work, 5*n, sizeof (Int), Common) ; /* unflip Qbtf if the matrix does not have full structural rank */ if (Symbolic->structural_rank < n) { for (k = 0 ; k < n ; k++) { Qbtf [k] = BTF_UNFLIP (Qbtf [k]) ; } } /* find the size of the largest block */ maxblock = 1 ; for (block = 0 ; block < nblocks ; block++) { k1 = R [block] ; k2 = R [block+1] ; nk = k2 - k1 ; PRINTF (("block %d size %d\n", block, nk)) ; maxblock = MAX (maxblock, nk) ; } } else { /* BTF not requested */ nblocks = 1 ; maxblock = n ; R [0] = 0 ; R [1] = n ; for (k = 0 ; k < n ; k++) { Pbtf [k] = k ; Qbtf [k] = k ; } } Symbolic->nblocks = nblocks ; PRINTF (("maxblock size %d\n", maxblock)) ; Symbolic->maxblock = maxblock ; /* ---------------------------------------------------------------------- */ /* allocate more workspace, for analyze_worker */ /* ---------------------------------------------------------------------- */ Pblk = KLU_malloc (maxblock, sizeof (Int), Common) ; Cp = KLU_malloc (maxblock + 1, sizeof (Int), Common) ; Ci = KLU_malloc (MAX (Cilen, nz+1), sizeof (Int), Common) ; Pinv = KLU_malloc (n, sizeof (Int), Common) ; /* ---------------------------------------------------------------------- */ /* order each block of the BTF ordering, and a fill-reducing ordering */ /* ---------------------------------------------------------------------- */ if (Common->status == KLU_OK) { PRINTF (("calling analyze_worker\n")) ; Common->status = analyze_worker (n, Ap, Ai, nblocks, Pbtf, Qbtf, R, ordering, P, Q, Lnz, Pblk, Cp, Ci, Cilen, Pinv, Symbolic, Common) ; PRINTF (("analyze_worker done\n")) ; } /* ---------------------------------------------------------------------- */ /* free all workspace */ /* ---------------------------------------------------------------------- */ KLU_free (Pblk, maxblock, sizeof (Int), Common) ; KLU_free (Cp, maxblock+1, sizeof (Int), Common) ; KLU_free (Ci, MAX (Cilen, nz+1), sizeof (Int), Common) ; KLU_free (Pinv, n, sizeof (Int), Common) ; KLU_free (Pbtf, n, sizeof (Int), Common) ; KLU_free (Qbtf, n, sizeof (Int), Common) ; /* ---------------------------------------------------------------------- */ /* return the symbolic object */ /* ---------------------------------------------------------------------- */ if (Common->status < KLU_OK) { KLU_free_symbolic (&Symbolic, Common) ; } return (Symbolic) ; }
void mexFunction ( int nargout, mxArray *pargout[], int nargin, const mxArray *pargin[] ) { Long b, n, i, k, j, *Ap, *Ai, *P, *R, nblocks, *Work, *Q, jj ; double *Px, *Rx, *Qx ; /* ---------------------------------------------------------------------- */ /* get inputs and allocate workspace */ /* ---------------------------------------------------------------------- */ if (!((nargin == 1 && nargout <= 2) || (nargin == 2 && nargout <= 3))) { mexErrMsgTxt ("Usage: [p,r] = strongcomp (A)" " or [p,q,r] = strongcomp (A,qin)") ; } n = mxGetM (pargin [0]) ; if (!mxIsSparse (pargin [0]) || n != mxGetN (pargin [0])) { mexErrMsgTxt ("strongcomp: A must be sparse, square, and non-empty") ; } /* get sparse matrix A */ Ap = (Long *) mxGetJc (pargin [0]) ; Ai = (Long *) mxGetIr (pargin [0]) ; /* get output arrays */ P = mxMalloc (n * sizeof (Long)) ; R = mxMalloc ((n+1) * sizeof (Long)) ; /* get workspace of size 4n (recursive code only needs 2n) */ Work = mxMalloc (4*n * sizeof (Long)) ; /* get the input column permutation Q */ if (nargin == 2) { if (mxGetNumberOfElements (pargin [1]) != n) { mexErrMsgTxt ("strongcomp: qin must be a permutation vector of size n") ; } Qx = mxGetPr (pargin [1]) ; Q = mxMalloc (n * sizeof (Long)) ; /* connvert Qin to 0-based and check validity */ for (i = 0 ; i < n ; i++) { Work [i] = 0 ; } for (k = 0 ; k < n ; k++) { j = Qx [k] - 1 ; /* convert to 0-based */ jj = BTF_UNFLIP (j) ; if (jj < 0 || jj >= n || Work [jj] == 1) { mexErrMsgTxt ("strongcomp: qin must be a permutation vector of size n") ; } Work [jj] = 1 ; Q [k] = j ; } } else { /* no input column permutation */ Q = (Long *) NULL ; } /* ---------------------------------------------------------------------- */ /* find the strongly-connected components of A */ /* ---------------------------------------------------------------------- */ nblocks = btf_l_strongcomp (n, Ap, Ai, Q, P, R, Work) ; /* ---------------------------------------------------------------------- */ /* create outputs and free workspace */ /* ---------------------------------------------------------------------- */ /* create P */ pargout [0] = mxCreateDoubleMatrix (1, n, mxREAL) ; Px = mxGetPr (pargout [0]) ; for (k = 0 ; k < n ; k++) { Px [k] = P [k] + 1 ; /* convert to 1-based */ } /* create Q */ if (nargin == 2 && nargout > 1) { pargout [1] = mxCreateDoubleMatrix (1, n, mxREAL) ; Qx = mxGetPr (pargout [1]) ; for (k = 0 ; k < n ; k++) { Qx [k] = Q [k] + 1 ; /* convert to 1-based */ } } /* create R */ if (nargout == nargin + 1) { pargout [nargin] = mxCreateDoubleMatrix (1, nblocks+1, mxREAL) ; Rx = mxGetPr (pargout [nargin]) ; for (b = 0 ; b <= nblocks ; b++) { Rx [b] = R [b] + 1 ; /* convert to 1-based */ } } mxFree (P) ; mxFree (R) ; mxFree (Work) ; if (nargin == 2) { mxFree (Q) ; } }