AmesosBTF_CrsMatrix::NewTypeRef AmesosBTF_CrsMatrix:: operator()( OriginalTypeRef orig ) { origObj_ = &orig; const Epetra_BlockMap & OldRowMap = orig.RowMap(); const Epetra_BlockMap & OldColMap = orig.ColMap(); // Check if the matrix is on one processor. int myMatProc = -1, matProc = -1; int myPID = orig.Comm().MyPID(); for (int proc=0; proc<orig.Comm().NumProc(); proc++) { if (orig.NumGlobalNonzeros() == orig.NumMyNonzeros()) myMatProc = myPID; } orig.Comm().MaxAll( &myMatProc, &matProc, 1 ); if( orig.RowMap().DistributedGlobal() && matProc == -1) { cout << "FAIL for Global!\n"; abort(); } if( orig.IndicesAreGlobal() && matProc == -1) { cout << "FAIL for Global Indices!\n"; abort(); } int nGlobal = orig.NumGlobalRows(); int n = orig.NumMyRows(); int nnz = orig.NumMyNonzeros(); if( debug_ ) { cout << "Orig Matrix:\n"; cout << orig << endl; } // Create std CRS format (without elements above the threshold) vector<int> ia(n+1,0); int maxEntries = orig.MaxNumEntries(); vector<int> ja(nnz), ja_tmp(nnz); vector<double> jva_tmp(maxEntries); int cnt; Epetra_CrsGraph strippedGraph( Copy, OldRowMap, OldColMap, 0 ); for( int i = 0; i < n; ++i ) { orig.ExtractMyRowCopy( i, maxEntries, cnt, &jva_tmp[0], &ja_tmp[0] ); ia[i+1] = ia[i]; for( int j = 0; j < cnt; ++j ) if( fabs(jva_tmp[j]) > threshold_ ) ja[ ia[i+1]++ ] = ja_tmp[j]; int new_cnt = ia[i+1] - ia[i]; strippedGraph.InsertMyIndices( i, new_cnt, &ja[ ia[i] ] ); } nnz = ia[n]; strippedGraph.FillComplete(); if( debug_ ) { cout << "Stripped Graph\n"; cout << strippedGraph; } // Compute the BTF permutation only on the processor that has the graph. if ( matProc == myPID ) { if( debug_ ) { cout << "-----------------------------------------\n"; cout << "CRS Format Graph (stripped) \n"; cout << "-----------------------------------------\n"; for( int i = 0; i < n; ++i ) { cout << ia[i] << " - " << ia[i+1] << " : "; for( int j = ia[i]; j<ia[i+1]; ++j ) cout << " " << ja[j]; cout << endl; } cout << "-----------------------------------------\n"; } // Transpose the graph, not the values int j=0, next=0; vector<int> ia_tmp(n+1,0); // Compute row lengths for (int i = 0; i < n; i++) for (int k = ia[i]; k < ia[i+1]; k++) ++ia_tmp[ ja[k]+1 ]; // Compute pointers from row lengths ia_tmp[0] = 0; for (int i = 0; i < n; i++) ia_tmp[i+1] += ia_tmp[i]; // Copy over indices for (int i = 0; i < n; i++) { for (int k = ia[i]; k < ia[i+1]; k++) { j = ja[k]; next = ia_tmp[j]; ja_tmp[next] = i; ia_tmp[j] = next + 1; } } // Reshift ia_tmp for (int i=n-1; i >= 0; i--) ia_tmp[i+1] = ia_tmp[i]; ia_tmp[0] = 0; // Transformation information int numMatch = 0; // number of nonzeros on diagonal after permutation. double maxWork = 0.0; // no limit on how much work to perform in max-trans. double workPerf = 0.0; // how much work was performed in max-trans. // Create a work vector for the BTF code. vector<int> work(5*n); // Storage for the row and column permutations. vector<int> rowperm(n); vector<int> colperm(n); vector<int> blockptr(n+1); // NOTE: The permutations are sent in backwards since the matrix is transposed. // On output, rowperm and colperm are the row and column permutations of A, where // i = BTF_UNFLIP(rowperm[k]) if row i of A is the kth row of P*A*Q, and j = colperm[k] // if column j of A is the kth column of P*A*Q. If rowperm[k] < 0, then the // (k,k)th entry in P*A*Q is structurally zero. numBlocks_ = amesos_btf_order( n, &ia_tmp[0], &ja_tmp[0], maxWork, &workPerf, &rowperm[0], &colperm[0], &blockptr[0], &numMatch, &work[0] ); // Reverse ordering of permutation to get upper triangular form, if necessary. rowPerm_.resize( n ); colPerm_.resize( n ); blockptr.resize( numBlocks_+1 ); blockPtr_.resize( numBlocks_+1 ); if (!upperTri_) { for( int i = 0; i < n; ++i ) { rowPerm_[i] = BTF_UNFLIP(rowperm[(n-1)-i]); colPerm_[i] = colperm[(n-1)-i]; } for( int i = 0; i < numBlocks_+1; ++i ) { blockPtr_[i] = n - blockptr[numBlocks_-i]; } } else { colPerm_ = colperm; blockPtr_ = blockptr; for( int i = 0; i < n; ++i ) { rowPerm_[i] = BTF_UNFLIP(rowperm[i]); } } if( debug_ ) { cout << "-----------------------------------------\n"; cout << "BTF Output (n = " << n << ")\n"; cout << "-----------------------------------------\n"; cout << "Num Blocks: " << numBlocks_ << endl; cout << "Num NNZ Diags: " << numMatch << endl; cout << "RowPerm and ColPerm \n"; for( int i = 0; i<n; ++i ) cout << rowPerm_[i] << "\t" << colPerm_[i] << endl; cout << "-----------------------------------------\n"; } } // Broadcast the BTF permutation information to all processors. rowPerm_.resize( nGlobal ); colPerm_.resize( nGlobal ); orig.Comm().Broadcast(&rowPerm_[0], nGlobal, matProc); orig.Comm().Broadcast(&colPerm_[0], nGlobal, matProc); orig.Comm().Broadcast(&numBlocks_, 1, matProc); blockPtr_.resize( numBlocks_+1 ); orig.Comm().Broadcast(&blockPtr_[0], numBlocks_+1, matProc); //Generate New Domain and Range Maps //for now, assume they start out as identical vector<int> myElements( n ); OldRowMap.MyGlobalElements( &myElements[0] ); vector<int> newDomainElements( n ); vector<int> newRangeElements( n ); for( int i = 0; i < n; ++i ) { newRangeElements[ i ] = myElements[ rowPerm_[i] ]; newDomainElements[ i ] = myElements[ colPerm_[i] ]; } NewRowMap_ = Teuchos::rcp( new Epetra_Map( nGlobal, n, &newRangeElements[0], OldRowMap.IndexBase(), OldRowMap.Comm() ) ); NewColMap_ = Teuchos::rcp( new Epetra_Map( nGlobal, n, &newDomainElements[0], OldColMap.IndexBase(), OldColMap.Comm() ) ); if( debug_ ) { cout << "New Row Map\n"; cout << *NewRowMap_ << endl; cout << "New Col Map\n"; cout << *NewColMap_ << endl; } //Generate New Graph NewGraph_ = Teuchos::rcp( new Epetra_CrsGraph( Copy, *NewRowMap_, *NewColMap_, 0 ) ); Importer_ = Teuchos::rcp( new Epetra_Import( *NewRowMap_, OldRowMap ) ); NewGraph_->Import( strippedGraph, *Importer_, Insert ); NewGraph_->FillComplete(); if( debug_ ) { cout << "NewGraph\n"; cout << *NewGraph_; } NewMatrix_ = Teuchos::rcp( new Epetra_CrsMatrix( Copy, *NewGraph_ ) ); NewMatrix_->Import( orig, *Importer_, Insert ); NewMatrix_->FillComplete(); if( debug_ ) { cout << "New CrsMatrix\n"; cout << *NewMatrix_ << endl; } newObj_ = &*NewMatrix_; return *NewMatrix_; }
AmesosBTFGlobal_LinearProblem::NewTypeRef AmesosBTFGlobal_LinearProblem:: operator()( OriginalTypeRef orig ) { origObj_ = &orig; // Extract the matrix and vectors from the linear problem OldRHS_ = Teuchos::rcp( orig.GetRHS(), false ); OldLHS_ = Teuchos::rcp( orig.GetLHS(), false ); OldMatrix_ = Teuchos::rcp( dynamic_cast<Epetra_CrsMatrix *>( orig.GetMatrix() ), false ); int nGlobal = OldMatrix_->NumGlobalRows(); int n = OldMatrix_->NumMyRows(); // Check if the matrix is on one processor. int myMatProc = -1, matProc = -1; int myPID = OldMatrix_->Comm().MyPID(); int numProcs = OldMatrix_->Comm().NumProc(); const Epetra_BlockMap& oldRowMap = OldMatrix_->RowMap(); // Get some information about the parallel distribution. int maxMyRows = 0; std::vector<int> numGlobalElem( numProcs ); OldMatrix_->Comm().GatherAll(&n, &numGlobalElem[0], 1); OldMatrix_->Comm().MaxAll(&n, &maxMyRows, 1); for (int proc=0; proc<numProcs; proc++) { if (OldMatrix_->NumGlobalNonzeros() == OldMatrix_->NumMyNonzeros()) myMatProc = myPID; } OldMatrix_->Comm().MaxAll( &myMatProc, &matProc, 1 ); Teuchos::RCP<Epetra_CrsMatrix> serialMatrix; Teuchos::RCP<Epetra_Map> serialMap; if( oldRowMap.DistributedGlobal() && matProc == -1) { // The matrix is distributed and needs to be moved to processor zero. // Set the zero processor as the master. matProc = 0; serialMap = Teuchos::rcp( new Epetra_Map( Epetra_Util::Create_Root_Map( OldMatrix_->RowMap(), matProc ) ) ); Epetra_Import serialImporter( *serialMap, OldMatrix_->RowMap() ); serialMatrix = Teuchos::rcp( new Epetra_CrsMatrix( Copy, *serialMap, 0 ) ); serialMatrix->Import( *OldMatrix_, serialImporter, Insert ); serialMatrix->FillComplete(); } else { // The old matrix has already been moved to one processor (matProc). serialMatrix = OldMatrix_; } if( debug_ ) { cout << "Original (serial) Matrix:\n"; cout << *serialMatrix << endl; } // Obtain the current row and column orderings std::vector<int> origGlobalRows(nGlobal), origGlobalCols(nGlobal); serialMatrix->RowMap().MyGlobalElements( &origGlobalRows[0] ); serialMatrix->ColMap().MyGlobalElements( &origGlobalCols[0] ); // Perform reindexing on the full serial matrix (needed for BTF). Epetra_Map reIdxMap( serialMatrix->RowMap().NumGlobalElements(), serialMatrix->RowMap().NumMyElements(), 0, serialMatrix->Comm() ); Teuchos::RCP<EpetraExt::ViewTransform<Epetra_CrsMatrix> > reIdxTrans = Teuchos::rcp( new EpetraExt::CrsMatrix_Reindex( reIdxMap ) ); Epetra_CrsMatrix newSerialMatrix = (*reIdxTrans)( *serialMatrix ); reIdxTrans->fwd(); // Compute and apply BTF to the serial CrsMatrix and has been filtered by the threshold EpetraExt::AmesosBTF_CrsMatrix BTFTrans( threshold_, upperTri_, verbose_, debug_ ); Epetra_CrsMatrix newSerialMatrixBTF = BTFTrans( newSerialMatrix ); rowPerm_ = BTFTrans.RowPerm(); colPerm_ = BTFTrans.ColPerm(); blockPtr_ = BTFTrans.BlockPtr(); numBlocks_ = BTFTrans.NumBlocks(); if (myPID == matProc && verbose_) { bool isSym = true; for (int i=0; i<nGlobal; ++i) { if (rowPerm_[i] != colPerm_[i]) { isSym = false; break; } } std::cout << "The BTF permutation symmetry (0=false,1=true) is : " << isSym << std::endl; } // Compute the permutation w.r.t. the original row and column GIDs. std::vector<int> origGlobalRowsPerm(nGlobal), origGlobalColsPerm(nGlobal); if (myPID == matProc) { for (int i=0; i<nGlobal; ++i) { origGlobalRowsPerm[i] = origGlobalRows[ rowPerm_[i] ]; origGlobalColsPerm[i] = origGlobalCols[ colPerm_[i] ]; } } OldMatrix_->Comm().Broadcast( &origGlobalRowsPerm[0], nGlobal, matProc ); OldMatrix_->Comm().Broadcast( &origGlobalColsPerm[0], nGlobal, matProc ); // Generate the full serial matrix that imports according to the previously computed BTF. Epetra_CrsMatrix newSerialMatrixT( Copy, newSerialMatrixBTF.RowMap(), 0 ); newSerialMatrixT.Import( newSerialMatrix, *(BTFTrans.Importer()), Insert ); newSerialMatrixT.FillComplete(); if( debug_ ) { cout << "Original (serial) Matrix permuted via BTF:\n"; cout << newSerialMatrixT << endl; } // Perform reindexing on the full serial matrix (needed for balancing). Epetra_Map reIdxMap2( newSerialMatrixT.RowMap().NumGlobalElements(), newSerialMatrixT.RowMap().NumMyElements(), 0, newSerialMatrixT.Comm() ); Teuchos::RCP<EpetraExt::ViewTransform<Epetra_CrsMatrix> > reIdxTrans2 = Teuchos::rcp( new EpetraExt::CrsMatrix_Reindex( reIdxMap2 ) ); Epetra_CrsMatrix tNewSerialMatrixT = (*reIdxTrans2)( newSerialMatrixT ); reIdxTrans2->fwd(); Teuchos::RCP<Epetra_Map> balancedMap; if (balance_ == "linear") { // Distribute block somewhat evenly across processors std::vector<int> rowDist(numProcs+1,0); int balRows = nGlobal / numProcs + 1; int numRows = balRows, currProc = 1; for ( int i=0; i<numBlocks_ || currProc < numProcs; ++i ) { if (blockPtr_[i] > numRows) { rowDist[currProc++] = blockPtr_[i-1]; numRows = blockPtr_[i-1] + balRows; } } rowDist[numProcs] = nGlobal; // Create new Map based on this linear distribution. int numMyBalancedRows = rowDist[myPID+1]-rowDist[myPID]; NewRowMap_ = Teuchos::rcp( new Epetra_Map( nGlobal, numMyBalancedRows, &origGlobalRowsPerm[ rowDist[myPID] ], 0, OldMatrix_->Comm() ) ); // Right now we do not explicitly build the column map and assume the BTF permutation is symmetric! //NewColMap_ = Teuchos::rcp( new Epetra_Map( nGlobal, nGlobal, &colPerm_[0], 0, OldMatrix_->Comm() ) ); if ( verbose_ ) std::cout << "Processor " << myPID << " has " << numMyBalancedRows << " rows." << std::endl; //balancedMap = Teuchos::rcp( new Epetra_Map( nGlobal, numMyBalancedRows, 0, serialMatrix->Comm() ) ); } else if (balance_ == "isorropia") { // Compute block adjacency graph for partitioning. std::vector<double> weight; Teuchos::RCP<Epetra_CrsGraph> blkGraph; EpetraExt::BlockAdjacencyGraph adjGraph; blkGraph = adjGraph.compute( const_cast<Epetra_CrsGraph&>(tNewSerialMatrixT.Graph()), numBlocks_, blockPtr_, weight, verbose_); Epetra_Vector rowWeights( View, blkGraph->Map(), &weight[0] ); // Call Isorropia to rebalance this graph. Teuchos::RCP<Epetra_CrsGraph> balancedGraph = Isorropia::Epetra::create_balanced_copy( *blkGraph, rowWeights ); int myNumBlkRows = balancedGraph->NumMyRows(); //std::vector<int> myGlobalElements(nGlobal); std::vector<int> newRangeElements(nGlobal), newDomainElements(nGlobal); int grid = 0, myElements = 0; for (int i=0; i<myNumBlkRows; ++i) { grid = balancedGraph->GRID( i ); for (int j=blockPtr_[grid]; j<blockPtr_[grid+1]; ++j) { newRangeElements[myElements++] = origGlobalRowsPerm[j]; //myGlobalElements[myElements++] = j; } } NewRowMap_ = Teuchos::rcp( new Epetra_Map( nGlobal, myElements, &newRangeElements[0], 0, OldMatrix_->Comm() ) ); // Right now we do not explicitly build the column map and assume the BTF permutation is symmetric! //NewColMap_ = Teuchos::rcp( new Epetra_Map( nGlobal, nGlobal, &colPerm_[0], 0, OldMatrix_->Comm() ) ); //balancedMap = Teuchos::rcp( new Epetra_Map( nGlobal, myElements, &myGlobalElements[0], 0, serialMatrix->Comm() ) ); if ( verbose_ ) std::cout << "Processor " << myPID << " has " << myElements << " rows." << std::endl; } // Use New Domain and Range Maps to Generate Importer //for now, assume they start out as identical Epetra_Map OldRowMap = OldMatrix_->RowMap(); Epetra_Map OldColMap = OldMatrix_->ColMap(); if( debug_ ) { cout << "New Row Map\n"; cout << *NewRowMap_ << endl; //cout << "New Col Map\n"; //cout << *NewColMap_ << endl; } // Generate New Graph // NOTE: Right now we are creating the graph, assuming that the permutation is symmetric! // NewGraph_ = Teuchos::rcp( new Epetra_CrsGraph( Copy, *NewRowMap_, *NewColMap_, 0 ) ); NewGraph_ = Teuchos::rcp( new Epetra_CrsGraph( Copy, *NewRowMap_, 0 ) ); Importer_ = Teuchos::rcp( new Epetra_Import( *NewRowMap_, OldRowMap ) ); Importer2_ = Teuchos::rcp( new Epetra_Import( OldRowMap, *NewRowMap_ ) ); NewGraph_->Import( OldMatrix_->Graph(), *Importer_, Insert ); NewGraph_->FillComplete(); if( debug_ ) { cout << "NewGraph\n"; cout << *NewGraph_; } // Create new linear problem and import information from old linear problem NewMatrix_ = Teuchos::rcp( new Epetra_CrsMatrix( Copy, *NewGraph_ ) ); NewMatrix_->Import( *OldMatrix_, *Importer_, Insert ); NewMatrix_->FillComplete(); NewLHS_ = Teuchos::rcp( new Epetra_MultiVector( *NewRowMap_, OldLHS_->NumVectors() ) ); NewLHS_->Import( *OldLHS_, *Importer_, Insert ); NewRHS_ = Teuchos::rcp( new Epetra_MultiVector( *NewRowMap_, OldRHS_->NumVectors() ) ); NewRHS_->Import( *OldRHS_, *Importer_, Insert ); if( debug_ ) { cout << "New Matrix\n"; cout << *NewMatrix_ << endl; } newObj_ = new Epetra_LinearProblem( &*NewMatrix_, &*NewLHS_, &*NewRHS_ ); return *newObj_; }