void TpetraLinearObjFactory<Traits,ScalarT,LocalOrdinalT,GlobalOrdinalT,NodeT>::
ghostToGlobalTpetraMatrix(const Tpetra::CrsMatrix<ScalarT,LocalOrdinalT,GlobalOrdinalT,NodeT> & in,
                          Tpetra::CrsMatrix<ScalarT,LocalOrdinalT,GlobalOrdinalT,NodeT> & out) const
{
   using Teuchos::RCP;

   // do the global distribution
   RCP<ExportType> exporter = getGhostedExport();

   out.resumeFill();
   out.setAllToScalar(0.0);
   out.doExport(in, *exporter, Tpetra::ADD);
   out.fillComplete();
}
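// The factory method above relies on members that are not shown in this excerpt
// (getGhostedExport(), ExportType). The stand-alone sketch below illustrates the same
// ghosted-to-owned assembly pattern with plain Tpetra objects; the function name
// 'combineGhostContributions' and the two-matrix setup are illustrative assumptions,
// not part of the factory. Entries accumulated into a matrix with an overlapping
// (ghosted) row map are summed into the uniquely-owned matrix through an Export with
// the ADD combine mode.
template<class ST, class LO, class GO, class NT>
void combineGhostContributions(const Tpetra::CrsMatrix<ST,LO,GO,NT> & ghostedA,
                               Tpetra::CrsMatrix<ST,LO,GO,NT> & ownedA)
{
   // Export from the ghosted row map to the uniquely-owned row map.
   Tpetra::Export<LO,GO,NT> exporter(ghostedA.getRowMap(), ownedA.getRowMap());

   ownedA.resumeFill();                                        // reopen the owned matrix for modification
   ownedA.setAllToScalar(Teuchos::ScalarTraits<ST>::zero());   // clear any previous assembly
   ownedA.doExport(ghostedA, exporter, Tpetra::ADD);           // sum the overlapping contributions
   ownedA.fillComplete();                                      // restore the fill-complete state
}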
int main(int argc, char* argv[])
{
  Teuchos::oblackholestream blackhole;
  Teuchos::GlobalMPISession mpiSession(&argc,&argv,&blackhole);

  typedef double Scalar;
  typedef int Ordinal;
  using Tpetra::global_size_t;

  Teuchos::RCP<const Teuchos::Comm<int> > comm =
    Tpetra::DefaultPlatform::getDefaultPlatform().getComm();
  Teuchos::RCP<Teuchos::FancyOStream> out =
    Teuchos::fancyOStream(Teuchos::rcp(&std::cout,false));
  //out->setOutputToRootOnly(comm->getRank());

  size_t myRank = comm->getRank();
  size_t numProc = comm->getSize();
  bool verbose = (myRank==0);

  std::cout << *comm;

  const global_size_t numGlobalElements = 4;
  if (numGlobalElements < numProc) {
    if (verbose) {
      std::cout << "numGlobalElements = " << numGlobalElements
                << " cannot be less than the number of processors = " << numProc << std::endl;
    }
    return -1;
  }

  // Construct a Map that puts approximately the same number of equations on each processor.
  Teuchos::RCP<const Tpetra::Map<Ordinal> > map =
    Tpetra::createUniformContigMap<Ordinal,Ordinal>(numGlobalElements, comm);

  // Get the update list and the number of local equations from the newly created map.
  const size_t numMyElements = map->getNodeNumElements();
  Teuchos::ArrayView<const Ordinal> myGlobalElements = map->getNodeElementList();

  // Create an integer array NumNz that is used to build the Tpetra matrix.
  // NumNz[i] is the number of nonzero entries for the ith global equation
  // on this processor.
  Teuchos::ArrayRCP<size_t> NumNz = Teuchos::arcp<size_t>(numMyElements);

  // We are building a tridiagonal matrix, so each row needs three entries,
  // except for the first and last equation, which need two.
  for (size_t i=0; i < numMyElements; ++i) {
    if (myGlobalElements[i] == 0 ||
        static_cast<global_size_t>(myGlobalElements[i]) == numGlobalElements-1) {
      // boundary
      NumNz[i] = 2;
    }
    else {
      NumNz[i] = 3;
    }
  }

  // Create the Tpetra matrices using the Map, with a static allocation dictated by NumNz.
  Tpetra::CrsMatrix<Scalar,Ordinal> A (map, NumNz, Tpetra::StaticProfile);
  Tpetra::CrsMatrix<Scalar,Ordinal> AT(map, NumNz, Tpetra::StaticProfile);
  Teuchos::RCP< Tpetra::CrsMatrix<Scalar,Ordinal> > TestMatrix = Teuchos::null;

  // We are done with NumNz.
  NumNz = Teuchos::null;

  // Add rows one at a time. A is deliberately nonsymmetric (its interior rows are
  // (3 2 -1) rather than (-1 2 -1)), so that comparing the hand-built AT against the
  // computed transpose below is a meaningful test.
  const Scalar two    = static_cast<Scalar>( 2.0);
  const Scalar negOne = static_cast<Scalar>(-1.0);
  const Scalar three  = static_cast<Scalar>( 3.0);
  for (size_t i=0; i<numMyElements; i++) {
    if (myGlobalElements[i] == 0) {
      A.insertGlobalValues( myGlobalElements[i],
                            Teuchos::tuple<Ordinal>( myGlobalElements[i], myGlobalElements[i]+1 ),
                            Teuchos::tuple<Scalar> ( two, negOne ) );
    }
    else if (static_cast<global_size_t>(myGlobalElements[i]) == numGlobalElements-1) {
      A.insertGlobalValues( myGlobalElements[i],
                            Teuchos::tuple<Ordinal>( myGlobalElements[i]-1, myGlobalElements[i] ),
                            Teuchos::tuple<Scalar> ( negOne, two ) );
    }
    else {
      A.insertGlobalValues( myGlobalElements[i],
                            Teuchos::tuple<Ordinal>( myGlobalElements[i]-1, myGlobalElements[i], myGlobalElements[i]+1 ),
                            Teuchos::tuple<Scalar> ( three, two, negOne ) );
    }
  }

  // Fill AT by hand with the explicit transpose of A.
  for (size_t i=0; i<numMyElements; i++) {
    if (myGlobalElements[i] == 0) {
      AT.insertGlobalValues( myGlobalElements[i],
                             Teuchos::tuple<Ordinal>( myGlobalElements[i], myGlobalElements[i]+1 ),
                             Teuchos::tuple<Scalar> ( two, three ) );
    }
    else if (static_cast<global_size_t>(myGlobalElements[i]) == numGlobalElements-1) {
      AT.insertGlobalValues( myGlobalElements[i],
                             Teuchos::tuple<Ordinal>( myGlobalElements[i]-1, myGlobalElements[i] ),
                             Teuchos::tuple<Scalar> ( negOne, two ) );
    }
    else if (static_cast<global_size_t>(myGlobalElements[i]) == 1) {
      AT.insertGlobalValues( myGlobalElements[i],
                             Teuchos::tuple<Ordinal>( myGlobalElements[i]-1, myGlobalElements[i], myGlobalElements[i]+1 ),
                             Teuchos::tuple<Scalar> ( negOne, two, three ) );
    }
    else if (static_cast<global_size_t>(myGlobalElements[i]) == 2) {
      AT.insertGlobalValues( myGlobalElements[i],
                             Teuchos::tuple<Ordinal>( myGlobalElements[i]-1, myGlobalElements[i], myGlobalElements[i]+1 ),
                             Teuchos::tuple<Scalar> ( negOne, two, negOne ) );
    }
  }

  // Finish up.
  A.fillComplete();
  AT.fillComplete();

  // Compute the transpose of A and compare it against the hand-built AT.
  Tpetra::RowMatrixTransposer<Scalar, Ordinal> transposer (Teuchos::rcpFromRef (A));
  TestMatrix = transposer.createTranspose(); //, TestMatrix/*, tMap*/);

  Teuchos::RCP<Tpetra::CrsMatrix<Scalar, Ordinal> > diffMatrix =
    Tpetra::createCrsMatrix<Scalar, Ordinal>(TestMatrix->getRowMap());

  // Apparently there is a problem with ADD, because while these two matrices are
  // identical, when I add them together I don't get 0 like I should. In fact
  // I just get a matrix that has the exact same entries and sparsity structure.
  // I'll have to come back to this later. But RowMatrixTransposer is working right,
  // and all my other tests are telling me ADD works right too.
  // KLN 06/14/2011
  Tpetra::MatrixMatrix::Add(AT, false, -1.0, *TestMatrix, false, 1.0, diffMatrix);
  diffMatrix->fillComplete();
  //diffMatrix->describe(*out, Teuchos::VERB_EXTREME);

  // Compare the relative difference against a tight tolerance.
  double diffNorm = getNorm(*diffMatrix);
  double realNorm = getNorm(AT);
  double epsilon  = diffNorm/realNorm;

  if (epsilon > 1e-10) {
    *out << "The calculated A transpose and the real one don't match!" << std::endl;
    *out << "Diff Norm: " << diffNorm << std::endl;
    *out << "Real Norm: " << realNorm << std::endl;
    *out << "Epsilon:   " << epsilon  << std::endl;
    return 1;
  }

  return 0;
}
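// The getNorm() helper called above is not shown in this excerpt. The sketch below is one
// plausible implementation, offered as an assumption rather than the test's actual helper:
// it returns the Frobenius norm that Tpetra::CrsMatrix already provides, which is all the
// relative comparison above requires.
double getNorm(const Tpetra::CrsMatrix<double,int>& matrix)
{
  // Frobenius norm: square root of the sum of squares of all stored entries.
  return static_cast<double>(matrix.getFrobeniusNorm());
}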
RCP<BlockCrsMatrix<Scalar, LO, GO, Node>>
convertToBlockCrsMatrix(const Tpetra::CrsMatrix<Scalar, LO, GO, Node>& pointMatrix, const LO &blockSize)
{
  /*
    ASSUMPTIONS:
      1) In the point matrix, all entries associated with a little block are present (even if they are zero).
      2) For a given mesh DOF, point DOFs appear consecutively and in ascending order in row & column maps.
      3) Point column map and block column map are ordered consistently.
  */

  using Teuchos::ArrayView;
  using Teuchos::Array;

  typedef Tpetra::Experimental::BlockCrsMatrix<Scalar,LO,GO,Node> block_crs_matrix_type;
  typedef Tpetra::Map<LO,GO,Node>                                 map_type;
  typedef Tpetra::CrsGraph<LO,GO,Node>                            crs_graph_type;

  const map_type &pointRowMap = *(pointMatrix.getRowMap());
  RCP<const map_type> meshRowMap = createMeshMap<LO,GO,Node>(blockSize, pointRowMap);

  const map_type &pointColMap = *(pointMatrix.getColMap());
  RCP<const map_type> meshColMap = createMeshMap<LO,GO,Node>(blockSize, pointColMap);

  const map_type &pointDomainMap = *(pointMatrix.getDomainMap());
  RCP<const map_type> meshDomainMap = createMeshMap<LO,GO,Node>(blockSize, pointDomainMap);

  const map_type &pointRangeMap = *(pointMatrix.getRangeMap());
  RCP<const map_type> meshRangeMap = createMeshMap<LO,GO,Node>(blockSize, pointRangeMap);

  // Use the graph ctor that provides a column map and an upper bound on nonzeros per row.
  // We can use static profile because the point graph should have at least as many entries per
  // row as the mesh graph.
  RCP<crs_graph_type> meshCrsGraph =
    rcp(new crs_graph_type(meshRowMap, meshColMap,
                           pointMatrix.getGlobalMaxNumRowEntries(), Tpetra::StaticProfile));

  // Fill the graph by walking through the matrix. For each mesh row, we query the collection of point
  // rows associated with it. The point column ids are converted to mesh column ids and put into an array.
  // As each point row collection is finished, the mesh column ids are sorted, made unique, and inserted
  // into the mesh graph.
  ArrayView<const LO> pointColInds;
  ArrayView<const Scalar> pointVals;
  Array<GO> meshColGids;
  meshColGids.reserve(pointMatrix.getGlobalMaxNumRowEntries());
  // Again, I assume that point GIDs associated with a mesh GID are consecutive.
  // If they are not, this will break!!
  for (size_t i=0; i<pointMatrix.getNodeNumRows()/blockSize; i++) {
    for (int j=0; j<blockSize; ++j) {
      LO rowLid = i*blockSize+j;
      pointMatrix.getLocalRowView(rowLid,pointColInds,pointVals);
      // TODO optimization: Since I don't care about values,
      // TODO I should use the graph instead.
      for (int k=0; k<pointColInds.size(); ++k) {
        GO meshColInd = pointColMap.getGlobalElement(pointColInds[k]) / blockSize;
        meshColGids.push_back(meshColInd);
      }
    }
    // The list of mesh GIDs probably contains duplicates because we looped over all point rows in the block.
    // Sort and make unique.
    std::sort(meshColGids.begin(), meshColGids.end());
    meshColGids.erase( std::unique(meshColGids.begin(), meshColGids.end()), meshColGids.end() );
    meshCrsGraph->insertGlobalIndices(meshRowMap->getGlobalElement(i), meshColGids());
    meshColGids.clear();
  }
  meshCrsGraph->fillComplete(meshDomainMap,meshRangeMap);

  // Create and populate the block matrix.
  RCP<block_crs_matrix_type> blockMatrix = rcp(new block_crs_matrix_type(*meshCrsGraph, blockSize));

  // Preallocate the maximum number of (dense) block entries needed by any row.
  int maxBlockEntries = blockMatrix->getNodeMaxNumRowEntries();
  Array<Array<Scalar>> blocks(maxBlockEntries);
  for (int i=0; i<maxBlockEntries; ++i)
    blocks[i].reserve(blockSize*blockSize);
  std::map<int,int> bcol2bentry;            // maps block column index to dense block entries
  std::map<int,int>::iterator iter;

  // Fill the block matrix. We must do this in local index space.
  // TODO: Optimization: We assume the blocks are fully populated in the point matrix. This means
  // TODO: on the first point row in the block row, we know that we're hitting new block col indices.
  // TODO: on other rows, we know the block col indices have all been seen before.
  //int offset;
  //if (pointMatrix.getIndexBase()) offset = 0;
  //else                            offset = 1;

  for (size_t i=0; i<pointMatrix.getNodeNumRows()/blockSize; i++) {
    int blkCnt=0; // how many unique block entries have been encountered so far in the current block row
    for (int j=0; j<blockSize; ++j) {
      LO rowLid = i*blockSize+j;
      pointMatrix.getLocalRowView(rowLid,pointColInds,pointVals);
      for (int k=0; k<pointColInds.size(); ++k) {
        // Convert the point column index to a block column index.
        LO meshColInd = pointColInds[k] / blockSize;
        iter = bcol2bentry.find(meshColInd);
        if (iter == bcol2bentry.end()) {
          // new block column
          bcol2bentry[meshColInd] = blkCnt;
          blocks[blkCnt].push_back(pointVals[k]);
          blkCnt++;
        }
        else {
          // block column found previously
          int littleBlock = iter->second;
          blocks[littleBlock].push_back(pointVals[k]);
        }
      }
    }
    // TODO This inserts the blocks one block entry at a time. It is probably more efficient to
    // TODO store all the blocks in a block row contiguously so they can be inserted with a single call.
    for (iter=bcol2bentry.begin(); iter != bcol2bentry.end(); ++iter) {
      LO localBlockCol = iter->first;
      Scalar *vals = (blocks[iter->second]).getRawPtr();
      blockMatrix->replaceLocalValues(i, &localBlockCol, vals, 1);
    }

    // Done with this block row. Zero everything out.
    for (int j=0; j<maxBlockEntries; ++j)
      blocks[j].clear();
    blkCnt = 0;
    bcol2bentry.clear();
  }

  return blockMatrix;
}
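// A hedged usage sketch for the converter above (the wrapper name 'makeBlockOperator' and the
// block size of 3 are illustrative assumptions): given a fill-complete point matrix whose rows
// are grouped blockSize-at-a-time per mesh node, as required by assumptions 1-3, the call
// below returns the equivalent BlockCrsMatrix.
Teuchos::RCP<Tpetra::Experimental::BlockCrsMatrix<double,int,int> >
makeBlockOperator(const Tpetra::CrsMatrix<double,int,int>& pointA)
{
  const int blockSize = 3; // assumed: 3 point DOFs per mesh node
  return convertToBlockCrsMatrix(pointA, blockSize);
}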
void Redistributor<Node>::redistribute(const ::Tpetra::CrsMatrix<double,int,int,Node>& inputMatrix,
                                       ::Tpetra::CrsMatrix<double,int,int,Node> * &outputMatrix,
                                       bool callFillComplete)
{
  typedef ::Tpetra::Map<int,int,Node> map_type;
  // Assumption: target_map_ and importer_ are held by the class as RCPs to the new row map
  // and to the Import built by create_importer(), respectively.

  if (!created_importer_) {
    create_importer(inputMatrix.getRowMap());
  }

  // First obtain the length of each of my new rows.
  const size_t myOldRows = inputMatrix.getNodeNumRows();
  const size_t myNewRows = target_map_->getNodeNumElements();

  // Ship the old row lengths to their new owners with a vector import.
  Teuchos::Array<double> nnz(myOldRows);
  for (size_t i=0; i < myOldRows; i++) {
    nnz[i] = static_cast<double>(inputMatrix.getNumEntriesInLocalRow(i));
  }

  ::Tpetra::Vector<double,int,int,Node> oldRowSizes(inputMatrix.getRowMap(), nnz());
  ::Tpetra::Vector<double,int,int,Node> newRowSizes(target_map_);
  newRowSizes.doImport(oldRowSizes, *importer_, ::Tpetra::INSERT);

  // Convert the imported row lengths into per-row allocation sizes.
  Teuchos::ArrayRCP<size_t> rowSize = Teuchos::arcp<size_t>(myNewRows);
  Teuchos::ArrayRCP<const double> newRowSizeData = newRowSizes.getData(0);
  for (size_t i=0; i < myNewRows; i++) {
    rowSize[i] = static_cast<size_t>(newRowSizeData[i]);
  }

  // Receive new rows, send old rows.
  outputMatrix = new ::Tpetra::CrsMatrix<double,int,int,Node>(target_map_, rowSize, ::Tpetra::StaticProfile);
  outputMatrix->doImport(inputMatrix, *importer_, ::Tpetra::INSERT);

  // Set the new domain map such that
  // (a) if old DomainMap == old RangeMap, preserve this property,
  // (b) otherwise, let the new DomainMap be the old DomainMap.
  Teuchos::RCP<const map_type> newDomainMap;
  if (inputMatrix.getDomainMap()->isSameAs(*inputMatrix.getRangeMap()))
    newDomainMap = target_map_;   // the new row map, which also serves as the new range map below
  else
    newDomainMap = inputMatrix.getDomainMap();

  if (callFillComplete && !outputMatrix->isFillComplete())
    outputMatrix->fillComplete(newDomainMap, target_map_);

  return;
}
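// A hedged caller-side sketch of the method above (the helper name 'redistributeToRCP' is
// illustrative, and the Redistributor is assumed to be fully constructed elsewhere): because
// the redistributed matrix is returned through a raw pointer, the caller takes ownership,
// for example by wrapping it in an RCP.
template <typename Node>
Teuchos::RCP< ::Tpetra::CrsMatrix<double,int,int,Node> >
redistributeToRCP(Redistributor<Node>& redist,
                  const ::Tpetra::CrsMatrix<double,int,int,Node>& unbalancedA)
{
  ::Tpetra::CrsMatrix<double,int,int,Node>* rawBalancedA = NULL;
  redist.redistribute(unbalancedA, rawBalancedA, true); // fillComplete() is called inside
  return Teuchos::rcp(rawBalancedA);                    // caller now owns the new matrix
}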