// Combine the entries of a ghosted (overlapping row map) matrix into the
// uniquely-owned matrix using the factory's ghosted-to-unique exporter.
//
// @param in   Ghosted matrix; source of the export (not modified).
// @param out  Uniquely-owned matrix; zeroed, filled with the ADD-combined
//             contributions from all processes, and left fill-complete.
void 
TpetraLinearObjFactory<Traits,ScalarT,LocalOrdinalT,GlobalOrdinalT,NodeT>::
ghostToGlobalTpetraMatrix(const Tpetra::CrsMatrix<ScalarT,LocalOrdinalT,GlobalOrdinalT,NodeT> & in,
                          Tpetra::CrsMatrix<ScalarT,LocalOrdinalT,GlobalOrdinalT,NodeT> & out) const
{
   using Teuchos::RCP;

   // do the global distribution
   // NOTE(review): getGhostedExport() is declared elsewhere in this class;
   // presumably it maps the ghosted row map onto the unique row map.
   RCP<ExportType> exporter = getGhostedExport();
   
   // "out" may already be fill-complete from a previous solve, so reopen it
   // for modification before zeroing and exporting into it.
   out.resumeFill();
   out.setAllToScalar(0.0);
   out.doExport(in,*exporter,Tpetra::ADD);
   out.fillComplete();
}
// Example #2
// 0
// Example driver: build a small (4x4) deliberately non-symmetric tridiagonal
// matrix A and its hand-coded transpose AT, compute the transpose of A with
// Tpetra::RowMatrixTransposer, and verify the two agree by checking the
// relative norm of their difference.  Returns 0 on success, nonzero otherwise.
// NOTE(review): getNorm() is assumed to be defined elsewhere in this file.
int main(int argc, char* argv[]){
        Teuchos::oblackholestream blackhole;
        Teuchos::GlobalMPISession mpiSession(&argc,&argv,&blackhole);
        typedef double Scalar;
        typedef int Ordinal;
        using Tpetra::global_size_t;

        Teuchos::RCP<const Teuchos::Comm<int> > comm = Tpetra::DefaultPlatform::getDefaultPlatform().getComm();
  Teuchos::RCP<Teuchos::FancyOStream> out = Teuchos::fancyOStream(Teuchos::rcp(&std::cout,false));
  //out->setOutputToRootOnly(comm->getRank());

        size_t myRank = comm->getRank();
        size_t numProc = comm->getSize();
        bool verbose = (myRank==0);

        std::cout << *comm;

        // The test matrix is hard-coded for exactly 4 global rows: the AT
        // construction below enumerates global IDs 0..3 explicitly.
        const global_size_t numGlobalElements = 4;
        if (numGlobalElements < numProc) {
                if (verbose) {
                        std::cout << "numGlobalBlocks = " << numGlobalElements
                        << " cannot be less than the number of processors = " << numProc << std::endl;
                }
                return -1;
        }

        // Construct a Map that puts approximately the same number of equations on each processor.

        Teuchos::RCP<const Tpetra::Map<Ordinal> > map = Tpetra::createUniformContigMap<Ordinal,Ordinal>(numGlobalElements, comm);

        // Get update list and number of local equations from newly created map.

        const size_t numMyElements = map->getNodeNumElements();

        Teuchos::ArrayView<const Ordinal> myGlobalElements = map->getNodeElementList();

        // Create an integer vector NumNz that is used to build the matrix.
        // NumNz[i] is the total number of entries (diagonal included) for the
        // ith global equation on this processor: 2 on boundary rows, 3 inside.

        Teuchos::ArrayRCP<size_t> NumNz = Teuchos::arcp<size_t>(numMyElements);

        // We are building a tridiagonal matrix; each row has at most three
        // entries (the first and last rows have only two).

        for (size_t i=0; i < numMyElements; ++i) {
                if (myGlobalElements[i] == 0 || static_cast<global_size_t>(myGlobalElements[i]) == numGlobalElements-1) {
                // boundary
                        NumNz[i] = 2;
                }
                else {
                        NumNz[i] = 3;
                }
        }

        // Create a Tpetra::Matrix using the Map, with a static allocation dictated by NumNz
        Tpetra::CrsMatrix<Scalar,Ordinal>  A (map, NumNz, Tpetra::StaticProfile);
        Tpetra::CrsMatrix<Scalar,Ordinal>  AT(map, NumNz, Tpetra::StaticProfile);
        Teuchos::RCP< Tpetra::CrsMatrix<Scalar,Ordinal> > TestMatrix = Teuchos::null;

        // We are done with NumNZ
        NumNz = Teuchos::null;

        // Add rows of A one-at-a-time.  Interior rows get (3 2 -1) rather than
        // the symmetric (-1 2 -1): the sub-diagonal value 3 makes A
        // non-symmetric, so the transpose comparison below is meaningful.
        const Scalar two    = static_cast<Scalar>( 2.0);
        const Scalar negOne = static_cast<Scalar>(-1.0);
        const Scalar three = static_cast<Scalar>(3.0);
        for (size_t i=0; i<numMyElements; i++) {
                if (myGlobalElements[i] == 0) {
                        A.insertGlobalValues( myGlobalElements[i],
                        Teuchos::tuple<Ordinal>( myGlobalElements[i], myGlobalElements[i]+1 ),
                        Teuchos::tuple<Scalar> ( two, negOne ) );
                }
                else if (static_cast<global_size_t>(myGlobalElements[i]) == numGlobalElements-1) {
                        A.insertGlobalValues( myGlobalElements[i],
                        Teuchos::tuple<Ordinal>( myGlobalElements[i]-1, myGlobalElements[i] ),
                        Teuchos::tuple<Scalar> ( negOne, two ) );
                }
                else {
                        A.insertGlobalValues( myGlobalElements[i],
                        Teuchos::tuple<Ordinal>( myGlobalElements[i]-1, myGlobalElements[i], myGlobalElements[i]+1 ),
                        Teuchos::tuple<Scalar> ( three, two, negOne ) );
                }
        }

        // Build AT as the transpose of A by hand.  The entries are enumerated
        // per global ID and are only correct for numGlobalElements == 4.
        for (size_t i=0; i<numMyElements; i++) {
                if (myGlobalElements[i] == 0) {
                        AT.insertGlobalValues( myGlobalElements[i],
                        Teuchos::tuple<Ordinal>( myGlobalElements[i], myGlobalElements[i]+1 ),
                        Teuchos::tuple<Scalar> ( two, three ) );
                }
                else if (static_cast<global_size_t>(myGlobalElements[i]) == numGlobalElements-1) {
                        AT.insertGlobalValues( myGlobalElements[i],
                        Teuchos::tuple<Ordinal>( myGlobalElements[i]-1, myGlobalElements[i] ),
                        Teuchos::tuple<Scalar> ( negOne, two ) );
                }
                else if(static_cast<global_size_t>(myGlobalElements[i])==1){
                        AT.insertGlobalValues( myGlobalElements[i],
                        Teuchos::tuple<Ordinal>( myGlobalElements[i]-1, myGlobalElements[i], myGlobalElements[i]+1 ),
                        Teuchos::tuple<Scalar> ( negOne, two, three ) );
                }
                else if(static_cast<global_size_t>(myGlobalElements[i])==2){
                        AT.insertGlobalValues( myGlobalElements[i],
                        Teuchos::tuple<Ordinal>( myGlobalElements[i]-1, myGlobalElements[i], myGlobalElements[i]+1 ),
                        Teuchos::tuple<Scalar> ( negOne, two, negOne ) );
                }
        }

        // Finish up
        A.fillComplete();
        AT.fillComplete();

        // Compute transpose(A) with the library and compare against AT.
        Tpetra::RowMatrixTransposer<Scalar, Ordinal> transposer (Teuchos::rcpFromRef (A));
        TestMatrix = transposer.createTranspose(); //, TestMatrix/*, tMap*/);

  Teuchos::RCP<Tpetra::CrsMatrix<Scalar, Ordinal> > diffMatrix = Tpetra::createCrsMatrix<Scalar, Ordinal>(TestMatrix->getRowMap());
  //Apparently there is a problem with ADD because while these two matricies are
  //identical when I add them together I don't get 0 like I should. Infact
  //I just get a matrix that has the exact same entries and sparsity structure.
  //I'll have to come back to this later. But RowMatrixTransposer is working right.
  //And all my other tests are telling me ADD works right too.
  //KLN 06/14/2011
  Tpetra::MatrixMatrix::Add(AT,false,-1.0,*TestMatrix,false, 1.0,diffMatrix);
  diffMatrix->fillComplete();
  //diffMatrix->describe(*out, Teuchos::VERB_EXTREME);
  // Relative error: ||AT - transpose(A)|| / ||AT||, expected ~0.
  double diffNorm = getNorm(*diffMatrix);
  double realNorm = getNorm(AT);
  double epsilon = diffNorm/realNorm;
  if(epsilon > 1e-10){
    *out << "The calculated A transpose and the real one don't match!" << std::endl;
    *out << "Diff Norm: " << diffNorm << std::endl;
    *out << "Real Norm: " << realNorm << std::endl;
    *out << "Epsilon: " << epsilon << std::endl;
    return 1;
  }




        return 0;
}
  /// \brief Convert a point CrsMatrix into a BlockCrsMatrix whose dense
  ///        blocks have dimension blockSize x blockSize.
  ///
  /// \param pointMatrix [in] fill-complete point matrix
  /// \param blockSize   [in] number of point DOFs per mesh DOF
  /// \return the block matrix, with mesh maps derived from the point maps
  ///
  /// ASSUMPTIONS (unchecked):
  ///    1) In point matrix, all entries associated with a little block are present (even if they are zero).
  ///    2) For given mesh DOF, point DOFs appear consecutively and in ascending order in row & column maps.
  ///    3) Point column map and block column map are ordered consistently.
  RCP<BlockCrsMatrix<Scalar, LO, GO, Node>>
  convertToBlockCrsMatrix(const Tpetra::CrsMatrix<Scalar, LO, GO, Node>& pointMatrix, const LO &blockSize)
  {
      using Teuchos::ArrayView;
      using Teuchos::Array;

      typedef Tpetra::Experimental::BlockCrsMatrix<Scalar,LO,GO,Node> block_crs_matrix_type;
      typedef Tpetra::Map<LO,GO,Node>                                 map_type;
      typedef Tpetra::CrsGraph<LO,GO,Node>                            crs_graph_type;

      // Derive mesh (block) versions of the four point maps.
      // NOTE(review): createMeshMap is assumed to be declared elsewhere in
      // this file; presumably it collapses blockSize point GIDs per mesh GID.
      const map_type &pointRowMap = *(pointMatrix.getRowMap());
      RCP<const map_type> meshRowMap = createMeshMap<LO,GO,Node>(blockSize, pointRowMap);

      const map_type &pointColMap = *(pointMatrix.getColMap());
      RCP<const map_type> meshColMap = createMeshMap<LO,GO,Node>(blockSize, pointColMap);

      const map_type &pointDomainMap = *(pointMatrix.getDomainMap());
      RCP<const map_type> meshDomainMap = createMeshMap<LO,GO,Node>(blockSize, pointDomainMap);

      const map_type &pointRangeMap = *(pointMatrix.getRangeMap());
      RCP<const map_type> meshRangeMap = createMeshMap<LO,GO,Node>(blockSize, pointRangeMap);

      // Use graph ctor that provides column map and upper bound on nonzeros per row.
      // We can use static profile because the point graph should have at least as many entries per
      // row as the mesh graph.
      RCP<crs_graph_type> meshCrsGraph = rcp(new crs_graph_type(meshRowMap, meshColMap,
                                                 pointMatrix.getGlobalMaxNumRowEntries(), Tpetra::StaticProfile));

      // Number of local mesh (block) rows; hoisted out of both loops below so
      // the bound is not recomputed every iteration.
      const size_t numLocalBlockRows = pointMatrix.getNodeNumRows() / blockSize;

      // Fill the graph by walking through the matrix.  For each mesh row, we query the collection of point
      // rows associated with it. The point column ids are converted to mesh column ids and put into an array.
      // As each point row collection is finished, the mesh column ids are sorted, made unique, and inserted
      // into the mesh graph.
      ArrayView<const LO> pointColInds;
      ArrayView<const Scalar> pointVals;
      Array<GO> meshColGids;
      meshColGids.reserve(pointMatrix.getGlobalMaxNumRowEntries());
      //again, I assume that point GIDs associated with a mesh GID are consecutive.
      //if they are not, this will break!!
      for (size_t i=0; i<numLocalBlockRows; i++) {
        for (int j=0; j<blockSize; ++j) {
          LO rowLid = i*blockSize+j;
          pointMatrix.getLocalRowView(rowLid,pointColInds,pointVals); //TODO optimization: Since I don't care about values,
                                                                      //TODO I should use the graph instead.
          for (int k=0; k<pointColInds.size(); ++k) {
            GO meshColInd = pointColMap.getGlobalElement(pointColInds[k]) / blockSize;
            meshColGids.push_back(meshColInd);
          }
        }
        //List of mesh GIDs probably contains duplicates because we looped over all point rows in the block.
        //Sort and make unique.
        std::sort(meshColGids.begin(), meshColGids.end());
        meshColGids.erase( std::unique(meshColGids.begin(), meshColGids.end()), meshColGids.end() );
        meshCrsGraph->insertGlobalIndices(meshRowMap->getGlobalElement(i), meshColGids());
        meshColGids.clear();
      }
      meshCrsGraph->fillComplete(meshDomainMap,meshRangeMap);

      //create and populate the block matrix
      RCP<block_crs_matrix_type> blockMatrix = rcp(new block_crs_matrix_type(*meshCrsGraph, blockSize));

      //preallocate the maximum number of (dense) block entries needed by any row
      int maxBlockEntries = blockMatrix->getNodeMaxNumRowEntries();
      Array<Array<Scalar>> blocks(maxBlockEntries);
      for (int i=0; i<maxBlockEntries; ++i)
        blocks[i].reserve(blockSize*blockSize);
      std::map<int,int> bcol2bentry;             //maps block column index to dense block entries
      std::map<int,int>::iterator iter;
      //Fill the block matrix.  We must do this in local index space.
      //TODO: Optimization: We assume the blocks are fully populated in the point matrix.  This means
      //TODO: on the first point row in the block row, we know that we're hitting new block col indices.
      //TODO: on other rows, we know the block col indices have all been seen before
      for (size_t i=0; i<numLocalBlockRows; i++) {
        int blkCnt=0; //how many unique block entries encountered so far in current block row
        for (int j=0; j<blockSize; ++j) {
          LO rowLid = i*blockSize+j;
          pointMatrix.getLocalRowView(rowLid,pointColInds,pointVals);
          for (int k=0; k<pointColInds.size(); ++k) {
            //convert point column to block col
            LO meshColInd = pointColInds[k] / blockSize;
            iter = bcol2bentry.find(meshColInd);
            if (iter == bcol2bentry.end()) {
              //new block column
              bcol2bentry[meshColInd] = blkCnt;
              blocks[blkCnt].push_back(pointVals[k]);
              blkCnt++;
            } else {
              //block column found previously
              int littleBlock = iter->second;
              blocks[littleBlock].push_back(pointVals[k]);
            }
          }
        }
        // TODO This inserts the blocks one block entry at a time.  It is probably more efficient to
        // TODO store all the blocks in a block row contiguously so they can be inserted with a single call.
        for (iter=bcol2bentry.begin(); iter != bcol2bentry.end(); ++iter) {
          LO localBlockCol = iter->first;
          Scalar *vals = (blocks[iter->second]).getRawPtr();
          blockMatrix->replaceLocalValues(i, &localBlockCol, vals, 1);
        }

        //Done with block row.  Zero everything out.
        //(blkCnt needs no reset here: it is re-declared at the top of each
        //outer iteration; the old dead "blkCnt = 0;" store was removed.)
        for (int j=0; j<maxBlockEntries; ++j)
          blocks[j].clear();
        bcol2bentry.clear();
      }

      return blockMatrix;

  }
// Redistribute inputMatrix onto this Redistributor's target map.
//
// @param inputMatrix      source matrix (read only)
// @param outputMatrix     set to a newly allocated matrix on the target map;
//                         the CALLER takes ownership and must delete it
// @param callFillComplete if true, fill-complete the output matrix before
//                         returning
//
// NOTE(review): this body calls Epetra-style members (RowMap, NumMyRows,
// Import, FillComplete, ...) on ::Tpetra types — presumably a transition shim
// or adapter layer; confirm against the class's matrix typedefs.
void 
Redistributor<Node>::redistribute(const ::Tpetra::CrsMatrix<double,int,int,Node>& inputMatrix, 
                                  ::Tpetra::CrsMatrix<double,int,int,Node> * &outputMatrix, bool callFillComplete)
{
  if (!created_importer_) {
    create_importer(inputMatrix.RowMap());
  }

  // First obtain the length of each of my new rows

  const int myOldRows = inputMatrix.NumMyRows();
  const int myNewRows = target_map_->NumMyElements();

  // Teuchos::Array owns its storage (RAII): the original raw new[]/delete[]
  // leaked the zero-length allocations when myOldRows/myNewRows was 0,
  // because the deletes were guarded by "if (myOldRows)" / "if (myNewRows)".
  Teuchos::Array<double> nnz(myOldRows);
  for (int i=0; i < myOldRows; i++){
    nnz[i] = inputMatrix.NumMyEntries(i);
  }

  // The Vector constructor copies the view's contents, so nnz may be
  // destroyed afterwards without affecting oldRowSizes.
  ::Tpetra::Vector<double,int,int,Node> oldRowSizes(inputMatrix.RowMap(), nnz());

  ::Tpetra::Vector<double,int,int,Node> newRowSizes(*target_map_);

  // Ship each row's entry count to the process that will own it.
  newRowSizes.Import(oldRowSizes, *importer_, ::Tpetra::INSERT);

  Teuchos::Array<int> rowSize(myNewRows);
  for (int i=0; i< myNewRows; i++){
    rowSize[i] = static_cast<int>(newRowSizes[i]);
  }

  // Receive new rows, send old rows

  outputMatrix = new ::Tpetra::CrsMatrix<double,int,int,Node> (*target_map_, rowSize(), true);

  outputMatrix->Import(inputMatrix, *importer_, ::Tpetra::INSERT);

  // Set the new domain map such that
  // (a) if old DomainMap == old RangeMap, preserve this property,
  // (b) otherwise, let the new DomainMap be the old DomainMap 
  const ::Tpetra::Map<int,int,Node> *newDomainMap;
  if (inputMatrix.DomainMap().SameAs(inputMatrix.RangeMap()))
     newDomainMap = &(outputMatrix->RangeMap());
  else
     newDomainMap = &(inputMatrix.DomainMap());

  if (callFillComplete && (!outputMatrix->Filled()))
    outputMatrix->FillComplete(*newDomainMap,  *target_map_);

  return;
}