Пример #1
0
int Drumm2(const Epetra_Map& map, bool verbose)
{
  //Simple 2-element problem (element as in "finite-element") from
  //Clif Drumm. Two triangular elements, one per processor, as shown
  //here:
  //
  //   *----*
  //  3|\  2|
  //   | \  |
  //   | 0\1|
  //   |   \|
  //   *----*
  //  0    1
  //
  //Element 0 on processor 0, element 1 on processor 1.
  //Processor 0 will own nodes 0,1,3 and processor 1 will own node 2.
  //Each processor will pass a 3x3 element-matrix to Epetra_FECrsMatrix.
  //After GlobalAssemble(), the matrix should be as follows:
  //
  //         row 0: 2  1  0  1
  //proc 0   row 1: 1  4  1  2
  //         row 2: 0  1  2  1
  //----------------------------------
  //proc 1   row 3: 1  2  1  4
  //

  int numProcs = map.Comm().NumProc();
  int localProc = map.Comm().MyPID();

  if (numProcs != 2) return(0);

  int indexBase = 0, ierr = 0;

  double* values = new double[9];
  values[0] = 2.0;
  values[1] = 1.0;
  values[2] = 1.0;
  values[3] = 1.0;
  values[4] = 2.0;
  values[5] = 1.0;
  values[6] = 1.0;
  values[7] = 1.0;
  values[8] = 2.0;

  if (localProc == 0) {
    int numMyNodes = 3;
    int* myNodes = new int[numMyNodes];
    myNodes[0] = 0;
    myNodes[1] = 1;
    myNodes[2] = 3;

    Epetra_Map Map(-1, numMyNodes, myNodes, indexBase, map.Comm());

    int rowLengths = 3;
    Epetra_FECrsMatrix A(Copy, Map, rowLengths);

    EPETRA_TEST_ERR( A.InsertGlobalValues(numMyNodes, myNodes,
			  numMyNodes, myNodes,
			  values, Epetra_FECrsMatrix::ROW_MAJOR),ierr);

    EPETRA_TEST_ERR( A.GlobalAssemble(), ierr );
    EPETRA_TEST_ERR( A.GlobalAssemble(), ierr );

    if (verbose) {
    A.Print(cout);
    }

    //now let's make sure we can do a matvec with this matrix.
    Epetra_Vector x(Map), y(Map);
    x.PutScalar(1.0);
    EPETRA_TEST_ERR( A.Multiply(false, x, y), ierr);

    if (verbose&&localProc==0) {
      cout << "y = A*x, x=1.0's"<<endl;
    }

    if (verbose) {
    y.Print(cout);
    }

    delete [] myNodes;
    delete [] values;
  }
  else {
    int numMyNodes = 1;
    int* myNodes = new int[numMyNodes];
    myNodes[0] = 2;

    Epetra_Map Map(-1, numMyNodes, myNodes, indexBase, map.Comm());

    int rowLengths = 3;
    Epetra_FECrsMatrix A(Copy, Map, rowLengths);

    delete [] myNodes;
    numMyNodes = 3;
    myNodes = new int[numMyNodes];
    myNodes[0] = 1;
    myNodes[1] = 2;
    myNodes[2] = 3;

    EPETRA_TEST_ERR( A.InsertGlobalValues(numMyNodes, myNodes,
			  numMyNodes, myNodes,
			  values, Epetra_FECrsMatrix::ROW_MAJOR),ierr);

    EPETRA_TEST_ERR( A.GlobalAssemble(), ierr );
    EPETRA_TEST_ERR( A.GlobalAssemble(), ierr );

    if (verbose) {
    A.Print(cout);
    }

    //now let's make sure we can do a matvec with this matrix.
    Epetra_Vector x(Map), y(Map);
    x.PutScalar(1.0);
    EPETRA_TEST_ERR( A.Multiply(false, x, y), ierr);

    if (verbose) {
    y.Print(cout);
    }

    delete [] myNodes;
    delete [] values;
  }

  return(0);
}
Пример #2
0
//=============================================================================
Epetra_Map * Epetra_Map::RemoveEmptyProcesses() const
{
#ifdef HAVE_MPI
  const Epetra_MpiComm * MpiComm = dynamic_cast<const Epetra_MpiComm*>(&Comm());

  // If the Comm isn't MPI, just treat this as a copy constructor
  if(!MpiComm) return new Epetra_Map(*this);

  MPI_Comm NewComm,MyMPIComm = MpiComm->Comm();

  // Create the new communicator.  MPI_Comm_split returns a valid
  // communicator on all processes.  On processes where color == MPI_UNDEFINED,
  // ignore the result.  Passing key == 0 tells MPI to order the
  // processes in the new communicator by their rank in the old
  // communicator.
  const int color = (NumMyElements() == 0) ? MPI_UNDEFINED : 1;

  // MPI_Comm_split must be called collectively over the original
  // communicator.  We can't just call it on processes with color
  // one, even though we will ignore its result on processes with
  // color zero.
  int rv = MPI_Comm_split(MyMPIComm,color,0,&NewComm);
  if(rv!=MPI_SUCCESS) throw ReportError("Epetra_Map::RemoveEmptyProcesses: MPI_Comm_split failed.",-1);

  if(color == MPI_UNDEFINED)
    return 0; // We're not in the new map
  else {
    Epetra_MpiComm * NewEpetraComm = new Epetra_MpiComm(NewComm);

    // Use the copy constructor for a new map, but basically because it does nothing useful
    Epetra_Map * NewMap = new Epetra_Map(*this);

    // Get rid of the old BlockMapData, now make a new one from scratch...
    NewMap->CleanupData();
    if(GlobalIndicesInt()) {
#ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
      NewMap->BlockMapData_ = new Epetra_BlockMapData(NumGlobalElements(),0,IndexBase(),*NewEpetraComm,false);
#endif
    }
    else {
#ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
      NewMap->BlockMapData_ = new Epetra_BlockMapData(NumGlobalElements64(),0,IndexBase64(),*NewEpetraComm,true);
#endif
    }

    // Now copy all of the relevent bits of BlockMapData...
    //    NewMap->BlockMapData_->Comm_                    = NewEpetraComm;
    NewMap->BlockMapData_->LID_                     = BlockMapData_->LID_;
#ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
    NewMap->BlockMapData_->MyGlobalElements_int_    = BlockMapData_->MyGlobalElements_int_;
#endif
#ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
    NewMap->BlockMapData_->MyGlobalElements_LL_     = BlockMapData_->MyGlobalElements_LL_;
#endif
    NewMap->BlockMapData_->FirstPointInElementList_ = BlockMapData_->FirstPointInElementList_;
    NewMap->BlockMapData_->ElementSizeList_         = BlockMapData_->ElementSizeList_;
    NewMap->BlockMapData_->PointToElementList_      = BlockMapData_->PointToElementList_;

    NewMap->BlockMapData_->NumGlobalElements_       = BlockMapData_->NumGlobalElements_;
    NewMap->BlockMapData_->NumMyElements_           = BlockMapData_->NumMyElements_;
    NewMap->BlockMapData_->IndexBase_               = BlockMapData_->IndexBase_;
    NewMap->BlockMapData_->ElementSize_             = BlockMapData_->ElementSize_;
    NewMap->BlockMapData_->MinMyElementSize_        = BlockMapData_->MinMyElementSize_;
    NewMap->BlockMapData_->MaxMyElementSize_        = BlockMapData_->MaxMyElementSize_;
    NewMap->BlockMapData_->MinElementSize_          = BlockMapData_->MinElementSize_;
    NewMap->BlockMapData_->MaxElementSize_          = BlockMapData_->MaxElementSize_;
    NewMap->BlockMapData_->MinAllGID_               = BlockMapData_->MinAllGID_;
    NewMap->BlockMapData_->MaxAllGID_               = BlockMapData_->MaxAllGID_;
    NewMap->BlockMapData_->MinMyGID_                = BlockMapData_->MinMyGID_;
    NewMap->BlockMapData_->MaxMyGID_                = BlockMapData_->MaxMyGID_;
    NewMap->BlockMapData_->MinLID_                  = BlockMapData_->MinLID_;
    NewMap->BlockMapData_->MaxLID_                  = BlockMapData_->MaxLID_;
    NewMap->BlockMapData_->NumGlobalPoints_         = BlockMapData_->NumGlobalPoints_;
    NewMap->BlockMapData_->NumMyPoints_             = BlockMapData_->NumMyPoints_;
    NewMap->BlockMapData_->ConstantElementSize_     = BlockMapData_->ConstantElementSize_;
    NewMap->BlockMapData_->LinearMap_               = BlockMapData_->LinearMap_;
    NewMap->BlockMapData_->DistributedGlobal_       = NewEpetraComm->NumProc()==1 ? false : BlockMapData_->DistributedGlobal_;
    NewMap->BlockMapData_->OneToOneIsDetermined_    = BlockMapData_->OneToOneIsDetermined_;
    NewMap->BlockMapData_->OneToOne_                = BlockMapData_->OneToOne_;
    NewMap->BlockMapData_->GlobalIndicesInt_        = BlockMapData_->GlobalIndicesInt_;
    NewMap->BlockMapData_->GlobalIndicesLongLong_   = BlockMapData_->GlobalIndicesLongLong_;
    NewMap->BlockMapData_->LastContiguousGID_       = BlockMapData_->LastContiguousGID_;
    NewMap->BlockMapData_->LastContiguousGIDLoc_    = BlockMapData_->LastContiguousGIDLoc_;
    NewMap->BlockMapData_->LIDHash_                 = BlockMapData_->LIDHash_ ? new Epetra_HashTable<int>(*BlockMapData_->LIDHash_) : 0;

    // Delay directory construction
    NewMap->BlockMapData_->Directory_               = 0;

    // Cleanup
    delete NewEpetraComm;

    return NewMap;
  }
#else
    // MPI isn't compiled, so just treat this as a copy constructor
    return new Epetra_Map(*this);
#endif
}
Пример #3
0
int main(int argc, char *argv[]) {

  using Teuchos::RCP; // reference count pointers
  using Teuchos::rcp; //

  //
  // MPI initialization using Teuchos
  //

#ifdef HAVE_MPI
  MPI_Init(&argc, &argv);
  Epetra_MpiComm comm(MPI_COMM_WORLD);
#else
  Epetra_SerialComm comm;
#endif

  //
  // Parameters
  //

  GlobalOrdinal numGlobalElements = 256; // problem size

  //
  // Construct the problem
  //

  // Construct a Map that puts approximately the same number of equations on each processor
  const Epetra_Map map(numGlobalElements, 0, comm);

  // Get update list and number of local equations from newly created map.
  const size_t         numMyElements    = map.NumMyElements();
  const GlobalOrdinal* myGlobalElements = map.MyGlobalElements();

  // Create a CrsMatrix using the map, with a dynamic allocation of 3 entries per row
  RCP<Epetra_CrsMatrix> A = rcp(new Epetra_CrsMatrix(Copy, map, 3));

  // Add rows one-at-a-time
  for (size_t i = 0; i < numMyElements; i++) {
    if (myGlobalElements[i] == 0) {

      //TODO: should be rewritten in an Epetra style
      A->InsertGlobalValues(myGlobalElements[i], 2,
                            Teuchos::tuple<Scalar> (2.0, -1.0).getRawPtr(),
                            Teuchos::tuple<GlobalOrdinal>(myGlobalElements[i], myGlobalElements[i] +1).getRawPtr());

    }
    else if (myGlobalElements[i] == numGlobalElements - 1) {
      A->InsertGlobalValues(myGlobalElements[i], 2,
                            Teuchos::tuple<Scalar> (-1.0, 2.0).getRawPtr(),
                            Teuchos::tuple<GlobalOrdinal>(myGlobalElements[i] -1, myGlobalElements[i]).getRawPtr());
    }
    else {
      A->InsertGlobalValues(myGlobalElements[i], 3,
                            Teuchos::tuple<Scalar> (-1.0, 2.0, -1.0).getRawPtr(),
                            Teuchos::tuple<GlobalOrdinal>(myGlobalElements[i] -1, myGlobalElements[i], myGlobalElements[i] +1).getRawPtr());
    }
  }

  // Complete the fill, ask that storage be reallocated and optimized
  A->FillComplete();

  //
  // Construct a multigrid preconditioner
  //

  // Turns a Epetra_CrsMatrix into a MueLu::Matrix
  RCP<Xpetra::CrsMatrix<SC, LO, GO, NO, LMO> > mueluA_ = rcp(new Xpetra::EpetraCrsMatrix(A)); //TODO: should not be needed
  RCP<Xpetra::Matrix <SC, LO, GO, NO, LMO> > mueluA  = rcp(new Xpetra::CrsMatrixWrap<SC, LO, GO, NO, LMO>(mueluA_));

  // Multigrid Hierarchy
  RCP<Hierarchy> H = rcp(new Hierarchy(mueluA));
  H->setVerbLevel(Teuchos::VERB_HIGH);

  // Multigrid setup phase (using default parameters)
  H->Setup();

  //
  // Define RHS / LHS
  //

  RCP<Epetra_Vector> X = rcp(new Epetra_Vector(map));
  RCP<Epetra_Vector> B = rcp(new Epetra_Vector(map));

  X->PutScalar((Scalar) 0.0);
  B->SetSeed(846930886); B->Random();

#ifndef HAVE_MUELU_BELOS

  //
  // Use AMG directly as an iterative solver (not as a preconditionner)
  //

  int nIts = 9;

  // Wrap Epetra Vectors into Xpetra Vectors
  RCP<Vector> mueluX = rcp(new Xpetra::EpetraVector(X));
  RCP<Vector> mueluB = rcp(new Xpetra::EpetraVector(B));

  H->Iterate(*mueluB, nIts, *mueluX);

  // Print relative residual norm
  ST::magnitudeType residualNorms = Utils::ResidualNorm(*mueluA, *mueluX, *mueluB)[0];
  if (comm.MyPID() == 0)
    std::cout << "||Residual|| = " << residualNorms << std::endl;

#else // HAVE_MUELU_BELOS

  //
  // Solve Ax = b using AMG as a preconditioner in Belos
  //

  // Matrix and Multivector type that will be used with Belos
  typedef Epetra_MultiVector   MV;
  typedef Belos::OperatorT<MV> OP;

  // Define Operator and Preconditioner
  RCP<OP> belosOp   = rcp(new Belos::XpetraOp<SC, LO, GO, NO, LMO>(mueluA)); // Turns a Xpetra::Matrix object into a Belos operator
  RCP<OP> belosPrec = rcp(new Belos::MueLuOp<SC, LO, GO, NO, LMO>(H));       // Turns a MueLu::Hierarchy object into a Belos operator

  // Construct a Belos LinearProblem object
  RCP< Belos::LinearProblem<SC, MV, OP> > belosProblem = rcp(new Belos::LinearProblem<SC, MV, OP>(belosOp, X, B));
  belosProblem->setLeftPrec(belosPrec);

  bool set = belosProblem->setProblem();
  if (set == false) {
    std::cout << std::endl << "ERROR:  Belos::LinearProblem failed to set up correctly!" << std::endl;
    return EXIT_FAILURE;
  }

  // Belos parameter list
  int maxIts = 20;
  double tol = 1e-4;
  Teuchos::ParameterList belosList;
  belosList.set("Maximum Iterations",    maxIts); // Maximum number of iterations allowed
  belosList.set("Convergence Tolerance", tol);    // Relative convergence tolerance requested
  belosList.set("Verbosity", Belos::Errors + Belos::Warnings + Belos::TimingDetails + Belos::StatusTestDetails);

  // Create an iterative solver manager
  RCP< Belos::SolverManager<SC, MV, OP> > solver = rcp(new Belos::BlockCGSolMgr<SC, MV, OP>(belosProblem, rcp(&belosList, false)));

  // Perform solve
  Belos::ReturnType ret = solver->solve();

  // Get the number of iterations for this solve.
  std::cout << "Number of iterations performed for this solve: " << solver->getNumIters() << std::endl;

  // Compute actual residuals.
  int numrhs=1;
  bool badRes = false;
  std::vector<SC> actual_resids(numrhs);
  std::vector<SC> rhs_norm(numrhs);
  RCP<Epetra_MultiVector > resid = rcp(new Epetra_MultiVector(map, numrhs));

  typedef Belos::OperatorTraits<SC, MV, OP> OPT;
  typedef Belos::MultiVecTraits<SC, MV>     MVT;

  OPT::Apply(*belosOp, *X, *resid);
  MVT::MvAddMv(-1.0, *resid, 1.0, *B, *resid);
  MVT::MvNorm(*resid, actual_resids);
  MVT::MvNorm(*B, rhs_norm);
  std::cout<< "---------- Actual Residuals (normalized) ----------"<<std::endl<<std::endl;
  for (int i = 0; i < numrhs; i++) {
    SC actRes = actual_resids[i]/rhs_norm[i];
    std::cout <<"Problem " << i << " : \t" << actRes <<std::endl;
    if (actRes > tol) { badRes = true; }
  }

  // Check convergence
  if (ret != Belos::Converged || badRes) {
    std::cout << std::endl << "ERROR:  Belos did not converge! " << std::endl;
    return EXIT_FAILURE;
  }
  std::cout << std::endl << "SUCCESS:  Belos converged!" << std::endl;

#endif // HAVE_MUELU_BELOS


#ifdef HAVE_MPI
  MPI_Finalize();
#endif

  return EXIT_SUCCESS;
}
Пример #4
0
Epetra_CrsGraph * BlockUtility::TGenerateBlockGraph(
        const Epetra_CrsGraph & BaseGraph,
        const Epetra_CrsGraph & LocalBlockGraph,
        const Epetra_Comm & GlobalComm )
{
  const Epetra_BlockMap & BaseRowMap = BaseGraph.RowMap();
  const Epetra_BlockMap & BaseColMap = BaseGraph.ColMap();
  int_type ROffset = BlockUtility::TCalculateOffset<int_type>(BaseRowMap);
  (void) ROffset; // Silence "unused variable" compiler warning.
  int_type COffset = BlockUtility::TCalculateOffset<int_type>(BaseColMap);

  //Get Base Global IDs
  const Epetra_BlockMap & BlockRowMap = LocalBlockGraph.RowMap();
  const Epetra_BlockMap & BlockColMap = LocalBlockGraph.ColMap();

  int NumBlockRows = BlockRowMap.NumMyElements();
  vector<int_type> RowIndices(NumBlockRows);
  BlockRowMap.MyGlobalElements(&RowIndices[0]);

  int Size = BaseRowMap.NumMyElements();

  Epetra_Map *GlobalRowMap =
    GenerateBlockMap(BaseRowMap, BlockRowMap, GlobalComm);


  int MaxIndices = BaseGraph.MaxNumIndices();
  vector<int_type> Indices(MaxIndices);

  Epetra_CrsGraph * GlobalGraph = new Epetra_CrsGraph( Copy,
                               dynamic_cast<Epetra_BlockMap&>(*GlobalRowMap),
                               0 );

  int NumBlockIndices, NumBaseIndices;
  int *BlockIndices, *BaseIndices;
  for( int i = 0; i < NumBlockRows; ++i )
  {
    LocalBlockGraph.ExtractMyRowView(i, NumBlockIndices, BlockIndices);

    for( int j = 0; j < Size; ++j )
    {
      int_type GlobalRow = (int_type) GlobalRowMap->GID64(j+i*Size);

      BaseGraph.ExtractMyRowView( j, NumBaseIndices, BaseIndices );
      for( int k = 0; k < NumBlockIndices; ++k )
      {
        int_type ColOffset = (int_type) BlockColMap.GID64(BlockIndices[k]) * COffset;

        for( int l = 0; l < NumBaseIndices; ++l )
          Indices[l] = (int_type) BaseGraph.GCID64(BaseIndices[l]) + ColOffset;

        GlobalGraph->InsertGlobalIndices( GlobalRow, NumBaseIndices, &Indices[0] );
      }
    }
  }

  const Epetra_BlockMap & BaseDomainMap = BaseGraph.DomainMap();
  const Epetra_BlockMap & BaseRangeMap = BaseGraph.RangeMap();
  const Epetra_BlockMap & BlockDomainMap = LocalBlockGraph.DomainMap();
  const Epetra_BlockMap & BlockRangeMap = LocalBlockGraph.RangeMap();

  Epetra_Map *GlobalDomainMap =
    GenerateBlockMap(BaseDomainMap, BlockDomainMap, GlobalComm);
  Epetra_Map *GlobalRangeMap =
    GenerateBlockMap(BaseRangeMap, BlockRangeMap, GlobalComm);

  GlobalGraph->FillComplete(*GlobalDomainMap, *GlobalRangeMap);

  delete GlobalDomainMap;
  delete GlobalRangeMap;
  delete GlobalRowMap;

  return GlobalGraph;
}
Пример #5
0
// FIXME long long
Epetra_Map
Epetra_Util::Create_Root_Map(const Epetra_Map& usermap,
         int root)
{
  int numProc = usermap.Comm().NumProc();
  if (numProc==1) {
    Epetra_Map newmap(usermap);
    return(newmap);
  }

  const Epetra_Comm & comm = usermap.Comm();
  bool isRoot = usermap.Comm().MyPID()==root;

  //if usermap is already completely owned by root then we'll just return a copy of it.
  int quickreturn = 0;
  int globalquickreturn = 0;

  if (isRoot) {
    if (usermap.NumMyElements()==usermap.NumGlobalElements64()) quickreturn = 1;
  }
  else {
    if (usermap.NumMyElements()==0) quickreturn = 1;
  }
  usermap.Comm().MinAll(&quickreturn, &globalquickreturn, 1);
  
  if (globalquickreturn==1) {
    Epetra_Map newmap(usermap);
    return(newmap);
  }
  
  // Linear map: Simple case, just put all GIDs linearly on root processor
  if (usermap.LinearMap() && root!=-1) {
    int numMyElements = 0;
    if (isRoot) numMyElements = usermap.MaxAllGID64()+1; // FIXME long long
    Epetra_Map newmap(-1, numMyElements, usermap.IndexBase(), comm);
    return(newmap);
  }

  if (!usermap.UniqueGIDs()) 
    throw usermap.ReportError("usermap must have unique GIDs",-1);

  // General map

  // Build IntVector of the GIDs, then ship them to root processor
  int numMyElements = usermap.NumMyElements();
  Epetra_Map allGidsMap(-1, numMyElements, 0, comm);
  Epetra_IntVector allGids(allGidsMap);
  for (int i=0; i<numMyElements; i++) allGids[i] = usermap.GID64(i);
  
  int numGlobalElements = usermap.NumGlobalElements64();
  if (root!=-1) {
    int n1 = 0; if (isRoot) n1 = numGlobalElements;
    Epetra_Map allGidsOnRootMap(-1, n1, 0, comm);
    Epetra_Import importer(allGidsOnRootMap, allGidsMap);
    Epetra_IntVector allGidsOnRoot(allGidsOnRootMap);
    allGidsOnRoot.Import(allGids, importer, Insert);
    
    Epetra_Map rootMap(-1, allGidsOnRoot.MyLength(), allGidsOnRoot.Values(), usermap.IndexBase(), comm);
    return(rootMap);
  }
  else {
    int n1 = numGlobalElements;
    Epetra_LocalMap allGidsOnRootMap(n1, 0, comm);
    Epetra_Import importer(allGidsOnRootMap, allGidsMap);
    Epetra_IntVector allGidsOnRoot(allGidsOnRootMap);
    allGidsOnRoot.Import(allGids, importer, Insert);
    
    Epetra_Map rootMap(-1, allGidsOnRoot.MyLength(), allGidsOnRoot.Values(), usermap.IndexBase(), comm);

    return(rootMap);
  }
}
Пример #6
0
 int TLowCommunicationMakeColMapAndReindex(int N, const int * rowptr, int * colind_LID, const int_type *colind_GID, const Epetra_Map& domainMap, const int * owningPIDs, bool SortGhostsAssociatedWithEachProcessor, std::vector<int>& RemotePIDs, MapType1 & NewColMap)
   {
  int i,j;


  // Sanity checks
  bool UseLL;
  if(domainMap.GlobalIndicesLongLong()) UseLL=true;
  else if(domainMap.GlobalIndicesInt()) UseLL=false;
  else throw std::runtime_error("LowCommunicationMakeColMapAndReindex: cannot detect int type.");

  // Scan all column indices and sort into two groups: 
  // Local:  those whose GID matches a GID of the domain map on this processor and
  // Remote: All others.
  int numDomainElements = domainMap.NumMyElements();
  bool * LocalGIDs  = 0;
  if (numDomainElements>0) LocalGIDs  = new bool[numDomainElements];
  for (i=0; i<numDomainElements; i++) LocalGIDs[i] = false; // Assume domain GIDs are not local

  bool DoSizes = !domainMap.ConstantElementSize(); // If not constant element size, then error
  if(DoSizes) throw std::runtime_error("LowCommunicationMakeColMapAndReindex: cannot handle non-constant sized domainMap.");


  // In principle it is good to have RemoteGIDs and RemotGIDList be as long as the number of remote GIDs
  // on this processor, but this would require two passes through the column IDs, so we make it the max of 100
  // and the number of block rows.
  const int numMyBlockRows = N;
  int  hashsize = numMyBlockRows; if (hashsize < 100) hashsize = 100;
  Epetra_HashTable<int_type> RemoteGIDs(hashsize); 
  std::vector<int_type> RemoteGIDList; RemoteGIDList.reserve(hashsize);
  std::vector<int> PIDList;            PIDList.reserve(hashsize);

  // Here we start using the *int* colind array.  If int_type==int this clobbers the GIDs, if
  // int_type==long long, then this is the first use of the colind array.
  // For *local* GID's set colind with with their LID in the domainMap.  For *remote* GIDs, 
  // we set colind with (numDomainElements+NumRemoteColGIDs) before the increment of
  // the remote count.  These numberings will be separate because no local LID is greater 
  // than numDomainElements. 

  int NumLocalColGIDs = 0;
  int NumRemoteColGIDs = 0;
  for(i = 0; i < numMyBlockRows; i++) {
    for(j = rowptr[i]; j < rowptr[i+1]; j++) {
      int_type GID = colind_GID[j];
      // Check if GID matches a row GID
      int LID = domainMap.LID(GID);
      if(LID != -1) {
	bool alreadyFound = LocalGIDs[LID];
	if (!alreadyFound) {
          LocalGIDs[LID] = true; // There is a column in the graph associated with this domain map GID
          NumLocalColGIDs++;
	}
	colind_LID[j] = LID; 
      }
      else {
	int_type hash_value=RemoteGIDs.Get(GID);
	if(hash_value  == -1) { // This means its a new remote GID
	  int PID = owningPIDs[j];
	  if(PID==-1) throw std::runtime_error("LowCommunicationMakeColMapAndReindex: Cannot figure out if PID is owned.");
	  colind_LID[j] = numDomainElements + NumRemoteColGIDs;
	  RemoteGIDs.Add(GID, NumRemoteColGIDs);
	  RemoteGIDList.push_back(GID);
	  PIDList.push_back(PID);
	  NumRemoteColGIDs++;
	}
	else
	  colind_LID[j] = numDomainElements + hash_value;	  
      }
    }
  }

  // Possible short-circuit:  If all domain map GIDs are present as column indices, then set ColMap=domainMap and quit
  if (domainMap.Comm().NumProc()==1) { 
    
    if (NumRemoteColGIDs!=0) {
      throw std::runtime_error("Some column IDs are not in domainMap.  If matrix is rectangular, you must pass in a domainMap"); 
      // Sanity test: When one processor,there can be no remoteGIDs
    }
    if (NumLocalColGIDs==numDomainElements) {
      if (LocalGIDs!=0) delete [] LocalGIDs; 
      // In this case, we just use the domainMap's indices, which is, not coincidently, what we clobbered colind with up above anyway. 
      // No further reindexing is needed.
      NewColMap = domainMap;
      return 0;
    }
  }
      
  // Now build integer array containing column GIDs
  // Build back end, containing remote GIDs, first
  int numMyBlockCols = NumLocalColGIDs + NumRemoteColGIDs;
  std::vector<int_type> ColIndices;
  int_type * RemoteColIndices=0;
  if(numMyBlockCols > 0) {
    ColIndices.resize(numMyBlockCols);
    if(NumLocalColGIDs!=numMyBlockCols) RemoteColIndices = &ColIndices[NumLocalColGIDs]; // Points to back end of ColIndices
    else RemoteColIndices=0;
  }

  for(i = 0; i < NumRemoteColGIDs; i++) 
    RemoteColIndices[i] = RemoteGIDList[i]; 

  // Build permute array for *remote* reindexing.
  std::vector<int> RemotePermuteIDs(NumRemoteColGIDs);
  for(i=0; i<NumRemoteColGIDs; i++) RemotePermuteIDs[i]=i;

  // Sort External column indices so that all columns coming from a given remote processor are contiguous
  int NumListsInt=0;
  int NumListsLL =0;
  int * IntSortLists[2];
  long long * LLSortLists[2];
  int * RemotePermuteIDs_ptr = RemotePermuteIDs.size() ? &RemotePermuteIDs[0] : 0;
  if(!UseLL) {
    // int version
    IntSortLists[0] = (int*) RemoteColIndices;
    IntSortLists[1] = RemotePermuteIDs_ptr;
    NumListsInt=2;
  }
  else {
    //LL version
    LLSortLists[0]  = (long long*) RemoteColIndices;
    IntSortLists[0] = RemotePermuteIDs_ptr;
    NumListsInt     = NumListsLL = 1;
  }

  int * PIDList_ptr = PIDList.size() ? &PIDList[0] : 0;
  Epetra_Util::Sort(true, NumRemoteColGIDs, PIDList_ptr, 0, 0, NumListsInt, IntSortLists,NumListsLL,LLSortLists);

  // Stash the RemotePIDs  
  PIDList.resize(NumRemoteColGIDs);
  RemotePIDs = PIDList;

  if (SortGhostsAssociatedWithEachProcessor) {
    // Sort external column indices so that columns from a given remote processor are not only contiguous
    // but also in ascending order. NOTE: I don't know if the number of externals associated
    // with a given remote processor is known at this point ... so I count them here.

    // NTS: Only sort the RemoteColIndices this time...
    int StartCurrent, StartNext;
    StartCurrent = 0; StartNext = 1;
    while ( StartNext < NumRemoteColGIDs ) {
      if (PIDList[StartNext]==PIDList[StartNext-1]) StartNext++;
      else {
	IntSortLists[0] =  &RemotePermuteIDs[StartCurrent];
	Epetra_Util::Sort(true,StartNext-StartCurrent, &(RemoteColIndices[StartCurrent]),0,0,1,IntSortLists,0,0);
        StartCurrent = StartNext; StartNext++;
      }
    }
    IntSortLists[0] =  &RemotePermuteIDs[StartCurrent];
    Epetra_Util::Sort(true, StartNext-StartCurrent, &(RemoteColIndices[StartCurrent]), 0, 0, 1,IntSortLists,0,0);
  }

  // Reverse the permutation to get the information we actually care about
  std::vector<int> ReverseRemotePermuteIDs(NumRemoteColGIDs);
  for(i=0; i<NumRemoteColGIDs; i++) ReverseRemotePermuteIDs[RemotePermuteIDs[i]]=i;

  // Build permute array for *local* reindexing.
  bool use_local_permute=false;
  std::vector<int> LocalPermuteIDs(numDomainElements);

  // Now fill front end. Two cases:
  // (1) If the number of Local column GIDs is the same as the number of Local domain GIDs, we
  //     can simply read the domain GIDs into the front part of ColIndices, otherwise 
  // (2) We step through the GIDs of the domainMap, checking to see if each domain GID is a column GID.
  //     we want to do this to maintain a consistent ordering of GIDs between the columns and the domain.

  if(NumLocalColGIDs == domainMap.NumMyElements()) {
    if(NumLocalColGIDs > 0) {
      domainMap.MyGlobalElements(&ColIndices[0]); // Load Global Indices into first numMyBlockCols elements column GID list
    }
  }
  else {
    int_type* MyGlobalElements = 0;
    domainMap.MyGlobalElementsPtr(MyGlobalElements);

    int* ElementSizeList = 0;
    if(DoSizes) 
      ElementSizeList = domainMap.ElementSizeList();
    int NumLocalAgain = 0;
    use_local_permute = true;    
    for(i = 0; i < numDomainElements; i++) {
      if(LocalGIDs[i]) {
	LocalPermuteIDs[i] = NumLocalAgain;
	ColIndices[NumLocalAgain++] = MyGlobalElements[i];
      }
    }
    assert(NumLocalAgain==NumLocalColGIDs); // Sanity test
  }

  // Done with this array
  if (LocalGIDs!=0) delete [] LocalGIDs; 

  // Make Column map with same element sizes as Domain map 
  int_type * ColIndices_ptr  = ColIndices.size() ? &ColIndices[0] : 0;
  MapType2 temp((int_type)(-1), numMyBlockCols, ColIndices_ptr, (int_type)domainMap.IndexBase64(), domainMap.Comm());
  NewColMap = temp;

  // Low-cost reindex of the matrix
  for(i=0; i<numMyBlockRows; i++){
    for(j=rowptr[i]; j<rowptr[i+1]; j++){
      int ID=colind_LID[j];
      if(ID < numDomainElements){
	if(use_local_permute) colind_LID[j] = LocalPermuteIDs[colind_LID[j]];
	// In the case where use_local_permute==false, we just copy the DomainMap's ordering, which it so happens
	// is what we put in colind to begin with.
      }
      else
	colind_LID[j] =  NumLocalColGIDs + ReverseRemotePermuteIDs[colind_LID[j]-numDomainElements];
    }
  }
  
  return 0;
}
Пример #7
0
int powerMethodTests(Epetra_RowMatrix & A, Epetra_RowMatrix & JadA, Epetra_Map & Map, 
		     Epetra_Vector & q, Epetra_Vector & z, Epetra_Vector & resid, bool verbose) {

  // variable needed for iteration
  double lambda = 0.0;
  // int niters = 10000;
  int niters = 300;
  double tolerance = 1.0e-2;
  int ierr = 0;

  /////////////////////////////////////////////////////////////////////////////////////////////////
	
  // Iterate

  Epetra_Time timer(Map.Comm());
	
  double startTime = timer.ElapsedTime();
  EPETRA_TEST_ERR(power_method(false, A, q, z, resid, &lambda, niters, tolerance, verbose),ierr);
  double elapsed_time = timer.ElapsedTime() - startTime;
  double total_flops = q.Flops();
  double MFLOPs = total_flops/elapsed_time/1000000.0;
  double lambdaref = lambda;
  double flopsref = total_flops;

  if (verbose) 
	  cout << "\n\nTotal MFLOPs for reference first solve = " << MFLOPs << endl
		  <<     "Total FLOPS                            = " <<total_flops <<endl<<endl;

  lambda = 0.0;
  startTime = timer.ElapsedTime();
  EPETRA_TEST_ERR(power_method(false, JadA, q, z, resid, &lambda, niters, tolerance, verbose),ierr);
  elapsed_time = timer.ElapsedTime() - startTime;
  total_flops = q.Flops();
  MFLOPs = total_flops/elapsed_time/1000000.0;

  if (verbose) 
	  cout << "\n\nTotal MFLOPs for candidate first solve = " << MFLOPs << endl
		  <<     "Total FLOPS                            = " <<total_flops <<endl<<endl;

  EPETRA_TEST_ERR(checkValues(lambda,lambdaref," No-transpose Power Method result", verbose),ierr);
  EPETRA_TEST_ERR(checkValues(total_flops,flopsref," No-transpose Power Method flop count", verbose),ierr);

  /////////////////////////////////////////////////////////////////////////////////////////////////
	
  // Solve transpose problem

  if (verbose) cout << "\n\nUsing transpose of matrix and solving again (should give same result).\n\n"
		    << endl;
  // Iterate
  lambda = 0.0;
  startTime = timer.ElapsedTime();
  EPETRA_TEST_ERR(power_method(true, A, q, z, resid, &lambda, niters, tolerance, verbose),ierr);
  elapsed_time = timer.ElapsedTime() - startTime;
  total_flops = q.Flops();
  MFLOPs = total_flops/elapsed_time/1000000.0;
  lambdaref = lambda;
  flopsref = total_flops;

  if (verbose) 
	 cout << "\n\nTotal MFLOPs for reference transpose solve = " << MFLOPs << endl
		 <<     "Total FLOPS                                = " <<total_flops <<endl<<endl;

  lambda = 0.0;
  startTime = timer.ElapsedTime();
  EPETRA_TEST_ERR(power_method(true, JadA, q, z, resid, &lambda, niters, tolerance, verbose),ierr);
  elapsed_time = timer.ElapsedTime() - startTime;
  total_flops = q.Flops();
  MFLOPs = total_flops/elapsed_time/1000000.0;

  if (verbose) 
	  cout << "\n\nTotal MFLOPs for candidate transpose solve = " << MFLOPs << endl
		  <<     "Total FLOPS                                = " <<total_flops <<endl<<endl;

  EPETRA_TEST_ERR(checkValues(lambda,lambdaref,"Transpose Power Method result", verbose),ierr);
  EPETRA_TEST_ERR(checkValues(total_flops,flopsref,"Transpose Power Method flop count", verbose),ierr);

  EPETRA_TEST_ERR(check(A, JadA, verbose),ierr);

  return(0);
}
Пример #8
0
Epetra_CrsMatrix * 
Laplace2D::CreateLaplacian( const int nx, const int ny, const Epetra_Comm * Comm)
{

  int NumGlobalElements = nx * ny;
    
  // create a map
  Epetra_Map * Map = new Epetra_Map(NumGlobalElements,0,*Comm);
  // local number of rows
  int NumMyElements = Map->NumMyElements();
  // get update list
  int * MyGlobalElements = Map->MyGlobalElements();

  double hx = 1.0/(nx-1);
  double hy = 1.0/(ny-1);
  double off_left  = -1.0/(hx*hx);
  double off_right = -1.0/(hx*hx);
  double off_lower = -1.0/(hy*hy);
  double off_upper = -1.0/(hy*hy);
  double diag      =  2.0/(hx*hx) + 2.0/(hy*hy);
  
  int left, right, lower, upper;
    
  // a bit overestimated the nonzero per row
  
  Epetra_CrsMatrix * A = new Epetra_CrsMatrix(Copy,*Map,5);
    
  // Add  rows one-at-a-time
    
  double * Values = new double[4];
  int *   Indices = new int[4];
    
  for( int i = 0; i < NumMyElements; ++i ) 
  {
    int NumEntries=0;
    get_myNeighbours(  MyGlobalElements[i], nx, ny, left, right, lower, upper );
    if( left != -1 ) 
    {
      Indices[NumEntries] = left;
      Values[NumEntries] = off_left;
      ++NumEntries;
    }
    if( right != -1 ) 
    {
      Indices[NumEntries] = right;
      Values[NumEntries] = off_right;
      ++NumEntries;
    }
    if( lower != -1 ) 
    {
      Indices[NumEntries] = lower;
      Values[NumEntries] = off_lower;
      ++NumEntries;
    }
    if( upper != -1 ) 
    {
      Indices[NumEntries] = upper;
      Values[NumEntries] = off_upper;
      ++NumEntries;
    }
    // put the off-diagonal entries
    A->InsertGlobalValues(MyGlobalElements[i], NumEntries, Values, Indices);
    // Put in the diagonal entry
    A->InsertGlobalValues(MyGlobalElements[i], 1, &diag, MyGlobalElements+i);
  }

  // put matrix in local ordering
  A->FillComplete();

  delete [] Indices;
  delete [] Values;
  delete    Map;

  return A;
  
} /* createJacobian */
Пример #9
0
void
LOCA::Epetra::AugmentedOp::buildExtendedMap(const Epetra_BlockMap& uMap,
                        Epetra_Map*& eMapPtr,
                        bool buildImporter,
                        bool haveParam)
{
  Epetra_BlockMap& nonconstUnderlyingMap = const_cast<Epetra_BlockMap&>(uMap);

  // Convert underlying map to point map if necessary
  Epetra_Map* uPointMapPtr =
    dynamic_cast<Epetra_Map*>(&nonconstUnderlyingMap);
  bool allocatedPointMap = false;
  if (uPointMapPtr == NULL) {
    allocatedPointMap = true;
    blockMap2PointMap(uMap, uPointMapPtr);
  }

  int max_gid = uPointMapPtr->MaxAllGID();
  int num_global_elements = uPointMapPtr->NumGlobalElements();
  int num_my_elements = uPointMapPtr->NumMyElements();
  int *global_elements = uPointMapPtr->MyGlobalElements();
  const Epetra_Comm& comm = uPointMapPtr->Comm();
  int index_base = uPointMapPtr->IndexBase();

  int ext_num_global_elements;
  int ext_num_my_elements;
  int *ext_global_elements;

  // Compute number of extended global elements
  if (buildImporter)
    ext_num_global_elements =
      num_global_elements + numConstraints*comm.NumProc();
  else
    ext_num_global_elements = num_global_elements + numConstraints;

  // Compute number of extended local elements
  if (buildImporter || haveParam)
     ext_num_my_elements = num_my_elements + numConstraints;
  else
    ext_num_my_elements = num_my_elements;

  // Allocate extended global elements array
  ext_global_elements = new int[ext_num_my_elements];

  // Set extended global elements
  for (int i=0; i<num_my_elements; i++) {
    ext_global_elements[i] = global_elements[i];
  }
  if (buildImporter || haveParam)
    for (int i=0; i<numConstraints; i++)
      ext_global_elements[num_my_elements+i] = max_gid + 1 + i;

  // Create extended point map
  eMapPtr = new Epetra_Map(ext_num_global_elements, ext_num_my_elements,
               ext_global_elements, index_base, comm);

  // Free global elements array
  delete [] ext_global_elements;
  if (allocatedPointMap)
    delete uPointMapPtr;
}
Пример #10
0
// ============================================================================
void EpetraExt::XMLWriter::
Write(const std::string& Label, const Epetra_Map& Map)
{
  TEUCHOS_TEST_FOR_EXCEPTION(IsOpen_ == false, std::logic_error,
                     "No file has been opened");

  long long NumGlobalElements = Map.NumGlobalElements64();
  const int* MyGlobalElements_int = 0;
  const long long* MyGlobalElements_LL = 0;
  Map.MyGlobalElements(MyGlobalElements_int, MyGlobalElements_LL);

  if(!MyGlobalElements_int || !MyGlobalElements_LL)
    throw "EpetraExt::XMLWriter::Write: ERROR, GlobalIndices type unknown.";

  if (Comm_.MyPID() == 0)
  {
    std::ofstream of(FileName_.c_str(), std::ios::app);

    of << "<Map Label=\"" << Label 
      << "\" NumElements=\"" << NumGlobalElements << '"'
      << " IndexBase=\"" << Map.IndexBase64() << '"'
      << " NumProc=\"" << Comm_.NumProc() << '"';

    of.close();
  }

  for (int iproc = 0; iproc < Comm_.NumProc(); ++iproc)
  {
    if (iproc == Comm_.MyPID())
    {
      std::ofstream of(FileName_.c_str(), std::ios::app);

      of << " ElementsOnProc" << iproc << "=\"" << Map.NumMyElements() << '"';
      of.close();
    }
    Comm_.Barrier();
  }

  if (Comm_.MyPID() == 0)
  {
    std::ofstream of(FileName_.c_str(), std::ios::app);
    of << '>' << std::endl;
    of.close();
  }

  for (int iproc = 0; iproc < Comm_.NumProc(); iproc++)
  {
    if (iproc == Comm_.MyPID())
    {
      std::ofstream of(FileName_.c_str(), std::ios::app);

      of << "<Proc ID=\"" << Comm_.MyPID() << "\">" << std::endl;

      if(MyGlobalElements_int)
      {
        for (int i = 0; i < Map.NumMyElements(); ++i)
        {
          of << MyGlobalElements_int[i] << std::endl;
        }
      }
      else
      {
        for (int i = 0; i < Map.NumMyElements(); ++i)
        {
          of << MyGlobalElements_LL[i] << std::endl;
        }
      }

      of << "</Proc>" << std::endl;
      of.close();
    }
    Comm_.Barrier();
  }

  if (Comm_.MyPID() == 0)
  {
    std::ofstream of(FileName_.c_str(), std::ios::app);
    of << "</Map>" << std::endl;
    of.close();
  }
}
Пример #11
0
int main(int argc, char *argv[])
{
  int ierr = 0;
  double elapsed_time;
  double total_flops;
  double MFLOPs;
    

#ifdef EPETRA_MPI

  // Initialize MPI
  MPI_Init(&argc,&argv);
  Epetra_MpiComm comm( MPI_COMM_WORLD );
#else
  Epetra_SerialComm comm;
#endif

  bool verbose = false;
  bool summary = false;

  // Check if we should print verbose results to standard out
  if (argc>6) if (argv[6][0]=='-' && argv[6][1]=='v') verbose = true;

  // Check if we should print verbose results to standard out
  if (argc>6) if (argv[6][0]=='-' && argv[6][1]=='s') summary = true;

  if(argc < 6) {
    cerr << "Usage: " << argv[0]
         << " NumNodesX NumNodesY NumProcX NumProcY NumPoints [-v|-s]" << endl
         << "where:" << endl
         << "NumNodesX         - Number of mesh nodes in X direction per processor" << endl
         << "NumNodesY         - Number of mesh nodes in Y direction per processor" << endl
         << "NumProcX          - Number of processors to use in X direction" << endl
         << "NumProcY          - Number of processors to use in Y direction" << endl
         << "NumPoints         - Number of points to use in stencil (5, 9 or 25 only)" << endl
         << "-v|-s             - (Optional) Run in verbose mode if -v present or summary mode if -s present" << endl
         << " NOTES: NumProcX*NumProcY must equal the number of processors used to run the problem." << endl << endl
	 << " Serial example:" << endl
         << argv[0] << " 16 12 1 1 25 -v" << endl
	 << " Run this program in verbose mode on 1 processor using a 16 X 12 grid with a 25 point stencil."<< endl <<endl
	 << " MPI example:" << endl
         << "mpirun -np 32 " << argv[0] << " 10 12 4 8 9 -v" << endl
	 << " Run this program in verbose mode on 32 processors putting a 10 X 12 subgrid on each processor using 4 processors "<< endl
	 << " in the X direction and 8 in the Y direction.  Total grid size is 40 points in X and 96 in Y with a 9 point stencil."<< endl
         << endl;
    return(1);

  }
    //char tmp;
    //if (comm.MyPID()==0) cout << "Press any key to continue..."<< endl;
    //if (comm.MyPID()==0) cin >> tmp;
    //comm.Barrier();

  comm.SetTracebackMode(0); // This should shut down any error traceback reporting
  if (verbose && comm.MyPID()==0)
    cout << Epetra_Version() << endl << endl;
  if (summary && comm.MyPID()==0) {
    if (comm.NumProc()==1)
      cout << Epetra_Version() << endl << endl;
    else
      cout << endl << endl; // Print two blank line to keep output columns lined up
  }

  if (verbose) cout << comm <<endl;


  // Redefine verbose to only print on PE 0

  if (verbose && comm.MyPID()!=0) verbose = false;
  if (summary && comm.MyPID()!=0) summary = false;

  int numNodesX = atoi(argv[1]);
  int numNodesY = atoi(argv[2]);
  int numProcsX = atoi(argv[3]);
  int numProcsY = atoi(argv[4]);
  int numPoints = atoi(argv[5]);

  if (verbose || (summary && comm.NumProc()==1)) {
    cout << " Number of local nodes in X direction  = " << numNodesX << endl
	 << " Number of local nodes in Y direction  = " << numNodesY << endl
	 << " Number of global nodes in X direction = " << numNodesX*numProcsX << endl
	 << " Number of global nodes in Y direction = " << numNodesY*numProcsY << endl
	 << " Number of local nonzero entries       = " << numNodesX*numNodesY*numPoints << endl
	 << " Number of global nonzero entries      = " << numNodesX*numNodesY*numPoints*numProcsX*numProcsY << endl
	 << " Number of Processors in X direction   = " << numProcsX << endl
	 << " Number of Processors in Y direction   = " << numProcsY << endl
	 << " Number of Points in stencil           = " << numPoints << endl << endl;
  }
  // Print blank line to keep output columns lined up
  if (summary && comm.NumProc()>1)
    cout << endl << endl << endl << endl << endl << endl << endl << endl<< endl << endl;

  if (numProcsX*numProcsY!=comm.NumProc()) {
    cerr << "Number of processors = " << comm.NumProc() << endl
	 << " is not the product of " << numProcsX << " and " << numProcsY << endl << endl;
    return(1);
  }

  if (numPoints!=5 && numPoints!=9 && numPoints!=25) {
    cerr << "Number of points specified = " << numPoints << endl
	 << " is not 5, 9, 25" << endl << endl;
    return(1);
  }

  if (numNodesX*numNodesY<=0) {
    cerr << "Product of number of nodes is <= zero" << endl << endl;
    return(1);
  }

  Epetra_IntSerialDenseVector Xoff, XLoff, XUoff;
  Epetra_IntSerialDenseVector Yoff, YLoff, YUoff;
  if (numPoints==5) {

     // Generate a 5-point 2D Finite Difference matrix
    Xoff.Size(5);
    Yoff.Size(5);
    Xoff[0] = -1; Xoff[1] = 1; Xoff[2] = 0; Xoff[3] = 0;  Xoff[4] = 0; 
    Yoff[0] = 0;  Yoff[1] = 0; Yoff[2] = 0; Yoff[3] = -1; Yoff[4] = 1; 

     // Generate a 2-point 2D Lower triangular Finite Difference matrix
    XLoff.Size(2);
    YLoff.Size(2);
    XLoff[0] = -1; XLoff[1] =  0; 
    YLoff[0] =  0; YLoff[1] = -1;

     // Generate a 3-point 2D upper triangular Finite Difference matrix
    XUoff.Size(3);
    YUoff.Size(3);
    XUoff[0] =  0; XUoff[1] =  1; XUoff[2] = 0; 
    YUoff[0] =  0; YUoff[1] =  0; YUoff[2] = 1;
  }
  else if (numPoints==9) {
    // Generate a 9-point 2D Finite Difference matrix
    Xoff.Size(9);
    Yoff.Size(9);
    Xoff[0] = -1;  Xoff[1] =  0; Xoff[2] =  1; 
    Yoff[0] = -1;  Yoff[1] = -1; Yoff[2] = -1; 
    Xoff[3] = -1;  Xoff[4] =  0; Xoff[5] =  1; 
    Yoff[3] =  0;  Yoff[4] =  0; Yoff[5] =  0; 
    Xoff[6] = -1;  Xoff[7] =  0; Xoff[8] =  1; 
    Yoff[6] =  1;  Yoff[7] =  1; Yoff[8] =  1; 

    // Generate a 5-point lower triangular 2D Finite Difference matrix
    XLoff.Size(5);
    YLoff.Size(5);
    XLoff[0] = -1;  XLoff[1] =  0; Xoff[2] =  1; 
    YLoff[0] = -1;  YLoff[1] = -1; Yoff[2] = -1; 
    XLoff[3] = -1;  XLoff[4] =  0; 
    YLoff[3] =  0;  YLoff[4] =  0;

    // Generate a 4-point upper triangular 2D Finite Difference matrix
    XUoff.Size(4);
    YUoff.Size(4);
    XUoff[0] =  1; 
    YUoff[0] =  0; 
    XUoff[1] = -1;  XUoff[2] =  0; XUoff[3] =  1; 
    YUoff[1] =  1;  YUoff[2] =  1; YUoff[3] =  1; 

  }
  else {
    // Generate a 25-point 2D Finite Difference matrix
    Xoff.Size(25);
    Yoff.Size(25);
    int xi = 0, yi = 0;
    int xo = -2, yo = -2;
    Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
    Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; 
    xo = -2, yo++;
    Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
    Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; 
    xo = -2, yo++;
    Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
    Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; 
    xo = -2, yo++;
    Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
    Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; 
    xo = -2, yo++;
    Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
    Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; 

    // Generate a 13-point lower triangular 2D Finite Difference matrix
    XLoff.Size(13);
    YLoff.Size(13);
    xi = 0, yi = 0;
    xo = -2, yo = -2;
    XLoff[xi++] = xo++;  XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++;
    YLoff[yi++] = yo  ;  YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; 
    xo = -2, yo++;
    XLoff[xi++] = xo++;  XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++;
    YLoff[yi++] = yo  ;  YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; 
    xo = -2, yo++;
    XLoff[xi++] = xo++;  XLoff[xi++] = xo++; XLoff[xi++] = xo++;
    YLoff[yi++] = yo  ;  YLoff[yi++] = yo  ; YLoff[yi++] = yo  ;

    // Generate a 13-point upper triangular 2D Finite Difference matrix
    XUoff.Size(13);
    YUoff.Size(13);
    xi = 0, yi = 0;
    xo = 0, yo = 0;
    XUoff[xi++] = xo++;  XUoff[xi++] = xo++; XUoff[xi++] = xo++;
    YUoff[yi++] = yo  ;  YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; 
    xo = -2, yo++;
    XUoff[xi++] = xo++;  XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++;
    YUoff[yi++] = yo  ;  YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; 
    xo = -2, yo++;
    XUoff[xi++] = xo++;  XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++;
    YUoff[yi++] = yo  ;  YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; 

  }

  Epetra_Map * map;
  Epetra_Map * mapL;
  Epetra_Map * mapU;
  Epetra_CrsMatrix * A;
  Epetra_CrsMatrix * L;
  Epetra_CrsMatrix * U;
  Epetra_MultiVector * b;
  Epetra_MultiVector * bt;
  Epetra_MultiVector * xexact;
  Epetra_MultiVector * bL;
  Epetra_MultiVector * btL;
  Epetra_MultiVector * xexactL;
  Epetra_MultiVector * bU;
  Epetra_MultiVector * btU;
  Epetra_MultiVector * xexactU;
  Epetra_SerialDenseVector resvec(0);

  //Timings
  Epetra_Flops flopcounter;
  Epetra_Time timer(comm);

#ifdef EPETRA_VERY_SHORT_PERFTEST
  int jstop = 1;
#elif EPETRA_SHORT_PERFTEST
  int jstop = 1;
#else
  int jstop = 2;
#endif
  for (int j=0; j<jstop; j++) {
    for (int k=1; k<17; k++) {
#ifdef EPETRA_VERY_SHORT_PERFTEST
      if (k<3 || (k%4==0 && k<9)) {
#elif EPETRA_SHORT_PERFTEST
      if (k<6 || k%4==0) {
#else
      if (k<7 || k%2==0) {
#endif
      int nrhs=k;
      if (verbose) cout << "\n*************** Results for " << nrhs << " RHS with ";

      bool StaticProfile = (j!=0);
      if (verbose) 
	if (StaticProfile) cout << " static profile\n";
	else cout << " dynamic profile\n";
      
      GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, numPoints,
			 Xoff.Values(), Yoff.Values(), nrhs, comm, verbose, summary,
			 map, A, b, bt, xexact, StaticProfile, false);

      
#ifdef EPETRA_HAVE_JADMATRIX
      
      timer.ResetStartTime();
      Epetra_JadMatrix JA(*A);
      elapsed_time = timer.ElapsedTime();
      if (verbose) cout << "Time to create Jagged diagonal matrix = " << elapsed_time << endl;

      //cout << "A = " << *A << endl;
      //cout << "JA = " << JA << endl;

      runJadMatrixTests(&JA, b, bt, xexact, StaticProfile, verbose, summary);

#endif
      runMatrixTests(A, b, bt, xexact, StaticProfile, verbose, summary);

      delete A;
      delete b;
      delete bt; 
      delete xexact;

      GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, XLoff.Length(),
			 XLoff.Values(), YLoff.Values(), nrhs, comm, verbose, summary,
			 mapL, L, bL, btL, xexactL, StaticProfile, true);
      

      GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, XUoff.Length(),
			 XUoff.Values(), YUoff.Values(), nrhs, comm, verbose, summary,
			 mapU, U, bU, btU, xexactU, StaticProfile, true);
      

      runLUMatrixTests(L, bL, btL, xexactL, U, bU, btU, xexactU, StaticProfile, verbose, summary);

      delete L;
      delete bL;
      delete btL; 
      delete xexactL;
      delete mapL;

      delete U;
      delete bU;
      delete btU; 
      delete xexactU;
      delete mapU;

      Epetra_MultiVector q(*map, nrhs);
      Epetra_MultiVector z(q);
      Epetra_MultiVector r(q);
      
      delete map;
      q.SetFlopCounter(flopcounter);
      z.SetFlopCounter(q);
      r.SetFlopCounter(q);

      resvec.Resize(nrhs);
      
    
      flopcounter.ResetFlops();
      timer.ResetStartTime();

      //10 norms
      for( int i = 0; i < 10; ++i )
	q.Norm2( resvec.Values() );

      elapsed_time = timer.ElapsedTime();
      total_flops = q.Flops();
      MFLOPs = total_flops/elapsed_time/1000000.0;
      if (verbose) cout << "\nTotal MFLOPs for 10 Norm2's= " << MFLOPs << endl;
      
      if (summary) {
	if (comm.NumProc()==1) cout << "Norm2" << '\t';
	cout << MFLOPs << endl;
      }
      
      flopcounter.ResetFlops();
      timer.ResetStartTime();
      
      //10 dot's
      for( int i = 0; i < 10; ++i )
	q.Dot(z, resvec.Values());
      
      elapsed_time = timer.ElapsedTime();
      total_flops = q.Flops();
      MFLOPs = total_flops/elapsed_time/1000000.0;
      if (verbose) cout << "Total MFLOPs for 10 Dot's  = " << MFLOPs << endl;
      
      if (summary) {
	if (comm.NumProc()==1) cout << "DotProd" << '\t';
	cout << MFLOPs << endl;
      }
      
      flopcounter.ResetFlops();
      timer.ResetStartTime();
      
      //10 dot's
      for( int i = 0; i < 10; ++i )
	q.Update(1.0, z, 1.0, r, 0.0);
      
      elapsed_time = timer.ElapsedTime();
      total_flops = q.Flops();
      MFLOPs = total_flops/elapsed_time/1000000.0;
      if (verbose) cout << "Total MFLOPs for 10 Updates= " << MFLOPs << endl;
      
      if (summary) {
	if (comm.NumProc()==1) cout << "Update" << '\t';
	cout << MFLOPs << endl;
      }
    }
    }
  }
#ifdef EPETRA_MPI
  MPI_Finalize() ;
#endif

return ierr ;
}

// Constructs a 2D PDE finite difference matrix using the list of x and y offsets.
// 
// nx      (In) - number of grid points in x direction
// ny      (In) - number of grid points in y direction
//   The total number of equations will be nx*ny ordered such that the x direction changes
//   most rapidly: 
//      First equation is at point (0,0)
//      Second at                  (1,0)
//       ...
//      nx equation at             (nx-1,0)
//      nx+1st equation at         (0,1)

// numPoints (In) - number of points in finite difference stencil
// xoff    (In) - stencil offsets in x direction (of length numPoints)
// yoff    (In) - stencil offsets in y direction (of length numPoints)
//   A standard 5-point finite difference stencil would be described as:
//     numPoints = 5
//     xoff = [-1, 1, 0,  0, 0]
//     yoff = [ 0, 0, 0, -1, 1]

// nrhs - Number of rhs to generate. (First interface produces vectors, so nrhs is not needed

// comm    (In) - an Epetra_Comm object describing the parallel machine (numProcs and my proc ID)
// map    (Out) - Epetra_Map describing distribution of matrix and vectors/multivectors
// A      (Out) - Epetra_CrsMatrix constructed for nx by ny grid using prescribed stencil
//                Off-diagonal values are random between 0 and 1.  If diagonal is part of stencil,
//                diagonal will be slightly diag dominant.
// b      (Out) - Generated RHS.  Values satisfy b = A*xexact
// bt     (Out) - Generated RHS.  Values satisfy b = A'*xexact
// xexact (Out) - Generated exact solution to Ax = b and b' = A'xexact

// Note: Caller of this function is responsible for deleting all output objects.

void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, 
			int * xoff, int * yoff,
			const Epetra_Comm  &comm, bool verbose, bool summary, 
			Epetra_Map *& map, 
			Epetra_CrsMatrix *& A, 
			Epetra_Vector *& b, 
			Epetra_Vector *& bt,
			Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly) {

  Epetra_MultiVector * b1, * bt1, * xexact1;
	
  GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, numPoints, 
		     xoff, yoff, 1, comm, verbose, summary, 
		     map, A, b1, bt1, xexact1, StaticProfile, MakeLocalOnly);

  b = dynamic_cast<Epetra_Vector *>(b1);
  bt = dynamic_cast<Epetra_Vector *>(bt1);
  xexact = dynamic_cast<Epetra_Vector *>(xexact1);

  return;
}

void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, 
			int * xoff, int * yoff, int nrhs,
			const Epetra_Comm  &comm, bool verbose, bool summary,
			Epetra_Map *& map, 
			Epetra_CrsMatrix *& A, 
			Epetra_MultiVector *& b, 
			Epetra_MultiVector *& bt,
			Epetra_MultiVector *&xexact, bool StaticProfile, bool MakeLocalOnly) {
  
  Epetra_Time timer(comm);
  // Determine my global IDs
  int * myGlobalElements;
  GenerateMyGlobalElements(numNodesX, numNodesY, numProcsX, numProcsY, comm.MyPID(), myGlobalElements);

  int numMyEquations = numNodesX*numNodesY;
  
  map = new Epetra_Map(-1, numMyEquations, myGlobalElements, 0, comm); // Create map with 2D block partitioning.
  delete [] myGlobalElements;

  int numGlobalEquations = map->NumGlobalElements();

  int profile = 0; if (StaticProfile) profile = numPoints;

#ifdef EPETRA_HAVE_STATICPROFILE

  if (MakeLocalOnly) 
    A = new Epetra_CrsMatrix(Copy, *map, *map, profile, StaticProfile); // Construct matrix with rowmap=colmap
  else
    A = new Epetra_CrsMatrix(Copy, *map, profile, StaticProfile); // Construct matrix

#else

  if (MakeLocalOnly) 
    A = new Epetra_CrsMatrix(Copy, *map, *map, profile); // Construct matrix with rowmap=colmap
  else
    A = new Epetra_CrsMatrix(Copy, *map, profile); // Construct matrix

#endif

  int * indices = new int[numPoints];
  double * values = new double[numPoints];

  double dnumPoints = (double) numPoints;
  int nx = numNodesX*numProcsX;

  for (int i=0; i<numMyEquations; i++) {

    int rowID = map->GID(i);
    int numIndices = 0;

    for (int j=0; j<numPoints; j++) {
      int colID = rowID + xoff[j] + nx*yoff[j]; // Compute column ID based on stencil offsets
      if (colID>-1 && colID<numGlobalEquations) {
	indices[numIndices] = colID;
	double value = - ((double) rand())/ ((double) RAND_MAX);
	if (colID==rowID)
	  values[numIndices++] = dnumPoints - value; // Make diagonal dominant
	else
	  values[numIndices++] = value;
      }
    }
    //cout << "Building row " << rowID << endl;
    A->InsertGlobalValues(rowID, numIndices, values, indices);
  }

  delete [] indices;
  delete [] values;
  double insertTime = timer.ElapsedTime();
  timer.ResetStartTime();
  A->FillComplete(false);
  double fillCompleteTime = timer.ElapsedTime();

  if (verbose)
    cout << "Time to insert matrix values = " << insertTime << endl
	 << "Time to complete fill        = " << fillCompleteTime << endl;
  if (summary) {
    if (comm.NumProc()==1) cout << "InsertTime" << '\t';
    cout << insertTime << endl;
    if (comm.NumProc()==1) cout << "FillCompleteTime" << '\t';
    cout << fillCompleteTime << endl;
  }

  if (nrhs<=1) {  
    b = new Epetra_Vector(*map);
    bt = new Epetra_Vector(*map);
    xexact = new Epetra_Vector(*map);
  }
  else {
    b = new Epetra_MultiVector(*map, nrhs);
    bt = new Epetra_MultiVector(*map, nrhs);
    xexact = new Epetra_MultiVector(*map, nrhs);
  }

  xexact->Random(); // Fill xexact with random values

  A->Multiply(false, *xexact, *b);
  A->Multiply(true, *xexact, *bt);

  return;
}
Пример #12
0
int main(int argc, char *argv[]) {

#ifdef EPETRA_MPI
  MPI_Init(&argc,&argv);
  Epetra_MpiComm Comm (MPI_COMM_WORLD);
#else
  Epetra_SerialComm Comm;
#endif

  int MyPID = Comm.MyPID();

  bool verbose = true; 
  if (MyPID==0) verbose = true;

  if (verbose)
    cout << EpetraExt::EpetraExt_Version() << endl << endl;
                                                                                
  cout << Comm << endl;

  if(argc < 2 && verbose) {
    cerr << "Usage: " << argv[0] 
	 << " HB_filename" << endl;
    return(1);

  }

  // Uncomment the next three lines to debug in mpi mode
  //int tmp;
  //if (MyPID==0) cin >> tmp;
  //Comm.Barrier();

  Epetra_Map * readMap;
  Epetra_CrsMatrix * readA; 
  Epetra_Vector * readx; 
  Epetra_Vector * readb;
  Epetra_Vector * readxexact;
   
  // Call routine to read in HB problem
  Trilinos_Util_ReadHb2Epetra(argv[1], Comm, readMap, readA, readx, readb, readxexact);

  // Create uniform distributed map
  Epetra_Map map(readMap->NumGlobalElements(), 0, Comm);

  // Create Exporter to distribute read-in matrix and vectors

  Epetra_Export exporter(*readMap, map);
  Epetra_CrsMatrix A(Copy, map, 0);
  Epetra_Vector x(map);
  Epetra_Vector b(map);
  Epetra_Vector xexact(map);

  Epetra_Time FillTimer(Comm);
  x.Export(*readx, exporter, Add);
  b.Export(*readb, exporter, Add);
  xexact.Export(*readxexact, exporter, Add);
  Comm.Barrier();
  double vectorRedistributeTime = FillTimer.ElapsedTime();
  A.Export(*readA, exporter, Add);
  Comm.Barrier();
  double matrixRedistributeTime = FillTimer.ElapsedTime() - vectorRedistributeTime;
  assert(A.FillComplete()==0);    
  Comm.Barrier();
  double fillCompleteTime = FillTimer.ElapsedTime() - matrixRedistributeTime;
  if (Comm.MyPID()==0)	{
    cout << "\n\n****************************************************" << endl;
    cout << "\n Vector redistribute  time (sec) = " << vectorRedistributeTime<< endl;
    cout << "    Matrix redistribute time (sec) = " << matrixRedistributeTime << endl;
    cout << "    Transform to Local  time (sec) = " << fillCompleteTime << endl<< endl;
  }
  Epetra_Vector tmp1(*readMap);
  Epetra_Vector tmp2(map);
  readA->Multiply(false, *readxexact, tmp1);

  A.Multiply(false, xexact, tmp2);
  double residual;
  tmp1.Norm2(&residual);
  if (verbose) cout << "Norm of Ax from file            = " << residual << endl;
  tmp2.Norm2(&residual);
  if (verbose) cout << "Norm of Ax after redistribution = " << residual << endl << endl << endl;

  //cout << "A from file = " << *readA << endl << endl << endl;

  //cout << "A after dist = " << A << endl << endl << endl;

  delete readA;
  delete readx;
  delete readb;
  delete readxexact;
  delete readMap;

  Comm.Barrier();

  EpetraExt::RowMatrixToMatrixMarketFile("test.mm", A, "test matrix", "This is a test matrix");
				       
#ifdef EPETRA_MPI
  MPI_Finalize() ;
#endif

return 0 ;
}
Пример #13
0
/* Find the DBBD form */
int shylu_symbolic_factor
(
    Epetra_CrsMatrix *A,    // i/p: A matrix
    shylu_symbolic *ssym,   // symbolic structure
    shylu_data *data,       // numeric structure, TODO: Required ?
    shylu_config *config    // i/p: library configuration
)
{
#ifdef TIMING_OUTPUT
    Teuchos::Time symtime("symbolic time");
    symtime.start();
#endif
    int myPID = A->Comm().MyPID();
    int n = A->NumGlobalRows();

    int Dnr;
    int Snr;
    int *DRowElems;
    int *SRowElems;
    int sym = config->sym;

    checkMaps(A);

    // Get column map
    Epetra_Map AColMap = A->ColMap();
    int ncols = AColMap.NumMyElements();
    int *cols = AColMap.MyGlobalElements();

    // Get row map
    Epetra_Map ARowMap = A->RowMap();
    int nrows = ARowMap.NumMyElements();
    int *rows = ARowMap.MyGlobalElements();

    // Find all columns in this proc
    int *gvals = new int[n];       // vector of size n, not ncols !
    // gvals[local cols] = 1, gvals[shared cols] > 1.
    int SNumGlobalCols;
    findLocalColumns(A, gvals, SNumGlobalCols);

    // See if you can shrink the separator by assigning more rows/columns to
    // the block diagonals
    // TODO: This is because of a bug in coloring remove the if once that is
    // fixed
    //if (config->schurApproxMethod == 2)
    if (config->sep_type == 2)
        findNarrowSeparator(A, gvals);

    // 3. Assemble diagonal block and the border in convenient form [
    /* In each processor, we have (in a permuted form)
     *  | D_i    C_i   |
     *  | R_i    S_i   |
     * D_i - diagonal block, C_i - Column Separator, R_i - Row separator
     * S_i - A22 block corresponding to Schur complement part of A
     * Assemble all four blocks in local matrices. */

     ostringstream ssmsg1;
     ssmsg1 << "PID =" << myPID << " ";
     string msg = ssmsg1.str();
     ssmsg1.clear(); ssmsg1.str("");

    // Find #cols in each block
    int Dnc = 0;        // #cols in diagonal block
    int Snc = 0;        // #cols in the col. separator
    /* Looping on cols will work only for wide separator
     * as for narrow sep there will be some sep cols with gvals[col] ==1
     * */
    /*for (int i = 0; i < ncols ; i++)
    {
        if (gvals[cols[i]] == 1)
            Dnc++;
        else
            Snc++;
    }
    // Find #rows in each block 
    Dnr = Dnc;          // #rows in square diagonal block
    Snr = nrows - Dnr;  // #rows in the row separator*/

    // Find #rows in each block
    Dnr = 0;
    Snr = 0;
    for (int i = 0; i < nrows ; i++)
    {
        if (gvals[rows[i]] == 1)
            Dnr++;
        else
            Snr++;
    }
    Dnc = Dnr;
    // TODO: Snc is no longer useful, should remove it
    for (int i = 0; i < ncols ; i++)
    {
        if (gvals[cols[i]] != 1)
            Snc++;
    }

    assert(Snc >= 0);

    // TODO : The above assignment may not be correct in the unsymetric case

    ////config->dm.print(2, msg + " Mycols=");
    cout << msg << " Mycols="<< ncols << "Myrows ="<< nrows << endl;
    cout << msg << " #rows and #cols in diagonal blk ="<< Dnr << endl;
    cout << msg << " #columns in S ="<< Snc << endl;
    cout << msg << " #rows in S ="<< Snr << endl;

    ostringstream pidstr;
    pidstr <<  myPID ;
    // Create a row map for the D and S blocks [
    DRowElems = new int[Dnr];
    SRowElems = new int[Snr];
    int gid;
    // Assemble row ids in two arrays (for D and R blocks)
    if (sym)
    {
        findBlockElems(A, nrows, rows, gvals, Dnr, DRowElems, Snr, SRowElems,
                    "D"+pidstr.str()+"Rows", "S"+pidstr.str()+"Rows", false) ;
    }
    else
    {
        // SRowElems are not known until factorization, TODO
        assert(0 == 1);
    }

    data->Dnr = Dnr;
    data->Snr = Snr;
    data->Dnc = Dnc;
    data->DRowElems = DRowElems;
    data->SRowElems = SRowElems;

    // Create a column map for the D and S blocks [
    int *DColElems = new int[Dnc]; // Elems in column map of D 
    int *SColElems = new int[Snc]; // Elems in column map of C TODO: Unused
    // Assemble column ids in two arrays (for D and C blocks)
    findBlockElems(A, ncols, cols, gvals, Dnc, DColElems, Snc, SColElems,
                    "D"+pidstr.str()+"Cols", "S"+pidstr.str()+"Cols", true) ;

    data->DColElems = DColElems;
    data->gvals = gvals;

    for (int i = 0; i < Snr; i++)
    {
        // Epetra guarentees columns corresponding to local rows will be first
        // in the column map.
        assert(SRowElems[i] == SColElems[i]);
    }
    // ]

    /*--Create the Epetra Matrices with the maps (does not insert values) --- */
    create_matrices(A, ssym, data, config);

    /*--Extract the Epetra Matrices and call fillComplete --- */
    extract_matrices(A, ssym, data, config, true);

    delete[] SColElems;

    Amesos Factory;
    const char* SolverType = config->diagonalBlockSolver.c_str();
    bool IsAvailable = Factory.Query(SolverType);
    assert(IsAvailable == true);

    Teuchos::RCP<Epetra_LinearProblem> LP = Teuchos::RCP<Epetra_LinearProblem> 
                                        (new Epetra_LinearProblem());
    LP->SetOperator((ssym->D).getRawPtr());
    //LP->SetOperator((ssym->DT).getRawPtr()); // for transpose

    // Create temp vectors
    ssym->Dlhs = Teuchos::RCP<Epetra_MultiVector>
                    (new Epetra_MultiVector(ssym->D->RowMap(), 16));
    ssym->Drhs = Teuchos::RCP<Epetra_MultiVector>
                    (new Epetra_MultiVector(ssym->D->RowMap(), 16));
    ssym->Gvec = Teuchos::RCP<Epetra_MultiVector>
                    (new Epetra_MultiVector(ssym->G->RowMap(), 16));

    LP->SetRHS(ssym->Drhs.getRawPtr());
    LP->SetLHS(ssym->Dlhs.getRawPtr());

    ssym->ReIdx_LP = Teuchos::RCP<
                    EpetraExt::ViewTransform<Epetra_LinearProblem> >
                    (new EpetraExt::LinearProblem_Reindex2(0));
    ssym->LP = Teuchos::RCP<Epetra_LinearProblem>(&((*(ssym->ReIdx_LP))(*LP)),
                                        false);

    Teuchos::RCP<Amesos_BaseSolver> Solver = Teuchos::RCP<Amesos_BaseSolver>
                                    (Factory.Create(SolverType, *(ssym->LP)));
    //config->dm.print(5, "Created the diagonal solver");

#ifdef TIMING_OUTPUT
    Teuchos::Time ftime("setup time");
    ftime.start();
#endif
    //Solver->SetUseTranspose(true); // for transpose
    Teuchos::ParameterList aList;
    aList.set("TrustMe", true);
    Solver->SetParameters(aList);
    Solver->SymbolicFactorization();

    //config->dm.print(3, "Symbolic Factorization done");

#ifdef TIMING_OUTPUT
    ftime.stop();
    cout << "Symbolic Factorization Time" << ftime.totalElapsedTime() << endl;
    ftime.reset();
#endif

    ssym->OrigLP = LP;
    //ssym->LP = LP;
    ssym->Solver = Solver;

    if (config->schurApproxMethod == 1)
    {
        Teuchos::ParameterList pList;
        Teuchos::RCP<Isorropia::Epetra::Prober> prober = 
                         Teuchos::RCP<Isorropia::Epetra::Prober> (new
                          Isorropia::Epetra::Prober((ssym->Sg).getRawPtr(),
                                                     pList, false));
        //config->dm.print(3, "Doing Coloring");
#ifdef TIMING_OUTPUT
        ftime.start();
#endif
        prober->color();
#ifdef TIMING_OUTPUT
        ftime.stop();
        cout << "Time to color" << ftime.totalElapsedTime() << endl;
        ftime.reset();
        ftime.start();
#endif
        ssym->prober = prober;
    }
#ifdef TIMING_OUTPUT
    symtime.stop();
    cout << "Symbolic Time" << symtime.totalElapsedTime() << endl;
    symtime.reset();
#endif
}
Пример #14
0
int CreateTridi(Epetra_CrsMatrix& A)
{

  Epetra_Map Map = A.RowMap();
  int NumMyElements = Map.NumMyElements();
  int NumGlobalElements = Map.NumGlobalElements();

  int * MyGlobalElements = new int[NumMyElements];
    Map.MyGlobalElements(MyGlobalElements);

  // Add  rows one-at-a-time
  // Need some vectors to help
  // Off diagonal Values will always be -1


  double *Values = new double[3];
  int *Indices = new int[3];
  int NumEntries;
  
  for (int i=0; i<NumMyElements; i++)
    {
    if (MyGlobalElements[i]==0)
      {
	Indices[0] = 0;
	Indices[1] = 1;
	Values[0] = 2.0;
	Values[1] = -1.0;
	NumEntries = 2;
      }
    else if (MyGlobalElements[i] == NumGlobalElements-1)
      {
	Indices[0] = NumGlobalElements-1;
	Indices[1] = NumGlobalElements-2;
	Values[0] = 2.0;
	Values[1] = -1.0;
	NumEntries = 2;
      }
    else
      {
	Indices[0] = MyGlobalElements[i]-1;
	Indices[1] = MyGlobalElements[i];
	Indices[2] = MyGlobalElements[i]+1;
	Values[0] = -1.0; 
	Values[1] = 2.0;
	Values[2] = -1.0;
	NumEntries = 3;
      }
    
    assert(A.InsertGlobalValues(MyGlobalElements[i], NumEntries, Values, Indices)==0);
     // Put in the diagonal entry
     //     assert(A.InsertGlobalValues(MyGlobalElements[i], 1, &two, &MyGlobalElements[i])==0);
    }
  
  // Finish up
  assert(A.FillComplete()==0);


  delete[] MyGlobalElements;
  delete[] Values;
  delete[] Indices;
  return 0;
}
Пример #15
0
int Drumm3(const Epetra_Map& map, bool verbose)
{
  const Epetra_Comm & Comm = map.Comm();

  /* get number of processors and the name of this processor */

  int Numprocs = Comm.NumProc();
  int MyPID   = Comm.MyPID();

  if (Numprocs != 2) return(0);

  int NumGlobalRows = 4;
  int IndexBase = 0;
  Epetra_Map Map(NumGlobalRows, IndexBase, Comm);

  // Construct FECrsMatrix

  int NumEntriesPerRow = 3;

  Epetra_FECrsMatrix A(Copy, Map, NumEntriesPerRow);

  double ElementArea = 0.5;
  
  int NumCols = 3;
  int* Indices = new int[NumCols];

  if(MyPID==0)  // indices corresponding to element 0 on processor 0
  {
    Indices[0] = 0;
    Indices[1] = 1;
    Indices[2] = 3;
  }
  else if(MyPID==1)  // indices corresponding to element 1 on processor 1
  {
    Indices[0] = 1;
    Indices[1] = 2;
    Indices[2] = 3;
  }

  double* Values = new double[NumCols*NumCols];

// removal term
  Values[0] = 2*ElementArea/12.;
  Values[1] = 1*ElementArea/12.;
  Values[2] = 1*ElementArea/12.;
  Values[3] = 1*ElementArea/12.;
  Values[4] = 2*ElementArea/12.;
  Values[5] = 1*ElementArea/12.;
  Values[6] = 1*ElementArea/12.;
  Values[7] = 1*ElementArea/12.;
  Values[8] = 2*ElementArea/12.;

  A.InsertGlobalValues(NumCols, Indices,
                        Values,
                        Epetra_FECrsMatrix::ROW_MAJOR);

  A.GlobalAssemble();
  A.GlobalAssemble();

//  A.Print(cout);

// Create vectors for CG algorithm

  Epetra_FEVector* bptr = new Epetra_FEVector(A.RowMap(), 1);
  Epetra_FEVector* x0ptr = new Epetra_FEVector(A.RowMap(), 1);

  Epetra_FEVector& b = *bptr;
  Epetra_FEVector& x0 = *x0ptr;

  // source terms
  NumCols = 2;

  if(MyPID==0)  // indices corresponding to element 0 on processor 0
  {
    Indices[0] = 0;
    Indices[1] = 3;

    Values[0] = 1./2.;
    Values[1] = 1./2.;

   }
   else
   {
    Indices[0] = 1;
    Indices[1] = 2;

    Values[0] = 0;
    Values[1] = 0;
   }

  b.SumIntoGlobalValues(NumCols, Indices, Values);

  b.GlobalAssemble();

  if (verbose&&MyPID==0) cout << "b:" << endl;
  if (verbose) {
  b.Print(cout);
  }

  x0 = b;

  if (verbose&&MyPID==0) {
  cout << "x:"<<endl;
  }

  if (verbose) {
  x0.Print(cout);
  }

  delete [] Values;
  delete [] Indices;

  delete bptr;
  delete x0ptr;

  return(0);
}
Пример #16
0
int checkmap(Epetra_Map & Map, int NumGlobalElements, int NumMyElements, 
	     int *MyGlobalElements, int IndexBase, Epetra_Comm& Comm,
	     bool DistributedGlobal)
{
  int i, ierr=0, forierr = 0;

  EPETRA_TEST_ERR(!Map.ConstantElementSize(),ierr);

  EPETRA_TEST_ERR(DistributedGlobal!=Map.DistributedGlobal(),ierr);


  EPETRA_TEST_ERR(Map.ElementSize()!=1,ierr);
  int *MyElementSizeList = new int[NumMyElements];

  EPETRA_TEST_ERR(Map.ElementSizeList(MyElementSizeList)!=0,ierr);

  forierr = 0;
  for (i=0; i<NumMyElements; i++) forierr += MyElementSizeList[i]!=1;
  EPETRA_TEST_ERR(forierr,ierr);

  delete [] MyElementSizeList;

  const Epetra_Comm & Comm1 = Map.Comm();

  EPETRA_TEST_ERR(Comm1.NumProc()!=Comm.NumProc(),ierr);

  EPETRA_TEST_ERR(Comm1.MyPID()!=Comm.MyPID(),ierr);

  EPETRA_TEST_ERR(Map.IndexBase()!=IndexBase,ierr);

  EPETRA_TEST_ERR(!Map.LinearMap() && MyGlobalElements==0,ierr);

  EPETRA_TEST_ERR(Map.LinearMap() && MyGlobalElements!=0,ierr);

  EPETRA_TEST_ERR(Map.MaxAllGID()!=NumGlobalElements-1+IndexBase,ierr);

  EPETRA_TEST_ERR(Map.MaxElementSize()!=1,ierr);

  int MaxLID = Map.MaxLID();
  EPETRA_TEST_ERR(MaxLID!=NumMyElements-1,ierr);

  int MaxMyGID = (Comm.MyPID()+1)*NumMyElements-1+IndexBase;
  if (Comm.MyPID()>2) MaxMyGID+=3;
  if (!DistributedGlobal) MaxMyGID = NumMyElements-1+IndexBase;
  EPETRA_TEST_ERR(Map.MaxMyGID()!=MaxMyGID,ierr);

  EPETRA_TEST_ERR(Map.MinAllGID()!=IndexBase,ierr);

  EPETRA_TEST_ERR(Map.MinElementSize()!=1,ierr);

  EPETRA_TEST_ERR(Map.MinLID()!=0,ierr);

  int MinMyGID = Comm.MyPID()*NumMyElements+IndexBase;
  if (Comm.MyPID()>2) MinMyGID+=3;
  if (!DistributedGlobal) MinMyGID = 0;
  EPETRA_TEST_ERR(Map.MinMyGID()!=MinMyGID,ierr);
  
  int * MyGlobalElements1 = new int[NumMyElements];
  EPETRA_TEST_ERR(Map.MyGlobalElements(MyGlobalElements1)!=0,ierr);

  forierr = 0;
  if (MyGlobalElements==0)
    {
      for (i=0; i<NumMyElements; i++) 
	forierr += MyGlobalElements1[i]!=MinMyGID+i;
      EPETRA_TEST_ERR(forierr,ierr);
    }
  else {
    for (i=0; i<NumMyElements; i++)
      forierr += MyGlobalElements[i]!=MyGlobalElements1[i];
    EPETRA_TEST_ERR(forierr,ierr);
  }
  EPETRA_TEST_ERR(Map.NumGlobalElements()!=NumGlobalElements,ierr);
  
  EPETRA_TEST_ERR(Map.NumGlobalPoints()!=NumGlobalElements,ierr);
  
  EPETRA_TEST_ERR(Map.NumMyElements()!=NumMyElements,ierr);  

  EPETRA_TEST_ERR(Map.NumMyPoints()!=NumMyElements,ierr);

  int MaxMyGID2 = Map.GID(Map.LID(MaxMyGID));
  EPETRA_TEST_ERR(MaxMyGID2 != MaxMyGID,ierr);
  int MaxLID2 = Map.LID(Map.GID(MaxLID));
  EPETRA_TEST_ERR(MaxLID2 != MaxLID,ierr);

  EPETRA_TEST_ERR(Map.GID(MaxLID+1) != IndexBase-1,ierr);// MaxLID+1 doesn't exist
  EPETRA_TEST_ERR(Map.LID(MaxMyGID+1) != -1,ierr);// MaxMyGID+1 doesn't exist or is on a different processor

  EPETRA_TEST_ERR(!Map.MyGID(MaxMyGID),ierr);
  EPETRA_TEST_ERR(Map.MyGID(MaxMyGID+1),ierr);

  EPETRA_TEST_ERR(!Map.MyLID(MaxLID),ierr);
  EPETRA_TEST_ERR(Map.MyLID(MaxLID+1),ierr);

  EPETRA_TEST_ERR(!Map.MyGID(Map.GID(MaxLID)),ierr);
  EPETRA_TEST_ERR(Map.MyGID(Map.GID(MaxLID+1)),ierr);

  EPETRA_TEST_ERR(!Map.MyLID(Map.LID(MaxMyGID)),ierr);
  EPETRA_TEST_ERR(Map.MyLID(Map.LID(MaxMyGID+1)),ierr);

  // Check RemoteIDList function
  // Get some GIDs off of each processor to test
  int TotalNumEle, NumElePerProc, NumProc = Comm.NumProc();
  int MinNumEleOnProc;
  int NumMyEle=Map.NumMyElements();
  Comm.MinAll(&NumMyEle,&MinNumEleOnProc,1);
  if (MinNumEleOnProc > 5) NumElePerProc = 6;
  else NumElePerProc = MinNumEleOnProc;
  if (NumElePerProc > 0) {
    TotalNumEle = NumElePerProc*NumProc;
    int * MyGIDlist = new int[NumElePerProc];
    int * GIDlist = new int[TotalNumEle];
    int * PIDlist = new int[TotalNumEle];
    int * LIDlist = new int[TotalNumEle];
    for (i=0; i<NumElePerProc; i++)
	  MyGIDlist[i] = MyGlobalElements1[i];
    Comm.GatherAll(MyGIDlist,GIDlist,NumElePerProc);// Get a few values from each proc
    Map.RemoteIDList(TotalNumEle, GIDlist, PIDlist, LIDlist);
    int MyPID= Comm.MyPID();

    forierr = 0;
    for (i=0; i<TotalNumEle; i++) {
      if (Map.MyGID(GIDlist[i])) {
	forierr += PIDlist[i] != MyPID;
	forierr += !Map.MyLID(Map.LID(GIDlist[i])) || Map.LID(GIDlist[i]) != LIDlist[i] || Map.GID(LIDlist[i]) != GIDlist[i];
      }
      else {
	forierr += PIDlist[i] == MyPID; // If MyGID comes back false, the PID listed should be that of another proc
      }
    }
    EPETRA_TEST_ERR(forierr,ierr);

    delete [] MyGIDlist;
    delete [] GIDlist;
    delete [] PIDlist;
    delete [] LIDlist;
  }

  delete [] MyGlobalElements1;

  // Check RemoteIDList function (assumes all maps are linear, even if not stored that way)

  if (Map.LinearMap()) {

    int * GIDList = new int[3];
    int * PIDList = new int[3];
    int * LIDList = new int[3];
    int MyPID = Map.Comm().MyPID();
  
    int NumIDs = 0;
    //GIDList[NumIDs++] = Map.MaxAllGID()+1; // Should return -1 for both PID and LID
    if (Map.MinMyGID()-1>=Map.MinAllGID()) GIDList[NumIDs++] = Map.MinMyGID()-1;
    if (Map.MaxMyGID()+1<=Map.MaxAllGID()) GIDList[NumIDs++] = Map.MaxMyGID()+1;

    Map.RemoteIDList(NumIDs, GIDList, PIDList, LIDList);

    NumIDs = 0;

    //EPETRA_TEST_ERR(!(PIDList[NumIDs]==-1),ierr);
    //EPETRA_TEST_ERR(!(LIDList[NumIDs++]==-1),ierr);

    if (Map.MinMyGID()-1>=Map.MinAllGID()) EPETRA_TEST_ERR(!(PIDList[NumIDs++]==MyPID-1),ierr);
    if (Map.MaxMyGID()+1<=Map.MaxAllGID()) EPETRA_TEST_ERR(!(PIDList[NumIDs]==MyPID+1),ierr);
    if (Map.MaxMyGID()+1<=Map.MaxAllGID()) EPETRA_TEST_ERR(!(LIDList[NumIDs++]==0),ierr);

    delete [] GIDList;
    delete [] PIDList;
    delete [] LIDList;

  }
  
  return (ierr);
}
Пример #17
0
int Drumm1(const Epetra_Map& map, bool verbose)
{
  (void)verbose;
  //Simple 2-element problem (element as in "finite-element") from
  //Clif Drumm. Two triangular elements, one per processor, as shown
  //here:
  //
  //   *----*
  //  3|\  2|
  //   | \  |
  //   | 0\1|
  //   |   \|
  //   *----*
  //  0    1
  //
  //Element 0 on processor 0, element 1 on processor 1.
  //Processor 0 will own nodes 0,1 and processor 1 will own nodes 2,3.
  //Each processor will pass a 3x3 element-matrix to Epetra_FECrsMatrix.
  //After GlobalAssemble(), the matrix should be as follows:
  //
  //         row 0: 2  1  0  1
  //proc 0   row 1: 1  4  1  2
  //----------------------------------
  //         row 2: 0  1  2  1
  //proc 1   row 3: 1  2  1  4
  //

  int numProcs = map.Comm().NumProc();
  int localProc = map.Comm().MyPID();

  if (numProcs != 2) return(0);

  //so first we'll set up a epetra_test::matrix_data object with
  //contents that match the above-described matrix. (but the
  //matrix_data object will have all 4 rows on each processor)

  int i;
  int rowlengths[4];
  rowlengths[0] = 3;
  rowlengths[1] = 4;
  rowlengths[2] = 3;
  rowlengths[3] = 4;

  epetra_test::matrix_data matdata(4, rowlengths);
  for(i=0; i<4; ++i) {
    for(int j=0; j<matdata.rowlengths()[i]; ++j) {
      matdata.colindices()[i][j] = j;
    }
  }

  matdata.colindices()[0][2] = 3;

  matdata.colindices()[2][0] = 1;
  matdata.colindices()[2][1] = 2;
  matdata.colindices()[2][2] = 3;

  double** coefs = matdata.coefs();
  coefs[0][0] = 2.0; coefs[0][1] = 1.0;                    coefs[0][2] = 1.0;
  coefs[1][0] = 1.0; coefs[1][1] = 4.0; coefs[1][2] = 1.0; coefs[1][3] = 2.0;
                     coefs[2][0] = 1.0; coefs[2][1] = 2.0; coefs[2][2] = 1.0;
  coefs[3][0] = 1.0; coefs[3][1] = 2.0; coefs[3][2] = 1.0; coefs[3][3] = 4.0;

  //now we'll load a Epetra_FECrsMatrix with data that matches the
  //above-described finite-element problem.

  int indexBase = 0, ierr = 0;
  int myNodes[4];
  double values[9];
  values[0] = 2.0;
  values[1] = 1.0;
  values[2] = 1.0;
  values[3] = 1.0;
  values[4] = 2.0;
  values[5] = 1.0;
  values[6] = 1.0;
  values[7] = 1.0;
  values[8] = 2.0;

  int numMyNodes = 2;

  if (localProc == 0) {
    myNodes[0] = 0;
    myNodes[1] = 1;
  }
  else {
    myNodes[0] = 2;
    myNodes[1] = 3;
  }

  Epetra_Map Map(-1, numMyNodes, myNodes, indexBase, map.Comm());

  numMyNodes = 3;

  if (localProc == 0) {
    myNodes[0] = 0;
    myNodes[1] = 1;
    myNodes[2] = 3;
  }
  else {
    myNodes[0] = 1;
    myNodes[1] = 2;
    myNodes[2] = 3;
  }

  int rowLengths = 3;
  Epetra_FECrsMatrix A(Copy, Map, rowLengths);

  EPETRA_TEST_ERR( A.InsertGlobalValues(numMyNodes, myNodes,
                                        numMyNodes, myNodes, values,
                                        Epetra_FECrsMatrix::ROW_MAJOR),ierr);

  EPETRA_TEST_ERR( A.GlobalAssemble(), ierr );
  EPETRA_TEST_ERR( A.GlobalAssemble(), ierr );

  //now the test is to check whether the FECrsMatrix data matches the
  //epetra_test::matrix_data object...

  bool the_same = matdata.compare_local_data(A);

  if (!the_same) {
    return(-1);
  }

  return(0);
}
int main(int argc, char *argv[])
{
#ifdef HAVE_MPI
  MPI_Init(&argc,&argv);
  Epetra_MpiComm Comm( MPI_COMM_WORLD );
#else
  Epetra_SerialComm Comm;
#endif
  // My MPI process rank.                                                                                                                                                                  
  const int MyPID = Comm.MyPID();

  // "/Users/sakashitatatsuya/Downloads/barista_trunk_slepc/sample/hamiltonian_matrix.ip"
  std::ifstream  ifs(argv[1]);
  alps::Parameters params(ifs);
  Teuchos::oblackholestream blackHole;
  std::ostream& out = (MyPID == 0) ? std::cout : blackHole;

  barista::Hamiltonian<> hamiltonian(params);
  matrix_type matrix(hamiltonian.dimension(), hamiltonian.dimension());
  hamiltonian.fill<double>(matrix);
  int m,n;
  int N;
  m = n = N = hamiltonian.dimension();
  //std::cout << matrix << std::endl;

  std::ofstream ofs;
  if (MyPID==0) {
    ofs.open("anasazi_time.txt");
    if (!ofs) {
#ifdef HAVE_MPI
      MPI_Finalize() ;
#endif
      return -1;
    }
  }

  //Teuchos::ParameterList GaleriList;
  using Teuchos::RCP;
  using Teuchos::rcp;
  typedef Teuchos::ScalarTraits<double> STS;

  const double one = STS::one();
  const double zero = STS::zero();

  // The problem is defined on a 2D grid, global size is nx * nx.
  //int nx = N; 
  //GaleriList.set("n", nx * nx);
  //GaleriList.set("nx", nx);
  //GaleriList.set("ny", nx);
  //Teuchos::RCP<Epetra_Map> Map = Teuchos::rcp( Galeri::CreateMap("Linear", Comm, GaleriList) );
  //Teuchos::RCP<Epetra_RowMatrix> A = Teuchos::rcp( Galeri::CreateCrsMatrix("Laplace2D", &*Map, GaleriList) );

  // Construct a Map that puts approximately the same number of rows
  // of the matrix A on each processor.
  Epetra_Map RowMap (N, 0, Comm);
  Epetra_Map ColMap (N, 0, Comm);
  // Get update list and number of local equations from newly created Map.
  const int NumMyRowElements = RowMap.NumMyElements ();
  std::vector<int> MyGlobalRowElements (NumMyRowElements);
  RowMap.MyGlobalElements (&MyGlobalRowElements[0]);


  // Create an Epetra_CrsMatrix using the given row map.                                                                                                                                   
  RCP<Epetra_CrsMatrix> A = rcp (new Epetra_CrsMatrix (Copy, RowMap, n));

  // We use info to catch any errors that may have happened during                                                                                                                           // matrix assembly, and report them globally.  We do this so that                                                                                                                          // the MPI processes won't call FillComplete() unless they all                                                                                                                             // successfully filled their parts of the matrix.                                                                                                                                         
  int info = 0;
  try {
    //                                                                                                                                                                                     
    // Compute coefficients for the discrete integral operator.                                                                                                                           
    //                                                                                                                                                                                      
    std::vector<double> Values (n);
    std::vector<int> Indices (n);
    //const double inv_mp1 = one / (m+1);
    //const double inv_np1 = one / (n+1);
    int count;
    //for (int i = 0; i < n; ++i) {
    //  Indices[i] = i;
    //}
    for (int i = 0; i < NumMyRowElements; ++i) {
      count =0;
      for (int j = 0; j < n; ++j) {
	if (matrix(MyGlobalRowElements[i],j)!=0) {
	  Values[count] = matrix(MyGlobalRowElements[i],j);
	  Indices[count] = j;
	  count++;
	}
      }

      info = A->InsertGlobalValues (MyGlobalRowElements[i], count,
                                    &Values[0], &Indices[0]);
      // Make sure that the insertion succeeded.  Teuchos'                                                                                                                                 
      // TEST_FOR_EXCEPTION macro gives a nice error message if the                                                                                                                        
      // thrown exception isn't caught.  We'll report this on the                                                                                                                          
      // offending MPI process.                                                                                                                                                             
      /*                                                                                                                     
      TEST_FOR_EXCEPTION( info != 0, std::runtime_error, "Failed to insert n="                                                                                                             
      << n << " global value" << (n != 1 ? "s" : "")                                                                                                                    
      << " in row " << MyGlobalRowElements[i]                                                                                                                           
      << " of the matrix." );                                                                                                                                           
      */
    } // for i = 0...                                                                                                                                                                       
    // Call FillComplete on the matrix.  Since the matrix isn't square,                                                                                                                    
    // we have to give FillComplete the domain and range maps, which in                                                                                                                    
    // this case are the column resp. row maps.                                                                                                                                             
    info = A->FillComplete (ColMap, RowMap);
    /*                                                                                                                                                                                     
    TEST_FOR_EXCEPTION( info != 0, std::runtime_error,                                                                                                                                     
    "FillComplete failed with INFO = " << info << ".");                                                                                                                 
    */
    info = A->OptimizeStorage();
    /*                                                                                                                                                                                     
    TEST_FOR_EXCEPTION( info != 0, std::runtime_error,                                                                                                                                
    "OptimizeStorage failed with INFO = " << info << ".");                                                                                                              
    */
  } catch (std::runtime_error& e) {
    // If multiple MPI processes are reporting errors, sometimes                                                                                                                           
    // forming the error message as a string and then writing it to                                                                                                                        
    // the output stream prevents messages from different processes                                                                                                                        
    // from being interleaved.                                                                                                                                                              
    std::ostringstream os;
    os << "*** Error on MPI process " << MyPID << ": " << e.what();
    cerr << os.str() << endl;
    if (info == 0)
      info = -1; // All procs will share info later on.                                                                                                                                     
  }

  //  Variables used for the Block Davidson Method
  const int    nev         = 5;
  const int    blockSize   = 5;
  const int    numBlocks   = 8;
  const int    maxRestarts = 500;
  const double tol         = 1.0e-8;

  typedef Epetra_MultiVector MV;
  typedef Epetra_Operator OP;
  typedef Anasazi::MultiVecTraits<double, Epetra_MultiVector> MVT;

  // Create an Epetra_MultiVector for an initial vector to start the solver.
  // Note:  This needs to have the same number of columns as the blocksize.
  //
  //Teuchos::RCP<Epetra_MultiVector> ivec = Teuchos::rcp( new Epetra_MultiVector(*Map, blockSize) );
  Teuchos::RCP<Epetra_MultiVector> ivec = Teuchos::rcp( new Epetra_MultiVector(ColMap, blockSize) );
  ivec->Random();

  // Create the eigenproblem.
  Teuchos::RCP<Anasazi::BasicEigenproblem<double, MV, OP> > problem =
    Teuchos::rcp( new Anasazi::BasicEigenproblem<double, MV, OP>(A, ivec) );

  // Inform the eigenproblem that the operator A is symmetric
  problem->setHermitian(true);

  // Set the number of eigenvalues requested
  problem->setNEV( nev );

  // Inform the eigenproblem that you are finishing passing it information
  bool boolret = problem->setProblem();
  if (boolret != true) {
    std::cout<<"Anasazi::BasicEigenproblem::setProblem() returned an error." << std::endl;
#ifdef HAVE_MPI
    MPI_Finalize();
#endif
    return -1;
  }

  // Create parameter list to pass into the solver manager
  Teuchos::ParameterList anasaziPL;
  anasaziPL.set( "Which", "LM" );
  anasaziPL.set( "Block Size", blockSize );
  anasaziPL.set( "Maximum Iterations", 500 );
  anasaziPL.set( "Convergence Tolerance", tol );
  anasaziPL.set( "Verbosity", Anasazi::Errors+Anasazi::Warnings+Anasazi::TimingDetails+Anasazi::FinalSummary );

  // Create the solver manager
  Anasazi::LOBPCGSolMgr<double, MV, OP> anasaziSolver(problem, anasaziPL);

  // Solve the problem
  double start, end;
  MPI_Barrier(MPI_COMM_WORLD);
  start = MPI_Wtime();
  Anasazi::ReturnType returnCode = anasaziSolver.solve();
  MPI_Barrier(MPI_COMM_WORLD);
  end = MPI_Wtime();

  // Get the eigenvalues and eigenvectors from the eigenproblem
  Anasazi::Eigensolution<double,MV> sol = problem->getSolution();
  std::vector<Anasazi::Value<double> > evals = sol.Evals;
  Teuchos::RCP<MV> evecs = sol.Evecs;

  // Compute residuals.
  std::vector<double> normR(sol.numVecs);
  Teuchos::SerialDenseMatrix<int,double> T(sol.numVecs, sol.numVecs);
  Epetra_MultiVector tempAevec( ColMap, sol.numVecs );
  T.putScalar(0.0); 
  for (int i=0; i<sol.numVecs; i++) {
    T(i,i) = evals[i].realpart;
  }
  A->Apply( *evecs, tempAevec );
  MVT::MvTimesMatAddMv( -1.0, *evecs, T, 1.0, tempAevec );
  MVT::MvNorm( tempAevec, normR );

  if (MyPID == 0) {
  // Print the results
  std::cout<<"Solver manager returned " << (returnCode == Anasazi::Converged ? "converged." : "unconverged.") << std::endl;
  std::cout<<std::endl;
  std::cout<<"------------------------------------------------------"<<std::endl;
  std::cout<<std::setw(16)<<"Eigenvalue"
           <<std::setw(18)<<"Direct Residual"
           <<std::endl;
  std::cout<<"------------------------------------------------------"<<std::endl;
  for (int i=0; i<sol.numVecs; i++) {
    std::cout<<std::setw(16)<<evals[i].realpart
             <<std::setw(18)<<normR[i]/evals[i].realpart
             <<std::endl;
  }
  std::cout<<"------------------------------------------------------"<<std::endl;
  }

  // Print out the map and matrices
  //ColMap.Print (out);

  //A->Print (cout);
  //RowMap.Print (cout);

  double time;
  int iter;
  if (MyPID==0) {
    iter = anasaziSolver.getNumIters();
    Teuchos::Array<Teuchos::RCP<Teuchos::Time> > timer = anasaziSolver.getTimers();
    Teuchos::RCP<Teuchos::Time> _timerSolve = timer[0];
    cout << "timerSolve=" << _timerSolve << endl;
    time = end - start;
    cout << "time=" << time << endl;
    ofs << "time=" << time << endl;
    cout << "iter=" << iter << endl;
    ofs << "iter=" << iter << endl;
  }

#ifdef HAVE_MPI
  MPI_Finalize() ; 
#endif

  return 0;
}
Пример #19
0
int main(int argc, char *argv[]) {

#ifdef EPETRA_MPI
  MPI_Init(&argc,&argv);
  Epetra_MpiComm Comm (MPI_COMM_WORLD);
#else
  Epetra_SerialComm Comm;
#endif

  cout << Comm << endl;

  int MyPID = Comm.MyPID();

  bool verbose = false;
  bool verbose1 = true;
  if (MyPID==0) verbose = true;

  if(argc < 2 && verbose) {
    cerr << "Usage: " << argv[0] 
	 << " HB_filename [level_fill [level_overlap [absolute_threshold [ relative_threshold]]]]" << endl
	 << "where:" << endl
	 << "HB_filename        - filename and path of a Harwell-Boeing data set" << endl
	 << "level_fill         - The amount of fill to use for ILU(k) preconditioner (default 0)" << endl
	 << "level_overlap      - The amount of overlap used for overlapping Schwarz subdomains (default 0)" << endl
	 << "absolute_threshold - The minimum value to place on the diagonal prior to factorization (default 0.0)" << endl
	 << "relative_threshold - The relative amount to perturb the diagonal prior to factorization (default 1.0)" << endl << endl
	 << "To specify a non-default value for one of these parameters, you must specify all" << endl
	 << " preceding values but not any subsequent parameters. Example:" << endl
	 << "ifpackHpcSerialMsr.exe mymatrix.hpc 1  - loads mymatrix.hpc, uses level fill of one, all other values are defaults" << endl
	 << endl;
    return(1);

  }

  // Uncomment the next three lines to debug in mpi mode
  //int tmp;
  //if (MyPID==0) cin >> tmp;
  //Comm.Barrier();

  Epetra_Map * readMap;
  Epetra_CrsMatrix * readA; 
  Epetra_Vector * readx; 
  Epetra_Vector * readb;
  Epetra_Vector * readxexact;
   
  // Call routine to read in HB problem
  Trilinos_Util_ReadHb2Epetra(argv[1], Comm, readMap, readA, readx, readb, readxexact);

  // Create uniform distributed map
  Epetra_Map map(readMap->NumGlobalElements(), 0, Comm);

  // Create Exporter to distribute read-in matrix and vectors

  Epetra_Export exporter(*readMap, map);
  Epetra_CrsMatrix A(Copy, map, 0);
  Epetra_Vector x(map);
  Epetra_Vector b(map);
  Epetra_Vector xexact(map);

  Epetra_Time FillTimer(Comm);
  x.Export(*readx, exporter, Add);
  b.Export(*readb, exporter, Add);
  xexact.Export(*readxexact, exporter, Add);
  Comm.Barrier();
  double vectorRedistributeTime = FillTimer.ElapsedTime();
  A.Export(*readA, exporter, Add);
  Comm.Barrier();
  double matrixRedistributeTime = FillTimer.ElapsedTime() - vectorRedistributeTime;
  assert(A.FillComplete()==0);    
  Comm.Barrier();
  double fillCompleteTime = FillTimer.ElapsedTime() - matrixRedistributeTime;
  if (Comm.MyPID()==0)	{
    cout << "\n\n****************************************************" << endl;
    cout << "\n Vector redistribute  time (sec) = " << vectorRedistributeTime<< endl;
    cout << "    Matrix redistribute time (sec) = " << matrixRedistributeTime << endl;
    cout << "    Transform to Local  time (sec) = " << fillCompleteTime << endl<< endl;
  }
  Epetra_Vector tmp1(*readMap);
  Epetra_Vector tmp2(map);
  readA->Multiply(false, *readxexact, tmp1);

  A.Multiply(false, xexact, tmp2);
  double residual;
  tmp1.Norm2(&residual);
  if (verbose) cout << "Norm of Ax from file            = " << residual << endl;
  tmp2.Norm2(&residual);
  if (verbose) cout << "Norm of Ax after redistribution = " << residual << endl << endl << endl;

  //cout << "A from file = " << *readA << endl << endl << endl;

  //cout << "A after dist = " << A << endl << endl << endl;

  delete readA;
  delete readx;
  delete readb;
  delete readxexact;
  delete readMap;

  Comm.Barrier();

  bool smallProblem = false;
  if (A.RowMap().NumGlobalElements()<100) smallProblem = true;

  if (smallProblem)
    cout << "Original Matrix = " << endl << A   << endl;

  x.PutScalar(0.0);

  Epetra_LinearProblem FullProblem(&A, &x, &b);
  double normb, norma;
  b.NormInf(&normb);
  norma = A.NormInf();
  if (verbose)
    cout << "Inf norm of Original Matrix = " << norma << endl
	 << "Inf norm of Original RHS    = " << normb << endl;
  
  Epetra_Time ReductionTimer(Comm);
  Epetra_CrsSingletonFilter SingletonFilter;
  Comm.Barrier();
  double reduceInitTime = ReductionTimer.ElapsedTime();
  SingletonFilter.Analyze(&A);
  Comm.Barrier();
  double reduceAnalyzeTime = ReductionTimer.ElapsedTime() - reduceInitTime;

  if (SingletonFilter.SingletonsDetected())
    cout << "Singletons found" << endl;
  else {
    cout << "Singletons not found" << endl;
    exit(1);
  }
  SingletonFilter.ConstructReducedProblem(&FullProblem);
  Comm.Barrier();
  double reduceConstructTime = ReductionTimer.ElapsedTime() - reduceInitTime;

  double totalReduceTime = ReductionTimer.ElapsedTime();

  if (verbose)
    cout << "\n\n****************************************************" << endl
	 << "    Reduction init  time (sec)           = " << reduceInitTime<< endl
	 << "    Reduction Analyze time (sec)         = " << reduceAnalyzeTime << endl
	 << "    Construct Reduced Problem time (sec) = " << reduceConstructTime << endl
	 << "    Reduction Total time (sec)           = " << totalReduceTime << endl<< endl;

  Statistics(SingletonFilter);

  Epetra_LinearProblem * ReducedProblem = SingletonFilter.ReducedProblem();

  Epetra_CrsMatrix * Ap = dynamic_cast<Epetra_CrsMatrix *>(ReducedProblem->GetMatrix());
  Epetra_Vector * bp = (*ReducedProblem->GetRHS())(0);
  Epetra_Vector * xp = (*ReducedProblem->GetLHS())(0);
  

  if (smallProblem)
    cout << " Reduced Matrix = " << endl << *Ap << endl
	 << " LHS before sol = " << endl << *xp << endl
	 << " RHS            = " << endl << *bp << endl;

  // Construct ILU preconditioner

  double elapsed_time, total_flops, MFLOPs;
  Epetra_Time timer(Comm);

  int LevelFill = 0;
  if (argc > 2)  LevelFill = atoi(argv[2]);
  if (verbose) cout << "Using Level Fill = " << LevelFill << endl;
  int Overlap = 0;
  if (argc > 3) Overlap = atoi(argv[3]);
  if (verbose) cout << "Using Level Overlap = " << Overlap << endl;
  double Athresh = 0.0;
  if (argc > 4) Athresh = atof(argv[4]);
  if (verbose) cout << "Using Absolute Threshold Value of = " << Athresh << endl;

  double Rthresh = 1.0;
  if (argc > 5) Rthresh = atof(argv[5]);
  if (verbose) cout << "Using Relative Threshold Value of = " << Rthresh << endl;

  Ifpack_IlukGraph * IlukGraph = 0;
  Ifpack_CrsRiluk * ILUK = 0;

  if (LevelFill>-1) {
    elapsed_time = timer.ElapsedTime();
    IlukGraph = new Ifpack_IlukGraph(Ap->Graph(), LevelFill, Overlap);
    assert(IlukGraph->ConstructFilledGraph()==0);
    elapsed_time = timer.ElapsedTime() - elapsed_time;
    if (verbose) cout << "Time to construct ILUK graph = " << elapsed_time << endl;


    Epetra_Flops fact_counter;
  
    elapsed_time = timer.ElapsedTime();
    ILUK = new Ifpack_CrsRiluk(*IlukGraph);
    ILUK->SetFlopCounter(fact_counter);
    ILUK->SetAbsoluteThreshold(Athresh);
    ILUK->SetRelativeThreshold(Rthresh);
    //assert(ILUK->InitValues()==0);
    int initerr = ILUK->InitValues(*Ap);
    if (initerr!=0) {
      cout << endl << Comm << endl << "  InitValues error = " << initerr;
      if (initerr==1) cout << "  Zero diagonal found, warning error only";
      cout << endl << endl;
    }
    assert(ILUK->Factor()==0);
    elapsed_time = timer.ElapsedTime() - elapsed_time;
    total_flops = ILUK->Flops();
    MFLOPs = total_flops/elapsed_time/1000000.0;
    if (verbose) cout << "Time to compute preconditioner values = " 
		    << elapsed_time << endl
		    << "MFLOPS for Factorization = " << MFLOPs << endl;
    //cout << *ILUK << endl;
  double Condest;
  ILUK->Condest(false, Condest);

  if (verbose) cout << "Condition number estimate for this preconditioner = " << Condest << endl;
  }
  int Maxiter = 100;
  double Tolerance = 1.0E-8;

  Epetra_Flops counter;
  Ap->SetFlopCounter(counter);
  xp->SetFlopCounter(*Ap);
  bp->SetFlopCounter(*Ap);
  if (ILUK!=0) ILUK->SetFlopCounter(*Ap);

  elapsed_time = timer.ElapsedTime();

  double normreducedb, normreduceda;
  bp->NormInf(&normreducedb);
  normreduceda = Ap->NormInf();
  if (verbose) 
    cout << "Inf norm of Reduced Matrix = " << normreduceda << endl
	 << "Inf norm of Reduced RHS    = " << normreducedb << endl;

  BiCGSTAB(*Ap, *xp, *bp, ILUK, Maxiter, Tolerance, &residual, verbose);

  elapsed_time = timer.ElapsedTime() - elapsed_time;
  total_flops = counter.Flops();
  MFLOPs = total_flops/elapsed_time/1000000.0;
  if (verbose) cout << "Time to compute solution = " 
		    << elapsed_time << endl
		    << "Number of operations in solve = " << total_flops << endl
		    << "MFLOPS for Solve = " << MFLOPs<< endl << endl;

  SingletonFilter.ComputeFullSolution();

  if (smallProblem)
  cout << " Reduced LHS after sol = " << endl << *xp << endl
       << " Full    LHS after sol = " << endl << x << endl
       << " Full  Exact LHS         = " << endl << xexact << endl;

  Epetra_Vector resid(x);

  resid.Update(1.0, x, -1.0, xexact, 0.0); // resid = xcomp - xexact

  resid.Norm2(&residual);
  double normx, normxexact;
  x.Norm2(&normx);
  xexact.Norm2(&normxexact);

  if (verbose) 
    cout << "2-norm of computed solution                               = " << normx << endl
	 << "2-norm of exact solution                                  = " << normxexact << endl
	 << "2-norm of difference between computed and exact solution  = " << residual << endl;
    
  if (verbose1 && residual>1.0e-5) {
    if (verbose)
      cout << "Difference between computed and exact solution appears large..." << endl
	   << "Computing norm of A times this difference.  If this norm is small, then matrix is singular"
	   << endl;
    Epetra_Vector bdiff(b);
    assert(A.Multiply(false, resid, bdiff)==0);
    assert(bdiff.Norm2(&residual)==0);
    if (verbose) 
      cout << "2-norm of A times difference between computed and exact solution  = " << residual << endl;
    
  }
  if (verbose) 
    cout << "********************************************************" << endl
	 << "              Solving again with 2*Ax=2*b" << endl
	 << "********************************************************" << endl;

  A.Scale(1.0); // A = 2*A
  b.Scale(1.0); // b = 2*b
  x.PutScalar(0.0);
  b.NormInf(&normb);
  norma = A.NormInf();
  if (verbose)
    cout << "Inf norm of Original Matrix = " << norma << endl
	 << "Inf norm of Original RHS    = " << normb << endl;
  double updateReducedProblemTime = ReductionTimer.ElapsedTime();
  SingletonFilter.UpdateReducedProblem(&FullProblem);
  Comm.Barrier();
  updateReducedProblemTime = ReductionTimer.ElapsedTime() - updateReducedProblemTime;
  if (verbose)
    cout << "\n\n****************************************************" << endl
	 << "    Update Reduced Problem time (sec)           = " << updateReducedProblemTime<< endl
	 << "****************************************************" << endl;
  Statistics(SingletonFilter);

  if (LevelFill>-1) {

    Epetra_Flops fact_counter;
  
    elapsed_time = timer.ElapsedTime();

    int initerr = ILUK->InitValues(*Ap);
    if (initerr!=0) {
      cout << endl << Comm << endl << "  InitValues error = " << initerr;
      if (initerr==1) cout << "  Zero diagonal found, warning error only";
      cout << endl << endl;
    }
    assert(ILUK->Factor()==0);
    elapsed_time = timer.ElapsedTime() - elapsed_time;
    total_flops = ILUK->Flops();
    MFLOPs = total_flops/elapsed_time/1000000.0;
    if (verbose) cout << "Time to compute preconditioner values = " 
		    << elapsed_time << endl
		    << "MFLOPS for Factorization = " << MFLOPs << endl;
    double Condest;
    ILUK->Condest(false, Condest);
    
    if (verbose) cout << "Condition number estimate for this preconditioner = " << Condest << endl;
  }
  bp->NormInf(&normreducedb);
  normreduceda = Ap->NormInf();
  if (verbose) 
    cout << "Inf norm of Reduced Matrix = " << normreduceda << endl
	 << "Inf norm of Reduced RHS    = " << normreducedb << endl;

  BiCGSTAB(*Ap, *xp, *bp, ILUK, Maxiter, Tolerance, &residual, verbose);

  elapsed_time = timer.ElapsedTime() - elapsed_time;
  total_flops = counter.Flops();
  MFLOPs = total_flops/elapsed_time/1000000.0;
  if (verbose) cout << "Time to compute solution = " 
		    << elapsed_time << endl
		    << "Number of operations in solve = " << total_flops << endl
		    << "MFLOPS for Solve = " << MFLOPs<< endl << endl;

  SingletonFilter.ComputeFullSolution();

  if (smallProblem)
  cout << " Reduced LHS after sol = " << endl << *xp << endl
       << " Full    LHS after sol = " << endl << x << endl
       << " Full  Exact LHS         = " << endl << xexact << endl;

  resid.Update(1.0, x, -1.0, xexact, 0.0); // resid = xcomp - xexact

  resid.Norm2(&residual);
  x.Norm2(&normx);
  xexact.Norm2(&normxexact);

  if (verbose) 
    cout << "2-norm of computed solution                               = " << normx << endl
	 << "2-norm of exact solution                                  = " << normxexact << endl
	 << "2-norm of difference between computed and exact solution  = " << residual << endl;
    
  if (verbose1 && residual>1.0e-5) {
    if (verbose)
      cout << "Difference between computed and exact solution appears large..." << endl
	   << "Computing norm of A times this difference.  If this norm is small, then matrix is singular"
	   << endl;
    Epetra_Vector bdiff(b);
    assert(A.Multiply(false, resid, bdiff)==0);
    assert(bdiff.Norm2(&residual)==0);
    if (verbose) 
      cout << "2-norm of A times difference between computed and exact solution  = " << residual << endl;
    
  }
 


  if (ILUK!=0) delete ILUK;
  if (IlukGraph!=0) delete IlukGraph;
				       
#ifdef EPETRA_MPI
  MPI_Finalize() ;
#endif

return 0 ;
}
Пример #20
0
int 
main (int argc, char *argv[])
{
  using Teuchos::ArrayRCP;
  using Teuchos::ArrayView;
  using Teuchos::Comm;
  using Teuchos::CommandLineProcessor;
  using Teuchos::FancyOStream;
  using Teuchos::getFancyOStream;
  using Teuchos::OSTab;
  using Teuchos::ptr;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcpFromRef;
  using std::cout;
  using std::endl;

  bool success = true; // May be changed by tests

  Teuchos::oblackholestream blackHole;
  //Teuchos::GlobalMPISession (&argc, &argv, &blackHole);
  MPI_Init (&argc, &argv);

  //
  // Construct communicators, and verify that we are on 4 processors.
  //

  // Construct a Teuchos Comm object.
  RCP<const Comm<int> > teuchosComm = Teuchos::DefaultComm<int>::getComm();
  const int numProcs = teuchosComm->getSize();
  const int pid = teuchosComm->getRank();
  RCP<FancyOStream> pOut = 
    getFancyOStream (rcpFromRef ((pid == 0) ? std::cout : blackHole));
  FancyOStream& out = *pOut;
  // Verify that we are on four processors (which manifests the bug).
  if (teuchosComm->getSize() != 4) {
    out << "This test must be run on four processors.  Exiting ..." << endl;
    return EXIT_FAILURE;
  }

  // We also need an Epetra Comm, so that we can compare Tpetra and
  // Epetra results.
  Epetra_MpiComm epetraComm (MPI_COMM_WORLD);

  //
  // Default values of command-line options.
  //
  bool verbose = false;
  bool printEpetra = false;
  bool printTpetra = false;
  CommandLineProcessor cmdp (false,true);
  //
  // Set command-line options.
  //
  cmdp.setOption ("verbose", "quiet", &verbose, "Print verbose output.");
  // Epetra and Tpetra output will ask the Maps and Import objects to
  // print themselves in distributed, maximally verbose fashion.  It's
  // best to turn on either Epetra or Tpetra, but not both.  Then you
  // can compare their output side by side.
  cmdp.setOption ("printEpetra", "dontPrintEpetra", &printEpetra, 
		  "Print Epetra output (in verbose mode only).");
  cmdp.setOption ("printTpetra", "dontPrintTpetra", &printTpetra, 
		  "Print Tpetra output (in verbose mode only).");
  // Parse command-line options.
  if (cmdp.parse (argc,argv) != CommandLineProcessor::PARSE_SUCCESSFUL) {
    out << "End Result: TEST FAILED" << endl;
    MPI_Finalize ();
    return EXIT_FAILURE;
  }

  if (verbose) {
    out << "Running test on " << numProcs << " process" 
	<< (numProcs != 1 ? "es" : "") << "." << endl;
  }

  // The maps for this problem are derived from a 3D structured mesh.
  // In this example, the dimensions are 4x4x2 and there are 2
  // processors assigned to the first dimension and 2 processors
  // assigned to the second dimension, with no parallel decomposition
  // along the third dimension.  The "owned" arrays represent the
  // one-to-one map, with each array representing a 2x2x2 slice.  If
  // DIMENSIONS == 2, then only the first 4 values will be used,
  // representing a 2x2(x1) slice.
  int owned0[8] = { 0, 1, 4, 5,16,17,20,21};
  int owned1[8] = { 2, 3, 6, 7,18,19,22,23};
  int owned2[8] = { 8, 9,12,13,24,25,28,29};
  int owned3[8] = {10,11,14,15,26,27,30,31};

  // The "overlap" arrays represent the map with communication
  // elements, with each array representing a 3x3x2 slice.  If
  // DIMENSIONS == 2, then only the first 9 values will be used,
  // representing a 3x3(x1) slice.
  int overlap0[18] = {0,1,2,4, 5, 6, 8, 9,10,16,17,18,20,21,22,24,25,26};
  int overlap1[18] = {1,2,3,5, 6, 7, 9,10,11,17,18,19,21,22,23,25,26,27};
  int overlap2[18] = {4,5,6,8, 9,10,12,13,14,20,21,22,24,25,26,28,29,30};
  int overlap3[18] = {5,6,7,9,10,11,13,14,15,21,22,23,25,26,27,29,30,31};

  // Construct the owned and overlap maps for both Epetra and Tpetra.
  int* owned;
  int* overlap;
  if (pid == 0) {
    owned   = owned0;
    overlap = overlap0;
  }
  else if (pid == 1) {
    owned   = owned1;
    overlap = overlap1;
  }
  else if (pid == 2) {
    owned   = owned2;
    overlap = overlap2;
  }
  else {
    owned   = owned3;
    overlap = overlap3;
  }

#if DIMENSIONS == 2
  int ownedSize   = 4;
  int overlapSize = 9;
#elif DIMENSIONS == 3
  int ownedSize   =  8;
  int overlapSize = 18;
#endif

  // Create the two Epetra Maps.  Source for the Import is the owned
  // map; target for the Import is the overlap map.
  Epetra_Map epetraOwnedMap (  -1, ownedSize,   owned,   0, epetraComm);
  Epetra_Map epetraOverlapMap (-1, overlapSize, overlap, 0, epetraComm);

  if (verbose && printEpetra) {
    // Have the Epetra_Map objects describe themselves.
    //
    // Epetra_BlockMap::Print() takes an std::ostream&, and expects
    // all MPI processes to be able to write to it.  (The method
    // handles its own synchronization.)
    out << "Epetra owned map:" << endl;
    epetraOwnedMap.Print (std::cout);
    out << "Epetra overlap map:" << endl;
    epetraOverlapMap.Print (std::cout);
  }

  // Create the two Tpetra Maps.  The "invalid" global element count
  // input tells Tpetra::Map to compute the global number of elements
  // itself.
  const int invalid = Teuchos::OrdinalTraits<int>::invalid();
  RCP<Tpetra::Map<int> > tpetraOwnedMap = 
    rcp (new Tpetra::Map<int> (invalid, ArrayView<int> (owned, ownedSize), 
			       0, teuchosComm));
  tpetraOwnedMap->setObjectLabel ("Owned Map");
  RCP<Tpetra::Map<int> > tpetraOverlapMap =
    rcp (new Tpetra::Map<int> (invalid, ArrayView<int> (overlap, overlapSize),
			       0, teuchosComm));
  tpetraOverlapMap->setObjectLabel ("Overlap Map");

  // In verbose mode, have the Tpetra::Map objects describe themselves.
  if (verbose && printTpetra) {
    Teuchos::EVerbosityLevel verb = Teuchos::VERB_EXTREME;

    // Tpetra::Map::describe() takes a FancyOStream, but expects all
    // MPI processes to be able to write to it.  (The method handles
    // its own synchronization.)
    RCP<FancyOStream> globalOut = getFancyOStream (rcpFromRef (std::cout));
    out << "Tpetra owned map:" << endl;
    {
      OSTab tab (globalOut);
      tpetraOwnedMap->describe (*globalOut, verb);
    }
    out << "Tpetra overlap map:" << endl;
    {
      OSTab tab (globalOut);
      tpetraOverlapMap->describe (*globalOut, verb);
    }
  }

  // Use the owned and overlap maps to construct an importer for both
  // Epetra and Tpetra.
  Epetra_Import       epetraImporter (epetraOverlapMap, epetraOwnedMap  );
  Tpetra::Import<int> tpetraImporter (tpetraOwnedMap  , tpetraOverlapMap);

  // In verbose mode, have the Epetra_Import object describe itself.
  if (verbose && printEpetra) {
    out << "Epetra importer:" << endl;
    // The importer's Print() method takes an std::ostream& and plans
    // to write to it on all MPI processes (handling synchronization
    // itself).
    epetraImporter.Print (std::cout);
    out << endl;
  }

  // In verbose mode, have the Tpetra::Import object describe itself.
  if (verbose && printTpetra) {
    out << "Tpetra importer:" << endl;
    // The importer doesn't implement Teuchos::Describable.  It wants
    // std::cout and plans to write to it on all MPI processes (with
    // its own synchronization).
    tpetraImporter.print (std::cout);
    out << endl;
  }

  // Construct owned and overlap vectors for both Epetra and Tpetra.
  Epetra_Vector epetraOwnedVector   (epetraOwnedMap  );
  Epetra_Vector epetraOverlapVector (epetraOverlapMap);
  Tpetra::Vector<double,int> tpetraOwnedVector   (tpetraOwnedMap  );
  Tpetra::Vector<double,int> tpetraOverlapVector (tpetraOverlapMap);

  // The test is as follows: initialize the owned and overlap vectors
  // with global IDs in the owned regions.  Initialize the overlap
  // vectors to equal -1 in the overlap regions.  Then perform a
  // communication from the owned vectors to the overlap vectors.  The
  // resulting overlap vectors should have global IDs everywhere and
  // all of the -1 values should be overwritten.

  // Initialize.  We cannot assign directly to the Tpetra Vectors;
  // instead, we extract nonconst views and assign to those.  The
  // results aren't guaranteed to be committed to the vector unless
  // the views are released (by assigning Teuchos::null to them).
  epetraOverlapVector.PutScalar(-1);
  tpetraOverlapVector.putScalar(-1);
  ArrayRCP<double> tpetraOwnedArray   = tpetraOwnedVector.getDataNonConst(0);
  ArrayRCP<double> tpetraOverlapArray = tpetraOverlapVector.getDataNonConst(0);
  for (int owned_lid = 0; 
       owned_lid < tpetraOwnedMap->getNodeElementList().size(); 
       ++owned_lid) {
    int gid         = tpetraOwnedMap->getGlobalElement(owned_lid);
    int overlap_lid = tpetraOverlapMap->getLocalElement(gid);
    epetraOwnedVector[owned_lid]     = gid;
    epetraOverlapVector[overlap_lid] = gid;
    tpetraOwnedArray[owned_lid]      = gid;
    tpetraOverlapArray[overlap_lid]  = gid;
  }
  // Make sure that the changes to the Tpetra Vector were committed,
  // by releasing the nonconst views.
  tpetraOwnedArray = Teuchos::null;
  tpetraOverlapArray = Teuchos::null;

  // Test the Epetra and Tpetra Import.
  if (verbose) {
    out << "Testing Import from owned Map to overlap Map:" << endl << endl;
  }
  epetraOverlapVector.Import(  epetraOwnedVector, epetraImporter, Insert);
  tpetraOverlapVector.doImport(tpetraOwnedVector, tpetraImporter, 
			       Tpetra::INSERT);
  // Check the Import results.
  success = countFailures (teuchosComm, epetraOwnedMap, epetraOwnedVector, 
			   epetraOverlapMap, epetraOverlapVector, 
			   tpetraOwnedMap, tpetraOwnedVector, 
			   tpetraOverlapMap, tpetraOverlapVector, verbose);

  const bool testOtherDirections = false;
  if (testOtherDirections) {
    //
    // Reinitialize the Tpetra vectors and test whether Export works.
    //
    tpetraOverlapVector.putScalar(-1);
    tpetraOwnedArray   = tpetraOwnedVector.getDataNonConst(0);
    tpetraOverlapArray = tpetraOverlapVector.getDataNonConst(0);
    for (int owned_lid = 0; 
	 owned_lid < tpetraOwnedMap->getNodeElementList().size(); 
	 ++owned_lid) 
      {
	int gid         = tpetraOwnedMap->getGlobalElement(owned_lid);
	int overlap_lid = tpetraOverlapMap->getLocalElement(gid);
	tpetraOwnedArray[owned_lid]      = gid;
	tpetraOverlapArray[overlap_lid]  = gid;
      }
    // Make sure that the changes to the Tpetra Vector were committed,
    // by releasing the nonconst views.
    tpetraOwnedArray = Teuchos::null;
    tpetraOverlapArray = Teuchos::null;

    // Make a Tpetra Export object, and test the export.
    Tpetra::Export<int> tpetraExporter1 (tpetraOwnedMap, tpetraOverlapMap);
    if (verbose) {
      out << "Testing Export from owned Map to overlap Map:" << endl << endl;
    }
    tpetraOverlapVector.doExport (tpetraOwnedVector, tpetraExporter1, 
				  Tpetra::INSERT);

    // Check the Export results.
    success = countFailures (teuchosComm, epetraOwnedMap, epetraOwnedVector, 
			     epetraOverlapMap, epetraOverlapVector, 
			     tpetraOwnedMap, tpetraOwnedVector, 
			     tpetraOverlapMap, tpetraOverlapVector, verbose);
    //
    // Reinitialize the Tpetra vectors and see what Import in the
    // other direction does.
    //
    tpetraOverlapVector.putScalar(-1);
    tpetraOwnedArray   = tpetraOwnedVector.getDataNonConst(0);
    tpetraOverlapArray = tpetraOverlapVector.getDataNonConst(0);
    for (int owned_lid = 0; 
	 owned_lid < tpetraOwnedMap->getNodeElementList().size(); 
	 ++owned_lid) 
      {
	int gid         = tpetraOwnedMap->getGlobalElement(owned_lid);
	int overlap_lid = tpetraOverlapMap->getLocalElement(gid);
	tpetraOwnedArray[owned_lid]      = gid;
	tpetraOverlapArray[overlap_lid]  = gid;
      }
    // Make sure that the changes to the Tpetra Vector were committed,
    // by releasing the nonconst views.
    tpetraOwnedArray = Teuchos::null;
    tpetraOverlapArray = Teuchos::null;

    if (verbose) {
      out << "Testing Import from overlap Map to owned Map:" << endl << endl;
    }
    Tpetra::Import<int> tpetraImporter2 (tpetraOverlapMap, tpetraOwnedMap);
    tpetraOwnedVector.doImport (tpetraOverlapVector, tpetraImporter2, 
				Tpetra::INSERT);
    // Check the Import results.
    success = countFailures (teuchosComm, epetraOwnedMap, epetraOwnedVector, 
			     epetraOverlapMap, epetraOverlapVector, 
			     tpetraOwnedMap, tpetraOwnedVector, 
			     tpetraOverlapMap, tpetraOverlapVector, verbose);
  } // if testOtherDirections

  out << "End Result: TEST " << (success ? "PASSED" : "FAILED") << endl;
  MPI_Finalize ();
  return success ? EXIT_SUCCESS : EXIT_FAILURE;
}
//
//  TestMrhsSolver.cpp reads in a matrix in Harwell-Boeing format, 
//  calls one of the sparse direct solvers, using multiple right hand sides
//  (one per solve) and computes the error and residual.  
//
//  TestSolver ignores the Harwell-Boeing right hand sides, creating
//  random right hand sides instead.  
//
//  TestMrhsSolver can test either A x = b or A^T x = b.
//  This can be a bit confusing because sparse direct solvers 
//  use compressed column storage - the transpose of Trilinos'
//  sparse row storage.
//
//  Matrices:
//    readA - Serial.  As read from the file.
//    transposeA - Serial.  The transpose of readA.
//    serialA - if (transpose) then transposeA else readA 
//    distributedA - readA distributed to all processes
//    passA - if ( distributed ) then distributedA else serialA
//
//
int Amesos_TestMrhsSolver( Epetra_Comm &Comm, char *matrix_file, int numsolves, 
		     SparseSolverType SparseSolver, bool transpose, 
		     int special, AMESOS_MatrixType matrix_type ) {


  Comm.Barrier();

  Epetra_Map * readMap;
  Epetra_CrsMatrix * readA; 
  Epetra_Vector * readx; 
  Epetra_Vector * readb;
  Epetra_Vector * readxexact;

  std::string FileName = matrix_file ;
  int FN_Size = FileName.size() ; 
  std::string LastFiveBytes = FileName.substr( EPETRA_MAX(0,FN_Size-5), FN_Size );
  std::string LastFourBytes = FileName.substr( EPETRA_MAX(0,FN_Size-4), FN_Size );
  bool NonContiguousMap = false; 

  if ( LastFiveBytes == ".triU" ) { 
    // Call routine to read in unsymmetric Triplet matrix
    NonContiguousMap = true; 
    EPETRA_CHK_ERR( Trilinos_Util_ReadTriples2Epetra( matrix_file, false, Comm, readMap, readA, readx, 
						      readb, readxexact, NonContiguousMap ) );
  } else {
    if ( LastFiveBytes == ".triS" ) { 
      NonContiguousMap = true; 
      // Call routine to read in symmetric Triplet matrix
      EPETRA_CHK_ERR( Trilinos_Util_ReadTriples2Epetra( matrix_file, true, Comm, readMap, readA, readx, 
							readb, readxexact) );
    } else {
      if (  LastFourBytes == ".mtx" ) { 
	EPETRA_CHK_ERR( Trilinos_Util_ReadMatrixMarket2Epetra( matrix_file, Comm, readMap, 
							       readA, readx, readb, readxexact) );
      } else {
	// Call routine to read in HB problem
	Trilinos_Util_ReadHb2Epetra( matrix_file, Comm, readMap, readA, readx, 
						     readb, readxexact) ;
      }
    }
  }


  Epetra_CrsMatrix transposeA(Copy, *readMap, 0);
  Epetra_CrsMatrix *serialA ; 

  if ( transpose ) {
    assert( CrsMatrixTranspose( readA, &transposeA ) == 0 ); 
    serialA = &transposeA ; 
  } else {
    serialA = readA ; 
  }

  
  // Create uniform distributed map
  Epetra_Map map(readMap->NumGlobalElements(), 0, Comm);
  Epetra_Map* map_;

  if( NonContiguousMap ) {
    //
    //  map gives us NumMyElements and MyFirstElement;
    //
    int NumGlobalElements =  readMap->NumGlobalElements();
    int NumMyElements = map.NumMyElements();
    int MyFirstElement = map.MinMyGID();
    std::vector<int> MapMap_( NumGlobalElements );
    readMap->MyGlobalElements( &MapMap_[0] ) ;
    Comm.Broadcast( &MapMap_[0], NumGlobalElements, 0 ) ; 
    map_ = new Epetra_Map( NumGlobalElements, NumMyElements, &MapMap_[MyFirstElement], 0, Comm);
  } else {
    map_ = new Epetra_Map( map ) ; 
  }


  // Create Exporter to distribute read-in matrix and vectors
  Epetra_Export exporter(*readMap, *map_);
  Epetra_CrsMatrix A(Copy, *map_, 0);

  Epetra_RowMatrix * passA = 0; 
  Epetra_MultiVector * passx = 0; 
  Epetra_MultiVector * passb = 0;
  Epetra_MultiVector * passxexact = 0;
  Epetra_MultiVector * passresid = 0;
  Epetra_MultiVector * passtmp = 0;

  Epetra_MultiVector x(*map_,numsolves);
  Epetra_MultiVector b(*map_,numsolves);
  Epetra_MultiVector xexact(*map_,numsolves);
  Epetra_MultiVector resid(*map_,numsolves);
  Epetra_MultiVector tmp(*map_,numsolves);


  Epetra_MultiVector serialx(*readMap,numsolves);
  Epetra_MultiVector serialb(*readMap,numsolves);
  Epetra_MultiVector serialxexact(*readMap,numsolves);
  Epetra_MultiVector serialresid(*readMap,numsolves);
  Epetra_MultiVector serialtmp(*readMap,numsolves);

  bool distribute_matrix = ( matrix_type == AMESOS_Distributed ) ; 
  if ( distribute_matrix ) { 
    //
    //  Initialize x, b and xexact to the values read in from the file
    //

    A.Export(*serialA, exporter, Add);
    Comm.Barrier();

    assert(A.FillComplete()==0);    
    Comm.Barrier();

    passA = &A; 
    passx = &x; 
    passb = &b;
    passxexact = &xexact;
    passresid = &resid;
    passtmp = &tmp;
  } else { 
    passA = serialA; 
    passx = &serialx; 
    passb = &serialb;
    passxexact = &serialxexact;
    passresid = &serialresid;
    passtmp = &serialtmp;
  }

  passxexact->SetSeed(131) ; 
  passxexact->Random();
  passx->SetSeed(11231) ; 
  passx->Random();

  passb->PutScalar( 0.0 );
  passA->Multiply( transpose, *passxexact, *passb ) ; 

  Epetra_MultiVector CopyB( *passb ) ;

  double Anorm = passA->NormInf() ; 
  SparseDirectTimingVars::SS_Result.Set_Anorm(Anorm) ;

  Epetra_LinearProblem Problem(  (Epetra_RowMatrix *) passA, 
				 (Epetra_MultiVector *) passx, 
				 (Epetra_MultiVector *) passb );

  double max_resid = 0.0;
  for ( int j = 0 ; j < special+1 ; j++ ) { 
    
    Epetra_Time TotalTime( Comm ) ; 
    if ( false ) { 
#ifdef TEST_UMFPACK

      unused code

    } else if ( SparseSolver == UMFPACK ) { 
      UmfpackOO umfpack( (Epetra_RowMatrix *) passA, 
			 (Epetra_MultiVector *) passx, 
			 (Epetra_MultiVector *) passb ) ; 
      
      umfpack.SetTrans( transpose ) ; 
      umfpack.Solve() ; 
#endif
#ifdef TEST_SUPERLU
    } else if ( SparseSolver == SuperLU ) { 
      SuperluserialOO superluserial ; 
      superluserial.SetUserMatrix( (Epetra_RowMatrix *) passA) ; 

      superluserial.SetPermc( SuperLU_permc ) ; 
      superluserial.SetTrans( transpose ) ; 
      superluserial.SetUseDGSSV( special == 0 ) ; 

      for ( int i= 0 ; i < numsolves ; i++ ) { 
	//    set up to sovle A X[:,i] = B[:,i]
	Epetra_Vector *passb_i = (*passb)(i) ;
	Epetra_Vector *passx_i = (*passx)(i) ;
	superluserial.SetLHS( dynamic_cast<Epetra_MultiVector *>(passx_i) ) ;
	superluserial.SetRHS( dynamic_cast<Epetra_MultiVector *>(passb_i) );
	//      superluserial.SetRHS( (Epetra_MultiVector *) passb_i ; 
	superluserial.Solve() ; 
	if ( i == 0 ) {
	  SparseDirectTimingVars::SS_Result.Set_First_Time( TotalTime.ElapsedTime() ); 
	} else { 
	  if ( i < numsolves-1 ) 
	    SparseDirectTimingVars::SS_Result.Set_Middle_Time( TotalTime.ElapsedTime() ); 
	  else
	    SparseDirectTimingVars::SS_Result.Set_Last_Time( TotalTime.ElapsedTime() ); 
	}

      }
#endif
#ifdef HAVE_AMESOS_SLUD
    } else if ( SparseSolver == SuperLUdist ) { 
      SuperludistOO superludist( Problem ) ; 
      superludist.SetTrans( transpose ) ; 

      bool factor = true; 
      for ( int i= 0 ; i < numsolves ; i++ ) { 
	//    set up to sovle A X[:,i] = B[:,i]
	Epetra_Vector *passb_i = (*passb)(i) ;
	Epetra_Vector *passx_i = (*passx)(i) ;
	Problem.SetLHS( dynamic_cast<Epetra_MultiVector *>(passx_i) ) ;
	Problem.SetRHS( dynamic_cast<Epetra_MultiVector *>(passb_i) );
	EPETRA_CHK_ERR( superludist.Solve( factor ) ); 
	factor = false; 
	if ( i == 0 ) 
	  SparseDirectTimingVars::SS_Result.Set_First_Time( TotalTime.ElapsedTime() ); 
	else { 
	  if ( i < numsolves-1 ) 
	    SparseDirectTimingVars::SS_Result.Set_Middle_Time( TotalTime.ElapsedTime() ); 
	  else
	    SparseDirectTimingVars::SS_Result.Set_Last_Time( TotalTime.ElapsedTime() ); 
	}

      }
#endif
#ifdef HAVE_AMESOS_SLUD2
    } else if ( SparseSolver == SuperLUdist2 ) { 
      Superludist2_OO superludist2( Problem ) ; 
      superludist2.SetTrans( transpose ) ; 

      bool factor = true; 
      for ( int i= 0 ; i < numsolves ; i++ ) { 
	//    set up to sovle A X[:,i] = B[:,i]
	Epetra_Vector *passb_i = (*passb)(i) ;
	Epetra_Vector *passx_i = (*passx)(i) ;
	Problem.SetLHS( dynamic_cast<Epetra_MultiVector *>(passx_i) ) ;
	Problem.SetRHS( dynamic_cast<Epetra_MultiVector *>(passb_i) );
	EPETRA_CHK_ERR( superludist2.Solve( factor ) ); 
	factor = false; 
	if ( i == 0 ) 
	  SparseDirectTimingVars::SS_Result.Set_First_Time( TotalTime.ElapsedTime() ); 
	else { 
	  if ( i < numsolves-1 ) 
	    SparseDirectTimingVars::SS_Result.Set_Middle_Time( TotalTime.ElapsedTime() ); 
	  else
	    SparseDirectTimingVars::SS_Result.Set_Last_Time( TotalTime.ElapsedTime() ); 
	}

      }
#endif
#ifdef HAVE_AMESOS_DSCPACK
    } else if ( SparseSolver == DSCPACK ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Dscpack dscpack( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( dscpack.SetParameters( ParamList ) ); 

      for ( int i= 0 ; i < numsolves ; i++ ) { 
	//    set up to sovle A X[:,i] = B[:,i]
	Epetra_Vector *passb_i = (*passb)(i) ;
	Epetra_Vector *passx_i = (*passx)(i) ;
	Problem.SetLHS( dynamic_cast<Epetra_MultiVector *>(passx_i) ) ;
	Problem.SetRHS( dynamic_cast<Epetra_MultiVector *>(passb_i) );
	EPETRA_CHK_ERR( dscpack.Solve( ) ); 
	if ( i == 0 ) 
	  SparseDirectTimingVars::SS_Result.Set_First_Time( TotalTime.ElapsedTime() ); 
	else { 
	  if ( i < numsolves-1 ) 
	    SparseDirectTimingVars::SS_Result.Set_Middle_Time( TotalTime.ElapsedTime() ); 
	  else
	    SparseDirectTimingVars::SS_Result.Set_Last_Time( TotalTime.ElapsedTime() ); 
	}

      }
#endif
#ifdef HAVE_AMESOS_UMFPACK
    } else if ( SparseSolver == UMFPACK ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Umfpack umfpack( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( umfpack.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( umfpack.SetUseTranspose( transpose ) ); 

      for ( int i= 0 ; i < numsolves ; i++ ) { 
	//    set up to sovle A X[:,i] = B[:,i]
	Epetra_Vector *passb_i = (*passb)(i) ;
	Epetra_Vector *passx_i = (*passx)(i) ;
	Problem.SetLHS( dynamic_cast<Epetra_MultiVector *>(passx_i) ) ;
	Problem.SetRHS( dynamic_cast<Epetra_MultiVector *>(passb_i) );
	EPETRA_CHK_ERR( umfpack.Solve( ) ); 
	if ( i == 0 ) 
	  SparseDirectTimingVars::SS_Result.Set_First_Time( TotalTime.ElapsedTime() ); 
	else { 
	  if ( i < numsolves-1 ) 
	    SparseDirectTimingVars::SS_Result.Set_Middle_Time( TotalTime.ElapsedTime() ); 
	  else
	    SparseDirectTimingVars::SS_Result.Set_Last_Time( TotalTime.ElapsedTime() ); 
	}

      }
#endif
#ifdef HAVE_AMESOS_SUPERLU
    } else if ( SparseSolver == SUPERLU ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Superlu superlu( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( superlu.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( superlu.SetUseTranspose( transpose ) ); 

      EPETRA_CHK_ERR( superlu.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( superlu.NumericFactorization(  ) ); 
      for ( int i= 0 ; i < numsolves ; i++ ) { 
	//    set up to sovle A X[:,i] = B[:,i]
	Epetra_Vector *passb_i = (*passb)(i) ;
	Epetra_Vector *passx_i = (*passx)(i) ;
	Problem.SetLHS( dynamic_cast<Epetra_MultiVector *>(passx_i) ) ;
	Problem.SetRHS( dynamic_cast<Epetra_MultiVector *>(passb_i) );
	EPETRA_CHK_ERR( superlu.Solve( ) ); 
	if ( i == 0 ) 
	  SparseDirectTimingVars::SS_Result.Set_First_Time( TotalTime.ElapsedTime() ); 
	else { 
	  if ( i < numsolves-1 ) 
	    SparseDirectTimingVars::SS_Result.Set_Middle_Time( TotalTime.ElapsedTime() ); 
	  else
	    SparseDirectTimingVars::SS_Result.Set_Last_Time( TotalTime.ElapsedTime() ); 
	}

      }
#endif
#ifdef HAVE_AMESOS_SLUS
    } else if ( SparseSolver == SuperLU ) { 
      Epetra_SLU superluserial( &Problem ) ;
      
      bool factor = true; 

      for ( int i= 0 ; i < numsolves ; i++ ) { 
	//    set up to sovle A X[:,i] = B[:,i]
	Epetra_Vector *passb_i = (*passb)(i) ;
	Epetra_Vector *passx_i = (*passx)(i) ;
	Problem.SetLHS( dynamic_cast<Epetra_MultiVector *>(passx_i) ) ;
	Problem.SetRHS( dynamic_cast<Epetra_MultiVector *>(passb_i) );
	EPETRA_CHK_ERR( superluserial.Solve( true, false, factor, 2, -1, true, transpose ) ); 
	if ( i == 0 ) 
	  SparseDirectTimingVars::SS_Result.Set_First_Time( TotalTime.ElapsedTime() ); 
	else { 
	  if ( i < numsolves-1 ) 
	    SparseDirectTimingVars::SS_Result.Set_Middle_Time( TotalTime.ElapsedTime() ); 
	  else
	    SparseDirectTimingVars::SS_Result.Set_Last_Time( TotalTime.ElapsedTime() ); 
	}

      }
#endif
#ifdef HAVE_AMESOS_KLU
    } else if ( SparseSolver == KLU ) { 
      Teuchos::ParameterList ParamList ;
      //      ParamList.set("OutputLevel",2);
      Amesos_Klu klu( Problem ) ; 
      // ParamList.set ("ScaleMethod", 0) ;
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( klu.SetParameters( ParamList ) ); 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( klu.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( klu.SetUseTranspose( transpose ) ); 

      EPETRA_CHK_ERR( klu.SymbolicFactorization(  ) ); 
      for ( int trials = 0 ; trials <= 1 ; trials++) {
	  EPETRA_CHK_ERR( klu.NumericFactorization(  ) ); 
	  for ( int i= 0 ; i < numsolves ; i++ ) {
	    //    set up to sovle A X[:,i] = B[:,i]
	    Epetra_Vector *passb_i = (*passb)(i) ;
	    Epetra_Vector *passx_i = (*passx)(i) ;
	    Problem.SetLHS( dynamic_cast<Epetra_MultiVector *>(passx_i) ) ;
	    Problem.SetRHS( dynamic_cast<Epetra_MultiVector *>(passb_i) );

	    EPETRA_CHK_ERR( klu.Solve( ) ); 
	    if ( i == 0 ) {
	      SparseDirectTimingVars::SS_Result.Set_First_Time(
		      TotalTime.ElapsedTime() ); 
	    } else {
	      if ( i < numsolves-1 ) 
		SparseDirectTimingVars::SS_Result.Set_Middle_Time(
			TotalTime.ElapsedTime() ); 
	      else
		SparseDirectTimingVars::SS_Result.Set_Last_Time(
			TotalTime.ElapsedTime() ); 
	    }
	  }
      }
#endif
#ifdef HAVE_AMESOS_LAPACK
    } else if ( SparseSolver == LAPACK ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Lapack lapack( Problem ) ; 
      EPETRA_CHK_ERR( lapack.SetUseTranspose( transpose ) ); 

      EPETRA_CHK_ERR( lapack.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( lapack.NumericFactorization(  ) ); 
      for ( int i= 0 ; i < numsolves ; i++ ) { 
	//    set up to sovle A X[:,i] = B[:,i]
	Epetra_Vector *passb_i = (*passb)(i) ;
	Epetra_Vector *passx_i = (*passx)(i) ;
	Problem.SetLHS( dynamic_cast<Epetra_MultiVector *>(passx_i) ) ;
	Problem.SetRHS( dynamic_cast<Epetra_MultiVector *>(passb_i) );
	EPETRA_CHK_ERR( lapack.Solve( ) ); 
	if ( i == 0 ) 
	  SparseDirectTimingVars::SS_Result.Set_First_Time( TotalTime.ElapsedTime() ); 
	else { 
	  if ( i < numsolves-1 ) 
	    SparseDirectTimingVars::SS_Result.Set_Middle_Time( TotalTime.ElapsedTime() ); 
	  else
	    SparseDirectTimingVars::SS_Result.Set_Last_Time( TotalTime.ElapsedTime() ); 
	}

      }
#endif
#ifdef HAVE_AMESOS_TAUCS
    } else if ( SparseSolver == TAUCS ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Taucs taucs( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( taucs.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( taucs.SetUseTranspose( transpose ) ); 
      EPETRA_CHK_ERR( taucs.SymbolicFactorization( ) ); 
      EPETRA_CHK_ERR( taucs.NumericFactorization( ) ); 

      for ( int i= 0 ; i < numsolves ; i++ ) { 
	//    set up to sovle A X[:,i] = B[:,i]
	Epetra_Vector *passb_i = (*passb)(i) ;
	Epetra_Vector *passx_i = (*passx)(i) ;
	Problem.SetLHS( dynamic_cast<Epetra_MultiVector *>(passx_i) ) ;
	Problem.SetRHS( dynamic_cast<Epetra_MultiVector *>(passb_i) );
	EPETRA_CHK_ERR( taucs.Solve( ) ); 
	if ( i == 0 ) 
	  SparseDirectTimingVars::SS_Result.Set_First_Time( TotalTime.ElapsedTime() ); 
	else { 
	  if ( i < numsolves-1 ) 
	    SparseDirectTimingVars::SS_Result.Set_Middle_Time( TotalTime.ElapsedTime() ); 
	  else
	    SparseDirectTimingVars::SS_Result.Set_Last_Time( TotalTime.ElapsedTime() ); 
	}

      }
#endif
#ifdef HAVE_AMESOS_PARDISO
    } else if ( SparseSolver == PARDISO ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Pardiso pardiso( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( pardiso.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( pardiso.SetUseTranspose( transpose ) ); 
      EPETRA_CHK_ERR( pardiso.SymbolicFactorization( ) ); 
      EPETRA_CHK_ERR( pardiso.NumericFactorization( ) ); 

      for ( int i= 0 ; i < numsolves ; i++ ) { 
	//    set up to sovle A X[:,i] = B[:,i]
	Epetra_Vector *passb_i = (*passb)(i) ;
	Epetra_Vector *passx_i = (*passx)(i) ;
	Problem.SetLHS( dynamic_cast<Epetra_MultiVector *>(passx_i) ) ;
	Problem.SetRHS( dynamic_cast<Epetra_MultiVector *>(passb_i) );
	EPETRA_CHK_ERR( pardiso.Solve( ) ); 
	if ( i == 0 ) 
	  SparseDirectTimingVars::SS_Result.Set_First_Time( TotalTime.ElapsedTime() ); 
	else { 
	  if ( i < numsolves-1 ) 
	    SparseDirectTimingVars::SS_Result.Set_Middle_Time( TotalTime.ElapsedTime() ); 
	  else
	    SparseDirectTimingVars::SS_Result.Set_Last_Time( TotalTime.ElapsedTime() ); 
	}

      }
#endif
#ifdef HAVE_AMESOS_PARAKLETE
    } else if ( SparseSolver == PARAKLETE ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Paraklete paraklete( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( paraklete.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( paraklete.SetUseTranspose( transpose ) ); 
      EPETRA_CHK_ERR( paraklete.SymbolicFactorization( ) ); 
      EPETRA_CHK_ERR( paraklete.NumericFactorization( ) ); 

      for ( int i= 0 ; i < numsolves ; i++ ) { 
	//    set up to sovle A X[:,i] = B[:,i]
	Epetra_Vector *passb_i = (*passb)(i) ;
	Epetra_Vector *passx_i = (*passx)(i) ;
	Problem.SetLHS( dynamic_cast<Epetra_MultiVector *>(passx_i) ) ;
	Problem.SetRHS( dynamic_cast<Epetra_MultiVector *>(passb_i) );
	EPETRA_CHK_ERR( paraklete.Solve( ) ); 
	if ( i == 0 ) 
	  SparseDirectTimingVars::SS_Result.Set_First_Time( TotalTime.ElapsedTime() ); 
	else { 
	  if ( i < numsolves-1 ) 
	    SparseDirectTimingVars::SS_Result.Set_Middle_Time( TotalTime.ElapsedTime() ); 
	  else
	    SparseDirectTimingVars::SS_Result.Set_Last_Time( TotalTime.ElapsedTime() ); 
	}

      }
#endif
#if defined(HAVE_AMESOS_MUMPS) && defined(HAVE_MPI)
    } else if ( SparseSolver == MUMPS ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Mumps mumps( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( mumps.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( mumps.SetUseTranspose( transpose ) ); 
      EPETRA_CHK_ERR( mumps.SymbolicFactorization( ) ); 
      EPETRA_CHK_ERR( mumps.NumericFactorization( ) ); 

      for ( int i= 0 ; i < numsolves ; i++ ) { 
	//    set up to sovle A X[:,i] = B[:,i]
	Epetra_Vector *passb_i = (*passb)(i) ;
	Epetra_Vector *passx_i = (*passx)(i) ;
	Problem.SetLHS( dynamic_cast<Epetra_MultiVector *>(passx_i) ) ;
	Problem.SetRHS( dynamic_cast<Epetra_MultiVector *>(passb_i) );
	EPETRA_CHK_ERR( mumps.Solve( ) ); 
	if ( i == 0 ) 
	  SparseDirectTimingVars::SS_Result.Set_First_Time( TotalTime.ElapsedTime() ); 
	else { 
	  if ( i < numsolves-1 ) 
	    SparseDirectTimingVars::SS_Result.Set_Middle_Time( TotalTime.ElapsedTime() ); 
	  else
	    SparseDirectTimingVars::SS_Result.Set_Last_Time( TotalTime.ElapsedTime() ); 
	}

      }
#endif
#ifdef HAVE_AMESOS_SCALAPACK
    } else if ( SparseSolver == SCALAPACK ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Scalapack scalapack( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( scalapack.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( scalapack.SetUseTranspose( transpose ) ); 

      EPETRA_CHK_ERR( scalapack.SymbolicFactorization( ) ); 
      EPETRA_CHK_ERR( scalapack.NumericFactorization( ) ); 
      for ( int i= 0 ; i < numsolves ; i++ ) { 
	//    set up to sovle A X[:,i] = B[:,i]
	Epetra_Vector *passb_i = (*passb)(i) ;
	Epetra_Vector *passx_i = (*passx)(i) ;
	Problem.SetLHS( dynamic_cast<Epetra_MultiVector *>(passx_i) ) ;
	Problem.SetRHS( dynamic_cast<Epetra_MultiVector *>(passb_i) );
	EPETRA_CHK_ERR( scalapack.Solve( ) ); 
	if ( i == 0 ) 
	  SparseDirectTimingVars::SS_Result.Set_First_Time( TotalTime.ElapsedTime() ); 
	else { 
	  if ( i < numsolves-1 ) 
	    SparseDirectTimingVars::SS_Result.Set_Middle_Time( TotalTime.ElapsedTime() ); 
	  else
	    SparseDirectTimingVars::SS_Result.Set_Last_Time( TotalTime.ElapsedTime() ); 
	}

      }
#endif
#ifdef HAVE_AMESOS_SUPERLUDIST
    } else if ( SparseSolver == SUPERLUDIST ) { 
      Teuchos::ParameterList ParamList ;
      ParamList.set( "MaxProcs", -3 );
      Amesos_Superludist superludist( Problem ) ; 
      EPETRA_CHK_ERR( superludist.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( superludist.SetUseTranspose( transpose ) ); 
      EPETRA_CHK_ERR( superludist.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( superludist.NumericFactorization(  ) ); 
      SparseDirectTimingVars::SS_Result.Set_First_Time( TotalTime.ElapsedTime() ); 

      for ( int i= 0 ; i < numsolves ; i++ ) { 
	//    set up to sovle A X[:,i] = B[:,i]
	Epetra_Vector *passb_i = (*passb)(i) ;
	Epetra_Vector *passx_i = (*passx)(i) ;
	Problem.SetLHS( dynamic_cast<Epetra_MultiVector *>(passx_i) ) ;
	Problem.SetRHS( dynamic_cast<Epetra_MultiVector *>(passb_i) );
	EPETRA_CHK_ERR( superludist.Solve( ) ); 
	if ( i < numsolves-1 ) 
	  SparseDirectTimingVars::SS_Result.Set_Middle_Time( TotalTime.ElapsedTime() ); 
	else
	  SparseDirectTimingVars::SS_Result.Set_Last_Time( TotalTime.ElapsedTime() ); 
      }
#endif
#ifdef TEST_SPOOLES
    } else if ( SparseSolver == SPOOLES ) { 
      SpoolesOO spooles( (Epetra_RowMatrix *) passA, 
			 (Epetra_MultiVector *) passx, 
			 (Epetra_MultiVector *) passb ) ; 
    
      spooles.SetTrans( transpose ) ; 
      spooles.Solve() ;
#endif 
#ifdef TEST_SPOOLESSERIAL
    } else if ( SparseSolver == SPOOLESSERIAL ) { 
      SpoolesserialOO spoolesserial( (Epetra_RowMatrix *) passA, 
				     (Epetra_MultiVector *) passx, 
				     (Epetra_MultiVector *) passb ) ; 
    
      spoolesserial.Solve() ;
#endif 
    } else { 
      SparseDirectTimingVars::log_file << "Solver not implemented yet" << std::endl ;
      std::cerr << "\n\n####################  Requested solver not available (Or not tested with multiple RHS) on this platform #####################\n" << std::endl ;
    }

    SparseDirectTimingVars::SS_Result.Set_Total_Time( TotalTime.ElapsedTime() ); 

    //
    //  Compute the error = norm(xcomp - xexact )
    //
    std::vector <double> error(numsolves) ; 
    double max_error = 0.0;
  
    passresid->Update(1.0, *passx, -1.0, *passxexact, 0.0);

    passresid->Norm2(&error[0]);
    for ( int i = 0 ; i< numsolves; i++ ) 
      if ( error[i] > max_error ) max_error = error[i] ; 
    SparseDirectTimingVars::SS_Result.Set_Error(max_error) ;

    //  passxexact->Norm2(&error[0] ) ; 
    //  passx->Norm2(&error ) ; 

    //
    //  Compute the residual = norm(Ax - b)
    //
    std::vector <double> residual(numsolves) ; 
  
    passtmp->PutScalar(0.0);
    passA->Multiply( transpose, *passx, *passtmp);
    passresid->Update(1.0, *passtmp, -1.0, *passb, 0.0); 
    //    passresid->Update(1.0, *passtmp, -1.0, CopyB, 0.0); 
    passresid->Norm2(&residual[0]);

    for ( int i = 0 ; i< numsolves; i++ ) 
      if ( residual[i] > max_resid ) max_resid = residual[i] ; 


    SparseDirectTimingVars::SS_Result.Set_Residual(max_resid) ;
    
    std::vector <double> bnorm(numsolves); 
    passb->Norm2( &bnorm[0] ) ; 
    SparseDirectTimingVars::SS_Result.Set_Bnorm(bnorm[0]) ;

    std::vector <double> xnorm(numsolves); 
    passx->Norm2( &xnorm[0] ) ; 
    SparseDirectTimingVars::SS_Result.Set_Xnorm(xnorm[0]) ;

  }
  delete readA;
  delete readx;
  delete readb;
  delete readxexact;
  delete readMap;
  delete map_;

  Comm.Barrier();
   return 0;
}
Пример #22
0
int main(int argc, char *argv[]) {

#ifdef HAVE_MPI
  MPI_Init(&argc,&argv);
  Epetra_MpiComm Comm (MPI_COMM_WORLD);
#else
  Epetra_SerialComm Comm;
#endif

  int MyPID = Comm.MyPID();

  // matrix downloaded from MatrixMarket
  char FileName[] = "../HBMatrices/fidap005.rua";

  Epetra_Map * readMap; // Pointers because of Trilinos_Util_ReadHb2Epetra
  Epetra_CrsMatrix * readA; 
  Epetra_Vector * readx; 
  Epetra_Vector * readb;
  Epetra_Vector * readxexact;
   
  // Call routine to read in HB problem
  Trilinos_Util_ReadHb2Epetra(FileName, Comm, readMap, readA, readx, 
			      readb, readxexact);

  int NumGlobalElements = readMap->NumGlobalElements();

  // Create uniform distributed map
  Epetra_Map map(NumGlobalElements, 0, Comm);

  // Create Exporter to distribute read-in matrix and vectors

  Epetra_Export exporter(*readMap, map);
  Epetra_CrsMatrix A(Copy, map, 0);
  Epetra_Vector x(map);
  Epetra_Vector b(map);
  Epetra_Vector xexact(map);

  Epetra_Time FillTimer(Comm);
  x.Export(*readx, exporter, Add);
  b.Export(*readb, exporter, Add);
  xexact.Export(*readxexact, exporter, Add);
  Comm.Barrier();
  double vectorRedistributeTime = FillTimer.ElapsedTime();
  A.Export(*readA, exporter, Add);
  Comm.Barrier();
  double matrixRedistributeTime = FillTimer.ElapsedTime() - vectorRedistributeTime;
  A.FillComplete();
  Comm.Barrier();
  double fillCompleteTime = FillTimer.ElapsedTime() - matrixRedistributeTime;

  if( MyPID==0 ) {
    cout << "Vector redistribute  time (sec) = "
	 << vectorRedistributeTime<< endl;
    cout << "Matrix redistribute time (sec) = "
	 << matrixRedistributeTime << endl;
    cout << "Transform to Local  time (sec) = "
	 << fillCompleteTime << endl<< endl;
  }

  delete readA;
  delete readx;
  delete readb;
  delete readxexact;
  delete readMap;

#ifdef HAVE_MPI
  MPI_Finalize() ;
#endif

  return(EXIT_SUCCESS);
}
Пример #23
0
//
//  Diagonal:  0=no change, 1=eliminate entry
//             from the map for the largest row element in process 0
//             2=add diagonal entries to the matrix, with a zero value 
//             (assume row map contains all diagonal entries). 
//
//  ReindexRowMap:  
//    0=no change, 1= add 2 (still contiguous), 2=non-contiguous
//  
//  ReindexColMap
//    0=same as RowMap, 1=add 4 - Different From RowMap, but contiguous) 
//
//  RangeMap:
//    0=no change, 1=serial map, 2=bizarre distribution, 3=replicated map
//
//  DomainMap:
//    0=no change, 1=serial map, 2=bizarre distribution, 3=replicated map
//
RCP<Epetra_CrsMatrix> NewMatNewMap(Epetra_CrsMatrix& In, 
					   int Diagonal, 
					   int ReindexRowMap,
					   int ReindexColMap,
					   int RangeMapType,
					   int DomainMapType
					   )
{

  //
  //  If we are making no change, return the original matrix (which has a linear map) 
  //
#if 0
  std::cout << __FILE__ << "::" << __LINE__ << " " 
       << Diagonal << " " 
       << ReindexRowMap << " " 
       << ReindexColMap << " " 
       << RangeMapType << " " 
       << DomainMapType << " " << std::endl ; 
#endif

  if ( Diagonal + ReindexRowMap + ReindexColMap + RangeMapType + DomainMapType == 0 ) {
    RCP<Epetra_CrsMatrix> ReturnOrig = rcp( &In, false );
    return ReturnOrig ;
  }

  //
  //  Diagonal==2 is used for a different purpose - 
  //    Making sure that the diagonal of the matrix is non-empty.
  //  Note:  The diagonal must exist in In.RowMap().
  //
  if ( Diagonal == 2 ) { 
    assert( ReindexRowMap==0 && ReindexColMap == 0 ) ; 
  }

  int (*RowPermute)(int in) = 0;
  int (*ColPermute)(int in) = 0;

  assert( Diagonal >= 0  && Diagonal <= 2 ); 
  assert( ReindexRowMap>=0 && ReindexRowMap<=2 );
  assert( ReindexColMap>=0 && ReindexColMap<=1 );
  assert( RangeMapType>=0 && RangeMapType<=3 );
  assert( DomainMapType>=0 && DomainMapType<=3 );

  Epetra_Map DomainMap = In.DomainMap();
  Epetra_Map RangeMap = In.RangeMap();
  Epetra_Map ColMap = In.ColMap();
  Epetra_Map RowMap = In.RowMap();
  int NumMyRowElements = RowMap.NumMyElements();
  int NumMyColElements = ColMap.NumMyElements();
  int NumMyRangeElements = RangeMap.NumMyElements();
  int NumMyDomainElements = DomainMap.NumMyElements();

  int NumGlobalRowElements = RowMap.NumGlobalElements();
  int NumGlobalColElements = ColMap.NumGlobalElements();
  int NumGlobalRangeElements = RangeMap.NumGlobalElements();
  int NumGlobalDomainElements = DomainMap.NumGlobalElements();
  assert( NumGlobalRangeElements == NumGlobalDomainElements ) ; 

  std::vector<int> MyGlobalRowElements( NumMyRowElements ) ; 
  std::vector<int> NumEntriesPerRow( NumMyRowElements ) ; 
  std::vector<int> MyPermutedGlobalRowElements( NumMyRowElements ) ; 
  std::vector<int> MyGlobalColElements( NumMyColElements ) ; 
  std::vector<int> MyPermutedGlobalColElements( NumMyColElements ) ; // Used to create the column map
  std::vector<int> MyPermutedGlobalColElementTable( NumMyColElements ) ; // To convert local indices to global
  std::vector<int> MyGlobalRangeElements( NumMyRangeElements ) ; 
  std::vector<int> MyPermutedGlobalRangeElements( NumMyRangeElements ) ; 
  std::vector<int> MyGlobalDomainElements( NumMyDomainElements ) ; 
  std::vector<int> MyPermutedGlobalDomainElements( NumMyDomainElements ) ; 
  RowMap.MyGlobalElements(&MyGlobalRowElements[0]);
  ColMap.MyGlobalElements(&MyGlobalColElements[0]);
  RangeMap.MyGlobalElements(&MyGlobalRangeElements[0]);
  DomainMap.MyGlobalElements(&MyGlobalDomainElements[0]);

  switch( ReindexRowMap ) {
  case 0:
    RowPermute = &NoPermute ;
    break; 
  case 1:
    RowPermute = &SmallRowPermute ;
    break; 
  case 2:
    RowPermute = BigRowPermute ;
    break; 
  }
  switch( ReindexColMap ) {
  case 0:
    ColPermute = RowPermute ;
    break; 
  case 1:
    ColPermute = &SmallColPermute ;
    break; 
  }

  //
  //  Create Serial Range and Domain Maps based on the permuted indexing
  //
  int nlocal = 0;
  if (In.Comm().MyPID()==0) nlocal = NumGlobalRangeElements;
  std::vector<int> AllIDs( NumGlobalRangeElements ) ; 
  for ( int i = 0; i < NumGlobalRangeElements ; i++ ) AllIDs[i] = (*RowPermute)( i ) ; 
  Epetra_Map SerialRangeMap( -1, nlocal, &AllIDs[0], 0, In.Comm()); 
  std::vector<int> AllIDBs( NumGlobalRangeElements ) ; 
  for ( int i = 0; i < NumGlobalRangeElements ; i++ ) AllIDBs[i] = (*ColPermute)( i ) ; 
  Epetra_Map SerialDomainMap( -1, nlocal, &AllIDBs[0], 0, In.Comm()); 

  //
  //  Create Bizarre Range and Domain Maps based on the permuted indexing
  //  These are nearly serial, having all but one element on process 0
  //  The goal here is to make sure that we can use Domain and Range maps 
  //  that are neither serial, nor distributed in the normal manner.
  //
  std::vector<int> AllIDCs( NumGlobalRangeElements ) ; 
  for ( int i = 0; i < NumGlobalRangeElements ; i++ ) AllIDCs[i] = (*ColPermute)( i ) ; 
  if ( In.Comm().NumProc() > 1 ) { 
    if (In.Comm().MyPID()==0) nlocal = NumGlobalRangeElements-1;
    if (In.Comm().MyPID()==1) {
      nlocal = 1;
      AllIDCs[0] = (*ColPermute)( NumGlobalRangeElements - 1 );
    }
  } 
  int iam = In.Comm().MyPID();
  Epetra_Map BizarreDomainMap( -1, nlocal, &AllIDCs[0], 0, In.Comm()); 

  std::vector<int> AllIDDs( NumGlobalRangeElements ) ; 
  for ( int i = 0; i < NumGlobalRangeElements ; i++ ) AllIDDs[i] = (*RowPermute)( i ) ; 
  if ( In.Comm().NumProc() > 1 ) { 
    if (In.Comm().MyPID()==0) nlocal = NumGlobalRangeElements-1;
    if (In.Comm().MyPID()==1) {
      nlocal = 1;
      AllIDDs[0] = (*RowPermute)( NumGlobalRangeElements -1 ) ;
    }
  } 
  Epetra_Map BizarreRangeMap( -1, nlocal, &AllIDDs[0], 0, In.Comm()); 


  //
  //  Compute the column map 
  //
  //  If Diagonal==1, remove the column corresponding to the last row owned 
  //  by process 0.  Removing this column from a tridiagonal matrix, leaves
  //  a disconnected, but non-singular matrix.  
  //
  int NumMyColElementsOut = 0 ; 
  int NumGlobalColElementsOut ; 
  if ( Diagonal == 1 ) 
    NumGlobalColElementsOut = NumGlobalColElements-1; 
  else
    NumGlobalColElementsOut = NumGlobalColElements; 
  if ( Diagonal == 1 && iam==0 ) { 
    for ( int i=0; i < NumMyColElements  ; i++ ) {
      if ( MyGlobalColElements[i] != MyGlobalRowElements[NumMyRowElements-1] ) {
	MyPermutedGlobalColElements[NumMyColElementsOut++] = 
	  (*ColPermute)( MyGlobalColElements[i] ) ; 
      }
    }
    assert( NumMyColElementsOut == NumMyColElements-1 );
  } else {
    for ( int i=0; i < NumMyColElements  ; i++ )  
      MyPermutedGlobalColElements[i] = 
	(*ColPermute)( MyGlobalColElements[i] ) ; 
    NumMyColElementsOut = NumMyColElements ; 
    if ( Diagonal == 2 ) {
      //  For each row, make sure that the column map has this row in it, 
      //    if it doesn't, add it to the column map.  
      //  Note:  MyPermutedGlobalColElements == MyGlobalColElements when 
      //  Diagonal==2 because  ( Diagonal == 2 ) implies:
      //     ReindexRowMap==0 && ReindexColMap == 0  - see assert above
      for ( int i=0; i < NumMyRowElements  ; i++ ) {
	bool MissingDiagonal = true; 
	for ( int j=0; j < NumMyColElements; j++ ) { 
	  if ( MyGlobalRowElements[i] == MyGlobalColElements[j] ) {
	    MissingDiagonal = false; 
	  }
	}
	if ( MissingDiagonal ) {
	  MyPermutedGlobalColElements.resize(NumMyColElements+1);
	  MyPermutedGlobalColElements[NumMyColElementsOut] = MyGlobalRowElements[i];
	  NumMyColElementsOut++;
	}
      }
      In.Comm().SumAll(&NumMyColElementsOut,&NumGlobalColElementsOut,1); 
    }
  }

  //
  //  These tables are used both as the permutation tables and to create the maps.
  //
  for ( int i=0; i < NumMyColElements  ; i++ ) 
    MyPermutedGlobalColElementTable[i] = 
      (*ColPermute)( MyGlobalColElements[i] ) ; 
  for ( int i=0; i < NumMyRowElements  ; i++ ) 
    MyPermutedGlobalRowElements[i] = 
      (*RowPermute)( MyGlobalRowElements[i] ) ; 
  for ( int i=0; i < NumMyRangeElements  ; i++ ) 
    MyPermutedGlobalRangeElements[i] = 
      (*RowPermute)( MyGlobalRangeElements[i] ) ; 
  for ( int i=0; i < NumMyDomainElements  ; i++ ) 
    MyPermutedGlobalDomainElements[i] = 
      (*ColPermute)( MyGlobalDomainElements[i] ) ; 

  RCP<Epetra_Map> PermutedRowMap = 
    rcp( new Epetra_Map( NumGlobalRowElements, NumMyRowElements, 
			 &MyPermutedGlobalRowElements[0], 0, In.Comm() ) ); 
									
  RCP<Epetra_Map> PermutedColMap = 
    rcp( new Epetra_Map( NumGlobalColElementsOut, NumMyColElementsOut, 
			 &MyPermutedGlobalColElements[0], 0, In.Comm() ) ); 
									
  RCP<Epetra_Map> PermutedRangeMap = 
    rcp( new Epetra_Map( NumGlobalRangeElements, NumMyRangeElements, 
			 &MyPermutedGlobalRangeElements[0], 0, In.Comm() ) ); 
									
  RCP<Epetra_Map> PermutedDomainMap = 
    rcp( new Epetra_Map( NumGlobalDomainElements, NumMyDomainElements, 
			 &MyPermutedGlobalDomainElements[0], 0, In.Comm() ) ); 
									
  //
  //  These vectors are filled and then passed to InsertGlobalValues 
  //
  std::vector<int> ThisRowIndices( In.MaxNumEntries() );
  std::vector<double> ThisRowValues( In.MaxNumEntries() );
  std::vector<int> PermutedGlobalColIndices( In.MaxNumEntries() );

  //std::cout << __FILE__ << "::" <<__LINE__ << std::endl ; 
  RCP<Epetra_CrsMatrix> Out = 
    rcp( new Epetra_CrsMatrix( Copy, *PermutedRowMap, *PermutedColMap, 0 ) );

  for (int i=0; i<NumMyRowElements; i++)
    {

      int NumIndicesThisRow = 0;
      assert( In.ExtractMyRowCopy( i, 
				   In.MaxNumEntries(),
				   NumIndicesThisRow,
				   &ThisRowValues[0],
				   &ThisRowIndices[0] ) == 0 ) ;
      for (int j = 0 ; j < NumIndicesThisRow ; j++ )
	{
	  PermutedGlobalColIndices[j] = MyPermutedGlobalColElementTable[ ThisRowIndices[j] ]  ;
	}
      bool MissingDiagonal = false; 
      if ( Diagonal==2 ) { 
	//
	assert( MyGlobalRowElements[i] == MyPermutedGlobalRowElements[i] );
	MissingDiagonal = true; 
	for( int j =0 ; j < NumIndicesThisRow ; j++ ) {
	  if ( PermutedGlobalColIndices[j] == MyPermutedGlobalRowElements[i] ) {
	    MissingDiagonal = false ; 
	  }
	}
#if 0
	std::cout  << __FILE__ << "::" << __LINE__ 
	      << " i = " << i 
	      << " MyPermutedGlobalRowElements[i]  = " << MyPermutedGlobalRowElements[i] 
	      <<   " MissingDiagonal = " << MissingDiagonal << std::endl ; 
#endif

      }
      if ( MissingDiagonal ) { 
	ThisRowValues.resize(NumIndicesThisRow+1) ; 
	ThisRowValues[NumIndicesThisRow] = 0.0;
	PermutedGlobalColIndices.resize(NumIndicesThisRow+1);
	PermutedGlobalColIndices[NumIndicesThisRow] = MyPermutedGlobalRowElements[i] ;
	
#if 0
	std::cout  << __FILE__ << "::" << __LINE__ 
	      << " i = " << i 
	      << "NumIndicesThisRow = " << NumIndicesThisRow 
	      << "ThisRowValues[NumIndicesThisRow = " << ThisRowValues[NumIndicesThisRow] 
	      << " PermutedGlobalColIndices[NumIndcesThisRow] = " << PermutedGlobalColIndices[NumIndicesThisRow] 
	      << std::endl ; 
#endif

	NumIndicesThisRow++  ;

      } 
      assert( Out->InsertGlobalValues( MyPermutedGlobalRowElements[i], 
				       NumIndicesThisRow,
				       &ThisRowValues[0],
				       &PermutedGlobalColIndices[0] ) >= 0 ); 
    }

  //

  Epetra_LocalMap ReplicatedMap( NumGlobalRangeElements, 0, In.Comm() );

  RCP<Epetra_Map> OutRangeMap ;
  RCP<Epetra_Map> OutDomainMap ;
  
  switch( RangeMapType ) {
  case 0:
    OutRangeMap = PermutedRangeMap ;
    break;
  case 1:
    OutRangeMap = rcp(&SerialRangeMap, false); 
    break;
  case 2:
    OutRangeMap = rcp(&BizarreRangeMap, false); 
    break;
  case 3:
    OutRangeMap = rcp(&ReplicatedMap, false); 
    break;
  }
  //  switch( DomainMapType ) {
  switch( DomainMapType ) {
  case 0:
    OutDomainMap = PermutedDomainMap ;
    break;
  case 1:
    OutDomainMap = rcp(&SerialDomainMap, false); 
    break;
  case 2:
    OutDomainMap = rcp(&BizarreDomainMap, false); 
    break;
  case 3:
    OutDomainMap = rcp(&ReplicatedMap, false); 
    break;
  }
#if 0
  assert(Out->FillComplete( *PermutedDomainMap, *PermutedRangeMap )==0);
#else
  assert(Out->FillComplete( *OutDomainMap, *OutRangeMap )==0);
#endif

#if 0
  std::cout << __FILE__ << "::" << __LINE__ << std::endl ;
  Out->Print( std::cout ) ; 
#endif

  return Out;
}
Пример #24
0
int
main (int argc, char *argv[])
{
  using namespace Anasazi;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using std::endl;

#ifdef HAVE_MPI
  // Initialize MPI
  MPI_Init (&argc, &argv);
#endif // HAVE_MPI

  // Create an Epetra communicator
#ifdef HAVE_MPI
  Epetra_MpiComm Comm (MPI_COMM_WORLD);
#else
  Epetra_SerialComm Comm;
#endif // HAVE_MPI

  // Create an Anasazi output manager
  BasicOutputManager<double> printer;
  printer.stream(Errors) << Anasazi_Version() << std::endl << std::endl;

  // Get the sorting std::string from the command line
  std::string which ("LM");
  Teuchos::CommandLineProcessor cmdp (false, true);
  cmdp.setOption("sort", &which, "Targetted eigenvalues (SM or LM).");
  if (cmdp.parse (argc, argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
#ifdef HAVE_MPI
    MPI_Finalize ();
#endif // HAVE_MPI
    return -1;
  }

  // Dimension of the matrix
  //
  // Discretization points in any one direction.
  const int nx = 10;
  // Size of matrix nx*nx
  const int NumGlobalElements = nx*nx;

  // Construct a Map that puts approximately the same number of
  // equations on each process.
  Epetra_Map Map (NumGlobalElements, 0, Comm);

  // Get update list and number of local equations from newly created Map.
  int NumMyElements = Map.NumMyElements ();

  std::vector<int> MyGlobalElements (NumMyElements);
  Map.MyGlobalElements (&MyGlobalElements[0]);

  // Create an integer vector NumNz that is used to build the Petra
  // matrix.  NumNz[i] is the number of OFF-DIAGONAL terms for the
  // i-th global equation on this process.
  std::vector<int> NumNz (NumMyElements);

  /* We are building a matrix of block structure:

      | T -I          |
      |-I  T -I       |
      |   -I  T       |
      |        ...  -I|
      |           -I T|

   where each block is dimension nx by nx and the matrix is on the order of
   nx*nx.  The block T is a tridiagonal matrix.
  */
  for (int i=0; i<NumMyElements; ++i) {
    if (MyGlobalElements[i] == 0 || MyGlobalElements[i] == NumGlobalElements-1 ||
        MyGlobalElements[i] == nx-1 || MyGlobalElements[i] == nx*(nx-1) ) {
      NumNz[i] = 3;
    }
    else if (MyGlobalElements[i] < nx || MyGlobalElements[i] > nx*(nx-1) ||
             MyGlobalElements[i]%nx == 0 || (MyGlobalElements[i]+1)%nx == 0) {
      NumNz[i] = 4;
    }
    else {
      NumNz[i] = 5;
    }
  }

  // Create an Epetra_Matrix
  RCP<Epetra_CrsMatrix> A = rcp (new Epetra_CrsMatrix (Copy, Map, &NumNz[0]));

  // Compute coefficients for discrete convection-diffution operator
  const double one = 1.0;
  std::vector<double> Values(4);
  std::vector<int> Indices(4);
  double rho = 0.0;
  double h = one /(nx+1);
  double h2 = h*h;
  double c = 5.0e-01*rho/ h;
  Values[0] = -one/h2 - c; Values[1] = -one/h2 + c; Values[2] = -one/h2; Values[3]= -one/h2;
  double diag = 4.0 / h2;
  int NumEntries;

  for (int i=0; i<NumMyElements; ++i) {
    if (MyGlobalElements[i]==0) {
      Indices[0] = 1;
      Indices[1] = nx;
      NumEntries = 2;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[1], &Indices[0]);
      assert( info==0 );
    }
    else if (MyGlobalElements[i] == nx*(nx-1)) {
      Indices[0] = nx*(nx-1)+1;
      Indices[1] = nx*(nx-2);
      NumEntries = 2;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[1], &Indices[0]);
      assert( info==0 );
    }
    else if (MyGlobalElements[i] == nx-1) {
      Indices[0] = nx-2;
      NumEntries = 1;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      assert( info==0 );
      Indices[0] = 2*nx-1;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[2], &Indices[0]);
      assert( info==0 );
    }
    else if (MyGlobalElements[i] == NumGlobalElements-1) {
      Indices[0] = NumGlobalElements-2;
      NumEntries = 1;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      assert( info==0 );
      Indices[0] = nx*(nx-1)-1;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[2], &Indices[0]);
      assert( info==0 );
    }
    else if (MyGlobalElements[i] < nx) {
      Indices[0] = MyGlobalElements[i]-1;
      Indices[1] = MyGlobalElements[i]+1;
      Indices[2] = MyGlobalElements[i]+nx;
      NumEntries = 3;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      assert( info==0 );
    }
    else if (MyGlobalElements[i] > nx*(nx-1)) {
      Indices[0] = MyGlobalElements[i]-1;
      Indices[1] = MyGlobalElements[i]+1;
      Indices[2] = MyGlobalElements[i]-nx;
      NumEntries = 3;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      assert( info==0 );
    }
    else if (MyGlobalElements[i]%nx == 0) {
      Indices[0] = MyGlobalElements[i]+1;
      Indices[1] = MyGlobalElements[i]-nx;
      Indices[2] = MyGlobalElements[i]+nx;
      NumEntries = 3;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[1], &Indices[0]);
      assert( info==0 );
    }
    else if ((MyGlobalElements[i]+1)%nx == 0) {
      Indices[0] = MyGlobalElements[i]-nx;
      Indices[1] = MyGlobalElements[i]+nx;
      NumEntries = 2;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[2], &Indices[0]);
      assert( info==0 );
      Indices[0] = MyGlobalElements[i]-1;
      NumEntries = 1;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      assert( info==0 );
    }
    else {
      Indices[0] = MyGlobalElements[i]-1;
      Indices[1] = MyGlobalElements[i]+1;
      Indices[2] = MyGlobalElements[i]-nx;
      Indices[3] = MyGlobalElements[i]+nx;
      NumEntries = 4;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      assert( info==0 );
    }
    // Put in the diagonal entry
    int info = A->InsertGlobalValues(MyGlobalElements[i], 1, &diag, &MyGlobalElements[i]);
    assert( info==0 );
  }

  // Finish up
  int info = A->FillComplete ();
  assert( info==0 );
  A->SetTracebackMode (1); // Shutdown Epetra Warning tracebacks

  // Create a identity matrix for the temporary mass matrix
  RCP<Epetra_CrsMatrix> M = rcp (new Epetra_CrsMatrix (Copy, Map, 1));
  for (int i=0; i<NumMyElements; i++) {
    Values[0] = one;
    Indices[0] = i;
    NumEntries = 1;
    info = M->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
    assert( info==0 );
  }
  // Finish up
  info = M->FillComplete ();
  assert( info==0 );
  M->SetTracebackMode (1); // Shutdown Epetra Warning tracebacks

  //************************************
  // Call the LOBPCG solver manager
  //***********************************
  //
  // Variables used for the LOBPCG Method
  const int nev       = 10;
  const int blockSize = 5;
  const int maxIters  = 500;
  const double tol    = 1.0e-8;

  typedef Epetra_MultiVector MV;
  typedef Epetra_Operator OP;
  typedef MultiVecTraits<double, Epetra_MultiVector> MVT;

  // Create an Epetra_MultiVector for an initial vector to start the
  // solver.  Note: This needs to have the same number of columns as
  // the blocksize.
  RCP<Epetra_MultiVector> ivec = rcp (new Epetra_MultiVector (Map, blockSize));
  ivec->Random (); // fill the initial vector with random values

  // Create the eigenproblem.
  RCP<BasicEigenproblem<double, MV, OP> > MyProblem =
    rcp (new BasicEigenproblem<double, MV, OP> (A, ivec));

  // Inform the eigenproblem that the operator A is symmetric
  MyProblem->setHermitian (true);

  // Set the number of eigenvalues requested
  MyProblem->setNEV (nev);

  // Tell the eigenproblem that you are finishing passing it information.
  const bool success = MyProblem->setProblem ();
  if (! success) {
    printer.print (Errors, "Anasazi::BasicEigenproblem::setProblem() reported an error.\n");
#ifdef HAVE_MPI
    MPI_Finalize ();
#endif // HAVE_MPI
    return -1;
  }

  // Create parameter list to pass into the solver manager
  Teuchos::ParameterList MyPL;
  MyPL.set ("Which", which);
  MyPL.set ("Block Size", blockSize);
  MyPL.set ("Maximum Iterations", maxIters);
  MyPL.set ("Convergence Tolerance", tol);
  MyPL.set ("Full Ortho", true);
  MyPL.set ("Use Locking", true);

  // Create the solver manager
  LOBPCGSolMgr<double, MV, OP> MySolverMan (MyProblem, MyPL);

  // Solve the problem
  ReturnType returnCode = MySolverMan.solve ();

  // Get the eigenvalues and eigenvectors from the eigenproblem
  Eigensolution<double,MV> sol = MyProblem->getSolution ();
  std::vector<Value<double> > evals = sol.Evals;
  RCP<MV> evecs = sol.Evecs;

  // Compute residuals.
  std::vector<double> normR (sol.numVecs);
  if (sol.numVecs > 0) {
    Teuchos::SerialDenseMatrix<int,double> T (sol.numVecs, sol.numVecs);
    Epetra_MultiVector tempAevec (Map, sol.numVecs );
    T.putScalar (0.0);
    for (int i = 0; i < sol.numVecs; ++i) {
      T(i,i) = evals[i].realpart;
    }
    A->Apply (*evecs, tempAevec);
    MVT::MvTimesMatAddMv (-1.0, *evecs, T, 1.0, tempAevec);
    MVT::MvNorm (tempAevec, normR);
  }

  // Print the results
  std::ostringstream os;
  os.setf (std::ios_base::right, std::ios_base::adjustfield);
  os << "Solver manager returned "
     << (returnCode == Converged ? "converged." : "unconverged.") << endl;
  os << endl;
  os << "------------------------------------------------------" << endl;
  os << std::setw(16) << "Eigenvalue"
     << std::setw(18) << "Direct Residual"
     << endl;
  os << "------------------------------------------------------" << endl;
  for (int i = 0; i < sol.numVecs; ++i) {
    os << std::setw(16) << evals[i].realpart
       << std::setw(18) << normR[i] / evals[i].realpart
       << endl;
  }
  os << "------------------------------------------------------" << endl;
  printer.print (Errors, os.str ());

#ifdef HAVE_MPI
  MPI_Finalize ();
#endif // HAVE_MPI
  return 0;
}
Пример #25
0
void build_simple_matrix(
  Epetra_Comm &comm,         // Communicator to use
  Epetra_CrsMatrix *&A,      // OUTPUT:  Matrix returned
  itype nGlobalRows,         // Number of global matrix rows and columns
  bool testEpetra64,         // if true, add 2*INT_MAX to each global ID
                             // to exercise Epetra64
  bool verbose               // if true, print out matrix information
)
{
  Epetra_Map *rowMap = NULL;        // Row map for the created matrix
  Epetra_Map *colMap = NULL;        // Col map for the created matrix
  Epetra_Map *vectorMap = NULL;     // Range/Domain map for the created matrix

  long long offsetEpetra64;

  build_maps(nGlobalRows, testEpetra64, comm, 
             &vectorMap, &rowMap, &colMap, offsetEpetra64, verbose);

  // Create an integer vector nnzPerRow that is used to build the Epetra Matrix.
  // nnzPerRow[i] is the number of entries for the ith global equation
  int nMyRows = rowMap->NumMyElements();
  std::vector<int> nnzPerRow(nMyRows+1, 0);

  // Also create lists of the nonzeros to be assigned to processors.
  // To save programming time and complexity, these vectors are allocated 
  // bigger than they may actually be needed.
  std::vector<itype> iv(3*nMyRows+1);
  std::vector<itype> jv(3*nMyRows+1);
  std::vector<double> vv(3*nMyRows+1);

  itype nMyNonzeros = 0;
  for (itype i = 0, myrowcnt = 0; i < nGlobalRows; i++) {
    if (rowMap->MyGID(i+offsetEpetra64)) { 
      // This processor owns part of this row; see whether it owns the nonzeros
      if (i > 0 && (!colMap || colMap->MyGID(i-1+offsetEpetra64))) {
        iv[nMyNonzeros] = i + offsetEpetra64;
        jv[nMyNonzeros] = i-1 + offsetEpetra64;
        vv[nMyNonzeros] = -1;
        nMyNonzeros++;
        nnzPerRow[myrowcnt]++;
      }
      if (!colMap || colMap->MyGID(i+offsetEpetra64)) {
        iv[nMyNonzeros] = i + offsetEpetra64;
        jv[nMyNonzeros] = i + offsetEpetra64;
        vv[nMyNonzeros] = ((i == 0 || i == nGlobalRows-1) ? 1. : 2.);
        nMyNonzeros++;
        nnzPerRow[myrowcnt]++;
      }
      if (i < nGlobalRows - 1 && (!colMap ||  colMap->MyGID(i+1+offsetEpetra64))) {
        iv[nMyNonzeros] = i + offsetEpetra64;
        jv[nMyNonzeros] = i+1 + offsetEpetra64;
        vv[nMyNonzeros] = -1;
        nMyNonzeros++;
        nnzPerRow[myrowcnt]++;
      }
      myrowcnt++;
    }
  }

  // Create an Epetra_Matrix
  A = new Epetra_CrsMatrix(Copy, *rowMap, &nnzPerRow[0], true);

  int info;

  for (int sum = 0, i=0; i < nMyRows; i++) {
    if (nnzPerRow[i]) {
      info = A->InsertGlobalValues(iv[sum],nnzPerRow[i],&vv[sum],&jv[sum]);
      assert(info==0);
      sum += nnzPerRow[i];
    }
  }

  // Finish up
  if (vectorMap)
    info = A->FillComplete(*vectorMap, *vectorMap);
  else
    info = A->FillComplete();

  assert(info==0);

}
Пример #26
0
int main(int argc, char *argv[]) {

  // Initialize MPI
#ifdef HAVE_MPI
  MPI_Init(&argc,&argv);
#endif

  // Create a communicator for Epetra objects
#ifdef HAVE_MPI
  Epetra_MpiComm Comm( MPI_COMM_WORLD );
#else
  Epetra_SerialComm Comm;
#endif

  bool verbose = false;
  bool success = false;
  try {
    int globalLength = 100; // This should suffice

    if (argc > 1)
      if (argv[1][0]=='-' && argv[1][1]=='v')
        verbose = true;

    // Get the process ID and the total number of processors
    int MyPID = Comm.MyPID();

    // Set up the printing utilities
    Teuchos::RCP<Teuchos::ParameterList> noxParamsPtr =
      Teuchos::rcp(new Teuchos::ParameterList);
    Teuchos::ParameterList& noxParams = *(noxParamsPtr.get());
    // Only print output if the "-v" flag is set on the command line
    Teuchos::ParameterList& printParams = noxParams.sublist("Printing");
    printParams.set("MyPID", MyPID);
    printParams.set("Output Precision", 5);
    printParams.set("Output Processor", 0);
    if( verbose )
      printParams.set("Output Information",
          NOX::Utils::OuterIteration +
          NOX::Utils::OuterIterationStatusTest +
          NOX::Utils::InnerIteration +
          NOX::Utils::Parameters +
          NOX::Utils::Details +
          NOX::Utils::Warning +
          NOX::Utils::TestDetails);
    else
      printParams.set("Output Information", NOX::Utils::Error);

    NOX::Utils printing(printParams);

    // Identify the test problem
    if (printing.isPrintType(NOX::Utils::TestDetails))
      printing.out() << "Starting epetra/NOX_Vector/NOX_Vector.exe" << std::endl;

    // Create a TestCompare class
    NOX::TestCompare tester( printing.out(), printing);
    double tolerance = 1.e-12;
    NOX::TestCompare::CompareType aComp = NOX::TestCompare::Absolute;

    // Identify processor information
#ifdef HAVE_MPI
    printing.out() << "Parallel Run" << std::endl;
    printing.out() << "Number of processors = " << Comm.NumProc() << std::endl;
    printing.out() << "Print Process = " << MyPID << std::endl;
    Comm.Barrier();
    if (printing.isPrintType(NOX::Utils::TestDetails))
      printing.out() << "Process " << MyPID << " is alive!" << std::endl;
    Comm.Barrier();
#else
    printing.out() << "Serial Run" << std::endl;
#endif

    // Create a map describing data distribution
    Epetra_Map * standardMap = new Epetra_Map(globalLength, 0, Comm);

    // Return value
    int status = 0;

    // *** Start Testing Here!!! ***

    // First create the Epetra_Vector needed to construct our NOX vector
    Epetra_Vector * epetraVec = new Epetra_Vector(*standardMap, true);

    NOX::Epetra::Vector * noxVec1 = new NOX::Epetra::Vector(*epetraVec, NOX::DeepCopy);
    delete epetraVec; epetraVec = 0;

    NOX::Epetra::Vector * noxVec2 = new NOX::Epetra::Vector(*noxVec1);
    noxVec2->init(1.0);

    // Test our norms
    NOX::Abstract::Vector::NormType
      oneNorm = NOX::Abstract::Vector::OneNorm,
      twoNorm = NOX::Abstract::Vector::TwoNorm,
      infNorm = NOX::Abstract::Vector::MaxNorm;

    double expectedOneNorm = (double) globalLength,
           expectedTwoNorm = sqrt( (double) globalLength),
           expectedInfNorm = 1.0;

    status += tester.testValue( noxVec2->norm(oneNorm), expectedOneNorm,
                                tolerance, "One-Norm Test", aComp);
    status += tester.testValue( noxVec2->norm(twoNorm), expectedTwoNorm,
                                tolerance, "Two-Norm Test", aComp);
    status += tester.testValue( noxVec2->norm(infNorm), expectedInfNorm,
                                tolerance, "Max-Norm Test", aComp);


    // Test random, reciprocal and dot methods
    noxVec1->random();
    // Threshold values since we want to do a reciprocal
    int myLength = standardMap->NumMyElements();
    for( int i = 0; i < myLength; ++i )
      if( fabs(noxVec1->getEpetraVector()[i]) < 1.e-8 ) noxVec1->getEpetraVector()[i] = 1.e-8;

    noxVec2->reciprocal(*noxVec1);
    double product = noxVec1->innerProduct(*noxVec2);

    status += tester.testValue( product, expectedOneNorm,
                                tolerance, "Random, Reciprocal and Dot Test", aComp);


    // Test abs and weighted-norm methods
    /*  ----------------------------
        NOT SUPPORTED AT THIS TIME
        ----------------------------
    noxVec2->abs(*noxVec2);
    double wNorm = noxVec1->norm(*noxVec2);
    status += tester.testValue( wNorm, noxVec1->norm(oneNorm),
                                tolerance, "Abs and Weighted-Norm Test", aComp);
    */

    // Test operator= , abs, update and scale methods
    (*noxVec2) = (*noxVec1);
    noxVec2->abs(*noxVec2);
    double sumAll = noxVec1->norm(oneNorm);
    noxVec2->update( 1.0, *noxVec1, 1.0 );
    noxVec2->scale(0.5);
    double sumPositive = noxVec2->norm(oneNorm);
    (*noxVec2) = (*noxVec1);
    noxVec2->abs(*noxVec2);
    noxVec2->update( 1.0, *noxVec1, -1.0 );
    noxVec2->scale(0.5);
    double sumNegative = noxVec2->norm(oneNorm);

    status += tester.testValue( (sumPositive + sumNegative), sumAll,
                                tolerance, "Abs, Operator= , Update and Scale Test", aComp);

    success = status==0;

    if (success)
      printing.out() << "Test passed!" << std::endl;
    else
      printing.out() << "Test failed!" << std::endl;

    delete noxVec2;
    delete noxVec1;
    delete standardMap;
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

#ifdef HAVE_MPI
  MPI_Finalize();
#endif

  return ( success ? EXIT_SUCCESS : EXIT_FAILURE );
}
int Drumm2(const Epetra_Map& map, bool verbose)
{
  //Simple 2-element problem (element as in "finite-element") from
  //Clif Drumm. Two triangular elements, one per processor, as shown
  //here:
  //
  //   *----*
  //  3|\  2|
  //   | \  |
  //   | 0\1|
  //   |   \|
  //   *----*
  //  0    1
  //
  //Element 0 on processor 0, element 1 on processor 1.
  //Processor 0 will own nodes 0,1,3 and processor 1 will own node 2.
  //Each processor will pass a 3x3 element-connectivity-matrix to
  //Epetra_FECrsGraph.
  //After GlobalAssemble(), the graph should be as follows:
  //
  //         row 0: 2  1  0  1
  //proc 0   row 1: 1  4  1  2
  //         row 2: 0  1  2  1
  //----------------------------------
  //proc 1   row 3: 1  2  1  4
  //

  int numProcs = map.Comm().NumProc();
  int localProc = map.Comm().MyPID();

  if (numProcs != 2) return(0);

  int indexBase = 0, ierr = 0;
  int numMyNodes = 3;
  long long* myNodes = new long long[numMyNodes];

  if (localProc == 0) {
    myNodes[0] = 0;
    myNodes[1] = 1;
    myNodes[2] = 3;
  }
  else {
    numMyNodes = 1;
    myNodes[0] = 2;
  }

  Epetra_Map Map((long long) -1, numMyNodes, myNodes, indexBase, map.Comm());

  int rowLengths = 3;
  Epetra_FECrsGraph A(Copy, Map, rowLengths);

  if (localProc != 0) {
    numMyNodes = 3;
    myNodes[0] = 1;
    myNodes[1] = 2;
    myNodes[2] = 3;
  }

  EPETRA_TEST_ERR( A.InsertGlobalIndices(numMyNodes, myNodes,
					 numMyNodes, myNodes),ierr);

  EPETRA_TEST_ERR( A.GlobalAssemble(), ierr );

  if (verbose) {
    A.Print(std::cout);
  }

  delete [] myNodes;

  return(0);
}
Пример #28
0
int Amesos_TestSolver( Epetra_Comm &Comm, char *matrix_file, 
		       SparseSolverType SparseSolver,
		       bool transpose, 
		       int special, AMESOS_MatrixType matrix_type ) {


  Epetra_Map * readMap;
  Epetra_CrsMatrix * readA; 
  Epetra_Vector * readx; 
  Epetra_Vector * readb;
  Epetra_Vector * readxexact;
   
  std::string FileName = matrix_file ;
  int FN_Size = FileName.size() ; 
  std::string LastFiveBytes = FileName.substr( EPETRA_MAX(0,FN_Size-5), FN_Size );
  std::string LastFourBytes = FileName.substr( EPETRA_MAX(0,FN_Size-4), FN_Size );
  bool NonContiguousMap = false; 

  if ( LastFiveBytes == ".triU" ) { 
    // Call routine to read in unsymmetric Triplet matrix
    NonContiguousMap = true; 
    EPETRA_CHK_ERR( Trilinos_Util_ReadTriples2Epetra( matrix_file, false, Comm, readMap, readA, readx, 
						      readb, readxexact, NonContiguousMap ) );
  } else {
    if ( LastFiveBytes == ".triS" ) { 
      NonContiguousMap = true; 
      // Call routine to read in symmetric Triplet matrix
      EPETRA_CHK_ERR( Trilinos_Util_ReadTriples2Epetra( matrix_file, true, Comm, readMap, readA, readx, 
							readb, readxexact, NonContiguousMap ) );
    } else {
      if (  LastFourBytes == ".mtx" ) { 
	EPETRA_CHK_ERR( Trilinos_Util_ReadMatrixMarket2Epetra( matrix_file, Comm, readMap, 
							       readA, readx, readb, readxexact) );
      } else {
	// Call routine to read in HB problem
	Trilinos_Util_ReadHb2Epetra( matrix_file, Comm, readMap, readA, readx, 
						     readb, readxexact) ;
      }
    }
  }

  Epetra_CrsMatrix transposeA(Copy, *readMap, 0);
  Epetra_CrsMatrix *serialA ; 

  if ( transpose ) {
    assert( CrsMatrixTranspose( readA, &transposeA ) == 0 ); 
    serialA = &transposeA ; 
  } else {
    serialA = readA ; 
  }

  Epetra_RowMatrix * passA = 0; 
  Epetra_Vector * passx = 0; 
  Epetra_Vector * passb = 0;
  Epetra_Vector * passxexact = 0;
  Epetra_Vector * passresid = 0;
  Epetra_Vector * passtmp = 0;

  // Create uniform distributed map
  Epetra_Map map(readMap->NumGlobalElements(), 0, Comm);
  Epetra_Map* map_;

  if( NonContiguousMap ) {
    //
    //  map gives us NumMyElements and MyFirstElement;
    //
    int NumGlobalElements =  readMap->NumGlobalElements();
    int NumMyElements = map.NumMyElements();
    int MyFirstElement = map.MinMyGID();
    std::vector<int> MapMap_( NumGlobalElements );
    readMap->MyGlobalElements( &MapMap_[0] ) ;
    Comm.Broadcast( &MapMap_[0], NumGlobalElements, 0 ) ; 
    map_ = new Epetra_Map( NumGlobalElements, NumMyElements, &MapMap_[MyFirstElement], 0, Comm);
  } else {
    map_ = new Epetra_Map( map ) ; 
  }


  Epetra_CrsMatrix A(Copy, *map_, 0);


  const Epetra_Map &OriginalMap = serialA->RowMatrixRowMap() ; 
  assert( OriginalMap.SameAs(*readMap) ); 
  Epetra_Export exporter(OriginalMap, *map_);
  Epetra_Export exporter2(OriginalMap, *map_);
  Epetra_Export MatrixExporter(OriginalMap, *map_);
  Epetra_CrsMatrix AwithDiag(Copy, *map_, 0);

  Epetra_Vector x(*map_);
  Epetra_Vector b(*map_);
  Epetra_Vector xexact(*map_);
  Epetra_Vector resid(*map_);
  Epetra_Vector readresid(*readMap);
  Epetra_Vector tmp(*map_);
  Epetra_Vector readtmp(*readMap);

  //  Epetra_Vector xcomp(*map_);      // X as computed by the solver
  bool distribute_matrix = ( matrix_type == AMESOS_Distributed ) ; 
  if ( distribute_matrix ) { 
    // Create Exporter to distribute read-in matrix and vectors
    //
    //  Initialize x, b and xexact to the values read in from the file
    //
    x.Export(*readx, exporter, Add);
    b.Export(*readb, exporter, Add);
    xexact.Export(*readxexact, exporter, Add);
    Comm.Barrier();
    
    A.Export(*serialA, exporter, Add);
    assert(A.FillComplete()==0);    
    
    Comm.Barrier();

    passA = &A; 

    passx = &x; 
    passb = &b;
    passxexact = &xexact;
    passresid = &resid;
    passtmp = &tmp;

  } else { 

    passA = serialA; 
    passx = readx; 
    passb = readb;
    passxexact = readxexact;
    passresid = &readresid;
    passtmp = &readtmp;
  }

  Epetra_MultiVector CopyB( *passb ) ;


  double Anorm = passA->NormInf() ; 
  SparseDirectTimingVars::SS_Result.Set_Anorm(Anorm) ;

  Epetra_LinearProblem Problem(  (Epetra_RowMatrix *) passA, 
				 (Epetra_MultiVector *) passx, 
				 (Epetra_MultiVector *) passb );
  

  for ( int i = 0; i < 1+special ; i++ ) { 
    Epetra_Time TotalTime( Comm ) ; 
    
    if ( false ) { 
      //  TEST_UMFPACK is never set by configure
#ifdef HAVE_AMESOS_SUPERLUDIST
    } else if ( SparseSolver == SUPERLUDIST ) {
	Teuchos::ParameterList ParamList ;
	ParamList.set( "MaxProcs", -3 );
	Amesos_Superludist A_Superludist( Problem ) ; 

  //ParamList.set( "Redistribute", true );
  //ParamList.set( "AddZeroToDiag", true );
  Teuchos::ParameterList& SuperludistParams = ParamList.sublist("Superludist") ;
  ParamList.set( "MaxProcs", -3 );

	EPETRA_CHK_ERR( A_Superludist.SetParameters( ParamList ) ); 
	EPETRA_CHK_ERR( A_Superludist.SetUseTranspose( transpose ) ); 
	EPETRA_CHK_ERR( A_Superludist.SymbolicFactorization(  ) ); 
	EPETRA_CHK_ERR( A_Superludist.NumericFactorization(  ) ); 
	EPETRA_CHK_ERR( A_Superludist.Solve(  ) ); 
#endif
#ifdef HAVE_AMESOS_DSCPACK
    } else if ( SparseSolver == DSCPACK ) {
      
      Teuchos::ParameterList ParamList ;
      ParamList.set( "MaxProcs", -3 );

      Amesos_Dscpack A_dscpack( Problem ) ; 
      EPETRA_CHK_ERR( A_dscpack.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( A_dscpack.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( A_dscpack.NumericFactorization(  ) ); 
      EPETRA_CHK_ERR( A_dscpack.Solve(  ) ); 
#endif
#ifdef HAVE_AMESOS_SCALAPACK
    } else if ( SparseSolver == SCALAPACK ) {

      Teuchos::ParameterList ParamList ;
      Amesos_Scalapack A_scalapack( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( A_scalapack.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( A_scalapack.SetUseTranspose( transpose ) ); 
      EPETRA_CHK_ERR( A_scalapack.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( A_scalapack.NumericFactorization(  ) ); 
      EPETRA_CHK_ERR( A_scalapack.Solve(  ) ); 

#endif
#ifdef HAVE_AMESOS_TAUCS
    } else if ( SparseSolver == TAUCS ) {

      Teuchos::ParameterList ParamList ;
      Amesos_Taucs A_taucs( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( A_taucs.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( A_taucs.SetUseTranspose( transpose ) ); 
      EPETRA_CHK_ERR( A_taucs.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( A_taucs.NumericFactorization(  ) ); 
      EPETRA_CHK_ERR( A_taucs.Solve(  ) ); 

#endif
#ifdef HAVE_AMESOS_PARDISO
    } else if ( SparseSolver == PARDISO ) {

      Teuchos::ParameterList ParamList ;
      Amesos_Pardiso A_pardiso( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( A_pardiso.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( A_pardiso.SetUseTranspose( transpose ) ); 
      EPETRA_CHK_ERR( A_pardiso.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( A_pardiso.NumericFactorization(  ) ); 
      EPETRA_CHK_ERR( A_pardiso.Solve(  ) ); 

#endif
#ifdef HAVE_AMESOS_PARAKLETE
    } else if ( SparseSolver == PARAKLETE ) {

      Teuchos::ParameterList ParamList ;
      Amesos_Paraklete A_paraklete( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( A_paraklete.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( A_paraklete.SetUseTranspose( transpose ) ); 
      EPETRA_CHK_ERR( A_paraklete.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( A_paraklete.NumericFactorization(  ) ); 
      EPETRA_CHK_ERR( A_paraklete.Solve(  ) ); 

#endif
#ifdef HAVE_AMESOS_MUMPS
    } else if ( SparseSolver == MUMPS ) {

      Teuchos::ParameterList ParamList ;
      Amesos_Mumps A_mumps( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( A_mumps.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( A_mumps.SetUseTranspose( transpose ) ); 
      EPETRA_CHK_ERR( A_mumps.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( A_mumps.NumericFactorization(  ) ); 
      EPETRA_CHK_ERR( A_mumps.Solve(  ) ); 

#endif
#ifdef HAVE_AMESOS_SUPERLU
    } else if ( SparseSolver == SUPERLU ) {

      Teuchos::ParameterList ParamList ;
      Amesos_Superlu A_superlu( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( A_superlu.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( A_superlu.SetUseTranspose( transpose ) ); 
      EPETRA_CHK_ERR( A_superlu.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( A_superlu.NumericFactorization(  ) ); 
      EPETRA_CHK_ERR( A_superlu.Solve(  ) ); 

#endif
#ifdef HAVE_AMESOS_LAPACK
    } else if ( SparseSolver == LAPACK ) {

      Teuchos::ParameterList ParamList ;
      Amesos_Lapack A_lapack( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( A_lapack.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( A_lapack.SetUseTranspose( transpose ) ); 
      EPETRA_CHK_ERR( A_lapack.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( A_lapack.NumericFactorization(  ) ); 
      EPETRA_CHK_ERR( A_lapack.Solve(  ) ); 
#endif
#ifdef HAVE_AMESOS_UMFPACK
    } else if ( SparseSolver == UMFPACK ) {

      Teuchos::ParameterList ParamList ;
      Amesos_Umfpack A_umfpack( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( A_umfpack.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( A_umfpack.SetUseTranspose( transpose ) ); 
      EPETRA_CHK_ERR( A_umfpack.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( A_umfpack.NumericFactorization(  ) ); 
      EPETRA_CHK_ERR( A_umfpack.Solve(  ) ); 
#endif
#ifdef HAVE_AMESOS_KLU
    } else if ( SparseSolver == KLU ) {


      using namespace Teuchos;

      Amesos_Time AT; 
      int setupTimePtr = -1, symTimePtr = -1, numTimePtr = -1, refacTimePtr = -1, solveTimePtr = -1;
      AT.CreateTimer(Comm, 2);
      AT.ResetTimer(0);

      Teuchos::ParameterList ParamList ;
      // ParamList.set("OutputLevel",2);
      Amesos_Klu A_klu( Problem ); 
      ParamList.set( "MaxProcs", -3 );
      ParamList.set( "TrustMe", false );
      // ParamList.set( "Refactorize", true );
      EPETRA_CHK_ERR( A_klu.SetParameters( ParamList ) ) ; 
      EPETRA_CHK_ERR( A_klu.SetUseTranspose( transpose ) ); 
      setupTimePtr = AT.AddTime("Setup", setupTimePtr, 0);
      EPETRA_CHK_ERR( A_klu.SymbolicFactorization(  ) ); 
      symTimePtr = AT.AddTime("Symbolic", symTimePtr, 0);
      EPETRA_CHK_ERR( A_klu.NumericFactorization(  ) ); 
      numTimePtr = AT.AddTime("Numeric", numTimePtr, 0);
      EPETRA_CHK_ERR( A_klu.NumericFactorization(  ) ); 
      refacTimePtr = AT.AddTime("Refactor", refacTimePtr, 0);
      // for ( int i=0; i<100000 ; i++ ) 
      EPETRA_CHK_ERR( A_klu.Solve(  ) ); 
      solveTimePtr = AT.AddTime("Solve", solveTimePtr, 0);

      double SetupTime = AT.GetTime(setupTimePtr);
      double SymbolicTime = AT.GetTime(symTimePtr);
      double NumericTime = AT.GetTime(numTimePtr);
      double RefactorTime = AT.GetTime(refacTimePtr);
      double SolveTime = AT.GetTime(solveTimePtr);

      std::cout << __FILE__ << "::"  << __LINE__ << " SetupTime = " << SetupTime << std::endl ; 
      std::cout << __FILE__ << "::"  << __LINE__ << " SymbolicTime = " << SymbolicTime - SetupTime << std::endl ; 
      std::cout << __FILE__ << "::"  << __LINE__ << " NumericTime = " << NumericTime - SymbolicTime<< std::endl ; 
      std::cout << __FILE__ << "::"  << __LINE__ << " RefactorTime = " << RefactorTime - NumericTime << std::endl ; 
      std::cout << __FILE__ << "::"  << __LINE__ << " SolveTime = " << SolveTime - RefactorTime << std::endl ; 

#endif
    } else { 
      SparseDirectTimingVars::log_file << "Solver not implemented yet" << std::endl ;
      std::cerr << "\n\n####################  Requested solver not available on this platform ##################### ATS\n" << std::endl ;
      std::cout << " SparseSolver = " << SparseSolver << std::endl ; 
      std::cerr << " SparseSolver = " << SparseSolver << std::endl ; 
    }
    
    SparseDirectTimingVars::SS_Result.Set_Total_Time( TotalTime.ElapsedTime() ); 
  }  // end for (int i=0; i<special; i++ ) 

  //
  //  Compute the error = norm(xcomp - xexact )
  //
  double error;
  passresid->Update(1.0, *passx, -1.0, *passxexact, 0.0);

  passresid->Norm2(&error);
  SparseDirectTimingVars::SS_Result.Set_Error(error) ;

  //  passxexact->Norm2(&error ) ; 
  //  passx->Norm2(&error ) ; 

  //
  //  Compute the residual = norm(Ax - b)
  //
  double residual ; 

  passA->Multiply( transpose, *passx, *passtmp);
  passresid->Update(1.0, *passtmp, -1.0, *passb, 0.0); 
  //  passresid->Update(1.0, *passtmp, -1.0, CopyB, 0.0); 
  passresid->Norm2(&residual);

  SparseDirectTimingVars::SS_Result.Set_Residual(residual) ;
    
  double bnorm; 
  passb->Norm2( &bnorm ) ; 
  SparseDirectTimingVars::SS_Result.Set_Bnorm(bnorm) ;

  double xnorm; 
  passx->Norm2( &xnorm ) ; 
  SparseDirectTimingVars::SS_Result.Set_Xnorm(xnorm) ;

  delete readA;
  delete readx;
  delete readb;
  delete readxexact;
  delete readMap;
  delete map_;
  
  Comm.Barrier();

  return 0;
}