Example #1
0
  // Constructs a strided Epetra map from element counts and striding information.
  //
  // The constructor builds an Epetra_Map of "nodes" (element counts divided by
  // getFixedBlockSize()), expands every locally owned node GID into its
  // degree-of-freedom GIDs, and stores the resulting Epetra_BlockMap in map_.
  //
  // Parameters:
  //   numGlobalElements  global number of dofs, or
  //                      Teuchos::OrdinalTraits<global_size_t>::invalid() to let
  //                      the map constructors receive -1 for the global count.
  //   numLocalElements   number of dofs on the calling process.
  //   indexBase          index base handed to the Epetra map constructors.
  //   stridingInfo       dofs per node for each striding block.
  //   comm               communicator, converted with toEpetra().
  //   stridedBlockId     -1 to use all dofs of a node, otherwise the index into
  //                      stridingInfo of the single block this map represents.
  //   offset, node       forwarded to the base-class constructors.
  //
  // Throws Exceptions::RuntimeError when stridedBlockId is out of range, when a
  // block size is zero, or when the resulting map does not match the expected
  // dof distribution. Epetra construction failures are reported through
  // IF_EPETRA_EXCEPTION_THEN_THROW_GLOBAL_INVALID_ARG.
  StridedEpetraMap::StridedEpetraMap(global_size_t numGlobalElements, size_t numLocalElements, int indexBase,
                       std::vector<size_t>& stridingInfo, const Teuchos::RCP<const Teuchos::Comm<int> > &comm, LocalOrdinal stridedBlockId, GlobalOrdinal offset, const Teuchos::RCP<Node> &node)
  : EpetraMap(numGlobalElements, numLocalElements, indexBase, comm, node), StridedMap<int, int>(numGlobalElements, numLocalElements, indexBase, stridingInfo, comm, stridedBlockId, offset)
  {
    // Size of one full node block; all node counts below are derived by dividing by it.
    const size_t blockSize = getFixedBlockSize();
    TEUCHOS_TEST_FOR_EXCEPTION(blockSize == 0, Exceptions::RuntimeError, "StridedEpetraMap::StridedEpetraMap: fixed block size is zero.");

    // Validate the striding block id BEFORE stridingInfo is indexed with it.
    // -1 selects the full node block and needs no lookup; any other value must be a valid index.
    TEUCHOS_TEST_FOR_EXCEPTION(stridedBlockId != -1 && (stridedBlockId < 0 || stridingInfo.size() <= Teuchos::as<size_t>(stridedBlockId)), Exceptions::RuntimeError, "StridedEpetraMap::StridedEpetraMap: stridedBlockId >= stridingInfo.size()");

    // number of nodes (over all processors); stays "invalid" when the global size is unknown
    global_size_t numGlobalNodes = Teuchos::OrdinalTraits<global_size_t>::invalid();
    if(numGlobalElements != Teuchos::OrdinalTraits<global_size_t>::invalid())
      numGlobalNodes = numGlobalElements / blockSize;
    // number of nodes (on each processor)
    const size_t numLocalNodes = numLocalElements / blockSize;

    // build an equally distributed node map
    RCP<Epetra_Map> nodeMap = Teuchos::null;
    IF_EPETRA_EXCEPTION_THEN_THROW_GLOBAL_INVALID_ARG((nodeMap = (rcp(new Epetra_Map(static_cast<int>(numGlobalNodes), numLocalNodes, indexBase, *toEpetra(comm))))));

    // translate local node ids to local dofs
    int nStridedOffset = 0;                          // dofs of a node that precede the selected block
    int nDofsPerNode = Teuchos::as<int>(blockSize);  // dofs per node for local striding block
    if(stridedBlockId > -1) {
      // determine nStridedOffset: sum of the sizes of all blocks in front of the selected one
      for(int j=0; j<stridedBlockId; j++) {
        nStridedOffset += stridingInfo[j];
      }
      nDofsPerNode = stridingInfo[stridedBlockId];

      // a single striding block only holds nDofsPerNode dofs per node
      numGlobalElements = nodeMap->NumGlobalElements()*nDofsPerNode;
    }
    // nDofsPerNode is used as a divisor in the distribution check below.
    TEUCHOS_TEST_FOR_EXCEPTION(nDofsPerNode <= 0, Exceptions::RuntimeError, "StridedEpetraMap::StridedEpetraMap: striding block has no dofs.");

    const int numMyNodes = nodeMap->NumMyElements();
    std::vector<int> dofgids;
    dofgids.reserve(static_cast<size_t>(numMyNodes) * static_cast<size_t>(nDofsPerNode));
    for(int i = 0; i<numMyNodes; i++) {
      const int gid = nodeMap->GID(i);
      for(int dof = 0; dof < nDofsPerNode; ++dof) {
        // dofs are calculated by
        // global offset + node_GID * full size of strided map + striding offset of current striding block + dof id of current striding block
        dofgids.push_back(offset_ + gid*blockSize + nStridedOffset + dof);
      }
    }

    // Indexing element 0 of an empty vector is undefined, so only take the
    // address when this process actually owns dofs.
    int* dofgidsPtr = NULL;
    if (!dofgids.empty())
      dofgidsPtr = &dofgids[0];
    const int numMyDofs = static_cast<int>(dofgids.size());

    if (numGlobalElements == Teuchos::OrdinalTraits<global_size_t>::invalid()) {
      IF_EPETRA_EXCEPTION_THEN_THROW_GLOBAL_INVALID_ARG((map_ = (rcp(new Epetra_BlockMap(-1, numMyDofs, dofgidsPtr, 1, indexBase, *toEpetra(comm))))));
    } else {
      IF_EPETRA_EXCEPTION_THEN_THROW_GLOBAL_INVALID_ARG((map_ = (rcp(new Epetra_BlockMap(numGlobalElements, numMyDofs, dofgidsPtr, 1, indexBase, *toEpetra(comm))))));
    }

    // The final map must hold exactly nDofsPerNode dofs for every node, locally and globally.
    // (For a selected striding block nDofsPerNode already equals stridingInfo[stridedBlockId].)
    TEUCHOS_TEST_FOR_EXCEPTION(map_->NumMyPoints() % nDofsPerNode != 0, Exceptions::RuntimeError, "StridedEpetraMap::StridedEpetraMap: wrong distribution of dofs among processors.");
    TEUCHOS_TEST_FOR_EXCEPTION(getNodeNumElements() != Teuchos::as<size_t>(nodeMap->NumMyElements()*nDofsPerNode), Exceptions::RuntimeError, "StridedEpetraMap::StridedEpetraMap: wrong distribution of dofs among processors.");
    TEUCHOS_TEST_FOR_EXCEPTION(getGlobalNumElements() != Teuchos::as<size_t>(nodeMap->NumGlobalElements()*nDofsPerNode), Exceptions::RuntimeError, "StridedEpetraMap::StridedEpetraMap: wrong distribution of dofs among processors.");

    TEUCHOS_TEST_FOR_EXCEPTION(CheckConsistency() == false, Exceptions::RuntimeError, "StridedEpetraMap::StridedEpetraMap: CheckConsistency() == false");
  }
// Test driver: solves a linear system read from file with the unpreconditioned
// Belos LSQR solver on Epetra objects.
//
// Command-line options (see the setOption calls below) select the matrix file,
// an optional Matrix Market right-hand side, tolerances, the damping parameter
// and iteration limits.
//
// Returns 0 when the solver reports convergence and the explicitly computed
// relative residual of every right-hand side is at most
// resGrowthFactor * relResTol; returns -1 on a command-line error, a
// right-hand-side read error, a problem set-up failure, or non-convergence.
int main(int argc, char *argv[]) {
  //
  int MyPID = 0;
#ifdef EPETRA_MPI
  // Initialize MPI
  MPI_Init(&argc,&argv);
  Epetra_MpiComm Comm(MPI_COMM_WORLD);
  MyPID = Comm.MyPID();
#else
  Epetra_SerialComm Comm;
#endif
  //
  typedef double                            ST;
  typedef Teuchos::ScalarTraits<ST>        SCT;
  typedef SCT::magnitudeType                MT;
  typedef Epetra_MultiVector                MV;
  typedef Epetra_Operator                   OP;
  typedef Belos::MultiVecTraits<ST,MV>     MVT;
  typedef Belos::OperatorTraits<ST,MV,OP>  OPT;

  using Teuchos::ParameterList;
  using Teuchos::RCP;
  using Teuchos::rcp;

  bool verbose = false, debug = false, proc_verbose = false;
  int frequency = -1;        // frequency of status test output.
  int blocksize = 1;         // blocksize
  int numrhs = 1;            // number of right-hand sides to solve for
  int maxiters = -1;         // maximum number of iterations allowed per linear system
  std::string filenameMatrix("orsirr1_scaled.hb");
  std::string filenameRHS;   // blank means unset
  MT relResTol = 3.0e-4;     // relative residual tolerance
  // Like CG, LSQR is a short recurrence method that
  // does not have the "n" step convergence property in finite precision arithmetic.
  MT resGrowthFactor = 4.0;   // In this example, warn if |resid| > resGrowthFactor * relResTol
  // With no preconditioner, this is only the difference between the "implicit" and the "explicit"
  // residual.

  MT relMatTol = 1.e-4;     // relative Matrix error, default value sqrt(eps)
  MT maxCond  = 1.e+8;      // maximum condition number default value 1/eps
  MT damp = 0.;             // regularization (or damping) parameter

  Teuchos::CommandLineProcessor cmdp(false,true); // e.g. ./a.out --tol=.1 --filename=foo.hb

  cmdp.setOption("verbose","quiet",&verbose,"Print messages and results.");
  cmdp.setOption("debug","nondebug",&debug,"Print debugging information from solver.");
  cmdp.setOption("frequency",&frequency,"Solvers frequency for printing residuals (#iters).");
  cmdp.setOption("filename",&filenameMatrix,"Filename for test matrix.  Acceptable file extensions: *.hb,*.mtx,*.triU,*.triS");
  cmdp.setOption("rhsFilename",&filenameRHS,"Filename for right-hand side.  Acceptable file extension: *.mtx");
  cmdp.setOption("lambda",&damp,"Regularization parameter");
  cmdp.setOption("tol",&relResTol,"Relative residual tolerance");
  cmdp.setOption("matrixTol",&relMatTol,"Relative error in Matrix");
  cmdp.setOption("max-cond",&maxCond,"Maximum condition number");
  cmdp.setOption("num-rhs",&numrhs,"Number of right-hand sides to be solved for.");
  cmdp.setOption("block-size",&blocksize,"Block size used by LSQR."); // must be one at this point
  cmdp.setOption("max-iters",&maxiters,"Maximum number of iterations per linear system (-1 = adapted to problem/block size).");

  if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
#ifdef EPETRA_MPI
    MPI_Finalize();
#endif
    return -1;
  }
  // blocksize is used as a divisor and numrhs sizes the norm vectors whose
  // first entry is read below, so both must be at least one.
  if (numrhs < 1 || blocksize < 1) {
    if (MyPID == 0)
      std::cerr << "ERROR:  num-rhs and block-size must both be at least 1." << std::endl;
#ifdef EPETRA_MPI
    MPI_Finalize();
#endif
    return -1;
  }
  if (!verbose)
    frequency = -1;  // reset frequency if test is not verbose
  //
  // Get the problem
  //
  RCP<Epetra_Map> Map;
  RCP<Epetra_CrsMatrix> A;
  RCP<Epetra_MultiVector> B, X;
  RCP<Epetra_Vector> vecB, vecX;
  EpetraExt::readEpetraLinearSystem(filenameMatrix, Comm, &A, &Map, &vecX, &vecB);
  // Rectangular matrices are embedded in square matrices.  vecX := 0,  vecB = A*randVec
  A->OptimizeStorage();
  proc_verbose = verbose && (MyPID==0);  /* Only print on the zero processor */

  // A right-hand side file is only honoured for a single right-hand side.
  bool isRHS = !filenameRHS.empty();

  // Check to see if the number of right-hand sides is the same as requested.
  if (numrhs>1) {
    isRHS = false; // numrhs > 1 not yet supported
    X = rcp( new Epetra_MultiVector( *Map, numrhs ) );
    B = rcp( new Epetra_MultiVector( *Map, numrhs ) );
    X->Seed();
    X->Random();
    OPT::Apply( *A, *X, *B ); // B := AX
    X->PutScalar( 0.0 );   // annihilate X
  }
  else {
    if ( isRHS )
      {
        // Filled in by MatrixMarketFileToMultiVector; this function owns and deletes it.
        Epetra_MultiVector * BmustDelete = NULL;
        int mmRHSioflag = 0;
        const char * charPtrRHSfn = filenameRHS.c_str();
        mmRHSioflag = EpetraExt::MatrixMarketFileToMultiVector(charPtrRHSfn, *Map, BmustDelete);
        //std::cout << "rhs from input file " << std::endl;
        //BmustDelete->Print(std::cout);

        if( mmRHSioflag )
          {
            if (proc_verbose)
              std::cout << "Error " <<  mmRHSioflag << " occured while attempting to read file " << filenameRHS << std::endl;
#ifdef EPETRA_MPI
            MPI_Finalize();
#endif
            return -1;
          }
        X = rcp( new Epetra_MultiVector( *Map, numrhs ) );
        X->Scale( 0.0 );
        B = rcp( new MV(*BmustDelete));
        delete BmustDelete;
      }
    else
      {
        int locNumCol = Map->MaxLID() + 1; // Create a known solution
        int globNumCol = Map->MaxAllGID() + 1;
        for( int li = 0; li < locNumCol; li++){   // assume consecutive lid
          int gid = Map->GID(li);
          double value = (double) ( globNumCol -1 - gid );
          int numEntries = 1;
          vecX->ReplaceGlobalValues( numEntries, &value, &gid );
        }
        bool Trans = false;
        A->Multiply( Trans, *vecX, *vecB ); // Create a consistent linear system
        // At this point, the initial guess is exact.
        bool goodInitGuess = true; // perturb initial guess
        bool zeroInitGuess = false; // annihilate initial guess
        if( goodInitGuess )
          {
            double value = 1.e-2; // "Rel RHS Err" and "Rel Mat Err" apply to the residual equation,
            int numEntries = 1;   // norm( b - A x_k ) ?<? relResTol norm( b- Axo).
            int index = 0;        // norm(b) is inaccessible to LSQR.
            vecX->SumIntoMyValues(  numEntries, &value, &index);
          }
        if( zeroInitGuess )
          {
            vecX->PutScalar( 0.0 ); //
          }
        X = Teuchos::rcp_implicit_cast<Epetra_MultiVector>(vecX);
        B = Teuchos::rcp_implicit_cast<Epetra_MultiVector>(vecB);
      }
  }
  //
  // ********Other information used by block solver***********
  // *****************(can be user specified)******************
  //
  const int NumGlobalElements = B->GlobalLength();
  if (maxiters == -1)
    maxiters = NumGlobalElements/blocksize - 1; // maximum number of iterations to run
  ParameterList belosList; // mechanism for configuring specific linear solver
  belosList.set( "Block Size", blocksize );       // LSQR blocksize, must be one
  belosList.set( "Lambda", damp );                // Regularization parameter
  belosList.set( "Rel RHS Err", relResTol );      // Relative convergence tolerance requested
  belosList.set( "Rel Mat Err", relMatTol );      // Relative error in the matrix
  belosList.set( "Condition Limit", maxCond);     // upper bound for cond(A)
  belosList.set( "Maximum Iterations", maxiters );// Maximum number of iterations allowed
  int verbosity = Belos::Errors + Belos::Warnings;
  if (verbose) {
    verbosity += Belos::TimingDetails + Belos::StatusTestDetails;
    if (frequency > 0)
      belosList.set( "Output Frequency", frequency );
  }
  if (debug) {
    verbosity += Belos::Debug;
  }
  belosList.set( "Verbosity", verbosity );
  //
  // Construct an unpreconditioned linear problem instance.
  //
  Belos::LinearProblem<double,MV,OP> problem( A, X, B );
  bool set = problem.setProblem();
  if (set == false) {
    if (proc_verbose)
      {
        std::cout << std::endl << "ERROR:  Belos::LinearProblem failed to set up correctly!" << std::endl;
      }
#ifdef EPETRA_MPI
      MPI_Finalize();
#endif
      return -1;
  }
  // *******************************************************************
  // ******************* Apply Single Vector LSQR **********************
  // *******************************************************************
  // Create an iterative solver manager.
  // problem and belosList live on the stack, so they are wrapped as non-owning RCPs.
  RCP< Belos::LSQRSolMgr<double,MV,OP> > newSolver
    = rcp( new Belos::LSQRSolMgr<double,MV,OP>(rcp(&problem,false), rcp(&belosList,false)));

  if (proc_verbose) { // ******** Print a problem description *********
    std::cout << std::endl << std::endl;
    std::cout << "Dimension of matrix: " << NumGlobalElements << std::endl;
    std::cout << "Number of right-hand sides: " << numrhs << std::endl;
    std::cout << "Block size used by solver: " << blocksize << std::endl;
    std::cout << "Max number of iterations for a linear system: " << maxiters << std::endl;
    std::cout << "Relative residual tolerance: " << relResTol << std::endl;
    std::cout << std::endl;
    std::cout << "Solver's Description: " << std::endl;
    std::cout << newSolver->description() << std::endl; // visually verify the parameter list
  }
  Belos::ReturnType ret = newSolver->solve(); // Perform solve
  std::vector<double> solNorm( numrhs );      // get solution norm
  MVT::MvNorm( *X, solNorm );
  int numIters = newSolver->getNumIters();    // get number of solver iterations
  MT condNum = newSolver->getMatCondNum();
  MT matrixNorm= newSolver->getMatNorm();
  MT resNorm = newSolver->getResNorm();
  MT lsResNorm = newSolver->getMatResNorm();

  if (proc_verbose)
    std::cout << "Number of iterations performed for this solve: " << numIters << std::endl
     << "matrix condition number: " << condNum << std::endl
     << "matrix norm: " << matrixNorm << std::endl
     << "residual norm: " << resNorm << std::endl
     << "solution norm: " << solNorm[0] << std::endl
     << "least squares residual Norm: " << lsResNorm << std::endl;
  bool badRes = false;                     // Compute the actual residuals.
  std::vector<double> actual_resids( numrhs );
  std::vector<double> rhs_norm( numrhs );
  Epetra_MultiVector resid(*Map, numrhs);
  OPT::Apply( *A, *X, resid );
  MVT::MvAddMv( -1.0, resid, 1.0, *B, resid );
  MVT::MvNorm( resid, actual_resids );
  MVT::MvNorm( *B, rhs_norm );
  // The residual check decides the exit code, so it must run on every process
  // and regardless of verbosity; only the printing is restricted to proc_verbose.
  if (proc_verbose)
    std::cout<< "---------- Actual Residuals (normalized) ----------"<<std::endl<<std::endl;
  for ( int i=0; i<numrhs; i++) {
    const double actRes = actual_resids[i]/rhs_norm[i];
    if (proc_verbose)
      std::cout<<"Problem "<<i<<" : \t"<< actRes <<std::endl;
    if (actRes > relResTol * resGrowthFactor)
      {
        badRes = true;
        if (proc_verbose) std::cout << "residual norm > " << relResTol * resGrowthFactor <<  std::endl;
      }
  }
  if (ret!=Belos::Converged || badRes) {
    if (proc_verbose)
      std::cout << std::endl << "ERROR:  Belos did not converge!" << std::endl;
#ifdef EPETRA_MPI
    MPI_Finalize();
#endif
    return -1;
  }
  if (proc_verbose)
    std::cout << std::endl << "SUCCESS:  Belos converged!" << std::endl;
#ifdef EPETRA_MPI
  MPI_Finalize();
#endif
  return 0; // Default return value
}
// Timing driver for basic Epetra vector and CRS-matrix operations.
//
// Creates a uniformly distributed map with a fixed global size, runs each
// operation once inside its own Teuchos::TimeMonitor scope, and prints the
// timer summary. Returns EXIT_SUCCESS unless an exception was caught by
// TEUCHOS_STANDARD_CATCH_STATEMENTS.
int main (int argc, char *argv[])
{
  // Initialize MPI.
  // The session must be a NAMED object: an unnamed temporary would be
  // destroyed at the end of this statement, ending the MPI session before any
  // communicator is created.
  Teuchos::GlobalMPISession mpiSession (&argc, &argv, NULL);

  // Create output stream. (Handy for multicore output.)
  const RCP<Teuchos::FancyOStream> out =
    Teuchos::VerboseObjectBase::getDefaultOStream();

  // Create a communicator for Epetra objects
#ifdef HAVE_MPI
  RCP<Epetra_MpiComm> eComm =
    rcp<Epetra_MpiComm> (new Epetra_MpiComm (MPI_COMM_WORLD));
#else
  RCP<Epetra_SerialComm> eComm =
    rcp<Epetra_SerialComm> (new Epetra_SerialComm());
#endif

  bool success = true;
  try {
    // Create map.
    // Do strong scaling tests, so keep numGlobalElements independent of
    // the number of processes.
    const int numGlobalElements = 50000000;
    const int indexBase = 0;
    RCP<Epetra_Map> map =
      rcp(new Epetra_Map (numGlobalElements, indexBase, *eComm));

    //// Create map with overlay.
    //int numMyOverlapNodes = 3;

    //// Get an approximation of my nodes.
    //int numMyElements = numGlobalElements / eComm->NumProc();
    //int startIndex = eComm->MyPID() * numMyElements;
    //// Calculate the resulting number of total nodes.
    //int numTotalNodes = numMyElements * eComm->NumProc();
    //// Add one node to the first numGlobalElements-numTotalNodes processes.
    //if (eComm->MyPID() < numGlobalElements - numTotalNodes)
    //{
    //    numMyElements++;
    //    startIndex += eComm->MyPID();
    //}
    //else
    //{
    //    startIndex += numGlobalElements - numTotalNodes;
    //}

    //Teuchos::Array<int> indices(numMyElements);
    //for (int k = 0;  k<numMyElements; k++)
    //    indices[k] = startIndex + k;

    //std::cout << numGlobalElements << std::endl;
    //std::cout << numMyElements << std::endl;

    //RCP<Epetra_Map> overlapMap =
    //    rcp(new Epetra_Map (numGlobalElements, numMyElements, indices.getRawPtr(), indexBase, *eComm));

    //overlapMap->Print(std::cout);

    //throw 1;
    // tests on one vector
    RCP<Epetra_Vector> u = rcp(new Epetra_Vector(*map));
    u->Random();

    RCP<Teuchos::Time> meanValueTime =
      Teuchos::TimeMonitor::getNewTimer("Vector::MeanValue");
    {
      Teuchos::TimeMonitor tm(*meanValueTime);
      double meanVal;
      TEUCHOS_ASSERT_EQUALITY(0, u->MeanValue(&meanVal));
    }

    RCP<Teuchos::Time> maxValueTime =
      Teuchos::TimeMonitor::getNewTimer("Vector::MaxValue");
    {
      Teuchos::TimeMonitor tm(*maxValueTime);
      double maxValue;
      TEUCHOS_ASSERT_EQUALITY(0, u->MaxValue(&maxValue));
    }

    RCP<Teuchos::Time> minValueTime =
      Teuchos::TimeMonitor::getNewTimer("Vector::MinValue");
    {
      Teuchos::TimeMonitor tm(*minValueTime);
      double minValue;
      TEUCHOS_ASSERT_EQUALITY(0, u->MinValue(&minValue));
    }

    RCP<Teuchos::Time> norm1Time =
      Teuchos::TimeMonitor::getNewTimer("Vector::Norm1");
    {
      Teuchos::TimeMonitor tm(*norm1Time);
      double norm1;
      TEUCHOS_ASSERT_EQUALITY(0, u->Norm1(&norm1));
    }

    RCP<Teuchos::Time> norm2Time =
      Teuchos::TimeMonitor::getNewTimer("Vector::Norm2");
    {
      Teuchos::TimeMonitor tm(*norm2Time);
      double norm2;
      TEUCHOS_ASSERT_EQUALITY(0, u->Norm2(&norm2));
    }

    RCP<Teuchos::Time> normInfTime =
      Teuchos::TimeMonitor::getNewTimer("Vector::NormInf");
    {
      Teuchos::TimeMonitor tm(*normInfTime);
      double normInf;
      TEUCHOS_ASSERT_EQUALITY(0, u->NormInf(&normInf));
    }

    RCP<Teuchos::Time> scaleTime =
      Teuchos::TimeMonitor::getNewTimer("Vector::Scale");
    {
      Teuchos::TimeMonitor tm(*scaleTime);
      const double alpha = 0.5;
      TEUCHOS_ASSERT_EQUALITY(0, u->Scale(alpha));
    }
    // tests involving two vectors
    RCP<Epetra_Vector> v = rcp(new Epetra_Vector(*map));
    v->Random();

    RCP<Teuchos::Time> dotTime =
      Teuchos::TimeMonitor::getNewTimer("Vector::Dot");
    {
      Teuchos::TimeMonitor tm(*dotTime);
      double dot;
      TEUCHOS_ASSERT_EQUALITY(0, u->Dot(*v, &dot));
    }

    RCP<Teuchos::Time> multiplyTime =
      Teuchos::TimeMonitor::getNewTimer("Vector::Multiply");
    {
      Teuchos::TimeMonitor tm(*multiplyTime);
      TEUCHOS_ASSERT_EQUALITY(0, u->Multiply(1.0, *u, *v, 1.0));
    }

    RCP<Teuchos::Time> updateTime =
      Teuchos::TimeMonitor::getNewTimer("Vector::Update");
    {
      Teuchos::TimeMonitor tm(*updateTime);
      TEUCHOS_ASSERT_EQUALITY(0, u->Update(1.0, *v, 1.0));
    }

    // matrix-vector tests
    // diagonal test matrix: D(i,i) = 1/(i+1)
    RCP<Epetra_CrsMatrix> D =
      rcp(new Epetra_CrsMatrix(Copy, *map, 1));
    for (int k = 0; k < map->NumMyElements(); k++) {
      int col = map->GID(k);
      double val = 1.0 / (col+1);
      //TEUCHOS_ASSERT_EQUALITY(0, D->InsertMyValues(k, 1, &val, &col));
      TEUCHOS_ASSERT_EQUALITY(0, D->InsertGlobalValues(col, 1, &val, &col));
    }
    TEUCHOS_ASSERT_EQUALITY(0, D->FillComplete());

    // tridiagonal test matrix: rows (-1, 2, -1), truncated at the first and last row
    RCP<Epetra_CrsMatrix> T =
      rcp(new Epetra_CrsMatrix(Copy, *map, 3));
    for (int k = 0; k < map->NumMyElements(); k++) {
      int row = map->GID(k);
      if (row > 0) {
        int col = row-1;
        double val = -1.0;
        //TEUCHOS_ASSERT_EQUALITY(0, T->InsertMyValues(k, 1, &val, &col));
        TEUCHOS_ASSERT_EQUALITY(0, T->InsertGlobalValues(row, 1, &val, &col));
      }
      {
        int col = row;
        double val = 2.0;
        //TEUCHOS_ASSERT_EQUALITY(0, T->InsertMyValues(k, 1, &val, &col));
        TEUCHOS_ASSERT_EQUALITY(0, T->InsertGlobalValues(row, 1, &val, &col));
      }
      if (row < numGlobalElements-1) {
        int col = row+1;
        double val = -1.0;
        //TEUCHOS_ASSERT_EQUALITY(0, T->InsertMyValues(k, 1, &val, &col));
        TEUCHOS_ASSERT_EQUALITY(0, T->InsertGlobalValues(row, 1, &val, &col));
      }
    }
    TEUCHOS_ASSERT_EQUALITY(0, T->FillComplete());

    // start timings
    // The norm values themselves are not needed; only the calls are timed.
    RCP<Teuchos::Time> mNorm1Time =
      Teuchos::TimeMonitor::getNewTimer("CrsMatrix::Norm1");
    {
      Teuchos::TimeMonitor tm(*mNorm1Time);
      double dNorm1 = D->NormOne();
      double tNorm1 = T->NormOne();
      (void) dNorm1;
      (void) tNorm1;
    }

    RCP<Teuchos::Time> mNormInfTime =
      Teuchos::TimeMonitor::getNewTimer("CrsMatrix::NormInf");
    {
      Teuchos::TimeMonitor tm(*mNormInfTime);
      double dNormInf = D->NormInf();
      double tNormInf = T->NormInf();
      (void) dNormInf;
      (void) tNormInf;
    }

    RCP<Teuchos::Time> mNormFrobTime =
      Teuchos::TimeMonitor::getNewTimer("CrsMatrix::NormFrobenius");
    {
      Teuchos::TimeMonitor tm(*mNormFrobTime);
      double dNormFrob = D->NormFrobenius();
      double tNormFrob = T->NormFrobenius();
      (void) dNormFrob;
      (void) tNormFrob;
    }

    RCP<Teuchos::Time> mScaleTime =
      Teuchos::TimeMonitor::getNewTimer("CrsMatrix::Scale");
    {
      Teuchos::TimeMonitor tm(*mScaleTime);
      TEUCHOS_ASSERT_EQUALITY(0, D->Scale(2.0));
      TEUCHOS_ASSERT_EQUALITY(0, T->Scale(2.0));
    }

    RCP<Teuchos::Time> leftScaleTime =
      Teuchos::TimeMonitor::getNewTimer("CrsMatrix::LeftScale");
    {
      Teuchos::TimeMonitor tm(*leftScaleTime);
      TEUCHOS_ASSERT_EQUALITY(0, D->LeftScale(*v));
      TEUCHOS_ASSERT_EQUALITY(0, T->LeftScale(*v));
    }

    RCP<Teuchos::Time> rightScaleTime =
      Teuchos::TimeMonitor::getNewTimer("CrsMatrix::RightScale");
    {
      Teuchos::TimeMonitor tm(*rightScaleTime);
      TEUCHOS_ASSERT_EQUALITY(0, D->RightScale(*v));
      TEUCHOS_ASSERT_EQUALITY(0, T->RightScale(*v));
    }

    RCP<Teuchos::Time> applyTime =
      Teuchos::TimeMonitor::getNewTimer("CrsMatrix::Apply");
    {
      Teuchos::TimeMonitor tm(*applyTime);
      TEUCHOS_ASSERT_EQUALITY(0, D->Apply(*u, *v));
      TEUCHOS_ASSERT_EQUALITY(0, T->Apply(*u, *v));
    }
    // print timing data
    Teuchos::TimeMonitor::summarize();
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(true, *out, success);

  return success ? EXIT_SUCCESS : EXIT_FAILURE;
}
// Test driver: solves a linear system read from file with the Belos LSQR
// solver, preconditioned by an Ifpack "ILU" preconditioner applied on the left
// or on the right.
//
// Returns EXIT_SUCCESS when the solver reports convergence and the explicitly
// computed relative residual of every right-hand side is at most
// resGrowthFactor * relResTol, EXIT_FAILURE otherwise (including when an
// exception is caught), and -1 on a command-line or problem set-up error.
int main(int argc, char *argv[]) {
  //
  int MyPID = 0;
#ifdef EPETRA_MPI
  // Initialize MPI
  MPI_Init(&argc,&argv);
  Epetra_MpiComm Comm(MPI_COMM_WORLD);
  MyPID = Comm.MyPID();
#else
  Epetra_SerialComm Comm;
#endif
  //
  typedef double                            ST;
  typedef Teuchos::ScalarTraits<ST>        SCT;
  typedef SCT::magnitudeType                MT;
  typedef Epetra_MultiVector                MV;
  typedef Epetra_Operator                   OP;
  typedef Belos::MultiVecTraits<ST,MV>     MVT;
  typedef Belos::OperatorTraits<ST,MV,OP>  OPT;

  using Teuchos::ParameterList;
  using Teuchos::RCP;
  using Teuchos::rcp;

  bool verbose = false;
  bool success = true;
  try {
    bool proc_verbose = false;
    bool leftprec = true;      // left preconditioning or right.
    // LSQR applies the operator and the transposed operator.
    // A preconditioner must support transpose multiply.
    int frequency = -1;        // frequency of status test output.
    int blocksize = 1;         // blocksize
    // LSQR as currently implemented is a single vector algorithm.
    // However some of the parameters that would be used by a block version
    // have not been removed from this file.
    int numrhs = 1;            // number of right-hand sides to solve for
    int maxiters = -1;         // maximum number of iterations allowed per linear system
    std::string filename("orsirr1_scaled.hb");
    MT relResTol = 1.0e-5;     // relative residual tolerance for the preconditioned linear system
    MT resGrowthFactor = 1.0;  // In this example, warn if |resid| > resGrowthFactor * relResTol

    MT relMatTol = 1.e-10;     // relative Matrix error, default value sqrt(eps)
    MT maxCond  = 1.e+5;       // maximum condition number default value 1/eps
    MT damp = 0.;              // regularization (or damping) parameter

    Teuchos::CommandLineProcessor cmdp(false,true);
    cmdp.setOption("verbose","quiet",&verbose,"Print messages and results.");
    cmdp.setOption("left-prec","right-prec",&leftprec,"Left preconditioning or right.");
    cmdp.setOption("frequency",&frequency,"Solvers frequency for printing residuals (#iters).");
    cmdp.setOption("filename",&filename,"Filename for test matrix.  Acceptable file extensions: *.hb,*.mtx,*.triU,*.triS");
    cmdp.setOption("lambda",&damp,"Regularization parameter");
    cmdp.setOption("tol",&relResTol,"Relative residual tolerance");
    cmdp.setOption("matrixTol",&relMatTol,"Relative error in Matrix");
    cmdp.setOption("max-cond",&maxCond,"Maximum condition number");
    cmdp.setOption("num-rhs",&numrhs,"Number of right-hand sides to be solved for.");
    cmdp.setOption("block-size",&blocksize,"Block size used by LSQR.");
    cmdp.setOption("max-iters",&maxiters,"Maximum number of iterations per linear system (-1 = adapted to problem/block size).");
    if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
      // Early exit: MPI still has to be shut down.
#ifdef EPETRA_MPI
      MPI_Finalize();
#endif
      return -1;
    }
    // blocksize is used as a divisor and numrhs sizes the norm vectors whose
    // first entry is read below, so both must be at least one.
    if (numrhs < 1 || blocksize < 1) {
      if (MyPID == 0)
        std::cerr << "ERROR:  num-rhs and block-size must both be at least 1." << std::endl;
#ifdef EPETRA_MPI
      MPI_Finalize();
#endif
      return -1;
    }
    if (!verbose)
      frequency = -1;  // reset frequency if test is not verbose

    //
    // *************Get the problem*********************
    //
    RCP<Epetra_Map> Map;
    RCP<Epetra_CrsMatrix> A;
    RCP<Epetra_MultiVector> B, X;
    RCP<Epetra_Vector> vecB, vecX;
    EpetraExt::readEpetraLinearSystem(filename, Comm, &A, &Map, &vecX, &vecB);
    A->OptimizeStorage();
    proc_verbose = verbose && (MyPID==0);  /* Only print on the zero processor */

    // Check to see if the number of right-hand sides is the same as requested.
    if (numrhs>1) {
      X = rcp( new Epetra_MultiVector( *Map, numrhs ) );
      B = rcp( new Epetra_MultiVector( *Map, numrhs ) );
      X->Random();
      OPT::Apply( *A, *X, *B );
      X->PutScalar( 0.0 );
    }
    else {
      int locNumCol = Map->MaxLID() + 1; // Create a known solution
      int globNumCol = Map->MaxAllGID() + 1;
      for( int li = 0; li < locNumCol; li++){   // assume consecutive lid
        int gid = Map->GID(li);
        double value = (double) ( globNumCol -1 - gid );
        int numEntries = 1;
        vecX->ReplaceGlobalValues( numEntries, &value, &gid );
      }
      bool Trans = false;
      A->Multiply( Trans, *vecX, *vecB ); // Create a consistent linear system
      // At this point, the initial guess is exact.
      bool zeroInitGuess = false; // annihilate initial guess
      bool goodInitGuess = true; // initial guess near solution
      if( zeroInitGuess )
        {
          vecX->PutScalar( 0.0 );
        }
      else
        {
          if( goodInitGuess )
            {
              double value = 1.e-2; // "Rel RHS Err" and "Rel Mat Err" apply to the residual equation,
              int numEntries = 1;   // norm( b - A x_k ) ?<? relResTol norm( b- Axo).
              int index = 0;        // norm(b) is inaccessible to LSQR.
              vecX->SumIntoMyValues(  numEntries, &value, &index);
            }
        }
      X = Teuchos::rcp_implicit_cast<Epetra_MultiVector>(vecX);
      B = Teuchos::rcp_implicit_cast<Epetra_MultiVector>(vecB);
    }
    //
    // ************Construct preconditioner*************
    //
    ParameterList ifpackList;

    // allocates an IFPACK factory. No data is associated
    // to this object (only method Create()).
    Ifpack Factory;  // do support transpose multiply

    // create the preconditioner. For valid PrecType values,
    // please check the documentation
    std::string PrecType = "ILU"; // incomplete LU
    int OverlapLevel = 1; // nonnegative

    RCP<Ifpack_Preconditioner> Prec = Teuchos::rcp( Factory.Create(PrecType, &*A, OverlapLevel) );
    assert(Prec != Teuchos::null);

    // specify parameters for ILU
    ifpackList.set("fact: level-of-fill", 1);
    // the combine mode is on the following:
    // "Add", "Zero", "Insert", "InsertAdd", "Average", "AbsMax"
    // Their meaning is as defined in file Epetra_CombineMode.h
    ifpackList.set("schwarz: combine mode", "Add");
    // sets the parameters
    // NOTE(review): IFPACK_CHK_ERR presumably returns from main on error, which
    // would skip MPI_Finalize below -- confirm against the macro definition.
    IFPACK_CHK_ERR(Prec->SetParameters(ifpackList));

    // initialize the preconditioner. At this point the matrix must
    // have been FillComplete()'d, but actual values are ignored.
    IFPACK_CHK_ERR(Prec->Initialize());

    // Builds the preconditioners, by looking for the values of
    // the matrix.
    IFPACK_CHK_ERR(Prec->Compute());

    // Probe that the preconditioner accepts transpose mode (LSQR needs it),
    // then switch back to the non-transposed default.
    {
      const int errcode = Prec->SetUseTranspose (true);
      if (errcode != 0) {
        throw std::logic_error ("Oh hai! Ifpack_Preconditioner doesn't know how to apply its transpose.");
      } else {
        (void) Prec->SetUseTranspose (false);
      }
    }

    // Create the Belos preconditioned operator from the Ifpack preconditioner.
    // NOTE:  This is necessary because Belos expects an operator to apply the
    //        preconditioner with Apply() NOT ApplyInverse().
    RCP<Belos::EpetraPrecOp> belosPrec = rcp( new Belos::EpetraPrecOp( Prec ) );

    //
    // *****Create parameter list for the LSQR solver manager*****
    //
    const int NumGlobalElements = B->GlobalLength();
    if (maxiters == -1)
      maxiters = NumGlobalElements/blocksize - 1; // maximum number of iterations to run
    //
    ParameterList belosList;
    belosList.set( "Block Size", blocksize );       // Blocksize to be used by iterative solver
    belosList.set( "Lambda", damp );                // Regularization parameter
    belosList.set( "Rel RHS Err", relResTol );      // Relative convergence tolerance requested
    belosList.set( "Rel Mat Err", relMatTol );      // Relative error in the matrix
    belosList.set( "Condition Limit", maxCond);     // upper bound for cond(A)
    belosList.set( "Maximum Iterations", maxiters );// Maximum number of iterations allowed
    if (numrhs > 1) {
      belosList.set( "Show Maximum Residual Norm Only", true );  // Show only the maximum residual norm
    }
    if (verbose) {
      belosList.set( "Verbosity", Belos::Errors + Belos::Warnings +
                     Belos::TimingDetails + Belos::StatusTestDetails );
      if (frequency > 0)
        belosList.set( "Output Frequency", frequency );
    }
    else
      belosList.set( "Verbosity", Belos::Errors + Belos::Warnings );
    //
    // *******Construct a preconditioned linear problem********
    //
    RCP<Belos::LinearProblem<double,MV,OP> > problem
      = rcp( new Belos::LinearProblem<double,MV,OP>( A, X, B ) );
    if (leftprec) {
      problem->setLeftPrec( belosPrec );
    }
    else {
      problem->setRightPrec( belosPrec );
    }
    bool set = problem->setProblem();
    if (set == false) {
      if (proc_verbose)
        std::cout << std::endl << "ERROR:  Belos::LinearProblem failed to set up correctly!" << std::endl;
      // Early exit: MPI still has to be shut down.
#ifdef EPETRA_MPI
      MPI_Finalize();
#endif
      return -1;
    }

    // Create an iterative solver manager.
    // belosList lives on the stack, so it is wrapped as a non-owning RCP.
    RCP< Belos::LSQRSolMgr<double,MV,OP> > solver
      = rcp( new Belos::LSQRSolMgr<double,MV,OP>(problem, rcp(&belosList,false)));

    //
    // *******************************************************************
    // ******************Start the LSQR iteration*************************
    // *******************************************************************
    //
    if (proc_verbose) {
      std::cout << std::endl << std::endl;
      std::cout << "Dimension of matrix: " << NumGlobalElements << std::endl;
      std::cout << "Number of right-hand sides: " << numrhs << std::endl;
      std::cout << "Block size used by solver: " << blocksize << std::endl;
      std::cout << "Max number of iterations for a linear system: " << maxiters << std::endl;
      std::cout << "Relative residual tolerance: " << relResTol << std::endl;
      std::cout << std::endl;
      std::cout << "Solver's Description: " << std::endl;
      std::cout << solver->description() << std::endl; // visually verify the parameter list
    }
    //
    // Perform solve
    //
    Belos::ReturnType ret = solver->solve();
    //
    // Get the solution norm, iteration count and solver estimates for this solve.
    //
    std::vector<double> solNorm( numrhs );      // get solution norm
    MVT::MvNorm( *X, solNorm );
    int numIters = solver->getNumIters();
    MT condNum = solver->getMatCondNum();
    MT matrixNorm= solver->getMatNorm();
    MT resNorm = solver->getResNorm();
    MT lsResNorm = solver->getMatResNorm();
    if (proc_verbose)
      std::cout << "Number of iterations performed for this solve: " << numIters << std::endl
       << "matrix condition number: " << condNum << std::endl
       << "matrix norm: " << matrixNorm << std::endl
       << "residual norm: " << resNorm << std::endl
       << "solution norm: " << solNorm[0] << std::endl
       << "least squares residual Norm: " << lsResNorm << std::endl;
    //
    // Compute actual residuals.
    //
    bool badRes = false;
    std::vector<double> actual_resids( numrhs );
    std::vector<double> rhs_norm( numrhs );
    Epetra_MultiVector resid(*Map, numrhs);
    OPT::Apply( *A, *X, resid );
    MVT::MvAddMv( -1.0, resid, 1.0, *B, resid );
    MVT::MvNorm( resid, actual_resids );
    MVT::MvNorm( *B, rhs_norm );
    // The residual check decides the exit code, so it must run on every process
    // and regardless of verbosity; only the printing is restricted to proc_verbose.
    if (proc_verbose)
      std::cout<< "---------- Actual Residuals (normalized) ----------"<<std::endl<<std::endl;
    for ( int i=0; i<numrhs; i++) {
      const double actRes = actual_resids[i]/rhs_norm[i];
      if (proc_verbose)
        std::cout<<"Problem "<<i<<" : \t"<< actRes <<std::endl;
      if (actRes > relResTol * resGrowthFactor ) badRes = true;
    }

    if (ret!=Belos::Converged || badRes) {
      success = false;
      if (proc_verbose)
        std::cout << std::endl << "ERROR:  Belos did not converge!" << std::endl;
    } else {
      success = true;
      if (proc_verbose)
        std::cout << std::endl << "SUCCESS:  Belos converged!" << std::endl;
    }
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

#ifdef EPETRA_MPI
  MPI_Finalize();
#endif

  return success ? EXIT_SUCCESS : EXIT_FAILURE;
}