Ejemplo n.º 1
0
void Iterative_Inverse_Operator::operator () (const Epetra_MultiVector &b, Epetra_MultiVector &x)
{
  // Initialize the solution to zero
  x.PutScalar( 0.0 );

  // Reset the solver, problem, and status test for next solve (HKT)
  pProb->setProblem( Teuchos::rcp(&x, false), Teuchos::rcp(&b, false) );

  timer.start();
  Belos::ReturnType ret = pBelos->solve();
  timer.stop();

  int pid = pComm->MyPID();

  if (pid == 0 && print) {
    if (ret == Belos::Converged)
    {
      std::cout << std::endl << "pid[" << pid << "] Block GMRES converged" << std::endl;
      std::cout << "Solution time: " << timer.totalElapsedTime() << std::endl;

    }
    else
      std::cout << std::endl << "pid[" << pid << "] Block GMRES did not converge" << std::endl;
  }
}
void
Stokhos::EpetraMultiVectorOrthogPoly::
computeStandardDeviation(Epetra_MultiVector& v) const
{
  const Teuchos::Array<double>& nrm2 = this->basis_->norm_squared();
  v.PutScalar(0.0);
  for (int i=1; i<this->size(); i++)
    v.Multiply(nrm2[i], *coeff_[i], *coeff_[i], 1.0);
  for (int j=0; j<v.NumVectors(); j++)
    for (int i=0; i<v.MyLength(); i++)
      v[j][i] = std::sqrt(v[j][i]);
}
Ejemplo n.º 3
0
int  Piro::Epetra::MatrixFreeOperator::Apply
    (const Epetra_MultiVector& V, Epetra_MultiVector& Y) const
{
  TEUCHOS_TEST_FOR_EXCEPTION(!baseIsSet, std::logic_error, 
     " Piro::Epetra::MatrixFreeOperator must have Base values set before Apply");

  // Compute  Wv=y  by perturbing x
  //const Epetra_Vector* v = V(0); ??THIS DOESN'T WORK!!! Why???
  Teuchos::RCP<Epetra_Vector> v = Teuchos::rcp(new Epetra_Vector(View, V, 0));
  Teuchos::RCP<Epetra_Vector> y = Teuchos::rcp(new Epetra_Vector(Copy, Y, 0));
  Teuchos::RCP<const Epetra_Vector> xBase = modelInArgs.get_x();
  Teuchos::RCP<const Epetra_Vector> xdotBase;
  if (haveXdot) xdotBase = modelInArgs.get_x_dot();
  double vectorNorm;
  v->Norm2(&vectorNorm);

  // Any operator time zero vector is zero vector
  if (vectorNorm == 0.0) {
    Y.PutScalar(0.0);
    return 0;
  }

  double eta = lambda * (lambda + solutionNorm/vectorNorm);

  xPert->Update(1.0, *xBase, eta, *v, 0.0);
  if (haveXdot)
    xdotPert->Update(1.0, *xdotBase, eta, *v, 0.0);
  
  EpetraExt::ModelEvaluator::OutArgs modelOutArgs =
    model->createOutArgs();

  modelInArgs.set_x(xPert);
  if (haveXdot) modelInArgs.set_x_dot(xdotPert);

  // Alert model that this is a perturbed calculation, in case it does something different.
  EpetraExt::ModelEvaluator::Evaluation<Epetra_Vector> fPertEval(fPert, EpetraExt::ModelEvaluator::EVAL_TYPE_APPROX_DERIV);
  modelOutArgs.set_f(fPertEval);

  model->evalModel(modelInArgs, modelOutArgs);

  modelInArgs.set_x(xBase);
  if (haveXdot) modelInArgs.set_x_dot(xdotBase);
  modelOutArgs.set_f(fBase);

  y->Update(1.0, *fPert, -1.0, *fBase, 0.0);
  y->Scale(1.0/eta);

  Teuchos::RCP<Epetra_Vector> Y0 = Teuchos::rcp(new Epetra_Vector(View, Y, 0));
  *Y0 = *y; // copy in

  return 0;
}
//=========================================================================  
// Returns the result of a Epetra_Operator inverse applied to an Epetra_MultiVector X in Y.
int EpetraExt_PointToBlockDiagPermute::ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const{
  // Stuff borrowed from Epetra_CrsMatrix
  int NumVectors = X.NumVectors();
  if (NumVectors!=Y.NumVectors()) {
    EPETRA_CHK_ERR(-2); // Need same number of vectors in each MV
  }

  const Epetra_MultiVector *Xp=&X;
  Epetra_MultiVector *Yp=&Y;

  // Allocate temp workspace if X==Y and there are no imports or exports
  Epetra_MultiVector * Xcopy = 0;
  if (&X==&Y && Importer_==0 && Exporter_==0) {
    Xcopy = new Epetra_MultiVector(X);
    Xp=Xcopy;
  }
  
  UpdateImportVector(NumVectors); // Make sure Import and Export Vectors are compatible
  UpdateExportVector(NumVectors);

  // If we have a non-trivial importer, we must import elements that are permuted or are on other processors
  if (Importer_){
    EPETRA_CHK_ERR(ImportVector_->Import(X, *Importer_, Insert));
    Xp=ImportVector_;
  }
  
  // If we have a non-trivial exporter, we must export elements that are permuted or belong to other processors
  if (Exporter_) {
    Yp=ExportVector_;
  }
  
  // Do the matvec 
  BDMat_->ApplyInverse(*Xp,*Yp);

  // Export if needed
  if (Exporter_) {
    Y.PutScalar(0.0);  // Make sure target is zero
    Y.Export(*ExportVector_, *Exporter_, Add); // Fill Y with Values from export vector
  }
  
  // Cleanup
  if(Xcopy) {
    delete Xcopy;
    EPETRA_CHK_ERR(1); // Return positive code to alert the user about needing extra copy of X
    return 1;
  }

  return 0;
}
int EpetraSamplingOperator::Apply(const Epetra_MultiVector &X, Epetra_MultiVector &Y) const
{
  TEUCHOS_ASSERT(map_.PointSameAs(X.Map()) && map_.PointSameAs(Y.Map()));
  TEUCHOS_ASSERT(X.NumVectors() == Y.NumVectors());

  Y.PutScalar(0.0);

  for (int iVec = 0; iVec < X.NumVectors(); ++iVec) {
    const ArrayView<const double> sourceVec(X[iVec], X.MyLength());
    const ArrayView<double> targetVec(Y[iVec], Y.MyLength());
    for (Array<GlobalIndex>::const_iterator it = sampleLIDs_.begin(), it_end = sampleLIDs_.end(); it != it_end; ++it) {
       targetVec[*it] = sourceVec[*it];
    }
  }

  return 0;
}
int
AmesosGenOp::Apply (const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
{
  if (problem_ == NULL) {
    throw std::logic_error ("AmesosGenOp::Apply: problem_ is NULL");
  }
  if (massMtx_.is_null ()) {
    throw std::logic_error ("AmesosGenOp::Apply: massMtx_ is null");
  }
  if (solver_.is_null ()) {
    throw std::logic_error ("AmesosGenOp::Apply: solver_ is null");
  }

  if (! useTranspose_) {
    // Storage for M*X
    Epetra_MultiVector MX (X.Map (), X.NumVectors ());

    // Apply M*X
    massMtx_->Apply (X, MX);
    Y.PutScalar (0.0);

    // Set the LHS and RHS
    problem_->SetRHS (&MX);
    problem_->SetLHS (&Y);

    // Solve the linear system A*Y = MX
    solver_->Solve ();
  }
  else { // apply the transposed operator
    // Storage for A^{-T}*X
    Epetra_MultiVector ATX (X.Map (), X.NumVectors ());
    Epetra_MultiVector tmpX = const_cast<Epetra_MultiVector&> (X);

    // Set the LHS and RHS
    problem_->SetRHS (&tmpX);
    problem_->SetLHS (&ATX);

    // Solve the linear system A^T*Y = X
    solver_->Solve ();

    // Apply M*ATX
    massMtx_->Apply (ATX, Y);
  }

  return 0; // the method completed correctly
}
Ejemplo n.º 7
0
Teuchos::RCP<Epetra_LinearProblem> build_problem_mm(Teuchos::ParameterList& test_params, Epetra_CrsMatrix* A, Epetra_MultiVector* b)
{
  const Epetra_Map& rowmap = A->RowMap();

  Epetra_MultiVector* x = new Epetra_MultiVector(rowmap, 1);
  if (b == NULL) {
std::cout << "creating b = A*random" << std::endl;
    b = new Epetra_MultiVector(rowmap, 1);
    x->Random();

    A->Apply(*x, *b);
  }
  x->PutScalar(0);

  Teuchos::RCP<Epetra_LinearProblem> problem = Teuchos::rcp(new Epetra_LinearProblem(A,x,b));

  return problem;
}
int AmesosBucklingOp::Apply(const Epetra_MultiVector& X, Epetra_MultiVector& Y ) const 
{
    
  // Storage for A*X
  Epetra_MultiVector AX(X.Map(),X.NumVectors());
    
  // Apply A*X
  stiffMtx_->Apply(X, AX);
  Y.PutScalar(0.0);
    
  // Set the LHS and RHS
  problem_->SetRHS(&AX);
  problem_->SetLHS(&Y);

  // Solve the linear system (A-sigma*M)*Y = AX
  solver_->Solve();
  
  return 0;
}
Ejemplo n.º 9
0
//==============================================================================
int Ifpack_Polynomial::
ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
{

  if (!IsComputed())
    IFPACK_CHK_ERR(-3);

  if (PolyDegree_ == 0)
    return 0;

  int nVec = X.NumVectors();
  if (nVec != Y.NumVectors())
    IFPACK_CHK_ERR(-2);

  Time_->ResetStartTime();

  Epetra_MultiVector Xcopy(X);
  if(ZeroStartingSolution_==true) {
    Y.PutScalar(0.0);
  }

  // mfh 20 Mar 2014: IBD never gets used, so I'm commenting out the
  // following lines of code in order to forestall build warnings.
// #ifdef HAVE_IFPACK_EPETRAEXT
//   EpetraExt_PointToBlockDiagPermute* IBD=0;
//   if (UseBlockMode_) IBD=&*InvBlockDiagonal_;
// #endif

  Y.Update(-coeff_[1], Xcopy, 1.0);
  for (int ii = 2; ii < static_cast<int> (coeff_.size ()); ++ii) {
    const Epetra_MultiVector V(Xcopy);
    Operator_->Apply(V,Xcopy);
    Xcopy.Multiply(1.0, *InvDiagonal_, Xcopy, 0.0);
    // Update Y
    Y.Update(-coeff_[ii], Xcopy, 1.0);
  }

  // Flops are updated in each of the following.
  ++NumApplyInverse_;
  ApplyInverseTime_ += Time_->ElapsedTime();
  return(0);
}
Ejemplo n.º 10
0
//==============================================================================
// Note that Ifpack_PointRelaxation and Jacobi is much faster than
// Ifpack_AdditiveSchwarz<Ifpack_PointRelaxation> (because of the
// way the matrix-vector produce is performed).
//
// Another ML-related observation is that the starting solution (in Y)
// is NOT supposed to be zero. This may slow down the application of just
// one sweep of Jacobi.
//
int Ifpack_PointRelaxation::
ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
{
  if (!IsComputed())
    IFPACK_CHK_ERR(-3);

  if (X.NumVectors() != Y.NumVectors())
    IFPACK_CHK_ERR(-2);

  Time_->ResetStartTime();

  // AztecOO gives X and Y pointing to the same memory location,
  // need to create an auxiliary vector, Xcopy
  Teuchos::RefCountPtr< const Epetra_MultiVector > Xcopy;
  if (X.Pointers()[0] == Y.Pointers()[0])
    Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) );
  else
    Xcopy = Teuchos::rcp( &X, false );
    
  if (ZeroStartingSolution_)
    Y.PutScalar(0.0);

  // Flops are updated in each of the following. 
  switch (PrecType_) {
  case IFPACK_JACOBI:
    IFPACK_CHK_ERR(ApplyInverseJacobi(*Xcopy,Y));
    break;
  case IFPACK_GS:
    IFPACK_CHK_ERR(ApplyInverseGS(*Xcopy,Y));
    break;
  case IFPACK_SGS:
    IFPACK_CHK_ERR(ApplyInverseSGS(*Xcopy,Y));
    break;
  default:
    IFPACK_CHK_ERR(-1); // something wrong
  }

  ++NumApplyInverse_;
  ApplyInverseTime_ += Time_->ElapsedTime();
  return(0);
}
//==============================================================================
int Ifpack_PointRelaxation::
ApplyInverseSGS(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
{
  if (ZeroStartingSolution_)
    Y.PutScalar(0.0);

  const Epetra_CrsMatrix* CrsMatrix = dynamic_cast<const Epetra_CrsMatrix*>(&*Matrix_);
  // try to pick the best option; performances may be improved
  // if several sweeps are used.
  if (CrsMatrix != 0)
  {
    if (CrsMatrix->StorageOptimized() && LocalSmoothingIndices_)
      return(ApplyInverseSGS_LocalFastCrsMatrix(CrsMatrix, X, Y));
    else if (CrsMatrix->StorageOptimized())
      return(ApplyInverseSGS_FastCrsMatrix(CrsMatrix, X, Y));
    else
      return(ApplyInverseSGS_CrsMatrix(CrsMatrix, X, Y));
  }
  else
    return(ApplyInverseSGS_RowMatrix(X, Y));
}
int
Stokhos::ProductEpetraOperator::
Apply(const Epetra_MultiVector& Input, Epetra_MultiVector& Result) const
{
  if (useTranspose) {
    EpetraExt::BlockMultiVector sg_input(View, *range_base_map, Input);
    Epetra_MultiVector tmp(Result.Map(), Result.NumVectors());
    Result.PutScalar(0.0);
    for (int i=0; i<coeff_.size(); i++) {
      coeff_[i]->Apply(*(sg_input.GetBlock(i)), tmp);
      Result.Update(1.0, tmp, 1.0);
    }
  }
  else {
    EpetraExt::BlockMultiVector sg_result(View, *range_base_map, Result);
    for (int i=0; i<coeff_.size(); i++)
      coeff_[i]->Apply(Input, *(sg_result.GetBlock(i)));
  }

  return 0;
}
Ejemplo n.º 13
0
//==============================================================================
int Ifpack_DropFilter::
Multiply(bool TransA, const Epetra_MultiVector& X, 
	 Epetra_MultiVector& Y) const
{
  // NOTE: I suppose that the matrix has been localized,
  // hence all maps are trivial.
  int NumVectors = X.NumVectors();
  if (NumVectors != Y.NumVectors())
    IFPACK_CHK_ERR(-1);

  Y.PutScalar(0.0);

  vector<int> Indices(MaxNumEntries_);
  vector<double> Values(MaxNumEntries_);

  for (int i = 0 ; i < NumRows_ ; ++i) {

    int Nnz;
    ExtractMyRowCopy(i,MaxNumEntries_,Nnz,
		     &Values[0], &Indices[0]);
    if (!TransA) {
      // no transpose first
      for (int j = 0 ; j < NumVectors ; ++j) {
	for (int k = 0 ; k < Nnz ; ++k) {
	  Y[j][i] += Values[k] * X[j][Indices[k]];
	}
      }
    }
    else {
      // transpose here
      for (int j = 0 ; j < NumVectors ; ++j) {
	for (int k = 0 ; k < Nnz ; ++k) {
	  Y[j][Indices[k]] += Values[k] * X[j][i];
	}
      }
    }
  }

  return(0);
}
Ejemplo n.º 14
0
//==============================================================================
int Ifpack_Krylov::
ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
{
  
  if (!IsComputed())
    IFPACK_CHK_ERR(-3);

  if (Iterations_ == 0)
    return 0;

  int nVec = X.NumVectors();
  if (nVec != Y.NumVectors())
    IFPACK_CHK_ERR(-2);

  Time_->ResetStartTime();

  // AztecOO gives X and Y pointing to the same memory location,
  // need to create an auxiliary vector, Xcopy
  Teuchos::RCP<Epetra_MultiVector> Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) );
  if(ZeroStartingSolution_==true) {
    Y.PutScalar(0.0);
  }

#ifdef HAVE_IFPACK_AZTECOO
  AztecSolver_ -> SetLHS(&Y);
  AztecSolver_ -> SetRHS(&*Xcopy);
  AztecSolver_ -> Iterate(Iterations_,Tolerance_);
#else
  cout << "You need to configure IFPACK with support for AztecOO" << endl;
  cout << "to use this preconditioner. This may require --enable-aztecoo" << endl;
  cout << "in your configure script." << endl;
  IFPACK_CHK_ERR(-1);
#endif

  // Flops are updated in each of the following. 
  ++NumApplyInverse_;
  ApplyInverseTime_ += Time_->ElapsedTime();
  return(0);
}
//==============================================================================
int Ifpack_SparsityFilter::
Multiply(bool TransA, const Epetra_MultiVector& X,
         Epetra_MultiVector& Y) const
{

  int NumVectors = X.NumVectors();
  if (NumVectors != Y.NumVectors())
    IFPACK_CHK_ERR(-1);

  Y.PutScalar(0.0);

  std::vector<int> Indices(MaxNumEntries_);
  std::vector<double> Values(MaxNumEntries_);

  for (int i = 0 ; i < A_->NumMyRows() ; ++i) {

    int Nnz;
    ExtractMyRowCopy(i,MaxNumEntries_,Nnz,
                     &Values[0], &Indices[0]);
    if (!TransA) {
      // no transpose first
      for (int j = 0 ; j < NumVectors ; ++j) {
        for (int k = 0 ; k < Nnz ; ++k) {
          Y[j][i] += Values[k] * X[j][Indices[k]];
        }
      }
    }
    else {
      // transpose here
      for (int j = 0 ; j < NumVectors ; ++j) {
        for (int k = 0 ; k < Nnz ; ++k) {
          Y[j][Indices[k]] += Values[k] * X[j][i];
        }
      }
    }
  }

  return(0);
}
Ejemplo n.º 16
0
  virtual int Multiply(bool TransA, const Epetra_MultiVector& X, 
                       Epetra_MultiVector& Y) const
  {
    Epetra_MultiVector Xtmp(RowMatrixColMap(), X.NumVectors());
    Xtmp.Import(X, *RowMatrixImporter(), Insert);

    std::vector<int> Indices(MaxNumEntries());
    std::vector<double> Values(MaxNumEntries());

    Y.PutScalar(0.0);

    for (int i = 0 ; i < NumMyRows() ; ++i)
    {
      int NumEntries;
      // use the inlined function
      getrow(i, MaxNumEntries(), NumEntries, &Values[0], &Indices[0]);
      for (int j = 0 ; j < NumEntries ; ++j)
        for (int k = 0 ; k < Y.NumVectors() ; ++k)
          Y[k][i] += Values[j] * Xtmp[k][Indices[j]];
    }
    
    return(0);
  }
Ejemplo n.º 17
0
//-----------------------------------------------------------------------------
// Function      : N_LAS_AmesosGenOp::Apply()
// Purpose       : Applies the operator inv(A)*B*X = Y
// Special Notes :
// Scope         : Public
// Creator       : Heidi Thornquist, SNL 
// Creation Date : 06/04/12
//-----------------------------------------------------------------------------
int N_LAS_AmesosGenOp::Apply(const Epetra_MultiVector& X, Epetra_MultiVector& Y ) const
{ 
  if (!useTranspose_) {

    // Storage for B*X 
    Epetra_MultiVector BX(X.Map(),X.NumVectors());

    // Apply B*X
    B_->Apply(X, BX);
    Y.PutScalar(0.0);

    // Set the LHS and RHS
    problem_->SetRHS(&BX);
    problem_->SetLHS(&Y);

    // Solve the linear system A*Y = BX
    solver_->Solve();
  }
  else {
    // Storage for A^{-T}*X
    Epetra_MultiVector ATX(X.Map(),X.NumVectors());
    Epetra_MultiVector tmpX = const_cast<Epetra_MultiVector&>(X);

    // Set the LHS and RHS
    problem_->SetRHS(&tmpX);
    problem_->SetLHS(&ATX);

    // Solve the linear system A^T*Y = X 
    solver_->Solve();

    // Apply B*ATX
    B_->Apply(ATX, Y);
  }
  
  return 0;
}
Ejemplo n.º 18
0
int BlockPCGSolver::Solve(const Epetra_MultiVector &X, Epetra_MultiVector &Y) const {

  int info = 0;
  int localVerbose = verbose*(MyComm.MyPID() == 0);

  int xr = X.MyLength();

  int wSize = 3*xr;

  if (lWorkSpace < wSize) {
    if (workSpace)
      delete[] workSpace;
    workSpace = new (std::nothrow) double[wSize];
    if (workSpace == 0) {
      info = -1;
      return info;
    }
    lWorkSpace = wSize;
  } // if (lWorkSpace < wSize)

  double *pointer = workSpace;

  Epetra_Vector r(View, X.Map(), pointer);
  pointer = pointer + xr;

  Epetra_Vector p(View, X.Map(), pointer);
  pointer = pointer + xr;

  // Note: Kp and z uses the same memory space
  Epetra_Vector Kp(View, X.Map(), pointer);
  Epetra_Vector z(View, X.Map(), pointer);

  double tmp;
  double initNorm = 0.0, rNorm = 0.0, newRZ = 0.0, oldRZ = 0.0, alpha = 0.0;
  double tolSquare = tolCG*tolCG;

  memcpy(r.Values(), X.Values(), xr*sizeof(double));
  tmp = callBLAS.DOT(xr, r.Values(), 1, r.Values(), 1);
  MyComm.SumAll(&tmp, &initNorm, 1);

  Y.PutScalar(0.0);

  if (localVerbose > 1) {
    std::cout << std::endl;
    std::cout  << " --- PCG Iterations --- " << std::endl;
  }

  int iter;
  for (iter = 1; iter <= iterMax; ++iter) {

    if (Prec) {
      Prec->ApplyInverse(r, z);
    }
    else {
      memcpy(z.Values(), r.Values(), xr*sizeof(double));
    }

    if (iter == 1) {
      tmp = callBLAS.DOT(xr, r.Values(), 1, z.Values(), 1);
      MyComm.SumAll(&tmp, &newRZ, 1);
      memcpy(p.Values(), z.Values(), xr*sizeof(double));
    }
    else {
      oldRZ = newRZ;
      tmp = callBLAS.DOT(xr, r.Values(), 1, z.Values(), 1);
      MyComm.SumAll(&tmp, &newRZ, 1);
      p.Update(1.0, z, newRZ/oldRZ);
    }

    K->Apply(p, Kp);

    tmp = callBLAS.DOT(xr, p.Values(), 1, Kp.Values(), 1);
    MyComm.SumAll(&tmp, &alpha, 1);
    alpha = newRZ/alpha;

    TEUCHOS_TEST_FOR_EXCEPTION(alpha <= 0.0, std::runtime_error,
                         " !!! Non-positive value for p^TKp (" << alpha << ") !!!");

    callBLAS.AXPY(xr, alpha, p.Values(), 1, Y.Values(), 1);

    alpha *= -1.0;
    callBLAS.AXPY(xr, alpha, Kp.Values(), 1, r.Values(), 1);

    // Check convergence
    tmp = callBLAS.DOT(xr, r.Values(), 1, r.Values(), 1);
    MyComm.SumAll(&tmp, &rNorm, 1);

    if (localVerbose > 1) {
      std::cout  << "   Iter. " << iter;
      std::cout.precision(4);
      std::cout.setf(std::ios::scientific, std::ios::floatfield);
      std::cout << " Residual reduction " << std::sqrt(rNorm/initNorm) << std::endl;
    }

    if (rNorm <= tolSquare*initNorm)
      break;

  } // for (iter = 1; iter <= iterMax; ++iter)

  if (localVerbose == 1) {
    std::cout << std::endl;
    std::cout << " --- End of PCG solve ---" << std::endl;
    std::cout << "   Iter. " << iter;
    std::cout.precision(4);
    std::cout.setf(std::ios::scientific, std::ios::floatfield);
    std::cout << " Residual reduction " << std::sqrt(rNorm/initNorm) << std::endl;
    std::cout << std::endl;
  }

  if (localVerbose > 1) {
    std::cout << std::endl;
  }

  numSolve += 1;

  minIter = (iter < minIter) ? iter : minIter;
  maxIter = (iter > maxIter) ? iter : maxIter;
  sumIter += iter;

  return info;
}
//==============================================================================
int LinearProblem_CrsSingletonFilter::UpdateReducedProblem(Epetra_LinearProblem * Problem) {

  int i, j;

  if (Problem==0) EPETRA_CHK_ERR(-1); // Null problem pointer

  FullProblem_ = Problem;
  FullMatrix_ = dynamic_cast<Epetra_RowMatrix *>(Problem->GetMatrix());
  if (FullMatrix_==0) EPETRA_CHK_ERR(-2); // Need a RowMatrix
  if (Problem->GetRHS()==0) EPETRA_CHK_ERR(-3); // Need a RHS
  if (Problem->GetLHS()==0) EPETRA_CHK_ERR(-4); // Need a LHS
  if (!HaveReducedProblem_) EPETRA_CHK_ERR(-5); // Must have set up reduced problem

  // Create pointer to Full RHS, LHS
  Epetra_MultiVector * FullRHS = FullProblem()->GetRHS();
  Epetra_MultiVector * FullLHS = FullProblem()->GetLHS();
  int NumVectors = FullLHS->NumVectors();

  int NumEntries;
  int * Indices;
  double * Values;
  int NumMyRows = FullMatrix()->NumMyRows();
  int ColSingletonCounter = 0;
  for (i=0; i<NumMyRows; i++) {
    int curGRID = FullMatrixRowMap().GID(i);
    if (ReducedMatrixRowMap()->MyGID(curGRID)) { // Check if this row should go into reduced matrix
      EPETRA_CHK_ERR(GetRowGCIDs(i, NumEntries, Values, Indices)); // Get current row (indices global)
      int ierr = ReducedMatrix()->ReplaceGlobalValues(curGRID, NumEntries, 
						      Values, Indices);
      // Positive errors will occur because we are submitting col entries that are not part of
      // reduced system.  However, because we specified a column map to the ReducedMatrix constructor
      // these extra column entries will be ignored and we will be politely reminded by a positive
      // error code
      if (ierr<0) EPETRA_CHK_ERR(ierr); 
    }
    // Otherwise if singleton row we explicitly eliminate this row and solve for corresponding X value
    else {
      EPETRA_CHK_ERR(GetRow(i, NumEntries, Values, Indices)); // Get current row
      if (NumEntries==1) {
	double pivot = Values[0];
	if (pivot==0.0) EPETRA_CHK_ERR(-1); // Encountered zero row, unable to continue
	int indX = Indices[0];
	for (j=0; j<NumVectors; j++)
	  (*tempExportX_)[j][indX] = (*FullRHS)[j][i]/pivot;
      }
      // Otherwise, this is a singleton column and we will scan for the pivot element needed 
      // for post-solve equations
      else {
	j = ColSingletonPivotLIDs_[ColSingletonCounter];
	double pivot = Values[j];
	if (pivot==0.0) EPETRA_CHK_ERR(-2); // Encountered zero column, unable to continue
	ColSingletonPivots_[ColSingletonCounter] = pivot;
	ColSingletonCounter++;
      }
    }
  }

  assert(ColSingletonCounter==NumMyColSingletons_); // Sanity test

  // Update Reduced LHS (Puts any initial guess values into reduced system)

  ReducedLHS_->PutScalar(0.0); // zero out Reduced LHS
  EPETRA_CHK_ERR(ReducedLHS_->Import(*FullLHS, *Full2ReducedLHSImporter_, Insert));
  FullLHS->PutScalar(0.0); // zero out Full LHS since we will inject values as we get them

  // Construct Reduced RHS

  // Zero out temp space
  tempX_->PutScalar(0.0);
  tempB_->PutScalar(0.0);
  
  //Inject known X values into tempX for purpose of computing tempB = FullMatrix*tempX
  // Also inject into full X since we already know the solution

  if (FullMatrix()->RowMatrixImporter()!=0) {
    EPETRA_CHK_ERR(tempX_->Export(*tempExportX_, *FullMatrix()->RowMatrixImporter(), Add));
    EPETRA_CHK_ERR(FullLHS->Export(*tempExportX_, *FullMatrix()->RowMatrixImporter(), Add));
  }
  else {
    tempX_->Update(1.0, *tempExportX_, 0.0);
    FullLHS->Update(1.0, *tempExportX_, 0.0);
  }


  EPETRA_CHK_ERR(FullMatrix()->Multiply(false, *tempX_, *tempB_));

  EPETRA_CHK_ERR(tempB_->Update(1.0, *FullRHS, -1.0)); // tempB now has influence of already-known X values

  ReducedRHS_->PutScalar(0.0);
  EPETRA_CHK_ERR(ReducedRHS_->Import(*tempB_, *Full2ReducedRHSImporter_, Insert));
    return(0);
}
//==============================================================================
int LinearProblem_CrsSingletonFilter::ConstructReducedProblem(Epetra_LinearProblem * Problem) {

  int i, j;
  if (HaveReducedProblem_) EPETRA_CHK_ERR(-1); // Setup already done once.  Cannot do it again
  if (Problem==0) EPETRA_CHK_ERR(-2); // Null problem pointer

  FullProblem_ = Problem;
  FullMatrix_ = dynamic_cast<Epetra_RowMatrix *>(Problem->GetMatrix());
  if (FullMatrix_==0) EPETRA_CHK_ERR(-3); // Need a RowMatrix
  if (Problem->GetRHS()==0) EPETRA_CHK_ERR(-4); // Need a RHS
  if (Problem->GetLHS()==0) EPETRA_CHK_ERR(-5); // Need a LHS
  // Generate reduced row and column maps

  Epetra_MapColoring & RowMapColors = *RowMapColors_;
  Epetra_MapColoring & ColMapColors = *ColMapColors_;

  ReducedMatrixRowMap_ = RowMapColors.GenerateMap(0);
  ReducedMatrixColMap_ = ColMapColors.GenerateMap(0);

  // Create domain and range map colorings by exporting map coloring of column and row maps

  if (FullMatrix()->RowMatrixImporter()!=0) {
    Epetra_MapColoring DomainMapColors(FullMatrixDomainMap());
    EPETRA_CHK_ERR(DomainMapColors.Export(*ColMapColors_, *FullMatrix()->RowMatrixImporter(), AbsMax));
    OrigReducedMatrixDomainMap_ = DomainMapColors.GenerateMap(0);
  }
  else
    OrigReducedMatrixDomainMap_ = ReducedMatrixColMap_;

  if (FullMatrixIsCrsMatrix_) {
    if (FullCrsMatrix()->Exporter()!=0) { // Non-trivial exporter
      Epetra_MapColoring RangeMapColors(FullMatrixRangeMap());
      EPETRA_CHK_ERR(RangeMapColors.Export(*RowMapColors_, *FullCrsMatrix()->Exporter(), 
					   AbsMax));
      ReducedMatrixRangeMap_ = RangeMapColors.GenerateMap(0);
    }
    else
      ReducedMatrixRangeMap_ = ReducedMatrixRowMap_;
  }
  else
    ReducedMatrixRangeMap_ = ReducedMatrixRowMap_;

  // Check to see if the reduced system domain and range maps are the same.
  // If not, we need to remap entries of the LHS multivector so that they are distributed
  // conformally with the rows of the reduced matrix and the RHS multivector
  SymmetricElimination_ = ReducedMatrixRangeMap_->SameAs(*OrigReducedMatrixDomainMap_);
  if (!SymmetricElimination_) 
    ConstructRedistributeExporter(OrigReducedMatrixDomainMap_, ReducedMatrixRangeMap_, 
				  RedistributeDomainExporter_, ReducedMatrixDomainMap_);
  else {
    ReducedMatrixDomainMap_ = OrigReducedMatrixDomainMap_;
    OrigReducedMatrixDomainMap_ = 0;
    RedistributeDomainExporter_ = 0;
  }
  
  // Create pointer to Full RHS, LHS
  Epetra_MultiVector * FullRHS = FullProblem()->GetRHS();
  Epetra_MultiVector * FullLHS = FullProblem()->GetLHS();
  int NumVectors = FullLHS->NumVectors();

  // Create importers
//  cout << "RedDomainMap\n";
//  cout << *ReducedMatrixDomainMap();
//  cout << "FullDomainMap\n";
//  cout << FullMatrixDomainMap();
  Full2ReducedLHSImporter_ = new Epetra_Import(*ReducedMatrixDomainMap(), FullMatrixDomainMap());
//  cout << "RedRowMap\n";
//  cout << *ReducedMatrixRowMap();
//  cout << "FullRHSMap\n";
//  cout << FullRHS->Map();
  Full2ReducedRHSImporter_ = new Epetra_Import(*ReducedMatrixRowMap(), FullRHS->Map());

  // Construct Reduced Matrix
  ReducedMatrix_ = new Epetra_CrsMatrix(Copy, *ReducedMatrixRowMap(), *ReducedMatrixColMap(), 0);

  // Create storage for temporary X values due to explicit elimination of rows
  tempExportX_ = new Epetra_MultiVector(FullMatrixColMap(), NumVectors);

  int NumEntries;
  int * Indices;
  double * Values;
  int NumMyRows = FullMatrix()->NumMyRows();
  int ColSingletonCounter = 0;
  for (i=0; i<NumMyRows; i++) {
    int curGRID = FullMatrixRowMap().GID(i);
    if (ReducedMatrixRowMap()->MyGID(curGRID)) { // Check if this row should go into reduced matrix

      EPETRA_CHK_ERR(GetRowGCIDs(i, NumEntries, Values, Indices)); // Get current row (Indices are global)
      
      int ierr = ReducedMatrix()->InsertGlobalValues(curGRID, NumEntries, 
						     Values, Indices); // Insert into reduce matrix
      // Positive errors will occur because we are submitting col entries that are not part of
      // reduced system.  However, because we specified a column map to the ReducedMatrix constructor
      // these extra column entries will be ignored and we will be politely reminded by a positive
      // error code
      if (ierr<0) EPETRA_CHK_ERR(ierr); 
    }
    else {
      EPETRA_CHK_ERR(GetRow(i, NumEntries, Values, Indices)); // Get current row
      if (NumEntries==1) {
	double pivot = Values[0];
	if (pivot==0.0) EPETRA_CHK_ERR(-1); // Encountered zero row, unable to continue
	int indX = Indices[0];
	for (j=0; j<NumVectors; j++)
	  (*tempExportX_)[j][indX] = (*FullRHS)[j][i]/pivot;
      }
      // Otherwise, this is a singleton column and we will scan for the pivot element needed 
      // for post-solve equations
      else {
	int targetCol = ColSingletonColLIDs_[ColSingletonCounter];
	for (j=0; j<NumEntries; j++) {
	  if (Indices[j]==targetCol) {
	    double pivot = Values[j];
	    if (pivot==0.0) EPETRA_CHK_ERR(-2); // Encountered zero column, unable to continue
	    ColSingletonPivotLIDs_[ColSingletonCounter] = j; // Save for later use
	    ColSingletonPivots_[ColSingletonCounter] = pivot;
	    ColSingletonCounter++;
	    break;
	  }
	}
      }
    }
  }

  // Now convert to local indexing.  We have constructed things so that the domain and range of the
  // matrix will have the same map.  If the reduced matrix domain and range maps were not the same, the
  // differences were addressed in the ConstructRedistributeExporter() method
  EPETRA_CHK_ERR(ReducedMatrix()->FillComplete(*ReducedMatrixDomainMap(), *ReducedMatrixRangeMap()));

  // Construct Reduced LHS (Puts any initial guess values into reduced system)

  ReducedLHS_ = new Epetra_MultiVector(*ReducedMatrixDomainMap(), NumVectors);
  EPETRA_CHK_ERR(ReducedLHS_->Import(*FullLHS, *Full2ReducedLHSImporter_, Insert));
  FullLHS->PutScalar(0.0); // zero out Full LHS since we will inject values as we get them

  // Construct Reduced RHS

  // First compute influence of already-known values of X on RHS
  tempX_ = new Epetra_MultiVector(FullMatrixDomainMap(), NumVectors);
  tempB_ = new Epetra_MultiVector(FullRHS->Map(), NumVectors);
  
  //Inject known X values into tempX for purpose of computing tempB = FullMatrix*tempX
  // Also inject into full X since we already know the solution

  if (FullMatrix()->RowMatrixImporter()!=0) {
    EPETRA_CHK_ERR(tempX_->Export(*tempExportX_, *FullMatrix()->RowMatrixImporter(), Add));
    EPETRA_CHK_ERR(FullLHS->Export(*tempExportX_, *FullMatrix()->RowMatrixImporter(), Add));
  }
  else {
    tempX_->Update(1.0, *tempExportX_, 0.0);
    FullLHS->Update(1.0, *tempExportX_, 0.0);
  }


  EPETRA_CHK_ERR(FullMatrix()->Multiply(false, *tempX_, *tempB_));

  EPETRA_CHK_ERR(tempB_->Update(1.0, *FullRHS, -1.0)); // tempB now has influence of already-known X values

  ReducedRHS_ = new Epetra_MultiVector(*ReducedMatrixRowMap(), FullRHS->NumVectors());
  EPETRA_CHK_ERR(ReducedRHS_->Import(*tempB_, *Full2ReducedRHSImporter_, Insert));

  // Finally construct Reduced Linear Problem
  ReducedProblem_ = new Epetra_LinearProblem(ReducedMatrix_, ReducedLHS_, ReducedRHS_);

  double fn = FullMatrix()->NumGlobalRows();
  double fnnz = FullMatrix()->NumGlobalNonzeros();
  double rn = ReducedMatrix()->NumGlobalRows();
  double rnnz = ReducedMatrix()->NumGlobalNonzeros();

  RatioOfDimensions_ = rn/fn;
  RatioOfNonzeros_ = rnnz/fnnz;
  HaveReducedProblem_ = true;
  
  return(0);
}
Ejemplo n.º 21
0
/* Computes the approximate Schur complement for the wide separator */
Teuchos::RCP<Epetra_CrsMatrix> computeApproxWideSchur(shylu_config *config,
    shylu_symbolic *ssym,   // symbolic structure
    Epetra_CrsMatrix *G, Epetra_CrsMatrix *R,
    Epetra_LinearProblem *LP, Amesos_BaseSolver *solver,
    Ifpack_Preconditioner *ifSolver, Epetra_CrsMatrix *C,
    Epetra_Map *localDRowMap)
{
    int i;
    double relative_thres = config->relative_threshold;

    // Need to create local G (block diagonal portion) , R, C

    // Get row map of G
    //Epetra_Map CrMap = C->RowMap();
    //int *c_rows = CrMap.MyGlobalElements();
    //int *c_cols = (C->ColMap()).MyGlobalElements();
    //int c_totalElems = CrMap.NumGlobalElements();
    //int c_localElems = CrMap.NumMyElements();
    //int c_localcolElems = (C->ColMap()).NumMyElements();

    Epetra_Map GrMap = G->RowMap();
    int *g_rows = GrMap.MyGlobalElements();
    //int g_totalElems = GrMap.NumGlobalElements();
    int g_localElems = GrMap.NumMyElements();

    //Epetra_Map RrMap = R->RowMap();
    //int *r_rows = RrMap.MyGlobalElements();
    //int *r_cols = (R->ColMap()).MyGlobalElements();
    //int r_totalElems = RrMap.NumGlobalElements();
    //int r_localElems = RrMap.NumMyElements();
    //int r_localcolElems = (R->ColMap()).NumMyElements();

    Epetra_SerialComm LComm;
    Epetra_Map G_localRMap (-1, g_localElems, g_rows, 0, LComm);

    int nentries1, gid;
    // maxentries is the maximum of all three possible matrices as the arrays
    // are reused between the three
    int maxentries = max(C->MaxNumEntries(), R->MaxNumEntries());
    maxentries = max(maxentries, G->MaxNumEntries());

    double *values1 = new double[maxentries];
    double *values2 = new double[maxentries];
    double *values3 = new double[maxentries];
    int *indices1 = new int[maxentries];
    int *indices2 = new int[maxentries];
    int *indices3 = new int[maxentries];

    // Sbar - Approximate Schur complement
    Teuchos::RCP<Epetra_CrsMatrix> Sbar = Teuchos::rcp(new Epetra_CrsMatrix(
                                            Copy, GrMap, g_localElems));

    // Include only the block diagonal elements of G in localG
    Epetra_CrsMatrix localG(Copy, G_localRMap, G->MaxNumEntries(), false);
    int cnt, scnt;
    for (i = 0; i < g_localElems ; i++)
    {
        gid = g_rows[i];
        G->ExtractGlobalRowCopy(gid, maxentries, nentries1, values1, indices1);

        cnt = 0;
        scnt = 0;
        for (int j = 0 ; j < nentries1 ; j++)
        {
            if (G->LRID(indices1[j]) != -1)
            {
                values2[cnt] = values1[j];
                indices2[cnt++] = indices1[j];
            }
            else
            {
                // Add it to Sbar immediately
                values3[scnt] = values1[j];
                indices3[scnt++] = indices1[j];
            }
        }

        localG.InsertGlobalValues(gid, cnt, values2, indices2);
        Sbar->InsertGlobalValues(gid, scnt, values3, indices3);
    }
    localG.FillComplete();
    //cout << "Created local G matrix" << endl;

    int nvectors = 16;
    /*ShyLU_Probing_Operator probeop(&localG, &localR, LP, solver, &localC,
                                        localDRowMap, nvectors);*/
    ShyLU_Local_Schur_Operator probeop(config, ssym, &localG, R, LP, solver,
                                        ifSolver, C, localDRowMap, nvectors);

#ifdef DUMP_MATRICES
    //ostringstream fnamestr;
    //fnamestr << "localC" << C->Comm().MyPID() << ".mat";
    //string Cfname = fnamestr.str();
    //EpetraExt::RowMatrixToMatlabFile(Cfname.c_str(), localC);

    //Epetra_Map defMapg(-1, g_localElems, 0, localG.Comm());
    //EpetraExt::ViewTransform<Epetra_CrsMatrix> * ReIdx_MatTransg =
                        //new EpetraExt::CrsMatrix_Reindex( defMapg );
    //Epetra_CrsMatrix t2G = (*ReIdx_MatTransg)( localG );
    //ReIdx_MatTransg->fwd();
    //EpetraExt::RowMatrixToMatlabFile("localG.mat", t2G);
#endif

    //cout << " totalElems in Schur Complement" << totalElems << endl;
    //cout << myPID << " localElems" << localElems << endl;

    // **************** Two collectives here *********************
#ifdef TIMING_OUTPUT
    Teuchos::Time ftime("setup time");
    ftime.start();
#endif
#ifdef TIMING_OUTPUT
    Teuchos::Time app_time("setup time");
#endif

    int nentries;
    // size > maxentries as there could be fill
    // TODO: Currently the size of the two arrays can be one, Even if we switch
    // the loop below the size of the array required is nvectors. Fix it
    double *values = new double[nvectors];
    int *indices = new int[nvectors];
    double *vecvalues;
#ifdef SHYLU_DEBUG
    // mfh 25 May 2015: Don't declare this variable if it's not used.
    // It's only used if SHYLU_DEBUG is defined.
    int dropped = 0;
#endif // SHYLU_DEBUG
    double *maxvalue = new double[nvectors];
#ifdef TIMING_OUTPUT
    ftime.start();
#endif
    int findex = g_localElems / nvectors ;

    int cindex;
    // int mypid = C->Comm().MyPID(); // unused
    Epetra_MultiVector probevec (G_localRMap, nvectors);
    Epetra_MultiVector Scol (G_localRMap, nvectors);
    probevec.PutScalar(0.0);
    for (i = 0 ; i < findex*nvectors ; i+=nvectors)
    {
        // Set the probevec to find block columns of S.
        for (int k = 0; k < nvectors; k++)
        {
            cindex = k+i;
            // TODO: Can do better than this, just need to go to the column map
            // of C, there might be null columns in C
            probevec.ReplaceGlobalValue(g_rows[cindex], k, 1.0);
            //if (mypid == 0)
            //cout << "Changing row to 1.0 " << g_rows[cindex] << endl;
        }

#ifdef TIMING_OUTPUT
        app_time.start();
#endif
        probeop.Apply(probevec, Scol);
#ifdef TIMING_OUTPUT
        app_time.stop();
#endif

        // Reset the probevec to all zeros.
        for (int k = 0; k < nvectors; k++)
        {
            cindex = k+i;
            probevec.ReplaceGlobalValue(g_rows[cindex], k, 0.0);
        }

        Scol.MaxValue(maxvalue);
        nentries = 0;
        for (int j = 0 ; j < g_localElems ; j++)
        {
            for (int k = 0; k < nvectors; k++)
            {
                cindex = k+i;
                vecvalues = Scol[k];
                if ((g_rows[cindex] == g_rows[j])  ||
                (abs(vecvalues[j]/maxvalue[k]) > relative_thres))
                // diagonal entry or large entry.
                {
                    values[nentries] = vecvalues[j];
                    indices[nentries++] = g_rows[cindex];
                }
#ifdef SHYLU_DEBUG
                else if (vecvalues[j] != 0.0)
                {
                    dropped++;
                }
#endif // SHYLU_DEBUG
            }
            Sbar->InsertGlobalValues(g_rows[j], nentries, values,
                        indices);
            nentries = 0;
        }
    }

    if (i < g_localElems)
    {
        nvectors = g_localElems - i;
        probeop.ResetTempVectors(nvectors);
        Epetra_MultiVector probevec1 (G_localRMap, nvectors);
        Epetra_MultiVector Scol1 (G_localRMap, nvectors);

        probevec1.PutScalar(0.0);
        for (int k = 0; k < nvectors; k++)
        {
            cindex = k+i;
            // TODO: Can do better than this, just need to go to the column map
            // of C, there might be null columns in C
            probevec1.ReplaceGlobalValue(g_rows[cindex], k, 1.0);
        }

#ifdef TIMING_OUTPUT
        app_time.start();
#endif
        probeop.Apply(probevec1, Scol1);
#ifdef TIMING_OUTPUT
        app_time.stop();
#endif
        Scol1.MaxValue(maxvalue);
        nentries = 0;
        for (int j = 0 ; j < g_localElems ; j++)
        {
            //cout << "MAX" << maxvalue << endl;
            for (int k = 0; k < nvectors; k++)
            {
                cindex = k+i;
                vecvalues = Scol1[k];
                //nentries = 0; // inserting one entry in each row for now
                if ((g_rows[cindex] == g_rows[j])  ||
                (abs(vecvalues[j]/maxvalue[k]) > relative_thres))
                // diagonal entry or large entry.
                {
                    values[nentries] = vecvalues[j];
                    indices[nentries++] = g_rows[cindex];
                }
#ifdef SHYLU_DEBUG
                else if (vecvalues[j] != 0.0)
                {
                    dropped++;
                }
#endif // SHYLU_DEBUG
            }
            Sbar->InsertGlobalValues(g_rows[j], nentries, values,
                        indices);
            nentries = 0;
        }
    }

#ifdef TIMING_OUTPUT
    ftime.stop();
    cout << "Time in finding and dropping entries" << ftime.totalElapsedTime()
                     << endl;
    ftime.reset();
    cout << "Time in Apply of probing" << app_time.totalElapsedTime() << endl;
    probeop.PrintTimingInfo();
#endif
    Sbar->FillComplete();

#ifdef DUMP_MATRICES
    Epetra_Map defMap2(-1, g_localElems, 0, C->Comm());
    EpetraExt::ViewTransform<Epetra_CrsMatrix> * ReIdx_MatTrans2 =
                        new EpetraExt::CrsMatrix_Reindex( defMap2 );
    Epetra_CrsMatrix t2S = (*ReIdx_MatTrans2)( *Sbar );
    ReIdx_MatTrans2->fwd();
    EpetraExt::RowMatrixToMatlabFile("Schur.mat", t2S);
#endif

#ifdef SHYLU_DEBUG
    cout << "#dropped entries" << dropped << endl;
#endif
    delete[] values;
    delete[] indices;
    delete[] values1;
    delete[] indices1;
    delete[] values2;
    delete[] indices2;
    delete[] values3;
    delete[] indices3;
    delete[] maxvalue;

    return Sbar;
}
Ejemplo n.º 22
0
int
Stokhos::MatrixFreeOperator::
Apply(const Epetra_MultiVector& Input, Epetra_MultiVector& Result) const
{
#ifdef STOKHOS_TEUCHOS_TIME_MONITOR
  TEUCHOS_FUNC_TIME_MONITOR("Stokhos: SG Operator Apply()");
#endif

  // Note for transpose:
  // The stochastic matrix is symmetric, however the matrix blocks may not
  // be.  So the algorithm here is the same whether we are using the transpose
  // or not.  We just apply the transpose of the blocks in the case of
  // applying the global transpose, and make sure the imported Input
  // vectors use the right map.

  // We have to be careful if Input and Result are the same vector.
  // If this is the case, the only possible solution is to make a copy
  const Epetra_MultiVector *input = &Input;
  bool made_copy = false;
  if (Input.Values() == Result.Values() && !is_stoch_parallel) {
    input = new Epetra_MultiVector(Input);
    made_copy = true;
  }

  // Initialize
  Result.PutScalar(0.0);

  const Epetra_Map* input_base_map = domain_base_map.get();
  const Epetra_Map* result_base_map = range_base_map.get();
  if (useTranspose == true) {
    input_base_map = range_base_map.get();
    result_base_map = domain_base_map.get();
  }

  // Allocate temporary storage
  int m = Input.NumVectors();
  if (useTranspose == false &&
      (tmp == Teuchos::null || tmp->NumVectors() != m*max_num_mat_vec))
    tmp = Teuchos::rcp(new Epetra_MultiVector(*result_base_map,
                                              m*max_num_mat_vec));
  else if (useTranspose == true &&
           (tmp_trans == Teuchos::null ||
            tmp_trans->NumVectors() != m*max_num_mat_vec))
    tmp_trans = Teuchos::rcp(new Epetra_MultiVector(*result_base_map,
                                                    m*max_num_mat_vec));
  Epetra_MultiVector *tmp_result;
  if (useTranspose == false)
    tmp_result = tmp.get();
  else
    tmp_result = tmp_trans.get();

  // Map input into column map
  const Epetra_MultiVector *tmp_col;
  if (!is_stoch_parallel)
    tmp_col = input;
  else {
    if (useTranspose == false) {
      if (input_col == Teuchos::null || input_col->NumVectors() != m)
        input_col = Teuchos::rcp(new Epetra_MultiVector(*global_col_map, m));
      input_col->Import(*input, *col_importer, Insert);
      tmp_col = input_col.get();
    }
    else {
      if (input_col_trans == Teuchos::null ||
          input_col_trans->NumVectors() != m)
        input_col_trans =
          Teuchos::rcp(new Epetra_MultiVector(*global_col_map_trans, m));
      input_col_trans->Import(*input, *col_importer_trans, Insert);
      tmp_col = input_col_trans.get();
    }
  }

  // Extract blocks
  EpetraExt::BlockMultiVector sg_input(View, *input_base_map, *tmp_col);
  EpetraExt::BlockMultiVector sg_result(View, *result_base_map, Result);
  for (int i=0; i<input_block.size(); i++)
    input_block[i] = sg_input.GetBlock(i);
  for (int i=0; i<result_block.size(); i++)
    result_block[i] = sg_result.GetBlock(i);

  // Apply block SG operator via
  // w_i =
  //    \sum_{j=0}^P \sum_{k=0}^L J_k v_j < \psi_i \psi_j \psi_k > / <\psi_i^2>
  // for i=0,...,P where P = expansion_size, L = num_blocks, w_j is the jth
  // input block, w_i is the ith result block, and J_k is the kth block operator

  // k_begin and k_end are initialized in the constructor
  const Teuchos::Array<double>& norms = sg_basis->norm_squared();
  for (Cijk_type::k_iterator k_it=k_begin; k_it!=k_end; ++k_it) {
    int k = index(k_it);
    Cijk_type::kj_iterator j_begin = Cijk->j_begin(k_it);
    Cijk_type::kj_iterator j_end = Cijk->j_end(k_it);
    int nj = Cijk->num_j(k_it);
    if (nj > 0) {
      Teuchos::Array<double*> j_ptr(nj*m);
      Teuchos::Array<int> mj_indices(nj*m);
      int l = 0;
      for (Cijk_type::kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
        int j = index(j_it);
        for (int mm=0; mm<m; mm++) {
          j_ptr[l*m+mm] = (*input_block[j])[mm];
          mj_indices[l*m+mm] = l*m+mm;
        }
        l++;
      }
      Epetra_MultiVector input_tmp(View, *input_base_map, &j_ptr[0], nj*m);
      Epetra_MultiVector result_tmp(View, *tmp_result, &mj_indices[0], nj*m);
      if (use_block_apply) {
        (*block_ops)[k].Apply(input_tmp, result_tmp);
      }
      else {
        for (int jj=0; jj<nj*m; jj++)
          (*block_ops)[k].Apply(*(input_tmp(jj)), *(result_tmp(jj)));
      }
      l = 0;
      for (Cijk_type::kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
        int j = index(j_it);
        for (Cijk_type::kji_iterator i_it = Cijk->i_begin(j_it);
             i_it != Cijk->i_end(j_it); ++i_it) {
          int i = index(i_it);
          double c = value(i_it);
          if (scale_op) {
            int i_gid;
            if (useTranspose)
              i_gid = epetraCijk->GCID(j);
            else
              i_gid = epetraCijk->GRID(i);
            c /= norms[i_gid];
          }
          for (int mm=0; mm<m; mm++)
            (*result_block[i])(mm)->Update(c, *result_tmp(l*m+mm), 1.0);
        }
        l++;
      }
    }
  }

  // Destroy blocks
  for (int i=0; i<input_block.size(); i++)
    input_block[i] = Teuchos::null;
  for (int i=0; i<result_block.size(); i++)
    result_block[i] = Teuchos::null;

  if (made_copy)
    delete input;

  return 0;
}
int TestMultiLevelPreconditioner(char ProblemType[],
				 Teuchos::ParameterList & MLList,
				 Epetra_LinearProblem & Problem, double & TotalErrorResidual,
				 double & TotalErrorExactSol)
{
  
  Epetra_MultiVector* lhs = Problem.GetLHS();
  Epetra_MultiVector* rhs = Problem.GetRHS();
  Epetra_CrsMatrix* A = dynamic_cast<Epetra_CrsMatrix*>(Problem.GetMatrix());
  int PID = A->Comm().MyPID();
  int numProcs = A->Comm().NumProc();
  RCP<const Epetra_RowMatrix> Arcp = Teuchos::rcp(A, false);
  double n1, n2,nf;
  
  // ======================================== //
  // create a rhs corresponding to lhs or 1's //
  // ======================================== //
  
  lhs->PutScalar(1.0);
  A->Multiply(false,*lhs,*rhs);

  lhs->PutScalar(0.0);
  MLList.set("ML output", 0);

  RowMatrixToMatlabFile("mat_f.dat",*A);  
  MultiVectorToMatrixMarketFile("lhs_f.dat",*lhs,0,0,false);
  MultiVectorToMatrixMarketFile("rhs_f.dat",*rhs,0,0,false);

  
  Epetra_Time Time(A->Comm());
  /* Build the Zoltan list - Group #1 */
  ParameterList Zlist1,Sublist1;
  Sublist1.set("DEBUG_LEVEL","0");
  Sublist1.set("NUM_GLOBAL_PARTITIONS","2");
  Zlist1.set("Zoltan",Sublist1);
  
  /* Start Isorropia's Ninja Magic - Group #1 */
  RefCountPtr<Isorropia::Epetra::Partitioner> partitioner1 =
    Isorropia::Epetra::create_partitioner(Arcp, Zlist1);
  Isorropia::Epetra::Redistributor rd1(partitioner1);

  Teuchos::RCP<Epetra_CrsMatrix> ResA1=rd1.redistribute(*A);
  Teuchos::RCP<Epetra_MultiVector> ResX1=rd1.redistribute(*lhs);
  Teuchos::RCP<Epetra_MultiVector> ResB1=rd1.redistribute(*rhs);

  RestrictedCrsMatrixWrapper RW1;
  RW1.restrict_comm(ResA1);
  RestrictedMultiVectorWrapper RX1,RB1;
  RX1.restrict_comm(ResX1);
  RB1.restrict_comm(ResB1);

  /* Build the Zoltan list - Group #2 */
  ParameterList Zlist2,Sublist2;
  Sublist2.set("DEBUG_LEVEL","0");
  if(PID > 1) Sublist2.set("NUM_LOCAL_PARTITIONS","1");
  else Sublist2.set("NUM_LOCAL_PARTITIONS","0");
  Zlist2.set("Zoltan",Sublist2);
    
  /* Start Isorropia's Ninja Magic - Group #2 */
  RefCountPtr<Isorropia::Epetra::Partitioner> partitioner2 =
    Isorropia::Epetra::create_partitioner(Arcp, Zlist2);
  Isorropia::Epetra::Redistributor rd2(partitioner2);

  Teuchos::RCP<Epetra_CrsMatrix> ResA2=rd2.redistribute(*A);
  Teuchos::RCP<Epetra_MultiVector> ResX2=rd2.redistribute(*lhs);
  Teuchos::RCP<Epetra_MultiVector> ResB2=rd2.redistribute(*rhs);

  RestrictedCrsMatrixWrapper RW2;
  RW2.restrict_comm(ResA2);
  RestrictedMultiVectorWrapper RX2,RB2;
  RX2.restrict_comm(ResX2);
  RB2.restrict_comm(ResB2);

  if(RW1.RestrictedProcIsActive()){
    Teuchos::RCP<Epetra_CrsMatrix> SubA1 = RW1.RestrictedMatrix();
    Teuchos::RCP<Epetra_MultiVector> SubX1 = RX1.RestrictedMultiVector();
    Teuchos::RCP<Epetra_MultiVector> SubB1 = RB1.RestrictedMultiVector();    
    ML_Epetra::MultiLevelPreconditioner * SubPrec1 = new ML_Epetra::MultiLevelPreconditioner(*SubA1, MLList, true);        

    Epetra_LinearProblem Problem1(&*SubA1,&*SubX1,&*SubB1);
    AztecOO solver1(Problem1);
    solver1.SetPrecOperator(SubPrec1);  
    solver1.SetAztecOption(AZ_solver, AZ_gmres);
    solver1.SetAztecOption(AZ_output, 32);
    solver1.SetAztecOption(AZ_kspace, 160);  
    solver1.Iterate(1550, 1e-12);
    delete SubPrec1;

  }
  else{
    Teuchos::RCP<Epetra_CrsMatrix> SubA2 = RW2.RestrictedMatrix();
    Teuchos::RCP<Epetra_MultiVector> SubX2 = RX2.RestrictedMultiVector();
    Teuchos::RCP<Epetra_MultiVector> SubB2 = RB2.RestrictedMultiVector();        
    ML_Epetra::MultiLevelPreconditioner * SubPrec2 = new ML_Epetra::MultiLevelPreconditioner(*SubA2, MLList, true);        
    
    Epetra_LinearProblem Problem2(&*SubA2,&*SubX2,&*SubB2);
    AztecOO solver2(Problem2);
    solver2.SetPrecOperator(SubPrec2);  
    solver2.SetAztecOption(AZ_solver, AZ_gmres);
    solver2.SetAztecOption(AZ_output, 32);
    solver2.SetAztecOption(AZ_kspace, 160);  
    solver2.Iterate(1550, 1e-12);
    delete SubPrec2;

  }

  /* Post-processing exports */
  Epetra_MultiVector ans1(*lhs), ans2(*lhs); 
  rd1.redistribute_reverse(*ResX1,ans1);
  rd2.redistribute_reverse(*ResX2,ans2);
  
  /* Run on Full Problem */
  A->Comm().Barrier();    
  ML_Epetra::MultiLevelPreconditioner * FullPrec = new ML_Epetra::MultiLevelPreconditioner(*A, MLList, true);          
  AztecOO solverF(Problem);
  solverF.SetPrecOperator(FullPrec);  
  solverF.SetAztecOption(AZ_solver, AZ_gmres);
  solverF.SetAztecOption(AZ_output, 32);
  solverF.SetAztecOption(AZ_kspace, 160);  
  solverF.Iterate(1550, 1e-12);
  delete FullPrec;


  /* Solution Comparison */
  ans1.Update(1.0,*lhs,-1.0);
  ans2.Update(1.0,*lhs,-1.0);
  ans1.Norm2(&n1);
  ans2.Norm2(&n2);
  if(!PID) {
    printf("Norm Diff 1 = %6.4e\n",n1);
    printf("Norm Diff 2 = %6.4e\n",n2);
  }

  TotalErrorExactSol += n1 + n2;
    
  
}
int TestMultiLevelPreconditioner(char ProblemType[],
				 Teuchos::ParameterList & MLList,
				 Epetra_LinearProblem & Problem, double & TotalErrorResidual,
				 double & TotalErrorExactSol)
{
  
  Epetra_MultiVector* lhs = Problem.GetLHS();
  Epetra_MultiVector* rhs = Problem.GetRHS();
  Epetra_RowMatrix* A = Problem.GetMatrix();
  
  // ======================================== //
  // create a rhs corresponding to lhs or 1's //
  // ======================================== //
  
  lhs->PutScalar(1.0);
  A->Multiply(false,*lhs,*rhs);

  lhs->PutScalar(0.0);
  
  Epetra_Time Time(A->Comm());

  Epetra_MultiVector lhs2(*lhs);
  Epetra_MultiVector rhs2(*rhs);
  
  // =================== //
  // call ML and AztecOO //
  // =================== //

  AztecOO solver(Problem);
  
  MLList.set("ML output", 0);
  ML_set_random_seed(24601);
  ML_Epetra::MultiLevelPreconditioner * MLPrec = new ML_Epetra::MultiLevelPreconditioner(*A, MLList, true);
  
  // tell AztecOO to use this preconditioner, then solve
  solver.SetPrecOperator(MLPrec);
  
  solver.SetAztecOption(AZ_solver, AZ_gmres);
  solver.SetAztecOption(AZ_output, 32);
  solver.SetAztecOption(AZ_kspace, 160);
  
  solver.Iterate(1550, 1e-12);
  
  delete MLPrec;



  // ================================= //
  // call ML and AztecOO a second time //
  // ================================= // 
  Epetra_LinearProblem Problem2(A,&lhs2,&rhs2);

  AztecOO solver2(Problem2);
  ML_set_random_seed(24601);  
  ML_Epetra::MultiLevelPreconditioner * MLPrec2 = new ML_Epetra::MultiLevelPreconditioner(*A, MLList, true);
  
  // tell AztecOO to use this preconditioner, then solve
  solver2.SetPrecOperator(MLPrec2);
  
  solver2.SetAztecOption(AZ_solver, AZ_gmres);
  solver2.SetAztecOption(AZ_output, 32);
  solver2.SetAztecOption(AZ_kspace, 160);
  
  solver2.Iterate(1550, 1e-12);
  
  
  
  // ==================================================== //
  // compute difference between the two ML solutions //
  // ==================================================== //
  
  double d = 0.0, d_tot = 0.0;
  
  for( int i=0 ; i<lhs->Map().NumMyElements() ; ++i )
    d += ((*lhs)[0][i] - lhs2[0][i]) * ((*lhs)[0][i] - lhs2[0][i]);
  
  A->Comm().SumAll(&d,&d_tot,1);
  string msg = ProblemType;
  if (A->Comm().MyPID() == 0) {
    cout << msg << "......Using " << A->Comm().NumProc() << " processes" << endl;
    cout << msg << "......||x_1 - x_2||_2 = " << sqrt(d_tot) << endl;
    cout << msg << "......Total Time = " << Time.ElapsedTime() << endl;
  }
  
  TotalErrorExactSol += sqrt(d_tot);
  
  return( solver.NumIters() );
  
}
Ejemplo n.º 25
0
 //! Initialize vector
 static void init(Epetra_MultiVector& vec, double val) { 
   vec.PutScalar(val); }
// ============================================================================ 
int ML_Epetra::MatrixFreePreconditioner::
Compute(const Epetra_CrsGraph& Graph, Epetra_MultiVector& NullSpace)
{
  Epetra_Time TotalTime(Comm());

  const int NullSpaceDim = NullSpace.NumVectors();
  // get parameters from the list
  std::string PrecType = List_.get("prec: type", "hybrid");
  std::string SmootherType = List_.get("smoother: type", "Jacobi");
  std::string ColoringType = List_.get("coloring: type", "JONES_PLASSMAN");
  int PolynomialDegree = List_.get("smoother: degree", 3);
  std::string DiagonalColoringType = List_.get("diagonal coloring: type", "JONES_PLASSMAN");
  int MaximumIterations = List_.get("eigen-analysis: max iters", 10);
  std::string EigenType_ = List_.get("eigen-analysis: type", "cg");
  double boost = List_.get("eigen-analysis: boost for lambda max", 1.0);
  int OutputLevel = List_.get("ML output", -47);
  if (OutputLevel == -47) OutputLevel =  List_.get("output", 10);
  omega_ = List_.get("smoother: damping", omega_);
  ML_Set_PrintLevel(OutputLevel);
  bool LowMemory = List_.get("low memory", true);
  double AllocationFactor = List_.get("AP allocation factor", 0.5);

  verbose_ = (MyPID() == 0 && ML_Get_PrintLevel() > 5);

  // ================ //
  // check parameters //
  // ================ //

  if (PrecType == "presmoother only")
    PrecType_ = ML_MFP_PRESMOOTHER_ONLY;
  else if (PrecType == "hybrid")
    PrecType_ = ML_MFP_HYBRID;
  else if (PrecType == "additive")
    PrecType_ = ML_MFP_ADDITIVE;
  else
    ML_CHK_ERR(-3); // not recognized

  if (SmootherType == "none")
    SmootherType_ = ML_MFP_NONE;
  else if (SmootherType == "Jacobi")
    SmootherType_ = ML_MFP_JACOBI;
  else if (SmootherType == "block Jacobi")
    SmootherType_ = ML_MFP_BLOCK_JACOBI;
  else if (SmootherType == "Chebyshev")
    SmootherType_ = ML_MFP_CHEBY;
  else
    ML_CHK_ERR(-4); // not recognized

  if (AllocationFactor <= 0.0)
    ML_CHK_ERR(-1); // should be positive

  // =============================== //
  // basic checkings and some output //
  // =============================== //
  
  int OperatorDomainPoints =  Operator_.OperatorDomainMap().NumGlobalPoints();
  int OperatorRangePoints =  Operator_.OperatorRangeMap().NumGlobalPoints();
  int GraphBlockRows = Graph.NumGlobalBlockRows();
  int GraphNnz = Graph.NumGlobalNonzeros();
  NumPDEEqns_ = OperatorRangePoints / GraphBlockRows;
  NumMyBlockRows_ = Graph.NumMyBlockRows();

  if (OperatorDomainPoints != OperatorRangePoints)
    ML_CHK_ERR(-1); // only square matrices

  if (OperatorRangePoints % NumPDEEqns_ != 0)
    ML_CHK_ERR(-2); // num PDEs seems not constant

  if (verbose_)
  {
    ML_print_line("=",78);
    std::cout << "*** " << std::endl;
    std::cout << "*** ML_Epetra::MatrixFreePreconditioner" << std::endl;
    std::cout << "***" << std::endl;
    std::cout << "Number of rows and columns      = " << OperatorDomainPoints << std::endl;
    std::cout << "Number of rows per processor    = " << OperatorDomainPoints / Comm().NumProc()
         << " (on average)" << std::endl;
    std::cout << "Number of rows in the graph     = " << GraphBlockRows << std::endl;
    std::cout << "Number of nonzeros in the graph = " << GraphNnz << std::endl;
    std::cout << "Processors used in computation  = " << Comm().NumProc() << std::endl;
    std::cout << "Number of PDE equations         = " << NumPDEEqns_ << std::endl;
    std::cout << "Null space dimension            = " << NullSpaceDim << std::endl;
    std::cout << "Preconditioner type             = " << PrecType << std::endl;
    std::cout << "Smoother type                   = " << SmootherType << std::endl;
    std::cout << "Coloring type                   = " << ColoringType << std::endl;
    std::cout << "Allocation factor               = " << AllocationFactor << std::endl;
    std::cout << "Number of V-cycles for C        = " << List_.sublist("ML list").get("cycle applications", 1) << std::endl;
    std::cout << std::endl;
  }

  ResetStartTime();

  // ==================================== //
  // compute the inverse of the diagonal, //
  // control that no elements are zero.   //
  // ==================================== //
  
  for (int i = 0; i < InvPointDiagonal_->MyLength(); ++i)
    if ((*InvPointDiagonal_)[i] != 0.0)
      (*InvPointDiagonal_)[i] = 1.0 / (*InvPointDiagonal_)[i];

  // ========================================================= //
  // Setup the smoother. I need to extract the block diagonal  //
  // only if block Jacobi is used. For Chebyshev, I scale with //
  // the point diagonal only. In this latter case, I need to   //
  // compute lambda_max of the scaled operator.                //
  // ========================================================= //
  
  // probes for the block diagonal of the matrix.
  if (SmootherType_ == ML_MFP_JACOBI ||
      SmootherType_ == ML_MFP_NONE)
  {
    // do-nothing here
  }
  else if (SmootherType_ == ML_MFP_BLOCK_JACOBI)
  {
    if (verbose_);
      std::cout << "Diagonal coloring type         = " << DiagonalColoringType << std::endl;
    ML_CHK_ERR(GetBlockDiagonal(Graph, DiagonalColoringType));

    AddAndResetStartTime("block diagonal construction", true);
  }
  else if (SmootherType_ == ML_MFP_CHEBY)
  {
    double lambda_min = 0.0;
    double lambda_max = 0.0;
    Teuchos::ParameterList IFPACKList;

    if (EigenType_ == "power-method")
    {
      ML_CHK_ERR(Ifpack_Chebyshev::PowerMethod(Operator_, *InvPointDiagonal_,
                                               MaximumIterations, lambda_max));
    }
    else if(EigenType_ == "cg")
    {
      ML_CHK_ERR(Ifpack_Chebyshev::CG(Operator_, *InvPointDiagonal_,
                                      MaximumIterations, lambda_min, 
                                      lambda_max));
    }
    else
      ML_CHK_ERR(-1); // not recognized

    if (verbose_)
    {
      std::cout << "Using Chebyshev smoother of degree " << PolynomialDegree << std::endl;
      std::cout << "Estimating eigenvalues using " <<  EigenType_ << std::endl;
      std::cout << "lambda_min = " << lambda_min << ", ";
      std::cout << "lambda_max = " << lambda_max << std::endl;
    }

    IFPACKList.set("chebyshev: min eigenvalue", lambda_min);
    IFPACKList.set("chebyshev: max eigenvalue", boost * lambda_max);
    // FIXME: this allocates a new std::vector inside
    IFPACKList.set("chebyshev: operator inv diagonal", InvPointDiagonal_.get());
    IFPACKList.set("chebyshev: degree", PolynomialDegree);

    PreSmoother_ = rcp(new Ifpack_Chebyshev((Epetra_Operator*)(&Operator_)));
    if (PreSmoother_.get() == 0) ML_CHK_ERR(-1); // memory error?

    IFPACKList.set("chebyshev: zero starting solution", true);
    ML_CHK_ERR(PreSmoother_->SetParameters(IFPACKList));
    ML_CHK_ERR(PreSmoother_->Initialize());
    ML_CHK_ERR(PreSmoother_->Compute());

    PostSmoother_ = rcp(new Ifpack_Chebyshev((Epetra_Operator*)(&Operator_)));
    if (PostSmoother_.get() == 0) ML_CHK_ERR(-1); // memory error?

    IFPACKList.set("chebyshev: zero starting solution", false);
    ML_CHK_ERR(PostSmoother_->SetParameters(IFPACKList));
    ML_CHK_ERR(PostSmoother_->Initialize());
    ML_CHK_ERR(PostSmoother_->Compute());
  }

  // ========================================================= //
  // building P and R for block graph. This is done by working //
  // on the Graph_ object. Support is provided for local       //
  // aggregation schemes only so that all is basically local.  //
  // Then, build the block graph coarse problem.               //
  // ========================================================= //
  
  // ML wrapper for Graph_
  ML_Operator* Graph_ML = ML_Operator_Create(Comm_ML());
  ML_Operator_WrapEpetraCrsGraph(const_cast<Epetra_CrsGraph*>(&Graph), Graph_ML);

  ML_Aggregate* BlockAggr_ML = 0;
  ML_Operator* BlockPtent_ML = 0, *BlockRtent_ML = 0,* CoarseGraph_ML = 0;

  if (verbose_) std::cout << std::endl;

  ML_CHK_ERR(Coarsen(Graph_ML, &BlockAggr_ML, &BlockPtent_ML, &BlockRtent_ML, 
                     &CoarseGraph_ML));

  if (verbose_) std::cout << std::endl;

  Epetra_CrsMatrix* GraphCoarse;
  ML_CHK_ERR(ML_Operator2EpetraCrsMatrix(CoarseGraph_ML, GraphCoarse));

  // used later to estimate the entries in AP
  ML_Operator* CoarseAP_ML = ML_Operator_Create(Comm_ML());
  ML_2matmult(Graph_ML, BlockPtent_ML, CoarseAP_ML, ML_CSR_MATRIX);

  int AP_MaxNnzRow, itmp = CoarseAP_ML->max_nz_per_row;
  Comm().MaxAll(&itmp, &AP_MaxNnzRow, 1);
  ML_Operator_Destroy(&CoarseAP_ML);

  int NumAggregates = BlockPtent_ML->invec_leng;
  ML_Operator_Destroy(&BlockRtent_ML);
  ML_Operator_Destroy(&CoarseGraph_ML);

  AddAndResetStartTime("construction of block C, R, and P", true);
  if (verbose_) std::cout << std::endl;

  // ================================================== //
  // coloring of block graph:                           //
  // - color of block row `i' is given by `ColorMap[i]' //
  // - number of colors is ColorMap.NumColors().        //
  // ================================================== //
  
  ResetStartTime();

  CrsGraph_MapColoring* MapColoringTransform;
  
  if (ColoringType == "JONES_PLASSMAN")
    MapColoringTransform = new CrsGraph_MapColoring (CrsGraph_MapColoring::JONES_PLASSMAN,
                                                     0, false, 0);
  else if (ColoringType == "PSEUDO_PARALLEL")
    MapColoringTransform = new CrsGraph_MapColoring (CrsGraph_MapColoring::PSEUDO_PARALLEL,
                                                     0, false, 0);
  else if (ColoringType == "GREEDY")
    MapColoringTransform = new CrsGraph_MapColoring (CrsGraph_MapColoring::GREEDY,
                                                     0, false, 0);
  else if (ColoringType == "LUBY")
    MapColoringTransform = new CrsGraph_MapColoring (CrsGraph_MapColoring::LUBY,
                                                     0, false, 0);
  else 
    ML_CHK_ERR(-1);

  Epetra_MapColoring* ColorMap = &(*MapColoringTransform)(const_cast<Epetra_CrsGraph&>(GraphCoarse->Graph()));

  // move the information from ColorMap to std::vector Colors
  const int NumColors = ColorMap->MaxNumColors();
  RefCountPtr<Epetra_IntSerialDenseVector> Colors = rcp(new Epetra_IntSerialDenseVector(GraphCoarse->Graph().NumMyRows()));
  for (int i = 0; i < GraphCoarse->Graph().NumMyRows(); ++i)
    (*Colors)[i] = (*ColorMap)[i];

  delete MapColoringTransform;
  delete ColorMap; ColorMap = 0;
  delete GraphCoarse;

  AddAndResetStartTime("coarse graph coloring", true);
  if (verbose_) std::cout << std::endl;

  // get some other information about the aggregates, to be used
  // in the QR factorization of the null space. NodesOfAggregate
  // contains the local ID of block rows contained in each aggregate.

  // FIXME: make it faster
  std::vector< std::vector<int> > NodesOfAggregate(NumAggregates);

  for (int i = 0; i < Graph.NumMyBlockRows(); ++i)
  {
    int AID = BlockAggr_ML->aggr_info[0][i];
    NodesOfAggregate[AID].push_back(i);
  }

  int MaxAggrSize = 0;
  for (int i = 0; i < NumAggregates; ++i)
  {
    const int& MySize = NodesOfAggregate[i].size();
    if (MySize > MaxAggrSize) MaxAggrSize = MySize;
  }

  // collect aggregate information, and mark all nodes that are
  // connected with each aggregate. These nodes will have a possible
  // nonzero entry after the matrix-matrix product between the Operator_
  // and the tentative prolongator.

  std::vector<vector<int> > aggregates(NumAggregates);
  std::vector<int>::iterator iter;

  for (int i = 0; i < NumAggregates; ++i)
    aggregates[i].reserve(MaxAggrSize);

  for (int i = 0; i < Graph.NumMyBlockRows(); ++i)
  {
    int AID = BlockAggr_ML->aggr_info[0][i];

    int NumEntries;
    int* Indices;

    Graph.ExtractMyRowView(i, NumEntries, Indices);

    for (int k = 0; k < NumEntries; ++k)
    {
      // FIXME: use hash??
      const int& GCID = Graph.ColMap().GID(Indices[k]);

      iter = find(aggregates[AID].begin(), aggregates[AID].end(), GCID);
      if (iter == aggregates[AID].end())
        aggregates[AID].push_back(GCID);
    }
  }
  
  int* BlockNodeList = Graph.ColMap().MyGlobalElements();

  // finally get rid of the ML_Aggregate structure.
  ML_Aggregate_Destroy(&BlockAggr_ML);

  const Epetra_Map& FineMap = Operator_.OperatorDomainMap();
  Epetra_Map CoarseMap(-1, NumAggregates * NullSpaceDim, 0, Comm());
  RefCountPtr<Epetra_Map> BlockNodeListMap = 
    rcp(new Epetra_Map(-1, Graph.ColMap().NumMyElements(),
                       BlockNodeList, 0, Comm()));

  std::vector<int> NodeList(Graph.ColMap().NumMyElements() * NumPDEEqns_);
  for (int i = 0; i < Graph.ColMap().NumMyElements(); ++i)
    for (int m = 0; m < NumPDEEqns_; ++m)
      NodeList[i * NumPDEEqns_ + m] = BlockNodeList[i] * NumPDEEqns_ + m;
  RefCountPtr<Epetra_Map> NodeListMap = 
    rcp(new Epetra_Map(-1, NodeList.size(), &NodeList[0], 0, Comm()));

  AddAndResetStartTime("data structures", true);

  // ====================== //
  // process the null space //
  // ====================== //

  // CHECKME
  Epetra_MultiVector NewNullSpace(CoarseMap, NullSpaceDim);
  NewNullSpace.PutScalar(0.0);

  if (NullSpaceDim == 1)
  {
    double* ns_ptr = NullSpace.Values();

    for (int AID = 0; AID < NumAggregates; ++AID)
    {
      double dtemp = 0.0;
      for (int j = 0; j < (int) (NodesOfAggregate[AID].size()); j++)
        for (int m = 0; m < NumPDEEqns_; ++m)
        {
          const int& pos = NodesOfAggregate[AID][j] * NumPDEEqns_ + m;
          dtemp += (ns_ptr[pos] * ns_ptr[pos]);
        }
      dtemp = std::sqrt(dtemp);

      NewNullSpace[0][AID] = dtemp;

      dtemp = 1.0 / dtemp;

      for (int j = 0; j < (int) (NodesOfAggregate[AID].size()); j++)
        for (int m = 0; m < NumPDEEqns_; ++m)
          ns_ptr[NodesOfAggregate[AID][j] * NumPDEEqns_ + m] *= dtemp;
    }
  }
  else
  {
    // FIXME
    std::vector<double> qr_ptr(MaxAggrSize * NumPDEEqns_ * MaxAggrSize * NumPDEEqns_);
    std::vector<double> tmp_ptr(MaxAggrSize * NumPDEEqns_ * NullSpaceDim);

    std::vector<double> work(NullSpaceDim);
    int info;

    for (int AID = 0; AID < NumAggregates; ++AID)
    {
      int MySize = NodesOfAggregate[AID].size();
      int MyFullSize = NodesOfAggregate[AID].size() * NumPDEEqns_;
      int lwork = NullSpaceDim;

      for (int k = 0; k < NullSpaceDim; ++k)
        for (int j = 0; j < MySize; ++j)
          for (int m = 0; m < NumPDEEqns_; ++m)
            qr_ptr[k * MyFullSize + j * NumPDEEqns_ + m] = 
              NullSpace[k][NodesOfAggregate[AID][j] * NumPDEEqns_ + m];

      DGEQRF_F77(&MyFullSize, (int*)&NullSpaceDim, &qr_ptr[0], 
                 &MyFullSize, &tmp_ptr[0], &work[0], &lwork, &info);

      ML_CHK_ERR(info);

      if (work[0] > lwork) work.resize((int) work[0]);

      // the upper triangle of qr_tmp is now R, so copy that into the 
      //  new nullspace

      for (int j = 0; j < NullSpaceDim; j++)
        for (int k = j; k < NullSpaceDim; k++)
          NewNullSpace[k][AID * NullSpaceDim + j] = qr_ptr[j + MyFullSize * k];
		 
      // to get this block of P, need to run qr_tmp through another LAPACK 
      // function:

      DORGQR_F77(&MyFullSize, (int*)&NullSpaceDim, (int*)&NullSpaceDim, 
                 &qr_ptr[0], &MyFullSize, &tmp_ptr[0], &work[0], &lwork, &info);
      ML_CHK_ERR(info); // dgeqtr returned a non-zero

      if (work[0] > lwork) work.resize((int) work[0]);

      // insert the Q block into the null space

      for (int k = 0; k < NullSpaceDim; ++k)
        for (int j = 0; j < MySize; ++j)
          for (int m = 0; m < NumPDEEqns_; ++m)
          {
            int LRID = NodesOfAggregate[AID][j] * NumPDEEqns_ + m;
            double& val = qr_ptr[k * MyFullSize + j * NumPDEEqns_ + m];
            NullSpace[k][LRID] = val;
          }
    }
  }

  AddAndResetStartTime("null space setup", true);

  if (verbose_)
    std::cout << "Number of colors on processor " << Comm().MyPID() << " = "
        << NumColors << std::endl;
  if (verbose_)
    std::cout << "Maximum number of colors = " << NumColors << std::endl;

  RefCountPtr<Epetra_FECrsMatrix> AP;
  
  // try to get a good estimate of the nonzeros per row.
  // This is a compromize between efficiency -- that is, reduce
  // the memory allocation processes, and memory usage -- that, is
  // overestimating can actually kill the code. Basically, this is
  // all junk due to our dear friend, the Cray XT3.
  
  AP = rcp(new Epetra_FECrsMatrix(Copy, FineMap, (int)
                                  (AllocationFactor * AP_MaxNnzRow * NullSpaceDim)));
  if (AP.get() == 0) throw(-1);

  if (!LowMemory)
  {
    // ================================================= //
    // allocate one big chunk of memory, and use View    //             
    // to create Epetra_MultiVectors. Note that          //
    // NumColors * NullSpace can indeed be a quite large //
    // value. To reduce the memory consumption, both     //
    // ColoredAP and ExtColoredAP use the same memory    //
    // array.                                            //
    // ================================================= //
    
    Epetra_MultiVector* ColoredP;
    std::vector<double> ColoredAP_ptr;

    try
    {
      ColoredP = new Epetra_MultiVector(FineMap, NumColors * NullSpaceDim);
      ColoredAP_ptr.resize(NumColors * NullSpaceDim * NodeListMap->NumMyPoints());
    }
    catch (std::exception& rhs)
    {
      catch_message("the allocation of ColoredP", rhs.what(), __FILE__, __LINE__);
      ML_CHK_ERR(-1);
    }
    catch (...)
    {
      catch_message("the allocation of ColoredP", "", __FILE__, __LINE__);
      ML_CHK_ERR(-1);
    }

    int ColoredAP_LDA = NodeListMap->NumMyPoints();

    ColoredP->PutScalar(0.0);

    for (int i = 0; i < BlockPtent_ML->outvec_leng; ++i)
    {
      int allocated = 1;
      int NumEntries;
      int Indices;
      double Values;
      int ierr = ML_Operator_Getrow(BlockPtent_ML, 1 ,&i, allocated,
                                    &Indices,&Values,&NumEntries);
      if (ierr < 0)
        ML_CHK_ERR(-1);

      assert (NumEntries == 1); // this is the block P
      const int& Color = (*Colors)[Indices] - 1;
      for (int k = 0; k < NumPDEEqns_; ++k)
        for (int j = 0; j < NullSpaceDim; ++j)
          (*ColoredP)[(Color * NullSpaceDim + j)][i * NumPDEEqns_ + k] = 
            NullSpace[j][i * NumPDEEqns_ + k];
    }

    ML_Operator_Destroy(&BlockPtent_ML);

    Epetra_MultiVector ColoredAP(View, Operator_.OperatorRangeMap(), 
                                 &ColoredAP_ptr[0], ColoredAP_LDA, 
                                 NumColors * NullSpaceDim);
    // move ColoredAP into ColoredP. This should not be required.
    // but I prefer to skip strange games with View pointers
    Operator_.Apply(*ColoredP, ColoredAP);
    *ColoredP = ColoredAP;

    // FIXME: only if NumProc > 1
    Epetra_MultiVector ExtColoredAP(View, *NodeListMap, 
                                 &ColoredAP_ptr[0], ColoredAP_LDA, 
                                 NumColors * NullSpaceDim);

    try 
    {
      Epetra_Import Importer(*NodeListMap, Operator_.OperatorRangeMap());
      ExtColoredAP.Import(*ColoredP, Importer, Insert);
    }
    catch (std::exception& rhs)
    {
      catch_message("importing of ExtColoredAP", rhs.what(), __FILE__, __LINE__);
      ML_CHK_ERR(-1);
    }
    catch (...)
    {
      catch_message("importing of ExtColoredAP", "", __FILE__, __LINE__);
      ML_CHK_ERR(-1);
    }

    delete ColoredP;

    AddAndResetStartTime("computation of AP", true); 

    // populate the actual AP operator, skip some controls to make it faster

    for (int i = 0; i < NumAggregates; ++i)
    {
      for (int j = 0; j < (int) (aggregates[i].size()); ++j)
      {
        int GRID = aggregates[i][j];
        int LRID = BlockNodeListMap->LID(GRID); // this is the block ID
        //assert (LRID != -1);
        int GCID = CoarseMap.GID(i * NullSpaceDim);
        //assert (GCID != -1); 
        int color = (*Colors)[i] - 1;
        for (int k = 0; k < NumPDEEqns_; ++k)
          for (int j = 0; j < NullSpaceDim; ++j)
          {
            double val = ExtColoredAP[color * NullSpaceDim + j][LRID * NumPDEEqns_ + k];
            if (val != 0.0)
            {
              int GRID2 = GRID * NumPDEEqns_ + k;
              int GCID2 = GCID + j;
              AP->InsertGlobalValues(1, &GRID2, 1, &GCID2, &val);
              //if (ierr < 0) ML_CHK_ERR(ierr);
            }
          }
      }
    }
  }
  else
  {
    // =============================================================== //
    // apply the operator one color at-a-time. This requires NumColors //
    // cycles over BlockPtent. However, the memory requirements are    //
    // drastically reduced. As for low-memory == false, both ColoredAP //
    // and ExtColoredAP point to the same memory location.             //
    // =============================================================== //
    
    if (verbose_)
      std::cout << "Using low-memory computation for AP" << std::endl;

    Epetra_MultiVector ColoredP(FineMap, NullSpaceDim);
    std::vector<double> ColoredAP_ptr;
    try
    {
      ColoredAP_ptr.resize(NullSpaceDim * NodeListMap->NumMyPoints());
    }
    catch (std::exception& rhs)
    {
      catch_message("resizing of ColoredAP_pt", rhs.what(), __FILE__, __LINE__);
      ML_CHK_ERR(-1);
    }
    catch (...)
    {
      catch_message("resizing of ColoredAP_pt", "", __FILE__, __LINE__);
      ML_CHK_ERR(-1);
    }

    Epetra_MultiVector ColoredAP(View, Operator_.OperatorRangeMap(), 
                                 &ColoredAP_ptr[0], NodeListMap->NumMyPoints(), 
                                 NullSpaceDim);
    Epetra_MultiVector ExtColoredAP(View, *NodeListMap, 
                                 &ColoredAP_ptr[0], NodeListMap->NumMyPoints(), 
                                 NullSpaceDim);
    Epetra_Import Importer(*NodeListMap, Operator_.OperatorRangeMap());

    for (int ic = 0; ic < NumColors; ++ic)
    {
      if (ML_Get_PrintLevel() > 8 && Comm().MyPID() == 0)
      {
        if (ic % 20 == 0)
          std::cout << "Processing color " << flush;

        std::cout << ic << " " << flush;
        if (ic % 20 == 19 || ic == NumColors - 1)
          std::cout << std::endl;
        if (ic == NumColors - 1) std::cout << std::endl;
      }

      ColoredP.PutScalar(0.0);

      for (int i = 0; i < BlockPtent_ML->outvec_leng; ++i)
      {
        int allocated = 1;
        int NumEntries;
        int Indices;
        double Values;
        int ierr = ML_Operator_Getrow(BlockPtent_ML, 1 ,&i, allocated,
                                      &Indices,&Values,&NumEntries);
        if (ierr < 0 ||  // something strange in getrow
            NumEntries != 1) // this is the block P
          ML_CHK_ERR(-1);

        const int& Color = (*Colors)[Indices] - 1;
        if (Color != ic)
          continue; // skip this color for this cycle

        for (int k = 0; k < NumPDEEqns_; ++k)
          for (int j = 0; j < NullSpaceDim; ++j)
            ColoredP[j][i * NumPDEEqns_ + k] = 
              NullSpace[j][i * NumPDEEqns_ + k];
      }

      Operator_.Apply(ColoredP, ColoredAP);
      ColoredP = ColoredAP; // just to be safe

      ExtColoredAP.Import(ColoredP, Importer, Insert);

      // populate the actual AP operator, skip some controls to make it faster

      std::vector<int> InsertCols(NullSpaceDim * NumPDEEqns_);
      std::vector<double> InsertValues(NullSpaceDim * NumPDEEqns_);

      for (int i = 0; i < NumAggregates; ++i)
      {
        for (int j = 0; j < (int) (aggregates[i].size()); ++j)
        {
          int GRID = aggregates[i][j];
          int LRID = BlockNodeListMap->LID(GRID); // this is the block ID
          //assert (LRID != -1);
          int GCID = CoarseMap.GID(i * NullSpaceDim);
          //assert (GCID != -1); 
          int color = (*Colors)[i] - 1;
          if (color != ic) continue;

          for (int k = 0; k < NumPDEEqns_; ++k)
          {
            int count = 0;
            int GRID2 = GRID * NumPDEEqns_ + k;
            for (int j = 0; j < NullSpaceDim; ++j)
            {
              double val = ExtColoredAP[j][LRID * NumPDEEqns_ + k];
              if (val != 0.0)
              {
                InsertCols[count] = GCID + j;
                InsertValues[count] = val;
                ++count;
              }
            }
            AP->InsertGlobalValues(1, &GRID2, count, &InsertCols[0], 
                                   &InsertValues[0]);
          }
        }
      }
    }

    ML_Operator_Destroy(&BlockPtent_ML);
  }

  aggregates.resize(0);
  BlockNodeListMap = Teuchos::null;
  NodeListMap = Teuchos::null;

  Colors = Teuchos::null;

  AP->GlobalAssemble(false);
  AP->FillComplete(CoarseMap, FineMap);

#if 0
  try
  {
    AP->OptimizeStorage();
  }
  catch(...)
  {
    // a memory error was reported, typically ReportError.
    // We just continue with fingers crossed.
  }
#endif

  AddAndResetStartTime("computation of the final AP", true); 

  ML_Operator* AP_ML = ML_Operator_Create(Comm_ML());
  ML_Operator_WrapEpetraMatrix(AP.get(), AP_ML);

  // ======== //
  // create R //
  // ======== //
  
  std::vector<int> REntries(NumAggregates * NullSpaceDim);
  for (int AID = 0; AID < NumAggregates; ++AID)
  {
    for (int m = 0; m < NullSpaceDim; ++m)
      REntries[AID * NullSpaceDim + m] = NodesOfAggregate[AID].size() * NumPDEEqns_;
  }

  R_ = rcp(new Epetra_CrsMatrix(Copy, CoarseMap, &REntries[0], true));
  REntries.resize(0);

  for (int AID = 0; AID < NumAggregates; ++AID)
  {
    const int& MySize = NodesOfAggregate[AID].size();

    // FIXME: make it faster
    for (int j = 0; j < MySize; ++j)
      for (int m = 0; m < NumPDEEqns_; ++m)
        for (int k = 0; k < NullSpaceDim; ++k)
        {
          int LCID = NodesOfAggregate[AID][j] * NumPDEEqns_ + m;
          int GCID = FineMap.GID(LCID);
          assert (GCID != -1);

          double& val = NullSpace[k][LCID];

          int GRID = CoarseMap.GID(AID * NullSpaceDim + k);
          int ierr = R_->InsertGlobalValues(GRID, 1, &val, &GCID);
          if (ierr < 0)
            ML_CHK_ERR(-1);
        }
  }

  NodesOfAggregate.resize(0);

  R_->FillComplete(FineMap, CoarseMap);
#if 0
  try
  {
    R_->OptimizeStorage();
  }
  catch(...)
  {
    // a memory error was reported, typically ReportError.
    // We just continue with fingers crossed.
  }
#endif

  ML_Operator* R_ML = ML_Operator_Create(Comm_ML());
  ML_Operator_WrapEpetraMatrix(R_.get(), R_ML);

  AddAndResetStartTime("computation of R", true); 

  // ======== //
  // Create C //
  // ======== //

  C_ML_ = ML_Operator_Create(Comm_ML());
  ML_2matmult(R_ML, AP_ML, C_ML_, ML_MSR_MATRIX);

  ML_Operator_Destroy(&AP_ML);
  ML_Operator_Destroy(&R_ML);
  AP = Teuchos::null;

  C_ = rcp(new ML_Epetra::RowMatrix(C_ML_, &Comm(), false));
  assert (R_->OperatorRangeMap().SameAs(C_->OperatorDomainMap()));

  TotalTime.ResetStartTime();

  AddAndResetStartTime("computation of C", true); 

  if (verbose_)
  {
    std::cout << "Matrix-free preconditioner built. Now building solver for C..." << std::endl; 
  }

  Teuchos::ParameterList& sublist = List_.sublist("ML list");
  sublist.set("PDE equations", NullSpaceDim);
  sublist.set("null space: type", "pre-computed");
  sublist.set("null space: dimension", NewNullSpace.NumVectors());
  sublist.set("null space: vectors", NewNullSpace.Values());

  MLP_ = rcp(new MultiLevelPreconditioner(*C_, sublist, true));

  assert (MLP_.get() != 0);

  IsComputed_ = true;

  AddAndResetStartTime("computation of the preconditioner for C", true); 

  if (verbose_)
  {
    std::cout << std::endl;
    std::cout << "Total CPU time for construction (all included) = ";
    std::cout << TotalCPUTime() << std::endl;
    ML_print_line("=",78);
  }

  return(0);
}
//
//  Amesos_TestMultiSolver.cpp reads in a matrix in Harwell-Boeing format, 
//  calls one of the sparse direct solvers, using blocked right hand sides
//  and computes the error and residual.  
//
//  TestSolver ignores the Harwell-Boeing right hand sides, creating
//  random right hand sides instead.  
//
//  Amesos_TestMultiSolver can test either A x = b or A^T x = b.
//  This can be a bit confusing because sparse direct solvers 
//  use compressed column storage - the transpose of Trilinos'
//  sparse row storage.
//
//  Matrices:
//    readA - Serial.  As read from the file.
//    transposeA - Serial.  The transpose of readA.
//    serialA - if (transpose) then transposeA else readA 
//    distributedA - readA distributed to all processes
//    passA - if ( distributed ) then distributedA else serialA
//
//
int Amesos_TestMultiSolver( Epetra_Comm &Comm, char *matrix_file, int numsolves, 
		      SparseSolverType SparseSolver, bool transpose,
		      int special, AMESOS_MatrixType matrix_type ) {


  int iam = Comm.MyPID() ;

  
  //  int hatever;
  //  if ( iam == 0 )  std::cin >> hatever ; 
  Comm.Barrier();


  Epetra_Map * readMap;
  Epetra_CrsMatrix * readA; 
  Epetra_Vector * readx; 
  Epetra_Vector * readb;
  Epetra_Vector * readxexact;
   
  std::string FileName = matrix_file ;
  int FN_Size = FileName.size() ; 
  std::string LastFiveBytes = FileName.substr( EPETRA_MAX(0,FN_Size-5), FN_Size );
  std::string LastFourBytes = FileName.substr( EPETRA_MAX(0,FN_Size-4), FN_Size );
  bool NonContiguousMap = false; 

  if ( LastFiveBytes == ".triU" ) { 
    NonContiguousMap = true; 
    // Call routine to read in unsymmetric Triplet matrix
    EPETRA_CHK_ERR( Trilinos_Util_ReadTriples2Epetra( matrix_file, false, Comm, readMap, readA, readx, 
						      readb, readxexact, NonContiguousMap ) );
  } else {
    if ( LastFiveBytes == ".triS" ) { 
      NonContiguousMap = true; 
      // Call routine to read in symmetric Triplet matrix
      EPETRA_CHK_ERR( Trilinos_Util_ReadTriples2Epetra( matrix_file, true, Comm, 
							readMap, readA, readx, 
							readb, readxexact, NonContiguousMap ) );
    } else {
      if (  LastFourBytes == ".mtx" ) { 
	EPETRA_CHK_ERR( Trilinos_Util_ReadMatrixMarket2Epetra( matrix_file, Comm, readMap, 
							       readA, readx, readb, readxexact) );
      } else {
	// Call routine to read in HB problem
	Trilinos_Util_ReadHb2Epetra( matrix_file, Comm, readMap, readA, readx, 
						     readb, readxexact) ;
      }
    }
  }

  Epetra_CrsMatrix transposeA(Copy, *readMap, 0);
  Epetra_CrsMatrix *serialA ; 

  if ( transpose ) {
    assert( CrsMatrixTranspose( readA, &transposeA ) == 0 ); 
    serialA = &transposeA ; 
  } else {
    serialA = readA ; 
  }

  // Create uniform distributed map
  Epetra_Map map(readMap->NumGlobalElements(), 0, Comm);
  Epetra_Map* map_;

  if( NonContiguousMap ) {
    //
    //  map gives us NumMyElements and MyFirstElement;
    //
    int NumGlobalElements =  readMap->NumGlobalElements();
    int NumMyElements = map.NumMyElements();
    int MyFirstElement = map.MinMyGID();
    std::vector<int> MapMap_( NumGlobalElements );
    readMap->MyGlobalElements( &MapMap_[0] ) ;
    Comm.Broadcast( &MapMap_[0], NumGlobalElements, 0 ) ; 
    map_ = new Epetra_Map( NumGlobalElements, NumMyElements, &MapMap_[MyFirstElement], 0, Comm);
  } else {
    map_ = new Epetra_Map( map ) ; 
  }


  // Create Exporter to distribute read-in matrix and vectors
  Epetra_Export exporter(*readMap, *map_);
  Epetra_CrsMatrix A(Copy, *map_, 0);

  Epetra_RowMatrix * passA = 0; 
  Epetra_MultiVector * passx = 0; 
  Epetra_MultiVector * passb = 0;
  Epetra_MultiVector * passxexact = 0;
  Epetra_MultiVector * passresid = 0;
  Epetra_MultiVector * passtmp = 0;

  Epetra_MultiVector x(*map_,numsolves);
  Epetra_MultiVector b(*map_,numsolves);
  Epetra_MultiVector xexact(*map_,numsolves);
  Epetra_MultiVector resid(*map_,numsolves);
  Epetra_MultiVector tmp(*map_,numsolves);

  Epetra_MultiVector serialx(*readMap,numsolves);
  Epetra_MultiVector serialb(*readMap,numsolves);
  Epetra_MultiVector serialxexact(*readMap,numsolves);
  Epetra_MultiVector serialresid(*readMap,numsolves);
  Epetra_MultiVector serialtmp(*readMap,numsolves);

  bool distribute_matrix = ( matrix_type == AMESOS_Distributed ) ; 
  if ( distribute_matrix ) { 
    //
    //  Initialize x, b and xexact to the values read in from the file
    //
    
    A.Export(*serialA, exporter, Add);
    Comm.Barrier();

    assert(A.FillComplete()==0);    
    Comm.Barrier();

    passA = &A; 
    passx = &x; 
    passb = &b;
    passxexact = &xexact;
    passresid = &resid;
    passtmp = &tmp;
  } else { 
    passA = serialA; 
    passx = &serialx; 
    passb = &serialb;
    passxexact = &serialxexact;
    passresid = &serialresid;
    passtmp = &serialtmp;
  }

  passxexact->SetSeed(131) ; 
  passxexact->Random();
  passx->SetSeed(11231) ; 
  passx->Random();

  passb->PutScalar( 0.0 );
  passA->Multiply( transpose, *passxexact, *passb ) ; 

  Epetra_MultiVector CopyB( *passb ) ;

  double Anorm = passA->NormInf() ; 
  SparseDirectTimingVars::SS_Result.Set_Anorm(Anorm) ;

  Epetra_LinearProblem Problem(  (Epetra_RowMatrix *) passA, 
				 (Epetra_MultiVector *) passx, 
				 (Epetra_MultiVector *) passb );

  double max_resid = 0.0;
  for ( int j = 0 ; j < special+1 ; j++ ) { 
    
    Epetra_Time TotalTime( Comm ) ; 
    if ( false ) { 
#ifdef TEST_UMFPACK

      unused code

    } else if ( SparseSolver == UMFPACK ) { 
      UmfpackOO umfpack( (Epetra_RowMatrix *) passA, 
			 (Epetra_MultiVector *) passx, 
			 (Epetra_MultiVector *) passb ) ; 
    
      umfpack.SetTrans( transpose ) ; 
      umfpack.Solve() ; 
#endif
#ifdef TEST_SUPERLU
    } else if ( SparseSolver == SuperLU ) { 
      SuperluserialOO superluserial( (Epetra_RowMatrix *) passA, 
				     (Epetra_MultiVector *) passx, 
				     (Epetra_MultiVector *) passb ) ; 

      superluserial.SetPermc( SuperLU_permc ) ; 
      superluserial.SetTrans( transpose ) ; 
      superluserial.SetUseDGSSV( special == 0 ) ; 
      superluserial.Solve() ; 
#endif
#ifdef HAVE_AMESOS_SLUD
    } else if ( SparseSolver == SuperLUdist ) { 
      SuperludistOO superludist( Problem ) ; 
      superludist.SetTrans( transpose ) ; 
      EPETRA_CHK_ERR( superludist.Solve( true ) ) ;
#endif 
#ifdef HAVE_AMESOS_SLUD2
    } else if ( SparseSolver == SuperLUdist2 ) { 
      Superludist2_OO superludist2( Problem ) ; 
      superludist2.SetTrans( transpose ) ; 
      EPETRA_CHK_ERR( superludist2.Solve( true ) ) ;
#endif 
#ifdef TEST_SPOOLES
    } else if ( SparseSolver == SPOOLES ) { 
      SpoolesOO spooles( (Epetra_RowMatrix *) passA, 
			 (Epetra_MultiVector *) passx, 
			 (Epetra_MultiVector *) passb ) ; 
    
      spooles.SetTrans( transpose ) ; 
      spooles.Solve() ; 
#endif
#ifdef HAVE_AMESOS_DSCPACK
    } else if ( SparseSolver == DSCPACK ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Dscpack dscpack( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( dscpack.SetParameters( ParamList ) ); 
    
      EPETRA_CHK_ERR( dscpack.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_UMFPACK
    } else if ( SparseSolver == UMFPACK ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Umfpack umfpack( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( umfpack.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( umfpack.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( umfpack.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_KLU
    } else if ( SparseSolver == KLU ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Klu klu( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( klu.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( klu.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( klu.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( klu.NumericFactorization(  ) ); 
      EPETRA_CHK_ERR( klu.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_PARAKLETE
    } else if ( SparseSolver == PARAKLETE ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Paraklete paraklete( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( paraklete.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( paraklete.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( paraklete.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( paraklete.NumericFactorization(  ) ); 
      EPETRA_CHK_ERR( paraklete.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_SLUS
    } else if ( SparseSolver == SuperLU ) { 
      Epetra_SLU superluserial( &Problem ) ; 
      EPETRA_CHK_ERR( superluserial.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( superluserial.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( superluserial.NumericFactorization(  ) ); 

      EPETRA_CHK_ERR( superluserial.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_LAPACK
    } else if ( SparseSolver == LAPACK ) { 
      Teuchos::ParameterList ParamList ;
      ParamList.set( "MaxProcs", -3 );
      Amesos_Lapack lapack( Problem ) ; 
      EPETRA_CHK_ERR( lapack.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( lapack.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( lapack.NumericFactorization(  ) ); 
      EPETRA_CHK_ERR( lapack.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_TAUCS
    } else if ( SparseSolver == TAUCS ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Taucs taucs( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( taucs.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( taucs.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( taucs.SymbolicFactorization( ) ); 
      EPETRA_CHK_ERR( taucs.NumericFactorization( ) ); 
      EPETRA_CHK_ERR( taucs.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_PARDISO
    } else if ( SparseSolver == PARDISO ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Pardiso pardiso( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( pardiso.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( pardiso.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( pardiso.SymbolicFactorization( ) ); 
      EPETRA_CHK_ERR( pardiso.NumericFactorization( ) ); 
      EPETRA_CHK_ERR( pardiso.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_PARKLETE
    } else if ( SparseSolver == PARKLETE ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Parklete parklete( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( parklete.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( parklete.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( parklete.SymbolicFactorization( ) ); 
      EPETRA_CHK_ERR( parklete.NumericFactorization( ) ); 
      EPETRA_CHK_ERR( parklete.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_MUMPS
    } else if ( SparseSolver == MUMPS ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Mumps mumps( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( mumps.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( mumps.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( mumps.SymbolicFactorization( ) ); 
      EPETRA_CHK_ERR( mumps.NumericFactorization( ) ); 
      EPETRA_CHK_ERR( mumps.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_SCALAPACK
    } else if ( SparseSolver == SCALAPACK ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Scalapack scalapack( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( scalapack.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( scalapack.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( scalapack.SymbolicFactorization( ) ); 
      EPETRA_CHK_ERR( scalapack.NumericFactorization( ) ); 
      EPETRA_CHK_ERR( scalapack.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_SUPERLUDIST
    } else if ( SparseSolver == SUPERLUDIST ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Superludist superludist( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( superludist.SetParameters( ParamList ) ); 

      EPETRA_CHK_ERR( superludist.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( superludist.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( superludist.NumericFactorization(  ) ); 
      EPETRA_CHK_ERR( superludist.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_SUPERLU
    } else if ( SparseSolver == SUPERLU ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Superlu superlu( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( superlu.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( superlu.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( superlu.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( superlu.NumericFactorization(  ) ); 
      EPETRA_CHK_ERR( superlu.Solve( ) ); 
#endif
#ifdef TEST_SPOOLESSERIAL 
    } else if ( SparseSolver == SPOOLESSERIAL ) { 
      SpoolesserialOO spoolesserial( (Epetra_RowMatrix *) passA, 
				     (Epetra_MultiVector *) passx, 
				     (Epetra_MultiVector *) passb ) ; 
    
      spoolesserial.Solve() ;
#endif
    } else { 
      SparseDirectTimingVars::log_file << "Solver not implemented yet" << std::endl ;
      std::cerr << "\n\n####################  Requested solver not available (Or not tested with blocked RHS) on this platform #####################\n" << std::endl ;
    }

    SparseDirectTimingVars::SS_Result.Set_Total_Time( TotalTime.ElapsedTime() ); 
    //    SparseDirectTimingVars::SS_Result.Set_First_Time( 0.0 ); 
    //    SparseDirectTimingVars::SS_Result.Set_Middle_Time( 0.0 ); 
    //    SparseDirectTimingVars::SS_Result.Set_Last_Time( 0.0 ); 

    //
    //  Compute the error = norm(xcomp - xexact )
    //
    std::vector <double> error(numsolves) ; 
    double max_error = 0.0;
  
    passresid->Update(1.0, *passx, -1.0, *passxexact, 0.0);

    passresid->Norm2(&error[0]);
    for ( int i = 0 ; i< numsolves; i++ ) 
      if ( error[i] > max_error ) max_error = error[i] ; 
    SparseDirectTimingVars::SS_Result.Set_Error(max_error) ;

    //  passxexact->Norm2(&error[0] ) ; 
    //  passx->Norm2(&error ) ; 

    //
    //  Compute the residual = norm(Ax - b)
    //
    std::vector <double> residual(numsolves) ; 
  
    passtmp->PutScalar(0.0);
    passA->Multiply( transpose, *passx, *passtmp);
    passresid->Update(1.0, *passtmp, -1.0, *passb, 0.0); 
    //    passresid->Update(1.0, *passtmp, -1.0, CopyB, 0.0); 
    passresid->Norm2(&residual[0]);

    for ( int i = 0 ; i< numsolves; i++ ) 
      if ( residual[i] > max_resid ) max_resid = residual[i] ; 


    SparseDirectTimingVars::SS_Result.Set_Residual(max_resid) ;
    
    std::vector <double> bnorm(numsolves); 
    passb->Norm2( &bnorm[0] ) ; 
    SparseDirectTimingVars::SS_Result.Set_Bnorm(bnorm[0]) ;

    std::vector <double> xnorm(numsolves); 
    passx->Norm2( &xnorm[0] ) ; 
    SparseDirectTimingVars::SS_Result.Set_Xnorm(xnorm[0]) ;


    if ( false && iam == 0 ) { 

      std::cout << " Amesos_TestMutliSolver.cpp " << std::endl ; 
      for ( int i = 0 ; i< numsolves && i < 10 ; i++ ) {
	std::cout << "i=" << i 
	     << " error = " << error[i] 
	     << " xnorm = " << xnorm[i] 
	     << " residual = " << residual[i] 
	     << " bnorm = " << bnorm[i] 
	     << std::endl ; 
      
      }
    
      std::cout << std::endl << " max_resid = " << max_resid ; 
      std::cout << " max_error = " << max_error << std::endl ; 
      std::cout << " Get_residual() again = " << SparseDirectTimingVars::SS_Result.Get_Residual() << std::endl ;

    }
  }
  delete readA;
  delete readx;
  delete readb;
  delete readxexact;
  delete readMap;
  delete map_;
  
  Comm.Barrier();

return 0 ;
}
Ejemplo n.º 28
0
int 
Stokhos::KLMatrixFreeOperator::
Apply(const Epetra_MultiVector& Input, Epetra_MultiVector& Result) const
{
  // We have to be careful if Input and Result are the same vector.
  // If this is the case, the only possible solution is to make a copy
  const Epetra_MultiVector *input = &Input;
  bool made_copy = false;
  if (Input.Values() == Result.Values() && !is_stoch_parallel) {
    input = new Epetra_MultiVector(Input);
    made_copy = true;
  }

  // Initialize
  Result.PutScalar(0.0);

  const Epetra_Map* input_base_map = domain_base_map.get();
  const Epetra_Map* result_base_map = range_base_map.get();
  if (useTranspose == true) {
    input_base_map = range_base_map.get();
    result_base_map = domain_base_map.get();
  }

  // Allocate temporary storage
  int m = Input.NumVectors();
  if (useTranspose == false && 
      (tmp == Teuchos::null || tmp->NumVectors() != m*max_num_mat_vec))
    tmp = Teuchos::rcp(new Epetra_MultiVector(*result_base_map, 
					      m*max_num_mat_vec));
  else if (useTranspose == true && 
	   (tmp_trans == Teuchos::null || 
	    tmp_trans->NumVectors() != m*max_num_mat_vec))
    tmp_trans = Teuchos::rcp(new Epetra_MultiVector(*result_base_map, 
						    m*max_num_mat_vec));
  Epetra_MultiVector *tmp_result;
  if (useTranspose == false)
    tmp_result = tmp.get();
  else
    tmp_result = tmp_trans.get();

  // Map input into column map
  const Epetra_MultiVector *tmp_col;
  if (!is_stoch_parallel)
    tmp_col = input;
  else {
    if (useTranspose == false) {
      if (input_col == Teuchos::null || input_col->NumVectors() != m)
	input_col = Teuchos::rcp(new Epetra_MultiVector(*global_col_map, m));
      input_col->Import(*input, *col_importer, Insert);
      tmp_col = input_col.get();
    }
    else {
      if (input_col_trans == Teuchos::null || 
	  input_col_trans->NumVectors() != m)
	input_col_trans = 
	  Teuchos::rcp(new Epetra_MultiVector(*global_col_map_trans, m));
      input_col_trans->Import(*input, *col_importer_trans, Insert);
      tmp_col = input_col_trans.get();
    }
  }

  // Extract blocks
  EpetraExt::BlockMultiVector sg_input(View, *input_base_map, *tmp_col);
  EpetraExt::BlockMultiVector sg_result(View, *result_base_map, Result);
  for (int i=0; i<input_block.size(); i++)
    input_block[i] = sg_input.GetBlock(i);
  for (int i=0; i<result_block.size(); i++)
    result_block[i] = sg_result.GetBlock(i);
  int N = result_block[0]->MyLength();

  const Teuchos::Array<double>& norms = sg_basis->norm_squared();
  int d = sg_basis->dimension();
  Teuchos::Array<double> zero(d), one(d);
  for(int j = 0; j<d; j++) {
    zero[j] = 0.0;
    one[j] = 1.0;
  }
  Teuchos::Array< double > phi_0(expansion_size), phi_1(expansion_size);
  sg_basis->evaluateBases(zero, phi_0);
  sg_basis->evaluateBases(one, phi_1);

  // k_begin and k_end are initialized in the constructor
  for (Cijk_type::k_iterator k_it=k_begin; k_it!=k_end; ++k_it) {
    Cijk_type::kj_iterator j_begin = Cijk->j_begin(k_it);
    Cijk_type::kj_iterator j_end = Cijk->j_end(k_it);
    int k = index(k_it);
    int nj = Cijk->num_j(k_it);
    if (nj > 0) {
      Teuchos::Array<double*> j_ptr(nj*m);
      Teuchos::Array<int> mj_indices(nj*m);
      int l = 0;
      for (Cijk_type::kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
	int j = index(j_it);
	for (int mm=0; mm<m; mm++) {
	  j_ptr[l*m+mm] = input_block[j]->Values()+mm*N;
	  mj_indices[l*m+mm] = l*m+mm;
	}
	l++;
      }
      Epetra_MultiVector input_tmp(View, *input_base_map, &j_ptr[0], nj*m);
      Epetra_MultiVector result_tmp(View, *tmp_result, &mj_indices[0], nj*m);
      (*block_ops)[k].Apply(input_tmp, result_tmp);
      l = 0;
      for (Cijk_type::kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
	int j = index(j_it);
	int j_gid = epetraCijk->GCID(j);
	for (Cijk_type::kji_iterator i_it = Cijk->i_begin(j_it);
	     i_it != Cijk->i_end(j_it); ++i_it) {
	  int i = index(i_it);
	  int i_gid = epetraCijk->GRID(i);
	  double c = value(i_it);
	  if (k == 0)
	    c /= phi_0[0];
	  else {
	    c /= phi_1[k];
	    if (i_gid == j_gid)
	      c -= phi_0[k]/(phi_1[k]*phi_0[0])*norms[i_gid];
	  }
	  if (scale_op) {
	    if (useTranspose)
	      c /= norms[j_gid];
	    else
	      c /= norms[i_gid];
	  }
	  for (int mm=0; mm<m; mm++)
	    (*result_block[i])(mm)->Update(c, *result_tmp(l*m+mm), 1.0);
	}
	l++;
      }
    }
  }

  // Destroy blocks
  for (int i=0; i<input_block.size(); i++)
    input_block[i] = Teuchos::null;
  for (int i=0; i<result_block.size(); i++)
    result_block[i] = Teuchos::null;

  if (made_copy)
    delete input;

  return 0;
}
int TestMultiLevelPreconditioner(char ProblemType[],
				 Teuchos::ParameterList & MLList,
				 Epetra_LinearProblem & Problem, double & TotalErrorResidual,
				 double & TotalErrorExactSol,bool cg=false)
{

  Epetra_MultiVector* lhs = Problem.GetLHS();
  Epetra_MultiVector* rhs = Problem.GetRHS();
  Epetra_RowMatrix* A = Problem.GetMatrix();

  // ======================================== //
  // create a rhs corresponding to lhs or 1's //
  // ======================================== //

  lhs->PutScalar(1.0);
  A->Multiply(false,*lhs,*rhs);

  lhs->PutScalar(0.0);

  Epetra_Time Time(A->Comm());

  // =================== //
  // call ML and AztecOO //
  // =================== //

  AztecOO solver(Problem);

  MLList.set("ML output", 10);

  ML_Epetra::MultiLevelPreconditioner * MLPrec = new ML_Epetra::MultiLevelPreconditioner(*A, MLList, true);

  // tell AztecOO to use this preconditioner, then solve
  solver.SetPrecOperator(MLPrec);

  if(cg) solver.SetAztecOption(AZ_solver, AZ_cg);
  else solver.SetAztecOption(AZ_solver, AZ_gmres);
  solver.SetAztecOption(AZ_output, 32);
  solver.SetAztecOption(AZ_kspace, 160);

  solver.Iterate(1550, 1e-12);

  delete MLPrec;

  // ==================================================== //
  // compute difference between exact solution and ML one //
  // ==================================================== //

  double d = 0.0, d_tot = 0.0;

  for( int i=0 ; i<lhs->Map().NumMyElements() ; ++i )
    d += ((*lhs)[0][i] - 1.0) * ((*lhs)[0][i] - 1.0);

  A->Comm().SumAll(&d,&d_tot,1);

  // ================== //
  // compute ||Ax - b|| //
  // ================== //

  double Norm;
  Epetra_Vector Ax(rhs->Map());
  A->Multiply(false, *lhs, Ax);
  Ax.Update(1.0, *rhs, -1.0);
  Ax.Norm2(&Norm);

  string msg = ProblemType;

  if (A->Comm().MyPID() == 0) {
    cout << msg << "......Using " << A->Comm().NumProc() << " processes" << endl;
    cout << msg << "......||A x - b||_2 = " << Norm << endl;
    cout << msg << "......||x_exact - x||_2 = " << sqrt(d_tot) << endl;
    cout << msg << "......Total Time = " << Time.ElapsedTime() << endl;
  }

  TotalErrorExactSol += sqrt(d_tot);
  TotalErrorResidual += Norm;

  return( solver.NumIters() );

}
// ================================================ ====== ==== ==== == =
//! Apply the preconditioner to an Epetra_MultiVector X, puts the result in Y
int ML_Epetra::FaceMatrixFreePreconditioner::ApplyInverse(const Epetra_MultiVector& B_, Epetra_MultiVector& X) const{
  const Epetra_MultiVector *B;
  Epetra_MultiVector *Bcopy=0;

  /* Sanity Checks */
  int NumVectors=B_.NumVectors();
  if (!B_.Map().SameAs(*FaceDomainMap_)) ML_CHK_ERR(-1);
  if (NumVectors != X.NumVectors()) ML_CHK_ERR(-1);

  Epetra_MultiVector r_edge(*FaceDomainMap_,NumVectors,false);
  Epetra_MultiVector e_edge(*FaceDomainMap_,NumVectors,false);
  Epetra_MultiVector e_node(*CoarseMap_,NumVectors,false);
  Epetra_MultiVector r_node(*CoarseMap_,NumVectors,false);

  /* Deal with the B==X case */
  if (B_.Pointers()[0] == X.Pointers()[0]){
    Bcopy=new Epetra_MultiVector(B_);
    B=Bcopy;
    X.PutScalar(0.0);
  }
  else B=&B_;


  for(int i=0;i<num_cycles;i++){
    /* Pre-smoothing */
#ifdef HAVE_ML_IFPACK
    if(Smoother_) ML_CHK_ERR(Smoother_->ApplyInverse(*B,X));
#endif

    if(MaxLevels > 0){
      if(i != 0
#ifdef HAVE_ML_IFPACK
         || Smoother_
#endif
         ){
        /* Calculate Residual (r_e = b - (S+M+Addon) * x) */
        ML_CHK_ERR(Operator_->Apply(X,r_edge));
        ML_CHK_ERR(r_edge.Update(1.0,*B,-1.0));

        /* Xfer to coarse grid (r_n = P' * r_e) */
        ML_CHK_ERR(Prolongator_->Multiply(true,r_edge,r_node));
      }
      else{
        /* Xfer to coarse grid (r_n = P' * r_e) */
        ML_CHK_ERR(Prolongator_->Multiply(true,*B,r_node));
      }

      /* AMG on coarse grid  (e_n = (CoarseMatrix)^{-1} r_n) */
      ML_CHK_ERR(CoarsePC->ApplyInverse(r_node,e_node));

      /* Xfer back to fine grid (e_e = P * e_n) */
      ML_CHK_ERR(Prolongator_->Multiply(false,e_node,e_edge));

      /* Add in correction (x = x + e_e)        */
      ML_CHK_ERR(X.Update(1.0,e_edge,1.0));
    }/*end if*/

    /* Post-Smoothing */
#ifdef HAVE_ML_IFPACK
    if(Smoother_) ML_CHK_ERR(Smoother_->ApplyInverse(*B,X));
#endif

  }/*end for*/

  /* Cleanup */
  if(Bcopy) delete Bcopy;

  return 0;
}/*end ApplyInverse*/