// Sparse matrix-matrix sum, dispatched to the backend library of C's row map.
//
// Hands (A, transposeA, scalarA, B, transposeB, scalarB, C) to
// EpetraExt::MatrixMatrix::Add when C's row map reports Xpetra::UseEpetra, or
// to Tpetra::MatrixMatrix::Add when it reports Xpetra::UseTpetra. For any
// other library value no sum is computed. Afterwards the "stridedMaps" view
// of A and/or B (when present) is transferred to C.
//
//   A, B                 operands; their row maps must satisfy isSameAs().
//   transposeA, scalarA  forwarded to the backend as the transpose flag and
//                        coefficient for A.
//   transposeB, scalarB  likewise for B.
//   C                    result matrix. When null, a CrsMatrixWrap on A's row
//                        map is allocated here (5 entries per row is a guess,
//                        see the FIXME below).
//
// Throws Xpetra::Exceptions::RuntimeError when the row maps of A and B
// differ, when the required backend was not compiled in, or when
// EpetraExt::MatrixMatrix::Add returns a non-zero code.
//
// NOTE(review): C is an RCP passed by value. If the caller passes
// Teuchos::null, the matrix allocated below is only reachable through this
// function's local copy of the handle, so the caller's RCP stays null.
// Fixing that needs a signature change (RCP by reference) - confirm with
// the callers before changing it.
void Add(
    const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>& A,
    bool transposeA,
    Scalar scalarA,
    const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>& B,
    bool transposeB,
    Scalar scalarB,
    Teuchos::RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> > C) {

  if ( !(A.getRowMap()->isSameAs(*(B.getRowMap()))) ) {
    throw(Xpetra::Exceptions::RuntimeError("Xpetra::MatrixMatrix::Add: matrix row maps are not the same."));
  }
  if (C==Teuchos::null) {
    //FIXME 5 is a complete guess as to the #nonzeros per row
    C = rcp( new Xpetra::CrsMatrixWrap<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>(A.getRowMap(), 5) );
  }

  if (C->getRowMap()->lib() == Xpetra::UseEpetra) {
#ifdef HAVE_XPETRA_EPETRAEXT
    const Epetra_CrsMatrix& epA = Xpetra::MatrixMatrix::Op2EpetraCrs(A);
    const Epetra_CrsMatrix& epB = Xpetra::MatrixMatrix::Op2EpetraCrs(B);
    RCP<Epetra_CrsMatrix>       epC = Xpetra::MatrixMatrix::Op2NonConstEpetraCrs(C);
    // EpetraExt takes the result as a raw pointer (passed as an lvalue to
    // avoid a compiler error).
    Epetra_CrsMatrix* ref2epC = &*epC;

    //FIXME is there a bug if beta=0?
    int i = EpetraExt::MatrixMatrix::Add(epA,transposeA,scalarA,epB,transposeB,scalarB,ref2epC);

    // A non-zero return code from EpetraExt is surfaced as an exception.
    if (i != 0) {
      std::ostringstream buf;
      buf << i;
      std::string msg = "EpetraExt::MatrixMatrix::Add return value of " + buf.str();
      throw(Xpetra::Exceptions::RuntimeError(msg));
    }
#else
    throw(Xpetra::Exceptions::RuntimeError("Xpetra must be compiled with EpetraExt."));
#endif
  } else if(C->getRowMap()->lib() == Xpetra::UseTpetra) {
#ifdef HAVE_XPETRA_TPETRA
    const Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> & tpA = Xpetra::MatrixMatrix::Op2TpetraCrs(A);
    const Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> & tpB = Xpetra::MatrixMatrix::Op2TpetraCrs(B);
    RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> >       tpC = Xpetra::MatrixMatrix::Op2NonConstTpetraCrs(C);

    Tpetra::MatrixMatrix::Add(tpA, transposeA, scalarA, tpB, transposeB, scalarB, tpC);
#else
    throw(Xpetra::Exceptions::RuntimeError("Xpetra must be compiled with Tpetra."));
#endif
  }

  ///////////////////////// EXPERIMENTAL
  // Transfer striding information to C. The const is cast away only to obtain
  // the RCP type handed to CreateView; the RCPs come from rcpFromRef and do
  // not own A or B.
  // NOTE(review): when both A and B carry a "stridedMaps" view, CreateView is
  // called twice with the same label (A first, then B) - confirm that
  // CreateView tolerates an already existing view.
  RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> > rcpA = Teuchos::rcp_const_cast<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> >(Teuchos::rcpFromRef(A));
  RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> > rcpB = Teuchos::rcp_const_cast<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> >(Teuchos::rcpFromRef(B));
  if(A.IsView("stridedMaps")) C->CreateView("stridedMaps", rcpA);
  if(B.IsView("stridedMaps")) C->CreateView("stridedMaps", rcpB);
  ///////////////////////// EXPERIMENTAL
}
  // Jacobi specialization for Scalar=double, LocalOrdinal=int,
  // GlobalOrdinal=int on EpetraNode.
  //
  // Visible behaviour: checks that C's row map is the same as A's and B's and
  // that A and B are fill-complete; for an Epetra-backed C it forwards
  // (omega, Dinv, A, B, C) to EpetraExt::MatrixMatrix::Jacobi and turns a
  // non-zero return code into Exceptions::RuntimeError.
  //
  // `label` and `params` are not referenced in the part of the body that is
  // visible here.
  //
  // NOTE(review): this definition is cut off right after the Tpetra branch is
  // opened; the rest of the body is not visible in this view.
  void Jacobi<double,int,int,EpetraNode>(double omega,
                                         const Xpetra::Vector<double,int,int,EpetraNode> & Dinv,
                                         const Xpetra::Matrix<double,int,int,EpetraNode> & A,
                                         const Xpetra::Matrix<double,int,int,EpetraNode> & B,
                                         Xpetra::Matrix<double,int,int,EpetraNode> &C,
                                         bool call_FillComplete_on_result,
                                         bool doOptimizeStorage,
                                         const std::string & label,
                                         const Teuchos::RCP<Teuchos::ParameterList>& params) {
    // Short aliases for the fixed template arguments of this specialization.
    typedef double        SC;
    typedef int           LO;
    typedef int           GO;
    typedef EpetraNode    NO;

    // Preconditions: C is row-compatible with A and B; A and B are finalized.
    TEUCHOS_TEST_FOR_EXCEPTION(C.getRowMap()->isSameAs(*A.getRowMap()) == false, Exceptions::RuntimeError,
                               "XpetraExt::MatrixMatrix::Jacobi: row map of C is not same as row map of A")
    TEUCHOS_TEST_FOR_EXCEPTION(C.getRowMap()->isSameAs(*B.getRowMap()) == false, Exceptions::RuntimeError,
                               "XpetraExt::MatrixMatrix::Jacobi: row map of C is not same as row map of B");
    TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), Exceptions::RuntimeError, "A is not fill-completed");
    TEUCHOS_TEST_FOR_EXCEPTION(!B.isFillComplete(), Exceptions::RuntimeError, "B is not fill-completed");

    // The backend is asked to fill-complete C only when both flags are set.
    bool haveMultiplyDoFillComplete = call_FillComplete_on_result && doOptimizeStorage;

    if (C.getRowMap()->lib() == Xpetra::UseEpetra) {
#ifndef HAVE_XPETRA_EPETRAEXT
      throw(Xpetra::Exceptions::RuntimeError("Xpetra::IteratorOps::Jacobi requires EpetraExt to be compiled."));
#else
      Epetra_CrsMatrix& epA = Xpetra::Helpers<SC,LO,GO,NO>::Op2NonConstEpetraCrs(A);
      Epetra_CrsMatrix& epB = Xpetra::Helpers<SC,LO,GO,NO>::Op2NonConstEpetraCrs(B);
      Epetra_CrsMatrix& epC = Xpetra::Helpers<SC,LO,GO,NO>::Op2NonConstEpetraCrs(C);
      // FIXME
      // epD is not declared anywhere else in this body; presumably the macro
      // introduces it as Dinv cast to the Epetra vector type - confirm against
      // the XPETRA_DYNAMIC_CAST definition.
      XPETRA_DYNAMIC_CAST(const EpetraVectorT<GO XPETRA_COMMA NO>, Dinv, epD, "Xpetra::IteratorOps::Jacobi() only accepts Xpetra::EpetraVector as input argument.");

      int i = EpetraExt::MatrixMatrix::Jacobi(omega, *epD.getEpetra_Vector(), epA, epB, epC, haveMultiplyDoFillComplete);
      // NOTE(review): C.fillComplete() below runs before the return code `i`
      // is inspected - confirm this ordering is intended.
      if (haveMultiplyDoFillComplete) {
        // Due to Epetra wrapper intricacies, we need to explicitly call
        // fillComplete on Xpetra matrix here. Specifically, EpetraCrsMatrix
        // only keeps an internal variable to check whether we are in resumed
        // state or not, but never touches the underlying Epetra object. As
        // such, we need to explicitly update the state of Xpetra matrix to
        // that of Epetra one afterwards
        C.fillComplete();
      }

      // A non-zero return code from EpetraExt is reported as an exception.
      if (i != 0) {
        std::ostringstream buf;
        buf << i;
        std::string msg = "EpetraExt::MatrixMatrix::Jacobi return value of " + buf.str();
        throw(Exceptions::RuntimeError(msg));
      }
#endif
    } else if (C.getRowMap()->lib() == Xpetra::UseTpetra) {
// Jacobi specialization for Scalar=double, LocalOrdinal=int, GlobalOrdinal=int
// on KokkosClassic's default node type and its default sparse ops.
//
// Visible behaviour: checks that C's row map is the same as A's and B's and
// that A and B are fill-complete; for an Epetra-backed C it forwards
// (omega, Dinv, A, B, C) to EpetraExt::MatrixMatrix::Jacobi and turns a
// non-zero return code into a RuntimeError.
//
// NOTE(review): this definition is cut off right after the Tpetra branch is
// opened; the rest of the body is not visible in this view.
inline void Jacobi<double,int,int,KokkosClassic::DefaultNode::DefaultNodeType,KokkosClassic::DefaultKernels<double,int,KokkosClassic::DefaultNode::DefaultNodeType>::SparseOps>(
  double omega,
  const Xpetra::Vector<double,int,int,KokkosClassic::DefaultNode::DefaultNodeType> & Dinv,
  const Xpetra::Matrix<double,int,int,KokkosClassic::DefaultNode::DefaultNodeType> & A,
  const Xpetra::Matrix<double,int,int,KokkosClassic::DefaultNode::DefaultNodeType> & B,
  Xpetra::Matrix<double,int,int,KokkosClassic::DefaultNode::DefaultNodeType,KokkosClassic::DefaultKernels<double,int,KokkosClassic::DefaultNode::DefaultNodeType>::SparseOps> &C,
  bool call_FillComplete_on_result,
  bool doOptimizeStorage) {

  // Aliases spelling out the fixed template arguments of this specialization.
  typedef double Scalar;
  typedef int LocalOrdinal;
  typedef int GlobalOrdinal;
  typedef KokkosClassic::DefaultNode::DefaultNodeType Node;
  typedef KokkosClassic::DefaultKernels<double,int,KokkosClassic::DefaultNode::DefaultNodeType>::SparseOps LocalMatOps;

  // C must share its row map with A and with B (A is tested first).
  if(C.getRowMap()->isSameAs(*A.getRowMap()) == false) {
    std::string msg = "XpetraExt::MatrixMatrix::Jacobi: row map of C is not same as row map of A";
    throw(Xpetra::Exceptions::RuntimeError(msg));
  }
  else if(C.getRowMap()->isSameAs(*B.getRowMap()) == false) {
    std::string msg = "XpetraExt::MatrixMatrix::Jacobi: row map of C is not same as row map of B";
    throw(Xpetra::Exceptions::RuntimeError(msg));
  }
  
  // Both inputs must already be fill-complete.
  if (!A.isFillComplete())
    throw(Xpetra::Exceptions::RuntimeError("A is not fill-completed"));
  if (!B.isFillComplete())
    throw(Xpetra::Exceptions::RuntimeError("B is not fill-completed"));

  // The backend is asked to fill-complete C only when both flags are set.
  bool haveMultiplyDoFillComplete = call_FillComplete_on_result && doOptimizeStorage;

  if (C.getRowMap()->lib() == Xpetra::UseEpetra) {
#       ifndef HAVE_XPETRA_EPETRAEXT
    throw(Xpetra::Exceptions::RuntimeError("Xpetra::MatrixMatrix::Jacobi requires EpetraExt to be compiled."));
#else
    Epetra_CrsMatrix & epA = Xpetra::MatrixMatrix::Op2NonConstEpetraCrs(A);
    Epetra_CrsMatrix & epB = Xpetra::MatrixMatrix::Op2NonConstEpetraCrs(B);
    Epetra_CrsMatrix & epC = Xpetra::MatrixMatrix::Op2NonConstEpetraCrs(C);
    //    const Epetra_Vector & epD = toEpetra(Dinv);
    // epD is not declared anywhere else in this body; presumably the macro
    // introduces it as Dinv cast to EpetraVector - confirm against the
    // XPETRA_DYNAMIC_CAST definition.
    XPETRA_DYNAMIC_CAST(const EpetraVector, Dinv, epD, "Xpetra::MatrixMatrix::Jacobi() only accepts Xpetra::EpetraVector as input argument.");


    int i = EpetraExt::MatrixMatrix::Jacobi(omega,*epD.getEpetra_Vector(),epA,epB,epC,haveMultiplyDoFillComplete);
    // A non-zero return code from EpetraExt is reported as an exception.
    if (i != 0) {
      std::ostringstream buf;
      buf << i;
      std::string msg = "EpetraExt::MatrixMatrix::Jacobi return value of " + buf.str();
      throw(Exceptions::RuntimeError(msg));
    }
#endif
    } else if (C.getRowMap()->lib() == Xpetra::UseTpetra) {
// In-place sparse matrix sum, dispatched to the backend of A's row map.
//
// Forwards (A, transposeA, scalarA, B, scalarB) to the five-argument
// EpetraExt::MatrixMatrix::Add or Tpetra::MatrixMatrix::Add. B is handed over
// as a non-const matrix; per the backend documentation it receives the sum
// scalarA*op(A) + scalarB*B. For a row map that reports neither
// Xpetra::UseEpetra nor Xpetra::UseTpetra nothing is done.
//
//   A           matrix to add; read-only.
//   transposeA  forwarded to the backend as the transpose flag for A.
//   scalarA     forwarded to the backend as the coefficient of A.
//   B           matrix passed to the backend for modification.
//   scalarB     forwarded to the backend as the coefficient of B.
//
// Throws Xpetra::Exceptions::RuntimeError when the row maps of A and B
// differ, when the required backend was not compiled in, or when
// EpetraExt::MatrixMatrix::Add returns a non-zero code.
void Add(
    const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>& A,
    bool transposeA,
    Scalar scalarA,
    Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>& B,
    Scalar scalarB ) {

  // Both operands have to be laid out over the same row map.
  const bool rowMapsMatch = A.getRowMap()->isSameAs(*(B.getRowMap()));
  if (!rowMapsMatch)
    throw Xpetra::Exceptions::RuntimeError("Xpetra::MatrixMatrix::Add: matrix row maps are not the same.");

  // --- Epetra backend ---
  if (A.getRowMap()->lib() == Xpetra::UseEpetra) {
#ifdef HAVE_XPETRA_EPETRAEXT
    const Epetra_CrsMatrix& epetraA = Xpetra::MatrixMatrix::Op2EpetraCrs(A);
    Epetra_CrsMatrix&       epetraB = Xpetra::MatrixMatrix::Op2NonConstEpetraCrs(B);

    //FIXME is there a bug if beta=0?
    const int status = EpetraExt::MatrixMatrix::Add(epetraA, transposeA, scalarA, epetraB, scalarB);
    if (status != 0) {
      // Report the EpetraExt return code to the caller.
      std::ostringstream msg;
      msg << "EpetraExt::MatrixMatrix::Add return value of " << status;
      throw Xpetra::Exceptions::RuntimeError(msg.str());
    }
#else
    throw Xpetra::Exceptions::RuntimeError("Xpetra must be compiled with EpetraExt.");
#endif
    return;
  }

  // --- Tpetra backend ---
  if (A.getRowMap()->lib() == Xpetra::UseTpetra) {
#ifdef HAVE_XPETRA_TPETRA
    typedef Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> TpetraCrs;

    const TpetraCrs& tpetraA = Xpetra::MatrixMatrix::Op2TpetraCrs(A);
    TpetraCrs&       tpetraB = Xpetra::MatrixMatrix::Op2NonConstTpetraCrs(B);

    Tpetra::MatrixMatrix::Add(tpetraA, transposeA, scalarA, tpetraB, scalarB);
#else
    throw Xpetra::Exceptions::RuntimeError("Xpetra must be compiled with Tpetra.");
#endif
  }
} // end Add
// Backend dispatch for the Jacobi kernel on Xpetra matrices.
//
// After validating its inputs, hands (omega, Dinv, A, B, C) to
// Tpetra::MatrixMatrix::Jacobi when C's row map reports Xpetra::UseTpetra.
// An Epetra-backed C is always rejected with an exception by this generic
// version. Afterwards C is fill-completed here when that was requested but
// not delegated to the backend, and C's "stridedMaps" view is created from
// A and B.
//
//   omega, Dinv                  forwarded unchanged to the backend routine.
//   A, B                         input matrices; must be fill-complete.
//   C                            result matrix; its row map must be the same
//                                as the row maps of A and B.
//   call_FillComplete_on_result  request that C be fill-completed.
//   doOptimizeStorage            stored as "Optimize Storage" in the
//                                parameter list when fillComplete is called
//                                here; together with the flag above it decides
//                                whether the backend is asked to finalize C.
//
// Throws Xpetra::Exceptions::RuntimeError on a row-map mismatch, on inputs
// that are not fill-complete, for an Epetra-backed C, or when Tpetra support
// was not compiled in.
void Jacobi(
  Scalar omega,
  const Xpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> & Dinv,
  const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>& A,
  const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>& B,
  Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>& C,
  bool call_FillComplete_on_result = true,
  bool doOptimizeStorage = true) {

  // C has to share its row map with both inputs; A is tested before B.
  if (!C.getRowMap()->isSameAs(*A.getRowMap())) {
    throw Xpetra::Exceptions::RuntimeError("XpetraExt::MatrixMatrix::Jacobi: row map of C is not same as row map of A");
  }
  if (!C.getRowMap()->isSameAs(*B.getRowMap())) {
    throw Xpetra::Exceptions::RuntimeError("XpetraExt::MatrixMatrix::Jacobi: row map of C is not same as row map of B");
  }

  // Both inputs must already be finalized.
  if (!A.isFillComplete()) {
    throw Xpetra::Exceptions::RuntimeError("A is not fill-completed");
  }
  if (!B.isFillComplete()) {
    throw Xpetra::Exceptions::RuntimeError("B is not fill-completed");
  }

  // The backend finalizes C itself only when both flags are set.
  const bool backendFillsComplete = call_FillComplete_on_result && doOptimizeStorage;

  if (C.getRowMap()->lib() == Xpetra::UseEpetra) {
    // The generic version has no Epetra path: both preprocessor arms throw.
#ifndef HAVE_XPETRA_EPETRAEXT
    throw Xpetra::Exceptions::RuntimeError("Xpetra::MatrixMatrix::Jacobi requires EpetraExt to be compiled.");
#else
    throw Xpetra::Exceptions::RuntimeError("Xpetra::MatrixMatrix::Jacobi requires you to use an Epetra-compatible data type.");
#endif
  } else if (C.getRowMap()->lib() == Xpetra::UseTpetra) {
#ifdef HAVE_XPETRA_TPETRA
    typedef Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> TpetraCrs;

    const TpetraCrs& tpetraA = Xpetra::MatrixMatrix::Op2TpetraCrs(A);
    const TpetraCrs& tpetraB = Xpetra::MatrixMatrix::Op2TpetraCrs(B);
    TpetraCrs&       tpetraC = Xpetra::MatrixMatrix::Op2NonConstTpetraCrs(C);
    const RCP<Tpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >& tpetraDinv = toTpetra(Dinv);

    Tpetra::MatrixMatrix::Jacobi(omega, *tpetraDinv, tpetraA, tpetraB, tpetraC, backendFillsComplete);
#else
    throw Xpetra::Exceptions::RuntimeError("Xpetra must be compiled with Tpetra.");
#endif
  }

  // Finalize C here when it was requested but not delegated to the backend.
  if (call_FillComplete_on_result && !backendFillsComplete) {
    RCP<Teuchos::ParameterList> fillParams = rcp(new Teuchos::ParameterList());
    fillParams->set("Optimize Storage", doOptimizeStorage);
    C.fillComplete(B.getDomainMap(), B.getRangeMap(), fillParams);
  }

  // Transfer striding information. The const is cast away only to obtain the
  // RCP type handed to CreateView; the RCPs come from rcpFromRef.
  RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> > viewSrcA =
      Teuchos::rcp_const_cast<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> >(Teuchos::rcpFromRef(A));
  RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> > viewSrcB =
      Teuchos::rcp_const_cast<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> >(Teuchos::rcpFromRef(B));
  C.CreateView("stridedMaps", viewSrcA, false, viewSrcB, false); // TODO use references instead of RCPs
} // end Jacobi
// Backend dispatch for the sparse matrix-matrix product on Xpetra matrices.
//
// After validating its inputs, hands (A, transposeA, B, transposeB, C) to
// EpetraExt::MatrixMatrix::Multiply or Tpetra::MatrixMatrix::Multiply,
// depending on the library reported by C's row map. Afterwards C is
// fill-completed here when that was requested but not delegated to the
// backend, and C's "stridedMaps" view is created from A and B.
//
//   A, B                         input matrices; must be fill-complete.
//   transposeA, transposeB       forwarded to the backend as transpose flags.
//   C                            result matrix; its row map must be the same
//                                as A's row map, or as A's domain map when
//                                transposeA is set.
//   call_FillComplete_on_result  request that C be fill-completed.
//   doOptimizeStorage            stored as "Optimize Storage" in the
//                                parameter list when fillComplete is called
//                                here; together with the flag above it decides
//                                whether the backend is asked to finalize C.
//
// Throws Xpetra::Exceptions::RuntimeError on a map mismatch, on inputs that
// are not fill-complete, when the required backend was not compiled in, or
// when EpetraExt::MatrixMatrix::Multiply returns a non-zero code.
void Multiply(
  const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>& A,
  bool transposeA,
  const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>& B,
  bool transposeB,
  Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>& C,
  bool call_FillComplete_on_result = true,
  bool doOptimizeStorage = true) {

    // The rows of C must line up with the rows of op(A).
    if (transposeA) {
      if (!C.getRowMap()->isSameAs(*A.getDomainMap())) {
        throw Xpetra::Exceptions::RuntimeError("XpetraExt::MatrixMatrix::Multiply: row map of C is not same as domain map of A");
      }
    } else {
      if (!C.getRowMap()->isSameAs(*A.getRowMap())) {
        throw Xpetra::Exceptions::RuntimeError("XpetraExt::MatrixMatrix::Multiply: row map of C is not same as row map of A");
      }
    }

    // Both inputs must already be finalized.
    if (!A.isFillComplete()) {
      throw Xpetra::Exceptions::RuntimeError("A is not fill-completed");
    }
    if (!B.isFillComplete()) {
      throw Xpetra::Exceptions::RuntimeError("B is not fill-completed");
    }

    // The backend finalizes C itself only when both flags are set.
    const bool backendFillsComplete = call_FillComplete_on_result && doOptimizeStorage;

    if (C.getRowMap()->lib() == Xpetra::UseEpetra) {
#       ifndef HAVE_XPETRA_EPETRAEXT
      throw Xpetra::Exceptions::RuntimeError("Xpetra::MatrixMatrix::Multiply requires EpetraExt to be compiled.");
#else
      Epetra_CrsMatrix& epetraA = Xpetra::MatrixMatrix::Op2NonConstEpetraCrs(A);
      Epetra_CrsMatrix& epetraB = Xpetra::MatrixMatrix::Op2NonConstEpetraCrs(B);
      Epetra_CrsMatrix& epetraC = Xpetra::MatrixMatrix::Op2NonConstEpetraCrs(C);

      const int status = EpetraExt::MatrixMatrix::Multiply(epetraA, transposeA, epetraB, transposeB, epetraC, backendFillsComplete);
      if (status != 0) {
        // Report the EpetraExt return code to the caller.
        std::ostringstream msg;
        msg << "EpetraExt::MatrixMatrix::Multiply return value of " << status;
        throw Xpetra::Exceptions::RuntimeError(msg.str());
      }
#endif
    } else if (C.getRowMap()->lib() == Xpetra::UseTpetra) {
#ifdef HAVE_XPETRA_TPETRA
      typedef Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> TpetraCrs;

      const TpetraCrs& tpetraA = Xpetra::MatrixMatrix::Op2TpetraCrs(A);
      const TpetraCrs& tpetraB = Xpetra::MatrixMatrix::Op2TpetraCrs(B);
      TpetraCrs&       tpetraC = Xpetra::MatrixMatrix::Op2NonConstTpetraCrs(C);

      // 18Feb2013 JJH: the backend multiply is allowed to do the fillComplete
      // again; previously Tpetra's matrix matrix multiply did not support it.
      Tpetra::MatrixMatrix::Multiply(tpetraA, transposeA, tpetraB, transposeB, tpetraC, backendFillsComplete);
#else
      throw Xpetra::Exceptions::RuntimeError("Xpetra must be compiled with Tpetra.");
#endif
    }

    // Finalize C here when it was requested but not delegated to the backend.
    // Domain and range maps follow op(B) and op(A) respectively.
    if (call_FillComplete_on_result && !backendFillsComplete) {
      RCP<Teuchos::ParameterList> fillParams = rcp(new Teuchos::ParameterList());
      fillParams->set("Optimize Storage", doOptimizeStorage);
      C.fillComplete(transposeB ? B.getRangeMap() : B.getDomainMap(),
                     transposeA ? A.getDomainMap() : A.getRangeMap(),
                     fillParams);
    }

    // Transfer striding information. The const is cast away only to obtain the
    // RCP type handed to CreateView; the RCPs come from rcpFromRef.
    RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> > viewSrcA =
        Teuchos::rcp_const_cast<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> >(Teuchos::rcpFromRef(A));
    RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> > viewSrcB =
        Teuchos::rcp_const_cast<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> >(Teuchos::rcpFromRef(B));
    C.CreateView("stridedMaps", viewSrcA, transposeA, viewSrcB, transposeB); // TODO use references instead of RCPs
} // end Multiply