NOX::Abstract::MultiVector&
NOX::Thyra::MultiVector::
update(double alpha, const NOX::Abstract::MultiVector& a, double gamma)
{
  using Teuchos::tuple;
  const NOX::Thyra::MultiVector& aa =
    dynamic_cast<const NOX::Thyra::MultiVector&>(a);
  ::Thyra::linear_combination<double>(tuple(alpha)().getConst(),
    tuple(aa.thyraMultiVec.ptr().getConst())(), gamma,
    thyraMultiVec.ptr());
  return *this;
}
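// Editorial sketch (not from the original source): the idiom
// tuple(alpha)().getConst() above works because Teuchos::tuple(...) returns
// a fixed-size Teuchos::Tuple<T,N>, whose operator() yields an ArrayView<T>,
// and ArrayView::getConst() converts that to an ArrayView<const T>:
#include "Teuchos_Tuple.hpp"
#include "Teuchos_ArrayView.hpp"

void tupleToArrayViewSketch ()
{
  using Teuchos::tuple;
  Teuchos::Tuple<double,2> coeffs = tuple (2.0, -1.0);        // stack storage
  Teuchos::ArrayView<double> view = coeffs ();                // non-owning view
  Teuchos::ArrayView<const double> cview = view.getConst ();  // const view
  (void) cview; // this const view is the form linear_combination() expects
}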
 void
 ele_wise_bound (const ::Thyra::VectorBase<Scalar>& x_lo,
                 const ::Thyra::VectorBase<Scalar>& x_up,
                 const Teuchos::Ptr< ::Thyra::VectorBase<Scalar> > &x) {
   using Teuchos::tuple;
   using Teuchos::ptrInArg;
   using Teuchos::null;
   RTOpPack::TOpEleWiseBound<Scalar> ele_wise_bound_op;
   ::Thyra::applyOp<Scalar> (ele_wise_bound_op,
                             tuple (ptrInArg (x_lo), ptrInArg (x_up)), tuple (x),
                             null);
 }
 void
 ele_wise_prune_upper (const ::Thyra::VectorBase<Scalar>& x,
                       const ::Thyra::VectorBase<Scalar>& x_up,
                       const Teuchos::Ptr< ::Thyra::VectorBase<Scalar> > &v,
                       const Scalar& eps) {
   using Teuchos::tuple;
   using Teuchos::ptrInArg;
   using Teuchos::null;
   RTOpPack::TOpEleWisePruneUpper_2_1<Scalar> ele_wise_prune_op(eps);
   ::Thyra::applyOp<Scalar> (ele_wise_prune_op,
                             tuple (ptrInArg (x), ptrInArg (x_up)), tuple (v),
                             null);
 }
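// Editorial usage sketch, assuming the wrappers above are instantiated for
// Scalar = double; the vector-space size n is illustrative only:
#include "Thyra_DefaultSpmdVectorSpace.hpp"
#include "Thyra_VectorSpaceBase.hpp"
#include "Thyra_VectorStdOps.hpp"

void eleWiseBoundSketch ()
{
  typedef double Scalar;
  const Teuchos::Ordinal n = 8;
  Teuchos::RCP<const ::Thyra::VectorSpaceBase<Scalar> > vs =
    ::Thyra::defaultSpmdVectorSpace<Scalar> (n);
  Teuchos::RCP< ::Thyra::VectorBase<Scalar> > x_lo = ::Thyra::createMember (vs);
  Teuchos::RCP< ::Thyra::VectorBase<Scalar> > x_up = ::Thyra::createMember (vs);
  Teuchos::RCP< ::Thyra::VectorBase<Scalar> > x    = ::Thyra::createMember (vs);
  ::Thyra::V_S (x_lo.ptr (), 0.0); // elementwise lower bound
  ::Thyra::V_S (x_up.ptr (), 1.0); // elementwise upper bound
  ::Thyra::V_S (x.ptr (),    2.0); // start outside the bounds
  // Assuming TOpEleWiseBound applies the bounds in place, this clips every
  // entry of x into [0,1].
  ele_wise_bound (*x_lo, *x_up, x.ptr ());
}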
RCP<LinearProblem<Scalar,MultiVector<Scalar,int>,Operator<Scalar,int> > > buildProblem()
{
  typedef ScalarTraits<Scalar>         SCT;
  typedef typename SCT::magnitudeType  MT;
  typedef Operator<Scalar,int>         OP;
  typedef MultiVector<Scalar,int>      MV;
  typedef OperatorTraits<Scalar,MV,OP> OPT;
  typedef MultiVecTraits<Scalar,MV>    MVT;
  RCP<CrsMatrix<Scalar,int> > A = rcp(new CrsMatrix<Scalar,int>(vmap,rnnzmax));
  if (mptestmypid == 0) {
    // HB format is compressed column; CrsMatrix is compressed row.  The file
    // stores only one triangle of the (symmetric) matrix, so the loop below
    // mirrors each off-diagonal entry across the diagonal.
    const double *dptr = dvals;
    const int *rptr = rowind;
    for (int c=0; c<mptestdim; ++c) {
      for (int colnnz=0; colnnz < colptr[c+1]-colptr[c]; ++colnnz) {
        A->insertGlobalValues(*rptr-1,tuple(c),tuple<Scalar>(*dptr));
        if (c != *rptr-1) {
          A->insertGlobalValues(c,tuple(*rptr-1),tuple<Scalar>(*dptr));
        }
        ++rptr;
        ++dptr;
      }
    }
  }
  // distribute matrix data to other nodes
  A->fillComplete();
  // Create initial MV and solution MV
  RCP<MV> B, X;
  X = rcp( new MV(vmap,numrhs) );
  MVT::MvRandom( *X );
  B = rcp( new MV(vmap,numrhs) );
  OPT::Apply( *A, *X, *B );
  MVT::MvInit( *X, 0.0 );
  // Construct a linear problem instance with zero initial MV
  RCP<LinearProblem<Scalar,MV,OP> > problem = rcp( new LinearProblem<Scalar,MV,OP>(A,X,B) );
  problem->setLabel(Teuchos::typeName(SCT::one()));
  // diagonal preconditioner
  // if (precond) {
  //   Vector<Scalar,int> diags(A->getRowMap());
  //   A->getLocalDiagCopy(diags);
  //   for (Teuchos_Ordinal i=0; i<vmap->getNumMyEntries(); ++i) {
  //     TEST_FOR_EXCEPTION(diags[i] <= SCT::zero(), std::runtime_error,"Matrix is not positive-definite: " << diags[i]);
  //     diags[i] = SCT::one() / diags[i];
  //   }
  //   RCP<Operator<Scalar,int> > P = rcp(new DiagPrecond<Scalar,int>(diags));
  //   problem->setRightPrec(P);
  // }
  TEUCHOS_TEST_FOR_EXCEPT(problem->setProblem() == false);
  return problem;
}
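// Editorial follow-on sketch (not part of the original test): the returned
// problem can be handed straight to a Belos solver manager.  Parameter names
// below are standard Belos ones; the calling context supplies Scalar, MV, OP:
//
//   RCP<Teuchos::ParameterList> params = Teuchos::parameterList ();
//   params->set ("Convergence Tolerance", 1.0e-8);
//   Belos::PseudoBlockCGSolMgr<Scalar,MV,OP> solver (buildProblem (), params);
//   Belos::ReturnType ret = solver.solve ();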
Teuchos::RCP<const Thyra::ProductVectorBase<Scalar> >
Thyra::castOrCreateProductVectorBase(const RCP<const VectorBase<Scalar> > v)
{
  using Teuchos::rcp_dynamic_cast;
  using Teuchos::tuple;
  const RCP<const ProductVectorBase<Scalar> > prod_v =
    rcp_dynamic_cast<const ProductVectorBase<Scalar> >(v);
  if (nonnull(prod_v)) {
    return prod_v;
  }
  return defaultProductVector<Scalar>(
    productVectorSpace<Scalar>(tuple(v->space())()),
    tuple(v)()
    );
}
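// Editorial note: when the dynamic cast fails, the vector is wrapped as a
// one-block product vector whose single block is v itself, so downstream code
// can treat monolithic and blocked vectors uniformly, e.g.:
//
//   RCP<const ProductVectorBase<double> > pv =
//     Thyra::castOrCreateProductVectorBase<double> (v);
//   pv->getVectorBlock (0); // the first block (v itself in the wrapped case)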
void MetricJacobian<NodeT,ScalarT>::SetDataViews(
    ArrayRCP<NodeT>& mesh_data, map<string,int>& mesh_map_offset,
    ArrayRCP<ScalarT>& soln_data, map<string,int>& soln_map_offset,
    ArrayRCP<MetricJacobian<NodeT,ScalarT>::ResidT>& resid_data,
    map<string,int>& resid_map_offset) {
  using Teuchos::tuple;
  // views of inputs
  node_coords_ = GenerateConstView(mesh_data, mesh_map_offset.at("node_coords"),
                                   tuple(num_elems_, num_nodes_per_elem_, dim_));
  // views of outputs
  jacob_ = GenerateView(mesh_data, mesh_map_offset.at("jacob"),
                        tuple(num_elems_, num_cub_points_, dim_, dim_));
  jacob_inv_ = GenerateView(mesh_data, mesh_map_offset.at("jacob_inv"),
                            tuple(num_elems_, num_cub_points_, dim_, dim_));
  jacob_det_ = GenerateView(mesh_data, mesh_map_offset.at("jacob_det"),
                            tuple(num_elems_, num_cub_points_));
}
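// Editorial note: each tuple(...) call above packs the view extents into a
// fixed-size Teuchos::Tuple (e.g. Tuple<int,4> for the Jacobian), which the
// Generate*View helpers presumably accept as an ArrayView of dimensions.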
Example #7
  // Create and return a simple example CrsMatrix, with row
  // distribution over the given Map.
  Teuchos::RCP<const TpetraMatrixType>
  create (const Teuchos::RCP<const map_type>& map) const
  {
    using Teuchos::arcp;
    using Teuchos::ArrayRCP;
    using Teuchos::ArrayView;
    using Teuchos::RCP;
    using Teuchos::rcp;
    using Teuchos::Time;
    using Teuchos::TimeMonitor;
    using Teuchos::tuple;
    typedef Tpetra::global_size_t GST;

    // Create a timer for sparse matrix creation.
    RCP<Time> timer = TimeMonitor::getNewCounter ("Sparse matrix creation");

    // Time the whole scope of this routine, not counting timer lookup.
    TimeMonitor monitor (*timer);

    // Create a Tpetra::Matrix using the Map, with dynamic allocation.
    RCP<TpetraMatrixType> A = rcp (new TpetraMatrixType (map, 3));

    // Add rows one at a time.  Off diagonal values will always be -1.
    const scalar_type two    = static_cast<scalar_type>( 2.0);
    const scalar_type negOne = static_cast<scalar_type>(-1.0);
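
    // The insertions below assemble the standard tridiagonal 1-D Laplacian
    // stencil, truncated at the first and last rows:
    //
    //   [  2 -1               ]
    //   [ -1  2 -1            ]
    //   [      ...  ...  ...  ]
    //   [            -1  2 -1 ]
    //   [               -1  2 ]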

    const GST numGlobalElements = map->getGlobalNumElements ();
    //    const size_t numMyElements = map->getNodeNumElements ();

    // The list of global elements owned by this MPI process.
    ArrayView<const global_ordinal_type> myGlobalElements =
      map->getNodeElementList ();

    typedef typename ArrayView<const global_ordinal_type>::const_iterator iter_type;
    for (iter_type it = myGlobalElements.begin(); it != myGlobalElements.end(); ++it) {
      // getNodeElementList() returns global indices, so no local-to-global
      // translation is needed here.
      const global_ordinal_type i_global = *it;

      // Can't insert local indices without a column map, so we insert
      // global indices here.
      if (i_global == 0) {
        A->insertGlobalValues (i_global,
                              tuple (i_global, i_global+1),
                              tuple (two, negOne));
      } else if (static_cast<GST> (i_global) == numGlobalElements - 1) {
        A->insertGlobalValues (i_global,
                              tuple (i_global-1, i_global),
                              tuple (negOne, two));
      } else {
        A->insertGlobalValues (i_global,
                              tuple (i_global-1, i_global, i_global+1),
                              tuple (negOne, two, negOne));
      }
    }

    // Finish up the matrix.
    A->fillComplete ();
    return A;
  }
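  // Editorial usage sketch (hypothetical names; the surrounding class and its
  // typedefs are elided from this excerpt):
  //
  //   RCP<const map_type> map =
  //     rcp (new map_type (numGlobalElements, indexBase, comm));
  //   RCP<const TpetraMatrixType> A = maker.create (map);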
Teuchos::RCP<const Thyra::LinearOpBase<Scalar> >
Thyra::multiply(
  const RCP<const LinearOpBase<Scalar> > &A,
  const RCP<const LinearOpBase<Scalar> > &B,
  const std::string &M_label
  )
{
  using Teuchos::tuple;
  RCP<DefaultMultipliedLinearOp<Scalar> > multOp =
    defaultMultipliedLinearOp<Scalar>(tuple(A, B)());
  if(M_label.length())
    multOp->setObjectLabel(M_label);
  return multOp;
}
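// Editorial note: DefaultMultipliedLinearOp composes the operators
// implicitly; the returned M applies B first and then A (M*x = A*(B*x)), and
// no matrix-matrix product is ever formed.  For example:
//
//   RCP<const LinearOpBase<double> > M = Thyra::multiply (A, B, "A*B");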
void Thyra::reductions( const MultiVectorBase<Scalar>& V, const NormOp &op,
  const ArrayView<typename ScalarTraits<Scalar>::magnitudeType> &norms )
{
  using Teuchos::tuple; using Teuchos::ptrInArg; using Teuchos::null;
  const int m = V.domain()->dim();
  Array<RCP<RTOpPack::ReductTarget> > rcp_op_targs(m);
  Array<Ptr<RTOpPack::ReductTarget> > op_targs(m);
  for( int kc = 0; kc < m; ++kc ) {
    rcp_op_targs[kc] = op.reduct_obj_create();
    op_targs[kc] = rcp_op_targs[kc].ptr();
  }
  applyOp<Scalar>(op, tuple(ptrInArg(V)),
    ArrayView<Ptr<MultiVectorBase<Scalar> > >(null),
    op_targs );
  for( int kc = 0; kc < m; ++kc ) {
    norms[kc] = op(*op_targs[kc]);
  }
}
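// Editorial note: the pattern above allocates one reduction target per column
// of V, fills them all in a single applyOp() sweep, and then extracts each
// scalar with the op's operator().  With a 2-norm reduction op, norms[kc]
// ends up holding the 2-norm of column kc of V (assuming NormOp exposes
// reduct_obj_create() and operator() as used here).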
Example #10
int main(int argc, char *argv[])
{
#ifndef HAVE_TPETRA_COMPLEX_DOUBLE
#  error "Anasazi: This test requires Scalar = std::complex<double> to be enabled in Tpetra."
#else
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::tuple;
  using Tpetra::CrsMatrix;
  using Tpetra::Map;
  using std::cout;
  using std::endl;

  typedef std::complex<double>                ST;
  typedef Teuchos::ScalarTraits<ST>          SCT;
  typedef SCT::magnitudeType                  MT;
  typedef Tpetra::MultiVector<ST>             MV;
  typedef MV::global_ordinal_type             GO;
  typedef Tpetra::Operator<ST>                OP;
  typedef Anasazi::MultiVecTraits<ST,MV>     MVT;
  typedef Anasazi::OperatorTraits<ST,MV,OP>  OPT;

  Tpetra::ScopeGuard tpetraScope (&argc, &argv);

  bool success = false;

  const ST ONE = SCT::one ();

  int info = 0;

  RCP<const Teuchos::Comm<int> > comm = Tpetra::getDefaultComm ();

  const int MyPID = comm->getRank ();

  bool verbose = false;
  bool debug = false;
  bool insitu = false;
  bool herm = false;
  std::string which("LM");
  std::string filename;
  int nev = 4;
  int blockSize = 4;
  MT tol = 1.0e-6;

  Teuchos::CommandLineProcessor cmdp(false,true);
  cmdp.setOption("verbose","quiet",&verbose,"Print messages and results.");
  cmdp.setOption("debug","nodebug",&debug,"Print debugging information.");
  cmdp.setOption("insitu","exsitu",&insitu,"Perform in situ restarting.");
  cmdp.setOption("sort",&which,"Targeted eigenvalues (SM or LM).");
  cmdp.setOption("herm","nonherm",&herm,"Solve Hermitian or non-Hermitian problem.");
  cmdp.setOption("filename",&filename,"Filename for Harwell-Boeing test matrix (assumes non-Hermitian unless specified otherwise).");
  cmdp.setOption("nev",&nev,"Number of eigenvalues to compute.");
  cmdp.setOption("blockSize",&blockSize,"Block size for the algorithm.");
  cmdp.setOption("tol",&tol,"Tolerance for convergence.");
  if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
    return -1;
  }
  if (debug) verbose = true;
  if (filename == "") {
    // get default based on herm
    if (herm) {
      filename = "mhd1280b.cua";
    }
    else {
      filename = "mhd1280a.cua";
    }
  }

  if (MyPID == 0) {
    cout << Anasazi::Anasazi_Version() << endl << endl;
  }

  // Get the data from the HB file
  int dim,dim2,nnz;
  int rnnzmax;
  double *dvals;
  int *colptr,*rowind;
  nnz = -1;
  if (MyPID == 0) {
    info = readHB_newmat_double(filename.c_str(),&dim,&dim2,&nnz,&colptr,&rowind,&dvals);
    // find maximum NNZ over all rows
    std::vector<int> rnnz(dim,0);
    for (int *ri=rowind; ri<rowind+nnz; ++ri) {
      ++rnnz[*ri-1];
    }
    rnnzmax = *std::max_element(rnnz.begin(),rnnz.end());
  }
  else {
    // address uninitialized data warnings
    dvals = NULL;
    colptr = NULL;
    rowind = NULL;
  }
  Teuchos::broadcast(*comm,0,&info);
  Teuchos::broadcast(*comm,0,&nnz);
  Teuchos::broadcast(*comm,0,&dim);
  Teuchos::broadcast(*comm,0,&rnnzmax);
  if (info == 0 || nnz < 0) {
    if (MyPID == 0) {
      cout << "Error reading '" << filename << "'" << endl
           << "End Result: TEST FAILED" << endl;
    }
    return -1;
  }
  // create map
  RCP<const Map<> > map = rcp (new Map<> (dim, 0, comm));
  RCP<CrsMatrix<ST> > K = rcp (new CrsMatrix<ST> (map, rnnzmax));
  if (MyPID == 0) {
    // Convert interleaved doubles to complex values
    // HB format is compressed column. CrsMatrix is compressed row.
    const double *dptr = dvals;
    const int *rptr = rowind;
    for (int c=0; c<dim; ++c) {
      for (int colnnz=0; colnnz < colptr[c+1]-colptr[c]; ++colnnz) {
        K->insertGlobalValues (static_cast<GO> (*rptr++ - 1), tuple<GO> (c), tuple (ST (dptr[0], dptr[1])));
        dptr += 2;
      }
    }
  }
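  // Editorial note: the HB file stores each complex nonzero as two
  // consecutive doubles (re, im), so dptr advances by 2 per entry while rptr
  // advances by 1; e.g. dptr[0]=1.5, dptr[1]=-2.0 becomes ST(1.5,-2.0).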
  if (MyPID == 0) {
    // Clean up.
    free( dvals );
    free( colptr );
    free( rowind );
  }
  K->fillComplete();
  // cout << *K << endl;

  // Create initial vectors
  RCP<MV> ivec = rcp( new MV(map,blockSize) );
  ivec->randomize ();

  // Create eigenproblem
  RCP<Anasazi::BasicEigenproblem<ST,MV,OP> > problem =
    rcp( new Anasazi::BasicEigenproblem<ST,MV,OP>(K,ivec) );
  //
  // Inform the eigenproblem whether the operator K is Hermitian
  problem->setHermitian(herm);
  //
  // Set the number of eigenvalues requested
  problem->setNEV( nev );
  //
  // Inform the eigenproblem that you are done passing it information
  bool boolret = problem->setProblem();
  if (! boolret) {
    if (MyPID == 0) {
      cout << "Anasazi::BasicEigenproblem::SetProblem() returned with error." << endl
           << "End Result: TEST FAILED" << endl;
    }
    return -1;
  }


  // Set verbosity level
  int verbosity = Anasazi::Errors + Anasazi::Warnings + Anasazi::FinalSummary + Anasazi::TimingDetails;
  if (verbose) {
    verbosity += Anasazi::IterationDetails;
  }
  if (debug) {
    verbosity += Anasazi::Debug;
  }



  // Eigensolver parameters
  int numBlocks = 8;
  int maxRestarts = 10;
  //
  // Create parameter list to pass into the solver manager
  Teuchos::ParameterList MyPL;
  MyPL.set( "Verbosity", verbosity );
  MyPL.set( "Which", which );
  MyPL.set( "Block Size", blockSize );
  MyPL.set( "Num Blocks", numBlocks );
  MyPL.set( "Maximum Restarts", maxRestarts );
  MyPL.set( "Convergence Tolerance", tol );
  MyPL.set( "In Situ Restarting", insitu );
  //
  // Create the solver manager
  Anasazi::BlockKrylovSchurSolMgr<ST,MV,OP> MySolverMgr(problem, MyPL);

  // Solve the problem to the specified tolerances or length
  Anasazi::ReturnType returnCode = MySolverMgr.solve();
  success = (returnCode == Anasazi::Converged);

  // Get the eigenvalues and eigenvectors from the eigenproblem
  Anasazi::Eigensolution<ST,MV> sol = problem->getSolution();
  RCP<MV> evecs = sol.Evecs;
  int numev = sol.numVecs;

  if (numev > 0) {
    std::ostringstream os;
    os.setf(std::ios::scientific, std::ios::floatfield);
    os.precision(6);

    // Compute the direct residual
    std::vector<MT> normV( numev );
    Teuchos::SerialDenseMatrix<int,ST> T (numev, numev);
    for (int i=0; i<numev; i++) {
      T(i,i) = ST(sol.Evals[i].realpart,sol.Evals[i].imagpart);
    }
    RCP<MV> Kvecs = MVT::Clone( *evecs, numev );

    OPT::Apply( *K, *evecs, *Kvecs );

    MVT::MvTimesMatAddMv( -ONE, *evecs, T, ONE, *Kvecs );
    MVT::MvNorm( *Kvecs, normV );

    os << "Direct residual norms computed in BlockKrylovSchurComplex_test.exe" << endl
       << std::setw(20) << "Eigenvalue" << std::setw(20) << "Residual  " << endl
       << "----------------------------------------" << endl;
    for (int i=0; i<numev; i++) {
      if ( SCT::magnitude(T(i,i)) != SCT::zero() ) {
        normV[i] = SCT::magnitude(normV[i]/T(i,i));
      }
      os << std::setw(20) << T(i,i) << std::setw(20) << normV[i] << endl;
      success = success && (normV[i] < tol); // require every residual to pass
    }
    if (MyPID==0) {
      cout << endl << os.str() << endl;
    }
  }

  if (MyPID==0) {
    if (success)
      cout << "End Result: TEST PASSED" << endl;
    else
      cout << "End Result: TEST FAILED" << endl;
  }

  return ( success ? EXIT_SUCCESS : EXIT_FAILURE );
#endif // HAVE_TPETRA_COMPLEX_DOUBLE
}
Example #11
//
// Test for Tpetra::CrsMatrix::sumIntoGlobalValues(), with nonowned
// rows.  The test creates the CrsMatrix with a static graph, so that
// globalAssemble() uses sumIntoGlobalValues() instead of
// insertGlobalValues() to merge in the incoming matrix entries.  All
// calls to sumIntoGlobalValues() in this test are for nonowned rows,
// and all the calls are correct (that is, the processes that own
// those rows have entries in the corresponding columns, so that
// nonowned fill does not require creating new entries).
//
// mfh 16 Dec 2012: The one-template-argument version breaks explicit
// instantiation.  Ah well.
//
//TEUCHOS_UNIT_TEST_TEMPLATE_1_DECL( CrsMatrix, NonlocalSumInto, CrsMatrixType )
TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL( CrsMatrix, NonlocalSumInto, LocalOrdinalType, GlobalOrdinalType, ScalarType, NodeType )
{
  using Tpetra::createContigMapWithNode;
  using Tpetra::createNonContigMapWithNode;
  using Tpetra::global_size_t;
  using Tpetra::Map;
  using Teuchos::Array;
  using Teuchos::ArrayView;
  using Teuchos::as;
  using Teuchos::av_const_cast;
  using Teuchos::Comm;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcp_const_cast;
  using Teuchos::OrdinalTraits;
  using Teuchos::outArg;
  using Teuchos::ParameterList;
  using Teuchos::parameterList;
  using Teuchos::reduceAll;
  using Teuchos::ScalarTraits;
  using Teuchos::tuple;
  using Teuchos::TypeNameTraits;
  using std::endl;

#if 0
  // Extract typedefs from the CrsMatrix specialization.
  typedef typename CrsMatrixType::scalar_type scalar_type;
  typedef typename CrsMatrixType::local_ordinal_type local_ordinal_type;
  typedef typename CrsMatrixType::global_ordinal_type global_ordinal_type;
  typedef typename CrsMatrixType::node_type node_type;
#endif // 0

  typedef ScalarType scalar_type;
  typedef LocalOrdinalType local_ordinal_type;
  typedef GlobalOrdinalType global_ordinal_type;
  typedef NodeType node_type;

  // Typedefs derived from the above canonical typedefs.
  typedef ScalarTraits<scalar_type> STS;
  typedef Map<local_ordinal_type, global_ordinal_type, node_type> map_type;

  // Abbreviation typedefs.
  typedef scalar_type ST;
  typedef local_ordinal_type LO;
  typedef global_ordinal_type GO;
  typedef node_type NT;

  typedef Tpetra::CrsMatrix<ST, LO, GO, NT> CrsMatrixType;

  // CrsGraph specialization corresponding to CrsMatrixType (the
  // CrsMatrix specialization).
  typedef Tpetra::CrsGraph<LO, GO, NT, typename CrsMatrixType::mat_solve_type> crs_graph_type;

  ////////////////////////////////////////////////////////////////////
  // HERE BEGINS THE TEST.
  ////////////////////////////////////////////////////////////////////

  const global_size_t INVALID = OrdinalTraits<global_size_t>::invalid();

  // Get the default communicator.
  RCP<const Comm<int> > comm = Tpetra::DefaultPlatform::getDefaultPlatform ().getComm ();
  const int numProcs = comm->getSize ();
  const int myRank = comm->getRank ();

  if (myRank == 0) {
    out << "Test with " << numProcs << " process" << (numProcs != 1 ? "es" : "") << endl;
  }

  // This test doesn't make much sense if there is only one MPI
  // process.  We let it pass trivially in that case.
  if (numProcs == 1) {
    out << "Number of processes in world is one; test passes trivially." << endl;
    return;
  }

  // Get a Kokkos Node instance.  It would be nice if we could pass in
  // parameters here, but threads don't matter for this test; it's a
  // test for distributed-memory capabilities.

  if (myRank == 0) {
    out << "Creating Kokkos Node of type " << TypeNameTraits<node_type>::name () << endl;
  }
  RCP<node_type> node;
  {
    ParameterList pl; // Kokkos Node constructors take a ParameterList (may be empty).
    node = rcp (new node_type (pl));
  }

  // Number of rows in the matrix owned by each process.
  const LO numLocalRows = 10;

  // Number of (global) rows and columns in the matrix.
  const GO numGlobalRows = numLocalRows * numProcs;
  const GO numGlobalCols = numGlobalRows;
  // Prevent compile warning for unused variable.
  // (It's not really "variable" if it's const, but oh well.)
  (void) numGlobalCols;

  if (myRank == 0) {
    out << "Creating contiguous row Map" << endl;
  }

  // Create a contiguous row Map, with numLocalRows rows per process.
  RCP<const map_type> rowMap = createContigMapWithNode<LO, GO, NT> (INVALID, numLocalRows, comm, node);

  // For now, reuse the row Map for the domain and range Maps.  Later,
  // we might want to test using different domain or range Maps.
  RCP<const map_type> domainMap = rowMap;
  RCP<const map_type> rangeMap = rowMap;

  // Min and max row and column index of this process.  Use the row
  // Map for the row and column indices, since we're only inserting
  // indices into the graph for rows that the calling process owns.
  const GO globalMinRow = rowMap->getMinGlobalIndex ();
  const GO globalMaxRow = rowMap->getMaxGlobalIndex ();
  const GO globalMinCol = domainMap->getMinAllGlobalIndex ();
  const GO globalMaxCol = domainMap->getMaxAllGlobalIndex ();

  if (myRank == 0) {
    out << "Creating graph" << endl;
  }

  // Create a numGlobalRows by numGlobalCols graph and set its
  // structure.  Every process sets its diagonal entries (which it
  // owns), and its local (0,0) (if not on the diagonal) and
  // (numLocalRows-1, numLocalCols-1) (if not on the diagonal)
  // entries.  We will use the off-diagonal entries to test
  // modification of nonlocal entries.
  RCP<const crs_graph_type> graph;
  {
    // We have a good upper bound for the number of entries per row, so use static profile.
    RCP<crs_graph_type> nonconstGraph (new crs_graph_type (rowMap, 2, Tpetra::StaticProfile));

    TEUCHOS_TEST_FOR_EXCEPTION(globalMinRow >= globalMaxRow, std::logic_error,
      "This test only works if globalMinRow < globalMaxRow.");

    // Insert all the diagonal entries.
    for (GO globalRow = globalMinRow; globalRow <= globalMaxRow; ++globalRow) {
      nonconstGraph->insertGlobalIndices (globalRow, tuple (globalRow));
    }

    // Insert the local (0,0) entry, if not on the diagonal.
    if (globalMinRow > rowMap->getMinAllGlobalIndex ()) {
      nonconstGraph->insertGlobalIndices (globalMinRow, tuple (globalMinCol));
    }

    // Insert the local (numLocalRows-1, numLocalCols-1) entry, if not on the diagonal.
    if (globalMaxRow < rowMap->getMaxAllGlobalIndex ()) {
      nonconstGraph->insertGlobalIndices (globalMaxRow, tuple (globalMaxCol));
    }

    nonconstGraph->fillComplete (domainMap, rangeMap);
    graph = rcp_const_cast<const crs_graph_type> (nonconstGraph);
  }

  // Test whether the graph has the correct structure.
  bool localGraphSuccess = true;
  std::ostringstream graphFailMsg;
  {
    Array<GO> ind (2); // upper bound

    for (GO globalRow = globalMinRow; globalRow <= globalMaxRow; ++globalRow) {
      size_t numEntries = 0; // output argument of the call below.
      graph->getGlobalRowCopy (globalRow, ind (), numEntries);

      // Revise view based on numEntries.
      ArrayView<GO> indView = ind.view (0, numEntries);

      // Sort the view.
      std::sort (indView.begin (), indView.end ());

      if (globalRow == globalMinRow && globalRow > rowMap->getMinAllGlobalIndex ()) {
        if (numEntries != as<size_t> (2)) {
          localGraphSuccess = false;
          graphFailMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": numEntries = " << numEntries << " != 2" << endl;
        }
        if (numEntries > 0 && indView[0] != globalMinCol) {
          localGraphSuccess = false;
          graphFailMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": indView[0] = " << indView[0] << " != globalMinCol = " << globalMinCol << endl;
        }
        if (numEntries > 1 && indView[1] != globalRow) {
          localGraphSuccess = false;
          graphFailMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": indView[1] = " << indView[1] << " != globalRow = " << globalRow << endl;
        }
      }
      else if (globalRow == globalMaxRow && globalRow < rowMap->getMaxAllGlobalIndex ()) {
        if (numEntries != as<size_t> (2)) {
          localGraphSuccess = false;
          graphFailMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": numEntries = " << numEntries << " != 2" << endl;
        }
        if (numEntries > 0 && indView[0] != globalRow) {
          localGraphSuccess = false;
          graphFailMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": indView[0] = " << indView[0] << " != globalRow = " << globalRow << endl;
        }
        if (numEntries > 1 && indView[1] != globalMaxCol) {
          localGraphSuccess = false;
          graphFailMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": indView[1] = " << indView[1] << " != globalMaxCol = " << globalMaxCol << endl;
        }
      }
      else {
        if (numEntries != as<size_t> (1)) {
          localGraphSuccess = false;
          graphFailMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": numEntries = " << numEntries << " != 1" << endl;
        }
        if (numEntries > 0 && indView[0] != globalRow) {
          localGraphSuccess = false;
          graphFailMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": indView[0] = " << indView[0] << " != globalRow = " << globalRow << endl;
        }
      }
    }
  }

  // Make sure that all processes successfully created the graph.
  bool globalGraphSuccess = true;
  {
    int globalGraphSuccess_int = 1;
    reduceAll (*comm, Teuchos::REDUCE_MIN, localGraphSuccess ? 1 : 0, outArg (globalGraphSuccess_int));
    globalGraphSuccess = (globalGraphSuccess_int != 0);
  }
  if (! globalGraphSuccess) {
    if (myRank == 0) {
      out << "Graph structure not all correct:" << endl << endl;
    }
    // Print out the failure messages on all processes.
    for (int p = 0; p < numProcs; ++p) {
      if (p == myRank) {
        out << graphFailMsg.str () << endl;
        std::flush (out);
      }
      // Do some barriers to allow output to finish.
      comm->barrier ();
      comm->barrier ();
      comm->barrier ();
    }
  }
  TEUCHOS_TEST_FOR_EXCEPTION(! globalGraphSuccess, std::logic_error, "Graph structure test failed.");

  if (myRank == 0) {
    out << "Creating matrix" << endl;
  }

  // Create the matrix, using the above graph.
  RCP<CrsMatrixType> matrix (new CrsMatrixType (graph));

  if (myRank == 0) {
    out << "Setting all matrix entries to 1" << endl;
  }

  // Set all the owned entries to one.  Later we'll set nonlocal
  // entries' values in a loop.
  matrix->setAllToScalar (STS::one ());

  // Sum into nonowned entries (which nevertheless exist in the
  // matrix, just not on this process) using this process' rank.
  // After global assembly, this should result in those entries having
  // value equal to one plus the rank of the process that wrote to
  // them.  That value happens to be myRank for the (0,0) local entry
  // (except when myRank==0, in which case the value is 1), and
  // myRank+2 for the (numLocalRows-1,numLocalCols-1) local entry
  // (except when myRank==numProcs-1, in which case the value is 1).
  if (globalMinRow > rowMap->getMinAllGlobalIndex ()) {
    // Write to the (numLocalRows-1,numLocalCols-1) local entry of the previous process.
    matrix->sumIntoGlobalValues (globalMinRow-1, tuple (globalMaxCol), tuple (as<ST> (myRank)));
  }
  if (globalMaxRow < rowMap->getMaxAllGlobalIndex ()) {
    // Write to the (0,0) local entry of the next process.
    matrix->sumIntoGlobalValues (globalMaxRow+1, tuple (globalMinCol), tuple (as<ST> (myRank)));
  }

  if (myRank == 0) {
    out << "Calling fillComplete on the matrix" << endl;
  }
  matrix->fillComplete (domainMap, rangeMap);

  if (myRank == 0) {
    out << "Testing the matrix values" << endl;
  }

  // Test whether the entries have their correct values.
  bool localSuccess = true;
  std::ostringstream failMsg;
  {
    Array<GO> ind (2); // upper bound
    Array<ST> val (2); // upper bound

    for (GO globalRow = globalMinRow; globalRow <= globalMaxRow; ++globalRow) {
      size_t numEntries = 0; // output argument of the call below.
      matrix->getGlobalRowCopy (globalRow, ind (), val (), numEntries);

      // Revise views based on numEntries.
      ArrayView<GO> indView = ind.view (0, numEntries);
      ArrayView<ST> valView = val.view (0, numEntries);

      // Sort the views jointly by column index.
      Tpetra::sort2 (indView.begin (), indView.end (), valView.begin ());

      if (globalRow == globalMinRow && globalRow > rowMap->getMinAllGlobalIndex ()) {
        if (numEntries != as<size_t> (2)) {
          localSuccess = false;
          failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": numEntries = " << numEntries << " != 2" << endl;
        }
        if (numEntries > 0 && indView[0] != globalMinCol) {
          localSuccess = false;
          failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": indView[0] = " << indView[0] << " != globalMinCol = " << globalMinCol << endl;
        }
        if (numEntries > 1 && indView[1] != globalRow) {
          localSuccess = false;
          failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": indView[1] = " << indView[1] << " != globalRow = " << globalRow << endl;
        }
        if (numEntries > 0 && valView[0] != as<ST> (myRank)) {
          localSuccess = false;
          failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": valView[0] = " << valView[0] << " != myRank = " << myRank << endl;
        }
        if (numEntries > 1 && valView[1] != STS::one ()) {
          localSuccess = false;
          failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": valView[1] = " << valView[1] << " != 1" << endl;
        }
      }
      else if (globalRow == globalMaxRow && globalRow < rowMap->getMaxAllGlobalIndex ()) {
        if (numEntries != as<size_t> (2)) {
          localSuccess = false;
          failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": numEntries = " << numEntries << " != 2" << endl;
        }
        if (numEntries > 0 && indView[0] != globalRow) {
          localSuccess = false;
          failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": indView[0] = " << indView[0] << " != globalRow = " << globalRow << endl;
        }
        if (numEntries > 1 && indView[1] != globalMaxCol) {
          localSuccess = false;
          failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": indView[1] = " << indView[1] << " != globalMaxCol = " << globalMaxCol << endl;
        }
        if (numEntries > 0 && valView[0] != STS::one ()) {
          localSuccess = false;
          failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": valView[0] = " << valView[0] << " != 1" << endl;
        }
        if (numEntries > 1 && valView[1] != as<ST> (myRank+2)) {
          localSuccess = false;
          failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": valView[1] = " << valView[1] << " != myRank+2 = " << (myRank+2) << endl;
        }
      }
      else {
        if (numEntries != as<size_t> (1)) {
          localSuccess = false;
          failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": numEntries = " << numEntries << " != 1" << endl;
        }
        if (numEntries > 0 && indView[0] != globalRow) {
          localSuccess = false;
          failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": indView[0] = " << indView[0] << " != globalRow = " << globalRow << endl;
        }
        if (numEntries > 0 && valView[0] != STS::one ()) {
          localSuccess = false;
          failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": valView[0] = " << valView[0] << " != 1" << endl;
        }
      }
    }
  }

  bool globalSuccess = true;
  {
    int globalSuccess_int = 1;
    reduceAll (*comm, Teuchos::REDUCE_MIN, localSuccess ? 1 : 0, outArg (globalSuccess_int));
    globalSuccess = (globalSuccess_int != 0);
  }

  if (! globalSuccess) {
    // Print out the failure messages on all processes.
    for (int p = 0; p < numProcs; ++p) {
      if (p == myRank) {
        out << failMsg.str () << endl;
        out << "Proc " << myRank << ": localSuccess = " << localSuccess << ", globalSuccess = " << globalSuccess << endl;
        //      std::flush (out);
      }
      // Do some barriers to allow output to finish.
      comm->barrier ();
      comm->barrier ();
      comm->barrier ();
    }
  }

  TEST_EQUALITY_CONST(globalSuccess, true);
}
//
// Test for Tpetra::CrsMatrix::sumIntoGlobalValues(), with nonowned
// rows.  This test is like CrsMatrix_NonlocalSumInto.cpp, except that
// it attempts to sum into remote entries that don't exist on the
// process that owns them.  Currently, CrsMatrix silently ignores
// these entries.  (This is how CrsMatrix implements Import and Export
// when the target matrix has a fixed column Map.  Data are
// redistributed between the two row Maps, and "filtered" by the
// target matrix's column Map.)  This unit test verifies that behavior
// by ensuring the following:
//
// 1. fillComplete() (actually globalAssemble()) does not throw an
//    exception when the incoming entries don't exist on the process
//    that owns their rows.
//
// 2. The ignored entries are actually ignored.  They must change
//    neither the structure nor the values of the matrix.
//
// mfh 16 Dec 2012: The one-template-argument version breaks explicit
// instantiation.  Ah well.
//
//TEUCHOS_UNIT_TEST_TEMPLATE_1_DECL( CrsMatrix, NonlocalSumInto_Ignore, CrsMatrixType )
TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL( CrsMatrix, NonlocalSumInto_Ignore, LocalOrdinalType, GlobalOrdinalType, ScalarType, NodeType )
{
  using Tpetra::createContigMapWithNode;
  using Tpetra::createNonContigMapWithNode;
  using Tpetra::global_size_t;
  using Tpetra::Map;
  using Teuchos::Array;
  using Teuchos::ArrayView;
  using Teuchos::as;
  using Teuchos::av_const_cast;
  using Teuchos::Comm;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcp_const_cast;
  using Teuchos::OrdinalTraits;
  using Teuchos::outArg;
  using Teuchos::ParameterList;
  using Teuchos::parameterList;
  using Teuchos::reduceAll;
  using Teuchos::ScalarTraits;
  using Teuchos::tuple;
  using Teuchos::TypeNameTraits;
  using std::endl;

#if 0
  // Extract typedefs from the CrsMatrix specialization.
  typedef typename CrsMatrixType::scalar_type scalar_type;
  typedef typename CrsMatrixType::local_ordinal_type local_ordinal_type;
  typedef typename CrsMatrixType::global_ordinal_type global_ordinal_type;
  typedef typename CrsMatrixType::node_type node_type;
#endif // 0

  typedef ScalarType scalar_type;
  typedef LocalOrdinalType local_ordinal_type;
  typedef GlobalOrdinalType global_ordinal_type;
  typedef NodeType node_type;

  // Typedefs derived from the above canonical typedefs.
  typedef ScalarTraits<scalar_type> STS;
  typedef Map<local_ordinal_type, global_ordinal_type, node_type> map_type;

  // Abbreviation typedefs.
  typedef scalar_type ST;
  typedef local_ordinal_type LO;
  typedef global_ordinal_type GO;
  typedef node_type NT;

  typedef Tpetra::CrsMatrix<ST, LO, GO, NT> CrsMatrixType;

  // CrsGraph specialization corresponding to CrsMatrixType (the
  // CrsMatrix specialization).
  typedef Tpetra::CrsGraph<LO, GO, NT, typename CrsMatrixType::mat_solve_type> crs_graph_type;

  ////////////////////////////////////////////////////////////////////
  // HERE BEGINS THE TEST.
  ////////////////////////////////////////////////////////////////////

  const global_size_t INVALID = OrdinalTraits<global_size_t>::invalid();

  // Get the default communicator.
  RCP<const Comm<int> > comm = Tpetra::DefaultPlatform::getDefaultPlatform ().getComm ();
  const int numProcs = comm->getSize ();
  const int myRank = comm->getRank ();

  if (myRank == 0) {
    out << "Test with " << numProcs << " process" << (numProcs != 1 ? "es" : "") << endl;
  }

  // This test doesn't make much sense if there is only one MPI
  // process.  We let it pass trivially in that case.
  if (numProcs == 1) {
    out << "Number of processes in world is one; test passes trivially." << endl;
    return;
  }

  // Get a Kokkos Node instance.  It would be nice if we could pass in
  // parameters here, but threads don't matter for this test; it's a
  // test for distributed-memory capabilities.

  if (myRank == 0) {
    out << "Creating Kokkos Node of type " << TypeNameTraits<node_type>::name () << endl;
  }
  RCP<node_type> node;
  {
    ParameterList pl; // Kokkos Node constructors take a ParameterList (may be empty).
    node = rcp (new node_type (pl));
  }

  // Number of rows in the matrix owned by each process.
  const LO numLocalRows = 10;

  // CrT, 4 Feb 2014: the (void) trick does not seem to work; I get warnings.
  // Number of (global) rows and columns in the matrix.
  //const GO numGlobalRows = numLocalRows * numProcs;
  //const GO numGlobalCols = numGlobalRows;
  // Prevent compile warning for unused variable.
  // (It's not really "variable" if it's const, but oh well.)
  //(void) numGlobalCols;

  if (myRank == 0) {
    out << "Creating contiguous row Map" << endl;
  }

  // Create a contiguous row Map, with numLocalRows rows per process.
  RCP<const map_type> rowMap = createContigMapWithNode<LO, GO, NT> (INVALID, numLocalRows, comm, node);

  // For now, reuse the row Map for the domain and range Maps.  Later,
  // we might want to test using different domain or range Maps.
  RCP<const map_type> domainMap = rowMap;
  RCP<const map_type> rangeMap = rowMap;

  // Min and max row and column index of this process.  Use the row
  // Map for the row and column indices, since we're only inserting
  // indices into the graph for rows that the calling process owns.
  const GO globalMinRow = rowMap->getMinGlobalIndex ();
  const GO globalMaxRow = rowMap->getMaxGlobalIndex ();
  const GO globalMinCol = domainMap->getMinAllGlobalIndex ();
  const GO globalMaxCol = domainMap->getMaxAllGlobalIndex ();

  if (myRank == 0) {
    out << "Creating graph" << endl;
  }

  // Create a numGlobalRows by numGlobalCols graph and set its
  // structure.  Every process sets its diagonal entries (which it
  // owns).  Unlike in the CrsMatrix_NonlocalSumInto.cpp test, we
  // don't set any other entries.  As a result, the later calls to
  // sumIntoGlobalValues() for nonowned rows should fail.
  RCP<const crs_graph_type> graph;
  {
    // We have a good upper bound for the number of entries per row,
    // so use static profile.  Leave the upper bound as 2 (just as it
    // is in the CrsMatrix_NonlocalSumInto.cpp test) so that there
    // would actually be room for the incoming entries from remote
    // calls to sumIntoGlobalValues().
    RCP<crs_graph_type> nonconstGraph (new crs_graph_type (rowMap, 2, Tpetra::StaticProfile));

    TEUCHOS_TEST_FOR_EXCEPTION(globalMinRow >= globalMaxRow, std::logic_error,
      "This test only works if globalMinRow < globalMaxRow.");

    // Insert all the diagonal entries, and only the diagonal entries
    // (unlike in the other test).
    for (GO globalRow = globalMinRow; globalRow <= globalMaxRow; ++globalRow) {
      nonconstGraph->insertGlobalIndices (globalRow, tuple (globalRow));
    }

    nonconstGraph->fillComplete (domainMap, rangeMap);
    graph = rcp_const_cast<const crs_graph_type> (nonconstGraph);
  }

  // Test whether the graph has the correct structure.
  bool localGraphSuccess = true;
  std::ostringstream graphFailMsg;
  {
    Array<GO> ind (2); // upper bound

    for (GO globalRow = globalMinRow; globalRow <= globalMaxRow; ++globalRow) {
      size_t numEntries = 0; // output argument of the call below.
      graph->getGlobalRowCopy (globalRow, ind (), numEntries);

      // Revise view based on numEntries.
      ArrayView<GO> indView = ind.view (0, numEntries);

      // Sort the view.
      std::sort (indView.begin (), indView.end ());

      if (numEntries != as<size_t> (1)) {
        localGraphSuccess = false;
        graphFailMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": numEntries = " << numEntries << " != 1" << endl;
      }
      if (numEntries > 0 && indView[0] != globalRow) {
        localGraphSuccess = false;
        graphFailMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": indView[0] = " << indView[0] << " != globalRow = " << globalRow << endl;
      }
    }
  }

  // Make sure that all processes successfully created the graph.
  bool globalGraphSuccess = true;
  {
    int globalGraphSuccess_int = 1;
    reduceAll (*comm, Teuchos::REDUCE_MIN, localGraphSuccess ? 1 : 0, outArg (globalGraphSuccess_int));
    globalGraphSuccess = (globalGraphSuccess_int != 0);
  }
  if (! globalGraphSuccess) {
    if (myRank == 0) {
      out << "Graph structure not all correct:" << endl << endl;
    }
    // Print out the failure messages on all processes.
    for (int p = 0; p < numProcs; ++p) {
      if (p == myRank) {
        out << graphFailMsg.str () << endl;
        std::flush (out);
      }
      // Do some barriers to allow output to finish.
      comm->barrier ();
      comm->barrier ();
      comm->barrier ();
    }
  }
  TEUCHOS_TEST_FOR_EXCEPTION(! globalGraphSuccess, std::logic_error, "Graph structure test failed.");

  if (myRank == 0) {
    out << "Creating matrix" << endl;
  }

  // Create the matrix, using the above graph.
  RCP<CrsMatrixType> matrix (new CrsMatrixType (graph));

  if (myRank == 0) {
    out << "Setting all matrix entries to 1" << endl;
  }

  // Set all the owned entries to one.  Later we'll set nonlocal
  // entries' values in a loop.
  matrix->setAllToScalar (STS::one ());

  // Attempt to sum into nonowned entries (which nevertheless exist in
  // the matrix, just not on this process) using this process' rank.
  // The sumIntoGlobalValues() calls will record the data, but the
  // globalAssemble() method (called by fillComplete()) will silently
  // ignore entries whose columns are not in the column Map.  The
  // comment at the top of this test explains why this behavior is
  // reasonable.
  //
  // mfh 15,16 Dec 2012: Silently ignoring columns not in the column
  // Map has implications for the implementation of
  // sumIntoGlobalValues() for nonowned rows.  In particular, a
  // version of Map's getRemoteIDList() that uses one-sided
  // communication could invoke MPI_Get to figure out what the remote
  // process owns, without asking it or otherwise requiring
  // synchronization.  Thus, sumIntoGlobalValues() could throw
  // immediately on the calling process, rather than deferring the
  // exception to the remote process in globalAssemble().  If we
  // switch to that implementation, this unit test must be changed
  // accordingly.
  if (globalMinRow > rowMap->getMinAllGlobalIndex ()) {
    // Attempt to write to the (numLocalRows-1,numLocalCols-1) local entry of the previous process.
    matrix->sumIntoGlobalValues (globalMinRow-1, tuple (globalMaxCol), tuple (as<ST> (myRank)));
  }
  if (globalMaxRow < rowMap->getMaxAllGlobalIndex ()) {
    // Attempt to write to the (0,0) local entry of the next process.
    matrix->sumIntoGlobalValues (globalMaxRow+1, tuple (globalMinCol), tuple (as<ST> (myRank)));
  }

  if (myRank == 0) {
    out << "Calling fillComplete on the matrix" << endl;
  }
  TEST_NOTHROW(matrix->fillComplete (domainMap, rangeMap)); // Tpetra::Details::InvalidGlobalIndex<GO>

  // mfh 15 Dec 2012: We currently don't make promises about the state
  // of the matrix if fillComplete() throws.  Later, we might like to
  // improve the exception guarantees of fillComplete().  In that
  // case, the commented-out code below should be allowed to run.

  if (myRank == 0) {
    out << "Testing the matrix values" << endl;
  }

  // Test whether the entries have their correct values.
  bool localSuccess = true;
  std::ostringstream failMsg;
  {
    Array<GO> ind (2); // upper bound
    Array<ST> val (2); // upper bound

    for (GO globalRow = globalMinRow; globalRow <= globalMaxRow; ++globalRow) {
      size_t numEntries = 0; // output argument of the call below.
      matrix->getGlobalRowCopy (globalRow, ind (), val (), numEntries);

      // Revise views based on numEntries.
      ArrayView<GO> indView = ind.view (0, numEntries);
      ArrayView<ST> valView = val.view (0, numEntries);

      // Sort the views jointly by column index.
      Tpetra::sort2 (indView.begin (), indView.end (), valView.begin ());

      if (numEntries != as<size_t> (1)) {
        localSuccess = false;
        failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": numEntries = " << numEntries << " != 1" << endl;
      }
      if (numEntries > 0 && indView[0] != globalRow) {
        localSuccess = false;
        failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": indView[0] = " << indView[0] << " != globalRow = " << globalRow << endl;
      }
      if (numEntries > 0 && valView[0] != STS::one ()) {
        localSuccess = false;
        failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": valView[0] = " << valView[0] << " != 1" << endl;
      }
    }
  }

  bool globalSuccess = true;
  {
    int globalSuccess_int = 1;
    reduceAll (*comm, Teuchos::REDUCE_MIN, localSuccess ? 1 : 0, outArg (globalSuccess_int));
    globalSuccess = (globalSuccess_int != 0);
  }

  if (! globalSuccess) {
    // Print out the failure messages on all processes.
    for (int p = 0; p < numProcs; ++p) {
      if (p == myRank) {
        out << failMsg.str () << endl;
        out << "Proc " << myRank << ": localSuccess = " << localSuccess << ", globalSuccess = " << globalSuccess << endl;
        //      std::flush (out);
      }
      // Do some barriers to allow output to finish.
      comm->barrier ();
      comm->barrier ();
      comm->barrier ();
    }
  }

  TEST_EQUALITY_CONST(globalSuccess, true);
}