//! Get the global index corresponding to the given local index.
 KOKKOS_INLINE_FUNCTION GlobalOrdinal
 getGlobalElement (const LocalOrdinal localIndex) const
 {
   if (localIndex < getMinLocalIndex () || localIndex > getMaxLocalIndex ()) {
     return ::Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ();
   }
   if (isContiguous ()) {
     return getMinGlobalIndex () + localIndex;
   }
   else {
     return lgMap_(localIndex);
   }
 }
int main(int argc, char *argv[])
{
  Teuchos::oblackholestream blackhole;
  Teuchos::GlobalMPISession mpiSession(&argc,&argv,&blackhole);

  // 
  // Get the default communicator and node
  //
  auto &platform = Tpetra::DefaultPlatform::getDefaultPlatform();
  auto comm = platform.getComm();
  auto node = platform.getNode();
  const int myImageID = comm->getRank();
  const int numImages = comm->getSize();
  const bool verbose = (myImageID==0);

  // 
  // Say hello, print some communicator info
  //
  if (verbose) {
    std::cout << "\n" << Tpetra::version() << std::endl;
    std::cout << "Comm info: " << *comm;
    std::cout << "Node type: " << Teuchos::typeName(*node) << std::endl;
    std::cout << std::endl;
  }

  // 
  // Create a simple map for domain and range
  // 
  Tpetra::global_size_t numGlobalRows = 1000*numImages;
  auto map = Tpetra::createUniformContigMapWithNode<int,int>(numGlobalRows, comm, node);
  auto x = Tpetra::createVector<double>(map),
       y = Tpetra::createVector<double>(map);

  // Create a simple diagonal operator using lambda function
  auto fTwoOp = Tpetra::RTI::binaryOp<double>( [](double /*y*/, double x) { return 2.0 * x; } , map );
  // y = 3*fTwoOp*x + 2*y = 3*2*1 + 2*1 = 8
  x->putScalar(1.0);
  y->putScalar(1.0);
  fTwoOp->apply( *x, *y, Teuchos::NO_TRANS, 3.0, 2.0 );
  // check that y == eights
  double norm = y->norm1();
  if (verbose) {
    std::cout << "Tpetra::RTI::binaryOp" << std::endl
              << "norm(y) result == " << std::setprecision(2) << std::scientific << norm 
              << ", expected value is " << numGlobalRows * 8.0 << std::endl;
  }

  //
  // Create a finite-difference stencil using a Kokkos kernel and non-trivial maps
  //
  decltype(map) colmap;
  if (numImages > 1) {
    Teuchos::Array<int>           colElements;
    Teuchos::ArrayView<const int> rowElements = map->getNodeElementList();
    // This isn't the most efficient Map/Import layout, but it makes for a very straight-forward kernel
    if (myImageID != 0) colElements.push_back( map->getMinGlobalIndex() - 1 );
    colElements.insert(colElements.end(), rowElements.begin(), rowElements.end());
    if (myImageID != numImages-1) colElements.push_back( map->getMaxGlobalIndex() + 1 );
    colmap = Tpetra::createNonContigMapWithNode<int,int>(colElements(), comm, node);
  }
  else {
    colmap = map;
  }
  auto importer = createImport(map,colmap);
  // Finite-difference kernel = tridiag(-1, 2, -1)
  FDStencil<double> kern(myImageID, numImages, map->getNodeNumElements(), -1.0, 2.0, -1.0);
  auto FDStencilOp = Tpetra::RTI::kernelOp<double>( kern, map, map, importer );
  // x = ones(), FD(x) = [1 zeros() 1]
  FDStencilOp->apply( *x, *y );
  norm = y->norm1();
  if (verbose) {
    std::cout << std::endl
              << "TpetraExamples::FDStencil" << std::endl
              << "norm(y) result == " << std::setprecision(2) << std::scientific << norm 
              << ", expected value is " << 2.0 << std::endl;
  }

  std::cout << "\nEnd Result: TEST PASSED" << std::endl;
  return 0;
}