//! Translate a local index into its global index.
//!
//! A local index outside [getMinLocalIndex(), getMaxLocalIndex()] yields
//! OrdinalTraits<GlobalOrdinal>::invalid().
 KOKKOS_INLINE_FUNCTION GlobalOrdinal
 getGlobalElement (const LocalOrdinal localIndex) const
 {
   const bool inRange =
     localIndex >= getMinLocalIndex () && localIndex <= getMaxLocalIndex ();
   if (! inRange) {
     return ::Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ();
   }
   // Noncontiguous case: the global index is looked up in lgMap_.
   if (! isContiguous ()) {
     return lgMap_(localIndex);
   }
   // Contiguous case: add the local index to the minimum global index.
   return getMinGlobalIndex () + localIndex;
 }
  TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, ClassicScalarWithoutFiltering, Scalar, LocalOrdinal, GlobalOrdinal, Node)
  {
    // Runs CoalesceDropFactory_kokkos (no parameters set) on the matrix from
    // Build1DPoisson(36) and checks "DofsPerNode", "Filtering", the neighbor
    // counts of the resulting graph, and its import/domain maps.
#   include "MueLu_UseShortNames.hpp"
    MUELU_TESTING_SET_OSTREAM;
    MUELU_TESTING_LIMIT_SCOPE(Scalar,GlobalOrdinal,NO);
    out << "version: " << MueLu::Version() << std::endl;

    RCP<const Teuchos::Comm<int> > comm = Parameters::getDefaultComm();

    // Single-level hierarchy holding the test matrix as "A".
    Level fineLevel;
    TestHelpers_kokkos::TestFactory<SC,LO,GO,NO>::createSingleLevelHierarchy(fineLevel);

    RCP<Matrix> poisson1D = TestHelpers_kokkos::TestFactory<SC,LO,GO,NO>::Build1DPoisson(36);
    fineLevel.Set("A", poisson1D);

    // Request every output before building, then build.
    CoalesceDropFactory_kokkos dropFactory;
    fineLevel.Request("Graph",       &dropFactory);
    fineLevel.Request("DofsPerNode", &dropFactory);
    fineLevel.Request("Filtering",   &dropFactory);

    dropFactory.Build(fineLevel);

    auto graph         = fineLevel.Get<RCP<LWGraph_kokkos> >("Graph",       &dropFactory);
    auto myDofsPerNode = fineLevel.Get<LO>                  ("DofsPerNode", &dropFactory);
    auto filtering     = fineLevel.Get<bool>                ("Filtering",   &dropFactory);

    TEST_EQUALITY(as<int>(myDofsPerNode) == 1, true);
    TEST_EQUALITY(filtering,                   false);

    bool bCorrectGraph = false;
    if (comm->getSize() != 1) {
      // Parallel run: rank 0 expects 2 neighbors for local vertex 0,
      // every other rank expects 3.
      const bool onRankZero   = (comm->getRank() == 0);
      const auto numNeighbors = graph->getNeighborVertices(0).size();
      if (onRankZero) {
        bCorrectGraph = (numNeighbors == 2);
      } else {
        bCorrectGraph = (numNeighbors == 3);
      }
    } else {
      // Serial run: vertex 0 must neighbor exactly {0, 1} (in either order),
      // and vertices 1 and 2 must each have 3 neighbors.
      auto nbrs0 = graph->getNeighborVertices(0);
      auto nbrs1 = graph->getNeighborVertices(1);
      auto nbrs2 = graph->getNeighborVertices(2);
      const bool firstRowOk =
          nbrs0.size() == 2 &&
          ((nbrs0(0) == 0 && nbrs0(1) == 1) || (nbrs0(0) == 1 && nbrs0(1) == 0));
      bCorrectGraph = firstRowOk && nbrs1.size() == 3 && nbrs2.size() == 3;
    }
    TEST_EQUALITY(bCorrectGraph, true);

    // Note: the ImportMap is built from the column map of the matrix A WITHOUT dropping!
    auto myImportMap = graph->GetImportMap();
    auto myDomainMap = graph->GetDomainMap();

    TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(),  35);
    TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(),  0);
    TEST_EQUALITY(myImportMap->getMinLocalIndex(),      0);
    TEST_EQUALITY(myImportMap->getGlobalNumElements(),  as<size_t>(36 + (comm->getSize()-1)*2));

    TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(),  35);
    TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(),  0);
    TEST_EQUALITY(myDomainMap->getMinLocalIndex(),      0);
    TEST_EQUALITY(myDomainMap->getGlobalNumElements(),  36);
  }
void
SDirichlet<PHAL::AlbanyTraits::Jacobian, Traits>::evaluateFields(
    typename Traits::EvalData dirichlet_workset)
{
  // Applies this strong Dirichlet BC to the Jacobian and, when a residual
  // vector is present in the workset, to the residual and solution:
  //   1. flag every local row DOF constrained by this evaluator's node set,
  //   2. import those flags onto the column map,
  //   3. for each local row, zero the off-diagonal entries that lie in a
  //      constrained row or column and multiply the diagonal of a
  //      constrained row by `scale`.
  //
  // NOTE: you may be tempted to const_cast away the const here. However,
  //       consider the case where x is a Thyra::TpetraVector object. The
  //       actual Tpetra_Vector is stored as a Teuchos::ConstNonconstObjectContainer,
  //       which (most likely) happens to be created from a const RCP, and therefore
  //       when calling getTpetraVector (from Thyra::TpetraVector), the container
  //       will throw.
  //       Instead, keep the const correctness until the very last moment.
  Teuchos::RCP<const Thyra_Vector> x = dirichlet_workset.x;
  Teuchos::RCP<Thyra_Vector> f = dirichlet_workset.f;

  // TODO: abstract away the tpetra interface
  Teuchos::RCP<Tpetra_CrsMatrix> J = Albany::getTpetraMatrix(dirichlet_workset.Jac);

  auto row_map = J->getRowMap();
  auto col_map = J->getColMap();
  // we make this assumption, which lets us use both local row and column
  // indices into a single is_dbc vector
  ALBANY_ASSERT(col_map->isLocallyFitted(*row_map));

  // Check the lookup before using it: dereferencing the end iterator of a
  // failed find() would be undefined behavior.
  auto const ns_it = dirichlet_workset.nodeSets->find(this->nodeSetID);
  ALBANY_ASSERT(ns_it != dirichlet_workset.nodeSets->end());
  auto& ns_nodes = ns_it->second;

  // The residual (and solution) are only touched when a residual is supplied.
  bool const fill_residual = f != Teuchos::null;

  auto f_view = fill_residual ? Albany::getNonconstLocalData(f) : Teuchos::null;
  auto x_view = fill_residual ? Teuchos::arcp_const_cast<ST>(Albany::getLocalData(x)) : Teuchos::null;

  // Scratch buffers for one matrix row, reused across rows.
  Teuchos::Array<ST> entries;
  Teuchos::Array<LO> indices;

  using IntVec = Tpetra::Vector<int, Tpetra_LO, Tpetra_GO, KokkosNode>;
  using Import = Tpetra::Import<Tpetra_LO, Tpetra_GO, KokkosNode>;
  Teuchos::RCP<const Import> import;

  auto domain_map = row_map;  // we are assuming this!

  // in theory we should use the importer from the CRS graph, although
  // I saw a segfault in one of the tests when doing this...
  // if (J->getCrsGraph()->isFillComplete()) {
  //  import = J->getCrsGraph()->getImporter();
  //} else {
  // this construction is expensive!
  import = Teuchos::rcp(new Import(domain_map, col_map));
  //}

  // Per-row and per-column flags: > 0 means the DOF carries a Dirichlet BC.
  IntVec row_is_dbc(row_map);
  IntVec col_is_dbc(col_map);

#if defined(ALBANY_LCM)
  // Only the Schwarz branch below needs these.
  int const   spatial_dimension = dirichlet_workset.spatial_dimension_;
  auto const& fixed_dofs        = dirichlet_workset.fixed_dofs_;
#endif

  row_is_dbc.template modify<Kokkos::HostSpace>();
  {
    auto row_is_dbc_data =
        row_is_dbc.template getLocalView<Kokkos::HostSpace>();
    ALBANY_ASSERT(row_is_dbc_data.extent(1) == 1);
#if defined(ALBANY_LCM)
    if (dirichlet_workset.is_schwarz_bc_ == false) {  // regular SDBC
#endif
      // Flag the DOF at this evaluator's offset on every node of the set.
      for (size_t ns_node = 0; ns_node < ns_nodes.size(); ns_node++) {
        auto dof                = ns_nodes[ns_node][this->offset];
        row_is_dbc_data(dof, 0) = 1;
      }
#if defined(ALBANY_LCM)
    } else {  // special case for Schwarz SDBC
      for (size_t ns_node = 0; ns_node < ns_nodes.size(); ns_node++) {
        for (int offset = 0; offset < spatial_dimension; ++offset) {
          auto dof = ns_nodes[ns_node][offset];
          // If this DOF already has a DBC, skip it.
          if (fixed_dofs.find(dof) != fixed_dofs.end()) continue;
          row_is_dbc_data(dof, 0) = 1;
        }
      }
    }
#endif
  }
  // Propagate the row flags to the column map so every local column index
  // can be tested as well.
  col_is_dbc.doImport(row_is_dbc, *import, Tpetra::ADD);
  auto col_is_dbc_data = col_is_dbc.template getLocalView<Kokkos::HostSpace>();

  auto const min_local_row = row_map->getMinLocalIndex();
  auto const max_local_row = row_map->getMaxLocalIndex();
  // Loop-invariant bounds for the column-index sanity checks below.
  auto const min_local_col = col_map->getMinLocalIndex();
  auto const max_local_col = col_map->getMaxLocalIndex();

  for (auto local_row = min_local_row; local_row <= max_local_row;
       ++local_row) {
    auto num_row_entries = J->getNumEntriesInLocalRow(local_row);

    entries.resize(num_row_entries);
    indices.resize(num_row_entries);

    J->getLocalRowCopy(local_row, indices(), entries(), num_row_entries);

    // Indexing the column flags with a row index relies on the
    // locally-fitted assertion above.
    bool const is_dbc_row = col_is_dbc_data(local_row, 0) > 0;

    if (is_dbc_row && fill_residual == true) {
      f_view[local_row] = 0.0;
      x_view[local_row] = this->value.val();
    }

    for (size_t row_entry = 0; row_entry < num_row_entries; ++row_entry) {
      auto const local_col = indices[row_entry];
      if (local_col == local_row) {
        // IKT, 4/5/18: scale diagonal entries by provided scaling
        if (is_dbc_row) {
          entries[row_entry] *= scale;
        }
        continue;
      }
      ALBANY_ASSERT(local_col >= min_local_col);
      ALBANY_ASSERT(local_col <= max_local_col);
      bool const is_dbc_col = col_is_dbc_data(local_col, 0) > 0;
      if (is_dbc_row || is_dbc_col) {
        entries[row_entry] = 0.0;
      }
    }
    J->replaceLocalValues(local_row, indices(), entries());
  }
}
  TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, ClassicBlockWithFiltering, Scalar, LocalOrdinal, GlobalOrdinal, Node)
  {
    // Runs CoalesceDropFactory_kokkos with "aggregation: drop tol" = 1.0 on a
    // tridiagonal matrix with fixed block size 3 (3*comm->getSize() global
    // rows) and checks "DofsPerNode", "Filtering", the graph, and its
    // import/domain maps.
#   include "MueLu_UseShortNames.hpp"
    MUELU_TESTING_SET_OSTREAM;
    MUELU_TESTING_LIMIT_SCOPE(Scalar,GlobalOrdinal,NO);
    out << "version: " << MueLu::Version() << std::endl;

    RCP<const Teuchos::Comm<int> > comm = Parameters::getDefaultComm();
    Xpetra::UnderlyingLib lib = TestHelpers_kokkos::Parameters::getLib();

    Level fineLevel;
    TestHelpers_kokkos::TestFactory<SC,LO,GO,NO>::createSingleLevelHierarchy(fineLevel);

    // Tridiagonal test matrix, stored on the level as "A" with block size 3.
    auto rowMap   = MapFactory::Build(lib, 3*comm->getSize(), 0, comm);
    auto tridiagA = TestHelpers_kokkos::TestFactory<SC,LO,GO,NO>::BuildTridiag(rowMap, 2.0, -1.0, 0.00001);

    tridiagA->SetFixedBlockSize(3, 0);
    fineLevel.Set("A", tridiagA);

    CoalesceDropFactory_kokkos dropFactory = CoalesceDropFactory_kokkos();
    dropFactory.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(1.0));

    // Request every output before building, then build.
    fineLevel.Request("Graph",       &dropFactory);
    fineLevel.Request("DofsPerNode", &dropFactory);
    fineLevel.Request("Filtering",   &dropFactory);

    dropFactory.Build(fineLevel);

    auto graph         = fineLevel.Get<RCP<LWGraph_kokkos> >("Graph",       &dropFactory);
    auto myDofsPerNode = fineLevel.Get<LO>                  ("DofsPerNode", &dropFactory);
    auto filtering     = fineLevel.Get<bool>                ("Filtering",   &dropFactory);

    TEST_EQUALITY(as<int>(myDofsPerNode) == 3, true);
    TEST_EQUALITY(filtering,                            true);
    TEST_EQUALITY(as<int>(graph->GetDomainMap()->getGlobalNumElements()) == comm->getSize(), true);

    TEST_EQUALITY(graph->getNeighborVertices(0).size(), 1);

    // Note: the ImportMap is built from the column map of the matrix A WITHOUT dropping!
    auto myImportMap = graph->GetImportMap();
    auto myDomainMap = graph->GetDomainMap();

    TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), comm->getSize()-1);
    TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0);
    TEST_EQUALITY(myImportMap->getMinLocalIndex(),     0);
    TEST_EQUALITY(myImportMap->getGlobalNumElements(), as<size_t>(comm->getSize()+2*(comm->getSize()-1)));
    if (comm->getSize() > 1) {
      size_t numLocalRowMapElts = graph->GetNodeNumVertices();
      size_t numLocalImportElts = myImportMap->getNodeNumElements();
      // The first and last ranks expect one extra import entry, all other
      // ranks expect two.
      const bool isEndRank =
          comm->getRank() == 0 || comm->getRank() == comm->getSize()-1;
      if (!isEndRank) {
        TEST_EQUALITY(numLocalImportElts, numLocalRowMapElts+2);
      } else {
        TEST_EQUALITY(numLocalImportElts, numLocalRowMapElts+1);
      }
    }

    TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), comm->getSize()-1);
    TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0);
    TEST_EQUALITY(myDomainMap->getMaxLocalIndex(),     0);
    TEST_EQUALITY(myDomainMap->getMinLocalIndex(),     0);
    TEST_EQUALITY(myDomainMap->getGlobalNumElements(), as<size_t>(comm->getSize()));
    TEST_EQUALITY(myDomainMap->getNodeNumElements(),   1);
  }