/// \brief Return an optimized reordering of the given column Map.
///   Optionally, recompute an Import from the input domain Map to
///   the new column Map.
/// \tparam MapType A specialization of Map.
///
/// See the documentation of the free function
/// makeOptimizedColMapAndImport().
///
/// \param errStream [out] Output stream for human-readable error
///   reporting.  This is local to the calling process and may
///   differ on different processes.
/// \param lclErr [out] On output: true if anything went wrong on
///   the calling process.  This value is local to the calling
///   process and may differ on different processes.
/// \param domMap [in] Domain Map of a CrsGraph or CrsMatrix.
/// \param colMap [in] <i>Original</i> column Map of the same
///   CrsGraph or CrsMatrix as \c domMap.
/// \param oldImport [in] Optional pointer to the "original"
///   Import: an Import from \c domMap to \c colMap.  This is not
///   required, but if you supply this, this function may use it
///   to avoid some communication and/or work when setting up the
///   new Import object.  This function will <i>only</i> look at
///   this pointer if \c makeImport is true.
/// \param makeImport [in] Whether to make and return an Import from
///   the input domain Map to the new column Map.
///
/// \return The possibly reordered column Map \c newColMap, and the
///   corresponding Import from \c domMap to \c newColMap.  The
///   latter is nonnull if and only if \c makeImport is true.
///
/// \pre \c domMap and \c colMap must have the same or congruent
///   communicators.
/// \pre On all calling processes, the indices in \c colMap must be
///   a subset of the indices in \c domMap.
static std::pair<map_type, Teuchos::RCP<import_type> > make (std::ostream& errStream, bool& lclErr, const map_type& domMap, const map_type& colMap, const import_type* oldImport, const bool makeImport) { using Teuchos::Array; using Teuchos::ArrayView; using Teuchos::RCP; using Teuchos::rcp; using std::endl; typedef local_ordinal_type LO; typedef global_ordinal_type GO; const char prefix[] = "Tpetra::makeOptimizedColMapAndImport: "; std::ostream& err = errStream; (void) oldImport; // We don't currently use this argument. RCP<const Teuchos::Comm<int> > comm = colMap.getComm (); const LO colMapMinLid = colMap.getMinLocalIndex (); const LO colMapMaxLid = colMap.getMaxLocalIndex (); // Count the numbers of GIDs in colMap that are in and not in // domMap on the calling process. Check for zero indices on the // calling process first, because if it's true, then we shouldn't // trust [getMinLocalIndex(), getMaxLocalIndex()] to return a // correct range. LO numOwnedGids = 0; LO numRemoteGids = 0; if (colMap.getNodeNumElements () != 0) { for (LO colMapLid = colMapMinLid; colMapLid <= colMapMaxLid; ++colMapLid) { const GO colMapGid = colMap.getGlobalElement (colMapLid); if (domMap.isNodeLocalElement (colMapGid)) { ++numOwnedGids; } else { ++numRemoteGids; } } } // Put all colMap GIDs on the calling process in a single array. // Owned GIDs go in front, and remote GIDs at the end. Array<GO> allGids (numOwnedGids + numRemoteGids); ArrayView<GO> ownedGids = allGids.view (0, numOwnedGids); ArrayView<GO> remoteGids = allGids.view (numOwnedGids, numRemoteGids); // Fill ownedGids and remoteGids (and therefore allGids). We use // two loops, one to count (above) and one to fill (here), in // order to avoid dynamic memory allocation during the loop (in // this case, lots of calls to push_back()). That will simplify // use of Kokkos to parallelize these loops later. 
LO ownedPos = 0; LO remotePos = 0; if (colMap.getNodeNumElements () != 0) { for (LO colMapLid = colMapMinLid; colMapLid <= colMapMaxLid; ++colMapLid) { const GO colMapGid = colMap.getGlobalElement (colMapLid); if (domMap.isNodeLocalElement (colMapGid)) { ownedGids[ownedPos++] = colMapGid; } else { remoteGids[remotePos++] = colMapGid; } } } // If, for some reason, the running count doesn't match the // orignal count, fill in any remaining GID spots with an // obviously invalid value. We don't want to stop yet, because // other processes might not have noticed this error; Map // construction is a collective, so we can't stop now. if (ownedPos != numOwnedGids) { lclErr = true; err << prefix << "On Process " << comm->getRank () << ", ownedPos = " << ownedPos << " != numOwnedGids = " << numOwnedGids << endl; for (LO colMapLid = ownedPos; colMapLid < numOwnedGids; ++colMapLid) { ownedGids[colMapLid] = Teuchos::OrdinalTraits<GO>::invalid (); } } if (remotePos != numRemoteGids) { lclErr = true; err << prefix << "On Process " << comm->getRank () << ", remotePos = " << remotePos << " != numRemoteGids = " << numRemoteGids << endl; for (LO colMapLid = remotePos; colMapLid < numRemoteGids; ++colMapLid) { remoteGids[colMapLid] = Teuchos::OrdinalTraits<GO>::invalid (); } } // Figure out what processes own what GIDs in the domain Map. // Initialize the output array of remote PIDs with the "invalid // process rank" -1, to help us test whether getRemoteIndexList // did its job. 
Array<int> remotePids (numRemoteGids, -1); Array<LO> remoteLids; if (makeImport) { remoteLids.resize (numRemoteGids); std::fill (remoteLids.begin (), remoteLids.end (), Teuchos::OrdinalTraits<LO>::invalid ()); } LookupStatus lookupStatus; if (makeImport) { lookupStatus = domMap.getRemoteIndexList (remoteGids, remotePids (), remoteLids ()); } else { lookupStatus = domMap.getRemoteIndexList (remoteGids, remotePids ()); } // If any process returns IDNotPresent, then at least one of the // remote indices was not present in the domain Map. This means // that the Import object cannot be constructed, because of // incongruity between the column Map and domain Map. This means // that either the column Map or domain Map, or both, is // incorrect. const bool getRemoteIndexListFailed = (lookupStatus == IDNotPresent); if (getRemoteIndexListFailed) { lclErr = true; err << prefix << "On Process " << comm->getRank () << ", some indices " "in the input colMap (the original column Map) are not in domMap (the " "domain Map). Either these indices or the domain Map is invalid. " "Likely cause: For a nonsquare matrix, you must give the domain and " "range Maps as input to fillComplete." << endl; } // Check that getRemoteIndexList actually worked, by making sure // that none of the remote PIDs are -1. for (LO k = 0; k < numRemoteGids; ++k) { bool foundInvalidPid = false; if (remotePids[k] == -1) { foundInvalidPid = true; break; } if (foundInvalidPid) { lclErr = true; err << prefix << "On Process " << comm->getRank () << ", " "getRemoteIndexList returned -1 for the process ranks of " "one or more GIDs on this process." << endl; } } // Sort incoming remote column Map indices so that all columns // coming from a given remote process are contiguous. This means // the Import's Distributor doesn't need to reorder data. 
if (makeImport) { sort2 (remotePids.begin (), remotePids.end (), remoteGids.begin ()); } else { sort3 (remotePids.begin (), remotePids.end (), remoteGids.begin (), remoteLids.begin ()); } // Make the new column Map. MapType newColMap (colMap.getGlobalNumElements (), allGids (), colMap.getIndexBase (), comm, colMap.getNode ()); // Optionally, make the new Import object. RCP<import_type> imp; if (makeImport) { imp = rcp (new import_type (rcp (new map_type (domMap)), rcp (new map_type (newColMap)))); // FIXME (mfh 06 Jul 2014) This constructor throws a runtime // error, so I'm not using it for now. // // imp = rcp (new import_type (domMap, newColMap, remoteGids, // remotePids (), remoteLids (), // Teuchos::null, Teuchos::null)); } return std::make_pair (newColMap, imp); }