Scalar
Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node,true>::
dot (const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node,true>& a) const
{
  using Teuchos::outArg;
  using Teuchos::REDUCE_SUM;
  using Teuchos::reduceAll;

  TEUCHOS_TEST_FOR_EXCEPTION(
    this->getGlobalLength () != a.getGlobalLength (), std::runtime_error,
    "Tpetra::Vector::dot: Vectors do not have the same global length. "
    "this->getGlobalLength() = " << this->getGlobalLength () << " != "
    "a.getGlobalLength() = " << a.getGlobalLength () << ".");
#ifdef HAVE_TPETRA_DEBUG
  TEUCHOS_TEST_FOR_EXCEPTION(
    ! this->getMap ()->isCompatible (*a.getMap ()), std::runtime_error,
    "Tpetra::Vector::dot: Vectors do not have compatible Maps:" << std::endl
    << "this->getMap(): " << std::endl << * (this->getMap ())
    << "a.getMap(): " << std::endl << * (a.getMap ()) << std::endl);
#else
  TEUCHOS_TEST_FOR_EXCEPTION(
    this->getLocalLength () != a.getLocalLength (), std::runtime_error,
    "Tpetra::Vector::dot: Vectors do not have the same local length.");
#endif
  // Compute the local dot product, then (if the Vector is distributed)
  // combine the local results with a global sum reduction.
  Scalar gbldot;
  gbldot = MVT::Dot (this->lclMV_, a.lclMV_);
  if (this->isDistributed ()) {
    Scalar lcldot = gbldot;
    reduceAll (*this->getMap ()->getComm (), REDUCE_SUM, lcldot, outArg (gbldot));
  }
  return gbldot;
}
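// A minimal usage sketch (not part of this file; the Map, typedef, and
// Vector names are hypothetical): both Vectors must be built on the same
// (or compatible) Maps, and every calling process receives the same
// globally reduced result.
//
//   RCP<const map_type> map =
//     createContigMapWithNode<LO, GO, NT> (INVALID, numLocalRows, comm, node);
//   Tpetra::Vector<Scalar, LO, GO, NT> x (map);
//   Tpetra::Vector<Scalar, LO, GO, NT> y (map);
//   x.putScalar (Teuchos::ScalarTraits<Scalar>::one ());
//   y.putScalar (Teuchos::ScalarTraits<Scalar>::one ());
//   Scalar d = x.dot (y); // equals the global length of x on every process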
// // Test for Tpetra::CrsMatrix::sumIntoGlobalValues(), with nonowned // rows. The test creates the CrsMatrix with a static graph, so that // globalAssemble() uses sumIntoGlobalValues() instead of // insertGlobalValues() to merge in the incoming matrix entries. All // calls to sumIntoGlobalValues() in this test are for nonowned rows, // and all the calls are correct (that is, the processes that own // those rows have entries in the corresponding columns, so that // nonowned fill does not require creating new entries). // // mfh 16 Dec 2012: The one-template-argument version breaks explicit // instantiation. Ah well. // //TEUCHOS_UNIT_TEST_TEMPLATE_1_DECL( CrsMatrix, NonlocalSumInto, CrsMatrixType ) TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL( CrsMatrix, NonlocalSumInto, LocalOrdinalType, GlobalOrdinalType, ScalarType, NodeType ) { using Tpetra::createContigMapWithNode; using Tpetra::createNonContigMapWithNode; using Tpetra::global_size_t; using Tpetra::Map; using Teuchos::Array; using Teuchos::ArrayView; using Teuchos::as; using Teuchos::av_const_cast; using Teuchos::Comm; using Teuchos::RCP; using Teuchos::rcp; using Teuchos::rcp_const_cast; using Teuchos::OrdinalTraits; using Teuchos::outArg; using Teuchos::ParameterList; using Teuchos::parameterList; using Teuchos::reduceAll; using Teuchos::ScalarTraits; using Teuchos::tuple; using Teuchos::TypeNameTraits; using std::endl; #if 0 // Extract typedefs from the CrsMatrix specialization. typedef typename CrsMatrixType::scalar_type scalar_type; typedef typename CrsMatrixType::local_ordinal_type local_ordinal_type; typedef typename CrsMatrixType::global_ordinal_type global_ordinal_type; typedef typename CrsMatrixType::node_type node_type; #endif // 0 typedef ScalarType scalar_type; typedef LocalOrdinalType local_ordinal_type; typedef GlobalOrdinalType global_ordinal_type; typedef NodeType node_type; // Typedefs derived from the above canonical typedefs. typedef ScalarTraits<scalar_type> STS; typedef Map<local_ordinal_type, global_ordinal_type, node_type> map_type; // Abbreviation typedefs. typedef scalar_type ST; typedef local_ordinal_type LO; typedef global_ordinal_type GO; typedef node_type NT; typedef Tpetra::CrsMatrix<ST, LO, GO, NT> CrsMatrixType; // CrsGraph specialization corresponding to CrsMatrixType (the // CrsMatrix specialization). typedef Tpetra::CrsGraph<LO, GO, NT, typename CrsMatrixType::mat_solve_type> crs_graph_type; //////////////////////////////////////////////////////////////////// // HERE BEGINS THE TEST. //////////////////////////////////////////////////////////////////// const global_size_t INVALID = OrdinalTraits<global_size_t>::invalid(); // Get the default communicator. RCP<const Comm<int> > comm = Tpetra::DefaultPlatform::getDefaultPlatform ().getComm (); const int numProcs = comm->getSize (); const int myRank = comm->getRank (); if (myRank == 0) { out << "Test with " << numProcs << " process" << (numProcs != 1 ? "es" : "") << endl; } // This test doesn't make much sense if there is only one MPI // process. We let it pass trivially in that case. if (numProcs == 1) { out << "Number of processes in world is one; test passes trivially." << endl; return; } // Get a Kokkos Node instance. It would be nice if we could pass in // parameters here, but threads don't matter for this test; it's a // test for distributed-memory capabilities. if (myRank == 0) { out << "Creating Kokkos Node of type " << TypeNameTraits<node_type>::name () << endl; } RCP<node_type> node; { ParameterList pl; // Kokkos Node types require a PL inout. 
node = rcp (new node_type (pl)); } // Number of rows in the matrix owned by each process. const LO numLocalRows = 10; // Number of (global) rows and columns in the matrix. const GO numGlobalRows = numLocalRows * numProcs; const GO numGlobalCols = numGlobalRows; // Prevent compile warning for unused variable. // (It's not really "variable" if it's const, but oh well.) (void) numGlobalCols; if (myRank == 0) { out << "Creating contiguous row Map" << endl; } // Create a contiguous row Map, with numLocalRows rows per process. RCP<const map_type> rowMap = createContigMapWithNode<LO, GO, NT> (INVALID, numLocalRows, comm, node); // For now, reuse the row Map for the domain and range Maps. Later, // we might want to test using different domain or range Maps. RCP<const map_type> domainMap = rowMap; RCP<const map_type> rangeMap = rowMap; // Min and max row and column index of this process. Use the row // Map for the row and column indices, since we're only inserting // indices into the graph for rows that the calling process owns. const GO globalMinRow = rowMap->getMinGlobalIndex (); const GO globalMaxRow = rowMap->getMaxGlobalIndex (); const GO globalMinCol = domainMap->getMinAllGlobalIndex (); const GO globalMaxCol = domainMap->getMaxAllGlobalIndex (); if (myRank == 0) { out << "Creating graph" << endl; } // Create a numGlobalRows by numGlobalCols graph and set its // structure. Every process sets its diagonal entries (which it // owns), and its local (0,0) (if not on the diagonal) and // (numLocalRows-1, numLocalCols-1) (if not on the diagonal) // entries. We will use the off-diagonal entries to test // modification of nonlocal entries. RCP<const crs_graph_type> graph; { // We have a good upper bound for the number of entries per row, so use static profile. RCP<crs_graph_type> nonconstGraph (new crs_graph_type (rowMap, 2, Tpetra::StaticProfile)); TEUCHOS_TEST_FOR_EXCEPTION(globalMinRow >= globalMaxRow, std::logic_error, "This test only works if globalMinRow < globalMaxRow."); // Insert all the diagonal entries. for (GO globalRow = globalMinRow; globalRow <= globalMaxRow; ++globalRow) { nonconstGraph->insertGlobalIndices (globalRow, tuple (globalRow)); } // Insert the local (0,0) entry, if not on the diagonal. if (globalMinRow > rowMap->getMinAllGlobalIndex ()) { nonconstGraph->insertGlobalIndices (globalMinRow, tuple (globalMinCol)); } // Insert the local (numLocalRows-1, numLocalCols-1) entry, if not on the diagonal. if (globalMaxRow < rowMap->getMaxAllGlobalIndex ()) { nonconstGraph->insertGlobalIndices (globalMaxRow, tuple (globalMaxCol)); } nonconstGraph->fillComplete (domainMap, rangeMap); graph = rcp_const_cast<const crs_graph_type> (nonconstGraph); } // Test whether the graph has the correct structure. bool localGraphSuccess = true; std::ostringstream graphFailMsg; { Array<GO> ind (2); // upper bound for (GO globalRow = globalMinRow; globalRow <= globalMaxRow; ++globalRow) { size_t numEntries = 0; // output argument of below line. graph->getGlobalRowCopy (globalRow, ind (), numEntries); // Revise view based on numEntries. ArrayView<GO> indView = ind.view (0, numEntries); // Sort the view. 
std::sort (indView.begin (), indView.end ()); if (globalRow == globalMinRow && globalRow > rowMap->getMinAllGlobalIndex ()) { if (numEntries != as<size_t> (2)) { localGraphSuccess = false; graphFailMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": numEntries = " << numEntries << " != 2" << endl; } if (numEntries > 0 && indView[0] != globalMinCol) { localGraphSuccess = false; graphFailMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": indView[0] = " << indView[0] << " != globalMinCol = " << globalMinCol << endl; } if (numEntries > 1 && indView[1] != globalRow) { localGraphSuccess = false; graphFailMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": indView[1] = " << indView[1] << " != globalRow = " << globalRow << endl; } } else if (globalRow == globalMaxRow && globalRow < rowMap->getMaxAllGlobalIndex ()) { if (numEntries != as<size_t> (2)) { localGraphSuccess = false; graphFailMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": numEntries = " << numEntries << " != 2" << endl; } if (numEntries > 0 && indView[0] != globalRow) { localGraphSuccess = false; graphFailMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": indView[0] = " << indView[0] << " != globalRow = " << globalRow << endl; } if (numEntries > 1 && indView[1] != globalMaxCol) { localGraphSuccess = false; graphFailMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": indView[1] = " << indView[1] << " != globalMaxCol = " << globalMaxCol << endl; } } else { if (numEntries != as<size_t> (1)) { localGraphSuccess = false; graphFailMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": numEntries = " << numEntries << " != 1" << endl; } if (numEntries > 0 && indView[0] != globalRow) { localGraphSuccess = false; graphFailMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": indView[0] = " << indView[0] << " != globalRow = " << globalRow << endl; } } } } // Make sure that all processes successfully created the graph. bool globalGraphSuccess = true; { int globalGraphSuccess_int = 1; reduceAll (*comm, Teuchos::REDUCE_MIN, localGraphSuccess ? 1 : 0, outArg (globalGraphSuccess_int)); globalGraphSuccess = (globalGraphSuccess_int != 0); } if (! globalGraphSuccess) { if (myRank == 0) { out << "Graph structure not all correct:" << endl << endl; } // Print out the failure messages on all processes. for (int p = 0; p < numProcs; ++p) { if (p == myRank) { out << graphFailMsg.str () << endl; std::flush (out); } // Do some barriers to allow output to finish. comm->barrier (); comm->barrier (); comm->barrier (); } } TEUCHOS_TEST_FOR_EXCEPTION(! globalGraphSuccess, std::logic_error, "Graph structure test failed."); if (myRank == 0) { out << "Creating matrix" << endl; } // Create the matrix, using the above graph. RCP<CrsMatrixType> matrix (new CrsMatrixType (graph)); if (myRank == 0) { out << "Setting all matrix entries to 1" << endl; } // Set all the owned entries to one. Later we'll set nonlocal // entries' values in a loop. matrix->setAllToScalar (STS::one ()); // Sum into nonowned entries (which nevertheless exist in the // matrix, just not on this process) using this process' rank. // After global assembly, this should result in those entries having // value equal to one plus the rank of the process that wrote to // them. 
That value happens to be myRank for the (0,0) local entry // (except when myRank==0, in which case the value is 1), and // myRank+2 for the (numLocalRows-1,numLocalCols-1) local entry // (except when myRank==numProcs-1, in which case the value is 1). if (globalMinRow > rowMap->getMinAllGlobalIndex ()) { // Write to the (numLocalRows-1,numLocalCols-1) local entry of the previous process. matrix->sumIntoGlobalValues (globalMinRow-1, tuple (globalMaxCol), tuple (as<ST> (myRank))); } if (globalMaxRow < rowMap->getMaxAllGlobalIndex ()) { // Write to the (0,0) local entry of the next process. matrix->sumIntoGlobalValues (globalMaxRow+1, tuple (globalMinCol), tuple (as<ST> (myRank))); } if (myRank == 0) { out << "Calling fillComplete on the matrix" << endl; } matrix->fillComplete (domainMap, rangeMap); if (myRank == 0) { out << "Testing the matrix values" << endl; } // Test whether the entries have their correct values. bool localSuccess = true; std::ostringstream failMsg; { Array<GO> ind (2); // upper bound Array<ST> val (2); // upper bound for (GO globalRow = globalMinRow; globalRow <= globalMaxRow; ++globalRow) { size_t numEntries = 0; // output argument of below line. matrix->getGlobalRowCopy (globalRow, ind (), val (), numEntries); // Revise views based on numEntries. ArrayView<GO> indView = ind.view (0, numEntries); ArrayView<ST> valView = val.view (0, numEntries); // Sort the views jointly by column index. Tpetra::sort2 (indView.begin (), indView.end (), valView.begin ()); if (globalRow == globalMinRow && globalRow > rowMap->getMinAllGlobalIndex ()) { if (numEntries != as<size_t> (2)) { localSuccess = false; failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": numEntries = " << numEntries << " != 2" << endl; } if (numEntries > 0 && indView[0] != globalMinCol) { localSuccess = false; failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": indView[0] = " << indView[0] << " != globalMinCol = " << globalMinCol << endl; } if (numEntries > 1 && indView[1] != globalRow) { localSuccess = false; failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": indView[1] = " << indView[1] << " != globalRow = " << globalRow << endl; } if (numEntries > 0 && valView[0] != as<ST> (myRank)) { localSuccess = false; failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": valView[0] = " << valView[0] << " != myRank = " << myRank << endl; } if (numEntries > 1 && valView[1] != STS::one ()) { localSuccess = false; failMsg << "Proc " << 1 << ": globalRow = " << globalRow << ": valView[1] = " << valView[1] << " != 1" << endl; } } else if (globalRow == globalMaxRow && globalRow < rowMap->getMaxAllGlobalIndex ()) { if (numEntries != as<size_t> (2)) { localSuccess = false; failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": numEntries = " << numEntries << " != 2" << endl; } if (numEntries > 0 && indView[0] != globalRow) { localSuccess = false; failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": indView[0] = " << indView[0] << " != globalRow = " << globalRow << endl; } if (numEntries > 1 && indView[1] != globalMaxCol) { localSuccess = false; failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": indView[1] = " << indView[1] << " != globalMaxCol = " << globalMaxCol << endl; } if (numEntries > 0 && valView[0] != STS::one ()) { localSuccess = false; failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": valView[0] = " << valView[0] << " != 1" << endl; } if (numEntries > 1 && valView[1] != as<ST> (myRank+2)) { 
localSuccess = false; failMsg << "Proc " << 1 << ": globalRow = " << globalRow << ": valView[1] = " << valView[1] << " != myRank+2 = " << (myRank+2) << endl; } } else { if (numEntries != as<size_t> (1)) { localSuccess = false; failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": numEntries = " << numEntries << " != 1" << endl; } if (numEntries > 0 && indView[0] != globalRow) { localSuccess = false; failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": indView[0] = " << indView[0] << " != globalRow = " << globalRow << endl; } if (numEntries > 0 && valView[0] != STS::one ()) { localSuccess = false; failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": valView[0] = " << valView[0] << " != 1" << endl; } } } } bool globalSuccess = true; { int globalSuccess_int = 1; reduceAll (*comm, Teuchos::REDUCE_MIN, localSuccess ? 1 : 0, outArg (globalSuccess_int)); globalSuccess = (globalSuccess_int != 0); } if (! globalSuccess) { // Print out the failure messages on all processes. for (int p = 0; p < numProcs; ++p) { if (p == myRank) { out << failMsg.str () << endl; out << "Proc " << myRank << ": localSuccess = " << localSuccess << ", globalSuccess = " << globalSuccess << endl; // std::flush (out); } // Do some barriers to allow output to finish. comm->barrier (); comm->barrier (); comm->barrier (); } } TEST_EQUALITY_CONST(globalSuccess, true); }
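// A worked example of what the checks above expect (a sketch, not part of
// the test): with 2 processes and numLocalRows = 2, Proc 0 owns rows 0..1,
// Proc 1 owns rows 2..3, and the global column range is 0..3.  Proc 0 sums
// its rank (0) into the nonowned entry (2,0), and Proc 1 sums its rank (1)
// into the nonowned entry (1,3).  After fillComplete(), the expected values
// are
//
//   A(0,0) = 1
//   A(1,1) = 1,  A(1,3) = 1 + 1 = 2   (myRank + 2 on Proc 0)
//   A(2,2) = 1,  A(2,0) = 1 + 0 = 1   (myRank     on Proc 1)
//   A(3,3) = 1
//
// which is exactly what the per-row value checks verify.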
// // Test for Tpetra::CrsMatrix::sumIntoGlobalValues(), with nonowned // rows. This test is like CrsMatrix_NonlocalSumInto.cpp, except that // it attempts to sum into remote entries that don't exist on the // process that owns them. Currently, CrsMatrix silently ignores // these entries. (This is how CrsMatrix implements Import and Export // when the target matrix has a fixed column Map. Data are // redistributed between the two row Maps, and "filtered" by the // target matrix's column Map.) This unit test verifies that behavior // by ensuring the following: // // 1. fillComplete() (actually globalAssemble()) does not throw an // exception when the incoming entries don't exist on the process // that owns their rows. // // 2. The ignored entries are actually ignored. They must change // neither the structure nor the values of the matrix. // // mfh 16 Dec 2012: The one-template-argument version breaks explicit // instantiation. Ah well. // //TEUCHOS_UNIT_TEST_TEMPLATE_1_DECL( CrsMatrix, NonlocalSumInto_Ignore, CrsMatrixType ) TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL( CrsMatrix, NonlocalSumInto_Ignore, LocalOrdinalType, GlobalOrdinalType, ScalarType, NodeType ) { using Tpetra::createContigMapWithNode; using Tpetra::createNonContigMapWithNode; using Tpetra::global_size_t; using Tpetra::Map; using Teuchos::Array; using Teuchos::ArrayView; using Teuchos::as; using Teuchos::av_const_cast; using Teuchos::Comm; using Teuchos::RCP; using Teuchos::rcp; using Teuchos::rcp_const_cast; using Teuchos::OrdinalTraits; using Teuchos::outArg; using Teuchos::ParameterList; using Teuchos::parameterList; using Teuchos::reduceAll; using Teuchos::ScalarTraits; using Teuchos::tuple; using Teuchos::TypeNameTraits; using std::endl; #if 0 // Extract typedefs from the CrsMatrix specialization. typedef typename CrsMatrixType::scalar_type scalar_type; typedef typename CrsMatrixType::local_ordinal_type local_ordinal_type; typedef typename CrsMatrixType::global_ordinal_type global_ordinal_type; typedef typename CrsMatrixType::node_type node_type; #endif // 0 typedef ScalarType scalar_type; typedef LocalOrdinalType local_ordinal_type; typedef GlobalOrdinalType global_ordinal_type; typedef NodeType node_type; // Typedefs derived from the above canonical typedefs. typedef ScalarTraits<scalar_type> STS; typedef Map<local_ordinal_type, global_ordinal_type, node_type> map_type; // Abbreviation typedefs. typedef scalar_type ST; typedef local_ordinal_type LO; typedef global_ordinal_type GO; typedef node_type NT; typedef Tpetra::CrsMatrix<ST, LO, GO, NT> CrsMatrixType; // CrsGraph specialization corresponding to CrsMatrixType (the // CrsMatrix specialization). typedef Tpetra::CrsGraph<LO, GO, NT, typename CrsMatrixType::mat_solve_type> crs_graph_type; //////////////////////////////////////////////////////////////////// // HERE BEGINS THE TEST. //////////////////////////////////////////////////////////////////// const global_size_t INVALID = OrdinalTraits<global_size_t>::invalid(); // Get the default communicator. RCP<const Comm<int> > comm = Tpetra::DefaultPlatform::getDefaultPlatform ().getComm (); const int numProcs = comm->getSize (); const int myRank = comm->getRank (); if (myRank == 0) { out << "Test with " << numProcs << " process" << (numProcs != 1 ? "es" : "") << endl; } // This test doesn't make much sense if there is only one MPI // process. We let it pass trivially in that case. if (numProcs == 1) { out << "Number of processes in world is one; test passes trivially." << endl; return; } // Get a Kokkos Node instance. 
It would be nice if we could pass in // parameters here, but threads don't matter for this test; it's a // test for distributed-memory capabilities. if (myRank == 0) { out << "Creating Kokkos Node of type " << TypeNameTraits<node_type>::name () << endl; } RCP<node_type> node; { ParameterList pl; // Kokkos Node types require a PL inout. node = rcp (new node_type (pl)); } // Number of rows in the matrix owned by each process. const LO numLocalRows = 10; //CrT: 4Feb14: the void trick does not seem to work, I get warnings // Number of (global) rows and columns in the matrix. //const GO numGlobalRows = numLocalRows * numProcs; //const GO numGlobalCols = numGlobalRows; // Prevent compile warning for unused variable. // (It's not really "variable" if it's const, but oh well.) //(void) numGlobalCols; if (myRank == 0) { out << "Creating contiguous row Map" << endl; } // Create a contiguous row Map, with numLocalRows rows per process. RCP<const map_type> rowMap = createContigMapWithNode<LO, GO, NT> (INVALID, numLocalRows, comm, node); // For now, reuse the row Map for the domain and range Maps. Later, // we might want to test using different domain or range Maps. RCP<const map_type> domainMap = rowMap; RCP<const map_type> rangeMap = rowMap; // Min and max row and column index of this process. Use the row // Map for the row and column indices, since we're only inserting // indices into the graph for rows that the calling process owns. const GO globalMinRow = rowMap->getMinGlobalIndex (); const GO globalMaxRow = rowMap->getMaxGlobalIndex (); const GO globalMinCol = domainMap->getMinAllGlobalIndex (); const GO globalMaxCol = domainMap->getMaxAllGlobalIndex (); if (myRank == 0) { out << "Creating graph" << endl; } // Create a numGlobalRows by numGlobalCols graph and set its // structure. Every process sets its diagonal entries (which it // owns). Unlike in the CrsMatrix_NonlocalSumInto.cpp test, we // don't set any other entries. As a result, the later calls to // sumIntoGlobalValues() for nonowned rows should fail. RCP<const crs_graph_type> graph; { // We have a good upper bound for the number of entries per row, // so use static profile. Leave the upper bound as 2 (just as it // is in the CrsMatrix_NonlocalSumInto.cpp test) so that there // would actually be room for the incoming entries from remote // calls to sumIntoGlobalValues(). RCP<crs_graph_type> nonconstGraph (new crs_graph_type (rowMap, 2, Tpetra::StaticProfile)); TEUCHOS_TEST_FOR_EXCEPTION(globalMinRow >= globalMaxRow, std::logic_error, "This test only works if globalMinRow < globalMaxRow."); // Insert all the diagonal entries, and only the diagonal entries // (unlike in the other test). for (GO globalRow = globalMinRow; globalRow <= globalMaxRow; ++globalRow) { nonconstGraph->insertGlobalIndices (globalRow, tuple (globalRow)); } nonconstGraph->fillComplete (domainMap, rangeMap); graph = rcp_const_cast<const crs_graph_type> (nonconstGraph); } // Test whether the graph has the correct structure. bool localGraphSuccess = true; std::ostringstream graphFailMsg; { Array<GO> ind (2); // upper bound for (GO globalRow = globalMinRow; globalRow <= globalMaxRow; ++globalRow) { size_t numEntries = 0; // output argument of below line. graph->getGlobalRowCopy (globalRow, ind (), numEntries); // Revise view based on numEntries. ArrayView<GO> indView = ind.view (0, numEntries); // Sort the view. 
std::sort (indView.begin (), indView.end ()); if (numEntries != as<size_t> (1)) { localGraphSuccess = false; graphFailMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": numEntries = " << numEntries << " != 1" << endl; } if (numEntries > 0 && indView[0] != globalRow) { localGraphSuccess = false; graphFailMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": indView[0] = " << indView[0] << " != globalRow = " << globalRow << endl; } } } // Make sure that all processes successfully created the graph. bool globalGraphSuccess = true; { int globalGraphSuccess_int = 1; reduceAll (*comm, Teuchos::REDUCE_MIN, localGraphSuccess ? 1 : 0, outArg (globalGraphSuccess_int)); globalGraphSuccess = (globalGraphSuccess_int != 0); } if (! globalGraphSuccess) { if (myRank == 0) { out << "Graph structure not all correct:" << endl << endl; } // Print out the failure messages on all processes. for (int p = 0; p < numProcs; ++p) { if (p == myRank) { out << graphFailMsg.str () << endl; std::flush (out); } // Do some barriers to allow output to finish. comm->barrier (); comm->barrier (); comm->barrier (); } } TEUCHOS_TEST_FOR_EXCEPTION(! globalGraphSuccess, std::logic_error, "Graph structure test failed."); if (myRank == 0) { out << "Creating matrix" << endl; } // Create the matrix, using the above graph. RCP<CrsMatrixType> matrix (new CrsMatrixType (graph)); if (myRank == 0) { out << "Setting all matrix entries to 1" << endl; } // Set all the owned entries to one. Later we'll set nonlocal // entries' values in a loop. matrix->setAllToScalar (STS::one ()); // Attempt to sum into nonowned entries (which nevertheless exist in // the matrix, just not on this process) using this process' rank. // The sumIntoGlobalValues() calls will record the data, but the // globalAssemble() method (called by fillComplete()) will silently // ignore entries whose columns are not in the column Map. The // comment at the top of this test explains why this behavior is // reasonable. // // mfh 15,16 Dec 2012: Silently ignoring columns not in the column // Map has implications for the implementation of // sumIntoGlobalValues() for nonowned rows. In particular, a // version of Map's getRemoteIDList() that uses one-sided // communication could invoke MPI_Get to figure out what the remote // process owns, without asking it or otherwise requiring // synchronization. Thus, sumIntoGlobalValues() could throw // immediately on the calling process, rather than deferring the // exception to the remote process in globalAssemble(). If we // switch to that implementation, this unit test must be changed // accordingly. if (globalMinRow > rowMap->getMinAllGlobalIndex ()) { // Attempt to write to the (numLocalRows-1,numLocalCols-1) local entry of the previous process. matrix->sumIntoGlobalValues (globalMinRow-1, tuple (globalMaxCol), tuple (as<ST> (myRank))); } if (globalMaxRow < rowMap->getMaxAllGlobalIndex ()) { // Attempt to write to the (0,0) local entry of the next process. matrix->sumIntoGlobalValues (globalMaxRow+1, tuple (globalMinCol), tuple (as<ST> (myRank))); } if (myRank == 0) { out << "Calling fillComplete on the matrix" << endl; } TEST_NOTHROW(matrix->fillComplete (domainMap, rangeMap)); // Tpetra::Details::InvalidGlobalIndex<GO> // mfh 15 Dec 2012: We currently don't make promises about the state // of the matrix if fillComplete() throws. Later, we might like to // improve the exception guarantees of fillComplete(). In that // case, the commented-out code below should be allowed to run. 
if (myRank == 0) { out << "Testing the matrix values" << endl; } // Test whether the entries have their correct values. bool localSuccess = true; std::ostringstream failMsg; { Array<GO> ind (2); // upper bound Array<ST> val (2); // upper bound for (GO globalRow = globalMinRow; globalRow <= globalMaxRow; ++globalRow) { size_t numEntries = 0; // output argument of below line. matrix->getGlobalRowCopy (globalRow, ind (), val (), numEntries); // Revise views based on numEntries. ArrayView<GO> indView = ind.view (0, numEntries); ArrayView<ST> valView = val.view (0, numEntries); // Sort the views jointly by column index. Tpetra::sort2 (indView.begin (), indView.end (), valView.begin ()); if (numEntries != as<size_t> (1)) { localSuccess = false; failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": numEntries = " << numEntries << " != 1" << endl; } if (numEntries > 0 && indView[0] != globalRow) { localSuccess = false; failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": indView[0] = " << indView[0] << " != globalRow = " << globalRow << endl; } if (numEntries > 0 && valView[0] != STS::one ()) { localSuccess = false; failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": valView[0] = " << valView[0] << " != 1" << endl; } } } bool globalSuccess = true; { int globalSuccess_int = 1; reduceAll (*comm, Teuchos::REDUCE_MIN, localSuccess ? 1 : 0, outArg (globalSuccess_int)); globalSuccess = (globalSuccess_int != 0); } if (! globalSuccess) { // Print out the failure messages on all processes. for (int p = 0; p < numProcs; ++p) { if (p == myRank) { out << failMsg.str () << endl; out << "Proc " << myRank << ": localSuccess = " << localSuccess << ", globalSuccess = " << globalSuccess << endl; // std::flush (out); } // Do some barriers to allow output to finish. comm->barrier (); comm->barrier (); comm->barrier (); } } TEST_EQUALITY_CONST(globalSuccess, true); }
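// A sketch of the behavior this test verifies (same hypothetical layout as
// above: 2 processes, numLocalRows = 2): Proc 0 attempts to sum into the
// nonowned entry (2,0) and Proc 1 into (1,3), but neither column exists in
// the purely diagonal graph of the owning process, so globalAssemble()
// (called by fillComplete()) silently drops both contributions.  The
// assembled matrix is therefore unchanged: every row holds exactly one
// entry, A(i,i) = 1, which is what the structure and value checks above
// require.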
// This test is only meaningful in an MPI build.
TEUCHOS_UNIT_TEST( Map, replaceCommWithSubset )
{
  typedef int local_ordinal_type;
  typedef long global_ordinal_type;
  typedef Tpetra::Map<local_ordinal_type, global_ordinal_type> map_type;
  typedef Array<global_ordinal_type>::size_type size_type;

  RCP<const Comm<int> > origComm = rcp (new MpiComm<int> (MPI_COMM_WORLD));
  const int numProcs = origComm->getSize ();
  const int myRank = origComm->getRank ();

  // Create a Map in which all processes have a nonzero number of elements.
  const size_type numGidsPerProc = 3;
  const size_type myNumGids = numGidsPerProc;
  Array<global_ordinal_type> myGids (myNumGids);
  for (size_type k = 0; k < myNumGids; ++k) {
    myGids[k] = as<global_ordinal_type> (myRank) *
      as<global_ordinal_type> (numGidsPerProc) +
      as<global_ordinal_type> (k);
  }
  const global_size_t globalNumElts =
    as<global_size_t> (numGidsPerProc) * as<global_size_t> (numProcs);
  const global_ordinal_type indexBase = 0;
  RCP<const map_type> origMap (new map_type (globalNumElts, myGids (),
                                             indexBase, origComm));

  // Create a new communicator that excludes Proc 0.
  // This will exercise recomputing the index base.
  const int color = (myRank == 0) ? 0 : 1;
  const int key = 0;
  RCP<const Comm<int> > newComm = origComm->split (color, key);
  if (myRank == 0) {
    newComm = null;
  }
  // Create the new Map distributed over the subset communicator.
  RCP<const map_type> newMap = origMap->replaceCommWithSubset (newComm);

  // Test collectively for success, so the test doesn't hang on failure.
  int localSuccess = 1;
  std::ostringstream err;
  if (myRank == 0) {
    if (! newMap.is_null ()) {
      localSuccess = 0;
      err << "replaceCommWithSubset() should have returned null, but did not."
          << endl;
    }
  } else {
    if (newMap.is_null ()) {
      localSuccess = 0;
      err << "replaceCommWithSubset() should not have returned null, but did."
          << endl;
    } else {
      RCP<const Comm<int> > theNewComm = newMap->getComm ();
      if (theNewComm->getSize () != numProcs - 1) {
        localSuccess = 0;
        err << "New communicator should have " << (numProcs - 1)
            << " processes, but has " << theNewComm->getSize ()
            << " processes instead." << endl;
      }
      if (newMap->getGlobalNumElements () !=
          origMap->getGlobalNumElements () - numGidsPerProc) {
        localSuccess = 0;
        err << "New Map has " << newMap->getGlobalNumElements () << " global "
            << "elements, but should have "
            << (origMap->getGlobalNumElements () - numGidsPerProc)
            << "." << endl;
      }
      if (newMap->getNodeNumElements () != origMap->getNodeNumElements ()) {
        localSuccess = 0;
        err << "New Map has " << newMap->getNodeNumElements () << " local "
            << "elements, but should have " << origMap->getNodeNumElements ()
            << "." << endl;
      }
      if (newMap->getIndexBase () != as<global_ordinal_type> (numGidsPerProc)) {
        localSuccess = 0;
        err << "New Map has index base " << newMap->getIndexBase ()
            << ", but should have index base " << numGidsPerProc << "." << endl;
      }
      ArrayView<const global_ordinal_type> myNewGids =
        newMap->getNodeElementList ();
      if (myNewGids.size () != myGids.size () ||
          ! std::equal (myNewGids.begin (), myNewGids.end (), myGids.begin ())) {
        localSuccess = 0;
        err << "New Map has local GID list " << toString (myNewGids) << ", but "
            << "should have local GID list " << toString (myGids ()) << "."
            << endl;
      }
    }
  }

  int globalSuccess = 0;
  reduceAll (*origComm, REDUCE_MIN, localSuccess, outArg (globalSuccess));
  if (globalSuccess == 0) {
    if (myRank == 0) {
      cerr << "TEST FAILED" << endl
           << "Error messages from each process:" << endl << endl;
    }
    for (int p = 0; p < numProcs; ++p) {
      if (myRank == p) {
        cerr << "Process " << myRank << ": " << err.str () << endl;
      }
      origComm->barrier (); // Give time for output to finish.
      origComm->barrier ();
      origComm->barrier ();
    }
  }
  TEST_EQUALITY(globalSuccess, 1);
}
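// Worked example of the expected result (a sketch; any process count > 1
// works): with 4 processes and numGidsPerProc = 3, the original Map owns
// GIDs {0,1,2 | 3,4,5 | 6,7,8 | 9,10,11}.  Excluding Proc 0 via split() and
// replaceCommWithSubset() yields a Map on 3 processes that still owns
// {3,4,5 | 6,7,8 | 9,10,11}: 9 global elements, the same 3 local elements on
// each remaining process, and an index base recomputed to 3
// (= numGidsPerProc), which is what the checks above verify.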
int main (int argc, char *argv[]) { using Teuchos::Array; using Teuchos::as; using Teuchos::Comm; using Teuchos::CommandLineProcessor; using Teuchos::ParameterList; using Teuchos::ptr; using Teuchos::RCP; using Teuchos::rcp; using Teuchos::REDUCE_MAX; using Teuchos::REDUCE_MIN; using Teuchos::reduceAll; using std::cerr; using std::cout; using std::endl; typedef double scalar_type; typedef int local_ordinal_type; typedef int global_ordinal_type; typedef Kokkos::SerialNode node_type; Teuchos::GlobalMPISession mpiSession (&argc, &argv, &cout); RCP<const Comm<int> > comm = Tpetra::DefaultPlatform::getDefaultPlatform().getComm(); const int myRank = comm->getRank(); const int numProcs = comm->getSize(); std::string inputFilename; // Matrix Market file to read std::string temporaryFilename; // Matrix Market file to write (if applicable) std::string outputFilename; // Matrix Market file to write (if applicable) // Number of a specific test to run. If nonzero, only run that // test. We always run Test #1 since its result is needed for // subsequent tests. int testToRun = 0; // FIXME (mfh 07 Feb 2012) Currently, all tests with a different // index base FAIL. Reading in the multivector appears to be // correct, but writing it results in a multivector of all zeros (on // _all_ processes). bool testDifferentIndexBase = false; bool testContiguousInputMap = true; bool testNoncontiguousInputMap = false; bool testWrite = true; // Test Matrix Market output? bool tolerant = false; // Parse the file tolerantly? bool echo = false; // Echo the read-in matrix back? bool verbose = false; // Verbosity of output bool debug = false; // Print debugging info? // If true, stop after a single test failure. Intended for // interactive use, so that you can examine a test's output file. // Not intended for batch or ctest use. bool stopAfterFailure = false; CommandLineProcessor cmdp (false, true); cmdp.setOption ("inputFilename", &inputFilename, "Name of the Matrix Market dense matrix file to read."); cmdp.setOption ("temporaryFilename", &temporaryFilename, "If --testWrite is true, then use this file as temporary " "storage on (MPI) Proc 0. Otherwise, this argument is " "ignored."); cmdp.setOption ("outputFilename", &outputFilename, "If --testWrite is true, then write the read-in matrix to " "this file in Matrix Market format on (MPI) Proc 0. " "Otherwise, this argument is ignored. Note that the output " "file may not be identical to the input file."); cmdp.setOption ("testToRun", &testToRun, "Number of a specific test to run. " "If nonzero, only run that test. We always run Test #1 since" " its result is needed for subsequent tests."); cmdp.setOption ("testDifferentIndexBase", "dontTestDifferentIndexBase", &testDifferentIndexBase, "Whether to test input and output " "for Maps with different index bases."); cmdp.setOption ("testContiguousInputMap", "dontTestContiguousInputMap", &testContiguousInputMap, "Whether to test input and output for nonnull contiguous " "input Maps."); cmdp.setOption ("testNoncontiguousInputMap", "dontTestNoncontiguousInputMap", &testNoncontiguousInputMap, "Whether to test input and output for nonnull noncontiguous " "input Maps."); cmdp.setOption ("testWrite", "noTestWrite", &testWrite, "Whether to test Matrix Market file output. 
Ignored if no " "--outputFilename value is given."); cmdp.setOption ("tolerant", "strict", &tolerant, "Whether to parse the input Matrix Market file tolerantly."); cmdp.setOption ("echo", "noecho", &echo, "Whether to echo the read-in matrix back to stdout on Rank 0 " "in Matrix Market format. Note that the echoed matrix may " "not be identical to the input file."); cmdp.setOption ("verbose", "quiet", &verbose, "Print messages and results."); cmdp.setOption ("debug", "nodebug", &debug, "Print debugging information."); cmdp.setOption ("stopAfterFailure", "dontStopAfterFailure", &stopAfterFailure, "Whether to stop after a single test failure."); // Parse the command-line arguments. { const CommandLineProcessor::EParseCommandLineReturn parseResult = cmdp.parse (argc,argv); // If the caller asks us to print the documentation, or does not // explicitly say to run the benchmark, we let this "test" pass // trivially. if (parseResult == CommandLineProcessor::PARSE_HELP_PRINTED) { if (myRank == 0) { cout << "End Result: TEST PASSED" << endl; } return EXIT_SUCCESS; } TEUCHOS_TEST_FOR_EXCEPTION(parseResult != CommandLineProcessor::PARSE_SUCCESSFUL, std::invalid_argument, "Failed to parse command-line arguments."); } // Get a Kokkos Node instance for the particular Node type. RCP<node_type> node = getNode<node_type>(); // List of numbers of failed tests. std::vector<int> failedTests; // Current test number. Increment before starting each test. If a // test is only run conditionally, increment before evaluating the // condition. This ensures that each test has the same number each // time, whether or not a particular test is run. int testNum = 0; // Run all the tests. If no input filename was specified, we don't // invoke the test and we report a "TEST PASSED" message. if (inputFilename != "") { // Convenient abbreviations typedef scalar_type ST; typedef local_ordinal_type LO; typedef global_ordinal_type GO; typedef node_type NT; typedef Tpetra::MultiVector<ST, LO, GO, NT> MV; typedef Tpetra::Map<LO, GO, NT> MT; // If not testing writes, don't do the sanity check that tests // input by comparing against output. std::string outFilename = testWrite ? outputFilename : ""; std::string tmpFilename = testWrite ? temporaryFilename : ""; // Test 1: null input Map. ++testNum; if (verbose && myRank == 0) { cout << "Test " << testNum << ": Null Map on input to readDenseFile()" << endl; } RCP<MV> X; try { X = testReadDenseFile<MV> (inputFilename, tmpFilename, comm, node, tolerant, verbose, debug); if (outFilename != "") { testWriteDenseFile<MV> (outFilename, X, echo, verbose, debug); } } catch (std::exception& e) { failedTests.push_back (testNum); // If Test 1 fails, the other tests shouldn't even run, since // they depend on the result of Test 1 (the multivector X). throw e; } // Test 2: nonnull contiguous input Map with the same index base // as X's Map. This Map may or may not necessarily be the same as // (in the sense of isSameAs()) or even compatible with X's Map. ++testNum; if ((testToRun == 0 && testContiguousInputMap) || (testToRun != 0 && testToRun == testNum)) { if (verbose && myRank == 0) { cout << "Test " << testNum << ": Nonnull contiguous Map (same index " "base) on input to readDenseFile()" << endl; } const Tpetra::global_size_t globalNumRows = X->getMap()->getGlobalNumElements(); const GO indexBase = X->getMap()->getIndexBase(); // Create the Map. 
RCP<const MT> map = rcp (new Tpetra::Map<LO, GO, NT> (globalNumRows, indexBase, comm, Tpetra::GloballyDistributed, node)); try { RCP<MV> X2 = testReadDenseFileWithInputMap<MV> (inputFilename, tmpFilename, map, tolerant, verbose, debug); if (outFilename != "") { testWriteDenseFile<MV> (outFilename, X2, echo, verbose, debug); } } catch (std::exception& e) { failedTests.push_back (testNum); if (myRank == 0) { cerr << "Test " << testNum << " failed: " << e.what() << endl; } if (stopAfterFailure) { if (failedTests.size() > 0) { if (myRank == 0) { cout << "End Result: TEST FAILED" << endl; } return EXIT_FAILURE; } else { if (myRank == 0) { cout << "End Result: TEST PASSED" << endl; } return EXIT_SUCCESS; } } // if stop after failure } } // Test 3: nonnull contiguous input Map, with a different index // base than X's Map. In this case, the index base is X's Map's // index base plus a small number (3). For sufficiently long // vectors, this tests the case where the GID sets overlap. ++testNum; if ((testToRun == 0 && testContiguousInputMap && testDifferentIndexBase) || (testToRun != 0 && testToRun == testNum)) { if (verbose && myRank == 0) { cout << "Test " << testNum << ": Nonnull contiguous Map (different " "index base) on input to readDenseFile()" << endl; } const Tpetra::global_size_t globalNumRows = X->getMap()->getGlobalNumElements(); const GO indexBase = X->getMap()->getIndexBase() + as<GO> (3); // Make sure that the index base is the same on all processes. // It definitely should be, since the Map's getMaxAllGlobalIndex() // method should return the same value on all processes. GO minIndexBase = indexBase; reduceAll (*comm, REDUCE_MIN, indexBase, ptr (&minIndexBase)); GO maxIndexBase = indexBase; reduceAll (*comm, REDUCE_MAX, indexBase, ptr (&maxIndexBase)); TEUCHOS_TEST_FOR_EXCEPTION(minIndexBase != maxIndexBase || minIndexBase != indexBase, std::logic_error, "Index base values do not match on all processes. " "Min value is " << minIndexBase << " and max value is " << maxIndexBase << "."); // Create the Map. RCP<const MT> map = rcp (new Tpetra::Map<LO, GO, NT> (globalNumRows, indexBase, comm, Tpetra::GloballyDistributed, node)); try { RCP<MV> X3 = testReadDenseFileWithInputMap<MV> (inputFilename, tmpFilename, map, tolerant, verbose, debug); if (outFilename != "") { testWriteDenseFile<MV> (outFilename, X3, echo, verbose, debug); } } catch (std::exception& e) { failedTests.push_back (testNum); if (myRank == 0) { cerr << "Test " << testNum << " failed: " << e.what() << endl; } if (stopAfterFailure) { if (failedTests.size() > 0) { if (myRank == 0) { cout << "End Result: TEST FAILED" << endl; } return EXIT_FAILURE; } else { if (myRank == 0) { cout << "End Result: TEST PASSED" << endl; } return EXIT_SUCCESS; } } // if stop after failure } } // Test 4: nonnull contiguous input Map, with a different index // base than X's Map. In this case, the new index base is chosen // so that the new GID set does not overlap with X's Map's GID // set. ++testNum; if ((testToRun == 0 && testContiguousInputMap && testDifferentIndexBase) || (testToRun != 0 && testToRun == testNum)) { if (verbose && myRank == 0) { cout << "Test " << testNum << ": Nonnull contiguous Map (different " "index base) on input to readDenseFile()" << endl; } const Tpetra::global_size_t globalNumRows = X->getMap()->getGlobalNumElements(); // Choose the Map's index base so that the global ordinal sets // of X->getMap() and map don't overlap. This will ensure that // we test something nontrivial. 
const GO indexBase = X->getMap()->getMaxAllGlobalIndex() + 1; // Make sure that the index base is the same on all processes. // It definitely should be, since the Map's getMaxAllGlobalIndex() // method should return the same value on all processes. GO minIndexBase = indexBase; reduceAll (*comm, REDUCE_MIN, indexBase, ptr (&minIndexBase)); GO maxIndexBase = indexBase; reduceAll (*comm, REDUCE_MAX, indexBase, ptr (&maxIndexBase)); TEUCHOS_TEST_FOR_EXCEPTION(minIndexBase != maxIndexBase || minIndexBase != indexBase, std::logic_error, "Index base values do not match on all processes. " "Min value is " << minIndexBase << " and max value is " << maxIndexBase << "."); // Create the Map. RCP<const MT> map = rcp (new Tpetra::Map<LO, GO, NT> (globalNumRows, indexBase, comm, Tpetra::GloballyDistributed, node)); try { RCP<MV> X3 = testReadDenseFileWithInputMap<MV> (inputFilename, tmpFilename, map, tolerant, verbose, debug); if (outFilename != "") { testWriteDenseFile<MV> (outFilename, X3, echo, verbose, debug); } } catch (std::exception& e) { failedTests.push_back (testNum); if (myRank == 0) { cerr << "Test " << testNum << " failed: " << e.what() << endl; } if (stopAfterFailure) { if (failedTests.size() > 0) { if (myRank == 0) { cout << "End Result: TEST FAILED" << endl; } return EXIT_FAILURE; } else { if (myRank == 0) { cout << "End Result: TEST PASSED" << endl; } return EXIT_SUCCESS; } } // if stop after failure } } ++testNum; if ((testToRun == 0 && testNoncontiguousInputMap) || (testToRun != 0 && testToRun == testNum)) { // Test 5: nonnull input Map with the same index base as X's // Map, and a "noncontiguous" distribution (in the sense that // the Map is constructed using the constructor that takes an // arbitrary list of GIDs; that doesn't necessarily mean that // the GIDs themselves are noncontiguous). if (verbose && myRank == 0) { cout << "Test " << testNum << ": Nonnull noncontiguous Map (same index " "base) on input to readDenseFile()" << endl; } const GO indexBase = X->getMap()->getIndexBase(); const Tpetra::global_size_t globalNumRows = X->getMap()->getGlobalNumElements(); // Compute number of GIDs owned by each process. We're // replicating Tpetra functionality here because we want to // trick Tpetra into thinking we have a noncontiguous // distribution. This is the most general case and the most // likely to uncover bugs. const size_t quotient = globalNumRows / numProcs; const size_t remainder = globalNumRows - quotient * numProcs; const size_t localNumRows = (as<size_t> (myRank) < remainder) ? (quotient + 1) : quotient; // Build the list of GIDs owned by this process. Array<GO> elementList (localNumRows); GO myStartGID; if (as<size_t> (myRank) < remainder) { myStartGID = indexBase + as<GO> (myRank) * as<GO> (quotient + 1); } else { // This branch does _not_ assume that GO is a signed type. 
        myStartGID = indexBase + as<GO> (remainder) * as<GO> (quotient + 1) +
          (as<GO> (myRank) - as<GO> (remainder)) * as<GO> (quotient);
      }
      for (GO i = 0; i < as<GO> (localNumRows); ++i) {
        elementList[i] = myStartGID + i;
      }
      if (debug) {
        for (int p = 0; p < numProcs; ++p) {
          if (p == myRank) {
            if (elementList.size() > 0) {
              const GO minGID = *std::min_element (elementList.begin(), elementList.end());
              const GO maxGID = *std::max_element (elementList.begin(), elementList.end());
              cerr << "On Proc " << p << ": min,max GID = " << minGID << "," << maxGID << endl;
            } else {
              cerr << "On Proc " << p << ": elementList is empty" << endl;
            }
            cerr << std::flush;
          }
          comm->barrier ();
          comm->barrier ();
          comm->barrier ();
        }
      }
      // Create the Map.
      using Tpetra::createNonContigMapWithNode;
      RCP<const MT> map =
        createNonContigMapWithNode<LO, GO, NT> (elementList(), comm, node);
      try {
        RCP<MV> X4 = testReadDenseFileWithInputMap<MV> (inputFilename, tmpFilename,
                                                        map, tolerant, verbose, debug);
        if (outFilename != "") {
          testWriteDenseFile<MV> (outFilename, X4, echo, verbose, debug);
        }
      } catch (std::exception& e) {
        failedTests.push_back (testNum);
        if (myRank == 0) {
          cerr << "Test " << testNum << " failed: " << e.what() << endl;
        }
        if (stopAfterFailure) {
          if (failedTests.size() > 0) {
            if (myRank == 0) {
              cout << "End Result: TEST FAILED" << endl;
            }
            return EXIT_FAILURE;
          } else {
            if (myRank == 0) {
              cout << "End Result: TEST PASSED" << endl;
            }
            return EXIT_SUCCESS;
          }
        } // if stop after failure
      }
    } // if test noncontiguous input Map

    ++testNum;
    if ((testToRun == 0 && testNoncontiguousInputMap && testDifferentIndexBase) ||
        (testToRun != 0 && testToRun == testNum)) {
      // Test 6: nonnull input Map with a different index base than
      // X's Map, and a "noncontiguous" distribution (in the sense
      // that the Map is constructed using the constructor that takes
      // an arbitrary list of GIDs; that doesn't necessarily mean that
      // the GIDs themselves are noncontiguous).
      if (verbose && myRank == 0) {
        cout << "Test " << testNum << ": Nonnull noncontiguous Map (different "
          "index base) on input to readDenseFile()" << endl;
      }
      // Make sure that the global ordinal sets of X->getMap() and
      // map don't overlap.
      GO indexBase = X->getMap()->getMaxAllGlobalIndex() + 1;
      const Tpetra::global_size_t globalNumRows = X->getMap()->getGlobalNumElements();

      // Compute number of GIDs owned by each process.  We're
      // replicating Tpetra functionality here because we want to
      // trick Tpetra into thinking we have a noncontiguous
      // distribution.  This is the most general case and the most
      // likely to uncover bugs.
      const size_t quotient = globalNumRows / numProcs;
      const size_t remainder = globalNumRows - quotient * numProcs;
      const size_t localNumRows = (as<size_t> (myRank) < remainder) ?
        (quotient + 1) : quotient;

      // Build the list of GIDs owned by this process.
      Array<GO> elementList (localNumRows);
      GO myStartGID;
      if (as<size_t> (myRank) < remainder) {
        myStartGID = indexBase + as<GO> (myRank) * as<GO> (quotient + 1);
      } else {
        // This branch does _not_ assume that GO is a signed type.
        myStartGID = indexBase + as<GO> (remainder) * as<GO> (quotient + 1) +
          (as<GO> (myRank) - as<GO> (remainder)) * as<GO> (quotient);
      }
      for (GO i = 0; i < as<GO> (localNumRows); ++i) {
        elementList[i] = myStartGID + i;
      }
      // Create the Map.
using Tpetra::createNonContigMapWithNode; RCP<const MT> map = createNonContigMapWithNode<LO, GO, NT> (elementList(), comm, node); try { RCP<MV> X5 = testReadDenseFileWithInputMap<MV> (inputFilename, tmpFilename, map, tolerant, verbose, debug); if (outFilename != "") { testWriteDenseFile<MV> (outFilename, X5, echo, verbose, debug); } } catch (std::exception& e) { failedTests.push_back (testNum); if (myRank == 0) { cerr << "Test " << testNum << " failed: " << e.what() << endl; } if (stopAfterFailure) { if (failedTests.size() > 0) { if (myRank == 0) { cout << "End Result: TEST FAILED" << endl; } return EXIT_FAILURE; } else { if (myRank == 0) { cout << "End Result: TEST PASSED" << endl; } return EXIT_SUCCESS; } } // if stop after failure } } // if test noncontiguous input Map ++testNum; if ((testToRun == 0 && testNoncontiguousInputMap) || (testToRun != 0 && testToRun == testNum)) { // Test 7: nonnull input Map with the same index base as X's // Map, and a "noncontiguous" distribution with GIDs that start // at 3. This lets us easily observe any missing entries after // writing X and reading it back in again. if (verbose && myRank == 0) { cout << "Test " << testNum << ": Nonnull noncontiguous Map (same index " "base, GIDs not in 0 .. N-1) on input to readDenseFile()" << endl; } const Tpetra::global_size_t globalNumRows = X->getMap()->getGlobalNumElements(); const GO globalStartGID = as<GO> (3); // Compute number of GIDs owned by each process. We're // replicating Tpetra functionality here because we want to // trick Tpetra into thinking we have a noncontiguous // distribution. This is the most general case and the most // likely to uncover bugs. const size_t quotient = globalNumRows / numProcs; const size_t remainder = globalNumRows - quotient * numProcs; const size_t localNumRows = (as<size_t> (myRank) < remainder) ? (quotient + 1) : quotient; // Build the list of GIDs owned by this process. Array<GO> elementList (localNumRows); GO myStartGID; if (as<size_t> (myRank) < remainder) { myStartGID = globalStartGID + as<GO> (myRank) * as<GO> (quotient + 1); } else { // This branch does _not_ assume that GO is a signed type. myStartGID = globalStartGID + as<GO> (remainder) * as<GO> (quotient + 1) + (as<GO> (myRank) - as<GO> (remainder)) * as<GO> (quotient); } for (GO i = 0; i < as<GO> (localNumRows); ++i) { elementList[i] = myStartGID + i; } if (debug) { for (int p = 0; p < numProcs; ++p) { if (p == myRank) { if (elementList.size() > 0) { const GO minGID = *std::min_element (elementList.begin(), elementList.end()); const GO maxGID = *std::max_element (elementList.begin(), elementList.end()); cerr << "On Proc " << p << ": min,max GID = " << minGID << "," << maxGID << endl; } else { cerr << "On Proc " << p << ": elementList is empty" << endl; } cerr << std::flush; } comm->barrier (); comm->barrier (); comm->barrier (); } } // Create the Map. 
      using Tpetra::createNonContigMapWithNode;
      RCP<const MT> map =
        createNonContigMapWithNode<LO, GO, NT> (elementList(), comm, node);
      try {
        RCP<MV> X7 = testReadDenseFileWithInputMap<MV> (inputFilename, tmpFilename,
                                                        map, tolerant, verbose, debug);
        if (outFilename != "") {
          testWriteDenseFile<MV> (outFilename, X7, echo, verbose, debug);
        }
      } catch (std::exception& e) {
        failedTests.push_back (testNum);
        if (myRank == 0) {
          cerr << "Test " << testNum << " failed: " << e.what() << endl;
        }
        if (stopAfterFailure) {
          if (failedTests.size() > 0) {
            if (myRank == 0) {
              cout << "End Result: TEST FAILED" << endl;
            }
            return EXIT_FAILURE;
          } else {
            if (myRank == 0) {
              cout << "End Result: TEST PASSED" << endl;
            }
            return EXIT_SUCCESS;
          }
        } // if stop after failure
      }
    } // if test noncontiguous input Map
  }

  if (failedTests.size() > 0) {
    if (myRank == 0) {
      cout << "End Result: TEST FAILED" << endl;
    }
    return EXIT_FAILURE;
  } else {
    if (myRank == 0) {
      cout << "End Result: TEST PASSED" << endl;
    }
    return EXIT_SUCCESS;
  }
}
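// Example invocation (a sketch; the executable and file names are
// hypothetical, but the options are the ones registered with the
// CommandLineProcessor above):
//
//   mpirun -np 4 ./MatrixMarket_Tpetra_MultiVector_test \
//     --inputFilename=X.mtx --outputFilename=X_out.mtx \
//     --temporaryFilename=X_tmp.mtx --verbose --debug
//
// If --inputFilename is not given, the test loop is skipped and the program
// reports "End Result: TEST PASSED".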