NOX::Abstract::MultiVector&
NOX::Thyra::MultiVector::update(double alpha, const NOX::Abstract::MultiVector& a,
                                double gamma)
{
  using Teuchos::tuple;
  const NOX::Thyra::MultiVector& aa =
    dynamic_cast<const NOX::Thyra::MultiVector&>(a);
  ::Thyra::linear_combination<double>(tuple(alpha)().getConst(),
                                      tuple(aa.thyraMultiVec.ptr().getConst())(),
                                      gamma,
                                      thyraMultiVec.ptr());
  return *this;
}

template <typename Scalar>
void ele_wise_bound (const ::Thyra::VectorBase<Scalar>& x_lo,
                     const ::Thyra::VectorBase<Scalar>& x_up,
                     const Teuchos::Ptr< ::Thyra::VectorBase<Scalar> >& x)
{
  using Teuchos::tuple;
  using Teuchos::ptrInArg;
  using Teuchos::null;
  RTOpPack::TOpEleWiseBound<Scalar> ele_wise_bound_op;
  ::Thyra::applyOp<Scalar> (ele_wise_bound_op,
                            tuple (ptrInArg (x_lo), ptrInArg (x_up)),
                            tuple (x),
                            null);
}

template <typename Scalar>
void ele_wise_prune_upper (const ::Thyra::VectorBase<Scalar>& x,
                           const ::Thyra::VectorBase<Scalar>& x_up,
                           const Teuchos::Ptr< ::Thyra::VectorBase<Scalar> >& v,
                           const Scalar& eps)
{
  using Teuchos::tuple;
  using Teuchos::ptrInArg;
  using Teuchos::null;
  RTOpPack::TOpEleWisePruneUpper_2_1<Scalar> ele_wise_prune_op(eps);
  ::Thyra::applyOp<Scalar> (ele_wise_prune_op,
                            tuple (ptrInArg (x), ptrInArg (x_up)),
                            tuple (v),
                            null);
}

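// A hedged usage sketch for the element-wise helpers above, assuming double
// scalars, a small serial SPMD vector space, and the usual Thyra std-ops
// headers; TOpEleWiseBound presumably clamps x into [x_lo, x_up] entry by
// entry (an assumption about the op's semantics, not confirmed here).
{
  const auto space = Thyra::defaultSpmdVectorSpace<double>(4);
  const auto x_lo  = Thyra::createMember(space);
  const auto x_up  = Thyra::createMember(space);
  const auto x     = Thyra::createMember(space);
  Thyra::V_S(x_lo.ptr(), 0.0);   // lower bounds
  Thyra::V_S(x_up.ptr(), 1.0);   // upper bounds
  Thyra::V_S(x.ptr(),    2.0);   // values that violate the upper bound
  ele_wise_bound<double>(*x_lo, *x_up, x.ptr());
  // After the call, each entry of x should lie within [0, 1].
}
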
template <class Scalar>
RCP<LinearProblem<Scalar,MultiVector<Scalar,int>,Operator<Scalar,int> > > buildProblem()
{
  typedef ScalarTraits<Scalar>         SCT;
  typedef typename SCT::magnitudeType  MT;
  typedef Operator<Scalar,int>         OP;
  typedef MultiVector<Scalar,int>      MV;
  typedef OperatorTraits<Scalar,MV,OP> OPT;
  typedef MultiVecTraits<Scalar,MV>    MVT;
  RCP<CrsMatrix<Scalar,int> > A = rcp(new CrsMatrix<Scalar,int>(vmap,rnnzmax));
  if (mptestmypid == 0) {
    // HB format is compressed column. CrsMatrix is compressed row.
    const double *dptr = dvals;
    const int *rptr = rowind;
    for (int c=0; c<mptestdim; ++c) {
      for (int colnnz=0; colnnz < colptr[c+1]-colptr[c]; ++colnnz) {
        A->insertGlobalValues(*rptr-1, tuple(c), tuple<Scalar>(*dptr));
        if (c != *rptr-1) {
          A->insertGlobalValues(c, tuple(*rptr-1), tuple<Scalar>(*dptr));
        }
        ++rptr;
        ++dptr;
      }
    }
  }
  // distribute matrix data to other nodes
  A->fillComplete();
  // Create initial MV and solution MV
  RCP<MV> B, X;
  X = rcp( new MV(vmap,numrhs) );
  MVT::MvRandom( *X );
  B = rcp( new MV(vmap,numrhs) );
  OPT::Apply( *A, *X, *B );
  MVT::MvInit( *X, 0.0 );
  // Construct a linear problem instance with zero initial MV
  RCP<LinearProblem<Scalar,MV,OP> > problem =
    rcp( new LinearProblem<Scalar,MV,OP>(A,X,B) );
  problem->setLabel(Teuchos::typeName(SCT::one()));
  // diagonal preconditioner
  // if (precond) {
  //   Vector<Scalar,int> diags(A->getRowMap());
  //   A->getLocalDiagCopy(diags);
  //   for (Teuchos_Ordinal i=0; i<vmap->getNumMyEntries(); ++i) {
  //     TEST_FOR_EXCEPTION(diags[i] <= SCT::zero(), std::runtime_error,
  //       "Matrix is not positive-definite: " << diags[i]);
  //     diags[i] = SCT::one() / diags[i];
  //   }
  //   RCP<Operator<Scalar,int> > P = rcp(new DiagPrecond<Scalar,int>(diags));
  //   problem->setRightPrec(P);
  // }
  TEST_FOR_EXCEPT(problem->setProblem() == false);
  return problem;
}

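// A hedged sketch of how the returned problem is typically consumed, assuming
// Scalar = double, that the file-scope inputs used by buildProblem() (vmap,
// rnnzmax, the HB arrays, numrhs) are already set up, and that the Belos
// Tpetra adapters are available.  "Maximum Iterations" and "Convergence
// Tolerance" are standard Belos solver-manager options.
{
  typedef Tpetra::MultiVector<double,int> MV;
  typedef Tpetra::Operator<double,int>    OP;
  Teuchos::RCP<Teuchos::ParameterList> belosList = Teuchos::parameterList();
  belosList->set("Maximum Iterations", 500);
  belosList->set("Convergence Tolerance", 1.0e-8);
  Belos::BlockCGSolMgr<double,MV,OP> solver(buildProblem<double>(), belosList);
  const Belos::ReturnType ret = solver.solve();   // Belos::Converged on success
  (void) ret;
}
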
template <class Scalar>
Teuchos::RCP<const Thyra::ProductVectorBase<Scalar> >
Thyra::castOrCreateProductVectorBase(const RCP<const VectorBase<Scalar> > v)
{
  using Teuchos::rcp_dynamic_cast;
  using Teuchos::tuple;
  const RCP<const ProductVectorBase<Scalar> > prod_v =
    rcp_dynamic_cast<const ProductVectorBase<Scalar> >(v);
  if (nonnull(prod_v)) {
    return prod_v;
  }
  return defaultProductVector<Scalar>(
    productVectorSpace<Scalar>(tuple(v->space())()),
    tuple(v)()
    );
}

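// A hedged usage sketch for the helper above: wrap an ordinary Thyra vector
// as a one-block product vector.  Assumes double scalars and a serial SPMD
// vector space of length 5; the Thyra calls used here are standard std-ops.
{
  const Teuchos::RCP<const Thyra::VectorSpaceBase<double> > space =
    Thyra::defaultSpmdVectorSpace<double>(5);
  const Teuchos::RCP<Thyra::VectorBase<double> > v = Thyra::createMember(space);
  Thyra::V_S(v.ptr(), 1.0);   // fill with ones
  const Teuchos::RCP<const Thyra::ProductVectorBase<double> > pv =
    Thyra::castOrCreateProductVectorBase<double>(v);
  // For a non-product input, pv->productSpace()->numBlocks() should be 1.
}
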
template <typename NodeT, typename ScalarT>
void MetricJacobian<NodeT,ScalarT>::SetDataViews(
    ArrayRCP<NodeT>& mesh_data, map<string,int>& mesh_map_offset,
    ArrayRCP<ScalarT>& soln_data, map<string,int>& soln_map_offset,
    ArrayRCP<typename MetricJacobian<NodeT,ScalarT>::ResidT>& resid_data,
    map<string,int>& resid_map_offset)
{
  using Teuchos::tuple;
  // views of inputs
  node_coords_ = GenerateConstView(mesh_data, mesh_map_offset.at("node_coords"),
                                   tuple(num_elems_, num_nodes_per_elem_, dim_));
  // views of outputs
  jacob_     = GenerateView(mesh_data, mesh_map_offset.at("jacob"),
                            tuple(num_elems_, num_cub_points_, dim_, dim_));
  jacob_inv_ = GenerateView(mesh_data, mesh_map_offset.at("jacob_inv"),
                            tuple(num_elems_, num_cub_points_, dim_, dim_));
  jacob_det_ = GenerateView(mesh_data, mesh_map_offset.at("jacob_det"),
                            tuple(num_elems_, num_cub_points_));
}

// Create and return a simple example CrsMatrix, with row
// distribution over the given Map.
Teuchos::RCP<const TpetraMatrixType>
create (const Teuchos::RCP<const map_type>& map) const
{
  using Teuchos::arcp;
  using Teuchos::ArrayRCP;
  using Teuchos::ArrayView;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::Time;
  using Teuchos::TimeMonitor;
  using Teuchos::tuple;
  typedef Tpetra::global_size_t GST;

  // Create a timer for sparse matrix creation.
  RCP<Time> timer = TimeMonitor::getNewCounter ("Sparse matrix creation");
  // Time the whole scope of this routine, not counting timer lookup.
  TimeMonitor monitor (*timer);

  // Create a Tpetra::Matrix using the Map, with dynamic allocation.
  RCP<TpetraMatrixType> A = rcp (new TpetraMatrixType (map, 3));

  // Add rows one at a time.  Off-diagonal values will always be -1.
  const scalar_type two    = static_cast<scalar_type> ( 2.0);
  const scalar_type negOne = static_cast<scalar_type> (-1.0);
  const GST numGlobalElements = map->getGlobalNumElements ();
  // const size_t numMyElements = map->getNodeNumElements ();

  // The list of global elements owned by this MPI process.
  ArrayView<const global_ordinal_type> myGlobalElements = map->getNodeElementList ();
  typedef typename ArrayView<const global_ordinal_type>::const_iterator iter_type;
  for (iter_type it = myGlobalElements.begin (); it != myGlobalElements.end (); ++it) {
    // The element list already holds global indices, so no
    // local-to-global conversion is needed here.
    const global_ordinal_type i_global = *it;

    // Can't insert local indices without a column map, so we insert
    // global indices here.
    if (i_global == 0) {
      A->insertGlobalValues (i_global,
                             tuple (i_global, i_global+1),
                             tuple (two, negOne));
    } else if (static_cast<GST> (i_global) == numGlobalElements - 1) {
      A->insertGlobalValues (i_global,
                             tuple (i_global-1, i_global),
                             tuple (negOne, two));
    } else {
      A->insertGlobalValues (i_global,
                             tuple (i_global-1, i_global, i_global+1),
                             tuple (negOne, two, negOne));
    }
  }

  // Finish up the matrix.
  A->fillComplete ();
  return A;
}

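// A hedged driver sketch for create() above: build a uniformly distributed
// Tpetra::Map and ask the factory for the tridiagonal [-1 2 -1] matrix.  The
// owning class name ("MatrixFactory") is hypothetical; the Tpetra calls used
// here (getDefaultComm, the Map constructor) are standard.
{
  typedef Tpetra::Map<> map_type;
  Teuchos::RCP<const Teuchos::Comm<int> > comm = Tpetra::getDefaultComm ();
  const Tpetra::global_size_t numGlobalEntries = 100;
  Teuchos::RCP<const map_type> map =
    Teuchos::rcp (new map_type (numGlobalEntries, 0, comm));
  // MatrixFactory factory;                               // hypothetical owner of create()
  // Teuchos::RCP<const TpetraMatrixType> A = factory.create (map);
}
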
template<class Scalar>
Teuchos::RCP<const Thyra::LinearOpBase<Scalar> >
Thyra::multiply(
  const RCP<const LinearOpBase<Scalar> > &A,
  const RCP<const LinearOpBase<Scalar> > &B,
  const std::string &M_label
  )
{
  using Teuchos::tuple;
  RCP<DefaultMultipliedLinearOp<Scalar> > multOp =
    defaultMultipliedLinearOp<Scalar>(tuple(A, B)());
  if (M_label.length())
    multOp->setObjectLabel(M_label);
  return multOp;
}

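// A hedged usage sketch for multiply() above: the composite operator applies
// B first and then A.  Assumes A and B (compatible
// RCP<const Thyra::LinearOpBase<double> >) and a vector x in B's domain
// already exist.
{
  const Teuchos::RCP<const Thyra::LinearOpBase<double> > AB =
    Thyra::multiply<double>(A, B, "A*B");
  const Teuchos::RCP<Thyra::VectorBase<double> > y =
    Thyra::createMember(AB->range());
  Thyra::apply<double>(*AB, Thyra::NOTRANS, *x, y.ptr());   // y = A*(B*x)
}
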
template<class Scalar, class NormOp>
void Thyra::reductions( const MultiVectorBase<Scalar>& V, const NormOp &op,
  const ArrayView<typename ScalarTraits<Scalar>::magnitudeType> &norms )
{
  using Teuchos::tuple;
  using Teuchos::ptrInArg;
  using Teuchos::null;
  const int m = V.domain()->dim();
  Array<RCP<RTOpPack::ReductTarget> > rcp_op_targs(m);
  Array<Ptr<RTOpPack::ReductTarget> > op_targs(m);
  for( int kc = 0; kc < m; ++kc ) {
    rcp_op_targs[kc] = op.reduct_obj_create();
    op_targs[kc] = rcp_op_targs[kc].ptr();
  }
  applyOp<Scalar>(op, tuple(ptrInArg(V)),
    ArrayView<Ptr<MultiVectorBase<Scalar> > >(null),
    op_targs );
  for( int kc = 0; kc < m; ++kc ) {
    norms[kc] = op(*op_targs[kc]);
  }
}

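// A hedged usage sketch of the public per-column norm wrappers that sit on
// top of the helper above, assuming double scalars and an existing
// multivector V with three columns (RCP<const Thyra::MultiVectorBase<double> >).
{
  Teuchos::Array<Teuchos::ScalarTraits<double>::magnitudeType> nrms(3);
  Thyra::norms_2<double>(*V, nrms());   // one 2-norm per column of V
}
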
int main(int argc, char *argv[])
{
#ifndef HAVE_TPETRA_COMPLEX_DOUBLE
#  error "Anasazi: This test requires Scalar = std::complex<double> to be enabled in Tpetra."
#else
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::tuple;
  using std::cout;
  using std::endl;
  typedef std::complex<double>              ST;
  typedef Teuchos::ScalarTraits<ST>         SCT;
  typedef SCT::magnitudeType                MT;
  typedef Tpetra::MultiVector<ST>           MV;
  typedef MV::global_ordinal_type           GO;
  typedef Tpetra::Operator<ST>              OP;
  typedef Anasazi::MultiVecTraits<ST,MV>    MVT;
  typedef Anasazi::OperatorTraits<ST,MV,OP> OPT;

  Tpetra::ScopeGuard tpetraScope (&argc, &argv);

  bool success = false;
  const ST ONE = SCT::one ();
  int info = 0;

  RCP<const Teuchos::Comm<int> > comm = Tpetra::getDefaultComm ();
  const int MyPID = comm->getRank ();

  bool verbose = false;
  bool debug = false;
  bool insitu = false;
  bool herm = false;
  std::string which("LM");
  std::string filename;
  int nev = 4;
  int blockSize = 4;
  MT tol = 1.0e-6;

  Teuchos::CommandLineProcessor cmdp(false,true);
  cmdp.setOption("verbose","quiet",&verbose,"Print messages and results.");
  cmdp.setOption("debug","nodebug",&debug,"Print debugging information.");
  cmdp.setOption("insitu","exsitu",&insitu,"Perform in situ restarting.");
  cmdp.setOption("sort",&which,"Targeted eigenvalues (SM or LM).");
  cmdp.setOption("herm","nonherm",&herm,"Solve Hermitian or non-Hermitian problem.");
  cmdp.setOption("filename",&filename,"Filename for Harwell-Boeing test matrix (assumes non-Hermitian unless specified otherwise).");
  cmdp.setOption("nev",&nev,"Number of eigenvalues to compute.");
  cmdp.setOption("blockSize",&blockSize,"Block size for the algorithm.");
  cmdp.setOption("tol",&tol,"Tolerance for convergence.");
  if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
    return -1;
  }
  if (debug) verbose = true;
  if (filename == "") {
    // get default based on herm
    if (herm) {
      filename = "mhd1280b.cua";
    }
    else {
      filename = "mhd1280a.cua";
    }
  }

  if (MyPID == 0) {
    cout << Anasazi::Anasazi_Version() << endl << endl;
  }

  // Get the data from the HB file
  int dim,dim2,nnz;
  int rnnzmax;
  double *dvals;
  int *colptr,*rowind;
  nnz = -1;
  if (MyPID == 0) {
    info = readHB_newmat_double(filename.c_str(),&dim,&dim2,&nnz,&colptr,&rowind,&dvals);
    // find maximum NNZ over all rows
    std::vector<int> rnnz(dim,0);
    for (int *ri=rowind; ri<rowind+nnz; ++ri) {
      ++rnnz[*ri-1];
    }
    rnnzmax = *std::max_element(rnnz.begin(),rnnz.end());
  }
  else {
    // address uninitialized data warnings
    dvals = NULL;
    colptr = NULL;
    rowind = NULL;
  }
  Teuchos::broadcast(*comm,0,&info);
  Teuchos::broadcast(*comm,0,&nnz);
  Teuchos::broadcast(*comm,0,&dim);
  Teuchos::broadcast(*comm,0,&rnnzmax);
  if (info == 0 || nnz < 0) {
    if (MyPID == 0) {
      cout << "Error reading '" << filename << "'" << endl
           << "End Result: TEST FAILED" << endl;
    }
    return -1;
  }

  // create map
  RCP<const Tpetra::Map<> > map = rcp (new Tpetra::Map<> (dim, 0, comm));
  RCP<Tpetra::CrsMatrix<ST> > K = rcp (new Tpetra::CrsMatrix<ST> (map, rnnzmax));
  if (MyPID == 0) {
    // Convert interleaved doubles to complex values.
    // HB format is compressed column. CrsMatrix is compressed row.
    const double *dptr = dvals;
    const int *rptr = rowind;
    for (int c=0; c<dim; ++c) {
      for (int colnnz=0; colnnz < colptr[c+1]-colptr[c]; ++colnnz) {
        K->insertGlobalValues (static_cast<GO> (*rptr++ - 1), tuple<GO> (c),
                               tuple (ST (dptr[0], dptr[1])));
        dptr += 2;
      }
    }
  }
  if (MyPID == 0) {
    // Clean up.
    free( dvals );
    free( colptr );
    free( rowind );
  }
  K->fillComplete();
  // cout << *K << endl;

  // Create initial vectors
  RCP<MV> ivec = rcp( new MV(map,blockSize) );
  ivec->randomize ();

  // Create eigenproblem
  RCP<Anasazi::BasicEigenproblem<ST,MV,OP> > problem =
    rcp( new Anasazi::BasicEigenproblem<ST,MV,OP>(K,ivec) );
  //
  // Inform the eigenproblem that the operator K is symmetric
  problem->setHermitian(herm);
  //
  // Set the number of eigenvalues requested
  problem->setNEV( nev );
  //
  // Inform the eigenproblem that you are done passing it information
  bool boolret = problem->setProblem();
  if (boolret != true) {
    if (MyPID == 0) {
      cout << "Anasazi::BasicEigenproblem::SetProblem() returned with error." << endl
           << "End Result: TEST FAILED" << endl;
    }
    return -1;
  }

  // Set verbosity level
  int verbosity = Anasazi::Errors + Anasazi::Warnings + Anasazi::FinalSummary + Anasazi::TimingDetails;
  if (verbose) {
    verbosity += Anasazi::IterationDetails;
  }
  if (debug) {
    verbosity += Anasazi::Debug;
  }

  // Eigensolver parameters
  int numBlocks = 8;
  int maxRestarts = 10;
  //
  // Create parameter list to pass into the solver manager
  Teuchos::ParameterList MyPL;
  MyPL.set( "Verbosity", verbosity );
  MyPL.set( "Which", which );
  MyPL.set( "Block Size", blockSize );
  MyPL.set( "Num Blocks", numBlocks );
  MyPL.set( "Maximum Restarts", maxRestarts );
  MyPL.set( "Convergence Tolerance", tol );
  MyPL.set( "In Situ Restarting", insitu );
  //
  // Create the solver manager
  Anasazi::BlockKrylovSchurSolMgr<ST,MV,OP> MySolverMgr(problem, MyPL);

  // Solve the problem to the specified tolerances or length
  Anasazi::ReturnType returnCode = MySolverMgr.solve();
  success = (returnCode == Anasazi::Converged);

  // Get the eigenvalues and eigenvectors from the eigenproblem
  Anasazi::Eigensolution<ST,MV> sol = problem->getSolution();
  RCP<MV> evecs = sol.Evecs;
  int numev = sol.numVecs;

  if (numev > 0) {
    std::ostringstream os;
    os.setf(std::ios::scientific, std::ios::floatfield);
    os.precision(6);

    // Compute the direct residual
    std::vector<MT> normV( numev );
    Teuchos::SerialDenseMatrix<int,ST> T (numev, numev);
    for (int i=0; i<numev; i++) {
      T(i,i) = ST(sol.Evals[i].realpart,sol.Evals[i].imagpart);
    }
    RCP<MV> Kvecs = MVT::Clone( *evecs, numev );
    OPT::Apply( *K, *evecs, *Kvecs );
    MVT::MvTimesMatAddMv( -ONE, *evecs, T, ONE, *Kvecs );
    MVT::MvNorm( *Kvecs, normV );

    os << "Direct residual norms computed in BlockKrylovSchurComplex_test.exe" << endl
       << std::setw(20) << "Eigenvalue" << std::setw(20) << "Residual " << endl
       << "----------------------------------------" << endl;
    for (int i=0; i<numev; i++) {
      if ( SCT::magnitude(T(i,i)) != SCT::zero() ) {
        normV[i] = SCT::magnitude(normV[i]/T(i,i));
      }
      os << std::setw(20) << T(i,i) << std::setw(20) << normV[i] << endl;
      // Every residual must satisfy the tolerance.
      if ( !(normV[i] < tol) ) {
        success = false;
      }
    }
    if (MyPID==0) {
      cout << endl << os.str() << endl;
    }
  }

  if (MyPID==0) {
    if (success)
      cout << "End Result: TEST PASSED" << endl;
    else
      cout << "End Result: TEST FAILED" << endl;
  }
  return ( success ? EXIT_SUCCESS : EXIT_FAILURE );
#endif // HAVE_TPETRA_COMPLEX_DOUBLE
}

//
// Test for Tpetra::CrsMatrix::sumIntoGlobalValues(), with nonowned
// rows.  The test creates the CrsMatrix with a static graph, so that
// globalAssemble() uses sumIntoGlobalValues() instead of
// insertGlobalValues() to merge in the incoming matrix entries.  All
// calls to sumIntoGlobalValues() in this test are for nonowned rows,
// and all the calls are correct (that is, the processes that own
// those rows have entries in the corresponding columns, so that
// nonowned fill does not require creating new entries).
//
// mfh 16 Dec 2012: The one-template-argument version breaks explicit
// instantiation.  Ah well.
//
//TEUCHOS_UNIT_TEST_TEMPLATE_1_DECL( CrsMatrix, NonlocalSumInto, CrsMatrixType )
TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL( CrsMatrix, NonlocalSumInto, LocalOrdinalType, GlobalOrdinalType, ScalarType, NodeType )
{
  using Tpetra::createContigMapWithNode;
  using Tpetra::createNonContigMapWithNode;
  using Tpetra::global_size_t;
  using Tpetra::Map;
  using Teuchos::Array;
  using Teuchos::ArrayView;
  using Teuchos::as;
  using Teuchos::av_const_cast;
  using Teuchos::Comm;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcp_const_cast;
  using Teuchos::OrdinalTraits;
  using Teuchos::outArg;
  using Teuchos::ParameterList;
  using Teuchos::parameterList;
  using Teuchos::reduceAll;
  using Teuchos::ScalarTraits;
  using Teuchos::tuple;
  using Teuchos::TypeNameTraits;
  using std::endl;

#if 0
  // Extract typedefs from the CrsMatrix specialization.
  typedef typename CrsMatrixType::scalar_type scalar_type;
  typedef typename CrsMatrixType::local_ordinal_type local_ordinal_type;
  typedef typename CrsMatrixType::global_ordinal_type global_ordinal_type;
  typedef typename CrsMatrixType::node_type node_type;
#endif // 0

  typedef ScalarType scalar_type;
  typedef LocalOrdinalType local_ordinal_type;
  typedef GlobalOrdinalType global_ordinal_type;
  typedef NodeType node_type;

  // Typedefs derived from the above canonical typedefs.
  typedef ScalarTraits<scalar_type> STS;
  typedef Map<local_ordinal_type, global_ordinal_type, node_type> map_type;

  // Abbreviation typedefs.
  typedef scalar_type ST;
  typedef local_ordinal_type LO;
  typedef global_ordinal_type GO;
  typedef node_type NT;
  typedef Tpetra::CrsMatrix<ST, LO, GO, NT> CrsMatrixType;

  // CrsGraph specialization corresponding to CrsMatrixType (the
  // CrsMatrix specialization).
  typedef Tpetra::CrsGraph<LO, GO, NT, typename CrsMatrixType::mat_solve_type> crs_graph_type;

  ////////////////////////////////////////////////////////////////////
  // HERE BEGINS THE TEST.
  ////////////////////////////////////////////////////////////////////

  const global_size_t INVALID = OrdinalTraits<global_size_t>::invalid();

  // Get the default communicator.
  RCP<const Comm<int> > comm = Tpetra::DefaultPlatform::getDefaultPlatform ().getComm ();
  const int numProcs = comm->getSize ();
  const int myRank = comm->getRank ();

  if (myRank == 0) {
    out << "Test with " << numProcs << " process" << (numProcs != 1 ? "es" : "") << endl;
  }

  // This test doesn't make much sense if there is only one MPI
  // process.  We let it pass trivially in that case.
  if (numProcs == 1) {
    out << "Number of processes in world is one; test passes trivially." << endl;
    return;
  }

  // Get a Kokkos Node instance.  It would be nice if we could pass in
  // parameters here, but threads don't matter for this test; it's a
  // test for distributed-memory capabilities.
  if (myRank == 0) {
    out << "Creating Kokkos Node of type " << TypeNameTraits<node_type>::name () << endl;
  }
  RCP<node_type> node;
  {
    ParameterList pl; // Kokkos Node types require a PL input.
    node = rcp (new node_type (pl));
  }

  // Number of rows in the matrix owned by each process.
  const LO numLocalRows = 10;

  // Number of (global) rows and columns in the matrix.
  const GO numGlobalRows = numLocalRows * numProcs;
  const GO numGlobalCols = numGlobalRows;
  // Prevent compile warning for unused variable.
  // (It's not really "variable" if it's const, but oh well.)
  (void) numGlobalCols;

  if (myRank == 0) {
    out << "Creating contiguous row Map" << endl;
  }

  // Create a contiguous row Map, with numLocalRows rows per process.
  RCP<const map_type> rowMap = createContigMapWithNode<LO, GO, NT> (INVALID, numLocalRows, comm, node);

  // For now, reuse the row Map for the domain and range Maps.  Later,
  // we might want to test using different domain or range Maps.
  RCP<const map_type> domainMap = rowMap;
  RCP<const map_type> rangeMap = rowMap;

  // Min and max row and column index of this process.  Use the row
  // Map for the row and column indices, since we're only inserting
  // indices into the graph for rows that the calling process owns.
  const GO globalMinRow = rowMap->getMinGlobalIndex ();
  const GO globalMaxRow = rowMap->getMaxGlobalIndex ();
  const GO globalMinCol = domainMap->getMinAllGlobalIndex ();
  const GO globalMaxCol = domainMap->getMaxAllGlobalIndex ();

  if (myRank == 0) {
    out << "Creating graph" << endl;
  }

  // Create a numGlobalRows by numGlobalCols graph and set its
  // structure.  Every process sets its diagonal entries (which it
  // owns), and its local (0,0) (if not on the diagonal) and
  // (numLocalRows-1, numLocalCols-1) (if not on the diagonal)
  // entries.  We will use the off-diagonal entries to test
  // modification of nonlocal entries.
  RCP<const crs_graph_type> graph;
  {
    // We have a good upper bound for the number of entries per row, so use static profile.
    RCP<crs_graph_type> nonconstGraph (new crs_graph_type (rowMap, 2, Tpetra::StaticProfile));

    TEUCHOS_TEST_FOR_EXCEPTION(globalMinRow >= globalMaxRow, std::logic_error,
      "This test only works if globalMinRow < globalMaxRow.");

    // Insert all the diagonal entries.
    for (GO globalRow = globalMinRow; globalRow <= globalMaxRow; ++globalRow) {
      nonconstGraph->insertGlobalIndices (globalRow, tuple (globalRow));
    }

    // Insert the local (0,0) entry, if not on the diagonal.
    if (globalMinRow > rowMap->getMinAllGlobalIndex ()) {
      nonconstGraph->insertGlobalIndices (globalMinRow, tuple (globalMinCol));
    }

    // Insert the local (numLocalRows-1, numLocalCols-1) entry, if not on the diagonal.
    if (globalMaxRow < rowMap->getMaxAllGlobalIndex ()) {
      nonconstGraph->insertGlobalIndices (globalMaxRow, tuple (globalMaxCol));
    }

    nonconstGraph->fillComplete (domainMap, rangeMap);
    graph = rcp_const_cast<const crs_graph_type> (nonconstGraph);
  }

  // Test whether the graph has the correct structure.
  bool localGraphSuccess = true;
  std::ostringstream graphFailMsg;
  {
    Array<GO> ind (2); // upper bound

    for (GO globalRow = globalMinRow; globalRow <= globalMaxRow; ++globalRow) {
      size_t numEntries = 0; // output argument of below line.
      graph->getGlobalRowCopy (globalRow, ind (), numEntries);

      // Revise view based on numEntries.
      ArrayView<GO> indView = ind.view (0, numEntries);

      // Sort the view.
      std::sort (indView.begin (), indView.end ());

      if (globalRow == globalMinRow && globalRow > rowMap->getMinAllGlobalIndex ()) {
        if (numEntries != as<size_t> (2)) {
          localGraphSuccess = false;
          graphFailMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": numEntries = " << numEntries << " != 2" << endl;
        }
        if (numEntries > 0 && indView[0] != globalMinCol) {
          localGraphSuccess = false;
          graphFailMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": indView[0] = " << indView[0] << " != globalMinCol = " << globalMinCol << endl;
        }
        if (numEntries > 1 && indView[1] != globalRow) {
          localGraphSuccess = false;
          graphFailMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": indView[1] = " << indView[1] << " != globalRow = " << globalRow << endl;
        }
      }
      else if (globalRow == globalMaxRow && globalRow < rowMap->getMaxAllGlobalIndex ()) {
        if (numEntries != as<size_t> (2)) {
          localGraphSuccess = false;
          graphFailMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": numEntries = " << numEntries << " != 2" << endl;
        }
        if (numEntries > 0 && indView[0] != globalRow) {
          localGraphSuccess = false;
          graphFailMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": indView[0] = " << indView[0] << " != globalRow = " << globalRow << endl;
        }
        if (numEntries > 1 && indView[1] != globalMaxCol) {
          localGraphSuccess = false;
          graphFailMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": indView[1] = " << indView[1] << " != globalMaxCol = " << globalMaxCol << endl;
        }
      }
      else {
        if (numEntries != as<size_t> (1)) {
          localGraphSuccess = false;
          graphFailMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": numEntries = " << numEntries << " != 1" << endl;
        }
        if (numEntries > 0 && indView[0] != globalRow) {
          localGraphSuccess = false;
          graphFailMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": indView[0] = " << indView[0] << " != globalRow = " << globalRow << endl;
        }
      }
    }
  }

  // Make sure that all processes successfully created the graph.
  bool globalGraphSuccess = true;
  {
    int globalGraphSuccess_int = 1;
    reduceAll (*comm, Teuchos::REDUCE_MIN, localGraphSuccess ? 1 : 0, outArg (globalGraphSuccess_int));
    globalGraphSuccess = (globalGraphSuccess_int != 0);
  }
  if (! globalGraphSuccess) {
    if (myRank == 0) {
      out << "Graph structure not all correct:" << endl << endl;
    }
    // Print out the failure messages on all processes.
    for (int p = 0; p < numProcs; ++p) {
      if (p == myRank) {
        out << graphFailMsg.str () << endl;
        std::flush (out);
      }
      // Do some barriers to allow output to finish.
      comm->barrier ();
      comm->barrier ();
      comm->barrier ();
    }
  }
  TEUCHOS_TEST_FOR_EXCEPTION(! globalGraphSuccess, std::logic_error, "Graph structure test failed.");

  if (myRank == 0) {
    out << "Creating matrix" << endl;
  }

  // Create the matrix, using the above graph.
  RCP<CrsMatrixType> matrix (new CrsMatrixType (graph));

  if (myRank == 0) {
    out << "Setting all matrix entries to 1" << endl;
  }

  // Set all the owned entries to one.  Later we'll set nonlocal
  // entries' values in a loop.
  matrix->setAllToScalar (STS::one ());

  // Sum into nonowned entries (which nevertheless exist in the
  // matrix, just not on this process) using this process' rank.
  // After global assembly, this should result in those entries having
  // value equal to one plus the rank of the process that wrote to
  // them.  That value happens to be myRank for the (0,0) local entry
  // (except when myRank==0, in which case the value is 1), and
  // myRank+2 for the (numLocalRows-1,numLocalCols-1) local entry
  // (except when myRank==numProcs-1, in which case the value is 1).
  if (globalMinRow > rowMap->getMinAllGlobalIndex ()) {
    // Write to the (numLocalRows-1,numLocalCols-1) local entry of the previous process.
    matrix->sumIntoGlobalValues (globalMinRow-1, tuple (globalMaxCol), tuple (as<ST> (myRank)));
  }
  if (globalMaxRow < rowMap->getMaxAllGlobalIndex ()) {
    // Write to the (0,0) local entry of the next process.
    matrix->sumIntoGlobalValues (globalMaxRow+1, tuple (globalMinCol), tuple (as<ST> (myRank)));
  }

  if (myRank == 0) {
    out << "Calling fillComplete on the matrix" << endl;
  }
  matrix->fillComplete (domainMap, rangeMap);

  if (myRank == 0) {
    out << "Testing the matrix values" << endl;
  }

  // Test whether the entries have their correct values.
  bool localSuccess = true;
  std::ostringstream failMsg;
  {
    Array<GO> ind (2); // upper bound
    Array<ST> val (2); // upper bound

    for (GO globalRow = globalMinRow; globalRow <= globalMaxRow; ++globalRow) {
      size_t numEntries = 0; // output argument of below line.
      matrix->getGlobalRowCopy (globalRow, ind (), val (), numEntries);

      // Revise views based on numEntries.
      ArrayView<GO> indView = ind.view (0, numEntries);
      ArrayView<ST> valView = val.view (0, numEntries);

      // Sort the views jointly by column index.
      Tpetra::sort2 (indView.begin (), indView.end (), valView.begin ());

      if (globalRow == globalMinRow && globalRow > rowMap->getMinAllGlobalIndex ()) {
        if (numEntries != as<size_t> (2)) {
          localSuccess = false;
          failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": numEntries = " << numEntries << " != 2" << endl;
        }
        if (numEntries > 0 && indView[0] != globalMinCol) {
          localSuccess = false;
          failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": indView[0] = " << indView[0] << " != globalMinCol = " << globalMinCol << endl;
        }
        if (numEntries > 1 && indView[1] != globalRow) {
          localSuccess = false;
          failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": indView[1] = " << indView[1] << " != globalRow = " << globalRow << endl;
        }
        if (numEntries > 0 && valView[0] != as<ST> (myRank)) {
          localSuccess = false;
          failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": valView[0] = " << valView[0] << " != myRank = " << myRank << endl;
        }
        if (numEntries > 1 && valView[1] != STS::one ()) {
          localSuccess = false;
          failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": valView[1] = " << valView[1] << " != 1" << endl;
        }
      }
      else if (globalRow == globalMaxRow && globalRow < rowMap->getMaxAllGlobalIndex ()) {
        if (numEntries != as<size_t> (2)) {
          localSuccess = false;
          failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": numEntries = " << numEntries << " != 2" << endl;
        }
        if (numEntries > 0 && indView[0] != globalRow) {
          localSuccess = false;
          failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": indView[0] = " << indView[0] << " != globalRow = " << globalRow << endl;
        }
        if (numEntries > 1 && indView[1] != globalMaxCol) {
          localSuccess = false;
          failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": indView[1] = " << indView[1] << " != globalMaxCol = " << globalMaxCol << endl;
        }
        if (numEntries > 0 && valView[0] != STS::one ()) {
          localSuccess = false;
          failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": valView[0] = " << valView[0] << " != 1" << endl;
        }
        if (numEntries > 1 && valView[1] != as<ST> (myRank+2)) {
          localSuccess = false;
          failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": valView[1] = " << valView[1] << " != myRank+2 = " << (myRank+2) << endl;
        }
      }
      else {
        if (numEntries != as<size_t> (1)) {
          localSuccess = false;
          failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": numEntries = " << numEntries << " != 1" << endl;
        }
        if (numEntries > 0 && indView[0] != globalRow) {
          localSuccess = false;
          failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": indView[0] = " << indView[0] << " != globalRow = " << globalRow << endl;
        }
        if (numEntries > 0 && valView[0] != STS::one ()) {
          localSuccess = false;
          failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": valView[0] = " << valView[0] << " != 1" << endl;
        }
      }
    }
  }

  bool globalSuccess = true;
  {
    int globalSuccess_int = 1;
    reduceAll (*comm, Teuchos::REDUCE_MIN, localSuccess ? 1 : 0, outArg (globalSuccess_int));
    globalSuccess = (globalSuccess_int != 0);
  }

  if (! globalSuccess) {
    // Print out the failure messages on all processes.
    for (int p = 0; p < numProcs; ++p) {
      if (p == myRank) {
        out << failMsg.str () << endl;
        out << "Proc " << myRank << ": localSuccess = " << localSuccess << ", globalSuccess = " << globalSuccess << endl;
        //      std::flush (out);
      }
      // Do some barriers to allow output to finish.
      comm->barrier ();
      comm->barrier ();
      comm->barrier ();
    }
  }

  TEST_EQUALITY_CONST(globalSuccess, true);
}

//
// Test for Tpetra::CrsMatrix::sumIntoGlobalValues(), with nonowned
// rows.  This test is like CrsMatrix_NonlocalSumInto.cpp, except that
// it attempts to sum into remote entries that don't exist on the
// process that owns them.  Currently, CrsMatrix silently ignores
// these entries.  (This is how CrsMatrix implements Import and Export
// when the target matrix has a fixed column Map.  Data are
// redistributed between the two row Maps, and "filtered" by the
// target matrix's column Map.)  This unit test verifies that behavior
// by ensuring the following:
//
// 1. fillComplete() (actually globalAssemble()) does not throw an
//    exception when the incoming entries don't exist on the process
//    that owns their rows.
//
// 2. The ignored entries are actually ignored.  They must change
//    neither the structure nor the values of the matrix.
//
// mfh 16 Dec 2012: The one-template-argument version breaks explicit
// instantiation.  Ah well.
//
//TEUCHOS_UNIT_TEST_TEMPLATE_1_DECL( CrsMatrix, NonlocalSumInto_Ignore, CrsMatrixType )
TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL( CrsMatrix, NonlocalSumInto_Ignore, LocalOrdinalType, GlobalOrdinalType, ScalarType, NodeType )
{
  using Tpetra::createContigMapWithNode;
  using Tpetra::createNonContigMapWithNode;
  using Tpetra::global_size_t;
  using Tpetra::Map;
  using Teuchos::Array;
  using Teuchos::ArrayView;
  using Teuchos::as;
  using Teuchos::av_const_cast;
  using Teuchos::Comm;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcp_const_cast;
  using Teuchos::OrdinalTraits;
  using Teuchos::outArg;
  using Teuchos::ParameterList;
  using Teuchos::parameterList;
  using Teuchos::reduceAll;
  using Teuchos::ScalarTraits;
  using Teuchos::tuple;
  using Teuchos::TypeNameTraits;
  using std::endl;

#if 0
  // Extract typedefs from the CrsMatrix specialization.
  typedef typename CrsMatrixType::scalar_type scalar_type;
  typedef typename CrsMatrixType::local_ordinal_type local_ordinal_type;
  typedef typename CrsMatrixType::global_ordinal_type global_ordinal_type;
  typedef typename CrsMatrixType::node_type node_type;
#endif // 0

  typedef ScalarType scalar_type;
  typedef LocalOrdinalType local_ordinal_type;
  typedef GlobalOrdinalType global_ordinal_type;
  typedef NodeType node_type;

  // Typedefs derived from the above canonical typedefs.
  typedef ScalarTraits<scalar_type> STS;
  typedef Map<local_ordinal_type, global_ordinal_type, node_type> map_type;

  // Abbreviation typedefs.
  typedef scalar_type ST;
  typedef local_ordinal_type LO;
  typedef global_ordinal_type GO;
  typedef node_type NT;
  typedef Tpetra::CrsMatrix<ST, LO, GO, NT> CrsMatrixType;

  // CrsGraph specialization corresponding to CrsMatrixType (the
  // CrsMatrix specialization).
  typedef Tpetra::CrsGraph<LO, GO, NT, typename CrsMatrixType::mat_solve_type> crs_graph_type;

  ////////////////////////////////////////////////////////////////////
  // HERE BEGINS THE TEST.
  ////////////////////////////////////////////////////////////////////

  const global_size_t INVALID = OrdinalTraits<global_size_t>::invalid();

  // Get the default communicator.
  RCP<const Comm<int> > comm = Tpetra::DefaultPlatform::getDefaultPlatform ().getComm ();
  const int numProcs = comm->getSize ();
  const int myRank = comm->getRank ();

  if (myRank == 0) {
    out << "Test with " << numProcs << " process" << (numProcs != 1 ? "es" : "") << endl;
  }

  // This test doesn't make much sense if there is only one MPI
  // process.  We let it pass trivially in that case.
  if (numProcs == 1) {
    out << "Number of processes in world is one; test passes trivially." << endl;
    return;
  }

  // Get a Kokkos Node instance.  It would be nice if we could pass in
  // parameters here, but threads don't matter for this test; it's a
  // test for distributed-memory capabilities.
  if (myRank == 0) {
    out << "Creating Kokkos Node of type " << TypeNameTraits<node_type>::name () << endl;
  }
  RCP<node_type> node;
  {
    ParameterList pl; // Kokkos Node types require a PL input.
    node = rcp (new node_type (pl));
  }

  // Number of rows in the matrix owned by each process.
  const LO numLocalRows = 10;

  //CrT: 4Feb14: the void trick does not seem to work, I get warnings
  // Number of (global) rows and columns in the matrix.
  //const GO numGlobalRows = numLocalRows * numProcs;
  //const GO numGlobalCols = numGlobalRows;
  // Prevent compile warning for unused variable.
  // (It's not really "variable" if it's const, but oh well.)
  //(void) numGlobalCols;

  if (myRank == 0) {
    out << "Creating contiguous row Map" << endl;
  }

  // Create a contiguous row Map, with numLocalRows rows per process.
  RCP<const map_type> rowMap = createContigMapWithNode<LO, GO, NT> (INVALID, numLocalRows, comm, node);

  // For now, reuse the row Map for the domain and range Maps.  Later,
  // we might want to test using different domain or range Maps.
  RCP<const map_type> domainMap = rowMap;
  RCP<const map_type> rangeMap = rowMap;

  // Min and max row and column index of this process.  Use the row
  // Map for the row and column indices, since we're only inserting
  // indices into the graph for rows that the calling process owns.
  const GO globalMinRow = rowMap->getMinGlobalIndex ();
  const GO globalMaxRow = rowMap->getMaxGlobalIndex ();
  const GO globalMinCol = domainMap->getMinAllGlobalIndex ();
  const GO globalMaxCol = domainMap->getMaxAllGlobalIndex ();

  if (myRank == 0) {
    out << "Creating graph" << endl;
  }

  // Create a numGlobalRows by numGlobalCols graph and set its
  // structure.  Every process sets its diagonal entries (which it
  // owns).  Unlike in the CrsMatrix_NonlocalSumInto.cpp test, we
  // don't set any other entries.  As a result, the later calls to
  // sumIntoGlobalValues() for nonowned rows should fail.
  RCP<const crs_graph_type> graph;
  {
    // We have a good upper bound for the number of entries per row,
    // so use static profile.  Leave the upper bound as 2 (just as it
    // is in the CrsMatrix_NonlocalSumInto.cpp test) so that there
    // would actually be room for the incoming entries from remote
    // calls to sumIntoGlobalValues().
    RCP<crs_graph_type> nonconstGraph (new crs_graph_type (rowMap, 2, Tpetra::StaticProfile));

    TEUCHOS_TEST_FOR_EXCEPTION(globalMinRow >= globalMaxRow, std::logic_error,
      "This test only works if globalMinRow < globalMaxRow.");

    // Insert all the diagonal entries, and only the diagonal entries
    // (unlike in the other test).
    for (GO globalRow = globalMinRow; globalRow <= globalMaxRow; ++globalRow) {
      nonconstGraph->insertGlobalIndices (globalRow, tuple (globalRow));
    }

    nonconstGraph->fillComplete (domainMap, rangeMap);
    graph = rcp_const_cast<const crs_graph_type> (nonconstGraph);
  }

  // Test whether the graph has the correct structure.
  bool localGraphSuccess = true;
  std::ostringstream graphFailMsg;
  {
    Array<GO> ind (2); // upper bound

    for (GO globalRow = globalMinRow; globalRow <= globalMaxRow; ++globalRow) {
      size_t numEntries = 0; // output argument of below line.
      graph->getGlobalRowCopy (globalRow, ind (), numEntries);

      // Revise view based on numEntries.
      ArrayView<GO> indView = ind.view (0, numEntries);

      // Sort the view.
      std::sort (indView.begin (), indView.end ());

      if (numEntries != as<size_t> (1)) {
        localGraphSuccess = false;
        graphFailMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": numEntries = " << numEntries << " != 1" << endl;
      }
      if (numEntries > 0 && indView[0] != globalRow) {
        localGraphSuccess = false;
        graphFailMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": indView[0] = " << indView[0] << " != globalRow = " << globalRow << endl;
      }
    }
  }

  // Make sure that all processes successfully created the graph.
  bool globalGraphSuccess = true;
  {
    int globalGraphSuccess_int = 1;
    reduceAll (*comm, Teuchos::REDUCE_MIN, localGraphSuccess ? 1 : 0, outArg (globalGraphSuccess_int));
    globalGraphSuccess = (globalGraphSuccess_int != 0);
  }
  if (! globalGraphSuccess) {
    if (myRank == 0) {
      out << "Graph structure not all correct:" << endl << endl;
    }
    // Print out the failure messages on all processes.
    for (int p = 0; p < numProcs; ++p) {
      if (p == myRank) {
        out << graphFailMsg.str () << endl;
        std::flush (out);
      }
      // Do some barriers to allow output to finish.
      comm->barrier ();
      comm->barrier ();
      comm->barrier ();
    }
  }
  TEUCHOS_TEST_FOR_EXCEPTION(! globalGraphSuccess, std::logic_error, "Graph structure test failed.");

  if (myRank == 0) {
    out << "Creating matrix" << endl;
  }

  // Create the matrix, using the above graph.
  RCP<CrsMatrixType> matrix (new CrsMatrixType (graph));

  if (myRank == 0) {
    out << "Setting all matrix entries to 1" << endl;
  }

  // Set all the owned entries to one.  Later we'll set nonlocal
  // entries' values in a loop.
  matrix->setAllToScalar (STS::one ());

  // Attempt to sum into nonowned entries (which nevertheless exist in
  // the matrix, just not on this process) using this process' rank.
  // The sumIntoGlobalValues() calls will record the data, but the
  // globalAssemble() method (called by fillComplete()) will silently
  // ignore entries whose columns are not in the column Map.  The
  // comment at the top of this test explains why this behavior is
  // reasonable.
  //
  // mfh 15,16 Dec 2012: Silently ignoring columns not in the column
  // Map has implications for the implementation of
  // sumIntoGlobalValues() for nonowned rows.  In particular, a
  // version of Map's getRemoteIDList() that uses one-sided
  // communication could invoke MPI_Get to figure out what the remote
  // process owns, without asking it or otherwise requiring
  // synchronization.  Thus, sumIntoGlobalValues() could throw
  // immediately on the calling process, rather than deferring the
  // exception to the remote process in globalAssemble().  If we
  // switch to that implementation, this unit test must be changed
  // accordingly.
  if (globalMinRow > rowMap->getMinAllGlobalIndex ()) {
    // Attempt to write to the (numLocalRows-1,numLocalCols-1) local entry of the previous process.
    matrix->sumIntoGlobalValues (globalMinRow-1, tuple (globalMaxCol), tuple (as<ST> (myRank)));
  }
  if (globalMaxRow < rowMap->getMaxAllGlobalIndex ()) {
    // Attempt to write to the (0,0) local entry of the next process.
    matrix->sumIntoGlobalValues (globalMaxRow+1, tuple (globalMinCol), tuple (as<ST> (myRank)));
  }

  if (myRank == 0) {
    out << "Calling fillComplete on the matrix" << endl;
  }
  TEST_NOTHROW(matrix->fillComplete (domainMap, rangeMap));
  // Tpetra::Details::InvalidGlobalIndex<GO>

  // mfh 15 Dec 2012: We currently don't make promises about the state
  // of the matrix if fillComplete() throws.  Later, we might like to
  // improve the exception guarantees of fillComplete().  In that
  // case, the commented-out code below should be allowed to run.

  if (myRank == 0) {
    out << "Testing the matrix values" << endl;
  }

  // Test whether the entries have their correct values.
  bool localSuccess = true;
  std::ostringstream failMsg;
  {
    Array<GO> ind (2); // upper bound
    Array<ST> val (2); // upper bound

    for (GO globalRow = globalMinRow; globalRow <= globalMaxRow; ++globalRow) {
      size_t numEntries = 0; // output argument of below line.
      matrix->getGlobalRowCopy (globalRow, ind (), val (), numEntries);

      // Revise views based on numEntries.
      ArrayView<GO> indView = ind.view (0, numEntries);
      ArrayView<ST> valView = val.view (0, numEntries);

      // Sort the views jointly by column index.
      Tpetra::sort2 (indView.begin (), indView.end (), valView.begin ());

      if (numEntries != as<size_t> (1)) {
        localSuccess = false;
        failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": numEntries = " << numEntries << " != 1" << endl;
      }
      if (numEntries > 0 && indView[0] != globalRow) {
        localSuccess = false;
        failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": indView[0] = " << indView[0] << " != globalRow = " << globalRow << endl;
      }
      if (numEntries > 0 && valView[0] != STS::one ()) {
        localSuccess = false;
        failMsg << "Proc " << myRank << ": globalRow = " << globalRow << ": valView[0] = " << valView[0] << " != 1" << endl;
      }
    }
  }

  bool globalSuccess = true;
  {
    int globalSuccess_int = 1;
    reduceAll (*comm, Teuchos::REDUCE_MIN, localSuccess ? 1 : 0, outArg (globalSuccess_int));
    globalSuccess = (globalSuccess_int != 0);
  }

  if (! globalSuccess) {
    // Print out the failure messages on all processes.
    for (int p = 0; p < numProcs; ++p) {
      if (p == myRank) {
        out << failMsg.str () << endl;
        out << "Proc " << myRank << ": localSuccess = " << localSuccess << ", globalSuccess = " << globalSuccess << endl;
        //      std::flush (out);
      }
      // Do some barriers to allow output to finish.
      comm->barrier ();
      comm->barrier ();
      comm->barrier ();
    }
  }

  TEST_EQUALITY_CONST(globalSuccess, true);
}