// Draw a Scalar sample bounded by `start` and `end`.
//
// Only coefficient 0 of each bound is read; both are forwarded, together with
// the generator, to BaseRand::draw and its result is converted to Scalar.
// NOTE(review): presumably the bounds are expected to be uniform across all
// ensemble entries, since entries other than 0 are ignored -- confirm.
KOKKOS_INLINE_FUNCTION
static Scalar draw(Generator& gen, const Scalar& start, const Scalar& end)
{
  Scalar sample = BaseRand::draw(gen, start.coeff(0), end.coeff(0));
  return sample;
}
// Evaluate the FENL response at every sample point, solving the points in
// ensembles of Storage::static_size samples at a time.
//
//   comm            communicator; only rank 0 prints the per-group summary
//   problem         FENL problem handed to fenl() for each ensemble
//   coeff_function  coefficient function whose random-variable view is
//                   overwritten with the sample coordinates of each ensemble
//   grouper         partitions `points` into ensembles of sample indices
//   fenlParams, cmd, bc_lower_value, bc_upper_value
//                   forwarded to fenl(); cmd.PRINT_ITS enables the summary
//   points          sample coordinates, indexed as points[sample][dimension]
//   responses       out: response value per sample
//   iterations      out: CG iteration count per sample
//   perf_total      in/out: accumulated performance statistics
void run_samples(
  const Teuchos::Comm<int>& comm ,
  Kokkos::Example::FENL::Problem< Sacado::MP::Vector<Storage>, Device, ElemOrder>& problem ,
  const CoeffFunctionType & coeff_function,
  const Teuchos::RCP<Kokkos::Example::FENL::SampleGrouping<double> >& grouper,
  const Teuchos::RCP<Teuchos::ParameterList>& fenlParams,
  const CMD & cmd ,
  const double bc_lower_value,
  const double bc_upper_value,
  const Teuchos::Array< Teuchos::Array<double> >& points,
  Teuchos::Array<double>& responses,
  Teuchos::Array<int>& iterations,
  Kokkos::Example::FENL::Perf& perf_total)
{
  using Teuchos::Array;
  using Teuchos::Ordinal;

  typedef typename Sacado::MP::Vector<Storage> Scalar;
  typedef typename CoeffFunctionType::RandomVariableView RV;
  typedef typename RV::HostMirror HRV;
  static const int VectorSize = Storage::static_size;

  // Partition the sample points into ensembles of VectorSize sample indices.
  Array< Array<Ordinal> > groups;
  Ordinal num_duplicate = 0;
  grouper->group(VectorSize, points, groups, num_duplicate);
  const int num_groups = groups.size();

  // Device-side random variables and the host mirror used to fill them.
  RV rv = coeff_function.getRandomVariables();
  HRV hrv = Kokkos::create_mirror_view(rv);
  const int dim = rv.dimension_0();

  // One ensemble solve per group.
  for (int group = 0; group < num_groups; ++group) {
    const Array<Ordinal>& members = groups[group];

    // Load the coordinates of each sample of this ensemble into the matching
    // ensemble slot of every random variable, then push them to the device.
    for (int slot = 0; slot < VectorSize; ++slot) {
      for (int d = 0; d < dim; ++d) {
        hrv(d).fastAccessCoeff(slot) = points[members[slot]][d];
      }
    }
    Kokkos::deep_copy( rv, hrv );

    // Solve for the whole ensemble at once.
    Scalar response = 0;
    Kokkos::Example::FENL::Perf perf =
      fenl( problem , fenlParams ,
            cmd.PRINT , cmd.USE_TRIALS , cmd.USE_ATOMIC ,
            cmd.USE_BELOS , cmd.USE_MUELU , cmd.USE_MEANBASED ,
            coeff_function ,
            cmd.USE_ISOTROPIC , cmd.USE_COEFF_SRC , cmd.USE_COEFF_ADV ,
            bc_lower_value , bc_upper_value ,
            response);

    // Scatter the results back to the individual samples.  Every sample of
    // an ensemble is given the same iteration count, recorded here before
    // the count is scaled by the ensemble size below.
    for (int slot = 0; slot < VectorSize; ++slot) {
      const Ordinal sample = members[slot];
      responses[sample] = response.coeff(slot);
      iterations[sample] = perf.cg_iter_count;
    }

    // Optional per-group summary: group index, iteration count, the sample
    // indices in the group and the random-variable values that were used.
    if (cmd.PRINT_ITS && comm.getRank() == 0) {
      std::cout << group << " : " << perf.cg_iter_count << " ( ";
      for (int slot = 0; slot < VectorSize; ++slot) {
        std::cout << members[slot] << " ";
      }
      std::cout << ")" << " ( ";
      for (int d = 0; d < dim; ++d) {
        std::cout << hrv(d) << " ";
      }
      std::cout << ")" << std::endl;
    }

    // Scale the statistics of this solve by the ensemble size ...
    perf.newton_iter_count  *= VectorSize;
    perf.cg_iter_count      *= VectorSize;
    perf.map_ratio          *= VectorSize;
    perf.fill_node_set      *= VectorSize;
    perf.scan_node_count    *= VectorSize;
    perf.fill_graph_entries *= VectorSize;
    perf.sort_graph_entries *= VectorSize;
    perf.fill_element_graph *= VectorSize;

    // ... and fold them into the running totals.
    perf_total.increment(perf, !cmd.USE_BELOS);
  }
}
// Draw a Scalar sample limited by `range`.
//
// Only coefficient 0 of `range` is read; it is forwarded, together with the
// generator, to BaseRand::draw and the result is converted to Scalar.
// NOTE(review): presumably `range` is expected to be uniform across all
// ensemble entries, since entries other than 0 are ignored -- confirm.
KOKKOS_INLINE_FUNCTION
static Scalar draw(Generator& gen, const Scalar& range)
{
  Scalar sample = BaseRand::draw(gen, range.coeff(0));
  return sample;
}
// // Test Belos GMRES solve for a simple banded upper-triangular matrix // TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL( Tpetra_CrsMatrix_MP, BelosGMRES, BaseScalar, LocalOrdinal, GlobalOrdinal, Node ) { using Teuchos::RCP; using Teuchos::rcp; using Teuchos::ArrayView; using Teuchos::Array; using Teuchos::ArrayRCP; using Teuchos::ParameterList; const LocalOrdinal VectorSize = 3; typedef typename DeviceForNode<Node>::type Device; typedef Stokhos::StaticFixedStorage<LocalOrdinal,BaseScalar,VectorSize,Device> Storage; typedef Sacado::MP::Vector<Storage> Scalar; typedef Teuchos::Comm<int> Tpetra_Comm; typedef Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> Tpetra_Map; typedef Tpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> Tpetra_Vector; typedef Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> Tpetra_CrsMatrix; typedef Tpetra::CrsGraph<LocalOrdinal,GlobalOrdinal,Node> Tpetra_CrsGraph; // Ensure device is initialized if (!Device::is_initialized()) Device::initialize(); // Build banded matrix GlobalOrdinal nrow = 10; RCP<const Tpetra_Comm> comm = Tpetra::DefaultPlatform::getDefaultPlatform().getComm(); RCP<Node> node = rcp(new Node); RCP<const Tpetra_Map> map = Tpetra::createUniformContigMapWithNode<LocalOrdinal,GlobalOrdinal>( nrow, comm, node); RCP<Tpetra_CrsGraph> graph = Tpetra::createCrsGraph(map, size_t(2)); Array<GlobalOrdinal> columnIndices(2); ArrayView<const GlobalOrdinal> myGIDs = map->getNodeElementList(); const size_t num_my_row = myGIDs.size(); for (size_t i=0; i<num_my_row; ++i) { const GlobalOrdinal row = myGIDs[i]; columnIndices[0] = row; size_t ncol = 1; if (row != nrow-1) { columnIndices[1] = row+1; ncol = 2; } graph->insertGlobalIndices(row, columnIndices(0,ncol)); } graph->fillComplete(); RCP<Tpetra_CrsMatrix> matrix = rcp(new Tpetra_CrsMatrix(graph)); // Set values in matrix Array<Scalar> vals(2); Scalar val(VectorSize, BaseScalar(0.0)); for (size_t i=0; i<num_my_row; ++i) { const GlobalOrdinal row = myGIDs[i]; columnIndices[0] = row; for 
(LocalOrdinal j=0; j<VectorSize; ++j) val.fastAccessCoeff(j) = j+1; vals[0] = val; size_t ncol = 1; if (row != nrow-1) { columnIndices[1] = row+1; for (LocalOrdinal j=0; j<VectorSize; ++j) val.fastAccessCoeff(j) = j+1; vals[1] = val; ncol = 2; } matrix->replaceGlobalValues(row, columnIndices(0,ncol), vals(0,ncol)); } matrix->fillComplete(); // Fill RHS vector RCP<Tpetra_Vector> b = Tpetra::createVector<Scalar>(map); ArrayRCP<Scalar> b_view = b->get1dViewNonConst(); for (size_t i=0; i<num_my_row; ++i) { b_view[i] = Scalar(1.0); } // Solve typedef Teuchos::ScalarTraits<BaseScalar> ST; typedef BaseScalar BelosScalar; typedef Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV; typedef Tpetra::Operator<Scalar,LocalOrdinal,GlobalOrdinal,Node> OP; typedef Belos::OperatorTraits<BelosScalar,MV,OP> BOPT; typedef Belos::MultiVecTraits<BelosScalar,MV> BMVT; typedef Belos::LinearProblem<BelosScalar,MV,OP> BLinProb; RCP<Tpetra_Vector> x = Tpetra::createVector<Scalar>(map); RCP< BLinProb > problem = rcp(new BLinProb(matrix, x, b)); RCP<ParameterList> belosParams = rcp(new ParameterList); typename ST::magnitudeType tol = 1e-12; belosParams->set("Flexible Gmres", false); belosParams->set("Num Blocks", 100); belosParams->set("Convergence Tolerance", BelosScalar(tol)); belosParams->set("Maximum Iterations", 100); belosParams->set("Verbosity", 33); belosParams->set("Output Style", 1); belosParams->set("Output Frequency", 1); belosParams->set("Output Stream", out.getOStream()); RCP<Belos::SolverManager<BelosScalar,MV,OP> > solver = rcp(new Belos::PseudoBlockGmresSolMgr<BelosScalar,MV,OP>(problem, belosParams)); problem->setProblem(); Belos::ReturnType ret = solver->solve(); TEST_EQUALITY_CONST( ret, Belos::Converged ); // x->describe(*(Teuchos::fancyOStream(rcp(&std::cout,false))), // Teuchos::VERB_EXTREME); // Check -- Correct answer is: // [ 0, 0, ..., 0 ] // [ 1, 1/2, ..., 1/VectorSize ] // [ 0, 0, ..., 0 ] // [ 1, 1/2, ..., 1/VectorSize ] // .... 
ArrayRCP<Scalar> x_view = x->get1dViewNonConst(); for (size_t i=0; i<num_my_row; ++i) { const GlobalOrdinal row = myGIDs[i]; if (row % 2) { for (LocalOrdinal j=0; j<VectorSize; ++j) { val.fastAccessCoeff(j) = BaseScalar(1.0) / BaseScalar(j+1); } } else val = Scalar(0.0); TEST_EQUALITY( x_view[i].size(), VectorSize ); // Set small values to zero Scalar v = x_view[i]; for (LocalOrdinal j=0; j<VectorSize; ++j) { if (ST::magnitude(v.coeff(j)) < tol) v.fastAccessCoeff(j) = BaseScalar(0.0); } for (LocalOrdinal j=0; j<VectorSize; ++j) TEST_FLOATING_EQUALITY(v.coeff(j), val.coeff(j), tol); } }