bool run( const Teuchos::RCP<const Teuchos::Comm<int> > & comm , const CMD & cmd) { typedef typename Kokkos::Compat::KokkosDeviceWrapperNode<Device> NodeType; bool success = true; try { const int comm_rank = comm->getRank(); // Create Tpetra Node -- do this first as it initializes host/device Teuchos::RCP<NodeType> node = createKokkosNode<NodeType>( cmd , *comm ); // Set up stochastic discretization using Teuchos::Array; using Teuchos::RCP; using Teuchos::rcp; typedef Stokhos::OneDOrthogPolyBasis<int,double> one_d_basis; typedef Stokhos::LegendreBasis<int,double> legendre_basis; typedef Stokhos::LexographicLess< Stokhos::MultiIndex<int> > order_type; typedef Stokhos::TotalOrderBasis<int,double,order_type> product_basis; typedef Stokhos::Sparse3Tensor<int,double> Cijk; const int dim = cmd.CMD_USE_UQ_DIM; const int order = cmd.CMD_USE_UQ_ORDER ; Array< RCP<const one_d_basis> > bases(dim); for (int i=0; i<dim; i++) bases[i] = rcp(new legendre_basis(order, true)); RCP<const product_basis> basis = rcp(new product_basis(bases)); RCP<Cijk> cijk = basis->computeTripleProductTensor(); typedef Stokhos::DynamicStorage<int,double,Device> Storage; typedef Sacado::UQ::PCE<Storage> Scalar; typename Scalar::cijk_type kokkos_cijk = Stokhos::create_product_tensor<Device>(*basis, *cijk); Kokkos::setGlobalCijkTensor(kokkos_cijk); // typedef Stokhos::TensorProductQuadrature<int,double> quadrature; // RCP<const quadrature> quad = rcp(new quadrature(basis)); // const int num_quad_points = quad->size(); // const Array<double>& quad_weights = quad->getQuadWeights(); // const Array< Array<double> >& quad_points = quad->getQuadPoints(); // const Array< Array<double> >& quad_values = quad->getBasisAtQuadPoints(); // Print output headers const std::vector< size_t > widths = print_headers( std::cout , cmd , comm_rank ); using Kokkos::Example::FENL::TrivialManufacturedSolution; using Kokkos::Example::FENL::ElementComputationKLCoefficient; using Kokkos::Example::BoxElemPart; using Kokkos::Example::FENL::fenl; using Kokkos::Example::FENL::Perf; const double bc_lower_value = 1 ; const double bc_upper_value = 2 ; const TrivialManufacturedSolution manufactured_solution; int nelem[3] = { cmd.CMD_USE_FIXTURE_X , cmd.CMD_USE_FIXTURE_Y , cmd.CMD_USE_FIXTURE_Z }; // Create KL diffusion coefficient const double kl_mean = cmd.CMD_USE_MEAN; const double kl_variance = cmd.CMD_USE_VAR; const double kl_correlation = cmd.CMD_USE_COR; typedef ElementComputationKLCoefficient< Scalar, double, Device > KL; KL diffusion_coefficient( kl_mean, kl_variance, kl_correlation, dim ); typedef typename KL::RandomVariableView RV; typedef typename RV::HostMirror HRV; RV rv = diffusion_coefficient.getRandomVariables(); HRV hrv = Kokkos::create_mirror_view(rv); // Set random variables // ith random variable \xi_i = \psi_I(\xi) / \psi_I(1.0) // where I is determined by the basis ordering (since the component basis // functions have unit two-norm, \psi_I(1.0) might not be 1.0). We compute // this by finding the index of the multivariate term that is first order in // the ith slot, all other orders 0 Teuchos::Array<double> point(dim, 1.0); Teuchos::Array<double> basis_vals(basis->size()); basis->evaluateBases(point, basis_vals); for (int i=0; i<dim; ++i) { Stokhos::MultiIndex<int> term(dim, 0); term[i] = 1; int index = basis->index(term); hrv(i).fastAccessCoeff(index) = 1.0 / basis_vals[index]; } Kokkos::deep_copy( rv, hrv ); // Compute stochastic response using stochastic Galerkin method Scalar response = 0; Perf perf; if ( cmd.CMD_USE_FIXTURE_QUADRATIC ) perf = fenl< Scalar , Device , BoxElemPart::ElemQuadratic > ( comm , node , cmd.CMD_PRINT , cmd.CMD_USE_TRIALS , cmd.CMD_USE_ATOMIC , cmd.CMD_USE_BELOS , cmd.CMD_USE_MUELU , cmd.CMD_USE_MEANBASED , nelem , diffusion_coefficient , manufactured_solution , bc_lower_value , bc_upper_value , false , response); else perf = fenl< Scalar , Device , BoxElemPart::ElemLinear > ( comm , node , cmd.CMD_PRINT , cmd.CMD_USE_TRIALS , cmd.CMD_USE_ATOMIC , cmd.CMD_USE_BELOS , cmd.CMD_USE_MUELU , cmd.CMD_USE_MEANBASED , nelem , diffusion_coefficient , manufactured_solution , bc_lower_value , bc_upper_value , false , response); // std::cout << "newton count = " << perf.newton_iter_count // << " cg count = " << perf.cg_iter_count << std::endl; int pce_size = basis->size(); perf.uq_count = pce_size; perf.newton_iter_count *= pce_size; perf.cg_iter_count *= pce_size; perf.map_ratio *= pce_size; perf.fill_node_set *= pce_size; perf.scan_node_count *= pce_size; perf.fill_graph_entries *= pce_size; perf.sort_graph_entries *= pce_size; perf.fill_element_graph *= pce_size; // Compute response mean, variance perf.response_mean = response.mean(); perf.response_std_dev = response.standard_deviation(); //std::cout << std::endl << response << std::endl; if ( 0 == comm_rank ) { print_perf_value( std::cout , cmd , widths , perf ); } if ( cmd.CMD_SUMMARIZE ) { Teuchos::TimeMonitor::report (comm.ptr (), std::cout); } } TEUCHOS_STANDARD_CATCH_STATEMENTS(true, std::cerr, success); return success; }
bool run( const Teuchos::RCP<const Teuchos::Comm<int> > & comm , const CMD & cmd) { typedef typename Kokkos::Compat::KokkosDeviceWrapperNode<Device> NodeType; bool success = true; try { const int comm_rank = comm->getRank(); // Create Tpetra Node -- do this first as it initializes host/device Teuchos::RCP<NodeType> node = createKokkosNode<NodeType>( cmd , *comm ); // Set up stochastic discretization using Teuchos::Array; using Teuchos::RCP; using Teuchos::rcp; typedef Stokhos::OneDOrthogPolyBasis<int,double> one_d_basis; typedef Stokhos::LegendreBasis<int,double> legendre_basis; typedef Stokhos::LexographicLess< Stokhos::MultiIndex<int> > order_type; typedef Stokhos::TotalOrderBasis<int,double,order_type> product_basis; typedef Stokhos::Sparse3Tensor<int,double> Cijk; typedef Stokhos::Quadrature<int,double> quadrature; const int dim = cmd.USE_UQ_DIM; const int order = cmd.USE_UQ_ORDER ; Array< RCP<const one_d_basis> > bases(dim); for (int i=0; i<dim; i++) bases[i] = rcp(new legendre_basis(order, true)); RCP<const product_basis> basis = rcp(new product_basis(bases)); RCP<Cijk> cijk = basis->computeTripleProductTensor(); typedef Stokhos::DynamicStorage<int,double,Device> Storage; typedef Sacado::UQ::PCE<Storage> Scalar; typename Scalar::cijk_type kokkos_cijk = Stokhos::create_product_tensor<Device>(*basis, *cijk); Kokkos::setGlobalCijkTensor(kokkos_cijk); // Create quadrature data used by assembly RCP<const quadrature> quad; if ( cmd.USE_SPARSE ) { Stokhos::TotalOrderIndexSet<int> index_set(dim, order); quad = rcp(new Stokhos::SmolyakSparseGridQuadrature<int,double>(basis, index_set)); } else quad = rcp(new Stokhos::TensorProductQuadrature<int,double>(basis)); const int num_pce = basis->size(); const int num_quad_points = quad->size(); const Array<double>& quad_weights = quad->getQuadWeights(); const Array< Array<double> >& quad_points = quad->getQuadPoints(); const Array< Array<double> >& quad_values = quad->getBasisAtQuadPoints(); // Align number of quadrature points to ensemble size used in assembly const int align = 32; const int mask = align-1; const int num_quad_points_aligned = (num_quad_points + mask) & ~mask; // Copy quadrature data to view's for assembly kernels typedef Kokkos::Example::FENL::QuadratureData<Device> QD; typedef typename QD::quad_weights_type quad_weights_type; typedef typename QD::quad_values_type quad_values_type; QD qd; qd.weights_view = quad_weights_type( "quad weights", num_quad_points_aligned ); qd.points_view = quad_values_type( "quad points", num_quad_points_aligned, dim ); qd.values_view = quad_values_type( "quad values", num_quad_points_aligned, num_pce ); typename quad_weights_type::HostMirror host_weights_view = Kokkos::create_mirror_view( qd.weights_view ); typename quad_values_type::HostMirror host_points_view = Kokkos::create_mirror_view( qd.points_view ); typename quad_values_type::HostMirror host_values_view = Kokkos::create_mirror_view( qd.values_view ); for (int qp=0; qp<num_quad_points; ++qp) { host_weights_view(qp) = quad_weights[qp]; for (int i=0; i<dim; ++i) host_points_view(qp,i) = quad_points[qp][i]; for (int i=0; i<num_pce; ++i) host_values_view(qp,i) = quad_values[qp][i]; } for (int qp=num_quad_points; qp<num_quad_points_aligned; ++qp) { host_weights_view(qp) = 0.0; for (int i=0; i<dim; ++i) host_points_view(qp,i) = quad_points[num_quad_points-1][i]; for (int i=0; i<num_pce; ++i) host_values_view(qp,i) = quad_values[num_quad_points-1][i]; } Kokkos::deep_copy( qd.weights_view, host_weights_view ); Kokkos::deep_copy( qd.points_view, host_points_view ); Kokkos::deep_copy( qd.values_view, host_values_view ); // Print output headers const std::vector< size_t > widths = print_headers( std::cout , cmd , comm_rank ); using Kokkos::Example::FENL::ElementComputationKLCoefficient; using Kokkos::Example::FENL::ExponentialKLCoefficient; using Kokkos::Example::BoxElemPart; using Kokkos::Example::FENL::fenl; using Kokkos::Example::FENL::Perf; const double bc_lower_value = 1 ; const double bc_upper_value = 2 ; int nelem[3] = { cmd.USE_FIXTURE_X , cmd.USE_FIXTURE_Y , cmd.USE_FIXTURE_Z }; // Create KL diffusion coefficient const double kl_mean = cmd.USE_MEAN; const double kl_variance = cmd.USE_VAR; const double kl_correlation = cmd.USE_COR; const bool kl_exp = cmd.USE_EXPONENTIAL; const double kl_exp_shift = cmd.USE_EXP_SHIFT; const double kl_exp_scale = cmd.USE_EXP_SCALE; const bool kl_disc_exp_scale = cmd.USE_DISC_EXP_SCALE; //typedef ElementComputationKLCoefficient< Scalar, double, Device > KL; typedef ExponentialKLCoefficient< Scalar, double, Device > KL; KL diffusion_coefficient( kl_mean, kl_variance, kl_correlation, dim, kl_exp, kl_exp_shift, kl_exp_scale, kl_disc_exp_scale ); typedef typename KL::RandomVariableView RV; typedef typename RV::HostMirror HRV; RV rv = diffusion_coefficient.getRandomVariables(); HRV hrv = Kokkos::create_mirror_view(rv); // Set random variables // ith random variable \xi_i = \psi_I(\xi) / \psi_I(1.0) // where I is determined by the basis ordering (since the component basis // functions have unit two-norm, \psi_I(1.0) might not be 1.0). We compute // this by finding the index of the multivariate term that is first order in // the ith slot, all other orders 0 Teuchos::Array<double> point(dim, 1.0); Teuchos::Array<double> basis_vals(num_pce); basis->evaluateBases(point, basis_vals); for (int i=0; i<dim; ++i) { Stokhos::MultiIndex<int> term(dim, 0); term[i] = 1; int index = basis->index(term); hrv(i).fastAccessCoeff(index) = 1.0 / basis_vals[index]; } Kokkos::deep_copy( rv, hrv ); // Compute stochastic response using stochastic Galerkin method Scalar response = 0; Perf perf; if ( cmd.USE_FIXTURE_QUADRATIC ) perf = fenl< Scalar , Device , BoxElemPart::ElemQuadratic > ( comm , node , cmd.USE_FENL_XML_FILE , cmd.PRINT , cmd.USE_TRIALS , cmd.USE_ATOMIC , cmd.USE_BELOS , cmd.USE_MUELU , cmd.USE_MEANBASED , nelem , diffusion_coefficient , cmd.USE_ISOTROPIC , cmd.USE_COEFF_SRC , cmd.USE_COEFF_ADV , bc_lower_value , bc_upper_value , response, qd ); else perf = fenl< Scalar , Device , BoxElemPart::ElemLinear > ( comm , node , cmd.USE_FENL_XML_FILE , cmd.PRINT , cmd.USE_TRIALS , cmd.USE_ATOMIC , cmd.USE_BELOS , cmd.USE_MUELU , cmd.USE_MEANBASED , nelem , diffusion_coefficient , cmd.USE_ISOTROPIC , cmd.USE_COEFF_SRC , cmd.USE_COEFF_ADV , bc_lower_value , bc_upper_value , response , qd ); // std::cout << "newton count = " << perf.newton_iter_count // << " cg count = " << perf.cg_iter_count << std::endl; perf.uq_count = num_quad_points; perf.newton_iter_count *= num_quad_points; perf.cg_iter_count *= num_pce; perf.map_ratio *= num_pce; perf.fill_node_set *= num_pce; perf.scan_node_count *= num_pce; perf.fill_graph_entries *= num_pce; perf.sort_graph_entries *= num_pce; perf.fill_element_graph *= num_pce; // Compute response mean, variance perf.response_mean = response.mean(); perf.response_std_dev = response.standard_deviation(); //std::cout << std::endl << response << std::endl; if ( 0 == comm_rank ) { print_perf_value( std::cout , cmd , widths , perf ); } if ( cmd.SUMMARIZE ) { Teuchos::TimeMonitor::report (comm.ptr (), std::cout); print_memory_usage(std::cout, *comm); } } TEUCHOS_STANDARD_CATCH_STATEMENTS(true, std::cerr, success); return success; }