// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int main(int argc, char *argv[]) { #ifdef HAVE_MPI MPI_Init(&argc, &argv); #endif using namespace std; using namespace Teuchos; using namespace PHX; try { RCP<Time> total_time = TimeMonitor::getNewTimer("Total Run Time"); TimeMonitor tm(*total_time); RCP<Time> residual_eval_time = TimeMonitor::getNewTimer("Residual Evaluation Time"); RCP<Time> jacobian_eval_time = TimeMonitor::getNewTimer("Jacobian Evaluation Time"); RCP<Time> linear_solve_time = TimeMonitor::getNewTimer("Linear Solve Time"); RCP<Time> nonlinear_solve_time = TimeMonitor::getNewTimer("Nonlinear Solve Time"); RCP<Time> preconditioner_solve_time = TimeMonitor::getNewTimer("Preconditioner Time"); RCP<Time> setup_time = TimeMonitor::getNewTimer("Setup Time (not scalable)"); RCP<Time> jv_eval_time = TimeMonitor::getNewTimer("Jv (AD)"); RCP<Time> matvec = TimeMonitor::getNewTimer("Matvec"); setup_time->start(); bool print_debug_info = false; #ifdef HAVE_MPI RCP<Epetra_Comm> comm = rcp(new Epetra_MpiComm(MPI_COMM_WORLD)); #else RCP<Epetra_Comm> comm = rcp(new Epetra_SerialComm); #endif Teuchos::basic_FancyOStream<char> os(rcp(&std::cout,false)); os.setShowProcRank(true); os.setProcRankAndSize(comm->MyPID(), comm->NumProc()); if (comm->MyPID() == 0) cout << "\nStarting FEM_Nonlinear Example!\n" << endl; // ********************************************************* // * Build the Finite Element data structures // ********************************************************* // Problem dimension - a 2D problem const static int dim = 2; // Create the mesh MeshBuilder mb(comm, 10, 3, 1.0, 1.0, 8); if (print_debug_info) os << mb; std::vector<Element_Linear2D>& cells = *(mb.myElements()); // Divide mesh into workset blocks const std::size_t workset_size = 15; std::vector<MyWorkset> worksets; { std::vector<Element_Linear2D>::iterator cell_it = cells.begin(); std::size_t count = 0; MyWorkset w; w.local_offset = cell_it->localElementIndex(); w.begin = cell_it; for (; cell_it != cells.end(); ++cell_it) { ++count; std::vector<Element_Linear2D>::iterator next = cell_it; ++next; if ( count == workset_size || next == cells.end()) { w.end = next; w.num_cells = count; worksets.push_back(w); count = 0; if (next != cells.end()) { w.local_offset = next->localElementIndex(); w.begin = next; } } } } if (print_debug_info) { cout << "Printing Element Information" << endl; for (std::size_t i = 0; i < worksets.size(); ++i) { std::vector<Element_Linear2D>::iterator it = worksets[i].begin; for (; it != worksets[i].end; ++it) cout << *it << endl; } } if (print_debug_info) { for (std::size_t i = 0; i < worksets.size(); ++i) { cout << "Printing Workset Information" << endl; cout << "worksets[" << i << "]" << endl; cout << " num_cells =" << worksets[i].num_cells << endl; cout << " local_offset =" << worksets[i].local_offset << endl; std::vector<Element_Linear2D>::iterator it = worksets[i].begin; for (; it != worksets[i].end; ++it) cout << " cell_local_index =" << it->localElementIndex() << endl; } cout << endl; } // ********************************************************* // * Build the Newton solver data structures // ********************************************************* // Setup Nonlinear Problem (build Epetra_Vector and Epetra_CrsMatrix) // Newton's method: J delta_x = -f const std::size_t num_eq = 2; LinearObjectFactory lof(mb, comm, num_eq); if (print_debug_info) { ofstream file("OwnedGraph.dat", ios::out | ios::app); Teuchos::basic_FancyOStream<char> p(rcp(&file,false)); p.setShowProcRank(true); p.setProcRankAndSize(comm->MyPID(), comm->NumProc()); lof.ownedGraph()->Print(p); } Epetra_Map owned_map = *(lof.ownedMap()); Epetra_Map overlapped_map = *(lof.overlappedMap()); Epetra_CrsGraph owned_graph = *(lof.ownedGraph()); Epetra_CrsGraph overlapped_graph = *(lof.overlappedGraph()); // Solution vector x RCP<Epetra_Vector> owned_x = rcp(new Epetra_Vector(owned_map,true)); RCP<Epetra_Vector> overlapped_x = rcp(new Epetra_Vector(overlapped_map,true)); // Update vector x RCP<Epetra_Vector> owned_delta_x = rcp(new Epetra_Vector(owned_map,true)); // Residual vector f RCP<Epetra_Vector> owned_f = rcp(new Epetra_Vector(owned_map,true)); RCP<Epetra_Vector> overlapped_f = rcp(new Epetra_Vector(overlapped_map,true)); // Jacobian Matrix Epetra_DataAccess copy = ::Copy; RCP<Epetra_CrsMatrix> owned_jac = rcp(new Epetra_CrsMatrix(copy, owned_graph)); RCP<Epetra_CrsMatrix> overlapped_jac = rcp(new Epetra_CrsMatrix(copy, overlapped_graph)); // Import/export RCP<Epetra_Import> importer = rcp(new Epetra_Import(overlapped_map, owned_map)); RCP<Epetra_Export> exporter = rcp(new Epetra_Export(overlapped_map, owned_map)); // Sets bc for initial guess applyBoundaryConditions(1.0, *owned_x, *owned_jac, *owned_f, mb); // ********************************************************* // * Build the FieldManager // ********************************************************* RCP< vector<string> > dof_names = rcp(new vector<string>(num_eq)); (*dof_names)[0] = "Temperature"; (*dof_names)[1] = "Velocity X"; RCP<DataLayout> qp_scalar = rcp(new MDALayout<Cell,QuadPoint>(workset_size,4)); RCP<DataLayout> node_scalar = rcp(new MDALayout<Cell,Node>(workset_size,4)); RCP<DataLayout> qp_vec = rcp(new MDALayout<Cell,QuadPoint,Dim>(workset_size,4,dim)); RCP<DataLayout> node_vec = rcp(new MDALayout<Cell,Node,Dim>(workset_size,4,dim)); RCP<DataLayout> dummy = rcp(new MDALayout<Cell>(0)); map<string, RCP<ParameterList> > evaluators_to_build; { // Gather Solution RCP<ParameterList> p = rcp(new ParameterList); int type = MyFactoryTraits<MyTraits>::id_gather_solution; p->set<int>("Type", type); p->set< RCP< vector<string> > >("Solution Names", dof_names); p->set< RCP<Epetra_Vector> >("Solution Vector", overlapped_x); p->set< RCP<DataLayout> >("Data Layout", node_scalar); evaluators_to_build["Gather Solution"] = p; } { // FE Interpolation - Temperature RCP<ParameterList> p = rcp(new ParameterList); int type = MyFactoryTraits<MyTraits>::id_feinterpolation; p->set<int>("Type", type); p->set<string>("Node Variable Name", "Temperature"); p->set<string>("QP Variable Name", "Temperature"); p->set<string>("Gradient QP Variable Name", "Temperature Gradient"); p->set< RCP<DataLayout> >("Node Data Layout", node_scalar); p->set< RCP<DataLayout> >("QP Scalar Data Layout", qp_scalar); p->set< RCP<DataLayout> >("QP Vector Data Layout", qp_vec); evaluators_to_build["FE Interpolation Temperature"] = p; } { // FE Interpolation - Velocity X RCP<ParameterList> p = rcp(new ParameterList); int type = MyFactoryTraits<MyTraits>::id_feinterpolation; p->set<int>("Type", type); p->set<string>("Node Variable Name", "Velocity X"); p->set<string>("QP Variable Name", "Velocity X"); p->set<string>("Gradient QP Variable Name", "Velocity X Gradient"); p->set< RCP<DataLayout> >("Node Data Layout", node_scalar); p->set< RCP<DataLayout> >("QP Scalar Data Layout", qp_scalar); p->set< RCP<DataLayout> >("QP Vector Data Layout", qp_vec); evaluators_to_build["FE Interpolation Velocity X"] = p; } { // Evaluate residual RCP<ParameterList> p = rcp(new ParameterList); int type = MyFactoryTraits<MyTraits>::id_equations; p->set<int>("Type", type); p->set< RCP< vector<string> > >("Solution Names", dof_names); p->set< RCP<DataLayout> >("Node Data Layout", node_scalar); p->set< RCP<DataLayout> >("QP Data Layout", qp_scalar); p->set< RCP<DataLayout> >("Gradient QP Data Layout", qp_vec); evaluators_to_build["Equations"] = p; } { // Scatter Solution RCP<ParameterList> p = rcp(new ParameterList); int type = MyFactoryTraits<MyTraits>::id_scatter_residual; p->set<int>("Type", type); RCP< vector<string> > res_names = rcp(new vector<string>(num_eq)); (*res_names)[0] = "Residual Temperature"; (*res_names)[1] = "Residual Velocity X"; p->set< RCP< vector<string> > >("Residual Names", res_names); p->set< RCP<Epetra_Vector> >("Residual Vector", overlapped_f); p->set< RCP<Epetra_CrsMatrix> >("Jacobian Matrix", overlapped_jac); p->set< RCP<DataLayout> >("Dummy Data Layout", dummy); p->set< RCP<DataLayout> >("Data Layout", node_scalar); evaluators_to_build["Scatter Residual"] = p; } // Build Field Evaluators for each evaluation type EvaluatorFactory<MyTraits,MyFactoryTraits<MyTraits> > factory; RCP< vector< RCP<Evaluator_TemplateManager<MyTraits> > > > evaluators; evaluators = factory.buildEvaluators(evaluators_to_build); // Create a FieldManager FieldManager<MyTraits> fm; // Register all Evaluators registerEvaluators(evaluators, fm); // Request quantities to assemble RESIDUAL PDE operators { typedef MyTraits::Residual::ScalarT ResScalarT; Tag<ResScalarT> res_tag("Scatter", dummy); fm.requireField<MyTraits::Residual>(res_tag); // Request quantities to assemble JACOBIAN PDE operators typedef MyTraits::Jacobian::ScalarT JacScalarT; Tag<JacScalarT> jac_tag("Scatter", dummy); fm.requireField<MyTraits::Jacobian>(jac_tag); // Request quantities to assemble Jv operators typedef MyTraits::Jv::ScalarT JvScalarT; Tag<JvScalarT> jv_tag("Scatter", dummy); fm.requireField<MyTraits::Jv>(jv_tag); } { RCP<Time> registration_time = TimeMonitor::getNewTimer("Post Registration Setup Time"); { TimeMonitor t(*registration_time); fm.postRegistrationSetupForType<MyTraits::Residual>(NULL); fm.postRegistrationSetupForType<MyTraits::Jacobian>(NULL); fm.postRegistrationSetupForType<MyTraits::Jv>(NULL); } } if (print_debug_info) cout << fm << endl; // ********************************************************* // * Evaluate Jacobian and Residual (required for ML to // * to be constructed properly // ********************************************************* { //TimeMonitor t(*jacobian_eval_time); overlapped_x->Import(*owned_x, *importer, Insert); owned_f->PutScalar(0.0); overlapped_f->PutScalar(0.0); owned_jac->PutScalar(0.0); overlapped_jac->PutScalar(0.0); for (std::size_t i = 0; i < worksets.size(); ++i) fm.evaluateFields<MyTraits::Jacobian>(worksets[i]); owned_f->Export(*overlapped_f, *exporter, Add); owned_jac->Export(*overlapped_jac, *exporter, Add); applyBoundaryConditions(1.0, *owned_x, *owned_jac, *owned_f, mb); } // ********************************************************* // * Build Preconditioner (Ifpack or ML) // ********************************************************* bool use_ml = false; RCP<Belos::EpetraPrecOp> belosPrec; RCP<Ifpack_Preconditioner> ifpack_prec; RCP<ML_Epetra::MultiLevelPreconditioner> ml_prec; if (!use_ml) { Ifpack Factory; std::string PrecType = "ILU"; int OverlapLevel = 0; ifpack_prec = Teuchos::rcp( Factory.Create(PrecType,owned_jac.get(),OverlapLevel) ); ParameterList ifpackList; ifpackList.set("fact: drop tolerance", 1e-9); ifpackList.set("fact: level-of-fill", 1); ifpackList.set("schwarz: combine mode", "Add"); IFPACK_CHK_ERR(ifpack_prec->SetParameters(ifpackList)); IFPACK_CHK_ERR(ifpack_prec->Initialize()); belosPrec = rcp( new Belos::EpetraPrecOp( ifpack_prec ) ); } else { ParameterList ml_params; ML_Epetra::SetDefaults("SA",ml_params); //ml_params.set("Base Method Defaults", "SA"); ml_params.set("ML label", "Phalanx_Test"); ml_params.set("ML output", 10); ml_params.set("print unused", 1); ml_params.set("max levels", 4); ml_params.set("PDE equations",2); ml_params.set("prec type","MGV"); ml_params.set("increasing or decreasing","increasing"); // ml_params.set("aggregation: nodes per aggregate",50); ml_params.set("aggregation: type","Uncoupled"); ml_params.set("aggregation: damping factor", 0.0); ml_params.set("coarse: type","Amesos-KLU"); //ml_params.set("coarse: type","IFPACK"); ml_params.set("coarse: max size", 1000); //ml_params.set("smoother: type","IFPACK"); ml_params.set("smoother: type","block Gauss-Seidel"); ml_params.set("smoother: ifpack type","ILU"); ml_params.set("smoother: ifpack overlap",1); ml_params.sublist("smoother: ifpack list").set("fact: level-of-fill",1); ml_params.sublist("smoother: ifpack list").set("schwarz: reordering type","rcm"); ml_prec = rcp( new ML_Epetra::MultiLevelPreconditioner(*owned_jac, ml_params, true) ); belosPrec = rcp( new Belos::EpetraPrecOp( ml_prec ) ); } // ********************************************************* // * Build linear solver (Belos) // ********************************************************* // Linear solver parameters typedef double ST; typedef Teuchos::ScalarTraits<ST> SCT; typedef SCT::magnitudeType MT; typedef Epetra_MultiVector MV; typedef Epetra_Operator OP; typedef Belos::MultiVecTraits<ST,MV> MVT; typedef Belos::OperatorTraits<ST,MV,OP> OPT; RCP<ParameterList> belosList = rcp(new ParameterList); belosList->set<int>("Num Blocks", 400); belosList->set<int>("Block Size", 1); belosList->set<int>("Maximum Iterations", 400); belosList->set<int>("Maximum Restarts", 0); belosList->set<MT>( "Convergence Tolerance", 1.0e-4); int verbosity = Belos::Errors + Belos::Warnings; if (false) { verbosity += Belos::TimingDetails + Belos::StatusTestDetails; belosList->set<int>( "Output Frequency", -1); } if (print_debug_info) { verbosity += Belos::Debug; belosList->set<int>( "Output Frequency", -1); } belosList->set( "Verbosity", verbosity ); RCP<Epetra_MultiVector> F = Teuchos::rcp_implicit_cast<Epetra_MultiVector>(owned_f); RCP<Epetra_MultiVector> DX = Teuchos::rcp_implicit_cast<Epetra_MultiVector>(owned_delta_x); RCP<Belos::LinearProblem<double,MV,OP> > problem = rcp(new Belos::LinearProblem<double,MV,OP>(owned_jac, DX, F) ); problem->setRightPrec( belosPrec ); RCP< Belos::SolverManager<double,MV,OP> > gmres_solver = rcp( new Belos::BlockGmresSolMgr<double,MV,OP>(problem, belosList) ); setup_time->stop(); // Timing for Mat-Vec using sacado RCP<Epetra_Vector> owned_v = rcp(new Epetra_Vector(owned_map,true)); RCP<Epetra_Vector> overlapped_v = rcp(new Epetra_Vector(overlapped_map,true)); RCP<Epetra_Vector> owned_Jv = rcp(new Epetra_Vector(owned_map,true)); RCP<Epetra_Vector> overlapped_Jv = rcp(new Epetra_Vector(overlapped_map,true)); owned_x->PutScalar(1.0); owned_v->PutScalar(1.0); int iter = 0; while (iter != 30) { overlapped_x->Import(*owned_x, *importer, Insert); overlapped_v->Import(*owned_v, *importer, Insert); owned_f->PutScalar(0.0); overlapped_f->PutScalar(0.0); owned_jac->PutScalar(0.0); overlapped_jac->PutScalar(0.0); owned_Jv->PutScalar(0.0); overlapped_Jv->PutScalar(0.0); for (std::size_t i = 0; i < worksets.size(); ++i) { worksets[i].v = overlapped_v; worksets[i].Jv = overlapped_Jv; } { TimeMonitor t(*residual_eval_time); for (std::size_t i = 0; i < worksets.size(); ++i) fm.evaluateFields<MyTraits::Residual>(worksets[i]); } { TimeMonitor t(*jacobian_eval_time); for (std::size_t i = 0; i < worksets.size(); ++i) fm.evaluateFields<MyTraits::Jacobian>(worksets[i]); } { TimeMonitor t(*jv_eval_time); for (std::size_t i = 0; i < worksets.size(); ++i) fm.evaluateFields<MyTraits::Jv>(worksets[i]); } owned_f->Export(*overlapped_f, *exporter, Add); owned_jac->Export(*overlapped_jac, *exporter, Add); owned_Jv->Export(*overlapped_Jv, *exporter, Add); applyBoundaryConditions(1.0, *owned_x, *owned_jac, *owned_f, mb); { TimeMonitor t(*matvec); owned_jac->Apply(*owned_x, *owned_f); } ++iter; } //owned_jac->Print(std::cout); //overlapped_Jv->Print(std::cout); //owned_Jv->Print(std::cout); // NOTE: in the future we can set up the nonlinear solver below to // do Jacobian-Free Newton-Krylov solves to test the matvec /* // ********************************************************* // * Solve the system // ********************************************************* // Set initial guess owned_x->PutScalar(1.0); // Evaluate Residual { TimeMonitor t(*residual_eval_time); overlapped_x->Import(*owned_x, *importer, Insert); owned_f->PutScalar(0.0); overlapped_f->PutScalar(0.0); for (std::size_t i = 0; i < worksets.size(); ++i) fm.evaluateFields<MyTraits::Residual>(worksets[i]); owned_f->Export(*overlapped_f, *exporter, Add); applyBoundaryConditions(1.0, *owned_x, *owned_jac, *owned_f, mb); } if (print_debug_info) { printVector("x_owned", *owned_x, -1); printVector("f_owned", *owned_f, -1); } // Newton Loop bool converged = false; std::size_t num_newton_steps = 0; std::size_t num_gmres_iterations = 0; checkConvergence(comm->MyPID(), num_newton_steps, *owned_f, *owned_delta_x, converged); while (!converged && num_newton_steps < 20) { TimeMonitor t(*nonlinear_solve_time); // Evaluate Residual and Jacobian { TimeMonitor t(*jacobian_eval_time); overlapped_x->Import(*owned_x, *importer, Insert); owned_f->PutScalar(0.0); overlapped_f->PutScalar(0.0); owned_jac->PutScalar(0.0); overlapped_jac->PutScalar(0.0); for (std::size_t i = 0; i < worksets.size(); ++i) fm.evaluateFields<MyTraits::Jacobian>(worksets[i]); owned_f->Export(*overlapped_f, *exporter, Add); owned_jac->Export(*overlapped_jac, *exporter, Add); applyBoundaryConditions(1.0, *owned_x, *owned_jac, *owned_f, mb); } if (print_debug_info) { printVector("x_owned", *owned_x, num_newton_steps); printVector("x_overlapped", *overlapped_x, num_newton_steps); printVector("f_owned", *owned_f, num_newton_steps); printMatrix("jacobian_owned", *owned_jac, num_newton_steps); } owned_f->Scale(-1.0); // Solve linear problem { TimeMonitor t(*linear_solve_time); owned_delta_x->PutScalar(0.0); { TimeMonitor tp(*preconditioner_solve_time); if (use_ml) ml_prec->ReComputePreconditioner(); else IFPACK_CHK_ERR(ifpack_prec->Compute()); } problem->setProblem(); Belos::ReturnType ret = gmres_solver->solve(); int num_iters = gmres_solver->getNumIters(); num_gmres_iterations += num_iters; //if (print_debug_info) if (comm->MyPID() == 0) std::cout << "Number of gmres iterations performed for this solve: " << num_iters << std::endl; if (ret!=Belos::Converged && comm->MyPID() == 0) { std::cout << std::endl << "WARNING: Belos did not converge!" << std::endl; } } owned_x->Update(1.0, *owned_delta_x, 1.0); { // Evaluate Residual Only TimeMonitor t(*residual_eval_time); overlapped_x->Import(*owned_x, *importer, Insert); owned_f->PutScalar(0.0); overlapped_f->PutScalar(0.0); for (std::size_t i = 0; i < worksets.size(); ++i) fm.evaluateFields<MyTraits::Residual>(worksets[i]); owned_f->Export(*overlapped_f, *exporter, Add); applyBoundaryConditions(1.0, *owned_x, *owned_jac, *owned_f, mb); } num_newton_steps += 1; checkConvergence(comm->MyPID(), num_newton_steps, *owned_f, *owned_delta_x, converged); } if (print_debug_info) printVector("f_owned", *owned_f, num_newton_steps); if (comm->MyPID() == 0) { if (converged) cout << "\n\nNewton Iteration Converged!\n" << endl; else cout << "\n\nNewton Iteration Failed to Converge!\n" << endl; } RCP<Time> file_io = TimeMonitor::getNewTimer("File IO"); { TimeMonitor t(*file_io); // Create a list of node coordinates std::map<int, std::vector<double> > coordinates; Teuchos::RCP< std::vector<Element_Linear2D> > cells = mb.myElements(); for (std::vector<Element_Linear2D>::iterator cell = cells->begin(); cell != cells->end(); ++cell) { const shards::Array<double,shards::NaturalOrder,Node,Dim>& coords = cell->nodeCoordinates(); for (int node=0; node < cell->numNodes(); ++node) { coordinates[cell->globalNodeId(node)].resize(dim); coordinates[cell->globalNodeId(node)][0] = coords(node,0); coordinates[cell->globalNodeId(node)][1] = coords(node,1); } } { std::vector< RCP<ofstream> > files; for (std::size_t eq = 0; eq < num_eq; ++eq) { std::stringstream ost; ost << "upper_DOF" << eq << "_PID" << comm->MyPID() << ".dat"; files.push_back( rcp(new std::ofstream(ost.str().c_str()), ios::out | ios::trunc) ); files[eq]->precision(10); } const std::vector<int>& node_list = mb.topNodeSetGlobalIds(); for (std::size_t node = 0; node < node_list.size(); ++node) { int lid = owned_x->Map().LID(node_list[node] * num_eq); for (std::size_t eq = 0; eq < num_eq; ++eq) { int dof_index = lid + eq; *(files[eq]) << coordinates[node_list[node]][0] << " " << (*owned_x)[dof_index] << endl; } } } { std::vector< RCP<ofstream> > files; for (std::size_t eq = 0; eq < num_eq; ++eq) { std::stringstream ost; ost << "lower_DOF" << eq << "_PID" << comm->MyPID() << ".dat"; files.push_back( rcp(new std::ofstream(ost.str().c_str()), ios::out | ios::trunc) ); files[eq]->precision(10); } const std::vector<int>& node_list = mb.bottomNodeSetGlobalIds(); for (std::size_t node = 0; node < node_list.size(); ++node) { int lid = owned_x->Map().LID(node_list[node] * num_eq); for (std::size_t eq = 0; eq < num_eq; ++eq) { int dof_index = lid + eq; *(files[eq]) << coordinates[node_list[node]][0] << " " << (*owned_x)[dof_index] << endl; } } } } TEUCHOS_TEST_FOR_EXCEPTION(!converged, std::runtime_error, "Problem failed to converge!"); TEUCHOS_TEST_FOR_EXCEPTION(num_newton_steps != 10, std::runtime_error, "Incorrect number of Newton steps!"); // Only check num gmres steps in serial #ifndef HAVE_MPI TEUCHOS_TEST_FOR_EXCEPTION(num_gmres_iterations != 10, std::runtime_error, "Incorrect number of GMRES iterations!"); #endif */ // ********************************************************************* // Finished all testing // ********************************************************************* if (comm->MyPID() == 0) std::cout << "\nRun has completed successfully!\n" << std::endl; // ********************************************************************* // ********************************************************************* } catch (const std::exception& e) { std::cout << "************************************************" << endl; std::cout << "************************************************" << endl; std::cout << "Exception Caught!" << endl; std::cout << "Error message is below\n " << e.what() << endl; std::cout << "************************************************" << endl; } catch (...) { std::cout << "************************************************" << endl; std::cout << "************************************************" << endl; std::cout << "Unknown Exception Caught!" << endl; std::cout << "************************************************" << endl; } TimeMonitor::summarize(); #ifdef HAVE_MPI MPI_Finalize(); #endif return 0; }
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int main(int argc, char *argv[]) { using namespace std; using namespace Teuchos; using namespace PHX; GlobalMPISession mpi_session(&argc, &argv); try { PHX::InitializeKokkosDevice(); RCP<Time> total_time = TimeMonitor::getNewTimer("Total Run Time"); TimeMonitor tm(*total_time); bool print_debug_info = false; { cout << "\nStarting MultiDimensionalArray EnergyFlux Example!" << endl; // Assume we have 102 cells on processor and can fit 20 cells in cache typedef PHX::Device::size_type size_type; const size_type num_local_cells = 102; const size_type workset_size = 20; RCP<DataLayout> qp_scalar = rcp(new MDALayout<Cell,Node>(workset_size,4)); RCP<DataLayout> node_scalar = rcp(new MDALayout<Cell,QuadPoint>(workset_size,4)); RCP<DataLayout> qp_vec = rcp(new MDALayout<Cell,Node,Dim>(workset_size,4,3)); RCP<DataLayout> node_vec = rcp(new MDALayout<Cell,QuadPoint,Dim>(workset_size,4,3)); RCP<DataLayout> qp_mat = rcp(new MDALayout<Cell,Node,Dim,Dim>(workset_size,4,3,3)); RCP<DataLayout> node_mat = rcp(new MDALayout<Cell,QuadPoint,Dim,Dim>(workset_size,4,3,3)); // Parser will build parameter list that determines the field // evaluators to build map<string, RCP<ParameterList> > evaluators_to_build; { // Temperature RCP<ParameterList> p = rcp(new ParameterList); int type = MyFactoryTraits<MyTraits>::id_constant; p->set<int>("Type", type); p->set<string>("Name", "Temperature"); p->set<double>("Value", 2.0); p->set< RCP<DataLayout> >("Data Layout", node_scalar); evaluators_to_build["DOF_Temperature"] = p; } { // Density RCP<ParameterList> p = rcp(new ParameterList); int type = MyFactoryTraits<MyTraits>::id_density; p->set<int>("Type", type); p->set< RCP<DataLayout> >("Data Layout", qp_scalar); evaluators_to_build["Density"] = p; } { // Constant Thermal Conductivity RCP<ParameterList> p = rcp(new ParameterList); int type = MyFactoryTraits<MyTraits>::id_constant; p->set<int>("Type", type); p->set<string>("Name", "Thermal Conductivity"); p->set<double>("Value", 2.0); p->set< RCP<DataLayout> >("Data Layout", qp_scalar); evaluators_to_build["Thermal Conductivity"] = p; } { // Nonlinear Source RCP<ParameterList> p = rcp(new ParameterList); int type = MyFactoryTraits<MyTraits>::id_nonlinearsource; p->set<int>("Type", type); p->set< RCP<DataLayout> >("Data Layout", qp_scalar); evaluators_to_build["Nonlinear Source"] = p; } { // Fourier Energy Flux RCP<ParameterList> p = rcp(new ParameterList); int type = MyFactoryTraits<MyTraits>::id_fourier; p->set<int>("Type", type); p->set< RCP<DataLayout> >("Scalar Data Layout", qp_scalar); p->set< RCP<DataLayout> >("Vector Data Layout", qp_vec); evaluators_to_build["Energy Flux"] = p; } { // FE Interpolation RCP<ParameterList> p = rcp(new ParameterList); int type = MyFactoryTraits<MyTraits>::id_feinterpolation; p->set<int>("Type", type); p->set<string>("Node Variable Name", "Temperature"); p->set<string>("QP Variable Name", "Temperature"); p->set<string>("Gradient QP Variable Name", "Temperature Gradient"); p->set< RCP<DataLayout> >("Node Data Layout", node_scalar); p->set< RCP<DataLayout> >("QP Scalar Data Layout", qp_scalar); p->set< RCP<DataLayout> >("QP Vector Data Layout", qp_vec); evaluators_to_build["FE Interpolation"] = p; } // Build Field Evaluators for each evaluation type EvaluatorFactory<MyTraits,MyFactoryTraits<MyTraits> > factory; RCP< vector< RCP<Evaluator_TemplateManager<MyTraits> > > > evaluators; evaluators = factory.buildEvaluators(evaluators_to_build); // Create a FieldManager FieldManager<MyTraits> fm; // Register all Evaluators registerEvaluators(evaluators, fm); // Request quantities to assemble RESIDUAL PDE operators typedef MyTraits::Residual::ScalarT ResScalarT; Tag<ResScalarT> energy_flux_tag("Energy_Flux", qp_vec); fm.requireField<MyTraits::Residual>(energy_flux_tag); Tag<ResScalarT> source_tag("Nonlinear Source", qp_scalar); fm.requireField<MyTraits::Residual>(source_tag); // Request quantities to assemble JACOBIAN PDE operators typedef MyTraits::Jacobian::ScalarT JacScalarT; Tag<JacScalarT> j_energy_flux_tag("Energy_Flux", qp_vec); fm.requireField<MyTraits::Jacobian>(j_energy_flux_tag); Tag<JacScalarT> j_source_tag("Nonlinear Source", qp_scalar); fm.requireField<MyTraits::Jacobian>(j_source_tag); // For Kokkos extended types (Sacado FAD) set derivtive array // size std::vector<PHX::index_size_type> derivative_dimensions; derivative_dimensions.push_back(8); fm.setKokkosExtendedDataTypeDimensions<MyTraits::Jacobian>(derivative_dimensions); RCP<Time> registration_time = TimeMonitor::getNewTimer("Post Registration Setup Time"); { TimeMonitor t(*registration_time); fm.postRegistrationSetup(NULL); } cout << fm << endl; fm.writeGraphvizFile<MyTraits::Residual>("graph_residual.dot"); fm.writeGraphvizFile<MyTraits::Jacobian>("graph_jacobian.dot"); fm.writeGraphvizFile("all_graph", ".dot"); // Create Workset information: Cells and EvalData objects std::vector<MyCell> cells(num_local_cells); for (std::size_t i = 0; i < cells.size(); ++i) cells[i].setLocalIndex(i); std::vector<MyWorkset> worksets; std::vector<MyCell>::iterator cell_it = cells.begin(); std::size_t count = 0; MyWorkset w; w.local_offset = cell_it->localIndex(); w.begin = cell_it; for (; cell_it != cells.end(); ++cell_it) { ++count; std::vector<MyCell>::iterator next = cell_it; ++next; if ( count == workset_size || next == cells.end()) { w.end = next; w.num_cells = count; worksets.push_back(w); count = 0; if (next != cells.end()) { w.local_offset = next->localIndex(); w.begin = next; } } } if (print_debug_info) { for (std::size_t i = 0; i < worksets.size(); ++i) { cout << "worksets[" << i << "]" << endl; cout << " num_cells =" << worksets[i].num_cells << endl; cout << " local_offset =" << worksets[i].local_offset << endl; std::vector<MyCell>::iterator it = worksets[i].begin; for (; it != worksets[i].end; ++it) cout << " cell_local_index =" << it->localIndex() << endl; } cout << endl; } // Create local vectors to hold flux and source at quad points std::vector<double> local_energy_flux_at_qp(num_local_cells * qp_vec->size()); std::vector<double> local_source_at_qp(num_local_cells * qp_scalar->size()); // Fields we require MDField<double,Cell,QuadPoint,Dim> energy_flux(energy_flux_tag); MDField<double,Cell,QuadPoint> source(source_tag); fm.getFieldData<double,MyTraits::Residual,Cell,QuadPoint,Dim>(energy_flux); fm.getFieldData<double,MyTraits::Residual,Cell,QuadPoint>(source); // Get host arrays to copy from device back to host auto host_energy_flux = Kokkos::create_mirror_view(energy_flux.get_kokkos_view()); auto host_source = Kokkos::create_mirror_view(source.get_kokkos_view()); RCP<Time> eval_time = TimeMonitor::getNewTimer("Evaluation Time"); fm.preEvaluate<MyTraits::Residual>(NULL); { TimeMonitor t(*eval_time); for (std::size_t i = 0; i < worksets.size(); ++i) { #ifdef PHX_ENABLE_KOKKOS_AMT fm.evaluateFieldsTaskParallel<MyTraits::Residual>(1,worksets[i]); #else fm.evaluateFields<MyTraits::Residual>(worksets[i]); #endif // Use values: in this example, move values into local host arrays Kokkos::deep_copy(host_energy_flux,energy_flux.get_kokkos_view()); Kokkos::deep_copy(host_source,source.get_kokkos_view()); for (size_type cell = 0; cell < energy_flux.dimension(0); ++cell) { for (size_type ip = 0; ip < energy_flux.dimension(1); ++ip) { for (size_type dim = 0; dim < energy_flux.dimension(2); ++dim) { std::size_t index = cell * energy_flux.dimension(0) + ip * energy_flux.dimension(1) + dim; local_energy_flux_at_qp[index] = host_energy_flux(cell,ip,dim); } } } for (size_type cell = 0; cell < energy_flux.dimension(0); ++cell) { for (size_type ip = 0; ip < energy_flux.dimension(1); ++ip) { std::size_t index = cell * energy_flux.dimension(0) + ip; local_source_at_qp[index] = host_source(cell,ip); } } } } fm.postEvaluate<MyTraits::Residual>(NULL); // Test data retrieval cout << "Testing data members" << endl; Tag<double> d_var("Density", qp_scalar); MDField<double> den(d_var); fm.getFieldData<double,MyTraits::Residual>(den); cout << "size of density = " << den.size() << ", should be " << d_var.dataLayout().size() << "." << endl; TEUCHOS_TEST_FOR_EXCEPTION(den.size() != d_var.dataLayout().size(), std::runtime_error, "Returned arrays are not sized correctly!"); cout << endl; //Jacobian: std::vector<JacScalarT> j_local_energy_flux_at_qp(num_local_cells * qp_vec->size()); std::vector<JacScalarT> j_local_source_at_qp(num_local_cells * qp_scalar->size()); // Fields we require MDField<JacScalarT,Cell,QuadPoint,Dim> j_energy_flux(j_energy_flux_tag); MDField<JacScalarT,Cell,QuadPoint> j_source(j_source_tag); fm.getFieldData<JacScalarT,MyTraits::Jacobian,Cell,QuadPoint,Dim>(j_energy_flux); fm.getFieldData<JacScalarT,MyTraits::Jacobian,Cell,QuadPoint>(j_source); RCP<Time> eval_time2 = TimeMonitor::getNewTimer("Evaluation Time Jacobian"); auto host_j_energy_flux = Kokkos::create_mirror_view(j_energy_flux.get_kokkos_view()); auto host_j_source = Kokkos::create_mirror_view(j_source.get_kokkos_view()); fm.preEvaluate<MyTraits::Jacobian>(NULL); { TimeMonitor t(*eval_time2); for (std::size_t i = 0; i < worksets.size(); ++i) { #ifdef PHX_ENABLE_KOKKOS_AMT fm.evaluateFieldsTaskParallel<MyTraits::Jacobian>(1,worksets[i]); #else fm.evaluateFields<MyTraits::Jacobian>(worksets[i]); #endif Kokkos::deep_copy(host_j_energy_flux,j_energy_flux.get_kokkos_view()); Kokkos::deep_copy(host_j_source,j_source.get_kokkos_view()); for (size_type cell = 0; cell < j_energy_flux.dimension(0); ++cell) { for (size_type ip = 0; ip < j_energy_flux.dimension(1); ++ip) { for (size_type dim = 0; dim < j_energy_flux.dimension(2); ++dim) { std::size_t index = cell * j_energy_flux.dimension(0) + ip * j_energy_flux.dimension(1) + dim; j_local_energy_flux_at_qp[index] = host_j_energy_flux(cell,ip,dim); } } } for (size_type cell = 0; cell < j_energy_flux.dimension(0); ++cell) { for (size_type ip = 0; ip < j_energy_flux.dimension(1); ++ip) { std::size_t index = cell * j_energy_flux.dimension(0) + ip; j_local_source_at_qp[index] = host_j_source(cell,ip); } } } } fm.postEvaluate<MyTraits::Jacobian>(NULL); double scalability = 0.0; double parallelizability = 1.0; fm.analyzeGraph<MyTraits::Residual>(scalability,parallelizability); std::cout << "Residual Scalability = " << scalability << std::endl; std::cout << "Residual Parallelizability = " << parallelizability << std::endl; fm.analyzeGraph<MyTraits::Jacobian>(scalability,parallelizability); std::cout << "Residual Scalability = " << scalability << std::endl; std::cout << "Residual Parallelizability = " << parallelizability << std::endl; } // ********************************************************************* // Finished all testing // ********************************************************************* std::cout << "\nRun has completed successfully!\n" << std::endl; // ********************************************************************* // ********************************************************************* PHX::FinalizeKokkosDevice(); } catch (const std::exception& e) { std::cout << "************************************************" << endl; std::cout << "************************************************" << endl; std::cout << "Exception Caught!" << endl; std::cout << "Error message is below\n " << e.what() << endl; std::cout << "************************************************" << endl; } catch (...) { std::cout << "************************************************" << endl; std::cout << "************************************************" << endl; std::cout << "Unknown Exception Caught!" << endl; std::cout << "************************************************" << endl; } TimeMonitor::summarize(); return 0; }