int main(int argc, char * argv[]) { typedef MPI::HGeometryForest<DIM,DOW> forest_t; typedef MPI::BirdView<forest_t> ir_mesh_t; typedef FEMSpace<double,DIM,DOW> fe_space_t; typedef MPI::DOF::GlobalIndex<forest_t, fe_space_t> global_index_t; MPI_Init(&argc, &argv); forest_t forest(MPI_COMM_WORLD); forest.readMesh(argv[1]); ir_mesh_t ir_mesh(forest); int round = 0; if (argc >= 3) round = atoi(argv[2]); ir_mesh.globalRefine(round); ir_mesh.semiregularize(); ir_mesh.regularize(false); TemplateGeometry<DIM> tri; tri.readData("triangle.tmp_geo"); CoordTransform<DIM,DIM> tri_ct; tri_ct.readData("triangle.crd_trs"); TemplateDOF<DIM> tri_td(tri); tri_td.readData("triangle.1.tmp_dof"); BasisFunctionAdmin<double,DIM,DIM> tri_bf(tri_td); tri_bf.readData("triangle.1.bas_fun"); std::vector<TemplateElement<double,DIM,DIM> > tmp_ele(1); tmp_ele[0].reinit(tri, tri_td, tri_ct, tri_bf); RegularMesh<DIM,DOW>& mesh = ir_mesh.regularMesh(); fe_space_t fem_space(mesh, tmp_ele); u_int n_ele = mesh.n_geometry(DIM); fem_space.element().resize(n_ele); for (int i = 0;i < n_ele;i ++) { fem_space.element(i).reinit(fem_space, i, 0); } fem_space.buildElement(); fem_space.buildDof(); fem_space.buildDofBoundaryMark(); global_index_t global_index(forest, fem_space); global_index.build(); MPI_Finalize(); return 0; }
// Jacobian specialization of the strong Dirichlet BC evaluator.
//
// Marks the DOFs of node set `this->nodeSetID` as constrained (component
// `this->offset`; with ALBANY_LCM and a Schwarz BC, every spatial component
// that is not already listed in `fixed_dofs_`), imports the marks onto the
// column map and then, for every local row of the Jacobian:
//   - zeroes each off-diagonal entry whose row or column is constrained,
//   - multiplies the diagonal entry of a constrained row by `scale`,
//   - if a residual vector was supplied, sets f to 0 and x to the
//     prescribed value on constrained rows.
//
// dirichlet_workset: supplies x, f, Jac, nodeSets and spatial_dimension_
//                    (plus fixed_dofs_ / is_schwarz_bc_ under ALBANY_LCM).
void
SDirichlet<PHAL::AlbanyTraits::Jacobian, Traits>::evaluateFields(
    typename Traits::EvalData dirichlet_workset)
{
  // NOTE: you may be tempted to const_cast away the const here. However,
  //       consider the case where x is a Thyra::TpetraVector object. The
  //       actual Tpetra_Vector is stored as a Teuchos::ConstNonconstObjectContainer,
  //       which (most likely) happens to be created from a const RCP, and therefore
  //       when calling getTpetraVector (from Thyra::TpetraVector), the container
  //       will throw.
  //       Instead, keep the const correctness until the very last moment.
  Teuchos::RCP<const Thyra_Vector> x = dirichlet_workset.x;
  Teuchos::RCP<Thyra_Vector>       f = dirichlet_workset.f;

  // TODO: abstract away the tpetra interface
  Teuchos::RCP<Tpetra_CrsMatrix> J = Albany::getTpetraMatrix(dirichlet_workset.Jac);

  auto row_map = J->getRowMap();
  auto col_map = J->getColMap();
  // we make this assumption, which lets us use both local row and column
  // indices into a single is_dbc vector
  ALBANY_ASSERT(col_map->isLocallyFitted(*row_map));

  // Look the node set up explicitly so that an unknown ID is reported instead
  // of dereferencing the end iterator.
  auto const ns_iter = dirichlet_workset.nodeSets->find(this->nodeSetID);
  ALBANY_ASSERT(ns_iter != dirichlet_workset.nodeSets->end());
  auto& ns_nodes = ns_iter->second;

  // The residual and solution are only touched when a residual was passed in.
  bool const fill_residual = f != Teuchos::null;

  auto f_view = fill_residual ? Albany::getNonconstLocalData(f) : Teuchos::null;
  auto x_view = fill_residual ? Teuchos::arcp_const_cast<ST>(Albany::getLocalData(x)) : Teuchos::null;

  // Scratch buffers for one matrix row, resized per row below.
  Teuchos::Array<ST> entries;
  Teuchos::Array<LO> indices;

  using IntVec = Tpetra::Vector<int, Tpetra_LO, Tpetra_GO, KokkosNode>;
  using Import = Tpetra::Import<Tpetra_LO, Tpetra_GO, KokkosNode>;
  Teuchos::RCP<const Import> import;

  auto domain_map = row_map;  // we are assuming this!

  // in theory we should use the importer from the CRS graph, although
  // I saw a segfault in one of the tests when doing this...
  // if (J->getCrsGraph()->isFillComplete()) {
  //  import = J->getCrsGraph()->getImporter();
  //} else {
  // this construction is expensive!
  import = Teuchos::rcp(new Import(domain_map, col_map));
  //}

  IntVec row_is_dbc(row_map);
  IntVec col_is_dbc(col_map);

  int const spatial_dimension = dirichlet_workset.spatial_dimension_;

#if defined(ALBANY_LCM)
  auto const& fixed_dofs = dirichlet_workset.fixed_dofs_;
#endif

  // Flag constrained rows on the host view of row_is_dbc.
  row_is_dbc.template modify<Kokkos::HostSpace>();
  {
    auto row_is_dbc_data = row_is_dbc.template getLocalView<Kokkos::HostSpace>();
    ALBANY_ASSERT(row_is_dbc_data.extent(1) == 1);
#if defined(ALBANY_LCM)
    if (dirichlet_workset.is_schwarz_bc_ == false) {  // regular SDBC
#endif
      for (size_t ns_node = 0; ns_node < ns_nodes.size(); ns_node++) {
        auto dof = ns_nodes[ns_node][this->offset];
        row_is_dbc_data(dof, 0) = 1;
      }
#if defined(ALBANY_LCM)
    } else {  // special case for Schwarz SDBC
      for (size_t ns_node = 0; ns_node < ns_nodes.size(); ns_node++) {
        for (int offset = 0; offset < spatial_dimension; ++offset) {
          auto dof = ns_nodes[ns_node][offset];
          // If this DOF already has a DBC, skip it.
          if (fixed_dofs.find(dof) != fixed_dofs.end()) continue;
          row_is_dbc_data(dof, 0) = 1;
        }
      }
    }
#endif
  }

  // Propagate the row flags to the column map so that constrained columns
  // can be recognised by their local column index.
  col_is_dbc.doImport(row_is_dbc, *import, Tpetra::ADD);
  auto col_is_dbc_data = col_is_dbc.template getLocalView<Kokkos::HostSpace>();

  auto min_local_row = row_map->getMinLocalIndex();
  auto max_local_row = row_map->getMaxLocalIndex();
  for (auto local_row = min_local_row; local_row <= max_local_row; ++local_row) {
    auto num_row_entries = J->getNumEntriesInLocalRow(local_row);

    entries.resize(num_row_entries);
    indices.resize(num_row_entries);

    J->getLocalRowCopy(local_row, indices(), entries(), num_row_entries);

    // Relies on the locally-fitted assumption asserted above: the local row
    // index is used to index the column-map vector.
    auto const is_dbc_row = col_is_dbc_data(local_row, 0) > 0;

    if (is_dbc_row && fill_residual == true) {
      f_view[local_row] = 0.0;
      x_view[local_row] = this->value.val();
    }

    for (size_t row_entry = 0; row_entry < num_row_entries; ++row_entry) {
      auto local_col         = indices[row_entry];
      auto is_diagonal_entry = local_col == local_row;
      // IKT, 4/5/18: scale diagonal entries by provided scaling
      if (is_diagonal_entry && is_dbc_row) {
        entries[row_entry] *= scale;
      }
      // Diagonal entries are never zeroed.
      if (is_diagonal_entry) continue;
      ALBANY_ASSERT(local_col >= col_map->getMinLocalIndex());
      ALBANY_ASSERT(local_col <= col_map->getMaxLocalIndex());
      auto const is_dbc_col = col_is_dbc_data(local_col, 0) > 0;
      if (is_dbc_row || is_dbc_col) {
        entries[row_entry] = 0.0;
      }
    }
    J->replaceLocalValues(local_row, indices(), entries());
  }
}
int main(int argc, char * argv[]) { typedef MPI::HGeometryForest<DIM,DOW> forest_t; typedef MPI::BirdView<forest_t> ir_mesh_t; typedef FEMSpace<double,DIM,DOW> fe_space_t; typedef MPI::DOF::GlobalIndex<forest_t, fe_space_t> global_index_t; MPI_Init(&argc, &argv); forest_t forest(MPI_COMM_WORLD); ir_mesh_t ir_mesh; MPI::load_mesh(argv[1], forest, ir_mesh); /// 从一个目录中读入网格数据 int round = 0; if (argc >= 3) round = atoi(argv[2]); ir_mesh.globalRefine(round); ir_mesh.semiregularize(); ir_mesh.regularize(false); TemplateGeometry<DIM> tri; tri.readData("triangle.tmp_geo"); CoordTransform<DIM,DIM> tri_ct; tri_ct.readData("triangle.crd_trs"); TemplateDOF<DIM> tri_td(tri); tri_td.readData("triangle.1.tmp_dof"); BasisFunctionAdmin<double,DIM,DIM> tri_bf(tri_td); tri_bf.readData("triangle.1.bas_fun"); std::vector<TemplateElement<double,DIM,DIM> > tmp_ele(1); tmp_ele[0].reinit(tri, tri_td, tri_ct, tri_bf); RegularMesh<DIM,DOW>& mesh = ir_mesh.regularMesh(); fe_space_t fem_space(mesh, tmp_ele); u_int n_ele = mesh.n_geometry(DIM); fem_space.element().resize(n_ele); for (int i = 0;i < n_ele;i ++) { fem_space.element(i).reinit(fem_space, i, 0); } fem_space.buildElement(); fem_space.buildDof(); fem_space.buildDofBoundaryMark(); std::cout << "Building global indices ... " << std::flush; global_index_t global_index(forest, fem_space); global_index.build(); std::cout << "OK!" << std::endl; Epetra_MpiComm comm(forest.communicator()); Epetra_Map map(global_index.n_global_dof(), global_index.n_primary_dof(), 0, comm); global_index.build_epetra_map(map); /// 构造 Epetra 的分布式稀疏矩阵模板 std::cout << "Build sparsity pattern ... 
" << std::flush; Epetra_FECrsGraph G(Copy, map, 10); fe_space_t::ElementIterator the_ele = fem_space.beginElement(), end_ele = fem_space.endElement(); for (;the_ele != end_ele;++ the_ele) { const std::vector<int>& ele_dof = the_ele->dof(); u_int n_ele_dof = ele_dof.size(); /** * 建立从局部自由度数组到全局自由度数组的映射表,这是实现分布式并行 * 状态下的数据结构的关键一步。 */ std::vector<int> indices(n_ele_dof); for (u_int i = 0;i < n_ele_dof;++ i) { indices[i] = global_index(ele_dof[i]); } G.InsertGlobalIndices(n_ele_dof, &indices[0], n_ele_dof, &indices[0]); } G.GlobalAssemble(); std::cout << "OK!" << std::endl; /// 准备构造 Epetra 的分布式稀疏矩阵和计算分布式右端项 std::cout << "Build sparse matrix ... " << std::flush; Epetra_FECrsMatrix A(Copy, G); Epetra_FEVector b(map); the_ele = fem_space.beginElement(); for (;the_ele != end_ele;++ the_ele) { double vol = the_ele->templateElement().volume(); const QuadratureInfo<DIM>& qi = the_ele->findQuadratureInfo(5); std::vector<Point<DIM> > q_pnt = the_ele->local_to_global(qi.quadraturePoint()); int n_q_pnt = qi.n_quadraturePoint(); std::vector<double> jac = the_ele->local_to_global_jacobian(qi.quadraturePoint()); std::vector<std::vector<double> > bas_val = the_ele->basis_function_value(q_pnt); std::vector<std::vector<std::vector<double> > > bas_grad = the_ele->basis_function_gradient(q_pnt); const std::vector<int>& ele_dof = the_ele->dof(); u_int n_ele_dof = ele_dof.size(); FullMatrix<double> ele_mat(n_ele_dof, n_ele_dof); Vector<double> ele_rhs(n_ele_dof); for (u_int l = 0;l < n_q_pnt;++ l) { double JxW = vol*jac[l]*qi.weight(l); double f_val = _f_(q_pnt[l]); for (u_int i = 0;i < n_ele_dof;++ i) { for (u_int j = 0;j < n_ele_dof;++ j) { ele_mat(i, j) += JxW*(bas_val[i][l]*bas_val[j][l] + innerProduct(bas_grad[i][l], bas_grad[j][l])); } ele_rhs(i) += JxW*f_val*bas_val[i][l]; } } /** * 此处将单元矩阵和单元载荷先计算好,然后向全局的矩阵和载荷向量上 * 集中,可以提高效率。 */ std::vector<int> indices(n_ele_dof); for (u_int i = 0;i < n_ele_dof;++ i) { indices[i] = global_index(ele_dof[i]); } A.SumIntoGlobalValues(n_ele_dof, 
&indices[0], n_ele_dof, &indices[0], &ele_mat(0,0)); b.SumIntoGlobalValues(n_ele_dof, &indices[0], &ele_rhs(0)); } A.GlobalAssemble(); b.GlobalAssemble(); std::cout << "OK!" << std::endl; /// 准备解向量。 Epetra_Vector x(map); /// 调用 AztecOO 的求解器。 std::cout << "Solving the linear system ..." << std::flush; Epetra_LinearProblem problem(&A, &x, &b); AztecOO solver(problem); ML_Epetra::MultiLevelPreconditioner precond(A, true); solver.SetPrecOperator(&precond); solver.SetAztecOption(AZ_solver, AZ_cg); solver.SetAztecOption(AZ_output, 100); solver.Iterate(5000, 1.0e-12); std::cout << "OK!" << std::endl; Epetra_Map fe_map(-1, global_index.n_local_dof(), &global_index(0), 0, comm); FEMFunction<double,DIM> u_h(fem_space); Epetra_Import importer(fe_map, map); Epetra_Vector X(View, fe_map, &u_h(0)); X.Import(x, importer, Add); char filename[1024]; sprintf(filename, "u_h%d.dx", forest.rank()); u_h.writeOpenDXData(filename); MPI_Finalize(); return 0; }
/**
 * Partition the elements of \p mesh into \p np parts with METIS.
 *
 * For np == 1 the work is delegated to singlePartition(). Otherwise the
 * element adjacency graph (one vertex per element, one edge per valid
 * neighbor) is built in CSR form on the master rank, METIS is invoked
 * (recursive bisection for np <= 8, k-way otherwise) and the resulting part
 * number is stored as the process id of every element.
 *
 * NOTE(review): the comments below mention broadcasting the decomposition,
 * but no broadcast is performed in this function; on non-master ranks
 * `part` keeps its zero initial values. Confirm this is only used on a
 * single process.
 *
 * \param mesh mesh whose elements get their process id assigned
 * \param np   number of parts to create
 */
void
PartitionerMetis<MeshType>::partitionImpl ( mesh_ptrtype mesh, rank_type np )
{
    LOG(INFO) << "PartitionerMetis::partitionImpl starts...";

    // Check for an easy return
    if (np == 1)
    {
        this->singlePartition (mesh);
        return;
    }

    // Start the timer only past the early return so that it is always
    // matched by the toc() at the end of this function.
    tic();

    const dof_id_type n_elems = mesh->numElements();

    // build the graph
    // std::vector<Metis::idx_t> options(5);
    std::vector<Metis::idx_t> vwgt(n_elems);
    std::vector<Metis::idx_t> part(n_elems);

    // number of "nodes" (elements) in the graph
    Metis::idx_t n = static_cast<Metis::idx_t>(n_elems);
    // number of subdomains to create
    Metis::idx_t nparts = static_cast<Metis::idx_t>(np);
    // number of edges cut by the resulting partition
    Metis::idx_t edgecut = 0;

    // element id -> contiguous index 0,1,2,... in iteration order
    std::map<dof_id_type, dof_id_type> global_index_map;
    {
        dof_id_type next_gid = 0;
        for( auto const& elt : elements(mesh) )
        {
            global_index_map.insert (std::make_pair(elt.id(), next_gid++));
        }
    }

    // Invoke METIS, but only on processor 0.
    // Then broadcast the resulting decomposition
    if ( Environment::isMasterRank() )
    {
        CSRGraphMetis<Metis::idx_t> csr_graph;

        csr_graph.offsets.resize(mesh->numElements()+1, 0);

        // Local scope for these
        {
#ifndef NDEBUG
            std::size_t graph_size=0;
#endif
            // build the graph in CSR format.  Note that
            // the edges in the graph will correspond to
            // face neighbors
            for( auto& elt: elements(mesh) )
            {
                // (1) first pass - get the row sizes for each element by counting the number
                // of face neighbors.  Also populate the vwght array if necessary
                const dof_id_type gid = global_index_map[elt.id()];
                CHECK( gid < vwgt.size() ) << "Invalid gid " << gid << " greater or equal than " << vwgt.size();

                // maybe there is a better weight?
                // The weight is used to define what a balanced graph is
                //if(!_weights)
                vwgt[gid] = elt.numPoints;
                //else
                //vwgt[gid] = static_cast<Metis::idx_t>((*_weights)[elem->id()]);

                unsigned int num_neighbors = 0;

                // Loop over the element's neighbors.  An element
                // adjacency corresponds to a face neighbor
                for ( uint16_type ms=0; ms < elt.nNeighbors(); ms++ )
                {
                    size_type neighbor_id = elt.neighbor( ms ).first;
                    if ( neighbor_id != invalid_size_type_value )
                    {
                        num_neighbors++;
                    }
                }
                std::cout << "element id " << elt.id() << " gid: " << gid << " w: " << vwgt[gid] << " neigh: " << num_neighbors << std::endl;
                csr_graph.prepareNumberNonZeros(gid, num_neighbors);
#ifndef NDEBUG
                graph_size += num_neighbors;
#endif
            }

            csr_graph.prepareForUse();

            // (2) second pass - fill the compressed adjacency array
            for( auto& elt : elements(mesh) )
            {
                dof_id_type gid = global_index_map[elt.id()];

                unsigned int connection=0;

                // Loop over the element's neighbors.  An element
                // adjacency corresponds to a face neighbor
                for ( uint16_type ms=0; ms < elt.nNeighbors(); ms++ )
                {
                    size_type neighbor_id = elt.neighbor( ms ).first;
                    if ( neighbor_id != invalid_size_type_value )
                    {
                        csr_graph(gid, connection++) = global_index_map[neighbor_id];
                    }
                }
            }
#ifndef NDEBUG
            // We create a non-empty vals for a disconnected graph, to
            // work around a segfault from METIS.
            DCHECK( csr_graph.vals.size() == std::max(graph_size,std::size_t(1))) << "Invalid graph";
#endif
        } // done building the graph

        Metis::idx_t ncon = 1;

        // Select which type of partitioning to create

        // Use recursive if the number of partitions is less than or equal to 8
        if (np <= 8)
            Metis::METIS_PartGraphRecursive(&n, &ncon, &csr_graph.offsets[0],
                                            &csr_graph.vals[0], &vwgt[0], NULL,
                                            NULL, &nparts, NULL, NULL, NULL,
                                            &edgecut, &part[0]);

        // Otherwise  use kway
        else
            Metis::METIS_PartGraphKway(&n, &ncon, &csr_graph.offsets[0],
                                       &csr_graph.vals[0], &vwgt[0], NULL,
                                       NULL, &nparts, NULL, NULL, NULL,
                                       &edgecut, &part[0]);

    } // end processor 0 part

    // Assign the returned processor ids.  The part array contains the processor
    // id for each element, but in terms of the contiguous indexing we defined
    // above
    LOG(INFO) << "PartitionerMetis::partitionImpl nelements : " << nelements(elements(mesh));
    for( auto it = mesh->beginElement(), en = mesh->endElement(); it != en; ++it )
    {
        dof_id_type gid = global_index_map[it->id()];
        CHECK( gid < part.size() ) << "Invalid gid " << gid << " greater or equal than partition size " << part.size();
        rank_type pid = static_cast<rank_type>(part[gid]);

        std::cout << "element id " << it->id() << " process id " << pid << "\n";
        // work on a copy and put it back through the container's replace()
        auto e = *it;
        e.setProcessId( pid );
        mesh->elements().replace( it, e );
    }
    for( auto& e : allelements(mesh) )
    {
        std::cout << "2. element id " << e.id() << " process id " << e.processId() << "\n";
    }

    auto t = toc("PartitionerMetis::partitionImpl", FLAGS_v > 0 );
    LOG(INFO) << "PartitionerMetis::partitionImpl done in " << t << "s";
}
int main(int argc, char * argv[]) { typedef MPI::HGeometryForest<DIM,DOW> forest_t; typedef MPI::BirdView<forest_t> ir_mesh_t; typedef FEMSpace<double,DIM,DOW> fe_space_t; typedef MPI::DOF::GlobalIndex<forest_t, fe_space_t> global_index_t; MPI_Init(&argc, &argv); forest_t forest(MPI_COMM_WORLD); ir_mesh_t ir_mesh; MPI::load_mesh(argv[1], forest, ir_mesh); /// 从一个目录中读入网格数据 int round = 0; if (argc >= 3) round = atoi(argv[2]); ir_mesh.globalRefine(round); ir_mesh.semiregularize(); ir_mesh.regularize(false); TemplateGeometry<DIM> tri; tri.readData("triangle.tmp_geo"); CoordTransform<DIM,DIM> tri_ct; tri_ct.readData("triangle.crd_trs"); TemplateDOF<DIM> tri_td(tri); tri_td.readData("triangle.1.tmp_dof"); BasisFunctionAdmin<double,DIM,DIM> tri_bf(tri_td); tri_bf.readData("triangle.1.bas_fun"); std::vector<TemplateElement<double,DIM,DIM> > tmp_ele(1); tmp_ele[0].reinit(tri, tri_td, tri_ct, tri_bf); RegularMesh<DIM,DOW>& mesh = ir_mesh.regularMesh(); fe_space_t fem_space(mesh, tmp_ele); u_int n_ele = mesh.n_geometry(DIM); fem_space.element().resize(n_ele); for (int i = 0;i < n_ele;i ++) { fem_space.element(i).reinit(fem_space, i, 0); } fem_space.buildElement(); fem_space.buildDof(); fem_space.buildDofBoundaryMark(); std::cout << "Building global indices ... " << std::flush; global_index_t global_index(forest, fem_space); global_index.build(); std::cout << "OK!" << std::endl; Epetra_MpiComm comm(forest.communicator()); Epetra_Map map(global_index.n_global_dof(), global_index.n_primary_dof(), 0, comm); global_index.build_epetra_map(map); /// 构造 Epetra 的分布式稀疏矩阵模板 std::cout << "Build sparsity pattern ... 
" << std::flush; Epetra_FECrsGraph G(Copy, map, 10); fe_space_t::ElementIterator the_ele = fem_space.beginElement(), end_ele = fem_space.endElement(); for (;the_ele != end_ele;++ the_ele) { const std::vector<int>& ele_dof = the_ele->dof(); u_int n_ele_dof = ele_dof.size(); /** * 建立从局部自由度数组到全局自由度数组的映射表,这是实现分布式并行 * 状态下的数据结构的关键一步。 */ std::vector<int> indices(n_ele_dof); for (u_int i = 0;i < n_ele_dof;++ i) { indices[i] = global_index(ele_dof[i]); } G.InsertGlobalIndices(n_ele_dof, &indices[0], n_ele_dof, &indices[0]); } G.GlobalAssemble(); std::cout << "OK!" << std::endl; /// 准备构造 Epetra 的分布式稀疏矩阵和计算分布式右端项 std::cout << "Build sparse matrix ... " << std::flush; Epetra_FECrsMatrix A(Copy, G); Epetra_FEVector b(map); the_ele = fem_space.beginElement(); for (;the_ele != end_ele;++ the_ele) { double vol = the_ele->templateElement().volume(); const QuadratureInfo<DIM>& qi = the_ele->findQuadratureInfo(5); std::vector<Point<DIM> > q_pnt = the_ele->local_to_global(qi.quadraturePoint()); int n_q_pnt = qi.n_quadraturePoint(); std::vector<double> jac = the_ele->local_to_global_jacobian(qi.quadraturePoint()); std::vector<std::vector<double> > bas_val = the_ele->basis_function_value(q_pnt); std::vector<std::vector<std::vector<double> > > bas_grad = the_ele->basis_function_gradient(q_pnt); const std::vector<int>& ele_dof = the_ele->dof(); u_int n_ele_dof = ele_dof.size(); FullMatrix<double> ele_mat(n_ele_dof, n_ele_dof); Vector<double> ele_rhs(n_ele_dof); for (u_int l = 0;l < n_q_pnt;++ l) { double JxW = vol*jac[l]*qi.weight(l); double f_val = _f_(q_pnt[l]); for (u_int i = 0;i < n_ele_dof;++ i) { for (u_int j = 0;j < n_ele_dof;++ j) { ele_mat(i, j) += JxW*(innerProduct(bas_grad[i][l], bas_grad[j][l])); } ele_rhs(i) += JxW*f_val*bas_val[i][l]; } } /** * 此处将单元矩阵和单元载荷先计算好,然后向全局的矩阵和载荷向量上 * 集中,可以提高效率。 */ std::vector<int> indices(n_ele_dof); for (u_int i = 0;i < n_ele_dof;++ i) { indices[i] = global_index(ele_dof[i]); } A.SumIntoGlobalValues(n_ele_dof, &indices[0], n_ele_dof, &indices[0], 
&ele_mat(0,0)); b.SumIntoGlobalValues(n_ele_dof, &indices[0], &ele_rhs(0)); } A.GlobalAssemble(); b.GlobalAssemble(); std::cout << "OK!" << std::endl; /// 准备解向量。 Epetra_FEVector x(map); /// 加上狄氏边值条件 u_int n_bnd_dof = 0; /// 首先清点边界上自由度的个数 for (u_int i = 0;i < fem_space.n_dof();++ i) { if (fem_space.dofBoundaryMark(i) > 0) { /// 如果不是在主几何体上就不做 if (! global_index.is_dof_on_primary_geometry(i)) continue; n_bnd_dof += 1; } } /// 准备空间存储边界上全局标号、自变量和右端项 std::vector<int> bnd_idx(n_bnd_dof); std::vector<double> x_entry(n_bnd_dof), rhs_entry(n_bnd_dof); /// 对自由度做循环 for (u_int i = 0, j = 0;i < fem_space.n_dof();++ i) { if (fem_space.dofBoundaryMark(i) > 0) { /// 边界上的自由度? /// 如果不是在主几何体上就不做 if (! global_index.is_dof_on_primary_geometry(i)) continue; const int& idx = global_index(i); /// 行的全局标号 bnd_idx[j] = idx; /// 修改矩阵 int lrid = A.LRID(idx); int row_nnz, *row_idx; double *row_entry, row_diag; A.ExtractMyRowView(lrid, row_nnz, row_entry, row_idx); /// 取出矩阵的行 for (int k = 0;k < row_nnz;++ k) { /// 对矩阵的行进行修改 if (A.LCID(row_idx[k]) != lrid) { /// 如果不是对角元 row_entry[k] = 0.0; /// 则将矩阵元素清零 } else { /// 而对角元保持不变 row_diag = row_entry[k]; /// 并记录下对角元 } } /// 计算并记下自变量和右端项,假设自由度值为插值量 double u_b_val = _u_b_(fem_space.dofInfo(i).interp_point); x_entry[j] = u_b_val; rhs_entry[j] = row_diag*u_b_val; j += 1; } } std::cout << "# DOF on the boundary: " << n_bnd_dof << std::endl; /// 修改解变量和右端项 x.ReplaceGlobalValues(n_bnd_dof, &bnd_idx[0], &x_entry[0]); b.ReplaceGlobalValues(n_bnd_dof, &bnd_idx[0], &rhs_entry[0]); /// 调用 AztecOO 的求解器。 std::cout << "Solving the linear system ..." << std::flush; Epetra_LinearProblem problem(&A, &x, &b); AztecOO solver(problem); ML_Epetra::MultiLevelPreconditioner precond(A, true); solver.SetPrecOperator(&precond); solver.SetAztecOption(AZ_solver, AZ_gmres); solver.SetAztecOption(AZ_output, 100); solver.Iterate(5000, 1.0e-12); std::cout << "OK!" 
<< std::endl; Epetra_Map fe_map(-1, global_index.n_local_dof(), &global_index(0), 0, comm); FEMFunction<double,DIM> u_h(fem_space); Epetra_Import importer(fe_map, map); Epetra_Vector X(View, fe_map, &u_h(0)); X.Import(x, importer, Add); char filename[1024]; sprintf(filename, "u_h%d.dx", forest.rank()); u_h.writeOpenDXData(filename); MPI_Finalize(); return 0; }
int main(int argc, char * argv[]) { typedef MPI::HGeometryForest<DIM,DOW> forest_t; typedef MPI::BirdView<forest_t> ir_mesh_t; typedef FEMSpace<double,DIM,DOW> fe_space_t; typedef MPI::DOF::GlobalIndex<forest_t, fe_space_t> global_index_t; PetscInitialize(&argc, &argv, (char *)NULL, help); forest_t forest(PETSC_COMM_WORLD); forest.readMesh(argv[1]); ir_mesh_t ir_mesh(forest); int round = 0; if (argc >= 3) round = atoi(argv[2]); ir_mesh.globalRefine(round); ir_mesh.semiregularize(); ir_mesh.regularize(false); setenv("AFEPACK_TEMPLATE_PATH", "/usr/local/AFEPack/template/triangle", 1); TemplateGeometry<DIM> tri; tri.readData("triangle.tmp_geo"); CoordTransform<DIM,DIM> tri_ct; tri_ct.readData("triangle.crd_trs"); TemplateDOF<DIM> tri_td(tri); tri_td.readData("triangle.1.tmp_dof"); BasisFunctionAdmin<double,DIM,DIM> tri_bf(tri_td); tri_bf.readData("triangle.1.bas_fun"); std::vector<TemplateElement<double,DIM,DIM> > tmp_ele(1); tmp_ele[0].reinit(tri, tri_td, tri_ct, tri_bf); RegularMesh<DIM,DOW>& mesh = ir_mesh.regularMesh(); fe_space_t fem_space(mesh, tmp_ele); u_int n_ele = mesh.n_geometry(DIM); fem_space.element().resize(n_ele); for (int i = 0;i < n_ele;i ++) { fem_space.element(i).reinit(fem_space, i, 0); } fem_space.buildElement(); fem_space.buildDof(); fem_space.buildDofBoundaryMark(); std::cout << "Building global indices ... " << std::flush; global_index_t global_index(forest, fem_space); global_index.build(); std::cout << "OK!" << std::endl; std::cout << "Building the linear system ... 
" << std::flush; Mat A; Vec x, b; MatCreateMPIAIJ(PETSC_COMM_WORLD, global_index.n_primary_dof(), global_index.n_primary_dof(), PETSC_DECIDE, PETSC_DECIDE, 0, PETSC_NULL, 0, PETSC_NULL, &A); VecCreateMPI(PETSC_COMM_WORLD, global_index.n_primary_dof(), PETSC_DECIDE, &b); fe_space_t::ElementIterator the_ele = fem_space.beginElement(), end_ele = fem_space.endElement(); for (;the_ele != end_ele;++ the_ele) { double vol = the_ele->templateElement().volume(); const QuadratureInfo<DIM>& qi = the_ele->findQuadratureInfo(5); std::vector<Point<DIM> > q_pnt = the_ele->local_to_global(qi.quadraturePoint()); int n_q_pnt = qi.n_quadraturePoint(); std::vector<double> jac = the_ele->local_to_global_jacobian(qi.quadraturePoint()); std::vector<std::vector<double> > bas_val = the_ele->basis_function_value(q_pnt); std::vector<std::vector<std::vector<double> > > bas_grad = the_ele->basis_function_gradient(q_pnt); const std::vector<int>& ele_dof = the_ele->dof(); u_int n_ele_dof = ele_dof.size(); FullMatrix<double> ele_mat(n_ele_dof, n_ele_dof); Vector<double> ele_rhs(n_ele_dof); for (u_int l = 0;l < n_q_pnt;++ l) { double JxW = vol*jac[l]*qi.weight(l); double f_val = _f_(q_pnt[l]); for (u_int i = 0;i < n_ele_dof;++ i) { for (u_int j = 0;j < n_ele_dof;++ j) { ele_mat(i, j) += JxW*(bas_val[i][l]*bas_val[j][l] + innerProduct(bas_grad[i][l], bas_grad[j][l])); } ele_rhs(i) += JxW*f_val*bas_val[i][l]; } } /** * 此处将单元矩阵和单元载荷先计算好,然后向全局的矩阵和载荷向量上 * 集中,可以提高效率。 */ std::vector<int> indices(n_ele_dof); for (u_int i = 0;i < n_ele_dof;++ i) { indices[i] = global_index(ele_dof[i]); } MatSetValues(A, n_ele_dof, &indices[0], n_ele_dof, &indices[0], &ele_mat(0,0), ADD_VALUES); VecSetValues(b, n_ele_dof, &indices[0], &ele_rhs(0), ADD_VALUES); } MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY); MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY); VecAssemblyBegin(b); VecAssemblyEnd(b); std::cout << "OK!" << std::endl; /// 加上狄氏边值条件 std::cout << "Applying the Dirichlet boundary condition ... 
" << std::flush; u_int n_bnd_dof = 0; /// 首先清点边界上自由度的个数 for (u_int i = 0;i < fem_space.n_dof();++ i) { if (fem_space.dofBoundaryMark(i) > 0) { /// 如果不是在主几何体上就不做 if (! global_index.is_dof_on_primary_geometry(i)) continue; n_bnd_dof += 1; } } /// 准备空间存储边界上全局标号、自变量和右端项 std::vector<int> bnd_idx(n_bnd_dof); std::vector<double> rhs_entry(n_bnd_dof); /// 对自由度做循环 for (u_int i = 0, j = 0;i < fem_space.n_dof();++ i) { if (fem_space.dofBoundaryMark(i) > 0) { /// 边界上的自由度? /// 如果不是在主几何体上就不做 if (! global_index.is_dof_on_primary_geometry(i)) continue; bnd_idx[j] = global_index(i); /// 行的全局标号 /// 计算并记下自变量和右端项,假设自由度值为插值量 double u_b_val = _u_b_(fem_space.dofInfo(i).interp_point); rhs_entry[j] = u_b_val; j += 1; } } /// 将矩阵修改为对角元 1.0,其它元素为零的状态 /// MatSetOption(A, MAT_KEEP_ZEROED_ROWS); MatZeroRows(A, n_bnd_dof, &bnd_idx[0], 1.0); /// 修改右端项为相应点的边值 Vec rhs_bnd; VecCreateSeqWithArray(PETSC_COMM_SELF, n_bnd_dof, &rhs_entry[0], &rhs_bnd); IS is_bnd; ISCreateGeneralWithArray(PETSC_COMM_WORLD, n_bnd_dof, &bnd_idx[0], &is_bnd); VecScatter bnd_scatter; VecScatterCreate(rhs_bnd, PETSC_NULL, b, is_bnd, &bnd_scatter); VecScatterBegin(bnd_scatter, rhs_bnd, b, INSERT_VALUES, SCATTER_FORWARD); VecScatterEnd(bnd_scatter, rhs_bnd, b, INSERT_VALUES, SCATTER_FORWARD); VecDestroy(rhs_bnd); ISDestroy(is_bnd); VecScatterDestroy(bnd_scatter); std::cout << "OK!" 
<< std::endl; VecDuplicate(b, &x); KSP solver; KSPCreate(PETSC_COMM_WORLD, &solver); KSPSetOperators(solver, A, A, SAME_NONZERO_PATTERN); KSPSetType(solver, KSPGMRES); KSPSetFromOptions(solver); KSPSolve(solver, b, x); if (forest.rank() == 0) { KSPConvergedReason reason; KSPGetConvergedReason(solver,&reason); if (reason == KSP_DIVERGED_INDEFINITE_PC) { printf("\nDivergence because of indefinite preconditioner;\n"); printf("Run the executable again but with -pc_ilu_shift option.\n"); } else if (reason<0) { printf("\nOther kind of divergence: this should not happen.\n"); } else { PetscInt its; KSPGetIterationNumber(solver,&its); printf("\nConvergence in %d iterations.\n",(int)its); } printf("\n"); } MatDestroy(A); VecDestroy(b); KSPDestroy(solver); FEMFunction<double,DIM> u_h(fem_space); Vec X; VecCreateSeqWithArray(PETSC_COMM_SELF, global_index.n_local_dof(), &u_h(0), &X); std::vector<int> primary_idx(global_index.n_primary_dof()); global_index.build_primary_index(&primary_idx[0]); IS is; ISCreateGeneralWithArray(PETSC_COMM_WORLD, global_index.n_local_dof(), &global_index(0), &is); VecScatter scatter; VecScatterCreate(x, is, X, PETSC_NULL, &scatter); VecScatterBegin(scatter, x, X, INSERT_VALUES, SCATTER_FORWARD); VecScatterEnd(scatter, x, X, INSERT_VALUES, SCATTER_FORWARD); VecDestroy(x); VecDestroy(X); VecScatterDestroy(scatter); ISDestroy(is); char filename[1024]; sprintf(filename, "u_h%d.dx", forest.rank()); u_h.writeOpenDXData(filename); PetscFinalize(); return 0; }
int main(int argc,char **argv) { Mat A,B,C,PtAP,PtAP_copy,PtAP_squared; PetscInt i,M,N,Istart,Iend,n=7,j,J,Ii,m=8,k,o=1; PetscScalar v; PetscErrorCode ierr; PetscBool equal=PETSC_FALSE,mat_view=PETSC_FALSE; char stencil[PETSC_MAX_PATH_LEN]; #if defined(PETSC_USE_LOG) PetscLogStage fullMatMatMultStage; #endif ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr; ierr = PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,NULL,"-o",&o,NULL);CHKERRQ(ierr); ierr = PetscOptionsHasName(NULL,NULL,"-result_view",&mat_view);CHKERRQ(ierr); ierr = PetscOptionsGetString(NULL,NULL,"-stencil",stencil,PETSC_MAX_PATH_LEN,NULL);CHKERRQ(ierr); /* Create a aij matrix A */ M = N = m*n*o; ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr); ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr); ierr = MatSetFromOptions(A);CHKERRQ(ierr); /* Consistency checks */ if (o < 1 || m < 1 || n < 1) SETERRQ(PETSC_COMM_WORLD,1,"Dimensions need to be larger than zero!"); /************ 2D stencils ***************/ ierr = PetscStrcmp(stencil, "2d5point", &equal);CHKERRQ(ierr); if (equal) { /* 5-point stencil, 2D */ ierr = MatMPIAIJSetPreallocation(A,5,NULL,5,NULL);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr); ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr); for (Ii=Istart; Ii<Iend; Ii++) { v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); if (i>0) {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (j>0) {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = 
MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} v = 4.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); } } ierr = PetscStrcmp(stencil, "2d9point", &equal);CHKERRQ(ierr); if (equal) { /* 9-point stencil, 2D */ ierr = MatMPIAIJSetPreallocation(A,9,NULL,9,NULL);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(A,9,NULL);CHKERRQ(ierr); ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr); for (Ii=Istart; Ii<Iend; Ii++) { v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); if (i>0) {J = global_index(i-1,j, k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (i>0 && j>0) {J = global_index(i-1,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if ( j>0) {J = global_index(i, j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (i<m-1 && j>0) {J = global_index(i+1,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (i<m-1) {J = global_index(i+1,j, k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (i<m-1 && j<n-1) {J = global_index(i+1,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (j<n-1) {J = global_index(i, j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (i>0 && j<n-1) {J = global_index(i-1,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} v = 8.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); } } ierr = PetscStrcmp(stencil, "2d9point2", &equal);CHKERRQ(ierr); if (equal) { /* 9-point Cartesian stencil (width 2 per coordinate), 2D */ ierr = MatMPIAIJSetPreallocation(A,9,NULL,9,NULL);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(A,9,NULL);CHKERRQ(ierr); ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr); for (Ii=Istart; Ii<Iend; Ii++) { v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); if (i>0) {J = 
global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (i>1) {J = global_index(i-2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (i<m-2) {J = global_index(i+2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (j>0) {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (j>1) {J = global_index(i,j-2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (j<n-2) {J = global_index(i,j+2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} v = 8.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); } } ierr = PetscStrcmp(stencil, "2d13point", &equal);CHKERRQ(ierr); if (equal) { /* 13-point Cartesian stencil (width 3 per coordinate), 2D */ ierr = MatMPIAIJSetPreallocation(A,13,NULL,13,NULL);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(A,13,NULL);CHKERRQ(ierr); ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr); for (Ii=Istart; Ii<Iend; Ii++) { v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); if (i>0) {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (i>1) {J = global_index(i-2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (i>2) {J = global_index(i-3,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (i<m-2) {J = global_index(i+2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (i<m-3) {J = global_index(i+3,j,k,m,n); ierr = 
MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (j>0) {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (j>1) {J = global_index(i,j-2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (j>2) {J = global_index(i,j-3,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (j<n-2) {J = global_index(i,j+2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (j<n-3) {J = global_index(i,j+3,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} v = 12.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); } } /************ 3D stencils ***************/ ierr = PetscStrcmp(stencil, "3d7point", &equal);CHKERRQ(ierr); if (equal) { /* 7-point stencil, 3D */ ierr = MatMPIAIJSetPreallocation(A,7,NULL,7,NULL);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(A,7,NULL);CHKERRQ(ierr); ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr); for (Ii=Istart; Ii<Iend; Ii++) { v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); if (i>0) {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (j>0) {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (k>0) {J = global_index(i,j,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (k<o-1) {J = global_index(i,j,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} v = 6.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); } } ierr = PetscStrcmp(stencil, 
"3d13point", &equal);CHKERRQ(ierr); if (equal) { /* 13-point stencil, 3D */ ierr = MatMPIAIJSetPreallocation(A,13,NULL,13,NULL);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(A,13,NULL);CHKERRQ(ierr); ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr); for (Ii=Istart; Ii<Iend; Ii++) { v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); if (i>0) {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (i>1) {J = global_index(i-2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (i<m-2) {J = global_index(i+2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (j>0) {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (j>1) {J = global_index(i,j-2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (j<n-2) {J = global_index(i,j+2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (k>0) {J = global_index(i,j,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (k>1) {J = global_index(i,j,k-2,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (k<o-1) {J = global_index(i,j,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (k<o-2) {J = global_index(i,j,k+2,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} v = 12.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); } } ierr = PetscStrcmp(stencil, "3d19point", &equal);CHKERRQ(ierr); if (equal) { /* 19-point stencil, 3D */ ierr = MatMPIAIJSetPreallocation(A,19,NULL,19,NULL);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(A,19,NULL);CHKERRQ(ierr); 
ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr); for (Ii=Istart; Ii<Iend; Ii++) { v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); /* one hop */ if (i>0) {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (j>0) {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (k>0) {J = global_index(i,j,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (k<o-1) {J = global_index(i,j,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} /* two hops */ if (i>0 && j>0) {J = global_index(i-1,j-1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (i>0 && k>0) {J = global_index(i-1,j, k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (i>0 && j<n-1) {J = global_index(i-1,j+1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (i>0 && k<o-1) {J = global_index(i-1,j, k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (i<m-1 && j>0) {J = global_index(i+1,j-1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (i<m-1 && k>0) {J = global_index(i+1,j, k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (i<m-1 && j<n-1) {J = global_index(i+1,j+1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (i<m-1 && k<o-1) {J = global_index(i+1,j, k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (j>0 && k>0) {J = global_index(i, j-1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (j>0 && k<o-1) {J = global_index(i, j-1,k+1,m,n); ierr = 
MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (j<n-1 && k>0) {J = global_index(i, j+1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (j<n-1 && k<o-1) {J = global_index(i, j+1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} v = 18.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); } } ierr = PetscStrcmp(stencil, "3d27point", &equal);CHKERRQ(ierr); if (equal) { /* 27-point stencil, 3D */ ierr = MatMPIAIJSetPreallocation(A,27,NULL,27,NULL);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(A,27,NULL);CHKERRQ(ierr); ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr); for (Ii=Istart; Ii<Iend; Ii++) { v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); if (k>0) { if (j>0) { if (i>0) {J = global_index(i-1,j-1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} J = global_index(i, j-1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); if (i<m-1) {J = global_index(i+1,j-1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} } { if (i>0) {J = global_index(i-1,j, k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} J = global_index(i, j, k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); if (i<m-1) {J = global_index(i+1,j, k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} } if (j<n-1) { if (i>0) {J = global_index(i-1,j+1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} J = global_index(i, j+1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); if (i<m-1) {J = global_index(i+1,j+1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} } } { if (j>0) { if (i>0) {J = global_index(i-1,j-1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} J = global_index(i, j-1,k ,m,n); ierr = 
MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); if (i<m-1) {J = global_index(i+1,j-1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} } { if (i>0) {J = global_index(i-1,j, k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} J = global_index(i, j, k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); if (i<m-1) {J = global_index(i+1,j, k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} } if (j<n-1) { if (i>0) {J = global_index(i-1,j+1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} J = global_index(i, j+1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); if (i<m-1) {J = global_index(i+1,j+1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} } } if (k<o-1) { if (j>0) { if (i>0) {J = global_index(i-1,j-1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} J = global_index(i, j-1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); if (i<m-1) {J = global_index(i+1,j-1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} } { if (i>0) {J = global_index(i-1,j, k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} J = global_index(i, j, k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); if (i<m-1) {J = global_index(i+1,j, k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} } if (j<n-1) { if (i>0) {J = global_index(i-1,j+1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} J = global_index(i, j+1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); if (i<m-1) {J = global_index(i+1,j+1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} } } v = 26.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); } } ierr = 
MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* Copy A into B in order to have a more representative benchmark (A*A has more cache hits than A*B) */ ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); ierr = PetscLogStageRegister("Full MatMatMult",&fullMatMatMultStage);CHKERRQ(ierr); /* Test C = A*B */ ierr = PetscLogStagePush(fullMatMatMultStage);CHKERRQ(ierr); ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr); /* Test PtAP_squared = PtAP(C,C)*PtAP(C,C) */ ierr = MatPtAP(C,C,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&PtAP);CHKERRQ(ierr); ierr = MatDuplicate(PtAP,MAT_COPY_VALUES,&PtAP_copy);CHKERRQ(ierr); ierr = MatMatMult(PtAP,PtAP_copy,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&PtAP_squared);CHKERRQ(ierr); ierr = MatView(C,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = MatView(PtAP_squared,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = MatDestroy(&PtAP_squared);CHKERRQ(ierr); ierr = MatDestroy(&PtAP_copy);CHKERRQ(ierr); ierr = MatDestroy(&PtAP);CHKERRQ(ierr); ierr = MatDestroy(&C);CHKERRQ(ierr); ierr = MatDestroy(&B);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = PetscFinalize(); return ierr; }