Exemple #1
0
int main(int argc, char * argv[])
{
  typedef MPI::HGeometryForest<DIM,DOW> forest_t;
  typedef MPI::BirdView<forest_t> ir_mesh_t;
  typedef FEMSpace<double,DIM,DOW> fe_space_t;  
  typedef MPI::DOF::GlobalIndex<forest_t, fe_space_t> global_index_t;

  MPI_Init(&argc, &argv);

  forest_t forest(MPI_COMM_WORLD);
  forest.readMesh(argv[1]);
  ir_mesh_t ir_mesh(forest);

  int round = 0;
  if (argc >= 3) round = atoi(argv[2]);

  ir_mesh.globalRefine(round);
  ir_mesh.semiregularize();
  ir_mesh.regularize(false);

  TemplateGeometry<DIM> tri;
  tri.readData("triangle.tmp_geo");
  CoordTransform<DIM,DIM> tri_ct;
  tri_ct.readData("triangle.crd_trs");
  TemplateDOF<DIM> tri_td(tri);
  tri_td.readData("triangle.1.tmp_dof");
  BasisFunctionAdmin<double,DIM,DIM> tri_bf(tri_td);
  tri_bf.readData("triangle.1.bas_fun");

  std::vector<TemplateElement<double,DIM,DIM> > tmp_ele(1);
  tmp_ele[0].reinit(tri, tri_td, tri_ct, tri_bf);

  RegularMesh<DIM,DOW>& mesh = ir_mesh.regularMesh();
  fe_space_t fem_space(mesh, tmp_ele);

  u_int n_ele = mesh.n_geometry(DIM);
  fem_space.element().resize(n_ele);
  for (int i = 0;i < n_ele;i ++) {
    fem_space.element(i).reinit(fem_space, i, 0);
  }

  fem_space.buildElement();
  fem_space.buildDof();
  fem_space.buildDofBoundaryMark();

  global_index_t global_index(forest, fem_space);
  global_index.build();

  MPI_Finalize();

  return 0;
}
void
SDirichlet<PHAL::AlbanyTraits::Jacobian, Traits>::evaluateFields(
    typename Traits::EvalData dirichlet_workset)
{
  // NOTE: you may be tempted to const_cast away the const here. However,
  //       consider the case where x is a Thyra::TpetraVector object. The
  //       actual Tpetra_Vector is stored as a Teuchos::ConstNonconstObjectContainer,
  //       which (most likely) happens to be created from a const RCP, and therefore
  //       when calling getTpetraVector (from Thyra::TpetraVector), the container
  //       will throw.
  //       Instead, keep the const correctness until the very last moment.
  Teuchos::RCP<const Thyra_Vector> x = dirichlet_workset.x;
  Teuchos::RCP<Thyra_Vector> f = dirichlet_workset.f;

  // TODO: abstract away the tpetra interface
  Teuchos::RCP<Tpetra_CrsMatrix> J = Albany::getTpetraMatrix(dirichlet_workset.Jac);

  auto row_map = J->getRowMap();
  auto col_map = J->getColMap();
  // we make this assumption, which lets us use both local row and column
  // indices into a single is_dbc vector
  ALBANY_ASSERT(col_map->isLocallyFitted(*row_map));

  auto& ns_nodes = dirichlet_workset.nodeSets->find(this->nodeSetID)->second;

  bool const fill_residual = f != Teuchos::null;

  auto f_view = fill_residual ? Albany::getNonconstLocalData(f) : Teuchos::null;
  auto x_view = fill_residual ? Teuchos::arcp_const_cast<ST>(Albany::getLocalData(x)) : Teuchos::null;

  Teuchos::Array<Tpetra_GO> global_index(1);

  Teuchos::Array<LO> index(1);

  Teuchos::Array<ST> entry(1);

  Teuchos::Array<ST> entries;

  Teuchos::Array<LO> indices;

  using IntVec = Tpetra::Vector<int, Tpetra_LO, Tpetra_GO, KokkosNode>;
  using Import = Tpetra::Import<Tpetra_LO, Tpetra_GO, KokkosNode>;
  Teuchos::RCP<const Import> import;

  auto domain_map = row_map;  // we are assuming this!

  // in theory we should use the importer from the CRS graph, although
  // I saw a segfault in one of the tests when doing this...
  // if (J->getCrsGraph()->isFillComplete()) {
  //  import = J->getCrsGraph()->getImporter();
  //} else {
  // this construction is expensive!
  import = Teuchos::rcp(new Import(domain_map, col_map));
  //}

  IntVec row_is_dbc(row_map);
  IntVec col_is_dbc(col_map);

  int const spatial_dimension = dirichlet_workset.spatial_dimension_;

#if defined(ALBANY_LCM)
  auto const& fixed_dofs = dirichlet_workset.fixed_dofs_;
#endif

  row_is_dbc.template modify<Kokkos::HostSpace>();
  {
    auto row_is_dbc_data =
        row_is_dbc.template getLocalView<Kokkos::HostSpace>();
    ALBANY_ASSERT(row_is_dbc_data.extent(1) == 1);
#if defined(ALBANY_LCM)
    if (dirichlet_workset.is_schwarz_bc_ == false) {  // regular SDBC
#endif
      for (size_t ns_node = 0; ns_node < ns_nodes.size(); ns_node++) {
        auto dof                = ns_nodes[ns_node][this->offset];
        row_is_dbc_data(dof, 0) = 1;
      }
#if defined(ALBANY_LCM)
    } else {  // special case for Schwarz SDBC
      for (size_t ns_node = 0; ns_node < ns_nodes.size(); ns_node++) {
        for (int offset = 0; offset < spatial_dimension; ++offset) {
          auto dof = ns_nodes[ns_node][offset];
          // If this DOF already has a DBC, skip it.
          if (fixed_dofs.find(dof) != fixed_dofs.end()) continue;
          row_is_dbc_data(dof, 0) = 1;
        }
      }
    }
#endif
  }
  col_is_dbc.doImport(row_is_dbc, *import, Tpetra::ADD);
  auto col_is_dbc_data = col_is_dbc.template getLocalView<Kokkos::HostSpace>();

  size_t const num_local_rows = J->getNodeNumRows();
  auto         min_local_row  = row_map->getMinLocalIndex();
  auto         max_local_row  = row_map->getMaxLocalIndex();
  for (auto local_row = min_local_row; local_row <= max_local_row;
       ++local_row) {
    auto num_row_entries = J->getNumEntriesInLocalRow(local_row);

    entries.resize(num_row_entries);
    indices.resize(num_row_entries);

    J->getLocalRowCopy(local_row, indices(), entries(), num_row_entries);

    auto row_is_dbc = col_is_dbc_data(local_row, 0) > 0;

    if (row_is_dbc && fill_residual == true) {
      f_view[local_row] = 0.0;
      x_view[local_row] = this->value.val();
    }
    

    for (size_t row_entry = 0; row_entry < num_row_entries; ++row_entry) {
      auto local_col         = indices[row_entry];
      auto is_diagonal_entry = local_col == local_row;
      //IKT, 4/5/18: scale diagonal entries by provided scaling 
      if (is_diagonal_entry && row_is_dbc) {
        entries[row_entry] *= scale;   
      }
      if (is_diagonal_entry) continue;
      ALBANY_ASSERT(local_col >= col_map->getMinLocalIndex());
      ALBANY_ASSERT(local_col <= col_map->getMaxLocalIndex());
      auto col_is_dbc = col_is_dbc_data(local_col, 0) > 0;
      if (row_is_dbc || col_is_dbc) {
        entries[row_entry] = 0.0;
      }
    }
    J->replaceLocalValues(local_row, indices(), entries());
  }
  return;
}
Exemple #3
0
int main(int argc, char * argv[])
{
  typedef MPI::HGeometryForest<DIM,DOW> forest_t;
  typedef MPI::BirdView<forest_t> ir_mesh_t;
  typedef FEMSpace<double,DIM,DOW> fe_space_t;  
  typedef MPI::DOF::GlobalIndex<forest_t, fe_space_t> global_index_t;

  MPI_Init(&argc, &argv);

  forest_t forest(MPI_COMM_WORLD);
  ir_mesh_t ir_mesh;
  MPI::load_mesh(argv[1], forest, ir_mesh); /// 从一个目录中读入网格数据

  int round = 0;
  if (argc >= 3) round = atoi(argv[2]);

  ir_mesh.globalRefine(round);
  ir_mesh.semiregularize();
  ir_mesh.regularize(false);

  TemplateGeometry<DIM> tri;
  tri.readData("triangle.tmp_geo");
  CoordTransform<DIM,DIM> tri_ct;
  tri_ct.readData("triangle.crd_trs");
  TemplateDOF<DIM> tri_td(tri);
  tri_td.readData("triangle.1.tmp_dof");
  BasisFunctionAdmin<double,DIM,DIM> tri_bf(tri_td);
  tri_bf.readData("triangle.1.bas_fun");

  std::vector<TemplateElement<double,DIM,DIM> > tmp_ele(1);
  tmp_ele[0].reinit(tri, tri_td, tri_ct, tri_bf);

  RegularMesh<DIM,DOW>& mesh = ir_mesh.regularMesh();
  fe_space_t fem_space(mesh, tmp_ele);
  u_int n_ele = mesh.n_geometry(DIM);
  fem_space.element().resize(n_ele);
  for (int i = 0;i < n_ele;i ++) {
    fem_space.element(i).reinit(fem_space, i, 0);
  }
  fem_space.buildElement();
  fem_space.buildDof();
  fem_space.buildDofBoundaryMark();

  std::cout << "Building global indices ... " << std::flush;
  global_index_t global_index(forest, fem_space);
  global_index.build();
  std::cout << "OK!" << std::endl;

  Epetra_MpiComm comm(forest.communicator());
  Epetra_Map map(global_index.n_global_dof(), global_index.n_primary_dof(), 0, comm);
  global_index.build_epetra_map(map);
  
  /// 构造 Epetra 的分布式稀疏矩阵模板
  std::cout << "Build sparsity pattern ... " << std::flush;
  Epetra_FECrsGraph G(Copy, map, 10);
  fe_space_t::ElementIterator
    the_ele = fem_space.beginElement(),
    end_ele = fem_space.endElement();
  for (;the_ele != end_ele;++ the_ele) {
    const std::vector<int>& ele_dof = the_ele->dof();
    u_int n_ele_dof = ele_dof.size();

    /**
     * 建立从局部自由度数组到全局自由度数组的映射表,这是实现分布式并行
     * 状态下的数据结构的关键一步。
     */
    std::vector<int> indices(n_ele_dof);
    for (u_int i = 0;i < n_ele_dof;++ i) {
      indices[i] = global_index(ele_dof[i]);
    }
    G.InsertGlobalIndices(n_ele_dof, &indices[0], n_ele_dof, &indices[0]);
  }
  G.GlobalAssemble();
  std::cout << "OK!" << std::endl;

  /// 准备构造 Epetra 的分布式稀疏矩阵和计算分布式右端项
  std::cout << "Build sparse matrix ... " << std::flush;
  Epetra_FECrsMatrix A(Copy, G);
  Epetra_FEVector b(map);
  the_ele = fem_space.beginElement();
  for (;the_ele != end_ele;++ the_ele) {
    double vol = the_ele->templateElement().volume();
    const QuadratureInfo<DIM>& qi = the_ele->findQuadratureInfo(5);
    std::vector<Point<DIM> > q_pnt = the_ele->local_to_global(qi.quadraturePoint());
    int n_q_pnt = qi.n_quadraturePoint();
    std::vector<double> jac = the_ele->local_to_global_jacobian(qi.quadraturePoint());
    std::vector<std::vector<double> > bas_val = the_ele->basis_function_value(q_pnt);
    std::vector<std::vector<std::vector<double> > > bas_grad = the_ele->basis_function_gradient(q_pnt);

    const std::vector<int>& ele_dof = the_ele->dof();
    u_int n_ele_dof = ele_dof.size();
    FullMatrix<double> ele_mat(n_ele_dof, n_ele_dof);
    Vector<double> ele_rhs(n_ele_dof);
    for (u_int l = 0;l < n_q_pnt;++ l) {
      double JxW = vol*jac[l]*qi.weight(l);
      double f_val = _f_(q_pnt[l]);
      for (u_int i = 0;i < n_ele_dof;++ i) {
        for (u_int j = 0;j < n_ele_dof;++ j) {
          ele_mat(i, j) += JxW*(bas_val[i][l]*bas_val[j][l] +
                                innerProduct(bas_grad[i][l], bas_grad[j][l]));
        }
        ele_rhs(i) += JxW*f_val*bas_val[i][l];
      }
    }
    /**
     * 此处将单元矩阵和单元载荷先计算好,然后向全局的矩阵和载荷向量上
     * 集中,可以提高效率。
     */

    std::vector<int> indices(n_ele_dof);
    for (u_int i = 0;i < n_ele_dof;++ i) {
      indices[i] = global_index(ele_dof[i]);
    }
    A.SumIntoGlobalValues(n_ele_dof, &indices[0], n_ele_dof, &indices[0], &ele_mat(0,0));
    b.SumIntoGlobalValues(n_ele_dof, &indices[0], &ele_rhs(0));
  }
  A.GlobalAssemble();
  b.GlobalAssemble();
  std::cout << "OK!" << std::endl;

  /// 准备解向量。
  Epetra_Vector x(map);

  /// 调用 AztecOO 的求解器。
  std::cout << "Solving the linear system ..." << std::flush;
  Epetra_LinearProblem problem(&A, &x, &b);
  AztecOO solver(problem);
  ML_Epetra::MultiLevelPreconditioner precond(A, true);
  solver.SetPrecOperator(&precond);
  solver.SetAztecOption(AZ_solver, AZ_cg);
  solver.SetAztecOption(AZ_output, 100);
  solver.Iterate(5000, 1.0e-12);
  std::cout << "OK!" << std::endl;

  Epetra_Map fe_map(-1, global_index.n_local_dof(), &global_index(0), 0, comm);
  FEMFunction<double,DIM> u_h(fem_space);
  Epetra_Import importer(fe_map, map);
  Epetra_Vector X(View, fe_map, &u_h(0));
  X.Import(x, importer, Add);

  char filename[1024];
  sprintf(filename, "u_h%d.dx", forest.rank());
  u_h.writeOpenDXData(filename);

  MPI_Finalize();

  return 0;
}
void 
PartitionerMetis<MeshType>::partitionImpl ( mesh_ptrtype mesh, rank_type np )
{
    LOG(INFO) << "PartitionerMetis::partitionImpl starts...";
    tic();

    // Check for an easy return
    if (np == 1)
    {
        this->singlePartition (mesh);
        return;
    }
    const dof_id_type n_elems = mesh->numElements();

    // build the graph
    // std::vector<Metis::idx_t> options(5);
    std::vector<Metis::idx_t> vwgt(n_elems);
    std::vector<Metis::idx_t> part(n_elems);

    // number of "nodes" (elements) in the graph
    Metis::idx_t n = static_cast<Metis::idx_t>(n_elems);
    // number of subdomains to create
    Metis::idx_t nparts  = static_cast<Metis::idx_t>(np); 
    // number of edges cut by the resulting partition
    Metis::idx_t edgecut = 0;

    std::map<dof_id_type, dof_id_type> global_index_map;

    {
        std::vector<dof_id_type> global_index(nelements(elements(mesh)),0);
        std::iota( global_index.begin(), global_index.end(), 0 );

        size_type cnt = 0;
        for( auto const& elt : elements(mesh) )
        {
            global_index_map.insert (std::make_pair(elt.id(), global_index[cnt++]));
        }
    }


    // Invoke METIS, but only on processor 0.
    // Then broadcast the resulting decomposition
    if ( Environment::isMasterRank() )
    {
        CSRGraphMetis<Metis::idx_t> csr_graph;
        
        csr_graph.offsets.resize(mesh->numElements()+1, 0);

        // Local scope for these
        {
#ifndef NDEBUG
            std::size_t graph_size=0;
#endif
            // build the graph in CSR format.  Note that
            // the edges in the graph will correspond to
            // face neighbors
            for( auto& elt: elements(mesh) )
            {

                // (1) first pass - get the row sizes for each element by counting the number
                // of face neighbors.  Also populate the vwght array if necessary
                const dof_id_type gid = global_index_map[elt.id()];

                CHECK( gid < vwgt.size() ) << "Invalid gid " << gid << " greater or equal than " << vwgt.size();
            
                // maybe there is a better weight?
                // The weight is used to define what a balanced graph is
                //if(!_weights)
                vwgt[gid] = elt.numPoints;
                //else
                //vwgt[gid] = static_cast<Metis::idx_t>((*_weights)[elem->id()]);

                unsigned int num_neighbors = 0;

                // Loop over the element's neighbors.  An element
                // adjacency corresponds to a face neighbor
                for ( uint16_type ms=0; ms < elt.nNeighbors(); ms++ )
                {
                    element_type const* neighbor = NULL;
                    size_type neighbor_id = elt.neighbor( ms ).first;
                    if ( neighbor_id != invalid_size_type_value )
                    {
                                        
                        num_neighbors++;
                    }
                }
                std::cout << "element id " << elt.id() << " gid: " << gid << " w: " << vwgt[gid] 
                          << " neigh: " << num_neighbors << std::endl;
                csr_graph.prepareNumberNonZeros(gid, num_neighbors);
#ifndef NDEBUG
                graph_size += num_neighbors;
#endif
            }

            csr_graph.prepareForUse();

            // (2) second pass - fill the compressed adjacency array
            for( auto& elt : elements(mesh) )
            {
                dof_id_type gid = global_index_map[elt.id()];

                unsigned int connection=0;

                // Loop over the element's neighbors.  An element
                // adjacency corresponds to a face neighbor
                for ( uint16_type ms=0; ms < elt.nNeighbors(); ms++ )
                {
                    element_type const* neighbor = NULL;
                    size_type neighbor_id = elt.neighbor( ms ).first;
                    if ( neighbor_id != invalid_size_type_value )
                    {
                        csr_graph(gid, connection++) = global_index_map[neighbor_id];

                    }
                }
            }
#ifndef NDEBUG
            // We create a non-empty vals for a disconnected graph, to
            // work around a segfault from METIS.
            DCHECK( csr_graph.vals.size() == std::max(graph_size,std::size_t(1)))
                << "Invalid graph";
#endif
        } // done building the graph

        Metis::idx_t ncon = 1;

        // Select which type of partitioning to create

        // Use recursive if the number of partitions is less than or equal to 8
        if (np <= 8)
            Metis::METIS_PartGraphRecursive(&n, &ncon, &csr_graph.offsets[0], &csr_graph.vals[0], &vwgt[0], NULL,
                                            NULL, &nparts, NULL, NULL, NULL,
                                            &edgecut, &part[0]);

        // Otherwise  use kway
        else
            Metis::METIS_PartGraphKway(&n, &ncon, &csr_graph.offsets[0], &csr_graph.vals[0], &vwgt[0], NULL,
                                       NULL, &nparts, NULL, NULL, NULL,
                                       &edgecut, &part[0]);

    } // end processor 0 part

    // Assign the returned processor ids.  The part array contains the processor
    // id for each element, but in terms of the contiguous indexing we defined
    // above
    LOG(INFO) << "PartitionerMetis::partitionImpl nelements : " << nelements(elements(mesh));
    for( auto it = mesh->beginElement(), en = mesh->endElement(); it != en; ++it )
    {
        dof_id_type gid = global_index_map[it->id()];
        CHECK( gid < part.size() ) << "Invalid gid " << gid << " greater or equal than partition size " << part.size();
        rank_type pid = static_cast<rank_type>(part[gid]);
#if 0
        mesh->elements().modify( it,
                                 [&pid]( element_type& e ) 
                                 { 
                                     e.setProcessId( pid ); 
                                     std::cout << "element id " << e.id() << " process id " << e.processId() << "\n"; 
                                 });
#else
        std::cout << "element id " << it->id() << " process id " << pid << "\n"; 
        auto e = *it;
        e.setProcessId( pid );
        mesh->elements().replace( it, e );
#endif
    }
    for( auto& e : allelements(mesh) )
    {
        std::cout << "2. element id " << e.id() << " process id " << e.processId() << "\n"; 
    }

    auto t = toc("PartitionerMetis::partitionImpl", FLAGS_v > 0 );
    LOG(INFO) << "PartitionerMetis::partitionImpl done in " << t << "s";
}
Exemple #5
0
int main(int argc, char * argv[])
{
  typedef MPI::HGeometryForest<DIM,DOW> forest_t;
  typedef MPI::BirdView<forest_t> ir_mesh_t;
  typedef FEMSpace<double,DIM,DOW> fe_space_t;  
  typedef MPI::DOF::GlobalIndex<forest_t, fe_space_t> global_index_t;

  MPI_Init(&argc, &argv);

  forest_t forest(MPI_COMM_WORLD);

  ir_mesh_t ir_mesh;
  MPI::load_mesh(argv[1], forest, ir_mesh); /// 从一个目录中读入网格数据

  int round = 0;
  if (argc >= 3) round = atoi(argv[2]);

  ir_mesh.globalRefine(round);
  ir_mesh.semiregularize();
  ir_mesh.regularize(false);

  TemplateGeometry<DIM> tri;
  tri.readData("triangle.tmp_geo");
  CoordTransform<DIM,DIM> tri_ct;
  tri_ct.readData("triangle.crd_trs");
  TemplateDOF<DIM> tri_td(tri);
  tri_td.readData("triangle.1.tmp_dof");
  BasisFunctionAdmin<double,DIM,DIM> tri_bf(tri_td);
  tri_bf.readData("triangle.1.bas_fun");

  std::vector<TemplateElement<double,DIM,DIM> > tmp_ele(1);
  tmp_ele[0].reinit(tri, tri_td, tri_ct, tri_bf);

  RegularMesh<DIM,DOW>& mesh = ir_mesh.regularMesh();
  fe_space_t fem_space(mesh, tmp_ele);
  u_int n_ele = mesh.n_geometry(DIM);
  fem_space.element().resize(n_ele);
  for (int i = 0;i < n_ele;i ++) {
    fem_space.element(i).reinit(fem_space, i, 0);
  }
  fem_space.buildElement();
  fem_space.buildDof();
  fem_space.buildDofBoundaryMark();

  std::cout << "Building global indices ... " << std::flush;
  global_index_t global_index(forest, fem_space);
  global_index.build();
  std::cout << "OK!" << std::endl;

  Epetra_MpiComm comm(forest.communicator());
  Epetra_Map map(global_index.n_global_dof(), global_index.n_primary_dof(), 0, comm);
  global_index.build_epetra_map(map);

  /// 构造 Epetra 的分布式稀疏矩阵模板
  std::cout << "Build sparsity pattern ... " << std::flush;
  Epetra_FECrsGraph G(Copy, map, 10);
  fe_space_t::ElementIterator
    the_ele = fem_space.beginElement(),
    end_ele = fem_space.endElement();
  for (;the_ele != end_ele;++ the_ele) {
    const std::vector<int>& ele_dof = the_ele->dof();
    u_int n_ele_dof = ele_dof.size();

    /**
     * 建立从局部自由度数组到全局自由度数组的映射表,这是实现分布式并行
     * 状态下的数据结构的关键一步。
     */
    std::vector<int> indices(n_ele_dof);
    for (u_int i = 0;i < n_ele_dof;++ i) {
      indices[i] = global_index(ele_dof[i]);
    }
    G.InsertGlobalIndices(n_ele_dof, &indices[0], n_ele_dof, &indices[0]);
  }
  G.GlobalAssemble();
  std::cout << "OK!" << std::endl;

  /// 准备构造 Epetra 的分布式稀疏矩阵和计算分布式右端项
  std::cout << "Build sparse matrix ... " << std::flush;
  Epetra_FECrsMatrix A(Copy, G);
  Epetra_FEVector b(map);
  the_ele = fem_space.beginElement();
  for (;the_ele != end_ele;++ the_ele) {
    double vol = the_ele->templateElement().volume();
    const QuadratureInfo<DIM>& qi = the_ele->findQuadratureInfo(5);
    std::vector<Point<DIM> > q_pnt = the_ele->local_to_global(qi.quadraturePoint());
    int n_q_pnt = qi.n_quadraturePoint();
    std::vector<double> jac = the_ele->local_to_global_jacobian(qi.quadraturePoint());
    std::vector<std::vector<double> > bas_val = the_ele->basis_function_value(q_pnt);
    std::vector<std::vector<std::vector<double> > > bas_grad = the_ele->basis_function_gradient(q_pnt);

    const std::vector<int>& ele_dof = the_ele->dof();
    u_int n_ele_dof = ele_dof.size();
    FullMatrix<double> ele_mat(n_ele_dof, n_ele_dof);
    Vector<double> ele_rhs(n_ele_dof);
    for (u_int l = 0;l < n_q_pnt;++ l) {
      double JxW = vol*jac[l]*qi.weight(l);
      double f_val = _f_(q_pnt[l]);
      for (u_int i = 0;i < n_ele_dof;++ i) {
        for (u_int j = 0;j < n_ele_dof;++ j) {
          ele_mat(i, j) += JxW*(innerProduct(bas_grad[i][l], bas_grad[j][l]));
        }
        ele_rhs(i) += JxW*f_val*bas_val[i][l];
      }
    }
    /**
     * 此处将单元矩阵和单元载荷先计算好,然后向全局的矩阵和载荷向量上
     * 集中,可以提高效率。
     */

    std::vector<int> indices(n_ele_dof);
    for (u_int i = 0;i < n_ele_dof;++ i) {
      indices[i] = global_index(ele_dof[i]);
    }
    A.SumIntoGlobalValues(n_ele_dof, &indices[0], n_ele_dof, &indices[0], &ele_mat(0,0));
    b.SumIntoGlobalValues(n_ele_dof, &indices[0], &ele_rhs(0));
  }
  A.GlobalAssemble();
  b.GlobalAssemble();
  std::cout << "OK!" << std::endl;

  /// 准备解向量。
  Epetra_FEVector x(map);

  /// 加上狄氏边值条件
  u_int n_bnd_dof = 0; /// 首先清点边界上自由度的个数
  for (u_int i = 0;i < fem_space.n_dof();++ i) {
    if (fem_space.dofBoundaryMark(i) > 0) {
      /// 如果不是在主几何体上就不做
      if (! global_index.is_dof_on_primary_geometry(i)) continue;

      n_bnd_dof += 1;
    }
  }

  /// 准备空间存储边界上全局标号、自变量和右端项
  std::vector<int> bnd_idx(n_bnd_dof);
  std::vector<double> x_entry(n_bnd_dof), rhs_entry(n_bnd_dof);

  /// 对自由度做循环
  for (u_int i = 0, j = 0;i < fem_space.n_dof();++ i) {
    if (fem_space.dofBoundaryMark(i) > 0) { /// 边界上的自由度?
      /// 如果不是在主几何体上就不做
      if (! global_index.is_dof_on_primary_geometry(i)) continue;

      const int& idx = global_index(i); /// 行的全局标号
      bnd_idx[j] = idx; 

      /// 修改矩阵
      int lrid = A.LRID(idx);
      int row_nnz, *row_idx;
      double *row_entry, row_diag;
      A.ExtractMyRowView(lrid, row_nnz, row_entry, row_idx); /// 取出矩阵的行
      for (int k = 0;k < row_nnz;++ k) { /// 对矩阵的行进行修改
        if (A.LCID(row_idx[k]) != lrid) {   /// 如果不是对角元
          row_entry[k] = 0.0;      /// 则将矩阵元素清零
        } else {                   /// 而对角元保持不变
          row_diag = row_entry[k]; /// 并记录下对角元
        }
      }

      /// 计算并记下自变量和右端项,假设自由度值为插值量
      double u_b_val = _u_b_(fem_space.dofInfo(i).interp_point);
      x_entry[j] = u_b_val;
      rhs_entry[j] = row_diag*u_b_val;

      j += 1;
    }
  } 
  std::cout << "# DOF on the boundary: " << n_bnd_dof << std::endl;

  /// 修改解变量和右端项
  x.ReplaceGlobalValues(n_bnd_dof, &bnd_idx[0], &x_entry[0]);
  b.ReplaceGlobalValues(n_bnd_dof, &bnd_idx[0], &rhs_entry[0]);

  /// 调用 AztecOO 的求解器。
  std::cout << "Solving the linear system ..." << std::flush;
  Epetra_LinearProblem problem(&A, &x, &b);
  AztecOO solver(problem);
  ML_Epetra::MultiLevelPreconditioner precond(A, true);
  solver.SetPrecOperator(&precond);
  solver.SetAztecOption(AZ_solver, AZ_gmres);
  solver.SetAztecOption(AZ_output, 100);
  solver.Iterate(5000, 1.0e-12);
  std::cout << "OK!" << std::endl;

  Epetra_Map fe_map(-1, global_index.n_local_dof(), &global_index(0), 0, comm);
  FEMFunction<double,DIM> u_h(fem_space);
  Epetra_Import importer(fe_map, map);
  Epetra_Vector X(View, fe_map, &u_h(0));
  X.Import(x, importer, Add);

  char filename[1024];
  sprintf(filename, "u_h%d.dx", forest.rank());
  u_h.writeOpenDXData(filename);

  MPI_Finalize();

  return 0;
}
Exemple #6
0
int main(int argc, char * argv[])
{
  typedef MPI::HGeometryForest<DIM,DOW> forest_t;
  typedef MPI::BirdView<forest_t> ir_mesh_t;
  typedef FEMSpace<double,DIM,DOW> fe_space_t;  
  typedef MPI::DOF::GlobalIndex<forest_t, fe_space_t> global_index_t;

  PetscInitialize(&argc, &argv, (char *)NULL, help);

  forest_t forest(PETSC_COMM_WORLD);
  forest.readMesh(argv[1]);
  ir_mesh_t ir_mesh(forest);

  int round = 0;
  if (argc >= 3) round = atoi(argv[2]);

  ir_mesh.globalRefine(round);
  ir_mesh.semiregularize();
  ir_mesh.regularize(false);

  setenv("AFEPACK_TEMPLATE_PATH", "/usr/local/AFEPack/template/triangle", 1);

  TemplateGeometry<DIM> tri;
  tri.readData("triangle.tmp_geo");
  CoordTransform<DIM,DIM> tri_ct;
  tri_ct.readData("triangle.crd_trs");
  TemplateDOF<DIM> tri_td(tri);
  tri_td.readData("triangle.1.tmp_dof");
  BasisFunctionAdmin<double,DIM,DIM> tri_bf(tri_td);
  tri_bf.readData("triangle.1.bas_fun");

  std::vector<TemplateElement<double,DIM,DIM> > tmp_ele(1);
  tmp_ele[0].reinit(tri, tri_td, tri_ct, tri_bf);

  RegularMesh<DIM,DOW>& mesh = ir_mesh.regularMesh();
  fe_space_t fem_space(mesh, tmp_ele);
  u_int n_ele = mesh.n_geometry(DIM);
  fem_space.element().resize(n_ele);
  for (int i = 0;i < n_ele;i ++) {
    fem_space.element(i).reinit(fem_space, i, 0);
  }
  fem_space.buildElement();
  fem_space.buildDof();
  fem_space.buildDofBoundaryMark();

  std::cout << "Building global indices ... " << std::flush;
  global_index_t global_index(forest, fem_space);
  global_index.build();
  std::cout << "OK!" << std::endl;

  std::cout << "Building the linear system ... " << std::flush;
  Mat A;
  Vec x, b;
  MatCreateMPIAIJ(PETSC_COMM_WORLD, 
                  global_index.n_primary_dof(), global_index.n_primary_dof(),
                  PETSC_DECIDE, PETSC_DECIDE,
                  0, PETSC_NULL, 0, PETSC_NULL, &A);
  VecCreateMPI(PETSC_COMM_WORLD, global_index.n_primary_dof(), PETSC_DECIDE, &b);
  fe_space_t::ElementIterator
    the_ele = fem_space.beginElement(),
    end_ele = fem_space.endElement();
  for (;the_ele != end_ele;++ the_ele) {
    double vol = the_ele->templateElement().volume();
    const QuadratureInfo<DIM>& qi = the_ele->findQuadratureInfo(5);
    std::vector<Point<DIM> > q_pnt = the_ele->local_to_global(qi.quadraturePoint());
    int n_q_pnt = qi.n_quadraturePoint();
    std::vector<double> jac = the_ele->local_to_global_jacobian(qi.quadraturePoint());
    std::vector<std::vector<double> > bas_val = the_ele->basis_function_value(q_pnt);
    std::vector<std::vector<std::vector<double> > > bas_grad = the_ele->basis_function_gradient(q_pnt);

    const std::vector<int>& ele_dof = the_ele->dof();
    u_int n_ele_dof = ele_dof.size();
    FullMatrix<double> ele_mat(n_ele_dof, n_ele_dof);
    Vector<double> ele_rhs(n_ele_dof);
    for (u_int l = 0;l < n_q_pnt;++ l) {
      double JxW = vol*jac[l]*qi.weight(l);
      double f_val = _f_(q_pnt[l]);
      for (u_int i = 0;i < n_ele_dof;++ i) {
        for (u_int j = 0;j < n_ele_dof;++ j) {
          ele_mat(i, j) += JxW*(bas_val[i][l]*bas_val[j][l] +
                                innerProduct(bas_grad[i][l], bas_grad[j][l]));
        }
        ele_rhs(i) += JxW*f_val*bas_val[i][l];
      }
    }
    /**
     * 此处将单元矩阵和单元载荷先计算好,然后向全局的矩阵和载荷向量上
     * 集中,可以提高效率。
     */

    std::vector<int> indices(n_ele_dof);
    for (u_int i = 0;i < n_ele_dof;++ i) {
      indices[i] = global_index(ele_dof[i]);
    }
    MatSetValues(A, n_ele_dof, &indices[0], n_ele_dof, &indices[0], &ele_mat(0,0), ADD_VALUES);
    VecSetValues(b, n_ele_dof, &indices[0], &ele_rhs(0), ADD_VALUES);
  }
  MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
  VecAssemblyBegin(b);
  VecAssemblyEnd(b);
  std::cout << "OK!" << std::endl;

  /// 加上狄氏边值条件
  std::cout << "Applying the Dirichlet boundary condition ... " << std::flush;
  u_int n_bnd_dof = 0; /// 首先清点边界上自由度的个数
  for (u_int i = 0;i < fem_space.n_dof();++ i) {
    if (fem_space.dofBoundaryMark(i) > 0) {
      /// 如果不是在主几何体上就不做
      if (! global_index.is_dof_on_primary_geometry(i)) continue;

      n_bnd_dof += 1;
    }
  }

  /// 准备空间存储边界上全局标号、自变量和右端项
  std::vector<int> bnd_idx(n_bnd_dof);
  std::vector<double> rhs_entry(n_bnd_dof);

  /// 对自由度做循环
  for (u_int i = 0, j = 0;i < fem_space.n_dof();++ i) {
    if (fem_space.dofBoundaryMark(i) > 0) { /// 边界上的自由度?
      /// 如果不是在主几何体上就不做
      if (! global_index.is_dof_on_primary_geometry(i)) continue;

      bnd_idx[j] = global_index(i); /// 行的全局标号
      /// 计算并记下自变量和右端项,假设自由度值为插值量
      double u_b_val = _u_b_(fem_space.dofInfo(i).interp_point);
      rhs_entry[j] = u_b_val;

      j += 1;
    }
  }
  /// 将矩阵修改为对角元 1.0,其它元素为零的状态
  /// MatSetOption(A, MAT_KEEP_ZEROED_ROWS);
  MatZeroRows(A, n_bnd_dof, &bnd_idx[0], 1.0); 

  /// 修改右端项为相应点的边值
  Vec rhs_bnd;
  VecCreateSeqWithArray(PETSC_COMM_SELF, n_bnd_dof, &rhs_entry[0], &rhs_bnd);
  IS is_bnd;
  ISCreateGeneralWithArray(PETSC_COMM_WORLD, n_bnd_dof, &bnd_idx[0], &is_bnd);
  VecScatter bnd_scatter;
  VecScatterCreate(rhs_bnd, PETSC_NULL, b, is_bnd, &bnd_scatter);
  VecScatterBegin(bnd_scatter, rhs_bnd, b, INSERT_VALUES, SCATTER_FORWARD);
  VecScatterEnd(bnd_scatter, rhs_bnd, b, INSERT_VALUES, SCATTER_FORWARD);
  VecDestroy(rhs_bnd);
  ISDestroy(is_bnd);
  VecScatterDestroy(bnd_scatter);
  std::cout << "OK!" << std::endl;

  VecDuplicate(b, &x);

  KSP solver;
  KSPCreate(PETSC_COMM_WORLD, &solver);
  KSPSetOperators(solver, A, A, SAME_NONZERO_PATTERN);
  KSPSetType(solver, KSPGMRES);
  KSPSetFromOptions(solver);
  KSPSolve(solver, b, x);

  if (forest.rank() == 0) {
    KSPConvergedReason reason;
    KSPGetConvergedReason(solver,&reason);
    if (reason == KSP_DIVERGED_INDEFINITE_PC) {
      printf("\nDivergence because of indefinite preconditioner;\n");
      printf("Run the executable again but with -pc_ilu_shift option.\n");
    } else if (reason<0) {
      printf("\nOther kind of divergence: this should not happen.\n");
    } else {
      PetscInt its;
      KSPGetIterationNumber(solver,&its);
      printf("\nConvergence in %d iterations.\n",(int)its);
    }
    printf("\n");
  }

  MatDestroy(A);
  VecDestroy(b);
  KSPDestroy(solver);

  FEMFunction<double,DIM> u_h(fem_space);
  Vec X;
  VecCreateSeqWithArray(PETSC_COMM_SELF, global_index.n_local_dof(), &u_h(0), &X);

  std::vector<int> primary_idx(global_index.n_primary_dof());
  global_index.build_primary_index(&primary_idx[0]);
  IS is;
  ISCreateGeneralWithArray(PETSC_COMM_WORLD, global_index.n_local_dof(),
                           &global_index(0), &is);
  VecScatter scatter;
  VecScatterCreate(x, is, X, PETSC_NULL, &scatter);
  VecScatterBegin(scatter, x, X, INSERT_VALUES, SCATTER_FORWARD);
  VecScatterEnd(scatter, x, X, INSERT_VALUES, SCATTER_FORWARD);

  VecDestroy(x);
  VecDestroy(X);
  VecScatterDestroy(scatter);
  ISDestroy(is);

  char filename[1024];
  sprintf(filename, "u_h%d.dx", forest.rank());
  u_h.writeOpenDXData(filename);

  PetscFinalize();

  return 0;
}
Exemple #7
0
int main(int argc,char **argv)
{
  Mat            A,B,C,PtAP,PtAP_copy,PtAP_squared;
  PetscInt       i,M,N,Istart,Iend,n=7,j,J,Ii,m=8,k,o=1;
  PetscScalar    v;
  PetscErrorCode ierr;
  PetscBool      equal=PETSC_FALSE,mat_view=PETSC_FALSE;
  char           stencil[PETSC_MAX_PATH_LEN];
#if defined(PETSC_USE_LOG)
  PetscLogStage  fullMatMatMultStage;
#endif

  ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr;
  ierr = PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetInt(NULL,NULL,"-o",&o,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsHasName(NULL,NULL,"-result_view",&mat_view);CHKERRQ(ierr);
  ierr = PetscOptionsGetString(NULL,NULL,"-stencil",stencil,PETSC_MAX_PATH_LEN,NULL);CHKERRQ(ierr);

  /* Create a aij matrix A */
  M    = N = m*n*o;
  ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
  ierr = MatSetFromOptions(A);CHKERRQ(ierr);

  /* Consistency checks */
  if (o < 1 || m < 1 || n < 1) SETERRQ(PETSC_COMM_WORLD,1,"Dimensions need to be larger than zero!");

  /************ 2D stencils ***************/
  ierr = PetscStrcmp(stencil, "2d5point", &equal);CHKERRQ(ierr);
  if (equal) {   /* 5-point stencil, 2D */
    ierr = MatMPIAIJSetPreallocation(A,5,NULL,5,NULL);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
    ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
    for (Ii=Istart; Ii<Iend; Ii++) {
      v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
      if (i>0)   {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (j>0)   {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      v = 4.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = PetscStrcmp(stencil, "2d9point", &equal);CHKERRQ(ierr);
  if (equal) {      /* 9-point stencil, 2D */
    ierr = MatMPIAIJSetPreallocation(A,9,NULL,9,NULL);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(A,9,NULL);CHKERRQ(ierr);
    ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
    for (Ii=Istart; Ii<Iend; Ii++) {
      v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
      if (i>0)            {J = global_index(i-1,j,  k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (i>0   && j>0)   {J = global_index(i-1,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (         j>0)   {J = global_index(i,  j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (i<m-1 && j>0)   {J = global_index(i+1,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (i<m-1)          {J = global_index(i+1,j,  k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (i<m-1 && j<n-1) {J = global_index(i+1,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (j<n-1)          {J = global_index(i,  j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (i>0   && j<n-1) {J = global_index(i-1,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      v = 8.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = PetscStrcmp(stencil, "2d9point2", &equal);CHKERRQ(ierr);
  if (equal) {      /* 9-point Cartesian stencil (width 2 per coordinate), 2D */
    ierr = MatMPIAIJSetPreallocation(A,9,NULL,9,NULL);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(A,9,NULL);CHKERRQ(ierr);
    ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
    for (Ii=Istart; Ii<Iend; Ii++) {
      v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
      if (i>0)   {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (i>1)   {J = global_index(i-2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (i<m-2) {J = global_index(i+2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (j>0)   {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (j>1)   {J = global_index(i,j-2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (j<n-2) {J = global_index(i,j+2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      v = 8.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = PetscStrcmp(stencil, "2d13point", &equal);CHKERRQ(ierr);
  if (equal) {      /* 13-point Cartesian stencil (width 3 per coordinate), 2D */
    ierr = MatMPIAIJSetPreallocation(A,13,NULL,13,NULL);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(A,13,NULL);CHKERRQ(ierr);
    ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
    for (Ii=Istart; Ii<Iend; Ii++) {
      v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
      if (i>0)   {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (i>1)   {J = global_index(i-2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (i>2)   {J = global_index(i-3,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (i<m-2) {J = global_index(i+2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (i<m-3) {J = global_index(i+3,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (j>0)   {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (j>1)   {J = global_index(i,j-2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (j>2)   {J = global_index(i,j-3,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (j<n-2) {J = global_index(i,j+2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (j<n-3) {J = global_index(i,j+3,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      v = 12.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  /************ 3D stencils ***************/
  ierr = PetscStrcmp(stencil, "3d7point", &equal);CHKERRQ(ierr);
  if (equal) {      /* 7-point stencil, 3D */
    ierr = MatMPIAIJSetPreallocation(A,7,NULL,7,NULL);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(A,7,NULL);CHKERRQ(ierr);
    ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
    for (Ii=Istart; Ii<Iend; Ii++) {
      v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
      if (i>0)   {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (j>0)   {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (k>0)   {J = global_index(i,j,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (k<o-1) {J = global_index(i,j,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      v = 6.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = PetscStrcmp(stencil, "3d13point", &equal);CHKERRQ(ierr);
  if (equal) {      /* 13-point stencil, 3D */
    ierr = MatMPIAIJSetPreallocation(A,13,NULL,13,NULL);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(A,13,NULL);CHKERRQ(ierr);
    ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
    for (Ii=Istart; Ii<Iend; Ii++) {
      v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
      if (i>0)   {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (i>1)   {J = global_index(i-2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (i<m-2) {J = global_index(i+2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (j>0)   {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (j>1)   {J = global_index(i,j-2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (j<n-2) {J = global_index(i,j+2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (k>0)   {J = global_index(i,j,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (k>1)   {J = global_index(i,j,k-2,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (k<o-1) {J = global_index(i,j,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (k<o-2) {J = global_index(i,j,k+2,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      v = 12.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = PetscStrcmp(stencil, "3d19point", &equal);CHKERRQ(ierr);
  if (equal) {      /* 19-point stencil, 3D */
    ierr = MatMPIAIJSetPreallocation(A,19,NULL,19,NULL);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(A,19,NULL);CHKERRQ(ierr);
    ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
    for (Ii=Istart; Ii<Iend; Ii++) {
      v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
      /* one hop */
      if (i>0)   {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (j>0)   {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (k>0)   {J = global_index(i,j,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (k<o-1) {J = global_index(i,j,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      /* two hops */
      if (i>0   && j>0)   {J = global_index(i-1,j-1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (i>0   && k>0)   {J = global_index(i-1,j,  k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (i>0   && j<n-1) {J = global_index(i-1,j+1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (i>0   && k<o-1) {J = global_index(i-1,j,  k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (i<m-1 && j>0)   {J = global_index(i+1,j-1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (i<m-1 && k>0)   {J = global_index(i+1,j,  k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (i<m-1 && j<n-1) {J = global_index(i+1,j+1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (i<m-1 && k<o-1) {J = global_index(i+1,j,  k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (j>0   && k>0)   {J = global_index(i,  j-1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (j>0   && k<o-1) {J = global_index(i,  j-1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (j<n-1 && k>0)   {J = global_index(i,  j+1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      if (j<n-1 && k<o-1) {J = global_index(i,  j+1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
      v = 18.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = PetscStrcmp(stencil, "3d27point", &equal);CHKERRQ(ierr);
  if (equal) {      /* 27-point stencil, 3D */
    ierr = MatMPIAIJSetPreallocation(A,27,NULL,27,NULL);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(A,27,NULL);CHKERRQ(ierr);
    ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
    for (Ii=Istart; Ii<Iend; Ii++) {
      v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
      if (k>0) {
        if (j>0) {
          if (i>0)   {J = global_index(i-1,j-1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
                      J = global_index(i,  j-1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);
          if (i<m-1) {J = global_index(i+1,j-1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
        }
        {
          if (i>0)   {J = global_index(i-1,j,  k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
                      J = global_index(i,  j,  k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);
          if (i<m-1) {J = global_index(i+1,j,  k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
        }
        if (j<n-1) {
          if (i>0)   {J = global_index(i-1,j+1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
                      J = global_index(i,  j+1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);
          if (i<m-1) {J = global_index(i+1,j+1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
        }
      }
      {
        if (j>0) {
          if (i>0)   {J = global_index(i-1,j-1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
                      J = global_index(i,  j-1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);
          if (i<m-1) {J = global_index(i+1,j-1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
        }
        {
          if (i>0)   {J = global_index(i-1,j,  k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
                      J = global_index(i,  j,  k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);
          if (i<m-1) {J = global_index(i+1,j,  k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
        }
        if (j<n-1) {
          if (i>0)   {J = global_index(i-1,j+1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
                      J = global_index(i,  j+1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);
          if (i<m-1) {J = global_index(i+1,j+1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
        }
      }
      if (k<o-1) {
        if (j>0) {
          if (i>0)   {J = global_index(i-1,j-1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
                      J = global_index(i,  j-1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);
          if (i<m-1) {J = global_index(i+1,j-1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
        }
        {
          if (i>0)   {J = global_index(i-1,j,  k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
                      J = global_index(i,  j,  k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);
          if (i<m-1) {J = global_index(i+1,j,  k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
        }
        if (j<n-1) {
          if (i>0)   {J = global_index(i-1,j+1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
                      J = global_index(i,  j+1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);
          if (i<m-1) {J = global_index(i+1,j+1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
        }
      }
      v = 26.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Copy A into B in order to have a more representative benchmark (A*A has more cache hits than A*B) */
  ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);

  ierr = PetscLogStageRegister("Full MatMatMult",&fullMatMatMultStage);CHKERRQ(ierr);

  /* Test C = A*B */
  ierr = PetscLogStagePush(fullMatMatMultStage);CHKERRQ(ierr);
  ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);

  /* Test PtAP_squared = PtAP(C,C)*PtAP(C,C)  */
  ierr = MatPtAP(C,C,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&PtAP);CHKERRQ(ierr);
  ierr = MatDuplicate(PtAP,MAT_COPY_VALUES,&PtAP_copy);CHKERRQ(ierr);
  ierr = MatMatMult(PtAP,PtAP_copy,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&PtAP_squared);CHKERRQ(ierr);

  ierr = MatView(C,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  ierr = MatView(PtAP_squared,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);


  ierr = MatDestroy(&PtAP_squared);CHKERRQ(ierr);
  ierr = MatDestroy(&PtAP_copy);CHKERRQ(ierr);
  ierr = MatDestroy(&PtAP);CHKERRQ(ierr);
  ierr = MatDestroy(&C);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = MatDestroy(&A);CHKERRQ(ierr);
  ierr = PetscFinalize();
  return ierr;
}