//----------------------------------------------------------------------------- void Assembler::assemble_interior_facets( GenericTensor& A, const Form& a, UFC& ufc, std::shared_ptr<const MeshFunction<std::size_t>> domains, std::shared_ptr<const MeshFunction<std::size_t>> cell_domains, std::vector<double>* values) { // Skip assembly if there are no interior facet integrals if (!ufc.form.has_interior_facet_integrals()) return; // Set timer Timer timer("Assemble interior facets"); // Extract mesh and coefficients const Mesh& mesh = a.mesh(); // MPI rank const int my_mpi_rank = MPI::rank(mesh.mpi_comm()); // Form rank const std::size_t form_rank = ufc.form.rank(); // Collect pointers to dof maps std::vector<const GenericDofMap*> dofmaps; for (std::size_t i = 0; i < form_rank; ++i) dofmaps.push_back(a.function_space(i)->dofmap().get()); // Vector to hold dofs for cells, and a vector holding pointers to same std::vector<std::vector<dolfin::la_index>> macro_dofs(form_rank); std::vector<ArrayView<const dolfin::la_index>> macro_dof_ptrs(form_rank); // Interior facet integral const ufc::interior_facet_integral* integral = ufc.default_interior_facet_integral.get(); // Check whether integral is domain-dependent bool use_domains = domains && !domains->empty(); bool use_cell_domains = cell_domains && !cell_domains->empty(); // Compute facets and facet - cell connectivity if not already computed const std::size_t D = mesh.topology().dim(); mesh.init(D - 1); mesh.init(D - 1, D); dolfin_assert(mesh.ordered()); // Assemble over interior facets (the facets of the mesh) ufc::cell ufc_cell[2]; std::vector<double> coordinate_dofs[2]; Progress p(AssemblerBase::progress_message(A.rank(), "interior facets"), mesh.num_facets()); for (FacetIterator facet(mesh); !facet.end(); ++facet) { if (facet->num_entities(D) == 1) continue; // Check that facet is not a ghost dolfin_assert(!facet->is_ghost()); // Get integral for sub domain (if any) if (use_domains) integral = ufc.get_interior_facet_integral((*domains)[*facet]); // Skip integral if zero if (!integral) continue; // Get cells incident with facet (which is 0 and 1 here is arbitrary) dolfin_assert(facet->num_entities(D) == 2); std::size_t cell_index_plus = facet->entities(D)[0]; std::size_t cell_index_minus = facet->entities(D)[1]; if (use_cell_domains && (*cell_domains)[cell_index_plus] < (*cell_domains)[cell_index_minus]) { std::swap(cell_index_plus, cell_index_minus); } // The convention '+' = 0, '-' = 1 is from ffc const Cell cell0(mesh, cell_index_plus); const Cell cell1(mesh, cell_index_minus); // Get local index of facet with respect to each cell std::size_t local_facet0 = cell0.index(*facet); std::size_t local_facet1 = cell1.index(*facet); // Update to current pair of cells cell0.get_cell_data(ufc_cell[0], local_facet0); cell0.get_coordinate_dofs(coordinate_dofs[0]); cell1.get_cell_data(ufc_cell[1], local_facet1); cell1.get_coordinate_dofs(coordinate_dofs[1]); ufc.update(cell0, coordinate_dofs[0], ufc_cell[0], cell1, coordinate_dofs[1], ufc_cell[1], integral->enabled_coefficients()); // Tabulate dofs for each dimension on macro element for (std::size_t i = 0; i < form_rank; i++) { // Get dofs for each cell const ArrayView<const dolfin::la_index> cell_dofs0 = dofmaps[i]->cell_dofs(cell0.index()); const ArrayView<const dolfin::la_index> cell_dofs1 = dofmaps[i]->cell_dofs(cell1.index()); // Create space in macro dof vector macro_dofs[i].resize(cell_dofs0.size() + cell_dofs1.size()); // Copy cell dofs into macro dof vector std::copy(cell_dofs0.data(), cell_dofs0.data() + cell_dofs0.size(), macro_dofs[i].begin()); std::copy(cell_dofs1.data(), cell_dofs1.data() + cell_dofs1.size(), macro_dofs[i].begin() + cell_dofs0.size()); macro_dof_ptrs[i].set(macro_dofs[i]); } // Tabulate interior facet tensor on macro element integral->tabulate_tensor(ufc.macro_A.data(), ufc.macro_w(), coordinate_dofs[0].data(), coordinate_dofs[1].data(), local_facet0, local_facet1, ufc_cell[0].orientation, ufc_cell[1].orientation); if (cell0.is_ghost() != cell1.is_ghost()) { int ghost_rank = -1; if (cell0.is_ghost()) ghost_rank = cell0.owner(); else ghost_rank = cell1.owner(); dolfin_assert(my_mpi_rank != ghost_rank); dolfin_assert(ghost_rank != -1); if (ghost_rank < my_mpi_rank) continue; } // Add entries to global tensor A.add_local(ufc.macro_A.data(), macro_dof_ptrs); p++; } }
//----------------------------------------------------------------------------- void Assembler::assemble_vertices( GenericTensor& A, const Form& a, UFC& ufc, std::shared_ptr<const MeshFunction<std::size_t>> domains) { // Skip assembly if there are no point integrals if (!ufc.form.has_vertex_integrals()) return; // Set timer Timer timer("Assemble vertices"); // Extract mesh const Mesh& mesh = a.mesh(); // Compute cell and vertex - cell connectivity if not already // computed const std::size_t D = mesh.topology().dim(); mesh.init(0); mesh.init(0, D); dolfin_assert(mesh.ordered()); // Logics for shared vertices const bool has_shared_vertices = mesh.topology().have_shared_entities(0); const std::map<unsigned int, std::set<unsigned int>>& shared_vertices = mesh.topology().shared_entities(0); // Form rank const std::size_t form_rank = ufc.form.rank(); // Collect pointers to dof maps std::vector<const GenericDofMap*> dofmaps(form_rank); // Create a vector for storying local to local map for vertex entity // dofs std::vector<std::vector<std::size_t>> local_to_local_dofs(form_rank); // Create a values vector to be used to fan out local tabulated // values to the global tensor std::vector<double> local_values(1); // Vector to hold local dof map for a vertex std::vector<std::vector<dolfin::la_index>> global_dofs(form_rank); std::vector<ArrayView<const dolfin::la_index>> global_dofs_p(form_rank); std::vector<dolfin::la_index> local_dof_size(form_rank); for (std::size_t i = 0; i < form_rank; ++i) { dofmaps[i] = a.function_space(i)->dofmap().get(); // Check that the test and trial space as dofs on the vertices if (dofmaps[i]->num_entity_dofs(0) == 0) { dolfin_error("Assembler.cpp", "assemble form over vertices", "Expecting test and trial spaces to have dofs on "\ "vertices for point integrals"); } // Check that the test and trial spaces do not have dofs other // than on vertices for (std::size_t j = 1; j <= D; j++) { if (dofmaps[i]->num_entity_dofs(j)!=0) { dolfin_error("Assembler.cpp", "assemble form over vertices", "Expecting test and trial spaces to only have dofs on " \ "vertices for point integrals"); } } // Resize local values so it can hold dofs on one vertex local_values.resize(local_values.size()*dofmaps[i]->num_entity_dofs(0)); // Resize local to local map according to the number of vertex // entities dofs local_to_local_dofs[i].resize(dofmaps[i]->num_entity_dofs(0)); // Resize local dof map vector global_dofs[i].resize(dofmaps[i]->num_entity_dofs(0)); // Get size of local dofs local_dof_size[i] = dofmaps[i]->ownership_range().second - dofmaps[i]->ownership_range().first; // Get pointer to global dofs global_dofs_p[i].set(global_dofs[i]); } // Vector to hold dof map for a cell std::vector<ArrayView<const dolfin::la_index>> dofs(form_rank); // Exterior point integral const ufc::vertex_integral* integral = ufc.default_vertex_integral.get(); // Check whether integral is domain-dependent bool use_domains = domains && !domains->empty(); // MPI rank const unsigned int my_mpi_rank = MPI::rank(mesh.mpi_comm()); // Assemble over vertices ufc::cell ufc_cell; std::vector<double> coordinate_dofs; Progress p(AssemblerBase::progress_message(A.rank(), "vertices"), mesh.num_vertices()); for (VertexIterator vert(mesh); !vert.end(); ++vert) { // Get integral for sub domain (if any) if (use_domains) integral = ufc.get_vertex_integral((*domains)[*vert]); // Skip integral if zero if (!integral) continue; // Check if assembling a scalar and a vertex is shared if (form_rank == 0 && has_shared_vertices) { // Find shared processes for this global vertex std::map<unsigned int, std::set<unsigned int>>::const_iterator e; e = shared_vertices.find(vert->index()); // If vertex is shared and this rank is not the lowest do not // include the contribution from this vertex to scalar sum if (e != shared_vertices.end()) { bool skip_vertex = false; std::set<unsigned int>::const_iterator it; for (it = e->second.begin(); it != e->second.end(); it++) { // Check if a shared vertex has a lower process rank if (*it < my_mpi_rank) { skip_vertex = true; break; } } if (skip_vertex) continue; } } // Get mesh cell to which mesh vertex belongs (pick first) Cell mesh_cell(mesh, vert->entities(D)[0]); // Check that cell is not a ghost dolfin_assert(!mesh_cell.is_ghost()); // Get local index of vertex with respect to the cell const std::size_t local_vertex = mesh_cell.index(*vert); // Update UFC cell mesh_cell.get_cell_data(ufc_cell); mesh_cell.get_coordinate_dofs(coordinate_dofs); // Update UFC object ufc.update(mesh_cell, coordinate_dofs, ufc_cell, integral->enabled_coefficients()); // Tabulate vertex tensor integral->tabulate_tensor(ufc.A.data(), ufc.w(), coordinate_dofs.data(), local_vertex, ufc_cell.orientation); // For rank 1 and 2 tensors we need to check if tabulated dofs for // the test space is within the local range bool owns_all_dofs = true; for (std::size_t i = 0; i < form_rank; ++i) { // Get local-to-global dof maps for cell dofs[i] = dofmaps[i]->cell_dofs(mesh_cell.index()); // Get local dofs of the local vertex dofmaps[i]->tabulate_entity_dofs(local_to_local_dofs[i], 0, local_vertex); // Copy cell dofs to local dofs and check owner ship range for (std::size_t j = 0; j < local_to_local_dofs[i].size(); ++j) { global_dofs[i][j] = dofs[i][local_to_local_dofs[i][j]]; // It is the dofs for the test space that determines if a dof // is owned by a process, therefore i==0 if (i == 0 && global_dofs[i][j] >= local_dof_size[i]) { owns_all_dofs = false; break; } } } // If not owning all dofs if (!owns_all_dofs) continue; // Scalar if (form_rank == 0) { // Add entries to global tensor A.add_local(ufc.A.data(), dofs); } else if (form_rank == 1) { // Copy tabulated tensor to local value vector for (std::size_t i = 0; i < local_to_local_dofs[0].size(); ++i) local_values[i] = ufc.A[local_to_local_dofs[0][i]]; // Add local entries to global tensor A.add_local(local_values.data(), global_dofs_p); } else { // Copy tabulated tensor to local value vector const std::size_t num_cols = dofs[1].size(); for (std::size_t i = 0; i < local_to_local_dofs[0].size(); ++i) { for (std::size_t j = 0; j < local_to_local_dofs[1].size(); ++j) { local_values[i*local_to_local_dofs[1].size() + j] = ufc.A[local_to_local_dofs[0][i]*num_cols + local_to_local_dofs[1][j]]; } } // Add local entries to global tensor A.add_local(local_values.data(), global_dofs_p); } p++; } }
//----------------------------------------------------------------------------- void Assembler::assemble_exterior_facets( GenericTensor& A, const Form& a, UFC& ufc, std::shared_ptr<const MeshFunction<std::size_t>> domains, std::vector<double>* values) { // Skip assembly if there are no exterior facet integrals if (!ufc.form.has_exterior_facet_integrals()) return; // Set timer Timer timer("Assemble exterior facets"); // Extract mesh const Mesh& mesh = a.mesh(); // Form rank const std::size_t form_rank = ufc.form.rank(); // Collect pointers to dof maps std::vector<const GenericDofMap*> dofmaps; for (std::size_t i = 0; i < form_rank; ++i) dofmaps.push_back(a.function_space(i)->dofmap().get()); // Vector to hold dof map for a cell std::vector<ArrayView<const dolfin::la_index>> dofs(form_rank); // Exterior facet integral const ufc::exterior_facet_integral* integral = ufc.default_exterior_facet_integral.get(); // Check whether integral is domain-dependent bool use_domains = domains && !domains->empty(); // Compute facets and facet - cell connectivity if not already computed const std::size_t D = mesh.topology().dim(); mesh.init(D - 1); mesh.init(D - 1, D); dolfin_assert(mesh.ordered()); // Assemble over exterior facets (the cells of the boundary) ufc::cell ufc_cell; std::vector<double> coordinate_dofs; Progress p(AssemblerBase::progress_message(A.rank(), "exterior facets"), mesh.num_facets()); for (FacetIterator facet(mesh); !facet.end(); ++facet) { // Only consider exterior facets if (!facet->exterior()) { p++; continue; } // Get integral for sub domain (if any) if (use_domains) integral = ufc.get_exterior_facet_integral((*domains)[*facet]); // Skip integral if zero if (!integral) continue; // Get mesh cell to which mesh facet belongs (pick first, there is // only one) dolfin_assert(facet->num_entities(D) == 1); Cell mesh_cell(mesh, facet->entities(D)[0]); // Check that cell is not a ghost dolfin_assert(!mesh_cell.is_ghost()); // Get local index of facet with respect to the cell const std::size_t local_facet = mesh_cell.index(*facet); // Update UFC cell mesh_cell.get_cell_data(ufc_cell, local_facet); mesh_cell.get_coordinate_dofs(coordinate_dofs); // Update UFC object ufc.update(mesh_cell, coordinate_dofs, ufc_cell, integral->enabled_coefficients()); // Get local-to-global dof maps for cell for (std::size_t i = 0; i < form_rank; ++i) dofs[i] = dofmaps[i]->cell_dofs(mesh_cell.index()); // Tabulate exterior facet tensor integral->tabulate_tensor(ufc.A.data(), ufc.w(), coordinate_dofs.data(), local_facet, ufc_cell.orientation); // Add entries to global tensor A.add_local(ufc.A.data(), dofs); p++; } }
//----------------------------------------------------------------------------- void Assembler::assemble_cells( GenericTensor& A, const Form& a, UFC& ufc, std::shared_ptr<const MeshFunction<std::size_t>> domains, std::vector<double>* values) { // Skip assembly if there are no cell integrals if (!ufc.form.has_cell_integrals()) return; // Set timer Timer timer("Assemble cells"); // Extract mesh const Mesh& mesh = a.mesh(); // Form rank const std::size_t form_rank = ufc.form.rank(); // Check if form is a functional const bool is_cell_functional = (values && form_rank == 0) ? true : false; // Collect pointers to dof maps std::vector<const GenericDofMap*> dofmaps; for (std::size_t i = 0; i < form_rank; ++i) dofmaps.push_back(a.function_space(i)->dofmap().get()); // Vector to hold dof map for a cell std::vector<ArrayView<const dolfin::la_index>> dofs(form_rank); // Cell integral ufc::cell_integral* integral = ufc.default_cell_integral.get(); // Check whether integral is domain-dependent bool use_domains = domains && !domains->empty(); // Assemble over cells ufc::cell ufc_cell; std::vector<double> coordinate_dofs; Progress p(AssemblerBase::progress_message(A.rank(), "cells"), mesh.num_cells()); for (CellIterator cell(mesh); !cell.end(); ++cell) { // Get integral for sub domain (if any) if (use_domains) integral = ufc.get_cell_integral((*domains)[*cell]); // Skip if no integral on current domain if (!integral) continue; // Check that cell is not a ghost dolfin_assert(!cell->is_ghost()); // Update to current cell cell->get_cell_data(ufc_cell); cell->get_coordinate_dofs(coordinate_dofs); ufc.update(*cell, coordinate_dofs, ufc_cell, integral->enabled_coefficients()); // Get local-to-global dof maps for cell bool empty_dofmap = false; for (std::size_t i = 0; i < form_rank; ++i) { dofs[i] = dofmaps[i]->cell_dofs(cell->index()); empty_dofmap = empty_dofmap || dofs[i].size() == 0; } // Skip if at least one dofmap is empty if (empty_dofmap) continue; // Tabulate cell tensor integral->tabulate_tensor(ufc.A.data(), ufc.w(), coordinate_dofs.data(), ufc_cell.orientation); // Add entries to global tensor. Either store values cell-by-cell // (currently only available for functionals) if (is_cell_functional) (*values)[cell->index()] = ufc.A[0]; else A.add_local(ufc.A.data(), dofs); p++; } }
//----------------------------------------------------------------------------- void OpenMpAssembler::assemble_interior_facets( GenericTensor& A, const Form& a, UFC& _ufc, std::shared_ptr<const MeshFunction<std::size_t>> domains, std::shared_ptr<const MeshFunction<std::size_t>> cell_domains, std::vector<double>* values) { warning("OpenMpAssembler::assemble_interior_facets is untested."); // Extract mesh const Mesh& mesh = a.mesh(); // Topological dimension const std::size_t D = mesh.topology().dim(); dolfin_assert(!values); // Skip assembly if there are no interior facet integrals if (!_ufc.form.has_interior_facet_integrals()) return; Timer timer("Assemble interior facets"); // Set number of OpenMP threads (from parameter systems) omp_set_num_threads(parameters["num_threads"]); // Get integral for sub domain (if any) bool use_domains = domains && !domains->empty(); bool use_cell_domains = cell_domains && !cell_domains->empty(); if (use_domains) { dolfin_error("OpenMPAssembler.cpp", "perform multithreaded assembly using OpenMP assembler", "Subdomains are not yet handled"); } // Color mesh std::vector<std::size_t> coloring_type = a.coloring(D - 1); mesh.color(coloring_type); // Dummy UFC object since each thread needs to created its own UFC object UFC ufc(_ufc); // Form rank const std::size_t form_rank = ufc.form.rank(); // Collect pointers to dof maps std::vector<const GenericDofMap*> dofmaps; for (std::size_t i = 0; i < form_rank; ++i) dofmaps.push_back(a.function_space(i)->dofmap().get()); // Vector to hold dofs for cells std::vector<std::vector<dolfin::la_index>> macro_dofs(form_rank); // Interior facet integral const ufc::interior_facet_integral* integral = ufc.default_interior_facet_integral.get(); // Compute facets and facet - cell connectivity if not already computed mesh.init(D - 1); mesh.init(D - 1, D); dolfin_assert(mesh.ordered()); // Get coloring data std::map<const std::vector<std::size_t>, std::pair<std::vector<std::size_t>, std::vector<std::vector<std::size_t>>>>::const_iterator mesh_coloring; mesh_coloring = mesh.topology().coloring.find(coloring_type); // Check that requested coloring has been computed if (mesh_coloring == mesh.topology().coloring.end()) { dolfin_error("OpenMPAssembler.cpp", "perform multithreaded assembly using OpenMP assembler", "Requested mesh coloring has not been computed"); } // Get coloring data const std::vector<std::vector<std::size_t>>& entities_of_color = mesh_coloring->second.second; // UFC cells and vertex coordinates ufc::cell ufc_cell0, ufc_cell1; std::vector<double> vertex_coordinates0, vertex_coordinates1; // Assemble over interior facets (loop over colours, then cells of same color) const std::size_t num_colors = entities_of_color.size(); for (std::size_t color = 0; color < num_colors; ++color) { // Get the array of facet indices of current color const std::vector<std::size_t>& colored_facets = entities_of_color[color]; // Number of facets of current color const int num_facets = colored_facets.size(); // OpenMP test loop over cells of the same color Progress p(AssemblerBase::progress_message(A.rank(), "interior facets"), mesh.num_facets()); #pragma omp parallel for schedule(guided, 20) firstprivate(ufc, ufc_cell0, ufc_cell1, vertex_coordinates0, vertex_coordinates1, macro_dofs, integral) for (int facet_index = 0; facet_index < num_facets; ++facet_index) { // Facet index const std::size_t index = colored_facets[facet_index]; // Create cell const Facet facet(mesh, index); // Only consider interior facets if (facet.exterior()) { p++; continue; } // Get integral for sub domain (if any) if (use_domains) integral = ufc.get_interior_facet_integral((*domains)[facet]); // Skip integral if zero if (!integral) continue; // Get cells incident with facet (which is 0 and 1 here is arbitrary) dolfin_assert(facet.num_entities(D) == 2); std::size_t cell_index_plus = facet.entities(D)[0]; std::size_t cell_index_minus = facet.entities(D)[1]; if (use_cell_domains && (*cell_domains)[cell_index_plus] < (*cell_domains)[cell_index_minus]) std::swap(cell_index_plus, cell_index_minus); // The convention '+' = 0, '-' = 1 is from ffc const Cell cell0(mesh, cell_index_plus); const Cell cell1(mesh, cell_index_minus); // Get local index of facet with respect to each cell const std::size_t local_facet0 = cell0.index(facet); const std::size_t local_facet1 = cell1.index(facet); // Update UFC cell cell0.get_vertex_coordinates(vertex_coordinates0); cell0.get_cell_data(ufc_cell0, local_facet0); cell1.get_vertex_coordinates(vertex_coordinates1); cell1.get_cell_data(ufc_cell1, local_facet1); // Update to current pair of cells ufc.update(cell0, vertex_coordinates0, ufc_cell0, cell1, vertex_coordinates1, ufc_cell1, integral->enabled_coefficients()); // Tabulate dofs for each dimension on macro element for (std::size_t i = 0; i < form_rank; i++) { // Get dofs for each cell const ArrayView<const dolfin::la_index> cell_dofs0 = dofmaps[i]->cell_dofs(cell0.index()); const ArrayView<const dolfin::la_index> cell_dofs1 = dofmaps[i]->cell_dofs(cell1.index()); // Create space in macro dof vector macro_dofs[i].resize(cell_dofs0.size() + cell_dofs1.size()); // Copy cell dofs into macro dof vector std::copy(cell_dofs0.begin(), cell_dofs0.end(), macro_dofs[i].begin()); std::copy(cell_dofs1.begin(), cell_dofs1.end(), macro_dofs[i].begin() + cell_dofs0.size()); } // Tabulate exterior interior facet tensor on macro element integral->tabulate_tensor(ufc.macro_A.data(), ufc.macro_w(), vertex_coordinates0.data(), vertex_coordinates1.data(), local_facet0, local_facet1, ufc_cell0.orientation, ufc_cell1.orientation); // Add entries to global tensor std::vector<ArrayView<const la_index>> macro_dofs_p(macro_dofs.size()); for (std::size_t i = 0; i < macro_dofs.size(); ++i) macro_dofs_p[i].set(macro_dofs[i]); A.add_local(ufc.macro_A.data(), macro_dofs_p); p++; } } }
//----------------------------------------------------------------------------- void OpenMpAssembler::assemble_cells_and_exterior_facets( GenericTensor& A, const Form& a, UFC& _ufc, std::shared_ptr<const MeshFunction<std::size_t>> cell_domains, std::shared_ptr<const MeshFunction<std::size_t>> exterior_facet_domains, std::vector<double>* values) { Timer timer("Assemble cells and exterior facets"); // Set number of OpenMP threads (from parameter systems) const int num_threads = parameters["num_threads"]; omp_set_num_threads(num_threads); // Extract mesh const Mesh& mesh = a.mesh(); // Compute facets and facet - cell connectivity if not already computed const std::size_t D = mesh.topology().dim(); mesh.init(D - 1); mesh.init(D - 1, D); dolfin_assert(mesh.ordered()); // Get connectivity const MeshConnectivity& connectivity = mesh.topology()(D, D - 1); dolfin_assert(!connectivity.empty()); // Dummy UFC object since each thread needs to created its own UFC object UFC ufc(_ufc); // Form rank const std::size_t form_rank = ufc.form.rank(); // Cell and facet integrals ufc::cell_integral* cell_integral = ufc.default_cell_integral.get(); ufc::exterior_facet_integral* facet_integral = ufc.default_exterior_facet_integral.get(); // Check whether integrals are domain-dependent bool use_cell_domains = cell_domains && !cell_domains->empty(); bool use_exterior_facet_domains = exterior_facet_domains && !exterior_facet_domains->empty(); // Collect pointers to dof maps std::vector<const GenericDofMap*> dofmaps; for (std::size_t i = 0; i < form_rank; ++i) dofmaps.push_back(a.function_space(i)->dofmap().get()); // Vector to hold dof maps for a cell std::vector<ArrayView<const dolfin::la_index>> dofs(form_rank); // FIXME: Pass or determine coloring type // Define graph type std::vector<std::size_t> coloring_type = a.coloring(mesh.topology().dim()); mesh.color(coloring_type); // Get coloring data std::map<const std::vector<std::size_t>, std::pair<std::vector<std::size_t>, std::vector<std::vector<std::size_t>>>>::const_iterator mesh_coloring; mesh_coloring = mesh.topology().coloring.find(coloring_type); if (mesh_coloring == mesh.topology().coloring.end()) { dolfin_error("OpenMPAssembler.cpp", "perform multithreaded assembly using OpenMP assembler", "Requested mesh coloring has not been computed"); } // Get coloring data const std::vector<std::vector<std::size_t>>& entities_of_color = mesh_coloring->second.second; // If assembling a scalar we need to ensure each threads assemble // its own scalar std::vector<double> scalars(num_threads, 0.0); // UFC cell and vertex coordinates ufc::cell ufc_cell; std::vector<double> vertex_coordinates; // Assemble over cells (loop over colors, then cells of same color) const std::size_t num_colors = entities_of_color.size(); for (std::size_t color = 0; color < num_colors; ++color) { // Get the array of cell indices of current color const std::vector<std::size_t>& colored_cells = entities_of_color[color]; // Number of cells of current color const int num_cell_in_color = colored_cells.size(); // OpenMP test loop over cells of the same color Progress p(AssemblerBase::progress_message(A.rank(), "cells"), num_colors); #pragma omp parallel for schedule(guided, 20) firstprivate(ufc, ufc_cell, vertex_coordinates, dofs, cell_integral, facet_integral) for (int index = 0; index < num_cell_in_color; ++index) { // Cell index const std::size_t cell_index = colored_cells[index]; // Create cell const Cell cell(mesh, cell_index); // Get integral for sub domain (if any) if (use_cell_domains) cell_integral = ufc.get_cell_integral((*cell_domains)[cell_index]); // Update to current cell cell.get_cell_data(ufc_cell); cell.get_vertex_coordinates(vertex_coordinates); // Get local-to-global dof maps for cell for (std::size_t i = 0; i < form_rank; ++i) dofs[i] = dofmaps[i]->cell_dofs(cell_index); // Get number of entries in cell tensor std::size_t dim = 1; for (std::size_t i = 0; i < form_rank; ++i) dim *= dofs[i].size(); // Tabulate cell tensor if we have a cell_integral if (cell_integral) { ufc.update(cell, vertex_coordinates, ufc_cell, cell_integral->enabled_coefficients()); cell_integral->tabulate_tensor(ufc.A.data(), ufc.w(), vertex_coordinates.data(), ufc_cell.orientation); } else std::fill(ufc.A.begin(), ufc.A.end(), 0.0); // Assemble over external facet for (FacetIterator facet(cell); !facet.end(); ++facet) { // Only consider exterior facets if (!facet->exterior()) { p++; continue; } // Get local facet index const std::size_t local_facet = cell.index(*facet); // Get integral for sub domain (if any) if (use_exterior_facet_domains) { // Get global facet index const std::size_t facet_index = connectivity(cell_index)[local_facet]; facet_integral = ufc.get_exterior_facet_integral((*exterior_facet_domains)[facet_index]); } // Skip integral if zero if (!facet_integral) continue; // FIXME: Do we really need an update version with the local // facet index? // Update UFC object ufc_cell.local_facet = local_facet; ufc.update(cell, vertex_coordinates, ufc_cell, facet_integral->enabled_coefficients()); // Tabulate tensor facet_integral->tabulate_tensor(ufc.A_facet.data(), ufc.w(), vertex_coordinates.data(), local_facet, ufc_cell.orientation); // Add facet contribution for (std::size_t i = 0; i < dim; ++i) ufc.A[i] += ufc.A_facet[i]; } // Add entries to global tensor if (values && form_rank == 0) (*values)[cell_index] = ufc.A[0]; else if (form_rank == 0) scalars[omp_get_thread_num()] += ufc.A[0]; else A.add_local(&ufc.A[0], dofs); } p++; } // If we assemble a scalar we need to sum the contributions from each thread if (form_rank == 0) { const double scalar_sum = std::accumulate(scalars.begin(), scalars.end(), 0.0); A.add_local(&scalar_sum, dofs); } }
//----------------------------------------------------------------------------- void OpenMpAssembler::assemble_cells( GenericTensor& A, const Form& a, UFC& _ufc, std::shared_ptr<const MeshFunction<std::size_t>> domains, std::vector<double>* values) { // Skip assembly if there are no cell integrals if (!_ufc.form.has_cell_integrals()) return; Timer timer("Assemble cells"); // Set number of OpenMP threads (from parameter systems) const std::size_t num_threads = parameters["num_threads"]; omp_set_num_threads(num_threads); // Extract mesh const Mesh& mesh = a.mesh(); // FIXME: Check that UFC copy constructor is dealing with copying // pointers correctly // Dummy UFC object since each thread needs to created its own UFC object UFC ufc(_ufc); // Form rank const std::size_t form_rank = ufc.form.rank(); // Cell integral const ufc::cell_integral* integral = ufc.default_cell_integral.get(); // Check whether integral is domain-dependent bool use_domains = domains && !domains->empty(); // Collect pointers to dof maps std::vector<const GenericDofMap*> dofmaps; for (std::size_t i = 0; i < form_rank; ++i) dofmaps.push_back(a.function_space(i)->dofmap().get()); // Vector to hold dof map for a cell std::vector<ArrayView<const dolfin::la_index>> dofs(form_rank); // Color mesh std::vector<std::size_t> coloring_type = a.coloring(mesh.topology().dim()); mesh.color(coloring_type); // Get coloring data std::map<const std::vector<std::size_t>, std::pair<std::vector<std::size_t>, std::vector<std::vector<std::size_t>>>>::const_iterator mesh_coloring; mesh_coloring = mesh.topology().coloring.find(coloring_type); if (mesh_coloring == mesh.topology().coloring.end()) { dolfin_error("OpenMPAssembler.cpp", "perform multithreaded assembly using OpenMP assembler", "Requested mesh coloring has not been computed"); } // Get coloring data const std::vector<std::vector<std::size_t>>& entities_of_color = mesh_coloring->second.second; // If assembling a scalar we need to ensure each threads assemble // its own scalar std::vector<double> scalars(num_threads, 0.0); // Assemble over cells (loop over colours, then cells of same color) const std::size_t num_colors = entities_of_color.size(); Progress p("Assembling cells (threaded)", num_colors); for (std::size_t color = 0; color < num_colors; ++color) { // Get the array of cell indices of current color const std::vector<std::size_t>& colored_cells = entities_of_color[color]; // Number of cells of current color const int num_cells = colored_cells.size(); ufc::cell ufc_cell; std::vector<double> vertex_coordinates; // OpenMP test loop over cells of the same color #pragma omp parallel for schedule(guided, 20) firstprivate(ufc, ufc_cell, vertex_coordinates, dofs, integral) for (int cell_index = 0; cell_index < num_cells; ++cell_index) { // Cell index const std::size_t index = colored_cells[cell_index]; // Create cell const Cell cell(mesh, index); // Get integral for sub domain (if any) if (use_domains) integral = ufc.get_cell_integral((*domains)[cell]); // Skip integral if zero if (!integral) continue; // Update to current cell cell.get_cell_data(ufc_cell); cell.get_vertex_coordinates(vertex_coordinates); ufc.update(cell, vertex_coordinates, ufc_cell, integral->enabled_coefficients()); // Get local-to-global dof maps for cell for (std::size_t i = 0; i < form_rank; ++i) dofs[i] = dofmaps[i]->cell_dofs(index); // Tabulate cell tensor integral->tabulate_tensor(ufc.A.data(), ufc.w(), vertex_coordinates.data(), ufc_cell.orientation); // Add entries to global tensor if (values && form_rank == 0) (*values)[cell_index] = ufc.A[0]; else if (form_rank == 0) scalars[omp_get_thread_num()] += ufc.A[0]; else A.add_local(ufc.A.data(), dofs); } p++; } // If we assemble a scalar we need to sum the contributions from each thread if (form_rank == 0) { const double scalar_sum = std::accumulate(scalars.begin(), scalars.end(), 0.0); A.add_local(&scalar_sum, dofs); } }