//-----------------------------------------------------------------------------
void MultiMeshAssembler::assemble(GenericTensor& A, const MultiMeshForm& a)
{
  // Developer note: This implementation does not yet handle
  // - subdomains
  // - interior facets
  // - exterior facets

  begin(PROGRESS, "Assembling tensor over multimesh function space.");

  // Initialize global tensor
  _init_global_tensor(A, a);

  // Assemble over uncut cells
  _assemble_uncut_cells(A, a);

  // Assemble over cut cells
  _assemble_cut_cells(A, a);

  // Assemble over interface
  _assemble_interface(A, a);

  // Assemble over overlap
  _assemble_overlap(A, a);

  // Finalize assembly of global tensor
  if (finalize_tensor)
    A.apply("add");

  // Lock any remaining inactive dofs
  if (A.rank() == 2)
    static_cast<GenericMatrix&>(A).ident_zeros();

  end();
}
//-----------------------------------------------------------------------------
void MultiMeshAssembler::assemble(GenericTensor& A, const MultiMeshForm& a)
{
  // Developer note: This implementation does not yet handle
  // - subdomains
  // - interior facets
  // - exterior facets

  begin(PROGRESS, "Assembling tensor over multimesh function space.");

  // Initialize global tensor
  _init_global_tensor(A, a);

  // Assemble over uncut cells
  _assemble_uncut_cells(A, a);

  // Assemble over exterior facets
  _assemble_uncut_exterior_facets(A, a);

  // Assemble over cut cells
  _assemble_cut_cells(A, a);

  // Assemble over interface
  _assemble_interface(A, a);

  // Assemble over overlap
  _assemble_overlap(A, a);

  // Finalize assembly of global tensor
  if (finalize_tensor)
    A.apply("add");

  end();
}
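//-----------------------------------------------------------------------------
// Usage sketch (illustrative only, not part of the assembler): assembling a
// bilinear multimesh form. MultiMesh::add/build and
// MultiMeshAssembler::assemble are the calls used in this file; the meshes,
// the function space `V` and the per-part forms are hypothetical, and the
// exact MultiMeshForm construction API may differ between DOLFIN versions.
//
//   auto multimesh = std::make_shared<MultiMesh>();
//   multimesh->add(mesh_0);   // background mesh (part 0)
//   multimesh->add(mesh_1);   // overlapping mesh (part 1)
//   multimesh->build();       // compute cut cells, collisions and quadrature
//
//   MultiMeshForm a(V, V);
//   a.add(a_part_0);          // Form defined on part 0
//   a.add(a_part_1);          // Form defined on part 1
//   a.build();
//
//   Matrix A;
//   MultiMeshAssembler assembler;
//   assembler.assemble(A, a);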
//----------------------------------------------------------------------------
void OpenMpAssembler::assemble(GenericTensor& A, const Form& a)
{
  // Get mesh
  const Mesh& mesh = a.mesh();

  if (MPI::size(mesh.mpi_comm()) > 1)
  {
    dolfin_error("OpenMPAssembler.cpp",
                 "perform multithreaded assembly using OpenMP assembler",
                 "The OpenMp assembler has not been tested in combination with MPI");
  }

  dolfin_assert(a.ufc_form());

  // All assembler functions above end up calling this function, which
  // in turn calls the assembler functions below to assemble over
  // cells, exterior and interior facets. Note the importance of
  // treating empty mesh functions as null pointers for the PyDOLFIN
  // interface.

  // Get cell domains
  std::shared_ptr<const MeshFunction<std::size_t>>
    cell_domains = a.cell_domains();

  // Get exterior facet domains
  std::shared_ptr<const MeshFunction<std::size_t>>
    exterior_facet_domains = a.exterior_facet_domains();

  // Get interior facet domains
  std::shared_ptr<const MeshFunction<std::size_t>>
    interior_facet_domains = a.interior_facet_domains();

  // Check form
  AssemblerBase::check(a);

  // Create data structure for local assembly data
  UFC ufc(a);

  // Initialize global tensor
  init_global_tensor(A, a);

  // FIXME: The below selections should be made robust
  if (a.ufc_form()->has_interior_facet_integrals())
    assemble_interior_facets(A, a, ufc, interior_facet_domains,
                             cell_domains, 0);

  if (a.ufc_form()->has_exterior_facet_integrals())
  {
    assemble_cells_and_exterior_facets(A, a, ufc, cell_domains,
                                       exterior_facet_domains, 0);
  }
  else
    assemble_cells(A, a, ufc, cell_domains, 0);

  // Finalize assembly of global tensor
  if (finalize_tensor)
    A.apply("add");
}
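//----------------------------------------------------------------------------
// Usage sketch (illustrative only): the OpenMP path is selected through the
// global parameter system rather than by instantiating OpenMpAssembler
// directly. A minimal sketch, assuming a ready-made form `a` (hypothetical):
//
//   parameters["num_threads"] = 4;  // > 0 routes Assembler::assemble here
//   Matrix A;
//   assemble(A, a);                 // or: Assembler().assemble(A, a)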
//-----------------------------------------------------------------------------
void Assembler::assemble_interior_facets(
  GenericTensor& A, const Form& a, UFC& ufc,
  std::shared_ptr<const MeshFunction<std::size_t>> domains,
  std::shared_ptr<const MeshFunction<std::size_t>> cell_domains,
  std::vector<double>* values)
{
  // Skip assembly if there are no interior facet integrals
  if (!ufc.form.has_interior_facet_integrals())
    return;

  // Set timer
  Timer timer("Assemble interior facets");

  // Extract mesh
  const Mesh& mesh = a.mesh();

  // MPI rank
  const int my_mpi_rank = MPI::rank(mesh.mpi_comm());

  // Form rank
  const std::size_t form_rank = ufc.form.rank();

  // Collect pointers to dof maps
  std::vector<const GenericDofMap*> dofmaps;
  for (std::size_t i = 0; i < form_rank; ++i)
    dofmaps.push_back(a.function_space(i)->dofmap().get());

  // Vector to hold dofs for cells, and a vector holding pointers to same
  std::vector<std::vector<dolfin::la_index>> macro_dofs(form_rank);
  std::vector<ArrayView<const dolfin::la_index>> macro_dof_ptrs(form_rank);

  // Interior facet integral
  const ufc::interior_facet_integral* integral
    = ufc.default_interior_facet_integral.get();

  // Check whether integral is domain-dependent
  bool use_domains = domains && !domains->empty();
  bool use_cell_domains = cell_domains && !cell_domains->empty();

  // Compute facets and facet-cell connectivity if not already computed
  const std::size_t D = mesh.topology().dim();
  mesh.init(D - 1);
  mesh.init(D - 1, D);
  dolfin_assert(mesh.ordered());

  // Assemble over interior facets (the facets of the mesh)
  ufc::cell ufc_cell[2];
  std::vector<double> coordinate_dofs[2];
  Progress p(AssemblerBase::progress_message(A.rank(), "interior facets"),
             mesh.num_facets());
  for (FacetIterator facet(mesh); !facet.end(); ++facet)
  {
    if (facet->num_entities(D) == 1)
      continue;

    // Check that facet is not a ghost
    dolfin_assert(!facet->is_ghost());

    // Get integral for sub domain (if any)
    if (use_domains)
      integral = ufc.get_interior_facet_integral((*domains)[*facet]);

    // Skip integral if zero
    if (!integral)
      continue;

    // Get cells incident with facet (the assignment of 0 and 1 is arbitrary)
    dolfin_assert(facet->num_entities(D) == 2);
    std::size_t cell_index_plus = facet->entities(D)[0];
    std::size_t cell_index_minus = facet->entities(D)[1];

    if (use_cell_domains
        && (*cell_domains)[cell_index_plus] < (*cell_domains)[cell_index_minus])
    {
      std::swap(cell_index_plus, cell_index_minus);
    }

    // The convention '+' = 0, '-' = 1 is from FFC
    const Cell cell0(mesh, cell_index_plus);
    const Cell cell1(mesh, cell_index_minus);

    // Get local index of facet with respect to each cell
    std::size_t local_facet0 = cell0.index(*facet);
    std::size_t local_facet1 = cell1.index(*facet);

    // Update to current pair of cells
    cell0.get_cell_data(ufc_cell[0], local_facet0);
    cell0.get_coordinate_dofs(coordinate_dofs[0]);
    cell1.get_cell_data(ufc_cell[1], local_facet1);
    cell1.get_coordinate_dofs(coordinate_dofs[1]);

    ufc.update(cell0, coordinate_dofs[0], ufc_cell[0],
               cell1, coordinate_dofs[1], ufc_cell[1],
               integral->enabled_coefficients());

    // Tabulate dofs for each dimension on macro element
    for (std::size_t i = 0; i < form_rank; i++)
    {
      // Get dofs for each cell
      const ArrayView<const dolfin::la_index> cell_dofs0
        = dofmaps[i]->cell_dofs(cell0.index());
      const ArrayView<const dolfin::la_index> cell_dofs1
        = dofmaps[i]->cell_dofs(cell1.index());

      // Create space in macro dof vector
      macro_dofs[i].resize(cell_dofs0.size() + cell_dofs1.size());

      // Copy cell dofs into macro dof vector
      std::copy(cell_dofs0.data(), cell_dofs0.data() + cell_dofs0.size(),
                macro_dofs[i].begin());
      std::copy(cell_dofs1.data(), cell_dofs1.data() + cell_dofs1.size(),
                macro_dofs[i].begin() + cell_dofs0.size());
      macro_dof_ptrs[i].set(macro_dofs[i]);
    }

    // Tabulate interior facet tensor on macro element
    integral->tabulate_tensor(ufc.macro_A.data(), ufc.macro_w(),
                              coordinate_dofs[0].data(),
                              coordinate_dofs[1].data(),
                              local_facet0, local_facet1,
                              ufc_cell[0].orientation,
                              ufc_cell[1].orientation);

    // Skip contributions on ghost facets that are owned by a
    // lower-ranked process
    if (cell0.is_ghost() != cell1.is_ghost())
    {
      int ghost_rank = -1;
      if (cell0.is_ghost())
        ghost_rank = cell0.owner();
      else
        ghost_rank = cell1.owner();
      dolfin_assert(my_mpi_rank != ghost_rank);
      dolfin_assert(ghost_rank != -1);
      if (ghost_rank < my_mpi_rank)
        continue;
    }

    // Add entries to global tensor
    A.add_local(ufc.macro_A.data(), macro_dof_ptrs);

    p++;
  }
}
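//-----------------------------------------------------------------------------
// Usage sketch (illustrative only): attaching subdomain markers so that the
// `domains`/`cell_domains` arguments above are non-empty. Assumes the
// standard Form setters set_cell_domains/set_interior_facet_domains; the
// form `a` and marker values are hypothetical, and the MeshFunction
// constructor signature varies between DOLFIN versions. Note that with cell
// markers attached, the '+' side of each interior facet is chosen as the
// cell with the larger marker value (see the swap above).
//
//   auto cell_markers
//     = std::make_shared<MeshFunction<std::size_t>>(mesh, D, 0);
//   auto facet_markers
//     = std::make_shared<MeshFunction<std::size_t>>(mesh, D - 1, 0);
//   // ... mark cells and interior facets of interest with 1 ...
//   a.set_cell_domains(cell_markers);
//   a.set_interior_facet_domains(facet_markers);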
//-----------------------------------------------------------------------------
void Assembler::assemble_exterior_facets(
  GenericTensor& A, const Form& a, UFC& ufc,
  std::shared_ptr<const MeshFunction<std::size_t>> domains,
  std::vector<double>* values)
{
  // Skip assembly if there are no exterior facet integrals
  if (!ufc.form.has_exterior_facet_integrals())
    return;

  // Set timer
  Timer timer("Assemble exterior facets");

  // Extract mesh
  const Mesh& mesh = a.mesh();

  // Form rank
  const std::size_t form_rank = ufc.form.rank();

  // Collect pointers to dof maps
  std::vector<const GenericDofMap*> dofmaps;
  for (std::size_t i = 0; i < form_rank; ++i)
    dofmaps.push_back(a.function_space(i)->dofmap().get());

  // Vector to hold dof map for a cell
  std::vector<ArrayView<const dolfin::la_index>> dofs(form_rank);

  // Exterior facet integral
  const ufc::exterior_facet_integral* integral
    = ufc.default_exterior_facet_integral.get();

  // Check whether integral is domain-dependent
  bool use_domains = domains && !domains->empty();

  // Compute facets and facet-cell connectivity if not already computed
  const std::size_t D = mesh.topology().dim();
  mesh.init(D - 1);
  mesh.init(D - 1, D);
  dolfin_assert(mesh.ordered());

  // Assemble over exterior facets (the cells of the boundary)
  ufc::cell ufc_cell;
  std::vector<double> coordinate_dofs;
  Progress p(AssemblerBase::progress_message(A.rank(), "exterior facets"),
             mesh.num_facets());
  for (FacetIterator facet(mesh); !facet.end(); ++facet)
  {
    // Only consider exterior facets
    if (!facet->exterior())
    {
      p++;
      continue;
    }

    // Get integral for sub domain (if any)
    if (use_domains)
      integral = ufc.get_exterior_facet_integral((*domains)[*facet]);

    // Skip integral if zero
    if (!integral)
      continue;

    // Get mesh cell to which mesh facet belongs (pick first, there is
    // only one)
    dolfin_assert(facet->num_entities(D) == 1);
    Cell mesh_cell(mesh, facet->entities(D)[0]);

    // Check that cell is not a ghost
    dolfin_assert(!mesh_cell.is_ghost());

    // Get local index of facet with respect to the cell
    const std::size_t local_facet = mesh_cell.index(*facet);

    // Update UFC cell
    mesh_cell.get_cell_data(ufc_cell, local_facet);
    mesh_cell.get_coordinate_dofs(coordinate_dofs);

    // Update UFC object
    ufc.update(mesh_cell, coordinate_dofs, ufc_cell,
               integral->enabled_coefficients());

    // Get local-to-global dof maps for cell
    for (std::size_t i = 0; i < form_rank; ++i)
      dofs[i] = dofmaps[i]->cell_dofs(mesh_cell.index());

    // Tabulate exterior facet tensor
    integral->tabulate_tensor(ufc.A.data(), ufc.w(), coordinate_dofs.data(),
                              local_facet, ufc_cell.orientation);

    // Add entries to global tensor
    A.add_local(ufc.A.data(), dofs);

    p++;
  }
}
//-----------------------------------------------------------------------------
void Assembler::assemble_cells(
  GenericTensor& A, const Form& a, UFC& ufc,
  std::shared_ptr<const MeshFunction<std::size_t>> domains,
  std::vector<double>* values)
{
  // Skip assembly if there are no cell integrals
  if (!ufc.form.has_cell_integrals())
    return;

  // Set timer
  Timer timer("Assemble cells");

  // Extract mesh
  const Mesh& mesh = a.mesh();

  // Form rank
  const std::size_t form_rank = ufc.form.rank();

  // Check if form is a functional
  const bool is_cell_functional = (values && form_rank == 0);

  // Collect pointers to dof maps
  std::vector<const GenericDofMap*> dofmaps;
  for (std::size_t i = 0; i < form_rank; ++i)
    dofmaps.push_back(a.function_space(i)->dofmap().get());

  // Vector to hold dof map for a cell
  std::vector<ArrayView<const dolfin::la_index>> dofs(form_rank);

  // Cell integral
  ufc::cell_integral* integral = ufc.default_cell_integral.get();

  // Check whether integral is domain-dependent
  bool use_domains = domains && !domains->empty();

  // Assemble over cells
  ufc::cell ufc_cell;
  std::vector<double> coordinate_dofs;
  Progress p(AssemblerBase::progress_message(A.rank(), "cells"),
             mesh.num_cells());
  for (CellIterator cell(mesh); !cell.end(); ++cell)
  {
    // Get integral for sub domain (if any)
    if (use_domains)
      integral = ufc.get_cell_integral((*domains)[*cell]);

    // Skip if no integral on current domain
    if (!integral)
      continue;

    // Check that cell is not a ghost
    dolfin_assert(!cell->is_ghost());

    // Update to current cell
    cell->get_cell_data(ufc_cell);
    cell->get_coordinate_dofs(coordinate_dofs);
    ufc.update(*cell, coordinate_dofs, ufc_cell,
               integral->enabled_coefficients());

    // Get local-to-global dof maps for cell
    bool empty_dofmap = false;
    for (std::size_t i = 0; i < form_rank; ++i)
    {
      dofs[i] = dofmaps[i]->cell_dofs(cell->index());
      empty_dofmap = empty_dofmap || dofs[i].size() == 0;
    }

    // Skip if at least one dofmap is empty
    if (empty_dofmap)
      continue;

    // Tabulate cell tensor
    integral->tabulate_tensor(ufc.A.data(), ufc.w(), coordinate_dofs.data(),
                              ufc_cell.orientation);

    // Add entries to global tensor, or store values cell-by-cell
    // (currently only available for functionals)
    if (is_cell_functional)
      (*values)[cell->index()] = ufc.A[0];
    else
      A.add_local(ufc.A.data(), dofs);

    p++;
  }
}
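//-----------------------------------------------------------------------------
// Usage sketch (illustrative only): for a rank-0 form (a functional), the
// assembled "tensor" is a Scalar. A minimal sketch with a hypothetical
// functional `M`; Scalar::get_scalar_value is assumed to be available in the
// DOLFIN version at hand:
//
//   Scalar m;
//   Assembler assembler;
//   assembler.assemble(m, M);
//   const double value = m.get_scalar_value();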
//-----------------------------------------------------------------------------
void OpenMpAssembler::assemble_interior_facets(
  GenericTensor& A, const Form& a, UFC& _ufc,
  std::shared_ptr<const MeshFunction<std::size_t>> domains,
  std::shared_ptr<const MeshFunction<std::size_t>> cell_domains,
  std::vector<double>* values)
{
  warning("OpenMpAssembler::assemble_interior_facets is untested.");

  // Extract mesh
  const Mesh& mesh = a.mesh();

  // Topological dimension
  const std::size_t D = mesh.topology().dim();

  dolfin_assert(!values);

  // Skip assembly if there are no interior facet integrals
  if (!_ufc.form.has_interior_facet_integrals())
    return;

  Timer timer("Assemble interior facets");

  // Set number of OpenMP threads (from parameter systems)
  omp_set_num_threads(parameters["num_threads"]);

  // Get integral for sub domain (if any)
  bool use_domains = domains && !domains->empty();
  bool use_cell_domains = cell_domains && !cell_domains->empty();
  if (use_domains)
  {
    dolfin_error("OpenMPAssembler.cpp",
                 "perform multithreaded assembly using OpenMP assembler",
                 "Subdomains are not yet handled");
  }

  // Color mesh
  std::vector<std::size_t> coloring_type = a.coloring(D - 1);
  mesh.color(coloring_type);

  // Dummy UFC object since each thread needs to create its own UFC object
  UFC ufc(_ufc);

  // Form rank
  const std::size_t form_rank = ufc.form.rank();

  // Collect pointers to dof maps
  std::vector<const GenericDofMap*> dofmaps;
  for (std::size_t i = 0; i < form_rank; ++i)
    dofmaps.push_back(a.function_space(i)->dofmap().get());

  // Vector to hold dofs for cells
  std::vector<std::vector<dolfin::la_index>> macro_dofs(form_rank);

  // Interior facet integral
  const ufc::interior_facet_integral* integral
    = ufc.default_interior_facet_integral.get();

  // Compute facets and facet-cell connectivity if not already computed
  mesh.init(D - 1);
  mesh.init(D - 1, D);
  dolfin_assert(mesh.ordered());

  // Get coloring data
  std::map<const std::vector<std::size_t>,
           std::pair<std::vector<std::size_t>,
                     std::vector<std::vector<std::size_t>>>>::const_iterator
    mesh_coloring;
  mesh_coloring = mesh.topology().coloring.find(coloring_type);

  // Check that requested coloring has been computed
  if (mesh_coloring == mesh.topology().coloring.end())
  {
    dolfin_error("OpenMPAssembler.cpp",
                 "perform multithreaded assembly using OpenMP assembler",
                 "Requested mesh coloring has not been computed");
  }

  // Get coloring data
  const std::vector<std::vector<std::size_t>>& entities_of_color
    = mesh_coloring->second.second;

  // UFC cells and vertex coordinates
  ufc::cell ufc_cell0, ufc_cell1;
  std::vector<double> vertex_coordinates0, vertex_coordinates1;

  // Assemble over interior facets (loop over colours, then cells of
  // same color)
  const std::size_t num_colors = entities_of_color.size();
  for (std::size_t color = 0; color < num_colors; ++color)
  {
    // Get the array of facet indices of current color
    const std::vector<std::size_t>& colored_facets = entities_of_color[color];

    // Number of facets of current color
    const int num_facets = colored_facets.size();

    // OpenMP test loop over cells of the same color
    Progress p(AssemblerBase::progress_message(A.rank(), "interior facets"),
               mesh.num_facets());
    #pragma omp parallel for schedule(guided, 20) firstprivate(ufc, ufc_cell0, ufc_cell1, vertex_coordinates0, vertex_coordinates1, macro_dofs, integral)
    for (int facet_index = 0; facet_index < num_facets; ++facet_index)
    {
      // Facet index
      const std::size_t index = colored_facets[facet_index];

      // Create facet
      const Facet facet(mesh, index);

      // Only consider interior facets
      if (facet.exterior())
      {
        p++;
        continue;
      }

      // Get integral for sub domain (if any)
      if (use_domains)
        integral = ufc.get_interior_facet_integral((*domains)[facet]);

      // Skip integral if zero
      if (!integral)
        continue;

      // Get cells incident with facet (the assignment of 0 and 1 is
      // arbitrary)
      dolfin_assert(facet.num_entities(D) == 2);
      std::size_t cell_index_plus = facet.entities(D)[0];
      std::size_t cell_index_minus = facet.entities(D)[1];

      if (use_cell_domains
          && (*cell_domains)[cell_index_plus] < (*cell_domains)[cell_index_minus])
      {
        std::swap(cell_index_plus, cell_index_minus);
      }

      // The convention '+' = 0, '-' = 1 is from FFC
      const Cell cell0(mesh, cell_index_plus);
      const Cell cell1(mesh, cell_index_minus);

      // Get local index of facet with respect to each cell
      const std::size_t local_facet0 = cell0.index(facet);
      const std::size_t local_facet1 = cell1.index(facet);

      // Update UFC cell
      cell0.get_vertex_coordinates(vertex_coordinates0);
      cell0.get_cell_data(ufc_cell0, local_facet0);
      cell1.get_vertex_coordinates(vertex_coordinates1);
      cell1.get_cell_data(ufc_cell1, local_facet1);

      // Update to current pair of cells
      ufc.update(cell0, vertex_coordinates0, ufc_cell0,
                 cell1, vertex_coordinates1, ufc_cell1,
                 integral->enabled_coefficients());

      // Tabulate dofs for each dimension on macro element
      for (std::size_t i = 0; i < form_rank; i++)
      {
        // Get dofs for each cell
        const ArrayView<const dolfin::la_index> cell_dofs0
          = dofmaps[i]->cell_dofs(cell0.index());
        const ArrayView<const dolfin::la_index> cell_dofs1
          = dofmaps[i]->cell_dofs(cell1.index());

        // Create space in macro dof vector
        macro_dofs[i].resize(cell_dofs0.size() + cell_dofs1.size());

        // Copy cell dofs into macro dof vector
        std::copy(cell_dofs0.begin(), cell_dofs0.end(),
                  macro_dofs[i].begin());
        std::copy(cell_dofs1.begin(), cell_dofs1.end(),
                  macro_dofs[i].begin() + cell_dofs0.size());
      }

      // Tabulate interior facet tensor on macro element
      integral->tabulate_tensor(ufc.macro_A.data(), ufc.macro_w(),
                                vertex_coordinates0.data(),
                                vertex_coordinates1.data(),
                                local_facet0, local_facet1,
                                ufc_cell0.orientation,
                                ufc_cell1.orientation);

      // Add entries to global tensor
      std::vector<ArrayView<const la_index>> macro_dofs_p(macro_dofs.size());
      for (std::size_t i = 0; i < macro_dofs.size(); ++i)
        macro_dofs_p[i].set(macro_dofs[i]);
      A.add_local(ufc.macro_A.data(), macro_dofs_p);

      p++;
    }
  }
}
//-----------------------------------------------------------------------------
void AssemblerBase::init_global_tensor(GenericTensor& A, const Form& a)
{
  dolfin_assert(a.ufc_form());

  // Get dof maps
  std::vector<const GenericDofMap*> dofmaps;
  for (std::size_t i = 0; i < a.rank(); ++i)
    dofmaps.push_back(a.function_space(i)->dofmap().get());

  if (A.size(0) == 0)
  {
    Timer t0("Build sparsity");

    // Create layout for initialising tensor
    std::shared_ptr<TensorLayout> tensor_layout;
    tensor_layout = A.factory().create_layout(a.rank());
    dolfin_assert(tensor_layout);

    // Get dimensions
    std::vector<std::size_t> global_dimensions;
    std::vector<std::pair<std::size_t, std::size_t>> local_range;
    std::vector<std::size_t> block_sizes;
    for (std::size_t i = 0; i < a.rank(); i++)
    {
      dolfin_assert(dofmaps[i]);
      global_dimensions.push_back(dofmaps[i]->global_dimension());
      local_range.push_back(dofmaps[i]->ownership_range());
      block_sizes.push_back(dofmaps[i]->block_size);
    }

    // Set block size for sparsity graphs
    std::size_t block_size = 1;
    if (a.rank() == 2)
    {
      const std::vector<std::size_t> _bs(a.rank(), dofmaps[0]->block_size);
      block_size = (block_sizes == _bs) ? dofmaps[0]->block_size : 1;
    }

    // Initialise tensor layout
    tensor_layout->init(a.mesh().mpi_comm(), global_dimensions, block_size,
                        local_range);

    // Build sparsity pattern if required
    if (tensor_layout->sparsity_pattern())
    {
      GenericSparsityPattern& pattern = *tensor_layout->sparsity_pattern();
      SparsityPatternBuilder::build(pattern,
                                    a.mesh(), dofmaps,
                                    a.ufc_form()->has_cell_integrals(),
                                    a.ufc_form()->has_interior_facet_integrals(),
                                    a.ufc_form()->has_exterior_facet_integrals(),
                                    keep_diagonal);
    }
    t0.stop();

    // Initialize tensor
    Timer t1("Init tensor");
    A.init(*tensor_layout);
    t1.stop();

    // Insert zeros on the diagonal as diagonal entries may be prematurely
    // optimised away by the linear algebra backend when calling
    // GenericMatrix::apply, e.g. PETSc does this then errors when matrices
    // have no diagonal entry inserted.
    if (A.rank() == 2 && keep_diagonal)
    {
      // Down cast to GenericMatrix
      GenericMatrix& _A = A.down_cast<GenericMatrix>();

      // Loop over rows and insert 0.0 on the diagonal
      const double block = 0.0;
      const std::pair<std::size_t, std::size_t> row_range = A.local_range(0);
      const std::size_t range = std::min(row_range.second, A.size(1));
      for (std::size_t i = row_range.first; i < range; i++)
      {
        dolfin::la_index _i = i;
        _A.set(&block, 1, &_i, 1, &_i);
      }
      A.apply("flush");
    }

    // Delete sparsity pattern
    Timer t2("Delete sparsity");
    t2.stop();
  }
  else
  {
    // If tensor is not reset, check that dimensions are correct
    for (std::size_t i = 0; i < a.rank(); ++i)
    {
      if (A.size(i) != dofmaps[i]->global_dimension())
      {
        dolfin_error("AssemblerBase.cpp",
                     "assemble form",
                     "Reset of tensor in assembly not requested, but dim %d of tensor does not match form", i);
      }
    }
  }

  if (!add_values)
    A.zero();
}
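//-----------------------------------------------------------------------------
// Usage sketch (illustrative only): the keep_diagonal flag above is what
// guarantees that diagonal entries exist before Dirichlet rows are set to
// identity, which some backends (e.g. PETSc) otherwise reject. Sketch with a
// hypothetical form `a` and boundary condition `bc`:
//
//   Matrix A;
//   Assembler assembler;
//   assembler.keep_diagonal = true;
//   assembler.assemble(A, a);
//   bc.apply(A);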
//-----------------------------------------------------------------------------
void MultiMeshAssembler::_assemble_uncut_cells(GenericTensor& A,
                                               const MultiMeshForm& a)
{
  // Get form rank
  const std::size_t form_rank = a.rank();

  // Extract multimesh
  std::shared_ptr<const MultiMesh> multimesh = a.multimesh();

  // Collect pointers to dof maps
  std::vector<const MultiMeshDofMap*> dofmaps;
  for (std::size_t i = 0; i < form_rank; i++)
    dofmaps.push_back(a.function_space(i)->dofmap().get());

  // Vector to hold dof map for a cell
  std::vector<ArrayView<const dolfin::la_index>> dofs(form_rank);

  // Initialize variables that will be reused throughout assembly
  ufc::cell ufc_cell;
  std::vector<double> coordinate_dofs;

  // Iterate over parts
  for (std::size_t part = 0; part < a.num_parts(); part++)
  {
    log(PROGRESS, "Assembling multimesh form over uncut cells on part %d.",
        part);

    // Get form for current part
    const Form& a_part = *a.part(part);

    // Create data structure for local assembly data
    UFC ufc_part(a_part);

    // Extract mesh
    const Mesh& mesh_part = a_part.mesh();

    // FIXME: Handle subdomains

    // Get integral
    ufc::cell_integral* integral = ufc_part.default_cell_integral.get();

    // Skip if we don't have a cell integral
    if (!integral)
      continue;

    // Get uncut cells
    const std::vector<unsigned int>& uncut_cells
      = multimesh->uncut_cells(part);

    // Iterate over uncut cells
    for (auto it = uncut_cells.begin(); it != uncut_cells.end(); ++it)
    {
      // Create cell
      Cell cell(mesh_part, *it);

      // Update to current cell
      cell.get_cell_data(ufc_cell);
      cell.get_coordinate_dofs(coordinate_dofs);
      ufc_part.update(cell, coordinate_dofs, ufc_cell);

      // Get local-to-global dof maps for cell
      for (std::size_t i = 0; i < form_rank; ++i)
      {
        const auto dofmap = a.function_space(i)->dofmap()->part(part);
        dofs[i] = dofmap->cell_dofs(cell.index());
      }

      // Tabulate cell tensor
      integral->tabulate_tensor(ufc_part.A.data(), ufc_part.w(),
                                coordinate_dofs.data(),
                                ufc_cell.orientation);

      // Add entries to global tensor
      A.add(ufc_part.A.data(), dofs);
    }
  }
}
//-----------------------------------------------------------------------------
void MultiMeshAssembler::_init_global_tensor(GenericTensor& A,
                                             const MultiMeshForm& a)
{
  log(PROGRESS, "Initializing global tensor.");

  // This function initializes the big system matrix corresponding to
  // all dofs (including inactive dofs) on all parts of the MultiMesh
  // function space.

  // Create layout for initializing tensor
  std::shared_ptr<TensorLayout> tensor_layout;
  tensor_layout = A.factory().create_layout(a.rank());
  dolfin_assert(tensor_layout);

  // Get dimensions
  std::vector<std::shared_ptr<const IndexMap>> index_maps;
  for (std::size_t i = 0; i < a.rank(); i++)
  {
    std::shared_ptr<const MultiMeshFunctionSpace> V = a.function_space(i);
    dolfin_assert(V);
    index_maps.push_back(std::shared_ptr<const IndexMap>
                         (new IndexMap(MPI_COMM_WORLD, V->dim(), 1)));
  }

  // Initialise tensor layout
  tensor_layout->init(MPI_COMM_WORLD, index_maps,
                      TensorLayout::Ghosts::UNGHOSTED);

  // Build sparsity pattern if required
  if (tensor_layout->sparsity_pattern())
  {
    GenericSparsityPattern& pattern = *tensor_layout->sparsity_pattern();
    SparsityPatternBuilder::build_multimesh_sparsity_pattern(pattern, a);
  }

  // Initialize tensor
  A.init(*tensor_layout);

  // Insert zeros on the diagonal as diagonal entries may be prematurely
  // optimised away by the linear algebra backend when calling
  // GenericMatrix::apply, e.g. PETSc does this then errors when matrices
  // have no diagonal entry inserted.
  if (A.rank() == 2)
  {
    // Down cast to GenericMatrix
    GenericMatrix& _matA = A.down_cast<GenericMatrix>();

    // Loop over rows and insert 0.0 on the diagonal
    const double block = 0.0;
    const std::pair<std::size_t, std::size_t> row_range = A.local_range(0);
    const std::size_t range = std::min(row_range.second, A.size(1));
    for (std::size_t i = row_range.first; i < range; i++)
    {
      dolfin::la_index _i = i;
      _matA.set(&block, 1, &_i, 1, &_i);
    }
    A.apply("flush");
  }

  // Set tensor to zero
  A.zero();
}
//-----------------------------------------------------------------------------
void MultiMeshAssembler::_assemble_overlap(GenericTensor& A,
                                           const MultiMeshForm& a)
{
  // FIXME: This function and assemble_interface are very similar.
  // FIXME: Refactor to improve code reuse.

  // Extract multimesh
  std::shared_ptr<const MultiMesh> multimesh = a.multimesh();

  // Get form rank
  const std::size_t form_rank = a.rank();

  // Collect pointers to dof maps
  std::vector<const MultiMeshDofMap*> dofmaps;
  for (std::size_t i = 0; i < form_rank; i++)
    dofmaps.push_back(a.function_space(i)->dofmap().get());

  // Vector to hold dof map for a cell
  std::vector<const std::vector<dolfin::la_index>*> dofs(form_rank);

  // Initialize variables that will be reused throughout assembly
  ufc::cell ufc_cell[2];
  std::vector<double> coordinate_dofs[2];
  std::vector<double> macro_coordinate_dofs;

  // Vector to hold dofs for cells, and a vector holding pointers to same
  std::vector<ArrayView<const dolfin::la_index>> macro_dof_ptrs(form_rank);
  std::vector<std::vector<dolfin::la_index>> macro_dofs(form_rank);

  // Iterate over parts
  for (std::size_t part = 0; part < a.num_parts(); part++)
  {
    log(PROGRESS, "Assembling multimesh form over overlap on part %d.", part);

    // Get form for current part
    const Form& a_part = *a.part(part);

    // Create data structure for local assembly data
    UFC ufc_part(a_part);

    // FIXME: Handle subdomains

    // Get integral
    ufc::overlap_integral* integral = ufc_part.default_overlap_integral.get();

    // Skip if we don't have an overlap integral
    if (!integral)
      continue;

    // Get quadrature rules
    const auto& quadrature_rules = multimesh->quadrature_rule_overlap(part);

    // Get collision map
    const auto& cmap = multimesh->collision_map_cut_cells(part);

    // Iterate over all cut cells in collision map
    for (auto it = cmap.begin(); it != cmap.end(); ++it)
    {
      // Get cut cell
      const unsigned int cut_cell_index = it->first;
      const Cell cut_cell(*multimesh->part(part), cut_cell_index);

      // Iterate over cutting cells
      const auto& cutting_cells = it->second;
      for (auto jt = cutting_cells.begin(); jt != cutting_cells.end(); jt++)
      {
        // Get cutting part and cutting cell
        const std::size_t cutting_part = jt->first;
        const std::size_t cutting_cell_index = jt->second;
        const Cell cutting_cell(*multimesh->part(cutting_part),
                                cutting_cell_index);

        // Get quadrature rule for interface part defined by
        // intersection of the cut and cutting cells
        const std::size_t k = jt - cutting_cells.begin();
        dolfin_assert(k < quadrature_rules.at(cut_cell_index).size());
        const auto& qr = quadrature_rules.at(cut_cell_index)[k];

        // FIXME: There might be quite a few cases when we skip cutting
        // FIXME: cells because there are no quadrature points. Perhaps
        // FIXME: we can rewrite this inner loop to avoid unnecessary
        // FIXME: iterations.

        // Skip if there are no quadrature points
        const std::size_t num_quadrature_points = qr.second.size();
        if (num_quadrature_points == 0)
          continue;

        // Create aliases for cells to simplify notation
        const Cell& cell_0 = cut_cell;
        const Cell& cell_1 = cutting_cell;

        // Update to current pair of cells
        cell_0.get_cell_data(ufc_cell[0], 0);
        cell_1.get_cell_data(ufc_cell[1], 0);
        cell_0.get_coordinate_dofs(coordinate_dofs[0]);
        cell_1.get_coordinate_dofs(coordinate_dofs[1]);
        ufc_part.update(cell_0, coordinate_dofs[0], ufc_cell[0],
                        cell_1, coordinate_dofs[1], ufc_cell[1]);

        // Collect vertex coordinates
        macro_coordinate_dofs.resize(coordinate_dofs[0].size()
                                     + coordinate_dofs[1].size());
        std::copy(coordinate_dofs[0].begin(), coordinate_dofs[0].end(),
                  macro_coordinate_dofs.begin());
        std::copy(coordinate_dofs[1].begin(), coordinate_dofs[1].end(),
                  macro_coordinate_dofs.begin() + coordinate_dofs[0].size());

        // Tabulate dofs for each dimension on macro element
        for (std::size_t i = 0; i < form_rank; i++)
        {
          // Get dofs for cut mesh
          const auto dofmap_0 = a.function_space(i)->dofmap()->part(part);
          const auto dofs_0 = dofmap_0->cell_dofs(cell_0.index());

          // Get dofs for cutting mesh
          const auto dofmap_1
            = a.function_space(i)->dofmap()->part(cutting_part);
          const auto dofs_1 = dofmap_1->cell_dofs(cell_1.index());

          // Create space in macro dof vector
          macro_dofs[i].resize(dofs_0.size() + dofs_1.size());

          // Copy cell dofs into macro dof vector
          std::copy(dofs_0.begin(), dofs_0.end(),
                    macro_dofs[i].begin());
          std::copy(dofs_1.begin(), dofs_1.end(),
                    macro_dofs[i].begin() + dofs_0.size());

          // Update array view
          macro_dof_ptrs[i]
            = ArrayView<const dolfin::la_index>(macro_dofs[i].size(),
                                                macro_dofs[i].data());
        }

        // FIXME: Cell orientation not supported
        const int cell_orientation = ufc_cell[0].orientation;

        // Tabulate overlap tensor on macro element
        integral->tabulate_tensor(ufc_part.macro_A.data(),
                                  ufc_part.macro_w(),
                                  macro_coordinate_dofs.data(),
                                  num_quadrature_points,
                                  qr.first.data(),
                                  qr.second.data(),
                                  cell_orientation);

        // Add entries to global tensor
        A.add(ufc_part.macro_A.data(), macro_dof_ptrs);
      }
    }
  }
}
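//-----------------------------------------------------------------------------
// Inspection sketch (illustrative only): each quadrature rule above is a
// std::pair of flattened point coordinates (first) and weights (second), so
// summing the weights of all overlap rules on a part approximates the overlap
// volume. Uses only calls that appear in this file; `multimesh` and `part`
// are assumed to be set up as above:
//
//   double overlap_volume = 0.0;
//   for (const auto& rules : multimesh->quadrature_rule_overlap(part))
//     for (const auto& qr : rules.second)
//       for (const double w : qr.second)
//         overlap_volume += w;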
//-----------------------------------------------------------------------------
void MultiMeshAssembler::_assemble_cut_cells(GenericTensor& A,
                                             const MultiMeshForm& a)
{
  // Get form rank
  const std::size_t form_rank = a.rank();

  // Extract multimesh
  std::shared_ptr<const MultiMesh> multimesh = a.multimesh();

  // Collect pointers to dof maps
  std::vector<const MultiMeshDofMap*> dofmaps;
  for (std::size_t i = 0; i < form_rank; i++)
    dofmaps.push_back(a.function_space(i)->dofmap().get());

  // Vector to hold dof map for a cell
  std::vector<ArrayView<const dolfin::la_index>> dofs(form_rank);

  // Initialize variables that will be reused throughout assembly
  ufc::cell ufc_cell;
  std::vector<double> coordinate_dofs;

  // Iterate over parts
  for (std::size_t part = 0; part < a.num_parts(); part++)
  {
    log(PROGRESS, "Assembling multimesh form over cut cells on part %d.",
        part);

    // Get form for current part
    const Form& a_part = *a.part(part);

    // Create data structure for local assembly data
    UFC ufc_part(a_part);

    // Extract mesh
    const Mesh& mesh_part = a_part.mesh();

    // FIXME: Handle subdomains

    // Get integral
    ufc::cutcell_integral* integral
      = ufc_part.default_cutcell_integral.get();

    // Skip if we don't have a cutcell integral
    if (!integral)
      continue;

    // Get cut cells and quadrature rules
    const std::vector<unsigned int>& cut_cells = multimesh->cut_cells(part);
    const auto& quadrature_rules = multimesh->quadrature_rule_cut_cells(part);

    // Iterate over cut cells
    for (auto it = cut_cells.begin(); it != cut_cells.end(); ++it)
    {
      // Create cell
      Cell cell(mesh_part, *it);

      // Update to current cell
      cell.get_cell_data(ufc_cell);
      cell.get_coordinate_dofs(coordinate_dofs);
      ufc_part.update(cell, coordinate_dofs, ufc_cell);

      // Get local-to-global dof maps for cell
      for (std::size_t i = 0; i < form_rank; ++i)
      {
        const auto dofmap = a.function_space(i)->dofmap()->part(part);
        dofs[i] = dofmap->cell_dofs(cell.index());
      }

      // Get quadrature rule for cut cell
      const auto& qr = quadrature_rules.at(*it);

      // Skip if there are no quadrature points
      std::size_t num_quadrature_points = qr.second.size();
      if (num_quadrature_points == 0)
        continue;

      // FIXME: Handle this inside the quadrature point generation,
      // FIXME: perhaps by storing three different sets of points,
      // FIXME: including cut cell, overlap and the whole cell.

      // Include only quadrature points with positive weight if
      // integration should be extended on cut cells
      std::pair<std::vector<double>, std::vector<double>> pr;
      if (extend_cut_cell_integration)
      {
        const std::size_t gdim = mesh_part.geometry().dim();
        for (std::size_t i = 0; i < num_quadrature_points; i++)
        {
          if (qr.second[i] > 0.0)
          {
            pr.second.push_back(qr.second[i]);
            for (std::size_t j = i*gdim; j < (i + 1)*gdim; j++)
              pr.first.push_back(qr.first[j]);
          }
        }
        num_quadrature_points = pr.second.size();
      }
      else
      {
        pr = qr;
      }

      // Tabulate cell tensor
      integral->tabulate_tensor(ufc_part.A.data(), ufc_part.w(),
                                coordinate_dofs.data(),
                                num_quadrature_points,
                                pr.first.data(),
                                pr.second.data(),
                                ufc_cell.orientation);

      // Add entries to global tensor
      A.add(ufc_part.A.data(), dofs);
    }
  }
}
//-----------------------------------------------------------------------------
void MultiMeshAssembler::_assemble_uncut_exterior_facets(GenericTensor& A,
                                                         const MultiMeshForm& a)
{
  // FIXME: This implementation assumes that there is one background mesh
  // that contains the entire exterior facet.

  // Get form rank
  const std::size_t form_rank = a.rank();

  // Extract multimesh
  std::shared_ptr<const MultiMesh> multimesh = a.multimesh();

  // Collect pointers to dof maps
  std::vector<const MultiMeshDofMap*> dofmaps;
  for (std::size_t i = 0; i < form_rank; i++)
    dofmaps.push_back(a.function_space(i)->dofmap().get());

  // Vector to hold dof map for a cell
  std::vector<ArrayView<const dolfin::la_index>> dofs(form_rank);

  // Initialize variables that will be reused throughout assembly
  ufc::cell ufc_cell;
  std::vector<double> coordinate_dofs;

  // Assemble over exterior uncut facets on mesh 0, the background mesh
  int part = 0;
  log(PROGRESS, "Assembling multimesh form over uncut facets on part %d.",
      part);

  // Get form for current part
  const Form& a_part = *a.part(part);

  // Create data structure for local assembly data
  UFC ufc_part(a_part);

  // Extract mesh
  dolfin_assert(a_part.mesh());
  const Mesh& mesh_part = *(a_part.mesh());

  // FIXME: Handle subdomains

  // Exterior facet integral
  const ufc::exterior_facet_integral* integral
    = ufc_part.default_exterior_facet_integral.get();

  // Skip if we don't have a facet integral
  if (!integral)
    return;

  // Topological dimension
  const std::size_t D = mesh_part.topology().dim();

  // Iterate over exterior facets
  for (FacetIterator facet(mesh_part); !facet.end(); ++facet)
  {
    // Only consider exterior facets
    if (!facet->exterior())
      continue;

    // Get mesh cell to which mesh facet belongs (pick first, there is
    // only one)
    dolfin_assert(facet->num_entities(D) == 1);
    Cell mesh_cell(mesh_part, facet->entities(D)[0]);

    // Check that cell is not a ghost
    dolfin_assert(!mesh_cell.is_ghost());

    // Get local index of facet with respect to the cell
    const std::size_t local_facet = mesh_cell.index(*facet);

    // Update UFC cell
    mesh_cell.get_cell_data(ufc_cell, local_facet);
    mesh_cell.get_coordinate_dofs(coordinate_dofs);

    // Update UFC object
    ufc_part.update(mesh_cell, coordinate_dofs, ufc_cell,
                    integral->enabled_coefficients());

    // Get local-to-global dof maps for cell
    for (std::size_t i = 0; i < form_rank; ++i)
    {
      const auto dofmap = a.function_space(i)->dofmap()->part(part);
      const auto dmap = dofmap->cell_dofs(mesh_cell.index());
      dofs[i] = ArrayView<const dolfin::la_index>(dmap.size(), dmap.data());
    }

    // Tabulate exterior facet tensor
    integral->tabulate_tensor(ufc_part.A.data(), ufc_part.w(),
                              coordinate_dofs.data(),
                              local_facet,
                              ufc_cell.orientation);

    // Add entries to global tensor
    A.add(ufc_part.A.data(), dofs);
  }
}
//-----------------------------------------------------------------------------
void Assembler::assemble_vertices(
  GenericTensor& A, const Form& a, UFC& ufc,
  std::shared_ptr<const MeshFunction<std::size_t>> domains)
{
  // Skip assembly if there are no point integrals
  if (!ufc.form.has_vertex_integrals())
    return;

  // Set timer
  Timer timer("Assemble vertices");

  // Extract mesh
  const Mesh& mesh = a.mesh();

  // Compute cell and vertex-cell connectivity if not already computed
  const std::size_t D = mesh.topology().dim();
  mesh.init(0);
  mesh.init(0, D);
  dolfin_assert(mesh.ordered());

  // Logic for shared vertices
  const bool has_shared_vertices = mesh.topology().have_shared_entities(0);
  const std::map<unsigned int, std::set<unsigned int>>& shared_vertices
    = mesh.topology().shared_entities(0);

  // Form rank
  const std::size_t form_rank = ufc.form.rank();

  // Collect pointers to dof maps
  std::vector<const GenericDofMap*> dofmaps(form_rank);

  // Create a vector for storing the local-to-local map for vertex
  // entity dofs
  std::vector<std::vector<std::size_t>> local_to_local_dofs(form_rank);

  // Create a values vector to be used to fan out local tabulated
  // values to the global tensor
  std::vector<double> local_values(1);

  // Vector to hold local dof map for a vertex
  std::vector<std::vector<dolfin::la_index>> global_dofs(form_rank);
  std::vector<ArrayView<const dolfin::la_index>> global_dofs_p(form_rank);
  std::vector<dolfin::la_index> local_dof_size(form_rank);
  for (std::size_t i = 0; i < form_rank; ++i)
  {
    dofmaps[i] = a.function_space(i)->dofmap().get();

    // Check that the test and trial spaces have dofs on the vertices
    if (dofmaps[i]->num_entity_dofs(0) == 0)
    {
      dolfin_error("Assembler.cpp",
                   "assemble form over vertices",
                   "Expecting test and trial spaces to have dofs on "\
                   "vertices for point integrals");
    }

    // Check that the test and trial spaces do not have dofs other
    // than on vertices
    for (std::size_t j = 1; j <= D; j++)
    {
      if (dofmaps[i]->num_entity_dofs(j) != 0)
      {
        dolfin_error("Assembler.cpp",
                     "assemble form over vertices",
                     "Expecting test and trial spaces to only have dofs on " \
                     "vertices for point integrals");
      }
    }

    // Resize local values so it can hold dofs on one vertex
    local_values.resize(local_values.size()*dofmaps[i]->num_entity_dofs(0));

    // Resize local-to-local map according to the number of vertex
    // entity dofs
    local_to_local_dofs[i].resize(dofmaps[i]->num_entity_dofs(0));

    // Resize local dof map vector
    global_dofs[i].resize(dofmaps[i]->num_entity_dofs(0));

    // Get size of local dofs
    local_dof_size[i] = dofmaps[i]->ownership_range().second
      - dofmaps[i]->ownership_range().first;

    // Get pointer to global dofs
    global_dofs_p[i].set(global_dofs[i]);
  }

  // Vector to hold dof map for a cell
  std::vector<ArrayView<const dolfin::la_index>> dofs(form_rank);

  // Vertex integral
  const ufc::vertex_integral* integral = ufc.default_vertex_integral.get();

  // Check whether integral is domain-dependent
  bool use_domains = domains && !domains->empty();

  // MPI rank
  const unsigned int my_mpi_rank = MPI::rank(mesh.mpi_comm());

  // Assemble over vertices
  ufc::cell ufc_cell;
  std::vector<double> coordinate_dofs;
  Progress p(AssemblerBase::progress_message(A.rank(), "vertices"),
             mesh.num_vertices());
  for (VertexIterator vert(mesh); !vert.end(); ++vert)
  {
    // Get integral for sub domain (if any)
    if (use_domains)
      integral = ufc.get_vertex_integral((*domains)[*vert]);

    // Skip integral if zero
    if (!integral)
      continue;

    // Check if assembling a scalar and a vertex is shared
    if (form_rank == 0 && has_shared_vertices)
    {
      // Find shared processes for this global vertex
      std::map<unsigned int, std::set<unsigned int>>::const_iterator e;
      e = shared_vertices.find(vert->index());

      // If the vertex is shared and this rank is not the lowest, do not
      // include the contribution from this vertex in the scalar sum
      if (e != shared_vertices.end())
      {
        bool skip_vertex = false;
        std::set<unsigned int>::const_iterator it;
        for (it = e->second.begin(); it != e->second.end(); it++)
        {
          // Check if a shared vertex has a lower process rank
          if (*it < my_mpi_rank)
          {
            skip_vertex = true;
            break;
          }
        }

        if (skip_vertex)
          continue;
      }
    }

    // Get mesh cell to which mesh vertex belongs (pick first)
    Cell mesh_cell(mesh, vert->entities(D)[0]);

    // Check that cell is not a ghost
    dolfin_assert(!mesh_cell.is_ghost());

    // Get local index of vertex with respect to the cell
    const std::size_t local_vertex = mesh_cell.index(*vert);

    // Update UFC cell
    mesh_cell.get_cell_data(ufc_cell);
    mesh_cell.get_coordinate_dofs(coordinate_dofs);

    // Update UFC object
    ufc.update(mesh_cell, coordinate_dofs, ufc_cell,
               integral->enabled_coefficients());

    // Tabulate vertex tensor
    integral->tabulate_tensor(ufc.A.data(), ufc.w(), coordinate_dofs.data(),
                              local_vertex, ufc_cell.orientation);

    // For rank 1 and 2 tensors we need to check that the tabulated dofs
    // for the test space are within the local range
    bool owns_all_dofs = true;
    for (std::size_t i = 0; i < form_rank; ++i)
    {
      // Get local-to-global dof maps for cell
      dofs[i] = dofmaps[i]->cell_dofs(mesh_cell.index());

      // Get local dofs of the local vertex
      dofmaps[i]->tabulate_entity_dofs(local_to_local_dofs[i], 0,
                                       local_vertex);

      // Copy cell dofs to local dofs and check ownership range
      for (std::size_t j = 0; j < local_to_local_dofs[i].size(); ++j)
      {
        global_dofs[i][j] = dofs[i][local_to_local_dofs[i][j]];

        // It is the dofs of the test space that determine whether a dof
        // is owned by a process, hence i == 0
        if (i == 0 && global_dofs[i][j] >= local_dof_size[i])
        {
          owns_all_dofs = false;
          break;
        }
      }
    }

    // If not owning all dofs
    if (!owns_all_dofs)
      continue;

    // Scalar
    if (form_rank == 0)
    {
      // Add entries to global tensor
      A.add_local(ufc.A.data(), dofs);
    }
    else if (form_rank == 1)
    {
      // Copy tabulated tensor to local value vector
      for (std::size_t i = 0; i < local_to_local_dofs[0].size(); ++i)
        local_values[i] = ufc.A[local_to_local_dofs[0][i]];

      // Add local entries to global tensor
      A.add_local(local_values.data(), global_dofs_p);
    }
    else
    {
      // Copy tabulated tensor to local value vector
      const std::size_t num_cols = dofs[1].size();
      for (std::size_t i = 0; i < local_to_local_dofs[0].size(); ++i)
      {
        for (std::size_t j = 0; j < local_to_local_dofs[1].size(); ++j)
        {
          local_values[i*local_to_local_dofs[1].size() + j]
            = ufc.A[local_to_local_dofs[0][i]*num_cols
                    + local_to_local_dofs[1][j]];
        }
      }

      // Add local entries to global tensor
      A.add_local(local_values.data(), global_dofs_p);
    }

    p++;
  }
}
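//-----------------------------------------------------------------------------
// Usage sketch (illustrative only): vertex (point) integrals are assembled
// above when the form carries them (the dP measure in UFL); markers restrict
// them to a subset of vertices. Assumes a Form::set_vertex_domains setter
// matching the a.vertex_domains() getter used below; the form `a` and marker
// values are hypothetical:
//
//   auto vertex_markers
//     = std::make_shared<MeshFunction<std::size_t>>(mesh, 0, 0);
//   // ... mark the vertices of interest with 1 ...
//   a.set_vertex_domains(vertex_markers);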
//-----------------------------------------------------------------------------
void OpenMpAssembler::assemble_cells(
  GenericTensor& A, const Form& a, UFC& _ufc,
  std::shared_ptr<const MeshFunction<std::size_t>> domains,
  std::vector<double>* values)
{
  // Skip assembly if there are no cell integrals
  if (!_ufc.form.has_cell_integrals())
    return;

  Timer timer("Assemble cells");

  // Set number of OpenMP threads (from parameter systems)
  const std::size_t num_threads = parameters["num_threads"];
  omp_set_num_threads(num_threads);

  // Extract mesh
  const Mesh& mesh = a.mesh();

  // FIXME: Check that UFC copy constructor is dealing with copying
  // pointers correctly

  // Dummy UFC object since each thread needs to create its own UFC object
  UFC ufc(_ufc);

  // Form rank
  const std::size_t form_rank = ufc.form.rank();

  // Cell integral
  const ufc::cell_integral* integral = ufc.default_cell_integral.get();

  // Check whether integral is domain-dependent
  bool use_domains = domains && !domains->empty();

  // Collect pointers to dof maps
  std::vector<const GenericDofMap*> dofmaps;
  for (std::size_t i = 0; i < form_rank; ++i)
    dofmaps.push_back(a.function_space(i)->dofmap().get());

  // Vector to hold dof map for a cell
  std::vector<ArrayView<const dolfin::la_index>> dofs(form_rank);

  // Color mesh
  std::vector<std::size_t> coloring_type = a.coloring(mesh.topology().dim());
  mesh.color(coloring_type);

  // Get coloring data
  std::map<const std::vector<std::size_t>,
           std::pair<std::vector<std::size_t>,
                     std::vector<std::vector<std::size_t>>>>::const_iterator
    mesh_coloring;
  mesh_coloring = mesh.topology().coloring.find(coloring_type);
  if (mesh_coloring == mesh.topology().coloring.end())
  {
    dolfin_error("OpenMPAssembler.cpp",
                 "perform multithreaded assembly using OpenMP assembler",
                 "Requested mesh coloring has not been computed");
  }

  // Get coloring data
  const std::vector<std::vector<std::size_t>>& entities_of_color
    = mesh_coloring->second.second;

  // If assembling a scalar we need to ensure that each thread assembles
  // its own scalar
  std::vector<double> scalars(num_threads, 0.0);

  // Assemble over cells (loop over colours, then cells of same color)
  const std::size_t num_colors = entities_of_color.size();
  Progress p("Assembling cells (threaded)", num_colors);
  for (std::size_t color = 0; color < num_colors; ++color)
  {
    // Get the array of cell indices of current color
    const std::vector<std::size_t>& colored_cells = entities_of_color[color];

    // Number of cells of current color
    const int num_cells = colored_cells.size();

    ufc::cell ufc_cell;
    std::vector<double> vertex_coordinates;

    // OpenMP test loop over cells of the same color
    #pragma omp parallel for schedule(guided, 20) firstprivate(ufc, ufc_cell, vertex_coordinates, dofs, integral)
    for (int cell_index = 0; cell_index < num_cells; ++cell_index)
    {
      // Cell index
      const std::size_t index = colored_cells[cell_index];

      // Create cell
      const Cell cell(mesh, index);

      // Get integral for sub domain (if any)
      if (use_domains)
        integral = ufc.get_cell_integral((*domains)[cell]);

      // Skip integral if zero
      if (!integral)
        continue;

      // Update to current cell
      cell.get_cell_data(ufc_cell);
      cell.get_vertex_coordinates(vertex_coordinates);
      ufc.update(cell, vertex_coordinates, ufc_cell,
                 integral->enabled_coefficients());

      // Get local-to-global dof maps for cell
      for (std::size_t i = 0; i < form_rank; ++i)
        dofs[i] = dofmaps[i]->cell_dofs(index);

      // Tabulate cell tensor
      integral->tabulate_tensor(ufc.A.data(), ufc.w(),
                                vertex_coordinates.data(),
                                ufc_cell.orientation);

      // Add entries to global tensor (cell-wise values are stored by
      // global cell index)
      if (values && form_rank == 0)
        (*values)[index] = ufc.A[0];
      else if (form_rank == 0)
        scalars[omp_get_thread_num()] += ufc.A[0];
      else
        A.add_local(ufc.A.data(), dofs);
    }
    p++;
  }

  // If we assemble a scalar we need to sum the contributions from each thread
  if (form_rank == 0)
  {
    const double scalar_sum = std::accumulate(scalars.begin(), scalars.end(),
                                              0.0);
    A.add_local(&scalar_sum, dofs);
  }
}
//----------------------------------------------------------------------------
void Assembler::assemble(GenericTensor& A, const Form& a)
{
  // All assembler functions above end up calling this function, which
  // in turn calls the assembler functions below to assemble over
  // cells, exterior and interior facets.

  // Check whether we should call the multi-core assembler
#ifdef HAS_OPENMP
  const std::size_t num_threads = parameters["num_threads"];
  if (num_threads > 0)
  {
    OpenMpAssembler assembler;
    assembler.add_values = add_values;
    assembler.finalize_tensor = finalize_tensor;
    assembler.keep_diagonal = keep_diagonal;
    assembler.assemble(A, a);
    return;
  }
#endif

  // Get cell domains
  std::shared_ptr<const MeshFunction<std::size_t>>
    cell_domains = a.cell_domains();

  // Get exterior facet domains
  std::shared_ptr<const MeshFunction<std::size_t>>
    exterior_facet_domains = a.exterior_facet_domains();

  // Get interior facet domains
  std::shared_ptr<const MeshFunction<std::size_t>>
    interior_facet_domains = a.interior_facet_domains();

  // Get vertex domains
  std::shared_ptr<const MeshFunction<std::size_t>>
    vertex_domains = a.vertex_domains();

  // Check form
  AssemblerBase::check(a);

  // Create data structure for local assembly data
  UFC ufc(a);

  // Update off-process coefficients
  const std::vector<std::shared_ptr<const GenericFunction>>
    coefficients = a.coefficients();

  // Initialize global tensor
  init_global_tensor(A, a);

  // Assemble over cells
  assemble_cells(A, a, ufc, cell_domains, NULL);

  // Assemble over exterior facets
  assemble_exterior_facets(A, a, ufc, exterior_facet_domains, NULL);

  // Assemble over interior facets
  assemble_interior_facets(A, a, ufc, interior_facet_domains, cell_domains,
                           NULL);

  // Assemble over vertices
  assemble_vertices(A, a, ufc, vertex_domains);

  // Finalize assembly of global tensor
  if (finalize_tensor)
    A.apply("add");
}
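//----------------------------------------------------------------------------
// Usage sketch (illustrative only): the public entry point, with the three
// AssemblerBase flags used throughout this file. The bilinear form `a` is
// hypothetical:
//
//   Matrix A;
//   Assembler assembler;
//   assembler.add_values = false;      // zero the tensor before assembly
//   assembler.finalize_tensor = true;  // call A.apply("add") when done
//   assembler.keep_diagonal = false;   // see init_global_tensor
//   assembler.assemble(A, a);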
//-----------------------------------------------------------------------------
void OpenMpAssembler::assemble_cells_and_exterior_facets(
  GenericTensor& A, const Form& a, UFC& _ufc,
  std::shared_ptr<const MeshFunction<std::size_t>> cell_domains,
  std::shared_ptr<const MeshFunction<std::size_t>> exterior_facet_domains,
  std::vector<double>* values)
{
  Timer timer("Assemble cells and exterior facets");

  // Set number of OpenMP threads (from parameter systems)
  const int num_threads = parameters["num_threads"];
  omp_set_num_threads(num_threads);

  // Extract mesh
  const Mesh& mesh = a.mesh();

  // Compute facets and facet-cell connectivity if not already computed
  const std::size_t D = mesh.topology().dim();
  mesh.init(D - 1);
  mesh.init(D - 1, D);
  dolfin_assert(mesh.ordered());

  // Get connectivity
  const MeshConnectivity& connectivity = mesh.topology()(D, D - 1);
  dolfin_assert(!connectivity.empty());

  // Dummy UFC object since each thread needs to create its own UFC object
  UFC ufc(_ufc);

  // Form rank
  const std::size_t form_rank = ufc.form.rank();

  // Cell and facet integrals
  ufc::cell_integral* cell_integral = ufc.default_cell_integral.get();
  ufc::exterior_facet_integral* facet_integral
    = ufc.default_exterior_facet_integral.get();

  // Check whether integrals are domain-dependent
  bool use_cell_domains = cell_domains && !cell_domains->empty();
  bool use_exterior_facet_domains
    = exterior_facet_domains && !exterior_facet_domains->empty();

  // Collect pointers to dof maps
  std::vector<const GenericDofMap*> dofmaps;
  for (std::size_t i = 0; i < form_rank; ++i)
    dofmaps.push_back(a.function_space(i)->dofmap().get());

  // Vector to hold dof maps for a cell
  std::vector<ArrayView<const dolfin::la_index>> dofs(form_rank);

  // FIXME: Pass or determine coloring type

  // Define graph type
  std::vector<std::size_t> coloring_type = a.coloring(mesh.topology().dim());
  mesh.color(coloring_type);

  // Get coloring data
  std::map<const std::vector<std::size_t>,
           std::pair<std::vector<std::size_t>,
                     std::vector<std::vector<std::size_t>>>>::const_iterator
    mesh_coloring;
  mesh_coloring = mesh.topology().coloring.find(coloring_type);
  if (mesh_coloring == mesh.topology().coloring.end())
  {
    dolfin_error("OpenMPAssembler.cpp",
                 "perform multithreaded assembly using OpenMP assembler",
                 "Requested mesh coloring has not been computed");
  }

  // Get coloring data
  const std::vector<std::vector<std::size_t>>& entities_of_color
    = mesh_coloring->second.second;

  // If assembling a scalar we need to ensure that each thread assembles
  // its own scalar
  std::vector<double> scalars(num_threads, 0.0);

  // UFC cell and vertex coordinates
  ufc::cell ufc_cell;
  std::vector<double> vertex_coordinates;

  // Assemble over cells (loop over colors, then cells of same color)
  const std::size_t num_colors = entities_of_color.size();
  for (std::size_t color = 0; color < num_colors; ++color)
  {
    // Get the array of cell indices of current color
    const std::vector<std::size_t>& colored_cells = entities_of_color[color];

    // Number of cells of current color
    const int num_cell_in_color = colored_cells.size();

    // OpenMP test loop over cells of the same color
    Progress p(AssemblerBase::progress_message(A.rank(), "cells"),
               num_colors);
    #pragma omp parallel for schedule(guided, 20) firstprivate(ufc, ufc_cell, vertex_coordinates, dofs, cell_integral, facet_integral)
    for (int index = 0; index < num_cell_in_color; ++index)
    {
      // Cell index
      const std::size_t cell_index = colored_cells[index];

      // Create cell
      const Cell cell(mesh, cell_index);

      // Get integral for sub domain (if any)
      if (use_cell_domains)
        cell_integral = ufc.get_cell_integral((*cell_domains)[cell_index]);

      // Update to current cell
      cell.get_cell_data(ufc_cell);
      cell.get_vertex_coordinates(vertex_coordinates);

      // Get local-to-global dof maps for cell
      for (std::size_t i = 0; i < form_rank; ++i)
        dofs[i] = dofmaps[i]->cell_dofs(cell_index);

      // Get number of entries in cell tensor
      std::size_t dim = 1;
      for (std::size_t i = 0; i < form_rank; ++i)
        dim *= dofs[i].size();

      // Tabulate cell tensor if we have a cell_integral
      if (cell_integral)
      {
        ufc.update(cell, vertex_coordinates, ufc_cell,
                   cell_integral->enabled_coefficients());
        cell_integral->tabulate_tensor(ufc.A.data(), ufc.w(),
                                       vertex_coordinates.data(),
                                       ufc_cell.orientation);
      }
      else
        std::fill(ufc.A.begin(), ufc.A.end(), 0.0);

      // Assemble over exterior facets
      for (FacetIterator facet(cell); !facet.end(); ++facet)
      {
        // Only consider exterior facets
        if (!facet->exterior())
        {
          p++;
          continue;
        }

        // Get local facet index
        const std::size_t local_facet = cell.index(*facet);

        // Get integral for sub domain (if any)
        if (use_exterior_facet_domains)
        {
          // Get global facet index
          const std::size_t facet_index
            = connectivity(cell_index)[local_facet];
          facet_integral
            = ufc.get_exterior_facet_integral((*exterior_facet_domains)[facet_index]);
        }

        // Skip integral if zero
        if (!facet_integral)
          continue;

        // FIXME: Do we really need an update version with the local
        // facet index?

        // Update UFC object
        ufc_cell.local_facet = local_facet;
        ufc.update(cell, vertex_coordinates, ufc_cell,
                   facet_integral->enabled_coefficients());

        // Tabulate tensor
        facet_integral->tabulate_tensor(ufc.A_facet.data(), ufc.w(),
                                        vertex_coordinates.data(),
                                        local_facet,
                                        ufc_cell.orientation);

        // Add facet contribution
        for (std::size_t i = 0; i < dim; ++i)
          ufc.A[i] += ufc.A_facet[i];
      }

      // Add entries to global tensor
      if (values && form_rank == 0)
        (*values)[cell_index] = ufc.A[0];
      else if (form_rank == 0)
        scalars[omp_get_thread_num()] += ufc.A[0];
      else
        A.add_local(&ufc.A[0], dofs);
    }
    p++;
  }

  // If we assemble a scalar we need to sum the contributions from each thread
  if (form_rank == 0)
  {
    const double scalar_sum = std::accumulate(scalars.begin(), scalars.end(),
                                              0.0);
    A.add_local(&scalar_sum, dofs);
  }
}
//-----------------------------------------------------------------------------
void AssemblerBase::init_global_tensor(GenericTensor& A, const Form& a)
{
  dolfin_assert(a.ufc_form());

  // Get dof maps
  std::vector<const GenericDofMap*> dofmaps;
  for (std::size_t i = 0; i < a.rank(); ++i)
    dofmaps.push_back(a.function_space(i)->dofmap().get());

  if (A.empty())
  {
    Timer t0("Build sparsity");

    // Create layout for initialising tensor
    std::shared_ptr<TensorLayout> tensor_layout;
    tensor_layout = A.factory().create_layout(a.rank());
    dolfin_assert(tensor_layout);

    // Get dimensions and mapping across processes for each dimension
    std::vector<std::shared_ptr<const IndexMap>> index_maps;
    for (std::size_t i = 0; i < a.rank(); i++)
    {
      dolfin_assert(dofmaps[i]);
      index_maps.push_back(dofmaps[i]->index_map());
    }

    // Initialise tensor layout
    // FIXME: somewhere need to check block sizes are same on both axes
    // NOTE: Jan: that will be done on the backend side; IndexMap will
    //       provide tabulate functions with arbitrary block size;
    //       moreover the functions will tabulate directly using a
    //       correct int type
    tensor_layout->init(a.mesh().mpi_comm(), index_maps,
                        TensorLayout::Ghosts::UNGHOSTED);

    // Build sparsity pattern if required
    if (tensor_layout->sparsity_pattern())
    {
      GenericSparsityPattern& pattern = *tensor_layout->sparsity_pattern();
      SparsityPatternBuilder::build(pattern,
                                    a.mesh(), dofmaps,
                                    a.ufc_form()->has_cell_integrals(),
                                    a.ufc_form()->has_interior_facet_integrals(),
                                    a.ufc_form()->has_exterior_facet_integrals(),
                                    a.ufc_form()->has_vertex_integrals(),
                                    keep_diagonal);
    }
    t0.stop();

    // Initialize tensor
    Timer t1("Init tensor");
    A.init(*tensor_layout);
    t1.stop();

    // Insert zeros on the diagonal as diagonal entries may be
    // prematurely optimised away by the linear algebra backend when
    // calling GenericMatrix::apply, e.g. PETSc does this then errors
    // when matrices have no diagonal entry inserted.
    if (A.rank() == 2 && keep_diagonal)
    {
      // Down cast to GenericMatrix
      GenericMatrix& _matA = A.down_cast<GenericMatrix>();

      // Loop over rows and insert 0.0 on the diagonal
      const double block = 0.0;
      const std::pair<std::size_t, std::size_t> row_range = A.local_range(0);
      const std::size_t range = std::min(row_range.second, A.size(1));
      for (std::size_t i = row_range.first; i < range; i++)
      {
        dolfin::la_index _i = i;
        _matA.set(&block, 1, &_i, 1, &_i);
      }
      A.apply("flush");
    }

    // Delete sparsity pattern
    Timer t2("Delete sparsity");
    t2.stop();
  }
  else
  {
    // If tensor is not reset, check that dimensions are correct
    for (std::size_t i = 0; i < a.rank(); ++i)
    {
      if (A.size(i) != dofmaps[i]->global_dimension())
      {
        dolfin_error("AssemblerBase.cpp",
                     "assemble form",
                     "Dim %d of tensor does not match form", i);
      }
    }
  }

  if (!add_values)
    A.zero();
}
//-----------------------------------------------------------------------------
void MultiMeshAssembler::_assemble_interface(GenericTensor& A,
                                             const MultiMeshForm& a)
{
  // Extract multimesh
  std::shared_ptr<const MultiMesh> multimesh = a.multimesh();

  // Get form rank
  const std::size_t form_rank = a.rank();

  // Get multimesh coefficients
  // These are updated within this assembly loop
  std::map<std::size_t, std::shared_ptr<const MultiMeshFunction>>
    multimesh_coefficients = a.multimesh_coefficients();

  // Identify the coefficients that are not MultiMeshFunctions.
  // These will be updated by UFC.
  // It is assumed that the coefficients are the same for all parts.
  std::vector<bool> ufc_enabled_coefficients;
  for (std::size_t i = 0; i < a.part(0)->coefficients().size(); i++)
  {
    bool ufc_update
      = multimesh_coefficients.find(i) == multimesh_coefficients.end();
    ufc_enabled_coefficients.push_back(ufc_update);
  }

  // Collect pointers to dof maps
  std::vector<const MultiMeshDofMap*> dofmaps;
  for (std::size_t i = 0; i < form_rank; i++)
    dofmaps.push_back(a.function_space(i)->dofmap().get());

  // Vector to hold dof map for a cell
  std::vector<const std::vector<dolfin::la_index>*> dofs(form_rank);

  // Initialize variables that will be reused throughout assembly
  ufc::cell ufc_cell[2];
  std::vector<double> coordinate_dofs[2];
  std::vector<double> macro_coordinate_dofs;

  // Vector to hold dofs for cells, and a vector holding pointers to same
  std::vector<ArrayView<const dolfin::la_index>> macro_dof_ptrs(form_rank);
  std::vector<std::vector<dolfin::la_index>> macro_dofs(form_rank);

  // Iterate over parts
  for (std::size_t part = 0; part < a.num_parts(); part++)
  {
    log(PROGRESS, "Assembling multimesh form over interface on part %d.",
        part);

    // Get form for current part
    const Form& a_part = *a.part(part);

    // Create data structure for local assembly data
    UFC ufc_part(a_part);

    // FIXME: Handle subdomains

    // Get integral
    ufc::interface_integral* integral
      = ufc_part.default_interface_integral.get();

    // Skip if we don't have an interface integral
    if (!integral)
      continue;

    // Get quadrature rules
    const auto& quadrature_rules = multimesh->quadrature_rules_interface(part);

    // Get collision map
    const auto& cmap = multimesh->collision_map_cut_cells(part);

    // Get facet normals
    const auto& facet_normals = multimesh->facet_normals(part);

    // Iterate over all cut cells in collision map
    for (auto it = cmap.begin(); it != cmap.end(); ++it)
    {
      // Get cut cell
      const unsigned int cut_cell_index = it->first;
      const Cell cut_cell(*multimesh->part(part), cut_cell_index);

      // Iterate over cutting cells
      const auto& cutting_cells = it->second;
      for (auto jt = cutting_cells.begin(); jt != cutting_cells.end(); jt++)
      {
        // Get cutting part and cutting cell
        const std::size_t cutting_part = jt->first;
        const std::size_t cutting_cell_index = jt->second;
        const Cell cutting_cell(*multimesh->part(cutting_part),
                                cutting_cell_index);

        // Get quadrature rule for interface part defined by
        // intersection of the cut and cutting cells
        const std::size_t k = jt - cutting_cells.begin();
        dolfin_assert(k < quadrature_rules.at(cut_cell_index).size());
        const auto& qr = quadrature_rules.at(cut_cell_index)[k];

        // FIXME: There might be quite a few cases when we skip cutting
        // FIXME: cells because there are no quadrature points. Perhaps
        // FIXME: we can rewrite this inner loop to avoid unnecessary
        // FIXME: iterations.

        // Skip if there are no quadrature points
        const std::size_t num_quadrature_points = qr.second.size();
        if (num_quadrature_points == 0)
          continue;

        // Create aliases for cells to simplify notation
        const std::size_t& part_1 = cutting_part;
        const std::size_t& part_0 = part;
        const Cell& cell_1 = cutting_cell;
        const Cell& cell_0 = cut_cell;

        // Update to current pair of cells
        // Let UFC update the coefficients that are not MultiMeshFunctions
        cell_0.get_cell_data(ufc_cell[0], 0);
        cell_1.get_cell_data(ufc_cell[1], 0);
        cell_0.get_coordinate_dofs(coordinate_dofs[0]);
        cell_1.get_coordinate_dofs(coordinate_dofs[1]);
        ufc_part.update(cell_0, coordinate_dofs[0], ufc_cell[0],
                        cell_1, coordinate_dofs[1], ufc_cell[1],
                        ufc_enabled_coefficients);

        // Manually update multimesh coefficients
        for (auto it : multimesh_coefficients)
        {
          std::size_t coefficient_number = it.first;
          std::shared_ptr<const MultiMeshFunction> coefficient = it.second;
          double** macro_w = ufc_part.macro_w();
          const FiniteElement& element
            = *coefficient->function_space()->part(part_0)->element();
          std::size_t offset = element.space_dimension();

          double* w_0 = macro_w[coefficient_number];
          double* w_1 = macro_w[coefficient_number] + offset;

          coefficient->restrict(w_0, element, part_0, cell_0,
                                coordinate_dofs[0].data(), ufc_cell[0]);
          coefficient->restrict(w_1, element, part_1, cell_1,
                                coordinate_dofs[1].data(), ufc_cell[1]);
        }

        // Collect vertex coordinates
        macro_coordinate_dofs.resize(coordinate_dofs[0].size()
                                     + coordinate_dofs[1].size());
        std::copy(coordinate_dofs[0].begin(), coordinate_dofs[0].end(),
                  macro_coordinate_dofs.begin());
        std::copy(coordinate_dofs[1].begin(), coordinate_dofs[1].end(),
                  macro_coordinate_dofs.begin() + coordinate_dofs[0].size());

        // Tabulate dofs for each dimension on macro element
        for (std::size_t i = 0; i < form_rank; i++)
        {
          // Get dofs for cut mesh
          const auto dofmap_0 = a.function_space(i)->dofmap()->part(part_0);
          const auto dofs_0 = dofmap_0->cell_dofs(cell_0.index());

          // Get dofs for cutting mesh
          const auto dofmap_1 = a.function_space(i)->dofmap()->part(part_1);
          const auto dofs_1 = dofmap_1->cell_dofs(cell_1.index());

          // Create space in macro dof vector
          macro_dofs[i].resize(dofs_0.size() + dofs_1.size());

          // Copy cell dofs into macro dof vector
          std::copy(dofs_0.data(), dofs_0.data() + dofs_0.size(),
                    macro_dofs[i].begin());
          std::copy(dofs_1.data(), dofs_1.data() + dofs_1.size(),
                    macro_dofs[i].begin() + dofs_0.size());

          // Update array view
          macro_dof_ptrs[i]
            = ArrayView<const dolfin::la_index>(macro_dofs[i].size(),
                                                macro_dofs[i].data());
        }

        // Get facet normals
        const auto& n = facet_normals.at(cut_cell_index)[k];

        // FIXME: We would like to use this assertion (but it fails
        // for 2 meshes)
        dolfin_assert(n.size()
                      == a_part.mesh()->geometry().dim()*num_quadrature_points);

        // FIXME: For now, use this assertion (which fails for 3 meshes)
        //dolfin_assert(n.size() > 0);

        // FIXME: Cell orientation not supported
        const int cell_orientation = ufc_cell[0].orientation;

        // Tabulate interface tensor on macro element
        integral->tabulate_tensor(ufc_part.macro_A.data(),
                                  ufc_part.macro_w(),
                                  macro_coordinate_dofs.data(),
                                  num_quadrature_points,
                                  qr.first.data(),
                                  qr.second.data(),
                                  n.data(),
                                  cell_orientation);

        // Add entries to global tensor
        A.add(ufc_part.macro_A.data(), macro_dof_ptrs);
      }
    }
  }
}
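//-----------------------------------------------------------------------------
// Inspection sketch (illustrative only): the collision map drives both the
// interface and overlap assembly loops above; for each cut cell on a part it
// lists the (part, cell) pairs that cut it. Uses only calls that appear in
// this file; `multimesh` is assumed to be a built MultiMesh:
//
//   for (std::size_t part = 0; part < multimesh->num_parts(); part++)
//     for (const auto& entry : multimesh->collision_map_cut_cells(part))
//       for (const auto& cutting : entry.second)
//         info("Cut cell %d on part %d is cut by cell %d on part %d.",
//              entry.first, part, cutting.second, cutting.first);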