template <typename BasisFunctionType, typename ResultType>
std::unique_ptr<DiscreteBoundaryOperator<ResultType>>
ElementaryLocalOperator<BasisFunctionType, ResultType>::
    assembleWeakFormInSparseMode(LocalAssembler &assembler,
                                 const AssemblyOptions &options) const {
  if (boost::is_complex<BasisFunctionType>::value)
    throw std::runtime_error(
        "ElementaryLocalOperator::assembleWeakFormInSparseMode(): "
        "sparse-mode assembly of identity operators for "
        "complex-valued basis functions is not supported yet");

  const Space<BasisFunctionType> &testSpace = *this->dualToRange();
  const Space<BasisFunctionType> &trialSpace = *this->domain();

  // Evaluate the local weak forms on all elements
  const GridView &view = testSpace.gridView();
  const size_t elementCount = view.entityCount(0);
  std::vector<int> elementIndices(elementCount);
  for (size_t i = 0; i < elementCount; ++i)
    elementIndices[i] = i;
  std::vector<Matrix<ResultType>> localResult;
  assembler.evaluateLocalWeakForms(elementIndices, localResult);

  // Global DOF indices corresponding to local DOFs on elements
  std::vector<std::vector<GlobalDofIndex>> testGdofs(elementCount);
  std::vector<std::vector<GlobalDofIndex>> trialGdofs(elementCount);
  std::vector<std::vector<BasisFunctionType>> testLdofWeights(elementCount);
  std::vector<std::vector<BasisFunctionType>> trialLdofWeights(elementCount);
  gatherGlobalDofs(testSpace, trialSpace, testGdofs, trialGdofs,
                   testLdofWeights, trialLdofWeights);

  // Multiply matrix entries by DOF weights
  for (size_t e = 0; e < elementCount; ++e)
    for (size_t trialDof = 0; trialDof < trialGdofs[e].size(); ++trialDof)
      for (size_t testDof = 0; testDof < testGdofs[e].size(); ++testDof)
        localResult[e](testDof, trialDof) *=
            conj(testLdofWeights[e][testDof]) * trialLdofWeights[e][trialDof];

  const int testGlobalDofCount = testSpace.globalDofCount();
  const int trialGlobalDofCount = trialSpace.globalDofCount();

  shared_ptr<RealSparseMatrix> result = boost::make_shared<RealSparseMatrix>(
      testGlobalDofCount, trialGlobalDofCount);

  std::vector<Eigen::Triplet<double>> triplets;
  generateTriplets<ResultType>(*result, testGdofs, trialGdofs, localResult,
                               elementCount, triplets);
  result->setFromTriplets(triplets.begin(), triplets.end());

  // Create and return a discrete operator represented by the matrix that
  // has just been calculated
  return std::unique_ptr<DiscreteBoundaryOperator<ResultType>>(
      new DiscreteSparseBoundaryOperator<ResultType>(result, this->symmetry(),
                                                     NO_TRANSPOSE));
}
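// Illustrative aside (not part of the BEM++ sources): the function above
// relies on Eigen's triplet-based sparse assembly. The self-contained sketch
// below shows that pattern in isolation; the matrix size and values are made
// up, and assembleFromTriplets is a hypothetical name.
#include <Eigen/Sparse>
#include <vector>

inline Eigen::SparseMatrix<double> assembleFromTriplets() {
  Eigen::SparseMatrix<double> mat(4, 4);
  std::vector<Eigen::Triplet<double>> triplets;
  // Each triplet holds (row, column, value); setFromTriplets sums entries
  // with identical indices, which matches the "+=" semantics of assembly.
  triplets.emplace_back(0, 0, 2.0);
  triplets.emplace_back(1, 1, 2.0);
  triplets.emplace_back(0, 1, -1.0);
  triplets.emplace_back(0, 1, -1.0); // accumulated onto the previous entry
  mat.setFromTriplets(triplets.begin(), triplets.end());
  return mat;
}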
template <typename BasisFunctionType, typename ResultType>
std::unique_ptr<DiscreteBoundaryOperator<ResultType>>
ElementaryLocalOperator<BasisFunctionType, ResultType>::
    assembleWeakFormInDenseMode(LocalAssembler &assembler,
                                const AssemblyOptions &options) const {
  const Space<BasisFunctionType> &testSpace = *this->dualToRange();
  const Space<BasisFunctionType> &trialSpace = *this->domain();

  // Evaluate the local weak forms on all elements
  const GridView &view = testSpace.gridView();
  const size_t elementCount = view.entityCount(0);
  std::vector<int> elementIndices(elementCount);
  for (size_t i = 0; i < elementCount; ++i)
    elementIndices[i] = i;
  std::vector<Matrix<ResultType>> localResult;
  assembler.evaluateLocalWeakForms(elementIndices, localResult);

  // Create the operator's matrix
  Matrix<ResultType> result(testSpace.globalDofCount(),
                            trialSpace.globalDofCount());
  result.setZero();

  // Retrieve global DOFs corresponding to local DOFs on all elements
  std::vector<std::vector<GlobalDofIndex>> testGdofs(elementCount);
  std::vector<std::vector<GlobalDofIndex>> trialGdofs(elementCount);
  std::vector<std::vector<BasisFunctionType>> testLdofWeights(elementCount);
  std::vector<std::vector<BasisFunctionType>> trialLdofWeights(elementCount);
  gatherGlobalDofs(testSpace, trialSpace, testGdofs, trialGdofs,
                   testLdofWeights, trialLdofWeights);

  // Distribute local matrices into the global matrix, skipping local DOFs
  // without a valid (non-negative) global index
  for (size_t e = 0; e < elementCount; ++e)
    for (size_t trialIndex = 0; trialIndex < trialGdofs[e].size();
         ++trialIndex) {
      int trialGdof = trialGdofs[e][trialIndex];
      if (trialGdof < 0)
        continue;
      for (size_t testIndex = 0; testIndex < testGdofs[e].size();
           ++testIndex) {
        int testGdof = testGdofs[e][testIndex];
        if (testGdof < 0)
          continue;
        result(testGdof, trialGdof) +=
            conj(testLdofWeights[e][testIndex]) *
            trialLdofWeights[e][trialIndex] *
            localResult[e](testIndex, trialIndex);
      }
    }

  return std::unique_ptr<DiscreteBoundaryOperator<ResultType>>(
      new DiscreteDenseBoundaryOperator<ResultType>(result));
}
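// Illustrative aside (not part of the BEM++ sources): a minimal sketch of the
// local-to-global "scatter" step performed in the loop above, using plain
// Eigen types. The names scatterLocalMatrix, gdofs and localMat are
// hypothetical; negative indices mark DOFs to be skipped, mirroring the
// trialGdof/testGdof checks in the real code.
#include <Eigen/Dense>
#include <cstddef>
#include <vector>

inline void scatterLocalMatrix(const std::vector<int> &gdofs,
                               const Eigen::MatrixXd &localMat,
                               Eigen::MatrixXd &globalMat) {
  for (std::size_t trial = 0; trial < gdofs.size(); ++trial) {
    if (gdofs[trial] < 0)
      continue; // inactive trial DOF
    for (std::size_t test = 0; test < gdofs.size(); ++test) {
      if (gdofs[test] < 0)
        continue; // inactive test DOF
      globalMat(gdofs[test], gdofs[trial]) += localMat(test, trial);
    }
  }
}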
template <typename BasisFunctionType, typename ResultType>
std::unique_ptr<DiscreteBoundaryOperator<ResultType>>
ElementaryLocalOperator<BasisFunctionType, ResultType>::
    assembleWeakFormInSparseMode(LocalAssembler &assembler,
                                 const AssemblyOptions &options) const {
#ifdef WITH_TRILINOS
  if (boost::is_complex<BasisFunctionType>::value)
    throw std::runtime_error(
        "ElementaryLocalOperator::assembleWeakFormInSparseMode(): "
        "sparse-mode assembly of identity operators for "
        "complex-valued basis functions is not supported yet");

  const Space<BasisFunctionType> &testSpace = *this->dualToRange();
  const Space<BasisFunctionType> &trialSpace = *this->domain();

  // Evaluate the local weak forms on all elements
  const GridView &view = testSpace.gridView();
  const size_t elementCount = view.entityCount(0);
  std::vector<int> elementIndices(elementCount);
  for (size_t i = 0; i < elementCount; ++i)
    elementIndices[i] = i;
  std::vector<arma::Mat<ResultType>> localResult;
  assembler.evaluateLocalWeakForms(elementIndices, localResult);

  // Global DOF indices corresponding to local DOFs on elements
  std::vector<std::vector<GlobalDofIndex>> testGdofs(elementCount);
  std::vector<std::vector<GlobalDofIndex>> trialGdofs(elementCount);
  std::vector<std::vector<BasisFunctionType>> testLdofWeights(elementCount);
  std::vector<std::vector<BasisFunctionType>> trialLdofWeights(elementCount);
  gatherGlobalDofs(testSpace, trialSpace, testGdofs, trialGdofs,
                   testLdofWeights, trialLdofWeights);

  // Multiply matrix entries by DOF weights
  for (size_t e = 0; e < elementCount; ++e)
    for (size_t trialDof = 0; trialDof < trialGdofs[e].size(); ++trialDof)
      for (size_t testDof = 0; testDof < testGdofs[e].size(); ++testDof)
        localResult[e](testDof, trialDof) *=
            conj(testLdofWeights[e][testDof]) * trialLdofWeights[e][trialDof];

  // Estimate the number of entries in each row.
  // This will be useful when we begin to use MPI:
  // // Get global DOF indices for which this process is responsible
  // const int testGlobalDofCount = testSpace.globalDofCount();
  // Epetra_Map rowMap(testGlobalDofCount, 0 /* index-base */, comm);
  // std::vector<int> myTestGlobalDofs(rowMap.MyGlobalElements(),
  //                                   rowMap.MyGlobalElements() +
  //                                       rowMap.NumMyElements());
  // const int myTestGlobalDofCount = myTestGlobalDofs.size();
  const int testGlobalDofCount = testSpace.globalDofCount();
  const int trialGlobalDofCount = trialSpace.globalDofCount();
  arma::Col<int> nonzeroEntryCountEstimates(testGlobalDofCount);
  nonzeroEntryCountEstimates.fill(0);

  // Upper estimate for the number of global trial DOFs coupled to a given
  // global test DOF: sum of the local trial DOF counts for each element that
  // contributes to the global test DOF in question
  for (size_t e = 0; e < elementCount; ++e)
    for (size_t testLdof = 0; testLdof < testGdofs[e].size(); ++testLdof) {
      int testGdof = testGdofs[e][testLdof];
      if (testGdof >= 0)
        nonzeroEntryCountEstimates(testGdof) += trialGdofs[e].size();
    }

  Epetra_SerialComm comm; // To be replaced once we begin to use MPI
  Epetra_LocalMap rowMap(testGlobalDofCount, 0 /* index_base */, comm);
  Epetra_LocalMap colMap(trialGlobalDofCount, 0 /* index_base */, comm);
  shared_ptr<Epetra_FECrsMatrix> result =
      boost::make_shared<Epetra_FECrsMatrix>(
          Copy, rowMap, colMap, nonzeroEntryCountEstimates.memptr());

  // TODO: make each process responsible for a subset of elements

  // Find the maximum number of test/trial local DOF pairs on any element
  size_t maxLdofCount = 0;
  for (size_t e = 0; e < elementCount; ++e)
    maxLdofCount =
        std::max(maxLdofCount, testGdofs[e].size() * trialGdofs[e].size());

  // Initialise the sparse matrix with zeros at the required positions
  arma::Col<double> zeros(maxLdofCount);
  zeros.fill(0.);
  for (size_t e = 0; e < elementCount; ++e)
    result->InsertGlobalValues(testGdofs[e].size(), &testGdofs[e][0],
                               trialGdofs[e].size(), &trialGdofs[e][0],
                               zeros.memptr());
  // Add contributions from individual elements
  for (size_t e = 0; e < elementCount; ++e)
    epetraSumIntoGlobalValues(*result, testGdofs[e], trialGdofs[e],
                              localResult[e]);
  result->GlobalAssemble();

  // If the assembly mode is ACA and AHMED is available, construct the block
  // cluster tree; otherwise leave it uninitialised.
  typedef ClusterConstructionHelper<BasisFunctionType> CCH;
  typedef AhmedDofWrapper<CoordinateType> AhmedDofType;
  typedef ExtendedBemCluster<AhmedDofType> AhmedBemCluster;
  typedef bbxbemblcluster<AhmedDofType, AhmedDofType> AhmedBemBlcluster;
  shared_ptr<AhmedBemBlcluster> blockCluster;
  shared_ptr<IndexPermutation> test_o2pPermutation, test_p2oPermutation;
  shared_ptr<IndexPermutation> trial_o2pPermutation, trial_p2oPermutation;
#ifdef WITH_AHMED
  if (options.assemblyMode() == AssemblyOptions::ACA) {
    const AcaOptions &acaOptions = options.acaOptions();
    bool indexWithGlobalDofs = acaOptions.mode != AcaOptions::HYBRID_ASSEMBLY;
    shared_ptr<AhmedBemCluster> testClusterTree;
    CCH::constructBemCluster(testSpace, indexWithGlobalDofs, acaOptions,
                             testClusterTree, test_o2pPermutation,
                             test_p2oPermutation);
    // TODO: construct a Hermitian H-matrix if possible
    shared_ptr<AhmedBemCluster> trialClusterTree;
    CCH::constructBemCluster(trialSpace, indexWithGlobalDofs, acaOptions,
                             trialClusterTree, trial_o2pPermutation,
                             trial_p2oPermutation);
    unsigned int blockCount = 0;
    bool useStrongAdmissibilityCondition = !indexWithGlobalDofs;
    blockCluster.reset(CCH::constructBemBlockCluster(
                           acaOptions, false /* hermitian */,
                           *testClusterTree, *trialClusterTree,
                           useStrongAdmissibilityCondition,
                           blockCount).release());
  }
#endif

  // Create and return a discrete operator represented by the matrix that
  // has just been calculated
  return std::unique_ptr<DiscreteBoundaryOperator<ResultType>>(
      new DiscreteSparseBoundaryOperator<ResultType>(
          result, this->symmetry(), NO_TRANSPOSE, blockCluster,
          trial_o2pPermutation, test_o2pPermutation));
#else // WITH_TRILINOS
  throw std::runtime_error(
      "ElementaryLocalOperator::assembleWeakFormInSparseMode(): "
      "To enable assembly in sparse mode, recompile BEM++ "
      "with the symbol WITH_TRILINOS defined.");
#endif
}
template <typename BasisFunctionType, typename ResultType>
std::auto_ptr<DiscreteBoundaryOperator<ResultType> >
DenseGlobalAssembler<BasisFunctionType, ResultType>::assembleDetachedWeakForm(
        const Space<BasisFunctionType>& testSpace,
        const Space<BasisFunctionType>& trialSpace,
        LocalAssemblerForIntegralOperators& assembler,
        const Context<BasisFunctionType, ResultType>& context)
{
    const AssemblyOptions& options = context.assemblyOptions();

    // Global DOF indices corresponding to local DOFs on elements
    std::vector<std::vector<GlobalDofIndex> > testGlobalDofs, trialGlobalDofs;
    std::vector<std::vector<BasisFunctionType> > testLocalDofWeights,
        trialLocalDofWeights;
    gatherGlobalDofs(testSpace, testGlobalDofs, testLocalDofWeights);
    if (&testSpace == &trialSpace) {
        trialGlobalDofs = testGlobalDofs;
        trialLocalDofWeights = testLocalDofWeights;
    } else
        gatherGlobalDofs(trialSpace, trialGlobalDofs, trialLocalDofWeights);
    const size_t testElementCount = testGlobalDofs.size();
    const size_t trialElementCount = trialGlobalDofs.size();

    // Make a vector of all element indices
    std::vector<int> testIndices(testElementCount);
    for (size_t i = 0; i < testElementCount; ++i)
        testIndices[i] = i;

    // Create the operator's matrix
    arma::Mat<ResultType> result(testSpace.globalDofCount(),
                                 trialSpace.globalDofCount());
    result.fill(0.);

    typedef DenseWeakFormAssemblerLoopBody<BasisFunctionType, ResultType> Body;
    typename Body::MutexType mutex;

    const ParallelizationOptions& parallelOptions =
        options.parallelizationOptions();
    int maxThreadCount = 1;
    if (!parallelOptions.isOpenClEnabled()) {
        if (parallelOptions.maxThreadCount() == ParallelizationOptions::AUTO)
            maxThreadCount = tbb::task_scheduler_init::automatic;
        else
            maxThreadCount = parallelOptions.maxThreadCount();
    }
    tbb::task_scheduler_init scheduler(maxThreadCount);
    {
        Fiber::SerialBlasRegion region;
        tbb::parallel_for(tbb::blocked_range<size_t>(0, trialElementCount),
                          Body(testIndices, testGlobalDofs, trialGlobalDofs,
                               testLocalDofWeights, trialLocalDofWeights,
                               assembler, result, mutex));
    }

    //// Old serial code (TODO: decide whether to keep it behind e.g.
    //// #ifndef PARALLEL)
    //    std::vector<arma::Mat<ValueType> > localResult;
    //    // Loop over trial elements
    //    for (int trialIndex = 0; trialIndex < trialElementCount; ++trialIndex)
    //    {
    //        // Evaluate integrals over pairs of the current trial element and
    //        // all the test elements
    //        assembler.evaluateLocalWeakForms(TEST_TRIAL, testIndices,
    //                                         trialIndex, ALL_DOFS, localResult);
    //        // Loop over test indices
    //        for (int testIndex = 0; testIndex < testElementCount; ++testIndex)
    //            // Add the integrals to appropriate entries in the operator's
    //            // matrix
    //            for (int trialDof = 0;
    //                 trialDof < trialGlobalDofs[trialIndex].size(); ++trialDof)
    //                for (int testDof = 0;
    //                     testDof < testGlobalDofs[testIndex].size(); ++testDof)
    //                    result(testGlobalDofs[testIndex][testDof],
    //                           trialGlobalDofs[trialIndex][trialDof]) +=
    //                        localResult[testIndex](testDof, trialDof);
    //    }

    // Create and return a discrete operator represented by the matrix that
    // has just been calculated
    return std::auto_ptr<DiscreteBoundaryOperator<ResultType> >(
        new DiscreteDenseBoundaryOperator<ResultType>(result));
}
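// Illustrative aside (not part of the BEM++ sources): the tbb::parallel_for
// call above expects a functor with operator()(const tbb::blocked_range<...>&).
// The sketch below shows that shape with hypothetical members; the real
// DenseWeakFormAssemblerLoopBody additionally calls the local assembler and
// applies the DOF weights before updating the shared matrix. It assumes the
// classic TBB headers used by the code above.
#include <cstddef>
#include <tbb/blocked_range.h>
#include <tbb/parallel_for.h>
#include <tbb/spin_mutex.h>
#include <vector>

struct ExampleLoopBody {
    typedef tbb::spin_mutex MutexType;

    std::vector<double>& result; // shared output, guarded by the mutex
    MutexType& mutex;

    ExampleLoopBody(std::vector<double>& result_, MutexType& mutex_)
        : result(result_), mutex(mutex_) {}

    void operator()(const tbb::blocked_range<std::size_t>& range) const {
        for (std::size_t i = range.begin(); i != range.end(); ++i) {
            const double contribution = 1.0;    // placeholder for element integrals
            MutexType::scoped_lock lock(mutex); // serialise the shared update
            result[i % result.size()] += contribution;
        }
    }
};

// Usage sketch:
//   std::vector<double> result(10, 0.0);
//   ExampleLoopBody::MutexType mutex;
//   tbb::parallel_for(tbb::blocked_range<std::size_t>(0, 100),
//                     ExampleLoopBody(result, mutex));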