// Gathers the pieces of assembler-construction data that depend on the
// assembly options.
//
// \param[in]  options           Assembly options; the OpenCL options and the
//                               singular-integral caching flag are read from it.
// \param[in]  testRawGeometry   Raw geometry of the test grid, forwarded to
//                               makeOpenClHandler().
// \param[in]  trialRawGeometry  Raw geometry of the trial grid, forwarded to
//                               makeOpenClHandler().
// \param[out] openClHandler     Filled in by
//                               LocalAssemblerConstructionHelper::makeOpenClHandler().
// \param[out] cacheSingularIntegrals
//                               Set to options.isSingularIntegralCachingEnabled().
void AbstractBoundaryOperator<BasisFunctionType, ResultType>::
collectOptionsDependentDataForAssemblerConstruction(
        const AssemblyOptions& options,
        const shared_ptr<Fiber::RawGridGeometry<CoordinateType> >& testRawGeometry,
        const shared_ptr<Fiber::RawGridGeometry<CoordinateType> >& trialRawGeometry,
        shared_ptr<Fiber::OpenClHandler>& openClHandler,
        bool& cacheSingularIntegrals) const
{
    // The handler is created first; the caching flag is only written once
    // that call has returned.
    LocalAssemblerConstructionHelper::makeOpenClHandler(
                options.parallelizationOptions().openClOptions(),
                testRawGeometry,
                trialRawGeometry,
                openClHandler);

    cacheSingularIntegrals = options.isSingularIntegralCachingEnabled();
}
// Creates the pair of local assemblers used to assemble this operator.
//
// The data needed for construction (raw geometries, geometry factories,
// shapesets, OpenCL handler, caching flag) are first collected via
// collectDataForAssemblerConstruction(); the actual construction is then
// delegated to reallyMakeAssemblers().
//
// \param[in] quadStrategy  Quadrature strategy forwarded to
//                          reallyMakeAssemblers().
// \param[in] options       Assembly options (verbosity, parallelisation,
//                          assembly mode and ACA options are read from it).
// \returns The pair of assemblers returned by reallyMakeAssemblers().
std::pair<
    shared_ptr<typename HypersingularIntegralOperator<
        BasisFunctionType, KernelType, ResultType>::LocalAssembler>,
    shared_ptr<typename HypersingularIntegralOperator<
        BasisFunctionType, KernelType, ResultType>::LocalAssembler> >
HypersingularIntegralOperator<BasisFunctionType, KernelType, ResultType>::
makeAssemblers(const QuadratureStrategy& quadStrategy,
               const AssemblyOptions& options) const
{
    typedef Fiber::RawGridGeometry<CoordinateType> RawGeometry;
    typedef std::vector<const Fiber::Shapeset<BasisFunctionType>*> ShapesetVector;

    const bool reportProgress =
            (options.verbosityLevel() >= VerbosityLevel::DEFAULT);

    // Output slots filled in by collectDataForAssemblerConstruction().
    shared_ptr<RawGeometry> testRawGeometry;
    shared_ptr<RawGeometry> trialRawGeometry;
    shared_ptr<GeometryFactory> testGeometryFactory;
    shared_ptr<GeometryFactory> trialGeometryFactory;
    shared_ptr<Fiber::OpenClHandler> openClHandler;
    shared_ptr<ShapesetVector> testShapesets;
    shared_ptr<ShapesetVector> trialShapesets;
    bool cacheSingularIntegrals;

    if (reportProgress)
        std::cout << "Collecting data for assembler construction..."
                  << std::endl;
    this->collectDataForAssemblerConstruction(
                options,
                testRawGeometry, trialRawGeometry,
                testGeometryFactory, trialGeometryFactory,
                testShapesets, trialShapesets,
                openClHandler, cacheSingularIntegrals);
    if (reportProgress)
        std::cout << "Data collection finished." << std::endl;

    // A separate off-diagonal assembler is requested only for ACA assembly
    // in hybrid mode; the ACA options are consulted only in ACA mode.
    bool hybridAcaRequested = false;
    if (options.assemblyMode() == AssemblyOptions::ACA)
        hybridAcaRequested =
                (options.acaOptions().mode == AcaOptions::HYBRID_ASSEMBLY);

    return reallyMakeAssemblers(
                quadStrategy,
                testGeometryFactory, trialGeometryFactory,
                testRawGeometry, trialRawGeometry,
                testShapesets, trialShapesets,
                openClHandler,
                options.parallelizationOptions(),
                options.verbosityLevel(),
                cacheSingularIntegrals,
                hybridAcaRequested);
}
// Assembles the discrete weak form of an operator in ACA mode (as an
// H-matrix, using the AHMED library) and returns it as a discrete boundary
// operator owned by the caller.
//
// Outline of the steps performed when WITH_AHMED is defined:
//   1. build cluster trees (and index permutations) for the test and trial
//      spaces, and the block cluster tree;
//   2. fill the blocks in a TBB parallel loop
//      (AcaWeakFormAssemblerLoopBody);
//   3. optionally agglomerate the blocks (acaOptions.recompress);
//   4. print storage statistics and optionally write a PostScript picture of
//      the matrix partition;
//   5. wrap everything in a DiscreteAcaBoundaryOperator and, if DOFs were
//      indexed with flat local numbers, compose it with the operators
//      mapping between global and flat local DOFs.
//
// \param[in] testSpace               Test function space.
// \param[in] trialSpace              Trial function space.
// \param[in] localAssemblers         Forwarded to WeakFormAcaAssemblyHelper.
// \param[in] sparseTermsToAdd        Forwarded to WeakFormAcaAssemblyHelper.
// \param[in] denseTermsMultipliers   Forwarded to WeakFormAcaAssemblyHelper.
// \param[in] sparseTermsMultipliers  Forwarded to WeakFormAcaAssemblyHelper.
// \param[in] options                 Assembly options (ACA options, verbosity
//                                    and parallelisation options are used).
// \param[in] symmetry                Bit mask tested against the SYMMETRIC
//                                    and HERMITIAN flags.
//
// \returns The assembled operator.
//
// \throws std::runtime_error     if compiled without WITH_AHMED, or if
//                                compiled without WITH_TRILINOS and
//                                acaOptions.globalAssemblyBeforeCompression
//                                is false.
// \throws std::invalid_argument  if a symmetric weak form is requested but the
//                                test and trial DOF counts differ.
std::auto_ptr<DiscreteBoundaryOperator<ResultType> >
AcaGlobalAssembler<BasisFunctionType, ResultType>::assembleDetachedWeakForm(
        const Space<BasisFunctionType>& testSpace,
        const Space<BasisFunctionType>& trialSpace,
        const std::vector<LocalAssembler*>& localAssemblers,
        const std::vector<const DiscreteBndOp*>& sparseTermsToAdd,
        const std::vector<ResultType>& denseTermsMultipliers,
        const std::vector<ResultType>& sparseTermsMultipliers,
        const AssemblyOptions& options,
        int symmetry)
{
#ifdef WITH_AHMED
    typedef AhmedDofWrapper<CoordinateType> AhmedDofType;
    typedef ExtendedBemCluster<AhmedDofType> AhmedBemCluster;
    typedef bemblcluster<AhmedDofType, AhmedDofType> AhmedBemBlcluster;
    typedef DiscreteAcaBoundaryOperator<ResultType> DiscreteAcaLinOp;

    const AcaOptions& acaOptions = options.acaOptions();
    // true: DOFs are indexed with global numbers; false: with flat local
    // numbers (see the choice of testDofCount/trialDofCount below).
    const bool indexWithGlobalDofs = acaOptions.globalAssemblyBeforeCompression;
    const bool verbosityAtLeastDefault =
            (options.verbosityLevel() >= VerbosityLevel::DEFAULT);
    const bool verbosityAtLeastHigh =
            (options.verbosityLevel() >= VerbosityLevel::HIGH);

    // Currently we don't support Hermitian ACA operators. This is because we
    // don't have the means to really test them -- we would need complex-valued
    // basis functions for that. (Assembly of such a matrix would be very easy
    // -- just change complex_sym from true to false in the call to apprx_sym()
    // in AcaWeakFormAssemblerLoopBody::operator() -- but operations on
    // symmetric/Hermitian matrices are not always trivial and we do need to be
    // able to test them properly.)
    bool symmetric = symmetry & SYMMETRIC;
    // HERMITIAN without SYMMETRIC is only reported (at default verbosity or
    // above); assembly then proceeds with symmetric == false.
    if (symmetry & HERMITIAN && !(symmetry & SYMMETRIC) &&
            verbosityAtLeastDefault)
        std::cout << "Warning: assembly of non-symmetric Hermitian H-matrices "
                     "is not supported yet. A general H-matrix will be assembled"
                  << std::endl;

#ifndef WITH_TRILINOS
    // Flat-local indexing needs the DOF-mapping operators constructed at the
    // end of this function, which are only compiled in with Trilinos.
    if (!indexWithGlobalDofs)
        throw std::runtime_error("AcaGlobalAssembler::assembleDetachedWeakForm(): "
                                 "ACA assembly with globalAssemblyBeforeCompression "
                                 "set to false requires BEM++ to be linked with "
                                 "Trilinos");
#endif // WITH_TRILINOS

    // DOF counts in the indexing scheme selected above.
    const size_t testDofCount = indexWithGlobalDofs ?
                testSpace.globalDofCount() : testSpace.flatLocalDofCount();
    const size_t trialDofCount = indexWithGlobalDofs ?
                trialSpace.globalDofCount() : trialSpace.flatLocalDofCount();

    if (symmetric && testDofCount != trialDofCount)
        throw std::invalid_argument("AcaGlobalAssembler::assembleDetachedWeakForm(): "
                                    "you cannot generate a symmetric weak form "
                                    "using test and trial spaces with different "
                                    "numbers of DOFs");

    // o2p: map of original indices to permuted indices
    // p2o: map of permuted indices to original indices
    typedef ClusterConstructionHelper<BasisFunctionType> CCH;
    shared_ptr<AhmedBemCluster> testClusterTree;
    shared_ptr<IndexPermutation> test_o2pPermutation, test_p2oPermutation;
    CCH::constructBemCluster(testSpace, indexWithGlobalDofs, acaOptions,
                             testClusterTree,
                             test_o2pPermutation, test_p2oPermutation);
    // The trial cluster tree and permutations are shared with the test ones
    // when a symmetric matrix is requested or when both arguments refer to
    // the same Space object; otherwise they are constructed separately.
    shared_ptr<AhmedBemCluster> trialClusterTree;
    shared_ptr<IndexPermutation> trial_o2pPermutation, trial_p2oPermutation;
    if (symmetric || &testSpace == &trialSpace) {
        trialClusterTree = testClusterTree;
        trial_o2pPermutation = test_o2pPermutation;
        trial_p2oPermutation = test_p2oPermutation;
    } else
        CCH::constructBemCluster(trialSpace, indexWithGlobalDofs, acaOptions,
                                 trialClusterTree,
                                 trial_o2pPermutation, trial_p2oPermutation);

    // (Disabled debugging code.)
    //    // Export VTK plots showing the distribution of leaf cluster ids
    //    std::vector<unsigned int> testClusterIds;
    //    getClusterIds(*testClusterTree, test_p2oPermutation->permutedIndices(), testClusterIds);
    //    testSpace.dumpClusterIds("testClusterIds", testClusterIds,
    //                             indexWithGlobalDofs ? GLOBAL_DOFS : FLAT_LOCAL_DOFS);
    //    std::vector<unsigned int> trialClusterIds;
    //    getClusterIds(*trialClusterTree, trial_p2oPermutation->permutedIndices(), trialClusterIds);
    //    trialSpace.dumpClusterIds("trialClusterIds", trialClusterIds,
    //                              indexWithGlobalDofs ? GLOBAL_DOFS : FLAT_LOCAL_DOFS);

    if (verbosityAtLeastHigh)
        std::cout << "Test cluster count: " << testClusterTree->getncl()
                  << "\nTrial cluster count: " << trialClusterTree->getncl()
                  << std::endl;

    // constructBemBlockCluster() hands back an owning pointer (release() is
    // called on it); ownership is transferred to the shared_ptr. blockCount
    // is filled in by the call.
    unsigned int blockCount = 0;
    shared_ptr<AhmedBemBlcluster> bemBlclusterTree(
                CCH::constructBemBlockCluster(acaOptions, symmetric,
                                              *testClusterTree, *trialClusterTree,
                                              blockCount).release());

    if (verbosityAtLeastHigh)
        std::cout << "Mblock count: " << blockCount << std::endl;

    // Permuted-to-original index vectors handed to the assembly helper.
    std::vector<unsigned int> p2oTestDofs =
            test_p2oPermutation->permutedIndices();
    std::vector<unsigned int> p2oTrialDofs =
            trial_p2oPermutation->permutedIndices();
    WeakFormAcaAssemblyHelper<BasisFunctionType, ResultType>
            helper(testSpace, trialSpace, p2oTestDofs, p2oTrialDofs,
                   localAssemblers, sparseTermsToAdd,
                   denseTermsMultipliers, sparseTermsMultipliers, options);

    typedef mblock<typename AhmedTypeTraits<ResultType>::Type> AhmedMblock;
    // Array of block pointers for the block cluster tree
    // (presumably one entry per leaf -- TODO confirm against
    // allocateAhmedMblockArray()).
    boost::shared_array<AhmedMblock*> blocks =
            allocateAhmedMblockArray<ResultType>(bemBlclusterTree.get());

    // (Disabled alternative assembly calls and debugging code.)
    //    matgen_sqntl(helper, AhmedBemBlclusterTree.get(), AhmedBemBlclusterTree.get(),
    //                 acaOptions.recompress, acaOptions.eps,
    //                 acaOptions.maximumRank, blocks.get());

    //    matgen_omp(helper, blockCount, AhmedBemBlclusterTree.get(),
    //               acaOptions.eps, acaOptions.maximumRank, blocks.get());

    //    // Dump mblocks
    //    const int mblockCount = AhmedBemBlclusterTree->nleaves();
    //    for (int i = 0; i < mblockCount; ++i)
    //        if (blocks[i]->isdns())
    //        {
    //            char buffer[1024];
    //            sprintf(buffer, "mblock-dns-%d-%d.txt",
    //                    blocks[i]->getn1(), blocks[i]->getn2());
    //            arma::Col<ResultType> block((ResultType*)blocks[i]->getdata(),
    //                                        blocks[i]->nvals());
    //            arma::diskio::save_raw_ascii(block, buffer);
    //        }
    //        else
    //        {
    //            char buffer[1024];
    //            sprintf(buffer, "mblock-lwr-%d-%d.txt",
    //                    blocks[i]->getn1(), blocks[i]->getn2());
    //            arma::Col<ResultType> block((ResultType*)blocks[i]->getdata(),
    //                                        blocks[i]->nvals());
    //            arma::diskio::save_raw_ascii(block, buffer);
    //        }

    AhmedLeafClusterArray leafClusters(bemBlclusterTree.get());
    leafClusters.sortAccordingToClusterSize();
    const size_t leafClusterCount = leafClusters.size();

    // Thread count for TBB: a single thread when OpenCL is enabled, otherwise
    // the user-specified maximum (or TBB's automatic choice for AUTO).
    const ParallelizationOptions& parallelOptions =
            options.parallelizationOptions();
    int maxThreadCount = 1;
    if (!parallelOptions.isOpenClEnabled()) {
        if (parallelOptions.maxThreadCount() == ParallelizationOptions::AUTO)
            maxThreadCount = tbb::task_scheduler_init::automatic;
        else
            maxThreadCount = parallelOptions.maxThreadCount();
    }
    tbb::task_scheduler_init scheduler(maxThreadCount);
    // Shared counter passed to the loop body
    // (presumably the number of finished blocks, used for progress
    // reporting -- TODO confirm in AcaWeakFormAssemblerLoopBody).
    tbb::atomic<size_t> done;
    done = 0;

    std::vector<ChunkStatistics> chunkStats(leafClusterCount);

    // (Disabled earlier variant of the assembly loop.)
    //    typedef AcaWeakFormAssemblerLoopBody<BasisFunctionType, ResultType> Body;
    //    // std::cout << "Loop start" << std::endl;
    //    tbb::tick_count loopStart = tbb::tick_count::now();
    //    // tbb::parallel_for(tbb::blocked_range<size_t>(0, leafClusterCount),
    //    //                   Body(helper, leafClusters, blocks, acaOptions, done
    //    //                        , chunkStats));
    //    tbb::parallel_for(ScatteredRange(0, leafClusterCount),
    //                      Body(helper, leafClusters, blocks, acaOptions, done
    //                           , chunkStats));
    //    tbb::tick_count loopEnd = tbb::tick_count::now();
    //    // std::cout << "Loop end" << std::endl;

    typedef AcaWeakFormAssemblerLoopBody<BasisFunctionType, ResultType> Body;
    // Queue holding the indices 0..leafClusterCount-1 in the order of the
    // sorted leafClusters array; it is passed to the loop body.
    typename Body::LeafClusterIndexQueue leafClusterIndexQueue;
    for (size_t i = 0; i < leafClusterCount; ++i)
        leafClusterIndexQueue.push(i);

    if (verbosityAtLeastDefault)
        std::cout << "About to start the ACA assembly loop" << std::endl;
    tbb::tick_count loopStart = tbb::tick_count::now();
    {
        // The braces limit the lifetime of 'region' to the parallel loop.
        Fiber::SerialBlasRegion region; // if possible, ensure that BLAS is single-threaded
        tbb::parallel_for(tbb::blocked_range<size_t>(0, leafClusterCount),
                          Body(helper, leafClusters, blocks, acaOptions, done,
                               verbosityAtLeastDefault,
                               leafClusterIndexQueue, symmetric, chunkStats));
    }
    tbb::tick_count loopEnd = tbb::tick_count::now();
    if (verbosityAtLeastDefault) {
        std::cout << "\n"; // the progress bar doesn't print the final \n
        std::cout << "ACA loop took " << (loopEnd - loopStart).seconds() << " s"
                  << std::endl;
    }

    // Optional agglomeration of the assembled blocks, run serially.
    // TODO: parallelise!
    if (acaOptions.recompress) {
        if (verbosityAtLeastDefault)
            std::cout << "About to start ACA agglomeration" << std::endl;
        agglH(bemBlclusterTree.get(), blocks.get(),
              acaOptions.eps, acaOptions.maximumRank);
        if (verbosityAtLeastDefault)
            std::cout << "Agglomeration finished" << std::endl;
    }

    // (Disabled debugging code.)
    //    // Dump timing data of individual chunks
    //    std::cout << "\nChunks:\n";
    //    for (int i = 0; i < leafClusterCount; ++i)
    //        if (chunkStats[i].valid) {
    //            int blockIndex = leafClusters[i]->getidx();
    //            std::cout << chunkStats[i].chunkStart << "\t"
    //                      << chunkStats[i].chunkSize << "\t"
    //                      << (chunkStats[i].startTime - loopStart).seconds() << "\t"
    //                      << (chunkStats[i].endTime - loopStart).seconds() << "\t"
    //                      << (chunkStats[i].endTime - chunkStats[i].startTime).seconds() << "\t"
    //                      << blocks[blockIndex]->getn1() << "\t"
    //                      << blocks[blockIndex]->getn2() << "\t"
    //                      << blocks[blockIndex]->islwr() << "\t"
    //                      << (blocks[blockIndex]->islwr() ? blocks[blockIndex]->rank() : 0) << "\n";
    //        }

    {
        // Storage statistics. origMemory is the size in bytes of a dense
        // testDofCount x trialDofCount array of ResultType. The statistics
        // are computed regardless of verbosity but printed only at default
        // verbosity or above.
        size_t origMemory = sizeof(ResultType) * testDofCount * trialDofCount;
        size_t ahmedMemory = sizeH(bemBlclusterTree.get(), blocks.get());
        int maximumRank = Hmax_rank(bemBlclusterTree.get(), blocks.get());
        if (verbosityAtLeastDefault)
            std::cout << "\nNeeded storage: "
                      << ahmedMemory / 1024. / 1024. << " MB.\n"
                      << "Without approximation: "
                      << origMemory / 1024. / 1024. << " MB.\n"
                      << "Compressed to "
                      << (100. * ahmedMemory) / origMemory << "%.\n"
                      << "Maximum rank: " << maximumRank << ".\n"
                      << std::endl;

        // Optionally write a picture of the matrix partition to the file
        // named by acaOptions.outputFname.
        // NOTE(review): the stream is not checked for a failed open.
        if (acaOptions.outputPostscript) {
            if (verbosityAtLeastDefault)
                std::cout << "Writing matrix partition ..." << std::flush;
            std::ofstream os(acaOptions.outputFname.c_str());
            if (symmetric) // seems valid also for Hermitian matrices
                psoutputHeH(os, bemBlclusterTree.get(), testDofCount, blocks.get());
            else
                psoutputGeH(os, bemBlclusterTree.get(), testDofCount, blocks.get());
            os.close();
            if (verbosityAtLeastDefault)
                std::cout << " done." << std::endl;
        }
    }

    // Symmetry flags of the resulting operator: a symmetric operator with a
    // non-complex ResultType is flagged as Hermitian as well.
    int outSymmetry = NO_SYMMETRY;
    if (symmetric) {
        outSymmetry = SYMMETRIC;
        if (!boost::is_complex<ResultType>())
            outSymmetry |= HERMITIAN;
    }
    // Note the argument order: the trial o2p permutation is passed before
    // the test o2p permutation.
    std::auto_ptr<DiscreteAcaLinOp> acaOp(
                new DiscreteAcaLinOp(testDofCount, trialDofCount,
                                     acaOptions.eps,
                                     acaOptions.maximumRank,
                                     outSymmetry,
                                     bemBlclusterTree, blocks,
                                     *trial_o2pPermutation,
                                     *test_o2pPermutation,
                                     parallelOptions));

    std::auto_ptr<DiscreteBndOp> result;
    if (indexWithGlobalDofs)
        // std::auto_ptr assignment transfers ownership from acaOp to result.
        result = acaOp;
    else {
#ifdef WITH_TRILINOS
        // without Trilinos, this code will never be reached -- an exception
        // will be thrown earlier in this function
        //
        // The ACA operator acts on flat local DOFs here, so it is wrapped as
        // DiscreteBndOpComp(testLocalToGlobal,
        //                   DiscreteBndOpComp(acaOp, trialGlobalToLocal)).
        typedef DiscreteBoundaryOperatorComposition<ResultType> DiscreteBndOpComp;
        shared_ptr<DiscreteBndOp> acaOpShared(acaOp.release());
        shared_ptr<DiscreteBndOp> trialGlobalToLocal =
                constructOperatorMappingGlobalToFlatLocalDofs<
                BasisFunctionType, ResultType>(trialSpace);
        shared_ptr<DiscreteBndOp> testLocalToGlobal =
                constructOperatorMappingFlatLocalToGlobalDofs<
                BasisFunctionType, ResultType>(testSpace);
        shared_ptr<DiscreteBndOp> tmp(
                    new DiscreteBndOpComp(acaOpShared, trialGlobalToLocal));
        result.reset(new DiscreteBndOpComp(testLocalToGlobal, tmp));
#endif // WITH_TRILINOS
    }
    return result;

#else // without Ahmed
    throw std::runtime_error("AcaGlobalAssembler::assembleDetachedWeakForm(): "
                             "To enable assembly in ACA mode, recompile BEM++ "
                             "with the symbol WITH_AHMED defined.");
#endif // WITH_AHMED
}