shared_ptr<const Epetra_CrsMatrix> IndexPermutation::permutationMatrix() const {
    // Build the explicit sparse representation of this permutation: a square
    // 0/1 matrix with exactly one unit entry per row, placed so that row
    // m_permutedIndices[col] carries its entry in column col.
    const int dimension = m_permutedIndices.size();

    Epetra_SerialComm comm; // To be replaced once we begin to use MPI
    Epetra_LocalMap rowMap(dimension, 0 /* index_base */, comm);
    Epetra_LocalMap colMap(dimension, 0 /* index_base */, comm);

    // Each row holds a single nonzero, so preallocate one entry per row.
    shared_ptr<Epetra_CrsMatrix> permutation =
        boost::make_shared<Epetra_CrsMatrix>(Copy, rowMap, colMap,
                                             1 /* one entry per row */);

    const double UNIT = 1.;
    for (int col = 0; col < dimension; ++col)
        permutation->InsertGlobalValues(m_permutedIndices[col],
                                        1 /* one entry */, &UNIT, &col);
    permutation->FillComplete();

    return permutation;
}
// Gathers the contributions that were inserted on this processor for block
// rows owned by other processors ("nonlocal" rows) and exports them, with
// addition, into the owning processors' rows.
//
// @param callFillComplete  if true, FillComplete() is invoked on this matrix
//                          after the nonlocal data has been exported.
// @return 0 on success; Epetra error codes propagate via EPETRA_CHK_ERR
//         (which may return early from this function).
int Epetra_FEVbrMatrix::GlobalAssemble(bool callFillComplete) {
  // Serial runs (or runs that deliberately ignore nonlocal entries) have
  // nothing to migrate -- just optionally finalize the matrix.
  if(Map().Comm().NumProc() < 2 || ignoreNonLocalEntries_) {
    if(callFillComplete) {
      EPETRA_CHK_ERR(FillComplete());
    }
    return(0);
  }

  int i;

  //In this method we need to gather all the non-local (overlapping) data
  //that's been input on each processor, into the
  //non-overlapping distribution defined by the map that 'this' matrix was
  //constructed with.

  //Need to build a map that describes our nonlocal data.

  //First, create a list of the sizes (point-rows per block-row) of the
  //nonlocal rows we're holding.
  int* pointRowsPerNonlocalBlockRow = numNonlocalBlockRows_>0 ?
    new int[numNonlocalBlockRows_] : NULL;

  for(i=0; i<numNonlocalBlockRows_; ++i) {
    // Every sub-block in a block-row shares the same row dimension, so the
    // first coefficient block's M() gives the point-row count for the row.
    pointRowsPerNonlocalBlockRow[i] = nonlocalCoefs_[i][0]->M();
  }

  //We'll use the arbitrary distribution constructor of BlockMap.

  Epetra_BlockMap sourceMap(-1, numNonlocalBlockRows_,
                            nonlocalBlockRows_, // CJ TODO FIXME long long
                            pointRowsPerNonlocalBlockRow,
                            RowMap().IndexBase(), RowMap().Comm());

  delete [] pointRowsPerNonlocalBlockRow;

  //If sourceMap has global size 0, then no nonlocal data exists and we can
  //skip most of this function.
  if(sourceMap.NumGlobalElements64() < 1) {
    if(callFillComplete) {
      EPETRA_CHK_ERR(FillComplete());
    }
    return(0);
  }

  //We also need to build a column-map, containing the columns in our
  //nonlocal data. To do that, create a list of all column-indices that
  //occur in our nonlocal rows.

  int numCols = 0, allocLen = 0;
  int* cols = NULL;
  int* pointColsPerBlockCol = NULL;
  int ptColAllocLen = 0;
  int insertPoint = -1;

  for(i=0; i<numNonlocalBlockRows_; ++i) {
    for(int j=0; j<nonlocalBlockRowLengths_[i]; ++j) {
      int col = nonlocalBlockCols_[i][j];
      // 'cols' is kept sorted and duplicate-free; a negative offset means
      // the column has not been seen yet and insertPoint is its slot.
      int offset = Epetra_Util_binary_search(col, cols, numCols, insertPoint);
      if (offset < 0) {
        EPETRA_CHK_ERR( Epetra_Util_insert(col, insertPoint, cols,
                                           numCols, allocLen) );
        // Insert the point-column count of this block column at the same
        // position so the two arrays stay aligned; tmpNumCols compensates
        // for numCols having already been incremented by the insert above.
        int tmpNumCols = numCols-1;
        EPETRA_CHK_ERR( Epetra_Util_insert(nonlocalCoefs_[i][j]->N(),
                                           insertPoint,
                                           pointColsPerBlockCol,
                                           tmpNumCols, ptColAllocLen) );
      }
    }
  }

  Epetra_BlockMap colMap(-1, numCols,
                         cols, // CJ TODO FIXME long long
                         pointColsPerBlockCol,
                         RowMap().IndexBase(), RowMap().Comm());

  delete [] cols;
  delete [] pointColsPerBlockCol;
  numCols = 0;
  allocLen = 0;

  //now we need to create a matrix with sourceMap and colMap, and fill it with
  //our nonlocal data so we can then export it to the correct owning
  //processors.

  Epetra_VbrMatrix tempMat(Copy, sourceMap, colMap, nonlocalBlockRowLengths_);

  //Next we need to make sure the 'indices-are-global' attribute of tempMat's
  //graph is set to true, in case this processor doesn't end up calling the
  //InsertGlobalValues method...
  // NOTE(review): the const_cast works around Graph() returning a const
  // reference; the graph object itself is not actually const here.

  const Epetra_CrsGraph& graph = tempMat.Graph();
  Epetra_CrsGraph& nonconst_graph = const_cast<Epetra_CrsGraph&>(graph);
  nonconst_graph.SetIndicesAreGlobal(true);

  // Copy every nonlocal block row, sub-block by sub-block, into tempMat.
  for(i=0; i<numNonlocalBlockRows_; ++i) {
    EPETRA_CHK_ERR( tempMat.BeginInsertGlobalValues(nonlocalBlockRows_[i],
                                                    nonlocalBlockRowLengths_[i],
                                                    nonlocalBlockCols_[i]) );

    for(int j=0; j<nonlocalBlockRowLengths_[i]; ++j) {
      Epetra_SerialDenseMatrix& subblock = *(nonlocalCoefs_[i][j]);

      EPETRA_CHK_ERR( tempMat.SubmitBlockEntry(subblock.A(),
                                               subblock.LDA(),
                                               subblock.M(),
                                               subblock.N()) );
    }

    EPETRA_CHK_ERR( tempMat.EndSubmitEntries() );
  }

  //Now we need to call FillComplete on our temp matrix. We need to
  //pass a DomainMap and RangeMap, which are not the same as the RowMap
  //and ColMap that we constructed the matrix with.
  EPETRA_CHK_ERR(tempMat.FillComplete(RowMap(), sourceMap));

  //Finally, we're ready to create the exporter and export non-local data to
  //the appropriate owning processors.

  Epetra_Export exporter(sourceMap, RowMap());

  EPETRA_CHK_ERR( Export(tempMat, exporter, Add) );

  if(callFillComplete) {
    EPETRA_CHK_ERR(FillComplete());
  }

  // The nonlocal staging buffers have been exported; release them.
  destroyNonlocalData();

  return(0);
}
// Assembles the weak form of this local operator in sparse mode: evaluates
// the local weak forms on all grid elements, scatters them (scaled by the
// local DOF weights) into an Epetra sparse matrix, and wraps the result in
// a DiscreteSparseBoundaryOperator. Requires Trilinos; throws otherwise.
//
// @param assembler  local assembler used to evaluate element-level weak forms.
// @param options    assembly options; if the mode is ACA and AHMED is
//                   available, a block cluster tree is also constructed.
// @return the assembled discrete boundary operator.
// @throws std::runtime_error for complex-valued basis functions (unsupported)
//         or when BEM++ was compiled without WITH_TRILINOS.
std::unique_ptr<DiscreteBoundaryOperator<ResultType>>
ElementaryLocalOperator<BasisFunctionType, ResultType>::
assembleWeakFormInSparseMode(LocalAssembler &assembler,
                             const AssemblyOptions &options) const {
#ifdef WITH_TRILINOS
  // The matrix is filled with real (double) zeros below, so complex basis
  // function types are explicitly rejected.
  if (boost::is_complex<BasisFunctionType>::value)
    throw std::runtime_error(
        "ElementaryLocalOperator::assembleWeakFormInSparseMode(): "
        "sparse-mode assembly of identity operators for "
        "complex-valued basis functions is not supported yet");

  const Space<BasisFunctionType> &testSpace = *this->dualToRange();
  const Space<BasisFunctionType> &trialSpace = *this->domain();

  // Fill local submatrices: evaluate the local weak form on every element
  // of the grid in a single batch.
  const GridView &view = testSpace.gridView();
  const size_t elementCount = view.entityCount(0);
  std::vector<int> elementIndices(elementCount);
  for (size_t i = 0; i < elementCount; ++i)
    elementIndices[i] = i;
  std::vector<arma::Mat<ResultType>> localResult;
  assembler.evaluateLocalWeakForms(elementIndices, localResult);

  // Global DOF indices corresponding to local DOFs on elements
  std::vector<std::vector<GlobalDofIndex>> testGdofs(elementCount);
  std::vector<std::vector<GlobalDofIndex>> trialGdofs(elementCount);
  std::vector<std::vector<BasisFunctionType>> testLdofWeights(elementCount);
  std::vector<std::vector<BasisFunctionType>> trialLdofWeights(elementCount);
  gatherGlobalDofs(testSpace, trialSpace, testGdofs, trialGdofs,
                   testLdofWeights, trialLdofWeights);

  // Multiply matrix entries by DOF weights (test weights conjugated, as in
  // a sesquilinear form).
  for (size_t e = 0; e < elementCount; ++e)
    for (size_t trialDof = 0; trialDof < trialGdofs[e].size(); ++trialDof)
      for (size_t testDof = 0; testDof < testGdofs[e].size(); ++testDof)
        localResult[e](testDof, trialDof) *=
            conj(testLdofWeights[e][testDof]) * trialLdofWeights[e][trialDof];

  // Estimate number of entries in each row
  // This will be useful when we begin to use MPI
  // // Get global DOF indices for which this process is responsible
  // const int testGlobalDofCount = testSpace.globalDofCount();
  // Epetra_Map rowMap(testGlobalDofCount, 0 /* index-base */, comm);
  // std::vector<int> myTestGlobalDofs(rowMap.MyGlobalElements(),
  //                                   rowMap.MyGlobalElements() +
  //                                   rowMap.NumMyElements());
  // const int myTestGlobalDofCount = myTestGlobalDofs.size();
  const int testGlobalDofCount = testSpace.globalDofCount();
  const int trialGlobalDofCount = trialSpace.globalDofCount();
  arma::Col<int> nonzeroEntryCountEstimates(testGlobalDofCount);
  nonzeroEntryCountEstimates.fill(0);

  // Upper estimate for the number of global trial DOFs coupled to a given
  // global test DOF: sum of the local trial DOF counts for each element that
  // contributes to the global test DOF in question
  for (size_t e = 0; e < elementCount; ++e)
    for (size_t testLdof = 0; testLdof < testGdofs[e].size(); ++testLdof) {
      int testGdof = testGdofs[e][testLdof];
      // Negative indices are skipped -- presumably local DOFs that do not
      // map to any global DOF; TODO confirm against gatherGlobalDofs.
      if (testGdof >= 0)
        nonzeroEntryCountEstimates(testGdof) += trialGdofs[e].size();
    }

  Epetra_SerialComm comm; // To be replaced once we begin to use MPI
  Epetra_LocalMap rowMap(testGlobalDofCount, 0 /* index_base */, comm);
  Epetra_LocalMap colMap(trialGlobalDofCount, 0 /* index_base */, comm);
  shared_ptr<Epetra_FECrsMatrix> result =
      boost::make_shared<Epetra_FECrsMatrix>(
          Copy, rowMap, colMap, nonzeroEntryCountEstimates.memptr());

  // TODO: make each process responsible for a subset of elements

  // Find maximum number of local dofs per element
  size_t maxLdofCount = 0;
  for (size_t e = 0; e < elementCount; ++e)
    maxLdofCount =
        std::max(maxLdofCount, testGdofs[e].size() * trialGdofs[e].size());

  // Initialise sparse matrix with zeros at required positions
  // NOTE(review): &testGdofs[e][0] / &trialGdofs[e][0] assume every element
  // has at least one test and trial DOF (operator[] on an empty vector is
  // undefined) -- TODO confirm this invariant holds for all spaces.
  arma::Col<double> zeros(maxLdofCount);
  zeros.fill(0.);
  for (size_t e = 0; e < elementCount; ++e)
    result->InsertGlobalValues(testGdofs[e].size(), &testGdofs[e][0],
                               trialGdofs[e].size(), &trialGdofs[e][0],
                               zeros.memptr());
  // Add contributions from individual elements
  for (size_t e = 0; e < elementCount; ++e)
    epetraSumIntoGlobalValues(*result, testGdofs[e], trialGdofs[e],
                              localResult[e]);
  result->GlobalAssemble();

  // If assembly mode is equal to ACA and we have AHMED,
  // construct the block cluster tree. Otherwise leave it uninitialized.
  typedef ClusterConstructionHelper<BasisFunctionType> CCH;
  typedef AhmedDofWrapper<CoordinateType> AhmedDofType;
  typedef ExtendedBemCluster<AhmedDofType> AhmedBemCluster;
  typedef bbxbemblcluster<AhmedDofType, AhmedDofType> AhmedBemBlcluster;
  shared_ptr<AhmedBemBlcluster> blockCluster;
  shared_ptr<IndexPermutation> test_o2pPermutation, test_p2oPermutation;
  shared_ptr<IndexPermutation> trial_o2pPermutation, trial_p2oPermutation;
#ifdef WITH_AHMED
  if (options.assemblyMode() == AssemblyOptions::ACA) {
    const AcaOptions &acaOptions = options.acaOptions();
    bool indexWithGlobalDofs = acaOptions.mode != AcaOptions::HYBRID_ASSEMBLY;
    typedef ClusterConstructionHelper<BasisFunctionType> CCH;
    // Cluster trees for the test and trial spaces, plus the associated
    // original<->permuted index permutations.
    shared_ptr<AhmedBemCluster> testClusterTree;
    CCH::constructBemCluster(testSpace, indexWithGlobalDofs, acaOptions,
                             testClusterTree, test_o2pPermutation,
                             test_p2oPermutation);
    // TODO: construct a hermitian H-matrix if possible
    shared_ptr<AhmedBemCluster> trialClusterTree;
    CCH::constructBemCluster(trialSpace, indexWithGlobalDofs, acaOptions,
                             trialClusterTree, trial_o2pPermutation,
                             trial_p2oPermutation);
    unsigned int blockCount = 0;
    bool useStrongAdmissibilityCondition = !indexWithGlobalDofs;
    blockCluster.reset(CCH::constructBemBlockCluster(
        acaOptions, false /* hermitian */, *testClusterTree,
        *trialClusterTree, useStrongAdmissibilityCondition,
        blockCount).release());
  }
#endif

  // Create and return a discrete operator represented by the matrix that
  // has just been calculated
  return std::unique_ptr<DiscreteBoundaryOperator<ResultType>>(
      new DiscreteSparseBoundaryOperator<ResultType>(
          result, this->symmetry(), NO_TRANSPOSE, blockCluster,
          trial_o2pPermutation, test_o2pPermutation));
#else // WITH_TRILINOS
  throw std::runtime_error(
      "ElementaryLocalOperator::assembleWeakFormInSparseMode(): "
      "To enable assembly in sparse mode, recompile BEM++ "
      "with the symbol WITH_TRILINOS defined.");
#endif
}