/** \brief Cholesky-factorise a block-diagonal sparse matrix block by block.
 *
 *  The matrix \p mat is assumed to be symmetric and positive-definite, and to
 *  be block-diagonal in the sense that all rows belonging to a block share
 *  exactly the same list of column indices. Each block is gathered into a
 *  dense matrix, factorised with arma::chol() (which yields the upper factor
 *  U), and column s of U is stored as the row of the result that corresponds
 *  to the s-th index of the block.
 *
 *  \param[in] mat  Matrix to factorise. Its CRS storage must be directly
 *                  accessible through ExtractCrsDataPointers().
 *
 *  \returns A newly allocated matrix holding the factors of all blocks.
 *
 *  \throws std::invalid_argument if \p mat is not square, if its raw CRS
 *          arrays cannot be extracted, or if it is not block-diagonal. */
shared_ptr<Epetra_CrsMatrix> sparseCholesky(const Epetra_CrsMatrix &mat) {
  // Note: we assume the matrix mat is symmetric and positive-definite
  const int globalRowCount = mat.NumGlobalRows();
  if (globalRowCount != mat.NumGlobalCols())
    throw std::invalid_argument("sparseCholesky(): matrix must be square");
  const size_t size = static_cast<size_t>(globalRowCount);

  int *rowOffsets = 0;
  int *colIndices = 0;
  double *values = 0;
  // A nonzero return code means that the raw arrays are not available; the
  // pointers must not be dereferenced in that case.
  if (mat.ExtractCrsDataPointers(rowOffsets, colIndices, values) != 0)
    throw std::invalid_argument("sparseCholesky(): unable to access the CRS "
                                "data of the matrix (its storage must be "
                                "optimized)");

  Epetra_SerialComm comm;
  Epetra_LocalMap rowMap(globalRowCount, 0 /* index_base */, comm);
  Epetra_LocalMap columnMap(globalRowCount, 0 /* index_base */, comm);
  shared_ptr<Epetra_CrsMatrix> result = boost::make_shared<Epetra_CrsMatrix>(
      Copy, rowMap, columnMap, mat.GlobalMaxNumEntries());

  arma::Mat<double> localMat;
  arma::Mat<double> localCholesky;
  // processed[i] is set once row i has been handled as part of some block
  std::vector<bool> processed(size, false);
  for (size_t r = 0; r < size; ++r) {
    if (processed[r])
      continue;
    // The column indices of row r define the members of the current block
    const int blockStart = rowOffsets[r];
    const int localSize = rowOffsets[r + 1] - blockStart;
    localMat.set_size(localSize, localSize);
    localMat.fill(0.);
    localCholesky.set_size(localSize, localSize);
    for (int s = 0; s < localSize; ++s) {
      const int row = colIndices[blockStart + s];
      const int rowStart = rowOffsets[row];
      // Every row of the block must have as many entries as the block's first
      // row; otherwise the loop below would read entries of another row or
      // silently ignore couplings outside the block.
      if (rowOffsets[row + 1] - rowStart != localSize)
        throw std::invalid_argument("sparseCholesky(): matrix is not "
                                    "block-diagonal");
      for (int c = 0; c < localSize; ++c) {
        const int col = colIndices[rowStart + c];
        if (col != colIndices[blockStart + c])
          throw std::invalid_argument("sparseCholesky(): matrix is not "
                                      "block-diagonal");
        localMat(s, c) = values[rowStart + c];
      }
    }
    assert(arma::norm(localMat - localMat.t(), "fro") <
           1e-12 * arma::norm(localMat, "fro"));
    localCholesky = arma::chol(localMat); // localCholesky: U

    for (int s = 0; s < localSize; ++s) {
      const int row = colIndices[blockStart + s];
      processed[row] = true;
      // Column s of U has s + 1 potentially nonzero entries (rows 0..s)
      const int errorCode = result->InsertGlobalValues(
          row, s + 1 /* number of values */, localCholesky.colptr(s),
          colIndices + blockStart);
      assert(errorCode == 0);
      (void)errorCode; // unused when NDEBUG is defined
    }
  }
  result->FillComplete(columnMap, rowMap);
  return result;
}
/** \brief Return the sparse matrix representing this permutation.
 *
 *  The matrix is square, with as many rows as there are permuted indices.
 *  For every index i, a single entry equal to 1 is inserted in row
 *  m_permutedIndices[i] and column i. */
shared_ptr<const Epetra_CrsMatrix> IndexPermutation::permutationMatrix() const {
  // To be replaced once we begin to use MPI
  Epetra_SerialComm serialComm;

  const int indexCount = m_permutedIndices.size();
  Epetra_LocalMap rows(indexCount, 0 /* index_base */, serialComm);
  Epetra_LocalMap columns(indexCount, 0 /* index_base */, serialComm);

  shared_ptr<Epetra_CrsMatrix> matrix = boost::make_shared<Epetra_CrsMatrix>(
      Copy, rows, columns, 1 /* one entry per row */);

  const double unitEntry = 1.;
  int originalIndex = 0;
  while (originalIndex < indexCount) {
    matrix->InsertGlobalValues(m_permutedIndices[originalIndex],
                               1 /* one entry */, &unitEntry, &originalIndex);
    ++originalIndex;
  }

  matrix->FillComplete();
  return matrix;
}
/**
 * Overwrites the stored entries of masked rows and columns.
 *
 * An entry is overwritten if the mask value of its local row or of its local
 * column is greater than zero. Overwritten entries on the main diagonal
 * (global row index equal to global column index) are set to \p mdv, all
 * other overwritten entries are set to zero. Only entries already present in
 * the sparsity pattern are touched.
 *
 * \param rowMask  mask values indexed by local row
 * \param colView  mask values distributed according to the row map; they are
 *                 imported into the column map before use
 * \param mdv      value written to overwritten main-diagonal entries
 */
void CrsMatrixWrapper<ST>::nullifyRowsAndCols(
        const Teuchos::ArrayView<const real_t>& rowMask,
        const Teuchos::ArrayView<const real_t>& colView,
        ST mdv)
{
    const_TrilinosMap_ptr rowMap(mat.getRowMap());
    const_TrilinosMap_ptr colMap(mat.getColMap());

    // Redistribute the column mask from the row map to the column map so
    // that it can be indexed with local column indices.
    RCP<VectorType<real_t> > lclCol = rcp(new VectorType<real_t>(rowMap,
                                          colView, colView.size(), 1));
    RCP<VectorType<real_t> > gblCol = rcp(new VectorType<real_t>(colMap, 1));
    const ImportType importer(rowMap, colMap);
    gblCol->doImport(*lclCol, importer, Tpetra::INSERT);
    Teuchos::ArrayRCP<const real_t> colMask(gblCol->getData(0));
    const ST zero = Teuchos::ScalarTraits<ST>::zero();

    resumeFill();

    // Scratch buffers reused for every row. The column buffer holds *local*
    // column indices, as expected by replaceLocalValues().
    std::vector<LO> cols;
    std::vector<ST> vals;

    typedef typename Teuchos::ArrayView<const LO>::size_type view_size_t;
    const LO numRows = static_cast<LO>(mat.getNodeNumRows());

    // Can't use OpenMP here as replaceLocalValues() is not thread-safe.
    for (LO lclrow = 0; lclrow < numRows; lclrow++) {
        Teuchos::ArrayView<const LO> indices;
        Teuchos::ArrayView<const ST> values;
        mat.getLocalRowView(lclrow, indices, values);
        const GO row = rowMap->getGlobalElement(lclrow);

        cols.clear();
        vals.clear();
        for (view_size_t c = 0; c < indices.size(); c++) {
            const LO lclcol = indices[c];
            if (rowMask[lclrow] > 0. || colMask[lclcol] > 0.) {
                const GO col = colMap->getGlobalElement(lclcol);
                cols.push_back(lclcol);
                vals.push_back(row == col ? mdv : zero);
            }
        }
        if (!cols.empty())
            mat.replaceLocalValues(lclrow, cols, vals);
    }
    fillComplete(true);
}
std::unique_ptr<DiscreteBoundaryOperator<ResultType>> ElementaryLocalOperator<BasisFunctionType, ResultType>:: assembleWeakFormInSparseMode(LocalAssembler &assembler, const AssemblyOptions &options) const { #ifdef WITH_TRILINOS if (boost::is_complex<BasisFunctionType>::value) throw std::runtime_error( "ElementaryLocalOperator::assembleWeakFormInSparseMode(): " "sparse-mode assembly of identity operators for " "complex-valued basis functions is not supported yet"); const Space<BasisFunctionType> &testSpace = *this->dualToRange(); const Space<BasisFunctionType> &trialSpace = *this->domain(); // Fill local submatrices const GridView &view = testSpace.gridView(); const size_t elementCount = view.entityCount(0); std::vector<int> elementIndices(elementCount); for (size_t i = 0; i < elementCount; ++i) elementIndices[i] = i; std::vector<arma::Mat<ResultType>> localResult; assembler.evaluateLocalWeakForms(elementIndices, localResult); // Global DOF indices corresponding to local DOFs on elements std::vector<std::vector<GlobalDofIndex>> testGdofs(elementCount); std::vector<std::vector<GlobalDofIndex>> trialGdofs(elementCount); std::vector<std::vector<BasisFunctionType>> testLdofWeights(elementCount); std::vector<std::vector<BasisFunctionType>> trialLdofWeights(elementCount); gatherGlobalDofs(testSpace, trialSpace, testGdofs, trialGdofs, testLdofWeights, trialLdofWeights); // Multiply matrix entries by DOF weights for (size_t e = 0; e < elementCount; ++e) for (size_t trialDof = 0; trialDof < trialGdofs[e].size(); ++trialDof) for (size_t testDof = 0; testDof < testGdofs[e].size(); ++testDof) localResult[e](testDof, trialDof) *= conj(testLdofWeights[e][testDof]) * trialLdofWeights[e][trialDof]; // Estimate number of entries in each row // This will be useful when we begin to use MPI // // Get global DOF indices for which this process is responsible // const int testGlobalDofCount = testSpace.globalDofCount(); // Epetra_Map rowMap(testGlobalDofCount, 0 /* index-base */, comm); 
// std::vector<int> myTestGlobalDofs(rowMap.MyGlobalElements(), // rowMap.MyGlobalElements() + // rowMap.NumMyElements()); // const int myTestGlobalDofCount = myTestGlobalDofs.size(); const int testGlobalDofCount = testSpace.globalDofCount(); const int trialGlobalDofCount = trialSpace.globalDofCount(); arma::Col<int> nonzeroEntryCountEstimates(testGlobalDofCount); nonzeroEntryCountEstimates.fill(0); // Upper estimate for the number of global trial DOFs coupled to a given // global test DOF: sum of the local trial DOF counts for each element that // contributes to the global test DOF in question for (size_t e = 0; e < elementCount; ++e) for (size_t testLdof = 0; testLdof < testGdofs[e].size(); ++testLdof) { int testGdof = testGdofs[e][testLdof]; if (testGdof >= 0) nonzeroEntryCountEstimates(testGdof) += trialGdofs[e].size(); } Epetra_SerialComm comm; // To be replaced once we begin to use MPI Epetra_LocalMap rowMap(testGlobalDofCount, 0 /* index_base */, comm); Epetra_LocalMap colMap(trialGlobalDofCount, 0 /* index_base */, comm); shared_ptr<Epetra_FECrsMatrix> result = boost::make_shared<Epetra_FECrsMatrix>( Copy, rowMap, colMap, nonzeroEntryCountEstimates.memptr()); // TODO: make each process responsible for a subset of elements // Find maximum number of local dofs per element size_t maxLdofCount = 0; for (size_t e = 0; e < elementCount; ++e) maxLdofCount = std::max(maxLdofCount, testGdofs[e].size() * trialGdofs[e].size()); // Initialise sparse matrix with zeros at required positions arma::Col<double> zeros(maxLdofCount); zeros.fill(0.); for (size_t e = 0; e < elementCount; ++e) result->InsertGlobalValues(testGdofs[e].size(), &testGdofs[e][0], trialGdofs[e].size(), &trialGdofs[e][0], zeros.memptr()); // Add contributions from individual elements for (size_t e = 0; e < elementCount; ++e) epetraSumIntoGlobalValues(*result, testGdofs[e], trialGdofs[e], localResult[e]); result->GlobalAssemble(); // If assembly mode is equal to ACA and we have AHMED, // construct the 
block cluster tree. Otherwise leave it uninitialized. typedef ClusterConstructionHelper<BasisFunctionType> CCH; typedef AhmedDofWrapper<CoordinateType> AhmedDofType; typedef ExtendedBemCluster<AhmedDofType> AhmedBemCluster; typedef bbxbemblcluster<AhmedDofType, AhmedDofType> AhmedBemBlcluster; shared_ptr<AhmedBemBlcluster> blockCluster; shared_ptr<IndexPermutation> test_o2pPermutation, test_p2oPermutation; shared_ptr<IndexPermutation> trial_o2pPermutation, trial_p2oPermutation; #ifdef WITH_AHMED if (options.assemblyMode() == AssemblyOptions::ACA) { const AcaOptions &acaOptions = options.acaOptions(); bool indexWithGlobalDofs = acaOptions.mode != AcaOptions::HYBRID_ASSEMBLY; typedef ClusterConstructionHelper<BasisFunctionType> CCH; shared_ptr<AhmedBemCluster> testClusterTree; CCH::constructBemCluster(testSpace, indexWithGlobalDofs, acaOptions, testClusterTree, test_o2pPermutation, test_p2oPermutation); // TODO: construct a hermitian H-matrix if possible shared_ptr<AhmedBemCluster> trialClusterTree; CCH::constructBemCluster(trialSpace, indexWithGlobalDofs, acaOptions, trialClusterTree, trial_o2pPermutation, trial_p2oPermutation); unsigned int blockCount = 0; bool useStrongAdmissibilityCondition = !indexWithGlobalDofs; blockCluster.reset(CCH::constructBemBlockCluster( acaOptions, false /* hermitian */, *testClusterTree, *trialClusterTree, useStrongAdmissibilityCondition, blockCount).release()); } #endif // Create and return a discrete operator represented by the matrix that // has just been calculated return std::unique_ptr<DiscreteBoundaryOperator<ResultType>>( new DiscreteSparseBoundaryOperator<ResultType>( result, this->symmetry(), NO_TRANSPOSE, blockCluster, trial_o2pPermutation, test_o2pPermutation)); #else // WITH_TRILINOS throw std::runtime_error( "ElementaryLocalOperator::assembleWeakFormInSparseMode(): " "To enable assembly in sparse mode, recompile BEM++ " "with the symbol WITH_TRILINOS defined."); #endif }