void Constraint<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Setup(const MultiVector& B, const MultiVector& Bc, RCP<const CrsGraph> Ppattern) { Ppattern_ = Ppattern; const RCP<const Map> uniqueMap = Ppattern_->getDomainMap(); const RCP<const Map> nonUniqueMap = Ppattern_->getColMap(); RCP<const Import> importer = ImportFactory::Build(uniqueMap, nonUniqueMap); const size_t NSDim = Bc.getNumVectors(); X_ = MultiVectorFactory::Build(nonUniqueMap, NSDim); X_->doImport(Bc, *importer, Xpetra::INSERT); size_t numRows = Ppattern_->getNodeNumRows(); XXtInv_.resize(numRows); Teuchos::SerialDenseVector<LO,SC> BcRow(NSDim, false); for (size_t i = 0; i < numRows; i++) { Teuchos::ArrayView<const LO> indices; Ppattern_->getLocalRowView(i, indices); size_t nnz = indices.size(); Teuchos::SerialDenseMatrix<LO,SC> locX(NSDim, nnz, false); for (size_t j = 0; j < nnz; j++) { for (size_t k = 0; k < NSDim; k++) BcRow[k] = X_->getData(k)[indices[j]]; Teuchos::setCol(BcRow, (LO)j, locX); } XXtInv_[i] = Teuchos::SerialDenseMatrix<LO,SC>(NSDim, NSDim, false); Teuchos::BLAS<LO,SC> blas; blas.GEMM(Teuchos::NO_TRANS, Teuchos::CONJ_TRANS, NSDim, NSDim, nnz, Teuchos::ScalarTraits<SC>::one(), locX.values(), locX.stride(), locX.values(), locX.stride(), Teuchos::ScalarTraits<SC>::zero(), XXtInv_[i].values(), XXtInv_[i].stride()); Teuchos::LAPACK<LO,SC> lapack; LO info, lwork = 3*NSDim; ArrayRCP<LO> IPIV(NSDim); ArrayRCP<SC> WORK(lwork); lapack.GETRF(NSDim, NSDim, XXtInv_[i].values(), XXtInv_[i].stride(), IPIV.get(), &info); lapack.GETRI(NSDim, XXtInv_[i].values(), XXtInv_[i].stride(), IPIV.get(), WORK.get(), lwork, &info); } }
Basis_HGRAD_LINE_Cn_FEM<SpT,OT,PT>:: Basis_HGRAD_LINE_Cn_FEM( const ordinal_type order, const EPointType pointType ) { this->basisCardinality_ = order+1; this->basisDegree_ = order; this->basisCellTopology_ = shards::CellTopology(shards::getCellTopologyData<shards::Line<2> >() ); this->basisType_ = BASIS_FEM_FIAT; this->basisCoordinates_ = COORDINATES_CARTESIAN; const ordinal_type card = this->basisCardinality_; // points are computed in the host and will be copied Kokkos::DynRankView<typename scalarViewType::value_type,typename SpT::array_layout,Kokkos::HostSpace> dofCoords("Hgrad::Line::Cn::dofCoords", card, 1); switch (pointType) { case POINTTYPE_EQUISPACED: case POINTTYPE_WARPBLEND: { // lattice ordering { const ordinal_type offset = 0; PointTools::getLattice( dofCoords, this->basisCellTopology_, order, offset, pointType ); } // topological order // { // // two vertices // dofCoords(0,0) = -1.0; // dofCoords(1,0) = 1.0; // // internal points // typedef Kokkos::pair<ordinal_type,ordinal_type> range_type; // auto pts = Kokkos::subview(dofCoords, range_type(2, card), Kokkos::ALL()); // const auto offset = 1; // PointTools::getLattice( pts, // this->basisCellTopology_, // order, offset, // pointType ); // } break; } case POINTTYPE_GAUSS: { // internal points only PointTools::getGaussPoints( dofCoords, order ); break; } default: { INTREPID2_TEST_FOR_EXCEPTION( !isValidPointType(pointType), std::invalid_argument , ">>> ERROR: (Intrepid2::Basis_HGRAD_LINE_Cn_FEM) invalid pointType." ); } } this->dofCoords_ = Kokkos::create_mirror_view(typename SpT::memory_space(), dofCoords); Kokkos::deep_copy(this->dofCoords_, dofCoords); // form Vandermonde matrix; actually, this is the transpose of the VDM, // this matrix is used in LAPACK so it should be column major and left layout const ordinal_type lwork = card*card; Kokkos::DynRankView<typename scalarViewType::value_type,Kokkos::LayoutLeft,Kokkos::HostSpace> vmat("Hgrad::Line::Cn::vmat", card, card), work("Hgrad::Line::Cn::work", lwork), ipiv("Hgrad::Line::Cn::ipiv", card); const double alpha = 0.0, beta = 0.0; Impl::Basis_HGRAD_LINE_Cn_FEM_JACOBI:: getValues<Kokkos::HostSpace::execution_space,Parameters::MaxNumPtsPerBasisEval> (vmat, dofCoords, order, alpha, beta, OPERATOR_VALUE); ordinal_type info = 0; Teuchos::LAPACK<ordinal_type,typename scalarViewType::value_type> lapack; lapack.GETRF(card, card, vmat.data(), vmat.stride_1(), (ordinal_type*)ipiv.data(), &info); INTREPID2_TEST_FOR_EXCEPTION( info != 0, std::runtime_error , ">>> ERROR: (Intrepid2::Basis_HGRAD_LINE_Cn_FEM) lapack.GETRF returns nonzero info." ); lapack.GETRI(card, vmat.data(), vmat.stride_1(), (ordinal_type*)ipiv.data(), work.data(), lwork, &info); INTREPID2_TEST_FOR_EXCEPTION( info != 0, std::runtime_error , ">>> ERROR: (Intrepid2::Basis_HGRAD_LINE_Cn_FEM) lapack.GETRI returns nonzero info." ); // create host mirror Kokkos::DynRankView<typename scalarViewType::value_type,typename SpT::array_layout,Kokkos::HostSpace> vinv("Hgrad::Line::Cn::vinv", card, card); for (ordinal_type i=0;i<card;++i) for (ordinal_type j=0;j<card;++j) vinv(i,j) = vmat(j,i); this->vinv_ = Kokkos::create_mirror_view(typename SpT::memory_space(), vinv); Kokkos::deep_copy(this->vinv_ , vinv); // initialize tags { const bool is_vertex_included = (pointType != POINTTYPE_GAUSS); // Basis-dependent initializations const ordinal_type tagSize = 4; // size of DoF tag, i.e., number of fields in the tag const ordinal_type posScDim = 0; // position in the tag, counting from 0, of the subcell dim const ordinal_type posScOrd = 1; // position in the tag, counting from 0, of the subcell ordinal const ordinal_type posDfOrd = 2; // position in the tag, counting from 0, of DoF ordinal relative to the subcell ordinal_type tags[Parameters::MaxOrder+1][4]; // now we check the points for association if (is_vertex_included) { // lattice order { const auto v0 = 0; tags[v0][0] = 0; // vertex dof tags[v0][1] = 0; // vertex id tags[v0][2] = 0; // local dof id tags[v0][3] = 1; // total number of dofs in this vertex const ordinal_type iend = card - 2; for (ordinal_type i=0;i<iend;++i) { const auto e = i + 1; tags[e][0] = 1; // edge dof tags[e][1] = 0; // edge id tags[e][2] = i; // local dof id tags[e][3] = iend; // total number of dofs in this edge } const auto v1 = card -1; tags[v1][0] = 0; // vertex dof tags[v1][1] = 1; // vertex id tags[v1][2] = 0; // local dof id tags[v1][3] = 1; // total number of dofs in this vertex } // topological order // { // tags[0][0] = 0; // vertex dof // tags[0][1] = 0; // vertex id // tags[0][2] = 0; // local dof id // tags[0][3] = 1; // total number of dofs in this vertex // tags[1][0] = 0; // vertex dof // tags[1][1] = 1; // vertex id // tags[1][2] = 0; // local dof id // tags[1][3] = 1; // total number of dofs in this vertex // const ordinal_type iend = card - 2; // for (ordinal_type i=0;i<iend;++i) { // const auto ii = i + 2; // tags[ii][0] = 1; // edge dof // tags[ii][1] = 0; // edge id // tags[ii][2] = i; // local dof id // tags[ii][3] = iend; // total number of dofs in this edge // } // } } else { for (ordinal_type i=0;i<card;++i) { tags[i][0] = 1; // edge dof tags[i][1] = 0; // edge id tags[i][2] = i; // local dof id tags[i][3] = card; // total number of dofs in this edge } } ordinal_type_array_1d_host tagView(&tags[0][0], card*4); // Basis-independent function sets tag and enum data in tagToOrdinal_ and ordinalToTag_ arrays: // tags are constructed on host this->setOrdinalTagData(this->tagToOrdinal_, this->ordinalToTag_, tagView, this->basisCardinality_, tagSize, posScDim, posScOrd, posDfOrd); } }
void Constraint<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Setup(const MultiVector& B, const MultiVector& Bc, RCP<const CrsGraph> Ppattern) { const size_t NSDim = Bc.getNumVectors(); Ppattern_ = Ppattern; size_t numRows = Ppattern_->getNodeNumRows(); XXtInv_.resize(numRows); RCP<const Import> importer = Ppattern_->getImporter(); X_ = MultiVectorFactory::Build(Ppattern_->getColMap(), NSDim); if (!importer.is_null()) X_->doImport(Bc, *importer, Xpetra::INSERT); else *X_ = Bc; std::vector<const SC*> Xval(NSDim); for (size_t j = 0; j < NSDim; j++) Xval[j] = X_->getData(j).get(); SC zero = Teuchos::ScalarTraits<SC>::zero(); SC one = Teuchos::ScalarTraits<SC>::one(); Teuchos::BLAS <LO,SC> blas; Teuchos::LAPACK<LO,SC> lapack; LO lwork = 3*NSDim; ArrayRCP<LO> IPIV(NSDim); ArrayRCP<SC> WORK(lwork); for (size_t i = 0; i < numRows; i++) { Teuchos::ArrayView<const LO> indices; Ppattern_->getLocalRowView(i, indices); size_t nnz = indices.size(); XXtInv_[i] = Teuchos::SerialDenseMatrix<LO,SC>(NSDim, NSDim, false/*zeroOut*/); Teuchos::SerialDenseMatrix<LO,SC>& XXtInv = XXtInv_[i]; if (NSDim == 1) { SC d = zero; for (size_t j = 0; j < nnz; j++) d += Xval[0][indices[j]] * Xval[0][indices[j]]; XXtInv(0,0) = one/d; } else { Teuchos::SerialDenseMatrix<LO,SC> locX(NSDim, nnz, false/*zeroOut*/); for (size_t j = 0; j < nnz; j++) for (size_t k = 0; k < NSDim; k++) locX(k,j) = Xval[k][indices[j]]; // XXtInv_ = (locX*locX^T)^{-1} blas.GEMM(Teuchos::NO_TRANS, Teuchos::CONJ_TRANS, NSDim, NSDim, nnz, one, locX.values(), locX.stride(), locX.values(), locX.stride(), zero, XXtInv.values(), XXtInv.stride()); LO info; // Compute LU factorization using partial pivoting with row exchanges lapack.GETRF(NSDim, NSDim, XXtInv.values(), XXtInv.stride(), IPIV.get(), &info); // Use the computed factorization to compute the inverse lapack.GETRI(NSDim, XXtInv.values(), XXtInv.stride(), IPIV.get(), WORK.get(), lwork, &info); } } }
//---------------------------------------------------------------------------// // Tests. //---------------------------------------------------------------------------// TEUCHOS_UNIT_TEST( LAPACK, block_inversion ) { // Build a 4x4 block. int m = 4; int n = 4; Teuchos::SerialDenseMatrix<int,double> block( m, n ); block(0,0) = 3.2; block(0,1) = -1.43; block(0,2) = 2.98; block(0,3) = 0.32; block(1,0) = -4.12; block(1,1) = -7.53; block(1,2) = 1.44; block(1,3) = -3.72; block(2,0) = 4.24; block(2,1) = -6.42; block(2,2) = 1.82; block(2,3) = 2.67; block(3,0) = -0.23; block(3,1) = 5.8; block(3,2) = 1.13; block(3,3) = -3.73; // Make a LAPACK object. Teuchos::LAPACK<int,double> lapack; // Compute the LU-factorization of the block. Teuchos::ArrayRCP<int> ipiv( block.numRows() ); int info = 0; int lda = m; lapack.GETRF( m, n, block.values(), lda, ipiv.getRawPtr(), &info ); TEST_EQUALITY( info, 0 ); // Compute the inverse of the block from the LU-factorization. Teuchos::ArrayRCP<double> work( m ); lapack.GETRI( n, block.values(), lda, ipiv.getRawPtr(), work.getRawPtr(), work.size(), &info ); TEST_EQUALITY( info, 0 ); TEST_EQUALITY( work[0], m ); // Check the inversion against matlab. TEST_FLOATING_EQUALITY( block(0,0), -0.461356423424245, 1.0e-14 ); TEST_FLOATING_EQUALITY( block(0,1), -0.060920073472551, 1.0e-14 ); TEST_FLOATING_EQUALITY( block(0,2), 0.547244760641934, 1.0e-14 ); TEST_FLOATING_EQUALITY( block(0,3), 0.412904055961420, 1.0e-14 ); TEST_FLOATING_EQUALITY( block(1,0), 0.154767451798665, 1.0e-14 ); TEST_FLOATING_EQUALITY( block(1,1), -0.056225122550555, 1.0e-14 ); TEST_FLOATING_EQUALITY( block(1,2), -0.174451348828054, 1.0e-14 ); TEST_FLOATING_EQUALITY( block(1,3), -0.055523340725809, 1.0e-14 ); TEST_FLOATING_EQUALITY( block(2,0), 0.848746201780808, 1.0e-14 ); TEST_FLOATING_EQUALITY( block(2,1), 0.045927762119214, 1.0e-14 ); TEST_FLOATING_EQUALITY( block(2,2), -0.618485718805259, 1.0e-14 ); TEST_FLOATING_EQUALITY( block(2,3), -0.415712965073367, 1.0e-14 ); TEST_FLOATING_EQUALITY( block(3,0), 0.526232280383953, 1.0e-14 ); TEST_FLOATING_EQUALITY( block(3,1), -0.069757566407458, 1.0e-14 ); TEST_FLOATING_EQUALITY( block(3,2), -0.492378815120724, 1.0e-14 ); TEST_FLOATING_EQUALITY( block(3,3), -0.505833501236923, 1.0e-14 ); }