Exemplo n.º 1
0
  void Constraint<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Setup(const MultiVector& B, const MultiVector& Bc, RCP<const CrsGraph> Ppattern) {
    Ppattern_ = Ppattern;

    const RCP<const Map> uniqueMap    = Ppattern_->getDomainMap();
    const RCP<const Map> nonUniqueMap = Ppattern_->getColMap();
    RCP<const Import> importer = ImportFactory::Build(uniqueMap, nonUniqueMap);

    const size_t NSDim = Bc.getNumVectors();
    X_ = MultiVectorFactory::Build(nonUniqueMap, NSDim);
    X_->doImport(Bc, *importer, Xpetra::INSERT);

    size_t numRows = Ppattern_->getNodeNumRows();
    XXtInv_.resize(numRows);
    Teuchos::SerialDenseVector<LO,SC> BcRow(NSDim, false);
    for (size_t i = 0; i < numRows; i++) {
      Teuchos::ArrayView<const LO> indices;
      Ppattern_->getLocalRowView(i, indices);

      size_t nnz = indices.size();

      Teuchos::SerialDenseMatrix<LO,SC> locX(NSDim, nnz, false);
      for (size_t j = 0; j < nnz; j++) {
        for (size_t k = 0; k < NSDim; k++)
          BcRow[k] = X_->getData(k)[indices[j]];

        Teuchos::setCol(BcRow, (LO)j, locX);
      }

      XXtInv_[i] = Teuchos::SerialDenseMatrix<LO,SC>(NSDim, NSDim, false);

      Teuchos::BLAS<LO,SC> blas;
      blas.GEMM(Teuchos::NO_TRANS, Teuchos::CONJ_TRANS, NSDim, NSDim, nnz,
                Teuchos::ScalarTraits<SC>::one(), locX.values(), locX.stride(),
                locX.values(), locX.stride(), Teuchos::ScalarTraits<SC>::zero(),
                XXtInv_[i].values(), XXtInv_[i].stride());

      Teuchos::LAPACK<LO,SC> lapack;
      LO info, lwork = 3*NSDim;
      ArrayRCP<LO> IPIV(NSDim);
      ArrayRCP<SC> WORK(lwork);
      lapack.GETRF(NSDim, NSDim, XXtInv_[i].values(), XXtInv_[i].stride(), IPIV.get(), &info);
      lapack.GETRI(NSDim, XXtInv_[i].values(), XXtInv_[i].stride(), IPIV.get(), WORK.get(), lwork, &info);
    }
  }
  Basis_HGRAD_LINE_Cn_FEM<SpT,OT,PT>::
  Basis_HGRAD_LINE_Cn_FEM( const ordinal_type order,
                           const EPointType   pointType ) {
    this->basisCardinality_  = order+1;
    this->basisDegree_       = order;
    this->basisCellTopology_ = shards::CellTopology(shards::getCellTopologyData<shards::Line<2> >() );
    this->basisType_         = BASIS_FEM_FIAT;
    this->basisCoordinates_  = COORDINATES_CARTESIAN;

    const ordinal_type card = this->basisCardinality_;
    
    // points are computed in the host and will be copied 
    Kokkos::DynRankView<typename scalarViewType::value_type,typename SpT::array_layout,Kokkos::HostSpace>
      dofCoords("Hgrad::Line::Cn::dofCoords", card, 1);


    switch (pointType) {
    case POINTTYPE_EQUISPACED:
    case POINTTYPE_WARPBLEND: {
      // lattice ordering 
      {
        const ordinal_type offset = 0;
        PointTools::getLattice( dofCoords,
                                this->basisCellTopology_, 
                                order, offset, 
                                pointType );
        
      }
      // topological order
      // { 
      //   // two vertices
      //   dofCoords(0,0) = -1.0;
      //   dofCoords(1,0) =  1.0;
        
      //   // internal points
      //   typedef Kokkos::pair<ordinal_type,ordinal_type> range_type;
      //   auto pts = Kokkos::subview(dofCoords, range_type(2, card), Kokkos::ALL());
        
      //   const auto offset = 1;
      //   PointTools::getLattice( pts,
      //                           this->basisCellTopology_, 
      //                           order, offset, 
      //                           pointType );
      // }
      break;
    }
    case POINTTYPE_GAUSS: {
      // internal points only
      PointTools::getGaussPoints( dofCoords, 
                                  order );
      break;
    }
    default: {
      INTREPID2_TEST_FOR_EXCEPTION( !isValidPointType(pointType),
                                    std::invalid_argument , 
                                    ">>> ERROR: (Intrepid2::Basis_HGRAD_LINE_Cn_FEM) invalid pointType." );
    }
    }

    this->dofCoords_ = Kokkos::create_mirror_view(typename SpT::memory_space(), dofCoords);
    Kokkos::deep_copy(this->dofCoords_, dofCoords);
    
    // form Vandermonde matrix; actually, this is the transpose of the VDM,
    // this matrix is used in LAPACK so it should be column major and left layout
    const ordinal_type lwork = card*card;
    Kokkos::DynRankView<typename scalarViewType::value_type,Kokkos::LayoutLeft,Kokkos::HostSpace>
      vmat("Hgrad::Line::Cn::vmat", card, card), 
      work("Hgrad::Line::Cn::work", lwork),
      ipiv("Hgrad::Line::Cn::ipiv", card);

    const double alpha = 0.0, beta = 0.0;
    Impl::Basis_HGRAD_LINE_Cn_FEM_JACOBI::
      getValues<Kokkos::HostSpace::execution_space,Parameters::MaxNumPtsPerBasisEval>
      (vmat, dofCoords, order, alpha, beta, OPERATOR_VALUE);

    ordinal_type info = 0;
    Teuchos::LAPACK<ordinal_type,typename scalarViewType::value_type> lapack;

    lapack.GETRF(card, card, 
                 vmat.data(), vmat.stride_1(),
                 (ordinal_type*)ipiv.data(),
                 &info);

    INTREPID2_TEST_FOR_EXCEPTION( info != 0,
                                  std::runtime_error , 
                                  ">>> ERROR: (Intrepid2::Basis_HGRAD_LINE_Cn_FEM) lapack.GETRF returns nonzero info." );

    lapack.GETRI(card, 
                 vmat.data(), vmat.stride_1(),
                 (ordinal_type*)ipiv.data(),
                 work.data(), lwork,
                 &info);

    INTREPID2_TEST_FOR_EXCEPTION( info != 0,
                                  std::runtime_error , 
                                  ">>> ERROR: (Intrepid2::Basis_HGRAD_LINE_Cn_FEM) lapack.GETRI returns nonzero info." );
    
    // create host mirror 
    Kokkos::DynRankView<typename scalarViewType::value_type,typename SpT::array_layout,Kokkos::HostSpace>
      vinv("Hgrad::Line::Cn::vinv", card, card);

    for (ordinal_type i=0;i<card;++i) 
      for (ordinal_type j=0;j<card;++j) 
        vinv(i,j) = vmat(j,i);

    this->vinv_ = Kokkos::create_mirror_view(typename SpT::memory_space(), vinv);
    Kokkos::deep_copy(this->vinv_ , vinv);

    // initialize tags
    {
      const bool is_vertex_included = (pointType != POINTTYPE_GAUSS);

      // Basis-dependent initializations
      const ordinal_type tagSize  = 4;        // size of DoF tag, i.e., number of fields in the tag
      const ordinal_type posScDim = 0;        // position in the tag, counting from 0, of the subcell dim 
      const ordinal_type posScOrd = 1;        // position in the tag, counting from 0, of the subcell ordinal
      const ordinal_type posDfOrd = 2;        // position in the tag, counting from 0, of DoF ordinal relative to the subcell
      

      ordinal_type tags[Parameters::MaxOrder+1][4];

      // now we check the points for association 
      if (is_vertex_included) {
        // lattice order
        {
          const auto v0 = 0;
          tags[v0][0] = 0; // vertex dof
          tags[v0][1] = 0; // vertex id
          tags[v0][2] = 0; // local dof id
          tags[v0][3] = 1; // total number of dofs in this vertex
          
          const ordinal_type iend = card - 2;
          for (ordinal_type i=0;i<iend;++i) {
            const auto e = i + 1;
            tags[e][0] = 1;    // edge dof
            tags[e][1] = 0;    // edge id
            tags[e][2] = i;    // local dof id
            tags[e][3] = iend; // total number of dofs in this edge
          }

          const auto v1 = card -1;
          tags[v1][0] = 0; // vertex dof
          tags[v1][1] = 1; // vertex id
          tags[v1][2] = 0; // local dof id
          tags[v1][3] = 1; // total number of dofs in this vertex
        }

        // topological order
        // {
        //   tags[0][0] = 0; // vertex dof
        //   tags[0][1] = 0; // vertex id
        //   tags[0][2] = 0; // local dof id
        //   tags[0][3] = 1; // total number of dofs in this vertex
          
        //   tags[1][0] = 0; // vertex dof
        //   tags[1][1] = 1; // vertex id
        //   tags[1][2] = 0; // local dof id
        //   tags[1][3] = 1; // total number of dofs in this vertex
          
        //   const ordinal_type iend = card - 2;
        //   for (ordinal_type i=0;i<iend;++i) {
        //     const auto ii = i + 2;
        //     tags[ii][0] = 1;    // edge dof
        //     tags[ii][1] = 0;    // edge id
        //     tags[ii][2] = i;    // local dof id
        //     tags[ii][3] = iend; // total number of dofs in this edge
        //   }
        // }
      } else {
        for (ordinal_type i=0;i<card;++i) {
          tags[i][0] = 1;    // edge dof
          tags[i][1] = 0;    // edge id
          tags[i][2] = i;    // local dof id
          tags[i][3] = card; // total number of dofs in this edge
        }
      }

      ordinal_type_array_1d_host tagView(&tags[0][0], card*4);

      // Basis-independent function sets tag and enum data in tagToOrdinal_ and ordinalToTag_ arrays:
      // tags are constructed on host
      this->setOrdinalTagData(this->tagToOrdinal_,
                              this->ordinalToTag_,
                              tagView,
                              this->basisCardinality_,
                              tagSize,
                              posScDim,
                              posScOrd,
                              posDfOrd);
    }  
  }
  void Constraint<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Setup(const MultiVector& B, const MultiVector& Bc, RCP<const CrsGraph> Ppattern) {
    const size_t NSDim = Bc.getNumVectors();

    Ppattern_ = Ppattern;

    size_t numRows = Ppattern_->getNodeNumRows();
    XXtInv_.resize(numRows);

    RCP<const Import> importer = Ppattern_->getImporter();

    X_ = MultiVectorFactory::Build(Ppattern_->getColMap(), NSDim);
    if (!importer.is_null())
      X_->doImport(Bc, *importer, Xpetra::INSERT);
    else
      *X_ = Bc;

    std::vector<const SC*> Xval(NSDim);
    for (size_t j = 0; j < NSDim; j++)
      Xval[j] = X_->getData(j).get();

    SC zero = Teuchos::ScalarTraits<SC>::zero();
    SC one  = Teuchos::ScalarTraits<SC>::one();

    Teuchos::BLAS  <LO,SC> blas;
    Teuchos::LAPACK<LO,SC> lapack;
    LO lwork = 3*NSDim;
    ArrayRCP<LO> IPIV(NSDim);
    ArrayRCP<SC> WORK(lwork);

    for (size_t i = 0; i < numRows; i++) {
      Teuchos::ArrayView<const LO> indices;
      Ppattern_->getLocalRowView(i, indices);

      size_t nnz = indices.size();

      XXtInv_[i] = Teuchos::SerialDenseMatrix<LO,SC>(NSDim, NSDim, false/*zeroOut*/);
      Teuchos::SerialDenseMatrix<LO,SC>& XXtInv = XXtInv_[i];

      if (NSDim == 1) {
        SC d = zero;
        for (size_t j = 0; j < nnz; j++)
          d += Xval[0][indices[j]] * Xval[0][indices[j]];
        XXtInv(0,0) = one/d;

      } else {
        Teuchos::SerialDenseMatrix<LO,SC> locX(NSDim, nnz, false/*zeroOut*/);
        for (size_t j = 0; j < nnz; j++)
          for (size_t k = 0; k < NSDim; k++)
            locX(k,j) = Xval[k][indices[j]];

        // XXtInv_ = (locX*locX^T)^{-1}
        blas.GEMM(Teuchos::NO_TRANS, Teuchos::CONJ_TRANS, NSDim, NSDim, nnz,
                   one,   locX.values(),   locX.stride(),
                          locX.values(),   locX.stride(),
                  zero, XXtInv.values(), XXtInv.stride());

        LO info;
        // Compute LU factorization using partial pivoting with row exchanges
        lapack.GETRF(NSDim, NSDim, XXtInv.values(), XXtInv.stride(), IPIV.get(), &info);
        // Use the computed factorization to compute the inverse
        lapack.GETRI(NSDim, XXtInv.values(), XXtInv.stride(), IPIV.get(), WORK.get(), lwork, &info);
      }
    }
  }
Exemplo n.º 4
0
//---------------------------------------------------------------------------//
// Tests.
//---------------------------------------------------------------------------//
TEUCHOS_UNIT_TEST( LAPACK, block_inversion )
{
    // Build a 4x4 block.
    int m = 4;
    int n = 4;
    Teuchos::SerialDenseMatrix<int,double> block( m, n );

    block(0,0) = 3.2;
    block(0,1) = -1.43;
    block(0,2) = 2.98;
    block(0,3) = 0.32;

    block(1,0) = -4.12;
    block(1,1) = -7.53;
    block(1,2) = 1.44;
    block(1,3) = -3.72;

    block(2,0) = 4.24;
    block(2,1) = -6.42;
    block(2,2) = 1.82;
    block(2,3) = 2.67;

    block(3,0) = -0.23;
    block(3,1) = 5.8;
    block(3,2) = 1.13;
    block(3,3) = -3.73;

    // Make a LAPACK object.
    Teuchos::LAPACK<int,double> lapack;

    // Compute the LU-factorization of the block.
    Teuchos::ArrayRCP<int> ipiv( block.numRows() );
    int info = 0;
    int lda = m;
    lapack.GETRF( m, n, block.values(), lda, ipiv.getRawPtr(), &info );
    TEST_EQUALITY( info, 0 );

    // Compute the inverse of the block from the LU-factorization.
    Teuchos::ArrayRCP<double> work( m );
    lapack.GETRI( n, block.values(), lda, ipiv.getRawPtr(),
		  work.getRawPtr(), work.size(), &info );
    TEST_EQUALITY( info, 0 );
    TEST_EQUALITY( work[0], m );

    // Check the inversion against matlab.
    TEST_FLOATING_EQUALITY( block(0,0), -0.461356423424245, 1.0e-14 );
    TEST_FLOATING_EQUALITY( block(0,1), -0.060920073472551, 1.0e-14 );
    TEST_FLOATING_EQUALITY( block(0,2),  0.547244760641934, 1.0e-14 );
    TEST_FLOATING_EQUALITY( block(0,3),  0.412904055961420, 1.0e-14 );

    TEST_FLOATING_EQUALITY( block(1,0),  0.154767451798665, 1.0e-14 );
    TEST_FLOATING_EQUALITY( block(1,1), -0.056225122550555, 1.0e-14 );
    TEST_FLOATING_EQUALITY( block(1,2), -0.174451348828054, 1.0e-14 );
    TEST_FLOATING_EQUALITY( block(1,3), -0.055523340725809, 1.0e-14 );

    TEST_FLOATING_EQUALITY( block(2,0),  0.848746201780808, 1.0e-14 );
    TEST_FLOATING_EQUALITY( block(2,1),  0.045927762119214, 1.0e-14 );
    TEST_FLOATING_EQUALITY( block(2,2), -0.618485718805259, 1.0e-14 );
    TEST_FLOATING_EQUALITY( block(2,3), -0.415712965073367, 1.0e-14 );

    TEST_FLOATING_EQUALITY( block(3,0),  0.526232280383953, 1.0e-14 );
    TEST_FLOATING_EQUALITY( block(3,1), -0.069757566407458, 1.0e-14 );
    TEST_FLOATING_EQUALITY( block(3,2), -0.492378815120724, 1.0e-14 );
    TEST_FLOATING_EQUALITY( block(3,3), -0.505833501236923, 1.0e-14 );
}