KOKKOS_INLINE_FUNCTION
    void OrthPolynomial<1>::generate( /**/  outputViewType output,
                                      const inputViewType input,
                                      const ordinal_type p ) {
      // Fills output(i,j,k) with the k-th first derivative (k = 0,1) of the i-th
      // quantity produced by OrthPolynomial<0>::generate at point j.  The
      // derivatives are obtained by automatic differentiation: the two point
      // coordinates are promoted to Sacado SFad variables, the order-0 generator
      // is evaluated on them, and the derivative components are read back.
      //
      //   output : indexed as (i, j, k), i < output.dimension(0),
      //            j < input.dimension(0), k < 2
      //   input  : indexed as (j, k) with k < 2
      //   p      : forwarded unchanged to OrthPolynomial<0>::generate
      //
      // `typename` is required because value_type is a dependent name.
      typedef typename outputViewType::value_type value_type;
      typedef Sacado::Fad::SFad<value_type,2> fad_type;

      constexpr ordinal_type maxCard = (maxOrder+1)*(maxOrder+2)/2;

      const ordinal_type
        npts = input.dimension(0),
        card = output.dimension(0);

      // Scratch storage lives on the stack.
      // NOTE(review): nothing here checks npts <= maxNumPts or card <= maxCard;
      // presumably the caller guarantees it -- confirm.
      fad_type inBuf[maxNumPts][2], outBuf[maxCard][maxNumPts];

      // Both scratch views are rank 2: they wrap the 2-D buffers above and are
      // only ever indexed with two indices below.
      Kokkos::View<fad_type**,Kokkos::Impl::ActiveExecutionMemorySpace> in (&inBuf[0][0],  npts, 2);
      Kokkos::View<fad_type**,Kokkos::Impl::ActiveExecutionMemorySpace> out(&outBuf[0][0], card, npts);

      // Seed coordinate j of every point as the j-th of two independent variables.
      for (ordinal_type i=0;i<npts;++i)
        for (ordinal_type j=0;j<2;++j) {
          in(i,j) = fad_type( input(i,j) );
          in(i,j).diff(j,2);
        }

      OrthPolynomial<0>::generate<maxOrder,maxNumPts>(out, in, p);

      // Extract the derivative components carried by the Fad results.
      for (ordinal_type i=0;i<card;++i)
        for (ordinal_type j=0;j<npts;++j)
          for (ordinal_type k=0;k<2;++k)
            output(i,j,k) = out(i,j).dx(k);
    }
    KOKKOS_INLINE_FUNCTION
    void
    Basis_Constant_FEM::Serial<opType>::
    getValues( /**/  outputViewType output,
               const inputViewType input ) {
      // Serial kernel of the constant basis.  `input` is never read: the value
      // branch writes 1 to output(0) and the OPERATOR_MAX branch zero-fills a
      // two-index output.  Any other operator triggers the abort macro.
      if (opType == OPERATOR_VALUE) {
        output(0) = 1.0;
      } else if (opType == OPERATOR_MAX) {
        const ordinal_type numRows = output.dimension(0);
        const ordinal_type numCols = output.dimension(1);

        for (ordinal_type col=0;col<numCols;++col)
          for (ordinal_type row=0;row<numRows;++row)
            output(row, col) = 0.0;
      } else {
        INTREPID2_TEST_FOR_ABORT( opType != OPERATOR_VALUE &&
                                  opType != OPERATOR_MAX,
                                  ">>> ERROR: (Intrepid2::Basis_Constant_FEM::Serial::getValues) operator is not supported");
      }
    }
      KOKKOS_INLINE_FUNCTION
      void operator()(const size_type iter) const {
        // One work item: scale (or divide, when _reciprocal is set) a 2-D slice
        // of _rightInput by a single entry of _leftInput and store the result in
        // the matching 2-D slice of _output.
        size_type cell = 0, field = 0, point = 0;
        const size_type rankOut(_output.rank()), rankRight(_rightInput.rank());

        // Decode the flat index into (cell, field, point) or (cell, point).
        if (_hasField) {
          Util::unrollIndex( cell, field, point,
                             _output.dimension(0),
                             _output.dimension(1),
                             _output.dimension(2),
                             iter );
        } else {
          Util::unrollIndex( cell, point,
                             _output.dimension(0),
                             _output.dimension(1),
                             iter );
        }

        // Destination slice.
        auto result = ( _hasField ? Kokkos::subview(_output, cell, field, point, Kokkos::ALL(), Kokkos::ALL()) :
                        /**/        Kokkos::subview(_output, cell,        point, Kokkos::ALL(), Kokkos::ALL()));

        // Right operand: it carries a leading cell index only when its rank
        // equals the output rank.
        const bool sameRank = (rankOut == rankRight);
        const auto right = ( sameRank ? ( _hasField ? Kokkos::subview(_rightInput, cell, field, point, Kokkos::ALL(), Kokkos::ALL()) :
                                          /**/        Kokkos::subview(_rightInput, cell,        point, Kokkos::ALL(), Kokkos::ALL()) ) :
                             /**/       ( _hasField ? Kokkos::subview(_rightInput,       field, point, Kokkos::ALL(), Kokkos::ALL()) :
                                          /**/        Kokkos::subview(_rightInput,              point, Kokkos::ALL(), Kokkos::ALL()) ) );

        // Left operand: a second extent of 1 means one entry is shared by all points.
        const auto left = (_leftInput.dimension(1) == 1) ? Kokkos::subview(_leftInput, cell, 0) :
                            /**/                           Kokkos::subview(_leftInput, cell, point);

        const auto scale = left();

        const size_type numRows = result.dimension(0);
        const size_type numCols = result.dimension(1);

        if (_reciprocal) {
          for (size_type r = 0; r < numRows; ++r)
            for (size_type c = 0; c < numCols; ++c)
              result(r, c) = right(r, c)/scale;
        } else {
          for (size_type r = 0; r < numRows; ++r)
            for (size_type c = 0; c < numCols; ++c)
              result(r, c) = right(r, c)*scale;
        }
      }
      KOKKOS_INLINE_FUNCTION
      void operator()(const size_type iter) const {
        // One work item: accumulates sum_{i,j} left(i,j)*right(i,j) over a 2-D
        // slice of _leftInput and _rightInput and writes the scalar into the
        // (cell, [field,] point) entry of _output.
        //
        // The indices are zero-initialized so that `bf` is never left
        // indeterminate on the no-field path (it is only read when _hasField).
        size_type cl = 0, bf = 0, pt = 0;
        size_type leftRank(_leftInput.rank()), rightRank(_rightInput.rank());

        // Decode the flat index into (cell, field, point) or (cell, point).
        if (_hasField) 
          unrollIndex( cl, bf, pt, 
                             _output.dimension(0),
                             _output.dimension(1), 
                             _output.dimension(2), 
                             iter );
        else          
          unrollIndex( cl, pt,
                             _output.dimension(0),
                             _output.dimension(1),
                             iter);
        
        // Rank-0 destination entry.
        auto result = ( _hasField ? Kokkos::subview(_output, cl, bf, pt) :
                        /**/        Kokkos::subview(_output, cl,     pt));
        
        // Left operand: a second extent of 1 means one slice is shared by all points.
        const auto left = (_leftInput.dimension(1) == 1) ? Kokkos::subview(_leftInput, cl, 0, Kokkos::ALL(), Kokkos::ALL()) :
                            /**/                           Kokkos::subview(_leftInput, cl, pt, Kokkos::ALL(), Kokkos::ALL());

        
        // Right operand: it carries a leading cell index only when its rank
        // equals the left rank plus one extra index for the field (if any).
        const auto right = (rightRank == leftRank + int(_hasField)) ?
                             ( _hasField ? Kokkos::subview(_rightInput, cl, bf, pt, Kokkos::ALL(), Kokkos::ALL()) :
                             /**/          Kokkos::subview(_rightInput, cl,     pt, Kokkos::ALL(), Kokkos::ALL())) :
                             ( _hasField ? Kokkos::subview(_rightInput,     bf, pt, Kokkos::ALL(), Kokkos::ALL()) :
                             /**/          Kokkos::subview(_rightInput,         pt, Kokkos::ALL(), Kokkos::ALL()));
        
        const size_type iend  = left.dimension(0);
        const size_type jend  = left.dimension(1);

        // Accumulate locally and store once.
        value_type tmp(0);
        for(size_type i = 0; i < iend; ++i)
          for(size_type j = 0; j < jend; ++j)
            tmp += left(i, j)*right(i, j);
        result() = tmp;
      }
    KOKKOS_INLINE_FUNCTION
    void
    Basis_HGRAD_TRI_C2_FEM::Serial<opType>::
    getValues( /**/  outputViewType output,
               const inputViewType input ) {
      // Evaluates the six basis functions, or the derivatives selected by
      // opType, at the single point (input(0), input(1)).  The first index of
      // output is the basis-function ordinal; the second, when present, is the
      // derivative component.
      switch (opType) {
      case OPERATOR_VALUE: {
        const auto x = input(0), y = input(1);

        // output(i) = value of basis function i
        output(0) = (x + y - 1.0)*(2.0*x + 2.0*y - 1.0);
        output(1) = x*(2.0*x - 1.0);
        output(2) = y*(2.0*y - 1.0);
        output(3) = -4.0*x*(x + y - 1.0);
        output(4) =  4.0*x*y;
        output(5) = -4.0*y*(x + y - 1.0);
        break;
      }
      case OPERATOR_D1:
      case OPERATOR_GRAD: {
        const auto x = input(0), y = input(1);

        // output(i, 0) = d/dx of basis function i
        output(0, 0) =  4.0*x + 4.0*y - 3.0;
        output(1, 0) =  4.0*x - 1.0;
        output(2, 0) =  0.0;
        output(3, 0) = -4.0*(2.0*x + y - 1.0);
        output(4, 0) =  4.0*y;
        output(5, 0) = -4.0*y;

        // output(i, 1) = d/dy of basis function i
        output(0, 1) =  4.0*x + 4.0*y - 3.0;
        output(1, 1) =  0.0;
        output(2, 1) =  4.0*y - 1.0;
        output(3, 1) = -4.0*x;
        output(4, 1) =  4.0*x;
        output(5, 1) = -4.0*(x + 2.0*y - 1.0);
        break;
      }
      case OPERATOR_CURL: {
        const auto x = input(0), y = input(1);

        // The curl is the rotated gradient: CURL(u) = (u_y, -u_x).

        // output(i, 0) = u_y
        output(0, 0) =  4.0*x + 4.0*y - 3.0;
        output(1, 0) =  0.0;
        output(2, 0) =  4.0*y - 1.0;
        output(3, 0) = -4.0*x;
        output(4, 0) =  4.0*x;
        output(5, 0) = -4.0*(x + 2.0*y - 1.0);

        // output(i, 1) = -u_x
        output(0, 1) =-(4.0*x + 4.0*y - 3.0);
        output(1, 1) =-(4.0*x - 1.0);
        output(2, 1) =  0.0;
        output(3, 1) =  4.0*(2.0*x + y - 1.0);
        output(4, 1) = -4.0*y;
        output(5, 1) =  4.0*y;
        break;
      }
      case OPERATOR_D2: {
        // Second derivatives are constant.  Columns: 0 -> dx^2, 1 -> dx dy, 2 -> dy^2.
        output(0, 0) = 4.0;   output(0, 1) = 4.0;   output(0, 2) = 4.0;
        output(1, 0) = 4.0;   output(1, 1) = 0.0;   output(1, 2) = 0.0;
        output(2, 0) = 0.0;   output(2, 1) = 0.0;   output(2, 2) = 4.0;
        output(3, 0) =-8.0;   output(3, 1) =-4.0;   output(3, 2) = 0.0;
        output(4, 0) = 0.0;   output(4, 1) = 4.0;   output(4, 2) = 0.0;
        output(5, 0) = 0.0;   output(5, 1) =-4.0;   output(5, 2) =-8.0;
        break;
      }
      case OPERATOR_MAX: {
        // Zero-fill every entry of the two-index output.
        const ordinal_type numRows = output.dimension(0);
        const ordinal_type numCols = output.dimension(1);

        for (ordinal_type col=0;col<numCols;++col)
          for (ordinal_type row=0;row<numRows;++row)
            output(row, col) = 0.0;
        break;
      }
      default: {
        INTREPID2_TEST_FOR_ABORT( opType != OPERATOR_VALUE &&
                                  opType != OPERATOR_GRAD &&
                                  opType != OPERATOR_CURL &&
                                  opType != OPERATOR_D1 &&
                                  opType != OPERATOR_D2 &&
                                  opType != OPERATOR_MAX,
                                  ">>> ERROR: (Intrepid2::Basis_HGRAD_TRI_C2_FEM::Serial::getValues) operator is not supported");
      }
      }
    }
    KOKKOS_INLINE_FUNCTION
    void
    Basis_HGRAD_HEX_C1_FEM::Serial<opType>::
    getValues( /**/  outputViewType output,
               const inputViewType input ) {
      // Evaluates the eight basis functions, or the derivatives selected by
      // opType, at the single point (input(0), input(1), input(2)).  The first
      // index of output is the basis-function ordinal; the second, when
      // present, is the derivative component.
      //
      // Handled operators: VALUE, GRAD, D2 and MAX.  Every other operator,
      // including OPERATOR_CURL (which has no case here), reaches the default
      // branch and triggers the abort macro instead of returning with output
      // left unwritten.
      switch (opType) {
      case OPERATOR_VALUE : {
        const auto x = input(0);
        const auto y = input(1);
        const auto z = input(2);

        // output(i) = value of basis function i
        output(0) = (1.0 - x)*(1.0 - y)*(1.0 - z)/8.0;
        output(1) = (1.0 + x)*(1.0 - y)*(1.0 - z)/8.0;
        output(2) = (1.0 + x)*(1.0 + y)*(1.0 - z)/8.0;
        output(3) = (1.0 - x)*(1.0 + y)*(1.0 - z)/8.0;

        output(4) = (1.0 - x)*(1.0 - y)*(1.0 + z)/8.0;
        output(5) = (1.0 + x)*(1.0 - y)*(1.0 + z)/8.0;
        output(6) = (1.0 + x)*(1.0 + y)*(1.0 + z)/8.0;
        output(7) = (1.0 - x)*(1.0 + y)*(1.0 + z)/8.0;
        break;
      }
      case OPERATOR_GRAD : {
        const auto x = input(0);
        const auto y = input(1);
        const auto z = input(2);

        // output(i, d) = derivative of basis function i along axis d (0:x, 1:y, 2:z)
        output(0, 0) = -(1.0 - y)*(1.0 - z)/8.0;
        output(0, 1) = -(1.0 - x)*(1.0 - z)/8.0;
        output(0, 2) = -(1.0 - x)*(1.0 - y)/8.0;

        output(1, 0) =  (1.0 - y)*(1.0 - z)/8.0;
        output(1, 1) = -(1.0 + x)*(1.0 - z)/8.0;
        output(1, 2) = -(1.0 + x)*(1.0 - y)/8.0;

        output(2, 0) =  (1.0 + y)*(1.0 - z)/8.0;
        output(2, 1) =  (1.0 + x)*(1.0 - z)/8.0;
        output(2, 2) = -(1.0 + x)*(1.0 + y)/8.0;

        output(3, 0) = -(1.0 + y)*(1.0 - z)/8.0;
        output(3, 1) =  (1.0 - x)*(1.0 - z)/8.0;
        output(3, 2) = -(1.0 - x)*(1.0 + y)/8.0;

        output(4, 0) = -(1.0 - y)*(1.0 + z)/8.0;
        output(4, 1) = -(1.0 - x)*(1.0 + z)/8.0;
        output(4, 2) =  (1.0 - x)*(1.0 - y)/8.0;

        output(5, 0) =  (1.0 - y)*(1.0 + z)/8.0;
        output(5, 1) = -(1.0 + x)*(1.0 + z)/8.0;
        output(5, 2) =  (1.0 + x)*(1.0 - y)/8.0;

        output(6, 0) =  (1.0 + y)*(1.0 + z)/8.0;
        output(6, 1) =  (1.0 + x)*(1.0 + z)/8.0;
        output(6, 2) =  (1.0 + x)*(1.0 + y)/8.0;

        output(7, 0) = -(1.0 + y)*(1.0 + z)/8.0;
        output(7, 1) =  (1.0 - x)*(1.0 + z)/8.0;
        output(7, 2) =  (1.0 - x)*(1.0 + y)/8.0;
        break;
      }
      case OPERATOR_D2 : {
        const auto x = input(0);
        const auto y = input(1);
        const auto z = input(2);

        // output(i, m) = second derivative m of basis function i; the trailing
        // comment on each line gives the derivative multi-index {dx, dy, dz}.
        // Pure second derivatives vanish; only the mixed ones are non-zero.
        output(0, 0) =  0.0;                    // {2, 0, 0}
        output(0, 1) =  (1.0 - z)/8.0;          // {1, 1, 0}
        output(0, 2) =  (1.0 - y)/8.0;          // {1, 0, 1}
        output(0, 3) =  0.0;                    // {0, 2, 0}
        output(0, 4) =  (1.0 - x)/8.0;          // {0, 1, 1}
        output(0, 5) =  0.0;                    // {0, 0, 2}

        output(1, 0) =  0.0;                    // {2, 0, 0}
        output(1, 1) = -(1.0 - z)/8.0;          // {1, 1, 0}
        output(1, 2) = -(1.0 - y)/8.0;          // {1, 0, 1}
        output(1, 3) =  0.0;                    // {0, 2, 0}
        output(1, 4) =  (1.0 + x)/8.0;          // {0, 1, 1}
        output(1, 5) =  0.0;                    // {0, 0, 2}

        output(2, 0) =  0.0;                    // {2, 0, 0}
        output(2, 1) =  (1.0 - z)/8.0;          // {1, 1, 0}
        output(2, 2) = -(1.0 + y)/8.0;          // {1, 0, 1}
        output(2, 3) =  0.0;                    // {0, 2, 0}
        output(2, 4) = -(1.0 + x)/8.0;          // {0, 1, 1}
        output(2, 5) =  0.0;                    // {0, 0, 2}

        output(3, 0) =  0.0;                    // {2, 0, 0}
        output(3, 1) = -(1.0 - z)/8.0;          // {1, 1, 0}
        output(3, 2) =  (1.0 + y)/8.0;          // {1, 0, 1}
        output(3, 3) =  0.0;                    // {0, 2, 0}
        output(3, 4) = -(1.0 - x)/8.0;          // {0, 1, 1}
        output(3, 5) =  0.0;                    // {0, 0, 2}


        output(4, 0) =  0.0;                    // {2, 0, 0}
        output(4, 1) =  (1.0 + z)/8.0;          // {1, 1, 0}
        output(4, 2) = -(1.0 - y)/8.0;          // {1, 0, 1}
        output(4, 3) =  0.0;                    // {0, 2, 0}
        output(4, 4) = -(1.0 - x)/8.0;          // {0, 1, 1}
        output(4, 5) =  0.0;                    // {0, 0, 2}

        output(5, 0) =  0.0;                    // {2, 0, 0}
        output(5, 1) = -(1.0 + z)/8.0;          // {1, 1, 0}
        output(5, 2) =  (1.0 - y)/8.0;          // {1, 0, 1}
        output(5, 3) =  0.0;                    // {0, 2, 0}
        output(5, 4) = -(1.0 + x)/8.0;          // {0, 1, 1}
        output(5, 5) =  0.0;                    // {0, 0, 2}

        output(6, 0) =  0.0;                    // {2, 0, 0}
        output(6, 1) =  (1.0 + z)/8.0;          // {1, 1, 0}
        output(6, 2) =  (1.0 + y)/8.0;          // {1, 0, 1}
        output(6, 3) =  0.0;                    // {0, 2, 0}
        output(6, 4) =  (1.0 + x)/8.0;          // {0, 1, 1}
        output(6, 5) =  0.0;                    // {0, 0, 2}

        output(7, 0) =  0.0;                    // {2, 0, 0}
        output(7, 1) = -(1.0 + z)/8.0;          // {1, 1, 0}
        output(7, 2) = -(1.0 + y)/8.0;          // {1, 0, 1}
        output(7, 3) =  0.0;                    // {0, 2, 0}
        output(7, 4) =  (1.0 - x)/8.0;          // {0, 1, 1}
        output(7, 5) =  0.0;                    // {0, 0, 2}
        break;
      }
      case OPERATOR_MAX : {
        // Zero-fill every entry of the two-index output.
        const ordinal_type jend = output.dimension(1);
        const ordinal_type iend = output.dimension(0);

        for (ordinal_type j=0;j<jend;++j)
          for (ordinal_type i=0;i<iend;++i)
            output(i, j) = 0.0;
        break;
      }
      default: {
        // Only operators with a case above are excluded from the abort.
        // OPERATOR_CURL is deliberately not listed: this switch has no CURL
        // case, so a CURL instantiation must abort rather than pass silently.
        INTREPID2_TEST_FOR_ABORT( opType != OPERATOR_VALUE &&
                                  opType != OPERATOR_GRAD &&
                                  opType != OPERATOR_D2 &&
                                  opType != OPERATOR_MAX,
                                  ">>> ERROR: (Intrepid2::Basis_HGRAD_HEX_C1_FEM::Serial::getValues) operator is not supported");

      }
      }
    }
    KOKKOS_INLINE_FUNCTION
    void
    Basis_HGRAD_QUAD_C1_FEM::Serial<opType>::
    getValues(       outputViewType output,
               const inputViewType input ) {
      // Evaluates the four basis functions, or the derivatives selected by
      // opType, at the single point (input(0), input(1)).  The first index of
      // output is the basis-function ordinal; the second, when present, is the
      // derivative component.
      switch (opType) {
      case OPERATOR_VALUE : {
        const auto x = input(0), y = input(1);

        // output(i) = value of basis function i
        output(0) = (1.0 - x)*(1.0 - y)/4.0;
        output(1) = (1.0 + x)*(1.0 - y)/4.0;
        output(2) = (1.0 + x)*(1.0 + y)/4.0;
        output(3) = (1.0 - x)*(1.0 + y)/4.0;
        break;
      }
      case OPERATOR_GRAD : {
        const auto x = input(0), y = input(1);

        // output(i, 0) = d/dx of basis function i
        output(0, 0) = -(1.0 - y)/4.0;
        output(1, 0) =  (1.0 - y)/4.0;
        output(2, 0) =  (1.0 + y)/4.0;
        output(3, 0) = -(1.0 + y)/4.0;

        // output(i, 1) = d/dy of basis function i
        output(0, 1) = -(1.0 - x)/4.0;
        output(1, 1) = -(1.0 + x)/4.0;
        output(2, 1) =  (1.0 + x)/4.0;
        output(3, 1) =  (1.0 - x)/4.0;
        break;
      }
      case OPERATOR_CURL : {
        const auto x = input(0), y = input(1);

        // output(i, 0) holds the y-derivative of basis function i
        output(0, 0) = -(1.0 - x)/4.0;
        output(1, 0) = -(1.0 + x)/4.0;
        output(2, 0) =  (1.0 + x)/4.0;
        output(3, 0) =  (1.0 - x)/4.0;

        // output(i, 1) holds the negated x-derivative of basis function i
        output(0, 1) =  (1.0 - y)/4.0;
        output(1, 1) = -(1.0 - y)/4.0;
        output(2, 1) = -(1.0 + y)/4.0;
        output(3, 1) =  (1.0 + y)/4.0;
        break;
      }
      case OPERATOR_D2 : {
        // Three second-derivative components per basis function; the first
        // and last are zero and only the middle (mixed) one is non-zero.
        for (ordinal_type i=0;i<4;++i) {
          output(i, 0) =  0.0;
          output(i, 2) =  0.0;
        }

        output(0, 1) =  0.25;
        output(1, 1) = -0.25;
        output(2, 1) =  0.25;
        output(3, 1) = -0.25;
        break;
      }
      case OPERATOR_MAX : {
        // Zero-fill every entry of the two-index output.
        const ordinal_type numRows = output.dimension(0);
        const ordinal_type numCols = output.dimension(1);

        for (ordinal_type col=0;col<numCols;++col)
          for (ordinal_type row=0;row<numRows;++row)
            output(row, col) = 0.0;
        break;
      }
      default: {
        INTREPID2_TEST_FOR_ABORT( opType != OPERATOR_VALUE &&
                                  opType != OPERATOR_GRAD &&
                                  opType != OPERATOR_CURL &&
                                  opType != OPERATOR_D2 &&
                                  opType != OPERATOR_MAX,
                                  ">>> ERROR: (Intrepid2::Basis_HGRAD_QUAD_C1_FEM::Serial::getValues) operator is not supported");

      }
      }
    }