// Fills cubPoints and cubWeights with the tensor product of the component
// cubature rules held in cubatures_.
//
//   cubPoints   [out]  indexed as (point, dimension); needs at least
//                      getNumPoints()*getDimension() entries.
//   cubWeights  [out]  indexed as (point); needs at least getNumPoints() entries.
//
// Throws std::out_of_range when either output array is too small.
//
// Both output arrays are also used as scratch space while the component rules
// are being queried, so their previous contents are lost.
void CubatureTensor<Scalar,ArrayPoint,ArrayWeight>::getCubature(ArrayPoint  & cubPoints,
                                                                ArrayWeight & cubWeights) const {
  const int numCubPoints = getNumPoints();
  const int cubDim       = getDimension();
  // check size of cubPoints and cubWeights
  TEUCHOS_TEST_FOR_EXCEPTION( ( ( (int)cubPoints.size() < numCubPoints*cubDim ) || ( (int)cubWeights.size() < numCubPoints ) ),
                      std::out_of_range,
                      ">>> ERROR (CubatureTensor): Insufficient space allocated for cubature points or weights.");

  // per-component sizes and private copies of each component rule
  const unsigned numCubs = cubatures_.size();
  std::vector<unsigned> numLocPoints(numCubs);
  std::vector<unsigned> locDim(numCubs);
  std::vector< FieldContainer<Scalar> > points(numCubs);
  std::vector< FieldContainer<Scalar> > weights(numCubs);

  // extract required points and weights
  for (unsigned i=0; i<numCubs; i++) {

    numLocPoints[i] = cubatures_[i]->getNumPoints();
    locDim[i]       = cubatures_[i]->getDimension();
    points[i].resize(numLocPoints[i], locDim[i]);
    weights[i].resize(numLocPoints[i]);

    // cubPoints and cubWeights are used here only for temporary data retrieval
    cubatures_[i]->getCubature(cubPoints, cubWeights);
    for (unsigned pt=0; pt<numLocPoints[i]; pt++) {
      // One weight per point: copy it once, independent of the number of
      // coordinates (so it is also copied when locDim[i] is zero).
      weights[i](pt) = cubWeights(pt);
      for (unsigned d=0; d<locDim[i]; d++) {
        points[i](pt,d) = cubPoints(pt,d);
      }
    }

  }

  // reset all weights to 1.0 so that the component weights can be multiplied in
  for (int i=0; i<numCubPoints; i++) {
    cubWeights(i) = static_cast<Scalar>(1.0);
  }

  // fill tensor-product cubature
  int globDimCounter = 0;   // first global coordinate column owned by component i
  int shift          = 1;   // product of the point counts of components 0..i-1
  for (unsigned i=0; i<numCubs; i++) {

    for (int j=0; j<numCubPoints; j++) {
      // The stride is evaluated inside the point loop on purpose: when the
      // rule has no points this loop never runs, and shift may be zero.
      const int stride = numCubPoints/shift;
      // Destination point index. The alternative ((j*shift) % numCubPoints) + (j / stride)
      // is equivalent, but numerically unstable.
      const int itmp = (j % stride)*shift + (j / stride);
      // Component points are reused cyclically across the global point index.
      const unsigned locPt = j % numLocPoints[i];
      for (unsigned k=0; k<locDim[i]; k++) {
        cubPoints(itmp , globDimCounter+k) = points[i](locPt, k);
      }
      cubWeights( itmp ) *= weights[i](locPt);
    }

    shift *= numLocPoints[i];
    globDimCounter += locDim[i];
  }

} // end getCubature
// Copies the tabulated rule referenced by cubData into the caller's arrays.
//
//   cubPoints   [out]  indexed as (point, dimension); needs at least
//                      getNumPoints()*getDimension() entries.
//   cubWeights  [out]  indexed as (point); needs at least getNumPoints() entries.
//   cubData     [in]   table whose points_ and weights_ members are read;
//                      it is dereferenced without a null check.
//
// Throws std::out_of_range when either output array is too small.
void CubatureDirect<Scalar,ArrayPoint,ArrayWeight>::getCubatureData(ArrayPoint  &                cubPoints,
                                                                    ArrayWeight &                cubWeights,
                                                                    const CubatureTemplate *     cubData) const {

  const int numCubPoints = getNumPoints();
  const int cellDim      = getDimension();

  // Refuse to write into arrays that cannot hold the whole rule.
  TEUCHOS_TEST_FOR_EXCEPTION( ( ( (int)cubPoints.size() < numCubPoints*cellDim ) || ( (int)cubWeights.size() < numCubPoints ) ),
                      std::out_of_range,
                      ">>> ERROR (CubatureDirect): Insufficient space allocated for cubature points or weights.");

  // For each tabulated point: all of its coordinates first, then its weight.
  for (int pt = 0; pt < numCubPoints; ++pt)
  {
    for (int d = 0; d < cellDim; ++d)
    {
      cubPoints(pt, d) = cubData->points_[pt][d];
    }

    cubWeights(pt) = cubData->weights_[pt];
  }
} // end getCubatureData
// Fills cubPoints and cubWeights with the rule produced by the IntrepidPolylib
// routine that corresponds to poly_type_, using the parameters alpha_ and beta_.
//
//   cubPoints   [out]  indexed as (point, dimension); needs at least
//                      getNumPoints()*getDimension() entries.
//   cubWeights  [out]  indexed as (point); needs at least getNumPoints() entries.
//
// Throws std::out_of_range when either output array is too small and
// std::invalid_argument when poly_type_ is not one of the handled values.
void CubaturePolylib<Scalar,ArrayPoint,ArrayWeight>::getCubature(ArrayPoint & cubPoints, ArrayWeight & cubWeights) const {
  const int numCubPoints = getNumPoints();
  const int cellDim      = getDimension();
  // check size of cubPoints and cubWeights
  TEUCHOS_TEST_FOR_EXCEPTION( ( ( (int)cubPoints.size() < numCubPoints*cellDim ) || ( (int)cubWeights.size() < numCubPoints ) ),
                      std::out_of_range,
                      ">>> ERROR (CubaturePolylib): Insufficient space allocated for cubature points or weights.");

  // temporary storage for the values (z) and weights (w) written by Polylib
  FieldContainer<Scalar> z(numCubPoints);
  FieldContainer<Scalar> w(numCubPoints);

  // run Polylib routines
  switch (poly_type_) {
    case PL_GAUSS:
      IntrepidPolylib::zwgj(&z[0], &w[0], numCubPoints, alpha_, beta_);
      break;
    case PL_GAUSS_RADAU_LEFT:
      IntrepidPolylib::zwgrjm(&z[0], &w[0], numCubPoints, alpha_, beta_);
      break;
    case PL_GAUSS_RADAU_RIGHT:
      IntrepidPolylib::zwgrjp(&z[0], &w[0], numCubPoints, alpha_, beta_);
      break;
    case PL_GAUSS_LOBATTO:
      IntrepidPolylib::zwglj(&z[0], &w[0], numCubPoints, alpha_, beta_);
      break;
    default:
      // unconditional throw: poly_type_ matched none of the cases above
      TEUCHOS_TEST_FOR_EXCEPTION((1),
                         std::invalid_argument,
                         ">>> ERROR (CubaturePolylib): Unknown point type argument.");
  }

  // fill input arrays; every coordinate of a point receives the same value z[pointId]
  for (int pointId = 0; pointId < numCubPoints; pointId++) {
    for (int dim = 0; dim < cellDim; dim++) {
      cubPoints(pointId,dim) = z[pointId];
    }
    cubWeights(pointId) = w[pointId];
  }
} // end getCubature
// Computes, for every cell in cellCoords, the integration points and weights
// associated with the sides of the cell's sub-control volumes.
//
//   cubPoints   [out]  indexed as (cell, side point, dimension); receives the
//                      average of the nodes of each sub-control-volume side.
//   cubWeights  [out]  indexed as (cell, side point, dimension); receives the
//                      physical side normal multiplied by 2^(spaceDim-1).
//   cellCoords  [in]   indexed as (cell, node, dimension).
//
// The first numNodesPerCell side points of a cell come from side 1 of each of
// its sub-control volumes. For Hexahedron<8> and Tetrahedron<4> primary cells
// additional side points are taken from side 5 of the leading sub-control
// volumes. No size checks are performed on the output arrays here.
void CubatureControlVolumeSide<Scalar,ArrayPoint,ArrayWeight>::getCubature(ArrayPoint& cubPoints,
                                                                           ArrayWeight& cubWeights,
                                                                           ArrayPoint& cellCoords) const
{
  // get array dimensions
  const index_type numCells         = static_cast<index_type>(cellCoords.dimension(0));
  const index_type numNodesPerCell  = static_cast<index_type>(cellCoords.dimension(1));
  const index_type spaceDim         = static_cast<index_type>(cellCoords.dimension(2));
  const int numNodesPerSubCV        = subCVCellTopo_->getNodeCount();

  // get sub-control volume coordinates (one sub-control volume per node of primary cell)
  Intrepid2::FieldContainer<Scalar> subCVCoords(numCells,numNodesPerCell,numNodesPerSubCV,spaceDim);
  Intrepid2::CellTools<Scalar>::getSubCVCoords(subCVCoords,cellCoords,*(primaryCellTopo_));

  // num edges per primary cell
  const int numEdgesPerCell = primaryCellTopo_->getEdgeCount();

  // Factor applied to every side normal; evaluated once instead of per entry.
  // NOTE(review): presumably the measure of the reference side - confirm.
  const double sideScale = pow(2,spaceDim-1);

  // Sub-control-volume sides used below.
  const int firstSide = 1;
  const int extraSide = 5;

  // In 2d the number of edges equals the number of nodes, so the side-1 pass
  // below defines every side point. Hexahedra and tetrahedra need a second
  // pass over side 5 of the leading sub-control volumes:
  //   Hexahedron<8>  : edges numNodesPerCell .. numEdgesPerCell-1
  //   Tetrahedron<4> : edges 3 .. 5 (entry 3 written by the side-1 pass is
  //                    overwritten by this second pass)
  const bool isHex = (primaryCellTopo_->getKey()==shards::Hexahedron<8>::key);
  const bool isTet = (primaryCellTopo_->getKey()==shards::Tetrahedron<4>::key);
  int numExtraSides  = 0;
  int firstExtraEdge = 0;
  if (isHex) {
    numExtraSides  = numEdgesPerCell - static_cast<int>(numNodesPerCell);
    firstExtraEdge = static_cast<int>(numNodesPerCell);
  }
  else if (isTet) {
    numExtraSides  = 3;
    firstExtraEdge = 3;
  }

  // Loop over cells
  for (index_type icell = 0; icell < numCells; icell++){

    // Nodes of the sub-control-volume side used for the first set of points
    const int numNodesPerSide = subCVCellTopo_->getNodeCount(spaceDim-1,firstSide);
    Intrepid2::FieldContainer<int> sideNodes(numNodesPerSide);
    for (int i=0; i<numNodesPerSide; i++){
      sideNodes(i) = subCVCellTopo_->getNodeMap(spaceDim-1,firstSide,i);
    }

    // Side midpoints: one per primary cell node, taken from side 1 of the
    // sub-control volume attached to that node.
    for (index_type inode=0; inode < numNodesPerCell; inode++){
      for (index_type idim=0; idim < spaceDim; idim++){
        Scalar midpt = 0.0;
        for (int i=0; i<numNodesPerSide; i++){
          midpt += subCVCoords(icell,inode,sideNodes(i),idim);
        }
        cubPoints(icell,inode,idim) = midpt/numNodesPerSide;
      }
    }

    // Map side center to reference subcell. The local side center is the
    // origin and is stored as double rather than Scalar.
    Intrepid2::FieldContainer<double> sideCenterLocal(1,spaceDim-1);
    for (index_type idim = 0; idim < spaceDim-1; idim++){
      sideCenterLocal(0,idim) = 0.0;
    }

    Intrepid2::FieldContainer<Scalar> refSidePoints(1,spaceDim);
    Intrepid2::CellTools<Scalar>::mapToReferenceSubcell(refSidePoints,
                                 sideCenterLocal,
                                 spaceDim-1, firstSide, *(subCVCellTopo_));

    // Array of cell control volume coordinates
    Intrepid2::FieldContainer<Scalar> cellCVCoords(numNodesPerCell, numNodesPerSubCV, spaceDim);
    for (index_type isubcv = 0; isubcv < numNodesPerCell; isubcv++) {
      for (int inode = 0; inode < numNodesPerSubCV; inode++){
        for (index_type idim = 0; idim < spaceDim; idim++){
          cellCVCoords(isubcv,inode,idim) = subCVCoords(icell,isubcv,inode,idim);
        }
      }
    }

    // calculate Jacobian at side centers
    Intrepid2::FieldContainer<Scalar> subCVsideJacobian(numNodesPerCell, 1, spaceDim, spaceDim);
    Intrepid2::CellTools<Scalar>::setJacobian(subCVsideJacobian, refSidePoints, cellCVCoords, *(subCVCellTopo_));

    // Get subcontrol volume side normals
    Intrepid2::FieldContainer<Scalar> normals(numNodesPerCell, 1, spaceDim);
    Intrepid2::CellTools<Scalar>::getPhysicalSideNormals(normals,subCVsideJacobian,firstSide,*(subCVCellTopo_));

    for (index_type inode = 0; inode < numNodesPerCell; inode++) {
      for (index_type idim = 0; idim < spaceDim; idim++){
        cubWeights(icell,inode,idim) = normals(inode,0,idim)*sideScale;
      }
    }

    // Second set of side midpoints and normals (hexahedron / tetrahedron only)
    if (isHex || isTet)
    {
      for (int i=0; i<numNodesPerSide; i++){
        sideNodes(i) = subCVCellTopo_->getNodeMap(spaceDim-1,extraSide,i);
      }

      // Midpoints of side 5 of the first numExtraSides sub-control volumes
      for (int icount=0; icount < numExtraSides; icount++){
        const int iedge = icount + firstExtraEdge;
        for (index_type idim=0; idim < spaceDim; idim++){
          Scalar midpt = 0.0;
          for (int i=0; i<numNodesPerSide; i++){
            midpt += subCVCoords(icell,icount,sideNodes(i),idim)/numNodesPerSide;
          }
          cubPoints(icell,iedge,idim) = midpt;
        }
      }

      // Map side center to reference subcell
      Intrepid2::CellTools<Scalar>::mapToReferenceSubcell(refSidePoints,
                                   sideCenterLocal,
                                   spaceDim-1, extraSide, *(subCVCellTopo_));

      // calculate Jacobian at side centers
      Intrepid2::CellTools<Scalar>::setJacobian(subCVsideJacobian, refSidePoints, cellCVCoords, *(subCVCellTopo_));

      // Get subcontrol volume side normals
      Intrepid2::CellTools<Scalar>::getPhysicalSideNormals(normals,subCVsideJacobian,extraSide,*(subCVCellTopo_));

      for (int icount = 0; icount < numExtraSides; icount++) {
        const int iedge = icount + firstExtraEdge;
        for (index_type idim = 0; idim < spaceDim; idim++){
          cubWeights(icell,iedge,idim) = normals(icount,0,idim)*sideScale;
        }
      }

    } // end if hexahedron or tetrahedron

  } // end loop over cells

} // end getCubature
// Computes one integration point and one weight per sub-control volume of
// every cell in cellCoords.
//
//   cubPoints   [out]  indexed as (cell, sub-control volume, dimension);
//                      receives the average of the sub-control volume's nodes.
//   cubWeights  [out]  indexed as (cell, sub-control volume); receives the sum
//                      over the degree-2 cubature points of
//                      weight * Jacobian determinant for that sub-control volume.
//   cellCoords  [in]   indexed as (cell, node, dimension).
//
// No size checks are performed on the output arrays here.
void CubatureControlVolume<Scalar,ArrayPoint,ArrayWeight>::getCubature(ArrayPoint& cubPoints,
                                                                       ArrayWeight& cubWeights,
                                                                       ArrayPoint& cellCoords) const
{
  // get array dimensions
  const int numCells         = cellCoords.dimension(0);
  const int numNodesPerCell  = cellCoords.dimension(1);
  const int spaceDim         = cellCoords.dimension(2);
  const int numNodesPerSubCV = subCVCellTopo_->getNodeCount();

  // get sub-control volume coordinates (one sub-control volume per node of primary cell)
  Intrepid2::FieldContainer<Scalar> subCVCoords(numCells,numNodesPerCell,numNodesPerSubCV,spaceDim);
  Intrepid2::CellTools<Scalar>::getSubCVCoords(subCVCoords,cellCoords,*(primaryCellTopo_));

  // Integration points and weights for calculating sub-control volumes
  // (the reference rule is always created in double precision)
  Intrepid2::DefaultCubatureFactory<double>  subCVCubFactory;
  const int subcvCubDegree = 2;
  Teuchos::RCP<Intrepid2::Cubature<double,Intrepid2::FieldContainer<double>  > > subCVCubature;
  subCVCubature = subCVCubFactory.create(*(subCVCellTopo_), subcvCubDegree);

  const int subcvCubDim       = subCVCubature -> getDimension();
  const int numSubcvCubPoints = subCVCubature -> getNumPoints();

  // Get numerical integration points and weights
  Intrepid2::FieldContainer<double> subcvCubPoints (numSubcvCubPoints, subcvCubDim);
  Intrepid2::FieldContainer<double> subcvCubWeights(numSubcvCubPoints);

  subCVCubature -> getCubature(subcvCubPoints, subcvCubWeights);

  // Node coordinates of the current cell's sub-control volumes. Every entry is
  // rewritten for each cell, so the storage is allocated once.
  Intrepid2::FieldContainer<Scalar> cellCVCoords(numNodesPerCell,numNodesPerSubCV,spaceDim);

  // Loop over cells (index type matches numCells to avoid a signed/unsigned comparison)
  for (int icell = 0; icell < numCells; icell++){

    // get sub-control volume centers (integration points)
    for (int isubcv = 0; isubcv < numNodesPerCell; isubcv++){
      for (int idim = 0; idim < spaceDim; idim++){
        // Explicitly zeroed accumulator; no dependence on how a container
        // initializes its entries.
        Scalar center = 0;
        for (int inode = 0; inode < numNodesPerSubCV; inode++){
          center += subCVCoords(icell,isubcv,inode,idim)/numNodesPerSubCV;
          cellCVCoords(isubcv,inode,idim) = subCVCoords(icell,isubcv,inode,idim);
        }
        cubPoints(icell,isubcv,idim) = center;
      }
    }

    // calculate Jacobian and determinant for each subCV quadrature point
    Intrepid2::FieldContainer<Scalar> subCVJacobian(numNodesPerCell, numSubcvCubPoints, spaceDim, spaceDim);
    Intrepid2::FieldContainer<Scalar> subCVJacobDet(numNodesPerCell, numSubcvCubPoints);
    Intrepid2::CellTools<Scalar>::setJacobian(subCVJacobian, subcvCubPoints, cellCVCoords, *(subCVCellTopo_));
    Intrepid2::CellTools<Scalar>::setJacobianDet(subCVJacobDet, subCVJacobian );

    // fill array with sub control volumes (the sub control volume cell measure)
    for (int inode = 0; inode < numNodesPerCell; inode++){
      Scalar vol = 0;
      for (int ipt = 0; ipt < numSubcvCubPoints; ipt++){
        vol += subcvCubWeights(ipt)*subCVJacobDet(inode,ipt);
      }
      cubWeights(icell,inode) = vol;
    }

  } // end cell loop

} // end getCubature