void CubatureTensor<Scalar,ArrayPoint,ArrayWeight>::getCubature(ArrayPoint  & cubPoints,
                                                                ArrayWeight & cubWeights) const {
  int numCubPoints = getNumPoints();
  int cubDim       = getDimension();
  // check size of cubPoints and cubWeights
  TEUCHOS_TEST_FOR_EXCEPTION( ( ( (int)cubPoints.size() < numCubPoints*cubDim ) || ( (int)cubWeights.size() < numCubPoints ) ),
                      std::out_of_range,
                      ">>> ERROR (CubatureTensor): Insufficient space allocated for cubature points or weights.");

  unsigned numCubs   = cubatures_.size();
  std::vector<unsigned> numLocPoints(numCubs);
  std::vector<unsigned> locDim(numCubs);
  std::vector< FieldContainer<Scalar> > points(numCubs);
  std::vector< FieldContainer<Scalar> > weights(numCubs);

  // extract required points and weights
  for (unsigned i=0; i<numCubs; i++) {

    numLocPoints[i] = cubatures_[i]->getNumPoints();
    locDim[i]       = cubatures_[i]->getDimension();
    points[i].resize(numLocPoints[i], locDim[i]);
    weights[i].resize(numLocPoints[i]);

    // cubPoints and cubWeights are used here only for temporary data retrieval
    cubatures_[i]->getCubature(cubPoints, cubWeights);
    for (unsigned pt=0; pt<numLocPoints[i]; pt++) {
      for (unsigned d=0; d<locDim[i]; d++) {
        points[i](pt,d) = cubPoints(pt,d);
        weights[i](pt)  = cubWeights(pt);
      }
    }

  }

  // reset all weights to 1.0
  for (int i=0; i<numCubPoints; i++) {
      cubWeights(i) = (Scalar)1.0;
  }

  // fill tensor-product cubature
  int globDimCounter = 0;
  int shift          = 1;
  for (unsigned i=0; i<numCubs; i++) {

    for (int j=0; j<numCubPoints; j++) {
      /* int itmp = ((j*shift) % numCubPoints) + (j / (numCubPoints/shift)); // equivalent, but numerically unstable */
      int itmp = (j % (numCubPoints/shift))*shift + (j / (numCubPoints/shift));
      for (unsigned k=0; k<locDim[i]; k++) {
        cubPoints(itmp , globDimCounter+k) = points[i](j % numLocPoints[i], k);
      }
      cubWeights( itmp ) *= weights[i](j % numLocPoints[i]);
    }
    
    shift *= numLocPoints[i];
    globDimCounter += locDim[i];
  }

} // end getCubature
void CubatureDirect<Scalar,ArrayPoint,ArrayWeight>::getCubatureData(ArrayPoint  &                cubPoints,
                                                                    ArrayWeight &                cubWeights,
                                                                    const CubatureTemplate *     cubData) const {

  int numCubPoints = getNumPoints();
  int cellDim      = getDimension();
  // check size of cubPoints and cubWeights
  TEUCHOS_TEST_FOR_EXCEPTION( ( ( (int)cubPoints.size() < numCubPoints*cellDim ) || ( (int)cubWeights.size() < numCubPoints ) ),
                      std::out_of_range,
                      ">>> ERROR (CubatureDirect): Insufficient space allocated for cubature points or weights.");

  for (int pointId = 0; pointId < numCubPoints; pointId++) {
    for (int dim = 0; dim < cellDim; dim++) {
      cubPoints(pointId,dim) = cubData->points_[pointId][dim];
    }
    cubWeights(pointId) = cubData->weights_[pointId];
  }
} // end getCubatureData
void CubaturePolylib<Scalar,ArrayPoint,ArrayWeight>::getCubature(ArrayPoint & cubPoints, ArrayWeight & cubWeights) const {
  int numCubPoints = getNumPoints();
  int cellDim      = getDimension();
  // check size of cubPoints and cubWeights
  TEUCHOS_TEST_FOR_EXCEPTION( ( ( (int)cubPoints.size() < numCubPoints*cellDim ) || ( (int)cubWeights.size() < numCubPoints ) ),
                      std::out_of_range,
                      ">>> ERROR (CubatureDirect): Insufficient space allocated for cubature points or weights.");

  // temporary storage
  FieldContainer<Scalar> z(numCubPoints);
  FieldContainer<Scalar> w(numCubPoints);

  // run Polylib routines
  switch (poly_type_) {
    case PL_GAUSS:
      IntrepidPolylib::zwgj(&z[0], &w[0], numCubPoints, alpha_, beta_);
      break;
    case PL_GAUSS_RADAU_LEFT:
      IntrepidPolylib::zwgrjm(&z[0], &w[0], numCubPoints, alpha_, beta_);
      break;
    case PL_GAUSS_RADAU_RIGHT:
      IntrepidPolylib::zwgrjp(&z[0], &w[0], numCubPoints, alpha_, beta_);
      break;
    case PL_GAUSS_LOBATTO:
      IntrepidPolylib::zwglj(&z[0], &w[0], numCubPoints, alpha_, beta_);
      break;
    default:
      TEUCHOS_TEST_FOR_EXCEPTION((1),
                         std::invalid_argument,
                         ">>> ERROR (CubaturePolylib): Unknown point type argument.");
  }

  // fill input arrays
  for (int pointId = 0; pointId < numCubPoints; pointId++) {
    for (int dim = 0; dim < cellDim; dim++) {
      cubPoints(pointId,dim) = z[pointId];
    }
    cubWeights(pointId) = w[pointId];
  }
} // end getCubature