void DOFInterpolation<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  // Interpolate the nodal field to the quadrature points of every cell in
  // the workset:
  //   val_qp = sum over nodes of val_node * BF(cell,node,qp).
  // When numRank is 2 the fields are indexed (cell,node) -> (cell,qp);
  // otherwise a trailing level index running over numLevels is carried along.
  //
  // Equivalent Intrepid call, kept for reference:
  //   for (int i=0; i < val_qp.size() ; i++) val_qp[i] = 0.0;
  //   Intrepid::FunctionSpaceTools:: evaluate<ScalarT>(val_qp, val_node, BF);
  const bool rankTwo = (numRank == 2);

  for (int c = 0; c < workset.numCells; ++c) {
    for (int q = 0; q < numQPs; ++q) {
      if (!rankTwo) {
        // One independent sum per level.
        for (int lev = 0; lev < numLevels; ++lev) {
          typename PHAL::Ref<ScalarT>::type sum = val_qp(c,q,lev);
          sum = 0;
          for (int n = 0; n < numNodes; ++n) {
            sum += val_node(c, n, lev) * BF(c, n, q);
          }
        }
        continue;
      }

      // Rank-2 case: a single sum per quadrature point.
      typename PHAL::Ref<ScalarT>::type sum = val_qp(c,q);
      sum = 0;
      for (int n = 0; n < numNodes; ++n) {
        sum += val_node(c, n) * BF(c, n, q);
      }
    }
  }
}
/*
 * Append a new node holding `d` and its `freed` flag to the list owned by
 * vg_ptr.  The node comes from val_node(); it becomes the head when
 * ISVAL_EMPTY is true and is otherwise linked after the current tail.
 * In both cases the new node ends up as the tail with a NULL `next`.
 */
void val_push(struct value_stack *vg_ptr,struct data *d,int freed) {
  /* Evaluate the emptiness test before allocating, as the branches did. */
  const int was_empty = (ISVAL_EMPTY) ? 1 : 0;
  struct value_stack *fresh = val_node();

  fresh->val = d;
  fresh->freed = freed;
  fresh->next = (struct value_stack *) NULL;

  if (was_empty)
    vg_ptr->head = fresh;
  else
    vg_ptr->tail->next = fresh;

  vg_ptr->tail = fresh;
}
void DOFInterpolation<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  // Interpolate the nodal scalar field to the quadrature points:
  //   val_qp(cell,qp) = sum over nodes of val_node(cell,node) * BF(cell,node,qp).
  // The first node's term overwrites the output, so no separate zeroing
  // pass is required; the remaining nodes are accumulated on top of it.
  //
  // Equivalent Intrepid call, kept for reference:
  //   for (int i=0; i < val_qp.size() ; i++) val_qp[i] = 0.0;
  //   Intrepid::FunctionSpaceTools:: evaluate<ScalarT>(val_qp, val_node, BF);
  for (std::size_t c = 0; c < workset.numCells; ++c) {
    for (std::size_t q = 0; q < numQPs; ++q) {
      ScalarT& acc = val_qp(c,q);

      acc = val_node(c, 0) * BF(c, 0, q);

      std::size_t n = 1;
      while (n < numNodes) {
        acc += val_node(c, n) * BF(c, n, q);
        ++n;
      }
    }
  }
}
KOKKOS_INLINE_FUNCTION
void FEInterpolation<EvalT, Traits>::operator () (const int i) const
{
  // Per-cell kernel: for cell `i`, compute at every quadrature point the
  // interpolated value (val_qp) and gradient (val_grad_qp) of the nodal
  // field val_node using the basis values phi and basis gradients grad_phi.
  typedef PHX::index_size_type idx_t;

  for (idx_t q = 0; q < num_qp; ++q) {
    // Clear the outputs for this quadrature point before accumulating.
    val_qp(i,q) = 0.0;
    for (idx_t d = 0; d < num_dim; ++d) {
      val_grad_qp(i,q,d) = 0.0;
    }

    // Add each node's contribution to the value and to every gradient
    // component.
    for (idx_t n = 0; n < num_nodes; ++n) {
      val_qp(i,q) += phi(q, n) * val_node(i,n);
      for (idx_t d = 0; d < num_dim; ++d) {
        val_grad_qp(i,q,d) += grad_phi(q, n, d) * val_node(i,n);
      }
    }
  }
}
//**********************************************************************
PHX_EVALUATE_FIELDS(FEInterpolation,cell_data)
{
  // For every cell in the workset, interpolate the nodal field val_node to
  // the quadrature points, producing both the value (val_qp) and the
  // gradient (val_grad_qp).  Basis data is fetched from the element that the
  // workset iterator currently points at; the iterator advances in lock-step
  // with the cell index.
  typedef shards::Array<double,shards::NaturalOrder,QuadPoint,Node> BasisArray;
  typedef shards::Array<double,shards::NaturalOrder,QuadPoint,Node,Dim> BasisGradArray;

  std::vector<Element_Linear2D>::iterator elem = cell_data.begin;

  for (std::size_t cell = 0; cell < cell_data.num_cells; ++cell, ++elem) {

    const BasisArray& shape = elem->basisFunctions();
    const BasisGradArray& shape_grad = elem->basisFunctionGradientsRealSpace();

    for (int qp = 0; qp < num_qp; ++qp) {

      // Clear the outputs for this quadrature point.
      val_qp(cell,qp) = 0.0;
      for (int dim = 0; dim < num_dim; ++dim) {
        val_grad_qp(cell,qp,dim) = 0.0;
      }

      // Accumulate the contribution of each node.
      for (int node = 0; node < num_nodes; ++node) {
        val_qp(cell,qp) += shape(qp,node) * val_node(cell,node);

        for (int dim = 0; dim < num_dim; ++dim) {
          val_grad_qp(cell,qp,dim) +=
            shape_grad(qp,node,dim) * val_node(cell,node);
        }
      }
    }
  }

  // Debug output, disabled:
//   std::cout << "FEINterpolation: val_node" << std::endl;
//   val_node.print(std::cout,true);
//   std::cout << "FEINterpolation: val_qp" << std::endl;
//   val_qp.print(std::cout,true);
//   std::cout << "FEINterpolation: val_grad_qp" << std::endl;
//   val_grad_qp.print(std::cout,true);

}
void DOFTensorInterpolationBase<EvalT, Traits, ScalarT>::
evaluateFields(typename Traits::EvalData workset)
{
  // Interpolate a nodal tensor field to the quadrature points, one
  // (row,col) component at a time:
  //   val_qp(cell,qp,row,col) =
  //     sum over nodes of val_node(cell,node,row,col) * BF(cell,node,qp).
  // Both tensor indices run over vecDim.
  for (std::size_t c = 0; c < workset.numCells; ++c) {
    for (std::size_t q = 0; q < numQPs; ++q) {
      for (std::size_t row = 0; row < vecDim; ++row) {
        for (std::size_t col = 0; col < vecDim; ++col) {
          typename PHAL::Ref<ScalarT>::type entry = val_qp(c,q,row,col);

          // Node 0 overwrites the entry (this doubles as the zeroing step);
          // nodes 1..numNodes-1 are then added on top.
          entry = val_node(c, 0, row, col) * BF(c, 0, q);
          for (std::size_t n = 1; n < numNodes; ++n) {
            entry += val_node(c, n, row, col) * BF(c, n, q);
          }
        }
      }
    }
  }
}
  void SurfaceScalarGradientOperator<EvalT, Traits>::
  evaluateFields(typename Traits::EvalData workset)
  {
    // Two passes over the workset:
    //  1. Build surface_Grad_BF, the gradient operator of the surface element.
    //     Each in-plane node contributes one entry for itself and one for the
    //     paired node at index node + numPlaneNodes.
    //  2. Contract surface_Grad_BF with the nodal values val_node to obtain
    //     grad_val_qp at every integration point.

    // Scratch gradients in the parent frame, reused for every node.
    Intrepid2::Vector<MeshScalarT> parentGradTop(3);
    Intrepid2::Vector<MeshScalarT> parentGradBottom(3);

    for (int cell = 0; cell < workset.numCells; ++cell) {
      for (int pt = 0; pt < numQPs; ++pt) {

        Intrepid2::Tensor<MeshScalarT> dualBasis(3, refDualBasis,cell, pt,0,0);

        // Constructed as in the original code; not used below.
        Intrepid2::Vector<MeshScalarT> N(3, refNormal,cell, pt,0);

        dualBasis = Intrepid2::transpose(dualBasis);

        for (int node = 0; node < numPlaneNodes; ++node) {
          const int topNode = node + numPlaneNodes;

          // In-plane components: both sides get half of the reference
          // gradient.
          for (int i = 0; i < numPlaneDims; ++i) {
            parentGradTop(i) = 0.5*refGrads(node, pt, i);
            parentGradBottom(i) = 0.5*refGrads(node, pt, i);
          }

          // Through-thickness component: +/- shape value over thickness.
          MeshScalarT invh = 1./ thickness;
          parentGradTop(numPlaneDims) = invh * refValues(node,pt);
          parentGradBottom(numPlaneDims) = -invh * refValues(node,pt);

          // Map the parent-frame gradients through the transposed dual basis.
          Intrepid2::Vector<MeshScalarT> physGradTop(Intrepid2::dot(dualBasis, parentGradTop));
          Intrepid2::Vector<MeshScalarT> physGradBottom(Intrepid2::dot(dualBasis, parentGradBottom));

          // Store the components in the output field.
          for (int j = 0; j < numDims; ++j) {
            surface_Grad_BF(cell, topNode, pt, j) = physGradTop(j);
            surface_Grad_BF(cell, node, pt, j) = physGradBottom(j);
          }
        }
      }
    }

    // Gradient of the scalar: sum over all nodes of operator * nodal value.
    for (int cell = 0; cell < workset.numCells; ++cell) {
      for (int pt = 0; pt < numQPs; ++pt) {
        for (int k = 0; k < numDims; ++k) {
          grad_val_qp(cell, pt, k) = 0;
          for (int node = 0; node < numNodes; ++node) {
            grad_val_qp(cell, pt, k) +=
              surface_Grad_BF(cell, node, pt, k) * val_node(cell,node);
          }
        }
      }
    }

  }
void DOFVecInterpolationLevels<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  // Interpolate a nodal vector field with levels to the quadrature points:
  //   val_qp(cell,qp,level,dim) =
  //     sum over nodes of val_node(cell,node,level,dim) * BF(cell,node,qp).
  // The whole output field is cleared first, then every node's contribution
  // is accumulated.
  PHAL::set(val_qp, 0.0);

  for (int c = 0; c < workset.numCells; ++c) {
    for (int q = 0; q < numQPs; ++q) {
      for (int n = 0; n < numNodes; ++n) {
        for (int lev = 0; lev < numLevels; ++lev) {
          for (int d = 0; d < numDims; ++d) {
            val_qp(c,q,lev,d) += val_node(c,n,lev,d) * BF(c,n,q);
          }
        }
      }
    }
  }
}
/*
 * Create an empty value list.  A node is obtained from val_node(), its head
 * and tail links are set to NULL, and its val_ptr is pointed at the
 * file-level val_operations object.  Returns the new node.
 */
struct value_stack *val_initialise(void){
  struct value_stack *stack = val_node();

  stack->head = (struct value_stack *)NULL;
  stack->tail = (struct value_stack *)NULL;
  stack->val_ptr = &val_operations;

  return stack;
}
void DOFInterpolation<PHAL::AlbanyTraits::Jacobian, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  // Jacobian specialisation of the nodal-to-quadrature-point interpolation.
  // Instead of multiplying full Fad objects, the value and a single
  // derivative entry per node are handled explicitly: for node `n` only
  // derivative slot neq*n+offset is read from val_node and written to val_qp.
  // NOTE(review): presumably every other derivative entry of val_node is
  // zero, which is what would make this shortcut exact - confirm.
  //
  // Equivalent Intrepid call, kept for reference:
  //   for (int i=0; i < val_qp.size() ; i++) val_qp[i] = 0.0;
  //   Intrepid::FunctionSpaceTools:: evaluate<ScalarT>(val_qp, val_node, BF);

  // Derivative array length, sampled from the first nodal value.
  const int num_dof = val_node(0,0).size();
  const int neq = num_dof / numNodes;
  // With no derivative components there is nothing to touch via
  // fastAccessDx.
  const bool has_derivs = (num_dof != 0);

  for (std::size_t c = 0; c < workset.numCells; ++c) {
    for (std::size_t q = 0; q < numQPs; ++q) {
      ScalarT& out = val_qp(c,q);

      // Node 0 initialises the result: a Fad of length num_dof carrying the
      // value term, plus this node's derivative slot.
      out = FadType(num_dof, val_node(c, 0).val() * BF(c, 0, q));
      if (has_derivs) {
        out.fastAccessDx(offset) = val_node(c, 0).fastAccessDx(offset) * BF(c, 0, q);
      }

      // Remaining nodes accumulate into the value and into their own slot.
      for (std::size_t n = 1; n < numNodes; ++n) {
        out.val() += val_node(c, n).val() * BF(c, n, q);
        if (has_derivs) {
          const int slot = neq*n+offset;
          out.fastAccessDx(slot) += val_node(c, n).fastAccessDx(slot) * BF(c, n, q);
        }
      }
    }
  }
}
void DOFGradInterpolation_noDeriv<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  // Interpolate the gradient of a nodal scalar field to the quadrature
  // points:
  //   grad_val_qp(cell,qp,dim) =
  //     sum over nodes of val_node(cell,node) * GradBF(cell,node,qp,dim).
  //
  // Equivalent Intrepid call, kept for reference:
  //   for (int i=0; i < grad_val_qp.size() ; i++) grad_val_qp[i] = 0.0;
  //   Intrepid::FunctionSpaceTools:: evaluate<ScalarT>(grad_val_qp, val_node, GradBF);

  // Clear every entry of the output through its flat index; this covers the
  // whole field, not only the first workset.numCells cells.
  const std::size_t total = grad_val_qp.size();
  for (std::size_t k = 0; k < total; ++k) {
    grad_val_qp(k) = 0.0;
  }

  for (int c = 0; c < workset.numCells; ++c) {
    for (int q = 0; q < numQPs; ++q) {
      for (int d = 0; d < numDims; ++d) {
        for (int n = 0; n < numNodes; ++n) {
          grad_val_qp(c,q,d) += val_node(c, n) * GradBF(c, n, q, d);
        }
      }
    }
  }
}
 void DOFVecGradInterpolation<EvalT, Traits>::
 evaluateFields(typename Traits::EvalData workset)
 {
   // Interpolate the gradient of a nodal vector field to the quadrature
   // points:
   //   grad_val_qp(cell,qp,i,dim) =
   //     sum over nodes of val_node(cell,node,i) * GradBF(cell,node,qp,dim).
   // The first node's term overwrites the output entry, so the field needs
   // no separate zeroing pass.
   //
   // Equivalent Intrepid call (which sums into the output and would need the
   // zeroing loop first), kept for reference:
   //   for (int i=0; i < grad_val_qp.size() ; i++) grad_val_qp[i] = 0.0;
   //   Intrepid::FunctionSpaceTools::evaluate<ScalarT>(grad_val_qp, val_node, GradBF);
   for (std::size_t c = 0; c < workset.numCells; ++c) {
     for (std::size_t q = 0; q < numQPs; ++q) {
       for (std::size_t comp = 0; comp < vecDim; ++comp) {
         for (std::size_t d = 0; d < numDims; ++d) {
           ScalarT& entry = grad_val_qp(c,q,comp,d);

           entry = val_node(c, 0, comp) * GradBF(c, 0, q, d);

           std::size_t n = 1;
           while (n < numNodes) {
             entry += val_node(c, n, comp) * GradBF(c, n, q, d);
             ++n;
           }
         }
       }
     }
   }
 }
  void DOFVecGradInterpolation<PHAL::AlbanyTraits::Jacobian, Traits>::
  evaluateFields(typename Traits::EvalData workset)
  {
    // Jacobian specialisation of the vector-gradient interpolation
    //   grad_val_qp(cell,qp,i,dim) =
    //     sum over nodes of val_node(cell,node,i) * GradBF(cell,node,qp,dim).
    // Rather than multiplying full Fad objects, the value and one derivative
    // entry per node are handled explicitly: for node `n` and component `i`
    // only derivative slot neq*n+offset+i is read from val_node and written
    // to grad_val_qp.
    // NOTE(review): presumably every other derivative entry of val_node is
    // zero, which is what would make this shortcut exact - confirm.

    // Derivative array length, sampled from the first nodal value.
    const int num_dof = val_node(0,0,0).size();
    const int neq = num_dof / numNodes;

    // A Fad built with num_dof == 0 has no derivative storage, so
    // fastAccessDx must not be called on it.  Skip the derivative updates in
    // that case and propagate values only.
    const bool has_derivs = (num_dof != 0);

    for (std::size_t cell=0; cell < workset.numCells; ++cell) {
      for (std::size_t qp=0; qp < numQPs; ++qp) {
        for (std::size_t i=0; i<vecDim; i++) {
          for (std::size_t dim=0; dim<numDims; dim++) {
            // For node==0, overwrite. Then += for 1 to numNodes.
            ScalarT& gvqp = grad_val_qp(cell,qp,i,dim);
            gvqp = FadType(num_dof, val_node(cell, 0, i).val() * GradBF(cell, 0, qp, dim));
            if (has_derivs) {
              gvqp.fastAccessDx(offset+i) = val_node(cell, 0, i).fastAccessDx(offset+i) * GradBF(cell, 0, qp, dim);
            }
            for (std::size_t node= 1 ; node < numNodes; ++node) {
              gvqp.val() += val_node(cell, node, i).val() * GradBF(cell, node, qp, dim);
              if (has_derivs) {
                gvqp.fastAccessDx(neq*node+offset+i) += val_node(cell, node, i).fastAccessDx(neq*node+offset+i) * GradBF(cell, node, qp, dim);
              }
            }
          }
        }
      }
    }
    //  Intrepid::FunctionSpaceTools::evaluate<ScalarT>(grad_val_qp, val_node, GradBF);
  }
void DOFDivInterpolationLevelsXZ<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  // Divergence of a nodal vector field with levels at the quadrature points:
  //   div_val_qp(cell,qp,level) =
  //     sum over nodes and dims of
  //       val_node(cell,node,level,dim) * GradBF(cell,node,qp,dim).
  // The output field is cleared first, then all contributions are added.
  PHAL::set(div_val_qp, 0.0);
//#define WEAK_DIV 0
//#if WEAK_DIV
  for (int c = 0; c < workset.numCells; ++c) {
    for (int q = 0; q < numQPs; ++q) {
      for (int n = 0; n < numNodes; ++n) {
        for (int lev = 0; lev < numLevels; ++lev) {
          for (int d = 0; d < numDims; ++d) {
            div_val_qp(c,q,lev) += val_node(c,n,lev,d) * GradBF(c,n,q,d);
            // Debug output, disabled:
            //std::cout << "gradbf: " << cell << " " << node << " " << qp << " " << dim << " " << GradBF(cell,node,qp,dim) << std::endl;
            //std::cout << "val_node " << val_node(cell,node,level,dim) << std::endl;
          }
        }
      }
    }
  }
}