void DOFVecGradInterpolation<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  // Interpolates the gradient of a nodal vector field to the quadrature
  // points of every cell in the workset:
  //
  //   grad_val_qp(c,q,k,d) = sum_n val_node(c,n,k) * GradBF(c,n,q,d)
  //
  // Hand-written replacement for
  //   Intrepid::FunctionSpaceTools::evaluate<ScalarT>(grad_val_qp, val_node, GradBF);
  // The contribution of node 0 is assigned (not accumulated), so the output
  // field does not have to be zeroed before the sum.
  for (std::size_t c = 0; c < workset.numCells; ++c) {
    for (std::size_t q = 0; q < numQPs; ++q) {
      for (std::size_t k = 0; k < vecDim; ++k) {
        for (std::size_t d = 0; d < numDims; ++d) {
          ScalarT& grad = grad_val_qp(c, q, k, d);

          // Node 0 overwrites whatever the entry held before ...
          grad = val_node(c, 0, k) * GradBF(c, 0, q, d);

          // ... and the remaining nodes are summed on top of it.
          for (std::size_t n = 1; n < numNodes; ++n) {
            grad += val_node(c, n, k) * GradBF(c, n, q, d);
          }
        }
      }
    }
  }
}
void DOFGradInterpolation_noDeriv<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  // Interpolates the gradient of a nodal scalar field to the quadrature
  // points:
  //
  //   grad_val_qp(c,q,d) = sum_n val_node(c,n) * GradBF(c,n,q,d)
  //
  // Hand-written equivalent of the Intrepid version:
  //   Intrepid::FunctionSpaceTools::evaluate<ScalarT>(grad_val_qp, val_node, GradBF);

  // The loop nest below accumulates with +=, so clear the whole output
  // field (all entries, not only those of this workset) first.
  for (std::size_t entry = 0; entry < grad_val_qp.size(); ++entry) {
    grad_val_qp(entry) = 0.0;
  }

  for (int c = 0; c < workset.numCells; ++c) {
    for (int q = 0; q < numQPs; ++q) {
      for (int d = 0; d < numDims; ++d) {
        for (int n = 0; n < numNodes; ++n) {
          grad_val_qp(c, q, d) += val_node(c, n) * GradBF(c, n, q, d);
        }
      }
    }
  }
}
  void DOFVecGradInterpolation<PHAL::AlbanyTraits::Jacobian, Traits>::
  evaluateFields(typename Traits::EvalData workset)
  {
    // Jacobian specialization of the vector-gradient interpolation.
    //
    // The value part is the usual sum
    //   grad_val_qp(c,q,k,d).val() = sum_n val_node(c,n,k).val() * GradBF(c,n,q,d)
    // but for each node only ONE derivative entry is touched, namely the
    // entry with index neq*n + offset + k.
    // NOTE(review): presumably that is the slot of the node's own unknown,
    // all other entries of the nodal Fad being zero -- confirm.

    // Number of derivative entries, read from the first nodal value.
    const int num_dof = val_node(0,0,0).size();
    // Derivative entries per node (integer division).
    const int neq = num_dof / numNodes;

    for (std::size_t c = 0; c < workset.numCells; ++c) {
      for (std::size_t q = 0; q < numQPs; ++q) {
        for (std::size_t k = 0; k < vecDim; ++k) {
          for (std::size_t d = 0; d < numDims; ++d) {
            ScalarT& grad = grad_val_qp(c, q, k, d);

            // Node 0: build a fresh Fad of length num_dof holding the value
            // term, then fill in the single derivative entry of this node.
            const auto dx0 = offset + k;
            grad = FadType(num_dof, val_node(c, 0, k).val() * GradBF(c, 0, q, d));
            grad.fastAccessDx(dx0) = val_node(c, 0, k).fastAccessDx(dx0) * GradBF(c, 0, q, d);

            // Nodes 1 .. numNodes-1: accumulate value and matching derivative.
            for (std::size_t n = 1; n < numNodes; ++n) {
              const auto dx = neq*n + offset + k;
              grad.val() += val_node(c, n, k).val() * GradBF(c, n, q, d);
              grad.fastAccessDx(dx) += val_node(c, n, k).fastAccessDx(dx) * GradBF(c, n, q, d);
            }
          }
        }
      }
    }
  }
void DOFDivInterpolationLevelsXZ<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  // Computes, per cell, quadrature point and level, the divergence of a
  // nodal vector field:
  //
  //   div_val_qp(c,q,l) = sum_n sum_d val_node(c,n,l,d) * GradBF(c,n,q,d)

  // The sum below accumulates, so start from a zeroed output field.
  PHAL::set(div_val_qp, 0.0);

  for (int c = 0; c < workset.numCells; ++c) {
    for (int q = 0; q < numQPs; ++q) {
      for (int n = 0; n < numNodes; ++n) {
        for (int l = 0; l < numLevels; ++l) {
          for (int d = 0; d < numDims; ++d) {
            div_val_qp(c, q, l) += val_node(c, n, l, d) * GradBF(c, n, q, d);
          }
        }
      }
    }
  }
}
void ComputeHierarchicBasis<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  // Fills, for every cell of the workset and every integration point,
  //   detJ, weightedDV        - Jacobian determinant and its weighted value
  //   BF, wBF                 - shape function values (plain / weighted)
  //   GradBF, wGradBF         - shape function gradients (plain / weighted)
  // using the apf hierarchic shape functions evaluated on the PUMI mesh.

  // Do some work to get the pumi discretization and the apf mesh, so the
  // pumi mesh database can be used to compute mesh / basis quantities.
  Teuchos::RCP<Albany::AbstractDiscretization> discretization =
    app->getDiscretization();

  // throw_on_fail = true: if the discretization is not a PUMIDiscretization
  // the cast throws right here instead of returning a null RCP that would
  // be dereferenced on the very next statement.
  Teuchos::RCP<Albany::PUMIDiscretization> pumiDiscretization =
    Teuchos::rcp_dynamic_cast<Albany::PUMIDiscretization>(discretization, true);

  Teuchos::RCP<Albany::PUMIMeshStruct> pumiMeshStruct =
    pumiDiscretization->getPUMIMeshStruct();

  // Element block index of this workset; used to index into the buckets.
  ebIndex = pumiMeshStruct->ebNameToIndex[workset.EBName];

  // Elements of the apf mesh, indexed as buckets[Elem Block Index][Cell Index].
  buckets = pumiDiscretization->getBuckets();

  // The apf mesh itself.
  mesh = pumiMeshStruct->getMesh();

  // The apf hierarchic shape, used for shape function values / gradients.
  shape = apf::getHierarchic(polynomialOrder);

  for (int cell=0; cell < workset.numCells; ++cell)
  {
    // Get the apf objects associated with this cell.
    apf::MeshEntity* element = buckets[ebIndex][cell];
    apf::MeshElement* meshElement = apf::createMeshElement(mesh, element);

    for (int qp=0; qp < numQPs; ++qp)
    {
      // Parametric coordinates of the current integration point.
      apf::getIntPoint(meshElement, cubatureDegree, qp, point);

      // Jacobian determinant at this point; it must be strictly positive
      // (checked only in builds where assert is active).
      detJ(cell, qp) = apf::getDV(meshElement, point);
      assert( detJ(cell, qp) > 0.0 );

      // Integration weight associated with this point.
      const double w = apf::getIntWeight(meshElement, cubatureDegree, qp);

      // Weight the determinant of the Jacobian by the integration weight.
      weightedDV(cell, qp) = w * detJ(cell,qp);

      // Shape function values and gradients at this point.
      apf::getBF(shape, meshElement, point, bf);
      apf::getGradBF(shape, meshElement, point, gbf);

      for (int node=0; node < numNodes; ++node)
      {
        BF(cell, node, qp) = bf[node];
        wBF(cell, node, qp) = weightedDV(cell, qp) * bf[node];
        for (int dim=0; dim < numDims; ++dim)
        {
          GradBF(cell, node, qp, dim) = gbf[node][dim];
          wGradBF(cell, node, qp, dim) = weightedDV(cell,qp) * gbf[node][dim];
        }
      }
    }

    // Release the mesh element created above for this cell.
    apf::destroyMeshElement(meshElement);
  }
}
void DislocationDensity<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  // Per cell:
  //   1. invert the matrix A(qp,node) = BF(cell,node,qp) by solving A X = I,
  //   2. use X to map Fp from the quadrature points to the nodes (nodalFp),
  //   3. evaluate the curl of nodalFp at the quadrature points (curlFp),
  //   4. accumulate G(cell,qp,i,j) += Fp(cell,qp,i,k) * curlFp(qp,k,j).

  Teuchos::SerialDenseMatrix<int, double> A;
  Teuchos::SerialDenseMatrix<int, double> X;
  Teuchos::SerialDenseMatrix<int, double> B;
  Teuchos::SerialDenseSolver<int, double> solver;

  // NOTE(review): all three matrices are numNodes x numNodes while A is
  // filled with a (qp,node) index pair below, so this appears to assume
  // numQPs == numNodes -- confirm.
  A.shape(numNodes, numNodes);
  X.shape(numNodes, numNodes);
  B.shape(numNodes, numNodes);

  // Right-hand side is the identity, so the solution X is the inverse of A.
  for (int d = 0; d < numNodes; ++d) {
    B(d, d) = 1.0;
  }

  // G is accumulated with += at the end of each cell; clear it first.
  for (int idx = 0; idx < G.size(); idx++) {
    G[idx] = 0.0;
  }

  for (std::size_t c = 0; c < workset.numCells; ++c)
  {
    // Build the node --> point operator for this cell.
    for (std::size_t n = 0; n < numNodes; ++n) {
      for (std::size_t q = 0; q < numQPs; ++q) {
        A(q, n) = BF(c, n, q);
      }
    }

    X = 0.0;

    // Hand the solver non-owning handles to the local matrices.
    solver.setMatrix( Teuchos::rcp( &A, false ) );
    solver.setVectors( Teuchos::rcp( &X, false ), Teuchos::rcp( &B, false ) );

    // Solve A X = B to obtain the inverse of A.
    // NOTE(review): the returned status codes are stored but never checked.
    int status = solver.factor();
    status = solver.solve();

    // Nodal Fp: nodalFp(n,i,j) = sum_q X(n,q) * Fp(c,q,i,j).
    nodalFp.initialize(0.0);
    for (std::size_t n = 0; n < numNodes; ++n) {
      for (std::size_t q = 0; q < numQPs; ++q) {
        for (std::size_t i = 0; i < numDims; ++i) {
          for (std::size_t j = 0; j < numDims; ++j) {
            nodalFp(n, i, j) += X(n, q) * Fp(c, q, i, j);
          }
        }
      }
    }

    // Curl of nodalFp at the quadrature points.  The component indices are
    // hard-coded to 0..2, i.e. three spatial dimensions.  For each row r of
    // nodalFp the three curl components are written to curlFp(q, 0..2, r).
    curlFp.initialize(0.0);
    for (std::size_t n = 0; n < numNodes; ++n) {
      for (std::size_t q = 0; q < numQPs; ++q) {
        for (int r = 0; r < 3; ++r) {
          curlFp(q,0,r) += nodalFp(n,r,2) * GradBF(c,n,q,1) - nodalFp(n,r,1) * GradBF(c,n,q,2);
          curlFp(q,1,r) += nodalFp(n,r,0) * GradBF(c,n,q,2) - nodalFp(n,r,2) * GradBF(c,n,q,0);
          curlFp(q,2,r) += nodalFp(n,r,1) * GradBF(c,n,q,0) - nodalFp(n,r,0) * GradBF(c,n,q,1);
        }
      }
    }

    // G = Fp * curlFp at every quadrature point of this cell.
    for (std::size_t q = 0; q < numQPs; ++q) {
      for (std::size_t i = 0; i < numDims; ++i) {
        for (std::size_t j = 0; j < numDims; ++j) {
          for (std::size_t k = 0; k < numDims; ++k) {
            G(c, q, i, j) += Fp(c, q, i, k) * curlFp(q, k, j);
          }
        }
      }
    }
  }
}
  void BasalFrictionHeat<EvalT,Traits,Type>::
  evaluateFields(typename Traits::EvalData d)
  {
    // Computes the nodal basal friction heat term (and, when haveSUPG is
    // set, its SUPG counterpart) on the cells that own a side of the side
    // set named basalSideName.  Every other entry of the outputs is zero.

    // Zero out, to avoid leaving stuff from previous workset!  This is done
    // for BOTH outputs before the early return below, so a workset without
    // the basal side set does not keep stale SUPG values either.
    for (int cell = 0; cell < d.numCells; ++cell)
    {
      for (int node = 0; node < numCellNodes; ++node)
      {
        basalFricHeat(cell,node) = 0.;
        if (haveSUPG)
          basalFricHeatSUPG(cell,node) = 0.;
      }
    }

    const double scyr (3.1536e7);  // [s/yr];

    // Nothing more to do when this workset has no basal sides.
    const auto sideSetIt = d.sideSets->find(basalSideName);
    if (sideSetIt == d.sideSets->end())
      return;

    const std::vector<Albany::SideStruct>& sideSet = sideSetIt->second;

    for (auto const& it_side : sideSet)
    {
      // Get the local data of side and cell
      const int cell = it_side.elem_LID;
      const int side = it_side.side_local_id;

      for (int node = 0; node < numSideNodes; ++node)
      {
        // Restart the sum for this cell node, then integrate
        // beta * |velocity|^2 * BF over the side quadrature points.
        basalFricHeat(cell,sideNodes[side][node]) = 0.;
        for (int qp = 0; qp < numSideQPs; ++qp)
        {
          for (int dim = 0; dim < vecDimFO; ++dim)
          {
            basalFricHeat(cell,sideNodes[side][node]) += 1000/scyr * beta(cell,side,qp) * velocity(cell,side,qp,dim) * velocity(cell,side,qp,dim) *
                BF(cell,side,node,qp) * w_measure(cell,side,qp);
          }
        }
      }
    }

    if (haveSUPG)
    {
      ScalarT wSUPG;

      for (auto const& iter_side : sideSet)
      {
        // Get the local data of side and cell
        const int cell = iter_side.elem_LID;
        const int side = iter_side.side_local_id;

        for (int node = 0; node < numSideNodes; ++node)
        {
          basalFricHeatSUPG(cell,sideNodes[side][node]) = 0.;
          for (int qp = 0; qp < numSideQPs; ++qp)
          {
            // SUPG weight: velocity . GradBF (first two components only),
            // times the side measure.
            wSUPG = 0.001 / scyr * // [km^2 s^{-1}]  TODO:check dimension
                (velocity(cell,side,qp,0)*GradBF(cell,side,node,qp,0) + velocity(cell,side,qp,1)*GradBF(cell,side,node,qp,1))*w_measure(cell,side,qp);

            basalFricHeatSUPG(cell,sideNodes[side][node]) += 1000 / scyr * beta(cell,side,qp) *
                (velocity(cell,side,qp,0) * velocity(cell,side,qp,0) + velocity(cell,side,qp,1) * velocity(cell,side,qp,1)) * wSUPG;
          }
        }
      }
    }
  }