//**********************************************************************
PHX_EVALUATE_FIELDS(NeumannResidual,workset)
{ 
  residual.deep_copy(ScalarT(0.0));

  for (std::size_t cell = 0; cell < workset.num_cells; ++cell) {
    for (std::size_t ip = 0; ip < num_ip; ++ip) {
      normal_dot_flux(cell,ip) = ScalarT(0.0);
      for (std::size_t dim = 0; dim < num_dim; ++dim) {
	normal_dot_flux(cell,ip) += normal(cell,ip,dim) * flux(cell,ip,dim); 
      }
    }
  }

  // const Intrepid::FieldContainer<double> & weighted_basis = workset.bases[basis_index]->weighted_basis;
  const Teuchos::RCP<const BasisValues2<double> > bv = workset.bases[basis_index];
  for (std::size_t cell = 0; cell < workset.num_cells; ++cell) {
    for (std::size_t basis = 0; basis < residual.dimension(1); ++basis) {
      for (std::size_t qp = 0; qp < num_ip; ++qp) {
        residual(cell,basis) += normal_dot_flux(cell,qp)*bv->weighted_basis_scalar(cell,basis,qp);
      }
    }
  }

  if(workset.num_cells>0)
    Intrepid::FunctionSpaceTools::
      integrate<ScalarT>(residual, normal_dot_flux, 
			 (workset.bases[basis_index])->weighted_basis_scalar, 
			 Intrepid::COMP_BLAS);
}
//**********************************************************************
PHX_EVALUATE_FIELDS(InterfaceResidual,workset)
{ 
  residual.deep_copy(ScalarT(0.0));

  for (std::size_t cell = 0; cell < workset.num_cells; ++cell) {
    for (std::size_t ip = 0; ip < num_ip; ++ip) {
      normal_dot_flux(cell,ip) = ScalarT(0.0);
      for (std::size_t dim = 0; dim < num_dim; ++dim) {
	normal_dot_flux(cell,ip) += normal(cell,ip,dim) * flux(cell,ip,dim); 
      }
    }
  }

  // const Kokkos::DynRankView<double,PHX::Device> & weighted_basis = this->wda(workset).bases[basis_index]->weighted_basis;
  const Teuchos::RCP<const BasisValues2<double> > bv = this->wda(workset).bases[basis_index];
  for (std::size_t cell = 0; cell < workset.num_cells; ++cell) {
    for (std::size_t basis = 0; basis < residual.dimension(1); ++basis) {
      for (std::size_t qp = 0; qp < num_ip; ++qp) {
        residual(cell,basis) += normal_dot_flux(cell,qp)*bv->weighted_basis_scalar(cell,basis,qp);
      }
    }
  }

  if(workset.num_cells>0)
    Intrepid2::FunctionSpaceTools::
      integrate<ScalarT>(residual, normal_dot_flux, 
			 (this->wda(workset).bases[basis_index])->weighted_basis_scalar, 
			 Intrepid2::COMP_CPP);
}
//**********************************************************************
PHX_EVALUATE_FIELDS(Integrator_TransientBasisTimesScalar,workset)
{ 
  if (workset.evaluate_transient_terms) {
    
   // for (int i=0; i < residual.size(); ++i)
   //   residual[i] = 0.0;
    
   Kokkos::deep_copy (residual.get_kokkos_view(), ScalarT(0.0));

    for (std::size_t cell = 0; cell < workset.num_cells; ++cell) {
      for (std::size_t qp = 0; qp < num_qp; ++qp) {
	tmp(cell,qp) = multiplier * scalar(cell,qp);
	for (typename std::vector<PHX::MDField<ScalarT,Cell,IP> >::iterator field = field_multipliers.begin();
	     field != field_multipliers.end(); ++field)
	  tmp(cell,qp) = tmp(cell,qp) * (*field)(cell,qp);  
      }
    }

    if(workset.num_cells>0)
      Intrepid2::FunctionSpaceTools::
        integrate<ScalarT>(residual, tmp, 
			   (this->wda(workset).bases[basis_index])->weighted_basis_scalar, 
			   Intrepid2::COMP_CPP);
  }
}
void StokesFOImplicitThicknessUpdateResid<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  typedef Intrepid::FunctionSpaceTools FST; 

  // Initialize residual to 0.0
  Kokkos::deep_copy(Residual.get_kokkos_view(), ScalarT(0.0));

  Intrepid::FieldContainer<ScalarT> res(numNodes,3);

  double rho_g=rho*g;

  for (std::size_t cell=0; cell < workset.numCells; ++cell) {
    for (int i = 0; i < res.size(); i++) res(i) = 0.0;
    for (std::size_t qp=0; qp < numQPs; ++qp) {
      ScalarT dHdiffdx = 0;//Ugrad(cell,qp,2,0);
      ScalarT dHdiffdy = 0;//Ugrad(cell,qp,2,1);
      for (std::size_t node=0; node < numNodes; ++node) {
        dHdiffdx += (H(cell,node)-H0(cell,node)) * gradBF(cell,node, qp,0);
        dHdiffdy += (H(cell,node)-H0(cell,node)) * gradBF(cell,node, qp,1);
      }

      for (std::size_t node=0; node < numNodes; ++node) {
           res(node,0) += rho_g*dHdiffdx*wBF(cell,node,qp);
           res(node,1) += rho_g*dHdiffdy*wBF(cell,node,qp);
      }
    }
    for (std::size_t node=0; node < numNodes; ++node) {
       Residual(cell,node,0) = res(node,0);
       Residual(cell,node,1) = res(node,1);
    }
  }
}
void FastSolutionTensorInterpolationBase<PHAL::AlbanyTraits::Jacobian, Traits, typename PHAL::AlbanyTraits::Jacobian::ScalarT>::
evaluateFields(typename Traits::EvalData workset)
{
  const int num_dof = this->val_node(0,0,0,0).size();
  const int neq = workset.wsElNodeEqID.dimension(2);
  const auto vecDim = this->vecDim;
  for (std::size_t cell=0; cell < workset.numCells; ++cell) {
    for (std::size_t qp=0; qp < this->numQPs; ++qp) {
      for (std::size_t i=0; i< vecDim; i++) {
        for (std::size_t j=0; j< vecDim; j++) {
          // Zero out for node==0; then += for node = 1 to numNodes
          typename PHAL::Ref<ScalarT>::type vqp = this->val_qp(cell,qp,i,j);

          vqp = this->val_node(cell, 0, i, j) * this->BF(cell, 0, qp);
          vqp = ScalarT(num_dof, this->val_node(cell, 0, i, j).val() * this->BF(cell, 0, qp));
          vqp.fastAccessDx(offset+i*vecDim+j) = this->val_node(cell, 0, i, j).fastAccessDx(offset+i*vecDim+j) * this->BF(cell, 0, qp);
          for (std::size_t node=1; node < this->numNodes; ++node) {
            vqp.val() += this->val_node(cell, node, i, j).val() * this->BF(cell, node, qp);
            vqp.fastAccessDx(neq*node+offset+i*this->vecDim+j)
              += this->val_node(cell, node, i, j).fastAccessDx(neq*node+offset+i*vecDim+j) * this->BF(cell, node, qp);
          }
        }
      }
    }
  }
}
void panzer::ReorderADValues_Evaluator<panzer::Traits::Jacobian, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  // for AD data do a reordering
  for(std::size_t i = 0; i < inFields_.size(); ++i) {

    for(typename PHX::MDField<ScalarT>::size_type j = 0; j < inFields_[i].size(); ++j) {
      // allocated scalar fields
      outFields_[i][j] = ScalarT(dstFromSrcMap_.size(), inFields_[i][j].val());

      ScalarT & outField = outFields_[i][j];
      const ScalarT & inField = inFields_[i][j];

      // the jacobian must be initialized, otherwise its just a value copy
      if(inField.size()>0) {
        // loop over the sensitivity indices: all DOFs on a cell
        outField.resize(dstFromSrcMap_.size());

        // copy jacobian entries correctly reordered
        for(std::size_t k=0;k<dstFromSrcMap_.size();k++) 
          outField.fastAccessDx(k) = inField.fastAccessDx(dstFromSrcMap_[k]);
      }
 
      outField.val() = inField.val();
    }
  }
}
void Gather2DField<PHAL::AlbanyTraits::Residual, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  Teuchos::RCP<const Tpetra_Vector> xT = workset.xT;
  Teuchos::ArrayRCP<const ST> xT_constView = xT->get1dView();


  Kokkos::deep_copy(this->field2D.get_kokkos_view(), ScalarT(0.0));

  const Albany::SideSetList& ssList = *(workset.sideSets);
  Albany::SideSetList::const_iterator it = ssList.find(this->meshPart);

  if (it != ssList.end()) {
    const std::vector<Albany::SideStruct>& sideSet = it->second;

    const Albany::NodalDOFManager& solDOFManager = workset.disc->getOverlapDOFManager("ordinary_solution");

    for (std::size_t iSide = 0; iSide < sideSet.size(); ++iSide) { // loop over the sides on this ws and name

      // Get the data that corresponds to the side
      const int elem_GID = sideSet[iSide].elem_GID;
      const int elem_LID = sideSet[iSide].elem_LID;
      const int elem_side = sideSet[iSide].side_local_id;
      const CellTopologyData_Subcell& side =  this->cell_topo->side[elem_side];
      int numSideNodes = side.topology->node_count;
      const Teuchos::ArrayRCP<Teuchos::ArrayRCP<int> >& nodeID  = workset.wsElNodeEqID[elem_LID];
      for (int i = 0; i < numSideNodes; ++i){
        std::size_t node = side.node[i];
        const Teuchos::ArrayRCP<int>& eqID  = nodeID[node];
        this->field2D(elem_LID,node) = xT_constView[eqID[this->offset]];
      }
    }
  }
}
void panzer::ProjectToEdges<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{ 
  const shards::CellTopology & parentCell = *basis->getCellTopology();
  const int intDegree = basis->order();
  TEUCHOS_ASSERT(intDegree == 1);
  Intrepid2::DefaultCubatureFactory<double,Kokkos::DynRankView<double,PHX::Device>,Kokkos::DynRankView<double,PHX::Device>> quadFactory;
  Teuchos::RCP< Intrepid2::Cubature<double,Kokkos::DynRankView<double,PHX::Device>,Kokkos::DynRankView<double,PHX::Device>> > edgeQuad;

  // Collect the reference edge information. For now, do nothing with the quadPts.
  const unsigned num_edges = parentCell.getEdgeCount();
  std::vector<double> refEdgeWt(num_edges, 0.0);
  for (unsigned e=0; e<num_edges; e++) {
    edgeQuad = quadFactory.create(parentCell.getCellTopologyData(1,e), intDegree);
    const int numQPoints = edgeQuad->getNumPoints();
    Kokkos::DynRankView<double,PHX::Device> quadWts("quadWts",numQPoints);
    Kokkos::DynRankView<double,PHX::Device> quadPts("quadPts",numQPoints,num_dim);
    edgeQuad->getCubature(quadPts,quadWts);
    for (int q=0; q<numQPoints; q++)
      refEdgeWt[e] += quadWts(q);
  }

  // Loop over the edges of the workset cells.
  for (index_t cell = 0; cell < workset.num_cells; ++cell) {
    for (int p = 0; p < num_pts; ++p) {
      result(cell,p) = ScalarT(0.0);
      for (int dim = 0; dim < num_dim; ++dim)
        result(cell,p) += vector_values(cell,p,dim) * tangents(cell,p,dim);
      result(cell,p) *= refEdgeWt[p];
    }
  }

}
void GatherExtruded2DField<PHAL::AlbanyTraits::Residual, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  Teuchos::RCP<const Tpetra_Vector> xT = workset.xT;
  Teuchos::ArrayRCP<const ST> xT_constView = xT->get1dView();

  Kokkos::deep_copy(this->field2D.get_kokkos_view(), ScalarT(0.0));

  const Albany::LayeredMeshNumbering<LO>& layeredMeshNumbering = *workset.disc->getLayeredMeshNumbering();
  const Albany::NodalDOFManager& solDOFManager = workset.disc->getOverlapDOFManager("ordinary_solution");

  int numLayers = layeredMeshNumbering.numLayers;
  this->fieldLevel = (this->fieldLevel < 0) ? numLayers : this->fieldLevel;
  const Teuchos::ArrayRCP<Teuchos::ArrayRCP<GO> >& wsElNodeID  = workset.disc->getWsElNodeID()[workset.wsIndex];

  for (std::size_t cell=0; cell < workset.numCells; ++cell ) {
    const Teuchos::ArrayRCP<GO>& elNodeID = wsElNodeID[cell];

    for (std::size_t node = 0; node < this->numNodes; ++node) {
      LO lnodeId = workset.disc->getOverlapNodeMapT()->getLocalElement(elNodeID[node]);
      LO base_id, ilayer;
      layeredMeshNumbering.getIndices(lnodeId, base_id, ilayer);
      LO inode = layeredMeshNumbering.getId(base_id, this->fieldLevel);

      (this->field2D)(cell,node) = xT_constView[solDOFManager.getLocalDOF(inode, this->offset)];
    }
  }
}
void GatherVerticallyAveragedVelocity<PHAL::AlbanyTraits::Residual, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  Teuchos::RCP<const Tpetra_Vector> xT = Albany::getConstTpetraVector(workset.x);
  Teuchos::ArrayRCP<const ST> xT_constView = xT->get1dView();

  Kokkos::deep_copy(this->averagedVel.get_view(), ScalarT(0.0));

  if (workset.sideSets == Teuchos::null)
      TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "Side sets defined in input file but not properly specified on the mesh" << std::endl);

  const Albany::SideSetList& ssList = *(workset.sideSets);
  Albany::SideSetList::const_iterator it = ssList.find(this->meshPart);

  if (it != ssList.end()) {
    const std::vector<Albany::SideStruct>& sideSet = it->second;

    // Loop over the sides that form the boundary condition
    const Teuchos::ArrayRCP<Teuchos::ArrayRCP<GO> >& wsElNodeID  = workset.disc->getWsElNodeID()[workset.wsIndex];
    const Albany::LayeredMeshNumbering<LO>& layeredMeshNumbering = *workset.disc->getLayeredMeshNumbering();
    const Albany::NodalDOFManager& solDOFManager = workset.disc->getOverlapDOFManager("ordinary_solution");

    const Teuchos::ArrayRCP<double>& layers_ratio = layeredMeshNumbering.layers_ratio;
    int numLayers = layeredMeshNumbering.numLayers;

    Teuchos::ArrayRCP<double> quadWeights(numLayers+1); //doing trapezoidal rule
    quadWeights[0] = 0.5*layers_ratio[0]; quadWeights[numLayers] = 0.5*layers_ratio[numLayers-1];
    for(int i=1; i<numLayers; ++i)
      quadWeights[i] = 0.5*(layers_ratio[i-1] + layers_ratio[i]);

    for (std::size_t iSide = 0; iSide < sideSet.size(); ++iSide) { // loop over the sides on this ws and name
      // Get the data that corresponds to the side
      const int elem_GID = sideSet[iSide].elem_GID;
      const int elem_LID = sideSet[iSide].elem_LID;
      const int elem_side = sideSet[iSide].side_local_id;
      const CellTopologyData_Subcell& side =  this->cell_topo->side[elem_side];
      int numSideNodes = side.topology->node_count;

      const Teuchos::ArrayRCP<GO>& elNodeID = wsElNodeID[elem_LID];

      //we only consider elements on the top.
      LO baseId, ilayer;
      for (int i = 0; i < numSideNodes; ++i) {
        std::size_t node = side.node[i];
        LO lnodeId = workset.disc->getOverlapNodeMapT()->getLocalElement(elNodeID[node]);
        layeredMeshNumbering.getIndices(lnodeId, baseId, ilayer);
        std::vector<double> avVel(this->vecDimFO,0);
        for(int il=0; il<numLayers+1; ++il)
        {
          LO inode = layeredMeshNumbering.getId(baseId, il);
          for(int comp=0; comp<this->vecDimFO; ++comp)
            avVel[comp] += xT_constView[solDOFManager.getLocalDOF(inode, comp)]*quadWeights[il];
        }
        for(int comp=0; comp<this->vecDimFO; ++comp)
          this->averagedVel(elem_LID,elem_side,i,comp) = avVel[comp];
      }
    }
  }
}
//**********************************************************************
PHX_EVALUATE_FIELDS(DotProduct,workset)
{
    for (std::size_t cell = 0; cell < workset.num_cells; ++cell) {
        for (int p = 0; p < num_pts; ++p) {
            vec_a_dot_vec_b(cell,p) = ScalarT(0.0);
            for (int dim = 0; dim < num_dim; ++dim)
                vec_a_dot_vec_b(cell,p) += vec_a(cell,p,dim) * vec_b(cell,p,dim);
        }
    }
}
void
ElectrostaticResidual<EvalT, Traits>::evaluateFields(
    typename Traits::EvalData workset)
{
  for (int cell = 0; cell < workset.numCells; ++cell) {
    for (int node = 0; node < num_nodes_; ++node)
      residual_(cell, node) = ScalarT(0);
    for (int pt = 0; pt < num_pts_; ++pt)
      for (int node = 0; node < num_nodes_; ++node)
        for (int i = 0; i < num_dims_; ++i)
          residual_(cell, node) +=
              edisp_(cell, pt, i) * w_grad_bf_(cell, node, pt, i);
  }
}
  void LiquidWaterFraction<EvalT,Traits,Type>::
  evaluateFields(typename Traits::EvalData d)
  {
    const double pow6 = 1e6; //[k^{-2}], k =1000
    //  double pi = atan(1.) * 4.;
    ScalarT phiNode;

    for (std::size_t cell = 0; cell < d.numCells; ++cell)
    {
      for (std::size_t node = 0; node < numNodes; ++node)
      {
        phi(cell,node) =  ( enthalpy(cell,node) < enthalpyHs(cell,node) ) ? ScalarT(0) : pow6 * (enthalpy(cell,node) - enthalpyHs(cell,node)) / (rho_w * L);
      }
    }
  }
void Gather2DField<PHAL::AlbanyTraits::Jacobian, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  Teuchos::RCP<const Tpetra_Vector> xT = workset.xT;
  Teuchos::ArrayRCP<const ST> xT_constView = xT->get1dView();

  if (workset.sideSets == Teuchos::null)
      TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "Side sets defined in input file but not properly specified on the mesh" << std::endl);


  const Albany::LayeredMeshNumbering<LO>& layeredMeshNumbering = *workset.disc->getLayeredMeshNumbering();
  int numLayers = workset.disc->getLayeredMeshNumbering()->numLayers;
  this->fieldLevel = (this->fieldLevel < 0) ? numLayers : this->fieldLevel;

  Kokkos::deep_copy(this->field2D.get_kokkos_view(), ScalarT(0.0));

  const Albany::SideSetList& ssList = *(workset.sideSets);
  Albany::SideSetList::const_iterator it = ssList.find(this->meshPart);


  if (it != ssList.end()) {
    const std::vector<Albany::SideStruct>& sideSet = it->second;

    // Loop over the sides that form the boundary condition
    const Teuchos::ArrayRCP<Teuchos::ArrayRCP<GO> >& wsElNodeID  = workset.disc->getWsElNodeID()[workset.wsIndex];
    const Albany::NodalDOFManager& solDOFManager = workset.disc->getOverlapDOFManager("ordinary_solution");

     for (std::size_t iSide = 0; iSide < sideSet.size(); ++iSide) { // loop over the sides on this ws and name

      // Get the data that corresponds to the side
      const int elem_GID = sideSet[iSide].elem_GID;
      const int elem_LID = sideSet[iSide].elem_LID;
      const int elem_side = sideSet[iSide].side_local_id;
      const CellTopologyData_Subcell& side =  this->cell_topo->side[elem_side];
      int numSideNodes = side.topology->node_count;

      const Teuchos::ArrayRCP<GO>& elNodeID = wsElNodeID[elem_LID];
      const Teuchos::ArrayRCP<Teuchos::ArrayRCP<int> >& nodeID  = workset.wsElNodeEqID[elem_LID];

      for (int i = 0; i < numSideNodes; ++i){
        std::size_t node = side.node[i];
        const Teuchos::ArrayRCP<int>& eqID  = nodeID[node];
        this->field2D(elem_LID,node) = FadType(numSideNodes*this->vecDim*(numLayers+1), xT_constView[eqID[this->offset]]);
        this->field2D(elem_LID,node).fastAccessDx(numSideNodes*this->vecDim*this->fieldLevel+this->vecDim*i+this->offset) = workset.j_coeff;
      }
    }
  }
}
//**********************************************************************
PHX_EVALUATE_FIELDS(WeakDirichletResidual,workset)
{ 
  for (index_t cell = 0; cell < workset.num_cells; ++cell) {
    for (std::size_t ip = 0; ip < num_ip; ++ip) {
      normal_dot_flux_plus_pen(cell,ip) = ScalarT(0.0);
      for (std::size_t dim = 0; dim < num_dim; ++dim) {
	normal_dot_flux_plus_pen(cell,ip) += normal(cell,ip,dim) * flux(cell,ip,dim);
      }
      normal_dot_flux_plus_pen(cell,ip) += sigma(cell,ip) * (dof(cell,ip) - value(cell,ip)); 
    }
  }

  if(workset.num_cells>0)
    Intrepid2::FunctionSpaceTools::
      integrate<ScalarT>(residual, normal_dot_flux_plus_pen, 
			 (this->wda(workset).bases[basis_index])->weighted_basis_scalar, 
			 Intrepid2::COMP_CPP);
  
}
PHX_EVALUATE_FIELDS(TestScatter,workset)
{ 
 // for (int i=0; i < scatter_value.size(); ++i)
 //   scatter_value[i] = 0.0;
  Kokkos::deep_copy(scatter_value.get_static_view(), ScalarT(0.0));

  for (index_t cell = 0; cell < workset.num_cells; ++cell) {
    ScalarT sum = 0.0;
    for (std::size_t node = 0; node < num_nodes; ++node) 
       sum += value(cell,node);
    sum = sum / double(num_nodes);

    for (std::size_t node = 0; node < num_nodes; ++node) {
      //unsigned node_GID = *** need to fix this ***;

      scatter_value(cell,node) = 3.0*sum;
    }
  }
}
void DOFInterpolation<PHAL::AlbanyTraits::MPJacobian, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  //Intrepid version:
  // for (int i=0; i < val_qp.size() ; i++) val_qp[i] = 0.0;
  // Intrepid::FunctionSpaceTools:: evaluate<ScalarT>(val_qp, val_node, BF);

  const int num_dof = val_node(0,0).size();
  const int neq = workset.wsElNodeEqID[0][0].size();

  for (std::size_t cell=0; cell < workset.numCells; ++cell) {
    for (std::size_t qp=0; qp < numQPs; ++qp) {
      //ScalarT& vqp = val_qp(cell,qp);
      val_qp(cell,qp) = ScalarT(num_dof, val_node(cell, 0).val() * BF(cell, 0, qp));
      if (num_dof) (val_qp(cell,qp)).fastAccessDx(offset) = val_node(cell, 0).fastAccessDx(offset) * BF(cell, 0, qp);
      for (std::size_t node=1; node < numNodes; ++node) {
        (val_qp(cell,qp)).val() += val_node(cell, node).val() * BF(cell, node, qp);
        if (num_dof) (val_qp(cell,qp)).fastAccessDx(neq*node+offset) += val_node(cell, node).fastAccessDx(neq*node+offset) * BF(cell, node, qp);
      }
    }
  }
}
void DOF_PointField<EvalT,TRAITST>::evaluateFields(typename TRAITST::EvalData workset)
{ 
  // Zero out arrays (intrepid does a sum! 1/17/2012)
  dof_field.deep_copy(ScalarT(0.0));

  // copy coordinates
  for (int i = 0; i < coordinates.dimension_0(); ++i)
    for (int j = 0; j < coordinates.dimension_1(); ++j)
      intrpCoords(i,j) = Sacado::ScalarValue<ScalarT>::eval(coordinates(i,j));

  if(workset.num_cells>0) {
    // evaluate at reference points
    intrepidBasis->getValues(basisRef, intrpCoords, Intrepid2::OPERATOR_VALUE);

    // transfer reference basis values to physical frame values
    Intrepid2::FunctionSpaceTools::
      HGRADtransformVALUE<double>(basis,
				  basisRef);

    // evaluate function at specified points
    Intrepid2::FunctionSpaceTools::
      evaluate<ScalarT>(dof_field,dof_coeff,basis);
  }
}
void GatherExtruded2DField<PHAL::AlbanyTraits::Jacobian, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  Teuchos::RCP<const Tpetra_Vector> xT = workset.xT;
  Teuchos::ArrayRCP<const ST> xT_constView = xT->get1dView();

  Kokkos::deep_copy(this->field2D.get_kokkos_view(), ScalarT(0.0));

  const Albany::LayeredMeshNumbering<LO>& layeredMeshNumbering = *workset.disc->getLayeredMeshNumbering();
  const Albany::NodalDOFManager& solDOFManager = workset.disc->getOverlapDOFManager("ordinary_solution");

  int numLayers = layeredMeshNumbering.numLayers;
  this->fieldLevel = (this->fieldLevel < 0) ? numLayers : this->fieldLevel;
  const Teuchos::ArrayRCP<Teuchos::ArrayRCP<GO> >& wsElNodeID  = workset.disc->getWsElNodeID()[workset.wsIndex];


  for (std::size_t cell=0; cell < workset.numCells; ++cell ) {
    const Teuchos::ArrayRCP<GO>& elNodeID = wsElNodeID[cell];
    const Teuchos::ArrayRCP<Teuchos::ArrayRCP<int> >& nodeID  = workset.wsElNodeEqID[cell];
    const int neq = nodeID[0].size();
    const std::size_t num_dof = neq * this->numNodes;

    for (std::size_t node = 0; node < this->numNodes; ++node) {
      int firstunk = neq * node + this->offset;
      LO lnodeId = workset.disc->getOverlapNodeMapT()->getLocalElement(elNodeID[node]);
      LO base_id, ilayer;
      layeredMeshNumbering.getIndices(lnodeId, base_id, ilayer);
      LO inode = layeredMeshNumbering.getId(base_id, this->fieldLevel);
      typename PHAL::Ref<ScalarT>::type val = (this->field2D)(cell,node);

      val = FadType(neq * this->numNodes, xT_constView[solDOFManager.getLocalDOF(inode, this->offset)]);
      val.setUpdateValue(!workset.ignore_residual);
      val.fastAccessDx(firstunk) = workset.j_coeff;
    }
  }
}
//**********************************************************************
PHX_EVALUATE_FIELDS(Integrator_DivBasisTimesScalar,workset)
{ 
  // zero the reisdual
  residual.deep_copy(ScalarT(0.0));
  
  for (std::size_t cell = 0; cell < workset.num_cells; ++cell) {
    for (std::size_t qp = 0; qp < num_qp; ++qp) {
      ScalarT tmpVar = 1.0;
      for (typename std::vector<PHX::MDField<ScalarT,Cell,IP> >::iterator field = field_multipliers.begin();
           field != field_multipliers.end(); ++field)
        tmpVar = tmpVar * (*field)(cell,qp);  

      // no dimension to loop over for scalar fields
      tmp(cell,qp) = multiplier * tmpVar * scalar(cell,qp);
    }
  }
  
  {
    // const Kokkos::DynRankView<double,PHX::Device> & weighted_div_basis = (this->wda(workset).bases[basis_index])->weighted_div_basis;
    const BasisValues2<double> & bv = *this->wda(workset).bases[basis_index];

    for (std::size_t cell = 0; cell < workset.num_cells; ++cell)
      for (std::size_t basis = 0; basis < num_nodes; ++basis) {
        for (std::size_t qp = 0; qp < num_qp; ++qp)
          residual(cell,basis) += tmp(cell,qp)*bv.weighted_div_basis(cell,basis,qp);
      }
  }
/*
  if(workset.num_cells>0) {
     Intrepid2::FunctionSpaceTools::
       integrate<ScalarT>(residual, tmp, 
                       this->wda(workset).bases[basis_index]->weighted_div_basis, 
		       Intrepid2::COMP_CPP);
  }
*/
}
//**********************************************************************
PHX_EVALUATE_FIELDS(DirichletResidual_EdgeBasis,workset)
{ 
  if(workset.num_cells<=0)
    return;

  residual.deep_copy(ScalarT(0.0));

  if(workset.subcell_dim==1) {
    Intrepid2::CellTools<ScalarT>::getPhysicalEdgeTangents(edgeTan,
                                            pointValues.jac,
                                            this->wda(workset).subcell_index, 
                                           *basis->getCellTopology());

    for(std::size_t c=0;c<workset.num_cells;c++) {
      for(int b=0;b<dof.dimension(1);b++) {
        for(int d=0;d<dof.dimension(2);d++)
          residual(c,b) += (dof(c,b,d)-value(c,b,d))*edgeTan(c,b,d);
      } 
    }
  }
  else if(workset.subcell_dim==2) {
    // we need to compute the tangents on each edge for each cell.
    // how do we do this????
    const shards::CellTopology & parentCell = *basis->getCellTopology();
    int cellDim = parentCell.getDimension();
    int numEdges = dof.dimension(1);

    refEdgeTan = Kokkos::createDynRankView(residual.get_kokkos_view(),"refEdgeTan",numEdges,cellDim);

    for(int i=0;i<numEdges;i++) {
      Kokkos::DynRankView<double,PHX::Device> refEdgeTan_local("refEdgeTan_local",cellDim);
      Intrepid2::CellTools<double>::getReferenceEdgeTangent(refEdgeTan_local, i, parentCell);

      for(int d=0;d<cellDim;d++) 
        refEdgeTan(i,d) = refEdgeTan_local(d);
    }

    // Loop over workset faces and edge points
    for(std::size_t c=0;c<workset.num_cells;c++) {
      for(int pt = 0; pt < numEdges; pt++) {

        // Apply parent cell Jacobian to ref. edge tangent
        for(int i = 0; i < cellDim; i++) {
          edgeTan(c, pt, i) = 0.0;
          for(int j = 0; j < cellDim; j++){
            edgeTan(c, pt, i) +=  pointValues.jac(c, pt, i, j)*refEdgeTan(pt,j);
          }// for j
        }// for i
      }// for pt
    }// for pCell

    for(std::size_t c=0;c<workset.num_cells;c++) {
      for(int b=0;b<dof.dimension(1);b++) {
        for(int d=0;d<dof.dimension(2);d++)
          residual(c,b) += (dof(c,b,d)-value(c,b,d))*edgeTan(c,b,d);
      } 
    }

  }
  else {
    // don't know what to do 
    TEUCHOS_ASSERT(false);
  }

  // loop over residuals scaling by orientation. This gurantees
  // everything is oriented in the "positive" direction, this allows
  // sums acrossed processor to be oriented in the same way (right?)
  for(std::size_t c=0;c<workset.num_cells;c++) {
    for(int b=0;b<dof.dimension(1);b++) {
      residual(c,b) *= dof_orientation(c,b);
    }
  }
}
void panzer::GatherSolution_Epetra<panzer::Traits::Jacobian, Traits,LO,GO>::
evaluateFields(typename Traits::EvalData workset)
{ 
   std::vector<int> LIDs;

   // for convenience pull out some objects from workset
   std::string blockId = workset.block_id;
   const std::vector<std::size_t> & localCellIds = workset.cell_local_ids;

   Teuchos::RCP<Epetra_Vector> x;
   double seed_value = 0.0;
   if (useTimeDerivativeSolutionVector_) {
     x = epetraContainer_->get_dxdt();
     seed_value = workset.alpha;
   }
   else {
     x = epetraContainer_->get_x();
     seed_value = workset.beta;
   }

   // turn off sensitivies: this may be faster if we don't expand the term
   // but I suspect not because anywhere it is used the full complement of
   // sensitivies will be needed anyway.
   if(disableSensitivities_)
      seed_value = 0.0;

   // NOTE: A reordering of these loops will likely improve performance
   //       The "getGIDFieldOffsets may be expensive.  However the
   //       "getElementGIDs" can be cheaper. However the lookup for LIDs
   //       may be more expensive!

   // gather operation for each cell in workset
   for(std::size_t worksetCellIndex=0;worksetCellIndex<localCellIds.size();++worksetCellIndex) {
      std::size_t cellLocalId = localCellIds[worksetCellIndex];

      LIDs = globalIndexer_->getElementLIDs(cellLocalId); 

      // loop over the fields to be gathered
      for(std::size_t fieldIndex=0;
          fieldIndex<gatherFields_.size();fieldIndex++) {
         int fieldNum = fieldIds_[fieldIndex];
         const std::vector<int> & elmtOffset = globalIndexer_->getGIDFieldOffsets(blockId,fieldNum);

         if(disableSensitivities_) {
           // loop over basis functions and fill the fields
           for(std::size_t basis=0;basis<elmtOffset.size();basis++) {
             int offset = elmtOffset[basis];
             int lid = LIDs[offset];

             // set the value and seed the FAD object
             (gatherFields_[fieldIndex])(worksetCellIndex,basis) = (*x)[lid];
           }
         }
         else {
           // loop over basis functions and fill the fields
           for(std::size_t basis=0;basis<elmtOffset.size();basis++) {
             int offset = elmtOffset[basis];
             int lid = LIDs[offset];

             // set the value and seed the FAD object
             (gatherFields_[fieldIndex])(worksetCellIndex,basis) = ScalarT(LIDs.size(), (*x)[lid]);
             (gatherFields_[fieldIndex])(worksetCellIndex,basis).fastAccessDx(offset) = seed_value;
           }
         }
      }
   }
}
void
AnisotropicViscoplasticModel<EvalT, Traits>::computeState(
    typename Traits::EvalData workset,
    DepFieldMap               dep_fields,
    FieldMap                  eval_fields)
{
  std::string cauchy_string = (*field_name_map_)["Cauchy_Stress"];
  std::string Fp_string     = (*field_name_map_)["Fp"];
  std::string eqps_string   = (*field_name_map_)["eqps"];
  std::string ess_string    = (*field_name_map_)["ess"];
  std::string kappa_string  = (*field_name_map_)["iso_Hardening"];
  std::string source_string = (*field_name_map_)["Mechanical_Source"];
  std::string F_string      = (*field_name_map_)["F"];
  std::string J_string      = (*field_name_map_)["J"];

  // extract dependent MDFields
  auto def_grad          = *dep_fields[F_string];
  auto J                 = *dep_fields[J_string];
  auto poissons_ratio    = *dep_fields["Poissons Ratio"];
  auto elastic_modulus   = *dep_fields["Elastic Modulus"];
  auto yield_strength    = *dep_fields["Yield Strength"];
  auto hardening_modulus = *dep_fields["Hardening Modulus"];
  auto recovery_modulus  = *dep_fields["Recovery Modulus"];
  auto flow_exp          = *dep_fields["Flow Rule Exponent"];
  auto flow_coeff        = *dep_fields["Flow Rule Coefficient"];
  auto delta_time        = *dep_fields["Delta Time"];

  // extract evaluated MDFields
  auto                  stress = *eval_fields[cauchy_string];
  auto                  Fp     = *eval_fields[Fp_string];
  auto                  eqps   = *eval_fields[eqps_string];
  auto                  ess    = *eval_fields[ess_string];
  auto                  kappa  = *eval_fields[kappa_string];
  PHX::MDField<ScalarT> source;
  if (have_temperature_) { source = *eval_fields[source_string]; }

  // get State Variables
  Albany::MDArray Fpold   = (*workset.stateArrayPtr)[Fp_string + "_old"];
  Albany::MDArray eqpsold = (*workset.stateArrayPtr)[eqps_string + "_old"];

  ScalarT bulk, mu, mubar, K, Y;
  ScalarT Jm23, trace, smag2, smag, f, p, dgam;
  ScalarT sq23(std::sqrt(2. / 3.));

  minitensor::Tensor<ScalarT> F(num_dims_), be(num_dims_), s(num_dims_),
      sigma(num_dims_);
  minitensor::Tensor<ScalarT> N(num_dims_), A(num_dims_), expA(num_dims_),
      Fpnew(num_dims_);
  minitensor::Tensor<ScalarT> I(minitensor::eye<ScalarT>(num_dims_));
  minitensor::Tensor<ScalarT> Fpn(num_dims_), Cpinv(num_dims_), Fe(num_dims_);
  minitensor::Tensor<ScalarT> tau(num_dims_), M(num_dims_);

  for (int cell(0); cell < workset.numCells; ++cell) {
    for (int pt(0); pt < num_pts_; ++pt) {
      bulk = elastic_modulus(cell, pt) /
             (3. * (1. - 2. * poissons_ratio(cell, pt)));
      mu   = elastic_modulus(cell, pt) / (2. * (1. + poissons_ratio(cell, pt)));
      K    = hardening_modulus(cell, pt);
      Y    = yield_strength(cell, pt);
      Jm23 = std::pow(J(cell, pt), -2. / 3.);

      // fill local tensors
      F.fill(def_grad, cell, pt, 0, 0);

      // Mechanical deformation gradient
      auto Fm = minitensor::Tensor<ScalarT>(F);
      if (have_temperature_) {
        // Compute the mechanical deformation gradient Fm based on the
        // multiplicative decomposition of the deformation gradient
        //
        //            F = Fm.Ft => Fm = F.inv(Ft)
        //
        // where Ft is the thermal part of F, given as
        //
        //     Ft = Le * I = exp(alpha * dtemp) * I
        //
        // Le is the thermal stretch and alpha the coefficient of thermal
        // expansion.
        ScalarT dtemp           = temperature_(cell, pt) - ref_temperature_;
        ScalarT thermal_stretch = std::exp(expansion_coeff_ * dtemp);
        Fm /= thermal_stretch;
      }

      // Fpn.fill( &Fpold(cell,pt,int(0),int(0)) );
      for (int i(0); i < num_dims_; ++i) {
        for (int j(0); j < num_dims_; ++j) {
          Fpn(i, j) = ScalarT(Fpold(cell, pt, i, j));
        }
      }

      // compute trial state
      // compute the Kirchhoff stress in the current configuration
      //
      Fe    = Fm * minitensor::inverse(Fpn);
      Cpinv = minitensor::inverse(Fpn) *
              minitensor::transpose(minitensor::inverse(Fpn));
      be         = Fm * Cpinv * minitensor::transpose(Fm);
      ScalarT Je = std::sqrt(minitensor::det(be));
      s          = mu * minitensor::dev(be);
      p          = 0.5 * bulk * (Je * Je - 1.);
      tau        = p * I + s;

      // pull back the Kirchhoff stress to the intermediate configuration
      // this is the Mandel stress
      //
      M = minitensor::transpose(Fe) * tau *
          minitensor::inverse(minitensor::transpose(Fe));

      // check yield condition
      smag = minitensor::norm(s);
      f    = smag - sq23 * (Y + K * eqpsold(cell, pt));

      if (f > 1E-12) {
        // return mapping algorithm
        bool    converged = false;
        ScalarT g         = f;
        ScalarT H         = 0.0;
        ScalarT dH        = 0.0;
        ScalarT alpha     = 0.0;
        ScalarT res       = 0.0;
        int     count     = 0;
        dgam              = 0.0;

        LocalNonlinearSolver<EvalT, Traits> solver;

        std::vector<ScalarT> F(1);
        std::vector<ScalarT> dFdX(1);
        std::vector<ScalarT> X(1);

        F[0]    = f;
        X[0]    = 0.0;
        dFdX[0] = (-2. * mubar) * (1. + H / (3. * mubar));
        while (!converged && count <= 30) {
          count++;
          solver.solve(dFdX, X, F);
          alpha   = eqpsold(cell, pt) + sq23 * X[0];
          H       = K * alpha;
          dH      = K;
          F[0]    = smag - (2. * mubar * X[0] + sq23 * (Y + H));
          dFdX[0] = -2. * mubar * (1. + dH / (3. * mubar));

          res = std::abs(F[0]);
          if (res < 1.e-11 || res / f < 1.E-11) converged = true;

          TEUCHOS_TEST_FOR_EXCEPTION(
              count == 30,
              std::runtime_error,
              std::endl
                  << "Error in return mapping, count = " << count
                  << "\nres = " << res << "\nrelres = " << res / f
                  << "\ng = " << F[0] << "\ndg = " << dFdX[0]
                  << "\nalpha = " << alpha << std::endl);
        }
        solver.computeFadInfo(dFdX, X, F);
        dgam = X[0];

        // plastic direction
        N = (1 / smag) * s;

        // update s
        s -= 2 * mubar * dgam * N;

        // update eqps
        eqps(cell, pt) = alpha;

        // mechanical source
        if (have_temperature_ && delta_time(0) > 0) {
          source(cell, pt) =
              (sq23 * dgam / delta_time(0) * (Y + H + temperature_(cell, pt))) /
              (density_ * heat_capacity_);
        }

        // exponential map to get Fpnew
        A     = dgam * N;
        expA  = minitensor::exp(A);
        Fpnew = expA * Fpn;
        for (int i(0); i < num_dims_; ++i) {
          for (int j(0); j < num_dims_; ++j) {
            Fp(cell, pt, i, j) = Fpnew(i, j);
          }
        }
      } else {
        eqps(cell, pt) = eqpsold(cell, pt);
        if (have_temperature_) source(cell, pt) = 0.0;
        for (int i(0); i < num_dims_; ++i) {
          for (int j(0); j < num_dims_; ++j) { Fp(cell, pt, i, j) = Fpn(i, j); }
        }
      }

      // compute pressure
      p = 0.5 * bulk * (J(cell, pt) - 1. / (J(cell, pt)));

      // compute stress
      sigma = p * I + s / J(cell, pt);
      for (int i(0); i < num_dims_; ++i) {
        for (int j(0); j < num_dims_; ++j) {
          stress(cell, pt, i, j) = sigma(i, j);
        }
      }
    }
  }
}
void XZHydrostatic_Pressure<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  const Eta<EvalT> &E = Eta<EvalT>::self();

  for (int cell=0; cell < workset.numCells; ++cell) {
    for (int node=0; node < numNodes; ++node) {
/*
    	  if(cell == 0 && node == 0){
    		  std::cout << "Etatop = " << E.etatop() <<"\n";
    	  for (int level=0; level < numLevels; ++level) {
    		std::cout << "Here we are level, eta " << level << " " << E.eta(level) << "\n";
    	  }
    	  for (int level=0; level < numLevels; ++level) {
    		std::cout << "Here we are A, B " << level << " " << E.A(level) << "  " << E.B(level) << "\n";
    	  }
    	  }
*/
      for (int level=0; level < numLevels; ++level) {
        Pressure(cell,node,level) = E.A(level)*E.p0() + E.B(level)*Ps(cell,node);
        //std::cout <<"In Pressure "<< " Ps" << Ps(cell,node) <<" workset time" << workset.current_time << "\n";

      }
      //here instead of computing eta, A, B, and pressure at level interfaces directly,
      //averages are used to approx. pressure at level interfaces.
      for (int level=0; level < numLevels; ++level) {
        const ScalarT pm   = level             ? 0.5*( Pressure(cell,node,level) + Pressure(cell,node,level-1) ) : E.ptop();
        const ScalarT pp   = level<numLevels-1 ? 0.5*( Pressure(cell,node,level) + Pressure(cell,node,level+1) ) : ScalarT(Ps(cell,node));
        Pi(cell,node,level) = (pp - pm) /E.delta(level);
      }
    }
  }
}
void ElastoViscoplasticModel<EvalT, Traits>::
computeState(typename Traits::EvalData workset,
    std::map<std::string, Teuchos::RCP<PHX::MDField<ScalarT> > > dep_fields,
    std::map<std::string, Teuchos::RCP<PHX::MDField<ScalarT> > > eval_fields)
{
  std::string cauchy_string = (*field_name_map_)["Cauchy_Stress"];
  std::string Fp_string = (*field_name_map_)["Fp"];
  std::string eqps_string = (*field_name_map_)["eqps"];
  std::string eps_ss_string = (*field_name_map_)["eps_ss"];
  std::string kappa_string = (*field_name_map_)["isotropic_hardening"];
  std::string source_string = (*field_name_map_)["Mechanical_Source"];
  std::string F_string = (*field_name_map_)["F"];
  std::string J_string = (*field_name_map_)["J"];

  // extract dependent MDFields
  PHX::MDField<ScalarT> def_grad_field = *dep_fields[F_string];
  PHX::MDField<ScalarT> J = *dep_fields[J_string];
  PHX::MDField<ScalarT> poissons_ratio = *dep_fields["Poissons Ratio"];
  PHX::MDField<ScalarT> elastic_modulus = *dep_fields["Elastic Modulus"];
  PHX::MDField<ScalarT> yield_strength = *dep_fields["Yield Strength"];
  PHX::MDField<ScalarT> hardening_modulus = *dep_fields["Hardening Modulus"];
  PHX::MDField<ScalarT> recovery_modulus = *dep_fields["Recovery Modulus"];
  PHX::MDField<ScalarT> flow_exp = *dep_fields["Flow Rule Exponent"];
  PHX::MDField<ScalarT> flow_coeff = *dep_fields["Flow Rule Coefficient"];
  PHX::MDField<ScalarT> delta_time = *dep_fields["Delta Time"];

  // extract evaluated MDFields
  PHX::MDField<ScalarT> stress_field = *eval_fields[cauchy_string];
  PHX::MDField<ScalarT> Fp_field = *eval_fields[Fp_string];
  PHX::MDField<ScalarT> eqps_field = *eval_fields[eqps_string];
  PHX::MDField<ScalarT> eps_ss_field = *eval_fields[eps_ss_string];
  PHX::MDField<ScalarT> kappa_field = *eval_fields[kappa_string];
  PHX::MDField<ScalarT> source_field;
  if (have_temperature_) {
    source_field = *eval_fields[source_string];
  }

  // get State Variables
  Albany::MDArray Fp_field_old     = (*workset.stateArrayPtr)[Fp_string + "_old"];
  Albany::MDArray eqps_field_old   = (*workset.stateArrayPtr)[eqps_string + "_old"];
  Albany::MDArray eps_ss_field_old = (*workset.stateArrayPtr)[eps_ss_string + "_old"];
  Albany::MDArray kappa_field_old  = (*workset.stateArrayPtr)[kappa_string + "_old"];

  // define constants
  RealType sq23(std::sqrt(2. / 3.));
  RealType sq32(std::sqrt(3. / 2.));

  // pre-define some tensors that will be re-used below
  Intrepid::Tensor<ScalarT> F(num_dims_), be(num_dims_);
  Intrepid::Tensor<ScalarT> s(num_dims_), sigma(num_dims_);
  Intrepid::Tensor<ScalarT> N(num_dims_), A(num_dims_);
  Intrepid::Tensor<ScalarT> expA(num_dims_), Fpnew(num_dims_);
  Intrepid::Tensor<ScalarT> I(Intrepid::eye<ScalarT>(num_dims_));
  Intrepid::Tensor<ScalarT> Fpn(num_dims_), Cpinv(num_dims_), Fpinv(num_dims_);

  for (std::size_t cell(0); cell < workset.numCells; ++cell) {
    for (std::size_t pt(0); pt < num_pts_; ++pt) {
      ScalarT bulk = elastic_modulus(cell, pt)
          / (3. * (1. - 2. * poissons_ratio(cell, pt)));
      ScalarT mu = elastic_modulus(cell, pt) / (2. * (1. + poissons_ratio(cell, pt)));
      ScalarT Y = yield_strength(cell, pt);
      ScalarT Jm23 = std::pow(J(cell, pt), -2. / 3.);

      // assign local state variables
      //
      //ScalarT kappa = kappa_field(cell,pt);
      ScalarT kappa_old = kappa_field_old(cell,pt);
      ScalarT eps_ss = eps_ss_field(cell,pt);
      ScalarT eps_ss_old = eps_ss_field_old(cell,pt);
      ScalarT eqps_old = eqps_field_old(cell,pt);

      // fill local tensors
      //
      F.fill(&def_grad_field(cell, pt, 0, 0));
      for (std::size_t i(0); i < num_dims_; ++i) {
        for (std::size_t j(0); j < num_dims_; ++j) {
          Fpn(i, j) = ScalarT(Fp_field_old(cell, pt, i, j));
        }
      }

      // compute trial state
      // compute the Kirchhoff stress in the current configuration
      //
      Cpinv = Intrepid::inverse(Fpn) * Intrepid::transpose(Intrepid::inverse(Fpn));
      be = Jm23 * F * Cpinv * Intrepid::transpose(F);
      s = mu * Intrepid::dev(be);
      ScalarT smag = Intrepid::norm(s);
      ScalarT mubar = Intrepid::trace(be) * mu / (num_dims_);
      
      // check yield condition
      //
      ScalarT Phi = sq32 * smag - ( Y + kappa_old );

      std::cout << "======== Phi: " << Phi << std::endl;
      std::cout << "======== eps: " << std::numeric_limits<RealType>::epsilon() << std::endl;

      if (Phi > std::numeric_limits<RealType>::epsilon()) {

        // return mapping algorithm
        //
        bool converged = false;
        int iter = 0;
        RealType max_norm = std::numeric_limits<RealType>::min();

        // hardening and recovery parameters
        //
        ScalarT H = hardening_modulus(cell, pt);
        ScalarT Rd = recovery_modulus(cell, pt);

        // flow rule temperature dependent parameters
        //
        ScalarT f = flow_coeff(cell,pt);
        ScalarT n = flow_exp(cell,pt);

        // This solver deals with Sacado type info
        //
        LocalNonlinearSolver<EvalT, Traits> solver;

        // create some vectors to store solver data
        //
        std::vector<ScalarT> R(2);
        std::vector<ScalarT> dRdX(4);
        std::vector<ScalarT> X(2);

        // initial guess
        X[0] = 0.0;
        X[1] = eps_ss_old;

        // create a copy of be as a Fad
        Intrepid::Tensor<Fad> beF(num_dims_);
        for (std::size_t i = 0; i < num_dims_; ++i) {
          for (std::size_t j = 0; j < num_dims_; ++j) {
            beF(i, j) = be(i, j);
          }
        }
        Fad two_mubarF = 2.0 * Intrepid::trace(beF) * mu / (num_dims_);
        //Fad sq32F = std::sqrt(3.0/2.0);
        // FIXME this seems to be necessary to get PhiF to compile below
        // need to look into this more, it appears to be a conflict
        // between the Intrepid::norm and FadType operations
        //
        Fad smagF = smag;

        while (!converged) {

          // set up data types
          //
          std::vector<Fad> XFad(2);
          std::vector<Fad> RFad(2);
          std::vector<ScalarT> Xval(2);
          for (std::size_t i = 0; i < 2; ++i) {
            Xval[i] = Sacado::ScalarValue<ScalarT>::eval(X[i]);
            XFad[i] = Fad(2, i, Xval[i]);
          }

          // get solution vars
          //
          Fad dgamF = XFad[0];
          Fad eps_ssF = XFad[1];

          // compute yield function
          //
          Fad eqps_rateF = 0.0;
          if (delta_time(0) > 0) eqps_rateF = sq23 * dgamF / delta_time(0);
          Fad rate_termF = 1.0 + std::asinh( std::pow(eqps_rateF / f, n));
          Fad kappaF = two_mubarF * eps_ssF;
          Fad PhiF = sq32 * (smagF - two_mubarF * dgamF) - ( Y + kappaF ) * rate_termF;

          // compute the hardening residual
          //
          Fad eps_resF = eps_ssF - eps_ss_old - (H - Rd*eps_ssF) * dgamF;

          // for convenience put the residuals into a container
          //
          RFad[0] = PhiF;
          RFad[1] = eps_resF;

          // extract the values of the residuals
          //
          for (int i = 0; i < 2; ++i)
            R[i] = RFad[i].val();

          // extract the sensitivities of the residuals
          //
          for (int i = 0; i < 2; ++i)
            for (int j = 0; j < 2; ++j)
              dRdX[i + 2 * j] = RFad[i].dx(j);

          // this call invokes the solver and updates the solution in X
          //
          solver.solve(dRdX, X, R);

          // compute the norm of the residual
          //
          RealType R0 = Sacado::ScalarValue<ScalarT>::eval(R[0]); 
          RealType R1 = Sacado::ScalarValue<ScalarT>::eval(R[1]);
          RealType norm_res = std::sqrt(R0*R0 + R1*R1);
          max_norm = std::max(norm_res, max_norm);
            
          // check against too many inerations
          //
          TEUCHOS_TEST_FOR_EXCEPTION(iter == 30, std::runtime_error,
                                     std::endl <<
                                     "Error in ElastoViscoplastic return mapping\n" <<
                                     "iter count = " << iter << "\n" << std::endl);

          // check for a sufficiently small residual
          //
          std::cout << "======== norm_res : " << norm_res << std::endl;
          if ( (norm_res/max_norm < 1.e-12) || (norm_res < 1.e-12) )
            converged = true;

          // increment the iteratio counter
          //
          iter++;
        }

        solver.computeFadInfo(dRdX, X, R);
        ScalarT dgam = X[0];
        ScalarT eps_ss = X[1];
        ScalarT kappa = 2.0 * mubar * eps_ss;

        std::cout << "======== dgam : " << dgam << std::endl;
        std::cout << "======== e_ss : " << eps_ss << std::endl;
        std::cout << "======== kapp : " << kappa << std::endl;

        // plastic direction
        N = (1 / smag) * s;

        // update s
        s -= 2 * mubar * dgam * N;

        // update state variables
        eps_ss_field(cell, pt) = eps_ss;
        eqps_field(cell,pt) = eqps_old + sq23 * dgam;
        kappa_field(cell,pt) = kappa;

        // mechanical source
        // FIXME this is not correct, just a placeholder
        //
        if (have_temperature_ && delta_time(0) > 0) {
          source_field(cell, pt) = (sq23 * dgam / delta_time(0))
            * (Y + kappa) / (density_ * heat_capacity_);
        }

        // exponential map to get Fpnew
        //
        A = dgam * N;
        expA = Intrepid::exp(A);
        Fpnew = expA * Fpn;
        for (std::size_t i(0); i < num_dims_; ++i) {
          for (std::size_t j(0); j < num_dims_; ++j) {
            Fp_field(cell, pt, i, j) = Fpnew(i, j);
          }
        }
      } else {
        // we are not yielding, variables do not evolve
        //
        eps_ss_field(cell, pt) = eps_ss_old;
        eqps_field(cell,pt) = eqps_old;
        kappa_field(cell,pt) = kappa_old;
        if (have_temperature_) source_field(cell, pt) = 0.0;
        for (std::size_t i(0); i < num_dims_; ++i) {
          for (std::size_t j(0); j < num_dims_; ++j) {
            Fp_field(cell, pt, i, j) = Fpn(i, j);
          }
        }
      }

      // compute pressure
      ScalarT p = 0.5 * bulk * (J(cell, pt) - 1. / (J(cell, pt)));

      // compute stress
      sigma = p * I + s / J(cell, pt);
      for (std::size_t i(0); i < num_dims_; ++i) {
        for (std::size_t j(0); j < num_dims_; ++j) {
          stress_field(cell, pt, i, j) = sigma(i, j);
        }
      }
    }
  }

  if (have_temperature_) {
    for (std::size_t cell(0); cell < workset.numCells; ++cell) {
      for (std::size_t pt(0); pt < num_pts_; ++pt) {
        F.fill(&def_grad_field(cell,pt,0,0));
        ScalarT J = Intrepid::det(F);
        sigma.fill(&stress_field(cell,pt,0,0));
        sigma -= 3.0 * expansion_coeff_ * (1.0 + 1.0 / (J*J))
          * (temperature_(cell,pt) - ref_temperature_) * I;
        for (std::size_t i = 0; i < num_dims_; ++i) {
          for (std::size_t j = 0; j < num_dims_; ++j) {
            stress_field(cell, pt, i, j) = sigma(i, j);
          }
        }
      }
    }
  }
}
Exemple #26
0
KOKKOS_INLINE_FUNCTION void
J2MiniKernel<EvalT, Traits>::operator()(int cell, int pt) const
{
  constexpr minitensor::Index MAX_DIM{3};

  using Tensor = minitensor::Tensor<ScalarT, MAX_DIM>;

  Tensor        F(num_dims_);
  Tensor const  I(minitensor::eye<ScalarT, MAX_DIM>(num_dims_));
  Tensor        sigma(num_dims_);
  ScalarT const E     = elastic_modulus_(cell, pt);
  ScalarT const nu    = poissons_ratio_(cell, pt);
  ScalarT const kappa = E / (3.0 * (1.0 - 2.0 * nu));
  ScalarT const mu    = E / (2.0 * (1.0 + nu));
  ScalarT const K     = hardening_modulus_(cell, pt);
  ScalarT const Y     = yield_strength_(cell, pt);
  ScalarT const J1    = J_(cell, pt);
  ScalarT const Jm23  = 1.0 / std::cbrt(J1 * J1);

  // fill local tensors
  F.fill(def_grad_, cell, pt, 0, 0);

  // Mechanical deformation gradient
  auto Fm = Tensor(F);
  if (have_temperature_) {
    // Compute the mechanical deformation gradient Fm based on the
    // multiplicative decomposition of the deformation gradient
    //
    //            F = Fm.Ft => Fm = F.inv(Ft)
    //
    // where Ft is the thermal part of F, given as
    //
    //     Ft = Le * I = exp(alpha * dtemp) * I
    //
    // Le = exp(alpha*dtemp) is the thermal stretch and alpha the
    // coefficient of thermal expansion.
    ScalarT dtemp           = temperature_(cell, pt) - ref_temperature_;
    ScalarT thermal_stretch = std::exp(expansion_coeff_ * dtemp);
    Fm /= thermal_stretch;
  }

  Tensor Fpn(num_dims_);

  for (int i{0}; i < num_dims_; ++i) {
    for (int j{0}; j < num_dims_; ++j) {
      Fpn(i, j) = ScalarT(Fp_old_(cell, pt, i, j));
    }
  }

  // compute trial state
  Tensor const  Fpinv = minitensor::inverse(Fpn);
  Tensor const  Cpinv = Fpinv * minitensor::transpose(Fpinv);
  Tensor const  be    = Jm23 * Fm * Cpinv * minitensor::transpose(Fm);
  Tensor        s     = mu * minitensor::dev(be);
  ScalarT const mubar = minitensor::trace(be) * mu / (num_dims_);

  // check yield condition
  ScalarT const smag = minitensor::norm(s);
  ScalarT const f =
      smag -
      SQ23 * (Y + K * eqps_old_(cell, pt) +
              sat_mod_ * (1.0 - std::exp(-sat_exp_ * eqps_old_(cell, pt))));

  RealType constexpr yield_tolerance = 1.0e-12;

  if (f > yield_tolerance) {
    // Use minimization equivalent to return mapping
    using ValueT = typename Sacado::ValueType<ScalarT>::type;
    using NLS    = J2NLS<EvalT>;

    constexpr minitensor::Index nls_dim{NLS::DIMENSION};

    using MIN  = minitensor::Minimizer<ValueT, nls_dim>;
    using STEP = minitensor::NewtonStep<NLS, ValueT, nls_dim>;

    MIN  minimizer;
    STEP step;
    NLS  j2nls(sat_mod_, sat_exp_, eqps_old_(cell, pt), K, smag, mubar, Y);

    minitensor::Vector<ScalarT, nls_dim> x;

    x(0) = 0.0;

    LCM::MiniSolver<MIN, STEP, NLS, EvalT, nls_dim> mini_solver(
        minimizer, step, j2nls, x);

    ScalarT const alpha = eqps_old_(cell, pt) + SQ23 * x(0);
    ScalarT const H     = K * alpha + sat_mod_ * (1.0 - exp(-sat_exp_ * alpha));
    ScalarT const dgam  = x(0);

    // plastic direction
    Tensor const N = (1 / smag) * s;

    // update s
    s -= 2 * mubar * dgam * N;

    // update eqps
    eqps_(cell, pt) = alpha;

    // mechanical source
    if (have_temperature_ == true && delta_time_(0) > 0) {
      source_(cell, pt) =
          (SQ23 * dgam / delta_time_(0) * (Y + H + temperature_(cell, pt))) /
          (density_ * heat_capacity_);
    }

    // exponential map to get Fpnew
    Tensor const A     = dgam * N;
    Tensor const expA  = minitensor::exp(A);
    Tensor const Fpnew = expA * Fpn;

    for (int i{0}; i < num_dims_; ++i) {
      for (int j{0}; j < num_dims_; ++j) { Fp_(cell, pt, i, j) = Fpnew(i, j); }
    }
  } else {
    eqps_(cell, pt) = eqps_old_(cell, pt);

    if (have_temperature_ == true) source_(cell, pt) = 0.0;

    for (int i{0}; i < num_dims_; ++i) {
      for (int j{0}; j < num_dims_; ++j) { Fp_(cell, pt, i, j) = Fpn(i, j); }
    }
  }

  // update yield surface
  yield_surf_(cell, pt) =
      Y + K * eqps_(cell, pt) +
      sat_mod_ * (1. - std::exp(-sat_exp_ * eqps_(cell, pt)));

  // compute pressure
  ScalarT const p = 0.5 * kappa * (J_(cell, pt) - 1. / (J_(cell, pt)));

  // compute stress
  sigma = p * I + s / J_(cell, pt);

  for (int i(0); i < num_dims_; ++i) {
    for (int j(0); j < num_dims_; ++j) {
      stress_(cell, pt, i, j) = sigma(i, j);
    }
  }
}
Exemple #27
0
 ScalarT value( const std::vector<ScalarT> &x, Real &tol ) {
   return ScalarT(-1.0);
 }
void panzer::GatherSolution_Epetra<panzer::Traits::SGJacobian, TRAITS,LO,GO>::
evaluateFields(typename TRAITS::EvalData workset)
{ 
   std::vector<GO> GIDs;
   std::vector<int> LIDs;

   // for convenience pull out some objects from workset
   std::string blockId = this->wda(workset).block_id;
   const std::vector<std::size_t> & localCellIds = this->wda(workset).cell_local_ids;

   Teuchos::RCP<Stokhos::OrthogPolyExpansion<int,double> > expansion = sgEpetraContainer_->getExpansion();

   Teuchos::RCP<Epetra_Vector> x_template;
   double seed_value = 0.0;
   if (useTimeDerivativeSolutionVector_) {
     x_template = (*sgEpetraContainer_->begin())->get_dxdt();
     seed_value = workset.alpha;
   }
   else {
     x_template = (*sgEpetraContainer_->begin())->get_x(); 
     seed_value = workset.beta;
   }

   // NOTE: A reordering of these loops will likely improve performance
   //       The "getGIDFieldOffsets may be expensive.  However the
   //       "getElementGIDs" can be cheaper. However the lookup for LIDs
   //       may be more expensive!

   // gather operation for each cell in workset
   for(std::size_t worksetCellIndex=0;worksetCellIndex<localCellIds.size();++worksetCellIndex) {
      std::size_t cellLocalId = localCellIds[worksetCellIndex];

      globalIndexer_->getElementGIDs(cellLocalId,GIDs,blockId); 

      // caculate the local IDs for this element
      LIDs.resize(GIDs.size());
      for(std::size_t i=0;i<GIDs.size();i++)
        LIDs[i] = x_template->Map().LID(GIDs[i]);

      // loop over the fields to be gathered
      for(std::size_t fieldIndex=0;
          fieldIndex<gatherFields_.size();fieldIndex++) {
         int fieldNum = fieldIds_[fieldIndex];
         const std::vector<int> & elmtOffset = globalIndexer_->getGIDFieldOffsets(blockId,fieldNum);

         // loop over basis functions and fill the fields
         for(std::size_t basis=0;basis<elmtOffset.size();basis++) {
            int offset = elmtOffset[basis];
            int lid = LIDs[offset];

            PHX::MDField<ScalarT,Cell,NODE> field = (gatherFields_[fieldIndex]);
            // ScalarT & field = (gatherFields_[fieldIndex])(worksetCellIndex,basis);

            field(worksetCellIndex,basis) = ScalarT(GIDs.size(), 0.0);

            // set the value and seed the FAD object
            field(worksetCellIndex,basis).fastAccessDx(offset) = seed_value;
            field(worksetCellIndex,basis).val().reset(expansion);
            field(worksetCellIndex,basis).val().copyForWrite();

            // loop over stochastic basis initialzing field gather values
            int stochIndex = 0;
            panzer::SGEpetraLinearObjContainer::iterator itr; 
            for(itr=sgEpetraContainer_->begin();itr!=sgEpetraContainer_->end();++itr,++stochIndex) {
               // extract solution and time derivative vectors
               Teuchos::RCP<Epetra_Vector> x;
               if (useTimeDerivativeSolutionVector_)
                 x = (*itr)->get_dxdt();
               else
                 x = (*itr)->get_x(); 

               field(worksetCellIndex,basis).val().fastAccessCoeff(stochIndex) = (*x)[lid];
            }
         }
      }
   }
}
void NewtonianFluidModel<EvalT, Traits>::
computeState(typename Traits::EvalData workset,
    std::map<std::string, Teuchos::RCP<PHX::MDField<ScalarT>>> dep_fields,
    std::map<std::string, Teuchos::RCP<PHX::MDField<ScalarT>>> eval_fields)
{

  std::string F_string      = (*field_name_map_)["F"];
  std::string cauchy_string = (*field_name_map_)["Cauchy_Stress"];

  // extract dependent MDFields
  PHX::MDField<ScalarT> def_grad         = *dep_fields[F_string];
  PHX::MDField<ScalarT> delta_time       = *dep_fields["Delta Time"];

  // extract evaluated MDFields
  PHX::MDField<ScalarT> stress = *eval_fields[cauchy_string];

  // get State Variables
  Albany::MDArray def_grad_old = (*workset.stateArrayPtr)[F_string + "_old"];

  // pressure is hard coded as 1 for now
  // this is likely not general enough :)
  ScalarT p = 1;

  // time increment
  ScalarT dt = delta_time(0);

  // containers
  Intrepid2::Tensor<ScalarT> Fnew(num_dims_);
  Intrepid2::Tensor<ScalarT> Fold(num_dims_);
  Intrepid2::Tensor<ScalarT> Finc(num_dims_);
  Intrepid2::Tensor<ScalarT> L(num_dims_);
  Intrepid2::Tensor<ScalarT> D(num_dims_);
  Intrepid2::Tensor<ScalarT> sigma(num_dims_);
  Intrepid2::Tensor<ScalarT> I(Intrepid2::eye<ScalarT>(num_dims_));

  for (int cell(0); cell < workset.numCells; ++cell) {
    for (int pt(0); pt < num_pts_; ++pt) {

      // should only be the first time step
      if ( dt == 0 ) {
        for (int i=0; i < num_dims_; ++i)
        for (int j=0; j < num_dims_; ++j)
          stress(cell,pt,i,j) = 0.0;
      }
      else {

        // old deformation gradient
        for (int i=0; i < num_dims_; ++i)
        for (int j=0; j < num_dims_; ++j)
          Fold(i,j) = ScalarT(def_grad_old(cell,pt,i,j));

        // current deformation gradient
        Fnew.fill(def_grad,cell,pt,0,0);

        // incremental deformation gradient
        Finc = Fnew * Intrepid2::inverse(Fold);

        // velocity gradient
        L = (1.0/dt) * Intrepid2::log(Finc);

        // strain rate (a.k.a rate of deformation)
        D = Intrepid2::sym(L);

        // stress tensor
        sigma = -p*I +  2.0*mu_*( D - (2.0/3.0)*Intrepid2::trace(D)*I);

        // update stress state
        for (int i=0; i < num_dims_; ++i)
        for (int j=0; j < num_dims_; ++j)
          stress(cell,pt,i,j) = sigma(i,j);

      }
    }
  }
}