void
AnisotropicViscoplasticModel<EvalT, Traits>::computeState(
    typename Traits::EvalData workset,
    DepFieldMap               dep_fields,
    FieldMap                  eval_fields)
{
  std::string cauchy_string = (*field_name_map_)["Cauchy_Stress"];
  std::string Fp_string     = (*field_name_map_)["Fp"];
  std::string eqps_string   = (*field_name_map_)["eqps"];
  std::string ess_string    = (*field_name_map_)["ess"];
  std::string kappa_string  = (*field_name_map_)["iso_Hardening"];
  std::string source_string = (*field_name_map_)["Mechanical_Source"];
  std::string F_string      = (*field_name_map_)["F"];
  std::string J_string      = (*field_name_map_)["J"];

  // extract dependent MDFields
  auto def_grad          = *dep_fields[F_string];
  auto J                 = *dep_fields[J_string];
  auto poissons_ratio    = *dep_fields["Poissons Ratio"];
  auto elastic_modulus   = *dep_fields["Elastic Modulus"];
  auto yield_strength    = *dep_fields["Yield Strength"];
  auto hardening_modulus = *dep_fields["Hardening Modulus"];
  auto recovery_modulus  = *dep_fields["Recovery Modulus"];
  auto flow_exp          = *dep_fields["Flow Rule Exponent"];
  auto flow_coeff        = *dep_fields["Flow Rule Coefficient"];
  auto delta_time        = *dep_fields["Delta Time"];

  // extract evaluated MDFields
  auto                  stress = *eval_fields[cauchy_string];
  auto                  Fp     = *eval_fields[Fp_string];
  auto                  eqps   = *eval_fields[eqps_string];
  auto                  ess    = *eval_fields[ess_string];
  auto                  kappa  = *eval_fields[kappa_string];
  PHX::MDField<ScalarT> source;
  if (have_temperature_) { source = *eval_fields[source_string]; }

  // get State Variables
  Albany::MDArray Fpold   = (*workset.stateArrayPtr)[Fp_string + "_old"];
  Albany::MDArray eqpsold = (*workset.stateArrayPtr)[eqps_string + "_old"];

  ScalarT bulk, mu, mubar, K, Y;
  ScalarT Jm23, trace, smag2, smag, f, p, dgam;
  ScalarT sq23(std::sqrt(2. / 3.));

  minitensor::Tensor<ScalarT> F(num_dims_), be(num_dims_), s(num_dims_),
      sigma(num_dims_);
  minitensor::Tensor<ScalarT> N(num_dims_), A(num_dims_), expA(num_dims_),
      Fpnew(num_dims_);
  minitensor::Tensor<ScalarT> I(minitensor::eye<ScalarT>(num_dims_));
  minitensor::Tensor<ScalarT> Fpn(num_dims_), Cpinv(num_dims_), Fe(num_dims_);
  minitensor::Tensor<ScalarT> tau(num_dims_), M(num_dims_);

  for (int cell(0); cell < workset.numCells; ++cell) {
    for (int pt(0); pt < num_pts_; ++pt) {
      bulk = elastic_modulus(cell, pt) /
             (3. * (1. - 2. * poissons_ratio(cell, pt)));
      mu   = elastic_modulus(cell, pt) / (2. * (1. + poissons_ratio(cell, pt)));
      K    = hardening_modulus(cell, pt);
      Y    = yield_strength(cell, pt);
      Jm23 = std::pow(J(cell, pt), -2. / 3.);

      // fill local tensors
      F.fill(def_grad, cell, pt, 0, 0);

      // Mechanical deformation gradient
      auto Fm = minitensor::Tensor<ScalarT>(F);
      if (have_temperature_) {
        // Compute the mechanical deformation gradient Fm based on the
        // multiplicative decomposition of the deformation gradient
        //
        //            F = Fm.Ft => Fm = F.inv(Ft)
        //
        // where Ft is the thermal part of F, given as
        //
        //     Ft = Le * I = exp(alpha * dtemp) * I
        //
        // Le is the thermal stretch and alpha the coefficient of thermal
        // expansion.
        ScalarT dtemp           = temperature_(cell, pt) - ref_temperature_;
        ScalarT thermal_stretch = std::exp(expansion_coeff_ * dtemp);
        Fm /= thermal_stretch;
      }

      // Fpn.fill( &Fpold(cell,pt,int(0),int(0)) );
      for (int i(0); i < num_dims_; ++i) {
        for (int j(0); j < num_dims_; ++j) {
          Fpn(i, j) = ScalarT(Fpold(cell, pt, i, j));
        }
      }

      // compute trial state
      // compute the Kirchhoff stress in the current configuration
      //
      Fe    = Fm * minitensor::inverse(Fpn);
      Cpinv = minitensor::inverse(Fpn) *
              minitensor::transpose(minitensor::inverse(Fpn));
      be         = Fm * Cpinv * minitensor::transpose(Fm);
      ScalarT Je = std::sqrt(minitensor::det(be));
      s          = mu * minitensor::dev(be);
      p          = 0.5 * bulk * (Je * Je - 1.);
      tau        = p * I + s;

      // pull back the Kirchhoff stress to the intermediate configuration
      // this is the Mandel stress
      //
      M = minitensor::transpose(Fe) * tau *
          minitensor::inverse(minitensor::transpose(Fe));

      // check yield condition
      smag = minitensor::norm(s);
      f    = smag - sq23 * (Y + K * eqpsold(cell, pt));

      if (f > 1E-12) {
        // return mapping algorithm
        bool    converged = false;
        ScalarT g         = f;
        ScalarT H         = 0.0;
        ScalarT dH        = 0.0;
        ScalarT alpha     = 0.0;
        ScalarT res       = 0.0;
        int     count     = 0;
        dgam              = 0.0;

        LocalNonlinearSolver<EvalT, Traits> solver;

        std::vector<ScalarT> F(1);
        std::vector<ScalarT> dFdX(1);
        std::vector<ScalarT> X(1);

        F[0]    = f;
        X[0]    = 0.0;
        dFdX[0] = (-2. * mubar) * (1. + H / (3. * mubar));
        while (!converged && count <= 30) {
          count++;
          solver.solve(dFdX, X, F);
          alpha   = eqpsold(cell, pt) + sq23 * X[0];
          H       = K * alpha;
          dH      = K;
          F[0]    = smag - (2. * mubar * X[0] + sq23 * (Y + H));
          dFdX[0] = -2. * mubar * (1. + dH / (3. * mubar));

          res = std::abs(F[0]);
          if (res < 1.e-11 || res / f < 1.E-11) converged = true;

          TEUCHOS_TEST_FOR_EXCEPTION(
              count == 30,
              std::runtime_error,
              std::endl
                  << "Error in return mapping, count = " << count
                  << "\nres = " << res << "\nrelres = " << res / f
                  << "\ng = " << F[0] << "\ndg = " << dFdX[0]
                  << "\nalpha = " << alpha << std::endl);
        }
        solver.computeFadInfo(dFdX, X, F);
        dgam = X[0];

        // plastic direction
        N = (1 / smag) * s;

        // update s
        s -= 2 * mubar * dgam * N;

        // update eqps
        eqps(cell, pt) = alpha;

        // mechanical source
        if (have_temperature_ && delta_time(0) > 0) {
          source(cell, pt) =
              (sq23 * dgam / delta_time(0) * (Y + H + temperature_(cell, pt))) /
              (density_ * heat_capacity_);
        }

        // exponential map to get Fpnew
        A     = dgam * N;
        expA  = minitensor::exp(A);
        Fpnew = expA * Fpn;
        for (int i(0); i < num_dims_; ++i) {
          for (int j(0); j < num_dims_; ++j) {
            Fp(cell, pt, i, j) = Fpnew(i, j);
          }
        }
      } else {
        eqps(cell, pt) = eqpsold(cell, pt);
        if (have_temperature_) source(cell, pt) = 0.0;
        for (int i(0); i < num_dims_; ++i) {
          for (int j(0); j < num_dims_; ++j) { Fp(cell, pt, i, j) = Fpn(i, j); }
        }
      }

      // compute pressure
      p = 0.5 * bulk * (J(cell, pt) - 1. / (J(cell, pt)));

      // compute stress
      sigma = p * I + s / J(cell, pt);
      for (int i(0); i < num_dims_; ++i) {
        for (int j(0); j < num_dims_; ++j) {
          stress(cell, pt, i, j) = sigma(i, j);
        }
      }
    }
  }
}
void ElastoViscoplasticModel<EvalT, Traits>::
computeState(typename Traits::EvalData workset,
    std::map<std::string, Teuchos::RCP<PHX::MDField<ScalarT> > > dep_fields,
    std::map<std::string, Teuchos::RCP<PHX::MDField<ScalarT> > > eval_fields)
{
  std::string cauchy_string = (*field_name_map_)["Cauchy_Stress"];
  std::string Fp_string = (*field_name_map_)["Fp"];
  std::string eqps_string = (*field_name_map_)["eqps"];
  std::string eps_ss_string = (*field_name_map_)["eps_ss"];
  std::string kappa_string = (*field_name_map_)["isotropic_hardening"];
  std::string source_string = (*field_name_map_)["Mechanical_Source"];
  std::string F_string = (*field_name_map_)["F"];
  std::string J_string = (*field_name_map_)["J"];

  // extract dependent MDFields
  PHX::MDField<ScalarT> def_grad_field = *dep_fields[F_string];
  PHX::MDField<ScalarT> J = *dep_fields[J_string];
  PHX::MDField<ScalarT> poissons_ratio = *dep_fields["Poissons Ratio"];
  PHX::MDField<ScalarT> elastic_modulus = *dep_fields["Elastic Modulus"];
  PHX::MDField<ScalarT> yield_strength = *dep_fields["Yield Strength"];
  PHX::MDField<ScalarT> hardening_modulus = *dep_fields["Hardening Modulus"];
  PHX::MDField<ScalarT> recovery_modulus = *dep_fields["Recovery Modulus"];
  PHX::MDField<ScalarT> flow_exp = *dep_fields["Flow Rule Exponent"];
  PHX::MDField<ScalarT> flow_coeff = *dep_fields["Flow Rule Coefficient"];
  PHX::MDField<ScalarT> delta_time = *dep_fields["Delta Time"];

  // extract evaluated MDFields
  PHX::MDField<ScalarT> stress_field = *eval_fields[cauchy_string];
  PHX::MDField<ScalarT> Fp_field = *eval_fields[Fp_string];
  PHX::MDField<ScalarT> eqps_field = *eval_fields[eqps_string];
  PHX::MDField<ScalarT> eps_ss_field = *eval_fields[eps_ss_string];
  PHX::MDField<ScalarT> kappa_field = *eval_fields[kappa_string];
  PHX::MDField<ScalarT> source_field;
  if (have_temperature_) {
    source_field = *eval_fields[source_string];
  }

  // get State Variables
  Albany::MDArray Fp_field_old     = (*workset.stateArrayPtr)[Fp_string + "_old"];
  Albany::MDArray eqps_field_old   = (*workset.stateArrayPtr)[eqps_string + "_old"];
  Albany::MDArray eps_ss_field_old = (*workset.stateArrayPtr)[eps_ss_string + "_old"];
  Albany::MDArray kappa_field_old  = (*workset.stateArrayPtr)[kappa_string + "_old"];

  // define constants
  RealType sq23(std::sqrt(2. / 3.));
  RealType sq32(std::sqrt(3. / 2.));

  // pre-define some tensors that will be re-used below
  Intrepid::Tensor<ScalarT> F(num_dims_), be(num_dims_);
  Intrepid::Tensor<ScalarT> s(num_dims_), sigma(num_dims_);
  Intrepid::Tensor<ScalarT> N(num_dims_), A(num_dims_);
  Intrepid::Tensor<ScalarT> expA(num_dims_), Fpnew(num_dims_);
  Intrepid::Tensor<ScalarT> I(Intrepid::eye<ScalarT>(num_dims_));
  Intrepid::Tensor<ScalarT> Fpn(num_dims_), Cpinv(num_dims_), Fpinv(num_dims_);

  for (std::size_t cell(0); cell < workset.numCells; ++cell) {
    for (std::size_t pt(0); pt < num_pts_; ++pt) {
      ScalarT bulk = elastic_modulus(cell, pt)
          / (3. * (1. - 2. * poissons_ratio(cell, pt)));
      ScalarT mu = elastic_modulus(cell, pt) / (2. * (1. + poissons_ratio(cell, pt)));
      ScalarT Y = yield_strength(cell, pt);
      ScalarT Jm23 = std::pow(J(cell, pt), -2. / 3.);

      // assign local state variables
      //
      //ScalarT kappa = kappa_field(cell,pt);
      ScalarT kappa_old = kappa_field_old(cell,pt);
      ScalarT eps_ss = eps_ss_field(cell,pt);
      ScalarT eps_ss_old = eps_ss_field_old(cell,pt);
      ScalarT eqps_old = eqps_field_old(cell,pt);

      // fill local tensors
      //
      F.fill(&def_grad_field(cell, pt, 0, 0));
      for (std::size_t i(0); i < num_dims_; ++i) {
        for (std::size_t j(0); j < num_dims_; ++j) {
          Fpn(i, j) = ScalarT(Fp_field_old(cell, pt, i, j));
        }
      }

      // compute trial state
      // compute the Kirchhoff stress in the current configuration
      //
      Cpinv = Intrepid::inverse(Fpn) * Intrepid::transpose(Intrepid::inverse(Fpn));
      be = Jm23 * F * Cpinv * Intrepid::transpose(F);
      s = mu * Intrepid::dev(be);
      ScalarT smag = Intrepid::norm(s);
      ScalarT mubar = Intrepid::trace(be) * mu / (num_dims_);
      
      // check yield condition
      //
      ScalarT Phi = sq32 * smag - ( Y + kappa_old );

      std::cout << "======== Phi: " << Phi << std::endl;
      std::cout << "======== eps: " << std::numeric_limits<RealType>::epsilon() << std::endl;

      if (Phi > std::numeric_limits<RealType>::epsilon()) {

        // return mapping algorithm
        //
        bool converged = false;
        int iter = 0;
        RealType max_norm = std::numeric_limits<RealType>::min();

        // hardening and recovery parameters
        //
        ScalarT H = hardening_modulus(cell, pt);
        ScalarT Rd = recovery_modulus(cell, pt);

        // flow rule temperature dependent parameters
        //
        ScalarT f = flow_coeff(cell,pt);
        ScalarT n = flow_exp(cell,pt);

        // This solver deals with Sacado type info
        //
        LocalNonlinearSolver<EvalT, Traits> solver;

        // create some vectors to store solver data
        //
        std::vector<ScalarT> R(2);
        std::vector<ScalarT> dRdX(4);
        std::vector<ScalarT> X(2);

        // initial guess
        X[0] = 0.0;
        X[1] = eps_ss_old;

        // create a copy of be as a Fad
        Intrepid::Tensor<Fad> beF(num_dims_);
        for (std::size_t i = 0; i < num_dims_; ++i) {
          for (std::size_t j = 0; j < num_dims_; ++j) {
            beF(i, j) = be(i, j);
          }
        }
        Fad two_mubarF = 2.0 * Intrepid::trace(beF) * mu / (num_dims_);
        //Fad sq32F = std::sqrt(3.0/2.0);
        // FIXME this seems to be necessary to get PhiF to compile below
        // need to look into this more, it appears to be a conflict
        // between the Intrepid::norm and FadType operations
        //
        Fad smagF = smag;

        while (!converged) {

          // set up data types
          //
          std::vector<Fad> XFad(2);
          std::vector<Fad> RFad(2);
          std::vector<ScalarT> Xval(2);
          for (std::size_t i = 0; i < 2; ++i) {
            Xval[i] = Sacado::ScalarValue<ScalarT>::eval(X[i]);
            XFad[i] = Fad(2, i, Xval[i]);
          }

          // get solution vars
          //
          Fad dgamF = XFad[0];
          Fad eps_ssF = XFad[1];

          // compute yield function
          //
          Fad eqps_rateF = 0.0;
          if (delta_time(0) > 0) eqps_rateF = sq23 * dgamF / delta_time(0);
          Fad rate_termF = 1.0 + std::asinh( std::pow(eqps_rateF / f, n));
          Fad kappaF = two_mubarF * eps_ssF;
          Fad PhiF = sq32 * (smagF - two_mubarF * dgamF) - ( Y + kappaF ) * rate_termF;

          // compute the hardening residual
          //
          Fad eps_resF = eps_ssF - eps_ss_old - (H - Rd*eps_ssF) * dgamF;

          // for convenience put the residuals into a container
          //
          RFad[0] = PhiF;
          RFad[1] = eps_resF;

          // extract the values of the residuals
          //
          for (int i = 0; i < 2; ++i)
            R[i] = RFad[i].val();

          // extract the sensitivities of the residuals
          //
          for (int i = 0; i < 2; ++i)
            for (int j = 0; j < 2; ++j)
              dRdX[i + 2 * j] = RFad[i].dx(j);

          // this call invokes the solver and updates the solution in X
          //
          solver.solve(dRdX, X, R);

          // compute the norm of the residual
          //
          RealType R0 = Sacado::ScalarValue<ScalarT>::eval(R[0]); 
          RealType R1 = Sacado::ScalarValue<ScalarT>::eval(R[1]);
          RealType norm_res = std::sqrt(R0*R0 + R1*R1);
          max_norm = std::max(norm_res, max_norm);
            
          // check against too many inerations
          //
          TEUCHOS_TEST_FOR_EXCEPTION(iter == 30, std::runtime_error,
                                     std::endl <<
                                     "Error in ElastoViscoplastic return mapping\n" <<
                                     "iter count = " << iter << "\n" << std::endl);

          // check for a sufficiently small residual
          //
          std::cout << "======== norm_res : " << norm_res << std::endl;
          if ( (norm_res/max_norm < 1.e-12) || (norm_res < 1.e-12) )
            converged = true;

          // increment the iteratio counter
          //
          iter++;
        }

        solver.computeFadInfo(dRdX, X, R);
        ScalarT dgam = X[0];
        ScalarT eps_ss = X[1];
        ScalarT kappa = 2.0 * mubar * eps_ss;

        std::cout << "======== dgam : " << dgam << std::endl;
        std::cout << "======== e_ss : " << eps_ss << std::endl;
        std::cout << "======== kapp : " << kappa << std::endl;

        // plastic direction
        N = (1 / smag) * s;

        // update s
        s -= 2 * mubar * dgam * N;

        // update state variables
        eps_ss_field(cell, pt) = eps_ss;
        eqps_field(cell,pt) = eqps_old + sq23 * dgam;
        kappa_field(cell,pt) = kappa;

        // mechanical source
        // FIXME this is not correct, just a placeholder
        //
        if (have_temperature_ && delta_time(0) > 0) {
          source_field(cell, pt) = (sq23 * dgam / delta_time(0))
            * (Y + kappa) / (density_ * heat_capacity_);
        }

        // exponential map to get Fpnew
        //
        A = dgam * N;
        expA = Intrepid::exp(A);
        Fpnew = expA * Fpn;
        for (std::size_t i(0); i < num_dims_; ++i) {
          for (std::size_t j(0); j < num_dims_; ++j) {
            Fp_field(cell, pt, i, j) = Fpnew(i, j);
          }
        }
      } else {
        // we are not yielding, variables do not evolve
        //
        eps_ss_field(cell, pt) = eps_ss_old;
        eqps_field(cell,pt) = eqps_old;
        kappa_field(cell,pt) = kappa_old;
        if (have_temperature_) source_field(cell, pt) = 0.0;
        for (std::size_t i(0); i < num_dims_; ++i) {
          for (std::size_t j(0); j < num_dims_; ++j) {
            Fp_field(cell, pt, i, j) = Fpn(i, j);
          }
        }
      }

      // compute pressure
      ScalarT p = 0.5 * bulk * (J(cell, pt) - 1. / (J(cell, pt)));

      // compute stress
      sigma = p * I + s / J(cell, pt);
      for (std::size_t i(0); i < num_dims_; ++i) {
        for (std::size_t j(0); j < num_dims_; ++j) {
          stress_field(cell, pt, i, j) = sigma(i, j);
        }
      }
    }
  }

  if (have_temperature_) {
    for (std::size_t cell(0); cell < workset.numCells; ++cell) {
      for (std::size_t pt(0); pt < num_pts_; ++pt) {
        F.fill(&def_grad_field(cell,pt,0,0));
        ScalarT J = Intrepid::det(F);
        sigma.fill(&stress_field(cell,pt,0,0));
        sigma -= 3.0 * expansion_coeff_ * (1.0 + 1.0 / (J*J))
          * (temperature_(cell,pt) - ref_temperature_) * I;
        for (std::size_t i = 0; i < num_dims_; ++i) {
          for (std::size_t j = 0; j < num_dims_; ++j) {
            stress_field(cell, pt, i, j) = sigma(i, j);
          }
        }
      }
    }
  }
}