// Computes the dislocation density G at every quadrature point of each cell
// in the workset.
//
// Per cell the steps are:
//   1. Fill A(qp,node) = BF(cell,node,qp) and solve A X = I, so that X holds
//      the inverse of A (the quadrature-point --> node operator).
//   2. nodalFp(node,i,j) = sum_qp X(node,qp) * Fp(cell,qp,i,j).
//   3. Accumulate curlFp(qp,.,.) from nodalFp and GradBF.
//   4. G(cell,qp,i,j) = sum_k Fp(cell,qp,i,k) * curlFp(qp,k,j).
//
// A is allocated numNodes x numNodes but is filled over (qp,node), so the
// method requires numNodes == numQPs. Step 3 indexes components 0..2
// explicitly (NOTE(review): presumably only meaningful for numDims == 3 --
// confirm against the evaluator's setup).
//
// Throws std::runtime_error if numNodes != numQPs, or if the factorization
// or the solve of A reports a nonzero status for any cell.
void DislocationDensity<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  // The dense system below is only well defined for a square operator; a
  // mismatch would index A and X outside their numNodes x numNodes storage
  // or leave rows of A unset.
  if (numNodes != numQPs)
    throw std::runtime_error(
      "DislocationDensity::evaluateFields: requires numNodes == numQPs");

  Teuchos::SerialDenseMatrix<int, double> A;
  Teuchos::SerialDenseMatrix<int, double> X;
  Teuchos::SerialDenseMatrix<int, double> B;
  Teuchos::SerialDenseSolver<int, double> solver;

  A.shape(numNodes,numNodes);
  X.shape(numNodes,numNodes);
  B.shape(numNodes,numNodes);

  // construct Identity for RHS (shape() zero-fills, so only the diagonal is set)
  for (int i = 0; i < numNodes; ++i)
    B(i,i) = 1.0;

  // G is accumulated with += below, so clear it first
  for (int i=0; i < G.size() ; i++) G[i] = 0.0;

  for (std::size_t cell=0; cell < workset.numCells; ++cell)
  {
    // construct the node --> point operator for this cell
    for (std::size_t node=0; node < numNodes; ++node)
      for (std::size_t qp=0; qp < numQPs; ++qp)
        A(qp,node) = BF(cell,node,qp);

    X = 0.0;

    // Non-owning RCPs: the matrices live on this stack frame. The matrix is
    // re-registered every cell because A has just been refilled.
    solver.setMatrix( Teuchos::rcp( &A, false) );
    solver.setVectors( Teuchos::rcp( &X, false ), Teuchos::rcp( &B, false ) );

    // Solve the system A X = B to find A_inverse; a nonzero status means the
    // result in X cannot be trusted, so stop instead of propagating it into G.
    if (solver.factor() != 0)
      throw std::runtime_error(
        "DislocationDensity::evaluateFields: factorization of the basis function matrix failed");
    if (solver.solve() != 0)
      throw std::runtime_error(
        "DislocationDensity::evaluateFields: solve for the inverse basis function matrix failed");

    // compute nodal Fp
    nodalFp.initialize(0.0);
    for (std::size_t node=0; node < numNodes; ++node)
      for (std::size_t qp=0; qp < numQPs; ++qp)
        for (std::size_t i=0; i < numDims; ++i)
          for (std::size_t j=0; j < numDims; ++j)
            nodalFp(node,i,j) += X(node,qp) * Fp(cell,qp,i,j);

    // compute the curl using nodalFp
    curlFp.initialize(0.0);
    for (std::size_t node=0; node < numNodes; ++node)
    {
      for (std::size_t qp=0; qp < numQPs; ++qp)
      {
        curlFp(qp,0,0) += nodalFp(node,0,2) * GradBF(cell,node,qp,1) - nodalFp(node,0,1) * GradBF(cell,node,qp,2);
        curlFp(qp,0,1) += nodalFp(node,1,2) * GradBF(cell,node,qp,1) - nodalFp(node,1,1) * GradBF(cell,node,qp,2);
        curlFp(qp,0,2) += nodalFp(node,2,2) * GradBF(cell,node,qp,1) - nodalFp(node,2,1) * GradBF(cell,node,qp,2);

        curlFp(qp,1,0) += nodalFp(node,0,0) * GradBF(cell,node,qp,2) - nodalFp(node,0,2) * GradBF(cell,node,qp,0);
        curlFp(qp,1,1) += nodalFp(node,1,0) * GradBF(cell,node,qp,2) - nodalFp(node,1,2) * GradBF(cell,node,qp,0);
        curlFp(qp,1,2) += nodalFp(node,2,0) * GradBF(cell,node,qp,2) - nodalFp(node,2,2) * GradBF(cell,node,qp,0);

        curlFp(qp,2,0) += nodalFp(node,0,1) * GradBF(cell,node,qp,0) - nodalFp(node,0,0) * GradBF(cell,node,qp,1);
        curlFp(qp,2,1) += nodalFp(node,1,1) * GradBF(cell,node,qp,0) - nodalFp(node,1,0) * GradBF(cell,node,qp,1);
        curlFp(qp,2,2) += nodalFp(node,2,1) * GradBF(cell,node,qp,0) - nodalFp(node,2,0) * GradBF(cell,node,qp,1);
      }
    }

    // G = Fp * curlFp at each quadrature point
    for (std::size_t qp=0; qp < numQPs; ++qp)
      for (std::size_t i=0; i < numDims; ++i)
        for (std::size_t j=0; j < numDims; ++j)
          for (std::size_t k=0; k < numDims; ++k)
            G(cell,qp,i,j) += Fp(cell,qp,i,k) * curlFp(qp,k,j);
  }
}
// Applies the saddle-point operator, or one of its preconditioners, to X and
// stores the result in Y. The action is selected by pt_:
//
//   NO_PREC  : Y.lower =  B' X.upper,  Y.upper = A X.upper + B X.lower
//   NONSYM   : Y.lower = -B' X.upper,  Y.upper = A X.upper + B X.lower
//   BD_PREC  : block-diagonal preconditioner; A_ is applied to the upper
//              part and a dense system with Schur_ is solved for the lower
//   HSS_PREC : Hermitian/skew-Hermitian splitting with shift alpha_
//
// The lower block of Y is obtained through Y.getLower(), updated, and
// written back with Y.setLower() at the end.
//
// Throws std::runtime_error if the dense Schur-complement factorization or
// solve reports a nonzero status, and std::logic_error if pt_ is not one of
// the types listed above (Y would otherwise be returned unmodified).
void SaddleOperator<ScalarType, MV, OP>::Apply(const SaddleContainer<ScalarType,MV>& X, SaddleContainer<ScalarType,MV>& Y) const
{
    RCP<SerialDenseMatrix> Xlower = X.getLower();
    RCP<SerialDenseMatrix> Ylower = Y.getLower();

    if(pt_ == NO_PREC)
    {
        // trans does literally nothing, because the operator is symmetric
        // Y.bottom = B'X.top
        MVT::MvTransMv(1., *B_, *(X.upper_), *Ylower);

        // Y.top = A*X.top+B*X.bottom
        A_->Apply(*(X.upper_), *(Y.upper_));
        MVT::MvTimesMatAddMv(1., *B_, *Xlower, 1., *(Y.upper_));
    }
    else if(pt_ == NONSYM)
    {
        // Y.bottom = -B'X.top
        MVT::MvTransMv(-1., *B_, *(X.upper_), *Ylower);

        // Y.top = A*X.top+B*X.bottom
        A_->Apply(*(X.upper_), *(Y.upper_));
        MVT::MvTimesMatAddMv(1., *B_, *Xlower, 1., *(Y.upper_));
    }
    else if(pt_ == BD_PREC)
    {
        Teuchos::SerialDenseSolver<int,ScalarType> MySolver;

        // Upper block: Y.upper = A_ applied to X.upper.
        // NOTE(review): the original comment described this as "solve A Y = X",
        // so A_ presumably already acts as the (approximate) inverse here -- confirm.
        A_->Apply(*(X.upper_),*(Y.upper_));

        // The dense solver factors its matrix in place, so hand it a private
        // copy and leave Schur_ intact for later applications.
        Teuchos::RCP<SerialDenseMatrix> localSchur = Teuchos::rcp(new SerialDenseMatrix(*Schur_));

        // Lower block: solve localSchur * Y.lower = X.lower
        MySolver.setMatrix(localSchur);
        MySolver.setVectors(Ylower, Xlower);

        // Factor explicitly so that a singular Schur complement is reported
        // here instead of silently producing an unusable Y.lower.
        if(MySolver.factor() != 0)
        {
            throw std::runtime_error("SaddleOperator::Apply: factorization of the Schur complement failed");
        }
        if(MySolver.solve() != 0)
        {
            throw std::runtime_error("SaddleOperator::Apply: solve with the Schur complement failed");
        }
    }
    // Hermitian-Skew Hermitian splitting has some extra requirements
    // We need B'B = I, which is true for standard eigenvalue problems, but not generalized
    // We also need to use gmres, because our operator is no longer symmetric
    else if(pt_ == HSS_PREC)
    {
        // Stage 1: solve (H + alpha I) Y1 = X
        //   upper part: apply the preconditioner
        A_->Apply(*(X.upper_),*(Y.upper_));
        //   lower part: scale by 1/alpha
        *Ylower = *Xlower;
        Ylower->scale(1./alpha_);

        // Stage 2: solve (S + alpha I) Y = Y1
        //   Y_lower = (B' Y1_upper + alpha Y1_lower) / (1 + alpha^2)
        // Y1_lower keeps the stage-1 lower part, since Ylower is overwritten
        // by the B' Y1_upper product below.
        Teuchos::RCP<SerialDenseMatrix> Y1_lower = Teuchos::rcp(new SerialDenseMatrix(*Ylower));
        MVT::MvTransMv(1,*B_,*(Y.upper_),*Ylower);
        Y1_lower->scale(alpha_);
        *Ylower += *Y1_lower;
        Ylower->scale(1/(1+alpha_*alpha_));
        //   Y_upper = (Y1_upper - B Y_lower) / alpha
        MVT::MvTimesMatAddMv(-1/alpha_,*B_,*Ylower,1/alpha_,*(Y.upper_));
    }
    else
    {
        // No branch computed anything, so Y holds stale data; fail loudly
        // rather than return it to the caller.
        throw std::logic_error("Not a valid preconditioner type");
    }

    Y.setLower(Ylower);
}