Teuchos::RCP<const Teuchos::ParameterList>
IfpackPreconditionerFactory::getValidParameters() const
{
  using Teuchos::rcp;
  using Teuchos::rcp_implicit_cast;
  typedef Teuchos::ParameterEntryValidator PEV;
  static Teuchos::RCP<Teuchos::ParameterList> validParamList;
  if(validParamList.get()==NULL) {
    validParamList = Teuchos::rcp(new Teuchos::ParameterList(Ifpack_name));
    {
      // Create the validator for the preconditioner type!
      Teuchos::Array<std::string>
        precTypeNames;
      precTypeNames.insert(
        precTypeNames.begin(),
        &Ifpack::precTypeNames[0],
        &Ifpack::precTypeNames[0] + Ifpack::numPrecTypes
        );
      Teuchos::Array<Ifpack::EPrecType>
        precTypeValues;
      precTypeValues.insert(
        precTypeValues.begin(),
        &Ifpack::precTypeValues[0],
        &Ifpack::precTypeValues[0] + Ifpack::numPrecTypes
        );
      precTypeValidator = rcp(
        new Teuchos::StringToIntegralParameterEntryValidator<Ifpack::EPrecType>(
          precTypeNames,precTypeValues,PrecType_name
          )
        );
    }
    validParamList->set(
      PrecType_name, PrecTypeName_default,
      "Type of Ifpack preconditioner to use.",
      rcp_implicit_cast<const PEV>(precTypeValidator)
      );
    validParamList->set(
      Overlap_name, Overlap_default,
      "Number of rows/columns overlapped between subdomains in different"
      "\nprocesses in the additive Schwarz-type domain-decomposition preconditioners."
      );
    validParamList->sublist(
      IfpackSettings_name, false,
      "Preconditioner settings that are passed onto the Ifpack preconditioners themselves."
      ).setParameters(Ifpack_GetValidParameters());
    // Note that in the above setParameterList(...) function that we actually
    // validate down into the first level of this sublist.  Really the
    // Ifpack_Preconditioner objects themselves should do validation but we do
    // it ourselves taking the return from the Ifpack_GetValidParameters()
    // function as gospel!
    Teuchos::setupVerboseObjectSublist(&*validParamList);
  }
  return validParamList;
}
int main(int argc, char *argv[])
{
  Teuchos::oblackholestream blackhole;
  Teuchos::GlobalMPISession mpiSession(&argc,&argv,&blackhole);

  using Teuchos::TimeMonitor;
  using TpetraExamples::FDStencil;
  using TpetraExamples::ScaleKernel;

  // 
  // Get the default communicator and node
  //
  auto &platform = Tpetra::DefaultPlatform::getDefaultPlatform();
  auto comm = platform.getComm();
  auto node = platform.getNode();
  const int myImageID = comm->getRank();
  const int numImages = comm->getSize();

  //
  // Get example parameters from command-line processor
  //  
  bool verbose = (myImageID==0);
  int numGlobal_user = 100*comm->getSize();
  int numTimeTrials = 3;
  Teuchos::CommandLineProcessor cmdp(false,true);
  cmdp.setOption("verbose","quiet",&verbose,"Print messages and results.");
  cmdp.setOption("global-size",&numGlobal_user,"Global test size.");
  cmdp.setOption("num-time-trials",&numTimeTrials,"Number of trials in timing loops.");
  if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
    return -1;
  }

  // 
  // Say hello, print some communicator info
  //
  if (verbose) {
    std::cout << "\n" << Tpetra::version() << std::endl;
    std::cout << "Comm info: " << *comm;
    std::cout << "Node type: " << Teuchos::typeName(*node) << std::endl;
    std::cout << std::endl;
  }

  // 
  // Create a simple map for domain and range
  // 
  Tpetra::global_size_t numGlobalRows = numGlobal_user;
  auto map = Tpetra::createUniformContigMapWithNode<int,int>(numGlobalRows, comm, node);
  // const size_t numLocalRows = map->getNodeNumElements();
  auto x = Tpetra::createVector<double>(map),
       y = Tpetra::createVector<double>(map);

  // Create a simple diagonal operator using lambda function
  auto fTwoOp = Tpetra::RTI::binaryOp<double>( [](double /*y*/, double x) { return 2.0 * x; } , map );
  // y = 3*fTwoOp*x + 2*y = 3*2*1 + 2*1 = 8
  x->putScalar(1.0);
  y->putScalar(1.0);
  fTwoOp->apply( *x, *y, Teuchos::NO_TRANS, 3.0, 2.0 );
  // check that y == eights
  double norm = y->norm1();
  if (verbose) {
    std::cout << "Tpetra::RTI::binaryOp" << std::endl
              << "norm(y) result == " << std::setprecision(2) << std::scientific << norm 
              << ", expected value is " << numGlobalRows * 8.0 << std::endl;
  }

  // Create the same diagonal operator using a Kokkos kernel
  auto kTwoOp = Tpetra::RTI::kernelOp<double>( ScaleKernel<double>(2.0), map );
  // y = 0.5*kTwop*x + 0.75*y = 0.5*2*1 + 0.75*8 = 7
  kTwoOp->apply( *x, *y, Teuchos::NO_TRANS, 0.5, 0.75 );
  // check that y == sevens
  norm = y->norm1();
  if (verbose) {
    std::cout << "Tpetra::RTI::kernelOp" << std::endl
              << "norm(y) result == " << std::setprecision(2) << std::scientific << norm 
              << ", expected value is " << numGlobalRows * 7.0 << std::endl;
  }

  //
  // Create a finite-difference stencil using a Kokkos kernel and non-trivial maps
  //
  decltype(map) colmap;
  if (numImages > 1) {
    Teuchos::Array<int>           colElements;
    Teuchos::ArrayView<const int> rowElements = map->getNodeElementList();
    // This isn't the most efficient Map/Import layout, but it makes for a very straight-forward kernel
    if (myImageID != 0) colElements.push_back( map->getMinGlobalIndex() - 1 );
    colElements.insert(colElements.end(), rowElements.begin(), rowElements.end());
    if (myImageID != numImages-1) colElements.push_back( map->getMaxGlobalIndex() + 1 );
    colmap = Tpetra::createNonContigMapWithNode<int,int>(colElements(), comm, node);
  }
  else {
    colmap = map;
  }
  auto importer = createImport(map,colmap);
  // Finite-difference kernel = tridiag(-1, 2, -1)
  FDStencil<double> kern(myImageID, numImages, map->getNodeNumElements(), -1.0, 2.0, -1.0);
  auto FDStencilOp = Tpetra::RTI::kernelOp<double>( kern, map, map, importer );
  // x = ones(), FD(x) = [1 zeros() 1]
  auto timeFDStencil = TimeMonitor::getNewTimer("FD RTI Stencil");
  {
    TimeMonitor lcltimer(*timeFDStencil);
    for (int l=0; l != numTimeTrials; ++l) {
      FDStencilOp->apply( *x, *y );
    }
  }
  norm = y->norm1();
  if (verbose) {
    std::cout << std::endl
              << "TpetraExamples::FDStencil" << std::endl
              << "norm(y) result == " << std::setprecision(2) << std::scientific << norm 
              << ", expected value is " << 2.0 << std::endl;
  }

  //
  // Create a finite-difference stencil using a CrsMatrix
  // 
  auto FDMatrix = Tpetra::createCrsMatrix<double>(map);  
  for (int r=map->getMinGlobalIndex(); r <= map->getMaxGlobalIndex(); ++r) {
    if (r == map->getMinAllGlobalIndex()) {
      FDMatrix->insertGlobalValues(r, Teuchos::tuple<int>(r,r+1), Teuchos::tuple<double>(2.0,-1.0));
    }
    else if (r == map->getMaxAllGlobalIndex()) {
      FDMatrix->insertGlobalValues(r, Teuchos::tuple<int>(r-1,r), Teuchos::tuple<double>(-1.0,2.0));
    }
    else {
      FDMatrix->insertGlobalValues(r, Teuchos::tuple<int>(r-1,r,r+1), Teuchos::tuple<double>(-1.0,2.0,-1.0));
    }
  }
  FDMatrix->fillComplete();
  auto timeFDMatrix = TimeMonitor::getNewTimer("FD CrsMatrix");
  {
    TimeMonitor lcltimer(*timeFDMatrix);
    for (int l=0; l != numTimeTrials; ++l) {
      FDMatrix->apply(*x, *y);
    }
  }

  //
  // Print timings
  //
  if (verbose) {
    std::cout << std::endl;
    TimeMonitor::summarize( std::cout );
  }

  if (verbose) {
    std::cout << "\nEnd Result: TEST PASSED" << std::endl;
  }
  return 0;
}
FCPtr BasisEvaluation::getTransformedValuesWithBasisValues(BasisPtr basis, Camellia::EOperator op,
                                                           constFCPtr referenceValues, int numCells,
                                                           BasisCache* basisCache)
{
  typedef FunctionSpaceTools fst;
  //  int numCells = cellJacobian.dimension(0);
  
  int spaceDim = basisCache->getSpaceDim(); // changed 3-21-16
  
  int componentOfInterest;
  Camellia::EFunctionSpace fs = basis->functionSpace();
  Intrepid::EOperator relatedOp = relatedOperator(op,fs,spaceDim, componentOfInterest);
  Teuchos::Array<int> dimensions;
  referenceValues->dimensions(dimensions);
  dimensions.insert(dimensions.begin(), numCells);
  Teuchos::RCP<FieldContainer<double> > transformedValues = Teuchos::rcp(new FieldContainer<double>(dimensions));
  bool vectorizedBasis = functionSpaceIsVectorized(fs);
  if (vectorizedBasis && (op ==  Camellia::OP_VALUE))
  {
    TEUCHOS_TEST_FOR_EXCEPTION( vectorizedBasis && (op ==  Camellia::OP_VALUE),
                               std::invalid_argument, "Vector HGRAD/HVOL with OP_VALUE not supported by getTransformedValuesWithBasisValues.  Please use getTransformedVectorValuesWithComponentBasisValues instead.");
  }
  switch(relatedOp)
  {
    case(Intrepid::OPERATOR_VALUE):
      switch(fs)
    {
      case Camellia::FUNCTION_SPACE_REAL_SCALAR:
        //          cout << "Reference values for FUNCTION_SPACE_REAL_SCALAR: " << *referenceValues;
      case Camellia::FUNCTION_SPACE_HGRAD:
      case Camellia::FUNCTION_SPACE_HGRAD_DISC:
        fst::HGRADtransformVALUE<double>(*transformedValues,*referenceValues);
        break;
      case Camellia::FUNCTION_SPACE_HCURL:
      case Camellia::FUNCTION_SPACE_HCURL_DISC:
        fst::HCURLtransformVALUE<double>(*transformedValues,basisCache->getJacobianInv(),*referenceValues);
        break;
      case Camellia::FUNCTION_SPACE_HDIV:
      case Camellia::FUNCTION_SPACE_HDIV_DISC:
      case Camellia::FUNCTION_SPACE_HDIV_FREE:
        fst::HDIVtransformVALUE<double>(*transformedValues,basisCache->getJacobian(),basisCache->getJacobianDet(),*referenceValues);
        break;
      case Camellia::FUNCTION_SPACE_HVOL:
      case Camellia::FUNCTION_SPACE_HVOL_SPACE_HGRAD_TIME:
      case Camellia::FUNCTION_SPACE_HGRAD_SPACE_HVOL_TIME:
        //        {
        //          static bool haveWarned = false;
        //          if (!haveWarned) {
        //            cout << "WARNING: for the moment, switching to the standard HVOLtransformVALUE method.\n";
        //            haveWarned = true;
        //          }
        //        }
        //          fst::HVOLtransformVALUE<double>(*transformedValues, cellJacobianDet, *referenceValues);
        // for the moment, use the fact that we know the HVOL basis is always an HGRAD basis:
        // (I think using the below amounts to solving for the HVOL variables scaled by Jacobian)
        fst::HGRADtransformVALUE<double>(*transformedValues,*referenceValues);
        break;
      default:
        TEUCHOS_TEST_FOR_EXCEPTION(true,std::invalid_argument, "unhandled transformation");
        break;
    }
      break;
    case(Intrepid::OPERATOR_GRAD):
    case(Intrepid::OPERATOR_D1):
      switch(fs)
    {
      case Camellia::FUNCTION_SPACE_HVOL:
      case Camellia::FUNCTION_SPACE_HGRAD:
      case Camellia::FUNCTION_SPACE_HGRAD_DISC:
        fst::HGRADtransformGRAD<double>(*transformedValues,basisCache->getJacobianInv(),*referenceValues);
        break;
      case Camellia::FUNCTION_SPACE_VECTOR_HVOL:
      case Camellia::FUNCTION_SPACE_VECTOR_HGRAD:
      case Camellia::FUNCTION_SPACE_VECTOR_HGRAD_DISC:
        // referenceValues has dimensions (F,P,D1,D2).  D1 is our component dimension, and D2 is the one that came from the gradient.
        // HGRADtransformGRAD expects (F,P,D) for input, and (C,F,P,D) for output.
        // If we split referenceValues into (F,P,D1=0,D2) and (F,P,D1=1,D2), then we can transform each of those, and then interleave the results…
      {
        // block off so we can create new stuff inside the switch case
        Teuchos::Array<int> dimensions;
        referenceValues->dimensions(dimensions);
        int numFields = dimensions[0];
        int numPoints = dimensions[1];
        int D1 = dimensions[dimensions.size()-2];
        int D2 = dimensions[dimensions.size()-1];
        dimensions[dimensions.size()-2] = D2; // put D2 in the D1 spot
        dimensions.pop_back(); // get rid of original D2
        FieldContainer<double> refValuesSlice(dimensions);
        dimensions.insert(dimensions.begin(),numCells);
        FieldContainer<double> transformedValuesSlice(dimensions);
        
        //          int numEntriesPerSlice = refValuesSlice.size();
        //          int numEntriesPerTransformedSlice = transformedValuesSlice.size();
        
        for (int compIndex1=0; compIndex1<D1; compIndex1++)
        {
          // could speed the following along by doing the enumeration arithmetic in place...
          for (int fieldIndex=0; fieldIndex<numFields; fieldIndex++)
          {
            for (int ptIndex=0; ptIndex<numPoints; ptIndex++)
            {
              for (int compIndex2=0; compIndex2<D2; compIndex2++)
              {
                refValuesSlice(fieldIndex,ptIndex,compIndex2) = (*referenceValues)(fieldIndex,ptIndex,compIndex1,compIndex2);
              }
            }
          }
          
          //            for (int i=0; i<numEntriesPerSlice; i++) {
          //              refValuesSlice[i] = (*referenceValues)[i*D2 + compIndex];
          //            }
          fst::HGRADtransformGRAD<double>(transformedValuesSlice,basisCache->getJacobianInv(),refValuesSlice);
          // could speed the following along by doing the enumeration arithmetic in place...
          for (int cellIndex=0; cellIndex<numCells; cellIndex++)
          {
            for (int fieldIndex=0; fieldIndex<numFields; fieldIndex++)
            {
              for (int ptIndex=0; ptIndex<numPoints; ptIndex++)
              {
                for (int compIndex2=0; compIndex2<D2; compIndex2++)
                {
                  (*transformedValues)(cellIndex,fieldIndex,ptIndex,compIndex1,compIndex2) = transformedValuesSlice(cellIndex,fieldIndex,ptIndex,compIndex2);
                }
              }
            }
          }
          //            for (int i=0; i<numEntriesPerTransformedSlice; i++) {
          //              (*transformedValues)[i*D2 + compIndex] = transformedValuesSlice[i];
          //            }
        }
      }
        
        break;
      default:
        TEUCHOS_TEST_FOR_EXCEPTION(true,std::invalid_argument, "unhandled transformation");
        break;
    }
      break;
    case(Intrepid::OPERATOR_CURL):
      switch(fs)
    {
      case Camellia::FUNCTION_SPACE_HCURL:
      case Camellia::FUNCTION_SPACE_HCURL_DISC:
        if (spaceDim == 2)
        {
          // TODO: confirm that this is correct
          //            static bool warningIssued = false;
          //            if ( ! warningIssued ) {
          //              cout << "WARNING: for HCURL in 2D, transforming with HVOLtransformVALUE. Need to confirm this is correct.\n";
          //              warningIssued = true;
          //            }
          fst::HVOLtransformVALUE<double>(*transformedValues,basisCache->getJacobianDet(), *referenceValues);
        }
        else
        {
          fst::HCURLtransformCURL<double>(*transformedValues,basisCache->getJacobian(),basisCache->getJacobianDet(),*referenceValues);
        }
        break;
      case Camellia::FUNCTION_SPACE_HGRAD:
      case Camellia::FUNCTION_SPACE_HGRAD_DISC:
        // in 2D, HGRADtransformCURL == HDIVtransformVALUE (because curl(H1) --> H(div))
        fst::HDIVtransformVALUE<double>(*transformedValues,basisCache->getJacobian(),basisCache->getJacobianDet(),*referenceValues);
        break;
      case Camellia::FUNCTION_SPACE_VECTOR_HVOL:
      case Camellia::FUNCTION_SPACE_VECTOR_HGRAD:
      case Camellia::FUNCTION_SPACE_VECTOR_HGRAD_DISC: // shouldn't take the transform so late
      default:
        TEUCHOS_TEST_FOR_EXCEPTION(true,std::invalid_argument, "unhandled transformation");
        break;
    }
      break;
    case(Intrepid::OPERATOR_DIV):
      switch(fs)
    {
      case Camellia::FUNCTION_SPACE_HDIV:
      case Camellia::FUNCTION_SPACE_HDIV_DISC:
      case Camellia::FUNCTION_SPACE_HDIV_FREE:
        fst::HDIVtransformDIV<double>(*transformedValues,basisCache->getJacobianDet(),*referenceValues);
        break;
      case Camellia::FUNCTION_SPACE_VECTOR_HVOL:
      case Camellia::FUNCTION_SPACE_VECTOR_HGRAD:
      case Camellia::FUNCTION_SPACE_VECTOR_HGRAD_DISC: // shouldn't take the transform so late
      default:
        TEUCHOS_TEST_FOR_EXCEPTION(true,std::invalid_argument, "unhandled transformation");
        break;
    }
      break;
    case(Intrepid::OPERATOR_D2):
      switch(fs)
    {
      case Camellia::FUNCTION_SPACE_HVOL:
      case Camellia::FUNCTION_SPACE_HGRAD:
      case Camellia::FUNCTION_SPACE_HGRAD_DISC:
      {
        // first term in the sum is:
        //      J^{-1} * OPD2 * J^{-T}
        // where J^{-1} is the inverse Jacabian, and OPD2 is the matrix of reference-space values formed from OP_D2
        
        const FieldContainer<double> *J_inv = &basisCache->getJacobianInv();
        transformedValues->initialize(0.0);
        int basisCardinality = basis->getCardinality();
        
        Teuchos::Array<int> multiplicities(spaceDim);
        Teuchos::Array<int> multiplicitiesTransformed(spaceDim);
        int dkCardinality = Intrepid::getDkCardinality(Intrepid::OPERATOR_D2, spaceDim);
        int numPoints = referenceValues->dimension(1);
        
        for (int cellOrdinal=0; cellOrdinal<numCells; cellOrdinal++)
        {
          for (int fieldOrdinal=0; fieldOrdinal<basisCardinality; fieldOrdinal++)
          {
            for (int pointOrdinal=0; pointOrdinal<numPoints; pointOrdinal++)
            {
              for (int dkOrdinal=0; dkOrdinal<dkCardinality; dkOrdinal++) // ref values dkOrdinal
              {
                double refValue = (*referenceValues)(fieldOrdinal,pointOrdinal,dkOrdinal);
                Intrepid::getDkMultiplicities(multiplicities, dkOrdinal, Intrepid::OPERATOR_D2, spaceDim);
                int k,l; // ref space coordinate directions for refValue (columns in J_inv)
                getD2_ij_FromMultiplicities(k,l,multiplicities);
                
                for (int dkOrdinalTransformed=0; dkOrdinalTransformed<dkCardinality; dkOrdinalTransformed++) // physical values dkOrdinal
                {
                  Intrepid::getDkMultiplicities(multiplicitiesTransformed, dkOrdinalTransformed, Intrepid::OPERATOR_D2, spaceDim);
                  int i,j; // physical space coordinate directions for refValue (rows in J_inv)
                  getD2_ij_FromMultiplicities(i,j,multiplicitiesTransformed);
                  double J_inv_ik = (*J_inv)(cellOrdinal,pointOrdinal,i,k);
                  double J_inv_jl = (*J_inv)(cellOrdinal,pointOrdinal,j,l);
                  (*transformedValues)(cellOrdinal,fieldOrdinal,pointOrdinal,dkOrdinalTransformed) += refValue * J_inv_ik * J_inv_jl;
                }
              }
            }
          }
        }
        
        // do we need the second term in the sum?  (So far, we don't support this)
        if (!basisCache->neglectHessian())
        {
          // then we need to do include the gradient of the basis times the (inverse) of the Hessian
          // this seems complicated, and we don't need it for computing the Laplacian in physical space
          // (at least not unless we have curvilinear geometry) so we don't support it for now...
          TEUCHOS_TEST_FOR_EXCEPTION(!basisCache->neglectHessian(), std::invalid_argument, "Support for the Hessian of the reference-to-physical mapping not yet implemented.");
        }
      }
        break;
      default:
        TEUCHOS_TEST_FOR_EXCEPTION(true,std::invalid_argument, "unhandled transformation");
        break;
    }
      break;

    default:
      TEUCHOS_TEST_FOR_EXCEPTION(true,std::invalid_argument, "unhandled transformation");
      break;
  }
  
  FCPtr result = getComponentOfInterest(transformedValues,op,fs,componentOfInterest);
  if ( result.get() == 0 )
  {
    result = transformedValues;
  }
  return result;
}
int main(int argc, char *argv[])
{
  Teuchos::oblackholestream blackhole;
  Teuchos::GlobalMPISession mpiSession(&argc,&argv,&blackhole);

  // 
  // Get the default communicator and node
  //
  auto &platform = Tpetra::DefaultPlatform::getDefaultPlatform();
  auto comm = platform.getComm();
  auto node = platform.getNode();
  const int myImageID = comm->getRank();
  const int numImages = comm->getSize();
  const bool verbose = (myImageID==0);

  // 
  // Say hello, print some communicator info
  //
  if (verbose) {
    std::cout << "\n" << Tpetra::version() << std::endl;
    std::cout << "Comm info: " << *comm;
    std::cout << "Node type: " << Teuchos::typeName(*node) << std::endl;
    std::cout << std::endl;
  }

  // 
  // Create a simple map for domain and range
  // 
  Tpetra::global_size_t numGlobalRows = 1000*numImages;
  auto map = Tpetra::createUniformContigMapWithNode<int,int>(numGlobalRows, comm, node);
  auto x = Tpetra::createVector<double>(map),
       y = Tpetra::createVector<double>(map);

  // Create a simple diagonal operator using lambda function
  auto fTwoOp = Tpetra::RTI::binaryOp<double>( [](double /*y*/, double x) { return 2.0 * x; } , map );
  // y = 3*fTwoOp*x + 2*y = 3*2*1 + 2*1 = 8
  x->putScalar(1.0);
  y->putScalar(1.0);
  fTwoOp->apply( *x, *y, Teuchos::NO_TRANS, 3.0, 2.0 );
  // check that y == eights
  double norm = y->norm1();
  if (verbose) {
    std::cout << "Tpetra::RTI::binaryOp" << std::endl
              << "norm(y) result == " << std::setprecision(2) << std::scientific << norm 
              << ", expected value is " << numGlobalRows * 8.0 << std::endl;
  }

  //
  // Create a finite-difference stencil using a Kokkos kernel and non-trivial maps
  //
  decltype(map) colmap;
  if (numImages > 1) {
    Teuchos::Array<int>           colElements;
    Teuchos::ArrayView<const int> rowElements = map->getNodeElementList();
    // This isn't the most efficient Map/Import layout, but it makes for a very straight-forward kernel
    if (myImageID != 0) colElements.push_back( map->getMinGlobalIndex() - 1 );
    colElements.insert(colElements.end(), rowElements.begin(), rowElements.end());
    if (myImageID != numImages-1) colElements.push_back( map->getMaxGlobalIndex() + 1 );
    colmap = Tpetra::createNonContigMapWithNode<int,int>(colElements(), comm, node);
  }
  else {
    colmap = map;
  }
  auto importer = createImport(map,colmap);
  // Finite-difference kernel = tridiag(-1, 2, -1)
  FDStencil<double> kern(myImageID, numImages, map->getNodeNumElements(), -1.0, 2.0, -1.0);
  auto FDStencilOp = Tpetra::RTI::kernelOp<double>( kern, map, map, importer );
  // x = ones(), FD(x) = [1 zeros() 1]
  FDStencilOp->apply( *x, *y );
  norm = y->norm1();
  if (verbose) {
    std::cout << std::endl
              << "TpetraExamples::FDStencil" << std::endl
              << "norm(y) result == " << std::setprecision(2) << std::scientific << norm 
              << ", expected value is " << 2.0 << std::endl;
  }

  std::cout << "\nEnd Result: TEST PASSED" << std::endl;
  return 0;
}
FCPtr BasisEvaluation::getTransformedValuesWithBasisValues(BasisPtr basis, Camellia::EOperator op, int spaceDim,
                                                           constFCPtr referenceValues, int numCells,
                                                           const FieldContainer<double> &cellJacobian,
                                                           const FieldContainer<double> &cellJacobianInv,
                                                           const FieldContainer<double> &cellJacobianDet)
{
  typedef FunctionSpaceTools fst;
//  int numCells = cellJacobian.dimension(0);
  
//  int spaceDim = basis->domainTopology()->getDimension(); // changed 2/18/15
//  // 6-16-14 NVR: getting the spaceDim from cellJacobian's dimensioning is the way we've historically done it.
//  // I think it might be better to do this using basis->domainTopology() generally, but for now we only make the
//  // switch in case the domain topology is a Node.
//  if (basis->domainTopology()->getDimension() == 0) {
//    spaceDim = 0;
//  } else {
//    spaceDim = cellJacobian.dimension(2);
//  }

  int componentOfInterest;
  Camellia::EFunctionSpace fs = basis->functionSpace();
  Intrepid::EOperator relatedOp = relatedOperator(op,fs,spaceDim, componentOfInterest);
  Teuchos::Array<int> dimensions;
  referenceValues->dimensions(dimensions);
  dimensions.insert(dimensions.begin(), numCells);
  Teuchos::RCP<FieldContainer<double> > transformedValues = Teuchos::rcp(new FieldContainer<double>(dimensions));
  bool vectorizedBasis = functionSpaceIsVectorized(fs);
  if (vectorizedBasis && (op ==  Camellia::OP_VALUE))
  {
    TEUCHOS_TEST_FOR_EXCEPTION( vectorizedBasis && (op ==  Camellia::OP_VALUE),
                                std::invalid_argument, "Vector HGRAD/HVOL with OP_VALUE not supported by getTransformedValuesWithBasisValues.  Please use getTransformedVectorValuesWithComponentBasisValues instead.");
  }
  switch(relatedOp)
  {
  case(Intrepid::OPERATOR_VALUE):
    switch(fs)
    {
    case Camellia::FUNCTION_SPACE_REAL_SCALAR:
    //          cout << "Reference values for FUNCTION_SPACE_REAL_SCALAR: " << *referenceValues;
    case Camellia::FUNCTION_SPACE_HGRAD:
    case Camellia::FUNCTION_SPACE_HGRAD_DISC:
      fst::HGRADtransformVALUE<double>(*transformedValues,*referenceValues);
      break;
    case Camellia::FUNCTION_SPACE_HCURL:
    case Camellia::FUNCTION_SPACE_HCURL_DISC:
      fst::HCURLtransformVALUE<double>(*transformedValues,cellJacobianInv,*referenceValues);
      break;
    case Camellia::FUNCTION_SPACE_HDIV:
    case Camellia::FUNCTION_SPACE_HDIV_DISC:
    case Camellia::FUNCTION_SPACE_HDIV_FREE:
      fst::HDIVtransformVALUE<double>(*transformedValues,cellJacobian,cellJacobianDet,*referenceValues);
        break;
      case Camellia::FUNCTION_SPACE_HVOL:
      case Camellia::FUNCTION_SPACE_HVOL_SPACE_HGRAD_TIME:
      case Camellia::FUNCTION_SPACE_HGRAD_SPACE_HVOL_TIME:
//        {
//          static bool haveWarned = false;
//          if (!haveWarned) {
//            cout << "WARNING: for the moment, switching to the standard HVOLtransformVALUE method.\n";
//            haveWarned = true;
//          }
//        }
//          fst::HVOLtransformVALUE<double>(*transformedValues, cellJacobianDet, *referenceValues);
      // for the moment, use the fact that we know the HVOL basis is always an HGRAD basis:
      // (I think using the below amounts to solving for the HVOL variables scaled by Jacobian)
      fst::HGRADtransformVALUE<double>(*transformedValues,*referenceValues);
      break;
    default:
      TEUCHOS_TEST_FOR_EXCEPTION(true,std::invalid_argument, "unhandled transformation");
      break;
    }
    break;
  case(Intrepid::OPERATOR_GRAD):
    switch(fs)
    {
    case Camellia::FUNCTION_SPACE_HVOL:
    case Camellia::FUNCTION_SPACE_HGRAD:
    case Camellia::FUNCTION_SPACE_HGRAD_DISC:
      fst::HGRADtransformGRAD<double>(*transformedValues,cellJacobianInv,*referenceValues);
      break;
    case Camellia::FUNCTION_SPACE_VECTOR_HVOL:
    case Camellia::FUNCTION_SPACE_VECTOR_HGRAD:
    case Camellia::FUNCTION_SPACE_VECTOR_HGRAD_DISC:
      // referenceValues has dimensions (F,P,D1,D2).  D1 is our component dimension, and D2 is the one that came from the gradient.
      // HGRADtransformGRAD expects (F,P,D) for input, and (C,F,P,D) for output.
      // If we split referenceValues into (F,P,D1=0,D2) and (F,P,D1=1,D2), then we can transform each of those, and then interleave the results…
    {
      // block off so we can create new stuff inside the switch case
      Teuchos::Array<int> dimensions;
      referenceValues->dimensions(dimensions);
      int numFields = dimensions[0];
      int numPoints = dimensions[1];
      int D1 = dimensions[dimensions.size()-2];
      int D2 = dimensions[dimensions.size()-1];
      dimensions[dimensions.size()-2] = D2; // put D2 in the D1 spot
      dimensions.pop_back(); // get rid of original D2
      FieldContainer<double> refValuesSlice(dimensions);
      dimensions.insert(dimensions.begin(),numCells);
      FieldContainer<double> transformedValuesSlice(dimensions);

//          int numEntriesPerSlice = refValuesSlice.size();
//          int numEntriesPerTransformedSlice = transformedValuesSlice.size();

      for (int compIndex1=0; compIndex1<D1; compIndex1++)
      {
        // could speed the following along by doing the enumeration arithmetic in place...
        for (int fieldIndex=0; fieldIndex<numFields; fieldIndex++)
        {
          for (int ptIndex=0; ptIndex<numPoints; ptIndex++)
          {
            for (int compIndex2=0; compIndex2<D2; compIndex2++)
            {
              refValuesSlice(fieldIndex,ptIndex,compIndex2) = (*referenceValues)(fieldIndex,ptIndex,compIndex1,compIndex2);
            }
          }
        }

//            for (int i=0; i<numEntriesPerSlice; i++) {
//              refValuesSlice[i] = (*referenceValues)[i*D2 + compIndex];
//            }
        fst::HGRADtransformGRAD<double>(transformedValuesSlice,cellJacobianInv,refValuesSlice);
        // could speed the following along by doing the enumeration arithmetic in place...
        for (int cellIndex=0; cellIndex<numCells; cellIndex++)
        {
          for (int fieldIndex=0; fieldIndex<numFields; fieldIndex++)
          {
            for (int ptIndex=0; ptIndex<numPoints; ptIndex++)
            {
              for (int compIndex2=0; compIndex2<D2; compIndex2++)
              {
                (*transformedValues)(cellIndex,fieldIndex,ptIndex,compIndex1,compIndex2) = transformedValuesSlice(cellIndex,fieldIndex,ptIndex,compIndex2);
              }
            }
          }
        }
//            for (int i=0; i<numEntriesPerTransformedSlice; i++) {
//              (*transformedValues)[i*D2 + compIndex] = transformedValuesSlice[i];
//            }
      }
    }

    break;
    default:
      TEUCHOS_TEST_FOR_EXCEPTION(true,std::invalid_argument, "unhandled transformation");
      break;
    }
    break;
  case(Intrepid::OPERATOR_CURL):
    switch(fs)
    {
    case Camellia::FUNCTION_SPACE_HCURL:
    case Camellia::FUNCTION_SPACE_HCURL_DISC:
      if (spaceDim == 2)
      {
        // TODO: confirm that this is correct
//            static bool warningIssued = false;
//            if ( ! warningIssued ) {
//              cout << "WARNING: for HCURL in 2D, transforming with HVOLtransformVALUE. Need to confirm this is correct.\n";
//              warningIssued = true;
//            }
        fst::HVOLtransformVALUE<double>(*transformedValues, cellJacobianDet, *referenceValues);
      }
      else
      {
        fst::HCURLtransformCURL<double>(*transformedValues,cellJacobian,cellJacobianDet,*referenceValues);
      }
      break;
    case Camellia::FUNCTION_SPACE_HGRAD:
    case Camellia::FUNCTION_SPACE_HGRAD_DISC:
      // in 2D, HGRADtransformCURL == HDIVtransformVALUE (because curl(H1) --> H(div))
      fst::HDIVtransformVALUE<double>(*transformedValues,cellJacobian,cellJacobianDet,*referenceValues);
      break;
    case Camellia::FUNCTION_SPACE_VECTOR_HVOL:
    case Camellia::FUNCTION_SPACE_VECTOR_HGRAD:
    case Camellia::FUNCTION_SPACE_VECTOR_HGRAD_DISC: // shouldn't take the transform so late
    default:
      TEUCHOS_TEST_FOR_EXCEPTION(true,std::invalid_argument, "unhandled transformation");
      break;
    }
    break;
  case(Intrepid::OPERATOR_DIV):
    switch(fs)
    {
    case Camellia::FUNCTION_SPACE_HDIV:
    case Camellia::FUNCTION_SPACE_HDIV_DISC:
    case Camellia::FUNCTION_SPACE_HDIV_FREE:
      fst::HDIVtransformDIV<double>(*transformedValues,cellJacobianDet,*referenceValues);
      break;
    case Camellia::FUNCTION_SPACE_VECTOR_HVOL:
    case Camellia::FUNCTION_SPACE_VECTOR_HGRAD:
    case Camellia::FUNCTION_SPACE_VECTOR_HGRAD_DISC: // shouldn't take the transform so late
    default:
      TEUCHOS_TEST_FOR_EXCEPTION(true,std::invalid_argument, "unhandled transformation");
      break;
    }
    break;
  default:
    TEUCHOS_TEST_FOR_EXCEPTION(true,std::invalid_argument, "unhandled transformation");
    break;
  }

  FCPtr result = getComponentOfInterest(transformedValues,op,fs,componentOfInterest);
  if ( result.get() == 0 )
  {
    result = transformedValues;
  }
  return result;
}