Exemple #1
0
//**********************************************************************
// Unit test: runs InputConditionsEvaluator and HessianTestEvaluator through a
// PHX::FieldManager for the Hessian evaluation type, then compares the value
// and two second-derivative entries of each result with func()/hess_func().
TEUCHOS_UNIT_TEST(hessian_test,correctness)
{
  using Teuchos::RCP;
  using Teuchos::rcp;

  using EvalT         = panzer::Traits::Hessian;
  using ScalarT       = panzer::Traits::HessianType;
  using Value         = Sacado::ScalarValue<ScalarT>;
  using InputCondEval = InputConditionsEvaluator<EvalT,panzer::Traits>;
  using HessTestEval  = HessianTestEvaluator<EvalT,panzer::Traits>;
  using FieldManager  = PHX::FieldManager<panzer::Traits>;

  // Build the two evaluators from an empty parameter list.
  Teuchos::ParameterList empty_pl;
  RCP<InputCondEval> ic_eval = rcp(new InputCondEval(empty_pl));
  RCP<HessTestEval> ht_eval = rcp(new HessTestEval(empty_pl));

  // Register both evaluators and request the test evaluator's result field.
  RCP<FieldManager> fm = rcp(new FieldManager);
  fm->registerEvaluator<EvalT>(ic_eval);
  fm->registerEvaluator<EvalT>(ht_eval);
  fm->requireField<EvalT>(ht_eval->result.fieldTag());

  // A single extended (derivative) dimension of 4 is handed to the field
  // manager before post-registration setup.
  std::vector<PHX::index_size_type> derivative_dimensions;
  derivative_dimensions.push_back(4);
  fm->setKokkosExtendedDataTypeDimensions<EvalT>(derivative_dimensions);

  panzer::Traits::SetupData setupData;
  fm->postRegistrationSetup(setupData);

  // Run the full pre/evaluate/post cycle on a default-constructed workset.
  panzer::Workset workset;
  panzer::Traits::PreEvalData preEvalData;

  fm->preEvaluate<EvalT>(preEvalData);
  fm->evaluateFields<EvalT>(workset);
  fm->postEvaluate<EvalT>(0);

  // Compare the first five entries against the reference functions.
  for(int i=0;i<5;++i) {
    const double xi  = Value::eval(ic_eval->x(i));
    const double yi  = Value::eval(ic_eval->y(i));
    const double dxi = Value::eval(ic_eval->dx(i));
    const double dyi = Value::eval(ic_eval->dy(i));

    double f = func(xi,yi);
    std::vector<double> hess = hess_func(xi,yi,dxi,dyi);

    ScalarT r = ht_eval->result(i);

    TEST_EQUALITY(Value::eval(r),f);
    TEST_EQUALITY(r.fastAccessDx(0).fastAccessDx(0),hess[0]);
    TEST_EQUALITY(r.fastAccessDx(1).fastAccessDx(0),hess[1]);
  }
}
Exemple #2
0
//**********************************************************************
// Unit test: seeds Kokkos views of HessianType entries, evaluates
// sin(x*y) + 0.25*cos(y) entry by entry, and compares the value and two
// second-derivative entries of each result with func()/hess_func().
TEUCHOS_UNIT_TEST(hessian_test_k,correctness)
{
  using ScalarT = HessianType;
  using Value   = Sacado::ScalarValue<ScalarT>;
  using ViewT   = Kokkos::View<ScalarT*>;

  using Teuchos::RCP;
  using Teuchos::rcp;

  // Evaluation point and direction shared by every entry.
  const double x_val = 0.25;
  const double y_val = 0.5;
  const double dx_val = 2.0;
  const double dy_val = 3.0;

  const int num_pts = 5;

  ViewT x("x",num_pts);
  ViewT y("y",num_pts);
  ViewT dx("dx",num_pts);
  ViewT dy("dy",num_pts);
  ViewT result("result",num_pts);

  // Seed the inputs and evaluate the function for each entry.
  for(int i=0;i<num_pts;++i) {
    dx(i) = ScalarT(dx_val);
    dy(i) = ScalarT(dy_val);
    x(i) = seed_second_deriv(2,0,x_val,dx_val);
    y(i) = seed_second_deriv(2,1,y_val,dy_val);

    result(i) = std::sin(x(i)*y(i))+0.25*std::cos(y(i));
  }

  // Compare against the reference functions, reading the inputs back from
  // the views.
  for(int i=0;i<num_pts;++i) {
    const double xi  = Value::eval(x(i));
    const double yi  = Value::eval(y(i));
    const double dxi = Value::eval(dx(i));
    const double dyi = Value::eval(dy(i));

    double f = func(xi,yi);
    std::vector<double> hess = hess_func(xi,yi,dxi,dyi);

    ScalarT r = result(i);

    TEST_EQUALITY(Value::eval(r),f);
    TEST_EQUALITY(r.fastAccessDx(0).fastAccessDx(0),hess[0]);
    // NOTE(review): this reads inner component 1 of outer component 0;
    // confirm it matches the derivative layout produced by
    // seed_second_deriv and the inner derivative length of HessianType.
    TEST_EQUALITY(r.fastAccessDx(0).fastAccessDx(1),hess[1]);

    out << "RESULT = " << r << std::endl;
  }
}
// Hessian specialization of the Epetra residual scatter.
//
// For every cell in the workset and every scattered field, the inner
// derivative component 0 of each outer sensitivity of the field value is
// summed into the corresponding local row of the Epetra matrix. Column
// indices come from the column indexer when one is set, otherwise from the
// row indexer; when workset.other is non-null its cell's column LIDs are
// appended.
void panzer::ScatterResidual_Epetra<panzer::Traits::Hessian, TRAITS,LO,GO>::
evaluateFields(typename TRAITS::EvalData workset)
{
   // element block and local cell ids of this workset
   const std::string blockId = this->wda(workset).block_id;
   const std::vector<std::size_t> & localCellIds = this->wda(workset).cell_local_ids;

   // linear objects; the residual vector is fetched but not written below
   Teuchos::RCP<Epetra_Vector> r = epetraContainer_->get_f();
   Teuchos::RCP<Epetra_CrsMatrix> Jac = epetraContainer_->get_A();

   // fall back to the row indexer when no separate column indexer was given
   const bool useColumnIndexer = colGlobalIndexer_!=Teuchos::null;
   const Teuchos::RCP<const panzer::UniqueGlobalIndexer<LO,GO> >&
     colGlobalIndexer = useColumnIndexer ? colGlobalIndexer_ : globalIndexer_;

   // scratch storage reused across cells and basis functions
   std::vector<int> rowLIDs, colLIDs;
   std::vector<double> jacRow;

   // NOTE: A reordering of these loops will likely improve performance
   //       The "getGIDFieldOffsets" may be expensive.  However the
   //       "getElementGIDs" can be cheaper. However the lookup for LIDs
   //       may be more expensive!

   const std::size_t numCells = localCellIds.size();
   for(std::size_t cell=0; cell<numCells; ++cell) {
      const std::size_t cellLocalId = localCellIds[cell];

      rowLIDs = globalIndexer_->getElementLIDs(cellLocalId);
      colLIDs = colGlobalIndexer->getElementLIDs(cellLocalId);

      // append the column LIDs of the matching cell in the "other" workset
      if (Teuchos::nonnull(workset.other)) {
        const std::size_t otherCellLocalId = workset.other->cell_local_ids[cell];
        const std::vector<int> otherColLIDs = colGlobalIndexer->getElementLIDs(otherCellLocalId);
        colLIDs.insert(colLIDs.end(), otherColLIDs.begin(), otherColLIDs.end());
      }

      // one pass per scattered field
      for(std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); ++fieldIndex) {
         const int fieldNum = fieldIds_[fieldIndex];
         const std::vector<int> & elmtOffset = globalIndexer_->getGIDFieldOffsets(blockId,fieldNum);

         // one matrix row per basis function of the field
         for(std::size_t rowBasisNum = 0; rowBasisNum < elmtOffset.size(); ++rowBasisNum) {
            const ScalarT scatterField = (scatterFields_[fieldIndex])(cell,rowBasisNum);
            const int row = rowLIDs[elmtOffset[rowBasisNum]];

            // collect inner component 0 of every outer sensitivity
            const int numSens = scatterField.size();
            jacRow.resize(numSens);
            for(int sensIndex=0; sensIndex<numSens; ++sensIndex)
              jacRow[sensIndex] = scatterField.fastAccessDx(sensIndex).fastAccessDx(0);

            // only as many entries as both the column list and the row provide
            const std::size_t numEntries =
              std::min(colLIDs.size(), static_cast<std::size_t>(numSens));

            int err = Jac->SumIntoMyValues(
              row,
              numEntries,
              panzer::ptrFromStlVector(jacRow),
              panzer::ptrFromStlVector(colLIDs));
            TEUCHOS_ASSERT_EQUALITY(err,0);
         } // end rowBasisNum
      } // end fieldIndex
   }
}
// Tangent specialization: gathers the solution x (and, when
// workset.transientTerms is set, its time derivative xdot) from the Epetra
// vectors in the workset into the val / val_dot fields.
//
// For each node the equation ids in eqID are consumed in this order, with n
// as the running index: node variables, then per level the vector-level
// variables (numDims components each) followed by the scalar-level
// variables, then per level the tracer variables.
//
// When the direction multivector (Vx / Vxdot) is present and its coefficient
// (j_coeff / m_coeff) is nonzero, each entry is built with num_cols_tot
// derivative components and the first num_cols_x of them are set to
// coeff * V[k][id]; otherwise only the value is stored.
void GatherSolution<PHAL::AlbanyTraits::Tangent, Traits>::
evaluateFields(typename Traits::EvalData workset)
{

  Teuchos::RCP<const Epetra_Vector> x = workset.x;
  Teuchos::RCP<const Epetra_Vector> xdot = workset.xdot;
  Teuchos::RCP<const Epetra_MultiVector> Vx = workset.Vx;
  Teuchos::RCP<const Epetra_MultiVector> Vxdot = workset.Vxdot;

  const int num_cols_tot = workset.param_offset + workset.num_cols_p;

  // The seeding decisions do not change inside the loops, so evaluate once.
  const bool seed_x    = (Vx != Teuchos::null) && (workset.j_coeff != 0.0);
  const bool seed_xdot = (Vxdot != Teuchos::null) && (workset.m_coeff != 0.0);

  ScalarT* valptr;

  for (int cell=0; cell < workset.numCells; ++cell ) {
    const Teuchos::ArrayRCP<Teuchos::ArrayRCP<int> >& nodeID  = workset.wsElNodeEqID[cell];

    for (int node = 0; node < this->numNodes; ++node) {
      const Teuchos::ArrayRCP<int>& eqID  = nodeID[node];

      // ---- solution ----
      int n = 0, eq = 0;

      // node variables
      for (int j = eq; j < eq+this->numNodeVar; j++, ++n) {
        valptr = &(this->val[j])(cell,node);
        if (seed_x) {
          *valptr = TanFadType(num_cols_tot, (*x)[eqID[n]]);
          for (int k=0; k<workset.num_cols_x; k++)
            valptr->fastAccessDx(k) = workset.j_coeff*(*Vx)[k][eqID[n]];
        }
        else
          *valptr = TanFadType((*x)[eqID[n]]);
      }
      eq += this->numNodeVar;

      // level variables: vectors first, then scalars, for each level
      for (int level = 0; level < this->numLevels; level++) {
        for (int j = eq; j < eq+this->numVectorLevelVar; j++) {
          for (int dim = 0; dim < this->numDims; ++dim, ++n) {
            valptr = &(this->val[j])(cell,node,level,dim);
            if (seed_x) {
              *valptr = TanFadType(num_cols_tot, (*x)[eqID[n]]);
              for (int k=0; k<workset.num_cols_x; k++)
                valptr->fastAccessDx(k) = workset.j_coeff*(*Vx)[k][eqID[n]];
            }
            else
              *valptr = TanFadType((*x)[eqID[n]]);
          }
        }
        for (int j = eq+this->numVectorLevelVar;
                 j < eq+this->numVectorLevelVar+this->numScalarLevelVar; j++, ++n) {
          valptr = &(this->val[j])(cell,node,level);
          if (seed_x) {
            *valptr = TanFadType(num_cols_tot, (*x)[eqID[n]]);
            for (int k=0; k<workset.num_cols_x; k++)
              valptr->fastAccessDx(k) = workset.j_coeff*(*Vx)[k][eqID[n]];
          }
          else
            *valptr = TanFadType((*x)[eqID[n]]);
        }
      }
      eq += this->numVectorLevelVar+this->numScalarLevelVar;

      // tracer variables
      for (int level = 0; level < this->numLevels; ++level) {
        for (int j = eq; j < eq+this->numTracerVar; ++j, ++n) {
          valptr = &(this->val[j])(cell,node,level);
          if (seed_x) {
            *valptr = TanFadType(num_cols_tot, (*x)[eqID[n]]);
            for (int k=0; k<workset.num_cols_x; k++)
              valptr->fastAccessDx(k) = workset.j_coeff*(*Vx)[k][eqID[n]];
          }
          else
            *valptr = TanFadType((*x)[eqID[n]]);
        }
      }

      // ---- time derivative ----
      if (workset.transientTerms) {
        // restart the running indices; val_dot uses the same layout as val
        n = 0;
        eq = 0;

        // node variables
        for (int j = eq; j < eq+this->numNodeVar; j++, ++n) {
          valptr = &(this->val_dot[j])(cell,node);
          if (seed_xdot) {
            *valptr = TanFadType(num_cols_tot, (*xdot)[eqID[n]]);
            for (int k=0; k<workset.num_cols_x; k++)
              valptr->fastAccessDx(k) =
                workset.m_coeff*(*Vxdot)[k][eqID[n]];
          }
          else
            *valptr = TanFadType((*xdot)[eqID[n]]);
        }
        eq += this->numNodeVar;

        // level variables: vectors first, then scalars, for each level
        for (int level = 0; level < this->numLevels; level++) {
          for (int j = eq; j < eq+this->numVectorLevelVar; j++) {
            for (int dim = 0; dim < this->numDims; ++dim, ++n) {
              valptr = &(this->val_dot[j])(cell,node,level,dim);
              if (seed_xdot) {
                *valptr = TanFadType(num_cols_tot, (*xdot)[eqID[n]]);
                for (int k=0; k<workset.num_cols_x; k++)
                  valptr->fastAccessDx(k) =
                    workset.m_coeff*(*Vxdot)[k][eqID[n]];
              }
              else
                *valptr = TanFadType((*xdot)[eqID[n]]);
            }
          }
          // The upper bound must be vector count + scalar count (as in the
          // solution gather above); it previously added the scalar count
          // twice, which visits the wrong number of variables whenever the
          // two counts differ.
          for (int j = eq+this->numVectorLevelVar;
                   j < eq+this->numVectorLevelVar+this->numScalarLevelVar; j++,++n) {
            valptr = &(this->val_dot[j])(cell,node,level);
            if (seed_xdot) {
              *valptr = TanFadType(num_cols_tot, (*xdot)[eqID[n]]);
              for (int k=0; k<workset.num_cols_x; k++)
                valptr->fastAccessDx(k) =
                  workset.m_coeff*(*Vxdot)[k][eqID[n]];
            }
            else
              *valptr = TanFadType((*xdot)[eqID[n]]);
          }
        }
        eq += this->numVectorLevelVar+this->numScalarLevelVar;

        // tracer variables
        for (int level = 0; level < this->numLevels; ++level) {
          for (int j = eq; j < eq+this->numTracerVar; ++j, ++n) {
            valptr = &(this->val_dot[j])(cell,node,level);
            if (seed_xdot) {
              *valptr = TanFadType(num_cols_tot, (*xdot)[eqID[n]]);
              for (int k=0; k<workset.num_cols_x; k++)
                valptr->fastAccessDx(k) =
                  workset.m_coeff*(*Vxdot)[k][eqID[n]];
            }
            else
              *valptr = TanFadType((*xdot)[eqID[n]]);
          }
        }
      }
    }
  }
}
// Jacobian specialization: gathers x (and, when workset.transientTerms is
// set, xdot) into the val / val_dot fields as FadType entries with
// num_dof = neq * numNodes derivative components.
//
// Each gathered entry gets a single seeded derivative at index
// firstunk + n, where firstunk = neq * node and n is the running index into
// eqID; the seed is j_coeff for val and m_coeff for val_dot. Entries of val
// additionally receive setUpdateValue(!ignore_residual).
void GatherSolution<PHAL::AlbanyTraits::Jacobian, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  const Teuchos::RCP<const Epetra_Vector>    x = workset.x;
  const Teuchos::RCP<const Epetra_Vector> xdot = workset.xdot;

  // same flag for every solution entry
  const bool update_value = !workset.ignore_residual;

  for (int cell = 0; cell < workset.numCells; ++cell) {
    const Teuchos::ArrayRCP<Teuchos::ArrayRCP<int> >& nodeID = workset.wsElNodeEqID[cell];
    const int neq = nodeID[0].size();
    const int num_dof = neq * this->numNodes;

    for (int node = 0; node < this->numNodes; ++node) {
      const Teuchos::ArrayRCP<int>& eqID = nodeID[node];
      const int firstunk = neq * node;

      // ---------------- solution ----------------
      int n = 0;   // running index into eqID
      int eq = 0;  // first field index of the current variable group

      // node variables
      for (int j = eq; j < eq+this->numNodeVar; ++j, ++n) {
        ScalarT& entry = (this->val[j])(cell,node);
        entry = FadType(num_dof, (*x)[eqID[n]]);
        entry.setUpdateValue(update_value);
        entry.fastAccessDx(firstunk + n) = workset.j_coeff;
      }
      eq += this->numNodeVar;

      // per level: vector variables (one entry per dimension), then scalars
      for (int level = 0; level < this->numLevels; ++level) {
        for (int j = eq; j < eq+this->numVectorLevelVar; ++j) {
          for (int dim = 0; dim < this->numDims; ++dim, ++n) {
            ScalarT& entry = (this->val[j])(cell,node,level,dim);
            entry = FadType(num_dof, (*x)[eqID[n]]);
            entry.setUpdateValue(update_value);
            entry.fastAccessDx(firstunk + n) = workset.j_coeff;
          }
        }
        for (int j = eq+this->numVectorLevelVar;
                 j < eq+this->numVectorLevelVar+this->numScalarLevelVar; ++j, ++n) {
          ScalarT& entry = (this->val[j])(cell,node,level);
          entry = FadType(num_dof, (*x)[eqID[n]]);
          entry.setUpdateValue(update_value);
          entry.fastAccessDx(firstunk + n) = workset.j_coeff;
        }
      }
      eq += this->numVectorLevelVar+this->numScalarLevelVar;

      // per level: tracer variables
      for (int level = 0; level < this->numLevels; ++level) {
        for (int j = eq; j < eq+this->numTracerVar; ++j, ++n) {
          ScalarT& entry = (this->val[j])(cell,node,level);
          entry = FadType(num_dof, (*x)[eqID[n]]);
          entry.setUpdateValue(update_value);
          entry.fastAccessDx(firstunk + n) = workset.j_coeff;
        }
      }

      if (!workset.transientTerms)
        continue;

      // ---------------- time derivative ----------------
      // same traversal as above, restarted from the first equation id
      n = 0;
      eq = 0;

      // node variables
      for (int j = eq; j < eq+this->numNodeVar; ++j, ++n) {
        ScalarT& entry = (this->val_dot[j])(cell,node);
        entry = FadType(num_dof, (*xdot)[eqID[n]]);
        entry.fastAccessDx(firstunk + n) = workset.m_coeff;
      }
      eq += this->numNodeVar;

      // per level: vector variables, then scalars
      for (int level = 0; level < this->numLevels; ++level) {
        for (int j = eq; j < eq+this->numVectorLevelVar; ++j) {
          for (int dim = 0; dim < this->numDims; ++dim, ++n) {
            ScalarT& entry = (this->val_dot[j])(cell,node,level,dim);
            entry = FadType(num_dof, (*xdot)[eqID[n]]);
            entry.fastAccessDx(firstunk + n) = workset.m_coeff;
          }
        }
        for (int j = eq+this->numVectorLevelVar;
                 j < eq+this->numVectorLevelVar+this->numScalarLevelVar; ++j, ++n) {
          ScalarT& entry = (this->val_dot[j])(cell,node,level);
          entry = FadType(num_dof, (*xdot)[eqID[n]]);
          entry.fastAccessDx(firstunk + n) = workset.m_coeff;
        }
      }
      eq += this->numVectorLevelVar+this->numScalarLevelVar;

      // per level: tracer variables
      for (int level = 0; level < this->numLevels; ++level) {
        for (int j = eq; j < eq+this->numTracerVar; ++j, ++n) {
          ScalarT& entry = (this->val_dot[j])(cell,node,level);
          entry = FadType(num_dof, (*xdot)[eqID[n]]);
          entry.fastAccessDx(firstunk + n) = workset.m_coeff;
        }
      }
    }
  }
}
// Jacobian specialization of the Tpetra Dirichlet scatter.
//
// For each locally owned DOF returned by getGIDFieldOffsets_closure (and
// accepted by applyBC_ when checkApplyBC_ is set): zero the existing entries
// of its matrix row, write the field value into the residual vector, flag
// the row in the Dirichlet counter, and replace the row's entries at the
// cell's global column ids with the field's derivative components.
void panzer::ScatterDirichletResidual_Tpetra<panzer::Traits::Jacobian, TRAITS,LO,GO,NodeT>::
evaluateFields(typename TRAITS::EvalData workset)
{
   // element block and local cell ids of this workset
   const std::string blockId = workset.block_id;
   const std::vector<std::size_t> & localCellIds = workset.cell_local_ids;

   // residual vector and Jacobian matrix, plus writable local views
   Teuchos::RCP<typename LOC::VectorType> r = tpetraContainer_->get_f();
   Teuchos::RCP<typename LOC::CrsMatrixType> Jac = tpetraContainer_->get_A();

   Teuchos::ArrayRCP<double> r_array = r->get1dViewNonConst();
   Teuchos::ArrayRCP<double> dc_array = dirichletCounter_->get1dViewNonConst();

   // global ids of the current cell, refilled for every cell
   std::vector<GO> GIDs;

   // NOTE: A reordering of these loops will likely improve performance
   //       The "getGIDFieldOffsets" may be expensive.  However the
   //       "getElementGIDs" can be cheaper. However the lookup for LIDs
   //       may be more expensive!

   for(std::size_t worksetCellIndex=0; worksetCellIndex<localCellIds.size(); ++worksetCellIndex) {
      const std::size_t cellLocalId = localCellIds[worksetCellIndex];

      globalIndexer_->getElementGIDs(cellLocalId,GIDs);
      const std::vector<LO> & LIDs = globalIndexer_->getElementLIDs(cellLocalId);

      for(std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); ++fieldIndex) {
         const int fieldNum = fieldIds_[fieldIndex];

         // this call "should" get the right ordering according to the Intrepid basis
         const std::pair<std::vector<int>,std::vector<int> > & indicePair
               = globalIndexer_->getGIDFieldOffsets_closure(blockId,fieldNum, side_subcell_dim_, local_side_id_);
         const std::vector<int> & elmtOffset = indicePair.first;
         const std::vector<int> & basisIdMap = indicePair.second;

         for(std::size_t basis=0; basis<elmtOffset.size(); ++basis) {
            const int offset = elmtOffset[basis];
            const LO lid = LIDs[offset];

            // negative local id: not on this processor
            if(lid<0)
               continue;

            const int basisId = basisIdMap[basis];

            // optionally skip DOFs for which the BC is not applied
            if (checkApplyBC_ && !applyBC_[fieldIndex](worksetCellIndex,basisId))
               continue;

            // zero out the existing entries of this matrix row
            {
               const std::size_t rowLength = Jac->getNumEntriesInLocalRow(lid);
               std::size_t numEntries = 0;
               Teuchos::Array<LO> rowIndices(rowLength);
               Teuchos::Array<double> rowValues(rowLength);

               Jac->getLocalRowCopy(lid,rowIndices,rowValues,numEntries);

               for(std::size_t entry=0; entry<numEntries; ++entry)
                  rowValues[entry] = 0.0;

               Jac->replaceLocalValues(lid,rowIndices,rowValues);
            }

            const GO gid = GIDs[offset];
            const ScalarT scatterField = (scatterFields_[fieldIndex])(worksetCellIndex,basisId);

            r_array[lid] = scatterField.val();
            dc_array[lid] = 1.0; // mark row as dirichlet

            // derivative components of the field value, one per cell DOF
            const int numSens = scatterField.size();
            std::vector<double> jacRow(numSens,0.0);
            for(int sensIndex=0; sensIndex<numSens; ++sensIndex)
               jacRow[sensIndex] = scatterField.fastAccessDx(sensIndex);
            TEUCHOS_ASSERT(jacRow.size()==GIDs.size());

            Jac->replaceGlobalValues(gid, GIDs, jacRow);
         }
      }
   }
}
// Jacobian specialization of the blocked Tpetra Dirichlet scatter.
//
// For each locally owned DOF returned by getGIDFieldOffsets_closure (and
// accepted by applyBC_ when checkApplyBC_ is set): zero the existing entries
// of its row in every available column sub-block, write the field value into
// the block residual, flag the row in the Dirichlet counter, and replace the
// row's entries in every available column sub-block with the matching slice
// of the field's derivative components.
void panzer::ScatterDirichletResidual_BlockedTpetra<panzer::Traits::Jacobian, TRAITS,LO,GO,NodeT>::
evaluateFields(typename TRAITS::EvalData workset)
{
    using Teuchos::RCP;
    using Teuchos::ArrayRCP;
    using Teuchos::ptrFromRef;
    using Teuchos::rcp_dynamic_cast;

    using Thyra::VectorBase;
    using Thyra::SpmdVectorBase;
    using Thyra::ProductVectorBase;
    using Thyra::BlockedLinearOpBase;

    // element block and local cell ids of this workset
    const std::string blockId = this->wda(workset).block_id;
    const std::vector<std::size_t> & localCellIds = this->wda(workset).cell_local_ids;

    RCP<ProductVectorBase<double> > r = rcp_dynamic_cast<ProductVectorBase<double> >(blockedContainer_->get_f());
    Teuchos::RCP<BlockedLinearOpBase<double> > Jac = rcp_dynamic_cast<BlockedLinearOpBase<double> >(blockedContainer_->get_A());

    // start offset of every field block, plus one trailing sentinel slot
    // that is filled per cell with the number of GIDs
    const int numFieldBlocks = globalIndexer_->getNumFieldBlocks();
    std::vector<int> blockOffsets(numFieldBlocks+1);
    for(int blk=0; blk<numFieldBlocks; ++blk)
        blockOffsets[blk] = globalIndexer_->getBlockGIDOffset(blockId,blk);

    // cache of sub-block matrices already extracted from the blocked operator
    std::unordered_map<std::pair<int,int>,Teuchos::RCP<CrsMatrixType>,panzer::pair_hash> jacTpetraBlocks;

    // Returns the CRS matrix of sub-block (blockRow,blockCol), consulting the
    // cache first and filling it on a miss. Returns null when the blocked
    // operator holds a null operator at that position (block is excluded).
    auto lookupSubJac = [&](const int blockRow, const int blockCol) -> Teuchos::RCP<CrsMatrixType> {
        const std::pair<int,int> blockIndex = std::make_pair(blockRow,blockCol);
        Teuchos::RCP<CrsMatrixType> subJac = jacTpetraBlocks[blockIndex];

        if(subJac==Teuchos::null) {
            Teuchos::RCP<Thyra::LinearOpBase<double> > tOp = Jac->getNonconstBlock(blockIndex.first,blockIndex.second);

            // block operator is null, nothing to return (it is excluded)
            if(Teuchos::is_null(tOp))
                return subJac;

            Teuchos::RCP<OperatorType> tpetra_Op = rcp_dynamic_cast<ThyraLinearOp>(tOp)->getTpetraOperator();
            subJac = rcp_dynamic_cast<CrsMatrixType>(tpetra_Op,true);
            jacTpetraBlocks[blockIndex] = subJac;
        }

        return subJac;
    };

    // per-cell scratch: (block,global id) pairs and the matching local ids
    std::vector<std::pair<int,GO> > GIDs;
    std::vector<LO> LIDs;

    // NOTE: A reordering of these loops will likely improve performance
    //       The "getGIDFieldOffsets" may be expensive.  However the
    //       "getElementGIDs" can be cheaper. However the lookup for LIDs
    //       may be more expensive!

    for(std::size_t worksetCellIndex=0; worksetCellIndex<localCellIds.size(); ++worksetCellIndex) {
        const std::size_t cellLocalId = localCellIds[worksetCellIndex];

        globalIndexer_->getElementGIDs(cellLocalId,GIDs);
        blockOffsets[numFieldBlocks] = GIDs.size();

        // calculate the local IDs for this element from each block's map
        LIDs.resize(GIDs.size());
        for(std::size_t i=0; i<GIDs.size(); ++i) {
            RCP<const MapType> r_map = blockedContainer_->getMapForBlock(GIDs[i].first);
            LIDs[i] = r_map->getLocalElement(GIDs[i].second);
        }

        Teuchos::ArrayRCP<double> local_r, local_dc;
        for(std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); ++fieldIndex) {
            const int fieldNum = fieldIds_[fieldIndex];
            const int blockRowIndex = globalIndexer_->getFieldBlock(fieldNum);

            // writable local data of the Dirichlet counter block
            RCP<SpmdVectorBase<double> > dc = rcp_dynamic_cast<SpmdVectorBase<double> >(dirichletCounter_->getNonconstVectorBlock(blockRowIndex));
            dc->getNonconstLocalData(ptrFromRef(local_dc));

            // writable local data of the residual block
            RCP<SpmdVectorBase<double> > block_r = rcp_dynamic_cast<SpmdVectorBase<double> >(r->getNonconstVectorBlock(blockRowIndex));
            block_r->getNonconstLocalData(ptrFromRef(local_r));

            // this call "should" get the right ordering according to the Intrepid basis
            const std::pair<std::vector<int>,std::vector<int> > & indicePair
                = globalIndexer_->getGIDFieldOffsets_closure(blockId,fieldNum, side_subcell_dim_, local_side_id_);
            const std::vector<int> & elmtOffset = indicePair.first;
            const std::vector<int> & basisIdMap = indicePair.second;

            for(std::size_t basis=0; basis<elmtOffset.size(); ++basis) {
                const int offset = elmtOffset[basis];
                const int lid = LIDs[offset];

                // negative local id: not on this processor
                if(lid<0)
                    continue;

                const int basisId = basisIdMap[basis];

                // optionally skip DOFs for which the BC is not applied
                if (checkApplyBC_ && !applyBC_[fieldIndex](worksetCellIndex,basisId))
                    continue;

                // zero out this row in every available column sub-block
                for(int blockColIndex=0; blockColIndex<numFieldBlocks; ++blockColIndex) {
                    const int start = blockOffsets[blockColIndex];
                    const int end = blockOffsets[blockColIndex+1];

                    // empty column block
                    if(end-start<=0)
                        continue;

                    Teuchos::RCP<CrsMatrixType> subJac = lookupSubJac(blockRowIndex,blockColIndex);
                    if(subJac==Teuchos::null)
                        continue;

                    const std::size_t rowLength = subJac->getNumEntriesInLocalRow(lid);
                    std::size_t numEntries = 0;
                    Teuchos::Array<LO> rowIndices(rowLength);
                    Teuchos::Array<double> rowValues(rowLength);

                    subJac->getLocalRowCopy(lid,rowIndices,rowValues,numEntries);

                    for(std::size_t entry=0; entry<numEntries; ++entry)
                        rowValues[entry] = 0.0;

                    subJac->replaceLocalValues(lid,rowIndices,rowValues);
                }

                const ScalarT scatterField = (scatterFields_[fieldIndex])(worksetCellIndex,basisId);

                local_r[lid] = scatterField.val();
                local_dc[lid] = 1.0; // mark row as dirichlet

                // derivative components of the field value, one per cell DOF
                const int numSens = scatterField.size();
                std::vector<double> jacRow(numSens,0.0);
                for(int sensIndex=0; sensIndex<numSens; ++sensIndex)
                    jacRow[sensIndex] = scatterField.fastAccessDx(sensIndex);
                TEUCHOS_ASSERT(jacRow.size()==GIDs.size());

                // write each column block's slice of the row
                for(int blockColIndex=0; blockColIndex<numFieldBlocks; ++blockColIndex) {
                    const int start = blockOffsets[blockColIndex];
                    const int end = blockOffsets[blockColIndex+1];

                    // empty column block
                    if(end-start<=0)
                        continue;

                    Teuchos::RCP<CrsMatrixType> subJac = lookupSubJac(blockRowIndex,blockColIndex);
                    if(subJac==Teuchos::null)
                        continue;

                    // replace (not sum) the Jacobian entries of this row
                    subJac->replaceLocalValues(lid, Teuchos::arrayViewFromVector(LIDs).view(start,end-start),
                                               Teuchos::arrayViewFromVector(jacRow).view(start,end-start));
                }
            }
        }
    }
}
// Jacobian specialization of the blocked Epetra residual scatter.
//
// For every cell, field and basis function: sums the field value into the
// block residual (when a residual vector is present) and sums each column
// block's slice of the field's derivative components into the matching
// Epetra sub-block matrix row. A failed matrix insertion raises
// std::runtime_error with a diagnostic message.
void panzer::ScatterResidual_BlockedEpetra<panzer::Traits::Jacobian, TRAITS,LO,GO>::
evaluateFields(typename TRAITS::EvalData workset)
{
   using Teuchos::RCP;
   using Teuchos::ArrayRCP;
   using Teuchos::ptrFromRef;
   using Teuchos::rcp_dynamic_cast;

   using Thyra::VectorBase;
   using Thyra::SpmdVectorBase;
   using Thyra::ProductVectorBase;
   using Thyra::BlockedLinearOpBase;

   typedef BlockedEpetraLinearObjContainer BLOC;

   // element block and local cell ids of this workset
   const std::string blockId = this->wda(workset).block_id;
   const std::vector<std::size_t> & localCellIds = this->wda(workset).cell_local_ids;

   RCP<const BLOC> blockedContainer = blockedContainer_;

   RCP<ProductVectorBase<double> > r = rcp_dynamic_cast<ProductVectorBase<double> >(blockedContainer->get_f());
   Teuchos::RCP<BlockedLinearOpBase<double> > Jac = rcp_dynamic_cast<BlockedLinearOpBase<double> >(blockedContainer->get_A());

   // start offset of every field block, plus one trailing sentinel slot
   // that is filled below with the number of sensitivities
   const int numFieldBlocks = globalIndexer_->getNumFieldBlocks();
   std::vector<int> blockOffsets(numFieldBlocks+1);
   for(int blk=0; blk<numFieldBlocks; ++blk)
      blockOffsets[blk] = globalIndexer_->getBlockGIDOffset(blockId,blk);

   // cache of sub-block matrices already extracted from the blocked operator
   std::unordered_map<std::pair<int,int>,Teuchos::RCP<Epetra_CrsMatrix>,panzer::pair_hash> jacEpetraBlocks;

   // scratch storage reused across cells and basis functions
   std::vector<std::pair<int,GO> > GIDs;
   std::vector<LO> LIDs;
   std::vector<double> jacRow;

   // NOTE: A reordering of these loops will likely improve performance
   //       The "getGIDFieldOffsets" may be expensive.  However the
   //       "getElementGIDs" can be cheaper. However the lookup for LIDs
   //       may be more expensive!

   for(std::size_t worksetCellIndex=0; worksetCellIndex<localCellIds.size(); ++worksetCellIndex) {
      const std::size_t cellLocalId = localCellIds[worksetCellIndex];

      globalIndexer_->getElementGIDs(cellLocalId,GIDs,blockId);

      // calculate the local IDs for this element from each block's map
      LIDs.resize(GIDs.size());
      for(std::size_t i=0; i<GIDs.size(); ++i) {
         RCP<const Epetra_Map> r_map = blockedContainer->getMapForBlock(GIDs[i].first);
         LIDs[i] = r_map->LID(GIDs[i].second);
      }

      Teuchos::ArrayRCP<double> local_r;
      for(std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); ++fieldIndex) {
         const int fieldNum = fieldIds_[fieldIndex];
         const int blockRowIndex = globalIndexer_->getFieldBlock(fieldNum);

         // writable local data of the residual block, if there is a residual
         if(r!=Teuchos::null) {
            RCP<SpmdVectorBase<double> > block_r = rcp_dynamic_cast<SpmdVectorBase<double> >(r->getNonconstVectorBlock(blockRowIndex));
            block_r->getNonconstLocalData(ptrFromRef(local_r));
         }

         const std::vector<int> & elmtOffset = globalIndexer_->getGIDFieldOffsets(blockId,fieldNum);

         // loop over the basis functions (currently they are nodes)
         for(std::size_t rowBasisNum = 0; rowBasisNum < elmtOffset.size(); ++rowBasisNum) {
            const ScalarT scatterField = (scatterFields_[fieldIndex])(worksetCellIndex,rowBasisNum);
            const int rowOffset = elmtOffset[rowBasisNum];
            const int r_lid = LIDs[rowOffset];

            // Sum residual
            if(local_r!=Teuchos::null)
               local_r[r_lid] += (scatterField.val());

            // the sentinel offset is the number of sensitivities of this value
            const int numSens = scatterField.size();
            blockOffsets[numFieldBlocks] = numSens;
            jacRow.resize(numSens);

            // For Neumann conditions with no dependence on degrees of freedom, there should be no Jacobian contribution
            if(numSens == 0)
                continue;

            // loop over the sensitivity indices: all DOFs on a cell
            for(int sensIndex=0; sensIndex<numSens; ++sensIndex)
               jacRow[sensIndex] = scatterField.fastAccessDx(sensIndex);

            for(int blockColIndex=0; blockColIndex<numFieldBlocks; ++blockColIndex) {
               const int start = blockOffsets[blockColIndex];
               const int end = blockOffsets[blockColIndex+1];

               // empty column block
               if(end-start<=0)
                  continue;

               // check hash table for jacobian sub block
               const std::pair<int,int> blockIndex = std::make_pair(blockRowIndex,blockColIndex);
               Teuchos::RCP<Epetra_CrsMatrix> subJac = jacEpetraBlocks[blockIndex];

               // if you didn't find one before, add it to the hash table
               if(subJac==Teuchos::null) {
                  Teuchos::RCP<Thyra::LinearOpBase<double> > tOp = Jac->getNonconstBlock(blockIndex.first,blockIndex.second);

                  // block operator is null, don't do anything (it is excluded)
                  if(Teuchos::is_null(tOp))
                     continue;

                  Teuchos::RCP<Epetra_Operator> eOp = Thyra::get_Epetra_Operator(*tOp);
                  subJac = rcp_dynamic_cast<Epetra_CrsMatrix>(eOp,true);
                  jacEpetraBlocks[blockIndex] = subJac;
               }

               // Sum Jacobian
               const int err = subJac->SumIntoMyValues(r_lid, end-start, &jacRow[start],&LIDs[start]);
               if(err==0)
                  continue;

               // insertion failed: assemble a diagnostic message and throw
               RCP<const Epetra_Map> rr = blockedContainer->getMapForBlock(GIDs[start].first);
               const bool sameColMap = subJac->ColMap().SameAs(*rr);

               std::stringstream ss;
               ss << "Failed inserting row: " << GIDs[rowOffset].second << " (" << r_lid << "): ";
               for(int i=start; i<end; ++i)
                 ss << GIDs[i].second << " (" << LIDs[i] << ") ";
               ss << std::endl;
               ss << "Into block " << blockRowIndex << ", " << blockColIndex << std::endl;

               ss << "scatter field = ";
               scatterFields_[fieldIndex].print(ss);
               ss << std::endl;

               ss << "Same map = " << (sameColMap ? "true" : "false") << std::endl;

               TEUCHOS_TEST_FOR_EXCEPTION(err!=0,std::runtime_error,ss.str());
            }
         } // end rowBasisNum
      } // end fieldIndex
   }
}
// Tangent specialization of the response scatter.
//
// Sums the locally accumulated response `value` (its scalar value and each of its
// derivative components) over all ranks of this->getComm(), stores the global result
// back into `value`, and copies the derivative components into entry 0 of each column
// of the Epetra or Thyra multivector, depending on which one is in use.
//
// The local value must carry either no derivative components or exactly numDeriv()
// of them; anything else trips the assertion below.
void Response_Functional<panzer::Traits::Tangent>::
scatterResponse()
{
  const int n = value.size();
  const int num_deriv = this->numDeriv();
  TEUCHOS_ASSERT(n == 0 || n == num_deriv);

  // A rank whose local value has no derivative array (n == 0) still has to take part in
  // the derivative reduction with the same element count as every other rank, otherwise
  // the collective is called with mismatched counts (and a possibly null send buffer).
  // Promote the local value to a zero derivative array of full length so this rank
  // contributes nothing to the sum.
  // NOTE(review): assumes numDeriv() is identical on all ranks -- confirm.
  if (n == 0 && num_deriv > 0)
    value = ScalarT(num_deriv, value.val());

  ScalarT glbValue = ScalarT(num_deriv, 0.0);

  // do global summation -- it is possible to do the reduceAll() on the Fad's directly, but it is somewhat
  // complicated for DFad (due to temporaries that might get created).  Since this is just a sum, it is
  // easier to do the reduction for each value and derivative component.
  Teuchos::reduceAll(*this->getComm(), Teuchos::REDUCE_SUM, Thyra::Ordinal(1), &value.val(), &glbValue.val());
  if (num_deriv > 0)
    Teuchos::reduceAll(*this->getComm(), Teuchos::REDUCE_SUM, Thyra::Ordinal(num_deriv), value.dx(),  &glbValue.fastAccessDx(0));

  // from here on the stored response is the global one
  value = glbValue;

  // copy the derivative components into the output multivector: column i, entry 0
  if(this->useEpetra()) {
    // use epetra
    Epetra_MultiVector& deriv = this->getEpetraMultiVector();
    for (int i=0; i<num_deriv; ++i)
      deriv[i][0] = glbValue.dx(i);
  }
  else {
    // use thyra
    TEUCHOS_ASSERT(this->useThyra());
    Thyra::ArrayRCP< Thyra::ArrayRCP<double> > deriv = this->getThyraMultiVector();
    for (int i=0; i<num_deriv; ++i)
      deriv[i][0] = glbValue.dx(i);
  }
}
// Tangent specialization of the Dirichlet scatter.
//
// For every cell of the workset and every scattered field, writes the derivative
// components of the field values into the sensitivity vectors (dfdp_vectors_) at the
// locally owned rows, and flags each touched row in the Dirichlet counter.
// The scalar value itself is not written to the residual/solution vector here.
//
// When scatterIC_ is set, all basis functions of the field are scattered; otherwise
// only those in the closure of the configured side subcell, optionally filtered by
// applyBC_.
void panzer::ScatterDirichletResidual_Tpetra<panzer::Traits::Tangent, TRAITS,LO,GO,NodeT>::
evaluateFields(typename TRAITS::EvalData workset)
{
   // index scratch space, refilled for each cell
   std::vector<GO> globalIds;
   std::vector<LO> localIds;

   // workset data needed below
   std::string blockId = this->wda(workset).block_id;
   const std::vector<std::size_t> & cellIds = this->wda(workset).cell_local_ids;

   // x is the target when scattering initial conditions, f otherwise
   Teuchos::RCP<typename LOC::VectorType> r = scatterIC_ ?
     tpetraContainer_->get_x() :
     tpetraContainer_->get_f();

   // NOTE(review): r_array is never written in this specialization; the view is still
   //               acquired in case obtaining it has side effects -- confirm.
   Teuchos::ArrayRCP<double> r_array = r->get1dViewNonConst();
   Teuchos::ArrayRCP<double> dc_array = dirichletCounter_->get1dViewNonConst();

   // NOTE: Reordering these loops would likely improve performance.
   //       "getGIDFieldOffsets" may be expensive, whereas "getElementGIDs"
   //       can be cheaper -- although the LID lookup may then cost more.

   for(std::size_t cell=0; cell<cellIds.size(); ++cell) {
      const std::size_t cellLocalId = cellIds[cell];

      globalIndexer_->getElementGIDs(cellLocalId,globalIds);

      // translate the element's global IDs into local IDs of the target vector
      const std::size_t numIds = globalIds.size();
      localIds.resize(numIds);
      for(std::size_t k=0; k<numIds; ++k)
         localIds[k] = r->getMap()->getLocalElement(globalIds[k]);

      for(std::size_t field=0; field<scatterFields_.size(); ++field) {
         const int fieldNum = fieldIds_[field];

         if (scatterIC_) {
           // offsets of all of this field's basis functions within the element
           // (expected to follow the Intrepid2 basis ordering)
           const std::vector<int> & offsets = globalIndexer_->getGIDFieldOffsets(blockId,fieldNum);

           for(std::size_t b=0; b<offsets.size(); ++b) {
             const LO lid = localIds[offsets[b]];
             if(lid<0) {
               // row is not on this processor
               continue;
             }

             ScalarT value = (scatterFields_[field])(cell,b);

             // scatter the sensitivities; a value without derivative
             // components zeroes the row in every sensitivity vector
             const int numSens = value.size();
             if(numSens!=0) {
               for(int d=0; d<numSens; ++d)
                 dfdp_vectors_[d][lid] = value.fastAccessDx(d);
             }
             else {
               for(std::size_t d=0; d<dfdp_vectors_.size(); ++d)
                 dfdp_vectors_[d][lid] = 0.0;
             }

             // mark the row as carrying a dirichlet condition
             dc_array[lid] = 1.0;
           }
         }
         else {
           // offsets and basis indices restricted to the closure of the side subcell
           // (expected to follow the Intrepid2 basis ordering)
           const std::pair<std::vector<int>,std::vector<int> > & closure
             = globalIndexer_->getGIDFieldOffsets_closure(blockId,fieldNum, side_subcell_dim_, local_side_id_);
           const std::vector<int> & offsets  = closure.first;
           const std::vector<int> & basisIds = closure.second;

           for(std::size_t b=0; b<offsets.size(); ++b) {
             const LO lid = localIds[offsets[b]];
             if(lid<0) {
               // row is not on this processor
               continue;
             }

             const int basisId = basisIds[b];

             // honor the per-entry BC mask when one is in use
             if (checkApplyBC_ && !applyBC_[field](cell,basisId))
               continue;

             ScalarT value = (scatterFields_[field])(cell,basisId);

             // scatter the sensitivities; a value without derivative
             // components zeroes the row in every sensitivity vector
             const int numSens = value.size();
             if(numSens!=0) {
               for(int d=0; d<numSens; ++d)
                 dfdp_vectors_[d][lid] = value.fastAccessDx(d);
             }
             else {
               for(std::size_t d=0; d<dfdp_vectors_.size(); ++d)
                 dfdp_vectors_[d][lid] = 0.0;
             }

             // mark the row as carrying a dirichlet condition
             dc_array[lid] = 1.0;
           }
         }
      }
   }
}