//--------------------------------------------------------------------------
//-------- execute ---------------------------------------------------------
//--------------------------------------------------------------------------
void
AssembleOversetSolverConstraintAlgorithm::execute()
{
  // first thing to do is to zero out the row (lhs and rhs)
  prepare_constraints();

  // extract the rank
  const int theRank = NaluEnv::self().parallel_rank();

  stk::mesh::BulkData & bulkData = realm_.bulk_data();

  // space for LHS/RHS (nodesPerElem+1)*numDof*(nodesPerElem+1)*numDof; (nodesPerElem+1)*numDof
  std::vector<double> lhs;
  std::vector<double> rhs;
  std::vector<int> scratchIds;
  std::vector<double> scratchVals;
  std::vector<stk::mesh::Entity> connected_nodes;

  // master element data
  std::vector <double > ws_general_shape_function;
 
  // interpolate nodal values to point-in-elem
  const int sizeOfDof = eqSystem_->linsys_->numDof();
 
  // size interpolated value
  std::vector<double> qNp1Orphan(sizeOfDof, 0.0);

  // Parallel communication of ghosted entities has been already handled in
  // EquationSystems::pre_iter_work

  // iterate oversetInfoVec_
  std::vector<OversetInfo *>::iterator ii;
  for( ii=realm_.oversetManager_->oversetInfoVec_.begin();
       ii!=realm_.oversetManager_->oversetInfoVec_.end(); ++ii ) {

    // overset info object of interest
    OversetInfo * infoObject = (*ii);

    // extract element and node mesh object
    stk::mesh::Entity owningElement = infoObject->owningElement_;
    stk::mesh::Entity orphanNode = infoObject->orphanNode_;

    // extract the owning rank for this node
    const int nodeRank = bulkData.parallel_owner_rank(orphanNode);
    
    // check to see if this node is locally owned by this rank; we only want to process locally owned nodes, not shared
    if ( theRank != nodeRank )
      continue;

    // get master element type for this contactInfo
    MasterElement *meSCS  = infoObject->meSCS_;
    const int nodesPerElement = meSCS->nodesPerElement_;
    std::vector <double > elemNodalQ(nodesPerElement*sizeOfDof);
    std::vector <double > shpfc(nodesPerElement);

    // resize some things; matrix related
    const int npePlusOne = nodesPerElement+1;
    const int lhsSize = npePlusOne*sizeOfDof*npePlusOne*sizeOfDof;
    const int rhsSize = npePlusOne*sizeOfDof;
    lhs.resize(lhsSize);
    rhs.resize(rhsSize);
    scratchIds.resize(rhsSize);
    scratchVals.resize(rhsSize);
    connected_nodes.resize(npePlusOne);

    // algorithm related; element 
    ws_general_shape_function.resize(nodesPerElement);
   
    // pointer to lhs/rhs
    double *p_lhs = &lhs[0];
    double *p_rhs = &rhs[0];
    
    // zeroing of lhs/rhs
    for ( int k = 0; k < lhsSize; ++k ) {
      p_lhs[k] = 0.0;
    }
    for ( int k = 0; k < rhsSize; ++k ) {
      p_rhs[k] = 0.0;
    }
    
    // extract nodal value for scalarQ
    const double *qNp1Nodal = (double *)stk::mesh::field_data(*fieldQ_, orphanNode);
    
    stk::mesh::Entity const* elem_node_rels = bulkData.begin_nodes(owningElement);
    const int num_nodes = bulkData.num_nodes(owningElement);

    // now load the elemental values for future interpolation; fill in connected nodes; first connected node is orhpan
    connected_nodes[0] = orphanNode;
    for ( int ni = 0; ni < num_nodes; ++ni ) {
      stk::mesh::Entity node = elem_node_rels[ni];
      connected_nodes[ni+1] = node;

      const double *qNp1 = (double *)stk::mesh::field_data(*fieldQ_, node );
      for ( int i = 0; i < sizeOfDof; ++i ) {
        elemNodalQ[i*nodesPerElement + ni] = qNp1[i];
      }
    }

    // interpolate dof to elemental ips (assigns qNp1)
    meSCS->interpolatePoint(
      sizeOfDof,
      &(infoObject->isoParCoords_[0]),
      &elemNodalQ[0],
      &qNp1Orphan[0]);
    
    // lhs; extract general shape function
    meSCS->general_shape_fcn(1, &(infoObject->isoParCoords_[0]), &ws_general_shape_function[0]);

    // rhs; orphan node is defined to be the zeroth connected node
    for ( int i = 0; i < sizeOfDof; ++i) {
      const int rowOi = i * npePlusOne * sizeOfDof;
      const double residual = qNp1Nodal[i] - qNp1Orphan[i];
      p_rhs[i] = -residual;

      // row is zero by design (first connected node is the orphan node); assign it fully
      p_lhs[rowOi+i] += 1.0;

      for ( int ic = 0; ic < nodesPerElement; ++ic ) {
        const int indexR = i + sizeOfDof*(ic+1);
        const int rOiR = rowOi+indexR;
        p_lhs[rOiR] -= ws_general_shape_function[ic];
      }
    }

    // apply to linear system
    apply_coeff(connected_nodes, scratchIds, scratchVals, rhs, lhs, __FILE__);
  }
}
//--------------------------------------------------------------------------
//-------- execute ---------------------------------------------------------
//--------------------------------------------------------------------------
void
AssembleScalarEdgeDiffContactSolverAlgorithm::execute()
{

  stk::mesh::MetaData & meta_data = realm_.meta_data();
  stk::mesh::BulkData & bulk_data = realm_.bulk_data();

  const int nDim = meta_data.spatial_dimension();

  // space for LHS/RHS (nodesPerElem+1)*(nodesPerElem+1); nodesPerElem+1
  std::vector<double> lhs;
  std::vector<double> rhs;
  std::vector<int> scratchIds;
  std::vector<double> scratchVals;
  std::vector<stk::mesh::Entity> connected_nodes;

  // deal with state
  ScalarFieldType &scalarQNp1 = scalarQ_->field_of_state(stk::mesh::StateNP1);

  // space for interpolated right state (halo)
  double qNp1R;
  double diffFluxCoeffR;
  std::vector<double> dqdxR(nDim);

  // interpolate nodal values to point-in-elem
  const int sizeOfScalarField = 1;
  const int sizeOfVectorField = nDim;
  
  // parallel communicate ghosted entities
  if ( NULL != realm_.contactManager_->contactGhosting_ )
    stk::mesh::communicate_field_data(*(realm_.contactManager_->contactGhosting_), ghostFieldVec_);
  
  // iterate contactInfoVec_
  std::vector<ContactInfo *>::iterator ii;
  for( ii=realm_.contactManager_->contactInfoVec_.begin();
       ii!=realm_.contactManager_->contactInfoVec_.end(); ++ii ) {
    
    // get master element type for this contactInfo
    MasterElement *meSCS  = (*ii)->meSCS_;
    const int nodesPerElement = meSCS->nodesPerElement_;
    std::vector <double > elemNodalQ(nodesPerElement);
    std::vector <double > elemNodalDiffFluxCoeff(nodesPerElement);
    std::vector <double > elemNodalCoords(nDim*nodesPerElement);
    std::vector <double > elemNodalDqdx(nDim*nodesPerElement);
    std::vector <double > shpfc(nodesPerElement);

    // resize some things; matrix related
    const int npePlusOne = nodesPerElement+1;
    const int lhsSize = npePlusOne*npePlusOne;
    const int rhsSize = npePlusOne;
    lhs.resize(lhsSize);
    rhs.resize(rhsSize);
    scratchIds.resize(rhsSize);
    scratchVals.resize(rhsSize);
    connected_nodes.resize(npePlusOne);

    // pointer to lhs/rhs
    double *p_lhs = &lhs[0];
    double *p_rhs = &rhs[0];

    // iterate halo face nodes
    std::map<uint64_t, HaloInfo *>::iterator iterHalo;
    for (iterHalo  = (*ii)->haloInfoMap_.begin();
         iterHalo != (*ii)->haloInfoMap_.end();
         ++iterHalo) {

      // halo info object of interest
      HaloInfo * infoObject = (*iterHalo).second;

      // zeroing of lhs/rhs
      for ( int k = 0; k < lhsSize; ++k ) {
        p_lhs[k] = 0.0;
      }
      for ( int k = 0; k < rhsSize; ++k ) {
        p_rhs[k] = 0.0;
      }

      // pointer to edge area vector
      const double *p_areaVec = &infoObject->haloEdgeAreaVec_[0];

      // extract element mesh object and global id for face node
      stk::mesh::Entity elem  = infoObject->owningElement_;

      stk::mesh::Entity const* elem_node_rels = bulk_data.begin_nodes(elem);
      const int num_nodes = bulk_data.num_nodes(elem);

      // now load the elemental values for future interpolation; fill in connected nodes
      connected_nodes[0] = infoObject->faceNode_;
      for ( int ni = 0; ni < num_nodes; ++ni ) {
        stk::mesh::Entity node = elem_node_rels[ni];
        connected_nodes[ni+1] = node;

        elemNodalQ[ni] = *stk::mesh::field_data(*scalarQ_, node);
        elemNodalDiffFluxCoeff[ni] = *stk::mesh::field_data(*diffFluxCoeff_, node);

        // load up vectors
        const double * Gjq = stk::mesh::field_data(*dqdx_, node );
        const double * coords = stk::mesh::field_data(*coordinates_, node );
        for ( int j = 0; j < nDim; ++j ) {
          const int offSet = j*nodesPerElement +ni;
          elemNodalDqdx[offSet] = Gjq[j];
          elemNodalCoords[offSet] = coords[j];
        }
      }

      // extract nodal fields; right state is Halo and requires inperpolation
      const double *coordL = stk::mesh::field_data(*coordinates_, infoObject->faceNode_);
      const double *coordR = &infoObject->haloNodalCoords_[0];

      const double qNp1L = *stk::mesh::field_data(scalarQNp1, infoObject->faceNode_);
      meSCS->interpolatePoint(
        sizeOfScalarField,
        &(infoObject->isoParCoords_[0]),
        &elemNodalQ[0],
        &qNp1R);

      // possible Hermite polynomial interpolation
      if (NULL != hermite_) {
        double qNp1H = 0;
        hermite_->do_hermite(&elemNodalQ[0], &elemNodalCoords[0], &elemNodalDqdx[0], &coordR[0], qNp1H);
        qNp1R = qNp1H;
      }

      const double diffFluxCoeffL = *stk::mesh::field_data(*diffFluxCoeff_, infoObject->faceNode_);
      meSCS->interpolatePoint(
        sizeOfScalarField,
        &(infoObject->isoParCoords_[0]),
        &elemNodalDiffFluxCoeff[0],
        &diffFluxCoeffR);

      // deal with nodal dqdx
      const double *dqdxL = stk::mesh::field_data(*dqdx_, infoObject->faceNode_);
      meSCS->interpolatePoint(
        sizeOfVectorField,
        &(infoObject->isoParCoords_[0]),
        &elemNodalDqdx[0],
        &(dqdxR[0]));

      // ip props
      const double viscIp = 0.5*(diffFluxCoeffL + diffFluxCoeffR);

      // compute geometry
      double axdx = 0.0;
      double asq = 0.0;
      for (int j = 0; j < nDim; ++j ) {
        const double dxj = coordR[j] - coordL[j];
        const double axj = p_areaVec[j];
        axdx += axj*dxj;
        asq += axj*axj;
      }

      const double inv_axdx = 1.0/axdx;

      // NOC
      double nonOrth = 0.0;
      for ( int j = 0; j < nDim; ++j ) {
        const double axj = p_areaVec[j];
        const double dxj = coordR[j] - coordL[j];
        // now non-orth (over-relaxed procedure of Jasek)
        const double kxj = axj - asq*inv_axdx*dxj;
        const double GjIp = 0.5*(dqdxL[j] + dqdxR[j]);
        nonOrth += -viscIp*kxj*GjIp;
      }

      // iterate owning element nodes and form pair
      meSCS->general_shape_fcn(1, &(infoObject->isoParCoords_[0]), &shpfc[0]);

      const double lhsFac = -viscIp*asq*inv_axdx;
      const double diffFlux = lhsFac*(qNp1R-qNp1L) + nonOrth;

      // setup for LHS; row left is easy - simply zero

      // left node (face)
      p_rhs[0] = -diffFlux;
      p_lhs[0] = -lhsFac;

      for ( int ni = 0; ni < num_nodes; ++ni ) {
        p_lhs[ni+1] = lhsFac*shpfc[ni];
      }

      // apply to linear system
      apply_coeff(connected_nodes, scratchIds, scratchVals, rhs, lhs, __FILE__);
    }
  }
}