// Sets up the Boussinesq buoyancy source kernel: looks up the temperature
// (state NP1) and coordinates fields, copies gravity and the reference
// temperature / density / thermal expansion coefficient out of the solution
// options, evaluates the SCV shape functions once, and registers the data the
// kernel needs gathered for each element.
//
//   bulkData    - used only to reach the mesh MetaData for field lookup
//   solnOpts    - source of the reference state, gravity vector and the
//                 coordinates field name
//   dataPreReqs - receives the master element, field and SCV volume requests
MomentumBuoyancyBoussinesqSrcElemKernel<AlgTraits>::MomentumBuoyancyBoussinesqSrcElemKernel(
  const stk::mesh::BulkData& bulkData,
  const SolutionOptions& solnOpts,
  ElemDataRequests& dataPreReqs)
  : Kernel(),
    rhoRef_(solnOpts.referenceDensity_),
    ipNodeMap_(sierra::nalu::MasterElementRepo::get_volume_master_element(AlgTraits::topo_)->ipNodeMap())
{
  const stk::mesh::MetaData& metaData = bulkData.mesh_meta_data();

  // temperature is a multi-state field; this kernel works on state NP1
  ScalarFieldType *temperature = metaData.get_field<ScalarFieldType>(stk::topology::NODE_RANK, "temperature");
  temperatureNp1_ = &(temperature->field_of_state(stk::mesh::StateNP1));
  coordinates_ = metaData.get_field<VectorFieldType>(stk::topology::NODE_RANK, solnOpts.get_coordinates_name());

  // copy the first nDim_ gravity components into the kernel-local storage
  const std::vector<double>& solnOptsGravity = solnOpts.get_gravity_vector(AlgTraits::nDim_);
  for (int i = 0; i < AlgTraits::nDim_; i++)
    gravity_(i) = solnOptsGravity[i];

  // remaining reference state; rhoRef_ is already set in the initializer list
  tRef_ = solnOpts.referenceTemperature_;
  beta_ = solnOpts.thermalExpansionCoeff_;

  // evaluate the SCV shape functions once and keep them in v_shape_function_
  MasterElement* meSCV = sierra::nalu::MasterElementRepo::get_volume_master_element(AlgTraits::topo_);
  get_scv_shape_fn_data<AlgTraits>([&](double* ptr){meSCV->shape_fcn(ptr);}, v_shape_function_);

  // add master elements
  dataPreReqs.add_cvfem_volume_me(meSCV);

  // fields and data
  dataPreReqs.add_coordinates_field(*coordinates_, AlgTraits::nDim_, CURRENT_COORDINATES);
  dataPreReqs.add_gathered_nodal_field(*temperatureNp1_, 1);
  dataPreReqs.add_master_element_call(SCV_VOLUME, CURRENT_COORDINATES);
}
예제 #2
0
// Sets up the momentum NSO (KE form) kernel: caches the nodal fields it
// reads, evaluates the SCS shape functions once, and registers the fields and
// master element quantities to be gathered for each element.
//
//   bulkData    - used only to reach the mesh MetaData for field lookup
//   solnOpts    - supplies the mesh-motion flag, the coordinates field name
//                 and the shifted-gradient-operator option for "velocity"
//   (unnamed)   - VectorFieldType* argument that this constructor ignores
//   Gju         - field stored in Gju_ and gathered as an nDim x nDim nodal
//                 quantity
//   fourthFac   - stored in fourthFac_
//   dataPreReqs - receives the master element, field and geometry requests
MomentumNSOKeElemKernel<AlgTraits>::MomentumNSOKeElemKernel(
  const stk::mesh::BulkData& bulkData,
  const SolutionOptions& solnOpts,
  VectorFieldType* ,
  GenericFieldType* Gju,
  const double fourthFac,
  ElemDataRequests& dataPreReqs)
  : Kernel(),
    Gju_(Gju),
    lrscv_(sierra::nalu::MasterElementRepo::get_surface_master_element(AlgTraits::topo_)->adjacentNodes()),
    fourthFac_(fourthFac),
    shiftedGradOp_(solnOpts.get_shifted_grad_op("velocity"))
{
  const stk::mesh::MetaData& metaData = bulkData.mesh_meta_data();
  velocityNp1_ = metaData.get_field<VectorFieldType>(
    stk::topology::NODE_RANK, "velocity");
  densityNp1_ = metaData.get_field<ScalarFieldType>(
    stk::topology::NODE_RANK, "density");
  pressure_ = metaData.get_field<ScalarFieldType>(
    stk::topology::NODE_RANK, "pressure");

  // with a moving mesh use the "velocity_rtm" field; otherwise velocityRTM_
  // simply refers to the "velocity" field
  if (solnOpts.does_mesh_move())
    velocityRTM_ = metaData.get_field<VectorFieldType>(
      stk::topology::NODE_RANK, "velocity_rtm");
  else
    velocityRTM_ = metaData.get_field<VectorFieldType>(
      stk::topology::NODE_RANK, "velocity");

  coordinates_ = metaData.get_field<VectorFieldType>(
    stk::topology::NODE_RANK, solnOpts.get_coordinates_name());

  Gjp_ = metaData.get_field<VectorFieldType>(stk::topology::NODE_RANK, "dpdx");

  // evaluate the SCS shape functions once and keep them in v_shape_function_
  MasterElement *meSCS = sierra::nalu::MasterElementRepo::get_surface_master_element(AlgTraits::topo_);
  get_scs_shape_fn_data<AlgTraits>([&](double* ptr){meSCS->shape_fcn(ptr);}, v_shape_function_);

  // add master elements
  dataPreReqs.add_cvfem_surface_me(meSCS);

  // fields
  dataPreReqs.add_gathered_nodal_field(*Gju_, AlgTraits::nDim_, AlgTraits::nDim_);
  dataPreReqs.add_coordinates_field(*coordinates_, AlgTraits::nDim_, CURRENT_COORDINATES);
  dataPreReqs.add_gathered_nodal_field(*velocityNp1_, AlgTraits::nDim_);
  dataPreReqs.add_gathered_nodal_field(*velocityRTM_, AlgTraits::nDim_);
  dataPreReqs.add_gathered_nodal_field(*Gjp_, AlgTraits::nDim_);
  dataPreReqs.add_gathered_nodal_field(*densityNp1_,1);
  dataPreReqs.add_gathered_nodal_field(*pressure_,1);

  // master element data; the gradient operator flavor follows shiftedGradOp_
  dataPreReqs.add_master_element_call(SCS_AREAV, CURRENT_COORDINATES);
  if ( shiftedGradOp_ )
    dataPreReqs.add_master_element_call(SCS_SHIFTED_GRAD_OP, CURRENT_COORDINATES);
  else
    dataPreReqs.add_master_element_call(SCS_GRAD_OP, CURRENT_COORDINATES);
  dataPreReqs.add_master_element_call(SCS_GIJ, CURRENT_COORDINATES);
}
예제 #3
0
// Builds the continuity mass supplemental algorithm: resolves the three
// density states and the coordinates field, evaluates the SCV shape functions
// (shifted when lumpedMass is set), and registers the data to be gathered.
//
//   realm       - forwarded to the base class and used for master element lookup
//   dataPreReqs - receives the master element, field and SCV volume requests
//   lumpedMass  - selects shifted_shape_fcn() over shape_fcn()
ContinuityMassElemSuppAlg<AlgTraits>::ContinuityMassElemSuppAlg(
   Realm &realm,
   ElemDataRequests& dataPreReqs,
   const bool lumpedMass)
  : SupplementalAlgorithm(realm),
    densityNm1_(nullptr),
    densityN_(nullptr),
    densityNp1_(nullptr),
    coordinates_(nullptr),
    dt_(0.0),
    gamma1_(0.0),
    gamma2_(0.0),
    gamma3_(0.0),
    lumpedMass_(lumpedMass),
    ipNodeMap_(realm.get_volume_master_element(AlgTraits::topo_)->ipNodeMap())
{
  stk::mesh::MetaData & metaData = realm_.meta_data();
  ScalarFieldType *rho = metaData.get_field<ScalarFieldType>(stk::topology::NODE_RANK, "density");

  // when the realm carries only two states, state N stands in for NM1
  if ( realm_.number_of_states() == 2 )
    densityNm1_ = &(rho->field_of_state(stk::mesh::StateN));
  else
    densityNm1_ = &(rho->field_of_state(stk::mesh::StateNM1));

  densityN_ = &(rho->field_of_state(stk::mesh::StateN));
  densityNp1_ = &(rho->field_of_state(stk::mesh::StateNP1));
  coordinates_ = metaData.get_field<VectorFieldType>(stk::topology::NODE_RANK, realm_.get_coordinates_name());

  MasterElement *volumeME = realm.get_volume_master_element(AlgTraits::topo_);

  // fill v_shape_function_ once; the lumped variant uses the shifted functions
  double *shapeFcnStorage = &v_shape_function_(0,0);
  if ( lumpedMass_ ) {
    volumeME->shifted_shape_fcn(shapeFcnStorage);
  }
  else {
    volumeME->shape_fcn(shapeFcnStorage);
  }

  // master element request
  dataPreReqs.add_cvfem_volume_me(volumeME);

  // nodal fields to gather, then the SCV volume
  dataPreReqs.add_gathered_nodal_field(*coordinates_, AlgTraits::nDim_);
  dataPreReqs.add_gathered_nodal_field(*densityNm1_, 1);
  dataPreReqs.add_gathered_nodal_field(*densityN_, 1);
  dataPreReqs.add_gathered_nodal_field(*densityNp1_, 1);
  dataPreReqs.add_master_element_call(SCV_VOLUME);
}
예제 #4
0
// Builds the momentum wall-function face kernel: reads the turbulence model
// constants, resolves the nodal and side-rank fields it uses, evaluates the
// face shape functions once, and registers the data to be gathered.
//
//   bulkData    - used only to reach the mesh MetaData for field lookup
//   solnOpts    - supplies TM_elog, TM_kappa, TM_yplus_crit and the
//                 coordinates field name
//   dataPreReqs - receives the face master element and field requests
MomentumWallFunctionElemKernel<BcAlgTraits>::MomentumWallFunctionElemKernel(
  const stk::mesh::BulkData& bulkData,
  const SolutionOptions& solnOpts,
  ElemDataRequests& dataPreReqs)
  : Kernel(),
    elog_(solnOpts.get_turb_model_constant(TM_elog)),
    kappa_(solnOpts.get_turb_model_constant(TM_kappa)),
    yplusCrit_(solnOpts.get_turb_model_constant(TM_yplus_crit)),
    ipNodeMap_(sierra::nalu::MasterElementRepo::get_surface_master_element(BcAlgTraits::topo_)->ipNodeMap())
{
  const stk::mesh::MetaData& meta = bulkData.mesh_meta_data();

  // nodal fields; velocity is taken at state NP1
  VectorFieldType *velocityField = meta.get_field<VectorFieldType>(stk::topology::NODE_RANK, "velocity");
  velocityNp1_ = &(velocityField->field_of_state(stk::mesh::StateNP1));
  bcVelocity_ = meta.get_field<VectorFieldType>(stk::topology::NODE_RANK, "wall_velocity_bc");
  density_ = meta.get_field<ScalarFieldType>(stk::topology::NODE_RANK, "density");
  viscosity_ = meta.get_field<ScalarFieldType>(stk::topology::NODE_RANK, "viscosity");

  // fields registered on the side rank
  const auto sideRank = meta.side_rank();
  exposedAreaVec_ = meta.get_field<GenericFieldType>(sideRank, "exposed_area_vector");
  wallFrictionVelocityBip_ = meta.get_field<GenericFieldType>(sideRank, "wall_friction_velocity_bip");
  wallNormalDistanceBip_ = meta.get_field<GenericFieldType>(sideRank, "wall_normal_distance_bip");

  VectorFieldType *coordsField = meta.get_field<VectorFieldType>(
    stk::topology::NODE_RANK, solnOpts.get_coordinates_name());

  // evaluate the face shape functions once and keep them in vf_shape_function_
  MasterElement *faceME = sierra::nalu::MasterElementRepo::get_surface_master_element(BcAlgTraits::topo_);
  get_face_shape_fn_data<BcAlgTraits>(
    [&](double* sf) { faceME->shape_fcn(sf); },
    vf_shape_function_);

  // master element request
  dataPreReqs.add_cvfem_face_me(faceME);

  // gathered nodal data followed by per-face integration point data
  dataPreReqs.add_coordinates_field(*coordsField, BcAlgTraits::nDim_, CURRENT_COORDINATES);
  dataPreReqs.add_gathered_nodal_field(*velocityNp1_, BcAlgTraits::nDim_);
  dataPreReqs.add_gathered_nodal_field(*bcVelocity_, BcAlgTraits::nDim_);
  dataPreReqs.add_gathered_nodal_field(*density_, 1);
  dataPreReqs.add_gathered_nodal_field(*viscosity_, 1);
  dataPreReqs.add_face_field(*exposedAreaVec_, BcAlgTraits::numFaceIp_, BcAlgTraits::nDim_);
  dataPreReqs.add_face_field(*wallFrictionVelocityBip_, BcAlgTraits::numFaceIp_);
  dataPreReqs.add_face_field(*wallNormalDistanceBip_, BcAlgTraits::numFaceIp_);
}
예제 #5
0
// Builds the scalar advection/diffusion kernel: stores the transported scalar
// and its diffusive flux coefficient, resolves the coordinates and SCS mass
// flow rate fields, evaluates two sets of SCS shape functions (a regular set
// and an advection set that is shifted when the skew-symmetric option is on),
// and registers the data to be gathered.
//
//   bulkData      - used only to reach the mesh MetaData for field lookup
//   solnOpts      - supplies the coordinates name and the per-scalar
//                   shifted-grad-op / skew-symmetric options
//   scalarQ       - transported scalar; its name keys the option lookups
//   diffFluxCoeff - diffusive flux coefficient field
//   dataPreReqs   - receives the master element, field and geometry requests
ScalarAdvDiffElemKernel<AlgTraits>::ScalarAdvDiffElemKernel(
  const stk::mesh::BulkData& bulkData,
  const SolutionOptions& solnOpts,
  ScalarFieldType* scalarQ,
  ScalarFieldType* diffFluxCoeff,
  ElemDataRequests& dataPreReqs)
  : Kernel(),
    scalarQ_(scalarQ),
    diffFluxCoeff_(diffFluxCoeff),
    lrscv_(sierra::nalu::MasterElementRepo::get_surface_master_element(AlgTraits::topo_)->adjacentNodes()),
    shiftedGradOp_(solnOpts.get_shifted_grad_op(scalarQ->name()))
{
  // fields looked up by name
  const stk::mesh::MetaData& meta = bulkData.mesh_meta_data();
  coordinates_ = meta.get_field<VectorFieldType>(
    stk::topology::NODE_RANK, solnOpts.get_coordinates_name());
  massFlowRate_ = meta.get_field<GenericFieldType>(
    stk::topology::ELEMENT_RANK, "mass_flow_rate_scs");

  MasterElement *surfaceME = sierra::nalu::MasterElementRepo::get_surface_master_element(AlgTraits::topo_);

  // regular SCS shape functions
  get_scs_shape_fn_data<AlgTraits>(
    [&](double* sf) { surfaceME->shape_fcn(sf); },
    v_shape_function_);

  // advection shape functions: shifted only for the skew-symmetric option
  const bool skewSymmetric = solnOpts.get_skew_symmetric(scalarQ->name());
  get_scs_shape_fn_data<AlgTraits>(
    [&](double* sf) {
      if ( skewSymmetric )
        surfaceME->shifted_shape_fcn(sf);
      else
        surfaceME->shape_fcn(sf);
    },
    v_adv_shape_function_);

  // master element request
  dataPreReqs.add_cvfem_surface_me(surfaceME);

  // fields and geometry to gather
  dataPreReqs.add_coordinates_field(*coordinates_, AlgTraits::nDim_, CURRENT_COORDINATES);
  dataPreReqs.add_gathered_nodal_field(*scalarQ_, 1);
  dataPreReqs.add_gathered_nodal_field(*diffFluxCoeff_, 1);
  dataPreReqs.add_element_field(*massFlowRate_, AlgTraits::numScsIp_);
  dataPreReqs.add_master_element_call(SCS_AREAV, CURRENT_COORDINATES);
  if ( shiftedGradOp_ ) {
    dataPreReqs.add_master_element_call(SCS_SHIFTED_GRAD_OP, CURRENT_COORDINATES);
  }
  else {
    dataPreReqs.add_master_element_call(SCS_GRAD_OP, CURRENT_COORDINATES);
  }
}
//--------------------------------------------------------------------------
//-------- execute ---------------------------------------------------------
//--------------------------------------------------------------------------
void
AssembleNodalGradUElemAlgorithm::execute()
{

  stk::mesh::MetaData & meta_data = realm_.meta_data();

  const int nDim = meta_data.spatial_dimension();

  // extract fields
  ScalarFieldType *dualNodalVolume = meta_data.get_field<ScalarFieldType>(stk::topology::NODE_RANK, "dual_nodal_volume");
  VectorFieldType *coordinates = meta_data.get_field<VectorFieldType>(stk::topology::NODE_RANK, realm_.get_coordinates_name());

  // nodal fields to gather; gather everything other than what we are assembling
  std::vector<double> ws_vectorQ;
  std::vector<double> ws_dualVolume;
  std::vector<double> ws_coordinates;

  // geometry related to populate
  std::vector<double> ws_scs_areav;
  std::vector<double> ws_shape_function;

  // ip data
  std::vector<double>qIp(nDim);

  // define some common selectors
  stk::mesh::Selector s_locally_owned_union = meta_data.locally_owned_part()
    &stk::mesh::selectUnion(partVec_);

  stk::mesh::BucketVector const& elem_buckets =
    realm_.get_buckets( stk::topology::ELEMENT_RANK, s_locally_owned_union );
  for ( stk::mesh::BucketVector::const_iterator ib = elem_buckets.begin();
        ib != elem_buckets.end() ; ++ib ) {
    stk::mesh::Bucket & b = **ib ;
    const stk::mesh::Bucket::size_type length   = b.size();

    // extract master element
    MasterElement *meSCS = realm_.get_surface_master_element(b.topology());

    // extract master element specifics
    const int nodesPerElement = meSCS->nodesPerElement_;
    const int numScsIp = meSCS->numIntPoints_;
    const int *lrscv = meSCS->adjacentNodes();

    // algorithm related
    ws_vectorQ.resize(nodesPerElement*nDim);
    ws_dualVolume.resize(nodesPerElement);
    ws_coordinates.resize(nodesPerElement*nDim);
    ws_scs_areav.resize(numScsIp*nDim);
    ws_shape_function.resize(numScsIp*nodesPerElement);

    // pointers.
    double *p_vectorQ = &ws_vectorQ[0];
    double *p_dualVolume = &ws_dualVolume[0];
    double *p_coordinates = &ws_coordinates[0];
    double *p_scs_areav = &ws_scs_areav[0];
    double *p_shape_function = &ws_shape_function[0];

    if ( useShifted_ )
      meSCS->shifted_shape_fcn(&p_shape_function[0]);
    else
      meSCS->shape_fcn(&p_shape_function[0]);

    for ( stk::mesh::Bucket::size_type k = 0 ; k < length ; ++k ) {

      //===============================================
      // gather nodal data; this is how we do it now..
      //===============================================
      stk::mesh::Entity const * node_rels = b.begin_nodes(k);
      int num_nodes = b.num_nodes(k);

      // sanity check on num nodes
      ThrowAssert( num_nodes == nodesPerElement );

      // note: we absolutely need to gather coords since it
      // is required to compute the area vector. however,
      // ws_scalarQ and ws_dualVolume are choices to avoid
      // field data call for interpolation

      for ( int ni = 0; ni < num_nodes; ++ni ) {
        stk::mesh::Entity node = node_rels[ni];

        // pointers to real data
        double * coords = stk::mesh::field_data(*coordinates, node);
        double * vectorQ = stk::mesh::field_data(*vectorQ_, node);

        // gather scalars
        p_dualVolume[ni] = *stk::mesh::field_data(*dualNodalVolume, node);

        // gather vectors
        const int offSet = ni*nDim;
        for ( int j=0; j < nDim; ++j ) {
          p_coordinates[offSet+j] = coords[j];
          p_vectorQ[offSet+j] = vectorQ[j];
        }
      }

      // compute geometry
      double scs_error = 0.0;
      meSCS->determinant(1, &p_coordinates[0], &p_scs_areav[0], &scs_error);

      // start assembly
      for ( int ip = 0; ip < numScsIp; ++ip ) {

        // left and right nodes for this ip
        const int il = lrscv[2*ip];
        const int ir = lrscv[2*ip+1];

        stk::mesh::Entity nodeL = node_rels[il];
        stk::mesh::Entity nodeR = node_rels[ir];

        // pointer to fields to assemble
        double *gradQL = stk::mesh::field_data(*dqdx_, nodeL);
        double *gradQR = stk::mesh::field_data(*dqdx_, nodeR);

        // interpolate to scs point; operate on saved off ws_field
        for (int j=0; j < nDim; ++j )
          qIp[j] = 0.0;

        const int offSet = ip*nodesPerElement;
        for ( int ic = 0; ic < nodesPerElement; ++ic ) {
          const double r = p_shape_function[offSet+ic];
          for ( int j = 0; j < nDim; ++j ) {
            qIp[j] += r*p_vectorQ[ic*nDim+j];
          }
        }

        // left and right volume
        double inv_volL = 1.0/p_dualVolume[il];
        double inv_volR = 1.0/p_dualVolume[ir];

        // assemble to il/ir
        for ( int i = 0; i < nDim; ++i ) {
          const int row_gradQ = i*nDim;
          const double qip = qIp[i];
          for ( int j = 0; j < nDim; ++j ) {
            double fac = qip*p_scs_areav[ip*nDim+j];
            gradQL[row_gradQ+j] += fac*inv_volL;
            gradQR[row_gradQ+j] -= fac*inv_volR;
          }
        }
      }
    }
  }
}
//--------------------------------------------------------------------------
//-------- execute ---------------------------------------------------------
//--------------------------------------------------------------------------
void
AssembleHeatCondIrradWallSolverAlgorithm::execute()
{

  stk::mesh::MetaData & meta_data = realm_.meta_data();

  const int nDim = meta_data.spatial_dimension();

  const double sigma = realm_.get_stefan_boltzmann();

  // space for LHS/RHS; nodesPerFace*nodesPerFace and nodesPerFace
  std::vector<double> lhs;
  std::vector<double> rhs;
  std::vector<stk::mesh::Entity> connected_nodes;

  // nodal fields to gather
  std::vector<double> ws_irradiation;
  std::vector<double> ws_emissivity;
  std::vector<double> ws_temperature;

  // geometry related to populate
  std::vector<double> ws_shape_function;

  // setup for buckets; union parts and ask for locally owned
  stk::mesh::Selector s_locally_owned_union = meta_data.locally_owned_part()
    &stk::mesh::selectUnion(partVec_);
  stk::mesh::BucketVector const& face_buckets =
    realm_.get_buckets( meta_data.side_rank(), s_locally_owned_union );

  for ( stk::mesh::BucketVector::const_iterator ib = face_buckets.begin();
        ib != face_buckets.end() ; ++ib ) {
    stk::mesh::Bucket & b = **ib ;

    // extract master element specifics
    MasterElement *meFC = realm_.get_surface_master_element(b.topology());
    const int nodesPerFace = meFC->nodesPerElement_;
    const int numScsIp = meFC->numIntPoints_;

    // resize some things; matrix related
    const int lhsSize = nodesPerFace*nodesPerFace;
    const int rhsSize = nodesPerFace;
    lhs.resize(lhsSize);
    rhs.resize(rhsSize);
    connected_nodes.resize(nodesPerFace);

    // algorithm related
    ws_irradiation.resize(nodesPerFace);
    ws_emissivity.resize(nodesPerFace);
    ws_temperature.resize(nodesPerFace);
    ws_shape_function.resize(numScsIp*nodesPerFace);

    // pointers
    double *p_lhs = &lhs[0];
    double *p_rhs = &rhs[0];
    double *p_irradiation = &ws_irradiation[0];
    double *p_emissivity = &ws_emissivity[0];
    double *p_temperature = &ws_temperature[0];
    double *p_shape_function = &ws_shape_function[0];

    if ( useShifted_ )
      meFC->shifted_shape_fcn(&p_shape_function[0]);
    else
      meFC->shape_fcn(&p_shape_function[0]);

    const stk::mesh::Bucket::size_type length   = b.size();
    for ( stk::mesh::Bucket::size_type k = 0 ; k < length ; ++k ) {

      // zero lhs/rhs
      for ( int p = 0; p < lhsSize; ++p )
        p_lhs[p] = 0.0;
      for ( int p = 0; p < rhsSize; ++p )
        p_rhs[p] = 0.0;

      // face data
      double * areaVec = stk::mesh::field_data(*exposedAreaVec_, b, k);

      // face node relations for nodal gather
      stk::mesh::Entity const * face_node_rels = b.begin_nodes(k);
      
      int num_nodes = b.num_nodes(k);
      for ( int ni = 0; ni < num_nodes; ++ni ) {

        // get the node and form connected_node
        stk::mesh::Entity node = face_node_rels[ni];
        connected_nodes[ni] = node;

        // gather scalar
        p_irradiation[ni] = *stk::mesh::field_data(*irradiation_, node);
        p_emissivity[ni] = *stk::mesh::field_data(*emissivity_, node);
        p_temperature[ni] = *stk::mesh::field_data(*temperature_, node);
      }

      // start the assembly
      for ( int ip = 0; ip < numScsIp; ++ip ) {
	
        double magA = 0.0;
        for ( int j=0; j < nDim; ++j ) {
          magA += areaVec[ip*nDim+j]*areaVec[ip*nDim+j];
        }
        magA = std::sqrt(magA);
	
        const int nn = ip;
        const int offSet = ip*nodesPerFace;
	
        // form boundary ip values
        double irradiationBip = 0.0;
        double emissivityBip = 0.0;
        double tBip = 0.0;
        for ( int ic = 0; ic < nodesPerFace; ++ic ) {
          const double r = p_shape_function[offSet+ic];
          irradiationBip += r*p_irradiation[ic];
          emissivityBip += r*p_emissivity[ic];
          tBip += r*p_temperature[ic];
        }

        // form rhs contribution
        const double radiation = emissivityBip*(irradiationBip - sigma*std::pow(tBip,4))*magA;
        p_rhs[nn] += radiation;
	
        // sensitivities
        const int rowR = nn*nodesPerFace;
        const double lhsFac = 4.0*sigma*emissivityBip*magA*std::pow(tBip,3);
        for ( int ic = 0; ic < nodesPerFace; ++ic ) {
          const double r = p_shape_function[offSet+ic];
          p_lhs[rowR+ic] += r*lhsFac;
        }
      }
      
      apply_coeff(connected_nodes, rhs, lhs, __FILE__);

    }
  }
}
//--------------------------------------------------------------------------
//-------- execute ---------------------------------------------------------
//--------------------------------------------------------------------------
// Assembles the element-based advection/diffusion operator for scalarQ_ over
// every locally owned element selected by partVec_. For each element it
// gathers nodal data, computes SCS area vectors and shape-function gradients,
// forms a blended upwind/central advective flux and a diffusive flux at every
// SCS integration point, accumulates them into the element lhs/rhs, lets the
// supplemental algorithms add their contributions, and finally passes the
// element system to apply_coeff().
void
AssembleScalarElemSolverAlgorithm::execute()
{

  stk::mesh::BulkData & bulk_data = realm_.bulk_data();
  stk::mesh::MetaData & meta_data = realm_.meta_data();

  const int nDim = meta_data.spatial_dimension();
  // guards the divisions in the Peclet factor and is passed to van_leer()
  const double small = 1.0e-16;


  // extract user advection options (allow to potentially change over time)
  const std::string dofName = scalarQ_->name();
  const double hybridFactor = realm_.get_hybrid_factor(dofName);
  const double alpha = realm_.get_alpha_factor(dofName);
  const double alphaUpw = realm_.get_alpha_upw_factor(dofName);
  const double hoUpwind = realm_.get_upw_factor(dofName);
  const bool useLimiter = realm_.primitive_uses_limiter(dofName);

  // one minus flavor..
  const double om_alpha = 1.0-alpha;
  const double om_alphaUpw = 1.0-alphaUpw;

  // space for LHS/RHS; nodesPerElem*nodesPerElem* and nodesPerElem
  std::vector<double> lhs;
  std::vector<double> rhs;
  std::vector<stk::mesh::Entity> connected_nodes;

  // supplemental algorithm size and setup
  const size_t supplementalAlgSize = supplementalAlg_.size();
  for ( size_t i = 0; i < supplementalAlgSize; ++i )
    supplementalAlg_[i]->setup();

  // nodal fields to gather
  std::vector<double> ws_velocityNp1;
  std::vector<double> ws_meshVelocity;
  std::vector<double> ws_vrtm;
  std::vector<double> ws_coordinates;
  std::vector<double> ws_scalarQNp1;
  std::vector<double> ws_dqdx;
  std::vector<double> ws_density;
  std::vector<double> ws_diffFluxCoeff;

  // geometry related to populate
  std::vector<double> ws_scs_areav;
  std::vector<double> ws_dndx;
  std::vector<double> ws_deriv;
  std::vector<double> ws_det_j;
  std::vector<double> ws_shape_function;

  // ip values
  std::vector<double>coordIp(nDim);

  // pointers
  double *p_coordIp = &coordIp[0];

  // deal with state
  ScalarFieldType &scalarQNp1   = scalarQ_->field_of_state(stk::mesh::StateNP1);
  VectorFieldType &velocityNp1 = velocity_->field_of_state(stk::mesh::StateNP1);
  ScalarFieldType &densityNp1 = density_->field_of_state(stk::mesh::StateNP1);

  // define some common selectors
  stk::mesh::Selector s_locally_owned_union = meta_data.locally_owned_part()
    &stk::mesh::selectUnion(partVec_);

  stk::mesh::BucketVector const& elem_buckets =
    realm_.get_buckets( stk::topology::ELEMENT_RANK, s_locally_owned_union );
  for ( stk::mesh::BucketVector::const_iterator ib = elem_buckets.begin();
        ib != elem_buckets.end() ; ++ib ) {
    stk::mesh::Bucket & b = **ib ;
    const stk::mesh::Bucket::size_type length   = b.size();

    // extract master element
    MasterElement *meSCS = realm_.get_surface_master_element(b.topology());

    // extract master element specifics
    const int nodesPerElement = meSCS->nodesPerElement_;
    const int numScsIp = meSCS->numIntPoints_;
    // lrscv holds, per integration point, the pair (left node, right node)
    const int *lrscv = meSCS->adjacentNodes();

    // resize some things; matrix related
    const int lhsSize = nodesPerElement*nodesPerElement;
    const int rhsSize = nodesPerElement;
    lhs.resize(lhsSize);
    rhs.resize(rhsSize);
    connected_nodes.resize(nodesPerElement);

    // algorithm related
    ws_velocityNp1.resize(nodesPerElement*nDim);
    ws_meshVelocity.resize(nodesPerElement*nDim);
    ws_vrtm.resize(nodesPerElement*nDim);
    ws_coordinates.resize(nodesPerElement*nDim);
    ws_dqdx.resize(nodesPerElement*nDim);
    ws_scalarQNp1.resize(nodesPerElement);
    ws_density.resize(nodesPerElement);
    ws_diffFluxCoeff.resize(nodesPerElement);
    ws_scs_areav.resize(numScsIp*nDim);
    ws_dndx.resize(nDim*numScsIp*nodesPerElement);
    ws_deriv.resize(nDim*numScsIp*nodesPerElement);
    ws_det_j.resize(numScsIp);
    ws_shape_function.resize(numScsIp*nodesPerElement);

    // raw pointers into the work arrays; taken after the resizes above so
    // they stay valid for every element of this bucket
    double *p_lhs = &lhs[0];
    double *p_rhs = &rhs[0];
    double *p_velocityNp1 = &ws_velocityNp1[0];
    double *p_meshVelocity = &ws_meshVelocity[0];
    double *p_vrtm = &ws_vrtm[0];
    double *p_coordinates = &ws_coordinates[0];
    double *p_dqdx = &ws_dqdx[0];
    double *p_scalarQNp1 = &ws_scalarQNp1[0];
    double *p_density = &ws_density[0];
    double *p_diffFluxCoeff = &ws_diffFluxCoeff[0];
    double *p_scs_areav = &ws_scs_areav[0];
    double *p_dndx = &ws_dndx[0];
    double *p_shape_function = &ws_shape_function[0];

    // extract shape function (evaluated once per bucket)
    meSCS->shape_fcn(&p_shape_function[0]);

    for ( stk::mesh::Bucket::size_type k = 0 ; k < length ; ++k ) {
      // get elem
      stk::mesh::Entity elem = b[k];

      // zero lhs/rhs
      for ( int p = 0; p < lhsSize; ++p )
        p_lhs[p] = 0.0;
      for ( int p = 0; p < rhsSize; ++p )
        p_rhs[p] = 0.0;


      // mass flow rate at each SCS integration point of this element
      const double *mdot = stk::mesh::field_data(*massFlowRate_, elem );

      //===============================================
      // gather nodal data; this is how we do it now..
      //===============================================
      stk::mesh::Entity const * node_rels = bulk_data.begin_nodes(elem);
      int num_nodes = bulk_data.num_nodes(elem);

      // sanity check on num nodes
      ThrowAssert( num_nodes == nodesPerElement );

      for ( int ni = 0; ni < num_nodes; ++ni ) {
        stk::mesh::Entity node = node_rels[ni];

        // set connected nodes
        connected_nodes[ni] = node;

        // pointers to real data
        // NOTE(review): meshVelocity_ is dereferenced here whether or not
        // meshMotion_ is set - confirm the field always exists
        const double * uNp1   = stk::mesh::field_data(velocityNp1, node );
        const double * vNp1   = stk::mesh::field_data(*meshVelocity_, node);
        const double * coords = stk::mesh::field_data(*coordinates_, node );
        const double * dq     = stk::mesh::field_data(*dqdx_, node );

        // gather scalars
        p_scalarQNp1[ni]    = *stk::mesh::field_data(scalarQNp1, node );
        p_density[ni]       = *stk::mesh::field_data(densityNp1, node );
        p_diffFluxCoeff[ni] = *stk::mesh::field_data(*diffFluxCoeff_, node );

        // gather vectors; p_vrtm starts as a copy of the velocity and has the
        // mesh velocity subtracted further below when meshMotion_ is set
        // NOTE(review): p_velocityNp1 is filled here but not read again in
        // this function
        const int niNdim = ni*nDim;
        for ( int i=0; i < nDim; ++i ) {
          p_velocityNp1[niNdim+i] = uNp1[i];
          p_vrtm[niNdim+i] = uNp1[i];
          p_meshVelocity[niNdim+i] = vNp1[i];
          p_coordinates[niNdim+i] = coords[i];
          p_dqdx[niNdim+i] = dq[i];
        }
      }

      // compute geometry (SCS area vectors)
      double scs_error = 0.0;
      meSCS->determinant(1, &p_coordinates[0], &p_scs_areav[0], &scs_error);

      // compute dndx
      meSCS->grad_op(1, &p_coordinates[0], &p_dndx[0], &ws_deriv[0], &ws_det_j[0], &scs_error);

      // manage velocity relative to mesh
      if ( meshMotion_ ) {
        const int kSize = num_nodes*nDim;
        // NOTE(review): this 'k' shadows the element-loop index 'k'; the
        // outer index is not read again before this scope closes
        for ( int k = 0; k < kSize; ++k ) {
          p_vrtm[k] -= p_meshVelocity[k];
        }
      }

      for ( int ip = 0; ip < numScsIp; ++ip ) {

        // left and right nodes for this ip
        const int il = lrscv[2*ip];
        const int ir = lrscv[2*ip+1];

        // corresponding matrix rows
        const int rowL = il*nodesPerElement;
        const int rowR = ir*nodesPerElement;

        // save off mdot
        const double tmdot = mdot[ip];

        // zero out values of interest for this ip
        for ( int j = 0; j < nDim; ++j ) {
          p_coordIp[j] = 0.0;
        }

        // save off ip values; offset to Shape Function
        double rhoIp = 0.0;
        double muIp = 0.0;
        double qIp = 0.0;
        const int offSetSF = ip*nodesPerElement;
        for ( int ic = 0; ic < nodesPerElement; ++ic ) {
          const double r = p_shape_function[offSetSF+ic];
          rhoIp += r*p_density[ic];
          muIp += r*p_diffFluxCoeff[ic];
          qIp += r*p_scalarQNp1[ic];
          // compute scs point values
          for ( int i = 0; i < nDim; ++i ) {
            p_coordIp[i] += r*p_coordinates[ic*nDim+i];
          }
        }

        // Peclet factor; along the edge
        // diffIp averages diffFluxCoeff/density over the two adjacent nodes;
        // udotx is the averaged relative velocity dotted with the il->ir edge
        const double diffIp = 0.5*(p_diffFluxCoeff[il]/p_density[il]
                                   + p_diffFluxCoeff[ir]/p_density[ir]);
        double udotx = 0.0;
        for(int j = 0; j < nDim; ++j ) {
          const double dxj = p_coordinates[ir*nDim+j]-p_coordinates[il*nDim+j];
          const double uj = 0.5*(p_vrtm[il*nDim+j] + p_vrtm[ir*nDim+j]);
          udotx += uj*dxj;
        }
        // blending weight pe^2/(5 + pe^2), which lies in [0,1)
        double pecfac = hybridFactor*udotx/(diffIp+small);
        pecfac = pecfac*pecfac/(5.0 + pecfac*pecfac);
        const double om_pecfac = 1.0-pecfac;

        // left and right extrapolation: nodal gradient dotted with the
        // vector between each adjacent node and the integration point
        double dqL = 0.0;
        double dqR = 0.0;
        for(int j = 0; j < nDim; ++j ) {
          const double dxjL = p_coordIp[j] - p_coordinates[il*nDim+j];
          const double dxjR = p_coordinates[ir*nDim+j] - p_coordIp[j];
          dqL += dxjL*p_dqdx[nDim*il+j];
          dqR += dxjR*p_dqdx[nDim*ir+j];
        }

        // add limiter if appropriate
        double limitL = 1.0;
        double limitR = 1.0;
        if ( useLimiter ) {
          const double dq = p_scalarQNp1[ir] - p_scalarQNp1[il];
          const double dqMl = 2.0*2.0*dqL - dq;
          const double dqMr = 2.0*2.0*dqR - dq;
          limitL = van_leer(dqMl, dq, small);
          limitR = van_leer(dqMr, dq, small);
        }
        
        // extrapolated; for now limit (along edge is fine)
        const double qIpL = p_scalarQNp1[il] + dqL*hoUpwind*limitL;
        const double qIpR = p_scalarQNp1[ir] - dqR*hoUpwind*limitR;

        // assemble advection; rhs and upwind contributions

        // 2nd order central; simply qIp from above

        // upwind: the extrapolated value is taken from the left node when
        // tmdot > 0, otherwise from the right node
        const double qUpwind = (tmdot > 0) ? alphaUpw*qIpL + om_alphaUpw*qIp
            : alphaUpw*qIpR + om_alphaUpw*qIp;

        // generalized central (2nd and 4th order)
        const double qHatL = alpha*qIpL + om_alpha*qIp;
        const double qHatR = alpha*qIpR + om_alpha*qIp;
        const double qCds = 0.5*(qHatL + qHatR);

        // total advection: Peclet-weighted blend of upwind and central
        const double aflux = tmdot*(pecfac*qUpwind + om_pecfac*qCds);

        // right hand side; L and R
        p_rhs[il] -= aflux;
        p_rhs[ir] += aflux;

        // advection operator sens; all but central

        // upwind advection (includes 4th); left node
        // 0.5*(tmdot+|tmdot|) equals tmdot when tmdot > 0 and zero otherwise
        const double alhsfacL = 0.5*(tmdot+std::abs(tmdot))*pecfac*alphaUpw
          + 0.5*alpha*om_pecfac*tmdot;
        p_lhs[rowL+il] += alhsfacL;
        p_lhs[rowR+il] -= alhsfacL;

        // upwind advection; right node
        // 0.5*(tmdot-|tmdot|) equals tmdot when tmdot < 0 and zero otherwise
        const double alhsfacR = 0.5*(tmdot-std::abs(tmdot))*pecfac*alphaUpw
          + 0.5*alpha*om_pecfac*tmdot;
        p_lhs[rowR+ir] -= alhsfacR;
        p_lhs[rowL+ir] += alhsfacR;

        // accumulates the diffusive flux for the rhs
        double qDiff = 0.0;
        for ( int ic = 0; ic < nodesPerElement; ++ic ) {

          // shape function
          const double r = p_shape_function[offSetSF+ic];

          // upwind (il/ir) handled above; collect terms on alpha and alphaUpw
          const double lhsfacAdv = r*tmdot*(pecfac*om_alphaUpw + om_pecfac*om_alpha);

          // advection operator lhs; rhs handled above
          // lhs; il then ir
          p_lhs[rowL+ic] += lhsfacAdv;
          p_lhs[rowR+ic] -= lhsfacAdv;

          // diffusion: -muIp * (dN_ic/dx_j) * areaVec_j at this ip
          double lhsfacDiff = 0.0;
          const int offSetDnDx = nDim*nodesPerElement*ip + ic*nDim;
          for ( int j = 0; j < nDim; ++j ) {
            lhsfacDiff += -muIp*p_dndx[offSetDnDx+j]*p_scs_areav[ip*nDim+j];
          }

          qDiff += lhsfacDiff*p_scalarQNp1[ic];

          // lhs; il then ir
          p_lhs[rowL+ic] += lhsfacDiff;
          p_lhs[rowR+ic] -= lhsfacDiff;
        }

        // rhs; il then ir
        p_rhs[il] -= qDiff;
        p_rhs[ir] += qDiff;

      }

      // call supplemental
      for ( size_t i = 0; i < supplementalAlgSize; ++i )
        supplementalAlg_[i]->elem_execute( nodesPerElement, numScsIp, &lhs[0], &rhs[0], elem);

      // hand the element system to apply_coeff()
      apply_coeff(connected_nodes, rhs, lhs, __FILE__);

    }
  }
}
//--------------------------------------------------------------------------
//-------- execute ---------------------------------------------------------
//--------------------------------------------------------------------------
void
SurfaceForceAndMomentWallFunctionAlgorithm::execute()
{

  // check to see if this is a valid step to process output file
  const int timeStepCount = realm_.get_time_step_count();
  const bool processMe = (timeStepCount % frequency_) == 0 ? true : false;

  // do not waste time here
  if ( !processMe )
    return;

  stk::mesh::BulkData & bulk_data = realm_.bulk_data();
  stk::mesh::MetaData & meta_data = realm_.meta_data();

  const int nDim = meta_data.spatial_dimension();

  // set min and max values
  double yplusMin = 1.0e8;
  double yplusMax = -1.0e8;

  // bip values
  std::vector<double> uBip(nDim);
  std::vector<double> uBcBip(nDim);
  std::vector<double> unitNormal(nDim);

  // tangential work array
  std::vector<double> uiTangential(nDim);
  std::vector<double> uiBcTangential(nDim);

  // pointers to fixed values
  double *p_uBip = &uBip[0];
  double *p_uBcBip = &uBcBip[0];
  double *p_unitNormal= &unitNormal[0];
  double *p_uiTangential = &uiTangential[0];
  double *p_uiBcTangential = &uiBcTangential[0];

  // nodal fields to gather
  std::vector<double> ws_velocityNp1;
  std::vector<double> ws_bcVelocity;
  std::vector<double> ws_pressure;
  std::vector<double> ws_density;
  std::vector<double> ws_viscosity;

  // master element
  std::vector<double> ws_face_shape_function;

  // deal with state
  VectorFieldType &velocityNp1 = velocity_->field_of_state(stk::mesh::StateNP1);
  ScalarFieldType &densityNp1 = density_->field_of_state(stk::mesh::StateNP1);
  const double currentTime = realm_.get_current_time();

  // local force and MomentWallFunction; i.e., to be assembled
  double l_force_moment[9] = {};

  // work force, MomentWallFunction and radius; i.e., to be pused to cross_product()
  double ws_p_force[3] = {};
  double ws_v_force[3] = {};
  double ws_t_force[3] = {};
  double ws_moment[3] = {};
  double ws_radius[3] = {};

  // centroid
  double centroid[3] = {};
  for ( size_t k = 0; k < parameters_.size(); ++k)
    centroid[k] = parameters_[k];

  // define some common selectors
  stk::mesh::Selector s_locally_owned_union = meta_data.locally_owned_part()
    &stk::mesh::selectUnion(partVec_);

  stk::mesh::BucketVector const& face_buckets =
    realm_.get_buckets( meta_data.side_rank(), s_locally_owned_union );
  for ( stk::mesh::BucketVector::const_iterator ib = face_buckets.begin();
        ib != face_buckets.end() ; ++ib ) {
    stk::mesh::Bucket & b = **ib ;

    // face master element
    MasterElement *meFC = realm_.get_surface_master_element(b.topology());
    const int nodesPerFace = meFC->nodesPerElement_;

    // algorithm related; element
    ws_velocityNp1.resize(nodesPerFace*nDim);
    ws_bcVelocity.resize(nodesPerFace*nDim);
    ws_pressure.resize(nodesPerFace);
    ws_density.resize(nodesPerFace);
    ws_viscosity.resize(nodesPerFace);
    ws_face_shape_function.resize(nodesPerFace*nodesPerFace);

    // pointers
    double *p_velocityNp1 = &ws_velocityNp1[0];
    double *p_bcVelocity = &ws_bcVelocity[0];
    double *p_pressure = &ws_pressure[0];
    double *p_density = &ws_density[0];
    double *p_viscosity = &ws_viscosity[0];
    double *p_face_shape_function = &ws_face_shape_function[0];

    // shape functions
    if ( useShifted_ )
      meFC->shifted_shape_fcn(&p_face_shape_function[0]);
    else
      meFC->shape_fcn(&p_face_shape_function[0]);

    const stk::mesh::Bucket::size_type length   = b.size();

    for ( stk::mesh::Bucket::size_type k = 0 ; k < length ; ++k ) {

      // get face
      stk::mesh::Entity face = b[k];

      // face node relations
      stk::mesh::Entity const * face_node_rels = bulk_data.begin_nodes(face);

      //======================================
      // gather nodal data off of face
      //======================================
      for ( int ni = 0; ni < nodesPerFace; ++ni ) {
        stk::mesh::Entity node = face_node_rels[ni];

        // gather scalars
        p_pressure[ni]    = *stk::mesh::field_data(*pressure_, node);
        p_density[ni]    = *stk::mesh::field_data(densityNp1, node);
        p_viscosity[ni] = *stk::mesh::field_data(*viscosity_, node);

        // gather vectors
        double * uNp1 = stk::mesh::field_data(velocityNp1, node);
        double * uBc = stk::mesh::field_data(*bcVelocity_, node);
        const int offSet = ni*nDim;
        for ( int j=0; j < nDim; ++j ) {
          p_velocityNp1[offSet+j] = uNp1[j];
          p_bcVelocity[offSet+j] = uBc[j];
        }
      }

      // pointer to face data
      const double * areaVec = stk::mesh::field_data(*exposedAreaVec_, face);
      const double *wallNormalDistanceBip = stk::mesh::field_data(*wallNormalDistanceBip_, face);
      const double *wallFrictionVelocityBip = stk::mesh::field_data(*wallFrictionVelocityBip_, face);

      for ( int ip = 0; ip < nodesPerFace; ++ip ) {

        // offsets
        const int offSetAveraVec = ip*nDim;
        const int offSetSF_face = ip*nodesPerFace;

        // zero out vector quantities; squeeze in aMag
        double aMag = 0.0;
        for ( int j = 0; j < nDim; ++j ) {
          p_uBip[j] = 0.0;
          p_uBcBip[j] = 0.0;
          const double axj = areaVec[offSetAveraVec+j];
          aMag += axj*axj;
        }
        aMag = std::sqrt(aMag);

        // interpolate to bip
        double pBip = 0.0;
        double rhoBip = 0.0;
        double muBip = 0.0;
        for ( int ic = 0; ic < nodesPerFace; ++ic ) {
          const double r = p_face_shape_function[offSetSF_face+ic];
          pBip += r*p_pressure[ic];
          rhoBip += r*p_density[ic];
          muBip += r*p_viscosity[ic];
          const int offSetFN = ic*nDim;
          for ( int j = 0; j < nDim; ++j ) {
            p_uBip[j] += r*p_velocityNp1[offSetFN+j];
            p_uBcBip[j] += r*p_bcVelocity[offSetFN+j];
          }
        }

        // form unit normal
        for ( int j = 0; j < nDim; ++j ) {
          p_unitNormal[j] = areaVec[offSetAveraVec+j]/aMag;
        }

        // determine tangential velocity
        double uTangential = 0.0;
        for ( int i = 0; i < nDim; ++i ) {
          double uiTan = 0.0;
          double uiBcTan = 0.0;
          for ( int j = 0; j < nDim; ++j ) {
            const double ninj = p_unitNormal[i]*p_unitNormal[j];
            if ( i==j ) {
              const double om_nini = 1.0 - ninj;
              uiTan += om_nini*p_uBip[j];
              uiBcTan += om_nini*p_uBcBip[j];
            }
            else {
              uiTan -= ninj*p_uBip[j];
              uiBcTan -= ninj*p_uBcBip[j];
            }
          }
          // save off tangential components and augment magnitude
          p_uiTangential[i] = uiTan;
          p_uiBcTangential[i] = uiBcTan;
          uTangential += (uiTan-uiBcTan)*(uiTan-uiBcTan);
        }
        uTangential = std::sqrt(uTangential);

        // extract bip data
        const double yp = wallNormalDistanceBip[ip];
        const double utau= wallFrictionVelocityBip[ip];

        // determine yplus
        const double yplusBip = rhoBip*yp*utau/muBip;

        // min and max
        yplusMin = std::min(yplusMin, yplusBip);
        yplusMax = std::max(yplusMax, yplusBip);

        double lambda = muBip/yp*aMag;
        if ( yplusBip > yplusCrit_)
          lambda = rhoBip*kappa_*utau/std::log(elog_*yplusBip)*aMag;

        // extract nodal fields
        stk::mesh::Entity node = face_node_rels[ip];
        const double * coord = stk::mesh::field_data(*coordinates_, node );
        double *pressureForce = stk::mesh::field_data(*pressureForce_, node );
        double *tauWall = stk::mesh::field_data(*tauWall_, node );
        double *yplus = stk::mesh::field_data(*yplus_, node );
        const double assembledArea = *stk::mesh::field_data(*assembledArea_, node );

        // load radius; assemble force -sigma_ij*njdS
        double uParallel = 0.0;
        for ( int i = 0; i < nDim; ++i ) {
          const double ai = areaVec[offSetAveraVec+i];
          ws_radius[i] = coord[i] - centroid[i];
          const double uDiff = p_uiTangential[i] - p_uiBcTangential[i];
          ws_p_force[i] = pBip*ai;
          ws_v_force[i] = lambda*uDiff;
          ws_t_force[i] = ws_p_force[i] + ws_v_force[i];
          pressureForce[i] += ws_p_force[i];;
          uParallel += uDiff*uDiff;
        }

        cross_product(&ws_t_force[0], &ws_moment[0], &ws_radius[0]);

        // assemble for and moment
        for ( int j = 0; j < 3; ++j ) {
          l_force_moment[j] += ws_p_force[j];
          l_force_moment[j+3] += ws_v_force[j];
          l_force_moment[j+6] += ws_moment[j];
        }

        // assemble tauWall; area weighting is hiding in lambda/assembledArea
        *tauWall += lambda*std::sqrt(uParallel)/assembledArea;

        // deal with yplus
        *yplus += yplusBip*aMag/assembledArea;

      }
    }
  }

  if ( processMe ) {
    // parallel assemble and output
    double g_force_moment[9] = {};
    stk::ParallelMachine comm = NaluEnv::self().parallel_comm();

    // Parallel assembly of L2
    stk::all_reduce_sum(comm, &l_force_moment[0], &g_force_moment[0], 9);

    // min/max
    double g_yplusMin = 0.0, g_yplusMax = 0.0;
    stk::all_reduce_min(comm, &yplusMin, &g_yplusMin, 1);
    stk::all_reduce_max(comm, &yplusMax, &g_yplusMax, 1);

    // deal with file name and banner
    if ( NaluEnv::self().parallel_rank() == 0 ) {
      std::ofstream myfile;
      myfile.open(outputFileName_.c_str(), std::ios_base::app);
      myfile << std::setprecision(6) 
             << std::setw(w_) 
             << currentTime << std::setw(w_) 
             << g_force_moment[0] << std::setw(w_) << g_force_moment[1] << std::setw(w_) << g_force_moment[2] << std::setw(w_)
             << g_force_moment[3] << std::setw(w_) << g_force_moment[4] << std::setw(w_) << g_force_moment[5] <<  std::setw(w_)
             << g_force_moment[6] << std::setw(w_) << g_force_moment[7] << std::setw(w_) << g_force_moment[8] <<  std::setw(w_)
             << g_yplusMin << std::setw(w_) << g_yplusMax << std::endl;
      myfile.close();
    }
  }

}
//--------------------------------------------------------------------------
//-------- execute ---------------------------------------------------------
//--------------------------------------------------------------------------
void
AssemblePressureForceBCSolverAlgorithm::execute()
{

  stk::mesh::BulkData & bulk_data = realm_.bulk_data();
  stk::mesh::MetaData & meta_data = realm_.meta_data();

  const int nDim = meta_data.spatial_dimension();

   // space for LHS/RHS; nodesPerElem*nDim*nodesPerElem*nDim and nodesPerElem*nDim
  std::vector<double> lhs;
  std::vector<double> rhs;
  std::vector<stk::mesh::Entity> connected_nodes;

  // nodal fields to gather
  std::vector<double> ws_face_coordinates;
  std::vector<double> ws_bcScalarQ;

  // master element
  std::vector<double> ws_face_shape_function;

  // define vector of parent topos; should always be UNITY in size
  std::vector<stk::topology> parentTopo;

  // define some common selectors
  stk::mesh::Selector s_locally_owned_union = meta_data.locally_owned_part()
    &stk::mesh::selectUnion(partVec_);

  stk::mesh::BucketVector const& face_buckets =
    realm_.get_buckets( meta_data.side_rank(), s_locally_owned_union );
  for ( stk::mesh::BucketVector::const_iterator ib = face_buckets.begin();
        ib != face_buckets.end() ; ++ib ) {
    stk::mesh::Bucket & b = **ib ;

    // extract connected element topology
    b.parent_topology(stk::topology::ELEMENT_RANK, parentTopo);
    ThrowAssert ( parentTopo.size() == 1 );
    stk::topology theElemTopo = parentTopo[0];

    // volume master element
    MasterElement *meSCS = realm_.get_surface_master_element(theElemTopo);
    const int nodesPerElement = meSCS->nodesPerElement_;

    // face master element
    MasterElement *meFC = realm_.get_surface_master_element(b.topology());
    const int nodesPerFace = meFC->nodesPerElement_;
    std::vector<int> face_node_ordinal_vec(nodesPerFace);

    // resize some things; matrix related
    const int lhsSize = nodesPerElement*nDim*nodesPerElement*nDim;
    const int rhsSize = nodesPerElement*nDim;
    lhs.resize(lhsSize);
    rhs.resize(rhsSize);
    connected_nodes.resize(nodesPerElement);

    // algorithm related; element
    ws_face_coordinates.resize(nodesPerFace*nDim);
    ws_bcScalarQ.resize(nodesPerFace);
    ws_face_shape_function.resize(nodesPerFace*nodesPerFace);

    // pointers
    double *p_lhs = &lhs[0];
    double *p_rhs = &rhs[0];
    double *p_face_coordinates = &ws_face_coordinates[0];
    double *p_bcScalarQ = &ws_bcScalarQ[0];
    double *p_face_shape_function = &ws_face_shape_function[0];

    // shape functions
    if (use_shifted_integration_)
    {
      meFC->shifted_shape_fcn(&p_face_shape_function[0]);
    }
    else{
      meFC->shape_fcn(&p_face_shape_function[0]);
    }

    const size_t length   = b.size();

    for ( size_t k = 0 ; k < length ; ++k ) {

      // zero lhs/rhs
      for ( int p = 0; p < lhsSize; ++p )
        p_lhs[p] = 0.0;
      for ( int p = 0; p < rhsSize; ++p )
        p_rhs[p] = 0.0;

      // get face
      stk::mesh::Entity face = b[k];

      //======================================
      // gather nodal data off of face
      //======================================
      stk::mesh::Entity const * face_node_rels = bulk_data .begin_nodes(face);
      int num_face_nodes = bulk_data.num_nodes(face);
      // sanity check on num nodes
      ThrowAssert( num_face_nodes == nodesPerFace );
      for ( int ni = 0; ni < num_face_nodes; ++ni ) {
        stk::mesh::Entity node = face_node_rels[ni];

        p_bcScalarQ[ni] = *stk::mesh::field_data(*bcScalarQ_, node);

        // gather vectors
        double * coords = stk::mesh::field_data(*coordinates_, node);
        const int offSet = ni*nDim;
        for ( int i=0; i < nDim; ++i ) {
          p_face_coordinates[offSet+i] = coords[i];
        }
      }

      // extract the connected element to this exposed face; should be single in size!
      const stk::mesh::Entity* face_elem_rels = bulk_data.begin_elements(face);
      ThrowAssert( bulk_data.num_elements(face) == 1 );

      // get element; its face ordinal number and populate face_node_ordinal_vec
      stk::mesh::Entity element = face_elem_rels[0];
      const int face_ordinal = bulk_data.begin_element_ordinals(face)[0];
      theElemTopo.side_node_ordinals(face_ordinal, face_node_ordinal_vec.begin());

      //==========================================
      // gather nodal data off of element; n/a
      //==========================================
      stk::mesh::Entity const * elem_node_rels = bulk_data.begin_nodes(element);
      int num_nodes = bulk_data.num_nodes(element);
      // sanity check on num nodes
      ThrowAssert( num_nodes == nodesPerElement );
      for ( int ni = 0; ni < num_nodes; ++ni ) {
        stk::mesh::Entity node = elem_node_rels[ni];
        // set connected nodes
        connected_nodes[ni] = node;
      }

      // pointer to face data
      double * areaVec = stk::mesh::field_data(*exposedAreaVec_, face);

      // loop over face nodes
      for ( int ip = 0; ip < num_face_nodes; ++ip ) {

        const int nearestNode = face_node_ordinal_vec[ip];

        const int offSetSF_face = ip*nodesPerFace;

        // interpolate to bip
        double fluxBip = 0.0;
        for ( int ic = 0; ic < nodesPerFace; ++ic ) {
          const double r = p_face_shape_function[offSetSF_face+ic];
          fluxBip += r*p_bcScalarQ[ic];
        }

        // assemble for each of the ith component
        for ( int i = 0; i < nDim; ++i ) {
          const int indexR = nearestNode*nDim + i;
          p_rhs[indexR] -= fluxBip*areaVec[ip*nDim+i];
          // RHS only, no need to populate LHS (is zeroed out)
        }
      }

      apply_coeff(connected_nodes, rhs, lhs, __FILE__);
    }
  }
}
//--------------------------------------------------------------------------
//-------- execute ---------------------------------------------------------
//--------------------------------------------------------------------------
// Element-based assembly of the radiative transport equation for the
// current ordinate direction.
//
// For every locally owned element selected by partVec_ and every
// sub-control-surface integration point (ip), this adds to the element
// lhs/rhs:
//   - the central term I*sj*aj (intensity interpolated to the ip times the
//     ordinate direction dotted with the area vector), and
//   - the SUCV stabilization terms scaled by tau*sj*aj.
// Contributions go with opposite signs to the rows of the left (il) and
// right (ir) nodes of the ip, and the finished element system is handed to
// apply_coeff().
void
AssembleRadTransElemSolverAlgorithm::execute()
{
  stk::mesh::MetaData & meta_data = realm_.meta_data();

  const int nDim = meta_data.spatial_dimension();

  // use edge-based length scale; hard-wired, so h_vol below is computed but
  // never selected
  const bool useEdgeH = true;

  // extract current ordinate direction
  std::vector<double> Sk(nDim,0.0);
  radEqSystem_->get_current_ordinate(&Sk[0]);
  const double *p_Sk = &Sk[0];
  // the intensity field is re-fetched from the equation system on every call
  intensity_ = radEqSystem_->get_intensity();
  
  // 1/pi; acos(-1) == pi
  const double invPi = 1.0/(std::acos(-1.0));

   // space for LHS/RHS; nodesPerElem*nodesPerElem and nodesPerElem
  std::vector<double> lhs;
  std::vector<double> rhs;
  std::vector<stk::mesh::Entity> connected_nodes;

  // nodal fields to gather
  std::vector<double> ws_coordinates;
  std::vector<double> ws_intensity;
  std::vector<double> ws_absorption;
  std::vector<double> ws_scattering;
  std::vector<double> ws_scalarFlux;
  std::vector<double> ws_radiationSource;
  std::vector<double> ws_dualVolume;

  // geometry related to populate
  std::vector<double> ws_scs_areav;
  std::vector<double> ws_dndx;
  std::vector<double> ws_deriv;
  std::vector<double> ws_det_j;
  std::vector<double> ws_shape_function;

  // define some common selectors
  stk::mesh::Selector s_locally_owned_union = meta_data.locally_owned_part()
    &stk::mesh::selectUnion(partVec_);

  stk::mesh::BucketVector const& elem_buckets =
    realm_.get_buckets( stk::topology::ELEMENT_RANK, s_locally_owned_union );
  for ( stk::mesh::BucketVector::const_iterator ib = elem_buckets.begin();
        ib != elem_buckets.end() ; ++ib ) {
    stk::mesh::Bucket & b = **ib ;
    const size_t length   = b.size();

    // extract master element
    MasterElement *meSCS = realm_.get_surface_master_element(b.topology());

    // extract master element specifics; lrscv holds the (left, right) node
    // pair of each ip as [2*ip], [2*ip+1]
    const int nodesPerElement = meSCS->nodesPerElement_;
    const int numScsIp = meSCS->numIntPoints_;
    const int *lrscv = meSCS->adjacentNodes();

    // resize some things; matrix related
    const int lhsSize = nodesPerElement*nodesPerElement;
    const int rhsSize = nodesPerElement;
    lhs.resize(lhsSize);
    rhs.resize(rhsSize);
    connected_nodes.resize(nodesPerElement);

    // algorithm related
    ws_coordinates.resize(nodesPerElement*nDim);
    ws_intensity.resize(nodesPerElement);
    ws_absorption.resize(nodesPerElement);
    ws_scattering.resize(nodesPerElement);
    ws_scalarFlux.resize(nodesPerElement);
    ws_radiationSource.resize(nodesPerElement);
    ws_dualVolume.resize(nodesPerElement);
    ws_scs_areav.resize(numScsIp*nDim);
    ws_dndx.resize(nDim*numScsIp*nodesPerElement);
    ws_deriv.resize(nDim*numScsIp*nodesPerElement);
    ws_det_j.resize(numScsIp);
    ws_shape_function.resize(numScsIp*nodesPerElement);

     // pointers
    double *p_lhs = &lhs[0];
    double *p_rhs = &rhs[0];
    double *p_coordinates = &ws_coordinates[0];
    double *p_intensity = &ws_intensity[0];
    double *p_absorption = &ws_absorption[0];
    double *p_scattering = &ws_scattering[0];
    double *p_scalarFlux = &ws_scalarFlux[0];
    double *p_radiationSource = &ws_radiationSource[0];
    double *p_dualVolume = &ws_dualVolume[0];
    double *p_scs_areav = &ws_scs_areav[0];
    double *p_dndx = &ws_dndx[0];
    double *p_shape_function = &ws_shape_function[0];

    // shape functions are filled once per bucket; read as [ip*nodesPerElement + ic]
    meSCS->shape_fcn(&p_shape_function[0]);

    for ( size_t k = 0 ; k < length ; ++k ) {

        // zero lhs/rhs
      for ( int p = 0; p < lhsSize; ++p )
        p_lhs[p] = 0.0;
      for ( int p = 0; p < rhsSize; ++p )
        p_rhs[p] = 0.0;

      // get elem and its node relations
      unsigned elem_offset = k;

      //===============================================
      // gather nodal data; this is how we do it now..
      //===============================================
      stk::mesh::Entity const *  node_rels = b.begin_nodes(elem_offset);
      int num_nodes = b.num_nodes(elem_offset);

      // sanity check on num nodes
      ThrowAssert( num_nodes == nodesPerElement );

      for ( int ni = 0; ni < num_nodes; ++ni ) {
        stk::mesh::Entity node = node_rels[ni];

        // set connected nodes
        connected_nodes[ni] = node;

        // pointers to real data
        const double * coords = stk::mesh::field_data(*coordinates_, node);

        // gather scalars
        p_intensity[ni]   = *stk::mesh::field_data(*intensity_, node);
        p_absorption[ni]  = *stk::mesh::field_data(*absorption_, node );
        p_scattering[ni]  = *stk::mesh::field_data(*scattering_, node );
        p_scalarFlux[ni]  = *stk::mesh::field_data(*scalarFlux_, node );
        p_radiationSource[ni] = *stk::mesh::field_data(*radiationSource_, node );
        p_dualVolume[ni]  = *stk::mesh::field_data(*dualNodalVolume_, node );

        // gather vectors
        const int offSet = ni*nDim;
        for ( int j=0; j < nDim; ++j ) {
          p_coordinates[offSet+j] = coords[j];
        }
      }

      // compute geometry; fills the area vectors as [ip*nDim + j].
      // NOTE(review): scs_error is written by both calls below but never
      // inspected here.
      double scs_error = 0.0;
      meSCS->determinant(1, &p_coordinates[0], &p_scs_areav[0], &scs_error);

      // compute dndx; read below as [nDim*nodesPerElement*ip + ic*nDim + j]
      meSCS->grad_op(1, &p_coordinates[0], &p_dndx[0], &ws_deriv[0], &ws_det_j[0], &scs_error);

      for ( int ip = 0; ip < numScsIp; ++ip ) {
	
        // left and right nodes for this ip
        const int il = lrscv[2*ip];
        const int ir = lrscv[2*ip+1];

        // corresponding matrix rows
        int rowL = il*nodesPerElement;
        int rowR = ir*nodesPerElement;

        // form sj*njdS (part of the lhs for central term; I*sj*njdS)
        double sjaj = 0.0;
        double asq = 0.0;
        for ( int j = 0; j < nDim; ++j ) {
          const double aj = p_scs_areav[ip*nDim+j];
          sjaj += p_Sk[j]*aj;
          asq += aj*aj;
        }
        const double aMag = std::sqrt(asq);

        // integration point interpolation
        double Iscs = 0.0;
        double extCoeffscs = 0.0;
        double ePscs = 0.0;
        double isotropicScatterscs = 0.0;
        double dualNodalVscs = 0.0;
        const int offSet = ip*nodesPerElement;
        for ( int ic = 0; ic < nodesPerElement; ++ic ) {
          const double r = p_shape_function[offSet+ic];
          
          // save off some variables
          const double I = p_intensity[ic];
          const double mua = p_absorption[ic];
          const double mus = p_scattering[ic];

          // interpolation to scs; the extinction coefficient is mua+mus and
          // the isotropic scattering term evaluates to mus*scalarFlux/(4*pi)
          Iscs += r*I;
          extCoeffscs += r*(mua+mus);
          ePscs += r*p_radiationSource[ic];
          isotropicScatterscs += r*mus*p_scalarFlux[ic]/4.0*invPi;
          dualNodalVscs += r*p_dualVolume[ic];

          // assemble I*sj*njdS to lhs; left/right
          p_lhs[rowL+ic] += sjaj*r;
          p_lhs[rowR+ic] -= sjaj*r;
        }

        // rhs residual for I*sj*njdS
        p_rhs[il] -= Iscs*sjaj;
        p_rhs[ir] += Iscs*sjaj;

        // now work on SUCV stabilization terms; needed tau, hence second ic loop
        // h_edge is the left-to-right edge vector projected on the unit area normal
        double h_edge = 0.0;
        for ( int j = 0; j < nDim; ++j ) {
          const double nj = p_scs_areav[ip*nDim+j]/aMag;
          const double dxj = p_coordinates[ir*nDim+j]-p_coordinates[il*nDim+j];
          h_edge += nj*dxj;
        }

        // alternative h; nDim-th root of the interpolated dual nodal volume
        const double h_vol = std::pow(dualNodalVscs, 1.0/(double)nDim);

        // form tau; tau = 1/sqrt((2/h)^2 + extCoeff^2)
        const double h = (useEdgeH) ? h_edge : h_vol;
        const double tau = std::sqrt(1.0/((2.0/h)*(2.0/h) + extCoeffscs*extCoeffscs));
	
        // accumulates si*dI/dxi at this ip over all nodes
        double sidIdxi = 0.0;
        for ( int ic = 0; ic < nodesPerElement; ++ic ) {
          const double r = p_shape_function[offSet+ic];

          // save off some variables
          const double I = p_intensity[ic];
          
          // SUCV -tau*sj*aj*(mua+mus)*I term; left/right (residual below)
          p_lhs[rowL+ic] += -tau*sjaj*r*extCoeffscs;
          p_lhs[rowR+ic] -= -tau*sjaj*r*extCoeffscs;
	  
          // SUCV diffusion-like term; -tau*si*dI/dxi*sjaj (residual below)
          double lhsfac = 0.0;
          const int offSetDnDx = nDim*nodesPerElement*ip + ic*nDim;
          for ( int j = 0; j < nDim; ++j ) {
            const double sjdNj = p_Sk[j]*p_dndx[offSetDnDx+j];
            sidIdxi += sjdNj*I;
            lhsfac += -sjdNj;
          }
          p_lhs[rowL+ic] += tau*sjaj*lhsfac;
          p_lhs[rowR+ic] -= tau*sjaj*lhsfac;
	  
        }
	
        // full sucv residual; the transport residual
        // (si*dI/dxi + extCoeff*I - source - isotropic scattering) times -tau*sj*aj
	const double residual = -tau*sjaj*(sidIdxi + extCoeffscs*Iscs - ePscs - isotropicScatterscs);
	
        // residual; left and right
        p_rhs[il] -= residual;
        p_rhs[ir] += residual;
	
      }

      apply_coeff(connected_nodes, rhs, lhs, __FILE__);
    }

  }
}
//--------------------------------------------------------------------------
//-------- execute ---------------------------------------------------------
//--------------------------------------------------------------------------
// Surface force/moment post-processing for resolved (no wall function) walls.
//
// On steps where the time step count is a multiple of frequency_, loops over
// the locally owned faces selected by partVec_ and, at every boundary
// integration point (one per face node):
//   - interpolates pressure, density and viscosity,
//   - forms the pressure force pBip*areaVec and the viscous force from the
//     nodal velocity gradient dudx_ (plus the optional divU term),
//   - extracts the tangential part of the viscous traction for tauWall,
//   - estimates y+ using 1/4 of the normal-projected distance to the
//     opposing element node,
//   - adds into the nodal pressureForce_, tauWall_ and yplus_ fields,
//   - accumulates the local pressure force, viscous force and moment.
// The nine force/moment sums and the y+ extrema are then reduced over all
// ranks and rank 0 appends one line to outputFileName_.
// NOTE(review): the nodal fields are only incremented here; presumably they
// are zeroed by the caller before this algorithm runs -- confirm.
void
SurfaceForceAndMomentAlgorithm::execute()
{
  // check to see if this is a valid step to process output file
  const int timeStepCount = realm_.get_time_step_count();
  const bool processMe = (timeStepCount % frequency_) == 0;

  // do not waste time here
  if ( !processMe )
    return;

  // common
  stk::mesh::BulkData & bulk_data = realm_.bulk_data();
  stk::mesh::MetaData & meta_data = realm_.meta_data();
  const int nDim = meta_data.spatial_dimension();

  // set min and max values
  double yplusMin = 1.0e8;
  double yplusMax = -1.0e8;

  // nodal fields to gather
  std::vector<double> ws_pressure;
  std::vector<double> ws_density;
  std::vector<double> ws_viscosity;

  // master element
  std::vector<double> ws_face_shape_function;

  // deal with state
  ScalarFieldType &densityNp1 = density_->field_of_state(stk::mesh::StateNP1);

  // define vector of parent topos; should always be UNITY in size
  std::vector<stk::topology> parentTopo;

  const double currentTime = realm_.get_current_time();

  // local force and moment; i.e., to be assembled
  // layout: [0-2] pressure force, [3-5] viscous force, [6-8] moment
  double l_force_moment[9] = {};

  // work force, moment and radius; i.e., to be pushed to cross_product().
  // These are always three wide; in 2D the third component stays zero.
  double ws_p_force[3] = {};
  double ws_v_force[3] = {};
  double ws_t_force[3] = {};
  double ws_tau[3] = {};
  double ws_moment[3] = {};
  double ws_radius[3] = {};

  // will need surface normal
  double ws_normal[3] = {};

  // centroid; parameters_ may hold fewer than three entries (the remaining
  // components stay zero). Entries past the third are ignored so that the
  // fixed-size array can never be overrun.
  double centroid[3] = {};
  const size_t numCentroid = std::min<size_t>(parameters_.size(), 3);
  for ( size_t k = 0; k < numCentroid; ++k)
    centroid[k] = parameters_[k];

  // define some common selectors
  stk::mesh::Selector s_locally_owned_union = meta_data.locally_owned_part()
    &stk::mesh::selectUnion(partVec_);

  stk::mesh::BucketVector const& face_buckets =
    realm_.get_buckets( meta_data.side_rank(), s_locally_owned_union );
  for ( stk::mesh::BucketVector::const_iterator ib = face_buckets.begin();
        ib != face_buckets.end() ; ++ib ) {
    stk::mesh::Bucket & b = **ib ;

    // face master element
    MasterElement *meFC = realm_.get_surface_master_element(b.topology());
    const int nodesPerFace = meFC->nodesPerElement_;
    std::vector<int> face_node_ordinal_vec(nodesPerFace);

    // extract connected element topology
    b.parent_topology(stk::topology::ELEMENT_RANK, parentTopo);
    ThrowAssert ( parentTopo.size() == 1 );
    stk::topology theElemTopo = parentTopo[0];

    // extract master element for this element topo
    MasterElement *meSCS = realm_.get_surface_master_element(theElemTopo);

    // algorithm related; element
    ws_pressure.resize(nodesPerFace);
    ws_density.resize(nodesPerFace);
    ws_viscosity.resize(nodesPerFace);
    ws_face_shape_function.resize(nodesPerFace*nodesPerFace);

    // pointers
    double *p_pressure = &ws_pressure[0];
    double *p_density = &ws_density[0];
    double *p_viscosity = &ws_viscosity[0];
    double *p_face_shape_function = &ws_face_shape_function[0];

    // shape functions; indexed below as [ip*nodesPerFace + ic]
    if ( useShifted_ )
      meFC->shifted_shape_fcn(&p_face_shape_function[0]);
    else
      meFC->shape_fcn(&p_face_shape_function[0]);

    const stk::mesh::Bucket::size_type length   = b.size();

    for ( stk::mesh::Bucket::size_type k = 0 ; k < length ; ++k ) {

      // get face
      stk::mesh::Entity face = b[k];

      // face node relations
      stk::mesh::Entity const * face_node_rels = bulk_data.begin_nodes(face);

      //======================================
      // gather nodal data off of face
      //======================================
      for ( int ni = 0; ni < nodesPerFace; ++ni ) {
        stk::mesh::Entity node = face_node_rels[ni];
        // gather scalars
        p_pressure[ni]    = *stk::mesh::field_data(*pressure_, node);
        p_density[ni] = *stk::mesh::field_data(densityNp1, node);
        p_viscosity[ni] = *stk::mesh::field_data(*viscosity_, node);
      }

      // pointer to face data
      const double * areaVec = stk::mesh::field_data(*exposedAreaVec_, face);

      // extract the connected element to this exposed face; should be single in size!
      const stk::mesh::Entity* face_elem_rels = bulk_data.begin_elements(face);
      ThrowAssert( bulk_data.num_elements(face) == 1 );

      // get element; its face ordinal number and populate face_node_ordinal_vec
      stk::mesh::Entity element = face_elem_rels[0];
      const int face_ordinal = bulk_data.begin_element_ordinals(face)[0];
      theElemTopo.side_node_ordinals(face_ordinal, face_node_ordinal_vec.begin());

      // get the relations off of element
      stk::mesh::Entity const * elem_node_rels = bulk_data.begin_nodes(element);

      for ( int ip = 0; ip < nodesPerFace; ++ip ) {

        // offsets
        const int offSetAveraVec = ip*nDim;
        const int offSetSF_face = ip*nodesPerFace;

        // interpolate to bip
        double pBip = 0.0;
        double rhoBip = 0.0;
        double muBip = 0.0;
        for ( int ic = 0; ic < nodesPerFace; ++ic ) {
          const double r = p_face_shape_function[offSetSF_face+ic];
          pBip += r*p_pressure[ic];
          rhoBip += r*p_density[ic];
          muBip += r*p_viscosity[ic];
        }

        // extract nodal fields; duidxj is read as [nDim*i + j]
        stk::mesh::Entity node = face_node_rels[ip];
        const double * coord = stk::mesh::field_data(*coordinates_, node );
        const double *duidxj = stk::mesh::field_data(*dudx_, node );
        double *pressureForce = stk::mesh::field_data(*pressureForce_, node );
        double *tauWall = stk::mesh::field_data(*tauWall_, node );
        double *yplus = stk::mesh::field_data(*yplus_, node );
        const double assembledArea = *stk::mesh::field_data(*assembledArea_, node );

        // divU and aMag
        double divU = 0.0;
        double aMag = 0.0;
        for ( int j = 0; j < nDim; ++j) {
          divU += duidxj[j*nDim+j];
          aMag += areaVec[offSetAveraVec+j]*areaVec[offSetAveraVec+j];
        }
        aMag = std::sqrt(aMag);

        // normal
        for ( int i = 0; i < nDim; ++i ) {
          const double ai = areaVec[offSetAveraVec+i];
          ws_normal[i] = ai/aMag;
        }

        // load radius; assemble force -sigma_ij*njdS and compute tau_ij njDs
        for ( int i = 0; i < nDim; ++i ) {
          const double ai = areaVec[offSetAveraVec+i];
          ws_radius[i] = coord[i] - centroid[i];
          // set forces
          ws_v_force[i] = 2.0/3.0*muBip*divU*includeDivU_*ai;
          ws_p_force[i] = pBip*ai;
          pressureForce[i] += pBip*ai;
          double dflux = 0.0;
          double tauijNj = 0.0;
          const int offSetI = nDim*i;
          for ( int j = 0; j < nDim; ++j ) {
            const int offSetTrans = nDim*j+i;
            dflux += -muBip*(duidxj[offSetI+j] + duidxj[offSetTrans])*areaVec[offSetAveraVec+j];
            tauijNj += -muBip*(duidxj[offSetI+j] + duidxj[offSetTrans])*ws_normal[j];
          }
          // accumulate viscous force and set tau for component i
          ws_v_force[i] += dflux;
          ws_tau[i] = tauijNj;
        }

        // compute total force and tangential tau; (delta_ij - ni*nj)*tau_j
        double tauTangential = 0.0;
        for ( int i = 0; i < nDim; ++i ) {
          ws_t_force[i] = ws_p_force[i] + ws_v_force[i];
          double tauiTangential = (1.0-ws_normal[i]*ws_normal[i])*ws_tau[i];
          for ( int j = 0; j < nDim; ++j ) {
            if ( i != j )
              tauiTangential -= ws_normal[i]*ws_normal[j]*ws_tau[j];
          }
          tauTangential += tauiTangential*tauiTangential;
        }

        // wall shear stress magnitude; shared by the tauWall assembly and
        // the yplus estimate below
        const double tauW = std::sqrt(tauTangential);

        // assemble nodal quantities; scaled by area for L2 lumped nodal projection
        const double areaFac = aMag/assembledArea;
        *tauWall += tauW*areaFac;

        cross_product(&ws_t_force[0], &ws_moment[0], &ws_radius[0]);

        // assemble force and moment
        for ( int j = 0; j < 3; ++j ) {
          l_force_moment[j] += ws_p_force[j];
          l_force_moment[j+3] += ws_v_force[j];
          l_force_moment[j+6] += ws_moment[j];
        }

        // deal with yplus
        const int opposingNode = meSCS->opposingNodes(face_ordinal,ip);
        const int nearestNode = face_node_ordinal_vec[ip];

        // left and right nodes; right is on the face; left is the opposing node
        stk::mesh::Entity nodeL = elem_node_rels[opposingNode];
        stk::mesh::Entity nodeR = elem_node_rels[nearestNode];

        // extract nodal fields
        const double * coordL = stk::mesh::field_data(*coordinates_, nodeL );
        const double * coordR = stk::mesh::field_data(*coordinates_, nodeR );

        // determine yp (approximated by 1/4 distance along edge)
        double ypBip = 0.0;
        for ( int j = 0; j < nDim; ++j ) {
          const double nj = ws_normal[j];
          const double ej = 0.25*(coordR[j] - coordL[j]);
          ypBip += nj*ej*nj*ej;
        }
        ypBip = std::sqrt(ypBip);

        // friction velocity from the wall shear stress, then yplus
        const double uTau = std::sqrt(tauW/rhoBip);
        const double yplusBip = rhoBip*ypBip/muBip*uTau;

        // nodal field
        *yplus += yplusBip*areaFac;

        // min and max
        yplusMin = std::min(yplusMin, yplusBip);
        yplusMax = std::max(yplusMax, yplusBip);

      }
    }
  }

  // parallel assemble and output
  double g_force_moment[9] = {};
  stk::ParallelMachine comm = NaluEnv::self().parallel_comm();

  // Parallel assembly of L2
  stk::all_reduce_sum(comm, &l_force_moment[0], &g_force_moment[0], 9);

  // min/max
  double g_yplusMin = 0.0, g_yplusMax = 0.0;
  stk::all_reduce_min(comm, &yplusMin, &g_yplusMin, 1);
  stk::all_reduce_max(comm, &yplusMax, &g_yplusMax, 1);

  // only rank 0 writes; one line is appended per processed step:
  // time, pressure force (3), viscous force (3), moment (3), yplus min, max
  if ( NaluEnv::self().parallel_rank() == 0 ) {
    std::ofstream myfile;
    myfile.open(outputFileName_.c_str(), std::ios_base::app);
    myfile << std::setprecision(6) 
           << std::setw(w_) 
           << currentTime << std::setw(w_) 
           << g_force_moment[0] << std::setw(w_) << g_force_moment[1] << std::setw(w_) << g_force_moment[2] << std::setw(w_)
           << g_force_moment[3] << std::setw(w_) << g_force_moment[4] << std::setw(w_) << g_force_moment[5] <<  std::setw(w_)
           << g_force_moment[6] << std::setw(w_) << g_force_moment[7] << std::setw(w_) << g_force_moment[8] <<  std::setw(w_)
           << g_yplusMin << std::setw(w_) << g_yplusMax << std::endl;
    myfile.close();
  }

}
//--------------------------------------------------------------------------
//-------- execute ---------------------------------------------------------
//--------------------------------------------------------------------------
void
AssembleContinuityElemSolverAlgorithm::execute()
{

  stk::mesh::MetaData & meta_data = realm_.meta_data();

  const int nDim = meta_data.spatial_dimension();

  // time step
  const double dt = realm_.get_time_step();
  const double gamma1 = realm_.get_gamma1();
  const double projTimeScale = dt/gamma1;

  // deal with interpolation procedure
  const double interpTogether = realm_.get_mdot_interp();
  const double om_interpTogether = 1.0-interpTogether;

  // space for LHS/RHS; nodesPerElem*nodesPerElem and nodesPerElem
  std::vector<double> lhs;
  std::vector<double> rhs;
  std::vector<stk::mesh::Entity> connected_nodes;

  // supplemental algorithm setup
  const size_t supplementalAlgSize = supplementalAlg_.size();
  for ( size_t i = 0; i < supplementalAlgSize; ++i )
    supplementalAlg_[i]->setup();

  // nodal fields to gather
  std::vector<double> ws_vrtm;
  std::vector<double> ws_Gpdx;
  std::vector<double> ws_coordinates;
  std::vector<double> ws_pressure;
  std::vector<double> ws_density;

  // geometry related to populate
  std::vector<double> ws_scs_areav;
  std::vector<double> ws_dndx;
  std::vector<double> ws_dndx_lhs;
  std::vector<double> ws_deriv;
  std::vector<double> ws_det_j;
  std::vector<double> ws_shape_function;

  // integration point data that depends on size
  std::vector<double> uIp(nDim);
  std::vector<double> rho_uIp(nDim);
  std::vector<double> GpdxIp(nDim);
  std::vector<double> dpdxIp(nDim);

  // pointers to everyone...
  double *p_uIp = &uIp[0];
  double *p_rho_uIp = &rho_uIp[0];
  double *p_GpdxIp = &GpdxIp[0];
  double *p_dpdxIp = &dpdxIp[0];

  // deal with state
  ScalarFieldType &densityNp1 = density_->field_of_state(stk::mesh::StateNP1);

  // define some common selectors
  stk::mesh::Selector s_locally_owned_union = meta_data.locally_owned_part()
    & stk::mesh::selectUnion(partVec_) 
    & !(realm_.get_inactive_selector());

  stk::mesh::BucketVector const& elem_buckets =
    realm_.get_buckets( stk::topology::ELEMENT_RANK, s_locally_owned_union );
  for ( stk::mesh::BucketVector::const_iterator ib = elem_buckets.begin();
        ib != elem_buckets.end() ; ++ib ) {
    stk::mesh::Bucket & b = **ib ;
    const stk::mesh::Bucket::size_type length   = b.size();

    // extract master element
    MasterElement *meSCS = realm_.get_surface_master_element(b.topology());
    MasterElement *meSCV = realm_.get_volume_master_element(b.topology());

    // extract master element specifics
    const int nodesPerElement = meSCS->nodesPerElement_;
    const int numScsIp = meSCS->numIntPoints_;
    const int *lrscv = meSCS->adjacentNodes();

    // resize some things; matrix related
    const int lhsSize = nodesPerElement*nodesPerElement;
    const int rhsSize = nodesPerElement;
    lhs.resize(lhsSize);
    rhs.resize(rhsSize);
    connected_nodes.resize(nodesPerElement);

    // algorithm related
    ws_vrtm.resize(nodesPerElement*nDim);
    ws_Gpdx.resize(nodesPerElement*nDim);
    ws_coordinates.resize(nodesPerElement*nDim);
    ws_pressure.resize(nodesPerElement);
    ws_density.resize(nodesPerElement);
    ws_scs_areav.resize(numScsIp*nDim);
    ws_dndx.resize(nDim*numScsIp*nodesPerElement);
    ws_dndx_lhs.resize(nDim*numScsIp*nodesPerElement);
    ws_deriv.resize(nDim*numScsIp*nodesPerElement);
    ws_det_j.resize(numScsIp);
    ws_shape_function.resize(numScsIp*nodesPerElement);

    // pointers
    double *p_lhs = &lhs[0];
    double *p_rhs = &rhs[0];
    double *p_vrtm = &ws_vrtm[0];
    double *p_Gpdx = &ws_Gpdx[0];
    double *p_coordinates = &ws_coordinates[0];
    double *p_pressure = &ws_pressure[0];
    double *p_density = &ws_density[0];
    double *p_scs_areav = &ws_scs_areav[0];
    double *p_dndx = &ws_dndx[0];
    double *p_dndx_lhs = reducedSensitivities_ ? &ws_dndx_lhs[0] : &ws_dndx[0];
    double *p_shape_function = &ws_shape_function[0];

    if ( shiftMdot_)
      meSCS->shifted_shape_fcn(&p_shape_function[0]);
    else
      meSCS->shape_fcn(&p_shape_function[0]);

    // resize possible supplemental element alg
    for ( size_t i = 0; i < supplementalAlgSize; ++i )
      supplementalAlg_[i]->elem_resize(meSCS, meSCV);

    for ( stk::mesh::Bucket::size_type k = 0 ; k < length ; ++k ) {

      // get elem
      stk::mesh::Entity elem = b[k];

      // zero lhs/rhs
      for ( int p = 0; p < lhsSize; ++p )
        p_lhs[p] = 0.0;
      for ( int p = 0; p < rhsSize; ++p )
        p_rhs[p] = 0.0;

      //===============================================
      // gather nodal data; this is how we do it now..
      //===============================================
      stk::mesh::Entity const *  node_rels = b.begin_nodes(k);
      int num_nodes = b.num_nodes(k);

      // sanity check on num nodes
      ThrowAssert( num_nodes == nodesPerElement );

      for ( int ni = 0; ni < num_nodes; ++ni ) {
        stk::mesh::Entity node = node_rels[ni];

        // set connected nodes
        connected_nodes[ni] = node;

        // pointers to real data
        const double * Gjp    = stk::mesh::field_data(*Gpdx_, node );
        const double * coords = stk::mesh::field_data(*coordinates_, node );
        const double * vrtm   = stk::mesh::field_data(*velocityRTM_, node );

        // gather scalars
        p_pressure[ni] = *stk::mesh::field_data(*pressure_, node );
        p_density[ni]  = *stk::mesh::field_data(densityNp1, node );

        // gather vectors
        const int niNdim = ni*nDim;
        for ( int j=0; j < nDim; ++j ) {
          p_vrtm[niNdim+j] = vrtm[j];
          p_Gpdx[niNdim+j] = Gjp[j];
          p_coordinates[niNdim+j] = coords[j];
        }
      }

      // compute geometry
      double scs_error = 0.0;
      meSCS->determinant(1, &p_coordinates[0], &p_scs_areav[0], &scs_error);

      // compute dndx for residual
      if ( shiftPoisson_ )
        meSCS->shifted_grad_op(1, &p_coordinates[0], &ws_dndx[0], &ws_deriv[0], &ws_det_j[0], &scs_error);
      else
        meSCS->grad_op(1, &p_coordinates[0], &ws_dndx[0], &ws_deriv[0], &ws_det_j[0], &scs_error);
      
      // compute dndx for LHS
      if ( reducedSensitivities_ )
        meSCS->shifted_grad_op(1, &p_coordinates[0], &ws_dndx_lhs[0], &ws_deriv[0], &ws_det_j[0], &scs_error);

      for ( int ip = 0; ip < numScsIp; ++ip ) {

        // left and right nodes for this ip
        const int il = lrscv[2*ip];
        const int ir = lrscv[2*ip+1];

        // corresponding matrix rows
        int rowL = il*nodesPerElement;
        int rowR = ir*nodesPerElement;

        // setup for ip values; sneak in geometry for possible reduced sens
        for ( int j = 0; j < nDim; ++j ) {
          p_uIp[j] = 0.0;
          p_rho_uIp[j] = 0.0;
          p_GpdxIp[j] = 0.0;
          p_dpdxIp[j] = 0.0;
        }
        double rhoIp = 0.0;

        const int offSet = ip*nodesPerElement;
        for ( int ic = 0; ic < nodesPerElement; ++ic ) {

          const double r = p_shape_function[offSet+ic];
          const double nodalPressure = p_pressure[ic];
          const double nodalRho = p_density[ic];

          rhoIp += r*nodalRho;

          double lhsfac = 0.0;
          const int offSetDnDx = nDim*nodesPerElement*ip + ic*nDim;
          for ( int j = 0; j < nDim; ++j ) {
            p_GpdxIp[j] += r*p_Gpdx[nDim*ic+j];
            p_uIp[j] += r*p_vrtm[nDim*ic+j];
            p_rho_uIp[j] += r*nodalRho*p_vrtm[nDim*ic+j];
            p_dpdxIp[j] += p_dndx[offSetDnDx+j]*nodalPressure;
            lhsfac += -p_dndx_lhs[offSetDnDx+j]*p_scs_areav[ip*nDim+j];
          }

          // assemble to lhs; left
          p_lhs[rowL+ic] += lhsfac;

          // assemble to lhs; right
          p_lhs[rowR+ic] -= lhsfac;

        }

        // assemble mdot
        double mdot = 0.0;
        for ( int j = 0; j < nDim; ++j ) {
          mdot += (interpTogether*p_rho_uIp[j] + om_interpTogether*rhoIp*p_uIp[j] 
                   - projTimeScale*(p_dpdxIp[j] - p_GpdxIp[j]))*p_scs_areav[ip*nDim+j];
        }

        // residual; left and right
        p_rhs[il] -= mdot/projTimeScale;
        p_rhs[ir] += mdot/projTimeScale;
      }

      // call supplemental
      for ( size_t i = 0; i < supplementalAlgSize; ++i )
        supplementalAlg_[i]->elem_execute( &lhs[0], &rhs[0], elem, meSCS, meSCV);

      apply_coeff(connected_nodes, rhs, lhs, __FILE__);

    }
  }
}
예제 #14
0
//--------------------------------------------------------------------------
//-------- execute ---------------------------------------------------------
//--------------------------------------------------------------------------
void
ComputeMdotElemAlgorithm::execute()
{

  stk::mesh::MetaData & meta_data = realm_.meta_data();

  const int nDim = meta_data.spatial_dimension();

  // time step
  const double dt = realm_.get_time_step();
  const double gamma1 = realm_.get_gamma1();
  const double projTimeScale = dt/gamma1;

  // deal with interpolation procedure
  const double interpTogether = realm_.get_mdot_interp();
  const double om_interpTogether = 1.0-interpTogether;

  // nodal fields to gather
  std::vector<double> ws_vrtm;
  std::vector<double> ws_Gpdx;
  std::vector<double> ws_coordinates;
  std::vector<double> ws_pressure;
  std::vector<double> ws_density;

  // geometry related to populate
  std::vector<double> ws_scs_areav;
  std::vector<double> ws_dndx;
  std::vector<double> ws_deriv;
  std::vector<double> ws_det_j;
  std::vector<double> ws_shape_function;

  // integration point data that depends on size
  std::vector<double> uIp(nDim);
  std::vector<double> rho_uIp(nDim);
  std::vector<double> GpdxIp(nDim);
  std::vector<double> dpdxIp(nDim);

  // pointers to everyone...
  double *p_uIp = &uIp[0];
  double *p_rho_uIp = &rho_uIp[0];
  double *p_GpdxIp = &GpdxIp[0];
  double *p_dpdxIp = &dpdxIp[0];

  // deal with state
  ScalarFieldType &densityNp1 = density_->field_of_state(stk::mesh::StateNP1);

  // define some common selectors
  stk::mesh::Selector s_locally_owned_union = meta_data.locally_owned_part()
    &stk::mesh::selectUnion(partVec_);

  stk::mesh::BucketVector const& elem_buckets =
    realm_.get_buckets( stk::topology::ELEMENT_RANK, s_locally_owned_union );
  for ( stk::mesh::BucketVector::const_iterator ib = elem_buckets.begin();
        ib != elem_buckets.end() ; ++ib ) {
    stk::mesh::Bucket & b = **ib ;
    const stk::mesh::Bucket::size_type length   = b.size();

    // extract master element
    MasterElement *meSCS = realm_.get_surface_master_element(b.topology());

    // extract master element specifics
    const int nodesPerElement = meSCS->nodesPerElement_;
    const int numScsIp = meSCS->numIntPoints_;

    // algorithm related
    ws_vrtm.resize(nodesPerElement*nDim);
    ws_Gpdx.resize(nodesPerElement*nDim);
    ws_coordinates.resize(nodesPerElement*nDim);
    ws_pressure.resize(nodesPerElement);
    ws_density.resize(nodesPerElement);
    ws_scs_areav.resize(numScsIp*nDim);
    ws_dndx.resize(nDim*numScsIp*nodesPerElement);
    ws_deriv.resize(nDim*numScsIp*nodesPerElement);
    ws_det_j.resize(numScsIp);
    ws_shape_function.resize(numScsIp*nodesPerElement);

    // pointers
    double *p_vrtm = &ws_vrtm[0];
    double *p_Gpdx = &ws_Gpdx[0];
    double *p_coordinates = &ws_coordinates[0];
    double *p_pressure = &ws_pressure[0];
    double *p_density = &ws_density[0];
    double *p_scs_areav = &ws_scs_areav[0];
    double *p_dndx = &ws_dndx[0];
    double *p_shape_function = &ws_shape_function[0];
    
    if ( shiftMdot_)
      meSCS->shifted_shape_fcn(&p_shape_function[0]);
    else
      meSCS->shape_fcn(&p_shape_function[0]);
    
    for ( stk::mesh::Bucket::size_type k = 0 ; k < length ; ++k ) {

      // pointers to elem data
      double * mdot = stk::mesh::field_data(*massFlowRate_, b, k );

      //===============================================
      // gather nodal data; this is how we do it now..
      //===============================================
      stk::mesh::Entity const * node_rels = b.begin_nodes(k);
      int num_nodes = b.num_nodes(k);

      // sanity check on num nodes
      ThrowAssert( num_nodes == nodesPerElement );

      for ( int ni = 0; ni < num_nodes; ++ni ) {
        stk::mesh::Entity node = node_rels[ni];

        // pointers to real data
        const double * vrtm   = stk::mesh::field_data(*velocityRTM_, node);
        const double * Gjp    = stk::mesh::field_data(*Gpdx_, node);
        const double * coords = stk::mesh::field_data(*coordinates_, node);

        // gather scalars
        p_pressure[ni] = *stk::mesh::field_data(*pressure_, node);
        p_density[ni]  = *stk::mesh::field_data(densityNp1, node);

        // gather vectors
        const int offSet = ni*nDim;
        for ( int j=0; j < nDim; ++j ) {
          p_vrtm[offSet+j] = vrtm[j];
          p_Gpdx[offSet+j] = Gjp[j];
          p_coordinates[offSet+j] = coords[j];
        }
      }

      // compute geometry
      double scs_error = 0.0;
      meSCS->determinant(1, &p_coordinates[0], &p_scs_areav[0], &scs_error);

      // compute dndx
      if (shiftPoisson_)
        meSCS->shifted_grad_op(1, &p_coordinates[0], &p_dndx[0], &ws_deriv[0], &ws_det_j[0], &scs_error);
      else
        meSCS->grad_op(1, &p_coordinates[0], &p_dndx[0], &ws_deriv[0], &ws_det_j[0], &scs_error);
      
      for ( int ip = 0; ip < numScsIp; ++ip ) {

        // setup for ip values
        for ( int j = 0; j < nDim; ++j ) {
          p_uIp[j] = 0.0;
          p_rho_uIp[j] = 0.0;
          p_GpdxIp[j] = 0.0;
          p_dpdxIp[j] = 0.0;
        }
        double rhoIp = 0.0;

        const int offSet = ip*nodesPerElement;
        for ( int ic = 0; ic < nodesPerElement; ++ic ) {

          const double r = p_shape_function[offSet+ic];
          const double nodalPressure = p_pressure[ic];
          const double nodalRho = p_density[ic];

          rhoIp += r*nodalRho;

          const int offSetDnDx = nDim*nodesPerElement*ip + ic*nDim;
          for ( int j = 0; j < nDim; ++j ) {
            p_GpdxIp[j] += r*p_Gpdx[nDim*ic+j];
            p_uIp[j] += r*p_vrtm[nDim*ic+j];
            p_rho_uIp[j] += r*nodalRho*p_vrtm[nDim*ic+j];
            p_dpdxIp[j] += p_dndx[offSetDnDx+j]*nodalPressure;
          }
        }

        // assemble mdot
        double tmdot = 0.0;
        for ( int j = 0; j < nDim; ++j ) {
          tmdot += (interpTogether*p_rho_uIp[j] + om_interpTogether*rhoIp*p_uIp[j] 
                    - projTimeScale*(p_dpdxIp[j] - p_GpdxIp[j]))*p_scs_areav[ip*nDim+j];
        }

        mdot[ip] = tmdot;

      }
    }
  }

  // check for edge-mdot assembly
  if ( assembleMdotToEdge_ )
    assemble_edge_mdot();
}
//--------------------------------------------------------------------------
//-------- execute ---------------------------------------------------------
//--------------------------------------------------------------------------
void
AssembleScalarElemOpenSolverAlgorithm::execute()
{

  stk::mesh::BulkData & bulk_data = realm_.bulk_data();
  stk::mesh::MetaData & meta_data = realm_.meta_data();

  const int nDim = meta_data.spatial_dimension();

  const double small = 1.0e-16;

  // extract user advection options (allow to potentially change over time)
  const std::string dofName = scalarQ_->name();
  const double alphaUpw = realm_.get_alpha_upw_factor(dofName);
  const double hoUpwind = realm_.get_upw_factor(dofName);

  // one minus flavor..
  const double om_alphaUpw = 1.0-alphaUpw;

  // space for LHS/RHS; nodesPerElement*nodesPerElement and nodesPerElement
  std::vector<double> lhs;
  std::vector<double> rhs;
  std::vector<int> scratchIds;
  std::vector<double> scratchVals;
  std::vector<stk::mesh::Entity> connected_nodes;

  // ip values; only boundary
  std::vector<double> coordBip(nDim);

  // pointers to fixed values
  double *p_coordBip = &coordBip[0];

  // nodal fields to gather
  std::vector<double> ws_face_coordinates;
  std::vector<double> ws_scalarQNp1;
  std::vector<double> ws_bcScalarQ;

  // master element
  std::vector<double> ws_face_shape_function;

  // deal with state
  ScalarFieldType &scalarQNp1 = scalarQ_->field_of_state(stk::mesh::StateNP1);
  ScalarFieldType &densityNp1 = density_->field_of_state(stk::mesh::StateNP1);

  // define vector of parent topos; should always be UNITY in size
  std::vector<stk::topology> parentTopo;

  // define some common selectors
  stk::mesh::Selector s_locally_owned_union = meta_data.locally_owned_part()
    &stk::mesh::selectUnion(partVec_);

  stk::mesh::BucketVector const& face_buckets =
    realm_.get_buckets( meta_data.side_rank(), s_locally_owned_union );
  for ( stk::mesh::BucketVector::const_iterator ib = face_buckets.begin();
        ib != face_buckets.end() ; ++ib ) {
    stk::mesh::Bucket & b = **ib ;

    // extract connected element topology
    b.parent_topology(stk::topology::ELEMENT_RANK, parentTopo);
    ThrowAssert ( parentTopo.size() == 1 );
    stk::topology theElemTopo = parentTopo[0];

    // volume master element
    MasterElement *meSCS = realm_.get_surface_master_element(theElemTopo);
    const int nodesPerElement = meSCS->nodesPerElement_;

    // face master element
    MasterElement *meFC = realm_.get_surface_master_element(b.topology());
    const int nodesPerFace = meFC->nodesPerElement_;
    const int numScsBip = meFC->numIntPoints_;
    std::vector<int> face_node_ordinal_vec(nodesPerFace);

    // resize some things; matrix related
    const int lhsSize = nodesPerElement*nodesPerElement;
    const int rhsSize = nodesPerElement;
    lhs.resize(lhsSize);
    rhs.resize(rhsSize);
    scratchIds.resize(rhsSize);
    scratchVals.resize(rhsSize);
    connected_nodes.resize(nodesPerElement);

    // algorithm related; element
    ws_face_coordinates.resize(nodesPerFace*nDim);
    ws_scalarQNp1.resize(nodesPerFace);
    ws_bcScalarQ.resize(nodesPerFace);
    ws_face_shape_function.resize(numScsBip*nodesPerFace);

    // pointers
    double *p_lhs = &lhs[0];
    double *p_rhs = &rhs[0];
    double *p_face_coordinates = &ws_face_coordinates[0];
    double *p_scalarQNp1 = &ws_scalarQNp1[0];
    double *p_bcScalarQ = &ws_bcScalarQ[0];
    double *p_face_shape_function = &ws_face_shape_function[0];

    // shape functions
    meFC->shape_fcn(&p_face_shape_function[0]);

    const stk::mesh::Bucket::size_type length   = b.size();

    for ( stk::mesh::Bucket::size_type k = 0 ; k < length ; ++k ) {

      // zero lhs/rhs
      for ( int p = 0; p < lhsSize; ++p )
        p_lhs[p] = 0.0;
      for ( int p = 0; p < rhsSize; ++p )
        p_rhs[p] = 0.0;

      // get face
      stk::mesh::Entity face = b[k];

      // pointer to face data
      const double * mdot = stk::mesh::field_data(*openMassFlowRate_, face);

      //======================================
      // gather nodal data off of face
      //======================================
      stk::mesh::Entity const * face_node_rels = bulk_data.begin_nodes(face);
      int num_face_nodes = bulk_data.num_nodes(face);
      // sanity check on num nodes
      ThrowAssert( num_face_nodes == nodesPerFace );
      for ( int ni = 0; ni < num_face_nodes; ++ni ) {
        stk::mesh::Entity node = face_node_rels[ni];

        // gather scalars
        p_scalarQNp1[ni] = *stk::mesh::field_data(scalarQNp1, node);
        p_bcScalarQ[ni] = *stk::mesh::field_data(*bcScalarQ_, node);

        // gather vectors
        double * coords = stk::mesh::field_data(*coordinates_, node);
        const int offSet = ni*nDim;
        for ( int i=0; i < nDim; ++i ) {
          p_face_coordinates[offSet+i] = coords[i];
        }
      }

      // extract the connected element to this exposed face; should be single in size!
      const stk::mesh::Entity* face_elem_rels = bulk_data.begin_elements(face);
      ThrowAssert( bulk_data.num_elements(face) == 1 );

      // get element; its face ordinal number and populate face_node_ordinal_vec
      stk::mesh::Entity element = face_elem_rels[0];
      const int face_ordinal = bulk_data.begin_element_ordinals(face)[0];
      theElemTopo.side_node_ordinals(face_ordinal, face_node_ordinal_vec.begin());

      // mapping from ip to nodes for this ordinal
      const int *ipNodeMap = meSCS->ipNodeMap(face_ordinal);

      //==========================================
      // gather nodal data off of element; n/a
      //==========================================
      stk::mesh::Entity const * elem_node_rels = bulk_data.begin_nodes(element);
      int num_nodes = bulk_data.num_nodes(element);
      // sanity check on num nodes
      ThrowAssert( num_nodes == nodesPerElement );
      for ( int ni = 0; ni < num_nodes; ++ni ) {
        // set connected nodes
        connected_nodes[ni] = elem_node_rels[ni];
      }

      // loop over face nodes
      for ( int ip = 0; ip < numScsBip; ++ip ) {

        const int opposingNode = meSCS->opposingNodes(face_ordinal,ip);
        const int nearestNode = ipNodeMap[ip];

        const int offSetSF_face = ip*nodesPerFace;

        // left and right nodes; right is on the face; left is the opposing node
        stk::mesh::Entity nodeL = elem_node_rels[opposingNode];
        stk::mesh::Entity nodeR = elem_node_rels[nearestNode];

        // zero out vector quantities
        for ( int j = 0; j < nDim; ++j )
          p_coordBip[j] = 0.0;

        // interpolate to bip
        double qIp = 0.0;
        double qIpEntrain = 0.0;
        for ( int ic = 0; ic < nodesPerFace; ++ic ) {
          const double r = p_face_shape_function[offSetSF_face+ic];
          qIp += r*p_scalarQNp1[ic];
          qIpEntrain += r*p_bcScalarQ[ic];
          const int offSetFN = ic*nDim;
          for ( int j = 0; j < nDim; ++j ) {
            p_coordBip[j] += r*p_face_coordinates[offSetFN+j];
          }
        }

        // Peclet factor; along the edge is fine
        const double densL       = *stk::mesh::field_data(densityNp1, nodeL);
        const double densR       = *stk::mesh::field_data(densityNp1, nodeR);
        const double diffCoeffL  = *stk::mesh::field_data(*diffFluxCoeff_, nodeL);
        const double diffCoeffR  = *stk::mesh::field_data(*diffFluxCoeff_, nodeR);
        const double scalarQNp1R = *stk::mesh::field_data(scalarQNp1, nodeR);
        const double *vrtmL      =  stk::mesh::field_data(*velocityRTM_, nodeL);
        const double *vrtmR      =  stk::mesh::field_data(*velocityRTM_, nodeR);
        const double *coordL     =  stk::mesh::field_data(*coordinates_, nodeL);
        const double *coordR     =  stk::mesh::field_data(*coordinates_, nodeR);
        const double *dqdxR      =  stk::mesh::field_data(*dqdx_, nodeR);

        double udotx = 0.0;
        double dqR = 0.0;
        for ( int i = 0; i < nDim; ++i ) {
          const double dxi = coordR[i]  - coordL[i];
          udotx += 0.5*dxi*(vrtmL[i] + vrtmR[i]);
          // extrapolation
          const double dx_bip = coordBip[i] - coordR[i];
          dqR += dx_bip*dqdxR[i]*hoUpwind;
        }
        const double qIpUpw = scalarQNp1R + dqR;

        const double diffIp = 0.5*(diffCoeffL/densL + diffCoeffR/densR);
        const double pecfac = pecletFunction_->execute(std::abs(udotx)/(diffIp+small));
        const double om_pecfac = 1.0-pecfac;

        //================================
        // advection first (and only)
        //================================
        const double tmdot = mdot[ip];

        const int rowR = nearestNode*nodesPerElement;

        // advection; leaving the domain
        if ( tmdot > 0.0 ) {

          // central; is simply qIp

          // upwind
          const double qUpwind = alphaUpw*qIpUpw + (om_alphaUpw)*qIp;

          // total advection
          const double aflux = tmdot*(pecfac*qUpwind+om_pecfac*qIp);

          p_rhs[nearestNode] -= aflux;

          // upwind lhs
          p_lhs[rowR+nearestNode] += tmdot*pecfac*alphaUpw;

          // central part
          const double fac = tmdot*(pecfac*om_alphaUpw+om_pecfac);
          for ( int ic = 0; ic < nodesPerFace; ++ic ) {
            const double r = p_face_shape_function[offSetSF_face+ic];
            const int nn = face_node_ordinal_vec[ic];
            p_lhs[rowR+nn] += r*fac;
          }
        }
        else {

          // extrainment; advect in from specified value
          const double aflux = tmdot*qIpEntrain;
          p_rhs[nearestNode] -= aflux;
        }
      }

      apply_coeff(connected_nodes, scratchIds, scratchVals, rhs, lhs, __FILE__);
    }
  }
}
//--------------------------------------------------------------------------
//-------- add_elem_gradq --------------------------------------------------
//--------------------------------------------------------------------------
// Adds to the nodal gradient field dqdx_ the contributions of an element
// "extruded" from each locally owned face in partVec_. The extruded element's
// face nodes carry the real nodal values (scalarQ_, coordinates) while its
// opposing nodes carry halo values (haloQ_, coordinates + halo_dxj).
// Contributions are accumulated in place; no parallel assembly is done here.
void
AssembleNodalGradElemContactAlgorithm::add_elem_gradq()
{

  stk::mesh::MetaData & meta_data = realm_.meta_data();
  stk::mesh::BulkData & bulk_data = realm_.bulk_data();

  // fields
  VectorFieldType *coordinates = meta_data.get_field<VectorFieldType>(stk::topology::NODE_RANK, realm_.get_coordinates_name());
  VectorFieldType *haloDxj = meta_data.get_field<VectorFieldType>(stk::topology::NODE_RANK, "halo_dxj");
 
  const int nDim = meta_data.spatial_dimension();

  // loop over locally owned faces and construct missing elemental contributions
  stk::mesh::Selector s_locally_owned = meta_data.locally_owned_part()
    &stk::mesh::selectUnion(partVec_);
  
  stk::mesh::BucketVector const& face_buckets =
    realm_.get_buckets( meta_data.side_rank(), s_locally_owned );  
  
  for ( stk::mesh::BucketVector::const_iterator ib = face_buckets.begin();
	ib != face_buckets.end() ; ++ib ) {
    
    stk::mesh::Bucket & b = **ib;
    
    // extract master element; hard coded for quad or hex; 
    // quad is always true for 2D while for 3D, either hex or wedge apply
    // NOTE(review): only HEX_8 is ever selected in 3D here (4 face nodes);
    // the wedge case mentioned above is not handled by this code.
    const stk::topology & theElemTopo = (nDim == 2) ? stk::topology::QUAD_4_2D : stk::topology::HEX_8;
    const int num_face_nodes = (nDim == 2) ? 2 : 4;
    std::vector<int> face_node_ordinals(num_face_nodes);
    
    // extract master element for extruded element type
    MasterElement *meSCS = realm_.get_surface_master_element(theElemTopo);
    MasterElement *meSCV = realm_.get_volume_master_element(theElemTopo);
    
    // extract master element specifics
    const int nodesPerElement = meSCV->nodesPerElement_;
    const int numScsIp = meSCS->numIntPoints_;
    
    // The four tables below are flat arrays indexed as
    // [face_ordinal*count + i], with count being the number of face nodes or
    // face edges (see their uses further down).

    // mapping between exposed face and extruded element's overlapping face
    const int *faceNodeOnExtrudedElem = meSCS->faceNodeOnExtrudedElem();

    // mapping between exposed face and extruded element's opposing face
    const int *opposingNodeOnExtrudedElem = meSCS->opposingNodeOnExtrudedElem();
  
    // mapping between exposed face scs ips and halo edge
    const int *faceScsIpOnExtrudedElem = meSCS->faceScsIpOnExtrudedElem();
  
    // mapping between exposed face scs ips and exposed face edge
    const int *faceScsIpOnFaceEdges = meSCS->faceScsIpOnFaceEdges();
    
    // alignment of face:edge ordering and scsip area vector
    const double *edgeAlignedArea = meSCS->edgeAlignedArea();

    // define scratch field
    // (sized for the extruded element, allocated once per bucket)
    std::vector<double > ws_coordinates(nodesPerElement*nDim);
    std::vector<double > ws_scs_areav(numScsIp*nDim);
    std::vector<double > ws_scalarQ(nodesPerElement);
    std::vector<double> ws_shape_function(numScsIp*nodesPerElement);

    // pointers
    // shape functions are evaluated once per bucket and reused for every face
    double *p_shape_function = &ws_shape_function[0];
    meSCS->shape_fcn(&p_shape_function[0]);

    const stk::mesh::Bucket::size_type length   = b.size();
    
    for ( stk::mesh::Bucket::size_type k = 0 ; k < length ; ++k ) {
      
      // get face
      stk::mesh::Entity face = b[k];
      
      // extract the connected element to this exposed face; should be single in size!
      stk::mesh::Entity const* face_elem_rels = bulk_data.begin_elements(face);
      stk::mesh::ConnectivityOrdinal const* face_elem_ords = bulk_data.begin_element_ordinals(face);
      const int num_elements = bulk_data.num_elements(face);
      ThrowRequire( num_elements == 1 );
      stk::mesh::Entity element = face_elem_rels[0];
      const int face_ordinal = face_elem_ords[0];
      // node ordinals of this side, taken from the hard-coded extruded topology
      theElemTopo.side_node_ordinals(face_ordinal, face_node_ordinals.begin());
      
      // concentrate on loading up the nodal coordinates/scalarQ for the extruded element
      stk::mesh::Entity const * face_node_rels = b.begin_nodes(k);
      int num_nodes = b.num_nodes(k);
      for ( int ni = 0; ni < num_nodes; ++ni ) {
        stk::mesh::Entity node = face_node_rels[ni];
        const double * coords = stk::mesh::field_data(*coordinates, node);
        const double * hDxj = stk::mesh::field_data( *haloDxj, node );
        // local node slots in the extruded element for this face node and
        // for the node opposing it
        const int faceNode = faceNodeOnExtrudedElem[face_ordinal*num_nodes + ni];
        const int opposingNode = opposingNodeOnExtrudedElem[face_ordinal*num_nodes + ni];
        const int offSetFN = faceNode*nDim;
        const int offSetON = opposingNode*nDim;

        // populate scalars
        // face node takes the real value; opposing node takes the halo value
        ws_scalarQ[faceNode] = *stk::mesh::field_data(*scalarQ_, node);
        ws_scalarQ[opposingNode] = *stk::mesh::field_data(*haloQ_, node);

        // now vectors
        for ( int j=0; j < nDim; ++j ) {
          // face node
          ws_coordinates[offSetFN+j] = coords[j];
          // opposing node: face node displaced by the halo offset
          ws_coordinates[offSetON+j] = coords[j] + hDxj[j];
        }
      }      
      
      // compute scs integration point areavec
      double scs_error = 0.0;
      meSCS->determinant(1, &ws_coordinates[0], &ws_scs_areav[0], &scs_error);
      
      // assemble halo ip contribution for face node
      for ( int ni = 0; ni < num_nodes; ++ni ) {
        stk::mesh::Entity node = face_node_rels[ni];
        const double &dualNodalVolume = *stk::mesh::field_data( *dualNodalVolume_, node );
        // area vector for halo edge;
        // face ordinal 0 for extruded element has all scs area vectors pointing from face to opposing face
        const int scsIp = faceScsIpOnExtrudedElem[face_ordinal*num_nodes + ni];

        // interpolate element nodal values to this scsIp of interest
        double scalarQ_scsIp = 0.0;
        for ( int ic = 0; ic < nodesPerElement; ++ic )
          scalarQ_scsIp += p_shape_function[scsIp*nodesPerElement + ic]*ws_scalarQ[ic];
	
        // add in nodal gradient contribution
        // (q at the ip times the area vector, divided by the dual nodal volume)
        double *dqdx = stk::mesh::field_data( *dqdx_, node );
        for ( int j = 0; j < nDim; ++j ) {
          dqdx[j] += scalarQ_scsIp*ws_scs_areav[scsIp*nDim+j]/dualNodalVolume;
        }
      }
      
      // deal with each edge on the exposed face; the real element's node
      // relations are needed below to check the edge orientation
      stk::mesh::Entity const* elem_node_rels = bulk_data.begin_nodes(element);
      
      // face edge relations; if this is 2D then the face is an edge and size is unity
      stk::mesh::Entity const* face_edge_rels = bulk_data.begin_edges(face);
      const int num_face_edges = bulk_data.num_edges(face);
      
      int num_edges = (nDim == 3) ? num_face_edges : 1;
      
      for ( int i = 0; i < num_edges; ++i ) {
	
        // get edge
        // (in 2D the face entity itself plays the role of the edge)
        stk::mesh::Entity edge = (nDim == 3) ? face_edge_rels[i] : face;
	
        // get the relations from edge
        stk::mesh::Entity const* edge_node_rels = bulk_data.begin_nodes(edge);
        const int edge_num_nodes = bulk_data.num_nodes(edge);
        // sanity check on num nodes
        if ( edge_num_nodes != 2 ){
          throw std::runtime_error("num nodes is not 2");
        }
	
        // extract ip for this edge
        const int scsIp = faceScsIpOnFaceEdges[face_ordinal*num_edges + i];
	
        // correct area for edge and scs area vector from extruded element alignment
        const double alignmentFac = edgeAlignedArea[face_ordinal*num_edges + i];
	
        // interpolate element nodal values to this scsIp of interest
        double scalarQ_scsIp = 0.0;
        for ( int ic = 0; ic < nodesPerElement; ++ic )
          scalarQ_scsIp += p_shape_function[scsIp*nodesPerElement + ic]*ws_scalarQ[ic];

        // left and right nodes on the edge
        stk::mesh::Entity nodeL = edge_node_rels[0];
        stk::mesh::Entity nodeR = edge_node_rels[1];

        // does edge point correctly
        // (compare the global id of the element's i-th side node with the
        // global id of the edge's first node)
        const int leftNode = face_node_ordinals[i];
        const size_t iglob_Lelem = bulk_data.identifier(elem_node_rels[leftNode]);
        const size_t iglob_Ledge = bulk_data.identifier(edge_node_rels[0]);
	
        // determine the sign value for area vector; if Left node is the same,
        // then the element and edge relations are aligned
        const double sign = ( iglob_Lelem == iglob_Ledge ) ? 1.0 : -1.0;
	
        // add in nodal gradient contribution
        // (equal and opposite for the left and right nodes of the edge, each
        // scaled by its own dual nodal volume)
        double *dqdxL = stk::mesh::field_data( *dqdx_, nodeL );
        double *dqdxR = stk::mesh::field_data( *dqdx_, nodeR );
        const double &dualNodalVolumeL = *stk::mesh::field_data( *dualNodalVolume_, nodeL );
        const double &dualNodalVolumeR = *stk::mesh::field_data( *dualNodalVolume_, nodeR );
        for ( int j = 0; j < nDim; ++j ) {
          dqdxL[j] += scalarQ_scsIp*ws_scs_areav[scsIp*nDim+j]/dualNodalVolumeL*sign*alignmentFac;
          dqdxR[j] -= scalarQ_scsIp*ws_scs_areav[scsIp*nDim+j]/dualNodalVolumeR*sign*alignmentFac;
        }
	
      }
    }
  }
  
  // parallel assembly handled elsewhere

}
//--------------------------------------------------------------------------
//-------- execute ---------------------------------------------------------
//--------------------------------------------------------------------------
// Assemble per-face contributions to the "pressure" system for every
// locally-owned side in partVec_.
//
// For each face the routine:
//   1. gathers density (state NP1), boundary pressure, velocityRTM_ and Gpdx_
//      from the face nodes, and pressure and coordinates from the nodes of
//      the single connected element;
//   2. at every boundary integration point (bip) interpolates those fields,
//      builds a mass flow rate (mdot) from the interpolated momentum, the
//      projected pressure gradient, the pressure difference between the bip
//      and the opposing interior scs ip, and an optional non-orthogonal
//      correction (noc);
//   3. adds r*asq/axdx to the LHS row of the bip's nearest element node and
//      subtracts mdot/projTimeScale from that node's RHS entry;
//   4. passes the element-sized lhs/rhs to apply_coeff().
void
AssembleContinuityElemOpenSolverAlgorithm::execute()
{

  stk::mesh::BulkData & bulk_data = realm_.bulk_data();
  stk::mesh::MetaData & meta_data = realm_.meta_data();

  const int nDim = meta_data.spatial_dimension();
  
  // extract noc; includeNOC is a 1.0/0.0 switch multiplying the noc term in
  // the final mdot below
  const std::string dofName = "pressure";
  const double includeNOC 
    = (realm_.get_noc_usage(dofName) == true) ? 1.0 : 0.0;

  // space for LHS/RHS; nodesPerElem*nodesPerElem and nodesPerElem
  std::vector<double> lhs;
  std::vector<double> rhs;
  std::vector<stk::mesh::Entity> connected_nodes;

  // ip values; both boundary and opposing surface
  std::vector<double> uBip(nDim);
  std::vector<double> rho_uBip(nDim);
  std::vector<double> GpdxBip(nDim);
  std::vector<double> coordBip(nDim);
  std::vector<double> coordScs(nDim);

  // pointers to fixed values (these vectors are sized once, by nDim)
  double *p_uBip = &uBip[0];
  double *p_rho_uBip = &rho_uBip[0];
  double *p_GpdxBip = &GpdxBip[0];
  double *p_coordBip = &coordBip[0];
  double *p_coordScs = &coordScs[0];

  // nodal fields to gather
  std::vector<double> ws_coordinates;
  std::vector<double> ws_pressure;
  std::vector<double> ws_vrtm;
  std::vector<double> ws_Gpdx;
  std::vector<double> ws_density;
  std::vector<double> ws_bcPressure;
  // master element
  std::vector<double> ws_shape_function;
  std::vector<double> ws_shape_function_lhs;
  std::vector<double> ws_face_shape_function;

  // time step; projTimeScale = dt/gamma1 scales the pressure-gradient terms
  // in mdot and divides the residual
  const double dt = realm_.get_time_step();
  const double gamma1 = realm_.get_gamma1();
  const double projTimeScale = dt/gamma1;

  // deal with interpolation procedure; interpTogether weights the
  // interpolated (rho*u) against (interpolated rho)*(interpolated u)
  const double interpTogether = realm_.get_mdot_interp();
  const double om_interpTogether = 1.0-interpTogether;

  // deal with state
  ScalarFieldType &densityNp1 = density_->field_of_state(stk::mesh::StateNP1);

  // define vector of parent topos; should always be UNITY in size
  std::vector<stk::topology> parentTopo;

  // define some common selectors
  stk::mesh::Selector s_locally_owned_union = meta_data.locally_owned_part()
    &stk::mesh::selectUnion(partVec_);

  stk::mesh::BucketVector const& face_buckets =
    realm_.get_buckets( meta_data.side_rank(), s_locally_owned_union );
  for ( stk::mesh::BucketVector::const_iterator ib = face_buckets.begin();
        ib != face_buckets.end() ; ++ib ) {
    stk::mesh::Bucket & b = **ib ;

    // extract connected element topology
    b.parent_topology(stk::topology::ELEMENT_RANK, parentTopo);
    ThrowAssert ( parentTopo.size() == 1 );
    stk::topology theElemTopo = parentTopo[0];

    // surface master element of the connected (parent) element topology
    MasterElement *meSCS = realm_.get_surface_master_element(theElemTopo);
    const int nodesPerElement = meSCS->nodesPerElement_;
    const int numScsIp = meSCS->numIntPoints_;

    // face master element
    MasterElement *meFC = realm_.get_surface_master_element(b.topology());
    const int nodesPerFace = b.topology().num_nodes();
    const int numScsBip = meFC->numIntPoints_;
    std::vector<int> face_node_ordinal_vec(nodesPerFace);

    // resize some things; matrix related
    const int lhsSize = nodesPerElement*nodesPerElement;
    const int rhsSize = nodesPerElement;
    lhs.resize(lhsSize);
    rhs.resize(rhsSize);
    connected_nodes.resize(nodesPerElement);

    // algorithm related; element-sized (coordinates, pressure) and
    // face-sized (vrtm, Gpdx, density, bcPressure) work sets
    ws_coordinates.resize(nodesPerElement*nDim);
    ws_pressure.resize(nodesPerElement);
    ws_vrtm.resize(nodesPerFace*nDim);
    ws_Gpdx.resize(nodesPerFace*nDim);
    ws_density.resize(nodesPerFace);
    ws_bcPressure.resize(nodesPerFace);
    ws_shape_function.resize(numScsIp*nodesPerElement);
    ws_shape_function_lhs.resize(numScsIp*nodesPerElement);
    ws_face_shape_function.resize(numScsBip*nodesPerFace);

    // pointers; taken after the resizes above so they stay valid for the bucket
    double *p_lhs = &lhs[0];
    double *p_rhs = &rhs[0];
    double *p_coordinates = &ws_coordinates[0];
    double *p_pressure = &ws_pressure[0];
    double *p_vrtm = &ws_vrtm[0];
    double *p_Gpdx = &ws_Gpdx[0];
    double *p_density = &ws_density[0];
    double *p_bcPressure = &ws_bcPressure[0];
    double *p_shape_function = &ws_shape_function[0];
    // the LHS uses its own (shifted) shape functions only when
    // !shiftPoisson_ && reducedSensitivities_; otherwise it aliases the
    // shape functions used for the RHS
    double *p_shape_function_lhs = shiftPoisson_ ? &ws_shape_function[0] : reducedSensitivities_ ? &ws_shape_function_lhs[0] : &ws_shape_function[0];
    double *p_face_shape_function = &ws_face_shape_function[0];

    // shape functions; interior
    if ( shiftPoisson_ )
      meSCS->shifted_shape_fcn(&p_shape_function[0]);
    else
      meSCS->shape_fcn(&p_shape_function[0]);

    // fill the separate LHS buffer only in the case where it is actually used
    if ( !shiftPoisson_ && reducedSensitivities_ )
      meSCS->shifted_shape_fcn(&p_shape_function_lhs[0]);

    // shape functions; boundary
    if ( shiftMdot_ )
      meFC->shifted_shape_fcn(&p_face_shape_function[0]);
    else
      meFC->shape_fcn(&p_face_shape_function[0]);

    const stk::mesh::Bucket::size_type length   = b.size();

    for ( stk::mesh::Bucket::size_type k = 0 ; k < length ; ++k ) {

      // zero lhs/rhs
      for ( int p = 0; p < lhsSize; ++p )
        p_lhs[p] = 0.0;
      for ( int p = 0; p < rhsSize; ++p )
        p_rhs[p] = 0.0;

      // get face
      stk::mesh::Entity face = b[k];

      //======================================
      // gather nodal data off of face
      //======================================
      stk::mesh::Entity const * face_node_rels = bulk_data.begin_nodes(face);
      int num_face_nodes = bulk_data.num_nodes(face);
      // sanity check on num nodes
      ThrowAssert( num_face_nodes == nodesPerFace );
      for ( int ni = 0; ni < num_face_nodes; ++ni ) {
        stk::mesh::Entity node = face_node_rels[ni];

        // gather scalars
        p_density[ni] = *stk::mesh::field_data(densityNp1, node);
        p_bcPressure[ni] = *stk::mesh::field_data(*pressureBc_, node);

        // gather vectors
        const double * vrtm = stk::mesh::field_data(*velocityRTM_, node);
        const double * Gjp = stk::mesh::field_data(*Gpdx_, node);
        const int offSet = ni*nDim;
        for ( int j=0; j < nDim; ++j ) {
          p_vrtm[offSet+j] = vrtm[j];
          p_Gpdx[offSet+j] = Gjp[j];
        }
      }

      // pointer to face data; indexed below as areaVec[ip*nDim+j]
      const double * areaVec = stk::mesh::field_data(*exposedAreaVec_, face);

      // extract the connected element to this exposed face; should be single in size!
      const stk::mesh::Entity* face_elem_rels = bulk_data.begin_elements(face);
      ThrowAssert( bulk_data.num_elements(face) == 1 );

      // get element; its face ordinal number and populate face_node_ordinal_vec
      stk::mesh::Entity element = face_elem_rels[0];
      const stk::mesh::ConnectivityOrdinal* face_elem_ords = bulk_data.begin_element_ordinals(face);
      const int face_ordinal = face_elem_ords[0];
      theElemTopo.side_node_ordinals(face_ordinal, face_node_ordinal_vec.begin());

      // mapping from ip to nodes for this ordinal
      const int *ipNodeMap = meSCS->ipNodeMap(face_ordinal);

      //======================================
      // gather nodal data off of element
      //======================================
      stk::mesh::Entity const * elem_node_rels = bulk_data.begin_nodes(element);
      int num_nodes = bulk_data.num_nodes(element);
      // sanity check on num nodes
      ThrowAssert( num_nodes == nodesPerElement );
      for ( int ni = 0; ni < num_nodes; ++ni ) {
        stk::mesh::Entity node = elem_node_rels[ni];

        // set connected nodes
        connected_nodes[ni] = node;

        // gather scalars
        p_pressure[ni] = *stk::mesh::field_data(*pressure_, node);

        // gather vectors
        const double * coords = stk::mesh::field_data(*coordinates_, node);
        const int offSet = ni*nDim;
        for ( int j=0; j < nDim; ++j ) {
          p_coordinates[offSet+j] = coords[j];
        }
      }

      // loop over boundary ips
      for ( int ip = 0; ip < numScsBip; ++ip ) {

        // element-local node that receives this bip's lhs/rhs contribution,
        // and the interior ip returned by opposingFace() for this bip
        const int nearestNode = ipNodeMap[ip];
        const int opposingScsIp = meSCS->opposingFace(face_ordinal,ip);

        // zero out vector quantities
        for ( int j = 0; j < nDim; ++j ) {
          p_uBip[j] = 0.0;
          p_rho_uBip[j] = 0.0;
          p_GpdxBip[j] = 0.0;
          p_coordBip[j] = 0.0;
          p_coordScs[j] = 0.0;
        }
        double rhoBip = 0.0;

        // interpolate to bip; face-gathered data is indexed by the face-local
        // node ic, while coordinates were gathered per element and are
        // indexed through the element-local ordinal fn of that face node
        double pBip = 0.0;
        const int offSetSF_face = ip*nodesPerFace;
        for ( int ic = 0; ic < nodesPerFace; ++ic ) {
          const int fn = face_node_ordinal_vec[ic];
          const double r = p_face_shape_function[offSetSF_face+ic];
          const double rhoIC = p_density[ic];
          rhoBip += r*rhoIC;
          pBip += r*p_bcPressure[ic];
          const int offSetFN = ic*nDim;
          const int offSetEN = fn*nDim;
          for ( int j = 0; j < nDim; ++j ) {
            p_uBip[j] += r*p_vrtm[offSetFN+j];
            p_rho_uBip[j] += r*rhoIC*p_vrtm[offSetFN+j];
            p_GpdxBip[j] += r*p_Gpdx[offSetFN+j];
            p_coordBip[j] += r*p_coordinates[offSetEN+j];
          }
        }

        // data at interior opposing face
        double pScs = 0.0;
        const int offSetSF_elem = opposingScsIp*nodesPerElement;
        for ( int ic = 0; ic < nodesPerElement; ++ic ) {
          const double r = p_shape_function[offSetSF_elem+ic];
          pScs += r*p_pressure[ic];
          const int offSet = ic*nDim;
          for ( int j = 0; j < nDim; ++j ) {
            p_coordScs[j] += r*p_coordinates[offSet+j];
          }
        }

        // form axdx, asq and mdot (without dp/dn or noc);
        // asq = A.A, axdx = A.dx with dx = x_bip - x_scs
        double asq = 0.0;
        double axdx = 0.0;
        double mdot = 0.0;
        for ( int j = 0; j < nDim; ++j ) {
          const double dxj = p_coordBip[j] - p_coordScs[j];
          const double axj = areaVec[ip*nDim+j];
          asq += axj*axj;
          axdx += axj*dxj;
          mdot += (interpTogether*p_rho_uBip[j] + om_interpTogether*rhoBip*p_uBip[j] 
                   + projTimeScale*p_GpdxBip[j])*axj;
        }

        // NOTE(review): no guard against axdx == 0 here
        const double inv_axdx = 1.0/axdx;

        // deal with noc; kxj = A_j - (asq/axdx)*dx_j contracted with the
        // interpolated projected pressure gradient
        double noc = 0.0;
        for ( int j = 0; j < nDim; ++j ) {
          const double dxj = p_coordBip[j] - p_coordScs[j];
          const double axj = areaVec[ip*nDim+j];
          const double kxj = axj - asq*inv_axdx*dxj; // NOC
          noc += kxj*p_GpdxBip[j];
        }

        // lhs for pressure system; row of the nearest node, columns over all
        // element nodes, using the LHS shape functions at the opposing ip
        int rowR = nearestNode*nodesPerElement;

        for ( int ic = 0; ic < nodesPerElement; ++ic ) {
          const double r = p_shape_function_lhs[offSetSF_elem+ic];
          p_lhs[rowR+ic] += r*asq*inv_axdx;
        }

        // final mdot; adds the (pBip - pScs) difference term and, when
        // enabled, the noc term
        mdot += -projTimeScale*((pBip-pScs)*asq*inv_axdx + noc*includeNOC);

        // residual
        p_rhs[nearestNode] -= mdot/projTimeScale;
      }

      apply_coeff(connected_nodes, rhs, lhs, __FILE__);

    }
  }
}
//--------------------------------------------------------------------------
//-------- execute ---------------------------------------------------------
//--------------------------------------------------------------------------
// Assemble per-face viscous-stress contributions for every locally-owned
// side in partVec_, projected onto the face unit normal.
//
// For each face the routine gathers viscosity from the face nodes and
// velocity (state NP1) and coordinates from the nodes of the single connected
// element, evaluates shape-function gradients on the face via
// meSCS->face_grad_op(), and at every boundary integration point adds
// viscous terms weighted by n_i*n_i and n_i*n_l (l != i) to the row of the
// ip's nearest element node. The divU part (scaled by includeDivU_) only
// enters the RHS. The nDim-coupled lhs/rhs are handed to apply_coeff().
void
AssembleMomentumElemSymmetrySolverAlgorithm::execute()
{

  stk::mesh::BulkData & bulk_data = realm_.bulk_data();
  stk::mesh::MetaData & meta_data = realm_.meta_data();

  const int nDim = meta_data.spatial_dimension();

  // space for LHS/RHS; nodesPerElem*nDim*nodesPerElem*nDim and nodesPerElem*nDim
  std::vector<double> lhs;
  std::vector<double> rhs;
  std::vector<int> scratchIds;
  std::vector<double> scratchVals;
  std::vector<stk::mesh::Entity> connected_nodes;

  // vectors; unit normal at the current boundary ip
  std::vector<double> nx(nDim);

  // pointers to fixed values
  double *p_nx = &nx[0];

  // nodal fields to gather
  std::vector<double> ws_velocityNp1;
  std::vector<double> ws_coordinates;
  std::vector<double> ws_viscosity;
  // master element
  std::vector<double> ws_face_shape_function;
  std::vector<double> ws_dndx;
  std::vector<double> ws_det_j;

  // deal with state
  VectorFieldType &velocityNp1 = velocity_->field_of_state(stk::mesh::StateNP1);

  // define vector of parent topos; should always be UNITY in size
  std::vector<stk::topology> parentTopo;

  // define some common selectors
  stk::mesh::Selector s_locally_owned_union = meta_data.locally_owned_part()
    &stk::mesh::selectUnion(partVec_);

  stk::mesh::BucketVector const& face_buckets =
    realm_.get_buckets( meta_data.side_rank(), s_locally_owned_union );
  for ( stk::mesh::BucketVector::const_iterator ib = face_buckets.begin();
        ib != face_buckets.end() ; ++ib ) {
    stk::mesh::Bucket & b = **ib ;

    // extract connected element topology
    b.parent_topology(stk::topology::ELEMENT_RANK, parentTopo);
    ThrowAssert ( parentTopo.size() == 1 );
    stk::topology theElemTopo = parentTopo[0];

    // surface master element of the connected (parent) element topology
    MasterElement *meSCS = realm_.get_surface_master_element(theElemTopo);
    const int nodesPerElement = meSCS->nodesPerElement_;

    // face master element
    MasterElement *meFC = realm_.get_surface_master_element(b.topology());
    const int nodesPerFace = meFC->nodesPerElement_;
    const int numScsBip = meFC->numIntPoints_;

    // resize some things; matrix related
    const int lhsSize = nodesPerElement*nDim*nodesPerElement*nDim;
    const int rhsSize = nodesPerElement*nDim;
    lhs.resize(lhsSize);
    rhs.resize(rhsSize);
    scratchIds.resize(rhsSize);
    scratchVals.resize(rhsSize);
    connected_nodes.resize(nodesPerElement);

    // algorithm related; element
    ws_velocityNp1.resize(nodesPerElement*nDim);
    ws_coordinates.resize(nodesPerElement*nDim);
    ws_viscosity.resize(nodesPerFace);
    ws_face_shape_function.resize(numScsBip*nodesPerFace);
    ws_dndx.resize(nDim*numScsBip*nodesPerElement);
    ws_det_j.resize(numScsBip);

    // pointers; taken after the resizes above so they stay valid for the bucket
    double *p_lhs = &lhs[0];
    double *p_rhs = &rhs[0];
    double *p_velocityNp1 = &ws_velocityNp1[0];
    double *p_coordinates = &ws_coordinates[0];
    double *p_viscosity = &ws_viscosity[0];
    double *p_face_shape_function = &ws_face_shape_function[0];
    double *p_dndx = &ws_dndx[0];

    // shape function
    meFC->shape_fcn(&p_face_shape_function[0]);

    const stk::mesh::Bucket::size_type length   = b.size();

    for ( stk::mesh::Bucket::size_type k = 0 ; k < length ; ++k ) {

      // zero lhs/rhs
      for ( int p = 0; p < lhsSize; ++p )
        p_lhs[p] = 0.0;
      for ( int p = 0; p < rhsSize; ++p )
        p_rhs[p] = 0.0;

      // get face
      stk::mesh::Entity face = b[k];

      //======================================
      // gather nodal data off of face
      //======================================
      stk::mesh::Entity const * face_node_rels = bulk_data.begin_nodes(face);
      int num_face_nodes = bulk_data.num_nodes(face);
      // sanity check on num nodes
      ThrowAssert( num_face_nodes == nodesPerFace );
      for ( int ni = 0; ni < num_face_nodes; ++ni ) {
        stk::mesh::Entity node = face_node_rels[ni];
        // gather scalars
        p_viscosity[ni] = *stk::mesh::field_data(*viscosity_, node);
      }

      // pointer to face data; indexed below as areaVec[ip*nDim+j]
      const double * areaVec = stk::mesh::field_data(*exposedAreaVec_, face);

      // extract the connected element to this exposed face; should be single in size!
      stk::mesh::Entity const * face_elem_rels = bulk_data.begin_elements(face);
      ThrowAssert( bulk_data.num_elements(face) == 1 );

      // get element; its face ordinal number
      stk::mesh::Entity element = face_elem_rels[0];
      const stk::mesh::ConnectivityOrdinal* face_elem_ords = bulk_data.begin_element_ordinals(face);
      const int face_ordinal = face_elem_ords[0];

      // mapping from ip to nodes for this ordinal
      const int *ipNodeMap = meSCS->ipNodeMap(face_ordinal);

      //==========================================
      // gather nodal data off of element
      //==========================================
      stk::mesh::Entity const * elem_node_rels = bulk_data.begin_nodes(element);
      int num_nodes = bulk_data.num_nodes(element);
      // sanity check on num nodes
      ThrowAssert( num_nodes == nodesPerElement );
      for ( int ni = 0; ni < num_nodes; ++ni ) {
        stk::mesh::Entity node = elem_node_rels[ni];
        // set connected nodes
        connected_nodes[ni] = node;
        // gather vectors
        double * uNp1 = stk::mesh::field_data(velocityNp1, node);
        double * coords = stk::mesh::field_data(*coordinates_, node);
        const int offSet = ni*nDim;
        for ( int j=0; j < nDim; ++j ) {
          p_velocityNp1[offSet+j] = uNp1[j];
          p_coordinates[offSet+j] = coords[j];
        }
      }

      // compute dndx on this face of the element; the leading 1 is presumably
      // the number of elements processed -- TODO confirm against MasterElement.
      // NOTE(review): scs_error is filled by the call but never checked here.
      double scs_error = 0.0;
      meSCS->face_grad_op(1, face_ordinal, &p_coordinates[0], &p_dndx[0], &ws_det_j[0], &scs_error);

      // loop over boundary ips
      for ( int ip = 0; ip < numScsBip; ++ip ) {

        // element-local node whose rows receive this ip's contribution
        const int nearestNode = ipNodeMap[ip];

        // offset for bip area vector and types of shape function
        const int faceOffSet = ip*nDim;
        const int offSetSF_face = ip*nodesPerFace;

        // form unit normal from the ip area vector
        double asq = 0.0;
        for ( int j = 0; j < nDim; ++j ) {
          const double axj = areaVec[faceOffSet+j];
          asq += axj*axj;
        }
        const double amag = std::sqrt(asq);
        for ( int i = 0; i < nDim; ++i ) {
          p_nx[i] = areaVec[faceOffSet+i]/amag;
        }

        // interpolate to bip
        double viscBip = 0.0;
        for ( int ic = 0; ic < nodesPerFace; ++ic ) {
          const double r = p_face_shape_function[offSetSF_face+ic];
          viscBip += r*p_viscosity[ic];
        }

        //================================
        // diffusion second
        //================================
        for ( int ic = 0; ic < nodesPerElement; ++ic ) {

          // dndx is laid out as [ip][node][dim]
          const int offSetDnDx = nDim*nodesPerElement*ip + ic*nDim;

          for ( int j = 0; j < nDim; ++j ) {

            const double axj = areaVec[faceOffSet+j];
            const double dndxj = p_dndx[offSetDnDx+j];
            const double uxj = p_velocityNp1[ic*nDim+j];

            // explicit divergence part; switched by includeDivU_ and only
            // ever added to the rhs
            const double divUstress = 2.0/3.0*viscBip*dndxj*uxj*axj*includeDivU_;

            for ( int i = 0; i < nDim; ++i ) {

              // matrix entries; row for component i of the nearest node
              int indexR = nearestNode*nDim + i;
              int rowR = indexR*nodesPerElement*nDim;

              const double dndxi = p_dndx[offSetDnDx+i];
              const double uxi = p_velocityNp1[ic*nDim+i];
              const double nxi = p_nx[i];
              const double nxinxi = nxi*nxi;

              // -mu*dui/dxj*Aj*ni*ni; sneak in divU (explicit)
              double lhsfac = - viscBip*dndxj*axj*nxinxi;
              p_lhs[rowR+ic*nDim+i] += lhsfac;
              p_rhs[indexR] -= lhsfac*uxi + divUstress*nxinxi;

              // -mu*duj/dxi*Aj*ni*ni
              lhsfac = - viscBip*dndxi*axj*nxinxi;
              p_lhs[rowR+ic*nDim+j] += lhsfac;
              p_rhs[indexR] -= lhsfac*uxj;

              // now we need the +nx*ny*Fy + nx*nz*Fz part

              for ( int l = 0; l < nDim; ++l ) {

                if ( i != l ) {
                  const double nxinxl = nxi*p_nx[l];
                  const double uxl = p_velocityNp1[ic*nDim+l];
                  const double dndxl = p_dndx[offSetDnDx+l];

                  // -ni*nl*mu*dul/dxj*Aj; sneak in divU (explicit)
                  lhsfac = -viscBip*dndxj*axj*nxinxl;
                  p_lhs[rowR+ic*nDim+l] += lhsfac;
                  p_rhs[indexR] -= lhsfac*uxl + divUstress*nxinxl;

                  // -ni*nl*mu*duj/dxl*Aj
                  lhsfac = -viscBip*dndxl*axj*nxinxl;
                  p_lhs[rowR+ic*nDim+j] += lhsfac;
                  p_rhs[indexR] -= lhsfac*uxj;
                }
              }
            }
          }
        }
      }

      apply_coeff(connected_nodes, scratchIds, scratchVals, rhs, lhs, __FILE__);

    }
  }
}
//--------------------------------------------------------------------------
//-------- execute ---------------------------------------------------------
//--------------------------------------------------------------------------
void
AssemblePNGBoundarySolverAlgorithm::execute()
{
  stk::mesh::MetaData & meta_data = realm_.meta_data();

  const int nDim = meta_data.spatial_dimension();

  // space for LHS/RHS; nodesPerFace*nDim*nodesPerFace*nDim and nodesPerFace*nDim
  std::vector<double> lhs;
  std::vector<double> rhs;
  std::vector<int> scratchIds;
  std::vector<double> scratchVals;
  std::vector<stk::mesh::Entity> connected_nodes;

  // nodal fields to gather
  std::vector<double> ws_scalarQ;

  // master element
  std::vector<double> ws_face_shape_function;

  // define some common selectors
  stk::mesh::Selector s_locally_owned_union = meta_data.locally_owned_part()
    &stk::mesh::selectUnion(partVec_);

  stk::mesh::BucketVector const& face_buckets =
    realm_.get_buckets( meta_data.side_rank(), s_locally_owned_union );
  for ( stk::mesh::BucketVector::const_iterator ib = face_buckets.begin();
        ib != face_buckets.end() ; ++ib ) {
    stk::mesh::Bucket & b = **ib ;

    // face master element
    MasterElement *meFC = realm_.get_surface_master_element(b.topology());
    const int nodesPerFace = meFC->nodesPerElement_;
    const int numScsBip = meFC->numIntPoints_;
    const int *faceIpNodeMap = meFC->ipNodeMap();

    // resize some things; matrix related
    const int lhsSize = nodesPerFace*nDim*nodesPerFace*nDim;
    const int rhsSize = nodesPerFace*nDim;
    lhs.resize(lhsSize);
    rhs.resize(rhsSize);
    scratchIds.resize(rhsSize);
    scratchVals.resize(rhsSize);
    connected_nodes.resize(nodesPerFace);

    // algorithm related; element
    ws_scalarQ.resize(nodesPerFace);
    ws_face_shape_function.resize(numScsBip*nodesPerFace);
  
    // pointers
    double *p_lhs = &lhs[0];
    double *p_rhs = &rhs[0];
    double *p_scalarQ = &ws_scalarQ[0];
    double *p_face_shape_function = &ws_face_shape_function[0];

    // zero lhs; always zero
    for ( int p = 0; p < lhsSize; ++p )
      p_lhs[p] = 0.0;
  
    // shape function
    meFC->shape_fcn(&p_face_shape_function[0]);

    const stk::mesh::Bucket::size_type length   = b.size();

    for ( stk::mesh::Bucket::size_type k = 0 ; k < length ; ++k ) {

      // zero rhs only since LHS never contributes, never touched
      for ( int p = 0; p < rhsSize; ++p )
        p_rhs[p] = 0.0;

      //======================================
      // gather nodal data off of face
      //======================================
      stk::mesh::Entity const * face_node_rels = b.begin_nodes(k);
      int num_face_nodes = b.num_nodes(k);
      // sanity check on num nodes
      ThrowAssert( num_face_nodes == nodesPerFace );
      for ( int ni = 0; ni < num_face_nodes; ++ni ) {
        // get the node and form connected_node
        stk::mesh::Entity node = face_node_rels[ni];
        connected_nodes[ni] = node;
        // gather scalars
        p_scalarQ[ni] = *stk::mesh::field_data(*scalarQ_, node);
      }

      // pointer to face data
      const double * areaVec = stk::mesh::field_data(*exposedAreaVec_, b, k);

      // start the assembly
      for ( int ip = 0; ip < numScsBip; ++ip ) {
        
        // nearest node to ip
        const int localFaceNode = faceIpNodeMap[ip];

        // save off some offsets for this ip
        const int nnNdim = localFaceNode*nDim;
        const int offSetSF_face = ip*nodesPerFace;

        // interpolate to bip
        double scalarQBip = 0.0;
        for ( int ic = 0; ic < nodesPerFace; ++ic ) {
          const double r = p_face_shape_function[offSetSF_face+ic];
          scalarQBip += r*p_scalarQ[ic];
        }

        // assemble to RHS; rhs -= a negative contribution => +=
        for ( int i = 0; i < nDim; ++i ) {
          p_rhs[nnNdim+i] += scalarQBip*areaVec[ip*nDim+i];
        }
      }
      
      apply_coeff(connected_nodes, scratchIds, scratchVals, rhs, lhs, __FILE__);

    }
  }
}
//--------------------------------------------------------------------------
//-------- execute ---------------------------------------------------------
//--------------------------------------------------------------------------
// Add the boundary-face contribution to the nodal gradient dqdx_ of the
// vector field vectorQ_ for every locally-owned side in partVec_.
//
// At each face integration point vectorQ_ is interpolated from the face nodes
// (shifted shape functions when useShifted_ is set) and
//   qIp[i]*areaVec[ip*nDim+j]/dualNodalVolume
// is accumulated into entry (i*nDim + j) of dqdx_ at the ip's nearest node.
// The nearest node is taken from the face master element's ipNodeMap()
// rather than assumed equal to the ip index, so face topologies whose
// integration points do not map one-to-one onto nodes are handled correctly.
void
AssembleNodalGradUBoundaryAlgorithm::execute()
{

  stk::mesh::MetaData & meta_data = realm_.meta_data();

  const int nDim = meta_data.spatial_dimension();

  // extract fields
  GenericFieldType *exposedAreaVec = meta_data.get_field<GenericFieldType>(meta_data.side_rank(), "exposed_area_vector");
  ScalarFieldType *dualNodalVolume = meta_data.get_field<ScalarFieldType>(stk::topology::NODE_RANK, "dual_nodal_volume");

  // nodal fields to gather; gather everything other than what we are assembling
  std::vector<double> ws_vectorQ;

  // geometry related to populate
  std::vector<double> ws_shape_function;

  // ip data
  std::vector<double> qIp(nDim);

  // define some common selectors
  stk::mesh::Selector s_locally_owned_union = meta_data.locally_owned_part()
    &stk::mesh::selectUnion(partVec_);

  stk::mesh::BucketVector const& face_buckets =
    realm_.get_buckets( meta_data.side_rank(), s_locally_owned_union );
  for ( stk::mesh::BucketVector::const_iterator ib = face_buckets.begin();
        ib != face_buckets.end() ; ++ib ) {
    stk::mesh::Bucket & b = **ib ;
    const stk::mesh::Bucket::size_type length   = b.size();

    // extract master element
    MasterElement *meFC = realm_.get_surface_master_element(b.topology());

    // extract master element specifics
    const int nodesPerFace = meFC->nodesPerElement_;
    const int numScsIp = meFC->numIntPoints_;
    // mapping from face ip to its nearest face-local node
    const int *faceIpNodeMap = meFC->ipNodeMap();

    // algorithm related
    ws_vectorQ.resize(nodesPerFace*nDim);
    ws_shape_function.resize(numScsIp*nodesPerFace);

    // pointers
    double *p_vectorQ = &ws_vectorQ[0];
    double *p_shape_function = &ws_shape_function[0];

    if ( useShifted_ )
      meFC->shifted_shape_fcn(&p_shape_function[0]);
    else
      meFC->shape_fcn(&p_shape_function[0]);

    for ( stk::mesh::Bucket::size_type k = 0 ; k < length ; ++k ) {

      // face data; nDim entries per ip
      double * areaVec = stk::mesh::field_data(*exposedAreaVec, b, k);

      //===============================================
      // gather nodal data; this is how we do it now..
      //===============================================
      stk::mesh::Entity const * face_node_rels = b.begin_nodes(k);
      int num_nodes = b.num_nodes(k);

      // sanity check on num nodes
      ThrowAssert( num_nodes == nodesPerFace );

      for ( int ni = 0; ni < num_nodes; ++ni ) {
        stk::mesh::Entity node = face_node_rels[ni];

        // pointers to real data
        double * vectorQ = stk::mesh::field_data(*vectorQ_, node );

        // gather vectors
        const int offSet = ni*nDim;
        for ( int j=0; j < nDim; ++j ) {
          p_vectorQ[offSet+j] = vectorQ[j];
        }
      }

      // start assembly
      for ( int ip = 0; ip < numScsIp; ++ip ) {

        // nearest node; looked up through the master element map instead of
        // assuming ip == node index
        const int nn = faceIpNodeMap[ip];

        stk::mesh::Entity nodeNN = face_node_rels[nn];

        // pointer to fields to assemble
        double *gradQNN = stk::mesh::field_data(*dqdx_, nodeNN );

        // supplemental
        double volNN = *stk::mesh::field_data(*dualNodalVolume, nodeNN);

        // interpolate to scs point; operate on saved off ws_field
        for (int j =0; j < nDim; ++j )
          qIp[j] = 0.0;

        const int offSet = ip*nodesPerFace;
        for ( int ic = 0; ic < nodesPerFace; ++ic ) {
          const double r = p_shape_function[offSet+ic];
          for ( int j = 0; j < nDim; ++j ) {
            qIp[j] += r*p_vectorQ[ic*nDim+j];
          }
        }

        // nearest node volume
        double inv_volNN = 1.0/volNN;

        // assemble to nearest node; the gradient tensor is stored row-major
        // as (component i, direction j)
        for ( int i = 0; i < nDim; ++i ) {
          const int row_gradQ = i*nDim;
          double qip = qIp[i];
          for ( int j = 0; j < nDim; ++j ) {
            double fac = qip*areaVec[ip*nDim+j];
            gradQNN[row_gradQ+j] += fac*inv_volNN;
          }
        }
      }
    }
  }
}
//--------------------------------------------------------------------------
//-------- execute ---------------------------------------------------------
//--------------------------------------------------------------------------
void
ComputeHeatTransferElemWallAlgorithm::execute()
{

  stk::mesh::BulkData & bulk_data = realm_.bulk_data();
  stk::mesh::MetaData & meta_data = realm_.meta_data();

  const int nDim = meta_data.spatial_dimension();

  const double dt = realm_.get_time_step();

  // nodal fields to gather
  std::vector<double> ws_coordinates;
  std::vector<double> ws_temperature;
  std::vector<double> ws_thermalCond;
  std::vector<double> ws_density;
  std::vector<double> ws_specificHeat;

  // master element
  std::vector<double> ws_face_shape_function;
  std::vector<double> ws_dndx;
  std::vector<double> ws_det_j;
  // array for face nodes and nodes off face
  std::vector<double> ws_nodesOnFace;
  std::vector<double> ws_nodesOffFace;

  // define vector of parent topos; should always be UNITY in size
  std::vector<stk::topology> parentTopo;

  // define some common selectors
  stk::mesh::Selector s_locally_owned_union = meta_data.locally_owned_part()
    &stk::mesh::selectUnion(partVec_);

  stk::mesh::BucketVector const& face_buckets =
    realm_.get_buckets( meta_data.side_rank(), s_locally_owned_union );
  for ( stk::mesh::BucketVector::const_iterator ib = face_buckets.begin();
        ib != face_buckets.end() ; ++ib ) {
    stk::mesh::Bucket & b = **ib ;

    // extract connected element topology
    b.parent_topology(stk::topology::ELEMENT_RANK, parentTopo);
    ThrowAssert ( parentTopo.size() == 1 );
    stk::topology theElemTopo = parentTopo[0];
    MasterElement *meSCS = realm_.get_surface_master_element(theElemTopo);
    const int nodesPerElement = meSCS->nodesPerElement_;

    // face master element
    MasterElement *meFC = realm_.get_surface_master_element(b.topology());
    const int nodesPerFace = meFC->nodesPerElement_;

    // size some things that are useful
    int num_face_nodes = b.topology().num_nodes();
    std::vector<int> face_node_ordinals(num_face_nodes);

    // algorithm related; element
    ws_coordinates.resize(nodesPerElement*nDim);
    ws_temperature.resize(nodesPerElement);
    ws_thermalCond.resize(nodesPerFace);
    ws_density.resize(nodesPerFace);
    ws_specificHeat.resize(nodesPerFace);
    ws_face_shape_function.resize(nodesPerFace*nodesPerFace);
    ws_dndx.resize(nDim*nodesPerFace*nodesPerElement);
    ws_det_j.resize(nodesPerFace);
    ws_nodesOnFace.resize(nodesPerElement);
    ws_nodesOffFace.resize(nodesPerElement);

    // pointers
    double *p_coordinates = &ws_coordinates[0];
    double *p_temperature = &ws_temperature[0];
    double *p_thermalCond = &ws_thermalCond[0];
    double *p_density     = &ws_density[0];
    double *p_specificHeat = &ws_specificHeat[0];
    double *p_face_shape_function = &ws_face_shape_function[0];
    double *p_dndx = &ws_dndx[0];
    double *p_nodesOnFace = &ws_nodesOnFace[0];
    double *p_nodesOffFace = &ws_nodesOffFace[0];

    // shape function
    meFC->shape_fcn(&p_face_shape_function[0]);

    const stk::mesh::Bucket::size_type length   = b.size();

    for ( stk::mesh::Bucket::size_type k = 0 ; k < length ; ++k ) {

      // get face
      stk::mesh::Entity face = b[k];

      // pointer to face data
      const double * areaVec = stk::mesh::field_data(*exposedAreaVec_, b, k);

      //======================================
      // gather nodal data off of face
      //======================================
      stk::mesh::Entity const * face_node_rels = bulk_data.begin_nodes(face);
      num_face_nodes = bulk_data.num_nodes(face);
      // sanity check on num nodes
      ThrowAssert( num_face_nodes == nodesPerFace );
      for ( int ni = 0; ni < num_face_nodes; ++ni ) {
        stk::mesh::Entity node = face_node_rels[ni];
        // gather scalars
        p_density[ni] = *stk::mesh::field_data(*density_, node);
        const double mu = *stk::mesh::field_data(*viscosity_, node);
        const double Cp = *stk::mesh::field_data(*specificHeat_, node);
        p_specificHeat[ni] = Cp;
        p_thermalCond[ni] = mu*Cp/Pr_;
      }

      // extract the connected element to this exposed face; should be single in size!
      stk::mesh::Entity const * face_elem_rels = b.begin_elements(k);
      ThrowAssert( b.num_elements(k) == 1 );

      // get element; its face ordinal number and populate face_node_ordinals
      stk::mesh::Entity element = face_elem_rels[0];
      const int face_ordinal = b.begin_element_ordinals(k)[0];
      theElemTopo.side_node_ordinals(face_ordinal, face_node_ordinals.begin());

      //==========================================
      // gather nodal data off of element
      //==========================================
      stk::mesh::Entity const * elem_node_rels = bulk_data.begin_nodes(element);
      int num_nodes = bulk_data.num_nodes(element);
      // sanity check on num nodes
      ThrowAssert( num_nodes == nodesPerElement );
      for ( int ni = 0; ni < num_nodes; ++ni ) {
        // sneak in nodesOn/offFace
        p_nodesOnFace[ni] = 0.0;
        p_nodesOffFace[ni] = 1.0;
        stk::mesh::Entity node = elem_node_rels[ni];
        // gather scalars
        p_temperature[ni] = *stk::mesh::field_data(*temperature_, node);
        // gather vectors
        double * coords = stk::mesh::field_data(*coordinates_, node);
        const int offSet = ni*nDim;
        for ( int j=0; j < nDim; ++j ) {
          p_coordinates[offSet+j] = coords[j];
        }
      }

      // process on/off while looping over face nodes
      for ( int ip = 0; ip < num_face_nodes; ++ip ) {
        const int nearestNode = face_node_ordinals[ip];
        p_nodesOnFace[nearestNode] = 1.0;
        p_nodesOffFace[nearestNode] = 0.0;
      }

      // compute dndx
      double scs_error = 0.0;
      meSCS->face_grad_op(1, face_ordinal, &p_coordinates[0], &p_dndx[0], &ws_det_j[0], &scs_error);

      for ( int ip = 0; ip < num_face_nodes; ++ip ) {

        const int nearestNode = face_node_ordinals[ip];
        stk::mesh::Entity nodeR = elem_node_rels[nearestNode];

        // pointers to nearest node data
        double *assembledWallArea = stk::mesh::field_data(*assembledWallArea_, nodeR);
        double *referenceTemperature = stk::mesh::field_data(*referenceTemperature_, nodeR);
        double *heatTransferCoefficient = stk::mesh::field_data(*heatTransferCoefficient_, nodeR);
        double *normalHeatFlux = stk::mesh::field_data(*normalHeatFlux_, nodeR);
        double *robinCouplingParameter = stk::mesh::field_data(*robinCouplingParameter_, nodeR);

        // offset for bip area vector and types of shape function
        const int faceOffSet = ip*nDim;
        const int offSetSF_face = ip*nodesPerFace;

        // interpolate to bip
        double thermalCondBip = 0.0;
        double densityBip = 0.0;
        double specificHeatBip = 0.0;
        for ( int ic = 0; ic < nodesPerFace; ++ic ) {
          const double r = p_face_shape_function[offSetSF_face+ic];
          thermalCondBip += r*p_thermalCond[ic];
          densityBip += r*p_density[ic];
          specificHeatBip += r*p_specificHeat[ic];
        }

        // handle flux due to on and off face in a single loop (on/off provided above)
        double dndx    = 0.0;
        double dndxOn  = 0.0;
        double dndxOff = 0.0;
        double invEltLen = 0.0;
        for ( int ic = 0; ic < nodesPerElement; ++ic ) {
          const int offSetDnDx = nDim*nodesPerElement*ip + ic*nDim;
          const double nodesOnFace = p_nodesOnFace[ic];
          const double nodesOffFace = p_nodesOffFace[ic];
          const double tempIC = p_temperature[ic];
          for ( int j = 0; j < nDim; ++j ) {
            const double axj = areaVec[faceOffSet+j];
            const double dndxj = p_dndx[offSetDnDx+j];
            const double dTdA = dndxj*axj*tempIC;
            dndx    += dTdA;
            dndxOn  += dTdA*nodesOnFace;
            dndxOff += dTdA*nodesOffFace;
            invEltLen += dndxj*axj*nodesOnFace;
          }
        }

        // compute assembled area
        double aMag = 0.0;
        for ( int j = 0; j < nDim; ++j ) {
          const double axj = areaVec[faceOffSet+j];
          aMag += axj*axj;
        }
        aMag = std::sqrt(aMag);
        double eltLen = aMag/invEltLen;

        // compute coupling parameter
        const double chi = densityBip * specificHeatBip * eltLen * eltLen 
          / (2 * thermalCondBip * dt);
        const double alpha = compute_coupling_parameter(thermalCondBip, eltLen, chi);

        // assemble the nodal quantities
        *assembledWallArea += aMag;
        *normalHeatFlux -= thermalCondBip*dndx;
        *referenceTemperature -= thermalCondBip*dndxOff;
        *heatTransferCoefficient -= thermalCondBip*dndxOn;
        *robinCouplingParameter += alpha*aMag;
      }
    }
  }
}
//--------------------------------------------------------------------------
//-------- execute ---------------------------------------------------------
//--------------------------------------------------------------------------
void
AssembleMeshDisplacementElemSolverAlgorithm::execute()
{

  stk::mesh::MetaData & meta_data = realm_.meta_data();

  const int nDim = meta_data.spatial_dimension();

  // space for LHS/RHS; nodesPerElem*nDim*nodesPerElem*nDim and nodesPerElem*nDim
  std::vector<double> lhs;
  std::vector<double> rhs;
  std::vector<int> scratchIds;
  std::vector<double> scratchVals;
  std::vector<stk::mesh::Entity> connected_nodes;

  // nodal fields to gather
  std::vector<double> ws_displacementNp1;
  std::vector<double> ws_coordinates;
  std::vector<double> ws_modelCoordinates;
  std::vector<double> ws_mu;
  std::vector<double> ws_lambda;

  // geometry related to populate
  std::vector<double> ws_scs_areav;
  std::vector<double> ws_dndx;
  std::vector<double> ws_deriv;
  std::vector<double> ws_det_j;
  std::vector<double> ws_shape_function;

  // deal with state
  VectorFieldType &displacementNp1 = meshDisplacement_->field_of_state(stk::mesh::StateNP1);

  // define some common selectors
  stk::mesh::Selector s_locally_owned_union = meta_data.locally_owned_part()
    &stk::mesh::selectUnion(partVec_);

  stk::mesh::BucketVector const& elem_buckets =
    realm_.get_buckets( stk::topology::ELEMENT_RANK, s_locally_owned_union );
  for ( stk::mesh::BucketVector::const_iterator ib = elem_buckets.begin();
        ib != elem_buckets.end() ; ++ib ) {
    stk::mesh::Bucket & b = **ib ;
    const stk::mesh::Bucket::size_type length   = b.size();

    // extract master element
    MasterElement *meSCS = sierra::nalu::MasterElementRepo::get_surface_master_element(b.topology());

    // extract master element specifics
    const int nodesPerElement = meSCS->nodesPerElement_;
    const int numScsIp = meSCS->numIntPoints_;
    const int *lrscv = meSCS->adjacentNodes();

    // resize some things; matrix related
    const int lhsSize = nodesPerElement*nDim*nodesPerElement*nDim;
    const int rhsSize = nodesPerElement*nDim;
    lhs.resize(lhsSize);
    rhs.resize(rhsSize);
    scratchIds.resize(rhsSize);
    scratchVals.resize(rhsSize);
    connected_nodes.resize(nodesPerElement);

    // algorithm related
    ws_displacementNp1.resize(nodesPerElement*nDim);
    ws_coordinates.resize(nodesPerElement*nDim);
    ws_modelCoordinates.resize(nodesPerElement*nDim);
    ws_mu.resize(nodesPerElement);
    ws_lambda.resize(nodesPerElement);
    ws_scs_areav.resize(numScsIp*nDim);
    ws_dndx.resize(nDim*numScsIp*nodesPerElement);
    ws_deriv.resize(nDim*numScsIp*nodesPerElement);
    ws_det_j.resize(numScsIp);
    ws_shape_function.resize(numScsIp*nodesPerElement);

    // pointer to lhs/rhs
    double *p_lhs = &lhs[0];
    double *p_rhs = &rhs[0];
    double *p_displacementNp1 = &ws_displacementNp1[0];
    double *p_coordinates = &ws_coordinates[0];
    double *p_modelCoordinates = &ws_modelCoordinates[0];
    double *p_mu = &ws_mu[0];
    double *p_lambda = &ws_lambda[0];
    double *p_scs_areav = &ws_scs_areav[0];
    double *p_dndx = &ws_dndx[0];
    double *p_shape_function = &ws_shape_function[0];

    // extract shape function
    meSCS->shape_fcn(&p_shape_function[0]);

    for ( stk::mesh::Bucket::size_type k = 0 ; k < length ; ++k ) {

      // zero lhs/rhs
      for ( int p = 0; p < lhsSize; ++p )
        p_lhs[p] = 0.0;
      for ( int p = 0; p < rhsSize; ++p )
        p_rhs[p] = 0.0;

      //===============================================
      // gather nodal data; this is how we do it now..
      //===============================================
      stk::mesh::Entity const * node_rels = b.begin_nodes(k);
      int num_nodes = b.num_nodes(k);

      // sanity check on num nodes
      ThrowAssert( num_nodes == nodesPerElement );

      for ( int ni = 0; ni < num_nodes; ++ni ) {
        stk::mesh::Entity node = node_rels[ni];

        // set connected nodes
        connected_nodes[ni] = node;

        // pointers to real data
        const double * dxNp1  =  stk::mesh::field_data(displacementNp1, node);
        const double * coords =  stk::mesh::field_data(*coordinates_, node);
        const double * modelCoords =  stk::mesh::field_data(*modelCoordinates_, node);
        const double mu = *stk::mesh::field_data(*mu_, node);
        const double lambda = *stk::mesh::field_data(*lambda_, node);

        // gather scalars
        p_mu[ni] = mu;
        p_lambda[ni] = lambda;

        // gather vectors
        const int niNdim = ni*nDim;
        for ( int i=0; i < nDim; ++i ) {
          p_displacementNp1[niNdim+i] = dxNp1[i];
          p_coordinates[niNdim+i] = coords[i];
          p_modelCoordinates[niNdim+i] = modelCoords[i];

        }
      }

      // compute geometry
      double scs_error = 0.0;
      meSCS->determinant(1, &p_coordinates[0], &p_scs_areav[0], &scs_error);

      // compute dndx; model coords or displaced?
      if ( deformWrtModelCoords_ ) {
        meSCS->grad_op(1, &p_modelCoordinates[0], &p_dndx[0], &ws_deriv[0], &ws_det_j[0], &scs_error);
      }
      else {
        meSCS->grad_op(1, &p_coordinates[0], &p_dndx[0], &ws_deriv[0], &ws_det_j[0], &scs_error);
      }
        
      for ( int ip = 0; ip < numScsIp; ++ip ) {

        const int ipNdim = ip*nDim;

        const int offSetSF = ip*nodesPerElement;

        // left and right nodes for this ip
        const int il = lrscv[2*ip];
        const int ir = lrscv[2*ip+1];

        // save off some offsets
        const int ilNdim = il*nDim;
        const int irNdim = ir*nDim;

        // compute scs point values; offset to Shape Function; sneak in divU
        double muIp = 0.0;
        double lambdaIp = 0.0;
        double divDx = 0.0;
        for ( int ic = 0; ic < nodesPerElement; ++ic ) {
          const double r = p_shape_function[offSetSF+ic];
          muIp += r*p_mu[ic];
          lambdaIp += r*p_lambda[ic];
          const int offSetDnDx = nDim*nodesPerElement*ip + ic*nDim;
          for ( int j = 0; j < nDim; ++j ) {
            const double dxj = p_displacementNp1[ic*nDim+j];
            divDx += dxj*p_dndx[offSetDnDx+j];
          }
        }

        // assemble divDx term (explicit)
        for ( int i = 0; i < nDim; ++i ) {
          // divU stress term
          const double divTerm = -lambdaIp*divDx*p_scs_areav[ipNdim+i];
          const int indexL = ilNdim + i;
          const int indexR = irNdim + i;
          // right hand side; L and R
          p_rhs[indexL] -= divTerm;
          p_rhs[indexR] += divTerm;
        }

        // stress
        for ( int ic = 0; ic < nodesPerElement; ++ic ) {

          const int icNdim = ic*nDim;

          for ( int i = 0; i < nDim; ++i ) {

            const int indexL = ilNdim + i;
            const int indexR = irNdim + i;

            const int rowL = indexL*nodesPerElement*nDim;
            const int rowR = indexR*nodesPerElement*nDim;

            const int rLiC_i = rowL+icNdim+i;
            const int rRiC_i = rowR+icNdim+i;

            // viscous stress
            const int offSetDnDx = nDim*nodesPerElement*ip + icNdim;
            double lhs_riC_i = 0.0;
            for ( int j = 0; j < nDim; ++j ) {

              const double axj = p_scs_areav[ipNdim+j];
              const double dxj = p_displacementNp1[icNdim+j];

              // -mu*dxi/dxj*A_j; fixed i over j loop; see below..
              const double lhsfacDiff_i = -muIp*p_dndx[offSetDnDx+j]*axj;
              // lhs; il then ir
              lhs_riC_i += lhsfacDiff_i;

              // -mu*dxj/dxi*A_j
              const double lhsfacDiff_j = -muIp*p_dndx[offSetDnDx+i]*axj;
              // lhs; il then ir
              p_lhs[rowL+icNdim+j] += lhsfacDiff_j;
              p_lhs[rowR+icNdim+j] -= lhsfacDiff_j;

              // rhs; il then ir
              p_rhs[indexL] -= lhsfacDiff_j*dxj;
              p_rhs[indexR] += lhsfacDiff_j*dxj;
            }

            // deal with accumulated lhs and flux for -mu*dxi/dxj*Aj
            p_lhs[rLiC_i] += lhs_riC_i;
            p_lhs[rRiC_i] -= lhs_riC_i;
            const double dxi = p_displacementNp1[icNdim+i];
            p_rhs[indexL] -= lhs_riC_i*dxi;
            p_rhs[indexR] += lhs_riC_i*dxi;

          }
        }
      }

      apply_coeff(connected_nodes, scratchIds, scratchVals, rhs, lhs, __FILE__);

    }
  }
}
//--------------------------------------------------------------------------
//-------- execute ---------------------------------------------------------
//--------------------------------------------------------------------------
void
ComputeLowReynoldsSDRWallAlgorithm::execute()
{

  stk::mesh::BulkData & bulk_data = realm_.bulk_data();
  stk::mesh::MetaData & meta_data = realm_.meta_data();

  const int nDim = meta_data.spatial_dimension();

  // nodal fields to gather
  std::vector<double> ws_density;
  std::vector<double> ws_viscosity;

  // master element
  std::vector<double> ws_face_shape_function;

  // deal with state
  ScalarFieldType &densityNp1 = density_->field_of_state(stk::mesh::StateNP1);

  // define vector of parent topos; should always be UNITY in size
  std::vector<stk::topology> parentTopo;

  // define some common selectors
  stk::mesh::Selector s_locally_owned_union = meta_data.locally_owned_part()
    &stk::mesh::selectUnion(partVec_);

  stk::mesh::BucketVector const& face_buckets =
    realm_.get_buckets( meta_data.side_rank(), s_locally_owned_union );
  for ( stk::mesh::BucketVector::const_iterator ib = face_buckets.begin();
        ib != face_buckets.end() ; ++ib ) {
    stk::mesh::Bucket & b = **ib ;

    // extract connected element topology
    b.parent_topology(stk::topology::ELEMENT_RANK, parentTopo);
    ThrowAssert ( parentTopo.size() == 1 );
    stk::topology theElemTopo = parentTopo[0];

    // extract master element
    MasterElement *meSCS = realm_.get_surface_master_element(theElemTopo);

    // face master element
    MasterElement *meFC = realm_.get_surface_master_element(b.topology());
    const int nodesPerFace = b.topology().num_nodes();
    std::vector<int> face_node_ordinal_vec(nodesPerFace);

    // algorithm related; element
    ws_density.resize(nodesPerFace);
    ws_viscosity.resize(nodesPerFace);
    ws_face_shape_function.resize(nodesPerFace*nodesPerFace);

    // pointers
    double *p_density = &ws_density[0];
    double *p_viscosity = &ws_viscosity[0];
    double *p_face_shape_function = &ws_face_shape_function[0];

    // shape functions
    if ( useShifted_ )
      meFC->shifted_shape_fcn(&p_face_shape_function[0]);
    else
      meFC->shape_fcn(&p_face_shape_function[0]);

    const stk::mesh::Bucket::size_type length   = b.size();

    for ( stk::mesh::Bucket::size_type k = 0 ; k < length ; ++k ) {

      // get face
      stk::mesh::Entity face = b[k];

      //======================================
      // gather nodal data off of face
      //======================================
      stk::mesh::Entity const * face_node_rels = bulk_data.begin_nodes(face);
      int num_face_nodes = bulk_data.num_nodes(face);
      // sanity check on num nodes
      ThrowAssert( num_face_nodes == nodesPerFace );
      for ( int ni = 0; ni < num_face_nodes; ++ni ) {
        stk::mesh::Entity node = face_node_rels[ni];

        // gather scalars
        p_density[ni]    = *stk::mesh::field_data(densityNp1, node);
        p_viscosity[ni] = *stk::mesh::field_data(*viscosity_, node);
      }

      // pointer to face data
      const double * areaVec = stk::mesh::field_data(*exposedAreaVec_, face);

      // extract the connected element to this exposed face; should be single in size!
      const stk::mesh::Entity* face_elem_rels = bulk_data.begin_elements(face);
      ThrowAssert( bulk_data.num_elements(face) == 1 );

      // get element; its face ordinal number and populate face_node_ordinal_vec
      stk::mesh::Entity element = face_elem_rels[0];
      const int face_ordinal = bulk_data.begin_element_ordinals(face)[0];
      theElemTopo.side_node_ordinals(face_ordinal, face_node_ordinal_vec.begin());

      // get the relations off of element
      stk::mesh::Entity const * elem_node_rels = bulk_data.begin_nodes(element);

      // loop over face nodes
      for ( int ip = 0; ip < num_face_nodes; ++ip ) {

        const int offSetAveraVec = ip*nDim;

        const int opposingNode = meSCS->opposingNodes(face_ordinal,ip);
        const int nearestNode = face_node_ordinal_vec[ip];

        // left and right nodes; right is on the face; left is the opposing node
        stk::mesh::Entity nodeL = elem_node_rels[opposingNode];
        stk::mesh::Entity nodeR = elem_node_rels[nearestNode];

        // extract nodal fields
        const double * coordL = stk::mesh::field_data(*coordinates_, nodeL );
        const double * coordR = stk::mesh::field_data(*coordinates_, nodeR );

        // aMag
        double aMag = 0.0;
        for ( int j = 0; j < nDim; ++j ) {
          const double axj = areaVec[offSetAveraVec+j];
          aMag += axj*axj;
        }
        aMag = std::sqrt(aMag);

        // interpolate to bip
        double rhoBip = 0.0;
        double muBip = 0.0;
        const int offSetSF_face = ip*nodesPerFace;
        for ( int ic = 0; ic < nodesPerFace; ++ic ) {
          const double r = p_face_shape_function[offSetSF_face+ic];
          rhoBip += r*p_density[ic];
          muBip += r*p_viscosity[ic];
        }
        const double nuBip = muBip/rhoBip;

        // determine yp (approximated by 1/4 distance along edge)
        double ypbip = 0.0;
        for ( int j = 0; j < nDim; ++j ) {
          const double nj = areaVec[offSetAveraVec+j]/aMag;
          const double ej = 0.25*(coordR[j] - coordL[j]);
          ypbip += nj*ej*nj*ej;
        }
        ypbip = std::sqrt(ypbip);

        // compute low Re wall sdr
        const double lowReSdr = wallFactor_*6.0*nuBip/betaOne_/ypbip/ypbip;

        // assemble to nodal quantities; will normalize and assemble in driver
        double * assembledWallArea = stk::mesh::field_data(*assembledWallArea_, nodeR );
        double * sdrBc = stk::mesh::field_data(*sdrBc_, nodeR );

        *assembledWallArea += aMag;
        *sdrBc += lowReSdr*aMag;
      }
    }
  }
}