/*
 * Gathers surface statistics over the locally owned nodes and reduces them
 * across MPI_COMM_WORLD.
 *
 * A node counts as "top" when its vertical grid index equals
 * self->surfaceIdx, or, when surfaceIdx is -1, the last vertical index of
 * the vertex grid. A node counts as "bottom" when its vertical grid index
 * is 0. For each such node the vertical velocity component and the vertical
 * coordinate are accumulated (the coordinate is added for top nodes and
 * subtracted for bottom nodes).
 *
 * When self->avg is set everything is accumulated into slot 0; otherwise a
 * node contributes to the slot of every element it touches, indexed by the
 * element's horizontal grid position.
 *
 * Outputs (both allocated here with malloc and handed to the caller):
 *   *_avg_sep    - mean top vertical velocity minus mean bottom vertical
 *                  velocity, per slot.
 *   *_avg_height - accumulated height divided by the top node count,
 *                  per slot.
 * When self->avg is set only slot 0 of each output is written.
 */
void lecode_tools_Isostasy_AverageSurfaces(lecode_tools_Isostasy *self,
      double** _avg_sep, double** _avg_height)
{
   FeMesh *mesh;
   Grid *vertGrid, *elGrid;
   double *heightLocal, *heightGlobal;
   double *topVelLocal, *topVelGlobal;
   double *botVelLocal, *botVelGlobal;
   int *topHitsLocal, *topHitsGlobal;
   int *botHitsLocal, *botHitsGlobal;
   int nodeIJK[3], elIJK[3];
   double vel[3];
   double vertVel, vertPos, topMean, botMean;
   IArray *inc;
   int nDims, nLocalNodes, nColumns, nResults;
   int globalNode, column, onTop;
   int node_i, el_i, col_i;

   mesh = self->mesh;
   nDims = Mesh_GetDimSize( mesh );
   vertGrid = *Mesh_GetExtension(mesh, Grid**, mesh->vertGridId );
   elGrid = *Mesh_GetExtension(mesh, Grid**, mesh->elGridId );
   nLocalNodes = FeMesh_GetNodeLocalSize(mesh);
   inc = IArray_New();

   /* One slot per element column of the horizontal plane. */
   nColumns = 0; /* keeps the compiler quiet on the assert path */
   switch ( nDims )
   {
      case 2:
         nColumns = elGrid->sizes[0];
         break;
      case 3:
         nColumns = elGrid->sizes[0]*elGrid->sizes[self->zontalAxis];
         break;
      default:
         assert(0);
   }

   /* Zero-initialised local accumulators. */
   topVelLocal = (double*)calloc( nColumns, sizeof(double) );
   botVelLocal = (double*)calloc( nColumns, sizeof(double) );
   heightLocal = (double*)calloc( nColumns, sizeof(double) );
   topHitsLocal = (int*)calloc( nColumns, sizeof(int) );
   botHitsLocal = (int*)calloc( nColumns, sizeof(int) );

   for ( node_i = 0; node_i < nLocalNodes; node_i++ )
   {
      FeMesh_GetNodeElements( mesh, node_i, inc );

      globalNode = FeMesh_NodeDomainToGlobal(mesh, node_i);
      Grid_Lift(vertGrid, globalNode, nodeIJK);

      /* Top surface: explicit index if given, otherwise the last vertical layer. */
      if ( self->surfaceIdx != -1 )
         onTop = ( nodeIJK[self->vertAxis] == self->surfaceIdx );
      else
         onTop = ( nodeIJK[self->vertAxis] == vertGrid->sizes[self->vertAxis] - 1 );

      if ( onTop )
      {
         FeVariable_GetValueAtNode(self->vel_field, node_i, vel);
         vertVel = vel[self->vertAxis];
         vertPos = Mesh_GetVertex( mesh, node_i )[self->vertAxis];

         if ( self->avg )
         {
            topVelLocal[0] += vertVel;
            heightLocal[0] += vertPos;
            topHitsLocal[0]++;
         }
         else
         {
            for ( el_i = 0; el_i < inc->size; el_i++ )
            {
               Grid_Lift( elGrid, FeMesh_ElementDomainToGlobal( self->mesh, inc->ptr[el_i] ), elIJK );

               /* Only elements below an explicitly given surface contribute. */
               if ( self->surfaceIdx == -1 || elIJK[self->vertAxis] < self->surfaceIdx )
               {
                  column = elIJK[0];
                  if ( nDims == 3 )
                     column += elIJK[self->zontalAxis]*elGrid->sizes[0];

                  topVelLocal[column] += vertVel;
                  heightLocal[column] += vertPos;
                  topHitsLocal[column]++;
               }
            }
         }
      }

      if ( nodeIJK[self->vertAxis] == 0 )
      {
         FeVariable_GetValueAtNode(self->vel_field, node_i, vel);
         vertVel = vel[self->vertAxis];
         vertPos = Mesh_GetVertex( mesh, node_i )[self->vertAxis];

         if ( self->avg )
         {
            botVelLocal[0] += vertVel;
            heightLocal[0] -= vertPos;
            botHitsLocal[0]++;
         }
         else
         {
            for ( el_i = 0; el_i < inc->size; el_i++ )
            {
               Grid_Lift( elGrid, FeMesh_ElementDomainToGlobal( self->mesh, inc->ptr[el_i] ), elIJK );

               /* Same element filter as for the top surface. */
               if ( self->surfaceIdx == -1 || elIJK[self->vertAxis] < self->surfaceIdx )
               {
                  column = elIJK[0];
                  if ( nDims == 3 )
                     column += elIJK[self->zontalAxis]*elGrid->sizes[0];

                  botVelLocal[column] += vertVel;
                  heightLocal[column] -= vertPos;
                  botHitsLocal[column]++;
               }
            }
         }
      }
   }

   /* Sum every accumulator over all ranks. */
   topVelGlobal = (double*)malloc( nColumns*sizeof(double) );
   botVelGlobal = (double*)malloc( nColumns*sizeof(double) );
   heightGlobal = (double*)malloc( nColumns*sizeof(double) );
   topHitsGlobal = (int*)malloc( nColumns*sizeof(int) );
   botHitsGlobal = (int*)malloc( nColumns*sizeof(int) );

   MPI_Allreduce(heightLocal, heightGlobal, nColumns, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
   MPI_Allreduce(topVelLocal, topVelGlobal, nColumns, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
   MPI_Allreduce(botVelLocal, botVelGlobal, nColumns, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
   MPI_Allreduce(topHitsLocal, topHitsGlobal, nColumns, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
   MPI_Allreduce(botHitsLocal, botHitsGlobal, nColumns, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

   free( heightLocal );
   free( topVelLocal );
   free( botVelLocal );
   free( topHitsLocal );
   free( botHitsLocal );
   Stg_Class_Delete( inc );

   *_avg_sep = (double*)malloc( nColumns*sizeof(double) );
   *_avg_height = (double*)malloc( nColumns*sizeof(double) );

   /* In averaged mode only slot 0 carries data, so only slot 0 is produced. */
   nResults = nColumns;
   if ( self->avg && nResults > 1 )
      nResults = 1;

   for ( col_i = 0; col_i < nResults; col_i++ )
   {
      topMean = topVelGlobal[col_i]/(double)(topHitsGlobal[col_i]);
      botMean = botVelGlobal[col_i]/(double)(botHitsGlobal[col_i]);

      (*_avg_sep)[col_i] = topMean - botMean;
      (*_avg_height)[col_i] = heightGlobal[col_i]/(double)(topHitsGlobal[col_i]);
   }

   free( heightGlobal );
   free( topVelGlobal );
   free( botVelGlobal );
   free( topHitsGlobal );
   free( botHitsGlobal );
}
// Returns a callable that evaluates the derivatives of the wrapped FeVariable
// for inputs of the same kind as `sample_input`.
//
// Dispatch is on the dynamic type of `sample_input`, tried in this order:
//   1. FEMCoordinate whose mesh() is the variable's parent mesh
//      -> interpolate derivatives at the element-local coordinate.
//   2. MeshCoordinate whose object() is the variable's own mesh
//      -> interpolate derivatives at the node, using the last element in the
//         node's incident-element list.
//   3. IO_double -> interpolate derivatives at a global coordinate; the
//      returned callable throws std::runtime_error when the interpolation
//      result is neither LOCAL nor SHADOW.
// A type that matches but fails its mesh check falls through to the next case.
// Throws std::invalid_argument when no case applies.
//
// Every returned callable writes into, and returns, the same shared output
// buffer created here.
Fn::GradFeVariableFn::func Fn::GradFeVariableFn::getFunction( IOsptr sample_input )
{
    FeVariable* fevar = (FeVariable*)_fevariable;
    int numComponents = fevar->fieldComponentCount;

    // NOTE(review): the Vector/Tensor choice keys on fevar->dim rather than on
    // the component count - confirm this is the intended criterion.
    FunctionIO::IOType iotype = (fevar->dim == 1) ? FunctionIO::Vector : FunctionIO::Tensor;

    // Output buffer shared (by shared_ptr capture) with whichever lambda is returned.
    std::shared_ptr<IO_double> _output = std::make_shared<IO_double>(numComponents*fevar->dim, iotype);

    // Case 1: element + local coordinate on the parent mesh.
    if ( std::shared_ptr<const FEMCoordinate> sampleFem = std::dynamic_pointer_cast<const FEMCoordinate>(sample_input) )
    {
        if ( sampleFem->mesh() == (void*) (fevar->feMesh->parentMesh ) )
        {
            return [_output,fevar](IOsptr input)->IOsptr {
                std::shared_ptr<const FEMCoordinate> coord = debug_dynamic_cast<const FEMCoordinate>(input);

                FeVariable_InterpolateDerivativesToElLocalCoord( fevar, coord->index(), coord->localCoord()->data(), _output->data() );

                return debug_dynamic_cast<const FunctionIO>(_output);
            };
        }
    }

    // Case 2: mesh node; only valid on the identical mesh.
    if ( std::shared_ptr<const MeshCoordinate> sampleMesh = std::dynamic_pointer_cast<const MeshCoordinate>(sample_input) )
    {
        if ( sampleMesh->object() == (void*) (fevar->feMesh) )
        {
            // Scratch incidence array, kept alive by the lambda's capture.
            std::shared_ptr<IArrayClass> inc = std::make_shared<IArrayClass>();
            return [_output,fevar,inc](IOsptr input)->IOsptr {
                std::shared_ptr<const MeshCoordinate> coord = debug_dynamic_cast<const MeshCoordinate>(input);
                unsigned nodeIndex = coord->index();

                // Approach taken from OperatorFeVariable.c: collect the elements around the node.
                FeMesh_GetNodeElements( (void*)fevar->feMesh, nodeIndex, (IArray*)inc->inc );
                unsigned elCount = IArray_GetSize( inc->inc );
                int*     elList  = IArray_GetPtr( inc->inc );
                Coord    localCoord;

                // The last element in the list is the one used for the evaluation.
                int lastEl = elList[elCount-1];

                // Express the node position in that element's local coordinates ...
                FeMesh_CoordGlobalToLocal( fevar->feMesh, lastEl, Mesh_GetVertex( fevar->feMesh, nodeIndex ), localCoord );

                // ... and interpolate the derivatives there.
                FeVariable_InterpolateDerivativesToElLocalCoord( fevar, lastEl, localCoord, _output->data() );

                return debug_dynamic_cast<const FunctionIO>(_output);
            };
        }
    }

    // Case 3: plain global coordinate.
    if ( std::shared_ptr<const IO_double> sampleCoord = std::dynamic_pointer_cast<const IO_double>(sample_input) )
    {
        return [_output,fevar](IOsptr input)->IOsptr {
            std::shared_ptr<const IO_double> position = debug_dynamic_cast<const IO_double>(input);

            InterpolationResult status = FeVariable_InterpolateDerivativesAt( (void*)fevar, (double*)position->data(), (double*) _output->data() );

            if ( status != LOCAL && status != SHADOW )
            {
                // Report the offending location in the exception text.
                std::stringstream message;
                message << "FeVariable derivative interpolation at location (" << position->at(0);
                for (unsigned comp=1; comp<position->size(); comp++)
                    message << ", "<< position->at(comp);
                message << ") does not appear to be valid.\nLocation is probably outside local domain.";

                throw std::runtime_error(message.str());
            }

            return debug_dynamic_cast<const FunctionIO>(_output);
        };
    }

    // No supported input type matched.
    throw std::invalid_argument("'GradFeVariableFn' does not appear to be compatible with provided input type.");
}
/*
 * Computes the basal flow stored in self->flow and prints its average on
 * rank 0.
 *
 * The flow at a slot is
 *     -avg_density * avg_sep / rho_zero_density
 * and, when self->thermalSLE is set, additionally
 *     - avg_height * phi / rho_zero_density.
 *
 * When self->avg is set a single value is written to self->flow[0] from
 * slot 0 of the averaged inputs. Otherwise the per-element-column inputs are
 * first averaged onto the nodes of the basal plane (each locally owned node
 * takes the mean over its incident elements), summed over all ranks, and
 * one flow value per basal node is written to self->flow.
 *
 * The arrays returned by lecode_tools_Isostasy_AverageSurfaces /
 * lecode_tools_Isostasy_AverageBody are owned by this function and are
 * released on every path before returning.
 */
void lecode_tools_Isostasy_CalcBasalFlow(lecode_tools_Isostasy *self)
{
   int rank;
   Grid *nodeGrid, *elGrid;
   double *avg_sep = NULL, *avg_height = NULL;
   double *avg_density = NULL, *rho_zero_density = NULL;
   double *phi = NULL, **phi_ptr;
   double *node_avg_sep, *node_avg_height;
   double *node_avg_density, *node_rho_zero_density, *node_phi = NULL;
   double *global_node_avg_sep, *global_node_avg_height;
   double *global_node_avg_density, *global_node_rho_zero_density, *global_node_phi = NULL;
   double avgFlow;
   int param[3], elParam[3], nodeIdx, arraySize, arrayPos, elPos;
   int nDims, iterSizes[2];
   IArray *inc;
   int ii, jj, kk;

   MPI_Comm_rank(MPI_COMM_WORLD, &rank);

   nDims = Mesh_GetDimSize( self->mesh );
   nodeGrid = *Mesh_GetExtension( self->mesh, Grid**,  self->mesh->vertGridId );
   elGrid = *Mesh_GetExtension( self->mesh, Grid**,  self->mesh->elGridId );
   inc = IArray_New();

   /* phi is only requested from AverageBody when the thermal term is enabled. */
   if (self->thermalSLE)
   {
      lecode_tools_Isostasy_SolveThermal(self);
      phi_ptr = &phi;
   }
   else
      phi_ptr = NULL;

   lecode_tools_Isostasy_AverageSurfaces(self, &avg_sep, &avg_height);
   lecode_tools_Isostasy_AverageBody(self, &avg_density, &rho_zero_density, phi_ptr);
   /* avg_density seems to be the avg rho across the basal node columns. */
   /* rho_zero_density seems to be the avg density of the rho_zero material. */

   if ( self->avg )
   {
      /* Single, domain-wide value: only slot 0 of the inputs is used. */
      self->flow[0] = -1.0*avg_density[0]*avg_sep[0]/rho_zero_density[0];
      if (phi_ptr)
         self->flow[0] -= avg_height[0]*phi[0]/rho_zero_density[0];

      avgFlow = self->flow[0];
   }
   else
   {
      /* One slot per node of the basal plane: x-size in 2D, x-size times the
       * horizontal-axis size in 3D. */
      arraySize=0;
      if ( nDims == 2 ) arraySize = nodeGrid->sizes[0];
      else if ( nDims == 3 ) arraySize = nodeGrid->sizes[0]*nodeGrid->sizes[self->zontalAxis];
      else assert(0);

      /* Zero-initialised so that nodes not owned by this rank contribute
       * nothing to the MPI sum below. */
      node_avg_density = (double*)calloc( arraySize, sizeof(double) );
      node_rho_zero_density = (double*)calloc( arraySize, sizeof(double) );
      node_avg_sep = (double*)calloc( arraySize, sizeof(double) );
      node_avg_height = (double*)calloc( arraySize, sizeof(double) );
      if ( phi_ptr )
         node_phi = (double*)calloc( arraySize, sizeof(double) );

      /* param[] addresses a basal node; its vertical component stays 0. */
      param[0] = param[1] = param[2] = 0;
      iterSizes[0] = nodeGrid->sizes[0];
      iterSizes[1] = (nDims == 3) ? nodeGrid->sizes[self->zontalAxis] : 1;
      for ( ii = 0; ii < iterSizes[1]; ii++ )
      {
         /* Second horizontal direction of the basal plane; a single pass in 2D. */
         for ( kk = 0; kk < iterSizes[0]; kk++)
         {
            param[self->zontalAxis] = ii;
            param[0] = kk;
            arrayPos = param[0] + param[self->zontalAxis]*nodeGrid->sizes[0];

            /* Skip nodes that are not locally owned by this rank. */
            nodeIdx = Grid_Project( nodeGrid, param );
            if ( !FeMesh_NodeGlobalToDomain( self->mesh, nodeIdx, &nodeIdx ) ||
                  nodeIdx >= FeMesh_GetNodeLocalSize( self->mesh ) )
            {
               continue;
            }

            /* Sum the element-column values of every element touching the node. */
            FeMesh_GetNodeElements( self->mesh, nodeIdx, inc );
            for ( jj = 0; jj < inc->size; jj++ )
            {
               /* Lift into a separate array so the node coordinates in
                * param[] are not overwritten by element coordinates. */
               Grid_Lift( elGrid, FeMesh_ElementDomainToGlobal( self->mesh, inc->ptr[jj] ), elParam );

               elPos = elParam[0];
               if ( nDims == 3 ) elPos += elParam[self->zontalAxis]*elGrid->sizes[0];

               node_avg_density[arrayPos] += avg_density[elPos];
               node_rho_zero_density[arrayPos] += rho_zero_density[elPos];
               node_avg_sep[arrayPos] += avg_sep[elPos];
               node_avg_height[arrayPos] += avg_height[elPos];
               if ( phi_ptr )
                  node_phi[arrayPos] += phi[elPos];
            }

            /* ... and turn the sums into means over the incident elements. */
            node_avg_density[arrayPos] /= (double)inc->size;
            node_rho_zero_density[arrayPos] /= (double)inc->size;
            node_avg_sep[arrayPos] /= (double)inc->size;
            node_avg_height[arrayPos] /= (double)inc->size;
            if ( phi_ptr )
               node_phi[arrayPos] /= (double)inc->size;
         }
      }

      global_node_avg_density = (double*)calloc( arraySize, sizeof(double) );
      global_node_rho_zero_density = (double*)calloc( arraySize, sizeof(double) );
      global_node_avg_sep = (double*)calloc( arraySize, sizeof(double) );
      global_node_avg_height = (double*)calloc( arraySize, sizeof(double) );
      if ( phi_ptr )
         global_node_phi = (double*)calloc( arraySize, sizeof(double) );

      /* Combine the per-rank node values; afterwards every rank holds the
       * complete basal arrays. */
      MPI_Allreduce(node_avg_density, global_node_avg_density, arraySize, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
      MPI_Allreduce(node_rho_zero_density, global_node_rho_zero_density, arraySize, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
      MPI_Allreduce(node_avg_sep, global_node_avg_sep, arraySize, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
      MPI_Allreduce(node_avg_height, global_node_avg_height, arraySize, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
      if ( phi_ptr )
         MPI_Allreduce(node_phi, global_node_phi, arraySize, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);

      avgFlow = 0.0;
      for ( ii = 0; ii < arraySize; ii++ )
      {
         self->flow[ii] = -1.0*global_node_avg_density[ii]*global_node_avg_sep[ii]/global_node_rho_zero_density[ii];
         if (phi_ptr)
            self->flow[ii] -= global_node_avg_height[ii]*global_node_phi[ii]/global_node_rho_zero_density[ii];

         avgFlow += self->flow[ii];
      }

      /* The loop above already covers every basal node on every rank, so no
       * further reduction of avgFlow is required. */
      avgFlow = avgFlow/(double)arraySize;

      free( node_avg_sep );
      free( node_avg_height );
      free( node_avg_density );
      free( node_rho_zero_density );
      free( node_phi );

      free( global_node_avg_sep );
      free( global_node_avg_height );
      free( global_node_avg_density );
      free( global_node_rho_zero_density );
      free( global_node_phi );
   }

   /* Release the inputs on BOTH paths; previously the averaged path leaked
    * them together with the incidence array. phi stays NULL when the
    * thermal term is disabled, and free(NULL) is a no-op. */
   free( avg_sep );
   free( avg_height );
   free( avg_density );
   free( rho_zero_density );
   free( phi );
   Stg_Class_Delete( inc );

   if (rank == 0)
   {
      printf("===========\n");
      printf("= Isostasy\n");
      printf("=\n");
      printf("= Average basal flow: %g\n", avgFlow);
      printf("===========\n");
   }
}
/*
 * Accumulates the patch around node_I into AMatrix and bVector
 * (see eq. 14.31, REFERENCE).
 *
 * For every element incident on the node, the element-local point
 * (0,0,0) is mapped to global coordinates, turned into a polynomial vector
 * by self->_makePoly, and the raw field is interpolated there. Then
 *   AMatrix[i][j]   += p[i] * p[j]
 *   bVector[dof][i] += value[dof] * p[i]
 * are summed over those elements.
 *
 * Both AMatrix and bVector are added to, not reset, so the caller is
 * responsible for their initial contents.
 */
void _SPR_StrainRate_AssemblePatch( SPR_StrainRate* self, int node_I, double** AMatrix, double** bVector) {
   OperatorFeVariable* rawField  = self->rawField;
   FeMesh*             feMesh    = (FeMesh*)rawField->feMesh;
   Index               dofCount  = rawField->fieldComponentCount;
   int                 polyOrder = self->orderOfInterpolation;
   IArray*             inc       = self->inc;
   double              elCentre[3] = {0.0,0.0,0.0};
   Index               el_I, elCount, elId;
   Index               row_I, col_I, dof_I;
   SymmetricTensor*    sampledValues;
   double**            samplePoints;
   double**            polyTerms;
   double*             polyRow;
   int*                elList;

   /* 1) Elements surrounding the patch node. */
   FeMesh_GetNodeElements( feMesh, node_I, inc );
   elCount = IArray_GetSize( inc );
   elList = IArray_GetPtr( inc );

   /* 2) Per-element scratch storage. */
   sampledValues = Memory_Alloc_Array( SymmetricTensor, elCount, "StrainRate at superconvergent points" );
   samplePoints = Memory_Alloc_2DArray( double, elCount, self->dim, (Name)"Global Coords of superconvergent points" );
   polyTerms = Memory_Alloc_2DArray( double, elCount, polyOrder, (Name)"Holds transformed global coord polynomials" );

   /* 3) Sample each element: global position, polynomial vector and
    *    the raw field value at the element-local origin. */
   for( el_I = 0 ; el_I < elCount ; el_I++ ) {
      elId = elList[ el_I ];

      FeMesh_CoordLocalToGlobal( feMesh, elId, elCentre, samplePoints[el_I] );
      self->_makePoly( samplePoints[el_I], polyTerms[el_I] );

      _OperatorFeVariable_InterpolateWithinElement( rawField, elId, elCentre, sampledValues[el_I] );
   }

   /* 4) Accumulate the geometric A matrix and the per-component b vectors. */
   for( el_I = 0 ; el_I < elCount ; el_I++ ) {
      polyRow = polyTerms[el_I];

      for( row_I = 0 ; row_I < polyOrder ; row_I++ ) {
         for( col_I = 0 ; col_I < polyOrder ; col_I++ ) {
            AMatrix[row_I][col_I] += ( polyRow[row_I] * polyRow[col_I] );
         }
      }

      for( dof_I = 0 ; dof_I < dofCount ; dof_I++ ) {
         for( row_I = 0 ; row_I < polyOrder ; row_I++ ) {
            bVector[dof_I][ row_I ] += (sampledValues[el_I][dof_I] * polyRow[row_I]);
         }
      }
   }

   Memory_Free( sampledValues );
   Memory_Free( samplePoints );
   Memory_Free( polyTerms );
}