/*
 * Integrate an FeVariable over the mesh it lives on.
 *
 * Loops over the Mesh_GetLocalSize( mesh, dim ) elements of the field's mesh.
 * For each element, the field is interpolated at every integration point that
 * `swarm` stores in the matching cell; the value is multiplied by the point's
 * weight and the Jacobian determinant and added to a per-process sum.
 *
 * The per-process sums are combined with MPI_Allreduce (MPI_SUM over
 * MPI_COMM_WORLD), so the value returned is the same on every rank.
 *
 * NOTE(review): the interpolated value is written into a single double, so
 * tempField is presumably a one-component field - confirm against callers.
 */
double IntegrateField( FeVariable* tempField, IntegrationPointsSwarm* swarm ) {
	FeMesh*		fieldMesh	= tempField->feMesh;
	unsigned	nDims		= Mesh_GetDimSize( fieldMesh );
	double		localSum	= 0.0;
	double		globalSum;
	unsigned	elem;

	for( elem = 0; elem < Mesh_GetLocalSize( fieldMesh, nDims ); elem++ ) {
		unsigned	cell	= CellLayout_MapElementIdToCellId( swarm->cellLayout, elem );
		ElementType*	type	= FeMesh_GetElementType( fieldMesh, elem );
		unsigned	p;

		for( p = 0; p < swarm->cellParticleCountTbl[cell]; p++ ) {
			IntegrationPoint*	ip = (IntegrationPoint*)Swarm_ParticleInCellAt( swarm, cell, p );
			double			value;
			double			jacobian;

			FeVariable_InterpolateWithinElement( tempField, elem, ip->xi, &value );
			jacobian = ElementType_JacobianDeterminant( type, fieldMesh, elem, ip->xi, nDims );

			/* quadrature contribution: |J| * w * f(xi) */
			localSum += jacobian * ip->weight * value;
		}
	}

	/* every rank receives the global total */
	MPI_Allreduce( &localSum, &globalSum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD );

	return globalSum;
}
void _VectorAssemblyTerm_NA__Fn_AssembleElement( void* forceTerm, ForceVector* forceVector, Element_LocalIndex lElement_I, double* elForceVec ) {
   VectorAssemblyTerm_NA__Fn* self = Stg_CheckType( forceTerm, VectorAssemblyTerm_NA__Fn );
   IntegrationPointsSwarm*    swarm = (IntegrationPointsSwarm*)self->integrationSwarm;
   Dimension_Index            dim = forceVector->dim;
   IntegrationPoint*          particle;
   FeMesh*                    mesh;
   double*                    xi;
   Particle_InCellIndex       cParticle_I;
   Particle_InCellIndex       cellParticleCount;
   Element_NodeIndex          nodesPerEl;
   Node_ElementLocalIndex     A;
   ElementType*               elementType;
   Dof_Index                  dofsPerNode, i;
   Cell_Index                 cell_I;
   double                     detJac;
   double                     factor;
   double                     N[27];

   /* Since we are integrating over the velocity mesh - we want the velocity mesh here and not the temperature mesh */
   mesh = forceVector->feVariable->feMesh;

   VectorAssemblyTerm_NA__Fn_cppdata* cppdata = (VectorAssemblyTerm_NA__Fn_cppdata*)self->cppdata;
    
   debug_dynamic_cast<ParticleInCellCoordinate*>(cppdata->input->localCoord())->index() = lElement_I;  // set the elementId as the owning cell for the particleCoord
   cppdata->input->index()   = lElement_I;  // set the elementId for the fem coordinate

   /* Set the element type */
   elementType = FeMesh_GetElementType( mesh, lElement_I );
   nodesPerEl  = elementType->nodeCount;
   

   /* assumes constant number of dofs per element */
   dofsPerNode = forceVector->feVariable->fieldComponentCount;

   cell_I = CellLayout_MapElementIdToCellId( swarm->cellLayout, lElement_I );
   cellParticleCount = swarm->cellParticleCountTbl[ cell_I ];

   for ( cParticle_I = 0 ; cParticle_I < cellParticleCount ; cParticle_I++ ) {
      debug_dynamic_cast<ParticleInCellCoordinate*>(cppdata->input->localCoord())->particle_cellId(cParticle_I);  // set the particleCoord cellId
      particle = (IntegrationPoint*) Swarm_ParticleInCellAt( swarm, cell_I, cParticle_I );
      xi       = particle->xi;

      /* Calculate Determinant of Jacobian and Shape Functions */
      if (self->geometryMesh)
         detJac = ElementType_JacobianDeterminant( FeMesh_GetElementType( self->geometryMesh, lElement_I ), self->geometryMesh, lElement_I, xi, dim );
      else
         detJac = ElementType_JacobianDeterminant( elementType, mesh, lElement_I, xi, dim );

      ElementType_EvaluateShapeFunctionsAt( elementType, xi, N );

      /* evaluate function */
      const FunctionIO* funcout = debug_dynamic_cast<const FunctionIO*>(cppdata->func(cppdata->input.get()));

      factor = detJac * particle->weight;
      for( A = 0 ; A < nodesPerEl ; A++ )
         for( i = 0 ; i < dofsPerNode ; i++ )
            elForceVec[A * dofsPerNode + i ] += factor * funcout->at<double>(i) * N[A] ;

   }
}
/*
 * Unit test for the element cell layout.
 *
 * Only one rank performs the checks: rank 1 when more than one process is
 * running, rank 0 otherwise. For every element counted by
 * Mesh_GetLocalSize( data->mesh, data->nDims ) it verifies that
 *   - the element maps to the cell with the same index,
 *   - a point built per axis from the midpoint of cell point 0 and cell
 *     point 1 (x), 2 (y) and 4 (z) is reported as inside the cell,
 *   - a point one unit beyond the second-to-last cell point in every
 *     coordinate is reported as outside the cell.
 *
 * NOTE(review): indexing cell points 2 and 4 assumes cells with at least five
 * points, i.e. presumably a 3D fixture - confirm against the suite set-up.
 */
void ElementCellLayoutSuite_TestElementCellLayout( ElementCellLayoutSuiteData* data ) {
   /* for axis k, the cell point paired with point 0 to form the midpoint */
   static const unsigned midpointPartner[3] = { 1, 2, 4 };
   int                   procToWatch = data->nProcs > 1 ? 1 : 0;
   Cell_Index            cell;
   Element_DomainIndex   element;
   GlobalParticle        testParticle;

   if( data->rank != procToWatch )
      return;

   for( element = 0; element < Mesh_GetLocalSize( data->mesh, data->nDims ); element++ ) {
      Cell_PointIndex   nPoints;
      Cell_Points       pts;
      unsigned          axis;

      cell = CellLayout_MapElementIdToCellId( data->elementCellLayout, element );

      pcu_check_true( cell == element );

      /* elements and cells map 1:1 here, so the cell's points are the element's nodes */
      nPoints = data->elementCellLayout->_pointCount( data->elementCellLayout, cell );
      pts = Memory_Alloc_Array( Cell_Point, nPoints, "cellPoints" );
      data->elementCellLayout->_initialisePoints( data->elementCellLayout, cell, nPoints, pts );

      /* a point in the middle of the cell must be found inside it */
      for( axis = 0; axis < 3; axis++ )
         testParticle.coord[axis] = ( (pts[0])[axis] + (pts[ midpointPartner[axis] ])[axis] ) / 2;
      pcu_check_true( CellLayout_IsInCell( data->elementCellLayout, cell, &testParticle ) );

      /* a point pushed one unit past a cell point must not be */
      for( axis = 0; axis < 3; axis++ )
         testParticle.coord[axis] = (pts[nPoints-2])[axis] + 1;
      pcu_check_true( !CellLayout_IsInCell( data->elementCellLayout, cell, &testParticle ) );

      Memory_Free( pts );
   }
}
/*
 * Element-level assembly of the buoyancy force, with density and gravity
 * supplied by the PpcManager.
 *
 * For each integration point in the cell matching lElement_I, and for every
 * element node n and spatial direction d:
 *
 *    elForceVec[n * dim + d] += -1.0 * detJac * weight * density * gravity[d] * Ni[n]
 *
 * Aborts through Journal_Firewall when the density ppc cannot be evaluated or
 * evaluates to nan.
 *
 * NOTE(review): Ni has room for 27 shape functions - presumably the largest
 * element type in use; confirm.
 */
void _BuoyancyForceTermPpc_AssembleElement( void* _self, ForceVector* forceVector, Element_LocalIndex lElement_I, double* elForceVec ) {
    BuoyancyForceTermPpc*   self        = (BuoyancyForceTermPpc*) _self;
    Dimension_Index         dim         = forceVector->dim;
    IntegrationPointsSwarm* swarm       = (IntegrationPointsSwarm*)self->integrationSwarm;
    FeMesh*                 mesh        = forceVector->feVariable->feMesh;
    ElementType*            elementType = FeMesh_GetElementType( mesh, lElement_I );
    Element_NodeIndex       nodeCount   = elementType->nodeCount;
    Dof_Index               dofsPerNode = dim;   /* one force component per spatial direction */
    Cell_Index              cell        = CellLayout_MapElementIdToCellId( swarm->cellLayout, lElement_I );
    Particle_InCellIndex    pointCount  = swarm->cellParticleCountTbl[cell];
    Particle_InCellIndex    point_I;
    double                  gravity[3];
    double                  Ni[27];

    for( point_I = 0 ; point_I < pointCount ; point_I++ ) {
        IntegrationPoint*      ip = (IntegrationPoint*) Swarm_ParticleInCellAt( swarm, cell, point_I );
        double*                xi = ip->xi;
        Node_ElementLocalIndex node_I;
        double                 detJac;
        double                 density;
        int                    err;

        detJac = ElementType_JacobianDeterminant( elementType, mesh, lElement_I, xi, dim );
        ElementType_EvaluateShapeFunctionsAt( elementType, xi, Ni );

        /* density at this point; both a lookup failure and a nan are fatal */
        err = PpcManager_Get( self->manager, lElement_I, ip, self->density_id, &density );

        Journal_Firewall( !err,
                          Journal_Register( Error_Type, (Name)BuoyancyForceTermPpc_Type  ),
                          "%d not found at the PpcManager\n", self->density_id );

        Journal_Firewall( !isnan( density ),
                          Journal_Register( Error_Type, (Name)BuoyancyForceTermPpc_Type  ),
                          "Density at integration point %i of element %i is nan\n", point_I, lElement_I );

        /* gravity vector at this point */
        PpcManager_GetGravity( self->manager, lElement_I, ip, gravity );

        /* spread the (negated) weight density over the nodes, direction by direction */
        for( node_I = 0 ; node_I < nodeCount ; node_I++ ) {
            double*      nodeForce = elForceVec + node_I * dofsPerNode;
            unsigned int d;

            for( d = 0 ; d < dim ; d++ ) {
                double factor = detJac * ip->weight * density * gravity[d];
                nodeForce[ d ] += -1.0 * factor * Ni[ node_I ] ;
            }
        }
    }
}
/*
 * Relative L2 difference between two fields, evaluated by quadrature.
 *
 * For each of the Mesh_GetLocalSize( feMesh, nDims ) elements of phiField's
 * mesh, phiOldField (the reference) and phiField are interpolated at the
 * integration points of the matching cell of gaussSwarm, and
 *
 *    errorSq    += ( phi - phiOld )^2 * weight * detJac
 *    analyticSq += phiOld^2           * weight * detJac
 *
 * are accumulated. Both sums are reduced with MPI_Allreduce (MPI_SUM over
 * MPI_COMM_WORLD) and sqrt( errorSq ) / sqrt( analyticSq ) is returned, so the
 * result is the same on every rank.
 *
 * The number of points per cell is asked of the swarm's particle layout
 * (cast to GaussParticleLayout) rather than read from the swarm's per-cell
 * particle table.
 *
 * NOTE(review): when the reference field integrates to zero the returned
 * ratio is a division by zero (inf or nan) - presumably never the case for
 * the fields this suite passes in; confirm.
 */
double SemiLagrangianIntegratorSuite_EvaluateError( FeVariable* phiField, FeVariable* phiOldField, Swarm* gaussSwarm ) {
   FeMesh*			feMesh		= phiField->feMesh;
   GaussParticleLayout*	particleLayout 	= (GaussParticleLayout*)gaussSwarm->particleLayout;
   Index			lElement_I, lCell_I;
   unsigned		nDims		= Mesh_GetDimSize( feMesh );
   unsigned		numMeshElements	= Mesh_GetLocalSize( feMesh, nDims );
   double			lErrorSq	= 0.0;
   double			lAnalyticSq 	= 0.0;
   double			gErrorSq, gAnalyticSq;
   IntegrationPoint*	gaussPoint;
   unsigned		gaussPoint_I, numGaussPoints;
   double			initialValue, finalValue;
   double			elErrorSq, elAnalyticSq;
   ElementType*		elementType;
   double			detJac;

   for( lElement_I = 0; lElement_I < numMeshElements; lElement_I++ ) {
      lCell_I = CellLayout_MapElementIdToCellId( gaussSwarm->cellLayout, lElement_I );
      numGaussPoints = _GaussParticleLayout_InitialCount( particleLayout, NULL, lCell_I );

      elementType = FeMesh_GetElementType( feMesh, lElement_I );

      /* per-element partial sums keep the summation order element by element */
      elErrorSq = 0.0;
      elAnalyticSq = 0.0;

      for( gaussPoint_I = 0; gaussPoint_I < numGaussPoints; gaussPoint_I++ ) {
         gaussPoint = (IntegrationPoint*) Swarm_ParticleInCellAt( gaussSwarm, lCell_I, gaussPoint_I );
         FeVariable_InterpolateWithinElement( phiOldField, lElement_I, gaussPoint->xi, &initialValue );
         FeVariable_InterpolateWithinElement( phiField, lElement_I, gaussPoint->xi, &finalValue );

         detJac = ElementType_JacobianDeterminant( elementType, feMesh, lElement_I, gaussPoint->xi, nDims );

         elErrorSq += ( finalValue - initialValue ) * ( finalValue - initialValue ) * gaussPoint->weight * detJac;
         elAnalyticSq += ( initialValue * initialValue ) * gaussPoint->weight * detJac;
      }

      /* No per-element ratio is formed: it was never consumed and would be
       * 0/0 on any element where the reference field vanishes. */
      lErrorSq += elErrorSq;
      lAnalyticSq += elAnalyticSq;
   }

   MPI_Allreduce( &lErrorSq, &gErrorSq, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD );
   MPI_Allreduce( &lAnalyticSq, &gAnalyticSq, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD );

   return sqrt( gErrorSq ) / sqrt( gAnalyticSq );
}
	/* allocate */
#ifndef PDE
	if( elementNodeCount > self->max_nElNodes ) {
		 self->max_nElNodes = elementNodeCount;
		 self->GNx = ReallocArray2D( self->GNx, double, dim, elementNodeCount );
		 self->Ni = ReallocArray( self->Ni, double, elementNodeCount );
	}
	
	
	GNx = self->GNx;
	Ni = self->Ni;
#endif
	Dtilda_B = self->Dtilda_B;

	/* Get number of particles per element */
	cell_I            = CellLayout_MapElementIdToCellId( swarm->cellLayout, lElement_I );
	cellParticleCount = swarm->cellParticleCountTbl[ cell_I ];

	/* Determine whether this is the first solve or not */
	Journal_Firewall( sle != NULL, Journal_Register( Error_Type, (Name)ConstitutiveMatrix_Type  ), 
			"In func %s: SLE is NULL.\n", __func__ );

	/* Note: we may have deliberately set the previousSolutionExists flag to true in the
		parent ConstitutiveMatrix constructor if in restart mode, even if the SLE hasn't executed yet
		in this run - so only update to the sle's value when SLE is confirming it has
		executed */
		
	if ( True == sle->hasExecuted ) {
		self->previousSolutionExists = sle->hasExecuted;
	}
	
/*
 * Volume-averaged density statistics for the isostasy tool.
 *
 * The element grid is collapsed along self->vertAxis into "columns": one per
 * element index along axis 0 in 2D, one per (axis 0, self->zontalAxis) pair
 * in 3D. For every column the function integrates, over all integration
 * points of elements below self->surfaceIdx (no limit when surfaceIdx is -1):
 *   - the volume and the density-weighted volume,
 *   - the same two quantities restricted to points classified as the
 *     self->rho_zero_mat material (for a one-to-many mapper a point counts
 *     when more than half of its referenced material particles match),
 *   - when _phi is non-NULL, the integral of -density * alpha * tempDot.
 * The column sums are combined across ranks with MPI_Allreduce (MPI_SUM over
 * MPI_COMM_WORLD). When self->avg is set, all columns are folded into
 * entry 0 and only entry 0 of the results is computed.
 *
 * Density comes from the PpcManager when self->ppcManager is set; otherwise
 * it is the material density scaled by (1 - alpha * temperature).
 *
 * Parameters
 *   _avg_density       out: receives a newly allocated array (one double per
 *                      column) of density / volume.
 *   _rho_zero_density  out: same, restricted to the rho_zero material.
 *   _phi               out, optional (may be NULL): receives a newly allocated
 *                      array of the phi integral / volume.
 * Entries whose volume is not above 1e-7 are 0.0. The arrays are allocated
 * zero-filled, so entries not computed in self->avg mode read as 0.0. This
 * function does not free the returned arrays - presumably the caller does.
 *
 * tempDot, density and alpha start at 0.0 so that the phi integrand is well
 * defined (zero) when no temperature field is configured or when density is
 * supplied by the PpcManager, instead of reading indeterminate values.
 */
void lecode_tools_Isostasy_AverageBody(lecode_tools_Isostasy *self,
                                       double** _avg_density, double** _rho_zero_density,
                                       double** _phi)
{
   FeMesh *mesh;
   ElementType *el_type;
   IntegrationPointsSwarm *swarm;
   double *local_density, *global_density;
   double *local_vol, *global_vol;
   double *local_rho_zero_vol, *global_rho_zero_vol;
   double *local_rho_zero_density, *global_rho_zero_density;
   double *local_phi, *global_phi;
   double temp = 0.0;       /* temperature; stays 0 when there is no tempField */
   double tempDot = 0.0;    /* temperature rate; stays 0 when there is no tempField */
   int cell, num_particles, num_dims, num_els;
   IntegrationPoint *particle;
   double jac_det, dV;
   double density = 0.0, alpha = 0.0;   /* only set on the non-ppc path */
   double densityFinal;
   Material *mat;
   Bool oneToMany;
   Grid* elGrid;
   int elInds[3], arraySize, arrayPos;
   int ii, jj;

   mesh = self->mesh;
   elGrid = *Mesh_GetExtension( mesh, Grid**,  mesh->elGridId );
   num_dims = Mesh_GetDimSize(mesh);
   swarm = self->swarm;
   num_els = FeMesh_GetElementLocalSize(mesh);

   /* Number of columns. */
   arraySize=0;
   if ( num_dims == 2 ) arraySize = elGrid->sizes[0];
   else if ( num_dims == 3 ) arraySize = elGrid->sizes[0]*elGrid->sizes[self->zontalAxis];
   else assert(0);

   /* Allocate for the column values; the sums below rely on the zero fill. */
   local_vol = (double*)calloc( arraySize, sizeof(double) );
   local_density = (double*)calloc( arraySize, sizeof(double) );
   local_rho_zero_vol = (double*)calloc( arraySize, sizeof(double) );
   local_rho_zero_density = (double*)calloc( arraySize, sizeof(double) );
   local_phi = (double*)calloc( arraySize, sizeof(double) );

   oneToMany = Stg_Class_IsInstance(swarm->mapper, OneToManyMapper_Type);

   for (ii = 0; ii < num_els; ii++)
   {

      /* Make sure the element is beneath the surface. */
      Grid_Lift( elGrid, FeMesh_ElementDomainToGlobal( mesh, ii ), elInds );
      if ( self->surfaceIdx != -1 && elInds[self->vertAxis] >= self->surfaceIdx )
         continue;

      el_type = FeMesh_GetElementType(mesh, ii);
      cell = CellLayout_MapElementIdToCellId(swarm->cellLayout, ii);
      num_particles = swarm->cellParticleCountTbl[cell];

      for (jj = 0; jj < num_particles; jj++)
      {
         int isRhoZero;

         particle = (IntegrationPoint*)Swarm_ParticleInCellAt(swarm, cell, jj);
         jac_det = ElementType_JacobianDeterminant(el_type, mesh, ii, particle->xi, num_dims);

         if(!self->ppcManager){
            density = IntegrationPointMapper_GetDoubleFromMaterial(
                         swarm->mapper, particle, self->buoyancy->materialExtHandle,
                         offsetof(BuoyancyForceTerm_MaterialExt, density) );
            alpha = IntegrationPointMapper_GetDoubleFromMaterial(
                       swarm->mapper, particle, self->buoyancy->materialExtHandle,
                       offsetof(BuoyancyForceTerm_MaterialExt, alpha) );

            if (self->tempField)
            {
               FeVariable_InterpolateFromMeshLocalCoord(self->tempField, self->tempField->feMesh,
                     ii, particle->xi, &temp);
               FeVariable_InterpolateFromMeshLocalCoord(self->tempDotField, self->tempDotField->feMesh,
                     ii, particle->xi, &tempDot);
            }

            densityFinal = density*(1.0 - alpha*temp);

         } else {
            int err;
            /* Density */
            err = PpcManager_Get( self->ppcManager, ii, particle, self->densityID, &densityFinal );
            assert(!err);
         }

         /* Column this element belongs to. */
         arrayPos = elInds[0];
         if ( num_dims == 3 ) arrayPos += elInds[self->zontalAxis]*elGrid->sizes[0];

         /* Volume represented by this integration point. */
         dV = particle->weight*jac_det;

         local_vol[arrayPos] += dV;
         local_density[arrayPos] += dV*densityFinal;

         /* Does this point count as the rho_zero material? */
         if (!oneToMany)
         {
            mat = IntegrationPointsSwarm_GetMaterialOn(swarm, particle);
            isRhoZero = (mat->index == self->rho_zero_mat->index);
         }
         else
         {
            OneToManyRef *ref;
            int cnt;
            int kk;

            ref = OneToManyMapper_GetMaterialRef(swarm->mapper, particle);
            cnt = 0;
            for (kk = 0; kk < ref->numParticles; kk++)
            {
               mat = MaterialPointsSwarm_GetMaterialAt(((OneToManyMapper*)swarm->mapper)->materialSwarm, ref->particleInds[kk]);
               if (mat->index == self->rho_zero_mat->index)
                  cnt++;
            }

            /* strict majority of the referenced material particles */
            isRhoZero = (2*cnt > ref->numParticles);
         }

         if (isRhoZero)
         {
            local_rho_zero_vol[arrayPos] += dV;
            local_rho_zero_density[arrayPos] += dV*densityFinal;
         }

         if (_phi)
         {
            local_phi[arrayPos] += dV*(-density*alpha*tempDot);
         }
      }
   }

   /* Allocate for the global column values. */
   global_vol = (double*)calloc( arraySize, sizeof(double) );
   global_density = (double*)calloc( arraySize, sizeof(double) );
   global_rho_zero_vol = (double*)calloc( arraySize, sizeof(double) );
   global_rho_zero_density = (double*)calloc( arraySize, sizeof(double) );
   global_phi = (double*)calloc( arraySize, sizeof(double) );

   MPI_Allreduce(local_vol, global_vol, arraySize, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
   MPI_Allreduce(local_density, global_density, arraySize, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
   MPI_Allreduce(local_rho_zero_vol, global_rho_zero_vol, arraySize, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
   MPI_Allreduce(local_rho_zero_density, global_rho_zero_density, arraySize, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
   if (_phi)
      MPI_Allreduce(local_phi, global_phi, arraySize, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);

   free( local_vol );
   free( local_density );
   free( local_rho_zero_vol );
   free( local_rho_zero_density );
   free( local_phi );

   /* Whole-domain average: fold every column into entry 0. */
   if ( self->avg )
   {
      for ( ii = 1; ii < arraySize; ii++ )
      {
         global_vol[0] += global_vol[ii];
         global_density[0] += global_density[ii];
         global_rho_zero_vol[0] += global_rho_zero_vol[ii];
         global_rho_zero_density[0] += global_rho_zero_density[ii];
         if ( _phi )
            global_phi[0] += global_phi[ii];
      }
   }

   /* Calculate results. Zero-filled so that entries skipped in avg mode are defined. */
   *_avg_density = (double*)calloc( arraySize, sizeof(double) );
   *_rho_zero_density = (double*)calloc( arraySize, sizeof(double) );
   if (_phi)
      *_phi = (double*)calloc( arraySize, sizeof(double) );
   for ( ii = 0; ii < arraySize; ii++ )
   {
      /* columns with (almost) no volume report 0 instead of dividing by ~0 */
      (*_avg_density)[ii] = (global_vol[ii] > 1e-7) ? global_density[ii]/global_vol[ii] : 0.0;
      (*_rho_zero_density)[ii] = (global_rho_zero_vol[ii] > 1e-7) ? global_rho_zero_density[ii]/global_rho_zero_vol[ii] : 0.0;
      if (_phi)
         (*_phi)[ii] = (global_vol[ii] > 1e-7) ? global_phi[ii]/global_vol[ii] : 0.0;
      /* in avg mode only entry 0 carries a result */
      if ( self->avg )
         break;
   }

   free( global_vol );
   free( global_density );
   free( global_rho_zero_vol );
   free( global_rho_zero_density );
   free( global_phi );
}
/*
 * Element-level assembly of a stiffness-matrix term of the form
 *
 *    elStiffMat[rowDofs*A][colDofs*B + i] += detJac * weight * M[A] * g[i] * N[B]
 *
 * where M / N are the row / column shape functions and g is built as follows:
 * the first component returned by the self->grad_rho ppc is divided by the
 * self->rho ppc value and negated, the other two components are zeroed, and
 * the resulting (r, theta, phi) vector is converted to x-y-z at the point's
 * global coordinate with Spherical_VectorRTP2XYZ.
 *
 * Only the first dof row of every row node is written. The Jacobian
 * determinant and the global coordinate are taken from the column variable's
 * mesh. `sle` and `context` are not used.
 *
 * NOTE(review): M holds 6 row shape functions and N 27 column shape
 * functions - confirm the row element type never has more than 6 nodes.
 * NOTE(review): the results of PpcManager_Get are not checked here.
 * NOTE(review): Spherical_VectorRTP2XYZ is called with a literal 2 while the
 * inner loop reads colDofs components of its output - confirm for colDofs > 2.
 */
void _MatAssembly_NA__Fi__NB_AssembleElement(
      void*                                              matrixTerm,
      StiffnessMatrix*                                   stiffnessMatrix,
      Element_LocalIndex                                 lElement_I,
      SystemLinearEquations*                             sle,
      FiniteElementContext*                              context,
      double**                                           elStiffMat )
{
   MatAssembly_NA__Fi__NB* self      = (MatAssembly_NA__Fi__NB*)matrixTerm;
   Swarm*                  swarm     = self->integrationSwarm;
   FeVariable*             rowVar    = stiffnessMatrix->rowVariable;
   FeVariable*             colVar    = stiffnessMatrix->columnVariable;
   Dimension_Index         dim       = stiffnessMatrix->dim;
   int                     rowDofs   = rowVar->fieldComponentCount; /* dofs per row node */
   int                     colDofs   = colVar->fieldComponentCount; /* dofs per column node */
   ElementType*            rowType   = FeMesh_GetElementType( rowVar->feMesh, lElement_I );
   Index                   rowNodes  = rowType->nodeCount;
   ElementType*            colType   = FeMesh_GetElementType( colVar->feMesh, lElement_I );
   Index                   colNodes  = colType->nodeCount;
   Cell_Index              cell      = CellLayout_MapElementIdToCellId( swarm->cellLayout, lElement_I );
   Particle_InCellIndex    nPoints   = swarm->cellParticleCountTbl[ cell ];
   Particle_InCellIndex    p;
   double                  gradRho_rtp[3], gradRho_xyz[3], rho, xyz[3];
   double                  N[27], M[6];

   for( p = 0 ; p < nPoints ; p++ ) {
      IntegrationPoint* ip     = (IntegrationPoint*)Swarm_ParticleInCellAt( swarm, cell, p );
      double*           xi     = ip->xi;
      double            weight = ip->weight;
      double            detJac;
      double            scale;
      Index             A, B, i;

      /* Jacobian determinant and both sets of shape functions at this point */
      detJac = ElementType_JacobianDeterminant( colType, colVar->feMesh, lElement_I, xi, dim );
      ElementType_EvaluateShapeFunctionsAt( rowType, xi, M );
      ElementType_EvaluateShapeFunctionsAt( colType, xi, N );

      /* rho and grad(rho) from the ppc manager */
      PpcManager_Get( self->ppcManager, lElement_I, ip, self->rho, &rho );

      /* ASSUMES: gradRho_rtp[0] is the radial component of grad(rho) */
      PpcManager_Get( self->ppcManager, lElement_I, ip, self->grad_rho, &(gradRho_rtp[0]) );

      /* minus one because depth is the negative of the radial axis */
      gradRho_rtp[0] = -1*gradRho_rtp[0]/rho;
      gradRho_rtp[1] = 0;
      gradRho_rtp[2] = 0;

      /* express the radial vector in the x-y-z basis at this point */
      FeMesh_CoordLocalToGlobal( colVar->feMesh, lElement_I, xi, xyz );
      Spherical_VectorRTP2XYZ( gradRho_rtp, xyz, 2, gradRho_xyz );

      scale = detJac * weight;
      for( A = 0; A < rowNodes; A++ ) {
         double* row      = elStiffMat[rowDofs*A];
         double  rowScale = scale * M[A];

         for( B = 0; B < colNodes; B++ ) {
            for( i = 0; i < colDofs; i++ ) {
               row[colDofs*B+i] += rowScale * gradRho_xyz[i] * N[B] ;
            }
         }
      }
   }
}
void _MatrixAssemblyTerm_NA_i__NB_i__Fn_AssembleElement(
    void*                                              matrixTerm,
    StiffnessMatrix*                                   stiffnessMatrix,
    Element_LocalIndex                                 lElement_I,
    SystemLinearEquations*                             sle,
    FiniteElementContext*                              context,
    double**                                           elStiffMat )
{
    MatrixAssemblyTerm_NA_i__NB_i__Fn*   self = (MatrixAssemblyTerm_NA_i__NB_i__Fn*)matrixTerm;
    Swarm*                              swarm        = self->integrationSwarm;
    FeVariable*                         variable1    = stiffnessMatrix->rowVariable;
    Dimension_Index                     dim          = stiffnessMatrix->dim;
    IntegrationPoint*                   currIntegrationPoint;
    double*                             xi;
    double                              weight;
    Particle_InCellIndex                cParticle_I, cellParticleCount;
    Index                               nodesPerEl;
    Index                               A,B;
    Index                               i;
    double**                            GNx;
    double                              detJac;
    double                              F;
    Cell_Index                          cell_I;
    ElementType*                        elementType;

    /* Set the element type */
    elementType = FeMesh_GetElementType( variable1->feMesh, lElement_I );
    nodesPerEl = elementType->nodeCount;

    if( nodesPerEl > self->max_nElNodes ) {
        /* reallocate */
        if (self->GNx)
            free(self->GNx);
        self->GNx = (double**)AllocArray2D( double, dim, nodesPerEl );
        self->max_nElNodes = nodesPerEl;
    }
    GNx = self->GNx;

    MatrixAssemblyTerm_NA_i__NB_i__Fn_cppdata* cppdata = (MatrixAssemblyTerm_NA_i__NB_i__Fn_cppdata*)self->cppdata;

    debug_dynamic_cast<ParticleInCellCoordinate>(cppdata->input->localCoord())->index() = lElement_I;  // set the elementId as the owning cell for the particleCoord
    cppdata->input->index() = lElement_I;  // set the elementId for the fem coordinate

    cell_I = CellLayout_MapElementIdToCellId( swarm->cellLayout, lElement_I );
    cellParticleCount = swarm->cellParticleCountTbl[ cell_I ];

    for( cParticle_I = 0 ; cParticle_I < cellParticleCount ; cParticle_I++ ) {
        debug_dynamic_cast<ParticleInCellCoordinate>(cppdata->input->localCoord())->particle_cellId(cParticle_I);  // set the particleCoord cellId
        currIntegrationPoint = (IntegrationPoint*)Swarm_ParticleInCellAt( swarm, cell_I, cParticle_I );

        xi = currIntegrationPoint->xi;
        weight = currIntegrationPoint->weight;

        ElementType_ShapeFunctionsGlobalDerivs(
            elementType,
            variable1->feMesh, lElement_I,
            xi, dim, &detJac, GNx );

        /* evaluate function */
        std::shared_ptr<const IO_double> funcout = debug_dynamic_cast<const IO_double>(cppdata->func(cppdata->input));
        F = funcout->at();

        for( A=0; A<nodesPerEl; A++ )
            for( B=0; B<nodesPerEl; B++ )
                for ( i = 0; i < dim ; i++ )
                    elStiffMat[A][B] += detJac * weight * GNx[i][A] * GNx[i][B] * F;
    }
}
/*
 * Drucker-Prager yield criterion at one material point.
 *
 * Steps:
 *   1. Pressure: interpolated from self->pressureField when that is set,
 *      otherwise read from self->swarmPressure at the constitutive matrix's
 *      current particle index; self->hydrostaticTerm, when set, adds its
 *      pressure at the point's global coordinate.
 *   2. Decide whether the element sits on a mesh boundary that has been
 *      switched on (bottom/top in axis 1, left/right in axis 0 and, in 3D,
 *      back/front in axis 2).
 *   3. Ask the effective friction coefficient and cohesion helpers for the
 *      values at this point, passing the boundary flag.
 *   4. Combine them into the frictional strength (separate 2D and 3D forms)
 *      and clamp it from below by minimumYieldStress * factor, where a zero
 *      self->minimumYieldStress falls back to the effective cohesion.
 *
 * Side effects: stores the result in self->yieldCriterion and
 * effectiveFrictionCoefficient * factor in self->curFrictionCoef.
 * Returns the frictional strength.
 *
 * NOTE(review): self->pressureField is tested for NULL when the pressure is
 * fetched, but self->pressureField->feMesh is dereferenced unconditionally
 * afterwards - confirm a pressure field is always present.
 * NOTE(review): the cell index is passed where FeMesh_CoordLocalToGlobal is
 * elsewhere given an element index - presumably the two coincide here; confirm.
 */
double _GALEDruckerPrager_GetYieldCriterion( 
			void*                            druckerPrager,
			ConstitutiveMatrix*              constitutiveMatrix,
			MaterialPointsSwarm*             materialPointsSwarm,
			Element_LocalIndex               lElement_I,
			MaterialPoint*                   materialPoint,
			Coord                            xi )
{
	GALEDruckerPrager*            self = (GALEDruckerPrager*) druckerPrager;
	Dimension_Index               dim  = constitutiveMatrix->dim;
	GALEDruckerPrager_Particle*   particleExt;
	Cell_Index                    cell_I;
	Coord                         coord;
	Element_GlobalIndex           element_gI = 0;
	unsigned                      inds[3];
	Grid*                         elGrid;
	Bool                          inside_boundary;
	double                        pressure;
	double                        mu;            /* effective friction coefficient, tan(phi) */
	double                        effCohesion;
	double                        cos_phi;
	double                        factor;
	double                        strength;
	double                        yieldFloor;
	double                        minimumYieldStress;

	/* particle extension is fetched but not read in this function */
	particleExt = (GALEDruckerPrager_Particle*)ExtensionManager_Get( materialPointsSwarm->particleExtensionMgr, materialPoint, self->particleExtHandle );

	/* --- 1. pressure ------------------------------------------------- */
	if( self->pressureField ) {
		FeVariable_InterpolateWithinElement( self->pressureField, lElement_I, xi, &pressure );
	}
	else {
		SwarmVariable_ValueAt( self->swarmPressure,
		                       constitutiveMatrix->currentParticleIndex,
		                       &pressure );
	}

	cell_I = CellLayout_MapElementIdToCellId( materialPointsSwarm->cellLayout,
	                                          lElement_I );
	FeMesh_CoordLocalToGlobal( self->pressureField->feMesh, cell_I, xi, coord );
	if( self->hydrostaticTerm ) {
		pressure += HydrostaticTerm_Pressure( self->hydrostaticTerm, coord );
	}

	/* Normally the average of the trace of the stress would be added here.
	   With compressible material that is required, but with stabilized
	   linear pressure elements the non-zero trace is a numerical artifact,
	   so it is left out. */

	/* --- 2. are we on a boundary that we care about? ----------------- */
	elGrid = *(Grid**)ExtensionManager_Get( self->pressureField->feMesh->info,
	                                        self->pressureField->feMesh,
	                                        self->pressureField->feMesh->elGridId );

	element_gI = FeMesh_ElementDomainToGlobal( self->pressureField->feMesh, lElement_I );
	RegularMeshUtils_Element_1DTo3D( self->pressureField->feMesh, element_gI, inds );

	inside_boundary = (self->boundaryBottom && inds[1]==0)
		|| (self->boundaryTop && inds[1]==elGrid->sizes[1]-1);
	if( !inside_boundary ) {
		inside_boundary = (self->boundaryLeft && inds[0]==0)
			|| (self->boundaryRight && inds[0]==elGrid->sizes[0]-1);
	}
	if( !inside_boundary && dim==3 ) {
		inside_boundary = (self->boundaryBack && inds[2]==0)
			|| (self->boundaryFront && inds[2]==elGrid->sizes[2]-1);
	}

	/* --- 3. effective material parameters ---------------------------- */
	mu = _GALEDruckerPrager_EffectiveFrictionCoefficient( self, materialPoint,
	                                                      inside_boundary );
	effCohesion = _GALEDruckerPrager_EffectiveCohesion( self, materialPoint,
	                                                    inside_boundary );

	/* --- 4. frictional strength -------------------------------------- */
	/* Friction and cohesion are modified because terms from the normal
	   stresses have been grouped and moved to the yield indicator. */

	/* mu = tan(phi)  =>  cos(phi) = 1/sqrt(1 + tan(phi)**2) */
	cos_phi = 1/sqrt(1 + mu*mu);

	if( dim==2 ) {
		/* factor = sin(atan(1/tan(phi))) = cos(phi) */
		factor = cos_phi;
		strength = mu*pressure*factor
			+ effCohesion*factor;
	}
	else {
		double sin_phi = mu*cos_phi;

		factor = 2*cos_phi/(sqrt(3.0)*(3-sin_phi));

		/* The full expression is

		   sqrt(J2)=p*2*sin(phi)/(sqrt(3)*(3-sin(phi)))
		            + C*6*cos(phi)/(sqrt(3)*(3-sin(phi)))

		   Note the extra factor of 3 for cohesion */
		strength = mu*factor*pressure
			+ effCohesion*3*factor;
	}

	/* If the minimumYieldStress is not set, use the effective cohesion.
	   Maybe it should be the modified effective cohesion, though that
	   probably should not matter much. */
	minimumYieldStress = (self->minimumYieldStress==0.0) ? effCohesion
	                                                     : self->minimumYieldStress;

	/* Make sure the strength is above the minimum */
	yieldFloor = minimumYieldStress*factor;
	if( strength < yieldFloor ) {
		strength = yieldFloor;
	}

	self->yieldCriterion = strength;
	self->curFrictionCoef = mu*factor;

	return strength;
}
/** SUPGAdvDiffTermPpc_UpwindDiffusivity - See Brooks, Hughes 1982 Section 3.3.
 * All equation numbers refer to this paper unless otherwise indicated.
 *
 * Returns the upwind (artificial) diffusivity of element lElement_I:
 *  - the diffusivity obtained from PpcManager_Get is averaged over the swarm
 *    particles in the element's cell; sle->maxDiffusivity is raised to that
 *    average if it is larger, and the average is then clamped from below at
 *    SUPG_MIN_DIFFUSIVITY;
 *  - the velocity is interpolated at local coordinate (0,0,0);
 *  - per dimension, a Peclet number is formed from the velocity component and
 *    the coordinate difference between two of the element's nodes, handed to
 *    self->_upwindParam, and xiUpwind * velocity * length is accumulated;
 *  - the sum is scaled by ISQRT15.
 *
 * A cell holding no particles leaves the average at zero (subsequently
 * clamped) rather than dividing by zero.
 *
 * Side effects: may update sle->maxDiffusivity; overwrites self->incarray. */
double SUPGAdvDiffTermPpc_UpwindDiffusivity( 
   SUPGAdvDiffTermPpc*    self, 
   AdvectionDiffusionSLE* sle, 
   Swarm*                 swarm, 
   FeMesh*                mesh, 
   Element_LocalIndex     lElement_I, 
   Dimension_Index        dim )
{
   FeVariable*          velocityField = self->velocityField;
   Coord                xiElementCentre = {0.0,0.0,0.0};
   double               xiUpwind;
   double               velocityCentre[3];
   double               pecletNumber;
   double               lengthScale;
   double               upwindDiffusivity;
   Dimension_Index      dim_I;
   double*              leastCoord;
   double*              greatestCoord;
   Node_LocalIndex      nodeIndex_LeastValues, nodeIndex_GreatestValues;
   unsigned             nInc, *inc;
   unsigned             farCorner;
   IArray*              incArray;
   Cell_Index           cell_I;
   IntegrationPoint*    particle;
   double               averageDiffusivity;
   Particle_InCellIndex cParticle_I;
   Particle_InCellIndex particleCount;
   int                  err;
   double               diffusivity;

   
   /* Compute the average diffusivity */
   /* Find Number of Particles in Element */
   cell_I = CellLayout_MapElementIdToCellId( swarm->cellLayout, lElement_I );
   particleCount = swarm->cellParticleCountTbl[ cell_I ];

   /* Average diffusivity for element */
   averageDiffusivity = 0.0;
   for ( cParticle_I = 0 ; cParticle_I < particleCount ; cParticle_I++ ) {
      particle = (IntegrationPoint*)Swarm_ParticleInCellAt( swarm, cell_I, cParticle_I );
      err = PpcManager_Get( self->mgr, lElement_I, particle, self->diffusivityLabel, &diffusivity );
      assert(!err);
      averageDiffusivity += diffusivity;
   }
   /* Only normalise when there is something to average; 0/0 would yield NaN,
      which slips past both comparisons below. */
   if ( particleCount > 0 )
      averageDiffusivity /= (double)particleCount;
   
   /* Track the largest (unclamped) element average seen so far */
   if (sle->maxDiffusivity < averageDiffusivity)
      sle->maxDiffusivity = averageDiffusivity;
   
   /* Change Diffusivity if it is too small */
   if ( averageDiffusivity < SUPG_MIN_DIFFUSIVITY ) 
      averageDiffusivity = SUPG_MIN_DIFFUSIVITY;
   
   /* Calculate Velocity At Middle of Element - See Eq. 3.3.6 */
   FeVariable_InterpolateFromMeshLocalCoord( velocityField, mesh, lElement_I, xiElementCentre, velocityCentre );
   
   /* Calculate Length Scales - See Fig 3.4 - ASSUMES BOX MESH TODO - fix */
   incArray = self->incarray;
   FeMesh_GetElementNodes( mesh, lElement_I, incArray );
   nInc = IArray_GetSize( incArray );
   inc = IArray_GetPtr( incArray );
   
   /* Entry of the incidence array taken as the "greatest" corner:
      index 3 in 2D, 7 in 3D, otherwise 1. */
   farCorner = (dim == 2) ? 3 : (dim == 3) ? 7 : 1;
   /* The element must actually provide that many nodes */
   assert( nInc > farCorner );

   nodeIndex_LeastValues = inc[0];
   nodeIndex_GreatestValues = inc[farCorner];
   leastCoord = Mesh_GetVertex( mesh, nodeIndex_LeastValues );
   greatestCoord = Mesh_GetVertex( mesh, nodeIndex_GreatestValues );
   
   upwindDiffusivity = 0.0;
   for ( dim_I = 0 ; dim_I < dim ; dim_I++ ) {
      lengthScale = greatestCoord[ dim_I ] - leastCoord[ dim_I ];
      
      /* Calculate Peclet Number (alpha) - See Eq. 3.3.5 */
      pecletNumber = velocityCentre[ dim_I ] * lengthScale / (2.0 * averageDiffusivity);
      
      /* Calculate Upwind Local Coordinate - See Eq. 3.3.4 and (2.4.2, 3.3.1 and 3.3.2) */
      xiUpwind = self->_upwindParam( self, pecletNumber );
      
      /* Calculate Upwind Thermal Diffusivity - See Eq. 3.3.3  */
      upwindDiffusivity += xiUpwind * velocityCentre[ dim_I ] * lengthScale;
   }
   upwindDiffusivity *= ISQRT15; /* See Eq. 3.3.11 */
   
   return upwindDiffusivity;
}
/** Assembles the SUPG advection-diffusion residual contribution of one element.
 *
 * For every integration point in the element's cell the routine evaluates the
 * shape functions and their global derivatives, interpolates velocity, phi,
 * grad(phi) and phiDot, obtains the diffusivity through PpcManager_Get, and
 * subtracts
 *     weight * detJac * ( SUPGNi * (phiDot + u . grad(phi)) + kappa * grad(Ni) . grad(phi) )
 * from elementResidual[node].
 *
 * SUPGNi is the plain shape function Ni plus the streamline perturbation
 * (upwindDiffusivity / u.u) * (u . grad(Ni)).  The perturbation is skipped
 * (SUPGNi = Ni) when |upwindDiffusivity| is below SUPG_MIN_DIFFUSIVITY, and
 * also when the local velocity is exactly zero: in that case u . grad(Ni) is
 * zero anyway, while dividing by u.u would produce inf * 0 = NaN.
 *
 * @param forceTerm        the SUPGAdvDiffTermPpc (type-checked with Stg_CheckType)
 * @param forceVector      supplies the spatial dimension
 * @param lElement_I       local index of the element being assembled
 * @param elementResidual  per-node residual array, accumulated into in place
 *
 * Uses self->GNx, self->phiGrad, self->Ni and self->SUPGNi as scratch storage. */
void _SUPGAdvDiffTermPpc_AssembleElement(
   void*              forceTerm,
   ForceVector*       forceVector,
   Element_LocalIndex lElement_I,
   double*            elementResidual )
{
   SUPGAdvDiffTermPpc*    self = Stg_CheckType( forceTerm, SUPGAdvDiffTermPpc );
   AdvectionDiffusionSLE* sle = self->sle;
   Swarm*                 swarm = self->integrationSwarm;
   Particle_Index         cParticle_I;
   Particle_Index         cellParticleCount;
   Cell_Index             cell_I;    
   IntegrationPoint*      particle;
   FeVariable*            phiField = self->phiField;
   Dimension_Index        dim = forceVector->dim;
   double                 velocity[3];
   double                 phi, phiDot;
   double                 detJac;
   double*                xi;
   double                 totalDerivative, diffusionTerm;
   double                 diffusivity = 0;
   ElementType*           elementType = FeMesh_GetElementType( phiField->feMesh, lElement_I );
   Node_Index             elementNodeCount = elementType->nodeCount;
   Node_Index             node_I;
   double                 factor;
   double**               GNx;
   double*                phiGrad;
   double*                Ni;
   double*                SUPGNi;
   double                 supgfactor;
   double                 udotu, perturbation;
   double                 upwindDiffusivity;
   int                    upwindingActive;
   int                    err;

   GNx = self->GNx;
   phiGrad = self->phiGrad;
   Ni = self->Ni;
   SUPGNi = self->SUPGNi;
   
   upwindDiffusivity = SUPGAdvDiffTermPpc_UpwindDiffusivity( self, sle, swarm, phiField->feMesh, lElement_I, dim );

   /* In the case of pure diffusion the regular shape functions are used.
      The upwind diffusivity is constant over the element, so decide once. */
   upwindingActive = !( fabs(upwindDiffusivity) < SUPG_MIN_DIFFUSIVITY );

   /* Determine number of particles in element */
   cell_I = CellLayout_MapElementIdToCellId( swarm->cellLayout, lElement_I );
   cellParticleCount = swarm->cellParticleCountTbl[ cell_I ];
   
   for ( cParticle_I = 0 ; cParticle_I < cellParticleCount ; cParticle_I++ ) {
      particle = (IntegrationPoint*)Swarm_ParticleInCellAt( swarm, cell_I, cParticle_I );
      xi          = particle->xi;
      
      /* Evaluate Shape Functions */
      ElementType_EvaluateShapeFunctionsAt(elementType, xi, Ni);

      /* Calculate Global Shape Function Derivatives */
      ElementType_ShapeFunctionsGlobalDerivs( 
         elementType,
         phiField->feMesh, lElement_I,
         xi, dim, &detJac, GNx );
      
      /* Calculate Velocity */
      FeVariable_InterpolateFromMeshLocalCoord( self->velocityField, phiField->feMesh, lElement_I, xi, velocity );

      /* Build the SUPG shape functions */
      udotu = velocity[I_AXIS]*velocity[I_AXIS] + velocity[J_AXIS]*velocity[J_AXIS];
      if(dim == 3) udotu += velocity[ K_AXIS ] * velocity[ K_AXIS ];

      if ( upwindingActive && udotu > 0.0 ) {
         supgfactor = upwindDiffusivity / udotu;
         for ( node_I = 0 ; node_I < elementNodeCount ; node_I++ ) {
            perturbation = velocity[ I_AXIS ] * GNx[ I_AXIS ][ node_I ] + velocity[ J_AXIS ] * GNx[ J_AXIS ][ node_I ];
            if (dim == 3)
               perturbation = perturbation + velocity[ K_AXIS ] * GNx[ K_AXIS ][ node_I ];
            
            /* p = \frac{\bar \kappa \hat u_j w_j }{ ||u|| } -  Eq. 3.2.25 */
            perturbation = supgfactor * perturbation;
            
            SUPGNi[node_I] = Ni[node_I] + perturbation;
         }
      } else {
         /* No upwinding: either pure diffusion, or a stagnant point where the
            streamline perturbation vanishes and u.u == 0 cannot be divided by. */
         for ( node_I = 0 ; node_I < elementNodeCount ; node_I++ )
            SUPGNi[node_I] = Ni[node_I];
      }
      
      /* Calculate phi on particle */
      _FeVariable_InterpolateNodeValuesToElLocalCoord( phiField, lElement_I, xi, &phi );

      /* Calculate Gradients of Phi */
      FeVariable_InterpolateDerivatives_WithGNx( phiField, lElement_I, GNx, phiGrad );

      /* Calculate time derivative of phi */
      _FeVariable_InterpolateNodeValuesToElLocalCoord( sle->phiDotField, lElement_I, xi, &phiDot );
      
      /* Calculate total derivative (i.e. Dphi/Dt = \dot \phi + u . \grad \phi) */
      totalDerivative = phiDot + StGermain_VectorDotProduct( velocity, phiGrad, dim );

      /* get the diffusivity */
      err = PpcManager_Get( self->mgr, lElement_I, particle, self->diffusivityLabel, &diffusivity );
      if( err ) assert(0);

      /* Add to element residual */
      factor = particle->weight * detJac;
      for( node_I = 0 ; node_I < elementNodeCount ; node_I++ ) {
         /* Calculate Diffusion Term */
         diffusionTerm = diffusivity * ( GNx[0][node_I] * phiGrad[0] + GNx[1][node_I] * phiGrad[1] );
         if(dim == 3)
            diffusionTerm += diffusivity * GNx[2][ node_I ] * phiGrad[2] ;
         
         elementResidual[ node_I ] -=  factor * ( SUPGNi[ node_I ] * totalDerivative + diffusionTerm );
      }
   }
}
/** Frequent-output hook that prints three values: the ratio of the two
 * PpcIntegral results (self->volAvgT / self->vol) and a normalised Nusselt
 * measure for the outer and for the inner radial boundary.
 *
 * For every local element in the outermost (ijk[0] == grid->sizes[0]-1) and
 * innermost (ijk[0] == 0) radial layer, the integrand (dT_dr - T * v_r) is
 * integrated over the element with the Gauss swarm, together with the element
 * volume.  Both sums are reduced over context->communicator, the integral is
 * divided by the volume, and the result is scaled by rMax*log(0.55) (outer) or
 * rMin*log(0.55) (inner).
 *
 * ASSUMPTIONS:
 *  - the mesh is a regular (r,theta,phi) grid so that the first ijk index is
 *    the radial layer;
 *  - all elements share the element type of element 0;
 *  - the mesh generator is an RSGenerator exposing crdMin / crdMax. */
void Spherical_CubedSphereNusselt_Output( UnderworldContext* context ) {
   Spherical_CubedSphereNusselt* 	self  		= NULL;
   Swarm*				swarm		= NULL;
   FeMesh* 				mesh		= NULL;
   ElementType* 			elementType	= NULL;
   Grid*				grid		= NULL;
   double 				avgT, vol;

   self = (Spherical_CubedSphereNusselt*)LiveComponentRegister_Get( context->CF->LCRegister, (Name)Spherical_CubedSphereNusselt_Type );

   /* Check the component exists before it is dereferenced below */
   assert( self );

   FeVariable 	*temperatureGradientsField 	= self->temperatureGradientsField;
   FeVariable 	*temperatureField 		= self->temperatureField;
   FeVariable 	*velocityField 			= self->velocityField;

   swarm = self->gaussSwarm;
   mesh = (FeMesh*)self->mesh;

   assert( mesh );
   assert( swarm );

   /*
      for each boundary element at the top (and bottom)
         integrate dT/dr &
         integrate T*v_r

      ASSUMPTIONS:
      * uses grid data structure for r,theta mesh (cartesian topology so i can)
   */

   unsigned 		nEls, e_i, cell_i, nPoints, p_i, b_i, ijk[3];
   unsigned		boundaryLayer[2];
   IntegrationPoint*	particle;
   double 		value[3], xyz[3], rtp[3], detJac, dT_dr, factor, vel[3], T, vel_rtp[3];
   double 		gVolume[2], volume[2];
   double 		J_Nu[2], gJ_Nu[2];
   /* Radial extent of the domain: first coordinate of the generator's min/max corner */
   double		rMin			= ((RSGenerator*)mesh->generator)->crdMin[0];
   double		rMax			= ((RSGenerator*)mesh->generator)->crdMax[0];
   double		dxdr[3], r;

   elementType = FeMesh_GetElementType( mesh, 0 ); // assuming all element are the same as el 0

   memset( J_Nu, 0, 2*sizeof(double) );
   memset( volume, 0, 2*sizeof(double) );

   RegularMeshUtils_ErrorCheckAndGetDetails( (Mesh*)mesh, MT_VOLUME, &nEls, &grid );

   /* slot 0: outer radius boundary layer, slot 1: inner radius boundary layer */
   boundaryLayer[0] = grid->sizes[0] - 1;
   boundaryLayer[1] = 0;

   for( e_i = 0; e_i < nEls; e_i++ ) {
      // use cartesian grid data structure - can improve later on to make more general
      RegularMeshUtils_Element_1DTo3D( mesh, Mesh_DomainToGlobal( (Mesh*)mesh, MT_VOLUME, e_i ), ijk );

      /* An element contributes to every boundary slot whose radial layer it
         lies in (both, if the grid is a single layer thick). */
      for( b_i = 0; b_i < 2; b_i++ ) {
         if( ijk[0] != boundaryLayer[b_i] )
            continue;

         // get integration number of integration points in cell
         cell_i = CellLayout_MapElementIdToCellId( swarm->cellLayout, e_i );
         nPoints = swarm->cellParticleCountTbl[ cell_i ];

         for( p_i = 0; p_i < nPoints; p_i++ ) {
            // get integration particle
            particle = (IntegrationPoint*) Swarm_ParticleInCellAt( swarm, cell_i, p_i );

            // get temperature, velocity, temperatureDeriv and xyz
            FeVariable_InterpolateFromMeshLocalCoord( temperatureField, (FeMesh*)mesh, e_i, particle->xi, &T);
            FeVariable_InterpolateFromMeshLocalCoord( velocityField, (FeMesh*)mesh, e_i, particle->xi, vel);
            FeVariable_InterpolateFromMeshLocalCoord( temperatureGradientsField, (FeMesh*)mesh, e_i, particle->xi, value);
            FeMesh_CoordLocalToGlobal( mesh, e_i, particle->xi, xyz );

            Spherical_XYZ2regionalSphere( xyz, rtp );
            Spherical_VectorXYZ2regionalSphere( vel, xyz, vel_rtp );
            detJac = ElementType_JacobianDeterminant( elementType, (FeMesh*)mesh, e_i, particle->xi, 3 );

            /* NOTE(review): r is built from the norm of the (r,theta,phi)
               triple and dxdr uses r/x rather than x/r; this also divides by
               zero when a Cartesian coordinate is exactly 0.  Left unchanged
               here - confirm the intended formula against the derivation. */
            r = sqrt( rtp[0]*rtp[0] + rtp[1]*rtp[1] + rtp[2]*rtp[2] );
            dxdr[0] = r/xyz[0];
            dxdr[1] = r/xyz[1];
            dxdr[2] = r/xyz[2];
            // calc dT_dr = dT_dx * dx_dr + dT_dy * dy_dr + dT_dz * dz_dr
            dT_dr = value[0]*dxdr[0] + value[1]*dxdr[1] + value[2]*dxdr[2];

            // add to element integral
            J_Nu[b_i] += particle->weight * detJac * (dT_dr - T*vel_rtp[0]);
            volume[b_i] += detJac * particle->weight;
         }
      }
   }

   /* Sum of procs integral */
   (void)MPI_Allreduce( J_Nu, gJ_Nu, 2, MPI_DOUBLE, MPI_SUM, context->communicator );
   (void)MPI_Allreduce( volume, gVolume, 2, MPI_DOUBLE, MPI_SUM, context->communicator );

   // to get horizontally averaged quantities we divide by volume
   gJ_Nu[0] /= gVolume[0];
   gJ_Nu[1] /= gVolume[1];

   // normalise CubedSphereNusselt upper condition - this is for scaling against published results
   // 1.22 and 2.22 are the inner and outer radii for those results
   factor = rMax * log(0.55);
   gJ_Nu[0] = factor *  gJ_Nu[0];

   // normalise CubedSphereNusselt lower condition
   factor = rMin * log(0.55);
   gJ_Nu[1] = factor * gJ_Nu[1];

   avgT = PpcIntegral_Integrate( self->volAvgT );
   vol = PpcIntegral_Integrate( self->vol );

   StgFEM_FrequentOutput_PrintValue( context, avgT/vol );
   StgFEM_FrequentOutput_PrintValue( context, gJ_Nu[0] );
   StgFEM_FrequentOutput_PrintValue( context, gJ_Nu[1] );
}
/** Maps an integration point (identified by element and in-cell index within
 * the integration swarm) to the local index of a particle in this swarm.
 *
 * Resolution order:
 *  1. if the integration swarm mirrors this swarm, the in-cell index is
 *     translated directly with Swarm_ParticleCellIDtoLocalID;
 *  2. otherwise a SwarmMap for this integration swarm is looked up - first
 *     self->previousIntSwarmMap, then self->intSwarmMapList - and a new one is
 *     created (and registered on both swarms) only if none exists;
 *  3. if the map already holds an entry it is returned; if not, the particle
 *     of this swarm in the same element that is closest to the integration
 *     point's global position is chosen, stored in the map, and returned.
 *
 * @param _self        the GeneralSwarm
 * @param _intSwarm    the IntegrationPointsSwarm
 * @param elementId    element containing the integration point
 * @param intPtCellId  index of the integration point within its cell
 * @return local particle index in this swarm
 *
 * Fails through Journal_Firewall when this swarm's cell layout is not an
 * ElementCellLayout, when the two swarms live on different meshes, or when
 * the element's cell contains no particles. */
unsigned GeneralSwarm_IntegrationPointMap( void* _self, void* _intSwarm, unsigned elementId, unsigned intPtCellId ){
    GeneralSwarm* self  = (GeneralSwarm*)_self;
    IntegrationPointsSwarm*	intSwarm = (IntegrationPointsSwarm*)_intSwarm;
    Mesh*	                intMesh  = (Mesh*)intSwarm->mesh;
    SwarmMap* map = NULL;

    // first, lets check if the int swarm is mirroring a general swarm
    if (intSwarm->mirroredSwarm == (Swarm*)self)
    {
        // ok, it is a mirrored swarm
        return Swarm_ParticleCellIDtoLocalID(
                                        self,
                                        CellLayout_MapElementIdToCellId( self->cellLayout, elementId ),
                                        intPtCellId );
    } else if ( self->previousIntSwarmMap && self->previousIntSwarmMap->swarm==intSwarm ) { /* next check if previous swarmmap */
        map = self->previousIntSwarmMap;
    } else {
        /* ok, previous is not our guy, check other existing: */
        int ii;
        for (ii=0; ii<List_GetSize(self->intSwarmMapList); ii++) {
            SwarmMap* candidate = *(SwarmMap**)List_GetItem(self->intSwarmMapList, ii);
            if ( candidate->swarm==intSwarm ){
                map = candidate;
                break;
            }
        }
        if ( !map ) {
            /* there is no corresponding map yet.. let's create one.
               (Must only happen when the search above failed, otherwise the
               cached map is thrown away and the lists grow on every call.) */
            map = SwarmMap_New( intSwarm );
            // add to list
            List_Append( self->intSwarmMapList, (void*)&map );
            // also add to int swarm incase it moves
            List_Append( intSwarm->swarmsMappedTo, (void*)&map );
        }
        /* remember it so the next lookup for this swarm is immediate */
        self->previousIntSwarmMap = map;
    }
    
    unsigned matPointLocalIndex;
    if ( SwarmMap_Map(map,elementId,intPtCellId,&matPointLocalIndex) ) {
        /* ok, map found, return value */
        return matPointLocalIndex;
    } else {
        /* not found... lets go ahead and find nearest neighbour */
        
        /* lets check some things */
        Journal_Firewall(
            Stg_Class_IsInstance( self->cellLayout, ElementCellLayout_Type ),
            NULL,
            "Error In func %s: %s expects a materialSwarm with cellLayout of type ElementCellLayout.",
            __func__, self->type
        );

        /* five %s conversions: function, swarm, mesh, swarm, mesh */
        Journal_Firewall(
            intSwarm->mesh==(FeMesh*)((ElementCellLayout*)self->cellLayout)->mesh,
            Journal_Register( Error_Type, (Name)self->type  ),
            "Error - in %s(): Mapper requires both the MaterialSwarm and\n"
            "the IntegrationSwarm to live on the same mesh.\n"
            "Here the MaterialSwarm %s lives in the mesh %s\n"
            "and the IntegrationSwarm %s lives in the mesh %s.",
            __func__,
            self->name, ((ElementCellLayout*)self->cellLayout)->mesh->name,
            intSwarm->name, intSwarm->mesh->name
        );
        
        Cell_Index cell_I = CellLayout_MapElementIdToCellId( intSwarm->cellLayout, elementId );
        Cell_Index cell_M = CellLayout_MapElementIdToCellId(     self->cellLayout, elementId );

        IntegrationPoint* integrationPoint = (IntegrationPoint*)Swarm_ParticleInCellAt( intSwarm, cell_I, intPtCellId );
        
        /* Convert integration point local to global coordinates */
        Coord global;
        FeMesh_CoordLocalToGlobal( intMesh, elementId, integrationPoint->xi, (double*) &global );

        /* now lets sweep material points to find our closest friend */
        double         distance2_min = DBL_MAX;
        double         distance2;
        Particle_Index particle_M;
        unsigned cellPartCount = self->cellParticleCountTbl[ cell_M ];
        
        Journal_Firewall( cellPartCount,
            Journal_Register( Error_Type, (Name)self->type  ),
            "Error - in %s(): There doesn't appear to be any particles\n"
            "within the current cell (%u).\n",
            __func__, cell_M );

        /* squared Euclidean distance is enough for picking the minimum */
        for ( particle_M = 0; particle_M < cellPartCount; particle_M++ ) {
            GlobalParticle* materialPoint = (GlobalParticle*)Swarm_ParticleInCellAt( self, cell_M, particle_M );
            distance2 = pow( global[0] - materialPoint->coord[0], 2 ) + pow( global[1] - materialPoint->coord[1], 2 );
            if( self->dim == 3 )
                distance2 += pow( global[2] - materialPoint->coord[2], 2 );
            if ( distance2 < distance2_min ){
                distance2_min = distance2;
                matPointLocalIndex = Swarm_ParticleCellIDtoLocalID( self, cell_M, particle_M );
            }
        }
        
        /* ok, we've found our nearest friend. record to mapping */
        SwarmMap_Insert(map,elementId,intPtCellId,matPointLocalIndex);

    }
    
    return matPointLocalIndex;
}
void _MatrixAssemblyTerm_NA__NB__Fn_AssembleElement(
   void*                                              matrixTerm,
   StiffnessMatrix*                                   stiffnessMatrix,
   Element_LocalIndex                                 lElement_I,
   SystemLinearEquations*                             sle,
   FiniteElementContext*                              context,
   double**                                           elStiffMat )
{
   MatrixAssemblyTerm_NA__NB__Fn* self = (MatrixAssemblyTerm_NA__NB__Fn*)matrixTerm;
   Swarm*                              swarm        = self->integrationSwarm;
   FeVariable*                         variable_row = stiffnessMatrix->rowVariable;
   FeVariable*                         variable_col = stiffnessMatrix->columnVariable;
   Dimension_Index                     dim          = stiffnessMatrix->dim;
   Particle_InCellIndex                cParticle_I, cellParticleCount;
   Node_ElementLocalIndex              nodesPerEl_row, nodesPerEl_col;

   Dof_Index                           dofPerNode_col;
   Index                               row, col; /* Indices into the stiffness matrix */
   Node_ElementLocalIndex              rowNode_I, colNode_I;
   Dof_Index                           colDof_I;
   double                              *xi, *Ni, *Mi;
   double                              detJac, weight, F, factor;
   IntegrationPoint*                   intPoint;
   Cell_Index                          cell_I;
   ElementType*                        elementType_row;
   ElementType*                        elementType_col;


   MatrixAssemblyTerm_NA__NB__Fn_cppdata* cppdata = (MatrixAssemblyTerm_NA__NB__Fn_cppdata*)self->cppdata;
   debug_dynamic_cast<ParticleInCellCoordinate*>(cppdata->input->localCoord())->index() = lElement_I;  // set the elementId as the owning cell for the particleCoord
   cppdata->input->index() = lElement_I;

   FeMesh*                 geometryMesh = ( self->geometryMesh ? self->geometryMesh : variable_row->feMesh );
   ElementType*            geometryElementType;

   /* Set the element type */
   geometryElementType = FeMesh_GetElementType( geometryMesh, lElement_I );

   elementType_row = FeMesh_GetElementType( variable_row->feMesh, lElement_I );
   nodesPerEl_row = elementType_row->nodeCount;
   elementType_col = FeMesh_GetElementType( variable_col->feMesh, lElement_I );
   nodesPerEl_col = elementType_col->nodeCount;

   dofPerNode_col = variable_col->fieldComponentCount;

   // allocate shape function array, Mi and Ni
   if( nodesPerEl_row > self->max_nElNodes_row ) {
      /* reallocate */
      if (self->Mi) free(self->Mi);
      self->Mi = (double*)AllocArray( double, nodesPerEl_row );
      self->max_nElNodes_row = nodesPerEl_row;
   }

   if( nodesPerEl_col > self->max_nElNodes_col ) {
      if( self->Ni) free(self->Ni);
      self->Ni = (double*)AllocArray(double, nodesPerEl_col );
      self->max_nElNodes_col = nodesPerEl_col;
   }
   Ni = self->Ni;
   if (elementType_row == elementType_col)
      Mi = Ni;

   cell_I = CellLayout_MapElementIdToCellId( swarm->cellLayout, lElement_I );
   cellParticleCount = swarm->cellParticleCountTbl[ cell_I ];

   for ( cParticle_I = 0 ; cParticle_I < cellParticleCount ; cParticle_I++ ) {
      debug_dynamic_cast<ParticleInCellCoordinate*>(cppdata->input->localCoord())->particle_cellId(cParticle_I);  // set the particleCoord cellId
      /* get integration point information */
      intPoint = (IntegrationPoint*)Swarm_ParticleInCellAt( swarm, cell_I, cParticle_I );
      xi = intPoint->xi;
      weight = intPoint->weight;

      /* evaluate shape function and jacobian determinant */
      detJac = ElementType_JacobianDeterminant( geometryElementType, geometryMesh, lElement_I, xi, dim );
      ElementType_EvaluateShapeFunctionsAt( elementType_col, xi, Ni );
      if (elementType_row != elementType_col)
         ElementType_EvaluateShapeFunctionsAt( elementType_row, xi, Mi );

      // /* evaluate function */
      const IO_double* funcout = debug_dynamic_cast<const IO_double*>(cppdata->func(cppdata->input.get()));
      F = funcout->at();

      factor = weight*detJac*F;
      /* build stiffness matrix */
      for ( rowNode_I = 0; rowNode_I < nodesPerEl_row ; rowNode_I++) {
         for (colNode_I = 0; colNode_I < nodesPerEl_col; colNode_I++ ) {
            for( colDof_I=0; colDof_I<dofPerNode_col; colDof_I++) {
                  /* note that we use the row dof count here too */
                  row = rowNode_I*dofPerNode_col + colDof_I;
                  col = colNode_I*dofPerNode_col + colDof_I;
                  elStiffMat[row][col] += factor * ( Ni[rowNode_I] * Mi[colNode_I] );
            }
         }
      }
   }
}
/* Accumulates the thermal buoyancy force of one element into elForceVec.
 *
 * At each integration point of the element's cell the temperature is
 * interpolated and  detJac * weight * (rayleighNumber * temperature) * Ni
 * is added to the J_AXIS component of every element node.  Only the J_AXIS
 * dof of each node is touched.
 *
 * The integration runs over the mesh of the force vector's variable (the
 * velocity mesh), not the temperature field's own mesh. */
void _ThermalBuoyancyForceTerm_AssembleElement( void* forceTerm, ForceVector* forceVector, Element_LocalIndex lElement_I, double* elForceVec ) {
	ThermalBuoyancyForceTerm*	self        = Stg_CheckType( forceTerm, ThermalBuoyancyForceTerm );
	Swarm*				intSwarm    = self->integrationSwarm;
	Dimension_Index			nDims       = forceVector->dim;
	double				Ra          = self->rayleighNumber;
	FeVariable*			tField      = self->temperatureField;
	/* mesh being integrated over: the one the force vector lives on */
	FeMesh*				velMesh     = forceVector->feVariable->feMesh;
	ElementType*			elType      = FeMesh_GetElementType( velMesh, lElement_I );
	Element_NodeIndex		nNodes      = elType->nodeCount;
	/* assumes constant number of dofs per node, equal to the dimension */
	Dof_Index			stride      = nDims;
	Cell_Index			cell        = CellLayout_MapElementIdToCellId( intSwarm->cellLayout, lElement_I );
	Particle_InCellIndex		nPoints     = intSwarm->cellParticleCountTbl[ cell ];
	Particle_InCellIndex		p           = 0;
	double				shapeFn[27];

	while ( p < nPoints ) {
		IntegrationPoint*	pt = (IntegrationPoint*) Swarm_ParticleInCellAt( intSwarm, cell, p );
		double*			localCoord = pt->xi;
		Node_ElementLocalIndex	n;
		double			jac;
		double			temperature;
		double			buoyancy;
		double			scale;
		double*			slot;

		/* Jacobian determinant and shape functions at this point */
		jac = ElementType_JacobianDeterminant( elType, velMesh, lElement_I, localCoord, nDims );
		ElementType_EvaluateShapeFunctionsAt( elType, localCoord, shapeFn );

		/* temperature at this point */
		FeVariable_InterpolateFromMeshLocalCoord( tField, velMesh, lElement_I, localCoord, &temperature );

		buoyancy = Ra * temperature;
		scale    = jac * pt->weight * buoyancy;

		/* walk the J_AXIS entry of each node's dof block */
		slot = elForceVec + J_AXIS;
		for ( n = 0 ; n < nNodes ; n++, slot += stride )
			*slot += scale * shapeFn[ n ];

		p++;
	}
}