/* New method: sample & surface each element in local coords, faster, handles deformed meshes */
void lucIsosurface_SampleLocal( void* drawingObject)
{
   lucIsosurface*             self               = (lucIsosurface*)drawingObject;
   FeVariable*                feVariable         = (FeVariable*) self->isosurfaceField;
   FeMesh*    		            mesh               = feVariable->feMesh;
   Element_LocalIndex         lElement_I;
   Element_LocalIndex         elementLocalCount  = FeMesh_GetElementLocalSize( mesh );
   int                        i, j, k;
   Vertex***                  vertex;

   vertex = Memory_Alloc_3DArray( Vertex, self->nx, self->ny, self->nz, (Name)"Vertex array" );

   for ( lElement_I = 0 ; lElement_I < elementLocalCount ; lElement_I++ )
   {
      for (i = 0 ; i < self->nx; i++)
      {
         for (j = 0 ; j < self->ny; j++)
         {
            for (k = 0 ; k < self->nz; k++)
            {
               /* Calc position within element in local coords */
               Coord local = {-1.0 + (2.0 * i / self->resolution[ I_AXIS ]), 
                              -1.0 + (2.0 * j / self->resolution[ J_AXIS ]),
                              -1.0 + (2.0 * k / self->resolution[ K_AXIS ])};

               /* Get value at coords (faster using element and local coords) */
               FeVariable_InterpolateWithinElement( feVariable, lElement_I, local, &(vertex[i][j][k].value));

               /* Save local coords and element index for fast interpolation */
               memcpy( vertex[i][j][k].pos, local, 3 * sizeof(double) );
               vertex[i][j][k].element_I = lElement_I;
            }
         }
      }

      if (self->isosurfaceField->dim == 3)
         lucIsosurface_MarchingCubes( self, vertex );

      if (self->isosurfaceField->dim == 2 || self->drawWalls)
         lucIsosurface_DrawWalls( self, vertex );
   }

   /* Free memory */
   Memory_Free( vertex );
}
void ForceVector_GlobalAssembly_General( void* forceVector ) {
	ForceVector*            self                 = (ForceVector*) forceVector;
	FeVariable*             feVar                = self->feVariable;
	Element_LocalIndex      element_lI;
	Element_LocalIndex      elementLocalCount;
	Node_ElementLocalIndex  nodeCountCurrElement = 0;
	Element_Nodes           nodeIdsInCurrElement = 0;
	Dof_Index               totalDofsThisElement = 0;
	Dof_Index               totalDofsPrevElement = 0;
	Dof_Index               dofCountLastNode     = 0;
	Dof_EquationNumber**    elementLM            = NULL;
	double*                 elForceVecToAdd      = NULL;
	/* For output printing */
	double                  outputPercentage=10;	/* Controls how often to give a status update of assembly progress */
	int                     outputInterval;

	Journal_DPrintf( self->debug, "In %s - for vector \"%s\"\n", __func__, self->name );
	
	Stream_IndentBranch( StgFEM_Debug );
	
	if ( Stg_ObjectList_Count( self->forceTermList ) > 0 ) {
		elementLocalCount = FeMesh_GetElementLocalSize( feVar->feMesh );

		/* Initialise Vector */
		outputInterval = (int)( (outputPercentage/100.0)*(double)(elementLocalCount) );
		if( outputInterval == 0 ) { outputInterval = elementLocalCount; }
	
		for( element_lI = 0; element_lI < elementLocalCount; element_lI++ ) {  
			unsigned	nInc, *inc;
		
			FeMesh_GetElementNodes( feVar->feMesh, element_lI, self->inc );
			nInc = IArray_GetSize( self->inc );
			inc = IArray_GetPtr( self->inc );
			nodeCountCurrElement = nInc;
			/* Get the local node ids */
			nodeIdsInCurrElement = inc;

			/* Set value of elementLM: will automatically just index into global LM table if built */
			elementLM = FeEquationNumber_BuildOneElementLocationMatrix( feVar->eqNum, element_lI );

			/* work out number of dofs at the node, using LM */
			/* Since: Number of entries in LM table for this element = (by defn.) Number of dofs this element */
			dofCountLastNode = feVar->dofLayout->dofCounts[nodeIdsInCurrElement[nodeCountCurrElement-1]]; 
			totalDofsThisElement = &elementLM[nodeCountCurrElement-1][dofCountLastNode-1] - &elementLM[0][0] + 1;

			if ( totalDofsThisElement > totalDofsPrevElement ) {
				if (elForceVecToAdd) Memory_Free( elForceVecToAdd );
				Journal_DPrintfL( self->debug, 2, "Reallocating elForceVecToAdd to size %d\n", totalDofsThisElement );
				elForceVecToAdd = Memory_Alloc_Array( double, totalDofsThisElement, "elForceVecToAdd" );
			}

			/* Initialise Values to Zero */
			memset( elForceVecToAdd, 0, totalDofsThisElement * sizeof(double) );
		
			/* Assemble this element's element force vector: going through each force term in list */
			ForceVector_AssembleElement( self, element_lI, elForceVecToAdd );


	        /* When keeping BCs in we come across a bit of a problem in parallel. We're not
	           allowed to add entries to the force vector here and then clobber it later with
	           an insert in order to set the BC. So, what we'll do is just add zero here, that
	           way later we can add the BC and it will be the same as inserting it.
	           --- Luke, 20 May 2008 */
	        if( !self->feVariable->eqNum->removeBCs ) {
	           DofLayout* dofs;
	           int nDofs, curInd;
	           int ii, jj;

	           dofs = self->feVariable->dofLayout; /* shortcut to the dof layout */
	           curInd = 0; /* need a counter to track where we are in the element force vector */
	           for( ii = 0; ii < nodeCountCurrElement; ii++ ) {
	              nDofs = dofs->dofCounts[inc[ii]]; /* number of dofs on this node */
	              for( jj = 0; jj < nDofs; jj++ ) {
	                 if( !FeVariable_IsBC( self->feVariable, inc[ii], jj ) ) {
	                    curInd++;
	                    continue; /* only need to clear it if it's a bc */
	                 }
	                 elForceVecToAdd[curInd] = 0.0;
	                 curInd++;
	              }
	           }
	        }

			/* Ok, assemble into global matrix */
			//Vector_AddEntries( self->vector, totalDofsThisElement, (Index*)(elementLM[0]), elForceVecToAdd );
			VecSetValues( self->vector, totalDofsThisElement, (PetscInt*)elementLM[0], elForceVecToAdd, ADD_VALUES );

#if DEBUG
			if( element_lI % outputInterval == 0 ) {
				Journal_DPrintfL( self->debug, 2, "done %d percent of global force vector assembly (general) \n",
						  (int)(100.0*((double)element_lI/(double)elementLocalCount)) );
			}
#endif

			/* Cleanup: If we haven't built the big LM for all elements, free the temporary one */
			if ( False == feVar->eqNum->locationMatrixBuilt ) {
				Memory_Free( elementLM );
			}
			totalDofsPrevElement = totalDofsThisElement;
		}

		Memory_Free( elForceVecToAdd );
	}
void lecode_tools_Isostasy_AverageBody(lecode_tools_Isostasy *self,
                                       double** _avg_density, double** _rho_zero_density,
                                       double** _phi)
{
   FeMesh *mesh;
   ElementType *el_type;
   IntegrationPointsSwarm *swarm;
   double *local_density, *global_density;
   double *local_vol, *global_vol;
   double *local_rho_zero_vol, *global_rho_zero_vol;
   double *local_rho_zero_density, *global_rho_zero_density;
   double *local_phi, *global_phi, temp, tempDot;
   int cell, num_particles, num_dims, num_els;
   IntegrationPoint *particle;
   double jac_det;
   double density, alpha, densityFinal;
   Material *mat;
   Bool oneToMany;
   Grid* elGrid;
   int elInds[3], arraySize, arrayPos;
   int ii, jj;

   mesh = self->mesh;
   elGrid = *Mesh_GetExtension( mesh, Grid**,  mesh->elGridId );
   num_dims = Mesh_GetDimSize(mesh);
   swarm = self->swarm;
   num_els = FeMesh_GetElementLocalSize(mesh);

   arraySize=0;
   if ( num_dims == 2 ) arraySize = elGrid->sizes[0];
   else if ( num_dims == 3 ) arraySize = elGrid->sizes[0]*elGrid->sizes[self->zontalAxis];
   else assert(0);

   /* Allocate for the column values. */
   local_vol = (double*)malloc( arraySize*sizeof(double) );
   memset( local_vol, 0, arraySize*sizeof(double) );
   local_density = (double*)malloc( arraySize*sizeof(double) );
   memset( local_density, 0, arraySize*sizeof(double) );
   local_rho_zero_vol = (double*)malloc( arraySize*sizeof(double) );
   memset( local_rho_zero_vol, 0, arraySize*sizeof(double) );
   local_rho_zero_density = (double*)malloc( arraySize*sizeof(double) );
   memset( local_rho_zero_density, 0, arraySize*sizeof(double) );
   local_phi = (double*)malloc( arraySize*sizeof(double) );
   memset( local_phi, 0, arraySize*sizeof(double) );

   /* Initialise temperature. */
   temp = 0.0;

   oneToMany = Stg_Class_IsInstance(swarm->mapper, OneToManyMapper_Type);

   for (ii = 0; ii < num_els; ii++)
   {

      /* Make sure the element is beneath the surface. */
      Grid_Lift( elGrid, FeMesh_ElementDomainToGlobal( mesh, ii ), elInds );
      if ( self->surfaceIdx != -1 && elInds[self->vertAxis] >= self->surfaceIdx )
         continue;

      el_type = FeMesh_GetElementType(mesh, ii);
      cell = CellLayout_MapElementIdToCellId(swarm->cellLayout, ii);
      num_particles = swarm->cellParticleCountTbl[cell];

      for (jj = 0; jj < num_particles; jj++)
      {

         particle = (IntegrationPoint*)Swarm_ParticleInCellAt(swarm, cell, jj);
         jac_det = ElementType_JacobianDeterminant(el_type, mesh, ii, particle->xi, num_dims);

         if(!self->ppcManager){
            density = IntegrationPointMapper_GetDoubleFromMaterial(
                         swarm->mapper, particle, self->buoyancy->materialExtHandle,
                         offsetof(BuoyancyForceTerm_MaterialExt, density) );
            alpha = IntegrationPointMapper_GetDoubleFromMaterial(
                       swarm->mapper, particle, self->buoyancy->materialExtHandle,
                       offsetof(BuoyancyForceTerm_MaterialExt, alpha) );

            if (self->tempField)
            {
               FeVariable_InterpolateFromMeshLocalCoord(self->tempField, self->tempField->feMesh,
                     ii, particle->xi, &temp);
               FeVariable_InterpolateFromMeshLocalCoord(self->tempDotField, self->tempDotField->feMesh,
                     ii, particle->xi, &tempDot);
            }

            densityFinal = density*(1.0 - alpha*temp);

         } else {
            int err;
            /* Density */
            err = PpcManager_Get( self->ppcManager, ii, particle, self->densityID, &densityFinal );
            assert(!err);
         }

         arrayPos = elInds[0];
         if ( num_dims == 3 ) arrayPos += elInds[self->zontalAxis]*elGrid->sizes[0];

         local_vol[arrayPos] += particle->weight*jac_det;
         local_density[arrayPos] += particle->weight*jac_det*densityFinal;

         if (!oneToMany)
         {
            mat = IntegrationPointsSwarm_GetMaterialOn(swarm, particle);
            if (mat->index == self->rho_zero_mat->index)
            {
               local_rho_zero_vol[arrayPos] += particle->weight*jac_det;
               local_rho_zero_density[arrayPos] += particle->weight*jac_det*densityFinal;
            }
         }
         else
         {
            OneToManyRef *ref;
            int cnt;
            int kk;

            ref = OneToManyMapper_GetMaterialRef(swarm->mapper, particle);
            cnt = 0;
            for (kk = 0; kk < ref->numParticles; kk++)
            {
               mat = MaterialPointsSwarm_GetMaterialAt(((OneToManyMapper*)swarm->mapper)->materialSwarm, ref->particleInds[kk]);
               if (mat->index == self->rho_zero_mat->index)
                  cnt++;
            }

            if (2*cnt > ref->numParticles)
            {
               local_rho_zero_vol[arrayPos] += particle->weight*jac_det;
               local_rho_zero_density[arrayPos] += particle->weight*jac_det*densityFinal;
            }
         }

         if (_phi)
         {
            local_phi[arrayPos] += particle->weight*jac_det*(-density*alpha*tempDot);
         }
      }
   }

   /* Allocate for the global column values. */
   global_vol = (double*)malloc( arraySize*sizeof(double) );
   global_density = (double*)malloc( arraySize*sizeof(double) );
   global_rho_zero_vol = (double*)malloc( arraySize*sizeof(double) );
   global_rho_zero_density = (double*)malloc( arraySize*sizeof(double) );
   global_phi = (double*)malloc( arraySize*sizeof(double) );

   MPI_Allreduce(local_vol, global_vol, arraySize, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
   MPI_Allreduce(local_density, global_density, arraySize, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
   MPI_Allreduce(local_rho_zero_vol, global_rho_zero_vol, arraySize, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
   MPI_Allreduce(local_rho_zero_density, global_rho_zero_density, arraySize, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
   if (_phi)
      MPI_Allreduce(local_phi, global_phi, arraySize, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);

   free( local_vol );
   free( local_density );
   free( local_rho_zero_vol );
   free( local_rho_zero_density );
   free( local_phi );

   if ( self->avg )
   {
      for ( ii = 1; ii < arraySize; ii++ )
      {
         global_vol[0] += global_vol[ii];
         global_density[0] += global_density[ii];
         global_rho_zero_vol[0] += global_rho_zero_vol[ii];
         global_rho_zero_density[0] += global_rho_zero_density[ii];
         if ( _phi )
            global_phi[0] += global_phi[ii];
      }
   }

   /* Calculate results. */
   *_avg_density = (double*)malloc( arraySize*sizeof(double) );
   *_rho_zero_density = (double*)malloc( arraySize*sizeof(double) );
   if (_phi)
      *_phi = (double*)malloc( arraySize*sizeof(double) );
   for ( ii = 0; ii < arraySize; ii++ )
   {
      (*_avg_density)[ii] = (global_vol[ii] > 1e-7) ? global_density[ii]/global_vol[ii] : 0.0;
      (*_rho_zero_density)[ii] = (global_rho_zero_vol[ii] > 1e-7) ? global_rho_zero_density[ii]/global_rho_zero_vol[ii] : 0.0;
      if (_phi)
         (*_phi)[ii] = (global_vol[ii] > 1e-7) ? global_phi[ii]/global_vol[ii] : 0.0;
      if ( self->avg )
         break;
   }

   /*
       printf("Global mean density: %g\n", (*_avg_density)[0]);
       printf("Global mean rho_0 density: %g\n", (*_rho_zero_density)[0]);
       printf("Global phi/vol: %g\n", (*_phi)[0]);
   */

   free( global_vol );
   free( global_density );
   free( global_rho_zero_vol );
   free( global_rho_zero_density );
   free( global_phi );
}
void Stokes_SLE_PenaltySolver_MakePenalty( Stokes_SLE_PenaltySolver* self, Stokes_SLE* sle, Vec* _penalty ) {
    Vec fVec = sle->fForceVec->vector, hVec = sle->hForceVec->vector, penalty, lambda;
    Mat kMat = sle->kStiffMat->matrix;
    FeMesh *mesh = sle->kStiffMat->rowVariable->feMesh;
    FeVariable *velField = sle->kStiffMat->rowVariable;
    SolutionVector* uVec = sle->uSolnVec;
    FeEquationNumber *eqNum = uVec->eqNum;
    IArray *inc;
    PetscScalar *lambdaVals, lambdaMin, *penaltyVals;
    int numDofs, numLocalElems, nodeCur, numLocalNodes, rank, eq;
    SolutionVector *solVec = sle->uSolnVec;
    double *velBackup;
    Vec vecBackup;
    int ii, jj, kk;

    MPI_Comm_rank( MPI_COMM_WORLD, &rank );

    numDofs = Mesh_GetDimSize( mesh );
    numLocalElems = FeMesh_GetElementLocalSize( mesh );
    numLocalNodes = FeMesh_GetNodeLocalSize( mesh );

    velBackup = (double*)malloc( numLocalNodes*numDofs*sizeof(double) );
    for( ii = 0; ii < numLocalNodes; ii++ )
        FeVariable_GetValueAtNode( velField, ii, velBackup + ii*numDofs );

    VecDuplicate( hVec, &penalty );
    VecGetArray( penalty, &penaltyVals );

    VecDuplicate( fVec, &lambda );
    MatGetDiagonal( kMat, lambda );
    {
        PetscInt idx;
        PetscReal min, max;

        VecMin( lambda, &idx, &min );
        VecMax( lambda, &idx, &max );
        if( rank == 0 ) {
           printf( "LAMBDA RANGE:\n" );
           printf( "  MIN: %e\n", min );
           printf( "  MAX: %e\n", max );
        }
    }

    vecBackup = solVec->vector;
    solVec->vector = lambda;
    SolutionVector_UpdateSolutionOntoNodes( solVec );

    inc = IArray_New();
    lambdaVals = (double*)malloc( numDofs*sizeof(double) );

    for( ii = 0; ii < numLocalElems; ii++ ) {

        lambdaMin = DBL_MAX;

        FeMesh_GetElementNodes( mesh, ii, inc );
        for( jj = 0; jj < inc->size; jj++ ) {

            nodeCur = inc->ptr[jj];
            FeVariable_GetValueAtNode( velField, nodeCur, lambdaVals );

            for( kk = 0; kk < numDofs; kk++ ) {

                eq = eqNum->mapNodeDof2Eq[nodeCur][kk];
                if( eq == -1 )
                    continue;

/*
                eq = *(int*)STreeMap_Map( eqNum->ownedMap, &eq );

                VecGetValues( lambda, 1, &eq, &lambdaVal );
*/

                if( lambdaVals[kk] < 0.0 )
                    printf( "%g\n",  lambdaVals[kk] );
                if( lambdaVals[kk] < lambdaMin )
                    lambdaMin = lambdaVals[kk];

            }
        }

        penaltyVals[ii] = lambdaMin;

    }

    if( lambdaVals ) free( lambdaVals );
    Stg_Class_Delete( inc );

    solVec->vector = vecBackup;

    for( ii = 0; ii < numLocalNodes; ii++ )
        FeVariable_SetValueAtNode( velField, ii, velBackup + ii*numDofs );
    if( velBackup ) free( velBackup );
    FeVariable_SyncShadowValues( velField );

    Stg_VecDestroy(&lambda );

    VecRestoreArray( penalty, &penaltyVals );
    VecAssemblyBegin( penalty );
    VecAssemblyEnd( penalty );

    {
        PetscInt idx;
        PetscReal min, max;

        VecMin( penalty, &idx, &min );
        VecMax( penalty, &idx, &max );
        if( rank == 0 ) {
           printf( "SEMI-PENALTY RANGE:\n" );
           printf( "  MIN: %e\n", min );
           printf( "  MAX: %e\n", max );
        }
    }

    *_penalty = penalty;
}