/*
 * Frequent-output hook: finds the largest velocity magnitude on the first
 * (index 0) and last (index sizes[0]-1) vertex layers along grid axis 0,
 * reduces both maxima over all ranks and prints them (outer first, then inner).
 *
 * Only vertices for which Mesh_GlobalToDomain succeeds on this rank are
 * sampled; a rank holding none of them contributes -HUGE_VAL to the MPI_MAX.
 */
void Spherical_MaxVel_Output( UnderworldContext* context ) {
   Spherical_CubedSphereNusselt* self = (Spherical_CubedSphereNusselt*)LiveComponentRegister_Get( context->CF->LCRegister, (Name)Spherical_CubedSphereNusselt_Type );
   FeVariable* velField   = self->velocityField;
   FeMesh*     feMesh     = velField->feMesh;
   Grid*       vertGrid   = NULL;
   unsigned*   gridSizes  = NULL;
   unsigned    nDomainVerts;
   unsigned    a, b, layer;
   unsigned    ijk[3];
   int         globalVert, domainVert;
   double      nodeVel[3], speed;
   double      localMax[2], globalMax[2];   /* [0] = inner layer, [1] = outer layer */

   /* start below any possible magnitude so the first sample always wins */
   localMax[0] = -1*HUGE_VAL;
   localMax[1] = localMax[0];

   /* fetch the vertex grid of the (regular) mesh */
   RegularMeshUtils_ErrorCheckAndGetDetails( (Mesh*)feMesh, MT_VERTEX, &nDomainVerts, &vertGrid );
   gridSizes = vertGrid->sizes;

   /* sweep the two remaining grid axes; at each position visit both end layers of axis 0 */
   for( a = 0; a < gridSizes[1]; a++ ) {
      for( b = 0; b < gridSizes[2]; b++ ) {
         ijk[1] = a;
         ijk[2] = b;

         for( layer = 0; layer < 2; layer++ ) {
            /* layer 0 -> first vertex along axis 0, layer 1 -> last vertex */
            ijk[0] = ( layer == 0 ) ? 0 : vertGrid->sizes[0]-1;
            globalVert = Grid_Project( vertGrid, ijk );

            /* skip vertices that are not held by this rank */
            if( Mesh_GlobalToDomain( feMesh, MT_VERTEX, globalVert, &domainVert ) != True )
               continue;

            FeVariable_GetValueAtNode( velField, domainVert, nodeVel );
            speed = sqrt( nodeVel[0]*nodeVel[0] + nodeVel[1]*nodeVel[1] + nodeVel[2]*nodeVel[2] );
            if( speed > localMax[layer] )
               localMax[layer] = speed;
         }
      }
   }

   (void)MPI_Allreduce( localMax, globalMax, 2, MPI_DOUBLE, MPI_MAX, context->communicator );

   StgFEM_FrequentOutput_PrintValue( context, globalMax[1] ); /* outer layer maximum */
   StgFEM_FrequentOutput_PrintValue( context, globalMax[0] ); /* inner layer maximum */
}
/*
 * Returns courantFactor times the smallest |minSeparation[d] / velocity[d]|
 * found over all local nodes of the phi field's mesh and all `dim` components.
 *
 * Velocity components that are exactly zero are ignored. If no non-zero
 * component is found the unscaled limit stays at HUGE_VAL.
 * When the velocity field lives on a different mesh than phi, the velocity is
 * interpolated at the phi node's coordinate instead of being read nodally.
 */
double AdvectionDiffusionSLE_AdvectiveTimestep( void* advectionDiffusionSLE ) {
	AdvectionDiffusionSLE*    self           = (AdvectionDiffusionSLE*) advectionDiffusionSLE;
	AdvDiffResidualForceTerm* forceTerm      = self->advDiffResidualForceTerm;
	FeVariable*               velField       = forceTerm->velocityField;
	Node_LocalIndex           nLocalNodes    = FeMesh_GetNodeLocalSize( self->phiField->feMesh );
	Dimension_Index           nDims          = self->dim;
	Node_LocalIndex           n;
	Dimension_Index           d;
	XYZ                       nodeVel;
	double                    smallestSep;
	double                    sepPerDim[3];
	double                    crossingTime;
	double                    limit          = HUGE_VAL;
	double*                   nodeCoord;

	Journal_DPrintf( self->debug, "In func: %s\n", __func__ );

	FeVariable_GetMinimumSeparation( self->phiField, &smallestSep, sepPerDim );

	for( n = 0 ; n < nLocalNodes ; n++ ) {
		/* nodal read is only valid when both fields share one mesh */
		if( self->phiField->feMesh != velField->feMesh ) {
			nodeCoord = Mesh_GetVertex( self->phiField->feMesh, n );
			FieldVariable_InterpolateValueAt( velField, nodeCoord, nodeVel );
		}
		else {
			FeVariable_GetValueAtNode( velField, n, nodeVel );
		}

		for( d = 0 ; d < nDims ; d++ ) {
			/* a stationary component imposes no limit (and would divide by zero) */
			if( nodeVel[ d ] != 0.0 ) {
				crossingTime = fabs( sepPerDim[ d ]/nodeVel[ d ] );
				limit = MIN( limit, crossingTime );
			}
		}
	}

	return self->courantFactor * limit;
}
/*
 * Runs the model described by testSemiLagrangianIntegrator.xml and checks that
 * the error returned by SemiLagrangianIntegratorSuite_EvaluateError between the
 * final "PhiField" and the copy taken before the main loop ("PhiFieldInitial")
 * is below TOLERANCE.
 */
void SemiLagrangianIntegratorSuite_Test( SemiLagrangianIntegratorSuiteData* data ) {
   Stg_ComponentFactory* factory;
   AbstractContext*      context;
   FeVariable*           phiNow;
   FeVariable*           phiInitial;
   Swarm*                quadSwarm;
   double                nodeValue[3];
   double                errorNorm;
   unsigned              nLocalVerts;
   unsigned              v;

   factory = stgMainInitFromXML( "StgFEM/Utils/input/testSemiLagrangianIntegrator.xml", MPI_COMM_WORLD, NULL );
   context = Stg_ComponentFactory_ConstructByName( factory, (Name)"context", AbstractContext, True, NULL  );

   /* condition functions referenced by name; registered before build/initialise */
   ConditionFunction_Register_Add( condFunc_Register,
      ConditionFunction_New( SemiLagrangianIntegratorSuite_Line, (Name)"Line", NULL  ) );
   ConditionFunction_Register_Add( condFunc_Register,
      ConditionFunction_New( SemiLagrangianIntegratorSuite_ShearCellX, (Name)"ShearCellX", NULL  ) );
   ConditionFunction_Register_Add( condFunc_Register,
      ConditionFunction_New( SemiLagrangianIntegratorSuite_ShearCellY, (Name)"ShearCellY", NULL  ) );

   /* the suite supplies its own timestep and per-step update hook */
   ContextEP_ReplaceAll( context, AbstractContext_EP_Dt, Dt );
   ContextEP_Append( context, AbstractContext_EP_UpdateClass, SemiLagrangianIntegratorSuite_UpdatePositions );

   stgMainBuildAndInitialise( factory );

   phiNow     = (FeVariable*)LiveComponentRegister_Get( factory->LCRegister, (Name)"PhiField" );
   phiInitial = (FeVariable* )LiveComponentRegister_Get( factory->LCRegister, (Name)"PhiFieldInitial" );
   quadSwarm  = (Swarm* )LiveComponentRegister_Get( factory->LCRegister, (Name)"gaussSwarm"  );

   /* snapshot the initial phi values node by node for the later comparison */
   nLocalVerts = Mesh_GetLocalSize( phiNow->feMesh, MT_VERTEX );
   v = 0;
   while( v < nLocalVerts ) {
      FeVariable_GetValueAtNode( phiNow, v, nodeValue );
      FeVariable_SetValueAtNode( phiInitial, v, nodeValue );
      v++;
   }

   stgMainLoop( factory );

   errorNorm = SemiLagrangianIntegratorSuite_EvaluateError( phiNow, phiInitial, quadSwarm );

   pcu_check_true( errorNorm < TOLERANCE );

   stgMainDestroy( factory );
}
/*
 * Per-step update hook for the semi-Lagrangian test.
 *
 * On the step immediately after "reverseTimeStep" (dictionary value, default
 * 100) every local nodal velocity is negated. On every step the integrator
 * "integrator" is solved from "PhiField" into "PhiStarField", and the result
 * is then copied back into "PhiField" node by node.
 */
void SemiLagrangianIntegratorSuite_UpdatePositions( void* data, FiniteElementContext* context ) {
   Index       flipStep  = Dictionary_GetUnsignedInt_WithDefault( context->dictionary, "reverseTimeStep", 100 );
   FeVariable* velField  = (FeVariable*) LiveComponentRegister_Get( context->CF->LCRegister, (Name)"VelocityField" );
   FeMesh*     velMesh   = velField->feMesh;
   unsigned    dimCount  = Mesh_GetDimSize( velMesh );
   unsigned    nLocalVerts;
   unsigned    n;
   Index       d;
   double      nodeVel[3];
   double      phiValue;
   SemiLagrangianIntegrator* integrator;
   FeVariable* phi;
   FeVariable* phiStar;

   _FeVariable_SyncShadowValues( velField );

   /* flip the flow direction once, on the step following the reversal step */
   if( context->timeStep == flipStep + 1  ) {
      nLocalVerts = Mesh_GetLocalSize( velMesh, MT_VERTEX );
      for( n = 0; n < nLocalVerts; n++ ) {
         _FeVariable_GetValueAtNode( velField, n, nodeVel );

         d = 0;
         while( d < dimCount ) {
            nodeVel[d] *= -1;
            d++;
         }

         FeVariable_SetValueAtNode( velField, n, nodeVel );
      }
   }

   integrator = (SemiLagrangianIntegrator*)LiveComponentRegister_Get( context->CF->LCRegister, (Name)"integrator" );
   phi        = (FeVariable* )LiveComponentRegister_Get( context->CF->LCRegister, (Name)"PhiField" );
   phiStar    = (FeVariable* )LiveComponentRegister_Get( context->CF->LCRegister, (Name)"PhiStarField"  );
   SemiLagrangianIntegrator_Solve( integrator, phi, phiStar );

   /* the advected result becomes the field used on the next step */
   nLocalVerts = Mesh_GetLocalSize( velMesh, MT_VERTEX );
   for( n = 0; n < nLocalVerts; n++ ) {
      FeVariable_GetValueAtNode( phiStar, n, &phiValue );
      FeVariable_SetValueAtNode( phi, n, &phiValue );
   }
}
/*
 * Averages the vertical velocity and the height difference of the top and
 * bottom surfaces, either over the whole surface (self->avg set) or per
 * element column, and sums the contributions of all ranks.
 *
 * A node counts as "top" when its index along self->vertAxis equals
 * self->surfaceIdx, or equals the last vertex index when surfaceIdx is -1.
 * A node counts as "bottom" when that index is 0.
 *
 * Outputs (both allocated here with arraySize entries and NOT freed by this
 * function):
 *   *_avg_sep    [c] = mean top vertical velocity - mean bottom vertical velocity
 *   *_avg_height [c] = (sum of top heights - sum of bottom heights) / top count
 * With self->avg set only entry 0 is computed; the remaining entries are zero.
 *
 * arraySize is elGrid->sizes[0] in 2D and
 * elGrid->sizes[0] * elGrid->sizes[self->zontalAxis] in 3D.
 *
 * NOTE(review): a column whose global top or bottom count is zero yields a
 * division by zero in the final averages, exactly as before - confirm that
 * every column always receives at least one top and one bottom node.
 */
void lecode_tools_Isostasy_AverageSurfaces(lecode_tools_Isostasy *self,
      double** _avg_sep, double** _avg_height)
{
   FeMesh *mesh;
   Grid *grid, *elGrid;
   double *local_height, *global_height;
   double *local_top_vy, *global_top_vy;
   double *local_bot_vy, *global_bot_vy;
   int *local_top_cnt, *global_top_cnt;
   int *local_bot_cnt, *global_bot_cnt;
   int param[3], elParam[3], num_nodes, n;
   double vel[3];
   double nodeHeight;
   IArray *inc;
   int nDims, arrayPos, arraySize;
   int isTop, isBottom;
   int ii, jj;

   mesh = self->mesh;
   nDims = Mesh_GetDimSize( mesh );
   grid = *Mesh_GetExtension(mesh, Grid**, mesh->vertGridId );
   elGrid = *Mesh_GetExtension(mesh, Grid**, mesh->elGridId );
   num_nodes = FeMesh_GetNodeLocalSize(mesh);
   inc = IArray_New();

   /* one accumulator slot per element column of the bottom surface */
   arraySize=0; /*to prevent warnings*/
   if ( nDims == 2 ) arraySize = elGrid->sizes[0];
   else if ( nDims == 3 ) arraySize = elGrid->sizes[0]*elGrid->sizes[self->zontalAxis];
   else assert(0);

   /* calloc gives the zeroed accumulators in one step */
   local_top_vy  = (double*)calloc( arraySize, sizeof(double) );
   local_bot_vy  = (double*)calloc( arraySize, sizeof(double) );
   local_height  = (double*)calloc( arraySize, sizeof(double) );
   local_top_cnt = (int*)calloc( arraySize, sizeof(int) );
   local_bot_cnt = (int*)calloc( arraySize, sizeof(int) );

   for (ii = 0; ii < num_nodes; ii++)
   {
      n = FeMesh_NodeDomainToGlobal(mesh, ii);
      Grid_Lift(grid, n, param);

      isTop = (self->surfaceIdx != -1 && param[self->vertAxis] == self->surfaceIdx) ||
              (self->surfaceIdx == -1 && param[self->vertAxis] == grid->sizes[self->vertAxis] - 1);
      isBottom = (param[self->vertAxis] == 0);

      /* interior nodes contribute nothing: skip all per-node queries for them */
      if ( !isTop && !isBottom )
         continue;

      FeVariable_GetValueAtNode(self->vel_field, ii, vel);
      nodeHeight = Mesh_GetVertex( mesh, ii )[self->vertAxis];

      /* the node's element incidence is only needed for per-column sums */
      if ( !self->avg )
         FeMesh_GetNodeElements( mesh, ii, inc );

      if ( isTop )
      {
         if ( self->avg )
         {
            local_top_vy[0] += vel[self->vertAxis];
            local_height[0] += nodeHeight;
            local_top_cnt[0]++;
         }
         else
         {
            for (jj = 0; jj < inc->size; jj++ )
            {
               Grid_Lift( elGrid, FeMesh_ElementDomainToGlobal( self->mesh, inc->ptr[jj] ), elParam );

               /* Make sure element is below surface. */
               if ( self->surfaceIdx != -1 && elParam[self->vertAxis] >= self->surfaceIdx )
                  continue;

               arrayPos = elParam[0];
               if ( nDims == 3 ) arrayPos += elParam[self->zontalAxis]*elGrid->sizes[0];

               local_top_vy[arrayPos] += vel[self->vertAxis];
               local_height[arrayPos] += nodeHeight;
               local_top_cnt[arrayPos]++;
            }
         }
      }

      if ( isBottom )
      {
         if ( self->avg )
         {
            local_bot_vy[0] += vel[self->vertAxis];
            local_height[0] -= nodeHeight;   /* height accumulator holds top - bottom */
            local_bot_cnt[0]++;
         }
         else
         {
            for (jj = 0; jj < inc->size; jj++ )
            {
               Grid_Lift( elGrid, FeMesh_ElementDomainToGlobal( self->mesh, inc->ptr[jj] ), elParam );

               /* Make sure element is below surface. */
               if ( self->surfaceIdx != -1 && elParam[self->vertAxis] >= self->surfaceIdx )
                  continue;

               arrayPos = elParam[0];
               if ( nDims == 3 ) arrayPos += elParam[self->zontalAxis]*elGrid->sizes[0];

               local_bot_vy[arrayPos] += vel[self->vertAxis];
               local_height[arrayPos] -= nodeHeight;   /* height accumulator holds top - bottom */
               local_bot_cnt[arrayPos]++;
            }
         }
      }
   }

   /* receive buffers are fully overwritten by the reductions below */
   global_top_vy = (double*)malloc( arraySize*sizeof(double) );
   global_bot_vy = (double*)malloc( arraySize*sizeof(double) );
   global_height = (double*)malloc( arraySize*sizeof(double) );
   global_top_cnt = (int*)malloc( arraySize*sizeof(int) );
   global_bot_cnt = (int*)malloc( arraySize*sizeof(int) );

   MPI_Allreduce(local_height, global_height, arraySize, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
   MPI_Allreduce(local_top_vy, global_top_vy, arraySize, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
   MPI_Allreduce(local_bot_vy, global_bot_vy, arraySize, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
   MPI_Allreduce(local_top_cnt, global_top_cnt, arraySize, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
   MPI_Allreduce(local_bot_cnt, global_bot_cnt, arraySize, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

   free( local_height );
   free( local_top_vy );
   free( local_bot_vy );
   free( local_top_cnt );
   free( local_bot_cnt );
   Stg_Class_Delete( inc );

   /* zero-initialised so entries never written below (avg mode) are defined */
   *_avg_sep = (double*)calloc( arraySize, sizeof(double) );
   *_avg_height = (double*)calloc( arraySize, sizeof(double) );
   for ( ii = 0; ii < arraySize; ii++ )
   {
      (*_avg_sep)[ii] = global_top_vy[ii]/(double)(global_top_cnt[ii]) - global_bot_vy[ii]/(double)(global_bot_cnt[ii]);
      (*_avg_height)[ii] = global_height[ii]/(double)(global_top_cnt[ii]);
      /* whole-surface averaging only ever uses slot 0 */
      if (self->avg)
         break;
   }

   free( global_height );
   free( global_top_vy );
   free( global_bot_vy );
   free( global_top_cnt );
   free( global_bot_cnt );
}
/*
 * Samples vertex positions and field values on one plane of the mesh's vertex
 * grid and collects the complete plane on rank 0.
 *
 * drawingObject : a lucMeshCrossSection*; its vertices/values sample arrays
 *                 are (re)allocated via lucCrossSection_AllocateSampleData.
 * reverse       : when set, row i of the sample is taken from grid index
 *                 dim[1]-i-1 along self->axis1 instead of index i.
 *
 * Phase 1 fills every sample whose node is local to this rank and marks the
 * others by leaving vertices[i][j][0] at HUGE_VAL. Phase 2 has each non-root
 * rank send its local samples to rank 0, which receives exactly the samples
 * it does not already hold. The message tag is i*dim[2]+j.
 *
 * NOTE(review): the tag grows with the sample count; confirm it stays within
 * the MPI implementation's tag upper bound for large cross sections.
 */
void lucMeshCrossSection_Sample( void* drawingObject, Bool reverse)
{
   lucMeshCrossSection* self          = (lucMeshCrossSection*)drawingObject;
   FeVariable*          fieldVariable = (FeVariable*) self->fieldVariable;
   Mesh*                mesh          = (Mesh*) fieldVariable->feMesh;
   Grid*                vertGrid;
   Node_LocalIndex      crossSection_I;
   IJK                  node_ijk;
   Node_GlobalIndex     node_gI;
   Node_DomainIndex     node_dI;
   int                  i,j, d, sizes[3] = {1,1,1};
   Coord                globalMin, globalMax, min, max;

   /* +1 per locally sampled node, -1 per node accounted for in the gather; must end at 0 */
   int localcount = 0;

   vertGrid = *(Grid**)ExtensionManager_Get( mesh->info, mesh, self->vertexGridHandle );
   /* unused dimensions keep size 1 */
   for (d=0; d<fieldVariable->dim; d++) sizes[d] = vertGrid->sizes[d];
   self->dim[0] = sizes[ self->axis ];
   self->dim[1] = sizes[ self->axis1 ];
   self->dim[2] = sizes[ self->axis2 ];

   crossSection_I = lucCrossSection_GetValue(self, 0, self->dim[0]-1);

   FieldVariable_GetMinAndMaxLocalCoords( fieldVariable, min, max );
   FieldVariable_GetMinAndMaxGlobalCoords( fieldVariable, globalMin, globalMax );

   Journal_Printf( lucDebug, "%s called on field %s, with axis of cross section as %d, crossSection_I as %d (dims %d,%d,%d) field dim %d\n",
                    __func__, fieldVariable->name, self->axis, crossSection_I, self->dim[0], self->dim[1], self->dim[2], self->fieldDim);

   /* Get mesh cross section self->vertices and values */
   self->resolutionA = self->dim[1];
   self->resolutionB = self->dim[2];
   lucCrossSection_AllocateSampleData(self, self->fieldDim);
   int lSize = Mesh_GetLocalSize( mesh, MT_VERTEX );
   double time = MPI_Wtime();
   /* "%%" prints the literal percent sign; a lone trailing '%' is an invalid
    * conversion specification. The 4 characters "  0%" are what the
    * "\b\b\b\b" in the progress update below overwrites. */
   Journal_Printf(lucInfo, "Sampling mesh (%s) %d x %d...  0%%", self->name, self->dim[1], self->dim[2]);
   node_ijk[ self->axis ] = crossSection_I;
   for ( i = 0 ; i < self->dim[1]; i++ )
   {
      int percent = 100 * (i + 1) / self->dim[1];
      Journal_Printf(lucInfo, "\b\b\b\b%3d%%", percent);
      fflush(stdout);

      /* Reverse order if requested */
      int i0 = i;
      if (reverse) i0 = self->dim[1] - i - 1;

      node_ijk[ self->axis1 ] = i0;

      for ( j = 0 ; j < self->dim[2]; j++ )
      {
         /* HUGE_VAL in the first coordinate marks "not sampled on this rank" */
         self->vertices[i][j][0] = HUGE_VAL;
         self->vertices[i][j][2] = 0;
         node_ijk[ self->axis2 ] = j;
         node_gI = Grid_Project( vertGrid, node_ijk );
         /* Get coord and value if node is local... */
         if (Mesh_GlobalToDomain( mesh, MT_VERTEX, node_gI, &node_dI ) && node_dI < lSize)
         {  
            /* Found on this processor */
            double value[self->fieldDim];
            FeVariable_GetValueAtNode( fieldVariable, node_dI, value );
            double* pos = Mesh_GetVertex( mesh, node_dI );
         
            for (d=0; d<fieldVariable->dim; d++)
               self->vertices[i][j][d] = pos[d];

            for (d=0; d<self->fieldDim; d++)
               self->values[i][j][d] = (float)value[d];

            localcount++;
         }
      }
   }
   Journal_Printf(lucInfo, " %f sec. ", MPI_Wtime() - time);

   /* Collate */
   time = MPI_Wtime();
   for ( i=0 ; i < self->dim[1]; i++ )
   {
      for ( j=0 ; j < self->dim[2]; j++ )
      {
         /* Receive values at root */
         if (self->context->rank == 0)
         {
            /* Already have value? */
            if (self->vertices[i][j][0] != HUGE_VAL) {localcount--; continue; }

            /* Recv (pos and value together = (3 + fevar dims)*float) */
            float data[3 + self->fieldDim];
            (void)MPI_Recv(data, 3+self->fieldDim, MPI_FLOAT, MPI_ANY_SOURCE, i*self->dim[2]+j, self->context->communicator, MPI_STATUS_IGNORE);
            /* Copy */
            memcpy(self->vertices[i][j], data, 3 * sizeof(float));
            memcpy(self->values[i][j], &data[3], self->fieldDim * sizeof(float));
         }
         else
         {
            /* Found on this proc? */
            if (self->vertices[i][j][0] == HUGE_VAL) continue;

            /* Copy */
            float data[3 + self->fieldDim];
            memcpy(data, self->vertices[i][j], 3 * sizeof(float));
            memcpy(&data[3], self->values[i][j], self->fieldDim * sizeof(float));

            /* Send values to root (pos & value = (3 + fieldDim) floats) */
            MPI_Ssend(data, 3+self->fieldDim, MPI_FLOAT, 0, i*self->dim[2]+j, self->context->communicator);
            localcount--;
         }
      }
   }
   MPI_Barrier(self->context->communicator);    /* Barrier required, prevent subsequent MPI calls from interfering with transfer */
   Journal_Printf(lucInfo, " Gather in %f sec.\n", MPI_Wtime() - time);
   Journal_Firewall(localcount == 0, lucError,
                     "Error - in %s: count of values sampled compared to sent/received by mpi on proc %d does not match (balance = %d)\n",
                     __func__, self->context->rank, localcount);
}
/*
 * Builds a per-element penalty vector from the diagonal of the stiffness
 * matrix and returns it through *_penalty.
 *
 * For every local element, the value stored is the minimum over the element's
 * nodes and dofs (skipping dofs whose equation number is -1) of the nodal
 * values read from the velocity field after the matrix diagonal has been
 * pushed onto the nodes.
 *
 * self     : not used in this function.
 * sle      : supplies the force vectors, stiffness matrix and solution vector.
 * _penalty : receives a new Vec duplicated from sle->hForceVec->vector; it is
 *            not destroyed here.
 *
 * The velocity field's nodal values and the solution vector's Vec pointer are
 * saved on entry and restored before returning. Prints the value range of the
 * diagonal and of the resulting penalty on rank 0, and any negative nodal
 * value encountered on every rank.
 */
void Stokes_SLE_PenaltySolver_MakePenalty( Stokes_SLE_PenaltySolver* self, Stokes_SLE* sle, Vec* _penalty ) {
    Vec fVec = sle->fForceVec->vector, hVec = sle->hForceVec->vector, penalty, lambda;
    Mat kMat = sle->kStiffMat->matrix;
    FeMesh *mesh = sle->kStiffMat->rowVariable->feMesh;
    FeVariable *velField = sle->kStiffMat->rowVariable;
    SolutionVector* uVec = sle->uSolnVec;
    FeEquationNumber *eqNum = uVec->eqNum;
    IArray *inc;
    PetscScalar *lambdaVals, lambdaMin, *penaltyVals;
    int numDofs, numLocalElems, nodeCur, numLocalNodes, rank, eq;
    /* same object as uVec; this alias is the one whose Vec gets swapped below */
    SolutionVector *solVec = sle->uSolnVec;
    double *velBackup;
    Vec vecBackup;
    int ii, jj, kk;

    MPI_Comm_rank( MPI_COMM_WORLD, &rank );

    /* one dof per spatial dimension */
    numDofs = Mesh_GetDimSize( mesh );
    numLocalElems = FeMesh_GetElementLocalSize( mesh );
    numLocalNodes = FeMesh_GetNodeLocalSize( mesh );

    /* save the nodal velocities: they are overwritten further down and restored at the end */
    velBackup = (double*)malloc( numLocalNodes*numDofs*sizeof(double) );
    for( ii = 0; ii < numLocalNodes; ii++ )
        FeVariable_GetValueAtNode( velField, ii, velBackup + ii*numDofs );

    /* penaltyVals is indexed by local element number in the loop below */
    /* NOTE(review): presumably hVec has one local entry per local element - confirm */
    VecDuplicate( hVec, &penalty );
    VecGetArray( penalty, &penaltyVals );

    /* lambda = diagonal of the stiffness matrix */
    VecDuplicate( fVec, &lambda );
    MatGetDiagonal( kMat, lambda );
    {
        PetscInt idx;
        PetscReal min, max;

        VecMin( lambda, &idx, &min );
        VecMax( lambda, &idx, &max );
        if( rank == 0 ) {
           printf( "LAMBDA RANGE:\n" );
           printf( "  MIN: %e\n", min );
           printf( "  MAX: %e\n", max );
        }
    }

    /* temporarily make the solution vector point at lambda and update the nodes from it */
    /* NOTE(review): presumably this writes the diagonal entries into velField's nodal values,
     * which is what the loop below reads back - confirm solVec targets velField */
    vecBackup = solVec->vector;
    solVec->vector = lambda;
    SolutionVector_UpdateSolutionOntoNodes( solVec );

    inc = IArray_New();
    lambdaVals = (double*)malloc( numDofs*sizeof(double) );

    for( ii = 0; ii < numLocalElems; ii++ ) {

        /* running minimum for this element */
        lambdaMin = DBL_MAX;

        FeMesh_GetElementNodes( mesh, ii, inc );
        for( jj = 0; jj < inc->size; jj++ ) {

            nodeCur = inc->ptr[jj];
            FeVariable_GetValueAtNode( velField, nodeCur, lambdaVals );

            for( kk = 0; kk < numDofs; kk++ ) {

                /* dofs mapped to equation -1 are excluded from the minimum */
                eq = eqNum->mapNodeDof2Eq[nodeCur][kk];
                if( eq == -1 )
                    continue;

/*
                eq = *(int*)STreeMap_Map( eqNum->ownedMap, &eq );

                VecGetValues( lambda, 1, &eq, &lambdaVal );
*/

                /* diagnostic output only: negative values are still used for the minimum */
                if( lambdaVals[kk] < 0.0 )
                    printf( "%g\n",  lambdaVals[kk] );
                if( lambdaVals[kk] < lambdaMin )
                    lambdaMin = lambdaVals[kk];

            }
        }

        /* stays DBL_MAX when every dof of the element was skipped */
        penaltyVals[ii] = lambdaMin;

    }

    if( lambdaVals ) free( lambdaVals );
    Stg_Class_Delete( inc );

    /* undo the vector swap */
    solVec->vector = vecBackup;

    /* put the saved velocities back onto the nodes */
    for( ii = 0; ii < numLocalNodes; ii++ )
        FeVariable_SetValueAtNode( velField, ii, velBackup + ii*numDofs );
    if( velBackup ) free( velBackup );
    FeVariable_SyncShadowValues( velField );

    Stg_VecDestroy(&lambda );

    VecRestoreArray( penalty, &penaltyVals );
    VecAssemblyBegin( penalty );
    VecAssemblyEnd( penalty );

    {
        PetscInt idx;
        PetscReal min, max;

        VecMin( penalty, &idx, &min );
        VecMax( penalty, &idx, &max );
        if( rank == 0 ) {
           printf( "SEMI-PENALTY RANGE:\n" );
           printf( "  MIN: %e\n", min );
           printf( "  MAX: %e\n", max );
        }
    }

    /* hand the new vector to the caller */
    *_penalty = penalty;
}