void ParticleMovementHandler_FindParticlesThatHaveMovedOutsideMyDomain( ParticleMovementHandler* self )
{
	Particle_Index		particlesOutsideDomainSize = 0;
	GlobalParticle*         currParticle = NULL;
	Particle_Index		lParticle_I = 0;

	Journal_DPrintfL( self->debug, 1, "In %s():\n", __func__ );
	Stream_IndentBranch( Swarm_Debug );

	self->particlesOutsideDomainTotalCount = 0;
	self->currParticleLeavingMyDomainIndex = 0;
	particlesOutsideDomainSize = self->swarm->particlesArrayDelta;


	Journal_DPrintfL( self->debug, 1, "Checking the owning cell of each of my swarm's %d particles:\n",
		self->swarm->particleLocalCount );
	Stream_IndentBranch( Swarm_Debug );

	for ( lParticle_I=0; lParticle_I < self->swarm->particleLocalCount; lParticle_I++ ) {

		currParticle = (GlobalParticle*)Swarm_ParticleAt( self->swarm, lParticle_I );
		if ( currParticle->owningCell == self->swarm->cellDomainCount ) {
			Journal_DPrintfL( self->debug, 3, "particle %d has moved outside domain to (%.2f,%.2f,%.2f): "
				"saving index\n", lParticle_I, currParticle->coord[0], currParticle->coord[1],
								currParticle->coord[2] );
			if ( self->particlesOutsideDomainTotalCount == particlesOutsideDomainSize ) { 
				particlesOutsideDomainSize += self->swarm->particlesArrayDelta;
				Journal_DPrintfL( self->debug, 3, "(Need more memory to save indexes: increasing from %d to %d.)\n",
					self->particlesOutsideDomainTotalCount, particlesOutsideDomainSize );
				self->particlesOutsideDomainIndices = Memory_Realloc_Array( self->particlesOutsideDomainIndices,
					Particle_Index, particlesOutsideDomainSize );
			}
			self->particlesOutsideDomainIndices[self->particlesOutsideDomainTotalCount++] = lParticle_I;
		}	

	}	
	Stream_UnIndentBranch( Swarm_Debug );

	self->particlesOutsideDomainUnfilledCount = self->particlesOutsideDomainTotalCount;

	#if DEBUG
	{
		Particle_Index		particle_I = 0;
		if ( Stream_IsPrintableLevel( self->debug, 2 ) ) {
			Journal_DPrintf( self->debug, "%d Particles have moved outside my domain:\n\t[",
				self->particlesOutsideDomainTotalCount );
			for ( ; particle_I < self->particlesOutsideDomainTotalCount; particle_I++ ) {
				Journal_DPrintf( self->debug, "%d, ", self->particlesOutsideDomainIndices[particle_I] );
			}
			Journal_DPrintf( self->debug, "]\n" );
		}
	}
	#endif
	Stream_UnIndentBranch( Swarm_Debug );
}
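/* A minimal standalone sketch of the growth idiom used in the loop above: the index
 * array grows by a fixed delta whenever it fills, rather than per element. Plain C
 * realloc is used here instead of the StGermain Memory API, and all names are
 * illustrative (error checking omitted). */
#include <stdlib.h>

typedef struct IndexList {
	unsigned*	indices;
	unsigned	count;
	unsigned	size;
	unsigned	delta;	/* analogous to swarm->particlesArrayDelta */
} IndexList;

static void IndexList_Append( IndexList* list, unsigned value ) {
	if ( list->count == list->size ) {
		/* grow by a fixed delta, as in the function above */
		list->size += list->delta;
		list->indices = realloc( list->indices, list->size * sizeof(unsigned) );
	}
	list->indices[list->count++] = value;
}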
void _Stokes_SLE_UzawaSolver_Build( void* solver, void* stokesSLE ) {
	Stokes_SLE_UzawaSolver*	self  = (Stokes_SLE_UzawaSolver*)solver;
	Stokes_SLE*             sle   = (Stokes_SLE*)stokesSLE;

   /* ok, this is far from satisfactory, but let's just bail if we have not been called from within
      the SLE routine  JM20140618 */
   if( sle == NULL )
      return;
   
 	Journal_DPrintf( self->debug, "In %s\n", __func__ );
	Stream_IndentBranch( StgFEM_Debug );
	
 	Journal_DPrintfL( self->debug, 2, "building a standard solver for the velocity system.\n" );
	/* this was also being built in the _Stokes_SLE_UzawaSolver_AssignFromXML function? */
	KSPCreate( sle->comm, &self->velSolver );
	KSPSetOptionsPrefix( self->velSolver, "Uzawa_velSolver_" );
	
	/* Build Preconditioner */
	if ( self->preconditioner ) {
		Stg_Component_Build( self->preconditioner, stokesSLE, False );
		SystemLinearEquations_AddStiffnessMatrix( sle, self->preconditioner );

		Journal_DPrintfL( self->debug, 2, "build a standard solver for the preconditioner system.\n" );
		KSPCreate( sle->comm, &self->pcSolver );
		KSPSetOptionsPrefix( self->pcSolver, "Uzawa_pcSolver_" );
	}
	else 
		self->pcSolver = PETSC_NULL;

	if( self->pTempVec != PETSC_NULL ) Stg_VecDestroy(&self->pTempVec );
	if( self->rVec != PETSC_NULL )     Stg_VecDestroy(&self->rVec );
	if( self->sVec != PETSC_NULL )     Stg_VecDestroy(&self->sVec );
	if( self->fTempVec != PETSC_NULL ) Stg_VecDestroy(&self->fTempVec );
	if( self->vStarVec != PETSC_NULL ) Stg_VecDestroy(&self->vStarVec );

 	Journal_DPrintfL( self->debug, 2, "Allocate the auxillary vectors pTemp, r, s, fTemp and vStar.\n" ); 
	VecDuplicate( sle->pSolnVec->vector, &self->pTempVec );
	VecDuplicate( sle->pSolnVec->vector, &self->rVec );
	VecDuplicate( sle->pSolnVec->vector, &self->sVec );

	VecDuplicate( sle->fForceVec->vector, &self->fTempVec );
	VecDuplicate( sle->fForceVec->vector, &self->vStarVec );

	/* Needed by the Picard nonlinear solver */
//        Vector_Duplicate( sle->pTempVec->vector, (void**)&self->f_hat );
//        Vector_SetLocalSize( self->vf_hat, Vector_GetLocalSize( sle->pTempVec->vector ) );

	Stream_UnIndentBranch( StgFEM_Debug );
}
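/* A minimal sketch of the plain-PETSc pattern used in the build above: create a KSP
 * with an options prefix and take work vectors from existing solution/force vectors
 * via VecDuplicate. The function name and arguments are illustrative, with CHKERRQ
 * error checking added. */
#include <petscksp.h>

static PetscErrorCode BuildVelocitySolverSketch( MPI_Comm comm, Vec pSoln, Vec fForce,
                                                 KSP* velSolver, Vec* pTemp, Vec* fTemp )
{
	PetscErrorCode ierr;

	ierr = KSPCreate( comm, velSolver );CHKERRQ( ierr );
	/* the prefix lets the solver be tuned from the command line,
	   e.g. -Uzawa_velSolver_ksp_type gmres */
	ierr = KSPSetOptionsPrefix( *velSolver, "Uzawa_velSolver_" );CHKERRQ( ierr );

	/* work vectors take their parallel layout from existing vectors */
	ierr = VecDuplicate( pSoln, pTemp );CHKERRQ( ierr );
	ierr = VecDuplicate( fForce, fTemp );CHKERRQ( ierr );
	return 0;
}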
Particle_Index ParticleMovementHandler_FindFreeSlotAndPrepareForInsertion( ParticleCommHandler* self )
{
	Particle_Index	lParticle_I = 0;

	if ( self->shadowParticlesLeavingMeUnfilledCount > 0 ) {
		Journal_DPrintfL( self->debug, 3, "Still %d holes available from "
			"particles leaving via shadow cells\n-> free slot to add into is %d\n",
			self->shadowParticlesLeavingMeUnfilledCount,
			self->shadowParticlesLeavingMeIndices[self->currShadowParticleLeavingMeIndex]);
			
		lParticle_I = self->shadowParticlesLeavingMeIndices[self->currShadowParticleLeavingMeIndex];

		self->currShadowParticleLeavingMeIndex++;
		self->shadowParticlesLeavingMeUnfilledCount--;
	}
	else if ( self->particlesOutsideDomainUnfilledCount ) {
		Journal_DPrintfL( self->debug, 3, "Still %d holes available from "
			"particles leaving domain direct\n-> free slot to add into is %d\n", 
			self->particlesOutsideDomainUnfilledCount,
			self->particlesOutsideDomainIndices[self->currParticleLeavingMyDomainIndex]);

		lParticle_I = self->particlesOutsideDomainIndices[self->currParticleLeavingMyDomainIndex];

		self->currParticleLeavingMyDomainIndex++;
		self->particlesOutsideDomainUnfilledCount--;
	}
	else {
		Journal_DPrintfL( self->debug, 3, "No holes left from leaving "
			"particles\n-> slot to insert into is end of array %d\n", 
			self->swarm->particleLocalCount );
		lParticle_I = self->swarm->particleLocalCount;
		/*
		if ( self->swarm->particleLocalCount == self->swarm->particlesArraySize ) {
			Journal_DPrintfL( self->debug, 3, "Particles array memory used up "
				"-> increasing from %d entries by %d\n",
				self->swarm->particlesArraySize, self->swarm->particlesArrayDelta );
			self->swarm->particlesArraySize += self->swarm->particlesArrayDelta;
			self->swarm->particles = Memory_Realloc_Array_Bytes(
				self->swarm->particles,
				self->swarm->particleExtensionMgr->finalSize,
				self->swarm->particlesArraySize );
		}
		*/
		self->swarm->particleLocalCount++;
		Swarm_Realloc( self->swarm );
	}

	return lParticle_I;
}
void _ParticleShadowSync_SendParticleTotalsInShadowCellsToNbrs( ParticleCommHandler* self )
{	
	Processor_Index			proc_I;
	ShadowInfo*			cellShadowInfo = CellLayout_GetShadowInfo( self->swarm->cellLayout );
	ProcNbrInfo*			procNbrInfo = cellShadowInfo->procNbrInfo;
	Neighbour_Index		nbr_I, nbrCount;
	Cell_Index              lCellID, cellParticleCount, shadowedCell_I;	

	Journal_DPrintfL( self->debug, 1, "In %s():\n", __func__ );
	Stream_IndentBranch( Swarm_Debug );

	self->shadowParticlesLeavingMeTotalCount = 0;

	nbrCount = procNbrInfo->procNbrCnt;
	for( nbr_I = 0 ; nbr_I < nbrCount ; nbr_I++ ) {
		proc_I = procNbrInfo->procNbrTbl[nbr_I];
		
		self->shadowParticlesLeavingMeTotalCounts[nbr_I] = 0;
		for( shadowedCell_I = 0 ; shadowedCell_I < cellShadowInfo->procShadowedCnt[ nbr_I ]; shadowedCell_I++ ) {
			lCellID = cellShadowInfo->procShadowedTbl[nbr_I][shadowedCell_I];
			cellParticleCount = self->swarm->cellParticleCountTbl[ lCellID ];
			self->shadowParticlesLeavingMeCountsPerCell[nbr_I][shadowedCell_I] = cellParticleCount;

			self->shadowParticlesLeavingMeTotalCounts[nbr_I] += cellParticleCount;
		}
		MPI_Ssend( self->shadowParticlesLeavingMeCountsPerCell[nbr_I], cellShadowInfo->procShadowedCnt[nbr_I], MPI_UNSIGNED,
			proc_I, SHADOW_PARTICLE_COUNTS_PER_CELL, self->swarm->comm );
	}

	Stream_UnIndentBranch( Swarm_Debug );
}
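/* The blocking MPI_Ssend above pairs with a receive on each neighbour. A minimal
 * plain-MPI sketch of that receive side (the buffer size and tag are assumed to
 * match the sender's; names are illustrative, not the ParticleCommHandler API): */
#include <mpi.h>

static void ReceiveShadowCellCountsSketch( unsigned* countsPerCell, int numCells,
                                           int nbrRank, int tag, MPI_Comm comm )
{
	MPI_Status status;
	/* one unsigned count per cell this rank shadows from nbrRank */
	MPI_Recv( countsPerCell, numCells, MPI_UNSIGNED, nbrRank, tag, comm, &status );
}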
void _DomainContext_Delete( void* context ) {
   DomainContext* self = (DomainContext*)context;
   
   Journal_DPrintf( self->debug, "In: %s()\n", __func__ );

   Journal_DPrintfL( self->debug, 2, "Deleting the FieldVariable register (and hence all FieldVariables).\n" );

   /* Stg_Class_Delete parent */
   _AbstractContext_Delete( self );
}
void _Stokes_SLE_UzawaSolver_Destroy( void* solver, void* data ) {
   Stokes_SLE_UzawaSolver* self = (Stokes_SLE_UzawaSolver*) solver;
	Journal_DPrintf( self->debug, "In: %s \n", __func__);

	Stream_IndentBranch( StgFEM_Debug );
	Journal_DPrintfL( self->debug, 2, "Destroying Solver contexts.\n" );
	Stg_KSPDestroy(&self->velSolver );
        if ( self->preconditioner ) { Stg_KSPDestroy(&self->pcSolver ); }

	Journal_DPrintfL( self->debug, 2, "Destroying temporary solver vectors.\n" );
	if( self->pTempVec != PETSC_NULL ) Stg_VecDestroy(&self->pTempVec );
	if( self->rVec != PETSC_NULL )     Stg_VecDestroy(&self->rVec );
	if( self->sVec != PETSC_NULL )     Stg_VecDestroy(&self->sVec );
	if( self->fTempVec != PETSC_NULL ) Stg_VecDestroy(&self->fTempVec );
	if( self->vStarVec != PETSC_NULL ) Stg_VecDestroy(&self->vStarVec );
	Stream_UnIndentBranch( StgFEM_Debug );
   _SLE_Solver_Destroy( self, data );

}
void _Stokes_SLE_UzawaSolver_SolverSetup( void* solver, void* stokesSLE ) {
	Stokes_SLE_UzawaSolver* self = (Stokes_SLE_UzawaSolver*) solver;
	Stokes_SLE*             sle  = (Stokes_SLE*)             stokesSLE;
	
 	Journal_DPrintf( self->debug, "In %s:\n", __func__ );
	Stream_IndentBranch( StgFEM_Debug );

	Journal_DPrintfL( self->debug, 1, "Setting up MatrixSolver for the velocity eqn.\n" );
	Stg_KSPSetOperators( self->velSolver, sle->kStiffMat->matrix, sle->kStiffMat->matrix, DIFFERENT_NONZERO_PATTERN );
  	KSPSetFromOptions( self->velSolver );

	if( self->pcSolver ) {
		Journal_DPrintfL( self->debug, 1, "Setting up MatrixSolver for the Preconditioner.\n" );
		Stg_KSPSetOperators( self->pcSolver, self->preconditioner->matrix, self->preconditioner->matrix, DIFFERENT_NONZERO_PATTERN );
    		KSPSetFromOptions( self->pcSolver );
	}

	Stream_UnIndentBranch( StgFEM_Debug );
}
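/* Stg_KSPSetOperators above is a thin compatibility wrapper. A sketch of the
 * equivalent plain-PETSc call, which dropped the MatStructure argument at PETSc 3.5;
 * this is illustrative only, not the StgFEM wrapper itself. */
#include <petscksp.h>

static PetscErrorCode SetupSolverOperatorsSketch( KSP ksp, Mat K )
{
	PetscErrorCode ierr;
#if ( (PETSC_VERSION_MAJOR==3) && (PETSC_VERSION_MINOR>=5) ) || (PETSC_VERSION_MAJOR>3)
	ierr = KSPSetOperators( ksp, K, K );CHKERRQ( ierr );
#else
	ierr = KSPSetOperators( ksp, K, K, DIFFERENT_NONZERO_PATTERN );CHKERRQ( ierr );
#endif
	ierr = KSPSetFromOptions( ksp );CHKERRQ( ierr );
	return 0;
}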
void _ForceVector_Build( void* forceVector, void* data ) {
   ForceVector* self = (ForceVector*)forceVector;

   _SolutionVector_Build( self, data );

   /* update the size depending on our now built feVariable */
   self->localSize = self->feVariable->eqNum->localEqNumsOwnedCount;

   Stream_IndentBranch( StgFEM_Debug );
   Journal_DPrintfL( self->debug, 2, "Allocating the L.A. Force Vector with %d local entries.\n", self->localSize );
   Stream_UnIndentBranch( StgFEM_Debug );

   Assembler_SetVariables( self->bcAsm, self->feVariable, NULL );
   Assembler_SetCallbacks( self->bcAsm, 
      NULL, 
      ForceVector_BCAsm_RowR, NULL, 
      NULL, NULL, 
      self );
}
void ParticleMovementHandler_ShareAndUpdateParticlesThatHaveMovedOutsideDomains(
		ParticleMovementHandler* self,
		Particle_Index*      globalParticlesArrivingMyDomainCountPtr,
		Particle_Index*      globalParticlesOutsideDomainTotalPtr )
{
	Particle_Index*		globalParticlesOutsideDomainCounts = NULL;		
	Particle_Index		maxGlobalParticlesOutsideDomainCount = 0;		
	Processor_Index		proc_I = 0;
	Particle_Index		lParticle_I = 0;
	Particle_Index		particle_I = 0;

	Journal_DPrintfL( self->debug, 2, "In %s():\n", __func__ );
	Stream_IndentBranch( Swarm_Debug );

	(*globalParticlesArrivingMyDomainCountPtr) = 0;
	(*globalParticlesOutsideDomainTotalPtr) = 0;		

	/* Find the counts of particles	outside domain... */
	ParticleMovementHandler_GetCountOfParticlesOutsideDomainPerProcessor(
		self,
		&globalParticlesOutsideDomainCounts,
		&maxGlobalParticlesOutsideDomainCount,
		globalParticlesOutsideDomainTotalPtr );

	if ( (*globalParticlesOutsideDomainTotalPtr) > 0 ) {
		Particle*		particlesLeavingMyDomain = NULL;
		Particle*		globalParticlesLeavingDomains = NULL;
		SizeT			particlesLeavingDomainSizeBytes = 0;
		Cell_DomainIndex	lCell_I = 0;
		GlobalParticle*	        currParticle = NULL;
		Particle_Index		currProcParticlesOutsideDomainCount = 0;
		Particle_Index		currProcOffset = 0;

		particlesLeavingDomainSizeBytes = self->swarm->particleExtensionMgr->finalSize
			* maxGlobalParticlesOutsideDomainCount;
		particlesLeavingMyDomain = Memory_Alloc_Bytes( particlesLeavingDomainSizeBytes, "Particle",
			"particlesLeavingMyDomain" );

		// TODO: investigate doing this with an MPI_Indexed datatype instead...
		Journal_DPrintfL( self->debug, 2, "Copying particles leaving my domain to temp. transfer array\n" );
		Stream_IndentBranch( Swarm_Debug );

		#if 0
		MPI_Type_indexed( 
			self->particlesOutsideDomainTotalCount,
			blocklens,
			self->particlesOutsideDomainIndices,//change to contiguous indices?
			MPI_BYTE,
			ParticlesLeavingDomainTransferIndexed
			);
		#endif	

		for ( particle_I=0; particle_I < self->particlesOutsideDomainTotalCount; particle_I++ ) {
			Journal_DPrintfL( self->debug, 3, "Copying particle %d to particlesLeavingMyDomain[%d]\n",
				self->particlesOutsideDomainIndices[particle_I], particle_I );
			Swarm_CopyParticleOffSwarm( self->swarm,
				particlesLeavingMyDomain, particle_I,
				self->particlesOutsideDomainIndices[particle_I] );
		}	
		Stream_UnIndentBranch( Swarm_Debug );

		/* allocate the big global receive buffer */
		globalParticlesLeavingDomains = Memory_Alloc_Bytes( particlesLeavingDomainSizeBytes * self->swarm->nProc,
			"Particle", "globalParticlesLeavingDomains" );

		Journal_DPrintfL( self->debug, 2, "Getting the global array of particles leaving domains\n" );
		(void)MPI_Allgather( particlesLeavingMyDomain, particlesLeavingDomainSizeBytes, MPI_BYTE,
			globalParticlesLeavingDomains, particlesLeavingDomainSizeBytes, MPI_BYTE,
			self->swarm->comm );

		Journal_DPrintfL( self->debug, 2, "Checking through the global array of particles leaving domains, "
			"and snaffling those moving into my domain:\n" );
		Stream_IndentBranch( Swarm_Debug );
		for ( proc_I=0; proc_I < self->swarm->nProc; proc_I++ ) {

			if ( proc_I == self->swarm->myRank ) continue;

			currProcOffset = proc_I * maxGlobalParticlesOutsideDomainCount;
			currProcParticlesOutsideDomainCount = globalParticlesOutsideDomainCounts[proc_I];
			
			Journal_DPrintfL( self->debug, 3, "Checking particles that left proc. %d:\n", proc_I );
			for ( particle_I=0; particle_I < currProcParticlesOutsideDomainCount; particle_I++ ) {
				currParticle = (GlobalParticle*)ParticleAt( globalParticlesLeavingDomains,
					(currProcOffset + particle_I),
					self->swarm->particleExtensionMgr->finalSize );
				lCell_I = CellLayout_CellOf( self->swarm->cellLayout, currParticle );
				if ( lCell_I < self->swarm->cellLocalCount ) { 
					#if DEBUG
					Journal_DPrintfL( self->debug, 3, "Found particle at (%.2f,%.2f,%.2f) that's moved "
						"into my local cell %d...\n", currParticle->coord[0],
						currParticle->coord[1], currParticle->coord[2], lCell_I );
					#endif	
					
					/* copy particle to the lowest available slot in my particles array */
					lParticle_I = ParticleMovementHandler_FindFreeSlotAndPrepareForInsertion( (ParticleCommHandler*)self );

					Swarm_CopyParticleOntoSwarm( self->swarm, lParticle_I,
						globalParticlesLeavingDomains, (currProcOffset + particle_I) );
					Swarm_AddParticleToCell( self->swarm, lCell_I, lParticle_I );
					(*globalParticlesArrivingMyDomainCountPtr)++;
				}
				#if DEBUG
				else {
					currParticle = (GlobalParticle*)ParticleAt( globalParticlesLeavingDomains, 
						(currProcOffset + particle_I),
						self->swarm->particleExtensionMgr->finalSize );
					Journal_DPrintfL( self->debug, 3, "Ignoring particle at (%.2f,%.2f,%.2f) since "
						"not in my local cells...\n", currParticle->coord[0],
						currParticle->coord[1], currParticle->coord[2] );
				}
				#endif
			}		
		}	
		Stream_UnIndentBranch( Swarm_Debug );

		Memory_Free( particlesLeavingMyDomain );
		Memory_Free( globalParticlesLeavingDomains );

		/* Defensive check to make sure particles are not lost or created accidentally */
		if( self->defensive == True ) {
			ParticleMovementHandler_EnsureParticleCountLeavingDomainsEqualsCountEnteringGlobally( self );
		}
	}	
	Memory_Free( globalParticlesOutsideDomainCounts );
	Stream_UnIndentBranch( Swarm_Debug );
}
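/* The exchange above pads every rank's outgoing particles to the global maximum count
 * so a plain MPI_Allgather can be used and rank i's block found at offset i * max.
 * A standalone sketch of that padding pattern (plain MPI, hypothetical names): */
#include <mpi.h>
#include <stdlib.h>
#include <string.h>

/* Returns a malloc'd buffer of nProc fixed-size blocks; caller frees it. */
static void* AllgatherPaddedSketch( const void* myItems, size_t myCount,
                                    size_t maxCountPerRank, size_t itemSize, MPI_Comm comm )
{
	int    nProc;
	size_t blockBytes = maxCountPerRank * itemSize;
	char*  sendBuf = calloc( 1, blockBytes );	/* zero padding past myCount */
	char*  recvBuf;

	MPI_Comm_size( comm, &nProc );
	recvBuf = malloc( blockBytes * (size_t)nProc );
	memcpy( sendBuf, myItems, myCount * itemSize );

	MPI_Allgather( sendBuf, (int)blockBytes, MPI_BYTE,
	               recvBuf, (int)blockBytes, MPI_BYTE, comm );

	free( sendBuf );
	return recvBuf;	/* rank i's block starts at (char*)recvBuf + i * blockBytes */
}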
void _GlobalParticleLayout_InitialiseParticles( void* particleLayout, void* _swarm )
{
	GlobalParticleLayout*	self = (GlobalParticleLayout*)particleLayout;
	Swarm*			swarm = (Swarm*)_swarm;
	GlobalParticle*         particle = NULL;
	Particle_Index		lParticle_I=0;
	Particle_Index		newParticle_I=0;
	Cell_Index		cell_I;
	Particle_Index          globalParticlesInitialisedCount=0;
	Stream*                 errorStream = Journal_Register( Error_Type, self->type );

	Journal_DPrintf( self->debug, "In %s(): for ParticleLayout \"%s\" (of type %s):\n",
		__func__, self->name, self->type );
	Stream_IndentBranch( Swarm_Debug );

	Journal_DPrintf( self->debug, "For each of the %u total global requested particles, "
		"generating a particle, and checking if it's in this processor's domain. If so, "
		"adding it to the appropriate local cell.\n", self->totalInitialParticles );
	Stream_IndentBranch( Swarm_Debug );

	while( newParticle_I < self->totalInitialParticles ) {
		
		particle = (GlobalParticle*)Swarm_ParticleAt( swarm, lParticle_I );
		GlobalParticleLayout_InitialiseParticle( self, swarm, newParticle_I, particle );
		/* Work out which cell the new particle is in */
		/* First specify that the particle doesn't have an owning cell yet, so as
		not to confuse the search algorithm if it's an irregular cell/mesh layout */
		particle->owningCell = swarm->cellDomainCount;

		cell_I = CellLayout_CellOf( swarm->cellLayout, particle );

		/* If the new particle is inside our domain, add it to the appropriate cell */
		if ( cell_I < swarm->cellLocalCount ) {
			Journal_DPrintfL( self->debug, 3, "global particle %u at (%.2f,%.2f,%.2f) inside local cell %u\n"
				"adding it to cell and saving it as local particle %u.\n",
				newParticle_I, particle->coord[0], particle->coord[1], particle->coord[2],
				cell_I, lParticle_I );
				
			Stream_IndentBranch( Swarm_Debug );
			/* Add it to that cell */
			Swarm_AddParticleToCell( swarm, cell_I, lParticle_I );
			lParticle_I++;
			swarm->particleLocalCount++;
			Swarm_Realloc( swarm );
			Stream_UnIndentBranch( Swarm_Debug );
		}
		else {
			Journal_DPrintfL( self->debug, 4, "global particle %u at (%.2f,%.2f,%.2f) outside this proc's domain:\n"
				"ignoring.\n", newParticle_I, particle->coord[0], particle->coord[1], particle->coord[2] );
		}		
				
		newParticle_I++;
	}

	Stream_UnIndentBranch( Swarm_Debug );

	/* Check that the total number of particles assigned across all processors equals the
		requested totalInitialParticles count */
	MPI_Allreduce( &swarm->particleLocalCount, &globalParticlesInitialisedCount, 1, MPI_UNSIGNED, MPI_SUM, swarm->comm );
	Journal_Firewall( globalParticlesInitialisedCount == self->totalInitialParticles, errorStream,
		"Error - in %s() - for GlobalParticleLayout \"%s\", of type %s: after initialising particles, "
		"actual global count of particles initialised was %u, whereas requested global total "
		"totalInitialParticles was %u. If actual is < requested, it means some particles were not "
		"identified by any processor as inside their domain. If actual > requested, it means that "
		"some particles were identified by _multiple_ processors as belonging to their domain. Both "
		"these states are erroneous.\n",
		__func__, self->name, self->type, globalParticlesInitialisedCount, self->totalInitialParticles );

	Stream_UnIndentBranch( Swarm_Debug );
}
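/* A standalone sketch of the global count check performed above, using plain MPI and a
 * simple abort in place of Journal_Firewall (names are illustrative): */
#include <mpi.h>
#include <stdio.h>

static void CheckGlobalParticleCountSketch( unsigned localCount, unsigned requestedTotal, MPI_Comm comm )
{
	unsigned globalCount = 0;

	MPI_Allreduce( &localCount, &globalCount, 1, MPI_UNSIGNED, MPI_SUM, comm );
	if ( globalCount != requestedTotal ) {
		fprintf( stderr, "Initialised %u particles globally, but %u were requested\n",
			globalCount, requestedTotal );
		MPI_Abort( comm, 1 );
	}
}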
void ForceVector_GlobalAssembly_General( void* forceVector ) {
	ForceVector*            self                 = (ForceVector*) forceVector;
	FeVariable*             feVar                = self->feVariable;
	Element_LocalIndex      element_lI;
	Element_LocalIndex      elementLocalCount;
	Node_ElementLocalIndex  nodeCountCurrElement = 0;
	Element_Nodes           nodeIdsInCurrElement = 0;
	Dof_Index               totalDofsThisElement = 0;
	Dof_Index               totalDofsPrevElement = 0;
	Dof_Index               dofCountLastNode     = 0;
	Dof_EquationNumber**    elementLM            = NULL;
	double*                 elForceVecToAdd      = NULL;
	/* For output printing */
	double                  outputPercentage=10;	/* Controls how often to give a status update of assembly progress */
	int                     outputInterval;

	Journal_DPrintf( self->debug, "In %s - for vector \"%s\"\n", __func__, self->name );
	
	Stream_IndentBranch( StgFEM_Debug );
	
	if ( Stg_ObjectList_Count( self->forceTermList ) > 0 ) {
		elementLocalCount = FeMesh_GetElementLocalSize( feVar->feMesh );

		/* Initialise Vector */
		outputInterval = (int)( (outputPercentage/100.0)*(double)(elementLocalCount) );
		if( outputInterval == 0 ) { outputInterval = elementLocalCount; }
	
		for( element_lI = 0; element_lI < elementLocalCount; element_lI++ ) {  
			unsigned	nInc, *inc;
		
			FeMesh_GetElementNodes( feVar->feMesh, element_lI, self->inc );
			nInc = IArray_GetSize( self->inc );
			inc = IArray_GetPtr( self->inc );
			nodeCountCurrElement = nInc;
			/* Get the local node ids */
			nodeIdsInCurrElement = inc;

			/* Set value of elementLM: will automatically just index into global LM table if built */
			elementLM = FeEquationNumber_BuildOneElementLocationMatrix( feVar->eqNum, element_lI );

			/* work out number of dofs at the node, using LM */
			/* By definition, the number of entries in the LM table for this element equals the number of dofs for this element */
			dofCountLastNode = feVar->dofLayout->dofCounts[nodeIdsInCurrElement[nodeCountCurrElement-1]]; 
			totalDofsThisElement = &elementLM[nodeCountCurrElement-1][dofCountLastNode-1] - &elementLM[0][0] + 1;

			if ( totalDofsThisElement > totalDofsPrevElement ) {
				if (elForceVecToAdd) Memory_Free( elForceVecToAdd );
				Journal_DPrintfL( self->debug, 2, "Reallocating elForceVecToAdd to size %d\n", totalDofsThisElement );
				elForceVecToAdd = Memory_Alloc_Array( double, totalDofsThisElement, "elForceVecToAdd" );
			}

			/* Initialise Values to Zero */
			memset( elForceVecToAdd, 0, totalDofsThisElement * sizeof(double) );
		
			/* Assemble this element's element force vector: going through each force term in list */
			ForceVector_AssembleElement( self, element_lI, elForceVecToAdd );


	        /* When keeping BCs in we come across a bit of a problem in parallel. We're not
	           allowed to add entries to the force vector here and then clobber it later with
	           an insert in order to set the BC. So, what we'll do is just add zero here, that
	           way later we can add the BC and it will be the same as inserting it.
	           --- Luke, 20 May 2008 */
	        if( !self->feVariable->eqNum->removeBCs ) {
	           DofLayout* dofs;
	           int nDofs, curInd;
	           int ii, jj;

	           dofs = self->feVariable->dofLayout; /* shortcut to the dof layout */
	           curInd = 0; /* need a counter to track where we are in the element force vector */
	           for( ii = 0; ii < nodeCountCurrElement; ii++ ) {
	              nDofs = dofs->dofCounts[inc[ii]]; /* number of dofs on this node */
	              for( jj = 0; jj < nDofs; jj++ ) {
	                 if( !FeVariable_IsBC( self->feVariable, inc[ii], jj ) ) {
	                    curInd++;
	                    continue; /* only need to clear it if it's a bc */
	                 }
	                 elForceVecToAdd[curInd] = 0.0;
	                 curInd++;
	              }
	           }
	        }

			/* Ok, assemble into global matrix */
			//Vector_AddEntries( self->vector, totalDofsThisElement, (Index*)(elementLM[0]), elForceVecToAdd );
			VecSetValues( self->vector, totalDofsThisElement, (PetscInt*)elementLM[0], elForceVecToAdd, ADD_VALUES );

#if DEBUG
			if( element_lI % outputInterval == 0 ) {
				Journal_DPrintfL( self->debug, 2, "done %d percent of global force vector assembly (general) \n",
						  (int)(100.0*((double)element_lI/(double)elementLocalCount)) );
			}
#endif

			/* Cleanup: If we haven't built the big LM for all elements, free the temporary one */
			if ( False == feVar->eqNum->locationMatrixBuilt ) {
				Memory_Free( elementLM );
			}
			totalDofsPrevElement = totalDofsThisElement;
		}

		Memory_Free( elForceVecToAdd );
	}

	Stream_UnIndentBranch( StgFEM_Debug );
}
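/* The assembly loop above accumulates each element force vector with
 * VecSetValues(..., ADD_VALUES); before the vector can be used, the
 * VecAssemblyBegin/End pair must be called. A minimal plain-PETSc sketch of that
 * pattern (illustrative only, not the ForceVector API): */
#include <petscvec.h>

static PetscErrorCode AddElementContributionSketch( Vec f, PetscInt nDofs,
                                                    const PetscInt* globalEqNums,
                                                    const PetscScalar* elementVec )
{
	PetscErrorCode ierr;
	ierr = VecSetValues( f, nDofs, globalEqNums, elementVec, ADD_VALUES );CHKERRQ( ierr );
	return 0;
}

static PetscErrorCode FinaliseForceVectorSketch( Vec f )
{
	PetscErrorCode ierr;
	/* called once, after all element contributions have been added */
	ierr = VecAssemblyBegin( f );CHKERRQ( ierr );
	ierr = VecAssemblyEnd( f );CHKERRQ( ierr );
	return 0;
}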
Particle_Index* ParticleMovementHandler_MergeListsOfUnfilledParticleSlots( ParticleCommHandler* self )
{
	Particle_Index*		mergedLeavingParticleArray = NULL;
	Particle_Index		slotsToFillTotalCount = 0;
	Index			currMergedLeavingParticleEntry = 0;
	Index			lowestUnmergedLeavingViaShadow = self->currShadowParticleLeavingMeIndex;
	Index			lowestUnmergedLeavingDomain = self->currParticleLeavingMyDomainIndex;
	Particle_Index		indexOfLowestUnmergedLeavingDomain = 0;
	Index*			lowestUnmergedLeavingEntryToUpdatePtr = NULL;
	Particle_Index		candidateMergeParticle = 0;

	Journal_DPrintfL( self->debug, 1, "In %s():\n", __func__ );
	Stream_Indent( self->debug );

	slotsToFillTotalCount = self->particlesOutsideDomainUnfilledCount + self->shadowParticlesLeavingMeUnfilledCount;
	mergedLeavingParticleArray = Memory_Alloc_Array( Particle_Index, slotsToFillTotalCount, "mergedLeavingParticlesArray" );

	while ( currMergedLeavingParticleEntry < slotsToFillTotalCount ) {
		/* Need to reinitialise this to the max particle count every iteration: if the first condition
		below is false, the second will then always update it. */
		candidateMergeParticle = self->swarm->particleLocalCount;
		 
		if ( lowestUnmergedLeavingViaShadow < self->shadowParticlesLeavingMeTotalCount ) {
			candidateMergeParticle = self->shadowParticlesLeavingMeIndices[lowestUnmergedLeavingViaShadow];
			lowestUnmergedLeavingEntryToUpdatePtr = &lowestUnmergedLeavingViaShadow;
		}
		if ( lowestUnmergedLeavingDomain < self->particlesOutsideDomainTotalCount ) {
			indexOfLowestUnmergedLeavingDomain = self->particlesOutsideDomainIndices[lowestUnmergedLeavingDomain];

			if ( indexOfLowestUnmergedLeavingDomain < candidateMergeParticle ) { 
				candidateMergeParticle = indexOfLowestUnmergedLeavingDomain;
				lowestUnmergedLeavingEntryToUpdatePtr = &lowestUnmergedLeavingDomain;
			}	
		}
		
		mergedLeavingParticleArray[currMergedLeavingParticleEntry++] = candidateMergeParticle;
		(*lowestUnmergedLeavingEntryToUpdatePtr)++;

		#if DEBUG
		Journal_Firewall( lowestUnmergedLeavingViaShadow <= self->shadowParticlesLeavingMeTotalCount,
			Swarm_Error, "Error: merging of unfilled particle lists stuffed up.\n" );
		Journal_Firewall( lowestUnmergedLeavingDomain <= self->particlesOutsideDomainTotalCount,
			Swarm_Error, "Error: merging of unfilled particle lists stuffed up.\n" );
		#endif
	}

	#if DEBUG
	if ( Stream_IsPrintableLevel( self->debug, 2 ) ) {
		Journal_DPrintf( self->debug, "Merged list of particles leaving proc:\n\t{" );
		for ( currMergedLeavingParticleEntry=0; currMergedLeavingParticleEntry < slotsToFillTotalCount;
			currMergedLeavingParticleEntry++ ) 
		{
			Journal_DPrintf( self->debug, "%d, ",
				mergedLeavingParticleArray[currMergedLeavingParticleEntry] );
		}
		Journal_DPrintf( self->debug, "}\n" );
	}
	#endif
	
	Stream_UnIndent( self->debug );

	return mergedLeavingParticleArray;
}
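/* A standalone sketch of the two-way merge performed above: given two ascending index
 * lists, produce a single merged ascending list. This mirrors the algorithm only; the
 * ParticleCommHandler bookkeeping is omitted and names are illustrative. */
#include <stdlib.h>

/* Returns a malloc'd array of aCount + bCount entries; caller frees it. */
static unsigned* MergeSortedIndexListsSketch( const unsigned* a, unsigned aCount,
                                              const unsigned* b, unsigned bCount )
{
	unsigned* merged = malloc( (aCount + bCount) * sizeof(unsigned) );
	unsigned  ai = 0, bi = 0, mi = 0;

	while ( ai < aCount && bi < bCount )
		merged[mi++] = ( a[ai] < b[bi] ) ? a[ai++] : b[bi++];
	while ( ai < aCount )
		merged[mi++] = a[ai++];
	while ( bi < bCount )
		merged[mi++] = b[bi++];
	return merged;
}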
void ParticleMovementHandler_FillRemainingHolesInLocalParticlesArray( ParticleCommHandler* self )
{
	Particle_Index		prevParticlesArraySize = self->swarm->particlesArraySize;
	Particle_Index		numHolesToFill;
	Particle_InCellIndex	cParticle_I;
	StandardParticle*	oldPtrToMovedParticle;
	Cell_LocalIndex		owningCell;
	Particle_Index		indexToInsertAt;
	Particle_Index*		leavingParticlesArray = NULL;
	Index			currLeavingParticleArrayEntry = 0;
	Index			highestLeavingParticleArrayEntry;
	Index			leavingParticlesUnfilledCount = 0;
	Particle_Index		highestLeavingParticleIndex;
	Particle_Index		candidateParticleToMove;
	Bool			finishedFlag = False;
	Bool			mergedArrayCreated = False;
	Particle_Index		prevParticleCount = self->swarm->particleLocalCount;
	
	Journal_DPrintf( self->debug, "In %s():\n", __func__ );
	Stream_IndentBranch( Swarm_Debug );

	numHolesToFill = self->particlesOutsideDomainUnfilledCount + self->shadowParticlesLeavingMeUnfilledCount;
	if ( numHolesToFill == 0 ) {
		Journal_DPrintfL( self->debug, 2, "No holes to fill -> nothing to do, returning.\n" );
		Stream_UnIndentBranch( Swarm_Debug );
		return;
	}
	#if DEBUG
	if ( Stream_IsPrintableLevel( self->debug, 2 ) ) {
		ParticleMovementHandler_PrintParticleSlotsYetToFill( self );
	}	
	#endif

	/* work out the list we have to iterate over: */
	if ( self->shadowParticlesLeavingMeUnfilledCount && !self->particlesOutsideDomainUnfilledCount ) {
		Journal_DPrintfL( self->debug, 2, "Particles have only left via shadow cells -> no need to merge lists\n" );
		leavingParticlesArray = &self->shadowParticlesLeavingMeIndices[self->currShadowParticleLeavingMeIndex];
	}
	else if ( self->particlesOutsideDomainUnfilledCount && !self->shadowParticlesLeavingMeUnfilledCount ) {
		Journal_DPrintfL( self->debug, 2, "Particles have only left domain directly -> no need to merge lists\n" );
		leavingParticlesArray = &self->particlesOutsideDomainIndices[self->currParticleLeavingMyDomainIndex];
	} 
	else {
		Journal_DPrintfL( self->debug, 2, "Particles have left both via shadow cells and domain directly -> merge lists\n" );
		leavingParticlesArray = ParticleMovementHandler_MergeListsOfUnfilledParticleSlots( self );
		mergedArrayCreated = True;
	}

	/* Ok: while there are holes left to fill, find the highest candidate, move it, and reduce the count. */
	Journal_DPrintfL( self->debug, 2, "Starting run through the %d particles to fill:\n", numHolesToFill );
	Stream_IndentBranch( Swarm_Debug );

	currLeavingParticleArrayEntry = 0;
	highestLeavingParticleArrayEntry = numHolesToFill-1;
	leavingParticlesUnfilledCount = numHolesToFill;

	while ( leavingParticlesUnfilledCount > 0 ) {

		indexToInsertAt = leavingParticlesArray[currLeavingParticleArrayEntry];
		Journal_DPrintfL( self->debug, 3, "Attempting to fill leaving slot %d (at particle index %d):\n",
			currLeavingParticleArrayEntry, indexToInsertAt );

		Stream_Indent( self->debug );

		/* This is where we work out the index of which particle to move into the free slot.
		 * We start from the end of the particles array, then decrement by 1 until we find a candidate that
		 * hasn't itself already left.
		 * We also need to consider the possibility that every candidate higher than the current index
		 * has also left, in which case we are done, and exit the while loop.
		 * See the ParticleCommHandler Twiki page for diagrams illustrating this algorithm.
		 */

		candidateParticleToMove = self->swarm->particleLocalCount-1;
		highestLeavingParticleIndex = leavingParticlesArray[highestLeavingParticleArrayEntry];
		Journal_DPrintfL( self->debug, 3, "Searching for highest particle that hasn't also moved:\n" );
		Stream_Indent( self->debug );	

		while ( candidateParticleToMove == leavingParticlesArray[highestLeavingParticleArrayEntry] ) {
			/* Check if that was the last candidate particle above the current one: */
			/* This test needs to be at the top of this loop to handle the case where we have one
			particle that's leaving */

			if ( candidateParticleToMove <= indexToInsertAt ) {
				Journal_DPrintfL( self->debug, 3, "** No more particles above current "
					"hole %d to fill: we're done. **\n", indexToInsertAt );
				/* Need the line below to mark the fact we failed to fill the current indexToInsertAt hole */
				self->swarm->particleLocalCount--;
				finishedFlag = True;
				break;
			}

			Journal_DPrintfL( self->debug, 3, "Candidate particle %d has also left...\n",
				candidateParticleToMove );

			highestLeavingParticleArrayEntry--;
			highestLeavingParticleIndex = leavingParticlesArray[highestLeavingParticleArrayEntry];
			leavingParticlesUnfilledCount--;
			self->swarm->particleLocalCount--;
			candidateParticleToMove--;
		}	
		Stream_UnIndent( self->debug );	

		if ( True == finishedFlag ) {
			/* We must have hit the "no more candidate particles" criterion in the search loop, so
			 * quit trying to fill empty holes entirely. */
			Stream_UnIndent( self->debug );
			break;
		}

		Journal_DPrintfL( self->debug, 3, "Highest valid particle found at index %d:\n",
			candidateParticleToMove );
		Journal_DFirewall( (candidateParticleToMove > indexToInsertAt), Swarm_Error,
			"Error in %s: Empty hole filling\nalgorithm has stuffed up somehow,"
			" since particle to be moved %d is <= slot to insert into %d.\n",
			__func__, candidateParticleToMove, indexToInsertAt );
		Stream_Indent( self->debug );	

		Journal_DPrintfL( self->debug, 3, "Copying particle data from %d to %d\n",
			candidateParticleToMove, indexToInsertAt );
		Swarm_CopyParticleWithinSwarm( self->swarm, indexToInsertAt, candidateParticleToMove );

		/* update the cell that the moved particle lives in to have the correct index into the
		 * particle array for it. */
		oldPtrToMovedParticle = Swarm_ParticleAt( self->swarm, candidateParticleToMove );
		owningCell = oldPtrToMovedParticle->owningCell;
		cParticle_I = Swarm_GetParticleIndexWithinCell( self->swarm, owningCell, candidateParticleToMove );
		Journal_DPrintfL( self->debug, 3, "Updating owning cell: (Cell %d, PIC index %d) now -> p.i. %d\n",
			owningCell, cParticle_I, indexToInsertAt );
		self->swarm->cellParticleTbl[owningCell][cParticle_I] = indexToInsertAt;


		Stream_UnIndent( self->debug );	

		/* update the counters/indices */
		currLeavingParticleArrayEntry++;
		leavingParticlesUnfilledCount--;
		self->swarm->particleLocalCount--;

		Stream_UnIndent( self->debug );
	}
	Stream_UnIndentBranch( Swarm_Debug );

	/* we only need to free the array of leaving particle slots if it's a newly created merged list */
	if ( mergedArrayCreated == True ) {
		Memory_Free( leavingParticlesArray );
	}

	/* ------------------------- */
	Journal_DPrintfL( self->debug, 2, "Local particle count reduced from %d to %d\n", prevParticleCount,
		self->swarm->particleLocalCount );

	/* Update the memory allocated to the particles array if particle count has reduced significantly */
	while ( self->swarm->particlesArraySize > self->swarm->particleLocalCount + self->swarm->particlesArrayDelta ) {
		self->swarm->particlesArraySize -= self->swarm->particlesArrayDelta;
	}
	if ( self->swarm->particlesArraySize < prevParticlesArraySize ) {
		Journal_DPrintfL( self->debug, 2, "Reducing particles array entries from %d to %d\n",
			prevParticlesArraySize, self->swarm->particlesArraySize );
		Swarm_Realloc( self->swarm );
		/*
		self->swarm->particles = Memory_Realloc_Array_Bytes(
			self->swarm->particles,
			self->swarm->particleExtensionMgr->finalSize,
			self->swarm->particlesArraySize );
		*/
	}	

	Stream_UnIndentBranch( Swarm_Debug );
}
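/* A standalone sketch of the hole-filling strategy above: for each hole (ascending),
 * move the highest element that has not itself left down into it, and shrink the
 * array. This mirrors the algorithm only; the per-cell index updates done by the
 * swarm are omitted and all names are illustrative. */
#include <stddef.h>

/* 'holes' must be sorted ascending; returns the new logical length. */
static unsigned CompactArraySketch( double* items, unsigned count,
                                    const unsigned* holes, unsigned numHoles )
{
	unsigned fillFrom = 0;		/* next hole to fill, lowest first */
	unsigned skipFrom = numHoles;	/* one past the highest hole not yet skipped */
	unsigned candidate;		/* highest element that hasn't itself left */

	while ( fillFrom < skipFrom ) {
		candidate = count - 1;
		/* skip candidates at the top end that are themselves holes */
		while ( skipFrom > fillFrom && candidate == holes[skipFrom - 1] ) {
			skipFrom--;
			count--;
			candidate--;
		}
		if ( fillFrom >= skipFrom || candidate <= holes[fillFrom] )
			break;			/* nothing valid above the next hole: done */
		items[ holes[fillFrom++] ] = items[candidate];
		count--;
	}
	return count;
}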
void StgFEM_GMG_SolverSetup( void* _solver, void* _stokesSLE ) {
    StgFEM_GMG* self = StgFEM_GMG_selfPointer;
    Stokes_SLE_UzawaSolver* solver = (Stokes_SLE_UzawaSolver*)_solver;
    Stokes_SLE* sle = (Stokes_SLE*)_stokesSLE;
    KSP ksp = solver->velSolver;
    PC pc;
    int ii;

    Journal_DPrintf( solver->debug, "In %s:\n", __func__ );
    Stream_IndentBranch( StgFEM_Debug );

    KSPSetType( ksp, KSPFGMRES );
    KSPGetPC( ksp, &pc );
    PCSetType( pc, PCMG );
    PCMGSetLevels( pc, self->numLevels, PETSC_NULL );
    PCMGSetType( pc, PC_MG_MULTIPLICATIVE );
    #if ((PETSC_VERSION_MAJOR==3) && (PETSC_VERSION_MINOR>=2) )
    PCMGSetGalerkin( pc, PETSC_TRUE );
    #else
    PCMGSetGalerkin( pc );
    #endif
    /* Set the operators for each level. */
    {
        Mat *pOps, *rOps;
        MGOpGenerator_SetNumLevels( self->opGen, self->numLevels );
        MGOpGenerator_Generate( self->opGen, &pOps, &rOps );
        for( ii = 1; ii < self->numLevels; ii++ )
            PCMGSetInterpolation( pc, ii, pOps[ii] );
        Memory_Free( pOps );
        Memory_Free( rOps );
    }

    /* Set the solvers on each level. */
    for( ii = 1; ii < self->numLevels; ii++ ) {
        KSP smoother;
        PC smPc;
        PCMGGetSmoother( pc, ii, &smoother );
        KSPSetType( smoother, KSPRICHARDSON );
        KSPGetPC( smoother, &smPc );
        PCSetType( smPc, PCSOR );
        KSPSetTolerances( smoother, PETSC_DEFAULT, PETSC_DEFAULT,
                          PETSC_DEFAULT, 2 );
    }
    

    Stg_KSPSetOperators( ksp, sle->kStiffMat->matrix, sle->kStiffMat->matrix,
                     DIFFERENT_NONZERO_PATTERN );
    KSPSetFromOptions( ksp );

    /* Because we stole the setup routine we need to do this too. */
    if( solver->pcSolver ) {
        Journal_DPrintfL( solver->debug, 1,
                          "Setting up MatrixSolver for the "
                          "Preconditioner.\n" );
        Stg_KSPSetOperators( solver->pcSolver, solver->preconditioner->matrix,
                         solver->preconditioner->matrix,
                         DIFFERENT_NONZERO_PATTERN );
        KSPSetFromOptions( solver->pcSolver );
    }

    Stream_UnIndentBranch( StgFEM_Debug );
}
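/* A plain-PETSc sketch of the per-level smoother configuration above: levels
 * 1..numLevels-1 each get a Richardson/SOR smoother capped at two iterations.
 * Illustrative only; the Galerkin coarse-operator and interpolation setup are
 * omitted because their APIs vary between PETSc versions. */
#include <petscksp.h>

static PetscErrorCode ConfigureMGSmoothersSketch( PC pc, PetscInt numLevels )
{
    PetscErrorCode ierr;
    PetscInt       level;

    for( level = 1; level < numLevels; level++ ) {
        KSP smoother;
        PC  smootherPc;

        ierr = PCMGGetSmoother( pc, level, &smoother );CHKERRQ( ierr );
        ierr = KSPSetType( smoother, KSPRICHARDSON );CHKERRQ( ierr );
        ierr = KSPGetPC( smoother, &smootherPc );CHKERRQ( ierr );
        ierr = PCSetType( smootherPc, PCSOR );CHKERRQ( ierr );
        /* keep default tolerances; cap at two smoothing iterations per level */
        ierr = KSPSetTolerances( smoother, PETSC_DEFAULT, PETSC_DEFAULT,
                                 PETSC_DEFAULT, 2 );CHKERRQ( ierr );
    }
    return 0;
}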
void SolutionVector_UpdateSolutionOntoNodes( void* solutionVector ) {
	SolutionVector*		self = (SolutionVector *)solutionVector;
	double*			localSolnVecValues;
	Node_LocalIndex 	lNode_I = 0;
	Dof_Index		currNodeNumDofs;
	Dof_Index		nodeLocalDof_I;
	Partition_Index		ownerProc;
	FeVariable*		feVar = self->feVariable;
	FeMesh*			feMesh = feVar->feMesh;
	MPI_Comm		mpiComm;
	FeEquationNumber*	eqNum = self->eqNum;
	Dof_EquationNumber	currEqNum;
	Index			indexIntoLocalSolnVecValues;
	Index*			reqFromOthersCounts;
	Index*			reqFromOthersSizes;
	RequestInfo**		reqFromOthersInfos;
	Dof_EquationNumber**	reqFromOthers;
	Comm*			comm;
	Partition_Index		nProc;
	Partition_Index		myRank;
	Partition_Index		proc_I;
	double			initialGuessAtNonLocalEqNumsRatio = 0.1;
	double			ratioToIncreaseRequestArraySize = 1.5;
	Index			newReqFromOthersSize;

	Journal_DPrintf( self->debug, "In %s - for \"%s\"\n", __func__, self->name );
	Stream_IndentBranch( StgFEM_Debug );

	#if DEBUG
	if ( Stream_IsPrintableLevel( self->debug, 3 ) ) {
		Journal_DPrintf( self->debug, "Vector data:\n" );
		_SolutionVector_VectorView( self->vector, self->debug );
	}
	#endif

	comm = Mesh_GetCommTopology( feMesh, MT_VERTEX );
	mpiComm = Comm_GetMPIComm( comm );
	MPI_Comm_size( mpiComm, (int*)&nProc );
	MPI_Comm_rank( mpiComm, (int*)&myRank );

	/* allocate arrays for nodes that I want on each processor */
	reqFromOthersCounts = Memory_Alloc_Array( Index, nProc, "reqFromOthersCounts" );
	reqFromOthersSizes = Memory_Alloc_Array( Index, nProc, "reqFromOthersSizes" );
	reqFromOthersInfos = Memory_Alloc_Array( RequestInfo*, nProc, "reqFromOthersInfos" );
	reqFromOthers = Memory_Alloc_Array( Dof_EquationNumber*, nProc, "reqFromOthers" );
	/* Allocate the arrays of req. values from others independently, as we don't know how large they'll be */
	for ( proc_I=0; proc_I < nProc; proc_I++ ) {
		reqFromOthersCounts[proc_I] = 0;

		if (proc_I == myRank) continue;

		/* Our initial guess at number of non-local eqNums is a small ratio of the number of local dofs */
		reqFromOthersSizes[proc_I] = eqNum->localEqNumsOwnedCount * initialGuessAtNonLocalEqNumsRatio;
		/* Special case for really small meshes: make sure it's at least 1 */
		if (0 == reqFromOthersSizes[proc_I] ) {
			reqFromOthersSizes[proc_I]++;
		}
		reqFromOthersInfos[proc_I] = Memory_Alloc_Array( RequestInfo, reqFromOthersSizes[proc_I],
			"reqFromOthersInfos[proc_I]" );
		reqFromOthers[proc_I] = Memory_Alloc_Array( Dof_EquationNumber, reqFromOthersSizes[proc_I],
			"reqFromOthers[proc_I]" );
	}

	/* Get the locally held part of the vector */
	//Vector_GetArray( self->vector, &localSolnVecValues );
	VecGetArray( self->vector, &localSolnVecValues );

	for( lNode_I=0; lNode_I < Mesh_GetLocalSize( feMesh, MT_VERTEX ); lNode_I++ ) {
		currNodeNumDofs = feVar->dofLayout->dofCounts[ lNode_I ];
		Journal_DPrintfL( self->debug, 3, "getting solutions for local node %d, has %d dofs.\n", lNode_I, currNodeNumDofs );

		/* process each dof */
		for ( nodeLocalDof_I = 0; nodeLocalDof_I < currNodeNumDofs; nodeLocalDof_I++ ) {
			Journal_DPrintfL( self->debug, 3, "\tdof %d: ", nodeLocalDof_I );

			currEqNum = eqNum->mapNodeDof2Eq[lNode_I][nodeLocalDof_I];
			if( currEqNum != -1 ) {
				Journal_DPrintfL( self->debug, 3, "is unconstrained, eqNum %d:", currEqNum );

				if( STreeMap_HasKey( eqNum->ownedMap, &currEqNum ) ) {
					indexIntoLocalSolnVecValues = *(int*)STreeMap_Map( eqNum->ownedMap, &currEqNum );
					Journal_DPrintfL( self->debug, 3, "local -> just copying value %f\n",
						localSolnVecValues[indexIntoLocalSolnVecValues] );
					DofLayout_SetValueDouble( feVar->dofLayout, lNode_I, nodeLocalDof_I,
						localSolnVecValues[indexIntoLocalSolnVecValues] );
				}
				else {
					RequestInfo*	requestInfo;

					Journal_DPrintfL( self->debug, 3, "nonlocal -> add to req list " );
					ownerProc = FeEquationNumber_CalculateOwningProcessorOfEqNum( eqNum, currEqNum );
					Journal_DPrintfL( self->debug, 3, "from proc %d\n", ownerProc );
					/* first check count & realloc if necessary */
					if (reqFromOthersCounts[ownerProc] == reqFromOthersSizes[ownerProc] ) {
						newReqFromOthersSize = reqFromOthersSizes[ownerProc] * ratioToIncreaseRequestArraySize;
						if ( newReqFromOthersSize == reqFromOthersSizes[ownerProc] ) {
							/* Special case: always increase by at least 1 */
							newReqFromOthersSize++;
						}
						reqFromOthersSizes[ownerProc] = newReqFromOthersSize;

						Journal_DPrintfL( self->debug, 3, "req list from proc %d count %d now "
							"equal to size, so reallocing to size %d\n",
							ownerProc, reqFromOthersCounts[ownerProc],
							reqFromOthersSizes[ownerProc] );

						reqFromOthersInfos[ownerProc] = Memory_Realloc_Array(
							reqFromOthersInfos[ownerProc], RequestInfo, reqFromOthersSizes[ownerProc] );
						reqFromOthers[ownerProc] = Memory_Realloc_Array(
							reqFromOthers[ownerProc], Dof_EquationNumber, reqFromOthersSizes[ownerProc] );
					}
					requestInfo = &reqFromOthersInfos[ownerProc][ reqFromOthersCounts[ownerProc] ];
					requestInfo->lNode_I = lNode_I;
					requestInfo->nodeLocalDof_I = nodeLocalDof_I;
					reqFromOthers[ownerProc][reqFromOthersCounts[ownerProc]] = currEqNum;
					(reqFromOthersCounts[ownerProc])++;
				}
			}
			else {
				Journal_DPrintfL( self->debug, 3, "is a BC, so skipping...\n" );
			}
		}
	}

	if ( nProc > 1 ) {
		_SolutionVector_ShareValuesNotStoredLocally( self, reqFromOthersCounts, reqFromOthersInfos, reqFromOthers,
			localSolnVecValues );
	}

	for ( proc_I=0; proc_I < nProc; proc_I++ ) {
		if (proc_I == myRank) continue;
		Memory_Free( reqFromOthers[proc_I] );
		Memory_Free( reqFromOthersInfos[proc_I] );
	}
	Memory_Free( reqFromOthers );
	Memory_Free( reqFromOthersInfos );
	Memory_Free( reqFromOthersCounts );
	Memory_Free( reqFromOthersSizes );

	//Vector_RestoreArray( self->vector, &localSolnVecValues );
	VecRestoreArray( self->vector, &localSolnVecValues );

	/*
	** Synchronise the FeVariable in question.
	*/

	FeVariable_SyncShadowValues( feVar );

	Stream_UnIndentBranch( StgFEM_Debug );
}
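/* A minimal sketch of the VecGetArray/VecRestoreArray access pattern used above, here
 * with the read-only variant; illustrative only, with no DofLayout or equation-number
 * mapping. */
#include <petscvec.h>

static PetscErrorCode SumLocalVecEntriesSketch( Vec v, PetscScalar* sumOut )
{
	PetscErrorCode     ierr;
	const PetscScalar* values;
	PetscInt           localSize, i;
	PetscScalar        sum = 0.0;

	ierr = VecGetLocalSize( v, &localSize );CHKERRQ( ierr );
	ierr = VecGetArrayRead( v, &values );CHKERRQ( ierr );
	for( i = 0; i < localSize; i++ )
		sum += values[i];
	ierr = VecRestoreArrayRead( v, &values );CHKERRQ( ierr );
	*sumOut = sum;
	return 0;
}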
void _SolutionVector_ShareValuesNotStoredLocally(
	SolutionVector*		self,
	Index*			reqFromOthersCounts,
	RequestInfo**		reqFromOthersInfos,
	Dof_EquationNumber**	reqFromOthers,
	double*			localSolnVecValues )
{

	FeVariable*		feVar = self->feVariable;
	FeMesh*			feMesh = feVar->feMesh;
	FeEquationNumber*	eqNum = self->eqNum;
	Comm*			comm;
	MPI_Comm		mpiComm;
	Partition_Index		nProc;
	Partition_Index		myRank;
	Partition_Index		proc_I;
	Index			req_I;
	Index			indexIntoLocalSolnVecValues;
	MPI_Status		status;
	Index*			reqFromMeCounts;
	Dof_EquationNumber**	reqFromMe;
	double**		reqValuesFromMe;
	MPI_Request**		reqValuesFromMeHandles;
	MPI_Request**		reqFromOthersHandles;
	double**		reqValuesFromOthers;
	MPI_Request**		reqValuesFromOthersHandles;
	Bool*			reqValuesFromOthersReceived;
	Partition_Index	     reqValueSetsFromOthersNotYetReceivedCount;
	Dof_EquationNumber   totalRequestedFromOthers = 0;
	Dof_EquationNumber   totalRequestedFromMe = 0;
   int ierr;

	Journal_DPrintf( self->debug, "In %s - for \"%s\"\n", __func__, self->name );
	Stream_IndentBranch( StgFEM_Debug );

	comm = Mesh_GetCommTopology( feMesh, MT_VERTEX );
	mpiComm = Comm_GetMPIComm( comm );
	MPI_Comm_size( mpiComm, (int*)&nProc );
	MPI_Comm_rank( mpiComm, (int*)&myRank );

	reqFromMeCounts = Memory_Alloc_Array( Index, nProc, "reqFromMeCounts" );
	reqFromOthersHandles = Memory_Alloc_Array_Unnamed( MPI_Request*, nProc );
	reqValuesFromOthersHandles = Memory_Alloc_Array_Unnamed( MPI_Request*, nProc );
	reqValuesFromMeHandles = Memory_Alloc_Array_Unnamed( MPI_Request*, nProc );
	reqValuesFromOthers = Memory_Alloc_2DComplex( double, nProc, reqFromOthersCounts, "reqValuesFromOthers" );
	reqValuesFromOthersReceived = Memory_Alloc_Array_Unnamed( Bool, nProc );

	#if DEBUG
	if ( Stream_IsPrintableLevel( self->debug, 2 ) ) {
		Journal_DPrintf( self->debug, "Final list of vec values I need from other procs:\n" );
		for ( proc_I=0; proc_I < nProc; proc_I++ ) {
			if ( proc_I == myRank ) continue;
			Journal_DPrintf( self->debug, "\t%d[0-%d]: ", proc_I, reqFromOthersCounts[proc_I] );
			for ( req_I=0; req_I < reqFromOthersCounts[proc_I]; req_I++ ) {
				RequestInfo* reqInfo = &reqFromOthersInfos[proc_I][req_I];
				Journal_DPrintf( self->debug, "(lnode %d, dof %d -> %d ), ",
					reqInfo->lNode_I, reqInfo->nodeLocalDof_I,
					reqFromOthers[proc_I][req_I] );
			}
			Journal_DPrintf( self->debug, "\n" );
		}
	}
	#endif

	/* send out my request counts, receive the req. counts others want from me */
	MPI_Alltoall( reqFromOthersCounts, 1, MPI_UNSIGNED,
		      reqFromMeCounts, 1, MPI_UNSIGNED, mpiComm );

	Journal_DPrintf( self->debug, "After MPI_Alltoall- counts are:\n" );
	totalRequestedFromOthers = 0;
	totalRequestedFromMe = 0;
	Stream_Indent( self->debug );
	Journal_DPrintf( self->debug, "reqFromOthersCounts: " );
	for ( proc_I=0; proc_I < nProc; proc_I++ ) {
		if ( proc_I == myRank ) continue;
		Journal_DPrintf( self->debug, "\tp%d:%d, ", proc_I, reqFromOthersCounts[proc_I] );
		totalRequestedFromOthers += reqFromOthersCounts[proc_I];
	}
	Journal_DPrintf( self->debug, "\n" );
	Journal_DPrintf( self->debug, "reqFromMeCounts: " );
	for ( proc_I=0; proc_I < nProc; proc_I++ ) {
		if ( proc_I == myRank ) continue;
		Journal_DPrintf( self->debug, "\tp%d:%d, ", proc_I, reqFromMeCounts[proc_I] );
		totalRequestedFromMe += reqFromMeCounts[proc_I];
	}
	Journal_DPrintf( self->debug, "\n" );
	Stream_UnIndent( self->debug );

	if ( ( totalRequestedFromOthers == 0) && (totalRequestedFromMe == 0) )
	{
		Journal_DPrintf( self->debug, "No vector values either required from others or "
			"required by others from me, therefore cleaning up memory and returning.\n" );
		Memory_Free( reqFromMeCounts );
		Memory_Free( reqFromOthersHandles );
		Memory_Free( reqValuesFromOthersHandles );
		Memory_Free( reqValuesFromMeHandles );
		Memory_Free( reqValuesFromOthers );
		Memory_Free( reqValuesFromOthersReceived );
		Stream_UnIndentBranch( StgFEM_Debug );
		return;
	}

	Journal_DPrintfL( self->debug, 2, "Starting non-blocking sends of my lists of vector entry indices I want from others:\n" );
	Stream_Indent( self->debug );
	for( proc_I=0; proc_I < nProc; proc_I++) {
		if ( proc_I == myRank ) continue;
/* Journal_Printf( Journal_Register( Info_Type, (Name)"mpi"  ),  "!!! line %d, proc_I %d: count = %u\n", __LINE__, proc_I, reqFromOthersCounts[proc_I] ); */
		if ( reqFromOthersCounts[proc_I] > 0 ) {
			Journal_DPrintfL( self->debug, 2, "Sending to proc %d the list of %d vector entry indices I want from it:\n"
				"\t(tracking via reqFromOthersHandles[%d], tag %d)\n", proc_I,
				reqFromOthersCounts[proc_I], proc_I, VALUE_REQUEST_TAG );

			reqFromOthersHandles[proc_I] = Memory_Alloc_Unnamed( MPI_Request );
			ierr=MPI_Isend( reqFromOthers[proc_I], reqFromOthersCounts[proc_I], MPI_UNSIGNED,
				proc_I, VALUE_REQUEST_TAG, mpiComm, reqFromOthersHandles[proc_I] );
		}
	}
	Stream_UnIndent( self->debug );


	Journal_DPrintfL( self->debug, 2, "Starting non-blocking receive of the vector entries I want from others:\n" );
	Stream_Indent( self->debug );
	for( proc_I=0; proc_I < nProc; proc_I++) {
		if ( proc_I == myRank ) continue;
		if ( reqFromOthersCounts[proc_I] > 0 ) {
			Journal_DPrintfL( self->debug, 2, "Posting recv reqst from proc %d for the %d vector entries I want from it:\n"
				"\t(tracking via reqValuesFromOthersHandles[%d], tag %d)\n", proc_I,
				reqFromOthersCounts[proc_I], proc_I, VALUE_TAG );
			reqValuesFromOthersHandles[proc_I] = Memory_Alloc_Unnamed( MPI_Request );
			ierr=MPI_Irecv( reqValuesFromOthers[proc_I], reqFromOthersCounts[proc_I], MPI_DOUBLE,
				proc_I, VALUE_TAG, mpiComm, reqValuesFromOthersHandles[proc_I] );
		}
	}
	Stream_UnIndent( self->debug );

	Journal_DPrintfL( self->debug, 2, "Starting blocking receive of the lists of vector entry indices "
		"others want from me:\n" );
	Stream_Indent( self->debug );
	reqFromMe = Memory_Alloc_2DComplex( Dof_EquationNumber, nProc, reqFromMeCounts, "reqFromMe" );
	reqValuesFromMe = Memory_Alloc_2DComplex( double, nProc, reqFromMeCounts, "reqValuesFromMe" );
	for( proc_I=0; proc_I < nProc; proc_I++) {
		if ( proc_I == myRank ) continue;
/* /Journal_Printf( Journal_Register( Info_Type, (Name)"mpi"  ),  "!!! line %d, proc_I %d: count = %u\n", __LINE__, proc_I, reqFromMeCounts[proc_I] ); */
		if ( reqFromMeCounts[proc_I] > 0 ) {
			ierr=MPI_Recv( reqFromMe[proc_I], reqFromMeCounts[proc_I], MPI_UNSIGNED,
				proc_I, VALUE_REQUEST_TAG, mpiComm, &status );
			Journal_DPrintfL( self->debug, 3, "Received a list of %u requested vector entry indices from proc %u, "
				"with tag %d\n", reqFromMeCounts[proc_I], proc_I, status.MPI_TAG );
		}
	}
	Stream_UnIndent( self->debug );

	#if DEBUG
	if ( Stream_IsPrintableLevel( self->debug, 2 ) ) {
		Journal_DPrintf( self->debug, "Final lists of vector entry indices other procs want from me are:\n" );
		Stream_Indent( self->debug );
		for ( proc_I=0; proc_I < nProc; proc_I++ ) {
			if ( proc_I == myRank ) continue;
			if ( reqFromMeCounts[proc_I] > 0 ) {
				Journal_DPrintf( self->debug, "%d[0-%d]: ", proc_I, reqFromMeCounts[proc_I] );
				for ( req_I=0; req_I < reqFromMeCounts[proc_I]; req_I++ ) {
					Journal_DPrintf( self->debug, "(eqNum %d), ", reqFromMe[proc_I][req_I] );
				}
				Journal_DPrintf( self->debug, "\n" );
			}
		}
		Stream_UnIndent( self->debug );
	}
	#endif

	/* for all those requested from me, non-blocking send out values */
	Journal_DPrintfL( self->debug, 2, "Beginning non-blocking send out of vector entry lists requested by others:\n" );
	Stream_Indent( self->debug );
	for( proc_I=0; proc_I < nProc; proc_I++) {
		if ( proc_I == myRank ) continue;
		if ( reqFromMeCounts[proc_I] > 0 ) {
			Journal_DPrintfL( self->debug, 3, "list to proc %d is: ", proc_I );
			for ( req_I=0; req_I < reqFromMeCounts[proc_I]; req_I++ ) {
				/* look up and fill in correct value in array */
				indexIntoLocalSolnVecValues = *(int*)STreeMap_Map( eqNum->ownedMap,
										   reqFromMe[proc_I] + req_I );
				reqValuesFromMe[proc_I][req_I] = localSolnVecValues[indexIntoLocalSolnVecValues];
				Journal_DPrintfL( self->debug, 3, "%d=%f, ", reqFromMe[proc_I][req_I],
					reqValuesFromMe[proc_I][req_I] );
			}
			Journal_DPrintfL( self->debug, 3, "\n" );
			/* Non-blocking send out the now-complete list to this processor */
			reqValuesFromMeHandles[proc_I] = Memory_Alloc_Unnamed( MPI_Request );
			Journal_DPrintfL( self->debug, 2, "Sending to proc %d the list of %d vector entries they want:\n"
				"\t(tracking via reqValuesFromMe[%d], tag %d)\n", proc_I,
				reqFromMeCounts[proc_I], proc_I, VALUE_TAG );
			ierr=MPI_Isend( reqValuesFromMe[proc_I], reqFromMeCounts[proc_I], MPI_DOUBLE,
				proc_I, VALUE_TAG, mpiComm, reqValuesFromMeHandles[proc_I] );
		}
	}
	Stream_UnIndent( self->debug );

	Journal_DPrintfL( self->debug, 1, "Starting iterative-test receive of the vector entries I "
		"requested from others:\n" );
	/* Set up an array for keeping track of who we've received things from
	 * already */
	reqValueSetsFromOthersNotYetReceivedCount = nProc-1;
	for( proc_I=0; proc_I < nProc; proc_I++) {
		if ( proc_I == myRank ) continue;
		reqValuesFromOthersReceived[proc_I] = False;
		if ( reqFromOthersCounts[proc_I] == 0 ) {
			reqValueSetsFromOthersNotYetReceivedCount--;
		}
	}

	#if DEBUG
	Journal_DPrintfL( self->debug, 2, "(Expecting %d receives from procs: ",
		reqValueSetsFromOthersNotYetReceivedCount );
	for( proc_I=0; proc_I < nProc; proc_I++) {
		if ( proc_I == myRank ) continue;
		if ( reqFromOthersCounts[proc_I] > 0 ) {
			Journal_DPrintfL( self->debug, 2, "%d, ", proc_I );
		}
	}
	Journal_DPrintfL( self->debug, 2, ")\n" );
	#endif

	Stream_Indent( self->debug );
	/* now update the values at nodes that I requested from others, as they come in */
	while ( reqValueSetsFromOthersNotYetReceivedCount ) {
		int flag = 0;

		Journal_DPrintfL( self->debug, 3, "%d sets still to go...\n", reqValueSetsFromOthersNotYetReceivedCount );
		for( proc_I=0; proc_I < nProc; proc_I++) {
			if ( proc_I == myRank ) continue;

			if ( (reqFromOthersCounts[proc_I] > 0) && (False == reqValuesFromOthersReceived[proc_I]) ) {
				MPI_Test( reqValuesFromOthersHandles[proc_I], &flag, &status );
				if ( !flag ) {
					/* No results yet from this proc -> continue to next. */
					continue;
				}
				else {
					RequestInfo* reqInfo;
					Journal_DPrintfL( self->debug, 2, "received some requested "
						"values (using reqValuesFromOthersHandles) from proc %d "
						"(with tag %d, exp %d):", proc_I, status.MPI_TAG, VALUE_TAG );
					/* go through each value received from that proc & update onto node */
					for ( req_I=0; req_I < reqFromOthersCounts[proc_I]; req_I++ ) {
						reqInfo = &reqFromOthersInfos[proc_I][req_I];
						Journal_DPrintfL( self->debug, 3, "(lnode %d, dof %d -> %d )=%f, ",
							reqInfo->lNode_I, reqInfo->nodeLocalDof_I,
							reqFromOthers[proc_I][req_I], reqValuesFromOthers[proc_I][req_I] );
						DofLayout_SetValueDouble( feVar->dofLayout, reqInfo->lNode_I, reqInfo->nodeLocalDof_I,
							reqValuesFromOthers[proc_I][req_I] );
					}
					Journal_DPrintfL( self->debug, 2, "\n" );
					reqValuesFromOthersReceived[proc_I] = True;
					reqValueSetsFromOthersNotYetReceivedCount--;
					Memory_Free( reqValuesFromOthersHandles[proc_I] );
				}
			}
		}
	}
	Stream_UnIndent( self->debug );

	/* MPI_Wait to be sure all sends to others have completed */
	Journal_DPrintfL( self->debug, 2, "Making sure all comms of this function finished:...\n" );
	Stream_Indent( self->debug );

	Journal_DPrintfL( self->debug, 2, "Confirming completion of my sends of "
		"vector entry index lists I wanted from others were received:\n" );
	Stream_Indent( self->debug );
	for( proc_I=0; proc_I < nProc; proc_I++) {
		if ( proc_I == myRank ) continue;
		if ( reqFromOthersCounts[proc_I] > 0 ) {
			ierr=MPI_Wait( reqFromOthersHandles[proc_I], MPI_STATUS_IGNORE );
			Journal_DPrintfL( self->debug, 2, "Confirmed wait on reqFromOthersHandles[%u]"
				"\n", proc_I );
			Memory_Free( reqFromOthersHandles[proc_I] );
		}
	}
	Stream_UnIndent( self->debug );
	Journal_DPrintfL( self->debug, 2, "done.\n" );

	Journal_DPrintfL( self->debug, 2, "Confirming completion of my sends of "
		"vector entry values requested by others were received:\n" );
	Stream_Indent( self->debug );
	for( proc_I=0; proc_I < nProc; proc_I++) {
		if ( proc_I == myRank ) continue;
		if ( reqFromMeCounts[proc_I] > 0 ) {
			ierr=MPI_Wait( reqValuesFromMeHandles[proc_I], MPI_STATUS_IGNORE );
			Journal_DPrintfL( self->debug, 2, "Confirmed wait on reqValuesFromMeHandles[%u]"
				"\n", proc_I );
			Memory_Free( reqValuesFromMeHandles[proc_I] );
		}
	}
	Stream_UnIndent( self->debug );
	Journal_DPrintfL( self->debug, 2, "done.\n" );

	Stream_UnIndent( self->debug );
	Journal_DPrintfL( self->debug, 2, "done.\n" );

	Memory_Free( reqFromMeCounts );
	Memory_Free( reqFromMe );
	Memory_Free( reqValuesFromMe );
	Memory_Free( reqValuesFromOthers );
	Memory_Free( reqValuesFromOthersReceived );
	Memory_Free( reqFromOthersHandles );
	Memory_Free( reqValuesFromOthersHandles );
	Memory_Free( reqValuesFromMeHandles );

	Stream_UnIndentBranch( StgFEM_Debug );
	return;
}
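/* A standalone sketch of the request-polling pattern used above: post non-blocking
 * receives from every rank that owes us data, then loop with MPI_Test until all have
 * arrived. Plain MPI only; the per-value node updates and Journal logging are omitted
 * and names are illustrative. MPI_Waitall would also work; the polling form mirrors
 * the function above, which processes each set of values as soon as it lands. */
#include <mpi.h>
#include <stdlib.h>

static void PollForRequestedValuesSketch( double** recvBufs, const int* counts,
                                          int nProc, int myRank, int tag, MPI_Comm comm )
{
	MPI_Request* requests = malloc( nProc * sizeof(MPI_Request) );
	MPI_Status   status;
	int          outstanding = 0, proc, flag;

	for( proc = 0; proc < nProc; proc++ ) {
		requests[proc] = MPI_REQUEST_NULL;
		if ( proc == myRank || counts[proc] == 0 ) continue;
		MPI_Irecv( recvBufs[proc], counts[proc], MPI_DOUBLE, proc, tag, comm, &requests[proc] );
		outstanding++;
	}

	while ( outstanding > 0 ) {
		for( proc = 0; proc < nProc; proc++ ) {
			if ( requests[proc] == MPI_REQUEST_NULL ) continue;
			MPI_Test( &requests[proc], &flag, &status );
			/* on completion MPI_Test frees the request and sets it to MPI_REQUEST_NULL */
			if ( flag ) outstanding--;	/* values in recvBufs[proc] are now ready to use */
		}
	}
	free( requests );
}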
void ParticleMovementHandler_HandleParticleMovementBetweenProcs( ParticleCommHandler* pCommsHandler ) {
	ParticleMovementHandler*	self = (ParticleMovementHandler*)pCommsHandler;
	double                  startTime = 0;
	Stream*                 info = Journal_Register( Info_Type, (Name)self->type  );

	Journal_DPrintfL( self->debug, 1, "In %s(), for swarm \"%s\":\n", __func__, self->swarm->name );
	if ( 1 == self->swarm->nProc ) {
		Journal_DPrintfL( self->debug, 1, "Serial run -> nothing to communicate, returning.\n" );
		Stream_UnIndentBranch( Swarm_Debug );
		return;
	}

	Stream_IndentBranch( Swarm_Debug );

	startTime = MPI_Wtime();

	if ( self->swarm->cellShadowCount > 0 ) {
		/* Allocate the recv count arrays and handles */
		self->allocateIncomingCountArrays( (ParticleCommHandler*)self );
		self->allocateOutgoingCountArrays( (ParticleCommHandler*)self );

		/* First thing to do is begin non-blocking receive of incoming particles (for latency hiding) */
		self->beginReceiveOfIncomingParticleCounts( (ParticleCommHandler*)self );

		/* Do a blocking send of outgoing counts, so our nbrs know what to receive */
		self->sendOutgoingParticleCounts( (ParticleCommHandler*)self );

		/* Now need to make sure that incoming particle counts are here, then begin receiving particles
		   (we do this as early as possible for latency-hiding purposes). */
		self->finishReceiveOfIncomingParticleCounts( (ParticleCommHandler*)self );
		self->allocateIncomingParticleArrays( (ParticleCommHandler*)self );
		self->beginReceiveOfIncomingParticles( (ParticleCommHandler*)self );
			
		/* OK, now begin sending out particles we know need to go to nbrs */
		self->allocateOutgoingParticleArrays( (ParticleCommHandler*)self );
		self->beginSendingParticles( (ParticleCommHandler*)self );
	}
	
	if ( self->useGlobalFallbackCommStrategy ) {
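		/* Fallback path for particles that have moved outside this processor's domain altogether and so
		   cannot be handled by the shadow-cell exchange above: allocate the index array used to record
		   them, then hand over to the global fallback communication below. */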
		self->particlesOutsideDomainIndices = Memory_Alloc_Array( Particle_Index, self->swarm->particlesArrayDelta,
			"self->particlesOutsideDomainIndices" );

		ParticleMovementHandler_DoGlobalFallbackCommunication( self );
	}

	if ( self->swarm->cellShadowCount > 0 ) {
		/* Ok, at this point make sure our non-blocking receives of particles incoming via the domain
 		 *  boundary have completed: then immediately insert these particles into our local swarm */
		self->finishReceiveOfIncomingParticlesAndUpdateIndices( (ParticleCommHandler*)self );
	}

	/* Final update of the 'holes' in my particles list (some may be left if we sent more than we received overall). */
	ParticleMovementHandler_FillRemainingHolesInLocalParticlesArray( (ParticleCommHandler*)self );

	/* NB: Can't delete these until here, as they're needed by the "fillRemainingHoles" function above. */
	if ( self->useGlobalFallbackCommStrategy ) {
		Memory_Free( self->particlesOutsideDomainIndices );
		self->particlesOutsideDomainIndices = NULL;
	}

	if ( self->swarm->cellShadowCount > 0 ) {
		self->confirmOutgoingSendsCompleted( (ParticleCommHandler*)self );
	}

	_ParticleCommHandler_PrintCommunicationVolumeStats( (ParticleCommHandler*)self, startTime, info );

	MPI_Barrier( self->swarm->comm );

	/* clean up allocated memory, and zero counters, ready for next timestep */
	if ( self->swarm->cellShadowCount > 0 ) {
		self->freeIncomingArrays( (ParticleCommHandler*)self );
		self->freeOutgoingArrays( (ParticleCommHandler*)self );
	}
	
	_ParticleCommHandler_ZeroShadowCommStrategyCounters( (ParticleCommHandler*)self );
	
	Stream_UnIndentBranch( Swarm_Debug );
}
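/* Solves the (optionally compressible) Stokes system assembled in the Stokes_SLE using a preconditioned
 * conjugate-gradient Uzawa iteration on the pressure unknowns, with the velocity updated along the way via
 * the inner velocity solver. See the numbered STEP comments below for the details of the algorithm. */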
void _Stokes_SLE_UzawaSolver_Solve( void* solver, void* stokesSLE ) {
	Stokes_SLE_UzawaSolver* self            = (Stokes_SLE_UzawaSolver*)solver;	
	Stokes_SLE*             sle             = (Stokes_SLE*)stokesSLE;
	
	/* Create shortcuts to stuff needed on sle */
	Mat                     K_Mat           = sle->kStiffMat->matrix;
	Mat                     G_Mat           = sle->gStiffMat->matrix;
	Mat                     D_Mat           = NULL;
	Mat                     M_Mat           = NULL;
	Vec                     uVec            = sle->uSolnVec->vector;
	Vec                     qVec            = sle->pSolnVec->vector;
	Vec                     fVec            = sle->fForceVec->vector;
	Vec                     hVec            = sle->hForceVec->vector;
	
	/* Create shortcuts to solver related stuff */
	Vec                     qTempVec        = self->pTempVec;  
	Vec                     rVec            = self->rVec;
	Vec                     sVec            = self->sVec;
	Vec                     fTempVec        = self->fTempVec;
	Vec                     vStarVec        = self->vStarVec; 
	KSP                     velSolver       = self->velSolver;	/*  Inner velocity solver */
	KSP                     pcSolver        = self->pcSolver;   /*  Preconditioner  */

	Iteration_Index         maxIterations   = self->maxUzawaIterations;
	Iteration_Index         minIterations   = self->minUzawaIterations;
	Iteration_Index         iteration_I     = 0;
	Iteration_Index         outputInterval  = 1;
	
	double                  zdotr_current	= 0.0;
	double                  zdotr_previous 	= 1.0;
	double                  sdotGTrans_v;
	double                  alpha, beta;
	double                  absResidual;  
	double                  relResidual;
	double*                 chosenResidual;	  /* We can opt to use either the absolute or the relative residual in the termination condition */
	double                  uzawaRhsScale;
	double                  divU;
	double                  weightedResidual;
	double                  weightedVelocityScale;
	double                  momentumEquationResidual;
	
	Iteration_Index         innerLoopIterations;
	Stream*                 errorStream     = Journal_Register( Error_Type, (Name)Stokes_SLE_UzawaSolver_Type  );
	
	PetscInt		fVecSize, qTempVecSize, uVecSize, qVecSize;
	PetscScalar		fVecNorm, qTempVecNorm, uVecNorm, rVecNorm, fTempVecNorm, uVecNormInf, qVecNorm, qVecNormInf;

	double                  qGlobalProblemScale;
	double                  qReciprocalGlobalProblemScale;
	int			init_info_stream_rank;	
	PetscScalar p_sum;
	/* Bool nullsp_present; */
	Bool uzawa_summary;
	double time,t0,rnorm0;

	PetscTruth     flg;
	double        ksptime;

	VecGetSize( qTempVec, &qTempVecSize );
	qGlobalProblemScale = sqrt( (double) qTempVecSize );
	qReciprocalGlobalProblemScale = 1.0 / qGlobalProblemScale;
	init_info_stream_rank = Stream_GetPrintingRank( self->info );
	Stream_SetPrintingRank( self->info, 0 ); 

	/*	DEFINITIONS:
					See accompanying documentation
					u - the displacement / velocity solution (to which constraints are applied)
					q - the pressure-like variable which constrains the divergence of the displacement / velocity	(= pressure for incompressible)	
					F - standard FE force vector
					Fhat - transformed force vector = G^T K^{-1} F   (so the Uzawa RHS is Fhat - h)
					K - standard FE stiffness matrix
					Khat - Uzawa transformed stiffness matrix = G^T K^{-1} G
					G matrix - discrete gradient operator
					D matrix - discrete divergence operator = G^T for this particular algorithm
					C matrix - Mass matrix (M) for compressibility 
					
		LM & DAM			
	*/
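	/* In block form (as implied by the definitions above and by the residual/update formulas used below),
	   the system being solved is

	  		[ K    G ] [ u ]   [ F ]
	  		[ G^T  M ] [ q ] = [ h ]

	   Eliminating u = K^{-1}( F - G q ) gives the pressure equation ( G^T K^{-1} G - M ) q = G^T K^{-1} F - h,
	   i.e. ( Khat - M ) q = Fhat - h, which is what the conjugate gradient iteration below operates on.
	*/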

	/* CHOICE OF RESIDUAL: 
					we may opt to converge on the absolute value (self->useAbsoluteTolerance == True ... default)
					or the relative value of the residual (self->useAbsoluteTolerance == False) 
			 		(another possibility would be always to improve the residual by a given tolerance)
					The Moresi & Solomatov (Phys Fluids, 1995) approach is to use the relative tolerance	
	*/ 

	VecNorm( fVec, NORM_2, &fVecNorm );
	VecGetSize( fVec, &fVecSize );
	if ( fVecNorm / sqrt( (double)fVecSize ) <= 1e-99 ) {
		Journal_Printf( errorStream,
			"Error in func %s: The momentum force vector \"%s\" is zero. "
			"The force vector should be non-zero either because of your chosen boundary "
			"conditions, or because of the element force vector assembly. You have %d "
			"element force vectors attached.\n",
			__func__, sle->fForceVec->name, sle->fForceVec->assembleForceVector->hooks->count );
		if ( sle->fForceVec->assembleForceVector->hooks->count > 0 ) {
			Journal_Printf( errorStream, "You used the following force vector assembly terms:\n" );
			EntryPoint_PrintConcise( sle->fForceVec->assembleForceVector, errorStream );
/* 			 TODO : need to print the elementForceVector assembly, not the global guy!! */
		}	
		Journal_Printf( errorStream,
			"Please check values for building the force vector.\n" );
		Journal_Firewall( 0, errorStream, "Exiting.\n" ); 	
	}
	
					
 	Journal_DPrintf( self->debug, "In %s:\n", __func__ );
	Journal_RPrintfL( self->debug, 2, "Conjugate Gradient Uzawa solver with:\n");
	
	Stream_IndentBranch( StgFEM_Debug );
	
	Journal_RPrintfL( self->debug, 2, "Compressibility %s\n", (sle->cStiffMat)? "on" : "off");
	Journal_RPrintfL( self->debug, 2, "Preconditioning %s\n", (pcSolver)? "on" : "off" );   
	
	
	
	if ( sle->cStiffMat ) {
		Journal_DPrintfL( self->debug, 2, "(compressibility active)\n" );
		M_Mat = sle->cStiffMat->matrix;   
	}
	else {
		Journal_DPrintfL( self->debug, 2, "(compressibility inactive)\n" );
	}
	if ( sle->dStiffMat ) {
		Journal_DPrintfL( self->debug, 2, "(asymmetric geometry: handling D Matrix [incorrectly - will be ignored])\n" );
		D_Mat = sle->dStiffMat->matrix;
	}
	else {
		Journal_DPrintfL( self->debug, 2, "(No D -> symmetric geometry: D = Gt)\n" );
	}
	
	#if DEBUG
	if ( Stream_IsPrintableLevel( self->debug, 3 ) ) {
		Journal_DPrintf( self->debug, "Matrices and Vectors to solve are:\n" );
		Journal_DPrintf( self->debug, "K Matrix:\n" );
		/* No nice way of viewing Matrices, so commented out as incompatible with
		 * new 3D decomp at present --Kathleen Humble 30-04-07 
		 * Matrix_View( sle->kStiffMat->matrix, self->debug ); */
		Journal_DPrintf( self->debug, "G Matrix:\n" );
		if ( D_Mat ) {
			Journal_DPrintf( self->debug, "D Matrix:\n" );
		}	
		if ( M_Mat ) {
			Journal_DPrintf( self->debug, "M Matrix:\n" );
		}	
		Journal_DPrintf( self->debug, "Z (preconditioner) Matrix:\n" );
		Journal_DPrintf( self->debug, "f Vector:\n" );
		_SLE_VectorView( fVec, self->debug );
		Journal_DPrintf( self->debug, "h Vector:\n" );
		_SLE_VectorView( hVec, self->debug );
	}
	#endif
	
	/* STEP 1: Estimate the magnitude of the RHS for the transformed problem.
			   We compute (usually to lower accuracy than elsewhere) the RHS (Fhat - h)
	         and store the result in qTempVec.
		LM & DAM			
	*/
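	/* Concretely, the code below: (i) solves K v* = F with the inner velocity solver,
	   (ii) forms qTempVec = D v* (or G^T v* when no D matrix is supplied), and
	   (iii) subtracts h, leaving qTempVec = Fhat - h. */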
	
	Journal_DPrintfL( self->debug, 2, "Building Fhat - h.\n" );
	PetscOptionsHasName(PETSC_NULL,"-uzawa_printksptimes",&flg);
	KSPSetTolerances( velSolver, self->tolerance, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT );
	if (flg) {
	    ksptime = MPI_Wtime();
	}
	KSPSolve( velSolver, fVec, vStarVec );
	if (flg) {
	    ksptime = MPI_Wtime() - ksptime;
	    PetscPrintf( PETSC_COMM_WORLD,  "KSP on velSolver took %lf seconds in Building Fhat step\n", ksptime);
	}
	KSPGetIterationNumber( velSolver, &innerLoopIterations );
	
	Journal_DPrintfL( self->debug, 2, "Fhat inner solution: Number of iterations: %d\n", innerLoopIterations );
	
	if ( D_Mat ) {
		MatMult( D_Mat, vStarVec, qTempVec );
	}
	else {
		MatMultTranspose( G_Mat, vStarVec, qTempVec );
	}
	VecAXPY( qTempVec, -1.0, hVec );
	
	/*  WARNING:
			If D != G^T then the resulting \hat{K} is not likely to be symmetric, positive definite as
			required by this implementation of the Uzawa iteration.  This next piece of code
			is VERY unlikely to work properly so it's in the sin bin for the time being - LM.
			
			if ( D_Mat ) {
				MatrixMultiply( D_Mat, vStarVec, qTempVec );
			}
			else {
				MatrixTransposeMultiply( G_Mat, vStarVec, qTempVec );
			}
		LM & DAM			
	*/	

	
	/* STEP 2: The problem scaling - optionally normalize the uzawa residual by the magnitude of the RHS (use a relative tolerance)
			For the inner velocity solver,  Citcom uses a relative tolerance equal to that used for the Uzawa iteration as a whole
		LM & DAM			
	*/
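	/* Concretely, uzawaRhsScale below is || Fhat - h ||_2 / sqrt( pressure-vector size ), i.e. an RMS-style
	   magnitude of the transformed RHS, used to convert the absolute residual into a relative one. */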
	
	if (self->useAbsoluteTolerance) {
		chosenResidual = &absResidual;
		Journal_PrintfL( self->info, 2, "Absolute residual < %g for Uzawa stopping condition\n", self->tolerance);
		/* We should calculate the effective relative tolerance and insert that here !! */
		KSPSetTolerances( velSolver, 0.1 * self->tolerance, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT );
	}
	else {  /* The CITCOM compatible choice */
		chosenResidual = &relResidual;
		Journal_PrintfL( self->info, 2, "Relative residual < %g for Uzawa stopping condition\n", self->tolerance);	
		KSPSetTolerances( velSolver, 0.1 * self->tolerance, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT );
	}
	
	Journal_DPrintfL( self->debug, 2, "Determining scaling factor for residual:\n" );
	VecNorm( qTempVec, NORM_2, &qTempVecNorm );
	uzawaRhsScale = ((double)qTempVecNorm) * qReciprocalGlobalProblemScale;
	
	Journal_DPrintfL( self->debug, 2, "uzawaRhsScale = %f\n", uzawaRhsScale );	
	Journal_Firewall( isGoodNumber( uzawaRhsScale ), errorStream, 
			"Error in func '%s' for %s '%s' - uzawaRhsScale has illegal value '%g'.\n", __func__, self->type, self->name, uzawaRhsScale );
	
	/* STEP 3: Calculate the initial residual for the transformed equation  (\hat{F} - h - \hat{K} q_0).
	    Compute the solution to K u_0 = F - G q_0  (u_0 unknown).
		  Then G^T u_0 = \hat{F} - \hat{K} q_0.
	    u_0 is also the initial velocity solution to which the constraint is applied by the subsequent iteration.
		LM & DAM			
	*/
	
	Journal_DPrintfL( self->debug, 2, "Solving for transformed Uzawa RHS.\n" );
	
	VecCopy( fVec, fTempVec );
	VecScale( fTempVec, -1.0 );
	MatMultAdd( G_Mat, qVec, fTempVec, fTempVec );
	VecScale( fTempVec, -1.0 );
	KSPSolve( velSolver, fTempVec, uVec );

	/* Handling for NON-SYMMETRIC: relegated to sin bin (see comment above) LM & DAM */
	if ( D_Mat ) {
		MatMult( D_Mat, uVec, rVec );
	}
	else {
		MatMultTranspose( G_Mat, uVec, rVec );
	}
	VecNorm( rVec, NORM_2, &rnorm0 );
	VecNorm( uVec, NORM_2, &uVecNorm );
	divU = rnorm0 / uVecNorm;
	
	Journal_PrintfL( self->info, 2, "Initial l2Norm( Div u ) / l2Norm( u ) = %f \n", divU);
	
	Journal_Firewall( isGoodNumber( divU ), errorStream, 
			"Error in func '%s' for %s '%s' - l2Norm( Div u ) has illegal value '%g'.\n",
			__func__, self->type, self->name, divU );
	
	
	Journal_DPrintfL( self->debug, 2, "Adding compressibility and prescribed divergence terms.\n" );
	
	if ( M_Mat ) {
		MatMultAdd( M_Mat, qVec, rVec, rVec );
	}	
	VecAXPY( rVec, -1.0, hVec );
			
	/* Check for existence of constant null space */
#if 0
	nullsp_present = _check_if_constant_nullsp_present( self, K_Mat,G_Mat,M_Mat, fTempVec,vStarVec,qTempVec,sVec, velSolver );
#endif
			
	/* STEP 4: Preconditioned conjugate gradient iteration loop */	
		
	Journal_DPrintfL( self->debug, 1, "Beginning main Uzawa conjugate gradient loop:\n" );	

	iteration_I = 0;

	/* outer_it, residual, time */
	uzawa_summary = self->monitor;
	time = 0.0;
	t0 = MPI_Wtime();
//	Journal_PrintfL( self->info, 1, "  |r0| = %.8e \n", rnorm0 );
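	/* Sketch of the loop below - a preconditioned conjugate gradient iteration on the pressure q
	   (numbers refer to the STEP comments inside the loop; D is used in place of G^T when supplied):
	  	4.1  z      = Q^{-1} r                          (preconditioner solve, or z = r if none)
	  	4.2  beta   = (z.r) / (z.r)_previous            (beta = 0 on the first pass)
	  	     s      = z + beta s
	  	4.3  v*     = K^{-1} G s                        (inner velocity solve)
	  	4.4  alpha  = (z.r) / ( s . (G^T v* - M s) )
	  	4.5  q     += alpha s;   u -= alpha v*;   r -= alpha (G^T v* - M s)
	  	4.6  store (z.r) for the next iteration
	*/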

	do{	
		/* reset the start and end times for the outer iterations back to 0 - probably not strictly necessary, but done just in case */
		self->outeritsinitialtime = 0;
		self->outeritsendtime = 0;
		// Beginning of outer iterations
		/* get the wall time for the start of the outer loop */
		self->outeritsinitialtime = MPI_Wtime();
		
		Journal_DPrintfL( self->debug, 2, "Beginning solve '%u'.\n", iteration_I );
		Stream_IndentBranch( StgFEM_Debug );
		
		/* STEP 4.1: Preconditioner
			Solve:
				Q_\hat{K} z_1 =  r_1
				Q_\hat{K} is an approximation to \hat{K} which is simple / trivial / quick to invert
			LM & DAM			
		*/
		
		if ( pcSolver ) {
		    PetscOptionsHasName(PETSC_NULL,"-uzawa_printksptimes",&flg);
		    if (flg) {
			ksptime = MPI_Wtime();
		    }
		    KSPSolve( pcSolver, rVec, qTempVec );
		    if (flg) {
			ksptime = MPI_Wtime() - ksptime;
			PetscPrintf( PETSC_COMM_WORLD,  "KSP on pcSolver took %lf seconds\n", ksptime);
		    }
		}
		else {
			VecCopy( rVec, qTempVec );
		}

		/* Remove the constant null space, but only if NOT compressible */
#if 0
		if( nullsp_present == True ) {
			_remove_constant_nullsp( qTempVec );
		}
#endif
				
		/* STEP 4.2: Calculate s_I, the pressure search direction
				z_{I-1} . r_{I-1}  
				\beta = (z_{I-1} . r_{I-1}) / (z_{I-2} . r_{I-2})  
					\beta = 0 for the first iteration
		      s_I = z_(I-1) + \beta * s_(I-1) 
			LM & DAM			
		*/ 
		
		VecDot( qTempVec, rVec, &zdotr_current );
	
		VecNorm( qTempVec, NORM_2, &qTempVecNorm );
		VecNorm( rVec, NORM_2, &rVecNorm );	
		Journal_DPrintfL( self->debug, 2, "l2Norm (qTempVec) %g; (rVec) %g \n", 
			qTempVecNorm * qReciprocalGlobalProblemScale,
			rVecNorm * qReciprocalGlobalProblemScale );
		
		if ( iteration_I == 0 ) {
			VecCopy( qTempVec, sVec );
		}
		else {
			beta = zdotr_current/zdotr_previous;
			VecAYPX( sVec, beta, qTempVec );
		}
		
		/* STEP 4.3: Velocity search direction corresponding to s_I is found by solving
				K u* = G s_I
			LM & DAM			
		*/
			
		MatMult( G_Mat, sVec, fTempVec );
		
		Journal_DPrintfL( self->debug, 2, "Uzawa inner iteration step\n");
		
		// Start of inner iterations
		PetscOptionsHasName(PETSC_NULL,"-uzawa_printksptimes",&flg);
		/*get initial wall time for inner loop*/
		self->inneritsinitialtime = MPI_Wtime();
		if (flg) {
		    ksptime = MPI_Wtime();
		}
		KSPSolve( velSolver, fTempVec, vStarVec );
		if (flg) {
		    ksptime = MPI_Wtime() - ksptime;
		    PetscPrintf( PETSC_COMM_WORLD,  "KSP on velSolver took %lf seconds in Uzawa inner iteration step\n", ksptime);
		}
		/*get end wall time for inner loop*/
		self->inneritsendtime = MPI_Wtime();
		
		/* add time to total time inner its: */
		self->totalinneritstime = self->totalinneritstime + (-self->inneritsinitialtime + self->inneritsendtime);
		/* reset the start and end times for the inner iterations back to 0 - probably not strictly necessary, but done just in case */
		self->inneritsinitialtime = 0;
		self->inneritsendtime = 0;
		
		KSPGetIterationNumber( velSolver, &innerLoopIterations );
		/* add the inner loop iterations to the total inner iterations */
		self->totalnuminnerits = self->totalnuminnerits + innerLoopIterations;
		
		Journal_DPrintfL( self->debug, 2, "Completed Uzawa inner iteration in '%u' iterations \n", innerLoopIterations );
				
		/* STEP 4.4: Calculate the step size ( \alpha = z_{I-1} . r_{I-1} / (s_I . \hat{K} s_I) )
				 \hat{K} s_I = G^T u* - M s_I (u* from step 4.3) 	
			LM & DAM			
		*/ 
		
		if ( D_Mat ) {
			MatMult( D_Mat, vStarVec, qTempVec );
		}
		else {
			MatMultTranspose( G_Mat, vStarVec, qTempVec );
		}
		
		/* Handling for NON-SYMMETRIC: relegated to sin bin (see comment above) 
		
			if ( D_Mat ) {
				MatrixMultiply( D_Mat, vStarVec, qTempVec );
			}
			else {
				MatrixTransposeMultiply( G_Mat, vStarVec, qTempVec );
			}
			LM & DAM			
		*/

		if ( M_Mat ) {
			Journal_DPrintfL( self->debug, 2, "Correcting for Compressibility\n" );
			VecScale( qTempVec, -1.0 );
			MatMultAdd( M_Mat, sVec, qTempVec, qTempVec );
			VecScale( qTempVec, -1.0 );
		}

		VecDot( sVec, qTempVec, &sdotGTrans_v );
		
		alpha = zdotr_current/sdotGTrans_v;
		
		/* STEP 4.5: Update pressure, velocity and value of residual
				 by \alpha times corresponding search direction 
			LM & DAM			
		*/
		
		Journal_DPrintfL( self->debug, 2, "zdotr_current = %g \n", zdotr_current);
		Journal_DPrintfL( self->debug, 2, "sdotGTrans_v = %g \n", sdotGTrans_v);
		Journal_DPrintfL( self->debug, 2, "alpha = %g \n", alpha);
	
		Journal_Firewall( 
				isGoodNumber( zdotr_current ) && isGoodNumber( sdotGTrans_v ) && isGoodNumber( alpha ), 
				errorStream, 
				"Error in func '%s' for %s '%s' - zdotr_current, sdotGTrans_v or alpha has an illegal value: '%g','%g' or '%g'\n",
				__func__, self->type, self->name, zdotr_current, sdotGTrans_v, alpha );
		
		VecAXPY( qVec, alpha, sVec );
		VecAXPY( uVec, -alpha, vStarVec );
		VecAXPY( rVec, -alpha, qTempVec );
		
		/* STEP 4.6: store the value of z_{I-1} . r_{I-1} for the next iteration
		 LM & DAM
		*/
		
		zdotr_previous = zdotr_current; 
		
		VecNorm( rVec, NORM_2, &rVecNorm );
		absResidual = rVecNorm * qReciprocalGlobalProblemScale;
		relResidual = absResidual / uzawaRhsScale;
		
		Stream_UnIndentBranch( StgFEM_Debug );
		
		if( iteration_I % outputInterval == 0 ) {
			Journal_PrintfL( self->info, 2, "\tLoop = %u, absResidual = %.8e, relResidual = %.8e\n", 
				iteration_I, absResidual, relResidual );
		}
		
		Journal_Firewall( isGoodNumber( absResidual ), errorStream, 
				"Error in func '%s' for %s '%s' - absResidual has an illegal value: '%g'\n",
				__func__, self->type, self->name, absResidual );
		
		Journal_Firewall( iteration_I < maxIterations, 
				errorStream, "In func %s: Reached maximum number of iterations %u without converging; absResidual = %.5g, relResidual = %.5g \n",
				__func__, iteration_I, absResidual, relResidual );

/* 		 TODO: test for small change in 10 iterations and if so restart? */

		time = MPI_Wtime()-t0;
		if (uzawa_summary) {
                	Journal_PrintfL( self->info, 1, "  %1.4d uzawa residual norm %12.13e, cpu time %5.5e\n", iteration_I+1,*chosenResidual,time );
        	}
			
	iteration_I++;  
	// End of outer iteration loop
		/* get the wall time for the end of the outer loop */
		self->outeritsendtime = MPI_Wtime();
		/* add time to the total time spent in outer iterations: */
		self->totalouteritstime = self->totalouteritstime + (-self->outeritsinitialtime + self->outeritsendtime);
		/* reset the start and end times for the outer iterations back to 0 - probably not strictly necessary, but done just in case */
		self->outeritsinitialtime = 0;
		self->outeritsendtime = 0;
		/* add the outer loop iterations to the total outer iterations */
		self->totalnumouterits++; 
	}  while ( (*chosenResidual > self->tolerance) || (iteration_I<minIterations) );  
//	}  while ( *chosenResidual > self->tolerance );

	Journal_DPrintfL( self->debug, 1, "Pressure solution converged. Exiting uzawa \n ");
	
	/* STEP 5:  Check all the relevant residuals and report back */
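	/* The summary below reports (when the info stream is enabled): the final Uzawa iteration count and
	   residual, |G^T u|/|u| (a measure of how well the constraint is satisfied), the momentum equation
	   residual |f - K u - G p|/|f| and its counterpart weighted by the inverse diagonal of K, and the
	   norms and extrema of u and p. */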
	
	if (Stream_IsEnable( self->info ) ) {
	
	/* This information should be in an info stream */
	Journal_PrintfL( self->info, 1, "Summary:\n");
	Journal_PrintfL( self->info, 1, "  Uzawa its. = %04d , Uzawa residual = %12.13e\n", iteration_I, relResidual );
	MatMultTranspose( G_Mat, uVec, rVec );
	VecNorm( rVec, NORM_2, &rVecNorm );
	VecNorm( uVec, NORM_2, &uVecNorm );
	divU = rVecNorm / uVecNorm;
	Journal_PrintfL( self->info, 1, "  |G^T u|/|u|               = %.8e\n", divU);
	
	/* Residual for the momentum equation 
		Compute r = || F - Ku - Gp || / || F ||
	*/
	
	MatMult( G_Mat, qVec, vStarVec );
	MatMultAdd( K_Mat, uVec, vStarVec, fTempVec );
	VecAYPX( fTempVec, -1.0, fVec );
	
	VecNorm( fTempVec, NORM_2, &fTempVecNorm );
	VecNorm( fVec, NORM_2, &fVecNorm );
	momentumEquationResidual = fTempVecNorm / fVecNorm;
	Journal_PrintfL( self->info, 1, "  |f - K u - G p|/|f|       = %.8e\n", momentumEquationResidual );
	Journal_Firewall( isGoodNumber( momentumEquationResidual ), errorStream, 
			"Bad residual for the momentum equation (|| F - Ku - Gp || / || F || = %g):\n"
			"\tCheck to see if forcing term is zero or nan - \n\t|| F - Ku - Gp || = %g \n\t|| F || = %g.\n", 
			momentumEquationResidual,
			fTempVecNorm, fVecNorm );
		
	/* "Preconditioned"	residual for the momentum equation 
	 		r_{w} = || Q_{K}(r) || / || Q_{K}(F)
			fTempVec contains the residual but is overwritten once used
			vStarVec is used to hold the diagonal preconditioner Q_{K} 
	*/
	
	MatGetDiagonal( K_Mat, vStarVec );
	VecReciprocal( vStarVec );
	VecPointwiseMult( vStarVec, fTempVec, fTempVec );
	VecNorm( fTempVec, NORM_2, &weightedResidual );
	VecPointwiseMult( vStarVec, fVec, fTempVec );
	VecNorm( fTempVec, NORM_2, &weightedVelocityScale );
		
	Journal_PrintfL( self->info, 1, "  |f - K u - G p|_w/|f|_w   = %.8e\n", weightedResidual / weightedVelocityScale );	
		
	/* Report back on the solution - velocity and pressure 
	 Note - correction for dof in Vrms ??
	*/

	VecNorm( uVec, NORM_INFINITY, &uVecNormInf );
	VecNorm( uVec, NORM_2, &uVecNorm );
	VecGetSize( uVec, &uVecSize );
	VecNorm( qVec, NORM_INFINITY, &qVecNormInf );
	VecNorm( qVec, NORM_2, &qVecNorm );
	VecGetSize( qVec, &qVecSize );
        Journal_PrintfL( self->info, 1, "  |u|_{\\infty} = %.8e , u_rms = %.8e\n", 
		uVecNormInf, uVecNorm / sqrt( (double)uVecSize ) );
	Journal_PrintfL( self->info, 1, "  |p|_{\\infty} = %.8e , p_rms = %.8e\n",
               	qVecNormInf, qVecNorm / sqrt( (double)qVecSize ) );

	{	PetscInt lmin,lmax;
		PetscReal min,max;
		VecMax( uVec, &lmax, &max );
		VecMin( uVec, &lmin, &min );
		Journal_PrintfL( self->info, 1, "  min/max(u) = %.8e [%d] / %.8e [%d]\n",min,lmin,max,lmax);
                VecMax( qVec, &lmax, &max );
                VecMin( qVec, &lmin, &min );
                Journal_PrintfL( self->info, 1, "  min/max(p) = %.8e [%d] / %.8e [%d]\n",min,lmin,max,lmax);
        }
	VecSum( qVec, &p_sum );
	Journal_PrintfL( self->info, 1, "  \\sum_i p_i = %.8e \n", p_sum );

	} /* journal stream enabled */

	#if DEBUG
	if ( Stream_IsPrintableLevel( self->debug, 3 ) ) {
		Journal_DPrintf( self->debug, "Velocity solution:\n" );
		_SLE_VectorView( uVec, self->debug );
		Journal_DPrintf( self->debug, "Pressure solution:\n" );
		_SLE_VectorView( qVec, self->debug );
	}
	#endif
	Stream_UnIndentBranch( StgFEM_Debug );

	Stream_SetPrintingRank( self->info, init_info_stream_rank );

	/* Now gather up data for printing out to FrequentOutput file: */
	/* If the SLE is non-linear we need to divide by the number of non-linear iterations; that is done in SystemLinearEquations. */
	if( sle->isNonLinear != True ){
		self->avgnuminnerits = self->totalnuminnerits/self->totalnumouterits;
		self->avgnumouterits = self->totalnumouterits;
		self->avgtimeouterits = (self->totalouteritstime - self->totalinneritstime)/self->totalnumouterits;
		self->avgtimeinnerits = self->totalinneritstime/self->totalnuminnerits;
	}	
}
/* TODO: look at using MPI_Indexed instead */
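/* Polls (via MPI_Test) the outstanding non-blocking receives of particles arriving from neighbouring
 * processors' shadow cells. As each neighbour's batch arrives, every received particle is copied into a
 * free slot in the local particle array and registered with the local cell it now belongs to. */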
void ParticleMovementHandler_FinishReceiveAndUpdateShadowParticlesEnteringMyDomain( ParticleCommHandler* self ) {
	MPI_Status	status;
	Cell_ShadowTransferIndex	stCell_I;
	Cell_LocalIndex			lCell_I;
	Neighbour_Index			nbr_I;
	Cell_ShadowTransferIndex	shadowCellsFromProcCount;
	ShadowInfo*			cellShadowInfo = CellLayout_GetShadowInfo( self->swarm->cellLayout );
	ProcNbrInfo*			procNbrInfo = cellShadowInfo->procNbrInfo;
	Neighbour_Index			nbrCount = procNbrInfo->procNbrCnt;
	Particle_InCellIndex		incomingCellParticleCount;
	Particle_InCellIndex		cParticle_I;
	Particle_Index			lParticle_I;
	Index				incomingParticle_I=0; /*Index into the array of all leaving particle indices */
	Index				incomingParticleSetsNotYetReceivedCount;
	Bool*				incomingParticlesReceived;
	#if DEBUG
	GlobalParticle*                 currParticle;
	#endif

	Journal_DPrintf( self->debug, "In %s():\n", __func__ );
	Stream_IndentBranch( Swarm_Debug );
	
	incomingParticlesReceived = Memory_Alloc_Array_Unnamed( Bool, nbrCount );

	/* Calculate how many particle sets we have to receive */
	incomingParticleSetsNotYetReceivedCount = 0;
	for ( nbr_I=0; nbr_I < nbrCount; nbr_I++ ) {
		incomingParticlesReceived[nbr_I] = False;
		if (self->particlesArrivingFromNbrShadowCellsTotalCounts[nbr_I] > 0) {
			incomingParticleSetsNotYetReceivedCount++;
		}
	}

	while ( incomingParticleSetsNotYetReceivedCount > 0 ) {
		int flag = 0;
		Journal_DPrintfL( self->debug, 3, "%d particle sets still to go...\n", incomingParticleSetsNotYetReceivedCount );
		for ( nbr_I=0; nbr_I < nbrCount; nbr_I++ ) {
			if ( (self->particlesArrivingFromNbrShadowCellsTotalCounts[nbr_I] > 0) &&
				(False == incomingParticlesReceived[nbr_I]) )
			{
				MPI_Test( self->particlesArrivingFromNbrShadowCellsHandles[nbr_I], &flag, &status );
				if ( False == flag ) {
					/* No results yet from this proc -> continue to next. */
					continue;
				}
				else {
					Journal_DPrintfL( self->debug, 3, "Received particles from nbr %d (proc %d):\n",
						nbr_I, procNbrInfo->procNbrTbl[nbr_I] );
					Stream_Indent( self->debug );

					incomingParticle_I = 0;
					shadowCellsFromProcCount = cellShadowInfo->procShadowedCnt[nbr_I];

				
					for ( stCell_I=0; stCell_I < shadowCellsFromProcCount; stCell_I++ ) {

						lCell_I = cellShadowInfo->procShadowedTbl[nbr_I][stCell_I];
						Journal_DPrintfL( self->debug, 3, "Incoming cell %d (local index %d):\n",
							stCell_I, lCell_I );
						Stream_Indent( self->debug );

						incomingCellParticleCount =
							self->particlesArrivingFromNbrShadowCellCounts[nbr_I][stCell_I];

						for ( cParticle_I=0; cParticle_I < incomingCellParticleCount; cParticle_I++ ) {	

							#if DEBUG
							currParticle = (GlobalParticle*)ParticleAt(
								self->particlesArrivingFromNbrShadowCells[nbr_I],
								incomingParticle_I,
								self->swarm->particleExtensionMgr->finalSize );
							Journal_DPrintfL( self->debug, 3, "Handling its PIC %d: - at "
								"(%.2f,%.2f,%.2f)\n", cParticle_I,
								currParticle->coord[0], currParticle->coord[1],
								currParticle->coord[2] );
							#endif

							Stream_Indent( self->debug );

							lParticle_I = ParticleMovementHandler_FindFreeSlotAndPrepareForInsertion( self );

							Swarm_CopyParticleOntoSwarm(
								self->swarm,
								lParticle_I,
								self->particlesArrivingFromNbrShadowCells[nbr_I], incomingParticle_I++ ); 

							Swarm_AddParticleToCell( self->swarm, lCell_I, lParticle_I );
							
							Stream_UnIndent( self->debug );
						}	
						Stream_UnIndent( self->debug );
					}
					incomingParticlesReceived[nbr_I] = True;
					incomingParticleSetsNotYetReceivedCount--;
					Stream_UnIndent( self->debug );
				}
			}
		}
	}	

	Memory_Free( incomingParticlesReceived );

	Stream_UnIndentBranch( Swarm_Debug );
}
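/* Synchronises particle information for shadow cells with neighbouring processors. The communication
 * pattern mirrors the movement handler above (exchange counts, then particles, with non-blocking receives
 * posted early for latency hiding), but note there is no global fallback or hole-filling step here. */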
void ParticleShadowSync_HandleParticleMovementBetweenProcs( ParticleCommHandler* pCommsHandler ) {
	ParticleShadowSync*	self = (ParticleShadowSync*)pCommsHandler;

	Journal_DPrintfL( self->debug, 1, "In %s(), for swarm \"%s\":\n", __func__, self->swarm->name );
	if ( 1 == self->swarm->nProc ) {
		Journal_DPrintfL( self->debug, 1, "Serial run -> nothing to communicate in %s, returning.\n", __func__ );
		Stream_UnIndentBranch( Swarm_Debug );
		return;
	}

	Stream_IndentBranch( Swarm_Debug );
	
	if ( self->swarm->cellShadowCount > 0 ) {
		/* Allocate the recv count arrays and handles */
		
		/*---_ParticleCommHandler_AllocateIncomingCountArrays( (ParticleCommHandler*)self );
		_ParticleCommHandler_AllocateOutgoingCountArrays( (ParticleCommHandler*)self );*/
		self->allocateIncomingCountArrays( (ParticleCommHandler*)self );
		self->allocateOutgoingCountArrays( (ParticleCommHandler*)self );



		/* First thing to do is begin non-blocking receive of incoming particles (for latency hiding) */
		/*---_ParticleCommHandler_BeginReceiveOfIncomingParticleCounts( (ParticleCommHandler*)self );*/
		self->beginReceiveOfIncomingParticleCounts( (ParticleCommHandler*)self );


		/* Do a blocking send of outgoing counts, so our nbrs know what to receive */
		//_ParticleCommHandler_SendParticleTotalsInShadowCellsToNbrs( (ParticleCommHandler*)self );
	
		/*---_ParticleShadowSync_SendParticleTotalsInShadowCellsToNbrs( (ParticleCommHandler*)self );*/
		self->sendOutgoingParticleCounts( (ParticleCommHandler*)self );

		
		/* Now need to make sure that incoming particle counts are here, then begin receiving particles
		   (we do this as early as possible for latency-hiding purposes). */
		
		/*---_ParticleShadowSync_FinishReceiveOfIncomingParticleCounts( (ParticleCommHandler*)self );*/
		self->finishReceiveOfIncomingParticleCounts( (ParticleCommHandler*)self );


		/*---_ParticleShadowSync_BeginReceiveOfIncomingParticles( (ParticleCommHandler*)self );
		_ParticleShadowSync_SendShadowParticles( (ParticleCommHandler*)self );*/
		self->beginReceiveOfIncomingParticles( (ParticleCommHandler*)self );
		self->beginSendingParticles( (ParticleCommHandler*)self );


		/*---_ParticleCommHandler_ConfirmOutgoingSendsCompleted( (ParticleCommHandler*)self );
		_ParticleShadowSync_FinishReceiveOfIncomingParticles( self );*/
		self->confirmOutgoingSendsCompleted( (ParticleCommHandler*)self );
		self->finishReceiveOfIncomingParticlesAndUpdateIndices( (ParticleCommHandler*)self );


		/*---_ParticleCommHandler_FreeOutgoingArrays( (ParticleCommHandler*)self );
		_ParticleCommHandler_FreeIncomingArrays( (ParticleCommHandler*)self );*/
		self->freeIncomingArrays( (ParticleCommHandler*)self );
		self->freeOutgoingArrays( (ParticleCommHandler*)self );


		/* This is where our own data structure comes into play -- Jules and Raq */
		//----_ParticleCommHandler_AllocateIncomingParticlesArrays( (ParticleCommHandler*)self );
		//----_ParticleCommHandler_BeginReceiveOfIncomingParticles( (ParticleCommHandler*)self );
			
		/* OK, now begin sending out particles we know need to go to nbrs */
		//----_ParticleCommHandler_AllocateOutgoingParticlesArrays( (ParticleCommHandler*)self );
		//----_ParticleCommHandler_BeginSendingParticlesInShadowCellsToNbrs( (ParticleCommHandler*)self );
	}
	
	MPI_Barrier( self->swarm->comm );

	_ParticleCommHandler_ZeroShadowCommStrategyCounters( (ParticleCommHandler*)self );
	
	Stream_UnIndentBranch( Swarm_Debug );
}
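/* Wraps a single particle's coordinate back into the domain for each periodic boundary it has crossed:
 * a particle that has moved a distance d past the min wall reappears at (maxWall - d), and vice versa.
 * For example, with a periodic boundary on the I axis spanning [0,1], a particle advected to -0.1 is
 * relocated to 0.9. Per-boundary counters are kept and reported once the last local particle is processed. */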
void PeriodicBoundariesManager_UpdateParticle( void* periodicBCsManager, Particle_Index lParticle_I ) {
	Axis								boundaryAxis;	
	PeriodicBoundariesManager*	self = (PeriodicBoundariesManager*)periodicBCsManager;
	double							difference = 0.0;
	GlobalParticle*				particle = NULL;
	Index								perBoundary_I = 0;
	PeriodicBoundary*				perBoundary = NULL;

	Journal_DPrintfL( self->debug, 2, "In %s:\n", __func__ );
	Stream_Indent( self->debug );

	particle = (GlobalParticle*)Swarm_ParticleAt( self->swarm, lParticle_I );

	Journal_DPrintfL( self->debug, 2, "Checking particle %d at (%.4g,%.4g,%.4g)\n", lParticle_I, particle->coord[0], particle->coord[1], particle->coord[2] );

	for ( perBoundary_I = 0; perBoundary_I < self->count; perBoundary_I++ ) {
		perBoundary = &self->boundaries[perBoundary_I];
		boundaryAxis = perBoundary->axis;

		Journal_DPrintfL( self->debug, 2, "Checking axis %d:\n", boundaryAxis );
		Stream_Indent( self->debug );

		if ( particle->coord[boundaryAxis] < perBoundary->minWall ) {
			Journal_DPrintfL( self->debug, 3, "coord is < min wall %.4f:\n", perBoundary->minWall );
			difference = perBoundary->minWall - particle->coord[boundaryAxis];
			particle->coord[boundaryAxis] = perBoundary->maxWall - difference;
			perBoundary->particlesUpdatedMinEndCount++;
			Journal_DPrintfL( self->debug, 3, "moving to (%.4f,%.4f,%.4f).\n", particle->coord[I_AXIS], particle->coord[J_AXIS], particle->coord[K_AXIS] );
		}
		else if ( particle->coord[perBoundary->axis] > perBoundary->maxWall ) {
			Journal_DPrintfL( self->debug, 3, "coord is > max wall %.4f:\n", perBoundary->maxWall );
			difference = particle->coord[boundaryAxis] - perBoundary->maxWall; 
			particle->coord[boundaryAxis] = perBoundary->minWall + difference;
			perBoundary->particlesUpdatedMaxEndCount++;
			Journal_DPrintfL( self->debug, 3, "moving to (%.4f,%.4f,%.4f).\n", particle->coord[I_AXIS], particle->coord[J_AXIS], particle->coord[K_AXIS] );
		}
		Stream_UnIndent( self->debug );
	}	

	Stream_UnIndent( self->debug );

	/* TODO: this is a bit of a hack to print this here when lParticle_I == swarm->particleLocalCount - 1, but it's
	the only way I can see given this func is part of the SwarmAdvector intermediate. Should really be a 
	function on this class that updates all the particles. -- Main.PatrickSunter 15 May 2006 */
	if ( lParticle_I == (self->swarm->particleLocalCount-1) ) {
		PeriodicBoundary*	boundary = NULL;
		Index					perB_I;
	
		Journal_DPrintfL( self->debug, 1, "PeriodicBoundariesManager total particles updated:\n" );
		Stream_Indent( self->debug );

		for ( perB_I = 0; perB_I < self->count; perB_I++ ) {
			boundary = &self->boundaries[perB_I];

			Journal_DPrintfL( self->debug, 1, "Periodic Boundary in %c Axis {%.2f,%.2f}: %d min end, %d max end\n",
				IJKTopology_DimNumToDimLetter[boundary->axis], boundary->minWall, boundary->maxWall,
				boundary->particlesUpdatedMinEndCount, boundary->particlesUpdatedMaxEndCount );
			/* Reset the counters for next time */
			boundary->particlesUpdatedMinEndCount = 0;	
			boundary->particlesUpdatedMaxEndCount = 0;	
		}
		Stream_UnIndent( self->debug );
	}
}