void physx::Pt::collideCellsWithStaticMesh(ParticleCollData* collData, const LocalCellHash& localCellHash,
                                           const GeometryUnion& meshShape, const PxTransform& world2Shape,
                                           const PxTransform& shape2World, PxReal /*cellSize*/,
                                           PxReal /*collisionRange*/, PxReal proxRadius, const PxVec3& /*packetCorner*/)
{
	PX_ASSERT(collData);
	PX_ASSERT(localCellHash.isHashValid);
	PX_ASSERT(localCellHash.numParticles <= PT_SUBPACKET_PARTICLE_LIMIT_COLLISION);
	PX_ASSERT(localCellHash.numHashEntries <= PT_LOCAL_HASH_SIZE_MESH_COLLISION);

	const PxTriangleMeshGeometryLL& meshShapeData = meshShape.get<const PxTriangleMeshGeometryLL>();

	const TriangleMesh* meshData = meshShapeData.meshData;
	PX_ASSERT(meshData);

	// mesh bounds in world space (conservative)
	const PxBounds3 shapeBounds =
	    meshData->getLocalBoundsFast().transformSafe(world2Shape.getInverse() * meshShapeData.scale);

	const bool idtScaleMesh = meshShapeData.scale.isIdentity();

	Cm::FastVertex2ShapeScaling meshScaling;
	if(!idtScaleMesh)
		meshScaling.init(meshShapeData.scale);

	// process the particle cells
	for(PxU32 c = 0; c < localCellHash.numHashEntries; c++)
	{
		const ParticleCell& cell = localCellHash.hashEntries[c];

		if(cell.numParticles == PX_INVALID_U32)
			continue;

		PxBounds3 cellBounds;

		cellBounds.setEmpty();
		PxBounds3 cellBoundsNew(PxBounds3::empty());

		PxU32* it = localCellHash.particleIndices + cell.firstParticle;
		const PxU32* end = it + cell.numParticles;
		for(; it != end; it++)
		{
			const ParticleCollData& particle = collData[*it];
			cellBounds.include(particle.oldPos);
			cellBoundsNew.include(particle.newPos);
		}
		PX_ASSERT(!cellBoundsNew.isEmpty());
		cellBoundsNew.fattenFast(proxRadius);
		cellBounds.include(cellBoundsNew);

		if(!cellBounds.intersects(shapeBounds))
			continue; // early out if (inflated) cell doesn't intersect mesh bounds

		// opcode query: cell bounds against shape bounds in unscaled mesh space
		PxcContactCellMeshCallback callback(collData, &(localCellHash.particleIndices[cell.firstParticle]),
		                                    cell.numParticles, *meshData, meshScaling, proxRadius, NULL, shape2World);
		testBoundsMesh(*meshData, world2Shape, meshScaling, idtScaleMesh, cellBounds, callback);
	}
}
	bool getPCMConvexData(const Gu::GeometryUnion& shape, Cm::FastVertex2ShapeScaling& scaling, PxBounds3& bounds, PolygonalData& polyData)
	{
		const PxConvexMeshGeometryLL& shapeConvex = shape.get<const PxConvexMeshGeometryLL>();

		const bool idtScale = shapeConvex.scale.isIdentity();
		if(!idtScale)
			scaling.init(shapeConvex.scale);

		PX_ASSERT(!shapeConvex.hullData->mAABB.isEmpty());
		bounds = PxBounds3::transformFast(scaling.getVertex2ShapeSkew(), shapeConvex.hullData->mAABB);

		getPCMPolygonalData_Convex(&polyData, shapeConvex.hullData, scaling);

		return idtScale;
	}
void collideWithMeshTriangles(PxsParticleCollData& collisionShapeData, const Gu::InternalTriangleMeshData& /*meshData*/, const Cm::FastVertex2ShapeScaling& scale,
											  const PxVec3* triangleVerts, PxU32 numTriangles, PxReal proxRadius, const PxTransform& shape2World)
{	
	bool hasCC = ((collisionShapeData.localFlags & PXS_FLUID_COLL_FLAG_CC) || (collisionShapeData.localFlags & PXS_FLUID_COLL_FLAG_L_CC));

	PxVec3 tmpSurfaceNormal(0.0f);
	PxVec3 tmpSurfacePos(0.0f);
	PxVec3 tmpProxSurfaceNormal(0.0f);
	PxVec3 tmpProxSurfacePos(0.0f);
	PxReal tmpCCTime(0.0f);
	PxReal tmpDistOldToSurface(0.0f);
	
	for (PxU32 i = 0; i < numTriangles; ++i)
	{
		PxVec3 v0 = scale * triangleVerts[i*3];
		PxVec3 v1 = scale * triangleVerts[i*3 + 1];
		PxVec3 v2 = scale * triangleVerts[i*3 + 2];
		if (scale.flipsNormal())
			Ps::swap<PxVec3>(v1, v2);

		PxU32 tmpFlags = collideWithMeshTriangle(tmpSurfaceNormal, tmpSurfacePos, tmpProxSurfaceNormal, tmpProxSurfacePos,
			tmpCCTime, tmpDistOldToSurface, collisionShapeData.localOldPos, collisionShapeData.localNewPos, 
			v0, v1-v0, v2-v0, hasCC, collisionShapeData.restOffset, proxRadius);

		updateCollShapeData(collisionShapeData, hasCC, tmpFlags, tmpCCTime, tmpDistOldToSurface, tmpSurfaceNormal,
			tmpSurfacePos, tmpProxSurfaceNormal, tmpProxSurfacePos, shape2World);
	}	
}
void testBoundsMesh(	
	const Gu::InternalTriangleMeshData& meshData,
	const PxTransform& world2Shape,
	const PxTransform& s2w,
	const Cm::FastVertex2ShapeScaling& meshScaling,
	bool idtScaleMesh,
	const PxBounds3& worldBounds,
	PxcContactCellMeshCallback& callback)
{
	// Find colliding triangles.
	// Setup an OBB for the fluid particle cell (in local space of shape)
	// assuming uniform scaling in most cases, using the pose as box rotation
	// if scaling is non-uniform, the bounding box is conservative

	PxBounds3 boundsInMesh;
	PX_ASSERT(!worldBounds.isEmpty());
	boundsInMesh = PxBounds3::transformFast(world2Shape, worldBounds);

	Gu::Box vertexSpaceOBB(boundsInMesh.getCenter(), boundsInMesh.getExtents(), PxMat33(PxIdentity));

	if(!idtScaleMesh)
		meshScaling.transformQueryBounds(vertexSpaceOBB.center, vertexSpaceOBB.extents);

	// Set collider flags (has to be done each time again!)
	Gu::RTreeMidphaseData hmd;
	meshData.mCollisionModel.getRTreeMidphaseData(hmd);	
	MPT_SET_CONTEXT("flui", s2w, meshScaling); PX_UNUSED(s2w);
	Gu::MeshRayCollider::collideOBB(vertexSpaceOBB, true, hmd, callback);
}
void collideWithMeshTriangles(ParticleCollData& collisionShapeData, const TriangleMesh& /*meshData*/,
                              const Cm::FastVertex2ShapeScaling& scale, const PxVec3* triangleVerts, PxU32 numTriangles,
                              PxReal proxRadius, const PxTransform& shape2World)
{
	bool hasCC = ((collisionShapeData.localFlags & ParticleCollisionFlags::CC) ||
	              (collisionShapeData.localFlags & ParticleCollisionFlags::L_CC));

	PxVec3 tmpSurfaceNormal(0.0f);
	PxVec3 tmpSurfacePos(0.0f);
	PxVec3 tmpProxSurfaceNormal(0.0f);
	PxVec3 tmpProxSurfacePos(0.0f);
	PxReal tmpCCTime(0.0f);
	PxReal tmpDistOldToSurface(0.0f);

	for(PxU32 i = 0; i < numTriangles; ++i)
	{
		const PxI32 winding = scale.flipsNormal() ? 1 : 0;
		PxVec3 v0 = scale * triangleVerts[i * 3];
		PxVec3 v1 = scale * triangleVerts[i * 3 + 1 + winding];
		PxVec3 v2 = scale * triangleVerts[i * 3 + 2 - winding];

		PxU32 tmpFlags =
		    collideWithMeshTriangle(tmpSurfaceNormal, tmpSurfacePos, tmpProxSurfaceNormal, tmpProxSurfacePos, tmpCCTime,
		                            tmpDistOldToSurface, collisionShapeData.localOldPos, collisionShapeData.localNewPos,
		                            v0, v1 - v0, v2 - v0, hasCC, collisionShapeData.restOffset, proxRadius);

		updateCollShapeData(collisionShapeData, hasCC, tmpFlags, tmpCCTime, tmpDistOldToSurface, tmpSurfaceNormal,
		                    tmpSurfacePos, tmpProxSurfaceNormal, tmpProxSurfacePos, shape2World);
	}
}
bool Gu::getConvexData(const Gu::GeometryUnion& shape, Cm::FastVertex2ShapeScaling& scaling, PxBounds3& bounds, PolygonalData& polyData)
{
	const PxConvexMeshGeometryLL& shapeConvex = shape.get<const PxConvexMeshGeometryLL>();

	const bool idtScale = shapeConvex.scale.isIdentity();
	if(!idtScale)
		scaling.init(shapeConvex.scale);

	// PT: this version removes all the FCMPs and almost all LHS. This is temporary until
	// the legacy 3x3 matrix totally vanishes but meanwhile do NOT do useless matrix conversions,
	// it's a perfect recipe for LHS.
	PX_ASSERT(!shapeConvex.hullData->mAABB.isEmpty());
	bounds = PxBounds3::transformFast(scaling.getVertex2ShapeSkew(), shapeConvex.hullData->mAABB);

	getPolygonalData_Convex(&polyData, shapeConvex.hullData, scaling);

	// PT: non-uniform scaling invalidates the "internal objects" optimization, since our internal sphere
	// might become an ellipsoid or something. Just disable the optimization if scaling is used...
	if(!idtScale)
		polyData.mInternal.reset();

	return idtScale;
}
void physx::collideWithStaticMesh(PxU32 numParticles, PxsParticleCollData* collData,
								  PxsFluidParticleOpcodeCache* opcodeCaches, const Gu::GeometryUnion& meshShape,
								  const PxTransform& world2Shape, const PxTransform& shape2World, PxReal /*cellSize*/, PxReal collisionRange, PxReal proxRadius)
{
	PX_ASSERT(collData);
	PX_ASSERT(opcodeCaches);

	const PxTriangleMeshGeometryLL& meshShapeData = meshShape.get<const PxTriangleMeshGeometryLL>();
	
	const bool idtScaleMesh = meshShapeData.scale.isIdentity();
	Cm::FastVertex2ShapeScaling meshScaling;
	if(!idtScaleMesh)
		meshScaling.init(meshShapeData.scale);

	const PxF32 maxCacheBoundsExtent = 4*collisionRange + proxRadius;
	const PxsFluidParticleOpcodeCache::QuantizationParams quantizationParams = 
		PxsFluidParticleOpcodeCache::getQuantizationParams(maxCacheBoundsExtent);

	const Gu::InternalTriangleMeshData* meshData = meshShapeData.meshData;
	PX_ASSERT(meshData);

	bool isSmallMesh = meshData->mNumTriangles <= 0xffff;
	PxU32 cachedTriangleBuffer[PxsFluidParticleOpcodeCache::sMaxCachedTriangles];

	PxVec3 extent(proxRadius);

	for (PxU32 i = 0; i < numParticles; ++i)
	{
		//had to make this non-const to be able to update cache bits
		PxsParticleCollData& particle = collData[i];
		PxsFluidParticleOpcodeCache& cache = opcodeCaches[i];
		
		PxBounds3 bounds;
		{
			bounds = PxBounds3(particle.newPos - extent, particle.newPos + extent); 
			bounds.include(particle.oldPos);
		}
		
		PxU32 numTriangles = 0;
		const PxU32* triangles = NULL;
		bool isCached = cache.read(particle.particleFlags.low, numTriangles, cachedTriangleBuffer, bounds, quantizationParams, &meshShape, isSmallMesh);

		if (isCached)
		{
			triangles = cachedTriangleBuffer;
			if (numTriangles > 0)
			{
				PxVec3 triangleVerts[PxsFluidParticleOpcodeCache::sMaxCachedTriangles*3];
				const PxU32* triangleIndexIt = triangles;
				for (PxU32 j = 0; j < numTriangles; ++j, ++triangleIndexIt)
				{
					Gu::MeshInterface::GetTriangleVerts((PxU32)isSmallMesh, (Cm::MemFetchPtr)meshData->mTriangles, (Cm::MemFetchPtr)meshData->mVertices, *triangleIndexIt,
						triangleVerts[j*3], triangleVerts[j*3 + 1], triangleVerts[j*3 + 2]);
				}	
				
				collData[i].localDcNum = 0.0f;
				collData[i].localSurfaceNormal = PxVec3(0);
				collData[i].localSurfacePos = PxVec3(0);

				collideWithMeshTriangles(collData[i], *meshData, meshScaling, triangleVerts, numTriangles, proxRadius, shape2World);				
			}
		}
		else if ((particle.particleFlags.low & PxvInternalParticleFlag::eGEOM_CACHE_BIT_0) != 0 && (particle.particleFlags.low & PxvInternalParticleFlag::eGEOM_CACHE_BIT_1) != 0)
		{
			// don't update the cache since it's already successfully in use
			PxcContactCellMeshCallback callback(collData, &i, 1, *meshData, meshScaling, proxRadius, NULL, shape2World);
				
			testBoundsMesh(*meshData, world2Shape, shape2World, meshScaling, idtScaleMesh, bounds, callback);		
		}
		else
		{
			// compute new conservative bounds for cache
			PxBounds3 cachedBounds;
			{
				PxVec3 predictedExtent(proxRadius*1.5f);

				//add future newpos + extent
				PxVec3 newPosPredicted = particle.newPos + 3.f*(particle.newPos - particle.oldPos);
				cachedBounds = PxBounds3(newPosPredicted - predictedExtent, newPosPredicted + predictedExtent);
				
				//add next oldpos + extent
				cachedBounds.include(PxBounds3(particle.newPos - predictedExtent, particle.newPos + predictedExtent));
				
				//add old pos
				cachedBounds.include(particle.oldPos);
			}
         
			cache.init(cachedTriangleBuffer);

			//the callback function will call collideWithMeshTriangles()
			PxcContactCellMeshCallback callback(collData, &i, 1, *meshData, meshScaling, proxRadius, &cache, shape2World);

			// opcode query: cache bounds against shape bounds in unscaled mesh space
			testBoundsMesh(*meshData, world2Shape, shape2World, meshScaling, idtScaleMesh, cachedBounds, callback);		
	
			//update cache
			cache.write(particle.particleFlags.low, cachedBounds, quantizationParams, meshShape, isSmallMesh);
		}
	}
}
bool physx::Gu::computeConvex_HeightFieldMTD(const PxHeightFieldGeometry& heightFieldGeom, const PxTransform& pose, const PxConvexMeshGeometry& convexGeom, const PxTransform& convexPose, const PxReal inflation,
						  const PxReal distance, const bool isDoubleSided, const PxU32 flags, PxSweepHit& hit)
{
	using namespace Ps::aos;

	const Gu::HeightFieldUtil hfUtil(heightFieldGeom);

	
	const Vec3V zeroV = V3Zero();
	Gu::MeshPersistentContact manifoldContacts[64]; 
	PxU32 numContacts = 0;
	FloatV distV = FLoad(distance);
	
	
	bool foundInitial = false;
	static PxU32 iterations = 2;

	Gu::ConvexMesh* cm = static_cast<Gu::ConvexMesh*>(convexGeom.convexMesh);

	Gu::ConvexHullData* hullData = &cm->getHull();

	const bool idtScaleConvex = convexGeom.scale.isIdentity();
	
	 Cm::FastVertex2ShapeScaling convexScaling;
	if(!idtScaleConvex)
			convexScaling.init(convexGeom.scale);

	const PxVec3 _shapeSpaceCenterOfMass = convexScaling * hullData->mCenterOfMass;
	const Vec3V shapeSpaceCenterOfMass = V3LoadU(_shapeSpaceCenterOfMass);

	const QuatV q0 = QuatVLoadU(&convexPose.q.x);
	const Vec3V p0 = V3LoadU(&convexPose.p.x);
	PsTransformV convexTransformV(p0, q0);

	const Vec3V vScale = V3LoadU(convexGeom.scale.scale);
	const QuatV vQuat = QuatVLoadU(&convexGeom.scale.rotation.x);
	Gu::ConvexHullV convexHull(hullData, zeroV, vScale, vQuat);
	PX_ALIGN(16, PxU8 convexBuff[sizeof(SupportLocalImpl<ConvexHullV>)]);

	const FloatV convexMargin = Gu::CalculatePCMConvexMargin(hullData, vScale);
	const FloatV inflationV = FAdd(FLoad(inflation), convexMargin);
	PxReal boundInflation;
	FStore(inflationV, &boundInflation);


	LocalContainer(tempContainer, 128);


	Vec3V closestA = zeroV, closestB = zeroV, normal = zeroV;
	Vec3V worldNormal=zeroV, worldContactA=zeroV;//, worldContactB=zeroV;
	PxU32 triangleIndex = 0xfffffff;

	Vec3V translation = zeroV;

	Gu::PolygonalData polyData;
	getPCMConvexData(convexHull, idtScaleConvex, polyData);

	FloatV mtd;
	Vec3V center = p0;
	PxTransform tempConvexPose = convexPose;
	Cm::Matrix34 meshToWorldSkew(pose);
	
	
	for(PxU32 i=0; i<iterations; ++i)
	{

		tempContainer.Reset();

		//ML:: construct convex hull data
	
		V3StoreU(center, tempConvexPose.p);
		convexTransformV.p = center;

		SupportLocal* convexMap = (idtScaleConvex ? (SupportLocal*)PX_PLACEMENT_NEW(convexBuff, SupportLocalImpl<ConvexHullNoScaleV>)((ConvexHullNoScaleV&)convexHull, convexTransformV, convexHull.vertex2Shape, convexHull.shape2Vertex, idtScaleConvex) : 
		(SupportLocal*)PX_PLACEMENT_NEW(convexBuff, SupportLocalImpl<ConvexHullV>)(convexHull, convexTransformV, convexHull.vertex2Shape, convexHull.shape2Vertex, idtScaleConvex));

		convexMap->setShapeSpaceCenterofMass(shapeSpaceCenterOfMass);
		
		Gu::Box hullOBB;
		Gu::computeOBBAroundConvex(hullOBB, convexGeom, cm, tempConvexPose);
		hullOBB.extents.x += boundInflation;
		hullOBB.extents.y += boundInflation;
		hullOBB.extents.z += boundInflation;

		PxBounds3 bounds = PxBounds3::basisExtent(hullOBB.center, hullOBB.rot, hullOBB.extents);

		midPhaseQueryHF(hfUtil, pose, bounds, tempContainer, flags);

		// Get results
		PxU32 nbTriangles = tempContainer.GetNbEntries();

		if(!nbTriangles)
			break;
	
		// Move to AABB space
		Cm::Matrix34 worldToConvex(tempConvexPose.getInverse());
		const Cm::Matrix34 meshToConvex = worldToConvex*meshToWorldSkew;

		Ps::aos::Mat33V rot(V3LoadU(meshToConvex.base0), V3LoadU(meshToConvex.base1), V3LoadU(meshToConvex.base2));
		Ps::aos::PsMatTransformV meshToConvexV(V3LoadU(meshToConvex.base3), rot);


		PxU32* indices = tempContainer.GetEntries();

		bool hadContacts = false;

		PxU32 nbBatches = (nbTriangles + BATCH_TRIANGLE_NUMBER-1)/BATCH_TRIANGLE_NUMBER;
		mtd = FMax();
		MTDTriangle triangles[BATCH_TRIANGLE_NUMBER];
		for(PxU32 a = 0; a < nbBatches; ++a)
		{
			PxU32 startIndex = a * BATCH_TRIANGLE_NUMBER;   
			PxU32 nbTrigs = PxMin(nbTriangles - startIndex, BATCH_TRIANGLE_NUMBER);
			for(PxU32 k=0; k<nbTrigs; k++)
			{
				//triangle vertext space
				const PxU32 triangleIndex1 = indices[startIndex+k];
				hfUtil.getTriangle(pose, triangles[k], NULL, NULL, triangleIndex1, false, false);
				triangles[k].extraTriData = 0x38;
			}

			//ML: mtd has back face culling, so if the capsule's center is below the triangle, we won't generate any contacts
			hadContacts = calculateMTD(polyData, convexMap, convexTransformV, meshToConvexV, isDoubleSided, inflationV,  triangles, nbTrigs, startIndex, manifoldContacts, numContacts, normal, closestA, closestB, triangleIndex, mtd) || hadContacts;
		}
	
		if(!hadContacts)
			break;

		triangleIndex = indices[triangleIndex];

		foundInitial = true;

		distV =mtd;
		worldNormal = convexTransformV.rotate(normal);
		worldContactA = convexTransformV.transform(closestA);

		if(FAllGrtrOrEq(FZero(), distV))
		{
			Vec3V t = V3Scale(worldNormal, mtd);
			translation = V3Sub(translation, t);
			center = V3Sub(center, t);
		}
		else
		{
			if(i == 0)
			{
				//First iteration so keep this normal
				hit.distance = 0.f;
				V3StoreU(worldContactA, hit.position);
				V3StoreU(worldNormal, hit.normal);
				hit.faceIndex = triangleIndex;
				return true;
			}
			break;
		}
	}

	worldNormal = V3Normalize(translation);
	distV = FNeg(V3Length(translation));
	if(foundInitial)
	{
		//transform closestA to world space
		FStore(distV, &hit.distance);
		V3StoreU(worldContactA, hit.position);
		V3StoreU(worldNormal, hit.normal);
		hit.faceIndex = triangleIndex;
	}

	return foundInitial;
}