Beispiel #1
0
/// Brings one collision object and its shape from main memory into SPU local
/// store and records what the raycast code needs about it.
///
/// Three dependent DMA reads are issued, each one waited on before the next
/// starts: the SpuCollisionObjectWrapper found at objectWrapper (tag 1), the
/// btCollisionObject that wrapper points to (tag 2), and finally the shape
/// data through dmaCollisionShape (waited on via tag 1).
///
/// @param gatheredObjectData  receives the world transform, collision margin,
///                            shape type, main-memory shape address, local-store
///                            shape pointer and primitive dimensions
/// @param lsMemPtr            local-store scratch area the DMA reads land in
/// @param objectWrapper       main-memory address of the wrapper to load
void GatherCollisionObjectAndShapeData (RaycastGatheredObjectData* gatheredObjectData, RaycastTask_LocalStoreMemory* lsMemPtr, ppu_address_t objectWrapper)
{
	/* Step 1: the wrapper itself */
	const int wrapperBytes = sizeof(SpuCollisionObjectWrapper);
	cellDmaGet(&lsMemPtr->gCollisionObjectWrapper, objectWrapper, wrapperBytes, DMA_TAG(1), 0, 0);
	cellDmaWaitTagStatusAll(DMA_MASK(1));

	/* Step 2: the collision object the wrapper refers to */
	const int objectBytes = sizeof(btCollisionObject);
	const ppu_address_t objectEa = lsMemPtr->getCollisionObjectWrapper()->getCollisionObjectPtr();
	cellDmaGet(&lsMemPtr->gColObj, objectEa, objectBytes, DMA_TAG(2), 0, 0);
	cellDmaWaitTagStatusAll(DMA_MASK(2));

	/* Step 3: copy out the per-object facts now that both are resident */
	gatheredObjectData->m_worldTransform = lsMemPtr->getColObj()->getWorldTransform();
	gatheredObjectData->m_collisionMargin = lsMemPtr->getCollisionObjectWrapper()->getCollisionMargin ();
	gatheredObjectData->m_shapeType = lsMemPtr->getCollisionObjectWrapper()->getShapeType ();
	gatheredObjectData->m_collisionShape = (ppu_address_t)lsMemPtr->getColObj()->getCollisionShape();
	gatheredObjectData->m_spuCollisionShape = (void*)&lsMemPtr->gCollisionShape.collisionShape;

	/* Step 4: pull the shape data into the local-store shape slot */
	dmaCollisionShape (gatheredObjectData->m_spuCollisionShape, gatheredObjectData->m_collisionShape, 1, gatheredObjectData->m_shapeType);
	cellDmaWaitTagStatusAll(DMA_MASK(1));

	/* Non-convex shapes get unit dimensions; convex ones report their own */
	if (!btBroadphaseProxy::isConvex (gatheredObjectData->m_shapeType))
	{
		gatheredObjectData->m_primitiveDimensions = btVector3(1.0, 1.0, 1.0);
	}
	else
	{
		btConvexInternalShape* localConvex = (btConvexInternalShape*)gatheredObjectData->m_spuCollisionShape;
		gatheredObjectData->m_primitiveDimensions = localConvex->getImplicitShapeDimensions ();
	}
}
/// Handles a "decode set" command on the SPU.
///
/// Sequence: DMA the task descriptor at uiPtr into gviSpursSpeexTaskDesc,
/// DMA the decoder state buffer in, run gviSpursSpeexDecodeSet, then DMA the
/// task output and the (possibly updated) decoder state back to main memory.
/// Every transfer uses tag 1 and is waited on before the next step.
///
/// @param uiPtr  main-memory address of the SpursSpeexTaskDesc for this task
void processDecodeSet(unsigned int uiPtr)
{
	SpursSpeexTaskOutput spuOutput;
	cellDmaGet(&gviSpursSpeexTaskDesc, uiPtr, sizeof(SpursSpeexTaskDesc), DMA_TAG(1), 0, 0);
	cellDmaWaitTagStatusAll(DMA_MASK(1));

	//spuDebugPrintf("[Speex][SPU] CMD_SAMPLE_TASK_DECODESET_COMMAND\n");

	if (gviSpursSpeexTaskDesc.mDebugPause)
	{
		snPause();
	}

	// Bring the decoder state into local store before decoding.
	cellDmaLargeGet(gviSpursSpeexStateBuffer, (uint64_t)gviSpursSpeexTaskDesc.mSpeexStateBuffer, SPEEX_DECODER_STATE_BUFFER_SIZE, DMA_TAG(1), 0,0);
	cellDmaWaitTagStatusAll(DMA_MASK(1));
	
	gviSpursSpeexDecodeSet(&spuOutput);

	if (spuOutput.mSpeexReturnCode < 0)
	{
		// This is the decode path; the message previously claimed an encode failure.
		spuDebugPrintf("SPU: failed to decode, ret = %d\n", spuOutput.mSpeexReturnCode);
	}

	// Report the result structure back to the PPU side.
	cellDmaPut(&spuOutput, (uint64_t)gviSpursSpeexTaskDesc.mSpeexTaskOutput, sizeof(SpursSpeexTaskOutput), DMA_TAG(1),
		0, 0);
	cellDmaWaitTagStatusAll(DMA_MASK(1));

	// Write the decoder state back so the next task continues from it.
	cellDmaLargePut(gviSpursSpeexStateBuffer, (uint64_t)gviSpursSpeexTaskDesc.mSpeexStateBuffer, SPEEX_DECODER_STATE_BUFFER_SIZE, DMA_TAG(1), 0,0);
	cellDmaWaitTagStatusAll(DMA_MASK(1));

	//spuDebugPrintf("[Speex][SPU] buffer dma done\n");
}
void processDecodeInit(unsigned int uiPtr)
{
	SpursSpeexTaskOutput spuOutput;
	cellDmaGet(&gviSpursSpeexTaskDesc, uiPtr, sizeof(SpursSpeexTaskDesc), DMA_TAG(1), 0, 0);
	cellDmaWaitTagStatusAll(DMA_MASK(1));

	//spuDebugPrintf("[Speex][SPU] CMD_SAMPLE_TASK_DECODE_INIT_COMMAND\n");
	
	if (gviSpursSpeexTaskDesc.mDebugPause)
	{
		snPause();
	}

	gviSpursSpeexDecoderInitialize(&spuOutput);

	if (spuOutput.mSpeexReturnCode < 0)
	{
		spuDebugPrintf("[Speex][SPU] failed to initialize decoder, ret = %d\n", spuOutput.mSpeexReturnCode);
	}

	cellDmaPut(&spuOutput,	(uint64_t)gviSpursSpeexTaskDesc.mSpeexTaskOutput, sizeof(SpursSpeexTaskOutput), DMA_TAG(1),
		0, 0);
	cellDmaWaitTagStatusAll(DMA_MASK(1));

	cellDmaLargePut(gviSpursSpeexStateBuffer, (uint64_t)gviSpursSpeexTaskDesc.mSpeexStateBuffer, 
		gviSpursSpeexTaskDesc.mSpeexStateBufferSize, DMA_TAG(1), 0,0);
	cellDmaWaitTagStatusAll(DMA_MASK(1));

	//spuDebugPrintf("[Speex][SPU] buffer dma done\n");
}
void procesEncodeInit(unsigned int uiPtr)
{
	SpursSpeexTaskOutput spuOutput;

	//spuDebugPrintf("[Speex][SPU] CMD_SAMPLE_TASK_ENCODE_INIT_COMMAND\n");
	cellDmaGet(&gviSpursSpeexTaskDesc, uiPtr, sizeof(SpursSpeexTaskDesc), DMA_TAG(1), 0, 0);
	cellDmaWaitTagStatusAll(DMA_MASK(1));
	
	if (gviSpursSpeexTaskDesc.mDebugPause)
	{
		snPause();
	}

	gviSpursSpeexEncoderInitialize(&spuOutput);
	if (spuOutput.mSpeexReturnCode < 0)
	{
		spuDebugPrintf("[Speex][SPU] failed to initialize encoder, ret = %d\n", spuOutput.mSpeexReturnCode);
	}

	//spuDebugPrintf("[Speex][SPU] done with initializing things for speex, now returning data via DMA put\n");

	//printGlobalTaskDescData();

	cellDmaPut(&spuOutput,	(uint64_t)gviSpursSpeexTaskDesc.mSpeexTaskOutput, sizeof(SpursSpeexTaskOutput), DMA_TAG(1),
		0, 0);
	cellDmaWaitTagStatusAll(DMA_MASK(1));

	//spuDebugPrintf("[Speex][SPU] task dma done\n");

	cellDmaLargePut(gviSpursSpeexStateBuffer, (uint64_t)gviSpursSpeexTaskDesc.mSpeexStateBuffer, SPEEX_ENCODER_STATE_BUFFER_SIZE, DMA_TAG(1), 0,0);
	cellDmaWaitTagStatusAll(DMA_MASK(1));

	//spuDebugPrintf("[Speex][SPU] buffer dma done\n");
}
/// Encodes one frame of 16-bit samples with Speex on the SPU.
///
/// Reads mInputBufferSize shorts from the descriptor's input buffer, widens
/// them to float, encodes them with the state in gviSpursSpeexStateBuffer and
/// DMAs mOutputBufferSize bytes of output back to the descriptor's output
/// buffer. The number of bytes speex_bits_write produced is reported in
/// spuTaskOut->mSpeexOutBufferSize.
///
/// @param spuTaskOut  result structure; its fields are reset at entry and
///                    mSpeexReturnCode is left at 0
void gviSpursSpeexEncode(SpursSpeexTaskOutput *spuTaskOut)
{
	// Start from a known result state.
	spuTaskOut->mSpeexEncodedFrameSize = 0;
	spuTaskOut->mSpeexInitialized = 1;
	spuTaskOut->mSpeexSamplesPerFrame = 0;
	spuTaskOut->mSpeexReturnCode = 0;
	spuTaskOut->mSpeexOutBufferSize = 0;

	// Working buffers: float samples for the codec, raw shorts, encoded bytes.
	const size_t floatBytes = gviSpursSpeexTaskDesc.mInputBufferSize * sizeof(float);
	const size_t pcmBytes = gviSpursSpeexTaskDesc.mInputBufferSize * sizeof(short);

	float *floatSamples = (float *)memalign(16, floatBytes);
	short *pcmSamples = (short *)memalign(16, pcmBytes);
	char *encodedBytes = (char *)memalign(16, gviSpursSpeexTaskDesc.mOutputBufferSize);

	memset(floatSamples, 0, floatBytes);
	memset(pcmSamples, 0, pcmBytes);
	memset(encodedBytes, 0, gviSpursSpeexTaskDesc.mOutputBufferSize);

	// Pull the raw samples in from main memory.
	cellDmaGet(pcmSamples, (uint64_t)gviSpursSpeexTaskDesc.mInputBuffer, pcmBytes, DMA_TAG(1), 0,0);
	cellDmaWaitTagStatusAll(DMA_MASK(1));

	// The encoder is fed floats, so widen every sample.
	for (unsigned int sample = 0; sample < gviSpursSpeexTaskDesc.mInputBufferSize; sample++)
	{
		floatSamples[sample] = pcmSamples[sample];
	}

	// Re-attach the bit packer to its backing buffer and clear it.
	speex_bits_init_buffer(&gviSpursSpeexBits,gviSpursSpeexBitsBuffer,sizeof(gviSpursSpeexBitsBuffer));
	speex_bits_reset(&gviSpursSpeexBits);

	// Encode, then flatten the bits into the output buffer.
	speex_encode(gviSpursSpeexStateBuffer, floatSamples, &gviSpursSpeexBits);
	spuTaskOut->mSpeexOutBufferSize = speex_bits_write(&gviSpursSpeexBits, (char *)encodedBytes, gviSpursSpeexTaskDesc.mEncodedFrameSize);

	// Ship the encoded bytes back.
	cellDmaPut(encodedBytes, (uint64_t)gviSpursSpeexTaskDesc.mOutputBuffer, gviSpursSpeexTaskDesc.mOutputBufferSize, DMA_TAG(1), 0, 0);
	cellDmaWaitTagStatusAll(DMA_MASK(1));

	free(floatSamples);
	free(pcmSamples);
	free(encodedBytes);
	spuTaskOut->mSpeexReturnCode = 0;
}
/// Decodes one Speex frame on the SPU and writes the 16-bit samples back.
///
/// Reads mInputBufferSize encoded bytes from the descriptor's input buffer,
/// decodes them with the state in gviSpursSpeexStateBuffer into
/// mOutputBufferSize float samples, narrows those to short and DMAs them to
/// the descriptor's output buffer.
///
/// @param spuTaskOut  result structure; only mSpeexReturnCode is written (0).
///                    A non-zero speex_decode result is caught by assert only.
void gviSpursSpeexDecodeAdd(SpursSpeexTaskOutput *spuTaskOut)
{
	char *inBuffer;
	float *speexBuffer;
	short *outBuffer;
	int rcode;
	unsigned int i;
	
	//spuDebugPrintf("[Speex][SPU] allocating buffers for decoding\n");
	speexBuffer = (float *)memalign(16, gviSpursSpeexTaskDesc.mOutputBufferSize * sizeof(float));
	outBuffer = (short *)memalign(16, gviSpursSpeexTaskDesc.mOutputBufferSize * sizeof(short));
	inBuffer = (char *)memalign(16, gviSpursSpeexTaskDesc.mInputBufferSize);

	// Each memset length must match the corresponding allocation above:
	// outBuffer holds shorts, inBuffer holds plain bytes. Clearing inBuffer
	// with a sizeof(short) multiplier would write past the end of the block.
	memset(speexBuffer, 0, gviSpursSpeexTaskDesc.mOutputBufferSize * sizeof(float));
	memset(outBuffer, 0, gviSpursSpeexTaskDesc.mOutputBufferSize * sizeof(short));
	memset(inBuffer, 0, gviSpursSpeexTaskDesc.mInputBufferSize);
	
	
	//spuDebugPrintf("[Speex][SPU] done allocating, getting input data, inbuffer size: %d\n", gSpuSampleTaskDesc.mInputBufferSize);
	cellDmaGet(inBuffer, (uint64_t)gviSpursSpeexTaskDesc.mInputBuffer, gviSpursSpeexTaskDesc.mInputBufferSize, DMA_TAG(1), 0,0);
	cellDmaWaitTagStatusAll(DMA_MASK(1));
	// spuDebugPrintf("[Speex][SPU] done getting input data, preparing for speex to decode\n");
	// read the data into the bits
	// (re)initialize the bits struct
	speex_bits_init_buffer(&gviSpursSpeexBits,gviSpursSpeexBitsBuffer,sizeof(gviSpursSpeexBitsBuffer));

	speex_bits_read_from(&gviSpursSpeexBits, (char *)inBuffer, gviSpursSpeexTaskDesc.mEncodedFrameSize);

	// decode it
	rcode = speex_decode((void *)gviSpursSpeexStateBuffer, &gviSpursSpeexBits, speexBuffer);
	assert(rcode == 0);
	//spuDebugPrintf("[Speex][SPU] done with speex decode\n");
	// convert the output from floats
	for(i = 0 ; i < gviSpursSpeexTaskDesc.mOutputBufferSize ; i++)
		outBuffer[i] = (short)speexBuffer[i];
	
	//spuDebugPrintf("[Speex][SPU] transferring data back\n");
	cellDmaPut(outBuffer, (uint64_t)gviSpursSpeexTaskDesc.mOutputBuffer, gviSpursSpeexTaskDesc.mOutputBufferSize * sizeof(short), DMA_TAG(1), 0, 0);
	cellDmaWaitTagStatusAll(DMA_MASK(1));
	//spuDebugPrintf("[Speex][SPU] done transferring data back\n");
	free(speexBuffer);
	free(inBuffer);
	free(outBuffer);
	spuTaskOut->mSpeexReturnCode = 0;
}
/// Reads three small chunks of `size` bytes each from main memory into three
/// local-store destinations, issuing all three transfers on tag 1 and waiting
/// once for the lot.
///
/// Each chunk is first landed in a 16-byte-aligned staging buffer at an offset
/// equal to the low four bits of its source address, then copied byte by byte
/// to its destination. size is asserted to be below 16.
///
/// @param ls0,ls1,ls2  destinations for the three chunks
/// @param ea0,ea1,ea2  main-memory source addresses of the three chunks
/// @param size         number of bytes per chunk
SIMD_FORCE_INLINE void small_cache_read_triple(	void* ls0, ppu_address_t ea0,
												void* ls1, ppu_address_t ea1,
												void* ls2, ppu_address_t ea2,
												size_t size)
{
		btAssert(size<16);

		ATTRIBUTE_ALIGNED16(char	stagingA[32]);
		ATTRIBUTE_ALIGNED16(char	stagingB[32]);
		ATTRIBUTE_ALIGNED16(char	stagingC[32]);

		///the staging address must share its last 4 bits with the source, for cellDmaSmallGet
		const uint32_t lowBitsA = ea0 & 0x0f;
		const uint32_t lowBitsB = ea1 & 0x0f;
		const uint32_t lowBitsC = ea2 & 0x0f;

		char* stagedA = stagingA + lowBitsA;
		char* stagedB = stagingB + lowBitsB;
		char* stagedC = stagingC + lowBitsC;

		// Kick off all three reads back to back; they share one tag.
#ifdef __SPU__
		cellDmaSmallGet(stagedA,ea0,size,DMA_TAG(1),0,0);
		cellDmaSmallGet(stagedB,ea1,size,DMA_TAG(1),0,0);
		cellDmaSmallGet(stagedC,ea2,size,DMA_TAG(1),0,0);
#else
		// Off-SPU the read-only variant hands back the pointer to read from.
		stagedA = (char*)cellDmaSmallGetReadOnly(stagedA,ea0,size,DMA_TAG(1),0,0);
		stagedB = (char*)cellDmaSmallGetReadOnly(stagedB,ea1,size,DMA_TAG(1),0,0);
		stagedC = (char*)cellDmaSmallGetReadOnly(stagedC,ea2,size,DMA_TAG(1),0,0);
#endif

		cellDmaWaitTagStatusAll( DMA_MASK(1) );

		char* const destA = (char*)ls0;
		char* const destB = (char*)ls1;
		char* const destC = (char*)ls2;

		// Byte-wise copy, interleaved across the three destinations.
		uint32_t byteIdx = 0;
		while (btLikely( byteIdx<size ))
		{
			destA[byteIdx] = stagedA[byteIdx];
			destB[byteIdx] = stagedB[byteIdx];
			destC[byteIdx] = stagedC[byteIdx];
			byteIdx++;
		}
}
Beispiel #8
0
/// Casts one ray against the convex shape currently held in local store.
///
/// For convex hulls the hull object is fetched again from main memory (tag 1)
/// so that its vertex data can be pulled in through dmaConvexVertexData
/// (waited on via tag 2). The ray is then swept with SpuSubsimplexRayCast;
/// on a hit, workUnitOut receives the hit fraction and normal. On a miss
/// workUnitOut is left untouched.
///
/// @param gatheredObjectData  shape/transform data of the object being tested
/// @param workUnit            supplies rayFrom and rayTo
/// @param workUnitOut         written only when the cast reports a hit
/// @param lsMemPtr            local-store scratch area (convex vertex data)
void
performRaycastAgainstConvex (RaycastGatheredObjectData* gatheredObjectData, const SpuRaycastTaskWorkUnit& workUnit, SpuRaycastTaskWorkUnitOut* workUnitOut, RaycastTask_LocalStoreMemory* lsMemPtr)
{
	SpuVoronoiSimplexSolver simplexSolver;

	// The ray end points expressed as pure translations.
	btTransform fromXform, toXform;
	fromXform.setIdentity ();
	fromXform.setOrigin (workUnit.rayFrom);
	toXform.setIdentity ();
	toXform.setOrigin (workUnit.rayTo);

	SpuCastResult castResult;

	/* Convex hulls additionally need their vertex buffer in local store */
	/* XXX: We might be loading the shape twice */
	ATTRIBUTE_ALIGNED16(char hullStorage[sizeof(btConvexHullShape)]);
	if (gatheredObjectData->m_shapeType == CONVEX_HULL_SHAPE_PROXYTYPE)
	{
		const int hullBytes = sizeof(btConvexHullShape);
		const ppu_address_t hullEa = gatheredObjectData->m_collisionShape;
		cellDmaGet(hullStorage, hullEa, hullBytes, DMA_TAG(1), 0, 0);
		cellDmaWaitTagStatusAll(DMA_MASK(1));

		dmaConvexVertexData (&lsMemPtr->convexVertexData, (btConvexHullShape*)hullStorage);
		cellDmaWaitTagStatusAll(DMA_MASK(2)); // dmaConvexVertexData uses dma channel 2!

		lsMemPtr->convexVertexData.gSpuConvexShapePtr = gatheredObjectData->m_spuCollisionShape;
		lsMemPtr->convexVertexData.gConvexPoints = &lsMemPtr->convexVertexData.g_convexPointBuffer[0];
	}

	/* Sweep the ray; the object's world transform is passed for both start and end pose */
	SpuSubsimplexRayCast caster (gatheredObjectData->m_spuCollisionShape, &lsMemPtr->convexVertexData, gatheredObjectData->m_shapeType, gatheredObjectData->m_collisionMargin, &simplexSolver);
	const bool hit = caster.calcTimeOfImpact (fromXform, toXform, gatheredObjectData->m_worldTransform, gatheredObjectData->m_worldTransform,castResult);
	if (!hit)
	{
		return;
	}

	workUnitOut->hitFraction = castResult.m_fraction;
	workUnitOut->hitNormal = castResult.m_normal;
}
/// Decodes one Speex frame on the SPU and writes the 16-bit samples back.
///
/// Reads mInputBufferSize encoded bytes from the descriptor's input buffer,
/// decodes them with the state in gviSpursSpeexStateBuffer into
/// mOutputBufferSize float samples, narrows those to short and DMAs them to
/// the descriptor's output buffer.
///
/// @param spuTaskOut  result structure; only mSpeexReturnCode is written (0).
///                    A non-zero speex_decode result is caught by assert only.
void gviSpursSpeexDecodeSet(SpursSpeexTaskOutput *spuTaskOut)
{
	char *inBuffer;
	float *speexBuffer;
	short *outBuffer;
	int rcode;
	unsigned int i;

	speexBuffer = (float *)memalign(16, gviSpursSpeexTaskDesc.mOutputBufferSize * sizeof(float));
	outBuffer = (short *)memalign(16, gviSpursSpeexTaskDesc.mOutputBufferSize * sizeof(short));
	inBuffer = (char *)memalign(16, gviSpursSpeexTaskDesc.mInputBufferSize);

	// Each memset length must match the corresponding allocation above.
	// The in/out lengths used to be swapped, which cleared past the end of
	// inBuffer whenever the decoded frame is larger than the encoded one.
	memset(speexBuffer, 0, gviSpursSpeexTaskDesc.mOutputBufferSize * sizeof(float));
	memset(inBuffer, 0, gviSpursSpeexTaskDesc.mInputBufferSize);
	memset(outBuffer, 0, gviSpursSpeexTaskDesc.mOutputBufferSize * sizeof(short));

	cellDmaGet(inBuffer, (uint64_t)gviSpursSpeexTaskDesc.mInputBuffer, gviSpursSpeexTaskDesc.mInputBufferSize, DMA_TAG(1), 0,0);
	cellDmaWaitTagStatusAll(DMA_MASK(1));

	// read the data into the bits
	// NOTE(review): unlike gviSpursSpeexDecodeAdd, no speex_bits_init_buffer call
	// precedes this; presumably gviSpursSpeexBits is set up elsewhere - confirm.
	speex_bits_read_from(&gviSpursSpeexBits, (char *)inBuffer, gviSpursSpeexTaskDesc.mEncodedFrameSize);

	// decode it
	rcode = speex_decode((void *)gviSpursSpeexStateBuffer, &gviSpursSpeexBits, speexBuffer);
	assert(rcode == 0);

	// convert the output from floats
	for(i = 0 ; i < gviSpursSpeexTaskDesc.mOutputBufferSize ; i++)
		// Expanded to remove warnings in VS2K5
		outBuffer[i] = (short)speexBuffer[i];

	cellDmaPut(outBuffer, (uint64_t)gviSpursSpeexTaskDesc.mOutputBuffer, gviSpursSpeexTaskDesc.mOutputBufferSize * sizeof(short), DMA_TAG(1), 0, 0);
	cellDmaWaitTagStatusAll(DMA_MASK(1));
	free(speexBuffer);
	free(inBuffer);
	free(outBuffer);
	spuTaskOut->mSpeexReturnCode = 0;
}
Beispiel #10
0
///Blocking read of a small, arbitrarily aligned chunk of main memory into local store.
///Not meant for frequent use, only for small data: size is asserted to be below 32 bytes.
///The data is first landed in an aligned staging buffer at an offset equal to the low
///four bits of ea, then copied byte by byte to ls. Always returns 0.
///@param ls   destination for the data
///@param ea   main-memory source address
///@param size number of bytes to read (must be < 32)
int stallingUnalignedDmaSmallGet(void *ls, uint64_t ea, uint32_t size)
{
	
	btAssert(size<32);
	
	// The staging offset can be as large as 15 and size as large as 31, so up to
	// 46 bytes are touched; 48 keeps the buffer a multiple of 16 and large enough.
	// (A 32-byte buffer could be overrun by a permitted size/offset combination.)
	ATTRIBUTE_ALIGNED16(char	tmpBuffer[48]);

	char* mainMem = (char*)ea;
	char* localStore = (char*)ls;
	uint32_t i;
	

	///make sure last 4 bits are the same, for cellDmaSmallGet
	uint32_t last4BitsOffset = ea & 0x0f;
	char* tmpTarget = tmpBuffer + last4BitsOffset;
	
#if defined (__SPU__) || defined (USE_LIBSPE2)
	
	int remainingSize = size;

//#define FORCE_cellDmaUnalignedGet 1
#ifdef FORCE_cellDmaUnalignedGet
	cellDmaUnalignedGet(tmpTarget,ea,size,DMA_TAG(1),0,0);
#else
	char* remainingTmpTarget = tmpTarget;
	uint64_t remainingEa = ea;

	// Break the request into transfers of 16/8/4/2/1 bytes, largest first.
	// NOTE(review): only the transfer size is checked here, not whether the
	// address is naturally aligned for that size - confirm against the MFC rules.
	while (remainingSize)
	{
		switch (remainingSize)
		{
		case 1:
		case 2:
		case 4:
		case 8:
		case 16:
			{
				// What is left is already a legal transfer size: finish in one go.
				mfc_get(remainingTmpTarget,remainingEa,remainingSize,DMA_TAG(1),0,0);
				remainingSize=0;
				break;
			}
		default:
			{
				//spu_printf("unaligned DMA with non-natural size:%d\n",remainingSize);
				// Pick the largest power of two strictly below what is left.
				// Every size that reaches this branch is at least 3, so
				// actualSize is always set to 2 or more and the loop advances.
				int actualSize = 0;

				if (remainingSize > 16)
					actualSize = 16;
				else
					if (remainingSize >8)
						actualSize=8;
					else
						if (remainingSize >4)
							actualSize=4;
						else
							if (remainingSize >2)
								actualSize=2;
				mfc_get(remainingTmpTarget,remainingEa,actualSize,DMA_TAG(1),0,0);
				remainingSize-=actualSize;
				remainingTmpTarget+=actualSize;
				remainingEa += actualSize;
			}
		}
	}
#endif//FORCE_cellDmaUnalignedGet

#else
	//no DMA engine in this build: read main memory directly into the staging buffer
#ifdef USE_MEMCPY
		memcpy(tmpTarget,mainMem,size);
#else
		for ( i=0;i<size;i++)
		{
			tmpTarget[i] = mainMem[i];
		}
#endif //USE_MEMCPY

#endif

	// Block until everything issued on tag 1 has landed.
	cellDmaWaitTagStatusAll(DMA_MASK(1));

	//this is slowish, perhaps memcpy on SPU is smarter?
	for (i=0; btLikely( i<size );i++)
	{
		localStore[i] = tmpTarget[i];
	}

	return 0;
}
Beispiel #11
0
void	processRaycastTask(void* userPtr, void* lsMemory)
{
	RaycastTask_LocalStoreMemory* localMemory = (RaycastTask_LocalStoreMemory*)lsMemory;

	SpuRaycastTaskDesc* taskDescPtr = (SpuRaycastTaskDesc*)userPtr;
	SpuRaycastTaskDesc& taskDesc = *taskDescPtr;

	SpuCollisionObjectWrapper* cows = (SpuCollisionObjectWrapper*)taskDesc.spuCollisionObjectsWrappers;

	//spu_printf("in processRaycastTask %d\n", taskDesc.numSpuCollisionObjectWrappers);
	/* for each object */
	RaycastGatheredObjectData gatheredObjectData;
	for (int objectId = 0; objectId < taskDesc.numSpuCollisionObjectWrappers; objectId++)
	{
		//spu_printf("%d / %d\n", objectId, taskDesc.numSpuCollisionObjectWrappers);
		
		/* load initial collision shape */
		GatherCollisionObjectAndShapeData (&gatheredObjectData, localMemory, (ppu_address_t)&cows[objectId]);

		if (btBroadphaseProxy::isConcave (gatheredObjectData.m_shapeType))
		{
			SpuRaycastTaskWorkUnitOut tWorkUnitsOut[SPU_RAYCAST_WORK_UNITS_PER_TASK];
			for (int rayId = 0; rayId < taskDesc.numWorkUnits; rayId++)
			{
				tWorkUnitsOut[rayId].hitFraction = 1.0;
			}

			performRaycastAgainstConcave (&gatheredObjectData, &taskDesc.workUnits[0], &tWorkUnitsOut[0], taskDesc.numWorkUnits, localMemory);

			for (int rayId = 0; rayId < taskDesc.numWorkUnits; rayId++)
			{
				const SpuRaycastTaskWorkUnit& workUnit = taskDesc.workUnits[rayId];
				if (tWorkUnitsOut[rayId].hitFraction == 1.0)
					continue;

				ATTRIBUTE_ALIGNED16(SpuRaycastTaskWorkUnitOut workUnitOut);
				dmaLoadRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1);
				cellDmaWaitTagStatusAll(DMA_MASK(1));
				
				
				/* XXX Only support taking the closest hit for now */
				if (tWorkUnitsOut[rayId].hitFraction < workUnitOut.hitFraction)
				{
					workUnitOut.hitFraction = tWorkUnitsOut[rayId].hitFraction;
					workUnitOut.hitNormal = tWorkUnitsOut[rayId].hitNormal;
				}

				/* write ray cast data back */
				dmaStoreRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1);
				cellDmaWaitTagStatusAll(DMA_MASK(1));
			}
		} else if (btBroadphaseProxy::isConvex (gatheredObjectData.m_shapeType)) {

			btVector3 objectBoxMin, objectBoxMax;
			computeAabb (objectBoxMin, objectBoxMax, (btConvexInternalShape*)gatheredObjectData.m_spuCollisionShape, gatheredObjectData.m_collisionShape, gatheredObjectData.m_shapeType, gatheredObjectData.m_worldTransform);
			for (unsigned int rayId = 0; rayId < taskDesc.numWorkUnits; rayId++)
			{
				const SpuRaycastTaskWorkUnit& workUnit = taskDesc.workUnits[rayId];
			
				btScalar ignored_param = 1.0;
				btVector3 ignored_normal;
				if (btRayAabb(workUnit.rayFrom, workUnit.rayTo, objectBoxMin, objectBoxMax, ignored_param, ignored_normal))
				{
					ATTRIBUTE_ALIGNED16(SpuRaycastTaskWorkUnitOut workUnitOut);
					SpuRaycastTaskWorkUnitOut tWorkUnitOut;
					tWorkUnitOut.hitFraction = 1.0;

					performRaycastAgainstConvex (&gatheredObjectData, workUnit, &tWorkUnitOut, localMemory);
					if (tWorkUnitOut.hitFraction == 1.0)
						continue;
	
					dmaLoadRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1);
					cellDmaWaitTagStatusAll(DMA_MASK(1));

					/* XXX Only support taking the closest hit for now */
					if (tWorkUnitOut.hitFraction < workUnitOut.hitFraction)
					{
						workUnitOut.hitFraction = tWorkUnitOut.hitFraction;
						workUnitOut.hitNormal = tWorkUnitOut.hitNormal;
						/* write ray cast data back */
						dmaStoreRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1);
						cellDmaWaitTagStatusAll(DMA_MASK(1));
					}
				}
			}

		} else if (btBroadphaseProxy::isCompound (gatheredObjectData.m_shapeType)) {
			for (unsigned int rayId = 0; rayId < taskDesc.numWorkUnits; rayId++)
			{
				const SpuRaycastTaskWorkUnit& workUnit = taskDesc.workUnits[rayId];
				ATTRIBUTE_ALIGNED16(SpuRaycastTaskWorkUnitOut workUnitOut);
				SpuRaycastTaskWorkUnitOut tWorkUnitOut;
				tWorkUnitOut.hitFraction = 1.0;

				performRaycastAgainstCompound (&gatheredObjectData, workUnit, &tWorkUnitOut, localMemory);
				if (tWorkUnitOut.hitFraction == 1.0)
					continue;

				dmaLoadRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1);
				cellDmaWaitTagStatusAll(DMA_MASK(1));
				/* XXX Only support taking the closest hit for now */
				if (tWorkUnitOut.hitFraction < workUnitOut.hitFraction)
				{
					workUnitOut.hitFraction = tWorkUnitOut.hitFraction;
					workUnitOut.hitNormal = tWorkUnitOut.hitNormal;
				}

				/* write ray cast data back */
				dmaStoreRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1);
				cellDmaWaitTagStatusAll(DMA_MASK(1));
			}
		}
	}
}
Beispiel #12
0
/// Casts a batch of rays against the triangle-mesh shape held in local store.
///
/// The rays are moved into the mesh's local space and a quantized AABB is
/// built for each. The BVH subtree headers are then streamed in from main
/// memory in batches of at most MAX_SPU_SUBTREE_HEADERS; every subtree whose
/// box overlaps at least one ray box has its nodes fetched and is walked with
/// spuWalkStacklessQuantizedTreeAgainstRays. Nothing is done unless there is
/// at least one subtree and exactly one indexed mesh.
///
/// @param gatheredObjectData  shape/transform data of the mesh object
/// @param workUnits           the rays (rayFrom / rayTo) to cast
/// @param workUnitsOut        per-ray results, handed to the node callback
/// @param numWorkUnits        number of entries in workUnits / workUnitsOut
/// @param lsMemPtr            local-store scratch area (BVH data)
void performRaycastAgainstConcave (RaycastGatheredObjectData* gatheredObjectData, const SpuRaycastTaskWorkUnit* workUnits, SpuRaycastTaskWorkUnitOut* workUnitsOut, int numWorkUnits, RaycastTask_LocalStoreMemory* lsMemPtr)
{
	btBvhTriangleMeshShape*	trimeshShape = (btBvhTriangleMeshShape*)gatheredObjectData->m_spuCollisionShape;

	//need the mesh interface, for access to triangle vertices
	dmaBvhShapeData (&(lsMemPtr->bvhShapeData), trimeshShape);

	unsigned short int quantizedQueryAabbMin[SPU_RAYCAST_WORK_UNITS_PER_TASK][3];
	unsigned short int quantizedQueryAabbMax[SPU_RAYCAST_WORK_UNITS_PER_TASK][3];
	btVector3 rayFromInTriangleSpace[SPU_RAYCAST_WORK_UNITS_PER_TASK];
	btVector3 rayToInTriangleSpace[SPU_RAYCAST_WORK_UNITS_PER_TASK];

	/* World -> mesh space, so rays can be compared with the mesh's own boxes */
	btTransform worldToMesh;
	worldToMesh = gatheredObjectData->m_worldTransform.inverse();

	for (int rayIdx = 0; rayIdx < numWorkUnits; rayIdx++)
	{
		rayFromInTriangleSpace[rayIdx] = worldToMesh(workUnits[rayIdx].rayFrom);
		rayToInTriangleSpace[rayIdx] = worldToMesh(workUnits[rayIdx].rayTo);

		/* Component-wise min / max of the two end points bounds the segment */
		btVector3 lowerCorner = rayFromInTriangleSpace[rayIdx];
		btVector3 upperCorner = rayFromInTriangleSpace[rayIdx];
		lowerCorner.setMin (rayToInTriangleSpace[rayIdx]);
		upperCorner.setMax (rayToInTriangleSpace[rayIdx]);

		lsMemPtr->bvhShapeData.getOptimizedBvh()->quantizeWithClamp(quantizedQueryAabbMin[rayIdx],lowerCorner,0);
		lsMemPtr->bvhShapeData.getOptimizedBvh()->quantizeWithClamp(quantizedQueryAabbMax[rayIdx],upperCorner,1);
	}

	QuantizedNodeArray&	nodeArray = lsMemPtr->bvhShapeData.getOptimizedBvh()->getQuantizedNodeArray();
	BvhSubtreeInfoArray& subTrees = lsMemPtr->bvhShapeData.getOptimizedBvh()->getSubtreeInfoArray();	

#ifdef CALLBACK_ALL
	spuRaycastNodeCallback nodeCallback (gatheredObjectData, workUnits, workUnitsOut, numWorkUnits, lsMemPtr);
#else
	spuRaycastNodeCallback1 nodeCallback (gatheredObjectData, workUnits, workUnitsOut, lsMemPtr);
#endif
	
	IndexedMeshArray&	indexArray = lsMemPtr->bvhShapeData.gTriangleMeshInterfacePtr->getIndexedMeshArray();

	/* Only the "some subtrees, exactly one indexed mesh" layout is handled */
	if (!subTrees.size() || indexArray.size() != 1)
	{
		return;
	}

	///DMA in the index info
	dmaBvhIndexedMesh (&lsMemPtr->bvhShapeData.gIndexMesh, indexArray, 0 /* index into indexArray */, 1 /* dmaTag */);
	cellDmaWaitTagStatusAll(DMA_MASK(1));

	const int numBatch = subTrees.size();
	int firstHeader = 0;
	while (firstHeader < numBatch)
	{
// BEN: TODO - can reorder DMA transfers for less stall
		/* Fetch the next slice of subtree headers */
		const int remaining = subTrees.size() - firstHeader;
		const int nextBatch = remaining < MAX_SPU_SUBTREE_HEADERS ? remaining : MAX_SPU_SUBTREE_HEADERS;

		dmaBvhSubTreeHeaders (&lsMemPtr->bvhShapeData.gSubtreeHeaders[0], (ppu_address_t)(&subTrees[firstHeader]), nextBatch, 1);
		cellDmaWaitTagStatusAll(DMA_MASK(1));

		for (int headerIdx = 0; headerIdx < nextBatch; headerIdx++)
		{
			const btBvhSubtreeInfo& subtree = lsMemPtr->bvhShapeData.gSubtreeHeaders[headerIdx];

			/* Stays 1 when there are no rays at all, exactly as the loop leaves it */
			unsigned int overlap = 1;
			for (int boxId = 0; boxId < numWorkUnits; boxId++)
			{
				overlap = spuTestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin[boxId],quantizedQueryAabbMax[boxId],subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax);
				if (overlap)
					break;
			}

			if (!overlap)
			{
				continue;
			}

			btAssert(subtree.m_subtreeSize);

			//dma the actual nodes of this subtree
			dmaBvhSubTreeNodes (&lsMemPtr->bvhShapeData.gSubtreeNodes[0], subtree, nodeArray, 2);
			cellDmaWaitTagStatusAll(DMA_MASK(2));

			/* Walk this subtree */
			spuWalkStacklessQuantizedTreeAgainstRays(lsMemPtr,
								&nodeCallback,
								&rayFromInTriangleSpace[0],
								&rayToInTriangleSpace[0],
								numWorkUnits,
								&quantizedQueryAabbMin[0][0],&quantizedQueryAabbMax[0][0],
								&lsMemPtr->bvhShapeData.gSubtreeNodes[0], 0, subtree.m_subtreeSize);
		}

		firstHeader += nextBatch;
	}
}
Beispiel #13
0
//-- MAIN METHOD
void processSampleTask(void* userPtr, void* lsMemory)
{
	//	BT_PROFILE("processSampleTask");

	SampleTask_LocalStoreMemory* localMemory = (SampleTask_LocalStoreMemory*)lsMemory;

	SpuSampleTaskDesc* taskDescPtr = (SpuSampleTaskDesc*)userPtr;
	SpuSampleTaskDesc& taskDesc = *taskDescPtr;

	switch (taskDesc.m_sampleCommand)
	{
	case CMD_SAMPLE_INTEGRATE_BODIES:
		{
			btTransform predictedTrans;
			btCollisionObject** eaPtr = (btCollisionObject**)taskDesc.m_mainMemoryPtr;

			int batchSize = taskDesc.m_sampleValue;
			if (batchSize>MAX_NUM_BODIES)
			{
				spu_printf("SPU Error: exceed number of bodies, see MAX_NUM_BODIES in SpuSampleTask.cpp\n");
				break;
			}
			int dmaArraySize = batchSize*sizeof(void*);

			uint64_t ppuArrayAddress = reinterpret_cast<uint64_t>(eaPtr);

			//			spu_printf("array location is at %llx, batchSize = %d, DMA size = %d\n",ppuArrayAddress,batchSize,dmaArraySize);

			if (dmaArraySize>=16)
			{
				cellDmaLargeGet((void*)&localMemory->gPointerArray[0], ppuArrayAddress  , dmaArraySize, DMA_TAG(1), 0, 0);	
				cellDmaWaitTagStatusAll(DMA_MASK(1));
			} else
			{
				stallingUnalignedDmaSmallGet((void*)&localMemory->gPointerArray[0], ppuArrayAddress  , dmaArraySize);
			}


			for ( int i=0;i<batchSize;i++)
			{
				///DMA rigid body

				void* localPtr = &localMemory->gLocalRigidBody[0];
				void* shortAdd = localMemory->gPointerArray[i];
				uint64_t ppuRigidBodyAddress = reinterpret_cast<uint64_t>(shortAdd);

				//	spu_printf("cellDmaGet at CMD_SAMPLE_INTEGRATE_BODIES from %llx to %llx\n",ppuRigidBodyAddress,localPtr);

				int dmaBodySize = sizeof(btRigidBody);

				cellDmaGet((void*)localPtr, ppuRigidBodyAddress  , dmaBodySize, DMA_TAG(1), 0, 0);	
				cellDmaWaitTagStatusAll(DMA_MASK(1));


				float timeStep = 1.f/60.f;

				btRigidBody* body = (btRigidBody*) localPtr;//btRigidBody::upcast(colObj);
				if (body)
				{
					if (body->isActive() && (!body->isStaticOrKinematicObject()))
					{
						body->predictIntegratedTransform(timeStep, predictedTrans);
						body->proceedToTransform( predictedTrans);
						void* ptr = (void*)localPtr;
						//	spu_printf("cellDmaLargePut from %llx to LS %llx\n",ptr,ppuRigidBodyAddress);

						cellDmaLargePut(ptr, ppuRigidBodyAddress  , dmaBodySize, DMA_TAG(1), 0, 0);
						cellDmaWaitTagStatusAll(DMA_MASK(1));

					}
				}

			}
			break;
		}


	case CMD_SAMPLE_PREDICT_MOTION_BODIES:
		{
			btTransform predictedTrans;
			btCollisionObject** eaPtr = (btCollisionObject**)taskDesc.m_mainMemoryPtr;

			int batchSize = taskDesc.m_sampleValue;
			int dmaArraySize = batchSize*sizeof(void*);

			if (batchSize>MAX_NUM_BODIES)
			{
				spu_printf("SPU Error: exceed number of bodies, see MAX_NUM_BODIES in SpuSampleTask.cpp\n");
				break;
			}

			uint64_t ppuArrayAddress = reinterpret_cast<uint64_t>(eaPtr);

			//			spu_printf("array location is at %llx, batchSize = %d, DMA size = %d\n",ppuArrayAddress,batchSize,dmaArraySize);

			if (dmaArraySize>=16)
			{
				cellDmaLargeGet((void*)&localMemory->gPointerArray[0], ppuArrayAddress  , dmaArraySize, DMA_TAG(1), 0, 0);	
				cellDmaWaitTagStatusAll(DMA_MASK(1));
			} else
			{
				stallingUnalignedDmaSmallGet((void*)&localMemory->gPointerArray[0], ppuArrayAddress  , dmaArraySize);
			}


			for ( int i=0;i<batchSize;i++)
			{
				///DMA rigid body

				void* localPtr = &localMemory->gLocalRigidBody[0];
				void* shortAdd = localMemory->gPointerArray[i];
				uint64_t ppuRigidBodyAddress = reinterpret_cast<uint64_t>(shortAdd);

				//	spu_printf("cellDmaGet at CMD_SAMPLE_INTEGRATE_BODIES from %llx to %llx\n",ppuRigidBodyAddress,localPtr);

				int dmaBodySize = sizeof(btRigidBody);

				cellDmaGet((void*)localPtr, ppuRigidBodyAddress  , dmaBodySize, DMA_TAG(1), 0, 0);	
				cellDmaWaitTagStatusAll(DMA_MASK(1));


				float timeStep = 1.f/60.f;

				btRigidBody* body = (btRigidBody*) localPtr;//btRigidBody::upcast(colObj);
				if (body)
				{
					if (!body->isStaticOrKinematicObject())
					{
						if (body->isActive())
						{
							body->integrateVelocities( timeStep);
							//damping
							body->applyDamping(timeStep);

							body->predictIntegratedTransform(timeStep,body->getInterpolationWorldTransform());

							void* ptr = (void*)localPtr;
							cellDmaLargePut(ptr, ppuRigidBodyAddress  , dmaBodySize, DMA_TAG(1), 0, 0);
							cellDmaWaitTagStatusAll(DMA_MASK(1));
						}
					}
				}

			}
			break;
		}
	


	default:
		{

		}
	};
}