/// Copies `size` bytes located at main-memory effective address `ea` into
/// `buffer`.
///
/// When USE_SOFTWARE_CACHE is enabled the data is taken from the pointer
/// returned by spe_cache_read(ea); otherwise the request is forwarded to
/// stallingUnalignedDmaSmallGet().
///
/// @param buffer destination for the copied bytes
/// @param ea     effective address of the first byte to read
/// @param size   number of bytes to read; in the software-cache build the
///               range [ea, ea + size - 1] must not cross a cache line
SIMD_FORCE_INLINE void small_cache_read(void* buffer, ppu_address_t ea, size_t size)
{
#if !USE_SOFTWARE_CACHE
	// No software cache available: go straight to the small DMA get.
	stallingUnalignedDmaSmallGet(buffer, ea, size);
#else
	// Alignment requirement: the whole request has to fit inside a single
	// cache line, so the last requested byte must map to the same line as
	// the first one.
	btAssert(((ea + size - 1) & ~SPE_CACHELINE_MASK) == (ea & ~SPE_CACHELINE_MASK));

	const void* cachedData = spe_cache_read(ea);
	memcpy(buffer, cachedData, size);
#endif
}
//-- MAIN METHOD void processSampleTask(void* userPtr, void* lsMemory) { // BT_PROFILE("processSampleTask"); SampleTask_LocalStoreMemory* localMemory = (SampleTask_LocalStoreMemory*)lsMemory; SpuSampleTaskDesc* taskDescPtr = (SpuSampleTaskDesc*)userPtr; SpuSampleTaskDesc& taskDesc = *taskDescPtr; switch (taskDesc.m_sampleCommand) { case CMD_SAMPLE_INTEGRATE_BODIES: { btTransform predictedTrans; btCollisionObject** eaPtr = (btCollisionObject**)taskDesc.m_mainMemoryPtr; int batchSize = taskDesc.m_sampleValue; if (batchSize>MAX_NUM_BODIES) { spu_printf("SPU Error: exceed number of bodies, see MAX_NUM_BODIES in SpuSampleTask.cpp\n"); break; } int dmaArraySize = batchSize*sizeof(void*); uint64_t ppuArrayAddress = reinterpret_cast<uint64_t>(eaPtr); // spu_printf("array location is at %llx, batchSize = %d, DMA size = %d\n",ppuArrayAddress,batchSize,dmaArraySize); if (dmaArraySize>=16) { cellDmaLargeGet((void*)&localMemory->gPointerArray[0], ppuArrayAddress , dmaArraySize, DMA_TAG(1), 0, 0); cellDmaWaitTagStatusAll(DMA_MASK(1)); } else { stallingUnalignedDmaSmallGet((void*)&localMemory->gPointerArray[0], ppuArrayAddress , dmaArraySize); } for ( int i=0;i<batchSize;i++) { ///DMA rigid body void* localPtr = &localMemory->gLocalRigidBody[0]; void* shortAdd = localMemory->gPointerArray[i]; uint64_t ppuRigidBodyAddress = reinterpret_cast<uint64_t>(shortAdd); // spu_printf("cellDmaGet at CMD_SAMPLE_INTEGRATE_BODIES from %llx to %llx\n",ppuRigidBodyAddress,localPtr); int dmaBodySize = sizeof(btRigidBody); cellDmaGet((void*)localPtr, ppuRigidBodyAddress , dmaBodySize, DMA_TAG(1), 0, 0); cellDmaWaitTagStatusAll(DMA_MASK(1)); float timeStep = 1.f/60.f; btRigidBody* body = (btRigidBody*) localPtr;//btRigidBody::upcast(colObj); if (body) { if (body->isActive() && (!body->isStaticOrKinematicObject())) { body->predictIntegratedTransform(timeStep, predictedTrans); body->proceedToTransform( predictedTrans); void* ptr = (void*)localPtr; // spu_printf("cellDmaLargePut from %llx to LS 
%llx\n",ptr,ppuRigidBodyAddress); cellDmaLargePut(ptr, ppuRigidBodyAddress , dmaBodySize, DMA_TAG(1), 0, 0); cellDmaWaitTagStatusAll(DMA_MASK(1)); } } } break; } case CMD_SAMPLE_PREDICT_MOTION_BODIES: { btTransform predictedTrans; btCollisionObject** eaPtr = (btCollisionObject**)taskDesc.m_mainMemoryPtr; int batchSize = taskDesc.m_sampleValue; int dmaArraySize = batchSize*sizeof(void*); if (batchSize>MAX_NUM_BODIES) { spu_printf("SPU Error: exceed number of bodies, see MAX_NUM_BODIES in SpuSampleTask.cpp\n"); break; } uint64_t ppuArrayAddress = reinterpret_cast<uint64_t>(eaPtr); // spu_printf("array location is at %llx, batchSize = %d, DMA size = %d\n",ppuArrayAddress,batchSize,dmaArraySize); if (dmaArraySize>=16) { cellDmaLargeGet((void*)&localMemory->gPointerArray[0], ppuArrayAddress , dmaArraySize, DMA_TAG(1), 0, 0); cellDmaWaitTagStatusAll(DMA_MASK(1)); } else { stallingUnalignedDmaSmallGet((void*)&localMemory->gPointerArray[0], ppuArrayAddress , dmaArraySize); } for ( int i=0;i<batchSize;i++) { ///DMA rigid body void* localPtr = &localMemory->gLocalRigidBody[0]; void* shortAdd = localMemory->gPointerArray[i]; uint64_t ppuRigidBodyAddress = reinterpret_cast<uint64_t>(shortAdd); // spu_printf("cellDmaGet at CMD_SAMPLE_INTEGRATE_BODIES from %llx to %llx\n",ppuRigidBodyAddress,localPtr); int dmaBodySize = sizeof(btRigidBody); cellDmaGet((void*)localPtr, ppuRigidBodyAddress , dmaBodySize, DMA_TAG(1), 0, 0); cellDmaWaitTagStatusAll(DMA_MASK(1)); float timeStep = 1.f/60.f; btRigidBody* body = (btRigidBody*) localPtr;//btRigidBody::upcast(colObj); if (body) { if (!body->isStaticOrKinematicObject()) { if (body->isActive()) { body->integrateVelocities( timeStep); //damping body->applyDamping(timeStep); body->predictIntegratedTransform(timeStep,body->getInterpolationWorldTransform()); void* ptr = (void*)localPtr; cellDmaLargePut(ptr, ppuRigidBodyAddress , dmaBodySize, DMA_TAG(1), 0, 0); cellDmaWaitTagStatusAll(DMA_MASK(1)); } } } } break; } default: { } }; }