void GatherCollisionObjectAndShapeData (RaycastGatheredObjectData* gatheredObjectData, RaycastTask_LocalStoreMemory* lsMemPtr, ppu_address_t objectWrapper) { register int dmaSize; register ppu_address_t dmaPpuAddress2; /* DMA Collision object wrapper into local store */ dmaSize = sizeof(SpuCollisionObjectWrapper); dmaPpuAddress2 = objectWrapper; cellDmaGet(&lsMemPtr->gCollisionObjectWrapper, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); cellDmaWaitTagStatusAll(DMA_MASK(1)); /* DMA Collision object into local store */ dmaSize = sizeof(btCollisionObject); dmaPpuAddress2 = lsMemPtr->getCollisionObjectWrapper()->getCollisionObjectPtr(); cellDmaGet(&lsMemPtr->gColObj, dmaPpuAddress2 , dmaSize, DMA_TAG(2), 0, 0); cellDmaWaitTagStatusAll(DMA_MASK(2)); /* Gather information about collision object and shape */ gatheredObjectData->m_worldTransform = lsMemPtr->getColObj()->getWorldTransform(); gatheredObjectData->m_collisionMargin = lsMemPtr->getCollisionObjectWrapper()->getCollisionMargin (); gatheredObjectData->m_shapeType = lsMemPtr->getCollisionObjectWrapper()->getShapeType (); gatheredObjectData->m_collisionShape = (ppu_address_t)lsMemPtr->getColObj()->getCollisionShape(); gatheredObjectData->m_spuCollisionShape = (void*)&lsMemPtr->gCollisionShape.collisionShape; /* DMA shape data */ dmaCollisionShape (gatheredObjectData->m_spuCollisionShape, gatheredObjectData->m_collisionShape, 1, gatheredObjectData->m_shapeType); cellDmaWaitTagStatusAll(DMA_MASK(1)); if (btBroadphaseProxy::isConvex (gatheredObjectData->m_shapeType)) { btConvexInternalShape* spuConvexShape = (btConvexInternalShape*)gatheredObjectData->m_spuCollisionShape; gatheredObjectData->m_primitiveDimensions = spuConvexShape->getImplicitShapeDimensions (); } else { gatheredObjectData->m_primitiveDimensions = btVector3(1.0, 1.0, 1.0); } }
void processDecodeInit(unsigned int uiPtr) { SpursSpeexTaskOutput spuOutput; cellDmaGet(&gviSpursSpeexTaskDesc, uiPtr, sizeof(SpursSpeexTaskDesc), DMA_TAG(1), 0, 0); cellDmaWaitTagStatusAll(DMA_MASK(1)); //spuDebugPrintf("[Speex][SPU] CMD_SAMPLE_TASK_DECODE_INIT_COMMAND\n"); if (gviSpursSpeexTaskDesc.mDebugPause) { snPause(); } gviSpursSpeexDecoderInitialize(&spuOutput); if (spuOutput.mSpeexReturnCode < 0) { spuDebugPrintf("[Speex][SPU] failed to initialize decoder, ret = %d\n", spuOutput.mSpeexReturnCode); } cellDmaPut(&spuOutput, (uint64_t)gviSpursSpeexTaskDesc.mSpeexTaskOutput, sizeof(SpursSpeexTaskOutput), DMA_TAG(1), 0, 0); cellDmaWaitTagStatusAll(DMA_MASK(1)); cellDmaLargePut(gviSpursSpeexStateBuffer, (uint64_t)gviSpursSpeexTaskDesc.mSpeexStateBuffer, gviSpursSpeexTaskDesc.mSpeexStateBufferSize, DMA_TAG(1), 0,0); cellDmaWaitTagStatusAll(DMA_MASK(1)); //spuDebugPrintf("[Speex][SPU] buffer dma done\n"); }
// Note: Dma addresses are guaranteed to be aligned to 16 bytes (128 bits) __ri tDMA_TAG *dmaGetAddr(u32 addr, bool write) { // if (addr & 0xf) { DMA_LOG("*PCSX2*: DMA address not 128bit aligned: %8.8x", addr); } if (DMA_TAG(addr).SPR) return (tDMA_TAG*)&eeMem->Scratch[addr & 0x3ff0]; // FIXME: Why??? DMA uses physical addresses addr &= 0x1ffffff0; if (addr < Ps2MemSize::Base) { return (tDMA_TAG*)&eeMem->Main[addr]; } else if (addr < 0x10000000) { return (tDMA_TAG*)(write ? eeMem->ZeroWrite : eeMem->ZeroRead); } else if (addr < 0x10004000) { // Secret scratchpad address for DMA = end of maximum main memory? //Console.Warning("Writing to the scratchpad without the SPR flag set!"); return (tDMA_TAG*)&eeMem->Scratch[addr & 0x3ff0]; } else { Console.Error( "*PCSX2*: DMA error: %8.8x", addr); return NULL; } }
void processDecodeSet(unsigned int uiPtr) { SpursSpeexTaskOutput spuOutput; cellDmaGet(&gviSpursSpeexTaskDesc, uiPtr, sizeof(SpursSpeexTaskDesc), DMA_TAG(1), 0, 0); cellDmaWaitTagStatusAll(DMA_MASK(1)); //spuDebugPrintf("[Speex][SPU] CMD_SAMPLE_TASK_DECODESET_COMMAND\n"); if (gviSpursSpeexTaskDesc.mDebugPause) { snPause(); } cellDmaLargeGet(gviSpursSpeexStateBuffer, (uint64_t)gviSpursSpeexTaskDesc.mSpeexStateBuffer, SPEEX_DECODER_STATE_BUFFER_SIZE, DMA_TAG(1), 0,0); cellDmaWaitTagStatusAll(DMA_MASK(1)); gviSpursSpeexDecodeSet(&spuOutput); if (spuOutput.mSpeexReturnCode < 0) { spuDebugPrintf("SPU: failed to encode, ret = %d\n", spuOutput.mSpeexReturnCode); } cellDmaPut(&spuOutput, (uint64_t)gviSpursSpeexTaskDesc.mSpeexTaskOutput, sizeof(SpursSpeexTaskOutput), DMA_TAG(1), 0, 0); cellDmaWaitTagStatusAll(DMA_MASK(1)); cellDmaLargePut(gviSpursSpeexStateBuffer, (uint64_t)gviSpursSpeexTaskDesc.mSpeexStateBuffer, SPEEX_DECODER_STATE_BUFFER_SIZE, DMA_TAG(1), 0,0); cellDmaWaitTagStatusAll(DMA_MASK(1)); //spuDebugPrintf("[Speex][SPU] buffer dma done\n"); }
void procesEncodeInit(unsigned int uiPtr) { SpursSpeexTaskOutput spuOutput; //spuDebugPrintf("[Speex][SPU] CMD_SAMPLE_TASK_ENCODE_INIT_COMMAND\n"); cellDmaGet(&gviSpursSpeexTaskDesc, uiPtr, sizeof(SpursSpeexTaskDesc), DMA_TAG(1), 0, 0); cellDmaWaitTagStatusAll(DMA_MASK(1)); if (gviSpursSpeexTaskDesc.mDebugPause) { snPause(); } gviSpursSpeexEncoderInitialize(&spuOutput); if (spuOutput.mSpeexReturnCode < 0) { spuDebugPrintf("[Speex][SPU] failed to initialize encoder, ret = %d\n", spuOutput.mSpeexReturnCode); } //spuDebugPrintf("[Speex][SPU] done with initializing things for speex, now returning data via DMA put\n"); //printGlobalTaskDescData(); cellDmaPut(&spuOutput, (uint64_t)gviSpursSpeexTaskDesc.mSpeexTaskOutput, sizeof(SpursSpeexTaskOutput), DMA_TAG(1), 0, 0); cellDmaWaitTagStatusAll(DMA_MASK(1)); //spuDebugPrintf("[Speex][SPU] task dma done\n"); cellDmaLargePut(gviSpursSpeexStateBuffer, (uint64_t)gviSpursSpeexTaskDesc.mSpeexStateBuffer, SPEEX_ENCODER_STATE_BUFFER_SIZE, DMA_TAG(1), 0,0); cellDmaWaitTagStatusAll(DMA_MASK(1)); //spuDebugPrintf("[Speex][SPU] buffer dma done\n"); }
void small_cache_read_triple( void* ls0, ppu_address_t ea0, void* ls1, ppu_address_t ea1, void* ls2, ppu_address_t ea2, size_t size) { btAssert(size<16); ATTRIBUTE_ALIGNED16(char tmpBuffer0[32]); ATTRIBUTE_ALIGNED16(char tmpBuffer1[32]); ATTRIBUTE_ALIGNED16(char tmpBuffer2[32]); uint32_t i; ///make sure last 4 bits are the same, for cellDmaSmallGet char* localStore0 = (char*)ls0; uint32_t last4BitsOffset = ea0 & 0x0f; char* tmpTarget0 = tmpBuffer0 + last4BitsOffset; tmpTarget0 = (char*)cellDmaSmallGetReadOnly(tmpTarget0,ea0,size,DMA_TAG(1),0,0); char* localStore1 = (char*)ls1; last4BitsOffset = ea1 & 0x0f; char* tmpTarget1 = tmpBuffer1 + last4BitsOffset; tmpTarget1 = (char*)cellDmaSmallGetReadOnly(tmpTarget1,ea1,size,DMA_TAG(1),0,0); char* localStore2 = (char*)ls2; last4BitsOffset = ea2 & 0x0f; char* tmpTarget2 = tmpBuffer2 + last4BitsOffset; tmpTarget2 = (char*)cellDmaSmallGetReadOnly(tmpTarget2,ea2,size,DMA_TAG(1),0,0); cellDmaWaitTagStatusAll( DMA_MASK(1) ); //this is slowish, perhaps memcpy on SPU is smarter? for (i=0; btLikely( i<size );i++) { localStore0[i] = tmpTarget0[i]; localStore1[i] = tmpTarget1[i]; localStore2[i] = tmpTarget2[i]; } }
///Writes a local-store manifold back to its main-memory counterpart through the
///double-buffered exporter g_manifoldDmaExport.
///Only compiled in for __SPU__ / USE_LIBSPE2 builds; on other builds this is a no-op
///(the original note points at SpuFakeDma.cpp 'cellDmaLargeGetReadOnly', where the data is used in-place).
void SpuContactResult::writeDoubleBufferedManifold(btPersistentManifold* lsManifold, btPersistentManifold* mmManifold)
{
#if defined (__SPU__) || defined (USE_LIBSPE2)
	const ppu_address_t manifoldEa = (ppu_address_t)mmManifold;

	//stage the manifold in the front buffer, then flip so it becomes the back buffer
	memcpy(g_manifoldDmaExport.getFront(), lsManifold, sizeof(btPersistentManifold));
	g_manifoldDmaExport.swapBuffers();

	//start the put from the back buffer on tag 9; no explicit wait follows here -
	//according to the original author's note, swapBuffers does the wait
	g_manifoldDmaExport.backBufferDmaPut(manifoldEa, sizeof(btPersistentManifold), DMA_TAG(9));
#endif
}
/*
 * Sends the current draw buffer of Queue to the GIF channel.
 *
 * Does nothing when the queue holds no tags. Otherwise a finish token is appended,
 * the DMA tag is finalised as DMA_END, and the active pool buffer is sent as a chain.
 * Afterwards a non-persistent queue flips to its other buffer and is reset; a
 * persistent queue is restored to the state captured before the finish token was added.
 */
void gsKit_queue_exec_real(GSGLOBAL *gsGlobal, GSQUEUE *Queue)
{
	if(Queue->tag_size == 0)
		return;

	/* Persistent drawbuffers would grow on every execution because the finish token is
	 * appended each time. Capture the queue *state* (not its data) so it can be put
	 * back once the buffer has been sent. */
	GSQUEUE savedState = gsKit_set_finish(gsGlobal);

	*(u64 *)Queue->dma_tag = DMA_TAG(Queue->tag_size, 0, DMA_END, 0, 0, 0);

	/* Merge same_obj into the last tag unless that tag is of type GIF_AD. */
	if(Queue->last_type != GIF_AD)
	{
		u64 *lastTag = (u64 *)Queue->last_tag;
		*lastTag |= (u64)Queue->same_obj;
	}

	/* The finish wait is skipped on the first frame. */
	if(!gsGlobal->FirstFrame)
		gsKit_finish();

	GS_SETREG_CSR_FINISH(1);

	dmaKit_wait_fast();
	dmaKit_send_chain_ucab(DMA_CHANNEL_GIF, Queue->pool[Queue->dbuf]);

	if(Queue->mode == GS_PERSISTENT)
	{
		/* Restore the pre-finish state, then wait for the transfer. */
		*Queue = savedState;
		dmaKit_wait_fast();
		return;
	}

	/* Non-persistent: switch to the other buffer and start it empty. */
	Queue->dbuf ^= 1;
	Queue->dma_tag = Queue->pool[Queue->dbuf];
	Queue->pool_cur = Queue->dma_tag + 16;
	Queue->last_type = GIF_RESERVED;
	Queue->last_tag = Queue->pool_cur;
	Queue->tag_size = 0;
}
void performRaycastAgainstConvex (RaycastGatheredObjectData* gatheredObjectData, const SpuRaycastTaskWorkUnit& workUnit, SpuRaycastTaskWorkUnitOut* workUnitOut, RaycastTask_LocalStoreMemory* lsMemPtr) { SpuVoronoiSimplexSolver simplexSolver; btTransform rayFromTrans, rayToTrans; rayFromTrans.setIdentity (); rayFromTrans.setOrigin (workUnit.rayFrom); rayToTrans.setIdentity (); rayToTrans.setOrigin (workUnit.rayTo); SpuCastResult result; /* Load the vertex data if the shape is a convex hull */ /* XXX: We might be loading the shape twice */ ATTRIBUTE_ALIGNED16(char convexHullShape[sizeof(btConvexHullShape)]); if (gatheredObjectData->m_shapeType == CONVEX_HULL_SHAPE_PROXYTYPE) { register int dmaSize; register ppu_address_t dmaPpuAddress2; dmaSize = sizeof(btConvexHullShape); dmaPpuAddress2 = gatheredObjectData->m_collisionShape; cellDmaGet(&convexHullShape, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); cellDmaWaitTagStatusAll(DMA_MASK(1)); dmaConvexVertexData (&lsMemPtr->convexVertexData, (btConvexHullShape*)&convexHullShape); cellDmaWaitTagStatusAll(DMA_MASK(2)); // dmaConvexVertexData uses dma channel 2! lsMemPtr->convexVertexData.gSpuConvexShapePtr = gatheredObjectData->m_spuCollisionShape; lsMemPtr->convexVertexData.gConvexPoints = &lsMemPtr->convexVertexData.g_convexPointBuffer[0]; } /* performRaycast */ SpuSubsimplexRayCast caster (gatheredObjectData->m_spuCollisionShape, &lsMemPtr->convexVertexData, gatheredObjectData->m_shapeType, gatheredObjectData->m_collisionMargin, &simplexSolver); bool r = caster.calcTimeOfImpact (rayFromTrans, rayToTrans, gatheredObjectData->m_worldTransform, gatheredObjectData->m_worldTransform,result); if (r) { workUnitOut->hitFraction = result.m_fraction; workUnitOut->hitNormal = result.m_normal; } }
/**
 * Issues a DMA get that copies one SpuRaycastTaskWorkUnitOut from main memory
 * (rayOutputAddr) into rayOutput, using the given tag.
 * No wait is performed here; the caller has to wait on dmaTag before reading rayOutput.
 */
void dmaLoadRayOutput (ppu_address_t rayOutputAddr, SpuRaycastTaskWorkUnitOut* rayOutput, uint32_t dmaTag)
{
	cellDmaGet(rayOutput,
	           rayOutputAddr,
	           sizeof(SpuRaycastTaskWorkUnitOut),
	           DMA_TAG(dmaTag),
	           0, 0);
}
/**
 * Issues a DMA put that copies one SpuRaycastTaskWorkUnitOut from rayOutput to
 * main memory (rayOutputAddr), using the given tag.
 * No wait is performed here; rayOutput must stay untouched until the caller has waited on dmaTag.
 */
void dmaStoreRayOutput (ppu_address_t rayOutputAddr, const SpuRaycastTaskWorkUnitOut* rayOutput, uint32_t dmaTag)
{
	cellDmaLargePut (rayOutput,
	                 rayOutputAddr,
	                 sizeof(SpuRaycastTaskWorkUnitOut),
	                 DMA_TAG(dmaTag),
	                 0, 0);
}
/*
 * Decodes one encoded Speex frame described by the global gviSpursSpeexTaskDesc.
 *
 * Steps:
 *   1. allocate three 16-byte aligned scratch buffers (encoded input, float decode
 *      output, 16-bit output);
 *   2. DMA the encoded input from main memory (tag 1, blocking);
 *   3. re-initialise the bit reader, load mEncodedFrameSize bytes and decode;
 *   4. convert the float samples to shorts and DMA them back (tag 1, blocking);
 *   5. free the scratch buffers.
 *
 * spuTaskOut->mSpeexReturnCode receives the speex_decode result (0 on success).
 *
 * Each buffer is now cleared with exactly the size it was allocated with. Previously
 * inBuffer was cleared with twice its allocation (a heap overflow) and outBuffer was
 * only half cleared.
 */
void gviSpursSpeexDecodeAdd(SpursSpeexTaskOutput *spuTaskOut)
{
	char *inBuffer;
	float *speexBuffer;
	short *outBuffer;
	int rcode;
	unsigned int i;
	size_t speexBytes;
	size_t outBytes;
	size_t inBytes;

	/* Sizes are computed once so allocation, clearing and transfer always agree. */
	speexBytes = gviSpursSpeexTaskDesc.mOutputBufferSize * sizeof(float);
	outBytes   = gviSpursSpeexTaskDesc.mOutputBufferSize * sizeof(short);
	inBytes    = gviSpursSpeexTaskDesc.mInputBufferSize;

	/* NOTE(review): allocation failure is not handled here (same as before) - confirm
	 * whether the task should report an error instead. */
	speexBuffer = (float *)memalign(16, speexBytes);
	outBuffer = (short *)memalign(16, outBytes);
	inBuffer = (char *)memalign(16, inBytes);

	memset(speexBuffer, 0, speexBytes);
	memset(outBuffer, 0, outBytes);
	memset(inBuffer, 0, inBytes);

	/* Fetch the encoded frame. */
	cellDmaGet(inBuffer, (uint64_t)gviSpursSpeexTaskDesc.mInputBuffer, gviSpursSpeexTaskDesc.mInputBufferSize, DMA_TAG(1), 0,0);
	cellDmaWaitTagStatusAll(DMA_MASK(1));

	/* (re)initialize the bits struct and read the data into it */
	speex_bits_init_buffer(&gviSpursSpeexBits,gviSpursSpeexBitsBuffer,sizeof(gviSpursSpeexBitsBuffer));
	speex_bits_read_from(&gviSpursSpeexBits, (char *)inBuffer, gviSpursSpeexTaskDesc.mEncodedFrameSize);

	/* decode it */
	rcode = speex_decode((void *)gviSpursSpeexStateBuffer, &gviSpursSpeexBits, speexBuffer);
	assert(rcode == 0);

	/* convert the output from floats */
	for(i = 0 ; i < gviSpursSpeexTaskDesc.mOutputBufferSize ; i++)
		outBuffer[i] = (short)speexBuffer[i];

	/* Send the decoded samples back. */
	cellDmaPut(outBuffer, (uint64_t)gviSpursSpeexTaskDesc.mOutputBuffer, gviSpursSpeexTaskDesc.mOutputBufferSize * sizeof(short), DMA_TAG(1), 0, 0);
	cellDmaWaitTagStatusAll(DMA_MASK(1));

	free(speexBuffer);
	free(inBuffer);
	free(outBuffer);

	/* Report the decoder's own result so a failure is still visible to the caller when
	 * asserts are compiled out; this is 0 on success, exactly as before. */
	spuTaskOut->mSpeexReturnCode = rcode;
}
//-- MAIN METHOD void processSampleTask(void* userPtr, void* lsMemory) { // BT_PROFILE("processSampleTask"); SampleTask_LocalStoreMemory* localMemory = (SampleTask_LocalStoreMemory*)lsMemory; SpuSampleTaskDesc* taskDescPtr = (SpuSampleTaskDesc*)userPtr; SpuSampleTaskDesc& taskDesc = *taskDescPtr; switch (taskDesc.m_sampleCommand) { case CMD_SAMPLE_INTEGRATE_BODIES: { btTransform predictedTrans; btCollisionObject** eaPtr = (btCollisionObject**)taskDesc.m_mainMemoryPtr; int batchSize = taskDesc.m_sampleValue; if (batchSize>MAX_NUM_BODIES) { spu_printf("SPU Error: exceed number of bodies, see MAX_NUM_BODIES in SpuSampleTask.cpp\n"); break; } int dmaArraySize = batchSize*sizeof(void*); uint64_t ppuArrayAddress = reinterpret_cast<uint64_t>(eaPtr); // spu_printf("array location is at %llx, batchSize = %d, DMA size = %d\n",ppuArrayAddress,batchSize,dmaArraySize); if (dmaArraySize>=16) { cellDmaLargeGet((void*)&localMemory->gPointerArray[0], ppuArrayAddress , dmaArraySize, DMA_TAG(1), 0, 0); cellDmaWaitTagStatusAll(DMA_MASK(1)); } else { stallingUnalignedDmaSmallGet((void*)&localMemory->gPointerArray[0], ppuArrayAddress , dmaArraySize); } for ( int i=0;i<batchSize;i++) { ///DMA rigid body void* localPtr = &localMemory->gLocalRigidBody[0]; void* shortAdd = localMemory->gPointerArray[i]; uint64_t ppuRigidBodyAddress = reinterpret_cast<uint64_t>(shortAdd); // spu_printf("cellDmaGet at CMD_SAMPLE_INTEGRATE_BODIES from %llx to %llx\n",ppuRigidBodyAddress,localPtr); int dmaBodySize = sizeof(btRigidBody); cellDmaGet((void*)localPtr, ppuRigidBodyAddress , dmaBodySize, DMA_TAG(1), 0, 0); cellDmaWaitTagStatusAll(DMA_MASK(1)); float timeStep = 1.f/60.f; btRigidBody* body = (btRigidBody*) localPtr;//btRigidBody::upcast(colObj); if (body) { if (body->isActive() && (!body->isStaticOrKinematicObject())) { body->predictIntegratedTransform(timeStep, predictedTrans); body->proceedToTransform( predictedTrans); void* ptr = (void*)localPtr; // spu_printf("cellDmaLargePut from %llx to LS 
%llx\n",ptr,ppuRigidBodyAddress); cellDmaLargePut(ptr, ppuRigidBodyAddress , dmaBodySize, DMA_TAG(1), 0, 0); cellDmaWaitTagStatusAll(DMA_MASK(1)); } } } break; } case CMD_SAMPLE_PREDICT_MOTION_BODIES: { btTransform predictedTrans; btCollisionObject** eaPtr = (btCollisionObject**)taskDesc.m_mainMemoryPtr; int batchSize = taskDesc.m_sampleValue; int dmaArraySize = batchSize*sizeof(void*); if (batchSize>MAX_NUM_BODIES) { spu_printf("SPU Error: exceed number of bodies, see MAX_NUM_BODIES in SpuSampleTask.cpp\n"); break; } uint64_t ppuArrayAddress = reinterpret_cast<uint64_t>(eaPtr); // spu_printf("array location is at %llx, batchSize = %d, DMA size = %d\n",ppuArrayAddress,batchSize,dmaArraySize); if (dmaArraySize>=16) { cellDmaLargeGet((void*)&localMemory->gPointerArray[0], ppuArrayAddress , dmaArraySize, DMA_TAG(1), 0, 0); cellDmaWaitTagStatusAll(DMA_MASK(1)); } else { stallingUnalignedDmaSmallGet((void*)&localMemory->gPointerArray[0], ppuArrayAddress , dmaArraySize); } for ( int i=0;i<batchSize;i++) { ///DMA rigid body void* localPtr = &localMemory->gLocalRigidBody[0]; void* shortAdd = localMemory->gPointerArray[i]; uint64_t ppuRigidBodyAddress = reinterpret_cast<uint64_t>(shortAdd); // spu_printf("cellDmaGet at CMD_SAMPLE_INTEGRATE_BODIES from %llx to %llx\n",ppuRigidBodyAddress,localPtr); int dmaBodySize = sizeof(btRigidBody); cellDmaGet((void*)localPtr, ppuRigidBodyAddress , dmaBodySize, DMA_TAG(1), 0, 0); cellDmaWaitTagStatusAll(DMA_MASK(1)); float timeStep = 1.f/60.f; btRigidBody* body = (btRigidBody*) localPtr;//btRigidBody::upcast(colObj); if (body) { if (!body->isStaticOrKinematicObject()) { if (body->isActive()) { body->integrateVelocities( timeStep); //damping body->applyDamping(timeStep); body->predictIntegratedTransform(timeStep,body->getInterpolationWorldTransform()); void* ptr = (void*)localPtr; cellDmaLargePut(ptr, ppuRigidBodyAddress , dmaBodySize, DMA_TAG(1), 0, 0); cellDmaWaitTagStatusAll(DMA_MASK(1)); } } } } break; } default: { } }; }
/*
 * Decodes one encoded Speex frame described by the global gviSpursSpeexTaskDesc,
 * using the bit-reader state already held in gviSpursSpeexBits (it is not
 * re-initialised here).
 *
 * Steps:
 *   1. allocate three 16-byte aligned scratch buffers (encoded input, float decode
 *      output, 16-bit output);
 *   2. DMA the encoded input from main memory (tag 1, blocking);
 *   3. load mEncodedFrameSize bytes into the bit reader and decode;
 *   4. convert the float samples to shorts and DMA them back (tag 1, blocking);
 *   5. free the scratch buffers.
 *
 * spuTaskOut->mSpeexReturnCode receives the speex_decode result (0 on success).
 *
 * The clear sizes for inBuffer and outBuffer were previously swapped, so inBuffer
 * could be written past its allocation; each buffer is now cleared with its own size.
 */
void gviSpursSpeexDecodeSet(SpursSpeexTaskOutput *spuTaskOut)
{
	char *inBuffer;
	float *speexBuffer;
	short *outBuffer;
	int rcode;
	unsigned int i;
	size_t speexBytes;
	size_t outBytes;
	size_t inBytes;

	/* Sizes are computed once so allocation, clearing and transfer always agree. */
	speexBytes = gviSpursSpeexTaskDesc.mOutputBufferSize * sizeof(float);
	outBytes   = gviSpursSpeexTaskDesc.mOutputBufferSize * sizeof(short);
	inBytes    = gviSpursSpeexTaskDesc.mInputBufferSize;

	/* NOTE(review): allocation failure is not handled here (same as before) - confirm
	 * whether the task should report an error instead. */
	speexBuffer = (float *)memalign(16, speexBytes);
	outBuffer = (short *)memalign(16, outBytes);
	inBuffer = (char *)memalign(16, inBytes);

	memset(speexBuffer, 0, speexBytes);
	memset(inBuffer, 0, inBytes);
	memset(outBuffer, 0, outBytes);

	/* Fetch the encoded frame. */
	cellDmaGet(inBuffer, (uint64_t)gviSpursSpeexTaskDesc.mInputBuffer, gviSpursSpeexTaskDesc.mInputBufferSize, DMA_TAG(1), 0,0);
	cellDmaWaitTagStatusAll(DMA_MASK(1));

	/* read the data into the bits */
	speex_bits_read_from(&gviSpursSpeexBits, (char *)inBuffer, gviSpursSpeexTaskDesc.mEncodedFrameSize);

	/* decode it */
	rcode = speex_decode((void *)gviSpursSpeexStateBuffer, &gviSpursSpeexBits, speexBuffer);
	assert(rcode == 0);

	/* convert the output from floats */
	for(i = 0 ; i < gviSpursSpeexTaskDesc.mOutputBufferSize ; i++)
		// Expanded to remove warnings in VS2K5
		outBuffer[i] = (short)speexBuffer[i];

	/* Send the decoded samples back. */
	cellDmaPut(outBuffer, (uint64_t)gviSpursSpeexTaskDesc.mOutputBuffer, gviSpursSpeexTaskDesc.mOutputBufferSize * sizeof(short), DMA_TAG(1), 0, 0);
	cellDmaWaitTagStatusAll(DMA_MASK(1));

	free(speexBuffer);
	free(inBuffer);
	free(outBuffer);

	/* Report the decoder's own result so a failure is still visible to the caller when
	 * asserts are compiled out; this is 0 on success, exactly as before. */
	spuTaskOut->mSpeexReturnCode = rcode;
}
/*
 * VRAM dump - NOT IMPLEMENTED.
 *
 * The whole body is compiled out with "#if 0", so calling this function currently
 * does nothing and none of the parameters are used.
 *
 * The disabled draft below is unfinished: it prints a "not done yet" message and
 * returns before doing any work, uses "return 0;" inside a void function, and
 * never references Path, p_mem, packets or remain beyond assigning them.
 */
void gsKit_vram_dump(GSGLOBAL *gsGlobal, char *Path, u32 StartAddr, u32 EndAddr)
{
#if 0
	u64* p_store;
	u64* p_data;
	u32* p_mem;
	int packets;
	int remain;
	int qwc;
	int Size;

	printf("THIS IS NOT DONE YET!\n");
	return 0;

	/* Round both addresses up to a multiple of GS_VRAM_BLOCKSIZE. */
	StartAddr = (-GS_VRAM_BLOCKSIZE)&(StartAddr+GS_VRAM_BLOCKSIZE-1);
	EndAddr = (-GS_VRAM_BLOCKSIZE)&(EndAddr+GS_VRAM_BLOCKSIZE-1);
	Size = EndAddr - StartAddr;

	/* Size expressed in 16-byte units, rounded up. */
	qwc = Size / 16;
	if( Size % 16 )
	{
#ifdef DEBUG
		printf("Uneven division of quad word count from VRAM alloc. Rounding up QWC.\n");
#endif
		qwc++;
	}

	packets = qwc / DMA_MAX_SIZE;
	remain = qwc % DMA_MAX_SIZE;

	/* 7 * 16 bytes: matches the 14 u64 values written below. */
	p_store = p_data = dmaKit_spr_alloc( 7 * 16 );

	FlushCache(0);

	// DMA DATA
	*p_data++ = DMA_TAG( 6, 0, DMA_CNT, 0, 0, 0 );
	*p_data++ = 0;

	*p_data++ = GIF_TAG( 5, 1, 0, 0, 0, 1 );
	*p_data++ = GIF_AD;

	*p_data++ = GS_SETREG_BITBLTBUF(StartAddr / 64, (4095.9375 / 64), 0, 0, 0, 0);
	*p_data++ = GS_BITBLTBUF;

	*p_data++ = GS_SETREG_TRXPOS(0, 0, 0, 0, 0);
	*p_data++ = GS_TRXPOS;

	*p_data++ = GS_SETREG_TRXREG(4095.9375, 4095.9375);
	*p_data++ = GS_TRXREG;

	*p_data++ = 0;
	*p_data++ = GS_FINISH;

	*p_data++ = GS_SETREG_TRXDIR(1);
	*p_data++ = GS_TRXDIR;

	dmaKit_send_spr( DMA_CHANNEL_GIF, 0, p_store, 7 );

	/* Busy-wait until GS_CSR_FINISH reads 1, then write the finish register. */
	while(GS_CSR_FINISH != 1);

	GS_SETREG_CSR_FINISH(1);

	FlushCache(0);

	GS_SETREG_BUSDIR(1);
#endif
}
///this unalignedDma should not be frequently used, only for small data. It handles alignment and performs check on size (<16 bytes) int stallingUnalignedDmaSmallGet(void *ls, uint64_t ea, uint32_t size) { btAssert(size<32); ATTRIBUTE_ALIGNED16(char tmpBuffer[32]); char* mainMem = (char*)ea; char* localStore = (char*)ls; uint32_t i; ///make sure last 4 bits are the same, for cellDmaSmallGet uint32_t last4BitsOffset = ea & 0x0f; char* tmpTarget = tmpBuffer + last4BitsOffset; #if defined (__SPU__) || defined (USE_LIBSPE2) int remainingSize = size; //#define FORCE_cellDmaUnalignedGet 1 #ifdef FORCE_cellDmaUnalignedGet cellDmaUnalignedGet(tmpTarget,ea,size,DMA_TAG(1),0,0); #else char* remainingTmpTarget = tmpTarget; uint64_t remainingEa = ea; while (remainingSize) { switch (remainingSize) { case 1: case 2: case 4: case 8: case 16: { mfc_get(remainingTmpTarget,remainingEa,remainingSize,DMA_TAG(1),0,0); remainingSize=0; break; } default: { //spu_printf("unaligned DMA with non-natural size:%d\n",remainingSize); int actualSize = 0; if (remainingSize > 16) actualSize = 16; else if (remainingSize >8) actualSize=8; else if (remainingSize >4) actualSize=4; else if (remainingSize >2) actualSize=2; mfc_get(remainingTmpTarget,remainingEa,actualSize,DMA_TAG(1),0,0); remainingSize-=actualSize; remainingTmpTarget+=actualSize; remainingEa += actualSize; } } } #endif//FORCE_cellDmaUnalignedGet #else //copy into final destination #ifdef USE_MEMCPY memcpy(tmpTarget,mainMem,size); #else for ( i=0;i<size;i++) { tmpTarget[i] = mainMem[i]; } #endif //USE_MEMCPY #endif cellDmaWaitTagStatusAll(DMA_MASK(1)); //this is slowish, perhaps memcpy on SPU is smarter? for (i=0; btLikely( i<size );i++) { localStore[i] = tmpTarget[i]; } return 0; }
/*
 * Encodes one frame of 16-bit samples described by the global gviSpursSpeexTaskDesc.
 *
 * Steps:
 *   1. preset the output record (initialized flag = 1, everything else 0);
 *   2. allocate three 16-byte aligned scratch buffers and clear them;
 *   3. DMA mInputBufferSize shorts from main memory (tag 1, blocking);
 *   4. widen the samples to float, reset the bit writer and encode;
 *   5. write up to mEncodedFrameSize bytes of encoded data into the output buffer and
 *      store the written length in mSpeexOutBufferSize;
 *   6. DMA mOutputBufferSize bytes back to main memory (tag 1, blocking) and free
 *      the scratch buffers.
 *
 * mSpeexReturnCode is always left at 0.
 */
void gviSpursSpeexEncode(SpursSpeexTaskOutput *spuTaskOut)
{
	short *pcmSamples;
	float *floatSamples;
	char *encodedBytes;
	unsigned int sampleIdx;

	/* Preset the result record. */
	spuTaskOut->mSpeexInitialized = 1;
	spuTaskOut->mSpeexReturnCode = 0;
	spuTaskOut->mSpeexEncodedFrameSize = 0;
	spuTaskOut->mSpeexSamplesPerFrame = 0;
	spuTaskOut->mSpeexOutBufferSize = 0;

	/* Scratch buffers: float working copy, raw 16-bit input, encoded output. */
	floatSamples = (float *)memalign(16, gviSpursSpeexTaskDesc.mInputBufferSize * sizeof(float));
	pcmSamples = (short *)memalign(16, gviSpursSpeexTaskDesc.mInputBufferSize * sizeof(short));
	encodedBytes = (char *)memalign(16, gviSpursSpeexTaskDesc.mOutputBufferSize);

	memset(floatSamples, 0, gviSpursSpeexTaskDesc.mInputBufferSize * sizeof(float));
	memset(pcmSamples, 0, gviSpursSpeexTaskDesc.mInputBufferSize * sizeof(short));
	memset(encodedBytes, 0, gviSpursSpeexTaskDesc.mOutputBufferSize);

	/* Fetch the raw samples. */
	cellDmaGet(pcmSamples, (uint64_t)gviSpursSpeexTaskDesc.mInputBuffer, gviSpursSpeexTaskDesc.mInputBufferSize * sizeof(short), DMA_TAG(1), 0,0);
	cellDmaWaitTagStatusAll(DMA_MASK(1));

	/* The encoder is fed floats, so widen every sample. */
	for(sampleIdx = 0 ; sampleIdx < gviSpursSpeexTaskDesc.mInputBufferSize ; sampleIdx++)
	{
		floatSamples[sampleIdx] = pcmSamples[sampleIdx];
	}

	/* (Re)initialise and flush the bit writer, then encode the frame. */
	speex_bits_init_buffer(&gviSpursSpeexBits,gviSpursSpeexBitsBuffer,sizeof(gviSpursSpeexBitsBuffer));
	speex_bits_reset(&gviSpursSpeexBits);
	speex_encode(gviSpursSpeexStateBuffer, floatSamples, &gviSpursSpeexBits);

	/* Serialise the bits; the number of bytes written is reported to the caller. */
	spuTaskOut->mSpeexOutBufferSize = speex_bits_write(&gviSpursSpeexBits, (char *)encodedBytes, gviSpursSpeexTaskDesc.mEncodedFrameSize);

	/* Send the encoded frame back (the full output buffer size is transferred). */
	cellDmaPut(encodedBytes, (uint64_t)gviSpursSpeexTaskDesc.mOutputBuffer, gviSpursSpeexTaskDesc.mOutputBufferSize, DMA_TAG(1), 0, 0);
	cellDmaWaitTagStatusAll(DMA_MASK(1));

	free(floatSamples);
	free(pcmSamples);
	free(encodedBytes);

	spuTaskOut->mSpeexReturnCode = 0;
}