// Runs a single UNPACK through the given VIF unit under one STMOD mode and
// prints what ended up in VU memory and in the unit's registers.
//
// The first 64 bytes of the unit's VU memory are zeroed, then a 112-byte packet
// is sent that issues two NOPs, STCYCL(4, 4), STROW with 0x1000 in all four
// fields, STMASK(mask), STMOD(mode) and an UNPACK of type V4-32 followed by the
// data words 0..15. Finally the four resulting quadwords, the row registers
// r0..r3 and the mode register are printed.
//
//   unit        - VIF unit providing vuMem, dmaChannel and regs.
//   mode        - value handed to STMOD.
//   maskEnabled - true selects VIF::UNPACK_ENABLE_MASKS, false VIF::UNPACK_NORMAL.
//   mask        - value handed to STMASK (defaults to 0).
void testMode(VIF::Unit *unit, u32 mode, bool maskEnabled, u32 mask = 0) {
    static const u32 resultBytes = 16 * 4;
    static const u32 packetBytes = 16 * 7;

    u8 *const base = unit->vuMem;
    volatile u32 *const words = (volatile u32 *)base;

    // Start from a known state and push it out of the data cache.
    memset(base, 0, resultBytes);
    SyncDCache(base, base + resultBytes);

    VIF::Packet vifcode(packetBytes);
    vifcode.NOP();
    vifcode.NOP();
    vifcode.STCYCL(4, 4);
    vifcode.STROW(0x1000, 0x1000, 0x1000, 0x1000);
    vifcode.STMASK(mask);
    vifcode.STMOD(mode);
    vifcode.UNPACK(VIF::UNPACK_TYPE_V4_32, 4, 0,
                   maskEnabled ? VIF::UNPACK_ENABLE_MASKS : VIF::UNPACK_NORMAL);

    // Payload: the values 0 through 15, one per 32-bit word.
    for (int value = 0; value < 16; value++) {
        vifcode.Data32(value);
    }

    DMA::SendSimple(unit->dmaChannel, vifcode.Raw(), packetBytes);

    // Sync again before reading the result back through the CPU.
    SyncDCache(base, base + resultBytes);

    for (int quad = 0; quad < 4; quad++) {
        const int first = quad * 4;
        printf(" vumem(%02x): %08x - %08x - %08x - %08x\n", quad * 0x10,
               words[first + 0], words[first + 1], words[first + 2], words[first + 3]);
    }

    printf(" row: %08x - %08x - %08x - %08x\n",
           unit->regs->r0, unit->regs->r1, unit->regs->r2, unit->regs->r3);
    printf(" mode: %08x\n", unit->regs->mode);
    printf("\n");
}
// Unpacks a single vector of the requested type and prints the first quadword
// of VU memory afterwards.
//
// The unit is reset through FBRST first. One quadword of VU memory is filled
// with 0xFF, then a 32-byte packet containing STCYCL(4, 4), one UNPACK and the
// four supplied data words is sent.
//
//   unit           - VIF unit providing regs, vuMem and dmaChannel.
//   description    - label inserted into the printed line.
//   unpackType     - UNPACK format to test.
//   zeroExtend     - true selects VIF::UNPACK_ZERO_EXTEND, false VIF::UNPACK_NORMAL.
//   value0..value3 - data words appended after the UNPACK code (value1..3 default to 0).
void testSimpleUnpack(VIF::Unit *unit, const char *description, VIF::UnpackType unpackType, bool zeroExtend, u32 value0, u32 value1 = 0, u32 value2 = 0, u32 value3 = 0) {
    unit->regs->fbrst = VIF::FBRST_RST;

    static const u32 packetBytes = 16 * 2;
    static const u32 resultBytes = 16 * 1;

    u8 *const base = unit->vuMem;
    volatile u32 *const words = (volatile u32 *)base;

    // Pre-fill with 0xFF so untouched bytes are visible in the output.
    memset(base, 0xFF, resultBytes);
    SyncDCache(base, base + resultBytes);

    VIF::Packet vifcode(packetBytes);
    vifcode.STCYCL(4, 4);
    vifcode.UNPACK(unpackType, 1, 0,
                   zeroExtend ? VIF::UNPACK_ZERO_EXTEND : VIF::UNPACK_NORMAL);

    const u32 payload[4] = { value0, value1, value2, value3 };
    for (int idx = 0; idx < 4; idx++) {
        vifcode.Data32(payload[idx]);
    }

    DMA::SendSimple(unit->dmaChannel, vifcode.Raw(), packetBytes);
    SyncDCache(base, base + resultBytes);

    printf("Simple UNPACK %s - %08x - %08x - %08x - %08x\n", description,
           words[0], words[1], words[2], words[3]);
}
// Issues an UNPACK of 256 S8 vectors with masking enabled and no data words
// appended, then prints the first two and last two 32-bit words of the
// 4096-byte VU memory window.
//
// The unit is reset through FBRST first and the window is pre-filled with 0xFF.
// The packet programs STCOL, an STMASK of 0xAAAAAAAA and STCYCL(1, 0) ahead of
// the UNPACK.
//
//   unit - VIF unit providing regs, vuMem and dmaChannel.
void testSetUnpack(VIF::Unit *unit) {
    unit->regs->fbrst = VIF::FBRST_RST;

    static const u32 packetBytes = 16 * 4;
    static const u32 windowBytes = 16 * 256;

    u8 *const base = unit->vuMem;
    volatile u32 *const words = (volatile u32 *)base;

    memset(base, 0xFF, windowBytes);
    SyncDCache(base, base + windowBytes);

    VIF::Packet vifcode(packetBytes);
    vifcode.STCOL(0xFFEEDDCC, 0xBBAA9988, 0x77665544, 0x33221100);
    // Mask pattern chosen so every write takes its value from COL.
    vifcode.STMASK(0xAAAAAAAA);
    vifcode.STCYCL(1, 0);
    vifcode.UNPACK(VIF::UNPACK_TYPE_S8, 256, 0, VIF::UNPACK_ENABLE_MASKS);

    DMA::SendSimple(unit->dmaChannel, vifcode.Raw(), packetBytes);
    SyncDCache(base, base + windowBytes);

    // Words 1022 and 1023 are the last two of the 4096-byte window.
    printf("Set 4096 bytes UNPACK - %08x - %08x - %08x - %08x\n",
           words[0], words[1], words[1022], words[1023]);
}
/*
 * Uploads a CLUT to GS VRAM unless it already has a valid VRAM address.
 *
 * The size reported by rmClutSize() is rounded up to a multiple of
 * GS_VRAM_BLOCKSIZE_256 and taken from gsGlobal->CurrentPointer. When the
 * allocation does not fit behind the current pointer, rmFlush() is called and
 * the fit is checked again.
 *
 * clut - palette descriptor; clut->VramClut is set on success, or to
 *        GSKIT_ALLOC_ERROR when the CLUT alone exceeds __VRAM_SIZE.
 *
 * Returns 1 when the CLUT is resident (already or newly uploaded), 0 when the
 * size cannot be determined or the CLUT cannot be placed in VRAM.
 */
static int rmUploadClut(GSCLUT *clut)
{
    if (clut->VramClut && clut->VramClut != GSKIT_ALLOC_ERROR) // already uploaded
        return 1;

    u32 size;
    u32 w, h;
    if (!rmClutSize(clut, &size, &w, &h))
        return 0;

    // alignment of the allocation
    size = (-GS_VRAM_BLOCKSIZE_256) & (size + GS_VRAM_BLOCKSIZE_256 - 1);

    // too large to fit VRAM with the currently allocated space?
    if (gsGlobal->CurrentPointer + size >= __VRAM_SIZE) {
        if (size >= __VRAM_SIZE) {
            // Only log this if the allocation is too large itself
            LOG("RENDERMAN Requested clut allocation is bigger than VRAM!\n");
            // We won't allocate this, it's too large
            clut->VramClut = GSKIT_ALLOC_ERROR;
            return 0;
        }

        rmFlush();

        // Flushing may still not leave enough room. Bail out instead of
        // advancing CurrentPointer past the end of VRAM (same policy as the
        // post-flush check in rmUploadTexture).
        if (gsGlobal->CurrentPointer + size >= __VRAM_SIZE)
            return 0;
    }

    clut->VramClut = gsGlobal->CurrentPointer;
    gsGlobal->CurrentPointer += size;

    rmAppendUploadedCLUTs(clut);

    // Write the palette back from the data cache before it is transferred.
    SyncDCache(clut->Clut, (u8 *)(clut->Clut) + size);
    gsKit_texture_send_inline(gsGlobal, clut->Clut, w, h, clut->VramClut, clut->ClutPSM, 1, GS_CLUT_PALLETE);

    return 1;
}
/*
 * Uploads a texture (and, for paletted textures, its CLUT) to GS VRAM.
 *
 * txt->Clut is treated as a pointer to a GSCLUT descriptor rather than to raw
 * palette data. The CLUT is uploaded first and its VRAM address copied into
 * txt->VramClut. The pixel data size is rounded up to a multiple of
 * GS_VRAM_BLOCKSIZE_256 and taken from gsGlobal->CurrentPointer; if it does not
 * fit, rmFlush() is called, the CLUT is uploaded again and the fit re-checked.
 *
 * Returns 1 on success, 0 when the CLUT upload fails or the texture cannot be
 * placed in VRAM (txt->Vram is set to GSKIT_ALLOC_ERROR when the texture alone
 * exceeds __VRAM_SIZE).
 */
static int rmUploadTexture(GSTEXTURE* txt)
{
    GSCLUT *palette = (GSCLUT *)txt->Clut;

    // Paletted textures need their CLUT resident before the pixels go up.
    if (palette) {
        if (!rmUploadClut(palette))
            return 0;

        // mirror the CLUT's VRAM address into the texture
        txt->VramClut = palette->VramClut;
    }

    const u32 rawBytes = gsKit_texture_size(txt->Width, txt->Height, txt->PSM);
    // round the allocation up to the VRAM block size
    const u32 vramBytes = (-GS_VRAM_BLOCKSIZE_256) & (rawBytes + GS_VRAM_BLOCKSIZE_256 - 1);

    // Not enough room behind the current allocation pointer?
    if (gsGlobal->CurrentPointer + vramBytes >= __VRAM_SIZE) {
        if (vramBytes >= __VRAM_SIZE) {
            // Logged only when the request by itself can never fit.
            LOG("RENDERMAN Requested texture allocation is bigger than VRAM!\n");
            // Mark it so nobody tries to use this texture's VRAM address.
            txt->Vram = GSKIT_ALLOC_ERROR;
            return 0;
        }

        rmFlush();

        // The flush should not have dropped the CLUT; if it did, upload it again.
        if (palette) {
            if (!rmUploadClut(palette))
                return 0;

            txt->VramClut = palette->VramClut;
        }

        // The CLUT fits, but the pixmap next to it does not.
        if (gsGlobal->CurrentPointer + vramBytes >= __VRAM_SIZE)
            return 0;
    }

    txt->Vram = gsGlobal->CurrentPointer;
    gsGlobal->CurrentPointer += vramBytes;

    rmAppendUploadedTextures(txt);

    // gsKit_texture_upload cannot be used here: it would take txt->Clut for the
    // palette data itself, while this code stores a pointer to the CLUT
    // descriptor structure there.
    gsKit_setup_tbw(txt);
    SyncDCache(txt->Mem, (u8 *)(txt->Mem) + vramBytes);
    gsKit_texture_send_inline(gsGlobal, txt->Mem, txt->Width, txt->Height, txt->Vram, txt->PSM, txt->TBW,
                              palette ? GS_CLUT_TEXTURE : GS_CLUT_NONE);

    return 1;
}
// Sends a source-chain REFE tag built with a second argument of 0 (presumably
// the quadword count) to the toSPR channel, then reads 16 KiB back through the
// fromSPR channel and reports what arrived.
//
// Printed results: both channels' SADR after the transfers, the first eight
// words read back, and the byte offset and value of the first non-zero word
// found from word index 16 / sizeof(u32) onwards (nothing is printed for that
// scan when every word is zero). Both SADR registers are reset to 0 on exit.
//
//   buf - scratch buffer; at least 16 KiB are written and read.
static void testSizeZero(u32 *buf) {
    static const u32 bufBytes = 16 * 1024;

    printf("Size zero:\n");

    DMA::SrcChainPacket srcTag(512);

    // Recognisable source data: 0xDD filler with four marker words up front.
    memset(buf, 0xDD, bufBytes);
    buf[0] = 0x01234567;
    buf[1] = 0x89ABCDEF;
    buf[2] = 0xDEADBEEF;
    buf[3] = 0x1337C0DE;

    srcTag.Reset();
    srcTag.REFE(buf, 0);

    toSPR->sadr = 0;
    DMA::SendChain(toSPR, srcTag.Raw(), 512);
    SyncDCache(buf, (u8 *)buf + bufBytes);

    // Overwrite the buffer with a different filler so stale source data cannot
    // be mistaken for data read back from the scratchpad.
    memset(buf, 0xCC, bufBytes);
    SyncDCache(buf, (u8 *)buf + bufBytes);

    fromSPR->sadr = 0;
    DMA::SendSimple(fromSPR, buf, bufBytes);
    SyncDCache(buf, (u8 *)buf + bufBytes);

    printf(" SADR updated: to=%08x, from=%08x\n", toSPR->sadr, fromSPR->sadr);
    printf(" Send zero 0x0000: %08x %08x %08x %08x\n", buf[0], buf[1], buf[2], buf[3]);
    printf(" Send zero 0x0010: %08x %08x %08x %08x\n", buf[4], buf[5], buf[6], buf[7]);

    for (u32 i = 16 / sizeof(u32); i < bufBytes / sizeof(u32); ++i) {
        if (buf[i] != 0) {
            // i * sizeof(u32) has type size_t; cast so the argument matches %x.
            printf(" Read zeros until: 0x%04x %08x\n", (unsigned int)(i * sizeof(u32)), buf[i]);
            break;
        }
    }

    fromSPR->sadr = 0;
    toSPR->sadr = 0;
}
/*
 * Common helper
 *
 * Returns the more recent of the two pad_data entries kept for the given
 * port/slot: entry 1 when its frame counter is greater than entry 0's,
 * otherwise entry 0 (so entry 0 also wins a tie).
 *
 * A 256-byte range starting at the buffer is passed to SyncDCache before the
 * comparison; presumably that spans both entries -- confirm against
 * sizeof(struct pad_data).
 */
static struct pad_data*
padGetDmaStr(int port, int slot)
{
    struct pad_data *entries = PadState[port][slot].padData;

    SyncDCache(entries, (u8 *)entries + 256);

    return (entries[0].frame < entries[1].frame) ? &entries[1] : &entries[0];
}
int sbv_patch_enable_lmb() { u8 buf[256]; slib_exp_lib_t *modload_lib = (slib_exp_lib_t *)buf; smod_mod_info_t *loadfile_info = (smod_mod_info_t *)buf; void *pStartModule, *pLoadModuleBuffer, *lf_text_start, *patch_addr; u32 lf_rpc_dispatch, lf_jump_table, result; int nexps, id, i; memset(&_slib_cur_exp_lib_list, 0, sizeof(slib_exp_lib_list_t)); /* Locate the modload export library - it must have at least 16 exports. */ if ((nexps = slib_get_exp_lib("modload", modload_lib)) < 16) return -1; pStartModule = modload_lib->exports[8]; pLoadModuleBuffer = modload_lib->exports[10]; /* Now we need to find the loadfile module. */ memset(buf, 0, sizeof(smod_mod_info_t)); if (!(id = smod_get_mod_by_name("LoadModuleByEE", loadfile_info))) return -1; /* Locate the loadfile RPC dispatch code, where the first 4 instructions look like: 27bdffe8 addiu $sp, -24 2c820006 sltiu $v0, $a0, 6 14400003 bnez $v0, +12 afbf0010 sw $ra, 0x10($sp) */ lf_text_start = (void *)(loadfile_info->text_start + 0x400); smem_read(lf_text_start, buf, sizeof buf); for (i = 0; i < sizeof buf; i += 4) { if ((*(u32 *)(buf + i) == 0x27bdffe8) && (*(u32 *)(buf + i + 4) == 0x2c820006) && (*(u32 *)(buf + i + 8) == 0x14400003) && (*(u32 *)(buf + i + 12) == 0xafbf0010)) break; } /* This is a special case: if the IOP was reset with an image that contains a LOADFILE that supports LMB, we won't detect the dispatch routine. If we even got this far in the code then we can return success. */ if (i >= sizeof buf) return 0; /* We need to extract the address of the jump table, it's only 40 bytes in. */ lf_rpc_dispatch = (u32)lf_text_start + i; smem_read((void *)lf_rpc_dispatch, buf, 40); lf_jump_table = (*(u16 *)(buf + 0x1c) << 16) + *(s16 *)(buf + 0x24); /* Now we can patch our subversive LoadModuleBuffer RPC call. */ SifInitIopHeap(); if (!(patch_addr = SifAllocIopHeap(sizeof lmb_patch))) return -1; /* result is where the RPC return structure is stored. 
*/ result = (u32)patch_addr + 96; lmb_patch[5] = JAL((u32)pLoadModuleBuffer); lmb_patch[7] = HI16(result); lmb_patch[9] = LO16(result); lmb_patch[15] = JAL((u32)pStartModule); SyncDCache(lmb_patch, (void *)(lmb_patch + 24)); smem_write(patch_addr, lmb_patch, sizeof lmb_patch); /* Finally. The last thing to do is to patch the loadfile RPC dispatch routine so that it will jump to entry #6 in it's jump table, and to patch the jump table itself. */ ee_kmode_enter(); *(u32 *)(SUB_VIRT_MEM + lf_rpc_dispatch + 4) = 0x2c820007; *(u32 *)(SUB_VIRT_MEM + lf_jump_table + 0x18) = (u32)patch_addr; ee_kmode_exit(); return 0; }
/* data (anything). */ static void* InitCB ( void* apParam, MPEGSequenceInfo* apInfo ) { int lDataSize = apInfo -> m_Width * apInfo -> m_Height * 4; char* retVal = ( char* )malloc ( lDataSize ); InitCBParam* lpParam = ( InitCBParam* )apParam; int lMBW = ( apInfo -> m_Width ) >> 4; int lMBH = ( apInfo -> m_Height ) >> 4; int lTBW = ( apInfo -> m_Width + 63 ) >> 6; int lTW = draw_log2 ( apInfo -> m_Width ); int lTH = draw_log2 ( apInfo -> m_Height ); int lX, lY; char* lpImg; QWORD* q; lpParam -> m_TexAddr >>= 6; lpParam -> m_pData = lpImg = retVal; lpParam -> m_pInfo = apInfo; SyncDCache ( retVal, retVal + lDataSize ); /* This initializes picture transfer packet. */ /* Decoded picture is a sequence of 16x16 pixels */ /* 'subpictures' (macroblocks) and DMA controller */ /* will transfer them all at once using source */ /* chain transfer mode. */ packet_allocate(&lpParam -> m_XFerPck,(10 + 12 * lMBW * lMBH )>>1,0,0); q = lpParam-> m_XFerPck.data; DMATAG_CNT(q, 3, 0, 0, 0); q++; PACK_GIFTAG(q,GIF_SET_TAG( 2, 0, 0, 0, 0, 1 ),GIF_REG_AD); q++; PACK_GIFTAG(q,GS_SET_TRXREG( 16, 16 ), GS_REG_TRXREG); q++; PACK_GIFTAG(q,GS_SET_BITBLTBUF( 0, 0, 0, lpParam -> m_TexAddr, lTBW, GS_PSM_32 ), GS_REG_BITBLTBUF); q++; for ( lY = 0; lY < apInfo -> m_Height; lY += 16 ) { for ( lX = 0; lX < apInfo -> m_Width; lX += 16, lpImg += 1024 ) { DMATAG_CNT(q, 4, 0, 0, 0 ); q++; PACK_GIFTAG(q,GIF_SET_TAG( 2, 0, 0, 0, 0, 1 ), GIF_REG_AD ); q++; PACK_GIFTAG(q,GS_SET_TRXPOS( 0, 0, lX, lY, 0 ), GS_REG_TRXPOS ); q++; PACK_GIFTAG(q,GS_SET_TRXDIR( 0 ), GS_REG_TRXDIR ); q++; PACK_GIFTAG(q,GIF_SET_TAG( 64, 1, 0, 0, 2, 0),0); q++; DMATAG_REF(q, 64, ( unsigned )lpImg, 0, 0, 0); q++; } /* end for */ } /* end for */ //DMATAG_END(q,0,0,0,0); //q++; lpParam-> m_XFerPck.qwc = q - lpParam-> m_XFerPck.data; /* This initializes picture drawing packet. Just textrured sprite */ /* that occupies the whole screen (no aspect ratio is taken into */ /* account for simplicity. 
*/ packet_allocate(&lpParam -> m_DrawPck,7,0,0); q = lpParam -> m_DrawPck.data; PACK_GIFTAG(q, GIF_SET_TAG( 6, 1, 0, 0, 0, 1 ), GIF_REG_AD ); q++; PACK_GIFTAG(q, GS_SET_TEX0( lpParam -> m_TexAddr, lTBW, GS_PSM_32, lTW, lTH, 1, 1, 0, 0, 0, 0, 0 ), GS_REG_TEX0_1 ); q++; PACK_GIFTAG(q, GS_SET_PRIM( 6, 0, 1, 0, 0, 0, 1, 0, 0 ), GS_REG_PRIM ); q++; PACK_GIFTAG(q, GS_SET_UV( 0, 0 ), GS_REG_UV ); q++; PACK_GIFTAG(q, GS_SET_XYZ( 0, 0, 0 ), GS_REG_XYZ2 ); q++; PACK_GIFTAG(q, GS_SET_UV( apInfo -> m_Width << 4, apInfo -> m_Height << 4 ), GS_REG_UV ); q++; PACK_GIFTAG(q, GS_SET_XYZ( 640 << 4, 512 << 4, 0 ), GS_REG_XYZ2 ); q++; lpParam -> m_DrawPck.qwc = q - lpParam -> m_DrawPck.data; return retVal; } /* end InitCB */