/* Append an ALU instruction to the shader's ALU list and update the
 * bookkeeping that depends on it (binary size, GPR high-water mark,
 * dirty flags, reference count). */
void AddALUInstruction(R700_Shader *pShader, R700ALUInstruction *pALUInst)
{
    /* The instruction's index is its position in the list before insertion. */
    pALUInst->m_uIndex = pShader->lstALUInstructions.uNumOfNode;
    AddInstToList(&(pShader->lstALUInstructions), (R700ShaderInstruction*)pALUInst);

    /* Grow the shader binary by this instruction's dword footprint. */
    pShader->uShaderBinaryDWORDSize += GetInstructionSize(pALUInst->m_ShaderInstType);

    /* Track the highest destination GPR seen so far. */
    if (pShader->nRegs < pALUInst->m_Word1.f.dst_gpr)
    {
        pShader->nRegs = pALUInst->m_Word1.f.dst_gpr;
    }

    /* Clause links and the assembled image are now stale. */
    pShader->bLinksDirty    = GL_TRUE;
    pShader->bNeedsAssembly = GL_TRUE;

    pALUInst->useCount++;
}
/* Recompute the dword offsets of the CF/ALU/TEX/VTX sections within the
 * shader binary and patch clause-link addresses accordingly.
 * NOTE(review): this definition appears truncated by extraction — the
 * visible text ends inside the function body (no closing brace). */
void ResolveLinks(R700_Shader *pShader)
{
    GLuint uiSize;
    R700ShaderInstruction *pInst;
    R700ALUInstruction *pALUinst;
    R700TextureInstruction *pTEXinst;
    R700VertexInstruction *pVTXinst;

    GLuint vtxOffset;

    /* Section layout: CF first, then ALU, then TEX, then VTX. */
    GLuint cfOffset  = 0x0;
    GLuint aluOffset = cfOffset + pShader->lstCFInstructions.uNumOfNode * GetInstructionSize(SIT_CF);
    GLuint texOffset = aluOffset; // + m_lstALUInstructions.size() * R700ALUInstruction::SIZE,

    /* ALU instructions have per-type sizes, so walk the list to find the
     * end of the ALU section (start of TEX). */
    pInst = pShader->lstALUInstructions.pHead;
    while(NULL != pInst)
    {
        texOffset += GetInstructionSize(pInst->m_ShaderInstType);
        pInst = pInst->pNextInst;
    };

    vtxOffset = texOffset + pShader->lstTEXInstructions.uNumOfNode * GetInstructionSize(SIT_TEX);

    /* TEX/VTX clauses must start on a 4-dword boundary; if either section is
     * present and misaligned, pad the ALU section with one empty instruction. */
    if ( ((pShader->lstTEXInstructions.uNumOfNode > 0) && (texOffset % 4 != 0))
      || ((pShader->lstVTXInstructions.uNumOfNode > 0) && (vtxOffset % 4 != 0)) )
    {
        pALUinst = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
        Init_R700ALUInstruction(pALUinst);
        AddALUInstruction(pShader, pALUinst);
        texOffset += GetInstructionSize(SIT_ALU);
        vtxOffset += GetInstructionSize(SIT_ALU);
    }

    /* Patch each CF clause that links to an ALU group with the group's
     * final address within the binary. */
    pInst = pShader->lstALUInstructions.pHead;
    uiSize = 0;
    while(NULL != pInst)
    {
        pALUinst = (R700ALUInstruction*)pInst;
        if(pALUinst->m_pLinkedALUClause != NULL)
        {
            // This address is quad-word aligned
            pALUinst->m_pLinkedALUClause->m_Word0.f.addr = (aluOffset + uiSize) >> 1;
        }
        uiSize += GetInstructionSize(pALUinst->m_ShaderInstType);
        pInst = pInst->pNextInst;
    };
/* Append a vertex-fetch instruction to the shader's VTX list and update
 * the dependent bookkeeping (binary size, GPR high-water mark for generic
 * fetches, dirty flags, reference count). */
void AddVTXInstruction(R700_Shader *pShader, R700VertexInstruction *pVTXInst)
{
    /* The instruction's index is its position in the list before insertion. */
    pVTXInst->m_uIndex = pShader->lstVTXInstructions.uNumOfNode;
    AddInstToList(&(pShader->lstVTXInstructions), (R700ShaderInstruction*)pVTXInst);

    /* Grow the shader binary by this instruction's dword footprint. */
    pShader->uShaderBinaryDWORDSize += GetInstructionSize(pVTXInst->m_ShaderInstType);

    /* Only generic fetches carry a destination GPR to track. */
    if(pVTXInst->m_ShaderInstType == SIT_VTX_GENERIC)
    {
        R700VertexGenericFetch* pVTXGenericClause = (R700VertexGenericFetch*)pVTXInst;

        if (pShader->nRegs < pVTXGenericClause->m_Word1_GPR.f.dst_gpr)
        {
            pShader->nRegs = pVTXGenericClause->m_Word1_GPR.f.dst_gpr;
        }
    }

    /* Clause links and the assembled image are now stale. */
    pShader->bLinksDirty    = GL_TRUE;
    pShader->bNeedsAssembly = GL_TRUE;

    pVTXInst->useCount++;
}
/* Append a control-flow instruction to the active CF list and update the
 * dependent bookkeeping: binary size, parameter/memory export counters for
 * SX/SMX export clauses, dirty flags, and the reference count. */
void AddCFInstruction(R700_Shader *pShader, R700ControlFlowInstruction *pCFInst)
{
    R700ControlFlowSXClause  *pSXClause  = NULL;
    R700ControlFlowSMXClause *pSMXClause = NULL;

    /* The instruction's index is its position in the list before insertion. */
    pCFInst->m_uIndex = pShader->plstCFInstructions_active->uNumOfNode;
    AddInstToList(pShader->plstCFInstructions_active, (R700ShaderInstruction*)pCFInst);

    /* Grow the shader binary by this instruction's dword footprint. */
    pShader->uShaderBinaryDWORDSize += GetInstructionSize(pCFInst->m_ShaderInstType);

    /* Identify export clauses; all other CF types leave both pointers NULL. */
    if (pCFInst->m_ShaderInstType == SIT_CF_ALL_EXP_SX)
    {
        pSXClause = (R700ControlFlowSXClause*)pCFInst;
    }
    else if (pCFInst->m_ShaderInstType == SIT_CF_ALL_EXP_SMX)
    {
        pSMXClause = (R700ControlFlowSMXClause*)pCFInst;
    }

    /* burst_count is biased by one, so a burst of N exports stores N-1. */
    if ((pSXClause != NULL) && (pSXClause->m_Word0.f.type == SQ_EXPORT_PARAM))
    {
        pShader->nParamExports += pSXClause->m_Word1.f.burst_count + 1;
    }
    else if ((pSMXClause != NULL)
             && (pSMXClause->m_Word1.f.cf_inst == SQ_CF_INST_MEM_RING)
             && (pSMXClause->m_Word0.f.type == SQ_EXPORT_WRITE
                 || pSMXClause->m_Word0.f.type == SQ_EXPORT_WRITE_IND))
    {
        pShader->nMemExports += pSMXClause->m_Word1.f.burst_count + 1;
    }

    /* Clause links and the assembled image are now stale. */
    pShader->bLinksDirty    = GL_TRUE;
    pShader->bNeedsAssembly = GL_TRUE;

    pCFInst->useCount++;
}
bool SharkMemory::DetourFunction(void **ppDelegate, void *pRedirect) { ThreadGrabber threadGrabber; if (!threadGrabber.update(GetCurrentProcessId())) { LOG_DEBUG("Detour transaction failed: Could not take thread snapshot!"); return false; } bool success = false; void *function = *ppDelegate; DWORD_PTR functionOffset = reinterpret_cast<DWORD_PTR>(function); DWORD oldProtection = 0; if (SetMemoryProtection(functionOffset, 0x20, PAGE_EXECUTE_READWRITE, &oldProtection)) { uint32 trampolineSize = 0; uint32 jumpSize = sizeof(DWORD_PTR) + 1; while (trampolineSize < jumpSize) trampolineSize += GetInstructionSize(functionOffset + trampolineSize); // allocate a trampoline... byte *trampoline = m_trampolineHeap.allocate(trampolineSize + jumpSize); if (trampoline != nullptr) { memcpy(trampoline, function, trampolineSize); trampoline[trampolineSize] = 0xE9; DWORD_PTR trampolineOffset = reinterpret_cast<DWORD_PTR>(trampoline); GetMemory<DWORD_PTR>(trampolineOffset + trampolineSize + 1) = functionOffset - (trampolineOffset + jumpSize); SHookInformation hookInfo = {0}; hookInfo.function = function; hookInfo.trampoline = trampoline; hookInfo.bytes.put_array(trampoline, trampolineSize); m_hooks[trampoline] = hookInfo; // hook chain support - relocate jump far, call far if (trampolineSize > sizeof(DWORD_PTR) && (trampoline[0] == 0xE8 || trampoline[0] == 0xE9)) { DWORD_PTR& relocate = GetMemory<DWORD_PTR>(trampolineOffset + 1); relocate += functionOffset; relocate -= trampolineOffset; } ByteBuffer jump(trampolineSize, 0xCC); jump << byte(0xE9); jump << reinterpret_cast<DWORD_PTR>(pRedirect) - (functionOffset + jumpSize); const auto &threads = threadGrabber.threads(); for (const auto& thread: threads) _detourUpdateThread(thread, hookInfo); *ppDelegate = trampoline; success = WriteMemory_Safe(functionOffset, jump); for (const auto& thread: threads) _detourResumeThread(thread); } else LOG_DEBUG("Detour transaction failed: Trampoline heap alloc failed!"); 
SetMemoryProtection(functionOffset, 0x20, oldProtection); } else LOG_DEBUG("Detour transaction failed: Cannot set code protection!"); return success; }