//========================================================================= // Internal function: // // Skip over jumps that lead to the real function. Gets around import // jump tables, etc. //========================================================================= static PBYTE SkipJumps(PBYTE pbCode) { #ifdef _M_IX86_X64 if (pbCode[0] == 0xff && pbCode[1] == 0x25) { #ifdef _M_IX86 // on x86 we have an absolute pointer... PBYTE pbTarget = *(PBYTE *)&pbCode[2]; // ... that shows us an absolute pointer. return SkipJumps(*(PBYTE *)pbTarget); #elif defined _M_X64 // on x64 we have a 32-bit offset... INT32 lOffset = *(INT32 *)&pbCode[2]; // ... that shows us an absolute pointer return SkipJumps(*(PBYTE*)(pbCode + 6 + lOffset)); #endif } else if (pbCode[0] == 0xe9) { // here the behavior is identical, we have... // ...a 32-bit offset to the destination. return SkipJumps(pbCode + 5 + *(INT32 *)&pbCode[1]); } else if (pbCode[0] == 0xeb) { // and finally an 8-bit offset to the destination return SkipJumps(pbCode + 2 + *(CHAR *)&pbCode[1]); } #else #error unsupported platform #endif return pbCode; }
//========================================================================= // Internal function: // // Skip over jumps that lead to the real function. Gets around import // jump tables, etc. //========================================================================= static PBYTE SkipJumps(PBYTE pbCode) { PBYTE pbOrgCode = pbCode; #ifdef _M_IX86_X64 #ifdef _M_IX86 //mov edi,edi: hot patch point if (pbCode[0] == 0x8b && pbCode[1] == 0xff) pbCode += 2; // push ebp; mov ebp, esp; pop ebp; // "collapsed" stackframe generated by MSVC if (pbCode[0] == 0x55 && pbCode[1] == 0x8b && pbCode[2] == 0xec && pbCode[3] == 0x5d) pbCode += 4; #endif if (pbCode[0] == 0xff && pbCode[1] == 0x25) { #ifdef _M_IX86 // on x86 we have an absolute pointer... PBYTE pbTarget = *(PBYTE *)&pbCode[2]; // ... that shows us an absolute pointer. return SkipJumps(*(PBYTE *)pbTarget); #elif defined _M_X64 // on x64 we have a 32-bit offset... INT32 lOffset = *(INT32 *)&pbCode[2]; // ... that shows us an absolute pointer return SkipJumps(*(PBYTE*)(pbCode + 6 + lOffset)); } else if (pbCode[0] == 0x48 && pbCode[1] == 0xff && pbCode[2] == 0x25) { // or we can have the same with a REX prefix INT32 lOffset = *(INT32 *)&pbCode[3]; // ... that shows us an absolute pointer return SkipJumps(*(PBYTE*)(pbCode + 7 + lOffset)); #endif } else if (pbCode[0] == 0xe9) { // here the behavior is identical, we have... // ...a 32-bit offset to the destination. return SkipJumps(pbCode + 5 + *(INT32 *)&pbCode[1]); } else if (pbCode[0] == 0xeb) { // and finally an 8-bit offset to the destination return SkipJumps(pbCode + 2 + *(CHAR *)&pbCode[1]); } #else #error unsupported platform #endif return pbOrgCode; }
//========================================================================= BOOL Mhook_SetHook(PVOID *ppSystemFunction, PVOID pHookFunction) { MHOOKS_TRAMPOLINE* pTrampoline = NULL; PVOID pSystemFunction = *ppSystemFunction; // ensure thread-safety EnterCritSec(); ODPRINTF((L"mhooks: Mhook_SetHook: Started on the job: %p / %p", pSystemFunction, pHookFunction)); // find the real functions (jump over jump tables, if any) pSystemFunction = SkipJumps((PBYTE)pSystemFunction); pHookFunction = SkipJumps((PBYTE)pHookFunction); ODPRINTF((L"mhooks: Mhook_SetHook: Started on the job: %p / %p", pSystemFunction, pHookFunction)); // figure out the length of the overwrite zone MHOOKS_PATCHDATA patchdata = {0}; DWORD dwInstructionLength = DisassembleAndSkip(pSystemFunction, MHOOK_JMPSIZE, &patchdata); if (dwInstructionLength >= MHOOK_JMPSIZE) { ODPRINTF((L"mhooks: Mhook_SetHook: disassembly signals %d bytes", dwInstructionLength)); // suspend every other thread in this process, and make sure their IP // is not in the code we're about to overwrite. SuspendOtherThreads((PBYTE)pSystemFunction, dwInstructionLength); // allocate a trampoline structure (TODO: it is pretty wasteful to get // VirtualAlloc to grab chunks of memory smaller than 100 bytes) pTrampoline = TrampolineAlloc((PBYTE)pSystemFunction, patchdata.nLimitUp, patchdata.nLimitDown); if (pTrampoline) { ODPRINTF((L"mhooks: Mhook_SetHook: allocated structure at %p", pTrampoline)); DWORD dwOldProtectSystemFunction = 0; DWORD dwOldProtectTrampolineFunction = 0; // set the system function to PAGE_EXECUTE_READWRITE if (VirtualProtect(pSystemFunction, dwInstructionLength, PAGE_EXECUTE_READWRITE, &dwOldProtectSystemFunction)) { ODPRINTF((L"mhooks: Mhook_SetHook: readwrite set on system function")); // mark our trampoline buffer to PAGE_EXECUTE_READWRITE if (VirtualProtect(pTrampoline, sizeof(MHOOKS_TRAMPOLINE), PAGE_EXECUTE_READWRITE, &dwOldProtectTrampolineFunction)) { ODPRINTF((L"mhooks: Mhook_SetHook: readwrite set on trampoline structure")); // create our trampoline function PBYTE pbCode = pTrampoline->codeTrampoline; // save original code.. for (DWORD i = 0; i<dwInstructionLength; i++) { pTrampoline->codeUntouched[i] = pbCode[i] = ((PBYTE)pSystemFunction)[i]; } pbCode += dwInstructionLength; // plus a jump to the continuation in the original location pbCode = EmitJump(pbCode, ((PBYTE)pSystemFunction) + dwInstructionLength); ODPRINTF((L"mhooks: Mhook_SetHook: updated the trampoline")); // fix up any IP-relative addressing in the code FixupIPRelativeAddressing(pTrampoline->codeTrampoline, (PBYTE)pSystemFunction, &patchdata); DWORD_PTR dwDistance = (PBYTE)pHookFunction < (PBYTE)pSystemFunction ? (PBYTE)pSystemFunction - (PBYTE)pHookFunction : (PBYTE)pHookFunction - (PBYTE)pSystemFunction; if (dwDistance > 0x7fff0000) { // create a stub that jumps to the replacement function. // we need this because jumping from the API to the hook directly // will be a long jump, which is 14 bytes on x64, and we want to // avoid that - the API may or may not have room for such stuff. // (remember, we only have 5 bytes guaranteed in the API.) // on the other hand we do have room, and the trampoline will always be // within +/- 2GB of the API, so we do the long jump in there. // the API will jump to the "reverse trampoline" which // will jump to the user's hook code. pbCode = pTrampoline->codeJumpToHookFunction; pbCode = EmitJump(pbCode, (PBYTE)pHookFunction); ODPRINTF((L"mhooks: Mhook_SetHook: created reverse trampoline")); FlushInstructionCache(GetCurrentProcess(), pTrampoline->codeJumpToHookFunction, pbCode - pTrampoline->codeJumpToHookFunction); // update the API itself pbCode = (PBYTE)pSystemFunction; pbCode = EmitJump(pbCode, pTrampoline->codeJumpToHookFunction); } else { // the jump will be at most 5 bytes so we can do it directly // update the API itself pbCode = (PBYTE)pSystemFunction; pbCode = EmitJump(pbCode, (PBYTE)pHookFunction); } // update data members pTrampoline->cbOverwrittenCode = dwInstructionLength; pTrampoline->pSystemFunction = (PBYTE)pSystemFunction; pTrampoline->pHookFunction = (PBYTE)pHookFunction; // flush instruction cache and restore original protection FlushInstructionCache(GetCurrentProcess(), pTrampoline->codeTrampoline, dwInstructionLength); VirtualProtect(pTrampoline, sizeof(MHOOKS_TRAMPOLINE), dwOldProtectTrampolineFunction, &dwOldProtectTrampolineFunction); } else { ODPRINTF((L"mhooks: Mhook_SetHook: failed VirtualProtect 2: %d", gle())); } // flush instruction cache and restore original protection FlushInstructionCache(GetCurrentProcess(), pSystemFunction, dwInstructionLength); VirtualProtect(pSystemFunction, dwInstructionLength, dwOldProtectSystemFunction, &dwOldProtectSystemFunction); } else { ODPRINTF((L"mhooks: Mhook_SetHook: failed VirtualProtect 1: %d", gle())); } if (pTrampoline->pSystemFunction) { // this is what the application will use as the entry point // to the "original" unhooked function. *ppSystemFunction = pTrampoline->codeTrampoline; ODPRINTF((L"mhooks: Mhook_SetHook: Hooked the function!")); } else { // if we failed discard the trampoline (forcing VirtualFree) TrampolineFree(pTrampoline, TRUE); pTrampoline = NULL; } } // resume everybody else ResumeOtherThreads(); } else { ODPRINTF((L"mhooks: disassembly signals %d bytes (unacceptable)", dwInstructionLength)); } LeaveCritSec(); return (pTrampoline != NULL); }