/**
 * Compares sz bytes of a local buffer against memory at a 64-bit address.
 *
 * The comparison is done by the inline assembly between X64_Start() and
 * X64_End(), which (per the comments inside the block) is executed as x64 code.
 * The length is split into sz / 4 dwords compared with `repe cmpsd`, followed
 * by the remaining sz % 4 bytes compared with `cmpsw` and/or `cmpsb`.
 *
 * @param dstMem  local buffer; its address is loaded into EDI
 * @param srcMem  64-bit address of the other buffer; loaded into RSI
 * @param sz      number of bytes to compare
 * @return true only if every compared element is equal. false if any argument
 *         is null/zero, on the first mismatch, or when _M_IX86 is not defined
 *         (the assembly is compiled out and `result` keeps its initial value).
 */
bool cmpMem64(void* dstMem, DWORD64 srcMem, size_t sz)
{
    // Reject null / zero arguments up front; nothing is compared in that case.
    if ((nullptr == dstMem) || (0 == srcMem) || (0 == sz))
        return false;

    bool result = false;
    // reg64 wrapper so the asm below can address the 64-bit value via .dw[0].
    reg64 _src = { srcMem };
#ifdef _M_IX86
    __asm
    {
        X64_Start();

        ;// below code is compiled as x86 inline asm, but it is executed as x64 code
        ;// that's why it need sometimes REX_W() macro, right column contains detailed
        ;// transcription how it will be interpreted by CPU

        push   edi                  ;// push     rdi
        push   esi                  ;// push     rsi
                                    ;//
        mov    edi, dstMem          ;// mov      edi, dword ptr [dstMem]       ; high part of RDI is zeroed
  REX_W mov    esi, _src.dw[0]      ;// mov      rsi, qword ptr [_src]
        mov    ecx, sz              ;// mov      ecx, dword ptr [sz]           ; high part of RCX is zeroed
                                    ;//
        mov    eax, ecx             ;// mov      eax, ecx
        and    eax, 3               ;// and      eax, 3
        shr    ecx, 2               ;// shr      ecx, 2
                                    ;//
        repe   cmpsd                ;// repe cmps dword ptr [rsi], dword ptr [rdi]
        jnz    _ret_false           ;// jnz      _ret_false
                                    ;//
        test   eax, eax             ;// test     eax, eax
        je     _move_0              ;// je       _move_0
        cmp    eax, 1               ;// cmp      eax, 1
        je     _move_1              ;// je       _move_1
                                    ;//
        cmpsw                       ;// cmps     word ptr [rsi], word ptr [rdi]
        jnz    _ret_false           ;// jnz      _ret_false
        cmp    eax, 2               ;// cmp      eax, 2
        je     _move_0              ;// je       _move_0
                                    ;//
_move_1:                            ;//
        cmpsb                       ;// cmps     byte ptr [rsi], byte ptr [rdi]
        jnz    _ret_false           ;// jnz      _ret_false
                                    ;//
_move_0:                            ;//
        mov    result, 1            ;// mov      byte ptr [result], 1
                                    ;//
_ret_false:                         ;//
        pop    esi                  ;// pop      rsi
        pop    edi                  ;// pop      rdi

        X64_End();
    }
#endif
    return result;
}
/**
 * Returns the value of the R12 register read between X64_Start()/X64_End().
 *
 * According to the original comment below, R12 holds the pointer to TEB64 in
 * WoW64 processes, so the returned value is that address.
 *
 * @return the 64-bit value popped into `reg`; 0 when _M_IX86 is not defined,
 *         because the register read is compiled out and `reg.v` stays 0.
 */
DWORD64 getTEB64()
{
    reg64 reg;
    reg.v = 0;
#ifdef _M_IX86
    X64_Start();
    // R12 register should always contain pointer to TEB64 in WoW64 processes
    X64_Push(_R12);
    // below pop will pop QWORD from stack, as we're in x64 mode now
    __asm pop reg.dw[0]
    X64_End();
#endif
    return reg.v;
}
/**
 * Reads the TEB64 address from R12 and copies the TEB64 contents into `out`.
 *
 * The register is read between X64_Start()/X64_End(); the structure is then
 * copied with memcpy64() from that address into the caller's buffer.
 *
 * @param out  receives sizeof(out) bytes copied from the TEB64 address
 * @return the full 64-bit TEB64 address that was read from R12
 */
DWORD64 Wow64Local::getTEB64( TEB64& out )
{
    reg64 reg;
    reg.v = 0;

    X64_Start();
    //R12 register should always contain pointer to TEB64 in WoW64 processes
    X64_Push(_R12);
    //below pop will pop QWORD from stack, as we're in x64 mode now
    __asm pop reg.dw[0]
    X64_End();

    // NOTE(review): MSVC sign-extends a 32-bit pointer cast directly to a
    // 64-bit integer (warning C4826) - confirm &out can never be above 2 GiB.
    memcpy64((DWORD64)&out, reg.v, sizeof(out));

    // Return the whole QWORD. Returning reg.dw[0] dropped the upper 32 bits
    // even though the function is declared to return DWORD64.
    return reg.v;
}
/**
 * Byte-wise copy between two 64-bit addresses, executed between
 * X64_Start()/X64_End().
 *
 * The function is naked: it builds its own EBP frame, saves all general
 * registers with pushad, and removes its 20 bytes of arguments itself
 * (`retn 20` = 8 + 8 + 4). The arguments are read straight from the frame,
 * which is why the parameter names are commented out.
 *
 * @param dst   64-bit destination address  ([ebp + 0x08])
 * @param src   64-bit source address       ([ebp + 0x10])
 * @param size  number of bytes to copy     ([ebp + 0x18]); 0 copies nothing
 */
void __declspec(naked, noinline) Wow64Local::memcpy64(DWORD64 /*dst*/, DWORD64 /*src*/, DWORD /*size*/)
{
    __asm
    {
        push ebp
        mov ebp, esp
        pushad
    }

    X64_Start();

    /*
        mov rdi, QWORD PTR [rbp + 0x8]
        mov rsi, QWORD PTR [rbp + 0x10]
        mov ecx, DWORD PTR [rbp + 0x18]
        test ecx, ecx
        jz done
    loop1:
        mov al, BYTE PTR [rsi]
        mov BYTE PTR [rdi], al
        add rsi, 0x1
        add rdi, 0x1
        sub ecx, 0x1
        test ecx, ecx
        jnz loop1
    done:
    */

    // mov rdi, QWORD PTR [rbp + 0x8]
    EMIT(0x48) EMIT(0x8B) EMIT(0x7D) EMIT(0x08)
    // mov rsi, QWORD PTR [rbp + 0x10]
    EMIT(0x48) EMIT(0x8B) EMIT(0x75) EMIT(0x10)
    // mov ecx, DWORD PTR [rbp + 0x18]
    EMIT(0x8B) EMIT(0x4D) EMIT(0x18)

    // Zero-length guard: the loop below copies before it tests the counter, so
    // without this check size == 0 would wrap ECX and copy ~4 GiB.
    // test ecx, ecx
    EMIT(0x85) EMIT(0xC9)
    // jz done   (rel8 = 0x13: skips the 19-byte loop body that follows)
    EMIT(0x74) EMIT(0x13)

    // loop1: mov al, BYTE PTR [rsi]
    EMIT(0x8A) EMIT(0x06)
    // mov BYTE PTR [rdi], al
    EMIT(0x88) EMIT(0x07)
    // add rsi, 0x1
    EMIT(0x48) EMIT(0x83) EMIT(0xC6) EMIT(0x01)
    // add rdi, 0x1
    EMIT(0x48) EMIT(0x83) EMIT(0xC7) EMIT(0x01)
    // sub ecx, 0x1
    EMIT(0x83) EMIT(0xE9) EMIT(0x01)
    // test ecx, ecx
    EMIT(0x85) EMIT(0xC9)
    // jnz loop1 (rel8 = 0xED = -19, back to the start of the loop body)
    EMIT(0x75) EMIT(0xED)
    // done:

    X64_End();

    __asm
    {
        popad
        mov esp, ebp
        pop ebp
        retn 20
    }
}
/**
 * Reads the TEB64 address from R12 and copies the TEB64 contents into `out`.
 *
 * Both the register read and the memcpy64() copy are inside `#ifdef _M_IX86`;
 * in other builds `out` is left untouched, which is what the
 * UNREFERENCED_PARAMETER marker at the top accounts for.
 *
 * @param out  receives sizeof(out) bytes copied from the TEB64 address
 *             (only when _M_IX86 is defined)
 * @return the 64-bit value read from R12, or 0 when _M_IX86 is not defined
 */
DWORD64 Wow64Local::GetTEB64(_TEB64& out)
{
    UNREFERENCED_PARAMETER(out);

    reg64 reg;
    reg.v = 0;
#ifdef _M_IX86
    _asm
    {
        X64_Start();
        //R12 register should always contain pointer to TEB64 in WoW64 processes
        X64_Push(_R12);
        //below pop will pop QWORD from stack, as we're in x64 mode now
        __asm pop reg.dw[0]
        X64_End();
    }

    // Copy the structure found at the address just read into the caller's buffer.
    memcpy64((DWORD64)&out, reg.v, sizeof(out));
#endif
    return reg.v;
}
/**
 * Calls a function at a 64-bit address, passing arguments taken from a va_list.
 *
 * Every argument is read from `args` as a DWORD64. The first four go to
 * RCX, RDX, R8 and R9; any remaining ones are pushed on the stack last-to-first,
 * then 0x20 bytes are reserved below them before the call (the original comment
 * calls this "stack space for spilling registers"). The call itself happens
 * between X64_Start()/X64_End().
 *
 * @param func  64-bit address of the function to call
 * @param argC  number of DWORD64 arguments available in `args`
 * @param args  argument list; each entry is fetched with va_arg(args, DWORD64)
 * @return the value of RAX after the call
 */
DWORD64 Wow64Local::X64CallV( DWORD64 func, int argC, va_list args )
{
    // Peel off up to four register arguments; argC ends up as the number of
    // arguments that still have to go on the stack.
    DWORD64 _rcx = (argC > 0) ? argC--, va_arg(args, DWORD64) : 0;
    DWORD64 _rdx = (argC > 0) ? argC--, va_arg(args, DWORD64) : 0;
    DWORD64 _r8 = (argC > 0) ? argC--, va_arg(args, DWORD64) : 0;
    DWORD64 _r9 = (argC > 0) ? argC--, va_arg(args, DWORD64) : 0;
    reg64 _rax;
    _rax.v = 0;

    // Address of the first stack argument inside the va_list storage.
    DWORD64 restArgs = (DWORD64)&va_arg(args, DWORD64);

    //conversion to QWORD for easier use in inline assembly
    DWORD64 _argC = argC;
    DWORD64 _func = func;

    DWORD back_esp = 0;

    __asm
    {
        ;//keep original esp in back_esp variable
        mov    back_esp, esp

        ;//align esp to 8, without aligned stack some syscalls may return errors !
        and    esp, 0xFFFFFFF8

        X64_Start();

        ;//fill first four arguments
        push   _rcx
        X64_Pop(_RCX);
        push   _rdx
        X64_Pop(_RDX);
        push   _r8
        X64_Pop(_R8);
        push   _r9
        X64_Pop(_R9);

        push   edi

        push   restArgs
        X64_Pop(_RDI);

        push   _argC
        X64_Pop(_RAX);

        ;//put rest of arguments on the stack
        test   eax, eax
        jz     _ls_e
        lea    edi, dword ptr [edi + 8*eax - 8]

_ls:
        test   eax, eax
        jz     _ls_e
        push   dword ptr [edi]
        sub    edi, 8
        sub    eax, 1
        jmp    _ls
_ls_e:

        ;//create stack space for spilling registers
        sub    esp, 0x20

        call   _func

        ;//cleanup stack
        push   _argC
        X64_Pop(_RCX);
        lea    esp, dword ptr [esp + 8*ecx + 0x20]

        pop    edi

        //set return value
        X64_Push(_RAX);
        pop    _rax.dw[0]

        X64_End();

        mov    esp, back_esp
    }
    return _rax.v;
}
/**
 * Calls a function at a 64-bit address with a variable number of arguments.
 *
 * Every variadic argument is read as a DWORD64. The first four go to
 * RCX, RDX, R8 and R9; the remaining ones are pushed on the stack last-to-first,
 * then 0x20 bytes are reserved below them before the call. The call itself
 * happens between X64_Start()/X64_End(). The whole thunk is inside
 * `#ifdef _M_IX86`.
 *
 * @param func  64-bit address of the function to call
 * @param argC  number of DWORD64 arguments that follow
 * @return the value of RAX after the call; 0 when g_isWow64 is false or when
 *         _M_IX86 is not defined (the call is compiled out)
 */
extern "C" DWORD64 __cdecl X64Call(DWORD64 func, int argC, ...)
{
    if (!g_isWow64)
        return 0;

    va_list args;
    va_start(args, argC);
    // Peel off up to four register arguments; argC ends up as the number of
    // arguments that still have to go on the stack.
    reg64 _rcx = { (argC > 0) ? argC--, va_arg(args, DWORD64) : 0 };
    reg64 _rdx = { (argC > 0) ? argC--, va_arg(args, DWORD64) : 0 };
    reg64 _r8 = { (argC > 0) ? argC--, va_arg(args, DWORD64) : 0 };
    reg64 _r9 = { (argC > 0) ? argC--, va_arg(args, DWORD64) : 0 };
    reg64 _rax = { 0 };

    // Address of the first stack argument inside the va_list storage.
    reg64 restArgs = { (DWORD64)&va_arg(args, DWORD64) };

    // conversion to QWORD for easier use in inline assembly
#ifdef _M_IX86
    reg64 _argC = { (DWORD64)argC };
    DWORD back_esp = 0;
    WORD back_fs = 0;

    __asm
    {
        ;// reset FS segment, to properly handle RFG
        mov    back_fs, fs
        mov    eax, 0x2B
        mov    fs, ax

        ;// keep original esp in back_esp variable
        mov    back_esp, esp

        ;// align esp to 0x10, without aligned stack some syscalls may return errors !
        ;// (actually, for syscalls it is sufficient to align to 8, but SSE opcodes
        ;// requires 0x10 alignment), it will be further adjusted according to the
        ;// number of arguments above 4
        and    esp, 0xFFFFFFF0

        X64_Start();

        ;// below code is compiled as x86 inline asm, but it is executed as x64 code
        ;// that's why it need sometimes REX_W() macro, right column contains detailed
        ;// transcription how it will be interpreted by CPU

        ;// fill first four arguments
  REX_W mov    ecx, _rcx.dw[0]                          ;// mov     rcx, qword ptr [_rcx]
  REX_W mov    edx, _rdx.dw[0]                          ;// mov     rdx, qword ptr [_rdx]
        push   _r8.v                                    ;// push    qword ptr [_r8]
        X64_Pop(_R8);                                   ;// pop     r8
        push   _r9.v                                    ;// push    qword ptr [_r9]
        X64_Pop(_R9);                                   ;// pop     r9
                                                        ;//
  REX_W mov    eax, _argC.dw[0]                         ;// mov     rax, qword ptr [_argC]
                                                        ;//
        ;// final stack adjustment, according to the    ;//
        ;// number of arguments above 4                 ;//
        // With an even count of stack arguments, the push of edi below makes
        // the number of pushed qwords odd, so 8 more bytes are reserved here.
        test   al, 1                                    ;// test    al, 1
        jnz    _no_adjust                               ;// jnz     _no_adjust
        sub    esp, 8                                   ;// sub     rsp, 8
_no_adjust:                                             ;//
                                                        ;//
        push   edi                                      ;// push    rdi
  REX_W mov    edi, restArgs.dw[0]                      ;// mov     rdi, qword ptr [restArgs]
                                                        ;//
        ;// put rest of arguments on the stack          ;//
  REX_W test   eax, eax                                 ;// test    rax, rax
        jz     _ls_e                                    ;// je      _ls_e
  REX_W lea    edi, dword ptr [edi + 8*eax - 8]         ;// lea     rdi, [rdi + rax*8 - 8]
                                                        ;//
_ls:                                                    ;//
  REX_W test   eax, eax                                 ;// test    rax, rax
        jz     _ls_e                                    ;// je      _ls_e
        push   dword ptr [edi]                          ;// push    qword ptr [rdi]
  REX_W sub    edi, 8                                   ;// sub     rdi, 8
  REX_W sub    eax, 1                                   ;// sub     rax, 1
        jmp    _ls                                      ;// jmp     _ls
_ls_e:                                                  ;//
                                                        ;//
        ;// create stack space for spilling registers   ;//
  REX_W sub    esp, 0x20                                ;// sub     rsp, 20h
                                                        ;//
        call   func                                     ;// call    qword ptr [func]
                                                        ;//
        ;// cleanup stack                               ;//
  REX_W mov    ecx, _argC.dw[0]                         ;// mov     rcx, qword ptr [_argC]
  REX_W lea    esp, dword ptr [esp + 8*ecx + 0x20]      ;// lea     rsp, [rsp + rcx*8 + 20h]
                                                        ;//
        pop    edi                                      ;// pop     rdi
                                                        ;//
        // set return value                             ;//
  REX_W mov    _rax.dw[0], eax                          ;// mov     qword ptr [_rax], rax

        X64_End();

        // NOTE(review): SS is reloaded from DS before ESP is restored; the
        // reason is not visible in this block - confirm before changing.
        mov    ax, ds
        mov    ss, ax
        mov    esp, back_esp

        ;// restore FS segment
        mov    ax, back_fs
        mov    fs, ax
    }
#endif // _M_IX86

    return _rax.v;
}
#include <Windows.h>
#include <stdio.h>
#include "Heavens Gate.h"

/**
 * Byte-wise copy between two 64-bit addresses, executed between
 * X64_Start()/X64_End().
 *
 * The function is naked: it builds its own EBP frame and reads its arguments
 * directly from it. It ends with a plain `ret`, so the caller removes the
 * arguments.
 *
 * @param Dst  64-bit destination address ([ebp + 8])
 * @param Src  64-bit source address      ([ebp + 16])
 * @param len  number of bytes to copy    ([ebp + 24]); 0 copies nothing
 * @return the low 32 bits of Dst, as a pointer
 */
__declspec(naked) void* memcpy64(unsigned long long Dst, unsigned long long Src, unsigned long len)
{
    __asm
    {
        push ebp
        mov ebp, esp
        push esi
        push edi

        X64_Start()

        EMIT(0x67) EMIT(0x48) EMIT(0x8B) EMIT(0x75) EMIT(0x10) //mov rsi, qword ptr[ebp+16]
        EMIT(0x67) EMIT(0x48) EMIT(0x8B) EMIT(0x7D) EMIT(0x08) //mov rdi, qword ptr[ebp+8]
        EMIT(0x67) EMIT(0x8B) EMIT(0x4D) EMIT(0x18)            //mov ecx, dword ptr [ebp+24]

        // Zero-length guard: `loop` decrements the counter before testing it,
        // so len == 0 would wrap around instead of copying nothing. The 32-bit
        // load above zero-extends into RCX, so testing RCX is equivalent.
        EMIT(0xE3) EMIT(0x0C)                                  //jrcxz done (skips the 12-byte loop below)

        EMIT(0x8a) EMIT(0x06)                                  //mov al, byte ptr[rsi]
        EMIT(0x88) EMIT(0x07)                                  //mov byte ptr[rdi], al
        EMIT(0x48) EMIT(0xFF) EMIT(0xC6)                       //inc rsi
        EMIT(0x48) EMIT(0xFF) EMIT(0xC7)                       //inc rdi
        EMIT(0xE2) EMIT(0xF4)                                  //loop e (mov al, byte ptr[rsi])
                                                               //done:
        X64_End()

        // The function is declared to return void*, so give EAX a defined
        // value: the (low 32 bits of the) destination, like memcpy does.
        mov eax, dword ptr [ebp + 8]

        pop edi
        pop esi
        mov esp, ebp
        pop ebp
        ret
    }
}
/**
 *
 * X64Call Part of WOW64Ext Library
 * See internals.h
 *
 * Calls a function at the given address between X64_Start()/X64_End(),
 * passing nArgc variadic arguments that are each read as a DWORD64.
 * The first four arguments go to RCX, RDX, R8 and R9; the remaining ones are
 * pushed on the stack last-to-first, then 0x20 bytes are reserved below them
 * before the call.
 *
 * lvpFunctionPtr - address of the function to call
 * nArgc          - number of DWORD64 arguments that follow
 *
 * Returns the value of RAX after the call.
 */
extern __declspec(dllexport) unsigned __int64 X64Call( void * lvpFunctionPtr, int nArgc, ... )
{
    va_list args;
    DWORD64 arg1, arg2, arg3, arg4, _nArgc, _lvpFunctionPtr, rest;
    DWORD   dwEspBackup;
    union reg64 sRax;

    va_start( args, nArgc );

    /* Peel off up to four register arguments; nArgc ends up as the number of
       arguments that still have to go on the stack. */
    arg1 = ( nArgc ) ? nArgc--, va_arg( args, DWORD64 ) : 0;
    arg2 = ( nArgc ) ? nArgc--, va_arg( args, DWORD64 ) : 0;
    arg3 = ( nArgc ) ? nArgc--, va_arg( args, DWORD64 ) : 0;
    arg4 = ( nArgc ) ? nArgc--, va_arg( args, DWORD64 ) : 0;

    /* Address of the first stack argument inside the va_list storage. */
    rest = (DWORD64)&va_arg( args, DWORD64 );

    _nArgc = nArgc;
    _lvpFunctionPtr = (DWORD64)lvpFunctionPtr;
    dwEspBackup = 0;    /* was a no-op expression statement; now initialised */
    sRax.v = 0;

    __asm
    {
        mov    dwEspBackup, esp
        and    sp, 0xFFF8

        X64_Start();

        push   arg1
        X64_Pop(_RCX);
        push   arg2
        X64_Pop(_RDX);
        push   arg3
        X64_Pop(_R8);
        push   arg4
        X64_Pop(_R9);

        push   edi

        push   rest
        X64_Pop(_RDI);

        push   _nArgc
        X64_Pop(_RAX);

        test   eax, eax
        jz     _ls_e
        lea    edi, dword ptr [edi + 8*eax - 8]

_ls:
        test   eax, eax
        jz     _ls_e
        push   dword ptr [edi]
        sub    edi, 8
        sub    eax, 1
        jmp    _ls
_ls_e:

        sub    esp, 0x20

        call   _lvpFunctionPtr

        push   _nArgc
        X64_Pop(_RCX);
        lea    esp, dword ptr [esp + 8*ecx + 0x20]

        pop    edi

        X64_Push(_RAX);
        pop    sRax.dw[0]

        X64_End();

        mov    esp, dwEspBackup
    }

    va_end( args );

    /* The function previously fell off the end without a return statement, so
       the captured RAX value in sRax was never handed back to the caller. */
    return sRax.v;
}
/*
 * Runs the GETTEB() macro between X64_Start() and X64_End().
 *
 * NOTE(review): there is no return statement in this function. Presumably
 * GETTEB() leaves the TEB64 pointer in the return register (EAX) and
 * X64_End() preserves it - verify against the macro definitions, which are
 * not visible here.
 */
PTEB64 NtTeb64( void )
{
    X64_Start();
    GETTEB();
    X64_End();
}