//------------------------------------------------------------------------- BOOL CreateTrampolineFunction(PTRAMPOLINE ct) { #ifdef _M_X64 CALL_ABS call = { 0xFF, 0x15, 0x00000002, // FF15 00000002: CALL [RIP+8] 0xEB, 0x08, // EB 08: JMP +10 0x0000000000000000ULL // Absolute destination address }; JMP_ABS jmp = { 0xFF, 0x25, 0x00000000, // FF25 00000000: JMP [RIP+6] 0x0000000000000000ULL // Absolute destination address }; JCC_ABS jcc = { 0x70, 0x0E, // 7* 0E: J** +16 0xFF, 0x25, 0x00000000, // FF25 00000000: JMP [RIP+6] 0x0000000000000000ULL // Absolute destination address }; #else CALL_REL call = { 0xE8, // E8 xxxxxxxx: CALL +5+xxxxxxxx 0x00000000 // Relative destination address }; JMP_REL jmp = { 0xE9, // E9 xxxxxxxx: JMP +5+xxxxxxxx 0x00000000 // Relative destination address }; JCC_REL jcc = { 0x0F, 0x80, // 0F8* xxxxxxxx: J** +6+xxxxxxxx 0x00000000 // Relative destination address }; #endif UINT8 oldPos = 0; UINT8 newPos = 0; ULONG_PTR jmpDest = 0; // Destination address of an internal jump. BOOL finished = FALSE; // Is the function completed? #ifdef _M_X64 UINT8 instBuf[16]; #endif ct->patchAbove = FALSE; ct->nIP = 0; do { HDE hs; UINT copySize; LPVOID pCopySrc; ULONG_PTR pOldInst = (ULONG_PTR)ct->pTarget + oldPos; ULONG_PTR pNewInst = (ULONG_PTR)ct->pTrampoline + newPos; copySize = HDE_DISASM((LPVOID)pOldInst, &hs); if (hs.flags & F_ERROR) return FALSE; pCopySrc = (LPVOID)pOldInst; if (oldPos >= sizeof(JMP_REL)) { // The trampoline function is long enough. // Complete the function with the jump to the target function. #ifdef _M_X64 jmp.address = pOldInst; #else jmp.operand = (UINT32)(pOldInst - (pNewInst + sizeof(jmp))); #endif pCopySrc = &jmp; copySize = sizeof(jmp); finished = TRUE; } #ifdef _M_X64 else if ((hs.modrm & 0xC7) == 0x05) { // Instructions using RIP relative addressing. (ModR/M = 00???101B) // Modify the RIP relative address. PUINT32 pRelAddr; // Avoid using memcpy to reduce the footprint. 
__movsb(instBuf, (LPBYTE)pOldInst, copySize); pCopySrc = instBuf; // Relative address is stored at (instruction length - immediate value length - 4). pRelAddr = (PUINT32)(instBuf + hs.len - ((hs.flags & 0x3C) >> 2) - 4); *pRelAddr = (UINT32)((pOldInst + hs.len + (INT32)hs.disp.disp32) - (pNewInst + hs.len)); // Complete the function if JMP (FF /4). if (hs.opcode == 0xFF && hs.modrm_reg == 4) finished = TRUE; } #endif else if (hs.opcode == 0xE8) { // Direct relative CALL ULONG_PTR dest = pOldInst + hs.len + (INT32)hs.imm.imm32; #ifdef _M_X64 call.address = dest; #else call.operand = (UINT32)(dest - (pNewInst + sizeof(call))); #endif pCopySrc = &call; copySize = sizeof(call); } else if ((hs.opcode & 0xFD) == 0xE9) { // Direct relative JMP (EB or E9) ULONG_PTR dest = pOldInst + hs.len; if (hs.opcode == 0xEB) // isShort jmp dest += (INT8)hs.imm.imm8; else dest += (INT32)hs.imm.imm32; // Simply copy an internal jump. if ((ULONG_PTR)ct->pTarget <= dest && dest < ((ULONG_PTR)ct->pTarget + sizeof(JMP_REL))) { if (jmpDest < dest) jmpDest = dest; } else { #ifdef _M_X64 jmp.address = dest; #else jmp.operand = (UINT32)(dest - (pNewInst + sizeof(jmp))); #endif pCopySrc = &jmp; copySize = sizeof(jmp); // Exit the function If it is not in the branch finished = (pOldInst >= jmpDest); } } else if ((hs.opcode & 0xF0) == 0x70 || (hs.opcode & 0xFC) == 0xE0 || (hs.opcode2 & 0xF0) == 0x80) { // Direct relative Jcc ULONG_PTR dest = pOldInst + hs.len; if ((hs.opcode & 0xF0) == 0x70 // Jcc || (hs.opcode & 0xFC) == 0xE0) // LOOPNZ/LOOPZ/LOOP/JECXZ dest += (INT8)hs.imm.imm8; else dest += (INT32)hs.imm.imm32; // Simply copy an internal jump. if ((ULONG_PTR)ct->pTarget <= dest && dest < ((ULONG_PTR)ct->pTarget + sizeof(JMP_REL))) { if (jmpDest < dest) jmpDest = dest; } else if ((hs.opcode & 0xFC) == 0xE0) { // LOOPNZ/LOOPZ/LOOP/JCXZ/JECXZ to the outside are not supported. return FALSE; } else { UINT8 cond = ((hs.opcode != 0x0F ? 
hs.opcode : hs.opcode2) & 0x0F); #ifdef _M_X64 // Invert the condition. jcc.opcode = 0x71 ^ cond; jcc.address = dest; #else jcc.opcode1 = 0x80 | cond; jcc.operand = (UINT32)(dest - (pNewInst + sizeof(jcc))); #endif pCopySrc = &jcc; copySize = sizeof(jcc); } } else if ((hs.opcode & 0xFE) == 0xC2) { // RET (C2 or C3) // Complete the function if not in a branch. finished = (pOldInst >= jmpDest); } // Can't alter the instruction length in a branch. if (pOldInst < jmpDest && copySize != hs.len) return FALSE; if ((newPos + copySize) > TRAMPOLINE_MAX_SIZE) return FALSE; if (ct->nIP >= ARRAYSIZE(ct->oldIPs)) return FALSE; ct->oldIPs[ct->nIP] = oldPos; ct->newIPs[ct->nIP] = newPos; ct->nIP++; // Avoid using memcpy to reduce the footprint. __movsb((LPBYTE)ct->pTrampoline + newPos, pCopySrc, copySize); newPos += copySize; oldPos += hs.len; }
DLLEXPORT UINT_PTR WINAPI ReflectiveLoader( VOID ) #endif { // the functions we need LOADLIBRARYA pLoadLibraryA; GETPROCADDRESS pGetProcAddress; VIRTUALALLOC pVirtualAlloc; VIRTUALLOCK pVirtualLock; OUTPUTDEBUG pOutputDebug; USHORT usCounter; // the initial location of this image in memory UINT_PTR uiLibraryAddress; // the kernels base address and later this images newly loaded base address UINT_PTR uiBaseAddress; // variables for processing the kernels export table UINT_PTR uiAddressArray; UINT_PTR uiNameArray; UINT_PTR uiExportDir; UINT_PTR uiNameOrdinals; DWORD dwHashValue; // variables for loading this image UINT_PTR uiHeaderValue; UINT_PTR uiValueA; UINT_PTR uiValueB; UINT_PTR uiValueC; UINT_PTR uiValueD; UINT_PTR uiValueE; register UINT_PTR inspect; // STEP 0: calculate our images current base address // we will start searching backwards from our current EIP #ifdef _WIN64 uiLibraryAddress = eip(); #else __asm { call geteip geteip: pop uiLibraryAddress } #endif // loop through memory backwards searching for our images base address // we dont need SEH style search as we shouldnt generate any access violations with this while( TRUE ) { if( ((PIMAGE_DOS_HEADER)uiLibraryAddress)->e_magic == IMAGE_DOS_SIGNATURE ) { uiHeaderValue = ((PIMAGE_DOS_HEADER)uiLibraryAddress)->e_lfanew; // some x64 dll's can trigger a bogus signature (IMAGE_DOS_SIGNATURE == 'POP r10'), // we sanity check the e_lfanew with an upper threshold value of 1024 to avoid problems. if( uiHeaderValue >= sizeof(IMAGE_DOS_HEADER) && uiHeaderValue < 1024 ) { uiHeaderValue += uiLibraryAddress; // break if we have found a valid MZ/PE header if( ((PIMAGE_NT_HEADERS)uiHeaderValue)->Signature == IMAGE_NT_SIGNATURE ) break; } } uiLibraryAddress--; } // STEP 1: process the kernels exports for the functions our loader needs... // get the Process Enviroment Block #ifdef _WIN64 uiBaseAddress = __readgsqword( 0x60 ); #else uiBaseAddress = __readfsdword( 0x30 ); #endif // get the processes loaded modules. 
ref: http://msdn.microsoft.com/en-us/library/aa813708(VS.85).aspx uiBaseAddress = (UINT_PTR)((_PPEB)uiBaseAddress)->pLdr; // get the first entry of the InMemoryOrder module list uiValueA = (UINT_PTR)((PPEB_LDR_DATA)uiBaseAddress)->InMemoryOrderModuleList.Flink; while( uiValueA ) { // get pointer to current modules name (unicode string) uiValueB = (UINT_PTR)((PLDR_DATA_TABLE_ENTRY)uiValueA)->BaseDllName.pBuffer; // set bCounter to the length for the loop usCounter = ((PLDR_DATA_TABLE_ENTRY)uiValueA)->BaseDllName.Length; // clear uiValueC which will store the hash of the module name uiValueC = 0; // compute the hash of the module name... do { uiValueC = ror( (DWORD)uiValueC ); // normalize to uppercase if the module name is in lowercase if( *((BYTE *)uiValueB) >= 'a' ) uiValueC += *((BYTE *)uiValueB) - 0x20; else uiValueC += *((BYTE *)uiValueB); uiValueB++; } while( --usCounter ); // compare the hash with that of kernel32.dll if( (DWORD)uiValueC == KERNEL32DLL_HASH ) { // get this modules base address uiBaseAddress = (UINT_PTR)((PLDR_DATA_TABLE_ENTRY)uiValueA)->DllBase; break; } // get the next entry uiValueA = DEREF( uiValueA ); } // get the VA of the modules NT Header uiExportDir = uiBaseAddress + ((PIMAGE_DOS_HEADER)uiBaseAddress)->e_lfanew; // uiNameArray = the address of the modules export directory entry uiNameArray = (UINT_PTR)&((PIMAGE_NT_HEADERS)uiExportDir)->OptionalHeader.DataDirectory[ IMAGE_DIRECTORY_ENTRY_EXPORT ]; // get the VA of the export directory uiExportDir = ( uiBaseAddress + ((PIMAGE_DATA_DIRECTORY)uiNameArray)->VirtualAddress ); // get the VA for the array of name pointers uiNameArray = ( uiBaseAddress + ((PIMAGE_EXPORT_DIRECTORY )uiExportDir)->AddressOfNames ); // get the VA for the array of name ordinals uiNameOrdinals = ( uiBaseAddress + ((PIMAGE_EXPORT_DIRECTORY )uiExportDir)->AddressOfNameOrdinals ); usCounter = 5; // loop while we still have imports to find while( usCounter > 0 ) { // compute the hash values for this function name 
dwHashValue = hash( (char *)( uiBaseAddress + DEREF_32( uiNameArray ) ) ); // if we have found a function we want we get its virtual address if( dwHashValue == LOADLIBRARYA_HASH || dwHashValue == GETPROCADDRESS_HASH || dwHashValue == VIRTUALALLOC_HASH || dwHashValue == VIRTUALLOCK_HASH || dwHashValue == OUTPUTDEBUG_HASH ) { // get the VA for the array of addresses uiAddressArray = ( uiBaseAddress + ((PIMAGE_EXPORT_DIRECTORY )uiExportDir)->AddressOfFunctions ); // use this functions name ordinal as an index into the array of name pointers uiAddressArray += ( DEREF_16( uiNameOrdinals ) * sizeof(DWORD) ); // store this functions VA if( dwHashValue == LOADLIBRARYA_HASH ) pLoadLibraryA = (LOADLIBRARYA)( uiBaseAddress + DEREF_32( uiAddressArray ) ); else if( dwHashValue == GETPROCADDRESS_HASH ) pGetProcAddress = (GETPROCADDRESS)( uiBaseAddress + DEREF_32( uiAddressArray ) ); else if( dwHashValue == VIRTUALALLOC_HASH ) pVirtualAlloc = (VIRTUALALLOC)( uiBaseAddress + DEREF_32( uiAddressArray ) ); else if( dwHashValue == VIRTUALLOCK_HASH ) pVirtualLock = (VIRTUALLOCK)( uiBaseAddress + DEREF_32( uiAddressArray ) ); else if( dwHashValue == OUTPUTDEBUG_HASH ) pOutputDebug = (OUTPUTDEBUG)( uiBaseAddress + DEREF_32( uiAddressArray ) ); // decrement our counter usCounter--; } // get the next exported function name uiNameArray += sizeof(DWORD); // get the next exported function name ordinal uiNameOrdinals += sizeof(WORD); } // STEP 2: load our image into a new permanent location in memory... // get the VA of the NT Header for the PE to be loaded uiHeaderValue = uiLibraryAddress + ((PIMAGE_DOS_HEADER)uiLibraryAddress)->e_lfanew; // allocate all the memory for the DLL to be loaded into. we can load at any address because we will // relocate the image. Also zeros all memory and marks it as READ, WRITE and EXECUTE to avoid any problems. 
uiBaseAddress = (UINT_PTR)pVirtualAlloc( NULL, ((PIMAGE_NT_HEADERS)uiHeaderValue)->OptionalHeader.SizeOfImage, MEM_RESERVE|MEM_COMMIT, PAGE_EXECUTE_READWRITE ); // prevent our image from being swapped to the pagefile pVirtualLock((LPVOID)uiBaseAddress, ((PIMAGE_NT_HEADERS)uiHeaderValue)->OptionalHeader.SizeOfImage); // we must now copy over the headers uiValueA = ((PIMAGE_NT_HEADERS)uiHeaderValue)->OptionalHeader.SizeOfHeaders; uiValueB = uiLibraryAddress; uiValueC = uiBaseAddress; __movsb( (PBYTE)uiValueC, (PBYTE)uiValueB, uiValueA ); // STEP 3: load in all of our sections... // uiValueA = the VA of the first section uiValueA = ( (UINT_PTR)&((PIMAGE_NT_HEADERS)uiHeaderValue)->OptionalHeader + ((PIMAGE_NT_HEADERS)uiHeaderValue)->FileHeader.SizeOfOptionalHeader ); uiValueE = ((PIMAGE_NT_HEADERS)uiHeaderValue)->FileHeader.NumberOfSections; // iterate through all sections, loading them into memory. while( uiValueE-- ) { // uiValueB is the VA for this section uiValueB = ( uiBaseAddress + ((PIMAGE_SECTION_HEADER)uiValueA)->VirtualAddress ); // uiValueC if the VA for this sections data uiValueC = ( uiLibraryAddress + ((PIMAGE_SECTION_HEADER)uiValueA)->PointerToRawData ); // copy the section over uiValueD = ((PIMAGE_SECTION_HEADER)uiValueA)->SizeOfRawData; __movsb( (PBYTE)uiValueB, (PBYTE)uiValueC, uiValueD ); // get the VA of the next section uiValueA += sizeof( IMAGE_SECTION_HEADER ); } // STEP 4: process our images import table... 
// uiValueB = the address of the import directory uiValueB = (UINT_PTR)&((PIMAGE_NT_HEADERS)uiHeaderValue)->OptionalHeader.DataDirectory[ IMAGE_DIRECTORY_ENTRY_IMPORT ]; uiValueC = ( uiBaseAddress + (UINT_PTR)((PIMAGE_DATA_DIRECTORY)uiValueB)->VirtualAddress ); // iterate through all imports until a null RVA is found (Characteristics is mis-named) while( ((PIMAGE_IMPORT_DESCRIPTOR)uiValueC)->Characteristics ) { /* pOutputDebug("Loading library: "); pOutputDebug((LPCSTR)( uiBaseAddress + ((PIMAGE_IMPORT_DESCRIPTOR)uiValueC)->Name )); pOutputDebug("\n"); */ // use LoadLibraryA to load the imported module into memory uiLibraryAddress = (UINT_PTR)pLoadLibraryA( (LPCSTR)( uiBaseAddress + ((PIMAGE_IMPORT_DESCRIPTOR)uiValueC)->Name ) ); if (! uiLibraryAddress) { //pOutputDebug("Loading library FAILED\n"); // get the next import uiValueC += sizeof( IMAGE_IMPORT_DESCRIPTOR ); continue; } // uiValueD = VA of the OriginalFirstThunk uiValueD = ( uiBaseAddress + ((PIMAGE_IMPORT_DESCRIPTOR)uiValueC)->OriginalFirstThunk ); // uiValueA = VA of the IAT (via first thunk not origionalfirstthunk) uiValueA = ( uiBaseAddress + ((PIMAGE_IMPORT_DESCRIPTOR)uiValueC)->FirstThunk ); // itterate through all imported functions, importing by ordinal if no name present while( DEREF(uiValueA) ) { // sanity check uiValueD as some compilers only import by FirstThunk if( uiValueD && ((PIMAGE_THUNK_DATA)uiValueD)->u1.Ordinal & IMAGE_ORDINAL_FLAG ) { // get the VA of the modules NT Header uiExportDir = uiLibraryAddress + ((PIMAGE_DOS_HEADER)uiLibraryAddress)->e_lfanew; // uiNameArray = the address of the modules export directory entry uiNameArray = (UINT_PTR)&((PIMAGE_NT_HEADERS)uiExportDir)->OptionalHeader.DataDirectory[ IMAGE_DIRECTORY_ENTRY_EXPORT ]; // get the VA of the export directory uiExportDir = ( uiLibraryAddress + ((PIMAGE_DATA_DIRECTORY)uiNameArray)->VirtualAddress ); // get the VA for the array of addresses uiAddressArray = ( uiLibraryAddress + ((PIMAGE_EXPORT_DIRECTORY 
)uiExportDir)->AddressOfFunctions ); // use the import ordinal (- export ordinal base) as an index into the array of addresses uiAddressArray += ( ( IMAGE_ORDINAL( ((PIMAGE_THUNK_DATA)uiValueD)->u1.Ordinal ) - ((PIMAGE_EXPORT_DIRECTORY )uiExportDir)->Base ) * sizeof(DWORD) ); // patch in the address for this imported function DEREF(uiValueA) = ( uiLibraryAddress + DEREF_32(uiAddressArray) ); } else { // get the VA of this functions import by name struct uiValueB = ( uiBaseAddress + DEREF(uiValueA) ); /* pOutputDebug("Resolving function: "); pOutputDebug((LPCSTR)( (LPCSTR)((PIMAGE_IMPORT_BY_NAME)uiValueB)->Name )); pOutputDebug("\n"); */ // use GetProcAddress and patch in the address for this imported function DEREF(uiValueA) = (UINT_PTR)pGetProcAddress( (HMODULE)uiLibraryAddress, (LPCSTR)((PIMAGE_IMPORT_BY_NAME)uiValueB)->Name ); } // get the next imported function uiValueA += sizeof( UINT_PTR ); if( uiValueD ) uiValueD += sizeof( UINT_PTR ); } // get the next import uiValueC += sizeof( IMAGE_IMPORT_DESCRIPTOR ); } // STEP 5: process all of our images relocations... // calculate the base address delta and perform relocations (even if we load at desired image base) uiLibraryAddress = uiBaseAddress - ((PIMAGE_NT_HEADERS)uiHeaderValue)->OptionalHeader.ImageBase; // uiValueB = the address of the relocation directory uiValueB = (UINT_PTR)&((PIMAGE_NT_HEADERS)uiHeaderValue)->OptionalHeader.DataDirectory[ IMAGE_DIRECTORY_ENTRY_BASERELOC ]; // check if there are any relocations present if( ((PIMAGE_DATA_DIRECTORY)uiValueB)->Size ) { // uiValueC is now the first entry (IMAGE_BASE_RELOCATION) uiValueC = ( uiBaseAddress + ((PIMAGE_DATA_DIRECTORY)uiValueB)->VirtualAddress ); // and we iterate through all entries... 
while( ((PIMAGE_BASE_RELOCATION)uiValueC)->SizeOfBlock ) { // uiValueA = the VA for this relocation block uiValueA = ( uiBaseAddress + ((PIMAGE_BASE_RELOCATION)uiValueC)->VirtualAddress ); // uiValueB = number of entries in this relocation block uiValueB = ( ((PIMAGE_BASE_RELOCATION)uiValueC)->SizeOfBlock - sizeof(IMAGE_BASE_RELOCATION) ) / sizeof( IMAGE_RELOC ); // uiValueD is now the first entry in the current relocation block uiValueD = uiValueC + sizeof(IMAGE_BASE_RELOCATION); // we itterate through all the entries in the current block... while( uiValueB-- ) { // perform the relocation, skipping IMAGE_REL_BASED_ABSOLUTE as required. // we dont use a switch statement to avoid the compiler building a jump table // which would not be very position independent! if( ((PIMAGE_RELOC)uiValueD)->type == IMAGE_REL_BASED_DIR64 ) *(UINT_PTR *)(uiValueA + ((PIMAGE_RELOC)uiValueD)->offset) += uiLibraryAddress; else if( ((PIMAGE_RELOC)uiValueD)->type == IMAGE_REL_BASED_HIGHLOW ) *(DWORD *)(uiValueA + ((PIMAGE_RELOC)uiValueD)->offset) += (DWORD)uiLibraryAddress; else if( ((PIMAGE_RELOC)uiValueD)->type == IMAGE_REL_BASED_HIGH ) *(WORD *)(uiValueA + ((PIMAGE_RELOC)uiValueD)->offset) += HIWORD(uiLibraryAddress); else if( ((PIMAGE_RELOC)uiValueD)->type == IMAGE_REL_BASED_LOW ) *(WORD *)(uiValueA + ((PIMAGE_RELOC)uiValueD)->offset) += LOWORD(uiLibraryAddress); // get the next entry in the current relocation block uiValueD += sizeof( IMAGE_RELOC ); } // get the next entry in the relocation directory uiValueC = uiValueC + ((PIMAGE_BASE_RELOCATION)uiValueC)->SizeOfBlock; } } // STEP 6: process the images exception directory if it has one (PE32+ for x64) /* // uiValueB = the address of the relocation directory uiValueB = (UINT_PTR)&((PIMAGE_NT_HEADERS)uiHeaderValue)->OptionalHeader.DataDirectory[ IMAGE_DIRECTORY_ENTRY_EXCEPTION ]; // check if their are any exception etries present if( ((PIMAGE_DATA_DIRECTORY)uiValueB)->Size ) { // get the number of entries uiValueA = 
((PIMAGE_DATA_DIRECTORY)uiValueB)->Size / sizeof( IMAGE_RUNTIME_FUNCTION_ENTRY ); // uiValueC is now the first entry (IMAGE_RUNTIME_FUNCTION_ENTRY) uiValueC = ( uiBaseAddress + ((PIMAGE_DATA_DIRECTORY)uiValueB)->VirtualAddress ); // itterate through all entries while( uiValueA-- ) { //((IMAGE_RUNTIME_FUNCTION_ENTRY)uiValueC).BeginAddress // get the next entry uiValueC += sizeof( IMAGE_RUNTIME_FUNCTION_ENTRY ); } } */ // STEP 7: call our images entry point // uiValueA = the VA of our newly loaded DLL/EXE's entry point uiValueA = ( uiBaseAddress + ((PIMAGE_NT_HEADERS)uiHeaderValue)->OptionalHeader.AddressOfEntryPoint ); // call our respective entry point, fudging our hInstance value #ifdef REFLECTIVEDLLINJECTION_VIA_LOADREMOTELIBRARYR // if we are injecting a DLL via LoadRemoteLibraryR we call DllMain and pass in our parameter (via the DllMain lpReserved parameter) ((DLLMAIN)uiValueA)( (HINSTANCE)uiBaseAddress, DLL_PROCESS_ATTACH, lpParameter ); #else // if we are injecting an DLL via a stub we call DllMain with no parameter ((DLLMAIN)uiValueA)( (HINSTANCE)uiBaseAddress, DLL_PROCESS_ATTACH, NULL ); #endif // STEP 8: return our new entry point address so whatever called us can call DLL_METASPLOIT_ATTACH/DLL_METASPLOIT_DETACH return uiValueA; }
// Replacement for the CRT memcpy: forwards the copy to the __movsb intrinsic
// and hands the destination pointer back to the caller.
extern "C" void * __cdecl memcpy(void *destination, const void *source, size_t num)
{
	unsigned char *dstBytes = static_cast<unsigned char*>(destination);
	unsigned const char *srcBytes = static_cast<unsigned const char*>(source);

	__movsb(dstBytes, srcBytes, num);

	return destination;
}
///Dumps all NtXxx functions along with its RVA and syscall number into a PE internal buffer. ///RVA gets dumped too so we could easily extend this function for use with non-NtXxx functions ///like KiUserCallbackDispatcher, RtlAdjustPrivileges or LdrLoadDll. ///This would, however, require us to load the DLL as an executable image because the indirect calls within these ///non-NtXxx functions often look like "call qword ptr [__imp__NtXxx]" or "mov rcx, qword ptr [__imp__AnyDataSymbol]" ///which would need to have the IAT and data section of the ntdll correctly loaded. NTSTATUS createNtapiLookupTable(PVOID pRawNtdllBase) { PIMAGE_NT_HEADERS64 pNtdllPeHeader = NULL; ULONG rvaNtdllExportDirectory = 0x0; PIMAGE_EXPORT_DIRECTORY pNtdllExportDirectory = NULL; PULONG pNameRvaArray = NULL; PUSHORT pNameOrdinalArray = NULL; PULONG pFunctionRvaArray = NULL; char* pCurrName = NULL; ULONG rvaCurrentFunction = 0x0; PVOID pDesiredFunctionAddress; SIZE_T currStringLen = 0; char* pCurrPos = NULL; pNtdllPeHeader = (PIMAGE_NT_HEADERS64)((PUCHAR)pRawNtdllBase + ((PIMAGE_DOS_HEADER)pRawNtdllBase)->e_lfanew); if ((pNtdllPeHeader->Signature != IMAGE_NT_SIGNATURE) || (pNtdllPeHeader->OptionalHeader.Magic != IMAGE_NT_OPTIONAL_HDR64_MAGIC) || (pNtdllPeHeader->FileHeader.Machine != IMAGE_FILE_MACHINE_AMD64)) return STATUS_INVALID_IMAGE_WIN_64; if (!pNtdllPeHeader->OptionalHeader.NumberOfRvaAndSizes) return STATUS_RESOURCE_DATA_NOT_FOUND; rvaNtdllExportDirectory = pNtdllPeHeader->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT].VirtualAddress; pNtdllExportDirectory = (PIMAGE_EXPORT_DIRECTORY)rvaToFileOffset(rvaNtdllExportDirectory, pRawNtdllBase); if (!pNtdllExportDirectory) return STATUS_NOT_EXPORT_FORMAT; pNameRvaArray = (PULONG)rvaToFileOffset(pNtdllExportDirectory->AddressOfNames, pRawNtdllBase); pNameOrdinalArray = (PUSHORT)rvaToFileOffset(pNtdllExportDirectory->AddressOfNameOrdinals, pRawNtdllBase); pFunctionRvaArray = 
(PULONG)rvaToFileOffset(pNtdllExportDirectory->AddressOfFunctions, pRawNtdllBase); if (!pNameRvaArray || !pNameOrdinalArray || !pFunctionRvaArray) return STATUS_INVALID_IMAGE_FORMAT; pCurrPos = sg_pSyscallTable + 4; for (ULONG i = 0; i < pNtdllExportDirectory->NumberOfNames; i++) { pCurrName = (char*)rvaToFileOffset(pNameRvaArray[i], pRawNtdllBase); if (!pCurrName) continue; if(('N' == pCurrName[0]) && ('t' == pCurrName[1])) { rvaCurrentFunction = pFunctionRvaArray[pNameOrdinalArray[i]]; rvaToFileOffset(rvaCurrentFunction, pRawNtdllBase); pDesiredFunctionAddress = rvaToFileOffset(rvaCurrentFunction, pRawNtdllBase); if (pDesiredFunctionAddress) { currStringLen = strlen(pCurrName); *(PUSHORT)pCurrPos = (USHORT)currStringLen; pCurrPos += 2; __movsb((PUCHAR)pCurrPos, (PUCHAR)pCurrName, currStringLen); pCurrPos += currStringLen; *(PULONG)pCurrPos = rvaCurrentFunction; *(PULONG)(pCurrPos + 4) = ((PNT_SYSCALL_STUB)pDesiredFunctionAddress)->syscallNumber; pCurrPos += 8; ///Save the number of NtXxx functions... (*(PULONG)sg_pSyscallTable)++; } } } ///This means we have never found ANY NtXxx function. Therefore the user needs to make sure (using Process Hacker e.g.) ///that the ntdll file hase been loaded correctly (See Memory tab in PH). if (!*(PULONG)sg_pSyscallTable) return STATUS_INTERNAL_ERROR; ///A pristine ntdll should have at least 64 NtXxx functions IMHO... if(0x40 > *(PULONG)sg_pSyscallTable) return STATUS_PARTIAL_COPY; return STATUS_SUCCESS; }
/*
 * Moves Length bytes from Source to Destination.
 *
 * Lengths of 0, 1, 2, 4 and 8 bytes are handled inline with a single
 * load/store of the matching integer width, since such small moves
 * make up the bulk of the calls; every other length is handed to the
 * generic memory move routine.
 */
static inline VOID
EmulatorMoveMemory(OUT VOID UNALIGNED *Destination,
                   IN const VOID UNALIGNED *Source,
                   IN SIZE_T Length)
{
#if 1
    /* Nothing to move */
    if (Length == 0)
    {
        return;
    }

    /* Small moves: one access of the exact width */
    if (Length == sizeof(UCHAR))
    {
        *(PUCHAR)Destination = *(PUCHAR)Source;
        return;
    }
    if (Length == sizeof(USHORT))
    {
        *(PUSHORT)Destination = *(PUSHORT)Source;
        return;
    }
    if (Length == sizeof(ULONG))
    {
        *(PULONG)Destination = *(PULONG)Source;
        return;
    }
    if (Length == sizeof(ULONGLONG))
    {
        *(PULONGLONG)Destination = *(PULONGLONG)Source;
        return;
    }

    /* Any other size: use the generic routine */
#if defined(__GNUC__)
    __builtin_memmove(Destination, Source, Length);
#else
    RtlMoveMemory(Destination, Source, Length);
#endif

#else // defined(_MSC_VER)

    PUCHAR Out = (PUCHAR)Destination;
    PUCHAR In  = (PUCHAR)Source;
    SIZE_T Chunks, Remaining = Length;

    /* Move dword */
    Chunks    = Remaining >> 2; // Remaining / sizeof(ULONG);
    Remaining = Remaining  & 3; // Remaining % sizeof(ULONG);
    __movsd(Out, In, Chunks);
    Out += Chunks << 2; // Chunks * sizeof(ULONG);
    In  += Chunks << 2;

    /* Move word */
    Chunks    = Remaining >> 1; // Remaining / sizeof(USHORT);
    Remaining = Remaining  & 1; // Remaining % sizeof(USHORT);
    __movsw(Out, In, Chunks);
    Out += Chunks << 1; // Chunks * sizeof(USHORT);
    In  += Chunks << 1;

    /* Move byte */
    Chunks = Remaining; // Remaining / sizeof(UCHAR);
    __movsb(Out, In, Chunks);

#endif
}
//------------------------------------------------------------------------- BOOL CreateTrampolineFunction(TRAMPOLINE *ct) { #if defined _M_X64 CALL_ABS call = { 0x15FF, 0x00000000 }; JMP_ABS jmp = { 0x25FF, 0x00000000 }; JCC_ABS jcc = { 0x70, 0x06, 0x25FF, 0x00000000 }; #elif defined _M_IX86 CALL_REL call = { 0xE8, 0x00000000 }; JMP_REL jmp = { 0xE9, 0x00000000 }; JCC_REL jcc = { 0x800F, 0x00000000 }; #endif size_t oldPos = 0; size_t newPos = 0; ULONG_PTR jmpDest = 0; // Destination address of an internal jump. BOOL finished = FALSE; // Is the function completed? #if defined _M_X64 size_t tableSize = 0; UINT8 instBuf[16]; #endif while (!finished) { hde_t hs; UINT copySize; void *pCopySrc; ULONG_PTR pOldInst = (ULONG_PTR)ct->pTarget + oldPos; ULONG_PTR pNewInst = (ULONG_PTR)ct->pTrampoline + newPos; copySize = HDE_DISASM((void *)pOldInst, &hs); if (hs.flags & F_ERROR) return FALSE; pCopySrc = (void *)pOldInst; if (oldPos >= sizeof(JMP_REL)) { // The trampoline function is long enough. // Complete the function with the jump to the target function. #if defined _M_X64 if (tableSize >= ct->tableSize) return FALSE; ct->pTable[tableSize++] = pOldInst; jmp.operand = (UINT32)((ULONG_PTR)(ct->pTable + tableSize - 1) - (pNewInst + sizeof(JMP_ABS))); #elif defined _M_IX86 jmp.operand = (UINT32)(pOldInst - (pNewInst + sizeof(JMP_REL))); #endif pCopySrc = &jmp; copySize = sizeof(jmp); finished = TRUE; } #if defined _M_X64 else if ((hs.modrm & 0xC7) == 0x05) { // Instructions using RIP relative addressing. (ModR/M = 00???101B) // Modify the RIP relative address. UINT32 *pRelAddr; __movsb(instBuf, (PBYTE)pOldInst, copySize); pCopySrc = instBuf; // Relative address is stored at (instruction length - immediate value length - 4). pRelAddr = (PUINT32)(instBuf + hs.len - ((hs.flags & 0x3C) >> 2) - 4); *pRelAddr = (UINT32)((pOldInst + hs.len + (INT32)hs.disp.disp32) - (pNewInst + hs.len)); // Complete the function if JMP (FF /4). 
if (hs.opcode == 0xFF && hs.modrm_reg == 4) finished = TRUE; } #endif else if (hs.opcode == 0xE8)