/// <summary> /// Create worker RPC thread /// </summary> /// <returns>Thread ID</returns> DWORD RemoteExec::CreateWorkerThread() { AsmJit::Assembler a; AsmJitHelper ah( a ); AsmJit::Label l_loop = a.newLabel(); // // Create execution thread // if(!_hWorkThd.valid()) { eModType mt = mt_default; if (_memory.core().native()->GetWow64Barrier().type == wow_64_32) { mt = mt_mod64; ah.SwitchTo64(); // Align stack on 16 byte boundary a.and_( AsmJit::nsp, -16 ); // Allocate new x64 activation stack auto createActStack = _mods.GetExport( _mods.GetModule( L"ntdll.dll", LdrList, mt ), "RtlAllocateActivationContextStack" ).procAddress; if(createActStack) { ah.GenCall( static_cast<size_t>(createActStack), { _userData.ptr<size_t>() + 0x3000 } ); a.mov( AsmJit::nax, _userData.ptr<size_t>() + 0x3000 ); a.mov( AsmJit::nax, AsmJit::sysint_ptr( AsmJit::nax ) ); ah.SetTebPtr(); a.mov( AsmJit::sysint_ptr( AsmJit::ndx, 0x2c8 ), AsmJit::nax ); } } auto proc = _mods.GetExport( _mods.GetModule( L"ntdll.dll", LdrList, mt ), "NtDelayExecution" ).procAddress; auto pExitThread = _mods.GetExport( _mods.GetModule( L"ntdll.dll" ), "NtTerminateThread" ).procAddress; if (proc == 0 || pExitThread == 0) return 0; /* for(;;) SleepEx(5, TRUE); ExitThread(SetEvent(m_hWaitEvent)); */ a.bind( l_loop ); ah.GenCall( static_cast<size_t>(proc), { TRUE, _workerCode.ptr<size_t>() } ); a.jmp( l_loop ); ah.ExitThreadWithStatus( pExitThread, _userData.ptr<size_t>() ); // Write code into process LARGE_INTEGER liDelay = { 0 }; liDelay.QuadPart = -10 * 1000 * 5; _workerCode.Write( 0, liDelay ); _workerCode.Write( sizeof(LARGE_INTEGER), a.getCodeSize(), a.make() ); _hWorkThd = _threads.CreateNew( _workerCode.ptr<size_t>() + sizeof(LARGE_INTEGER), _userData.ptr<size_t>() ); } return _hWorkThd.id(); }
/// <summary> /// Execute code in context of any existing thread /// </summary> /// <param name="pCode">Cde to execute</param> /// <param name="size">Code size.</param> /// <param name="callResult">Execution result</param> /// <param name="thd">Target thread</param> /// <returns>Status</returns> NTSTATUS RemoteExec::ExecInAnyThread( PVOID pCode, size_t size, uint64_t& callResult, Thread& thd ) { NTSTATUS dwResult = STATUS_SUCCESS; CONTEXT_T ctx; // Prepare for remote exec CreateRPCEnvironment( true ); // Write code dwResult = CopyCode( pCode, size ); if (dwResult != STATUS_SUCCESS) return dwResult; if (_hWaitEvent) ResetEvent( _hWaitEvent ); if (!thd.Suspend()) return LastNtStatus(); if (thd.GetContext( ctx, CONTEXT_ALL, true )) { AsmJit::Assembler a; AsmJitHelper ah( a ); #ifdef _M_AMD64 const int count = 15; AsmJit::GPReg regs[] = { AsmJit::rax, AsmJit::rbx, AsmJit::rcx, AsmJit::rdx, AsmJit::rsi, AsmJit::rdi, AsmJit::r8, AsmJit::r9, AsmJit::r10, AsmJit::r11, AsmJit::r12, AsmJit::r13, AsmJit::r14, AsmJit::r15, AsmJit::rbp }; // // Preserve thread context // I don't care about FPU, XMM and anything else // a.sub(AsmJit::rsp, count * WordSize); // Stack must be aligned on 16 bytes a.pushfq(); // // Save registers for (int i = 0; i < count; i++) a.mov( AsmJit::Mem( AsmJit::rsp, i * WordSize ), regs[i] ); ah.GenCall( _userCode.ptr<size_t>(), { _userData.ptr<size_t>() } ); AddReturnWithEvent( ah, rt_int32, INTRET_OFFSET ); // Restore registers for (int i = 0; i < count; i++) a.mov( regs[i], AsmJit::Mem( AsmJit::rsp, i * WordSize ) ); a.popfq(); a.add( AsmJit::rsp, count * WordSize ); a.jmp( ctx.Rip ); #else a.pushad(); a.pushfd(); ah.GenCall( _userCode.ptr<size_t>(), { _userData.ptr<size_t>() } ); AddReturnWithEvent( ah, rt_int32, INTRET_OFFSET ); a.popfd(); a.popad(); a.push( ctx.NIP ); a.ret(); #endif if (_userCode.Write( size, a.getCodeSize(), a.make() ) == STATUS_SUCCESS) { ctx.NIP = _userCode.ptr<size_t>() + size; if (!thd.SetContext( ctx, true )) dwResult = LastNtStatus(); } else dwResult = LastNtStatus(); } else dwResult = LastNtStatus(); thd.Resume(); if (dwResult == STATUS_SUCCESS) { WaitForSingleObject( _hWaitEvent, INFINITE ); callResult = _userData.Read<size_t>( INTRET_OFFSET, 0 ); } return dwResult; }