/// <summary>
/// Unload specific module from target process. Can't be used to unload manually mapped modules
/// </summary>
/// <param name="hMod">Module to unload</param>
/// <returns>true on success</returns>
bool ProcessModules::Unload( const ModuleData* hMod )
{
    uint64_t res = 0;
    AsmJit::Assembler a;
    AsmJitHelper ah( a );

    // Module not present or is manually mapped
    if (hMod == nullptr ||  hMod->manual || !ValidateModule( hMod->baseAddress ))
    {
        LastNtStatus( STATUS_NOT_FOUND );
        return false;
    }

    // Unload routine
    auto pUnload = GetExport( GetModule( L"ntdll.dll", LdrList, hMod->type ), "LdrUnloadDll" );
    if (pUnload.procAddress == 0)
        return nullptr;

    ah.GenCall( static_cast<size_t>(pUnload.procAddress), { static_cast<size_t>(hMod->baseAddress) } );
    a.ret();

    _proc.remote().ExecInNewThread( a.make(), a.getCodeSize(), res );

    return true;
}
Exemple #2
0
/// <summary>
/// Create new thread and execute code in it. Wait until execution ends
/// </summary>
/// <param name="pCode">Code to execute</param>
/// <param name="size">Code size</param>
/// <param name="callResult">Code return value</param>
/// <returns>Status</returns>
NTSTATUS RemoteExec::ExecInNewThread( PVOID pCode, size_t size, uint64_t& callResult )
{
    AsmJit::Assembler a;
    AsmJitHelper ah( a );
    NTSTATUS dwResult = STATUS_SUCCESS;

    // Write code
    dwResult = CopyCode( pCode, size );
    if (dwResult != STATUS_SUCCESS)
        return dwResult;

    bool switchMode = (_proc.core().native()->GetWow64Barrier().type == wow_64_32);
    auto pExitThread = _mods.GetExport( _mods.GetModule( L"ntdll.dll" ), "NtTerminateThread" ).procAddress;
    if (pExitThread == 0)
        return LastNtStatus( STATUS_NOT_FOUND );

    ah.GenPrologue( switchMode );

    // Prepare thread to run in x64 mode
    if(switchMode)
    {
        // Allocate new x64 activation stack
        auto createActStack = _mods.GetExport( _mods.GetModule( L"ntdll.dll", LdrList, mt_mod64 ),
                                               "RtlAllocateActivationContextStack" ).procAddress;
        if (createActStack)
        {
            ah.GenCall( static_cast<size_t>(createActStack), { _userData.ptr<size_t>() + 0x3100 } );
            a.mov( AsmJit::nax, _userData.ptr<size_t>( ) + 0x3100 );
            a.mov( AsmJit::nax, AsmJit::sysint_ptr( AsmJit::nax ) );

            ah.SetTebPtr();
            a.mov( AsmJit::sysint_ptr( AsmJit::ndx, 0x2c8 ), AsmJit::nax );
        }
    }

    ah.GenCall( _userCode.ptr<size_t>(), { } );
    ah.ExitThreadWithStatus( pExitThread, _userData.ptr<size_t>( ) + INTRET_OFFSET );

    // Execute code in newly created thread
    if (_userCode.Write( size, a.getCodeSize(), a.make() ) == STATUS_SUCCESS)
    {
        auto thread = _threads.CreateNew( _userCode.ptr<ptr_t>() + size, _userData.ptr<ptr_t>() );

        dwResult = thread.Join();
        callResult = _userData.Read<uint64_t>( INTRET_OFFSET, 0 );
    }
    else
        dwResult = LastNtStatus();

    return dwResult;
}
Exemple #3
0
/// <summary>
/// Set custom exception handler to bypass SafeSEH under DEP 
/// </summary>
/// <param name="pImage">image data</param>
/// <returns>true on success</returns>
bool MMap::EnableExceptions( ImageContext* pImage )
{
#ifdef _M_AMD64
    size_t size = pImage->PEImage.DirectorySize( IMAGE_DIRECTORY_ENTRY_EXCEPTION );
    IMAGE_RUNTIME_FUNCTION_ENTRY *pExpTable = 
        reinterpret_cast<decltype(pExpTable)>(pImage->PEImage.DirectoryAddress( IMAGE_DIRECTORY_ENTRY_EXCEPTION ));

    // Invoke RtlAddFunctionTable
    if(pExpTable)
    {
        AsmJit::Assembler a;
        AsmJitHelper ah(a);
        uint64_t result = 0;

        pImage->pExpTableAddr = REBASE( pExpTable, pImage->FileImage.base(), pImage->imgMem.ptr<ptr_t>() );
        auto pAddTable = _process.modules().GetExport( _process.modules().GetModule( L"ntdll.dll", LdrList, pImage->PEImage.mType() ),
                                                       "RtlAddFunctionTable" );

        ah.GenPrologue();
        ah.GenCall( static_cast<size_t>(pAddTable.procAddress), { pImage->pExpTableAddr, 
                                                                  size / sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY),
                                                                  pImage->imgMem.ptr<size_t>() } );
        _process.remote().AddReturnWithEvent( ah );
        ah.GenEpilogue();

        if (_process.remote().ExecInWorkerThread( a.make(), a.getCodeSize(), result ) != STATUS_SUCCESS)
            return false;

        if (pImage->flags & CreateLdrRef)
            return true;
        else
            return (MExcept::CreateVEH( pImage->imgMem.ptr<size_t>(), pImage->PEImage.imageSize() ) == STATUS_SUCCESS);
    }
    else
        return false;
#else
    _process.nativeLdr().InsertInvertedFunctionTable( pImage->imgMem.ptr<void*>(), pImage->PEImage.imageSize() );

    if (pImage->flags & PartialExcept)
        return true;
    else
        return (MExcept::CreateVEH( pImage->imgMem.ptr<size_t>(), pImage->PEImage.imageSize() ) == STATUS_SUCCESS);
#endif

}
Exemple #4
0
/// <summary>
/// Remove custom exception handler
/// </summary>
/// <param name="pImage">image data</param>
/// <returns>true on success</returns>
bool MMap::DisableExceptions( ImageContext* pImage )
{
#ifdef _M_AMD64
    if(pImage->pExpTableAddr)
    {
        AsmJit::Assembler a;
        AsmJitHelper ah( a );
        size_t result = 0;

        ah.GenPrologue();

        auto pRmoveTable = _process.modules().GetExport( _process.modules().GetModule( L"ntdll.dll", LdrList, pImage->PEImage.mType() ),
                                                         "RtlDeleteFunctionTable" );

        // RtlDeleteFunctionTable(pExpTable);
        ah.GenCall( static_cast<size_t>(pRmoveTable.procAddress), { pImage->pExpTableAddr } );
        _process.remote().AddReturnWithEvent( ah );
        ah.GenEpilogue();

        if (_process.remote().ExecInWorkerThread( a.make(), a.getCodeSize(), result ) != STATUS_SUCCESS)
            return false;

        if (pImage->flags & CreateLdrRef)
            return true;
        else
            return (MExcept::RemoveVEH() == STATUS_SUCCESS);
    }
    else
        return false;
#else
    if (pImage->flags & (PartialExcept | NoExceptions))
        return true;
    else
        return (MExcept::RemoveVEH() == STATUS_SUCCESS);

#endif
}
Exemple #5
0
/// <summary>
/// Create activation context
/// Target memory layout:
/// -----------------------------
/// | hCtx | ACTCTX | file_path |
/// -----------------------------
/// </summary>
/// <param name="path">Image file path.</param>
/// <param name="id">Manifest resource id</param>
/// <returns>true on success</returns>
bool MMap::CreateActx( const std::wstring& path, int id /*= 2 */ )
{
    AsmJit::Assembler a;
    AsmJitHelper ah( a );

    uint64_t result = 0;
    ACTCTXW act = { 0 };

    _pAContext = _process.memory().Allocate( 512, PAGE_READWRITE );
    
    act.cbSize = sizeof(act);
    act.dwFlags = ACTCTX_FLAG_RESOURCE_NAME_VALID;
    act.lpSource = reinterpret_cast<LPCWSTR>(_pAContext.ptr<size_t>() + sizeof(HANDLE) + sizeof(act));
    act.lpResourceName = MAKEINTRESOURCEW( id );

    bool switchMode = (_process.core().native()->GetWow64Barrier().type == wow_64_32);
    auto pCreateActx = _process.modules().GetExport( _process.modules().GetModule( L"kernel32.dll" ), "CreateActCtxW" );
    if (pCreateActx.procAddress == 0)
        return nullptr;

    // CreateActCtx(&act)
    // Emulate Wow64
    if (switchMode)
    {
        _ACTCTXW_T<DWORD> act32 = { 0 };

        act32.cbSize = sizeof(act32);
        act32.dwFlags = ACTCTX_FLAG_RESOURCE_NAME_VALID;
        act32.lpSource = _pAContext.ptr<DWORD>() + sizeof(HANDLE) + sizeof(act32);
        act32.lpResourceName = id ;

        a.push( _pAContext.ptr<DWORD>() + static_cast<DWORD>(sizeof(HANDLE)) );
        a.mov( AsmJit::eax, static_cast<DWORD>(pCreateActx.procAddress) );
        a.call( AsmJit::nax );
        a.mov( AsmJit::edx, _pAContext.ptr<DWORD>() );
        a.mov( AsmJit::dword_ptr( AsmJit::edx ), AsmJit::eax );

        auto pTermThd = _process.modules().GetExport( _process.modules().GetModule( L"ntdll.dll" ), "NtTerminateThread" );
        a.push( AsmJit::nax );
        a.push( DWORD( 0 ) );
        a.mov( AsmJit::eax, static_cast<DWORD>(pTermThd.procAddress) );
        a.call( AsmJit::nax );
        a.ret( 4 );
        
        // Write path to file
        _pAContext.Write( sizeof(HANDLE), act32 );
        _pAContext.Write( sizeof(HANDLE) + sizeof(act32), (path.length() + 1) * sizeof(wchar_t), path.c_str() );

        auto pCode = _process.memory().Allocate( 0x1000 );
        pCode.Write( 0, a.getCodeSize(), a.make() );

        result = _process.remote().ExecDirect( pCode.ptr<ptr_t>(), _pAContext.ptr<size_t>() + sizeof(HANDLE) );
    }
    // Native way
    else
    {
        ah.GenPrologue();

        ah.GenCall( static_cast<size_t>(pCreateActx.procAddress), { _pAContext.ptr<size_t>() + sizeof(HANDLE) } );

        a.mov( AsmJit::ndx, _pAContext.ptr<size_t>() );
        a.mov( AsmJit::sysint_ptr( AsmJit::ndx ), AsmJit::nax );

        _process.remote().AddReturnWithEvent( ah );
        ah.GenEpilogue();

        // Write path to file
        _pAContext.Write( sizeof(HANDLE), act );
        _pAContext.Write( sizeof(HANDLE) + sizeof(act), (path.length() + 1) * sizeof(wchar_t), path.c_str() );

        _process.remote().ExecInWorkerThread( a.make(), a.getCodeSize(), result );
    }


    if (reinterpret_cast<HANDLE>(result) == INVALID_HANDLE_VALUE)
    {
        _pAContext.Free();

        // SetLastError( err::mapping::CantCreateActx );
        return false;
    }

    return true;
}
Exemple #6
0
/// <summary>
/// Run module initializers(TLS and entry point).
/// </summary>
/// <param name="pImage">Image data</param>
/// <param name="dwReason">one of the following:
/// DLL_PROCESS_ATTACH
/// DLL_THREAD_ATTACH
/// DLL_PROCESS_DETACH
/// DLL_THREAD_DETTACH
/// </param>
/// <returns>true on success</returns>
bool MMap::RunModuleInitializers( ImageContext* pImage, DWORD dwReason )
{
    AsmJit::Assembler a;
    AsmJitHelper ah( a );
    uint64_t result = 0;

    auto hNtdll = _process.modules().GetModule( L"ntdll.dll", LdrList, pImage->PEImage.mType() );
    auto pActivateActx = _process.modules().GetExport( hNtdll, "RtlActivateActivationContext" );
    auto pDeactivateeActx = _process.modules().GetExport( hNtdll, "RtlDeactivateActivationContext" );

    ah.GenPrologue();

    // ActivateActCtx
    if (_pAContext.valid())
    {
        a.mov( AsmJit::nax, _pAContext.ptr<size_t>() );
        a.mov( AsmJit::nax, AsmJit::dword_ptr( AsmJit::nax ) );
        ah.GenCall( static_cast<size_t>(pActivateActx.procAddress), { 0, AsmJit::nax, _pAContext.ptr<size_t>() + sizeof(HANDLE) } );
    }

    // Function order
    // TLS first, entry point last
    if (dwReason == DLL_PROCESS_ATTACH || dwReason == DLL_THREAD_ATTACH)
    {
        // PTLS_CALLBACK_FUNCTION(pImage->ImageBase, dwReason, NULL);
        if (!(pImage->flags & NoTLS))
            for (auto& pCallback : pImage->tlsCallbacks)
                ah.GenCall( static_cast<size_t>(pCallback), { pImage->imgMem.ptr<size_t>(), dwReason, NULL } );

        // DllMain
        if (pImage->EntryPoint != 0)
            ah.GenCall( static_cast<size_t>(pImage->EntryPoint), { pImage->imgMem.ptr<size_t>(), dwReason, NULL } );
    }
    // Entry point first, TLS last
    else
    {
        // DllMain
        if (pImage->EntryPoint != 0)
            ah.GenCall( static_cast<size_t>(pImage->EntryPoint), { pImage->imgMem.ptr<size_t>(), dwReason, NULL } );

        // PTLS_CALLBACK_FUNCTION(pImage->ImageBase, dwReason, NULL);
        if (!(pImage->flags & NoTLS))
            for (auto& pCallback : pImage->tlsCallbacks)
                ah.GenCall( static_cast<size_t>(pCallback), { pImage->imgMem.ptr<size_t>(), dwReason, NULL } );
    }

    // DeactivateActCtx
    if (_pAContext.valid())
    {
        a.mov( AsmJit::nax, _pAContext.ptr<size_t>() + sizeof(HANDLE) );
        a.mov( AsmJit::nax, AsmJit::dword_ptr( AsmJit::nax ) );
        ah.GenCall( static_cast<size_t>(pDeactivateeActx.procAddress), { 0, AsmJit::nax } );
    }

    _process.remote().AddReturnWithEvent( ah );
    ah.GenEpilogue();

    _process.remote().ExecInWorkerThread( a.make(), a.getCodeSize(), result );

    return true;
}
Exemple #7
0
/// <summary>
/// Create event to synchronize APC procedures
/// </summary>
/// <param name="threadID">The thread identifier.</param>
/// <returns>true on success</returns>
bool RemoteExec::CreateAPCEvent( DWORD threadID )
{
    if(_hWaitEvent == NULL)
    {
        AsmJit::Assembler a;
        AsmJitHelper ah( a );

        wchar_t pEventName[128] = { 0 };
        uint64_t dwResult = NULL;
        size_t len = sizeof(pEventName);
        OBJECT_ATTRIBUTES obAttr = { 0 };
        UNICODE_STRING ustr = { 0 };

        // Detect ntdll type
        eModType mt = mt_default;
        if (_memory.core().native()->GetWow64Barrier().type == wow_64_32)
            mt = mt_mod64;

        // Event name
        swprintf_s( pEventName, ARRAYSIZE( pEventName ), L"\\BaseNamedObjects\\_MMapEvent_0x%x_0x%x", threadID, GetTickCount() );

        // Prepare Arguments
        ustr.Length = static_cast<USHORT>(wcslen( pEventName ) * sizeof(wchar_t));
        ustr.MaximumLength = static_cast<USHORT>(sizeof(pEventName));
        ustr.Buffer = reinterpret_cast<PWSTR>(_userData.ptr<size_t>() + ARGS_OFFSET + sizeof(obAttr) + sizeof(ustr));

        obAttr.ObjectName = reinterpret_cast<PUNICODE_STRING>(_userData.ptr<size_t>() + ARGS_OFFSET + sizeof(obAttr));
        obAttr.Length = sizeof(obAttr);

        auto pCreateEvent = _mods.GetExport( _mods.GetModule( L"ntdll.dll", LdrList, mt ), "NtCreateEvent" ).procAddress;
        if (pCreateEvent == 0)
            return false;

        ah.GenCall( static_cast<size_t>(pCreateEvent), {
            _userData.ptr<size_t>( ) + EVENT_OFFSET, EVENT_ALL_ACCESS,
            _userData.ptr<size_t>() + ARGS_OFFSET, 0, FALSE
        } );

        // Save status
        a.mov( AsmJit::ndx, _userData.ptr<size_t>() + ERR_OFFSET );
        a.mov( AsmJit::dword_ptr( AsmJit::ndx ), AsmJit::eax );

        a.ret();

        NTSTATUS status = _userData.Write( ARGS_OFFSET, obAttr );
        status |= _userData.Write( ARGS_OFFSET + sizeof(obAttr), ustr );
        status |= _userData.Write( ARGS_OFFSET + sizeof(obAttr) + sizeof(ustr), len, pEventName );

        if (status != STATUS_SUCCESS)
            return false;

        ExecInNewThread( a.make(), a.getCodeSize(), dwResult );

        status = _userData.Read<NTSTATUS>( ERR_OFFSET, -1 );
        if (status != STATUS_SUCCESS)
            return false;

        ustr.Buffer = pEventName;
        obAttr.ObjectName = &ustr;

        // Open created event
        status = GET_IMPORT( NtOpenEvent )( &_hWaitEvent, SYNCHRONIZE | EVENT_MODIFY_STATE, &obAttr );

        if (status != STATUS_SUCCESS)
            return false;
    }

    return true;
}
Exemple #8
0
/// <summary>
/// Create worker RPC thread
/// </summary>
/// <returns>Thread ID</returns>
DWORD RemoteExec::CreateWorkerThread()
{
    AsmJit::Assembler a;
    AsmJitHelper ah( a );
    AsmJit::Label l_loop = a.newLabel();

    //
    // Create execution thread
    //
    if(!_hWorkThd.valid())
    {
        eModType mt = mt_default;
        if (_memory.core().native()->GetWow64Barrier().type == wow_64_32)
        {
            mt = mt_mod64;

            ah.SwitchTo64();

            // Align stack on 16 byte boundary
            a.and_( AsmJit::nsp, -16 );

            // Allocate new x64 activation stack
            auto createActStack = _mods.GetExport( _mods.GetModule( L"ntdll.dll", LdrList, mt ),
                                                   "RtlAllocateActivationContextStack" ).procAddress;
            if(createActStack)
            {
                ah.GenCall( static_cast<size_t>(createActStack), { _userData.ptr<size_t>() + 0x3000 } );
                a.mov( AsmJit::nax, _userData.ptr<size_t>() + 0x3000 );
                a.mov( AsmJit::nax, AsmJit::sysint_ptr( AsmJit::nax ) );

                ah.SetTebPtr();
                a.mov( AsmJit::sysint_ptr( AsmJit::ndx, 0x2c8 ), AsmJit::nax );
            }
        }

        auto proc = _mods.GetExport( _mods.GetModule( L"ntdll.dll", LdrList, mt ), "NtDelayExecution" ).procAddress;
        auto pExitThread = _mods.GetExport( _mods.GetModule( L"ntdll.dll" ), "NtTerminateThread" ).procAddress;
        if (proc == 0 || pExitThread == 0)
            return 0;

        /*
            for(;;)
                SleepEx(5, TRUE);

            ExitThread(SetEvent(m_hWaitEvent));
        */
        a.bind( l_loop );
        ah.GenCall( static_cast<size_t>(proc), { TRUE, _workerCode.ptr<size_t>() } );
        a.jmp( l_loop );

        ah.ExitThreadWithStatus( pExitThread, _userData.ptr<size_t>() );

        // Write code into process
        LARGE_INTEGER liDelay = { 0 };
        liDelay.QuadPart = -10 * 1000 * 5;

        _workerCode.Write( 0, liDelay );
        _workerCode.Write( sizeof(LARGE_INTEGER), a.getCodeSize(), a.make() );

        _hWorkThd = _threads.CreateNew( _workerCode.ptr<size_t>() + sizeof(LARGE_INTEGER), _userData.ptr<size_t>() );
    }

    return _hWorkThd.id();
}
Exemple #9
0
/// <summary>
/// Execute code in context of any existing thread
/// </summary>
/// <param name="pCode">Cde to execute</param>
/// <param name="size">Code size.</param>
/// <param name="callResult">Execution result</param>
/// <param name="thd">Target thread</param>
/// <returns>Status</returns>
NTSTATUS RemoteExec::ExecInAnyThread( PVOID pCode, size_t size, uint64_t& callResult, Thread& thd )
{
    NTSTATUS dwResult = STATUS_SUCCESS;
    CONTEXT_T ctx;

    // Prepare for remote exec
    CreateRPCEnvironment( true );

    // Write code
    dwResult = CopyCode( pCode, size );
    if (dwResult != STATUS_SUCCESS)
        return dwResult;

    if (_hWaitEvent)
        ResetEvent( _hWaitEvent );

    if (!thd.Suspend())
        return LastNtStatus();

    if (thd.GetContext( ctx, CONTEXT_ALL, true ))
    {
        AsmJit::Assembler a;
        AsmJitHelper ah( a );

#ifdef _M_AMD64
        const int count = 15;
        AsmJit::GPReg regs[] = { AsmJit::rax, AsmJit::rbx, AsmJit::rcx, AsmJit::rdx, AsmJit::rsi,
                                 AsmJit::rdi, AsmJit::r8,  AsmJit::r9,  AsmJit::r10, AsmJit::r11,
                                 AsmJit::r12, AsmJit::r13, AsmJit::r14, AsmJit::r15, AsmJit::rbp
                               };
        //
        // Preserve thread context
        // I don't care about FPU, XMM and anything else
        //
        a.sub(AsmJit::rsp, count * WordSize);  // Stack must be aligned on 16 bytes
        a.pushfq();                            //

        // Save registers
        for (int i = 0; i < count; i++)
            a.mov( AsmJit::Mem( AsmJit::rsp, i * WordSize ), regs[i] );

        ah.GenCall( _userCode.ptr<size_t>(), { _userData.ptr<size_t>() } );
        AddReturnWithEvent( ah, rt_int32, INTRET_OFFSET );

        // Restore registers
        for (int i = 0; i < count; i++)
            a.mov( regs[i], AsmJit::Mem( AsmJit::rsp, i * WordSize ) );

        a.popfq();
        a.add( AsmJit::rsp, count * WordSize );

        a.jmp( ctx.Rip );
#else
        a.pushad();
        a.pushfd();

        ah.GenCall( _userCode.ptr<size_t>(), { _userData.ptr<size_t>() } );
        AddReturnWithEvent( ah, rt_int32, INTRET_OFFSET );

        a.popfd();
        a.popad();

        a.push( ctx.NIP );
        a.ret();
#endif

        if (_userCode.Write( size, a.getCodeSize(), a.make() ) == STATUS_SUCCESS)
        {
            ctx.NIP = _userCode.ptr<size_t>() + size;

            if (!thd.SetContext( ctx, true ))
                dwResult = LastNtStatus();
        }
        else
            dwResult = LastNtStatus();
    }
    else
        dwResult = LastNtStatus();

    thd.Resume();

    if (dwResult == STATUS_SUCCESS)
    {
        WaitForSingleObject( _hWaitEvent, INFINITE );
        callResult = _userData.Read<size_t>( INTRET_OFFSET, 0 );
    }

    return dwResult;
}
/// <summary>
/// Inject image into target process
/// </summary>
/// <param name="path">Full-qualified image path</param>
/// <returns>Module info. nullptr if failed</returns>
const ModuleData* ProcessModules::Inject( const std::wstring& path )
{
    const ModuleData* mod = 0;
    ptr_t res = 0;
    AsmJit::Assembler a;
    AsmJitHelper ah( a );
    FileProjection fp;
    pe::PEParser img;

    fp.Project( path );
    img.Parse( fp, fp.isPlainData() );
    fp.Release();

    // Already loaded
    if ((mod = GetModule( path, LdrList, img.mType() )) != nullptr)
        return mod;

    // Image path
    UNICODE_STRING ustr = { 0 };
    auto modName = _memory.Allocate( 0x1000, PAGE_READWRITE );

    ustr.Buffer = reinterpret_cast<PWSTR>(modName.ptr<size_t>() + sizeof(ustr));
    ustr.Length = static_cast<USHORT>(path.size() * sizeof(wchar_t));
    ustr.MaximumLength = ustr.Length;

    modName.Write( 0, ustr );
    modName.Write( sizeof(ustr), path.size() * sizeof(wchar_t), path.c_str() );

    auto pLoadLibrary = GetExport( GetModule( L"kernel32.dll", LdrList, img.mType() ), "LoadLibraryW" ).procAddress;

    // Try to use LoadLibrary if possible
    if (pLoadLibrary != 0 &&
         ((img.mType() == mt_mod64 && _core.isWow64() == false) ||
         (img.mType() == mt_mod32 && _core.isWow64() == true)))
    {
        _proc.remote().ExecDirect( pLoadLibrary, modName.ptr<ptr_t>() + sizeof(ustr) );
    }
    else
    {
        auto pLdrLoadDll = GetExport( GetModule( L"ntdll.dll", Sections, img.mType() ), "LdrLoadDll" ).procAddress;
        if (pLdrLoadDll == 0)
            return nullptr;

        // Patch LdrFindOrMapDll to enable kernel32.dll loading
        #ifdef _M_AMD64
        if (!_ldrPatched && IsWindows7OrGreater( ) && !IsWindows8OrGreater( ))
        {
            uint8_t patch[] = { 0x90, 0x90, 0x90, 0x90, 0x90, 0x90 };
            auto patchBase = _proc.nativeLdr().LdrKernel32PatchAddress();

            if (patchBase != 0)
            {
                DWORD flOld = 0;
                _memory.Protect( patchBase, 6, PAGE_EXECUTE_READWRITE, &flOld );
                _memory.Write( patchBase, sizeof(patch), patch );
                _memory.Protect( patchBase, 6, flOld, nullptr );
            }

            _ldrPatched = true;
        }
        #endif

        ah.GenCall( (size_t)pLdrLoadDll, { 0, 0, modName.ptr<size_t>( ), modName.ptr<size_t>( ) + 0x800 } );
        a.ret();

        _proc.remote().ExecInNewThread( a.make(), a.getCodeSize(), res );
    }

    if (res == STATUS_SUCCESS)
        return GetModule( path, LdrList, img.mType() );
    else
        return nullptr;
}