Example 1
/**
 * Grows the cache.
 *
 * @returns IPRT status code.
 * @param   pThis               The memory cache instance.
 */
static int rtMemCacheGrow(RTMEMCACHEINT *pThis)
{
    /*
     * Enter the critical section here to avoid allocation races leading to
     * wasted memory (++) and make it easier to link in the new page.
     */
    RTCritSectEnter(&pThis->CritSect);
    int rc = VINF_SUCCESS;
    if (pThis->cFree < 0)
    {
        /*
         * Allocate and initialize the new page.
         *
         * We put the constructor bitmap at the lower end right after cFree.
         * We then push the object array to the end of the page and place the
         * allocation bitmap below it.  The hope is to increase the chance that
         * the allocation bitmap is in a different cache line than cFree since
         * this improves performance markedly when lots of threads are beating
         * on the cache.
         */
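        /* Worked layout example (assumptions, not from the source: 4KiB page,
         * 48-byte RTMEMCACHEPAGE header, 64-byte objects, 56 objects/page,
         * cBits = RT_ALIGN_32(56, 64) = 64, i.e. 8 bytes per bitmap):
         *   0x000  page header (cFree lives here)
         *   0x030  pbmCtor, 8 bytes, right after the header
         *   0x1f8  pbmAlloc, 8 bytes, just below the objects
         *   0x200  56 * 64 = 3584 bytes of objects, ending at 0x1000
         * cFree and pbmAlloc thus sit several cache lines apart. */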
        PRTMEMCACHEPAGE pPage = (PRTMEMCACHEPAGE)RTMemPageAlloc(PAGE_SIZE);
        if (pPage)
        {
            uint32_t const cObjects = RT_MIN(pThis->cPerPage, pThis->cMax - pThis->cTotal);

            ASMMemZeroPage(pPage);
            pPage->pCache       = pThis;
            pPage->pNext        = NULL;
            pPage->cFree        = cObjects;
            pPage->cObjects     = cObjects;
            uint8_t *pb = (uint8_t *)(pPage + 1);
            pb = RT_ALIGN_PT(pb, 8, uint8_t *);
            pPage->pbmCtor      = pb;
            pb = (uint8_t *)pPage + PAGE_SIZE - pThis->cbObject * cObjects;
            pPage->pbObjects    = pb;   Assert(RT_ALIGN_P(pb, pThis->cbAlignment) == pb);
            pb -= pThis->cBits / 8;
            pb = (uint8_t *)((uintptr_t)pb & ~(uintptr_t)7);
            pPage->pbmAlloc     = pb;
            Assert((uintptr_t)pPage->pbmCtor + pThis->cBits / 8 <= (uintptr_t)pPage->pbmAlloc);

            /* Mark the bitmap padding and any unused objects as allocated. */
            for (uint32_t iBit = cObjects; iBit < pThis->cBits; iBit++)
                ASMBitSet(pPage->pbmAlloc, iBit);

            /* Make it the hint. */
            ASMAtomicWritePtr(&pThis->pPageHint, pPage);

            /* Link the page in at the end of the list. */
            ASMAtomicWritePtr(pThis->ppPageNext, pPage);
            pThis->ppPageNext = &pPage->pNext;

            /* Add it to the page counts. */
            ASMAtomicAddS32(&pThis->cFree, cObjects);
            ASMAtomicAddU32(&pThis->cTotal, cObjects);
        }
        else
            rc = VERR_NO_MEMORY;
    }
    RTCritSectLeave(&pThis->CritSect);
    return rc;
}
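For context, here is a minimal usage sketch of the public RTMemCache API this grow path serves (assumptions: signatures as in iprt/memcache.h; UINT32_MAX is taken to mean "no object cap"; ctor/dtor callbacks left NULL):

#include <iprt/memcache.h>

static int exampleCacheUse(void)
{
    RTMEMCACHE hCache;
    int rc = RTMemCacheCreate(&hCache, 64 /*cbObject*/, 16 /*cbAlignment*/,
                              UINT32_MAX /*cMaxObjects*/, NULL /*pfnCtor*/,
                              NULL /*pfnDtor*/, NULL /*pvUser*/, 0 /*fFlags*/);
    if (RT_SUCCESS(rc))
    {
        void *pvObj = RTMemCacheAlloc(hCache); /* first allocation triggers rtMemCacheGrow */
        if (pvObj)
            RTMemCacheFree(hCache, pvObj);
        RTMemCacheDestroy(hCache);
    }
    return rc;
}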
Example 2
/** Generic implementation of krdrRTFileMap. */
static int  krdrRTFileGenericMap(PKRDR pRdr, PKRDRFILEPREP pPrep, KU32 cSegments, PCKLDRSEG paSegments, KBOOL fFixed)
{
    int rc = 0;
    KU32 i;

    /*
     * Generic mapping code using kHlpPageAlloc(), kHlpPageFree() and kHlpPageProtect().
     */
    pPrep->pv = RTMemPageAlloc(pPrep->cb);
    if (!pPrep->pv)
        return KERR_NO_MEMORY;

    /*
     * Load the data.
     */
    for (i = 0; i < cSegments; i++)
    {
        void *pv;

        if (    paSegments[i].RVA == NIL_KLDRADDR
            ||  paSegments[i].cbFile <= 0)
            continue;

        pv = (KU8 *)pPrep->pv + paSegments[i].RVA;
        rc = pRdr->pOps->pfnRead(pRdr, pv, paSegments[i].cbFile, paSegments[i].offFile);
        if (rc)
            break;
    }

    /*
     * Set segment protection.
     */
    if (!rc)
    {
        rc = krdrRTFileGenericProtect(pRdr, pPrep, cSegments, paSegments, 0 /* protect */);
        if (!rc)
            return 0;
        krdrRTFileGenericProtect(pRdr, pPrep, cSegments, paSegments, 1 /* unprotect */);
    }

    /* bailout */
    RTMemPageFree(pPrep->pv, pPrep->cb);
    return rc;
}
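The corresponding generic unmap is not shown here; the following is a hypothetical counterpart sketch (an assumption modeled on the function above, not the actual kLdr code):

static int krdrRTFileGenericUnmapSketch(PKRDR pRdr, PKRDRFILEPREP pPrep, KU32 cSegments, PCKLDRSEG paSegments)
{
    /* Undo the segment protections, then free the page-aligned block. */
    int rc = krdrRTFileGenericProtect(pRdr, pPrep, cSegments, paSegments, 1 /* unprotect */);
    if (!rc)
    {
        RTMemPageFree(pPrep->pv, pPrep->cb);
        pPrep->pv = NULL;
    }
    return rc;
}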
Example 3
static void doInVmmTests(RTTEST hTest)
{
    /*
     * Create empty VM structure and init SSM.
     */
    int rc = SUPR3Init(NULL);
    if (RT_FAILURE(rc))
    {
        RTTestSkipped(hTest, "SUPR3Init failed with rc=%Rrc", rc);
        return;
    }

    PVM pVM;
    RTTESTI_CHECK_RC_RETV(SUPR3PageAlloc(RT_ALIGN_Z(sizeof(*pVM), PAGE_SIZE) >> PAGE_SHIFT, (void **)&pVM), VINF_SUCCESS);


    PUVM pUVM = (PUVM)RTMemPageAlloc(sizeof(*pUVM));
    RTTESTI_CHECK_RETV(pUVM != NULL);
    pUVM->u32Magic = UVM_MAGIC;
    pUVM->pVM = pVM;
    pVM->pUVM = pUVM;

    /*
     * Do the testing.
     */
    RTTESTI_CHECK_RC_RETV(STAMR3InitUVM(pUVM), VINF_SUCCESS);
    RTTESTI_CHECK_RC_RETV(MMR3InitUVM(pUVM), VINF_SUCCESS);
    RTTESTI_CHECK_RC_RETV(CFGMR3Init(pVM, NULL, NULL), VINF_SUCCESS);
    RTTESTI_CHECK_RETV(CFGMR3GetRoot(pVM) != NULL);

    doTestsOnDefaultValues(CFGMR3GetRoot(pVM));
    doGeneralTests(CFGMR3GetRoot(pVM));


    /* done */
    RTTESTI_CHECK_RC_RETV(CFGMR3Term(pVM), VINF_SUCCESS);
}
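The page-count arithmetic passed to SUPR3PageAlloc above is worth restating; a minimal sketch, assuming 4KiB pages (PAGE_SHIFT = 12):

    size_t cbVM   = RT_ALIGN_Z(sizeof(*pVM), PAGE_SIZE); /* round the structure size up to whole pages */
    size_t cPages = cbVM >> PAGE_SHIFT;                  /* convert the byte count to a page count */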
Example 4
static void doTest(RTTEST hTest)
{
    NOREF(hTest);
    uint32_t iAllocCpu = 0;
    while (iAllocCpu < RTCPUSET_MAX_CPUS)
    {
        const uint32_t cbTestSet   = _1M * 32;
        const uint32_t cIterations = 384;

        /*
         * Change CPU and allocate a chunk of memory.
         */
        RTTESTI_CHECK_RC_OK_RETV(RTThreadSetAffinityToCpu(RTMpCpuIdFromSetIndex(iAllocCpu)));

        void *pvTest = RTMemPageAlloc(cbTestSet); /* may be leaked, who cares */
        RTTESTI_CHECK_RETV(pvTest != NULL);
        memset(pvTest, 0xef, cbTestSet);

        /*
         * Do the tests.
         */
        uint32_t iAccessCpu = 0;
        while (iAccessCpu < RTCPUSET_MAX_CPUS)
        {
            RTTESTI_CHECK_RC_OK_RETV(RTThreadSetAffinityToCpu(RTMpCpuIdFromSetIndex(iAccessCpu)));

            /*
             * The write test.
             */
            RTTimeNanoTS(); RTThreadYield();
            uint64_t u64StartTS = RTTimeNanoTS();
            for (uint32_t i = 0; i < cIterations; i++)
            {
                ASMCompilerBarrier(); /* paranoia */
                memset(pvTest, i, cbTestSet);
            }
            uint64_t const cNsElapsedWrite = RTTimeNanoTS() - u64StartTS;
            uint64_t cMBPerSec = (uint64_t)(  ((uint64_t)cIterations * cbTestSet) /* bytes */
                                            / ((long double)cNsElapsedWrite / RT_NS_1SEC_64) /* seconds */
                                            / _1M /* MB */ );
            RTTestIValueF(cMBPerSec, RTTESTUNIT_MEGABYTES_PER_SEC, "cpu%02u-mem%02u-write", iAllocCpu, iAccessCpu);

            /*
             * The read test.
             */
            memset(pvTest, 0, cbTestSet);
            RTTimeNanoTS(); RTThreadYield();
            u64StartTS = RTTimeNanoTS();
            for (uint32_t i = 0; i < cIterations; i++)
            {
#if 1
                register size_t u = 0;
                size_t volatile *puCur = (size_t volatile *)pvTest;
                size_t volatile *puEnd = puCur + cbTestSet / sizeof(size_t);
                while (puCur != puEnd)
                    u += *puCur++;
#else
                ASMCompilerBarrier(); /* paranoia */
                void *pvFound = memchr(pvTest, (i & 127) + 1, cbTestSet);
                RTTESTI_CHECK(pvFound == NULL);
#endif
            }
            uint64_t const cNsElapsedRead = RTTimeNanoTS() - u64StartTS;
            cMBPerSec = (uint64_t)(  ((uint64_t)cIterations * cbTestSet) /* bytes */
                                   / ((long double)cNsElapsedRead / RT_NS_1SEC_64) /* seconds */
                                   / _1M /* MB */ );
            RTTestIValueF(cMBPerSec, RTTESTUNIT_MEGABYTES_PER_SEC, "cpu%02u-mem%02u-read", iAllocCpu, iAccessCpu);

            /*
             * The read/write test.
             */
            RTTimeNanoTS(); RTThreadYield();
            u64StartTS = RTTimeNanoTS();
            for (uint32_t i = 0; i < cIterations; i++)
            {
                ASMCompilerBarrier(); /* paranoia */
                memcpy(pvTest, (uint8_t *)pvTest + cbTestSet / 2, cbTestSet / 2);
            }
            uint64_t const cNsElapsedRW = RTTimeNanoTS() - u64StartTS;
            cMBPerSec = (uint64_t)(  ((uint64_t)cIterations * cbTestSet) /* bytes */
                                   / ((long double)cNsElapsedRW / RT_NS_1SEC_64) /* seconds */
                                   / _1M /* MB */ );
            RTTestIValueF(cMBPerSec, RTTESTUNIT_MEGABYTES_PER_SEC, "cpu%02u-mem%02u-read-write", iAllocCpu, iAccessCpu);

            /*
             * Total time.
             */
            RTTestIValueF(cNsElapsedRead + cNsElapsedWrite + cNsElapsedRW, RTTESTUNIT_NS,
                          "cpu%02u-mem%02u-time", iAllocCpu, iAccessCpu);

            /* advance */
            iAccessCpu = getNextCpu(iAccessCpu);
        }

        /*
         * Clean up and advance to the next CPU.
         */
        RTMemPageFree(pvTest, cbTestSet);
        iAllocCpu = getNextCpu(iAllocCpu);
    }
}
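For scale, the bandwidth computation above works out as follows: 384 iterations over a 32 MiB buffer touch 384 × 32 MiB = 12288 MiB in total; if cNsElapsedWrite comes to 3.0e9 ns (3 seconds), the reported value is 12288 / 3 = 4096 MB/s.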
Example 5
/**
 * Wipes free space on one or more volumes by creating large files.
 */
static RTEXITCODE handlerWipeFreeSpace(int argc, char **argv)
{
    /*
     * Parse arguments.
     */
    const char *apszDefFiles[2] = { "./wipefree.spc", NULL };
    bool        fAll            = false;
    uint32_t    u32Filler       = UINT32_C(0xf6f6f6f6);
    uint64_t    cbMinLeftOpt    = _32M;

    static RTGETOPTDEF const s_aOptions[] =
    {
        { "--all",      'a', RTGETOPT_REQ_NOTHING },
        { "--filler",   'f', RTGETOPT_REQ_UINT32 },
        { "--min-free", 'm', RTGETOPT_REQ_UINT64 },
    };
    RTGETOPTSTATE State;
    RTGetOptInit(&State, argc, argv, &s_aOptions[0], RT_ELEMENTS(s_aOptions), 1, RTGETOPTINIT_FLAGS_OPTS_FIRST);
    RTGETOPTUNION ValueUnion;
    int chOpt;
    while (  (chOpt = RTGetOpt(&State, &ValueUnion)) != 0
           && chOpt != VINF_GETOPT_NOT_OPTION)
    {
        switch (chOpt)
        {
            case 'a':
                fAll = true;
                break;
            case 'f':
                u32Filler = ValueUnion.u32;
                break;
            case 'm':
                cbMinLeftOpt = ValueUnion.u64;
                break;
            case 'h':
                RTPrintf("usage: wipefrespace [options] [filename1 [..]]\n"
                         "\n"
                         "Options:\n"
                         "  -a, --all\n"
                         "    Try do the free space wiping on all seemingly relevant file systems.\n"
                         "    Changes the meaning of the filenames  "
                         "    This is not yet implemented\n"
                         "  -p, --filler <32-bit value>\n"
                         "    What to fill the blocks we write with.\n"
                         "    Default: 0xf6f6f6f6\n"
                         "  -m, --min-free <64-bit byte count>\n"
                         "    Specifies when to stop in terms of free disk space (in bytes).\n"
                         "    Default: 32MB\n"
                         "\n"
                         "Zero or more names of files to do the free space wiping thru can be given.\n"
                         "When --all is NOT used, each of the files are used to do free space wiping on\n"
                         "the volume they will live on.  However, when --all is in effect the files are\n"
                         "appended to the volume mountpoints and only the first that can be created will\n"
                         "be used.  Files (used ones) will be removed when done.\n"
                         "\n"
                         "If no filename is given, the default is: %s\n"
                         , apszDefFiles[0]);
                return RTEXITCODE_SUCCESS;

            default:
                return RTGetOptPrintError(chOpt, &ValueUnion);
        }
    }

    char **papszFiles;
    if (chOpt == 0)
        papszFiles = (char **)apszDefFiles;
    else
        papszFiles = RTGetOptNonOptionArrayPtr(&State);

    /*
     * Allocate and prep a memory block which we'll write over and over again.
     */
    uint32_t  cbFiller   = _2M;
    uint32_t *pu32Filler = (uint32_t *)RTMemPageAlloc(cbFiller);
    while (!pu32Filler)
    {
        cbFiller <<= 1;
        if (cbFiller >= _4K)
            pu32Filler = (uint32_t *)RTMemPageAlloc(cbFiller);
        else
            return RTMsgErrorExit(RTEXITCODE_FAILURE, "RTMemPageAlloc failed for sizes between 4KB and 2MB!\n");
    }
    for (uint32_t i = 0; i < cbFiller / sizeof(pu32Filler[0]); i++)
        pu32Filler[i] = u32Filler;

    /*
     * Do the requested work.
     */
    RTEXITCODE rcExit = RTEXITCODE_SUCCESS;
    if (!fAll)
    {
        for (uint32_t iFile = 0; papszFiles[iFile] != NULL; iFile++)
        {
            RTEXITCODE rcExit2 = doOneFreeSpaceWipe(papszFiles[iFile], pu32Filler, cbFiller, cbMinLeftOpt);
            if (rcExit2 != RTEXITCODE_SUCCESS && rcExit == RTEXITCODE_SUCCESS)
                rcExit = rcExit2;
        }
    }
    else
    {
        /*
         * Reject --all for now.
         */
        rcExit = RTMsgErrorExit(RTEXITCODE_FAILURE, "The --all option is not yet implemented!\n");
    }

    RTMemPageFree(pu32Filler, cbFiller);
    return rcExit;
}
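doOneFreeSpaceWipe itself is not part of this example; the following is a hedged sketch of what such a helper plausibly looks like (the IPRT calls are real APIs, but the logic and the helper name are assumptions, not the source):

static RTEXITCODE doOneFreeSpaceWipeSketch(const char *pszFile, uint32_t const *pu32Filler,
                                           size_t cbFiller, uint64_t cbMinLeft)
{
    RTFILE hFile;
    int rc = RTFileOpen(&hFile, pszFile, RTFILE_O_CREATE | RTFILE_O_WRITE | RTFILE_O_DENY_NONE);
    if (RT_FAILURE(rc))
        return RTMsgErrorExit(RTEXITCODE_FAILURE, "Failed to create '%s': %Rrc\n", pszFile, rc);

    for (;;)
    {
        /* Stop when the volume is down to the requested amount of free space. */
        RTFOFF   cbTotal, cbFree;
        uint32_t cbBlock, cbSector;
        rc = RTFsQuerySizes(pszFile, &cbTotal, &cbFree, &cbBlock, &cbSector);
        if (RT_FAILURE(rc) || (uint64_t)cbFree <= cbMinLeft)
            break;

        /* Write another filler block; running out of space is the expected end. */
        rc = RTFileWrite(hFile, pu32Filler, cbFiller, NULL);
        if (RT_FAILURE(rc)) /* typically VERR_DISK_FULL */
            break;
    }

    RTFileClose(hFile);
    RTFileDelete(pszFile);
    return (RT_SUCCESS(rc) || rc == VERR_DISK_FULL) ? RTEXITCODE_SUCCESS : RTEXITCODE_FAILURE;
}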
Example 6
/**
 * Internal allocator.
 */
RTDECL(void *) rtR3MemAlloc(const char *pszOp, RTMEMTYPE enmType, size_t cbUnaligned, size_t cbAligned,
                            const char *pszTag, void *pvCaller, RT_SRC_POS_DECL)
{
    /*
     * Sanity.
     */
    if (    RT_ALIGN_Z(RTALLOC_EFENCE_SIZE, PAGE_SIZE) != RTALLOC_EFENCE_SIZE
        ||  RTALLOC_EFENCE_SIZE == 0)
    {
        rtmemComplain(pszOp, "Invalid E-fence size! %#x\n", RTALLOC_EFENCE_SIZE);
        return NULL;
    }
    if (!cbUnaligned)
    {
#if 0
        rtmemComplain(pszOp, "Request of ZERO bytes allocation!\n");
        return NULL;
#else
        cbAligned = cbUnaligned = 1;
#endif
    }

#ifndef RTALLOC_EFENCE_IN_FRONT
    /* Alignment decreases fence accuracy, but this is at least partially
     * counteracted by filling and checking the alignment padding. When the
     * fence is in front then no extra alignment is needed. */
    cbAligned = RT_ALIGN_Z(cbAligned, RTALLOC_EFENCE_ALIGNMENT);
#endif

#ifdef RTALLOC_EFENCE_TRACE
    /*
     * Allocate the trace block.
     */
    PRTMEMBLOCK pBlock = rtmemBlockCreate(enmType, cbUnaligned, cbAligned, pszTag, pvCaller, RT_SRC_POS_ARGS);
    if (!pBlock)
    {
        rtmemComplain(pszOp, "Failed to allocate trace block!\n");
        return NULL;
    }
#endif

    /*
     * Allocate a block with page alignment space + the size of the E-fence.
     */
    size_t  cbBlock = RT_ALIGN_Z(cbAligned, PAGE_SIZE) + RTALLOC_EFENCE_SIZE;
    void   *pvBlock = RTMemPageAlloc(cbBlock);
    if (pvBlock)
    {
        /*
         * Calc the start of the fence and the user block
         * and then change the page protection of the fence.
         */
#ifdef RTALLOC_EFENCE_IN_FRONT
        void *pvEFence = pvBlock;
        void *pv       = (char *)pvEFence + RTALLOC_EFENCE_SIZE;
# ifdef RTALLOC_EFENCE_NOMAN_FILLER
        memset((char *)pv + cbUnaligned, RTALLOC_EFENCE_NOMAN_FILLER, cbBlock - RTALLOC_EFENCE_SIZE - cbUnaligned);
# endif
#else
        void *pvEFence = (char *)pvBlock + (cbBlock - RTALLOC_EFENCE_SIZE);
        void *pv       = (char *)pvEFence - cbAligned;
# ifdef RTALLOC_EFENCE_NOMAN_FILLER
        memset(pvBlock, RTALLOC_EFENCE_NOMAN_FILLER, cbBlock - RTALLOC_EFENCE_SIZE - cbAligned);
        memset((char *)pv + cbUnaligned, RTALLOC_EFENCE_NOMAN_FILLER, cbAligned - cbUnaligned);
# endif
#endif

#ifdef RTALLOC_EFENCE_FENCE_FILLER
        memset(pvEFence, RTALLOC_EFENCE_FENCE_FILLER, RTALLOC_EFENCE_SIZE);
#endif
        int rc = RTMemProtect(pvEFence, RTALLOC_EFENCE_SIZE, RTMEM_PROT_NONE);
        if (!rc)
        {
#ifdef RTALLOC_EFENCE_TRACE
            rtmemBlockInsert(pBlock, pv);
#endif
            if (enmType == RTMEMTYPE_RTMEMALLOCZ)
                memset(pv, 0, cbUnaligned);
#ifdef RTALLOC_EFENCE_FILLER
            else
                memset(pv, RTALLOC_EFENCE_FILLER, cbUnaligned);
#endif

            rtmemLog(pszOp, "returns %p (pvBlock=%p cbBlock=%#x pvEFence=%p cbUnaligned=%#x)\n", pv, pvBlock, cbBlock, pvEFence, cbUnaligned);
            return pv;
        }
        rtmemComplain(pszOp, "RTMemProtect failed, pvEFence=%p size %d, rc=%d\n", pvEFence, RTALLOC_EFENCE_SIZE, rc);
        RTMemPageFree(pvBlock, cbBlock);
    }
    else
        rtmemComplain(pszOp, "Failed to allocated %lu (%lu) bytes.\n", (unsigned long)cbBlock, (unsigned long)cbUnaligned);

#ifdef RTALLOC_EFENCE_TRACE
    rtmemBlockFree(pBlock);
#endif
    return NULL;
}
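To make the fence layout concrete, a worked example under assumed values (PAGE_SIZE = 4096, RTALLOC_EFENCE_SIZE = 4096, RTALLOC_EFENCE_ALIGNMENT = 16, fence at the back):

    cbUnaligned = 100  ->  cbAligned = RT_ALIGN_Z(100, 16) = 112
    cbBlock     = RT_ALIGN_Z(112, PAGE_SIZE) + 4096 = 8192
    pvEFence    = pvBlock + 4096   (the last page, protected with RTMEM_PROT_NONE)
    pv          = pvEFence - 112 = pvBlock + 3984

Any access past pv + 112 lands on the protected fence page and faults on the spot. Overruns into the 12 alignment-padding bytes (pv + 100 .. pv + 111) do not fault; that is the accuracy loss the alignment comment refers to, and the NOMAN filler check catches it at free time instead.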
Example 7
static void rtMemReplaceMallocAndFriends(void)
{
    struct
    {
        const char *pszName;
        PFNRT       pfnReplacement;
        PFNRT       pfnOrg;
        PFNRT      *ppfnJumpBack;
    } aApis[] =
    {
        { "free",    (PFNRT)rtMemReplacementFree,    (PFNRT)free,    (PFNRT *)&g_pfnOrgFree },
        { "realloc", (PFNRT)rtMemReplacementRealloc, (PFNRT)realloc, (PFNRT *)&g_pfnOrgRealloc },
        { "calloc",  (PFNRT)rtMemReplacementCalloc,  (PFNRT)calloc,  (PFNRT *)&g_pfnOrgCalloc },
        { "malloc",  (PFNRT)rtMemReplacementMalloc,  (PFNRT)malloc,  (PFNRT *)&g_pfnOrgMalloc },
#ifdef RT_OS_DARWIN
        { "malloc_size", (PFNRT)rtMemReplacementMallocSize,  (PFNRT)malloc_size,  (PFNRT *)&g_pfnOrgMallocSize },
#endif
    };

    /*
     * Initialize the jump backs to avoid recursively entering this function.
     */
    for (unsigned i = 0; i < RT_ELEMENTS(aApis); i++)
        *aApis[i].ppfnJumpBack = aApis[i].pfnOrg;

    /*
     * Give the user an option to skip replacing malloc.
     */
    if (getenv("IPRT_DONT_REPLACE_MALLOC"))
        return;

    /*
     * Allocate a page for jump back code (we leak it).
     */
    uint8_t *pbExecPage = (uint8_t *)RTMemPageAlloc(PAGE_SIZE); AssertFatal(pbExecPage);
    int rc = RTMemProtect(pbExecPage, PAGE_SIZE, RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC); AssertFatalRC(rc);

    /*
     * Do the ground work.
     */
    uint8_t *pb = pbExecPage;
    for (unsigned i = 0; i < RT_ELEMENTS(aApis); i++)
    {
        /* Resolve it. */
        PFNRT pfnOrg = (PFNRT)(uintptr_t)dlsym(RTLD_DEFAULT, aApis[i].pszName);
        if (pfnOrg)
            aApis[i].pfnOrg = pfnOrg;
        else
            pfnOrg = aApis[i].pfnOrg;

        /* Figure what we can replace and how much to duplicate in the jump back code. */
# ifdef RT_ARCH_AMD64
        uint32_t         cbNeeded   = 12;
        DISCPUMODE const enmCpuMode = DISCPUMODE_64BIT;
# elif defined(RT_ARCH_X86)
        uint32_t   const cbNeeded   = 5;
        DISCPUMODE const enmCpuMode = DISCPUMODE_32BIT;
# else
#  error "Port me"
# endif
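        /* These sizes match the patch assembled further down: on AMD64 the
         * patch is "mov rax, imm64" (10 bytes) + "jmp rax" (2 bytes) = 12
         * bytes; on x86 it is a single "jmp rel32" = 5 bytes. */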
        uint32_t offJmpBack = 0;
        uint32_t cbCopy = 0;
        while (offJmpBack < cbNeeded)
        {
            DISCPUSTATE Dis;
            uint32_t cbInstr = 1;
            rc = DISInstr((void *)((uintptr_t)pfnOrg + offJmpBack), enmCpuMode, &Dis, &cbInstr); AssertFatalRC(rc);
            AssertFatal(!(Dis.pCurInstr->fOpType & (DISOPTYPE_CONTROLFLOW)));
# ifdef RT_ARCH_AMD64
#  ifdef RT_OS_DARWIN
            /* Kludge for: cmp [malloc_def_zone_state], 1; jg 2; call _malloc_initialize; 2: */
            DISQPVPARAMVAL Parm;
            if (   Dis.ModRM.Bits.Mod == 0
                && Dis.ModRM.Bits.Rm == 5 /* wrt RIP */
                && (Dis.Param2.fUse & (DISUSE_IMMEDIATE16_SX8 | DISUSE_IMMEDIATE32_SX8 | DISUSE_IMMEDIATE64_SX8))
                && Dis.Param2.uValue == 1
                && Dis.pCurInstr->uOpcode == OP_CMP)
            {
                cbCopy = offJmpBack;

                offJmpBack += cbInstr;
                rc = DISInstr((void *)((uintptr_t)pfnOrg + offJmpBack), enmCpuMode, &Dis, &cbInstr); AssertFatalRC(rc);
                if (   Dis.pCurInstr->uOpcode == OP_JNBE
                    && Dis.Param1.uDisp.i8 == 5)
                {
                    offJmpBack += cbInstr + 5;
                    AssertFatal(offJmpBack >= cbNeeded);
                    break;
                }
            }
#  endif
            AssertFatal(!(Dis.ModRM.Bits.Mod == 0 && Dis.ModRM.Bits.Rm == 5 /* wrt RIP */));
# endif
            offJmpBack += cbInstr;
        }
        if (!cbCopy)
            cbCopy = offJmpBack;

        /* Assemble the jump back. */
        memcpy(pb, (void *)(uintptr_t)pfnOrg, cbCopy);
        uint32_t off = cbCopy;
# ifdef RT_ARCH_AMD64
        pb[off++] = 0xff; /* jmp qword [$+8 wrt RIP] */
        pb[off++] = 0x25;
        *(uint32_t *)&pb[off] = 0;
        off += 4;
        *(uint64_t *)&pb[off] = (uintptr_t)pfnOrg + offJmpBack;
        off += 8;
        off = RT_ALIGN_32(off, 16);
# elif defined(RT_ARCH_X86)
        pb[off++] = 0xe9; /* jmp rel32 */
        *(uint32_t *)&pb[off] = (uintptr_t)pfnOrg + offJmpBack - (uintptr_t)&pb[4];
        off += 4;
        off = RT_ALIGN_32(off, 8);
# else
#  error "Port me"
# endif
        *aApis[i].ppfnJumpBack = (PFNRT)(uintptr_t)pb;
        pb += off;
    }

    /*
     * Modify the APIs.
     */
    for (unsigned i = 0; i < RT_ELEMENTS(aApis); i++)
    {
        pb = (uint8_t *)(uintptr_t)aApis[i].pfnOrg;
        rc = RTMemProtect(pb, 16, RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC); AssertFatalRC(rc);

# ifdef RT_ARCH_AMD64
        /* Assemble the patch. */
        *pb++ = 0x48; /* mov rax, qword */
        *pb++ = 0xb8;
        *(uint64_t *)pb = (uintptr_t)aApis[i].pfnReplacement;
        pb += 8;
        *pb++ = 0xff; /* jmp rax */
        *pb++ = 0xe0;
# elif defined(RT_ARCH_X86)
        *pb++ = 0xe9; /* jmp rel32 */
        *(uint32_t *)pb = (uintptr_t)aApis[i].pfnReplacement - (uintptr_t)&pb[4];
# else
#  error "Port me"
# endif
    }
}
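The rtMemReplacement* functions themselves are not shown in this example. Here is a hypothetical sketch of one of them (assumptions: g_pfnOrgMalloc is declared as void *(*)(size_t), as the table cast above suggests; the real implementation may differ):

/* Hypothetical sketch, not the actual implementation: route malloc through
 * IPRT and fall back to the original allocator via the jump-back trampoline,
 * which executes the copied prologue bytes and then jumps past the patch. */
static void *rtMemReplacementMallocSketch(size_t cb)
{
    void *pv = RTMemAlloc(cb);
    if (!pv && g_pfnOrgMalloc)
        pv = g_pfnOrgMalloc(cb);
    return pv;
}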