Ejemplo n.º 1
0
DECLHIDDEN(int) rtSchedNativeCalcDefaultPriority(RTTHREADTYPE enmType)
{
    Assert(enmType > RTTHREADTYPE_INVALID && enmType < RTTHREADTYPE_END);

    /*
     * Get the current priority.
     */
    int iBasePriority = rtSchedDarwinGetBasePriority();
    Assert(iBasePriority >= 0 && iBasePriority <= 63);

    /*
     * If it doesn't match the default, select the closest one from the table.
     */
    int offBest = RT_ABS(g_pProcessPriority->aTypes[enmType].iBasePriority - iBasePriority);
    if (offBest)
    {
        for (unsigned i = 0; i < RT_ELEMENTS(g_aPriorities); i++)
        {
            int off = RT_ABS(g_aPriorities[i].aTypes[enmType].iBasePriority - iBasePriority);
            if (off < offBest)
            {
                g_pProcessPriority = &g_aPriorities[i];
                if (!off)
                    break;
                offBest = off;
            }
        }
    }

    return VINF_SUCCESS;
}
static void Test4(unsigned cThreads, unsigned cSeconds, unsigned uWritePercent, bool fYield, bool fQuiet)
{
    unsigned i;
    uint64_t acIterations[32];
    RTTHREAD aThreads[RT_ELEMENTS(acIterations)];
    AssertRelease(cThreads <= RT_ELEMENTS(acIterations));

    RTTestSubF(g_hTest, "Test4 - %u threads, %u sec, %u%% writes, %syielding",
               cThreads, cSeconds, uWritePercent, fYield ? "" : "non-");

    /*
     * Init globals.
     */
    g_fYield = fYield;
    g_fQuiet = fQuiet;
    g_fTerminate = false;
    g_uWritePercent = uWritePercent;
    g_cConcurrentWriters = 0;
    g_cConcurrentReaders = 0;

    RTTEST_CHECK_RC_RETV(g_hTest, RTCritSectRwInit(&g_CritSectRw), VINF_SUCCESS);

    /*
     * Create the threads and let them block on the semrw.
     */
    RTTEST_CHECK_RC_RETV(g_hTest, RTCritSectRwEnterExcl(&g_CritSectRw), VINF_SUCCESS);

    for (i = 0; i < cThreads; i++)
    {
        acIterations[i] = 0;
        RTTEST_CHECK_RC_RETV(g_hTest, RTThreadCreateF(&aThreads[i], Test4Thread, &acIterations[i], 0,
                                                      RTTHREADTYPE_DEFAULT, RTTHREADFLAGS_WAITABLE,
                                                      "test-%u", i), VINF_SUCCESS);
    }

    /*
     * Do the test run.
     */
    uint32_t cErrorsBefore = RTTestErrorCount(g_hTest);
    uint64_t u64StartTS = RTTimeNanoTS();
    RTTEST_CHECK_RC(g_hTest, RTCritSectRwLeaveExcl(&g_CritSectRw), VINF_SUCCESS);
    RTThreadSleep(cSeconds * 1000);
    ASMAtomicWriteBool(&g_fTerminate, true);
    uint64_t ElapsedNS = RTTimeNanoTS() - u64StartTS;

    /*
     * Clean up the threads and semaphore.
     */
    for (i = 0; i < cThreads; i++)
        RTTEST_CHECK_RC(g_hTest, RTThreadWait(aThreads[i], 5000, NULL), VINF_SUCCESS);

    RTTEST_CHECK_MSG(g_hTest, g_cConcurrentWriters == 0, (g_hTest, "g_cConcurrentWriters=%u at end of test\n", g_cConcurrentWriters));
    RTTEST_CHECK_MSG(g_hTest, g_cConcurrentReaders == 0, (g_hTest, "g_cConcurrentReaders=%u at end of test\n", g_cConcurrentReaders));

    RTTEST_CHECK_RC(g_hTest, RTCritSectRwDelete(&g_CritSectRw), VINF_SUCCESS);

    if (RTTestErrorCount(g_hTest) != cErrorsBefore)
        RTThreadSleep(100);

    /*
     * Collect and display the results.
     */
    uint64_t cItrTotal = acIterations[0];
    for (i = 1; i < cThreads; i++)
        cItrTotal += acIterations[i];

    uint64_t cItrNormal = cItrTotal / cThreads;
    uint64_t cItrMinOK = cItrNormal / 20; /* 5% */
    uint64_t cItrMaxDeviation = 0;
    for (i = 0; i < cThreads; i++)
    {
        uint64_t cItrDelta = RT_ABS((int64_t)(acIterations[i] - cItrNormal));
        if (acIterations[i] < cItrMinOK)
            RTTestFailed(g_hTest, "Thread %u did less than 5%% of the iterations - %llu (it) vs. %llu (5%%) - %llu%%\n",
                         i, acIterations[i], cItrMinOK, cItrDelta * 100 / cItrNormal);
        else if (cItrDelta > cItrNormal / 2)
            RTTestPrintf(g_hTest, RTTESTLVL_ALWAYS,
                         "Warning! Thread %u deviates by more than 50%% - %llu (it) vs. %llu (avg) - %llu%%\n",
                         i, acIterations[i], cItrNormal, cItrDelta * 100 / cItrNormal);
        if (cItrDelta > cItrMaxDeviation)
            cItrMaxDeviation = cItrDelta;

    }

    //RTTestPrintf(g_hTest, RTTESTLVL_ALWAYS,
    //             "Threads: %u  Total: %llu  Per Sec: %llu  Avg: %llu ns  Max dev: %llu%%\n",
    //             cThreads,
    //             cItrTotal,
    //             cItrTotal / cSeconds,
    //             ElapsedNS / cItrTotal,
    //             cItrMaxDeviation * 100 / cItrNormal
    //             );
    //
    RTTestValue(g_hTest, "Thruput", cItrTotal * UINT32_C(1000000000) / ElapsedNS, RTTESTUNIT_CALLS_PER_SEC);
    RTTestValue(g_hTest, "Max diviation", cItrMaxDeviation * 100 / cItrNormal, RTTESTUNIT_PCT);
}
static int Test1(unsigned cThreads, unsigned cSeconds, bool fYield, bool fQuiet)
{
    int rc;
    unsigned i;
    uint64_t g_au64[32];
    RTTHREAD aThreads[RT_ELEMENTS(g_au64)];
    AssertRelease(cThreads <= RT_ELEMENTS(g_au64));

    /*
     * Init globals.
     */
    g_fYield = fYield;
    g_fQuiet = fQuiet;
    g_fTerminate = false;

    rc = RTSemMutexCreate(&g_hMutex);
    if (RT_FAILURE(rc))
        return PrintError("RTSemMutexCreate failed (rc=%Rrc)\n", rc);

    /*
     * Create the threads and let them block on the mutex.
     */
    rc = RTSemMutexRequest(g_hMutex, RT_INDEFINITE_WAIT);
    if (RT_FAILURE(rc))
        return PrintError("RTSemMutexRequest failed (rc=%Rrc)\n", rc);

    for (i = 0; i < cThreads; i++)
    {
        g_au64[i] = 0;
        rc = RTThreadCreate(&aThreads[i], ThreadTest1, &g_au64[i], 0, RTTHREADTYPE_DEFAULT, RTTHREADFLAGS_WAITABLE, "test");
        if (RT_FAILURE(rc))
            return PrintError("RTThreadCreate failed for thread %u (rc=%Rrc)\n", i, rc);
    }

    if (!fQuiet)
        RTPrintf("tstSemMutex: %zu Threads created. Racing them for %u seconds (%s) ...\n",
                 cThreads, cSeconds, g_fYield ? "yielding" : "no yielding");

    uint64_t u64StartTS = RTTimeNanoTS();
    rc = RTSemMutexRelease(g_hMutex);
    if (RT_FAILURE(rc))
        PrintError("RTSemMutexRelease failed (rc=%Rrc)\n", rc);
    RTThreadSleep(cSeconds * 1000);
    ASMAtomicXchgBool(&g_fTerminate, true);
    uint64_t ElapsedNS = RTTimeNanoTS() - u64StartTS;

    for (i = 0; i < cThreads; i++)
    {
        rc = RTThreadWait(aThreads[i], 5000, NULL);
        if (RT_FAILURE(rc))
            PrintError("RTThreadWait failed for thread %u (rc=%Rrc)\n", i, rc);
    }

    rc = RTSemMutexDestroy(g_hMutex);
    if (RT_FAILURE(rc))
        PrintError("RTSemMutexDestroy failed - %Rrc\n", rc);
    g_hMutex = NIL_RTSEMMUTEX;
    if (g_cErrors)
        RTThreadSleep(100);

    /*
     * Collect and display the results.
     */
    uint64_t Total = g_au64[0];
    for (i = 1; i < cThreads; i++)
        Total += g_au64[i];

    uint64_t Normal = Total / cThreads;
    uint64_t MaxDeviation = 0;
    for (i = 0; i < cThreads; i++)
    {
        uint64_t Delta = RT_ABS((int64_t)(g_au64[i] - Normal));
        if (Delta > Normal / 2)
            RTPrintf("tstSemMutex: Warning! Thread %d deviates by more than 50%% - %llu (it) vs. %llu (avg)\n",
                     i, g_au64[i], Normal);
        if (Delta > MaxDeviation)
            MaxDeviation = Delta;

    }

    RTPrintf("tstSemMutex: Threads: %u  Total: %llu  Per Sec: %llu  Avg: %llu ns  Max dev: %llu%%\n",
             cThreads,
             Total,
             Total / cSeconds,
             ElapsedNS / Total,
             MaxDeviation * 100 / Normal
             );
    return 0;
}
Ejemplo n.º 4
0
int main(int argc, char **argv)
{
    RTR3InitExe(argc, &argv, 0);

    /*
     * Parse args
     */
    static const RTGETOPTDEF g_aOptions[] =
    {
        { "--iterations",       'i', RTGETOPT_REQ_INT32 },
        { "--hex",              'h', RTGETOPT_REQ_NOTHING },
        { "--decimal",          'd', RTGETOPT_REQ_NOTHING },
        { "--spin",             's', RTGETOPT_REQ_NOTHING },
        { "--reference",        'r', RTGETOPT_REQ_UINT64 },  /* reference value of CpuHz, display the
                                                              * CpuHz deviation in a separate column. */
    };

    uint32_t cIterations = 40;
    bool fHex = true;
    bool fSpin = false;
    int ch;
    uint64_t uCpuHzRef = 0;
    uint64_t uCpuHzOverallDeviation = 0;
    int64_t  iCpuHzMaxDeviation = 0;
    int32_t cCpuHzOverallDevCnt = 0;
    RTGETOPTUNION ValueUnion;
    RTGETOPTSTATE GetState;
    RTGetOptInit(&GetState, argc, argv, g_aOptions, RT_ELEMENTS(g_aOptions), 1, RTGETOPTINIT_FLAGS_NO_STD_OPTS);
    while ((ch = RTGetOpt(&GetState, &ValueUnion)))
    {
        switch (ch)
        {
            case 'i':
                cIterations = ValueUnion.u32;
                break;

            case 'd':
                fHex = false;
                break;

            case 'h':
                fHex = true;
                break;

            case 's':
                fSpin = true;
                break;

            case 'r':
                uCpuHzRef = ValueUnion.u64;
                break;

            default:
                return RTGetOptPrintError(ch, &ValueUnion);
        }
    }

    /*
     * Init
     */
    PSUPDRVSESSION pSession = NIL_RTR0PTR;
    int rc = SUPR3Init(&pSession);
    if (RT_SUCCESS(rc))
    {
        if (g_pSUPGlobalInfoPage)
        {
            RTPrintf("tstGIP-2: cCpus=%d  u32UpdateHz=%RU32  u32UpdateIntervalNS=%RU32  u64NanoTSLastUpdateHz=%RX64  u64CpuHz=%RU64  uCpuHzRef=%RU64  u32Mode=%d (%s) u32Version=%#x\n",
                     g_pSUPGlobalInfoPage->cCpus,
                     g_pSUPGlobalInfoPage->u32UpdateHz,
                     g_pSUPGlobalInfoPage->u32UpdateIntervalNS,
                     g_pSUPGlobalInfoPage->u64NanoTSLastUpdateHz,
                     g_pSUPGlobalInfoPage->u64CpuHz,
                     uCpuHzRef,
                     g_pSUPGlobalInfoPage->u32Mode,
                     SUPGetGIPModeName(g_pSUPGlobalInfoPage),
                     g_pSUPGlobalInfoPage->u32Version);
            RTPrintf(fHex
                     ? "tstGIP-2:     it: u64NanoTS        delta     u64TSC           UpIntTSC H  TransId      CpuHz      %sTSC Interval History...\n"
                     : "tstGIP-2:     it: u64NanoTS        delta     u64TSC             UpIntTSC H    TransId      CpuHz      %sTSC Interval History...\n",
                     uCpuHzRef ? "  CpuHz deviation  " : "");
            static SUPGIPCPU s_aaCPUs[2][256];
            for (uint32_t i = 0; i < cIterations; i++)
            {
                /* copy the data */
                memcpy(&s_aaCPUs[i & 1][0], &g_pSUPGlobalInfoPage->aCPUs[0], g_pSUPGlobalInfoPage->cCpus * sizeof(g_pSUPGlobalInfoPage->aCPUs[0]));

                /* display it & find something to spin on. */
                uint32_t u32TransactionId = 0;
                uint32_t volatile *pu32TransactionId = NULL;
                for (unsigned iCpu = 0; iCpu < g_pSUPGlobalInfoPage->cCpus; iCpu++)
                    if (    g_pSUPGlobalInfoPage->aCPUs[iCpu].u64CpuHz > 0
                        &&  g_pSUPGlobalInfoPage->aCPUs[iCpu].u64CpuHz != _4G + 1)
                    {
                        char szCpuHzDeviation[32];
                        PSUPGIPCPU pPrevCpu = &s_aaCPUs[!(i & 1)][iCpu];
                        PSUPGIPCPU pCpu = &s_aaCPUs[i & 1][iCpu];
                        if (uCpuHzRef)
                        {
                            int64_t iCpuHzDeviation = pCpu->u64CpuHz - uCpuHzRef;
                            uint64_t uCpuHzDeviation = RT_ABS(iCpuHzDeviation);
                            if (uCpuHzDeviation > 999999999)
                                RTStrPrintf(szCpuHzDeviation, sizeof(szCpuHzDeviation), "%17s  ", "?");
                            else
                            {
                                /* Wait until the history validation code takes effect. */
                                if (pCpu->u32TransactionId > 23 + (8 * 2) + 1)
                                {
                                    if (RT_ABS(iCpuHzDeviation) > RT_ABS(iCpuHzMaxDeviation))
                                        iCpuHzMaxDeviation = iCpuHzDeviation;
                                    uCpuHzOverallDeviation += uCpuHzDeviation;
                                    cCpuHzOverallDevCnt++;
                                }
                                uint32_t uPct = (uint32_t)(uCpuHzDeviation * 100000 / uCpuHzRef + 5);
                                RTStrPrintf(szCpuHzDeviation, sizeof(szCpuHzDeviation), "%10RI64%3d.%02d%%  ",
                                            iCpuHzDeviation, uPct / 1000, (uPct % 1000) / 10);
                            }
                        }
                        else
                            szCpuHzDeviation[0] = '\0';
                        RTPrintf(fHex
                                 ? "tstGIP-2: %4d/%d: %016llx %09llx %016llx %08x %d %08x %15llu %s%08x %08x %08x %08x %08x %08x %08x %08x (%d)\n"
                                 : "tstGIP-2: %4d/%d: %016llu %09llu %016llu %010u %d %010u %15llu %s%08x %08x %08x %08x %08x %08x %08x %08x (%d)\n",
                                 i, iCpu,
                                 pCpu->u64NanoTS,
                                 i ? pCpu->u64NanoTS - pPrevCpu->u64NanoTS : 0,
                                 pCpu->u64TSC,
                                 pCpu->u32UpdateIntervalTSC,
                                 pCpu->iTSCHistoryHead,
                                 pCpu->u32TransactionId,
                                 pCpu->u64CpuHz,
                                 szCpuHzDeviation,
                                 pCpu->au32TSCHistory[0],
                                 pCpu->au32TSCHistory[1],
                                 pCpu->au32TSCHistory[2],
                                 pCpu->au32TSCHistory[3],
                                 pCpu->au32TSCHistory[4],
                                 pCpu->au32TSCHistory[5],
                                 pCpu->au32TSCHistory[6],
                                 pCpu->au32TSCHistory[7],
                                 pCpu->cErrors);
                        if (!pu32TransactionId)
                        {
                            pu32TransactionId = &g_pSUPGlobalInfoPage->aCPUs[iCpu].u32TransactionId;
                            u32TransactionId = pCpu->u32TransactionId;
                        }
                    }

                /* wait a bit / spin */
                if (!fSpin)
                    RTThreadSleep(9);
                else
                {
                    if (pu32TransactionId)
                    {
                        uint32_t uTmp;
                        while (   u32TransactionId == (uTmp = *pu32TransactionId)
                               || (uTmp & 1))
                            ASMNopPause();
                    }
                    else
                        RTThreadSleep(1);
                }
            }

            /*
             * Display TSC deltas.
             *
             * First iterative over the APIC ID array to get mostly consistent CPUID to APIC ID mapping.
             * Then iterate over the offline CPUs. It is possible that there's a race between the online/offline
             * states between the two iterations, but that cannot be helped from ring-3 anyway and not a biggie.
             */
            RTPrintf("tstGIP-2: TSC deltas:\n");
            RTPrintf("tstGIP-2:  idApic: i64TSCDelta\n");
            for (unsigned i = 0; i < RT_ELEMENTS(g_pSUPGlobalInfoPage->aiCpuFromApicId); i++)
            {
                uint16_t iCpu = g_pSUPGlobalInfoPage->aiCpuFromApicId[i];
                if (iCpu != UINT16_MAX)
                {
                    RTPrintf("tstGIP-2: %7d: %lld\n", g_pSUPGlobalInfoPage->aCPUs[iCpu].idApic,
                             g_pSUPGlobalInfoPage->aCPUs[iCpu].i64TSCDelta);
                }
            }

            for (unsigned iCpu = 0; iCpu < g_pSUPGlobalInfoPage->cCpus; iCpu++)
                if (g_pSUPGlobalInfoPage->aCPUs[iCpu].idApic == UINT16_MAX)
                    RTPrintf("tstGIP-2: offline: %lld\n", g_pSUPGlobalInfoPage->aCPUs[iCpu].i64TSCDelta);

            RTPrintf("tstGIP-2: enmUseTscDelta=%d  fGetGipCpu=%#x\n",
                     g_pSUPGlobalInfoPage->enmUseTscDelta, g_pSUPGlobalInfoPage->fGetGipCpu);
            if (   uCpuHzRef
                && cCpuHzOverallDevCnt)
            {
                uint32_t uPct    = (uint32_t)(uCpuHzOverallDeviation * 100000 / cCpuHzOverallDevCnt / uCpuHzRef + 5);
                RTPrintf("tstGIP-2: Average CpuHz deviation: %d.%02d%%\n",
                         uPct / 1000, (uPct % 1000) / 10);

                uint32_t uMaxPct = (uint32_t)(RT_ABS(iCpuHzMaxDeviation) * 100000 / uCpuHzRef + 5);
                RTPrintf("tstGIP-2: Maximum CpuHz deviation: %d.%02d%% (%RI64 ticks)\n",
                         uMaxPct / 1000, (uMaxPct % 1000) / 10, iCpuHzMaxDeviation);
            }
        }
        else
        {
            RTPrintf("tstGIP-2: g_pSUPGlobalInfoPage is NULL\n");
            rc = -1;
        }

        SUPR3Term(false /*fForced*/);
    }
    else
        RTPrintf("tstGIP-2: SUPR3Init failed: %Rrc\n", rc);
    return !!rc;
}
static void supR3HardenedDirectSyscall(PSUPHNTIMPDLL pDll, PCSUPHNTIMPFUNC pImport, PCSUPHNTIMPSYSCALL pSyscall,
                                       PSUPHNTLDRCACHEENTRY pLdrEntry, uint8_t *pbBits, bool fReportErrors)
{
    /*
     * Skip non-syscall entries.
     */
    if (!pSyscall->puApiNo)
        return;

    /*
     * Locate the virgin bits.
     */
    RTLDRADDR uValue;
    int rc = RTLdrGetSymbolEx(pLdrEntry->hLdrMod, pbBits, (uintptr_t)pDll->pbImageBase, UINT32_MAX, pImport->pszName, &uValue);
    if (RT_FAILURE(rc))
    {
        SUPHNTIMP_ERROR(fReportErrors, 16, "supR3HardenedDirectSyscall", kSupInitOp_Misc, rc,
                        "%s: RTLdrGetSymbolEx failed on %s: %Rrc", pDll->pszName, pImport->pszName, rc);
        return;
    }
    uintptr_t offSymbol = (uintptr_t)uValue - (uintptr_t)pDll->pbImageBase;
    uint8_t const *pbFunction = &pbBits[offSymbol];

    /*
     * Parse the code and extract the API call number.
     */
#ifdef RT_ARCH_AMD64
    /* Pattern #1: XP64/W2K3-64 thru Windows 8.1
       0:000> u ntdll!NtCreateSection
       ntdll!NtCreateSection:
       00000000`779f1750 4c8bd1          mov     r10,rcx
       00000000`779f1753 b847000000      mov     eax,47h
       00000000`779f1758 0f05            syscall
       00000000`779f175a c3              ret
       00000000`779f175b 0f1f440000      nop     dword ptr [rax+rax] */
    if (   pbFunction[ 0] == 0x4c /* mov r10, rcx */
        && pbFunction[ 1] == 0x8b
        && pbFunction[ 2] == 0xd1
        && pbFunction[ 3] == 0xb8 /* mov eax, 0000yyzzh */
        //&& pbFunction[ 4] == 0xZZ
        //&& pbFunction[ 5] == 0xYY
        && pbFunction[ 6] == 0x00
        && pbFunction[ 7] == 0x00
        && pbFunction[ 8] == 0x0f /* syscall */
        && pbFunction[ 9] == 0x05
        && pbFunction[10] == 0xc3 /* ret */ )
    {
        *pSyscall->puApiNo = RT_MAKE_U16(pbFunction[4], pbFunction[5]);
        *pImport->ppfnImport = pSyscall->pfnType1;
        return;
    }
#else
    /* Pattern #1: XP thru Windows 7
            kd> u ntdll!NtCreateSection
            ntdll!NtCreateSection:
            7c90d160 b832000000      mov     eax,32h
            7c90d165 ba0003fe7f      mov     edx,offset SharedUserData!SystemCallStub (7ffe0300)
            7c90d16a ff12            call    dword ptr [edx]
            7c90d16c c21c00          ret     1Ch
            7c90d16f 90              nop
       The variable bit is the value loaded into eax: XP=32h, W2K3=34h, Vista=4bh, W7=54h

       Pattern #2: Windows 8.1
            0:000:x86> u ntdll_6a0f0000!NtCreateSection
            ntdll_6a0f0000!NtCreateSection:
            6a15eabc b854010000      mov     eax,154h
            6a15eac1 e803000000      call    ntdll_6a0f0000!NtCreateSection+0xd (6a15eac9)
            6a15eac6 c21c00          ret     1Ch
            6a15eac9 8bd4            mov     edx,esp
            6a15eacb 0f34            sysenter
            6a15eacd c3              ret
       The variable bit is the value loaded into eax: W81=154h
       Note! One nice thing here is that we can share code pattern #1.  */

    if (   pbFunction[ 0] == 0xb8 /* mov eax, 0000yyzzh*/
        //&& pbFunction[ 1] <= 0xZZ
        //&& pbFunction[ 2] <= 0xYY
        && pbFunction[ 3] == 0x00
        && pbFunction[ 4] == 0x00)
    {
        *pSyscall->puApiNo = RT_MAKE_U16(pbFunction[1], pbFunction[2]);
        if (   pbFunction[5] == 0xba /* mov edx, offset SharedUserData!SystemCallStub */
            && pbFunction[ 6] == 0x00
            && pbFunction[ 7] == 0x03
            && pbFunction[ 8] == 0xfe
            && pbFunction[ 9] == 0x7f
            && pbFunction[10] == 0xff /* call [edx] */
            && pbFunction[11] == 0x12
            && (   (   pbFunction[12] == 0xc2 /* ret 1ch */
                    && pbFunction[13] == pSyscall->cbParams
                    && pbFunction[14] == 0x00)
                || (   pbFunction[12] == 0xc3 /* ret */
                    && pSyscall->cbParams == 0)
                )
           )
        {
            *pImport->ppfnImport = pSyscall->pfnType1;
            return;
        }

        if (   pbFunction[ 5] == 0xe8 /* call [$+3] */
            && RT_ABS(*(int32_t *)&pbFunction[6]) < 0x10
            && (   (   pbFunction[10] == 0xc2 /* ret 1ch */
                    && pbFunction[11] == pSyscall->cbParams
                    && pbFunction[12] == 0x00)
                || (   pbFunction[10] == 0xc3 /* ret */
                    && pSyscall->cbParams == 0)
               )
           )
        {
            *pImport->ppfnImport = pSyscall->pfnType2;
            return;
        }
    }
#endif

    /*
     * Failed to parse it.
     */
    volatile uint8_t abCopy[16];
    memcpy((void *)&abCopy[0], pbFunction, sizeof(abCopy));
    SUPHNTIMP_ERROR(fReportErrors, 17, "supR3HardenedWinInitImports", kSupInitOp_Misc, rc,
                    "%ls: supHardNtLdrCacheOpen failed: '%s': %.16Rhxs",
                    pDll->pwszName, pImport->pszName, &abCopy[0]);
}
DECLHIDDEN(int) rtR0InitNative(void)
{
    /*
     * Init the Nt cpu set.
     */
#ifdef IPRT_TARGET_NT4
    KAFFINITY ActiveProcessors = (UINT64_C(1) << KeNumberProcessors) - UINT64_C(1);
#else
    KAFFINITY ActiveProcessors = KeQueryActiveProcessors();
#endif
    RTCpuSetEmpty(&g_rtMpNtCpuSet);
    RTCpuSetFromU64(&g_rtMpNtCpuSet, ActiveProcessors);
/** @todo Port to W2K8 with > 64 cpus/threads. */

    /*
     * Initialize the function pointers.
     */
#ifdef IPRT_TARGET_NT4
    g_pfnrtNtExSetTimerResolution = NULL;
    g_pfnrtNtKeFlushQueuedDpcs = NULL;
    g_pfnrtHalRequestIpiW7Plus = NULL;
    g_pfnrtHalRequestIpiPreW7 = NULL;
    g_pfnrtNtHalSendSoftwareInterrupt = NULL;
    g_pfnrtKeIpiGenericCall = NULL;
    g_pfnrtKeInitializeAffinityEx = NULL;
    g_pfnrtKeAddProcessorAffinityEx = NULL;
    g_pfnrtKeGetProcessorIndexFromNumber = NULL;
    g_pfnrtRtlGetVersion = NULL;
    g_pfnrtKeQueryInterruptTime = NULL;
    g_pfnrtKeQueryInterruptTimePrecise = NULL;
    g_pfnrtKeQuerySystemTime = NULL;
    g_pfnrtKeQuerySystemTimePrecise = NULL;
#else
    UNICODE_STRING RoutineName;
    RtlInitUnicodeString(&RoutineName, L"ExSetTimerResolution");
    g_pfnrtNtExSetTimerResolution = (PFNMYEXSETTIMERRESOLUTION)MmGetSystemRoutineAddress(&RoutineName);

    RtlInitUnicodeString(&RoutineName, L"KeFlushQueuedDpcs");
    g_pfnrtNtKeFlushQueuedDpcs = (PFNMYKEFLUSHQUEUEDDPCS)MmGetSystemRoutineAddress(&RoutineName);

    RtlInitUnicodeString(&RoutineName, L"HalRequestIpi");
    g_pfnrtHalRequestIpiW7Plus = (PFNHALREQUESTIPI_W7PLUS)MmGetSystemRoutineAddress(&RoutineName);
    g_pfnrtHalRequestIpiPreW7 = (PFNHALREQUESTIPI_PRE_W7)g_pfnrtHalRequestIpiW7Plus;

    RtlInitUnicodeString(&RoutineName, L"HalSendSoftwareInterrupt");
    g_pfnrtNtHalSendSoftwareInterrupt = (PFNHALSENDSOFTWAREINTERRUPT)MmGetSystemRoutineAddress(&RoutineName);

    RtlInitUnicodeString(&RoutineName, L"KeIpiGenericCall");
    g_pfnrtKeIpiGenericCall = (PFNRTKEIPIGENERICCALL)MmGetSystemRoutineAddress(&RoutineName);

    RtlInitUnicodeString(&RoutineName, L"KeInitializeAffinityEx");
    g_pfnrtKeInitializeAffinityEx = (PFNKEINITIALIZEAFFINITYEX)MmGetSystemRoutineAddress(&RoutineName);

    RtlInitUnicodeString(&RoutineName, L"KeAddProcessorAffinityEx");
    g_pfnrtKeAddProcessorAffinityEx = (PFNKEADDPROCESSORAFFINITYEX)MmGetSystemRoutineAddress(&RoutineName);

    RtlInitUnicodeString(&RoutineName, L"KeGetProcessorIndexFromNumber");
    g_pfnrtKeGetProcessorIndexFromNumber = (PFNKEGETPROCESSORINDEXFROMNUMBER)MmGetSystemRoutineAddress(&RoutineName);

    RtlInitUnicodeString(&RoutineName, L"RtlGetVersion");
    g_pfnrtRtlGetVersion = (PFNRTRTLGETVERSION)MmGetSystemRoutineAddress(&RoutineName);
# ifndef RT_ARCH_AMD64
    RtlInitUnicodeString(&RoutineName, L"KeQueryInterruptTime");
    g_pfnrtKeQueryInterruptTime = (PFNRTKEQUERYINTERRUPTTIME)MmGetSystemRoutineAddress(&RoutineName);

    RtlInitUnicodeString(&RoutineName, L"KeQuerySystemTime");
    g_pfnrtKeQuerySystemTime = (PFNRTKEQUERYSYSTEMTIME)MmGetSystemRoutineAddress(&RoutineName);
# endif
    RtlInitUnicodeString(&RoutineName, L"KeQueryInterruptTimePrecise");
    g_pfnrtKeQueryInterruptTimePrecise = (PFNRTKEQUERYINTERRUPTTIMEPRECISE)MmGetSystemRoutineAddress(&RoutineName);

    RtlInitUnicodeString(&RoutineName, L"KeQuerySystemTimePrecise");
    g_pfnrtKeQuerySystemTimePrecise = (PFNRTKEQUERYSYSTEMTIMEPRECISE)MmGetSystemRoutineAddress(&RoutineName);
#endif

    /*
     * HACK ALERT! (and déjà vu warning - remember win32k.sys?)
     *
     * Try find _KPRCB::QuantumEnd and _KPRCB::[DpcData.]DpcQueueDepth.
     * For purpose of verification we use the VendorString member (12+1 chars).
     *
     * The offsets was initially derived by poking around with windbg
     * (dt _KPRCB, !prcb ++, and such like). Systematic harvesting was then
     * planned using dia2dump, grep and the symbol pack in a manner like this:
     *      dia2dump -type _KDPC_DATA -type _KPRCB EXE\ntkrnlmp.pdb | grep -wE "QuantumEnd|DpcData|DpcQueueDepth|VendorString"
     *
     * The final solution ended up using a custom harvester program called
     * ntBldSymDb that recursively searches thru unpacked symbol packages for
     * the desired structure offsets.  The program assumes that the packages
     * are unpacked into directories with the same name as the package, with
     * exception of some of the w2k packages which requires a 'w2k' prefix to
     * be distinguishable from another.
     */

    RTNTSDBOSVER OsVerInfo;
    rtR0NtGetOsVersionInfo(&OsVerInfo);

    /*
     * Gather consistent CPU vendor string and PRCB pointers.
     */
    KIRQL OldIrql;
    KeRaiseIrql(DISPATCH_LEVEL, &OldIrql); /* make sure we stay on the same cpu */

    union
    {
        uint32_t auRegs[4];
        char szVendor[4*3+1];
    } u;
    ASMCpuId(0, &u.auRegs[3], &u.auRegs[0], &u.auRegs[2], &u.auRegs[1]);
    u.szVendor[4*3] = '\0';

    uint8_t *pbPrcb;
    __try /* Warning. This try/except statement may provide some false safety. */
    {
#if defined(RT_ARCH_X86)
        PKPCR    pPcr   = (PKPCR)__readfsdword(RT_OFFSETOF(KPCR,SelfPcr));
        pbPrcb = (uint8_t *)pPcr->Prcb;
#elif defined(RT_ARCH_AMD64)
        PKPCR    pPcr   = (PKPCR)__readgsqword(RT_OFFSETOF(KPCR,Self));
        pbPrcb = (uint8_t *)pPcr->CurrentPrcb;
#else
# error "port me"
        pbPrcb = NULL;
#endif
    }
    __except(EXCEPTION_EXECUTE_HANDLER)
    {
        pbPrcb = NULL;
    }

    /*
     * Search the database
     */
    if (pbPrcb)
    {
        /* Find the best matching kernel version based on build number. */
        uint32_t iBest      = UINT32_MAX;
        int32_t  iBestDelta = INT32_MAX;
        for (uint32_t i = 0; i < RT_ELEMENTS(g_artNtSdbSets); i++)
        {
            if (g_artNtSdbSets[i].OsVerInfo.fChecked != OsVerInfo.fChecked)
                continue;
            if (OsVerInfo.fSmp /*must-be-smp*/ && !g_artNtSdbSets[i].OsVerInfo.fSmp)
                continue;

            int32_t iDelta = RT_ABS((int32_t)OsVerInfo.uBuildNo - (int32_t)g_artNtSdbSets[i].OsVerInfo.uBuildNo);
            if (   iDelta == 0
                && (g_artNtSdbSets[i].OsVerInfo.uCsdNo == OsVerInfo.uCsdNo || OsVerInfo.uCsdNo == MY_NIL_CSD))
            {
                /* prefect */
                iBestDelta = iDelta;
                iBest      = i;
                break;
            }
            if (   iDelta < iBestDelta
                || iBest == UINT32_MAX
                || (   iDelta == iBestDelta
                    && OsVerInfo.uCsdNo != MY_NIL_CSD
                    &&   RT_ABS(g_artNtSdbSets[i    ].OsVerInfo.uCsdNo - (int32_t)OsVerInfo.uCsdNo)
                       < RT_ABS(g_artNtSdbSets[iBest].OsVerInfo.uCsdNo - (int32_t)OsVerInfo.uCsdNo)
                   )
                )
            {
                iBestDelta = iDelta;
                iBest      = i;
            }
        }
        if (iBest < RT_ELEMENTS(g_artNtSdbSets))
        {
            /* Try all sets: iBest -> End; iBest -> Start. */
            bool    fDone = false;
            int32_t i     = iBest;
            while (   i < RT_ELEMENTS(g_artNtSdbSets)
                   && !(fDone = rtR0NtTryMatchSymSet(&g_artNtSdbSets[i], pbPrcb, u.szVendor, &OsVerInfo)))
                i++;
            if (!fDone)
            {
                i = (int32_t)iBest - 1;
                while (   i >= 0
                       && !(fDone = rtR0NtTryMatchSymSet(&g_artNtSdbSets[i], pbPrcb, u.szVendor, &OsVerInfo)))
                    i--;
            }
        }
        else
            DbgPrint("IPRT: Failed to locate data set.\n");
    }
    else
        DbgPrint("IPRT: Failed to get PCBR pointer.\n");

    KeLowerIrql(OldIrql); /* Lowering the IRQL early in the hope that we may catch exceptions below. */

#ifndef IN_GUEST
    if (!g_offrtNtPbQuantumEnd && !g_offrtNtPbDpcQueueDepth)
        DbgPrint("IPRT: Neither _KPRCB::QuantumEnd nor _KPRCB::DpcQueueDepth was not found! Kernel %u.%u %u %s\n",
                 OsVerInfo.uMajorVer, OsVerInfo.uMinorVer, OsVerInfo.uBuildNo, OsVerInfo.fChecked ? "checked" : "free");
# ifdef DEBUG
    else
        DbgPrint("IPRT: _KPRCB:{.QuantumEnd=%x/%d, .DpcQueueDepth=%x/%d} Kernel %u.%u %u %s\n",
                 g_offrtNtPbQuantumEnd, g_cbrtNtPbQuantumEnd, g_offrtNtPbDpcQueueDepth,
                 OsVerInfo.uMajorVer, OsVerInfo.uMinorVer, OsVerInfo.uBuildNo, OsVerInfo.fChecked ? "checked" : "free");
# endif
#endif

    /*
     * Special IPI fun for RTMpPokeCpu.
     *
     * On Vista and later the DPC method doesn't seem to reliably send IPIs,
     * so we have to use alternative methods.  The NtHalSendSoftwareInterrupt
     * is preferrable, but it's AMD64 only.  The NalRequestIpip method changed
     * in Windows 7 with the lots-of-processors-support, but it's the only
     * targeted IPI game in town if we cannot use KeInsertQueueDpc.  Worst case
     * we use broadcast IPIs.
     */
    if (   OsVerInfo.uMajorVer > 6
        || (OsVerInfo.uMajorVer == 6 && OsVerInfo.uMinorVer > 0))
        g_pfnrtHalRequestIpiPreW7 = NULL;
    else
        g_pfnrtHalRequestIpiW7Plus = NULL;

    g_pfnrtMpPokeCpuWorker = rtMpPokeCpuUsingDpc;
#ifndef IPRT_TARGET_NT4
    if (g_pfnrtNtHalSendSoftwareInterrupt)
        g_pfnrtMpPokeCpuWorker = rtMpPokeCpuUsingHalSendSoftwareInterrupt;
    else if (   g_pfnrtHalRequestIpiW7Plus
             && g_pfnrtKeInitializeAffinityEx
             && g_pfnrtKeAddProcessorAffinityEx
             && g_pfnrtKeGetProcessorIndexFromNumber)
        g_pfnrtMpPokeCpuWorker = rtMpPokeCpuUsingHalReqestIpiW7Plus;
    else if (OsVerInfo.uMajorVer >= 6 && g_pfnrtKeIpiGenericCall)
        g_pfnrtMpPokeCpuWorker = rtMpPokeCpuUsingBroadcastIpi;
    /* else: Windows XP should send always send an IPI -> VERIFY */
#endif

    return VINF_SUCCESS;
}