DECLHIDDEN(int) rtSchedNativeCalcDefaultPriority(RTTHREADTYPE enmType) { Assert(enmType > RTTHREADTYPE_INVALID && enmType < RTTHREADTYPE_END); /* * Get the current priority. */ int iBasePriority = rtSchedDarwinGetBasePriority(); Assert(iBasePriority >= 0 && iBasePriority <= 63); /* * If it doesn't match the default, select the closest one from the table. */ int offBest = RT_ABS(g_pProcessPriority->aTypes[enmType].iBasePriority - iBasePriority); if (offBest) { for (unsigned i = 0; i < RT_ELEMENTS(g_aPriorities); i++) { int off = RT_ABS(g_aPriorities[i].aTypes[enmType].iBasePriority - iBasePriority); if (off < offBest) { g_pProcessPriority = &g_aPriorities[i]; if (!off) break; offBest = off; } } } return VINF_SUCCESS; }
static void Test4(unsigned cThreads, unsigned cSeconds, unsigned uWritePercent, bool fYield, bool fQuiet) { unsigned i; uint64_t acIterations[32]; RTTHREAD aThreads[RT_ELEMENTS(acIterations)]; AssertRelease(cThreads <= RT_ELEMENTS(acIterations)); RTTestSubF(g_hTest, "Test4 - %u threads, %u sec, %u%% writes, %syielding", cThreads, cSeconds, uWritePercent, fYield ? "" : "non-"); /* * Init globals. */ g_fYield = fYield; g_fQuiet = fQuiet; g_fTerminate = false; g_uWritePercent = uWritePercent; g_cConcurrentWriters = 0; g_cConcurrentReaders = 0; RTTEST_CHECK_RC_RETV(g_hTest, RTCritSectRwInit(&g_CritSectRw), VINF_SUCCESS); /* * Create the threads and let them block on the semrw. */ RTTEST_CHECK_RC_RETV(g_hTest, RTCritSectRwEnterExcl(&g_CritSectRw), VINF_SUCCESS); for (i = 0; i < cThreads; i++) { acIterations[i] = 0; RTTEST_CHECK_RC_RETV(g_hTest, RTThreadCreateF(&aThreads[i], Test4Thread, &acIterations[i], 0, RTTHREADTYPE_DEFAULT, RTTHREADFLAGS_WAITABLE, "test-%u", i), VINF_SUCCESS); } /* * Do the test run. */ uint32_t cErrorsBefore = RTTestErrorCount(g_hTest); uint64_t u64StartTS = RTTimeNanoTS(); RTTEST_CHECK_RC(g_hTest, RTCritSectRwLeaveExcl(&g_CritSectRw), VINF_SUCCESS); RTThreadSleep(cSeconds * 1000); ASMAtomicWriteBool(&g_fTerminate, true); uint64_t ElapsedNS = RTTimeNanoTS() - u64StartTS; /* * Clean up the threads and semaphore. */ for (i = 0; i < cThreads; i++) RTTEST_CHECK_RC(g_hTest, RTThreadWait(aThreads[i], 5000, NULL), VINF_SUCCESS); RTTEST_CHECK_MSG(g_hTest, g_cConcurrentWriters == 0, (g_hTest, "g_cConcurrentWriters=%u at end of test\n", g_cConcurrentWriters)); RTTEST_CHECK_MSG(g_hTest, g_cConcurrentReaders == 0, (g_hTest, "g_cConcurrentReaders=%u at end of test\n", g_cConcurrentReaders)); RTTEST_CHECK_RC(g_hTest, RTCritSectRwDelete(&g_CritSectRw), VINF_SUCCESS); if (RTTestErrorCount(g_hTest) != cErrorsBefore) RTThreadSleep(100); /* * Collect and display the results. */ uint64_t cItrTotal = acIterations[0]; for (i = 1; i < cThreads; i++) cItrTotal += acIterations[i]; uint64_t cItrNormal = cItrTotal / cThreads; uint64_t cItrMinOK = cItrNormal / 20; /* 5% */ uint64_t cItrMaxDeviation = 0; for (i = 0; i < cThreads; i++) { uint64_t cItrDelta = RT_ABS((int64_t)(acIterations[i] - cItrNormal)); if (acIterations[i] < cItrMinOK) RTTestFailed(g_hTest, "Thread %u did less than 5%% of the iterations - %llu (it) vs. %llu (5%%) - %llu%%\n", i, acIterations[i], cItrMinOK, cItrDelta * 100 / cItrNormal); else if (cItrDelta > cItrNormal / 2) RTTestPrintf(g_hTest, RTTESTLVL_ALWAYS, "Warning! Thread %u deviates by more than 50%% - %llu (it) vs. %llu (avg) - %llu%%\n", i, acIterations[i], cItrNormal, cItrDelta * 100 / cItrNormal); if (cItrDelta > cItrMaxDeviation) cItrMaxDeviation = cItrDelta; } //RTTestPrintf(g_hTest, RTTESTLVL_ALWAYS, // "Threads: %u Total: %llu Per Sec: %llu Avg: %llu ns Max dev: %llu%%\n", // cThreads, // cItrTotal, // cItrTotal / cSeconds, // ElapsedNS / cItrTotal, // cItrMaxDeviation * 100 / cItrNormal // ); // RTTestValue(g_hTest, "Thruput", cItrTotal * UINT32_C(1000000000) / ElapsedNS, RTTESTUNIT_CALLS_PER_SEC); RTTestValue(g_hTest, "Max diviation", cItrMaxDeviation * 100 / cItrNormal, RTTESTUNIT_PCT); }
static int Test1(unsigned cThreads, unsigned cSeconds, bool fYield, bool fQuiet) { int rc; unsigned i; uint64_t g_au64[32]; RTTHREAD aThreads[RT_ELEMENTS(g_au64)]; AssertRelease(cThreads <= RT_ELEMENTS(g_au64)); /* * Init globals. */ g_fYield = fYield; g_fQuiet = fQuiet; g_fTerminate = false; rc = RTSemMutexCreate(&g_hMutex); if (RT_FAILURE(rc)) return PrintError("RTSemMutexCreate failed (rc=%Rrc)\n", rc); /* * Create the threads and let them block on the mutex. */ rc = RTSemMutexRequest(g_hMutex, RT_INDEFINITE_WAIT); if (RT_FAILURE(rc)) return PrintError("RTSemMutexRequest failed (rc=%Rrc)\n", rc); for (i = 0; i < cThreads; i++) { g_au64[i] = 0; rc = RTThreadCreate(&aThreads[i], ThreadTest1, &g_au64[i], 0, RTTHREADTYPE_DEFAULT, RTTHREADFLAGS_WAITABLE, "test"); if (RT_FAILURE(rc)) return PrintError("RTThreadCreate failed for thread %u (rc=%Rrc)\n", i, rc); } if (!fQuiet) RTPrintf("tstSemMutex: %zu Threads created. Racing them for %u seconds (%s) ...\n", cThreads, cSeconds, g_fYield ? "yielding" : "no yielding"); uint64_t u64StartTS = RTTimeNanoTS(); rc = RTSemMutexRelease(g_hMutex); if (RT_FAILURE(rc)) PrintError("RTSemMutexRelease failed (rc=%Rrc)\n", rc); RTThreadSleep(cSeconds * 1000); ASMAtomicXchgBool(&g_fTerminate, true); uint64_t ElapsedNS = RTTimeNanoTS() - u64StartTS; for (i = 0; i < cThreads; i++) { rc = RTThreadWait(aThreads[i], 5000, NULL); if (RT_FAILURE(rc)) PrintError("RTThreadWait failed for thread %u (rc=%Rrc)\n", i, rc); } rc = RTSemMutexDestroy(g_hMutex); if (RT_FAILURE(rc)) PrintError("RTSemMutexDestroy failed - %Rrc\n", rc); g_hMutex = NIL_RTSEMMUTEX; if (g_cErrors) RTThreadSleep(100); /* * Collect and display the results. */ uint64_t Total = g_au64[0]; for (i = 1; i < cThreads; i++) Total += g_au64[i]; uint64_t Normal = Total / cThreads; uint64_t MaxDeviation = 0; for (i = 0; i < cThreads; i++) { uint64_t Delta = RT_ABS((int64_t)(g_au64[i] - Normal)); if (Delta > Normal / 2) RTPrintf("tstSemMutex: Warning! Thread %d deviates by more than 50%% - %llu (it) vs. %llu (avg)\n", i, g_au64[i], Normal); if (Delta > MaxDeviation) MaxDeviation = Delta; } RTPrintf("tstSemMutex: Threads: %u Total: %llu Per Sec: %llu Avg: %llu ns Max dev: %llu%%\n", cThreads, Total, Total / cSeconds, ElapsedNS / Total, MaxDeviation * 100 / Normal ); return 0; }
int main(int argc, char **argv) { RTR3InitExe(argc, &argv, 0); /* * Parse args */ static const RTGETOPTDEF g_aOptions[] = { { "--iterations", 'i', RTGETOPT_REQ_INT32 }, { "--hex", 'h', RTGETOPT_REQ_NOTHING }, { "--decimal", 'd', RTGETOPT_REQ_NOTHING }, { "--spin", 's', RTGETOPT_REQ_NOTHING }, { "--reference", 'r', RTGETOPT_REQ_UINT64 }, /* reference value of CpuHz, display the * CpuHz deviation in a separate column. */ }; uint32_t cIterations = 40; bool fHex = true; bool fSpin = false; int ch; uint64_t uCpuHzRef = 0; uint64_t uCpuHzOverallDeviation = 0; int64_t iCpuHzMaxDeviation = 0; int32_t cCpuHzOverallDevCnt = 0; RTGETOPTUNION ValueUnion; RTGETOPTSTATE GetState; RTGetOptInit(&GetState, argc, argv, g_aOptions, RT_ELEMENTS(g_aOptions), 1, RTGETOPTINIT_FLAGS_NO_STD_OPTS); while ((ch = RTGetOpt(&GetState, &ValueUnion))) { switch (ch) { case 'i': cIterations = ValueUnion.u32; break; case 'd': fHex = false; break; case 'h': fHex = true; break; case 's': fSpin = true; break; case 'r': uCpuHzRef = ValueUnion.u64; break; default: return RTGetOptPrintError(ch, &ValueUnion); } } /* * Init */ PSUPDRVSESSION pSession = NIL_RTR0PTR; int rc = SUPR3Init(&pSession); if (RT_SUCCESS(rc)) { if (g_pSUPGlobalInfoPage) { RTPrintf("tstGIP-2: cCpus=%d u32UpdateHz=%RU32 u32UpdateIntervalNS=%RU32 u64NanoTSLastUpdateHz=%RX64 u64CpuHz=%RU64 uCpuHzRef=%RU64 u32Mode=%d (%s) u32Version=%#x\n", g_pSUPGlobalInfoPage->cCpus, g_pSUPGlobalInfoPage->u32UpdateHz, g_pSUPGlobalInfoPage->u32UpdateIntervalNS, g_pSUPGlobalInfoPage->u64NanoTSLastUpdateHz, g_pSUPGlobalInfoPage->u64CpuHz, uCpuHzRef, g_pSUPGlobalInfoPage->u32Mode, SUPGetGIPModeName(g_pSUPGlobalInfoPage), g_pSUPGlobalInfoPage->u32Version); RTPrintf(fHex ? "tstGIP-2: it: u64NanoTS delta u64TSC UpIntTSC H TransId CpuHz %sTSC Interval History...\n" : "tstGIP-2: it: u64NanoTS delta u64TSC UpIntTSC H TransId CpuHz %sTSC Interval History...\n", uCpuHzRef ? " CpuHz deviation " : ""); static SUPGIPCPU s_aaCPUs[2][256]; for (uint32_t i = 0; i < cIterations; i++) { /* copy the data */ memcpy(&s_aaCPUs[i & 1][0], &g_pSUPGlobalInfoPage->aCPUs[0], g_pSUPGlobalInfoPage->cCpus * sizeof(g_pSUPGlobalInfoPage->aCPUs[0])); /* display it & find something to spin on. */ uint32_t u32TransactionId = 0; uint32_t volatile *pu32TransactionId = NULL; for (unsigned iCpu = 0; iCpu < g_pSUPGlobalInfoPage->cCpus; iCpu++) if ( g_pSUPGlobalInfoPage->aCPUs[iCpu].u64CpuHz > 0 && g_pSUPGlobalInfoPage->aCPUs[iCpu].u64CpuHz != _4G + 1) { char szCpuHzDeviation[32]; PSUPGIPCPU pPrevCpu = &s_aaCPUs[!(i & 1)][iCpu]; PSUPGIPCPU pCpu = &s_aaCPUs[i & 1][iCpu]; if (uCpuHzRef) { int64_t iCpuHzDeviation = pCpu->u64CpuHz - uCpuHzRef; uint64_t uCpuHzDeviation = RT_ABS(iCpuHzDeviation); if (uCpuHzDeviation > 999999999) RTStrPrintf(szCpuHzDeviation, sizeof(szCpuHzDeviation), "%17s ", "?"); else { /* Wait until the history validation code takes effect. */ if (pCpu->u32TransactionId > 23 + (8 * 2) + 1) { if (RT_ABS(iCpuHzDeviation) > RT_ABS(iCpuHzMaxDeviation)) iCpuHzMaxDeviation = iCpuHzDeviation; uCpuHzOverallDeviation += uCpuHzDeviation; cCpuHzOverallDevCnt++; } uint32_t uPct = (uint32_t)(uCpuHzDeviation * 100000 / uCpuHzRef + 5); RTStrPrintf(szCpuHzDeviation, sizeof(szCpuHzDeviation), "%10RI64%3d.%02d%% ", iCpuHzDeviation, uPct / 1000, (uPct % 1000) / 10); } } else szCpuHzDeviation[0] = '\0'; RTPrintf(fHex ? "tstGIP-2: %4d/%d: %016llx %09llx %016llx %08x %d %08x %15llu %s%08x %08x %08x %08x %08x %08x %08x %08x (%d)\n" : "tstGIP-2: %4d/%d: %016llu %09llu %016llu %010u %d %010u %15llu %s%08x %08x %08x %08x %08x %08x %08x %08x (%d)\n", i, iCpu, pCpu->u64NanoTS, i ? pCpu->u64NanoTS - pPrevCpu->u64NanoTS : 0, pCpu->u64TSC, pCpu->u32UpdateIntervalTSC, pCpu->iTSCHistoryHead, pCpu->u32TransactionId, pCpu->u64CpuHz, szCpuHzDeviation, pCpu->au32TSCHistory[0], pCpu->au32TSCHistory[1], pCpu->au32TSCHistory[2], pCpu->au32TSCHistory[3], pCpu->au32TSCHistory[4], pCpu->au32TSCHistory[5], pCpu->au32TSCHistory[6], pCpu->au32TSCHistory[7], pCpu->cErrors); if (!pu32TransactionId) { pu32TransactionId = &g_pSUPGlobalInfoPage->aCPUs[iCpu].u32TransactionId; u32TransactionId = pCpu->u32TransactionId; } } /* wait a bit / spin */ if (!fSpin) RTThreadSleep(9); else { if (pu32TransactionId) { uint32_t uTmp; while ( u32TransactionId == (uTmp = *pu32TransactionId) || (uTmp & 1)) ASMNopPause(); } else RTThreadSleep(1); } } /* * Display TSC deltas. * * First iterative over the APIC ID array to get mostly consistent CPUID to APIC ID mapping. * Then iterate over the offline CPUs. It is possible that there's a race between the online/offline * states between the two iterations, but that cannot be helped from ring-3 anyway and not a biggie. */ RTPrintf("tstGIP-2: TSC deltas:\n"); RTPrintf("tstGIP-2: idApic: i64TSCDelta\n"); for (unsigned i = 0; i < RT_ELEMENTS(g_pSUPGlobalInfoPage->aiCpuFromApicId); i++) { uint16_t iCpu = g_pSUPGlobalInfoPage->aiCpuFromApicId[i]; if (iCpu != UINT16_MAX) { RTPrintf("tstGIP-2: %7d: %lld\n", g_pSUPGlobalInfoPage->aCPUs[iCpu].idApic, g_pSUPGlobalInfoPage->aCPUs[iCpu].i64TSCDelta); } } for (unsigned iCpu = 0; iCpu < g_pSUPGlobalInfoPage->cCpus; iCpu++) if (g_pSUPGlobalInfoPage->aCPUs[iCpu].idApic == UINT16_MAX) RTPrintf("tstGIP-2: offline: %lld\n", g_pSUPGlobalInfoPage->aCPUs[iCpu].i64TSCDelta); RTPrintf("tstGIP-2: enmUseTscDelta=%d fGetGipCpu=%#x\n", g_pSUPGlobalInfoPage->enmUseTscDelta, g_pSUPGlobalInfoPage->fGetGipCpu); if ( uCpuHzRef && cCpuHzOverallDevCnt) { uint32_t uPct = (uint32_t)(uCpuHzOverallDeviation * 100000 / cCpuHzOverallDevCnt / uCpuHzRef + 5); RTPrintf("tstGIP-2: Average CpuHz deviation: %d.%02d%%\n", uPct / 1000, (uPct % 1000) / 10); uint32_t uMaxPct = (uint32_t)(RT_ABS(iCpuHzMaxDeviation) * 100000 / uCpuHzRef + 5); RTPrintf("tstGIP-2: Maximum CpuHz deviation: %d.%02d%% (%RI64 ticks)\n", uMaxPct / 1000, (uMaxPct % 1000) / 10, iCpuHzMaxDeviation); } } else { RTPrintf("tstGIP-2: g_pSUPGlobalInfoPage is NULL\n"); rc = -1; } SUPR3Term(false /*fForced*/); } else RTPrintf("tstGIP-2: SUPR3Init failed: %Rrc\n", rc); return !!rc; }
static void supR3HardenedDirectSyscall(PSUPHNTIMPDLL pDll, PCSUPHNTIMPFUNC pImport, PCSUPHNTIMPSYSCALL pSyscall, PSUPHNTLDRCACHEENTRY pLdrEntry, uint8_t *pbBits, bool fReportErrors) { /* * Skip non-syscall entries. */ if (!pSyscall->puApiNo) return; /* * Locate the virgin bits. */ RTLDRADDR uValue; int rc = RTLdrGetSymbolEx(pLdrEntry->hLdrMod, pbBits, (uintptr_t)pDll->pbImageBase, UINT32_MAX, pImport->pszName, &uValue); if (RT_FAILURE(rc)) { SUPHNTIMP_ERROR(fReportErrors, 16, "supR3HardenedDirectSyscall", kSupInitOp_Misc, rc, "%s: RTLdrGetSymbolEx failed on %s: %Rrc", pDll->pszName, pImport->pszName, rc); return; } uintptr_t offSymbol = (uintptr_t)uValue - (uintptr_t)pDll->pbImageBase; uint8_t const *pbFunction = &pbBits[offSymbol]; /* * Parse the code and extract the API call number. */ #ifdef RT_ARCH_AMD64 /* Pattern #1: XP64/W2K3-64 thru Windows 8.1 0:000> u ntdll!NtCreateSection ntdll!NtCreateSection: 00000000`779f1750 4c8bd1 mov r10,rcx 00000000`779f1753 b847000000 mov eax,47h 00000000`779f1758 0f05 syscall 00000000`779f175a c3 ret 00000000`779f175b 0f1f440000 nop dword ptr [rax+rax] */ if ( pbFunction[ 0] == 0x4c /* mov r10, rcx */ && pbFunction[ 1] == 0x8b && pbFunction[ 2] == 0xd1 && pbFunction[ 3] == 0xb8 /* mov eax, 0000yyzzh */ //&& pbFunction[ 4] == 0xZZ //&& pbFunction[ 5] == 0xYY && pbFunction[ 6] == 0x00 && pbFunction[ 7] == 0x00 && pbFunction[ 8] == 0x0f /* syscall */ && pbFunction[ 9] == 0x05 && pbFunction[10] == 0xc3 /* ret */ ) { *pSyscall->puApiNo = RT_MAKE_U16(pbFunction[4], pbFunction[5]); *pImport->ppfnImport = pSyscall->pfnType1; return; } #else /* Pattern #1: XP thru Windows 7 kd> u ntdll!NtCreateSection ntdll!NtCreateSection: 7c90d160 b832000000 mov eax,32h 7c90d165 ba0003fe7f mov edx,offset SharedUserData!SystemCallStub (7ffe0300) 7c90d16a ff12 call dword ptr [edx] 7c90d16c c21c00 ret 1Ch 7c90d16f 90 nop The variable bit is the value loaded into eax: XP=32h, W2K3=34h, Vista=4bh, W7=54h Pattern #2: Windows 8.1 0:000:x86> u ntdll_6a0f0000!NtCreateSection ntdll_6a0f0000!NtCreateSection: 6a15eabc b854010000 mov eax,154h 6a15eac1 e803000000 call ntdll_6a0f0000!NtCreateSection+0xd (6a15eac9) 6a15eac6 c21c00 ret 1Ch 6a15eac9 8bd4 mov edx,esp 6a15eacb 0f34 sysenter 6a15eacd c3 ret The variable bit is the value loaded into eax: W81=154h Note! One nice thing here is that we can share code pattern #1. */ if ( pbFunction[ 0] == 0xb8 /* mov eax, 0000yyzzh*/ //&& pbFunction[ 1] <= 0xZZ //&& pbFunction[ 2] <= 0xYY && pbFunction[ 3] == 0x00 && pbFunction[ 4] == 0x00) { *pSyscall->puApiNo = RT_MAKE_U16(pbFunction[1], pbFunction[2]); if ( pbFunction[5] == 0xba /* mov edx, offset SharedUserData!SystemCallStub */ && pbFunction[ 6] == 0x00 && pbFunction[ 7] == 0x03 && pbFunction[ 8] == 0xfe && pbFunction[ 9] == 0x7f && pbFunction[10] == 0xff /* call [edx] */ && pbFunction[11] == 0x12 && ( ( pbFunction[12] == 0xc2 /* ret 1ch */ && pbFunction[13] == pSyscall->cbParams && pbFunction[14] == 0x00) || ( pbFunction[12] == 0xc3 /* ret */ && pSyscall->cbParams == 0) ) ) { *pImport->ppfnImport = pSyscall->pfnType1; return; } if ( pbFunction[ 5] == 0xe8 /* call [$+3] */ && RT_ABS(*(int32_t *)&pbFunction[6]) < 0x10 && ( ( pbFunction[10] == 0xc2 /* ret 1ch */ && pbFunction[11] == pSyscall->cbParams && pbFunction[12] == 0x00) || ( pbFunction[10] == 0xc3 /* ret */ && pSyscall->cbParams == 0) ) ) { *pImport->ppfnImport = pSyscall->pfnType2; return; } } #endif /* * Failed to parse it. */ volatile uint8_t abCopy[16]; memcpy((void *)&abCopy[0], pbFunction, sizeof(abCopy)); SUPHNTIMP_ERROR(fReportErrors, 17, "supR3HardenedWinInitImports", kSupInitOp_Misc, rc, "%ls: supHardNtLdrCacheOpen failed: '%s': %.16Rhxs", pDll->pwszName, pImport->pszName, &abCopy[0]); }
DECLHIDDEN(int) rtR0InitNative(void) { /* * Init the Nt cpu set. */ #ifdef IPRT_TARGET_NT4 KAFFINITY ActiveProcessors = (UINT64_C(1) << KeNumberProcessors) - UINT64_C(1); #else KAFFINITY ActiveProcessors = KeQueryActiveProcessors(); #endif RTCpuSetEmpty(&g_rtMpNtCpuSet); RTCpuSetFromU64(&g_rtMpNtCpuSet, ActiveProcessors); /** @todo Port to W2K8 with > 64 cpus/threads. */ /* * Initialize the function pointers. */ #ifdef IPRT_TARGET_NT4 g_pfnrtNtExSetTimerResolution = NULL; g_pfnrtNtKeFlushQueuedDpcs = NULL; g_pfnrtHalRequestIpiW7Plus = NULL; g_pfnrtHalRequestIpiPreW7 = NULL; g_pfnrtNtHalSendSoftwareInterrupt = NULL; g_pfnrtKeIpiGenericCall = NULL; g_pfnrtKeInitializeAffinityEx = NULL; g_pfnrtKeAddProcessorAffinityEx = NULL; g_pfnrtKeGetProcessorIndexFromNumber = NULL; g_pfnrtRtlGetVersion = NULL; g_pfnrtKeQueryInterruptTime = NULL; g_pfnrtKeQueryInterruptTimePrecise = NULL; g_pfnrtKeQuerySystemTime = NULL; g_pfnrtKeQuerySystemTimePrecise = NULL; #else UNICODE_STRING RoutineName; RtlInitUnicodeString(&RoutineName, L"ExSetTimerResolution"); g_pfnrtNtExSetTimerResolution = (PFNMYEXSETTIMERRESOLUTION)MmGetSystemRoutineAddress(&RoutineName); RtlInitUnicodeString(&RoutineName, L"KeFlushQueuedDpcs"); g_pfnrtNtKeFlushQueuedDpcs = (PFNMYKEFLUSHQUEUEDDPCS)MmGetSystemRoutineAddress(&RoutineName); RtlInitUnicodeString(&RoutineName, L"HalRequestIpi"); g_pfnrtHalRequestIpiW7Plus = (PFNHALREQUESTIPI_W7PLUS)MmGetSystemRoutineAddress(&RoutineName); g_pfnrtHalRequestIpiPreW7 = (PFNHALREQUESTIPI_PRE_W7)g_pfnrtHalRequestIpiW7Plus; RtlInitUnicodeString(&RoutineName, L"HalSendSoftwareInterrupt"); g_pfnrtNtHalSendSoftwareInterrupt = (PFNHALSENDSOFTWAREINTERRUPT)MmGetSystemRoutineAddress(&RoutineName); RtlInitUnicodeString(&RoutineName, L"KeIpiGenericCall"); g_pfnrtKeIpiGenericCall = (PFNRTKEIPIGENERICCALL)MmGetSystemRoutineAddress(&RoutineName); RtlInitUnicodeString(&RoutineName, L"KeInitializeAffinityEx"); g_pfnrtKeInitializeAffinityEx = (PFNKEINITIALIZEAFFINITYEX)MmGetSystemRoutineAddress(&RoutineName); RtlInitUnicodeString(&RoutineName, L"KeAddProcessorAffinityEx"); g_pfnrtKeAddProcessorAffinityEx = (PFNKEADDPROCESSORAFFINITYEX)MmGetSystemRoutineAddress(&RoutineName); RtlInitUnicodeString(&RoutineName, L"KeGetProcessorIndexFromNumber"); g_pfnrtKeGetProcessorIndexFromNumber = (PFNKEGETPROCESSORINDEXFROMNUMBER)MmGetSystemRoutineAddress(&RoutineName); RtlInitUnicodeString(&RoutineName, L"RtlGetVersion"); g_pfnrtRtlGetVersion = (PFNRTRTLGETVERSION)MmGetSystemRoutineAddress(&RoutineName); # ifndef RT_ARCH_AMD64 RtlInitUnicodeString(&RoutineName, L"KeQueryInterruptTime"); g_pfnrtKeQueryInterruptTime = (PFNRTKEQUERYINTERRUPTTIME)MmGetSystemRoutineAddress(&RoutineName); RtlInitUnicodeString(&RoutineName, L"KeQuerySystemTime"); g_pfnrtKeQuerySystemTime = (PFNRTKEQUERYSYSTEMTIME)MmGetSystemRoutineAddress(&RoutineName); # endif RtlInitUnicodeString(&RoutineName, L"KeQueryInterruptTimePrecise"); g_pfnrtKeQueryInterruptTimePrecise = (PFNRTKEQUERYINTERRUPTTIMEPRECISE)MmGetSystemRoutineAddress(&RoutineName); RtlInitUnicodeString(&RoutineName, L"KeQuerySystemTimePrecise"); g_pfnrtKeQuerySystemTimePrecise = (PFNRTKEQUERYSYSTEMTIMEPRECISE)MmGetSystemRoutineAddress(&RoutineName); #endif /* * HACK ALERT! (and déjà vu warning - remember win32k.sys?) * * Try find _KPRCB::QuantumEnd and _KPRCB::[DpcData.]DpcQueueDepth. * For purpose of verification we use the VendorString member (12+1 chars). * * The offsets was initially derived by poking around with windbg * (dt _KPRCB, !prcb ++, and such like). Systematic harvesting was then * planned using dia2dump, grep and the symbol pack in a manner like this: * dia2dump -type _KDPC_DATA -type _KPRCB EXE\ntkrnlmp.pdb | grep -wE "QuantumEnd|DpcData|DpcQueueDepth|VendorString" * * The final solution ended up using a custom harvester program called * ntBldSymDb that recursively searches thru unpacked symbol packages for * the desired structure offsets. The program assumes that the packages * are unpacked into directories with the same name as the package, with * exception of some of the w2k packages which requires a 'w2k' prefix to * be distinguishable from another. */ RTNTSDBOSVER OsVerInfo; rtR0NtGetOsVersionInfo(&OsVerInfo); /* * Gather consistent CPU vendor string and PRCB pointers. */ KIRQL OldIrql; KeRaiseIrql(DISPATCH_LEVEL, &OldIrql); /* make sure we stay on the same cpu */ union { uint32_t auRegs[4]; char szVendor[4*3+1]; } u; ASMCpuId(0, &u.auRegs[3], &u.auRegs[0], &u.auRegs[2], &u.auRegs[1]); u.szVendor[4*3] = '\0'; uint8_t *pbPrcb; __try /* Warning. This try/except statement may provide some false safety. */ { #if defined(RT_ARCH_X86) PKPCR pPcr = (PKPCR)__readfsdword(RT_OFFSETOF(KPCR,SelfPcr)); pbPrcb = (uint8_t *)pPcr->Prcb; #elif defined(RT_ARCH_AMD64) PKPCR pPcr = (PKPCR)__readgsqword(RT_OFFSETOF(KPCR,Self)); pbPrcb = (uint8_t *)pPcr->CurrentPrcb; #else # error "port me" pbPrcb = NULL; #endif } __except(EXCEPTION_EXECUTE_HANDLER) { pbPrcb = NULL; } /* * Search the database */ if (pbPrcb) { /* Find the best matching kernel version based on build number. */ uint32_t iBest = UINT32_MAX; int32_t iBestDelta = INT32_MAX; for (uint32_t i = 0; i < RT_ELEMENTS(g_artNtSdbSets); i++) { if (g_artNtSdbSets[i].OsVerInfo.fChecked != OsVerInfo.fChecked) continue; if (OsVerInfo.fSmp /*must-be-smp*/ && !g_artNtSdbSets[i].OsVerInfo.fSmp) continue; int32_t iDelta = RT_ABS((int32_t)OsVerInfo.uBuildNo - (int32_t)g_artNtSdbSets[i].OsVerInfo.uBuildNo); if ( iDelta == 0 && (g_artNtSdbSets[i].OsVerInfo.uCsdNo == OsVerInfo.uCsdNo || OsVerInfo.uCsdNo == MY_NIL_CSD)) { /* prefect */ iBestDelta = iDelta; iBest = i; break; } if ( iDelta < iBestDelta || iBest == UINT32_MAX || ( iDelta == iBestDelta && OsVerInfo.uCsdNo != MY_NIL_CSD && RT_ABS(g_artNtSdbSets[i ].OsVerInfo.uCsdNo - (int32_t)OsVerInfo.uCsdNo) < RT_ABS(g_artNtSdbSets[iBest].OsVerInfo.uCsdNo - (int32_t)OsVerInfo.uCsdNo) ) ) { iBestDelta = iDelta; iBest = i; } } if (iBest < RT_ELEMENTS(g_artNtSdbSets)) { /* Try all sets: iBest -> End; iBest -> Start. */ bool fDone = false; int32_t i = iBest; while ( i < RT_ELEMENTS(g_artNtSdbSets) && !(fDone = rtR0NtTryMatchSymSet(&g_artNtSdbSets[i], pbPrcb, u.szVendor, &OsVerInfo))) i++; if (!fDone) { i = (int32_t)iBest - 1; while ( i >= 0 && !(fDone = rtR0NtTryMatchSymSet(&g_artNtSdbSets[i], pbPrcb, u.szVendor, &OsVerInfo))) i--; } } else DbgPrint("IPRT: Failed to locate data set.\n"); } else DbgPrint("IPRT: Failed to get PCBR pointer.\n"); KeLowerIrql(OldIrql); /* Lowering the IRQL early in the hope that we may catch exceptions below. */ #ifndef IN_GUEST if (!g_offrtNtPbQuantumEnd && !g_offrtNtPbDpcQueueDepth) DbgPrint("IPRT: Neither _KPRCB::QuantumEnd nor _KPRCB::DpcQueueDepth was not found! Kernel %u.%u %u %s\n", OsVerInfo.uMajorVer, OsVerInfo.uMinorVer, OsVerInfo.uBuildNo, OsVerInfo.fChecked ? "checked" : "free"); # ifdef DEBUG else DbgPrint("IPRT: _KPRCB:{.QuantumEnd=%x/%d, .DpcQueueDepth=%x/%d} Kernel %u.%u %u %s\n", g_offrtNtPbQuantumEnd, g_cbrtNtPbQuantumEnd, g_offrtNtPbDpcQueueDepth, OsVerInfo.uMajorVer, OsVerInfo.uMinorVer, OsVerInfo.uBuildNo, OsVerInfo.fChecked ? "checked" : "free"); # endif #endif /* * Special IPI fun for RTMpPokeCpu. * * On Vista and later the DPC method doesn't seem to reliably send IPIs, * so we have to use alternative methods. The NtHalSendSoftwareInterrupt * is preferrable, but it's AMD64 only. The NalRequestIpip method changed * in Windows 7 with the lots-of-processors-support, but it's the only * targeted IPI game in town if we cannot use KeInsertQueueDpc. Worst case * we use broadcast IPIs. */ if ( OsVerInfo.uMajorVer > 6 || (OsVerInfo.uMajorVer == 6 && OsVerInfo.uMinorVer > 0)) g_pfnrtHalRequestIpiPreW7 = NULL; else g_pfnrtHalRequestIpiW7Plus = NULL; g_pfnrtMpPokeCpuWorker = rtMpPokeCpuUsingDpc; #ifndef IPRT_TARGET_NT4 if (g_pfnrtNtHalSendSoftwareInterrupt) g_pfnrtMpPokeCpuWorker = rtMpPokeCpuUsingHalSendSoftwareInterrupt; else if ( g_pfnrtHalRequestIpiW7Plus && g_pfnrtKeInitializeAffinityEx && g_pfnrtKeAddProcessorAffinityEx && g_pfnrtKeGetProcessorIndexFromNumber) g_pfnrtMpPokeCpuWorker = rtMpPokeCpuUsingHalReqestIpiW7Plus; else if (OsVerInfo.uMajorVer >= 6 && g_pfnrtKeIpiGenericCall) g_pfnrtMpPokeCpuWorker = rtMpPokeCpuUsingBroadcastIpi; /* else: Windows XP should send always send an IPI -> VERIFY */ #endif return VINF_SUCCESS; }