/**
 * Times repeated decoding of a block of instruction bytes.
 *
 * The byte range [pabInstrs, uEndPtr) is walked _512K times with
 * DISInstrWithReader, after which the total elapsed time and the average
 * time per decoded instruction are reported as test values.
 *
 * @param   pszSub          Name used in the sub-test title and as the prefix
 *                          of the reported value names.
 * @param   pabInstrs       Start of the instruction bytes.
 * @param   uEndPtr         Address just past the last instruction byte.
 * @param   enmDisCpuMode   CPU mode handed to DISInstrWithReader.
 */
static void testPerformance(const char *pszSub, uint8_t const *pabInstrs, uintptr_t uEndPtr, DISCPUMODE enmDisCpuMode)
{
    RTTestISubF("Performance - %s", pszSub);

    size_t const cbInstrs = uEndPtr - (uintptr_t)pabInstrs;
    uint64_t     cInstrs  = 0;
    uint64_t     nsStart  = RTTimeNanoTS();
    for (uint32_t i = 0; i < _512K; i++) /* the samples are way too small. :-) */
    {
        /* One pass over the sample; cInstrs counts every decode attempt. */
        for (size_t off = 0; off < cbInstrs; cInstrs++)
        {
            uint32_t cb = 1; /* default step of one byte; presumably overwritten by the decoder - TODO confirm */
            DISSTATE Dis;
            DISInstrWithReader((uintptr_t)&pabInstrs[off], enmDisCpuMode, testReadBytes, NULL, &Dis, &cb);
            off += cb;
        }
    }
    uint64_t cNsElapsed = RTTimeNanoTS() - nsStart;

    RTTestIValueF(cNsElapsed, RTTESTUNIT_NS, "%s-Total", pszSub);

    /* An empty sample decodes nothing; skip the average rather than divide by zero. */
    if (cInstrs > 0)
        RTTestIValueF(cNsElapsed / cInstrs, RTTESTUNIT_NS_PER_CALL, "%s-per-instruction", pszSub);
}
/**
 * Measures memory throughput for each (allocating CPU, accessing CPU) pairing.
 *
 * For every CPU set index visited by the outer loop the thread is pinned to
 * that CPU, a 32 MB page allocation is made and filled with memset.  The
 * inner loop then pins the thread to each CPU set index in turn and runs
 * three timed passes over the buffer - write (memset), read (volatile word
 * scan) and read/write (memcpy of the upper half onto the lower half) -
 * reporting each as an MB/s value, followed by the summed elapsed time.
 *
 * Index stepping is delegated to getNextCpu, which is defined outside this
 * function.  Any failed check returns immediately.
 *
 * @param   hTest   The test handle.  Not used here; the RTTestI* calls in
 *                  this function do not take a handle argument.
 */
static void doTest(RTTEST hTest)
{
    NOREF(hTest);

    const uint32_t cbTestSet   = _1M * 32;
    const uint32_t cIterations = 384;
    /* Numerator shared by all three throughput figures. */
    uint64_t const cbPerTest   = (uint64_t)cIterations * cbTestSet;

    for (uint32_t iAllocCpu = 0; iAllocCpu < RTCPUSET_MAX_CPUS; iAllocCpu = getNextCpu(iAllocCpu))
    {
        /*
         * Pin to the allocating CPU, grab the buffer and touch all of it.
         */
        RTTESTI_CHECK_RC_OK_RETV(RTThreadSetAffinityToCpu(RTMpCpuIdFromSetIndex(iAllocCpu)));
        void *pvTest = RTMemPageAlloc(cbTestSet); /* not freed on the early-return paths below; accepted */
        RTTESTI_CHECK_RETV(pvTest != NULL);
        memset(pvTest, 0xef, cbTestSet);

        /*
         * Access the buffer from every CPU in turn.
         */
        for (uint32_t iAccessCpu = 0; iAccessCpu < RTCPUSET_MAX_CPUS; iAccessCpu = getNextCpu(iAccessCpu))
        {
            RTTESTI_CHECK_RC_OK_RETV(RTThreadSetAffinityToCpu(RTMpCpuIdFromSetIndex(iAccessCpu)));

            /*
             * Write pass: fill the whole buffer cIterations times.
             */
            RTTimeNanoTS();
            RTThreadYield();
            uint64_t nsStart = RTTimeNanoTS();
            for (uint32_t i = 0; i < cIterations; i++)
            {
                ASMCompilerBarrier(); /* paranoia */
                memset(pvTest, i, cbTestSet);
            }
            uint64_t const    cNsElapsedWrite = RTTimeNanoTS() - nsStart;
            long double const rdSecsWrite     = (long double)cNsElapsedWrite / RT_NS_1SEC_64;
            uint64_t const    cMBPerSecWrite  = (uint64_t)(cbPerTest / rdSecsWrite / _1M);
            RTTestIValueF(cMBPerSecWrite, RTTESTUNIT_MEGABYTES_PER_SEC, "cpu%02u-mem%02u-write", iAllocCpu, iAccessCpu);

            /*
             * Read pass: scan the zeroed buffer one size_t at a time.
             */
            memset(pvTest, 0, cbTestSet);
            RTTimeNanoTS();
            RTThreadYield();
            nsStart = RTTimeNanoTS();
            for (uint32_t i = 0; i < cIterations; i++)
            {
#if 1
                /* Volatile loads so each word is actually fetched; the sum itself is discarded. */
                size_t register  uSum   = 0;
                size_t volatile *puWord = (size_t volatile *)pvTest;
                size_t volatile *puStop = puWord + cbTestSet / sizeof(size_t);
                for (; puWord != puStop; puWord++)
                    uSum += *puWord;
#else
                ASMCompilerBarrier(); /* paranoia */
                void *pvFound = memchr(pvTest, (i & 127) + 1, cbTestSet);
                RTTESTI_CHECK(pvFound == NULL);
#endif
            }
            uint64_t const    cNsElapsedRead = RTTimeNanoTS() - nsStart;
            long double const rdSecsRead     = (long double)cNsElapsedRead / RT_NS_1SEC_64;
            uint64_t const    cMBPerSecRead  = (uint64_t)(cbPerTest / rdSecsRead / _1M);
            RTTestIValueF(cMBPerSecRead, RTTESTUNIT_MEGABYTES_PER_SEC, "cpu%02u-mem%02u-read", iAllocCpu, iAccessCpu);

            /*
             * Read/write pass: copy the upper half of the buffer onto the lower half.
             */
            RTTimeNanoTS();
            RTThreadYield();
            nsStart = RTTimeNanoTS();
            for (uint32_t i = 0; i < cIterations; i++)
            {
                ASMCompilerBarrier(); /* paranoia */
                memcpy(pvTest, (uint8_t *)pvTest + cbTestSet / 2, cbTestSet / 2);
            }
            uint64_t const    cNsElapsedRW = RTTimeNanoTS() - nsStart;
            long double const rdSecsRW     = (long double)cNsElapsedRW / RT_NS_1SEC_64;
            uint64_t const    cMBPerSecRW  = (uint64_t)(cbPerTest / rdSecsRW / _1M);
            RTTestIValueF(cMBPerSecRW, RTTESTUNIT_MEGABYTES_PER_SEC, "cpu%02u-mem%02u-read-write", iAllocCpu, iAccessCpu);

            /*
             * Combined time of the three passes.
             */
            RTTestIValueF(cNsElapsedWrite + cNsElapsedRead + cNsElapsedRW, RTTESTUNIT_NS,
                          "cpu%02u-mem%02u-time", iAllocCpu, iAccessCpu);
        }

        /*
         * Release the buffer before moving on to the next allocating CPU.
         */
        RTMemPageFree(pvTest, cbTestSet);
    }
}