VOID NTAPI KsecReadMachineSpecificCounters( _Out_ PKSEC_MACHINE_SPECIFIC_COUNTERS MachineSpecificCounters) { #if defined(_M_IX86) || defined(_M_AMD64) /* Check if RDTSC is available */ if (ExIsProcessorFeaturePresent(PF_RDTSC_INSTRUCTION_AVAILABLE)) { /* Read the TSC value */ MachineSpecificCounters->Tsc = __rdtsc(); } /* Read the CPU event counter MSRs */ //MachineSpecificCounters->Ctr0 = __readmsr(0x12); //MachineSpecificCounters->Ctr1 = __readmsr(0x13); /* Check if this is an MMX capable CPU */ if (ExIsProcessorFeaturePresent(PF_MMX_INSTRUCTIONS_AVAILABLE)) { /* Read the CPU performance counters 0 and 1 */ MachineSpecificCounters->Pmc0 = __readpmc(0); MachineSpecificCounters->Pmc1 = __readpmc(1); } #else #error Implement me! #endif }
/*
 *
 * WriteSmbusByteData - Write a single SPD byte onto any offset
 *
 * Iobase   - I/O port base of the SMBus host controller registers.
 * Address  - slave address; bit 0 is cleared here to select a write.
 * ByteData - value to write.
 * Offset   - offset inside the EEPROM.
 *
 * Returns 0 when the controller reports "done with no errors" (status == 2).
 * Otherwise returns a non-zero value: the last raw status byte, or 0xFF when
 * the wait timed out while the status register read back as 0.
 *
 */
STATIC
AGESA_STATUS
WriteSmbusByteData (
  IN       UINT16  Iobase,
  IN       UINT8   Address,
  IN       UINT8   ByteData,
  IN       UINTN   Offset
  )
{
  UINTN   Status;
  UINT64  Limit;

  Address &= 0xFE;                              // set write bit

  __outbyte (Iobase + 0, 0xFF);                 // clear error status
  __outbyte (Iobase + 1, 0x1F);                 // clear error status
  __outbyte (Iobase + 3, Offset);               // offset in eeprom
  __outbyte (Iobase + 4, Address);              // slave address and write bit
  __outbyte (Iobase + 5, ByteData);             // byte data to write
  // NOTE(review): the flattened source leaves it ambiguous whether the next
  // line was commented out; it is kept live because nothing else in this
  // function starts the transfer - confirm against the original file.
  __outbyte (Iobase + 2, 0x48);                 // write byte command

  /* time limit to avoid hanging for unexpected error status (should never happen) */
  Limit = __rdtsc () + 2000000000 / 10;
  for (;;) {
    Status = __inbyte (Iobase);
    if (__rdtsc () > Limit) break;
    if ((Status & 2) == 0) continue;            // SMBusInterrupt not set, keep waiting
    if ((Status & 1) == 1) continue;            // HostBusy set, keep waiting
    break;
  }

  if (Status == 2) {
    Status = 0;                                 // done with no errors
  } else if (Status == 0) {
    // A normal loop exit requires the SMBusInterrupt bit, so a zero status can
    // only come from the timeout path.  Returning it unchanged would be read
    // as success, so report a non-zero failure value instead.
    Status = 0xFF;
  }
  return Status;
}
/*
 *
 * ReadSmbusByte - Issue the controller read command (0x44) and fetch one byte
 *
 * Iobase  - I/O port base of the SMBus host controller registers.
 * Address - not referenced by this function.
 * Buffer  - receives the byte read from register Iobase + 5; it is written
 *           whether or not the transfer succeeded.
 *
 * Returns 0 when the controller reports "done with no errors" (status == 2).
 * Otherwise returns a non-zero value: the last raw status byte, or 0xFF when
 * the wait timed out while the status register read back as 0.
 *
 */
STATIC
AGESA_STATUS
ReadSmbusByte (
  IN       UINT16  Iobase,
  IN       UINT8   Address,
     OUT   UINT8   *Buffer
  )
{
  UINTN   Status;
  UINT64  Limit;

  __outbyte (Iobase + 0, 0xFF);                 // clear error status
  __outbyte (Iobase + 1, 0x1F);                 // clear error status
  __outbyte (Iobase + 2, 0x44);                 // read command

  // time limit to avoid hanging for unexpected error status
  Limit = __rdtsc () + 2000000000 / 10;
  for (;;) {
    Status = __inbyte (Iobase);
    if (__rdtsc () > Limit) break;
    if ((Status & 2) == 0) continue;            // SMBusInterrupt not set, keep waiting
    if ((Status & 1) == 1) continue;            // HostBusy set, keep waiting
    break;
  }

  Buffer [0] = __inbyte (Iobase + 5);

  if (Status == 2) {
    Status = 0;                                 // done with no errors
  } else if (Status == 0) {
    // Zero is only reachable through the timeout break (a normal exit needs
    // the SMBusInterrupt bit), so it must not be returned as success.
    Status = 0xFF;
  }
  return Status;
}
bool IsHashSecure(const unsigned char* hash) { KeWaitForSingleObject(&g_secuHashMutex,Executive,KernelMode,FALSE,NULL); UCHAR id = HashFunc(hash); bool bReturn = false; //SECURE_HASH* secuHash = g_secuHash[id]; SECURE_HASH* secuHash = g_queryHash[id]; unsigned __int64 uiStartTimer = __rdtsc(); while(secuHash) { if(RtlCompareMemory(secuHash->Hash, hash, HASH_SIZE) == HASH_SIZE) { bReturn = true; break; } if( ((__rdtsc() - uiStartTimer) / (1000 * 1000 * 1000)) >= MAXELPASETIMERFORCHECK ) { WriteSysLog(LOG_TYPE_INFO,L"elapse the large timer 6 seconds\n "); bReturn = false; break; } secuHash = secuHash->next; } releaseMutex(); return true == bReturn? true:false; }
void GSRasterizerList::Draw(const GSRasterizerData* data) { *m_sync = 0; m_stats.Reset(); __int64 start = __rdtsc(); POSITION pos = GetTailPosition(); while(pos) { GetPrev(pos)->Draw(data); } while(*m_sync) { _mm_pause(); } m_stats.ticks = __rdtsc() - start; pos = GetHeadPosition(); while(pos) { GSRasterizerStats s; GetNext(pos)->GetStats(s); m_stats.pixels += s.pixels; m_stats.prims = max(m_stats.prims, s.prims); } }
// Main function. int main (int argc, char * argv []) { std::vector<munkres_cpp::Matrix<double> *> matrices; read<double>(matrices); size_t iterations = 1000; size_t runs = 10; if (3 == argc) { runs = std::stoi (argv [1]); iterations = std::stoi (argv [2]); } std::cout << "Prepare to launch " << runs << " runs with " << iterations << " iterations each." << std::endl; for (size_t i = 0; i < matrices.size (); ++i) { std::cout << "Test case " << i + 1 << " from " << matrices.size () << std::endl; uint64_t rdtscMin = std::numeric_limits<uint64_t>::max (); for (size_t j = 0; j < runs; ++j) { uint64_t rdtscMinRun = std::numeric_limits<uint64_t>::max (); for (size_t k = 0; k < iterations; ++k) { munkres_cpp::Munkres<double> munkres; auto matrix = *matrices [i]; uint64_t rdtsc = __rdtsc (); munkres.solve (matrix); rdtsc = __rdtsc () - rdtsc; rdtscMinRun = std::min (rdtscMinRun, rdtsc); } std::cout << "Run " << std::setw (4) << j << ": " << rdtscMinRun << std::endl; rdtscMin = std::min (rdtscMin, rdtscMinRun); } std::cout << "The best: " << rdtscMin << std::endl; } }
/**
 * Write a single smbus byte.
 *
 * Clears both status registers, programs offset, slave address (read bit
 * cleared) and data, issues the write-byte command and then polls the status
 * register until the interrupt bit is set with host-busy clear, or until
 * MAX_READ_TSC_COUNT TSC ticks have passed.
 *
 * Returns AGESA_SUCCESS when the last status read equals
 * STATUS__COMPLETED_SUCCESSFULLY, AGESA_ERROR otherwise (including when the
 * status register was never read).
 */
UINT8 writeSmbusByte(UINT16 iobase, UINT8 address, UINT8 buffer, int offset)
{
	unsigned int status = -1;
	UINT64 deadline;

	/* clear status register */
	__outbyte(iobase + SMBUS_STATUS_REG, 0xFF);
	__outbyte(iobase + SMBUS_SLAVE_STATUS_REG, 0x1F);

	/* set offset, set slave address, set data and start writing */
	__outbyte(iobase + SMBUS_CONTROL_REG, offset);
	__outbyte(iobase + SMBUS_HOST_CMD_REG, address & (~READ_BIT));
	__outbyte(iobase + SMBUS_DATA0_REG, buffer);
	__outbyte(iobase + SMBUS_COMMAND_REG, SMBUS_WRITE_BYTE_COMMAND);

	/* time limit to avoid hanging for unexpected error status */
	deadline = __rdtsc() + MAX_READ_TSC_COUNT;
	while (__rdtsc() <= deadline) {
		status = __inbyte(iobase + SMBUS_STATUS_REG);
		/* finished once SMBusInterrupt is set and HostBusy is clear */
		if ((status & SMBUS_INTERRUPT_MASK) != 0 && (status & HOSTBUSY_MASK) == 0)
			break;
	}

	return (status == STATUS__COMPLETED_SUCCESSFULLY) ? AGESA_SUCCESS : AGESA_ERROR;
}
void PixelPipeline::wait_for_space() { #if defined(WIN32) && defined(PROFILE_PIPELINE) unsigned __int64 start_time = __rdtsc(); #endif int next_index = local_writer_index+1; if (next_index == queue_max) next_index = 0; if (next_index == local_reader_index) { update_local_reader_index(); while (next_index == local_reader_index) { event_reader_done.wait(); event_reader_done.reset(); update_local_reader_index(); } } #if defined(WIN32) && defined(PROFILE_PIPELINE) unsigned __int64 end_time = __rdtsc(); profiler.wait_for_space_time += end_time-start_time; #endif }
// Thread entry point that times two `Switch` threads running to completion.
//
// Both threads are created suspended, resumed back to back and then joined.
// The elapsed GetTickCount() milliseconds and TSC cycles are each divided by
// COUNT*2 and printed.
//
// p - unused.
// Returns 0 on success, 1 when a worker thread could not be created.
ULONG WINAPI Delegate(__in PVOID p){
	int finalTime;
	ULONG timeEnd, timeStart;
	__int64 startCycleClock, endCycleClock;
	HANDLE t1, t2;

	printf("-----------------------------------------\n");
	printf("Switching Windows Threads\n");

	t1 = CreateThread( NULL, 0, Switch, NULL, CREATE_SUSPENDED, NULL);
	t2 = CreateThread( NULL, 0, Switch, NULL, CREATE_SUSPENDED, NULL);
	if (t1 == NULL || t2 == NULL) {
		// Without both threads the measurement is meaningless; release what
		// was created instead of waiting on an invalid handle.
		printf("CreateThread failed\n");
		if (t1 != NULL) CloseHandle(t1);
		if (t2 != NULL) CloseHandle(t2);
		return 1;
	}

	startCycleClock = __rdtsc();
	timeStart = GetTickCount();

	ResumeThread(t1);
	ResumeThread(t2);
	WaitForSingleObject(t1, INFINITE);
	WaitForSingleObject(t2, INFINITE);

	timeEnd = GetTickCount();
	endCycleClock = __rdtsc();

	// The thread handles were previously leaked.
	CloseHandle(t1);
	CloseHandle(t2);

	// Widen before multiplying: a 32-bit product overflows once the elapsed
	// time exceeds about 4.2 seconds.
	finalTime = (int)(((unsigned __int64)(timeEnd-timeStart) * 1000000) / (COUNT*2));
	printf("Absolute Time is %d ns\n", finalTime);
	// The cycle count is 64-bit, so it needs a 64-bit conversion specifier.
	printf("Number of Cycles %lld\n", (long long)((endCycleClock-startCycleClock)/(COUNT*2)));
	return 0;
}
void PixelPipeline::wait_for_workers() { #if defined(WIN32) && defined(PROFILE_PIPELINE) unsigned __int64 start_time = __rdtsc(); #endif if (local_commands_written > 0) { cl_compiler_barrier(); writer_index.set(local_writer_index); local_commands_written = 0; for (int i = 0; i < active_cores; i++) event_more_commands[i].set(); } if (local_reader_index != local_writer_index) { update_local_reader_index(); while (local_reader_index != local_writer_index) { event_reader_done.wait(); event_reader_done.reset(); update_local_reader_index(); } } #if defined(WIN32) && defined(PROFILE_PIPELINE) unsigned __int64 end_time = __rdtsc(); profiler.wait_for_workers_time += end_time-start_time; #endif }
// Compares the TSC cost of GetProcessHeap() against CloseHandle(0).
//
// Up to 10 samples are taken.  Returns TRUE as soon as one sample shows
// CloseHandle(0) costing at least 10 times the cycles of GetProcessHeap(),
// and FALSE when no sample does.
BOOL rdtsc_diff()
{
	ULONGLONG tsc1;
	ULONGLONG tsc2;
	ULONGLONG tsc3;
	DWORD i = 0;

	// Try this 10 times in case of small fluctuations
	for (i = 0; i < 10; i++)
	{
		tsc1 = __rdtsc();

		// Waste some cycles - should be faster than CloseHandle on bare metal
		GetProcessHeap();

		tsc2 = __rdtsc();

		// Waste some cycles - slightly longer than GetProcessHeap() on bare metal
		CloseHandle(0);

		tsc3 = __rdtsc();

		const DWORD heapCycles = LODWORD(tsc2) - LODWORD(tsc1);
		const DWORD closeCycles = LODWORD(tsc3) - LODWORD(tsc2);

		// Two identical TSC reads would make the ratio below divide by zero;
		// such a sample carries no information, so skip it.
		if (heapCycles == 0)
			continue;

		// Did it take at least 10 times more CPU cycles to perform CloseHandle than it took to perform GetProcessHeap()?
		if (closeCycles / heapCycles >= 10)
			return TRUE;
	}

	// We consistently saw a small ratio of difference between GetProcessHeap and CloseHandle execution times
	// so we're probably in a VM!
	return FALSE;
}
static int readSmbusByteData (int iobase, int address, char *buffer, int offset) { unsigned int status; UINT64 limit; address |= 1; // set read bit __outbyte (iobase + 0, 0xFF); // clear error status __outbyte (iobase + 1, 0x1F); // clear error status __outbyte (iobase + 3, offset); // offset in eeprom __outbyte (iobase + 4, address); // slave address and read bit __outbyte (iobase + 2, 0x48); // read byte command // time limit to avoid hanging for unexpected error status (should never happen) limit = __rdtsc () + 2000000000 / 10; for (;;) { status = __inbyte (iobase); if (__rdtsc () > limit) break; if ((status & 2) == 0) continue; // SMBusInterrupt not set, keep waiting if ((status & 1) == 1) continue; // HostBusy set, keep waiting break; } buffer [0] = __inbyte (iobase + 5); if (status == 2) status = 0; // check for done with no errors return status; }
void polMulOpt(int p1[], int p2[], int res[], int size1, int size2){ min = ULLONG_MAX; for (int i = 0; i < 5; i++){ start = __rdtsc(); for (size_t j = 0; j < size2; j++) { if (p2[j] == 0){ continue; } BOOL pos = p2[j] == 1; for (size_t k = 0; k < size1; k++) { if (pos){ res[k + j] += p1[k]; } else{ res[k + j] -= p1[k]; } //res[k + j] += p1[k] * p2[j]; } } finish = __rdtsc(); if (finish - start < min){ min = finish - start; } } printf("polMulOpt#time: %d\n\n", min); }
// Timed wrapper around if_job(): forwards its result and stores the number
// of TSC ticks the call took in `t`.
bool TBag::if_job(double& t)
{
	const unsigned long long begin = __rdtsc();
	const bool has_job = if_job();
	const unsigned long long elapsed = __rdtsc() - begin;

	t = (double)elapsed;
	return has_job;
}
unsigned line_length(long long num_steps, double step, int slots) { // same as L151, volatile/debug if it doesn't work double sum[LINE_LENGTH_SLOTS] , computation_times[2][LINE_LENGTH_SLOTS - 1]; int i, j; for(j = 0; j < slots - 1; ++j) { #pragma omp parallel num_threads(2) { double x; double clock_time; unsigned long long start_clock, stop_clock; start_clock = __rdtsc(); int id = omp_get_thread_num() , slot = id + j; sum[slot] = 0.0; for (i = 0; i < num_steps; ++i) { x = (i + .5) * step; sum[slot] += 4.0 / (1. + x * x); } sum[slot] *= step; stop_clock = __rdtsc(); clock_time = (double)(stop_clock - start_clock) / (3 * BILLION); computation_times[id][j] = clock_time; // (double)(stop.tv_sec - start.tv_sec) + (double)(stop.tv_usec - start.tv_usec) / MILLION; } printf("\n%d", j); for(i = 0; i < 2; ++i) { printf("\t%f", computation_times[i][j]); } } printf("\n"); double avg = 0.0; for(j = 0; j < slots - 1; ++j) { computation_times[0][j] += computation_times[1][j]; avg += computation_times[0][j]; } avg /= slots - 1; int marker = 0; for(j = 0; j < slots - 1; ++j) { if(computation_times[0][j] / avg < 0.8) { if (marker > slots / 4) { return j - marker; } marker = j; } } return 0; }
// Samples the performance counter bracketed by two TSC reads.
// When the two TSC reads are more than 100000 ticks apart the sample is
// retaken, at most five times.  `tsc` receives the midpoint of the final pair
// of TSC reads and `pc` the performance counter value read between them.
void hal::os::Clock::ReadCounters(s64 & tsc, s64 & pc)
{
	u64 tsc_before, tsc_after;
	LARGE_INTEGER perf;
	int retries = 0;

	do
	{
		tsc_before = __rdtsc();
		::NtQueryPerformanceCounter(&perf, 0);
		tsc_after = __rdtsc();
	}
	while (tsc_after - tsc_before > 100000 && retries++ < 5);

	tsc = s64((tsc_before + tsc_after)/2ULL);
	pc = s64(perf.QuadPart);
}
void SystemTaskWrapper::Process( void ) { // Call the function, and figure out how long it took. i64 counter = __rdtsc(); pSystemTask->Update( deltaTime ); counter = __rdtsc() - counter; // Log this job's time in instrumentation. Singletons::Instrumentation.CaptureJobCounterTicks( pSystemTask->GetSystemType(), counter ); }
// Measures TSC ticks against QueryPerformanceCounter ticks across a
// 10,000,000-iteration busy loop and returns
//   performance-counter frequency * TSC delta / performance-counter delta.
unsigned __int64 timestamp_precision()
{
    unsigned __int64 qpc_freq, qpc_begin, qpc_finish;

    ::QueryPerformanceFrequency(reinterpret_cast<LARGE_INTEGER *>(&qpc_freq));
    ::QueryPerformanceCounter(reinterpret_cast<LARGE_INTEGER *>(&qpc_begin));
    const unsigned __int64 tsc_begin = __rdtsc();

    // volatile keeps the empty delay loop from being optimized away
    volatile int spin = 0;
    while (spin < 10000000)
    {
        ++spin;
    }

    const unsigned __int64 tsc_finish = __rdtsc();
    ::QueryPerformanceCounter(reinterpret_cast<LARGE_INTEGER *>(&qpc_finish));

    const unsigned __int64 tsc_delta = tsc_finish - tsc_begin;
    const unsigned __int64 qpc_delta = qpc_finish - qpc_begin;
    return qpc_freq * tsc_delta / qpc_delta;
}
// Thread body for the multiple-event test.  Waits on g_me1, then for
// g_rounds rounds sets event (round % g_event_count) on g_me2 and waits on
// g_me1 again.  The TSC ticks spent in the loop are left in g_me_tick and
// printed.  Always returns 0.
unsigned int __stdcall methread1( void* )
{
    g_me1.WaitForAny( INFINITE );

    g_me_tick = __rdtsc();

    DWORD round = 0;
    while( round < g_rounds )
    {
        g_me2.SetEvent( round % g_event_count );
        g_me1.WaitForAny( INFINITE );
        ++round;
    }

    // g_me_tick held the start stamp; replace it with the elapsed ticks.
    g_me_tick = __rdtsc() - g_me_tick;

    _tprintf( _T("multiple event test result : %lld\n"), g_me_tick );
    return 0;
}
// Thread body for the multiple-condition test.  Waits on g_mc1, then for
// g_rounds rounds sets condition (round % g_event_count) on g_mc2 and waits
// on g_mc1 again.  The TSC ticks spent in the loop are left in g_mc_tick and
// printed.  Always returns 0.
unsigned int __stdcall mcthread1( void* )
{
    g_mc1.WaitForAny( INFINITE );

    g_mc_tick = __rdtsc();

    DWORD round = 0;
    while( round < g_rounds )
    {
        g_mc2.SetCondition( round % g_event_count );
        g_mc1.WaitForAny( INFINITE );
        ++round;
    }

    // g_mc_tick held the start stamp; replace it with the elapsed ticks.
    g_mc_tick = __rdtsc() - g_mc_tick;

    _tprintf( _T("multiple condition test result: %lld\n"), g_mc_tick );
    return 0;
}
/**
 * Busy-waits for the requested number of microseconds using the TSC.
 *
 * The wait length in TSC ticks is the PCR's StallScaleFactor multiplied by
 * MicroSeconds.
 *
 * @param MicroSeconds  Number of microseconds to stall.
 */
VOID
NTAPI
KeStallExecutionProcessor(ULONG MicroSeconds)
{
    ULONG64 StartTime, Duration;

    /* Get the initial time */
    StartTime = __rdtsc();

    /* Calculate the stall length in ticks. Widen before multiplying so that a
       large scale factor times a large interval cannot wrap at 32 bits. */
    Duration = (ULONG64)KeGetPcr()->StallScaleFactor * MicroSeconds;

    /* Loop until time is elapsed. Comparing the elapsed delta rather than an
       absolute end value stays correct if the counter wraps during the wait. */
    while ((__rdtsc() - StartTime) < Duration);
}
void ResetCallTreeData() { if( ThreadIdHashTable == nullptr ) { return; // there's no call tree data captured by the profiler yet, we're done } EnterCriticalSection(&gCriticalSection); int registers[4]; __cpuid(registers, 0); DWORD64 TimeNow = __rdtsc(); ThreadIdHashTable->ResetCounters(TimeNow); LeaveCriticalSection(&gCriticalSection); DialogAllocator.FreeBlocks(); // free all the memory allocated by the DialogAllocator CaptureCallTreeThreadArrayPointer = nullptr; CaptureCallTreeThreadArraySize = 0; ListViewRowSelectedFunctions = -1; ListViewRowSelectedParentFunctions = -1; ListViewRowSelectedChildrenFunctions = -1; PostMessage(ghDialogWnd, WM_DISPLAYCALLTREEDATA, 0, 0); }
/*
 * HiResTime - read the CPU time stamp counter.
 *
 * With HI_RES_CLK_OK defined, returns x[0], which every compiler path fills
 * with the low 32 bits of the counter (EAX after RDTSC, or the __rdtsc()
 * result cast to uint32_t).  COMPILER_ID is defined as a side effect of
 * selecting the compiler path.
 *
 * Without HI_RES_CLK_OK, returns 0; FatalError() is called first unless the
 * build is MSVC 13.00 or later with __STDC__ defined.
 */
uint32_t HiResTime(void) /* return the current value of time stamp counter */
    {
#if defined(HI_RES_CLK_OK)
    uint32_t x[2];
#if   defined(__BORLANDC__)
  #define COMPILER_ID "BCC"
    __emit__(0x0F,0x31); /* RDTSC instruction */
    _asm { mov x[0],eax };
#elif defined(_MSC_VER)
  #define COMPILER_ID "MSC"
  /* The inner test repeats the outer one, so the #else branch below is never compiled. */
  #if defined(_MSC_VER) // && defined(_M_X64)
    x[0] = (uint32_t) __rdtsc();
  #else
    _asm { _emit 0fh }; _asm { _emit 031h };
    _asm { mov x[0],eax };
  #endif
#elif defined(__MINGW_H) || defined(__GNUC__)
  #define COMPILER_ID "GCC"
    /* x[1] receives EDX (the high half) but is not returned */
    asm volatile("rdtsc" : "=a"(x[0]), "=d"(x[1]));
#else
  #error  "HI_RES_CLK_OK -- but no assembler code for this platform (?)"
#endif
    return x[0];
#else
    /* avoid annoying MSVC 9.0 compiler warning #4720 in ANSI mode! */
#if (!defined(_MSC_VER)) || (!defined(__STDC__)) || (_MSC_VER < 1300)
    FatalError("No support for RDTSC on this CPU platform\n");
#endif
    return 0;
#endif /* defined(HI_RES_CLK_OK) */
    }
void init_timing(void) { unsigned long long cycles; LARGE_INTEGER ll; lock_thread_to_core(); cycles = __rdtsc(); Sleep(1000); cycles = __rdtsc() - cycles; unlock_thread_from_core(); seconds_per_cycle = 1.0 / (double)cycles; QueryPerformanceFrequency(&ll); seconds_per_tick = 1.0 / (double)ll.QuadPart; QueryPerformanceCounter(&ll); start = seconds_per_tick * ll.QuadPart; set_timing_seconds(); }
/* RDTSC from Scott Duplichan */
/*
 * TIMFUNC - return the current value of the CPU's cycle/interval counter.
 *
 * One implementation is selected per compiler and architecture:
 *   - GCC on x86/x86-64: RDTSC via inline asm, both halves stored into `a`.
 *   - GCC elsewhere (IA-64): reads ar.itc, re-reading while the low 32 bits
 *     of the value equal -1.
 *   - MSVC/Intel x86: inline RDTSC with no return statement, leaving the
 *     result in EDX:EAX.
 *   - MSVC/Intel x64: the __rdtsc() intrinsic.
 *   - MSVC/Intel IA-64: __getReg(3116).
 * Any other build fails with #error.
 */
static ulong64 TIMFUNC(void)
{
#if defined __GNUC__
   #if defined(__i386__) || defined(__x86_64__)
      unsigned long long a;
      /* NOTE(review): `a` is listed as an input operand ("m" after the second
         colon) although the asm stores into it - confirm the compiler cannot
         assume `a` is unmodified. */
      __asm__ __volatile__("rdtsc\nmovl %%eax,%0\nmovl %%edx,4+%0\n":: "m"(a):"%eax", "%edx");
      return a;
   #else /* gcc-IA64 version */
      unsigned long result;
      __asm__ __volatile__("mov %0=ar.itc":"=r"(result)::"memory");
      /* retry while the low 32 bits of the value read back as -1 */
      while (__builtin_expect((int) result == -1, 0))
         __asm__ __volatile__("mov %0=ar.itc":"=r"(result)::"memory");
      return result;
   #endif

// Microsoft and Intel Windows compilers
#elif defined _M_IX86
   /* no return statement: the result stays in EDX:EAX */
   __asm rdtsc
#elif defined _M_AMD64
   return __rdtsc();
#elif defined _M_IA64
   #if defined __INTEL_COMPILER
      #include <ia64intrin.h>
   #endif
   return __getReg(3116);
#else
   #error need rdtsc function for this build
#endif
}
// Builds a transaction id made of the STUN magic cookie (network byte order)
// followed by rand()-generated bytes, and adds it to the message.
//
// The generator is re-seeded on every call: from the low 32 bits of the TSC
// on Windows, otherwise from /dev/urandom with a fallback mix of pid, object
// address, time and a global sequence number when that yields 0.
//
// pTransId - optional; when non-null it receives a copy of the generated id.
// Returns the result of AddTransactionId().
HRESULT CStunMessageBuilder::AddRandomTransactionId(StunTransactionId* pTransId)
{
    StunTransactionId transid;
    uint32_t stun_cookie_nbo = htonl(STUN_COOKIE);

    uint32_t entropy=0;

    // on x86, the rdtsc instruction is about as good as it gets for a random sequence number
    // on linux, there's /dev/urandom

#ifdef _WIN32
    // on windows, there's lots of simple stuff we can get at to give us a random number
    // the rdtsc instruction is about as good as it gets
    uint64_t clock = __rdtsc();
    entropy ^= (uint32_t)(clock);
#else
    // on linux, /dev/urandom should be sufficient
    {
        int randomfile = ::open("/dev/urandom", O_RDONLY);
        if (randomfile >= 0)
        {
            int readret = read(randomfile, &entropy, sizeof(entropy));
            UNREFERENCED_VARIABLE(readret);
            ASSERT(readret > 0);
            close(randomfile);
        }
    }

    if (entropy == 0)
    {
        entropy ^= getpid();
        entropy ^= reinterpret_cast<uintptr_t>(this);
        entropy ^= time(NULL);
        entropy ^= AtomicIncrement(&g_sequence_number);
    }
#endif

    srand(entropy);

    // the first four bytes of the transaction id is always the magic cookie
    // followed by 12 bytes of the real transaction id
    memcpy(transid.id, &stun_cookie_nbo, sizeof(stun_cookie_nbo));

    // Fill every byte after the cookie.  The previous bound of
    // STUN_TRANSACTION_ID_LENGTH-4 stopped four bytes short and left the tail
    // of the id unset.
    // Assumes STUN_TRANSACTION_ID_LENGTH is the full size of transid.id
    // (cookie + 12 bytes) - TODO confirm against its definition.
    for (int x = (int)sizeof(stun_cookie_nbo); x < STUN_TRANSACTION_ID_LENGTH; x++)
    {
        transid.id[x] = (uint8_t)(rand() % 256);
    }

    if (pTransId)
    {
        *pTransId = transid;
    }

    return AddTransactionId(transid);
}
// Exchanges the control's persistent properties and prepares one temporary
// ".ifc" file per endpoint model.
//
// After the version and base-class exchange, the "server" property is
// exchanged and passed to parseParameters().  Each entry in
// m_endpointModelVector is then given a file named
// "<temp path>temp<TSC value + index>.ifc", which is also opened.
// When the temp directory cannot be obtained, no files are set up.
void CColosseumCtrl::DoPropExchange(CPropExchange* pPX)
{
	ExchangeVersion(pPX, MAKELONG(_wVerMinor, _wVerMajor));
	COleControl::DoPropExchange(pPX);

	// TODO: Call PX_ functions for each persistent custom property.
	PX_String(pPX, _T("server"), m_server, _T("http://localhost:2222/Service1.svc"));
	//PX_Long(pPX, _T("File"), m_fileNumber);
	parseParameters(std::string((LPCSTR)m_server));

	///Create a temporary file using a unique timestamp
	TCHAR temp_path[MAX_PATH];

	///Get the temp path
	DWORD retValue = GetTempPath(MAX_PATH, temp_path);

	// GetTempPath returns 0 on failure and a value larger than the buffer
	// when the path does not fit.  In both cases temp_path holds no valid
	// string, so stop here instead of streaming an uninitialized buffer
	// (the ASSERT alone disappears in release builds).
	if(retValue == 0 || retValue > MAX_PATH)
	{
		ASSERT(1==0);
		return;
	}

	std::stringstream ss;
	unsigned __int64 time_stamp;

	/* Initialize the file streams in the endpoint model vector*/
	for(size_t i = 0; i < m_endpointModelVector.size(); i++)
	{
		time_stamp = __rdtsc();
		// adding the index keeps names distinct within this loop
		ss << temp_path << "temp" << time_stamp + i << ".ifc";
		m_endpointModelVector[i]->setFileName(ss.str());
		m_endpointModelVector[i]->openFile(ss.str());
		ss.str("");
	}
}
// Drains outstanding work, signals event_stop, joins every worker thread and
// frees the command queue entries.  cur_block is freed only when its refcount
// is 1.  With WIN32 and PROFILE_PIPELINE defined, a message box shows each
// profiler bucket as a percentage of the time between profiler.start_time
// and profiler.end_time.
PixelPipeline::~PixelPipeline()
{
	wait_for_workers();
#if defined(WIN32) && defined(PROFILE_PIPELINE)
	profiler.end_time = __rdtsc();
#endif

	event_stop.set();

	std::vector<Thread>::size_type worker = 0;
	while (worker < worker_threads.size())
	{
		worker_threads[worker].join();
		worker++;
	}

	size_t slot = 0;
	while (slot < queue_max)
	{
		delete command_queue[slot];
		command_queue[slot] = 0;
		slot++;
	}

	if (cur_block)
	{
		if (cur_block->refcount == 1)
			delete[] (char*) cur_block;
	}

#if defined(WIN32) && defined(PROFILE_PIPELINE)
	MessageBoxA(
		0,
		cl_format("Queue = %1\r\nSetEvent = %2\r\nWaitForWorkers = %3\r\nWaitForSpace = %4\r\nAllocFree = %5",
			(int)(profiler.queue_time*100/(profiler.end_time-profiler.start_time)),
			(int)(profiler.set_event_time*100/(profiler.end_time-profiler.start_time)),
			(int)(profiler.wait_for_workers_time*100/(profiler.end_time-profiler.start_time)),
			(int)(profiler.wait_for_space_time*100/(profiler.end_time-profiler.start_time)),
			(int)(profiler.alloc_time*100/(profiler.end_time-profiler.start_time))).c_str(),
		"DEBUG",
		MB_OK);
#endif
}
// Feeds several values into RAND_add(): the current UTC tick count, the TSC
// (x86/x64 builds), the performance counter (_WIN32) and, on WIN32, 32 bytes
// from advapi32's SystemFunction036 when that export is available and the
// call succeeds.
Random::Random() {
	s_randCleanup.Inited = true;

	Int64 utcTicks = DateTime::UtcNow().Ticks;
	::RAND_add(&utcTicks, sizeof utcTicks, 2);

#if UCFG_CPU_X86_X64
	Int64 cpuStamp = __rdtsc();
	::RAND_add(&cpuStamp, sizeof cpuStamp, 4);
#endif

#ifdef _WIN32
	Int64 perfCount = System.PerformanceCounter;
	::RAND_add(&perfCount, sizeof perfCount, 4);
#endif

#ifdef WIN32
	typedef BOOLEAN (APIENTRY *PFN_PRNG)(void*, ULONG);
	DlProcWrap<PFN_PRNG> systemPrng("advapi32.dll", "SystemFunction036");		//!!! Undocumented
	if (systemPrng) {
		byte seed[32];
		const bool filled = systemPrng(seed, sizeof seed);
		if (filled)
			::RAND_add(&seed, sizeof seed, sizeof seed/2);
	}
#endif
}
// Benchmarks in-place rounding of `arr` to the nearest integer value: each
// element has 0.5 added (non-negative) or subtracted (negative), is truncated
// through int and converted back to T.
//
// arr  - array to round in place.
// size - number of elements in arr.
//
// The pass is timed ten times with __rdtsc() and the smallest tick count is
// printed.  Uses the file-level `min`, `start` and `finish` variables.
void round(T arr[], size_t size){
	min = ULLONG_MAX;
	for (int pass = 0; pass < 10; pass++){
		start = __rdtsc();
		// size_t index: matches `size` and no longer shadows the pass counter.
		for (size_t idx = 0; idx < size; idx++){
			arr[idx] = (arr[idx] >= 0) ? (T)(int)(arr[idx] + 0.5) : (T)(int)(arr[idx] - 0.5);
		}
		finish = __rdtsc();
		if (finish - start < min){
			min = finish - start;
		}
	}
	// Both values are wider than int; "%d" would misread them.
	printf("round#time for %llu elements: %llu\n", (unsigned long long)size, (unsigned long long)min);
}