/*
 * Benchmark the cost of a locked cmpxchg: run NR_LOOPS no-op
 * compare-and-exchange operations on test_val with interrupts and
 * preemption disabled, then report total and per-iteration cycles
 * via printk.
 */
static void do_test_sync_cmpxchg(void)
{
	int ret;
	unsigned long flags;
	unsigned int i;
	cycles_t time1, time2, time;
	u32 rem;

	/* Quiesce the CPU so the measurement is not perturbed. */
	local_irq_save(flags);
	preempt_disable();
	time1 = get_cycles();
	for (i = 0; i < NR_LOOPS; i++) {
#ifdef CONFIG_X86_32
		/* sync_cmpxchg presumably forces the LOCK prefix on 32-bit —
		 * TODO confirm against arch headers */
		ret = sync_cmpxchg(&test_val, 0, 0);
#else
		ret = cmpxchg(&test_val, 0, 0);
#endif
	}
	time2 = get_cycles();
	local_irq_restore(flags);
	preempt_enable();
	time = time2 - time1;

	printk(KERN_ALERT "test results: time for locked cmpxchg\n");
	printk(KERN_ALERT "number of loops: %d\n", NR_LOOPS);
	printk(KERN_ALERT "total time: %llu\n", time);
	/* Per-iteration average; remainder is discarded. */
	time = div_u64_rem(time, NR_LOOPS, &rem);
	printk(KERN_ALERT "-> locked cmpxchg takes %llu cycles\n", time);
	printk(KERN_ALERT "test end\n");
}
static void do_testbaseline(void) { unsigned long flags; unsigned int i; cycles_t time1, time2, time; u32 rem; local_irq_save(flags); preempt_disable(); time1 = get_cycles(); for (i = 0; i < NR_LOOPS; i++) { asm volatile (""); } time2 = get_cycles(); local_irq_restore(flags); preempt_enable(); time = time2 - time1; printk(KERN_ALERT "test results: time for baseline\n"); printk(KERN_ALERT "number of loops: %d\n", NR_LOOPS); printk(KERN_ALERT "total time: %llu\n", time); time = div_u64_rem(time, NR_LOOPS, &rem); printk(KERN_ALERT "-> baseline takes %llu cycles\n", time); printk(KERN_ALERT "test end\n"); }
/*
 * Benchmark the cost of enabling + disabling interrupts: run NR_LOOPS
 * local_irq_restore()/local_irq_save() pairs (STI/CLI on x86) and
 * report total and per-iteration cycles via printk.
 */
static void do_test_int(void)
{
	unsigned long flags;
	unsigned int i;
	cycles_t time1, time2, time;
	u32 rem;

	local_irq_save(flags);
	preempt_disable();
	time1 = get_cycles();
	for (i = 0; i < NR_LOOPS; i++) {
		/* restore-then-save: each iteration re-enables and disables IRQs */
		local_irq_restore(flags);
		local_irq_save(flags);
	}
	time2 = get_cycles();
	local_irq_restore(flags);
	preempt_enable();
	time = time2 - time1;
	printk(KERN_ALERT "test results: time for disabling/enabling interrupts (STI/CLI)\n");
	printk(KERN_ALERT "number of loops: %d\n", NR_LOOPS);
	printk(KERN_ALERT "total time: %llu\n", time);
	/* Per-iteration average; remainder is discarded. */
	time = div_u64_rem(time, NR_LOOPS, &rem);
	printk(KERN_ALERT "-> enabling/disabling interrupts (STI/CLI) takes %llu cycles\n", time);
	printk(KERN_ALERT "test end\n");
}
static void do_test_inc(void) { int ret; unsigned long flags; unsigned int i; cycles_t time1, time2, time; u32 rem; local_t loc_val; local_irq_save(flags); preempt_disable(); time1 = get_cycles(); for (i = 0; i < NR_LOOPS; i++) { ret = local_add_return(10, &loc_val); } time2 = get_cycles(); local_irq_restore(flags); preempt_enable(); time = time2 - time1; printk(KERN_ALERT "test results: time for non locked add return\n"); printk(KERN_ALERT "number of loops: %d\n", NR_LOOPS); printk(KERN_ALERT "total time: %llu\n", time); time = div_u64_rem(time, NR_LOOPS, &rem); printk(KERN_ALERT "-> non locked add return takes %llu cycles\n", time); printk(KERN_ALERT "test end\n"); }
/*
 * Sweep modulation lengths y and, for each, time repeated calls to
 * calibration_modulate_data() until the measured cycle count is stable
 * (the same delta observed more than 10 consecutive times).  Prints the
 * converged delta and cycles-per-unit for each length; aborts the whole
 * sweep when the last inner iteration still disagreed with its
 * predecessor (no convergence at all).
 */
void characterization(void)
{
	int x, y, match;
	cycles_t t1, delta, prev;

	for (y = 100; y < 100000; y = y + 100) {
		match = 0;
		prev = 0;
		for (x = 0; x < 1000; x++) {
			t1 = get_cycles();
			calibration_modulate_data(y);
			delta = (get_cycles() - t1);
			if (delta == prev)
				match++;
			else {
				prev = delta;
				match = 0;
			}
			if (match > 10) {
				/* FIX: %Ld is a nonstandard length modifier;
				 * use portable %llu with an explicit cast. */
				fprintf(stderr, "%d, %llu %f\n", y,
					(unsigned long long)delta,
					(float)delta / y);
				break;
			} else
				usleep(1);	/* let the system breathe between samples */
		}
		if (match == 0) {
			fprintf(stderr, "Convergence STOP \n");
			return;
		}
	}
}
/* Spin until at least @cycles timestamp-counter ticks have elapsed,
 * yielding the pipeline with cpu_relax() between samples. */
static void __timer_delay(unsigned long cycles)
{
	cycles_t t0 = get_cycles();

	while ((get_cycles() - t0) < cycles)
		cpu_relax();
}
/**
 * Execute the next instruction of the simulated PIC, if it is running.
 * Pending interrupts are serviced instead of normal stepping.  When an
 * instruction consumed more than one clock cycle, the next call is
 * skipped (via m_bCanExecuteNextCycle) so simulated time stays in step
 * with instruction cost.  The debugger gets a chance to break after
 * every step.
 */
void GpsimProcessor::executeNext()
{
	if ( !m_bIsRunning )
		return;

	// A previous multi-cycle instruction already consumed this cycle: skip it.
	if ( !m_bCanExecuteNextCycle )
	{
		m_bCanExecuteNextCycle = true;
		return;
	}

	unsigned long long beforeExecuteCount = get_cycles().get();

	if(get_bp().have_interrupt())
	{
		m_pPicProcessor->interrupt();
	}
	else
	{
		m_pPicProcessor->step_one(false); // Don't know what the false is for; gpsim ignores its value anyway

		// Some instructions take more than one cycle to execute, so ignore next cycle if this was the case
		if ( (get_cycles().get() - beforeExecuteCount) > 1 )
			m_bCanExecuteNextCycle = false;
	}

	currentDebugger()->checkForBreak();

	// Also refresh RegisterInfo periodically — once every 10000 cycles.
	// (A previous comment claimed "every 25 milliseconds", but the code
	// measures cycles, not wall time.)
	if ( (beforeExecuteCount % 10000) == 0 )
		registerMemory()->update();
}
/**
 * Generic version.
 *
 * It is not worth optimizing for the Warp case since this is only
 * called from silabs_reset for warped boxes.  See fpga_wait_for_ciar.
 *
 * We do not use KeQueryPerformanceCounter on windows.  From the docs:
 * It is not intended for measuring elapsed time, for computing
 * stalls or waits, or for iterations.
 *
 * Polls BAR0 status bit 0x1000 until it rises.
 * Returns 0 when the bit was observed, 1 on timeout.
 */
int wait_for_ciar(PDEVICE_EXTENSION pdx)
{
#ifdef CONFIG_X86_TSC
	/* Roughly 1ms */
	unsigned long long start = get_cycles();

	while ((fpga_read(pdx->bar0, BAR0_STAT_LINES) & 0x1000) == 0) {
		/* cpu_khz cycles is roughly one millisecond of TSC time */
		if ((get_cycles() - start) > cpu_khz) {
			PK_PRINT("ERROR: wait_for_ciar timeout\n");
			return 1;
		}
		cpu_relax();
	}
	return 0;
#else
	int count = 0;

	/* 35 seems to be max count */
	/* No TSC: bound the poll by iteration count (1000 x 100ns). */
	while ((fpga_read(pdx->bar0, BAR0_STAT_LINES) & 0x1000) == 0) {
		if (++count > 1000) {
			PK_PRINT("ERROR: wait_for_ciar timeout\n");
			return 1;
		}
		ndelay(100);
	}
	return 0;
#endif
}
/*
 * Busy-wait for @loops cycle-counter ticks.  The delta comparison is
 * safe across counter wrap-around.  The disabled block below is an
 * experimental compensation for the loop's own round-trip overhead.
 */
void calibrated_ldelay(cycles_t loops)
{
	cycles_t t1, t2, error;

	t1 = get_cycles();
	while(get_cycles() - t1 < loops );

#if 0
	/*
	 * Here we compensate for the loop itself.
	 * In order to keep it simple we do the following:
	 * t1 -> t2 == LPJ delay loop
	 * t2 -> t1 == Loop RTT overhead
	 */
	t1 = 0;
	t2 = 0;
	error = 0;
	do{
		t1 = get_cycles();
		if(!t2)
			t2 = t1;
		error += t1 - t2;	/* Measure t2 -> t1 == Loop RTT overhead */
		__ldelay(LPJ_MAX_RESOLUTION);
		t2 = get_cycles();
		error += t2 - t1;	/* Measure t1 -> t2 == LPJ delay loop */
	}while(error < loops*2);
#endif
}
/*
 * Measure, in cycles, how long an attached device takes to change the
 * parallel-port status lines (0x379) after we assert bit 3 on the data
 * port (0x378).  Returns the elapsed cycle count, or 0 when the status
 * never changed within the polling budget.  Runs with IRQs disabled so
 * the measurement is not perturbed.
 */
static cycles_t test_response(void)
{
	cycles_t now, end;
	unsigned char in;
	int timeout = 0;

	local_irq_disable();
	in = inb(0x379);	/* snapshot current status lines */
	inb(0x378);		/* dummy read of data port — presumably settles the bus; TODO confirm */
	outb(0x08, 0x378);	/* assert the stimulus bit */
	now = get_cycles();
	while(1) {
		if (inb(0x379) != in)
			break;	/* device responded */
		if (timeout++ > 1000000) {
			/* give up: clear the stimulus and restore IRQs */
			outb(0x00, 0x378);
			local_irq_enable();
			return 0;
		}
	}
	end = get_cycles();
	outb(0x00, 0x378);	/* de-assert stimulus */
	local_irq_enable();
	return end - now;
}
/*
 * Recompute the next CIA timer event.  For each timer that is running
 * and clocked from the system clock (control-register bits), compute
 * the cycle count until its underflow; -1 marks "not scheduled".
 * The ev_cia event is then armed for the soonest expiry.  DIV10 is
 * presumably the E-clock divider expressed in CPU cycles — TODO confirm.
 */
static void CIA_calctimers (void)
{
    long ciaatimea = -1, ciaatimeb = -1, ciabtimea = -1, ciabtimeb = -1;

    eventtab[ev_cia].oldcycles = get_cycles ();
    /* CIA-A timer A: started (0x01) and not counting external pulses (0x20) */
    if ((ciaacra & 0x21) == 0x01) {
	ciaatimea = (DIV10 - div10) + DIV10 * ciaata;
    }
    if ((ciaacrb & 0x61) == 0x01) {
	ciaatimeb = (DIV10 - div10) + DIV10 * ciaatb;
    }
    if ((ciabcra & 0x21) == 0x01) {
	ciabtimea = (DIV10 - div10) + DIV10 * ciabta;
    }
    if ((ciabcrb & 0x61) == 0x01) {
	ciabtimeb = (DIV10 - div10) + DIV10 * ciabtb;
    }
    /* The event is active iff any timer produced a valid expiry time. */
    eventtab[ev_cia].active = (ciaatimea != -1 || ciaatimeb != -1
			       || ciabtimea != -1 || ciabtimeb != -1);
    if (eventtab[ev_cia].active) {
	/* Take the minimum over all scheduled timers. */
	unsigned long int ciatime = ~0L;
	if (ciaatimea != -1)
	    ciatime = ciaatimea;
	if (ciaatimeb != -1 && ciaatimeb < ciatime)
	    ciatime = ciaatimeb;
	if (ciabtimea != -1 && ciabtimea < ciatime)
	    ciatime = ciabtimea;
	if (ciabtimeb != -1 && ciabtimeb < ciatime)
	    ciatime = ciabtimeb;
	eventtab[ev_cia].evtime = ciatime + get_cycles ();
    }
    events_schedule();
}
/*
 * Run a PFQ-lang computation on @skb by entering the program at
 * prg->entry_point.  When PFQ_LANG_PROFILE is defined, additionally
 * accumulate TSC cycles across calls and log the running mean once
 * every 2^20 invocations.
 */
ActionSkBuff
pfq_run(SkBuff skb, struct pfq_computation_tree *prg)
{
#ifdef PFQ_LANG_PROFILE
	static uint64_t nrun, total;	/* invocation count / accumulated cycles */
	uint64_t stop, start;
#endif

	/* NOTE: the two #ifdef arms splice onto the same pfq_bind() call:
	 * profiling captures its result in skb, otherwise it is returned
	 * directly. */
#ifdef PFQ_LANG_PROFILE
	start = get_cycles();
	skb =
#else
	return
#endif
	pfq_bind(skb, prg->entry_point);

#ifdef PFQ_LANG_PROFILE
	stop = get_cycles();
	total += (stop-start);

	if ((nrun++ % 1048576) == 0)
		printk(KERN_INFO "[PFQ] PFQ/lang run: %llu_tsc.\n", total/nrun);

	return skb;
#endif
}
/*
 * Clocksource read callback for the Blackfin cycle counter.
 * With CPU frequency scaling enabled, the raw count is scaled by
 * __bfin_cycles_mod and offset by __bfin_cycles_off — presumably to
 * keep the clocksource monotonic across frequency transitions; TODO
 * confirm against the cpufreq transition handler.
 */
static notrace cycle_t bfin_read_cycles(struct clocksource *cs)
{
#ifdef CONFIG_CPU_FREQ
	return __bfin_cycles_off + (get_cycles() << __bfin_cycles_mod);
#else
	return get_cycles();
#endif
}
/*
 * Calibrate cycles-per-second: sample the cycle counter, sleep for one
 * second, sample again, store the difference in the global `cps`, and
 * print it.
 */
void calc_cps()
{
	uint64_t t_finish, t_begin = get_cycles();

	sleep(1);
	t_finish = get_cycles();

	cps = t_finish - t_begin;
	printf("Cycles/sec: %"PRIu64"\n", cps);
}
/*
 * Recompute the next CIA timer event.  For each timer that is running
 * and clocked from the system clock, compute the cycle count until its
 * underflow (-1 marks "not scheduled").  Timer B in count-Timer-A-
 * underflows mode (0x41) is handled conservatively: its expiry is not
 * scheduled directly (see the #if 0 blocks) because the multiplication
 * may overflow; the A-timer event will re-enter CIA_handler instead.
 * Finally the ev_cia event is armed for the soonest expiry.
 */
static void CIA_calctimers (void)
{
    long int ciaatimea = -1, ciaatimeb = -1, ciabtimea = -1, ciabtimeb = -1;

    eventtab[ev_cia].oldcycles = get_cycles ();
    /* CIA-A timer A: started and counting system clocks */
    if ((ciaacra & 0x21) == 0x01) {
	ciaatimea = (DIV10 - div10) + DIV10 * ciaata;
    }
    if ((ciaacrb & 0x61) == 0x41) {
	/* Timer B will not get any pulses if Timer A is off. */
	if (ciaatimea >= 0) {
	    /* If Timer A is in one-shot mode, and Timer B needs more than
	     * one pulse, it will not underflow. */
	    if (ciaatb == 0 || (ciaacra & 0x8) == 0) {
		/* Otherwise, we can determine the time of the underflow. */
		/* This may overflow, however.  So just ignore this timer and
		   use the fact that we'll call CIA_handler for the A timer. */
#if 0
		ciaatimeb = ciaatimea + ciaala * DIV10 * ciaatb;
#endif
	    }
	}
    }
    if ((ciaacrb & 0x61) == 0x01) {
	ciaatimeb = (DIV10 - div10) + DIV10 * ciaatb;
    }
    /* CIA-B: same logic as above */
    if ((ciabcra & 0x21) == 0x01) {
	ciabtimea = (DIV10 - div10) + DIV10 * ciabta;
    }
    if ((ciabcrb & 0x61) == 0x41) {
	/* Timer B will not get any pulses if Timer A is off. */
	if (ciabtimea >= 0) {
	    /* If Timer A is in one-shot mode, and Timer B needs more than
	     * one pulse, it will not underflow. */
	    if (ciabtb == 0 || (ciabcra & 0x8) == 0) {
		/* Otherwise, we can determine the time of the underflow. */
#if 0
		ciabtimeb = ciabtimea + ciabla * DIV10 * ciabtb;
#endif
	    }
	}
    }
    if ((ciabcrb & 0x61) == 0x01) {
	ciabtimeb = (DIV10 - div10) + DIV10 * ciabtb;
    }
    /* The event is active iff any timer produced a valid expiry time. */
    eventtab[ev_cia].active = (ciaatimea != -1 || ciaatimeb != -1
			       || ciabtimea != -1 || ciabtimeb != -1);
    if (eventtab[ev_cia].active) {
	/* Take the minimum over all scheduled timers. */
	unsigned long int ciatime = ~0L;
	if (ciaatimea != -1)
	    ciatime = ciaatimea;
	if (ciaatimeb != -1 && ciaatimeb < ciatime)
	    ciatime = ciaatimeb;
	if (ciabtimea != -1 && ciabtimea < ciatime)
	    ciatime = ciabtimea;
	if (ciabtimeb != -1 && ciabtimeb < ciatime)
	    ciatime = ciabtimeb;
	eventtab[ev_cia].evtime = ciatime + get_cycles ();
    }
    events_schedule();
}
/**
 * _mdelay - busy-wait for @msec milliseconds.
 * @msec: delay in milliseconds.
 *
 * The normal Linux udelay doesn't work for delays greater than 1ms,
 * so spin on the cycle counter instead.
 */
static void _mdelay(uint64_t msec)
{
	uint64_t start = get_cycles();
	/*
	 * cycles = ms * ticks-per-second / 1000.  Mathematically identical
	 * to (1000 * msec) * freq / 1000000, but with far less risk of
	 * 64-bit overflow for large delays.
	 */
	uint64_t delay = msec * mips_hpt_frequency / 1000;

	/*
	 * Compare the elapsed delta, not an absolute deadline, so the
	 * loop terminates correctly even if the counter wraps.
	 */
	do {
	} while (get_cycles() - start < delay);
}
/*
 * Busy-wait for at least @loops cycle-counter ticks.  cpu_relax() is
 * issued before every elapsed-time check, so at least one relax always
 * occurs even for loops == 0.
 */
inline void __delay(unsigned long loops)
{
	cycles_t begin = get_cycles();

	for (;;) {
		cpu_relax();
		if ((get_cycles() - begin) >= loops)
			break;
	}
}
/**
 * cycles_delay - busy-wait for a delay expressed in CPU cycles.
 * @delay: number of cycles_t ticks to wait.
 *
 * The unsigned delta comparison (now - start) remains correct even if
 * the cycle counter wraps during the wait, so no explicit wrap-around
 * special case is needed.
 */
static inline void cycles_delay(cycles_t delay)
{
	cycles_t start = get_cycles();

	while ((cycles_t)(get_cycles() - start) < delay)
		;
}
/*
 * TSC-warp measurement loop running on both CPUs: each CPU repeatedly
 * takes the shared lock, reads its TSC and publishes it in last_tsc.
 * If the value published by the other CPU (prev) is ahead of our own
 * fresh read, the TSCs are not synchronized; the largest such warp is
 * recorded in max_warp.
 */
static __cpuinit void check_tsc_warp(void)
{
	cycles_t start, now, prev, end;
	int i;

	/* rdtsc_barrier() keeps the TSC read from being reordered. */
	rdtsc_barrier();
	start = get_cycles();
	rdtsc_barrier();
	/*
	 * The measurement runs for 20 msecs:
	 */
	end = start + tsc_khz * 20ULL;
	now = start;

	for (i = 0; ; i++) {
		/*
		 * We take the global lock, measure TSC, save the
		 * previous TSC that was measured (possibly on
		 * another CPU) and update the previous TSC timestamp.
		 */
		__raw_spin_lock(&sync_lock);
		prev = last_tsc;
		rdtsc_barrier();
		now = get_cycles();
		rdtsc_barrier();
		last_tsc = now;
		__raw_spin_unlock(&sync_lock);

		/*
		 * Be nice every now and then (and also check whether
		 * measurement is done [we also insert a 10 million
		 * loops safety exit, so we dont lock up in case the
		 * TSC readout is totally broken]):
		 */
		if (unlikely(!(i & 7))) {
			if (now > end || i > 10000000)
				break;
			cpu_relax();
			touch_nmi_watchdog();
		}
		/*
		 * Outside the critical section we can now see whether
		 * we saw a time-warp of the TSC going backwards:
		 */
		if (unlikely(prev > now)) {
			__raw_spin_lock(&sync_lock);
			max_warp = max(max_warp, prev - now);
			nr_warps++;
			__raw_spin_unlock(&sync_lock);
		}
	}
	/* A zero delta means the TSC never advanced: broken calibration. */
	WARN(!(now-start), "Warning: zero tsc calibration delta: %Ld [max: %Ld]\n",
		now-start, end-start);
}
/*
 * Endless diagnostic loop: measure how many cycles one call to
 * calibrated_timer(MONOTONIC_PULSE_CYCLE, &carrier_ts) takes and print
 * each measurement to stderr.
 */
void compensated_timer(void)
{
	cycles_t before, delta;

	while (1) {
		before = get_cycles();
		calibrated_timer(MONOTONIC_PULSE_CYCLE, &carrier_ts);
		delta = get_cycles() - before;
		/* FIX: %Lu is a nonstandard glibc-only length modifier;
		 * use the portable %llu with an explicit cast. */
		fprintf(stderr, " %llu\n", (unsigned long long)delta);
	}
}
/*
 * Micro-benchmark AES-128 encryption and decryption throughput: time
 * nTimes samples of nBlocks block operations each, and report the
 * median cycles-per-byte for each direction.
 *
 * NOTE(review): the key literal is 33 characters and contains what
 * looks like a typo ("...66789..."); only the bytes consumed by
 * aes_key_expansion for key_sz=128 matter — verify intent.
 * NOTE(review): the decryption loop decrypts pt (the plaintext
 * buffer), not ct; that is fine for throughput timing but is not a
 * round-trip correctness check.
 */
void aes_speed_test()
{
	unsigned int key_sz=128;//256;
	unsigned char pt[16];
	memcpy(pt,"0123456789ABCDEF",16);
	unsigned char ct[16];
	unsigned char *key=(unsigned char *)"0123456789ABCDEF01234566789ABCDEF";
	// sized for the largest (AES-256, 14-round) key schedule
	unsigned char key_expansion[(14+1)*4*4];
	aes_key_expansion( key_sz, key_expansion, key, 1 );

	unsigned const nTimes=100;
	unsigned const nBlocks=100;
	uint64_t cycles_start, cycles_end;
	std::vector<uint64_t> runtimes(nTimes);

	// Encryption: time nBlocks encryptions per sample.
	for (unsigned j = 0; j < nTimes; ++j) {
		cycles_start = get_cycles();
		for (unsigned i = 0; i < nBlocks; ++i) {
			aes_encrypt( key_sz, key_expansion, pt, ct );
		}
		cycles_end = get_cycles();
		runtimes[j] = cycles_end - cycles_start;
	}
	// The median sample is robust against interrupt/scheduling spikes.
	std::sort(runtimes.begin(),runtimes.end());
	printf("AES-128 Encryption Cycles per byte: %.2f\n",double(runtimes[nTimes/2])/double(nBlocks*16));

	// Decryption: same sampling scheme.
	for (unsigned j = 0; j < nTimes; ++j) {
		cycles_start = get_cycles();
		for (unsigned i = 0; i < nBlocks; ++i) {
			aes_decrypt( key_sz, key_expansion, pt, ct );
		}
		cycles_end = get_cycles();
		runtimes[j] = cycles_end - cycles_start;
	}
	std::sort(runtimes.begin(), runtimes.end());
	printf("AES-128 Decryption Cycles per byte: %.2f\n",double(runtimes[nTimes/2])/double(nBlocks*16));
}
void calibrated_tx(void) { int ret; cycles_t t1, t2, t3, delta; while(1){ /* Here we mark the beginning of the cycle */ t1 = get_cycles(); fprintf(stderr,"%Lu\n", t1-t3); /* * Then we sleep for a period of time define as: * [(NSEC_PERIOD - TIMER_JITTER) - ((NSEC_PERIOD - TIMER_JITTER) + TIMER_JITTER)] */ ret = clock_nanosleep(CLOCK_MONOTONIC, TIMER_RELTIME, &carrier_ts, NULL); if(ret) DIE("clock_nanosleep"); /* Calculater the real length of the sleep */ delta = (get_cycles() - t1)/2; /* * If the sleep period is larger than the fundamental period (the timmer * jitter is larger than TIMER_JITTER ) then error out * TODO We need to ripple that value to the next sample */ if(delta > MONOTONIC_PULSE_CYCLE){ fprintf(stderr,"#"); return; } /* * Then we trigger the padding LPJ to reach the exact value of * MONOTONIC_PULSE_CYCLE */ calibrated_ldelay(MONOTONIC_PULSE_CYCLE - delta); /* * Then we mark the time after the whole cycle * t2 - t1 should be very close to MONOTONIC_PULSE_CYCLE */ t2 = get_cycles(); /* * Then we start TX ops * REMEMBER that we can be interrupted at any point in time * so the fundamental TX algo must be time adjusted as well */ calibrated_stream_tx(128, data); //WE need to prob back that jitter to the top of the loop t3 = get_cycles(); fprintf(stderr,"%Lu\n", t3-t1); } }
/*---------------------------------------------------------------------------*/
/*
 * Accelio response callback.  The request's send timestamp was stashed
 * in msg->user_context, so the RTT in cycles is now minus that value.
 * While do_stat is set, min/max/total RTT statistics are accumulated.
 * After releasing the response the message is either retired (when a
 * disconnect is pending) or re-armed with a fresh timestamp and sent
 * as the next request.  Always returns 0.
 */
static int on_response(struct xio_session *session,
		       struct xio_msg *msg,
		       int last_in_rxq,
		       void *cb_user_context)
{
	struct thread_data *tdata = (struct thread_data *)cb_user_context;
	/* RTT = now - send timestamp (stored as a pointer-sized integer) */
	cycles_t rtt = (get_cycles()-(cycles_t)msg->user_context);

	if (tdata->do_stat) {
		if (rtt > tdata->stat.max_rtt)
			tdata->stat.max_rtt = rtt;
		if (rtt < tdata->stat.min_rtt)
			tdata->stat.min_rtt = rtt;
		tdata->stat.tot_rtt += rtt;
		tdata->stat.ccnt++;
	}
	tdata->rx_nr++;

	/* message is no longer needed */
	xio_release_response(msg);

	if (tdata->disconnect) {
		/* all in-flight requests answered: tear the link down */
		if (tdata->rx_nr == tdata->tx_nr)
			xio_disconnect(tdata->conn);
		else
			msg_pool_put(tdata->pool, msg);
		return 0;
	}

	/* reset message */
	msg->in.header.iov_len = 0;
	vmsg_sglist_set_nents(&msg->in, 0);
	/* re-stamp with the new send time before resending */
	msg->user_context = (void *)get_cycles();
	if (xio_send_request(tdata->conn, msg) == -1) {
		if (xio_errno() != EAGAIN)
			printf("**** [%p] Error - xio_send_request " \
			       "failed %s\n",
			       session,
			       xio_strerror(xio_errno()));
		msg_pool_put(tdata->pool, msg);
		return 0;
	}
	if (tdata->do_stat)
		tdata->stat.scnt++;
	tdata->tx_nr++;
	return 0;
}
/*
 * Report the number of cycle-counter ticks spanning a one-second sleep,
 * forever: print the raw counter value before and after each sleep.
 */
int main()
{
	cycles_t c1, c2;

	for (;;) {
		c1 = get_cycles();
		sleep(1);
		c2 = get_cycles();
		/* FIX: cycles_t is a 64-bit unsigned count; %ld mismatched
		 * the type (UB per C99) — print via %llu with a cast. */
		printf("begin: %llu\n", (unsigned long long)c1);
		printf("end: %llu\n", (unsigned long long)c2);
	}
}
/*
 * Try to calibrate the TSC against the Programmable
 * Interrupt Timer and return the frequency of the TSC
 * in kHz.
 *
 * Return ULONG_MAX on failure to calibrate.
 */
static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
{
	u64 tsc, t1, t2, delta;
	unsigned long tscmin, tscmax;
	int pitcnt;

	/* Set the Gate high, disable speaker */
	outb((inb(0x61) & ~0x02) | 0x01, 0x61);

	/*
	 * Setup CTC channel 2* for mode 0, (interrupt on terminal
	 * count mode), binary count. Set the latch register to 50ms
	 * (LSB then MSB) to begin countdown.
	 */
	outb(0xb0, 0x43);
	outb(latch & 0xff, 0x42);
	outb(latch >> 8, 0x42);

	tsc = t1 = t2 = get_cycles();

	pitcnt = 0;
	tscmax = 0;
	tscmin = ULONG_MAX;
	/* Poll port 0x61 bit 5 until the PIT signals terminal count,
	 * tracking the min/max TSC delta between successive reads so an
	 * SMI-induced gap can be detected afterwards. */
	while ((inb(0x61) & 0x20) == 0) {
		t2 = get_cycles();
		delta = t2 - tsc;
		tsc = t2;
		if ((unsigned long) delta < tscmin)
			tscmin = (unsigned int) delta;
		if ((unsigned long) delta > tscmax)
			tscmax = (unsigned int) delta;
		pitcnt++;
	}

	/*
	 * Sanity checks:
	 *
	 * If we were not able to read the PIT more than loopmin
	 * times, then we have been hit by a massive SMI
	 *
	 * If the maximum is 10 times larger than the minimum,
	 * then we got hit by an SMI as well.
	 */
	if (pitcnt < loopmin || tscmax > 10 * tscmin)
		return ULONG_MAX;

	/* Calculate the PIT value */
	delta = t2 - t1;
	do_div(delta, ms);	/* cycles per millisecond == kHz */
	return delta;
}
/*
 * Endless calibration loop: time calibrated_ldelay(MONOTONIC_PULSE_CYCLE)
 * and print each measured cycle count to stderr, with a progress dot
 * once every 100 iterations.
 */
void calibrate_lstream(void)
{
	int x;
	cycles_t before, delta;

	x = 0;
	while (1) {
		before = get_cycles();
		calibrated_ldelay(MONOTONIC_PULSE_CYCLE);
		delta = get_cycles() - before;
		fprintf(stderr, " %Lu\n", delta);
		/* BUG FIX: the counter was never incremented, so the dot
		 * fired on every iteration instead of every 100th.  Also
		 * dropped the unused `ret` local. */
		x++;
		if (!(x % 100))
			fprintf(stderr, ".");
	}
}
/*
 * Measure the cycle cost of one block-cipher operation: after 4 warm-up
 * runs, average the cycles of 8 encrypt (@enc != 0) or decrypt calls on
 * @sg (@blen bytes), with softirqs and IRQs disabled to reduce noise.
 * Prints the rounded per-operation cycle count on success.
 * Returns 0, or the crypto API error from the failing call.
 */
static int test_cipher_cycles(struct blkcipher_desc *desc, int enc,
			      struct scatterlist *sg, int blen)
{
	unsigned long cycles = 0;
	int ret = 0;
	int i;

	local_bh_disable();
	local_irq_disable();

	/* Warm-up run. */
	for (i = 0; i < 4; i++) {
		if (enc)
			ret = crypto_blkcipher_encrypt(desc, sg, sg, blen);
		else
			ret = crypto_blkcipher_decrypt(desc, sg, sg, blen);

		if (ret)
			goto out;
	}

	/* The real thing. */
	for (i = 0; i < 8; i++) {
		cycles_t start, end;

		start = get_cycles();
		if (enc)
			ret = crypto_blkcipher_encrypt(desc, sg, sg, blen);
		else
			ret = crypto_blkcipher_decrypt(desc, sg, sg, blen);
		end = get_cycles();

		if (ret)
			goto out;

		cycles += end - start;
	}

out:
	local_irq_enable();
	local_bh_enable();

	if (ret == 0)
		/* +4 rounds to nearest when dividing by the 8 samples */
		printk("1 operation in %lu cycles (%d bytes)\n",
		       (cycles + 4) / 8, blen);

	return ret;
}
static int interval_tree_test_init(void) { int i, j; unsigned long results; cycles_t time1, time2, _time; printk(KERN_ALERT "interval tree insert/remove"); prandom_seed_state(&rnd, 3141592653589793238ULL); init(); time1 = get_cycles(); for (i = 0; i < PERF_LOOPS; i++) { for (j = 0; j < NODES; j++) interval_tree_insert(nodes + j, &root); for (j = 0; j < NODES; j++) interval_tree_remove(nodes + j, &root); } time2 = get_cycles(); _time = time2 - time1; _time = div_u64(_time, PERF_LOOPS); printk(" -> %llu cycles\n", (unsigned long long)time); printk(KERN_ALERT "interval tree search"); for (j = 0; j < NODES; j++) interval_tree_insert(nodes + j, &root); time1 = get_cycles(); results = 0; for (i = 0; i < SEARCH_LOOPS; i++) for (j = 0; j < SEARCHES; j++) results += search(queries[j], &root); time2 = get_cycles(); _time = time2 - time1; _time = div_u64(_time, SEARCH_LOOPS); results = div_u64(results, SEARCH_LOOPS); printk(" -> %llu cycles (%lu results)\n", (unsigned long long)time, results); return -EAGAIN; /* Fail will directly unload the module */ }
/*
 * Record how far each running CIA timer has counted down since the last
 * event reschedule: convert the CPU cycles elapsed since
 * eventtab[ev_cia].oldcycles into CIA clocks (DIV10 per clock) and
 * store the amount in the per-timer *_passed globals.  Stopped timers
 * keep a passed count of 0.
 */
static void compute_passed_time (void)
{
    unsigned long int ccount = (get_cycles () - eventtab[ev_cia].oldcycles + div10);
    unsigned long int ciaclocks = ccount / DIV10;	/* CPU cycles -> CIA clocks */

    ciaata_passed = ciaatb_passed = ciabta_passed = ciabtb_passed = 0;

    /* CIA A timers */
    if ((ciaacra & 0x21) == 0x01) {
	/* a running timer must not have underflowed without an event */
	assert ((ciaata+1) >= ciaclocks);
	ciaata_passed = ciaclocks;
    }
    if ((ciaacrb & 0x61) == 0x01) {
	assert ((ciaatb+1) >= ciaclocks);
	ciaatb_passed = ciaclocks;
    }

    /* CIA B timers */
    if ((ciabcra & 0x21) == 0x01) {
	assert ((ciabta+1) >= ciaclocks);
	ciabta_passed = ciaclocks;
    }
    if ((ciabcrb & 0x61) == 0x01) {
	assert ((ciabtb+1) >= ciaclocks);
	ciabtb_passed = ciaclocks;
    }
}
/*
 * Schedule a secondary event to fire @t cycles from now.  With @no >= 0
 * the given slot is (re)armed directly; with @no < 0 a slot is taken
 * from the misc pool, preferring a free slot or one that already holds
 * an identical pending event (same expiry, handler and data).  When the
 * pool is exhausted the event is logged and dropped.
 */
void event2_newevent_xx (int no, evt t, uae_u32 data, evfunc2 func)
{
	evt et;
	static int next = ev2_misc;	/* round-robin allocation cursor */

	et = t + get_cycles ();
	if (no < 0) {
		no = next;
		for (;;) {
			if (!eventtab2[no].active)
				break;
			/* identical event already queued: reuse its slot */
			if (eventtab2[no].evtime == et && eventtab2[no].handler == func && eventtab2[no].data == data)
				break;
			no++;
			if (no == ev2_max)
				no = ev2_misc;	/* wrap within the misc pool */
			if (no == next) {
				/* scanned the whole pool without a slot */
				write_log (_T("out of event2's!\n"));
				return;
			}
		}
		next = no;
	}
	eventtab2[no].active = true;
	eventtab2[no].evtime = et;
	eventtab2[no].handler = func;
	eventtab2[no].data = data;
	MISC_handler ();
}