/*
 * Test 2: times M consecutive calls of dot_asm() on two N-element
 * vectors filled with ones.
 *
 * Prints the value returned by the last call followed by the M
 * measured cycle deltas, and returns that last value.
 */
int test2()
{
    short x[N];
    short y[N];
    unsigned int time[M];
    unsigned int before, after;
    int ret;
    int i, k;

    /* Fill both operands with ones. */
    i = 0;
    while (i < N) {
        x[i] = 1;
        y[i] = 1;
        i++;
    }

    /* Bracket every call with two cycle-counter reads. */
    k = 0;
    while (k < M) {
        before = cycles();
        ret = dot_asm(x, y, N);
        after = cycles();
        time[k] = after - before;
        k++;
    }

    printf("Test 2: data in external memory, outboard cycles function\n");
    printf(" ret = %d: run time:\n ", ret);
    k = 0;
    while (k < M) {
        printf("%u ", time[k]);
        k++;
    }
    printf("\n");

    return ret;
}
// Measures the average cost, in cycles, of one ucontext switch.
//
// ov is the measurement overhead that is subtracted from the raw reading.
// The context `uc` is set up to run f2 on a freshly allocated stack, the
// CALL_UCONTEXT macro is expanded BOOST_PP_LIMIT_MAG times as a warm-up and
// then again between two cycles() reads.
cycle_t test_ucontext_cycle( cycle_t ov)
{
    ctx::stack_allocator alloc;

    ::getcontext( & uc);
    // ss_sp is the allocator's result minus the stack size
    // (presumably allocate() hands back the high end of the stack -- confirm).
    uc.uc_stack.ss_sp =
        static_cast< char * >( alloc.allocate( ctx::default_stacksize() ) )
        - ctx::default_stacksize();
    uc.uc_stack.ss_size = ctx::default_stacksize();
    ::makecontext( & uc, f2, 7);

    // cache warm-up
BOOST_PP_REPEAT_FROM_TO( 0, BOOST_PP_LIMIT_MAG, CALL_UCONTEXT, ~)

    cycle_t begin( cycles() );
BOOST_PP_REPEAT_FROM_TO( 0, BOOST_PP_LIMIT_MAG, CALL_UCONTEXT, ~)
    cycle_t elapsed( cycles() - begin);

    // we have two jumps and two measurement overheads
    elapsed -= ov;                  // overhead of measurement
    elapsed /= BOOST_PP_LIMIT_MAG;  // per call
    elapsed /= 2;                   // 2x jump_to c1->c2 && c2->c1

    return elapsed;
}
/*
 * Test 1: times M consecutive calls of dot_generic() on two N-element
 * vectors filled with ones.
 *
 * Prints the value returned by the last call followed by the M
 * measured cycle deltas, and returns that last value.
 */
int test1()
{
    short x[N];
    short y[N];
    unsigned int time[M];
    unsigned int before, after;
    int ret;
    int i, k;

    /* Fill both operands with ones. */
    i = 0;
    while (i < N) {
        x[i] = 1;
        y[i] = 1;
        i++;
    }

    /* Bracket every call with two cycle-counter reads. */
    k = 0;
    while (k < M) {
        before = cycles();
        ret = dot_generic(x, y, N);
        after = cycles();
        time[k] = after - before;
        k++;
    }

    printf("Test 1: Vanilla C\n");
    printf(" ret = %d: run time:\n ", ret);
    k = 0;
    while (k < M) {
        printf("%u ", time[k]);
        k++;
    }
    printf("\n");

    return ret;
}
// Estimates the cycle cost of a single alg.set_key() call.
//
// On each of `loops` iterations a fresh random key is generated, then one
// set_key call and two back-to-back set_key calls are timed. The minimum
// of each timing over all iterations is kept, and the difference
// (two calls - one call) is returned so that the fixed measurement cost
// cancels out.
u4byte k_cycles(const u4byte key_len, AESREF alg, const enum dir_flag f)
{
    u1byte key[32];
    u4byte t_start, t_mid, t_end;
    u4byte best_one, best_two;
    u4byte i;

    // set up a random key of 256 bits
    block_rndfill(key, 32);

    // one untimed set_key to remove any 'first time through' effects
    alg.set_key(key, key_len, f);

    best_one = 0xffffffff;
    best_two = 0xffffffff;

    for (i = 0; i < loops; ++i)
    {
        block_rndfill(key, 32);

        // time one call, then two calls
        cycles(&t_start);
        alg.set_key(key, key_len, f);
        cycles(&t_mid);
        alg.set_key(key, key_len, f);
        alg.set_key(key, key_len, f);
        cycles(&t_end);

        t_end -= t_mid;     // duration of the two-call section
        t_mid -= t_start;   // duration of the one-call section

        // keep the minimum values seen over the loops
        if (best_one > t_mid)
            best_one = t_mid;
        if (best_two > t_end)
            best_two = t_end;
    }

    return best_two - best_one; // one call timing
}
/*
 * Busy-wait for roughly millisecs milliseconds by polling cycles().
 * Non-positive requests are treated as 1 ms. The deadline is derived
 * from sys->cyclefreq.
 */
void
delay(int millisecs)
{
	uint64_t now, deadline;

	if(millisecs <= 0)
		millisecs = 1;

	cycles(&now);
	deadline = now + (sys->cyclefreq*millisecs)/1000ull;
	while(now < deadline)
		cycles(&now);
}
// Returns the averaged cycle count of resuming a coroutine2 pull_type
// built from `fn`: the coroutine is resumed `jobs` times between two
// cycles() reads, `overhead` is subtracted, and the result is divided by
// the number of resumes and then by two.
cycle_type measure_cycles_void( cycle_type overhead)
{
    boost::coroutines2::coroutine< void >::pull_type c{ fn };

    const cycle_type begin( cycles() );
    std::size_t i = 0;
    while ( i < jobs) {
        c();
        ++i;
    }
    cycle_type elapsed = cycles() - begin;

    elapsed -= overhead;    // overhead of measurement
    elapsed /= jobs;        // loops
    elapsed /= 2;           // 2x jump_fcontext

    return elapsed;
}
// Returns the averaged cycle count of constructing (and destroying) one
// coro_type::call_type: `jobs` coroutines are created in a loop between
// two cycles() reads, `overhead` is subtracted, and the result is divided
// by the number of iterations.
cycle_type measure_cycles( cycle_type overhead)
{
    stack_allocator stack_alloc;

    const cycle_type begin( cycles() );
    std::size_t i = 0;
    while ( i < jobs) {
        // The coroutine lives only for the duration of this iteration.
        coro_type::call_type c( fn,
            boost::coroutines::attributes( unwind_stack, preserve_fpu),
            stack_alloc);
        ++i;
    }
    cycle_type elapsed = cycles() - begin;

    elapsed -= overhead;    // overhead of measurement
    elapsed /= jobs;        // loops

    return elapsed;
}
// Returns the averaged cycle count of one jump_fcontext() switch.
// After a single warm-up jump, `jobs` jumps are timed; overhead_cycle()
// is subtracted and the result is divided by the iteration count and by
// two.
cycle_type measure_cycles_fc()
{
    // cache warm-up
    boost::context::jump_fcontext( & fcm, fc, 7, preserve_fpu);

    const cycle_type begin( cycles() );
    std::size_t i = 0;
    while ( i < jobs) {
        boost::context::jump_fcontext( & fcm, fc, 7, preserve_fpu);
        ++i;
    }
    cycle_type elapsed = cycles() - begin;

    elapsed -= overhead_cycle();    // overhead of measurement
    elapsed /= jobs;                // loops
    elapsed /= 2;                   // 2x jump_fcontext

    return elapsed;
}
/*
 * go to user space
 *
 * Updates the Tos structure located at USTKTOP-sizeof(Tos) with the
 * current process's accounting data (kernel cycles, process cycles,
 * pid, core, nixtype, cyclefreq) and then loads up->tls into FSbase.
 * The parameter u is not used here.
 */
void
kexit(Ureg* u)
{
	Proc *up = externup();
	Tos *tos;
	Mach *mp;
	uint64_t now;

	/*
	 * precise time accounting, kernel exit
	 * initialized in exec, sysproc.c
	 */
	tos = (Tos*)(USTKTOP-sizeof(Tos));
	cycles(&now);
	tos->kcycles += now - up->kentry;
	tos->pcycles = up->pcycles;
	tos->pid = up->pid;

	/* use the process's ac Mach when it has one, else the current Mach */
	mp = (up->ac != nil) ? up->ac : machp();
	tos->core = mp->machno;
	tos->nixtype = mp->NIX.nixtype;
	//_pmcupdate(m);

	/*
	 * The process may change its core.
	 * Be sure it has the right cyclefreq.
	 */
	tos->cyclefreq = mp->cyclefreq;

	/* thread local storage */
	wrmsr(FSbase, up->tls);
}
/*
 * Trap handler entry. NOTE(review): the definition is truncated in this
 * view -- the switch statement and the function body are not closed here.
 *
 * For a trap taken from user mode (userureg(ureg) non-zero) it panics if
 * up is nil, records ureg in up->dbgreg and stamps up->kentry via cycles().
 *
 * case PsrMund: pc is moved back by 4. In user mode, after spllo(), if the
 * word at pc is readable and matches either opcode mask, bits 8..11 of the
 * word are extracted; values 10 and 11 are handed to mathtrap(). Any other
 * user-mode case posts the note "sys: trap: invalid opcode".
 * NOTE(review): presumably this is the undefined-instruction trap and
 * 10/11 are the floating-point coprocessor numbers -- confirm.
 */
void trap(Ureg *ureg) { int user; ulong opc, cp; user = userureg(ureg); if(user){ if(up == nil) panic("user trap: up=nil"); up->dbgreg = ureg; cycles(&up->kentry); } switch(ureg->type){ case PsrMund: ureg->pc -= 4; if(user){ spllo(); if(okaddr(ureg->pc, 4, 0)){ opc = *(ulong*)ureg->pc; if((opc & 0x0f000000) == 0x0e000000 || (opc & 0x0e000000) == 0x0c000000){ cp = opc >> 8 & 15; if(cp == 10 || cp == 11){ mathtrap(ureg, opc); break; } } } postnote(up, 1, "sys: trap: invalid opcode", NDebug); break; }
/*
 * Release the queueing lock q.
 *
 * Takes q->use (recording the start time of a contended acquisition
 * via slockstat), complains if q was not marked locked, and then
 * either hands the lock to the first queued process and readies it,
 * or, with no waiter, clears q->locked and q->pc.
 */
void
qunlock(QLock *q)
{
	Proc *next;
	uint64_t t0;

	if(!canlock(&q->use)){
		cycles(&t0);
		lock(&q->use);
		slockstat(getcallerpc(&q), t0);
	}
	if(q->locked == 0)
		print("qunlock called with qlock not held, from %#p\n", getcallerpc(&q));

	next = q->head;
	if(!next){
		/* nobody is waiting: the lock becomes free */
		q->locked = 0;
		q->pc = 0;
		unlock(&q->use);
		return;
	}

	/* dequeue the first waiter and pass the lock on to it */
	q->head = next->qnext;
	if(q->head == 0)
		q->tail = 0;
	unlock(&q->use);
	q->pc = next->qpc;
	ready(next);
}
/*
 * Release a read hold on the reader/writer lock q.
 *
 * Decrements q->readers under q->use. If readers remain or nobody is
 * queued, that is all. Otherwise the head of the queue must be in
 * state QueueingW (panics if not); it is dequeued, q->writer is set,
 * and the process is readied.
 */
void
runlock(RWlock *q)
{
	Proc *waiter;
	uint64_t t0;

	if(!canlock(&q->use)){
		cycles(&t0);
		lock(&q->use);
		slockstat(getcallerpc(&q), t0);
	}

	waiter = q->head;
	q->readers--;
	if(q->readers > 0 || waiter == nil){
		unlock(&q->use);
		return;
	}

	/* start waiting writer */
	if(waiter->state != QueueingW)
		panic("runlock");
	q->head = waiter->qnext;
	if(q->head == 0)
		q->tail = 0;
	q->writer = 1;
	unlock(&q->use);
	ready(waiter);
}
// Returns the averaged cycle count of resuming an asymmetric_coroutine
// pull_type built from `fn_void` with the preserve_fpu attribute: the
// coroutine is resumed `jobs` times between two cycles() reads,
// `overhead` is subtracted, and the result is divided by the number of
// resumes and then by two.
cycle_type measure_cycles_void( cycle_type overhead)
{
    boost::coroutines::asymmetric_coroutine< void >::pull_type c(
        fn_void, boost::coroutines::attributes( preserve_fpu) );

    const cycle_type begin( cycles() );
    std::size_t i = 0;
    while ( i < jobs) {
        c();
        ++i;
    }
    cycle_type elapsed = cycles() - begin;

    elapsed -= overhead;    // overhead of measurement
    elapsed /= jobs;        // loops
    elapsed /= 2;           // 2x jump_fcontext

    return elapsed;
}
ulong _profout(void) { Plink *p; ulong arg; vlong t; arg = _savearg(); p = _tos->prof.pp; if (p == nil || (_tos->prof.pid != 0 && _tos->pid != _tos->prof.pid)) return arg; /* Not our process */ switch(_tos->prof.what){ case Profkernel: /* Add proc cycles on proc entry */ p->time = p->time + _tos->pcycles; goto proftime; case Profuser: /* Subtract kernel cycles on proc entry */ p->time = p->time - _tos->kcycles; /* fall through */ case Proftime: proftime: /* Add cycle counter on proc entry */ cycles((uvlong*)&t); p->time = p->time + t; break; case Profsample: p->time = p->time + _tos->clock; break; } _tos->prof.pp = p->old; return arg; }
/*
 * Release the spin lock l.
 *
 * With LOCKCYCLES enabled, the hold time is stored in l->lockcycles
 * and the maximum seen so far is tracked in maxlockcycles/maxlockpc.
 * Inconsistent states (not locked, taken with ilock, different
 * process) are only reported with print(). After the key is cleared,
 * sched() is called if this was the process's last held lock, a
 * reschedule was delayed, and islo() holds.
 */
void
unlock(Lock *l)
{
	Proc *up = externup();
	uint64_t now;

	if(LOCKCYCLES){
		cycles(&now);
		l->lockcycles = now - l->lockcycles;
		if(l->lockcycles > maxlockcycles){
			maxlockcycles = l->lockcycles;
			maxlockpc = l->_pc;
		}
	}

	if(l->key == 0)
		print("unlock: not locked: pc %#p\n", getcallerpc());
	if(l->isilock)
		print("unlock of ilock: pc %#p, held by %#p\n", getcallerpc(), l->_pc);
	if(l->p != up)
		print("unlock: up changed: pc %#p, acquired at pc %#p, lock p %#p, unlock up %#p\n", getcallerpc(), l->_pc, l->p, up);

	l->m = nil;
	l->key = 0;
	coherence();

	if(!up)
		return;
	if(adec(&up->nlocks) != 0)
		return;

	/*
	 * Call sched if the need arose while locks were held
	 * But, don't do it from interrupt routines, hence the islo() test
	 */
	if(up->delaysched && islo())
		sched();
}
// Runs K experiments. In each one, N objects are created with make(),
// and M timings are taken of calling match() on all N objects; the
// objects are then deleted and display() reports the timings and yields
// their median. Returns cycles(median of the K medians) divided by N.
inline long long test(Object* (*make)(int), int (*match)(Object*))
{
    size_t a = 0; // Accumulator to make sure compiler doesn't take some loop invariants out
    size_t j = 0; // Incremental number for the current path/object combination. Ensures all path get tested.

    std::vector<long long> medians(K); // Final verdict of medians for each of the K experiments
    std::vector<Object*>   objects(N);
    std::vector<long long> timings(M);

    for (size_t k = 0; k < K; ++k)
    {
        // Fresh set of objects for this experiment
        for (size_t n = 0; n < N; ++n)
            objects[n] = make(j++);

        for (size_t m = 0; m < M; ++m)
        {
            const time_stamp started = get_time_stamp();

            for (size_t i = 0; i < N; ++i)
                a += match(objects[i]);

            const time_stamp finished = get_time_stamp();
            timings[m] = finished - started;
        }

        for (std::vector<Object*>::iterator it = objects.begin(); it != objects.end(); ++it)
            delete *it;

        medians[k] = display("test", timings); // We are looking for a median per N iterations
    }

    std::sort(medians.begin(), medians.end());
    const long long overall = medians[K/2];
    return cycles(overall)/N;
}
/*
 * go to user space
 *
 * Updates the Tos structure located at USTKTOP-sizeof(Tos) with the
 * accounting data of m->externup (kernel cycles, process cycles, pid)
 * and with the core, nixtype and cyclefreq of the Mach it runs on.
 * The parameter u is not used here.
 */
void
kexit(Ureg* u)
{
	Mach *m = machp();
	Tos *tos;
	Mach *mp;
	uint64_t now;

	/*
	 * precise time accounting, kernel exit
	 * initialized in exec, sysproc.c
	 */
	tos = (Tos*)(USTKTOP-sizeof(Tos));
	cycles(&now);
	tos->kcycles += now - m->externup->kentry;
	tos->pcycles = m->externup->pcycles;
	tos->pid = m->externup->pid;

	/* use the process's ac Mach when it has one, else the current Mach */
	mp = (m->externup->ac != nil) ? m->externup->ac : m;
	tos->core = mp->machno;
	tos->nixtype = mp->nixtype;
	//_pmcupdate(m);

	/*
	 * The process may change its core.
	 * Be sure it has the right cyclefreq.
	 */
	tos->cyclefreq = mp->cyclefreq;
}
/*
 * Subtracts the current cycle counter from p->pcycles.
 * Together with a later addition of the counter this leaves the
 * elapsed cycles in between accumulated in pcycles.
 */
void
procrestore(Proc *p)
{
	uvlong now;

	cycles(&now);
	p->pcycles -= now;
}
/*
 * Adds the current cycle counter to p->pcycles.
 */
void
procsave(Proc *p)
{
	uvlong now;

	cycles(&now);
	p->pcycles += now;
}
// Measures the averaged cycle count of the CALL_FUNCTION macro body,
// using a boost::function bound to f3.
//
// The macro is expanded BOOST_PP_LIMIT_MAG times as a warm-up and then
// again between two cycles() reads. ov is subtracted as measurement
// overhead; the result is divided by the repeat count and by two.
cycle_t test_function_cycle( cycle_t ov)
{
    boost::function< void() > fn( boost::bind( f3) );

    // cache warm-up
BOOST_PP_REPEAT_FROM_TO( 0, BOOST_PP_LIMIT_MAG, CALL_FUNCTION, ~)

    cycle_t begin( cycles() );
BOOST_PP_REPEAT_FROM_TO( 0, BOOST_PP_LIMIT_MAG, CALL_FUNCTION, ~)
    cycle_t elapsed( cycles() - begin);

    // we have two jumps and two measurement overheads
    elapsed -= ov;                  // overhead of measurement
    elapsed /= BOOST_PP_LIMIT_MAG;  // per call
    elapsed /= 2;                   // 2x jump_to c1->c2 && c2->c1

    return elapsed;
}
// Prints a verdict comparing r.mat_time (M) against r.vis_time (V).
//
// If either time is not positive an error message is written instead.
// Otherwise the output is the relative difference in percent, labelled
// "slower" when V <= M and "faster" otherwise, followed by cycles(V)/N
// and cycles(M)/N.
inline std::ostream& operator<<(std::ostream& os, const verdict& r)
{
    const long long v = r.vis_time;
    const long long m = r.mat_time;

    if (XTL_UNLIKELY(v <= 0 || m <= 0))
        return os << "ERROR: Insufficient timer resolution. Increase number of iterations N";

    int         percent;
    const char* label;

    if (XTL_UNLIKELY(v <= m))
    {
        percent = int(m*100/v-100);
        label   = "% slower";
    }
    else
    {
        percent = int(v*100/m-100);
        label   = "% faster";
    }

    return os << std::setw(3) << percent << label
              << " V=" << std::setw(3) << cycles(v)/N
              << " M=" << std::setw(3) << cycles(m)/N;
}
/*
 * set mach dependent process state for a new process
 *
 * Runs fpusysprocsetup(), stamps p->kentry with the current cycle
 * counter and starts p->pcycles at the negated stamp.
 */
void
procsetup(Proc* p)
{
	fpusysprocsetup(p);

	cycles(&p->kentry);
	p->pcycles = -p->kentry;
}
/*
 * Acquire the spin lock l at the priority level set by splhi().
 *
 * On contention the previous priority level is restored while
 * spinning on l->key, and splhi() is re-entered before each new
 * test-and-set attempt. The wait is recorded with addwaitstat()
 * unless l is waitstatslk itself. The priority level to restore, the
 * caller pc, the owning process and Mach are stored in the lock.
 */
void
ilock(Lock *l)
{
	Proc *up = externup();
	Mpl pl;
	uintptr_t pc;
	uint64_t t0;

	pc = getcallerpc();
	lockstats.locks++;

	pl = splhi();
	if(TAS(&l->key) != 0){
		cycles(&t0);
		lockstats.glare++;
		/*
		 * Cannot also check l->pc, l->m, or l->isilock here
		 * because they might just not be set yet, or
		 * (for pc and m) the lock might have just been unlocked.
		 */
		do{
			lockstats.inglare++;
			splx(pl);
			while(l->key)
				;
			pl = splhi();
		}while(TAS(&l->key) != 0);

		if(l != &waitstatslk)
			addwaitstat(pc, t0, WSlock);
	}

	/* the lock is held from here on */
	machp()->ilockdepth++;
	if(up)
		up->lastilock = l;
	l->pl = pl;
	l->_pc = pc;
	l->p = up;
	l->isilock = 1;
	l->m = machp();
	if(LOCKCYCLES)
		cycles(&l->lockcycles);
}
// Returns the averaged cycle count of one execution_context switch.
// A context running `bar` on a fixedsize_stack is resumed once as a
// warm-up and then `jobs` times between two cycles() reads;
// overhead_cycle() is subtracted and the result is divided by the
// iteration count and by two.
cycle_type measure_cycles_ec()
{
    boost::context::execution_context ctx(
        boost::context::execution_context::current() );

    // cache warm-up
    boost::context::fixedsize_stack alloc;
    boost::context::execution_context ectx( std::allocator_arg, alloc, bar);
    ectx( & ctx);

    const cycle_type begin( cycles() );
    std::size_t i = 0;
    while ( i < jobs) {
        ectx( & ctx);
        ++i;
    }
    cycle_type elapsed = cycles() - begin;

    elapsed -= overhead_cycle();    // overhead of measurement
    elapsed /= jobs;                // loops
    elapsed /= 2;                   // 2x jump_fcontext

    return elapsed;
}
/*
 * Test 0: measures the cost of the cycle counter itself.
 *
 * Takes M pairs of back-to-back cycles() reads, prints the M deltas,
 * and returns the last delta.
 *
 * The original returned time[k] after the print loop had left k == M,
 * which reads one element past the end of the array; the last valid
 * sample, time[M - 1], is returned instead.
 */
int test0(void)
{
    int k;
    unsigned int before, after;
    unsigned int time[M];

    for (k = 0; k < M; k++) {
        before = cycles();
        after = cycles();
        time[k] = after - before;
    }

    printf("Test 0: Cycles only\n");
    for (k = 0; k < M; k++)
        printf("%u ", time[k]);
    printf("\n");

    /* k == M here, so index the last stored sample explicitly. */
    return time[M - 1];
}
/*
 * Refreshes m->tscticks from cycles() and returns the new value.
 * If hz is not nil, *hz receives m->cpuhz.
 */
uvlong
tscticks(uvlong *hz)
{
	if(hz != nil){
		*hz = m->cpuhz;
	}

	cycles(&m->tscticks);	/* Uses the rdtsc instruction */

	return m->tscticks;
}
std::string DHT22::message() { std::stringstream ss; ss << "Hum: "<< humidity()<<" Temp: "<<temperature() << " Total Errors: "<<errors() << " Error Rate: "<<error_rate() << " Cycle: "<<cycles()<<std::endl; return ss.str(); }
// Collects every facet cycle of hf, from facet_cycles_begin() up to
// facet_cycles_end(), and returns them as an Array.
Array facet_cycles(Halffacet hf)
{
    vector<Halffacet_cycle> collected;

    Halffacet_cycle it;
    it = hf->facet_cycles_begin();
    while (it != hf->facet_cycles_end()) {
        collected.push_back(it);
        ++it;
    }

    Array result(collected.begin(), collected.end());
    return result;
}
/*
 * Subtracts the current cycle counter from p->pcycles.
 * Does nothing when p->kp is set.
 */
void
procrestore(Proc *p)
{
	uvlong now;

	if(!p->kp){
		cycles(&now);
		p->pcycles -= now;
	}
}
inline long long display(const char* name, std::vector<long long>& timings) { long long min, max, avg, med, dev; statistics(timings, min, max, avg, med, dev); // Get statistics from timings std::fstream file; file.open((std::string(name)+".csv").c_str(), std::fstream::out | std::fstream::app); if (file) { #if !defined(_MSC_VER) || _MSC_VER >= 1600 // This will convert timings into cycles per iteration std::transform( timings.begin(), timings.end(), std::ostream_iterator<long long>(file, ", "), [](long long t) { return cycles(t)/N; } ); #endif file << "End" << std::endl; } file.close(); std::cout << name << " Time: [" << std::setw(4) << microseconds(min) << " --" << std::setw(5) << microseconds(avg) << "/" << std::setw(4) << microseconds(med) << " --" << std::setw(5) << microseconds(max) << "] Dev = " << std::setw(4) << microseconds(dev) #if defined(XTL_TIMING_METHOD_1) || defined(XTL_TIMING_METHOD_2) << " Cycles/iteration: [" << std::setw(4) << cycles(min)/N << " --" << std::setw(5) << cycles(avg)/N << "/" << std::setw(4) << cycles(med)/N << " --" << std::setw(5) << cycles(max)/N << "]" #endif << std::endl; return med; }