/*
 * Fork point of a thread group.
 *
 * The thread whose tid_map entry is 0 stores the broadcast value in the
 * group's envelope, sets the forked flag, copies thread 0's sense into the
 * group sense and, when a sleep threshold is configured, broadcasts on the
 * alarm condition. Every other thread waits until the group sense equals
 * its own sense and then reads the envelope.
 *
 * tg        - thread group being forked
 * ext_tid   - caller's external thread id (index into tg->tid_map)
 * bcast_val - publishing thread: value to broadcast (NULL broadcasts NULL);
 *             waiting threads: receives the broadcast value (NULL to ignore)
 *
 * Always returns 0.
 */
int ti_threadgroup_fork(ti_threadgroup_t *tg, int16_t ext_tid, void **bcast_val)
{
    if (tg->tid_map[ext_tid] == 0) {
        void *payload = NULL;
        if (bcast_val)
            payload = *bcast_val;
        tg->envelope = payload;

        /* The envelope store is fenced before the flag/sense stores. */
        cpu_sfence();
        tg->forked = 1;
        tg->group_sense = tg->thread_sense[0]->sense;

        /* Threads can only be asleep when a sleep threshold is set. */
        if (tg->sleep_threshold) {
            uv_mutex_lock(&tg->alarm_lock);
            uv_cond_broadcast(&tg->alarm);
            uv_mutex_unlock(&tg->alarm_lock);
        }
        return 0;
    }

    /* Waiting side: spin for up to sleep_threshold cycles, then sleep. */
    uint64_t wait_began = rdtsc();
    for (;;) {
        if (tg->group_sense == tg->thread_sense[tg->tid_map[ext_tid]]->sense)
            break;

        if (tg->sleep_threshold) {
            uint64_t waited = rdtsc() - wait_began;
            if (waited >= tg->sleep_threshold) {
                uv_mutex_lock(&tg->alarm_lock);
                /* Re-check under the lock before blocking on the alarm. */
                if (tg->group_sense != tg->thread_sense[tg->tid_map[ext_tid]]->sense) {
                    uv_cond_wait(&tg->alarm, &tg->alarm_lock);
                }
                uv_mutex_unlock(&tg->alarm_lock);
                /* Restart the spin budget; the loop head re-tests the sense. */
                wait_began = rdtsc();
                continue;
            }
        }
        cpu_pause();
    }

    /* Fence before reading the envelope published by the forking thread. */
    cpu_lfence();
    if (bcast_val)
        *bcast_val = tg->envelope;
    return 0;
}
int main(int argc, char* argv[]) { char b; int bits1[48]; char word[100]; //allocate huge pages long an = 1024*1024*1024; char* a = (char*)mmap(NULL, an, PROT_READ | PROT_WRITE, FLAGS, 0, 0); if (a==MAP_FAILED) { printf("Error: could not allocate huge pages\n"); return 0; } int i=0; for (i=0; i<an; i++) { //fill the array a[i]=0; } for (i=0; i<1000000; i++) { int ra = rand() % an; long low; long high; rdtsc(low, high); b = a[ra]; long t = low; rdtsc(low,high); long tiempo = low-t; printf("%ld %p\n", tiempo, &a[ra]); } }
/*
 * Estimate the timestamp-counter frequency in MHz.
 *
 * Two rdtsc() readings are taken on either side of a 250000 us sleep, each
 * bracketed by gettimeofday() calls (the later call of each pair is the one
 * whose value is used). The result is the cycle difference divided by the
 * elapsed microseconds using integer division, returned as a double.
 */
static double getFrequencyInMHz()
{
    struct timeval began, ended;
    unsigned long long int tsc_first, tsc_last;

    gettimeofday(&began, NULL);
    tsc_first = rdtsc();
    gettimeofday(&began, NULL);

    usleep(250000);

    gettimeofday(&ended, NULL);
    tsc_last = rdtsc();
    gettimeofday(&ended, NULL);

    const unsigned long elapsed_us =
        ((ended.tv_sec - began.tv_sec) * 1000000) + (ended.tv_usec - began.tv_usec);

    /* Integer division: the fractional part of the MHz value is dropped. */
    unsigned long whole_mhz = (unsigned long) (tsc_last - tsc_first) / elapsed_us;

    return (double)whole_mhz;
}
/*
 * Return the timestamp counter relative to a base value.
 *
 * While gd->arch.tsc_base is still zero, the current reading is stored as
 * the base, so that call returns 0.
 */
uint64_t timer_get_tsc(void)
{
    const uint64_t now = rdtsc();

    if (gd->arch.tsc_base == 0)
        gd->arch.tsc_base = now;

    return now - gd->arch.tsc_base;
}
void Handler ( int sig, siginfo_t * extra, void *cruft ) { static int last_i=0; unsigned int i, j; rdtsc(i,j); QString strInfo( "time:%1, %2, [%3] %4HZ "); strInfo.arg( "%1", j ).arg( "%2", i ).arg( "%3", i - last_i ).arg( "%4", ( i - last_i ) * 10 / 1000000 ); qDebug( ) << strInfo; last_i = i; }
/*
 * PRESENT-80 bitslice16 encryption with the key schedule computed on the
 * fly. It takes less memory since subkeys are not stored.
 *
 * plaintext  - BITSLICE16_P input blocks
 * key        - BITSLICE16_P keys of KEY80 16-bit words each
 * ciphertext - BITSLICE16_P output blocks
 *
 * With MEASURE_PERF defined, both key-schedule counters are zeroed (there is
 * no separate key-schedule phase to time) and the encryption is bracketed
 * by rdtsc() readings.
 *
 * Fix: the original zeroed key_schedule_start twice and never touched
 * key_schedule_end, leaving a stale value from whichever cipher ran before.
 */
void PRESENT80bitslice16_cipher_(const u64 plaintext[BITSLICE16_P], const u16 key[BITSLICE16_P][KEY80], u64 ciphertext[BITSLICE16_P])
{
    PRESENT_init();
#ifdef MEASURE_PERF
    key_schedule_start = 0;
    key_schedule_end = 0;
    encrypt_start = rdtsc();
#endif
    PRESENT_CORE_ENCRYPT_AND_KEY_SCHED(BITSLICE16_P, 80);
#ifdef MEASURE_PERF
    encrypt_end = rdtsc();
#endif
    return;
}
// Send a message to all the cores // The message id will be msg_id void IPC_sendToAll(int msg_size, char msg_id) { uint64_t cycle_start, cycle_stop; char *msg; if (msg_size < MIN_MSG_SIZE) { msg_size = MIN_MSG_SIZE; } msg = (char*) malloc(GET_MALLOC_SIZE(sizeof(char) * msg_size)); if (!msg) { perror("IPC_sendToAll allocation error! "); exit(errno); } // malloc is lazy: the pages may not be really allocated yet. // We force the allocation and the fetch of the pages with bzero bzero(msg, msg_size); msg[0] = msg_id; #ifdef DEBUG printf( "[producer %i] going to send message %i of size %i to %i recipients\n", core_id, msg_long[0], msg_size, nb_receivers); #endif int sent = 0; while (sent < msg_size) { rdtsc(cycle_start); sent += sendto(sock, msg, msg_size, 0, (struct sockaddr*) &multicast_addr, sizeof(multicast_addr)); rdtsc(cycle_stop); nb_cycles_send += cycle_stop - cycle_start; } free(msg); }
/*
 * Run runloop() 2 * niters times and return the summed rdtsc() cycle count
 * of the second half; the first niters passes are warm-up and not counted.
 * When want_cache_flush is set, flush_cache() runs before every pass
 * (outside the timed region).
 */
uint64_t testloop1()
{
    const int warmup = niters;
    uint64_t total = 0;
    int pass = 0;

    while (pass < niters + warmup) {
        if (want_cache_flush) {
            flush_cache();
        }

        const uint64_t t_begin = rdtsc();
        /* Perform access multiple times */
        runloop(0, narrays, 0, nitems, 1);
        const uint64_t t_end = rdtsc();

        /* Only passes after the warm-up contribute to the result. */
        if (pass >= warmup) {
            total += t_end - t_begin;
        }
        pass++;
    }

    return total;
}
/*
 * Store in *t the number of rdtsc() ticks elapsed since the first call
 * (more precisely: since the first call that found the saved origin at 0).
 * The value is a raw tick count, not seconds.
 */
void wtime(double *t)
{
    static uint64_t origin = 0;
    const uint64_t now = rdtsc();

    /* Latch the first reading as the reference point. */
    if (origin == 0) {
        origin = now;
    }

    *t = (double)(now - origin);
}
/*! Recompute the elapsed time (delta) as the current time minus the stored
    start time. The current time comes from rdtsc() when both USE_RDTSC and
    HAVE_RDTSC are defined, and from a Tickval constructed with `true`
    otherwise.
  \return reference to this object */
const IntervalTimer& Calculate()
{
#if defined(USE_RDTSC) && defined(HAVE_RDTSC)
    Tickval::ticks current(rdtsc());
#else
    Tickval current(true);
#endif

    delta_ = current - startTime_;
    return *this;
}
/*
 * Read the timestamp counter once and save its two 32-bit halves in PCI
 * config space.
 */
static void store_initial_timestamp(void)
{
    /* On Cougar Point we have two 32bit scratchpad registers available:
     * D0:F0  0xdc (SKPAD)   - receives the low half
     * D31:F2 0xd0 (SATA SP) - receives the high half
     */
    const tsc_t stamp = rdtsc();

    pci_write_config32(PCI_DEV(0, 0x00, 0), 0xdc, stamp.lo);
    pci_write_config32(PCI_DEV(0, 0x1f, 2), 0xd0, stamp.hi);
}
/*
 * TSC test driver: prints the difference between two back-to-back rdtsc()
 * readings, runs the wrtsc tests, and runs the rdtscp tests when CPUID
 * leaf 0x80000001 EDX reports RDTSCP. Returns report_summary().
 */
int main()
{
    const u64 first = rdtsc();
    const u64 second = rdtsc();

    printf("rdtsc latency %u\n", (unsigned)(second - first));

    test_wrtsc(0);
    test_wrtsc(100000000000ull);

    if (!check_cpuid_80000001_edx(CPUID_80000001_EDX_RDTSCP)) {
        printf("rdtscp not supported\n");
    } else {
        test_rdtscp(0);
        test_rdtscp(10);
        test_rdtscp(0x100);
    }

    return report_summary();
}
static void causePagefaults(const char *path) { uint64_t start,end; uint64_t total = 0; uint64_t min = ULLONG_MAX, max = 0; int fd = -1; if(path) { fd = open(path,O_RDONLY); if(fd < 0) { printe("Unable to open '%s'",path); return; } } for(int j = 0; j < TEST_COUNT; ++j) { volatile char *addr = mmap(NULL,MAP_SIZE * PAGE_SIZE,path ? MAP_SIZE * PAGE_SIZE : 0, PROT_READ | PROT_WRITE,MAP_PRIVATE,fd,0); if(!addr) { printe("mmap failed"); return; } for(size_t i = 0; i < MAP_SIZE; ++i) { start = rdtsc(); *(addr + i * PAGE_SIZE) = 0; end = rdtsc(); uint64_t duration = end - start; if(duration < min) min = duration; if(duration > max) max = duration; total += duration; } if(munmap((void*)addr) != 0) printe("munmap failed"); } if(path) close(fd); printf("%-30s: %Lu cycles average\n",path ? path : "NULL",total / (TEST_COUNT * MAP_SIZE)); printf("%-30s: %Lu cycles minimum\n",path ? path : "NULL",min); printf("%-30s: %Lu cycles maximum\n",path ? path : "NULL",max); }
/*
 * Return the time elapsed since start_tsc as a double: the rdtsc() delta
 * divided by get_cpufreq() * 1e6 (get_cpufreq() is presumably in MHz, which
 * would make the result seconds - confirm against get_cpufreq()).
 */
double omp_get_wtime (void)
{
    const double elapsed_cycles = (double) (rdtsc() - start_tsc);
    const double cycles_per_unit = (double) get_cpufreq() * 1000000.0;

    return elapsed_cycles / cycles_per_unit;
}
errval_t timing_sync_timer(void) { #if defined(__x86_64__) || defined(__i386__) uint64_t tscperms; errval_t err = sys_debug_get_tsc_per_ms(&tscperms); assert(err_is_ok(err)); // Exponential backoff loop for(uint64_t time_offset = MIN_DELAY_MS; time_offset <= MAX_DELAY_MS; time_offset *= 2) { uint64_t synctime = rdtsc() + tscperms * time_offset; int waitfor = 0; received = 0; error = SYS_ERR_OK; for(int i = 0; i < MAX_CPUS; i++) { struct intermon_binding *b = NULL; err = intermon_binding_get(i, &b); if(err_no(err) == MON_ERR_NO_MONITOR_FOR_CORE) { continue; } assert(err_is_ok(err)); err = b->tx_vtbl.rsrc_timer_sync(b, NOP_CONT, synctime); assert(err_is_ok(err)); waitfor++; } err = invoke_monitor_sync_timer(synctime); if(err_is_fail(err)) { error = err; } // Collect success/failure replies while(received < waitfor) { messages_wait_and_handle_next(); } if(err_is_fail(error)) { if(err_no(error) != SYS_ERR_SYNC_MISS) { return error; } } else { break; } } return error; #else printf("Phase-locked local clocks not supported on this platform!\n"); return SYS_ERR_OK; #endif }
/*
 * Measure the CPU timestamp-counter rate (and, with CONFIG_X86_APICTIMER,
 * the local APIC timer rate) over the interval between two
 * wait_for_second_tick() calls and store the results in kernel_info_page.
 *
 * The measured counts are stored directly as frequencies, so the interval
 * is presumably one second - confirm against wait_for_second_tick().
 * With CONFIG_X86_APICTIMER the APIC timer is finally re-armed with
 * bus_frequency / (1000000 / TIME_QUANTUM).
 */
void calc_processor_speed()
{
    qword_t cpu_cycles;
#if defined(CONFIG_X86_APICTIMER)
    dword_t bus_cycles;
#endif

#if defined(CONFIG_DEBUG_TRACE_INIT)
    printf("calculating processor speed...\n");
#endif

#if defined(CONFIG_X86_APICTIMER)
    /* set timer to divide-1 mode and reload with a large value */
    setup_apic_timer(0xFFFFFFFF);
#endif

    /* start of the measurement interval */
    wait_for_second_tick();
#if defined(CONFIG_X86_APICTIMER)
    bus_cycles = get_local_apic(X86_APIC_TIMER_CURRENT);
#endif
    cpu_cycles = rdtsc();

    /* end of the measurement interval */
    wait_for_second_tick();
#if defined(CONFIG_X86_APICTIMER)
    /* start value minus end value: the amount the APIC timer moved */
    bus_cycles -= get_local_apic(X86_APIC_TIMER_CURRENT);
#endif
    cpu_cycles = rdtsc() - cpu_cycles;

    /* the 64-bit cycle count is narrowed to dword_t here */
    kernel_info_page.processor_frequency = (dword_t) cpu_cycles;
#if defined(CONFIG_X86_APICTIMER)
    kernel_info_page.bus_frequency = bus_cycles;
#endif

#if defined(CONFIG_DEBUG_TRACE_INIT)
    printf("cpu speed: %d Hz\n", kernel_info_page.processor_frequency);
#if defined(CONFIG_X86_APICTIMER)
    printf("bus speed: %d Hz\n", kernel_info_page.bus_frequency);
#endif
#endif

#if defined(CONFIG_X86_APICTIMER)
    /* re-arm the APIC timer from the measured bus frequency */
    setup_apic_timer(kernel_info_page.bus_frequency / (1000000/TIME_QUANTUM));
#endif
}
/*
 * Piccolo80 (vperm): VPERM_P plaintexts and VPERM_P keys as input.
 *
 * Inputs are copied into 16-byte aligned scratch buffers, the subkeys are
 * derived, the core encryption is run, and the result is copied out to
 * ciphertext_out. With MEASURE_PERF defined, the key schedule and the
 * encryption are each bracketed by rdtsc() readings.
 */
void Piccolo80vperm_cipher(const u64 plaintext_in[VPERM_P], const u16 keys_in[VPERM_P][KEY80], u64 ciphertext_out[VPERM_P]){
    /* Key schedule output: VPERM_P * Piccolo80_SUBKEYS_SIZE bytes */
    u8 subkeys[VPERM_P * Piccolo80_SUBKEYS_SIZE] __attribute__ ((aligned (16)));
    /* 128-bit aligned buffers for xmm memory load */
    u8 keys[VPERM_P * KEY80 * sizeof(u16)] __attribute__ ((aligned (16)));
    u8 plaintext[VPERM_P * sizeof(u64)] __attribute__ ((aligned (16)));
    u8 ciphertext[VPERM_P * sizeof(u64)] __attribute__ ((aligned (16)));

    /* Stage the caller's data in the aligned buffers */
    memcpy(plaintext, plaintext_in, sizeof plaintext);
    memcpy(keys, keys_in, sizeof keys);

#ifdef AVX
    /* Be sure to never enter the 'C' state when mixing VEX and non-VEX code
     * (see http://www.agner.org/optimize/microarchitecture.pdf, 9.12) */
    asm("vzeroupper");
#endif

#ifdef MEASURE_PERF
    key_schedule_start = rdtsc();
#endif
    /* Derive the subkeys */
    Piccolo80vperm_key_schedule(keys, subkeys);
#ifdef MEASURE_PERF
    key_schedule_end = rdtsc();
    encrypt_start = rdtsc();
#endif
    /* Run the core encryption */
    Piccolo80vperm_core(plaintext, subkeys, ciphertext);
#ifdef MEASURE_PERF
    encrypt_end = rdtsc();
#endif

    /* Hand the result back to the caller */
    memcpy(ciphertext_out, ciphertext, sizeof ciphertext);

    return;
}
/*
 * LED128 (vperm): VPERM_P plaintexts and VPERM_P keys as input.
 *
 * Inputs are copied into 16-byte aligned scratch buffers,
 * LED128vperm_key_schedule() fills the subkey buffer, the core encryption
 * is run, and the result is copied out to ciphertext_out. With MEASURE_PERF
 * defined, the key-schedule counters are set to 0 and only the encryption
 * is bracketed by rdtsc() readings.
 */
void LED128vperm_cipher(const u64 plaintext_in[VPERM_P], const u16 keys_in[VPERM_P][KEY128], u64 ciphertext_out[VPERM_P])
{
    /* 128-bit aligned buffers for xmm memory load */
    u8 subkeys[VPERM_P * KEY128 * sizeof(u16)] __attribute__ ((aligned (16)));
    u8 keys[VPERM_P * KEY128 * sizeof(u16)] __attribute__ ((aligned (16)));
    u8 plaintext[VPERM_P * sizeof(u64)] __attribute__ ((aligned (16)));
    u8 ciphertext[VPERM_P * sizeof(u64)] __attribute__ ((aligned (16)));

    /* Stage the caller's data in the aligned buffers */
    memcpy(plaintext, plaintext_in, sizeof plaintext);
    memcpy(keys, keys_in, sizeof keys);

#ifdef AVX
    /* Be sure to never enter the 'C' state when mixing VEX and non-VEX code
     * (see http://www.agner.org/optimize/microarchitecture.pdf, 9.12) */
    asm("vzeroupper");
#endif

    /* The key schedule does merely nothing, so it is not timed */
#ifdef MEASURE_PERF
    key_schedule_start = 0;
#endif
    LED128vperm_key_schedule(keys, subkeys);
#ifdef MEASURE_PERF
    key_schedule_end = 0;
    encrypt_start = rdtsc();
#endif
    /* There is *NO* keyschedule for LED */
    /* Run the core encryption */
    LED128vperm_core(plaintext, subkeys, ciphertext);
#ifdef MEASURE_PERF
    encrypt_end = rdtsc();
#endif

    /* Hand the result back to the caller */
    memcpy(ciphertext_out, ciphertext, sizeof ciphertext);

    return;
}
/*
 * Memory access benchmark. For array sizes 2^2 .. 2^27 (doubling), four
 * loops over `tab` are timed and "<size> <ticks per element>" is appended
 * to one output file each:
 *   zapis         - sequential write (tab[i] += 3)
 *   odczyt        - sequential read
 *   zapisLosowy   - write through the shuffled index table tabIdx
 *   odczytLosowy  - read through the shuffled index table tabIdx
 *
 * Timing uses the file's rdtsc() / RDTSC pair: rdtsc() is called and RDTSC
 * is then read.
 *
 * Fixes:
 *  - the sequential-read measurement read RDTSC for `start` without calling
 *    rdtsc() first, unlike the other three measurements; the missing call
 *    is added so `start` is sampled right before the loop.
 *  - fopen() results are checked before the files are written to.
 *  - the unused local `n` is removed.
 *
 * Returns 0 on success, 1 when an output file cannot be opened.
 */
int main(int argc, char * argv[]){
    long long int size, i, x = 0;
    unsigned long long start, stop;
    FILE *f1, *f2, *f3, *f4;

    (void)argc;
    (void)argv;

    f1 = fopen("zapis","w");
    f2 = fopen("odczyt","w");
    f3 = fopen("zapisLosowy","w");
    f4 = fopen("odczytLosowy","w");
    if (!f1 || !f2 || !f3 || !f4) {
        fprintf(stderr, "cannot open output files\n");
        if (f1) fclose(f1);
        if (f2) fclose(f2);
        if (f3) fclose(f3);
        if (f4) fclose(f4);
        return 1;
    }

    for(size = (1 << 2); size < (1 << 28); size <<= 1){
        for(i=0; i<size; i++)
            tabIdx[i] = i;

        /* sequential write */
        rdtsc();
        start = RDTSC;
        for(i=0; i<size; ++i)
            tab[i] += 3;
        rdtsc();
        stop = RDTSC;
        fprintf(f1,"%lld ",size);
        fprintf(f1,"%llu\n",(stop-start)/size);

        /* sequential read */
        rdtsc();
        start = RDTSC;
        for(i=0; i<size; ++i)
            x = tab[i];
        rdtsc();
        stop = RDTSC;
        fprintf(f2,"%lld ",size);
        fprintf(f2,"%llu\n",(stop-start)/size);

        generateRandTab(tabIdx, size);

        /* random write */
        rdtsc();
        start = RDTSC;
        for(i=0; i<size; ++i)
            tab[tabIdx[i]] += 3;
        rdtsc();
        stop = RDTSC;
        fprintf(f3,"%lld ",size);
        fprintf(f3,"%llu\n",(stop-start)/size);

        /* random read */
        rdtsc();
        start= RDTSC;
        for(i=0; i<size; ++i)
            x = tab[tabIdx[i]];
        rdtsc();
        stop = RDTSC;
        fprintf(f4,"%lld ",size);
        fprintf(f4,"%llu\n",(stop-start)/size);
    }
    (void)x;    /* only written, to give the read loops a destination */

    fclose(f1);
    fclose(f2);
    fclose(f3);
    fclose(f4);
    return 0;
}
// Send a message to all the cores // The message id will be msg_id void IPC_sendToAll(int msg_size, char msg_id) { #ifdef COMPUTE_CYCLES uint64_t cycle_start, cycle_stop; #endif char *msg; int msg_pos_in_ring_buffer; if (msg_size < MIN_MSG_SIZE) { msg_size = MIN_MSG_SIZE; } msg = mpsoc_alloc(msg_size, &msg_pos_in_ring_buffer); if (!msg) { perror("mpsoc_alloc error! "); exit(errno); } bzero(msg, msg_size); msg[0] = msg_id; #ifdef DEBUG printf( "[producer %i] going to send message %i of size %i to %i recipients\n", core_id, msg_long[0], msg_size, nb_receivers); #endif #ifdef COMPUTE_CYCLES rdtsc(cycle_start); #endif mpsoc_sendto(msg, msg_size, msg_pos_in_ring_buffer, -1); #ifdef COMPUTE_CYCLES rdtsc(cycle_stop); nb_cycles_send += cycle_stop - cycle_start; #endif }
int main (int argc, char *argv[]) { int r; /* Récupération arguments */ int size = atoi(argv[1]); int rept = atoi(argv[2]); srand(0); float *a = malloc(size * size * sizeof *a); /*Initialize*/ initialize(size,(float (*)[size]) a); /* Warmup */ printf("Valeur s = %f\n",kernel(size, (float (*)[size]) a)); /* Stockage des résultats */ float results[rept]; float sum = 0.0; float denominateur; float numerateur; /* Répétitions */ for (r=0; r<rept; r++){ //printf("%d\n",r); uint64_t t1 = rdtsc(); kernel(size, (float (*)[size]) a); uint64_t t2 = rdtsc(); denominateur = t2-t1; numerateur = size*size; //printf("%.6f\n", denominateur/numerateur); results[r]=denominateur/numerateur; sum+=denominateur/numerateur; } //printf("%d\n",r); /* Affichage performance */ qsort(results, rept, sizeof(float), &cmpfunc); printf("moy = %.6f\n", (float)sum/(float)rept); printf("min = %.6f\n", results[0]); printf("max = %.6f\n", results[rept-1]); printf("med = %.6f\n", results[rept/2]); return 0; }
void polybench_timer_stop() { #ifndef POLYBENCH_CYCLE_ACCURATE_TIMER polybench_t_end = rtclock (); #else polybench_c_end = rdtsc (); #endif #ifdef POLYBENCH_LINUX_FIFO_SCHEDULER polybench_linux_standard_scheduler (); #endif }
/*! Restart the interval: the start time is set to the current time.
    With USE_RDTSC and HAVE_RDTSC defined the returned value is the stored
    delta_ as it was before the restart; otherwise it is AsDouble() evaluated
    before the restart.
  \return the old delta as double */
double Reset()
{
#if defined(USE_RDTSC) && defined(HAVE_RDTSC)
    const Tickval::ticks previous(delta_);
    startTime_ = rdtsc();
#else
    const double previous(AsDouble());
    startTime_.now();
#endif

    return previous;
}
/*
 * Count TSC ticks over one programmed timer interval.
 *
 * Writes 0xb0 to port 0x43 and the two bytes of i8254_RELOAD_COUNT to port
 * 0x42, samples the TSC, busy-waits until bit 0x20 of port 0x61 is set,
 * samples the TSC again and returns the first 32-bit word of the 64-bit
 * difference (presumably the low word - confirm against sub_64_64()).
 */
int __init tsc_get_speed(void)
{
    u32 elapsed[2];
    u32 began[2];
    u32 finished[2];

    /* program the timer with the reload count, low byte first */
    outb(0xb0, 0x43);
    outb((i8254_RELOAD_COUNT) & 0xFF, 0x42);
    outb((i8254_RELOAD_COUNT >> 8) & 0xFF, 0x42);

    rdtsc(began[0], began[1]);

    /* poll until the status bit is raised */
    for (;;) {
        if (inb(0x61) & 0x20)
            break;
    }

    rdtsc(finished[0], finished[1]);

    sub_64_64(elapsed, finished, began);

    return (elapsed[0]);
}
/*
 * Replay test whose first recorded event must be an rdtsc: the counter is
 * read before anything else, then two breakpoint helpers are called around
 * a message print, and finally the success marker is printed.
 * Statement order is part of the test - do not reorder.
 */
int main(void)
{
    /* NO SYSCALLS BETWEEN HERE AND RDTSC: next event for
     * replay must be rdtsc */
    rdtsc();            /* value intentionally discarded */
    breakpoint2();

    atomic_printf("Write syscall...\n");
    breakpoint3();

    atomic_puts("EXIT-SUCCESS");
    return 0;
}
/*
 * Build a name made of the decimal text of one rand() value.
 *
 * The generator is re-seeded from rdtsc() on every call, then a single
 * rand() result is converted with string_itoa().
 *
 * Returns the string produced by string_itoa(); presumably heap-allocated
 * and owned by the caller - confirm against string_itoa().
 *
 * Fix: the original first stored the result of string_new() in the same
 * pointer and then overwrote it, so that string could never be released.
 * The redundant call is removed.
 */
char* generarNombreRandom(){
    srand(rdtsc());     /* seed for the random number */
    return string_itoa(rand());
}
void tests_mm_reconf() { unsigned myid = my_cpu_info()->cpu_id; void *p_buffer = 0; size_t size = 16*KB; // 4 pages, fits into L1$ void membench() { unsigned i, j; uint64_t t1, t2; volatile uint32_t *p = (volatile uint32_t*)p_buffer; t1 = rdtsc(); for (i=0; i<4096; i++) { for (j=0; j<size; j += 4) { p[10]++; } } t2 = rdtsc(); printf("membench: %u tics/access\n", (t2-t1)/4096/(size/4)); }
// This function is called whenever new data arrives for client void handle_data_arrived(char *payload, size_t data_len) { volatile uint8_t *b =(uint8_t *)payload; if (read_incoming) { for (int i = 0; i< data_len; i++) { acc += (b[i]); } } if (is_server) { return; } #if TRACE_ONLY_LLNET trace_event(TRACE_SUBSYS_LLNET, TRACE_EVENT_LLNET_APPRX, 0); #endif // TRACE_ONLY_LLNET // record completion time cycles_t tsc = rdtsc(); cycles_t result[1] = { tsc - sent_at, }; if (bench_ctl_add_run(bench_ctl, result)) { uint64_t tscperus = tscperms / 1000; printf("cycles per us %"PRIu64"\n", tscperus); // Output our results bench_ctl_dump_csv_bincounting(bench_ctl, 0, 100, 9 * tscperus, 25 * tscperus, out_prefix, tscperus); bench_ctl_dump_analysis(bench_ctl, 0, out_prefix, tscperus); // bench_ctl_dump_csv(bench_ctl, out_prefix, tscperus); #if TRACE_ONLY_LLNET trace_event(TRACE_SUBSYS_LLNET, TRACE_EVENT_LLNET_STOP, 0); size_t trsz = trace_dump(trbuf, sizeof(trbuf) - 1, NULL); trbuf[trsz] = 0; printf("\n\n\n\nTrace results:\n%s\n\n\n", trbuf); #endif // TRACE_ONLY_LLNET bench_ctl_destroy(bench_ctl); terminate_benchmark(); printf("pkt content some is %zd\n", acc); return; } start_next_iteration(); } // end function: handle_data_arrived
/*
 * Benchmark pthread_create(): TIMES threads running run_thread are created
 * and the rdtsc() ticks spent inside each pthread_create() call are summed.
 * The total and the per-creation average are printed.
 *
 * Fixes:
 *  - the pthread_create() result is checked, so failed creations are no
 *    longer silently counted as successful samples;
 *  - every thread is detached right after creation (outside the timed
 *    region). The original never joined or detached them, so their
 *    resources were never released.
 *
 * Returns 0 on success, 1 if a thread cannot be created.
 */
int main(){
    long start, end, total;
    double each;

    puts(" --- [ Benchmarking Thread Creation ] --- ");

    total = 0;
    pthread_t td;
    for (int i = 0; i < TIMES; i++){
        start = rdtsc();
        int rc = pthread_create(&td, NULL, run_thread, NULL);
        end = rdtsc();

        if (rc != 0) {
            fprintf(stderr, " pthread_create failed at iteration %d (error %d)\n", i, rc);
            return 1;
        }
        total += end - start;

        /* release the thread's resources when it finishes; not timed */
        pthread_detach(td);
    }

    each = total * 1.0 / TIMES ;
    printf(" Time to run %d iteration : %ld \n", TIMES, total);
    printf(" Average Thread Creation  : %f \n", each);
    return 0;
}
unsigned long long prof_start(int task) { #ifdef BIGPROF task_stack[depth]=task; #endif if (depth<9) depth++; else elog("depth overflow (%d)!",task); return rdtsc(); }