/*
 * Reads the whole TEST_INPUT file into memory and times one scan of it,
 * using either the table-driven machine (isTableMachine != 0; `machine`
 * is cast to TableStateMachine*) or the regular match() path, then
 * prints the measured transfer rate.
 *
 * Fixes: the ftell() result is now validated (ftell returns -1L on
 * failure, and an empty file previously led to malloc(0) followed by a
 * bogus "read error"), and the stream/buffer are released on the error
 * paths before exiting.
 */
void runTest(StateMachine *machine, int isTableMachine) {
	FILE *file;
	long fileSize;
	int buffSize, len;
	char *buff;
	double rate;
	Timer t;
	MachineStats stats;
	int is_heavy, last_idx_in_root;
	double uncommonRate;

	stats.totalFailures = 0;
	stats.totalGotos = 0;

	file = fopen(TEST_INPUT, "rb");
	if (!file) {
		fprintf(stderr, "Error opening file for reading\n");
		exit(1);
	}
	/* Determine the input size by seeking to the end and back. */
	fseek(file, 0L, SEEK_END);
	fileSize = ftell(file);
	fseek(file, 0L, SEEK_SET);
	if (fileSize <= 0) {
		/* ftell() returns -1L on error; a zero-byte file cannot be timed. */
		fprintf(stderr, "Error determining input file size\n");
		fclose(file);
		exit(1);
	}
	buffSize = (int)fileSize;

	buff = (char*)malloc(sizeof(char) * buffSize);
	if (buff == NULL) {
		fprintf(stderr, "Error allocating memory for buffer\n");
		fclose(file);
		exit(1);
	}
	len = fread(buff, sizeof(char), buffSize, file);
	if (len != buffSize) {
		fprintf(stderr, "Error reading data from file\n");
		free(buff);
		fclose(file);
		exit(1);
	}

	t.micros = 0;
	if (isTableMachine) {
		startTiming(&t);
		matchTableMachine((TableStateMachine*)machine, NULL, FALSE, buff, buffSize, 1, NULL, NULL, NULL, NULL, &is_heavy, &last_idx_in_root, &uncommonRate);
		endTiming(&t);
	} else {
		startTiming(&t);
		match(machine, buff, buffSize, 0, &stats, 0, 0);
		endTiming(&t);
	}

	rate = GET_TRANSFER_RATE(buffSize, &t);
	/* Headers are not tracked here, so the "No H" and "w/ H" columns repeat
	 * the same size/rate. */
	printf("Time(micros)\tData(No H)\tData(w/ H)\tRate(No H) Mb/s\tRate (w/ H) Mb/s\n");
	printf("%8ld\t%9d\t%9d\t%5.4f\t%5.4f\n", t.micros, buffSize, buffSize, rate, rate);

	free(buff);
	fclose(file);
}
void render() { // Render frame if (prepared) { startTiming(); if (animating) { if (animStart > 0.0f) { animStart -= 0.15f * (1.0f / frameTimer); } if ((animate) & (animStart <= 0.0f)) { timer += 0.5f * (1.0f / frameTimer); if (timer > 1.0) { timer -= 1.0f; } } updateUniformBuffers(); } draw(); endTiming(); } }
void run() { while (!_done) { if (_process) { startTiming(_viewer, otherThreadTimeName); //------------------------------------------------------------ // Your processing goes here. // Do nothing for the specified number of milliseconds, just so we can // see it in the stats. osg::Timer_t startTick = osg::Timer::instance()->tick(); while (osg::Timer::instance()->delta_m(startTick, osg::Timer::instance()->tick()) < _timeToRun) { OpenThreads::Thread::YieldCurrentThread(); } //------------------------------------------------------------ endTiming(_viewer, otherThreadTimeName); _process = false; } else { OpenThreads::Thread::microSleep(50); } } }
/// Will just sleep for the given number of milliseconds in the same thread /// as the caller, recording the time taken in the viewer's stats. void doSomethingAndTimeIt(osgViewer::Viewer& viewer, const std::string& name, double milliseconds) { startTiming(viewer, name); //------------------------------------------------------------ // Your processing goes here. // Do nothing for the specified number of milliseconds, just so we can // see it in the stats. osg::Timer_t startTick = osg::Timer::instance()->tick(); while (osg::Timer::instance()->delta_m(startTick, osg::Timer::instance()->tick()) < milliseconds) { OpenThreads::Thread::YieldCurrentThread(); } //------------------------------------------------------------ endTiming(viewer, name); }
/*
 * Benchmarks hash_ring_find_node(): builds a ring with numNodes nodes of
 * numReplicas replicas each, generates numKeys random keys of keySize
 * bytes, then runs `times` passes of numKeys lookups and prints min/avg/
 * max pass timings (endTiming() yields nanoseconds per pass) plus the
 * lookup rate.
 *
 * Fixes: an unrecognized hash_fn no longer passes NULL to printf's %s
 * (undefined behavior); the key-buffer allocation is checked; the final
 * statistics are computed in floating point — the original integer
 * divisions total/numKeys and total/(numKeys*times) truncated before
 * the cast to double, skewing avg/lookup and ops/sec.
 */
void runBench(HASH_FUNCTION hash_fn, int numReplicas, int numNodes, int numKeys, int keySize) {
    const char *hash = "UNKNOWN"; /* was NULL: printf("%s", NULL) is UB */
    if(hash_fn == HASH_FUNCTION_MD5) hash = "MD5";
    else if(hash_fn == HASH_FUNCTION_SHA1) hash = "SHA1";

    printf("----------------------------------------------------\n");
    printf("bench (%s): replicas = %d, nodes = %d, keys: %d, ring size: %d\n",
        hash, numReplicas, numNodes, numKeys, numReplicas * numNodes);
    printf("----------------------------------------------------\n");

    hash_ring_t *ring = hash_ring_create(numReplicas, hash_fn);
    addNodes(ring, numNodes);

    uint8_t *keys = (uint8_t*)malloc(keySize * numKeys);
    if(keys == NULL) {
        fprintf(stderr, "Error allocating memory for keys\n");
        exit(1);
    }
    generateKeys(keys, numKeys, keySize);

    printf("running...\r");

    uint64_t min = 0;
    uint64_t max = 0;
    uint64_t total = 0;
    int times = 100;

    int x, y;
    for(y = 0; y < times; y++) {
        startTiming();
        for(x = 0; x < numKeys; x++) {
            /* Every key must map to some node once nodes were added. */
            assert(hash_ring_find_node(ring, keys + (keySize * x), keySize) != NULL);
        }
        uint64_t result = endTiming(); /* ns for one pass of numKeys lookups */
        if(result > max) max = result;
        if(min == 0 || result < min) min = result;
        total += result;
    }

    printf("stats: total = %.5fs, avg/lookup: %.5fus, min: %.5fus, max: %.5fus, ops/sec: %.0f\n",
        (double)total / 1000000000,
        ((double)total / numKeys / 1000) / times,
        (double)min / numKeys / 1000,
        (double)max / numKeys / 1000,
        1000000000 / ((double)total / ((double)numKeys * times)));

    free(keys);
    hash_ring_free(ring);
}
void render() { if (prepared) { startTiming(); if (animating) { // Update rotation state.rotation.y += 0.05f * frameTimer; if (state.rotation.y > 360.0f) { state.rotation.y -= 360.0f; } updateUniformBuffers(); } draw(); endTiming(); } }
void inspectDumpFile(const char *path, int repeat, StateMachine *machine, TableStateMachine *tableMachine, int isTableMachine, int verbose, int timing, int threads, int packets_to_steal, int dedicated_use_compressed, int work_group_size, int max_wgs, double *thresholds, int drop) { #else void inspectDumpFile(const char *path, int repeat, StateMachine *machine, int isTableMachine, int verbose, int timing, int threads) { #endif double /*rate,*/ combinedRate, threadRate;//, rateWithHeaders; Timer t; long size;//, sizeWithHeaders; int i, cpuid; #ifdef GLOBAL_TIMING GlobalTimerResult gtimer_result; int j; #ifdef PRINT_GLOBAL_TIMER_EVENTS GlobalTimerEvent **events; #endif #endif ScannerData *scanners; PacketReaderData reader; LinkedList *packet_queues; MulticoreManager manager; #ifdef COUNT_FAIL_PERCENT long totalFailures, totalGotos; #endif #ifdef PAPI if (PAPI_library_init(PAPI_VER_CURRENT) != PAPI_VER_CURRENT) { fprintf(stderr, "Cannot init PAPI\n"); exit(1); } if (PAPI_thread_init((unsigned long (*)(void))pthread_self) != PAPI_OK) { fprintf(stderr, "Cannot init PAPI for threads\n"); exit(1); } #endif packet_queues = (LinkedList*)malloc(sizeof(LinkedList) * threads); scanners = (ScannerData*)malloc(sizeof(ScannerData) * threads); for (i = 0; i < threads; i++) { list_init(&packet_queues[i]); } packetreader_init(&reader, path, repeat, packet_queues, threads); for (i = 0; i < threads; i++) { #ifdef HYBRID_SCANNER scanner_init(&(scanners[i]), i, &manager, machine, tableMachine, isTableMachine, &packet_queues[i], verbose, drop); #else scanner_init(&(scanners[i]), i, &manager, machine, isTableMachine, &packet_queues[i], verbose); #endif } #ifdef HYBRID_SCANNER multicore_manager_init(&manager, scanners, threads, work_group_size, max_wgs, packets_to_steal, dedicated_use_compressed); multicore_manager_set_thresholds(&manager, thresholds); #else multicore_manager_init(&manager, scanners, threads, 1, threads, 0, 0); #endif packetreader_start(&reader); packetreader_join(&reader); 
#ifdef HYBRID_SCANNER multicore_manager_start(&manager); #endif #ifdef GLOBAL_TIMING #ifdef PRINT_GLOBAL_TIMER_EVENTS events = NULL; #endif global_timer_start(&(manager.gtimer)); #endif if (timing) { startTiming(&t); } for (i = 0; i < threads; i++) { // If CPUs are ordered [core0,core0,...,core0,core1,core1,...,core1,...] //cpuid = i; // If CPUs are ordered [core0,core1,...,coreN,core0,core1,...,coreN,...] cpuid = (i % 2 == 0) ? i / 2 : (threads + i) / 2; scanner_start_with_affinity(&(scanners[i]), cpuid); // If you use the next line, comment out the pthread_attr_destroy call in scanner_join!!! //scanner_start(&(scanners[i])); } for (i = 0; i < threads; i++) { scanner_join(&(scanners[i])); } // scanner_start(&(scanners[0])); // scanner_start(&(scanners[1])); // scanner_join(&(scanners[0])); // scanner_join(&(scanners[1])); if (timing) { endTiming(&t); } #ifdef GLOBAL_TIMING global_timer_end(&(manager.gtimer)); #endif #ifdef HYBRID_SCANNER multicore_manager_stop(&manager); multicore_manager_join(&manager); #endif #ifdef GLOBAL_TIMING global_timer_join(&(manager.gtimer)); global_timer_get_results(&(manager.gtimer), >imer_result); #endif if (timing) { //endTiming(&t); size = reader.size; //sizeWithHeaders = reader.sizeWithHeaders; //rate = GET_TRANSFER_RATE(size, &t); //rateWithHeaders = GET_TRANSFER_RATE(sizeWithHeaders, &t); // printf("Time(micros)\tData(No H)\tData(w/ H)\tRate(No H) Mb/s\tRate (w/ H) Mb/s\n"); //printf("%8ld\t%9ld\t%9ld\t%5.4f\t%5.4f\n", t.micros, size, sizeWithHeaders, rate, rateWithHeaders); printf("TOTAL_BYTES\tTotal data scanned: %ld bytes\n", size); //printf("TOTAL_TIME\tTotal time: %ld ms\n", t.micros); //printf("TOTAL_THRPT\tTotal throughput: %5.4f Mbps\n", rate); combinedRate = 0; printf("Alert mode timer: %ld us\n", manager.alert_mode_timer.micros); for (i = 0; i < threads; i++) { if (0 && manager.alert_mode_used) { threadRate = GET_TRANSFER_RATE(scanners[i].bytes_scanned_since_alert, &(manager.alert_mode_timer)); } else { threadRate = 
GET_SCANNER_TRANSFER_RATE(&(scanners[i])); } combinedRate += threadRate; printf("T_%2d_THRPT\t%5.4f\tMbps\t%lu\tB\t%lu\tB\t%ld\tus\n", i, threadRate, scanners[i].bytes_scanned, scanners[i].bytes_scanned_since_alert, scanners[i].timer.micros); } printf("COMB_THRPT\t%5.4f\tMbps\n", combinedRate); #ifdef GLOBAL_TIMING //printf("\nGlobal timing:\n"); /* printf("Time\t"); for (j = 0; j < manager.gtimer.intervals; j++) { printf("%6ld\t", gtimer_result.times[j]); } printf("\n"); */ for (i = 0; i < manager.gtimer.num_scanners; i++) { printf("T_%2d_GTIME\t", i); for (j = 0; j < manager.gtimer.intervals; j++) { printf("%5.3f\t", gtimer_result.results[gtimer_result.intervals * i + j]); } printf("\n"); } #ifdef PRINT_GLOBAL_TIMER_EVENTS j = global_timer_get_events(&(manager.gtimer), &events); if (j > 0) { printf("\nEvents:\n"); for (i = 0; i < j; i++) { printf("Event %d: %s [Time: %d, Source: %s]\n", i, events[i]->text, events[i]->interval, events[i]->source); } } #endif #endif } #ifdef COUNT_FAIL_PERCENT totalFailures = totalGotos = 0; for (i = 0; i < threads; i++) { totalFailures += scanners[i].stats.totalFailures; totalGotos += scanners[i].stats.totalGotos; } printf("Fail percent: %f\n", ((double)totalFailures) / (totalFailures + totalGotos)); printf("Total failures: %ld, Total gotos: %ld\n", totalFailures, totalGotos); #endif multicore_manager_destroy(&manager); #ifdef GLOBAL_TIMING global_timer_destroy(&(manager.gtimer)); global_timer_destroy_result(>imer_result); #endif free(scanners); for (i = 0; i < threads; i++) { //printf("Status of input-queue of thread %d: in=%d, out=%d\n", i, packet_queues[i].in, packet_queues[i].out); list_destroy(&(packet_queues[i]), 1); } free(packet_queues); }
// Entry point for the 3-D diffusion-stencil benchmark: runs `step` time
// steps of the tiled diffusion kernel on a 256^3 grid, reports bandwidth,
// FLOPS and elapsed time, then checks the result against error_func().
int main(int argc, char** argv) {
	cout << endl<< endl << "********************** program start************************ " << endl << endl;

	// Grid resolution and derived cell sizes.  Lx/Ly/Lz, KAPPA, FLOAT and
	// SIMDALIGN are project-wide macros/typedefs defined elsewhere —
	// presumably the domain extents, diffusion coefficient, floating-point
	// type and SIMD alignment; TODO confirm against the project headers.
	int nx = 256, ny = 256, nz = 256, nn = nx*ny*nz;
	FLOAT dx = Lx/(FLOAT)nx, dy = Ly/(FLOAT)ny, dz = Lz/(FLOAT)nz;
	// Time step proportional to dx^2 / kappa (explicit-scheme style scaling).
	FLOAT dt = 0.1*dx*dx/KAPPA;
	int step = 1000;
	double elaps=0.0;          // simulated physical time accumulated over the run
	double getElapsedTime();   // forward declaration: elapsed wall-clock time since startTiming()
	int thread_num=1;
#ifdef _OPENMP
#pragma omp parallel
	{
		thread_num = omp_get_num_threads();
		// Only the master thread prints the team size.
		if(omp_get_thread_num()==0)cout<<"\nUsed Number of Threads : "<< thread_num <<endl<<endl;
	}
#endif

	// To avoid caching effects for small message sizes
	// NOTE(review): `fact` (smallest multiplier pushing the buffer past
	// ~100 MB) is computed and printed but not used in the allocation
	// below — confirm whether the multiplier was meant to scale nn.
	int fact = 1;
	for(;fact*nn*sizeof(FLOAT)<100e6;++fact);
	cout << "fact = " << fact << endl;

	// SIMD-aligned source/destination buffers (TBB variant kept for reference).
	// FLOAT* f = (FLOAT *)scalable_aligned_malloc(sizeof(FLOAT) * nn, SIMDALIGN);
	//FLOAT* fn = (FLOAT *)scalable_aligned_malloc(sizeof(FLOAT) * nn, SIMDALIGN);
	FLOAT* f = (FLOAT *)_mm_malloc(sizeof(FLOAT) * nn, SIMDALIGN);
	FLOAT* fn = (FLOAT *)_mm_malloc(sizeof(FLOAT) * nn, SIMDALIGN);

	initArray(f ,nx, ny, nz);
	initArray(fn,nx, ny, nz);

	long data = 0;      // total bytes moved (one read + one write of the grid per step)
	FLOAT flops=0.0;    // floating-point operation count as reported by the kernel
	startTiming();
	for(int n = 0;n<step;++n){
		// Alternative kernels kept for comparison:
		// flops += diffusion_simd(nx, ny, nz, nn, dx, dy, dz, dt, f, fn);
		// flops += diffusion_peel(nx, ny, nz, nn, dx, dy, dz, dt, f, fn);
		flops += diffusion_tiled(nx, ny, nz, nn, dx, dy, dz, dt, f, fn);
		data+=nn*2*sizeof(FLOAT);   // count one full read and one full write of the grid
		swap(&f, &fn);              // ping-pong the buffers for the next step
		elaps += dt;
	}
	endTiming();

	cout<<"Buffer Size: " <<sizeof(FLOAT)*nn/(1000.0*1000.0) <<" [MB] Total Data: "<<data/(1000.0*1000.0*1000.0)<<" [GB]"<<endl;
	cout<<"Bandwidth: " <<data/(1000.0*1000.0*1000.0*getElapsedTime())<<"[GB/s]"<<endl;
	cout<<"FLOPS : " <<flops/(1000.0*1000.0*1000.0*getElapsedTime())<<"[GFLOPS]"<<endl;
	cout<<"Elapsed Time: " <<getElapsedTime()<<endl<<endl;

	// Validate the computed field at simulated time `elaps`.
	error_func(nx, ny, nz, dx, f, elaps);

	// scalable_aligned_free(f );
	// scalable_aligned_free(fn);
	_mm_free(f );
	_mm_free(fn);

	return 0;
}