Пример #1
0
/*
 * Benchmark one matcher pass over the complete contents of TEST_INPUT.
 *
 * The file is loaded into memory, scanned once with matchTableMachine()
 * (isTableMachine != 0) or match() (isTableMachine == 0), and the elapsed
 * time and transfer rate are printed to stdout. Any I/O or allocation
 * failure prints a message to stderr and ends the process with exit(1).
 *
 * machine        - machine to run; it is cast to TableStateMachine* when
 *                  isTableMachine is non-zero.
 * isTableMachine - selects which of the two match routines is timed.
 */
void runTest(StateMachine *machine, int isTableMachine) {
	FILE *file;
	long fileSize;
	int buffSize;
	size_t len;
	char *buff;
	double rate;
	Timer t;
	MachineStats stats;
	int is_heavy, last_idx_in_root;
	double uncommonRate;

	stats.totalFailures = 0;
	stats.totalGotos = 0;

	file = fopen(TEST_INPUT, "rb");
	if (!file) {
		fprintf(stderr, "Error opening file for reading\n");
		exit(1);
	}

	/* Each positioning call can fail (ftell reports -1), so check them all
	 * instead of feeding a negative size into malloc/fread. */
	if (fseek(file, 0L, SEEK_END) != 0) {
		fprintf(stderr, "Error determining file size\n");
		exit(1);
	}
	fileSize = ftell(file);
	if (fileSize < 0 || fseek(file, 0L, SEEK_SET) != 0) {
		fprintf(stderr, "Error determining file size\n");
		exit(1);
	}

	/* The match routines and the report below work with an int length, so
	 * reject files whose size does not survive the narrowing. */
	buffSize = (int)fileSize;
	if ((long)buffSize != fileSize) {
		fprintf(stderr, "Input file is too large\n");
		exit(1);
	}

	/* Ask for at least one byte: malloc(0) may legally return NULL, which
	 * would otherwise be reported as an allocation failure on an empty file. */
	buff = (char*)malloc(buffSize > 0 ? (size_t)buffSize : 1);
	if (buff == NULL) {
		fprintf(stderr, "Error allocating memory for buffer\n");
		exit(1);
	}
	len = fread(buff, sizeof(char), (size_t)buffSize, file);
	if (len != (size_t)buffSize) {
		fprintf(stderr, "Error reading data from file\n");
		exit(1);
	}

	/* Only the match call itself is timed. */
	t.micros = 0;
	if (isTableMachine) {
		startTiming(&t);
		matchTableMachine((TableStateMachine*)machine, NULL, FALSE, buff, buffSize, 1, NULL, NULL, NULL, NULL, &is_heavy, &last_idx_in_root, &uncommonRate);
		endTiming(&t);
	} else {
		startTiming(&t);
		match(machine, buff, buffSize, 0, &stats, 0, 0);
		endTiming(&t);
	}
	rate = GET_TRANSFER_RATE(buffSize, &t);

	/* The same size and rate are printed in both the "No H" and "w/ H" columns. */
	printf("Time(micros)\tData(No H)\tData(w/ H)\tRate(No H) Mb/s\tRate (w/ H) Mb/s\n");
	printf("%8ld\t%9d\t%9d\t%5.4f\t%5.4f\n", t.micros, buffSize, buffSize, rate, rate);

	free(buff);

	fclose(file);
}
Пример #2
0
	// Renders one frame. While `animating` is set, the animation state is
	// advanced and the uniform buffers are refreshed before drawing.
	// The whole frame is bracketed by startTiming()/endTiming().
	void render()
	{
		// Skip the frame entirely while `prepared` is false
		if (!prepared)
		{
			return;
		}

		startTiming();
		if (animating)
		{
			// Count the start delay down first
			if (animStart > 0.0f)
			{
				animStart -= 0.15f * (1.0f / frameTimer);
			}
			// Logical AND: both operands are conditions, not bit masks.
			// (Bitwise & would give the wrong answer for a non-bool `animate`.)
			if (animate && (animStart <= 0.0f))
			{
				timer += 0.5f * (1.0f / frameTimer);
				// Wrap the timer back once it passes 1
				if (timer > 1.0)
				{
					timer -= 1.0f;
				}
			}
			updateUniformBuffers();
		}
		draw();
		endTiming();
	}
Пример #3
0
    void run()
    {
        while (!_done)
        {
            if (_process)
            {
                startTiming(_viewer, otherThreadTimeName);

                //------------------------------------------------------------
                // Your processing goes here.

                // Do nothing for the specified number of  milliseconds, just so we can 
                // see it in the stats.
                osg::Timer_t startTick = osg::Timer::instance()->tick();
                while (osg::Timer::instance()->delta_m(startTick, osg::Timer::instance()->tick()) < _timeToRun)
                {
                    OpenThreads::Thread::YieldCurrentThread();
                }
                //------------------------------------------------------------

                endTiming(_viewer, otherThreadTimeName);

                _process = false;
            }
            else
            {
                OpenThreads::Thread::microSleep(50);
            }
        }
    }
Пример #4
0
/// Busy-waits in the calling thread (yielding on every iteration) until
/// the given number of milliseconds has elapsed. The wait is bracketed by
/// startTiming()/endTiming() under `name`, so the time taken is recorded
/// in the viewer's stats.
void doSomethingAndTimeIt(osgViewer::Viewer& viewer, const std::string& name, double milliseconds)
{
    startTiming(viewer, name);

    //------------------------------------------------------------
    // Your processing goes here.

    // Placeholder workload: burn the requested time so the block is
    // visible in the stats.
    osg::Timer* const clock = osg::Timer::instance();
    const osg::Timer_t begin = clock->tick();
    for (;;)
    {
        const double elapsed = clock->delta_m(begin, clock->tick());
        if (!(elapsed < milliseconds))
        {
            break;
        }
        OpenThreads::Thread::YieldCurrentThread();
    }
    //------------------------------------------------------------

    endTiming(viewer, name);
}
Пример #5
0
/*
 * Benchmark hash_ring_find_node() and print timing statistics to stdout.
 *
 * A ring with numReplicas replicas and numNodes nodes is created, numKeys
 * keys of keySize bytes each are generated, and every key is looked up in
 * each of 100 timed passes. The value returned by endTiming() is formatted
 * below as nanoseconds.
 *
 * hash_fn     - hash function used for the ring (MD5 and SHA1 are named
 *               in the banner; anything else is shown as "unknown").
 * numReplicas - replicas per node.
 * numNodes    - number of nodes added to the ring.
 * numKeys     - number of keys looked up per pass; must be positive.
 * keySize     - size of each key in bytes; must be positive.
 *
 * On invalid arguments or allocation failure a message is written to
 * stderr and the function returns without running the benchmark.
 */
void runBench(HASH_FUNCTION hash_fn, int numReplicas, int numNodes, int numKeys, int keySize) {
    /* Never hand a NULL pointer to "%s" when hash_fn is neither MD5 nor SHA1. */
    const char *hash = "unknown";
    if(hash_fn == HASH_FUNCTION_MD5) hash = "MD5";
    else if(hash_fn == HASH_FUNCTION_SHA1) hash = "SHA1";

    /* Non-positive counts would give a bogus allocation size and a
     * division by zero in the statistics. */
    if(numKeys <= 0 || keySize <= 0) {
        fprintf(stderr, "runBench: numKeys and keySize must be positive\n");
        return;
    }

    printf("----------------------------------------------------\n");
    printf("bench (%s): replicas = %d, nodes = %d, keys: %d, ring size: %d\n", hash, numReplicas, numNodes, numKeys, numReplicas * numNodes);
    printf("----------------------------------------------------\n");
    hash_ring_t *ring = hash_ring_create(numReplicas, hash_fn);
    if(ring == NULL) {
        fprintf(stderr, "runBench: failed to create hash ring\n");
        return;
    }

    addNodes(ring, numNodes);

    /* Multiply in size_t so a large key set cannot overflow int. */
    uint8_t *keys = (uint8_t*)malloc((size_t)keySize * (size_t)numKeys);
    if(keys == NULL) {
        fprintf(stderr, "runBench: failed to allocate key buffer\n");
        hash_ring_free(ring);
        return;
    }
    generateKeys(keys, numKeys, keySize);

    printf("running...\r");

    uint64_t min = 0;
    uint64_t max = 0;
    uint64_t total = 0;
    int times = 100;

    int x, y;
    for(y = 0; y < times; y++) {
        startTiming();
        for(x = 0; x < numKeys; x++) {
            /* Keep the lookup outside assert(): with NDEBUG the assert
             * expression is discarded and nothing would be measured. */
            const void *node = hash_ring_find_node(ring, keys + ((size_t)keySize * (size_t)x), keySize);
            assert(node != NULL);
            (void)node;
        }
        uint64_t result = endTiming();
        if(result > max) max = result;
        /* min == 0 doubles as the "not set yet" marker. */
        if(min == 0 || result < min) min = result;
        total += result;
    }

    /* Do all averaging in floating point; the integer divisions used
     * before truncated the result and could divide by zero. */
    const double totalTime = (double)total;
    const double lookups = (double)numKeys * (double)times;
    const double opsPerSec = total > 0 ? 1000000000.0 / (totalTime / lookups) : 0.0;

    printf("stats: total = %.5fs, avg/lookup: %.5fus, min: %.5fus, max: %.5fus, ops/sec: %.0f\n", 
        totalTime / 1000000000,
        totalTime / lookups / 1000,
        (double)min / numKeys / 1000,
        (double)max / numKeys / 1000,
        opsPerSec);

    free(keys);
    hash_ring_free(ring);
}
Пример #6
0
    // Draws one frame between startTiming() and endTiming(). While
    // `animating` is set, the Y rotation is advanced and the uniform
    // buffers are refreshed before the draw call.
    void render()
    {
        // Nothing happens while `prepared` is false
        if (!prepared)
        {
            return;
        }

        startTiming();

        if (animating)
        {
            // Step the rotation, scaled by the frame timer
            state.rotation.y += 0.05f * frameTimer;

            // Pull the angle back by one full turn once it passes 360
            if (state.rotation.y > 360.0f)
            {
                state.rotation.y -= 360.0f;
            }

            updateUniformBuffers();
        }

        draw();
        endTiming();
    }
Пример #7
0
/*
 * Scan the packets of a dump file with `threads` scanner threads and,
 * when `timing` is non-zero, print per-thread and combined throughput.
 *
 * Flow, as visible in this function:
 *   1. allocate one packet queue and one ScannerData per thread;
 *   2. initialise the packet reader on `path` (with `repeat`) and the
 *      scanners, then the multicore manager;
 *   3. start the packet reader and join it before any scanner is started;
 *   4. start every scanner with a CPU affinity, join them all;
 *   5. print statistics and release the queues and scanner array.
 *
 * Parameters common to both signatures:
 *   path           - dump file handed to packetreader_init().
 *   repeat         - forwarded to packetreader_init().
 *   machine        - forwarded to scanner_init().
 *   isTableMachine - forwarded to scanner_init().
 *   verbose        - forwarded to scanner_init().
 *   timing         - non-zero enables the timer and the statistics output.
 *   threads        - number of scanner threads / packet queues.
 * The longer signature additionally forwards tableMachine and drop to
 * scanner_init(), work_group_size, max_wgs, packets_to_steal and
 * dedicated_use_compressed to multicore_manager_init(), and thresholds to
 * multicore_manager_set_thresholds().
 *
 * NOTE(review): only the #else/#endif of the conditional that selects the
 * signature are visible here; the opening directive lies above this block.
 * The HYBRID_SCANNER branches in the body suggest it is
 * `#ifdef HYBRID_SCANNER` - confirm.
 */
void inspectDumpFile(const char *path, int repeat, StateMachine *machine, TableStateMachine *tableMachine, int isTableMachine,
		int verbose, int timing, int threads, int packets_to_steal, int dedicated_use_compressed,
		int work_group_size, int max_wgs, double *thresholds, int drop) {
#else
void inspectDumpFile(const char *path, int repeat, StateMachine *machine, int isTableMachine, int verbose, int timing, int threads) {
#endif
	double /*rate,*/ combinedRate, threadRate;//, rateWithHeaders;
	Timer t;
	long size;//, sizeWithHeaders;
	int i, cpuid;
#ifdef GLOBAL_TIMING
	GlobalTimerResult gtimer_result;
	int j;
#ifdef PRINT_GLOBAL_TIMER_EVENTS
	GlobalTimerEvent **events;
#endif
#endif
	ScannerData *scanners;
	PacketReaderData reader;
	LinkedList *packet_queues;

	MulticoreManager manager;

#ifdef COUNT_FAIL_PERCENT
	long totalFailures, totalGotos;
#endif

#ifdef PAPI
	// PAPI must be initialised (library first, then thread support) before use;
	// either failure aborts the process.
	if (PAPI_library_init(PAPI_VER_CURRENT) != PAPI_VER_CURRENT) {
		fprintf(stderr, "Cannot init PAPI\n");
		exit(1);
	}

	if (PAPI_thread_init((unsigned long (*)(void))pthread_self) != PAPI_OK) {
		fprintf(stderr, "Cannot init PAPI for threads\n");
		exit(1);
	}
#endif

	// One input queue and one scanner context per thread.
	// NOTE(review): neither allocation result is checked before use.
	packet_queues = (LinkedList*)malloc(sizeof(LinkedList) * threads);
	scanners = (ScannerData*)malloc(sizeof(ScannerData) * threads);

	for (i = 0; i < threads; i++) {
		list_init(&packet_queues[i]);
	}

	// The reader is given all queues; scanner i is bound to queue i.
	packetreader_init(&reader, path, repeat, packet_queues, threads);
	for (i = 0; i < threads; i++) {
#ifdef HYBRID_SCANNER
		scanner_init(&(scanners[i]), i, &manager, machine, tableMachine, isTableMachine, &packet_queues[i], verbose, drop);
#else
		scanner_init(&(scanners[i]), i, &manager, machine, isTableMachine, &packet_queues[i], verbose);
#endif
	}

	// Note: the scanners above already received &manager; it is initialised only here.
#ifdef HYBRID_SCANNER
	multicore_manager_init(&manager, scanners, threads, work_group_size, max_wgs, packets_to_steal, dedicated_use_compressed);
	multicore_manager_set_thresholds(&manager, thresholds);
#else
	multicore_manager_init(&manager, scanners, threads, 1, threads, 0, 0);
#endif
	// The reader is started and joined before any scanner runs and before the
	// timer starts - presumably so that all packets are queued up front and
	// reading is excluded from the measurement (TODO confirm).
	packetreader_start(&reader);

	packetreader_join(&reader);

#ifdef HYBRID_SCANNER
	multicore_manager_start(&manager);
#endif
#ifdef GLOBAL_TIMING
#ifdef PRINT_GLOBAL_TIMER_EVENTS
	events = NULL;
#endif
	global_timer_start(&(manager.gtimer));
#endif
	// The local timer brackets scanner start..join; its value is not printed
	// below (the lines that used it are commented out).
	if (timing) {
		startTiming(&t);
	}

	for (i = 0; i < threads; i++) {
		// If CPUs are ordered [core0,core0,...,core0,core1,core1,...,core1,...]
		//cpuid = i;
		// If CPUs are ordered [core0,core1,...,coreN,core0,core1,...,coreN,...]
		// Even thread indices map to i/2, odd ones to (threads + i)/2.
		cpuid = (i % 2 == 0) ? i / 2 : (threads + i) / 2;
		scanner_start_with_affinity(&(scanners[i]), cpuid);

		// If you use the next line, comment out the pthread_attr_destroy call in scanner_join!!!
		//scanner_start(&(scanners[i]));
	}

	// Wait for every scanner to finish.
	for (i = 0; i < threads; i++) {
		scanner_join(&(scanners[i]));
	}

//	scanner_start(&(scanners[0]));
//	scanner_start(&(scanners[1]));
//	scanner_join(&(scanners[0]));
//	scanner_join(&(scanners[1]));

	if (timing) {
		endTiming(&t);
	}

#ifdef GLOBAL_TIMING
	global_timer_end(&(manager.gtimer));
#endif

#ifdef HYBRID_SCANNER
	multicore_manager_stop(&manager);

	multicore_manager_join(&manager);
#endif

#ifdef GLOBAL_TIMING
	global_timer_join(&(manager.gtimer));
	global_timer_get_results(&(manager.gtimer), &gtimer_result);
#endif

	// Statistics output (only when timing was requested).
	if (timing) {
		//endTiming(&t);
		size = reader.size;
		//sizeWithHeaders = reader.sizeWithHeaders;
		//rate = GET_TRANSFER_RATE(size, &t);
		//rateWithHeaders = GET_TRANSFER_RATE(sizeWithHeaders, &t);

//		printf("Time(micros)\tData(No H)\tData(w/ H)\tRate(No H) Mb/s\tRate (w/ H) Mb/s\n");
		//printf("%8ld\t%9ld\t%9ld\t%5.4f\t%5.4f\n", t.micros, size, sizeWithHeaders, rate, rateWithHeaders);

		printf("TOTAL_BYTES\tTotal data scanned: %ld bytes\n", size);
		//printf("TOTAL_TIME\tTotal time: %ld ms\n", t.micros);
		//printf("TOTAL_THRPT\tTotal throughput: %5.4f Mbps\n", rate);

		// The combined throughput is the sum of the per-thread rates.
		combinedRate = 0;
		printf("Alert mode timer: %ld us\n", manager.alert_mode_timer.micros);
		for (i = 0; i < threads; i++) {
			// The leading `0 &&` permanently disables the alert-mode rate, so the
			// scanner's own transfer rate is always the one used.
			if (0 && manager.alert_mode_used) {
				threadRate = GET_TRANSFER_RATE(scanners[i].bytes_scanned_since_alert, &(manager.alert_mode_timer));
			} else {
				threadRate = GET_SCANNER_TRANSFER_RATE(&(scanners[i]));
			}
			combinedRate += threadRate;
			printf("T_%2d_THRPT\t%5.4f\tMbps\t%lu\tB\t%lu\tB\t%ld\tus\n", i, threadRate, scanners[i].bytes_scanned, scanners[i].bytes_scanned_since_alert, scanners[i].timer.micros);
		}
		printf("COMB_THRPT\t%5.4f\tMbps\n", combinedRate);

#ifdef GLOBAL_TIMING
		//printf("\nGlobal timing:\n");
/*
		printf("Time\t");
		for (j = 0; j < manager.gtimer.intervals; j++) {
			printf("%6ld\t", gtimer_result.times[j]);
		}
		printf("\n");
*/
		// One output row per scanner, one column per interval; results are read
		// from a flat array indexed as [intervals * scanner + interval].
		for (i = 0; i < manager.gtimer.num_scanners; i++) {
			printf("T_%2d_GTIME\t", i);
			for (j = 0; j < manager.gtimer.intervals; j++) {
				printf("%5.3f\t", gtimer_result.results[gtimer_result.intervals * i + j]);
			}
			printf("\n");
		}

#ifdef PRINT_GLOBAL_TIMER_EVENTS
		// j is reused here as the event count returned by global_timer_get_events().
		j = global_timer_get_events(&(manager.gtimer), &events);
		if (j > 0) {
			printf("\nEvents:\n");
			for (i = 0; i < j; i++) {
				printf("Event %d: %s [Time: %d, Source: %s]\n", i, events[i]->text, events[i]->interval, events[i]->source);
			}
		}
#endif
#endif
	}
#ifdef COUNT_FAIL_PERCENT
	// Sum the failure/goto counters of all scanners; printed regardless of `timing`.
	totalFailures = totalGotos = 0;
        for (i = 0; i < threads; i++) {
                totalFailures += scanners[i].stats.totalFailures;
		totalGotos += scanners[i].stats.totalGotos;
        }

        printf("Fail percent: %f\n", ((double)totalFailures) / (totalFailures + totalGotos));
        printf("Total failures: %ld, Total gotos: %ld\n", totalFailures, totalGotos);
#endif

    // Teardown: manager, then (if enabled) the global timer and its results,
    // then the scanner array and every packet queue.
    // NOTE(review): manager.gtimer is destroyed after multicore_manager_destroy(&manager)
    // - confirm the manager's destroy leaves gtimer usable.
    multicore_manager_destroy(&manager);
#ifdef GLOBAL_TIMING
    global_timer_destroy(&(manager.gtimer));
    global_timer_destroy_result(&gtimer_result);
#endif

    free(scanners);
    for (i = 0; i < threads; i++) {
    	//printf("Status of input-queue of thread %d: in=%d, out=%d\n", i, packet_queues[i].in, packet_queues[i].out);
    	list_destroy(&(packet_queues[i]), 1);
    }
    free(packet_queues);
}
Пример #8
0
int main(int argc, char** argv)
{
	cout << endl<< endl << "********************** program start************************ " << endl << endl;

	int    nx = 256,   ny = 256,   nz = 256, nn = nx*ny*nz;
    FLOAT  dx = Lx/(FLOAT)nx,  dy = Ly/(FLOAT)ny,   dz = Lz/(FLOAT)nz;
	FLOAT dt = 0.1*dx*dx/KAPPA;

	int step = 1000;
	double elaps=0.0;
	double getElapsedTime();

	int thread_num=1;

#ifdef _OPENMP
#pragma omp parallel
   {
	 thread_num = omp_get_num_threads();
     if(omp_get_thread_num()==0)cout<<"\nUsed Number of Threads : "<< thread_num <<endl<<endl;
   }
#endif


    // To avoid chaching effects for small message sizes //
    int fact = 1; for(;fact*nn*sizeof(FLOAT)<100e6;++fact);
	cout << "fact = " << fact << endl;

  //  FLOAT* f  = (FLOAT *)scalable_aligned_malloc(sizeof(FLOAT) * nn, SIMDALIGN);
  //FLOAT* fn = (FLOAT *)scalable_aligned_malloc(sizeof(FLOAT) * nn, SIMDALIGN);

	FLOAT* f  = (FLOAT *)_mm_malloc(sizeof(FLOAT) * nn, SIMDALIGN);
	FLOAT* fn = (FLOAT *)_mm_malloc(sizeof(FLOAT) * nn, SIMDALIGN);


    initArray(f ,nx, ny, nz);
    initArray(fn,nx, ny, nz);


    long data = 0;
	FLOAT flops=0.0;

    startTiming();
    for(int n = 0;n<step;++n){

//       flops += diffusion_simd(nx, ny, nz, nn, dx, dy, dz, dt, f, fn);
//       flops += diffusion_peel(nx, ny, nz, nn, dx, dy, dz, dt, f, fn);
       flops += diffusion_tiled(nx, ny, nz, nn, dx, dy, dz, dt, f, fn);

       data+=nn*2*sizeof(FLOAT);
	   swap(&f, &fn);
	   elaps += dt;

    }
    endTiming();


    cout<<"Buffer Size: " <<sizeof(FLOAT)*nn/(1000.0*1000.0) <<" [MB]  Total Data: "<<data/(1000.0*1000.0*1000.0)<<" [GB]"<<endl;
    cout<<"Bandwidth: " <<data/(1000.0*1000.0*1000.0*getElapsedTime())<<"[GB/s]"<<endl;
    cout<<"FLOPS    : " <<flops/(1000.0*1000.0*1000.0*getElapsedTime())<<"[GFLOPS]"<<endl;
    cout<<"Elapsed Time: " <<getElapsedTime()<<endl<<endl;

	error_func(nx, ny, nz, dx, f, elaps);

//    scalable_aligned_free(f );
//    scalable_aligned_free(fn);

		_mm_free(f );
		_mm_free(fn);

	return 0;
}