コード例 #1
0
ファイル: test.c プロジェクト: coldnew/test_altera
void print_dma_bandwidth(vbx_timestamp_t time_start,
                         vbx_timestamp_t time_stop,
                         int bytes,
                         int iterations,
                         double max_megabytes_per_second)
{
	if (time_stop < time_start) {
		printf("Error: DMA stop time (%llu) is less than start time (%llu)!\n",
		       (unsigned long long) time_stop,
		       (unsigned long long) time_start);
		printf("Skipping bandwidth calculation.\n");
		return;
	}

	vbx_timestamp_t cycles = time_stop - time_start;
	vbx_timestamp_t mxp_cycles = vbx_mxp_cycles(cycles);
	double seconds = ((double) cycles) / ((double) vbx_timestamp_freq());

	vbx_timestamp_t avg_mxp_cycles = mxp_cycles/iterations;
	double avg_seconds = seconds/((double) iterations);
	double bytes_per_second = ((double) bytes)/avg_seconds;
	double megabytes_per_second = bytes_per_second/(1024*1024);

	printf("Transfer length in bytes: %d\n", bytes);
	printf("Transfer time in seconds: %s\n", vbx_eng(avg_seconds, 4));
	printf("Transfer time in cycles: %llu\n",
	       (unsigned long long) avg_mxp_cycles);
	// printf("Bytes per second: %s\n", vbx_eng(bytes_per_second, 4));
	printf("Megabytes per second: %s\n", vbx_eng(megabytes_per_second, 4));
	printf("Efficiency: %.0f%%\n",
	       round(megabytes_per_second*100/max_megabytes_per_second));
	printf("Average of %d transfers.\n", iterations);
}
コード例 #2
0
ファイル: test.c プロジェクト: 8l/mxp
int VBX_T(vbw_vec_reverse_test_mm)()
{
	unsigned int aN[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 15, 16, 17, 20, 25, 31, 32, 33, 35, 40, 48, 60, 61, 62, 63, 64, 64, 65,
	                      66, 67, 68, 70, 80, 90, 99, 100, 101, 110, 128, 128, 144, 144, 160, 160, 176, 176, 192, 192, 224, 224,
	                      256, 256, 288, 288, 320, 320, 352, 352, 384, 384, 400, 450, 512, 550, 600, 650, 700, 768, 768, 900,
	                      900, 1023, 1024, 1200, 1400, 1600, 1800, 2048, 2048, 2100, 2200, 2300, 2400, 2500, 2600, 2700, 2800,
	                      2900, 3000, 3100, 3200, 3300, 3400, 3500, 3600, 3700, 3800, 3900, 4000, 4096, 4096, 4100, 4200, 4300,
	                      4400, 4500, 4600, 4700, 4800, 4900, 5000, 6000, 7000, 8000, 8192, 8192, 9000, 10000, 11000, 12000,
	                      13000, 14000, 15000, 16000, 16384, 16384, 20000, 25000, 30000, 32767, 32768, 32768, 35000, 40000,
	                      45000, 50000, 55000, 60000, 65000, 65535, 65536, 65536, 65537, 100000, 128000, 256000, 333333, 528374,
	                      528374 };

	int retval;
	unsigned int N;
	unsigned int NBYTES;
	unsigned int NREPS = 100;
	unsigned int NREPSFORLARGE = 10;
	unsigned int i,j,k;
	vbx_timestamp_t start=0,finish=0;

	for( i=0; i<sizeof(aN)/4; i++ ) {
		N = aN[i];
		//printf( "testing with vector size %d\n", N );

		if(N > 10000) NREPS = NREPSFORLARGE;

		NBYTES = N*sizeof(vbx_mm_t);

		vbx_mm_t *src = (vbx_mm_t *) vbx_shared_malloc( NBYTES );
		vbx_mm_t *dst = (vbx_mm_t *) vbx_shared_malloc( NBYTES );
		//printf("bytes alloc: %d\n", NBYTES );

		if( !src ) VBX_EXIT(-1);
		if( !dst ) VBX_EXIT(-1);

		for ( j=0; j<N; j++ ) {
			dst[j] = -1;                 // Fill the destination with -1
			src[j] = j;                  // Fill the source with enumerated values
		}

//			VBX_T(vbw_vec_reverse_ext)( dst, src, N );

		/** measure performance of function call **/
		start = vbx_timestamp();
		for(k=0; __builtin_expect(k<NREPS,1); k++ ) {
			retval = VBX_T(vbw_vec_reverse_ext)( dst, src, N );
		}
		finish = vbx_timestamp();
		printf( "length %d (%s):\tvbware mm f():\t%llu", N, VBX_EXPAND_AND_QUOTE(BYTEHALFWORD), (unsigned long long) vbx_mxp_cycles((finish-start)/NREPS) );

		#if VERIFY_VBWARE_ALGORITHM
			VBX_T(verify_vector)( src, dst, N );
		#else
			printf(" [VERIFY OFF]");
		#endif

		printf("\treturn value: %X", retval);

		/** measure performance of scalar **/
		vbx_mm_t *A = vbx_remap_cached( src, N*sizeof(vbx_mm_t) );   // Use cached pointers for better performance
		vbx_mm_t *B = vbx_remap_cached( dst, N*sizeof(vbx_mm_t) );
		start = vbx_timestamp();
		for(k=0; k<NREPS; k++ ) {
			unsigned int m;
			for(m=0; m<N; m++) {
				B[N-1-m]=A[m];
			}
		vbx_dcache_flush( A, N*sizeof(vbx_mm_t) );               // Make sure to read from main memory
		vbx_dcache_flush( B, N*sizeof(vbx_mm_t) );               // Make sure writes are committed to memory
		}
		finish = vbx_timestamp();

		printf( "\tscalar (cache friendly):\t%llu", (unsigned long long) vbx_mxp_cycles((finish-start)/NREPS) );

		#if VERIFY_SIMPLE_ALGORITHM
			VBX_T(verify_vector)( src, dst, N );
		#else
			printf(" [VERIFY OFF]");
		#endif
			printf("\tcycles\n");

			vbx_shared_free(src);
			vbx_shared_free(dst);
	}

	printf("All tests passed successfully.\n");

	return 0;
}
コード例 #3
0
ファイル: test.c プロジェクト: 8l/mxp
int VBX_T(vbw_vec_reverse_test)()
{
	unsigned int aN[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 15, 16, 17, 20, 25, 31, 32, 33, 35, 40, 48, 60, 61, 62, 63, 64, 64, 65,
	                      66, 67, 68, 70, 80, 90, 99, 100, 101, 110, 128, 128, 144, 144, 160, 160, 176, 176, 192, 192, 224, 224,
	                      256, 256, 288, 288, 320, 320, 352, 352, 384, 384, 400, 450, 512, 550, 600, 650, 700, 768, 768, 900,
	                      900, 1023, 1024, 1200, 1400, 1600, 1800, 2048, 2048, 2100, 2200, 2300, 2400, 2500, 2600, 2700, 2800,
	                      2900, 3000, 3100, 3200, 3300, 3400, 3500, 3600, 3700, 3800, 3900, 4000, 4096, 4096, 4100, 4200, 4300,
	                      4400, 4500, 4600, 4700, 4800, 4900, 5000, 6000, 7000, 8000, 8192, 8192, 9000, 10000, 11000, 12000,
	                      13000, 14000, 15000, 16000, 16384, 16384, 20000, 25000, 30000, 32767, 32768, 32768, 35000, 40000,
	                      45000, 50000, 55000, 60000, 65000, 65535, 65536, 65536 };

	int retval;
	unsigned int N;
	unsigned int NBYTES;
	unsigned int NREPS = 100;
	unsigned int i,k;

	vbx_timestamp_t start=0,finish=0;

	vbx_mxp_t *this_mxp = VBX_GET_THIS_MXP();
	const unsigned int VBX_SCRATCHPAD_SIZE = this_mxp->scratchpad_size;

	for( i=0; i<sizeof(aN)/4; i++ ) {
		N = aN[i];
		//printf( "testing with vector size %d\n", N );

		NBYTES = sizeof(vbx_sp_t)*N;
		if( 2*NBYTES > VBX_SCRATCHPAD_SIZE ) continue;

		vbx_sp_t *vsrc = vbx_sp_malloc( NBYTES );
		vbx_sp_t *vdst = vbx_sp_malloc( NBYTES );
		//printf("bytes alloc: %d\n", NBYTES );

		if( !vsrc ) VBX_EXIT(-1);
		if( !vdst ) VBX_EXIT(-1);

		#if   ( VBX_TEMPLATE_T == BYTESIZE_DEF | VBX_TEMPLATE_T == UBYTESIZE_DEF )
			unsigned int mask = 0x007F;
		#elif ( VBX_TEMPLATE_T == HALFSIZE_DEF | VBX_TEMPLATE_T == UHALFSIZE_DEF )
			unsigned int mask = 0x7FFF;
		#else
			unsigned int mask = 0xFFFF;
		#endif

		vbx_set_vl( N );
		vbx( SV(T), VMOV, vdst,   -1, 0 );       // Fill the destination vector with -1
		vbx( SE(T), VAND, vsrc, mask, 0 );       // Fill the source vector with enumerated values
		//VBX_T(print_vector)( "vsrcInit", vsrc, N );
		//VBX_T(print_vector)( "vdstInit", vdst, N );

		/** measure performance of function call **/
		vbx_sync();
		start = vbx_timestamp();
		for(k=0; k<NREPS; k++ ) {
			retval = VBX_T(vbw_vec_reverse)( vdst, vsrc, N );
			vbx_sync();
		}
		finish = vbx_timestamp();
		printf( "length %d (%s):\tvbware sp f():\t%llu", N, VBX_EXPAND_AND_QUOTE(BYTEHALFWORD), (unsigned long long) vbx_mxp_cycles((finish-start)/NREPS) );
		//VBX_T(print_vector)( "vsrcPost", vsrc, N );
		//VBX_T(print_vector)( "vdstPost", vdst, N );

		#if VERIFY_VBWARE_ALGORITHM
			VBX_T(verify_vector)( vsrc, vdst, N );
		#else
			printf(" [VERIFY OFF]");
		#endif

		printf("\treturn value: %X", retval);

		vbx_set_vl( N );
		vbx( SE(T), VAND, vsrc, mask, 0 );       // Reset the source vector

		/** measure performance of simple algorithm **/
		vbx_sync();
		vbx_set_vl( 1 );
		vbx_set_2D( N, -sizeof(vbx_sp_t), sizeof(vbx_sp_t), 0 );

		start = vbx_timestamp();
		for(k=0; k<NREPS; k++ ) {
			vbx_2D( VV(T), VMOV, vdst+N-1, vsrc, 0 );
			vbx_sync();
		}
		finish = vbx_timestamp();

		printf( "\tsimple (vl=1):\t%llu", (unsigned long long) vbx_mxp_cycles((finish-start)/NREPS) );

		#if VERIFY_SIMPLE_ALGORITHM
			VBX_T(verify_vector)( vsrc, vdst, N );
		#else
			printf(" [VERIFY OFF]");
		#endif
			printf("\tcycles\n");

		vbx_sp_free();
	}

	vbx_sp_free();
	printf("All tests passed successfully.\n");

	return 0;
}
コード例 #4
0
ファイル: vbx_mtx_fdct.c プロジェクト: 8l/mxp
int main_tile()
{
	int i, j, k, l, base, block_num;
	int x, y;

	int time_start, time_stop;
	unsigned int cycles;
	double vbx_time, scalar_time;
	int wrong;

	int total_errors = 0;

	//all of the initialization can be hard coded without any computation
	vbx_mtx_fdct_t *v = vbx_mtx_fdct_init( coeff_v, image );
	vbx_timestamp_start();

	printf("\nGenerating initial data...\n");

	dt *image  = (dt *) malloc( IMAGE_WIDTH * IMAGE_HEIGHT * sizeof(dt) );
	GenerateRandomImage( image, IMAGE_WIDTH, IMAGE_HEIGHT, 0/*seed*/ );

	// Allocate memory to store results.
	// Results are computed BIGTILE_SIZE halfwords at a time.
	const int BIGTILE_SIZE = NUM_TILE_X * NUM_TILE_Y * DCT_SIZE;
	dt *block_s =                   malloc( BIGTILE_SIZE * sizeof(dt) );
	dt *block_v = (dt *) vbx_shared_malloc( BIGTILE_SIZE * sizeof(dt) );
	dt *coeff_v = (dt *) vbx_shared_malloc( BIGTILE_SIZE * sizeof(dt) );

	//Make an uncached 1D version of the coeff matrix
	for (i = 0; i < NUM_TILE_Y; i++) {             // row
		for (j = 0; j < BLOCK_SIZE; j++) {         // row
			for (k = 0; k < NUM_TILE_X; k++) {     // col
				for (l = 0; l < BLOCK_SIZE; l++) { // col
					coeff_v[i*NUM_TILE_X*DCT_SIZE + j*DCT_SIZE + k*BLOCK_SIZE + l] = cs[j][l];
				}
			}
		}
	}

#ifdef DEBUG
	printf("input matrix is:\n");
	for (i = 0; i < BLOCK_SIZE; i++) {
		base = i * BLOCK_SIZE;
		for (j = 0; j < BLOCK_SIZE; j++) {
			printf("%d ", (int) block_s[base + j]);
		}
		printf("\n");
	}
#endif

	printf("\nRunning DCT...\n");

	time_start = vbx_timestamp();
	for( y = 0; y < IMG_DOWN; y++ ) {
		for( x = 0; x < IMG_ACROSS; x++ ) {
			vbx_mtx_fdct_scalar( block_s, (dt*)cs, image, x/*start_x*/, y/*start_y*/, NUM_TILE_X, NUM_TILE_Y );
		}
	}
	time_stop = vbx_timestamp();

	cycles = time_stop - time_start;
	scalar_time = (double) cycles;
	scalar_time /= (double) vbx_timestamp_freq();
	scalar_time *= 1000.0;		//ms
	vbx_timestamp_t mxp_cycles = vbx_mxp_cycles(cycles);

	printf("%dx%d Block Size\n", BLOCK_SIZE, BLOCK_SIZE);
	printf("Finished, scalar CPU took %0.3f ms \n", scalar_time);
	printf(" CPU Cycles: %d\n", (int) mxp_cycles);
	printf(" CPU Cycles per block: %f\n", mxp_cycles / ((double) (NUM_BLOCKS)));

	vbx_sync(); // wait for image to be prefetched

	time_start = vbx_timestamp();
	for( y = 0; y < IMG_DOWN; y++ ) {
		for( x = 0; x < IMG_ACROSS; x++ ) {
			vbx_mtx_fdct( v, block_v, image, x/*start_x*/, y/*start_y*/, IMG_ACROSS-1,IMG_DOWN-1,NUM_TILE_X, NUM_TILE_Y );
		}
	}
	time_stop = vbx_timestamp();

	cycles = time_stop - time_start;
	vbx_time = (double) cycles;
	vbx_time /= (double) vbx_timestamp_freq();
	vbx_time *= 1000.0;			//ms
	mxp_cycles = vbx_mxp_cycles(cycles);

	printf("Finished, MXP took %0.3f ms \n", vbx_time);
	printf(" CPU Cycles: %d\n", (int) mxp_cycles);
	printf(" CPU Cycles per block: %f\n", mxp_cycles / ((double) (NUM_BLOCKS)));
	printf(" Speedup: %f\n", scalar_time / vbx_time);

	vbx_mxp_t *this_mxp = VBX_GET_THIS_MXP();
	double vbx_mbps = (double) (NUM_BLOCKS) * 1000 / vbx_time;	// blocks per second
	printf("V%d@%dMHz: %dx%d tile, %dx%d blocks, %f blocks/s, %f megapixel/s\n",
	       this_mxp->vector_lanes, this_mxp->core_freq / 1000000, 
	       NUM_TILE_Y, NUM_TILE_X, 
	       BLOCK_SIZE, BLOCK_SIZE,
	       vbx_mbps, (vbx_mbps * DCT_SIZE) / 1000000);

	printf("\nChecking results...\n");

	wrong = 0;
	for (block_num = 0; block_num < NUM_BLOCKS; block_num++) {
		for (i = 0; i < BLOCK_SIZE; i++) {
			base = i * BLOCK_SIZE;
			for (j = 0; j < BLOCK_SIZE; j++) {
				if (block_s[block_num * DCT_SIZE + base + j] != block_v[block_num * DCT_SIZE + base + j]) {
					if (wrong < 5) {
						printf("\nError at %d [%d,%d], result is %d, should be %d\n",
							   block_num, i, j, (int) block_v[block_num * DCT_SIZE + base + j],
							   (int) block_s[block_num * DCT_SIZE + base + j]);
					}
					wrong++;
				}
			}
		}
	}

	printf("wrong is %d\n\n", wrong);
	total_errors += wrong;

	free(block_s);
	vbx_shared_free(block_v);
	vbx_shared_free(coeff_v);

	vbx_mtx_fdct_free( v );

	VBX_TEST_END(total_errors);

	return (0);
}