Exemple #1
0
void print_dma_bandwidth(vbx_timestamp_t time_start,
                         vbx_timestamp_t time_stop,
                         int bytes,
                         int iterations,
                         double max_megabytes_per_second)
{
	if (time_stop < time_start) {
		printf("Error: DMA stop time (%llu) is less than start time (%llu)!\n",
		       (unsigned long long) time_stop,
		       (unsigned long long) time_start);
		printf("Skipping bandwidth calculation.\n");
		return;
	}

	vbx_timestamp_t cycles = time_stop - time_start;
	vbx_timestamp_t mxp_cycles = vbx_mxp_cycles(cycles);
	double seconds = ((double) cycles) / ((double) vbx_timestamp_freq());

	vbx_timestamp_t avg_mxp_cycles = mxp_cycles/iterations;
	double avg_seconds = seconds/((double) iterations);
	double bytes_per_second = ((double) bytes)/avg_seconds;
	double megabytes_per_second = bytes_per_second/(1024*1024);

	printf("Transfer length in bytes: %d\n", bytes);
	printf("Transfer time in seconds: %s\n", vbx_eng(avg_seconds, 4));
	printf("Transfer time in cycles: %llu\n",
	       (unsigned long long) avg_mxp_cycles);
	// printf("Bytes per second: %s\n", vbx_eng(bytes_per_second, 4));
	printf("Megabytes per second: %s\n", vbx_eng(megabytes_per_second, 4));
	printf("Efficiency: %.0f%%\n",
	       round(megabytes_per_second*100/max_megabytes_per_second));
	printf("Average of %d transfers.\n", iterations);
}
Exemple #2
0
int main_tile()
{
	int i, j, k, l, base, block_num;
	int x, y;

	int time_start, time_stop;
	unsigned int cycles;
	double vbx_time, scalar_time;
	int wrong;

	int total_errors = 0;

	//all of the initialization can be hard coded without any computation
	vbx_mtx_fdct_t *v = vbx_mtx_fdct_init( coeff_v, image );
	vbx_timestamp_start();

	printf("\nGenerating initial data...\n");

	dt *image  = (dt *) malloc( IMAGE_WIDTH * IMAGE_HEIGHT * sizeof(dt) );
	GenerateRandomImage( image, IMAGE_WIDTH, IMAGE_HEIGHT, 0/*seed*/ );

	// Allocate memory to store results.
	// Results are computed BIGTILE_SIZE halfwords at a time.
	const int BIGTILE_SIZE = NUM_TILE_X * NUM_TILE_Y * DCT_SIZE;
	dt *block_s =                   malloc( BIGTILE_SIZE * sizeof(dt) );
	dt *block_v = (dt *) vbx_shared_malloc( BIGTILE_SIZE * sizeof(dt) );
	dt *coeff_v = (dt *) vbx_shared_malloc( BIGTILE_SIZE * sizeof(dt) );

	//Make an uncached 1D version of the coeff matrix
	for (i = 0; i < NUM_TILE_Y; i++) {             // row
		for (j = 0; j < BLOCK_SIZE; j++) {         // row
			for (k = 0; k < NUM_TILE_X; k++) {     // col
				for (l = 0; l < BLOCK_SIZE; l++) { // col
					coeff_v[i*NUM_TILE_X*DCT_SIZE + j*DCT_SIZE + k*BLOCK_SIZE + l] = cs[j][l];
				}
			}
		}
	}

#ifdef DEBUG
	printf("input matrix is:\n");
	for (i = 0; i < BLOCK_SIZE; i++) {
		base = i * BLOCK_SIZE;
		for (j = 0; j < BLOCK_SIZE; j++) {
			printf("%d ", (int) block_s[base + j]);
		}
		printf("\n");
	}
#endif

	printf("\nRunning DCT...\n");

	time_start = vbx_timestamp();
	for( y = 0; y < IMG_DOWN; y++ ) {
		for( x = 0; x < IMG_ACROSS; x++ ) {
			vbx_mtx_fdct_scalar( block_s, (dt*)cs, image, x/*start_x*/, y/*start_y*/, NUM_TILE_X, NUM_TILE_Y );
		}
	}
	time_stop = vbx_timestamp();

	cycles = time_stop - time_start;
	scalar_time = (double) cycles;
	scalar_time /= (double) vbx_timestamp_freq();
	scalar_time *= 1000.0;		//ms
	vbx_timestamp_t mxp_cycles = vbx_mxp_cycles(cycles);

	printf("%dx%d Block Size\n", BLOCK_SIZE, BLOCK_SIZE);
	printf("Finished, scalar CPU took %0.3f ms \n", scalar_time);
	printf(" CPU Cycles: %d\n", (int) mxp_cycles);
	printf(" CPU Cycles per block: %f\n", mxp_cycles / ((double) (NUM_BLOCKS)));

	vbx_sync(); // wait for image to be prefetched

	time_start = vbx_timestamp();
	for( y = 0; y < IMG_DOWN; y++ ) {
		for( x = 0; x < IMG_ACROSS; x++ ) {
			vbx_mtx_fdct( v, block_v, image, x/*start_x*/, y/*start_y*/, IMG_ACROSS-1,IMG_DOWN-1,NUM_TILE_X, NUM_TILE_Y );
		}
	}
	time_stop = vbx_timestamp();

	cycles = time_stop - time_start;
	vbx_time = (double) cycles;
	vbx_time /= (double) vbx_timestamp_freq();
	vbx_time *= 1000.0;			//ms
	mxp_cycles = vbx_mxp_cycles(cycles);

	printf("Finished, MXP took %0.3f ms \n", vbx_time);
	printf(" CPU Cycles: %d\n", (int) mxp_cycles);
	printf(" CPU Cycles per block: %f\n", mxp_cycles / ((double) (NUM_BLOCKS)));
	printf(" Speedup: %f\n", scalar_time / vbx_time);

	vbx_mxp_t *this_mxp = VBX_GET_THIS_MXP();
	double vbx_mbps = (double) (NUM_BLOCKS) * 1000 / vbx_time;	// blocks per second
	printf("V%d@%dMHz: %dx%d tile, %dx%d blocks, %f blocks/s, %f megapixel/s\n",
	       this_mxp->vector_lanes, this_mxp->core_freq / 1000000, 
	       NUM_TILE_Y, NUM_TILE_X, 
	       BLOCK_SIZE, BLOCK_SIZE,
	       vbx_mbps, (vbx_mbps * DCT_SIZE) / 1000000);

	printf("\nChecking results...\n");

	wrong = 0;
	for (block_num = 0; block_num < NUM_BLOCKS; block_num++) {
		for (i = 0; i < BLOCK_SIZE; i++) {
			base = i * BLOCK_SIZE;
			for (j = 0; j < BLOCK_SIZE; j++) {
				if (block_s[block_num * DCT_SIZE + base + j] != block_v[block_num * DCT_SIZE + base + j]) {
					if (wrong < 5) {
						printf("\nError at %d [%d,%d], result is %d, should be %d\n",
							   block_num, i, j, (int) block_v[block_num * DCT_SIZE + base + j],
							   (int) block_s[block_num * DCT_SIZE + base + j]);
					}
					wrong++;
				}
			}
		}
	}

	printf("wrong is %d\n\n", wrong);
	total_errors += wrong;

	free(block_s);
	vbx_shared_free(block_v);
	vbx_shared_free(coeff_v);

	vbx_mtx_fdct_free( v );

	VBX_TEST_END(total_errors);

	return (0);
}