Beispiel #1
0
inline void matmul_matmul(size_t heightA, size_t widthA, size_t widthB,
		data_t *A, size_t rstrideA,
		data_t *B, size_t rstrideB,
		data_t *C, size_t rstrideC) {

	// I dont want to use MKL here, please call directly matmul_mkl
	if(heightA * widthB < kMinStrassen)
		matmul_simple(heightA, widthA, widthB, A, rstrideA, B, rstrideB, C, rstrideC);
	else
		matmul_strassen(heightA, widthA, widthB, A, rstrideA, B, rstrideB, C, rstrideC);
}
int main(int argc, char *argv[])
{
	e_epiphany_t Epiphany, *pEpiphany;
	e_mem_t      DRAM,     *pDRAM;
	unsigned int msize;
	float        seed;
	unsigned int addr; //, clocks;
	size_t       sz;
	double       tdiff[4];
	int          result, rerval;
	
	pEpiphany = &Epiphany;
	pDRAM     = &DRAM;
	msize     = 0x00400000;

	get_args(argc, argv);


	fo = stderr;
	fi = stdin;

	printf("\nMatrix: C[%d][%d] = A[%d][%d] * B[%d][%d]\n\n", _Smtx, _Smtx, _Smtx, _Smtx, _Smtx, _Smtx);
	printf("Using %d x %d cores\n\n", _Nside, _Nside);
	seed = 0.0;
	printf("Seed = %f\n", seed);



	// Connect to device for communicating with the Epiphany system
	// Prepare device
	e_set_host_verbosity(H_D0);
	e_init(NULL);
	e_reset_system();

	if (e_alloc(pDRAM, 0x00000000, msize))
	{
		printf("\nERROR: Can't allocate Epiphany DRAM!\n\n");
		exit(1);
	}
	if (e_open(pEpiphany, 0, 0, e_platform.chip[0].rows, e_platform.chip[0].cols))
	{
		printf("\nERROR: Can't establish connection to Epiphany device!\n\n");
		exit(1);
	}

	// Initialize Epiphany "Ready" state
	addr = offsetof(shared_buf_t, core.ready);
	Mailbox.core.ready = 0;
	e_write(pDRAM, 0, 0, addr, &Mailbox.core.ready, sizeof(Mailbox.core.ready));

	printf("Loading program on Epiphany chip...\n");
	e_set_loader_verbosity(ar.verbose);
	result = e_load_group(ar.srecFile, pEpiphany, 0, 0, pEpiphany->rows, pEpiphany->cols, ar.run_target);
	if (result == E_ERR) {
		printf("Error loading Epiphany program.\n");
		exit(1);
	}


	// Generate operand matrices based on a provided seed
	matrix_init(seed);


#ifdef __WIPE_OUT_RESULT_MATRIX__
	// Wipe-out any previous remains in result matrix (for verification)
	addr = offsetof(shared_buf_t, C[0]);
	sz = sizeof(Mailbox.C);
	printf("Writing C[%uB] to address %08x...\n", sz, addr);
	e_write(pDRAM, 0, 0, addr, (void *) Mailbox.C, sz);
#endif

	clock_gettime(CLOCK_MONOTONIC, &timer[0]);

	// Copy operand matrices to Epiphany system
	addr = offsetof(shared_buf_t, A[0]);
	sz = sizeof(Mailbox.A);
	printf("Writing A[%uB] to address %08x...\n", sz, addr);
	e_write(pDRAM, 0, 0, addr, (void *) Mailbox.A, sz);
	
	addr = offsetof(shared_buf_t, B[0]);
	sz = sizeof(Mailbox.B);
	printf("Writing B[%uB] to address %08x...\n", sz, addr);
	e_write(pDRAM, 0, 0, addr, (void *) Mailbox.B, sz);


	// Call the Epiphany matmul() function
	printf("GO Epiphany! ...   ");
	clock_gettime(CLOCK_MONOTONIC, &timer[1]);
	matmul_go(pDRAM);
	clock_gettime(CLOCK_MONOTONIC, &timer[2]);
	printf("Finished calculating Epiphany result.\n");


	// Read result matrix and timing
	addr = offsetof(shared_buf_t, C[0]);
	sz = sizeof(Mailbox.C);
	printf("Reading result from address %08x...\n", addr);
	e_read(pDRAM, 0, 0, addr, (void *) Mailbox.C, sz);

	clock_gettime(CLOCK_MONOTONIC, &timer[3]);


	// Calculate a reference result
	printf("Calculating result on Host ...   ");
	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &timer[4]);
#ifndef __DO_STRASSEN__
	matmul(Mailbox.A, Mailbox.B, Cref, _Smtx);
#else
	matmul_strassen(Mailbox.A, Mailbox.B, Cref, _Smtx);
#endif
	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &timer[5]);
	printf("Finished calculating Host result.\n");


	addr = offsetof(shared_buf_t, core.clocks);
	sz = sizeof(Mailbox.core.clocks);
	printf("Reading time from address %08x...\n", addr);
	e_read(pDRAM,0, 0, addr, &Mailbox.core.clocks, sizeof(Mailbox.core.clocks));
//	clocks = Mailbox.core.clocks;


	// Calculate the difference between the Epiphany result and the reference result
	printf("\n*** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***\n");
	printf("Verifying result correctness ...   ");
	matsub(Mailbox.C, Cref, Cdiff, _Smtx);

	tdiff[0] = (timer[2].tv_sec - timer[1].tv_sec) * 1000 + ((double) (timer[2].tv_nsec - timer[1].tv_nsec) / 1000000.0);//total
	tdiff[1] = (timer[1].tv_sec - timer[0].tv_sec) * 1000 + ((double) (timer[1].tv_nsec - timer[0].tv_nsec) / 1000000.0);//write
	tdiff[2] = (timer[3].tv_sec - timer[2].tv_sec) * 1000 + ((double) (timer[3].tv_nsec - timer[2].tv_nsec) / 1000000.0);//read
	tdiff[3] = (timer[5].tv_sec - timer[4].tv_sec) * 1000 + ((double) (timer[5].tv_nsec - timer[4].tv_nsec) / 1000000.0);//ref


	// If the difference is 0, then the matrices are identical and the
	// calculation was correct
	if (iszero(Cdiff, _Smtx))
	{
		printf("C_epiphany == C_host\n");
		rerval = 0;
	} else {
		printf("\n\nERROR: C_epiphany is different from C_host !!!\n");
		rerval = 1;
	}
	printf("*** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***\n");
	printf("\n");
	printf("Epiphany (compute):  %9.1f msec  (@ %03d MHz)\n"   , tdiff[0], eMHz);
	printf("         (write)  :  %9.1f msec \n"                , tdiff[1]);
	printf("         (read)   :  %9.1f msec\n"                 , tdiff[2]);
	printf("         (*total*):  %9.1f msec\n\n"               , tdiff[2]+tdiff[1]+tdiff[0]);
	printf("Host     (*total*):  %9.1f msec  (@ %03d MHz)\n"   , tdiff[3], aMHz);


#ifdef __DUMP_MATRICES__
	printf("\n\n\n");
	printf("A[][] = \n");
	matprt(Mailbox.A, _Smtx);
	printf("B[][] = \n");
	matprt(Mailbox.B, _Smtx);
	printf("C[][] = \n");
	matprt(Mailbox.C, _Smtx);
	printf("Cref[][] = \n");
	matprt(Cref, _Smtx);

	int i, j;
	for (i=0; i<_Nside; i++)
		for (j=0; j<_Nside; j++)
		{
			e_read(pEpiphany, i, j, 0x2000+0*sizeof(float), &Aepi[(i*_Score+0)*_Smtx + j*_Score], 2*sizeof(float));
			e_read(pEpiphany, i, j, 0x2000+2*sizeof(float), &Aepi[(i*_Score+1)*_Smtx + j*_Score], 2*sizeof(float));
			e_read(pEpiphany, i, j, 0x4000+0*sizeof(float), &Bepi[(i*_Score+0)*_Smtx + j*_Score], 2*sizeof(float));
			e_read(pEpiphany, i, j, 0x4000+2*sizeof(float), &Bepi[(i*_Score+1)*_Smtx + j*_Score], 2*sizeof(float));
		}
	printf("Aepi[][] = \n");
	matprt(Aepi, _Smtx);
	printf("Bepi[][] = \n");
	matprt(Bepi, _Smtx);
#endif

	printf("\n* * *   EPIPHANY FTW !!!   * * *\n");


	// Close connection to device
	if (e_close(pEpiphany))
	{
		printf("\nERROR: Can't close connection to Epiphany device!\n\n");
		exit(1);
	}
	if (e_free(pDRAM))
	{
		printf("\nERROR: Can't release Epiphany DRAM!\n\n");
		exit(1);
	}

	e_finalize();

	return rerval;
}
Beispiel #3
0
int main(int argc, char *argv[])
{
	p_mem_t shared_mem, results_mem;
	uint32_t eram_base;
	char results[1024] = { '\0' };
	int device_cols, device_rows, nside;
	p_dev_t dev;
	p_prog_t prog;
	p_team_t team;
	p_coords_t size;
	p_coords_t start = { .row = 0, .col = 0 };

	unsigned int msize;
	float        seed;
	unsigned int addr; //, clocks;
	size_t       sz;
	int          verbose=0;
	double       tdiff[3];
	int          result, retval = 0;

	msize     = 0x00400000;

	get_args(argc, argv);

	fo = stderr;
	fi = stdin;
	printf( "------------------------------------------------------------\n");
	printf( "Calculating:   C[%d][%d] = A[%d][%d] * B[%d][%d]\n", _Smtx, _Smtx, _Smtx, _Smtx, _Smtx, _Smtx);
	seed = 0.0;
	if(verbose){
	  printf( "Seed = %f\n", seed);
	}

	dev = p_init(P_DEV_EPIPHANY, 0);
	if (p_error(dev)) {
		fprintf(stderr, "Error initializing PAL\n");
		return p_error(dev);
	}

	device_cols = p_query(dev, P_PROP_COLS);
	device_rows = p_query(dev, P_PROP_ROWS);

	// Use min size
	nside = device_cols > device_rows ? device_cols : device_rows;

	if (nside < 4) {
		fprintf(stderr, "Error: Too small device, need at least 4x4\n");
		return 1;
	}

	// Either 1024, 256, 64, or 16 cores (side must be power of two),
	nside = nside >= 32 ? 32 : nside >= 16 ? 16 : nside >= 8 ? 8 : 4;

	size.row = nside;
	size.col = nside;
	team = p_open4(dev, P_TOPOLOGY_2D, &start, &size);
	printf("Using team of size %d\n", p_team_size(team));
	if (p_error(team)) {
		fprintf(stderr, "Error opening team\n");
		return p_error(team);
	}

	prog = p_load(dev, ar.elfFile, 0);

	eram_base = (unsigned) p_query(dev, P_PROP_MEMBASE);
	shared_mem = p_map(dev, eram_base, msize);

	// Clear mailbox contents
	memset(&Mailbox, 0, sizeof(Mailbox));
	p_write(&shared_mem, &Mailbox, 0, sizeof(Mailbox), 0);

	// Generate operand matrices based on a provided seed
	matrix_init((int)seed);

#ifdef __WIPE_OUT_RESULT_MATRIX__
	// Wipe-out any previous remains in result matrix (for verification)
	addr = offsetof(shared_buf_t, C[0]);
	sz = sizeof(Mailbox.C);
	if(verbose){
	  printf( "Writing C[%uB] to address %08x...\n", (unsigned) sz, addr);
	}
	p_write(&shared_mem, (void *) Mailbox.C, addr, sz, 0);
#endif

	/* Wallclock time */
	clock_gettime(CLOCK_MONOTONIC, &timer[0]);
	/* Clock CPUTIME too. We don't want to indicate failure just
	 * because the system was under high load. */
	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &timer[4]);

	// Copy operand matrices to Epiphany system
	addr = offsetof(shared_buf_t, A[0]);
	sz = sizeof(Mailbox.A);
	if(verbose){
	  printf( "Writing A[%uB] to address %08x...\n", (unsigned) sz, addr);
	}
	p_write(&shared_mem, (void *) Mailbox.A, addr, sz, 0);

	addr = offsetof(shared_buf_t, B[0]);
	sz = sizeof(Mailbox.B);
	if(verbose){
	  printf( "Writing B[%uB] to address %08x...\n", (unsigned) sz, addr);
	}
	p_write(&shared_mem, (void *) Mailbox.B, addr, sz, 0);
	// Call the Epiphany matmul() function

	if(verbose){
	  printf( "GO Epiphany! ...   ");
	}
	if(verbose){
	  printf("Loading program on Epiphany chip...\n");
	}

	p_arg_t args[] = { &nside, sizeof(nside), true };
	if (p_run(prog, "matmul", team, 0, p_team_size(team), 1, args, 0)) {
		fprintf(stderr, "Error loading Epiphany program.\n");
		exit(1);
	}

	// Read result matrix and timing
	addr = offsetof(shared_buf_t, C[0]);
	sz = sizeof(Mailbox.C);
	if(verbose){
	  printf( "Reading result from address %08x...\n", addr);
	}
	p_read(&shared_mem, (void *) Mailbox.C, addr, sz, 0);

	clock_gettime(CLOCK_MONOTONIC, &timer[1]);
	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &timer[5]);


	// Calculate a reference result
	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &timer[2]);
#ifndef __DO_STRASSEN__
	matmul(Mailbox.A, Mailbox.B, Cref, _Smtx);
#else
	matmul_strassen(Mailbox.A, Mailbox.B, Cref, _Smtx);
#endif
	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &timer[3]);
	addr = offsetof(shared_buf_t, core.clocks);
	sz = sizeof(Mailbox.core.clocks);
	if(verbose){
	  printf( "Reading time from address %08x...\n", addr);
	}
	p_read(&shared_mem, &Mailbox.core.clocks, addr, sizeof(Mailbox.core.clocks), 0);
//	clocks = Mailbox.core.clocks;





	// Calculate the difference between the Epiphany result and the reference result
	matsub(Mailbox.C, Cref, Cdiff, _Smtx);

	tdiff[0] = (timer[1].tv_sec - timer[0].tv_sec) * 1000 + ((double) (timer[1].tv_nsec - timer[0].tv_nsec) / 1000000.0);
//	tdiff[0] = ((double) clocks) / eMHz * 1000;
	tdiff[1] = (timer[3].tv_sec - timer[2].tv_sec) * 1000 + ((double) (timer[3].tv_nsec - timer[2].tv_nsec) / 1000000.0);
	tdiff[2] = (timer[5].tv_sec - timer[4].tv_sec) * 1000 + ((double) (timer[5].tv_nsec - timer[4].tv_nsec) / 1000000.0);


	// If the difference is 0, then the matrices are identical and the
	// calculation was correct
	if (iszero(Cdiff, _Smtx))
	  {

	    printf( "Epiphany(time) %9.1f msec  (@ %03d MHz)\n", tdiff[0], eMHz);
	    printf( "Host(time)     %9.1f msec  (@ %03d MHz)\n", tdiff[1], aMHz);
	    printf( "------------------------------------------------------------\n");
	    printf( "TEST \"matmul-16\" PASSED\n");
	    retval = 0;
	} else {
	  printf( "\n\nERROR: C_epiphany is different from C_host !!!\n");
	  printf( "TEST \"matmul-16\" FAILED\n");
	  retval = 1;
	}

#if 0
#ifdef __DUMP_MATRICES__
	printf( "\n\n\n");
	printf( "A[][] = \n");
	matprt(Mailbox.A, _Smtx);
	printf( "B[][] = \n");
	matprt(Mailbox.B, _Smtx);
	printf( "C[][] = \n");
	matprt(Mailbox.C, _Smtx);
	printf( "Cref[][] = \n");
	matprt(Cref, _Smtx);

	int i, j;
	for (i=0; i<_Nside; i++)
		for (j=0; j<_Nside; j++)
		{
			e_read(pEpiphany, i, j, 0x2000+0*sizeof(float), &Aepi[(i*_Score+0)*_Smtx + j*_Score], 2*sizeof(float));
			e_read(pEpiphany, i, j, 0x2000+2*sizeof(float), &Aepi[(i*_Score+1)*_Smtx + j*_Score], 2*sizeof(float));
			e_read(pEpiphany, i, j, 0x4000+0*sizeof(float), &Bepi[(i*_Score+0)*_Smtx + j*_Score], 2*sizeof(float));
			e_read(pEpiphany, i, j, 0x4000+2*sizeof(float), &Bepi[(i*_Score+1)*_Smtx + j*_Score], 2*sizeof(float));
		}
	printf( "Aepi[][] = \n");
	matprt(Aepi, _Smtx);
	printf( "Bepi[][] = \n");
	matprt(Bepi, _Smtx);
#endif
#endif



	// p_unmap ...
	p_close(team);
	p_finalize(dev);

	return retval;
}


// Initialize operand matrices
void matrix_init(int seed)
{
	int i, j, p;

	p = 0;
	for (i=0; i<_Smtx; i++)
		for (j=0; j<_Smtx; j++)
			Mailbox.A[p++] = (i + j + seed) % _MAX_MEMBER_;

	p = 0;
	for (i=0; i<_Smtx; i++)
		for (j=0; j<_Smtx; j++)
			Mailbox.B[p++] = ((i + j) * 2 + seed) % _MAX_MEMBER_;

	p = 0;
	for (i=0; i<_Smtx; i++)
		for (j=0; j<_Smtx; j++)
			Mailbox.C[p++] = 0x8dead;

	return;
}