示例#1
0
/*
 * Background-load kernel.
 *
 * Runs a convolution over fixed local buffers to keep the core busy, then
 * issues 100000 DMA copies from local address 0x6000 to the same offset on
 * the core whose id is this core's id XOR 0x0c3, and finally unmasks the
 * E_SYNC interrupt and executes "idle".
 */
int main(void)
{
	/* Flipping bits 0x0c3 of the core id picks the opposite core in the chip. */
	int target_core = e_get_coreid() ^ 0x0c3;

	/* Fixed local addresses used by this kernel. */
	unsigned *local_buf  = (unsigned *) 0x6000;
	unsigned *remote_buf = (void *) ((target_core<<20) | 0x6000);
	unsigned *len   = (unsigned *) 0x1e00;  /* element count, also DMA length */
	unsigned *in_a  = (unsigned *) 0x2000;
	unsigned *in_b  = (unsigned *) 0x4000;
	unsigned *accum = (unsigned *) 0x6000;  /* same storage as local_buf */

	unsigned outer, inner, pass;

	/* Convolution, used only to generate load; accum is accumulated into as-is. */
	outer = 0;
	while (outer < *len) {
		inner = 0;
		while (inner <= outer) {
			accum[outer] += in_a[outer - inner] * in_b[inner];
			inner++;
		}
		outer++;
	}

	/* Repeated DMA traffic towards the remote core. */
	for (pass = 100000; pass != 0; pass--) {
		e_dma_copy(remote_buf, local_buf, *len);
	}

	/* Unmask E_SYNC in IMASK, then enable interrupts globally. */
	e_irq_mask(E_SYNC, E_FALSE);
	e_irq_global_mask(E_FALSE);

	/* Halt the core until an interrupt arrives. */
	__asm__ __volatile__("idle");

	return EXIT_SUCCESS;
}
/*
 * Mesh-event measurement kernel.
 *
 * For every event index q written into the local "mode" word from outside
 * this function, the kernel:
 *   1. patches E_REG_MESH_CONFIG with mesh_type[1][q],
 *   2. waits for the start flag in the local "commander" word,
 *   3. starts CTIMER0 counting E_CTIMER_MESH_0 events,
 *   4. forwards the start flag to its four wrap-around neighbours,
 *   5. DMA-copies 0x2000 bytes from "master" to "slave",
 *   6. stores the elapsed count in mailbox[q] and restores the register.
 * The loop ends after the event with index 12 has been handled.
 *
 * The mode/commander words and the tail of the slave buffer are written by
 * other agents while this core spins on them, so they are accessed through
 * volatile pointers; without that the compiler is free to hoist the load
 * out of the wait loop and spin forever.
 *
 * NOTE(review): q indexes mesh_type[1][] and mailbox[] unchecked; the
 * "q == 12" exit suggests the valid range is 0..12 - confirm.
 * NOTE(review): slave[] is zeroed only once, before the first event, so
 * from the second event on the completion poll is already satisfied -
 * confirm this is intended.
 */
int main(void)
{
	unsigned mesh_reg;
	unsigned mesh_reg_modify;
	unsigned time_c, time_p;
	unsigned time;
	unsigned tran, k, i, q;
	unsigned *mailbox;
	unsigned *counter;
	unsigned *master, *slave, *p;
	unsigned *row, *col;
	unsigned *n_row, *n_col;
	unsigned *neighbour0, *neighbour1, *neighbour2, *neighbour3;
	volatile unsigned *mode;
	volatile unsigned *commander;
	volatile unsigned *slave_poll;

	/* Fixed local addresses; *row / *col select the target core. */
	row = (unsigned *)0x5000;
	col = (unsigned *)0x5004;
	n_row = (unsigned *)0x5008;
	n_col = (unsigned *)0x500c;
	master = (unsigned *)0x2000;
	p = (unsigned *) 0x2000;
	slave = (unsigned *) e_get_global_address(*row, *col, p);
	slave_poll = slave;
	commander = (volatile unsigned *)0x5100;
	p = (unsigned *) 0x5300;
	counter = (unsigned *) e_get_global_address(*row, *col, p);
	mailbox = (unsigned *)0x6000;
	mode = (volatile unsigned *)0x5400;
	tran = 2048;

	// Core number
	k = (e_group_config.core_row)*e_group_config.group_cols + (e_group_config.core_col);

	// Global addresses of the commander word on the four wrap-around neighbours
	p = (unsigned *)0x5100;
	e_neighbor_id(E_PREV_CORE, E_ROW_WRAP, n_row, n_col);
	neighbour0 = (unsigned *) e_get_global_address(*n_row, *n_col, p);

	e_neighbor_id(E_NEXT_CORE, E_ROW_WRAP, n_row, n_col);
	neighbour1 = (unsigned *) e_get_global_address(*n_row, *n_col, p);

	e_neighbor_id(E_PREV_CORE, E_COL_WRAP, n_row, n_col);
	neighbour2 = (unsigned *) e_get_global_address(*n_row, *n_col, p);

	e_neighbor_id(E_NEXT_CORE, E_COL_WRAP, n_row, n_col);
	neighbour3 = (unsigned *) e_get_global_address(*n_row, *n_col, p);

	// Initialize master and slave
	for (i = 0; i < tran; i++)
	{
		master[i] = 0xdeadbee9;
		slave[i] = 0x00000000;
	}

	while (1)
	{
		// Clear the mode box
		mode[0] = 0xdeadbeef;

		// Clear the start commander
		commander[0] = 0x00000000;

		// Wait for the mesh event index to be written into the mode box
		while (mode[0] == 0xdeadbeef)
		{
		}

		q = mode[0];
		mesh_reg = e_reg_read(E_REG_MESH_CONFIG);
		mesh_reg_modify = mesh_reg & 0xffffff0f;
		mesh_reg_modify = mesh_reg_modify | mesh_type[1][q];
		e_reg_write(E_REG_MESH_CONFIG, mesh_reg_modify);

		// Set the ctimer
		e_ctimer_set(E_CTIMER_0, E_CTIMER_MAX);

		// Wait for the signal to start transferring
		while (commander[0] != 0xdeadbeef)
		{
		}

		// Start the ctimer and select the time type
		time_p = e_ctimer_start(E_CTIMER_0, E_CTIMER_MESH_0);

		// Forward the start signal to the four neighbours
		neighbour0[0] = 0xdeadbeef;
		neighbour1[0] = 0xdeadbeef;
		neighbour2[0] = 0xdeadbeef;
		neighbour3[0] = 0xdeadbeef;

		e_dma_copy(slave, master, 0x2000);

		// Wait until the last word of the transfer is visible
		while (slave_poll[tran - 1] != 0xdeadbee9)
		{
		}
		counter[k] = 1;

		// Get the time now
		time_c = e_ctimer_get(E_CTIMER_0);

		// The timer counts down, so start - now is the elapsed count
		time = time_p - time_c;

		// Output the result
		mailbox[q] = time;

		// Restore the original value of E_REG_MESH_CONFIG
		e_reg_write(E_REG_MESH_CONFIG, mesh_reg);

		// Stop once the last mesh event has been measured
		if (q == 12)
		{
			break;
		}
	}
	return 0;
}
/*
 * stencil_thread - per-thread worker for an iterative 5-point weighted
 * stencil over a 2D grid that is split across a di x dj grid of threads.
 *
 * p points to a my_args_t carrying the global grid size (ni, nj), the
 * thread grid (di, dj), the iteration count (niter), the source grid A,
 * the destination grid B and the five weights w0..w4.
 *
 * The thread DMA-copies its padded tile of A into local storage, performs
 * niter sweeps (each followed by an exchange of edge rows and columns with
 * its four cartesian neighbours via MPI_Sendrecv_replace), and finally
 * DMA-copies the tile interior into B.
 *
 * NOTE(review): the sweep advances in 4x4 tiles with no remainder loop, so
 * it appears to assume (ni-2) and (nj-2) are multiples of 4 - confirm.
 * NOTE(review): edge rows are moved as long long words, which presumably
 * requires ni to be even and the buffers to be 8-byte aligned - confirm.
 * NOTE(review): neighbour ranks come from comm_2d (created with reorder=1)
 * but the exchanges are issued on comm - verify the ranks coincide.
 */
void __entry
stencil_thread( void* p )
{
	my_args_t* pargs = (my_args_t*)p;

	int i,j;
	int NI = pargs->ni;	// global grid extent along i
	int NJ = pargs->nj;	// global grid extent along j
	int di = pargs->di;	// thread grid extent, cartesian dimension 0
	int dj = pargs->dj;	// thread grid extent, cartesian dimension 1
	int niter = pargs->niter;
	float* A = pargs->A;	// read during the initial load
	float* B = pargs->B;	// written with the final interior values
	float w0 = pargs->w0;
	float w1 = pargs->w1;
	float w2 = pargs->w2;
	float w3 = pargs->w3;
	float w4 = pargs->w4;

	int myrank_2d, mycoords[2];
	int dims[2] = {di, dj};
	int periods[2] = {1, 1}; // Periodic communication but ignoring edge copy where irrelevant

	MPI_Status status;
	MPI_Init(0,MPI_BUF_SIZE);

	// Build a periodic 2D cartesian communicator and look up our coordinates
	MPI_Comm comm = MPI_COMM_THREAD;
	MPI_Comm comm_2d;
	MPI_Cart_create(comm, 2, dims, periods, 1, &comm_2d);
	MPI_Comm_rank(comm_2d, &myrank_2d);
	MPI_Cart_coords(comm_2d, myrank_2d, 2, mycoords);

	int x = mycoords[0];
	int y = mycoords[1];

	// ranks of neighbors
	int north, south, west, east;
	MPI_Cart_shift(comm_2d, 0, 1, &west, &east);
	MPI_Cart_shift(comm_2d, 1, 1, &north, &south);

	// local stencil sizes with padding
	int ni = (NI-2) / di + 2;
	int nj = (NJ-2) / dj + 2;

	// Load the initial values
	// memfree records the current break so everything allocated below can be released at the end
	void* memfree = coprthr_tls_sbrk(0);
	float* a = (float*)coprthr_tls_sbrk(ni*nj*sizeof(float));
	float* b = (float*)coprthr_tls_sbrk(ni*nj*sizeof(float));
	float* nsbuf = (float*)coprthr_tls_sbrk(ni*sizeof(float));	// one full row for north/south exchange
	float* webuf = (float*)coprthr_tls_sbrk((nj-2)*sizeof(float));	// one interior column for west/east exchange
	long long* srcadr;
	long long* dstadr;
	long long* nsend = (long long*)(nsbuf + ni);	// one past the end of nsbuf

	// Copy initial conditions (2D DMA would be better)
	// NOTE(review): the row offset uses y*(ni-2) and the column offset x*(nj-2);
	// since x pairs with di/ni and y with dj/nj elsewhere, these look swapped
	// for non-square tiles - confirm. The same pattern is used for the final copy to B.
	for (j=0; j<nj; j++) e_dma_copy(a+j*ni, A + (y*(ni-2)+j)*NI+x*(nj-2), ni*sizeof(float));

	// Initial conditions
//	if(y==0) for (i=0; i<ni-2; i++) a[i] = -2.0f;
//	if(y==dj) for (i=0; i<ni-2; i++) a[(nj-1)*ni+i] = 1.0f;
//	if(x==di) for (j=0; j<nj-2; j++) a[(j+2)*ni-1] = -1.0f;
//	if(x==0) for (j=0; j<nj-2; j++) a[(j+1)*ni] = 2.0f;

	// Copy "a" into "b" (only need fixed borders would be better)
	for (i=0; i<ni*nj; i++) b[i] = a[i];

	while (niter--) {

		// Reference form of the sweep that the unrolled loop below implements:
/*		for (j=1; j<nj-1; j++) {
			for (i=1; i<ni-1; i++) {
				b[j*ni+i] = w0*a[j*ni+i-1] + w1*a[j*ni+i] + w2*a[j*ni+i+1] + w3*a[j*ni+i-ni] + w4*a[j*ni+i+ni];
			}
		}*/

		// 4x4 tiled sweep. aRC holds a[(j+R)*ni + i+C]; for rows 1..4 the
		// columns 4 and 5 of one tile are carried over as columns 0 and 1
		// of the next tile to the right instead of being reloaded.
		for (j=0; j<nj-2; j+=4)
		{
			// Prime the carried values with columns 0 and 1 of rows j+1..j+4
			float a14 = a[(j+1)*ni+0];
			float a15 = a[(j+1)*ni+1];
			float a24 = a[(j+2)*ni+0];
			float a25 = a[(j+2)*ni+1];
			float a34 = a[(j+3)*ni+0];
			float a35 = a[(j+3)*ni+1];
			float a44 = a[(j+4)*ni+0];
			float a45 = a[(j+4)*ni+1];
			for (i=0; i<ni-2; i+=4)
			{
				// Row above the tile (only columns 1..4 are needed)
				float a01 = a[(j+0)*ni+i+1];
				float a02 = a[(j+0)*ni+i+2];
				float a03 = a[(j+0)*ni+i+3];
				float a04 = a[(j+0)*ni+i+4];
				float a10 = a14;
				float a11 = a15;
				float a12 = a[(j+1)*ni+i+2];
				float a13 = a[(j+1)*ni+i+3];
				a14 = a[(j+1)*ni+i+4];
				a15 = a[(j+1)*ni+i+5];
				float a20 = a24;
				float a21 = a25;
				float a22 = a[(j+2)*ni+i+2];
				float a23 = a[(j+2)*ni+i+3];
				a24 = a[(j+2)*ni+i+4];
				a25 = a[(j+2)*ni+i+5];
				float a30 = a34;
				float a31 = a35;
				float a32 = a[(j+3)*ni+i+2];
				float a33 = a[(j+3)*ni+i+3];
				a34 = a[(j+3)*ni+i+4];
				a35 = a[(j+3)*ni+i+5];
				float a40 = a44;
				float a41 = a45;
				float a42 = a[(j+4)*ni+i+2];
				float a43 = a[(j+4)*ni+i+3];
				a44 = a[(j+4)*ni+i+4];
				a45 = a[(j+4)*ni+i+5];
				// Row below the tile (only columns 1..4 are needed)
				float a51 = a[(j+5)*ni+i+1];
				float a52 = a[(j+5)*ni+i+2];
				float a53 = a[(j+5)*ni+i+3];
				float a54 = a[(j+5)*ni+i+4];

				// Each output is w0*left + w1*centre + w2*right + w3*above + w4*below
				b[(j+1)*ni+i+1] = fma(w4,a21,fma(w3,a01,fma(w2,a12,fma(w1,a11,w0*a10))));
				b[(j+1)*ni+i+2] = fma(w4,a22,fma(w3,a02,fma(w2,a13,fma(w1,a12,w0*a11))));
				b[(j+1)*ni+i+3] = fma(w4,a23,fma(w3,a03,fma(w2,a14,fma(w1,a13,w0*a12))));
				b[(j+1)*ni+i+4] = fma(w4,a24,fma(w3,a04,fma(w2,a15,fma(w1,a14,w0*a13))));
				b[(j+2)*ni+i+1] = fma(w4,a31,fma(w3,a11,fma(w2,a22,fma(w1,a21,w0*a20))));
				b[(j+2)*ni+i+2] = fma(w4,a32,fma(w3,a12,fma(w2,a23,fma(w1,a22,w0*a21))));
				b[(j+2)*ni+i+3] = fma(w4,a33,fma(w3,a13,fma(w2,a24,fma(w1,a23,w0*a22))));
				b[(j+2)*ni+i+4] = fma(w4,a34,fma(w3,a14,fma(w2,a25,fma(w1,a24,w0*a23))));
				b[(j+3)*ni+i+1] = fma(w4,a41,fma(w3,a21,fma(w2,a32,fma(w1,a31,w0*a30))));
				b[(j+3)*ni+i+2] = fma(w4,a42,fma(w3,a22,fma(w2,a33,fma(w1,a32,w0*a31))));
				b[(j+3)*ni+i+3] = fma(w4,a43,fma(w3,a23,fma(w2,a34,fma(w1,a33,w0*a32))));
				b[(j+3)*ni+i+4] = fma(w4,a44,fma(w3,a24,fma(w2,a35,fma(w1,a34,w0*a33))));
				b[(j+4)*ni+i+1] = fma(w4,a51,fma(w3,a31,fma(w2,a42,fma(w1,a41,w0*a40))));
				b[(j+4)*ni+i+2] = fma(w4,a52,fma(w3,a32,fma(w2,a43,fma(w1,a42,w0*a41))));
				b[(j+4)*ni+i+3] = fma(w4,a53,fma(w3,a33,fma(w2,a44,fma(w1,a43,w0*a42))));
				b[(j+4)*ni+i+4] = fma(w4,a54,fma(w3,a34,fma(w2,a45,fma(w1,a44,w0*a43))));

			}
		}

		// north/south
		// Send our second row north; what arrives from the south becomes our
		// last row, except on the last row of threads where it is discarded.
		dstadr = (long long*)nsbuf;
		srcadr = (long long*)(b+ni);
		while (dstadr != nsend) *dstadr++ = *srcadr++; // second row
		MPI_Sendrecv_replace(nsbuf, ni, MPI_FLOAT, north, 1, south, 1, comm, &status);
		if (y!=dj-1) {
			dstadr = (long long*)(b+(nj-1)*ni);
			srcadr = (long long*)nsbuf;
			while (srcadr != nsend) *dstadr++ = *srcadr++; // last row
		}
		// Send our second-to-last row south; what arrives from the north
		// becomes our first row, except on the first row of threads.
		dstadr = (long long*)nsbuf;
		srcadr = (long long*)(b+(nj-2)*ni);
		while (dstadr != nsend) *dstadr++ = *srcadr++; // second to last row
		MPI_Sendrecv_replace(nsbuf, ni, MPI_FLOAT, south, 1, north, 1, comm, &status);
		if (y) {
			dstadr = (long long*)b;
			srcadr = (long long*)nsbuf;
			while (srcadr != nsend) *dstadr++ = *srcadr++; // first row
		}

		// west/east
		// Same scheme for columns, restricted to the nj-2 interior rows
		for (j=0; j<nj-2; j++) webuf[j] = b[(j+1)*ni+1]; // second column
		MPI_Sendrecv_replace(webuf, nj-2, MPI_FLOAT, west, 1, east, 1, comm, &status);
		if (x!=di-1) for (j=0; j<nj-2; j++) b[(j+2)*ni-1] = webuf[j]; // last column
		for (j=0; j<nj-2; j++) webuf[j] = b[(j+2)*ni-2]; // second to last column
		MPI_Sendrecv_replace(webuf, nj-2, MPI_FLOAT, east, 1, west, 1, comm, &status);
		if (x) for (j=0; j<nj-2; j++) b[(j+1)*ni] = webuf[j]; // first column

		// Swap buffers: the freshly written grid becomes the input of the next sweep
		float* tmp = b;
		b = a;
		a = tmp;
	}

	// Copy internal results
	for (j=1; j<nj-1; j++) e_dma_copy(B + (y*(ni-2)+j)*NI+x*(nj-2)+1, a+j*ni+1, (ni-2)*sizeof(float));

	// Release everything allocated since memfree was recorded
	coprthr_tls_brk(memfree);

	MPI_Finalize();
}
示例#4
0
	__kernel void
nbody_thread( void* p )
{
	my_args_t* pargs = (my_args_t*)p;

	int n = pargs->n;
	int cnt = pargs->cnt;
	unsigned int s_x, s_y, s_z, s_m;
	unsigned int page = 0;
	float dt = pargs->dt;
	float es = pargs->es;
	Particle *particles = pargs->p;
	ParticleV *state = pargs->v;

	int rank, size, npart, i;
	int left, right;

	MPI_Status status;
	MPI_Init(0,MPI_BUF_SIZE);
	MPI_Comm comm = MPI_COMM_THREAD;
	MPI_Comm_rank(comm, &rank);
	MPI_Comm_size(comm, &size);
	MPI_Cart_shift(comm, 0, 1, &left, &right);

	npart = n / size;

	void* memfree = coprthr_tls_sbrk(0);
	Particle* my_particles = (Particle*)coprthr_tls_sbrk(npart*sizeof(Particle));
	ParticleV* my_state = (ParticleV*)coprthr_tls_sbrk(npart*sizeof(ParticleV));
	Particle* sendbuf = (Particle*)coprthr_tls_sbrk(npart*sizeof(Particle));

	e_dma_copy(my_particles, particles + npart*rank, npart*sizeof(Particle));
	e_dma_copy(my_state, state + npart*rank, npart*sizeof(ParticleV));
	unsigned int rgba_black = 0x00000000;
	unsigned int rgba_white = 0x00ffffff;

	while (cnt--) {

		for (i=0; i<npart; i++) sendbuf[i] = my_particles[i];

		for (i=0; i<size; i++) {
			if (i) MPI_Sendrecv_replace(sendbuf, sizeof(Particle)/sizeof(float)*npart, MPI_FLOAT, left, 1, right, 1, comm, &status);
			ComputeAccel(my_particles, sendbuf, my_state, npart, es);
		}
		e_dma_copy(particles + npart*rank, my_particles, npart*sizeof(Particle));
		ComputeNewPos(my_particles, my_state, npart, dt);

		for(i = 0; i < npart; i++){
			s_x = (int) particles[i + npart*rank].x;
			s_y = (int) particles[i + npart*rank].y;
			if(s_x >= 0 && s_x < pargs->fbinfo.xres_virtual && s_y >= 0 && s_y < pargs->fbinfo.yres_virtual){
				e_dma_copy((char *) pargs->fbinfo.smem_start + (s_y * pargs->fbinfo.line_length) + (s_x * BPP), (char *) &rgba_black, 1 * BPP);
			}
			s_x = (int) my_particles[i].x;
			s_y = (int) my_particles[i].y;
			if(cnt  > 1 && s_x >= 0 && s_x < pargs->fbinfo.xres_virtual && s_y >= 0 && s_y < pargs->fbinfo.yres_virtual){
				e_dma_copy((char *) pargs->fbinfo.smem_start + (s_y * pargs->fbinfo.line_length) + (s_x * BPP), (char *) &rgba_white, 1 * BPP);
			}
		}

	}

	coprthr_tls_brk(memfree);

	MPI_Finalize();
}
示例#5
0
/*
 * Single-neighbour write test kernel.
 *
 * Waits for the start flag (0xdeadbeef) in the local commander word,
 * forwards that flag to the commander word of the next core in its column,
 * DMA-copies 0x2000 bytes from the local master buffer to the same offset
 * on the previous core in its row, and sets this core's slot in the
 * counter array at 0x80806300 once the last word has arrived.
 *
 * The commander word and the tail of the neighbour buffer are polled while
 * other agents write them, so they are accessed through volatile pointers;
 * without that the compiler may hoist the load out of the wait loop.
 *
 * NOTE(review): the counter index uses group_cols/2 as the row stride -
 * confirm this matches the layout the reader of the counter array expects.
 */
int main(void)
{
	unsigned tran, i;
	volatile unsigned *commander;
	volatile unsigned *neighbour_poll;
	unsigned *n_row, *n_col, *p, *nei_row, *nei_col;
	unsigned *neighbour, *neighbour0;
	unsigned *master;
	unsigned *counter;

	// Fixed local addresses and the counter array
	master = (unsigned *)0x2000;
	n_row = (unsigned *)0x6000;
	n_col = (unsigned *)0x6004;
	nei_row = (unsigned *) 0x600c;
	nei_col = (unsigned *) 0x6010;
	p = (unsigned *) 0x2000;
	commander = (volatile unsigned *)0x6100;
	counter = (unsigned *)0x80806300;
	tran = 2048;

	// Global address of the data buffer on the previous core in the row
	e_neighbor_id(E_PREV_CORE, E_ROW_WRAP, n_row, n_col);
	neighbour = (unsigned *) e_get_global_address(*n_row, *n_col, p);
	neighbour_poll = neighbour;

	commander[0] = 0x00000000;

	// Global address of the commander word on the next core in the column
	p = (unsigned *)0x6100;
	e_neighbor_id(E_NEXT_CORE, E_COL_WRAP, nei_row, nei_col);
	neighbour0 = (unsigned *) e_get_global_address(*nei_row, *nei_col, p);

	// Initialize master and the neighbour's buffer
	for (i = 0; i < tran; i++)
	{
		master[i] = 0xdeadbee1;
		neighbour[i] = 0x00000000;
	}

	// Wait for the signal to start transferring
	while (commander[0] != (unsigned) 0xdeadbeef)
	{
	}

	// Pass the signal on
	neighbour0[0] = 0xdeadbeef;

	// Write to the neighbour core
	e_dma_copy(neighbour, master, 0x2000);

	// Wait until the last word of the transfer is visible
	while (neighbour_poll[tran - 1] != 0xdeadbee1)
	{
	}
	counter[e_group_config.core_row * (e_group_config.group_cols/2) + e_group_config.core_col] = 1;

	return 0;
}
示例#6
0
/*
 * Parabolic sine approximation used by dla_thread.
 *
 * Evaluates a quadratic in angle (sign of the quadratic term chosen by the
 * sign of angle) and then applies one 0.225-weighted correction step.
 * dla_thread calls it with angles in [-pi, pi].
 *
 * NOTE(review): the linear coefficient 1.275323954f is close to, but not
 * equal to, 4/pi (1.27323954); it is kept as-is to preserve behaviour -
 * confirm whether it is a typo.
 */
static float
dla_fast_sin(float angle)
{
    float s;

    if(angle < 0.0f)
        s = 1.275323954f * angle + 0.405284735f * angle * angle;
    else
        s = 1.275323954f * angle - 0.405284735f * angle * angle;

    if(s < 0)
        s = 0.225f * (s * -s - s) + s;
    else
        s = 0.225f * (s * s - s) + s;

    return s;
}

/*
 * dla_thread - per-thread worker that grows a cluster of pixels in the
 * frame buffer by random walks.
 *
 * p points to a my_args_t providing the number of walkers (n), the frame
 * buffer description (fbinfo), a horizontal offset and a colour selector
 * ('R', 'G' or 'B').
 *
 * Rank 0 first paints a seed pixel at the (offset) centre. Every walker is
 * then launched at distance R2-2 from the centre in a random direction and
 * moves by -1/0/+1 in x and y, confined to the open square of half-width
 * R2 around the centre, until any pixel of its 3x3 neighbourhood differs
 * from fbinfo.emptyPixVal; it is painted where it stops. When a walker
 * stops within 3 pixels of the square's border the square grows by 2 and
 * the paint colour is darkened, cycling through three channels.
 *
 * NOTE(review): each neighbourhood row is read as 3 * BPP bytes into an
 * int[3], which presumably relies on BPP == sizeof(int) - confirm.
 */
__kernel void
dla_thread( void* p )
{
    my_args_t* pargs = (my_args_t*)p;
    int baseColor;
    int n, ix, iy, ixnew, iynew;
    int R2 = 5;                     /* half-width of the walk area */
    int seq;                        /* which channel to darken next: 1, 2 or 3 */
    int row1[3], row2[3], row3[3];  /* 3x3 neighbourhood read back from the frame buffer */
    int rank;
    int x_half = pargs->fbinfo.xres_virtual / 2;
    int y_half = pargs->fbinfo.yres_virtual / 2;
    float angle, mySin, myCos, inv_rand_max = 1.0f / (float) 32767;

    MPI_Init(0,MPI_BUF_SIZE);
    MPI_Comm comm = MPI_COMM_THREAD;
    MPI_Comm_rank(comm, &rank);

    baseColor = 0x00ffffff;

    void* memfree = coprthr_tls_sbrk(0);
    fast_srand(getpid());
    seq = 1;

    /* Only rank 0 plants the seed pixel. */
    if(rank == 0) {
        e_dma_copy((char *) pargs->fbinfo.smem_start + (y_half * pargs->fbinfo.line_length) + ((x_half + pargs->offset) * BPP), (char *) &baseColor, 1 * BPP);
    }

    for(n = 0; n < pargs->n; n++) {
        /* Random launch direction in [-pi, pi]. */
        angle = ((2.0f * ((float) fastrand()) * inv_rand_max) - 1.0f) * 3.14159265f;
        mySin = dla_fast_sin(angle);

        /* cos(angle) is evaluated as sin(angle + pi/2), wrapped back into range. */
        angle += 1.57079632f;
        if(angle > 3.14159265f)
            angle -= 6.28318531f;
        myCos = dla_fast_sin(angle);

        ix = ((x_half + pargs->offset) + ((R2 - 2) * myCos));
        iy = (y_half + ((R2 - 2) * mySin));

        while(1) {
            /* Propose an x move; a rejected move restarts the step. */
            ixnew = ix + (fastrand() % 3) - 1;
            if(!(ixnew > (x_half + pargs->offset) - R2 && ixnew < (x_half + pargs->offset) + R2))
                continue;
            ix = ixnew;

            /* Propose a y move; the accepted x move is kept even if this is rejected. */
            iynew = iy + (fastrand() % 3) - 1;
            if(!(iynew > y_half - R2 && iynew < y_half + R2))
                continue;
            iy = iynew;

            /* Fetch the 3x3 neighbourhood, one row per DMA. */
            e_dma_copy(row1, (char *) pargs->fbinfo.smem_start + ((iy - 1) * pargs->fbinfo.line_length) + ((ix - 1) * BPP), 3 * BPP);
            e_dma_copy(row2, (char *) pargs->fbinfo.smem_start + ((iy) * pargs->fbinfo.line_length) + ((ix - 1) * BPP), 3 * BPP);
            e_dma_copy(row3, (char *) pargs->fbinfo.smem_start + ((iy + 1) * pargs->fbinfo.line_length) + ((ix - 1) * BPP), 3 * BPP);

            /* Stop as soon as any column of the neighbourhood holds a non-empty pixel. */
            if(row1[0] != pargs->fbinfo.emptyPixVal || row2[0] != pargs->fbinfo.emptyPixVal || row3[0] != pargs->fbinfo.emptyPixVal)
                break;
            if(row1[1] != pargs->fbinfo.emptyPixVal || row2[1] != pargs->fbinfo.emptyPixVal || row3[1] != pargs->fbinfo.emptyPixVal)
                break;
            if(row1[2] != pargs->fbinfo.emptyPixVal || row2[2] != pargs->fbinfo.emptyPixVal || row3[2] != pargs->fbinfo.emptyPixVal)
                break;
        }

        /* Paint the walker where it stopped. */
        e_dma_copy((char *) pargs->fbinfo.smem_start + (iy * pargs->fbinfo.line_length) + (ix * BPP), (char *) &baseColor, 1 * BPP);

        /* Near the border: enlarge the walk area and darken one colour channel. */
        if((ix - 3 <= ((x_half + pargs->offset) - R2) || (ix + 3 >= (x_half + pargs->offset) + R2) || (iy - 3 <= y_half - R2) || (iy + 3 >= y_half + R2))) {
            R2 += 2;
            if(seq == 1) {
                baseColor -= (pargs->color == 'R' ? 0x00020000 : 0x00030000);
            }
            else if(seq == 2) {
                baseColor -= (pargs->color == 'B' ? 0x00000002 : 0x00000003);
            }
            else {
                baseColor -= (pargs->color == 'G' ? 0x00000200 : 0x00000300);
            }
            seq++;
            if(seq > 3)
                seq = 1;
        }
    }

    coprthr_tls_brk(memfree);

    MPI_Finalize();
}
示例#7
0
/*
 * Times VECES back-to-back e_dma_copy() reads of nbytes from src into dst
 * and returns the average CTIMER0 clock ticks per copy.
 *
 * A fixed e_wait() of DUMMY_WAIT ticks on CTIMER1 runs inside the timed
 * window and is subtracted again before averaging. All timer and DMA
 * calls stay inside this helper, so the timed window holds the same
 * sequence of calls for every measurement.
 */
static double
time_dma_reads(void *dst, void *src, unsigned int nbytes)
{
  unsigned int dt;

  e_dma_wait(E_DMA_1);
  e_ctimer_set(E_CTIMER_0, E_CTIMER_MAX);
  e_ctimer_start(E_CTIMER_0, E_CTIMER_CLK);
  for (int i = 0; i < VECES; i++)
    e_dma_copy(dst, src, nbytes);
  e_dma_wait(E_DMA_1);
  e_wait(E_CTIMER_1, DUMMY_WAIT);
  e_ctimer_stop(E_CTIMER_0);

  /* The timer counts down from E_CTIMER_MAX. */
  dt = E_CTIMER_MAX - e_ctimer_get(E_CTIMER_0);
  return (dt - DUMMY_WAIT) / (double) VECES;
}

/*
 * DMA read-latency benchmark.
 *
 * For every core except (0, 0), measures the average cost of 8-, 4-, 2-
 * and 1-byte DMA reads from three source addresses on that core
 * (MEM_ALINEADA, MEM_NO_ALINEADA and MEM_A_CABALLO - going by the macro
 * names: aligned, unaligned and straddling) and stores the twelve results
 * in the message block at 0x4000. Sets m->finalizado when done.
 */
int main()
{
  msg_t *m = (msg_t *) 0x4000;
  uint64_t d64;
  uint32_t d32;
  uint16_t d16;
  uint8_t  d8;
  void *da, *dua, *dc;

  for (unsigned int row = 0; row < E_ROWS; row++) {
    for (unsigned int col = 0; col < E_COLS; col++) {
      /* Core (0, 0) is not measured. */
      if (row == 0 && col == 0) continue;
      unsigned int tcore = row * E_COLS + col;

      da = e_get_global_address(row, col, (void *) MEM_ALINEADA);
      dua = e_get_global_address(row, col, (void *) MEM_NO_ALINEADA);
      dc = e_get_global_address(row, col, (void *) MEM_A_CABALLO);

      /* Source: MEM_ALINEADA */
      m->ticks[tcore].t64 = time_dma_reads(&d64, da, sizeof(d64));
      m->ticks[tcore].t32 = time_dma_reads(&d32, da, sizeof(d32));
      m->ticks[tcore].t16 = time_dma_reads(&d16, da, sizeof(d16));
      m->ticks[tcore].t8 = time_dma_reads(&d8, da, sizeof(d8));

      /* Source: MEM_NO_ALINEADA */
      m->ticks[tcore].ua64 = time_dma_reads(&d64, dua, sizeof(d64));
      m->ticks[tcore].ua32 = time_dma_reads(&d32, dua, sizeof(d32));
      m->ticks[tcore].ua16 = time_dma_reads(&d16, dua, sizeof(d16));
      m->ticks[tcore].ua8 = time_dma_reads(&d8, dua, sizeof(d8));

      /*
       * Source: MEM_A_CABALLO. The 64-bit case previously read from dua,
       * which made c64 a duplicate of ua64.
       */
      m->ticks[tcore].c64 = time_dma_reads(&d64, dc, sizeof(d64));
      m->ticks[tcore].c32 = time_dma_reads(&d32, dc, sizeof(d32));
      m->ticks[tcore].c16 = time_dma_reads(&d16, dc, sizeof(d16));
      m->ticks[tcore].c8 = time_dma_reads(&d8, dc, sizeof(d8));
    }
  }
  m->finalizado = E_TRUE;

  return 0;
}
/*
 * Four-neighbour DMA bandwidth kernel.
 *
 * Measures, in CTIMER0 clock ticks, the time to DMA-write 0x2000 bytes to
 * each of the four wrap-around neighbours (result in box[0]) and the time
 * to DMA-read 0x2000 bytes back from each of them (result in box[1]).
 */
int main(void)
{
	unsigned *local_buf = (unsigned *)0x2000;   /* DMA source / destination */
	unsigned *result    = (unsigned *)0x5000;   /* [0] write ticks, [1] read ticks */
	unsigned *nbr_row   = (unsigned *)0x6000;   /* scratch for e_neighbor_id */
	unsigned *nbr_col   = (unsigned *)0x6004;
	unsigned *remote_off = (unsigned *)0x2000;  /* buffer offset on the neighbours */
	unsigned words = 2048;
	unsigned *east, *west, *south, *north;
	unsigned started, now;
	unsigned idx;

	/* Resolve the global address of each neighbour's buffer. */
	e_neighbor_id(E_NEXT_CORE, E_ROW_WRAP, nbr_row, nbr_col);
	east = (unsigned *) e_get_global_address(*nbr_row, *nbr_col, remote_off);

	e_neighbor_id(E_PREV_CORE, E_ROW_WRAP, nbr_row, nbr_col);
	west = (unsigned *) e_get_global_address(*nbr_row, *nbr_col, remote_off);

	e_neighbor_id(E_NEXT_CORE, E_COL_WRAP, nbr_row, nbr_col);
	south = (unsigned *) e_get_global_address(*nbr_row, *nbr_col, remote_off);

	e_neighbor_id(E_PREV_CORE, E_COL_WRAP, nbr_row, nbr_col);
	north = (unsigned *) e_get_global_address(*nbr_row, *nbr_col, remote_off);

	/* ---- Write bandwidth ---- */

	/* Pattern in the local buffer, zeros on the neighbours. */
	idx = 0;
	while (idx < words) {
		local_buf[idx] = 0xdeadbeef;
		east[idx]  = 0x00000000;
		west[idx]  = 0x00000000;
		south[idx] = 0x00000000;
		north[idx] = 0x00000000;
		idx++;
	}

	/* Arm the timer at its maximum and start counting clock cycles. */
	e_ctimer_set(E_CTIMER_0, E_CTIMER_MAX);
	started = e_ctimer_start(E_CTIMER_0, E_CTIMER_CLK);

	e_dma_copy(east, local_buf, 0x2000);
	e_dma_copy(west, local_buf, 0x2000);
	e_dma_copy(south, local_buf, 0x2000);
	e_dma_copy(north, local_buf, 0x2000);

	now = e_ctimer_get(E_CTIMER_0);

	/* The timer counts down, so start minus current is the elapsed count. */
	result[0] = started - now;

	/* ---- Read bandwidth ---- */

	/* Zeros locally, a distinct pattern on every neighbour. */
	idx = 0;
	while (idx < words) {
		local_buf[idx] = 0x00000000;
		east[idx]  = 0xdeadbee1;
		west[idx]  = 0xdeadbee2;
		south[idx] = 0xdeadbee3;
		north[idx] = 0xdeadbee4;
		idx++;
	}

	e_ctimer_set(E_CTIMER_0, E_CTIMER_MAX);
	started = e_ctimer_start(E_CTIMER_0, E_CTIMER_CLK);

	e_dma_copy(local_buf, east, 0x2000);
	e_dma_copy(local_buf, west, 0x2000);
	e_dma_copy(local_buf, south, 0x2000);
	e_dma_copy(local_buf, north, 0x2000);

	now = e_ctimer_get(E_CTIMER_0);

	result[1] = started - now;
	return 0;
}