int main(void) { unsigned *a, *b, *c, *size; unsigned i, j; int coreid; unsigned *src, *dest; unsigned row, col; coreid = e_get_coreid() ^ 0x0c3; // Copy to the opposite core in chip src = (unsigned *) 0x6000; dest = (void *) ((coreid<<20) | 0x6000); size = (unsigned *) 0x1e00; a = (unsigned *) 0x2000; b = (unsigned *) 0x4000; c = (unsigned *) 0x6000; // Doing convolution to generate busy level for (i=0; i<(*size); i++) { for (j=0; j<=i; j++) { c[i] += a[i-j] * b[j]; } } for(i=0; i<100000; i++) { e_dma_copy(dest, src, *size); } // clear the IMASK e_irq_mask(E_SYNC, E_FALSE); // enable the global interrupt e_irq_global_mask(E_FALSE); __asm__ __volatile__("idle"); return EXIT_SUCCESS; }
int main(void) { unsigned mesh_reg; unsigned mesh_reg_modify; unsigned time_c, time_p; unsigned time; unsigned tran,k,i,j,h,m,n,q; unsigned *mailbox, *mode; unsigned *commander; unsigned *counter; unsigned *master, *slave, *p; unsigned *row, *col; unsigned *n_row, *n_col; unsigned *neighbour0, *neighbour1, *neighbour2, *neighbour3; row = (unsigned *)0x5000; col = (unsigned *)0x5004; n_row = (unsigned *)0x5008; n_col = (unsigned *)0x500c; master = (unsigned *)0x2000; p =(unsigned *) 0x2000; slave = (unsigned *) e_get_global_address(*row, *col, p); commander = (unsigned *)0x5100; p = (unsigned *) 0x5300; counter = (unsigned *) e_get_global_address(*row, *col, p); mailbox = (unsigned *)0x6000; mode = (unsigned *)0x5400; tran = 2048; // Core number k = (e_group_config.core_row)*e_group_config.group_cols + (e_group_config.core_col); // Broadcast to all the other neighbours p = (unsigned *)0x5100; e_neighbor_id(E_PREV_CORE, E_ROW_WRAP, n_row, n_col); neighbour0 = (unsigned *) e_get_global_address(*n_row, *n_col, p) ; e_neighbor_id(E_NEXT_CORE, E_ROW_WRAP, n_row, n_col); neighbour1 = (unsigned *) e_get_global_address(*n_row, *n_col, p) ; e_neighbor_id(E_PREV_CORE, E_COL_WRAP, n_row, n_col); neighbour2 = (unsigned *) e_get_global_address(*n_row, *n_col, p) ; e_neighbor_id(E_NEXT_CORE, E_COL_WRAP, n_row, n_col); neighbour3 = (unsigned *) e_get_global_address(*n_row, *n_col, p) ; // Initialize master and slave for(i=0; i<tran; i++) { master[i] = 0xdeadbee9; slave[i] = 0x00000000; } while(1) { //Clear the mode box mode[0] = 0xdeadbeef; // Clear the start commander commander[0] = 0x00000000; // Wait for the mesh event while(mode[0] == 0xdeadbeef) {}; q = mode[0]; mesh_reg = e_reg_read(E_REG_MESH_CONFIG); mesh_reg_modify = mesh_reg & 0xffffff0f; mesh_reg_modify = mesh_reg_modify |mesh_type[1][q]; e_reg_write(E_REG_MESH_CONFIG, mesh_reg_modify); // Set the ctimer e_ctimer_set(E_CTIMER_0, E_CTIMER_MAX) ; // Waiting for the signal to start transfering while(commander[0] != 
0xdeadbeef) {}; // Start the ctimer and select the time type time_p = e_ctimer_start(E_CTIMER_0, E_CTIMER_MESH_0); // Broadcast to all the other neighbours neighbour0[0] = 0xdeadbeef; neighbour1[0] = 0xdeadbeef; neighbour2[0] = 0xdeadbeef; neighbour3[0] = 0xdeadbeef; e_dma_copy(slave, master, 0x2000); // Wait for transfer finishing while(slave[2047] != 0xdeadbee9 ) {}; counter[k] = 1; // Get the time now time_c = e_ctimer_get(E_CTIMER_0); time = time_p - time_c; // Output the result mailbox[q] = time; // Load the original value of E_REG_MESH_CONFIG e_reg_write(E_REG_MESH_CONFIG, mesh_reg); // Check if all the mesh events have been through if(q == 12) { break; } } return 0; }
/*
 * Threaded-MPI 5-point stencil worker.
 *
 * p points to a my_args_t carrying the global grid size (ni x nj), the process
 * grid (di x dj), the iteration count, the global input/output arrays A and B
 * (row stride ni floats) and the five stencil weights w0..w4.
 *
 * Each thread owns a local tile of ni x nj floats including a one-cell halo,
 * where ni = (NI-2)/di + 2 and nj = (NJ-2)/dj + 2. Per iteration it applies
 *
 *   b[j][i] = w0*a[j][i-1] + w1*a[j][i] + w2*a[j][i+1]
 *           + w3*a[j-1][i] + w4*a[j+1][i]
 *
 * to the interior, exchanges halos with its four neighbours and swaps a/b.
 *
 * Constraints implied by the code below:
 *   - (ni-2) and (nj-2) must be multiples of 4 (4x4 unrolled kernel);
 *   - ni must be even and the rows 8-byte aligned (rows are copied as
 *     long long words until the pointer equals nsbuf + ni).
 *
 * NOTE(review): the exchanges use `comm` while the neighbour ranks come from
 * `comm_2d` (created with reorder = 1) - confirm both rankings agree.
 */
void __entry stencil_thread( void* p )
{
    my_args_t* pargs = (my_args_t*)p;

    int i, j;
    int NI = pargs->ni;
    int NJ = pargs->nj;
    int di = pargs->di;
    int dj = pargs->dj;
    int niter = pargs->niter;
    float* A = pargs->A;
    float* B = pargs->B;
    float w0 = pargs->w0;
    float w1 = pargs->w1;
    float w2 = pargs->w2;
    float w3 = pargs->w3;
    float w4 = pargs->w4;

    int myrank_2d, mycoords[2];
    int dims[2] = {di, dj};
    int periods[2] = {1, 1}; // Periodic communication but ignoring edge copy where irrelevant

    MPI_Status status;
    MPI_Init(0, MPI_BUF_SIZE);

    MPI_Comm comm = MPI_COMM_THREAD;
    MPI_Comm comm_2d;
    MPI_Cart_create(comm, 2, dims, periods, 1, &comm_2d);
    MPI_Comm_rank(comm_2d, &myrank_2d);
    MPI_Cart_coords(comm_2d, myrank_2d, 2, mycoords);

    int x = mycoords[0];
    int y = mycoords[1];

    // ranks of neighbors
    int north, south, west, east;
    MPI_Cart_shift(comm_2d, 0, 1, &west, &east);
    MPI_Cart_shift(comm_2d, 1, 1, &north, &south);

    // local stencil sizes with padding
    int ni = (NI-2) / di + 2;
    int nj = (NJ-2) / dj + 2;

    // Load the initial values
    void* memfree = coprthr_tls_sbrk(0);
    float* a = (float*)coprthr_tls_sbrk(ni*nj*sizeof(float));
    float* b = (float*)coprthr_tls_sbrk(ni*nj*sizeof(float));
    float* nsbuf = (float*)coprthr_tls_sbrk(ni*sizeof(float));
    float* webuf = (float*)coprthr_tls_sbrk((nj-2)*sizeof(float));
    long long* srcadr;
    long long* dstadr;
    long long* nsend = (long long*)(nsbuf + ni);

    // Copy initial conditions (2D DMA would be better).
    // Local row j lives in global row y*(nj-2)+j; the tile starts at global
    // column x*(ni-2). (The row offset must scale with the tile height and the
    // column offset with the tile width, otherwise non-square tiles read the
    // wrong region.)
    for (j=0; j<nj; j++)
        e_dma_copy(a+j*ni, A + (y*(nj-2)+j)*NI + x*(ni-2), ni*sizeof(float));

    // Initial conditions
    // if(y==0) for (i=0; i<ni-2; i++) a[i] = -2.0f;
    // if(y==dj) for (i=0; i<ni-2; i++) a[(nj-1)*ni+i] = 1.0f;
    // if(x==di) for (j=0; j<nj-2; j++) a[(j+2)*ni-1] = -1.0f;
    // if(x==0) for (j=0; j<nj-2; j++) a[(j+1)*ni] = 2.0f;

    // Copy "a" into "b" (only need fixed borders would be better)
    for (i=0; i<ni*nj; i++) b[i] = a[i];

    while (niter--) {

        /* Reference (non-unrolled) form of the kernel below:
        for (j=1; j<nj-1; j++) {
            for (i=1; i<ni-1; i++) {
                b[j*ni+i] = w0*a[j*ni+i-1] + w1*a[j*ni+i] + w2*a[j*ni+i+1]
                          + w3*a[j*ni+i-ni] + w4*a[j*ni+i+ni];
            }
        }*/

        // 4x4 unrolled kernel. aRC is the value at row j+R, column i+C.
        // Columns 4 and 5 of rows 1..4 are carried into columns 0 and 1 of the
        // next i-step so that each input is loaded only once.
        for (j=0; j<nj-2; j+=4) {
            float a14 = a[(j+1)*ni+0];
            float a15 = a[(j+1)*ni+1];
            float a24 = a[(j+2)*ni+0];
            float a25 = a[(j+2)*ni+1];
            float a34 = a[(j+3)*ni+0];
            float a35 = a[(j+3)*ni+1];
            float a44 = a[(j+4)*ni+0];
            float a45 = a[(j+4)*ni+1];
            for (i=0; i<ni-2; i+=4) {
                float a01 = a[(j+0)*ni+i+1];
                float a02 = a[(j+0)*ni+i+2];
                float a03 = a[(j+0)*ni+i+3];
                float a04 = a[(j+0)*ni+i+4];
                float a10 = a14;
                float a11 = a15;
                float a12 = a[(j+1)*ni+i+2];
                float a13 = a[(j+1)*ni+i+3];
                a14 = a[(j+1)*ni+i+4];
                a15 = a[(j+1)*ni+i+5];
                float a20 = a24;
                float a21 = a25;
                float a22 = a[(j+2)*ni+i+2];
                float a23 = a[(j+2)*ni+i+3];
                a24 = a[(j+2)*ni+i+4];
                a25 = a[(j+2)*ni+i+5];
                float a30 = a34;
                float a31 = a35;
                float a32 = a[(j+3)*ni+i+2];
                float a33 = a[(j+3)*ni+i+3];
                a34 = a[(j+3)*ni+i+4];
                a35 = a[(j+3)*ni+i+5];
                float a40 = a44;
                float a41 = a45;
                float a42 = a[(j+4)*ni+i+2];
                float a43 = a[(j+4)*ni+i+3];
                a44 = a[(j+4)*ni+i+4];
                a45 = a[(j+4)*ni+i+5];
                float a51 = a[(j+5)*ni+i+1];
                float a52 = a[(j+5)*ni+i+2];
                float a53 = a[(j+5)*ni+i+3];
                float a54 = a[(j+5)*ni+i+4];

                b[(j+1)*ni+i+1] = fma(w4,a21,fma(w3,a01,fma(w2,a12,fma(w1,a11,w0*a10))));
                b[(j+1)*ni+i+2] = fma(w4,a22,fma(w3,a02,fma(w2,a13,fma(w1,a12,w0*a11))));
                b[(j+1)*ni+i+3] = fma(w4,a23,fma(w3,a03,fma(w2,a14,fma(w1,a13,w0*a12))));
                b[(j+1)*ni+i+4] = fma(w4,a24,fma(w3,a04,fma(w2,a15,fma(w1,a14,w0*a13))));

                b[(j+2)*ni+i+1] = fma(w4,a31,fma(w3,a11,fma(w2,a22,fma(w1,a21,w0*a20))));
                b[(j+2)*ni+i+2] = fma(w4,a32,fma(w3,a12,fma(w2,a23,fma(w1,a22,w0*a21))));
                b[(j+2)*ni+i+3] = fma(w4,a33,fma(w3,a13,fma(w2,a24,fma(w1,a23,w0*a22))));
                b[(j+2)*ni+i+4] = fma(w4,a34,fma(w3,a14,fma(w2,a25,fma(w1,a24,w0*a23))));

                b[(j+3)*ni+i+1] = fma(w4,a41,fma(w3,a21,fma(w2,a32,fma(w1,a31,w0*a30))));
                b[(j+3)*ni+i+2] = fma(w4,a42,fma(w3,a22,fma(w2,a33,fma(w1,a32,w0*a31))));
                b[(j+3)*ni+i+3] = fma(w4,a43,fma(w3,a23,fma(w2,a34,fma(w1,a33,w0*a32))));
                b[(j+3)*ni+i+4] = fma(w4,a44,fma(w3,a24,fma(w2,a35,fma(w1,a34,w0*a33))));

                b[(j+4)*ni+i+1] = fma(w4,a51,fma(w3,a31,fma(w2,a42,fma(w1,a41,w0*a40))));
                b[(j+4)*ni+i+2] = fma(w4,a52,fma(w3,a32,fma(w2,a43,fma(w1,a42,w0*a41))));
                b[(j+4)*ni+i+3] = fma(w4,a53,fma(w3,a33,fma(w2,a44,fma(w1,a43,w0*a42))));
                b[(j+4)*ni+i+4] = fma(w4,a54,fma(w3,a34,fma(w2,a45,fma(w1,a44,w0*a43))));
            }
        }

        // north/south: rows are staged through nsbuf, two floats per copy
        dstadr = (long long*)nsbuf;
        srcadr = (long long*)(b+ni);
        while (dstadr != nsend) *dstadr++ = *srcadr++; // second row
        MPI_Sendrecv_replace(nsbuf, ni, MPI_FLOAT, north, 1, south, 1, comm, &status);
        if (y!=dj-1) {
            // not on the bottom edge: received data becomes the bottom halo
            dstadr = (long long*)(b+(nj-1)*ni);
            srcadr = (long long*)nsbuf;
            while (srcadr != nsend) *dstadr++ = *srcadr++; // last row
        }
        dstadr = (long long*)nsbuf;
        srcadr = (long long*)(b+(nj-2)*ni);
        while (dstadr != nsend) *dstadr++ = *srcadr++; // second to last row
        MPI_Sendrecv_replace(nsbuf, ni, MPI_FLOAT, south, 1, north, 1, comm, &status);
        if (y) {
            // not on the top edge: received data becomes the top halo
            dstadr = (long long*)b;
            srcadr = (long long*)nsbuf;
            while (srcadr != nsend) *dstadr++ = *srcadr++; // first row
        }

        // west/east: columns are strided, so gather/scatter through webuf
        for (j=0; j<nj-2; j++) webuf[j] = b[(j+1)*ni+1]; // second column
        MPI_Sendrecv_replace(webuf, nj-2, MPI_FLOAT, west, 1, east, 1, comm, &status);
        if (x!=di-1)
            for (j=0; j<nj-2; j++) b[(j+2)*ni-1] = webuf[j]; // last column
        for (j=0; j<nj-2; j++) webuf[j] = b[(j+2)*ni-2]; // second to last column
        MPI_Sendrecv_replace(webuf, nj-2, MPI_FLOAT, east, 1, west, 1, comm, &status);
        if (x)
            for (j=0; j<nj-2; j++) b[(j+1)*ni] = webuf[j]; // first column

        // the freshly written tile becomes the input of the next iteration
        float* tmp = b;
        b = a;
        a = tmp;
    }

    // Copy internal results (halo excluded), using the same tile origin as the load
    for (j=1; j<nj-1; j++)
        e_dma_copy(B + (y*(nj-2)+j)*NI + x*(ni-2) + 1, a+j*ni+1, (ni-2)*sizeof(float));

    coprthr_tls_brk(memfree);

    MPI_Finalize();
}
__kernel void nbody_thread( void* p ) { my_args_t* pargs = (my_args_t*)p; int n = pargs->n; int cnt = pargs->cnt; unsigned int s_x, s_y, s_z, s_m; unsigned int page = 0; float dt = pargs->dt; float es = pargs->es; Particle *particles = pargs->p; ParticleV *state = pargs->v; int rank, size, npart, i; int left, right; MPI_Status status; MPI_Init(0,MPI_BUF_SIZE); MPI_Comm comm = MPI_COMM_THREAD; MPI_Comm_rank(comm, &rank); MPI_Comm_size(comm, &size); MPI_Cart_shift(comm, 0, 1, &left, &right); npart = n / size; void* memfree = coprthr_tls_sbrk(0); Particle* my_particles = (Particle*)coprthr_tls_sbrk(npart*sizeof(Particle)); ParticleV* my_state = (ParticleV*)coprthr_tls_sbrk(npart*sizeof(ParticleV)); Particle* sendbuf = (Particle*)coprthr_tls_sbrk(npart*sizeof(Particle)); e_dma_copy(my_particles, particles + npart*rank, npart*sizeof(Particle)); e_dma_copy(my_state, state + npart*rank, npart*sizeof(ParticleV)); unsigned int rgba_black = 0x00000000; unsigned int rgba_white = 0x00ffffff; while (cnt--) { for (i=0; i<npart; i++) sendbuf[i] = my_particles[i]; for (i=0; i<size; i++) { if (i) MPI_Sendrecv_replace(sendbuf, sizeof(Particle)/sizeof(float)*npart, MPI_FLOAT, left, 1, right, 1, comm, &status); ComputeAccel(my_particles, sendbuf, my_state, npart, es); } e_dma_copy(particles + npart*rank, my_particles, npart*sizeof(Particle)); ComputeNewPos(my_particles, my_state, npart, dt); for(i = 0; i < npart; i++){ s_x = (int) particles[i + npart*rank].x; s_y = (int) particles[i + npart*rank].y; if(s_x >= 0 && s_x < pargs->fbinfo.xres_virtual && s_y >= 0 && s_y < pargs->fbinfo.yres_virtual){ e_dma_copy((char *) pargs->fbinfo.smem_start + (s_y * pargs->fbinfo.line_length) + (s_x * BPP), (char *) &rgba_black, 1 * BPP); } s_x = (int) my_particles[i].x; s_y = (int) my_particles[i].y; if(cnt > 1 && s_x >= 0 && s_x < pargs->fbinfo.xres_virtual && s_y >= 0 && s_y < pargs->fbinfo.yres_virtual){ e_dma_copy((char *) pargs->fbinfo.smem_start + (s_y * pargs->fbinfo.line_length) + (s_x * BPP), 
(char *) &rgba_white, 1 * BPP); } } } coprthr_tls_brk(memfree); MPI_Finalize(); }
int main(void) { unsigned time_c, time_p; unsigned time; unsigned tran,k,i,j,q,h,m,n; unsigned *commander; unsigned *n_row, *n_col, *p, *nei_row, *nei_col; unsigned *neighbour, *neighbour0; unsigned *master; unsigned *counter; // Define the mailbox master = (unsigned *)0x2000; n_row = (unsigned *)0x6000; n_col = (unsigned *)0x6004; neighbour0 = (unsigned *)0x6008; nei_row = (unsigned *) 0x600c; nei_col = (unsigned *) 0x6010; p =(unsigned *) 0x2000; commander = (unsigned *)0x6100; counter = (unsigned *)0x80806300; tran = 2048; // Get the neighbour global address e_neighbor_id(E_PREV_CORE, E_ROW_WRAP, n_row, n_col); neighbour = (unsigned *) e_get_global_address(*n_row, *n_col, p) ; k = (*n_row)*e_group_config.group_cols + (*n_col); commander[0] = 0x00000000; // Broadcast to the next core p = (unsigned *)0x6100; e_neighbor_id(E_NEXT_CORE, E_COL_WRAP, nei_row, nei_col); neighbour0 = (unsigned *) e_get_global_address(*nei_row, *nei_col, p) ; // Initialize master and slave for(i=0; i<tran; i++) { master[i] = 0xdeadbee1; neighbour[i] = 0x00000000; } // Waiting for the signal to start transfering while(commander[0] != (unsigned) 0xdeadbeef) {}; // Broadcast the signal to neighbour neighbour0[0] = 0xdeadbeef; // Write to all neighbour cores e_dma_copy(neighbour, master, 0x2000); while(1) { if(neighbour[2047] == 0xdeadbee1) { counter[e_group_config.core_row * (e_group_config.group_cols/2) + e_group_config.core_col] = 1; break; } } return 0; }
__kernel void dla_thread( void* p ) { my_args_t* pargs = (my_args_t*)p; int baseColor; int i, n, ix, iy, ixnew, iynew; int found; int R = 10; int R2 = 5; int seq; int row1[3], row2[3], row3[3]; int rank; int x_half = pargs->fbinfo.xres_virtual / 2; int y_half = pargs->fbinfo.yres_virtual / 2; float angle, mySin, myCos, inv_rand_max = 1.0f / (float) 32767; MPI_Status status; MPI_Init(0,MPI_BUF_SIZE); MPI_Comm comm = MPI_COMM_THREAD; MPI_Comm_rank(comm, &rank); baseColor = 0x00ffffff; void* memfree = coprthr_tls_sbrk(0); fast_srand(getpid()); seq = 1; if(rank == 0) { e_dma_copy((char *) pargs->fbinfo.smem_start + (y_half * pargs->fbinfo.line_length) + ((x_half + pargs->offset) * BPP), (char *) &baseColor, 1 * BPP); } for(n = 0; n < pargs->n; n++) { angle = ((2.0f * ((float) fastrand()) * inv_rand_max) - 1.0f) * 3.14159265f; if(angle < 0.0f) { mySin = 1.275323954f * angle + 0.405284735f * angle * angle; if(mySin < 0) mySin = 0.225f * (mySin * -mySin - mySin) + mySin; else mySin = 0.225f * (mySin * mySin - mySin) + mySin; } else { mySin = 1.275323954f * angle - 0.405284735f * angle * angle; if(mySin < 0) mySin = 0.225f * (mySin * -mySin - mySin) + mySin; else mySin = 0.225f * (mySin * mySin - mySin) + mySin; } angle += 1.57079632f; if(angle > 3.14159265f) angle -= 6.28318531f; if(angle < 0.0f) { myCos = 1.275323954f * angle + 0.405284735f * angle * angle; if(myCos < 0) myCos = 0.225f * (myCos * -myCos - myCos) + myCos; else myCos = 0.225f * (myCos * myCos - myCos) + myCos; } else { myCos = 1.275323954f * angle - 0.405284735f * angle * angle; if(myCos < 0) myCos = 0.225f * (myCos * -myCos - myCos) + myCos; else myCos = 0.225f * (myCos * myCos - myCos) + myCos; } ix = ((x_half + pargs->offset) + ((R2 - 2) * myCos)); iy = (y_half + ((R2 - 2) * mySin)); while(1) { ixnew = ix + (fastrand() % 3) - 1; if(ixnew > (x_half + pargs->offset) - R2 && ixnew < (x_half + pargs->offset) + R2) { ix = ixnew; } else { continue; } iynew = iy + (fastrand() % 3) - 1; if(iynew > y_half - R2 
&& iynew < y_half + R2) { iy = iynew; } else { continue; } found = 0; e_dma_copy(row1, (char *) pargs->fbinfo.smem_start + ((iy - 1) * pargs->fbinfo.line_length) + ((ix - 1) * BPP), 3 * BPP); e_dma_copy(row2, (char *) pargs->fbinfo.smem_start + ((iy) * pargs->fbinfo.line_length) + ((ix - 1) * BPP), 3 * BPP); e_dma_copy(row3, (char *) pargs->fbinfo.smem_start + ((iy + 1) * pargs->fbinfo.line_length) + ((ix - 1) * BPP), 3 * BPP); if(row1[0] != pargs->fbinfo.emptyPixVal || row2[0] != pargs->fbinfo.emptyPixVal || row3[0] != pargs->fbinfo.emptyPixVal) { found = 1; break; } if(row1[1] != pargs->fbinfo.emptyPixVal || row2[1] != pargs->fbinfo.emptyPixVal || row3[1] != pargs->fbinfo.emptyPixVal) { found = 1; break; } if(row1[2] != pargs->fbinfo.emptyPixVal || row2[2] != pargs->fbinfo.emptyPixVal || row3[2] != pargs->fbinfo.emptyPixVal) { found = 1; break; } } e_dma_copy((char *) pargs->fbinfo.smem_start + (iy * pargs->fbinfo.line_length) + (ix * BPP), (char *) &baseColor, 1 * BPP); if((ix - 3 <= ((x_half + pargs->offset) - R2) || (ix + 3 >= (x_half + pargs->offset) + R2) || (iy - 3 <= y_half - R2) || (iy + 3 >= y_half + R2))) { R += 4; R2 += 2; if(seq == 1) { baseColor -= (pargs->color == 'R' ? 0x00020000 : 0x00030000); } else if(seq == 2) { baseColor -= (pargs->color == 'B' ? 0x00000002 : 0x00000003); } else { baseColor -= (pargs->color == 'G' ? 0x00000200 : 0x00000300); } seq++; if(seq > 3) seq = 1; } } coprthr_tls_brk(memfree); MPI_Finalize(); return 0; }
int main() { msg_t *m = (msg_t *) 0x4000; uint64_t d64; uint32_t d32; uint16_t d16; uint8_t d8; unsigned int dt; void *da, *dua, *dc; for (unsigned int row = 0; row < E_ROWS; row++) { for (unsigned int col = 0; col < E_COLS; col++) { if (row == 0 && col == 0) continue; unsigned int tcore = row * E_COLS + col; da = e_get_global_address(row, col, (void *) MEM_ALINEADA); dua = e_get_global_address(row, col, (void *) MEM_NO_ALINEADA); dc = e_get_global_address(row, col, (void *) MEM_A_CABALLO); e_dma_wait(E_DMA_1); e_ctimer_set(E_CTIMER_0, E_CTIMER_MAX); e_ctimer_start(E_CTIMER_0, E_CTIMER_CLK); for (int i = 0; i < VECES; i++) e_dma_copy(&d64, da, sizeof(d64)); e_dma_wait(E_DMA_1); e_wait(E_CTIMER_1, DUMMY_WAIT); e_ctimer_stop(E_CTIMER_0); dt = E_CTIMER_MAX - e_ctimer_get(E_CTIMER_0); m->ticks[tcore].t64 = (dt - DUMMY_WAIT) / (double) VECES; e_dma_wait(E_DMA_1); e_ctimer_set(E_CTIMER_0, E_CTIMER_MAX); e_ctimer_start(E_CTIMER_0, E_CTIMER_CLK); for (int i = 0; i < VECES; i++) e_dma_copy(&d32, da, sizeof(d32)); e_dma_wait(E_DMA_1); e_wait(E_CTIMER_1, DUMMY_WAIT); e_ctimer_stop(E_CTIMER_0); dt = E_CTIMER_MAX - e_ctimer_get(E_CTIMER_0); m->ticks[tcore].t32 = (dt - DUMMY_WAIT) / (double) VECES; e_dma_wait(E_DMA_1); e_ctimer_set(E_CTIMER_0, E_CTIMER_MAX); e_ctimer_start(E_CTIMER_0, E_CTIMER_CLK); for (int i = 0; i < VECES; i++) e_dma_copy(&d16, da, sizeof(d16)); e_dma_wait(E_DMA_1); e_wait(E_CTIMER_1, DUMMY_WAIT); e_ctimer_stop(E_CTIMER_0); dt = E_CTIMER_MAX - e_ctimer_get(E_CTIMER_0); m->ticks[tcore].t16 = (dt - DUMMY_WAIT) / (double) VECES; e_dma_wait(E_DMA_1); e_ctimer_set(E_CTIMER_0, E_CTIMER_MAX); e_ctimer_start(E_CTIMER_0, E_CTIMER_CLK); for (int i = 0; i < VECES; i++) e_dma_copy(&d8, da, sizeof(d8)); e_dma_wait(E_DMA_1); e_wait(E_CTIMER_1, DUMMY_WAIT); e_ctimer_stop(E_CTIMER_0); dt = E_CTIMER_MAX - e_ctimer_get(E_CTIMER_0); m->ticks[tcore].t8 = (dt - DUMMY_WAIT) / (double) VECES; e_dma_wait(E_DMA_1); e_ctimer_set(E_CTIMER_0, E_CTIMER_MAX); e_ctimer_start(E_CTIMER_0, 
E_CTIMER_CLK); for (int i = 0; i < VECES; i++) e_dma_copy(&d64, dua, sizeof(d64)); e_dma_wait(E_DMA_1); e_wait(E_CTIMER_1, DUMMY_WAIT); e_ctimer_stop(E_CTIMER_0); dt = E_CTIMER_MAX - e_ctimer_get(E_CTIMER_0); m->ticks[tcore].ua64 = (dt - DUMMY_WAIT) / (double) VECES; e_dma_wait(E_DMA_1); e_ctimer_set(E_CTIMER_0, E_CTIMER_MAX); e_ctimer_start(E_CTIMER_0, E_CTIMER_CLK); for (int i = 0; i < VECES; i++) e_dma_copy(&d32, dua, sizeof(d32)); e_dma_wait(E_DMA_1); e_wait(E_CTIMER_1, DUMMY_WAIT); e_ctimer_stop(E_CTIMER_0); dt = E_CTIMER_MAX - e_ctimer_get(E_CTIMER_0); m->ticks[tcore].ua32 = (dt - DUMMY_WAIT) / (double) VECES; e_dma_wait(E_DMA_1); e_ctimer_set(E_CTIMER_0, E_CTIMER_MAX); e_ctimer_start(E_CTIMER_0, E_CTIMER_CLK); for (int i = 0; i < VECES; i++) e_dma_copy(&d16, dua, sizeof(d16)); e_dma_wait(E_DMA_1); e_wait(E_CTIMER_1, DUMMY_WAIT); e_ctimer_stop(E_CTIMER_0); dt = E_CTIMER_MAX - e_ctimer_get(E_CTIMER_0); m->ticks[tcore].ua16 = (dt - DUMMY_WAIT) / (double) VECES; e_dma_wait(E_DMA_1); e_ctimer_set(E_CTIMER_0, E_CTIMER_MAX); e_ctimer_start(E_CTIMER_0, E_CTIMER_CLK); for (int i = 0; i < VECES; i++) e_dma_copy(&d8, dua, sizeof(d8)); e_dma_wait(E_DMA_1); e_wait(E_CTIMER_1, DUMMY_WAIT); e_ctimer_stop(E_CTIMER_0); dt = E_CTIMER_MAX - e_ctimer_get(E_CTIMER_0); m->ticks[tcore].ua8 = (dt - DUMMY_WAIT) / (double) VECES; e_dma_wait(E_DMA_1); e_ctimer_set(E_CTIMER_0, E_CTIMER_MAX); e_ctimer_start(E_CTIMER_0, E_CTIMER_CLK); for (int i = 0; i < VECES; i++) e_dma_copy(&d64, dua, sizeof(d64)); e_dma_wait(E_DMA_1); e_wait(E_CTIMER_1, DUMMY_WAIT); e_ctimer_stop(E_CTIMER_0); dt = E_CTIMER_MAX - e_ctimer_get(E_CTIMER_0); m->ticks[tcore].c64 = (dt - DUMMY_WAIT) / (double) VECES; e_dma_wait(E_DMA_1); e_ctimer_set(E_CTIMER_0, E_CTIMER_MAX); e_ctimer_start(E_CTIMER_0, E_CTIMER_CLK); for (int i = 0; i < VECES; i++) e_dma_copy(&d32, dc, sizeof(d32)); e_dma_wait(E_DMA_1); e_wait(E_CTIMER_1, DUMMY_WAIT); e_ctimer_stop(E_CTIMER_0); dt = E_CTIMER_MAX - e_ctimer_get(E_CTIMER_0); 
m->ticks[tcore].c32 = (dt - DUMMY_WAIT) / (double) VECES; e_dma_wait(E_DMA_1); e_ctimer_set(E_CTIMER_0, E_CTIMER_MAX); e_ctimer_start(E_CTIMER_0, E_CTIMER_CLK); for (int i = 0; i < VECES; i++) e_dma_copy(&d16, dc, sizeof(d16)); e_dma_wait(E_DMA_1); e_wait(E_CTIMER_1, DUMMY_WAIT); e_ctimer_stop(E_CTIMER_0); dt = E_CTIMER_MAX - e_ctimer_get(E_CTIMER_0); m->ticks[tcore].c16 = (dt - DUMMY_WAIT) / (double) VECES; e_dma_wait(E_DMA_1); e_ctimer_set(E_CTIMER_0, E_CTIMER_MAX); e_ctimer_start(E_CTIMER_0, E_CTIMER_CLK); for (int i = 0; i < VECES; i++) e_dma_copy(&d8, dc, sizeof(d8)); e_dma_wait(E_DMA_1); e_wait(E_CTIMER_1, DUMMY_WAIT); e_ctimer_stop(E_CTIMER_0); dt = E_CTIMER_MAX - e_ctimer_get(E_CTIMER_0); m->ticks[tcore].c8 = (dt - DUMMY_WAIT) / (double) VECES; } } m->finalizado = E_TRUE; return 0; }
int main(void) { unsigned time_c, time_p; unsigned time; unsigned tran,k,i,j,q,h,m,n; unsigned *box; unsigned *n_row, *n_col, *p; unsigned *neighbour_n; unsigned *neighbour_s; unsigned *neighbour_w; unsigned *neighbour_e; unsigned *master; // Define the mailbox master = (unsigned *)0x2000; box = (unsigned *) 0x5000; n_row = (unsigned *)0x6000; n_col = (unsigned *)0x6004; tran = 2048; p =(unsigned *) 0x2000; // Get the neighbour global address e_neighbor_id(E_NEXT_CORE, E_ROW_WRAP, n_row, n_col); neighbour_e = (unsigned *) e_get_global_address(*n_row, *n_col, p) ; e_neighbor_id(E_PREV_CORE, E_ROW_WRAP, n_row, n_col); neighbour_w = (unsigned *) e_get_global_address(*n_row, *n_col, p) ; e_neighbor_id(E_NEXT_CORE, E_COL_WRAP, n_row, n_col); neighbour_s = (unsigned *) e_get_global_address(*n_row, *n_col, p) ; e_neighbor_id(E_PREV_CORE, E_COL_WRAP, n_row, n_col); neighbour_n = (unsigned *) e_get_global_address(*n_row, *n_col, p) ; // Test the writing bandwidth // Initialize master and slave for(i=0; i<tran; i++) { master[i] = 0xdeadbeef; neighbour_e[i] = 0x00000000; neighbour_w[i] = 0x00000000; neighbour_s[i] = 0x00000000; neighbour_n[i] = 0x00000000; } // Set the ctimer e_ctimer_set(E_CTIMER_0, E_CTIMER_MAX) ; // Start the ctimer and select the time type time_p = e_ctimer_start(E_CTIMER_0, E_CTIMER_CLK); // Write to all neighbour cores e_dma_copy(neighbour_e, master, 0x2000); e_dma_copy(neighbour_w, master, 0x2000); e_dma_copy(neighbour_s, master, 0x2000); e_dma_copy(neighbour_n, master, 0x2000); // Get the time now time_c = e_ctimer_get(E_CTIMER_0); time = time_p - time_c; // Output the result box[0] = time; // Test the reading bandwidth // Initialize master and slave for(i=0; i<tran; i++) { master[i] = 0x00000000; neighbour_e[i] = 0xdeadbee1; neighbour_w[i] = 0xdeadbee2; neighbour_s[i] = 0xdeadbee3; neighbour_n[i] = 0xdeadbee4; } // Set the ctimer e_ctimer_set(E_CTIMER_0, E_CTIMER_MAX) ; // Start the ctimer and select the time type time_p = e_ctimer_start(E_CTIMER_0, 
E_CTIMER_CLK); // Read from all neighbour cores e_dma_copy(master, neighbour_e, 0x2000); e_dma_copy(master, neighbour_w, 0x2000); e_dma_copy(master, neighbour_s, 0x2000); e_dma_copy(master, neighbour_n, 0x2000); // Get the time now time_c = e_ctimer_get(E_CTIMER_0); time = time_p - time_c; // Output the result box[1] = time; return 0; }