/** * Setup the DMA 1 Engine for trace work * No parameters needed */ int setupDMA2() { /* Setup a slave DMA to point to the host buffer */ /* right now it is setup to transfer 1M words in a circular buffer */ e_dma_set_desc(E_DMA_1 /* channel */, (E_DMA_ENABLE | E_DMA_DWORD | E_DMA_CHAIN) , &dmaDesc /* *next (infinite loop) */ , 0 /* i_stride src */, 8 /* i_stride dst */ , 8 /* inner cnt */, 1 /* outer cnt */ , 0 /* o_stride src */, 0 /* o_stride dst */ , 0 /* src addr - not used */ , (void*)(e_emem_config.base + HOST_TRACE_BUF_OFFSET) /*dst*/ , &dmaDesc ); e_dma_start(&dmaDesc, E_DMA_1); // start DMA engine go forever return 0; }
int main(void) { unsigned k,i,j; unsigned *dst, *mst; unsigned tran; e_dma_desc_t dma_desc; tran = 128; dst = (unsigned *)0x2000; mst = (unsigned *)0x4000; e_reg_write(E_REG_DMA0CONFIG, 0); e_reg_write(E_REG_DMA1CONFIG, 0); // Initialize the buffer in receiver core for (i=0; i<tran*5; i++) { dst[i] = 0x00000000; } mst[0] = 0x00000000; // Initialize the auto dma register in receiver core e_reg_write(E_REG_DMA0AUTODMA0, 0x00000000); // Prepare for the descriptor slave dma e_dma_set_desc(E_DMA_0,(E_DMA_ENABLE|E_DMA_WORD), 0x0000, 0x0000, 0x0004, 0x0400, 0x0001, 0x0000, 0x0000, 0x0000,(void *)dst, &dma_desc); // Wait for the signal to start dma transfering while(*mst == 0x00000000){}; e_dma_start(&dma_desc, E_DMA_0); e_dma_wait(E_DMA_0); return 0; }
int e_dma_copy(e_dma_id_t chan, void *dst, void *src, size_t bytes, e_dma_align_t align) { unsigned index; unsigned shift; unsigned stride; unsigned config; switch (align) { default: return -1; case E_ALIGN_BYTE: config = E_DMA_BYTE | E_DMA_MASTER | E_DMA_ENABLE; break; case E_ALIGN_SHORT: config = E_DMA_SHORT | E_DMA_MASTER | E_DMA_ENABLE; break; case E_ALIGN_LONG: config = E_DMA_LONG | E_DMA_MASTER | E_DMA_ENABLE; break; case E_ALIGN_DOUBLE: config = E_DMA_DOUBLE | E_DMA_MASTER | E_DMA_ENABLE; break; case E_ALIGN_AUTO: index = (((unsigned) dst) | ((unsigned) src) | ((unsigned) bytes)) & 7; config = dma_configs[index]; break; } shift = config >> 5; stride = 0x10001 << shift; _tcb.config = config; _tcb.inner_stride = stride; _tcb.count = 0x10000 | (bytes >> shift); _tcb.outer_stride = stride; _tcb.src_addr = src; _tcb.dst_addr = dst; return e_dma_start(chan, &_tcb); }
int main(void) { unsigned k,i,j; e_dma_desc_t dma_desc[4]; unsigned *dst, *src, *dst1, *src1, *dst2, *src2; unsigned tran; unsigned tran1; unsigned index[3]; unsigned neighbour_core; unsigned *n_row, *n_col, *p; unsigned *mailbox, *mailbox1, *mailbox2, *mailbox3; n_row = (unsigned *)0x00006400; n_col = (unsigned *)0x00006404; tran = 128; p = 0x0000; // Get the core id of neighbour core e_neighbor_id(E_NEXT_CORE, E_ROW_WRAP, n_row, n_col); neighbour_core = (unsigned) e_get_global_address(*n_row, *n_col, p); // Define the mailbox mailbox = (unsigned *)0x6000; mailbox1 = (unsigned *)0x6100; mailbox2 = (unsigned *)0x6200; mailbox3 = (unsigned *)0x6300; // Initialize the buffer address for dma chain test src = (int *)0x2000; dst = (int *)(neighbour_core + (unsigned)0x2000); src1 = (int *)0x2300; dst1 = (int *)(neighbour_core + (unsigned)0x2500); src2 = (int *)0x2600; dst2 = (int *)(neighbour_core + (unsigned)0x2a00); // Test for word size // Initialize the source and destination buffer for (i=0; i<tran; i++) { src[i] = 0xaaaaaaaa; src1[i] = 0xbbbbbbbb; src2[i] = 0xcccccccc; } for (i=0; i<tran*6; i++) { dst[i] = 0x00000000; } // Prepare for the descriptor for 2d dma e_dma_set_desc(E_DMA_0,(E_DMA_ENABLE|E_DMA_MASTER|E_DMA_WORD), 0x0000, 0x0004, 0x0004, 0x0080, 0x0003, 0x0104 , 0x0304, (void *)src,(void *)dst, &dma_desc[0]); // Start transaction e_dma_start(&dma_desc[0], E_DMA_0); // Wait e_dma_wait(E_DMA_0); // Check the destination buffer value index[0] = checkbuffer(dst, (unsigned)0xaaaaaaaa, tran); index[1] = checkbuffer(dst1, (unsigned)0xbbbbbbbb, tran); index[2] = checkbuffer(dst2, (unsigned)0xcccccccc, tran); if((index[0]|index[1]|index[2]) == 0) { mailbox[0] = 0xffffffff; }else { mailbox[0] = 0x00000000; } // Test for doubleword size // Initialize the source and destination buffer for (i=0; i<tran; i++) { src[i] = 0xaaaaaaaa; src1[i] = 0xbbbbbbbb; src2[i] = 0xcccccccc; } for (i=0; i<tran*6; i++) { dst[i] = 0x00000000; } // Prepare for the descriptor for 2d dma e_dma_set_desc(E_DMA_0,(E_DMA_ENABLE|E_DMA_MASTER|E_DMA_DWORD), 0x0000, 0x0008, 0x0008, 0x0040, 0x0003, 0x0108 , 0x0308, (void *)src,(void *)dst, &dma_desc[0]); // Start transaction e_dma_start(&dma_desc[0], E_DMA_0); // Wait e_dma_wait(E_DMA_0); // Check the destination buffer value index[0] = checkbuffer(dst, (unsigned)0xaaaaaaaa, tran); index[1] = checkbuffer(dst1, (unsigned)0xbbbbbbbb, tran); index[2] = checkbuffer(dst2, (unsigned)0xcccccccc, tran); if((index[0]|index[1]|index[2]) == 0) { mailbox1[0] = 0xffffffff; }else { mailbox1[0] = 0x00000000; } // Test for half size // Initialize the source and destination buffer for (i=0; i<tran; i++) { src[i] = 0xaaaaaaaa; src1[i] = 0xbbbbbbbb; src2[i] = 0xcccccccc; } for (i=0; i<tran*6; i++) { dst[i] = 0x00000000; } // Prepare for the descriptor for 2d dma e_dma_set_desc(E_DMA_0,(E_DMA_ENABLE|E_DMA_MASTER|E_DMA_HWORD), 0x0000, 0x0002, 0x0002, 0x0100, 0x0003, 0x0102 , 0x0302, (void *)src,(void *)dst, &dma_desc[0]); // Start transaction e_dma_start(&dma_desc[0], E_DMA_0); // Wait e_dma_wait(E_DMA_0); // Check the destination buffer value index[0] = checkbuffer(dst, (unsigned)0xaaaaaaaa, tran); index[1] = checkbuffer(dst1, (unsigned)0xbbbbbbbb, tran); index[2] = checkbuffer(dst2, (unsigned)0xcccccccc, tran); if((index[0]|index[1]|index[2]) == 0) { mailbox2[0] = 0xffffffff; }else { mailbox2[0] = 0x00000000; } // Test for byte size // Initialize the source and destination buffer for (i=0; i<tran; i++) { src[i] = 0xaaaaaaaa; src1[i] = 0xbbbbbbbb; src2[i] = 0xcccccccc; } for (i=0; i<tran*6; i++) { dst[i] = 0x00000000; } // Prepare for the descriptor for 2d dma e_dma_set_desc(E_DMA_0,(E_DMA_ENABLE|E_DMA_MASTER|E_DMA_BYTE), 0x0000, 0x0001, 0x0001, 0x0200, 0x0003, 0x0101 , 0x0301, (void *)src,(void *)dst, &dma_desc[0]); // Start transaction e_dma_start(&dma_desc[0], E_DMA_0); // Wait e_dma_wait(E_DMA_0); // Check the destination buffer value index[0] = checkbuffer(dst, (unsigned)0xaaaaaaaa, tran); index[1] = checkbuffer(dst1, (unsigned)0xbbbbbbbb, tran); index[2] = checkbuffer(dst2, (unsigned)0xcccccccc, tran); if((index[0]|index[1]|index[2]) == 0) { mailbox3[0] = 0xffffffff; }else { mailbox3[0] = 0x00000000; } return 0; }
int main(void) { unsigned k,i,j; e_dma_desc_t dma_desc; e_dma_desc_t dma_desc1; unsigned *dst, *src; unsigned tran; unsigned *p; unsigned dma_busy; unsigned *test_c1; unsigned *test_c2; e_memseg_t emem; // Attach to the shm segment if ( E_OK != e_shm_attach(&emem, "shm_1") ) { return -1; } tran = 128; p = (unsigned *)emem.ephy_base; // Initialize the buffer address dst = (int *)0x2000; test_c1 = (unsigned *)0x6000; test_c2 = (unsigned *)0x6004; src = p; // Under the message mode // Initialize the source, destination buffer for (k=0; k<tran; k++) { src[k] = 0x12345678; dst[k] = 0x00000000; } src[tran-1] = 0x87654321; // Prepare for the descriptor for 2d dma e_dma_set_desc(E_DMA_0,(E_DMA_ENABLE|E_DMA_MASTER|E_DMA_WORD|E_DMA_MSGMODE), 0x0000, 0x0004, 0x0004, 0x0080, 0x0001, 0x0000, 0x0000, (void *)src,(void *)dst, &dma_desc); // Start transaction e_dma_start(&dma_desc, E_DMA_0); do { dma_busy = e_reg_read(E_REG_DMA0STATUS) & 0xf; } while (dma_busy); test_c1[0] = dst[tran-1]; // Not under message mode // Initialize the source, destination buffer for (k=0; k<tran; k++) { src[k] = 0x12345678; dst[k] = 0x00000000; } src[tran-1] = 0x87654321; // Prepare for the descriptor for 2d dma e_dma_set_desc(E_DMA_0,(E_DMA_ENABLE|E_DMA_MASTER|E_DMA_WORD), 0x0000, 0x0004, 0x0004, 0x0080, 0x0001, 0x0000, 0x0000, (void *)src,(void *)dst, &dma_desc); // Start transaction e_dma_start(&dma_desc, E_DMA_0); do { dma_busy = e_reg_read(E_REG_DMA0STATUS) & 0xf; } while (dma_busy); test_c2[0] = dst[tran-1]; return 0; }
__kernel void my_thread( void* p) { my_args_t* pargs = (my_args_t*)p; int N = pargs->N, s = pargs->s, d = pargs->d; float *ga = pargs->ga, *gb = pargs->gb, *gc = pargs->gc; int n = N/d; int myrank_2d, mycoords[2]; int dims[2] = {d, d}; int periods[2] = {1, 1}; MPI_Status status; MPI_Init(0,MPI_BUF_SIZE); MPI_Comm comm = MPI_COMM_THREAD; MPI_Comm comm_2d; MPI_Cart_create(comm, 2, dims, periods, 1, &comm_2d); MPI_Comm_rank(comm_2d, &myrank_2d); MPI_Cart_coords(comm_2d, myrank_2d, 2, mycoords); // Compute ranks of the up and left shifts int uprank, downrank, leftrank, rightrank; MPI_Cart_shift(comm_2d, 0, 1, &leftrank, &rightrank); MPI_Cart_shift(comm_2d, 1, 1, &uprank, &downrank); int x = mycoords[0]; int y = mycoords[1]; // this removes initial skew shift by reading in directly int skew = (x+y) % d; void* memfree = coprthr_tls_sbrk(0); float* a = (float*)coprthr_tls_sbrk(n*n*sizeof(float)); float* b = (float*)coprthr_tls_sbrk(n*n*sizeof(float)); float* c = (float*)coprthr_tls_sbrk(n*n*sizeof(float)); e_dma_desc_t dma_c_read, dma_c_write, dma_a_read, dma_b_read; #define DWORD_WRITE(desc,w,h,W,src,dst) \ e_dma_set_desc(E_DMA_0, (E_DMA_ENABLE|E_DMA_MASTER|E_DMA_DWORD), 0x0000, \ 0x0008, 0x0008, \ w/2, h, \ 8, 4*(W-w+2), \ (void*)src, (void*)dst, &desc) #define DWORD_READ(desc,w,h,W,src,dst) \ e_dma_set_desc(E_DMA_0, (E_DMA_ENABLE|E_DMA_MASTER|E_DMA_DWORD), 0x0000, \ 0x0008, 0x0008, \ w/2, h, \ 4*(W-w+2), 8, \ (void*)src, (void*)dst, &desc) int loop; for(loop=0;loop<LOOP1;loop++) { int i,j,k,l; for (i=0; i<s; i++) { for (j=0; j<s; j++) { float* rgc = gc + ((i*N + y*n)*s + j)*N + x*n; DWORD_WRITE(dma_c_write,n,n,s*N,c,rgc); DWORD_READ(dma_c_read,n,n,s*N,rgc,c); // read C e_dma_start(&dma_c_read, E_DMA_0); e_dma_wait(E_DMA_0); for (k=0; k<s; k++) { float* rga = ga + ((i*N + y*n)*s + k)*N + skew*n; float* rgb = gb + ((k*N + skew*n)*s + j)*N + x*n; // read A and B DWORD_READ(dma_b_read,n,n,s*N,rgb,b); DWORD_READ(dma_a_read,n,n,s*N,rga,a); e_dma_start(&dma_b_read, E_DMA_0); e_dma_wait(E_DMA_0); e_dma_start(&dma_a_read, E_DMA_0); e_dma_wait(E_DMA_0); // transpose B int ji, ii; for (ji=0; ji<n-1; ji++) { for(ii=ji+1; ii<n; ii++) { int tmp = b[ji*n+ii]; b[ji*n+ii] = b[ii*n+ji]; b[ii*n+ji] = tmp; } } int loop; for (loop=0;loop<LOOP3;loop++) { // Get into the main computation loop for (l=1; l<d; l++) { int loop; for(loop=0;loop<LOOP2;loop++) MatrixMultiply(n, a, b, c); // Shift matrix a left by one and shift matrix b up by one MPI_Sendrecv_replace(a, n*n, MPI_FLOAT, leftrank, 1, rightrank, 1, comm_2d, &status); MPI_Sendrecv_replace(b, n*n, MPI_FLOAT, uprank, 1, downrank, 1, comm_2d, &status); } MatrixMultiply(n, a, b, c); } // end LOOP3 } // write C e_dma_start(&dma_c_write, E_DMA_1); e_dma_wait(E_DMA_1); } } } // end LOOP1 coprthr_tls_brk(memfree); MPI_Finalize(); }