int main(int argc, char *argv[]) { e_epiphany_t Epiphany, *pEpiphany; e_mem_t DRAM, *pDRAM; unsigned int msize; float seed; unsigned int addr; //, clocks; size_t sz; double tdiff[4]; int result, rerval; pEpiphany = &Epiphany; pDRAM = &DRAM; msize = 0x00400000; get_args(argc, argv); fo = stderr; fi = stdin; printf("\nMatrix: C[%d][%d] = A[%d][%d] * B[%d][%d]\n\n", _Smtx, _Smtx, _Smtx, _Smtx, _Smtx, _Smtx); printf("Using %d x %d cores\n\n", _Nside, _Nside); seed = 0.0; printf("Seed = %f\n", seed); // Connect to device for communicating with the Epiphany system // Prepare device e_set_host_verbosity(H_D0); e_init(NULL); e_reset_system(); if (e_alloc(pDRAM, 0x00000000, msize)) { printf("\nERROR: Can't allocate Epiphany DRAM!\n\n"); exit(1); } if (e_open(pEpiphany, 0, 0, e_platform.chip[0].rows, e_platform.chip[0].cols)) { printf("\nERROR: Can't establish connection to Epiphany device!\n\n"); exit(1); } // Initialize Epiphany "Ready" state addr = offsetof(shared_buf_t, core.ready); Mailbox.core.ready = 0; e_write(pDRAM, 0, 0, addr, &Mailbox.core.ready, sizeof(Mailbox.core.ready)); printf("Loading program on Epiphany chip...\n"); e_set_loader_verbosity(ar.verbose); result = e_load_group(ar.srecFile, pEpiphany, 0, 0, pEpiphany->rows, pEpiphany->cols, ar.run_target); if (result == E_ERR) { printf("Error loading Epiphany program.\n"); exit(1); } // Generate operand matrices based on a provided seed matrix_init(seed); #ifdef __WIPE_OUT_RESULT_MATRIX__ // Wipe-out any previous remains in result matrix (for verification) addr = offsetof(shared_buf_t, C[0]); sz = sizeof(Mailbox.C); printf("Writing C[%uB] to address %08x...\n", sz, addr); e_write(pDRAM, 0, 0, addr, (void *) Mailbox.C, sz); #endif clock_gettime(CLOCK_MONOTONIC, &timer[0]); // Copy operand matrices to Epiphany system addr = offsetof(shared_buf_t, A[0]); sz = sizeof(Mailbox.A); printf("Writing A[%uB] to address %08x...\n", sz, addr); e_write(pDRAM, 0, 0, addr, (void *) Mailbox.A, sz); addr = offsetof(shared_buf_t, B[0]); sz = sizeof(Mailbox.B); printf("Writing B[%uB] to address %08x...\n", sz, addr); e_write(pDRAM, 0, 0, addr, (void *) Mailbox.B, sz); // Call the Epiphany matmul() function printf("GO Epiphany! ... "); clock_gettime(CLOCK_MONOTONIC, &timer[1]); matmul_go(pDRAM); clock_gettime(CLOCK_MONOTONIC, &timer[2]); printf("Finished calculating Epiphany result.\n"); // Read result matrix and timing addr = offsetof(shared_buf_t, C[0]); sz = sizeof(Mailbox.C); printf("Reading result from address %08x...\n", addr); e_read(pDRAM, 0, 0, addr, (void *) Mailbox.C, sz); clock_gettime(CLOCK_MONOTONIC, &timer[3]); // Calculate a reference result printf("Calculating result on Host ... "); clock_gettime(CLOCK_THREAD_CPUTIME_ID, &timer[4]); #ifndef __DO_STRASSEN__ matmul(Mailbox.A, Mailbox.B, Cref, _Smtx); #else matmul_strassen(Mailbox.A, Mailbox.B, Cref, _Smtx); #endif clock_gettime(CLOCK_THREAD_CPUTIME_ID, &timer[5]); printf("Finished calculating Host result.\n"); addr = offsetof(shared_buf_t, core.clocks); sz = sizeof(Mailbox.core.clocks); printf("Reading time from address %08x...\n", addr); e_read(pDRAM,0, 0, addr, &Mailbox.core.clocks, sizeof(Mailbox.core.clocks)); // clocks = Mailbox.core.clocks; // Calculate the difference between the Epiphany result and the reference result printf("\n*** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***\n"); printf("Verifying result correctness ... "); matsub(Mailbox.C, Cref, Cdiff, _Smtx); tdiff[0] = (timer[2].tv_sec - timer[1].tv_sec) * 1000 + ((double) (timer[2].tv_nsec - timer[1].tv_nsec) / 1000000.0);//total tdiff[1] = (timer[1].tv_sec - timer[0].tv_sec) * 1000 + ((double) (timer[1].tv_nsec - timer[0].tv_nsec) / 1000000.0);//write tdiff[2] = (timer[3].tv_sec - timer[2].tv_sec) * 1000 + ((double) (timer[3].tv_nsec - timer[2].tv_nsec) / 1000000.0);//read tdiff[3] = (timer[5].tv_sec - timer[4].tv_sec) * 1000 + ((double) (timer[5].tv_nsec - timer[4].tv_nsec) / 1000000.0);//ref // If the difference is 0, then the matrices are identical and the // calculation was correct if (iszero(Cdiff, _Smtx)) { printf("C_epiphany == C_host\n"); rerval = 0; } else { printf("\n\nERROR: C_epiphany is different from C_host !!!\n"); rerval = 1; } printf("*** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***\n"); printf("\n"); printf("Epiphany (compute): %9.1f msec (@ %03d MHz)\n" , tdiff[0], eMHz); printf(" (write) : %9.1f msec \n" , tdiff[1]); printf(" (read) : %9.1f msec\n" , tdiff[2]); printf(" (*total*): %9.1f msec\n\n" , tdiff[2]+tdiff[1]+tdiff[0]); printf("Host (*total*): %9.1f msec (@ %03d MHz)\n" , tdiff[3], aMHz); #ifdef __DUMP_MATRICES__ printf("\n\n\n"); printf("A[][] = \n"); matprt(Mailbox.A, _Smtx); printf("B[][] = \n"); matprt(Mailbox.B, _Smtx); printf("C[][] = \n"); matprt(Mailbox.C, _Smtx); printf("Cref[][] = \n"); matprt(Cref, _Smtx); int i, j; for (i=0; i<_Nside; i++) for (j=0; j<_Nside; j++) { e_read(pEpiphany, i, j, 0x2000+0*sizeof(float), &Aepi[(i*_Score+0)*_Smtx + j*_Score], 2*sizeof(float)); e_read(pEpiphany, i, j, 0x2000+2*sizeof(float), &Aepi[(i*_Score+1)*_Smtx + j*_Score], 2*sizeof(float)); e_read(pEpiphany, i, j, 0x4000+0*sizeof(float), &Bepi[(i*_Score+0)*_Smtx + j*_Score], 2*sizeof(float)); e_read(pEpiphany, i, j, 0x4000+2*sizeof(float), &Bepi[(i*_Score+1)*_Smtx + j*_Score], 2*sizeof(float)); } printf("Aepi[][] = \n"); matprt(Aepi, _Smtx); printf("Bepi[][] = \n"); matprt(Bepi, _Smtx); #endif printf("\n* * * EPIPHANY FTW !!! * * *\n"); // Close connection to device if (e_close(pEpiphany)) { printf("\nERROR: Can't close connection to Epiphany device!\n\n"); exit(1); } if (e_free(pDRAM)) { printf("\nERROR: Can't release Epiphany DRAM!\n\n"); exit(1); } e_finalize(); return rerval; }
void vector() { int i,k,dim,nrow,ncol,start0,start1,start2,itemp,first,minus ; double val ; ptr qy ; start=1 ; /* makes vartyp=-1 */ p=getvar() ; if (errnum) derror(errnum) ; if (p==NULL || p->symtype!=-1) derror(70) ; /* undef. or un-REAL */ i=p->valptr ; if (intval[i]!=1) derror(74) ; /* no vector */ nrow=intval[i+1] ; start0=intval[i+2]; if (warea[gi++]!='=') derror(29) ; /* no = sign */ if (warea[gi]=='0') { /* vector=0 */ for (k=start0 ; k<start0+nrow ; k++) relval[k]=0 ; gi++ ; /* skip 0 */ goto L5 ; /* done! */ } first=TRUE ; /* first-pass flag */ if (warea[gi]=='-') { minus=TRUE ; gi++ ; } else minus=FALSE ; /* deal with minus sign, if any */ L10 : i=gi ; start=0 ; val=getoprnd() ; /* try for scalar multiplier */ if (errnum) goto L15 ; /* not scalar */ if (minus) val=-val ; if (warea[gi++]!='*') derror(75) ; start=1 ; /* prepare to multiply scalar ... */ qy=getvar() ; if (errnum) derror(errnum) ; /* ... by a vector */ if (qy==NULL || qy->symtype!=-1) derror(70) ; /* undef, notREAL */ if (qy==p) derror(85) ; /* illegal recursion */ i=qy->valptr ; if (intval[i]!=1 || intval[i+1]!=nrow) derror(74) ; start1=intval[i+2] ; if (first) for (k=0 ; k<nrow ; k++) relval[start0+k]=val*relval[start1+k] ; else for (k=0 ; k<nrow ; k++) relval[start0+k]+=val*relval[start1+k] ; goto L20 ; L15 : if (errnum!=73) derror(errnum) ; errnum=0 ; start=1 ; gi=i ; /* no scalar, try for vector or matrix */ qy=getvar() ; if (errnum) derror(errnum) ; if (qy==NULL || qy->symtype!=-1) derror(70) ; if (qy==p) derror(85) ; /* illegal recursion */ i=qy->valptr ; dim=intval[i] ; if (dim>2 || intval[i+1]!=nrow) derror(74) ; if (dim==1) { /* vector-only term */ start1=intval[i+2] ; if (first) { if (minus) for (k=0 ; k<nrow ; k++) relval[start0+k]= -relval[start1+k] ; else for ( k=0 ; k<nrow ; k++) relval[start0+k]=relval[start1+k] ; } /* if ( first */ else if (minus) for (k=0 ; k<nrow ; k++) /* not first */ relval[start0+k]-=relval[start1+k] ; else for (k=0 ; k<nrow ; k++) relval[start0+k]+=relval[start1+k] ; } /* if ( dim=1 */ else if (dim==2) { /* matrix multiplier */ ncol=intval[i+2] ; start1=intval[i+3] ; if (warea[gi++]!='*') derror(75) ; /* skip * */ qy=getvar() ; if (errnum) derror(errnum) ; /* next operand */ if (qy==NULL || qy->symtype!=-1) derror(70) ; if (qy==p) derror(85) ; /* illegal recursion */ i=qy->valptr ; if (intval[i]!=1 || intval[i+1]!=ncol) derror(74) ; start2=intval[i+2] ; if (minus) for (k=0 ; k<nrow ; k++) { itemp=start1+k*ncol ; /* base of kth row */ if (first) val=0 ; else val=relval[start0+k] ; for (i=0 ; i<ncol ; i++) val-=relval[itemp+i]*relval[start2+i] ; relval[start0+k]=val ; } else for (k=0 ; k<nrow ; k++) { itemp=start1+k*ncol ; if (first) val=0 ; else val=relval[start0+k] ; for (i=0 ; i<ncol ; i++) val+=relval[itemp+i]*relval[start2+i] ; relval[start0+k]=val ; } } /* ELSE matrix multiplier */ L20 : if (warea[gi]=='+') minus=FALSE ; else if (warea[gi]=='-') minus=TRUE ; else goto L5 ; first=FALSE ; gi++ ; goto L10 ; L5 : start=0 ; /* done */ if (tracer>=4 && !errnum) { printf("%c%s",'\33',"[7m") ; /* ANSI reverse video */ matprt() ; printf("%c%s",'\33',"[0m") ; /* ANSI restore video */ } } /* vector */
int main(int argc, char *argv[]) { p_mem_t shared_mem, results_mem; uint32_t eram_base; char results[1024] = { '\0' }; int device_cols, device_rows, nside; p_dev_t dev; p_prog_t prog; p_team_t team; p_coords_t size; p_coords_t start = { .row = 0, .col = 0 }; unsigned int msize; float seed; unsigned int addr; //, clocks; size_t sz; int verbose=0; double tdiff[3]; int result, retval = 0; msize = 0x00400000; get_args(argc, argv); fo = stderr; fi = stdin; printf( "------------------------------------------------------------\n"); printf( "Calculating: C[%d][%d] = A[%d][%d] * B[%d][%d]\n", _Smtx, _Smtx, _Smtx, _Smtx, _Smtx, _Smtx); seed = 0.0; if(verbose){ printf( "Seed = %f\n", seed); } dev = p_init(P_DEV_EPIPHANY, 0); if (p_error(dev)) { fprintf(stderr, "Error initializing PAL\n"); return p_error(dev); } device_cols = p_query(dev, P_PROP_COLS); device_rows = p_query(dev, P_PROP_ROWS); // Use min size nside = device_cols > device_rows ? device_cols : device_rows; if (nside < 4) { fprintf(stderr, "Error: Too small device, need at least 4x4\n"); return 1; } // Either 1024, 256, 64, or 16 cores (side must be power of two), nside = nside >= 32 ? 32 : nside >= 16 ? 16 : nside >= 8 ? 8 : 4; size.row = nside; size.col = nside; team = p_open4(dev, P_TOPOLOGY_2D, &start, &size); printf("Using team of size %d\n", p_team_size(team)); if (p_error(team)) { fprintf(stderr, "Error opening team\n"); return p_error(team); } prog = p_load(dev, ar.elfFile, 0); eram_base = (unsigned) p_query(dev, P_PROP_MEMBASE); shared_mem = p_map(dev, eram_base, msize); // Clear mailbox contents memset(&Mailbox, 0, sizeof(Mailbox)); p_write(&shared_mem, &Mailbox, 0, sizeof(Mailbox), 0); // Generate operand matrices based on a provided seed matrix_init((int)seed); #ifdef __WIPE_OUT_RESULT_MATRIX__ // Wipe-out any previous remains in result matrix (for verification) addr = offsetof(shared_buf_t, C[0]); sz = sizeof(Mailbox.C); if(verbose){ printf( "Writing C[%uB] to address %08x...\n", (unsigned) sz, addr); } p_write(&shared_mem, (void *) Mailbox.C, addr, sz, 0); #endif /* Wallclock time */ clock_gettime(CLOCK_MONOTONIC, &timer[0]); /* Clock CPUTIME too. We don't want to indicate failure just * because the system was under high load. */ clock_gettime(CLOCK_THREAD_CPUTIME_ID, &timer[4]); // Copy operand matrices to Epiphany system addr = offsetof(shared_buf_t, A[0]); sz = sizeof(Mailbox.A); if(verbose){ printf( "Writing A[%uB] to address %08x...\n", (unsigned) sz, addr); } p_write(&shared_mem, (void *) Mailbox.A, addr, sz, 0); addr = offsetof(shared_buf_t, B[0]); sz = sizeof(Mailbox.B); if(verbose){ printf( "Writing B[%uB] to address %08x...\n", (unsigned) sz, addr); } p_write(&shared_mem, (void *) Mailbox.B, addr, sz, 0); // Call the Epiphany matmul() function if(verbose){ printf( "GO Epiphany! ... "); } if(verbose){ printf("Loading program on Epiphany chip...\n"); } p_arg_t args[] = { &nside, sizeof(nside), true }; if (p_run(prog, "matmul", team, 0, p_team_size(team), 1, args, 0)) { fprintf(stderr, "Error loading Epiphany program.\n"); exit(1); } // Read result matrix and timing addr = offsetof(shared_buf_t, C[0]); sz = sizeof(Mailbox.C); if(verbose){ printf( "Reading result from address %08x...\n", addr); } p_read(&shared_mem, (void *) Mailbox.C, addr, sz, 0); clock_gettime(CLOCK_MONOTONIC, &timer[1]); clock_gettime(CLOCK_THREAD_CPUTIME_ID, &timer[5]); // Calculate a reference result clock_gettime(CLOCK_THREAD_CPUTIME_ID, &timer[2]); #ifndef __DO_STRASSEN__ matmul(Mailbox.A, Mailbox.B, Cref, _Smtx); #else matmul_strassen(Mailbox.A, Mailbox.B, Cref, _Smtx); #endif clock_gettime(CLOCK_THREAD_CPUTIME_ID, &timer[3]); addr = offsetof(shared_buf_t, core.clocks); sz = sizeof(Mailbox.core.clocks); if(verbose){ printf( "Reading time from address %08x...\n", addr); } p_read(&shared_mem, &Mailbox.core.clocks, addr, sizeof(Mailbox.core.clocks), 0); // clocks = Mailbox.core.clocks; // Calculate the difference between the Epiphany result and the reference result matsub(Mailbox.C, Cref, Cdiff, _Smtx); tdiff[0] = (timer[1].tv_sec - timer[0].tv_sec) * 1000 + ((double) (timer[1].tv_nsec - timer[0].tv_nsec) / 1000000.0); // tdiff[0] = ((double) clocks) / eMHz * 1000; tdiff[1] = (timer[3].tv_sec - timer[2].tv_sec) * 1000 + ((double) (timer[3].tv_nsec - timer[2].tv_nsec) / 1000000.0); tdiff[2] = (timer[5].tv_sec - timer[4].tv_sec) * 1000 + ((double) (timer[5].tv_nsec - timer[4].tv_nsec) / 1000000.0); // If the difference is 0, then the matrices are identical and the // calculation was correct if (iszero(Cdiff, _Smtx)) { printf( "Epiphany(time) %9.1f msec (@ %03d MHz)\n", tdiff[0], eMHz); printf( "Host(time) %9.1f msec (@ %03d MHz)\n", tdiff[1], aMHz); printf( "------------------------------------------------------------\n"); printf( "TEST \"matmul-16\" PASSED\n"); retval = 0; } else { printf( "\n\nERROR: C_epiphany is different from C_host !!!\n"); printf( "TEST \"matmul-16\" FAILED\n"); retval = 1; } #if 0 #ifdef __DUMP_MATRICES__ printf( "\n\n\n"); printf( "A[][] = \n"); matprt(Mailbox.A, _Smtx); printf( "B[][] = \n"); matprt(Mailbox.B, _Smtx); printf( "C[][] = \n"); matprt(Mailbox.C, _Smtx); printf( "Cref[][] = \n"); matprt(Cref, _Smtx); int i, j; for (i=0; i<_Nside; i++) for (j=0; j<_Nside; j++) { e_read(pEpiphany, i, j, 0x2000+0*sizeof(float), &Aepi[(i*_Score+0)*_Smtx + j*_Score], 2*sizeof(float)); e_read(pEpiphany, i, j, 0x2000+2*sizeof(float), &Aepi[(i*_Score+1)*_Smtx + j*_Score], 2*sizeof(float)); e_read(pEpiphany, i, j, 0x4000+0*sizeof(float), &Bepi[(i*_Score+0)*_Smtx + j*_Score], 2*sizeof(float)); e_read(pEpiphany, i, j, 0x4000+2*sizeof(float), &Bepi[(i*_Score+1)*_Smtx + j*_Score], 2*sizeof(float)); } printf( "Aepi[][] = \n"); matprt(Aepi, _Smtx); printf( "Bepi[][] = \n"); matprt(Bepi, _Smtx); #endif #endif // p_unmap ... p_close(team); p_finalize(dev); return retval; } // Initialize operand matrices void matrix_init(int seed) { int i, j, p; p = 0; for (i=0; i<_Smtx; i++) for (j=0; j<_Smtx; j++) Mailbox.A[p++] = (i + j + seed) % _MAX_MEMBER_; p = 0; for (i=0; i<_Smtx; i++) for (j=0; j<_Smtx; j++) Mailbox.B[p++] = ((i + j) * 2 + seed) % _MAX_MEMBER_; p = 0; for (i=0; i<_Smtx; i++) for (j=0; j<_Smtx; j++) Mailbox.C[p++] = 0x8dead; return; }