void pure_g_ecef(void) { int i; double rad1, rad2; double vv[3], vv1[3], r_vec[3]; rad1 = (r0 * r0) / (pure_R * pure_R); rad2 = pure_ecef_pos[2] / pure_R; pure_ecef_gravity[0] = -(mue / (r0 * r0)) * (pure_ecef_pos[0] / pure_R) * ((rad1 * (1 + epsilon)) + (jconst2 * (rad1 * rad1)) * (1 - 5 * (rad2 * rad2))); pure_ecef_gravity[1] = -(mue / (r0 * r0)) * (pure_ecef_pos[1] / pure_R) * ((rad1 * (1 + epsilon)) + (jconst2 * (rad1 * rad1)) * (1 - 5 * (rad2 * rad2))); pure_ecef_gravity[2] = -(mue / (r0 * r0)) * (pure_ecef_pos[2] / pure_R) * ((rad1 * (1 + epsilon)) + (jconst2 * (rad1 * rad1)) * (3 - 5 * (rad2 * rad2))); for (i = 0; i < 3; i++) { r_vec[i] = pure_ecef_pos[i]; vv[i] = 0.0; } cross(omega, r_vec, vv); cross(omega, vv, vv1); matsub(3,1,(double*)pure_ecef_gravity, (double*)vv1, (double*)pure_ecef_gravity); pure_g_ecef_mag = sqrt((pure_ecef_gravity[0] * pure_ecef_gravity[0]) + (pure_ecef_gravity[1] * pure_ecef_gravity[1]) + (pure_ecef_gravity[2] * pure_ecef_gravity[2])); matmulint(3,1,(double*)pure_ecef_gravity,eight_delt,(double*)pure_ecef_gravity); } //end of g_ecef()
void pure_vel_update(void) { int i; double temp[3]; matmul(3, 3, (double *)p_dcm, 3, 1, (double *)p_velo_20ms, (double *)pure_vel); //earth rate correction cross(omg_dub, pure_v_old, temp); matmulint(3,1,(double *)temp, eight_delt, (double *)temp); matsub(3,1,(double *)pure_vel, (double *)temp, (double *)pure_vel); matadd(3,1,(double *)pure_vel, (double *)pure_ecef_gravity, (double *)pure_vel); matadd(3,1,(double *)pure_vel, (double *)pure_v_old, (double *)pure_vel); matadd(3,1,(double *)pure_vel, (double *)pure_v_old, (double *)pure_vav); matmulint(3,1,(double *)pure_vav, 0.5, (double *)pure_vav); for (i = 0; i < 3; i++) pure_v_old[i] = pure_vel[i]; }
void mgfas(double **u, int n, int maxcyc) { double anorm2(double **a, int n); void copy(double **aout, double **ain, int n); void interp(double **uf, double **uc, int nf); void lop(double **out, double **u, int n); void matadd(double **a, double **b, double **c, int n); void matsub(double **a, double **b, double **c, int n); void relax2(double **u, double **rhs, int n); void rstrct(double **uc, double **uf, int nc); void slvsm2(double **u, double **rhs); unsigned int j,jcycle,jj,jm1,jpost,jpre,nf,ng=0,ngrid,nn; double **irho[NGMAX+1],**irhs[NGMAX+1],**itau[NGMAX+1], **itemp[NGMAX+1],**iu[NGMAX+1]; double res,trerr; nn=n; while (nn >>= 1) ng++; if (n != 1+(1L << ng)) nrerror("n-1 must be a power of 2 in mgfas."); if (ng > NGMAX) nrerror("increase NGMAX in mglin."); nn=n/2+1; ngrid=ng-1; irho[ngrid]=dmatrix(1,nn,1,nn); rstrct(irho[ngrid],u,nn); while (nn > 3) { nn=nn/2+1; irho[--ngrid]=dmatrix(1,nn,1,nn); rstrct(irho[ngrid],irho[ngrid+1],nn); } nn=3; iu[1]=dmatrix(1,nn,1,nn); irhs[1]=dmatrix(1,nn,1,nn); itau[1]=dmatrix(1,nn,1,nn); itemp[1]=dmatrix(1,nn,1,nn); slvsm2(iu[1],irho[1]); free_dmatrix(irho[1],1,nn,1,nn); ngrid=ng; for (j=2;j<=ngrid;j++) { nn=2*nn-1; iu[j]=dmatrix(1,nn,1,nn); irhs[j]=dmatrix(1,nn,1,nn); itau[j]=dmatrix(1,nn,1,nn); itemp[j]=dmatrix(1,nn,1,nn); interp(iu[j],iu[j-1],nn); copy(irhs[j],(j != ngrid ? 
irho[j] : u),nn); for (jcycle=1;jcycle<=maxcyc;jcycle++) { nf=nn; for (jj=j;jj>=2;jj--) { for (jpre=1;jpre<=NPRE;jpre++) relax2(iu[jj],irhs[jj],nf); lop(itemp[jj],iu[jj],nf); nf=nf/2+1; jm1=jj-1; rstrct(itemp[jm1],itemp[jj],nf); rstrct(iu[jm1],iu[jj],nf); lop(itau[jm1],iu[jm1],nf); matsub(itau[jm1],itemp[jm1],itau[jm1],nf); if (jj == j) trerr=ALPHA*anorm2(itau[jm1],nf); rstrct(irhs[jm1],irhs[jj],nf); matadd(irhs[jm1],itau[jm1],irhs[jm1],nf); } slvsm2(iu[1],irhs[1]); nf=3; for (jj=2;jj<=j;jj++) { jm1=jj-1; rstrct(itemp[jm1],iu[jj],nf); matsub(iu[jm1],itemp[jm1],itemp[jm1],nf); nf=2*nf-1; interp(itau[jj],itemp[jm1],nf); matadd(iu[jj],itau[jj],iu[jj],nf); for (jpost=1;jpost<=NPOST;jpost++) relax2(iu[jj],irhs[jj],nf); } lop(itemp[j],iu[j],nf); matsub(itemp[j],irhs[j],itemp[j],nf); res=anorm2(itemp[j],nf); if (res < trerr) break; } } copy(u,iu[ngrid],n); for (nn=n,j=ng;j>=1;j--,nn=nn/2+1) { free_dmatrix(itemp[j],1,nn,1,nn); free_dmatrix(itau[j],1,nn,1,nn); free_dmatrix(irhs[j],1,nn,1,nn); free_dmatrix(iu[j],1,nn,1,nn); if (j != ng && j != 1) free_dmatrix(irho[j],1,nn,1,nn); } }
int main(int argc, char *argv[]) { e_epiphany_t Epiphany, *pEpiphany; e_mem_t DRAM, *pDRAM; unsigned int msize; float seed; unsigned int addr; //, clocks; size_t sz; double tdiff[4]; int result, rerval; pEpiphany = &Epiphany; pDRAM = &DRAM; msize = 0x00400000; get_args(argc, argv); fo = stderr; fi = stdin; printf("\nMatrix: C[%d][%d] = A[%d][%d] * B[%d][%d]\n\n", _Smtx, _Smtx, _Smtx, _Smtx, _Smtx, _Smtx); printf("Using %d x %d cores\n\n", _Nside, _Nside); seed = 0.0; printf("Seed = %f\n", seed); // Connect to device for communicating with the Epiphany system // Prepare device e_set_host_verbosity(H_D0); e_init(NULL); e_reset_system(); if (e_alloc(pDRAM, 0x00000000, msize)) { printf("\nERROR: Can't allocate Epiphany DRAM!\n\n"); exit(1); } if (e_open(pEpiphany, 0, 0, e_platform.chip[0].rows, e_platform.chip[0].cols)) { printf("\nERROR: Can't establish connection to Epiphany device!\n\n"); exit(1); } // Initialize Epiphany "Ready" state addr = offsetof(shared_buf_t, core.ready); Mailbox.core.ready = 0; e_write(pDRAM, 0, 0, addr, &Mailbox.core.ready, sizeof(Mailbox.core.ready)); printf("Loading program on Epiphany chip...\n"); e_set_loader_verbosity(ar.verbose); result = e_load_group(ar.srecFile, pEpiphany, 0, 0, pEpiphany->rows, pEpiphany->cols, ar.run_target); if (result == E_ERR) { printf("Error loading Epiphany program.\n"); exit(1); } // Generate operand matrices based on a provided seed matrix_init(seed); #ifdef __WIPE_OUT_RESULT_MATRIX__ // Wipe-out any previous remains in result matrix (for verification) addr = offsetof(shared_buf_t, C[0]); sz = sizeof(Mailbox.C); printf("Writing C[%uB] to address %08x...\n", sz, addr); e_write(pDRAM, 0, 0, addr, (void *) Mailbox.C, sz); #endif clock_gettime(CLOCK_MONOTONIC, &timer[0]); // Copy operand matrices to Epiphany system addr = offsetof(shared_buf_t, A[0]); sz = sizeof(Mailbox.A); printf("Writing A[%uB] to address %08x...\n", sz, addr); e_write(pDRAM, 0, 0, addr, (void *) Mailbox.A, sz); addr = offsetof(shared_buf_t, 
B[0]); sz = sizeof(Mailbox.B); printf("Writing B[%uB] to address %08x...\n", sz, addr); e_write(pDRAM, 0, 0, addr, (void *) Mailbox.B, sz); // Call the Epiphany matmul() function printf("GO Epiphany! ... "); clock_gettime(CLOCK_MONOTONIC, &timer[1]); matmul_go(pDRAM); clock_gettime(CLOCK_MONOTONIC, &timer[2]); printf("Finished calculating Epiphany result.\n"); // Read result matrix and timing addr = offsetof(shared_buf_t, C[0]); sz = sizeof(Mailbox.C); printf("Reading result from address %08x...\n", addr); e_read(pDRAM, 0, 0, addr, (void *) Mailbox.C, sz); clock_gettime(CLOCK_MONOTONIC, &timer[3]); // Calculate a reference result printf("Calculating result on Host ... "); clock_gettime(CLOCK_THREAD_CPUTIME_ID, &timer[4]); #ifndef __DO_STRASSEN__ matmul(Mailbox.A, Mailbox.B, Cref, _Smtx); #else matmul_strassen(Mailbox.A, Mailbox.B, Cref, _Smtx); #endif clock_gettime(CLOCK_THREAD_CPUTIME_ID, &timer[5]); printf("Finished calculating Host result.\n"); addr = offsetof(shared_buf_t, core.clocks); sz = sizeof(Mailbox.core.clocks); printf("Reading time from address %08x...\n", addr); e_read(pDRAM,0, 0, addr, &Mailbox.core.clocks, sizeof(Mailbox.core.clocks)); // clocks = Mailbox.core.clocks; // Calculate the difference between the Epiphany result and the reference result printf("\n*** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***\n"); printf("Verifying result correctness ... 
"); matsub(Mailbox.C, Cref, Cdiff, _Smtx); tdiff[0] = (timer[2].tv_sec - timer[1].tv_sec) * 1000 + ((double) (timer[2].tv_nsec - timer[1].tv_nsec) / 1000000.0);//total tdiff[1] = (timer[1].tv_sec - timer[0].tv_sec) * 1000 + ((double) (timer[1].tv_nsec - timer[0].tv_nsec) / 1000000.0);//write tdiff[2] = (timer[3].tv_sec - timer[2].tv_sec) * 1000 + ((double) (timer[3].tv_nsec - timer[2].tv_nsec) / 1000000.0);//read tdiff[3] = (timer[5].tv_sec - timer[4].tv_sec) * 1000 + ((double) (timer[5].tv_nsec - timer[4].tv_nsec) / 1000000.0);//ref // If the difference is 0, then the matrices are identical and the // calculation was correct if (iszero(Cdiff, _Smtx)) { printf("C_epiphany == C_host\n"); rerval = 0; } else { printf("\n\nERROR: C_epiphany is different from C_host !!!\n"); rerval = 1; } printf("*** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***\n"); printf("\n"); printf("Epiphany (compute): %9.1f msec (@ %03d MHz)\n" , tdiff[0], eMHz); printf(" (write) : %9.1f msec \n" , tdiff[1]); printf(" (read) : %9.1f msec\n" , tdiff[2]); printf(" (*total*): %9.1f msec\n\n" , tdiff[2]+tdiff[1]+tdiff[0]); printf("Host (*total*): %9.1f msec (@ %03d MHz)\n" , tdiff[3], aMHz); #ifdef __DUMP_MATRICES__ printf("\n\n\n"); printf("A[][] = \n"); matprt(Mailbox.A, _Smtx); printf("B[][] = \n"); matprt(Mailbox.B, _Smtx); printf("C[][] = \n"); matprt(Mailbox.C, _Smtx); printf("Cref[][] = \n"); matprt(Cref, _Smtx); int i, j; for (i=0; i<_Nside; i++) for (j=0; j<_Nside; j++) { e_read(pEpiphany, i, j, 0x2000+0*sizeof(float), &Aepi[(i*_Score+0)*_Smtx + j*_Score], 2*sizeof(float)); e_read(pEpiphany, i, j, 0x2000+2*sizeof(float), &Aepi[(i*_Score+1)*_Smtx + j*_Score], 2*sizeof(float)); e_read(pEpiphany, i, j, 0x4000+0*sizeof(float), &Bepi[(i*_Score+0)*_Smtx + j*_Score], 2*sizeof(float)); e_read(pEpiphany, i, j, 0x4000+2*sizeof(float), &Bepi[(i*_Score+1)*_Smtx + j*_Score], 2*sizeof(float)); } printf("Aepi[][] = \n"); matprt(Aepi, _Smtx); printf("Bepi[][] = \n"); matprt(Bepi, _Smtx); 
#endif printf("\n* * * EPIPHANY FTW !!! * * *\n"); // Close connection to device if (e_close(pEpiphany)) { printf("\nERROR: Can't close connection to Epiphany device!\n\n"); exit(1); } if (e_free(pDRAM)) { printf("\nERROR: Can't release Epiphany DRAM!\n\n"); exit(1); } e_finalize(); return rerval; }
/*
 * matmul_strassen - c = a * b for row-major n x n matrices, using one level
 * of Strassen's seven-product scheme.  The seven half-size products are
 * computed by matmul(); this function does not recurse.
 *
 * Parameters:
 *   a, b  Input matrices, n*n elements each, row-major.  Not modified.
 *   c     Output matrix, n*n elements, row-major.  Fully overwritten.
 *   n     Matrix dimension.  n <= 0 is a no-op.
 *
 * Fallbacks (both produce the full product via matmul(a, b, c, n)):
 *   - n is odd (including n == 1).  The quadrant split needs an even n;
 *     previously the last row and column were silently dropped and the
 *     corresponding part of c was never written.
 *   - The workspace cannot be allocated.  Previously the 21 malloc()
 *     results were used unchecked.
 *
 * The workspace is one calloc() block carved into 21 N*N slices (N = n/2),
 * so there is one allocation to check and one free().  calloc() is used so
 * that an overflowing element-count * size product is reported as a failed
 * allocation rather than wrapping.
 */
void matmul_strassen(double* a, double* b, double* c, int n)
{
    enum { STRASSEN_SLICES = 21 };
    double *pool;
    double *tmp1, *tmp2;
    double *a11, *a12, *a21, *a22;
    double *b11, *b12, *b21, *b22;
    double *c11, *c12, *c21, *c22;
    double *m1, *m2, *m3, *m4, *m5, *m6, *m7;
    size_t quarter;
    int i, j, N;

    if (n <= 0)
        return;

    if (n % 2 != 0) {
        matmul(a, b, c, n);
        return;
    }

    N = n / 2;
    quarter = (size_t)N * (size_t)N;

    pool = calloc(quarter, STRASSEN_SLICES * sizeof *pool);
    if (pool == NULL) {
        matmul(a, b, c, n);
        return;
    }

    tmp1 = pool +  0 * quarter;
    tmp2 = pool +  1 * quarter;
    a11  = pool +  2 * quarter;
    a12  = pool +  3 * quarter;
    a21  = pool +  4 * quarter;
    a22  = pool +  5 * quarter;
    b11  = pool +  6 * quarter;
    b12  = pool +  7 * quarter;
    b21  = pool +  8 * quarter;
    b22  = pool +  9 * quarter;
    c11  = pool + 10 * quarter;
    c12  = pool + 11 * quarter;
    c21  = pool + 12 * quarter;
    c22  = pool + 13 * quarter;
    m1   = pool + 14 * quarter;
    m2   = pool + 15 * quarter;
    m3   = pool + 16 * quarter;
    m4   = pool + 17 * quarter;
    m5   = pool + 18 * quarter;
    m6   = pool + 19 * quarter;
    m7   = pool + 20 * quarter;

    // partition A and B into quadrants
    for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
            const size_t q  = (size_t)i * N + j;          /* index inside a quadrant */
            const size_t up = (size_t)i * n + j;          /* same cell, upper half   */
            const size_t lo = (size_t)(i + N) * n + j;    /* same cell, lower half   */

            a11[q] = a[up];
            b11[q] = b[up];
            a12[q] = a[up + N];
            b12[q] = b[up + N];
            a21[q] = a[lo];
            b21[q] = b[lo];
            a22[q] = a[lo + N];
            b22[q] = b[lo + N];
        }
    }

    // form m1 = (a11 + a22)(b11 + b22)
    matadd(a11, a22, tmp1, N);
    matadd(b11, b22, tmp2, N);
    matmul(tmp1, tmp2, m1, N);

    // form m2 = (a21 + a22)b11
    matadd(a21, a22, tmp1, N);
    matmul(tmp1, b11, m2, N);

    // form m3 = a11(b12 - b22)
    matsub(b12, b22, tmp1, N);
    matmul(a11, tmp1, m3, N);

    // form m4 = a22(b21 - b11)
    matsub(b21, b11, tmp1, N);
    matmul(a22, tmp1, m4, N);

    // form m5 = (a11 + a12)b22
    matadd(a11, a12, tmp1, N);
    matmul(tmp1, b22, m5, N);

    // form m6 = (a21 - a11)(b11 + b12)
    matsub(a21, a11, tmp1, N);
    matadd(b11, b12, tmp2, N);
    matmul(tmp1, tmp2, m6, N);

    // form m7 = (a12 - a22)(b21 + b22)
    matsub(a12, a22, tmp1, N);
    matadd(b21, b22, tmp2, N);
    matmul(tmp1, tmp2, m7, N);

    // form c11 = m1 + m4 - m5 + m7
    matadd(m1, m4, tmp1, N);
    matsub(tmp1, m5, tmp2, N);
    matadd(tmp2, m7, c11, N);

    // form c12 = m3 + m5
    matadd(m3, m5, c12, N);

    // form c21 = m2 + m4
    matadd(m2, m4, c21, N);

    // form c22 = m1 - m2 + m3 + m6
    matsub(m1, m2, tmp1, N);
    matadd(tmp1, m3, tmp2, N);
    matadd(tmp2, m6, c22, N);

    // reassemble C from its quadrants
    for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
            const size_t q  = (size_t)i * N + j;
            const size_t up = (size_t)i * n + j;
            const size_t lo = (size_t)(i + N) * n + j;

            c[up]     = c11[q];
            c[up + N] = c12[q];
            c[lo]     = c21[q];
            c[lo + N] = c22[q];
        }
    }

    free(pool);
}
int main(int argc, char *argv[]) { p_mem_t shared_mem, results_mem; uint32_t eram_base; char results[1024] = { '\0' }; int device_cols, device_rows, nside; p_dev_t dev; p_prog_t prog; p_team_t team; p_coords_t size; p_coords_t start = { .row = 0, .col = 0 }; unsigned int msize; float seed; unsigned int addr; //, clocks; size_t sz; int verbose=0; double tdiff[3]; int result, retval = 0; msize = 0x00400000; get_args(argc, argv); fo = stderr; fi = stdin; printf( "------------------------------------------------------------\n"); printf( "Calculating: C[%d][%d] = A[%d][%d] * B[%d][%d]\n", _Smtx, _Smtx, _Smtx, _Smtx, _Smtx, _Smtx); seed = 0.0; if(verbose){ printf( "Seed = %f\n", seed); } dev = p_init(P_DEV_EPIPHANY, 0); if (p_error(dev)) { fprintf(stderr, "Error initializing PAL\n"); return p_error(dev); } device_cols = p_query(dev, P_PROP_COLS); device_rows = p_query(dev, P_PROP_ROWS); // Use min size nside = device_cols > device_rows ? device_cols : device_rows; if (nside < 4) { fprintf(stderr, "Error: Too small device, need at least 4x4\n"); return 1; } // Either 1024, 256, 64, or 16 cores (side must be power of two), nside = nside >= 32 ? 32 : nside >= 16 ? 16 : nside >= 8 ? 
8 : 4; size.row = nside; size.col = nside; team = p_open4(dev, P_TOPOLOGY_2D, &start, &size); printf("Using team of size %d\n", p_team_size(team)); if (p_error(team)) { fprintf(stderr, "Error opening team\n"); return p_error(team); } prog = p_load(dev, ar.elfFile, 0); eram_base = (unsigned) p_query(dev, P_PROP_MEMBASE); shared_mem = p_map(dev, eram_base, msize); // Clear mailbox contents memset(&Mailbox, 0, sizeof(Mailbox)); p_write(&shared_mem, &Mailbox, 0, sizeof(Mailbox), 0); // Generate operand matrices based on a provided seed matrix_init((int)seed); #ifdef __WIPE_OUT_RESULT_MATRIX__ // Wipe-out any previous remains in result matrix (for verification) addr = offsetof(shared_buf_t, C[0]); sz = sizeof(Mailbox.C); if(verbose){ printf( "Writing C[%uB] to address %08x...\n", (unsigned) sz, addr); } p_write(&shared_mem, (void *) Mailbox.C, addr, sz, 0); #endif /* Wallclock time */ clock_gettime(CLOCK_MONOTONIC, &timer[0]); /* Clock CPUTIME too. We don't want to indicate failure just * because the system was under high load. */ clock_gettime(CLOCK_THREAD_CPUTIME_ID, &timer[4]); // Copy operand matrices to Epiphany system addr = offsetof(shared_buf_t, A[0]); sz = sizeof(Mailbox.A); if(verbose){ printf( "Writing A[%uB] to address %08x...\n", (unsigned) sz, addr); } p_write(&shared_mem, (void *) Mailbox.A, addr, sz, 0); addr = offsetof(shared_buf_t, B[0]); sz = sizeof(Mailbox.B); if(verbose){ printf( "Writing B[%uB] to address %08x...\n", (unsigned) sz, addr); } p_write(&shared_mem, (void *) Mailbox.B, addr, sz, 0); // Call the Epiphany matmul() function if(verbose){ printf( "GO Epiphany! ... 
"); } if(verbose){ printf("Loading program on Epiphany chip...\n"); } p_arg_t args[] = { &nside, sizeof(nside), true }; if (p_run(prog, "matmul", team, 0, p_team_size(team), 1, args, 0)) { fprintf(stderr, "Error loading Epiphany program.\n"); exit(1); } // Read result matrix and timing addr = offsetof(shared_buf_t, C[0]); sz = sizeof(Mailbox.C); if(verbose){ printf( "Reading result from address %08x...\n", addr); } p_read(&shared_mem, (void *) Mailbox.C, addr, sz, 0); clock_gettime(CLOCK_MONOTONIC, &timer[1]); clock_gettime(CLOCK_THREAD_CPUTIME_ID, &timer[5]); // Calculate a reference result clock_gettime(CLOCK_THREAD_CPUTIME_ID, &timer[2]); #ifndef __DO_STRASSEN__ matmul(Mailbox.A, Mailbox.B, Cref, _Smtx); #else matmul_strassen(Mailbox.A, Mailbox.B, Cref, _Smtx); #endif clock_gettime(CLOCK_THREAD_CPUTIME_ID, &timer[3]); addr = offsetof(shared_buf_t, core.clocks); sz = sizeof(Mailbox.core.clocks); if(verbose){ printf( "Reading time from address %08x...\n", addr); } p_read(&shared_mem, &Mailbox.core.clocks, addr, sizeof(Mailbox.core.clocks), 0); // clocks = Mailbox.core.clocks; // Calculate the difference between the Epiphany result and the reference result matsub(Mailbox.C, Cref, Cdiff, _Smtx); tdiff[0] = (timer[1].tv_sec - timer[0].tv_sec) * 1000 + ((double) (timer[1].tv_nsec - timer[0].tv_nsec) / 1000000.0); // tdiff[0] = ((double) clocks) / eMHz * 1000; tdiff[1] = (timer[3].tv_sec - timer[2].tv_sec) * 1000 + ((double) (timer[3].tv_nsec - timer[2].tv_nsec) / 1000000.0); tdiff[2] = (timer[5].tv_sec - timer[4].tv_sec) * 1000 + ((double) (timer[5].tv_nsec - timer[4].tv_nsec) / 1000000.0); // If the difference is 0, then the matrices are identical and the // calculation was correct if (iszero(Cdiff, _Smtx)) { printf( "Epiphany(time) %9.1f msec (@ %03d MHz)\n", tdiff[0], eMHz); printf( "Host(time) %9.1f msec (@ %03d MHz)\n", tdiff[1], aMHz); printf( "------------------------------------------------------------\n"); printf( "TEST \"matmul-16\" PASSED\n"); retval = 0; } 
else { printf( "\n\nERROR: C_epiphany is different from C_host !!!\n"); printf( "TEST \"matmul-16\" FAILED\n"); retval = 1; } #if 0 #ifdef __DUMP_MATRICES__ printf( "\n\n\n"); printf( "A[][] = \n"); matprt(Mailbox.A, _Smtx); printf( "B[][] = \n"); matprt(Mailbox.B, _Smtx); printf( "C[][] = \n"); matprt(Mailbox.C, _Smtx); printf( "Cref[][] = \n"); matprt(Cref, _Smtx); int i, j; for (i=0; i<_Nside; i++) for (j=0; j<_Nside; j++) { e_read(pEpiphany, i, j, 0x2000+0*sizeof(float), &Aepi[(i*_Score+0)*_Smtx + j*_Score], 2*sizeof(float)); e_read(pEpiphany, i, j, 0x2000+2*sizeof(float), &Aepi[(i*_Score+1)*_Smtx + j*_Score], 2*sizeof(float)); e_read(pEpiphany, i, j, 0x4000+0*sizeof(float), &Bepi[(i*_Score+0)*_Smtx + j*_Score], 2*sizeof(float)); e_read(pEpiphany, i, j, 0x4000+2*sizeof(float), &Bepi[(i*_Score+1)*_Smtx + j*_Score], 2*sizeof(float)); } printf( "Aepi[][] = \n"); matprt(Aepi, _Smtx); printf( "Bepi[][] = \n"); matprt(Bepi, _Smtx); #endif #endif // p_unmap ... p_close(team); p_finalize(dev); return retval; } // Initialize operand matrices void matrix_init(int seed) { int i, j, p; p = 0; for (i=0; i<_Smtx; i++) for (j=0; j<_Smtx; j++) Mailbox.A[p++] = (i + j + seed) % _MAX_MEMBER_; p = 0; for (i=0; i<_Smtx; i++) for (j=0; j<_Smtx; j++) Mailbox.B[p++] = ((i + j) * 2 + seed) % _MAX_MEMBER_; p = 0; for (i=0; i<_Smtx; i++) for (j=0; j<_Smtx; j++) Mailbox.C[p++] = 0x8dead; return; }