int main(int argc, char *argv[]) { int row1,col1,row2; //============================================================================ // Initialize array elements for(row1=0; row1<ARRAY_SIZE; row1++) { for(col1=0;col1<ARRAY_SIZE;col1++) { arrayInPPE1[row1][col1] = rand()%100; } } long unsigned int start_t,end_t; start_t = __mftb(); //========================================================================= /*Loop for array computation*/ for(row1 = 0;row1<ARRAY_SIZE;row1++) { for(col1=0;col1< ARRAY_SIZE;col1++) { arrayResult[row1][col1] = 0; for(row2 = 0;row2< ARRAY_SIZE;row2++) { arrayResult[row1][col1] += arrayInPPE1[row1][row2]*arrayInPPE1[row2][col1]; } } } end_t = __mftb(); //========================================================================= /* Print out the multiplication result */ for(row1=0; row1<ARRAY_SIZE; row1++) { printf("\n"); for(col1=0;col1<ARRAY_SIZE;col1++) { printf("%d\t",arrayResult[row1][col1]); } } printf("\nArray multiplication computation time is %f us\n",(float)(end_t-start_t)/79.8); return 0; }
/*
 * Returns the current time-base reading on BG/Q, using the primitive that
 * matches the compiler in use:
 *   - compilers defining __GNUC__ call GetTimeBase()
 *   - compilers defining __IBMC__ call the __mftb() intrinsic
 * Any other compiler stops the build with an #error.
 */
unsigned long long _bgq_GetTimeBase(void)
{
    unsigned long long ticks;

#ifdef __GNUC__
    ticks = GetTimeBase();
#elif defined(__IBMC__)
    ticks = __mftb();
#else
#error "Cannot find GetTimeBase for BG/Q (unhandled compiler)"
#endif

    return ticks;
}
/*
 * Reads the PowerPC time base through the __mftbu()/__mftb() intrinsics and
 * returns the 64-bit tick count multiplied by timeBaseRatio.
 *
 * The upper word is read before and after the lower word; if it changed in
 * between, the lower word wrapped during the read and the sequence is
 * retried, so the two halves always belong to the same instant.
 *
 * NOTE(review): the unit of the result depends on timeBaseRatio, which is
 * defined outside this block -- presumably seconds per tick; confirm.
 */
static inline double ppc_intrinsic_time (void)
{
    unsigned long hi, lo;

    do {
        hi = __mftbu();
        /*
         * Keep only the low 32 bits. In 32-bit compilation __mftb() already
         * yields just the lower word, so the mask is a no-op there. In
         * 64-bit compilation it yields the whole time base, and without the
         * mask the upper word would be counted twice when the halves are
         * recombined below.
         */
        lo = __mftb() & 0xFFFFFFFFul;
    } while (hi != (unsigned long) __mftbu());

    /* hi * 2^32 + lo, evaluated in unsigned long long to avoid overflow. */
    return (hi * 0x100000000ull + lo) * timeBaseRatio;
}
int main(int argc, char *argv[]) { int rc; unsigned int i,j,l,m; //============================================================================ // Initialize array elements for(i=0; i<ARRAY_SIZE; i++) { for(j=0;j<ARRAY_SIZE;j++) { Matrix[i][j] = rand()%1000; Transpose[j][i]=Matrix[i][j]; } } printf("Computing... Wait for some time!!!\n"); #ifdef TIMING /********************************************************/ long unsigned int start_t, end_t; start_t=__mftb(); /********************************************************/ #endif // Fill in control block //============================================================================ // create SPE context and load SPE program into the SPE context for(i=0;i<64;i++) { for(j=0;j<8;j++) { for(l=0;l<8;l++) { //printf("\n%d %d %d",i,j,l); cb[l].data=(unsigned int)Matrix[i*16]; cb[l].data1=(unsigned int)Transpose[((j*8)+l)*16]; cb[l].result=(unsigned int)&Mult[0][(i*64+j*8+l)*256]; if ((data[l].speid = spe_context_create (0, NULL)) == NULL) { fprintf (stderr, "Failed spe_context_create(errno=%d strerror=%s)\n", errno, strerror(errno)); exit (3); } //Load program if ((rc = spe_program_load (data[l].speid, &spu)) != 0) { fprintf (stderr, "Failed spe_program_load(errno=%d strerror=%s)\n", errno, strerror(errno)); exit (3); } data[l].argp = (unsigned long long *) &cb[l]; // create SPE pthreads if ((rc = pthread_create (&data[l].pthread, NULL, &ppu_pthread_function, &data[l])) != 0) { fprintf (stderr, "Failed pthread_create(errno=%d strerror=%s)\n", errno, strerror(errno)); exit (3); } } // wait for SPE to complete for(m=0;m<8;m++) { if ((rc = pthread_join (data[m].pthread, NULL)) != 0) { fprintf (stderr, "Failed pthread_join(rc=%d, errno=%d strerror=%s)\n", rc, errno, strerror(errno)); exit (1); } if ((rc = spe_context_destroy (data[m].speid)) != 0) { fprintf (stderr, "Failed spe_context_destroy(rc=%d, errno=%d strerror=%s)\n", rc, errno, strerror(errno)); exit (1); } } } } #ifdef TIMING //compute the timein us(10E-6) 
end_t=__mftb(); printf("\n\nTotal time used by SPE: %f us\n", (float)((end_t-start_t)/79.8)); #endif printf("\n\nPrinting matrix in 3 seconds..\n"); sleep(3); printMatrix(); printf("\n\nTotal time used by SPE: %f us\n\n", (float)((end_t-start_t)/79.8)); }