/*
 * Single-core reference run of the matrix product.
 *
 * Fills arrayInPPE1 with pseudo-random values in [0, 99], computes
 * arrayResult = arrayInPPE1 x arrayInPPE1 with a plain triple loop, prints
 * the product one row per line, and reports the time spent in the
 * multiplication loop only (initialisation and printing are not timed).
 *
 * argc and argv are not used. Always returns 0.
 */
int main(int argc, char *argv[]) {

  int r, c, k;
  long unsigned int start_t, end_t;

  /* Populate the input matrix. */
  for (r = 0; r < ARRAY_SIZE; ++r) {
    for (c = 0; c < ARRAY_SIZE; ++c) {
      arrayInPPE1[r][c] = rand() % 100;
    }
  }

  /* Time-base snapshot taken immediately before the computation. */
  start_t = __mftb();

  /* Each output cell is the dot product of row r and column c of the input. */
  for (r = 0; r < ARRAY_SIZE; ++r) {
    for (c = 0; c < ARRAY_SIZE; ++c) {
      arrayResult[r][c] = 0;
      for (k = 0; k < ARRAY_SIZE; ++k) {
        arrayResult[r][c] += arrayInPPE1[r][k] * arrayInPPE1[k][c];
      }
    }
  }

  /* Time-base snapshot taken immediately after the computation. */
  end_t = __mftb();

  /* Dump the product: a newline opens each row, cells are tab-terminated. */
  for (r = 0; r < ARRAY_SIZE; ++r) {
    printf("\n");
    for (c = 0; c < ARRAY_SIZE; ++c) {
      printf("%d\t", arrayResult[r][c]);
    }
  }

  /* NOTE(review): 79.8 is presumably time-base ticks per microsecond on the
   * target machine -- confirm before reusing on other hardware. */
  printf("\nArray multiplication computation time is %f us\n",
         (float)(end_t - start_t) / 79.8);

  return 0;
}
Exemple #2
0
/*
 * Returns the current time-base value, using the primitive that matches the
 * compiler in use: GetTimeBase() when __GNUC__ is defined, otherwise the
 * __mftb() intrinsic when __IBMC__ is defined. Any other compiler is
 * rejected at build time.
 */
unsigned long long  _bgq_GetTimeBase( void )
{
	unsigned long long ticks;

#if defined (__GNUC__)
	ticks = GetTimeBase();
#elif defined (__IBMC__)
	ticks = __mftb();
#else
	#error "Cannot find GetTimeBase for BG/Q (unhandled compiler)"
#endif

	return ticks;
}
Exemple #3
0
/*
 * Returns the current PowerPC time base multiplied by timeBaseRatio
 * (a scale factor defined elsewhere in the file).
 *
 * The upper word is read before and after the lower word, and the whole
 * read is retried if the upper word changed in between. This prevents a
 * carry out of the lower word from producing a mismatched hi/lo pair.
 *
 * Both halves are masked to 32 bits before being combined. On 32-bit
 * targets this is a no-op. On 64-bit targets __mftb() yields the entire
 * 64-bit time base, so adding it unmasked to (hi << 32) would count the
 * upper word twice.
 */
static inline double
ppc_intrinsic_time (void)
{
  unsigned long hi, lo;
  unsigned long long ticks;

  do
    {
      hi = __mftbu();
      lo = __mftb();
    } while (hi != (unsigned long) __mftbu());

  ticks = ((hi & 0xffffffffull) << 32) | (lo & 0xffffffffull);
  return ticks * timeBaseRatio;
}
/*
 * Drives the SPE program `spu` over Matrix and its transpose.
 *
 * Steps:
 *   1. Fill Matrix with pseudo-random values in [0, 999] and store the
 *      transposed copy in Transpose.
 *   2. For each of 64 x 8 iterations, start a batch of 8 SPE threads. Each
 *      thread gets its own control block cb[l] holding the addresses of a
 *      Matrix row, a Transpose row and a slot in Mult for the result. The
 *      batch is joined and its SPE contexts destroyed before the next batch
 *      starts.
 *   3. Optionally (when TIMING is defined) report the elapsed time base.
 *   4. Print the result via printMatrix().
 *
 * NOTE(review): the batch counts (64, 8, 8) and strides (16, 256) are
 * hard-coded and presumably tied to ARRAY_SIZE and the SPE kernel's tile
 * size -- confirm before changing ARRAY_SIZE.
 *
 * argc and argv are not used. Exits with status 3 if an SPE context or
 * thread cannot be set up, 1 if joining or tearing down fails, and returns 0
 * otherwise.
 */
int main(int argc, char *argv[]) {

  int rc;
  unsigned int i, j, l, m;

  /* Initialise the input matrix and its transpose. */
  for (i = 0; i < ARRAY_SIZE; i++)
  {
    for (j = 0; j < ARRAY_SIZE; j++)
    {
      Matrix[i][j] = rand() % 1000;
      Transpose[j][i] = Matrix[i][j];
    }
  }

  printf("Computing... Wait for some time!!!\n");
#ifdef TIMING
  long unsigned int start_t, end_t;
  start_t = __mftb();
#endif

  for (i = 0; i < 64; i++)
  {
    for (j = 0; j < 8; j++)
    {
      /* Launch one batch of 8 SPE threads. */
      for (l = 0; l < 8; l++)
      {
        /* Fill in the control block handed to this SPE.
         * NOTE(review): the casts assume pointers fit in an unsigned int
         * (32-bit PPE build) -- TODO confirm. */
        cb[l].data = (unsigned int)Matrix[i*16];
        cb[l].data1 = (unsigned int)Transpose[((j*8)+l)*16];
        cb[l].result = (unsigned int)&Mult[0][(i*64+j*8+l)*256];

        /* Create the SPE context. */
        if ((data[l].speid = spe_context_create (0, NULL)) == NULL)
        {
          fprintf (stderr, "Failed spe_context_create(errno=%d strerror=%s)\n", errno, strerror(errno));
          exit (3);
        }

        /* Load the SPE program into the context. */
        if ((rc = spe_program_load (data[l].speid, &spu)) != 0)
        {
          fprintf (stderr, "Failed spe_program_load(errno=%d strerror=%s)\n", errno, strerror(errno));
          exit (3);
        }

        data[l].argp = (unsigned long long *) &cb[l];

        /* Start the PPE thread that runs the SPE context. pthread functions
         * return the error number and do not set errno, so report rc. */
        if ((rc = pthread_create (&data[l].pthread, NULL, &ppu_pthread_function, &data[l])) != 0)
        {
          fprintf (stderr, "Failed pthread_create(rc=%d strerror=%s)\n", rc, strerror(rc));
          exit (3);
        }
      }

      /* Wait for the whole batch to finish, then release its contexts. */
      for (m = 0; m < 8; m++)
      {
        if ((rc = pthread_join (data[m].pthread, NULL)) != 0)
        {
          /* As with pthread_create, the error number is the return value. */
          fprintf (stderr, "Failed pthread_join(rc=%d strerror=%s)\n", rc, strerror(rc));
          exit (1);
        }

        if ((rc = spe_context_destroy (data[m].speid)) != 0)
        {
          fprintf (stderr, "Failed spe_context_destroy(rc=%d, errno=%d strerror=%s)\n", rc, errno, strerror(errno));
          exit (1);
        }
      }
    }
  }

#ifdef TIMING
  /* NOTE(review): 79.8 is presumably time-base ticks per microsecond on the
   * target machine -- confirm. */
  end_t = __mftb();
  printf("\n\nTotal time used by SPE: %f us\n", (float)((end_t-start_t)/79.8));
#endif

  printf("\n\nPrinting matrix in 3 seconds..\n");
  sleep(3);
  printMatrix();

#ifdef TIMING
  /* Repeat the timing after the matrix dump so it stays visible. start_t and
   * end_t exist only when TIMING is defined, so this report is guarded too. */
  printf("\n\nTotal time used by SPE: %f us\n\n", (float)((end_t-start_t)/79.8));
#endif

  return 0;
}