Exemplo n.º 1
1
/*
 * Benchmark entry point.
 *
 * Stores DNUM in core_id, calls CSL_tscEnable(), configures the caches and
 * points maps_info_ptr at MAPS_INFO_PTR.  The core whose DNUM equals
 * MASTER_CORE_ID additionally acquires INIT_DONE_SEM, zero-fills the
 * MSMC_SRAM_SIZE bytes at MSMC_REG_BASE and the first 0x100 bytes at
 * MAPS_INFO_PTR, and calls do_power_gating() and compute_num_maps().
 * Every core then zero-fills L2_HEAP_SIZE bytes at L2_HEAP_BASE and times
 * ITERATIONS calls of deeplearn(); the master core prints the mean.
 */
void main()
{
	int run;
	int width = IMG_WIDTH;
	int height = IMG_HEIGHT;

	core_id = DNUM;

	CSL_tscEnable();

	/* Cache set-up; the call order is kept exactly as in the bring-up sequence. */
	CACHE_setL2Size(CACHE_0KCACHE);
	CACHE_setL1DSize(CACHE_L1_32KCACHE);
	/* NOTE(review): the meaning of the literal 128 is not visible here - confirm against the CSL docs. */
	CACHE_disableCaching(128);

	maps_info_ptr = (maps_info *)MAPS_INFO_PTR;

	/* One-time shared initialisation, master core only. */
	if (MASTER_CORE_ID == DNUM) {
		CSL_semAcquireDirect(INIT_DONE_SEM);
		memset((void *)MSMC_REG_BASE, 0x0, MSMC_SRAM_SIZE);
		memset((void *)MAPS_INFO_PTR, 0x0, 0x100);
		do_power_gating();
		compute_num_maps();
	}

	memset((void *)L2_HEAP_BASE, 0x0, L2_HEAP_SIZE);

	/* Time each run with the 64-bit time-stamp counter (TSCH:TSCL). */
	run = 0;
	while (run < ITERATIONS) {
		startVal = _itoll(TSCH, TSCL);
		deeplearn(width, height);
		endVal = _itoll(TSCH, TSCL);
		/* Counter delta divided by DSP_FREQ_IN_MHZ; reported below as microseconds. */
		cumulative += ((endVal - startVal) / DSP_FREQ_IN_MHZ);
		run++;
	}

	/* Only the master core reports the mean time per run. */
	if (MASTER_CORE_ID == DNUM) {
		printf("TimeTaken %lfus\n", (cumulative / ITERATIONS));
	}

	cumulative = 0;
}
Exemplo n.º 2
0
/*
 * Benchmark entry point.
 *
 * Writes TSCL, calls config_AMMU(), enable_L1Cache() and enable_L2Cache(),
 * then times ITERATIONS calls of deeplearn().  With FUNCTION_PROFILE
 * defined it prints the per-stage accumulators divided by ITERATIONS;
 * otherwise it prints the mean time per call.
 */
int main(void)
{
	uint64_t t_begin, t_end;
	double total = 0;
	int width = IMG_WIDTH;
	int height = IMG_HEIGHT;
	int run;

	/* NOTE(review): on C64x+/C66x a write to TSCL starts the time-stamp counter - confirm for this target. */
	TSCL = 0;
	config_AMMU();
	enable_L1Cache();
	enable_L2Cache();

	run = 0;
	while (run < ITERATIONS) {
		t_begin = _itoll(TSCH, TSCL);
		deeplearn(width, height);
		t_end = _itoll(TSCH, TSCL);
		/* Counter delta scaled by DSP_FREQ and accumulated across runs. */
		total += ((t_end - t_begin) / DSP_FREQ);
		run++;
	}

#ifdef FUNCTION_PROFILE
	/* Per-layer and per-stage accumulators are maintained outside this function. */
	printf("%lf %lf %lf \n",
	       (layer1 / ITERATIONS), (layer2 / ITERATIONS), (layer3 / ITERATIONS));
	printf("%lf %lf %lf %lf \n",
	       (pad1 / ITERATIONS), (conv1 / ITERATIONS), (rect1 / ITERATIONS),
	       (pool1 / ITERATIONS));
	printf("%lf %lf %lf %lf %lf \n",
	       (add1 / ITERATIONS), (pad2 / ITERATIONS), (conv2 / ITERATIONS),
	       (rect2 / ITERATIONS), (pool2 / ITERATIONS));
	printf("%lf %lf %lf %lf %lf \n",
	       (add2 / ITERATIONS), (pad3 / ITERATIONS), (conv3 / ITERATIONS),
	       (rect3 / ITERATIONS), (pool3 / ITERATIONS));
#else
	printf("%lf us", (total / ITERATIONS));
#endif
}
Exemplo n.º 3
0
/*
 * Benchmark entry point for a 32x32 input image.
 *
 * Stores DNUM in core_id, calls CSL_tscEnable(), configures the caches and
 * points maps_info_ptr at MAPS_INFO_PTR.  The core whose DNUM equals
 * MASTER_CORE_ID additionally acquires INIT_DONE_SEM, zero-fills the
 * MSMC_SRAM_SIZE bytes at MSMC_REG_BASE and the first 0x100 bytes at
 * MAPS_INFO_PTR, and calls do_power_gating() and compute_num_maps().
 * Every core then zero-fills L2_HEAP_SIZE bytes at L2_HEAP_BASE and times
 * ITERATIONS calls of deeplearn() on in_img; the master core prints either
 * the per-stage figures (FUNCTION_PROFILE) or the mean time per call.
 */
void main()
{
	int run;
	int width = 32;
	int height = 32;

	core_id = DNUM;

	CSL_tscEnable();

	/* Cache set-up; the call order is kept exactly as in the bring-up sequence. */
	CACHE_setL2Size(CACHE_0KCACHE);
	CACHE_setL1DSize(CACHE_L1_32KCACHE);
	/* NOTE(review): the meaning of the literal 128 is not visible here - confirm against the CSL docs. */
	CACHE_disableCaching(128);

	maps_info_ptr = (maps_info *)MAPS_INFO_PTR;

	/* One-time shared initialisation, master core only. */
	if (MASTER_CORE_ID == DNUM) {
		CSL_semAcquireDirect(INIT_DONE_SEM);
		memset((void *)MSMC_REG_BASE, 0x0, MSMC_SRAM_SIZE);
		memset((void *)MAPS_INFO_PTR, 0x0, 0x100);
		do_power_gating();
		compute_num_maps();
	}

	memset((void *)L2_HEAP_BASE, 0x0, L2_HEAP_SIZE);

	/* Time each run with the 64-bit time-stamp counter (TSCH:TSCL). */
	run = 0;
	while (run < ITERATIONS) {
		startVal = _itoll(TSCH, TSCL);
		deeplearn(in_img, width, height);
		endVal = _itoll(TSCH, TSCL);
		/* Counter delta divided by DSP_FREQ_IN_MHZ; reported below as microseconds. */
		cumulative += ((endVal - startVal) / DSP_FREQ_IN_MHZ);
		run++;
	}

	/* Only the master core reports results. */
	if (MASTER_CORE_ID == DNUM) {
#ifdef FUNCTION_PROFILE
		/* Per-layer and per-stage accumulators are maintained outside this function. */
		printf("%lf %lf %lf \n",
		       (layer1 / ITERATIONS), (layer2 / ITERATIONS), (layer3 / ITERATIONS));
		printf("%lf %lf %lf %lf \n",
		       (pad1 / ITERATIONS), (conv1 / ITERATIONS), (rect1 / ITERATIONS),
		       (pool1 / ITERATIONS));
		printf("%lf %lf %lf %lf %lf \n",
		       (add1 / ITERATIONS), (pad2 / ITERATIONS), (conv2 / ITERATIONS),
		       (rect2 / ITERATIONS), (pool2 / ITERATIONS));
		printf("%lf %lf %lf %lf %lf \n",
		       (add2 / ITERATIONS), (pad3 / ITERATIONS), (conv3 / ITERATIONS),
		       (rect3 / ITERATIONS), (pool3 / ITERATIONS));
#else
		printf("%lf us", (cumulative / ITERATIONS));
#endif
	}

	cumulative = 0;
}
Exemplo n.º 4
0
/*
 * Benchmark entry point.
 *
 * Writes TSCL, calls config_AMMU(), enable_L1Cache() and enable_L2Cache(),
 * then times ITERATIONS calls of deeplearn() and prints the mean time per
 * call.
 */
int main(void)
{
	uint64_t t_begin, t_end;
	double total = 0;
	int width = IMG_WIDTH;
	int height = IMG_HEIGHT;
	int run;

	/* NOTE(review): on C64x+/C66x a write to TSCL starts the time-stamp counter - confirm for this target. */
	TSCL = 0;
	config_AMMU();
	enable_L1Cache();
	enable_L2Cache();

	run = 0;
	while (run < ITERATIONS) {
		t_begin = _itoll(TSCH, TSCL);
		deeplearn(width, height);
		t_end = _itoll(TSCH, TSCL);
		/* Counter delta scaled by DSP_FREQ and accumulated across runs. */
		total += ((t_end - t_begin) / DSP_FREQ);
		run++;
	}

	printf("TimeTaken %lfus\n", (total / ITERATIONS));
}
Exemplo n.º 5
0
   /*
    * Return the current time-stamp-counter reading scaled to seconds.
    *
    * On the first call only, TSCL is written so that the hardware counter
    * is running.  Every call reads the 64-bit counter (TSCH:TSCL) and
    * multiplies it by 1e-9.
    * NOTE(review): the 1e-9 factor corresponds to a 1 GHz counter clock -
    * confirm against the actual device frequency.
    */
   double ATL_cputime(void)
   {
      static const double seconds_per_tick = 1.0 / (1000000000.0);
      static int timer_started = 0;      /* Non-zero once TSCL was written. */
      long long unsigned int ticks;

      if (!timer_started)
      {
         timer_started = 1;              /* Only do this on the first call. */
         TSCL = 0;                       /* Ensure hardware time is running. */
      }

      ticks = _itoll(TSCH, TSCL);        /* Combine both halves into 64 bits. */
      return (double) ticks * seconds_per_tick;
   } /* END ATL_cputime */
  unsigned int            row4_54,        row5_54;     

  unsigned int            mask1_44,       mask2_44,       mask3_44;
  unsigned int            mask4_44,       mask5_44;

  const short *restrict   in0;
  const short *restrict   in1;
  const short *restrict   in2;
  const short *restrict   in3;
  const short *restrict   in4;

  /* -------------------------------------------------------------------- */
  /*  Load mask values (reverse order for mask rotation)                  */
  /* -------------------------------------------------------------------- */
  mask_temp  = _mem8_const ((void *) &mask_ptr[21]);
  mask1_3210 = _itoll(_packlh2(_loll(mask_temp),_loll(mask_temp)),
                      _packlh2(_hill(mask_temp),_hill(mask_temp)));
  mask1_44   = _pack2((int) mask_ptr[20],  (int) mask_ptr[20]);

  mask_temp  = _mem8_const ((void *) &mask_ptr[16]);
  mask2_3210 = _itoll(_packlh2(_loll(mask_temp),_loll(mask_temp)),
                      _packlh2(_hill(mask_temp),_hill(mask_temp)));
  mask2_44   = _pack2((int) mask_ptr[15],  (int) mask_ptr[15]);

  mask_temp  = _mem8_const ((void *) &mask_ptr[11]);
  mask3_3210 = _itoll(_packlh2(_loll(mask_temp),_loll(mask_temp)),
                      _packlh2(_hill(mask_temp),_hill(mask_temp)));
  mask3_44   = _pack2((int) mask_ptr[10], (int) mask_ptr[10]);

  mask_temp  = _mem8_const ((void *) &mask_ptr[6]);
  mask4_3210 = _itoll(_packlh2(_loll(mask_temp),_loll(mask_temp)),
                      _packlh2(_hill(mask_temp),_hill(mask_temp)));
Exemplo n.º 7
0
/*
 * Fill n bytes starting at mem with the low byte of ch.
 *
 * Strategy (both architecture variants):
 *   1. write 1/2/4-byte pieces until the destination is aligned,
 *   2. fill the bulk with two aligned wide stores per loop iteration,
 *   3. finish the remaining tail bytes.
 * If the request is not longer than the alignment prefix, step 1 is
 * skipped and the whole range is handled by the tail code.
 *
 * Parameters:
 *   mem - start of the region to fill
 *   ch  - fill value; only its low 8 bits are used
 *   n   - number of bytes to fill
 *
 * Returns a pointer one past the last byte written.  NOTE: this differs
 * from memset(), which returns its destination argument.
 */
static __inline void *optimized_mem_set(void *mem, int ch, size_t n)
{
   char  * restrict dst1, * restrict dst2;
   int    pre_bytes, post_bytes, wfill, i;

   unsigned int count = n;

   dst1 = (char *)mem;

#if defined(_TMS320C6400) || defined(_TMS320C6740) || defined(_TMS320C6600) || \
    defined(_TI_C6X_TESLA)

   /*---------------------------------------------------------------------*/
   /* We do not use 'dwfill' on other variations of the C6x architecture, */
   /* so limit 'dwfill' references to the architectures that use it.      */
   /*---------------------------------------------------------------------*/
   {
      long long dwfill;

      /*------------------------------------------------------------------*/
      /* Set up 64-bit and 32-bit fill values.  _packl4 keeps only the    */
      /* low byte of each halfword, so ch is reduced to 8 bits here.      */
      /*------------------------------------------------------------------*/
      wfill  = _pack2 (ch, ch);
      wfill  = _packl4(wfill, wfill);
      dwfill = _itoll (wfill, wfill);

      /*------------------------------------------------------------------*/
      /* Calculate # of bytes to pre-copy to get to an alignment of 8     */
      /*------------------------------------------------------------------*/
      pre_bytes = (8 - (int) dst1) & 7;

      if (count > pre_bytes)
      {
         count -= pre_bytes;
         if (pre_bytes & 1) { *dst1        = ch;    dst1 += 1; }
         if (pre_bytes & 2) { _amem2(dst1) = wfill; dst1 += 2; }
         if (pre_bytes & 4) { _amem4(dst1) = wfill; dst1 += 4; }
      }

      /*------------------------------------------------------------------*/
      /* Double word fills: two 8-byte stores (dst1 and dst1 + 8) per     */
      /* iteration, i.e. 16 bytes per pass.                               */
      /*------------------------------------------------------------------*/
      post_bytes = (int) count;
      dst2       = dst1 + 8;

      if (count > 15)
         for (i = 0; i < count >> 4; i++)
         {
            _amem8(dst1) = dwfill; dst1 += 16;
            _amem8(dst2) = dwfill; dst2 += 16;
            post_bytes -= 16;
         }

      /*------------------------------------------------------------------*/
      /* Finish transfer with 8, 4, 2 and/or 1-byte writes.  The          */
      /* non-aligned _mem8/_mem4 forms are used because dst1 may be       */
      /* unaligned when the alignment prefix above was skipped.           */
      /*------------------------------------------------------------------*/
      if (post_bytes & 8) { _mem8(dst1) = dwfill; dst1 += 8; }
      if (post_bytes & 4) { _mem4(dst1) = wfill;  dst1 += 4; }
      if (post_bytes & 2) { dst1[0] = ch; dst1[1] = ch; dst1 += 2; }
      if (post_bytes & 1) { *dst1       = ch;     dst1 += 1; }
   }

#else

   /*--------------------------------------------------------------------*/
   /* Set up 32-bit fill value.  ch is masked to its low byte first:     */
   /* without the mask a value outside 0..255 (e.g. a sign-extended      */
   /* char) would make the word stores write a different pattern than    */
   /* the single-byte stores below.                                      */
   /*--------------------------------------------------------------------*/
   wfill  = _mpy(0x101, ch & 0xFF);
   wfill += (wfill << 16);

   /*--------------------------------------------------------------------*/
   /* Calculate number of bytes to pre-copy to get to an alignment of 4  */
   /*--------------------------------------------------------------------*/
   pre_bytes = (4 - (int) dst1) & 3;

   if (count > pre_bytes)
   {
      count -= pre_bytes;
      if (pre_bytes & 1) { *dst1        = ch;    dst1 += 1; }
      if (pre_bytes & 2) { _amem2(dst1) = wfill; dst1 += 2; }
   }

   /*--------------------------------------------------------------------*/
   /* Word fills: two 4-byte stores (dst1 and dst1 + 4) per iteration,   */
   /* i.e. 8 bytes per pass.                                             */
   /*--------------------------------------------------------------------*/
   post_bytes = (int) count;
   dst2       = dst1 + 4;

   if (count > 7)
      for (i = 0; i < count >> 3; i++)
      {
         _amem4(dst1) = wfill; dst1 += 8;
         _amem4(dst2) = wfill; dst2 += 8;
         post_bytes -= 8;
      }

   /*--------------------------------------------------------------------*/
   /* Finish transfer with up to 7 single-byte writes.                   */
   /*--------------------------------------------------------------------*/
   if (post_bytes) { *dst1++ = ch; post_bytes--; }
   if (post_bytes) { *dst1++ = ch; post_bytes--; }
   if (post_bytes) { *dst1++ = ch; post_bytes--; }
   if (post_bytes) { *dst1++ = ch; post_bytes--; }
   if (post_bytes) { *dst1++ = ch; post_bytes--; }
   if (post_bytes) { *dst1++ = ch; post_bytes--; }
   if (post_bytes) { *dst1++ = ch; post_bytes--; }

#endif

   return dst1;
}