void main() { int i,img_width=IMG_WIDTH,img_height=IMG_HEIGHT; core_id = DNUM; CSL_tscEnable(); CACHE_setL2Size (CACHE_0KCACHE); CACHE_setL1DSize(CACHE_L1_32KCACHE); CACHE_disableCaching (128); maps_info_ptr = (maps_info*)MAPS_INFO_PTR; if(DNUM==MASTER_CORE_ID) { CSL_semAcquireDirect(INIT_DONE_SEM); memset((void*)MSMC_REG_BASE,0x0,MSMC_SRAM_SIZE); memset((void*)MAPS_INFO_PTR,0x0,0x100); do_power_gating(); compute_num_maps(); } memset((void*)L2_HEAP_BASE,0x0,L2_HEAP_SIZE); for(i=0;i<ITERATIONS;i++) { startVal = _itoll(TSCH,TSCL); deeplearn(img_width, img_height); endVal = _itoll(TSCH,TSCL); cumulative += ((endVal-startVal)/DSP_FREQ_IN_MHZ); } if(DNUM==MASTER_CORE_ID) { printf("TimeTaken %lfus\n",(cumulative/ITERATIONS)); } cumulative=0; }
int main(void) { int i,img_width=IMG_WIDTH,img_height=IMG_HEIGHT; uint64_t startVal,endVal; double cumulative=0; TSCL = 0; config_AMMU(); enable_L1Cache(); enable_L2Cache(); for(i=0;i<ITERATIONS;i++) { startVal = _itoll(TSCH,TSCL); deeplearn(img_width, img_height); endVal = _itoll(TSCH,TSCL); cumulative += ((endVal-startVal)/DSP_FREQ); } #ifdef FUNCTION_PROFILE printf("%lf %lf %lf \n",(layer1/ITERATIONS),(layer2/ITERATIONS),(layer3/ITERATIONS)); printf("%lf %lf %lf %lf \n",(pad1/ITERATIONS),(conv1/ITERATIONS),(rect1/ITERATIONS),(pool1/ITERATIONS)); printf("%lf %lf %lf %lf %lf \n",(add1/ITERATIONS),(pad2/ITERATIONS),(conv2/ITERATIONS),(rect2/ITERATIONS),(pool2/ITERATIONS)); printf("%lf %lf %lf %lf %lf \n",(add2/ITERATIONS),(pad3/ITERATIONS),(conv3/ITERATIONS),(rect3/ITERATIONS),(pool3/ITERATIONS)); #else printf("%lf us",(cumulative/ITERATIONS)); #endif }
void main() { int i,img_width=32,img_height=32; core_id = DNUM; CSL_tscEnable(); CACHE_setL2Size (CACHE_0KCACHE); CACHE_setL1DSize(CACHE_L1_32KCACHE); CACHE_disableCaching (128); maps_info_ptr = (maps_info*)MAPS_INFO_PTR; if(DNUM==MASTER_CORE_ID) { CSL_semAcquireDirect(INIT_DONE_SEM); memset((void*)MSMC_REG_BASE,0x0,MSMC_SRAM_SIZE); memset((void*)MAPS_INFO_PTR,0x0,0x100); do_power_gating(); compute_num_maps(); } memset((void*)L2_HEAP_BASE,0x0,L2_HEAP_SIZE); for(i=0;i<ITERATIONS;i++) { startVal = _itoll(TSCH,TSCL); deeplearn(in_img, img_width, img_height); endVal = _itoll(TSCH,TSCL); cumulative += ((endVal-startVal)/DSP_FREQ_IN_MHZ); } if(DNUM==MASTER_CORE_ID) { #ifdef FUNCTION_PROFILE printf("%lf %lf %lf \n",(layer1/ITERATIONS),(layer2/ITERATIONS),(layer3/ITERATIONS)); printf("%lf %lf %lf %lf \n",(pad1/ITERATIONS),(conv1/ITERATIONS),(rect1/ITERATIONS),(pool1/ITERATIONS)); printf("%lf %lf %lf %lf %lf \n",(add1/ITERATIONS),(pad2/ITERATIONS),(conv2/ITERATIONS),(rect2/ITERATIONS),(pool2/ITERATIONS)); printf("%lf %lf %lf %lf %lf \n",(add2/ITERATIONS),(pad3/ITERATIONS),(conv3/ITERATIONS),(rect3/ITERATIONS),(pool3/ITERATIONS)); #else printf("%lf us",(cumulative/ITERATIONS)); #endif } cumulative=0; }
/*
 * Benchmark entry point.
 *
 * Writes TSCL, calls config_AMMU() and the L1/L2 cache enable routines,
 * then calls deeplearn() ITERATIONS times while sampling the 64-bit
 * counter (TSCH:TSCL) around each call. Prints cumulative / ITERATIONS,
 * labelled "us".
 *
 * Returns 0. The original fell off the end of a non-void main(), which
 * leaves the exit status undefined when built as C89.
 */
int main(void)
{
    int i, img_width = IMG_WIDTH, img_height = IMG_HEIGHT;
    uint64_t startVal, endVal;
    double cumulative = 0;

    /* Write to TSCL before the first read of the counter. */
    TSCL = 0;

    config_AMMU();
    enable_L1Cache();
    enable_L2Cache();

    for (i = 0; i < ITERATIONS; i++)
    {
        startVal = _itoll(TSCH, TSCL);
        deeplearn(img_width, img_height);
        endVal = _itoll(TSCH, TSCL);

        /*
         * NOTE(review): if DSP_FREQ is an integer constant this division
         * truncates before the value is added to the double accumulator --
         * confirm that is acceptable.
         */
        cumulative += ((endVal - startVal) / DSP_FREQ);
    }

    printf("TimeTaken %lfus\n", (cumulative/ITERATIONS));

    return 0;
}
/*
 * ATL_cputime -- return the current 64-bit counter value (TSCH:TSCL)
 * scaled by 1e-9, as a double.
 *
 * The first call writes TSCL once before reading, to make sure the
 * hardware counter is running; later calls only read it.
 *
 * NOTE(review): the fixed 1e-9 scale yields seconds only if the counter
 * ticks at 1 GHz -- confirm against the device clock.
 */
double ATL_cputime(void)
{
    static const double SCALE = 1.0 / (1000000000.0); /* applied per tick   */
    static int timer_started = 0;                     /* set on first call  */
    long long unsigned int ticks;

    if (timer_started == 0)
    {
        timer_started = 1;   /* remember the one-time start was done */
        TSCL = 0;            /* any write starts the counter         */
    }

    ticks = _itoll(TSCH, TSCL);   /* combine the two 32-bit halves */

    return (double) ticks * SCALE;
}
unsigned int row4_54, row5_54; unsigned int mask1_44, mask2_44, mask3_44; unsigned int mask4_44, mask5_44; const short *restrict in0; const short *restrict in1; const short *restrict in2; const short *restrict in3; const short *restrict in4; /* -------------------------------------------------------------------- */ /* Load mask values (reverse order for mask rotation) */ /* -------------------------------------------------------------------- */ mask_temp = _mem8_const ((void *) &mask_ptr[21]); mask1_3210 = _itoll(_packlh2(_loll(mask_temp),_loll(mask_temp)), _packlh2(_hill(mask_temp),_hill(mask_temp))); mask1_44 = _pack2((int) mask_ptr[20], (int) mask_ptr[20]); mask_temp = _mem8_const ((void *) &mask_ptr[16]); mask2_3210 = _itoll(_packlh2(_loll(mask_temp),_loll(mask_temp)), _packlh2(_hill(mask_temp),_hill(mask_temp))); mask2_44 = _pack2((int) mask_ptr[15], (int) mask_ptr[15]); mask_temp = _mem8_const ((void *) &mask_ptr[11]); mask3_3210 = _itoll(_packlh2(_loll(mask_temp),_loll(mask_temp)), _packlh2(_hill(mask_temp),_hill(mask_temp))); mask3_44 = _pack2((int) mask_ptr[10], (int) mask_ptr[10]); mask_temp = _mem8_const ((void *) &mask_ptr[6]); mask4_3210 = _itoll(_packlh2(_loll(mask_temp),_loll(mask_temp)), _packlh2(_hill(mask_temp),_hill(mask_temp)));
static __inline void *optimized_mem_set(void *mem, int ch, size_t n) { char * restrict dst1, * restrict dst2; int pre_bytes, post_bytes, wfill, i; unsigned char *outbuf = mem; unsigned int count = n; dst1 = (char *)outbuf; #if defined(_TMS320C6400) || defined(_TMS320C6740) || defined(_TMS320C6600) || \ defined(_TI_C6X_TESLA) /*---------------------------------------------------------------------*/ /* We do not use 'dwfill' on other variations of the C6x architecture, */ /* so limit 'dwfill' references to the architectures that use it. */ /*---------------------------------------------------------------------*/ { long long dwfill; /*------------------------------------------------------------------*/ /* Set up 64-bit and 32-bit fill values. */ /*------------------------------------------------------------------*/ wfill = _pack2 (ch, ch); wfill = _packl4(wfill, wfill); dwfill = _itoll (wfill, wfill); /*------------------------------------------------------------------*/ /* Calculate # of bytes to pre-copy to get to an alignment of 8 */ /*------------------------------------------------------------------*/ pre_bytes = (8 - (int) dst1) & 7; if (count > pre_bytes) { count -= pre_bytes; if (pre_bytes & 1) { *dst1 = ch; dst1 += 1; } if (pre_bytes & 2) { _amem2(dst1) = wfill; dst1 += 2; } if (pre_bytes & 4) { _amem4(dst1) = wfill; dst1 += 4; } } /*------------------------------------------------------------------*/ /* Double word fills */ /*------------------------------------------------------------------*/ post_bytes = count > 0 ? 
count : 0; dst2 = dst1 + 8; if (count > 15) for (i = 0; i < count >> 4; i++) { _amem8(dst1) = dwfill; dst1 += 16; _amem8(dst2) = dwfill; dst2 += 16; post_bytes -= 16; } /*------------------------------------------------------------------*/ /* Finish transfer with 8, 4, 2 and/or 1-byte writes */ /*------------------------------------------------------------------*/ if (post_bytes & 8) { _mem8(dst1) = dwfill; dst1 += 8; } if (post_bytes & 4) { _mem4(dst1) = wfill; dst1 += 4; } if (post_bytes & 2) { dst1[0] = ch; dst1[1] = ch; dst1 += 2; } if (post_bytes & 1) { *dst1 = ch; dst1 += 1; } } #else /*--------------------------------------------------------------------*/ /* Set up 32-bit fill value. */ /*--------------------------------------------------------------------*/ wfill = _mpy(0x101, (int)ch); wfill += (wfill << 16); /*--------------------------------------------------------------------*/ /* Calculate number of bytes to pre-copy to get to an alignment of 4 */ /*--------------------------------------------------------------------*/ pre_bytes = (4 - (int) dst1) & 3; if (count > pre_bytes) { count -= pre_bytes; if (pre_bytes & 1) { *dst1 = ch; dst1 += 1; } if (pre_bytes & 2) { _amem2(dst1) = wfill; dst1 += 2; } } /*--------------------------------------------------------------------*/ /* Double word fills */ /*--------------------------------------------------------------------*/ post_bytes = count > 0 ? count : 0; dst2 = dst1 + 4; if (count > 7) for (i = 0; i < count >> 3; i++) { _amem4(dst1) = wfill; dst1 += 8; _amem4(dst2) = wfill; dst2 += 8; post_bytes -= 8; } /*--------------------------------------------------------------------*/ /* Finish transfer with up to 7 single-byte writes. 
*/ /*--------------------------------------------------------------------*/ if (post_bytes) { *dst1++ = ch; post_bytes--; } if (post_bytes) { *dst1++ = ch; post_bytes--; } if (post_bytes) { *dst1++ = ch; post_bytes--; } if (post_bytes) { *dst1++ = ch; post_bytes--; } if (post_bytes) { *dst1++ = ch; post_bytes--; } if (post_bytes) { *dst1++ = ch; post_bytes--; } if (post_bytes) { *dst1++ = ch; post_bytes--; } #endif return dst1; }