void main() { int i,img_width=IMG_WIDTH,img_height=IMG_HEIGHT; core_id = DNUM; CSL_tscEnable(); CACHE_setL2Size (CACHE_0KCACHE); CACHE_setL1DSize(CACHE_L1_32KCACHE); CACHE_disableCaching (128); maps_info_ptr = (maps_info*)MAPS_INFO_PTR; if(DNUM==MASTER_CORE_ID) { CSL_semAcquireDirect(INIT_DONE_SEM); memset((void*)MSMC_REG_BASE,0x0,MSMC_SRAM_SIZE); memset((void*)MAPS_INFO_PTR,0x0,0x100); do_power_gating(); compute_num_maps(); } memset((void*)L2_HEAP_BASE,0x0,L2_HEAP_SIZE); for(i=0;i<ITERATIONS;i++) { startVal = _itoll(TSCH,TSCL); deeplearn(img_width, img_height); endVal = _itoll(TSCH,TSCL); cumulative += ((endVal-startVal)/DSP_FREQ_IN_MHZ); } if(DNUM==MASTER_CORE_ID) { printf("TimeTaken %lfus\n",(cumulative/ITERATIONS)); } cumulative=0; }
/*
 * Enter the platform SPI critical section.
 *
 * Spins until the SPI hardware semaphore is granted, providing mutual
 * exclusion across cores for the shared SPI bus. Blocking call.
 */
void Osal_platformSpiCsEnter(void)
{
    while (!CSL_semAcquireDirect(PLATFORM_SPI_HW_SEM))
    {
        /* busy-wait for the hardware semaphore */
    }
}
/**
 * ============================================================================
 * @n@b Osal_biosMultiCoreCsEnter
 *
 * @b brief
 * @n Blocks until the FFTC hardware semaphore is acquired, serializing
 *    access to the shared FFTC library across cores. This is a BLOCKING
 *    API: the caller spins until the lock is granted.
 *
 * @param[in] None
 *
 * @return None
 * =============================================================================
 */
Void Osal_biosMultiCoreCsEnter ()
{
    /* Spin on the FFTC hardware semaphore until it is ours. */
    for (;;)
    {
        if (CSL_semAcquireDirect (FFTC_HW_SEM) != 0)
        {
            break;
        }
    }
}
/*
 * deeplearn() - run one forward pass over the input image on this core.
 *
 * @param data  input image samples (layout assumed row-major shorts of
 *              size w*h — TODO confirm against operateLayer1)
 * @param w     image width in pixels
 * @param h     image height in pixels
 *
 * Every core runs layer 1 on its share of the maps, then all cores meet at
 * a barrier built from a hardware semaphore (LAYER_1_SEM) protecting a
 * shared counter (maps_info_ptr->layer1_sync) before layer 2 starts.
 * NOTE(review): in this variant the equivalent layer-2 barrier is
 * commented out below, so cores do NOT synchronize after operateLayer2 —
 * verify this is intentional.
 */
void deeplearn(short* data, uint32_t w, uint32_t h)
{
    core_id = DNUM;

    /* Carve out the shared (MSMC) and per-core (L2) heaps for this pass. */
    MemMgr_HeapInit(w,h);

    operateLayer1(data, w, h);

    /* Write back all of L1D so other cores observe this core's layer-1
     * output maps in shared memory. */
    CACHE_wbAllL1dWait();

    /* All cores update the counter informing that they finished their
       iteration */
    while ((CSL_semAcquireDirect (LAYER_1_SEM)) == 0);

    maps_info_ptr->layer1_sync++;

    /* Release the hardware semaphore. */
    CSL_semReleaseSemaphore (LAYER_1_SEM);

    /* All cores wait here to sync up */
    while (maps_info_ptr->layer1_sync != NUM_CORES);

    /* NOTE(review): single (unchecked) acquire attempt — unlike the
     * spin-acquires above, failure here is silently ignored; confirm the
     * master is guaranteed to win INIT_DONE_SEM at this point. */
    if(DNUM==MASTER_CORE_ID)
        CSL_semAcquireDirect(INIT_DONE_SEM);

    /* Layer 2 operates on the layer-1 output, downsampled by 2 in each
     * dimension. */
    operateLayer2(w / 2, h / 2);

    CACHE_wbAllL1dWait();

    /* Layer-2 barrier, currently disabled (see NOTE above):
    // /* All cores update the counter informing that they finished their
          iteration */
    // while ((CSL_semAcquireDirect (LAYER_2_SEM)) == 0);
    //
    // maps_info_ptr->layer2_sync++;
    //
    // /* Release the hardware semaphore. */
    // CSL_semReleaseSemaphore (LAYER_2_SEM);
    //
    // /* All cores wait here to sync up */
    // while (maps_info_ptr->layer2_sync != 0x8);

    // dummy classifier
    //dummy_classifier(pInputNeurons, 50*9, 64, 10, pInputWt, pHiddenBias, pOutputWt);
}
/*
 * deeplearn() - run one forward pass over the input image on this core.
 *
 * @param data  input image samples (presumably w*h shorts — TODO confirm
 *              against operateLayer1)
 * @param w     image width in pixels
 * @param h     image height in pixels
 *
 * This variant uses two full barriers: after layer 1 and after layer 2,
 * each built from a hardware semaphore guarding a shared counter
 * (L1_SYNC / L2_SYNC) that every core increments, followed by a spin
 * until the counter reaches NUM_CORES. The counters are reset inside
 * MemMgr_HeapInit by the master core each pass.
 */
void deeplearn(short* data, uint32_t w, uint32_t h)
{
    core_id = DNUM;

    /* Sets up heaps; master also resets L1_SYNC/L2_SYNC and signals
     * INIT_DONE when shared allocation is finished. */
    MemMgr_HeapInit(w,h);

    /* Master clears the init flag for the next pass. */
    if(DNUM==MASTER_CORE_ID)
    {
        INIT_DONE = 0x0;
    }

    operateLayer1(data, w, h);

    /* Write back L1D so other cores see this core's layer-1 maps. */
    CACHE_wbAllL1dWait();

    /* All cores update the counter informing that they finished their
       iteration */
    while ((CSL_semAcquireDirect (LAYER_1_SEM)) == 0);

    L1_SYNC = L1_SYNC+1;

    /* Release the hardware semaphore. */
    CSL_semReleaseSemaphore (LAYER_1_SEM);

    /* All cores wait here to sync up */
    while (L1_SYNC != NUM_CORES);

    /* Layer 2 consumes the layer-1 output at half resolution. */
    operateLayer2(w / 2, h / 2);

    CACHE_wbAllL1dWait();

    /* All cores update the counter informing that they finished their
       iteration */
    while ((CSL_semAcquireDirect (LAYER_2_SEM)) == 0);

    L2_SYNC = L2_SYNC+1;

    /* Release the hardware semaphore. */
    CSL_semReleaseSemaphore (LAYER_2_SEM);

    /* All cores wait here to sync up */
    while (L2_SYNC != NUM_CORES);
}
/** * ============================================================================ * @n@b Osal_qmssCsEnter * * @b brief * @n This API ensures multi-core and multi-threaded * synchronization to the caller. * * This is a BLOCKING API. * * This API ensures multi-core synchronization between * multiple processes trying to access QMSS shared * library at the same time. * * @param[in] None * * @return * Handle used to lock critical section * ============================================================================= */ Void* Osal_qmssCsEnter (Void) { /* Get the hardware semaphore. * * Acquire Multi core QMSS synchronization lock */ while ((CSL_semAcquireDirect (QMSS_HW_SEM)) == 0); /* Disable all interrupts and OS scheduler. * * Acquire Multi threaded / process synchronization lock. */ coreKey [CSL_chipReadDNUM ()] = Hwi_disable(); return NULL; }
void main() { int i,img_width=32,img_height=32; core_id = DNUM; CSL_tscEnable(); CACHE_setL2Size (CACHE_0KCACHE); CACHE_setL1DSize(CACHE_L1_32KCACHE); CACHE_disableCaching (128); maps_info_ptr = (maps_info*)MAPS_INFO_PTR; if(DNUM==MASTER_CORE_ID) { CSL_semAcquireDirect(INIT_DONE_SEM); memset((void*)MSMC_REG_BASE,0x0,MSMC_SRAM_SIZE); memset((void*)MAPS_INFO_PTR,0x0,0x100); do_power_gating(); compute_num_maps(); } memset((void*)L2_HEAP_BASE,0x0,L2_HEAP_SIZE); for(i=0;i<ITERATIONS;i++) { startVal = _itoll(TSCH,TSCL); deeplearn(in_img, img_width, img_height); endVal = _itoll(TSCH,TSCL); cumulative += ((endVal-startVal)/DSP_FREQ_IN_MHZ); } if(DNUM==MASTER_CORE_ID) { #ifdef FUNCTION_PROFILE printf("%lf %lf %lf \n",(layer1/ITERATIONS),(layer2/ITERATIONS),(layer3/ITERATIONS)); printf("%lf %lf %lf %lf \n",(pad1/ITERATIONS),(conv1/ITERATIONS),(rect1/ITERATIONS),(pool1/ITERATIONS)); printf("%lf %lf %lf %lf %lf \n",(add1/ITERATIONS),(pad2/ITERATIONS),(conv2/ITERATIONS),(rect2/ITERATIONS),(pool2/ITERATIONS)); printf("%lf %lf %lf %lf %lf \n",(add2/ITERATIONS),(pad3/ITERATIONS),(conv3/ITERATIONS),(rect3/ITERATIONS),(pool3/ITERATIONS)); #else printf("%lf us",(cumulative/ITERATIONS)); #endif } cumulative=0; }
/**
 * @b Description
 * @n
 *     Enters a QMSS critical section. Blocks until the QMSS hardware
 *     semaphore is acquired, protecting the shared QMSS resources from
 *     concurrent access by other cores.
 *
 * @retval
 *     Handle used to lock critical section
 */
Ptr Osal_qmssCsEnter (Void)
{
    /* Spin until the hardware semaphore is granted. */
    for (;;)
    {
        if (CSL_semAcquireDirect (QMSS_HW_SEM) != 0)
        {
            break;
        }
    }
    return NULL;
}
/**
 * @b Description
 * @n
 *     Enters a CPPI critical section. Blocks until the CPPI hardware
 *     semaphore is acquired, serializing access to the shared CPPI
 *     resources across cores.
 *
 * @retval
 *     Handle used to lock critical section
 */
Ptr Osal_cppiCsEnter (Void)
{
    /* Busy-wait on the hardware semaphore guarding CPPI. */
    while (!CSL_semAcquireDirect (CPPI_HW_SEM))
    {
        /* spin */
    }
    return NULL;
}
/**
 * @b Description
 * @n
 *     Multicore OSAL hook protecting the SRIO driver's shared resources.
 *     Blocks until the SRIO hardware semaphore is acquired.
 *
 * @retval
 *     Semaphore Opaque Handle
 */
void* Osal_srioEnterMultipleCoreCriticalSection(void)
{
    /* Busy-wait until the SRIO hardware semaphore is ours. */
    for (;;)
    {
        if (CSL_semAcquireDirect (SRIO_HW_SEM) != 0)
        {
            break;
        }
    }
    return NULL;
}
/*
 * MemMgr_HeapInit() - set up the shared (MSMC) and per-core (L2) heaps.
 *
 * @param img_width   input image width in pixels
 * @param img_height  input image height in pixels
 *
 * The master core builds a bump allocator over MSMC SRAM and hands out the
 * per-core layer-1/layer-2 output map buffers, resets the layer barrier
 * counters, and then releases INIT_DONE_SEM; all other cores spin until
 * that semaphore is free. Every core then builds a private allocator over
 * its L2 heap for scratch buffers, and the master additionally allocates
 * the classifier buffers.
 */
void MemMgr_HeapInit(uint32_t img_width, uint32_t img_height)
{
    int num_core,l1_width,l1_height;
    int l2_width,l2_height;
    int iN,hN,oN;
    alloc_info msmc_info,l2_info;

    /* Layer output dimensions after each layer's downsampling. */
    l1_width = img_width/L1_DOWN_SIZE_FACTOR;
    l1_height = img_height/L1_DOWN_SIZE_FACTOR;
    l2_width = l1_width/L2_DOWN_SIZE_FACTOR;
    l2_height = l1_height/L2_DOWN_SIZE_FACTOR;
    l1_size = l1_width*l1_height;
    l2_size = l2_width*l2_height;

    if(DNUM==MASTER_CORE_ID)
    {
        /* Bump allocator over MSMC shared SRAM, in L2-line-size blocks. */
        msmc_info.free_ptr = (char*)MSMC_REG_BASE;
        msmc_info.block_count = 0;
        msmc_info.block_size = CACHE_L2_LINESIZE;
        msmc_info.max_size = MSMC_SRAM_SIZE;
        msmc_info.max_blocks = MSMC_SRAM_SIZE/CACHE_L2_LINESIZE;

        for(num_core=0; num_core<NUM_CORES;num_core++)
        {
#ifndef FULLY_CONNECTED
            /* BUG FIX: allocate from &msmc_info (the allocator initialized
             * above); the original passed an unrelated smem_info. */
            maps_info_ptr->l1_maps_ptr[num_core] = mem_alloc(&msmc_info,l1_width*l1_height*maps_info_ptr->l1_maps[num_core]*sizeof(short)); ///*TODO*/data alignment
            while(maps_info_ptr->l1_maps_ptr[num_core]==NULL); /* halt on OOM */
#else
            /* BUG FIX: &msmc_info, as above. */
            maps_info_ptr->l1_maps_ptr[num_core] = mem_alloc(&msmc_info,l1_width*l1_height*sizeof(short)); ///*TODO*/data alignment
            while(maps_info_ptr->l1_maps_ptr[num_core]==NULL); /* halt on OOM */
#endif
        }

        for(num_core=0; num_core<NUM_CORES;num_core++)
        {
            /* BUG FIX: &msmc_info, as above. */
            maps_info_ptr->l2_maps_ptr[num_core] = mem_alloc(&msmc_info,l2_width*l2_height*maps_info_ptr->l2_maps[num_core]*sizeof(short));
            while(maps_info_ptr->l2_maps_ptr[num_core]==NULL); /* halt on OOM */
        }

        while ((CSL_semAcquireDirect (LAYER_1_SEM)) == 0);
        /* The core has completed local initialization */
        maps_info_ptr->layer1_sync = 0;
        /* Release the hardware semaphore. */
        CSL_semReleaseSemaphore ( LAYER_1_SEM);

        while ((CSL_semAcquireDirect (LAYER_2_SEM)) == 0);
        /* The core has completed local initialization */
        maps_info_ptr->layer2_sync = 0;
        /* Release the hardware semaphore. */
        CSL_semReleaseSemaphore ( LAYER_2_SEM);

        /* Signal the slave cores that shared initialization is done. */
        CSL_semReleaseSemaphore(INIT_DONE_SEM);
    }

    /* All cores wait until the master has released INIT_DONE_SEM. */
    while(!CSL_semIsFree(INIT_DONE_SEM));

    /* Private bump allocator over this core's L2 heap, in L1D-line blocks. */
    l2_info.free_ptr = (char*)L2_HEAP_BASE;
    l2_info.block_count = 0;
    l2_info.block_size = CACHE_L1D_LINESIZE;
    l2_info.max_size = L2_HEAP_SIZE;
    l2_info.max_blocks = L2_HEAP_SIZE/CACHE_L1D_LINESIZE;

    /* Scratch buffers: input image copy plus two layer-1-sized temps. */
    temp_img_ptr = mem_alloc(&l2_info,img_width*img_height*sizeof(short));
    while(temp_img_ptr==NULL); /* halt on OOM */
    layer1_ptr = mem_alloc(&l2_info,l1_width*l1_height*sizeof(short));
    while(layer1_ptr==NULL); /* halt on OOM */
    temp1_ptr = mem_alloc(&l2_info,l1_width*l1_height*sizeof(short));
    while(temp1_ptr==NULL); /* halt on OOM */

    if(DNUM == MASTER_CORE_ID)
    {
        /* Classifier buffers (master only).
         * //W3 = 12; H3 = 12; K3 = 5; N3 = 64;
         * //iN = N3 * ((W3 - K3 + 1)/2) * ((H3 - K3 + 1)/2); */
        iN = 50*9;
        hN = 64;
        oN = 10;
        pInputNeurons = mem_alloc(&l2_info,iN*sizeof(float));
        while(pInputNeurons==NULL); /* halt on OOM */
        pInputWt = mem_alloc(&l2_info,hN*iN*sizeof(float));
        while(pInputWt==NULL); /* halt on OOM */
        pHiddenBias = mem_alloc(&l2_info,hN*sizeof(float));
        while(pHiddenBias==NULL); /* halt on OOM */
        pOutputWt = mem_alloc(&l2_info,hN*oN*sizeof(float));
        while(pOutputWt==NULL); /* halt on OOM */
    }

    /* Cache this core's shared output-map base pointers. */
    local_l1_maps_ptr= (short*)maps_info_ptr->l1_maps_ptr[DNUM];
    local_l2_maps_ptr= (short*)maps_info_ptr->l2_maps_ptr[DNUM];
}
/*
 * MemMgr_HeapInit() - set up the shared (MSMC) and per-core (L2) heaps.
 *
 * @param img_width   input image width in pixels
 * @param img_height  input image height in pixels
 *
 * The master core builds a bump allocator over MSMC SRAM, hands out the
 * per-core layer-1/layer-2 output map buffers, resets the barrier
 * counters, and sets INIT_DONE; every other core spins on INIT_DONE.
 * Each core then builds a private allocator over its own L2 heap for the
 * scratch buffers used during the forward pass.
 */
void MemMgr_HeapInit(uint32_t img_width, uint32_t img_height)
{
    int num_core,l1_width,l1_height;
    int l2_width,l2_height;
    alloc_info msmc_info,l2_info;
    int locmem_size;

    /* Layer output dimensions after each layer's downsampling; sizes are
     * in bytes (shorts) in this variant. */
    l1_width = img_width/L1_DOWN_SIZE_FACTOR;
    l1_height = img_height/L1_DOWN_SIZE_FACTOR;
    l2_width = l1_width/L2_DOWN_SIZE_FACTOR;
    l2_height = l1_height/L2_DOWN_SIZE_FACTOR;
    l1_size = l1_width*l1_height*sizeof(short);
    l2_size = l2_width*l2_height*sizeof(short);

    if(DNUM==MASTER_CORE_ID)
    {
        /* Bump allocator over MSMC shared SRAM, in L2-line-size blocks. */
        msmc_info.free_ptr = (char*)MSMC_REG_BASE;
        msmc_info.block_count = 0;
        msmc_info.block_size = CACHE_L2_LINESIZE;
        msmc_info.max_size = MSMC_SRAM_SIZE;
        msmc_info.max_blocks = MSMC_SRAM_SIZE/CACHE_L2_LINESIZE;

        memset((void*)MSMC_REG_BASE,0x0,MSMC_SRAM_SIZE);

        /* Per-core layer-1 output maps in shared SRAM; on allocation
         * failure the core halts in the while-NULL spin. */
        for(num_core=0; num_core<NUM_CORES;num_core++)
        {
            maps_info_ptr->l1_maps_ptr[num_core] = mem_alloc(&msmc_info,l1_width*l1_height*maps_info_ptr->l1_maps[num_core]*sizeof(short));
            while(maps_info_ptr->l1_maps_ptr[num_core]==NULL);
        }
        /* Per-core layer-2 output maps. */
        for(num_core=0; num_core<NUM_CORES;num_core++)
        {
            maps_info_ptr->l2_maps_ptr[num_core] = mem_alloc(&msmc_info,l2_width*l2_height*maps_info_ptr->l2_maps[num_core]*sizeof(short));
            while(maps_info_ptr->l2_maps_ptr[num_core]==NULL);
        }

        /* Reset the layer-1 barrier counter under its semaphore. */
        while ((CSL_semAcquireDirect (LAYER_1_SEM)) == 0);
        /* The core has completed local initialization */
        L1_SYNC = 0;
        /* Release the hardware semaphore. */
        CSL_semReleaseSemaphore ( LAYER_1_SEM);

        /* Reset the layer-2 barrier counter under its semaphore. */
        while ((CSL_semAcquireDirect (LAYER_2_SEM)) == 0);
        /* The core has completed local initialization */
        L2_SYNC = 0;
        /* Release the hardware semaphore. */
        CSL_semReleaseSemaphore ( LAYER_2_SEM);

        /* Signal the slave cores that shared initialization is done. */
        INIT_DONE = 0x1;
    }

    /* All cores wait until the master flips INIT_DONE. */
    while(INIT_DONE==0x0);

    /* Private bump allocator over this core's L2 heap, in L1D-line
     * blocks. */
    l2_info.free_ptr = (char*)L2_HEAP_BASE;
    l2_info.block_count = 0;
    l2_info.block_size = CACHE_L1D_LINESIZE;
    l2_info.max_size = L2_HEAP_SIZE;
    l2_info.max_blocks = L2_HEAP_SIZE/CACHE_L1D_LINESIZE;

    memset((void*)L2_HEAP_BASE,0x0,L2_HEAP_SIZE);

    /* Up-front capacity check: one image buffer plus three layer-1-sized
     * scratch buffers. NOTE(review): this ignores per-allocation rounding
     * to block_size — verify the margin is sufficient. */
    locmem_size = (img_width*img_height*sizeof(short))+(3*l1_size);
    if(locmem_size>L2_HEAP_SIZE)
    {
        printf("Insufficient l2 memory \n");
        while(1); /* fatal: halt this core */
    }

    /* Scratch buffers for the forward pass. */
    temp_img_ptr = mem_alloc(&l2_info,img_width*img_height*sizeof(short));
    layer1_ptr = mem_alloc(&l2_info,l1_width*l1_height*sizeof(short));
    temp1_ptr = mem_alloc(&l2_info,l1_width*l1_height*sizeof(short));
    temp2_ptr = mem_alloc(&l2_info,l1_width*l1_height*sizeof(short));

    /* Cache this core's shared output-map base pointers. */
    local_l1_maps_ptr= (short*)maps_info_ptr->l1_maps_ptr[DNUM];
    local_l2_maps_ptr= (short*)maps_info_ptr->l2_maps_ptr[DNUM];
}