Example #1
void main()
{
	int i,img_width=IMG_WIDTH,img_height=IMG_HEIGHT;

	core_id = DNUM;

	/* Enable the 64-bit time-stamp counter used for profiling. */
	CSL_tscEnable();

	/* Configure all of L2 as SRAM, L1D as a 32KB cache, and leave the
	 * MSMC region (MAR 128) uncached. */
	CACHE_setL2Size (CACHE_0KCACHE);
	CACHE_setL1DSize(CACHE_L1_32KCACHE);
	CACHE_disableCaching (128);

	maps_info_ptr = (maps_info*)MAPS_INFO_PTR;

	if(DNUM==MASTER_CORE_ID)
	{
		/* The master core holds INIT_DONE_SEM (released later in
		 * MemMgr_HeapInit), clears the shared MSMC region and the map
		 * bookkeeping area, then applies power gating and computes the
		 * per-core map counts. */
		CSL_semAcquireDirect(INIT_DONE_SEM);
		memset((void*)MSMC_REG_BASE,0x0,MSMC_SRAM_SIZE);
		memset((void*)MAPS_INFO_PTR,0x0,0x100);
		do_power_gating();
		compute_num_maps();
	}

	memset((void*)L2_HEAP_BASE,0x0,L2_HEAP_SIZE);

	/* Time ITERATIONS runs of the network with the time-stamp counter. */
	for(i=0;i<ITERATIONS;i++)
	{
		startVal = _itoll(TSCH,TSCL);
		deeplearn(img_width, img_height);
		endVal = _itoll(TSCH,TSCL);
		cumulative += ((endVal-startVal)/DSP_FREQ_IN_MHZ);
	}
	}
	if(DNUM==MASTER_CORE_ID)
	{
		printf("TimeTaken %lfus\n",(cumulative/ITERATIONS));
	}
	cumulative=0;
}
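The timing loop above combines TSCH and TSCL into a 64-bit cycle count with _itoll. On C66x the TSCL read latches TSCH, so the low half must be read first; a small helper making that order explicit is a common pattern (a sketch assuming the TI C6000 toolchain's c6x.h):

#include <c6x.h>      /* TSCL/TSCH control registers and the _itoll intrinsic */
#include <stdint.h>

/* Read the free-running 64-bit time-stamp counter.
 * Reading TSCL latches TSCH, so the low word is read first. */
static inline uint64_t read_tsc64(void)
{
    uint32_t lo = TSCL;   /* latches TSCH */
    uint32_t hi = TSCH;
    return _itoll(hi, lo);
}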
void Osal_platformSpiCsEnter(void)
{
    /* Get the hardware semaphore.
     *
     * Acquire Multi core CPPI synchronization lock
     */
    while ((CSL_semAcquireDirect (PLATFORM_SPI_HW_SEM)) == 0);

    return;
}
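The matching exit routine is not shown in this example; it would release the same hardware semaphore, roughly as follows (a sketch assuming the PLATFORM_SPI_HW_SEM define and the CSL semaphore API used above):

void Osal_platformSpiCsExit (void)
{
    /* Release the hardware semaphore acquired in Osal_platformSpiCsEnter(). */
    CSL_semReleaseSemaphore (PLATFORM_SPI_HW_SEM);

    return;
}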
Example #3
/**
 * ============================================================================
 *  @n@b Osal_biosMultiCoreCsEnter
 *
 *  @b  brief
 *  @n  This API ensures multi-core synchronization to the caller.
 *
 *      This is a BLOCKING API.
 *
 *      This API ensures multi-core synchronization between
 *      multiple processes trying to access FFTC shared
 *      library at the same time.
 *
 *  @param[in]  None
 *
 *  @return     None
 * =============================================================================
 */
Void Osal_biosMultiCoreCsEnter ()
{
    /* Get the hardware semaphore.
     *
     * Acquire Multi core synchronization lock
     */
    while ((CSL_semAcquireDirect (FFTC_HW_SEM)) == 0);

    return;
}
Example #4
void deeplearn(short* data, uint32_t w, uint32_t h)
{
	core_id = DNUM;

	MemMgr_HeapInit(w,h);

    operateLayer1(data, w, h);
    CACHE_wbAllL1dWait();

    /* All cores update the counter informing that they finished their iteration */
    while ((CSL_semAcquireDirect (LAYER_1_SEM)) == 0);

    maps_info_ptr->layer1_sync++;

    /* Release the hardware semaphore. */
    CSL_semReleaseSemaphore (LAYER_1_SEM);

    /* All cores wait here to sync up */
    while (maps_info_ptr->layer1_sync != NUM_CORES);
	if(DNUM==MASTER_CORE_ID)
		CSL_semAcquireDirect(INIT_DONE_SEM);

    operateLayer2(w / 2, h / 2);
    CACHE_wbAllL1dWait();

//    /* All cores update the counter informing that they finished their iteration */
//    while ((CSL_semAcquireDirect (LAYER_2_SEM)) == 0);
//
//    maps_info_ptr->layer2_sync++;
//
//    /* Release the hardware semaphore. */
//    CSL_semReleaseSemaphore (LAYER_2_SEM);
//
//    /* All cores wait here to sync up */
//    while (maps_info_ptr->layer2_sync != 0x8);

	// dummy classifier
	//dummy_classifier(pInputNeurons, 50*9, 64, 10, pInputWt, pHiddenBias, pOutputWt);

}
void deeplearn(short* data, uint32_t w, uint32_t h)
{
	core_id = DNUM;

	MemMgr_HeapInit(w,h);

	if(DNUM==MASTER_CORE_ID)
	{
		INIT_DONE = 0x0;
	}

    operateLayer1(data, w, h);
    CACHE_wbAllL1dWait();

    /* All cores update the counter informing that they finished their iteration */
    while ((CSL_semAcquireDirect (LAYER_1_SEM)) == 0);

    L1_SYNC = L1_SYNC+1;

    /* Release the hardware semaphore. */
    CSL_semReleaseSemaphore (LAYER_1_SEM);

    /* All cores wait here to sync up */
    while (L1_SYNC != NUM_CORES);

    operateLayer2(w / 2, h / 2);
    CACHE_wbAllL1dWait();

    /* All cores update the counter informing that they finished their iteration */
    while ((CSL_semAcquireDirect (LAYER_2_SEM)) == 0);

    L2_SYNC = L2_SYNC+1;

    /* Release the hardware semaphore. */
    CSL_semReleaseSemaphore (LAYER_2_SEM);

    /* All cores wait here to sync up */
    while (L2_SYNC != NUM_CORES);
}
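Both deeplearn() variants repeat the same sequence after each layer: take a hardware semaphore, bump a shared counter, release the semaphore, then spin until all cores have checked in. Factored into a helper, the barrier reads as below (an illustrative sketch; barrier_sync and its parameters are not part of the original sources):

/* Hypothetical counter barrier guarded by a CSL hardware semaphore.
 * 'sem' is the semaphore index, 'counter' a shared counter kept in
 * uncached/coherent memory, 'num_cores' the number of participants. */
static void barrier_sync(int sem, volatile uint32_t *counter, uint32_t num_cores)
{
    /* Serialize the counter update across cores. */
    while (CSL_semAcquireDirect (sem) == 0);
    *counter = *counter + 1;
    CSL_semReleaseSemaphore (sem);

    /* Spin until every core has reached the barrier. */
    while (*counter != num_cores);
}

With such a helper, the per-layer blocks above would reduce to calls like barrier_sync(LAYER_1_SEM, &maps_info_ptr->layer1_sync, NUM_CORES), with the master core resetting each counter before the next iteration, as MemMgr_HeapInit does below.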
Example #6
/**
 * ============================================================================
 *  @n@b Osal_qmssCsEnter
 *
 *  @b  brief
 *  @n  This API ensures multi-core and multi-threaded
 *      synchronization to the caller.
 *
 *      This is a BLOCKING API.
 *
 *      This API ensures multi-core synchronization between
 *      multiple processes trying to access QMSS shared
 *      library at the same time.
 *
 *  @param[in]  None
 *
 *  @return
 *      Handle used to lock critical section
 * =============================================================================
 */
Void* Osal_qmssCsEnter (Void)
{
    /* Get the hardware semaphore.
     *
     * Acquire Multi core QMSS synchronization lock
     */
    while ((CSL_semAcquireDirect (QMSS_HW_SEM)) == 0);

    /* Disable all interrupts and OS scheduler.
     *
     * Acquire Multi threaded / process synchronization lock.
     */
    coreKey [CSL_chipReadDNUM ()] = Hwi_disable();

    return NULL;
}
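The corresponding exit call is not included in this example; it would restore the interrupt state saved in coreKey and release the semaphore, roughly as sketched here (assuming the same QMSS_HW_SEM define and the SYS/BIOS Hwi API):

Void Osal_qmssCsExit (Void *CsHandle)
{
    /* Re-enable interrupts / the OS scheduler using the key saved on entry. */
    Hwi_restore (coreKey [CSL_chipReadDNUM ()]);

    /* Release the multi-core QMSS synchronization lock. */
    CSL_semReleaseSemaphore (QMSS_HW_SEM);

    return;
}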
Example #7
void main()
{
	int i,img_width=32,img_height=32;

	core_id = DNUM;

	CSL_tscEnable();

	CACHE_setL2Size (CACHE_0KCACHE);
	CACHE_setL1DSize(CACHE_L1_32KCACHE);
	CACHE_disableCaching (128);

	maps_info_ptr = (maps_info*)MAPS_INFO_PTR;

	if(DNUM==MASTER_CORE_ID)
	{
		CSL_semAcquireDirect(INIT_DONE_SEM);
		memset((void*)MSMC_REG_BASE,0x0,MSMC_SRAM_SIZE);
		memset((void*)MAPS_INFO_PTR,0x0,0x100);
		do_power_gating();
		compute_num_maps();
	}

	memset((void*)L2_HEAP_BASE,0x0,L2_HEAP_SIZE);

	for(i=0;i<ITERATIONS;i++)
	{
		startVal = _itoll(TSCH,TSCL);
		deeplearn(in_img, img_width, img_height);
		endVal = _itoll(TSCH,TSCL);
		cumulative += ((endVal-startVal)/DSP_FREQ_IN_MHZ);
	}
	if(DNUM==MASTER_CORE_ID)
	{
#ifdef FUNCTION_PROFILE
		printf("%lf %lf %lf \n",(layer1/ITERATIONS),(layer2/ITERATIONS),(layer3/ITERATIONS));
		printf("%lf %lf %lf %lf \n",(pad1/ITERATIONS),(conv1/ITERATIONS),(rect1/ITERATIONS),(pool1/ITERATIONS));
		printf("%lf %lf %lf %lf %lf \n",(add1/ITERATIONS),(pad2/ITERATIONS),(conv2/ITERATIONS),(rect2/ITERATIONS),(pool2/ITERATIONS));
		printf("%lf %lf %lf %lf %lf \n",(add2/ITERATIONS),(pad3/ITERATIONS),(conv3/ITERATIONS),(rect3/ITERATIONS),(pool3/ITERATIONS));
#else
		printf("%lf us",(cumulative/ITERATIONS));
#endif
	}
	cumulative=0;
}
/**
 *  @b Description
 *  @n  
 *      The function is used to enter a critical section.
 *      Function protects against 
 *      
 *      access from multiple cores 
 *      and 
 *      access from multiple threads on single core
 *
 *  @retval
 *      Handle used to lock critical section
 */
Ptr Osal_qmssCsEnter (Void)
{
    /* Get the hardware semaphore */
    while ((CSL_semAcquireDirect (QMSS_HW_SEM)) == 0);
    return NULL;
}
/**
 *  @b Description
 *  @n  
 *      The function is used to enter a critical section.
 *      Function protects against 
 *      access from multiple cores 
 *      and 
 *      access from multiple threads on single core
 *
 *  @retval
 *      Handle used to lock critical section
 */
Ptr Osal_cppiCsEnter (Void)
{
    /* Get the hardware semaphore for protection against multiple core access */
    while ((CSL_semAcquireDirect (CPPI_HW_SEM)) == 0);
    return NULL;
}
/**
 *  @b Description
 *  @n  
 *      This is the Multicore OSAL Implementation to protect the driver shared
 *      resources across multiple cores.
 *
 *  @retval
 *      Semaphore Opaque Handle
 */
void* Osal_srioEnterMultipleCoreCriticalSection(void)
{
    /* Get the hardware semaphore */
    while ((CSL_semAcquireDirect (SRIO_HW_SEM)) == 0);
    return NULL;
}
Example #11
void MemMgr_HeapInit(uint32_t img_width, uint32_t img_height)
{
	int num_core,l1_width,l1_height;
	int l2_width,l2_height;
	int iN,hN,oN;
	alloc_info msmc_info,l2_info;

	l1_width = img_width/L1_DOWN_SIZE_FACTOR;
	l1_height = img_height/L1_DOWN_SIZE_FACTOR;
	l2_width = l1_width/L2_DOWN_SIZE_FACTOR;
	l2_height = l1_height/L2_DOWN_SIZE_FACTOR;

	l1_size = l1_width*l1_height;
	l2_size = l2_width*l2_height;

	if(DNUM==MASTER_CORE_ID)
	{
		msmc_info.free_ptr = (char*)MSMC_REG_BASE;
		msmc_info.block_count = 0;
		msmc_info.block_size  = CACHE_L2_LINESIZE;
		msmc_info.max_size    = MSMC_SRAM_SIZE;
		msmc_info.max_blocks  = MSMC_SRAM_SIZE/CACHE_L2_LINESIZE;

		for(num_core=0; num_core<NUM_CORES;num_core++)
		{
#ifndef FULLY_CONNECTED
			maps_info_ptr->l1_maps_ptr[num_core] = mem_alloc(&msmc_info,l1_width*l1_height*maps_info_ptr->l1_maps[num_core]*sizeof(short)); /* TODO: data alignment */
			while(maps_info_ptr->l1_maps_ptr[num_core]==NULL);
#else
			maps_info_ptr->l1_maps_ptr[num_core] = mem_alloc(&msmc_info,l1_width*l1_height*sizeof(short)); /* TODO: data alignment */
			while(maps_info_ptr->l1_maps_ptr[num_core]==NULL);
#endif
		}
		for(num_core=0; num_core<NUM_CORES;num_core++)
		{
			maps_info_ptr->l2_maps_ptr[num_core] = mem_alloc(&msmc_info,l2_width*l2_height*maps_info_ptr->l2_maps[num_core]*sizeof(short));
			while(maps_info_ptr->l2_maps_ptr[num_core]==NULL);
		}
	    while ((CSL_semAcquireDirect (LAYER_1_SEM)) == 0);

	    /* The core has completed local initialization */
	    maps_info_ptr->layer1_sync = 0;

	    /* Release the hardware semaphore. */
	    CSL_semReleaseSemaphore ( LAYER_1_SEM);

	    while ((CSL_semAcquireDirect (LAYER_2_SEM)) == 0);

	    /* The core has completed local initialization */
	    maps_info_ptr->layer2_sync = 0;

	    /* Release the hardware semaphore. */
	    CSL_semReleaseSemaphore ( LAYER_2_SEM);

	    CSL_semReleaseSemaphore(INIT_DONE_SEM);
	}

	while(!CSL_semIsFree(INIT_DONE_SEM));

	l2_info.free_ptr = (char*)L2_HEAP_BASE;
	l2_info.block_count = 0;
	l2_info.block_size  = CACHE_L1D_LINESIZE;
	l2_info.max_size    = L2_HEAP_SIZE;
	l2_info.max_blocks  = L2_HEAP_SIZE/CACHE_L1D_LINESIZE;

	temp_img_ptr 	= mem_alloc(&l2_info,img_width*img_height*sizeof(short));
	while(temp_img_ptr==NULL);
	layer1_ptr  	= mem_alloc(&l2_info,l1_width*l1_height*sizeof(short));
	while(layer1_ptr==NULL);
	temp1_ptr  		= mem_alloc(&l2_info,l1_width*l1_height*sizeof(short));
	while(temp1_ptr==NULL);

	if(DNUM == MASTER_CORE_ID)
	{
		//W3 = 12; H3 = 12; K3 = 5; N3 = 64;
		//iN = N3 * ((W3 - K3 + 1)/2) * ((H3 - K3 + 1)/2);
		iN = 50*9;
		hN = 64;
		oN = 10;

		pInputNeurons 	= mem_alloc(&l2_info,iN*sizeof(float));
		while(pInputNeurons==NULL);
		pInputWt 		= mem_alloc(&l2_info,hN*iN*sizeof(float));
		while(pInputWt==NULL);
		pHiddenBias 	= mem_alloc(&l2_info,hN*sizeof(float));
		while(pHiddenBias==NULL);
		pOutputWt 		= mem_alloc(&l2_info,hN*oN*sizeof(float));
		while(pOutputWt==NULL);
	}

	local_l1_maps_ptr= (short*)maps_info_ptr->l1_maps_ptr[DNUM];
	local_l2_maps_ptr= (short*)maps_info_ptr->l2_maps_ptr[DNUM];
}
void MemMgr_HeapInit(uint32_t img_width, uint32_t img_height)
{
	int num_core,l1_width,l1_height;
	int l2_width,l2_height;
	alloc_info msmc_info,l2_info;
	int locmem_size;

	l1_width = img_width/L1_DOWN_SIZE_FACTOR;
	l1_height = img_height/L1_DOWN_SIZE_FACTOR;
	l2_width = l1_width/L2_DOWN_SIZE_FACTOR;
	l2_height = l1_height/L2_DOWN_SIZE_FACTOR;

	l1_size = l1_width*l1_height*sizeof(short);
	l2_size = l2_width*l2_height*sizeof(short);

	if(DNUM==MASTER_CORE_ID)
	{
		msmc_info.free_ptr = (char*)MSMC_REG_BASE;
		msmc_info.block_count = 0;
		msmc_info.block_size  = CACHE_L2_LINESIZE;
		msmc_info.max_size    = MSMC_SRAM_SIZE;
		msmc_info.max_blocks  = MSMC_SRAM_SIZE/CACHE_L2_LINESIZE;

		memset((void*)MSMC_REG_BASE,0x0,MSMC_SRAM_SIZE);

		for(num_core=0; num_core<NUM_CORES;num_core++)
		{
			maps_info_ptr->l1_maps_ptr[num_core] = mem_alloc(&msmc_info,l1_width*l1_height*maps_info_ptr->l1_maps[num_core]*sizeof(short));
			while(maps_info_ptr->l1_maps_ptr[num_core]==NULL);
		}
		for(num_core=0; num_core<NUM_CORES;num_core++)
		{
			maps_info_ptr->l2_maps_ptr[num_core] = mem_alloc(&msmc_info,l2_width*l2_height*maps_info_ptr->l2_maps[num_core]*sizeof(short));
			while(maps_info_ptr->l2_maps_ptr[num_core]==NULL);
		}
	    while ((CSL_semAcquireDirect (LAYER_1_SEM)) == 0);

	    /* The core has completed local initialization */
	    L1_SYNC = 0;

	    /* Release the hardware semaphore. */
	    CSL_semReleaseSemaphore ( LAYER_1_SEM);

	    while ((CSL_semAcquireDirect (LAYER_2_SEM)) == 0);

	    /* The core has completed local initialization */
	    L2_SYNC = 0;

	    /* Release the hardware semaphore. */
	    CSL_semReleaseSemaphore ( LAYER_2_SEM);

		INIT_DONE = 0x1;
	}

	while(INIT_DONE==0x0);

	l2_info.free_ptr = (char*)L2_HEAP_BASE;
	l2_info.block_count = 0;
	l2_info.block_size  = CACHE_L1D_LINESIZE;
	l2_info.max_size    = L2_HEAP_SIZE;
	l2_info.max_blocks  = L2_HEAP_SIZE/CACHE_L1D_LINESIZE;

	memset((void*)L2_HEAP_BASE,0x0,L2_HEAP_SIZE);

	locmem_size = (img_width*img_height*sizeof(short))+(3*l1_size);
	if(locmem_size>L2_HEAP_SIZE)
	{
		printf("Insufficient l2 memory \n");
		while(1);
	}

	temp_img_ptr 	= mem_alloc(&l2_info,img_width*img_height*sizeof(short));
	layer1_ptr  	= mem_alloc(&l2_info,l1_width*l1_height*sizeof(short));
	temp1_ptr  		= mem_alloc(&l2_info,l1_width*l1_height*sizeof(short));
	temp2_ptr  		= mem_alloc(&l2_info,l1_width*l1_height*sizeof(short));

	local_l1_maps_ptr= (short*)maps_info_ptr->l1_maps_ptr[DNUM];
	local_l2_maps_ptr= (short*)maps_info_ptr->l2_maps_ptr[DNUM];
}
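mem_alloc() itself does not appear in these listings. Judging from the alloc_info fields initialized above (free_ptr, block_count, block_size, max_size, max_blocks), it behaves like a block-granular bump allocator; the following is only a sketch consistent with that usage, not the project's actual implementation:

#include <stdint.h>

/* Assumed layout of alloc_info, matching the fields used above. */
typedef struct {
    char     *free_ptr;     /* next free address in the region       */
    uint32_t  block_count;  /* blocks handed out so far              */
    uint32_t  block_size;   /* allocation granularity (cache line)   */
    uint32_t  max_size;     /* total size of the region in bytes     */
    uint32_t  max_blocks;   /* max_size / block_size                 */
} alloc_info;

/* Bump-allocate 'size' bytes, rounded up to whole blocks so every
 * allocation starts on a cache-line boundary. Returns NULL when the
 * region is exhausted. */
static void* mem_alloc(alloc_info *info, uint32_t size)
{
    uint32_t blocks = (size + info->block_size - 1) / info->block_size;
    void *ptr;

    if (info->block_count + blocks > info->max_blocks)
        return NULL;

    ptr = (void*)info->free_ptr;
    info->free_ptr    += blocks * info->block_size;
    info->block_count += blocks;
    return ptr;
}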