Ejemplo n.º 1
0
static int gic_notifier(struct notifier_block *self, unsigned long cmd,	void *v)
{
	int i;

	for (i = 0; i < MAX_GIC_NR; i++) {
		switch (cmd) {
		case CPU_PM_ENTER:
			gic_cpu_save(i);
			break;
		case CPU_PM_ENTER_FAILED:
		case CPU_PM_EXIT:
			gic_cpu_restore(i);
			break;
		case CPU_CLUSTER_PM_ENTER:
			gic_dist_save(i);
			break;
		case CPU_CLUSTER_PM_ENTER_FAILED:
		case CPU_CLUSTER_PM_EXIT:
			gic_dist_restore(i);
			break;
		}
	}

	return NOTIFY_OK;
}
Ejemplo n.º 2
0
static int gic_runtime_suspend(struct device *dev)
{
	struct gic_chip_data *gic = dev_get_drvdata(dev);

	gic_dist_save(gic);
	gic_cpu_save(gic);

	return pm_clk_suspend(dev);
}
Ejemplo n.º 3
0
/*******************************************************************************
 * FVP handler called when an affinity instance is about to be suspended. The
 * level and mpidr determine the affinity instance. The 'state' arg. allows the
 * platform to decide whether the cluster is being turned off and take apt
 * actions.
 *
 * CAUTION: There is no guarantee that caches will remain turned on across calls
 * to this function as each affinity level is dealt with. So do not write & read
 * global variables across calls. It will be wise to do flush a write to the
 * global to prevent unpredictable results.
 ******************************************************************************/
int plat_affinst_suspend(unsigned long mpidr,
			unsigned long sec_entrypoint,
			unsigned long ns_entrypoint,
			unsigned int afflvl,
			unsigned int state)
{
	unsigned int ectlr;
	/* Determine if any platform actions need to be executed. */
	if (plat_do_plat_actions(afflvl, state) == -EAGAIN)
		return PSCI_E_SUCCESS;

	//set cpu0 as aa64 for cpu reset
	mmio_write_32(MP0_MISC_CONFIG3, mmio_read_32(MP0_MISC_CONFIG3) | (1<<12));

	ectlr = read_cpuectlr();
	ectlr &= ~CPUECTLR_SMP_BIT;
	write_cpuectlr(ectlr);

	/* Program the jump address for the target cpu */
	plat_program_mailbox(read_mpidr_el1(), sec_entrypoint);

	/* Program the power controller to enable wakeup interrupts. */
	// plat_pwrc_set_wen(mpidr);

	/* Perform the common cpu specific operations */
	// plat_cpu_pwrdwn_common();
	gic_cpuif_deactivate(get_plat_config()->gicc_base);

	/* Perform the common cluster specific operations */
	if (afflvl >= MPIDR_AFFLVL1) {
		// plat_cluster_pwrdwn_common();
		if (get_plat_config()->flags & CONFIG_HAS_CCI)
			cci_disable_cluster_coherency(mpidr);

		disable_scu(mpidr);
	}
	if (afflvl >= MPIDR_AFFLVL2) {
		plat_save_el3_dormant_data();
		generic_timer_backup();
		gic_dist_save();
	}

	return PSCI_E_SUCCESS;
}
Ejemplo n.º 4
0
/*******************************************************************************
 * MTK_platform handler called when an affinity instance is about to be suspended. The
 * level and mpidr determine the affinity instance. The 'state' arg. allows the
 * platform to decide whether the cluster is being turned off and take apt
 * actions.
 *
 * CAUTION: This function is called with coherent stacks so that caches can be
 * turned off, flushed and coherency disabled. There is no guarantee that caches
 * will remain turned on across calls to this function as each affinity level is
 * dealt with. So do not write & read global variables across calls. It will be
 * wise to do flush a write to the global to prevent unpredictable results.
 ******************************************************************************/
int mt_affinst_suspend(unsigned long mpidr,
			unsigned long sec_entrypoint,
			unsigned long ns_entrypoint,
			unsigned int afflvl,
			unsigned int state)
{
	int rc = PSCI_E_SUCCESS;
	unsigned int gicc_base, ectlr;
	unsigned long cpu_setup, cci_setup, linear_id;
	mailbox_t *mt_mboxes;

	switch (afflvl) {
        case MPIDR_AFFLVL2: 
                if (state == PSCI_STATE_OFF) {
                        struct _el3_dormant_data *p = &el3_dormant_data[0];

                        p->mp0_l2actlr_el1 = read_l2actlr();
                        p->mp0_l2ectlr_el1 = read_l2ectlr();

                        //backup L2RSTDISABLE and set as "not disable L2 reset"
                        p->mp0_l2rstdisable = mmio_read_32(MP0_CA7L_CACHE_CONFIG);
                        mmio_write_32(MP0_CA7L_CACHE_CONFIG, 
                                      mmio_read_32(MP0_CA7L_CACHE_CONFIG) & ~L2RSTDISABLE);
                        //backup generic timer
                        //printf("[ATF_Suspend]read_cntpct_el0()=%lu\n", read_cntpct_el0());
                        generic_timer_backup();

                        gic_dist_save();
                }
                break;

	case MPIDR_AFFLVL1:
		if (state == PSCI_STATE_OFF) {
			/*
			 * Disable coherency if this cluster is to be
			 * turned off
			 */
			cci_setup = mt_get_cfgvar(CONFIG_HAS_CCI);
			if (cci_setup) {
				cci_disable_coherency(mpidr);
			}
			disable_scu(mpidr);

			trace_power_flow(mpidr, CLUSTER_SUSPEND);
		}
		break;

	case MPIDR_AFFLVL0:
		if (state == PSCI_STATE_OFF) {
                        //set cpu0 as aa64 for cpu reset
                        mmio_write_32(MP0_MISC_CONFIG3, mmio_read_32(MP0_MISC_CONFIG3) | (1<<12));
			/*
			 * Take this cpu out of intra-cluster coherency if
			 * the MTK_platform flavour supports the SMP bit.
			 */
			cpu_setup = mt_get_cfgvar(CONFIG_CPU_SETUP);
			if (cpu_setup) {
				ectlr = read_cpuectlr();
				ectlr &= ~CPUECTLR_SMP_BIT;
				write_cpuectlr(ectlr);
			}

			/* Program the jump address for the target cpu */
			linear_id = platform_get_core_pos(mpidr);
			mt_mboxes = (mailbox_t *) (MBOX_OFF);
			mt_mboxes[linear_id].value = sec_entrypoint;
			flush_dcache_range((unsigned long) &mt_mboxes[linear_id],
					   sizeof(unsigned long));

			/*
			 * Prevent interrupts from spuriously waking up
			 * this cpu
			 */
                        //gic_cpu_save();
			gicc_base = mt_get_cfgvar(CONFIG_GICC_ADDR);
			gic_cpuif_deactivate(gicc_base);
			trace_power_flow(mpidr, CPU_SUSPEND);
		}
		break;

	default:
		assert(0);
	}

	return rc;
}
int  main (void)

{
	u32 count = 0;
	s32 i, j, k = 0;
	u32 max, sum;
	volatile u32 tmp;
	u32 *p;
	char *pstr;
	p = (u32 *)(DRAM_PHYS_START + 0x6000);
	u32  boot;
	u32  load_start, load_end;
	u32  boot_start, boot_end;
	u32 bm_load_duration_ns, bm_load_duration_ms;

	/* get the timestamp of the osc timer1 */
	boot_end = readl((const volatile void *)SOCFPGA_OSC1TIMER1_ADDRESS + 0x4);

	InitPeripheral();
	cpu_local_irq_disable();
	
	/*  asp is cleared by the preloader , and preloader will save
	  *  qspi probe, read function in it. Please don't clear
	  *  we reuse the preloader qspi driver at here, so we save
	  *  the probe and read function pointer into asp.
	  *  it at here.
	  */
	//memset(asp, 0, sizeof(struct amp_share_param));
	asp->bm_magic = ('b' << 8) | 'm';
	
	/**** interrupt initialize*******/
    gic_Int_init();
	/**** hook ISR callback ******/
	gic_sgi_init();
	
#ifdef NEED_SAVE_RESTORE_GIC_REGS_FROM_BM
	gic_dist_save();
#endif
	/**** interrupt ready *****/
	cpu_local_irq_enable();
	UART_DEBUG("start bm...... %x\r\n", p);
	UART_DEBUG("++start bm...... 12345\r\n");
	bmlog("start bm......%x\r\n", p);
	UART_DEBUG("start bm...... %x\r\n", 0x55aa);
	UART_DEBUG("--start bm...... 67890\r\n");

	/* auto detect the boot mode */
	boot = bm_get_boot_mode();
	if((BOOTSEL_MODE_SD_1_8V == boot) || (BOOTSEL_MODE_SD_3_3V == boot))
	{		
		if(!bm_sd_load_rbf())
			bmlog("load fpga rbf is ok!\n");
	}
	else if((BOOTSET_MODE_QSPI_1_8V == boot) || (BOOTSET_MODE_QSPI_3_3V == boot))
	{
		if(!bm_qspi_load_rbf())
			bmlog("load fpga rbf is ok!\n");
	}
	
	/* osc_timer1 is running at 25MHz, 40ns per cycle */
	asp->boot_end_stamp = boot_end;
	boot_start =  asp->boot_start_stamp;
	bm_load_duration_ns = (boot_start - boot_end) * 40;
	bm_load_duration_ms = bm_load_duration_ns/1000/1000;
	bmlog("start[0x%x],end[%x]\n, bm_load_ns = %u(ns), bm_load_ms= %u(ms)\n", 
										   boot_start,   
										   boot_end,
										   bm_load_duration_ns,
										   bm_load_duration_ms
										   );
	load_start = asp->load_bm_start;
	load_end   = asp->load_bm_end;
	bmlog("real loading bm time duration is %u(ms)(including qspi/sd init)\n", load_end - load_start);

	if(fpgamgr_program_fpga((const unsigned long *)FPGA_RBF_BASE, FPGA_RBF_SIZE) < 0)	
		UART_DEBUG("config fpga failed!\r\n");

	else
	{
		UART_DEBUG("config fpga OK!\r\n");
		writel(('R'<<16)|('B'<<8)|('F'<<0),&(asp->preloader_wait_bm_load_rbf));
	}


	pstr = (char *)FPGA_SDRAM_PHYS_BASE;
	sprintf(pstr, "%s\n", "i am from fpga ddr ram");
	bmlog("%s", pstr);
	
	pstr = (char *)FPGA_SRAM_PHYS_BASE;
	sprintf(pstr, "%s\n", "i am from fpga sram");
	bmlog("%s", pstr);

#ifdef	LCD1602_DISP
	IIC_InitIp();
	LCD_SetCursor(0);
	sprintf(cDispBuf[0], "fpga config done!");
	IIC_EXfer(LCD_ADDR, cDispBuf[0], strlen(cDispBuf[0])>15?16:strlen(cDispBuf[0]));
#endif
	
	while(!gCacheCoherence)
	{
		/** led blink for hand shake debug with linux **/
		k++;
		if((k&0x3FFFF) == 0)
			LED27_BLINK();//*(volatile u32 *)HPS_GPIO1_BASE_ADDR ^= (0x1<<12);
	}
	/*
	**************************************************************
	**** cause Linux peer use 2GB user space/2GB kernel space split *******
	**** so it is 2048 L1 entry here, if use 3GB user/1 GB space split ********
	**** it woule be 1024 here, and p should adjust to 0x7000 yejc    ********
	**************************************************************
	*/
	for(j = 0; j < 2048-16; j++) //16MB for IO map space
				PageTable[j+2048] = *p++;

	/*
	**************************************************************
	**** time to bring bm core to smp cache coherence environmence *******
	**************************************************************
	*/
	__asm__ __volatile__("dsb\n"
	"isb\n"
	"mrc    p15, 0, r1, c1, c0, 0\n"
	"ldr	r2, =0x40180d\n"
	"orr	r1, r1, r2\n"
	"mcr    p15, 0, r1, c1, c0, 0\n"
	"dsb\n"
	"isb\n"
	: : :"memory", "cc");


	bmlog("L1 L2 cache enabled, SCU enabled~~~\n");
	//asp = 0xFC700000;
	
	/*
	********************************************************************************
	** from this point on, you can free to invoke the linux kernel space text function from BM,  *****
	** only if the function would not cause the shceduler to action, or the CPU of BM would trap ***** 
	** in the cpu_idle(clone from linux cpu core) process if we can't not obtain the lock/mutex  *****
	** /semaphore                                                                                                              ****
	********************************************************************************
	*/
	//printk = (printk_fn)asp->sta.printk_fn;
	_raw_spinlock = (raw_spinlock_fn)asp->sta.spinlock_lock_fn;
	_raw_spinunlock = (raw_spinlock_fn)asp->sta.spinlock_unlock_fn;
	/* semaphore */
	down_trylock = (down_trylock_fn)asp->sta._down_trylock_fn;

	while(1)
	{
		if(sgi15task_pending)
		{	
			if(ACCESS_ONCE(asp->sra[SGI_LINUX_REQ_BM_CONSUME_BUF].linux_cmd_args) == 0)
			{
			/**************************************************************************************/
			/**** place interrupt here test worse case interrupt latency would be more accuracy ***/
			/**** casue we not only take care of 10000 interrupts/second from FPGA, but also    ***/
			/**** suffer more than 6000 extra interrupts/second from IPI and offen L1 data cahce miss **/
			/**************************************************************************************/
#ifdef TEST_IL_MORE_ACCURACY
			if(testIL)
			{
				bmlog("\n________________________________________________________________________________\n");
                bmlog("Now the BM CPU would suffer more than 16000 interrupts/second(10000i/s from\n");
                bmlog("FPGA_IRQ0 req(100us),more than 6000i/s from IPI(Inner Process Interrupt), and\n");
                bmlog("process several Gbps data per second, they are all concurrently, very intensive\n");
                bmlog("load for BM CPU core!\n");
                bmlog("__________________________________________________________________________________\n");
				testIL = 0;
				gic_Int_dis (GIC_PFGA0); //72 FPGA_IRQ0
				gic_Int_clr (GIC_PFGA0);
				pvt_init(1);
				fire_fpga_irq();
				pvt_start();
				gic_Int_en (GIC_PFGA0);
			}
#endif
			/************************************************************************/
			//p = (unsigned int *)0x1E200000;
			p = (unsigned int *)0xFC800000;
			for(i = 0; i < (DRAM_BUF_SIZE/4); i++)
			{
				if(*p != CPU_DATA_PATERN0)
					bmlog("check buf from linux failed! i=%d, *p=%x\n", i, *p);
				p++;
			}
			//bmlog("check buf from linux finish!\n");
			//p = (unsigned int *)0x1E200000;
			p = (unsigned int *)0xFC800000;
			memset_int(p, CPU_DATA_PATERN3, DRAM_BUF_SIZE);
			//now u can mesure blink frequency on hps  LED3 and multiply 2 to calculate the interrupte frequency
			//and then you can evaulate how fast the cpu do 2 times DRAM_SIZE write and 2 times DRAM_SIZE read
			//you sholud know that cpu spend must time to iter, compare in the read "for loop" and BM do interrupt 
			//handler&cpu mode switch Linux handle system tick&sgi intrrupt and sched task, so the actual memory system
			//band width is greater than this evaluate value
			//be aware 65536B is bigger than L1 Dcache but little than L2 Dcache
			//evaluate data process speed in our case is:
			//1842Hz * 2toggle * 2w * 2r&check * DRAM_SIZE(65536B) = 965738496B/s = 7.73Gbps
			

			//toggle hps led
			//*(volatile u32 *)HPS_GPIO1_BASE_ADDR ^= (0x1<<12);
			LED27_BLINK();
			sgi15task_pending = 0;
			gic_raise_interrupt(CPU0, GIC_SGI13);
				}
			else if(ACCESS_ONCE(asp->sra[SGI_LINUX_REQ_BM_CONSUME_BUF].linux_cmd_args) == 1)
				{
			p = (unsigned int *)0xfe700000;
			for(i = 0; i < (SRAM_BUF_SIZE/4); i++)
			{
				if(*p != CPU_DATA_PATERN0)
					bmlog("check buf from linux failed! i=%d, *p=%x\n", i, *p);
				p++;
			}
			//bmlog("check buf from linux finish!\n");
			p = (unsigned int *)0xfe700000;
			memset_int(p, CPU_DATA_PATERN3, SRAM_BUF_SIZE);
			//now u can mesure blink frequency on hps LED2  and multiply 2 to calculate the interrupte frequency
			//and then you can evaulate how fast the cpu do 2 times SRAM_SIZE write and 2 times SRAM_SIZE read
			//you sholud know that cpu spend must time to iter, compare in the read "for loop" and BM do interrupt 
			//handler&cpu mode switch Linux handle system tick&sgi intrrupt and sched task, so the actual memory system
			//band width is greater than this evaluate value
			//be aware 32768B is bigger than L1 Dcache but little than L2 Dcache
			//evaluate data process speed in our case is:
			//3230Hz * 2toggle * 2w * 2r&check * SRAM_SIZE(32768B) = 846725120B/s = 6.77Gbps //little then DRAM cause much more interrupt overhead
			

			//toggle hps led
			//*(volatile u32 *)HPS_GPIO1_BASE_ADDR ^= (0x2<<12);
			LED28_BLINK();
			sgi15task_pending = 0;
			gic_raise_interrupt(CPU0, GIC_SGI13);
				
		}
		//bmlog("CPU1#%04d:msg from cpu1 call~~~~~~~~~~~~~~~~\n", i);
	}
		if(asp->sra[SGI_LINUX_REQ_BM_CONSUME_BUF].linux_cmd_args == 2)
		{
					for(i = 0; i < SPINLOCK_TEST_COUNT; i++)
        			{
                		_raw_spinlock(&asp->rslocks[0]);
                		tmp = asp->sra[SGI_LINUX_REQ_BM_CONSUME_BUF].bm_cmd_status;
                		//dummy j++
                		j++;
                		asp->sra[SGI_LINUX_REQ_BM_CONSUME_BUF].bm_cmd_status += 2;
                		if((asp->sra[SGI_LINUX_REQ_BM_CONSUME_BUF].bm_cmd_status - tmp) != 2)
                        	bmlog("BM:spinlock test failed!\n");
                		_raw_spinunlock(&asp->rslocks[0]);
                		//dummy operation on j++ simulate the actual scenario to give another cpu chance
                		//to take lock, reduce starvation situation
                		j++;
        			}
					//bmlog("\nBM spinlock test:%d\n", tmp + 2);
					bmlog("\n----------------------------\n");
					bmlog("BM spinlock test:%d\n", tmp + 2);
					bmlog("----------------------------\n");


			/*************************************************************************************/
			/**** place interrupt test here test cpu data process speed would be more accuracy ***/
			/*************************************************************************************/
#ifdef TEST_DATA_PROCESS_SPEED_MORE_ACCURACY
			gic_Int_dis (GIC_PFGA0); //72 FPGA_IRQ0
			gic_Int_clr (GIC_PFGA0);
			pvt_init(1);
			fire_fpga_irq();
			pvt_start();
			gic_Int_en (GIC_PFGA0);
#endif
			/************************************************************************/
			while(!ACCESS_ONCE(gINTtestDone));
			ACCESS_ONCE(gINTtestDone) = 0;
			for(i = 0; i < IL_TEST_COUNT - 1; i++)
			{
				p_pvt[i] = p_pvt[i] - p_pvt[i + 1]; //delta t of pvt, be careful pvt counter overlap!
			}
			for(i = 0; i < IL_TEST_COUNT - 1; i++)
			{
				if(p_pvt[i] > PVT_100US_CYCLE)
					p_pvt[i] -= PVT_100US_CYCLE; /* pvt 10ns resolution, 10000 * 10ns = 100us*/ //IL(interrupt latency jitter)
				else
					p_pvt[i] = PVT_100US_CYCLE - p_pvt[i];
			}
			max = p_pvt[0];
			k = 0;
			for(i = 0; i < IL_TEST_COUNT - 1; i++)
				if(p_pvt[i] > max)
				{
					max = p_pvt[i];
					k = i;
				}
			max *= 10;
			bmlog("\n------------------------------------------------------------------------------------------\n");
			//bmlog("\ninterrupt latency test method 1(use private timer)\n");
			bmlog("interrupt latency test method 1(use private timer)\n");
			//bmlog("max interrupt latency jitter: %d ns\n", max);
			bmlog("max interrupt latency jitter: %d ns\n", max);
			sum = 0;
			for(i = 0; i < IL_TEST_COUNT - 1; i++)
			{
				sum += p_pvt[i];				//be carefule sum overflow
			}
			sum /= (IL_TEST_COUNT - 1);
			sum *= 10;
			//bmlog("average interrupt latency jitter: %d ns\n", sum);
			bmlog("average interrupt latency jitter: %d ns\n", sum);
			//90ns is measure from oscilloscope, see AMP Reference Design for detail
			bmlog("max interrupt latency: %d ns(use private timer@CPU core cluster)\n", max + 90);
			if(max>410)
			    bmlog("max interrupt latency: %d ns(use private timer@CPU core cluster),max_index=%d\n", max + 90, k); 
			bmlog("average interrupt latency: %d ns(use private timer@CPU core cluster)\n", sum + 90);
			bmlog("-----------------------------------------------------------------------------------------------\n");


			bmlog("\n-----------------------------------------------------------------------------------------------\n");
			//bmlog("\nfpga send %d times irq req to arm, arm ack %d times, lost irq %d times\n", gFPGA_IRQ_req, gARM_IRQ_ack, gFPGA_IRQ_req - gARM_IRQ_ack);
			bmlog("fpga send %d times irq req to arm, arm ack %d times, lost irq %d times\n", 
			ACCESS_ONCE(gFPGA_IRQ_req), ACCESS_ONCE(gARM_IRQ_ack), ACCESS_ONCE(gFPGA_IRQ_req) - ACCESS_ONCE(gARM_IRQ_ack));
			bmlog("-------------------------------------------------------------------------------------------------\n");

			testIL = 1;
#ifdef DMA_AND_ACP_TEST
			dma_init();
			//DMAC_regs_dump(0);
			dma_mem2mem();//new for test
			//DMAC_regs_dump(0);
			while(!ACCESS_ONCE(gDMAtestDone));
			ACCESS_ONCE(gDMAtestDone) = 0;
			dma_mem2mem_done();
			//DMAC_regs_dump(0);
			bmlog("DMA test done\n");
			bmlog("-----------------------------------------------\n\n");

			dma_mem2mem_use_acp();
			while(!ACCESS_ONCE(gDMAtestDone));
			ACCESS_ONCE(gDMAtestDone) = 0;
			dma_mem2mem_use_acp_done();
			bmlog("DMA test acp done\n");
			bmlog("-----------------------------------------------\n\n");

			dma_ARMmem2FPGAmem();
			while(!ACCESS_ONCE(gDMAtestDone));
			ACCESS_ONCE(gDMAtestDone) = 0;
			dma_mem2mem_done();
			bmlog("dma_ARMmem2FPGAmem test done\n");
			bmlog("-----------------------------------------------\n\n");

			dma_FPGAmem2ARMmem_use_acp();
			while(!ACCESS_ONCE(gDMAtestDone));
			ACCESS_ONCE(gDMAtestDone) = 0;
			dma_mem2mem_done();
			bmlog("dma_FPGAmem2ARMmem_use_acp test done\n");
			bmlog("-----------------------------------------------\n\n");
#endif

			count++;
#ifdef	LCD1602_DISP
			LCD_SetCursor(1);
			memset(cDispBuf[1], ' ', 16);
			sprintf(cDispBuf[1], "test:%d", count);
			IIC_EXfer(LCD_ADDR, cDispBuf[1], 16);
#endif
			
			asp->sra[SGI_LINUX_REQ_BM_CONSUME_BUF].linux_cmd_args = -1; //tell linux interrupt latency and dma test done!
	
		}
		//dummy k++ and toggle hps led1, waste cpu time..
		//indicate bm activity, u can measure the toggle freqency to evalute how much time spend per loop
		//loop cycle = 1s /(toggle freqency * 2 * 1048576)
		//in our case cpu = 800MHz, no interrupt to handle no work cmd,
		// loop cycle = 1/(42.384*2*1048576) * 10^9 = 11.25ns
		//if remov the K++, if((k&0xFFFFF) == 0){...} block, we can remove about 7 dissemable instrution~8 clock cycle
		// 11.25 * (6 +6)(instrution)/(6+6+8) total instrution = 6.75ns
		k++;
		if((k&0x1FFFFF) == 0)
			LED30_BLINK();
	}

}