int  main (void)

{
	u32 count = 0;
	s32 i, j, k = 0;
	u32 max, sum;
	volatile u32 tmp;
	u32 *p;
	char *pstr;
	p = (u32 *)(DRAM_PHYS_START + 0x6000);
	u32  boot;
	u32  load_start, load_end;
	u32  boot_start, boot_end;
	u32 bm_load_duration_ns, bm_load_duration_ms;

	/* get the timestamp of the osc timer1 */
	boot_end = readl((const volatile void *)SOCFPGA_OSC1TIMER1_ADDRESS + 0x4);

	InitPeripheral();
	cpu_local_irq_disable();
	
	/*  asp is cleared by the preloader , and preloader will save
	  *  qspi probe, read function in it. Please don't clear
	  *  we reuse the preloader qspi driver at here, so we save
	  *  the probe and read function pointer into asp.
	  *  it at here.
	  */
	//memset(asp, 0, sizeof(struct amp_share_param));
	asp->bm_magic = ('b' << 8) | 'm';
	
	/**** interrupt initialize*******/
    gic_Int_init();
	/**** hook ISR callback ******/
	gic_sgi_init();
	
#ifdef NEED_SAVE_RESTORE_GIC_REGS_FROM_BM
	gic_dist_save();
#endif
	/**** interrupt ready *****/
	cpu_local_irq_enable();
	UART_DEBUG("start bm...... %x\r\n", p);
	UART_DEBUG("++start bm...... 12345\r\n");
	bmlog("start bm......%x\r\n", p);
	UART_DEBUG("start bm...... %x\r\n", 0x55aa);
	UART_DEBUG("--start bm...... 67890\r\n");

	/* auto detect the boot mode */
	boot = bm_get_boot_mode();
	if((BOOTSEL_MODE_SD_1_8V == boot) || (BOOTSEL_MODE_SD_3_3V == boot))
	{		
		if(!bm_sd_load_rbf())
			bmlog("load fpga rbf is ok!\n");
	}
	else if((BOOTSET_MODE_QSPI_1_8V == boot) || (BOOTSET_MODE_QSPI_3_3V == boot))
	{
		if(!bm_qspi_load_rbf())
			bmlog("load fpga rbf is ok!\n");
	}
	
	/* osc_timer1 is running at 25MHz, 40ns per cycle */
	asp->boot_end_stamp = boot_end;
	boot_start =  asp->boot_start_stamp;
	bm_load_duration_ns = (boot_start - boot_end) * 40;
	bm_load_duration_ms = bm_load_duration_ns/1000/1000;
	bmlog("start[0x%x],end[%x]\n, bm_load_ns = %u(ns), bm_load_ms= %u(ms)\n", 
										   boot_start,   
										   boot_end,
										   bm_load_duration_ns,
										   bm_load_duration_ms
										   );
	load_start = asp->load_bm_start;
	load_end   = asp->load_bm_end;
	bmlog("real loading bm time duration is %u(ms)(including qspi/sd init)\n", load_end - load_start);

	if(fpgamgr_program_fpga((const unsigned long *)FPGA_RBF_BASE, FPGA_RBF_SIZE) < 0)	
		UART_DEBUG("config fpga failed!\r\n");

	else
	{
		UART_DEBUG("config fpga OK!\r\n");
		writel(('R'<<16)|('B'<<8)|('F'<<0),&(asp->preloader_wait_bm_load_rbf));
	}


	pstr = (char *)FPGA_SDRAM_PHYS_BASE;
	sprintf(pstr, "%s\n", "i am from fpga ddr ram");
	bmlog("%s", pstr);
	
	pstr = (char *)FPGA_SRAM_PHYS_BASE;
	sprintf(pstr, "%s\n", "i am from fpga sram");
	bmlog("%s", pstr);

#ifdef	LCD1602_DISP
	IIC_InitIp();
	LCD_SetCursor(0);
	sprintf(cDispBuf[0], "fpga config done!");
	IIC_EXfer(LCD_ADDR, cDispBuf[0], strlen(cDispBuf[0])>15?16:strlen(cDispBuf[0]));
#endif
	
	while(!gCacheCoherence)
	{
		/** led blink for hand shake debug with linux **/
		k++;
		if((k&0x3FFFF) == 0)
			LED27_BLINK();//*(volatile u32 *)HPS_GPIO1_BASE_ADDR ^= (0x1<<12);
	}
	/*
	**************************************************************
	**** cause Linux peer use 2GB user space/2GB kernel space split *******
	**** so it is 2048 L1 entry here, if use 3GB user/1 GB space split ********
	**** it woule be 1024 here, and p should adjust to 0x7000 yejc    ********
	**************************************************************
	*/
	for(j = 0; j < 2048-16; j++) //16MB for IO map space
				PageTable[j+2048] = *p++;

	/*
	**************************************************************
	**** time to bring bm core to smp cache coherence environmence *******
	**************************************************************
	*/
	__asm__ __volatile__("dsb\n"
	"isb\n"
	"mrc    p15, 0, r1, c1, c0, 0\n"
	"ldr	r2, =0x40180d\n"
	"orr	r1, r1, r2\n"
	"mcr    p15, 0, r1, c1, c0, 0\n"
	"dsb\n"
	"isb\n"
	: : :"memory", "cc");


	bmlog("L1 L2 cache enabled, SCU enabled~~~\n");
	//asp = 0xFC700000;
	
	/*
	********************************************************************************
	** from this point on, you can free to invoke the linux kernel space text function from BM,  *****
	** only if the function would not cause the shceduler to action, or the CPU of BM would trap ***** 
	** in the cpu_idle(clone from linux cpu core) process if we can't not obtain the lock/mutex  *****
	** /semaphore                                                                                                              ****
	********************************************************************************
	*/
	//printk = (printk_fn)asp->sta.printk_fn;
	_raw_spinlock = (raw_spinlock_fn)asp->sta.spinlock_lock_fn;
	_raw_spinunlock = (raw_spinlock_fn)asp->sta.spinlock_unlock_fn;
	/* semaphore */
	down_trylock = (down_trylock_fn)asp->sta._down_trylock_fn;

	while(1)
	{
		if(sgi15task_pending)
		{	
			if(ACCESS_ONCE(asp->sra[SGI_LINUX_REQ_BM_CONSUME_BUF].linux_cmd_args) == 0)
			{
			/**************************************************************************************/
			/**** place interrupt here test worse case interrupt latency would be more accuracy ***/
			/**** casue we not only take care of 10000 interrupts/second from FPGA, but also    ***/
			/**** suffer more than 6000 extra interrupts/second from IPI and offen L1 data cahce miss **/
			/**************************************************************************************/
#ifdef TEST_IL_MORE_ACCURACY
			if(testIL)
			{
				bmlog("\n________________________________________________________________________________\n");
                bmlog("Now the BM CPU would suffer more than 16000 interrupts/second(10000i/s from\n");
                bmlog("FPGA_IRQ0 req(100us),more than 6000i/s from IPI(Inner Process Interrupt), and\n");
                bmlog("process several Gbps data per second, they are all concurrently, very intensive\n");
                bmlog("load for BM CPU core!\n");
                bmlog("__________________________________________________________________________________\n");
				testIL = 0;
				gic_Int_dis (GIC_PFGA0); //72 FPGA_IRQ0
				gic_Int_clr (GIC_PFGA0);
				pvt_init(1);
				fire_fpga_irq();
				pvt_start();
				gic_Int_en (GIC_PFGA0);
			}
#endif
			/************************************************************************/
			//p = (unsigned int *)0x1E200000;
			p = (unsigned int *)0xFC800000;
			for(i = 0; i < (DRAM_BUF_SIZE/4); i++)
			{
				if(*p != CPU_DATA_PATERN0)
					bmlog("check buf from linux failed! i=%d, *p=%x\n", i, *p);
				p++;
			}
			//bmlog("check buf from linux finish!\n");
			//p = (unsigned int *)0x1E200000;
			p = (unsigned int *)0xFC800000;
			memset_int(p, CPU_DATA_PATERN3, DRAM_BUF_SIZE);
			//now u can mesure blink frequency on hps  LED3 and multiply 2 to calculate the interrupte frequency
			//and then you can evaulate how fast the cpu do 2 times DRAM_SIZE write and 2 times DRAM_SIZE read
			//you sholud know that cpu spend must time to iter, compare in the read "for loop" and BM do interrupt 
			//handler&cpu mode switch Linux handle system tick&sgi intrrupt and sched task, so the actual memory system
			//band width is greater than this evaluate value
			//be aware 65536B is bigger than L1 Dcache but little than L2 Dcache
			//evaluate data process speed in our case is:
			//1842Hz * 2toggle * 2w * 2r&check * DRAM_SIZE(65536B) = 965738496B/s = 7.73Gbps
			

			//toggle hps led
			//*(volatile u32 *)HPS_GPIO1_BASE_ADDR ^= (0x1<<12);
			LED27_BLINK();
			sgi15task_pending = 0;
			gic_raise_interrupt(CPU0, GIC_SGI13);
				}
			else if(ACCESS_ONCE(asp->sra[SGI_LINUX_REQ_BM_CONSUME_BUF].linux_cmd_args) == 1)
				{
			p = (unsigned int *)0xfe700000;
			for(i = 0; i < (SRAM_BUF_SIZE/4); i++)
			{
				if(*p != CPU_DATA_PATERN0)
					bmlog("check buf from linux failed! i=%d, *p=%x\n", i, *p);
				p++;
			}
			//bmlog("check buf from linux finish!\n");
			p = (unsigned int *)0xfe700000;
			memset_int(p, CPU_DATA_PATERN3, SRAM_BUF_SIZE);
			//now u can mesure blink frequency on hps LED2  and multiply 2 to calculate the interrupte frequency
			//and then you can evaulate how fast the cpu do 2 times SRAM_SIZE write and 2 times SRAM_SIZE read
			//you sholud know that cpu spend must time to iter, compare in the read "for loop" and BM do interrupt 
			//handler&cpu mode switch Linux handle system tick&sgi intrrupt and sched task, so the actual memory system
			//band width is greater than this evaluate value
			//be aware 32768B is bigger than L1 Dcache but little than L2 Dcache
			//evaluate data process speed in our case is:
			//3230Hz * 2toggle * 2w * 2r&check * SRAM_SIZE(32768B) = 846725120B/s = 6.77Gbps //little then DRAM cause much more interrupt overhead
			

			//toggle hps led
			//*(volatile u32 *)HPS_GPIO1_BASE_ADDR ^= (0x2<<12);
			LED28_BLINK();
			sgi15task_pending = 0;
			gic_raise_interrupt(CPU0, GIC_SGI13);
				
		}
		//bmlog("CPU1#%04d:msg from cpu1 call~~~~~~~~~~~~~~~~\n", i);
	}
		if(asp->sra[SGI_LINUX_REQ_BM_CONSUME_BUF].linux_cmd_args == 2)
		{
					for(i = 0; i < SPINLOCK_TEST_COUNT; i++)
        			{
                		_raw_spinlock(&asp->rslocks[0]);
                		tmp = asp->sra[SGI_LINUX_REQ_BM_CONSUME_BUF].bm_cmd_status;
                		//dummy j++
                		j++;
                		asp->sra[SGI_LINUX_REQ_BM_CONSUME_BUF].bm_cmd_status += 2;
                		if((asp->sra[SGI_LINUX_REQ_BM_CONSUME_BUF].bm_cmd_status - tmp) != 2)
                        	bmlog("BM:spinlock test failed!\n");
                		_raw_spinunlock(&asp->rslocks[0]);
                		//dummy operation on j++ simulate the actual scenario to give another cpu chance
                		//to take lock, reduce starvation situation
                		j++;
        			}
					//bmlog("\nBM spinlock test:%d\n", tmp + 2);
					bmlog("\n----------------------------\n");
					bmlog("BM spinlock test:%d\n", tmp + 2);
					bmlog("----------------------------\n");


			/*************************************************************************************/
			/**** place interrupt test here test cpu data process speed would be more accuracy ***/
			/*************************************************************************************/
#ifdef TEST_DATA_PROCESS_SPEED_MORE_ACCURACY
			gic_Int_dis (GIC_PFGA0); //72 FPGA_IRQ0
			gic_Int_clr (GIC_PFGA0);
			pvt_init(1);
			fire_fpga_irq();
			pvt_start();
			gic_Int_en (GIC_PFGA0);
#endif
			/************************************************************************/
			while(!ACCESS_ONCE(gINTtestDone));
			ACCESS_ONCE(gINTtestDone) = 0;
			for(i = 0; i < IL_TEST_COUNT - 1; i++)
			{
				p_pvt[i] = p_pvt[i] - p_pvt[i + 1]; //delta t of pvt, be careful pvt counter overlap!
			}
			for(i = 0; i < IL_TEST_COUNT - 1; i++)
			{
				if(p_pvt[i] > PVT_100US_CYCLE)
					p_pvt[i] -= PVT_100US_CYCLE; /* pvt 10ns resolution, 10000 * 10ns = 100us*/ //IL(interrupt latency jitter)
				else
					p_pvt[i] = PVT_100US_CYCLE - p_pvt[i];
			}
			max = p_pvt[0];
			k = 0;
			for(i = 0; i < IL_TEST_COUNT - 1; i++)
				if(p_pvt[i] > max)
				{
					max = p_pvt[i];
					k = i;
				}
			max *= 10;
			bmlog("\n------------------------------------------------------------------------------------------\n");
			//bmlog("\ninterrupt latency test method 1(use private timer)\n");
			bmlog("interrupt latency test method 1(use private timer)\n");
			//bmlog("max interrupt latency jitter: %d ns\n", max);
			bmlog("max interrupt latency jitter: %d ns\n", max);
			sum = 0;
			for(i = 0; i < IL_TEST_COUNT - 1; i++)
			{
				sum += p_pvt[i];				//be carefule sum overflow
			}
			sum /= (IL_TEST_COUNT - 1);
			sum *= 10;
			//bmlog("average interrupt latency jitter: %d ns\n", sum);
			bmlog("average interrupt latency jitter: %d ns\n", sum);
			//90ns is measure from oscilloscope, see AMP Reference Design for detail
			bmlog("max interrupt latency: %d ns(use private timer@CPU core cluster)\n", max + 90);
			if(max>410)
			    bmlog("max interrupt latency: %d ns(use private timer@CPU core cluster),max_index=%d\n", max + 90, k); 
			bmlog("average interrupt latency: %d ns(use private timer@CPU core cluster)\n", sum + 90);
			bmlog("-----------------------------------------------------------------------------------------------\n");


			bmlog("\n-----------------------------------------------------------------------------------------------\n");
			//bmlog("\nfpga send %d times irq req to arm, arm ack %d times, lost irq %d times\n", gFPGA_IRQ_req, gARM_IRQ_ack, gFPGA_IRQ_req - gARM_IRQ_ack);
			bmlog("fpga send %d times irq req to arm, arm ack %d times, lost irq %d times\n", 
			ACCESS_ONCE(gFPGA_IRQ_req), ACCESS_ONCE(gARM_IRQ_ack), ACCESS_ONCE(gFPGA_IRQ_req) - ACCESS_ONCE(gARM_IRQ_ack));
			bmlog("-------------------------------------------------------------------------------------------------\n");

			testIL = 1;
#ifdef DMA_AND_ACP_TEST
			dma_init();
			//DMAC_regs_dump(0);
			dma_mem2mem();//new for test
			//DMAC_regs_dump(0);
			while(!ACCESS_ONCE(gDMAtestDone));
			ACCESS_ONCE(gDMAtestDone) = 0;
			dma_mem2mem_done();
			//DMAC_regs_dump(0);
			bmlog("DMA test done\n");
			bmlog("-----------------------------------------------\n\n");

			dma_mem2mem_use_acp();
			while(!ACCESS_ONCE(gDMAtestDone));
			ACCESS_ONCE(gDMAtestDone) = 0;
			dma_mem2mem_use_acp_done();
			bmlog("DMA test acp done\n");
			bmlog("-----------------------------------------------\n\n");

			dma_ARMmem2FPGAmem();
			while(!ACCESS_ONCE(gDMAtestDone));
			ACCESS_ONCE(gDMAtestDone) = 0;
			dma_mem2mem_done();
			bmlog("dma_ARMmem2FPGAmem test done\n");
			bmlog("-----------------------------------------------\n\n");

			dma_FPGAmem2ARMmem_use_acp();
			while(!ACCESS_ONCE(gDMAtestDone));
			ACCESS_ONCE(gDMAtestDone) = 0;
			dma_mem2mem_done();
			bmlog("dma_FPGAmem2ARMmem_use_acp test done\n");
			bmlog("-----------------------------------------------\n\n");
#endif

			count++;
#ifdef	LCD1602_DISP
			LCD_SetCursor(1);
			memset(cDispBuf[1], ' ', 16);
			sprintf(cDispBuf[1], "test:%d", count);
			IIC_EXfer(LCD_ADDR, cDispBuf[1], 16);
#endif
			
			asp->sra[SGI_LINUX_REQ_BM_CONSUME_BUF].linux_cmd_args = -1; //tell linux interrupt latency and dma test done!
	
		}
		//dummy k++ and toggle hps led1, waste cpu time..
		//indicate bm activity, u can measure the toggle freqency to evalute how much time spend per loop
		//loop cycle = 1s /(toggle freqency * 2 * 1048576)
		//in our case cpu = 800MHz, no interrupt to handle no work cmd,
		// loop cycle = 1/(42.384*2*1048576) * 10^9 = 11.25ns
		//if remov the K++, if((k&0xFFFFF) == 0){...} block, we can remove about 7 dissemable instrution~8 clock cycle
		// 11.25 * (6 +6)(instrution)/(6+6+8) total instrution = 6.75ns
		k++;
		if((k&0x1FFFFF) == 0)
			LED30_BLINK();
	}

}
Beispiel #2
0
void main() 
{
	setup_wdt(WDT_ON);		// Watchdog starten
	
	InitPeripheral();

	lcd_init();
	
	systeminit();			// Werte aus dem EEPROM lesen


	lcd_putc('\f');			// Clear display 
	lcd_send_byte(0,0x0C);	// Cursor aus
	Menue = 0;
	Pfad = 0;

	GutMeldung=0;

	if(!Tag)Jahrwartung=1;	

	for(;;)
	{
        restart_wdt();                  // Watchdog rücksetzen

		/////////////////////////////////////////////
		// Analogwerte einlesen, auswerten und anzeigen

		memset(Zeile1, 0x20, sizeof(Zeile1));
		memset(Zeile2, 0x20, sizeof(Zeile2));
		
		if (input(Wartung ))
		{	
			NeuMenue=1;
			
			ADW();
			
			if(!KSZ)
			{
				Alarmpunkte();
				
				if(Alarmalt<Alarm)Hupen=1; 
				Alarmalt=Alarm; 
	 
				if(Stoerung[0]||Stoerung[1])Gutmeldung=0; 
				else Gutmeldung=32; 
				 
				PORTD=Out[0]|Out[1]|GutMeldung|Hupen;
			}
							
			TasterReset();

			if(!JahrWartung)
			{	
				WertAnzeige();
			}
			else AusWart();
		}

		///////////////////////////////////////////
		// oder Konfigurationsmenue 

		else 
		{
			if(NeuMenue)
			{
				Passwort = 1;
				p[0]='0';
				p[1]='0';
				p[2]='0';
				p[3]='0';
				Pos=2;
				Menue=0;
				Pfad=0;
				NeuMenue=0;
			}
			
			if(Passwort)
			{

				PWDisplay();
				
				lcd_gotoxy(Pos,2);
				lcd_send_byte(0,0x0F);	// Cursor ein
				Tasterauswertung();
			}
			else			
			{
				Tasterauswertung();
				Menuefuehrung();
				Untermenue();
				NeuerWert=0;
			}
		}

	}//for(;;)
}