/*
 * display
 *
 * Called at the end of every accumulation loop. Samples the 64-bit timestamp
 * counter and converts it to seconds using the timestamp frequency. When
 * *countIn identifies the last of the `required` loops, the elapsed time
 * (relative to *startTime, in seconds) is printed and the program exits.
 * Otherwise *countOut is set to *countIn + 1.
 *
 * rows, columns, outputArray - result matrix; only `rows` is reported unless
 *                              the PRINT_RESULT_ARRAY debug dump is compiled in
 * startTime                  - start time in seconds
 * countIn                    - number of loops completed so far
 * countOut                   - receives the incremented loop count
 */
void display (int rows, int columns, long *outputArray, double *startTime, long *countIn, long *countOut)
{
	/* Signed, so it matches both *countIn and the "%ld" conversion below. */
	const long			required = 1000000;
	Types_Timestamp64	endTime64;
	Types_FreqHz		freq;
	unsigned long long	endClockCycles;
	double				endTime, timeTaken;

	Timestamp_get64(&endTime64);
	Timestamp_getFreq(&freq);

	/* Combine the two 32-bit halves with unsigned arithmetic (no signed overflow). */
	endClockCycles = ((unsigned long long)endTime64.hi << 32) | endTime64.lo;
	endTime = endClockCycles / (double)freq.lo;
	timeTaken = endTime - *startTime;

	if (*countIn == required - 1)
	{
#ifdef PRINT_RESULT_ARRAY
		/* Optional debug dump of the result matrix; disabled by default. */
		{
			int i, j;

			printf ("\nResulting Array: \n");
			for (i = 0; i < rows; i++)
			{
				for (j = 0; j < columns; j++)
				{
					printf("%ld\t", *((outputArray+i*columns) + j));
				}
				printf("\n");
			}
		}
#endif
		printf("\n%ld loop(s) accumulating square matrices of size %d took %fs\n", required, rows , timeTaken);
		exit(0);
	}

	*countOut = *countIn + 1;
}
Exemple #2
0
/*
 * Print the profiling statistics accumulated in pHndl: elapsed time in
 * milliseconds, count, number of frames, frames per second and frames per
 * count. When resetAfterPrint is true the handle is reset afterwards.
 *
 * Any ratio whose divisor is zero is reported as 0 instead of dividing by
 * zero. Always returns 0.
 */
Int32 Utils_prfTsPrint(Utils_PrfTsHndl * pHndl, Bool resetAfterPrint)
{
    UInt32 timeMs = 0;
    UInt32 fps = 0;
    UInt32 fpc = 0;
    UInt32 cpuKhz;
    Types_FreqHz cpuHz;

    Timestamp_getFreq(&cpuHz);

    /*
     * Only the low 32 bits of the frequency are used.
     * TODO (from the original code): the 64-bit timestamp is not working.
     */
    cpuKhz = cpuHz.lo / 1000;                              /* convert to kHz */

    if (cpuKhz != 0)
    {
        timeMs = (pHndl->totalTs) / cpuKhz;                /* ticks -> msecs */
    }

    if (timeMs != 0)
    {
        fps = (pHndl->numFrames * 1000) / timeMs;          /* frames per second */
    }

    if (pHndl->count != 0)
    {
        fpc = (pHndl->numFrames) / pHndl->count;           /* frames per count */
    }

    Vps_printf(" %d: PRF : %s : t: %d ms, c: %d, f: %d, fps: %d, fpc: %d \r\n",
               Utils_getCurTimeInMsec(), pHndl->name,
               timeMs,                                     /* in msecs */
               pHndl->count, pHndl->numFrames,
               fps,                                        /* frames per second */
               fpc                                         /* frames per count */
        );

    if (resetAfterPrint)
    {
        Utils_prfTsReset(pHndl);
    }

    return 0;
}
Exemple #3
0
/*
 *  ======== delayMicroseconds ========
 *  Busy-wait for approximately the given number of microseconds.
 *
 *  Four ranges of 'us' are handled differently:
 *    us < 7         : return immediately
 *    7 <= us <= 20  : spin 5*(us-6) nop iterations
 *    20 < us < 70   : spin 5*us nop iterations
 *    us >= 70       : poll the 32-bit timestamp until enough ticks elapsed
 *
 *  NOTE(review): the nop-loop multipliers look calibrated for one particular
 *  CPU clock and compiler setting -- confirm before reusing on another target.
 */
void delayMicroseconds(unsigned int us)
{
    if (us <7) {
        //The overhead in calling and returning from this function takes about 6us
    }
    else if (us <=20) {
        /* Short delay: subtract the ~6us call overhead noted above, then spin. */
        int time;
        for (time = 5*(us-6); time > 0; time--) {
            asm("   nop");
        }
    }
    else if (us < 70) {
        /* Medium delay: spin without compensating for the call overhead. */
        int time;
        for (time = 5*us; time > 0; time--) {
            asm("   nop");
        }
    }
    else {
        /* Long delay: measure elapsed time with the timestamp counter. */
        uint32_t t0, deltaT;
        Types_FreqHz freq;

        Timestamp_getFreq(&freq);
        /*
         * Ticks to wait = us * (ticks per microsecond). The integer division
         * discards any fractional MHz part of the timestamp frequency.
         */
        deltaT = us * (freq.lo/1000000);

        t0 = Timestamp_get32();

        /* Spin until the tick difference since t0 reaches deltaT. */
        while ((Timestamp_get32()-t0) < deltaT) {
            ;
        }
    }
}
/*
 * display
 *
 * Samples the 64-bit timestamp counter, converts it to seconds with the
 * timestamp frequency, prints the time elapsed since *startTime together
 * with the raw end cycle count, and terminates the program.
 *
 * rowsA             - matrix size reported in the message
 * columnsB, arrayC  - result matrix; only used by the optional
 *                     PRINT_RESULT_ARRAY debug dump
 * startTime         - start time in seconds
 */
void display (int rowsA, int columnsB, long *arrayC, double *startTime)
{
	Types_Timestamp64	endTime64;
	Types_FreqHz		freq;
	unsigned long long	endClockCycles;
	double				endTime, timeTaken;

	Timestamp_get64(&endTime64);
	Timestamp_getFreq(&freq);

	/* Combine the two 32-bit halves with unsigned arithmetic (no signed overflow). */
	endClockCycles = ((unsigned long long)endTime64.hi << 32) | endTime64.lo;
	endTime = endClockCycles / (double)freq.lo;
	timeTaken = endTime - *startTime;

#ifdef PRINT_RESULT_ARRAY
	/* Optional debug dump of the result matrix; disabled by default. */
	{
		int i, j;

		printf ("\nResulting Array: \n");

		for (i = 0; i < rowsA; i++)
		{
			for (j = 0; j < columnsB; j++)
			{
				printf("%ld\t", *(arrayC+((i*columnsB) + j)));
			}
			printf("\n");
		}
	}
#endif

	printf("\nMultiplication of %d square matrices took %fs and %llu clock cycles\n", rowsA, timeTaken, endClockCycles);

	exit(0);
}
void generate (int rowsA, int columnsA, int rowsB, int columnsB, long *arrayA, long *arrayB, double *startTime)
{
	printf("\n\nCross Core Multiplication Beginning\n");	// Print information message

	int generationCount = 1;		// Used in generation of the arrays
	int i, j = 0;					// Used to count rows and columns of the arrays
	Types_Timestamp64	startTime64;	// 64 bit timestamp
	Types_FreqHz		freq;			// frequency of cores

	for (i = 0; i < rowsA; i++)			// Generate array A
	{
		for (j = 0; j < columnsA; j++)
		{
			*((arrayA+i*columnsA) + j) = (generationCount);		// Initialise arrayA
		}
		generationCount++;
	}

	generationCount = 1;

	for (i = 0; i < rowsB; i++)			// Generate array B
	{
		for (j = 0; j < columnsB; j++)
		{
			*((arrayB+i*columnsB) + j) = (generationCount);		// Initialise arrayB
		}
		generationCount++;
	}

	Timestamp_getFreq(&freq);		// Get the frequency of the cores
	Timestamp_get64(&startTime64);	// Get the starting timestamp
	*startTime = ((startTime64.lo/(double)freq.lo));	// Calculate a time in seconds for use in timestamping
}
Exemple #6
0
/*
 *  ======== Rta_getCpuSpeed ========
 *  Query the Timestamp frequency and copy it into the response packet:
 *  resp0 receives the high word and resp1 the low word.
 */
Void Rta_getCpuSpeed(Rta_ResponsePacket *resp)
{
    Types_FreqHz tsFreq;

    Timestamp_getFreq(&tsFreq);

    resp->resp0 = tsFreq.hi;    /* upper 32 bits of the frequency */
    resp->resp1 = tsFreq.lo;    /* lower 32 bits of the frequency */
}
Exemple #7
0
/*
 *  ======== micros ========
 *  Return the current 64-bit timestamp converted to microseconds, truncated
 *  to unsigned long.
 *
 *  The conversion is done as whole seconds plus the sub-second remainder so
 *  that it also works when the timestamp frequency is below 1 MHz or is not
 *  a whole number of MHz. For whole-MHz frequencies the result is the same
 *  as dividing the tick count by the number of ticks per microsecond.
 *  Only the low 32 bits of the frequency are used. Returns 0 if that value
 *  is 0.
 */
unsigned long micros(void)
{
    Types_FreqHz freq;
    Types_Timestamp64 time;
    uint64_t ticks;
    uint64_t wholeSeconds;
    uint64_t remainderTicks;

    Timestamp_getFreq(&freq);
    Timestamp_get64(&time);

    if (freq.lo == 0) {
        /* No usable frequency; avoid dividing by zero. */
        return (0);
    }

    ticks = ((uint64_t)time.hi << 32) | time.lo;

    wholeSeconds = ticks / freq.lo;
    remainderTicks = ticks % freq.lo;

    /* remainderTicks < 2^32, so remainderTicks * 1000000 fits in 64 bits. */
    return (unsigned long)((wholeSeconds * 1000000u) +
                           ((remainderTicks * 1000000u) / freq.lo));
}
Exemple #8
0
/*
 * Update the interrupt latency bookkeeping in latencyMeasure using the
 * current 32-bit timestamp. Does nothing unless latencyMeasure->start is set.
 *
 * - When prevIntTime is 0, the timestamp frequency is scaled by
 *   UTILS_FREQPERMICROSEC_DIV_FACTOR and stored in timerFreqPerMicroSec,
 *   and the late-interrupt counter is cleared.
 * - When the timestamp has advanced past prevIntTime, the elapsed time is
 *   compared against expectedInterruptInterval + maxAllowedLatency; if it is
 *   larger, the current task handle is recorded and numLateInts incremented.
 * - Otherwise (timestamp not greater than prevIntTime) no check is made.
 *
 * prevIntTime is then set to the current timestamp. intId is not used.
 */
Void Utils_IntLatencyCalculate(Utils_IntLatencyMeasure * latencyMeasure,
                               UInt intId)
{
    UInt32 now;

    if (!latencyMeasure->start)
    {
        return;
    }

    now = Timestamp_get32();

    if (latencyMeasure->prevIntTime == 0)
    {
        /* No previous sample: set up the tick scaling and the counter */
        Types_FreqHz tsFreq;
        Bits64 scaledFreq;

        Timestamp_getFreq(&tsFreq);
        scaledFreq = ((Bits64) tsFreq.hi << 32) | tsFreq.lo;
        scaledFreq /= UTILS_FREQPERMICROSEC_DIV_FACTOR;

        latencyMeasure->timerFreqPerMicroSec = (UInt32) scaledFreq;
        latencyMeasure->numLateInts = 0;
        /* Disabled in the original: latencyMeasure->hHwi = Hwi_getHandle(intId); */
    }
    else if (latencyMeasure->prevIntTime < now)
    {
        UInt32 elapsed = (now - latencyMeasure->prevIntTime) /
            latencyMeasure->timerFreqPerMicroSec;

        if (elapsed > (latencyMeasure->expectedInterruptInterval +
                       latencyMeasure->maxAllowedLatency))
        {
            /* Slots are reused cyclically once the table is full */
            UInt32 slot =
                latencyMeasure->numLateInts % UTILS_INTLATENCY_LATE_IRP_COUNT;

            latencyMeasure->lateIntIrp[slot] = (UInt32) Task_self();
            latencyMeasure->numLateInts++;
        }
    }

    latencyMeasure->prevIntTime = now;
}
Exemple #9
0
/*
 *  ======== cpuLoadInit ========
 *  Pre-compute the load values for the Hwi, Swi and Task threads for every
 *  load step, as a percentage of the timestamp counts available per run.
 */
Void cpuLoadInit(Void)
{
    Types_FreqHz tsFreq;
    ULong countsPerRun;
    Int step;

    /*
     * The timestamp frequency is the number of counts per second, i.e. a
     * 100% load. The load runs NUMPERSEC times, so divide it accordingly.
     */
    Timestamp_getFreq(&tsFreq);
    countsPerRun = tsFreq.lo / NUMPERSEC;

    /* One entry per load interval (noted as 5 seconds in the original) */
    step = 0;
    while (step < LOAD_STEPS) {
        hwiLoadValue[step] = hwiLoadPercent[step] * countsPerRun / 100;
        swiLoadValue[step] = swiLoadPercent[step] * countsPerRun / 100;
        taskLoadValue[step] = taskLoadPercent[step] * countsPerRun / 100;
        step++;
    }
}
void generate (int rowsA, int columnsA, int rowsB, int columnsB, long *arrayA, long *arrayB, double *startTime)
{
	// !!! TODO: add checking around the matrices size

	printf("\n\nCross core multiplication beginning of %d square matrices\n", rowsA);

	int 				generationCount = 1;
	Types_Timestamp64	startTime64;
	Types_FreqHz		freq;

	unsigned int generateEndTime;
	int i, j = 0;

	for (i = 0; i < rowsA; i++)			// Generate array A
	{
		for (j = 0; j < columnsA; j++)
		{
			*(arrayA+((i*columnsA) + j)) = (generationCount);
		}
		generationCount++;
	}

	generationCount = 1;

	for (i = 0; i < rowsB; i++)			// Generate array B
	{
		for (j = 0; j < columnsB; j++)
		{
			*(arrayB+((i*columnsB) + j)) = (generationCount);
		}
		generationCount++;
	}

	Timestamp_getFreq(&freq);
	Timestamp_get64(&startTime64);
	*startTime = ((startTime64.lo/(double)freq.lo));
}
Exemple #11
0
/*
 *  ======== taskLoad ========
 */
Void taskLoad(Void)
{
    Bool flag;
    Types_Timestamp64 startTime;
    Types_Timestamp64 currentTime;
    Types_FreqHz freq;
    UInt32 count;
    Int loops;

    /* Have this task use ~50% of the CPU */
    Timestamp_getFreq(&freq);
    count = freq.lo / 1000 / 1000 * (Clock_tickPeriod/ 2);

    while (TRUE) {        
        Semaphore_pend(loadSem, BIOS_WAIT_FOREVER);        

        Log_write1(UIABenchmark_start, (xdc_IArg)"running");
        Timestamp_get64(&startTime);

        flag = TRUE;
        loops = 0;

        while (flag == TRUE) {
            Timestamp_get64(&currentTime);

            loops++;

            // TODO deal with wrap
            if (startTime.lo + count <= currentTime.lo) {
                flag = FALSE;
                Log_write1(UIABenchmark_stop, (xdc_IArg)"running");                
                Log_write1(UIABenchmark_stop, (xdc_IArg)"whole");
            }
        }
    }
}
Exemple #12
0
/*
 *  ======== printStatistics ========
 *  Post-process the recorded raw timestamps and print the benchmark report.
 *
 *  Steps, in order:
 *    1. scale every raw timestamp by cpuFreq/timerFreq,
 *    2. derive per-hop latencies from consecutive timestamps,
 *    3. overwrite latencies that look like rollover artifacts,
 *    4. compute statistics with getStats() and print them.
 *
 *  Reads and modifies the file-level arrays rawtimestamps[] and latencies[].
 */
Void printStatistics()
{
    UInt32 timeElapsed;
    UInt i;
    Types_FreqHz timerFreq, cpuFreq;
    Float cpuTimerFreqRatio;
    
    Timestamp_getFreq(&timerFreq);
    BIOS_getCpuFreq(&cpuFreq);
    
    /* Only the low 32 bits of both frequencies are used */
    cpuTimerFreqRatio = (Float)cpuFreq.lo / (Float)timerFreq.lo;

    /* Convert timestamps to CPU time */
    for (i = 0; i < NUMLOOPS; i++) {
        rawtimestamps[i] *= cpuTimerFreqRatio;
    }
    
    /* Latency = difference of consecutive timestamps divided by numCores */
    for (i = 0; i < NUMLOOPS - 1; i++) {
        latencies[i] = (rawtimestamps[i + 1] - rawtimestamps[i]) / numCores;
    }

    /*
     * Squelch any rollover-affected latencies: an entry more than four times
     * its successor is replaced by the successor (latency and timestamp).
     */
    for (i = 0; i < NUMLOOPS - 2; i++) {
        if (latencies[i] > 4 * latencies[i+1]) {
            latencies[i] = latencies[i+1];
            rawtimestamps[i] = rawtimestamps[i+1];
        }
    }

    /* The first NUMIGNORED latencies are excluded from the statistics */
    getStats(latencies + NUMIGNORED, NUMLOOPS - NUMIGNORED - 2, &latencyStats);
    
    /*
     * NOTE(review): the end index is NUMLOOPS - NUMIGNORED - 2 although the
     * statistics window starts at NUMIGNORED -- confirm the intended range.
     */
    timeElapsed =  rawtimestamps[NUMLOOPS - NUMIGNORED - 2] - 
            rawtimestamps[NUMIGNORED];
    /* Throughput = time elapsed divided by total number of hops (not computed here) */
    
    System_printf("======== SYSTEM ATTRIBUTES ======== \n");
    System_printf("Device name:                  %s\n", DEVICENAME);
    System_printf("Processor names:              %s\n", PROCNAMES);
    System_printf("CPU Freq:                     %d MHz\n", 
        cpuFreq.lo / 1000000);
    System_printf("Timer Freq:                   %d MHz\n\n", 
        timerFreq.lo / 1000000);

    System_printf("======== BENCHMARK ATTRIBUTES ======== \n");
    System_printf("Notify setup delegate:        %s\n", NOTIFYSETUP);
    System_printf("Number of processors:         %d\n", numCores);
    System_printf("Number of notifications:      %d\n", latencyStats.numVals);
    System_printf("Build profile:                %s\n\n", BUILDPROFILE);

    System_printf("======== NOTIFY BENCHMARK RESULTS ======== \n");    
    System_printf("Average 1-way latency:        %10d (cycles/msg)           %10d (ns/msg)\n", 
        (UInt32)latencyStats.mean, CYCLES_TO_NS(latencyStats.mean, cpuFreq.lo));
    System_printf("Maximum 1-way latency:        %10d (cycles/msg) (#%5d)  %10d (ns/msg)\n", 
        latencyStats.max, latencyStats.maxIndex, CYCLES_TO_NS(latencyStats.max, cpuFreq.lo));
    System_printf("Minimum 1-way latency:        %10d (cycles/msg) (#%5d)  %10d (ns/msg)\n", 
        latencyStats.min, latencyStats.minIndex, CYCLES_TO_NS(latencyStats.min, cpuFreq.lo)); 
    System_printf("Standard deviation:           %10d (cycles/msg)\n", 
        (UInt32)latencyStats.stddev);
    System_printf("Total time elapsed:           %10d (cycles)     %10d (us)\n",
        timeElapsed, CYCLES_TO_US(timeElapsed, cpuFreq.lo));
}
Exemple #13
0
/**
 * Send all prepared IPC messages to the processing cores and collect the
 * calculation result (ssd/jac/hess).
 *
 * One message per participating core is sent, starting at the highest core.
 * The function then waits for one reply per core and, for the SSD processing
 * types, sums the partial results into the output buffers.
 *
 * @param ProcessingType   kind of calculation requested from the cores
 * @param number_of_cores  number of cores taking part in the calculation
 * @param SSD              receives the summed SSD (pt_ssd and pt_ssdJacHess)
 * @param JD               receives the summed 3-element result (pt_ssdJacHess only)
 * @param JD2              receives the summed 9-element result (pt_ssdJacHess only)
 *
 * On a send or receive failure the error is logged and shutdown_message_q()
 * is called. A reply that could not be received is neither traced nor added
 * to the sums.
 */
void send_to_cores(const processing_type_e ProcessingType, const uint32_T number_of_cores, real32_T *SSD, real32_T JD[3], real32_T JD2[9])
{
    process_message_t * p_msg = 0;
    uint16_t msgId = 0;
    int32_T ret_val=0;
#ifdef _TRACE_MC_
    Types_FreqHz freq;
    float processing_time=0;
    UInt32 ts1, ts2;    /* unsigned so that the tick difference survives a counter wrap */
#endif

    int32_t j;
    int32_t i;

#ifdef _TRACE_MC_
    logout("[MAIN  ] Execute Process (ProcessingType=%u)\n", ProcessingType);		//trace
    Timestamp_getFreq(&freq);
#endif

#ifdef _DO_ERROR_CHECKS_
    /* NOTE(review): this is only logged; processing continues with a NULL queue -- confirm intended */
    if(NULL == h_receive_queue)
    {
        logout("No master msg receive queue available.\n", max_core);
    }

    if ((number_of_cores <= 0) || (number_of_cores > max_core)) {
        logout("Invalid number_of_cores: It should be between 1 to %u\n", max_core);
        ret_val = -1;
        goto mcip_process_error;
    }
#endif

    //CACHING NOTE:
    //The picture data was cache write backed after images have been received. More
    //data is not to be cache write backed as we pass all other data (also arrays
    //element by element) to the cores using the message queue. Results are passed
    //back also using the message interface as we don't receive bulk data results.

#ifdef _TRACE_MC_
    ts1 = (UInt32) Timestamp_get32();
#endif

    /* Send messages to processing cores, start at the highest core */
    for (i = CORE_AMOUNT-1; i >= (int)(CORE_AMOUNT-number_of_cores); i-- ) {
        p_msg = p_queue_msg[i];
        MessageQ_setMsgId(p_msg, ++msgId);
        MessageQ_setReplyQueue(h_receive_queue, (MessageQ_Msg)p_msg);

#ifdef _TRACE_MC_
        logout("[MAIN  ] Start process on core %u (ProcessingType=%u)\n", p_msg->core_id, ProcessingType, p_msg->info.NewImageDataArrived);		//trace
#endif

        /* send the message to the remote processor */
        if (MessageQ_put(queue_id[p_msg->core_id], (MessageQ_Msg)p_msg) < 0) {
            logout("MessageQ_put had a failure error\n");
            ret_val = -1;
            goto mcip_process_error;
        }
    }

    //All cores have invalidated their cache to read new image data. Next time cache invalidation is no more necessary (until new image data arrives).
    g_NewImageDataArrived = 0;
#ifdef _TRACE_MC_
    logout("[MAIN  ] Reset g_NetImageDataArrived signal to %d.\n", g_NewImageDataArrived);
#endif

    //Clear result buffers (will be summed up, have to start at 0)
    if(pt_ssd == ProcessingType || pt_ssdJacHess == ProcessingType)
    {
        (*SSD)=0;
        if(pt_ssdJacHess == ProcessingType)
        {
            memset(JD, 0, sizeof(real32_T) * 3);
            memset(JD2, 0, sizeof(real32_T) * 9);
        }
    }

    //ToDo: Once it looked like all other cores finished calculating before core 0 started. Why ?
    //One could think of having no mcip_core_task at the main core and call the calculation directly instead ... Use _TRACE_MC_ (only) to see this
    //ToDo: When adding a big sleep command to the processing functions one should see if there's something wrong

    /* Receive the result */
    for (i = (CORE_AMOUNT-number_of_cores); i < CORE_AMOUNT; i++) {

        if (MessageQ_get(h_receive_queue, (MessageQ_Msg *)&p_msg, MessageQ_FOREVER) < 0) {
            logout("This should not happen since timeout is forever\n");
            ret_val = -1;
            /* p_msg does not refer to a received reply: do not trace or accumulate it */
            continue;
        }/* else if (p_msg->info.flag != 0) {
           logout("Process image error received from core %d\n", p_msg->core_id);
           ret_val = -1;
       }*/
#ifdef _TRACE_MC_
        if(pt_ssd == ProcessingType || pt_ssdJacHess == ProcessingType)
        {
            logout("[MAIN  ] process answer received from core %u (SSD=%f, ProcessingType=%u)\n", p_msg->core_id, (double)p_msg->info.out_SSD, ProcessingType);		//trace
            if(pt_ssdJacHess == ProcessingType)
            {
                logout("[MAIN  ] JD = [%f %f %f], JD2 = [%f ... %f ... %f]\n", (double)p_msg->info.out_JD[0], (double)p_msg->info.out_JD[1], (double)p_msg->info.out_JD[2],
                       (double)p_msg->info.out_JD2[0], (double)p_msg->info.out_JD2[4], (double)p_msg->info.out_JD2[8]);
            }
        }
        else
        {
            logout("[MAIN  ] process answer received from core %u (ProcessingType=%u)\n", p_msg->core_id, ProcessingType);		//trace
        }
#endif

        //Sum up the results
        if(pt_ssd == ProcessingType || pt_ssdJacHess == ProcessingType)
        {
            (*SSD) += p_msg->info.out_SSD;
            if(pt_ssdJacHess == ProcessingType)
            {
                for(j=0; j<3; j++)
                {
                    JD[j] += p_msg->info.out_JD[j];
                }
                for(j=0; j<9; j++)
                {
                    JD2[j] += p_msg->info.out_JD2[j];
                }
            }
        }
    }

    if (ret_val == -1) {
        goto mcip_process_error;
    }

#ifdef _TRACE_MC_
    /* Unsigned subtraction yields the elapsed ticks even if the counter wrapped once */
    ts2 = (UInt32) Timestamp_get32() - ts1;
    processing_time = ((float)ts2 / (float)freq.lo);
    if(pt_ssd == ProcessingType || pt_ssdJacHess == ProcessingType)
    {
        logout("[MAIN  ] SSD calculated in: %f s. Result = %f\n", processing_time, (double)(*SSD));		//trace
        if(pt_ssdJacHess == ProcessingType)
        {
            logout("[MAIN  ] JD = [%f %f %f], JD2 = [%f ... %f ... %f]\n", (double)JD[0], (double)JD[1], (double)JD[2], (double)JD2[0], (double)JD2[4], (double)JD2[8]);
        }
    }
    else
    {
        logout("[MAIN  ] Image shrinked in: %f s.\n", processing_time);		//trace
    }
#endif

    return;

mcip_process_error:
    logout("mcip_process_error !!! \n");
    shutdown_message_q();
}
Exemple #14
0
/*
 *  ======== Timer_checkFreq ========
 *  Compare the timer's configured frequency against the Timestamp counter.
 *
 *  A temporary copy of 'obj' is started with interrupts disabled, and the
 *  timer and timestamp counters are sampled before and after at least
 *  TIMERCOUNTS timer counts. If the measured timestamp/timer tick ratio is
 *  more than twice or less than half the ratio of the reported frequencies,
 *  Timer_E_freqMismatch is raised with the configured and measured frequency.
 */
Void Timer_checkFreq(Timer_Object *obj)
{
    UInt key;
    UInt32 timerCountStart, timerCountEnd, tsCountStart, tsCountEnd;
    UInt32 deltaTs, deltaCnt;
    Types_FreqHz timerFreq, timestampFreq;
    UInt freqRatio;
    UInt32 actualFrequency;
    Timer_Object tempObj;
    
    /* 
     *  Make a temporary copy of 'obj' and modify it to be used for the timer
     *  frequency check.  Set the period to Timer_MAX_PERIOD to ensure that
     *  the timer does not roll over while performing the check.  
     */
    memcpy((void *)&tempObj, (void *)obj, sizeof(Timer_Object));
    tempObj.period = Timer_MAX_PERIOD;
    tempObj.periodType = Timer_PeriodType_COUNTS;
    tempObj.runMode = Timer_RunMode_ONESHOT;
    tempObj.startMode = Timer_StartMode_USER;

    /* Initialize the timer registers */
    Timer_deviceConfig(&tempObj, NULL);

    /* Get the frequencies of the Timer and the Timestamp */
    Timer_getFreq(&tempObj, &timerFreq);
    Timestamp_getFreq(&timestampFreq);

    /* Assume that timer frequency is less than 2^32 Hz */
    Assert_isTrue(timestampFreq.hi == 0 && timerFreq.hi == 0, NULL);

    /* Expected number of timestamp ticks per timer count (integer ratio) */
    freqRatio = timestampFreq.lo / timerFreq.lo;

    key = Hwi_disable();
    
    /* 
     *  Warning: halting the core between Timer_start and the point of
     *  code indicated below can cause the frequency check to fail.  This
     *  is because the DMTimer will continue to run while this core is halted,
     *  thus causing the ratio between timer counts to change
     */
    Timer_start(&tempObj);

    /* Record the initial timer & timestamp counts */
    timerCountStart = Timer_getCount(&tempObj);
    tsCountStart = Timestamp_get32();

    /* Wait for 'TIMERCOUNTS' timer counts to elapse */
    while (Timer_getCount(&tempObj) < timerCountStart + TIMERCOUNTS);

    timerCountEnd = Timer_getCount(&tempObj);

    /* Record the timestamp ticks that have elapsed during the above loop */
    tsCountEnd = Timestamp_get32();

    /* End of code segment where core should not be halted */

    Hwi_restore(key);

    deltaTs = tsCountEnd - tsCountStart;
    deltaCnt = timerCountEnd - timerCountStart;

    /*
     * Check the timer frequency.  Allow a margin of error.
     * NOTE(review): the divisions assume deltaCnt and deltaTs are non-zero --
     * confirm this always holds for the chosen TIMERCOUNTS.
     */
    if (((deltaTs / deltaCnt) > freqRatio * 2) || 
        ((deltaTs / deltaCnt) < freqRatio / 2)) {
        /* Measured timer frequency = timestamp frequency * timer counts / timestamp ticks */
        actualFrequency = ((UInt64)timestampFreq.lo * (UInt64)deltaCnt) / (UInt64)deltaTs;
        Error_raise(NULL, Timer_E_freqMismatch,
                Timer_module->intFreqs[obj->id].lo, actualFrequency);
    }
}