/*
 *  ======== display ========
 *  Called once per accumulation loop. Samples the 64-bit timestamp,
 *  converts it to seconds and, when *countIn has reached the last
 *  iteration (required - 1), prints the elapsed time and exits the
 *  program. Otherwise it stores *countIn + 1 in *countOut.
 *
 *  rows        - matrix dimension reported in the summary line
 *  columns     - column count of outputArray (only used by the disabled
 *                debug dump)
 *  outputArray - result matrix (only used by the disabled debug dump)
 *  startTime   - start time in seconds (timestamp count divided by the
 *                timestamp frequency)
 *  countIn     - number of loops completed before this call
 *  countOut    - receives *countIn + 1 while the run is not finished
 */
void display (int rows, int columns, long *outputArray, double *startTime, long *countIn, long *countOut)
{
    const unsigned long required = 1000000;   /* number of loops being timed */
    Types_Timestamp64 endTime64;
    Types_FreqHz freq;
    unsigned long long endClockCycles;
    double endTime, timeTaken;

    /* Only referenced by the disabled debug dump below. */
    (void)columns;
    (void)outputArray;

    /* Take the timestamp first so the bookkeeping below is not measured. */
    Timestamp_get64(&endTime64);
    Timestamp_getFreq(&freq);

    /*
     * Combine the two 32-bit halves with an explicit unsigned 64-bit shift.
     * Multiplying by the bare literal 4294967296 leaves the width and
     * signedness of the product up to the platform's integer types.
     */
    endClockCycles = ((unsigned long long)endTime64.hi << 32) | endTime64.lo;
    endTime = endClockCycles / (double)freq.lo;
    timeTaken = endTime - *startTime;

    if ((unsigned long)*countIn == required - 1) {
        /*
         * Debug dump of the result matrix (disabled):
         *
         * printf ("\nResulting Array: \n");
         * for (int i = 0; i < rows; i++) {
         *     for (int j = 0; j < columns; j++) {
         *         printf("%ld\t", *((outputArray+i*columns) + j));
         *     }
         *     printf("\n");
         * }
         */

        /* 'required' is unsigned long, so it must be printed with %lu. */
        printf("\n%lu loop(s) accumulating square matrices of size %d took %fs\n", required, rows, timeTaken);
        exit(0);
    }

    *countOut = *countIn + 1;
}
/*
 *  ======== Utils_prfTsPrint ========
 *  Print the statistics accumulated in a profiling handle: total time in
 *  milliseconds, call count, frame count, frames per second and frames
 *  per count.
 *
 *  pHndl           - profiling handle whose totalTs, numFrames, count and
 *                    name fields are reported
 *  resetAfterPrint - when true, Utils_prfTsReset() is called on the handle
 *                    after printing
 *
 *  Always returns 0.
 *
 *  A derived value whose divisor is zero (no elapsed time, no counts, or a
 *  timestamp frequency below 1 kHz) is reported as 0 instead of dividing
 *  by zero.
 */
Int32 Utils_prfTsPrint(Utils_PrfTsHndl * pHndl, Bool resetAfterPrint)
{
    UInt32 timeMs = 0;
    UInt32 fps = 0;
    UInt32 fpc = 0;
    UInt32 cpuKhz;
    Types_FreqHz cpuHz;

    /*
     * TODO (carried over from the original comments): the 64-bit timestamp
     * is reported as not working, so only the low word of the frequency
     * is used here.
     */
    Timestamp_getFreq(&cpuHz);
    cpuKhz = cpuHz.lo / 1000;   /* convert to kHz */

    if (cpuKhz != 0) {
        timeMs = (pHndl->totalTs) / cpuKhz;
    }
    if (timeMs != 0) {
        fps = (pHndl->numFrames * 1000) / timeMs;
    }
    if (pHndl->count != 0) {
        fpc = (pHndl->numFrames) / pHndl->count;
    }

    Vps_printf(" %d: PRF : %s : t: %d ms, c: %d, f: %d, fps: %d, fpc: %d \r\n",
               Utils_getCurTimeInMsec(),
               pHndl->name,
               timeMs,              /* in msecs */
               pHndl->count,
               pHndl->numFrames,
               fps,                 /* frames per second */
               fpc                  /* frames per count */
        );

    if (resetAfterPrint) {
        Utils_prfTsReset(pHndl);
    }

    return 0;
}
/* * ======== delayMicroseconds ======== * Delay for the given number of microseconds. */ void delayMicroseconds(unsigned int us) { if (us <7) { //The overhead in calling and returning from this function takes about 6us } else if (us <=20) { int time; for (time = 5*(us-6); time > 0; time--) { asm(" nop"); } } else if (us < 70) { int time; for (time = 5*us; time > 0; time--) { asm(" nop"); } } else { uint32_t t0, deltaT; Types_FreqHz freq; Timestamp_getFreq(&freq); deltaT = us * (freq.lo/1000000); t0 = Timestamp_get32(); while ((Timestamp_get32()-t0) < deltaT) { ; } } }
/*
 *  ======== display ========
 *  Sample the 64-bit timestamp, print how long the multiplication took
 *  (in seconds and in raw clock cycles) and exit the program.
 *
 *  rowsA     - matrix dimension reported in the summary line
 *  columnsB  - column count of arrayC (only used by the disabled debug dump)
 *  arrayC    - result matrix (only used by the disabled debug dump)
 *  startTime - start time in seconds (timestamp count divided by the
 *              timestamp frequency)
 *
 *  This function does not return: it always calls exit(0).
 */
void display (int rowsA, int columnsB, long *arrayC, double *startTime)
{
    Types_Timestamp64 endTime64;
    Types_FreqHz freq;
    unsigned long long endClockCycles;
    double endTime, timeTaken;

    /* Only referenced by the disabled debug dump below. */
    (void)columnsB;
    (void)arrayC;

    Timestamp_get64(&endTime64);
    Timestamp_getFreq(&freq);

    /*
     * Combine the two 32-bit halves with an explicit unsigned 64-bit shift.
     * Multiplying by the bare literal 4294967296 leaves the width and
     * signedness of the product up to the platform's integer types.
     */
    endClockCycles = ((unsigned long long)endTime64.hi << 32) | endTime64.lo;
    endTime = endClockCycles / (double)freq.lo;
    timeTaken = endTime - *startTime;

    /*
     * Debug dump of the result matrix (disabled):
     *
     * printf ("\nResulting Array: \n");
     * for (int i = 0; i < rowsA; i++) {
     *     for (int j = 0; j < columnsB; j++) {
     *         printf("%ld\t", *(arrayC+((i*columnsB) + j)));
     *     }
     *     printf("\n");
     * }
     */

    printf("\nMultiplication of %d square matrices took %fs and %llu clock cycles\n", rowsA, timeTaken, endClockCycles);
    exit(0);
}
void generate (int rowsA, int columnsA, int rowsB, int columnsB, long *arrayA, long *arrayB, double *startTime) { printf("\n\nCross Core Multiplication Beginning\n"); // Print information message int generationCount = 1; // Used in generation of the arrays int i, j = 0; // Used to count rows and columns of the arrays Types_Timestamp64 startTime64; // 64 bit timestamp Types_FreqHz freq; // frequency of cores for (i = 0; i < rowsA; i++) // Generate array A { for (j = 0; j < columnsA; j++) { *((arrayA+i*columnsA) + j) = (generationCount); // Initialise arrayA } generationCount++; } generationCount = 1; for (i = 0; i < rowsB; i++) // Generate array B { for (j = 0; j < columnsB; j++) { *((arrayB+i*columnsB) + j) = (generationCount); // Initialise arrayB } generationCount++; } Timestamp_getFreq(&freq); // Get the frequency of the cores Timestamp_get64(&startTime64); // Get the starting timestamp *startTime = ((startTime64.lo/(double)freq.lo)); // Calculate a time in seconds for use in timestamping }
/*
 *  ======== Rta_getCpuSpeed ========
 *  Fill a response packet with the Timestamp frequency: the high word is
 *  written to resp0 and the low word to resp1.
 */
Void Rta_getCpuSpeed(Rta_ResponsePacket *resp)
{
    Types_FreqHz timestampFreq;

    /* Query the Timestamp module for its frequency. */
    Timestamp_getFreq(&timestampFreq);

    resp->resp0 = timestampFreq.hi;     /* upper 32 bits */
    resp->resp1 = timestampFreq.lo;     /* lower 32 bits */
}
/*
 *  ======== micros ========
 *  Return the current 64-bit timestamp converted to microseconds,
 *  truncated to unsigned long.
 *
 *  Returns 0 if the reported timestamp frequency (low word) is zero.
 */
unsigned long micros(void)
{
    Types_FreqHz freq;
    Types_Timestamp64 time;
    uint64_t ticks;
    uint64_t wholeSeconds;
    uint64_t leftoverTicks;

    Timestamp_getFreq(&freq);
    Timestamp_get64(&time);

    if (freq.lo == 0) {
        return (0);
    }

    ticks = ((uint64_t)time.hi << 32) | time.lo;

    /*
     * Convert seconds and the sub-second remainder separately. Dividing by
     * (freq.lo / 1000000) divides by zero for sources slower than 1 MHz
     * and ignores the fractional MHz of any other source. Multiplying the
     * whole tick count by 1000000 first could overflow 64 bits. The
     * remainder is below 2^32, so leftoverTicks * 1000000 always fits.
     */
    wholeSeconds = ticks / freq.lo;
    leftoverTicks = ticks % freq.lo;

    return ((unsigned long)(wholeSeconds * 1000000u +
                            (leftoverTicks * 1000000u) / freq.lo));
}
/*
 *  ======== Utils_IntLatencyCalculate ========
 *  Track the spacing between successive calls (one per interrupt) and
 *  record the calls that arrive later than allowed.
 *
 *  Nothing happens unless latencyMeasure->start is set. While prevIntTime
 *  is 0 the timer frequency per microsecond is derived and the late
 *  counter is cleared. Otherwise, when the 32-bit timestamp has advanced
 *  past prevIntTime, the gap is converted to microseconds; if it exceeds
 *  expectedInterruptInterval + maxAllowedLatency, the current task handle
 *  is stored in the lateIntIrp ring and numLateInts is incremented.
 *  A timestamp that is not greater than a non-zero prevIntTime (e.g. after
 *  a counter wrap) is only recorded, not evaluated.
 *
 *  latencyMeasure - measurement state, updated in place
 *  intId          - interrupt id (currently unused, see the disabled line)
 */
Void Utils_IntLatencyCalculate(Utils_IntLatencyMeasure * latencyMeasure, UInt intId)
{
    UInt32 curTime;

    if (!latencyMeasure->start) {
        return;
    }

    curTime = Timestamp_get32();

    if (latencyMeasure->prevIntTime == 0) {
        /* No previous sample: set up the conversion factor. */
        Types_FreqHz freq;
        Bits64 freqInMicrosec;

        Timestamp_getFreq(&freq);
        freqInMicrosec = ((Bits64) freq.hi << 32) | freq.lo;
        freqInMicrosec = freqInMicrosec / UTILS_FREQPERMICROSEC_DIV_FACTOR;

        latencyMeasure->timerFreqPerMicroSec = (UInt32) freqInMicrosec;
        latencyMeasure->numLateInts = 0;
        // latencyMeasure->hHwi = Hwi_getHandle(intId);
    }
    else if (latencyMeasure->prevIntTime < curTime) {
        UInt32 tsDelta = (curTime - latencyMeasure->prevIntTime)
                         / latencyMeasure->timerFreqPerMicroSec;
        UInt32 lateLimit = latencyMeasure->expectedInterruptInterval
                           + latencyMeasure->maxAllowedLatency;

        if (tsDelta > lateLimit) {
            /* Remember who was running; the slot index wraps around. */
            UInt32 lateIntIdx =
                latencyMeasure->numLateInts % UTILS_INTLATENCY_LATE_IRP_COUNT;

            latencyMeasure->lateIntIrp[lateIntIdx] = (UInt32) Task_self();
            latencyMeasure->numLateInts++;
        }
    }

    latencyMeasure->prevIntTime = curTime;
}
/* * ======== cpuloadInit ======== */ Void cpuLoadInit(Void) { Types_FreqHz freq; ULong maxLoad; Int i; /* freq is maximum timestamp counts per second (100% cpuload) */ Timestamp_getFreq(&freq); maxLoad = freq.lo / NUMPERSEC; /* since we run load NUMPERSEC times */ /* * calculate loadValues for each thread type for * each load interval (5 seconds) */ for (i = 0; i < LOAD_STEPS; i++) { hwiLoadValue[i] = hwiLoadPercent[i] * maxLoad / 100; swiLoadValue[i] = swiLoadPercent[i] * maxLoad / 100; taskLoadValue[i] = taskLoadPercent[i] * maxLoad / 100; } }
void generate (int rowsA, int columnsA, int rowsB, int columnsB, long *arrayA, long *arrayB, double *startTime) { // !!! TODO: add checking around the matrices size printf("\n\nCross core multiplication beginning of %d square matrices\n", rowsA); int generationCount = 1; Types_Timestamp64 startTime64; Types_FreqHz freq; unsigned int generateEndTime; int i, j = 0; for (i = 0; i < rowsA; i++) // Generate array A { for (j = 0; j < columnsA; j++) { *(arrayA+((i*columnsA) + j)) = (generationCount); } generationCount++; } generationCount = 1; for (i = 0; i < rowsB; i++) // Generate array B { for (j = 0; j < columnsB; j++) { *(arrayB+((i*columnsB) + j)) = (generationCount); } generationCount++; } Timestamp_getFreq(&freq); Timestamp_get64(&startTime64); *startTime = ((startTime64.lo/(double)freq.lo)); }
/* * ======== taskLoad ======== */ Void taskLoad(Void) { Bool flag; Types_Timestamp64 startTime; Types_Timestamp64 currentTime; Types_FreqHz freq; UInt32 count; Int loops; /* Have this task use ~50% of the CPU */ Timestamp_getFreq(&freq); count = freq.lo / 1000 / 1000 * (Clock_tickPeriod/ 2); while (TRUE) { Semaphore_pend(loadSem, BIOS_WAIT_FOREVER); Log_write1(UIABenchmark_start, (xdc_IArg)"running"); Timestamp_get64(&startTime); flag = TRUE; loops = 0; while (flag == TRUE) { Timestamp_get64(¤tTime); loops++; // TODO deal with wrap if (startTime.lo + count <= currentTime.lo) { flag = FALSE; Log_write1(UIABenchmark_stop, (xdc_IArg)"running"); Log_write1(UIABenchmark_stop, (xdc_IArg)"whole"); } } } }
/* * ======== printStatistics ======== */ Void printStatistics() { UInt32 timeElapsed; UInt i; Types_FreqHz timerFreq, cpuFreq; Float cpuTimerFreqRatio; Timestamp_getFreq(&timerFreq); BIOS_getCpuFreq(&cpuFreq); cpuTimerFreqRatio = (Float)cpuFreq.lo / (Float)timerFreq.lo; /* Convert timestamps to CPU time */ for (i = 0; i < NUMLOOPS; i++) { rawtimestamps[i] *= cpuTimerFreqRatio; } for (i = 0; i < NUMLOOPS - 1; i++) { latencies[i] = (rawtimestamps[i + 1] - rawtimestamps[i]) / numCores; } /* squelch any rollover-effected latencies */ for (i = 0; i < NUMLOOPS - 2; i++) { if (latencies[i] > 4 * latencies[i+1]) { latencies[i] = latencies[i+1]; rawtimestamps[i] = rawtimestamps[i+1]; } } getStats(latencies + NUMIGNORED, NUMLOOPS - NUMIGNORED - 2, &latencyStats); timeElapsed = rawtimestamps[NUMLOOPS - NUMIGNORED - 2] - rawtimestamps[NUMIGNORED]; /* Throughput = time elapsed divided by total #of of hops */ System_printf("======== SYSTEM ATTRIBUTES ======== \n"); System_printf("Device name: %s\n", DEVICENAME); System_printf("Processor names: %s\n", PROCNAMES); System_printf("CPU Freq: %d MHz\n", cpuFreq.lo / 1000000); System_printf("Timer Freq: %d MHz\n\n", timerFreq.lo / 1000000); System_printf("======== BENCHMARK ATTRIBUTES ======== \n"); System_printf("Notify setup delegate: %s\n", NOTIFYSETUP); System_printf("Number of processors: %d\n", numCores); System_printf("Number of notifications: %d\n", latencyStats.numVals); System_printf("Build profile: %s\n\n", BUILDPROFILE); System_printf("======== NOTIFY BENCHMARK RESULTS ======== \n"); System_printf("Average 1-way latency: %10d (cycles/msg) %10d (ns/msg)\n", (UInt32)latencyStats.mean, CYCLES_TO_NS(latencyStats.mean, cpuFreq.lo)); System_printf("Maximum 1-way latency: %10d (cycles/msg) (#%5d) %10d (ns/msg)\n", latencyStats.max, latencyStats.maxIndex, CYCLES_TO_NS(latencyStats.max, cpuFreq.lo)); System_printf("Minimum 1-way latency: %10d (cycles/msg) (#%5d) %10d (ns/msg)\n", latencyStats.min, latencyStats.minIndex, 
CYCLES_TO_NS(latencyStats.min, cpuFreq.lo)); System_printf("Standard deviation: %10d (cycles/msg)\n", (UInt32)latencyStats.stddev); System_printf("Total time elapsed: %10d (cycles) %10d (us)\n", timeElapsed, CYCLES_TO_US(timeElapsed, cpuFreq.lo)); }
/**
 * Send all prepared IPC messages to all cores and return the calculation result (ssd/jac/hess)
 *
 * One message per participating core is put on that core's queue, starting
 * at the highest core. The function then blocks until the same number of
 * replies has arrived on the master receive queue and sums the partial
 * results of every reply.
 *
 * @param ProcessingType   selects what is accumulated: pt_ssd sums SSD only,
 *                         pt_ssdJacHess additionally sums JD and JD2; for any
 *                         other type no result is accumulated
 * @param number_of_cores  number of cores taking part (the highest
 *                         number_of_cores core indices are used)
 * @param SSD              out: sum of the per-core out_SSD values
 * @param JD               out: element-wise sum of the per-core out_JD values
 * @param JD2              out: element-wise sum of the per-core out_JD2 values
 *
 * On any failure the error is logged and shutdown_message_q() is called.
 * A reply that could not be received is excluded from the sums.
 */
void send_to_cores(const processing_type_e ProcessingType, const uint32_T number_of_cores, real32_T *SSD, real32_T JD[3], real32_T JD2[9])
{
    process_message_t * p_msg = NULL;
    uint16_t msgId = 0;
    int32_T ret_val = 0;
#ifdef _TRACE_MC_
    Types_FreqHz freq;
    float processing_time = 0;
    uint32_t ts1, ts2;      /* unsigned so the difference survives a counter wrap */
#endif
    int32_t j;
    int32_t i;

#ifdef _TRACE_MC_
    logout("[MAIN ] Execute Process (ProcessingType=%u)\n", ProcessingType); //trace
    Timestamp_getFreq(&freq);
#endif

#ifdef _DO_ERROR_CHECKS_
    if (NULL == h_receive_queue) {
        /* Without a receive queue no reply can ever arrive: abort instead
         * of handing a NULL queue to the message calls below. */
        logout("No master msg receive queue available.\n");
        ret_val = -1;
        goto mcip_process_error;
    }
    if ((number_of_cores == 0) || (number_of_cores > max_core)) {
        logout("Invalid number_of_cores: It should be between 1 to %u\n", max_core);
        ret_val = -1;
        goto mcip_process_error;
    }
#endif

    //CACHING NOTE:
    //The picture data was cache write backed after images have been received. More
    //data is not to be cache write backed as we pass all other data (also arrays
    //element by element) to the cores using the message queue. Results are passed
    //back also using the message interface as we don't receive bulk data results.

#ifdef _TRACE_MC_
    ts1 = (uint32_t) Timestamp_get32();
#endif

    /* Send messages to processing cores, start at the highest core */
    for (i = CORE_AMOUNT-1; i >= (int)(CORE_AMOUNT-number_of_cores); i-- ) {
        p_msg = p_queue_msg[i];
        MessageQ_setMsgId(p_msg, ++msgId);
        MessageQ_setReplyQueue(h_receive_queue, (MessageQ_Msg)p_msg);

#ifdef _TRACE_MC_
        logout("[MAIN ] Start process on core %u (ProcessingType=%u)\n", p_msg->core_id, ProcessingType); //trace
#endif

        /* send the message to the remote processor */
        if (MessageQ_put(queue_id[p_msg->core_id], (MessageQ_Msg)p_msg) < 0) {
            logout("MessageQ_put had a failure error\n");
            ret_val = -1;
            goto mcip_process_error;
        }
    }

    //All cores have invalidated their cache to read new image data. Next time cache invalidation is no more necessary (until new image data arrives).
    g_NewImageDataArrived = 0;
#ifdef _TRACE_MC_
    logout("[MAIN ] Reset g_NetImageDataArrived signal to %d.\n", g_NewImageDataArrived);
#endif

    //Clear result buffers (will be summed up, have to start at 0)
    if (pt_ssd == ProcessingType || pt_ssdJacHess == ProcessingType) {
        (*SSD) = 0;
        if (pt_ssdJacHess == ProcessingType) {
            memset(JD, 0, sizeof(real32_T) * 3);
            memset(JD2, 0, sizeof(real32_T) * 9);
        }
    }

    //ToDo: Once it looked like all other cores finished calculating before core 0 started. Why ?
    //One could think of having no mcip_core_task at the main core and call the calculation directly instead ... Use _TRACE_MC_ (only) to see this
    //ToDo: When adding a big sleep command to the processing functions one should see if there's something wrong

    /* Receive the result */
    for (i = (CORE_AMOUNT-number_of_cores); i < CORE_AMOUNT; i++) {
        if (MessageQ_get(h_receive_queue, (MessageQ_Msg *)&p_msg, MessageQ_FOREVER) < 0) {
            logout("This should not happen since timeout is forever\n");
            ret_val = -1;
            /* p_msg still refers to an earlier message here; do not log or
             * accumulate it as if it were a fresh reply. */
            continue;
        }
        /* else if (p_msg->info.flag != 0) {
            logout("Process image error received from core %d\n", p_msg->core_id);
            ret_val = -1;
        }*/

#ifdef _TRACE_MC_
        if (pt_ssd == ProcessingType || pt_ssdJacHess == ProcessingType) {
            logout("[MAIN ] process answer received from core %u (SSD=%f, ProcessingType=%u)\n", p_msg->core_id, (double)p_msg->info.out_SSD, ProcessingType); //trace
            if (pt_ssdJacHess == ProcessingType) {
                logout("[MAIN ] JD = [%f %f %f], JD2 = [%f ... %f ... %f]\n",
                       (double)p_msg->info.out_JD[0], (double)p_msg->info.out_JD[1], (double)p_msg->info.out_JD[2],
                       (double)p_msg->info.out_JD2[0], (double)p_msg->info.out_JD2[4], (double)p_msg->info.out_JD2[8]);
            }
        } else {
            logout("[MAIN ] process answer received from core %u (ProcessingType=%u)\n", p_msg->core_id, ProcessingType); //trace
        }
#endif

        //Sum up the results
        if (pt_ssd == ProcessingType || pt_ssdJacHess == ProcessingType) {
            (*SSD) += p_msg->info.out_SSD;
            if (pt_ssdJacHess == ProcessingType) {
                for (j = 0; j < 3; j++) {
                    JD[j] += p_msg->info.out_JD[j];
                }
                for (j = 0; j < 9; j++) {
                    JD2[j] += p_msg->info.out_JD2[j];
                }
            }
        }
    }

    if (ret_val == -1) {
        goto mcip_process_error;
    }

#ifdef _TRACE_MC_
    ts2 = (uint32_t) Timestamp_get32();
    ts2 = ts2 - ts1;
    processing_time = ((float)ts2 / (float)freq.lo);
    if (pt_ssd == ProcessingType || pt_ssdJacHess == ProcessingType) {
        logout("[MAIN ] SSD calculated in: %f s. Result = %f\n", processing_time, (double)(*SSD)); //trace
        if (pt_ssdJacHess == ProcessingType) {
            logout("[MAIN ] JD = [%f %f %f], JD2 = [%f ... %f ... %f]\n",
                   (double)JD[0], (double)JD[1], (double)JD[2],
                   (double)JD2[0], (double)JD2[4], (double)JD2[8]);
        }
    } else {
        logout("[MAIN ] Image shrinked in: %f s.\n", processing_time); //trace
    }
#endif

    return;

mcip_process_error:
    logout("mcip_process_error !!! \n");
    shutdown_message_q();
}
/*
 *  ======== Timer_checkFreq ========
 *  Sanity-check the frequency configured for a timer by running a
 *  temporary copy of it against the Timestamp counter.
 *
 *  The ratio of elapsed timestamp ticks to elapsed timer counts is
 *  compared with the ratio of the two nominal frequencies. If the measured
 *  ratio is more than twice, or less than half, the expected ratio,
 *  Timer_E_freqMismatch is raised with the configured and the measured
 *  frequency.
 *
 *  obj - timer to check; it is copied, the original object is not modified
 */
Void Timer_checkFreq(Timer_Object *obj)
{
    UInt key;
    UInt32 timerCountStart, timerCountEnd, tsCountStart, tsCountEnd;
    UInt32 deltaTs, deltaCnt;
    Types_FreqHz timerFreq, timestampFreq;
    UInt freqRatio;
    UInt32 actualFrequency;
    Timer_Object tempObj;

    /*
     * Make a temporary copy of 'obj' and modify it to be used for the timer
     * frequency check. Set the period to Timer_MAX_PERIOD to ensure that
     * the timer does not roll over while performing the check.
     */
    memcpy((void *)&tempObj, (void *)obj, sizeof(Timer_Object));
    tempObj.period = Timer_MAX_PERIOD;
    tempObj.periodType = Timer_PeriodType_COUNTS;
    tempObj.runMode = Timer_RunMode_ONESHOT;
    tempObj.startMode = Timer_StartMode_USER;

    /* Initialize the timer registers */
    Timer_deviceConfig(&tempObj, NULL);

    /* Get the frequencies of the Timer and the Timestamp */
    Timer_getFreq(&tempObj, &timerFreq);
    Timestamp_getFreq(&timestampFreq);

    /* Assume that timer frequency is less than 2^32 Hz */
    Assert_isTrue(timestampFreq.hi == 0 && timerFreq.hi == 0, NULL);

    freqRatio = timestampFreq.lo / timerFreq.lo;

    key = Hwi_disable();

    /*
     * Warning: halting the core between Timer_start and the point of
     * code indicated below can cause the frequency check to fail. This
     * is because the DMTimer will continue to run while this core is halted,
     * thus causing the ratio between timer counts to change.
     */
    Timer_start(&tempObj);

    /* Record the initial timer & timestamp counts */
    timerCountStart = Timer_getCount(&tempObj);
    tsCountStart = Timestamp_get32();

    /* Wait for 'TIMERCOUNTS' timer counts to elapse */
    while (Timer_getCount(&tempObj) < timerCountStart + TIMERCOUNTS);

    timerCountEnd = Timer_getCount(&tempObj);

    /* Record the timestamp ticks that have elapsed during the above loop */
    tsCountEnd = Timestamp_get32();

    /* End of code segment where core should not be halted */

    Hwi_restore(key);

    deltaTs = tsCountEnd - tsCountStart;
    deltaCnt = timerCountEnd - timerCountStart;

    /* Check the timer frequency. Allow a margin of error. */
    if (((deltaTs / deltaCnt) > freqRatio * 2) ||
        ((deltaTs / deltaCnt) < freqRatio / 2)) {
        /*
         * Measured timer frequency = timestamp frequency scaled by the
         * observed count ratio, computed in 64 bits to avoid overflow.
         * NOTE(review): a deltaTs of 0 would divide by zero here - confirm
         * the timestamp counter is guaranteed to advance during the loop.
         */
        actualFrequency = ((UInt64)timestampFreq.lo * (UInt64)deltaCnt) /
                          (UInt64)deltaTs;
        Error_raise(NULL, Timer_E_freqMismatch,
                    Timer_module->intFreqs[obj->id].lo, actualFrequency);
    }
}