Exemple #1
0
/*
 *  ======== delayMicroseconds ========
 *  Busy-wait for approximately `us` microseconds.
 *
 *  Three strategies are used, selected by the requested delay:
 *    - us < 7        : return right away; the call/return overhead alone
 *                      was measured by the original author at about 6 us.
 *    - 7 <= us <= 20 : spin on a nop loop of 5 iterations per microsecond,
 *                      after subtracting the 6 us call overhead.
 *    - 20 < us < 70  : spin on a nop loop of 5 iterations per microsecond.
 *    - us >= 70      : poll the 32-bit timestamp counter until the
 *                      requested number of timestamp ticks has elapsed.
 *
 *  The nop-loop factors are empirical and only hold for the clock and
 *  compiler settings they were tuned for.
 *
 *  Only the low 32 bits of the timestamp frequency are used, and the tick
 *  count is truncated to 32 bits, so delays longer than one wrap of the
 *  32-bit timestamp counter cannot be represented.
 */
void delayMicroseconds(unsigned int us)
{
    if (us <7) {
        //The overhead in calling and returning from this function takes about 6us
    }
    else if (us <=20) {
        int time;
        for (time = 5*(us-6); time > 0; time--) {
            asm("   nop");
        }
    }
    else if (us < 70) {
        int time;
        for (time = 5*us; time > 0; time--) {
            asm("   nop");
        }
    }
    else {
        uint32_t t0, deltaT;
        Types_FreqHz freq;

        /*
         * Take the reference first so the time spent querying the frequency
         * and computing the tick count is part of the delay instead of
         * being added on top of it.
         */
        t0 = Timestamp_get32();

        Timestamp_getFreq(&freq);

        /*
         * Multiply before dividing, in 64 bits. Dividing the frequency by
         * 1000000 first truncates: a timestamp slower than 1 MHz would give
         * zero ticks (no delay at all) and non-integral MHz clocks would
         * come up short.
         */
        deltaT = (uint32_t)(((uint64_t)us * (uint64_t)freq.lo) / 1000000u);

        /* Unsigned subtraction stays correct across a counter wrap. */
        while ((Timestamp_get32()-t0) < deltaT) {
            ;
        }
    }
}
Exemple #2
0
/*
 *  ======== doLoad ========
 *  Busy-wait until at least `count` timestamp ticks have elapsed since
 *  the function was entered. A count of 0 returns immediately.
 */
Void doLoad(ULong count)
{
    /* reference point for the elapsed-tick measurement */
    ULong start = Timestamp_get32();

    /*
     * Compare elapsed ticks against count instead of comparing against an
     * absolute end point. The end-point form, (end - now) < count, is false
     * when the first read equals the start value, so the function returned
     * without waiting whenever the timestamp had not advanced between two
     * reads (for example with a slow timestamp source).
     */
    while ((Timestamp_get32() - start) < count) {
        ;
    }
}
Exemple #3
0
/*
 * Copy one of the two built-in RGB images into rgbPtr and periodically
 * report the achieved frame rate.
 *
 * id      selects the source image: 0 -> im0_data, anything else -> im1_data
 * height  image height in pixels
 * width   image width in pixels
 * rgbPtr  destination buffer; 3 * height * width bytes are written
 *
 * Every NB_ITERATION_MEASURED calls the average number of timestamp ticks
 * per call is converted to frames per second and printed. The constant
 * 1000000000.0 is the tick rate the original code uses for the conversion.
 */
void readPPM(int id, int height, int width, unsigned char *rgbPtr) {
	const unsigned char* im_data = (id == 0) ? im0_data : im1_data;
	static int i = 0;
	static unsigned int last_stamp = 0;
	static int have_reference = 0;	/* set once last_stamp holds a real timestamp */
	unsigned int now;

	if (i == 0) {
		// 32bits precision is not sufficient here. Needs 64bits instead.
		now = Timestamp_get32();

		/*
		 * Skip the report on the very first call: there is no earlier
		 * reference yet, so the difference would be measured against 0
		 * and the printed value would be meaningless.
		 */
		if (have_reference) {
			unsigned int delta = (now - last_stamp) / NB_ITERATION_MEASURED;

			/* A zero delta would divide by zero below. */
			if (delta != 0) {
				float fps = 1000000000.0 / (float) delta;
				System_printf("fps: %f\n", fps);
			}
		}

		/* Re-sample so the time spent printing is not part of the next window. */
		last_stamp = Timestamp_get32();
		have_reference = 1;
	}
	i = (i + 1) % NB_ITERATION_MEASURED;

	/* Compute the byte count in size_t so large images do not overflow int. */
	memcpy(rgbPtr, im_data, (size_t)3 * (size_t)height * (size_t)width);
}
/*
 *  ======== tsk0 ========
 *  Task that passes one message around a ring of message queues.
 *
 *  The task registers the shared-region heap with MessageQ, spins until the
 *  'next' queue can be opened, and then (on the processor with selfId 0
 *  only) allocates a message and sends it to start the loop.
 *
 *  It then receives up to NUMLOOPS messages and forwards each one to the
 *  next queue. The processor with selfId 0 additionally records a timestamp
 *  per received message and, on the last one, prints the statistics instead
 *  of forwarding. The task ends by calling System_exit(0).
 */
Void tsk0(UArg arg0, UArg arg1)
{
    MessageQ_Msg     msg;

    System_printf("tsk0 starting\n");

    /* Make the shared-region heap available to MessageQ under HEAP_ID */
    MessageQ_registerHeap((IHeap_Handle)SharedRegion_getHeap(0), HEAP_ID);

    /* Keep retrying until the 'next' remote queue has been opened */
    while (MessageQ_open(nextQueueName, &nextQueueId) < 0) {
        ;
    }

    if (selfId == 0) {
        /* This processor injects the single message that circulates */
        msg = MessageQ_alloc(HEAP_ID, MSGSIZE);
        if (msg == NULL) {
            System_abort("MessageQ_alloc failed\n");
        }

        if (MessageQ_put(nextQueueId, msg) < 0) {
            System_abort("MessageQ_put failed\n");
        }
    }

    numReceived = 0;
    while (numReceived < NUMLOOPS) {
        /* Block until the message arrives on our own queue */
        if (MessageQ_get(messageQ, &msg, MessageQ_FOREVER) < 0) {
            System_abort("MessageQ_get failed\n");
        }

        if (selfId == 0) {
            rawtimestamps[numReceived] = Timestamp_get32();

            /* Last round trip: report and stop without forwarding */
            if (numReceived == NUMLOOPS - 1) {
                printStatistics();
                break;
            }
        }

        /* Hand the message on to the next queue in the ring */
        if (MessageQ_put(nextQueueId, msg) < 0) {
            System_abort("MessageQ_put failed\n");
        }

        numReceived++;
    }

    System_exit(0);

}
Exemple #5
0
/*
 * Program entry point: times a short EDMA test sequence.
 *
 * Steps, in order:
 *   1. take a start timestamp,
 *   2. allocate four buffers from l2Heap,
 *   3. call edmaWait4Completion(1),
 *   4. fill the first 1024 elements of mainSourceImg with 1,
 *   5. call edmaInit(),
 *   6. start two transfers with edmaInitiateXfer(), each followed by
 *      edmaWait4Completion(0),
 *   7. compute the elapsed timestamp ticks into delta.
 *
 * edmaResult and delta are assigned but never read inside this function.
 * The Memory_alloc() results are used without a NULL check.
 */
void main(void) {

	Uint32 timestamp, delta;
	/* Start of the measured interval */
	timestamp = Timestamp_get32();

	EDMA3_DRV_Result edmaResult = EDMA3_DRV_SOK;


	/* NOTE(review): named L1d_* but allocated from l2Heap below - confirm intent */
	unsigned const short *L1d_p9;
	unsigned const short *L1d_p10;

	unsigned short *mainSourceImg;
	unsigned short *referenceImg1;

	/* L1d_p9 / referenceImg1 are NUM_BYTES1 bytes; L1d_p10 / mainSourceImg are NUM_BYTES3 bytes */
	L1d_p9 = (unsigned const short *) Memory_alloc((IHeap_Handle) l2Heap, (NUM_BYTES1), 0, NULL);
	L1d_p10 = (unsigned const short *) Memory_alloc((IHeap_Handle) l2Heap, (NUM_BYTES3), 0, NULL);

	referenceImg1 = (unsigned short *) Memory_alloc((IHeap_Handle) l2Heap, (NUM_BYTES1), 0, NULL);
	mainSourceImg = (unsigned short *) Memory_alloc((IHeap_Handle) l2Heap, (NUM_BYTES3), 0, NULL);

	int i;

	/* NOTE(review): this wait runs before edmaInit() - confirm that ordering is intended */
	edmaWait4Completion(1);

	/* NOTE(review): writes 1024 unsigned short elements - confirm NUM_BYTES3 covers that */
	for (i = 0; i < 1024; i++) {
		mainSourceImg[i] = 1;
	}

	edmaInit();

	/*
	 * First transfer, involving L1d_p10 and mainSourceImg.
	 * NOTE(review): the meaning of the numeric arguments and the transfer
	 * direction are defined by edmaInitiateXfer(), which is not visible here.
	 */
	edmaInitiateXfer(L1d_p10, mainSourceImg, NUM_BYTES2, 8, 1, NUM_BYTES2, NUM_BYTES2, 1, 1, 0, 1);
	edmaWait4Completion(0);

	/* Second transfer, involving L1d_p9 and referenceImg1 */
	edmaInitiateXfer(L1d_p9, referenceImg1, NUM_BYTES2, 2, 1, NUM_BYTES2, NUM_BYTES2, 1, 1, 0, 1);
	edmaWait4Completion(0);

	/* Elapsed timestamp ticks; unsigned subtraction stays valid across a single counter wrap */
	delta = Timestamp_get32()-timestamp;

}
Exemple #6
0
/*
 *  ======== tsk0_func ========
 *  Task entry point that starts the notify sequence.
 *
 *  On the processor whose MultiProc id is 0 it stores a timestamp in
 *  rawtimestamps[seq], advances seq and sends EVENTID to dstProc, aborting
 *  if the send fails. On every other processor it does nothing.
 *  In both cases the task then terminates itself with Task_exit().
 */
Void tsk0_func(UArg arg0, UArg arg1)
{
    if (MultiProc_self() == 0) {
        /* Record the send time, then move on to the next slot */
        rawtimestamps[seq] = Timestamp_get32();
        seq++;

        if (Notify_sendEvent(dstProc, INTERRUPT_LINE, EVENTID, NULL, TRUE) < 0) {
            System_abort("Notify_sendEvent failed\n");
        }
    }

    Task_exit();
}
Exemple #7
0
/*
 *  ======== cbFxn ========
 *  Notify callback, invoked when an event is delivered to this processor.
 *
 *  The processor with selfId 0 stores a timestamp in rawtimestamps[seq].
 *  Every processor then advances seq and forwards EVENTID to dstProc,
 *  aborting if the send fails. None of the callback arguments are used.
 */
Void cbFxn(UInt16 procId, UInt16 lineId,
           UInt32 eventId, UArg arg, UInt32 payload)
{
    /* Only processor 0 collects timing samples */
    if (selfId == 0) {
        rawtimestamps[seq] = Timestamp_get32();
    }

    /* The sequence counter advances on every processor */
    seq++;

    /* Pass the event on to the destination processor */
    if (Notify_sendEvent(dstProc, INTERRUPT_LINE, EVENTID, NULL, TRUE) < 0) {
        System_abort("Notify_sendEvent failed\n");
    }
}
Exemple #8
0
/*
 * Measure the spacing between two consecutive calls and count the ones that
 * arrive too late.
 *
 * latencyMeasure  measurement state; nothing happens unless its start field
 *                 is non-zero
 * intId           currently unused
 *
 * First sample (prevIntTime == 0): the timestamp frequency is read, divided
 * by UTILS_FREQPERMICROSEC_DIV_FACTOR and stored in timerFreqPerMicroSec,
 * and numLateInts is reset.
 *
 * Later samples: the tick difference to the previous call is divided by
 * timerFreqPerMicroSec. If the result exceeds expectedInterruptInterval +
 * maxAllowedLatency, the current task handle is stored in the lateIntIrp
 * ring (UTILS_INTLATENCY_LATE_IRP_COUNT entries) and numLateInts is
 * incremented. A sample is skipped when the 32-bit timestamp did not
 * increase (counter wrap) or when timerFreqPerMicroSec is 0.
 *
 * prevIntTime is updated on every call while measurement is started.
 */
Void Utils_IntLatencyCalculate(Utils_IntLatencyMeasure * latencyMeasure,
                               UInt intId)
{
    UInt32 curTime;

    if (!latencyMeasure->start)
    {
        return;
    }

    curTime = Timestamp_get32();

    if (latencyMeasure->prevIntTime == 0)
    {
        /* First sample: set up the tick divisor and reset the counter */
        Types_FreqHz freq;
        Bits64 freqInMicrosec;

        Timestamp_getFreq(&freq);
        freqInMicrosec = freq.hi;
        freqInMicrosec <<= 32;
        freqInMicrosec |= freq.lo;
        freqInMicrosec /= UTILS_FREQPERMICROSEC_DIV_FACTOR;
        latencyMeasure->timerFreqPerMicroSec = (UInt32) freqInMicrosec;
        latencyMeasure->numLateInts = 0;
    }
    else if ((latencyMeasure->prevIntTime < curTime)
             /*
              * The divisor is 0 when the timestamp frequency is below the
              * divisor constant, or when prevIntTime was non-zero before the
              * first call so the set-up branch never ran. Skip the sample
              * rather than divide by zero.
              */
             && (latencyMeasure->timerFreqPerMicroSec != 0))
    {
        UInt32 tsDelta = (curTime - latencyMeasure->prevIntTime) /
            latencyMeasure->timerFreqPerMicroSec;

        if (tsDelta > (latencyMeasure->expectedInterruptInterval +
                       latencyMeasure->maxAllowedLatency))
        {
            /* Ring buffer: the newest late entry overwrites the oldest */
            UInt32 lateIntIdx =
                latencyMeasure->numLateInts %
                UTILS_INTLATENCY_LATE_IRP_COUNT;

            latencyMeasure->lateIntIrp[lateIntIdx] = (UInt32) Task_self();
            latencyMeasure->numLateInts++;
        }
    }

    latencyMeasure->prevIntTime = curTime;
}
Exemple #9
0
/*
 * Store the current 32-bit timestamp in dumpBuffer[id] and write back and
 * invalidate that entry in the L2 cache, waiting for the operation to
 * complete.
 */
void dumpTime(int id, int* dumpBuffer){
    int *slot = dumpBuffer + id;

    *slot = Timestamp_get32();
    CACHE_wbInvL2(slot, sizeof(int), CACHE_WAIT);
}
Exemple #10
0
/**
 * Send all prepared IPC messages to the processing cores and accumulate the
 * calculation results (SSD / Jacobian / Hessian) from their replies.
 *
 * One prepared message per participating core is sent, starting at the
 * highest core index. The function then waits for the same number of
 * replies on h_receive_queue and sums the partial results.
 *
 * @param ProcessingType   Kind of calculation. For pt_ssd and pt_ssdJacHess
 *                         *SSD is cleared and accumulated; for pt_ssdJacHess
 *                         JD and JD2 are cleared and accumulated as well.
 *                         For any other type the outputs are left untouched.
 * @param number_of_cores  Number of cores to use; cores
 *                         CORE_AMOUNT - number_of_cores .. CORE_AMOUNT - 1
 *                         take part.
 * @param SSD              Out: sum of the per-core out_SSD values.
 * @param JD               Out: element-wise sum of the per-core out_JD[3].
 * @param JD2              Out: element-wise sum of the per-core out_JD2[9].
 *
 * The function has no return value. On any failure it logs, calls
 * shutdown_message_q() and returns; the outputs are then not meaningful.
 */
void send_to_cores(const processing_type_e ProcessingType, const uint32_T number_of_cores, real32_T *SSD, real32_T JD[3], real32_T JD2[9])
{
    process_message_t * p_msg = 0;
    uint16_t msgId = 0;
    int32_T ret_val=0;
#ifdef _TRACE_MC_
    Types_FreqHz freq;
    float processing_time=0;
    /* Unsigned, so the difference stays correct across a counter wrap and
       never turns negative for long intervals. */
    uint32_t ts1, ts2;
#endif

    int32_t j;
    int32_t i;

#ifdef _TRACE_MC_
    logout("[MAIN  ] Execute Process (ProcessingType=%u)\n", ProcessingType);		//trace
    Timestamp_getFreq(&freq);
#endif

#ifdef _DO_ERROR_CHECKS_
    if(NULL == h_receive_queue)
    {
        /* Without a receive queue no reply can ever arrive: fail like the
           other argument check instead of continuing with a NULL handle. */
        logout("No master msg receive queue available.\n");
        ret_val = -1;
        goto mcip_process_error;
    }

    if ((number_of_cores == 0) || (number_of_cores > max_core)) {
        logout("Invalid number_of_cores: It should be between 1 to %u\n", max_core);
        ret_val = -1;
        goto mcip_process_error;
    }
#endif

    //CACHING NOTE:
    //The picture data was cache write backed after images have been received. More
    //data is not to be cache write backed as we pass all other data (also arrays
    //element by element) to the cores using the message queue. Results are passed
    //back also using the message interface as we don't receive bulk data results.

#ifdef _TRACE_MC_
    ts1 = Timestamp_get32();
#endif

    /* Send messages to processing cores, start at the highest core */
    for (i = CORE_AMOUNT-1; i >= (int)(CORE_AMOUNT-number_of_cores); i-- ) {
        p_msg = p_queue_msg[i];
        MessageQ_setMsgId(p_msg, ++msgId);
        MessageQ_setReplyQueue(h_receive_queue, (MessageQ_Msg)p_msg);

#ifdef _TRACE_MC_
        logout("[MAIN  ] Start process on core %u (ProcessingType=%u)\n", p_msg->core_id, ProcessingType);		//trace
#endif

        /* send the message to the remote processor */
        if (MessageQ_put(queue_id[p_msg->core_id], (MessageQ_Msg)p_msg) < 0) {
            logout("MessageQ_put had a failure error\n");
            ret_val = -1;
            goto mcip_process_error;
        }
    }

    //All cores have invalidated their cache to read new image data. Next time cache invalidation is no more necessary (until new image data arrives).
    g_NewImageDataArrived = 0;
#ifdef _TRACE_MC_
    logout("[MAIN  ] Reset g_NetImageDataArrived signal to %d.\n", g_NewImageDataArrived);
#endif

    //Clear result buffers (will be summed up, have to start at 0)
    if(pt_ssd == ProcessingType || pt_ssdJacHess == ProcessingType)
    {
        (*SSD)=0;
        if(pt_ssdJacHess == ProcessingType)
        {
            memset(JD, 0, sizeof(real32_T) * 3);
            memset(JD2, 0, sizeof(real32_T) * 9);
        }
    }

    //ToDo: Once it looked like all other cores finished calculating before core 0 started. Why ?
    //One could think of having no mcip_core_task at the main core and call the calculation directly instead ... Use _TRACE_MC_ (only) to see this
    //ToDo: When adding a big sleep command to the processing functions one should see if there's something wrong

    /* Receive the result */
    for (i = (CORE_AMOUNT-number_of_cores); i < CORE_AMOUNT; i++) {

        if (MessageQ_get(h_receive_queue, (MessageQ_Msg *)&p_msg, MessageQ_FOREVER) < 0) {
            logout("This should not happen since timeout is forever\n");
            ret_val = -1;
            /* No reply was delivered, so p_msg still points at an earlier
               message. Do not trace or accumulate from it. */
            continue;
        }
#ifdef _TRACE_MC_
        if(pt_ssd == ProcessingType || pt_ssdJacHess == ProcessingType)
        {
            logout("[MAIN  ] process answer received from core %u (SSD=%f, ProcessingType=%u)\n", p_msg->core_id, (double)p_msg->info.out_SSD, ProcessingType);		//trace
            if(pt_ssdJacHess == ProcessingType)
            {
                logout("[MAIN  ] JD = [%f %f %f], JD2 = [%f ... %f ... %f]\n", (double)p_msg->info.out_JD[0], (double)p_msg->info.out_JD[1], (double)p_msg->info.out_JD[2],
                       (double)p_msg->info.out_JD2[0], (double)p_msg->info.out_JD2[4], (double)p_msg->info.out_JD2[8]);
            }
        }
        else
        {
            logout("[MAIN  ] process answer received from core %u (ProcessingType=%u)\n", p_msg->core_id, ProcessingType);		//trace
        }
#endif

        //Sum up the results
        if(pt_ssd == ProcessingType || pt_ssdJacHess == ProcessingType)
        {
            (*SSD) += p_msg->info.out_SSD;
            if(pt_ssdJacHess == ProcessingType)
            {
                for(j=0; j<3; j++)
                {
                    JD[j] += p_msg->info.out_JD[j];
                }
                for(j=0; j<9; j++)
                {
                    JD2[j] += p_msg->info.out_JD2[j];
                }
            }
        }
    }

    if (ret_val == -1) {
        goto mcip_process_error;
    }

#ifdef _TRACE_MC_
    ts2 = Timestamp_get32();
    /* Elapsed ticks divided by ticks per second gives seconds. */
    processing_time = ((float)(uint32_t)(ts2 - ts1) / (float)freq.lo);
    if(pt_ssd == ProcessingType || pt_ssdJacHess == ProcessingType)
    {
        logout("[MAIN  ] SSD calculated in: %f s. Result = %f\n", processing_time, (double)(*SSD));		//trace
        if(pt_ssdJacHess == ProcessingType)
        {
            logout("[MAIN  ] JD = [%f %f %f], JD2 = [%f ... %f ... %f]\n", (double)JD[0], (double)JD[1], (double)JD[2], (double)JD2[0], (double)JD2[4], (double)JD2[8]);
        }
    }
    else
    {
        logout("[MAIN  ] Image shrinked in: %f s.\n", processing_time);		//trace
    }
#endif

    return;

mcip_process_error:
    logout("mcip_process_error !!! \n");
    shutdown_message_q();
}
Exemple #11
0
/*
 *  ======== Timer_checkFreq ========
 *  Compare the timer's reported frequency against the Timestamp counter.
 *
 *  A temporary copy of 'obj' is configured and started. The timer count and
 *  the 32-bit timestamp are sampled before and after TIMERCOUNTS timer
 *  counts have elapsed, with hardware interrupts disabled. If the measured
 *  ratio of timestamp ticks to timer counts is more than twice, or less than
 *  half, the ratio of the two reported frequencies, Timer_E_freqMismatch is
 *  raised with the configured frequency (Timer_module->intFreqs[obj->id].lo)
 *  and the frequency computed from the measurement.
 *
 *  'obj' itself is only read; all modifications are made to the copy.
 *
 *  NOTE(review): the divisions below have no zero checks (timerFreq.lo,
 *  deltaCnt, deltaTs) - confirm these cannot be zero on the target.
 */
Void Timer_checkFreq(Timer_Object *obj)
{
    UInt key;
    UInt32 timerCountStart, timerCountEnd, tsCountStart, tsCountEnd;
    UInt32 deltaTs, deltaCnt;
    Types_FreqHz timerFreq, timestampFreq;
    UInt freqRatio;
    UInt32 actualFrequency;
    Timer_Object tempObj;
    
    /* 
     *  Make a temporary copy of 'obj' and modify it to be used for the timer
     *  frequency check.  Set the period to Timer_MAX_PERIOD to ensure that
     *  the timer does not roll over while performing the check.  
     */
    memcpy((void *)&tempObj, (void *)obj, sizeof(Timer_Object));
    tempObj.period = Timer_MAX_PERIOD;
    tempObj.periodType = Timer_PeriodType_COUNTS;
    tempObj.runMode = Timer_RunMode_ONESHOT;
    tempObj.startMode = Timer_StartMode_USER;

    /* Initialize the timer registers */
    Timer_deviceConfig(&tempObj, NULL);

    /* Get the frequencies of the Timer and the Timestamp */
    Timer_getFreq(&tempObj, &timerFreq);
    Timestamp_getFreq(&timestampFreq);

    /* Assume that both frequencies are less than 2^32 Hz (high words are 0) */
    Assert_isTrue(timestampFreq.hi == 0 && timerFreq.hi == 0, NULL);

    /* Expected timestamp ticks per timer count (integer division) */
    freqRatio = timestampFreq.lo / timerFreq.lo;

    /* Disable interrupts for the duration of the measurement */
    key = Hwi_disable();
    
    /* 
     *  Warning: halting the core between Timer_start and the point of
     *  code indicated below can cause the frequency check to fail.  This is
     *  because the DMTimer will continue to run while this core is halted,
     *  which changes the ratio between the two counts.
     */
    Timer_start(&tempObj);

    /* Record the initial timer & timestamp counts */
    timerCountStart = Timer_getCount(&tempObj);
    tsCountStart = Timestamp_get32();

    /* Wait for 'TIMERCOUNTS' timer counts to elapse */
    while (Timer_getCount(&tempObj) < timerCountStart + TIMERCOUNTS);

    timerCountEnd = Timer_getCount(&tempObj);

    /* Record the timestamp ticks that have elapsed during the above loop */
    tsCountEnd = Timestamp_get32();

    /* End of code segment where core should not be halted */

    Hwi_restore(key);

    /* Elapsed timestamp ticks and timer counts over the same window */
    deltaTs = tsCountEnd - tsCountStart;
    deltaCnt = timerCountEnd - timerCountStart;

    /* Check the timer frequency.  Allow a margin of error (factor of two either way). */
    if (((deltaTs / deltaCnt) > freqRatio * 2) || 
        ((deltaTs / deltaCnt) < freqRatio / 2)) {
        /* Measured timer frequency, computed in 64 bits to avoid overflow of the product */
        actualFrequency = ((UInt64)timestampFreq.lo * (UInt64)deltaCnt) / (UInt64)deltaTs;
        Error_raise(NULL, Timer_E_freqMismatch,
                Timer_module->intFreqs[obj->id].lo, actualFrequency);
    }
}
/**
 * SSD distance between a reference image and a rigidly transformed template
 * image, together with the accumulated first- and second-order terms.
 *
 * For every reference pixel in rows [i_from, i_to) the matching template
 * position is computed from w (sin/cos of w[0] plus the offsets w[1] and
 * w[2]), the template is sampled with linearInterPoint2D(), and the residual
 * (template minus reference) is accumulated.
 *
 * @param dataR        reference image, mR[0] pixels per row
 * @param mR           reference size; mR[0] is the row length
 * @param omegaR       reference domain; entries [0] and [2] are used
 * @param dataT        template image including its margin
 * @param mT           template size without margin
 * @param omegaT       template domain; entries [0] and [2] are used
 * @param w            transformation parameters
 * @param MarginAddon  [0] is added to mT[0] to get the template row stride,
 *                     [1] is the number of stride-sized rows skipped at the
 *                     start of dataT
 * @param DSPRange     not used by this function
 * @param i_from       first reference row to process
 * @param i_to         one past the last reference row to process
 * @param grad         out: three accumulated residual-times-derivative sums
 * @param H            out: symmetric 3x3 matrix (row-major) of accumulated
 *                     derivative products
 * @return             0.5 * sum of squared residuals
 */
t_real matlab_c_ssdRigid2D(t_pixel* dataR, int mR[2], t_real omegaR[4],
                                          t_pixel* dataT, int mT[2], t_real omegaT[4],
                                          const t_real w[3],
										  const unsigned int MarginAddon[3],
										  const t_real DSPRange[4],
										  const unsigned int i_from, const unsigned int i_to,
										  t_real grad[3], t_real H[9]){

#ifdef _TRACE_PAPER_FIGURES_
	uint32_t time_s = Timestamp_get32();
#endif

	// Row stride of the template including its margin, and the offset of the
	// first pixel that carries image data.
	const unsigned int width = (mT[0] + MarginAddon[0]);
	const unsigned int uImageStart = width * MarginAddon[1];
	dataT += uImageStart;

#ifdef _DO_ERROR_CHECKS_
	CalcMinMaxIndex(MarginAddon, mT, uImageStart);
#endif

	// Loop-invariant parts of the coordinate transformation. The shifts for
	// the cell-centered discretization are folded into the two offsets.
	const t_real s = sin(w[0]);
	const t_real c = cos(w[0]);
	const t_real offsetX = (w[1] - omegaT[0] + c*omegaR[0] - s*omegaR[2]) + 0.5f*(c - s - 1.0f);
	const t_real offsetY = (w[2] - omegaT[2] + s*omegaR[0] + c*omegaR[2]) + 0.5f*(s + c - 1.0f);

	// Accumulators are kept as scalars (array reductions were a limitation
	// for the OpenMP variant of this loop).
	t_real sumSq = 0.0f;

	t_real g0 = 0.0f;
	t_real g1 = 0.0f;
	t_real g2 = 0.0f;

	t_real h00 = 0.0f;
	t_real h01 = 0.0f;
	t_real h02 = 0.0f;
	t_real h11 = 0.0f;
	t_real h12 = 0.0f;
	t_real h22 = 0.0f;

	// Per-pixel temporaries are declared inside the loops on purpose.
	for (int row=i_from; row<i_to; row++) {
		const t_real y_grid = omegaR[2] + (row+0.5f);
		const int rowOffset = row*mR[0];

		for (int col=0; col<mR[0]; col++) {

			// Template derivatives, filled in by linearInterPoint2D()
			t_real dTx, dTy;

			// Template coordinates belonging to the current reference pixel
			const t_real x_voxel = c*col - s*row + offsetX;
			const t_real y_voxel = s*col + c*row + offsetY;

			// Residual: interpolated template value minus reference value
			const t_real val =  linearInterPoint2D(dataT, x_voxel, y_voxel, dTx, dTy, width
					#ifdef _TEST_DIRICHLET_AS_IF_THERE_WAS_NO_PADDING_
					, mT[0], mT[1]
					#endif
					) - (dataR[rowOffset+col]);

			// Current grid position in world coordinates
			const t_real x_grid = omegaR[0] + (col+0.5f);

			const t_real dxx = dTx*x_grid;
			const t_real dxy = dTx*y_grid;
			const t_real dyx = dTy*x_grid;
			const t_real dyy = dTy*y_grid;

			// Derivative with respect to w[0]
			const t_real term = c*dyx - s*dxx - c*dxy - s*dyy;

			sumSq += val*val;

			g0 += val*term;
			g1 += val*dTx;
			g2 += val*dTy;

			h00 += term*term;
			h01 += term*dTx;
			h02 += term*dTy;
			h11 += dTx*dTx;
			h12 += dTx*dTy;
			h22 += dTy*dTy;
		}
	}

	grad[0] = g0;
	grad[1] = g1;
	grad[2] = g2;

	// Upper triangle including the diagonal
	H[0] = h00;  H[1] = h01;  H[2] = h02;
	H[4] = h11;  H[5] = h12;
	H[8] = h22;

	// Mirror into the lower triangle
	H[3] = H[1];
	H[6] = H[2];
	H[7] = H[5];

	const t_real fRetVal = 0.5f*sumSq;

#ifdef _TRACE_PAPER_FIGURES_
	uint32_t time_e = Timestamp_get32();
	uint32_t TimeForDoubleLoop = time_e-time_s;
	logout("SSDJacHess took %d us.\n", TimeForDoubleLoop / DSP_CLOCKRATE_US);
#endif

	return fRetVal;
} // end of computeFunctionValueAndDerivatives
/**
 * SSD distance between a reference image and a rigidly transformed template
 * image (function value only, no derivatives).
 *
 * For every reference pixel in rows [i_from, i_to) the matching template
 * position is computed from w (sin/cos of w[0] plus the offsets w[1] and
 * w[2]), the template is sampled with linearInterPoint2D(), and the squared
 * residual is accumulated.
 *
 * @param dataR        reference image, mR[0] pixels per row
 * @param mR           reference size; mR[0] is the row length
 * @param omegaR       reference domain; entries [0] and [2] are used
 * @param dataT        template image including its margin
 * @param mT           template size without margin
 * @param omegaT       template domain; entries [0] and [2] are used
 * @param w            transformation parameters
 * @param MarginAddon  [0] is added to mT[0] to get the template row stride,
 *                     [1] is the number of stride-sized rows skipped at the
 *                     start of dataT
 * @param DSPRange     not used by this function
 * @param i_from       first reference row to process
 * @param i_to         one past the last reference row to process
 * @return             0.5 * sum of squared residuals
 */
t_real matlab_c_ssdRigid2D(t_pixel* dataR, int mR[2], t_real omegaR[4],
                            t_pixel* dataT, int mT[2], t_real omegaT[4],
                            const t_real w[3], const unsigned int MarginAddon[3],
							const t_real DSPRange[4], const unsigned int i_from, const unsigned int i_to){

#ifdef _TRACE_PAPER_FIGURES_
	uint32_t time_s = Timestamp_get32();
#endif

	// Row stride of the template including its margin, and the offset of the
	// first pixel that carries image data.
	const unsigned int width = (mT[0] + MarginAddon[0]);
	const unsigned int uImageStart = width * MarginAddon[1];
	dataT += uImageStart;

#ifdef _DO_ERROR_CHECKS_
	CalcMinMaxIndex(MarginAddon, mT, uImageStart);
#endif

	// Loop-invariant parts of the coordinate transformation. The shifts for
	// the cell-centered discretization are folded into the two offsets.
	const t_real s = sin(w[0]);
	const t_real c = cos(w[0]);
	const t_real offsetX = (w[1] - omegaT[0] + c*omegaR[0] - s*omegaR[2]) + 0.5f*(c - s - 1.0f);
	const t_real offsetY = (w[2] - omegaT[2] + s*omegaR[0] + c*omegaR[2]) + 0.5f*(s + c - 1.0f);

	t_real sumSq = 0.0f;

	// Per-pixel temporaries are declared inside the loops on purpose.
	for (int row=i_from; row<i_to; row++) {
		const int rowOffset = row*mR[0];

		for (int col=0; col<mR[0]; col++) {

			// Template coordinates belonging to the current reference pixel
			const t_real x_voxel = c*col - s*row + offsetX;
			const t_real y_voxel = s*col + c*row + offsetY;

			// Residual: reference value minus interpolated template value.
			// The sign is the opposite of the derivative-computing overload;
			// it does not affect the squared value accumulated here.
			const t_real val =  dataR[rowOffset+col] - linearInterPoint2D(dataT, x_voxel, y_voxel, width
				#ifdef _TEST_DIRICHLET_AS_IF_THERE_WAS_NO_PADDING_
				, mT[0], mT[1]
				#endif
			);

			sumSq += val*val;
		}
	}

	const t_real fRetVal = 0.5f*sumSq;

#ifdef _TRACE_PAPER_FIGURES_
	uint32_t time_e = Timestamp_get32();
	uint32_t TimeForDoubleLoop = time_e-time_s;
	logout("SSD took %d us.\n", TimeForDoubleLoop / DSP_CLOCKRATE_US);
#endif

	return fRetVal;


} // end of computeFunctionValueAffine