//GPU implementation of the MLS alg., the returns are in the graphics card
void MLSGpuVolumeMapping::updateMapping(const Vector3d* input, Vector3d* output)
{
	//update VBO
	_updateBuffer(m_pSrcObj, m_pDstObj, vboDefRefVertexArray, input, output);

	//Perform GPU based deformation
	const int timerid=4;
	startFastTimer(timerid);
	const int nv = m_pDstObj->m_nVertexCount;
	runGpuMlsDeformation(
		nv,						//length of the problem (here is the vertex length)
		vboRefVertexArray,		//static vertex array of the reference model, float3
		vboDefRefVertexArray,    //deformed vertex array of the reference model, float3
		//&m_pDeviceDefRefVertex[0].x,
		vboNeighborArray,		//neighbourhood array, or connectivity, int4
		vboVertexArray,		    //static vertex array of the visual model, float3
		vboDefVertexArray,		//deformed vertex array of the visual model, float3
		vboQuatArray);		    //rotation quaternion array, float4
	stopFastTimer(timerid);
	reportTimeDifference(timerid, "GPU MLS time is");

	//Copy buffer
    glBindBuffer(GL_ARRAY_BUFFER, vboDefVertexArray);
	Vector3f *pDefVert = (Vector3f *)glMapBuffer(GL_ARRAY_BUFFER, GL_READ_WRITE);
	if (pDefVert==NULL) return;
	for (int i=0; i<m_pDstObj->m_nVertexCount; i++, pDefVert++){
		output[i] = Vector3d(pDefVert->x, pDefVert->y, pDefVert->z);
		//if (i<10) printf("%d: %lg %lg %lg\n", i, output[i].x, output[i].y, output[i].z);
	}
	//printf("\n\n");
	glUnmapBuffer(GL_ARRAY_BUFFER);
    glBindBuffer(GL_ARRAY_BUFFER, 0);
	return;
}
/**
* @details
* Method to run the channeliser.
*
* The channeliser performs channelisation of a number of sub-bands containing
* a complex time series.
*
* Parallelisation, by means of openMP threads, is carried out by splitting
* the sub-bands as evenly as possible between threads.
*
* @param[in]  timeSeries 	Buffer of time samples to be channelised.
* @param[out] spectrum	 	Set of spectra produced.
*/
void PPFChanneliser::run(const TimeSeriesDataSetC32* timeSeries,
        SpectrumDataSetC32* spectra)
{
    // Perform a number of sanity checks on the input data.
    _checkData(timeSeries);

    // Make local copies of the data dimensions.
    unsigned nSubbands      = timeSeries->nSubbands();
    unsigned nPolarisations = timeSeries->nPolarisations();
    unsigned nTimeBlocks    = timeSeries->nTimeBlocks();
    unsigned nTimesPerBlock = timeSeries->nTimesPerBlock();

    // Resize the output spectra blob (if required).
    spectra->resize(nTimeBlocks, nSubbands, nPolarisations, _nChannels);

    // Set the timing parameters - Only need the timestamp of the first packet
    // for this version of the Channeliser.
    spectra->setLofarTimestamp(timeSeries->getLofarTimestamp());
    spectra->setBlockRate(timeSeries->getBlockRate() * _nChannels);

    const float* coeffs = &_coeffs[0];
    unsigned threadId = 0, nThreads = 0, start = 0, end = 0;
    Complex *workBuffer = 0, *filteredSamples = 0;
    Complex const * timeData = 0;
    const Complex* timeStart = timeSeries->constData();
    Complex* spectraStart = spectra->data();

    if (_nChannels == 1)
    {
         // Loop over data to be channelised.
         for (unsigned subband = 0; subband < nSubbands; ++subband)
         {
             for (unsigned pol = 0; pol < nPolarisations; ++pol)
             {
                 for (unsigned block = 0; block < nTimeBlocks; ++block)
                 {
                     // Get pointer to time series array.
                     unsigned index = timeSeries->index(subband, nTimesPerBlock,
                                  pol, nPolarisations, block, nTimeBlocks);
                     timeData = &timeStart[index];
                     for (unsigned t = 0; t < nTimesPerBlock; ++t) {
                         // FFT the filtered sub-band data to form a new spectrum.
                         unsigned indexSpectra = spectra->index(subband, nSubbands,
                                 pol, nPolarisations, (nTimesPerBlock*block)+t, _nChannels);
//                         spectraStart = &spectra->data()[indexSpectra];
                         spectraStart[indexSpectra] = timeData[t];
                     }
                 }
             }
         }

    } else {
        // Set up work buffers (if required).
        unsigned nFilterTaps = _ppfCoeffs.nTaps();
        if (!_buffersInitialised)
            _setupWorkBuffers(nSubbands, nPolarisations, _nChannels, nFilterTaps);

        // Channeliser processing.
        #pragma omp parallel \
            shared(nTimeBlocks, nPolarisations, nSubbands, nFilterTaps, coeffs,\
                    timeStart, spectraStart) \
            private(threadId, nThreads, start, end, workBuffer, filteredSamples, \
                    timeData)
        {
            threadId = omp_get_thread_num();
            nThreads = omp_get_num_threads();

            // Assign processing threads in a round robin fashion to subbands.
            _assign_threads(start, end, nSubbands, nThreads, threadId);

            // Pointer to work buffer for the thread.
            filteredSamples = &_filteredData[threadId][0];

            // Loop over data to be channelised.
            for (unsigned subband = start; subband < end; ++subband)
            {
                for (unsigned pol = 0; pol < nPolarisations; ++pol)
                {
                    for (unsigned block = 0; block < nTimeBlocks; ++block)
                    {
                        // Get pointer to time series array.
                        unsigned index = timeSeries->index(subband, nTimesPerBlock,
                                     pol, nPolarisations, block, nTimeBlocks);
                        timeData = &timeStart[index];

                        // Get a pointer to the work buffer.
                        workBuffer = &(_workBuffer[subband * nPolarisations + pol])[0];

                        // Update buffered (lagged) data for the sub-band.
                        _updateBuffer(timeData, _nChannels, nFilterTaps, workBuffer);

                        // Apply the PPF.
                        _filter(workBuffer, nFilterTaps, _nChannels, coeffs, filteredSamples);

                        // FFT the filtered sub-band data to form a new spectrum.
                        unsigned indexSpectra = spectra->index(subband, nSubbands,
                                pol, nPolarisations, block, _nChannels);
                        _fft(filteredSamples, &spectraStart[indexSpectra]);
                    }
                }
            }

        } // end of parallel region.

    }

}