//GPU implementation of the MLS alg., the returns are in the graphics card void MLSGpuVolumeMapping::updateMapping(const Vector3d* input, Vector3d* output) { //update VBO _updateBuffer(m_pSrcObj, m_pDstObj, vboDefRefVertexArray, input, output); //Perform GPU based deformation const int timerid=4; startFastTimer(timerid); const int nv = m_pDstObj->m_nVertexCount; runGpuMlsDeformation( nv, //length of the problem (here is the vertex length) vboRefVertexArray, //static vertex array of the reference model, float3 vboDefRefVertexArray, //deformed vertex array of the reference model, float3 //&m_pDeviceDefRefVertex[0].x, vboNeighborArray, //neighbourhood array, or connectivity, int4 vboVertexArray, //static vertex array of the visual model, float3 vboDefVertexArray, //deformed vertex array of the visual model, float3 vboQuatArray); //rotation quaternion array, float4 stopFastTimer(timerid); reportTimeDifference(timerid, "GPU MLS time is"); //Copy buffer glBindBuffer(GL_ARRAY_BUFFER, vboDefVertexArray); Vector3f *pDefVert = (Vector3f *)glMapBuffer(GL_ARRAY_BUFFER, GL_READ_WRITE); if (pDefVert==NULL) return; for (int i=0; i<m_pDstObj->m_nVertexCount; i++, pDefVert++){ output[i] = Vector3d(pDefVert->x, pDefVert->y, pDefVert->z); //if (i<10) printf("%d: %lg %lg %lg\n", i, output[i].x, output[i].y, output[i].z); } //printf("\n\n"); glUnmapBuffer(GL_ARRAY_BUFFER); glBindBuffer(GL_ARRAY_BUFFER, 0); return; }
/** * @details * Method to run the channeliser. * * The channeliser performs channelisation of a number of sub-bands containing * a complex time series. * * Parallelisation, by means of openMP threads, is carried out by splitting * the sub-bands as evenly as possible between threads. * * @param[in] timeSeries Buffer of time samples to be channelised. * @param[out] spectrum Set of spectra produced. */ void PPFChanneliser::run(const TimeSeriesDataSetC32* timeSeries, SpectrumDataSetC32* spectra) { // Perform a number of sanity checks on the input data. _checkData(timeSeries); // Make local copies of the data dimensions. unsigned nSubbands = timeSeries->nSubbands(); unsigned nPolarisations = timeSeries->nPolarisations(); unsigned nTimeBlocks = timeSeries->nTimeBlocks(); unsigned nTimesPerBlock = timeSeries->nTimesPerBlock(); // Resize the output spectra blob (if required). spectra->resize(nTimeBlocks, nSubbands, nPolarisations, _nChannels); // Set the timing parameters - Only need the timestamp of the first packet // for this version of the Channeliser. spectra->setLofarTimestamp(timeSeries->getLofarTimestamp()); spectra->setBlockRate(timeSeries->getBlockRate() * _nChannels); const float* coeffs = &_coeffs[0]; unsigned threadId = 0, nThreads = 0, start = 0, end = 0; Complex *workBuffer = 0, *filteredSamples = 0; Complex const * timeData = 0; const Complex* timeStart = timeSeries->constData(); Complex* spectraStart = spectra->data(); if (_nChannels == 1) { // Loop over data to be channelised. for (unsigned subband = 0; subband < nSubbands; ++subband) { for (unsigned pol = 0; pol < nPolarisations; ++pol) { for (unsigned block = 0; block < nTimeBlocks; ++block) { // Get pointer to time series array. unsigned index = timeSeries->index(subband, nTimesPerBlock, pol, nPolarisations, block, nTimeBlocks); timeData = &timeStart[index]; for (unsigned t = 0; t < nTimesPerBlock; ++t) { // FFT the filtered sub-band data to form a new spectrum. unsigned indexSpectra = spectra->index(subband, nSubbands, pol, nPolarisations, (nTimesPerBlock*block)+t, _nChannels); // spectraStart = &spectra->data()[indexSpectra]; spectraStart[indexSpectra] = timeData[t]; } } } } } else { // Set up work buffers (if required). unsigned nFilterTaps = _ppfCoeffs.nTaps(); if (!_buffersInitialised) _setupWorkBuffers(nSubbands, nPolarisations, _nChannels, nFilterTaps); // Channeliser processing. #pragma omp parallel \ shared(nTimeBlocks, nPolarisations, nSubbands, nFilterTaps, coeffs,\ timeStart, spectraStart) \ private(threadId, nThreads, start, end, workBuffer, filteredSamples, \ timeData) { threadId = omp_get_thread_num(); nThreads = omp_get_num_threads(); // Assign processing threads in a round robin fashion to subbands. _assign_threads(start, end, nSubbands, nThreads, threadId); // Pointer to work buffer for the thread. filteredSamples = &_filteredData[threadId][0]; // Loop over data to be channelised. for (unsigned subband = start; subband < end; ++subband) { for (unsigned pol = 0; pol < nPolarisations; ++pol) { for (unsigned block = 0; block < nTimeBlocks; ++block) { // Get pointer to time series array. unsigned index = timeSeries->index(subband, nTimesPerBlock, pol, nPolarisations, block, nTimeBlocks); timeData = &timeStart[index]; // Get a pointer to the work buffer. workBuffer = &(_workBuffer[subband * nPolarisations + pol])[0]; // Update buffered (lagged) data for the sub-band. _updateBuffer(timeData, _nChannels, nFilterTaps, workBuffer); // Apply the PPF. _filter(workBuffer, nFilterTaps, _nChannels, coeffs, filteredSamples); // FFT the filtered sub-band data to form a new spectrum. unsigned indexSpectra = spectra->index(subband, nSubbands, pol, nPolarisations, block, _nChannels); _fft(filteredSamples, &spectraStart[indexSpectra]); } } } } // end of parallel region. } }