int get_platforms(cl_platform_id **platforms) { cl_int err; cl_uint num; *platforms = NULL; err = clGetPlatformIDs(0, 0, &num); CL_CHECK_ERR(err); if (num < 1) return 0; *platforms = (cl_platform_id *) malloc(num * sizeof(cl_platform_id)); if (*platforms == NULL) { fprintf(stderr, "Failed to allocate memory in %s at line %d\n", __FILE__, __LINE__); exit(1); } err = clGetPlatformIDs(num, *platforms, 0); CL_CHECK_ERR(err); return num; }
int get_devices(cl_platform_id platform, cl_device_type device_type, cl_device_id **devices) { cl_int err; cl_uint num; *devices = NULL; err = clGetDeviceIDs(platform, device_type, 0, 0, &num); CL_CHECK_ERR(err); if (num < 1) return 0; *devices = (cl_device_id *) malloc(num * sizeof(cl_device_id)); if (*devices == NULL) { fprintf(stderr, "Failed to allocate memory in %s at line %d\n", __FILE__, __LINE__); exit(1); } err = clGetDeviceIDs(platform, device_type, num, *devices, 0); CL_CHECK_ERR(err); return num; }
SpectrumAnalyzer::~SpectrumAnalyzer() { qDeleteAll(m_ranges); CL_CHECK_ERR("clReleaseKernel", clReleaseKernel(m_circsum)); CL_CHECK_ERR("clReleaseProgram", clReleaseProgram(m_program)); CL_CHECK_ERR("clReleaseMemObject", clReleaseMemObject(m_sampleHistoryCircQueue)); CL_CHECK_ERR("clReleaseCommandQueue", clReleaseCommandQueue(m_command_queue)); CL_CHECK_ERR("clReleaseContext", clReleaseContext(m_context)); }
vector<cpx> cl_fft<cpx>::run(const vector<cpx> &input) { cl_event upload_unmap_evt, start_evt, download_map_evt, *kernel_evts = new cl_event[launches.size()]; cl_int err; // Upload cl_float2 *input_buffer = (cl_float2*)clEnqueueMapBuffer(command_queue, v_samples, CL_TRUE, CL_MAP_WRITE, 0, samplesMemSize, 0, NULL, NULL, &err); CL_CHECK_ERR("clEnqueueMapBuffer", err); for (int i = 0; i < samplesPerRun; i++) { input_buffer[i].x = real(input[i]); input_buffer[i].y = imag(input[i]); } CL_CHECK_ERR("clEnqueueUnmapMemObject", clEnqueueUnmapMemObject(command_queue, v_samples, input_buffer, 0, NULL, &upload_unmap_evt)); // Calcola la FFT cl_mem v_out = runInternal(v_samples, &start_evt, kernel_evts); // Download vector<cpx> result(samplesPerRun); cl_float2 *output_buffer = (cl_float2*)clEnqueueMapBuffer(command_queue, v_out, CL_TRUE, CL_MAP_READ, 0, tmpMemSize, 1, &kernel_evts[launches.size() - 1], &download_map_evt, &err); CL_CHECK_ERR("clEnqueueMapBuffer", err); for (int i = 0; i < samplesPerRun; i++) result[i] = cpx(output_buffer[i].x, output_buffer[i].y); CL_CHECK_ERR("clEnqueueUnmapMemObject", clEnqueueUnmapMemObject(command_queue, v_out, output_buffer, 0, NULL, NULL)); printStatsAndReleaseEvents(upload_unmap_evt, start_evt, kernel_evts, download_map_evt); delete[] kernel_evts; return result; }
static int runTest(cl_context context, cl_command_queue command_queue, I *cl_algorithm_instance, int N, bool print, bool check) { vector<T> input = generateTestData<T>(N); vector<cpx> output(N); cl_int err; const size_t samplesMemSize = cl_deviceDataSize<T>(N); cl_mem v_output, v_input = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, samplesMemSize, &input[0], &err); CL_CHECK_ERR("clCreateBuffer", err); cl_event kernel_evt; v_output = cl_algorithm_instance->run(v_input, &kernel_evt); CL_CHECK_ERR("clReleaseMemObject", clReleaseMemObject(v_input)); cl_float2 *output_buffer = (cl_float2*)clEnqueueMapBuffer(command_queue, v_output, CL_TRUE, CL_MAP_READ, 0, N * sizeof(cl_float2), 1, &kernel_evt, NULL, &err); CL_CHECK_ERR("clEnqueueMapBuffer", err); for (int i = 0; i < N; i++) output[i] = cpx(output_buffer[i].x, output_buffer[i].y); CL_CHECK_ERR("clEnqueueUnmapMemObject", clEnqueueUnmapMemObject(command_queue, v_output, output_buffer, 0, NULL, NULL)); if (print) cerr << output << endl; if (check) { #ifdef ALLOW_NPOT fprintf(stderr, "Calcolo serial naive DFT per riferimento...\n"); const vector<cpx> ref = serial_naive_dft(input); #else fprintf(stderr, "Calcolo serial non-recursive FFT per riferimento...\n"); const vector<cpx> ref = serial_nonrecursive_fft(input); #endif fprintf(stderr, "Distanza massima: %g\n", maxAbsDistance(output, ref)); } return EXIT_SUCCESS; }
QVector<float> SpectrumAnalyzerRangeData::runFFT(cl_mem samplesHistory, cl_int historyOffset, cl_int historyLength) { cl_event presum_evt, algo_evt; CL_CHECK_ERR("clSetKernelArg", clSetKernelArg(circsum, 0, sizeof(cl_mem), &samples)); CL_CHECK_ERR("clSetKernelArg", clSetKernelArg(circsum, 1, sizeof(cl_mem), &samplesHistory)); CL_CHECK_ERR("clSetKernelArg", clSetKernelArg(circsum, 2, sizeof(cl_int), &historyOffset)); CL_CHECK_ERR("clSetKernelArg", clSetKernelArg(circsum, 3, sizeof(cl_int), &historyLength)); CL_CHECK_ERR("clSetKernelArg", clSetKernelArg(circsum, 4, sizeof(cl_int), &presumWindows)); CL_CHECK_ERR("clEnqueueNDRangeKernel", clEnqueueNDRangeKernel(command_queue, circsum, 1, NULL, &windowSize, &presumGroupSize, 0, NULL, &presum_evt )); clEnqueueWaitForEvents(command_queue, 1, &presum_evt); cl_mem output = algorithm.run(samples, &algo_evt); QVector<float> risultato(windowSize); cl_int err; cl_float2 *output_buffer = (cl_float2*)clEnqueueMapBuffer(command_queue, output, CL_TRUE, CL_MAP_READ, 0, windowSize * sizeof(cl_float2), 1, &algo_evt, NULL, &err); CL_CHECK_ERR("clEnqueueMapBuffer", err); for (int i = 0; i < windowSize; i++) risultato[i] = abs(cpx(output_buffer[i].x, output_buffer[i].y)); CL_CHECK_ERR("clEnqueueUnmapMemObject", clEnqueueUnmapMemObject(command_queue, output, output_buffer, 0, NULL, NULL)); CL_CHECK_ERR("clReleaseEvent", clReleaseEvent(presum_evt)); CL_CHECK_ERR("clReleaseEvent", clReleaseEvent(algo_evt)); return risultato; }
char *get_device_info(cl_device_id device, cl_device_info device_info, char **buffer, int *len) { cl_int err; int required_len = 256; do { if (*len < required_len) { *buffer = (char *)realloc(*buffer, required_len * sizeof(char)); if (buffer == NULL) { fprintf(stderr, "Failed to allocate memory in %s at line %d\n", __FILE__, __LINE__); return NULL; } *len = required_len; } err = clGetDeviceInfo(device, device_info, *len, *buffer, &required_len); CL_CHECK_ERR(err); } while (*len < required_len); return *buffer; }
SpectrumAnalyzer::SpectrumAnalyzer(LiveAudioInput *audioIn, cl_platform_id platform, cl_device_id device, const SpectrumAnalyzerRange *ranges, int numRanges, QObject *parent) : QObject(parent) { audioIn->setParent(this); m_context = clhCreateContextSingleDevice(platform, device); m_command_queue = clhCreateCommandQueue(m_context, device, true /* profiling abilitato */); clhEmptyNvidiaCache(); m_program = clhBuildProgram(m_context, device, "tuner/SpectrumAnalyzer.cl"); m_circsum = clhCreateKernel(m_program, "circsum"); m_sampleHistoryLength = m_sampleHistoryOffset = 0; for (int i = 0; i < numRanges; i++) { m_ranges.append(new SpectrumAnalyzerRangeData(platform, device, m_context, m_command_queue, ranges[i], m_circsum)); m_sampleHistoryLength = qMax(m_sampleHistoryLength, ranges[i].windowSize * ranges[i].presumWindows); } cl_int err; m_sampleHistoryCircQueue = clCreateBuffer(m_context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, m_sampleHistoryLength * sizeof(cl_float), NULL, &err); CL_CHECK_ERR("clCreateBuffer", err); // TODO: memset zero del buffer m_sampleRate = audioIn->sampleRate(); connect(audioIn, SIGNAL(newChunkAvailable(QVector<qint16>)), this, SLOT(slotAudioChunkAvailable(QVector<qint16>))); }
cl_fft<T>::~cl_fft() { CL_CHECK_ERR("clReleaseMemObject", clReleaseMemObject(v_samples)); CL_CHECK_ERR("clReleaseMemObject", clReleaseMemObject(v_tmp1)); CL_CHECK_ERR("clReleaseMemObject", clReleaseMemObject(v_tmp2)); CL_CHECK_ERR("clReleaseKernel", clReleaseKernel(k_fftstep_init)); CL_CHECK_ERR("clReleaseKernel", clReleaseKernel(k_fftstep_cpx2cpx)); CL_CHECK_ERR("clReleaseKernel", clReleaseKernel(k_fftstep_real2cpx)); CL_CHECK_ERR("clReleaseKernel", clReleaseKernel(k_fftstep_optibase)); CL_CHECK_ERR("clReleaseProgram", clReleaseProgram(program)); }
cl_program get_program_from_file(cl_context context, cl_device_id device, const char *filename) { FILE *fp; int size; char *buffer; cl_int err; cl_program program; char buf[100000]; /* Read file into buffer. */ fp = fopen(filename, "r"); if (fp == NULL) { fprintf(stderr, "Failed to open file: %s\n", filename); exit(1); } fseek(fp, 0, SEEK_END); size = ftell(fp); rewind(fp); buffer = (char *) malloc((size+1) * sizeof(char)); buffer[size] = '\0'; fread(buffer, sizeof(char), size, fp); fclose(fp); /* Create program. */ program = clCreateProgramWithSource(context, 1, &buffer, NULL, &err); CL_CHECK_ERR(err); /* Build program. */ if (clBuildProgram(program, 1, &device, "", NULL, NULL) != CL_SUCCESS) { clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 100000, buf, NULL); fprintf(stderr, "CL Compilation failed:\n%s", buffer); exit(1); } free(buffer); err = clUnloadCompiler(); CL_CHECK_ERR(err); return program; }
SpectrumAnalyzerRangeData::SpectrumAnalyzerRangeData(cl_platform_id platform, cl_device_id device, cl_context context, cl_command_queue command_queue, const SpectrumAnalyzerRange &range, cl_kernel circsum) : firstKey(range.firstKey), lastKey(range.lastKey), algorithm(platform, device, context, command_queue, range.windowSize), command_queue(command_queue), circsum(circsum), windowSize(range.windowSize), presumWindows(range.presumWindows) { cl_int err; samples = clCreateBuffer(context, CL_MEM_READ_ONLY, windowSize * sizeof(cl_float), NULL, &err); CL_CHECK_ERR("clCreateBuffer", err); }
cl_context clhCreateContextSingleDevice(cl_platform_id platform, cl_device_id device) { const cl_context_properties ctx_prop[] = { // è una lista di coppie CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 // terminatore lista }; cl_int err; cl_context result = clCreateContext(ctx_prop, 1, &device, NULL, NULL, &err); CL_CHECK_ERR("clCreateContext", err); return result; }
void SpectrumAnalyzer::slotAudioChunkAvailable(const QVector<qint16> &data) { // Assume che il numero di dati da aggiungere sia un sottomultiplo di m_sampleHistoryLength cl_int err; cl_float *buffer = (cl_float*)clEnqueueMapBuffer(m_command_queue, m_sampleHistoryCircQueue, CL_TRUE, CL_MAP_WRITE, m_sampleHistoryOffset * sizeof(cl_float), data.size() * sizeof(cl_float), 0, NULL, NULL, &err); CL_CHECK_ERR("clEnqueueMapBuffer", err); const float gain = 1.0 / 5000; for (int i = 0; i < data.size(); i++) buffer[i] = data[i] * gain; CL_CHECK_ERR("clEnqueueUnmapMemObject", clEnqueueUnmapMemObject(m_command_queue, m_sampleHistoryCircQueue, buffer, 0, NULL, NULL)); if ((m_sampleHistoryOffset += data.size()) == m_sampleHistoryLength) m_sampleHistoryOffset = 0; QSet<int> pressedKeys; for (int i = 0; i < m_ranges.size(); i++) { const QVector<float> ft = m_ranges[i]->runFFT(m_sampleHistoryCircQueue, m_sampleHistoryOffset, m_sampleHistoryLength); pressedKeys += analyzeFT(i, ft); } emit pressedKeysAvailable(pressedKeys); }
cl_mem cl_fft<T>::run(const cl_mem input, cl_event *out_finishEvent) { cl_event start_evt, *kernel_evts = new cl_event[launches.size()]; cl_mem v_out = runInternal(input, &start_evt, kernel_evts); if (out_finishEvent) { *out_finishEvent = kernel_evts[launches.size() - 1]; CL_CHECK_ERR("clRetainEvent", clRetainEvent(*out_finishEvent)); } #if 0 printStatsAndReleaseEvents(0, start_evt, kernel_evts, 0); #else CL_CHECK_ERR("clReleaseEvent", clRetainEvent(start_evt)); for (unsigned int i = 0; i < launches.size(); i++) CL_CHECK_ERR("clReleaseEvent", clReleaseEvent(kernel_evts[i])); #endif delete[] kernel_evts; return v_out; }
void cl_fft<T>::init() { program = clhBuildProgram(context, device, "dft-algorithms/cl_fft.cl"); k_fftstep_init = clhCreateKernel(program, "fftstep_init"); k_fftstep_cpx2cpx = clhCreateKernel(program, "fftstep_cpx2cpx"); k_fftstep_real2cpx = clhCreateKernel(program, "fftstep_real2cpx"); k_fftstep_optibase = clhCreateKernel(program, "fftstep_optibase"); cl_image_format fmt; fmt.image_channel_order = cl_channelOrder<cpx>(); fmt.image_channel_data_type = CL_FLOAT; // deve essere una una potenza di due const size_t maxGroupSize = atoi(getenv("GS_X") ?: "256"); // Parametri di lancio dei kernel mtx_cpx2cpx e mtx_real2cpx // Griglia con una riga per ogni coppia di righe nella matrice launch_step tmp; tmp.globalSize[0] = samplesPerRun / 2; tmp.globalSize[1] = 1; tmp.groupSize[0] = maxGroupSize; tmp.groupSize[1] = 1; // Calcola log2 esatto di tmp.globalSize[0] int Wshift = 0; while ((1 << Wshift) != tmp.globalSize[0]) Wshift++; while (tmp.globalSize[0] != 0) { if (tmp.groupSize[0] > tmp.globalSize[0]) tmp.groupSize[0] = tmp.globalSize[0]; tmp.groupSize[1] = min(maxGroupSize / tmp.groupSize[0], tmp.globalSize[1]); if (launches.size() != 1 && tmp.globalSize[0] == 2 && tmp.globalSize[1] >= OPTIBASE_GS ) { // sotto queste condizioni possiamo usare il kernel optibase tmp.globalSize[0] = samplesPerRun / 4; tmp.globalSize[1] = 1; tmp.groupSize[0] = OPTIBASE_GS; tmp.groupSize[1] = 1; tmp.isOptibase = true; } else { tmp.Wshift = Wshift; tmp.isOptibase = false; } launches.push_back(tmp); if (tmp.isOptibase) break; tmp.globalSize[0] /= 2; tmp.globalSize[1] *= 2; Wshift--; } cl_int err; size_t twiddleFactorsCount = samplesPerRun / 2; twiddleFactorsMemSize = twiddleFactorsCount * sizeof(cl_float2); size_t rows, cols; if (twiddleFactorsCount <= 4096) { cols = twiddleFactorsCount; rows = 1; } else { cols = 4096; rows = (twiddleFactorsCount + 4096 - 1) / 4096; } v_twiddleFactors = clCreateImage2D(context, CL_MEM_READ_WRITE, &fmt, cols, rows, 0, NULL, &err); CL_CHECK_ERR("clCreateBuffer", err); fprintf(stderr, "Memoria occupata dai twiddle factors [N=%d]: %g KiB\n\n", samplesPerRun, twiddleFactorsMemSize / 1.024e3); samplesMemSize = cl_deviceDataSize<T>(samplesPerRun); v_samples = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, samplesMemSize, NULL, &err); CL_CHECK_ERR("clCreateBuffer", err); tmpMemSize = samplesPerRun * sizeof(cl_float2); v_tmp1 = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, tmpMemSize, NULL, &err); CL_CHECK_ERR("clCreateBuffer", err); v_tmp2 = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, tmpMemSize, NULL, &err); CL_CHECK_ERR("clCreateBuffer", err); cl_event init_evt; size_t initGroupSize = min(maxGroupSize, twiddleFactorsCount); CL_CHECK_ERR("clSetKernelArg", clSetKernelArg(k_fftstep_init, 0, sizeof(cl_mem), &v_twiddleFactors)); CL_CHECK_ERR("clEnqueueNDRangeKernel", clEnqueueNDRangeKernel(command_queue, k_fftstep_init, 1, NULL, &twiddleFactorsCount, &initGroupSize, 0, NULL, &init_evt )); const float init_secs = clhEventWaitAndGetDuration(init_evt); const float init_memSizeMiB = twiddleFactorsMemSize / SIZECONV_MB; fprintf(stderr, "%s [N=%d, GS=%d]:\n", "OpenCL FFT twiddle factors initialization", samplesPerRun, maxGroupSize); fprintf(stderr, " kernel %g ms, %g MiB/s, %g valori/s\n", init_secs * 1e3, init_memSizeMiB / init_secs, samplesPerRun / 2 / init_secs); CL_CHECK_ERR("clReleaseEvent", clReleaseEvent(init_evt)); }
int main(int argc, const char **argv) { int input_size; bool use_complex_inputs; bool print = false; bool check = false; int cl_platform_index = 0; int cl_device_index = 0; // Parsing degli eventuali ultimi due argomenti numerici, da destra verso // sinistra if (argc >= 4 && isdigit_string(argv[argc - 1])) cl_platform_index = atoi(argv[--argc]); if (argc >= 4 && isdigit_string(argv[argc - 1])) { cl_device_index = cl_platform_index; cl_platform_index = atoi(argv[--argc]); } if ( !(argc == 3 || (argc == 4 && (print = !strcmp(argv[3], "print"))) || (argc == 4 && (check = !strcmp(argv[3], "check"))) ) || (use_complex_inputs = !!strcmp(argv[1], "real")) == !!strcmp(argv[1], "complex") || (input_size = atoi(argv[2])) <= 0) { #ifdef ALLOW_NPOT const char *pot_text = ""; #else const char *pot_text = " (potenza di due)"; #endif cerr << "Uso: " << argv[0] << " <real | complex> input-size [print | check] [cl-platform-num [cl-device-num]]" << endl << endl; cerr << " real Usa input di tipo reale" << endl; cerr << " complex Usa input di tipo complesso" << endl; cerr << " input-size Numero di samples da passare in input" << pot_text << endl; cerr << " print Mostra output" << endl; #ifdef ALLOW_NPOT cerr << " check Confronta output con serial naive DFT" << endl; #else cerr << " check Confronta output con serial non-recursive FFT" << endl; #endif cerr << " cl-platform-num Indice della piattaforma OpenCL da utilizzare (default: 0)" << endl; cerr << " cl-device-num Indice del dispositivo OpenCL da utilizzare (default: 0)" << endl; cerr << endl; cerr << "Piattaforme e dispositivi OpenCL disponibili:" << endl; vector<string> platforms = clhAvailablePlatformNames(); if (platforms.size() == 0) { cerr << "(nessuna piattaforma)" << endl; } else { for (unsigned i = 0; i < platforms.size(); i++) { cerr << "#" << i << " " << platforms[i] << endl; vector<string> devices = clhAvailableDeviceNames(clhSelectPlatform(i)); if (devices.size() == 0) { cerr << " (nessun dispositivo)" << endl; } else { for (unsigned j = 0; j < devices.size(); j++) cerr << " -> " << i << " " << j << " - " << devices[j] << endl; } } } cerr << endl; return EXIT_FAILURE; } #ifndef ALLOW_NPOT if (input_size & (input_size-1)) { cerr << "input-size deve essere una potenza di 2" << endl; return EXIT_FAILURE; } #endif cl_platform_id platform = clhSelectPlatform(cl_platform_index); cerr << "CL platform: " << clhGetPlatformFriendlyName(platform) << endl; cl_device_id device = clhSelectDevice(platform, cl_device_index); cerr << "CL device: " << clhGetDeviceFriendlyName(device) << endl; cl_context context = clhCreateContextSingleDevice(platform, device); cl_command_queue command_queue = clhCreateCommandQueue(context, device, true /* profiling abilitato */); clhEmptyNvidiaCache(); if (use_complex_inputs) { ALGOCLASS<cpx> instance(platform, device, context, command_queue, input_size); cerr << "sleep(1)" << endl; sleep(1); runTest<cpx>(context, command_queue, &instance, input_size, print, check); } else { ALGOCLASS<float> instance(platform, device, context, command_queue, input_size); cerr << "sleep(1)" << endl; sleep(1); runTest<float>(context, command_queue, &instance, input_size, print, check); } CL_CHECK_ERR("clReleaseCommandQueue", clReleaseCommandQueue(command_queue)); CL_CHECK_ERR("clReleaseContext", clReleaseContext(context)); }
void cl_fft<T>::printStatsAndReleaseEvents(cl_event upload_unmap_evt, cl_event start_evt, cl_event *kernel_evts, cl_event download_map_evt) { const float step0_memSizeMiB = (samplesMemSize + tmpMemSize) / SIZECONV_MB; const float stepN_memSizeMiB = (2*tmpMemSize) / SIZECONV_MB; fprintf(stderr, "%s [N=%d]:\n", cl_fft_algoName<T>(), samplesPerRun); if (upload_unmap_evt) { const float upload_secs = clhEventWaitAndGetDuration(upload_unmap_evt); const float upload_memSizeMiB = samplesMemSize / SIZECONV_MB; fprintf(stderr, " upload %g ms, %g MiB/s\n", upload_secs * 1e3, upload_memSizeMiB / upload_secs); CL_CHECK_ERR("clReleaseEvent", clReleaseEvent(upload_unmap_evt)); } float kernel_secs = 0; float kernel_mem = 0; for (unsigned int i = 0; i < launches.size(); i++) { const float step_secs = clhEventWaitAndGetDuration(kernel_evts[i]); const float memSizeMiB = (i == 0) ? step0_memSizeMiB : stepN_memSizeMiB; char kernel_name[30]; if (launches[i].isOptibase == false) strcpy(kernel_name, i == 0 ? cl_fft_kernelName<T>() : "cpx2cpx"); else strcpy(kernel_name, "optibase"); fprintf(stderr, " step%d (%s) [GRID=%dx%d GS=%dx%d] %g ms, %g MiB/s\n", i, kernel_name, (int)(launches[i].globalSize[0] / launches[i].groupSize[0]), (int)(launches[i].globalSize[1] / launches[i].groupSize[1]), (int)launches[i].groupSize[0], (int)launches[i].groupSize[1], step_secs * 1e3, memSizeMiB / step_secs); kernel_mem += memSizeMiB; kernel_secs += step_secs; } // Calcola statistiche globali in base alla somma del tempo di esecuzione dei singoli step fprintf(stderr, " total(SUM) %g ms, %g MiB/s, %g Ksamples/s\n", kernel_secs * 1e3, kernel_mem / kernel_secs, 1e-3 * samplesPerRun / kernel_secs); // Calcola statistiche globali in base al tempo intercorso tra il marker e l'ultimo kernel kernel_secs = clhEventWaitAndGetDifference(start_evt, kernel_evts[launches.size() - 1]); fprintf(stderr, " total(MARKER) %g ms, %g MiB/s, %g Ksamples/s\n", kernel_secs * 1e3, kernel_mem / kernel_secs, 1e-3 * samplesPerRun / kernel_secs); if (download_map_evt) { const float download_secs = clhEventWaitAndGetDuration(download_map_evt); const float download_memSizeMiB = tmpMemSize / SIZECONV_MB; fprintf(stderr, " download %g ms, %g MiB/s\n", download_secs * 1e3, download_memSizeMiB / download_secs); CL_CHECK_ERR("clReleaseEvent", clReleaseEvent(download_map_evt)); } if (getenv("PRINT_KERNEL_EXECUTION_TIME")) printf("%g\n", kernel_secs * 1e3); CL_CHECK_ERR("clReleaseEvent", clReleaseEvent(start_evt)); for (unsigned int i = 0; i < launches.size(); i++) CL_CHECK_ERR("clReleaseEvent", clReleaseEvent(kernel_evts[i])); }
void call_kernel_mem(int nc,int np,float *sdens_out,char * cl_name) { //---------------------------------------------------------------------------- // Initialization size_t global; // global domain size for our calculation size_t local; // local domain size for our calculation //cl_device_id device_id; // compute device id cl_context context; // compute context cl_command_queue commands; // compute command queue cl_program program; // compute program cl_kernel kernel; // compute kernel //---------------------------------------------------------------------------- // Claim Variables for Device cl_mem output; // device memory used for the output array //---------------------------------------------------------------------------- // Setup Context of OpenCL int err; int gpu = 1; unsigned int ndevices = 0; //err = clGetDeviceIDs(NULL, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, &device_id, &ndevices); //printf("--------------------------%d\n", ndevices); //context = clCreateContext(0, 1, &device_id, NULL, NULL, &err); //commands = clCreateCommandQueue(context, device_id, 0, &err); clGetDeviceIDs(NULL, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, NULL, &ndevices); cl_device_id devices[ndevices]; err = clGetDeviceIDs(NULL, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, ndevices, devices, NULL); printf("--------------------------%d\n",err); //context = clCreateContext(NULL, ndevices, devices, NULL, NULL, &err); context = CL_CHECK_ERR(clCreateContext(NULL, ndevices, devices, &pfn_notify, NULL, &_err)); printf("--------------------------%d\n",err); commands = clCreateCommandQueue(context, devices[1], 0, &err); printf("--------------------------%d\n",err); //--------------------------------------------------------------------- //* Load kernel source file */ int MAX_SOURCE_SIZE = 1048576; FILE * fp; //const char fileName[] = "./sph_opencl.cl"; size_t KernelSourceSize; char *KernelSource; fp = fopen(cl_name, "r"); if (!fp) { fprintf(stderr, "Failed to load kernel.\n"); exit(1); } KernelSource = (char *)malloc(MAX_SOURCE_SIZE*sizeof(char)); KernelSourceSize = fread(KernelSource,sizeof(char), MAX_SOURCE_SIZE, fp); program = clCreateProgramWithSource(context, 1, (const char **) & KernelSource, NULL, &err); printf("--------------------------%d\n",err); clBuildProgram(program, 0, NULL, NULL, NULL, NULL); kernel = clCreateKernel(program, "sph_cl", &err); printf("--------------------------%d\n",err); //---------------------------------------------------------------------------- // Allocate Memory for Device output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float)*nc*nc, NULL, NULL); //---------------------------------------------------------------------------- // Passing Parameters into Kernel Functions err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &output); printf("passing--------------------------%d\n",err); err = clSetKernelArg(kernel, 1, sizeof(int), &nc); printf("--------------------------%d\n",err); err = clSetKernelArg(kernel, 2, sizeof(int), &np); printf("passing--------------------------%d\n",err); //---------------------------------------------------------------------------- // Runing Kernel Functions clGetKernelWorkGroupInfo(kernel, devices[1], CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL); printf("********************%zd\n", local); global = nc*nc; local = 32; //clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global,&local, 0, NULL, NULL); err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global,&local, 0, NULL, NULL); printf("--------------------------%d\n",err); //err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL,&global,NULL, 0, NULL, NULL); clFinish(commands); //---------------------------------------------------------------------------- // Output Array printf("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&\n"); err = clEnqueueReadBuffer( commands, output, CL_TRUE, 0, sizeof(float)*nc*nc, sdens_out, 0, NULL, NULL ); printf("^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^%d\n",err); //---------------------------------------------------------------------------- // Free the Memory in Device clReleaseMemObject(output); //---------------------------------------------------------------------------- clReleaseProgram(program); clReleaseKernel(kernel); clReleaseCommandQueue(commands); clReleaseContext(context); //printf("nKernel source:\n\n %s \n", KernelSource); free(KernelSource); }
cl_mem cl_fft<float>::runInternal(const cl_mem input, cl_event *out_startEvent, cl_event *out_kernelEvents) { CL_CHECK_ERR("clEnqueueMarker", clEnqueueMarker(command_queue, out_startEvent)); // Lanci del kernel const cl_uint Nhalf = samplesPerRun / 2; cl_event prev_evt = *out_startEvent; for (unsigned int i = 0; i < launches.size(); i++) { if (launches[i].isOptibase == false) { // Solo il primo step ha input reali cl_kernel kernel = (i == 0) ? k_fftstep_real2cpx : k_fftstep_cpx2cpx; CL_CHECK_ERR("clSetKernelArg", clSetKernelArg(kernel, 0, sizeof(cl_mem), (i == 0) ? &input : &v_tmp1)); CL_CHECK_ERR("clSetKernelArg", clSetKernelArg(kernel, 1, sizeof(cl_mem), &v_tmp2)); CL_CHECK_ERR("clSetKernelArg", clSetKernelArg(kernel, 2, sizeof(cl_mem), &v_twiddleFactors)); CL_CHECK_ERR("clSetKernelArg", clSetKernelArg(kernel, 3, sizeof(cl_uint), &launches[i].Wshift)); CL_CHECK_ERR("clSetKernelArg", clSetKernelArg(kernel, 4, sizeof(cl_uint), &Nhalf)); CL_CHECK_ERR("clEnqueueNDRangeKernel", clEnqueueNDRangeKernel(command_queue, kernel, 2, NULL, launches[i].globalSize, launches[i].groupSize, 1, &prev_evt, &out_kernelEvents[i] )); } else { CL_CHECK_ERR("clSetKernelArg", clSetKernelArg(k_fftstep_optibase, 0, sizeof(cl_mem), &v_tmp1)); CL_CHECK_ERR("clSetKernelArg", clSetKernelArg(k_fftstep_optibase, 1, sizeof(cl_mem), &v_tmp2)); CL_CHECK_ERR("clSetKernelArg", clSetKernelArg(k_fftstep_optibase, 2, sizeof(cl_mem), &v_twiddleFactors)); CL_CHECK_ERR("clSetKernelArg", clSetKernelArg(k_fftstep_optibase, 3, sizeof(cl_uint), &Nhalf)); CL_CHECK_ERR("clEnqueueNDRangeKernel", clEnqueueNDRangeKernel(command_queue, k_fftstep_optibase, 1, NULL, launches[i].globalSize, launches[i].groupSize, 1, &prev_evt, &out_kernelEvents[i] )); } prev_evt = out_kernelEvents[i]; swap(v_tmp1, v_tmp2); } return v_tmp1; }
int main(int argc, char **argv) { printf("enter demo main\n"); fflush(stdout); putenv("POCL_VERBOSE=1"); putenv("POCL_DEVICES=basic"); putenv("POCL_LEAVE_TEMP_DIRS=1"); putenv("POCL_LEAVE_KERNEL_COMPILER_TEMP_FILES=1"); putenv("POCL_TEMP_DIR=pocl"); putenv("POCL_CACHE_DIR=pocl"); putenv("POCL_WORK_GROUP_METHOD=spmd"); if(argc >= 2){ printf("argv[1]:%s:\n",argv[1]); if(!strcmp(argv[1], "h")) putenv("POCL_WORK_GROUP_METHOD=spmd"); if(!strcmp(argv[1], "c")) putenv("POCL_CROSS_COMPILE=1"); } if(argc >= 3){ printf("argv[2]:%s:\n",argv[2]); if(!strcmp(argv[2], "h")) putenv("POCL_WORK_GROUP_METHOD=spmd"); if(!strcmp(argv[2], "c")) putenv("POCL_CROSS_COMPILE=1"); } //putenv("LD_LIBRARY_PATH=/scratch/colins/build/linux/fs/lib"); //putenv("LTDL_LIBRARY_PATH=/scratch/colins/build/linux/fs/lib"); //lt_dlsetsearchpath("/scratch/colins/build/linux/fs/lib"); //printf("SEARCH_PATH:%s\n",lt_dlgetsearchpath()); cl_platform_id platforms[100]; cl_uint platforms_n = 0; CL_CHECK(clGetPlatformIDs(100, platforms, &platforms_n)); printf("=== %d OpenCL platform(s) found: ===\n", platforms_n); for (int i=0; i<platforms_n; i++) { char buffer[10240]; printf(" -- %d --\n", i); CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_PROFILE, 10240, buffer, NULL)); printf(" PROFILE = %s\n", buffer); CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_VERSION, 10240, buffer, NULL)); printf(" VERSION = %s\n", buffer); CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, 10240, buffer, NULL)); printf(" NAME = %s\n", buffer); CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, 10240, buffer, NULL)); printf(" VENDOR = %s\n", buffer); CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, 10240, buffer, NULL)); printf(" EXTENSIONS = %s\n", buffer); } if (platforms_n == 0) return 1; cl_device_id devices[100]; cl_uint devices_n = 0; // CL_CHECK(clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, 100, devices, &devices_n)); CL_CHECK(clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_GPU, 100, devices, &devices_n)); printf("=== %d OpenCL device(s) found on platform:\n", platforms_n); for (int i=0; i<devices_n; i++) { char buffer[10240]; cl_uint buf_uint; cl_ulong buf_ulong; printf(" -- %d --\n", i); CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(buffer), buffer, NULL)); printf(" DEVICE_NAME = %s\n", buffer); CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_VENDOR, sizeof(buffer), buffer, NULL)); printf(" DEVICE_VENDOR = %s\n", buffer); CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_VERSION, sizeof(buffer), buffer, NULL)); printf(" DEVICE_VERSION = %s\n", buffer); CL_CHECK(clGetDeviceInfo(devices[i], CL_DRIVER_VERSION, sizeof(buffer), buffer, NULL)); printf(" DRIVER_VERSION = %s\n", buffer); CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(buf_uint), &buf_uint, NULL)); printf(" DEVICE_MAX_COMPUTE_UNITS = %u\n", (unsigned int)buf_uint); CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(buf_uint), &buf_uint, NULL)); printf(" DEVICE_MAX_CLOCK_FREQUENCY = %u\n", (unsigned int)buf_uint); CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(buf_ulong), &buf_ulong, NULL)); printf(" DEVICE_GLOBAL_MEM_SIZE = %llu\n", (unsigned long long)buf_ulong); } if (devices_n == 0) return 1; cl_context context; context = CL_CHECK_ERR(clCreateContext(NULL, 1, devices+1, &pfn_notify, NULL, &_err)); cl_command_queue queue; queue = CL_CHECK_ERR(clCreateCommandQueue(context, devices[1], CL_QUEUE_PROFILING_ENABLE, &_err)); cl_kernel kernel = 0; cl_mem memObjects[3] = {0,0,0}; // Create OpenCL program - first attempt to load cached binary. // If that is not available, then create the program from source // and store the binary for future use. std::cout << "Attempting to create program from binary..." << std::endl; cl_program program = CreateProgramFromBinary(context, devices[1], "kernel.cl.bin"); if (program == NULL) { std::cout << "Binary not loaded, create from source..." << std::endl; program = CreateProgram(context, devices[1], "kernel.cl"); if (program == NULL) { Cleanup(context, queue, program, kernel, memObjects); return 1; } std::cout << "Save program binary for future run..." << std::endl; if (SaveProgramBinary(program, devices[1], "kernel.cl.bin") == false) { std::cerr << "Failed to write program binary" << std::endl; Cleanup(context, queue, program, kernel, memObjects); return 1; } } else { std::cout << "Read program from binary." << std::endl; } printf("attempting to create input buffer\n"); fflush(stdout); cl_mem input_bufferA; input_bufferA = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float)*NUM_DATA*NUM_DATA, NULL, &_err)); cl_mem input_bufferB; input_bufferB = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float)*NUM_DATA*NUM_DATA, NULL, &_err)); printf("attempting to create output buffer\n"); fflush(stdout); cl_mem output_buffer; output_buffer = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float)*NUM_DATA*NUM_DATA, NULL, &_err)); memObjects[0] = input_bufferA; memObjects[1] = input_bufferB; memObjects[2] = output_buffer; size_t width = NUM_DATA; printf("attempting to create kernel\n"); fflush(stdout); kernel = CL_CHECK_ERR(clCreateKernel(program, "sgemm_single", &_err)); CL_CHECK(clSetKernelArg(kernel, 0, sizeof(input_bufferA), &input_bufferA)); CL_CHECK(clSetKernelArg(kernel, 1, sizeof(input_bufferB), &input_bufferB)); CL_CHECK(clSetKernelArg(kernel, 2, sizeof(output_buffer), &output_buffer)); CL_CHECK(clSetKernelArg(kernel, 3, sizeof(width), &width)); printf("attempting to enqueue write buffer\n"); fflush(stdout); for (int i=0; i<NUM_DATA*NUM_DATA; i++) { float in = ((float)rand()/(float)(RAND_MAX)) * 100.0; CL_CHECK(clEnqueueWriteBuffer(queue, input_bufferA, CL_TRUE, i*sizeof(float), 4, &in, 0, NULL, NULL)); in = ((float)rand()/(float)(RAND_MAX)) * 100.0; CL_CHECK(clEnqueueWriteBuffer(queue, input_bufferB, CL_TRUE, i*sizeof(float), 4, &in, 0, NULL, NULL)); } cl_event kernel_completion; const size_t local_work_size[3] = { 64, 1, 1}; // a_offset size_t global_work_size[3] = { NUM_DATA, NUM_DATA, NUM_DATA }; printf("attempting to enqueue kernel\n"); fflush(stdout); CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &kernel_completion)); printf("Enqueue'd kerenel\n"); fflush(stdout); cl_ulong time_start, time_end; CL_CHECK(clWaitForEvents(1, &kernel_completion)); CL_CHECK(clGetEventProfilingInfo(kernel_completion, CL_PROFILING_COMMAND_START, sizeof(time_start), &time_start, NULL)); CL_CHECK(clGetEventProfilingInfo(kernel_completion, CL_PROFILING_COMMAND_END, sizeof(time_end), &time_end, NULL)); double elapsed = time_end - time_start; printf("time(ns):%lg\n",elapsed); CL_CHECK(clReleaseEvent(kernel_completion)); printf("Result:"); for (int i=0; i<NUM_DATA*NUM_DATA; i++) { float data; CL_CHECK(clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, i*sizeof(float), 4, &data, 0, NULL, NULL)); //printf(" %f", data); } printf("\n"); CL_CHECK(clReleaseMemObject(memObjects[0])); CL_CHECK(clReleaseMemObject(memObjects[1])); CL_CHECK(clReleaseMemObject(memObjects[2])); CL_CHECK(clReleaseKernel(kernel)); CL_CHECK(clReleaseProgram(program)); CL_CHECK(clReleaseContext(context)); return 0; }
int main(int argc, char **argv) { cl_platform_id platforms[100]; cl_uint platforms_n = 0; CL_CHECK(clGetPlatformIDs(100, platforms, &platforms_n)); printf("=== %d OpenCL platform(s) found: ===\n", platforms_n); for (int i=0; i<platforms_n; i++) { char buffer[10240]; printf(" -- %d --\n", i); CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_PROFILE, 10240, buffer, NULL)); printf(" PROFILE = %s\n", buffer); CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_VERSION, 10240, buffer, NULL)); printf(" VERSION = %s\n", buffer); CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, 10240, buffer, NULL)); printf(" NAME = %s\n", buffer); CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, 10240, buffer, NULL)); printf(" VENDOR = %s\n", buffer); CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, 10240, buffer, NULL)); printf(" EXTENSIONS = %s\n", buffer); } if (platforms_n == 0) return 1; cl_device_id devices[100]; cl_uint devices_n = 0; // CL_CHECK(clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, 100, devices, &devices_n)); CL_CHECK(clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_GPU, 100, devices, &devices_n)); printf("=== %d OpenCL device(s) found on platform:\n", platforms_n); for (int i=0; i<devices_n; i++) { char buffer[10240]; cl_uint buf_uint; cl_ulong buf_ulong; printf(" -- %d --\n", i); CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(buffer), buffer, NULL)); printf(" DEVICE_NAME = %s\n", buffer); CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_VENDOR, sizeof(buffer), buffer, NULL)); printf(" DEVICE_VENDOR = %s\n", buffer); CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_VERSION, sizeof(buffer), buffer, NULL)); printf(" DEVICE_VERSION = %s\n", buffer); CL_CHECK(clGetDeviceInfo(devices[i], CL_DRIVER_VERSION, sizeof(buffer), buffer, NULL)); printf(" DRIVER_VERSION = %s\n", buffer); CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(buf_uint), &buf_uint, NULL)); printf(" DEVICE_MAX_COMPUTE_UNITS = %u\n", (unsigned int)buf_uint); CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(buf_uint), &buf_uint, NULL)); printf(" DEVICE_MAX_CLOCK_FREQUENCY = %u\n", (unsigned int)buf_uint); CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(buf_ulong), &buf_ulong, NULL)); printf(" DEVICE_GLOBAL_MEM_SIZE = %llu\n", (unsigned long long)buf_ulong); } if (devices_n == 0) return 1; cl_context context; context = CL_CHECK_ERR(clCreateContext(NULL, 1, devices, &pfn_notify, NULL, &_err)); const char *program_source[] = { "__kernel void simple_demo(__global int *src, __global int *dst, int factor)\n", "{\n", " int i = get_global_id(0);\n", " dst[i] = src[i] * factor;\n", "}\n" }; cl_program program; program = CL_CHECK_ERR(clCreateProgramWithSource(context, sizeof(program_source)/sizeof(*program_source), program_source, NULL, &_err)); if (clBuildProgram(program, 1, devices, "", NULL, NULL) != CL_SUCCESS) { char buffer[10240]; clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, NULL); fprintf(stderr, "CL Compilation failed:\n%s", buffer); abort(); } CL_CHECK(clUnloadCompiler()); cl_mem input_buffer; input_buffer = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(int)*NUM_DATA, NULL, &_err)); cl_mem output_buffer; output_buffer = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(int)*NUM_DATA, NULL, &_err)); int factor = 2; cl_kernel kernel; kernel = CL_CHECK_ERR(clCreateKernel(program, "simple_demo", &_err)); CL_CHECK(clSetKernelArg(kernel, 0, sizeof(input_buffer), &input_buffer)); CL_CHECK(clSetKernelArg(kernel, 1, sizeof(output_buffer), &output_buffer)); CL_CHECK(clSetKernelArg(kernel, 2, sizeof(factor), &factor)); cl_command_queue queue; queue = CL_CHECK_ERR(clCreateCommandQueue(context, devices[0], 0, &_err)); for (int i=0; i<NUM_DATA; i++) { CL_CHECK(clEnqueueWriteBuffer(queue, input_buffer, CL_TRUE, i*sizeof(int), sizeof(int), &i, 0, NULL, NULL)); } cl_event kernel_completion; size_t global_work_size[1] = { NUM_DATA }; CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, &kernel_completion)); CL_CHECK(clWaitForEvents(1, &kernel_completion)); CL_CHECK(clReleaseEvent(kernel_completion)); printf("Result:"); for (int i=0; i<NUM_DATA; i++) { int data; CL_CHECK(clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, i*sizeof(int), sizeof(int), &data, 0, NULL, NULL)); printf(" %d", data); } printf("\n"); CL_CHECK(clReleaseMemObject(input_buffer)); CL_CHECK(clReleaseMemObject(output_buffer)); CL_CHECK(clReleaseKernel(kernel)); CL_CHECK(clReleaseProgram(program)); CL_CHECK(clReleaseContext(context)); return 0; }
SpectrumAnalyzerRangeData::~SpectrumAnalyzerRangeData() { CL_CHECK_ERR("clReleaseMemObject", clReleaseMemObject(samples)); }