Пример #1
0
int get_platforms(cl_platform_id **platforms)
{
	cl_int err;
	cl_uint num;
	
	*platforms = NULL;

	err = clGetPlatformIDs(0, 0, &num);
    CL_CHECK_ERR(err);
    
	if (num < 1)
		return 0;
	
	*platforms = (cl_platform_id *) malloc(num * sizeof(cl_platform_id));
	if (*platforms == NULL)
	{
		fprintf(stderr, "Failed to allocate memory in %s at line %d\n", __FILE__, __LINE__);
		exit(1);
	}
	
	err = clGetPlatformIDs(num, *platforms, 0);
    CL_CHECK_ERR(err);

	return num;
}
Пример #2
0
int get_devices(cl_platform_id platform, cl_device_type device_type, cl_device_id **devices)
{
	cl_int err;
	cl_uint num;

	*devices = NULL;

	err = clGetDeviceIDs(platform, device_type, 0, 0, &num);
    CL_CHECK_ERR(err);
    
	if (num < 1)
		return 0;
	
	*devices = (cl_device_id *) malloc(num * sizeof(cl_device_id));
	if (*devices == NULL)
	{
		fprintf(stderr, "Failed to allocate memory in %s at line %d\n", __FILE__, __LINE__);
		exit(1);
	}

	err = clGetDeviceIDs(platform, device_type, num, *devices, 0);
    CL_CHECK_ERR(err);
	
	return num;
}
Пример #3
0
SpectrumAnalyzer::~SpectrumAnalyzer()
{
	qDeleteAll(m_ranges);

	CL_CHECK_ERR("clReleaseKernel", clReleaseKernel(m_circsum));
	CL_CHECK_ERR("clReleaseProgram", clReleaseProgram(m_program));
	CL_CHECK_ERR("clReleaseMemObject", clReleaseMemObject(m_sampleHistoryCircQueue));
	CL_CHECK_ERR("clReleaseCommandQueue", clReleaseCommandQueue(m_command_queue));
	CL_CHECK_ERR("clReleaseContext", clReleaseContext(m_context));
}
Пример #4
0
vector<cpx> cl_fft<cpx>::run(const vector<cpx> &input)
{
	cl_event upload_unmap_evt, start_evt, download_map_evt, *kernel_evts = new cl_event[launches.size()];
	cl_int err;

	// Upload
	cl_float2 *input_buffer = (cl_float2*)clEnqueueMapBuffer(command_queue,
		v_samples,
		CL_TRUE,
		CL_MAP_WRITE,
		0,
		samplesMemSize,
		0,
		NULL,
		NULL,
		&err);
	CL_CHECK_ERR("clEnqueueMapBuffer", err);

	for (int i = 0; i < samplesPerRun; i++)
	{
		input_buffer[i].x = real(input[i]);
		input_buffer[i].y = imag(input[i]);
	}

	CL_CHECK_ERR("clEnqueueUnmapMemObject", clEnqueueUnmapMemObject(command_queue, v_samples, input_buffer, 0, NULL, &upload_unmap_evt));

	// Calcola la FFT
	cl_mem v_out = runInternal(v_samples, &start_evt, kernel_evts);

	// Download
	vector<cpx> result(samplesPerRun);
	cl_float2 *output_buffer = (cl_float2*)clEnqueueMapBuffer(command_queue,
		v_out,
		CL_TRUE,
		CL_MAP_READ,
		0,
		tmpMemSize,
		1,
		&kernel_evts[launches.size() - 1],
		&download_map_evt,
		&err);
	CL_CHECK_ERR("clEnqueueMapBuffer", err);

	for (int i = 0; i < samplesPerRun; i++)
		result[i] = cpx(output_buffer[i].x, output_buffer[i].y);

	CL_CHECK_ERR("clEnqueueUnmapMemObject", clEnqueueUnmapMemObject(command_queue, v_out, output_buffer, 0, NULL, NULL));

	printStatsAndReleaseEvents(upload_unmap_evt, start_evt, kernel_evts, download_map_evt);

	delete[] kernel_evts;

	return result;
}
Пример #5
0
static int runTest(cl_context context, cl_command_queue command_queue, I *cl_algorithm_instance, int N, bool print, bool check)
{
	vector<T> input = generateTestData<T>(N);
	vector<cpx> output(N);
	cl_int err;

	const size_t samplesMemSize = cl_deviceDataSize<T>(N);
	cl_mem v_output, v_input = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, samplesMemSize, &input[0], &err);
	CL_CHECK_ERR("clCreateBuffer", err);

	cl_event kernel_evt;
	v_output = cl_algorithm_instance->run(v_input, &kernel_evt);

	CL_CHECK_ERR("clReleaseMemObject", clReleaseMemObject(v_input));

	cl_float2 *output_buffer = (cl_float2*)clEnqueueMapBuffer(command_queue,
		v_output,
		CL_TRUE,
		CL_MAP_READ,
		0,
		N * sizeof(cl_float2),
		1,
		&kernel_evt,
		NULL,
		&err);
	CL_CHECK_ERR("clEnqueueMapBuffer", err);

	for (int i = 0; i < N; i++)
		output[i] = cpx(output_buffer[i].x, output_buffer[i].y);

	CL_CHECK_ERR("clEnqueueUnmapMemObject", clEnqueueUnmapMemObject(command_queue, v_output, output_buffer, 0, NULL, NULL));

	if (print)
		cerr << output << endl;

	if (check)
	{
#ifdef ALLOW_NPOT
		fprintf(stderr, "Calcolo serial naive DFT per riferimento...\n");
		const vector<cpx> ref = serial_naive_dft(input);
#else
		fprintf(stderr, "Calcolo serial non-recursive FFT per riferimento...\n");
		const vector<cpx> ref = serial_nonrecursive_fft(input);
#endif
		fprintf(stderr, "Distanza massima: %g\n", maxAbsDistance(output, ref));

	}

	return EXIT_SUCCESS;
}
Пример #6
0
QVector<float> SpectrumAnalyzerRangeData::runFFT(cl_mem samplesHistory, cl_int historyOffset, cl_int historyLength)
{
	cl_event presum_evt, algo_evt;

	CL_CHECK_ERR("clSetKernelArg", clSetKernelArg(circsum, 0, sizeof(cl_mem), &samples));
	CL_CHECK_ERR("clSetKernelArg", clSetKernelArg(circsum, 1, sizeof(cl_mem), &samplesHistory));
	CL_CHECK_ERR("clSetKernelArg", clSetKernelArg(circsum, 2, sizeof(cl_int), &historyOffset));
	CL_CHECK_ERR("clSetKernelArg", clSetKernelArg(circsum, 3, sizeof(cl_int), &historyLength));
	CL_CHECK_ERR("clSetKernelArg", clSetKernelArg(circsum, 4, sizeof(cl_int), &presumWindows));
	CL_CHECK_ERR("clEnqueueNDRangeKernel", clEnqueueNDRangeKernel(command_queue,
		circsum,
		1,
		NULL,
		&windowSize,
		&presumGroupSize,
		0,
		NULL,
		&presum_evt
	));

	clEnqueueWaitForEvents(command_queue, 1, &presum_evt);

	cl_mem output = algorithm.run(samples, &algo_evt);

	QVector<float> risultato(windowSize);

	cl_int err;
	cl_float2 *output_buffer = (cl_float2*)clEnqueueMapBuffer(command_queue,
		output,
		CL_TRUE,
		CL_MAP_READ,
		0,
		windowSize * sizeof(cl_float2),
		1,
		&algo_evt,
		NULL,
		&err);
	CL_CHECK_ERR("clEnqueueMapBuffer", err);

	for (int i = 0; i < windowSize; i++)
		risultato[i] = abs(cpx(output_buffer[i].x, output_buffer[i].y));

	CL_CHECK_ERR("clEnqueueUnmapMemObject", clEnqueueUnmapMemObject(command_queue, output, output_buffer, 0, NULL, NULL));

	CL_CHECK_ERR("clReleaseEvent", clReleaseEvent(presum_evt));
	CL_CHECK_ERR("clReleaseEvent", clReleaseEvent(algo_evt));

	return risultato;
}
Пример #7
0
char *get_device_info(cl_device_id device, cl_device_info device_info, char **buffer, int *len)
{
	cl_int err;
	int required_len = 256;

	do
	{
		if (*len < required_len)
		{
			*buffer = (char *)realloc(*buffer, required_len * sizeof(char));
			if (buffer == NULL)
			{
				fprintf(stderr, "Failed to allocate memory in %s at line %d\n", __FILE__, __LINE__);
				return NULL;
			}

			*len = required_len;
		}

		err = clGetDeviceInfo(device, device_info, *len, *buffer, &required_len);
		CL_CHECK_ERR(err);
	} while (*len < required_len);

	return *buffer;
}
Пример #8
0
SpectrumAnalyzer::SpectrumAnalyzer(LiveAudioInput *audioIn, cl_platform_id platform, cl_device_id device,
	const SpectrumAnalyzerRange *ranges, int numRanges, QObject *parent)
: QObject(parent)
{
	audioIn->setParent(this);

	m_context = clhCreateContextSingleDevice(platform, device);
	m_command_queue = clhCreateCommandQueue(m_context, device, true /* profiling abilitato */);
	clhEmptyNvidiaCache();

	m_program = clhBuildProgram(m_context, device, "tuner/SpectrumAnalyzer.cl");
	m_circsum = clhCreateKernel(m_program, "circsum");

	m_sampleHistoryLength = m_sampleHistoryOffset = 0;
	for (int i = 0; i < numRanges; i++)
	{
		m_ranges.append(new SpectrumAnalyzerRangeData(platform, device, m_context, m_command_queue, ranges[i], m_circsum));
		m_sampleHistoryLength = qMax(m_sampleHistoryLength, ranges[i].windowSize * ranges[i].presumWindows);
	}

	cl_int err;
	m_sampleHistoryCircQueue = clCreateBuffer(m_context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, m_sampleHistoryLength * sizeof(cl_float), NULL, &err);
	CL_CHECK_ERR("clCreateBuffer", err);

	// TODO: memset zero del buffer

	m_sampleRate = audioIn->sampleRate();

	connect(audioIn, SIGNAL(newChunkAvailable(QVector<qint16>)), this, SLOT(slotAudioChunkAvailable(QVector<qint16>)));
}
Пример #9
0
cl_fft<T>::~cl_fft()
{
	CL_CHECK_ERR("clReleaseMemObject", clReleaseMemObject(v_samples));
	CL_CHECK_ERR("clReleaseMemObject", clReleaseMemObject(v_tmp1));
	CL_CHECK_ERR("clReleaseMemObject", clReleaseMemObject(v_tmp2));

	CL_CHECK_ERR("clReleaseKernel", clReleaseKernel(k_fftstep_init));
	CL_CHECK_ERR("clReleaseKernel", clReleaseKernel(k_fftstep_cpx2cpx));
	CL_CHECK_ERR("clReleaseKernel", clReleaseKernel(k_fftstep_real2cpx));
	CL_CHECK_ERR("clReleaseKernel", clReleaseKernel(k_fftstep_optibase));
	CL_CHECK_ERR("clReleaseProgram", clReleaseProgram(program));
}
Пример #10
0
cl_program get_program_from_file(cl_context context, cl_device_id device, const char *filename)
{
	FILE *fp;
	int size;
	char *buffer;
	cl_int err;
	cl_program program;
	char buf[100000];
	
	/* Read file into buffer. */
	fp = fopen(filename, "r");
	if (fp == NULL)
	{
		fprintf(stderr, "Failed to open file: %s\n", filename);
		exit(1);
	}
	fseek(fp, 0, SEEK_END);
	size = ftell(fp);
	rewind(fp);
	buffer = (char *) malloc((size+1) * sizeof(char));
	buffer[size] = '\0';
	fread(buffer, sizeof(char), size, fp);
	fclose(fp);

	/* Create program. */
	program = clCreateProgramWithSource(context, 1, &buffer, NULL, &err);
	CL_CHECK_ERR(err);

	/* Build program. */
	if (clBuildProgram(program, 1, &device, "", NULL, NULL) != CL_SUCCESS)
	{
		clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 100000, buf, NULL);
		fprintf(stderr, "CL Compilation failed:\n%s", buffer);
		exit(1);
	}
	free(buffer);

	err = clUnloadCompiler();
	CL_CHECK_ERR(err);

	return program;
}
Пример #11
0
SpectrumAnalyzerRangeData::SpectrumAnalyzerRangeData(cl_platform_id platform, cl_device_id device,
	cl_context context, cl_command_queue command_queue, const SpectrumAnalyzerRange &range, cl_kernel circsum)
: firstKey(range.firstKey), lastKey(range.lastKey),
  algorithm(platform, device, context, command_queue, range.windowSize),
  command_queue(command_queue), circsum(circsum), windowSize(range.windowSize),
  presumWindows(range.presumWindows)
{
	cl_int err;
	samples = clCreateBuffer(context, CL_MEM_READ_ONLY, windowSize * sizeof(cl_float), NULL, &err);
	CL_CHECK_ERR("clCreateBuffer", err);
}
Пример #12
0
cl_context clhCreateContextSingleDevice(cl_platform_id platform, cl_device_id device)
{
	const cl_context_properties ctx_prop[] = { // è una lista di coppie
		CL_CONTEXT_PLATFORM, (cl_context_properties)platform,
		0 // terminatore lista
	};

	cl_int err;

	cl_context result = clCreateContext(ctx_prop, 1, &device, NULL, NULL, &err);
	CL_CHECK_ERR("clCreateContext", err);

	return result;
}
Пример #13
0
void SpectrumAnalyzer::slotAudioChunkAvailable(const QVector<qint16> &data)
{
	// Assume che il numero di dati da aggiungere sia un sottomultiplo di m_sampleHistoryLength

	cl_int err;
	cl_float *buffer = (cl_float*)clEnqueueMapBuffer(m_command_queue,
		m_sampleHistoryCircQueue,
		CL_TRUE,
		CL_MAP_WRITE,
		m_sampleHistoryOffset * sizeof(cl_float),
		data.size() * sizeof(cl_float),
		0,
		NULL,
		NULL,
		&err);
	CL_CHECK_ERR("clEnqueueMapBuffer", err);

	const float gain = 1.0 / 5000;
	for (int i = 0; i < data.size(); i++)
		buffer[i] = data[i] * gain;

	CL_CHECK_ERR("clEnqueueUnmapMemObject", clEnqueueUnmapMemObject(m_command_queue, m_sampleHistoryCircQueue, buffer, 0, NULL, NULL));

	if ((m_sampleHistoryOffset += data.size()) == m_sampleHistoryLength)
		m_sampleHistoryOffset = 0;

	QSet<int> pressedKeys;

	for (int i = 0; i < m_ranges.size(); i++)
	{
		const QVector<float> ft = m_ranges[i]->runFFT(m_sampleHistoryCircQueue, m_sampleHistoryOffset, m_sampleHistoryLength);
		pressedKeys += analyzeFT(i, ft);
	}

	emit pressedKeysAvailable(pressedKeys);
}
Пример #14
0
cl_mem cl_fft<T>::run(const cl_mem input, cl_event *out_finishEvent)
{
	cl_event start_evt, *kernel_evts = new cl_event[launches.size()];

	cl_mem v_out = runInternal(input, &start_evt, kernel_evts);

	if (out_finishEvent)
	{
		*out_finishEvent = kernel_evts[launches.size() - 1];
		CL_CHECK_ERR("clRetainEvent", clRetainEvent(*out_finishEvent));
	}

#if 0
	printStatsAndReleaseEvents(0, start_evt, kernel_evts, 0);
#else
	CL_CHECK_ERR("clReleaseEvent", clRetainEvent(start_evt));
	for (unsigned int i = 0; i < launches.size(); i++)
		CL_CHECK_ERR("clReleaseEvent", clReleaseEvent(kernel_evts[i]));
#endif

	delete[] kernel_evts;

	return v_out;
}
Пример #15
0
void cl_fft<T>::init()
{
	program = clhBuildProgram(context, device, "dft-algorithms/cl_fft.cl");
	k_fftstep_init = clhCreateKernel(program, "fftstep_init");
	k_fftstep_cpx2cpx = clhCreateKernel(program, "fftstep_cpx2cpx");
	k_fftstep_real2cpx = clhCreateKernel(program, "fftstep_real2cpx");

	k_fftstep_optibase = clhCreateKernel(program, "fftstep_optibase");

	cl_image_format fmt;
	fmt.image_channel_order = cl_channelOrder<cpx>();
	fmt.image_channel_data_type = CL_FLOAT;

	// deve essere una una potenza di due
	const size_t maxGroupSize = atoi(getenv("GS_X") ?: "256");

	// Parametri di lancio dei kernel mtx_cpx2cpx e mtx_real2cpx
	// Griglia con una riga per ogni coppia di righe nella matrice
	launch_step tmp;
	tmp.globalSize[0] = samplesPerRun / 2;
	tmp.globalSize[1] = 1;
	tmp.groupSize[0] = maxGroupSize;
	tmp.groupSize[1] = 1;

	// Calcola log2 esatto di tmp.globalSize[0]
	int Wshift = 0;
	while ((1 << Wshift) != tmp.globalSize[0]) Wshift++;

	while (tmp.globalSize[0] != 0)
	{
		if (tmp.groupSize[0] > tmp.globalSize[0])
			tmp.groupSize[0] = tmp.globalSize[0];
		tmp.groupSize[1] = min(maxGroupSize / tmp.groupSize[0], tmp.globalSize[1]);

		if (launches.size() != 1 && tmp.globalSize[0] == 2 &&
			tmp.globalSize[1] >= OPTIBASE_GS )
		{
			// sotto queste condizioni possiamo usare il kernel optibase
			tmp.globalSize[0] = samplesPerRun / 4;
			tmp.globalSize[1] = 1;
			tmp.groupSize[0] = OPTIBASE_GS;
			tmp.groupSize[1] = 1;
			tmp.isOptibase = true;
		}
		else
		{
			tmp.Wshift = Wshift;
			tmp.isOptibase = false;
		}

		launches.push_back(tmp);
		if (tmp.isOptibase)
			break;
			
		tmp.globalSize[0] /= 2;
		tmp.globalSize[1] *= 2;
		Wshift--;
	}

	cl_int err;
	size_t twiddleFactorsCount = samplesPerRun / 2;
	twiddleFactorsMemSize = twiddleFactorsCount * sizeof(cl_float2);
	size_t rows, cols;
	if (twiddleFactorsCount <= 4096)
	{
		cols = twiddleFactorsCount;
		rows = 1;
	}
	else
	{
		cols = 4096;
		rows = (twiddleFactorsCount + 4096 - 1) / 4096;
	}
	v_twiddleFactors = clCreateImage2D(context, CL_MEM_READ_WRITE, &fmt,
		cols, rows, 0, NULL, &err);
	CL_CHECK_ERR("clCreateBuffer", err);

	fprintf(stderr, "Memoria occupata dai twiddle factors [N=%d]: %g KiB\n\n",
		samplesPerRun, twiddleFactorsMemSize / 1.024e3);

	samplesMemSize = cl_deviceDataSize<T>(samplesPerRun);
	v_samples = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, samplesMemSize, NULL, &err);
	CL_CHECK_ERR("clCreateBuffer", err);

	tmpMemSize = samplesPerRun * sizeof(cl_float2);
	v_tmp1 = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, tmpMemSize, NULL, &err);
	CL_CHECK_ERR("clCreateBuffer", err);
	v_tmp2 = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, tmpMemSize, NULL, &err);
	CL_CHECK_ERR("clCreateBuffer", err);

	cl_event init_evt;
	size_t initGroupSize = min(maxGroupSize, twiddleFactorsCount);
	CL_CHECK_ERR("clSetKernelArg", clSetKernelArg(k_fftstep_init, 0, sizeof(cl_mem), &v_twiddleFactors));
	CL_CHECK_ERR("clEnqueueNDRangeKernel", clEnqueueNDRangeKernel(command_queue,
		k_fftstep_init,
		1,
		NULL,
		&twiddleFactorsCount,
		&initGroupSize,
		0,
		NULL,
		&init_evt
	));

	const float init_secs = clhEventWaitAndGetDuration(init_evt);
	const float init_memSizeMiB = twiddleFactorsMemSize / SIZECONV_MB;

	fprintf(stderr, "%s [N=%d, GS=%d]:\n",
		"OpenCL FFT twiddle factors initialization", samplesPerRun, maxGroupSize);

	fprintf(stderr, " kernel %g ms, %g MiB/s, %g valori/s\n",
		init_secs * 1e3, init_memSizeMiB / init_secs, samplesPerRun / 2 / init_secs);

	CL_CHECK_ERR("clReleaseEvent", clReleaseEvent(init_evt));
}
Пример #16
0
int main(int argc, const char **argv)
{
	int input_size;
	bool use_complex_inputs;
	bool print = false;
	bool check = false;

	int cl_platform_index = 0;
	int cl_device_index = 0;

	// Parsing degli eventuali ultimi due argomenti numerici, da destra verso
	// sinistra
	if (argc >= 4 && isdigit_string(argv[argc - 1]))
		cl_platform_index = atoi(argv[--argc]);
	if (argc >= 4 && isdigit_string(argv[argc - 1]))
	{
		cl_device_index = cl_platform_index;
		cl_platform_index = atoi(argv[--argc]);
	}

	if (
		!(argc == 3
			|| (argc == 4 && (print = !strcmp(argv[3], "print")))
			|| (argc == 4 && (check = !strcmp(argv[3], "check")))
		)
		|| (use_complex_inputs = !!strcmp(argv[1], "real")) == !!strcmp(argv[1], "complex")
		|| (input_size = atoi(argv[2])) <= 0)
	{
#ifdef ALLOW_NPOT
		const char *pot_text = "";
#else
		const char *pot_text = " (potenza di due)";
#endif

		cerr << "Uso: " << argv[0] << " <real | complex> input-size [print | check] [cl-platform-num [cl-device-num]]" << endl << endl;
		cerr << " real             Usa input di tipo reale" << endl;
		cerr << " complex          Usa input di tipo complesso" << endl;
		cerr << " input-size       Numero di samples da passare in input" << pot_text << endl;
		cerr << " print            Mostra output" << endl;
#ifdef ALLOW_NPOT
		cerr << " check            Confronta output con serial naive DFT" << endl;
#else
		cerr << " check            Confronta output con serial non-recursive FFT" << endl;
#endif
		cerr << " cl-platform-num  Indice della piattaforma OpenCL da utilizzare (default: 0)" << endl;
		cerr << " cl-device-num    Indice del dispositivo OpenCL da utilizzare (default: 0)" << endl;
		cerr << endl;

		cerr << "Piattaforme e dispositivi OpenCL disponibili:" << endl;
		vector<string> platforms = clhAvailablePlatformNames();
		if (platforms.size() == 0)
		{
			cerr << "(nessuna piattaforma)" << endl;
		}
		else
		{
			for (unsigned i = 0; i < platforms.size(); i++)
			{
				cerr << "#" << i << " " << platforms[i] << endl;

				vector<string> devices = clhAvailableDeviceNames(clhSelectPlatform(i));
				if (devices.size() == 0)
				{
					cerr << "   (nessun dispositivo)" << endl;
				}
				else
				{
					for (unsigned j = 0; j < devices.size(); j++)
						cerr << "  -> " << i << " " << j << " - " << devices[j] << endl;
				}
			}
		}

		cerr << endl;
		return EXIT_FAILURE;
	}

#ifndef ALLOW_NPOT
	if (input_size & (input_size-1))
	{
		cerr << "input-size deve essere una potenza di 2" << endl;
		return EXIT_FAILURE;
	}
#endif

	cl_platform_id platform = clhSelectPlatform(cl_platform_index);
	cerr << "CL platform: " << clhGetPlatformFriendlyName(platform) << endl;

	cl_device_id device = clhSelectDevice(platform, cl_device_index);
	cerr << "CL device: " << clhGetDeviceFriendlyName(device) << endl;

	cl_context context = clhCreateContextSingleDevice(platform, device);
	cl_command_queue command_queue = clhCreateCommandQueue(context, device, true /* profiling abilitato */);

	clhEmptyNvidiaCache();

	if (use_complex_inputs)
	{
		ALGOCLASS<cpx> instance(platform, device, context, command_queue, input_size);
		cerr << "sleep(1)" << endl;
		sleep(1);
		runTest<cpx>(context, command_queue, &instance, input_size, print, check);
	}
	else
	{
		ALGOCLASS<float> instance(platform, device, context, command_queue, input_size);
		cerr << "sleep(1)" << endl;
		sleep(1);
		runTest<float>(context, command_queue, &instance, input_size, print, check);
	}

	CL_CHECK_ERR("clReleaseCommandQueue", clReleaseCommandQueue(command_queue));
	CL_CHECK_ERR("clReleaseContext", clReleaseContext(context));
}
Пример #17
0
void cl_fft<T>::printStatsAndReleaseEvents(cl_event upload_unmap_evt, cl_event start_evt, cl_event *kernel_evts, cl_event download_map_evt)
{
	const float step0_memSizeMiB = (samplesMemSize + tmpMemSize) / SIZECONV_MB;
	const float stepN_memSizeMiB = (2*tmpMemSize) / SIZECONV_MB;

	fprintf(stderr, "%s [N=%d]:\n",
		cl_fft_algoName<T>(), samplesPerRun);

	if (upload_unmap_evt)
	{
		const float upload_secs = clhEventWaitAndGetDuration(upload_unmap_evt);
		const float upload_memSizeMiB = samplesMemSize / SIZECONV_MB;

		fprintf(stderr, " upload %g ms, %g MiB/s\n",
			upload_secs * 1e3, upload_memSizeMiB / upload_secs);

		CL_CHECK_ERR("clReleaseEvent", clReleaseEvent(upload_unmap_evt));
	}

	float kernel_secs = 0;
	float kernel_mem = 0;
	for (unsigned int i = 0; i < launches.size(); i++)
	{
		const float step_secs = clhEventWaitAndGetDuration(kernel_evts[i]);
		const float memSizeMiB = (i == 0) ? step0_memSizeMiB : stepN_memSizeMiB;
		char kernel_name[30];
		if (launches[i].isOptibase == false)
			strcpy(kernel_name, i == 0 ? cl_fft_kernelName<T>() : "cpx2cpx");
		else
			strcpy(kernel_name, "optibase");
		fprintf(stderr, " step%d (%s) [GRID=%dx%d GS=%dx%d] %g ms, %g MiB/s\n",
			i, kernel_name,
			(int)(launches[i].globalSize[0] / launches[i].groupSize[0]),
			(int)(launches[i].globalSize[1] / launches[i].groupSize[1]),
			(int)launches[i].groupSize[0], (int)launches[i].groupSize[1],
			step_secs * 1e3, memSizeMiB / step_secs);
		kernel_mem += memSizeMiB;
		kernel_secs += step_secs;
	}

	// Calcola statistiche globali in base alla somma del tempo di esecuzione dei singoli step
	fprintf(stderr, " total(SUM) %g ms, %g MiB/s, %g Ksamples/s\n",
		kernel_secs * 1e3, kernel_mem / kernel_secs, 1e-3 * samplesPerRun / kernel_secs);

	// Calcola statistiche globali in base al tempo intercorso tra il marker e l'ultimo kernel
	kernel_secs = clhEventWaitAndGetDifference(start_evt, kernel_evts[launches.size() - 1]);
	fprintf(stderr, " total(MARKER) %g ms, %g MiB/s, %g Ksamples/s\n",
		kernel_secs * 1e3, kernel_mem / kernel_secs, 1e-3 * samplesPerRun / kernel_secs);

	if (download_map_evt)
	{
		const float download_secs = clhEventWaitAndGetDuration(download_map_evt);
		const float download_memSizeMiB = tmpMemSize / SIZECONV_MB;

		fprintf(stderr, " download %g ms, %g MiB/s\n",
			download_secs * 1e3, download_memSizeMiB / download_secs);

		CL_CHECK_ERR("clReleaseEvent", clReleaseEvent(download_map_evt));
	}

	if (getenv("PRINT_KERNEL_EXECUTION_TIME"))
		printf("%g\n", kernel_secs * 1e3);

	CL_CHECK_ERR("clReleaseEvent", clReleaseEvent(start_evt));

	for (unsigned int i = 0; i < launches.size(); i++)
		CL_CHECK_ERR("clReleaseEvent", clReleaseEvent(kernel_evts[i]));
}
Пример #18
0
void call_kernel_mem(int nc,int np,float *sdens_out,char * cl_name) {
//----------------------------------------------------------------------------
// Initialization
    size_t global;                      // global domain size for our calculation
    size_t local;                       // local domain size for our calculation

    //cl_device_id device_id;             // compute device id
    cl_context context;                 // compute context
    cl_command_queue commands;          // compute command queue
    cl_program program;                 // compute program
    cl_kernel kernel;                   // compute kernel
//----------------------------------------------------------------------------
// Claim Variables for Device
    cl_mem output;                      // device memory used for the output array
//----------------------------------------------------------------------------
// Setup Context of OpenCL
	int err;
    int gpu = 1;
	unsigned int ndevices = 0;
    //err = clGetDeviceIDs(NULL, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, &device_id, &ndevices);
	//printf("--------------------------%d\n", ndevices);
    //context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
    //commands = clCreateCommandQueue(context, device_id, 0, &err);

    clGetDeviceIDs(NULL, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, NULL, &ndevices);
	cl_device_id devices[ndevices];
    err = clGetDeviceIDs(NULL, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, ndevices, devices, NULL);
	printf("--------------------------%d\n",err);

    //context = clCreateContext(NULL, ndevices, devices, NULL, NULL, &err);
	context = CL_CHECK_ERR(clCreateContext(NULL, ndevices, devices, &pfn_notify, NULL, &_err));
	printf("--------------------------%d\n",err);
    commands = clCreateCommandQueue(context, devices[1], 0, &err);
	printf("--------------------------%d\n",err);
//---------------------------------------------------------------------
//* Load kernel source file */
	int MAX_SOURCE_SIZE = 1048576;
	FILE * fp;
	//const char fileName[] = "./sph_opencl.cl";
	size_t KernelSourceSize;
	char *KernelSource;
	fp = fopen(cl_name, "r");
	if (!fp) {
		fprintf(stderr, "Failed to load kernel.\n");
		exit(1);
	}
	KernelSource = (char *)malloc(MAX_SOURCE_SIZE*sizeof(char));
	KernelSourceSize = fread(KernelSource,sizeof(char), MAX_SOURCE_SIZE, fp);
	program = clCreateProgramWithSource(context, 1, (const char **) & KernelSource, NULL, &err);
	printf("--------------------------%d\n",err);
	clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
	kernel = clCreateKernel(program, "sph_cl", &err);
	printf("--------------------------%d\n",err);
//----------------------------------------------------------------------------
// Allocate Memory for Device
    output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float)*nc*nc, NULL, NULL);
//----------------------------------------------------------------------------
// Passing Parameters into Kernel Functions
    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &output);
	printf("passing--------------------------%d\n",err);
    err = clSetKernelArg(kernel, 1, sizeof(int), &nc);
	printf("--------------------------%d\n",err);
    err = clSetKernelArg(kernel, 2, sizeof(int), &np);
	printf("passing--------------------------%d\n",err);
//----------------------------------------------------------------------------
// Runing Kernel Functions
    clGetKernelWorkGroupInfo(kernel, devices[1], CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL);
	printf("********************%zd\n", local);
    global = nc*nc;
	local = 32;

    //clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global,&local, 0, NULL, NULL);
    err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global,&local, 0, NULL, NULL);
	printf("--------------------------%d\n",err);
    //err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL,&global,NULL, 0, NULL, NULL);
    clFinish(commands);
//----------------------------------------------------------------------------
// Output Array
	printf("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&\n");
    err = clEnqueueReadBuffer( commands, output, CL_TRUE, 0, sizeof(float)*nc*nc, sdens_out, 0, NULL, NULL );
	printf("^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^%d\n",err);
//----------------------------------------------------------------------------
// Free the Memory in Device
    clReleaseMemObject(output);
//----------------------------------------------------------------------------
    clReleaseProgram(program);
    clReleaseKernel(kernel);
    clReleaseCommandQueue(commands);
    clReleaseContext(context);

    //printf("nKernel source:\n\n %s \n", KernelSource);
    free(KernelSource);
}
Пример #19
0
cl_mem cl_fft<float>::runInternal(const cl_mem input, cl_event *out_startEvent, cl_event *out_kernelEvents)
{
	CL_CHECK_ERR("clEnqueueMarker", clEnqueueMarker(command_queue, out_startEvent));

	// Lanci del kernel
	const cl_uint Nhalf = samplesPerRun / 2;
	cl_event prev_evt = *out_startEvent;
	for (unsigned int i = 0; i < launches.size(); i++)
	{
		if (launches[i].isOptibase == false)
		{
			// Solo il primo step ha input reali
			cl_kernel kernel = (i == 0) ? k_fftstep_real2cpx : k_fftstep_cpx2cpx;
			CL_CHECK_ERR("clSetKernelArg", clSetKernelArg(kernel, 0, sizeof(cl_mem), (i == 0) ? &input : &v_tmp1));
			CL_CHECK_ERR("clSetKernelArg", clSetKernelArg(kernel, 1, sizeof(cl_mem), &v_tmp2));
			CL_CHECK_ERR("clSetKernelArg", clSetKernelArg(kernel, 2, sizeof(cl_mem), &v_twiddleFactors));
			CL_CHECK_ERR("clSetKernelArg", clSetKernelArg(kernel, 3, sizeof(cl_uint), &launches[i].Wshift));
			CL_CHECK_ERR("clSetKernelArg", clSetKernelArg(kernel, 4, sizeof(cl_uint), &Nhalf));
			CL_CHECK_ERR("clEnqueueNDRangeKernel", clEnqueueNDRangeKernel(command_queue,
				kernel,
				2,
				NULL,
				launches[i].globalSize,
				launches[i].groupSize,
				1,
				&prev_evt,
				&out_kernelEvents[i]
			));
		}
		else
		{
			CL_CHECK_ERR("clSetKernelArg", clSetKernelArg(k_fftstep_optibase, 0, sizeof(cl_mem), &v_tmp1));
			CL_CHECK_ERR("clSetKernelArg", clSetKernelArg(k_fftstep_optibase, 1, sizeof(cl_mem), &v_tmp2));
			CL_CHECK_ERR("clSetKernelArg", clSetKernelArg(k_fftstep_optibase, 2, sizeof(cl_mem), &v_twiddleFactors));
			CL_CHECK_ERR("clSetKernelArg", clSetKernelArg(k_fftstep_optibase, 3, sizeof(cl_uint), &Nhalf));
			CL_CHECK_ERR("clEnqueueNDRangeKernel", clEnqueueNDRangeKernel(command_queue,
				k_fftstep_optibase,
				1,
				NULL,
				launches[i].globalSize,
				launches[i].groupSize,
				1,
				&prev_evt,
				&out_kernelEvents[i]
			));
		}

		prev_evt = out_kernelEvents[i];
		swap(v_tmp1, v_tmp2);
	}

	return v_tmp1;
}
Пример #20
0
int main(int argc, char **argv)
{
  printf("enter demo main\n");
  fflush(stdout);
  putenv("POCL_VERBOSE=1");
  putenv("POCL_DEVICES=basic");
  putenv("POCL_LEAVE_TEMP_DIRS=1");
  putenv("POCL_LEAVE_KERNEL_COMPILER_TEMP_FILES=1");
  putenv("POCL_TEMP_DIR=pocl");
  putenv("POCL_CACHE_DIR=pocl");
  putenv("POCL_WORK_GROUP_METHOD=spmd");
  if(argc >= 2){
    printf("argv[1]:%s:\n",argv[1]);
    if(!strcmp(argv[1], "h"))
      putenv("POCL_WORK_GROUP_METHOD=spmd");
    if(!strcmp(argv[1], "c"))
      putenv("POCL_CROSS_COMPILE=1");
  }
  if(argc >= 3){
    printf("argv[2]:%s:\n",argv[2]);
    if(!strcmp(argv[2], "h"))
      putenv("POCL_WORK_GROUP_METHOD=spmd");
    if(!strcmp(argv[2], "c"))
      putenv("POCL_CROSS_COMPILE=1");
  }

  //putenv("LD_LIBRARY_PATH=/scratch/colins/build/linux/fs/lib");
  //putenv("LTDL_LIBRARY_PATH=/scratch/colins/build/linux/fs/lib");
  //lt_dlsetsearchpath("/scratch/colins/build/linux/fs/lib");
  //printf("SEARCH_PATH:%s\n",lt_dlgetsearchpath());
	cl_platform_id platforms[100];
	cl_uint platforms_n = 0;
	CL_CHECK(clGetPlatformIDs(100, platforms, &platforms_n));

	printf("=== %d OpenCL platform(s) found: ===\n", platforms_n);
	for (int i=0; i<platforms_n; i++)
	{
		char buffer[10240];
		printf("  -- %d --\n", i);
		CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_PROFILE, 10240, buffer, NULL));
		printf("  PROFILE = %s\n", buffer);
		CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_VERSION, 10240, buffer, NULL));
		printf("  VERSION = %s\n", buffer);
		CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, 10240, buffer, NULL));
		printf("  NAME = %s\n", buffer);
		CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, 10240, buffer, NULL));
		printf("  VENDOR = %s\n", buffer);
		CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, 10240, buffer, NULL));
		printf("  EXTENSIONS = %s\n", buffer);
	}

	if (platforms_n == 0)
		return 1;

	cl_device_id devices[100];
	cl_uint devices_n = 0;
	// CL_CHECK(clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, 100, devices, &devices_n));
	CL_CHECK(clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_GPU, 100, devices, &devices_n));

	printf("=== %d OpenCL device(s) found on platform:\n", platforms_n);
	for (int i=0; i<devices_n; i++)
	{
		char buffer[10240];
		cl_uint buf_uint;
		cl_ulong buf_ulong;
		printf("  -- %d --\n", i);
		CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(buffer), buffer, NULL));
		printf("  DEVICE_NAME = %s\n", buffer);
		CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_VENDOR, sizeof(buffer), buffer, NULL));
		printf("  DEVICE_VENDOR = %s\n", buffer);
		CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_VERSION, sizeof(buffer), buffer, NULL));
		printf("  DEVICE_VERSION = %s\n", buffer);
		CL_CHECK(clGetDeviceInfo(devices[i], CL_DRIVER_VERSION, sizeof(buffer), buffer, NULL));
		printf("  DRIVER_VERSION = %s\n", buffer);
		CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(buf_uint), &buf_uint, NULL));
		printf("  DEVICE_MAX_COMPUTE_UNITS = %u\n", (unsigned int)buf_uint);
		CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(buf_uint), &buf_uint, NULL));
		printf("  DEVICE_MAX_CLOCK_FREQUENCY = %u\n", (unsigned int)buf_uint);
		CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(buf_ulong), &buf_ulong, NULL));
		printf("  DEVICE_GLOBAL_MEM_SIZE = %llu\n", (unsigned long long)buf_ulong);
	}

	if (devices_n == 0)
		return 1;

	cl_context context;
	context = CL_CHECK_ERR(clCreateContext(NULL, 1, devices+1, &pfn_notify, NULL, &_err));

	cl_command_queue queue;
  queue = CL_CHECK_ERR(clCreateCommandQueue(context, devices[1], CL_QUEUE_PROFILING_ENABLE, &_err));

	cl_kernel kernel = 0;
  cl_mem memObjects[3] = {0,0,0};


  // Create OpenCL program - first attempt to load cached binary.
  //  If that is not available, then create the program from source
  //  and store the binary for future use.
  std::cout << "Attempting to create program from binary..." << std::endl;
  cl_program program = CreateProgramFromBinary(context, devices[1], "kernel.cl.bin");
  if (program == NULL)
  {
      std::cout << "Binary not loaded, create from source..." << std::endl;
      program = CreateProgram(context, devices[1], "kernel.cl");
      if (program == NULL)
      {
          Cleanup(context, queue, program, kernel, memObjects);
          return 1;
      }

      std::cout << "Save program binary for future run..." << std::endl;
      if (SaveProgramBinary(program, devices[1], "kernel.cl.bin") == false)
      {
          std::cerr << "Failed to write program binary" << std::endl;
          Cleanup(context, queue, program, kernel, memObjects);
          return 1;
      }
  }
  else
  {
      std::cout << "Read program from binary." << std::endl;
  }

  printf("attempting to create input buffer\n");
  fflush(stdout);
	cl_mem input_bufferA;
	input_bufferA = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float)*NUM_DATA*NUM_DATA, NULL, &_err));
	cl_mem input_bufferB;
	input_bufferB = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float)*NUM_DATA*NUM_DATA, NULL, &_err));

  printf("attempting to create output buffer\n");
  fflush(stdout);
	cl_mem output_buffer;
	output_buffer = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float)*NUM_DATA*NUM_DATA, NULL, &_err));

  memObjects[0] = input_bufferA;
  memObjects[1] = input_bufferB;
  memObjects[2] = output_buffer;

	size_t width = NUM_DATA;

  printf("attempting to create kernel\n");
  fflush(stdout);
	kernel = CL_CHECK_ERR(clCreateKernel(program, "sgemm_single", &_err));
	CL_CHECK(clSetKernelArg(kernel, 0, sizeof(input_bufferA), &input_bufferA));
	CL_CHECK(clSetKernelArg(kernel, 1, sizeof(input_bufferB), &input_bufferB));
	CL_CHECK(clSetKernelArg(kernel, 2, sizeof(output_buffer), &output_buffer));
	CL_CHECK(clSetKernelArg(kernel, 3, sizeof(width), &width));

  printf("attempting to enqueue write buffer\n");
  fflush(stdout);
	for (int i=0; i<NUM_DATA*NUM_DATA; i++) {
    float in = ((float)rand()/(float)(RAND_MAX)) * 100.0;
		CL_CHECK(clEnqueueWriteBuffer(queue, input_bufferA, CL_TRUE, i*sizeof(float), 4, &in, 0, NULL, NULL));
    in = ((float)rand()/(float)(RAND_MAX)) * 100.0;
		CL_CHECK(clEnqueueWriteBuffer(queue, input_bufferB, CL_TRUE, i*sizeof(float), 4, &in, 0, NULL, NULL));
	}

	cl_event kernel_completion;
  const size_t local_work_size[3] = { 64, 1, 1};
  //                             a_offset  
	size_t global_work_size[3] = { NUM_DATA, NUM_DATA, NUM_DATA };
  printf("attempting to enqueue kernel\n");
  fflush(stdout);
	CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &kernel_completion));
  printf("Enqueue'd kerenel\n");
  fflush(stdout);
  cl_ulong time_start, time_end;
  CL_CHECK(clWaitForEvents(1, &kernel_completion));
  CL_CHECK(clGetEventProfilingInfo(kernel_completion, CL_PROFILING_COMMAND_START, sizeof(time_start), &time_start, NULL));
  CL_CHECK(clGetEventProfilingInfo(kernel_completion, CL_PROFILING_COMMAND_END, sizeof(time_end), &time_end, NULL));
  double elapsed = time_end - time_start;
  printf("time(ns):%lg\n",elapsed);
	CL_CHECK(clReleaseEvent(kernel_completion));

	printf("Result:");
	for (int i=0; i<NUM_DATA*NUM_DATA; i++) {
		float data;
		CL_CHECK(clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, i*sizeof(float), 4, &data, 0, NULL, NULL));
		//printf(" %f", data);
	}
	printf("\n");

	CL_CHECK(clReleaseMemObject(memObjects[0]));
	CL_CHECK(clReleaseMemObject(memObjects[1]));
	CL_CHECK(clReleaseMemObject(memObjects[2]));

	CL_CHECK(clReleaseKernel(kernel));
	CL_CHECK(clReleaseProgram(program));
	CL_CHECK(clReleaseContext(context));

	return 0;
}
Пример #21
0
int main(int argc, char **argv)
{
	cl_platform_id platforms[100];
	cl_uint platforms_n = 0;
	CL_CHECK(clGetPlatformIDs(100, platforms, &platforms_n));

	printf("=== %d OpenCL platform(s) found: ===\n", platforms_n);
	for (int i=0; i<platforms_n; i++)
	{
		char buffer[10240];
		printf("  -- %d --\n", i);
		CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_PROFILE, 10240, buffer, NULL));
		printf("  PROFILE = %s\n", buffer);
		CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_VERSION, 10240, buffer, NULL));
		printf("  VERSION = %s\n", buffer);
		CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, 10240, buffer, NULL));
		printf("  NAME = %s\n", buffer);
		CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, 10240, buffer, NULL));
		printf("  VENDOR = %s\n", buffer);
		CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, 10240, buffer, NULL));
		printf("  EXTENSIONS = %s\n", buffer);
	}

	if (platforms_n == 0)
		return 1;

	cl_device_id devices[100];
	cl_uint devices_n = 0;
	// CL_CHECK(clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, 100, devices, &devices_n));
	CL_CHECK(clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_GPU, 100, devices, &devices_n));

	printf("=== %d OpenCL device(s) found on platform:\n", platforms_n);
	for (int i=0; i<devices_n; i++)
	{
		char buffer[10240];
		cl_uint buf_uint;
		cl_ulong buf_ulong;
		printf("  -- %d --\n", i);
		CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(buffer), buffer, NULL));
		printf("  DEVICE_NAME = %s\n", buffer);
		CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_VENDOR, sizeof(buffer), buffer, NULL));
		printf("  DEVICE_VENDOR = %s\n", buffer);
		CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_VERSION, sizeof(buffer), buffer, NULL));
		printf("  DEVICE_VERSION = %s\n", buffer);
		CL_CHECK(clGetDeviceInfo(devices[i], CL_DRIVER_VERSION, sizeof(buffer), buffer, NULL));
		printf("  DRIVER_VERSION = %s\n", buffer);
		CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(buf_uint), &buf_uint, NULL));
		printf("  DEVICE_MAX_COMPUTE_UNITS = %u\n", (unsigned int)buf_uint);
		CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(buf_uint), &buf_uint, NULL));
		printf("  DEVICE_MAX_CLOCK_FREQUENCY = %u\n", (unsigned int)buf_uint);
		CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(buf_ulong), &buf_ulong, NULL));
		printf("  DEVICE_GLOBAL_MEM_SIZE = %llu\n", (unsigned long long)buf_ulong);
	}

	if (devices_n == 0)
		return 1;

	cl_context context;
	context = CL_CHECK_ERR(clCreateContext(NULL, 1, devices, &pfn_notify, NULL, &_err));

	const char *program_source[] = {
		"__kernel void simple_demo(__global int *src, __global int *dst, int factor)\n",
		"{\n",
		"	int i = get_global_id(0);\n",
		"	dst[i] = src[i] * factor;\n",
		"}\n"
	};

	cl_program program;
	program = CL_CHECK_ERR(clCreateProgramWithSource(context, sizeof(program_source)/sizeof(*program_source), program_source, NULL, &_err));
	if (clBuildProgram(program, 1, devices, "", NULL, NULL) != CL_SUCCESS) {
		char buffer[10240];
		clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, NULL);
		fprintf(stderr, "CL Compilation failed:\n%s", buffer);
		abort();
	}
	CL_CHECK(clUnloadCompiler());

	cl_mem input_buffer;
	input_buffer = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(int)*NUM_DATA, NULL, &_err));

	cl_mem output_buffer;
	output_buffer = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(int)*NUM_DATA, NULL, &_err));

	int factor = 2;

	cl_kernel kernel;
	kernel = CL_CHECK_ERR(clCreateKernel(program, "simple_demo", &_err));
	CL_CHECK(clSetKernelArg(kernel, 0, sizeof(input_buffer), &input_buffer));
	CL_CHECK(clSetKernelArg(kernel, 1, sizeof(output_buffer), &output_buffer));
	CL_CHECK(clSetKernelArg(kernel, 2, sizeof(factor), &factor));

	cl_command_queue queue;
	queue = CL_CHECK_ERR(clCreateCommandQueue(context, devices[0], 0, &_err));

	for (int i=0; i<NUM_DATA; i++) {
		CL_CHECK(clEnqueueWriteBuffer(queue, input_buffer, CL_TRUE, i*sizeof(int), sizeof(int), &i, 0, NULL, NULL));
	}

	cl_event kernel_completion;
	size_t global_work_size[1] = { NUM_DATA };
	CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, &kernel_completion));
	CL_CHECK(clWaitForEvents(1, &kernel_completion));
	CL_CHECK(clReleaseEvent(kernel_completion));

	printf("Result:");
	for (int i=0; i<NUM_DATA; i++) {
		int data;
		CL_CHECK(clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, i*sizeof(int), sizeof(int), &data, 0, NULL, NULL));
		printf(" %d", data);
	}
	printf("\n");

	CL_CHECK(clReleaseMemObject(input_buffer));
	CL_CHECK(clReleaseMemObject(output_buffer));

	CL_CHECK(clReleaseKernel(kernel));
	CL_CHECK(clReleaseProgram(program));
	CL_CHECK(clReleaseContext(context));

	return 0;
}
Пример #22
0
SpectrumAnalyzerRangeData::~SpectrumAnalyzerRangeData()
{
	CL_CHECK_ERR("clReleaseMemObject", clReleaseMemObject(samples));
}