cl_mem OpenCLDevice::COM_clAttachMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, int offsetIndex,
                                                               list<cl_mem> *cleanup, MemoryBuffer **inputMemoryBuffers,
                                                               ReadBufferOperation *reader)
{
	cl_int error;
	
	MemoryBuffer *result = reader->getInputMemoryBuffer(inputMemoryBuffers);

	const cl_image_format imageFormat = {
		CL_RGBA,
		CL_FLOAT
	};

	cl_mem clBuffer = clCreateImage2D(this->m_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, &imageFormat, result->getWidth(),
	                                  result->getHeight(), 0, result->getBuffer(), &error);

	if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error));  }
	if (error == CL_SUCCESS) cleanup->push_back(clBuffer);

	error = clSetKernelArg(kernel, parameterIndex, sizeof(cl_mem), &clBuffer);
	if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error));  }

	COM_clAttachMemoryBufferOffsetToKernelParameter(kernel, offsetIndex, result);
	return clBuffer;
}
示例#2
0
void OpenCLDevice::COM_clEnqueueRange(cl_kernel kernel,
                                      MemoryBuffer *outputMemoryBuffer,
                                      int offsetIndex,
                                      NodeOperation *operation)
{
  cl_int error;
  const int width = outputMemoryBuffer->getWidth();
  const int height = outputMemoryBuffer->getHeight();
  int offsetx;
  int offsety;
  int localSize = 1024;
  size_t size[2];
  cl_int2 offset;

  if (this->m_vendorID == NVIDIA) {
    localSize = 32;
  }

  bool breaked = false;
  for (offsety = 0; offsety < height && (!breaked); offsety += localSize) {
    offset.s[1] = offsety;
    if (offsety + localSize < height) {
      size[1] = localSize;
    }
    else {
      size[1] = height - offsety;
    }

    for (offsetx = 0; offsetx < width && (!breaked); offsetx += localSize) {
      if (offsetx + localSize < width) {
        size[0] = localSize;
      }
      else {
        size[0] = width - offsetx;
      }
      offset.s[0] = offsetx;

      error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset);
      if (error != CL_SUCCESS) {
        printf("CLERROR[%d]: %s\n", error, clewErrorString(error));
      }
      error = clEnqueueNDRangeKernel(this->m_queue, kernel, 2, NULL, size, 0, 0, 0, NULL);
      if (error != CL_SUCCESS) {
        printf("CLERROR[%d]: %s\n", error, clewErrorString(error));
      }
      clFlush(this->m_queue);
      if (operation->isBreaked()) {
        breaked = false;
      }
    }
  }
}
示例#3
0
	virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads)
	{
		device_vector<uint64_t> size_buffer;
		size_buffer.resize(1);
		device->mem_alloc(NULL, size_buffer, MEM_READ_WRITE);

		uint threads = num_threads;
		device->kernel_set_args(device->program_state_buffer_size(), 0, kg, data, threads, size_buffer);

		size_t global_size = 64;
		device->ciErr = clEnqueueNDRangeKernel(device->cqCommandQueue,
		                               device->program_state_buffer_size(),
		                               1,
		                               NULL,
		                               &global_size,
		                               NULL,
		                               0,
		                               NULL,
		                               NULL);

		device->opencl_assert_err(device->ciErr, "clEnqueueNDRangeKernel");

		device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint64_t));
		device->mem_free(size_buffer);

		if(device->ciErr != CL_SUCCESS) {
			string message = string_printf("OpenCL error: %s in clEnqueueNDRangeKernel()",
			                               clewErrorString(device->ciErr));
			device->opencl_error(message);
			return 0;
		}

		return *size_buffer.get_data();
	}
cOpenClHardware::cOpenClHardware(QObject *parent) : QObject(parent)
{
	openClAvailable = false;
	contextReady = false;
	// TODO: confirm initial value
	// initialize multi-gpu devices' indices list with empty QList
	selectedDevicesIndices = QList<int>();
	missingOpenClDLL = false;
	selectedPlatformIndex = 0;

#ifdef USE_OPENCL
#ifdef _WIN32
#ifndef _MSC_VER
	const std::wstring openclDll(L"OpenCL.dll");
	int err = clewInit(openclDll.c_str());
	if (err)
	{
		qCritical() << clewErrorString(err);
		missingOpenClDLL = true;
	}
#endif //   _MSC_VER
#endif
	isNVidia = false;
	isAMD = false;
	context = nullptr;
#endif
}
示例#5
0
	virtual bool enqueue(const KernelDimensions& dim, device_memory& kg, device_memory& data)
	{
		device->kernel_set_args(program(), 0, kg, data);

		device->ciErr = clEnqueueNDRangeKernel(device->cqCommandQueue,
		                                       program(),
		                                       2,
		                                       NULL,
		                                       dim.global_size,
		                                       dim.local_size,
		                                       0,
		                                       NULL,
		                                       NULL);

		device->opencl_assert_err(device->ciErr, "clEnqueueNDRangeKernel");

		if(device->ciErr != CL_SUCCESS) {
			string message = string_printf("OpenCL error: %s in clEnqueueNDRangeKernel()",
			                               clewErrorString(device->ciErr));
			device->opencl_error(message);
			return false;
		}

		return true;
	}
void OpenCLDevice::COM_clEnqueueRange(cl_kernel kernel, MemoryBuffer *outputMemoryBuffer)
{
	cl_int error;
	const size_t size[] = {(size_t)outputMemoryBuffer->getWidth(), (size_t)outputMemoryBuffer->getHeight()};

	error = clEnqueueNDRangeKernel(this->m_queue, kernel, 2, NULL, size, 0, 0, 0, NULL);
	if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error));  }
}
示例#7
0
static inline cl_int _clCheck(cl_int errcode, const char *file, int line, const char *func) {
  mocl_errcode = errcode;
  if (mocl_errcode != CL_SUCCESS) {
    error ("%d/%s at %s:%d %s\n", mocl_errcode,
           clewErrorString(mocl_errcode),
           file, line, func);
  }
  return errcode;
}
void OpenCLDevice::COM_clAttachSizeToKernelParameter(cl_kernel kernel, int offsetIndex, NodeOperation *operation)
{
	if (offsetIndex != -1) {
		cl_int error;
		cl_int2 offset = {{(cl_int)operation->getWidth(), (cl_int)operation->getHeight()}};

		error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset);
		if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error));  }
	}
}
示例#9
0
void OpenCLDevice::COM_clAttachOutputMemoryBufferToKernelParameter(cl_kernel kernel,
                                                                   int parameterIndex,
                                                                   cl_mem clOutputMemoryBuffer)
{
  cl_int error;
  error = clSetKernelArg(kernel, parameterIndex, sizeof(cl_mem), &clOutputMemoryBuffer);
  if (error != CL_SUCCESS) {
    printf("CLERROR[%d]: %s\n", error, clewErrorString(error));
  }
}
cl_kernel OpenCLDevice::COM_clCreateKernel(const char *kernelname, list<cl_kernel> *clKernelsToCleanUp)
{
	cl_int error;
	cl_kernel kernel = clCreateKernel(this->m_program, kernelname, &error);
	if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
	else {
		if (clKernelsToCleanUp) clKernelsToCleanUp->push_back(kernel);
	}
	return kernel;

}
void OpenCLDevice::COM_clAttachMemoryBufferOffsetToKernelParameter(cl_kernel kernel, int offsetIndex, MemoryBuffer *memoryBuffer)
{
	if (offsetIndex != -1) {
		cl_int error;
		rcti *rect = memoryBuffer->getRect();
		cl_int2 offset = {{rect->xmin, rect->ymin}};

		error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset);
		if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error));  }
	}
}
示例#12
0
	virtual bool enqueue(const KernelDimensions& dim, device_memory& kg, device_memory& data)
	{
		if(cached_id != cached_memory.id) {
			cl_uint start_arg_index =
				device->kernel_set_args(program(),
					            0,
					            kg,
					            data,
					            *cached_memory.split_data,
					            *cached_memory.ray_state);

				device->set_kernel_arg_buffers(program(), &start_arg_index);

			start_arg_index +=
				device->kernel_set_args(program(),
					            start_arg_index,
					            *cached_memory.queue_index,
					            *cached_memory.use_queues_flag,
					            *cached_memory.work_pools,
					            *cached_memory.buffer);

			cached_id = cached_memory.id;
		}

		device->ciErr = clEnqueueNDRangeKernel(device->cqCommandQueue,
		                                       program(),
		                                       2,
		                                       NULL,
		                                       dim.global_size,
		                                       dim.local_size,
		                                       0,
		                                       NULL,
		                                       NULL);

		device->opencl_assert_err(device->ciErr, "clEnqueueNDRangeKernel");

		if(device->ciErr != CL_SUCCESS) {
			string message = string_printf("OpenCL error: %s in clEnqueueNDRangeKernel()",
			                               clewErrorString(device->ciErr));
			device->opencl_error(message);
			return false;
		}

		return true;
	}
const char *clGetErrorString (cl_int error) {
  return clewErrorString (error);
}
示例#14
0
void WorkScheduler::initialize(bool use_opencl, int num_cpu_threads)
{
	/* initialize highlighting */
	if (!g_highlightInitialized) {
		if (g_highlightedNodesRead) MEM_freeN(g_highlightedNodesRead);
		if (g_highlightedNodes)     MEM_freeN(g_highlightedNodes);

		g_highlightedNodesRead = NULL;
		g_highlightedNodes = NULL;

		COM_startReadHighlights();

		g_highlightInitialized = true;
	}

#if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
	/* deinitialize if number of threads doesn't match */
	if (g_cpudevices.size() != num_cpu_threads) {
		Device *device;

		while (g_cpudevices.size() > 0) {
			device = g_cpudevices.back();
			g_cpudevices.pop_back();
			device->deinitialize();
			delete device;
		}

		g_cpuInitialized = false;
	}

	/* initialize CPU threads */
	if (!g_cpuInitialized) {
		for (int index = 0; index < num_cpu_threads; index++) {
			CPUDevice *device = new CPUDevice();
			device->initialize();
			g_cpudevices.push_back(device);
		}

		g_cpuInitialized = true;
	}

#ifdef COM_OPENCL_ENABLED
	/* deinitialize OpenCL GPU's */
	if (use_opencl && !g_openclInitialized) {
		g_context = NULL;
		g_program = NULL;

		if (!OCL_init()) /* this will check for errors and skip if already initialized */
			return;

		if (clCreateContextFromType) {
			cl_uint numberOfPlatforms = 0;
			cl_int error;
			error = clGetPlatformIDs(0, 0, &numberOfPlatforms);
			if (error == -1001) { }   /* GPU not supported */
			else if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error));  }
			if (G.f & G_DEBUG) printf("%u number of platforms\n", numberOfPlatforms);
			cl_platform_id *platforms = (cl_platform_id *)MEM_mallocN(sizeof(cl_platform_id) * numberOfPlatforms, __func__);
			error = clGetPlatformIDs(numberOfPlatforms, platforms, 0);
			unsigned int indexPlatform;
			for (indexPlatform = 0; indexPlatform < numberOfPlatforms; indexPlatform++) {
				cl_platform_id platform = platforms[indexPlatform];
				cl_uint numberOfDevices = 0;
				clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, 0, &numberOfDevices);
				if (numberOfDevices <= 0)
					continue;

				cl_device_id *cldevices = (cl_device_id *)MEM_mallocN(sizeof(cl_device_id) * numberOfDevices, __func__);
				clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numberOfDevices, cldevices, 0);

				g_context = clCreateContext(NULL, numberOfDevices, cldevices, clContextError, NULL, &error);
				if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error));  }
				const char *cl_str[2] = {datatoc_COM_OpenCLKernels_cl, NULL};
				g_program = clCreateProgramWithSource(g_context, 1, cl_str, 0, &error);
				error = clBuildProgram(g_program, numberOfDevices, cldevices, 0, 0, 0);
				if (error != CL_SUCCESS) {
					cl_int error2;
					size_t ret_val_size = 0;
					printf("CLERROR[%d]: %s\n", error, clewErrorString(error));
					error2 = clGetProgramBuildInfo(g_program, cldevices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
					if (error2 != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
					char *build_log = (char *)MEM_mallocN(sizeof(char) * ret_val_size + 1, __func__);
					error2 = clGetProgramBuildInfo(g_program, cldevices[0], CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
					if (error2 != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
					build_log[ret_val_size] = '\0';
					printf("%s", build_log);
					MEM_freeN(build_log);
				}
				else {
					unsigned int indexDevices;
					for (indexDevices = 0; indexDevices < numberOfDevices; indexDevices++) {
						cl_device_id device = cldevices[indexDevices];
						cl_int vendorID = 0;
						cl_int error2 = clGetDeviceInfo(device, CL_DEVICE_VENDOR_ID, sizeof(cl_int), &vendorID, NULL);
						if (error2 != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error2, clewErrorString(error2)); }
						OpenCLDevice *clDevice = new OpenCLDevice(g_context, device, g_program, vendorID);
						clDevice->initialize();
						g_gpudevices.push_back(clDevice);
					}
				}
				MEM_freeN(cldevices);
			}
			MEM_freeN(platforms);
		}

		g_openclInitialized = true;
	}
#endif
#endif
}
void WriteBufferOperation::executeOpenCLRegion(OpenCLDevice *device, rcti *rect, unsigned int chunkNumber,
                                               MemoryBuffer **inputMemoryBuffers, MemoryBuffer *outputBuffer)
{
	float *outputFloatBuffer = outputBuffer->getBuffer();
	cl_int error;
	/*
	 * 1. create cl_mem from outputbuffer
	 * 2. call NodeOperation (input) executeOpenCLChunk(.....)
	 * 3. schedule readback from opencl to main device (outputbuffer)
	 * 4. schedule native callback
	 *
	 * note: list of cl_mem will be filled by 2, and needs to be cleaned up by 4
	 */
	// STEP 1
	const unsigned int outputBufferWidth = outputBuffer->getWidth();
	const unsigned int outputBufferHeight = outputBuffer->getHeight();

	const cl_image_format imageFormat = {
		CL_RGBA,
		CL_FLOAT
	};

	cl_mem clOutputBuffer = clCreateImage2D(device->getContext(), CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, &imageFormat, outputBufferWidth, outputBufferHeight, 0, outputFloatBuffer, &error);
	if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error));  }
	
	// STEP 2
	list<cl_mem> *clMemToCleanUp = new list<cl_mem>();
	clMemToCleanUp->push_back(clOutputBuffer);
	list<cl_kernel> *clKernelsToCleanUp = new list<cl_kernel>();

	this->m_input->executeOpenCL(device, outputBuffer, clOutputBuffer, inputMemoryBuffers, clMemToCleanUp, clKernelsToCleanUp);

	// STEP 3

	size_t origin[3] = {0, 0, 0};
	size_t region[3] = {outputBufferWidth, outputBufferHeight, 1};

//	clFlush(queue);
//	clFinish(queue);

	error = clEnqueueBarrier(device->getQueue());
	if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error));  }
	error = clEnqueueReadImage(device->getQueue(), clOutputBuffer, CL_TRUE, origin, region, 0, 0, outputFloatBuffer, 0, NULL, NULL);
	if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error));  }
	
	this->getMemoryProxy()->getBuffer()->copyContentFrom(outputBuffer);

	// STEP 4
	while (!clMemToCleanUp->empty()) {
		cl_mem mem = clMemToCleanUp->front();
		error = clReleaseMemObject(mem);
		if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error));  }
		clMemToCleanUp->pop_front();
	}

	while (!clKernelsToCleanUp->empty()) {
		cl_kernel kernel = clKernelsToCleanUp->front();
		error = clReleaseKernel(kernel);
		if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error));  }
		clKernelsToCleanUp->pop_front();
	}
	delete clKernelsToCleanUp;
}
示例#16
0
	virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim,
	                                            RenderTile& rtile,
	                                            int num_global_elements,
	                                            device_memory& kernel_globals,
	                                            device_memory& kernel_data,
	                                            device_memory& split_data,
	                                            device_memory& ray_state,
	                                            device_memory& queue_index,
	                                            device_memory& use_queues_flag,
	                                            device_memory& work_pool_wgs
	                                            )
	{
		cl_int dQueue_size = dim.global_size[0] * dim.global_size[1];

		/* Set the range of samples to be processed for every ray in
		 * path-regeneration logic.
		 */
		cl_int start_sample = rtile.start_sample;
		cl_int end_sample = rtile.start_sample + rtile.num_samples;

		cl_uint start_arg_index =
			device->kernel_set_args(device->program_data_init(),
			                0,
			                kernel_globals,
			                kernel_data,
			                split_data,
			                num_global_elements,
			                ray_state,
			                rtile.rng_state);

/* TODO(sergey): Avoid map lookup here. */
#define KERNEL_TEX(type, ttype, name) \
	device->set_kernel_arg_mem(device->program_data_init(), &start_arg_index, #name);
#include "kernel/kernel_textures.h"
#undef KERNEL_TEX

		start_arg_index +=
			device->kernel_set_args(device->program_data_init(),
			                start_arg_index,
			                start_sample,
			                end_sample,
			                rtile.x,
			                rtile.y,
			                rtile.w,
			                rtile.h,
			                rtile.offset,
			                rtile.stride,
			                queue_index,
			                dQueue_size,
			                use_queues_flag,
			                work_pool_wgs,
			                rtile.num_samples,
			                rtile.buffer);

		/* Enqueue ckPathTraceKernel_data_init kernel. */
		device->ciErr = clEnqueueNDRangeKernel(device->cqCommandQueue,
		                               device->program_data_init(),
		                               2,
		                               NULL,
		                               dim.global_size,
		                               dim.local_size,
		                               0,
		                               NULL,
		                               NULL);

		device->opencl_assert_err(device->ciErr, "clEnqueueNDRangeKernel");

		if(device->ciErr != CL_SUCCESS) {
			string message = string_printf("OpenCL error: %s in clEnqueueNDRangeKernel()",
			                               clewErrorString(device->ciErr));
			device->opencl_error(message);
			return false;
		}

		return true;
	}
示例#17
0
	virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim,
	                                            RenderTile& rtile,
	                                            int num_global_elements,
	                                            device_memory& kernel_globals,
	                                            device_memory& kernel_data,
	                                            device_memory& split_data,
	                                            device_memory& ray_state,
	                                            device_memory& queue_index,
	                                            device_memory& use_queues_flag,
	                                            device_memory& work_pool_wgs
	                                            )
	{
		cl_int dQueue_size = dim.global_size[0] * dim.global_size[1];

		/* Set the range of samples to be processed for every ray in
		 * path-regeneration logic.
		 */
		cl_int start_sample = rtile.start_sample;
		cl_int end_sample = rtile.start_sample + rtile.num_samples;

		cl_uint start_arg_index =
			device->kernel_set_args(device->program_data_init(),
			                0,
			                kernel_globals,
			                kernel_data,
			                split_data,
			                num_global_elements,
			                ray_state);

			device->set_kernel_arg_buffers(device->program_data_init(), &start_arg_index);

		start_arg_index +=
			device->kernel_set_args(device->program_data_init(),
			                start_arg_index,
			                start_sample,
			                end_sample,
			                rtile.x,
			                rtile.y,
			                rtile.w,
			                rtile.h,
			                rtile.offset,
			                rtile.stride,
			                queue_index,
			                dQueue_size,
			                use_queues_flag,
			                work_pool_wgs,
			                rtile.num_samples,
			                rtile.buffer);

		/* Enqueue ckPathTraceKernel_data_init kernel. */
		device->ciErr = clEnqueueNDRangeKernel(device->cqCommandQueue,
		                               device->program_data_init(),
		                               2,
		                               NULL,
		                               dim.global_size,
		                               dim.local_size,
		                               0,
		                               NULL,
		                               NULL);

		device->opencl_assert_err(device->ciErr, "clEnqueueNDRangeKernel");

		if(device->ciErr != CL_SUCCESS) {
			string message = string_printf("OpenCL error: %s in clEnqueueNDRangeKernel()",
			                               clewErrorString(device->ciErr));
			device->opencl_error(message);
			return false;
		}

		cached_memory.split_data = &split_data;
		cached_memory.ray_state = &ray_state;
		cached_memory.queue_index = &queue_index;
		cached_memory.use_queues_flag = &use_queues_flag;
		cached_memory.work_pools = &work_pool_wgs;
		cached_memory.buffer = &rtile.buffer;
		cached_memory.id++;

		return true;
	}