Ejemplo n.º 1
0
	void SetUpOpenCL() {
		//----------------------------------------------------------------------
		// Compile kernel
		//----------------------------------------------------------------------

		const std::string &kernelFileName = commandLineOpts["kernel"].as<std::string>();
		OCLTOY_LOG("Compile OpenCL kernel: " << kernelFileName);

		// Read the kernel
		const std::string kernelSource = ReadSources(kernelFileName, "jugCLer");

		// Create the kernel program
		cl::Device &oclDevice = selectedDevices[0];
		cl::Context &oclContext = deviceContexts[0];
		cl::Program program = cl::Program(oclContext, kernelSource);
		try {
			VECTOR_CLASS<cl::Device> buildDevice;
			buildDevice.push_back(oclDevice);
			program.build(buildDevice);
		} catch (cl::Error err) {
			cl::STRING_CLASS strError = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(oclDevice);
			OCLTOY_LOG("Kernel compilation error:\n" << strError.c_str());

			throw err;
		}

		kernelsJugCLer = cl::Kernel(program, "render_gpu");
		kernelsJugCLer.getWorkGroupInfo<size_t>(oclDevice, CL_KERNEL_WORK_GROUP_SIZE, &kernelsWorkGroupSize);
		if (commandLineOpts.count("workgroupsize"))
			kernelsWorkGroupSize = commandLineOpts["workgroupsize"].as<size_t>();
		OCLTOY_LOG("Using workgroup size: " << kernelsWorkGroupSize);

		//----------------------------------------------------------------------
		// Allocate buffer
		//----------------------------------------------------------------------

		AllocateBuffers();

		//----------------------------------------------------------------------
		// Set kernel arguments
		//----------------------------------------------------------------------

		kernelsJugCLer.setArg(0, *sceneBuff);
		kernelsJugCLer.setArg(1, *pixelsBuff);
	}
Ejemplo n.º 2
0
int Setup_OpenCL( const char *program_source )
{
    cl_device_id devices[16];
    size_t cb;
    cl_uint size_ret = 0;
    cl_int err;
    int num_cores;
    cl_device_id device_ID;
    char device_name[128] = {0};
	
	if(g_bRunOnPG)
	{
		printf("Trying to run on a Processor Graphics \n");
	}
	else
	{
		printf("Trying to run on a CPU \n");
	}

    cl_platform_id intel_platform_id = GetIntelOCLPlatform();
    if( intel_platform_id == NULL )
    {
        printf("ERROR: Failed to find Intel OpenCL platform.\n");
        return -1;
    }

    cl_context_properties context_properties[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties)intel_platform_id, NULL };

    // create the OpenCL context on a CPU/PG 
	if(g_bRunOnPG)
	{
		g_context = clCreateContextFromType(context_properties, CL_DEVICE_TYPE_GPU, NULL, NULL, NULL);
	}
	else
	{
		g_context = clCreateContextFromType(context_properties, CL_DEVICE_TYPE_CPU, NULL, NULL, NULL);
	}
	if (g_context == (cl_context)0)
        return -1;

    // get the list of CPU devices associated with context
    err = clGetContextInfo(g_context, CL_CONTEXT_DEVICES, 0, NULL, &cb);
    clGetContextInfo(g_context, CL_CONTEXT_DEVICES, cb, devices, NULL);

    g_cmd_queue = clCreateCommandQueue(g_context, devices[0], 0, NULL);
    if (g_cmd_queue == (cl_command_queue)0)
    {
        Cleanup_OpenCL();
        return -1;
    }

    char *sources = ReadSources(program_source);	//read program .cl source file
    g_program = clCreateProgramWithSource(g_context, 1, (const char**)&sources, NULL, NULL);
    if (g_program == (cl_program)0)
    {
        printf("ERROR: Failed to create Program with source...\n");
        Cleanup_OpenCL();
        free(sources);
        return -1;
    }

    err = clBuildProgram(g_program, 0, NULL, NULL, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        printf("ERROR: Failed to build program...\n");
        BuildFailLog(g_program, devices[0]);
        Cleanup_OpenCL();
        free(sources);
        return -1;
    }

#ifdef PER_PIXEL
	g_kernel = clCreateKernel(g_program, "ToneMappingPerPixel", NULL);
#else
    g_kernel = clCreateKernel(g_program, "ToneMappingLine", NULL);
#endif
    if (g_kernel == (cl_kernel)0)
    {
        printf("ERROR: Failed to create kernel...\n");
        Cleanup_OpenCL();
        free(sources);
        return -1;
    }
    free(sources);

    // retrieve platform information

    // use first device ID
    device_ID = devices[0];

    err = clGetDeviceInfo(device_ID, CL_DEVICE_NAME, 128, device_name, NULL);
    if (err!=CL_SUCCESS)
    {
        printf("ERROR: Failed to get device information (device name)...\n");
        Cleanup_OpenCL();
        return -1;
    }
    printf("Using device %s...\n", device_name);

    err = clGetDeviceInfo(device_ID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &num_cores, NULL);
    if (err!=CL_SUCCESS)
    {
        printf("ERROR: Failed to get device information (max compute units)...\n");
        Cleanup_OpenCL();
        return -1;
    }
    printf("Using %d compute units...\n", num_cores);


    err = clGetDeviceInfo(device_ID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(cl_uint), &g_min_align, NULL);
    if (err!=CL_SUCCESS)
    {
        printf("ERROR: Failed to get device information (max memory base address align size)...\n");
        Cleanup_OpenCL();
        return -1;
    }
    g_min_align /= 8; //in bytes
    printf("Expected min alignment for buffers is %d bytes...\n", g_min_align);

    return 0; // success...
}
Ejemplo n.º 3
0
/**
 * Sets up everything needed to render on one OpenCL device: a context and a
 * profiling-enabled command queue for that device, the compiled "RadianceGPU"
 * kernel, the work group size, and the camera and sphere buffers.
 *
 * @param device            Device to render on.
 * @param kernelFileName    Passed to ReadSources() to obtain the kernel source.
 * @param forceGPUWorkSize  When greater than 0 and the device is a GPU, used
 *                          as the work group size instead of the size
 *                          reported for the kernel.
 * @param camera            Host pointer handed to the camera buffer.
 * @param spheres           Host pointer handed to the sphere buffer.
 * @param sceneSphereCount  Number of spheres; sizes the sphere buffer.
 *
 * @throws cl::Error (or whatever the failing call throws). A build failure
 *         prints the build log first. In every case the OpenCL objects
 *         allocated so far are released before the exception leaves the
 *         constructor, since the destructor does not run for an object whose
 *         constructor throws.
 */
RenderDevice::RenderDevice(const cl::Device &device, const string &kernelFileName,
		const unsigned int forceGPUWorkSize,
		Camera *camera, Sphere *spheres, const unsigned int sceneSphereCount/*,
		boost::barrier *startBarrier, boost::barrier *endBarrier*/) :
	/*renderThread(NULL), threadStartBarrier(startBarrier), threadEndBarrier(endBarrier),*/
	sphereCount(sceneSphereCount), colorBuffer(NULL), pixelBuffer(NULL), seedBuffer(NULL),
	pixels(NULL), colors(NULL), seeds(NULL), exeUnitCount(0.0), exeTime(0.0) {
	deviceName = "anonymouse";//device.getInfo<CL_DEVICE_NAME > ().c_str();

	// These members are assigned with `new` below. Null them first so the
	// failure handler at the end can delete them unconditionally.
	context = NULL;
	queue = NULL;
	kernel = NULL;
	cameraBuffer = NULL;
	sphereBuffer = NULL;

	try {
		// Allocate a context with the selected device
		cl::Platform platform = device.getInfo<CL_DEVICE_PLATFORM>();
		VECTOR_CLASS<cl::Device> devices;
		devices.push_back(device);
		cl_context_properties cps[3] = {
			CL_CONTEXT_PLATFORM, (cl_context_properties)platform(), 0
		};
		context = new cl::Context(devices, cps);

		// Allocate the queue for this device
		cl_command_queue_properties prop = CL_QUEUE_PROFILING_ENABLE;
		queue = new cl::CommandQueue(*context, device, prop);

		// Create the kernel
		string src = ReadSources(kernelFileName);

		// Compile sources
		cl::Program::Sources source(1, make_pair(src.c_str(), src.length()));
		cl::Program program = cl::Program(*context, source);
		try {
			VECTOR_CLASS<cl::Device> buildDevice;
			buildDevice.push_back(device);
			// Only Apple (non-Emscripten) builds pass an extra define.
#if defined(__APPLE__) && !defined(__EMSCRIPTEN__)
			program.build(buildDevice, "-D__APPLE__");
#else
			program.build(buildDevice, "");
#endif
			cl::string result = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device);
			cerr << "[Device::" << deviceName << "]" << " Compilation result: " << result.c_str() << endl;
		} catch (const cl::Error &) {
			// Caught by const reference: no copy is made and nothing is sliced.
			cl::string strError = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device);
			cerr << "[Device::" << deviceName << "]" << " Compilation error:" << endl << strError.c_str() << endl;

			// Rethrow the in-flight exception itself rather than a copy of it.
			throw;
		}

		kernel = new cl::Kernel(program, "RadianceGPU");

		kernel->getWorkGroupInfo<size_t>(device, CL_KERNEL_WORK_GROUP_SIZE, &workGroupSize);
		cerr << "[Device::" << deviceName << "]" << " Suggested work group size: " << workGroupSize << endl;

		// Force workgroup size if applicable and required
		if ((forceGPUWorkSize > 0) && (device.getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_GPU)) {
			workGroupSize = forceGPUWorkSize;
			cerr << "[Device::" << deviceName << "]" << " Forced work group size: " << workGroupSize << endl;
		}

		// Create the thread for the rendering
		//renderThread = new boost::thread(boost::bind(RenderDevice::RenderThread, this));

		// Create camera buffer
		// NOTE(review): on Apple a host pointer is passed without
		// CL_MEM_USE_HOST_PTR/CL_MEM_COPY_HOST_PTR; the OpenCL specification
		// lists that combination as CL_INVALID_HOST_PTR - confirm it is
		// accepted by the targeted runtime.
		cameraBuffer = new cl::Buffer(*context,
#if defined (__APPLE__)
				CL_MEM_READ_ONLY, // CL_MEM_USE_HOST_PTR is very slow with Apple's OpenCL
#else
				CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
#endif
				sizeof(Camera),
				camera);
		cerr << "[Device::" << deviceName << "] Camera buffer size: " << (sizeof(Camera) / 1024) << "Kb" << endl;

		// Create the sphere (scene) buffer
		sphereBuffer = new cl::Buffer(*context,
#if defined (__APPLE__)
				CL_MEM_READ_ONLY, // CL_MEM_USE_HOST_PTR is very slow with Apple's OpenCL
#else
				CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
#endif
				sizeof(Sphere) * sphereCount,
				spheres);
		cerr << "[Device::" << deviceName << "] Scene buffer size: " << (sizeof(Sphere) * sphereCount / 1024) << "Kb" << endl;
	} catch (...) {
		// Release whatever was allocated before the failure, dependants
		// before the context they were created from, then pass the
		// exception on unchanged.
		delete sphereBuffer;
		delete cameraBuffer;
		delete kernel;
		delete queue;
		delete context;
		sphereBuffer = NULL;
		cameraBuffer = NULL;
		kernel = NULL;
		queue = NULL;
		context = NULL;

		throw;
	}
}