示例#1
0
void initSimulation() {
    // source: http://stackoverflow.com/questions/26517114/how-to-compile-opencl-project-with-kernels

    try {
        std::vector<cl::Platform> platforms;
        cl::Platform::get(&platforms);

        std::vector<cl::Device> devices;
        platforms[PLATFORM_ID].getDevices(CL_DEVICE_TYPE_CPU | CL_DEVICE_TYPE_GPU, &devices);

        context = cl::Context(devices);
        queue = cl::CommandQueue(context, devices[DEVICE_ID]);

        std::ifstream sourceFile{"kernels/programs.cl"};
        std::string sourceCode(std::istreambuf_iterator<char>(sourceFile), (std::istreambuf_iterator<char>()));
        cl::Program::Sources source(1, std::make_pair(sourceCode.c_str(), sourceCode.length()));

        simulationProgram = cl::Program(context, source);
        simulationProgram.build(devices);

        visualizationBufferGPU = cl::Buffer(context, CL_MEM_WRITE_ONLY,
                                            sizeof(unsigned char) * 4 * fieldWidth * fieldHeight,
                                            nullptr, nullptr);
        randomizeField();

        stepKernel = cl::Kernel(simulationProgram, "tick");
        stepKernel.setArg(0, fieldWidth);
        stepKernel.setArg(1, fieldHeight);
        stepKernel.setArg(3, visualizationBufferGPU);
    } catch (cl::Error err) {
        std::cout << "Error: " << err.what() << "(" << err.err() << ")" << std::endl;
        exit(2);
    }
}
示例#2
0
    /**
     * Creates `prog` in the current context and builds it for the current device.
     *
     * The source list is USE_DBL_SRC_STR, then KParam_hpp, then the
     * `num_files` strings in `ker_strs` (lengths taken from `ker_lens`).
     * `options` is appended to a fixed set of default compiler options.
     *
     * On any exception SHOW_BUILD_INFO(prog) is invoked and the exception is
     * rethrown unchanged.
     */
    void buildProgram(cl::Program &prog, const int num_files,
                      const char **ker_strs, const int *ker_lens, std::string options)
    {
        try {
            Program::Sources sourceList;
            sourceList.emplace_back(USE_DBL_SRC_STR.c_str(), USE_DBL_SRC_STR.length());
            sourceList.emplace_back(KParam_hpp, KParam_hpp_len);

            // Caller-supplied kernel sources follow the two fixed preambles.
            int fileIdx = 0;
            while (fileIdx < num_files) {
                sourceList.emplace_back(ker_strs[fileIdx], ker_lens[fileIdx]);
                ++fileIdx;
            }

            // Default options are assembled once, on the first call.
            static std::string defaults = [] {
                std::string opts(" -cl-std=CL1.1");
                opts += std::string(" -D dim_type=");
                opts += std::string(dtype_traits<dim_type>::getName());
                return opts;
            }();

            prog = cl::Program(getContext(), sourceList);

            std::vector<cl::Device> targetDevices(1, getDevice());
            prog.build(targetDevices, (defaults + options).c_str());

        } catch (...) {
            SHOW_BUILD_INFO(prog);
            throw;
        }
    }
示例#3
0
void initCL()
{
    dumpCLinfo();

    EGLDisplay mEglDisplay = eglGetCurrentDisplay();
    if (mEglDisplay == EGL_NO_DISPLAY)
        LOGE("initCL: eglGetCurrentDisplay() returned 'EGL_NO_DISPLAY', error = %x", eglGetError());

    EGLContext mEglContext = eglGetCurrentContext();
    if (mEglContext == EGL_NO_CONTEXT)
        LOGE("initCL: eglGetCurrentContext() returned 'EGL_NO_CONTEXT', error = %x", eglGetError());

    cl_context_properties props[] =
    {   CL_GL_CONTEXT_KHR,   (cl_context_properties) mEglContext,
        CL_EGL_DISPLAY_KHR,  (cl_context_properties) mEglDisplay,
        CL_CONTEXT_PLATFORM, 0,
        0 };

    try
    {
        cl::Platform p = cl::Platform::getDefault();
        std::string ext = p.getInfo<CL_PLATFORM_EXTENSIONS>();
        if(ext.find("cl_khr_gl_sharing") == std::string::npos)
            LOGE("Warning: CL-GL sharing isn't supported by PLATFORM");
        props[5] = (cl_context_properties) p();

        theContext = cl::Context(CL_DEVICE_TYPE_GPU, props);
        std::vector<cl::Device> devs = theContext.getInfo<CL_CONTEXT_DEVICES>();
        LOGD("Context returned %d devices, taking the 1st one", devs.size());
        ext = devs[0].getInfo<CL_DEVICE_EXTENSIONS>();
        if(ext.find("cl_khr_gl_sharing") == std::string::npos)
            LOGE("Warning: CL-GL sharing isn't supported by DEVICE");

        theQueue = cl::CommandQueue(theContext, devs[0]);

        cl::Program::Sources src(1, std::make_pair(oclProgI2I, sizeof(oclProgI2I)));
        theProgI2I = cl::Program(theContext, src);
        theProgI2I.build(devs);

        cv::ocl::attachContext(p.getInfo<CL_PLATFORM_NAME>(), p(), theContext(), devs[0]());
        if( cv::ocl::useOpenCL() )
            LOGD("OpenCV+OpenCL works OK!");
        else
            LOGE("Can't init OpenCV with OpenCL TAPI");
    }
    catch(cl::Error& e)
    {
        LOGE("cl::Error: %s (%d)", e.what(), e.err());
    }
    catch(std::exception& e)
    {
        LOGE("std::exception: %s", e.what());
    }
    catch(...)
    {
        LOGE( "OpenCL info: unknown error while initializing OpenCL stuff" );
    }
    LOGD("initCL completed");
}
示例#4
0
文件: kmeans.cpp 项目: markusd/gpgpu
void initCL()
{
	ocl::createContextEx(CL_DEVICE_TYPE_ALL, clPlatform, clDevices, clContext, clQueues);
	cl_int clError = CL_SUCCESS;

	std::ifstream t("kmeans.cl");
	std::string code((std::istreambuf_iterator<char>(t)), std::istreambuf_iterator<char>());

	std::string header = "#define DIM ";
	header +=  util::toString(DIM);
	header += "\n";
	header += "#define K ";
	header += util::toString(K);
	header += "\n";
	header += "#define N ";
	header += util::toString(N);
	header += "\n";
	header += "#define AM_LWS ";
	header += util::toString(AM_LWS);
	header += "\n";
	header += "#define RP_LWS ";
	header += util::toString(RP_LWS);
	header += "\n\n\n";

	code = header + code;

	try {
		cl::Program::Sources source(1, std::make_pair(code.c_str(), code.size()));
		clProgram = cl::Program(clContext, source);
		clProgram.build(clDevices, "-cl-fast-relaxed-math -cl-unsafe-math-optimizations -cl-mad-enable");

		std::string info("");
		for (std::vector<cl::Device>::iterator itr = clDevices.begin(); itr != clDevices.end(); ++itr) {
			clProgram.getBuildInfo(*itr, CL_PROGRAM_BUILD_LOG, &info);
			if (info.size() > 0)
				std::cout << "Build log: " << info << std::endl;
		}

		for (int i = 0; i < clDevices.size(); ++i) {
			clClusterAssignment.push_back(cl::Kernel(clProgram, "cluster_assignment", &clError));
			clClusterReposition.push_back(cl::Kernel(clProgram, "cluster_reposition", &clError));
			clClusterReposition_k.push_back(cl::Kernel(clProgram, "cluster_reposition_k", &clError));
			clClusterReposition_k_c.push_back(cl::Kernel(clProgram, "c_cluster_reposition", &clError));
			clComputeCost.push_back(cl::Kernel(clProgram, "compute_cost", &clError));
		}

	} catch (const cl::Error& err) {
		std::cout << "OpenCL Error 4: " << err.what() << " (" << err.err() << ")" << std::endl;
		std::string info("");
		for (std::vector<cl::Device>::iterator itr = clDevices.begin(); itr != clDevices.end(); ++itr) {
			clProgram.getBuildInfo(*itr, CL_PROGRAM_BUILD_LOG, &info);
			if (info.size() > 0)
				std::cout << "Build log: " << info << std::endl;
		}
		std::cin.get();
	}
}
示例#5
0
文件: clgl.cpp 项目: Gardarik/CLGL
void InitCL()
{
	//cl_int err = CL_SUCCESS;
	try
	{
		//Identify platforms
		cl::Platform::get(&clPlatformList);
		//Select first platform with any GPU devices
		for(unsigned int i=0; i<clPlatformList.size(); i++)
		{
			clPlatformList[i].getDevices(CL_DEVICE_TYPE_GPU, &clDeviceList);
			if(!clDeviceList.empty())	break;
		}

		//Set Context Properties: Get associated cl_platform_id using getInfo() on the first GPU
		//Thus conveniently avoiding previous C++ bindings issues :)
		cl_context_properties clProps[] = 
		{
			CL_GL_CONTEXT_KHR,		(cl_context_properties)wglGetCurrentContext(),
			CL_WGL_HDC_KHR,			(cl_context_properties)wglGetCurrentDC(),
			CL_CONTEXT_PLATFORM,	(cl_context_properties)clDeviceList[0].getInfo<CL_DEVICE_PLATFORM>(),
			0
		};
		//Create interop context from GPU devices
		clContext = cl::Context(CL_DEVICE_TYPE_GPU, clProps);

		//Generate program with source and build
		std::string progFile = ReadKernels(KERNEL_FILE);
		cl::Program::Sources clSource(1, std::make_pair(progFile.c_str(), progFile.size()));
		clProgram = cl::Program(clContext, clSource);
		clProgram.build(clDeviceList);
		//Initialize kernels
		for(int i=0; i<NUM_KERNELS; i++)
		{
			clKernels[i] = cl::Kernel(clProgram, kernelName[i]);
		}
		//Create Command Queue with profiling enabled
		clQueue = cl::CommandQueue(clContext, clDeviceList[0], CL_QUEUE_PROFILING_ENABLE);
	}
	catch(cl::Error e)
	{
		cout << "OpenCL initialization failure: " << e.what() << endl
			<< "Error code: " << e.err() << endl;
		if(e.err() == -11)
		{
			std::string clProgLog;
			clProgram.getBuildInfo(clDeviceList[0], CL_PROGRAM_BUILD_LOG, &clProgLog);
			cout << clProgLog;
			system("pause");
			exit(EXIT_FAILURE);
		}
		throw;
	}
}
示例#6
0
文件: main.cpp 项目: yoggy/opencltest
/**
 * Minimal OpenCL smoke test: builds source_str for the first GPU of the first
 * platform, creates the "hello" kernel with a 1024-byte buffer as argument 0,
 * and runs it over a 10x10 global range.
 *
 * Returns 0 on success and 1 when no platform/device is found or the build
 * fails. Other OpenCL errors are handled by CHECK_CL_ERROR.
 */
int main(int argc, char *argv[])
{
	(void)argc;
	(void)argv;

	cl_int err = CL_SUCCESS;
	cl::Event evt;

	std::vector<cl::Platform> platforms;
	cl::Platform::get(&platforms);
	if (platforms.size() == 0) {
		// `return false` would be exit status 0, i.e. success.
		return 1;
	}
	platform_ = platforms[0];

	cl_context_properties properties[] = 
		{ CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0};
	context_ = cl::Context(CL_DEVICE_TYPE_GPU, properties, NULL, NULL, &err); 
	CHECK_CL_ERROR(err, "cl::Context");

	std::vector<cl::Device> devices = context_.getInfo<CL_CONTEXT_DEVICES>();
	if (devices.size() == 0) {
		return 1;
	}
	device_ = devices[0];

	sources_.push_back(std::make_pair(source_str.c_str(), source_str.size()));
	program_ = cl::Program(context_, sources_);
	err = program_.build(devices);
	if (err != CL_SUCCESS) {
		std::string log = program_.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices[0]);
		std::cout << "program.build() ERROR: " << log.c_str() << std::endl;
		return 1;
	}

	kernel_ = cl::Kernel(program_, "hello", &err); 
	CHECK_CL_ERROR(err, "cl::Kernel");

	buf_ = cl::Buffer(context_, CL_MEM_READ_ONLY, 1024, NULL, &err);
	CHECK_CL_ERROR(err, "cl::Buffer");

	queue_ = cl::CommandQueue(context_, device_, 0, &err);
	CHECK_CL_ERROR(err, "cl::CommandQueue");

	err = kernel_.setArg(0, buf_);
	CHECK_CL_ERROR(err, "kernel.setArg()");

	err = queue_.enqueueNDRangeKernel(kernel_, cl::NullRange, cl::NDRange(10, 10), cl::NullRange, NULL, &evt); 
	// Check the enqueue status first: the event is only valid if the enqueue succeeded.
	CHECK_CL_ERROR(err, "queue.enqueueNDRangeKernel()");
	evt.wait();

	return 0;
}
示例#7
0
// Creates `program` from `sources` in `context` and builds it for `device`.
// Prints a status message for each outcome (plus the build log when the build
// fails) and returns the OpenCL status code of the step that ran last.
cl_int CLFW::Build(cl::Program &program, cl::Program::Sources &sources, cl::Context &context, cl::Device &device) {
  cl_int status = CL_SUCCESS;

  program = cl::Program(context, sources, &status);
  if (status != CL_SUCCESS) {
    Print("Error creating program:", errorFG, errorBG);
    return status;
  }

  status = program.build({ device });
  if (status == CL_SUCCESS) {
    Print("Success building OpenCL program. ", successFG, successBG);
    return status;
  }

  // Build failed: report it together with the compiler log for this device.
  Print("Error building program:", errorFG, errorBG);
  Print(program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device), errorFG, errorBG);
  return status;
}
示例#8
0
/**
 * Builds `program` for `devices` with the given compiler `options`.
 *
 * @param program    program object to build
 * @param devices    devices to build for
 * @param options    build option string passed to the OpenCL compiler
 * @param notifyFptr optional completion callback, forwarded to build()
 * @param data       user data handed to `notifyFptr`
 *
 * If build() reports an error, the build log of every device is printed to
 * stdout and the error is passed to CHECK_OPENCL_ERROR.
 *
 * The callback and user data were previously accepted but never forwarded.
 * Note that OpenCL may return from the build before it has finished when a
 * non-NULL callback is supplied; in that case only errors detected
 * immediately are reported here.
 */
void CLHelper::compileProgram(
	cl::Program& program,
	std::vector<cl::Device>& devices,
	const char* options,
	void (CL_CALLBACK * notifyFptr)(cl_program, void *),
	void* data)
{
	cl_int err;

	err = program.build(devices, options, notifyFptr, data);
	if(err != CL_SUCCESS) {
		std::cout << "Build error! Showing build log:" << std::endl << std::endl;

		std::string errorLog;
		std::vector<cl::Device>::iterator device;
		for(device = devices.begin(); device != devices.end(); ++device)
		{
			errorLog = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(*device);
			std::cout << errorLog << std::endl;
		}
		CHECK_OPENCL_ERROR(err, "cl::Program::build() failed.");
	}
}
/**
 * One-time application setup.
 *
 * Positions the camera, creates an OpenCL context that shares the current
 * CGL share group, builds the "path_tracing" kernel from
 * ../../../assets/path_tracing.cl, creates the GL texture with its CL image
 * wrapper, allocates the camera/primitive/material/sky buffers, uploads a
 * static five-sphere scene and binds kernel arguments 0-6.
 *
 * OpenCL status codes are checked with pt_assert; a missing kernel file or a
 * failed build prints a message to stderr and exits with EXIT_FAILURE.
 */
void PTWeekend::setup()
{
    
    /* Scene data */
    camera.lookAt(glm::vec3(-2,1,1), vec3(0, 0, -1.0f), vec3(0,1,0));
    camera.setPerspective( 45.0f, getWindowAspectRatio(), 0.01f, 100.0f );
    cameraUI = CameraUi(&camera, getWindow());
    
    
    
    glm::vec3 bottom_sky_color(1.0, 1.0, 1.0);
    glm::vec3 top_sky_color(0.5, 0.7, 1.0);
    
    CGLContextObj glContext = CGLGetCurrentContext();
    CGLShareGroupObj shareGroup = CGLGetShareGroup(glContext);
    
    GLuint imgTexName;
    
    const char* program_file_str = "../../../assets/path_tracing.cl";
    
    /* Obtain a platform */
    std::vector<cl::Platform> platforms;
    clStatus = cl::Platform::get(&platforms);
    pt_assert(clStatus, "Could not find an OpenCL platform.");
    
    /* Obtain a device and determinte max local size */
    std::vector<cl::Device> devices;
    clStatus = platforms[0].getDevices(CL_DEVICE_TYPE_GPU, &devices);
    pt_assert(clStatus, "Could not find a GPU device.");
    device = devices[0];
    
    /* Create an OpenCL context for the device */
    cl_context_properties properties[] = {
        CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE,
        (cl_context_properties)shareGroup,
        0
    };
    
    context = cl::Context({device}, properties, NULL, NULL, &clStatus);
    pt_assert(clStatus, "Could not create a context for device.");
    
    /* Load and build a program */
    std::ifstream program_file(program_file_str);
    if (!program_file)
    {
        /* A missing file would otherwise be compiled as an empty program. */
        std::cerr << "Could not open " << program_file_str << "\n";
        exit(EXIT_FAILURE);
    }
    std::string program_str(std::istreambuf_iterator<char>(program_file), (std::istreambuf_iterator<char>()));
    cl::Program::Sources sources(1, std::make_pair(program_str.c_str(), program_str.length() + 1));
    program = cl::Program(context, sources);
    clStatus = program.build({device}, "-I ../../../assets/ -cl-denorms-are-zero");
    
    if (clStatus != CL_SUCCESS)
    {
        std::string log = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device);
        std::cerr << log << "\n";
        exit(EXIT_FAILURE);
    }
    
    /* Create command queue */
    cmd_queue = cl::CommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &clStatus);
    pt_assert(clStatus, "Could not create command queue");
    
    /* create kernel and set the kernel arguments */
    kernel = cl::Kernel(program, "path_tracing", &clStatus);
    pt_assert(clStatus, "Could not create kernel");
    
    img_width = getWindowWidth();
    img_height = getWindowHeight();
    
    true_img_width = getWindowWidth();
    true_img_height = getWindowHeight();
    
    /* Split the maximum work-group size into a 2D local size whose width is a power of two. */
    local_size = device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>(); //TODO: throws
    local_width = (size_t)pow(2, ceilf(log2f((floorf(sqrtf(local_size))))));
    local_height = local_size / local_width;
    
    /* Round the image size up to a multiple of the local size. */
    img_width = ceilf((float)img_width / (float)local_width) * local_width;
    img_height = ceilf((float)img_height / (float)local_height) * local_height;
    
    
    unsigned int samples = 16;
    
    /* Create GL texture and CL wrapper */
    glGenTextures(1, &imgTexName);
    glBindTexture(GL_TEXTURE_2D, imgTexName);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, img_width, img_height, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0);
    glBindTexture(GL_TEXTURE_2D, 0);
    imgTex = gl::Texture2d::create(GL_TEXTURE_2D, imgTexName, img_width, img_height, true);
    glFinish();
    
    img_buffer.push_back(cl::Image2DGL(context, CL_MEM_WRITE_ONLY, GL_TEXTURE_2D, 0, imgTexName, &clStatus));
    pt_assert(clStatus, "Could not create buffer");
    
    /* Create all buffers */
    cam_buffer = cl::Buffer(context, CL_MEM_READ_ONLY, sizeof(cl_pinhole_cam), NULL, &clStatus);
    pt_assert(clStatus, "Could not create camera buffer");
    
    primitive_buffer = cl::Buffer (context, CL_MEM_READ_ONLY, MAX_PRIMITIVES * sizeof(cl_sphere), NULL, &clStatus);
    pt_assert(clStatus, "Could not create primitive buffer");
    
    material_buffer = cl::Buffer (context, CL_MEM_READ_ONLY, MAX_PRIMITIVES * sizeof(cl_material), NULL, &clStatus);
    pt_assert(clStatus, "Could not create material buffer");
    
    sky_buffer = cl::Buffer(context, CL_MEM_READ_ONLY, sizeof(cl_sky_material), NULL, &clStatus);
    pt_assert(clStatus, "Could not create sky buffer");
    
    /* Upload scene (static) */
    
    size_t sceneObjectCount = 5;
    /* Host staging arrays; vectors release the memory automatically once the
       blocking writes below have completed. */
    std::vector<cl_sphere> primitive_array(sceneObjectCount);
    std::vector<cl_material> material_array(sceneObjectCount);
    
    primitive_array[0] = cl_make_sphere(glm::vec3(1, 0, -1), 0.5f);
    material_array[0] = cl_make_material(pt::ColorHex_to_RGBfloat<float>("0x730202"), 0, MAT_LAMBERTIAN);
    
    primitive_array[1] = cl_make_sphere(glm::vec3(-1, 0, -1), 0.5f);
    material_array[1] = cl_make_material(pt::ColorHex_to_RGBfloat<float>("0xF89000"), 0, MAT_LAMBERTIAN);
    
    primitive_array[2] = cl_make_sphere(glm::vec3(0, 0, 0), 0.5f);
    material_array[2] = cl_make_material(pt::ColorHex_to_RGBfloat<float>("0x97A663"), 0.1f, MAT_METALLIC);
    
    primitive_array[3] = cl_make_sphere(glm::vec3(0, 0, -2), 0.5f);
    material_array[3] = cl_make_material(glm::vec3(0.8f, 0.6f, 0.2f), 0.3f, MAT_METALLIC);
    
    primitive_array[4] = cl_make_sphere(glm::vec3(0,-100.5f, 1.0f), 100.0f);
    material_array[4] = cl_make_material(glm::vec3(0.5f), 0, MAT_LAMBERTIAN);
    
    clStatus = cmd_queue.enqueueWriteBuffer(primitive_buffer, CL_TRUE, 0, sceneObjectCount * sizeof(cl_sphere), primitive_array.data(), NULL, NULL);
    pt_assert(clStatus, "Could not fill primitive buffer");
    
    clStatus = cmd_queue.enqueueWriteBuffer(material_buffer, CL_TRUE, 0, sceneObjectCount * sizeof(cl_material), material_array.data(), NULL, NULL);
    pt_assert(clStatus, "Could not fill material buffer");
    
    pt_assert(cl_set_skycolors(bottom_sky_color, top_sky_color, sky_buffer, cmd_queue),
              "Could not fill sky buffer");
    
    clStatus = kernel.setArg(1, primitive_buffer);
    pt_assert(clStatus, "Could not set primitive buffer argument");
    
    clStatus = kernel.setArg(2, material_buffer);
    pt_assert(clStatus, "Could not set material buffer argument");
    
    clStatus = kernel.setArg(3, sky_buffer);
    pt_assert(clStatus, "Could not set sky buffer argument");
    
    /* NOTE(review): this passes a size_t; confirm the kernel parameter has the same size. */
    clStatus = kernel.setArg(4, sceneObjectCount);
    pt_assert(clStatus, "Could not set primitive count count argument");
    
    clStatus = kernel.setArg(5, img_buffer[0]);
    pt_assert(clStatus, "Could not set img buffer argument");
    
    clStatus = kernel.setArg(6, samples);
    pt_assert(clStatus, "Could not set samples argument");
    
    clStatus = kernel.setArg(0, cam_buffer);
    pt_assert(clStatus, "Could not set camera buffer argument");
}
/**
 * Prepares the object for the requested device type.
 *
 * For CL_DEVICE_TYPE_CPU / CL_DEVICE_TYPE_GPU, all matching devices of all
 * platforms are collected, a context and a profiling-enabled command queue
 * (on the first device) are created, and the "maxInt" kernel is built from
 * the file at KERNEL_PATH. For any other type, TYPE is set to CPU and no
 * OpenCL setup is performed.
 *
 * @param type requested OpenCL device type
 * @return true on success (or when no OpenCL setup is needed), false if no
 *         matching device exists or an OpenCL/standard exception occurred
 */
bool MaxValueSimple::initialize(cl_device_type type) {
	if (type == CL_DEVICE_TYPE_CPU)
		TYPE = CLCPU;
	else if (type == CL_DEVICE_TYPE_GPU)
		TYPE = CLGPU;
	else {
		TYPE = CPU;
		return true;
	}

	try {
		/*** Get the OpenCL platforms, e.g. AMD APP, NVIDIA CUDA ***/
		cl::Platform::get(&platforms);

		/*** Get the OpenCL devices of the requested type, e.g. GPU, CPU ***/
		std::vector < cl::Device > devTmp;
		for (std::vector<cl::Platform>::iterator it = platforms.begin(); it
				!= platforms.end(); ++it) {
			try {
				it->getDevices(type, &devTmp);
			} catch (const cl::Error& noDevice) {
				// Depending on the bindings version, a platform without a
				// matching device is reported by throwing CL_DEVICE_NOT_FOUND;
				// other platforms may still have one.
				if (noDevice.err() != CL_DEVICE_NOT_FOUND)
					throw;
				devTmp.clear();
			}
			devices.insert(devices.end(), devTmp.begin(), devTmp.end());
			devTmp.clear();
		}

		// devices[0] is used below; indexing an empty vector is undefined behaviour.
		if (devices.empty()) {
			std::cerr << "[ERROR] MaxValueSimple::initialize(cl_device_type): "
					<< "no matching OpenCL device found" << std::endl;
			return false;
		}

		std::cerr << "[DEBUG] OpenCL device: " << devices[0].getInfo<
				CL_DEVICE_NAME> () << std::endl;

		/*** Create the OpenCL context and command queue ***/
		context = cl::Context(devices);
		cmdQ = cl::CommandQueue(context, devices[0], CL_QUEUE_PROFILING_ENABLE);

		/*** Read the OpenCL source code ***/
		std::string src = readFile(KERNEL_PATH);
		cl::Program::Sources source;
		source.push_back(std::make_pair(src.data(), src.length()));

		/*** Create the OpenCL program from the source code ***/
		program = cl::Program(context, source);
		try {
			program.build(devices);
		} catch (const cl::Error&) {
			// TODO route this through Logger::logDebug once logging is available.
			std::cerr << "[DEBUG] Build log for \""
					<< devices.front().getInfo<CL_DEVICE_NAME> () << "\":\n"
					<< program.getBuildInfo<CL_PROGRAM_BUILD_LOG> (devices.front())
					<< std::endl;
			// Rethrow the original exception object instead of a copy.
			throw;
		}
		kernel = cl::Kernel(program, "maxInt");
		event = cl::Event();
		return true;
	} catch (const cl::Error& err) {
		// TODO Logger::logError(METHOD, Logger::sStream << err.what());
		std::cerr << "[ERROR] MaxValueSimple::initialize(cl_device_type): "
				<< err.what() << " (" << err.err() << ")" << std::endl;
		return false;
	} catch (const std::exception& err) {
		// TODO Logger::logError(METHOD, Logger::sStream << err.what());
		std::cerr << "[ERROR] MaxValueSimple::initialize(cl_device_type): "
				<< err.what() << std::endl;
		return false;
	}
}
示例#11
0
int main(int argc, char **argv)
{
    TS ts; //Time stepper
    Vec soln; //Holds the solution vector, including all the primitive
              //variables. 
    DM dmda; //Manages the computational grid and parallelization.

    int X1Start, X2Start;
    int X1Size, X2Size;

    PetscInitialize(&argc, &argv, PETSC_NULL, help);

    // Create the computational domain.
    DMDACreate2d(PETSC_COMM_WORLD, 
                 DM_BOUNDARY_GHOSTED, DM_BOUNDARY_GHOSTED,
                 DMDA_STENCIL_STAR,
                 N1, N2,
                 PETSC_DECIDE, PETSC_DECIDE,
                 DOF, NG, PETSC_NULL, PETSC_NULL, &dmda);

    // When running in parallel, each process computes from
    // [X1Start, X1Start+X1Size] x [X2Start, X2Start+X2Size]
    DMDAGetCorners(dmda, 
                   &X1Start, &X2Start, NULL,
                   &X1Size, &X2Size, NULL);

    // Create the solution vector.
    DMCreateGlobalVector(dmda, &soln);

    // Create the time stepper and link it to the computational grid and the
    // residual evaluation function.
    TSCreate(PETSC_COMM_WORLD, &ts);
    TSSetDM(ts, dmda);
    TSSetIFunction(ts, PETSC_NULL, ComputeResidual, NULL);

    // OpenCL boilerplate code.
    clErr = cl::Platform::get(&platforms);
    CheckCLErrors(clErr, "cl::Platform::get");

    // Select computation device here.
    clErr = platforms.at(1).getDevices(CL_DEVICE_TYPE_CPU, &devices);
    CheckCLErrors(clErr, "cl::Platform::getDevices");

    context = cl::Context(devices, NULL, NULL, NULL, &clErr);
    CheckCLErrors(clErr, "cl::Context::Context");

    queue = cl::CommandQueue(context, devices.at(0), 0, &clErr);
    CheckCLErrors(clErr, "cl::CommandQueue::CommandQueue");

    std::ifstream sourceFile("computeresidual.cl");
    std::string sourceCode((std::istreambuf_iterator<char>(sourceFile)),
                            std::istreambuf_iterator<char>());
    cl::Program::Sources source(1, std::make_pair(sourceCode.c_str(),
                                sourceCode.length()+1));
    
    program = cl::Program(context, source, &clErr);
    CheckCLErrors(clErr, "cl::Program::Program");

    // Pass in constants to the OpenCL kernel as compiler switches. This is an
    // efficient way to handle constants such as domain sizes in OpenCL.
    std::string BuildOptions("\
                              -D X1_SIZE=" +
                             std::to_string(X1Size) +
                             " -D X2_SIZE=" + 
                             std::to_string(X2Size) +
                             " -D TOTAL_X1_SIZE=" + 
                             std::to_string(X1Size+2*NG) + 
                             " -D TOTAL_X2_SIZE=" +
                             std::to_string(X2Size+2*NG));

    // Compile the OpenCL program and extract the kernel.
    PetscScalar start = std::clock();
    clErr = program.build(devices, BuildOptions.c_str(), NULL, NULL);
    const char *buildlog = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(
                                                devices.at(0),
                                                &clErr).c_str();
    PetscPrintf(PETSC_COMM_WORLD, "%s\n", buildlog);
    CheckCLErrors(clErr, "cl::Program::build");
    PetscScalar end = std::clock();

    PetscScalar time = (end - start)/(PetscScalar)CLOCKS_PER_SEC;
    PetscPrintf(PETSC_COMM_WORLD, 
                "Time taken for kernel compilation = %f\n", time);


    kernel = cl::Kernel(program, "ComputeResidual", &clErr);
    CheckCLErrors(clErr, "cl::Kernel::Kernel");

    // How much memory is the kernel using?
    cl_ulong localMemSize = kernel.getWorkGroupInfo<CL_KERNEL_LOCAL_MEM_SIZE>(
                                        devices.at(0), &clErr);
    cl_ulong privateMemSize = kernel.getWorkGroupInfo<CL_KERNEL_PRIVATE_MEM_SIZE>(
                                        devices.at(0), &clErr);
    printf("Local memory used = %llu\n", (unsigned long long)localMemSize);
    printf("Private memory used = %llu\n", (unsigned long long)privateMemSize);


    // Set initial conditions.
    InitialCondition(ts, soln);

    TSSetSolution(ts, soln);
    TSSetType(ts, TSTHETA);
    TSSetFromOptions(ts);

    // Finally solve! All time stepping options can be controlled from the
    // command line.
    TSSolve(ts, soln);

    // Delete the data structures in the following order.
    DMDestroy(&dmda);
    VecDestroy(&soln);
    TSDestroy(&ts);

    PetscFinalize();
    return(0);
}
示例#12
0
 void initOpenCL()
 {
     // OpenCL
     try
     {
         // Get available platforms
         vector<cl::Platform> platforms;
         cl::Platform::get(&platforms);
         LOG_INFO<<platforms.front().getInfo<CL_PLATFORM_VERSION>();
         
         // context sharing is OS specific
         #if defined (__APPLE__) || defined(MACOSX)
             CGLContextObj curCGLContext = CGLGetCurrentContext();
             CGLShareGroupObj curCGLShareGroup = CGLGetShareGroup(curCGLContext);
             
             cl_context_properties properties[] =
             {
                 CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE,
                 (cl_context_properties)curCGLShareGroup,
                 0
             };
         #elif defined WIN32
             cl_context_properties properties[] =
             {
                 CL_GL_CONTEXT_KHR, (cl_context_properties)wglGetCurrentContext(),
                 CL_WGL_HDC_KHR, (cl_context_properties)wglGetCurrentDC(),
                 CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(),
                 0
             };
         #else
             cl_context_properties properties[] =
             {
                 CL_GL_CONTEXT_KHR, (cl_context_properties)glXGetCurrentContext(),
                 CL_GLX_DISPLAY_KHR, (cl_context_properties)glXGetCurrentDisplay(),
                 CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(),
                 0
             };
         #endif
         
         m_context = cl::Context( CL_DEVICE_TYPE_GPU, properties);
         
         // Get a list of devices on this platform
         vector<cl::Device> devices = m_context.getInfo<CL_CONTEXT_DEVICES>();
         m_device = devices[0];
         
         // Create a command queue and use the first device
         m_queue = cl::CommandQueue(m_context, devices[0]);
         
         // Read source file
         std::string sourceCode = kinski::readFile("kernels.cl");
         
         // Make program of the source code in the context
         m_program = cl::Program(m_context, sourceCode);
         
         // Build program for these specific devices
         m_program.build();
         
         m_particleKernel = cl::Kernel(m_program, "updateParticles");
         m_imageKernel = cl::Kernel(m_program, "set_colors_from_image");
     }
     catch(cl::Error &error)
     {
         LOG_ERROR << error.what() << "(" << oclErrorString(error.err()) << ")";
         LOG_ERROR << "Build Status: " << m_program.getBuildInfo<CL_PROGRAM_BUILD_STATUS>(m_device);
         LOG_ERROR << "Build Options:\t" << m_program.getBuildInfo<CL_PROGRAM_BUILD_OPTIONS>(m_device);
         LOG_ERROR << "Build Log:\t " << m_program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(m_device);
     }
 }
示例#13
0
	CL_helper(const char *kernelFileName, std::vector<std::string> kernelNames, bool loadBinary, bool writeBinary,
				bool usePreProcArgs = false, std::map<std::string, std::string> preProcArgs = std::map<std::string, std::string>()){
		

		cl::Platform::get(&platforms);
		if(platforms.size()==0)
			throw std::runtime_error("No OpenCL platforms found.\n");

 
		int selectedPlatform=0;
		platform=platforms.at(selectedPlatform);
		platform.getDevices(CL_DEVICE_TYPE_ALL, &devices);  
		if(devices.size()==0)
			throw std::runtime_error("No opencl devices found.\n");

		int selectedDevice=0;
		device=devices.at(selectedDevice);
		context = cl::Context(devices);

		

		try{


			if(loadBinary){
				std::string vendor=platform.getInfo<CL_PLATFORM_VENDOR>();
				size_t found = vendor.find("NVIDIA");
				if(found != std::string::npos){
					FILE* fp;
					fp = fopen("src/kernels/julia_filter.ptx", "r");
					if (!fp) {
						std::cerr << "Error loading kernel binary" << std::endl;
						std::cerr << "Building kernel from .cl file" << std::endl;
						loadBinary = false;

					} else {
						fseek(fp, 0, SEEK_END);
						size_t kernel_sz = ftell(fp);
						rewind(fp);

						char* kernel_str = (char*)malloc(kernel_sz);
						unsigned bytes = fread(kernel_str, 1, kernel_sz, fp);
						fclose(fp);

						binaries.push_back(std::make_pair((void*)kernel_str,kernel_sz+1));
				
						program = cl::Program(context, devices, binaries);	

						program.build(devices);
					}
				} else{
					std::cerr << "Vendor not NVIDIA, cannot load .ptx binary" << std::endl;
					std::cerr << "Building kernel from .cl file" << std::endl;
					loadBinary = false;
				}

			}
			if(!loadBinary){
				std::string kernelSource=CL_helper::LoadSource(kernelFileName);
				sources.push_back(std::make_pair(kernelSource.c_str(), kernelSource.size()+1)); 

				program = cl::Program(context, sources);

				if(usePreProcArgs && !preProcArgs.empty()){
					std::string preProcArgsString;
					for(auto& arg : preProcArgs) { 
						preProcArgsString += "-D" + arg.first + "=" + arg.second + " ";
					}
					program.build(devices, preProcArgsString.c_str()); 
				} else {
					//std::string params = "-cl-unsafe-math-optimizations";
					program.build(devices);
				}
			}

		}catch (cl::Error er) {
			for(unsigned i=0;i<devices.size();i++){
					std::cerr <<"Log for device " << devices[i].getInfo<CL_DEVICE_NAME>().c_str()<<std::endl;
					std::cerr << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices[i]).c_str() <<std::endl;
				}
			std::cerr << "ERROR:" << er.what() << " Code " << er.err()<<std::endl;
			throw;
		}


		for(unsigned i=0; i<kernelNames.size();i++){
			kernels.push_back( cl::Kernel(program, kernelNames.at(i).c_str()) );
		}

		queue = cl::CommandQueue(context, device);


		if(writeBinary){
			size_t bin_sz;
			program.getInfo( CL_PROGRAM_BINARY_SIZES, &bin_sz);
			
			unsigned char *bin = (unsigned char *)malloc(bin_sz);
			
			program.getInfo(CL_PROGRAM_BINARIES, &bin);

			FILE* fp = fopen("src/kernels/julia_filter.ptx", "wb");
			fwrite(bin, sizeof(char), bin_sz, fp);
			fclose(fp);
			free(bin);
		}



	}
示例#14
0
/**
 * Runs an inclusive scan on an OpenCL device.
 *
 * Builds inclusive_scan.cl, reads an element count followed by that many
 * floats from input.txt, runs inclusive_scan() on the device, passes input
 * and result to cpu_check(), and writes the result to output.txt.
 *
 * Returns 0 on success and 1 if a file is missing/invalid, the program does
 * not build, or an OpenCL error occurs.
 */
int main()
{
    try {
        std::vector<cl::Device> devices;

        // select platform
        cl::Platform platform = selectPlatform();

        // select device
        platform.getDevices(CL_DEVICE_TYPE_ALL, &devices);
        cl::Device device = selectDevice(devices);

        // create context
        context = cl::Context(devices);

        // create command queue
        queue = cl::CommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE);

        // load opencl source
        std::ifstream cl_file("inclusive_scan.cl");
        if (!cl_file) {
            std::cout << "Error: cannot open inclusive_scan.cl" << std::endl;
            return 1;
        }

        std::string cl_string{std::istreambuf_iterator<char>(cl_file),
                    std::istreambuf_iterator<char>()};

        cl::Program::Sources source(1,
                                    std::make_pair(cl_string.c_str(),
                                                   cl_string.length() + 1));

        // create programm
        program = cl::Program(context, source);

        // compile opencl source; only a failure of this step prints the build log
        try {
            program.build(devices);
        }
        catch (cl::Error const & e) {
            std::string log_str = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device);
            std::cout << std::endl << e.what() << " : " << e.err() << std::endl;
            std::cout << log_str;
            return 1;
        }

        // input_size would stay uninitialized if the file could not be opened,
        // and a zero count would make &input[0] below invalid.
        size_t input_size = 0;
        std::ifstream input_file("input.txt");
        if (!(input_file >> input_size) || input_size == 0) {
            std::cout << "Error: input.txt is missing or does not start with a positive element count" << std::endl;
            return 1;
        }

        std::vector<float> input(input_size);

//            for (size_t i = 0; i < input_size; ++i) {
//                input[i] = i % 10;
//            }

        for (size_t i = 0; i < input_size; i++) {
            input_file >> input[i];
        }

        std::vector<float> output(input_size, 0);

        cl::Buffer dev_input (context, CL_MEM_READ_ONLY, sizeof(float) * input_size);
        queue.enqueueWriteBuffer(dev_input, CL_TRUE, 0, sizeof(float) * input_size, &input[0]);

        cl::Buffer dev_output = inclusive_scan(dev_input, input_size);

        queue.enqueueReadBuffer(dev_output, CL_TRUE, 0, sizeof(float) * input_size, &output[0]);
        queue.finish();

        cpu_check(input, output);

        std::ofstream output_file("output.txt");
        for (size_t i = 0; i < input_size; i++) {
            output_file << output[i] << " ";
        }
    }
    catch (cl::Error const & e) {
        std::cout << "Error: " << e.what() << " #" << e.err() << std::endl;
        return 1;
    }

    return 0;
}