Exemplo n.º 1
0
/* Enqueues a given task to operate on a given memory buffer dataset.
 *
 * @env: Struct containing device/context/queue variables.
 * @mem_struct: Struct containing cl_mem buffer / target dataset.
 * @task: Struct containing the kernel to execute and the task name. */
void runTaskOnDataset(
  const RubiCLEnvironment* env,
  const RubiCLMemoryBuffer* mem_struct,
  const RubiCLTask* task
) {
    if (DEBUG) printf("runTaskOnDataset\n");
  size_t g_work_size[1] = {mem_struct->buffer_entries};

  /* Kernel's global data_array set to be the given device memory buffer */
  cl_int ret = clSetKernelArg(
    task->kernel,       // Kernel concerned
    0,                 // Index of argument to specify
    sizeof(cl_mem),    // Size of argument value
    &mem_struct->buffer // Argument value
  );
  if (ret != CL_SUCCESS) printf("clSetKernelArg %s\n", oclErrorString(ret));

  /* Kernel enqueued to be executed on the environment's command queue */
  ret = clEnqueueNDRangeKernel(
    env->queue,   // Device's command queue
    task->kernel, // Kernel to enqueue
    1,           // Dimensionality of work
    0,           // Global offset of work index
    g_work_size, // Array of work sizes by dimension
    NULL,        // Local work size, omitted so will be automatically deduced
    0,           // Number of preceding events
    NULL,        // Preceding events list
    NULL         // Event object destination
  );
  if (ret != CL_SUCCESS) printf("clEnqueueNDRangeKernel %s\n", oclErrorString(ret));
}
Exemplo n.º 2
0
void CL::popCorn()
{
    printf("in popCorn\n");
    //initialize our kernel from the program
    //kernel = clCreateKernel(program, "part1", &err);
    //printf("clCreateKernel: %s\n", oclErrorString(err));
    try{
        kernel = cl::Kernel(program, "part2", &err);
    }
    catch (cl::Error er) {
        printf("ERROR: %s(%s)\n", er.what(), oclErrorString(er.err()));
    }

    //set the arguements of our kernel
    try
    {
        err = kernel.setArg(0, cl_vbos[0]); //position vbo
        err = kernel.setArg(1, cl_vbos[1]); //color vbo
        err = kernel.setArg(2, cl_velocities);
        err = kernel.setArg(3, cl_pos_gen);
        err = kernel.setArg(4, cl_vel_gen);
    }
    catch (cl::Error er) {
        printf("ERROR: %s(%s)\n", er.what(), oclErrorString(er.err()));
    }
    //Wait for the command queue to finish these commands before proceeding
    queue.finish();
}
Exemplo n.º 3
0
CL::CL()
{
    printf("Initialize OpenCL object and context\n");
    //setup devices and context
    
    //this function is defined in util.cpp
    //it comes from the NVIDIA SDK example code
    err = oclGetPlatformID(&platform);
    //oclErrorString is also defined in util.cpp and comes from the NVIDIA SDK
    printf("oclGetPlatformID: %s\n", oclErrorString(err));

    // Get the number of GPU devices available to the platform
    // we should probably expose the device type to the user
    // the other common option is CL_DEVICE_TYPE_CPU
    err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
    printf("clGetDeviceIDs (get number of devices): %s\n", oclErrorString(err));


    // Create the device list
    devices = new cl_device_id [numDevices];
    err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);
    printf("clGetDeviceIDs (create device list): %s\n", oclErrorString(err));
 

    //for right now we just use the first available device
    //later you may have criteria (such as support for different extensions)
    //that you want to use to select the device
    deviceUsed = 0;
    //create the context
    context = clCreateContext(0, 1, &devices[deviceUsed], NULL, NULL, &err);
    
    //create the command queue we will use to execute OpenCL commands
    command_queue = clCreateCommandQueue(context, devices[deviceUsed], 0, &err);

}
Exemplo n.º 4
0
void CL::loadProgram(std::string kernel_source)
{
    //Program Setup
    int pl;
    //size_t program_length;
    printf("load the program\n");
    
    pl = kernel_source.size();
    printf("kernel size: %d\n", pl);
    //printf("kernel: \n %s\n", kernel_source.c_str());
    try
    {
        cl::Program::Sources source(1,
            std::make_pair(kernel_source.c_str(), pl));
        program = cl::Program(context, source);
    }
    catch (cl::Error er) {
        printf("ERROR: %s(%s)\n", er.what(), oclErrorString(er.err()));
    }

    printf("build program\n");
    try
    {
        err = program.build(devices);
    }
    catch (cl::Error er) {
        printf("program.build: %s\n", oclErrorString(er.err()));
    //if(err != CL_SUCCESS){
    }
    printf("done building program\n");
	std::cout << "Build Status: " << program.getBuildInfo<CL_PROGRAM_BUILD_STATUS>(devices[0]) << std::endl;
	std::cout << "Build Options:\t" << program.getBuildInfo<CL_PROGRAM_BUILD_OPTIONS>(devices[0]) << std::endl;
	std::cout << "Build Log:\t " << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices[0]) << std::endl;

}
Exemplo n.º 5
0
void MD::clInit(float bound, float dt, std::string force_kernel_name) {
  printf("Initializing CL Kernels.\n");
  // Initialize our kernel from the program.
  try {
    forceKernel = cl::Kernel(program, force_kernel_name.c_str(), &err);
    updateKernel = cl::Kernel(program, "update", &err);
  }
  catch (cl::Error er) {
    printf("ERROR: %s(%s)\n", er.what(), oclErrorString(er.err()));
    exit(EXIT_FAILURE);
  }
  // Set the arguements of our kernel.
  try {
    err = forceKernel.setArg(0, cl_vbos[0]);  // Position vbo.
    err = forceKernel.setArg(1, cl_vbos[1]);  // Color vbo.
    err = forceKernel.setArg(2, cl_forces);
    err = forceKernel.setArg(3, num);          // Pass in the size.
    err = updateKernel.setArg(0, cl_vbos[0]);  // Position vbo.
    err = updateKernel.setArg(1, cl_vbos[1]);  // Color vbo.
    err = updateKernel.setArg(2, cl_forces);
    err = updateKernel.setArg(3, cl_vel);
    err = updateKernel.setArg(4, bound);
    updateKernel.setArg(5, dt);                // Pass in the timestep.
  }
  catch (cl::Error er) {
    printf("ERROR: %s(%s)\n", er.what(), oclErrorString(er.err()));
  }
  // Wait for the command queue to finish these commands before proceeding.
  queue.finish();
  printf("Done initializing.\n");
}
Exemplo n.º 6
0
void computePresenceArrayForTupDataset(
  const RubiCLEnvironment* env,
  const RubiCLMemoryBuffer* fst_mem_struct,
  const RubiCLMemoryBuffer* snd_mem_struct,
  const RubiCLTask* task,
  RubiCLMemoryBuffer *presence
) {
    if (DEBUG) printf("computePresenceArrayForDataset\n");
  size_t g_work_size[1] = {fst_mem_struct->buffer_entries};

  /* Kernel's fst data_array set to be the given fst memory buffer */
  cl_int ret = clSetKernelArg(
    task->kernel,       // Kernel concerned
    0,                 // Index of argument to specify
    sizeof(cl_mem),    // Size of argument value
    &fst_mem_struct->buffer // Argument value
  );
  if (ret != CL_SUCCESS) printf("clSetKernelArg %s\n", oclErrorString(ret));

  /* Kernel's snd data_array set to be the given snd memory buffer */
  ret = clSetKernelArg(
    task->kernel,       // Kernel concerned
    1,                 // Index of argument to specify
    sizeof(cl_mem),    // Size of argument value
    &snd_mem_struct->buffer // Argument value
  );
  if (ret != CL_SUCCESS) printf("clSetKernelArg %s\n", oclErrorString(ret));

  /* Output buffer created to be an int flag for each element in input dataset. */
  presence->buffer_entries = fst_mem_struct->buffer_entries;
  presence->buffer = createMemoryBuffer(
    env,                                      // Environment struct
    (presence->buffer_entries * sizeof(int)), // Size of buffer to create
    CL_MEM_HOST_READ_ONLY                     // Buffer flags set
  );
  presence->type = INTEGER_BUFFER;

  /* Kernel's global presence_array set to be the newly created presence buffer */
  ret = clSetKernelArg(
    task->kernel,      // Kernel concerned
    2,                // Index of argument to specify
    sizeof(cl_mem),   // Size of argument value
    &presence->buffer // Argument value
  );
  if (ret != CL_SUCCESS) printf("clSetKernelArg PA %s\n", oclErrorString(ret));

  /* Kernel enqueued to be executed on the environment's command queue */
  ret = clEnqueueNDRangeKernel(
    env->queue,   // Device's command queue
    task->kernel, // Kernel to enqueue
    1,           // Dimensionality of work
    0,           // Global offset of work index
    g_work_size, // Array of work size in each dimension
    NULL,        // Local work size, omitted so will be deduced by OpenCL platform
    0,           // Number of preceding events
    NULL,        // Preceding events list
    NULL         // Event object destination
  );
  if (ret != CL_SUCCESS) printf("clEnqueueNDRangeKernel %s\n", oclErrorString(ret));
}
Exemplo n.º 7
0
int setup_data_buffers(struct sapi_o *a, size_t data_in_len)
{
  cl_int err;

  if (a == NULL)
    return -1;

  a->olen = data_in_len / sizeof(float2) * sizeof(float);

  a->host  = realloc(a->host, a->olen);
  if (a->host == NULL){
#ifdef DEBUG
    fprintf(stderr, "e: logic cannot malloc output buffer\n");
#endif
    return -1;
  }

#ifdef DEBUG
  fprintf(stderr, "%s: created ouput buffer %ld bytes\n", __func__, a->olen);
#endif

  a->clin  = clCreateBuffer(a->ctx, CL_MEM_READ_ONLY, data_in_len, NULL, &err);
  if (err != CL_SUCCESS){
#ifdef DEBUG
    fprintf(stderr, "clCreateBuffer return %s\n", oclErrorString(err));
#endif
    return -1;
  }

#ifdef DEBUG
  fprintf(stderr, "%s: created device input buffer %ld bytes\n", __func__, data_in_len);
#endif

  a->clout = clCreateBuffer(a->ctx, CL_MEM_WRITE_ONLY, data_in_len, NULL, &err);
  if (err != CL_SUCCESS){
#ifdef DEBUG
    fprintf(stderr, "clCreateBuffer return %s\n", oclErrorString(err));
#endif
    return -1;
  }

#ifdef DEBUG
  fprintf(stderr, "%s: created device ouput buffer %ld bytes\n", __func__, data_in_len);
#endif

  a->clpow = clCreateBuffer(a->ctx, CL_MEM_WRITE_ONLY, a->olen, NULL, &err);
  if (err != CL_SUCCESS){
#ifdef DEBUG
    fprintf(stderr, "clCreateBuffer return %s\n", oclErrorString(err));
#endif
    return -1;
  }

#ifdef DEBUG
  fprintf(stderr, "%s: created device ouput buffer %ld bytes\n", __func__, a->olen);
#endif

  return 0;
}
Exemplo n.º 8
0
    float Kernel::execute(int ndrange)
    {
        if (ndrange <= 0)
            return -1.f;

        
        cl_ulong start, end;
        float timing = -1.0f;

        try
        {
            cl::Event event;
            cli->err = cli->queue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(ndrange), cl::NullRange, NULL, &event);
            cli->queue.finish();
            event.getProfilingInfo(CL_PROFILING_COMMAND_END, &end);
            event.getProfilingInfo(CL_PROFILING_COMMAND_START, &start);
            timing = (end - start) * 1.0e-6f;

        }
        catch (cl::Error er)
        {
            printf("err: work group size: %d\n", ndrange);
            printf("ERROR: %s(%s)\n", er.what(), oclErrorString(er.err()));
        }
        return timing;

    }
Exemplo n.º 9
0
void CL::loadProgram(const char* relative_path)
{
 // Program Setup
    int pl;
    size_t program_length;
    printf("load the program\n");
    
    //CL_SOURCE_DIR is set in the CMakeLists.txt
    std::string path(CL_SOURCE_DIR);
    path += "/" + std::string(relative_path);
    printf("path: %s\n", path.c_str());

    //file_contents is defined in util.cpp
    //it loads the contents of the file at the given path
    char* cSourceCL = file_contents(path.c_str(), &pl);
    //printf("file: %s\n", cSourceCL);
    program_length = (size_t)pl;

    // create the program
    program = clCreateProgramWithSource(context, 1,
                      (const char **) &cSourceCL, &program_length, &err);
    printf("clCreateProgramWithSource: %s\n", oclErrorString(err));

    buildExecutable();
   
}
Exemplo n.º 10
0
cl_device_id selectDefaultClDeviceOfType(cl_device_type device_type, cl_platform_id* platform) {
    if (DEBUG) printf("selectDefaultClDeviceOfType\n");

    cl_int ret;
    cl_device_id device_id;
    cl_uint num_devices;

    ret = clGetDeviceIDs(
        *platform,    // Selected platform
        device_type, // Type of device (CPU/GPU)
        0,           // Limit
        NULL,        // Devices destination
        &num_devices // Count destination
    );

    cl_device_id* devices = calloc(sizeof(cl_device_id), num_devices);
    ret = clGetDeviceIDs(
        *platform,    // Selected platform
        device_type, // Type of device (CPU/GPU)
        num_devices, // Limit
        devices,     // Devices destination
        NULL         // Count destination
    );
    if (ret != CL_SUCCESS) printf("clGetDeviceIDs %s\n", oclErrorString(ret));

    if (DEBUG) {
        displayDeviceInfo(num_devices, devices);
        printf("Selecting Device 1.\n");
    }
    device_id = devices[0];
    free(devices);

    return device_id;
}
Exemplo n.º 11
0
    void Density::execute(int num,
                    Buffer<float4>& pos_s,
                    Buffer<float>& dens_s,
                    //output
                    Buffer<unsigned int>& ci_start,
                    Buffer<unsigned int>& ci_end,
                    //params
                    Buffer<SPHParams>& sphp,
                    Buffer<GridParams>& gp,
                    //debug params
                    Buffer<float4>& clf_debug,
                    Buffer<int4>& cli_debug)
    { 
        int iarg = 0;
        k_density.setArg(iarg++, pos_s.getDevicePtr());
        k_density.setArg(iarg++, dens_s.getDevicePtr());
        k_density.setArg(iarg++, ci_start.getDevicePtr());
        k_density.setArg(iarg++, ci_end.getDevicePtr());
        k_density.setArg(iarg++, gp.getDevicePtr());
        k_density.setArg(iarg++, sphp.getDevicePtr());

        k_density.setArg(iarg++, clf_debug.getDevicePtr());
        k_density.setArg(iarg++, cli_debug.getDevicePtr());

        int local = 64;
        try
        {
            float gputime = k_density.execute(num, local);
        }

        catch (cl::Error er)
        {
            printf("ERROR(density): %s(%s)\n", er.what(), oclErrorString(er.err()));
        }
    }
Exemplo n.º 12
0
/* Writes the contents of a given dataset into a given cl_mem device memory buffer
 *
 * @env: Struct containing device/context/queue variables.
 * @mem_struct: Struct containing cl_mem buffer and the number of entries it can hold.
 * @dataset: Pointer to an integer array of data to be read, same length as buffer. */
void loadIntArrayIntoDevice(
  const RubiCLEnvironment env,
  const RubiCLMemoryBuffer mem_struct,
  int* dataset
) {
    if (DEBUG) printf("loadIntArrayIntoDevice\n");
  cl_event write_event;
  cl_int ret = clEnqueueWriteBuffer(
    env.queue,                                 // Command queue
    mem_struct.buffer,                         // Memory buffer
    CL_FALSE,                                  // Blocking write? (set to nonblocking)
    0,                                         // Offset in buffer to write to
    mem_struct.buffer_entries * sizeof(int),   // Input data size
    dataset,                                   // Input data
    0,                                         // Number of preceding actions
    NULL,                                      // List of preceding actions
    &write_event                               // Event object destination
  );

  if (ret != CL_SUCCESS) printf("clEnqueueWriteBuffer %s\n", oclErrorString(ret));

  clSetEventCallback(
    write_event,            // Event to monitor
    CL_COMPLETE,            // Status to fire on
    &releaseMemoryCallback, // Callback to trigger
    dataset                 // Data to pass to callback
  );

}
Exemplo n.º 13
0
bool OCL::BuildExecutable()
{
	cl_int error;

	// Build program
	printf("Building OpenCL program...\n");
	error = clBuildProgram(program, 1, &deviceId, NULL, NULL, NULL);
	
	if(error != CL_SUCCESS)
	{
		printf("Failed to build executable with error code %d (%s)", error, oclErrorString(error));
		return false;
	}

	// Get and print build status messages.
	cl_build_status build_status;
	error = clGetProgramBuildInfo(program, deviceId, CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status), &build_status, NULL);

	char *build_log;
	size_t ret_val_size;
	error = clGetProgramBuildInfo(program, deviceId, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);

	build_log = new char[ret_val_size+1];
	error = clGetProgramBuildInfo(program, deviceId, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
	build_log[ret_val_size] = '\0';
	printf("BUILD LOG: \n %s", build_log);

	delete build_log;
	return true;
}
Exemplo n.º 14
0
cl_platform_id selectDefaultClPlatform() {
    if (DEBUG) printf("selectDefaultClPlatform\n");
    cl_platform_id platform;
    cl_int ret;

    cl_uint num_platforms, i;
    ret = clGetPlatformIDs(
        0,             // Limit
        NULL,          // Value destination
        &num_platforms // Count destination
    );
    cl_platform_id* platforms = calloc(sizeof(cl_platform_id), num_platforms);
        ret = clGetPlatformIDs(
        num_platforms, // Limit
        platforms,     // Value destination
        NULL           // Count destination
    );
    if (ret != CL_SUCCESS) printf("clGetPlatformIDs %s\n", oclErrorString(ret));

    if (DEBUG) {
        char buf [128];
        for (i = 0; i < num_platforms; i++) {
            ret = clGetPlatformInfo(
                platforms[i],        // Platform
                CL_PLATFORM_VERSION, // OpenCL version
                sizeof(buf),         // Buffer size
                buf,                 // Destination buffer
                NULL                 // Size destination
            );
            printf("* Platform %d: %s", (i + 1), buf);

            ret = clGetPlatformInfo(
                platforms[i],        // Platform
                CL_PLATFORM_NAME,    // Platform name
                sizeof(buf),         // Buffer size
                buf,                 // Destination buffer
                NULL                 // Size destination
            );
            printf(" %s", buf);

            ret = clGetPlatformInfo(
                platforms[i],        // Platform
                CL_PLATFORM_VENDOR,  // Platform vendor
                sizeof(buf),         // Buffer size
                buf,                 // Destination buffer
                NULL                 // Size destination
            );
            printf(" %s\n", buf);
        }
    }

    if (DEBUG) printf("Selecting Platform 1.\n");
    platform = platforms[0];
    free(platforms);

    return platform;
}
Exemplo n.º 15
0
void MD::loadProgram(std::string kernel_source, int group_size_val) {
  // Program Setup.
  int pl;
  group_size = group_size_val;
  printf("Load the program.\n");
  bool failed = false;

  pl = kernel_source.size();
  printf("Kernel size: %d.\n", pl);
  try {
    cl::Program::Sources source(1,
                                std::make_pair(kernel_source.c_str(), pl));
    program = cl::Program(context, source);
  }
  catch (cl::Error er) {
    printf("ERROR: %s(%s)\n", er.what(), oclErrorString(er.err()));
  }

  printf("Building program...\n");
  try {
    //err = program.build(devices, "-cl-nv-verbose -cl-nv-maxrregcount=100");
    std::stringstream build_options;
    // Define the group size to allow for __local arrays.
    build_options << "-D SIZE=" << group_size << std::ends;
    err = program.build(devices, build_options.str().c_str());
  }
  catch (cl::Error er) {
    printf("program.build: %s\n", oclErrorString(er.err()));
    failed = true;
  }
  printf("Done building program.\n");
  std::cout << "Build Status: "
            << program.getBuildInfo<CL_PROGRAM_BUILD_STATUS>(devices[0])
            << std::endl;
  std::cout << "Build Options:\t"
            << program.getBuildInfo<CL_PROGRAM_BUILD_OPTIONS>(devices[0])
            << std::endl;
  std::cout << "Build Log:\t "
            << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices[0])
            << std::endl;

  if (failed)
    exit(EXIT_FAILURE);
}
CLWrapper::CLWrapper()
{
	// Initialize OpenCL object and context

	// Get all platforms that are OpenCL compatible
	err_ = cl::Platform::get(&allPlatforms_);
	Zilch::Console::Write("Collect all OpenCL platforms. Status : ");
	Zilch::Console::WriteLine(oclErrorString(err_));

	if (allPlatforms_.size() == 0)
	{
		Zilch::Console::WriteLine("No OpenCL compatible platforms found, check installation!");
		exit(1);
	}

	// Use default platform 
	platform_ = allPlatforms_[0];
	Zilch::Console::Write("Using platform: ");
	Zilch::Console::WriteLine(platform_.getInfo<CL_PLATFORM_NAME>().c_str());

	// Get all devices registered to platform
	err_ = platform_.getDevices(CL_DEVICE_TYPE_ALL, &allDevices_);
	Zilch::Console::Write("Collect all OpenCL devices registered to platform. Status : ");
	Zilch::Console::WriteLine(oclErrorString(err_));

	if (allDevices_.size() == 0) 
	{
		Zilch::Console::WriteLine(" No OpenCL compatible devices found, check installation!");
		exit(1);
	}

	// Use default devices
	device_ = allDevices_[0];

	// Create context
	context_ = cl::Context({ device_ });

	// Create command queue
	queue_ = cl::CommandQueue(context_, device_, 0, &err_);

	Zilch::Console::Write("Creating command queue. -> Status : ");
	Zilch::Console::WriteLine(oclErrorString(err_));

}
Exemplo n.º 17
0
CL::CL()
{
    printf("Initialize OpenCL object and context\n");
    //setup devices and context
    
    //this function is defined in util.cpp
    //it comes from the NVIDIA SDK example code
    ///err = oclGetPlatformID(&platform);
    //oclErrorString is also defined in util.cpp and comes from the NVIDIA SDK
    ///printf("oclGetPlatformID: %s\n", oclErrorString(err));
    std::vector<cl::Platform> platforms;
    err = cl::Platform::get(&platforms);
    printf("cl::Platform::get(): %s\n", oclErrorString(err));
    printf("number of platforms: %d\n", platforms.size());
    if (platforms.size() == 0) {
        printf("Platform size 0\n");
    }
 

    // Get the number of GPU devices available to the platform
    // we should probably expose the device type to the user
    // the other common option is CL_DEVICE_TYPE_CPU
    ///err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
    ///printf("clGetDeviceIDs (get number of devices): %s\n", oclErrorString(err));


    // Create the device list
    ///devices = new cl_device_id [numDevices];
    ///err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);
    ///printf("clGetDeviceIDs (create device list): %s\n", oclErrorString(err));
 

    //for right now we just use the first available device
    //later you may have criteria (such as support for different extensions)
    //that you want to use to select the device
    deviceUsed = 0;
    //create the context
    ///context = clCreateContext(0, 1, &devices[deviceUsed], NULL, NULL, &err);
    //context properties will be important later, for now we go with defualts
    cl_context_properties properties[] = 
        { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0};

    context = cl::Context(CL_DEVICE_TYPE_GPU, properties);
    devices = context.getInfo<CL_CONTEXT_DEVICES>();
    printf("number of devices %d\n", devices.size());
    
    //create the command queue we will use to execute OpenCL commands
    ///command_queue = clCreateCommandQueue(context, devices[deviceUsed], 0, &err);
    try{
        queue = cl::CommandQueue(context, devices[deviceUsed], 0, &err);
    }
    catch (cl::Error er) {
        printf("ERROR: %s(%d)\n", er.what(), er.err());
    }

}
Exemplo n.º 18
0
cl_kernel __cl_create_kernel__(cl_program program, const char *kernelname, int line, const char *file){
    cl_int error;
	cl_kernel result = clCreateKernel(program, kernelname, &error);
    if(error){
        handle_error(error, line, file);
		unregister_gpu();\
        pthread_exit((void *)oclErrorString(error));
    }
	return result;
}
void CLWrapper::readAllBuffers()
{
	for each (CLBuffer buf in allBuffers_)
	{
		err_ = queue_.enqueueReadBuffer(buf.bufferValue, CL_TRUE, 0, buf.bufferSize, buf.dataPointer, NULL, NULL);
		if (err_ != CL_SUCCESS)
		{
			Zilch::Console::Write("Reading buffers. -> Status : ");
			Zilch::Console::WriteLine(oclErrorString(err_));
		}
	}
Exemplo n.º 20
0
void CL::runKernel()
{
    printf("in runKernel\n");
    //execute the kernel
    err = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, workGroupSize, NULL, 0, NULL, &event);
    clReleaseEvent(event);
    printf("clEnqueueNDRangeKernel: %s\n", oclErrorString(err));
    clFinish(command_queue);

    //lets check our calculations by reading from the device memory and printing out the results
    float c_done[num];
    err = clEnqueueReadBuffer(command_queue, cl_c, CL_TRUE, 0, sizeof(float) * num, &c_done, 0, NULL, &event);
    printf("clEnqueueReadBuffer: %s\n", oclErrorString(err));
    clReleaseEvent(event);

    for(int i=0; i < num; i++)
    {
        printf("c_done[%d] = %g\n", i, c_done[i]);
    }
}
Exemplo n.º 21
0
void cl_context::init()
{
    // OpenCL
    try
    {
        // Get available platforms
        vector<cl::Platform> platforms;
        cl::Platform::get(&platforms);
        LOG_INFO<<platforms.front().getInfo<CL_PLATFORM_VERSION>();

        // context sharing is OS specific
#if defined (__APPLE__) || defined(MACOSX)
        CGLContextObj curCGLContext = CGLGetCurrentContext();
        CGLShareGroupObj curCGLShareGroup = CGLGetShareGroup(curCGLContext);

        cl_context_properties properties[] =
        {
            CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE,
            (cl_context_properties)curCGLShareGroup,
            0
        };
#elif defined WIN32
        cl_context_properties properties[] =
        {
            CL_GL_CONTEXT_KHR, (cl_context_properties)wglGetCurrentContext(),
            CL_WGL_HDC_KHR, (cl_context_properties)wglGetCurrentDC(),
            CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(),
            0
        };
#else
        cl_context_properties properties[] =
        {
            CL_GL_CONTEXT_KHR, (cl_context_properties)glXGetCurrentContext(),
            CL_GLX_DISPLAY_KHR, (cl_context_properties)glXGetCurrentDisplay(),
            CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(),
            0
        };
#endif

        m_context = cl::Context(CL_DEVICE_TYPE_GPU, properties);

        // Get a list of devices on this platform
        vector<cl::Device> devices = m_context.getInfo<CL_CONTEXT_DEVICES>();
        m_device = devices[0];

        // Create a command queue and use the first device
        m_queue = cl::CommandQueue(m_context, m_device);

    }
    catch(cl::Error &error)
    {
        LOG_ERROR << error.what() << "(" << oclErrorString(error.err()) << ")";
    }
}
Exemplo n.º 22
0
void createRubiCLHybridEnvironment(RubiCLHybridEnvironment* env) {
    if (DEBUG) printf("createRubiCLHybridEnvironment\n");

    cl_int ret;

    cl_platform_id platform = selectDefaultClPlatform();

    env->cpu_device_id = selectDefaultClDeviceOfType(CL_DEVICE_TYPE_CPU, &platform);
    env->gpu_device_id = selectDefaultClDeviceOfType(CL_DEVICE_TYPE_GPU, &platform);

    cl_device_id devices[2] = { env->cpu_device_id, env->gpu_device_id };

    env->context = clCreateContext(
        NULL,           // Properties
        2,              // Number of devices specified
        devices,        // Devices specified
        NULL,           // Error callback Fn
        NULL,           // User data for Fn
        &ret            // Status destination
    );
    if (ret != CL_SUCCESS) printf("clCreateContext %s\n", oclErrorString(ret));

    env->cpu_queue = clCreateCommandQueue(
        env->context,
        env->cpu_device_id,
        0,
        &ret
    );
    if (ret != CL_SUCCESS) printf("clCreateCommandQueue (CPU) %s\n", oclErrorString(ret));


    env->gpu_queue = clCreateCommandQueue(
        env->context,
        env->gpu_device_id,
        0,
        &ret
    );
    if (ret != CL_SUCCESS) printf("clCreateCommandQueue (GPU) %s\n", oclErrorString(ret));

    //FIXME: Set GroupSize or something.
}
Exemplo n.º 23
0
 void Kernel::setArgShared(int arg, int nb_bytes)
 {
     try
     {
         kernel.setArg(arg, nb_bytes, 0);
         cli->queue.finish();
     }
     catch (cl::Error er)
     {
         printf("ERROR: %s(%s)\n", er.what(), oclErrorString(er.err()));
     }
 }
Exemplo n.º 24
0
void CL::popCorn()
{
    printf("in popCorn\n");

    //initialize our kernel from the program
    printf("%s\n",kernel_name);
    kernel = clCreateKernel(program, kernel_name, &err);
    printf("clCreateKernel: %s\n", oclErrorString(err));

    //initialize our CPU memory arrays, send them to the device and set the kernel arguements
    num = 10;
    float *a = new float[num];
    float *b = new float[num];
    for(int i=0; i < num; i++)
    {
        a[i] = 1.0f * i;
        b[i] = 1.0f * i;
    }

    printf("Creating OpenCL arrays\n");
    //our input arrays
    //create our OpenCL buffer for a, copying the data from CPU to the GPU at the same time
    cl_a = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, sizeof(float) * num, a, &err);
    //cl_b = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, sizeof(float) * num, b, &err);
    //we could do b similar, but you may want to create your buffer and fill it at a different time
    cl_b = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * num, NULL, &err);
    //our output array
    cl_c = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * num, NULL, &err);

    printf("Pushing data to the GPU\n");
    //push our CPU arrays to the GPU
//    err = clEnqueueWriteBuffer(command_queue, cl_a, CL_TRUE, 0, sizeof(float) * num, a, 0, NULL, &event);
//    clReleaseEvent(event); //we need to release events in order to be completely clean (has to do with openclprof)
//
    //push b's data to the GPU
    err = clEnqueueWriteBuffer(command_queue, cl_b, CL_TRUE, 0, sizeof(float) * num, b, 0, NULL, &event);
    clReleaseEvent(event);


    //set the arguements of our kernel
    err  = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &cl_a);
    err  = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *) &cl_b);
    err  = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *) &cl_c);
    //Wait for the command queue to finish these commands before proceeding
    clFinish(command_queue);

    //clean up allocated space.
    delete[] a;
    delete[] b;

    //for now we make the workgroup size the same as the number of elements in our arrays
    workGroupSize[0] = num;
}
Exemplo n.º 25
0
size_t __cl_get_info__(cl_device_id device_id, cl_device_info info, int line, const char *file){
	size_t ret;
    cl_int error = clGetDeviceInfo(device_id, 
								info, sizeof(ret), 
								&ret, NULL);
    if(error){
        handle_error(error, line, file);
		unregister_gpu();\
        pthread_exit((void *)oclErrorString(error));
    }
	return ret;
}
Exemplo n.º 26
0
cl_mem __cl_malloc__(cl_context context, size_t size, int line, const  char *file){
	cl_mem memory_object;
#if DEBUG >= 1
fprintf(stderr, "Allocating %u on a GPU... ", size);
#endif

	if(size<1)
	{
		size=16;
	}

	cl_int error;
	memory_object = clCreateBuffer(context, CL_MEM_READ_WRITE, size, NULL, &error);
	if(error){
#if DEBUG >= 1
fprintf(stderr, "fail\n");
#else
		handle_error(error, line, file);
		fprintf(stderr,
			"MTY_CL was unable to allocate enough memory on one of your GPU devices.\n"
			"Possible reasons for this are:\n"
			"\t- Running other programs hogging a lot of GPU memory\n"
			"\t- Limited maximum memory allocation on GPU devices\n"
			"\t- Thread concurrency + batch size too large for the device\n"
			"\n"
			"On windows the most common issue is limited maximum memory allocation.\n"
			"To remove this limit, run the following command on any windows command line:\n"
			"\tsetx GPU_MAX_ALLOC_PERCENT 100\n"
			"\n"
			"Thread concurrency and batch size is not currently user configurable.\n"
			"If removing allocation limit does not work, and you are uses the latest driver\n"
			"or a recommended driver version, please open an issue at\n"
			"\thttps://github.com/madsbuvi/MTY_CL\n"
			"Including the following information:\n"
			"\t-File name and line number of error\n"
			"\t-GPU name & available memory on the GPU\n"
			"\t-OS\n"
			"\t-Driver version\n"
			"\n"
			"Amount of memory that could not be allocated: %u\n",
				(unsigned int)size
		);
#endif
#ifndef IGNORE_MEMORY_FAILURE
		pthread_exit((void *)oclErrorString(error));
#endif
		return 0;
	}
#if DEBUG >= 1
fprintf(stderr, "done\n");
#endif
	return memory_object;
}
Exemplo n.º 27
0
/* Takes the source code for a kernel and the name of the task to build and creates a
 * RubiCLTask Struct containing the components needed to dispatch this task later.
 *
 * @env: Struct containing device/context/queue variables.
 * @kernel_source: String containing the .cl Kernel source.
 * @source_size: The size of the source.
 * @name: The name of the task within the source to build. */
void buildTaskFromSource(
  const RubiCLEnvironment* env,
  const char* kernel_source,
  const char* name,
  RubiCLTask* result
) {
    if (DEBUG) printf("buildTaskFromSource\n");

  /* Create cl_program from given task/name and store inside RubiCLTask struct. */
  cl_int ret;
  result->name = (char *) name;
  result->program = clCreateProgramWithSource(
    env->context,                    // Context
    1,                              // Number of parts that the source is in
    (const char **) &kernel_source, // Array of program source code
    NULL,                           // Total size of source
    &ret                            // Status destination
  );
  if (ret != CL_SUCCESS) printf("clCreateProgramWithSource %s\n", oclErrorString(ret));

  /* Create kernel from cl_program to execute later on target-device */
  ret = clBuildProgram(
    result->program,            // Program to build
    1,                       // Number of devices involved
    &env->device_id,          // List of involved devices
    "-cl-fast-relaxed-math", // Compilation options
    NULL,                    // Build complete callback, building is synchronous if omitted
    NULL                     // Callback user data
  );
  if (ret != CL_SUCCESS) printf("clBuildProgram %s\n", oclErrorString(ret));

    if (ret == CL_BUILD_PROGRAM_FAILURE) displayBuildFailureInfo(result->program, env->device_id);

  result->kernel = clCreateKernel(
    result->program, // Built program
    result->name,    // Entry point to kernel
    &ret          // Status destination
  );
  if (ret != CL_SUCCESS) printf("clCreateKernel %s\n", oclErrorString(ret));
}
Exemplo n.º 28
0
MD::MD() {
  printf("Initialize OpenCL object and context\n");
  // Setup devices and context.
  std::vector<cl::Platform> platforms;
  err = cl::Platform::get(&platforms);
  printf("cl::Platform::get(): %s\n", oclErrorString(err));
  printf("platforms.size(): %lu\n", platforms.size());

  deviceUsed = 0;
  err = platforms[0].getDevices(CL_DEVICE_TYPE_GPU, &devices);
  printf("getDevices: %s\n", oclErrorString(err));
  printf("devices.size(): %lu\n", devices.size());
  int t = devices.front().getInfo<CL_DEVICE_TYPE>();
  printf("type: device: %d CL_DEVICE_TYPE_GPU: %d \n", t, CL_DEVICE_TYPE_GPU);

  // This part of the setup may be Linux specific.
  cl_context_properties props[] =
    {
      CL_GL_CONTEXT_KHR, (cl_context_properties)glXGetCurrentContext(),
      CL_GLX_DISPLAY_KHR, (cl_context_properties)glXGetCurrentDisplay(),
      CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(),
      0
    };
  try {
    context = cl::Context(CL_DEVICE_TYPE_GPU, props);
  }
  catch (cl::Error er) {
    printf("ERROR: %s(%s)\n", er.what(), oclErrorString(er.err()));
    exit(EXIT_FAILURE);
  }

  // Create the command queue we will use to execute OpenCL commands.
  try {
    queue = cl::CommandQueue(context, devices[deviceUsed], 0, &err);
  }
  catch (cl::Error er) {
    printf("ERROR: %s(%d)\n", er.what(), er.err());
    exit(EXIT_FAILURE);
  }
}
Exemplo n.º 29
0
/* Returns the number of elements that would be kept after a presence array calculation
 * has been completed.
 *
 * @env: RubiCLEnvironment struct
 * @presence: presence array post filter calculation.
 * @index_scan: result of exclusive prefix sum on presence array. */
int filteredBufferLength(
    const RubiCLEnvironment* env,
    RubiCLMemoryBuffer* presence,
    RubiCLMemoryBuffer* index_scan
) {
    if (DEBUG) printf("filteredBufferLength\n");

  int index_reduce, last_element_presence;
  cl_int ret = clEnqueueReadBuffer(
    env->queue,                                     // Device's command queue
    index_scan->buffer,                             // Buffer to output data from
    CL_FALSE,                                      // Block? Async to hide latency
    (index_scan->buffer_entries - 1) * sizeof(int), // Offset to read from
    sizeof(int),                                   // Size of output data
    &index_reduce,                                 // Output destination
    0,                                             // Number of preceding actions
    NULL,                                          // List of preceding actions
    NULL                                           // Event object destination
  );
  if (ret != CL_SUCCESS) printf("clEnqueueReadBuffer %s\n", oclErrorString(ret));

  ret = clEnqueueReadBuffer(
    env->queue,                                   // Device's command queue
    presence->buffer,                             // Buffer to output data from
    CL_FALSE,                                    // Block? Async to hide latency
    (presence->buffer_entries - 1) * sizeof(int), // Offset to read from
    sizeof(int),                                 // Size of output data
    &last_element_presence,                      // Output destination
    0,                                           // Number of preceding actions
    NULL,                                        // List of preceding actions
    NULL                                         // Event object destination
  );
  if (ret != CL_SUCCESS) printf("clEnqueueReadBuffer %s\n", oclErrorString(ret));

  clFinish(env->queue);

  return index_reduce + last_element_presence;
}
Exemplo n.º 30
0
void __cl_copy_from__(cl_mem from, void *to, size_t size, size_t offset, cl_command_queue command_queue, int line, const char *file)
{
	if(size<1)
	{
		size=16;
	}
	
    cl_int error = clEnqueueReadBuffer(command_queue, from, CL_TRUE, offset, size, to, 0, NULL, NULL);
    if(error){
        handle_error(error, line, file);
		unregister_gpu();\
        pthread_exit((void *)oclErrorString(error));
    }
}