Beispiel #1
0
CLWProgram::CLWProgram(cl_program program)
: ReferenceCounter<cl_program, clRetainProgram, clReleaseProgram>(program)
{
    cl_int status = CL_SUCCESS;
    cl_uint numKernels;
    status = clCreateKernelsInProgram(*this, 0, nullptr, &numKernels);
    ThrowIf(numKernels == 0, CL_BUILD_ERROR, "clCreateKernelsInProgram return 0 kernels");

    ThrowIf(status != CL_SUCCESS, status, "clCreateKernelsInProgram failed");
    
    std::vector<cl_kernel> kernels(numKernels);
    status = clCreateKernelsInProgram(*this, numKernels, &kernels[0], nullptr);
    
    ThrowIf(status != CL_SUCCESS, status, "clCreateKernelsInProgram failed");
    
    std::for_each(kernels.begin(), kernels.end(), [this](cl_kernel k)
                  {
                      size_t size = 0;
                      cl_int res;
                      
                      res = clGetKernelInfo(k, CL_KERNEL_FUNCTION_NAME, 0, nullptr, &size);
                      ThrowIf(res != CL_SUCCESS, res, "clGetKernelInfo failed");
                      
                      std::vector<char> temp(size);
                      res = clGetKernelInfo(k, CL_KERNEL_FUNCTION_NAME, size, &temp[0], nullptr);
                      ThrowIf(res != CL_SUCCESS, res, "clGetKernelInfo failed");
                      
                      std::string funcName(temp.begin(), temp.end()-1);
                      kernels_[funcName] = CLWKernel::Create(k);
                  });
}
int main(int argc, char **argv)
{
  cl_int err;
  const char *krn_src;
  cl_program empty, program;
  cl_context ctx;
  cl_device_id did;
  cl_command_queue queue;
  cl_uint num_krn;
  cl_kernel kernels[2];

  poclu_get_any_device(&ctx, &did, &queue);
  TEST_ASSERT( ctx );
  TEST_ASSERT( did );
  TEST_ASSERT( queue );

  /* Test creating a program from an empty source */
  empty = clCreateProgramWithSource(ctx, 1, &empty_src, NULL, &err);
  CHECK_OPENCL_ERROR_IN("clCreateProgramWithSource");
  err = clBuildProgram(empty, 0, NULL, NULL, NULL, NULL);
  CHECK_OPENCL_ERROR_IN("clBuildProgram");

  err = clCreateKernelsInProgram(empty, 0, NULL, &num_krn);
  CHECK_OPENCL_ERROR_IN("clCreateKernelsInProgram");
  TEST_ASSERT(num_krn == 0);

  krn_src = poclu_read_file(SRCDIR "/tests/runtime/test_clCreateKernelsInProgram.cl");
  TEST_ASSERT(krn_src);

  program = clCreateProgramWithSource(ctx, 1, &krn_src, NULL, &err);
  CHECK_OPENCL_ERROR_IN("clCreateProgramWithSource");
  err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
  CHECK_OPENCL_ERROR_IN("clBuildProgram");

  err = clCreateKernelsInProgram(program, 0, NULL, &num_krn);
  CHECK_OPENCL_ERROR_IN("clCreateKernelsInProgram");
  // test_clCreateKernelsInProgram.cl has two kernel functions.
  TEST_ASSERT(num_krn == 2);

  err = clCreateKernelsInProgram(program, 2, kernels, NULL);
  CHECK_OPENCL_ERROR_IN("clCreateKernelsInProgram");

  // make sure the kernels were actually created 
  // Note: nothing in the specification says which kernel function
  // is kernels[0], which is kernels[1]. For now assume pocl/LLVM
  // orders these deterministacally
  err = clEnqueueTask(queue, kernels[0], 0, NULL, NULL); 
  CHECK_OPENCL_ERROR_IN("clEnqueueTask");

  err = clFinish(queue);
  CHECK_OPENCL_ERROR_IN("clFinish");

  err = clEnqueueTask(queue, kernels[1], 0, NULL, NULL);
  CHECK_OPENCL_ERROR_IN("clEnqueueTask");

  err = clFinish(queue);
  CHECK_OPENCL_ERROR_IN("clFinish");

  return EXIT_SUCCESS;
}
Beispiel #3
0
PassRefPtr<WebCLKernelList> 
WebCLProgram::createKernelsInProgram(ExceptionState& es) {
    cl_int err = 0;
    cl_kernel* kernelBuf = NULL;
    cl_uint num = 0;
    
    if (m_cl_program == NULL) {
        printf("Error: Invalid program object\n");
        es.throwWebCLException(
                WebCLException::INVALID_PROGRAM,
                WebCLException::invalidProgramMessage);
        return nullptr;
    }
    err = clCreateKernelsInProgram (m_cl_program, 0, NULL, &num);
    
    if (err != CL_SUCCESS) {
        printf("Error: clCreateKernelsInProgram \n");
        WebCLException::throwException(err, es);
        return nullptr;
    }
    
    if(num == 0) {
        printf("Warning: createKernelsInProgram - Number of Kernels is 0 \n");
        es.throwWebCLException(
                WebCLException::FAILURE,
                WebCLException::failureMessage);
        return nullptr;
    }
    kernelBuf = (cl_kernel*)malloc (sizeof(cl_kernel) * num);
    
    if (!kernelBuf) {
        return nullptr;
    }

    err = clCreateKernelsInProgram (m_cl_program, num, kernelBuf, NULL);
    
    if (err != CL_SUCCESS) {
        WebCLException::throwException(err, es);
    } else {
        RefPtr<WebCLKernelList> o = WebCLKernelList::create(kernelBuf, num, 
                                                            m_cl_context.get(), 
                                                            this);
        printf("WebCLKernelList Size = %d \n\n\n\n", num);
        m_num_kernels = num;
        return o;
    }
    return nullptr;
}
Beispiel #4
0
cl_int WINAPI wine_clCreateKernelsInProgram(cl_program program, cl_uint num_kernels,
                                            cl_kernel * kernels, cl_uint * num_kernels_ret)
{
    cl_int ret;
    TRACE("\n");
    ret = clCreateKernelsInProgram(program, num_kernels, kernels, num_kernels_ret);
    return ret;
}
int main(int argc, char **argv)
{
  cl_int err;
  const char *krn_src;
  cl_program program;
  cl_context ctx;
  cl_device_id did;
  cl_command_queue queue;
  cl_uint num_krn;
  cl_kernel kernels[2];

  poclu_get_any_device(&ctx, &did, &queue);
  assert( ctx );
  assert( did );
  assert( queue );

  krn_src = poclu_read_file(SRCDIR "/tests/runtime/test_clCreateKernelsInProgram.cl");
  assert(krn_src);

  program = clCreateProgramWithSource(ctx, 1, &krn_src, NULL, NULL);
  err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
  assert(err == CL_SUCCESS);
  
  err = clCreateKernelsInProgram(program, 0, NULL, &num_krn);
  assert(err == CL_SUCCESS);
  // test_clCreateKernelsInProgram.cl has two kernel functions.
  assert(num_krn == 2);

  err = clCreateKernelsInProgram(program, 2, kernels, NULL);
  assert(err == CL_SUCCESS);
  
  // make sure the kernels were actually created 
  // Note: nothing in the specification says which kernel function
  // is kernels[0], which is kernels[1]. For now assume pocl/LLVM
  // orders these deterministacally
  err = clEnqueueTask(queue, kernels[0], 0, NULL, NULL); 
  assert(err == CL_SUCCESS);
  err = clEnqueueTask(queue, kernels[1], 0, NULL, NULL);
  assert(err == CL_SUCCESS);
  
  clFinish(queue);

}
Beispiel #6
0
 vector<Kernel> Program::createKernels() const
 {
     vector<Kernel> vec;
     cl_uint size;
     cl_int error;
     if((error = clCreateKernelsInProgram(_id, 0,
             nullptr, &size)) != CL_SUCCESS)
     {
         detail::reportError("Program::createKernels(): ", error);
         return vec;
     }
     vector<cl_kernel> buf(size);
     if(clCreateKernelsInProgram(_id, size, 
             buf.data(), nullptr) != CL_SUCCESS)
     {
         detail::reportError("Program::createKernels(): ", error);
         return vec;
     }
     for(cl_uint i = 0; i < size; ++i)
         vec.push_back(Kernel(_ctx, buf[i]));
     return vec;
 }
cl_kernel
createKernel(
	const char* source,
	cl_context context,
	const char* options,
	cl_int* error)
{

	cl_int err;
	cl_device_id device;
	cl_program program;
	cl_kernel kernel;
	size_t logSize;
	char *log;

	err = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(device), &device, NULL);
	if (err != CL_SUCCESS) {
		if (error != NULL) {
			*error = err;
		}
		return NULL;
	}

	program = clCreateProgramWithSource(context, 1, &source, NULL, error);
	if (program == NULL) {
		return NULL;
	}

	err = clBuildProgram(program, 1, &device, options, NULL, NULL);
	if (err != CL_SUCCESS) {
		logSize = 0;
		clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
		log = (char*)calloc(1, logSize + 1);
		clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, logSize, log, NULL);
		printf("=== Build log ===\n%s\n", log);
		free(log);
		clReleaseProgram(program);
		if (error != NULL) {
			*error = err;
		}
		return NULL;
	}

	kernel = NULL;
	err = clCreateKernelsInProgram(program, 1, &kernel, NULL);
	clReleaseProgram(program);
	if (error != NULL) {
		*error = err;
	}
	return kernel;
}
Beispiel #8
0
cl_int pl_load_code(PLContext *pl_ctx, PLCode *pl_code) {
	cl_program program;
	cl_int error;
	
	cl_int binary_status;
	
	program = clCreateProgramWithBinary(pl_ctx->ctx, 1, 
										(const cl_device_id *)&pl_ctx->device_id, 
										(const size_t *)&pl_code->len, 
										(const u_char **)&pl_code->binary, 
										&binary_status, &error);
	if (error != CL_SUCCESS) {
		return error;
	}
	
	error = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
	
	if (error != CL_SUCCESS) {
		clReleaseProgram(program);
		return error;
	}
	
	cl_uint kernel_count_ret;
	
	cl_kernel *kernels;
	if ((kernels = malloc(sizeof(cl_kernel) * pl_code->kernel_count)) == NULL) {
		clReleaseProgram(program);
		return CL_OUT_OF_HOST_MEMORY;
	}
	
	error = clCreateKernelsInProgram(program, pl_code->kernel_count, kernels, &kernel_count_ret);
	
	clReleaseProgram(program);
	
	if (error != CL_SUCCESS) {
		free(kernels);
		return error;
	}
	
	pl_ctx->kernel_count = kernel_count_ret;
	pl_ctx->kernels = kernels;
	
	return CL_SUCCESS;
}
///
//	main() for HelloWorld example
//
int main(int argc, char** argv)
{
    cl_context context = 0;
    cl_command_queue commandQueue = 0;
    cl_program program = 0;
    cl_device_id device = 0;
    cl_kernel kernels[2] = { 0, 0 };
    cl_mem memObjects[3] = { 0, 0, 0 };
    cl_int errNum;

    // Create an OpenCL context on first available platform
    context = CreateContext();
    if (context == NULL)
    {
        std::cerr << "Failed to create OpenCL context." << std::endl;
        return 1;
    }

    // Create a command-queue on the first device available
    // on the created context
    commandQueue = CreateCommandQueue(context, &device);
    if (commandQueue == NULL)
    {
        Cleanup(context, commandQueue, program, kernels, memObjects);
        return 1;
    }

    // Create OpenCL program from HelloWorld.cl kernel source
    program = CreateProgram(context, device, "simple.cl");
    if (program == NULL)
    {
        Cleanup(context, commandQueue, program, kernels, memObjects);
        return 1;
    }

    // Create OpenCL kernel
    //clCreateKernel(program, "hello_kernel", NULL);

    cl_uint numberOfKernels = 0;
    errNum = clCreateKernelsInProgram(program,
                                      0,
                                      NULL,
                                      &numberOfKernels
                                     );
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Failed to get number of kernels" << std::endl;
        Cleanup(context, commandQueue, program, kernels, memObjects);
        return 1;
    }
    else
    {
        std::cout << "numberOfKernels is:" << numberOfKernels << std::endl;
    }

    assert(numberOfKernels == 2 && "number of kernels was not as expected");
    errNum = clCreateKernelsInProgram(program,
                                      2,
                                      kernels,
                                      NULL
                                     );

    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Failed to retrieve kernels" << std::endl;
        Cleanup(context, commandQueue, program, kernels, memObjects);
        return 1;
    }


    // Create memory objects that will be used as arguments to
    // kernels.  First create host memory arrays that will be
    // used to store the arguments to the kernel
    float result[ARRAY_SIZE];
    float a[ARRAY_SIZE];
    float b[ARRAY_SIZE];
    for (int i = 0; i < ARRAY_SIZE; i++)
    {
        a[i] = (float)i;
        b[i] = (float)(i * 2);
    }

    if (!CreateMemObjects(context, memObjects, a, b))
    {
        Cleanup(context, commandQueue, program, kernels, memObjects);
        return 1;
    }

    for (int i = 0; i < numberOfKernels; ++i)
    {
        // Set the kernel arguments (result, a, b)
        errNum = clSetKernelArg(kernels[i], 0, sizeof(cl_mem), &memObjects[0]);
        errNum |= clSetKernelArg(kernels[i], 1, sizeof(cl_mem), &memObjects[1]);
        errNum |= clSetKernelArg(kernels[i], 2, sizeof(cl_mem), &memObjects[2]);
        if (errNum != CL_SUCCESS)
        {
            std::cerr << "Error setting kernels[" << i << "] arguments." << std::endl;
            Cleanup(context, commandQueue, program, kernels, memObjects);
            return 1;
        }
    }

    size_t globalWorkSize[1] = { ARRAY_SIZE };
    size_t localWorkSize[1] = { 1 };


    cl_event waitFor = NULL;
    for (int i = 0; i < numberOfKernels; ++i)
    {
        cl_uint numToWaitFor = 0;
        cl_event waitList[1] = { 0 };
        cl_event* waitListP = 0;

        if (waitFor != NULL)
        {
            numToWaitFor = 1;
            waitList[0] = waitFor;
            waitListP = waitList;
        }

        // Queue the kernel up for execution across the array
        errNum = clEnqueueNDRangeKernel(commandQueue, kernels[i], 1, NULL,
                                        globalWorkSize, localWorkSize,
                                        numToWaitFor, waitListP, &waitFor);
        if (errNum != CL_SUCCESS)
        {
            std::cerr << "Error queuing kernel for execution." << std::endl;
            Cleanup(context, commandQueue, program, kernels, memObjects);
            return 1;
        }
    }

    // Read the output buffer back to the Host
    errNum = clEnqueueReadBuffer(commandQueue, memObjects[2], CL_TRUE,
                                 0, ARRAY_SIZE * sizeof(float), result,
                                 0, NULL, NULL);
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Error reading result buffer." << std::endl;
        Cleanup(context, commandQueue, program, kernels, memObjects);
        return 1;
    }

    // Output the result buffer
    for (int i = 0; i < ARRAY_SIZE; i++)
    {
        std::cout << result[i] << " ";
    }
    std::cout << std::endl;
    std::cout << "Executed program succesfully." << std::endl;
    Cleanup(context, commandQueue, program, kernels, memObjects);

    return 0;
}
Vector<RefPtr<WebCLKernel>> WebCLProgram::createKernelsInProgram(ExceptionState& es)
{
    if (isReleased()) {
        es.throwWebCLException(WebCLException::INVALID_PROGRAM, WebCLException::invalidProgramMessage);
        return Vector<RefPtr<WebCLKernel>>();
    }

    if (!m_isProgramBuilt) {
        es.throwWebCLException(WebCLException::INVALID_PROGRAM_EXECUTABLE, WebCLException::invalidProgramExecutableMessage);
        return Vector<RefPtr<WebCLKernel>>();
    }

    cl_uint num = 0;
    cl_int err = clCreateKernelsInProgram(m_clProgram, 0, nullptr, &num);
    if (err != CL_SUCCESS) {
        WebCLException::throwException(err, es);
        return Vector<RefPtr<WebCLKernel>>();
    }

    if (num == 0) {
        es.throwWebCLException(WebCLException::FAILURE, WebCLException::failureMessage);
        return Vector<RefPtr<WebCLKernel>>();
    }

    cl_kernel* kernelBuf = (cl_kernel*)malloc (sizeof(cl_kernel) * num);
    if (!kernelBuf) {
        return Vector<RefPtr<WebCLKernel>>();
    }

    err = clCreateKernelsInProgram(m_clProgram, num, kernelBuf, nullptr);

    if (err != CL_SUCCESS) {
        WebCLException::throwException(err, es);
        return Vector<RefPtr<WebCLKernel>>();
    }

    Vector<char> kernelName;
    size_t bytesOfKernelName = 0;
    Vector<RefPtr<WebCLKernel>> m_kernelList;
    for (size_t i = 0 ; i < num; i++) {
        err = clGetKernelInfo(kernelBuf[i], CL_KERNEL_FUNCTION_NAME, 0, nullptr, &bytesOfKernelName);
        if (err != CL_SUCCESS) {
            continue;
        }

        kernelName.reserveCapacity(bytesOfKernelName);
        kernelName.resize(bytesOfKernelName);

        err = clGetKernelInfo(kernelBuf[i], CL_KERNEL_FUNCTION_NAME, bytesOfKernelName, kernelName.data(), 0);

        if (err != CL_SUCCESS) {
            continue;
        }

        RefPtr<WebCLKernel> kernel = WebCLKernel::create(kernelBuf[i], context(), this, static_cast<const char*>(kernelName.data()));

        if (kernel)
            m_kernelList.append(kernel);
        kernelName.clear();
        bytesOfKernelName = 0;
    }

    return m_kernelList;
}
Beispiel #11
0
cl_int GLCLDraw::BuildFromSource(cl_program *program, const char *p)
{
    cl_int ret;
    size_t codeSize;
    char *logBuf;
    char compile_options[2048];
    cl_bool endian_little;
    compile_options[0] = '\0';
   
    codeSize = strlen(p);
    *program = clCreateProgramWithSource(context, 1, (const char **)&p,
                                        (const size_t *)&codeSize, &ret);
    XM7_DebugLog(XM7_LOG_INFO, "CL: Build Result=%d", ret);
    if(ret < CL_SUCCESS) {
      return ret;
    }
    // Compile from source
    //strncat(compile_options, "-cl-fast-relaxed-math ", sizeof(compile_options) - 1);
    if(clGetDeviceInfo(device_id[using_device], CL_DEVICE_ENDIAN_LITTLE,
		       sizeof(cl_bool), &endian_little, NULL) == CL_SUCCESS){
      if(endian_little == CL_TRUE) {
	strncat(compile_options, "-D_CL_KERNEL_LITTLE_ENDIAN=1 ", sizeof(compile_options) - 1);
      } else { // BIG
	strncat(compile_options, "-D_CL_KERNEL_LITTLE_ENDIAN=0 ", sizeof(compile_options) - 1); // Big endian
      }
    } else {
      strncat(compile_options, "-D_CL_KERNEL_LITTLE_ENDIAN=1 ", sizeof(compile_options) - 1); // Assume little endian
    }
//    build_callback = CL_LogProgramExecute;
//    ret = clBuildProgram(*program, 1, &device_id[using_device], compile_options,
//			 build_callback, (void *)this);
    ret = clBuildProgram(*program, 1, &device_id[using_device], compile_options,
			 NULL, NULL);
    XM7_DebugLog(XM7_LOG_INFO, "Compile Result=%d", ret);
    CL_LogProgramExecute(*program, (void *)this);
    if(ret != CL_SUCCESS) {  // Printout error log.
      //      clReleaseProgram(program);
      return ret;
    }
    ret = clCreateKernelsInProgram(*program, 1,
				   kernels_array, &nkernels);

    if(ret < CL_SUCCESS) {
      XM7_DebugLog(XM7_LOG_INFO, "Unable to build CL kernel. Status=%d", ret);
    } else {
      char funcname[128];
      int i = 0;
      size_t size;
      XM7_DebugLog(XM7_LOG_INFO, "Built %d CL kernel(s).", nkernels);
#if 1
      for(i = 0; i < nkernels; i++) {
	funcname[0] = '\0';
	if(clGetKernelInfo(kernels_array[i], CL_KERNEL_FUNCTION_NAME,
			   sizeof(funcname) / sizeof(char) - 1, 
			   funcname, size) == CL_SUCCESS){
	  XM7_DebugLog(XM7_LOG_INFO, "Kernel name:%s.", funcname);
	  if((strncmp(funcname, "getvram8", strlen("getvram8")) == 0)) kernel_8colors = kernels_array[i];
	  if((strncmp(funcname, "getvram4096", strlen("getvram4096")) == 0)) kernel_4096colors = kernels_array[i];
	  if((strncmp(funcname, "getvram256k", strlen("getvram256k")) == 0)) kernel_256kcolors = kernels_array[i];
	  if((strncmp(funcname, "CreateTable", strlen("CreateTable")) == 0)) kernel_table = kernels_array[i];
	  if((strncmp(funcname, "CopyVram", strlen("CopyVram")) == 0)) kernel_copyvram = kernels_array[i];
	}
      }
#endif
    }
   return ret;
}
Beispiel #12
0
int main(int argc, char** argv) {

   /* OpenCL 1.1 data structures */
   cl_platform_id* platforms;
   cl_program program;
   cl_device_id device;
   cl_context context;

   /* OpenCL 1.1 scalar data types */
   cl_uint numOfPlatforms;
   cl_int  error;

   /* 
      Get the number of platforms 
      Remember that for each vendor's SDK installed on the computer,
      the number of available platform also increased. 
    */
   error = clGetPlatformIDs(0, NULL, &numOfPlatforms);
   if(error != CL_SUCCESS) {			
      perror("Unable to find any OpenCL platforms");
      exit(1);
   }

   platforms = (cl_platform_id*) alloca(sizeof(cl_platform_id) * numOfPlatforms);
   printf("Number of OpenCL platforms found: %d\n", numOfPlatforms);

   error = clGetPlatformIDs(numOfPlatforms, platforms, NULL);
   if(error != CL_SUCCESS) {			
      perror("Unable to find any OpenCL platforms");
      exit(1);
   }
   // Search for a CPU/GPU device through the installed platforms
   // Build a OpenCL program and do not run it.
   for(cl_uint i = 0; i < numOfPlatforms; i++ ) {
       // Get the GPU device
       error = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, 1, &device, NULL);
       if(error != CL_SUCCESS) {
          // Otherwise, get the CPU
          error = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_CPU, 1, &device, NULL);
       }
        if(error != CL_SUCCESS) {
            perror("Can't locate any OpenCL compliant device");
            exit(1);
        }
        /* Create a context */
        context = clCreateContext(NULL, 1, &device, NULL, NULL, &error);
        if(error != CL_SUCCESS) {
            perror("Can't create a valid OpenCL context");
            exit(1);
        }

        /* Load the two source files into temporary datastores */
        const char *file_names[] = {"simple.cl", "simple_2.cl"};
        const int NUMBER_OF_FILES = 2;
        char* buffer[NUMBER_OF_FILES];
        size_t sizes[NUMBER_OF_FILES];
        loadProgramSource(file_names, NUMBER_OF_FILES, buffer, sizes);

        /* Create the OpenCL program object */
        program = clCreateProgramWithSource(context, NUMBER_OF_FILES, (const char**)buffer, sizes, &error);				
	    if(error != CL_SUCCESS) {
	      perror("Can't create the OpenCL program object");
	      exit(1);   
	    }
        /* Build OpenCL program object and dump the error message, if any */
        char *program_log;
        const char options[] = "-cl-finite-math-only -cl-no-signed-zeros";  
        size_t log_size;
        //error = clBuildProgram(program, 1, &device, argv[1], NULL, NULL);		
        // Uncomment the line below, comment the line above; re-build the program to use build options statically
        error = clBuildProgram(program, 1, &device, options, NULL, NULL);		
	    if(error != CL_SUCCESS) {
	      // If there's an error whilst building the program, dump the log
	      clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
	      program_log = (char*) malloc(log_size+1);
	      program_log[log_size] = '\0';
	      clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 
	            log_size+1, program_log, NULL);
	      printf("\n=== ERROR ===\n\n%s\n=============\n", program_log);
	      free(program_log);
	      exit(1);
	    }
  
        /* Query the program as to how many kernels were detected */
        cl_uint numOfKernels;
        error = clCreateKernelsInProgram(program, 0, NULL, &numOfKernels);
        if (error != CL_SUCCESS) {
            perror("Unable to retrieve kernel count from program");
            exit(1);
        }
        cl_kernel* kernels = (cl_kernel*) alloca(sizeof(cl_kernel) * numOfKernels);
        error = clCreateKernelsInProgram(program, numOfKernels, kernels, NULL);
        for(cl_uint i = 0; i < numOfKernels; i++) {
            char kernelName[32];
            cl_uint argCnt;
            clGetKernelInfo(kernels[i], CL_KERNEL_FUNCTION_NAME, sizeof(kernelName), kernelName, NULL);
            clGetKernelInfo(kernels[i], CL_KERNEL_NUM_ARGS, sizeof(argCnt), &argCnt, NULL);
            printf("Kernel name: %s with arity: %d\n", kernelName, argCnt);
        }

        /* Clean up */
        for(cl_uint i = 0; i < numOfKernels; i++) { clReleaseKernel(kernels[i]); }
        for(i=0; i< NUMBER_OF_FILES; i++) { free(buffer[i]); }
        clReleaseProgram(program);
        clReleaseContext(context);
   }
}
Beispiel #13
0
Kernel* OpenCL::createKernel( std::string strKernelSource )
{
	int iErr = 0;

	size_t sKernelLength = strKernelSource.length();
	const char* program_buffer = strKernelSource.c_str();
	cl_program program = clCreateProgramWithSource( m_context, 1, (const char**)&program_buffer, &sKernelLength, &iErr );
	if( iErr != CL_SUCCESS )
	{
		Log::getLog( "GPUAbstractionLayer" ) << Log::EL_ERROR << "Unable to create the program from the given source: " << strKernelSource.substr( 0, strKernelSource.find( '\n' ) ) << Log::endl;
		return NULL;
	}

	/* Build program */
	std::string strBuildParams;

	// if this is a debug build and we are on a CPU use the debug option!
#ifdef DEBUG
	cl_device_type devType;
	clGetDeviceInfo( m_device, CL_DEVICE_TYPE, sizeof( cl_device_type ), &devType, NULL );
	if( devType == CL_DEVICE_TYPE_CPU )
		strBuildParams += " -g";
#else
	strBuildParams += " -cl-unsafe-math-optimizations -cl-mad-enable -cl-no-signed-zeros";
#endif

#ifdef MAC
	strBuildParams += " -DMAC";
#endif

	iErr = clBuildProgram( program, 0, NULL, strBuildParams.c_str(), NULL, NULL );
	if( iErr != CL_SUCCESS )
	{
		char* program_log;
		size_t log_size;
		/* Find size of log and print to std output */
		clGetProgramBuildInfo( program, m_device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size );
		program_log = (char*)malloc( log_size + 1 );
		program_log[ log_size ] = '\0';
		clGetProgramBuildInfo( program, m_device, CL_PROGRAM_BUILD_LOG, log_size + 1, program_log, NULL );
		Log::getLog( "GPUAbstractionLayer" ) << Log::EL_FATAL_ERROR << "Error compiling the source:\n" << program_log << "\n" << Log::endl;
		free( program_log );

		return NULL;
	}
#ifdef DEBUG
	else
	{
		char* program_log;
		size_t log_size;
		/* Find size of log and print to std output */
		clGetProgramBuildInfo( program, m_device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size );
		program_log = (char*)malloc( log_size + 1 );
		program_log[ log_size ] = '\0';
		clGetProgramBuildInfo( program, m_device, CL_PROGRAM_BUILD_LOG, log_size + 1, program_log, NULL );
		Log::getLog( "GPUAbstractionLayer" ) << Log::EL_INFO << "Build log:\n" << program_log << "\n" << Log::endl;
		free( program_log );
	}
#endif

	cl_kernel kernel;
	iErr = clCreateKernelsInProgram( program, 1, &kernel, NULL );
	if( iErr != CL_SUCCESS )
	{
		Log::getLog( "GPUAbstractionLayer" ) << Log::EL_ERROR << "Unable to create a kernel from the given source code" << Log::endl;
		return NULL;
	}

	size_t wgSize;
	iErr = clGetKernelWorkGroupInfo( kernel, m_device, CL_KERNEL_WORK_GROUP_SIZE, sizeof( size_t ), &wgSize, NULL );
	if( iErr != CL_SUCCESS )
	{
		Log::getLog( "GPUAbstractionLayer" ) << Log::EL_ERROR << "Failed to get kernel work group size (" << errorNumberToString( iErr ) << ")" << Log::endl;
		return NULL;
	}

	return new Kernel( m_commandQueue, kernel, std::min( (unsigned int)wgSize, m_uiMaxWorkGroupSize ) );
}
int main(int argc, char** argv) {

   /* OpenCL 1.1 data structures */
   cl_platform_id* platforms;
   cl_program program;
   cl_device_id device;
   cl_context context;

   /* OpenCL 1.1 scalar data types */
   cl_uint numOfPlatforms;
   cl_int  error;

   /*
    Prepare an array of UserData via dynamic memory allocation
   */
   UserData* ud_in = (UserData*) malloc( sizeof(UserData) * DATA_SIZE); // input to device
   UserData* ud_out = (UserData*) malloc( sizeof(UserData) * DATA_SIZE); // output from device
   for( int i = 0; i < DATA_SIZE; ++i) {
      (ud_in + i)->x = i;
      (ud_in + i)->y = i;
      (ud_in + i)->z = i;
      (ud_in + i)->w = 3 * i;
   }
   /* 
      Get the number of platforms 
      Remember that for each vendor's SDK installed on the computer,
      the number of available platform also increased. 
    */
   error = clGetPlatformIDs(0, NULL, &numOfPlatforms);
   if(error != CL_SUCCESS ) {			
      perror("Unable to find any OpenCL platforms");
      exit(1);
   }

   platforms = (cl_platform_id*) alloca(sizeof(cl_platform_id) * numOfPlatforms);
   printf("Number of OpenCL platforms found: %d\n", numOfPlatforms);

   error = clGetPlatformIDs(numOfPlatforms, platforms, NULL);
   if(error != CL_SUCCESS ) {			
      perror("Unable to find any OpenCL platforms");
      exit(1);
   }
   // Search for a CPU/GPU device through the installed platforms
   // Build a OpenCL program and do not run it.
   for(cl_uint i = 0; i < numOfPlatforms; i++ ) {
       // Get the GPU device
       error = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, 1, &device, NULL);
       if(error != CL_SUCCESS) {
          // Otherwise, get the CPU
          error = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_CPU, 1, &device, NULL);
       }
        if(error != CL_SUCCESS) {
            perror("Can't locate any OpenCL compliant device");
            exit(1);
        }
        /* Create a context */
        context = clCreateContext(NULL, 1, &device, NULL, NULL, &error);
        if(error != CL_SUCCESS) {
            perror("Can't create a valid OpenCL context");
            exit(1);
        }

        /* Load the two source files into temporary datastores */
        const char *file_names[] = {"user_test.cl"}; 
        const int NUMBER_OF_FILES = 1;
        char* buffer[NUMBER_OF_FILES];
        size_t sizes[NUMBER_OF_FILES];
        loadProgramSource(file_names, NUMBER_OF_FILES, buffer, sizes);

        /* Create the OpenCL program object */
        program = clCreateProgramWithSource(context, NUMBER_OF_FILES, (const char**)buffer, sizes, &error);				
	    if(error != CL_SUCCESS) {
	      perror("Can't create the OpenCL program object");
	      exit(1);   
	    }
        /* Build OpenCL program object and dump the error message, if any */
        char *program_log;
        size_t log_size;
        error = clBuildProgram(program, 1, &device, NULL, NULL, NULL);		
	    if(error != CL_SUCCESS) {
	      // If there's an error whilst building the program, dump the log
	      clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
	      program_log = (char*) malloc(log_size+1);
	      program_log[log_size] = '\0';
	      clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 
	            log_size+1, program_log, NULL);
	      printf("\n=== ERROR ===\n\n%s\n=============\n", program_log);
	      free(program_log);
	      exit(1);
	    }
  
        /* Query the program as to how many kernels were detected */
        cl_uint numOfKernels;
        error = clCreateKernelsInProgram(program, 0, NULL, &numOfKernels);
        if (error != CL_SUCCESS) {
            perror("Unable to retrieve kernel count from program");
            exit(1);
        }
        cl_kernel* kernels = (cl_kernel*) alloca(sizeof(cl_kernel) * numOfKernels);
        error = clCreateKernelsInProgram(program, numOfKernels, kernels, NULL);
        for(cl_uint i = 0; i < numOfKernels; i++) {
            char kernelName[32];
            cl_uint argCnt;
            clGetKernelInfo(kernels[i], CL_KERNEL_FUNCTION_NAME, sizeof(kernelName), kernelName, NULL);
            clGetKernelInfo(kernels[i], CL_KERNEL_NUM_ARGS, sizeof(argCnt), &argCnt, NULL);
            printf("Kernel name: %s with arity: %d\n", kernelName, argCnt);
            printf("About to create command queue and enqueue this kernel...\n");

            /* Create a command queue */
            cl_command_queue cQ = clCreateCommandQueue(context, device, 0, &error);
            if (error != CL_SUCCESS) { 
                perror("Unable to create command-queue");
                exit(1);
            }

            /* Create a OpenCL buffer object */
            cl_mem UDObj = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, 
                                           sizeof(UserData) * DATA_SIZE, ud_in, &error);
            if (error != CL_SUCCESS) { 
                perror("Unable to create buffer object");
                exit(1);
            }

            /* Let OpenCL know that the kernel is suppose to receive an argument */
            error = clSetKernelArg(kernels[i], 0, sizeof(cl_mem), &UDObj);
            if (error != CL_SUCCESS) { 
                perror("Unable to set buffer object as kernel argument");
                exit(1);
            }

            /* Enqueue the kernel to the command queue */
            error = clEnqueueTask(cQ, kernels[i], 0, NULL, NULL);
            if (error != CL_SUCCESS) { 
                perror("Unable to enqueue task to command-queue");
                exit(1);
            }
            printf("Task has been enqueued successfully!\n");

            /* Enqueue the read-back from device to host */
            error = clEnqueueReadBuffer(cQ, UDObj,
                                         CL_TRUE,                    // blocking read
                                         0,                          // write from the start
                                         sizeof(UserData) * DATA_SIZE, // how much to copy
                                         ud_out, 0, NULL, NULL);
            if ( valuesOK(ud_in, ud_out) ) {
                printf("Check passed!\n");
            } else printf("Check failed!\n");

            /* Release the command queue */
            clReleaseCommandQueue(cQ);
            clReleaseMemObject(UDObj);
        }

        /* Clean up */
        
        for(cl_uint i = 0; i < numOfKernels; i++) { clReleaseKernel(kernels[i]); }
        for(i=0; i< NUMBER_OF_FILES; i++) { free(buffer[i]); }
        clReleaseProgram(program);
        clReleaseContext(context);
   }

   free(ud_in);
   free(ud_out);
}
Beispiel #15
0
int main(int argc, char** argv) {

   /* OpenCL 1.1 data structures */
   cl_platform_id* platforms;
   cl_program program;
   cl_context context;

   /* OpenCL 1.1 scalar data types */
   cl_uint numOfPlatforms;
   cl_int  error;

   cl_float16* ud_in = (cl_float16*) malloc( sizeof(cl_float16) * DATA_SIZE); // input to device
   cl_float16* ud_out = (cl_float16*) malloc( sizeof(cl_float16) * DATA_SIZE); // output from device
   for( int i = 0; i < DATA_SIZE; ++i) {
       ud_in[i] = (cl_float16){ (float)i,(float)i,(float)i,(float)i, (float)i,(float)i,(float)i,(float)i, (float)i,(float)i,(float)i,(float)i, (float)i,(float)i,(float)i,(float)i };
   }

   /* 
      Get the number of platforms 
      Remember that for each vendor's SDK installed on the computer,
      the number of available platform also increased. 
    */
   error = clGetPlatformIDs(0, NULL, &numOfPlatforms);
   if(error != CL_SUCCESS ) {			
      perror("Unable to find any OpenCL platforms");
      exit(1);
   }

   platforms = (cl_platform_id*) alloca(sizeof(cl_platform_id) * numOfPlatforms);
   printf("Number of OpenCL platforms found: %d\n", numOfPlatforms);

   error = clGetPlatformIDs(numOfPlatforms, platforms, NULL);
   if(error != CL_SUCCESS ) {			
      perror("Unable to find any OpenCL platforms");
      exit(1);
   }
   // Search for a CPU/GPU device through the installed platforms
   // Build a OpenCL program and do not run it.
   for(cl_uint i = 0; i < numOfPlatforms; i++ ) {

        cl_uint numOfDevices = 0;

        /* Determine how many devices are connected to your platform */
        error = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, 0, NULL, &numOfDevices);
        if (error != CL_SUCCESS ) { 
            perror("Unable to obtain any OpenCL compliant device info");
            exit(1);
        }
        cl_device_id* devices = (cl_device_id*) alloca(sizeof(cl_device_id) * numOfDevices);

        /* Load the information about your devices into the variable 'devices' */
        error = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, numOfDevices, devices, NULL);
        if (error != CL_SUCCESS ) { 
            perror("Unable to obtain any OpenCL compliant device info");
            exit(1);
        }
        printf("Number of detected OpenCL devices: %d\n", numOfDevices);

	    /* Create a context */
        cl_context_properties ctx[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[i], 0 };
	    context = clCreateContext(ctx, numOfDevices, devices, NULL, NULL, &error);
	    if(error != CL_SUCCESS) {
	        perror("Can't create a valid OpenCL context");
	        exit(1);
	    }

	    /* For each device, create a buffer and partition that data among the devices for compute! */
	    cl_mem inobj = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, 
	                                  sizeof(cl_float16) * DATA_SIZE, ud_in, &error);
	    if(error != CL_SUCCESS) {
	        perror("Can't create a buffer");
	        exit(1);
	    }

        int offset = 0; 
        for(int i = 0; i < numOfDevices; ++i, ++offset ) {
	        /* Load the two source files into temporary datastores */
	        const char *file_names[] = {"vector_load.cl"}; 
	        const int NUMBER_OF_FILES = 1;
	        char* buffer[NUMBER_OF_FILES];
	        size_t sizes[NUMBER_OF_FILES];
	        loadProgramSource(file_names, NUMBER_OF_FILES, buffer, sizes);
	
	        /* Create the OpenCL program object */
	        program = clCreateProgramWithSource(context, NUMBER_OF_FILES, (const char**)buffer, sizes, &error);				
		    if(error != CL_SUCCESS) {
		      perror("Can't create the OpenCL program object");
		      exit(1);   
		    }

	        /* Build OpenCL program object and dump the error message, if any */
	        char *program_log;
	        size_t log_size;
	        error = clBuildProgram(program, 1, &devices[i], NULL, NULL, NULL);		
		    if(error != CL_SUCCESS) {
		      // If there's an error whilst building the program, dump the log
		      clGetProgramBuildInfo(program, devices[i], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
		      program_log = (char*) malloc(log_size+1);
		      program_log[log_size] = '\0';
		      clGetProgramBuildInfo(program, devices[i], CL_PROGRAM_BUILD_LOG, 
		            log_size+1, program_log, NULL);
		      printf("\n=== ERROR ===\n\n%s\n=============\n", program_log);
		      free(program_log);
		      exit(1);
		    }
	  
	        /* Query the program as to how many kernels were detected */
	        cl_uint numOfKernels;
	        error = clCreateKernelsInProgram(program, 0, NULL, &numOfKernels);
	        if (error != CL_SUCCESS) {
	            perror("Unable to retrieve kernel count from program");
	            exit(1);
	        }
	        cl_kernel* kernels = (cl_kernel*) alloca(sizeof(cl_kernel) * numOfKernels);
	        error = clCreateKernelsInProgram(program, numOfKernels, kernels, NULL);

            /* Loop thru each kernel and execute on device */
	        for(cl_uint j = 0; j < numOfKernels; j++) {
	            char kernelName[32];
	            cl_uint argCnt;
	            clGetKernelInfo(kernels[j], CL_KERNEL_FUNCTION_NAME, sizeof(kernelName), kernelName, NULL);
	            clGetKernelInfo(kernels[j], CL_KERNEL_NUM_ARGS, sizeof(argCnt), &argCnt, NULL);
	            printf("Kernel name: %s with arity: %d\n", kernelName, argCnt);
	            printf("About to create command queue and enqueue this kernel...\n");
	
	            /* Create a command queue */
	            cl_command_queue cQ = clCreateCommandQueue(context, devices[i], 0, &error);
	            if (error != CL_SUCCESS) { 
	                perror("Unable to create command-queue");
	                exit(1);
	            }
	
                /* Create a buffer and copy the data from the main buffer */
	            cl_mem outobj = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 
	                                           sizeof(cl_float16) * DATA_SIZE, 0, &error);
	            if (error != CL_SUCCESS) { 
	                perror("Unable to create sub-buffer object");
	                exit(1);
	            }

	            /* Let OpenCL know that the kernel is suppose to receive an argument */
	            error = clSetKernelArg(kernels[j], 0, sizeof(cl_mem), &inobj);
	            error = clSetKernelArg(kernels[j], 1, sizeof(cl_mem), &outobj);
	            if (error != CL_SUCCESS) { 
	                perror("Unable to set buffer object in kernel");
	                exit(1);
	            }
	
	            /* Enqueue the kernel to the command queue */
                size_t threadsPerGroup[] = {4}; 
                size_t numOfGroups[] = { DATA_SIZE / threadsPerGroup[0] };
                error = clEnqueueNDRangeKernel(cQ,
                                               kernels[j],
                                               1,
                                               0,
                                               numOfGroups,
                                               threadsPerGroup,0, NULL, NULL);
	            if (error != CL_SUCCESS) { 
	                perror("Unable to enqueue task to command-queue");
	                exit(1);
	            }
	            printf("Task has been enqueued successfully!\n");

	            /* Enqueue the read-back from device to host */
	            error = clEnqueueReadBuffer(cQ, outobj,
	                                        CL_TRUE,               // blocking read
	                                        0,                      // read from the start
	                                        sizeof(cl_float16)*DATA_SIZE,          // how much to copy
	                                        ud_out, 0, NULL, NULL);
                /* Check the returned data */
	            if ( valuesOK(ud_in, ud_out, DATA_SIZE) ) {
	                printf("Check passed!\n");
	            } else printf("Check failed!\n");
	
	            /* Release the command queue */
	            clReleaseCommandQueue(cQ);
	            clReleaseMemObject(outobj);
	        } 

        /* Clean up */
        
        for(cl_uint i = 0; i < numOfKernels; i++) { clReleaseKernel(kernels[i]); }
        for(int i=0; i< NUMBER_OF_FILES; i++) { free(buffer[i]); }
        clReleaseProgram(program);
    }// end of device loop and execution
    
	    clReleaseMemObject(inobj);
        clReleaseContext(context);
   }// end of platform loop

   free(ud_in);
   free(ud_out);
}
vx_status vxTargetInit(vx_target_t *target)
{
    vx_status status = VX_ERROR_NO_RESOURCES;
    cl_int err = 0;
    vx_context context = target->base.context;
    cl_uint p, d, k;
    char *vx_incs = getenv("VX_CL_INCLUDE_DIR");
    char *cl_dirs = getenv("VX_CL_SOURCE_DIR");
    char cl_args[1024];

    snprintf(cl_args, sizeof(cl_args), "-D VX_CL_KERNEL -I %s -I %s %s %s", (vx_incs?vx_incs:"C:\\Users\\Eric\\Desktop\\VS_OpenVX2\\example_multinode_graph\\cl_code"), cl_dirs,
//#if !defined(__APPLE__)
//        "-D CL_USE_LUMINANCE",
//#else
        "",
//#endif
#if defined(VX_INCLUDE_DIR)
    "-I "VX_INCLUDE_DIR" "
#else
    " "
#endif
    );

    if (cl_dirs == NULL) {
#ifdef VX_CL_SOURCE_DIR
        const char *sdir = VX_CL_SOURCE_DIR;
        int len = strlen(sdir);
        cl_dirs = malloc(len);
        strncpy(cl_dirs, sdir, len);
#else
        return status;
#endif
    }

    strncpy(target->name, name, VX_MAX_TARGET_NAME);
    target->priority = VX_TARGET_PRIORITY_OPENCL;

    context->num_platforms = CL_MAX_PLATFORMS;
    err = clGetPlatformIDs(CL_MAX_PLATFORMS, context->platforms, NULL);
    if (err != CL_SUCCESS)
        goto exit;

    for (p = 0; p < context->num_platforms; p++) {
        err = clGetDeviceIDs(context->platforms[p], CL_DEVICE_TYPE_ALL,
            0, NULL, &context->num_devices[p]);
        err = clGetDeviceIDs(context->platforms[p], CL_DEVICE_TYPE_ALL,
            context->num_devices[p] > CL_MAX_DEVICES ? CL_MAX_DEVICES : context->num_devices[p],
            context->devices[p], NULL);
        if (err == CL_SUCCESS) {
            cl_context_properties props[] = {
                (cl_context_properties)CL_CONTEXT_PLATFORM,
                (cl_context_properties)context->platforms[p],
                (cl_context_properties)0,
            };
            for (d = 0; d < context->num_devices[p]; d++) {
                char deviceName[64];
                cl_bool compiler = CL_FALSE;
                cl_bool available = CL_FALSE;
                cl_bool image_support = CL_FALSE;
                err = clGetDeviceInfo(context->devices[p][d], CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL);
                CL_ERROR_MSG(err, "clGetDeviceInfo");
                err = clGetDeviceInfo(context->devices[p][d], CL_DEVICE_COMPILER_AVAILABLE, sizeof(cl_bool), &compiler, NULL);
                CL_ERROR_MSG(err, "clGetDeviceInfo");
                err = clGetDeviceInfo(context->devices[p][d], CL_DEVICE_AVAILABLE, sizeof(cl_bool), &available, NULL);
                CL_ERROR_MSG(err, "clGetDeviceInfo");
                err = clGetDeviceInfo(context->devices[p][d], CL_DEVICE_IMAGE_SUPPORT, sizeof(cl_bool), &image_support, NULL);
                CL_ERROR_MSG(err, "clGetDeviceInfo");
                VX_PRINT(VX_ZONE_INFO, "Device %s (compiler=%s) (available=%s) (images=%s)\n", deviceName, (compiler?"TRUE":"FALSE"), (available?"TRUE":"FALSE"), (image_support?"TRUE":"FALSE"));
            }
            context->global[p] = clCreateContext(props,
                                                 context->num_devices[p],
                                                 context->devices[p],
                                                 vxcl_platform_notifier,
                                                 target,
                                                 &err);
            if (err != CL_SUCCESS)
                break;

            /* check for supported formats */
            if (err == CL_SUCCESS) {
                cl_uint f,num_entries = 0u;
                cl_image_format *formats = NULL;
                cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR;
                cl_mem_object_type type = CL_MEM_OBJECT_IMAGE2D;

                err = clGetSupportedImageFormats(context->global[p], flags, type, 0, NULL, &num_entries);
                formats = (cl_image_format *)malloc(num_entries * sizeof(cl_image_format));
                err = clGetSupportedImageFormats(context->global[p], flags, type, num_entries, formats, NULL);
                for (f = 0; f < num_entries; f++) {
                    char order[256];
                    char datat[256];
    #define CASE_STRINGERIZE2(value, string) case value: strcpy(string, #value); break
                    switch(formats[f].image_channel_order) {
                        CASE_STRINGERIZE2(CL_R, order);
                        CASE_STRINGERIZE2(CL_A, order);
                        CASE_STRINGERIZE2(CL_RG, order);
                        CASE_STRINGERIZE2(CL_RA, order);
                        CASE_STRINGERIZE2(CL_RGB, order);
                        CASE_STRINGERIZE2(CL_RGBA, order);
                        CASE_STRINGERIZE2(CL_BGRA, order);
                        CASE_STRINGERIZE2(CL_ARGB, order);
                        CASE_STRINGERIZE2(CL_INTENSITY, order);
                        CASE_STRINGERIZE2(CL_LUMINANCE, order);
                        CASE_STRINGERIZE2(CL_Rx, order);
                        CASE_STRINGERIZE2(CL_RGx, order);
                        CASE_STRINGERIZE2(CL_RGBx, order);
    #if defined(CL_VERSION_1_2) && defined(cl_khr_gl_depth_images)
                        CASE_STRINGERIZE2(CL_DEPTH, order);
                        CASE_STRINGERIZE2(CL_DEPTH_STENCIL, order);
    #if defined(__APPLE__)
                        CASE_STRINGERIZE2(CL_1RGB_APPLE, order);
                        CASE_STRINGERIZE2(CL_BGR1_APPLE, order);
                        CASE_STRINGERIZE2(CL_SFIXED14_APPLE, order);
                        CASE_STRINGERIZE2(CL_BIASED_HALF_APPLE, order);
                        CASE_STRINGERIZE2(CL_YCbYCr_APPLE, order);
                        CASE_STRINGERIZE2(CL_CbYCrY_APPLE, order);
                        CASE_STRINGERIZE2(CL_ABGR_APPLE, order);
    #endif
    #endif
                        default:
                            sprintf(order, "%x", formats[f].image_channel_order);
                            break;
                    }
                    switch(formats[f].image_channel_data_type) {
                        CASE_STRINGERIZE2(CL_SNORM_INT8, datat);
                        CASE_STRINGERIZE2(CL_SNORM_INT16, datat);
                        CASE_STRINGERIZE2(CL_UNORM_INT8, datat);
                        CASE_STRINGERIZE2(CL_UNORM_INT16, datat);
                        CASE_STRINGERIZE2(CL_UNORM_SHORT_565, datat);
                        CASE_STRINGERIZE2(CL_UNORM_SHORT_555, datat);
                        CASE_STRINGERIZE2(CL_UNORM_INT_101010, datat);
                        CASE_STRINGERIZE2(CL_SIGNED_INT8, datat);
                        CASE_STRINGERIZE2(CL_SIGNED_INT16, datat);
                        CASE_STRINGERIZE2(CL_SIGNED_INT32, datat);
                        CASE_STRINGERIZE2(CL_UNSIGNED_INT8, datat);
                        CASE_STRINGERIZE2(CL_UNSIGNED_INT16, datat);
                        CASE_STRINGERIZE2(CL_UNSIGNED_INT32, datat);
                        CASE_STRINGERIZE2(CL_HALF_FLOAT, datat);
                        CASE_STRINGERIZE2(CL_FLOAT, datat);
    #if defined(CL_VERSION_2_0)
                        CASE_STRINGERIZE2(CL_UNORM_INT24, datat);
    #endif
                        default:
                            sprintf(order, "%x", formats[f].image_channel_data_type);
                            break;
                    }
                    VX_PRINT(VX_ZONE_INFO, "%s : %s\n", order, datat);
                }
            }

            /* create a queue for each device */
            for (d = 0; d < context->num_devices[p]; d++)
            {
                context->queues[p][d] = clCreateCommandQueue(context->global[p],
                                                          context->devices[p][d],
                                                          CL_QUEUE_PROFILING_ENABLE,
                                                          &err);
                if (err == CL_SUCCESS) {
                }
            }

			char abs_source_path[VX_CL_MAX_PATH];
            /* for each kernel */
            for (k = 0; k < num_cl_kernels; k++)
            {
                char *sources = NULL;
                size_t programSze = 0;

                /* load the source file */
                VX_PRINT(VX_ZONE_INFO, "Joiner: %s\n", FILE_JOINER);
                VX_PRINT(VX_ZONE_INFO, "Path: %s\n", cl_dirs);
                VX_PRINT(VX_ZONE_INFO, "Kernel[%u] File: %s\n", k, cl_kernels[k]->sourcepath);
                VX_PRINT(VX_ZONE_INFO, "Kernel[%u] Name: %s\n", k, cl_kernels[k]->kernelname);
                VX_PRINT(VX_ZONE_INFO, "Kernel[%u] ID: %s\n", k, cl_kernels[k]->description.name);
				
				int cl_dirs_len = strlen(cl_dirs);
				int sourcepath_len = strlen(cl_kernels[k]->sourcepath);
				strncpy(abs_source_path, cl_dirs, cl_dirs_len);
				strncpy(&abs_source_path[cl_dirs_len], cl_kernels[k]->sourcepath, sourcepath_len);
				abs_source_path[cl_dirs_len+sourcepath_len] = '\0';
                sources = clLoadSources(abs_source_path, &programSze);
				VX_PRINT(VX_ZONE_INFO, "clLoadSources programSze:%d\n", programSze);
				
                /* create a program with this source */
                cl_kernels[k]->program[p] = clCreateProgramWithSource(context->global[p],
                    1,
                    (const char **)&sources,
                    &programSze,
                    &err);
                if (err == CL_SUCCESS)
                {
                    err = clBuildProgram((cl_program)cl_kernels[k]->program[p],
                        1,
                        (const cl_device_id *)context->devices,
                        (const char *)cl_args,
                        NULL,
                        NULL);
                    if (err != CL_SUCCESS)
                    {
                        CL_BUILD_MSG(err, "Build Error");
                        if (err == CL_BUILD_PROGRAM_FAILURE)
                        {
                            char log[10][1024];
                            size_t logSize = 0;
                            clGetProgramBuildInfo((cl_program)cl_kernels[k]->program[p],
                                (cl_device_id)context->devices[p][0],
                                CL_PROGRAM_BUILD_LOG,
                                sizeof(log),
                                log,
                                &logSize);
                            VX_PRINT(VX_ZONE_ERROR, "%s", log);
                        }
                    }
                    else
                    {
                        cl_int k2 = 0;
                        cl_build_status bstatus = 0;
                        size_t bs = 0;
                        err = clGetProgramBuildInfo(cl_kernels[k]->program[p],
                            context->devices[p][0],
                            CL_PROGRAM_BUILD_STATUS,
                            sizeof(cl_build_status),
                            &bstatus,
                            &bs);
                        VX_PRINT(VX_ZONE_INFO, "Status = %d (%d)\n", bstatus, err);
                        /* get the cl_kernels from the program */
                        cl_kernels[k]->num_kernels[p] = 1;
                        err = clCreateKernelsInProgram(cl_kernels[k]->program[p],
                            1,
                            &cl_kernels[k]->kernels[p],
                            NULL);
                        VX_PRINT(VX_ZONE_INFO, "Found %u cl_kernels in %s (%d)\n", cl_kernels[k]->num_kernels[p], cl_kernels[k]->sourcepath, err);
                        for (k2 = 0; (err == CL_SUCCESS) && (k2 < (cl_int)cl_kernels[k]->num_kernels[p]); k2++)
                        {
                            char kName[VX_MAX_KERNEL_NAME];
                            size_t size = 0;
                            err = clGetKernelInfo(cl_kernels[k]->kernels[p],
                                CL_KERNEL_FUNCTION_NAME,
                                0,
                                NULL,
                                &size);
                            err = clGetKernelInfo(cl_kernels[k]->kernels[p],
                                CL_KERNEL_FUNCTION_NAME,
                                size,
                                kName,
                                NULL);
                            VX_PRINT(VX_ZONE_INFO, "Kernel %s\n", kName);
                            if (strncmp(kName, cl_kernels[k]->kernelname, VX_MAX_KERNEL_NAME) == 0)
                            {
                                vx_kernel_f kfunc = cl_kernels[k]->description.function;
                                VX_PRINT(VX_ZONE_INFO, "Linked Kernel %s on target %s\n", cl_kernels[k]->kernelname, target->name);
                                target->num_kernels++;
                                target->base.context->num_kernels++;
                                status = vxInitializeKernel(target->base.context,
                                    &target->kernels[k],
                                    cl_kernels[k]->description.enumeration,
                                    (kfunc == NULL ? vxclCallOpenCLKernel : kfunc),
                                    cl_kernels[k]->description.name,
                                    cl_kernels[k]->description.parameters,
                                    cl_kernels[k]->description.numParams,
                                    cl_kernels[k]->description.input_validate,
                                    cl_kernels[k]->description.output_validate,
                                    cl_kernels[k]->description.initialize,
                                    cl_kernels[k]->description.deinitialize);
                                if (vxIsKernelUnique(&target->kernels[k]) == vx_true_e) {
                                    target->base.context->num_unique_kernels++;
                                } else {
                                    VX_PRINT(VX_ZONE_KERNEL, "Kernel %s is NOT unqiue\n", target->kernels[k].name);
                                }
                            }
                        }
                    }
                }
                else
                {
                    CL_ERROR_MSG(err, "Program");
                }
                free(sources);
            }
        }
    }
exit:
    if (err == CL_SUCCESS) {
        status = VX_SUCCESS;
    } else {
        status = VX_ERROR_NO_RESOURCES;
    }
    return status;
}