Exemplo n.º 1
0
int main(int argc, char **argv)
{
  cl_int err;
  const char *krn_src;
  cl_program empty, program;
  cl_context ctx;
  cl_device_id did;
  cl_command_queue queue;
  cl_uint num_krn;
  cl_kernel kernels[2];

  poclu_get_any_device(&ctx, &did, &queue);
  TEST_ASSERT( ctx );
  TEST_ASSERT( did );
  TEST_ASSERT( queue );

  /* Test creating a program from an empty source */
  empty = clCreateProgramWithSource(ctx, 1, &empty_src, NULL, &err);
  CHECK_OPENCL_ERROR_IN("clCreateProgramWithSource");
  err = clBuildProgram(empty, 0, NULL, NULL, NULL, NULL);
  CHECK_OPENCL_ERROR_IN("clBuildProgram");

  err = clCreateKernelsInProgram(empty, 0, NULL, &num_krn);
  CHECK_OPENCL_ERROR_IN("clCreateKernelsInProgram");
  TEST_ASSERT(num_krn == 0);

  krn_src = poclu_read_file(SRCDIR "/tests/runtime/test_clCreateKernelsInProgram.cl");
  TEST_ASSERT(krn_src);

  program = clCreateProgramWithSource(ctx, 1, &krn_src, NULL, &err);
  CHECK_OPENCL_ERROR_IN("clCreateProgramWithSource");
  err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
  CHECK_OPENCL_ERROR_IN("clBuildProgram");

  err = clCreateKernelsInProgram(program, 0, NULL, &num_krn);
  CHECK_OPENCL_ERROR_IN("clCreateKernelsInProgram");
  // test_clCreateKernelsInProgram.cl has two kernel functions.
  TEST_ASSERT(num_krn == 2);

  err = clCreateKernelsInProgram(program, 2, kernels, NULL);
  CHECK_OPENCL_ERROR_IN("clCreateKernelsInProgram");

  // make sure the kernels were actually created 
  // Note: nothing in the specification says which kernel function
  // is kernels[0], which is kernels[1]. For now assume pocl/LLVM
  // orders these deterministacally
  err = clEnqueueTask(queue, kernels[0], 0, NULL, NULL); 
  CHECK_OPENCL_ERROR_IN("clEnqueueTask");

  err = clFinish(queue);
  CHECK_OPENCL_ERROR_IN("clFinish");

  err = clEnqueueTask(queue, kernels[1], 0, NULL, NULL);
  CHECK_OPENCL_ERROR_IN("clEnqueueTask");

  err = clFinish(queue);
  CHECK_OPENCL_ERROR_IN("clFinish");

  return EXIT_SUCCESS;
}
Exemplo n.º 2
0
    /// Enqueues a kernel to execute using a single work-item.
    ///
    /// \see_opencl_ref{clEnqueueTask}
    event enqueue_task(const kernel &kernel, const wait_list &events = wait_list())
    {
        BOOST_ASSERT(m_queue != 0);
        BOOST_ASSERT(kernel.get_context() == this->get_context());

        event event_;

        // clEnqueueTask() was deprecated in OpenCL 2.0. In that case we
        // just forward to the equivalent clEnqueueNDRangeKernel() call.
        #ifdef CL_VERSION_2_0
        size_t one = 1;
        cl_int ret = clEnqueueNDRangeKernel(
            m_queue, kernel, 1, 0, &one, &one,
            events.size(), events.get_event_ptr(), &event_.get()
        );
        #else
        cl_int ret = clEnqueueTask(
            m_queue, kernel, events.size(), events.get_event_ptr(), &event_.get()
        );
        #endif

        if(ret != CL_SUCCESS){
            BOOST_THROW_EXCEPTION(opencl_error(ret));
        }

        return event_;
    }
Exemplo n.º 3
0
    void enqueue(KernelType & k, viennacl::ocl::command_queue const & queue)
    {
      // 1D kernel:
      if (k.local_work_size(1) == 0)
      {
        #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
        std::cout << "ViennaCL: Starting 1D-kernel '" << k.name() << "'..." << std::endl;
        std::cout << "ViennaCL: Global work size: '"  << k.global_work_size() << "'..." << std::endl;
        std::cout << "ViennaCL: Local work size: '"   << k.local_work_size() << "'..." << std::endl;
        #endif

        vcl_size_t tmp_global = k.global_work_size();
        vcl_size_t tmp_local = k.local_work_size();

        cl_int err;
        if (tmp_global == 1 && tmp_local == 1)
          err = clEnqueueTask(queue.handle().get(), k.handle().get(), 0, NULL, NULL);
        else
          err = clEnqueueNDRangeKernel(queue.handle().get(), k.handle().get(), 1, NULL, &tmp_global, &tmp_local, 0, NULL, NULL);

        if (err != CL_SUCCESS)
        {
          std::cerr << "ViennaCL: FATAL ERROR: Kernel start failed for '" << k.name() << "'." << std::endl;
          std::cerr << "ViennaCL: Smaller work sizes could not solve the problem. " << std::endl;
          VIENNACL_ERR_CHECK(err);
        }
      }
      else //2D or 3D kernel
      {
        #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
        std::cout << "ViennaCL: Starting 2D/3D-kernel '" << k.name() << "'..." << std::endl;
        std::cout << "ViennaCL: Global work size: '"  << k.global_work_size(0) << ", " << k.global_work_size(1) << ", " << k.global_work_size(2) << "'..." << std::endl;
        std::cout << "ViennaCL: Local work size: '"   << k.local_work_size(0) << ", " << k.local_work_size(1) << ", " << k.local_work_size(2) << "'..." << std::endl;
        #endif

        vcl_size_t tmp_global[3];
        tmp_global[0] = k.global_work_size(0);
        tmp_global[1] = k.global_work_size(1);
        tmp_global[2] = k.global_work_size(2);

        vcl_size_t tmp_local[3];
        tmp_local[0] = k.local_work_size(0);
        tmp_local[1] = k.local_work_size(1);
        tmp_local[2] = k.local_work_size(2);

        cl_int err = clEnqueueNDRangeKernel(queue.handle().get(), k.handle().get(), (tmp_global[2] == 0) ? 2 : 3, NULL, tmp_global, tmp_local, 0, NULL, NULL);

        if (err != CL_SUCCESS)
        {
          //could not start kernel with any parameters
          std::cerr << "ViennaCL: FATAL ERROR: Kernel start failed for '" << k.name() << "'." << std::endl;
          VIENNACL_ERR_CHECK(err);
        }
      }

      #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
      queue.finish();
      std::cout << "ViennaCL: Kernel " << k.name() << " finished!" << std::endl;
      #endif
    } //enqueue()
Exemplo n.º 4
0
void execute_kernel() {

   int err;
   cl_event kernel_event;

   /* Complete OpenGL processing */
   glFinish();

   /* Execute the kernel */
   err = clEnqueueAcquireGLObjects(queue, 6, mem_objects, 0, NULL, NULL);
   if(err < 0) {
      perror("Couldn't acquire the GL objects");
      exit(1);   
   }

   err = clEnqueueTask(queue, kernel, 0, NULL, &kernel_event);
   if(err < 0) {
      perror("Couldn't enqueue the kernel");
      exit(1);   
   }

   err = clWaitForEvents(1, &kernel_event);
   if(err < 0) {
      perror("Couldn't enqueue the kernel");
      exit(1);   
   }

   clEnqueueReleaseGLObjects(queue, 6, mem_objects, 0, NULL, NULL);
   clFinish(queue);
   clReleaseEvent(kernel_event);
}
Exemplo n.º 5
0
int main(void) {
    const char *source =
        "__kernel void main(int in, __global int *out) {\n"
        "    out[0] = in + 1;\n"
        "}\n";
    cl_command_queue command_queue;
    cl_context context;
    cl_device_id device;
    cl_int input = 1;
    cl_kernel kernel;
    cl_mem buffer;
    cl_platform_id platform;
    cl_program program;

    clGetPlatformIDs(1, &platform, NULL);
    clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL);
    context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL);
    command_queue = clCreateCommandQueue(context, device, 0, NULL);
    buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, NULL);
    program = clCreateProgramWithSource(context, 1, &source, NULL, NULL);
    clBuildProgram(program, 1, &device, "", NULL, NULL);
    kernel = clCreateKernel(program, "main", NULL);
    clSetKernelArg(kernel, 0, sizeof(cl_int), &input);
    clSetKernelArg(kernel, 1, sizeof(cl_mem), &buffer);
    clEnqueueTask(command_queue, kernel, 0, NULL, NULL);
    clFlush(command_queue);
    clFinish(command_queue);
    clEnqueueReadBuffer(command_queue, buffer, CL_TRUE, 0, sizeof(cl_int), &input, 0, NULL, NULL);

    assert(input == 2);
    return EXIT_SUCCESS;
}
Exemplo n.º 6
0
cl_int WINAPI wine_clEnqueueTask(cl_command_queue command_queue, cl_kernel kernel,
                                 cl_uint num_events_in_wait_list, cl_event * event_wait_list, cl_event * event)
{
    cl_int ret;
    TRACE("\n");
    ret = clEnqueueTask(command_queue, kernel, num_events_in_wait_list, event_wait_list, event);
    return ret;
}
Exemplo n.º 7
0
inline void Queue::runTask(cl_command_queue queue,
                           cl_kernel kernel,
                           cl_uint waitListSize,
                           const cl_event* waitList,
                           cl_event* event) {
  cl_int errorCode = clEnqueueTask(queue, kernel,
                                   waitListSize, waitList, event); 
  verifyOutputCode(errorCode, "Error launching the task");
}
Exemplo n.º 8
0
int main(int argc, char **argv)
{
  cl_int err;
  const char *krn_src;
  cl_program program;
  cl_context ctx;
  cl_device_id did;
  cl_command_queue queue;
  cl_uint num_krn;
  cl_kernel kernels[2];

  poclu_get_any_device(&ctx, &did, &queue);
  assert( ctx );
  assert( did );
  assert( queue );

  krn_src = poclu_read_file(SRCDIR "/tests/runtime/test_clCreateKernelsInProgram.cl");
  assert(krn_src);

  program = clCreateProgramWithSource(ctx, 1, &krn_src, NULL, NULL);
  err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
  assert(err == CL_SUCCESS);
  
  err = clCreateKernelsInProgram(program, 0, NULL, &num_krn);
  assert(err == CL_SUCCESS);
  // test_clCreateKernelsInProgram.cl has two kernel functions.
  assert(num_krn == 2);

  err = clCreateKernelsInProgram(program, 2, kernels, NULL);
  assert(err == CL_SUCCESS);
  
  // make sure the kernels were actually created 
  // Note: nothing in the specification says which kernel function
  // is kernels[0], which is kernels[1]. For now assume pocl/LLVM
  // orders these deterministacally
  err = clEnqueueTask(queue, kernels[0], 0, NULL, NULL); 
  assert(err == CL_SUCCESS);
  err = clEnqueueTask(queue, kernels[1], 0, NULL, NULL);
  assert(err == CL_SUCCESS);
  
  clFinish(queue);

}
Exemplo n.º 9
0
void value_profiler::check_value_on_device(ad_rule rule)
{
	cl_mem dest;
	cl_int status;
	printf("Not Implemented ");
	exit(-1);
	//ad_setKernelArg(test_kernel,0,sizeof(cl_mem),(void *)&(rule.get_target_buff()));
	//! Action to be done
	//ad_setKernelArg(test_kernel,1,sizeof(cl_int),(void *)&(rule.get_target_buff()));
	clEnqueueTask(access_queue,test_kernel,0,NULL,NULL);

}
Exemplo n.º 10
0
void run_benchmark( void *vargs, cl_context& context, cl_command_queue& commands, cl_program& program, cl_kernel& kernel ) {
  struct bench_args_t *args = (struct bench_args_t *)vargs;
  // Create device buffers
  //
  static unsigned *nzval_buffer = (unsigned int*)clSVMAllocAltera(context, 0, sizeof(args->nzval), 1024); 
  static unsigned *cols_buffer = (unsigned int*)clSVMAllocAltera(context, 0, sizeof(args->cols), 1024); 
  static unsigned *vec_buffer = (unsigned int*)clSVMAllocAltera(context, 0, sizeof(args->vec), 1024); 
  static unsigned *out_buffer = (unsigned int*)clSVMAllocAltera(context, 0, sizeof(args->out), 1024); 

  // Write our data set into device buffers  
  //
  memcpy(nzval_buffer, args->nzval, sizeof(args->nzval));
  memcpy(cols_buffer, args->cols, sizeof(args->cols));
  memcpy(vec_buffer, args->vec, sizeof(args->vec));
    
  // Set the arguments to our compute kernel
  //
  int status;
  status = clSetKernelArgSVMPointerAltera(kernel, 0, (void*)nzval_buffer);
  status |= clSetKernelArgSVMPointerAltera(kernel, 1, (void*)cols_buffer);
  status |= clSetKernelArgSVMPointerAltera(kernel, 2, (void*)vec_buffer);
  status |= clSetKernelArgSVMPointerAltera(kernel, 3, (void*)out_buffer);
  if(status != CL_SUCCESS) {
    dump_error("Failed set args.", status);
    exit(1);
  }

  // Execute the kernel over the entire range of our 1d input data set
  // using the maximum number of work group items for this device
  //

#ifdef OPENCL_KERNEL
  status = clEnqueueTask(commands, kernel, 0, NULL, NULL);
#else
  printf("Error: C kernel is not currently supported!\n");
  exit(1);
#endif
  if (status)
  {
    printf("Error: Failed to execute kernel! %d\n", status);
    printf("Test failed\n");
    exit(1);
  }
  clFinish(commands);

  // Read back the results from the device to verify the output
  //
  memcpy(args->out, out_buffer, sizeof(args->out));
}
Exemplo n.º 11
0
int oclFluid3D::compile()
{
	clInitFluid = 0;
	clIntegrateForce = 0;
	clIntegrateVelocity = 0;
	clHash = 0;
	clReorder = 0;
	clInitBounds = 0;

	if (!mRadixSort.compile())
	{
		return 0;
	}

	if (!oclProgram::compile())
	{
		return 0;
	}

	clInitFluid = createKernel("clInitFluid");
	KERNEL_VALIDATE(clInitFluid)
	clIntegrateForce = createKernel("clIntegrateForce");
	KERNEL_VALIDATE(clIntegrateForce)
	clIntegrateVelocity = createKernel("clIntegrateVelocity");
	KERNEL_VALIDATE(clIntegrateVelocity)
	clHash = createKernel("clHash");
	KERNEL_VALIDATE(clHash)
	clReorder = createKernel("clReorder");
	KERNEL_VALIDATE(clReorder)
	clInitBounds = createKernel("clInitBounds");
	KERNEL_VALIDATE(clInitBounds)
	clFindBounds = createKernel("clFindBounds");
	KERNEL_VALIDATE(clFindBounds)
	clCalculateDensity = createKernel("clCalculateDensity");
	KERNEL_VALIDATE(clCalculateDensity)
	clCalculateForces = createKernel("clCalculateForces");
	KERNEL_VALIDATE(clCalculateForces)
	clGravity = createKernel("clGravity");
	KERNEL_VALIDATE(clGravity)
	clClipBox = createKernel("clClipBox");
	KERNEL_VALIDATE(clClipBox)

	// init fluid parameters
	clSetKernelArg(clInitFluid, 0, sizeof(cl_mem), bfParams);
	clEnqueueTask(mContext.getDevice(0), clInitFluid, 0, NULL, clInitFluid.getEvent());
	bfParams.map(CL_MAP_READ);

	return bindBuffers();
}
Exemplo n.º 12
0
int _tmain(int argc, _TCHAR* argv[])
{
    cl_int ret;

	o2o_init();
	o2o_create_cmd_queue(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE);
	
	o2o_create_program_from_source(kernel1);
	o2o_build_program();
	o2o_create_kernel("kernel1");

    size_t k_size = strlen(kernel2);
    cl_program p2 = clCreateProgramWithSource(ocl_ctx, 1, 
                                    &kernel2, 
                                    &k_size,
                                    &ret);
    CHECK(ret);

    ret = clBuildProgram(p2, 1, &d_id, NULL, NULL, NULL);
    CHECK(ret);

    cl_kernel k2 = clCreateKernel(p2, "kernel2", &ret);
    CHECK(ret);

    ret = clEnqueueTask(cmd_q, kernel, 0, NULL, NULL);
    CHECK(ret);

    ret = clEnqueueTask(cmd_q, k2, 0, NULL, NULL);
    CHECK(ret);

	o2o_finalize();
	
	printf("... Program Done\n");
	getchar();
	return 0;
}
Exemplo n.º 13
0
bool
piglit_cl_enqueue_task(cl_command_queue command_queue, cl_kernel kernel)
{
	cl_int errNo;

	errNo = clEnqueueTask(command_queue, kernel,
	                      0, NULL, NULL);
	if(!piglit_cl_check_error(errNo, CL_SUCCESS)) {
		fprintf(stderr,
		        "Could not enqueue task: %s\n",
		        piglit_cl_get_error_name(errNo));
		return false;
	}

	return true;
}
Exemplo n.º 14
0
int main(void) {
    const char *source =
        /* kernel pointer arguments must be __global, __constant, or __local. */
        /* https://www.khronos.org/registry/cl/sdk/2.1/docs/man/xhtml/restrictions.html */
        "__kernel void increment(__global int *out) {\n"
        "    out[0]++;\n"
        "}\n";
    cl_command_queue command_queue;
    cl_context context;
    cl_device_id device;
    cl_int input = 1;
    cl_kernel kernel;
    cl_mem buffer;
    cl_platform_id platform;
    cl_program program;

	/* Run kernel. */
    clGetPlatformIDs(1, &platform, NULL);
    clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL);
    context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL);
    program = clCreateProgramWithSource(context, 1, &source, NULL, NULL);
	clBuildProgram(program, 1, &device, "", NULL, NULL);
    /* The name of the kernel function we want to call. */
    kernel = clCreateKernel(program, "increment", NULL);
    buffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(cl_int), &input, NULL);
    clSetKernelArg(kernel, 0, sizeof(buffer), &buffer);
    command_queue = clCreateCommandQueue(context, device, 0, NULL);
    clEnqueueTask(command_queue, kernel, 0, NULL, NULL);
    clFlush(command_queue);
    clFinish(command_queue);
    clEnqueueReadBuffer(command_queue, buffer, CL_TRUE, 0, sizeof(input), &input, 0, NULL, NULL);

	/* Asserts. */
    assert(input == 2);

    /* Cleanup. */
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    clReleaseCommandQueue(command_queue);
    clReleaseContext(context);
    clReleaseMemObject(buffer);
    return EXIT_SUCCESS;
}
Exemplo n.º 15
0
int main(int argc, char** argv) {
    if (argc < 2) {
        printf("Missing required argument input.\n");
        printf("Usage: %s input\n", argv[0]);
        return -1;
    }
    int input = atoi(argv[1]);
    // 8 is an arbitrary maximum number of platforms.
    cl_uint num_entries = 8;
    cl_platform_id* platforms = malloc(num_entries * sizeof (cl_platform_id));
    cl_uint num_platforms = -1;
    clGetPlatformIDs(num_entries, platforms, &num_platforms);
    cl_uint num_devices = -1;
    cl_device_id* devices = malloc(num_entries * sizeof (cl_device_id));
    clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, num_entries, devices, &num_devices);
    for (int i = 0; i < num_devices; i++) {
        size_t device_type_size = sizeof(cl_device_type);
        cl_device_type* device_type = malloc(device_type_size);
        clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, device_type_size, device_type, NULL);
        if (device_type[0] == CL_DEVICE_TYPE_GPU) {
            cl_context context = clCreateContext(NULL, 1, &devices[i], NULL, NULL, NULL);
            cl_command_queue command_queue = clCreateCommandQueue(context, devices[i], 0, NULL);
            cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, sizeof (cl_int), NULL, NULL);
            const char* source = "__kernel void increment(int in, __global int* out) { out[0] = in + 1; }";
            cl_program program = clCreateProgramWithSource(context, 1, &source, NULL, NULL);
            clBuildProgram(program, 1, &devices[i], "", NULL, NULL);
            cl_kernel kernel = clCreateKernel(program, "increment", NULL);
            clSetKernelArg(kernel, 0, sizeof(cl_int), &input);
            clSetKernelArg(kernel, 1, sizeof(cl_mem), &buffer);
            clEnqueueTask(command_queue, kernel, 0, NULL, NULL);
            clFlush(command_queue);
            clFinish(command_queue);
            cl_int kernel_result = 0;
            clEnqueueReadBuffer(command_queue, buffer, CL_TRUE, 0, sizeof (cl_int), &kernel_result, 0, NULL, NULL);
            printf("%i\n", kernel_result);
        }
        free(device_type);
    }
    free(devices);
    free(platforms);
    return 0;
}
Exemplo n.º 16
0
void execute_device(){
    int err;
#ifdef C_KERNEL
    err = clEnqueueTask(commands, kernel, 0, NULL, NULL);
#else
    size_t global[1];                   // global domain size for our calculation
    size_t local[1];                    // local domain size for our calculation
    global[0] = 1;
    local[0] = 1;

    err = clEnqueueNDRangeKernel(commands, kernel_in, 1, NULL,
            (size_t*)global, (size_t*)local, 0, NULL, NULL);
    if (err)
    {
        printf("Error: Failed to execute kernel_in! %d\n", err);
        printf("Test failed\n");
        exit(1);
    }

    err = clEnqueueNDRangeKernel(commands, kernel_inter, 1, NULL,
            (size_t*)global, (size_t*)local, 0, NULL, NULL);
    if (err)
    {
        printf("Error: Failed to execute kernel_inter! %d\n", err);
        printf("Test failed\n");
        exit(1);
    }

    err = clEnqueueNDRangeKernel(commands, kernel_out, 1, NULL,
            (size_t*)global, (size_t*)local, 0, NULL, NULL);
    if (err)
    {
        printf("Error: Failed to execute kernel_out! %d\n", err);
        printf("Test failed\n");
        exit(1);
    }
#endif
    clFinish(commands);
}
Exemplo n.º 17
0
int main() {

   /* Host/device data structures */
   cl_device_id device;
   cl_context context;
   cl_command_queue queue;
   cl_program program;
   cl_kernel kernel;
   cl_int i, err;

   /* Data and buffers */
   float shuffle1[8];
   char shuffle2[16];
   cl_mem shuffle1_buffer, shuffle2_buffer;

   /* Create a context */
   device = create_device();
   context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
   if(err < 0) {
      perror("Couldn't create a context");
      exit(1);   
   }

   /* Build the program and create a kernel */
   program = build_program(context, device, PROGRAM_FILE);
   kernel = clCreateKernel(program, KERNEL_FUNC, &err);
   if(err < 0) {
      perror("Couldn't create a kernel");
      exit(1);   
   };

   /* Create a write-only buffer to hold the output data */
   shuffle1_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 
         sizeof(shuffle1), NULL, &err);
   if(err < 0) {
      perror("Couldn't create a buffer");
      exit(1);   
   };
   shuffle2_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 
         sizeof(shuffle2), NULL, &err);
         
   /* Create kernel argument */
   err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &shuffle1_buffer);
   if(err < 0) {
      perror("Couldn't set a kernel argument");
      exit(1);   
   };
   clSetKernelArg(kernel, 1, sizeof(cl_mem), &shuffle2_buffer);
   
   /* Create a command queue */
   queue = clCreateCommandQueue(context, device, 0, &err);
   if(err < 0) {
      perror("Couldn't create a command queue");
      exit(1);   
   };

   /* Enqueue kernel */
   err = clEnqueueTask(queue, kernel, 0, NULL, NULL);
   if(err < 0) {
      perror("Couldn't enqueue the kernel");
      exit(1);   
   }

   /* Read and print the result */
   err = clEnqueueReadBuffer(queue, shuffle1_buffer, CL_TRUE, 0, 
      sizeof(shuffle1), &shuffle1, 0, NULL, NULL);
   if(err < 0) {
      perror("Couldn't read the buffer");
      exit(1);   
   }
   clEnqueueReadBuffer(queue, shuffle2_buffer, CL_TRUE, 0, 
      sizeof(shuffle2), &shuffle2, 0, NULL, NULL);   
   
   printf("Shuffle1: ");
   for(i=0; i<7; i++) {
      printf("%.2f, ", shuffle1[i]);
   }
   printf("%.2f\n", shuffle1[7]);
   
   printf("Shuffle2: ");
   for(i=0; i<16; i++) {
      printf("%c", shuffle2[i]);
   }  
   printf("\n");

   /* Deallocate resources */
   clReleaseMemObject(shuffle1_buffer);
   clReleaseMemObject(shuffle2_buffer);   
   clReleaseKernel(kernel);
   clReleaseCommandQueue(queue);
   clReleaseProgram(program);
   clReleaseContext(context);
   return 0;
}
Exemplo n.º 18
0
int test_context(cl_context ctx, const char *prog_src, int mul,
  int ndevs, cl_device_id *devs) {
  cl_int err;
  cl_command_queue queue[ndevs];
  cl_program prog;
  cl_kernel krn;
  cl_mem buf;
  cl_event evt[ndevs];
  cl_int i;

  prog = clCreateProgramWithSource(ctx, 1, &prog_src, NULL, &err);
  CHECK_OPENCL_ERROR_IN("create program");

  CHECK_CL_ERROR(clBuildProgram(prog, 0, NULL, NULL, NULL, NULL));

  krn = clCreateKernel(prog, "setidx", &err);
  CHECK_OPENCL_ERROR_IN("create kernel");

  buf = clCreateBuffer(ctx, CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE |
    CL_MEM_HOST_READ_ONLY, ndevs*sizeof(cl_int), NULL, &err);
  CHECK_OPENCL_ERROR_IN("create buffer");

  CHECK_CL_ERROR(clSetKernelArg(krn, 0, sizeof(cl_mem), &buf));

  /* create one queue per device, and submit task, waiting for all
   * previous */
  for (i = 0; i < ndevs; ++i) {
    queue[i] = clCreateCommandQueue(ctx, devs[i], 0, &err);
    CHECK_OPENCL_ERROR_IN("create queue");
    err = clSetKernelArg(krn, 1, sizeof(i), &i);
    CHECK_OPENCL_ERROR_IN("set kernel arg 1");
    // no wait list for first (root) device
    err = clEnqueueTask(queue[i], krn, i, i ? evt : NULL, evt + i);
    CHECK_OPENCL_ERROR_IN("submit task");
  }

  /* enqueue map on last */
  cl_int *buf_host = clEnqueueMapBuffer(queue[ndevs - 1], buf, CL_TRUE,
    CL_MAP_READ, 0, ndevs*sizeof(cl_int), ndevs, evt, NULL, &err);
  CHECK_OPENCL_ERROR_IN("map buffer");

  int mismatch = 0;
  for (i = 0; i < ndevs; ++i) {
    mismatch += !!(buf_host[i] != i*mul);
  }
  TEST_ASSERT(mismatch == 0);

  /* enqueue unmap on first */
  CHECK_CL_ERROR(clEnqueueUnmapMemObject(queue[0], buf, buf_host, 0, NULL, NULL));

  for (i = 0 ; i < ndevs; ++i) {
    err = clFinish(queue[i]);
    err |= clReleaseCommandQueue(queue[i]);
    err |= clReleaseEvent(evt[i]);
  }

  err |= clReleaseKernel(krn);
  err |= clReleaseMemObject(buf);
  err |= clReleaseProgram(prog);
  err |= clReleaseContext(ctx);

  CHECK_OPENCL_ERROR_IN("cleanup");

  return CL_SUCCESS;

}
Exemplo n.º 19
0
int main()
{
  cl_device_id device_id = NULL;
  cl_context context = NULL;
  cl_command_queue command_queue = NULL;
  cl_mem memobj = NULL;
  cl_program program = NULL;
  cl_kernel kernel = NULL;
  cl_platform_id platform_id = NULL;
  cl_uint ret_num_devices;
  cl_uint ret_num_platforms;
  cl_int ret;

  char string[MEM_SIZE];

  FILE *fp;
  char fileName[] = "./hello.cl";
  char *source_str;
  size_t source_size;

  /* Load the source code containing the kernel*/
  fp = fopen(fileName, "r");
  if (!fp) {
    fprintf(stderr, "Failed to load kernel.\n");
    exit(1);
  }
  source_str = (char*)malloc(MAX_SOURCE_SIZE);
  source_size = fread( source_str, 1, MAX_SOURCE_SIZE, fp);
  fclose( fp );

  /* Get Platform and Device Info */
  ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
  ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices);

  /* Create OpenCL context */
  context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret);

  /* Create Command Queue */
  command_queue = clCreateCommandQueue(context, device_id, 0, &ret);

  /* Create Memory Buffer */
  memobj = clCreateBuffer(context, CL_MEM_READ_WRITE,MEM_SIZE * sizeof(char), NULL, &ret);

  /* Create Kernel Program from the source */
  program = clCreateProgramWithSource(context, 1, (const char **)&source_str,
				                      (const size_t *)&source_size, &ret);

  /* Build Kernel Program */
  ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);

  /* Create OpenCL Kernel */
  kernel = clCreateKernel(program, "hello", &ret);

  /* Set OpenCL Kernel Arguments */
  ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&memobj);

  /* Execute OpenCL Kernel */
  ret = clEnqueueTask(command_queue, kernel, 0, NULL,NULL);

  /* Copy results from the memory buffer */
  ret = clEnqueueReadBuffer(command_queue, memobj, CL_TRUE, 0,
			                MEM_SIZE * sizeof(char),string, 0, NULL, NULL);

  /* Display Result */
  puts(string);

  /* Finalization */
  ret = clFlush(command_queue);
  ret = clFinish(command_queue);
  ret = clReleaseKernel(kernel);
  ret = clReleaseProgram(program);
  ret = clReleaseMemObject(memobj);
  ret = clReleaseCommandQueue(command_queue);
  ret = clReleaseContext(context);

  free(source_str);

  return 0;
}
Exemplo n.º 20
0
int main() {

   /* Host/device data structures */
   cl_device_id device;
   cl_context context;
   cl_command_queue queue;
   cl_program program;
   cl_kernel kernel;
   cl_int err;

   /* Data and buffers */
   float reflect[4];
   cl_mem reflect_buffer;
   float x[4] = {1.0f, 2.0f, 3.0f, 4.0f};
   float u[4] = {0.0f, 5.0f, 0.0f, 0.0f};
   
   /* Create a device and context */
   device = create_device();
   context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
   if(err < 0) {
      perror("Couldn't create a context");
      exit(1);   
   }

   /* Build the program */
   program = build_program(context, device, PROGRAM_FILE);

   /* Create a kernel */
   kernel = clCreateKernel(program, KERNEL_FUNC, &err);
   if(err < 0) {
      perror("Couldn't create a kernel");
      exit(1);
   };

   /* Create buffer */
   reflect_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 
         4*sizeof(float), NULL, &err);
   if(err < 0) {
      perror("Couldn't create a buffer");
      exit(1);   
   };

   /* Create kernel argument */
   err = clSetKernelArg(kernel, 0, sizeof(x), x);
   err |= clSetKernelArg(kernel, 1, sizeof(u), u);
   err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &reflect_buffer);
   if(err < 0) {
      printf("Couldn't set a kernel argument");
      exit(1);   
   };

   /* Create a command queue */
   queue = clCreateCommandQueue(context, device, 0, &err);
   if(err < 0) {
      perror("Couldn't create a command queue");
      exit(1);   
   };

   /* Enqueue kernel */
   err = clEnqueueTask(queue, kernel, 0, NULL, NULL); 
   if(err < 0) {
      perror("Couldn't enqueue the kernel");
      exit(1);   
   }

   /* Read and print the result */
   err = clEnqueueReadBuffer(queue, reflect_buffer, CL_TRUE, 0, 
      sizeof(reflect), reflect, 0, NULL, NULL);
   if(err < 0) {
      perror("Couldn't read the buffer");
      exit(1);   
   }
   printf("\nResult: %f %f %f %f\n", 
         reflect[0], reflect[1], reflect[2], reflect[3]);

   /* Deallocate resources */
   clReleaseMemObject(reflect_buffer);
   clReleaseKernel(kernel);
   clReleaseCommandQueue(queue);
   clReleaseProgram(program);
   clReleaseContext(context);
   return 0;
}
Exemplo n.º 21
0
int main(void)      
{       
    cl_platform_id platform_id = NULL;  
    cl_uint ret_num_platforms;  
    cl_device_id device_id = NULL;  
    cl_uint ret_num_devices;    
    cl_context context = NULL;  
    cl_command_queue command_queue = NULL;  
    cl_mem memobj_in = NULL;    
    cl_mem memobj_out = NULL;   
    cl_program program = NULL;  
    cl_kernel kernel = NULL;    
    size_t kernel_code_size;    
    char *kernel_src_str;   
    float *result;  
    cl_int ret; 
    FILE *fp;   
         
    int data_num = sizeof(stock_array1) / sizeof(stock_array1[0]);  
    int window_num = (int)WINDOW_SIZE;  
    int i;  
         
    /* Allocate space to read in kernel code */
    kernel_src_str = (char *)malloc(MAX_SOURCE_SIZE);   
         
    /* Allocate space for the result on the host side */   
    result = (float *)malloc(data_num*sizeof(float));   
         
    printf("starting/n");
    /* Get Platform */ 
    ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);    
         
    /* Get Device */   
    ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_CPU, 1, &device_id,    
        &ret_num_devices);
/*   status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
        if (numDevices == 0) //no GPU available.
        {
                cout << "No GPU device available."<<endl;
                cout << "Choose CPU as default device."<<endl;
                status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 0, NULL, &numDevices);
                devices = (cl_device_id*)malloc(numDevices * sizeof(cl_device_id));

                status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, numDevices, devices, NULL);
        }
  */  
     
    /* Create Context */   
    context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);   
         
    /* Create Command Queue */ 
    command_queue = clCreateCommandQueue(context, device_id, 0, &ret);  
        
    printf("after create command queue/n"); 
    /* Read Kernel Code */ 
    fp = fopen("moving_average.cl", "r");   
    kernel_code_size = fread(kernel_src_str, 1, MAX_SOURCE_SIZE, fp);   
    fclose(fp); 
         
    /* Create Program Object */
    program = clCreateProgramWithSource(context, 1, (const char **)&kernel_src_str, 
        (const size_t *)&kernel_code_size, &ret);
    /* Compile kernel */   
    ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); 
         
    /* Create Kernel */
    kernel = clCreateKernel(program, "moving_average", &ret);   
         
    /* Create buffer for the input data on the device */   
    memobj_in = clCreateBuffer(context, CL_MEM_READ_WRITE,  
        data_num * sizeof(int), NULL, &ret);
         
    /* Create buffer for the result on the device */   
    memobj_out = clCreateBuffer(context, CL_MEM_READ_WRITE, 
    data_num * sizeof(float), NULL, &ret);  
         
    /* Copy input data to the global memory on the device*/
    ret = clEnqueueWriteBuffer(command_queue, memobj_in, CL_TRUE, 0,    
        data_num * sizeof(int),
        stock_array1, 0, NULL, NULL);
         
    /* Set kernel arguments */ 
    ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&memobj_in);    
    ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&memobj_out);   
    ret = clSetKernelArg(kernel, 2, sizeof(int), (void *)&data_num);    
    ret = clSetKernelArg(kernel, 3, sizeof(int), (void *)&window_num);  
         
    /* Execute the kernel */   
    ret = clEnqueueTask(command_queue, kernel, 0, NULL, NULL);  
         
    /* Copy result from device to host */  
    ret = clEnqueueReadBuffer(command_queue, memobj_out, CL_TRUE, 0,    
        data_num * sizeof(float),
        result, 0, NULL, NULL);
         
         
    /* OpenCL Object Finalization */   
    ret = clReleaseKernel(kernel);  
    ret = clReleaseProgram(program);    
    ret = clReleaseMemObject(memobj_in);    
    ret = clReleaseMemObject(memobj_out);   
    ret = clReleaseCommandQueue(command_queue); 
    ret = clReleaseContext(context);    
         
    /* Display Results */  
    for (i=0; i < data_num; i++) {   
        printf("result[%d] = %f\n", i, result[i]);
    }   
         
    /* Deallocate memory on the host */
    free(result);   
    free(kernel_src_str);   
         
    return 0;   
}       
Exemplo n.º 22
0
int main()			
{			
	cl_platform_id platform_id = NULL;		
	cl_device_id device_id = NULL;		
	cl_context context = NULL;		
	cl_command_queue command_queue = NULL;		
	cl_mem Amobj = NULL;		
	cl_mem Bmobj = NULL;		
	cl_mem Cmobj = NULL;		
	cl_program program = NULL;		
	cl_kernel kernel[4] = {NULL, NULL, NULL, NULL};		
	cl_uint ret_num_devices;		
	cl_uint ret_num_platforms;		
	cl_int ret;		
			
	int i, j;		
	float* A;		
	float* B;		
	float* C;		
			
	A = (float*)malloc(4*4*sizeof(float));		
	B = (float*)malloc(4*4*sizeof(float));		
	C = (float*)malloc(4*4*sizeof(float));		
			
			
	FILE *fp;		
	const char fileName[] = "./taskParallel.cl";		
	size_t source_size;		
	char *source_str;		
			
	/* Load kernel source file */		
	fp = fopen(fileName, "rb");		
	if (!fp) {		
		fprintf(stderr, "Failed to load kernel.\n");	
		exit(1);	
	}		
	source_str = (char *)malloc(MAX_SOURCE_SIZE);		
	source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp);		
	fclose(fp);		
			
	/* Initialize input data */		
	for (i=0; i < 4; i++) {		
		for (j=0; j < 4; j++) {	
			A[i*4+j] = i*4+j+1;
			B[i*4+j] = j*4+i+1;
		}	
	}		
			
	/* Get platform/device information */		
	ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);		
	ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices);		
			
	/* Create OpenCL Context */		
	context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);		
			
	/* Create command queue */		
	command_queue = clCreateCommandQueue(context, device_id, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &ret);		
			
	/* Create buffer object */		
	Amobj = clCreateBuffer(context, CL_MEM_READ_WRITE, 4*4*sizeof(float), NULL, &ret);		
	Bmobj = clCreateBuffer(context, CL_MEM_READ_WRITE, 4*4*sizeof(float), NULL, &ret);		
	Cmobj = clCreateBuffer(context, CL_MEM_READ_WRITE, 4*4*sizeof(float), NULL, &ret);		
			
	/* Copy input data to memory buffer */		
	ret = clEnqueueWriteBuffer(command_queue, Amobj, CL_TRUE, 0, 4*4*sizeof(float), A, 0, NULL, NULL);		
	ret = clEnqueueWriteBuffer(command_queue, Bmobj, CL_TRUE, 0, 4*4*sizeof(float), B, 0, NULL, NULL);		
			
	/* Create kernel from source */		
	program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret);		
	ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);		
			
	/* Create task parallel OpenCL kernel */		
	kernel[0] = clCreateKernel(program, "taskParallelAdd", &ret);		
	kernel[1] = clCreateKernel(program, "taskParallelSub", &ret);		
	kernel[2] = clCreateKernel(program, "taskParallelMul", &ret);		
	kernel[3] = clCreateKernel(program, "taskParallelDiv", &ret);		
			
	/* Set OpenCL kernel arguments */		
	for (i=0; i < 4; i++) {		
		ret = clSetKernelArg(kernel[i], 0, sizeof(cl_mem), (void *)&Amobj);	
		ret = clSetKernelArg(kernel[i], 1, sizeof(cl_mem), (void *)&Bmobj);	
		ret = clSetKernelArg(kernel[i], 2, sizeof(cl_mem), (void *)&Cmobj);	
	}		
			
	/* Execute OpenCL kernel as task parallel */		
	for (i=0; i < 4; i++) {		
		ret = clEnqueueTask(command_queue, kernel[i], 0, NULL, NULL);	
	}		
			
	/* Copy result to host */		
	ret = clEnqueueReadBuffer(command_queue, Cmobj, CL_TRUE, 0, 4*4*sizeof(float), C, 0, NULL, NULL);		
			
	/* Display result */		
	for (i=0; i < 4; i++) {		
		for (j=0; j < 4; j++) {	
			printf("%7.2f ", C[i*4+j]);
		}	
		printf("\n");	
	}		
			
	/* Finalization */		
	ret = clFlush(command_queue);		
	ret = clFinish(command_queue);		
	ret = clReleaseKernel(kernel[0]);		
	ret = clReleaseKernel(kernel[1]);		
	ret = clReleaseKernel(kernel[2]);		
	ret = clReleaseKernel(kernel[3]);		
	ret = clReleaseProgram(program);		
	ret = clReleaseMemObject(Amobj);		
	ret = clReleaseMemObject(Bmobj);		
	ret = clReleaseMemObject(Cmobj);		
	ret = clReleaseCommandQueue(command_queue);		
	ret = clReleaseContext(context);		
			
	free(source_str);		
			
	free(A);		
	free(B);		
	free(C);		
			
	return 0;		
}
Exemplo n.º 23
0
int main(int argc, char *argv[])
{

#ifdef DEBUG
printf("Argument count = [%d]\n", argc);
#endif

if(argc!=2)
{
printf("Expecting one argument!\n");
exit(1);
}
if(argv[1]==NULL)
{
printf("Expecting one non-null argument!\n");
exit(1);
}

char *progName = argv[1];
char fileName[100];
sprintf(fileName, "./target/%s.cl",progName);
printf("Using kernel file [%s], with kernel name [%s]\n", fileName, progName);

cl_device_id device_id = NULL;
cl_context context = NULL;
cl_command_queue command_queue = NULL;
cl_program program = NULL;
cl_kernel kernel = NULL;
cl_platform_id platform_id = NULL;
cl_uint ret_num_devices;
cl_uint ret_num_platforms;
cl_int ret;

float *result;
int i;
cl_mem image, out;
cl_bool support;
cl_image_format fmt;
int num_out = 9;

FILE *fp;
char *source_str;
size_t source_size, r_size;
int mem_size = sizeof(cl_float4) * num_out;

/*load the source code containing the kernel*/
fp = fopen (fileName, "r");
if (!fp) {
fprintf(stderr, "failed to load kernel.\n");
exit(1);
}
source_str = (char*)malloc(MAX_SOURCE_SIZE);
source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp);
fclose(fp);

/*Get platform and device info*/
ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
printf("ret_num_platforms = %d\n", ret_num_platforms);
ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1 ,&device_id, &ret_num_devices);
printf("ret_num_platforms = %d\n", ret_num_platforms);

context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);

result = (float*) malloc(mem_size);
//check image support
clGetDeviceInfo(device_id, CL_DEVICE_IMAGE_SUPPORT, sizeof(support), &support, &r_size);
if (support != CL_TRUE) {
	puts("image not supported");
	return 1;
}

command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
printf("queue ret = %d\n", ret);

out = clCreateBuffer(context, CL_MEM_READ_WRITE, mem_size, NULL, &ret);
printf("create buffer ret = %d\n", ret);

fmt.image_channel_order = CL_R;
fmt.image_channel_data_type = CL_FLOAT;

image = clCreateImage2D(context, CL_MEM_READ_ONLY, &fmt, 4, 4, 0, 0, NULL);

size_t origin[] = {0,0,0};
size_t region[] = {4,4,1};
float data[] = {
	10,20,30,40,
	10,20,30,40,
	10,20,30,40,
	10,20,30,40
};

clEnqueueWriteImage(command_queue, image, CL_TRUE, origin, region, 4*sizeof(float), 0, data, 0, NULL, NULL);

program = clCreateProgramWithSource(context, 1, (const char**) &source_str, (const size_t*) &source_size, &ret);
ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
printf("build program ret = %d\n", ret);
kernel = clCreateKernel(program, progName, &ret);
printf("create kernel ret = %d\n", ret);

//How to set int arguments?
ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*) &image);
printf("arg 0 ret = %d\n", ret);
ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*) &out);
printf("arg 1 ret = %d\n", ret);

cl_event ev;
ret = clEnqueueTask(command_queue, kernel, 0, NULL, &ev);

//How to read a int?
ret = clEnqueueReadBuffer(command_queue, out, CL_TRUE, 0, mem_size, result, 0, NULL, NULL);
for(int i=0; i < num_out; i++) {
	printf("%f,%f,%f,%f\n", result[i*4+0], result[i*4+1], result[i*4+2], result[i*4+3]);
}

ret=clFlush(command_queue);
ret=clFinish(command_queue);
ret=clReleaseKernel(kernel);
ret=clReleaseProgram(program);

ret=clReleaseMemObject(out);
ret=clReleaseMemObject(image);

ret=clReleaseCommandQueue(command_queue);
ret=clReleaseContext(context);

free(source_str);

printf("\n");

return 0;
}
Exemplo n.º 24
0
void run_benchmark( void *vargs, cl_context& context, cl_command_queue& commands, cl_program& program, cl_kernel& kernel ) {
  struct bench_args_t *args = (struct bench_args_t *)vargs;
  int num_jobs = 1 << 16;

  char* seqA_batch = (char *)malloc(sizeof(args->seqA) * num_jobs);
  char* seqB_batch = (char *)malloc(sizeof(args->seqB) * num_jobs);
  char* alignedA_batch = (char *)malloc(sizeof(args->alignedA) * num_jobs);
  char* alignedB_batch = (char *)malloc(sizeof(args->alignedB) * num_jobs);
  int i;
  for (i=0; i<num_jobs; i++) {
    memcpy(seqA_batch + i*sizeof(args->seqA), args->seqA, sizeof(args->seqA));
    memcpy(seqB_batch + i*sizeof(args->seqB), args->seqB, sizeof(args->seqB));
    memcpy(alignedA_batch + i*sizeof(args->alignedA), args->alignedA, sizeof(args->alignedA));
    memcpy(alignedB_batch + i*sizeof(args->alignedB), args->alignedB, sizeof(args->alignedB));
  }

  // 0th: initialize the timer at the beginning of the program
  timespec timer = tic();

  // Create device buffers
  //
  cl_mem seqA_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(args->seqA)*num_jobs, NULL, NULL);
  cl_mem seqB_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(args->seqB)*num_jobs, NULL, NULL);
  cl_mem alignedA_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(args->alignedA)*num_jobs, NULL, NULL);
  cl_mem alignedB_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(args->alignedB)*num_jobs, NULL, NULL);
  if (!seqA_buffer || !seqB_buffer || !alignedA_buffer || !alignedB_buffer)
  {
    printf("Error: Failed to allocate device memory!\n");
    printf("Test failed\n");
    exit(1);
  }    

  // 1st: time of buffer allocation
  toc(&timer, "buffer allocation");

  // Write our data set into device buffers  
  //
  int err;
  err = clEnqueueWriteBuffer(commands, seqA_buffer, CL_TRUE, 0, sizeof(args->seqA)*num_jobs, seqA_batch, 0, NULL, NULL);
  err |= clEnqueueWriteBuffer(commands, seqB_buffer, CL_TRUE, 0, sizeof(args->seqB)*num_jobs, seqB_batch, 0, NULL, NULL);
  if (err != CL_SUCCESS)
  {
      printf("Error: Failed to write to device memory!\n");
      printf("Test failed\n");
      exit(1);
  }

  // 2nd: time of pageable-pinned memory copy
  toc(&timer, "memory copy");
    
  // Set the arguments to our compute kernel
  //
  err  = clSetKernelArg(kernel, 0, sizeof(cl_mem), &seqA_buffer);
  err  |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &seqB_buffer);
  err  |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &alignedA_buffer);
  err  |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &alignedB_buffer);
  err  |= clSetKernelArg(kernel, 4, sizeof(int), &num_jobs);
  if (err != CL_SUCCESS)
  {
    printf("Error: Failed to set kernel arguments! %d\n", err);
    printf("Test failed\n");
    exit(1);
  }

  // 3rd: time of setting arguments
  toc(&timer, "set arguments");

  // Execute the kernel over the entire range of our 1d input data set
  // using the maximum number of work group items for this device
  //

#ifdef C_KERNEL
  err = clEnqueueTask(commands, kernel, 0, NULL, NULL);
#else
  printf("Error: OpenCL kernel is not currently supported!\n");
  exit(1);
#endif
  if (err)
  {
    printf("Error: Failed to execute kernel! %d\n", err);
    printf("Test failed\n");
    exit(1);
  }

  // 4th: time of kernel execution
  clFinish(commands);
  toc(&timer, "kernel execution");

  // Read back the results from the device to verify the output
  //
  err = clEnqueueReadBuffer( commands, alignedA_buffer, CL_TRUE, 0, sizeof(args->alignedA)*num_jobs, alignedA_batch, 0, NULL, NULL );  
  err |= clEnqueueReadBuffer( commands, alignedB_buffer, CL_TRUE, 0, sizeof(args->alignedB)*num_jobs, alignedB_batch, 0, NULL, NULL );  
  if (err != CL_SUCCESS)
  {
    printf("Error: Failed to read output array! %d\n", err);
    printf("Test failed\n");
    exit(1);
  }

  // 5th: time of data retrieving (PCIe + memcpy)
  toc(&timer, "data retrieving");

  // memcpy(args->alignedA, alignedA_batch, sizeof(args->alignedA));
  // memcpy(args->alignedB, alignedB_batch, sizeof(args->alignedB));
  for (i=0; i<sizeof(args->alignedA); i++) {
    args->alignedA[i] = 'a';
  }
  for (i=0; i<sizeof(args->alignedB); i++) {
    args->alignedB[i] = 'b';
  }
  free(seqA_batch);
  free(seqB_batch);
  free(alignedA_batch);
  free(alignedB_batch);
}
Exemplo n.º 25
0
    void enqueue(KernelType & k, viennacl::ocl::command_queue const & queue)
    {
      // 1D kernel:
      if (k.local_work_size(1) == 0)
      {
        #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
        std::cout << "ViennaCL: Starting 1D-kernel '" << k.name() << "'..." << std::endl;
        std::cout << "ViennaCL: Global work size: '"  << k.global_work_size() << "'..." << std::endl;
        std::cout << "ViennaCL: Local work size: '"   << k.local_work_size() << "'..." << std::endl;
        #endif
      
        size_t tmp_global = k.global_work_size();
        size_t tmp_local = k.local_work_size();
        
        cl_int err;
        if (tmp_global == 1 && tmp_local == 1)
          err = clEnqueueTask(queue.handle().get(), k.handle().get(), 0, NULL, NULL);
        else
          err = clEnqueueNDRangeKernel(queue.handle().get(), k.handle().get(), 1, NULL, &tmp_global, &tmp_local, 0, NULL, NULL);

        if (err != CL_SUCCESS)  //if not successful, try to start with smaller work size
        {
          //std::cout << "FAIL: " << std::endl; exit(0);
          while (err != CL_SUCCESS && tmp_local > 1)
          {
            //std::cout << "Flushing queue, then enqueuing again with half the size..." << std::endl;
            //std::cout << "Error code: " << err << std::endl;
            
            tmp_global /= 2;
            tmp_local /= 2;

            #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
            std::cout << "ViennaCL: Kernel start failed for '" << k.name() << "'." << std::endl;
            std::cout << "ViennaCL: Global work size: '"  << tmp_global << "'..." << std::endl;
            std::cout << "ViennaCL: Local work size: '"   << tmp_local << "'..." << std::endl;
            #endif
            
            queue.finish();
            err = clEnqueueNDRangeKernel(queue.handle().get(), k.handle().get(), 1, NULL, &tmp_global, &tmp_local, 0, NULL, NULL);
          }
          
          if (err != CL_SUCCESS)
          {
            //could not start kernel with any parameters
            std::cerr << "ViennaCL: FATAL ERROR: Kernel start failed for '" << k.name() << "'." << std::endl;
            std::cerr << "ViennaCL: Smaller work sizes could not solve the problem. " << std::endl;
            VIENNACL_ERR_CHECK(err);
          }
          else
          {
            //remember parameters:
            k.local_work_size(0, tmp_local);
            k.global_work_size(0, tmp_global);
            #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
            std::cout << "ViennaCL: Kernel '" << k.name() << "' now uses global work size " << tmp_global << " and local work size " << tmp_local << "."  << std::endl;
            #endif
          }          
        }
      }
      else //2D kernel
      {
        #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
        std::cout << "ViennaCL: Starting 2D-kernel '" << k.name() << "'..." << std::endl;
        std::cout << "ViennaCL: Global work size: '"  << k.global_work_size(0) << ", " << k.global_work_size(1) << "'..." << std::endl;
        std::cout << "ViennaCL: Local work size: '"   << k.local_work_size(0) << ", " << k.local_work_size(1) << "'..." << std::endl;
        #endif

        size_t tmp_global[2]; 
        tmp_global[0] = k.global_work_size(0);
        tmp_global[1] = k.global_work_size(1);
        
        size_t tmp_local[2];
        tmp_local[0] = k.local_work_size(0);
        tmp_local[1] = k.local_work_size(1);
        
        cl_int err = clEnqueueNDRangeKernel(queue.handle().get(), k.handle().get(), 2, NULL, tmp_global, tmp_local, 0, NULL, NULL);

        if (err != CL_SUCCESS)
        {
          //could not start kernel with any parameters
          std::cerr << "ViennaCL: FATAL ERROR: Kernel start failed for '" << k.name() << "'." << std::endl;
          VIENNACL_ERR_CHECK(err);
        }
        
      }
            
      #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
      queue.finish();
      std::cout << "ViennaCL: Kernel " << k.name() << " finished!" << std::endl;
      #endif
    } //enqueue()
Exemplo n.º 26
0
int main()
{
    cl_platform_id platform = NULL;
    cl_device_id device = NULL;
    cl_context context = NULL;
    cl_command_queue command_queue = NULL;
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    cl_int status = 0;
    cl_event task_event, map_event;
    cl_device_type dType = CL_DEVICE_TYPE_GPU;
    cl_int image_width, image_height;
    cl_float4 *result;
    int i, j;
    cl_mem clImage, out;
    cl_bool support;
    int pixels_read = 8;

    //Setup the OpenCL Platform,
    //Get the first available platform. Use it as the default platform
    status = clGetPlatformIDs(1, &platform, NULL);
    LOG_OCL_ERROR(status, "clGetPlatformIDs Failed" );

    //Get the first available device
    status = clGetDeviceIDs (platform, dType, 1, &device, NULL);
    LOG_OCL_ERROR(status, "clGetDeviceIDs Failed" );
    
    /*Check if the device support images */
    clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(support), &support, NULL);
     if (support != CL_TRUE) {
         std::cout <<"IMAGES not supported\n";
         return 1;
     }
    //Create an execution context for the selected platform and device.
    cl_context_properties contextProperty[3] =
    {
        CL_CONTEXT_PLATFORM,
        (cl_context_properties)platform,
        0
    };
    context = clCreateContextFromType(
        contextProperty,
        dType,
        NULL,
        NULL,
        &status);
    LOG_OCL_ERROR(status, "clCreateContextFromType Failed" );

    /*Create command queue*/
    command_queue = clCreateCommandQueue(context,
                                        device,
                                        0,
                                        &status);
    LOG_OCL_ERROR(status, "clCreateCommandQueue Failed" );

    /* Create Image Object */
    //Create OpenCL device input image with the format and descriptor as below

    cl_image_format image_format;
    image_format.image_channel_data_type = CL_FLOAT;
    image_format.image_channel_order = CL_R;

    //We create a 5 X 5 2D image 
    image_width  = 5; 
    image_height = 5;
    cl_image_desc image_desc;
    image_desc.image_type   = CL_MEM_OBJECT_IMAGE2D;
    image_desc.image_width  = image_width;
    image_desc.image_height = image_height;
    image_desc.image_depth  = 1;
    image_desc.image_array_size  = 1;
    image_desc.image_row_pitch   = image_width*sizeof(float);
    image_desc.image_slice_pitch = 25*sizeof(float);
    image_desc.num_mip_levels = 0;
    image_desc.num_samples    = 0;
    image_desc.buffer         = NULL;
    
    /* Create output buffer */
    out = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float4)*pixels_read, NULL, &status);
    LOG_OCL_ERROR(status, "clCreateBuffer Failed" );

    size_t origin[] = {0,0,0};  /* Transfer target coordinate*/
    size_t region[] = {image_width,image_height,1};  /* Size of object to be transferred */
    float *data = (float *)malloc(image_width*image_height*sizeof(float));
    float pixels[] = {            /* Transfer Data */
        10, 20, 10, 40, 50,
        10, 20, 20, 40, 50,
        10, 20, 30, 40, 50,
        10, 20, 40, 40, 50,
        10, 20, 50, 40, 50
    };
    memcpy(data, pixels, image_width*image_height*sizeof(float));
    clImage = clCreateImage(context, CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR, &image_format, &image_desc, pixels, &status);
    LOG_OCL_ERROR(status, "clCreateImage Failed" );

    /* If the image was not created using CL_MEM_USE_HOST_PTR, then you can write the image data to the device using the 
       clEnqueueWriteImage function. */
    //status = clEnqueueWriteImage(command_queue, clImage, CL_TRUE, origin, region, 5*sizeof(float), 25*sizeof(float), data, 0, NULL, NULL);
    //LOG_OCL_ERROR(status, "clCreateBuffer Failed" );

    /* Build program */
    program = clCreateProgramWithSource(context, 1, (const char **)&sample_image_kernel,
                                        NULL, &status);
    LOG_OCL_ERROR(status, "clCreateProgramWithSource Failed" );

    // Build the program
    status = clBuildProgram(program, 1, &device, "", NULL, NULL);
    LOG_OCL_ERROR(status, "clBuildProgram Failed" );
    if(status != CL_SUCCESS)
    {
        if(status == CL_BUILD_PROGRAM_FAILURE)
            LOG_OCL_COMPILER_ERROR(program, device);
        LOG_OCL_ERROR(status, "clBuildProgram Failed" );
    }
    printf("Printing the image pixels\n");
    for (i=0; i<image_height; i++) {
        for (j=0; j<image_width; j++) {
            printf("%f ",data[i*image_width +j]);
        }
        printf("\n");
    }

    //Create kernel and set the kernel arguments
    kernel = clCreateKernel(program, "image_test", &status);
    clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&clImage);
    clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&out);

    /*********Image sampler with image repeated at every 1.0 normalized coordinate***********/
    /*If host side sampler is not required the sampler objects can also be created on the kernel code. 
      Don't pass the thirsd argument to the kernel and create  a sample object as shown below in the kernel code*/
    //const sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; 

    cl_sampler sampler = clCreateSampler (context,
                            CL_TRUE,
                            CL_ADDRESS_REPEAT,
                            CL_FILTER_NEAREST,
                            &status);
    clSetKernelArg(kernel, 2, sizeof(cl_sampler), (void*)&sampler);
    //Enqueue the kernel 
    status = clEnqueueTask(command_queue, kernel, 0, NULL, &task_event);
    LOG_OCL_ERROR(status, "clEnqueueTask Failed" );
    /* Map the result back to host address */
    result = (cl_float4*)clEnqueueMapBuffer(command_queue, out, CL_TRUE, CL_MAP_READ, 0, sizeof(cl_float4)*pixels_read, 1, &task_event, &map_event, &status);
    printf(" SAMPLER mode set to CL_ADDRESS_REPEAT | CL_FILTER_NEAREST\n");
    printf("\nPixel values retreived based on the filter and Addressing mode selected\n");
    printf("(float2)(0.5f,0.5f) = %f,%f,%f,%f\n",result[0].s[0],result[0].s[1],result[0].s[2],result[0].s[3]);
    printf("(float2)(0.8f,0.5f) = %f,%f,%f,%f\n",result[1].s[0],result[1].s[1],result[1].s[2],result[1].s[3]);
    printf("(float2)(1.3f,0.5f) = %f,%f,%f,%f\n",result[2].s[0],result[2].s[1],result[2].s[2],result[2].s[3]);
    printf("(float2)(0.5f,0.5f) = %f,%f,%f,%f\n",result[3].s[0],result[3].s[1],result[3].s[2],result[3].s[3]);
    printf("(float2)(0.5f,0.8f) = %f,%f,%f,%f\n",result[4].s[0],result[4].s[1],result[4].s[2],result[4].s[3]);
    printf("(float2)(0.5f,1.3f) = %f,%f,%f,%f\n",result[5].s[0],result[5].s[1],result[5].s[2],result[5].s[3]);
    printf("(float2)(4.5f,0.5f) = %f,%f,%f,%f\n",result[5].s[0],result[5].s[1],result[5].s[2],result[5].s[3]);
    printf("(float2)(5.0f,0.5f) = %f,%f,%f,%f\n",result[7].s[0],result[7].s[1],result[7].s[2],result[7].s[3]);
    clEnqueueUnmapMemObject(command_queue, out, result, 0, NULL, NULL);
    clReleaseSampler(sampler);

    /*********Image sampler with image mirrored at every 1.0 normalized coordinate***********/
    sampler = clCreateSampler (context,
                            CL_TRUE,
                            CL_ADDRESS_MIRRORED_REPEAT,
                            CL_FILTER_LINEAR,
                            &status);
    clSetKernelArg(kernel, 2, sizeof(cl_sampler), (void*)&sampler);
    //Enqueue the kernel 
    status = clEnqueueTask(command_queue, kernel, 0, NULL, &task_event);
    LOG_OCL_ERROR(status, "clEnqueueTask Failed" );
    /* Map the result back to host address */
    result = (cl_float4*)clEnqueueMapBuffer(command_queue, out, CL_TRUE, CL_MAP_READ, 0, sizeof(cl_float4)*pixels_read, 1, &task_event, &map_event, &status);
    printf(" SAMPLER mode set to CL_ADDRESS_MIRRORED_REPEAT | CL_FILTER_LINEAR\n");
    printf("\nPixel values retreived based on the filter and Addressing mode selected\n");
    printf("(float2)(0.5f,0.5f) = %f,%f,%f,%f\n",result[0].s[0],result[0].s[1],result[0].s[2],result[0].s[3]);
    printf("(float2)(0.8f,0.5f) = %f,%f,%f,%f\n",result[1].s[0],result[1].s[1],result[1].s[2],result[1].s[3]);
    printf("(float2)(1.3f,0.5f) = %f,%f,%f,%f\n",result[2].s[0],result[2].s[1],result[2].s[2],result[2].s[3]);
    printf("(float2)(0.5f,0.5f) = %f,%f,%f,%f\n",result[3].s[0],result[3].s[1],result[3].s[2],result[3].s[3]);
    printf("(float2)(0.5f,0.8f) = %f,%f,%f,%f\n",result[4].s[0],result[4].s[1],result[4].s[2],result[4].s[3]);
    printf("(float2)(0.5f,1.3f) = %f,%f,%f,%f\n",result[5].s[0],result[5].s[1],result[5].s[2],result[5].s[3]);
    printf("(float2)(4.5f,0.5f) = %f,%f,%f,%f\n",result[5].s[0],result[5].s[1],result[5].s[2],result[5].s[3]);
    printf("(float2)(5.0f,0.5f) = %f,%f,%f,%f\n",result[7].s[0],result[7].s[1],result[7].s[2],result[7].s[3]);
    clEnqueueUnmapMemObject(command_queue, out, result, 0, NULL, NULL);
    clReleaseSampler(sampler);
    /********************/

    //Free All OpenCL objects.
    clReleaseMemObject(out);
    clReleaseMemObject(clImage);
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    clReleaseCommandQueue(command_queue);
    clReleaseContext(context);
    return 0;

}
Exemplo n.º 27
0
void Device::scoreCandidates(eObj *e) {
    //e->iNumBufferedCandidates = 0;
    //return;
    //MEA: static?
    static cObj* p;
    //static size_t iNumBlocks;
    static size_t stGlobalDim;
    static size_t globalTransDim = Tempest::mround(Tempest::data.iNumMS2Bins, this->transform_size);
    static float fElapsedTime;
    long lSpectrumOffset = e->lIndex*Tempest::data.iNumMS2Bins;
    long lScratchOffset = (long)Tempest::data.iCrossCorrelationWidth;
    long lNoOffset = 0;
    int err;
    cl_ulong start;
    cl_ulong end;
    
    err = clEnqueueWriteBuffer(clCommandQueue, cl_cCandidates, CL_FALSE, 0, sizeof(cObj) * e->iNumBufferedCandidates, e->candidateBuffer, 0, NULL, &(e->clEventSent));
    Tempest::check_cl_error(__FILE__, __LINE__, err, "Unable to copy candidate data from host to GPU");
	
    stGlobalDim = Tempest::mround(Tempest::data.host_iPeakCounts[e->lIndex], this->build_size);
    cl_mem spectrumBuffer;

    std::map<long,cl_mem>::iterator s2bElem = spectrum2buffer.find(e->lIndex);
    if (s2bElem == spectrum2buffer.end()) { //spectrum not cached
        if (!unusedBuffers.empty()) {
            spectrumBuffer = unusedBuffers.top();
            unusedBuffers.pop();
        }
        else {
            spectrumBuffer = spectrum2buffer.begin()->second;
            spectrum2buffer.erase(spectrum2buffer.begin());
        }
        spectrum2buffer[e->lIndex] = spectrumBuffer;

        //initialize buffer
        err = clEnqueueCopyBuffer(clCommandQueue, cl_init_fSpectra, spectrumBuffer, 0, 0, Tempest::data.iNumMS2Bins*sizeof(cl_float), 0, NULL, Tempest::config.profile ? &memsetEvent : NULL);
        //Tempest::check_cl_error(__FILE__, __LINE__, err, "Unable to clear spectrum memory");
        if (err != 0) {
            //memory cap reached. Stop filling new buffers.
            unusedBuffers = std::stack<cl_mem>();
            spectrumBuffer = spectrum2buffer.begin()->second;
            spectrum2buffer.erase(spectrum2buffer.begin());
            spectrum2buffer[e->lIndex] = spectrumBuffer;
            err = clEnqueueCopyBuffer(clCommandQueue, cl_init_fSpectra, spectrumBuffer, 0, 0, Tempest::data.iNumMS2Bins*sizeof(cl_float), 0, NULL, Tempest::config.profile ? &memsetEvent : NULL);
            Tempest::check_cl_error(__FILE__, __LINE__, err, "Unable to clear spectrum memory");
        }
        if (Tempest::config.profile) {
            clFinish(clCommandQueue);
            clGetEventProfilingInfo(memsetEvent, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, NULL);
            clGetEventProfilingInfo(memsetEvent, CL_PROFILING_COMMAND_END,   sizeof(cl_ulong), &end,   NULL);
            totalMemsetTime += (end-start);
            clReleaseEvent(memsetEvent);
        }
        
        // build
        err  = clSetKernelArg(__cl_build, 0, sizeof(cl_mem), &spectrumBuffer);
        err |= clSetKernelArg(__cl_build, 1, sizeof(int), &(Tempest::data.host_iPeakCounts[e->lIndex]));
        err |= clSetKernelArg(__cl_build, 4, sizeof(long), &(Tempest::data.host_lPeakIndices[e->lIndex]));
        err |= clEnqueueNDRangeKernel(clCommandQueue, __cl_build, 1, NULL, &stGlobalDim, &(this->build_size), 0, NULL, Tempest::config.profile ? &buildEvent : NULL);
        Tempest::check_cl_error(__FILE__, __LINE__, err, "Could not build spectrum (cl_build kernel)");
        if (Tempest::config.profile) {
            clFinish(clCommandQueue);
            clGetEventProfilingInfo(buildEvent, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, NULL);
            clGetEventProfilingInfo(buildEvent, CL_PROFILING_COMMAND_END,   sizeof(cl_ulong), &end,   NULL);
            totalBuildTime += (end-start);
            buildLaunches += 1;
            clReleaseEvent(buildEvent);
        }

        // transform
        if (Tempest::params.xcorrTransformWidth) {
            //size_t localDim = CROSS_CORRELATION_WINDOW * 2;
            //size_t globalDim = localDim * Tempest::data.iNumMS2Bins;
            size_t globalDim = Tempest::mround(Tempest::data.iNumMS2Bins, this->transform_size);
            err  = clSetKernelArg(__cl_transform, 0, sizeof(cl_mem), &spectrumBuffer);
            err |= clEnqueueNDRangeKernel(clCommandQueue, __cl_transform, 1, NULL, &globalDim, &(this->transform_size), 0, NULL, Tempest::config.profile ? & transformEvent : NULL);
            Tempest::check_cl_error(__FILE__, __LINE__, err, "Could not transform spectrum (cl_transform kernel)");
            if (Tempest::config.profile) {
                clFinish(clCommandQueue);
                clGetEventProfilingInfo(transformEvent, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, NULL);
                clGetEventProfilingInfo(transformEvent, CL_PROFILING_COMMAND_END,   sizeof(cl_ulong), &end,   NULL);
                totalTransformTime += (end-start);
                clReleaseEvent(transformEvent);
            }
        }
    }
    else {
        //move spectrum entry to end of map by reinserting
        spectrumBuffer = s2bElem->second;
        spectrum2buffer.erase(s2bElem);
        spectrum2buffer[e->lIndex] = spectrumBuffer;
    }
        
    // score
    err  = clSetKernelArg(__cl_score, 0, sizeof(int), &(e->iPrecursorCharge));
    err |= clSetKernelArg(__cl_score, 1, sizeof(int), &(e->iNumBufferedCandidates));
    err |= clSetKernelArg(__cl_score, 4, sizeof(cl_mem), &spectrumBuffer);
    err |= clSetKernelArg(__cl_score, 5, sizeof(long), &lNoOffset);
    err |= clEnqueueNDRangeKernel(clCommandQueue, __cl_score, 1, NULL, &(this->candidateBufferSize), &(this->score_size), 0, NULL, Tempest::config.profile ? &scoreEvent : NULL);
    Tempest::check_cl_error(__FILE__, __LINE__, err, "Could not score candidates (cl_score kernel)");
    if (Tempest::config.profile) {
        clFinish(clCommandQueue);
        clGetEventProfilingInfo(scoreEvent, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, NULL);
        clGetEventProfilingInfo(scoreEvent, CL_PROFILING_COMMAND_END,   sizeof(cl_ulong), &end,   NULL);
        totalScoreTime += (end-start);
        clReleaseEvent(scoreEvent);
        scoreKernelLaunches++;
    }
    
    // Process Scores
	
    // TODO what if buffer size is less than 512?
    long lPSMsOffset = e->lIndex * Tempest::params.numInternalPSMs;
    err |= clSetKernelArg(__cl_reduce_scores, 4, sizeof(long), &lPSMsOffset);
    if (Tempest::config.parallelReduce)
        err |= clEnqueueNDRangeKernel(clCommandQueue, __cl_reduce_scores, 1, NULL, &(this->reduce_scores_size), &(this->reduce_scores_size), 0, NULL, Tempest::config.profile ? &reduceEvent : NULL);
    else
        err |= clEnqueueTask(clCommandQueue, __cl_reduce_scores, 0, NULL, Tempest::config.profile ? &reduceEvent : NULL);
    Tempest::check_cl_error(__FILE__, __LINE__, err, "Could not process scores (cl_reduce_scores kernel)");
    if (Tempest::config.profile) {
        clFinish(clCommandQueue);
        clGetEventProfilingInfo(reduceEvent, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, NULL);
        clGetEventProfilingInfo(reduceEvent, CL_PROFILING_COMMAND_END,   sizeof(cl_ulong), &end,   NULL);
        totalReduceTime += (end-start);
        clReleaseEvent(reduceEvent);
    }

    // reset buffer
    e->iNumBufferedCandidates = 0;
}
Exemplo n.º 28
0
int main() {

   /* Host/device data structures */
   cl_device_id device;
   cl_context context;
   cl_command_queue queue;
   cl_program program;
   cl_kernel kernel;
   cl_int i, err;

   /* Data and buffers */
   unsigned char test[16];
   cl_mem test_buffer;

   /* Create a context */
   device = create_device();
   context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
   if(err < 0) {
      perror("Couldn't create a context");
      exit(1);   
   }

   /* Build the program and create a kernel */
   program = build_program(context, device, PROGRAM_FILE);
   kernel = clCreateKernel(program, KERNEL_FUNC, &err);
   if(err < 0) {
      perror("Couldn't create a kernel");
      exit(1);   
   };

   /* Create a write-only buffer to hold the output data */
   test_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 
         sizeof(test), NULL, &err);
   if(err < 0) {
      perror("Couldn't create a buffer");
      exit(1);   
   };

   /* Create kernel argument */
   err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buffer);
   if(err < 0) {
      perror("Couldn't set a kernel argument");
      exit(1);   
   };

   /* Create a command queue */
   queue = clCreateCommandQueue(context, device, 0, &err);
   if(err < 0) {
      perror("Couldn't create a command queue");
      exit(1);   
   };

   /* Enqueue kernel */
   err = clEnqueueTask(queue, kernel, 0, NULL, NULL);
   if(err < 0) {
      perror("Couldn't enqueue the kernel");
      exit(1);   
   }

   /* Read and print the result */
   err = clEnqueueReadBuffer(queue, test_buffer, CL_TRUE, 0, 
      sizeof(test), &test, 0, NULL, NULL);
   if(err < 0) {
      perror("Couldn't read the buffer");
      exit(1);   
   }
   for(i=0; i<15; i++) {
      printf("0x%X, ", test[i]);
   }
   printf("0x%X\n", test[15]);

   /* Deallocate resources */
   clReleaseMemObject(test_buffer);
   clReleaseKernel(kernel);
   clReleaseCommandQueue(queue);
   clReleaseProgram(program);
   clReleaseContext(context);
   return 0;
}
Exemplo n.º 29
0
/*
 * To ease testing, each kernel will be a Task kernel taking a pointer to an
 * integer and running built-in functions. If an error is encountered, the
 * integer pointed to by the arg will be set accordingly. If the kernel succeeds,
 * this integer is set to 0.
 */
static uint32_t run_kernel(const char *source, TestCaseKind kind)
{
    cl_platform_id platform = 0;
    cl_device_id device;
    cl_context ctx;
    cl_command_queue queue;
    cl_program program;
    cl_int result;
    cl_kernel kernel;
    cl_event event;
    cl_mem rs_buf;

    cl_sampler sampler;
    cl_mem mem1, mem2, mem3;
    cl_image_format fmt;

    unsigned char image2d_data[3*3*4] = {
        255, 0, 0, 0,       0, 255, 0, 0,       128, 128, 128, 0,
        0, 0, 255, 0,       255, 255, 0, 0,     0, 128, 0, 0,
        255, 128, 0, 0,     128, 0, 255, 0,     0, 0, 0, 0
    };

    uint32_t rs = 0;

    result = clGetDeviceIDs(platform, CL_DEVICE_TYPE_DEFAULT, 1, &device, 0);
    if (result != CL_SUCCESS) return 65536;

    ctx = clCreateContext(0, 1, &device, 0, 0, &result);
    if (result != CL_SUCCESS) return 65537;

    queue = clCreateCommandQueue(ctx, device, 0, &result);
    if (result != CL_SUCCESS) return 65538;

    program = clCreateProgramWithSource(ctx, 1, &source, 0, &result);
    if (result != CL_SUCCESS) return 65539;

    result = clBuildProgram(program, 1, &device, "", 0, 0);
    if (result != CL_SUCCESS)
    {
        // Print log
        char *log = 0;
        size_t len = 0;

        clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, 0, &len);
        log = (char *)std::malloc(len);
        clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, len, log, 0);

        std::cout << log << std::endl;
        std::free(log);

        return 65540;
    }

    kernel = clCreateKernel(program, "test_case", &result);
    if (result != CL_SUCCESS) return 65541;

    // Create the result buffer
    rs_buf = clCreateBuffer(ctx, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
                            sizeof(rs), &rs, &result);
    if (result != CL_SUCCESS) return 65542;

    result = clSetKernelArg(kernel, 0, sizeof(cl_mem), &rs_buf);
    if (result != CL_SUCCESS) return 65543;

    // Kind
    switch (kind)
    {
        case NormalKind:
            break;

        case SamplerKind:
            sampler = clCreateSampler(ctx, 1, CL_ADDRESS_MIRRORED_REPEAT, CL_FILTER_NEAREST, &result);
            if (result != CL_SUCCESS) return 65546;

            result = clSetKernelArg(kernel, 1, sizeof(cl_sampler), &sampler);
            if (result != CL_SUCCESS) return 65547;
            break;

        case ImageKind:
            fmt.image_channel_data_type = CL_UNORM_INT8;
            fmt.image_channel_order = CL_RGBA;

            mem1 = clCreateImage2D(ctx, CL_MEM_WRITE_ONLY, &fmt, 4, 4, 0, 0, &result);
            if (result != CL_SUCCESS) return 65548;

            mem3 = clCreateImage2D(ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                                   &fmt, 3, 3, 0, image2d_data, &result);
            if (result != CL_SUCCESS) return 65548;

            fmt.image_channel_data_type = CL_SIGNED_INT16;

            mem2 = clCreateImage2D(ctx, CL_MEM_WRITE_ONLY, &fmt, 4, 4, 0, 0, &result);
            if (result != CL_SUCCESS) return 65548;

            result = clSetKernelArg(kernel, 1, sizeof(cl_mem), &mem1);
            if (result != CL_SUCCESS) return 65549;

            result = clSetKernelArg(kernel, 2, sizeof(cl_mem), &mem2);
            if (result != CL_SUCCESS) return 65549;

            result = clSetKernelArg(kernel, 3, sizeof(cl_mem), &mem3);
            if (result != CL_SUCCESS) return 65549;
            break;

        default:
            break;
    }

    if (kind == BarrierKind)
    {
        size_t local_size = 64;
        size_t global_size = 64;

        result = clEnqueueNDRangeKernel(queue, kernel, 1, 0, &global_size,
                                        &local_size, 0, 0, &event);
        if (result != CL_SUCCESS) return 65544;
    }
    else
    {
        result = clEnqueueTask(queue, kernel, 0, 0, &event);
        if (result != CL_SUCCESS) return 65544;
    }

    result = clWaitForEvents(1, &event);
    if (result != CL_SUCCESS) return 65545;

    if (kind == SamplerKind) clReleaseSampler(sampler);
    if (kind == ImageKind)
    {
        clReleaseMemObject(mem1);
        clReleaseMemObject(mem2);
        clReleaseMemObject(mem3);
    }
    clReleaseEvent(event);
    clReleaseMemObject(rs_buf);
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    clReleaseCommandQueue(queue);
    clReleaseContext(ctx);

    return rs;
}
Exemplo n.º 30
0
//---------------------------------------------------------------------
// this function computes the norm of the difference between the
// computed solution and the exact solution
//---------------------------------------------------------------------
void error_norm(double rms[5])
{
  int i, m, d;

  cl_kernel *k_en;
  cl_mem *m_rms;
  double (*g_rms)[5];
  cl_int ecode;

  g_rms = (double (*)[5])malloc(sizeof(double)*5 * num_devices);
  m_rms = (cl_mem *)malloc(sizeof(cl_mem) * num_devices);
  k_en  = (cl_kernel *)malloc(sizeof(cl_kernel) * num_devices);

  for (i = 0; i < num_devices; i++) {
    m_rms[i] = clCreateBuffer(context,
                              CL_MEM_READ_WRITE,
                              sizeof(double) * 5, 
                              NULL, &ecode);
    clu_CheckError(ecode, "clCreateBuffer()");

    k_en[i] = clCreateKernel(p_error[i], "error_norm", &ecode);
    clu_CheckError(ecode, "clCreateKernel()");

    ecode  = clSetKernelArg(k_en[i], 0, sizeof(cl_mem), &m_u[i]);
    ecode |= clSetKernelArg(k_en[i], 1, sizeof(cl_mem), &m_ce[i]);
    ecode |= clSetKernelArg(k_en[i], 2, sizeof(cl_mem), &m_rms[i]);
    ecode |= clSetKernelArg(k_en[i], 3, sizeof(cl_mem), &m_cell_low[i]);
    ecode |= clSetKernelArg(k_en[i], 4, sizeof(cl_mem), &m_cell_high[i]);
    ecode |= clSetKernelArg(k_en[i], 5, sizeof(int), &ncells);
    clu_CheckError(ecode, "clSetKernelArg()");
    
    ecode = clEnqueueTask(cmd_queue[i],
                          k_en[i],
                          0, NULL, NULL);
    clu_CheckError(ecode, "clEnqueueTask()");

    clFinish(cmd_queue[i]);

    ecode = clEnqueueReadBuffer(cmd_queue[i],
                                m_rms[i],
                                CL_TRUE,
                                0, sizeof(double)*5,
                                &g_rms[i],
                                0, NULL, NULL);
    clu_CheckError(ecode, "clEnqueueReadBuffer()");
  }

  for (m = 0; m < 5; m++) {
    rms[m] = 0.0;
  }

  for (i = 0; i < num_devices; i++) {
    ecode = clFinish(cmd_queue[i]);
    clu_CheckError(ecode, "clFinish()");
  }

  // reduction
  for (i = 0; i < num_devices; i++) {
    for (m = 0; m < 5; m++) {
      rms[m] += g_rms[i][m];
    }
  }
  
  for (m = 0; m < 5; m++) {
    for (d = 0; d < 3; d++) {
      rms[m] = rms[m] / (double)(grid_points[d]-2);
    }
    rms[m] = sqrt(rms[m]);
  }

  for (i = 0; i < num_devices; i++) {
    clReleaseMemObject(m_rms[i]);
    clReleaseKernel(k_en[i]);
  }
  free(g_rms);
  free(m_rms);
  free(k_en);
}