Ejemplo n.º 1
0
/*
 * Runs the randomized quasi-Monte Carlo example on an OpenCL device.
 *
 * Creates a lattice rule point set and a table of random shifts
 * (replications x DIMENSION values), uploads both to device buffers,
 * launches the "simulateWithRQMC" kernel, reads the output buffer back and
 * prints the lattice information and a report to stdout.
 *
 * context, device, queue: OpenCL objects supplied by the caller; they are
 *     used here but not released.
 * data_: pointer to a TaskData providing points, points_per_work_item,
 *     replications and replications_per_work_item.
 *
 * Returns EXIT_SUCCESS when the run completes, or EXIT_FAILURE when a
 * host-side allocation fails. All other failures are handed to
 * check_error(), which is defined elsewhere.
 * NOTE(review): check_error() presumably terminates on a non-success code;
 * confirm against its definition.
 */
int task(cl_context context, cl_device_id device, cl_command_queue queue, void* data_)
{
  const TaskData* data = (const TaskData*) data_;
  cl_int err;

  // The zero test comes first so the modulo is never evaluated with a zero divisor.
  if (data->points_per_work_item == 0 || data->points % data->points_per_work_item)
    check_error(CLQMC_INVALID_VALUE, "points must be a multiple of points_per_work_item");

  if (data->replications_per_work_item == 0 || data->replications % data->replications_per_work_item)
    check_error(CLQMC_INVALID_VALUE, "replications must be a multiple of replications_per_work_item");


  // Lattice buffer

  size_t pointset_size;
  // gen_vec is given in common.c
  clqmcLatticeRule* pointset = clqmcLatticeRuleCreate(data->points, DIMENSION, gen_vec, &pointset_size, &err);
  check_error(err, NULL);

  cl_mem pointset_buf = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
      pointset_size, pointset, &err);
  check_error(err, "cannot create point set buffer");


  // Shifts buffer

  // Widen to size_t before multiplying so the byte count is not computed in a narrower type.
  const size_t shifts_bytes = (size_t) data->replications * DIMENSION * sizeof(clqmc_fptype);
  clqmc_fptype* shifts = (clqmc_fptype*) malloc(shifts_bytes);
  if (shifts == NULL) {
    fprintf(stderr, "cannot allocate host memory for shifts\n");
    clReleaseMemObject(pointset_buf);
    clqmcLatticeRuleDestroy(pointset);
    return EXIT_FAILURE;
  }

  // populate random shifts using a random stream
  clrngMrg31k3pStream* stream = clrngMrg31k3pCreateStreams(NULL, 1, NULL, &err);
  check_error(err, NULL);
  for (cl_uint i = 0; i < data->replications; i++)
      for (cl_uint j = 0; j < DIMENSION; j++)
          shifts[(size_t) i * DIMENSION + j] = clrngMrg31k3pRandomU01(stream);
  err = clrngMrg31k3pDestroyStreams(stream);
  check_error(err, NULL);

  cl_mem shifts_buf = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
      shifts_bytes, shifts, &err);
  check_error(err, "cannot create shifts buffer");

  // CL_MEM_COPY_HOST_PTR copies the host data when the buffer is created,
  // so the host-side staging array is no longer needed.
  free(shifts);
  shifts = NULL;


  // Output buffer

  size_t points_block_count = data->points / data->points_per_work_item;
  const size_t output_bytes = (size_t) data->replications * points_block_count * sizeof(clqmc_fptype);
  cl_mem output_buf = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_HOST_READ_ONLY,
      output_bytes, NULL, &err);
  check_error(err, "cannot create output buffer");


  // OpenCL kernel

  cl_program program = build_program_from_file(context, device,
      "client/DocsTutorial/example4_kernel.cl",
      NULL);
  // NOTE(review): build_program_from_file() is not given &err, so this check
  // re-tests the value left by the previous call; confirm how the helper
  // reports build failures.
  check_error(err, NULL);
  cl_kernel kernel = clCreateKernel(program, "simulateWithRQMC", &err);
  check_error(err, "cannot create kernel");

  // Argument order must match the kernel's parameter list.
  int iarg = 0;
  err  = clSetKernelArg(kernel, iarg++, sizeof(pointset_buf), &pointset_buf);
  err |= clSetKernelArg(kernel, iarg++, sizeof(shifts_buf), &shifts_buf);
  err |= clSetKernelArg(kernel, iarg++, sizeof(data->points_per_work_item), &data->points_per_work_item);
  err |= clSetKernelArg(kernel, iarg++, sizeof(data->replications), &data->replications);
  err |= clSetKernelArg(kernel, iarg++, sizeof(output_buf), &output_buf);
  check_error(err, "cannot set kernel arguments");


  // Execution

  cl_event ev;
  size_t global_size = (data->replications / data->replications_per_work_item) * points_block_count;
  err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, &ev);
  check_error(err, "cannot enqueue kernel");

  err = clWaitForEvents(1, &ev);
  check_error(err, "error waiting for events");

  int status = EXIT_FAILURE;
  clqmc_fptype* output = (clqmc_fptype*) malloc(output_bytes);
  if (output == NULL) {
    // Fall through to the shared clean-up below instead of reading into NULL.
    fprintf(stderr, "cannot allocate host memory for output\n");
  }
  else {
    err = clEnqueueReadBuffer(queue, output_buf, CL_TRUE, 0,
        output_bytes, output, 0, NULL, NULL);
    check_error(err, "cannot read output buffer");

    printf("\nAdvanced randomized quasi-Monte Carlo integration:\n\n");

    err = clqmcLatticeRuleWriteInfo(pointset, stdout);
    check_error(err, NULL);
    printf("\n");

    rqmcReport(data->replications, data->points, points_block_count, output);

    status = EXIT_SUCCESS;
  }


  // Clean up

  clReleaseEvent(ev);
  clReleaseMemObject(output_buf);
  clReleaseMemObject(shifts_buf);
  clReleaseMemObject(pointset_buf);
  clReleaseKernel(kernel);
  clReleaseProgram(program);

  free(output);
  err = clqmcLatticeRuleDestroy(pointset);
  check_error(err, NULL);

  return status;
}
Ejemplo n.º 2
0
/*
 * Allocates an opencl_plugin and fills it in.
 *
 * Steps, in order: select the platform requested as "NVIDIA", pick a GPU
 * device on it, create a context, build the program from "program.cl",
 * create one main command queue plus an array of 50 additional queues, and
 * create the "voxelize" kernel.
 *
 * plugin_out: must not be NULL (asserted). Receives the new plugin on
 *     success and is left untouched on failure.
 *
 * Returns 0 on success. On failure every OpenCL object created so far and
 * the plugin itself are released, and -1 is returned.
 *
 * NOTE(review): CHECK_ALLOCATION and CHECK_CL_ERROR are defined elsewhere;
 * they presumably jump to the `error` label on failure. Confirm.
 */
OPENCL_EXPERIMENTS_EXPORT
cl_int opencl_plugin_create(opencl_plugin *plugin_out)
{
    cl_int err = CL_SUCCESS;
    opencl_plugin plugin;
    cl_int i;
    cl_int num_queues = 50;

    assert(plugin_out != NULL);

    /* calloc leaves every handle NULL, which the error path relies on. */
    plugin = calloc(1, sizeof(*plugin));
    CHECK_ALLOCATION(plugin);

    if (get_desired_platform("NVIDIA", &plugin->selected_platform, &err))
        goto error;

    if (get_gpu_device_id(plugin->selected_platform, &plugin->selected_device,
                          CL_TRUE, &err))
        goto error;

    if (create_context(plugin->selected_platform, plugin->selected_device,
                       &plugin->context, &err))
        goto error;

    if (build_program_from_file("program.cl", NULL, plugin->context,
                                plugin->selected_device, &plugin->program, &err))
        goto error;

    plugin->queue = clCreateCommandQueue(plugin->context, plugin->selected_device, 0, &err);
    CHECK_CL_ERROR(err);

    plugin->num_queues = num_queues;
    plugin->queues = calloc(num_queues, sizeof(cl_command_queue));
    CHECK_ALLOCATION(plugin->queues);

    for (i = 0; i < num_queues; i++) {
        plugin->queues[i] = clCreateCommandQueue(plugin->context, plugin->selected_device, 0, &err);
        CHECK_CL_ERROR(err);
    }

    plugin->voxelize_kernel = clCreateKernel(plugin->program, "voxelize", &err);
    CHECK_CL_ERROR(err);

    *plugin_out = plugin;
    return 0;
error:
    /* Release in reverse order of creation; handles never created are NULL. */
    if (plugin) {
        if (plugin->voxelize_kernel)
            clReleaseKernel(plugin->voxelize_kernel);
        if (plugin->queues) {
            for (i = 0; i < num_queues; i++) {
                if (plugin->queues[i])
                    clReleaseCommandQueue(plugin->queues[i]);
            }
            free(plugin->queues);
        }
        if (plugin->queue)
            clReleaseCommandQueue(plugin->queue);
        /* The program was previously leaked on this path. */
        if (plugin->program)
            clReleaseProgram(plugin->program);
        if (plugin->context)
            clReleaseContext(plugin->context);
        free(plugin);
    }
    return -1;
}