コード例 #1
0
ファイル: Reduce.c プロジェクト: BillTheBest/load-balancer
void setupGPU()
{
	// Retrieve an OpenCL platform
	cl_uint num_platforms = 0;
	int err = 0;
	err = clGetPlatformIDs(0, NULL, &num_platforms);

	cl_platform_id* platform_ids = (cl_platform_id*)(malloc(sizeof(cl_platform_id) * num_platforms));

	err = clGetPlatformIDs(num_platforms, platform_ids, NULL);
	CHKERR(err, "Failed to get a platform!");
	
	// Connect to a compute device
	int i = 0;
	for(i = 0; i < num_platforms; i++)
	{
		cl_device_id device_id;
		err = clGetDeviceIDs(platform_ids[i], CL_DEVICE_TYPE_CPU, 1, &device_id, NULL);
		if(err != CL_DEVICE_NOT_FOUND) 
		{
			CHKERR(err, "Failed to create a device group!");
			device_id_cpu = device_id;
		}
		
		err = clGetDeviceIDs(platform_ids[i], CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
		if(err != CL_DEVICE_NOT_FOUND) 
		{
			CHKERR(err, "Failed to create a device group!");
			device_id_gpu = device_id;	
		}
	}
	free(platform_ids);	


	if(scheme != GPU_ONLY)
	{
		context_cpu = clCreateContext(NULL, 1, &device_id_cpu, NULL, NULL, &err);
		CHKERR(err, "Failed to create a compute context!");
		commands_cpu = clCreateCommandQueue(context_cpu, device_id_cpu, 0, &err);
		CHKERR(err, "Failed to create a command queue!");
		kernel_compute_cpu = create_kernel(KernelSourceFile_cpu, "compute", context_cpu, device_id_cpu);
	}

	if(scheme != CPU_ONLY)
	{
		context_gpu = clCreateContext(NULL, 1, &device_id_gpu, NULL, NULL, &err);
		CHKERR(err, "Failed to create a compute context!");
		commands_gpu = clCreateCommandQueue(context_gpu, device_id_gpu, 0, &err);
		CHKERR(err, "Failed to create a command queue!");
		kernel_compute_gpu = create_kernel(KernelSourceFile_gpu, "compute", context_gpu, device_id_gpu);
	}

}
コード例 #2
0
ファイル: main.cpp プロジェクト: CallForSanity/svd3
int main()
{
	// create a compute context and command queue
	auto ctx = boost::compute::system::default_context();
	auto queue = boost::compute::system::default_queue();

	// create program and kernels
	std::ostringstream source;
	get_file_contents(source, "svd3.cl");
	auto program = boost::compute::program::build_with_source(source.str(), ctx);
	auto svdArrayTestKernel = program.create_kernel("svdArrayTest");

	EigenMatN A_host = EigenMatN::Identity();
	boost::compute::vector<cl_float> A_dev(A_host.data(), A_host.data() + N_*N_, queue);
	boost::compute::vector<cl_float> U_dev(N_*N_), S_dev(N_*N_), V_dev(N_*N_);

	svdArrayTestKernel.set_args(A_dev, U_dev, S_dev, V_dev);
	queue.enqueue_1d_range_kernel(svdArrayTestKernel, 0, 1, 0);

	EigenMatN U_host,S_host,V_host;
	boost::compute::copy(U_dev.begin(), U_dev.end(), U_host.data());
	boost::compute::copy(S_dev.begin(), S_dev.end(), S_host.data());
	boost::compute::copy(V_dev.begin(), V_dev.end(), V_host.data());

	std::cout << A_host << std::endl;
	std::cout << U_host << std::endl;
	std::cout << S_host << std::endl;
	std::cout << V_host << std::endl;
	
    return 0;
}
コード例 #3
0
ファイル: opengl.cpp プロジェクト: parvizp/Halide
// Called at the beginning of a code block generated by Halide. This function
// is responsible for setting up the OpenGL environment and compiling the GLSL
// code into a fragment shader.
EXPORT void *halide_opengl_init_kernels(void *user_context, void *state_ptr,
                                        const char *src, int size) {
    // TODO: handle error
    if (int error = halide_opengl_init(user_context)) {
        return NULL;
    }

    // Use '/// KERNEL' comments to split 'src' into discrete blocks, one for
    // each kernel contained in it.
    char *begin = strstr(src, kernel_marker);
    char *end = NULL;
    for (; begin && begin[0]; begin = end) {
        end = strstr(begin + sizeof(kernel_marker) - 1, kernel_marker);
        if (!end) {
            end = begin + strlen(begin);
        }
        HalideOpenGLKernel *kernel = create_kernel(user_context, begin, end - begin);
        if (!kernel) {
            // Simply skip invalid kernels
            continue;
        }

#ifdef DEBUG
        halide_printf(user_context, "Defining kernel '%s'\n", kernel->name);
#endif
        // Compile shader
        kernel->shader_id = halide_opengl_make_shader(user_context, GL_FRAGMENT_SHADER,
                                                      kernel->source, NULL);

        // Link GLSL program
        GLuint program = ST.CreateProgram();
        ST.AttachShader(program, ST.vertex_shader_id);
        ST.AttachShader(program, kernel->shader_id);
        ST.LinkProgram(program);
        GLint status;
        ST.GetProgramiv(program, GL_LINK_STATUS, &status);
        if (!status) {
            halide_printf(user_context, "Could not link GLSL program:\n");
            GLint log_len;
            ST.GetProgramiv(program, GL_INFO_LOG_LENGTH, &log_len);
            char *log = (char*) malloc(log_len);
            ST.GetProgramInfoLog(program, log_len, NULL, log);
            halide_printf(user_context, "%s", log);
            free(log);
            ST.DeleteProgram(program);
            program = 0;
        }
        kernel->program_id = program;

        if (halide_opengl_find_kernel(kernel->name)) {
            halide_printf(user_context, "Duplicate kernel name '%s'\n", kernel->name);
            halide_opengl_delete_kernel(user_context, kernel);
        } else {
            kernel->next = ST.kernels;
            ST.kernels = kernel;
        }
    }
    return NULL;
}
コード例 #4
0
cl_kernel cl_manager::get_kernel(const std::string& filename, std::string entry_proc)
{
    if (FAILED(create_kernel(filename, entry_proc))) {
        std::cout << "Shutting down ..." << std::endl;
        exit(-1);
    }

    return program_to_kernels[filename][entry_proc];
}
コード例 #5
0
ファイル: bmodel.cpp プロジェクト: grafin/Diplom
void BModel::wiener_filter(const int r, const double sigma, const double S)
{
    IplImage *reKernel = cvCreateImage(cvGetSize(_tempImageSrc), IPL_DEPTH_64F, 1);
    IplImage *image = cvCreateImage(cvGetSize(_tempImageSrc), IPL_DEPTH_64F, 4);
    IplImage *reRImage = cvCreateImage(cvGetSize(_tempImageSrc), IPL_DEPTH_64F, 1);
    IplImage *reGImage = cvCreateImage(cvGetSize(_tempImageSrc), IPL_DEPTH_64F, 1);
    IplImage *reBImage = cvCreateImage(cvGetSize(_tempImageSrc), IPL_DEPTH_64F, 1);

    IplImage *kernel = cvCreateImage(cvGetSize(_tempImageSrc), IPL_DEPTH_64F, 2);
    IplImage *rImage = cvCreateImage(cvGetSize(_tempImageSrc), IPL_DEPTH_64F, 2);
    IplImage *gImage = cvCreateImage(cvGetSize(_tempImageSrc), IPL_DEPTH_64F, 2);
    IplImage *bImage = cvCreateImage(cvGetSize(_tempImageSrc), IPL_DEPTH_64F, 2);

    IplImage *imaginary = cvCreateImage(cvGetSize(_tempImageSrc), IPL_DEPTH_64F, 1);

    cvZero(imaginary);
    cvZero(reKernel);
    create_kernel(r, sigma, reKernel);
    cvConvertScale(_tempImageSrc, image, 1/255.);
    cvSplit(image, reRImage, reGImage, reBImage, 0);

    cvMerge(reKernel, imaginary, 0, 0, kernel);
    cvMerge(reRImage, imaginary, 0, 0, rImage);
    cvMerge(reGImage, imaginary, 0, 0, gImage);
    cvMerge(reBImage, imaginary, 0, 0, bImage);

    wiener_filter_chanel(rImage, kernel, S);
    cvSplit(rImage, reRImage, imaginary, 0, 0);
    wiener_filter_chanel(gImage, kernel, S);
    cvSplit(gImage, reGImage, imaginary, 0, 0);
    wiener_filter_chanel(bImage, kernel, S);
    cvSplit(bImage, reBImage, imaginary, 0, 0);

    cvMerge(reRImage, reGImage, reBImage, 0, image);
    cvConvertScale(image, _tempImageDst, 255);
    remap_image(_tempImageDst, -r);
    change_filt_image();
    create_temp_image(_srcImage);

    cvReleaseImage(&reKernel);
    cvReleaseImage(&image);
    cvReleaseImage(&reRImage);
    cvReleaseImage(&reGImage);
    cvReleaseImage(&reBImage);
    cvReleaseImage(&kernel);
    cvReleaseImage(&rImage);
    cvReleaseImage(&gImage);
    cvReleaseImage(&bImage);
    cvReleaseImage(&imaginary);
}
コード例 #6
0
void AttentionMap::init(int fps, int width, int height)
{
	m_heatmap = Mat::zeros(height, width, CV_32FC1);
	m_ones = Mat::ones(height, width, CV_32F);
	m_zeros = Mat::zeros(height, width, CV_32F);

	m_fade_mat = Mat::ones(height, width, CV_32F);
	// Determine how much to fade the heatmap values by each frame
	m_fade_mat.setTo((1.0 / fps) / g_fade_time);
	// Create heatmap kernel
	
	create_kernel();

	m_last_update_time = 0;
}
コード例 #7
0
ファイル: kernelgen.c プロジェクト: jiapei100/gimp
int
main (int    argc,
      char **argv)
{
  int    i, j;
  double x, y;

  printf ("/* gimpbrushcore-kernels.h\n"
          " *\n"
          " *   This file was generated using kernelgen as found in the tools dir.\n");
  printf (" *   (threshold = %g)\n", THRESHOLD);
  printf (" */\n\n");
  printf ("#ifndef __GIMP_BRUSH_CORE_KERNELS_H__\n");
  printf ("#define __GIMP_BRUSH_CORE_KERNELS_H__\n\n");
  printf ("#define KERNEL_WIDTH     %d\n", KERNEL_WIDTH);
  printf ("#define KERNEL_HEIGHT    %d\n", KERNEL_HEIGHT);
  printf ("#define KERNEL_SUBSAMPLE %d\n", SUBSAMPLE);
  printf ("#define KERNEL_SUM       %d\n", KERNEL_SUM);
  printf ("\n\n");
  printf ("/*  Brush pixel subsampling kernels  */\n");
  printf ("static const int subsample[%d][%d][%d] =\n{\n",
          SUBSAMPLE + 1, SUBSAMPLE + 1, KERNEL_WIDTH * KERNEL_HEIGHT);

  for (j = 0; j <= SUBSAMPLE; j++)
    {
      y = (double) j / (double) SUBSAMPLE;

      printf ("  {\n");

      for (i = 0; i <= SUBSAMPLE; i++)
        {
          x = (double) i / (double) SUBSAMPLE;

          printf ("    {");
          create_kernel (x, y);
          printf (" }%s", i < SUBSAMPLE ? ",\n" : "\n");
        }

      printf ("  }%s", j < SUBSAMPLE ? ",\n" : "\n");
    }

  printf ("};\n\n");

  printf ("#endif /* __GIMP_BRUSH_CORE_KERNELS_H__ */\n");

  return 0;
}
コード例 #8
0
ファイル: ufo-resources.c プロジェクト: Dynalon/ufo-core
/**
 * ufo_resources_get_kernel_from_source:
 * @resources: A #UfoResources
 * @source: OpenCL source string
 * @kernel: Name of a kernel or %NULL
 * @error: Return location for a GError from #UfoResourcesError, or NULL
 *
 * Loads and builds a kernel from a string. If @kernel is %NULL, the first
 * kernel defined in @source is used.
 *
 * Returns: (transfer none): a cl_kernel object that is load from @filename
 */
gpointer
ufo_resources_get_kernel_from_source (UfoResources *resources,
                                      const gchar *source,
                                      const gchar *kernel,
                                      GError **error)
{
    UfoResourcesPrivate *priv;
    cl_program program;

    g_return_val_if_fail (UFO_IS_RESOURCES (resources) &&
                          (source != NULL), NULL);

    priv = UFO_RESOURCES_GET_PRIVATE (resources);
    program = add_program_from_source (priv, source, NULL, error);
    g_debug ("Added program %p from source", (gpointer) program);
    return create_kernel (priv, program, kernel, error);
}
コード例 #9
0
ファイル: ufo-resources.c プロジェクト: Dynalon/ufo-core
/**
 * ufo_resources_get_kernel:
 * @resources: A #UfoResources object
 * @filename: Name of the .cl kernel file
 * @kernel: Name of a kernel, or %NULL
 * @error: Return location for a GError from #UfoResourcesError, or %NULL
 *
 * Loads a and builds a kernel from a file. The file is searched in the current
 * working directory and all paths added through ufo_resources_add_paths (). If
 * @kernel is %NULL, the first encountered kernel is returned.
 *
 * Returns: (transfer none): a cl_kernel object that is load from @filename or %NULL on error
 */
gpointer
ufo_resources_get_kernel (UfoResources *resources,
                          const gchar *filename,
                          const gchar *kernelname,
                          GError **error)
{
    UfoResourcesPrivate *priv;
    gchar *path;
    gchar *buffer;
    cl_program program;

    g_return_val_if_fail (UFO_IS_RESOURCES (resources) &&
                          (filename != NULL), NULL);

    priv = resources->priv;
    path = lookup_kernel_path (priv, filename);

    if (path == NULL) {
        g_set_error (error, UFO_RESOURCES_ERROR, UFO_RESOURCES_ERROR_LOAD_PROGRAM,
                     "Could not find `%s'. Maybe you forgot to pass a configuration?", filename);
        return NULL;
    }

    buffer = read_file (path);
    g_free (path);

    if (buffer == NULL) {
        g_set_error (error, UFO_RESOURCES_ERROR, UFO_RESOURCES_ERROR_LOAD_PROGRAM,
                     "Could not open `%s'", filename);
        return NULL;
    }

    program = add_program_from_source (priv, buffer, "", error);
    g_debug ("Added program %p from `%s`", (gpointer) program, filename);
    g_free (buffer);

    return create_kernel (priv, program, kernelname, error);
}
コード例 #10
0
int main (void) {
  int *a;
  cl_mem a_in;
  cl_event event;
  cl_kernel kernel;
  cl_context context;
  cl_program program;
  cl_uint devices_num;
  char *program_source;
  cl_device_id device_id;
  cl_platform_id platform_id;
  cl_command_queue command_queue;

  program_source = (char *) calloc (1000, sizeof (char));
  program_source = readKernel ();

  /* number of platforms on the system */
  platforms_number ();

  /* id of the first platform proposed by the system */
  platform_id = get_platform ();

  /* number of devices on the platform specified by platform_id */
  devices_num = devices_number (platform_id);

  /* id of the first device proposed by the system on the platform
     specified by platform_id */
  device_id = create_device (platform_id);

  /* create a context to stablish a communication channel between the
     host process and the device */
  context = create_context (device_id);

  /* create a program providing the source code */
  program = create_program (context, program_source);

  /* compile the program for the specific device architecture */
  build_program (program, device_id);

  /* create a kernel given the program */
  kernel = create_kernel (program);

  /* create a memory object, in this case this will be an array of
     integers of length specified by the LENGTH macro */
  a = create_memory_object (LENGTH, "a");

  /* create a buffer, this will be allocated on the global memory of
     the device */
  a_in = create_buffer (LENGTH, context, "a_in");

  /* assign this buffer as the only kernel argument */
  set_kernel_argument (kernel, a_in, 0, "a_in");

  /* create a command queue, here we can enqueue tasks for the device
     specified by device_id */
  command_queue = create_command_queue (context, device_id);

  /* copy the memory object allocated on the host memory into the
     buffer created on the global memory of the device */
  enqueue_write_buffer_task (command_queue, a_in, LENGTH, a, "a_in");

  /* enqueue a task to execute the kernel on the device */
  event = enqueue_kernel_execution (command_queue, kernel, LENGTH, 0, NULL);
  enqueue_kernel_execution (command_queue, kernel, LENGTH, 1, &event);

  /* copy the content of the buffer from the global memory of the
     device to the host memory */
  enqueue_read_buffer_task (command_queue, a_in, LENGTH,  a, "a_in");

  /* print the memory object with the result of the execution */
  print_memory_object (a, LENGTH, "a");

  return 0;
}
コード例 #11
0
void go_cl(config c)
{
    cl_int error;
    cl_platform_id platform;
    cl_device_id device;
    cl_context context;
    cl_command_queue commands;
    cl_program program;
    cl_kernel kernel;
    cl_event ev;
    cl_mem mem;
    float *result = NULL;
    char buf[256];
    int work_size[2] = { 128, 128 };
    int offset[2];
    int x, y;

    cl_float4 camera_pos = { c.camera_pos.x, c.camera_pos.y, c.camera_pos.z, 0 };
    cl_float4 camera_dir = { 
        c.camera_target.x - c.camera_pos.x, 
        c.camera_target.y - c.camera_pos.y, 
        c.camera_target.z - c.camera_pos.z, 
        0 };
    cl_float4 light_pos = { c.light_pos.x, c.light_pos.y, c.light_pos.z, 0 };
    cl_int2 image_size = { c.width, c.height };

    sprintf(buf, "-DBAILOUT=%d -DSCALE=%f -DFOV=%f", c.bailout, c.scale, c.fov);

    printf("Starting\n");

    init_cl(&platform, &device, &context, &commands);

    dump_info(platform, device);

    printf("Creating kernel\n");

    program = load_program_from_file("kernel.cl", context, device, buf);
    kernel = create_kernel(program, "test");

    printf("Setting memory\n");

    mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * c.width * c.height, NULL, &error);
    check_error(error, "Could not allocate buffer");
    error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &mem);
    error = clSetKernelArg(kernel, 1, sizeof(cl_float4), &camera_pos);
    error = clSetKernelArg(kernel, 2, sizeof(cl_float4), &camera_dir);
    error = clSetKernelArg(kernel, 3, sizeof(cl_float4), &light_pos);
    error = clSetKernelArg(kernel, 4, sizeof(cl_int2), &image_size);

    clFinish(commands);

    printf("Running\n");

    for (x = 0; x < c.width; x += work_size[0])
    {
        for (y = 0; y < c.height; y += work_size[1])
        {
            offset[0] = x;
            offset[1] = y;
            error = clEnqueueNDRangeKernel(commands, kernel, 2, offset, work_size, NULL, 0, NULL, &ev);
            printf(".");
        }
    }

    clFinish(commands);

    printf("\nWriting image\n");

    result = malloc(sizeof(float) * c.width * c.height);
    error = clEnqueueReadBuffer(commands, mem, CL_TRUE, 0, sizeof(float) * c.width * c.height, result, 0, NULL, &ev);

    clFinish(commands);

    save(result, c, 0, 0);

    free(result);
    clReleaseMemObject(mem);
    release_cl(context, program, commands, kernel);
}
コード例 #12
0
int main (void) {
  float *sum;
  cl_kernel kernel;
  cl_mem sum_buffer;
  cl_context context;
  cl_program program;
  cl_uint devices_num;
  char *program_source;
  cl_device_id device_id;
  cl_platform_id platform_id;
  cl_command_queue command_queue;

  sum = (float *) calloc (NUM_STEPS, sizeof (float));
  program_source = (char *) calloc (1000, sizeof (char));
  program_source = readKernel ();

  /* number of platforms on the system */
  platforms_number ();

  /* id of the first platform proposed by the system */
  platform_id = get_platform ();

  /* number of devices on the platform specified by platform_id */
  devices_num = devices_number (platform_id);

  /* id of the first device proposed by the system on the platform
     specified by platform_id */
  device_id = create_device (platform_id);

  /* create a context to stablish a communication channel between the
     host process and the device */
  context = create_context (device_id);

  /* create a program providing the source code */
  program = create_program (context, program_source);

  /* compile the program for the specific device architecture */
  build_program (program, device_id);\

  /* create a kernel given the program */
  kernel = create_kernel (program);

  /* create a memory object, in this case this will be float number
     that will contain the values of the partial sums */
  sum_buffer = create_buffer (context, "sum_buffer", NUM_STEPS);

  /* assign this buffer as the only kernel argument */
  set_kernel_argument (kernel, sum_buffer, 0, "sum_buffer");

  /* create a command queue, here we can enqueue tasks for the device
     specified by device_id */
  command_queue = create_command_queue (context, device_id);

  /* enqueue a task to execute the kernel on the device */
  enqueue_kernel_execution (command_queue, kernel, NUM_STEPS);

  /* copy the content of the buffer from the global memory of the
     device to the host memory */
  enqueue_read_buffer_task (command_queue, sum_buffer, NUM_STEPS, sum, "sum");

  printf (ANSI_COLOR_CYAN "\nAproximación de PI: %.10lf\n\n" ANSI_COLOR_RESET, sum[0] / NUM_STEPS);

  return 0;
}
コード例 #13
0
// This program implements a simple vector addition routine to demonstrate kernel execution
// It is mostly a straightforward port of the vector addition example from Heterogeneous Computing with OpenCL
int main() {
   // In this example, we will be summing two integer vectors of a hard-coded size
   const size_t vector_length = 64 * 1024 * 1024;
   const size_t vector_size = vector_length * sizeof(cl_int);

   // Minimal platform and device parameters are specified here
   const CLplusplus::Version target_version = CLplusplus::version_1p2;
   const cl_ulong min_mem_alloc_size = vector_size;
   const cl_ulong min_global_mem_size = 3 * vector_size;

   // Have the user select a suitable device, according to some criteria (see shared.hpp for more details)
   const auto selected_platform_and_device = Shared::select_device(
      [&](const CLplusplus::Platform & platform) -> bool {
         return (platform.version() >= target_version);                       // Platform OpenCL version is recent enough
      },
      [&](const CLplusplus::Device & device) -> bool {
         if(device.version() < target_version) return false;                  // OpenCL platforms may support older-generation devices, which we need to eliminate
         const bool device_supports_ooe_execution = device.queue_properties() & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
         return device.available() &&                                         // Device is available for compute purposes
                device.endian_little() &&                                     // Device is little-endian
                (device.execution_capabilities() & CL_EXEC_KERNEL) &&         // Device can execute OpenCL kernels
                device_supports_ooe_execution &&                              // Device can execute OpenCL commands out of order
                device.compiler_available() && device.linker_available() &&   // Implementation has an OpenCL C compiler and linker for this device
                (device.max_mem_alloc_size() >= min_mem_alloc_size) &&        // Device accepts large enough global memory allocations
                (device.global_mem_size() >= min_global_mem_size);            // Device has enough global memory
      }
   );

   // Create an OpenCL context on the device with some default parameters (see shared.hpp for more details)
   const auto context = Shared::build_default_context(selected_platform_and_device);

   // Allocate our input and output buffers
   std::cout << "Creating buffers..." << std::endl;
   const auto input_A_buffer = context.create_buffer(CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY, vector_size);
   const auto input_B_buffer = context.create_buffer(CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY, vector_size);
   const auto output_C_buffer = context.create_buffer(CL_MEM_WRITE_ONLY | CL_MEM_HOST_READ_ONLY, vector_size);

   // Create a program object from the basic vector addition example
   std::cout << "Loading program..." << std::endl;
   auto program = context.create_program_with_source_file("kernels/vector_add.cl");

   // Start an asynchronous program build
   std::cout << "Starting to build program..." << std::endl;
   const auto build_event = program.build_with_event("-cl-mad-enable -cl-no-signed-zeros -cl-std=CL1.2 -cl-kernel-arg-info");

   // Create an out-of-order command queue for the device
   const auto command_queue = context.create_command_queue(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE);

   // Generate our input data and send it to the device
   std::cout << "Generating and sending data..." << std::endl;

   std::vector<cl_int> input_A(vector_length);
   for(size_t i = 0; i < vector_length; ++i) input_A[i] = i + 1;
   const auto write_A_event = command_queue.enqueued_write_buffer(static_cast<const void *>(&(input_A[0])), false, input_A_buffer, 0, vector_size, {});

   std::vector<cl_int> input_B(vector_length);
   for(size_t i = 0; i < vector_length; ++i) input_B[i] = vector_length - i;
   const auto write_B_event = command_queue.enqueued_write_buffer(static_cast<const void *>(&(input_B[0])), false, input_B_buffer, 0, vector_size, {});

   const auto all_write_events = command_queue.enqueued_marker_with_wait_list({write_A_event, write_B_event});

   // Once the program is built, create a kernel object associated to our vector addition routine
   std::cout << std::endl;
   std::cout << "Creating a kernel for vector addition..." << std::endl;
   const auto kernel = program.create_kernel("vector_add", build_event);

   // Set its arguments as appropriate
   kernel.set_buffer_argument(0, &input_A_buffer);
   kernel.set_buffer_argument(1, &input_B_buffer);
   kernel.set_buffer_argument(2, &output_C_buffer);

   // Execute the kernel
   std::cout << "Starting the kernel..." << std::endl;
   const auto exec_event = command_queue.enqueued_1d_range_kernel(kernel, vector_length, {all_write_events});

   // Once the kernel is done, synchronously read device output back into host memory
   std::cout << "Waiting for output..." << std::endl;
   std::vector<cl_int> output_C(vector_length);
   command_queue.read_buffer(output_C_buffer, 0, static_cast<void *>(&(output_C[0])), vector_size, {exec_event});

   // Verify the output
   std::cout << std::endl;
   for(const auto & output : output_C) {
      if(output != vector_length + 1) {
         std::cout << "Incorrect output !" << std::endl;
         std::abort();
      }
   }
   std::cout << "Vector addition was performed successfully !" << std::endl;

   return 0;
}
コード例 #14
0
ファイル: program.hpp プロジェクト: hapoo/hpxcl
 static void create_kernel_sync(hpx::naming::id_type const &gid, std::string module_name, std::string kernel_name)
 {
     create_kernel(gid, module_name, kernel_name).get();
 }