/*
 * Runs the "compiler_mandelbrot_alternate" kernel over a w x h 2D range,
 * then writes the mapped output buffer to a BMP file and checks it against
 * the reference image.
 */
static void compiler_mandelbrot_alternate(void)
{
  OCL_CREATE_KERNEL("compiler_mandelbrot_alternate");

  /* Output buffer: four bytes for each of the w * h positions */
  const size_t image_bytes = w * h * sizeof(char[4]);
  OCL_CREATE_BUFFER(buf[0], 0, image_bytes, NULL);

  /* Kernel arguments: output buffer, 1/w, 1/h and the criterium value */
  const float inv_width = 1.f / static_cast<float>(w);
  const float inv_height = 1.f / static_cast<float>(h);
  OCL_CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &buf[0]);
  OCL_CALL (clSetKernelArg, kernel, 1, sizeof(float), &inv_width);
  OCL_CALL (clSetKernelArg, kernel, 2, sizeof(float), &inv_height);
  OCL_CALL (clSetKernelArg, kernel, 3, sizeof(float), &criterium);

  /* One work item per position, launched with a 16x1 local size */
  const size_t global_size[2] = {w, h};
  const size_t local_size[2] = {16, 1};
  OCL_CALL (clEnqueueNDRangeKernel, queue, kernel, 2, NULL,
            global_size, local_size, 0, NULL, NULL);

  /* Expose the result on the host side */
  OCL_MAP_BUFFER(0);
  dst = (int *) buf_data[0];

  /* Dump the computed image; handy when debugging a mismatch */
  cl_write_bmp(dst, w, h, "compiler_mandelbrot_alternate.bmp");

  /* Check the result against the golden image */
  OCL_CHECK_IMAGE(dst, w, h, "compiler_mandelbrot_alternate_ref.bmp");
}
/*
 * Test driver for the "test_write_only" kernel.
 *
 * Creates a series of buffers one after another; for each of them the kernel
 * is run with the buffer as its only argument, the buffer is mapped, and the
 * host checks that element i holds the value i. All buffers stay alive until
 * the end of the run, where they are released and the runtime's unfreed
 * object count is reported.
 *
 * Returns 0 on success, or the first non-zero status reported by a failing
 * call. A content mismatch terminates the process with exit(-1).
 */
int
main(int argc, char *argv[])
{
  const uint32_t buffer_num = 24;     // number of buffers created and checked
  cl_mem dst[buffer_num];
  uint32_t created = 0;               // dst[0 .. created) hold live buffers
  int initialized = 0;                // non-zero once cl_test_init succeeded
  int *dst_buffer = NULL;
  const size_t n = 32 * 1024 * 1024;
  const size_t global_work_size = n;
  const size_t local_work_size = 16;
  int status = 0;

  if ((status = cl_test_init("test_write_only.cl", "test_write_only", SOURCE)) != 0)
    goto error;
  initialized = 1;

  for (uint32_t j = 0; j < buffer_num; ++j)
  {
    // Allocate the destination buffer for this round
    dst[j] = clCreateBuffer(ctx, 0, n * sizeof(uint32_t), NULL, &status);
    if (status != CL_SUCCESS) goto error;
    created = j + 1;

    // Bind the buffer as the kernel's only argument
    OCL_CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &dst[j]);

    // Run the kernel, one work item per element
    OCL_CALL (clEnqueueNDRangeKernel, queue,
                                  kernel,
                                  1,
                                  NULL,
                                  &global_work_size,
                                  &local_work_size,
                                  0,
                                  NULL,
                                  NULL);

    // Be sure that everything ran fine: element i must contain i
    dst_buffer = (int *) clMapBufferIntel(dst[j], &status);
    if (status != CL_SUCCESS)
      goto error;
    for (uint32_t i = 0; i < n; ++i)
      if (dst_buffer[i] != int(i)) {
        // NOTE(review): message looks inherited from another test - confirm
        fprintf(stderr, "run-time flat address space failed\n");
        exit(-1);
      }
    OCL_CALL (clUnmapBufferIntel, dst[j]);
  }

error:
  // Shared exit path, reached on success and on failure alike, so that the
  // buffers created so far and the test context are released in every case.
  // clReleaseMemObject is called directly (not through OCL_CALL, whose
  // definition is not visible here) so a failing release cannot re-enter
  // this path; the first failure seen is the one reported.
  for (uint32_t j = 0; j < created; ++j) {
    const int rc = clReleaseMemObject(dst[j]);
    if (status == CL_SUCCESS) status = rc;
  }
  if (initialized) cl_test_destroy();

  // The leak report is only meaningful after a clean run
  if (status == CL_SUCCESS) {
    const int unfreed = clReportUnfreedIntel();
    printf("%i memory leaks\n", unfreed);
    assert(unfreed == 0);
  }
  return status;
}