コード例 #1
0
ファイル: opengl.cpp プロジェクト: parvizp/Halide
// Called at the beginning of a code block generated by Halide. This function
// is responsible for setting up the OpenGL environment and compiling the GLSL
// code into a fragment shader.
EXPORT void *halide_opengl_init_kernels(void *user_context, void *state_ptr,
                                        const char *src, int size) {
    // TODO: handle error
    if (int error = halide_opengl_init(user_context)) {
        return NULL;
    }

    // Use '/// KERNEL' comments to split 'src' into discrete blocks, one for
    // each kernel contained in it.
    char *begin = strstr(src, kernel_marker);
    char *end = NULL;
    for (; begin && begin[0]; begin = end) {
        end = strstr(begin + sizeof(kernel_marker) - 1, kernel_marker);
        if (!end) {
            end = begin + strlen(begin);
        }
        HalideOpenGLKernel *kernel = create_kernel(user_context, begin, end - begin);
        if (!kernel) {
            // Simply skip invalid kernels
            continue;
        }

#ifdef DEBUG
        halide_printf(user_context, "Defining kernel '%s'\n", kernel->name);
#endif
        // Compile shader
        kernel->shader_id = halide_opengl_make_shader(user_context, GL_FRAGMENT_SHADER,
                                                      kernel->source, NULL);

        // Link GLSL program
        GLuint program = ST.CreateProgram();
        ST.AttachShader(program, ST.vertex_shader_id);
        ST.AttachShader(program, kernel->shader_id);
        ST.LinkProgram(program);
        GLint status;
        ST.GetProgramiv(program, GL_LINK_STATUS, &status);
        if (!status) {
            halide_printf(user_context, "Could not link GLSL program:\n");
            GLint log_len;
            ST.GetProgramiv(program, GL_INFO_LOG_LENGTH, &log_len);
            char *log = (char*) malloc(log_len);
            ST.GetProgramInfoLog(program, log_len, NULL, log);
            halide_printf(user_context, "%s", log);
            free(log);
            ST.DeleteProgram(program);
            program = 0;
        }
        kernel->program_id = program;

        if (halide_opengl_find_kernel(kernel->name)) {
            halide_printf(user_context, "Duplicate kernel name '%s'\n", kernel->name);
            halide_opengl_delete_kernel(user_context, kernel);
        } else {
            kernel->next = ST.kernels;
            ST.kernels = kernel;
        }
    }
    return NULL;
}
コード例 #2
0
ファイル: opencl.cpp プロジェクト: EEmmanuel7/Halide
static cl_mem __dev_malloc(void *user_context, size_t bytes) {
    cl_mem p;
    #ifdef DEBUG
    halide_printf(user_context, "dev_malloc (%lld bytes)\n", (long long)bytes);
    #endif

    int err;
    p = clCreateBuffer(*cl_ctx, CL_MEM_READ_WRITE, bytes, NULL, &err );
    #ifdef DEBUG
    halide_printf(user_context, "    returned: %p (err: %d)\n", (void*)p, err);
    #endif
    halide_assert(user_context, p);
    return p;
}
コード例 #3
0
static int run_test(void *uc, int channels, Implementation imp, Layout layout) {
  std::string name = "Example_";
  name += std::to_string(channels);
  name += (imp == kGLSL) ? "_GLSL" : "_CPU";
  name += (layout == kChunky) ? "_Chunky" : "_Planar";
  halide_printf(uc, "\n---------------------------\n%s\n", name.c_str());
  Image<uint8_t> input(kWidth, kHeight, channels, 0, (layout == kChunky));
  Image<uint8_t> output(kWidth, kHeight, channels, 0, (layout == kChunky));
  (void) halide_smooth_buffer_host<uint8_t>(uc, kSeed, input);
  if (imp == kGLSL) {
    // Call once to ensure OpenGL is inited (we want to time the
    // cost of copy-to-device alone)
    halide_copy_to_device(uc, input, halide_opengl_device_interface());
    // Mark as dirty so the next call won't be a no-op
    input.set_host_dirty();
    {
      ScopedTimer timer(uc, name + " halide_copy_to_device input");
      halide_copy_to_device(uc, input, halide_opengl_device_interface());
    }
    {
      ScopedTimer timer(uc, name + " halide_copy_to_device output");
      halide_copy_to_device(uc, output, halide_opengl_device_interface());
    }
  }
  // Call once to compile shader, warm up, etc.
  ExampleFunc example = exampleFuncs[channels-1][imp];
  (void) example(input, output);
  {
    ScopedTimer timer(uc, name, kIter);
    for (int i = 0; i < kIter; ++i) {
      (void) example(input, output);
    }
  }
  if (imp == kGLSL) {
    ScopedTimer timer(uc, name + " halide_copy_to_host");
    halide_copy_to_host(uc, output);
  }
  // halide_buffer_display(input);
  // halide_buffer_print(input);
  // halide_buffer_display(output);
  // halide_buffer_print(output);
  int errors = check<uint8_t>(input, output);
  if (errors) {
    halide_errorf(uc, "Test %s had %d errors!\n\n", name.c_str(), errors);
  } else {
    halide_printf(uc, "Test %s had no errors.\n\n", name.c_str());
  }
  return errors;
}
コード例 #4
0
ファイル: cuda.cpp プロジェクト: EEmmanuel7/Halide
WEAK void halide_dev_malloc(void *user_context, buffer_t* buf) {
    if (buf->dev) {
        // This buffer already has a device allocation
        return;
    }

    size_t size = __buf_size(user_context, buf);

    #ifdef DEBUG
    halide_printf(user_context, "dev_malloc allocating buffer of %zd bytes, "
                  "extents: %zdx%zdx%zdx%zd strides: %zdx%zdx%zdx%zd (%d bytes per element)\n",
                  size, buf->extent[0], buf->extent[1], buf->extent[2], buf->extent[3],
                  buf->stride[0], buf->stride[1], buf->stride[2], buf->stride[3],
                  buf->elem_size);
    #endif

    CUdeviceptr p;
    TIME_CALL( cuMemAlloc(&p, size), "dev_malloc");

    buf->dev = (uint64_t)p;
    halide_assert(user_context, buf->dev);

    #ifdef DEBUG
    halide_assert(user_context, halide_validate_dev_pointer(user_context, buf));
    #endif
}
コード例 #5
0
ファイル: cuda.cpp プロジェクト: bnascimento/Halide
// Used to generate correct timings when tracing
WEAK int halide_dev_sync(void *user_context) {
    DEBUG_PRINTF( user_context, "CUDA: halide_dev_sync (user_context: %p)\n", user_context );

    CudaContext ctx(user_context);
    if (ctx.error != CUDA_SUCCESS) {
        return ctx.error;
    }

    #ifdef DEBUG
    uint64_t t_before = halide_current_time_ns(user_context);
    #endif

    CUresult err = cuCtxSynchronize();
    if (err != CUDA_SUCCESS) {
        halide_error_varargs(user_context, "CUDA: cuCtxSynchronize failed (%s)",
                             _get_error_name(err));
        return err;
    }

    #ifdef DEBUG
    uint64_t t_after = halide_current_time_ns(user_context);
    halide_printf(user_context, "    Time: %f ms\n", (t_after - t_before) / 1.0e6);
    #endif

    return 0;
}
コード例 #6
0
ファイル: cuda.cpp プロジェクト: bnascimento/Halide
WEAK int halide_dev_run(void *user_context,
                        void *state_ptr,
                        const char* entry_name,
                        int blocksX, int blocksY, int blocksZ,
                        int threadsX, int threadsY, int threadsZ,
                        int shared_mem_bytes,
                        size_t arg_sizes[],
                        void* args[]) {
    DEBUG_PRINTF( user_context, "CUDA: halide_dev_run (user_context: %p, entry: %s, blocks: %dx%dx%d, threads: %dx%dx%d, shmem: %d)\n",
                  user_context, entry_name,
                  blocksX, blocksY, blocksZ,
                  threadsX, threadsY, threadsZ,
                  shared_mem_bytes );

    CUresult err;
    CudaContext ctx(user_context);
    if (ctx.error != CUDA_SUCCESS) {
        return ctx.error;
    }

    #ifdef DEBUG
    uint64_t t_before = halide_current_time_ns(user_context);
    #endif

    halide_assert(user_context, state_ptr);
    CUmodule mod = ((module_state*)state_ptr)->module;
    halide_assert(user_context, mod);
    CUfunction f;
    err = cuModuleGetFunction(&f, mod, entry_name);
    if (err != CUDA_SUCCESS) {
        halide_error_varargs(user_context, "CUDA: cuModuleGetFunction failed (%s)",
                             _get_error_name(err));
        return err;
    }

    err = cuLaunchKernel(f,
                         blocksX,  blocksY,  blocksZ,
                         threadsX, threadsY, threadsZ,
                         shared_mem_bytes,
                         NULL, // stream
                         args,
                         NULL);
    if (err != CUDA_SUCCESS) {
        halide_error_varargs(user_context, "CUDA: cuLaunchKernel failed (%s)",
                             _get_error_name(err));
        return err;
    }

    #ifdef DEBUG
    err = cuCtxSynchronize();
    if (err != CUDA_SUCCESS) {
        halide_error_varargs(user_context, "CUDA: cuCtxSynchronize failed (%s)\n",
                             _get_error_name(err));
        return err;
    }
    uint64_t t_after = halide_current_time_ns(user_context);
    halide_printf(user_context, "    Time: %f ms\n", (t_after - t_before) / 1.0e6);
    #endif
    return 0;
}
コード例 #7
0
ファイル: opencl.cpp プロジェクト: jacobke/Halide
WEAK bool halide_validate_dev_pointer(buffer_t* buf, size_t size=0) {
    if (buf->dev == 0)
      return true;

    size_t real_size;
    cl_int result = clGetMemObjectInfo((cl_mem)buf->dev, CL_MEM_SIZE, sizeof(size_t), &real_size, NULL);
    if (result) {
        halide_printf("Bad device pointer %p: clGetMemObjectInfo returned %d\n", (void *)buf->dev, result);
        return false;
    }
    #ifdef DEBUG
    halide_printf("validate %p: asked for %lld, actual allocated %lld\n", (void*)buf->dev, (long long)size, (long long)real_size);
    #endif
    if (size) halide_assert(real_size >= size && "Validating pointer with insufficient size");
    return true;
}
コード例 #8
0
ファイル: opencl.cpp プロジェクト: netaz/Halide
WEAK int halide_dev_free(void *user_context, buffer_t* buf) {
    DEBUG_PRINTF( user_context, "CL: halide_dev_free (user_context: %p, buf: %p)\n", user_context, buf );

    ClContext ctx(user_context);

    // halide_dev_free, at present, can be exposed to clients and they
    // should be allowed to call halide_dev_free on any buffer_t
    // including ones that have never been used with a GPU.
    if (buf->dev == 0) {
      return 0;
    }

    #ifdef DEBUG
    uint64_t t_before = halide_current_time_ns(user_context);
    #endif

    halide_assert(user_context, halide_validate_dev_pointer(user_context, buf));
    DEBUG_PRINTF(user_context, "    clReleaseMemObject %p\n", (cl_mem)buf->dev );
    cl_int result = clReleaseMemObject((cl_mem)buf->dev);
    // If clReleaseMemObject fails, it is unlikely to succeed in a later call, so
    // we just end our reference to it regardless.
    buf->dev = 0;
    if (result != CL_SUCCESS) {
        halide_error_varargs(user_context, "CL: clReleaseMemObject failed (%d)", result);
        return result;
    }

    #ifdef DEBUG
    uint64_t t_after = halide_current_time_ns(user_context);
    halide_printf(user_context, "    Time: %f ms\n", (t_after - t_before) / 1.0e6);
    #endif

    return 0;
}
コード例 #9
0
WEAK void halide_error(void *user_context, const char *msg) {
    if (halide_error_handler) {
        (*halide_error_handler)(user_context, msg);
    }  else {
        halide_printf(user_context, "Error: %s\n", msg);
        exit(1);
    }
}
コード例 #10
0
ファイル: opencl.cpp プロジェクト: EEmmanuel7/Halide
WEAK void halide_dev_run(
    void *user_context,
    const char* entry_name,
    int blocksX, int blocksY, int blocksZ,
    int threadsX, int threadsY, int threadsZ,
    int shared_mem_bytes,
    size_t arg_sizes[],
    void* args[])
{
    cl_kernel f = __get_kernel(user_context, entry_name);
    #ifdef DEBUG
    halide_printf(user_context,
        "dev_run %s with (%dx%dx%d) blks, (%dx%dx%d) threads, %d shmem\n",
        entry_name, blocksX, blocksY, blocksZ, threadsX, threadsY, threadsZ, shared_mem_bytes
    );
    #endif
    // Pack dims
    size_t global_dim[3] = {blocksX*threadsX,  blocksY*threadsY,  blocksZ*threadsZ};
    size_t local_dim[3] = {threadsX, threadsY, threadsZ};

    // Set args
    int i = 0;
    while (arg_sizes[i] != 0) {
        #ifdef DEBUG
        halide_printf(user_context, "clSetKernelArg %i %i [0x%x ...]\n", i, arg_sizes[i], *(int *)args[i]);
        #endif
        CHECK_CALL( clSetKernelArg(f, i, arg_sizes[i], args[i]), "clSetKernelArg" );
        i++;
    }
    // Set the shared mem buffer last
    // Always set at least 1 byte of shmem, to keep the launch happy
    CHECK_CALL( clSetKernelArg(f, i, (shared_mem_bytes > 0) ? shared_mem_bytes : 1, NULL), "clSetKernelArg" );

    // Launch kernel
    int err =
    clEnqueueNDRangeKernel(
        *cl_q,
        f,
        3,
        NULL,
        global_dim,
        local_dim,
        0, NULL, NULL
    );
    CHECK_ERR(err, "clEnqueueNDRangeKernel");
}
コード例 #11
0
ファイル: cuda.cpp プロジェクト: bnascimento/Halide
WEAK int halide_copy_to_host(void *user_context, buffer_t* buf) {
    if (!buf->dev_dirty) {
        return 0;
    }

    DEBUG_PRINTF( user_context, "CUDA: halide_copy_to_host (user_context: %p, buf: %p)\n", user_context, buf );

    CudaContext ctx(user_context);
    if (ctx.error != CUDA_SUCCESS) {
        return ctx.error;
    }

    // Need to check dev_dirty again, in case another thread did the
    // copy_to_host before the serialization point above.
    if (buf->dev_dirty) {
        #ifdef DEBUG
        uint64_t t_before = halide_current_time_ns(user_context);
        #endif

        halide_assert(user_context, buf->dev && buf->dev);
        halide_assert(user_context, halide_validate_dev_pointer(user_context, buf));

        _dev_copy c = _make_dev_to_host_copy(buf);

        for (int w = 0; w < c.extent[3]; w++) {
            for (int z = 0; z < c.extent[2]; z++) {
                for (int y = 0; y < c.extent[1]; y++) {
                    for (int x = 0; x < c.extent[0]; x++) {
                        uint64_t off = (x * c.stride_bytes[0] +
                                        y * c.stride_bytes[1] +
                                        z * c.stride_bytes[2] +
                                        w * c.stride_bytes[3]);
                        CUdeviceptr src = (CUdeviceptr)(c.src + off);
                        void *dst = (void *)(c.dst + off);
                        uint64_t size = c.chunk_size;
                        DEBUG_PRINTF( user_context, "    cuMemcpyDtoH (%d, %d, %d, %d), %p -> %p, %lld bytes\n",
                                      x, y, z, w,
                                      (void *)src, dst, (long long)size );
                        CUresult err = cuMemcpyDtoH(dst, src, size);
                        if (err != CUDA_SUCCESS) {
                            halide_error_varargs(user_context, "CUDA: cuMemcpyDtoH failed (%s)",
                                                 _get_error_name(err));
                            return err;
                        }
                    }
                }
            }
        }

        #ifdef DEBUG
        uint64_t t_after = halide_current_time_ns(user_context);
        halide_printf(user_context, "    Time: %f ms\n", (t_after - t_before) / 1.0e6);
        #endif
    }
    buf->dev_dirty = false;
    return 0;
}
コード例 #12
0
WEAK void halide_do_par_for(void (*f)(int, uint8_t *), int min, int size, uint8_t *closure) {
    if (halide_custom_do_par_for) {
        (*halide_custom_do_par_for)(f, min, size, closure);
        return;
    }
    if (!halide_thread_pool_initialized) {
        halide_work_queue.shutdown = false;
        pthread_mutex_init(&halide_work_queue.mutex, NULL);
        pthread_cond_init(&halide_work_queue.not_empty, NULL);
        halide_work_queue.head = halide_work_queue.tail = 0;
        halide_work_queue.ids = 1;
        char *threadStr = getenv("HL_NUMTHREADS");
        #ifdef _LP64
        // On 64-bit systems we use 8 threads by default
        halide_threads = 8;
        #else
        // On 32-bit systems we use 2 threads by default
        halide_threads = 2;
        #endif
        if (threadStr) {
            halide_threads = atoi(threadStr);
        } else {
            halide_printf("HL_NUMTHREADS not defined. Defaulting to %d threads.\n", halide_threads);
        }
        if (halide_threads > MAX_THREADS) halide_threads = MAX_THREADS;
        for (int i = 0; i < halide_threads-1; i++) {
            //fprintf(stderr, "Creating thread %d\n", i);
            pthread_create(halide_work_queue.threads + i, NULL, halide_worker_thread, NULL);
        }

        halide_thread_pool_initialized = true;
    }

    // Enqueue the job
    pthread_mutex_lock(&halide_work_queue.mutex);
    //fprintf(stderr, "Enqueuing some work\n");
    work job = {f, min, min + size, closure, halide_work_queue.ids++, 0};
    if (job.id == 0) job.id = halide_work_queue.ids++; // disallow zero, as it flags a completed job
    halide_work_queue.jobs[halide_work_queue.tail] = job;
    work *jobPtr = halide_work_queue.jobs + halide_work_queue.tail;
    worker_arg arg = {job.id, jobPtr};
    int new_tail = (halide_work_queue.tail + 1) % MAX_JOBS;
    //assert(new_tail != halide_work_queue.head); 
    halide_work_queue.tail = new_tail;

    // TODO: check to make sure the work queue doesn't overflow
    pthread_mutex_unlock(&halide_work_queue.mutex);
    
    //fprintf(stderr, "Waking up workers\n");
    // Wake up everyone
    pthread_cond_broadcast(&halide_work_queue.not_empty);

    // Do some work myself
    //fprintf(stderr, "Doing some work on job %d\n", arg.id);
    halide_worker_thread((void *)(&arg));    
    //fprintf(stderr, "Parallel for done\n");
}
コード例 #13
0
ファイル: runtime.ptx_host.cpp プロジェクト: 202198/Halide
WEAK bool halide_validate_dev_pointer(buffer_t* buf) {
    CUcontext ctx;
    CUresult result = cuPointerGetAttribute(&ctx, CU_POINTER_ATTRIBUTE_CONTEXT, buf->dev);
    if (result) {
        halide_printf("Bad device pointer %p: cuPointerGetAttribute returned %d\n", (void *)buf->dev, result);
        return false;
    }
    return true;
}
コード例 #14
0
ファイル: runtime.ptx_host.cpp プロジェクト: 202198/Halide
WEAK void halide_dev_free(buffer_t* buf) {

    #ifdef DEBUG
    halide_printf("In dev_free of %p - dev: 0x%p\n", buf, (void*)buf->dev);
    halide_assert(halide_validate_dev_pointer(buf));
    #endif

    CHECK_CALL( cuMemFree(buf->dev), "cuMemFree" );
    buf->dev = 0;

}
コード例 #15
0
ファイル: opencl.cpp プロジェクト: EEmmanuel7/Halide
static cl_kernel __get_kernel(void *user_context, const char* entry_name) {
    cl_kernel f;
    #ifdef DEBUG
    halide_printf(user_context, "get_kernel %s\n", entry_name);
    #endif
    // Get kernel function ptr
    int err;
    f = clCreateKernel(__mod, entry_name, &err);
    CHECK_ERR(err, "clCreateKernel");
    return f;
}
コード例 #16
0
ファイル: opengl.cpp プロジェクト: parvizp/Halide
WEAK GLuint halide_opengl_make_shader(void *user_context, GLenum type,
                                      const char *source, GLint *length) {
    GLuint shader = ST.CreateShader(type);
    ST.ShaderSource(shader, 1, (const GLchar **)&source, length);
    ST.CompileShader(shader);

    GLint shader_ok = 0;
    ST.GetShaderiv(shader, GL_COMPILE_STATUS, &shader_ok);
    if (!shader_ok) {
        halide_printf(user_context, "Could not compile shader:\n");
        GLint log_len;
        ST.GetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_len);
        char *log = (char *)malloc(log_len);
        ST.GetShaderInfoLog(shader, log_len, NULL, log);
        halide_printf(user_context, "%s", log);
        free(log);
        ST.DeleteShader(shader);
        return 0;
    }
    return shader;
}
コード例 #17
0
ファイル: cuda.cpp プロジェクト: bnascimento/Halide
WEAK int halide_dev_malloc(void *user_context, buffer_t *buf) {
    DEBUG_PRINTF( user_context, "CUDA: halide_dev_malloc (user_context: %p, buf: %p)\n", user_context, buf );

    CudaContext ctx(user_context);
    if (ctx.error != CUDA_SUCCESS) {
        return ctx.error;
    }

    size_t size = _buf_size(user_context, buf);
    if (buf->dev) {
        // This buffer already has a device allocation
        halide_assert(user_context, halide_validate_dev_pointer(user_context, buf, size));
        return 0;
    }

    halide_assert(user_context, buf->stride[0] >= 0 && buf->stride[1] >= 0 &&
                                buf->stride[2] >= 0 && buf->stride[3] >= 0);

    DEBUG_PRINTF(user_context, "    allocating buffer of %lld bytes, "
                 "extents: %lldx%lldx%lldx%lld strides: %lldx%lldx%lldx%lld (%d bytes per element)\n",
                 (long long)size,
                 (long long)buf->extent[0], (long long)buf->extent[1],
                 (long long)buf->extent[2], (long long)buf->extent[3],
                 (long long)buf->stride[0], (long long)buf->stride[1],
                 (long long)buf->stride[2], (long long)buf->stride[3],
                 buf->elem_size);

    #ifdef DEBUG
    uint64_t t_before = halide_current_time_ns(user_context);
    #endif

    CUdeviceptr p;
    DEBUG_PRINTF( user_context, "    cuMemAlloc %lld -> ", size );
    CUresult err = cuMemAlloc(&p, size);
    if (err != CUDA_SUCCESS) {
        DEBUG_PRINTF( user_context, "%s\n", _get_error_name(err));
        halide_error_varargs(user_context, "CUDA: cuMemAlloc failed (%s)",
                             _get_error_name(err));
        return err;
    } else {
        DEBUG_PRINTF( user_context, "%p\n", p );
    }
    halide_assert(user_context, p);
    buf->dev = (uint64_t)p;

    #ifdef DEBUG
    uint64_t t_after = halide_current_time_ns(user_context);
    halide_printf(user_context, "    Time: %f ms\n", (t_after - t_before) / 1.0e6);
    #endif

    return 0;
}
コード例 #18
0
ファイル: opencl.cpp プロジェクト: EEmmanuel7/Halide
WEAK void halide_copy_to_dev(void *user_context, buffer_t* buf) {
    if (buf->host_dirty) {
        halide_assert(user_context, buf->host && buf->dev);
        size_t size = __buf_size(user_context, buf);
        #ifdef DEBUG
        halide_printf(user_context, "copy_to_dev (%lld bytes) %p -> %p\n", (long long)size, buf->host, (void*)buf->dev);
        #endif
        halide_assert(user_context, halide_validate_dev_pointer(user_context, buf));
        int err = clEnqueueWriteBuffer( *cl_q, (cl_mem)((void*)buf->dev), CL_TRUE, 0, size, buf->host, 0, NULL, NULL );
        CHECK_ERR( err, "clEnqueueWriteBuffer" );
    }
    buf->host_dirty = false;
}
コード例 #19
0
ファイル: opengl.cpp プロジェクト: parvizp/Halide
// Initialize the runtime, in particular all fields in halide_opengl_state.
EXPORT int halide_opengl_init(void *user_context) {
    if (ST.initialized) return 0;

    // Make a context if there isn't one
    if (halide_opengl_create_context(user_context)) {
        halide_printf(user_context, "Failed to make opengl context\n");
        return 1;
    }

    // Initialize pointers to OpenGL functions.
#define GLFUNC(TYPE, VAR)                                               \
    ST.VAR = (TYPE)halide_opengl_get_proc_address(user_context, "gl" #VAR); \
    if (!ST.VAR) {                                                      \
        halide_printf(user_context, "Could not load function pointer for %s\n", "gl" #VAR); \
        return 1;                                                         \
    }
    USED_GL_FUNCTIONS;
#undef GLFUNC

    ST.kernels = NULL;
    ST.textures = NULL;

    // Initialize all OpenGL objects that are shared between kernels.
    ST.GenFramebuffers(1, &ST.framebuffer_id);
    CHECK_GLERROR(1);

    ST.vertex_shader_id = halide_opengl_make_shader(user_context,
        GL_VERTEX_SHADER, vertex_shader_src, NULL);
    if (ST.vertex_shader_id == 0) {
	halide_error(user_context, "Failed to create vertex shader");
	return 1;
    }

    GLuint buf;
    ST.GenBuffers(1, &buf);
    ST.BindBuffer(GL_ARRAY_BUFFER, buf);
    ST.BufferData(GL_ARRAY_BUFFER,
                  sizeof(square_vertices), square_vertices, GL_STATIC_DRAW);
    CHECK_GLERROR(1);
    ST.vertex_buffer = buf;

    ST.GenBuffers(1, &buf);
    ST.BindBuffer(GL_ELEMENT_ARRAY_BUFFER, buf);
    ST.BufferData(GL_ELEMENT_ARRAY_BUFFER,
                  sizeof(square_indices), square_indices, GL_STATIC_DRAW);
    CHECK_GLERROR(1);
    ST.element_buffer = buf;

    ST.initialized = true;
    return 0;
}
コード例 #20
0
ファイル: opencl.cpp プロジェクト: netaz/Halide
WEAK int halide_dev_malloc(void *user_context, buffer_t* buf) {
    DEBUG_PRINTF( user_context, "CL: halide_dev_malloc (user_context: %p, buf: %p)\n", user_context, buf );

    ClContext ctx(user_context);
    if (ctx.error != CL_SUCCESS) {
        return ctx.error;
    }

    size_t size = _buf_size(user_context, buf);
    if (buf->dev) {
        halide_assert(user_context, halide_validate_dev_pointer(user_context, buf, size));
        return 0;
    }

    halide_assert(user_context, buf->stride[0] >= 0 && buf->stride[1] >= 0 &&
                                buf->stride[2] >= 0 && buf->stride[3] >= 0);

    DEBUG_PRINTF(user_context, "    Allocating buffer of %lld bytes, "
                 "extents: %lldx%lldx%lldx%lld strides: %lldx%lldx%lldx%lld (%d bytes per element)\n",
                 (long long)size,
                 (long long)buf->extent[0], (long long)buf->extent[1],
                 (long long)buf->extent[2], (long long)buf->extent[3],
                 (long long)buf->stride[0], (long long)buf->stride[1],
                 (long long)buf->stride[2], (long long)buf->stride[3],
                 buf->elem_size);

    #ifdef DEBUG
    uint64_t t_before = halide_current_time_ns(user_context);
    #endif

    cl_int err;
    DEBUG_PRINTF( user_context, "    clCreateBuffer -> ", size );
    buf->dev = (uint64_t)clCreateBuffer(ctx.context, CL_MEM_READ_WRITE, size, NULL, &err);
    if (err != CL_SUCCESS || buf->dev == 0) {
        DEBUG_PRINTF( user_context, "%d\n", err);
        halide_error_varargs(user_context, "CL: clCreateBuffer failed (%d)\n", err);
        return err;
    } else {
        DEBUG_PRINTF( user_context, "%p\n", (cl_mem)buf->dev );
    }

    DEBUG_PRINTF(user_context, "    Allocated device buffer %p for buffer %p\n",
                 (void *)buf->dev, buf);

    #ifdef DEBUG
    uint64_t t_after = halide_current_time_ns(user_context);
    halide_printf(user_context, "    Time: %f ms\n", (t_after - t_before) / 1.0e6);
    #endif

    return CL_SUCCESS;
}
コード例 #21
0
ファイル: opengl.cpp プロジェクト: parvizp/Halide
static void print_buffer(void *user_context, buffer_t *buf) {
    halide_printf(user_context, "  dev: %ul\n", buf->dev);
    halide_printf(user_context, "  host: %p\n", buf->host);
    halide_printf(user_context, "  extent: %d %d %d %d\n",
                  buf->extent[0], buf->extent[1], buf->extent[2], buf->extent[3]);
    halide_printf(user_context, "  stride: %d %d %d %d\n",
                  buf->stride[0], buf->stride[1], buf->stride[2], buf->stride[3]);
    halide_printf(user_context, "  min: %d %d %d %d\n",
                  buf->min[0], buf->min[1], buf->min[2], buf->min[3]);
    halide_printf(user_context, "  elem_size: %d\n", buf->elem_size);
    halide_printf(user_context, "  host_dirty: %d, dev_dirty: %d\n",
                  buf->host_dirty, buf->dev_dirty);
}
コード例 #22
0
ファイル: opencl.cpp プロジェクト: EEmmanuel7/Halide
WEAK void halide_release(void *user_context) {
    // TODO: this is for timing; bad for release-mode performance
    #ifdef DEBUG
    halide_printf(user_context, "dev_sync on exit\n" );
    #endif
    halide_dev_sync(user_context);

    // Unload the module
    if (__mod) {
        #ifdef DEBUG
        halide_printf(user_context, "clReleaseProgram %p\n", __mod);
        #endif

        CHECK_CALL( clReleaseProgram(__mod), "clReleaseProgram" );
        __mod = 0;
    }

    // TODO: This is not a good solution to deal with this problem (finding out if the
    // cl_ctx/cl_q are going to be freed). I think a larger redesign of the global
    // context scheme might be necessary.
    cl_uint refs = 0;
    clGetContextInfo(*cl_ctx, CL_CONTEXT_REFERENCE_COUNT, sizeof(refs), &refs, NULL);

    // Unload context (ref counted).
    CHECK_CALL( clReleaseCommandQueue(*cl_q), "clReleaseCommandQueue" );
    #ifdef DEBUG
    halide_printf(user_context, "clReleaseContext %p\n", *cl_ctx);
    #endif
    CHECK_CALL( clReleaseContext(*cl_ctx), "clReleaseContext" );

    // See TODO above...
    if (--refs == 0) {
        *cl_ctx = NULL;
        *cl_q = NULL;
    }
}
コード例 #23
0
ファイル: opencl.cpp プロジェクト: jacobke/Halide
WEAK void halide_dev_free(buffer_t* buf) {
    // halide_dev_free, at present, can be exposed to clients and they
    // should be allowed to call halide_dev_free on any buffer_t
    // including ones that have never been used with a GPU.
    if (buf->dev == 0)
      return;

    #ifdef DEBUG
    halide_printf("In dev_free of %p - dev: 0x%p\n", buf, (void*)buf->dev);
    #endif

    halide_assert(halide_validate_dev_pointer(buf));
    CHECK_CALL( clReleaseMemObject((cl_mem)buf->dev), "clReleaseMemObject" );
    buf->dev = 0;
}
コード例 #24
0
ファイル: opencl.cpp プロジェクト: EEmmanuel7/Halide
WEAK void halide_dev_malloc(void *user_context, buffer_t* buf) {
    if (buf->dev) {
        halide_assert(user_context, halide_validate_dev_pointer(user_context, buf));
        return;
    }

    size_t size = __buf_size(user_context, buf);
    #ifdef DEBUG
    halide_printf(user_context, "dev_malloc allocating buffer of %lld bytes, "
                  "extents: %lldx%lldx%lldx%lld strides: %lldx%lldx%lldx%lld (%d bytes per element)\n",
                  (long long)size, (long long)buf->extent[0], (long long)buf->extent[1],
                  (long long)buf->extent[2], (long long)buf->extent[3],
                  (long long)buf->stride[0], (long long)buf->stride[1],
                  (long long)buf->stride[2], (long long)buf->stride[3],
                  buf->elem_size);
    #endif

    buf->dev = (uint64_t)__dev_malloc(user_context, size);
    #ifdef DEBUG
    halide_printf(user_context, "dev_malloc allocated buffer %p of with buf->dev of %p\n",
                  buf, (void *)buf->dev);
    #endif
    halide_assert(user_context, buf->dev);
}
コード例 #25
0
ファイル: opencl.cpp プロジェクト: EEmmanuel7/Halide
WEAK void halide_copy_to_host(void *user_context, buffer_t* buf) {
    if (buf->dev_dirty) {
        clFinish(*cl_q); // block on completion before read back
        halide_assert(user_context, buf->host && buf->dev);
        size_t size = __buf_size(user_context, buf);
        #ifdef DEBUG
        halide_printf(user_context, "copy_to_host buf %p (%lld bytes) %p -> %p\n", buf, (long long)size,
                      (void*)buf->dev, buf->host );
        #endif

        halide_assert(user_context, halide_validate_dev_pointer(user_context, buf, size));
        int err = clEnqueueReadBuffer( *cl_q, (cl_mem)((void*)buf->dev), CL_TRUE, 0, size, buf->host, 0, NULL, NULL );
        CHECK_ERR( err, "clEnqueueReadBuffer" );
    }
    buf->dev_dirty = false;
}
コード例 #26
0
ファイル: cuda.cpp プロジェクト: EEmmanuel7/Halide
WEAK void halide_dev_free(void *user_context, buffer_t* buf) {
    // halide_dev_free, at present, can be exposed to clients and they
    // should be allowed to call halide_dev_free on any buffer_t
    // including ones that have never been used with a GPU.
    if (buf->dev == 0)
        return;

    #ifdef DEBUG
    halide_printf(user_context, "In dev_free of %p - dev: 0x%p\n", buf, (void*)buf->dev);
    halide_assert(user_context, halide_validate_dev_pointer(user_context, buf));
    #endif

    CHECK_CALL( cuMemFree(buf->dev), "cuMemFree" );
    buf->dev = 0;

}
コード例 #27
0
ファイル: cuda.cpp プロジェクト: bnascimento/Halide
WEAK int halide_init_kernels(void *user_context, void **state_ptr, const char* ptx_src, int size) {
    DEBUG_PRINTF( user_context, "CUDA: halide_init_kernels (user_context: %p, state_ptr: %p, ptx_src: %p, %i)\n",
                  user_context, state_ptr, ptx_src, size );

    CudaContext ctx(user_context);
    if (ctx.error != 0) {
        return ctx.error;
    }

    #ifdef DEBUG
    uint64_t t_before = halide_current_time_ns(user_context);
    #endif

    // Create the state object if necessary. This only happens once, regardless
    // of how many times halide_init_kernels/halide_release is called.
    // halide_release traverses this list and releases the module objects, but
    // it does not modify the list nodes created/inserted here.
    module_state **state = (module_state**)state_ptr;
    if (!(*state)) {
        *state = (module_state*)malloc(sizeof(module_state));
        (*state)->module = NULL;
        (*state)->next = state_list;
        state_list = *state;
    }

    // Create the module itself if necessary.
    if (!(*state)->module) {
        DEBUG_PRINTF( user_context, "    cuModuleLoadData %p, %i -> ", ptx_src, size );
        CUmodule module;
        CUresult err = cuModuleLoadData(&(*state)->module, ptx_src);
        if (err != CUDA_SUCCESS) {
            DEBUG_PRINTF( user_context, "%s\n", _get_error_name(err) );
            halide_error_varargs(user_context, "CUDA: cuModuleLoadData failed (%s)",
                                 _get_error_name(err));
            return err;
        } else {
            DEBUG_PRINTF( user_context, "%p\n", module );
        }
    }

    #ifdef DEBUG
    uint64_t t_after = halide_current_time_ns(user_context);
    halide_printf(user_context, "    Time: %f ms\n", (t_after - t_before) / 1.0e6);
    #endif

    return 0;
}
コード例 #28
0
ファイル: opencl.cpp プロジェクト: jacobke/Halide
WEAK void halide_dev_malloc(buffer_t* buf) {
    if (buf->dev) {
        halide_assert(halide_validate_dev_pointer(buf));
        return;
    }

    size_t size = __buf_size(buf);
    #ifdef DEBUG
    halide_printf("dev_malloc allocating buffer of %zd bytes, extents: %zdx%zdx%zdx%zd strides: %zdx%zdx%zdx%zd (%d bytes per element)\n",
		  size, buf->extent[0], buf->extent[1], buf->extent[2], buf->extent[3],
                  buf->stride[0], buf->stride[1], buf->stride[2], buf->stride[3],
		  buf->elem_size);
    #endif

    buf->dev = (uint64_t)__dev_malloc(size);
    halide_assert(buf->dev);
}
コード例 #29
0
ファイル: opencl.cpp プロジェクト: jacobke/Halide
WEAK void halide_release() {
    // TODO: this is for timing; bad for release-mode performance
    #ifdef DEBUG
    halide_printf("dev_sync on exit" );
    #endif
    halide_dev_sync();

    // Unload the module
    if (__mod) {
        CHECK_CALL( clReleaseProgram(__mod), "clReleaseProgram" );
        __mod = 0;
    }

    // Unload context (ref counted).
    CHECK_CALL( clReleaseCommandQueue(cl_q), "clReleaseCommandQueue" );
    CHECK_CALL( clReleaseContext(cl_ctx), "clReleaseContext" );
}
コード例 #30
0
ファイル: opengl.cpp プロジェクト: parvizp/Halide
// Release all data allocated by the runtime.
//
// The OpenGL context itself is generally managed by the host application, so
// we leave it untouched.
EXPORT void halide_opengl_release(void *user_context) {
    CHECK_INITIALIZED();
    ST.DeleteShader(ST.vertex_shader_id);
    ST.DeleteFramebuffers(1, &ST.framebuffer_id);

    HalideOpenGLKernel *cur = ST.kernels;
    while (cur) {
        HalideOpenGLKernel *next = cur->next;
        halide_opengl_delete_kernel(user_context, cur);
        cur = next;
    }

    // Delete all textures that were allocated by us.
    HalideOpenGLTexture *tex = ST.textures;
    int freed_textures = 0;
    while (tex) {
        HalideOpenGLTexture *next = tex->next;
        if (tex->halide_allocated) {
            ST.DeleteTextures(1, &tex->id);
            CHECK_GLERROR();
            freed_textures++;
        }
        free(tex);
        tex = next;
    }
#ifdef DEBUG
    if (freed_textures > 0) {
        halide_printf(user_context,
            "halide_opengl_release: deleted %d dangling texture(s).\n",
            freed_textures);
    }
#endif

    ST.DeleteBuffers(1, &ST.vertex_buffer);
    ST.DeleteBuffers(1, &ST.element_buffer);

    ST.vertex_shader_id = 0;
    ST.framebuffer_id = 0;
    ST.vertex_buffer = 0;
    ST.element_buffer = 0;
    ST.kernels = NULL;
    ST.textures = NULL;
    ST.initialized = false;
}