Example #1
0
// Map a buffer_t onto the OpenGL pixel format and component type used for its
// texture. The channel count (extent[2]) selects *format and the element size
// in bytes (elem_size) selects *type. Unsupported combinations are reported
// through halide_error and yield false; otherwise the result is true.
static bool get_texture_format(void *user_context,
                               buffer_t *buf,
                               GLint *format,
                               GLint *type) {
    // Any channel extent of 1 or less is treated as a single-channel image.
    const bool single_channel = buf->extent[2] <= 1;
    if (!single_channel && buf->extent[2] != 3 && buf->extent[2] != 4) {
        halide_error(user_context, "Only 1, 3, or 4 color channels are supported");
        return false;
    }
    if (single_channel) {
        *format = GL_LUMINANCE;
    } else {
        *format = (buf->extent[2] == 3) ? GL_RGB : GL_RGBA;
    }

    // The element size in bytes decides the component type.
    switch (buf->elem_size) {
    case 1:
        *type = GL_UNSIGNED_BYTE;
        break;
    case 2:
        *type = GL_UNSIGNED_SHORT;
        break;
    default:
        halide_error(user_context, "Only uint8 and uint16 textures are supported");
        return false;
    }
    return true;
}
Example #2
0
/** Allocate both the host and the device storage backing a buffer.
 * Ideally the two share one zero-copy allocation, but the default
 * implementation may allocate the host side with halide_malloc and the
 * device side with halide_device_malloc separately.
 *
 * Returns 0 on success and halide_error_code_device_malloc_failed when
 * the buffer is already bound to a different device interface or when
 * the interface's allocation routine reports a failure. */
WEAK int halide_device_and_host_malloc(void *user_context, struct halide_buffer_t *buf,
                                       const halide_device_interface_t *device_interface) {
    const halide_device_interface_t *existing = buf->device_interface;
    debug(user_context) << "halide_device_and_host_malloc: " << buf
                        << " interface " << device_interface
                        << " host: " << buf->host
                        << ", device: " << buf->device
                        << ", host_dirty: " << buf->host_dirty()
                        << ", dev_dirty:" << buf->device_dirty()
                        << " buf current interface: " << existing << "\n";

    // A buffer that already belongs to some interface may only be
    // (re)allocated through that same interface.
    const bool switching_interfaces =
        (existing != NULL) && (existing != device_interface);
    if (switching_interfaces) {
        halide_error(user_context, "halide_device_and_host_malloc doesn't support switching interfaces\n");
        return halide_error_code_device_malloc_failed;
    }

    // Hold a reference on the module for the duration of the call so its
    // code is not freed prematurely.
    // TODO: Exception safety...
    device_interface->use_module();
    const int status = device_interface->device_and_host_malloc(user_context, buf);
    device_interface->release_module();

    if (status == 0) {
        return 0;
    }
    halide_error(user_context, "allocating host and device memory failed\n");
    return halide_error_code_device_malloc_failed;
}
Example #3
0
// Delete all texture information associated with a buffer. The OpenGL texture
// itself is only deleted if it was actually allocated by Halide and not
// provided by the host application.
//
// Returns 0 on success (including when the buffer has no texture) and 1 when
// no bookkeeping record matches the buffer's texture id.
EXPORT int halide_opengl_dev_free(void *user_context, buffer_t *buf) {
    CHECK_INITIALIZED(1);

    GLuint tex = get_texture_id(buf);
    if (tex == 0) {
        return 0;
    }

    // Look up corresponding HalideOpenGLTexture and unlink it from the list.
    // `ptr` always addresses the link that points at `texinfo`, so unlinking
    // works the same for the list head and for interior nodes.
    HalideOpenGLTexture **ptr = &ST.textures;
    HalideOpenGLTexture *texinfo = *ptr;
    for (; texinfo != NULL; ptr = &texinfo->next, texinfo = *ptr) {
        if (texinfo->id == tex) {
            *ptr = texinfo->next;
            texinfo->next = NULL;
            break;
        }
    }
    if (!texinfo) {
        halide_error(user_context, "Internal error: texture not found");
        return 1;
    }

    // The record is already unlinked, so release it before touching OpenGL.
    // NOTE(review): CHECK_GLERROR is defined elsewhere and presumably returns
    // early on a GL error; freeing first keeps that path from leaking the
    // record.
    const bool halide_allocated = texinfo->halide_allocated;
    free(texinfo);
    texinfo = NULL;

    // Delete texture if it was allocated by us.
    if (halide_allocated) {
        ST.DeleteTextures(1, &tex);
        CHECK_GLERROR(1);
        buf->dev = 0;
    }

    return 0;
}
Example #4
0
// Parse declaration of the form "type name" and construct
// matching HalideOpenGLArgument.
//
// `src` points at the start of the declaration and `end` one past the last
// character of the name. Recognized type prefixes are "float ", "int ",
// "uint8 " and "uint16 ". The returned argument is heap-allocated (as is its
// name) and has kind ARGKIND_NONE and no successor. Returns NULL after
// reporting through halide_error when the type is not recognized or memory
// cannot be allocated.
static HalideOpenGLArgument *parse_argument(void *user_context, const char *src,
                                            const char *end) {
    const char *name;
    ArgumentType type = ARGTYPE_NONE;
    if ((name = match_prefix(src, "float "))) {
        type = ARGTYPE_FLOAT;
    } else if ((name = match_prefix(src, "int "))) {
        type = ARGTYPE_INT;
    } else if ((name = match_prefix(src, "uint8 "))) {
        type = ARGTYPE_UINT8;
    } else if ((name = match_prefix(src, "uint16 "))) {
        type = ARGTYPE_UINT16;
    }
    if (type == ARGTYPE_NONE) {
        halide_error(user_context, "Internal error: argument type not supported");
        return NULL;
    }

    HalideOpenGLArgument *arg =
        (HalideOpenGLArgument *)malloc(sizeof(HalideOpenGLArgument));
    if (!arg) {
        halide_error(user_context, "Out of memory while parsing kernel argument");
        return NULL;
    }

    // Everything between the type prefix and `end` is the argument name.
    arg->name = strndup(name, end - name);
    if (!arg->name) {
        free(arg);
        halide_error(user_context, "Out of memory while parsing kernel argument");
        return NULL;
    }
    arg->type = type;
    arg->kind = ARGKIND_NONE;
    arg->next = NULL;
    return arg;
}
Example #5
0
// printf-style front end for halide_error: formats `msg` with the trailing
// arguments into a fixed 4096-byte stack buffer (longer output is truncated
// by vsnprintf) and forwards the resulting text to halide_error.
WEAK void halide_error_varargs(void *user_context, const char *msg, ...) {
    char formatted[4096];

    __builtin_va_list ap;
    __builtin_va_start(ap, msg);
    vsnprintf(formatted, sizeof(formatted), msg, ap);
    __builtin_va_end(ap);

    halide_error(user_context, formatted);
}
Example #6
0
// Initialize the runtime, in particular all fields in halide_opengl_state.
//
// Returns 0 when the runtime was already initialized or initialization
// succeeds, and 1 when creating the context, loading an OpenGL function
// pointer, or creating the vertex shader fails.
// NOTE(review): CHECK_GLERROR is defined outside this view; it presumably
// returns its argument when OpenGL reports an error -- confirm.
EXPORT int halide_opengl_init(void *user_context) {
    // Nothing to do if an earlier call already completed.
    if (ST.initialized) return 0;

    // Make a context if there isn't one
    if (halide_opengl_create_context(user_context)) {
        halide_printf(user_context, "Failed to make opengl context\n");
        return 1;
    }

    // Initialize pointers to OpenGL functions.
    // USED_GL_FUNCTIONS expands to one GLFUNC(TYPE, VAR) entry per function;
    // each entry looks up "gl<VAR>", stores it in ST.VAR, and makes this
    // function return 1 if the lookup yields a null pointer.
#define GLFUNC(TYPE, VAR)                                               \
    ST.VAR = (TYPE)halide_opengl_get_proc_address(user_context, "gl" #VAR); \
    if (!ST.VAR) {                                                      \
        halide_printf(user_context, "Could not load function pointer for %s\n", "gl" #VAR); \
        return 1;                                                         \
    }
    USED_GL_FUNCTIONS;
#undef GLFUNC

    // Start with empty kernel and texture lists.
    ST.kernels = NULL;
    ST.textures = NULL;

    // Initialize all OpenGL objects that are shared between kernels.
    ST.GenFramebuffers(1, &ST.framebuffer_id);
    CHECK_GLERROR(1);

    // Build the vertex shader from vertex_shader_src; an id of 0 means failure.
    ST.vertex_shader_id = halide_opengl_make_shader(user_context,
        GL_VERTEX_SHADER, vertex_shader_src, NULL);
    if (ST.vertex_shader_id == 0) {
	halide_error(user_context, "Failed to create vertex shader");
	return 1;
    }

    // Upload square_vertices into a new array buffer.
    GLuint buf;
    ST.GenBuffers(1, &buf);
    ST.BindBuffer(GL_ARRAY_BUFFER, buf);
    ST.BufferData(GL_ARRAY_BUFFER,
                  sizeof(square_vertices), square_vertices, GL_STATIC_DRAW);
    CHECK_GLERROR(1);
    ST.vertex_buffer = buf;

    // Upload square_indices into a new element array buffer.
    ST.GenBuffers(1, &buf);
    ST.BindBuffer(GL_ELEMENT_ARRAY_BUFFER, buf);
    ST.BufferData(GL_ELEMENT_ARRAY_BUFFER,
                  sizeof(square_indices), square_indices, GL_STATIC_DRAW);
    CHECK_GLERROR(1);
    ST.element_buffer = buf;

    // Only mark the runtime initialized once every step above succeeded.
    ST.initialized = true;
    return 0;
}
Example #7
0
// Initialize the CUDA driver, pick a device and create a context on it.
//
// The device index comes from halide_get_gpu_device; a value of -1 selects
// the last device reported by the driver. On success the new context is
// stored in *ctx and CUDA_SUCCESS is returned. On failure the error is
// reported through halide_error / halide_error_varargs and the failing
// CUresult is returned. When DEBUG is defined, the device name, memory size
// and a set of device attributes are queried and printed as well; a failure
// of any of those queries also aborts context creation.
static CUresult create_context(void *user_context, CUcontext *ctx) {
    // Initialize CUDA
    CUresult err = cuInit(0);
    if (err != CUDA_SUCCESS) {
        halide_error_varargs(user_context, "CUDA: cuInit failed (%s)",
                             _get_error_name(err));
        return err;
    }

    // Make sure we have a device
    int deviceCount = 0;
    err = cuDeviceGetCount(&deviceCount);
    if (err != CUDA_SUCCESS) {
        halide_error_varargs(user_context, "CUDA: cuDeviceGetCount failed (%s)",
                             _get_error_name(err));
        return err;
    }
    if (deviceCount <= 0) {
        halide_error(user_context, "CUDA: No devices available");
        return CUDA_ERROR_NO_DEVICE;
    }

    // -1 means "no preference": fall back to the last enumerated device.
    int device = halide_get_gpu_device(user_context);
    if (device == -1) {
        device = deviceCount - 1;
    }

    // Get device
    CUdevice dev;
    err = cuDeviceGet(&dev, device);
    if (err != CUDA_SUCCESS) {
        halide_error(user_context, "CUDA: Failed to get device\n");
        return err;
    }

    DEBUG_PRINTF( user_context, "    Got device %d\n", dev );

    // Dump device attributes
    #ifdef DEBUG
    {
        char name[256];
        name[0] = 0;
        err = cuDeviceGetName(name, 256, dev);
        if (err != CUDA_SUCCESS) {
            halide_error_varargs(user_context, "CUDA: cuDeviceGetName failed (%s)",
                                 _get_error_name(err));
            return err;
        }
        // Print only after the query is known to have succeeded.
        DEBUG_PRINTF(user_context, "      %s\n", name);

        size_t memory = 0;
        err = cuDeviceTotalMem(&memory, dev);
        if (err != CUDA_SUCCESS) {
            halide_error_varargs(user_context, "CUDA: cuDeviceTotalMem failed (%s)",
                                 _get_error_name(err));
            return err;
        }
        DEBUG_PRINTF(user_context, "      total memory: %d MB\n", (int)(memory >> 20));

        // Declare variables for other state we want to query.
        int max_threads_per_block = 0, warp_size = 0, num_cores = 0;
        int max_block_size[] = {0, 0, 0};
        int max_grid_size[] = {0, 0, 0};
        int max_shared_mem = 0, max_constant_mem = 0;
        int cc_major = 0, cc_minor = 0;

        // Table of (destination, attribute) pairs, terminated by a NULL dst.
        struct {int *dst; CUdevice_attribute attr;} attrs[] = {
            {&max_threads_per_block, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK},
            {&warp_size,             CU_DEVICE_ATTRIBUTE_WARP_SIZE},
            {&num_cores,             CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT},
            {&max_block_size[0],     CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X},
            {&max_block_size[1],     CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y},
            {&max_block_size[2],     CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z},
            {&max_grid_size[0],      CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X},
            {&max_grid_size[1],      CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y},
            {&max_grid_size[2],      CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z},
            {&max_shared_mem,        CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK},
            {&max_constant_mem,      CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY},
            {&cc_major,              CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR},
            {&cc_minor,              CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR},
            {NULL,                   CU_DEVICE_ATTRIBUTE_MAX}};

        // Do all the queries.
        for (int i = 0; attrs[i].dst; i++) {
            err = cuDeviceGetAttribute(attrs[i].dst, attrs[i].attr, dev);
            if (err != CUDA_SUCCESS) {
                halide_error_varargs(user_context,
                                     "CUDA: cuDeviceGetAttribute failed (%s) for attribute %d",
                                     _get_error_name(err), (int)attrs[i].attr);
                return err;
            }
        }

        // threads per core is a function of the compute capability
        // (0 is printed for compute capabilities not listed here).
        int threads_per_core = (cc_major == 1 ? 8 :
                                cc_major == 2 ? (cc_minor == 0 ? 32 : 48) :
                                cc_major == 3 ? 192 :
                                cc_major == 5 ? 128 : 0);

        DEBUG_PRINTF(user_context,
                     "      max threads per block: %d\n"
                     "      warp size: %d\n"
                     "      max block size: %d %d %d\n"
                     "      max grid size: %d %d %d\n"
                     "      max shared memory per block: %d\n"
                     "      max constant memory per block: %d\n"
                     "      compute capability %d.%d\n"
                     "      cuda cores: %d x %d = %d\n",
                     max_threads_per_block, warp_size,
                     max_block_size[0], max_block_size[1], max_block_size[2],
                     max_grid_size[0], max_grid_size[1], max_grid_size[2],
                     max_shared_mem, max_constant_mem,
                     cc_major, cc_minor,
                     num_cores, threads_per_core, num_cores * threads_per_core);
    }
    #endif

    // Create context
    DEBUG_PRINTF( user_context, "    cuCtxCreate %d -> ", dev );
    err = cuCtxCreate(ctx, 0, dev);
    if (err != CUDA_SUCCESS) {
        DEBUG_PRINTF( user_context, "%s\n", _get_error_name(err) );
        halide_error_varargs(user_context, "CUDA: cuCtxCreate failed (%s)",
                             _get_error_name(err));
        return err;
    } else {
        // The API version is only used for the debug trace, so its result
        // code is deliberately not checked.
        unsigned int version = 0;
        cuCtxGetApiVersion(*ctx, &version);
        DEBUG_PRINTF( user_context, "%p (%d)\n", *ctx, version);
    }

    return CUDA_SUCCESS;
}
Example #8
0
// Initializes the context used by the default implementation
// of halide_acquire_context.
static int create_context(void *user_context, cl_context *ctx, cl_command_queue *q) {
    DEBUG_PRINTF( user_context, "    create_context (user_context: %p)\n", user_context );

    halide_assert(user_context, ctx != NULL && *ctx == NULL);
    halide_assert(user_context, q != NULL && *q == NULL);

    cl_int err = 0;

    const cl_uint maxPlatforms = 4;
    cl_platform_id platforms[maxPlatforms];
    cl_uint platformCount = 0;

    err = clGetPlatformIDs( maxPlatforms, platforms, &platformCount );
    if (err != CL_SUCCESS) {
        halide_error_varargs(user_context, "CL: clGetPlatformIDs failed (%d)\n", err);
        return err;
    }

    cl_platform_id platform = NULL;

    // Find the requested platform, or the first if none specified.
    const char * name = getenv("HL_OCL_PLATFORM_NAME");
    if (name != NULL) {
        for (cl_uint i = 0; i < platformCount; ++i) {
            const cl_uint maxPlatformName = 256;
            char platformName[maxPlatformName];
            err = clGetPlatformInfo( platforms[i], CL_PLATFORM_NAME, maxPlatformName, platformName, NULL );
            if (err != CL_SUCCESS) continue;

            // A platform matches the request if it is a substring of the platform name.
            if (strstr(platformName, name)) {
                platform = platforms[i];
                break;
            }
        }
    } else if (platformCount > 0) {
        platform = platforms[0];
    }
    if (platform == NULL){
        halide_error(user_context, "CL: Failed to find platform\n");
        return CL_INVALID_PLATFORM;
    }

    #ifdef DEBUG
    const cl_uint maxPlatformName = 256;
    char platformName[maxPlatformName];
    err = clGetPlatformInfo( platform, CL_PLATFORM_NAME, maxPlatformName, platformName, NULL );
    if (err != CL_SUCCESS) {
        halide_printf(user_context, "    clGetPlatformInfo(CL_PLATFORM_NAME) failed (%d)\n", err);
        // This is just debug info, report the error but don't fail context creation due to it.
        //return err;
    } else {
        halide_printf(user_context, "    Got platform '%s', about to create context (t=%lld)\n",
                      platformName, (long long)halide_current_time_ns(user_context));
    }
    #endif

    // Get the types of devices requested.
    cl_device_type device_type = 0;
    const char * dev_type = getenv("HL_OCL_DEVICE_TYPE");
    if (dev_type != NULL) {
        if (strstr("cpu", dev_type)) {
            device_type |= CL_DEVICE_TYPE_CPU;
        }
        if (strstr("gpu", dev_type)) {
            device_type |= CL_DEVICE_TYPE_GPU;
        }
    }
    // If no device types are specified, use all the available
    // devices.
    if (device_type == 0) {
        device_type = CL_DEVICE_TYPE_ALL;
    }

    // Get all the devices of the specified type.
    const cl_uint maxDevices = 4;
    cl_device_id devices[maxDevices];
    cl_uint deviceCount = 0;
    err = clGetDeviceIDs( platform, device_type, maxDevices, devices, &deviceCount );
    if (err != CL_SUCCESS) {
        halide_error_varargs(user_context, "CL: clGetDeviceIDs failed (%d)\n", err);
        return err;
    }

    // If the user indicated a specific device index to use, use
    // that. Note that this is an index within the set of devices
    // specified by the device type.
    char *device_str = getenv("HL_GPU_DEVICE");
    cl_uint device = deviceCount - 1;
    if (device_str) {
        device = atoi(device_str);
    }

    if (device >= deviceCount) {
        halide_error_varargs(user_context, "CL: Failed to get device %i\n", device);
        return CL_DEVICE_NOT_FOUND;
    }

    cl_device_id dev = devices[device];

    #ifdef DEBUG
    const cl_uint maxDeviceName = 256;
    char deviceName[maxDeviceName];
    err = clGetDeviceInfo( dev, CL_DEVICE_NAME, maxDeviceName, deviceName, NULL );
    if (err != CL_SUCCESS) {
        halide_printf(user_context, "    clGetDeviceInfo(CL_DEVICE_NAME) failed (%d)\n", err);
        // This is just debug info, report the error but don't fail context create if it fails.
        //return err;
    } else {
        halide_printf(user_context, "    Got device '%s'\n", deviceName);
    }
    #endif


    // Create context and command queue.
    cl_context_properties properties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 };
    DEBUG_PRINTF( user_context, "    clCreateContext -> " );
    *ctx = clCreateContext(properties, 1, &dev, NULL, NULL, &err);
    if (err != CL_SUCCESS) {
        DEBUG_PRINTF( user_context, "%d", err);
        halide_error_varargs(user_context, "CL: clCreateContext failed (%d)\n", err);
        return err;
    } else {
        DEBUG_PRINTF( user_context, "%p\n", *ctx );
    }

    DEBUG_PRINTF(user_context, "    clCreateCommandQueue ");
    *q = clCreateCommandQueue(*ctx, dev, 0, &err);
    if (err != CL_SUCCESS) {
        DEBUG_PRINTF( user_context, "%d", err );
        halide_error_varargs(user_context, "CL: clCreateCommandQueue failed (%d)\n", err);
        return err;
    } else {
        DEBUG_PRINTF( user_context, "%p\n", *q );
    }

    return err;
}
Example #9
0
// Handler for a failed stack-protector check: reports the problem through
// halide_error (with no user context) and then aborts the process.
WEAK void __stack_chk_fail() {
    static const char message[] =
        "Memory error: stack smashing protector changed!\n";
    halide_error(NULL, message);
    abort();
}
Example #10
0
// Initialize OpenGL
//
// Makes sure a GLX context is current. If one already is, nothing is done.
// Otherwise the default X11 display is opened, an RGBA8 framebuffer config is
// chosen, a context is created (version 3.2 via GLX_ARB_create_context when
// available, a legacy context otherwise) and made current on a 32x32 pbuffer.
// Returns 0 on success and -1 on failure, after reporting via halide_error.
// NOTE(review): the display opened here is never closed on the error paths;
// no close routine is visible in this part of the file.
WEAK int halide_opengl_create_context(void *user_context) {
    const int desired_major_version = 3;
    const int desired_minor_version = 2;

    if (glXGetCurrentContext()) {
        // Already have a context
        return 0;
    }

    void *dpy = XOpenDisplay(NULL);
    if (!dpy) {
        halide_error(user_context, "Could not open X11 display.\n");
        return -1;
    }

    // GLX supported?
    if (!glXQueryExtension(dpy, NULL, NULL)) {
        halide_error(user_context, "GLX not supported by X server.\n");
        return -1;
    }

    int screen = XDefaultScreen(dpy);

    int attribs[] = {
        GLX_RENDER_TYPE, GLX_RGBA_BIT,
        GLX_RED_SIZE, 8,
        GLX_GREEN_SIZE, 8,
        GLX_BLUE_SIZE, 8,
        GLX_ALPHA_SIZE, 8,
        0
    };
    int num_configs = 0;
    void** fbconfigs = glXChooseFBConfig(dpy, screen, attribs, &num_configs);
    if (!fbconfigs || num_configs <= 0) {
        // Guard against a null list as well as an empty one before indexing.
        if (fbconfigs) {
            XFree(fbconfigs);
        }
        halide_error(user_context, "Could not get framebuffer config.\n");
        return -1;
    }
    void *fbconfig = fbconfigs[0];

    const char *glxexts = glXQueryExtensionsString(dpy, screen);
    void *share_list = NULL;
    int direct = 1;
    void *context = NULL;

    glXCreateContextAttribsARBProc glXCreateContextAttribsARB = 0;
    glXCreateContextAttribsARB = (glXCreateContextAttribsARBProc)
        glXGetProcAddressARB("glXCreateContextAttribsARB");

    // Prefer a context of the desired version when the extension exists.
    if (glx_extension_supported(glxexts, "GLX_ARB_create_context") &&
        glXCreateContextAttribsARB) {
        int context_attribs[] = {
            GLX_CONTEXT_MAJOR_VERSION_ARB, desired_major_version,
            GLX_CONTEXT_MINOR_VERSION_ARB, desired_minor_version,
            0
        };
        context = glXCreateContextAttribsARB(dpy, fbconfig, share_list, direct,
                                             context_attribs);
    }
    if (!context) {
        // Open a legacy context
        context = glXCreateNewContext(dpy, fbconfig, GLX_RGBA_TYPE, share_list, direct);
    }
    if (!context) {
        // Release the config list; it is not needed past this point.
        XFree(fbconfigs);
        halide_error(user_context, "Could not create OpenGL context.\n");
        return -1;
    }

    int pbuffer_attribs[] = {
        0x8041 /* GLX_PBUFFER_WIDTH */,  32,
        0x8040 /* GLX_PBUFFER_HEIGHT */, 32,
        0
    };
    unsigned long pbuffer = glXCreatePbuffer(dpy, fbconfig, pbuffer_attribs);

    XFree(fbconfigs);
    XSync(dpy, 0);

    if (!glXMakeContextCurrent(dpy, pbuffer, pbuffer, context)) {
        halide_error(user_context, "Could not make context current.\n");
        return -1;
    }

    return 0;
}
Example #11
0
// Report an out-of-memory condition and return the matching error code.
WEAK int halide_error_out_of_memory(void *user_context) {
    // The error message builder uses malloc, so we can't use it here;
    // a fixed string is passed to halide_error instead.
    static const char message[] = "Out of memory (halide_malloc returned NULL)";
    halide_error(user_context, message);
    return halide_error_code_out_of_memory;
}
Example #12
0
// Initialize OpenGL
//
// Makes sure a GLX context is current. If one already is, nothing is done.
// Otherwise the default X11 display is opened, an RGBA8 framebuffer config is
// chosen, a context is created and made current on a 32x32 pbuffer.
// Returns 0 on success and 1 on failure, after reporting via halide_error.
// NOTE(review): the display opened here is never closed on the error paths;
// no close routine is visible in this part of the file.
WEAK int halide_opengl_create_context(void *user_context) {
    if (glXGetCurrentContext()) {
        // Already have a context
        return 0;
    }

    void *dpy = XOpenDisplay(NULL);
    if (!dpy) {
        halide_error(user_context, "Could not open X11 display.\n");
        return 1;
    }

    // GLX supported?
    if (!glXQueryExtension(dpy, NULL, NULL)) {
        halide_error(user_context, "GLX not supported by X server.\n");
        return 1;
    }

    int screen = XDefaultScreen(dpy);

    int attribs[] = {
        0x8011 /* GLX_RENDER_TYPE */, 1 /* GLX_RGBA_BIT */,
        8 /* GLX_RED_SIZE */, 8,
        9 /* GLX_GREEN_SIZE */, 8,
        10 /* GLX_BLUE_SIZE */, 8,
        11 /* GLX_ALPHA_SIZE */, 8,
        0
    };
    int num_configs = 0;
    void** fb_config = glXChooseFBConfig(dpy, screen, attribs, &num_configs);
    if (!fb_config || num_configs <= 0) {
        // Guard against a null list as well as an empty one before indexing.
        if (fb_config) {
            XFree(fb_config);
        }
        halide_error(user_context, "Could not get framebuffer config.\n");
        return 1;
    }

    void *ctx = glXCreateNewContext(dpy, fb_config[0],
                                    0x8014 /* GLX_RGBA_TYPE */,
                                    NULL /* share list */, 1 /* direct */);
    if (!ctx) {
        // Release the config list; it is not needed past this point.
        XFree(fb_config);
        halide_error(user_context, "Could not create OpenGL context.\n");
        return 1;
    }

    int pbuffer_attribs[] = {
        0x8041 /* GLX_PBUFFER_WIDTH */,  32,
        0x8040 /* GLX_PBUFFER_HEIGHT */, 32,
        0
    };
    unsigned long pbuffer = glXCreatePbuffer(dpy, fb_config[0], pbuffer_attribs);

    // clean up:
    XFree(fb_config);
    XSync(dpy, 0);

    if (!glXMakeContextCurrent(dpy, pbuffer, pbuffer, ctx)) {
        halide_error(user_context, "Could not make context current.\n");
        return 1;
    }

    return 0;
}
Example #13
0
// Run the kernel named `entry_name` by drawing a quad with its GLSL program.
//
// `args` is a NULL-terminated array of pointers to argument values, matched
// positionally against the kernel's argument list; `arg_sizes` is only used
// for diagnostics. Input buffers are bound as textures to consecutive
// texture units, scalar int/float arguments are uploaded as uniforms, and the
// (single) output buffer is attached to the shared framebuffer. The block,
// thread and shared-memory parameters are not used by this implementation.
//
// Returns 0 on success and 1 on any error.
// NOTE(review): CHECK_GLERROR / CHECK_INITIALIZED are defined elsewhere and
// presumably return their argument on failure. The error paths after the
// framebuffer is bound leave it bound (see the TODOs below).
EXPORT int halide_opengl_dev_run(
    void *user_context,
    void *state_ptr,
    const char *entry_name,
    int blocksX, int blocksY, int blocksZ,
    int threadsX, int threadsY, int threadsZ,
    int shared_mem_bytes,
    size_t arg_sizes[],
    void *args[])
{
    CHECK_INITIALIZED(1);

    HalideOpenGLKernel *kernel = halide_opengl_find_kernel(entry_name);
    if (!kernel) {
        halide_printf(user_context, "Could not find a kernel named '%s'\n",
                      entry_name);
        return 1;
    }

    ST.UseProgram(kernel->program_id);

    HalideOpenGLArgument *kernel_arg;

    // Copy input arguments to corresponding GLSL uniforms.
    GLint num_active_textures = 0;
    kernel_arg = kernel->arguments;
    for (int i = 0; args[i]; i++, kernel_arg = kernel_arg->next) {
        if (!kernel_arg) {
            // size_t does not match %d, so narrow explicitly for the message.
            halide_printf(user_context, "Argument %d: size=%d value=%p\n", i,
                          (int)arg_sizes[i], args[i]);
            halide_error(user_context,
                         "Too many arguments passed to halide_opengl_dev_run");
            return 1;
        }

        if (kernel_arg->kind == ARGKIND_OUTBUF) {
            // Outbuf textures are handled explicitly below
            continue;
        } else if (kernel_arg->kind == ARGKIND_INBUF) {
            GLint loc =
                ST.GetUniformLocation(kernel->program_id, kernel_arg->name);
            if (loc == -1) {
                halide_error(user_context, "No sampler defined for input texture.\n");
                return 1;
            }
            // The argument value is the texture id; bind it to the next free
            // texture unit and point the sampler uniform at that unit.
            GLuint tex = *((GLuint *)args[i]);
            ST.ActiveTexture(GL_TEXTURE0 + num_active_textures);
            ST.BindTexture(GL_TEXTURE_2D, tex);
            ST.Uniform1iv(loc, 1, &num_active_textures);
            num_active_textures++;
            // TODO: check maximum number of active textures
        } else if (kernel_arg->kind == ARGKIND_VAR) {
            GLint loc =
                ST.GetUniformLocation(kernel->program_id, kernel_arg->name);
            if (loc == -1) {
                // Argument was probably optimized away by GLSL compiler.
#ifdef DEBUG
                halide_printf(user_context, "Ignoring argument '%s'\n",
                              kernel_arg->name);
#endif
                continue;
            }

            switch (kernel_arg->type) {
            case ARGTYPE_INT:
#ifdef DEBUG
                halide_printf(user_context, "Int argument %d (%s): %d\n", i,
                              kernel_arg->name, *((int *)args[i]));
#endif
                ST.Uniform1iv(loc, 1, (GLint *)args[i]);
                break;
            case ARGTYPE_FLOAT: {
#ifdef DEBUG
                halide_printf(user_context, "Float argument %d (%s): %g\n", i,
                              kernel_arg->name, *((float *)args[i]));
#endif
                ST.Uniform1fv(loc, 1, (GLfloat *)args[i]);
                break;
            }
            case ARGTYPE_NONE:
            default:
                halide_error(user_context, "Unknown kernel argument type");
                return 1;
            }
        }
    }
    // The loop stopped at the NULL terminator of args; any kernel argument
    // left over means the caller supplied too few values.
    if (kernel_arg) {
        halide_error(user_context, "Too few arguments passed to halide_opengl_dev_run");
        return 1;
    }

    // Prepare framebuffer for rendering to output textures.
    GLint output_min[2] = { 0, 0 };
    GLint output_extent[2] = { 0, 0 };
    ST.BindFramebuffer(GL_FRAMEBUFFER, ST.framebuffer_id);
    ST.Disable(GL_CULL_FACE);
    ST.Disable(GL_DEPTH_TEST);

    // The argument count was validated above, so kernel_arg stays in step
    // with args here.
    GLint num_output_textures = 0;
    kernel_arg = kernel->arguments;
    for (int i = 0; args[i]; i++, kernel_arg = kernel_arg->next) {
        if (kernel_arg->kind != ARGKIND_OUTBUF) continue;

        // TODO: GL_MAX_COLOR_ATTACHMENTS
        if (num_output_textures >= 1) {
            halide_error(user_context,
                         "OpenGL ES 2.0 only supports one single output texture");
            return 1;
        }

        GLuint tex = *((GLuint*)args[i]);
#ifdef DEBUG
        halide_printf(user_context, "Output texture %d: %d\n", num_output_textures, tex);
#endif
        ST.FramebufferTexture2D(GL_FRAMEBUFFER,
                                GL_COLOR_ATTACHMENT0 + num_output_textures,
                                GL_TEXTURE_2D, tex, 0);
        CHECK_GLERROR(1);

        // The recorded texture info supplies the output region.
        HalideOpenGLTexture *texinfo = halide_opengl_find_texture(tex);
        if (!texinfo) {
            halide_error(user_context, "Undefined output texture");
            return 1;
        }
        output_min[0] = texinfo->min[0];
        output_min[1] = texinfo->min[1];
        output_extent[0] = texinfo->extent[0];
        output_extent[1] = texinfo->extent[1];
        num_output_textures++;
    }
    // TODO: GL_MAX_DRAW_BUFFERS
    if (num_output_textures == 0) {
        halide_printf(user_context, "Warning: kernel '%s' has no output\n",
                      kernel->name);
        // TODO: cleanup
        return 1;
    } else {
        GLenum *draw_buffers = (GLenum*)
            malloc(num_output_textures * sizeof(GLenum));
        if (!draw_buffers) {
            halide_error(user_context,
                         "Out of memory in halide_opengl_dev_run");
            return 1;
        }
        for (int i = 0; i < num_output_textures; i++) {
            draw_buffers[i] = GL_COLOR_ATTACHMENT0 + i;
        }
        ST.DrawBuffers(num_output_textures, draw_buffers);
        // The list is no longer needed once the call has been issued; free it
        // before the error check so an early return cannot leak it.
        free(draw_buffers);
        CHECK_GLERROR(1);
    }

    // Check that framebuffer is set up correctly
    GLenum status = ST.CheckFramebufferStatus(GL_FRAMEBUFFER);
    CHECK_GLERROR(1);
    if (status != GL_FRAMEBUFFER_COMPLETE) {
        halide_printf(user_context, "Setting up GL framebuffer %d failed (%x)\n",
                      ST.framebuffer_id, status);
        // TODO: cleanup
        return 1;
    }

    // Set vertex attributes
    GLint loc = ST.GetUniformLocation(kernel->program_id, "output_extent");
    ST.Uniform2iv(loc, 1, output_extent);
    CHECK_GLERROR(1);
    loc = ST.GetUniformLocation(kernel->program_id, "output_min");
    ST.Uniform2iv(loc, 1, output_min);
    CHECK_GLERROR(1);

    // Setup viewport
    ST.Viewport(0, 0, output_extent[0], output_extent[1]);


    // Execute shader: draw the shared quad as a 4-index triangle strip.
    GLint position = ST.GetAttribLocation(kernel->program_id,
                                          "position");
    ST.BindBuffer(GL_ARRAY_BUFFER, ST.vertex_buffer);
    ST.VertexAttribPointer(position,
                           2,
                           GL_FLOAT,
                           GL_FALSE,    // normalized?
                           sizeof(GLfloat)*2,
                           NULL);
    ST.EnableVertexAttribArray(position);
    ST.BindBuffer(GL_ELEMENT_ARRAY_BUFFER, ST.element_buffer);
    ST.DrawElements(GL_TRIANGLE_STRIP, 4, GL_UNSIGNED_INT, NULL);
    CHECK_GLERROR(1);
    ST.DisableVertexAttribArray(position);

    // Cleanup: unbind every texture unit used above and the framebuffer.
    for (int i = 0; i < num_active_textures; i++) {
        ST.ActiveTexture(GL_TEXTURE0 + i);
        ST.BindTexture(GL_TEXTURE_2D, 0);
    }
    ST.BindFramebuffer(GL_FRAMEBUFFER, 0);
    return 0;
}
Example #14
0
// Copy image data from texture back to host memory.
//
// Does nothing when the device copy is not marked dirty. When the host buffer
// is channel-interleaved (stride[2] == 1 and stride[0] == extent[2]) the
// texture is read straight into buf->host; otherwise it is read into a
// temporary interleaved buffer and rearranged with interleaved_to_halide.
// Clears buf->dev_dirty and returns 0 on success; returns 1 on error.
// NOTE(review): CHECK_GLERROR is defined elsewhere and presumably returns
// early on a GL error; on that path the temporary buffer below is not freed.
EXPORT int halide_opengl_copy_to_host(void *user_context, buffer_t *buf) {
    CHECK_INITIALIZED(1);
    if (!buf->dev_dirty) {
        return 0;
    }

    if (!buf->host || !buf->dev) {
#ifdef DEBUG
        print_buffer(user_context, buf);
#endif
        halide_error(user_context, "Invalid copy_to_host operation");
        return 1;
    }

    GLuint tex = get_texture_id(buf);
#ifdef DEBUG
    halide_printf(user_context, "halide_copy_to_host: %d\n", tex);
#endif

    GLint format;
    GLint type;
    if (!get_texture_format(user_context, buf, &format, &type)) {
        halide_error(user_context, "Invalid texture format\n");
        return 1;
    }
    GLint width = buf->extent[0];
    GLint height = buf->extent[1];
    // get_texture_format accepts a channel extent of 0 or 1 as a
    // single-channel image, so treat anything below 1 as one channel when
    // sizing and addressing pixel data.
    const GLint channels = buf->extent[2] > 0 ? buf->extent[2] : 1;

    ST.BindTexture(GL_TEXTURE_2D, tex);
    CHECK_GLERROR(1);
    bool is_interleaved =
        (buf->stride[2] == 1 && buf->stride[0] == buf->extent[2]);
    if (is_interleaved) {
        // GL_PACK_ROW_LENGTH is measured in pixels per destination row, so it
        // is derived from the host row stride (in elements) rather than from
        // the image height.
        // TODO: GL_UNPACK_ROW_LENGTH
        ST.PixelStorei(GL_PACK_ROW_LENGTH, buf->stride[1] / channels);
        ST.PixelStorei(GL_PACK_ALIGNMENT, 1);
        ST.GetTexImage(GL_TEXTURE_2D, 0, format, type, buf->host);
        CHECK_GLERROR(1);
    } else {
        #ifdef DEBUG
        halide_printf(user_context, "Warning: In copy_to_host, host buffer is not interleaved. Doing slow deinterleave.\n");
        #endif

        // Compute the size in size_t to avoid overflowing int arithmetic.
        size_t size = (size_t)width * (size_t)height *
                      (size_t)channels * (size_t)buf->elem_size;
        uint8_t *tmp = (uint8_t*)halide_malloc(user_context, size);
        if (!tmp) {
            ST.BindTexture(GL_TEXTURE_2D, 0);
            halide_error(user_context, "Out of memory in copy_to_host");
            return 1;
        }

        // The temporary buffer is tightly packed: clear any row length left
        // over from an earlier interleaved read so rows are `width` pixels.
        ST.PixelStorei(GL_PACK_ROW_LENGTH, 0);
        ST.PixelStorei(GL_PACK_ALIGNMENT, 1);
        ST.GetTexImage(GL_TEXTURE_2D, 0, format, type, tmp);
        CHECK_GLERROR(1);

        // NOTE(review): interleaved_to_halide is defined elsewhere; it is
        // assumed to take the number of interleaved channels in `tmp`.
        switch (type) {
        case GL_UNSIGNED_BYTE:
            interleaved_to_halide<uint8_t>(buf, (uint8_t*)tmp, width, height, channels);
            break;
        case GL_UNSIGNED_SHORT:
            interleaved_to_halide<uint16_t>(buf, (uint16_t*)tmp, width, height, channels);
            break;
        case GL_FLOAT:
            interleaved_to_halide<float>(buf, (float*)tmp, width, height, channels);
            break;
        }

        halide_free(user_context, tmp);
    }

    ST.BindTexture(GL_TEXTURE_2D, 0);
    buf->dev_dirty = false;
    return 0;
}
Example #15
0
// Associate an OpenGL texture with the specified buffer.
//
// If the buffer already carries a texture ID (as reported by
// get_texture_id), that texture is adopted after checking that it is at
// least as large as the buffer in the first two dimensions. Otherwise a new
// 2D texture matching the buffer's dimensions and color format is created
// and its ID is stored in buf->dev.
//
// In both cases a HalideOpenGLTexture record describing the texture is
// pushed onto the front of the ST.textures list.
//
// Returns 0 on success. Returns the error code of halide_opengl_init if
// initialization fails, and 1 for an invalid buffer, an undersized existing
// texture, an unsupported buffer layout, or an allocation failure.
// CHECK_GLERROR(1) is presumably an early "return 1" on a pending GL error
// (macro defined elsewhere -- TODO confirm).
EXPORT int halide_opengl_dev_malloc(void *user_context, buffer_t *buf) {
    if (int error = halide_opengl_init(user_context)) {
        return error;
    }

    if (!buf) {
        halide_error(user_context, "Invalid buffer");
        return 1;
    }

    GLuint tex = get_texture_id(buf);
    bool halide_allocated = false;
    // NOTE(review): format is only determined for textures allocated below.
    // For a texture supplied by the host application it stays 0, i.e. the
    // texture's actual format is neither queried nor validated here.
    GLint format = 0;
    GLint width, height;
    if (tex != 0) {
        // Texture was created by the host application: only verify that it
        // is large enough to hold the buffer.
        ST.BindTexture(GL_TEXTURE_2D, tex);
        ST.GetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_WIDTH, &width);
        ST.GetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_HEIGHT, &height);
        CHECK_GLERROR(1);
        // Restore the default binding, as the allocation path below does, so
        // that both paths leave the GL_TEXTURE_2D binding in the same state.
        ST.BindTexture(GL_TEXTURE_2D, 0);

        if (width < buf->extent[0] || height < buf->extent[1]) {
#ifdef DEBUG
            halide_printf(user_context, "Texture size: %dx%d, buffer size: %dx%d\n",
                          width, height, buf->extent[0], buf->extent[1]);
#endif
            halide_error(user_context, "Existing texture is smaller than buffer");
            return 1;
        }
    } else {
        if (buf->extent[3] > 1) {
            halide_error(user_context, "3D textures are not supported");
            return 1;
        }

        // Generate texture ID
        ST.GenTextures(1, &tex);
        CHECK_GLERROR(1);

        // Set parameters for this texture: no interpolation and clamp to edges.
        ST.BindTexture(GL_TEXTURE_2D, tex);
        ST.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
        ST.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
        ST.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
        ST.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
        CHECK_GLERROR(1);

        // Create empty texture here and fill it with glTexSubImage2D later.
        GLint type = GL_UNSIGNED_BYTE;
        if (!get_texture_format(user_context, buf, &format, &type)) {
            halide_error(user_context, "Invalid texture format\n");
            return 1;
        }
        width = buf->extent[0];
        height = buf->extent[1];

        // A NULL data pointer reserves storage without uploading pixels.
        ST.TexImage2D(GL_TEXTURE_2D, 0, format,
                      width, height, 0, format, type, NULL);
        CHECK_GLERROR(1);

        buf->dev = tex;
        halide_allocated = true;
#ifdef DEBUG
        halide_printf(user_context, "Allocated texture %d of size %d x %d\n", tex, width, height);
#endif

        ST.BindTexture(GL_TEXTURE_2D, 0);
    }

    // Record main information about texture and remember it for later. In
    // halide_opengl_dev_run we are only given the texture ID and not the full
    // buffer_t, so we copy the interesting information here.
    HalideOpenGLTexture *texinfo = (HalideOpenGLTexture*)
        malloc(sizeof(HalideOpenGLTexture));
    if (!texinfo) {
        // NOTE(review): a texture generated above is not deleted on this
        // path and buf->dev keeps its ID; releasing it needs a GL delete
        // entry point that is not visible from this function.
        halide_error(user_context, "Out of memory allocating texture info");
        return 1;
    }
    texinfo->id = tex;
    for (int i = 0; i < 3; i++) {
        texinfo->min[i] = buf->min[i];
        texinfo->extent[i] = buf->extent[i];
    }
    texinfo->format = format;
    texinfo->halide_allocated = halide_allocated;

    // Push onto the front of the global texture list.
    texinfo->next = ST.textures;
    ST.textures = texinfo;
    return 0;
}
Example #16
0
// Create HalideOpenGLKernel for a piece of GLSL code.
//
// The first `size` bytes of `src` are copied into the kernel. The leading
// lines of that copy are then scanned: a line starting with kernel_marker
// supplies the kernel name, and lines starting with var_marker,
// input_marker or output_marker are handed to parse_argument and recorded
// as ARGKIND_VAR, ARGKIND_INBUF or ARGKIND_OUTBUF arguments respectively.
// Scanning stops at the first line that matches none of these markers.
//
// Returns the new kernel, with its arguments in the order they appear in
// the source, or NULL if memory could not be allocated or no kernel name
// was found.
static HalideOpenGLKernel *create_kernel(void *user_context, const char *src, int size) {
    HalideOpenGLKernel *kernel =
        (HalideOpenGLKernel *)malloc(sizeof(HalideOpenGLKernel));
    if (!kernel) {
        halide_error(user_context, "Out of memory allocating GLSL kernel");
        return NULL;
    }

    kernel->source = strndup(src, size);
    if (!kernel->source) {
        halide_error(user_context, "Out of memory copying GLSL kernel source");
        free(kernel);
        return NULL;
    }
    kernel->name = NULL;
    kernel->arguments = NULL;
    kernel->shader_id = 0;
    kernel->program_id = 0;
    kernel->next = NULL;

    #ifdef DEBUG
    halide_printf(user_context, "Compiling GLSL kernel:\n%s\n", 
                  kernel->source);
    #endif

    // Parse initial comment block
    const char *line = kernel->source;
    while (*line) {
        // Find the end of the current line. The result of strchr must be
        // tested *before* stepping past the newline: NULL + 1 is never NULL.
        // A final line without a trailing newline ends at the terminating
        // NUL, which strchr(line, '\0') locates.
        const char *newline = strchr(line, '\n');
        const char *line_end = newline ? newline : strchr(line, '\0');
        const char *next_line = newline ? newline + 1 : line_end;

        const char *args;
        if ((args = match_prefix(line, kernel_marker))) {
            kernel->name = strndup(args, line_end - args);
        } else if ((args = match_prefix(line, var_marker))) {
            if (HalideOpenGLArgument *arg =
                parse_argument(user_context, args, line_end)) {
                arg->kind = ARGKIND_VAR;
                arg->next = kernel->arguments;
                kernel->arguments = arg;
            }
        } else if ((args = match_prefix(line, input_marker))) {
            if (HalideOpenGLArgument *arg =
                parse_argument(user_context, args, line_end)) {
                arg->kind = ARGKIND_INBUF;
                arg->next = kernel->arguments;
                kernel->arguments = arg;
            }
        } else if ((args = match_prefix(line, output_marker))) {
            if (HalideOpenGLArgument *arg =
                parse_argument(user_context, args, line_end)) {
                arg->kind = ARGKIND_OUTBUF;
                arg->next = kernel->arguments;
                kernel->arguments = arg;
            }
        } else {
            // Stop parsing if we encounter something we don't recognize
            break;
        }
        line = next_line;
    }
    if (!kernel->name) {
        halide_error(user_context, "Internal error: kernel name not specified");
        // Release what this function allocated itself.
        // NOTE(review): argument nodes already attached to the kernel come
        // from parse_argument, whose allocation scheme is not visible here,
        // so they are not released on this path.
        free((void *)kernel->source);
        free(kernel);
        return NULL;
    }

    // Arguments are currently in reverse order, flip the list.
    HalideOpenGLArgument *cur = kernel->arguments;
    kernel->arguments = NULL;
    while (cur) {
        HalideOpenGLArgument *next = cur->next;
        cur->next = kernel->arguments;
        kernel->arguments = cur;
        cur = next;
    }

    return kernel;
}