// Determine OpenGL texture format and channel type for a given buffer_t. static bool get_texture_format(void *user_context, buffer_t *buf, GLint *format, GLint *type) { if (buf->extent[2] <= 1) { *format = GL_LUMINANCE; } else if (buf->extent[2] == 3) { *format = GL_RGB; } else if (buf->extent[2] == 4) { *format = GL_RGBA; } else { halide_error(user_context, "Only 1, 3, or 4 color channels are supported"); return false; } if (buf->elem_size == 1) { *type = GL_UNSIGNED_BYTE; } else if (buf->elem_size == 2) { *type = GL_UNSIGNED_SHORT; } else { halide_error(user_context, "Only uint8 and uint16 textures are supported"); return false; } return true; }
/** Allocate both the host and the device storage that back a buffer.
 * Ideally the two share memory (zero copy), but the default implementation
 * may allocate the host side with halide_malloc and the device side with
 * halide_device_malloc separately.
 *
 * The buffer must either have no device interface yet or already use
 * device_interface; switching interfaces is rejected.
 *
 * Returns 0 on success and halide_error_code_device_malloc_failed on any
 * failure (the interface's own error value is not propagated). */
WEAK int halide_device_and_host_malloc(void *user_context, struct halide_buffer_t *buf,
                                       const halide_device_interface_t *device_interface) {
    const halide_device_interface_t *existing_interface = buf->device_interface;

    debug(user_context)
        << "halide_device_and_host_malloc: " << buf
        << " interface " << device_interface
        << " host: " << buf->host
        << ", device: " << buf->device
        << ", host_dirty: " << buf->host_dirty()
        << ", dev_dirty:" << buf->device_dirty()
        << " buf current interface: " << existing_interface << "\n";

    // halide_device_malloc does not support switching interfaces.
    const bool switching_interfaces =
        (existing_interface != NULL) && (existing_interface != device_interface);
    if (switching_interfaces) {
        halide_error(user_context, "halide_device_and_host_malloc doesn't support switching interfaces\n");
        return halide_error_code_device_malloc_failed;
    }

    // Hold a reference on the module for the duration of the call so its
    // code is not freed prematurely.
    // TODO: Exception safety...
    device_interface->use_module();
    int result = device_interface->device_and_host_malloc(user_context, buf);
    device_interface->release_module();

    if (result == 0) {
        return 0;
    }

    halide_error(user_context, "allocating host and device memory failed\n");
    return halide_error_code_device_malloc_failed;
}
// Delete all texture information associated with a buffer. The OpenGL texture // itself is only deleted if it was actually allocated by Halide and not // provided by the host application. EXPORT int halide_opengl_dev_free(void *user_context, buffer_t *buf) { CHECK_INITIALIZED(1); GLuint tex = get_texture_id(buf); if (tex == 0) { return 0; } // Look up corresponding HalideOpenGLTexture and unlink it from the list. HalideOpenGLTexture **ptr = &ST.textures; HalideOpenGLTexture *texinfo = *ptr; for (; texinfo != NULL; ptr = &texinfo->next, texinfo = *ptr) { if (texinfo->id == tex) { *ptr = texinfo->next; texinfo->next = NULL; break; } } if (!texinfo) { halide_error(user_context, "Internal error: texture not found"); return 1; } // Delete texture if it was allocated by us. if (texinfo->halide_allocated) { ST.DeleteTextures(1, &tex); CHECK_GLERROR(1); buf->dev = 0; } free(texinfo); return 0; }
// Parse declaration of the form "type name" and construct // matching HalideOpenGLArgument. static HalideOpenGLArgument *parse_argument(void *user_context, const char *src, const char *end) { const char *name; ArgumentType type = ARGTYPE_NONE; if ((name = match_prefix(src, "float "))) { type = ARGTYPE_FLOAT; } else if ((name = match_prefix(src, "int "))) { type = ARGTYPE_INT; } else if ((name = match_prefix(src, "uint8 "))) { type = ARGTYPE_UINT8; } else if ((name = match_prefix(src, "uint16 "))) { type = ARGTYPE_UINT16; } if (type == ARGTYPE_NONE) { halide_error(user_context, "Internal error: argument type not supported"); return NULL; } HalideOpenGLArgument *arg = (HalideOpenGLArgument *)malloc(sizeof(HalideOpenGLArgument)); arg->name = strndup(name, end - name); arg->type = type; arg->kind = ARGKIND_NONE; arg->next = 0; return arg; }
// printf-style front end for halide_error: formats msg and its variadic
// arguments into a fixed 4096-byte stack buffer (vsnprintf bounds the write
// to the buffer size) and forwards the resulting string to halide_error.
WEAK void halide_error_varargs(void *user_context, const char *msg, ...) {
    char formatted[4096];

    __builtin_va_list ap;
    __builtin_va_start(ap, msg);
    vsnprintf(formatted, sizeof(formatted), msg, ap);
    __builtin_va_end(ap);

    halide_error(user_context, formatted);
}
// Initialize the runtime, in particular all fields in halide_opengl_state. EXPORT int halide_opengl_init(void *user_context) { if (ST.initialized) return 0; // Make a context if there isn't one if (halide_opengl_create_context(user_context)) { halide_printf(user_context, "Failed to make opengl context\n"); return 1; } // Initialize pointers to OpenGL functions. #define GLFUNC(TYPE, VAR) \ ST.VAR = (TYPE)halide_opengl_get_proc_address(user_context, "gl" #VAR); \ if (!ST.VAR) { \ halide_printf(user_context, "Could not load function pointer for %s\n", "gl" #VAR); \ return 1; \ } USED_GL_FUNCTIONS; #undef GLFUNC ST.kernels = NULL; ST.textures = NULL; // Initialize all OpenGL objects that are shared between kernels. ST.GenFramebuffers(1, &ST.framebuffer_id); CHECK_GLERROR(1); ST.vertex_shader_id = halide_opengl_make_shader(user_context, GL_VERTEX_SHADER, vertex_shader_src, NULL); if (ST.vertex_shader_id == 0) { halide_error(user_context, "Failed to create vertex shader"); return 1; } GLuint buf; ST.GenBuffers(1, &buf); ST.BindBuffer(GL_ARRAY_BUFFER, buf); ST.BufferData(GL_ARRAY_BUFFER, sizeof(square_vertices), square_vertices, GL_STATIC_DRAW); CHECK_GLERROR(1); ST.vertex_buffer = buf; ST.GenBuffers(1, &buf); ST.BindBuffer(GL_ELEMENT_ARRAY_BUFFER, buf); ST.BufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(square_indices), square_indices, GL_STATIC_DRAW); CHECK_GLERROR(1); ST.element_buffer = buf; ST.initialized = true; return 0; }
// Initialize CUDA, pick a device and create a context on it.
//
// The device index comes from halide_get_gpu_device; -1 selects the last
// device reported by cuDeviceGetCount. When DEBUG is defined, the device
// name, memory size and a set of device attributes are queried and printed.
//
// On success the new context is stored in *ctx and CUDA_SUCCESS is returned.
// On failure the error is reported through halide_error /
// halide_error_varargs and the failing CUresult is returned.
static CUresult create_context(void *user_context, CUcontext *ctx) {
    // Initialize CUDA
    CUresult err = cuInit(0);
    if (err != CUDA_SUCCESS) {
        halide_error_varargs(user_context, "CUDA: cuInit failed (%s)",
                             _get_error_name(err));
        return err;
    }

    // Make sure we have a device
    int deviceCount = 0;
    err = cuDeviceGetCount(&deviceCount);
    if (err != CUDA_SUCCESS) {
        // The message names the API that was actually called.
        halide_error_varargs(user_context, "CUDA: cuDeviceGetCount failed (%s)",
                             _get_error_name(err));
        return err;
    }
    if (deviceCount <= 0) {
        halide_error(user_context, "CUDA: No devices available");
        return CUDA_ERROR_NO_DEVICE;
    }

    int device = halide_get_gpu_device(user_context);
    if (device == -1) {
        device = deviceCount - 1;
    }

    // Get device
    CUdevice dev;
    err = cuDeviceGet(&dev, device);
    if (err != CUDA_SUCCESS) {
        halide_error(user_context, "CUDA: Failed to get device\n");
        return err;
    }

    DEBUG_PRINTF( user_context, " Got device %d\n", dev );

    // Dump device attributes
#ifdef DEBUG
    {
        // Check each query's result before printing what it returned.
        char name[256];
        name[0] = 0;
        err = cuDeviceGetName(name, 256, dev);
        if (err != CUDA_SUCCESS) {
            halide_error_varargs(user_context, "CUDA: cuDeviceGetName failed (%s)",
                                 _get_error_name(err));
            return err;
        }
        DEBUG_PRINTF(user_context, " %s\n", name);

        size_t memory = 0;
        err = cuDeviceTotalMem(&memory, dev);
        if (err != CUDA_SUCCESS) {
            halide_error_varargs(user_context, "CUDA: cuDeviceTotalMem failed (%s)",
                                 _get_error_name(err));
            return err;
        }
        DEBUG_PRINTF(user_context, " total memory: %d MB\n", (int)(memory >> 20));

        // Declare variables for other state we want to query.
        int max_threads_per_block = 0, warp_size = 0, num_cores = 0;
        int max_block_size[] = {0, 0, 0};
        int max_grid_size[] = {0, 0, 0};
        int max_shared_mem = 0, max_constant_mem = 0;
        int cc_major = 0, cc_minor = 0;

        // Table of (destination, attribute) pairs, terminated by a NULL dst.
        struct {int *dst; CUdevice_attribute attr;} attrs[] = {
            {&max_threads_per_block, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK},
            {&warp_size,             CU_DEVICE_ATTRIBUTE_WARP_SIZE},
            {&num_cores,             CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT},
            {&max_block_size[0],     CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X},
            {&max_block_size[1],     CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y},
            {&max_block_size[2],     CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z},
            {&max_grid_size[0],      CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X},
            {&max_grid_size[1],      CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y},
            {&max_grid_size[2],      CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z},
            {&max_shared_mem,        CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK},
            {&max_constant_mem,      CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY},
            {&cc_major,              CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR},
            {&cc_minor,              CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR},
            {NULL,                   CU_DEVICE_ATTRIBUTE_MAX}};

        // Do all the queries.
        for (int i = 0; attrs[i].dst; i++) {
            err = cuDeviceGetAttribute(attrs[i].dst, attrs[i].attr, dev);
            if (err != CUDA_SUCCESS) {
                halide_error_varargs(user_context,
                                     "CUDA: cuDeviceGetAttribute failed (%s) for attribute %d",
                                     _get_error_name(err), (int)attrs[i].attr);
                return err;
            }
        }

        // threads per core is a function of the compute capability
        int threads_per_core = (cc_major == 1 ? 8 :
                                cc_major == 2 ? (cc_minor == 0 ? 32 : 48) :
                                cc_major == 3 ? 192 :
                                cc_major == 5 ? 128 : 0);

        DEBUG_PRINTF(user_context,
                     " max threads per block: %d\n"
                     " warp size: %d\n"
                     " max block size: %d %d %d\n"
                     " max grid size: %d %d %d\n"
                     " max shared memory per block: %d\n"
                     " max constant memory per block: %d\n"
                     " compute capability %d.%d\n"
                     " cuda cores: %d x %d = %d\n",
                     max_threads_per_block, warp_size,
                     max_block_size[0], max_block_size[1], max_block_size[2],
                     max_grid_size[0], max_grid_size[1], max_grid_size[2],
                     max_shared_mem, max_constant_mem,
                     cc_major, cc_minor,
                     num_cores, threads_per_core, num_cores * threads_per_core);
    }
#endif

    // Create context
    DEBUG_PRINTF( user_context, " cuCtxCreate %d -> ", dev );
    err = cuCtxCreate(ctx, 0, dev);
    if (err != CUDA_SUCCESS) {
        DEBUG_PRINTF( user_context, "%s\n", _get_error_name(err) );
        halide_error_varargs(user_context, "CUDA: cuCtxCreate failed (%s)",
                             _get_error_name(err));
        return err;
    } else {
        unsigned int version = 0;
        cuCtxGetApiVersion(*ctx, &version);
        // version is unsigned, so print it with %u.
        DEBUG_PRINTF( user_context, "%p (%u)\n", *ctx, version);
    }

    return CUDA_SUCCESS;
}
// Initializes the context used by the default implementation // of halide_acquire_context. static int create_context(void *user_context, cl_context *ctx, cl_command_queue *q) { DEBUG_PRINTF( user_context, " create_context (user_context: %p)\n", user_context ); halide_assert(user_context, ctx != NULL && *ctx == NULL); halide_assert(user_context, q != NULL && *q == NULL); cl_int err = 0; const cl_uint maxPlatforms = 4; cl_platform_id platforms[maxPlatforms]; cl_uint platformCount = 0; err = clGetPlatformIDs( maxPlatforms, platforms, &platformCount ); if (err != CL_SUCCESS) { halide_error_varargs(user_context, "CL: clGetPlatformIDs failed (%d)\n", err); return err; } cl_platform_id platform = NULL; // Find the requested platform, or the first if none specified. const char * name = getenv("HL_OCL_PLATFORM_NAME"); if (name != NULL) { for (cl_uint i = 0; i < platformCount; ++i) { const cl_uint maxPlatformName = 256; char platformName[maxPlatformName]; err = clGetPlatformInfo( platforms[i], CL_PLATFORM_NAME, maxPlatformName, platformName, NULL ); if (err != CL_SUCCESS) continue; // A platform matches the request if it is a substring of the platform name. if (strstr(platformName, name)) { platform = platforms[i]; break; } } } else if (platformCount > 0) { platform = platforms[0]; } if (platform == NULL){ halide_error(user_context, "CL: Failed to find platform\n"); return CL_INVALID_PLATFORM; } #ifdef DEBUG const cl_uint maxPlatformName = 256; char platformName[maxPlatformName]; err = clGetPlatformInfo( platform, CL_PLATFORM_NAME, maxPlatformName, platformName, NULL ); if (err != CL_SUCCESS) { halide_printf(user_context, " clGetPlatformInfo(CL_PLATFORM_NAME) failed (%d)\n", err); // This is just debug info, report the error but don't fail context creation due to it. 
//return err; } else { halide_printf(user_context, " Got platform '%s', about to create context (t=%lld)\n", platformName, (long long)halide_current_time_ns(user_context)); } #endif // Get the types of devices requested. cl_device_type device_type = 0; const char * dev_type = getenv("HL_OCL_DEVICE_TYPE"); if (dev_type != NULL) { if (strstr("cpu", dev_type)) { device_type |= CL_DEVICE_TYPE_CPU; } if (strstr("gpu", dev_type)) { device_type |= CL_DEVICE_TYPE_GPU; } } // If no device types are specified, use all the available // devices. if (device_type == 0) { device_type = CL_DEVICE_TYPE_ALL; } // Get all the devices of the specified type. const cl_uint maxDevices = 4; cl_device_id devices[maxDevices]; cl_uint deviceCount = 0; err = clGetDeviceIDs( platform, device_type, maxDevices, devices, &deviceCount ); if (err != CL_SUCCESS) { halide_error_varargs(user_context, "CL: clGetDeviceIDs failed (%d)\n", err); return err; } // If the user indicated a specific device index to use, use // that. Note that this is an index within the set of devices // specified by the device type. char *device_str = getenv("HL_GPU_DEVICE"); cl_uint device = deviceCount - 1; if (device_str) { device = atoi(device_str); } if (device >= deviceCount) { halide_error_varargs(user_context, "CL: Failed to get device %i\n", device); return CL_DEVICE_NOT_FOUND; } cl_device_id dev = devices[device]; #ifdef DEBUG const cl_uint maxDeviceName = 256; char deviceName[maxDeviceName]; err = clGetDeviceInfo( dev, CL_DEVICE_NAME, maxDeviceName, deviceName, NULL ); if (err != CL_SUCCESS) { halide_printf(user_context, " clGetDeviceInfo(CL_DEVICE_NAME) failed (%d)\n", err); // This is just debug info, report the error but don't fail context create if it fails. //return err; } else { halide_printf(user_context, " Got device '%s'\n", deviceName); } #endif // Create context and command queue. 
cl_context_properties properties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 }; DEBUG_PRINTF( user_context, " clCreateContext -> " ); *ctx = clCreateContext(properties, 1, &dev, NULL, NULL, &err); if (err != CL_SUCCESS) { DEBUG_PRINTF( user_context, "%d", err); halide_error_varargs(user_context, "CL: clCreateContext failed (%d)\n", err); return err; } else { DEBUG_PRINTF( user_context, "%p\n", *ctx ); } DEBUG_PRINTF(user_context, " clCreateCommandQueue "); *q = clCreateCommandQueue(*ctx, dev, 0, &err); if (err != CL_SUCCESS) { DEBUG_PRINTF( user_context, "%d", err ); halide_error_varargs(user_context, "CL: clCreateCommandQueue failed (%d)\n", err); return err; } else { DEBUG_PRINTF( user_context, "%p\n", *q ); } return err; }
// Stack-protector failure hook. Reports the problem through halide_error
// (with no user context) and then terminates the process with abort().
WEAK void __stack_chk_fail() {
    const char *message = "Memory error: stack smashing protector changed!\n";
    halide_error(NULL, message);
    abort();
}
// Initialize OpenGL WEAK int halide_opengl_create_context(void *user_context) { const int desired_major_version = 3; const int desired_minor_version = 2; if (glXGetCurrentContext()) { // Already have a context return 0; } void *dpy = XOpenDisplay(NULL); if (!dpy) { halide_error(user_context, "Could not open X11 display.\n"); return -1; } // GLX supported? if (!glXQueryExtension(dpy, NULL, NULL)) { halide_error(user_context, "GLX not supported by X server.\n"); return -1; } int screen = XDefaultScreen(dpy); int attribs[] = { GLX_RENDER_TYPE, GLX_RGBA_BIT, GLX_RED_SIZE, 8, GLX_GREEN_SIZE, 8, GLX_BLUE_SIZE, 8, GLX_ALPHA_SIZE, 8, 0 }; int num_configs = 0; void** fbconfigs = glXChooseFBConfig(dpy, screen, attribs, &num_configs); if (!num_configs) { halide_error(user_context, "Could not get framebuffer config.\n"); return -1; } void *fbconfig = fbconfigs[0]; const char *glxexts = glXQueryExtensionsString(dpy, screen); void *share_list = NULL; int direct = 1; void *context = NULL; glXCreateContextAttribsARBProc glXCreateContextAttribsARB = 0; glXCreateContextAttribsARB = (glXCreateContextAttribsARBProc) glXGetProcAddressARB("glXCreateContextAttribsARB"); if (glx_extension_supported(glxexts, "GLX_ARB_create_context") && glXCreateContextAttribsARB) { int context_attribs[] = { GLX_CONTEXT_MAJOR_VERSION_ARB, desired_major_version, GLX_CONTEXT_MINOR_VERSION_ARB, desired_minor_version, 0 }; context = glXCreateContextAttribsARB(dpy, fbconfig, share_list, direct, context_attribs); } if (!context) { // Open a legacy context context = glXCreateNewContext(dpy, fbconfig, GLX_RGBA_TYPE, share_list, direct); } if (!context) { halide_error(user_context, "Could not create OpenGL context.\n"); return -1; } int pbuffer_attribs[] = { 0x8041 /* GLX_PBUFFER_WIDTH */, 32, 0x8040 /* GLX_PBUFFER_HEIGHT */, 32, 0 }; unsigned long pbuffer = glXCreatePbuffer(dpy, fbconfig, pbuffer_attribs); XFree(fbconfigs); XSync(dpy, 0); if (!glXMakeContextCurrent(dpy, pbuffer, pbuffer, context)) { 
halide_error(user_context, "Could not make context current.\n"); return -1; } return 0; }
// Report an out-of-memory condition and return the matching error code.
WEAK int halide_error_out_of_memory(void *user_context) {
    // A fixed string literal is used on purpose: the error message builder
    // uses malloc, so it cannot be relied on here.
    const int code = halide_error_code_out_of_memory;
    halide_error(user_context, "Out of memory (halide_malloc returned NULL)");
    return code;
}
// Initialize OpenGL WEAK int halide_opengl_create_context(void *user_context) { if (glXGetCurrentContext()) { // Already have a context return 0; } void *dpy = XOpenDisplay(NULL); if (!dpy) { halide_error(user_context, "Could not open X11 display.\n"); return 1; } // GLX supported? if (!glXQueryExtension(dpy, NULL, NULL)) { halide_error(user_context, "GLX not supported by X server.\n"); return 1; } int screen = XDefaultScreen(dpy); int attribs[] = { 0x8011 /* GLX_RENDER_TYPE */, 1 /* GLX_RGBA_BIT */, 8 /* GLX_RED_SIZE */, 8, 9 /* GLX_GREEN_SIZE */, 8, 10 /* GLX_BLUE_SIZE */, 8, 11 /* GLX_ALPHA_SIZE */, 8, 0 }; int num_configs = 0; void** fb_config = glXChooseFBConfig(dpy, screen, attribs, &num_configs); if (!num_configs) { halide_error(user_context, "Could not get framebuffer config.\n"); return 1; } void *ctx = glXCreateNewContext(dpy, fb_config[0], 0x8014 /* GLX_RGBA_TYPE */, NULL /* share list */, 1 /* direct */); if (!ctx) { halide_error(user_context, "Could not create OpenGL context.\n"); return 1; } int pbuffer_attribs[] = { 0x8041 /* GLX_PBUFFER_WIDTH */, 32, 0x8040 /* GLX_PBUFFER_HEIGHT */, 32, 0 }; unsigned long pbuffer = glXCreatePbuffer(dpy, fb_config[0], pbuffer_attribs); // clean up: XFree(fb_config); XSync(dpy, 0); if (!glXMakeContextCurrent(dpy, pbuffer, pbuffer, ctx)) { halide_error(user_context, "Could not make context current.\n"); return 1; } return 0; }
// Run the GLSL kernel called entry_name.
//
// args is a NULL-terminated array with one entry per kernel argument, in the
// order recorded in the kernel's argument list:
//   - ARGKIND_VAR    entries point at an int or float copied into the
//                    uniform of the same name,
//   - ARGKIND_INBUF  entries point at a GLuint texture id bound to a sampler,
//   - ARGKIND_OUTBUF entries point at a GLuint texture id attached to the
//                    framebuffer as render target (exactly one is accepted).
// arg_sizes is only used for a diagnostic message. state_ptr, the block and
// thread counts and shared_mem_bytes are not read by this function.
//
// Returns 0 on success and 1 on any failure.
EXPORT int halide_opengl_dev_run(
    void *user_context,
    void *state_ptr,
    const char *entry_name,
    int blocksX, int blocksY, int blocksZ,
    int threadsX, int threadsY, int threadsZ,
    int shared_mem_bytes,
    size_t arg_sizes[],
    void *args[]) {
    CHECK_INITIALIZED(1);

    HalideOpenGLKernel *kernel = halide_opengl_find_kernel(entry_name);
    if (!kernel) {
        halide_printf(user_context, "Could not find a kernel named '%s'\n", entry_name);
        return 1;
    }

    ST.UseProgram(kernel->program_id);

    HalideOpenGLArgument *kernel_arg;

    // Copy input arguments to corresponding GLSL uniforms.
    GLint num_active_textures = 0;
    kernel_arg = kernel->arguments;
    for (int i = 0; args[i]; i++, kernel_arg = kernel_arg->next) {
        if (!kernel_arg) {
            // arg_sizes[i] is a size_t; convert it to match the %d
            // conversion in the format string.
            halide_printf(user_context, "Argument %d: size=%d value=%p\n", i,
                          (int)arg_sizes[i], args[i]);
            halide_error(user_context, "Too many arguments passed to halide_opengl_dev_run");
            return 1;
        }

        if (kernel_arg->kind == ARGKIND_OUTBUF) {
            // Outbuf textures are handled explicitly below
            continue;
        } else if (kernel_arg->kind == ARGKIND_INBUF) {
            GLint loc = ST.GetUniformLocation(kernel->program_id, kernel_arg->name);
            if (loc == -1) {
                halide_error(user_context, "No sampler defined for input texture.\n");
                return 1;
            }
            GLuint tex = *((GLuint *)args[i]);
            ST.ActiveTexture(GL_TEXTURE0 + num_active_textures);
            ST.BindTexture(GL_TEXTURE_2D, tex);
            ST.Uniform1iv(loc, 1, &num_active_textures);
            num_active_textures++;
            // TODO: check maximum number of active textures
        } else if (kernel_arg->kind == ARGKIND_VAR) {
            GLint loc = ST.GetUniformLocation(kernel->program_id, kernel_arg->name);
            if (loc == -1) {
                // Argument was probably optimized away by GLSL compiler.
#ifdef DEBUG
                halide_printf(user_context, "Ignoring argument '%s'\n", kernel_arg->name);
#endif
                continue;
            }

            switch (kernel_arg->type) {
            case ARGTYPE_INT:
#ifdef DEBUG
                halide_printf(user_context, "Int argument %d (%s): %d\n", i,
                              kernel_arg->name, *((int *)args[i]));
#endif
                ST.Uniform1iv(loc, 1, (GLint *)args[i]);
                break;
            case ARGTYPE_FLOAT: {
#ifdef DEBUG
                halide_printf(user_context, "Float argument %d (%s): %g\n", i,
                              kernel_arg->name, *((float *)args[i]));
#endif
                ST.Uniform1fv(loc, 1, (GLfloat *)args[i]);
                break;
            }
            case ARGTYPE_NONE:
            default:
                halide_error(user_context, "Unknown kernel argument type");
                return 1;
            }
        }
    }
    if (kernel_arg) {
        halide_error(user_context, "Too few arguments passed to halide_opengl_dev_run");
        return 1;
    }

    // Prepare framebuffer for rendering to output textures.
    GLint output_min[2] = { 0, 0 };
    GLint output_extent[2] = { 0, 0 };
    ST.BindFramebuffer(GL_FRAMEBUFFER, ST.framebuffer_id);
    ST.Disable(GL_CULL_FACE);
    ST.Disable(GL_DEPTH_TEST);

    GLint num_output_textures = 0;
    kernel_arg = kernel->arguments;
    // The first pass established that args and the kernel's argument list
    // have the same length, so kernel_arg is non-NULL whenever args[i] is.
    for (int i = 0; args[i]; i++, kernel_arg = kernel_arg->next) {
        if (kernel_arg->kind != ARGKIND_OUTBUF) continue;

        // TODO: GL_MAX_COLOR_ATTACHMENTS
        if (num_output_textures >= 1) {
            halide_error(user_context, "OpenGL ES 2.0 only supports one single output texture");
            return 1;
        }

        GLuint tex = *((GLuint*)args[i]);
#ifdef DEBUG
        halide_printf(user_context, "Output texture %d: %d\n", num_output_textures, tex);
#endif
        ST.FramebufferTexture2D(GL_FRAMEBUFFER,
                                GL_COLOR_ATTACHMENT0 + num_output_textures,
                                GL_TEXTURE_2D, tex, 0);
        CHECK_GLERROR(1);

        HalideOpenGLTexture *texinfo = halide_opengl_find_texture(tex);
        if (!texinfo) {
            halide_error(user_context, "Undefined output texture");
            return 1;
        }
        output_min[0] = texinfo->min[0];
        output_min[1] = texinfo->min[1];
        output_extent[0] = texinfo->extent[0];
        output_extent[1] = texinfo->extent[1];
        num_output_textures++;
    }
    // TODO: GL_MAX_DRAW_BUFFERS
    if (num_output_textures == 0) {
        halide_printf(user_context, "Warning: kernel '%s' has no output\n", kernel->name);
        // TODO: cleanup
        return 1;
    } else {
        GLenum *draw_buffers = (GLenum*)
            malloc(num_output_textures * sizeof(GLenum));
        if (!draw_buffers) {
            halide_error(user_context, "Out of memory allocating draw buffer list");
            return 1;
        }
        for (int i=0; i<num_output_textures; i++)
            draw_buffers[i] = GL_COLOR_ATTACHMENT0 + i;
        ST.DrawBuffers(num_output_textures, draw_buffers);
        // Release the list before the error check so it is not leaked when
        // the check leaves this function early.
        free(draw_buffers);
        CHECK_GLERROR(1);
    }

    // Check that framebuffer is set up correctly
    GLenum status = ST.CheckFramebufferStatus(GL_FRAMEBUFFER);
    CHECK_GLERROR(1);
    if (status != GL_FRAMEBUFFER_COMPLETE) {
        halide_printf(user_context, "Setting up GL framebuffer %d failed (%x)\n",
                      ST.framebuffer_id, status);
        // TODO: cleanup
        return 1;
    }

    // Set vertex attributes
    GLint loc = ST.GetUniformLocation(kernel->program_id, "output_extent");
    ST.Uniform2iv(loc, 1, output_extent);
    CHECK_GLERROR(1);
    loc = ST.GetUniformLocation(kernel->program_id, "output_min");
    ST.Uniform2iv(loc, 1, output_min);
    CHECK_GLERROR(1);

    // Setup viewport
    ST.Viewport(0, 0, output_extent[0], output_extent[1]);

    // Execute shader
    GLint position = ST.GetAttribLocation(kernel->program_id, "position");
    ST.BindBuffer(GL_ARRAY_BUFFER, ST.vertex_buffer);
    ST.VertexAttribPointer(position,
                           2,
                           GL_FLOAT,
                           GL_FALSE, // normalized?
                           sizeof(GLfloat)*2,
                           NULL);
    ST.EnableVertexAttribArray(position);
    ST.BindBuffer(GL_ELEMENT_ARRAY_BUFFER, ST.element_buffer);
    ST.DrawElements(GL_TRIANGLE_STRIP, 4, GL_UNSIGNED_INT, NULL);
    CHECK_GLERROR(1);
    ST.DisableVertexAttribArray(position);

    // Cleanup
    for (int i = 0; i < num_active_textures; i++) {
        ST.ActiveTexture(GL_TEXTURE0 + i);
        ST.BindTexture(GL_TEXTURE_2D, 0);
    }
    ST.BindFramebuffer(GL_FRAMEBUFFER, 0);
    return 0;
}
// Copy image data from texture back to host memory. EXPORT int halide_opengl_copy_to_host(void *user_context, buffer_t *buf) { CHECK_INITIALIZED(1); if (!buf->dev_dirty) { return 0; } if (!buf->host || !buf->dev) { #ifdef DEBUG print_buffer(user_context, buf); #endif halide_error(user_context, "Invalid copy_to_host operation"); return 1; } GLuint tex = get_texture_id(buf); #ifdef DEBUG halide_printf(user_context, "halide_copy_to_host: %d\n", tex); #endif GLint format; GLint type; if (!get_texture_format(user_context, buf, &format, &type)) { halide_error(user_context, "Invalid texture format\n"); return 1; } GLint width = buf->extent[0]; GLint height = buf->extent[1]; ST.BindTexture(GL_TEXTURE_2D, tex); CHECK_GLERROR(1); bool is_interleaved = (buf->stride[2] == 1 && buf->stride[0] == buf->extent[2]); if (is_interleaved) { // TODO: GL_UNPACK_ROW_LENGTH ST.PixelStorei(GL_PACK_ROW_LENGTH, buf->extent[1]); ST.PixelStorei(GL_PACK_ALIGNMENT, 1); ST.GetTexImage(GL_TEXTURE_2D, 0, format, type, buf->host); CHECK_GLERROR(1); } else { #ifdef DEBUG halide_printf(user_context, "Warning: In copy_to_host, host buffer is not interleaved. Doing slow deinterleave.\n"); #endif size_t size = width * height * buf->extent[2] * buf->elem_size; uint8_t *tmp = (uint8_t*)halide_malloc(user_context, size); ST.PixelStorei(GL_PACK_ALIGNMENT, 1); ST.GetTexImage(GL_TEXTURE_2D, 0, format, type, tmp); CHECK_GLERROR(1); switch (type) { case GL_UNSIGNED_BYTE: interleaved_to_halide<uint8_t>(buf, (uint8_t*)tmp, width, height, buf->extent[2]); break; case GL_UNSIGNED_SHORT: interleaved_to_halide<uint16_t>(buf, (uint16_t*)tmp, width, height, buf->extent[2]); break; case GL_FLOAT: interleaved_to_halide<float>(buf, (float*)tmp, width, height, buf->extent[2]); break; } halide_free(user_context, tmp); } ST.BindTexture(GL_TEXTURE_2D, 0); buf->dev_dirty = false; return 0; }
// Allocate a new texture matching the dimension and color format of the // specified buffer. EXPORT int halide_opengl_dev_malloc(void *user_context, buffer_t *buf) { if (int error = halide_opengl_init(user_context)) return error; if (!buf) { halide_error(user_context, "Invalid buffer"); return 1; } // If the texture was already created by the host application, check that // it has the correct format. Otherwise, allocate and set up an // appropriate texture. GLuint tex = get_texture_id(buf); bool halide_allocated = false; GLint format = 0; GLint width, height; if (tex != 0) { ST.BindTexture(GL_TEXTURE_2D, tex); ST.GetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_WIDTH, &width); ST.GetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_HEIGHT, &height); CHECK_GLERROR(1); if (width < buf->extent[0] || height < buf->extent[1]) { #ifdef DEBUG halide_printf(user_context, "Texture size: %dx%d, buffer size: %dx%d\n", width, height, buf->extent[0], buf->extent[1]); #endif halide_error(user_context, "Existing texture is smaller than buffer"); return 1; } } else { if (buf->extent[3] > 1) { halide_error(user_context, "3D textures are not supported"); return 1; } // Generate texture ID ST.GenTextures(1, &tex); CHECK_GLERROR(1); // Set parameters for this texture: no interpolation and clamp to edges. ST.BindTexture(GL_TEXTURE_2D, tex); ST.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); ST.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); ST.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); ST.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); CHECK_GLERROR(1); // Create empty texture here and fill it with glTexSubImage2D later. 
GLint type = GL_UNSIGNED_BYTE; if (!get_texture_format(user_context, buf, &format, &type)) { halide_error(user_context, "Invalid texture format\n"); return 1; } width = buf->extent[0]; height = buf->extent[1]; ST.TexImage2D(GL_TEXTURE_2D, 0, format, width, height, 0, format, type, NULL); CHECK_GLERROR(1); buf->dev = tex; halide_allocated = true; #ifdef DEBUG halide_printf(user_context, "Allocated texture %d of size %d x %d\n", tex, width, height); #endif ST.BindTexture(GL_TEXTURE_2D, 0); } // Record main information about texture and remember it for later. In // halide_opengl_dev_run we are only given the texture ID and not the full // buffer_t, so we copy the interesting information here. HalideOpenGLTexture *texinfo = (HalideOpenGLTexture*) malloc(sizeof(HalideOpenGLTexture)); texinfo->id = tex; for (int i=0; i<3; i++) { texinfo->min[i] = buf->min[i]; texinfo->extent[i] = buf->extent[i]; } texinfo->format = format; texinfo->halide_allocated = halide_allocated; texinfo->next = ST.textures; ST.textures = texinfo; return 0; }
// Create HalideOpenGLKernel for a piece of GLSL code static HalideOpenGLKernel *create_kernel(void *user_context, const char *src, int size) { HalideOpenGLKernel *kernel = (HalideOpenGLKernel *)malloc(sizeof(HalideOpenGLKernel)); kernel->source = strndup(src, size); kernel->name = NULL; kernel->arguments = NULL; kernel->shader_id = 0; kernel->program_id = 0; kernel->next = NULL; #ifdef DEBUG halide_printf(user_context, "Compiling GLSL kernel:\n%s\n", kernel->source); #endif // Parse initial comment block const char *line = kernel->source; while (*line) { const char *next_line = strchr(line, '\n') + 1; if (!next_line) next_line = line + size; const char *args; if ((args = match_prefix(line, kernel_marker))) { kernel->name = strndup(args, next_line - args - 1); } else if ((args = match_prefix(line, var_marker))) { if (HalideOpenGLArgument *arg = parse_argument(user_context, args, next_line - 1)) { arg->kind = ARGKIND_VAR; arg->next = kernel->arguments; kernel->arguments = arg; } } else if ((args = match_prefix(line, input_marker))) { if (HalideOpenGLArgument *arg = parse_argument(user_context, args, next_line - 1)) { arg->kind = ARGKIND_INBUF; arg->next = kernel->arguments; kernel->arguments = arg; } } else if ((args = match_prefix(line, output_marker))) { if (HalideOpenGLArgument *arg = parse_argument(user_context, args, next_line - 1)) { arg->kind = ARGKIND_OUTBUF; arg->next = kernel->arguments; kernel->arguments = arg; } } else { // Stop parsing if we encounter something we don't recognize break; } line = next_line; } if (!kernel->name) { halide_error(user_context, "Internal error: kernel name not specified"); return NULL; } // Arguments are currently in reverse order, flip the list. HalideOpenGLArgument *cur = kernel->arguments; kernel->arguments = NULL; while (cur) { HalideOpenGLArgument *next = cur->next; cur->next = kernel->arguments; kernel->arguments = cur; cur = next; } return kernel; }