bool
brwCreateContext(int api,
                 const struct gl_config *mesaVis,
                 __DRIcontext *driContextPriv,
                 unsigned major_version,
                 unsigned minor_version,
                 uint32_t flags,
                 unsigned *error,
                 void *sharedContextPrivate)
{
   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
   struct intel_screen *screen = sPriv->driverPrivate;
   struct dd_function_table functions;

   struct brw_context *brw = rzalloc(NULL, struct brw_context);
   if (!brw) {
      printf("%s: failed to alloc context\n", __FUNCTION__);
      *error = __DRI_CTX_ERROR_NO_MEMORY;
      return false;
   }

   /* brwInitVtbl needs to know the chipset generation so that it can set the
    * right pointers.
    */
   brw->gen = screen->gen;
   brwInitVtbl(brw);

   brwInitDriverFunctions(screen, &functions);

   struct gl_context *ctx = &brw->ctx;

   if (!intelInitContext(brw, api, major_version, minor_version,
                         mesaVis, driContextPriv,
                         sharedContextPrivate, &functions,
                         error)) {
      ralloc_free(brw);
      return false;
   }

   brw_initialize_context_constants(brw);

   /* Reinitialize the context point state.  It depends on ctx->Const values. */
   _mesa_init_point(ctx);

   if (brw->gen >= 6) {
      /* Create a new hardware context.  Using a hardware context means that
       * our GPU state will be saved/restored on context switch, allowing us
       * to assume that the GPU is in the same state we left it in.
       *
       * This is required for transform feedback buffer offsets, query
       * objects, and also allows us to reduce how much state we have to emit.
       */
      brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);

      if (!brw->hw_ctx) {
         fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
         ralloc_free(brw);
         return false;
      }
   }

   brw_init_surface_formats(brw);

   /* Initialize swrast, tnl driver tables: */
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   if (tnl)
      tnl->Driver.RunPipeline = _tnl_run_pipeline;

   ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
   ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
   ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;

   if (brw->is_g4x || brw->gen >= 5) {
      brw->CMD_VF_STATISTICS = GM45_3DSTATE_VF_STATISTICS;
      brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_GM45;
      brw->has_surface_tile_offset = true;
      if (brw->gen < 6)
         brw->has_compr4 = true;
      brw->has_aa_line_parameters = true;
      brw->has_pln = true;
   } else {
      brw->CMD_VF_STATISTICS = GEN4_3DSTATE_VF_STATISTICS;
      brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965;
   }

   /* WM maximum threads is number of EUs times number of threads per EU. */
   assert(brw->gen <= 7);

   if (brw->is_haswell) {
      if (brw->gt == 1) {
         brw->max_wm_threads = 102;
         brw->max_vs_threads = 70;
         brw->max_gs_threads = 70;
         brw->urb.size = 128;
         brw->urb.min_vs_entries = 32;
         brw->urb.max_vs_entries = 640;
         brw->urb.max_gs_entries = 256;
      } else if (brw->gt == 2) {
         brw->max_wm_threads = 204;
         brw->max_vs_threads = 280;
         brw->max_gs_threads = 256;
         brw->urb.size = 256;
         brw->urb.min_vs_entries = 64;
         brw->urb.max_vs_entries = 1664;
         brw->urb.max_gs_entries = 640;
      } else if (brw->gt == 3) {
         brw->max_wm_threads = 408;
         brw->max_vs_threads = 280;
         brw->max_gs_threads = 256;
         brw->urb.size = 512;
         brw->urb.min_vs_entries = 64;
         brw->urb.max_vs_entries = 1664;
         brw->urb.max_gs_entries = 640;
      }
   } else if (brw->gen == 7) {
      if (brw->gt == 1) {
         brw->max_wm_threads = 48;
         brw->max_vs_threads = 36;
         brw->max_gs_threads = 36;
         brw->urb.size = 128;
         brw->urb.min_vs_entries = 32;
         brw->urb.max_vs_entries = 512;
         brw->urb.max_gs_entries = 192;
      } else if (brw->gt == 2) {
         brw->max_wm_threads = 172;
         brw->max_vs_threads = 128;
         brw->max_gs_threads = 128;
         brw->urb.size = 256;
         brw->urb.min_vs_entries = 32;
         brw->urb.max_vs_entries = 704;
         brw->urb.max_gs_entries = 320;
      } else {
         assert(!"Unknown gen7 device.");
      }
   } else if (brw->gen == 6) {
      if (brw->gt == 2) {
         brw->max_wm_threads = 80;
         brw->max_vs_threads = 60;
         brw->max_gs_threads = 60;
         brw->urb.size = 64;            /* volume 5c.5 section 5.1 */
         brw->urb.min_vs_entries = 24;
         brw->urb.max_vs_entries = 256; /* volume 2a (see 3DSTATE_URB) */
         brw->urb.max_gs_entries = 256;
      } else {
         brw->max_wm_threads = 40;
         brw->max_vs_threads = 24;
         brw->max_gs_threads = 21;      /* conservative; 24 if rendering disabled */
         brw->urb.size = 32;            /* volume 5c.5 section 5.1 */
         brw->urb.min_vs_entries = 24;
         brw->urb.max_vs_entries = 256; /* volume 2a (see 3DSTATE_URB) */
         brw->urb.max_gs_entries = 256;
      }
      brw->urb.gen6_gs_previously_active = false;
   } else if (brw->gen == 5) {
      brw->urb.size = 1024;
      brw->max_vs_threads = 72;
      brw->max_gs_threads = 32;
      brw->max_wm_threads = 12 * 6;
   } else if (brw->is_g4x) {
      brw->urb.size = 384;
      brw->max_vs_threads = 32;
      brw->max_gs_threads = 2;
      brw->max_wm_threads = 10 * 5;
   } else if (brw->gen < 6) {
      brw->urb.size = 256;
      brw->max_vs_threads = 16;
      brw->max_gs_threads = 2;
      brw->max_wm_threads = 8 * 4;
      brw->has_negative_rhw_bug = true;
   }

   if (brw->gen <= 7) {
      brw->needs_unlit_centroid_workaround = true;
   }

   brw->prim_restart.in_progress = false;
   brw->prim_restart.enable_cut_index = false;

   brw_init_state(brw);

   if (brw->gen < 6) {
      brw->curbe.last_buf = calloc(1, 4096);
      brw->curbe.next_buf = calloc(1, 4096);
   }

   brw->state.dirty.mesa = ~0;
   brw->state.dirty.brw = ~0;

   /* Make sure that brw->state.dirty.brw has enough bits to hold all possible
    * dirty flags.
    */
   STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->state.dirty.brw));

   brw->emit_state_always = 0;

   brw->batch.need_workaround_flush = true;

   ctx->VertexProgram._MaintainTnlProgram = true;
   ctx->FragmentProgram._MaintainTexEnvProgram = true;

   brw_draw_init(brw);

   brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");

   ctx->Const.ContextFlags = 0;
   if ((flags & __DRI_CTX_FLAG_FORWARD_COMPATIBLE) != 0)
      ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT;

   ctx->Debug.DebugOutput = GL_FALSE;
   if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
      ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_DEBUG_BIT;
      ctx->Debug.DebugOutput = GL_TRUE;

      /* Turn on some extra GL_ARB_debug_output generation. */
      brw->perf_debug = true;
   }

   brw_fs_alloc_reg_sets(brw);
   brw_vec4_alloc_reg_set(brw);

   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
      brw_init_shader_time(brw);

   _mesa_compute_version(ctx);

   _mesa_initialize_dispatch_tables(ctx);
   _mesa_initialize_vbo_vtxfmt(ctx);

   return true;
}
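/* The dirty-flag setup above (state.dirty.mesa/brw = ~0 plus the
 * STATIC_ASSERT on BRW_NUM_STATE_BITS) is an instance of a common bitmask
 * change-tracking pattern: each piece of tracked state gets one bit, a new
 * context starts with every bit set so the first draw emits everything, and
 * the flag word is checked at compile time to be wide enough.  Below is a
 * minimal, self-contained sketch of that pattern; all names (ex_state,
 * EX_NEW_*, ex_emit_if_dirty) are illustrative, not the driver's.
 */
#include <stdint.h>
#include <stdio.h>

enum {
   EX_NEW_BLEND    = 1u << 0,
   EX_NEW_DEPTH    = 1u << 1,
   EX_NEW_VIEWPORT = 1u << 2,
   EX_NUM_BITS     = 3,
};

struct ex_state {
   uint32_t dirty;  /* one bit per piece of tracked state */
};

/* As in the driver, the flag word must have room for every tracked bit. */
_Static_assert(EX_NUM_BITS <= 8 * sizeof(((struct ex_state *)0)->dirty),
               "dirty flags must fit in the flag word");

static void
ex_emit_if_dirty(struct ex_state *s)
{
   if (s->dirty & EX_NEW_VIEWPORT)
      printf("re-emitting viewport state\n");

   s->dirty = 0;  /* everything emitted; nothing is dirty now */
}

/* Usage: struct ex_state s = { .dirty = ~0u }; ex_emit_if_dirty(&s);
 * Starting from ~0u mirrors the context-creation code above, where the
 * first state upload unconditionally emits everything.
 */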
struct brw_compiler *
brw_compiler_create(void *mem_ctx, const struct gen_device_info *devinfo)
{
   struct brw_compiler *compiler = rzalloc(mem_ctx, struct brw_compiler);

   compiler->devinfo = devinfo;

   brw_fs_alloc_reg_sets(compiler);
   brw_vec4_alloc_reg_set(compiler);

   compiler->precise_trig = env_var_as_boolean("INTEL_PRECISE_TRIG", false);

   compiler->scalar_stage[MESA_SHADER_VERTEX] =
      devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS);
   compiler->scalar_stage[MESA_SHADER_TESS_CTRL] =
      devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_TCS", true);
   compiler->scalar_stage[MESA_SHADER_TESS_EVAL] =
      devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_TES", true);
   compiler->scalar_stage[MESA_SHADER_GEOMETRY] =
      devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_GS", true);
   compiler->scalar_stage[MESA_SHADER_FRAGMENT] = true;
   compiler->scalar_stage[MESA_SHADER_COMPUTE] = true;

   /* Set default GLSL compiler options for all shader stages. */
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      compiler->glsl_compiler_options[i].MaxUnrollIterations = 32;
      compiler->glsl_compiler_options[i].MaxIfDepth =
         devinfo->gen < 6 ? 16 : UINT_MAX;

      compiler->glsl_compiler_options[i].EmitNoNoise = true;
      compiler->glsl_compiler_options[i].EmitNoMainReturn = true;
      compiler->glsl_compiler_options[i].EmitNoIndirectInput = true;
      compiler->glsl_compiler_options[i].EmitNoIndirectUniform = false;
      compiler->glsl_compiler_options[i].LowerCombinedClipCullDistance = true;

      bool is_scalar = compiler->scalar_stage[i];

      compiler->glsl_compiler_options[i].EmitNoIndirectOutput = is_scalar;
      compiler->glsl_compiler_options[i].EmitNoIndirectTemp = is_scalar;
      compiler->glsl_compiler_options[i].OptimizeForAOS = !is_scalar;

      /* !ARB_gpu_shader5 */
      if (devinfo->gen < 7)
         compiler->glsl_compiler_options[i].EmitNoIndirectSampler = true;

      if (is_scalar) {
         compiler->glsl_compiler_options[i].NirOptions = &scalar_nir_options;
      } else {
         compiler->glsl_compiler_options[i].NirOptions =
            devinfo->gen < 6 ? &vector_nir_options : &vector_nir_options_gen6;
      }

      compiler->glsl_compiler_options[i].LowerBufferInterfaceBlocks = true;
      compiler->glsl_compiler_options[i].ClampBlockIndicesToArrayBounds = true;
   }

   compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].EmitNoIndirectInput = false;
   compiler->glsl_compiler_options[MESA_SHADER_TESS_EVAL].EmitNoIndirectInput = false;
   compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].EmitNoIndirectOutput = false;

   if (compiler->scalar_stage[MESA_SHADER_GEOMETRY])
      compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].EmitNoIndirectInput = false;

   compiler->glsl_compiler_options[MESA_SHADER_COMPUTE]
      .LowerShaderSharedVariables = true;

   return compiler;
}
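/* The scalar_stage[] toggles above are driven by env_var_as_boolean(),
 * Mesa's environment-variable helper.  The sketch below is a hypothetical
 * stand-in showing the general shape of such a helper, not Mesa's actual
 * implementation; the accepted spellings here are an assumption.
 */
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

static bool
example_env_as_boolean(const char *name, bool default_value)
{
   const char *v = getenv(name);
   if (v == NULL)
      return default_value;  /* unset: keep the caller's default */

   /* Treat a few common spellings as true; everything else as false. */
   return strcmp(v, "1") == 0 ||
          strcasecmp(v, "true") == 0 ||
          strcasecmp(v, "yes") == 0;
}

/* Usage mirroring the code above, e.g.:
 *    bool scalar_gs = devinfo->gen >= 8 &&
 *                     example_env_as_boolean("INTEL_SCALAR_GS", true);
 */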
struct brw_compiler *
brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
{
   struct brw_compiler *compiler = rzalloc(mem_ctx, struct brw_compiler);

   compiler->devinfo = devinfo;
   compiler->shader_debug_log = shader_debug_log_mesa;
   compiler->shader_perf_log = shader_perf_log_mesa;

   brw_fs_alloc_reg_sets(compiler);
   brw_vec4_alloc_reg_set(compiler);

   compiler->scalar_stage[MESA_SHADER_VERTEX] =
      devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS);
   compiler->scalar_stage[MESA_SHADER_GEOMETRY] =
      devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_GS", false);
   compiler->scalar_stage[MESA_SHADER_FRAGMENT] = true;
   compiler->scalar_stage[MESA_SHADER_COMPUTE] = true;

   nir_shader_compiler_options *nir_options =
      rzalloc(compiler, nir_shader_compiler_options);
   nir_options->native_integers = true;

   /* In order to help allow for better CSE at the NIR level we tell NIR to
    * split all ffma instructions during opt_algebraic and we then re-combine
    * them as a later step.
    */
   nir_options->lower_ffma = true;
   nir_options->lower_sub = true;

   /* In the vec4 backend, our dpN instruction replicates its result to all
    * the components of a vec4.  We would like NIR to give us replicated fdot
    * instructions because it can optimize better for us.
    *
    * For the FS backend, it should be lowered away by the scalarizing pass so
    * we should never see fdot anyway.
    */
   nir_options->fdot_replicates = true;

   /* We want the GLSL compiler to emit code that uses condition codes */
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      compiler->glsl_compiler_options[i].MaxUnrollIterations = 32;
      compiler->glsl_compiler_options[i].MaxIfDepth =
         devinfo->gen < 6 ? 16 : UINT_MAX;

      compiler->glsl_compiler_options[i].EmitCondCodes = true;
      compiler->glsl_compiler_options[i].EmitNoNoise = true;
      compiler->glsl_compiler_options[i].EmitNoMainReturn = true;
      compiler->glsl_compiler_options[i].EmitNoIndirectInput = true;
      compiler->glsl_compiler_options[i].EmitNoIndirectUniform = false;
      compiler->glsl_compiler_options[i].LowerClipDistance = true;

      bool is_scalar = compiler->scalar_stage[i];

      compiler->glsl_compiler_options[i].EmitNoIndirectOutput = is_scalar;
      compiler->glsl_compiler_options[i].EmitNoIndirectTemp = is_scalar;
      compiler->glsl_compiler_options[i].OptimizeForAOS = !is_scalar;

      /* !ARB_gpu_shader5 */
      if (devinfo->gen < 7)
         compiler->glsl_compiler_options[i].EmitNoIndirectSampler = true;

      compiler->glsl_compiler_options[i].NirOptions = nir_options;
      compiler->glsl_compiler_options[i].LowerBufferInterfaceBlocks = true;
   }

   if (compiler->scalar_stage[MESA_SHADER_GEOMETRY])
      compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].EmitNoIndirectInput = false;

   compiler->glsl_compiler_options[MESA_SHADER_COMPUTE]
      .LowerShaderSharedVariables = true;

   return compiler;
}
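/* A worked illustration of the lower_ffma comment above: splitting a fused
 * multiply-add exposes the multiply as a common subexpression.  This is
 * ordinary C standing in for the NIR transformation, purely for
 * illustration; the function name is not from the driver.
 */
static void
ffma_cse_example(float a, float b, float c, float *x, float *y)
{
   /* Before lowering, the two expressions share nothing:
    *    x = ffma(a, b, c);    y = a * b;
    *
    * After lower_ffma splits the ffma during opt_algebraic, CSE can compute
    * the product once:
    */
   float t = a * b;  /* shared subexpression */
   *x = t + c;       /* the re-combining step may fuse this back into ffma */
   *y = t;
}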
GLboolean
brwCreateContext(gl_api api,
                 const struct gl_config *mesaVis,
                 __DRIcontext *driContextPriv,
                 unsigned major_version,
                 unsigned minor_version,
                 uint32_t flags,
                 bool notify_reset,
                 unsigned *dri_ctx_error,
                 void *sharedContextPrivate)
{
   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
   struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
   struct intel_screen *screen = sPriv->driverPrivate;
   const struct brw_device_info *devinfo = screen->devinfo;
   struct dd_function_table functions;
   struct gl_config visual;

   if (flags & ~(__DRI_CTX_FLAG_DEBUG
                 | __DRI_CTX_FLAG_FORWARD_COMPATIBLE
                 | __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS)) {
      *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
      return false;
   }

   struct brw_context *brw = rzalloc(NULL, struct brw_context);
   if (!brw) {
      printf("%s: failed to alloc context\n", __FUNCTION__);
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      return false;
   }

   driContextPriv->driverPrivate = brw;
   brw->driContext = driContextPriv;
   brw->intelScreen = screen;
   brw->bufmgr = screen->bufmgr;

   brw->gen = devinfo->gen;
   brw->gt = devinfo->gt;
   brw->is_g4x = devinfo->is_g4x;
   brw->is_baytrail = devinfo->is_baytrail;
   brw->is_haswell = devinfo->is_haswell;
   brw->has_llc = devinfo->has_llc;
   brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
   brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
   brw->has_pln = devinfo->has_pln;
   brw->has_compr4 = devinfo->has_compr4;
   brw->has_surface_tile_offset = devinfo->has_surface_tile_offset;
   brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
   brw->needs_unlit_centroid_workaround =
      devinfo->needs_unlit_centroid_workaround;

   brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil;
   brw->has_swizzling = screen->hw_has_swizzling;

   if (brw->gen >= 7) {
      gen7_init_vtable_surface_functions(brw);
      gen7_init_vtable_sampler_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
   } else {
      gen4_init_vtable_surface_functions(brw);
      gen4_init_vtable_sampler_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
   }

   brw_init_driver_functions(brw, &functions);

   if (notify_reset)
      functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;

   struct gl_context *ctx = &brw->ctx;

   if (mesaVis == NULL) {
      memset(&visual, 0, sizeof visual);
      mesaVis = &visual;
   }

   if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      printf("%s: failed to init mesa context\n", __FUNCTION__);
      intelDestroyContext(driContextPriv);
      return false;
   }

   /* Initialize the software rasterizer and helper modules.
    *
    * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
    * software fallbacks (which we have to support on legacy GL to do weird
    * glDrawPixels(), glBitmap(), and other functions).
    */
   if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
      _swrast_CreateContext(ctx);
   }

   _vbo_CreateContext(ctx);
   if (ctx->swrast_context) {
      _tnl_CreateContext(ctx);
      TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
      _swsetup_CreateContext(ctx);

      /* Configure swrast to match hardware characteristics: */
      _swrast_allow_pixel_fog(ctx, false);
      _swrast_allow_vertex_fog(ctx, true);
   }

   _mesa_meta_init(ctx);

   brw_process_driconf_options(brw);
   brw_process_intel_debug_variable(brw);
   brw_initialize_context_constants(brw);

   ctx->Const.ResetStrategy = notify_reset
      ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;

   /* Reinitialize the context point state.  It depends on ctx->Const values. */
   _mesa_init_point(ctx);

   intel_batchbuffer_init(brw);

   brw_init_state(brw);

   intelInitExtensions(ctx);

   intel_fbo_init(brw);

   if (brw->gen >= 6) {
      /* Create a new hardware context.  Using a hardware context means that
       * our GPU state will be saved/restored on context switch, allowing us
       * to assume that the GPU is in the same state we left it in.
       *
       * This is required for transform feedback buffer offsets, query
       * objects, and also allows us to reduce how much state we have to emit.
       */
      brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);

      if (!brw->hw_ctx) {
         fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
         intelDestroyContext(driContextPriv);
         return false;
      }
   }

   /* Notification of GPU resets requires hardware contexts and a kernel new
    * enough to support DRM_IOCTL_I915_GET_RESET_STATS, which isn't upstream
    * yet.
    */
   if (notify_reset) {
      /* This is the wrong error code, but the correct error code (one that
       * will cause EGL to generate EGL_BAD_MATCH) doesn't seem to exist.
       */
      *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_ATTRIBUTE;
      intelDestroyContext(driContextPriv);
      return false;
   }

   brw_init_surface_formats(brw);

   if (brw->is_g4x || brw->gen >= 5) {
      brw->CMD_VF_STATISTICS = GM45_3DSTATE_VF_STATISTICS;
      brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_GM45;
   } else {
      brw->CMD_VF_STATISTICS = GEN4_3DSTATE_VF_STATISTICS;
      brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965;
   }

   brw->max_vs_threads = devinfo->max_vs_threads;
   brw->max_gs_threads = devinfo->max_gs_threads;
   brw->max_wm_threads = devinfo->max_wm_threads;
   brw->urb.size = devinfo->urb.size;
   brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
   brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
   brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;

   /* Estimate the size of the mappable aperture into the GTT.  There's an
    * ioctl to get the whole GTT size, but not one to get the mappable subset.
    * It turns out it's basically always 256MB, though some ancient hardware
    * was smaller.
    */
   uint32_t gtt_size = 256 * 1024 * 1024;

   /* We don't want to map two objects such that a memcpy between them would
    * just fault one mapping in and then the other over and over forever.  So
    * we would need to divide the GTT size by 2.  Additionally, some GTT is
    * taken up by things like the framebuffer and the ringbuffer and such, so
    * be more conservative.
    */
   brw->max_gtt_map_object_size = gtt_size / 4;

   if (brw->gen == 6)
      brw->urb.gen6_gs_previously_active = false;

   brw->prim_restart.in_progress = false;
   brw->prim_restart.enable_cut_index = false;

   if (brw->gen < 6) {
      brw->curbe.last_buf = calloc(1, 4096);
      brw->curbe.next_buf = calloc(1, 4096);
   }

   ctx->VertexProgram._MaintainTnlProgram = true;
   ctx->FragmentProgram._MaintainTexEnvProgram = true;

   brw_draw_init(brw);

   if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
      /* Turn on some extra GL_ARB_debug_output generation. */
      brw->perf_debug = true;
   }

   if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0)
      ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;

   brw_fs_alloc_reg_sets(brw);
   brw_vec4_alloc_reg_set(brw);

   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
      brw_init_shader_time(brw);

   _mesa_compute_version(ctx);

   /* Here we override context constants.  We apply the overrides after
    * calculation of the context version because we do not want the overridden
    * constants to change the version.
    */
   brw_override_max_samples(brw);

   _mesa_initialize_dispatch_tables(ctx);
   _mesa_initialize_vbo_vtxfmt(ctx);

   return true;
}
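/* A small worked example of the mappable-aperture heuristic above: with the
 * assumed 256 MiB aperture, halving once for two simultaneous maps (memcpy
 * source and destination) and halving again as headroom for the framebuffer,
 * ring buffer, and similar allocations caps a single mapped object at
 * 64 MiB.  Illustrative only; the function name is not from the driver.
 */
#include <stdint.h>

static uint32_t
example_max_map_object_size(uint32_t aperture_bytes)
{
   /* 256 MiB / 2 (concurrent maps) / 2 (headroom) = 64 MiB */
   return aperture_bytes / 4;
}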