GLboolean
brwCreateContext(gl_api api,
	         const struct gl_config *mesaVis,
		 __DRIcontext *driContextPriv,
                 unsigned major_version,
                 unsigned minor_version,
                 uint32_t flags,
                 bool notify_reset,
                 unsigned *dri_ctx_error,
	         void *sharedContextPrivate)
{
   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
   struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
   struct intel_screen *screen = sPriv->driverPrivate;
   const struct brw_device_info *devinfo = screen->devinfo;
   struct dd_function_table functions;

   /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
    * provides us with context reset notifications.
    */
   uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG
      | __DRI_CTX_FLAG_FORWARD_COMPATIBLE;

   if (screen->has_context_reset_notification)
      allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;

   if (flags & ~allowed_flags) {
      *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
      return false;
   }

   struct brw_context *brw = rzalloc(NULL, struct brw_context);
   if (!brw) {
      fprintf(stderr, "%s: failed to alloc context\n", __FUNCTION__);
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      return false;
   }

   driContextPriv->driverPrivate = brw;
   brw->driContext = driContextPriv;
   brw->intelScreen = screen;
   brw->bufmgr = screen->bufmgr;

   brw->gen = devinfo->gen;
   brw->gt = devinfo->gt;
   brw->is_g4x = devinfo->is_g4x;
   brw->is_baytrail = devinfo->is_baytrail;
   brw->is_haswell = devinfo->is_haswell;
   brw->has_llc = devinfo->has_llc;
   brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
   brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
   brw->has_pln = devinfo->has_pln;
   brw->has_compr4 = devinfo->has_compr4;
   brw->has_surface_tile_offset = devinfo->has_surface_tile_offset;
   brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
   brw->needs_unlit_centroid_workaround =
      devinfo->needs_unlit_centroid_workaround;

   brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil;
   brw->has_swizzling = screen->hw_has_swizzling;

   brw->vs.base.stage = MESA_SHADER_VERTEX;
   brw->gs.base.stage = MESA_SHADER_GEOMETRY;
   brw->wm.base.stage = MESA_SHADER_FRAGMENT;
   if (brw->gen >= 8) {
      gen8_init_vtable_surface_functions(brw);
      gen7_init_vtable_sampler_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen8_emit_depth_stencil_hiz;
   } else if (brw->gen >= 7) {
      gen7_init_vtable_surface_functions(brw);
      gen7_init_vtable_sampler_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
   } else {
      gen4_init_vtable_surface_functions(brw);
      gen4_init_vtable_sampler_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
   }

   brw_init_driver_functions(brw, &functions);

   if (notify_reset)
      functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;

   struct gl_context *ctx = &brw->ctx;

   if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      fprintf(stderr, "%s: failed to init mesa context\n", __FUNCTION__);
      intelDestroyContext(driContextPriv);
      return false;
   }

   driContextSetFlags(ctx, flags);

   /* Initialize the software rasterizer and helper modules.
    *
    * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
    * software fallbacks (which we have to support on legacy GL to do weird
    * glDrawPixels(), glBitmap(), and other functions).
    */
   if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
      _swrast_CreateContext(ctx);
   }

   _vbo_CreateContext(ctx);
   if (ctx->swrast_context) {
      _tnl_CreateContext(ctx);
      TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
      _swsetup_CreateContext(ctx);

      /* Configure swrast to match hardware characteristics: */
      _swrast_allow_pixel_fog(ctx, false);
      _swrast_allow_vertex_fog(ctx, true);
   }

   _mesa_meta_init(ctx);

   brw_process_driconf_options(brw);
   brw_process_intel_debug_variable(brw);
   brw_initialize_context_constants(brw);

   ctx->Const.ResetStrategy = notify_reset
      ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;

   /* Reinitialize the context point state.  It depends on ctx->Const values. */
   _mesa_init_point(ctx);

   intel_fbo_init(brw);

   intel_batchbuffer_init(brw);

   if (brw->gen >= 6) {
      /* Create a new hardware context.  Using a hardware context means that
       * our GPU state will be saved/restored on context switch, allowing us
       * to assume that the GPU is in the same state we left it in.
       *
       * This is required for transform feedback buffer offsets, query objects,
       * and also allows us to reduce how much state we have to emit.
       */
      brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);

      if (!brw->hw_ctx) {
         fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
         intelDestroyContext(driContextPriv);
         return false;
      }
   }

   brw_init_state(brw);

   intelInitExtensions(ctx);

   brw_init_surface_formats(brw);

   brw->max_vs_threads = devinfo->max_vs_threads;
   brw->max_gs_threads = devinfo->max_gs_threads;
   brw->max_wm_threads = devinfo->max_wm_threads;
   brw->urb.size = devinfo->urb.size;
   brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
   brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
   brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;

   /* Estimate the size of the mappable aperture into the GTT.  There's an
    * ioctl to get the whole GTT size, but not one to get the mappable subset.
    * It turns out it's basically always 256MB, though some ancient hardware
    * was smaller.
    */
   uint32_t gtt_size = 256 * 1024 * 1024;

   /* We don't want to map two objects such that a memcpy between them would
    * just fault one mapping in and then the other over and over forever.  So
    * we would need to divide the GTT size by 2.  Additionally, some GTT is
    * taken up by things like the framebuffer and the ringbuffer and such, so
    * be more conservative.
    */
   brw->max_gtt_map_object_size = gtt_size / 4;

   if (brw->gen == 6)
      brw->urb.gen6_gs_previously_active = false;

   brw->prim_restart.in_progress = false;
   brw->prim_restart.enable_cut_index = false;
   brw->gs.enabled = false;

   if (brw->gen < 6) {
      brw->curbe.last_buf = calloc(1, 4096);
      brw->curbe.next_buf = calloc(1, 4096);
   }

   ctx->VertexProgram._MaintainTnlProgram = true;
   ctx->FragmentProgram._MaintainTexEnvProgram = true;

   brw_draw_init( brw );

   if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
      /* Turn on some extra GL_ARB_debug_output generation. */
      brw->perf_debug = true;
   }

   if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0)
      ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;

   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
      brw_init_shader_time(brw);

   _mesa_compute_version(ctx);

   _mesa_initialize_dispatch_tables(ctx);
   _mesa_initialize_vbo_vtxfmt(ctx);

   if (ctx->Extensions.AMD_performance_monitor) {
      brw_init_performance_monitors(brw);
   }

   vbo_use_buffer_objects(ctx);
   vbo_always_unmap_buffers(ctx);

   return true;
}
Example #2
0
bool
brwCreateContext(int api,
	         const struct gl_config *mesaVis,
		 __DRIcontext *driContextPriv,
                 unsigned major_version,
                 unsigned minor_version,
                 uint32_t flags,
                 unsigned *error,
	         void *sharedContextPrivate)
{
   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
   struct intel_screen *screen = sPriv->driverPrivate;
   struct dd_function_table functions;

   struct brw_context *brw = rzalloc(NULL, struct brw_context);
   if (!brw) {
      printf("%s: failed to alloc context\n", __FUNCTION__);
      *error = __DRI_CTX_ERROR_NO_MEMORY;
      return false;
   }

   /* brwInitVtbl needs to know the chipset generation so that it can set the
    * right pointers.
    */
   brw->gen = screen->gen;

   brwInitVtbl( brw );

   brwInitDriverFunctions(screen, &functions);

   struct gl_context *ctx = &brw->ctx;

   if (!intelInitContext( brw, api, major_version, minor_version,
                          mesaVis, driContextPriv,
			  sharedContextPrivate, &functions,
			  error)) {
      ralloc_free(brw);
      return false;
   }

   brw_initialize_context_constants(brw);

   /* Reinitialize the context point state.  It depends on ctx->Const values. */
   _mesa_init_point(ctx);

   if (brw->gen >= 6) {
      /* Create a new hardware context.  Using a hardware context means that
       * our GPU state will be saved/restored on context switch, allowing us
       * to assume that the GPU is in the same state we left it in.
       *
       * This is required for transform feedback buffer offsets, query objects,
       * and also allows us to reduce how much state we have to emit.
       */
      brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);

      if (!brw->hw_ctx) {
         fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
         ralloc_free(brw);
         return false;
      }
   }

   brw_init_surface_formats(brw);

   /* Initialize swrast, tnl driver tables: */
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   if (tnl)
      tnl->Driver.RunPipeline = _tnl_run_pipeline;

   ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
   ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
   ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;

   if (brw->is_g4x || brw->gen >= 5) {
      brw->CMD_VF_STATISTICS = GM45_3DSTATE_VF_STATISTICS;
      brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_GM45;
      brw->has_surface_tile_offset = true;
      if (brw->gen < 6)
	  brw->has_compr4 = true;
      brw->has_aa_line_parameters = true;
      brw->has_pln = true;
  } else {
      brw->CMD_VF_STATISTICS = GEN4_3DSTATE_VF_STATISTICS;
      brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965;
   }

   /* WM maximum threads is number of EUs times number of threads per EU. */
   assert(brw->gen <= 7);

   if (brw->is_haswell) {
      if (brw->gt == 1) {
	 brw->max_wm_threads = 102;
	 brw->max_vs_threads = 70;
	 brw->max_gs_threads = 70;
	 brw->urb.size = 128;
         brw->urb.min_vs_entries = 32;
	 brw->urb.max_vs_entries = 640;
	 brw->urb.max_gs_entries = 256;
      } else if (brw->gt == 2) {
	 brw->max_wm_threads = 204;
	 brw->max_vs_threads = 280;
	 brw->max_gs_threads = 256;
	 brw->urb.size = 256;
         brw->urb.min_vs_entries = 64;
	 brw->urb.max_vs_entries = 1664;
	 brw->urb.max_gs_entries = 640;
      } else if (brw->gt == 3) {
	 brw->max_wm_threads = 408;
	 brw->max_vs_threads = 280;
	 brw->max_gs_threads = 256;
	 brw->urb.size = 512;
         brw->urb.min_vs_entries = 64;
	 brw->urb.max_vs_entries = 1664;
	 brw->urb.max_gs_entries = 640;
      }
   } else if (brw->gen == 7) {
      if (brw->gt == 1) {
	 brw->max_wm_threads = 48;
	 brw->max_vs_threads = 36;
	 brw->max_gs_threads = 36;
	 brw->urb.size = 128;
         brw->urb.min_vs_entries = 32;
	 brw->urb.max_vs_entries = 512;
	 brw->urb.max_gs_entries = 192;
      } else if (brw->gt == 2) {
	 brw->max_wm_threads = 172;
	 brw->max_vs_threads = 128;
	 brw->max_gs_threads = 128;
	 brw->urb.size = 256;
         brw->urb.min_vs_entries = 32;
	 brw->urb.max_vs_entries = 704;
	 brw->urb.max_gs_entries = 320;
      } else {
	 assert(!"Unknown gen7 device.");
      }
   } else if (brw->gen == 6) {
      if (brw->gt == 2) {
	 brw->max_wm_threads = 80;
	 brw->max_vs_threads = 60;
	 brw->max_gs_threads = 60;
	 brw->urb.size = 64;            /* volume 5c.5 section 5.1 */
         brw->urb.min_vs_entries = 24;
	 brw->urb.max_vs_entries = 256; /* volume 2a (see 3DSTATE_URB) */
	 brw->urb.max_gs_entries = 256;
      } else {
	 brw->max_wm_threads = 40;
	 brw->max_vs_threads = 24;
	 brw->max_gs_threads = 21; /* conservative; 24 if rendering disabled */
	 brw->urb.size = 32;            /* volume 5c.5 section 5.1 */
         brw->urb.min_vs_entries = 24;
	 brw->urb.max_vs_entries = 256; /* volume 2a (see 3DSTATE_URB) */
	 brw->urb.max_gs_entries = 256;
      }
      brw->urb.gen6_gs_previously_active = false;
   } else if (brw->gen == 5) {
      brw->urb.size = 1024;
      brw->max_vs_threads = 72;
      brw->max_gs_threads = 32;
      brw->max_wm_threads = 12 * 6;
   } else if (brw->is_g4x) {
      brw->urb.size = 384;
      brw->max_vs_threads = 32;
      brw->max_gs_threads = 2;
      brw->max_wm_threads = 10 * 5;
   } else if (brw->gen < 6) {
      brw->urb.size = 256;
      brw->max_vs_threads = 16;
      brw->max_gs_threads = 2;
      brw->max_wm_threads = 8 * 4;
      brw->has_negative_rhw_bug = true;
   }

   if (brw->gen <= 7) {
      brw->needs_unlit_centroid_workaround = true;
   }

   brw->prim_restart.in_progress = false;
   brw->prim_restart.enable_cut_index = false;

   brw_init_state( brw );

   if (brw->gen < 6) {
      brw->curbe.last_buf = calloc(1, 4096);
      brw->curbe.next_buf = calloc(1, 4096);
   }

   brw->state.dirty.mesa = ~0;
   brw->state.dirty.brw = ~0;

   /* Make sure that brw->state.dirty.brw has enough bits to hold all possible
    * dirty flags.
    */
   STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->state.dirty.brw));

   brw->emit_state_always = 0;

   brw->batch.need_workaround_flush = true;

   ctx->VertexProgram._MaintainTnlProgram = true;
   ctx->FragmentProgram._MaintainTexEnvProgram = true;

   brw_draw_init( brw );

   brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");

   ctx->Const.ContextFlags = 0;
   if ((flags & __DRI_CTX_FLAG_FORWARD_COMPATIBLE) != 0)
      ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT;

   ctx->Debug.DebugOutput = GL_FALSE;
   if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
      ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_DEBUG_BIT;
      ctx->Debug.DebugOutput = GL_TRUE;

      /* Turn on some extra GL_ARB_debug_output generation. */
      brw->perf_debug = true;
   }

   brw_fs_alloc_reg_sets(brw);
   brw_vec4_alloc_reg_set(brw);

   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
      brw_init_shader_time(brw);

   _mesa_compute_version(ctx);

   _mesa_initialize_dispatch_tables(ctx);
   _mesa_initialize_vbo_vtxfmt(ctx);

   return true;
}
Example #3
0
bool
brwCreateContext(int api,
	         const struct gl_config *mesaVis,
		 __DRIcontext *driContextPriv,
	         void *sharedContextPrivate)
{
   struct dd_function_table functions;
   struct brw_context *brw = rzalloc(NULL, struct brw_context);
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   unsigned i;

   if (!brw) {
      printf("%s: failed to alloc context\n", __FUNCTION__);
      return false;
   }

   brwInitDriverFunctions( &functions );

   if (!intelInitContext( intel, api, mesaVis, driContextPriv,
			  sharedContextPrivate, &functions )) {
      printf("%s: failed to init intel context\n", __FUNCTION__);
      FREE(brw);
      return false;
   }

   brwInitVtbl( brw );

   brw_init_surface_formats(brw);

   /* Initialize swrast, tnl driver tables: */
   intelInitSpanFuncs(ctx);

   TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;

   ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
   ctx->Const.MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
   ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
   ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureCoordUnits,
                                     ctx->Const.MaxTextureImageUnits);
   ctx->Const.MaxVertexTextureImageUnits = BRW_MAX_TEX_UNIT;
   ctx->Const.MaxCombinedTextureImageUnits =
      ctx->Const.MaxVertexTextureImageUnits +
      ctx->Const.MaxTextureImageUnits;

   ctx->Const.MaxTextureLevels = 14; /* 8192 */
   if (ctx->Const.MaxTextureLevels > MAX_TEXTURE_LEVELS)
	   ctx->Const.MaxTextureLevels = MAX_TEXTURE_LEVELS;
   ctx->Const.Max3DTextureLevels = 9;
   ctx->Const.MaxCubeTextureLevels = 12;
   /* minimum maximum.  Users are likely to run into memory problems
    * even at this size, since 64 * 2048 * 2048 * 4 = 1GB and we can't
    * address that much.
    */
   ctx->Const.MaxArrayTextureLayers = 64;
   ctx->Const.MaxTextureRectSize = (1<<12);
   
   ctx->Const.MaxTextureMaxAnisotropy = 16.0;

   /* Hardware only supports a limited number of transform feedback buffers.
    * So we need to override the Mesa default (which is based only on software
    * limits).
    */
   ctx->Const.MaxTransformFeedbackSeparateAttribs = BRW_MAX_SOL_BUFFERS;

   /* On Gen6, in the worst case, we use up one binding table entry per
    * transform feedback component (see comments above the definition of
    * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
    * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
    * BRW_MAX_SOL_BINDINGS.
    *
    * In "separate components" mode, we need to divide this value by
    * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
    * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
    */
   ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
   ctx->Const.MaxTransformFeedbackSeparateComponents =
      BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;

   /* if conformance mode is set, swrast can handle any size AA point */
   ctx->Const.MaxPointSizeAA = 255.0;

   /* We want the GLSL compiler to emit code that uses condition codes */
   for (i = 0; i <= MESA_SHADER_FRAGMENT; i++) {
      ctx->ShaderCompilerOptions[i].MaxIfDepth = intel->gen < 6 ? 16 : UINT_MAX;
      ctx->ShaderCompilerOptions[i].EmitCondCodes = true;
      ctx->ShaderCompilerOptions[i].EmitNVTempInitialization = true;
      ctx->ShaderCompilerOptions[i].EmitNoNoise = true;
      ctx->ShaderCompilerOptions[i].EmitNoMainReturn = true;
      ctx->ShaderCompilerOptions[i].EmitNoIndirectInput = true;
      ctx->ShaderCompilerOptions[i].EmitNoIndirectOutput = true;

      ctx->ShaderCompilerOptions[i].EmitNoIndirectUniform =
	 (i == MESA_SHADER_FRAGMENT);
      ctx->ShaderCompilerOptions[i].EmitNoIndirectTemp =
	 (i == MESA_SHADER_FRAGMENT);
      ctx->ShaderCompilerOptions[i].LowerClipDistance = true;
   }

   ctx->Const.VertexProgram.MaxNativeInstructions = (16 * 1024);
   ctx->Const.VertexProgram.MaxAluInstructions = 0;
   ctx->Const.VertexProgram.MaxTexInstructions = 0;
   ctx->Const.VertexProgram.MaxTexIndirections = 0;
   ctx->Const.VertexProgram.MaxNativeAluInstructions = 0;
   ctx->Const.VertexProgram.MaxNativeTexInstructions = 0;
   ctx->Const.VertexProgram.MaxNativeTexIndirections = 0;
   ctx->Const.VertexProgram.MaxNativeAttribs = 16;
   ctx->Const.VertexProgram.MaxNativeTemps = 256;
   ctx->Const.VertexProgram.MaxNativeAddressRegs = 1;
   ctx->Const.VertexProgram.MaxNativeParameters = 1024;
   ctx->Const.VertexProgram.MaxEnvParams =
      MIN2(ctx->Const.VertexProgram.MaxNativeParameters,
	   ctx->Const.VertexProgram.MaxEnvParams);

   ctx->Const.FragmentProgram.MaxNativeInstructions = (16 * 1024);
   ctx->Const.FragmentProgram.MaxNativeAluInstructions = (16 * 1024);
   ctx->Const.FragmentProgram.MaxNativeTexInstructions = (16 * 1024);
   ctx->Const.FragmentProgram.MaxNativeTexIndirections = (16 * 1024);
   ctx->Const.FragmentProgram.MaxNativeAttribs = 12;
   ctx->Const.FragmentProgram.MaxNativeTemps = 256;
   ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0;
   ctx->Const.FragmentProgram.MaxNativeParameters = 1024;
   ctx->Const.FragmentProgram.MaxEnvParams =
      MIN2(ctx->Const.FragmentProgram.MaxNativeParameters,
	   ctx->Const.FragmentProgram.MaxEnvParams);

   /* Fragment shaders use real, 32-bit twos-complement integers for all
    * integer types.
    */
   ctx->Const.FragmentProgram.LowInt.RangeMin = 31;
   ctx->Const.FragmentProgram.LowInt.RangeMax = 30;
   ctx->Const.FragmentProgram.LowInt.Precision = 0;
   ctx->Const.FragmentProgram.HighInt = ctx->Const.FragmentProgram.MediumInt
      = ctx->Const.FragmentProgram.LowInt;

   /* Gen6 converts quads to polygon in beginning of 3D pipeline,
      but we're not sure how it's actually done for vertex order,
      that affect provoking vertex decision. Always use last vertex
      convention for quad primitive which works as expected for now. */
   if (intel->gen >= 6)
       ctx->Const.QuadsFollowProvokingVertexConvention = false;

   if (intel->is_g4x || intel->gen >= 5) {
      brw->CMD_VF_STATISTICS = GM45_3DSTATE_VF_STATISTICS;
      brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_GM45;
      brw->has_surface_tile_offset = true;
      if (intel->gen < 6)
	  brw->has_compr4 = true;
      brw->has_aa_line_parameters = true;
      brw->has_pln = true;
  } else {
      brw->CMD_VF_STATISTICS = GEN4_3DSTATE_VF_STATISTICS;
      brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965;
   }

   /* WM maximum threads is number of EUs times number of threads per EU. */
   if (intel->gen >= 7) {
      if (intel->gt == 1) {
	 brw->max_wm_threads = 86;
	 brw->max_vs_threads = 36;
	 brw->max_gs_threads = 36;
	 brw->urb.size = 128;
	 brw->urb.max_vs_entries = 512;
	 brw->urb.max_gs_entries = 192;
      } else if (intel->gt == 2) {
	 brw->max_wm_threads = 86;
	 brw->max_vs_threads = 128;
	 brw->max_gs_threads = 128;
	 brw->urb.size = 256;
	 brw->urb.max_vs_entries = 704;
	 brw->urb.max_gs_entries = 320;
      } else {
	 assert(!"Unknown gen7 device.");
      }
   } else if (intel->gen == 6) {
      if (intel->gt == 2) {
	 /* This could possibly be 80, but is supposed to require
	  * disabling of WIZ hashing (bit 6 of GT_MODE, 0x20d0) and a
	  * GPU reset to change.
	  */
	 brw->max_wm_threads = 40;
	 brw->max_vs_threads = 60;
	 brw->max_gs_threads = 60;
	 brw->urb.size = 64;            /* volume 5c.5 section 5.1 */
	 brw->urb.max_vs_entries = 256; /* volume 2a (see 3DSTATE_URB) */
	 brw->urb.max_gs_entries = 256;
      } else {
	 brw->max_wm_threads = 40;
	 brw->max_vs_threads = 24;
	 brw->max_gs_threads = 21; /* conservative; 24 if rendering disabled */
	 brw->urb.size = 32;            /* volume 5c.5 section 5.1 */
	 brw->urb.max_vs_entries = 128; /* volume 2a (see 3DSTATE_URB) */
	 brw->urb.max_gs_entries = 256;
      }
      brw->urb.gen6_gs_previously_active = false;
   } else if (intel->gen == 5) {
      brw->urb.size = 1024;
      brw->max_vs_threads = 72;
      brw->max_gs_threads = 32;
      brw->max_wm_threads = 12 * 6;
   } else if (intel->is_g4x) {
      brw->urb.size = 384;
      brw->max_vs_threads = 32;
      brw->max_gs_threads = 2;
      brw->max_wm_threads = 10 * 5;
   } else if (intel->gen < 6) {
      brw->urb.size = 256;
      brw->max_vs_threads = 16;
      brw->max_gs_threads = 2;
      brw->max_wm_threads = 8 * 4;
      brw->has_negative_rhw_bug = true;
   }

   brw_init_state( brw );

   brw->curbe.last_buf = calloc(1, 4096);
   brw->curbe.next_buf = calloc(1, 4096);

   brw->state.dirty.mesa = ~0;
   brw->state.dirty.brw = ~0;

   brw->emit_state_always = 0;

   intel->batch.need_workaround_flush = true;

   ctx->VertexProgram._MaintainTnlProgram = true;
   ctx->FragmentProgram._MaintainTexEnvProgram = true;

   brw_draw_init( brw );

   brw->new_vs_backend = (getenv("INTEL_OLD_VS") == NULL);
   brw->precompile = driQueryOptionb(&intel->optionCache, "shader_precompile");

   /* If we're using the new shader backend, we require integer uniforms
    * stored as actual integers.
    */
   if (brw->new_vs_backend) {
      ctx->Const.NativeIntegers = true;
      ctx->Const.UniformBooleanTrue = 1;
   }

   return true;
}