static void upload_ps_state(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; uint32_t dw2, dw4, dw5; const int max_threads_shift = brw->is_haswell ? HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT; /* BRW_NEW_PS_BINDING_TABLE */ BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_PS << 16 | (2 - 2)); OUT_BATCH(brw->wm.base.bind_bo_offset); ADVANCE_BATCH(); /* CACHE_NEW_SAMPLER */ BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_PS << 16 | (2 - 2)); OUT_BATCH(brw->wm.base.sampler_offset); ADVANCE_BATCH(); /* CACHE_NEW_WM_PROG */ gen7_upload_constant_state(brw, &brw->wm.base, true, _3DSTATE_CONSTANT_PS); dw2 = dw4 = dw5 = 0; /* CACHE_NEW_SAMPLER */ dw2 |= (ALIGN(brw->wm.base.sampler_count, 4) / 4) << GEN7_PS_SAMPLER_COUNT_SHIFT; /* Use ALT floating point mode for ARB fragment programs, because they * require 0^0 == 1. Even though _CurrentFragmentProgram is used for * rendering, CurrentFragmentProgram is used for this check to * differentiate between the GLSL and non-GLSL cases. */ /* BRW_NEW_FRAGMENT_PROGRAM */ if (ctx->Shader.CurrentFragmentProgram == NULL) dw2 |= GEN7_PS_FLOATING_POINT_MODE_ALT; if (brw->is_haswell) dw4 |= SET_FIELD(1, HSW_PS_SAMPLE_MASK); /* 1 sample for now */ dw4 |= (brw->max_wm_threads - 1) << max_threads_shift; /* CACHE_NEW_WM_PROG */ if (brw->wm.prog_data->nr_params > 0) dw4 |= GEN7_PS_PUSH_CONSTANT_ENABLE; /* CACHE_NEW_WM_PROG | _NEW_COLOR * * The hardware wedges if you have this bit set but don't turn on any dual * source blend factors. */ if (brw->wm.prog_data->dual_src_blend && (ctx->Color.BlendEnabled & 1) && ctx->Color.Blend[0]._UsesDualSrc) { dw4 |= GEN7_PS_DUAL_SOURCE_BLEND_ENABLE; } /* CACHE_NEW_WM_PROG */ if (brw->wm.prog_data->num_varying_inputs != 0) dw4 |= GEN7_PS_ATTRIBUTE_ENABLE; dw4 |= GEN7_PS_8_DISPATCH_ENABLE; if (brw->wm.prog_data->prog_offset_16) dw4 |= GEN7_PS_16_DISPATCH_ENABLE; dw5 |= (brw->wm.prog_data->first_curbe_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0); dw5 |= (brw->wm.prog_data->first_curbe_grf_16 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2); BEGIN_BATCH(8); OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2)); OUT_BATCH(brw->wm.base.prog_offset); OUT_BATCH(dw2); if (brw->wm.prog_data->total_scratch) { OUT_RELOC(brw->wm.base.scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, ffs(brw->wm.prog_data->total_scratch) - 11); } else { OUT_BATCH(0); } OUT_BATCH(dw4); OUT_BATCH(dw5); OUT_BATCH(0); /* kernel 1 pointer */ OUT_BATCH(brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16); ADVANCE_BATCH(); }
static void upload_gs_state(struct brw_context *brw) { const struct brw_stage_state *stage_state = &brw->gs.base; const int max_threads_shift = brw->is_haswell ? HSW_GS_MAX_THREADS_SHIFT : GEN6_GS_MAX_THREADS_SHIFT; /* BRW_NEW_GEOMETRY_PROGRAM */ bool active = brw->geometry_program; /* CACHE_NEW_GS_PROG */ const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base; /* BRW_NEW_GS_BINDING_TABLE */ BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_GS << 16 | (2 - 2)); OUT_BATCH(stage_state->bind_bo_offset); ADVANCE_BATCH(); /* CACHE_NEW_SAMPLER */ BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_GS << 16 | (2 - 2)); OUT_BATCH(stage_state->sampler_offset); ADVANCE_BATCH(); gen7_upload_constant_state(brw, stage_state, active, _3DSTATE_CONSTANT_GS); if (active) { BEGIN_BATCH(7); OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); OUT_BATCH(stage_state->prog_offset); OUT_BATCH(((ALIGN(stage_state->sampler_count, 4)/4) << GEN6_GS_SAMPLER_COUNT_SHIFT)); if (brw->gs.prog_data->base.total_scratch) { OUT_RELOC(stage_state->scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, ffs(brw->gs.prog_data->base.total_scratch) - 11); } else { OUT_BATCH(0); } uint32_t dw4 = ((brw->gs.prog_data->output_vertex_size_hwords * 2 - 1) << GEN7_GS_OUTPUT_VERTEX_SIZE_SHIFT) | (brw->gs.prog_data->output_topology << GEN7_GS_OUTPUT_TOPOLOGY_SHIFT) | (prog_data->urb_read_length << GEN6_GS_URB_READ_LENGTH_SHIFT) | (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT) | (prog_data->dispatch_grf_start_reg << GEN6_GS_DISPATCH_START_GRF_SHIFT); /* Note: the meaning of the GEN7_GS_REORDER_TRAILING bit changes between * Ivy Bridge and Haswell. * * On Ivy Bridge, setting this bit causes the vertices of a triangle * strip to be delivered to the geometry shader in an order that does * not strictly follow the OpenGL spec, but preserves triangle * orientation. For example, if the vertices are (1, 2, 3, 4, 5), then * the geometry shader sees triangles: * * (1, 2, 3), (2, 4, 3), (3, 4, 5) * * (Clearing the bit is even worse, because it fails to preserve * orientation). * * Triangle strips with adjacency always ordered in a way that preserves * triangle orientation but does not strictly follow the OpenGL spec, * regardless of the setting of this bit. * * On Haswell, both triangle strips and triangle strips with adjacency * are always ordered in a way that preserves triangle orientation. * Setting this bit causes the ordering to strictly follow the OpenGL * spec. * * So in either case we want to set the bit. Unfortunately on Ivy * Bridge this will get the order close to correct but not perfect. */ uint32_t dw5 = ((brw->max_gs_threads - 1) << max_threads_shift) | (brw->gs.prog_data->control_data_header_size_hwords << GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT) | (brw->gs.prog_data->dual_instanced_dispatch ? GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE : GEN7_GS_DISPATCH_MODE_DUAL_OBJECT) | GEN6_GS_STATISTICS_ENABLE | (brw->gs.prog_data->include_primitive_id ? GEN7_GS_INCLUDE_PRIMITIVE_ID : 0) | GEN7_GS_REORDER_TRAILING | GEN7_GS_ENABLE; uint32_t dw6 = 0; if (brw->is_haswell) { dw6 |= brw->gs.prog_data->control_data_format << HSW_GS_CONTROL_DATA_FORMAT_SHIFT; } else { dw5 |= brw->gs.prog_data->control_data_format << IVB_GS_CONTROL_DATA_FORMAT_SHIFT; } OUT_BATCH(dw4); OUT_BATCH(dw5); OUT_BATCH(dw6); ADVANCE_BATCH(); } else { BEGIN_BATCH(7); OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); OUT_BATCH(0); /* prog_bo */ OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) | (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); OUT_BATCH(0); /* scratch space base offset */ OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) | (0 << GEN6_GS_URB_READ_LENGTH_SHIFT) | GEN7_GS_INCLUDE_VERTEX_HANDLES | (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT)); OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) | GEN6_GS_STATISTICS_ENABLE); OUT_BATCH(0); ADVANCE_BATCH(); } }
static void upload_gs_state(struct brw_context *brw) { const struct brw_stage_state *stage_state = &brw->gs.base; const int max_threads_shift = brw->is_haswell ? HSW_GS_MAX_THREADS_SHIFT : GEN6_GS_MAX_THREADS_SHIFT; /* BRW_NEW_GEOMETRY_PROGRAM */ bool active = brw->geometry_program; /* CACHE_NEW_GS_PROG */ const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base; /* BRW_NEW_GS_BINDING_TABLE */ BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_GS << 16 | (2 - 2)); OUT_BATCH(stage_state->bind_bo_offset); ADVANCE_BATCH(); /* CACHE_NEW_SAMPLER */ BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_GS << 16 | (2 - 2)); OUT_BATCH(stage_state->sampler_offset); ADVANCE_BATCH(); gen7_upload_constant_state(brw, stage_state, active, _3DSTATE_CONSTANT_GS); if (active) { BEGIN_BATCH(7); OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); OUT_BATCH(stage_state->prog_offset); OUT_BATCH(((ALIGN(stage_state->sampler_count, 4)/4) << GEN6_GS_SAMPLER_COUNT_SHIFT)); if (brw->gs.prog_data->base.total_scratch) { OUT_RELOC(stage_state->scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, ffs(brw->gs.prog_data->base.total_scratch) - 11); } else { OUT_BATCH(0); } uint32_t dw4 = ((brw->gs.prog_data->output_vertex_size_hwords * 2 - 1) << GEN7_GS_OUTPUT_VERTEX_SIZE_SHIFT) | (brw->gs.prog_data->output_topology << GEN7_GS_OUTPUT_TOPOLOGY_SHIFT) | (prog_data->urb_read_length << GEN6_GS_URB_READ_LENGTH_SHIFT) | (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT) | (prog_data->dispatch_grf_start_reg << GEN6_GS_DISPATCH_START_GRF_SHIFT); uint32_t dw5 = ((brw->max_gs_threads - 1) << max_threads_shift) | (brw->gs.prog_data->control_data_header_size_hwords << GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT) | GEN7_GS_DISPATCH_MODE_DUAL_OBJECT | GEN6_GS_STATISTICS_ENABLE | (brw->gs.prog_data->include_primitive_id ? GEN7_GS_INCLUDE_PRIMITIVE_ID : 0) | GEN7_GS_ENABLE; uint32_t dw6 = 0; if (brw->is_haswell) { dw6 |= brw->gs.prog_data->control_data_format << HSW_GS_CONTROL_DATA_FORMAT_SHIFT; } else { dw5 |= brw->gs.prog_data->control_data_format << IVB_GS_CONTROL_DATA_FORMAT_SHIFT; } OUT_BATCH(dw4); OUT_BATCH(dw5); OUT_BATCH(dw6); ADVANCE_BATCH(); } else { BEGIN_BATCH(7); OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); OUT_BATCH(0); /* prog_bo */ OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) | (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); OUT_BATCH(0); /* scratch space base offset */ OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) | (0 << GEN6_GS_URB_READ_LENGTH_SHIFT) | GEN7_GS_INCLUDE_VERTEX_HANDLES | (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT)); OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) | GEN6_GS_STATISTICS_ENABLE); OUT_BATCH(0); ADVANCE_BATCH(); } }