/**
 * Emit pixel shader state for the current fragment program.
 *
 * Thin wrapper: hands the WM stage state, its downcast program data and the
 * current fast-clear operation to gen8_upload_ps_state().
 */
static void
upload_ps_state(struct brw_context *brw)
{
   /* BRW_NEW_FS_PROG_DATA */
   const struct brw_wm_prog_data *wm_prog_data =
      brw_wm_prog_data(brw->wm.base.prog_data);

   gen8_upload_ps_state(brw,
                        &brw->wm.base,
                        wm_prog_data,
                        brw->wm.fast_clear_op);
}
/** * \param line_aa BRW_WM_AA_NEVER, BRW_WM_AA_ALWAYS or BRW_WM_AA_SOMETIMES * \param lookup bitmask of BRW_WM_IZ_* flags */ void fs_visitor::setup_fs_payload_gen4() { assert(stage == MESA_SHADER_FRAGMENT); assert(dispatch_width <= 16); struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data); brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; GLuint reg = 1; bool kill_stats_promoted_workaround = false; int lookup = key->iz_lookup; assert(lookup < BRW_WM_IZ_BIT_MAX); /* Crazy workaround in the windowizer, which we need to track in * our register allocation and render target writes. See the "If * statistics are enabled..." paragraph of 11.5.3.2: Early Depth * Test Cases [Pre-DevGT] of the 3D Pipeline - Windower B-Spec. */ if (key->stats_wm && (lookup & BRW_WM_IZ_PS_KILL_ALPHATEST_BIT) && wm_iz_table[lookup].mode == P) { kill_stats_promoted_workaround = true; } payload.subspan_coord_reg[0] = reg++; prog_data->uses_src_depth = (nir->info.inputs_read & (1 << VARYING_SLOT_POS)) != 0; if (wm_iz_table[lookup].sd_present || prog_data->uses_src_depth || kill_stats_promoted_workaround) { payload.source_depth_reg[0] = reg; reg += 2; } if (wm_iz_table[lookup].sd_to_rt || kill_stats_promoted_workaround) source_depth_to_render_target = true; if (wm_iz_table[lookup].ds_present || key->line_aa != BRW_WM_AA_NEVER) { payload.aa_dest_stencil_reg[0] = reg; runtime_check_aads_emit = !wm_iz_table[lookup].ds_present && key->line_aa == BRW_WM_AA_SOMETIMES; reg++; } if (wm_iz_table[lookup].dd_present) { payload.dest_depth_reg[0] = reg; reg+=2; } payload.num_regs = reg; }
/**
 * Gather the inputs for gen7 pixel shader state and emit it.
 *
 * Computes whether dual-source blending is in effect for draw buffer 0 and,
 * on Haswell only, the sample mask; everything is then forwarded to
 * gen7_upload_ps_state().
 */
static void
upload_ps_state(struct brw_context *brw)
{
   const struct gl_context *ctx = &brw->ctx;

   /* BRW_NEW_FS_PROG_DATA */
   const struct brw_wm_prog_data *wm_prog_data =
      brw_wm_prog_data(brw->wm.base.prog_data);

   /* BRW_NEW_FS_PROG_DATA | _NEW_COLOR
    *
    * Requires the shader to be compiled for dual-source output, blending
    * enabled on buffer 0, and a blend function that uses the second source.
    */
   bool dual_src_active = false;
   if (wm_prog_data->dual_src_blend && (ctx->Color.BlendEnabled & 1)) {
      dual_src_active = ctx->Color.Blend[0]._UsesDualSrc;
   }

   /* _NEW_BUFFERS, _NEW_MULTISAMPLE
    *
    * Only Haswell gets a computed sample mask here; other parts pass 0.
    */
   unsigned ps_sample_mask = 0;
   if (brw->is_haswell) {
      ps_sample_mask = gen6_determine_sample_mask(brw);
   }

   gen7_upload_ps_state(brw,
                        &brw->wm.base,
                        wm_prog_data,
                        dual_src_active,
                        ps_sample_mask,
                        brw->wm.fast_clear_op);
}
/** * Setup wm hardware state. See page 225 of Volume 2 */ static void brw_upload_wm_unit(struct brw_context *brw) { const struct gen_device_info *devinfo = &brw->screen->devinfo; struct gl_context *ctx = &brw->ctx; /* BRW_NEW_FRAGMENT_PROGRAM */ const struct gl_fragment_program *fp = brw->fragment_program; /* BRW_NEW_FS_PROG_DATA */ const struct brw_wm_prog_data *prog_data = brw_wm_prog_data(brw->wm.base.prog_data); struct brw_wm_unit_state *wm; wm = brw_state_batch(brw, AUB_TRACE_WM_STATE, sizeof(*wm), 32, &brw->wm.base.state_offset); memset(wm, 0, sizeof(*wm)); if (prog_data->dispatch_8 && prog_data->dispatch_16) { /* These two fields should be the same pre-gen6, which is why we * only have one hardware field to program for both dispatch * widths. */ assert(prog_data->base.dispatch_grf_start_reg == prog_data->dispatch_grf_start_reg_2); } /* BRW_NEW_PROGRAM_CACHE | BRW_NEW_FS_PROG_DATA */ wm->wm5.enable_8_pix = prog_data->dispatch_8; wm->wm5.enable_16_pix = prog_data->dispatch_16; if (prog_data->dispatch_8 || prog_data->dispatch_16) { wm->thread0.grf_reg_count = prog_data->reg_blocks_0; wm->thread0.kernel_start_pointer = brw_program_reloc(brw, brw->wm.base.state_offset + offsetof(struct brw_wm_unit_state, thread0), brw->wm.base.prog_offset + (wm->thread0.grf_reg_count << 1)) >> 6; }
static void upload_wm_state(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; uint32_t dw1 = 0; /* BRW_NEW_FS_PROG_DATA */ const struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(brw->wm.base.prog_data); dw1 |= GEN7_WM_STATISTICS_ENABLE; dw1 |= GEN7_WM_LINE_AA_WIDTH_1_0; dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5; dw1 |= GEN7_WM_POINT_RASTRULE_UPPER_RIGHT; /* _NEW_LINE */ if (ctx->Line.StippleFlag) dw1 |= GEN7_WM_LINE_STIPPLE_ENABLE; /* _NEW_POLYGON */ if (ctx->Polygon.StippleFlag) dw1 |= GEN7_WM_POLYGON_STIPPLE_ENABLE; dw1 |= wm_prog_data->barycentric_interp_modes << GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT; /* BRW_NEW_FS_PROG_DATA */ if (wm_prog_data->early_fragment_tests) dw1 |= GEN7_WM_EARLY_DS_CONTROL_PREPS; else if (wm_prog_data->has_side_effects) dw1 |= GEN7_WM_EARLY_DS_CONTROL_PSEXEC; BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_WM << 16 | (2 - 2)); OUT_BATCH(dw1); ADVANCE_BATCH(); }
/* Calculate interpolants for triangle and line rasterization. */ void brw_upload_sf_prog(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; struct brw_sf_prog_key key; if (!brw_state_dirty(brw, _NEW_BUFFERS | _NEW_HINT | _NEW_LIGHT | _NEW_POINT | _NEW_POLYGON | _NEW_PROGRAM | _NEW_TRANSFORM, BRW_NEW_BLORP | BRW_NEW_FS_PROG_DATA | BRW_NEW_REDUCED_PRIMITIVE | BRW_NEW_VUE_MAP_GEOM_OUT)) return; /* _NEW_BUFFERS */ bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); memset(&key, 0, sizeof(key)); /* Populate the key, noting state dependencies: */ /* BRW_NEW_VUE_MAP_GEOM_OUT */ key.attrs = brw->vue_map_geom_out.slots_valid; /* BRW_NEW_REDUCED_PRIMITIVE */ switch (brw->reduced_primitive) { case GL_TRIANGLES: /* NOTE: We just use the edgeflag attribute as an indicator that * unfilled triangles are active. We don't actually do the * edgeflag testing here, it is already done in the clip * program. */ if (key.attrs & BITFIELD64_BIT(VARYING_SLOT_EDGE)) key.primitive = BRW_SF_PRIM_UNFILLED_TRIS; else key.primitive = BRW_SF_PRIM_TRIANGLES; break; case GL_LINES: key.primitive = BRW_SF_PRIM_LINES; break; case GL_POINTS: key.primitive = BRW_SF_PRIM_POINTS; break; } /* _NEW_TRANSFORM */ key.userclip_active = (ctx->Transform.ClipPlanesEnabled != 0); /* _NEW_POINT */ key.do_point_sprite = ctx->Point.PointSprite; if (key.do_point_sprite) { key.point_sprite_coord_replace = ctx->Point.CoordReplace & 0xff; } if (brw->programs[MESA_SHADER_FRAGMENT]->info.inputs_read & BITFIELD64_BIT(VARYING_SLOT_PNTC)) { key.do_point_coord = 1; } /* * Window coordinates in a FBO are inverted, which means point * sprite origin must be inverted, too. 
*/ if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) != render_to_fbo) key.sprite_origin_lower_left = true; /* BRW_NEW_FS_PROG_DATA */ const struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(brw->wm.base.prog_data); if (wm_prog_data) { key.contains_flat_varying = wm_prog_data->contains_flat_varying; STATIC_ASSERT(sizeof(key.interp_mode) == sizeof(wm_prog_data->interp_mode)); memcpy(key.interp_mode, wm_prog_data->interp_mode, sizeof(key.interp_mode)); } /* _NEW_LIGHT | _NEW_PROGRAM */ key.do_twoside_color = _mesa_vertex_program_two_side_enabled(ctx); /* _NEW_POLYGON */ if (key.do_twoside_color) { /* If we're rendering to a FBO, we have to invert the polygon * face orientation, just as we invert the viewport in * sf_unit_create_from_key(). */ key.frontface_ccw = brw->polygon_front_bit == render_to_fbo; } if (!brw_search_cache(&brw->cache, BRW_CACHE_SF_PROG, &key, sizeof(key), &brw->sf.prog_offset, &brw->sf.prog_data)) { compile_sf_prog( brw, &key ); } }
/**
 * Select the clip program for the current state.
 *
 * Does nothing unless one of the state bits the key depends on is dirty.
 * Otherwise a brw_clip_prog_key is built from the fragment program data,
 * the reduced primitive, the VUE map and the GL lighting, transform and
 * polygon state. The key is looked up in the program cache and, on a miss,
 * the program is compiled.
 */
void
brw_upload_clip_prog(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct brw_clip_prog_key key;

   if (!brw_state_dirty(brw,
                        _NEW_BUFFERS |
                        _NEW_LIGHT |
                        _NEW_POLYGON |
                        _NEW_TRANSFORM,
                        BRW_NEW_BLORP |
                        BRW_NEW_FS_PROG_DATA |
                        BRW_NEW_REDUCED_PRIMITIVE |
                        BRW_NEW_VUE_MAP_GEOM_OUT))
      return;

   /* Zero the whole key first: it is compared by size in the cache lookup
    * below, and most fields are only written conditionally.
    */
   memset(&key, 0, sizeof(key));

   /* Populate the key: */

   /* BRW_NEW_FS_PROG_DATA */
   const struct brw_wm_prog_data *wm_prog_data =
      brw_wm_prog_data(brw->wm.base.prog_data);
   if (wm_prog_data) {
      key.contains_flat_varying = wm_prog_data->contains_flat_varying;
      key.contains_noperspective_varying =
         wm_prog_data->contains_noperspective_varying;

      /* The memcpy below is only valid while both arrays have equal size. */
      STATIC_ASSERT(sizeof(key.interp_mode) ==
                    sizeof(wm_prog_data->interp_mode));
      memcpy(key.interp_mode, wm_prog_data->interp_mode,
             sizeof(key.interp_mode));
   }

   /* BRW_NEW_REDUCED_PRIMITIVE */
   key.primitive = brw->reduced_primitive;
   /* BRW_NEW_VUE_MAP_GEOM_OUT */
   key.attrs = brw->vue_map_geom_out.slots_valid;

   /* _NEW_LIGHT */
   key.pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION);
   /* _NEW_TRANSFORM (also part of VUE map)*/
   if (ctx->Transform.ClipPlanesEnabled)
      key.nr_userclip = _mesa_logbase2(ctx->Transform.ClipPlanesEnabled) + 1;

   /* Default clip mode; the triangle handling below may override it. */
   if (brw->gen == 5)
      key.clip_mode = BRW_CLIP_MODE_KERNEL_CLIP;
   else
      key.clip_mode = BRW_CLIP_MODE_NORMAL;

   /* _NEW_POLYGON */
   if (key.primitive == GL_TRIANGLES) {
      /* Culling both faces: every triangle is rejected. */
      if (ctx->Polygon.CullFlag &&
          ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK)
         key.clip_mode = BRW_CLIP_MODE_REJECT_ALL;
      else {
         /* Each face starts out as culled and is given a fill mode only if
          * that face is not being culled.
          */
         GLuint fill_front = BRW_CLIP_FILL_MODE_CULL;
         GLuint fill_back = BRW_CLIP_FILL_MODE_CULL;
         GLuint offset_front = 0;
         GLuint offset_back = 0;

         /* Front face is drawn unless culling is on with mode GL_FRONT. */
         if (!ctx->Polygon.CullFlag ||
             ctx->Polygon.CullFaceMode != GL_FRONT) {
            switch (ctx->Polygon.FrontMode) {
            case GL_FILL:
               fill_front = BRW_CLIP_FILL_MODE_FILL;
               offset_front = 0;
               break;
            case GL_LINE:
               fill_front = BRW_CLIP_FILL_MODE_LINE;
               offset_front = ctx->Polygon.OffsetLine;
               break;
            case GL_POINT:
               fill_front = BRW_CLIP_FILL_MODE_POINT;
               offset_front = ctx->Polygon.OffsetPoint;
               break;
            }
         }

         /* Back face is drawn unless culling is on with mode GL_BACK. */
         if (!ctx->Polygon.CullFlag ||
             ctx->Polygon.CullFaceMode != GL_BACK) {
            switch (ctx->Polygon.BackMode) {
            case GL_FILL:
               fill_back = BRW_CLIP_FILL_MODE_FILL;
               offset_back = 0;
               break;
            case GL_LINE:
               fill_back = BRW_CLIP_FILL_MODE_LINE;
               offset_back = ctx->Polygon.OffsetLine;
               break;
            case GL_POINT:
               fill_back = BRW_CLIP_FILL_MODE_POINT;
               offset_back = ctx->Polygon.OffsetPoint;
               break;
            }
         }

         if (ctx->Polygon.BackMode != GL_FILL ||
             ctx->Polygon.FrontMode != GL_FILL) {
            key.do_unfilled = 1;

            /* Most cases the fixed function units will handle.  Cases where
             * one or more polygon faces are unfilled will require help:
             */
            key.clip_mode = BRW_CLIP_MODE_CLIP_NON_REJECTED;

            /* Offset parameters are only filled in when at least one face
             * picked up a non-zero offset value above; each is scaled by
             * the draw buffer's _MRD.
             */
            if (offset_back || offset_front) {
               /* _NEW_POLYGON, _NEW_BUFFERS */
               key.offset_units = ctx->Polygon.OffsetUnits * ctx->DrawBuffer->_MRD * 2;
               key.offset_factor = ctx->Polygon.OffsetFactor * ctx->DrawBuffer->_MRD;
               key.offset_clamp = ctx->Polygon.OffsetClamp * ctx->DrawBuffer->_MRD;
            }

            /* Map front/back onto the key's ccw/cw fields according to
             * polygon_front_bit: when it is clear, front maps to ccw;
             * otherwise front maps to cw.
             */
            if (!brw->polygon_front_bit) {
               key.fill_ccw = fill_front;
               key.fill_cw = fill_back;
               key.offset_ccw = offset_front;
               key.offset_cw = offset_back;
               /* With two-sided lighting, flag the back (cw) winding for
                * copy_bfc unless it is culled.
                */
               if (ctx->Light.Model.TwoSide &&
                   key.fill_cw != BRW_CLIP_FILL_MODE_CULL)
                  key.copy_bfc_cw = 1;
            } else {
               key.fill_cw = fill_front;
               key.fill_ccw = fill_back;
               key.offset_cw = offset_front;
               key.offset_ccw = offset_back;
               /* With two-sided lighting, flag the back (ccw) winding for
                * copy_bfc unless it is culled.
                */
               if (ctx->Light.Model.TwoSide &&
                   key.fill_ccw != BRW_CLIP_FILL_MODE_CULL)
                  key.copy_bfc_ccw = 1;
            }
         }
      }
   }

   /* Reuse a cached program for this key, or compile one on a miss. */
   if (!brw_search_cache(&brw->cache, BRW_CACHE_CLIP_PROG,
                         &key, sizeof(key),
                         &brw->clip.prog_offset, &brw->clip.prog_data)) {
      compile_clip_prog( brw, &key );
   }
}
static void upload_wm_state(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; /* BRW_NEW_FS_PROG_DATA */ const struct brw_wm_prog_data *prog_data = brw_wm_prog_data(brw->wm.base.prog_data); bool writes_depth = prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF; uint32_t dw1, dw2; /* _NEW_BUFFERS */ const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1; dw1 = dw2 = 0; dw1 |= GEN7_WM_STATISTICS_ENABLE; dw1 |= GEN7_WM_LINE_AA_WIDTH_1_0; dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5; /* _NEW_LINE */ if (ctx->Line.StippleFlag) dw1 |= GEN7_WM_LINE_STIPPLE_ENABLE; /* _NEW_POLYGON */ if (ctx->Polygon.StippleFlag) dw1 |= GEN7_WM_POLYGON_STIPPLE_ENABLE; if (prog_data->uses_src_depth) dw1 |= GEN7_WM_USES_SOURCE_DEPTH; if (prog_data->uses_src_w) dw1 |= GEN7_WM_USES_SOURCE_W; dw1 |= prog_data->computed_depth_mode << GEN7_WM_COMPUTED_DEPTH_MODE_SHIFT; dw1 |= prog_data->barycentric_interp_modes << GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT; /* _NEW_COLOR, _NEW_MULTISAMPLE _NEW_BUFFERS */ /* Enable if the pixel shader kernel generates and outputs oMask. 
*/ if (prog_data->uses_kill || _mesa_is_alpha_test_enabled(ctx) || _mesa_is_alpha_to_coverage_enabled(ctx) || prog_data->uses_omask) { dw1 |= GEN7_WM_KILL_ENABLE; } /* _NEW_BUFFERS | _NEW_COLOR */ if (brw_color_buffer_write_enabled(brw) || writes_depth || prog_data->has_side_effects || dw1 & GEN7_WM_KILL_ENABLE) { dw1 |= GEN7_WM_DISPATCH_ENABLE; } if (multisampled_fbo) { /* _NEW_MULTISAMPLE */ if (ctx->Multisample.Enabled) dw1 |= GEN7_WM_MSRAST_ON_PATTERN; else dw1 |= GEN7_WM_MSRAST_OFF_PIXEL; if (prog_data->persample_dispatch) dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE; else dw2 |= GEN7_WM_MSDISPMODE_PERPIXEL; } else { dw1 |= GEN7_WM_MSRAST_OFF_PIXEL; dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE; } if (prog_data->uses_sample_mask) { dw1 |= GEN7_WM_USES_INPUT_COVERAGE_MASK; } /* BRW_NEW_FS_PROG_DATA */ if (prog_data->early_fragment_tests) dw1 |= GEN7_WM_EARLY_DS_CONTROL_PREPS; else if (prog_data->has_side_effects) dw1 |= GEN7_WM_EARLY_DS_CONTROL_PSEXEC; /* The "UAV access enable" bits are unnecessary on HSW because they only * seem to have an effect on the HW-assisted coherency mechanism which we * don't need, and the rasterization-related UAV_ONLY flag and the * DISPATCH_ENABLE bit can be set independently from it. * C.f. gen8_upload_ps_extra(). * * BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_FS_PROG_DATA | _NEW_BUFFERS | _NEW_COLOR */ if (brw->is_haswell && !(brw_color_buffer_write_enabled(brw) || writes_depth) && prog_data->has_side_effects) dw2 |= HSW_WM_UAV_ONLY; BEGIN_BATCH(3); OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2)); OUT_BATCH(dw1); OUT_BATCH(dw2); ADVANCE_BATCH(); }
/**
 * Emit the "PS extra" state for the current fragment program.
 *
 * Thin wrapper: downcasts the WM stage's program data and forwards it to
 * gen8_upload_ps_extra().
 */
static void
upload_ps_extra(struct brw_context *brw)
{
   /* BRW_NEW_FS_PROG_DATA */
   struct brw_wm_prog_data *wm_prog_data =
      brw_wm_prog_data(brw->wm.base.prog_data);

   gen8_upload_ps_extra(brw, wm_prog_data);
}