/*
 * Translate a sample count (1, 2, 4, or 8) into the hardware NUMSAMPLES
 * encoding.
 *
 * Every supported count is paired with the lowest GEN that provides the
 * encoding, and the device is asserted to be at least that GEN.  An
 * unexpected count trips an assertion and falls back to the single-sample
 * encoding.
 */
static enum gen_sample_count
get_gen6_sample_count(const struct ilo_dev *dev, uint8_t sample_count)
{
   enum gen_sample_count c;
   int min_gen;

   ILO_DEV_ASSERT(dev, 6, 8);

   switch (sample_count) {
   case 1:
      c = GEN6_NUMSAMPLES_1;
      min_gen = ILO_GEN(6);
      break;
   case 2:
      c = GEN8_NUMSAMPLES_2;
      min_gen = ILO_GEN(8);
      break;
   case 4:
      c = GEN6_NUMSAMPLES_4;
      min_gen = ILO_GEN(6);
      break;
   case 8:
      c = GEN7_NUMSAMPLES_8;
      min_gen = ILO_GEN(7);
      break;
   default:
      assert(!"unexpected sample count");
      c = GEN6_NUMSAMPLES_1;
      /*
       * Pair the fallback encoding with its GEN so that min_gen is defined
       * on every path that reaches the check below.
       */
      min_gen = ILO_GEN(6);
      break;
   }

   assert(ilo_dev_gen(dev) >= min_gen);

   return c;
}
static bool vertex_buffer_set_gen8_vertex_buffer_state(struct ilo_state_vertex_buffer *vb, const struct ilo_dev *dev, const struct ilo_state_vertex_buffer_info *info) { const uint32_t size = vertex_buffer_get_gen6_size(dev, info); uint32_t dw0; ILO_DEV_ASSERT(dev, 6, 8); if (!vertex_buffer_validate_gen6(dev, info)) return false; dw0 = info->stride << GEN6_VB_DW0_PITCH__SHIFT; if (ilo_dev_gen(dev) >= ILO_GEN(7)) dw0 |= GEN7_VB_DW0_ADDR_MODIFIED; if (!info->vma) dw0 |= GEN6_VB_DW0_IS_NULL; STATIC_ASSERT(ARRAY_SIZE(vb->vb) >= 3); vb->vb[0] = dw0; vb->vb[1] = info->offset; if (ilo_dev_gen(dev) >= ILO_GEN(8)) { vb->vb[2] = size; } else { /* address of the last valid byte */ vb->vb[2] = (size) ? info->offset + size - 1 : 0; } vb->vma = info->vma; return true; }
static bool draw_vbo_need_sw_restart(const struct ilo_context *ilo, const struct pipe_draw_info *info) { /* the restart index is fixed prior to GEN7.5 */ if (ilo_dev_gen(ilo->dev) < ILO_GEN(7.5)) { const unsigned cut_index = (ilo->state_vector.ib.state.index_size == 1) ? 0xff : (ilo->state_vector.ib.state.index_size == 2) ? 0xffff : (ilo->state_vector.ib.state.index_size == 4) ? 0xffffffff : 0; if (info->restart_index < cut_index) return true; } switch (info->mode) { case PIPE_PRIM_POINTS: case PIPE_PRIM_LINES: case PIPE_PRIM_LINE_STRIP: case PIPE_PRIM_TRIANGLES: case PIPE_PRIM_TRIANGLE_STRIP: /* these never need software fallback */ return false; case PIPE_PRIM_LINE_LOOP: case PIPE_PRIM_POLYGON: case PIPE_PRIM_QUAD_STRIP: case PIPE_PRIM_QUADS: case PIPE_PRIM_TRIANGLE_FAN: /* these need software fallback prior to GEN7.5 */ return (ilo_dev_gen(ilo->dev) < ILO_GEN(7.5)); default: /* the rest always needs software fallback */ return true; } }
/*
 * Return the value for the "Maximum Number of Threads" field of 3DSTATE_PS:
 * a per-GEN (and per-GT) count, minus one.
 */
static uint16_t
ps_get_gen6_thread_count(const struct ilo_dev *dev,
                         const struct ilo_state_ps_info *info)
{
   uint16_t max_threads;
   int gen;

   ILO_DEV_ASSERT(dev, 6, 8);

   gen = ilo_dev_gen(dev);

   if (gen == ILO_GEN(8)) {
      /* scaled automatically */
      max_threads = 64 - 1;
   } else if (gen == ILO_GEN(7.5)) {
      if (dev->gt == 3)
         max_threads = 408;
      else if (dev->gt == 2)
         max_threads = 204;
      else
         max_threads = 102;
   } else if (gen == ILO_GEN(7)) {
      max_threads = (dev->gt == 2) ? 172 : 48;
   } else {
      /* GEN6 and anything else: from the classic driver instead of the PRM */
      max_threads = (dev->gt == 2) ? 80 : 40;
   }

   return max_threads - 1;
}
void ilo_state_raster_get_delta(const struct ilo_state_raster *rs, const struct ilo_dev *dev, const struct ilo_state_raster *old, struct ilo_state_raster_delta *delta) { delta->dirty = 0; if (memcmp(rs->clip, old->clip, sizeof(rs->clip))) delta->dirty |= ILO_STATE_RASTER_3DSTATE_CLIP; if (memcmp(rs->sf, old->sf, sizeof(rs->sf))) delta->dirty |= ILO_STATE_RASTER_3DSTATE_SF; if (memcmp(rs->raster, old->raster, sizeof(rs->raster))) { if (ilo_dev_gen(dev) >= ILO_GEN(8)) delta->dirty |= ILO_STATE_RASTER_3DSTATE_RASTER; else delta->dirty |= ILO_STATE_RASTER_3DSTATE_SF; } if (memcmp(rs->sample, old->sample, sizeof(rs->sample))) { delta->dirty |= ILO_STATE_RASTER_3DSTATE_MULTISAMPLE | ILO_STATE_RASTER_3DSTATE_SAMPLE_MASK; } if (memcmp(rs->wm, old->wm, sizeof(rs->wm))) { delta->dirty |= ILO_STATE_RASTER_3DSTATE_WM; if (ilo_dev_gen(dev) >= ILO_GEN(8)) delta->dirty |= ILO_STATE_RASTER_3DSTATE_WM_HZ_OP; } }
void gen6_draw_vs(struct ilo_render *r, const struct ilo_state_vector *vec, struct ilo_render_draw_session *session) { const bool emit_3dstate_vs = (DIRTY(VS) || r->instruction_bo_changed); const bool emit_3dstate_constant_vs = session->pcb_vs_changed; /* * the classic i965 does this in upload_vs_state(), citing a spec that I * cannot find */ if (emit_3dstate_vs && ilo_dev_gen(r->dev) == ILO_GEN(6)) gen6_wa_pre_non_pipelined(r); /* 3DSTATE_CONSTANT_VS */ if (emit_3dstate_constant_vs) { gen6_3DSTATE_CONSTANT_VS(r->builder, &r->state.vs.PUSH_CONSTANT_BUFFER, &r->state.vs.PUSH_CONSTANT_BUFFER_size, 1); } /* 3DSTATE_VS */ if (emit_3dstate_vs) gen6_3DSTATE_VS(r->builder, vec->vs); if (emit_3dstate_constant_vs && ilo_dev_gen(r->dev) == ILO_GEN(6)) gen6_wa_post_3dstate_constant_vs(r); }
/*
 * Populate every raster-related command in rs from info, choosing the
 * per-GEN setters.
 *
 * All setters are invoked even after one of them fails; the function
 * asserts on, and returns, the combined result.
 */
bool
ilo_state_raster_set_info(struct ilo_state_raster *rs,
                          const struct ilo_dev *dev,
                          const struct ilo_state_raster_info *info)
{
   const bool is_gen8 = (ilo_dev_gen(dev) >= ILO_GEN(8));
   const bool is_gen7 = (ilo_dev_gen(dev) >= ILO_GEN(7));
   struct ilo_state_raster_line_info line;
   bool ok = true;

   if (!raster_set_gen6_3DSTATE_CLIP(rs, dev, info))
      ok = false;

   raster_get_gen6_effective_line(dev, info, &line);

   if (is_gen8) {
      if (!raster_set_gen8_3DSTATE_SF(rs, dev, info, &line))
         ok = false;
      if (!raster_set_gen8_3DSTATE_RASTER(rs, dev, info, &line))
         ok = false;
   } else {
      if (!raster_set_gen7_3DSTATE_SF(rs, dev, info, &line))
         ok = false;
   }

   if (!raster_set_gen8_3DSTATE_MULTISAMPLE(rs, dev, info))
      ok = false;
   if (!raster_set_gen6_3DSTATE_SAMPLE_MASK(rs, dev, info))
      ok = false;

   if (is_gen7) {
      if (!raster_set_gen8_3DSTATE_WM(rs, dev, info, &line))
         ok = false;

      if (is_gen8 && !raster_set_gen8_3dstate_wm_hz_op(rs, dev, info))
         ok = false;
   } else {
      if (!raster_set_gen6_3dstate_wm(rs, dev, info, &line))
         ok = false;
   }

   assert(ok);

   return ok;
}
void gen6_draw_wm_raster(struct ilo_render *r, const struct ilo_state_vector *vec, struct ilo_render_draw_session *session) { /* 3DSTATE_POLY_STIPPLE_PATTERN and 3DSTATE_POLY_STIPPLE_OFFSET */ if ((DIRTY(RASTERIZER) || DIRTY(POLY_STIPPLE)) && vec->rasterizer->state.poly_stipple_enable) { if (ilo_dev_gen(r->dev) == ILO_GEN(6)) gen6_wa_pre_non_pipelined(r); gen6_3DSTATE_POLY_STIPPLE_PATTERN(r->builder, &vec->poly_stipple); gen6_3DSTATE_POLY_STIPPLE_OFFSET(r->builder, 0, 0); } /* 3DSTATE_LINE_STIPPLE */ if (DIRTY(RASTERIZER) && vec->rasterizer->state.line_stipple_enable) { if (ilo_dev_gen(r->dev) == ILO_GEN(6)) gen6_wa_pre_non_pipelined(r); gen6_3DSTATE_LINE_STIPPLE(r->builder, vec->rasterizer->state.line_stipple_pattern, vec->rasterizer->state.line_stipple_factor + 1); } /* 3DSTATE_AA_LINE_PARAMETERS */ if (DIRTY(RASTERIZER) && vec->rasterizer->state.line_smooth) { if (ilo_dev_gen(r->dev) == ILO_GEN(6)) gen6_wa_pre_non_pipelined(r); gen6_3DSTATE_AA_LINE_PARAMETERS(r->builder); } }
bool ilo_state_ps_init(struct ilo_state_ps *ps, const struct ilo_dev *dev, const struct ilo_state_ps_info *info) { struct pixel_ff ff; bool ret = true; assert(ilo_is_zeroed(ps, sizeof(*ps))); ret &= ps_get_gen6_ff(dev, info, &ff); if (ilo_dev_gen(dev) >= ILO_GEN(8)) { ret &= ps_set_gen8_3DSTATE_PS(ps, dev, info, &ff); ret &= ps_set_gen8_3DSTATE_PS_EXTRA(ps, dev, info, &ff); } else if (ilo_dev_gen(dev) >= ILO_GEN(7)) { ret &= ps_set_gen7_3dstate_wm(ps, dev, info, &ff); ret &= ps_set_gen7_3DSTATE_PS(ps, dev, info, &ff); } else { ret &= ps_set_gen6_3dstate_wm(ps, dev, info, &ff); } /* save conditions */ ps->conds = ff.conds; assert(ret); return ret; }
/*
 * Compute layout->qpitch, the number of texel rows between array slices.
 * Nothing is written for textures with at most one array layer.
 */
static void
tex_layout_init_qpitch(struct tex_layout *layout)
{
   const struct pipe_resource *templ = layout->templ;
   int h0, h1;
   int j_rows;

   if (templ->array_size <= 1)
      return;

   h0 = align(layout->levels[0].h, layout->align_j);

   /* without full array spacing, slices are one aligned level 0 apart */
   if (!layout->array_spacing_full) {
      layout->qpitch = h0;
      return;
   }

   h1 = align(layout->levels[1].h, layout->align_j);

   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 115:
    *
    *     "The following equation is used for surface formats other than
    *      compressed textures:
    *
    *        QPitch = (h0 + h1 + 11j)"
    *
    *     "The equation for compressed textures (BC* and FXT1 surface formats)
    *      follows:
    *
    *        QPitch = (h0 + h1 + 11j) / 4"
    *
    *     "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
    *      value calculated in the equation above, for every other odd Surface
    *      Height starting from 1 i.e. 1,5,9,13"
    *
    * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
    *
    *     "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
    *      buffer and stencil buffer have an implied value of ARYSPC_FULL):
    *
    *        QPitch = (h0 + h1 + 12j)
    *        QPitch = (h0 + h1 + 12j) / 4 (compressed)
    *
    *      (There are many typos or missing words here...)"
    *
    * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
    * the base address.  The PRM divides QPitch by 4 for compressed formats
    * because the block height for those formats are 4, and it wants QPitch to
    * mean the number of memory rows, as opposed to texel rows, between
    * slices.  Since we use texel rows in tex->slice_offsets, we do not need
    * to divide QPitch by 4.
    */
   j_rows = (layout->dev->gen >= ILO_GEN(7)) ? 12 : 11;
   layout->qpitch = h0 + h1 + j_rows * layout->align_j;

   /* the DevSNB MSAA errata quoted above */
   if (layout->dev->gen == ILO_GEN(6) && templ->nr_samples > 1 &&
       templ->height0 % 4 == 1)
      layout->qpitch += 4;
}
static void tex_layout_init_hiz(struct tex_layout *layout) { const struct pipe_resource *templ = layout->templ; const struct util_format_description *desc; desc = util_format_description(templ->format); layout->has_depth = util_format_has_depth(desc); layout->has_stencil = util_format_has_stencil(desc); if (!layout->has_depth) return; layout->hiz = true; /* no point in having HiZ */ if (templ->usage & PIPE_USAGE_STAGING) layout->hiz = false; if (layout->dev->gen == ILO_GEN(6)) { /* * From the Sandy Bridge PRM, volume 2 part 1, page 312: * * "The hierarchical depth buffer does not support the LOD field, it * is assumed by hardware to be zero. A separate hierarachical * depth buffer is required for each LOD used, and the * corresponding buffer's state delivered to hardware each time a * new depth buffer state with modified LOD is delivered." * * But we have a stronger requirement. Because of layer offsetting * (check out the callers of ilo_texture_get_slice_offset()), we already * have to require the texture to be non-mipmapped and non-array. */ if (templ->last_level > 0 || templ->array_size > 1 || templ->depth0 > 1) layout->hiz = false; } if (ilo_debug & ILO_DEBUG_NOHIZ) layout->hiz = false; if (layout->has_stencil) { /* * From the Sandy Bridge PRM, volume 2 part 1, page 317: * * "This field (Separate Stencil Buffer Enable) must be set to the * same value (enabled or disabled) as Hierarchical Depth Buffer * Enable." * * GEN7+ requires separate stencil buffers. */ if (layout->dev->gen >= ILO_GEN(7)) layout->separate_stencil = true; else layout->separate_stencil = layout->hiz; if (layout->separate_stencil) layout->has_stencil = false; } }
struct ilo_3d_pipeline * ilo_3d_pipeline_create(struct ilo_cp *cp, const struct ilo_dev_info *dev) { struct ilo_3d_pipeline *p; int i; p = CALLOC_STRUCT(ilo_3d_pipeline); if (!p) return NULL; p->cp = cp; p->dev = dev; switch (p->dev->gen) { case ILO_GEN(6): ilo_3d_pipeline_init_gen6(p); break; case ILO_GEN(7): case ILO_GEN(7.5): ilo_3d_pipeline_init_gen7(p); break; default: assert(!"unsupported GEN"); FREE(p); return NULL; break; } p->invalidate_flags = ILO_3D_PIPELINE_INVALIDATE_ALL; p->workaround_bo = intel_winsys_alloc_buffer(p->cp->winsys, "PIPE_CONTROL workaround", 4096, false); if (!p->workaround_bo) { ilo_warn("failed to allocate PIPE_CONTROL workaround bo\n"); FREE(p); return NULL; } p->packed_sample_position_1x = sample_position_1x[0].x << 4 | sample_position_1x[0].y; /* pack into dwords */ for (i = 0; i < 4; i++) { p->packed_sample_position_4x |= sample_position_4x[i].x << (8 * i + 4) | sample_position_4x[i].y << (8 * i); p->packed_sample_position_8x[0] |= sample_position_8x[i].x << (8 * i + 4) | sample_position_8x[i].y << (8 * i); p->packed_sample_position_8x[1] |= sample_position_8x[4 + i].x << (8 * i + 4) | sample_position_8x[4 + i].y << (8 * i); } return p; }
static bool raster_set_gen8_3DSTATE_WM(struct ilo_state_raster *rs, const struct ilo_dev *dev, const struct ilo_state_raster_info *info, const struct ilo_state_raster_line_info *line) { const struct ilo_state_raster_tri_info *tri = &info->tri; const struct ilo_state_raster_setup_info *setup = &info->setup; const struct ilo_state_raster_scan_info *scan = &info->scan; const enum gen_msrast_mode msrast = raster_setup_get_gen6_msrast_mode(dev, setup); uint32_t dw1; ILO_DEV_ASSERT(dev, 7, 8); if (!raster_validate_gen6_wm(dev, info)) return false; dw1 = scan->earlyz_control << GEN7_WM_DW1_EDSC__SHIFT | scan->zw_interp << GEN7_WM_DW1_ZW_INTERP__SHIFT | scan->barycentric_interps << GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT | GEN7_WM_DW1_AA_LINE_CAP_1_0 | /* same as in 3DSTATE_SF */ GEN7_WM_DW1_AA_LINE_WIDTH_2_0 | GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT; if (scan->stats_enable) dw1 |= GEN7_WM_DW1_STATISTICS; if (ilo_dev_gen(dev) < ILO_GEN(8)) { switch (scan->earlyz_op) { case ILO_STATE_RASTER_EARLYZ_DEPTH_CLEAR: dw1 |= GEN7_WM_DW1_LEGACY_DEPTH_CLEAR; break; case ILO_STATE_RASTER_EARLYZ_DEPTH_RESOLVE: dw1 |= GEN7_WM_DW1_LEGACY_DEPTH_RESOLVE; break; case ILO_STATE_RASTER_EARLYZ_HIZ_RESOLVE: dw1 |= GEN7_WM_DW1_LEGACY_HIZ_RESOLVE; break; default: if (scan->earlyz_stencil_clear) dw1 |= GEN7_WM_DW1_LEGACY_DEPTH_CLEAR; break; } } if (tri->poly_stipple_enable) dw1 |= GEN7_WM_DW1_POLY_STIPPLE_ENABLE; if (line->stipple_enable) dw1 |= GEN7_WM_DW1_LINE_STIPPLE_ENABLE; if (ilo_dev_gen(dev) < ILO_GEN(8)) dw1 |= msrast << GEN7_WM_DW1_MSRASTMODE__SHIFT; STATIC_ASSERT(ARRAY_SIZE(rs->wm) >= 1); rs->wm[0] = dw1; return true; }
static bool urb_init_gen7_hs_entry(const struct ilo_dev *dev, const struct ilo_state_urb_info *info, struct urb_configuration *conf) { /* * From the Ivy Bridge PRM, volume 2 part 1, page 37: * * "HS Number of URB Entries must be divisible by 8 if the HS URB Entry * Allocation Size is less than 9 512-bit URB * entries."2:0" = reserved "000" * * [0,64] * [0,32]" * * From the Haswell PRM, volume 2b, page 849: * * "(HS Number of URB Entries) * [0,128] DevHSW:GT2 * [0,64] DevHSW:GT1" */ const int row_size = 512 / 8; int row_count, entry_count; int max_entry_count; ILO_DEV_ASSERT(dev, 7, 8); row_count = (info->hs_entry_size + row_size - 1) / row_size; if (!row_count) row_count++; entry_count = conf->hs_urb_alloc_8kb * 8192 / (row_size * row_count); if (row_count < 9) entry_count &= ~7; switch (ilo_dev_gen(dev)) { case ILO_GEN(8): case ILO_GEN(7.5): max_entry_count = (dev->gt >= 2) ? 128 : 64; break; case ILO_GEN(7): max_entry_count = (dev->gt == 2) ? 64 : 32; break; default: assert(!"unexpected gen"); return false; break; } if (entry_count > max_entry_count) entry_count = max_entry_count; else if (info->hs_entry_size && !entry_count) return false; conf->hs_entry_rows = row_count; conf->hs_entry_count = entry_count; return true; }
static bool ps_set_gen7_3DSTATE_PS(struct ilo_state_ps *ps, const struct ilo_dev *dev, const struct ilo_state_ps_info *info, const struct pixel_ff *ff) { const struct ilo_state_ps_io_info *io = &info->io; uint32_t dw2, dw3, dw4, dw5; ILO_DEV_ASSERT(dev, 7, 7.5); dw2 = ff->sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT | ff->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT; if (false) dw2 |= GEN6_THREADDISP_FP_MODE_ALT; dw3 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; dw4 = io->posoffset << GEN7_PS_DW4_POSOFFSET__SHIFT | ff->dispatch_modes << GEN7_PS_DW4_DISPATCH_MODE__SHIFT; if (ilo_dev_gen(dev) == ILO_GEN(7.5)) { dw4 |= ff->thread_count << GEN75_PS_DW4_MAX_THREADS__SHIFT | (ff->sample_mask & 0xff) << GEN75_PS_DW4_SAMPLE_MASK__SHIFT; } else { dw4 |= ff->thread_count << GEN7_PS_DW4_MAX_THREADS__SHIFT; } if (ff->pcb_enable) dw4 |= GEN7_PS_DW4_PUSH_CONSTANT_ENABLE; if (io->attr_count) dw4 |= GEN7_PS_DW4_ATTR_ENABLE; if (io->write_omask) dw4 |= GEN7_PS_DW4_COMPUTE_OMASK; if (info->rt_clear_enable) dw4 |= GEN7_PS_DW4_RT_FAST_CLEAR; if (ff->dual_source_blending) dw4 |= GEN7_PS_DW4_DUAL_SOURCE_BLEND; if (info->rt_resolve_enable) dw4 |= GEN7_PS_DW4_RT_RESOLVE; if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff->has_uav) dw4 |= GEN75_PS_DW4_ACCESS_UAV; dw5 = ff->grf_starts[0] << GEN7_PS_DW5_URB_GRF_START0__SHIFT | ff->grf_starts[1] << GEN7_PS_DW5_URB_GRF_START1__SHIFT | ff->grf_starts[2] << GEN7_PS_DW5_URB_GRF_START2__SHIFT; STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 8); ps->ps[2] = dw2; ps->ps[3] = dw3; ps->ps[4] = dw4; ps->ps[5] = dw5; ps->ps[6] = ff->kernel_offsets[1]; ps->ps[7] = ff->kernel_offsets[2]; return true; }
static void layout_init_layer_height(struct ilo_layout *layout, struct ilo_layout_params *params) { const struct pipe_resource *templ = params->templ; unsigned num_layers; if (layout->walk != ILO_LAYOUT_WALK_LAYER) return; num_layers = layout_get_num_layers(layout, params); if (num_layers <= 1) return; /* * From the Sandy Bridge PRM, volume 1 part 1, page 115: * * "The following equation is used for surface formats other than * compressed textures: * * QPitch = (h0 + h1 + 11j)" * * "The equation for compressed textures (BC* and FXT1 surface formats) * follows: * * QPitch = (h0 + h1 + 11j) / 4" * * "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the * value calculated in the equation above, for every other odd Surface * Height starting from 1 i.e. 1,5,9,13" * * From the Ivy Bridge PRM, volume 1 part 1, page 111-112: * * "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth * buffer and stencil buffer have an implied value of ARYSPC_FULL): * * QPitch = (h0 + h1 + 12j) * QPitch = (h0 + h1 + 12j) / 4 (compressed) * * (There are many typos or missing words here...)" * * To access the N-th slice, an offset of (Stride * QPitch * N) is added to * the base address. The PRM divides QPitch by 4 for compressed formats * because the block height for those formats are 4, and it wants QPitch to * mean the number of memory rows, as opposed to texel rows, between * slices. Since we use texel rows everywhere, we do not need to divide * QPitch by 4. */ layout->layer_height = params->h0 + params->h1 + ((ilo_dev_gen(params->dev) >= ILO_GEN(7)) ? 12 : 11) * layout->align_j; if (ilo_dev_gen(params->dev) == ILO_GEN(6) && templ->nr_samples > 1 && layout->height0 % 4 == 1) layout->layer_height += 4; params->max_y += layout->layer_height * (num_layers - 1); }
bool ilo_state_vf_set_params(struct ilo_state_vf *vf, const struct ilo_dev *dev, const struct ilo_state_vf_params_info *params) { bool ret = true; ILO_DEV_ASSERT(dev, 6, 8); ret &= vf_params_set_gen6_internal_ve(vf, dev, params, vf->user_ve_count); if (ilo_dev_gen(dev) >= ILO_GEN(8)) ret &= vf_params_set_gen8_3DSTATE_VF_SGVS(vf, dev, params); /* * From the Sandy Bridge PRM, volume 2 part 1, page 94: * * "Edge flags are supported for the following primitive topology types * only, otherwise EdgeFlagEnable must not be ENABLED. * * - 3DPRIM_TRILIST* * - 3DPRIM_TRISTRIP* * - 3DPRIM_TRIFAN* * - 3DPRIM_POLYGON" * * "[DevSNB]: Edge Flags are not supported for QUADLIST primitives. * Software may elect to convert QUADLIST primitives to some set of * corresponding edge-flag-supported primitive types (e.g., POLYGONs) * prior to submission to the 3D vf." * * From the Ivy Bridge PRM, volume 2 part 1, page 86: * * "Edge flags are supported for all primitive topology types." * * Both PRMs are confusing... */ if (params->last_element_edge_flag) { assert(vf->edge_flag_supported); if (ilo_dev_gen(dev) == ILO_GEN(6)) assert(params->cv_topology != GEN6_3DPRIM_QUADLIST); } if (vf->edge_flag_supported) { assert(vf->user_ve_count); memcpy(vf->user_ve[vf->user_ve_count - 1], vf->last_user_ve[params->last_element_edge_flag], sizeof(vf->user_ve[vf->user_ve_count - 1])); } if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) ret &= vf_params_set_gen75_3DSTATE_VF(vf, dev, params); else ret &= vf_params_set_gen6_3dstate_index_buffer(vf, dev, params); assert(ret); return ret; }
static bool surface_validate_gen6_buffer(const struct ilo_dev *dev, const struct ilo_state_surface_buffer_info *info) { uint32_t alignment; ILO_DEV_ASSERT(dev, 6, 8); if (info->offset + info->size > info->vma->vm_size) { ilo_warn("invalid buffer range\n"); return false; } /* * From the Sandy Bridge PRM, volume 4 part 1, page 81: * * "For surfaces of type SURFTYPE_BUFFER: [0,2047] -> [1B, 2048B] * For surfaces of type SURFTYPE_STRBUF: [0,2047] -> [1B, 2048B]" */ if (!info->struct_size || info->struct_size > 2048) { ilo_warn("invalid buffer struct size\n"); return false; } alignment = surface_get_gen6_buffer_offset_alignment(dev, info); if (info->offset % alignment || info->vma->vm_alignment % alignment) { ilo_warn("bad buffer offset\n"); return false; } /* no STRBUF on Gen6 */ if (info->format == GEN6_FORMAT_RAW && info->struct_size > 1) assert(ilo_dev_gen(dev) >= ILO_GEN(7)); /* SVB writes are Gen6 only */ if (info->access == ILO_STATE_SURFACE_ACCESS_DP_SVB) assert(ilo_dev_gen(dev) == ILO_GEN(6)); /* * From the Ivy Bridge PRM, volume 4 part 1, page 83: * * "NOTE: "RAW" is supported only with buffers and structured buffers * accessed via the untyped surface read/write and untyped atomic * operation messages, which do not have a column in the table." * * From the Ivy Bridge PRM, volume 4 part 1, page 252: * * "For untyped messages, the Surface Format must be RAW and the * Surface Type must be SURFTYPE_BUFFER or SURFTYPE_STRBUF." */ assert((info->access == ILO_STATE_SURFACE_ACCESS_DP_UNTYPED) == (info->format == GEN6_FORMAT_RAW)); return true; }
void ilo_render_emit_rectlist_commands_gen7(struct ilo_render *r, const struct ilo_blitter *blitter, const struct ilo_render_rectlist_session *session) { ILO_DEV_ASSERT(r->dev, 7, 7.5); gen7_rectlist_wm_multisample(r, blitter); gen6_state_base_address(r->builder, true); gen6_user_3DSTATE_VERTEX_BUFFERS(r->builder, session->vb_start, session->vb_end, sizeof(blitter->vertices[0])); gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, &blitter->vf); gen7_rectlist_pcb_alloc(r, blitter); /* needed for any VS-related commands */ if (ilo_dev_gen(r->dev) == ILO_GEN(7)) gen7_wa_pre_vs(r); gen7_rectlist_urb(r, blitter); if (blitter->uses & ILO_BLITTER_USE_DSA) { gen7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(r->builder, r->state.DEPTH_STENCIL_STATE); } if (blitter->uses & ILO_BLITTER_USE_CC) { gen7_3DSTATE_CC_STATE_POINTERS(r->builder, r->state.COLOR_CALC_STATE); } gen7_rectlist_vs_to_sf(r, blitter); gen7_rectlist_wm(r, blitter); if (blitter->uses & ILO_BLITTER_USE_VIEWPORT) { gen7_3DSTATE_VIEWPORT_STATE_POINTERS_CC(r->builder, r->state.CC_VIEWPORT); } gen7_rectlist_wm_depth(r, blitter); gen6_3DSTATE_DRAWING_RECTANGLE(r->builder, 0, 0, blitter->fb.width, blitter->fb.height); if (ilo_dev_gen(r->dev) == ILO_GEN(7)) gen7_wa_post_ps_and_later(r); ilo_render_3dprimitive(r, &blitter->draw_info); }
static bool sbe_set_gen8_3DSTATE_SBE(struct ilo_state_sbe *sbe, const struct ilo_dev *dev, const struct ilo_state_sbe_info *info) { uint8_t vue_read_offset, vue_read_len; uint8_t attr_count; uint32_t dw1, dw2, dw3; ILO_DEV_ASSERT(dev, 6, 8); if (!sbe_validate_gen8(dev, info)) return false; vue_read_offset = info->vue_read_base / 2; vue_read_len = sbe_get_gen8_read_length(dev, info); attr_count = info->attr_count; if (ilo_dev_gen(dev) == ILO_GEN(6) && info->swizzle_16_31) attr_count += 16; dw1 = attr_count << GEN7_SBE_DW1_ATTR_COUNT__SHIFT | vue_read_len << GEN7_SBE_DW1_URB_READ_LEN__SHIFT; if (ilo_dev_gen(dev) >= ILO_GEN(8)) { dw1 |= GEN8_SBE_DW1_USE_URB_READ_LEN | GEN8_SBE_DW1_USE_URB_READ_OFFSET | vue_read_offset << GEN8_SBE_DW1_URB_READ_OFFSET__SHIFT; } else { dw1 |= vue_read_offset << GEN7_SBE_DW1_URB_READ_OFFSET__SHIFT; } if (ilo_dev_gen(dev) >= ILO_GEN(7) && info->swizzle_16_31) dw1 |= GEN7_SBE_DW1_ATTR_SWIZZLE_16_31; if (info->swizzle_enable) dw1 |= GEN7_SBE_DW1_ATTR_SWIZZLE_ENABLE; dw1 |= (info->point_sprite_origin_lower_left) ? GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_LOWERLEFT : GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_UPPERLEFT; dw2 = info->point_sprite_enables; dw3 = info->const_interp_enables; STATIC_ASSERT(ARRAY_SIZE(sbe->sbe) >= 3); sbe->sbe[0] = dw1; sbe->sbe[1] = dw2; sbe->sbe[2] = dw3; return true; }
void gen6_draw_common_base_address(struct ilo_render *r, const struct ilo_state_vector *vec, struct ilo_render_draw_session *session) { /* STATE_BASE_ADDRESS */ if (r->state_bo_changed || r->instruction_bo_changed || r->batch_bo_changed) { if (ilo_dev_gen(r->dev) == ILO_GEN(6)) gen6_wa_pre_non_pipelined(r); if (ilo_dev_gen(r->dev) >= ILO_GEN(8)) gen8_state_base_address(r->builder, r->hw_ctx_changed); else gen6_state_base_address(r->builder, r->hw_ctx_changed); /* * From the Sandy Bridge PRM, volume 1 part 1, page 28: * * "The following commands must be reissued following any change to * the base addresses: * * * 3DSTATE_BINDING_TABLE_POINTERS * * 3DSTATE_SAMPLER_STATE_POINTERS * * 3DSTATE_VIEWPORT_STATE_POINTERS * * 3DSTATE_CC_POINTERS * * MEDIA_STATE_POINTERS" * * 3DSTATE_SCISSOR_STATE_POINTERS is not on the list, but it is * reasonable to also reissue the command. Same to PCB. */ session->viewport_changed = true; session->scissor_changed = true; session->blend_changed = true; session->dsa_changed = true; session->cc_changed = true; session->sampler_vs_changed = true; session->sampler_gs_changed = true; session->sampler_fs_changed = true; session->pcb_vs_changed = true; session->pcb_gs_changed = true; session->pcb_fs_changed = true; session->binding_table_vs_changed = true; session->binding_table_gs_changed = true; session->binding_table_fs_changed = true; } }
bool ilo_state_ps_set_params(struct ilo_state_ps *ps, const struct ilo_dev *dev, const struct ilo_state_ps_params_info *params) { ILO_DEV_ASSERT(dev, 6, 8); /* modify sample mask */ if (ilo_dev_gen(dev) == ILO_GEN(7.5)) { ps->ps[4] = (ps->ps[4] & ~GEN75_PS_DW4_SAMPLE_MASK__MASK) | (params->sample_mask & 0xff) << GEN75_PS_DW4_SAMPLE_MASK__SHIFT; } /* modify dispatch enable, pixel kill, and dual source blending */ if (ilo_dev_gen(dev) < ILO_GEN(8)) { if (ilo_dev_gen(dev) >= ILO_GEN(7)) { if (ps_params_get_gen6_dispatch_enable(dev, params, &ps->conds)) ps->ps[0] |= GEN7_WM_DW1_PS_DISPATCH_ENABLE; else ps->ps[0] &= ~GEN7_WM_DW1_PS_DISPATCH_ENABLE; if (ps_params_get_gen6_kill_pixel(dev, params, &ps->conds)) ps->ps[0] |= GEN7_WM_DW1_PS_KILL_PIXEL; else ps->ps[0] &= ~GEN7_WM_DW1_PS_KILL_PIXEL; if (params->dual_source_blending) ps->ps[4] |= GEN7_PS_DW4_DUAL_SOURCE_BLEND; else ps->ps[4] &= ~GEN7_PS_DW4_DUAL_SOURCE_BLEND; } else { if (ps_params_get_gen6_dispatch_enable(dev, params, &ps->conds)) ps->ps[3] |= GEN6_WM_DW5_PS_DISPATCH_ENABLE; else ps->ps[3] &= ~GEN6_WM_DW5_PS_DISPATCH_ENABLE; if (ps_params_get_gen6_kill_pixel(dev, params, &ps->conds)) ps->ps[3] |= GEN6_WM_DW5_PS_KILL_PIXEL; else ps->ps[3] &= ~GEN6_WM_DW5_PS_KILL_PIXEL; if (params->dual_source_blending) ps->ps[3] |= GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND; else ps->ps[3] &= ~GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND; } } return true; }
void gen7_draw_vs(struct ilo_render *r, const struct ilo_state_vector *vec, struct ilo_render_draw_session *session) { const bool emit_3dstate_binding_table = session->binding_table_vs_changed; const bool emit_3dstate_sampler_state = session->sampler_vs_changed; /* see gen6_draw_vs() */ const bool emit_3dstate_constant_vs = session->pcb_vs_changed; const bool emit_3dstate_vs = (DIRTY(VS) || r->instruction_bo_changed); /* emit depth stall before any of the VS commands */ if (ilo_dev_gen(r->dev) == ILO_GEN(7)) { if (emit_3dstate_binding_table || emit_3dstate_sampler_state || emit_3dstate_constant_vs || emit_3dstate_vs) gen7_wa_pre_vs(r); } /* 3DSTATE_BINDING_TABLE_POINTERS_VS */ if (emit_3dstate_binding_table) { gen7_3DSTATE_BINDING_TABLE_POINTERS_VS(r->builder, r->state.vs.BINDING_TABLE_STATE); } /* 3DSTATE_SAMPLER_STATE_POINTERS_VS */ if (emit_3dstate_sampler_state) { gen7_3DSTATE_SAMPLER_STATE_POINTERS_VS(r->builder, r->state.vs.SAMPLER_STATE); } /* 3DSTATE_CONSTANT_VS */ if (emit_3dstate_constant_vs) { gen7_3DSTATE_CONSTANT_VS(r->builder, &r->state.vs.PUSH_CONSTANT_BUFFER, &r->state.vs.PUSH_CONSTANT_BUFFER_size, 1); } /* 3DSTATE_VS */ if (emit_3dstate_vs) { const union ilo_shader_cso *cso = ilo_shader_get_kernel_cso(vec->vs); const uint32_t kernel_offset = ilo_shader_get_kernel_offset(vec->vs); if (ilo_dev_gen(r->dev) >= ILO_GEN(8)) gen8_3DSTATE_VS(r->builder, &cso->vs, kernel_offset); else gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset); } }
void gen6_draw_clip(struct ilo_render *r, const struct ilo_state_vector *vec, struct ilo_render_draw_session *session) { /* 3DSTATE_CLIP */ if (DIRTY(RASTERIZER) || DIRTY(FS) || DIRTY(VIEWPORT) || DIRTY(FB)) { bool enable_guardband = true; unsigned i; /* * Gen8+ has viewport extent test. Guard band test can be enabled on * prior Gens only when the viewport is larger than the framebuffer, * unless we emulate viewport extent test on them. */ if (ilo_dev_gen(r->dev) < ILO_GEN(8)) { for (i = 0; i < vec->viewport.count; i++) { const struct ilo_viewport_cso *vp = &vec->viewport.cso[i]; if (vp->min_x > 0.0f || vp->max_x < vec->fb.state.width || vp->min_y > 0.0f || vp->max_y < vec->fb.state.height) { enable_guardband = false; break; } } } gen6_3DSTATE_CLIP(r->builder, vec->rasterizer, vec->fs, enable_guardband, 1); } }
static bool layout_want_hiz(const struct ilo_layout *layout, const struct ilo_layout_params *params) { const struct pipe_resource *templ = params->templ; const struct util_format_description *desc = util_format_description(templ->format); if (ilo_debug & ILO_DEBUG_NOHIZ) return false; if (!(templ->bind & PIPE_BIND_DEPTH_STENCIL)) return false; if (!util_format_has_depth(desc)) return false; /* no point in having HiZ */ if (templ->usage == PIPE_USAGE_STAGING) return false; /* * As can be seen in layout_calculate_hiz_size(), HiZ may not be enabled * for every level. This is generally fine except on GEN6, where HiZ and * separate stencil are enabled and disabled at the same time. When the * format is PIPE_FORMAT_Z32_FLOAT_S8X24_UINT, enabling and disabling HiZ * can result in incompatible formats. */ if (ilo_dev_gen(params->dev) == ILO_GEN(6) && templ->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && templ->last_level) return false; return true; }
static void writer_decode_sf_clip_viewport_gen7(const struct ilo_builder *builder, enum ilo_builder_writer_type which, const struct ilo_builder_item *item) { const unsigned state_size = sizeof(uint32_t) * 16; const unsigned count = item->size / state_size; unsigned offset = item->offset; unsigned i; for (i = 0; i < count; i++) { uint32_t dw; dw = writer_dw(builder, which, offset, 0, "SF_CLIP VP%d", i); ilo_printf("m00 = %f\n", uif(dw)); dw = writer_dw(builder, which, offset, 1, "SF_CLIP VP%d", i); ilo_printf("m11 = %f\n", uif(dw)); dw = writer_dw(builder, which, offset, 2, "SF_CLIP VP%d", i); ilo_printf("m22 = %f\n", uif(dw)); dw = writer_dw(builder, which, offset, 3, "SF_CLIP VP%d", i); ilo_printf("m30 = %f\n", uif(dw)); dw = writer_dw(builder, which, offset, 4, "SF_CLIP VP%d", i); ilo_printf("m31 = %f\n", uif(dw)); dw = writer_dw(builder, which, offset, 5, "SF_CLIP VP%d", i); ilo_printf("m32 = %f\n", uif(dw)); dw = writer_dw(builder, which, offset, 8, "SF_CLIP VP%d", i); ilo_printf("guardband xmin = %f\n", uif(dw)); dw = writer_dw(builder, which, offset, 9, "SF_CLIP VP%d", i); ilo_printf("guardband xmax = %f\n", uif(dw)); dw = writer_dw(builder, which, offset, 10, "SF_CLIP VP%d", i); ilo_printf("guardband ymin = %f\n", uif(dw)); dw = writer_dw(builder, which, offset, 11, "SF_CLIP VP%d", i); ilo_printf("guardband ymax = %f\n", uif(dw)); if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { dw = writer_dw(builder, which, offset, 12, "SF_CLIP VP%d", i); ilo_printf("extent xmin = %f\n", uif(dw)); dw = writer_dw(builder, which, offset, 13, "SF_CLIP VP%d", i); ilo_printf("extent xmax = %f\n", uif(dw)); dw = writer_dw(builder, which, offset, 14, "SF_CLIP VP%d", i); ilo_printf("extent ymin = %f\n", uif(dw)); dw = writer_dw(builder, which, offset, 15, "SF_CLIP VP%d", i); ilo_printf("extent ymax = %f\n", uif(dw)); } offset += state_size; } }
void gen6_draw_vf(struct ilo_render *r, const struct ilo_state_vector *vec, struct ilo_render_draw_session *session) { if (ilo_dev_gen(r->dev) >= ILO_GEN(7.5)) { /* 3DSTATE_INDEX_BUFFER */ if (DIRTY(IB) || r->batch_bo_changed) { gen6_3DSTATE_INDEX_BUFFER(r->builder, &vec->ib, false); } /* 3DSTATE_VF */ if (session->primitive_restart_changed) { gen75_3DSTATE_VF(r->builder, vec->draw->primitive_restart, vec->draw->restart_index); } } else { /* 3DSTATE_INDEX_BUFFER */ if (DIRTY(IB) || session->primitive_restart_changed || r->batch_bo_changed) { gen6_3DSTATE_INDEX_BUFFER(r->builder, &vec->ib, vec->draw->primitive_restart); } } /* 3DSTATE_VERTEX_BUFFERS */ if (DIRTY(VB) || DIRTY(VE) || r->batch_bo_changed) gen6_3DSTATE_VERTEX_BUFFERS(r->builder, vec->ve, &vec->vb); /* 3DSTATE_VERTEX_ELEMENTS */ if (DIRTY(VE)) gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, vec->ve); }
/*
 * Emit 3DSTATE_MULTISAMPLE and 3DSTATE_SAMPLE_MASK when the sample mask or
 * the framebuffer is dirty.  With a single sample, the 1x sample position
 * and a sample mask of 0x1 are used.
 */
static void
gen6_draw_wm_multisample(struct ilo_render *r,
                         const struct ilo_state_vector *vec,
                         struct ilo_render_draw_session *session)
{
   const uint32_t *sample_pos;

   /* 3DSTATE_MULTISAMPLE and 3DSTATE_SAMPLE_MASK */
   if (!(DIRTY(SAMPLE_MASK) || DIRTY(FB)))
      return;

   if (vec->fb.num_samples > 1)
      sample_pos = &r->packed_sample_position_4x;
   else
      sample_pos = &r->packed_sample_position_1x;

   if (ilo_dev_gen(r->dev) == ILO_GEN(6)) {
      gen6_wa_pre_non_pipelined(r);
      gen6_wa_pre_3dstate_multisample(r);
   }

   gen6_3DSTATE_MULTISAMPLE(r->builder,
         vec->fb.num_samples, sample_pos,
         vec->rasterizer->state.half_pixel_center);

   gen6_3DSTATE_SAMPLE_MASK(r->builder,
         (vec->fb.num_samples > 1) ? vec->sample_mask : 0x1);
}
static void gen6_draw_wm(struct ilo_render *r, const struct ilo_state_vector *vec, struct ilo_render_draw_session *session) { /* 3DSTATE_CONSTANT_PS */ if (session->pcb_fs_changed) { gen6_3DSTATE_CONSTANT_PS(r->builder, &r->state.wm.PUSH_CONSTANT_BUFFER, &r->state.wm.PUSH_CONSTANT_BUFFER_size, 1); } /* 3DSTATE_WM */ if (DIRTY(FS) || DIRTY(BLEND) || DIRTY(DSA) || DIRTY(RASTERIZER) || r->instruction_bo_changed) { const bool dual_blend = vec->blend->dual_blend; const bool cc_may_kill = (vec->dsa->dw_alpha || vec->blend->alpha_to_coverage); if (ilo_dev_gen(r->dev) == ILO_GEN(6) && r->hw_ctx_changed) gen6_wa_pre_3dstate_wm_max_threads(r); gen6_3DSTATE_WM(r->builder, vec->fs, vec->rasterizer, dual_blend, cc_may_kill); } }
/*
 * Emit 3DSTATE_GS_SVB_INDEX for index 0 when gen6_draw_update_max_svbi()
 * asks for it, and additionally for indices 1-3 when the hardware context
 * changed.
 */
static void
gen6_draw_gs_svbi(struct ilo_render *r,
                  const struct ilo_state_vector *vec,
                  struct ilo_render_draw_session *session)
{
   const bool need_emit = gen6_draw_update_max_svbi(r, vec, session);
   int svbi;

   /* 3DSTATE_GS_SVB_INDEX */
   if (!need_emit)
      return;

   if (ilo_dev_gen(r->dev) == ILO_GEN(6))
      gen6_wa_pre_non_pipelined(r);

   gen6_3DSTATE_GS_SVB_INDEX(r->builder,
         0, 0, r->state.so_max_vertices,
         false);

   if (!r->hw_ctx_changed)
      return;

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 148:
    *
    *     "If a buffer is not enabled then the SVBI must be set to 0x0
    *      in order to not cause overflow in that SVBI."
    *
    *     "If a buffer is not enabled then the MaxSVBI must be set to
    *      0xFFFFFFFF in order to not cause overflow in that SVBI."
    */
   for (svbi = 1; svbi < 4; svbi++) {
      gen6_3DSTATE_GS_SVB_INDEX(r->builder,
            svbi, 0, 0xffffffff, false);
   }
}