static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op)
{
   struct r600_context *rctx = (struct r600_context *)ctx;

   r600_suspend_nontimer_queries(rctx);

   util_blitter_save_vertex_buffers(rctx->blitter,
                                    util_last_bit(rctx->vertex_buffer_state.enabled_mask),
                                    rctx->vertex_buffer_state.vb);
   util_blitter_save_vertex_elements(rctx->blitter, rctx->vertex_elements);
   util_blitter_save_vertex_shader(rctx->blitter, rctx->vs_shader);
   util_blitter_save_so_targets(rctx->blitter, rctx->num_so_targets,
                                (struct pipe_stream_output_target**)rctx->so_targets);
   util_blitter_save_rasterizer(rctx->blitter, rctx->states[R600_PIPE_STATE_RASTERIZER]);

   if (op & R600_SAVE_FRAGMENT_STATE) {
      if (rctx->states[R600_PIPE_STATE_VIEWPORT]) {
         util_blitter_save_viewport(rctx->blitter, &rctx->viewport);
      }
      util_blitter_save_fragment_shader(rctx->blitter, rctx->ps_shader);
      util_blitter_save_blend(rctx->blitter, rctx->states[R600_PIPE_STATE_BLEND]);
      util_blitter_save_depth_stencil_alpha(rctx->blitter, rctx->states[R600_PIPE_STATE_DSA]);
      if (rctx->states[R600_PIPE_STATE_STENCIL_REF]) {
         util_blitter_save_stencil_ref(rctx->blitter, &rctx->stencil_ref);
      }
      util_blitter_save_sample_mask(rctx->blitter, rctx->sample_mask.sample_mask);
   }

   if (op & R600_SAVE_FRAMEBUFFER)
      util_blitter_save_framebuffer(rctx->blitter, &rctx->framebuffer);

   if (op & R600_SAVE_TEXTURES) {
      util_blitter_save_fragment_sampler_states(
         rctx->blitter, rctx->ps_samplers.n_samplers,
         (void**)rctx->ps_samplers.samplers);

      util_blitter_save_fragment_sampler_views(
         rctx->blitter,
         util_last_bit(rctx->ps_samplers.views.enabled_mask),
         (struct pipe_sampler_view**)rctx->ps_samplers.views.views);
   }

   if ((op & R600_DISABLE_RENDER_COND) && rctx->current_render_cond) {
      rctx->saved_render_cond = rctx->current_render_cond;
      rctx->saved_render_cond_mode = rctx->current_render_cond_mode;
      rctx->context.render_condition(&rctx->context, NULL, 0);
   }
}
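/* Every snippet in this section leans on util_last_bit(), which returns the
 * 1-based position of the highest set bit (and 0 for an input of 0), so
 * util_last_bit(mask) reads as "number of slots up to and including the
 * highest enabled one".  A minimal standalone sketch of that contract,
 * consistent with Mesa's u_math.h behavior (the sketch_* names are
 * illustrative, not Mesa API; a GCC/Clang-style __builtin_clz is assumed):
 */
#include <assert.h>
#include <stdint.h>

static inline unsigned
sketch_util_last_bit(unsigned u)
{
   return u == 0 ? 0 : 32 - __builtin_clz(u);
}

static void
sketch_util_last_bit_examples(void)
{
   assert(sketch_util_last_bit(0x0) == 0); /* no bits set */
   assert(sketch_util_last_bit(0x1) == 1); /* only slot 0 */
   assert(sketch_util_last_bit(0x9) == 4); /* slots 0 and 3 -> count 4 */
}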
static void
fd_set_vertex_buffers(struct pipe_context *pctx,
      unsigned start_slot, unsigned count,
      const struct pipe_vertex_buffer *vb)
{
   struct fd_context *ctx = fd_context(pctx);
   struct fd_vertexbuf_stateobj *so = &ctx->vertexbuf;
   int i;

   /* on a2xx, pitch is encoded in the vtx fetch instruction, so
    * we need to mark VTXSTATE as dirty as well to trigger patching
    * and re-emitting the vtx shader:
    */
   for (i = 0; i < count; i++) {
      bool new_enabled = vb && (vb[i].buffer || vb[i].user_buffer);
      bool old_enabled = so->vb[i].buffer || so->vb[i].user_buffer;
      uint32_t new_stride = vb ? vb[i].stride : 0;
      uint32_t old_stride = so->vb[i].stride;
      if ((new_enabled != old_enabled) || (new_stride != old_stride)) {
         ctx->dirty |= FD_DIRTY_VTXSTATE;
         break;
      }
   }

   util_set_vertex_buffers_mask(so->vb, &so->enabled_mask, vb, start_slot, count);
   so->count = util_last_bit(so->enabled_mask);

   ctx->dirty |= FD_DIRTY_VTXBUF;
}
void
nv50_fp_linkage_validate(struct nv50_context *nv50)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_program *vp = nv50->gmtyprog ? nv50->gmtyprog : nv50->vertprog;
   struct nv50_program *fp = nv50->fragprog;
   struct nv50_varying dummy;
   int i, n, c, m;
   uint32_t primid = 0;
   uint32_t layerid = 0;
   uint32_t viewportid = 0;
   uint32_t psiz = 0x000;
   uint32_t interp = fp->fp.interp;
   uint32_t colors = fp->fp.colors;
   uint32_t clpd_nr = util_last_bit(vp->vp.clip_enable | vp->vp.cull_enable);
   uint32_t lin[4];
   uint8_t map[64];
   uint8_t so_map[64];

   if (!(nv50->dirty_3d & (NV50_NEW_3D_VERTPROG |
                           NV50_NEW_3D_FRAGPROG |
                           NV50_NEW_3D_GMTYPROG))) {
      uint8_t bfc, ffc;
      ffc = (nv50->state.semantic_color & NV50_3D_SEMANTIC_COLOR_FFC0_ID__MASK);
      bfc = (nv50->state.semantic_color & NV50_3D_SEMANTIC_COLOR_BFC0_ID__MASK) >> 8;
      if (nv50->rast->pipe.light_twoside == ((ffc == bfc) ? 0 : 1))
         return;
   }
static void
finalize_global_binding(struct ilo_state_vector *vec)
{
   struct ilo_shader_state *cs = vec->cs;
   int base, count, shift;
   int i;

   count = ilo_shader_get_kernel_param(cs, ILO_KERNEL_CS_SURFACE_GLOBAL_COUNT);
   if (!count)
      return;

   base = ilo_shader_get_kernel_param(cs, ILO_KERNEL_CS_SURFACE_GLOBAL_BASE);
   shift = 32 - util_last_bit(base + count - 1);

   if (count > vec->global_binding.count)
      count = vec->global_binding.count;

   for (i = 0; i < count; i++) {
      struct ilo_global_binding_cso *cso =
         util_dynarray_element(&vec->global_binding.bindings,
               struct ilo_global_binding_cso, i);
      const uint32_t offset = *cso->handle & ((1 << shift) - 1);

      *cso->handle = ((base + i) << shift) | offset;
   }
}
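/* A worked instance of the handle packing above, with assumed values: for
 * base = 8 and count = 4 the largest binding-table index is 11 = 0b1011, so
 * util_last_bit(11) = 4 and shift = 32 - 4 = 28, leaving the low 28 bits of
 * each handle for the offset and the top 4 for the surface index (reusing
 * the sketch_util_last_bit() helper from the first sketch in this section):
 */
static void
sketch_global_binding_packing(void)
{
   const int base = 8, count = 4;
   const int shift = 32 - sketch_util_last_bit(base + count - 1); /* 28 */
   uint32_t handle = 0x00001230; /* pre-finalize handle: offset only */
   const uint32_t offset = handle & ((1u << shift) - 1);

   handle = ((uint32_t)(base + 2) << shift) | offset; /* surface index 10 */
   assert(handle == ((10u << 28) | 0x1230));
}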
static void
etna_set_vertex_buffers(struct pipe_context *pctx, unsigned start_slot,
      unsigned num_buffers, const struct pipe_vertex_buffer *vb)
{
   struct etna_context *ctx = etna_context(pctx);
   struct etna_vertexbuf_state *so = &ctx->vertex_buffer;

   util_set_vertex_buffers_mask(so->vb, &so->enabled_mask, vb, start_slot, num_buffers);
   so->count = util_last_bit(so->enabled_mask);

   for (unsigned idx = start_slot; idx < start_slot + num_buffers; ++idx) {
      struct compiled_set_vertex_buffer *cs = &so->cvb[idx];
      struct pipe_vertex_buffer *vbi = &so->vb[idx];

      assert(!vbi->is_user_buffer); /* XXX support user_buffer using etna_usermem_map */

      if (vbi->buffer.resource) { /* GPU buffer */
         cs->FE_VERTEX_STREAM_BASE_ADDR.bo = etna_resource(vbi->buffer.resource)->bo;
         cs->FE_VERTEX_STREAM_BASE_ADDR.offset = vbi->buffer_offset;
         cs->FE_VERTEX_STREAM_BASE_ADDR.flags = ETNA_RELOC_READ;
         cs->FE_VERTEX_STREAM_CONTROL =
            FE_VERTEX_STREAM_CONTROL_VERTEX_STRIDE(vbi->stride);
      } else {
         cs->FE_VERTEX_STREAM_BASE_ADDR.bo = NULL;
         cs->FE_VERTEX_STREAM_CONTROL = 0;
      }
   }

   ctx->dirty |= ETNA_DIRTY_VERTEX_BUFFERS;
}
/* For vertex shaders, the inputs are loaded into registers before the shader
 * is executed, so max_regs from the shader instructions might not properly
 * reflect the # of registers actually used, especially in the case of
 * passthrough varyings.
 *
 * Likewise, for fragment shaders, we can have some regs which are passed
 * input values but never touched by the resulting shader (ie. as a result
 * of dead code elimination, or simply because we don't know how to turn
 * the reg off).
 */
static void
fixup_regfootprint(struct ir3_shader_variant *v, uint32_t gpu_id)
{
   unsigned i;

   for (i = 0; i < v->inputs_count; i++) {
      /* skip frag inputs fetched via bary.f since their reg's are
       * not written by gpu before shader starts (and in fact the
       * regid's might not even be valid)
       */
      if (v->inputs[i].bary)
         continue;

      /* ignore high regs that are global to all threads in a warp
       * (they exist by default) (a5xx+)
       */
      if (v->inputs[i].regid >= regid(48, 0))
         continue;

      if (v->inputs[i].compmask) {
         unsigned n = util_last_bit(v->inputs[i].compmask) - 1;
         int32_t regid = v->inputs[i].regid + n;
         if (v->inputs[i].half) {
            if (gpu_id < 500) {
               v->info.max_half_reg = MAX2(v->info.max_half_reg, regid >> 2);
            } else {
               v->info.max_reg = MAX2(v->info.max_reg, regid >> 3);
            }
         } else {
void
brw_upload_cs_prog(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct brw_cs_prog_key key;
   struct brw_program *cp =
      (struct brw_program *) brw->compute_program;

   if (!cp)
      return;

   if (!brw_state_dirty(brw, _NEW_TEXTURE, BRW_NEW_COMPUTE_PROGRAM))
      return;

   brw->cs.base.sampler_count =
      util_last_bit(ctx->ComputeProgram._Current->SamplersUsed);

   brw_cs_populate_key(brw, &key);

   if (!brw_search_cache(&brw->cache, BRW_CACHE_CS_PROG,
                         &key, sizeof(key),
                         &brw->cs.base.prog_offset,
                         &brw->cs.base.prog_data)) {
      bool success = brw_codegen_cs_prog(brw,
                                         ctx->Shader.CurrentProgram[MESA_SHADER_COMPUTE],
                                         cp, &key);
      (void) success;
      assert(success);
   }
}
static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op)
{
   struct r600_context *rctx = (struct r600_context *)ctx;

   r600_suspend_nontimer_queries(&rctx->b);

   util_blitter_save_vertex_buffer_slot(rctx->blitter,
                                        rctx->vertex_buffer_state.vb);
   util_blitter_save_vertex_elements(rctx->blitter, rctx->vertex_fetch_shader.cso);
   util_blitter_save_vertex_shader(rctx->blitter, rctx->vs_shader);
   util_blitter_save_geometry_shader(rctx->blitter, rctx->gs_shader);
   util_blitter_save_so_targets(rctx->blitter, rctx->b.streamout.num_targets,
                                (struct pipe_stream_output_target**)rctx->b.streamout.targets);
   util_blitter_save_rasterizer(rctx->blitter, rctx->rasterizer_state.cso);

   if (op & R600_SAVE_FRAGMENT_STATE) {
      util_blitter_save_viewport(rctx->blitter, &rctx->viewport[0].state);
      util_blitter_save_scissor(rctx->blitter, &rctx->scissor[0].scissor);
      util_blitter_save_fragment_shader(rctx->blitter, rctx->ps_shader);
      util_blitter_save_blend(rctx->blitter, rctx->blend_state.cso);
      util_blitter_save_depth_stencil_alpha(rctx->blitter, rctx->dsa_state.cso);
      util_blitter_save_stencil_ref(rctx->blitter, &rctx->stencil_ref.pipe_state);
      util_blitter_save_sample_mask(rctx->blitter, rctx->sample_mask.sample_mask);
   }

   if (op & R600_SAVE_FRAMEBUFFER)
      util_blitter_save_framebuffer(rctx->blitter, &rctx->framebuffer.state);

   if (op & R600_SAVE_TEXTURES) {
      util_blitter_save_fragment_sampler_states(
         rctx->blitter,
         util_last_bit(rctx->samplers[PIPE_SHADER_FRAGMENT].states.enabled_mask),
         (void**)rctx->samplers[PIPE_SHADER_FRAGMENT].states.states);

      util_blitter_save_fragment_sampler_views(
         rctx->blitter,
         util_last_bit(rctx->samplers[PIPE_SHADER_FRAGMENT].views.enabled_mask),
         (struct pipe_sampler_view**)rctx->samplers[PIPE_SHADER_FRAGMENT].views.views);
   }

   if ((op & R600_DISABLE_RENDER_COND) && rctx->b.current_render_cond) {
      util_blitter_save_render_condition(rctx->blitter,
                                         rctx->b.current_render_cond,
                                         rctx->b.current_render_cond_cond,
                                         rctx->b.current_render_cond_mode);
   }
}
static void
vc4_set_vertex_buffers(struct pipe_context *pctx,
                       unsigned start_slot, unsigned count,
                       const struct pipe_vertex_buffer *vb)
{
   struct vc4_context *vc4 = vc4_context(pctx);
   struct vc4_vertexbuf_stateobj *so = &vc4->vertexbuf;

   util_set_vertex_buffers_mask(so->vb, &so->enabled_mask, vb,
                                start_slot, count);
   so->count = util_last_bit(so->enabled_mask);

   vc4->dirty |= VC4_DIRTY_VTXBUF;
}
static void
bind_sampler_states(struct fd_texture_stateobj *tex, unsigned start,
      unsigned nr, void **hwcso)
{
   unsigned i;

   for (i = 0; i < nr; i++) {
      unsigned p = i + start;
      tex->samplers[p] = hwcso[i];
      if (tex->samplers[p])
         tex->valid_samplers |= (1 << p);
      else
         tex->valid_samplers &= ~(1 << p);
   }

   tex->num_samplers = util_last_bit(tex->valid_samplers);
}
static void
set_sampler_views(struct fd_texture_stateobj *tex, unsigned start, unsigned nr,
      struct pipe_sampler_view **views)
{
   unsigned i;

   for (i = 0; i < nr; i++) {
      struct pipe_sampler_view *view = views ? views[i] : NULL;
      unsigned p = i + start;
      pipe_sampler_view_reference(&tex->textures[p], view);
      if (tex->textures[p])
         tex->valid_textures |= (1 << p);
      else
         tex->valid_textures &= ~(1 << p);
   }

   tex->num_textures = util_last_bit(tex->valid_textures);
}
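/* The two freedreno helpers above maintain a bitmask of occupied slots and
 * derive the bound count from its highest bit, so a sparse binding still
 * counts the empty slots below it.  A small illustration with assumed slot
 * numbers (reusing sketch_util_last_bit() from the first sketch):
 */
static void
sketch_valid_mask_count(void)
{
   unsigned valid = 0;

   valid |= (1 << 0);                        /* bind slot 0 */
   valid |= (1 << 3);                        /* bind slot 3; slots 1-2 stay empty */
   assert(sketch_util_last_bit(valid) == 4); /* count runs through slot 3 */

   valid &= ~(1 << 3);                       /* unbind slot 3 */
   assert(sketch_util_last_bit(valid) == 1); /* count shrinks back to 1 */
}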
/**
 * Same as util_set_vertex_buffers_mask, but it only returns the number
 * of bound buffers.
 */
void util_set_vertex_buffers_count(struct pipe_vertex_buffer *dst,
                                   unsigned *dst_count,
                                   const struct pipe_vertex_buffer *src,
                                   unsigned start_slot, unsigned count)
{
   unsigned i;
   uint32_t enabled_buffers = 0;

   for (i = 0; i < *dst_count; i++) {
      if (dst[i].buffer || dst[i].user_buffer)
         enabled_buffers |= (1ull << i);
   }

   util_set_vertex_buffers_mask(dst, &enabled_buffers, src, start_slot,
                                count);

   *dst_count = util_last_bit(enabled_buffers);
}
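/* Why the helper above rebuilds a mask before delegating: *dst_count alone
 * cannot say which of the first *dst_count slots actually hold a buffer, so
 * the loop reconstructs enabled_buffers from the buffer pointers, and the
 * updated mask is collapsed back into a count afterwards.  A minimal sketch
 * of that round-trip with assumed bindings (the sketch_* types are
 * illustrative, not the gallium API):
 */
struct sketch_vb { void *buffer; };

static void
sketch_count_round_trip(void)
{
   struct sketch_vb vb[4] = { { (void *)1 }, { 0 }, { (void *)1 }, { 0 } };
   unsigned count = 3; /* slots 0..2 were bound at some point */
   uint32_t mask = 0;

   for (unsigned i = 0; i < count; i++) {
      if (vb[i].buffer)
         mask |= (1u << i); /* mask = 0b101: slot 1 is a hole */
   }
   count = sketch_util_last_bit(mask);
   assert(count == 3); /* a hole below the top slot does not shrink the count */
}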
/* Add any missing varyings needed for stream-out.  Otherwise varyings not
 * used by fragment shader will be stripped out.
 */
static void
link_stream_out(struct ir3_shader_linkage *l, const struct ir3_shader_variant *v)
{
   const struct pipe_stream_output_info *strmout = &v->shader->stream_output;

   /*
    * First, any stream-out varyings not already in linkage map (ie. also
    * consumed by frag shader) need to be added:
    */
   for (unsigned i = 0; i < strmout->num_outputs; i++) {
      const struct pipe_stream_output *out = &strmout->output[i];
      unsigned k = out->register_index;
      unsigned compmask =
         (1 << (out->num_components + out->start_component)) - 1;
      unsigned idx, nextloc = 0;

      /* psize/pos need to be the last entries in linkage map, and will
       * get added after link_stream_out, so skip over them:
       */
      if ((v->outputs[k].slot == VARYING_SLOT_PSIZ) ||
            (v->outputs[k].slot == VARYING_SLOT_POS))
         continue;

      for (idx = 0; idx < l->cnt; idx++) {
         if (l->var[idx].regid == v->outputs[k].regid)
            break;
         nextloc = MAX2(nextloc, l->var[idx].loc + 4);
      }

      /* add if not already in linkage map: */
      if (idx == l->cnt)
         ir3_link_add(l, v->outputs[k].regid, compmask, nextloc);

      /* expand component-mask if needed, ie streaming out all components
       * but frag shader doesn't consume all components:
       */
      if (compmask & ~l->var[idx].compmask) {
         l->var[idx].compmask |= compmask;
         l->max_loc = MAX2(l->max_loc,
               l->var[idx].loc + util_last_bit(l->var[idx].compmask));
      }
   }
}
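/* The stream-out compmask above is a prefix mask: (1 << (num_components +
 * start_component)) - 1 covers every component up to and including the last
 * streamed one, and util_last_bit(compmask) is then the number of varying
 * locations the output occupies.  Worked with assumed values (reusing
 * sketch_util_last_bit() from the first sketch):
 */
static void
sketch_stream_out_compmask(void)
{
   const unsigned start_component = 1, num_components = 2;
   const unsigned compmask = (1u << (num_components + start_component)) - 1;

   assert(compmask == 0x7);                     /* components 0..2 */
   assert(sketch_util_last_bit(compmask) == 3); /* max_loc advances by 3 */
}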
static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op)
{
   struct si_context *sctx = (struct si_context *)ctx;

   r600_suspend_nontimer_queries(&sctx->b);

   util_blitter_save_blend(sctx->blitter, sctx->queued.named.blend);
   util_blitter_save_depth_stencil_alpha(sctx->blitter, sctx->queued.named.dsa);
   util_blitter_save_stencil_ref(sctx->blitter, &sctx->stencil_ref);
   util_blitter_save_rasterizer(sctx->blitter, sctx->queued.named.rasterizer);
   util_blitter_save_fragment_shader(sctx->blitter, sctx->ps_shader);
   util_blitter_save_geometry_shader(sctx->blitter, sctx->gs_shader);
   util_blitter_save_vertex_shader(sctx->blitter, sctx->vs_shader);
   util_blitter_save_vertex_elements(sctx->blitter, sctx->vertex_elements);
   if (sctx->queued.named.viewport) {
      util_blitter_save_viewport(sctx->blitter, &sctx->queued.named.viewport->viewport);
   }
   util_blitter_save_vertex_buffer_slot(sctx->blitter, sctx->vertex_buffer);
   util_blitter_save_so_targets(sctx->blitter, sctx->b.streamout.num_targets,
                                (struct pipe_stream_output_target**)sctx->b.streamout.targets);

   if (op & SI_SAVE_FRAMEBUFFER)
      util_blitter_save_framebuffer(sctx->blitter, &sctx->framebuffer.state);

   if (op & SI_SAVE_TEXTURES) {
      util_blitter_save_fragment_sampler_states(
         sctx->blitter, sctx->samplers[PIPE_SHADER_FRAGMENT].n_samplers,
         (void**)sctx->samplers[PIPE_SHADER_FRAGMENT].samplers);

      util_blitter_save_fragment_sampler_views(sctx->blitter,
         util_last_bit(sctx->samplers[PIPE_SHADER_FRAGMENT].views.desc.enabled_mask &
                       ((1 << NUM_TEX_UNITS) - 1)),
         sctx->samplers[PIPE_SHADER_FRAGMENT].views.views);
   }

   if ((op & SI_DISABLE_RENDER_COND) && sctx->b.current_render_cond) {
      util_blitter_save_render_condition(sctx->blitter,
                                         sctx->b.current_render_cond,
                                         sctx->b.current_render_cond_cond,
                                         sctx->b.current_render_cond_mode);
   }
}
static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op)
{
   struct r600_context *rctx = (struct r600_context *)ctx;

   r600_context_queries_suspend(rctx);

   util_blitter_save_blend(rctx->blitter, rctx->queued.named.blend);
   util_blitter_save_depth_stencil_alpha(rctx->blitter, rctx->queued.named.dsa);
   util_blitter_save_stencil_ref(rctx->blitter, &rctx->stencil_ref);
   util_blitter_save_rasterizer(rctx->blitter, rctx->queued.named.rasterizer);
   util_blitter_save_fragment_shader(rctx->blitter, rctx->ps_shader);
   util_blitter_save_vertex_shader(rctx->blitter, rctx->vs_shader);
   util_blitter_save_vertex_elements(rctx->blitter, rctx->vertex_elements);
   if (rctx->queued.named.viewport) {
      util_blitter_save_viewport(rctx->blitter, &rctx->queued.named.viewport->viewport);
   }
   util_blitter_save_vertex_buffer_slot(rctx->blitter, rctx->vertex_buffer);
   util_blitter_save_so_targets(rctx->blitter, rctx->b.streamout.num_targets,
                                (struct pipe_stream_output_target**)rctx->b.streamout.targets);

   if (op & R600_SAVE_FRAMEBUFFER)
      util_blitter_save_framebuffer(rctx->blitter, &rctx->framebuffer);

   if (op & R600_SAVE_TEXTURES) {
      util_blitter_save_fragment_sampler_states(
         rctx->blitter, rctx->samplers[PIPE_SHADER_FRAGMENT].n_samplers,
         (void**)rctx->samplers[PIPE_SHADER_FRAGMENT].samplers);

      util_blitter_save_fragment_sampler_views(rctx->blitter,
         util_last_bit(rctx->samplers[PIPE_SHADER_FRAGMENT].views.desc.enabled_mask &
                       ((1 << NUM_TEX_UNITS) - 1)),
         rctx->samplers[PIPE_SHADER_FRAGMENT].views.views);
   }

   if ((op & R600_DISABLE_RENDER_COND) && rctx->current_render_cond) {
      rctx->saved_render_cond = rctx->current_render_cond;
      rctx->saved_render_cond_cond = rctx->current_render_cond_cond;
      rctx->saved_render_cond_mode = rctx->current_render_cond_mode;
      rctx->b.b.render_condition(&rctx->b.b, NULL, FALSE, 0);
   }
}
static float
etna_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
{
   struct etna_screen *screen = etna_screen(pscreen);

   switch (param) {
   case PIPE_CAPF_MAX_LINE_WIDTH:
   case PIPE_CAPF_MAX_LINE_WIDTH_AA:
   case PIPE_CAPF_MAX_POINT_WIDTH:
   case PIPE_CAPF_MAX_POINT_WIDTH_AA:
      return 8192.0f;
   case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
      return 16.0f;
   case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
      return util_last_bit(screen->specs.max_texture_size);
   }

   debug_printf("unknown paramf %d", param);
   return 0;
}
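/* The paramf hook above reuses util_last_bit() as a cheap integer log2: for
 * a power-of-two max_texture_size N it yields log2(N) + 1, the mip level
 * count of an NxN texture, which is a safe upper bound for the LOD bias.
 * Illustration with an assumed max_texture_size of 8192 (reusing
 * sketch_util_last_bit() from the first sketch):
 */
static void
sketch_lod_bias_bound(void)
{
   const unsigned max_texture_size = 8192;               /* 2^13, assumed */
   assert(sketch_util_last_bit(max_texture_size) == 14); /* mips 8192..1 */
}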
/**
 * Write out a batch of 32 control data bits from the control_data_bits
 * register to the URB.
 *
 * The current value of the vertex_count register determines which DWORD in
 * the URB receives the control data bits. The control_data_bits register is
 * assumed to contain the correct data for the vertex that was most recently
 * output, and all previous vertices that share the same DWORD.
 *
 * This function takes care of ensuring that if no vertices have been output
 * yet, no control bits are emitted.
 */
void
vec4_gs_visitor::emit_control_data_bits()
{
   assert(c->control_data_bits_per_vertex != 0);

   /* Since the URB_WRITE_OWORD message operates with 128-bit (vec4 sized)
    * granularity, we need to use two tricks to ensure that the batch of 32
    * control data bits is written to the appropriate DWORD in the URB.  To
    * select which vec4 we are writing to, we use the "slot {0,1} offset"
    * fields of the message header.  To select which DWORD in the vec4 we are
    * writing to, we use the channel mask fields of the message header.  To
    * avoid penalizing geometry shaders that emit a small number of vertices
    * with extra bookkeeping, we only do each of these tricks when
    * c->prog_data.control_data_header_size_bits is large enough to make it
    * necessary.
    *
    * Note: this means that if we're outputting just a single DWORD of control
    * data bits, we'll actually replicate it four times since we won't do any
    * channel masking.  But that's not a problem since in this case the
    * hardware only pays attention to the first DWORD.
    */
   enum brw_urb_write_flags urb_write_flags = BRW_URB_WRITE_OWORD;
   if (c->control_data_header_size_bits > 32)
      urb_write_flags = urb_write_flags | BRW_URB_WRITE_USE_CHANNEL_MASKS;
   if (c->control_data_header_size_bits > 128)
      urb_write_flags = urb_write_flags | BRW_URB_WRITE_PER_SLOT_OFFSET;

   /* If we are using either channel masks or a per-slot offset, then we
    * need to figure out which DWORD we are trying to write to, using the
    * formula:
    *
    *     dword_index = (vertex_count - 1) * bits_per_vertex / 32
    *
    * Since bits_per_vertex is a power of two, and is known at compile
    * time, this can be optimized to:
    *
    *     dword_index = (vertex_count - 1) >> (6 - log2(bits_per_vertex))
    */
   src_reg dword_index(this, glsl_type::uint_type);
   if (urb_write_flags) {
      src_reg prev_count(this, glsl_type::uint_type);
      emit(ADD(dst_reg(prev_count), this->vertex_count,
               brw_imm_ud(0xffffffffu)));
      unsigned log2_bits_per_vertex =
         util_last_bit(c->control_data_bits_per_vertex);
      emit(SHR(dst_reg(dword_index), prev_count,
               brw_imm_ud(6 - log2_bits_per_vertex)));
   }

   /* Start building the URB write message.  The first MRF gets a copy of
    * R0.
    */
   int base_mrf = 1;
   dst_reg mrf_reg(MRF, base_mrf);
   src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
   vec4_instruction *inst = emit(MOV(mrf_reg, r0));
   inst->force_writemask_all = true;

   if (urb_write_flags & BRW_URB_WRITE_PER_SLOT_OFFSET) {
      /* Set the per-slot offset to dword_index / 4, so that we'll write to
       * the appropriate OWORD within the control data header.
       */
      src_reg per_slot_offset(this, glsl_type::uint_type);
      emit(SHR(dst_reg(per_slot_offset), dword_index, brw_imm_ud(2u)));
      emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, per_slot_offset,
           brw_imm_ud(1u));
   }

   if (urb_write_flags & BRW_URB_WRITE_USE_CHANNEL_MASKS) {
      /* Set the channel masks to 1 << (dword_index % 4), so that we'll
       * write to the appropriate DWORD within the OWORD.  We need to do
       * this computation with force_writemask_all, otherwise garbage data
       * from invocation 0 might clobber the mask for invocation 1 when
       * GS_OPCODE_PREPARE_CHANNEL_MASKS tries to OR the two masks
       * together.
       */
      src_reg channel(this, glsl_type::uint_type);
      inst = emit(AND(dst_reg(channel), dword_index, brw_imm_ud(3u)));
      inst->force_writemask_all = true;
      src_reg one(this, glsl_type::uint_type);
      inst = emit(MOV(dst_reg(one), brw_imm_ud(1u)));
      inst->force_writemask_all = true;
      src_reg channel_mask(this, glsl_type::uint_type);
      inst = emit(SHL(dst_reg(channel_mask), one, channel));
      inst->force_writemask_all = true;
      emit(GS_OPCODE_PREPARE_CHANNEL_MASKS, dst_reg(channel_mask),
           channel_mask);
      emit(GS_OPCODE_SET_CHANNEL_MASKS, mrf_reg, channel_mask);
   }

   /* Store the control data bits in the message payload and send it. */
   dst_reg mrf_reg2(MRF, base_mrf + 1);
   inst = emit(MOV(mrf_reg2, this->control_data_bits));
   inst->force_writemask_all = true;
   inst = emit(GS_OPCODE_URB_WRITE);
   inst->urb_write_flags = urb_write_flags;
   /* We need to increment Global Offset by 256-bits to make room for
    * Broadwell's extra "Vertex Count" payload at the beginning of the
    * URB entry.  Since this is an OWord message, Global Offset is counted
    * in 128-bit units, so we must set it to 2.
    */
   if (devinfo->gen >= 8 && gs_prog_data->static_vertex_count == -1)
      inst->offset = 2;
   inst->base_mrf = base_mrf;
   inst->mlen = 2;
}
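/* A worked instance of the dword_index arithmetic in the function above,
 * with assumed values: since util_last_bit(b) is log2(b) + 1 for a
 * power-of-two b, the shift 6 - util_last_bit(b) equals 5 - log2(b), which
 * is exactly "multiply by bits_per_vertex, then divide by 32" (reusing
 * sketch_util_last_bit() from the first sketch):
 */
static void
sketch_dword_index(void)
{
   const unsigned bits_per_vertex = 2; /* assumed power of two */
   const unsigned vertex_count = 33;

   unsigned by_formula = (vertex_count - 1) * bits_per_vertex / 32;
   unsigned by_shift = (vertex_count - 1) >>
                       (6 - sketch_util_last_bit(bits_per_vertex));

   assert(by_formula == 2 && by_shift == 2);
}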
void
fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
      struct fd5_emit *emit)
{
   struct stage s[MAX_STAGES];
   uint32_t pos_regid, psize_regid, color_regid[8];
   uint32_t face_regid, coord_regid, zwcoord_regid;
   uint32_t vcoord_regid, vertex_regid, instance_regid;
   enum a3xx_threadsize fssz;
   uint8_t psize_loc = ~0;
   int i, j;

   setup_stages(emit, s);

   fssz = (s[FS].i->max_reg >= 24) ? TWO_QUADS : FOUR_QUADS;

   pos_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_POS);
   psize_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_PSIZ);
   vertex_regid = ir3_find_sysval_regid(s[VS].v, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);
   instance_regid = ir3_find_sysval_regid(s[VS].v, SYSTEM_VALUE_INSTANCE_ID);

   if (s[FS].v->color0_mrt) {
      color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
      color_regid[4] = color_regid[5] = color_regid[6] = color_regid[7] =
         ir3_find_output_regid(s[FS].v, FRAG_RESULT_COLOR);
   } else {
      color_regid[0] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA0);
      color_regid[1] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA1);
      color_regid[2] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA2);
      color_regid[3] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA3);
      color_regid[4] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA4);
      color_regid[5] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA5);
      color_regid[6] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA6);
      color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7);
   }

   /* TODO get these dynamically: */
   face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0);
   coord_regid = s[FS].v->frag_coord ? regid(0,0) : regid(63,0);
   zwcoord_regid = s[FS].v->frag_coord ? regid(0,2) : regid(63,0);
   vcoord_regid = (s[FS].v->total_in > 0) ? s[FS].v->pos_regid : regid(63,0);

   /* we could probably divide this up into things that need to be
    * emitted if frag-prog is dirty vs if vert-prog is dirty..
    */

   OUT_PKT4(ring, REG_A5XX_HLSQ_VS_CONFIG, 5);
   OUT_RING(ring, A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET(s[VS].constoff) |
         A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET(s[VS].instroff) |
         COND(s[VS].v, A5XX_HLSQ_VS_CONFIG_ENABLED));
   OUT_RING(ring, A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET(s[FS].constoff) |
         A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET(s[FS].instroff) |
         COND(s[FS].v, A5XX_HLSQ_FS_CONFIG_ENABLED));
   OUT_RING(ring, A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET(s[HS].constoff) |
         A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET(s[HS].instroff) |
         COND(s[HS].v, A5XX_HLSQ_HS_CONFIG_ENABLED));
   OUT_RING(ring, A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET(s[DS].constoff) |
         A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET(s[DS].instroff) |
         COND(s[DS].v, A5XX_HLSQ_DS_CONFIG_ENABLED));
   OUT_RING(ring, A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET(s[GS].constoff) |
         A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET(s[GS].instroff) |
         COND(s[GS].v, A5XX_HLSQ_GS_CONFIG_ENABLED));

   OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CONFIG, 1);
   OUT_RING(ring, 0x00000000);

   OUT_PKT4(ring, REG_A5XX_HLSQ_VS_CNTL, 5);
   OUT_RING(ring, A5XX_HLSQ_VS_CNTL_INSTRLEN(s[VS].instrlen) |
         COND(s[VS].v && s[VS].v->has_ssbo, A5XX_HLSQ_VS_CNTL_SSBO_ENABLE));
   OUT_RING(ring, A5XX_HLSQ_FS_CNTL_INSTRLEN(s[FS].instrlen) |
         COND(s[FS].v && s[FS].v->has_ssbo, A5XX_HLSQ_FS_CNTL_SSBO_ENABLE));
   OUT_RING(ring, A5XX_HLSQ_HS_CNTL_INSTRLEN(s[HS].instrlen) |
         COND(s[HS].v && s[HS].v->has_ssbo, A5XX_HLSQ_HS_CNTL_SSBO_ENABLE));
   OUT_RING(ring, A5XX_HLSQ_DS_CNTL_INSTRLEN(s[DS].instrlen) |
         COND(s[DS].v && s[DS].v->has_ssbo, A5XX_HLSQ_DS_CNTL_SSBO_ENABLE));
   OUT_RING(ring, A5XX_HLSQ_GS_CNTL_INSTRLEN(s[GS].instrlen) |
         COND(s[GS].v && s[GS].v->has_ssbo, A5XX_HLSQ_GS_CNTL_SSBO_ENABLE));

   OUT_PKT4(ring, REG_A5XX_SP_VS_CONFIG, 5);
   OUT_RING(ring, A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET(s[VS].constoff) |
         A5XX_SP_VS_CONFIG_SHADEROBJOFFSET(s[VS].instroff) |
         COND(s[VS].v, A5XX_SP_VS_CONFIG_ENABLED));
   OUT_RING(ring, A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET(s[FS].constoff) |
         A5XX_SP_FS_CONFIG_SHADEROBJOFFSET(s[FS].instroff) |
         COND(s[FS].v, A5XX_SP_FS_CONFIG_ENABLED));
   OUT_RING(ring, A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET(s[HS].constoff) |
         A5XX_SP_HS_CONFIG_SHADEROBJOFFSET(s[HS].instroff) |
         COND(s[HS].v, A5XX_SP_HS_CONFIG_ENABLED));
   OUT_RING(ring, A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET(s[DS].constoff) |
         A5XX_SP_DS_CONFIG_SHADEROBJOFFSET(s[DS].instroff) |
         COND(s[DS].v, A5XX_SP_DS_CONFIG_ENABLED));
   OUT_RING(ring, A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET(s[GS].constoff) |
         A5XX_SP_GS_CONFIG_SHADEROBJOFFSET(s[GS].instroff) |
         COND(s[GS].v, A5XX_SP_GS_CONFIG_ENABLED));

   OUT_PKT4(ring, REG_A5XX_SP_CS_CONFIG, 1);
   OUT_RING(ring, 0x00000000);

   OUT_PKT4(ring, REG_A5XX_HLSQ_VS_CONSTLEN, 2);
   OUT_RING(ring, s[VS].constlen);    /* HLSQ_VS_CONSTLEN */
   OUT_RING(ring, s[VS].instrlen);    /* HLSQ_VS_INSTRLEN */

   OUT_PKT4(ring, REG_A5XX_HLSQ_FS_CONSTLEN, 2);
   OUT_RING(ring, s[FS].constlen);    /* HLSQ_FS_CONSTLEN */
   OUT_RING(ring, s[FS].instrlen);    /* HLSQ_FS_INSTRLEN */

   OUT_PKT4(ring, REG_A5XX_HLSQ_HS_CONSTLEN, 2);
   OUT_RING(ring, s[HS].constlen);    /* HLSQ_HS_CONSTLEN */
   OUT_RING(ring, s[HS].instrlen);    /* HLSQ_HS_INSTRLEN */

   OUT_PKT4(ring, REG_A5XX_HLSQ_DS_CONSTLEN, 2);
   OUT_RING(ring, s[DS].constlen);    /* HLSQ_DS_CONSTLEN */
   OUT_RING(ring, s[DS].instrlen);    /* HLSQ_DS_INSTRLEN */

   OUT_PKT4(ring, REG_A5XX_HLSQ_GS_CONSTLEN, 2);
   OUT_RING(ring, s[GS].constlen);    /* HLSQ_GS_CONSTLEN */
   OUT_RING(ring, s[GS].instrlen);    /* HLSQ_GS_INSTRLEN */

   OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CONSTLEN, 2);
   OUT_RING(ring, 0x00000000);        /* HLSQ_CS_CONSTLEN */
   OUT_RING(ring, 0x00000000);        /* HLSQ_CS_INSTRLEN */

   OUT_PKT4(ring, REG_A5XX_SP_VS_CTRL_REG0, 1);
   OUT_RING(ring, A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) |
         A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) |
         0x6 | /* XXX seems to be always set? */
         A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(0x3) |  // XXX need to figure this out somehow..
         COND(s[VS].v->has_samp, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE));

   struct ir3_shader_linkage l = {0};
   ir3_link_shaders(&l, s[VS].v, s[FS].v);

   if ((s[VS].v->shader->stream_output.num_outputs > 0) &&
         !emit->key.binning_pass)
      link_stream_out(&l, s[VS].v);

   BITSET_DECLARE(varbs, 128) = {0};
   uint32_t *varmask = (uint32_t *)varbs;

   for (i = 0; i < l.cnt; i++)
      for (j = 0; j < util_last_bit(l.var[i].compmask); j++)
         BITSET_SET(varbs, l.var[i].loc + j);

   OUT_PKT4(ring, REG_A5XX_VPC_VAR_DISABLE(0), 4);
   OUT_RING(ring, ~varmask[0]);  /* VPC_VAR[0].DISABLE */
   OUT_RING(ring, ~varmask[1]);  /* VPC_VAR[1].DISABLE */
   OUT_RING(ring, ~varmask[2]);  /* VPC_VAR[2].DISABLE */
   OUT_RING(ring, ~varmask[3]);  /* VPC_VAR[3].DISABLE */

   /* a5xx appends pos/psize to end of the linkage map: */
   if (pos_regid != regid(63,0))
      ir3_link_add(&l, pos_regid, 0xf, l.max_loc);

   if (psize_regid != regid(63,0)) {
      psize_loc = l.max_loc;
      ir3_link_add(&l, psize_regid, 0x1, l.max_loc);
   }

   if ((s[VS].v->shader->stream_output.num_outputs > 0) &&
         !emit->key.binning_pass) {
      emit_stream_out(ring, s[VS].v, &l);

      OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
      OUT_RING(ring, 0x00000000);
   } else {
      OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
      OUT_RING(ring, A5XX_VPC_SO_OVERRIDE_SO_DISABLE);
   }

   for (i = 0, j = 0; (i < 16) && (j < l.cnt); i++) {
      uint32_t reg = 0;

      OUT_PKT4(ring, REG_A5XX_SP_VS_OUT_REG(i), 1);

      reg |= A5XX_SP_VS_OUT_REG_A_REGID(l.var[j].regid);
      reg |= A5XX_SP_VS_OUT_REG_A_COMPMASK(l.var[j].compmask);
      j++;

      reg |= A5XX_SP_VS_OUT_REG_B_REGID(l.var[j].regid);
      reg |= A5XX_SP_VS_OUT_REG_B_COMPMASK(l.var[j].compmask);
      j++;

      OUT_RING(ring, reg);
   }

   for (i = 0, j = 0; (i < 8) && (j < l.cnt); i++) {
      uint32_t reg = 0;

      OUT_PKT4(ring, REG_A5XX_SP_VS_VPC_DST_REG(i), 1);

      reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC0(l.var[j++].loc);
      reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC1(l.var[j++].loc);
      reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC2(l.var[j++].loc);
      reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC3(l.var[j++].loc);

      OUT_RING(ring, reg);
   }

   OUT_PKT4(ring, REG_A5XX_SP_VS_OBJ_START_LO, 2);
   OUT_RELOC(ring, s[VS].v->bo, 0, 0, 0);  /* SP_VS_OBJ_START_LO/HI */

   if (s[VS].instrlen)
      fd5_emit_shader(ring, s[VS].v);

   // TODO depending on other bits in this reg (if any) set somewhere else?
   OUT_PKT4(ring, REG_A5XX_PC_PRIM_VTX_CNTL, 1);
   OUT_RING(ring, COND(s[VS].v->writes_psize, A5XX_PC_PRIM_VTX_CNTL_PSIZE));

   OUT_PKT4(ring, REG_A5XX_SP_PRIMITIVE_CNTL, 1);
   OUT_RING(ring, A5XX_SP_PRIMITIVE_CNTL_VSOUT(l.cnt));

   OUT_PKT4(ring, REG_A5XX_VPC_CNTL_0, 1);
   OUT_RING(ring, A5XX_VPC_CNTL_0_STRIDE_IN_VPC(l.max_loc) |
         COND(s[FS].v->total_in > 0, A5XX_VPC_CNTL_0_VARYING) |
         COND(s[FS].v->frag_coord, A5XX_VPC_CNTL_0_VARYING) |
         0x10000);    // XXX

   fd5_context(ctx)->max_loc = l.max_loc;

   if (emit->key.binning_pass) {
      OUT_PKT4(ring, REG_A5XX_SP_FS_OBJ_START_LO, 2);
      OUT_RING(ring, 0x00000000);    /* SP_FS_OBJ_START_LO */
      OUT_RING(ring, 0x00000000);    /* SP_FS_OBJ_START_HI */
   } else {
      OUT_PKT4(ring, REG_A5XX_SP_FS_OBJ_START_LO, 2);
      OUT_RELOC(ring, s[FS].v->bo, 0, 0, 0);  /* SP_FS_OBJ_START_LO/HI */
   }

   OUT_PKT4(ring, REG_A5XX_HLSQ_CONTROL_0_REG, 5);
   OUT_RING(ring, A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(fssz) |
         A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE(TWO_QUADS) |
         0x00000880);               /* XXX HLSQ_CONTROL_0 */
   OUT_RING(ring, A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD(63));
   OUT_RING(ring, A5XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid) |
         0xfcfcfc00);               /* XXX */
   OUT_RING(ring, A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID(vcoord_regid) |
         0xfcfcfc00);               /* XXX */
   OUT_RING(ring, A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID(coord_regid) |
         A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(zwcoord_regid) |
         0x0000fcfc);               /* XXX */

   OUT_PKT4(ring, REG_A5XX_SP_FS_CTRL_REG0, 1);
   OUT_RING(ring, COND(s[FS].v->total_in > 0, A5XX_SP_FS_CTRL_REG0_VARYING) |
         COND(s[FS].v->frag_coord, A5XX_SP_FS_CTRL_REG0_VARYING) |
         0x40006 | /* XXX set pretty much everywhere */
         A5XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) |
         A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) |
         A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) |
         A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(0x3) |  // XXX need to figure this out somehow..
         COND(s[FS].v->has_samp, A5XX_SP_FS_CTRL_REG0_PIXLODENABLE));

   OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1);
   OUT_RING(ring, 0x020fffff);        /* XXX */

   OUT_PKT4(ring, REG_A5XX_VPC_GS_SIV_CNTL, 1);
   OUT_RING(ring, 0x0000ffff);        /* XXX */

   OUT_PKT4(ring, REG_A5XX_SP_SP_CNTL, 1);
   OUT_RING(ring, 0x00000010);        /* XXX */

   OUT_PKT4(ring, REG_A5XX_GRAS_CNTL, 1);
   OUT_RING(ring, COND(s[FS].v->total_in > 0, A5XX_GRAS_CNTL_VARYING) |
         COND(s[FS].v->frag_coord, A5XX_GRAS_CNTL_XCOORD |
               A5XX_GRAS_CNTL_YCOORD |
               A5XX_GRAS_CNTL_ZCOORD |
               A5XX_GRAS_CNTL_WCOORD |
               A5XX_GRAS_CNTL_UNK3) |
         COND(s[FS].v->frag_face, A5XX_GRAS_CNTL_UNK3));

   OUT_PKT4(ring, REG_A5XX_RB_RENDER_CONTROL0, 2);
   OUT_RING(ring, COND(s[FS].v->total_in > 0, A5XX_RB_RENDER_CONTROL0_VARYING) |
         COND(s[FS].v->frag_coord, A5XX_RB_RENDER_CONTROL0_XCOORD |
               A5XX_RB_RENDER_CONTROL0_YCOORD |
               A5XX_RB_RENDER_CONTROL0_ZCOORD |
               A5XX_RB_RENDER_CONTROL0_WCOORD |
               A5XX_RB_RENDER_CONTROL0_UNK3) |
         COND(s[FS].v->frag_face, A5XX_RB_RENDER_CONTROL0_UNK3));
   OUT_RING(ring, COND(s[FS].v->frag_face, A5XX_RB_RENDER_CONTROL1_FACENESS));

   OUT_PKT4(ring, REG_A5XX_SP_FS_OUTPUT_REG(0), 8);
   for (i = 0; i < 8; i++) {
      OUT_RING(ring, A5XX_SP_FS_OUTPUT_REG_REGID(color_regid[i]) |
            COND(emit->key.half_precision,
               A5XX_SP_FS_OUTPUT_REG_HALF_PRECISION));
   }

   OUT_PKT4(ring, REG_A5XX_VPC_PACK, 1);
   OUT_RING(ring, A5XX_VPC_PACK_NUMNONPOSVAR(s[FS].v->total_in) |
         A5XX_VPC_PACK_PSIZELOC(psize_loc));

   if (!emit->key.binning_pass) {
      uint32_t vinterp[8], vpsrepl[8];

      memset(vinterp, 0, sizeof(vinterp));
      memset(vpsrepl, 0, sizeof(vpsrepl));

      /* looks like we need to do int varyings in the frag
       * shader on a5xx (no flatshad reg?  or a420.0 bug?):
       *
       *    (sy)(ss)nop
       *    (sy)ldlv.u32 r0.x,l[r0.x], 1
       *    ldlv.u32 r0.y,l[r0.x+1], 1
       *    (ss)bary.f (ei)r63.x, 0, r0.x
       *    (ss)(rpt1)cov.s32f16 hr0.x, (r)r0.x
       *    (rpt5)nop
       *    sam (f16)(xyzw)hr0.x, hr0.x, s#0, t#0
       *
       * Possibly on later a5xx variants we'll be able to use
       * something like the code below instead of workaround
       * in the shader:
       */
      /* figure out VARYING_INTERP / VARYING_PS_REPL register values: */
      for (j = -1; (j = ir3_next_varying(s[FS].v, j)) < (int)s[FS].v->inputs_count; ) {
         /* NOTE: varyings are packed, so if compmask is 0xb
          * then first, third, and fourth component occupy
          * three consecutive varying slots:
          */
         unsigned compmask = s[FS].v->inputs[j].compmask;

         uint32_t inloc = s[FS].v->inputs[j].inloc;

         if ((s[FS].v->inputs[j].interpolate == INTERP_MODE_FLAT) ||
               (s[FS].v->inputs[j].rasterflat && emit->rasterflat)) {
            uint32_t loc = inloc;

            for (i = 0; i < 4; i++) {
               if (compmask & (1 << i)) {
                  vinterp[loc / 16] |= 1 << ((loc % 16) * 2);
                  //flatshade[loc / 32] |= 1 << (loc % 32);
                  loc++;
               }
            }
         }

         gl_varying_slot slot = s[FS].v->inputs[j].slot;

         /* since we don't enable PIPE_CAP_TGSI_TEXCOORD: */
         if (slot >= VARYING_SLOT_VAR0) {
            unsigned texmask = 1 << (slot - VARYING_SLOT_VAR0);
            /* Replace the .xy coordinates with S/T from the point sprite.  Set
             * interpolation bits for .zw such that they become .01
             */
            if (emit->sprite_coord_enable & texmask) {
               /* mask is two 2-bit fields, where:
                *   '01' -> S
                *   '10' -> T
                *   '11' -> 1 - T  (flip mode)
                */
               unsigned mask = emit->sprite_coord_mode ? 0b1101 : 0b1001;
               uint32_t loc = inloc;
               if (compmask & 0x1) {
                  vpsrepl[loc / 16] |= ((mask >> 0) & 0x3) << ((loc % 16) * 2);
                  loc++;
               }
               if (compmask & 0x2) {
                  vpsrepl[loc / 16] |= ((mask >> 2) & 0x3) << ((loc % 16) * 2);
                  loc++;
               }
static bool
ps_get_gen6_ff_kernels(const struct ilo_dev *dev,
                       const struct ilo_state_ps_info *info,
                       struct pixel_ff *ff)
{
   const struct ilo_state_shader_kernel_info *kernel_8 = &info->kernel_8;
   const struct ilo_state_shader_kernel_info *kernel_16 = &info->kernel_16;
   const struct ilo_state_shader_kernel_info *kernel_32 = &info->kernel_32;
   uint32_t scratch_size;

   ILO_DEV_ASSERT(dev, 6, 8);

   ff->dispatch_modes = ps_get_gen6_dispatch_modes(dev, info);

   /* initialize kernel offsets and GRF starts */
   if (util_is_power_of_two(ff->dispatch_modes)) {
      if (ff->dispatch_modes & GEN6_PS_DISPATCH_8) {
         ff->kernel_offsets[0] = kernel_8->offset;
         ff->grf_starts[0] = kernel_8->grf_start;
      } else if (ff->dispatch_modes & GEN6_PS_DISPATCH_16) {
         ff->kernel_offsets[0] = kernel_16->offset;
         ff->grf_starts[0] = kernel_16->grf_start;
      } else if (ff->dispatch_modes & GEN6_PS_DISPATCH_32) {
         ff->kernel_offsets[0] = kernel_32->offset;
         ff->grf_starts[0] = kernel_32->grf_start;
      }
   } else {
      ff->kernel_offsets[0] = kernel_8->offset;
      ff->kernel_offsets[1] = kernel_32->offset;
      ff->kernel_offsets[2] = kernel_16->offset;

      ff->grf_starts[0] = kernel_8->grf_start;
      ff->grf_starts[1] = kernel_32->grf_start;
      ff->grf_starts[2] = kernel_16->grf_start;
   }

   /* we do not want to save it */
   assert(ff->kernel_offsets[0] == 0);

   ff->pcb_enable = (((ff->dispatch_modes & GEN6_PS_DISPATCH_8) &&
                      kernel_8->pcb_attr_count) ||
                     ((ff->dispatch_modes & GEN6_PS_DISPATCH_16) &&
                      kernel_16->pcb_attr_count) ||
                     ((ff->dispatch_modes & GEN6_PS_DISPATCH_32) &&
                      kernel_32->pcb_attr_count));

   scratch_size = 0;
   if ((ff->dispatch_modes & GEN6_PS_DISPATCH_8) &&
       scratch_size < kernel_8->scratch_size)
      scratch_size = kernel_8->scratch_size;
   if ((ff->dispatch_modes & GEN6_PS_DISPATCH_16) &&
       scratch_size < kernel_16->scratch_size)
      scratch_size = kernel_16->scratch_size;
   if ((ff->dispatch_modes & GEN6_PS_DISPATCH_32) &&
       scratch_size < kernel_32->scratch_size)
      scratch_size = kernel_32->scratch_size;

   /* next power of two, starting from 1KB */
   ff->scratch_space = (scratch_size > 1024) ?
      (util_last_bit(scratch_size - 1) - 10) : 0;

   /* GPU hangs on Haswell if none of the dispatch mode bits is set */
   if (ilo_dev_gen(dev) == ILO_GEN(7.5) && !ff->dispatch_modes)
      ff->dispatch_modes |= GEN6_PS_DISPATCH_8;

   return true;
}
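/* The scratch_space encoding above is "log2 of the 1KB-granular power of two
 * covering scratch_size": util_last_bit(n - 1) equals ceil(log2(n)) for
 * n > 1, so 1KB << scratch_space is the smallest power-of-two allocation
 * that satisfies the request.  Two assumed sizes as illustration (reusing
 * sketch_util_last_bit() from the first sketch):
 */
static void
sketch_scratch_space(void)
{
   unsigned e;

   e = sketch_util_last_bit(12288 - 1) - 10; /* 12KB request */
   assert(e == 4 && (1024u << e) == 16384);  /* rounds up to 16KB */

   e = sketch_util_last_bit(2048 - 1) - 10;  /* exact power of two */
   assert(e == 1 && (1024u << e) == 2048);   /* no over-allocation */
}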
static int
etna_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
{
   struct etna_screen *screen = etna_screen(pscreen);

   switch (param) {
   /* Supported features (boolean caps). */
   case PIPE_CAP_TWO_SIDED_STENCIL:
   case PIPE_CAP_ANISOTROPIC_FILTER:
   case PIPE_CAP_POINT_SPRITE:
   case PIPE_CAP_TEXTURE_SHADOW_MAP:
   case PIPE_CAP_BLEND_EQUATION_SEPARATE:
   case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
   case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
   case PIPE_CAP_SM3:
   case PIPE_CAP_TEXTURE_BARRIER:
   case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
   case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
   case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
   case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
   case PIPE_CAP_USER_CONSTANT_BUFFERS:
   case PIPE_CAP_TGSI_TEXCOORD:
   case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
      return 1;

   /* Memory */
   case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
      return 256;
   case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
      return 4; /* XXX could easily be supported */
   case PIPE_CAP_GLSL_FEATURE_LEVEL:
      return 120;

   case PIPE_CAP_NPOT_TEXTURES:
      return true; /* VIV_FEATURE(priv->dev, chipMinorFeatures1, NON_POWER_OF_TWO); */

   case PIPE_CAP_PRIMITIVE_RESTART:
      return VIV_FEATURE(screen, chipMinorFeatures1, HALTI0);

   case PIPE_CAP_ENDIANNESS:
      return PIPE_ENDIAN_LITTLE; /* on most Viv hw this is configurable (feature ENDIANNESS_CONFIG) */

   /* Unsupported features. */
   case PIPE_CAP_SEAMLESS_CUBE_MAP:
   case PIPE_CAP_TEXTURE_SWIZZLE: /* XXX supported on gc2000 */
   case PIPE_CAP_COMPUTE: /* XXX supported on gc2000 */
   case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: /* only one colorbuffer supported, so mixing makes no sense */
   case PIPE_CAP_CONDITIONAL_RENDER: /* no occlusion queries */
   case PIPE_CAP_TGSI_INSTANCEID: /* no idea, really */
   case PIPE_CAP_START_INSTANCE: /* instancing not supported AFAIK */
   case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: /* instancing not supported AFAIK */
   case PIPE_CAP_SHADER_STENCIL_EXPORT: /* Fragment shader cannot export stencil value */
   case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: /* no dual-source supported */
   case PIPE_CAP_TEXTURE_MULTISAMPLE: /* no texture multisample */
   case PIPE_CAP_TEXTURE_MIRROR_CLAMP: /* only mirrored repeat */
   case PIPE_CAP_INDEP_BLEND_ENABLE:
   case PIPE_CAP_INDEP_BLEND_FUNC:
   case PIPE_CAP_DEPTH_CLIP_DISABLE:
   case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
   case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
   case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
   case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: /* Don't skip strict max uniform limit check */
   case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
   case PIPE_CAP_VERTEX_COLOR_CLAMPED:
   case PIPE_CAP_USER_VERTEX_BUFFERS:
   case PIPE_CAP_USER_INDEX_BUFFERS:
   case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
   case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
   case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
   case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES: /* TODO: test me out with piglit */
   case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
   case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
   case PIPE_CAP_TEXTURE_GATHER_SM5:
   case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
   case PIPE_CAP_FAKE_SW_MSAA:
   case PIPE_CAP_TEXTURE_QUERY_LOD:
   case PIPE_CAP_SAMPLE_SHADING:
   case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
   case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
   case PIPE_CAP_DRAW_INDIRECT:
   case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
   case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
   case PIPE_CAP_SAMPLER_VIEW_TARGET:
   case PIPE_CAP_CLIP_HALFZ:
   case PIPE_CAP_VERTEXID_NOBASE:
   case PIPE_CAP_POLYGON_OFFSET_CLAMP:
   case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
   case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
   case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
   case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
   case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
   case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
   case PIPE_CAP_DEPTH_BOUNDS_TEST:
   case PIPE_CAP_TGSI_TXQS:
   case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
   case PIPE_CAP_SHAREABLE_SHADERS:
   case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
   case PIPE_CAP_CLEAR_TEXTURE:
   case PIPE_CAP_DRAW_PARAMETERS:
   case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
   case PIPE_CAP_MULTI_DRAW_INDIRECT:
   case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
   case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
   case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
   case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
   case PIPE_CAP_INVALIDATE_BUFFER:
   case PIPE_CAP_GENERATE_MIPMAP:
   case PIPE_CAP_STRING_MARKER:
   case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
   case PIPE_CAP_QUERY_BUFFER_OBJECT:
   case PIPE_CAP_QUERY_MEMORY_INFO:
   case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
   case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
   case PIPE_CAP_CULL_DISTANCE:
   case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
   case PIPE_CAP_TGSI_VOTE:
   case PIPE_CAP_MAX_WINDOW_RECTANGLES:
   case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED:
   case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS:
   case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
   case PIPE_CAP_TGSI_ARRAY_COMPONENTS:
   case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
   case PIPE_CAP_TGSI_CAN_READ_OUTPUTS:
   case PIPE_CAP_NATIVE_FENCE_FD:
   case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
   case PIPE_CAP_TGSI_FS_FBFETCH:
   case PIPE_CAP_TGSI_MUL_ZERO_WINS:
   case PIPE_CAP_DOUBLES:
   case PIPE_CAP_INT64:
   case PIPE_CAP_INT64_DIVMOD:
      return 0;

   /* Stream output. */
   case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
   case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
   case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
   case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
      return 0;

   /* Geometry shader output, unsupported. */
   case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
   case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
   case PIPE_CAP_MAX_VERTEX_STREAMS:
      return 0;

   case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
      return 128;

   /* Texturing. */
   case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
   case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
   {
      int log2_max_tex_size = util_last_bit(screen->specs.max_texture_size);
      assert(log2_max_tex_size > 0);
      return log2_max_tex_size;
   }
   case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: /* 3D textures not supported - fake it */
      return 5;
   case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
      return 0;
   case PIPE_CAP_CUBE_MAP_ARRAY:
      return 0;
   case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
   case PIPE_CAP_MIN_TEXEL_OFFSET:
      return -8;
   case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
   case PIPE_CAP_MAX_TEXEL_OFFSET:
      return 7;
   case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
      return 0;
   case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
      return 65536;

   /* Render targets. */
   case PIPE_CAP_MAX_RENDER_TARGETS:
      return 1;

   /* Viewports and scissors. */
   case PIPE_CAP_MAX_VIEWPORTS:
      return 1;

   /* Timer queries. */
   case PIPE_CAP_QUERY_TIME_ELAPSED:
   case PIPE_CAP_OCCLUSION_QUERY:
      return 0;
   case PIPE_CAP_QUERY_TIMESTAMP:
      return 1;
   case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
      return 0;

   /* Preferences */
   case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
      return 0;

   case PIPE_CAP_PCI_GROUP:
   case PIPE_CAP_PCI_BUS:
   case PIPE_CAP_PCI_DEVICE:
   case PIPE_CAP_PCI_FUNCTION:
      return 0;

   case PIPE_CAP_VENDOR_ID:
   case PIPE_CAP_DEVICE_ID:
      return 0xFFFFFFFF;
   case PIPE_CAP_ACCELERATED:
      return 1;
   case PIPE_CAP_VIDEO_MEMORY:
      return 0;
   case PIPE_CAP_UMA:
      return 1;
   }

   debug_printf("unknown param %d", param);
   return 0;
}
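/* The MAX_TEXTURE_2D_LEVELS computation above relies on the same identity as
 * the paramf hook earlier: for a power-of-two max_texture_size N,
 * util_last_bit(N) = log2(N) + 1 is exactly the mip count from NxN down to
 * 1x1.  With an assumed max_texture_size of 2048 (reusing
 * sketch_util_last_bit() from the first sketch):
 */
static void
sketch_max_texture_levels(void)
{
   assert(sketch_util_last_bit(2048) == 12); /* levels 2048, 1024, ..., 1 */
}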