Example No. 1
static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op)
{
    struct r600_context *rctx = (struct r600_context *)ctx;

    r600_suspend_nontimer_queries(rctx);

    util_blitter_save_vertex_buffers(rctx->blitter,
                                     util_last_bit(rctx->vertex_buffer_state.enabled_mask),
                                     rctx->vertex_buffer_state.vb);
    util_blitter_save_vertex_elements(rctx->blitter, rctx->vertex_elements);
    util_blitter_save_vertex_shader(rctx->blitter, rctx->vs_shader);
    util_blitter_save_so_targets(rctx->blitter, rctx->num_so_targets,
                                 (struct pipe_stream_output_target**)rctx->so_targets);
    util_blitter_save_rasterizer(rctx->blitter, rctx->states[R600_PIPE_STATE_RASTERIZER]);

    if (op & R600_SAVE_FRAGMENT_STATE) {
        if (rctx->states[R600_PIPE_STATE_VIEWPORT]) {
            util_blitter_save_viewport(rctx->blitter, &rctx->viewport);
        }
        util_blitter_save_fragment_shader(rctx->blitter, rctx->ps_shader);
        util_blitter_save_blend(rctx->blitter, rctx->states[R600_PIPE_STATE_BLEND]);
        util_blitter_save_depth_stencil_alpha(rctx->blitter, rctx->states[R600_PIPE_STATE_DSA]);
        if (rctx->states[R600_PIPE_STATE_STENCIL_REF]) {
            util_blitter_save_stencil_ref(rctx->blitter, &rctx->stencil_ref);
        }
        util_blitter_save_sample_mask(rctx->blitter, rctx->sample_mask.sample_mask);
    }

    if (op & R600_SAVE_FRAMEBUFFER)
        util_blitter_save_framebuffer(rctx->blitter, &rctx->framebuffer);

    if (op & R600_SAVE_TEXTURES) {
        util_blitter_save_fragment_sampler_states(
            rctx->blitter, rctx->ps_samplers.n_samplers,
            (void**)rctx->ps_samplers.samplers);

        util_blitter_save_fragment_sampler_views(
            rctx->blitter, util_last_bit(rctx->ps_samplers.views.enabled_mask),
            (struct pipe_sampler_view**)rctx->ps_samplers.views.views);
    }

    if ((op & R600_DISABLE_RENDER_COND) && rctx->current_render_cond) {
        rctx->saved_render_cond = rctx->current_render_cond;
        rctx->saved_render_cond_mode = rctx->current_render_cond_mode;
        rctx->context.render_condition(&rctx->context, NULL, 0);
    }

}
Example No. 2
static void
fd_set_vertex_buffers(struct pipe_context *pctx,
		unsigned start_slot, unsigned count,
		const struct pipe_vertex_buffer *vb)
{
	struct fd_context *ctx = fd_context(pctx);
	struct fd_vertexbuf_stateobj *so = &ctx->vertexbuf;
	int i;

	/* on a2xx, pitch is encoded in the vtx fetch instruction, so
	 * we need to mark VTXSTATE as dirty as well to trigger patching
	 * and re-emitting the vtx shader:
	 */
	for (i = 0; i < count; i++) {
		bool new_enabled = vb && (vb[i].buffer || vb[i].user_buffer);
		bool old_enabled = so->vb[i].buffer || so->vb[i].user_buffer;
		uint32_t new_stride = vb ? vb[i].stride : 0;
		uint32_t old_stride = so->vb[i].stride;
		if ((new_enabled != old_enabled) || (new_stride != old_stride)) {
			ctx->dirty |= FD_DIRTY_VTXSTATE;
			break;
		}
	}

	util_set_vertex_buffers_mask(so->vb, &so->enabled_mask, vb, start_slot, count);
	so->count = util_last_bit(so->enabled_mask);

	ctx->dirty |= FD_DIRTY_VTXBUF;
}
Example No. 3
void
nv50_fp_linkage_validate(struct nv50_context *nv50)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_program *vp = nv50->gmtyprog ? nv50->gmtyprog : nv50->vertprog;
   struct nv50_program *fp = nv50->fragprog;
   struct nv50_varying dummy;
   int i, n, c, m;
   uint32_t primid = 0;
   uint32_t layerid = 0;
   uint32_t viewportid = 0;
   uint32_t psiz = 0x000;
   uint32_t interp = fp->fp.interp;
   uint32_t colors = fp->fp.colors;
   uint32_t clpd_nr = util_last_bit(vp->vp.clip_enable | vp->vp.cull_enable);
   uint32_t lin[4];
   uint8_t map[64];
   uint8_t so_map[64];

   if (!(nv50->dirty_3d & (NV50_NEW_3D_VERTPROG |
                           NV50_NEW_3D_FRAGPROG |
                           NV50_NEW_3D_GMTYPROG))) {
      uint8_t bfc, ffc;
      ffc = (nv50->state.semantic_color & NV50_3D_SEMANTIC_COLOR_FFC0_ID__MASK);
      bfc = (nv50->state.semantic_color & NV50_3D_SEMANTIC_COLOR_BFC0_ID__MASK)
         >> 8;
      if (nv50->rast->pipe.light_twoside == ((ffc == bfc) ? 0 : 1))
         return;
   }
Example No. 4
static void
finalize_global_binding(struct ilo_state_vector *vec)
{
   struct ilo_shader_state *cs = vec->cs;
   int base, count, shift;
   int i;

   count = ilo_shader_get_kernel_param(cs,
         ILO_KERNEL_CS_SURFACE_GLOBAL_COUNT);
   if (!count)
      return;

   base = ilo_shader_get_kernel_param(cs, ILO_KERNEL_CS_SURFACE_GLOBAL_BASE);
   shift = 32 - util_last_bit(base + count - 1);

   if (count > vec->global_binding.count)
      count = vec->global_binding.count;

   for (i = 0; i < count; i++) {
      struct ilo_global_binding_cso *cso =
         util_dynarray_element(&vec->global_binding.bindings,
               struct ilo_global_binding_cso, i);
      const uint32_t offset = *cso->handle & ((1 << shift) - 1);

      *cso->handle = ((base + i) << shift) | offset;
   }
}
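The shift computed above carves each 32-bit handle into two fields: util_last_bit(base + count - 1) high bits hold the surface index and the remaining low bits keep the offset within the surface. A small worked sketch of that packing follows; the values and the last_bit() helper are illustrative stand-ins, not the driver's own code.

#include <assert.h>
#include <stdint.h>

/* illustrative stand-in for util_last_bit(): MSB position + 1, 0 for 0 */
static unsigned last_bit(uint32_t u)
{
   unsigned n = 0;
   while (u) { n++; u >>= 1; }
   return n;
}

int main(void)
{
   /* illustrative: 4 global surfaces starting at binding table index 0 */
   int base = 0, count = 4;
   int shift = 32 - last_bit(base + count - 1);   /* 32 - 2 = 30 */

   uint32_t handle = 0x100;                       /* offset within the surface */
   uint32_t offset = handle & ((1u << shift) - 1);
   uint32_t packed = ((uint32_t)(base + 2) << shift) | offset;  /* surface 2 */

   assert(shift == 30);
   assert(packed == ((2u << 30) | 0x100));
   return 0;
}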
Example No. 5
static void
etna_set_vertex_buffers(struct pipe_context *pctx, unsigned start_slot,
      unsigned num_buffers, const struct pipe_vertex_buffer *vb)
{
   struct etna_context *ctx = etna_context(pctx);
   struct etna_vertexbuf_state *so = &ctx->vertex_buffer;

   util_set_vertex_buffers_mask(so->vb, &so->enabled_mask, vb, start_slot, num_buffers);
   so->count = util_last_bit(so->enabled_mask);

   for (unsigned idx = start_slot; idx < start_slot + num_buffers; ++idx) {
      struct compiled_set_vertex_buffer *cs = &so->cvb[idx];
      struct pipe_vertex_buffer *vbi = &so->vb[idx];

      assert(!vbi->is_user_buffer); /* XXX support user_buffer using
                                       etna_usermem_map */

      if (vbi->buffer.resource) { /* GPU buffer */
         cs->FE_VERTEX_STREAM_BASE_ADDR.bo = etna_resource(vbi->buffer.resource)->bo;
         cs->FE_VERTEX_STREAM_BASE_ADDR.offset = vbi->buffer_offset;
         cs->FE_VERTEX_STREAM_BASE_ADDR.flags = ETNA_RELOC_READ;
         cs->FE_VERTEX_STREAM_CONTROL =
            FE_VERTEX_STREAM_CONTROL_VERTEX_STRIDE(vbi->stride);
      } else {
         cs->FE_VERTEX_STREAM_BASE_ADDR.bo = NULL;
         cs->FE_VERTEX_STREAM_CONTROL = 0;
      }
   }

   ctx->dirty |= ETNA_DIRTY_VERTEX_BUFFERS;
}
Example No. 6
/* for vertex shader, the inputs are loaded into registers before the shader
 * is executed, so max_regs from the shader instructions might not properly
 * reflect the # of registers actually used, especially in the case of
 * passthrough varyings.
 *
 * Likewise, for fragment shader, we can have some regs which are passed
 * input values but never touched by the resulting shader (ie. as a result
 * of dead code elimination or simply because we don't know how to turn
 * the reg off).
 */
static void
fixup_regfootprint(struct ir3_shader_variant *v, uint32_t gpu_id)
{
	unsigned i;

	for (i = 0; i < v->inputs_count; i++) {
		/* skip frag inputs fetched via bary.f since their regs are
		 * not written by the gpu before the shader starts (and in fact
		 * the regids might not even be valid)
		 */
		if (v->inputs[i].bary)
			continue;

		/* ignore high regs that are global to all threads in a warp
		 * (they exist by default) (a5xx+)
		 */
		if (v->inputs[i].regid >= regid(48,0))
			continue;

		if (v->inputs[i].compmask) {
			unsigned n = util_last_bit(v->inputs[i].compmask) - 1;
			int32_t regid = v->inputs[i].regid + n;
			if (v->inputs[i].half) {
				if (gpu_id < 500) {
					v->info.max_half_reg = MAX2(v->info.max_half_reg, regid >> 2);
				} else {
					v->info.max_reg = MAX2(v->info.max_reg, regid >> 3);
				}
			} else {
Example No. 7
void
brw_upload_cs_prog(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct brw_cs_prog_key key;
   struct brw_program *cp = (struct brw_program *) brw->compute_program;

   if (!cp)
      return;

   if (!brw_state_dirty(brw, _NEW_TEXTURE, BRW_NEW_COMPUTE_PROGRAM))
      return;

   brw->cs.base.sampler_count =
      util_last_bit(ctx->ComputeProgram._Current->SamplersUsed);

   brw_cs_populate_key(brw, &key);

   if (!brw_search_cache(&brw->cache, BRW_CACHE_CS_PROG,
                         &key, sizeof(key),
                         &brw->cs.base.prog_offset,
                         &brw->cs.base.prog_data)) {
      bool success =
         brw_codegen_cs_prog(brw,
                             ctx->Shader.CurrentProgram[MESA_SHADER_COMPUTE],
                             cp, &key);
      (void) success;
      assert(success);
   }
}
Example No. 8
static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op)
{
	struct r600_context *rctx = (struct r600_context *)ctx;

	r600_suspend_nontimer_queries(&rctx->b);

	util_blitter_save_vertex_buffer_slot(rctx->blitter, rctx->vertex_buffer_state.vb);
	util_blitter_save_vertex_elements(rctx->blitter, rctx->vertex_fetch_shader.cso);
	util_blitter_save_vertex_shader(rctx->blitter, rctx->vs_shader);
	util_blitter_save_geometry_shader(rctx->blitter, rctx->gs_shader);
	util_blitter_save_so_targets(rctx->blitter, rctx->b.streamout.num_targets,
				     (struct pipe_stream_output_target**)rctx->b.streamout.targets);
	util_blitter_save_rasterizer(rctx->blitter, rctx->rasterizer_state.cso);

	if (op & R600_SAVE_FRAGMENT_STATE) {
		util_blitter_save_viewport(rctx->blitter, &rctx->viewport[0].state);
		util_blitter_save_scissor(rctx->blitter, &rctx->scissor[0].scissor);
		util_blitter_save_fragment_shader(rctx->blitter, rctx->ps_shader);
		util_blitter_save_blend(rctx->blitter, rctx->blend_state.cso);
		util_blitter_save_depth_stencil_alpha(rctx->blitter, rctx->dsa_state.cso);
		util_blitter_save_stencil_ref(rctx->blitter, &rctx->stencil_ref.pipe_state);
                util_blitter_save_sample_mask(rctx->blitter, rctx->sample_mask.sample_mask);
	}

	if (op & R600_SAVE_FRAMEBUFFER)
		util_blitter_save_framebuffer(rctx->blitter, &rctx->framebuffer.state);

	if (op & R600_SAVE_TEXTURES) {
		util_blitter_save_fragment_sampler_states(
			rctx->blitter, util_last_bit(rctx->samplers[PIPE_SHADER_FRAGMENT].states.enabled_mask),
			(void**)rctx->samplers[PIPE_SHADER_FRAGMENT].states.states);

		util_blitter_save_fragment_sampler_views(
			rctx->blitter, util_last_bit(rctx->samplers[PIPE_SHADER_FRAGMENT].views.enabled_mask),
			(struct pipe_sampler_view**)rctx->samplers[PIPE_SHADER_FRAGMENT].views.views);
	}

	if ((op & R600_DISABLE_RENDER_COND) && rctx->b.current_render_cond) {
           util_blitter_save_render_condition(rctx->blitter,
                                              rctx->b.current_render_cond,
                                              rctx->b.current_render_cond_cond,
                                              rctx->b.current_render_cond_mode);
        }
}
Example No. 9
static void
vc4_set_vertex_buffers(struct pipe_context *pctx,
                       unsigned start_slot, unsigned count,
                       const struct pipe_vertex_buffer *vb)
{
        struct vc4_context *vc4 = vc4_context(pctx);
        struct vc4_vertexbuf_stateobj *so = &vc4->vertexbuf;

        util_set_vertex_buffers_mask(so->vb, &so->enabled_mask, vb,
                                     start_slot, count);
        so->count = util_last_bit(so->enabled_mask);

        vc4->dirty |= VC4_DIRTY_VTXBUF;
}
Example No. 10
static void bind_sampler_states(struct fd_texture_stateobj *tex,
		unsigned start, unsigned nr, void **hwcso)
{
	unsigned i;

	for (i = 0; i < nr; i++) {
		unsigned p = i + start;
		tex->samplers[p] = hwcso[i];
		if (tex->samplers[p])
			tex->valid_samplers |= (1 << p);
		else
			tex->valid_samplers &= ~(1 << p);
	}

	tex->num_samplers = util_last_bit(tex->valid_samplers);
}
Example No. 11
static void set_sampler_views(struct fd_texture_stateobj *tex,
		unsigned start, unsigned nr, struct pipe_sampler_view **views)
{
	unsigned i;

	for (i = 0; i < nr; i++) {
		struct pipe_sampler_view *view = views ? views[i] : NULL;
		unsigned p = i + start;
		pipe_sampler_view_reference(&tex->textures[p], view);
		if (tex->textures[p])
			tex->valid_textures |= (1 << p);
		else
			tex->valid_textures &= ~(1 << p);
	}

	tex->num_textures = util_last_bit(tex->valid_textures);
}
Example No. 12
/**
 * Same as util_set_vertex_buffers_mask, but it tracks only the number of
 * bound buffers (via *dst_count) instead of an enabled-slot bitmask.
 */
void util_set_vertex_buffers_count(struct pipe_vertex_buffer *dst,
                                   unsigned *dst_count,
                                   const struct pipe_vertex_buffer *src,
                                   unsigned start_slot, unsigned count)
{
   unsigned i;
   uint32_t enabled_buffers = 0;

   for (i = 0; i < *dst_count; i++) {
      if (dst[i].buffer || dst[i].user_buffer)
         enabled_buffers |= (1ull << i);
   }

   util_set_vertex_buffers_mask(dst, &enabled_buffers, src, start_slot,
                                count);

   *dst_count = util_last_bit(enabled_buffers);
}
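As the comment above says, the helper reduces a slot bitmask to a count: util_last_bit() returns the position of the most significant set bit plus one (0 for an empty mask), so the resulting count spans every bound slot, including unbound gaps below the highest one. A self-contained sketch of that arithmetic; the last_bit() helper is an illustrative stand-in for util_last_bit, not the real implementation.

#include <assert.h>
#include <stdint.h>

/* illustrative stand-in for util_last_bit(): MSB position + 1, 0 for 0 */
static unsigned last_bit(uint32_t u)
{
   unsigned n = 0;
   while (u) { n++; u >>= 1; }
   return n;
}

int main(void)
{
   /* buffers bound in slots 0, 1 and 3: slot 2 is a gap, count is still 4 */
   uint32_t enabled_buffers = (1u << 0) | (1u << 1) | (1u << 3);
   assert(last_bit(enabled_buffers) == 4);

   /* nothing bound at all: util_last_bit(0) == 0, so the count is 0 */
   assert(last_bit(0) == 0);
   return 0;
}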
Example No. 13
/* Add any missing varyings needed for stream-out.  Otherwise varyings not
 * used by fragment shader will be stripped out.
 */
static void
link_stream_out(struct ir3_shader_linkage *l, const struct ir3_shader_variant *v)
{
	const struct pipe_stream_output_info *strmout = &v->shader->stream_output;

	/*
	 * First, any stream-out varyings not already in linkage map (ie. not
	 * also consumed by frag shader) need to be added:
	 */
	for (unsigned i = 0; i < strmout->num_outputs; i++) {
		const struct pipe_stream_output *out = &strmout->output[i];
		unsigned k = out->register_index;
		unsigned compmask =
			(1 << (out->num_components + out->start_component)) - 1;
		unsigned idx, nextloc = 0;

		/* psize/pos need to be the last entries in linkage map, and will
		 * get added after link_stream_out, so skip over them:
		 */
		if ((v->outputs[k].slot == VARYING_SLOT_PSIZ) ||
				(v->outputs[k].slot == VARYING_SLOT_POS))
			continue;

		for (idx = 0; idx < l->cnt; idx++) {
			if (l->var[idx].regid == v->outputs[k].regid)
				break;
			nextloc = MAX2(nextloc, l->var[idx].loc + 4);
		}

		/* add if not already in linkage map: */
		if (idx == l->cnt)
			ir3_link_add(l, v->outputs[k].regid, compmask, nextloc);

		/* expand component-mask if needed, ie. when streaming out all
		 * components but the frag shader doesn't consume them all:
		 */
		if (compmask & ~l->var[idx].compmask) {
			l->var[idx].compmask |= compmask;
			l->max_loc = MAX2(l->max_loc,
				l->var[idx].loc + util_last_bit(l->var[idx].compmask));
		}
	}
}
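Note that the compmask built above spans components 0 through start_component + num_components - 1, a superset of the streamed range. A quick check of the expression, with values chosen only for illustration:

#include <assert.h>

int main(void)
{
   /* illustrative: stream out two components starting at component 1 (.yz) */
   unsigned start_component = 1, num_components = 2;
   unsigned compmask = (1u << (num_components + start_component)) - 1;

   /* 0b0111: components x, y and z end up enabled in the linkage entry */
   assert(compmask == 0x7);
   return 0;
}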
Example No. 14
static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op)
{
	struct si_context *sctx = (struct si_context *)ctx;

	r600_suspend_nontimer_queries(&sctx->b);

	util_blitter_save_blend(sctx->blitter, sctx->queued.named.blend);
	util_blitter_save_depth_stencil_alpha(sctx->blitter, sctx->queued.named.dsa);
	util_blitter_save_stencil_ref(sctx->blitter, &sctx->stencil_ref);
	util_blitter_save_rasterizer(sctx->blitter, sctx->queued.named.rasterizer);
	util_blitter_save_fragment_shader(sctx->blitter, sctx->ps_shader);
	util_blitter_save_geometry_shader(sctx->blitter, sctx->gs_shader);
	util_blitter_save_vertex_shader(sctx->blitter, sctx->vs_shader);
	util_blitter_save_vertex_elements(sctx->blitter, sctx->vertex_elements);
	if (sctx->queued.named.viewport) {
		util_blitter_save_viewport(sctx->blitter, &sctx->queued.named.viewport->viewport);
	}
	util_blitter_save_vertex_buffer_slot(sctx->blitter, sctx->vertex_buffer);
	util_blitter_save_so_targets(sctx->blitter, sctx->b.streamout.num_targets,
				     (struct pipe_stream_output_target**)sctx->b.streamout.targets);

	if (op & SI_SAVE_FRAMEBUFFER)
		util_blitter_save_framebuffer(sctx->blitter, &sctx->framebuffer.state);

	if (op & SI_SAVE_TEXTURES) {
		util_blitter_save_fragment_sampler_states(
			sctx->blitter, sctx->samplers[PIPE_SHADER_FRAGMENT].n_samplers,
			(void**)sctx->samplers[PIPE_SHADER_FRAGMENT].samplers);

		util_blitter_save_fragment_sampler_views(sctx->blitter,
			util_last_bit(sctx->samplers[PIPE_SHADER_FRAGMENT].views.desc.enabled_mask &
				      ((1 << NUM_TEX_UNITS) - 1)),
			sctx->samplers[PIPE_SHADER_FRAGMENT].views.views);
	}

	if ((op & SI_DISABLE_RENDER_COND) && sctx->b.current_render_cond) {
		util_blitter_save_render_condition(sctx->blitter,
                                                   sctx->b.current_render_cond,
                                                   sctx->b.current_render_cond_cond,
                                                   sctx->b.current_render_cond_mode);
	}
}
Example No. 15
static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op)
{
	struct r600_context *rctx = (struct r600_context *)ctx;

	r600_context_queries_suspend(rctx);

	util_blitter_save_blend(rctx->blitter, rctx->queued.named.blend);
	util_blitter_save_depth_stencil_alpha(rctx->blitter, rctx->queued.named.dsa);
	util_blitter_save_stencil_ref(rctx->blitter, &rctx->stencil_ref);
	util_blitter_save_rasterizer(rctx->blitter, rctx->queued.named.rasterizer);
	util_blitter_save_fragment_shader(rctx->blitter, rctx->ps_shader);
	util_blitter_save_vertex_shader(rctx->blitter, rctx->vs_shader);
	util_blitter_save_vertex_elements(rctx->blitter, rctx->vertex_elements);
	if (rctx->queued.named.viewport) {
		util_blitter_save_viewport(rctx->blitter, &rctx->queued.named.viewport->viewport);
	}
	util_blitter_save_vertex_buffer_slot(rctx->blitter, rctx->vertex_buffer);
	util_blitter_save_so_targets(rctx->blitter, rctx->b.streamout.num_targets,
				     (struct pipe_stream_output_target**)rctx->b.streamout.targets);

	if (op & R600_SAVE_FRAMEBUFFER)
		util_blitter_save_framebuffer(rctx->blitter, &rctx->framebuffer);

	if (op & R600_SAVE_TEXTURES) {
		util_blitter_save_fragment_sampler_states(
			rctx->blitter, rctx->samplers[PIPE_SHADER_FRAGMENT].n_samplers,
			(void**)rctx->samplers[PIPE_SHADER_FRAGMENT].samplers);

		util_blitter_save_fragment_sampler_views(rctx->blitter,
			util_last_bit(rctx->samplers[PIPE_SHADER_FRAGMENT].views.desc.enabled_mask &
				      ((1 << NUM_TEX_UNITS) - 1)),
			rctx->samplers[PIPE_SHADER_FRAGMENT].views.views);
	}

	if ((op & R600_DISABLE_RENDER_COND) && rctx->current_render_cond) {
		rctx->saved_render_cond = rctx->current_render_cond;
		rctx->saved_render_cond_cond = rctx->current_render_cond_cond;
		rctx->saved_render_cond_mode = rctx->current_render_cond_mode;
		rctx->b.b.render_condition(&rctx->b.b, NULL, FALSE, 0);
	}

}
Example No. 16
static float
etna_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
{
   struct etna_screen *screen = etna_screen(pscreen);

   switch (param) {
   case PIPE_CAPF_MAX_LINE_WIDTH:
   case PIPE_CAPF_MAX_LINE_WIDTH_AA:
   case PIPE_CAPF_MAX_POINT_WIDTH:
   case PIPE_CAPF_MAX_POINT_WIDTH_AA:
      return 8192.0f;
   case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
      return 16.0f;
   case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
      return util_last_bit(screen->specs.max_texture_size);
   }

   debug_printf("unknown paramf %d", param);
   return 0;
}
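Returning util_last_bit(max_texture_size) as the maximum LOD bias relies on the size being a power of two: the result is then log2(size) + 1, i.e. the mip level count of the largest supported texture (the same quantity etna_screen_get_param reports below for PIPE_CAP_MAX_TEXTURE_2D_LEVELS). A quick numeric check, with an illustrative size and a stand-in for util_last_bit():

#include <assert.h>
#include <stdint.h>

/* illustrative stand-in for util_last_bit(): MSB position + 1, 0 for 0 */
static unsigned last_bit(uint32_t u)
{
   unsigned n = 0;
   while (u) { n++; u >>= 1; }
   return n;
}

int main(void)
{
   /* illustrative power-of-two maximum texture size */
   uint32_t max_texture_size = 8192;

   /* mip chain 8192, 4096, ..., 1 has 14 levels == log2(8192) + 1 */
   assert(last_bit(max_texture_size) == 14);
   return 0;
}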
Example No. 17
/**
 * Write out a batch of 32 control data bits from the control_data_bits
 * register to the URB.
 *
 * The current value of the vertex_count register determines which DWORD in
 * the URB receives the control data bits.  The control_data_bits register is
 * assumed to contain the correct data for the vertex that was most recently
 * output, and all previous vertices that share the same DWORD.
 *
 * This function takes care of ensuring that if no vertices have been output
 * yet, no control bits are emitted.
 */
void
vec4_gs_visitor::emit_control_data_bits()
{
    assert(c->control_data_bits_per_vertex != 0);

    /* Since the URB_WRITE_OWORD message operates with 128-bit (vec4 sized)
     * granularity, we need to use two tricks to ensure that the batch of 32
     * control data bits is written to the appropriate DWORD in the URB.  To
     * select which vec4 we are writing to, we use the "slot {0,1} offset"
     * fields of the message header.  To select which DWORD in the vec4 we are
     * writing to, we use the channel mask fields of the message header.  To
     * avoid penalizing geometry shaders that emit a small number of vertices
     * with extra bookkeeping, we only do each of these tricks when
     * c->prog_data.control_data_header_size_bits is large enough to make it
     * necessary.
     *
     * Note: this means that if we're outputting just a single DWORD of control
     * data bits, we'll actually replicate it four times since we won't do any
     * channel masking.  But that's not a problem since in this case the
     * hardware only pays attention to the first DWORD.
     */
    enum brw_urb_write_flags urb_write_flags = BRW_URB_WRITE_OWORD;
    if (c->control_data_header_size_bits > 32)
        urb_write_flags = urb_write_flags | BRW_URB_WRITE_USE_CHANNEL_MASKS;
    if (c->control_data_header_size_bits > 128)
        urb_write_flags = urb_write_flags | BRW_URB_WRITE_PER_SLOT_OFFSET;

    /* If we are using either channel masks or a per-slot offset, then we
     * need to figure out which DWORD we are trying to write to, using the
     * formula:
     *
     *     dword_index = (vertex_count - 1) * bits_per_vertex / 32
     *
     * Since bits_per_vertex is a power of two, and is known at compile
     * time, this can be optimized to:
     *
     *     dword_index = (vertex_count - 1) >> (6 - log2(bits_per_vertex))
     */
    src_reg dword_index(this, glsl_type::uint_type);
    if (urb_write_flags) {
        src_reg prev_count(this, glsl_type::uint_type);
        emit(ADD(dst_reg(prev_count), this->vertex_count,
                 brw_imm_ud(0xffffffffu)));
        unsigned log2_bits_per_vertex =
            util_last_bit(c->control_data_bits_per_vertex);
        emit(SHR(dst_reg(dword_index), prev_count,
                 brw_imm_ud(6 - log2_bits_per_vertex)));
    }

    /* Start building the URB write message.  The first MRF gets a copy of
     * R0.
     */
    int base_mrf = 1;
    dst_reg mrf_reg(MRF, base_mrf);
    src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
    vec4_instruction *inst = emit(MOV(mrf_reg, r0));
    inst->force_writemask_all = true;

    if (urb_write_flags & BRW_URB_WRITE_PER_SLOT_OFFSET) {
        /* Set the per-slot offset to dword_index / 4, so that we'll write to
         * the appropriate OWORD within the control data header.
         */
        src_reg per_slot_offset(this, glsl_type::uint_type);
        emit(SHR(dst_reg(per_slot_offset), dword_index, brw_imm_ud(2u)));
        emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, per_slot_offset,
             brw_imm_ud(1u));
    }

    if (urb_write_flags & BRW_URB_WRITE_USE_CHANNEL_MASKS) {
        /* Set the channel masks to 1 << (dword_index % 4), so that we'll
         * write to the appropriate DWORD within the OWORD.  We need to do
         * this computation with force_writemask_all, otherwise garbage data
         * from invocation 0 might clobber the mask for invocation 1 when
         * GS_OPCODE_PREPARE_CHANNEL_MASKS tries to OR the two masks
         * together.
         */
        src_reg channel(this, glsl_type::uint_type);
        inst = emit(AND(dst_reg(channel), dword_index, brw_imm_ud(3u)));
        inst->force_writemask_all = true;
        src_reg one(this, glsl_type::uint_type);
        inst = emit(MOV(dst_reg(one), brw_imm_ud(1u)));
        inst->force_writemask_all = true;
        src_reg channel_mask(this, glsl_type::uint_type);
        inst = emit(SHL(dst_reg(channel_mask), one, channel));
        inst->force_writemask_all = true;
        emit(GS_OPCODE_PREPARE_CHANNEL_MASKS, dst_reg(channel_mask),
             channel_mask);
        emit(GS_OPCODE_SET_CHANNEL_MASKS, mrf_reg, channel_mask);
    }

    /* Store the control data bits in the message payload and send it. */
    dst_reg mrf_reg2(MRF, base_mrf + 1);
    inst = emit(MOV(mrf_reg2, this->control_data_bits));
    inst->force_writemask_all = true;
    inst = emit(GS_OPCODE_URB_WRITE);
    inst->urb_write_flags = urb_write_flags;
    /* We need to increment Global Offset by 256-bits to make room for
     * Broadwell's extra "Vertex Count" payload at the beginning of the
     * URB entry.  Since this is an OWord message, Global Offset is counted
     * in 128-bit units, so we must set it to 2.
     */
    if (devinfo->gen >= 8 && gs_prog_data->static_vertex_count == -1)
        inst->offset = 2;
    inst->base_mrf = base_mrf;
    inst->mlen = 2;
}
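Because util_last_bit() of a power of two returns log2 + 1, the shift amount 6 - util_last_bit(bits_per_vertex) emitted above works out to 5 - log2(bits_per_vertex), which reproduces (vertex_count - 1) * bits_per_vertex / 32 for the power-of-two values the comment assumes. A small numeric check of that equivalence; the values and the last_bit() stand-in are illustrative.

#include <assert.h>
#include <stdint.h>

/* illustrative stand-in for util_last_bit(): MSB position + 1, 0 for 0 */
static unsigned last_bit(uint32_t u)
{
   unsigned n = 0;
   while (u) { n++; u >>= 1; }
   return n;
}

int main(void)
{
   /* example values: 2 control data bits per vertex, 20th vertex emitted */
   uint32_t bits_per_vertex = 2, vertex_count = 20;

   uint32_t by_division = (vertex_count - 1) * bits_per_vertex / 32;
   uint32_t by_shift = (vertex_count - 1) >> (6 - last_bit(bits_per_vertex));

   assert(by_division == by_shift);   /* both 1: second DWORD of the header */
   return 0;
}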
Example No. 18
void
fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
				 struct fd5_emit *emit)
{
	struct stage s[MAX_STAGES];
	uint32_t pos_regid, psize_regid, color_regid[8];
	uint32_t face_regid, coord_regid, zwcoord_regid;
	uint32_t vcoord_regid, vertex_regid, instance_regid;
	enum a3xx_threadsize fssz;
	uint8_t psize_loc = ~0;
	int i, j;

	setup_stages(emit, s);

	fssz = (s[FS].i->max_reg >= 24) ? TWO_QUADS : FOUR_QUADS;

	pos_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_POS);
	psize_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_PSIZ);
	vertex_regid = ir3_find_sysval_regid(s[VS].v, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);
	instance_regid = ir3_find_sysval_regid(s[VS].v, SYSTEM_VALUE_INSTANCE_ID);

	if (s[FS].v->color0_mrt) {
		color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
		color_regid[4] = color_regid[5] = color_regid[6] = color_regid[7] =
			ir3_find_output_regid(s[FS].v, FRAG_RESULT_COLOR);
	} else {
		color_regid[0] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA0);
		color_regid[1] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA1);
		color_regid[2] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA2);
		color_regid[3] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA3);
		color_regid[4] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA4);
		color_regid[5] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA5);
		color_regid[6] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA6);
		color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7);
	}

	/* TODO get these dynamically: */
	face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0);
	coord_regid = s[FS].v->frag_coord ? regid(0,0) : regid(63,0);
	zwcoord_regid = s[FS].v->frag_coord ? regid(0,2) : regid(63,0);
	vcoord_regid = (s[FS].v->total_in > 0) ? s[FS].v->pos_regid : regid(63,0);

	/* we could probably divide this up into things that need to be
	 * emitted if frag-prog is dirty vs if vert-prog is dirty..
	 */

	OUT_PKT4(ring, REG_A5XX_HLSQ_VS_CONFIG, 5);
	OUT_RING(ring, A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET(s[VS].constoff) |
			A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET(s[VS].instroff) |
			COND(s[VS].v, A5XX_HLSQ_VS_CONFIG_ENABLED));
	OUT_RING(ring, A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET(s[FS].constoff) |
			A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET(s[FS].instroff) |
			COND(s[FS].v, A5XX_HLSQ_FS_CONFIG_ENABLED));
	OUT_RING(ring, A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET(s[HS].constoff) |
			A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET(s[HS].instroff) |
			COND(s[HS].v, A5XX_HLSQ_HS_CONFIG_ENABLED));
	OUT_RING(ring, A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET(s[DS].constoff) |
			A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET(s[DS].instroff) |
			COND(s[DS].v, A5XX_HLSQ_DS_CONFIG_ENABLED));
	OUT_RING(ring, A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET(s[GS].constoff) |
			A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET(s[GS].instroff) |
			COND(s[GS].v, A5XX_HLSQ_GS_CONFIG_ENABLED));

	OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CONFIG, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT4(ring, REG_A5XX_HLSQ_VS_CNTL, 5);
	OUT_RING(ring, A5XX_HLSQ_VS_CNTL_INSTRLEN(s[VS].instrlen) |
			COND(s[VS].v && s[VS].v->has_ssbo, A5XX_HLSQ_VS_CNTL_SSBO_ENABLE));
	OUT_RING(ring, A5XX_HLSQ_FS_CNTL_INSTRLEN(s[FS].instrlen) |
			COND(s[FS].v && s[FS].v->has_ssbo, A5XX_HLSQ_FS_CNTL_SSBO_ENABLE));
	OUT_RING(ring, A5XX_HLSQ_HS_CNTL_INSTRLEN(s[HS].instrlen) |
			COND(s[HS].v && s[HS].v->has_ssbo, A5XX_HLSQ_HS_CNTL_SSBO_ENABLE));
	OUT_RING(ring, A5XX_HLSQ_DS_CNTL_INSTRLEN(s[DS].instrlen) |
			COND(s[DS].v && s[DS].v->has_ssbo, A5XX_HLSQ_DS_CNTL_SSBO_ENABLE));
	OUT_RING(ring, A5XX_HLSQ_GS_CNTL_INSTRLEN(s[GS].instrlen) |
			COND(s[GS].v && s[GS].v->has_ssbo, A5XX_HLSQ_GS_CNTL_SSBO_ENABLE));

	OUT_PKT4(ring, REG_A5XX_SP_VS_CONFIG, 5);
	OUT_RING(ring, A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET(s[VS].constoff) |
			A5XX_SP_VS_CONFIG_SHADEROBJOFFSET(s[VS].instroff) |
			COND(s[VS].v, A5XX_SP_VS_CONFIG_ENABLED));
	OUT_RING(ring, A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET(s[FS].constoff) |
			A5XX_SP_FS_CONFIG_SHADEROBJOFFSET(s[FS].instroff) |
			COND(s[FS].v, A5XX_SP_FS_CONFIG_ENABLED));
	OUT_RING(ring, A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET(s[HS].constoff) |
			A5XX_SP_HS_CONFIG_SHADEROBJOFFSET(s[HS].instroff) |
			COND(s[HS].v, A5XX_SP_HS_CONFIG_ENABLED));
	OUT_RING(ring, A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET(s[DS].constoff) |
			A5XX_SP_DS_CONFIG_SHADEROBJOFFSET(s[DS].instroff) |
			COND(s[DS].v, A5XX_SP_DS_CONFIG_ENABLED));
	OUT_RING(ring, A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET(s[GS].constoff) |
			A5XX_SP_GS_CONFIG_SHADEROBJOFFSET(s[GS].instroff) |
			COND(s[GS].v, A5XX_SP_GS_CONFIG_ENABLED));

	OUT_PKT4(ring, REG_A5XX_SP_CS_CONFIG, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT4(ring, REG_A5XX_HLSQ_VS_CONSTLEN, 2);
	OUT_RING(ring, s[VS].constlen);    /* HLSQ_VS_CONSTLEN */
	OUT_RING(ring, s[VS].instrlen);    /* HLSQ_VS_INSTRLEN */

	OUT_PKT4(ring, REG_A5XX_HLSQ_FS_CONSTLEN, 2);
	OUT_RING(ring, s[FS].constlen);    /* HLSQ_FS_CONSTLEN */
	OUT_RING(ring, s[FS].instrlen);    /* HLSQ_FS_INSTRLEN */

	OUT_PKT4(ring, REG_A5XX_HLSQ_HS_CONSTLEN, 2);
	OUT_RING(ring, s[HS].constlen);    /* HLSQ_HS_CONSTLEN */
	OUT_RING(ring, s[HS].instrlen);    /* HLSQ_HS_INSTRLEN */

	OUT_PKT4(ring, REG_A5XX_HLSQ_DS_CONSTLEN, 2);
	OUT_RING(ring, s[DS].constlen);    /* HLSQ_DS_CONSTLEN */
	OUT_RING(ring, s[DS].instrlen);    /* HLSQ_DS_INSTRLEN */

	OUT_PKT4(ring, REG_A5XX_HLSQ_GS_CONSTLEN, 2);
	OUT_RING(ring, s[GS].constlen);    /* HLSQ_GS_CONSTLEN */
	OUT_RING(ring, s[GS].instrlen);    /* HLSQ_GS_INSTRLEN */

	OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CONSTLEN, 2);
	OUT_RING(ring, 0x00000000);        /* HLSQ_CS_CONSTLEN */
	OUT_RING(ring, 0x00000000);        /* HLSQ_CS_INSTRLEN */

	OUT_PKT4(ring, REG_A5XX_SP_VS_CTRL_REG0, 1);
	OUT_RING(ring, A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) |
			A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) |
			0x6 | /* XXX seems to be always set? */
			A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(0x3) |  // XXX need to figure this out somehow..
			COND(s[VS].v->has_samp, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE));

	struct ir3_shader_linkage l = {0};
	ir3_link_shaders(&l, s[VS].v, s[FS].v);

	if ((s[VS].v->shader->stream_output.num_outputs > 0) &&
			!emit->key.binning_pass)
		link_stream_out(&l, s[VS].v);

	BITSET_DECLARE(varbs, 128) = {0};
	uint32_t *varmask = (uint32_t *)varbs;

	for (i = 0; i < l.cnt; i++)
		for (j = 0; j < util_last_bit(l.var[i].compmask); j++)
			BITSET_SET(varbs, l.var[i].loc + j);

	OUT_PKT4(ring, REG_A5XX_VPC_VAR_DISABLE(0), 4);
	OUT_RING(ring, ~varmask[0]);  /* VPC_VAR[0].DISABLE */
	OUT_RING(ring, ~varmask[1]);  /* VPC_VAR[1].DISABLE */
	OUT_RING(ring, ~varmask[2]);  /* VPC_VAR[2].DISABLE */
	OUT_RING(ring, ~varmask[3]);  /* VPC_VAR[3].DISABLE */

	/* a5xx appends pos/psize to end of the linkage map: */
	if (pos_regid != regid(63,0))
		ir3_link_add(&l, pos_regid, 0xf, l.max_loc);

	if (psize_regid != regid(63,0)) {
		psize_loc = l.max_loc;
		ir3_link_add(&l, psize_regid, 0x1, l.max_loc);
	}

	if ((s[VS].v->shader->stream_output.num_outputs > 0) &&
			!emit->key.binning_pass) {
		emit_stream_out(ring, s[VS].v, &l);

		OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
		OUT_RING(ring, 0x00000000);
	} else {
		OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
		OUT_RING(ring, A5XX_VPC_SO_OVERRIDE_SO_DISABLE);
	}

	for (i = 0, j = 0; (i < 16) && (j < l.cnt); i++) {
		uint32_t reg = 0;

		OUT_PKT4(ring, REG_A5XX_SP_VS_OUT_REG(i), 1);

		reg |= A5XX_SP_VS_OUT_REG_A_REGID(l.var[j].regid);
		reg |= A5XX_SP_VS_OUT_REG_A_COMPMASK(l.var[j].compmask);
		j++;

		reg |= A5XX_SP_VS_OUT_REG_B_REGID(l.var[j].regid);
		reg |= A5XX_SP_VS_OUT_REG_B_COMPMASK(l.var[j].compmask);
		j++;

		OUT_RING(ring, reg);
	}

	for (i = 0, j = 0; (i < 8) && (j < l.cnt); i++) {
		uint32_t reg = 0;

		OUT_PKT4(ring, REG_A5XX_SP_VS_VPC_DST_REG(i), 1);

		reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC0(l.var[j++].loc);
		reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC1(l.var[j++].loc);
		reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC2(l.var[j++].loc);
		reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC3(l.var[j++].loc);

		OUT_RING(ring, reg);
	}

	OUT_PKT4(ring, REG_A5XX_SP_VS_OBJ_START_LO, 2);
	OUT_RELOC(ring, s[VS].v->bo, 0, 0, 0);  /* SP_VS_OBJ_START_LO/HI */

	if (s[VS].instrlen)
		fd5_emit_shader(ring, s[VS].v);

	// TODO depending on other bits in this reg (if any) set somewhere else?
	OUT_PKT4(ring, REG_A5XX_PC_PRIM_VTX_CNTL, 1);
	OUT_RING(ring, COND(s[VS].v->writes_psize, A5XX_PC_PRIM_VTX_CNTL_PSIZE));

	OUT_PKT4(ring, REG_A5XX_SP_PRIMITIVE_CNTL, 1);
	OUT_RING(ring, A5XX_SP_PRIMITIVE_CNTL_VSOUT(l.cnt));

	OUT_PKT4(ring, REG_A5XX_VPC_CNTL_0, 1);
	OUT_RING(ring, A5XX_VPC_CNTL_0_STRIDE_IN_VPC(l.max_loc) |
			COND(s[FS].v->total_in > 0, A5XX_VPC_CNTL_0_VARYING) |
			COND(s[FS].v->frag_coord, A5XX_VPC_CNTL_0_VARYING) |
			0x10000);    // XXX

	fd5_context(ctx)->max_loc = l.max_loc;

	if (emit->key.binning_pass) {
		OUT_PKT4(ring, REG_A5XX_SP_FS_OBJ_START_LO, 2);
		OUT_RING(ring, 0x00000000);    /* SP_FS_OBJ_START_LO */
		OUT_RING(ring, 0x00000000);    /* SP_FS_OBJ_START_HI */
	} else {
		OUT_PKT4(ring, REG_A5XX_SP_FS_OBJ_START_LO, 2);
		OUT_RELOC(ring, s[FS].v->bo, 0, 0, 0);  /* SP_FS_OBJ_START_LO/HI */
	}

	OUT_PKT4(ring, REG_A5XX_HLSQ_CONTROL_0_REG, 5);
	OUT_RING(ring, A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(fssz) |
			A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE(TWO_QUADS) |
			0x00000880);               /* XXX HLSQ_CONTROL_0 */
	OUT_RING(ring, A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD(63));
	OUT_RING(ring, A5XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid) |
			0xfcfcfc00);               /* XXX */
	OUT_RING(ring, A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID(vcoord_regid) |
			0xfcfcfc00);               /* XXX */
	OUT_RING(ring, A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID(coord_regid) |
			A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(zwcoord_regid) |
			0x0000fcfc);               /* XXX */

	OUT_PKT4(ring, REG_A5XX_SP_FS_CTRL_REG0, 1);
	OUT_RING(ring, COND(s[FS].v->total_in > 0, A5XX_SP_FS_CTRL_REG0_VARYING) |
			COND(s[FS].v->frag_coord, A5XX_SP_FS_CTRL_REG0_VARYING) |
			0x40006 | /* XXX set pretty much everywhere */
			A5XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) |
			A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) |
			A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) |
			A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(0x3) |  // XXX need to figure this out somehow..
			COND(s[FS].v->has_samp, A5XX_SP_FS_CTRL_REG0_PIXLODENABLE));

	OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1);
	OUT_RING(ring, 0x020fffff);        /* XXX */

	OUT_PKT4(ring, REG_A5XX_VPC_GS_SIV_CNTL, 1);
	OUT_RING(ring, 0x0000ffff);        /* XXX */

	OUT_PKT4(ring, REG_A5XX_SP_SP_CNTL, 1);
	OUT_RING(ring, 0x00000010);        /* XXX */

	OUT_PKT4(ring, REG_A5XX_GRAS_CNTL, 1);
	OUT_RING(ring, COND(s[FS].v->total_in > 0, A5XX_GRAS_CNTL_VARYING) |
			COND(s[FS].v->frag_coord, A5XX_GRAS_CNTL_XCOORD |
					A5XX_GRAS_CNTL_YCOORD |
					A5XX_GRAS_CNTL_ZCOORD |
					A5XX_GRAS_CNTL_WCOORD |
					A5XX_GRAS_CNTL_UNK3) |
			COND(s[FS].v->frag_face, A5XX_GRAS_CNTL_UNK3));

	OUT_PKT4(ring, REG_A5XX_RB_RENDER_CONTROL0, 2);
	OUT_RING(ring, COND(s[FS].v->total_in > 0, A5XX_RB_RENDER_CONTROL0_VARYING) |
			COND(s[FS].v->frag_coord, A5XX_RB_RENDER_CONTROL0_XCOORD |
					A5XX_RB_RENDER_CONTROL0_YCOORD |
					A5XX_RB_RENDER_CONTROL0_ZCOORD |
					A5XX_RB_RENDER_CONTROL0_WCOORD |
					A5XX_RB_RENDER_CONTROL0_UNK3) |
			COND(s[FS].v->frag_face, A5XX_RB_RENDER_CONTROL0_UNK3));
	OUT_RING(ring, COND(s[FS].v->frag_face, A5XX_RB_RENDER_CONTROL1_FACENESS));

	OUT_PKT4(ring, REG_A5XX_SP_FS_OUTPUT_REG(0), 8);
	for (i = 0; i < 8; i++) {
		OUT_RING(ring, A5XX_SP_FS_OUTPUT_REG_REGID(color_regid[i]) |
				COND(emit->key.half_precision,
					A5XX_SP_FS_OUTPUT_REG_HALF_PRECISION));
	}


	OUT_PKT4(ring, REG_A5XX_VPC_PACK, 1);
	OUT_RING(ring, A5XX_VPC_PACK_NUMNONPOSVAR(s[FS].v->total_in) |
			A5XX_VPC_PACK_PSIZELOC(psize_loc));

	if (!emit->key.binning_pass) {
		uint32_t vinterp[8], vpsrepl[8];

		memset(vinterp, 0, sizeof(vinterp));
		memset(vpsrepl, 0, sizeof(vpsrepl));

		/* looks like we need to do int varyings in the frag
		 * shader on a5xx (no flatshade reg?  or a420.0 bug?):
		 *
		 *    (sy)(ss)nop
		 *    (sy)ldlv.u32 r0.x,l[r0.x], 1
		 *    ldlv.u32 r0.y,l[r0.x+1], 1
		 *    (ss)bary.f (ei)r63.x, 0, r0.x
		 *    (ss)(rpt1)cov.s32f16 hr0.x, (r)r0.x
		 *    (rpt5)nop
		 *    sam (f16)(xyzw)hr0.x, hr0.x, s#0, t#0
		 *
		 * Possibly on later a5xx variants we'll be able to use
		 * something like the code below instead of the workaround
		 * in the shader:
		 */
		/* figure out VARYING_INTERP / VARYING_PS_REPL register values: */
		for (j = -1; (j = ir3_next_varying(s[FS].v, j)) < (int)s[FS].v->inputs_count; ) {
			/* NOTE: varyings are packed, so if compmask is 0xb
			 * then first, third, and fourth component occupy
			 * three consecutive varying slots:
			 */
			unsigned compmask = s[FS].v->inputs[j].compmask;

			uint32_t inloc = s[FS].v->inputs[j].inloc;

			if ((s[FS].v->inputs[j].interpolate == INTERP_MODE_FLAT) ||
					(s[FS].v->inputs[j].rasterflat && emit->rasterflat)) {
				uint32_t loc = inloc;

				for (i = 0; i < 4; i++) {
					if (compmask & (1 << i)) {
						vinterp[loc / 16] |= 1 << ((loc % 16) * 2);
						//flatshade[loc / 32] |= 1 << (loc % 32);
						loc++;
					}
				}
			}

			gl_varying_slot slot = s[FS].v->inputs[j].slot;

			/* since we don't enable PIPE_CAP_TGSI_TEXCOORD: */
			if (slot >= VARYING_SLOT_VAR0) {
				unsigned texmask = 1 << (slot - VARYING_SLOT_VAR0);
				/* Replace the .xy coordinates with S/T from the point sprite. Set
				 * interpolation bits for .zw such that they become .01
				 */
				if (emit->sprite_coord_enable & texmask) {
					/* mask is two 2-bit fields, where:
					 *   '01' -> S
					 *   '10' -> T
					 *   '11' -> 1 - T  (flip mode)
					 */
					unsigned mask = emit->sprite_coord_mode ? 0b1101 : 0b1001;
					uint32_t loc = inloc;
					if (compmask & 0x1) {
						vpsrepl[loc / 16] |= ((mask >> 0) & 0x3) << ((loc % 16) * 2);
						loc++;
					}
					if (compmask & 0x2) {
						vpsrepl[loc / 16] |= ((mask >> 2) & 0x3) << ((loc % 16) * 2);
						loc++;
					}
Example No. 19
static bool
ps_get_gen6_ff_kernels(const struct ilo_dev *dev,
                       const struct ilo_state_ps_info *info,
                       struct pixel_ff *ff)
{
   const struct ilo_state_shader_kernel_info *kernel_8 = &info->kernel_8;
   const struct ilo_state_shader_kernel_info *kernel_16 = &info->kernel_16;
   const struct ilo_state_shader_kernel_info *kernel_32 = &info->kernel_32;
   uint32_t scratch_size;

   ILO_DEV_ASSERT(dev, 6, 8);

   ff->dispatch_modes = ps_get_gen6_dispatch_modes(dev, info);

   /* initialize kernel offsets and GRF starts */
   if (util_is_power_of_two(ff->dispatch_modes)) {
      if (ff->dispatch_modes & GEN6_PS_DISPATCH_8) {
         ff->kernel_offsets[0] = kernel_8->offset;
         ff->grf_starts[0] = kernel_8->grf_start;
      } else if (ff->dispatch_modes & GEN6_PS_DISPATCH_16) {
         ff->kernel_offsets[0] = kernel_16->offset;
         ff->grf_starts[0] = kernel_16->grf_start;
      } else if (ff->dispatch_modes & GEN6_PS_DISPATCH_32) {
         ff->kernel_offsets[0] = kernel_32->offset;
         ff->grf_starts[0] = kernel_32->grf_start;
      }
   } else {
      ff->kernel_offsets[0] = kernel_8->offset;
      ff->kernel_offsets[1] = kernel_32->offset;
      ff->kernel_offsets[2] = kernel_16->offset;

      ff->grf_starts[0] = kernel_8->grf_start;
      ff->grf_starts[1] = kernel_32->grf_start;
      ff->grf_starts[2] = kernel_16->grf_start;
   }

   /* we do not want to save it */
   assert(ff->kernel_offsets[0] == 0);

   ff->pcb_enable = (((ff->dispatch_modes & GEN6_PS_DISPATCH_8) &&
                      kernel_8->pcb_attr_count) ||
                     ((ff->dispatch_modes & GEN6_PS_DISPATCH_16) &&
                      kernel_16->pcb_attr_count) ||
                     ((ff->dispatch_modes & GEN6_PS_DISPATCH_32) &&
                      kernel_32->pcb_attr_count));

   scratch_size = 0;
   if ((ff->dispatch_modes & GEN6_PS_DISPATCH_8) &&
       scratch_size < kernel_8->scratch_size)
      scratch_size = kernel_8->scratch_size;
   if ((ff->dispatch_modes & GEN6_PS_DISPATCH_16) &&
       scratch_size < kernel_16->scratch_size)
      scratch_size = kernel_16->scratch_size;
   if ((ff->dispatch_modes & GEN6_PS_DISPATCH_32) &&
       scratch_size < kernel_32->scratch_size)
      scratch_size = kernel_32->scratch_size;

   /* next power of two, starting from 1KB */
   ff->scratch_space = (scratch_size > 1024) ?
      (util_last_bit(scratch_size - 1) - 10): 0;

   /* GPU hangs on Haswell if none of the dispatch mode bits is set */
   if (ilo_dev_gen(dev) == ILO_GEN(7.5) && !ff->dispatch_modes)
      ff->dispatch_modes |= GEN6_PS_DISPATCH_8;

   return true;
}
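The "next power of two, starting from 1KB" expression above encodes the scratch allocation as a power-of-two exponent relative to 1KB, i.e. the hardware gets 1KB << scratch_space bytes. A worked example of the arithmetic; the size and the last_bit() helper are illustrative stand-ins.

#include <assert.h>
#include <stdint.h>

/* illustrative stand-in for util_last_bit(): MSB position + 1, 0 for 0 */
static unsigned last_bit(uint32_t u)
{
   unsigned n = 0;
   while (u) { n++; u >>= 1; }
   return n;
}

int main(void)
{
   /* illustrative sizes: the encoded field selects 1KB << scratch_space */
   uint32_t scratch_size = 5000;   /* bytes actually needed */
   uint32_t scratch_space = (scratch_size > 1024) ?
      (last_bit(scratch_size - 1) - 10) : 0;

   assert(scratch_space == 3);              /* 1KB << 3 == 8KB allocated */
   assert((1024u << scratch_space) >= scratch_size);
   return 0;
}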
Example No. 20
static int
etna_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
{
   struct etna_screen *screen = etna_screen(pscreen);

   switch (param) {
   /* Supported features (boolean caps). */
   case PIPE_CAP_TWO_SIDED_STENCIL:
   case PIPE_CAP_ANISOTROPIC_FILTER:
   case PIPE_CAP_POINT_SPRITE:
   case PIPE_CAP_TEXTURE_SHADOW_MAP:
   case PIPE_CAP_BLEND_EQUATION_SEPARATE:
   case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
   case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
   case PIPE_CAP_SM3:
   case PIPE_CAP_TEXTURE_BARRIER:
   case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
   case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
   case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
   case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
   case PIPE_CAP_USER_CONSTANT_BUFFERS:
   case PIPE_CAP_TGSI_TEXCOORD:
   case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
      return 1;

   /* Memory */
   case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
      return 256;
   case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
      return 4; /* XXX could easily be supported */
   case PIPE_CAP_GLSL_FEATURE_LEVEL:
      return 120;

   case PIPE_CAP_NPOT_TEXTURES:
      return true; /* VIV_FEATURE(priv->dev, chipMinorFeatures1,
                      NON_POWER_OF_TWO); */

   case PIPE_CAP_PRIMITIVE_RESTART:
      return VIV_FEATURE(screen, chipMinorFeatures1, HALTI0);

   case PIPE_CAP_ENDIANNESS:
      return PIPE_ENDIAN_LITTLE; /* on most Viv hw this is configurable (feature
                                    ENDIANNESS_CONFIG) */

   /* Unsupported features. */
   case PIPE_CAP_SEAMLESS_CUBE_MAP:
   case PIPE_CAP_TEXTURE_SWIZZLE: /* XXX supported on gc2000 */
   case PIPE_CAP_COMPUTE: /* XXX supported on gc2000 */
   case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: /* only one colorbuffer supported, so mixing makes no sense */
   case PIPE_CAP_CONDITIONAL_RENDER: /* no occlusion queries */
   case PIPE_CAP_TGSI_INSTANCEID: /* no idea, really */
   case PIPE_CAP_START_INSTANCE: /* instancing not supported AFAIK */
   case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: /* instancing not supported AFAIK */
   case PIPE_CAP_SHADER_STENCIL_EXPORT: /* Fragment shader cannot export stencil value */
   case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: /* no dual-source supported */
   case PIPE_CAP_TEXTURE_MULTISAMPLE: /* no texture multisample */
   case PIPE_CAP_TEXTURE_MIRROR_CLAMP: /* only mirrored repeat */
   case PIPE_CAP_INDEP_BLEND_ENABLE:
   case PIPE_CAP_INDEP_BLEND_FUNC:
   case PIPE_CAP_DEPTH_CLIP_DISABLE:
   case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
   case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
   case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
   case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: /* Don't skip strict max uniform limit check */
   case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
   case PIPE_CAP_VERTEX_COLOR_CLAMPED:
   case PIPE_CAP_USER_VERTEX_BUFFERS:
   case PIPE_CAP_USER_INDEX_BUFFERS:
   case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
   case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
   case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
   case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES: /* TODO: test me out with piglit */
   case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
   case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
   case PIPE_CAP_TEXTURE_GATHER_SM5:
   case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
   case PIPE_CAP_FAKE_SW_MSAA:
   case PIPE_CAP_TEXTURE_QUERY_LOD:
   case PIPE_CAP_SAMPLE_SHADING:
   case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
   case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
   case PIPE_CAP_DRAW_INDIRECT:
   case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
   case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
   case PIPE_CAP_SAMPLER_VIEW_TARGET:
   case PIPE_CAP_CLIP_HALFZ:
   case PIPE_CAP_VERTEXID_NOBASE:
   case PIPE_CAP_POLYGON_OFFSET_CLAMP:
   case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
   case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
   case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
   case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
   case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
   case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
   case PIPE_CAP_DEPTH_BOUNDS_TEST:
   case PIPE_CAP_TGSI_TXQS:
   case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
   case PIPE_CAP_SHAREABLE_SHADERS:
   case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
   case PIPE_CAP_CLEAR_TEXTURE:
   case PIPE_CAP_DRAW_PARAMETERS:
   case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
   case PIPE_CAP_MULTI_DRAW_INDIRECT:
   case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
   case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
   case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
   case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
   case PIPE_CAP_INVALIDATE_BUFFER:
   case PIPE_CAP_GENERATE_MIPMAP:
   case PIPE_CAP_STRING_MARKER:
   case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
   case PIPE_CAP_QUERY_BUFFER_OBJECT:
   case PIPE_CAP_QUERY_MEMORY_INFO:
   case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
   case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
   case PIPE_CAP_CULL_DISTANCE:
   case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
   case PIPE_CAP_TGSI_VOTE:
   case PIPE_CAP_MAX_WINDOW_RECTANGLES:
   case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED:
   case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS:
   case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
   case PIPE_CAP_TGSI_ARRAY_COMPONENTS:
   case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
   case PIPE_CAP_TGSI_CAN_READ_OUTPUTS:
   case PIPE_CAP_NATIVE_FENCE_FD:
   case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
   case PIPE_CAP_TGSI_FS_FBFETCH:
   case PIPE_CAP_TGSI_MUL_ZERO_WINS:
   case PIPE_CAP_DOUBLES:
   case PIPE_CAP_INT64:
   case PIPE_CAP_INT64_DIVMOD:
      return 0;

   /* Stream output. */
   case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
   case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
   case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
   case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
      return 0;

   /* Geometry shader output, unsupported. */
   case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
   case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
   case PIPE_CAP_MAX_VERTEX_STREAMS:
      return 0;

   case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
      return 128;

   /* Texturing. */
   case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
   case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
   {
      int log2_max_tex_size = util_last_bit(screen->specs.max_texture_size);
      assert(log2_max_tex_size > 0);
      return log2_max_tex_size;
   }
   case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: /* 3D textures not supported - fake it */
      return 5;
   case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
      return 0;
   case PIPE_CAP_CUBE_MAP_ARRAY:
      return 0;
   case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
   case PIPE_CAP_MIN_TEXEL_OFFSET:
      return -8;
   case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
   case PIPE_CAP_MAX_TEXEL_OFFSET:
      return 7;
   case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
      return 0;
   case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
      return 65536;

   /* Render targets. */
   case PIPE_CAP_MAX_RENDER_TARGETS:
      return 1;

   /* Viewports and scissors. */
   case PIPE_CAP_MAX_VIEWPORTS:
      return 1;

   /* Timer queries. */
   case PIPE_CAP_QUERY_TIME_ELAPSED:
   case PIPE_CAP_OCCLUSION_QUERY:
      return 0;
   case PIPE_CAP_QUERY_TIMESTAMP:
      return 1;
   case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
      return 0;

   /* Preferences */
   case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
      return 0;

   case PIPE_CAP_PCI_GROUP:
   case PIPE_CAP_PCI_BUS:
   case PIPE_CAP_PCI_DEVICE:
   case PIPE_CAP_PCI_FUNCTION:
      return 0;
   case PIPE_CAP_VENDOR_ID:
   case PIPE_CAP_DEVICE_ID:
      return 0xFFFFFFFF;
   case PIPE_CAP_ACCELERATED:
      return 1;
   case PIPE_CAP_VIDEO_MEMORY:
      return 0;
   case PIPE_CAP_UMA:
      return 1;
   }

   debug_printf("unknown param %d", param);
   return 0;
}