Example #1
static void
draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
		struct fd4_emit *emit)
{
	const struct pipe_draw_info *info = emit->info;
	enum pc_di_primtype primtype = ctx->primtypes[info->mode];

	fd4_emit_state(ctx, ring, emit);

	if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE))
		fd4_emit_vertex_bufs(ring, emit);

	OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
	OUT_RING(ring, info->indexed ? info->index_bias : info->start); /* VFD_INDEX_OFFSET */
	OUT_RING(ring, info->start_instance);   /* ??? UNKNOWN_2209 */

	OUT_PKT0(ring, REG_A4XX_PC_RESTART_INDEX, 1);
	OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
			info->restart_index : 0xffffffff);

	/* points + psize -> spritelist: */
	if (ctx->rasterizer->point_size_per_vertex &&
			fd4_emit_get_vp(emit)->writes_psize &&
			(info->mode == PIPE_PRIM_POINTS))
		primtype = DI_PT_POINTLIST_PSIZE;

	fd4_draw_emit(ctx->batch, ring, primtype,
			emit->key.binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY,
			info);
}
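For readers new to the OUT_PKT0()/OUT_RING() idiom used above: a type-0 packet is one header dword followed by payload dwords that the CP writes to consecutive registers starting at the given index. A minimal sketch of the header encoding, assuming the layout from the freedreno/envytools register database (exact macro spellings in mesa may differ):

#include <stdint.h>

#define CP_TYPE0_PKT	(0u << 30)	/* packet type lives in bits [31:30] */

/* Header dword emitted by OUT_PKT0(ring, regindx, cnt): the following
 * cnt payload dwords land in registers regindx, regindx+1, ...
 */
static inline uint32_t
pkt0_hdr(uint16_t regindx, uint16_t cnt)
{
	return CP_TYPE0_PKT | (((uint32_t)cnt - 1) << 16) | (regindx & 0x7fff);
}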
Example #2
static void
setup_stages(struct fd4_emit *emit, struct stage *s)
{
	unsigned i;

	s[VS].v = fd4_emit_get_vp(emit);
	s[FS].v = fd4_emit_get_fp(emit);

	s[HS].v = s[DS].v = s[GS].v = NULL;  /* for now */

	for (i = 0; i < MAX_STAGES; i++) {
		if (s[i].v) {
			s[i].i = &s[i].v->info;
			/* constlen is in units of 4 * vec4: */
			s[i].constlen = align(s[i].v->constlen, 4) / 4;
			/* instrlen is already in units of 16 instructions.. although
			 * we should probably ditch that and not make the compiler
			 * care about the instruction group size of a3xx vs a4xx
			 */
			s[i].instrlen = s[i].v->instrlen;
		} else {
			s[i].i = NULL;
			s[i].constlen = 0;
			s[i].instrlen = 0;
		}
	}

	/* NOTE: at least for gles2, the blob partitions VS at the bottom of
	 * const space and gives FS the entire remaining space.  We probably
	 * don't need to do it the same way, but for now mimic what the blob
	 * does to make it easier to diff against register values from the blob.
	 *
	 * NOTE: if VS.instrlen + FS.instrlen > 64, then one or both shaders
	 * are run from external memory.
	 */
	if ((s[VS].instrlen + s[FS].instrlen) > 64) {
		/* prioritize FS for internal memory: */
		if (s[FS].instrlen < 64) {
			/* if FS can fit, kick VS out to external memory: */
			s[VS].instrlen = 0;
		} else if (s[VS].instrlen < 64) {
			/* otherwise if VS can fit, kick out FS: */
			s[FS].instrlen = 0;
		} else {
			/* neither can fit, run both from external memory: */
			s[VS].instrlen = 0;
			s[FS].instrlen = 0;
		}
	}
	s[VS].constlen = 66;
	s[FS].constlen = 128 - s[VS].constlen;
	s[VS].instroff = 0;
	s[VS].constoff = 0;
	s[FS].instroff = 64 - s[FS].instrlen;
	s[FS].constoff = s[VS].constlen;
	s[HS].instroff = s[DS].instroff = s[GS].instroff = s[FS].instroff;
	s[HS].constoff = s[DS].constoff = s[GS].constoff = s[FS].constoff;
}
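The instruction-memory split above is easier to follow with concrete numbers. Below is a minimal standalone sketch of the same decision, with hypothetical instrlen values (in units of 16 instructions, per the comment in setup_stages); it is illustrative only, not driver code.

#include <stdio.h>

/* Same VS/FS split as setup_stages(), on bare unsigned values. */
static void
split_instr_mem(unsigned vs_len, unsigned fs_len)
{
	if (vs_len + fs_len > 64) {
		if (fs_len < 64)
			vs_len = 0;              /* FS fits: VS goes to external memory */
		else if (vs_len < 64)
			fs_len = 0;              /* VS fits: FS goes to external memory */
		else
			vs_len = fs_len = 0;     /* neither fits */
	}
	printf("VS: off=0 len=%u, FS: off=%u len=%u\n",
			vs_len, 64 - fs_len, fs_len);
}

int main(void)
{
	split_instr_mem(40, 30);  /* -> VS: off=0 len=0, FS: off=34 len=30 */
	return 0;
}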
Example #3
void
fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
		struct fd4_emit *emit)
{
	struct ir3_shader_variant *vp = fd4_emit_get_vp(emit);
	struct ir3_shader_variant *fp = fd4_emit_get_fp(emit);
	uint32_t dirty = emit->dirty;

	emit_marker(ring, 5);

	if ((dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) && !emit->key.binning_pass) {
		uint32_t val = fd4_zsa_stateobj(ctx->zsa)->rb_render_control;

		/* I suppose if we needed to (which I don't *think* we need
		 * to), we could emit this for binning pass too.  But we
		 * would need to keep a different patch-list for binning
		 * vs render pass.
		 */

		OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
		OUT_RINGP(ring, val, &fd4_context(ctx)->rbrc_patches);
	}

	if (dirty & FD_DIRTY_ZSA) {
		struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);

		OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1);
		OUT_RING(ring, zsa->rb_alpha_control);

		OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
		OUT_RING(ring, zsa->rb_stencil_control);
		OUT_RING(ring, zsa->rb_stencil_control2);
	}

	if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) {
		struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
		struct pipe_stencil_ref *sr = &ctx->stencil_ref;

		OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2);
		OUT_RING(ring, zsa->rb_stencilrefmask |
				A4XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0]));
		OUT_RING(ring, zsa->rb_stencilrefmask_bf |
				A4XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1]));
	}

	if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) {
		struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
		bool fragz = fp->has_kill | fp->writes_pos;

		OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
		OUT_RING(ring, zsa->rb_depth_control |
				COND(fragz, A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE));

		/* maybe this register/bitfield needs a better name.. this
		 * appears to be just disabling early-z
		 */
		OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
		OUT_RING(ring, zsa->gras_alpha_control |
				COND(fragz, A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE));
	}

	if (dirty & FD_DIRTY_RASTERIZER) {
		struct fd4_rasterizer_stateobj *rasterizer =
				fd4_rasterizer_stateobj(ctx->rasterizer);

		OUT_PKT0(ring, REG_A4XX_GRAS_SU_MODE_CONTROL, 1);
		OUT_RING(ring, rasterizer->gras_su_mode_control |
				A4XX_GRAS_SU_MODE_CONTROL_RENDERING_PASS);

		OUT_PKT0(ring, REG_A4XX_GRAS_SU_POINT_MINMAX, 2);
		OUT_RING(ring, rasterizer->gras_su_point_minmax);
		OUT_RING(ring, rasterizer->gras_su_point_size);

		OUT_PKT0(ring, REG_A4XX_GRAS_SU_POLY_OFFSET_SCALE, 2);
		OUT_RING(ring, rasterizer->gras_su_poly_offset_scale);
		OUT_RING(ring, rasterizer->gras_su_poly_offset_offset);

		OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
		OUT_RING(ring, rasterizer->gras_cl_clip_cntl);
	}

	/* NOTE: since primitive_restart is not actually part of any
	 * state object, we need to make sure that we always emit
	 * PRIM_VTX_CNTL.. either that or be more clever and detect
	 * when it changes.
	 */
	if (emit->info) {
		const struct pipe_draw_info *info = emit->info;
		uint32_t val = fd4_rasterizer_stateobj(ctx->rasterizer)
				->pc_prim_vtx_cntl;

		if (info->indexed && info->primitive_restart)
			val |= A4XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART;

		val |= COND(vp->writes_psize, A4XX_PC_PRIM_VTX_CNTL_PSIZE);

		if (fp->total_in > 0) {
			uint32_t varout = align(fp->total_in, 16) / 16;
			if (varout > 1)
				varout = align(varout, 2);
			val |= A4XX_PC_PRIM_VTX_CNTL_VAROUT(varout);
		}

		OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 2);
		OUT_RING(ring, val);
		OUT_RING(ring, 0x12);     /* XXX UNKNOWN_21C5 */
	}

	if (dirty & FD_DIRTY_SCISSOR) {
		struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);

		OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2);
		OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(scissor->maxx - 1) |
				A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(scissor->maxy - 1));
		OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(scissor->minx) |
				A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(scissor->miny));

		ctx->max_scissor.minx = MIN2(ctx->max_scissor.minx, scissor->minx);
		ctx->max_scissor.miny = MIN2(ctx->max_scissor.miny, scissor->miny);
		ctx->max_scissor.maxx = MAX2(ctx->max_scissor.maxx, scissor->maxx);
		ctx->max_scissor.maxy = MAX2(ctx->max_scissor.maxy, scissor->maxy);
	}

	if (dirty & FD_DIRTY_VIEWPORT) {
		fd_wfi(ctx->batch, ring);
		OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6);
		OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0(ctx->viewport.translate[0]));
		OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0(ctx->viewport.scale[0]));
		OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0(ctx->viewport.translate[1]));
		OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(ctx->viewport.scale[1]));
		OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(ctx->viewport.translate[2]));
		OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2]));
	}

	if (dirty & FD_DIRTY_PROG)
		fd4_program_emit(ring, emit);

	if ((dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) &&
			/* evil hack to deal sanely with clear path: */
			(emit->prog == &ctx->prog)) {
		fd_wfi(ctx->batch, ring);
		emit_constants(ring,  SB_VERT_SHADER,
				&ctx->constbuf[PIPE_SHADER_VERTEX],
				vp, emit->prog->dirty & FD_SHADER_DIRTY_VP);
		if (!emit->key.binning_pass) {
			emit_constants(ring, SB_FRAG_SHADER,
					&ctx->constbuf[PIPE_SHADER_FRAGMENT],
					fp, emit->prog->dirty & FD_SHADER_DIRTY_FP);
		}
	}

	/* emit driver params every time: param 0 is the base vertex
	 * (index_bias for indexed draws, start otherwise), uploaded as
	 * a single vec4 into the variant's driver-param const slot.
	 */
	if (emit->info && emit->prog == &ctx->prog) {
		uint32_t vertex_params[4] = {
			emit->info->indexed ? emit->info->index_bias : emit->info->start,
			0,
			0,
			0
		};
		if (vp->constlen >= vp->first_driver_param + 4) {
			fd4_emit_constant(ring, SB_VERT_SHADER,
							  (vp->first_driver_param + 4) * 4,
							  0, 4, vertex_params, NULL);
		}
	}

	if ((dirty & FD_DIRTY_BLEND) && ctx->blend) {
		struct fd4_blend_stateobj *blend = fd4_blend_stateobj(ctx->blend);
		uint32_t i;

		for (i = 0; i < 8; i++) {
			OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
			OUT_RING(ring, blend->rb_mrt[i].control);

			OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
			OUT_RING(ring, blend->rb_mrt[i].blend_control);
		}

		OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1);
		OUT_RING(ring, blend->rb_fs_output |
				A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff));
	}

	if (dirty & FD_DIRTY_BLEND_COLOR) {
		struct pipe_blend_color *bcolor = &ctx->blend_color;
		OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 4);
		OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(bcolor->color[0] * 255.0) |
				A4XX_RB_BLEND_RED_FLOAT(bcolor->color[0]));
		OUT_RING(ring, A4XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * 255.0) |
				A4XX_RB_BLEND_GREEN_FLOAT(bcolor->color[1]));
		OUT_RING(ring, A4XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * 255.0) |
				A4XX_RB_BLEND_BLUE_FLOAT(bcolor->color[2]));
		OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * 255.0) |
				A4XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3]));
	}

	if (dirty & FD_DIRTY_VERTTEX) {
		if (vp->has_samp)
			emit_textures(ctx, ring, SB_VERT_TEX, &ctx->verttex);
		else
			dirty &= ~FD_DIRTY_VERTTEX;
	}

	if (dirty & FD_DIRTY_FRAGTEX) {
		if (fp->has_samp)
			emit_textures(ctx, ring, SB_FRAG_TEX, &ctx->fragtex);
		else
			dirty &= ~FD_DIRTY_FRAGTEX;
	}

	ctx->dirty &= ~dirty;
}
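fd4_emit_state() is the consumer side of freedreno's dirty-bit protocol: the pipe state setters OR a bit into ctx->dirty, the draw path snapshots that into emit->dirty, and the tail of this function clears whatever actually got flushed. A hedged sketch of the producer side, assuming the usual freedreno headers (example_set_blend_color is hypothetical; the real setters live in freedreno_state.c):

static void
example_set_blend_color(struct pipe_context *pctx,
		const struct pipe_blend_color *bc)
{
	struct fd_context *ctx = fd_context(pctx);

	ctx->blend_color = *bc;              /* stash the new state... */
	ctx->dirty |= FD_DIRTY_BLEND_COLOR;  /* ...and mark it for re-emit */
}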
Example #4
void
fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
{
	int32_t i, j, last = -1;
	uint32_t total_in = 0;
	const struct fd_vertex_state *vtx = emit->vtx;
	struct ir3_shader_variant *vp = fd4_emit_get_vp(emit);
	unsigned vertex_regid = regid(63, 0), instance_regid = regid(63, 0);

	for (i = 0; i < vp->inputs_count; i++) {
		uint8_t semantic = sem2name(vp->inputs[i].semantic);
		if (semantic == TGSI_SEMANTIC_VERTEXID_NOBASE)
			vertex_regid = vp->inputs[i].regid;
		else if (semantic == TGSI_SEMANTIC_INSTANCEID)
			instance_regid = vp->inputs[i].regid;
		else if ((i < vtx->vtx->num_elements) && vp->inputs[i].compmask)
			last = i;
	}

	/* hw doesn't like to be configured for zero VBOs, it seems: */
	if ((vtx->vtx->num_elements == 0) &&
			(vertex_regid == regid(63, 0)) &&
			(instance_regid == regid(63, 0)))
		return;

	for (i = 0, j = 0; i <= last; i++) {
		assert(sem2name(vp->inputs[i].semantic) == 0);
		if (vp->inputs[i].compmask) {
			struct pipe_vertex_element *elem = &vtx->vtx->pipe[i];
			const struct pipe_vertex_buffer *vb =
					&vtx->vertexbuf.vb[elem->vertex_buffer_index];
			struct fd_resource *rsc = fd_resource(vb->buffer);
			enum pipe_format pfmt = elem->src_format;
			enum a4xx_vtx_fmt fmt = fd4_pipe2vtx(pfmt);
			bool switchnext = (i != last) ||
					(vertex_regid != regid(63, 0)) ||
					(instance_regid != regid(63, 0));
			bool isint = util_format_is_pure_integer(pfmt);
			uint32_t fs = util_format_get_blocksize(pfmt);
			uint32_t off = vb->buffer_offset + elem->src_offset;
			uint32_t size = fd_bo_size(rsc->bo) - off;
			debug_assert(fmt != ~0);

			OUT_PKT0(ring, REG_A4XX_VFD_FETCH(j), 4);
			OUT_RING(ring, A4XX_VFD_FETCH_INSTR_0_FETCHSIZE(fs - 1) |
					A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(vb->stride) |
					COND(elem->instance_divisor, A4XX_VFD_FETCH_INSTR_0_INSTANCED) |
					COND(switchnext, A4XX_VFD_FETCH_INSTR_0_SWITCHNEXT));
			OUT_RELOC(ring, rsc->bo, off, 0, 0);
			OUT_RING(ring, A4XX_VFD_FETCH_INSTR_2_SIZE(size));
			OUT_RING(ring, A4XX_VFD_FETCH_INSTR_3_STEPRATE(MAX2(1, elem->instance_divisor)));

			OUT_PKT0(ring, REG_A4XX_VFD_DECODE_INSTR(j), 1);
			OUT_RING(ring, A4XX_VFD_DECODE_INSTR_CONSTFILL |
					A4XX_VFD_DECODE_INSTR_WRITEMASK(vp->inputs[i].compmask) |
					A4XX_VFD_DECODE_INSTR_FORMAT(fmt) |
					A4XX_VFD_DECODE_INSTR_SWAP(fd4_pipe2swap(pfmt)) |
					A4XX_VFD_DECODE_INSTR_REGID(vp->inputs[i].regid) |
					A4XX_VFD_DECODE_INSTR_SHIFTCNT(fs) |
					A4XX_VFD_DECODE_INSTR_LASTCOMPVALID |
					COND(isint, A4XX_VFD_DECODE_INSTR_INT) |
					COND(switchnext, A4XX_VFD_DECODE_INSTR_SWITCHNEXT));

			total_in += vp->inputs[i].ncomp;
			j++;
		}
	}

	OUT_PKT0(ring, REG_A4XX_VFD_CONTROL_0, 5);
	OUT_RING(ring, A4XX_VFD_CONTROL_0_TOTALATTRTOVS(total_in) |
			0xa0000 | /* XXX */
			A4XX_VFD_CONTROL_0_STRMDECINSTRCNT(j) |
			A4XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(j));
	OUT_RING(ring, A4XX_VFD_CONTROL_1_MAXSTORAGE(129) | // XXX
			A4XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) |
			A4XX_VFD_CONTROL_1_REGID4INST(instance_regid));
	OUT_RING(ring, 0x00000000);   /* XXX VFD_CONTROL_2 */
	OUT_RING(ring, A4XX_VFD_CONTROL_3_REGID_VTXCNT(regid(63, 0)));
	OUT_RING(ring, 0x00000000);   /* XXX VFD_CONTROL_4 */

	/* cache invalidate, otherwise vertex fetch could see
	 * stale vbo contents:
	 */
	OUT_PKT0(ring, REG_A4XX_UCHE_INVALIDATE0, 2);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000012);
}
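A note on the regid(63, 0) sentinel used throughout the function above: an ir3 register id packs a GPR number and component into one byte, so r63.x encodes as 0xfc, one past the last usable GPR, and stands for "no register". Paraphrasing the helper from ir3 (the exact definition may differ slightly by version):

#include <stdint.h>

/* (GPR number, component) packed into a single byte; regid(63, 0)
 * yields 0xfc, the "unused" sentinel tested against vertex_regid and
 * instance_regid above.
 */
static inline uint32_t
regid(int num, int comp)
{
	return (num << 2) | (comp & 0x3);
}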
Example #5
static bool
fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
{
	struct fd4_context *fd4_ctx = fd4_context(ctx);
	struct fd4_emit emit = {
		.debug = &ctx->debug,
		.vtx  = &ctx->vtx,
		.prog = &ctx->prog,
		.info = info,
		.key = {
			.color_two_side = ctx->rasterizer->light_twoside,
			.vclamp_color = ctx->rasterizer->clamp_vertex_color,
			.fclamp_color = ctx->rasterizer->clamp_fragment_color,
			.rasterflat = ctx->rasterizer->flatshade,
			// TODO set .half_precision based on render target format,
			// ie. float16 and smaller use half, float32 use full..
			.half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
			.ucp_enables = ctx->rasterizer->clip_plane_enable,
			.has_per_samp = (fd4_ctx->fsaturate || fd4_ctx->vsaturate ||
					fd4_ctx->fastc_srgb || fd4_ctx->vastc_srgb),
			.vsaturate_s = fd4_ctx->vsaturate_s,
			.vsaturate_t = fd4_ctx->vsaturate_t,
			.vsaturate_r = fd4_ctx->vsaturate_r,
			.fsaturate_s = fd4_ctx->fsaturate_s,
			.fsaturate_t = fd4_ctx->fsaturate_t,
			.fsaturate_r = fd4_ctx->fsaturate_r,
			.vastc_srgb = fd4_ctx->vastc_srgb,
			.fastc_srgb = fd4_ctx->fastc_srgb,
		},
		.rasterflat = ctx->rasterizer->flatshade,
		.sprite_coord_enable = ctx->rasterizer->sprite_coord_enable,
		.sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
	};

	fixup_shader_state(ctx, &emit.key);

	unsigned dirty = ctx->dirty;

	/* do regular pass first, since that is more likely to fail compiling: */

	if (!(fd4_emit_get_vp(&emit) && fd4_emit_get_fp(&emit)))
		return false;

	emit.key.binning_pass = false;
	emit.dirty = dirty;

	struct fd_ringbuffer *ring = ctx->batch->draw;

	if (ctx->rasterizer->rasterizer_discard) {
		fd_wfi(ctx->batch, ring);
		OUT_PKT3(ring, CP_REG_RMW, 3);
		OUT_RING(ring, REG_A4XX_RB_RENDER_CONTROL);
		OUT_RING(ring, ~A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE);
		OUT_RING(ring, A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE);
	}

	draw_impl(ctx, ctx->batch->draw, &emit);

	if (ctx->rasterizer->rasterizer_discard) {
		fd_wfi(ctx->batch, ring);
		OUT_PKT3(ring, CP_REG_RMW, 3);
		OUT_RING(ring, REG_A4XX_RB_RENDER_CONTROL);
		OUT_RING(ring, ~A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE);
		OUT_RING(ring, 0);
	}

	/* and now binning pass: */
	emit.key.binning_pass = true;
	emit.dirty = dirty & ~(FD_DIRTY_BLEND);
	emit.vp = NULL;   /* key changed, so the vp/fp variants must be refetched */
	emit.fp = NULL;
	draw_impl(ctx, ctx->batch->binning, &emit);

	return true;
}
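The CP_REG_RMW packets bracketing the draw implement rasterizer discard without a CPU round-trip: the CP reads RB_RENDER_CONTROL, ANDs it with the packet's second dword, ORs in the third, and writes the result back. A sketch of the packet's arithmetic as I read the envytools docs (not driver code):

#include <stdint.h>

/* CP_REG_RMW: new_value = (old_value & and_mask) | or_value */
static uint32_t
cp_reg_rmw(uint32_t old, uint32_t and_mask, uint32_t or_val)
{
	return (old & and_mask) | or_val;
}

/* Before the draw the packet above computes
 *   cp_reg_rmw(v, ~DISABLE_COLOR_PIPE, DISABLE_COLOR_PIPE)  -> bit set
 * and after the draw
 *   cp_reg_rmw(v, ~DISABLE_COLOR_PIPE, 0)                   -> bit cleared
 */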