Ejemplo n.º 1
0
/* Write the VSC setup into ctx->ring: the size address, followed by the
 * config, data-address and data-length registers for each of the eight
 * pipes.  A pipe's data buffer is allocated the first time it is needed.
 */
static void
update_vsc_pipe(struct fd_context *ctx)
{
	struct fd4_context *a4xx = fd4_context(ctx);
	struct fd_ringbuffer *ring = ctx->ring;
	int n;

	OUT_PKT0(ring, REG_A4XX_VSC_SIZE_ADDRESS, 1);
	OUT_RELOCW(ring, a4xx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */

	/* one config dword per pipe, packed from the pipe's x/y/w/h: */
	OUT_PKT0(ring, REG_A4XX_VSC_PIPE_CONFIG_REG(0), 8);
	for (n = 0; n < 8; n++) {
		struct fd_vsc_pipe *vsc = &ctx->pipe[n];

		OUT_RING(ring, A4XX_VSC_PIPE_CONFIG_REG_X(vsc->x) |
				A4XX_VSC_PIPE_CONFIG_REG_Y(vsc->y) |
				A4XX_VSC_PIPE_CONFIG_REG_W(vsc->w) |
				A4XX_VSC_PIPE_CONFIG_REG_H(vsc->h));
	}

	/* one buffer reloc per pipe, creating the buffer if still missing: */
	OUT_PKT0(ring, REG_A4XX_VSC_PIPE_DATA_ADDRESS_REG(0), 8);
	for (n = 0; n < 8; n++) {
		struct fd_vsc_pipe *vsc = &ctx->pipe[n];

		if (!vsc->bo) {
			vsc->bo = fd_bo_new(ctx->dev, 0x40000,
					DRM_FREEDRENO_GEM_TYPE_KMEM);
		}

		OUT_RELOCW(ring, vsc->bo, 0, 0, 0);        /* VSC_PIPE_DATA_ADDRESS[n] */
	}

	/* the length programmed is the buffer size less 32: */
	OUT_PKT0(ring, REG_A4XX_VSC_PIPE_DATA_LENGTH_REG(0), 8);
	for (n = 0; n < 8; n++) {
		struct fd_vsc_pipe *vsc = &ctx->pipe[n];

		OUT_RING(ring, fd_bo_size(vsc->bo) - 32);  /* VSC_PIPE_DATA_LENGTH[n] */
	}
}
Ejemplo n.º 2
0
/* A state change that the state tracker considers "unrelated" to the
 * shaders may still make us pick a different shader variant.  Compare
 * the new key with the last one used; if they differ, mark the program
 * dirty, flag the affected shader stage(s), and remember the new key.
 */
static void
fixup_shader_state(struct fd_context *ctx, struct ir3_shader_key *key)
{
	struct fd4_context *a4xx = fd4_context(ctx);
	struct ir3_shader_key *prev = &a4xx->last_key;

	/* same key as last time, so the same variant: nothing to fix up */
	if (ir3_shader_key_equal(prev, key))
		return;

	ctx->dirty |= FD_DIRTY_PROG;

	if (prev->has_per_samp || key->has_per_samp) {
		/* vertex-stage saturate fields: */
		if (!((prev->vsaturate_s == key->vsaturate_s) &&
				(prev->vsaturate_t == key->vsaturate_t) &&
				(prev->vsaturate_r == key->vsaturate_r)))
			ctx->prog.dirty |= FD_SHADER_DIRTY_VP;

		/* fragment-stage saturate fields: */
		if (!((prev->fsaturate_s == key->fsaturate_s) &&
				(prev->fsaturate_t == key->fsaturate_t) &&
				(prev->fsaturate_r == key->fsaturate_r)))
			ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
	}

	/* a change in any of these flags the fragment shader dirty: */
	if ((prev->color_two_side != key->color_two_side) ||
			(prev->half_precision != key->half_precision) ||
			(prev->alpha != key->alpha))
		ctx->prog.dirty |= FD_SHADER_DIRTY_FP;

	a4xx->last_key = *key;
}
Ejemplo n.º 3
0
/* Walk the recorded rbrc patch list, rewriting each patched cmdstream
 * dword as the patch's saved value OR'd with 'val', then empty the list.
 */
static void
patch_rbrc(struct fd_context *ctx, uint32_t val)
{
	struct fd4_context *a4xx = fd4_context(ctx);
	unsigned idx = 0;

	while (idx < fd_patch_num_elements(&a4xx->rbrc_patches)) {
		struct fd_cs_patch *entry = fd_patch_element(&a4xx->rbrc_patches, idx);

		*entry->cs = entry->val | val;
		idx++;
	}

	/* every patch has been applied; reset the list to zero length: */
	util_dynarray_resize(&a4xx->rbrc_patches, 0);
}
Ejemplo n.º 4
0
static void
fd4_clear_binning(struct fd_context *ctx, unsigned dirty)
{
	struct fd4_context *fd4_ctx = fd4_context(ctx);
	struct fd_ringbuffer *ring = ctx->batch->binning;
	struct fd4_emit emit = {
		.debug = &ctx->debug,
		.vtx  = &fd4_ctx->solid_vbuf_state,
		.prog = &ctx->solid_prog,
		.key = {
			.binning_pass = true,
			.half_precision = true,
		},
		.dirty = dirty,
	};
Ejemplo n.º 5
0
/* Destroy an a4xx context: release the border-color uploader, run the
 * common context teardown, drop the a4xx buffer objects and common
 * VBOs, finish hw queries, and finally free the context itself.
 */
static void
fd4_context_destroy(struct pipe_context *pctx)
{
	struct fd_context *ctx = fd_context(pctx);
	struct fd4_context *a4xx = fd4_context(ctx);

	u_upload_destroy(a4xx->border_color_uploader);

	fd_context_destroy(pctx);

	/* buffer objects owned by the a4xx context: */
	fd_bo_del(a4xx->vs_pvt_mem);
	fd_bo_del(a4xx->fs_pvt_mem);
	fd_bo_del(a4xx->vsc_size_mem);

	fd_context_cleanup_common_vbos(&a4xx->base);

	fd_hw_query_fini(pctx);

	/* the context memory is released here, as the very last step: */
	free(a4xx);
}
Ejemplo n.º 6
0
/* Destroy an a4xx context: release the a4xx-specific resources (patch
 * list, buffer objects, vertex-element state, vertex buffers) and then
 * hand off to the common context teardown.
 */
static void
fd4_context_destroy(struct pipe_context *pctx)
{
	struct fd_context *ctx = fd_context(pctx);
	struct fd4_context *a4xx = fd4_context(ctx);

	util_dynarray_fini(&a4xx->rbrc_patches);

	/* buffer objects owned by the a4xx context: */
	fd_bo_del(a4xx->vs_pvt_mem);
	fd_bo_del(a4xx->fs_pvt_mem);
	fd_bo_del(a4xx->vsc_size_mem);

	/* vertex-element state objects for the solid and blit paths: */
	pctx->delete_vertex_elements_state(pctx, a4xx->solid_vbuf_state.vtx);
	pctx->delete_vertex_elements_state(pctx, a4xx->blit_vbuf_state.vtx);

	/* drop our references on the internal vertex buffers: */
	pipe_resource_reference(&a4xx->solid_vbuf, NULL);
	pipe_resource_reference(&a4xx->blit_texcoord_vbuf, NULL);

	/* common teardown runs last: */
	fd_context_destroy(pctx);
}
Ejemplo n.º 7
0
/* Destroy an a4xx context: release the a4xx-specific resources (buffer
 * objects, vertex-element state, vertex buffers, border-color uploader)
 * and then hand off to the common context teardown.
 */
static void
fd4_context_destroy(struct pipe_context *pctx)
{
	struct fd_context *ctx = fd_context(pctx);
	struct fd4_context *a4xx = fd4_context(ctx);

	/* buffer objects owned by the a4xx context: */
	fd_bo_del(a4xx->vs_pvt_mem);
	fd_bo_del(a4xx->fs_pvt_mem);
	fd_bo_del(a4xx->vsc_size_mem);

	/* vertex-element state objects for the solid and blit paths: */
	pctx->delete_vertex_elements_state(pctx, a4xx->solid_vbuf_state.vtx);
	pctx->delete_vertex_elements_state(pctx, a4xx->blit_vbuf_state.vtx);

	/* drop our references on the internal vertex buffers: */
	pipe_resource_reference(&a4xx->solid_vbuf, NULL);
	pipe_resource_reference(&a4xx->blit_texcoord_vbuf, NULL);

	u_upload_destroy(a4xx->border_color_uploader);

	/* common teardown runs last: */
	fd_context_destroy(pctx);
}
Ejemplo n.º 8
0
/* Emit a draw twice: first into the binning ring with the binning-pass
 * shader key, then into the regular ring with the normal key.
 */
static void
fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
{
	struct fd4_context *a4xx = fd4_context(ctx);
	struct fd4_emit emit = {
		.vtx  = &ctx->vtx,
		.prog = &ctx->prog,
		.info = info,
		.key = {
			/* the first draw_impl() call below is the binning pass: */
			.binning_pass = true,
			.color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false,
			.alpha = util_format_is_alpha(pipe_surface_format(ctx->framebuffer.cbufs[0])),
			/* TODO: choose .half_precision from the render target
			 * format, ie. float16 and smaller use half, float32
			 * use full..
			 */
			.half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
			.has_per_samp = a4xx->fsaturate || a4xx->vsaturate,
			.vsaturate_s = a4xx->vsaturate_s,
			.vsaturate_t = a4xx->vsaturate_t,
			.vsaturate_r = a4xx->vsaturate_r,
			.fsaturate_s = a4xx->fsaturate_s,
			.fsaturate_t = a4xx->fsaturate_t,
			.fsaturate_r = a4xx->fsaturate_r,
		},
		.rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
	};
	unsigned all_dirty;

	fixup_shader_state(ctx, &emit.key);

	/* sample the dirty bits only after the fixup above has run: */
	all_dirty = ctx->dirty;

	/* binning pass: everything that is dirty except blend state */
	emit.dirty = all_dirty & ~(FD_DIRTY_BLEND);
	draw_impl(ctx, ctx->binning_ring, &emit);

	/* regular (non-binning) pass: the key changes, so the cached vp
	 * has to be dropped and refetched
	 */
	emit.vp = NULL;
	emit.key.binning_pass = false;
	emit.dirty = all_dirty;
	draw_impl(ctx, ctx->ring, &emit);
}
Ejemplo n.º 9
0
/* A state change that the state tracker considers "unrelated" to the
 * shaders may still make us pick a different shader variant.  Compare
 * the new key with the last one used; if they differ, flag the affected
 * shader stage(s) dirty and remember the new key.
 */
static void
fixup_shader_state(struct fd_context *ctx, struct ir3_shader_key *key)
{
	struct fd4_context *a4xx = fd4_context(ctx);
	struct ir3_shader_key *prev = &a4xx->last_key;

	/* same key as last time, so the same variant: nothing to fix up */
	if (ir3_shader_key_equal(prev, key))
		return;

	if (prev->has_per_samp || key->has_per_samp) {
		/* vertex-stage per-sampler fields: */
		if (!((prev->vsaturate_s == key->vsaturate_s) &&
				(prev->vsaturate_t == key->vsaturate_t) &&
				(prev->vsaturate_r == key->vsaturate_r) &&
				(prev->vastc_srgb == key->vastc_srgb)))
			ctx->dirty |= FD_SHADER_DIRTY_VP;

		/* fragment-stage per-sampler fields: */
		if (!((prev->fsaturate_s == key->fsaturate_s) &&
				(prev->fsaturate_t == key->fsaturate_t) &&
				(prev->fsaturate_r == key->fsaturate_r) &&
				(prev->fastc_srgb == key->fastc_srgb)))
			ctx->dirty |= FD_SHADER_DIRTY_FP;
	}

	if (prev->vclamp_color != key->vclamp_color)
		ctx->dirty |= FD_SHADER_DIRTY_VP;

	/* a change in any of these flags the fragment shader dirty: */
	if ((prev->fclamp_color != key->fclamp_color) ||
			(prev->color_two_side != key->color_two_side) ||
			(prev->half_precision != key->half_precision) ||
			(prev->rasterflat != key->rasterflat))
		ctx->dirty |= FD_SHADER_DIRTY_FP;

	a4xx->last_key = *key;
}
Ejemplo n.º 10
0
/* Emit the pipeline state selected by emit->dirty into 'ring'.
 *
 * Each state group is written only when one of its dirty bits is set,
 * except PC_PRIM_VTX_CNTL and the vertex driver params, which are
 * written whenever emit->info is set.  On return, the bits that were
 * handled are cleared from ctx->dirty.
 *
 * ctx:  context the state objects are read from
 * ring: ringbuffer the register writes are appended to
 * emit: emit parameters (dirty bits, shader key, program, draw info)
 */
void
fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
		struct fd4_emit *emit)
{
	struct ir3_shader_variant *vp = fd4_emit_get_vp(emit);
	struct ir3_shader_variant *fp = fd4_emit_get_fp(emit);
	/* local copy: bits may be removed below before ctx->dirty is updated */
	uint32_t dirty = emit->dirty;

	emit_marker(ring, 5);

	if ((dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) && !emit->key.binning_pass) {
		uint32_t val = fd4_zsa_stateobj(ctx->zsa)->rb_render_control;

		/* I suppose if we needed to (which I don't *think* we need
		 * to), we could emit this for binning pass too.  But we
		 * would need to keep a different patch-list for binning
		 * vs render pass.
		 */

		OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
		/* recorded in rbrc_patches rather than written as a plain dword */
		OUT_RINGP(ring, val, &fd4_context(ctx)->rbrc_patches);
	}

	if (dirty & FD_DIRTY_ZSA) {
		struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);

		OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1);
		OUT_RING(ring, zsa->rb_alpha_control);

		OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
		OUT_RING(ring, zsa->rb_stencil_control);
		OUT_RING(ring, zsa->rb_stencil_control2);
	}

	if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) {
		struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
		struct pipe_stencil_ref *sr = &ctx->stencil_ref;

		/* front (ref_value[0]) and back-face (ref_value[1]) reference values */
		OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2);
		OUT_RING(ring, zsa->rb_stencilrefmask |
				A4XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0]));
		OUT_RING(ring, zsa->rb_stencilrefmask_bf |
				A4XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1]));
	}

	if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) {
		struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
		/* set when the fragment shader has a kill or writes position */
		bool fragz = fp->has_kill | fp->writes_pos;

		OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
		OUT_RING(ring, zsa->rb_depth_control |
				COND(fragz, A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE));

		/* maybe this register/bitfield needs a better name.. this
		 * appears to be just disabling early-z
		 */
		OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
		OUT_RING(ring, zsa->gras_alpha_control |
				COND(fragz, A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE));
	}

	if (dirty & FD_DIRTY_RASTERIZER) {
		struct fd4_rasterizer_stateobj *rasterizer =
				fd4_rasterizer_stateobj(ctx->rasterizer);

		OUT_PKT0(ring, REG_A4XX_GRAS_SU_MODE_CONTROL, 1);
		OUT_RING(ring, rasterizer->gras_su_mode_control |
				A4XX_GRAS_SU_MODE_CONTROL_RENDERING_PASS);

		OUT_PKT0(ring, REG_A4XX_GRAS_SU_POINT_MINMAX, 2);
		OUT_RING(ring, rasterizer->gras_su_point_minmax);
		OUT_RING(ring, rasterizer->gras_su_point_size);

		OUT_PKT0(ring, REG_A4XX_GRAS_SU_POLY_OFFSET_SCALE, 2);
		OUT_RING(ring, rasterizer->gras_su_poly_offset_scale);
		OUT_RING(ring, rasterizer->gras_su_poly_offset_offset);

		OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
		OUT_RING(ring, rasterizer->gras_cl_clip_cntl);
	}

	/* NOTE: since primitive_restart is not actually part of any
	 * state object, we need to make sure that we always emit
	 * PRIM_VTX_CNTL.. either that or be more clever and detect
	 * when it changes.
	 */
	if (emit->info) {
		const struct pipe_draw_info *info = emit->info;
		uint32_t val = fd4_rasterizer_stateobj(ctx->rasterizer)
				->pc_prim_vtx_cntl;

		if (info->indexed && info->primitive_restart)
			val |= A4XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART;

		val |= COND(vp->writes_psize, A4XX_PC_PRIM_VTX_CNTL_PSIZE);

		if (fp->total_in > 0) {
			/* total_in rounded up to units of 16; results above 1
			 * are further rounded up to an even count
			 */
			uint32_t varout = align(fp->total_in, 16) / 16;
			if (varout > 1)
				varout = align(varout, 2);
			val |= A4XX_PC_PRIM_VTX_CNTL_VAROUT(varout);
		}

		OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 2);
		OUT_RING(ring, val);
		OUT_RING(ring, 0x12);     /* XXX UNKNOWN_21C5 */
	}

	if (dirty & FD_DIRTY_SCISSOR) {
		struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);

		OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2);
		OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(scissor->maxx - 1) |
				A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(scissor->maxy - 1));
		OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(scissor->minx) |
				A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(scissor->miny));

		/* grow ctx->max_scissor so that it also covers this scissor */
		ctx->max_scissor.minx = MIN2(ctx->max_scissor.minx, scissor->minx);
		ctx->max_scissor.miny = MIN2(ctx->max_scissor.miny, scissor->miny);
		ctx->max_scissor.maxx = MAX2(ctx->max_scissor.maxx, scissor->maxx);
		ctx->max_scissor.maxy = MAX2(ctx->max_scissor.maxy, scissor->maxy);
	}

	if (dirty & FD_DIRTY_VIEWPORT) {
		fd_wfi(ctx, ring);
		/* six consecutive registers: x/y/z offset and scale pairs */
		OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6);
		OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0(ctx->viewport.translate[0]));
		OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0(ctx->viewport.scale[0]));
		OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0(ctx->viewport.translate[1]));
		OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(ctx->viewport.scale[1]));
		OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(ctx->viewport.translate[2]));
		OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2]));
	}

	if (dirty & FD_DIRTY_PROG)
		fd4_program_emit(ring, emit);

	if ((dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) &&
			/* evil hack to deal sanely with clear path: */
			(emit->prog == &ctx->prog)) {
		fd_wfi(ctx, ring);
		emit_constants(ring,  SB_VERT_SHADER,
				&ctx->constbuf[PIPE_SHADER_VERTEX],
				vp, emit->prog->dirty & FD_SHADER_DIRTY_VP);
		/* fragment constants are skipped for the binning pass */
		if (!emit->key.binning_pass) {
			emit_constants(ring, SB_FRAG_SHADER,
					&ctx->constbuf[PIPE_SHADER_FRAGMENT],
					fp, emit->prog->dirty & FD_SHADER_DIRTY_FP);
		}
	}

	/* emit driver params every time */
	if (emit->info && emit->prog == &ctx->prog) {
		/* first param: index_bias for indexed draws, else start */
		uint32_t vertex_params[4] = {
			emit->info->indexed ? emit->info->index_bias : emit->info->start,
			0,
			0,
			0
		};
		/* only emitted when the shader's constlen reaches the driver params */
		if (vp->constlen >= vp->first_driver_param + 4) {
			fd4_emit_constant(ring, SB_VERT_SHADER,
							  (vp->first_driver_param + 4) * 4,
							  0, 4, vertex_params, NULL);
		}
	}

	if ((dirty & FD_DIRTY_BLEND) && ctx->blend) {
		struct fd4_blend_stateobj *blend = fd4_blend_stateobj(ctx->blend);
		uint32_t i;

		/* control and blend-control for each of the 8 MRT slots */
		for (i = 0; i < 8; i++) {
			OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
			OUT_RING(ring, blend->rb_mrt[i].control);

			OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
			OUT_RING(ring, blend->rb_mrt[i].blend_control);
		}

		OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1);
		OUT_RING(ring, blend->rb_fs_output |
				A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff));
	}

	if (dirty & FD_DIRTY_BLEND_COLOR) {
		struct pipe_blend_color *bcolor = &ctx->blend_color;
		/* each channel is written as both a UINT (value * 255.0) and a FLOAT field */
		OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 4);
		OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(bcolor->color[0] * 255.0) |
				A4XX_RB_BLEND_RED_FLOAT(bcolor->color[0]));
		OUT_RING(ring, A4XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * 255.0) |
				A4XX_RB_BLEND_GREEN_FLOAT(bcolor->color[1]));
		OUT_RING(ring, A4XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * 255.0) |
				A4XX_RB_BLEND_BLUE_FLOAT(bcolor->color[2]));
		OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * 255.0) |
				A4XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3]));
	}

	/* For both texture groups below: when the shader has no samplers
	 * nothing is emitted and the bit is dropped from the local 'dirty',
	 * so it stays set in ctx->dirty after the final mask-off.
	 */
	if (dirty & FD_DIRTY_VERTTEX) {
		if (vp->has_samp)
			emit_textures(ctx, ring, SB_VERT_TEX, &ctx->verttex);
		else
			dirty &= ~FD_DIRTY_VERTTEX;
	}

	if (dirty & FD_DIRTY_FRAGTEX) {
		if (fp->has_samp)
			emit_textures(ctx, ring, SB_FRAG_TEX, &ctx->fragtex);
		else
			dirty &= ~FD_DIRTY_FRAGTEX;
	}

	/* clear only the bits that are still in the local 'dirty' */
	ctx->dirty &= ~dirty;
}
Ejemplo n.º 11
0
/* Emit a clear of the requested buffers into ctx->ring.
 *
 * The clear is issued as a DI_PT_RECTLIST draw using the solid program
 * and solid vertex buffer state, with depth/stencil/MRT registers
 * programmed directly according to which PIPE_CLEAR_* bits are set.
 *
 * ctx:     context to emit into
 * buffers: mask of PIPE_CLEAR_COLOR / PIPE_CLEAR_DEPTH / PIPE_CLEAR_STENCIL
 * color:   clear color (float form is packed for the RB_CLEAR_COLOR
 *          registers, ->ui is uploaded as fragment shader constants)
 * depth:   depth clear value, written as the viewport z-scale
 * stencil: stencil clear value
 */
static void
fd4_clear(struct fd_context *ctx, unsigned buffers,
		const union pipe_color_union *color, double depth, unsigned stencil)
{
	struct fd4_context *fd4_ctx = fd4_context(ctx);
	struct fd_ringbuffer *ring = ctx->ring;
	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
	unsigned dirty = ctx->dirty;
	unsigned ce, i;
	struct fd4_emit emit = {
		.vtx  = &fd4_ctx->solid_vbuf_state,
		.prog = &ctx->solid_prog,
		.key = {
			.half_precision = true,
		},
	};
	uint32_t colr = 0;

	/* packed clear color, using the format of the first color buffer */
	if ((buffers & PIPE_CLEAR_COLOR) && pfb->nr_cbufs)
		colr  = pack_rgba(pfb->cbufs[0]->format, color->f);

	/* of the context's dirty state only framebuffer and scissor are
	 * kept; the program is always (re)emitted
	 */
	dirty &= FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR;
	dirty |= FD_DIRTY_PROG;
	emit.dirty = dirty;

	OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 1);
	OUT_RING(ring, A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);

	/* emit generic state now: */
	fd4_emit_state(ctx, ring, &emit);
	reset_viewport(ring, pfb);

	if (buffers & PIPE_CLEAR_DEPTH) {
		OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
		OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE |
				A4XX_RB_DEPTH_CONTROL_Z_ENABLE |
				A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS));

		fd_wfi(ctx, ring);
		/* z-offset 0 and z-scale = depth; this overwrites the
		 * viewport registers, so viewport state is flagged dirty
		 */
		OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_ZOFFSET_0, 2);
		OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(0.0));
		OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(depth));
		ctx->dirty |= FD_DIRTY_VIEWPORT;
	} else {
		OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
		OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
	}

	if (buffers & PIPE_CLEAR_STENCIL) {
		OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2);
		OUT_RING(ring, A4XX_RB_STENCILREFMASK_STENCILREF(stencil) |
				A4XX_RB_STENCILREFMASK_STENCILMASK(stencil) |
				A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
		OUT_RING(ring, A4XX_RB_STENCILREFMASK_STENCILREF(0) |
				A4XX_RB_STENCILREFMASK_STENCILMASK(0) |
				0xff000000 | // XXX ???
				A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));

		/* front face: always pass and REPLACE on z-pass */
		OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
		OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
				A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_ALWAYS) |
				A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
				A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_REPLACE) |
				A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
				A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
				A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
				A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
				A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
		OUT_RING(ring, 0x00000000); /* RB_STENCIL_CONTROL2 */
	} else {
		/* not clearing stencil: zero masks, FUNC_NEVER, all ops KEEP */
		OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2);
		OUT_RING(ring, A4XX_RB_STENCILREFMASK_STENCILREF(0) |
				A4XX_RB_STENCILREFMASK_STENCILMASK(0) |
				A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(0));
		OUT_RING(ring, A4XX_RB_STENCILREFMASK_BF_STENCILREF(0) |
				A4XX_RB_STENCILREFMASK_BF_STENCILMASK(0) |
				A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0));

		OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
		OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
				A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
				A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
				A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
				A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
				A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
				A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
				A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
		OUT_RING(ring, 0x00000000); /* RB_STENCIL_CONTROL2 */
	}

	/* ce is the MRT component-enable mask: all four components when
	 * clearing color, none otherwise
	 */
	if (buffers & PIPE_CLEAR_COLOR) {
		OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1);
		OUT_RING(ring, A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER));
		ce = 0xf;
	} else {
		ce = 0x0;
	}

	/* all 8 MRT slots get the same control and blend setup */
	for (i = 0; i < 8; i++) {
		OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
		OUT_RING(ring, A4XX_RB_MRT_CONTROL_FASTCLEAR |
				A4XX_RB_MRT_CONTROL_B11 |
				A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(ce));

		OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
		OUT_RING(ring, A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) |
				A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
				A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(FACTOR_ZERO) |
				A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(FACTOR_ONE) |
				A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
				A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO));
	}

	fd4_emit_vertex_bufs(ring, &emit);

	OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
	OUT_RING(ring, 0x0);          /* XXX GRAS_ALPHA_CONTROL */

	OUT_PKT0(ring, REG_A4XX_GRAS_CLEAR_CNTL, 1);
	OUT_RING(ring, 0x00000000);

	/* the same packed color goes into all four clear-color dwords */
	OUT_PKT0(ring, REG_A4XX_RB_CLEAR_COLOR_DW0, 4);
	OUT_RING(ring, colr);         /* RB_CLEAR_COLOR_DW0 */
	OUT_RING(ring, colr);         /* RB_CLEAR_COLOR_DW1 */
	OUT_RING(ring, colr);         /* RB_CLEAR_COLOR_DW2 */
	OUT_RING(ring, colr);         /* RB_CLEAR_COLOR_DW3 */

	/* until fastclear works: */
	fd4_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL);

	OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
	OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
	OUT_RING(ring, 0);            /* ??? UNKNOWN_2209 */

	OUT_PKT0(ring, REG_A4XX_PC_RESTART_INDEX, 1);
	OUT_RING(ring, 0xffffffff);   /* PC_RESTART_INDEX */

	/* the draw is bracketed by CP_UNKNOWN_1A writes of 1 and then 0 */
	OUT_PKT3(ring, CP_UNKNOWN_1A, 1);
	OUT_RING(ring, 0x00000001);

	fd4_draw(ctx, ring, DI_PT_RECTLIST, USE_VISIBILITY,
			DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL);

	OUT_PKT3(ring, CP_UNKNOWN_1A, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT0(ring, REG_A4XX_GRAS_CLEAR_CNTL, 1);
	OUT_RING(ring, A4XX_GRAS_CLEAR_CNTL_NOT_FASTCLEAR);

	OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
	OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
			A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
			A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
			A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
}
Ejemplo n.º 12
0
/* Emit a draw into the current batch: first the regular pass into the
 * draw ring, then the binning pass into the binning ring.
 *
 * Returns false, before anything is written to either ring, when the
 * vertex or fragment shader variant for the current key is not
 * available; returns true otherwise.
 */
static bool
fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
{
	struct fd4_context *a4xx = fd4_context(ctx);
	struct fd4_emit emit = {
		.debug = &ctx->debug,
		.vtx  = &ctx->vtx,
		.prog = &ctx->prog,
		.info = info,
		.key = {
			.color_two_side = ctx->rasterizer->light_twoside,
			.vclamp_color = ctx->rasterizer->clamp_vertex_color,
			.fclamp_color = ctx->rasterizer->clamp_fragment_color,
			.rasterflat = ctx->rasterizer->flatshade,
			/* TODO: choose .half_precision from the render target
			 * format, ie. float16 and smaller use half, float32
			 * use full..
			 */
			.half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
			.ucp_enables = ctx->rasterizer->clip_plane_enable,
			.has_per_samp = (a4xx->fsaturate || a4xx->vsaturate ||
					a4xx->fastc_srgb || a4xx->vastc_srgb),
			.vsaturate_s = a4xx->vsaturate_s,
			.vsaturate_t = a4xx->vsaturate_t,
			.vsaturate_r = a4xx->vsaturate_r,
			.fsaturate_s = a4xx->fsaturate_s,
			.fsaturate_t = a4xx->fsaturate_t,
			.fsaturate_r = a4xx->fsaturate_r,
			.vastc_srgb = a4xx->vastc_srgb,
			.fastc_srgb = a4xx->fastc_srgb,
		},
		.rasterflat = ctx->rasterizer->flatshade,
		.sprite_coord_enable = ctx->rasterizer->sprite_coord_enable,
		.sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
	};

	fixup_shader_state(ctx, &emit.key);

	/* sample the dirty bits only after the fixup above has run: */
	unsigned dirty_bits = ctx->dirty;

	/* The regular pass is handled first, since that is more likely to
	 * fail compiling; give up if either variant is missing:
	 */
	if (!fd4_emit_get_vp(&emit) || !fd4_emit_get_fp(&emit))
		return false;

	emit.key.binning_pass = false;
	emit.dirty = dirty_bits;

	struct fd_ringbuffer *draw_ring = ctx->batch->draw;

	/* with rasterizer discard, set DISABLE_COLOR_PIPE in
	 * RB_RENDER_CONTROL via read-modify-write for this draw..
	 */
	if (ctx->rasterizer->rasterizer_discard) {
		fd_wfi(ctx->batch, draw_ring);
		OUT_PKT3(draw_ring, CP_REG_RMW, 3);
		OUT_RING(draw_ring, REG_A4XX_RB_RENDER_CONTROL);
		OUT_RING(draw_ring, ~A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE);
		OUT_RING(draw_ring, A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE);
	}

	draw_impl(ctx, ctx->batch->draw, &emit);

	/* ..and clear the bit again once the draw has been emitted: */
	if (ctx->rasterizer->rasterizer_discard) {
		fd_wfi(ctx->batch, draw_ring);
		OUT_PKT3(draw_ring, CP_REG_RMW, 3);
		OUT_RING(draw_ring, REG_A4XX_RB_RENDER_CONTROL);
		OUT_RING(draw_ring, ~A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE);
		OUT_RING(draw_ring, 0);
	}

	/* binning pass: the key changes, so the cached shader variants are
	 * dropped and refetched; blend state is not emitted here
	 */
	emit.vp = NULL;
	emit.fp = NULL;
	emit.key.binning_pass = true;
	emit.dirty = dirty_bits & ~(FD_DIRTY_BLEND);
	draw_impl(ctx, ctx->batch->binning, &emit);

	return true;
}
Ejemplo n.º 13
0
/* Emit the per-tile restore ("mem2gmem") commands into ctx->ring.
 *
 * Writes the tile's texture coordinates to the blit texcoord vertex
 * buffer, programs fixed render state sized to the tile, and then, for
 * the color and depth/stencil buffers that fd_gmem_needs_restore()
 * reports, emits the blit program and the per-surface restore via
 * emit_mem2gmem_surf().
 *
 * ctx:  context to emit into
 * tile: tile being set up (offset and bin size are read from it)
 */
static void
fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
{
	struct fd4_context *fd4_ctx = fd4_context(ctx);
	struct fd_gmem_stateobj *gmem = &ctx->gmem;
	struct fd_ringbuffer *ring = ctx->ring;
	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
	struct fd4_emit emit = {
			.vtx = &fd4_ctx->blit_vbuf_state,
			.sprite_coord_enable = 1,
			/* NOTE: They all use the same VP, this is for vtx bufs. */
			.prog = &ctx->blit_prog[0],
			.key = {
				.half_precision = fd_half_precision(pfb),
			},
			.no_decode_srgb = true,
	};
	unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS] = {0};
	float x0, y0, x1, y1;
	unsigned bin_w = tile->bin_w;
	unsigned bin_h = tile->bin_h;
	unsigned i;

	/* write texture coordinates to vertexbuf: */
	/* (the tile rectangle as a fraction of the framebuffer size) */
	x0 = ((float)tile->xoff) / ((float)pfb->width);
	x1 = ((float)tile->xoff + bin_w) / ((float)pfb->width);
	y0 = ((float)tile->yoff) / ((float)pfb->height);
	y1 = ((float)tile->yoff + bin_h) / ((float)pfb->height);

	OUT_PKT3(ring, CP_MEM_WRITE, 5);
	OUT_RELOCW(ring, fd_resource(fd4_ctx->blit_texcoord_vbuf)->bo, 0, 0, 0);
	OUT_RING(ring, fui(x0));
	OUT_RING(ring, fui(y0));
	OUT_RING(ring, fui(x1));
	OUT_RING(ring, fui(y1));

	for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
		/* render-component mask: 0xf only for color buffers that are bound */
		mrt_comp[i] = ((i < pfb->nr_cbufs) && pfb->cbufs[i]) ? 0xf : 0;

		OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
		OUT_RING(ring, A4XX_RB_MRT_CONTROL_FASTCLEAR |
				A4XX_RB_MRT_CONTROL_B11 |
				A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf));

		OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
		OUT_RING(ring, A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) |
				A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
				A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(FACTOR_ZERO) |
				A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(FACTOR_ONE) |
				A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
				A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO));
	}

	OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1);
	OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
			A4XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
			A4XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
			A4XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
			A4XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
			A4XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
			A4XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
			A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));

	OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
	OUT_RING(ring, 0x8);          /* XXX RB_RENDER_CONTROL */

	OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
	OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_LESS));

	OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
	OUT_RING(ring, 0x280000);     /* XXX GRAS_CL_CLIP_CNTL */

	OUT_PKT0(ring, REG_A4XX_GRAS_SU_MODE_CONTROL, 1);
	OUT_RING(ring, A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0) |
			A4XX_GRAS_SU_MODE_CONTROL_RENDERING_PASS);

	/* viewport sized to the tile (half extents), with negated y scale */
	OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6);
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0((float)bin_w/2.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0((float)bin_w/2.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0((float)bin_h/2.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(-(float)bin_h/2.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(0.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(1.0));

	/* window and screen scissors both span (0,0)..(bin_w-1,bin_h-1) */
	OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2);
	OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(bin_w - 1) |
			A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(bin_h - 1));
	OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
			A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));

	OUT_PKT0(ring, REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
	OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
			A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
	OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(bin_w - 1) |
			A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(bin_h - 1));

	OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
	OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(gmem->bin_w) |
			A4XX_RB_MODE_CONTROL_HEIGHT(gmem->bin_h));

	OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
	OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_ALWAYS) |
			A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_ALWAYS) |
			A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
	OUT_RING(ring, 0x00000000); /* RB_STENCIL_CONTROL2 */

	OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
	OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
			A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
			A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
			A4XX_GRAS_SC_CONTROL_RASTER_MODE(1));

	OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 1);
	OUT_RING(ring, A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST |
			A4XX_PC_PRIM_VTX_CNTL_VAROUT(1));

	OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
	OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
	OUT_RING(ring, 0);            /* ??? UNKNOWN_2209 */

	fd4_emit_vertex_bufs(ring, &emit);

	/* for gmem pitch/base calculations, we need to use the non-
	 * truncated tile sizes:
	 */
	bin_w = gmem->bin_w;
	bin_h = gmem->bin_h;

	if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_COLOR)) {
		/* the blit program is indexed by the number of color buffers */
		emit.prog = &ctx->blit_prog[pfb->nr_cbufs - 1];
		emit.fp = NULL;      /* frag shader changed so clear cache */
		fd4_program_emit(ring, &emit, pfb->nr_cbufs, pfb->cbufs);
		emit_mem2gmem_surf(ctx, gmem->cbuf_base, pfb->cbufs, pfb->nr_cbufs, bin_w);
	}

	if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
		switch (pfb->zsbuf->format) {
		case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
		case PIPE_FORMAT_Z32_FLOAT:
			/* float depth formats get a dedicated program (blit_z or
			 * blit_zs) at full precision, with depth writes enabled
			 */
			emit.prog = (pfb->zsbuf->format == PIPE_FORMAT_Z32_FLOAT) ?
					&ctx->blit_z : &ctx->blit_zs;
			emit.key.half_precision = false;

			OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
			OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_Z_ENABLE |
					A4XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE |
					A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS) |
					A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE);

			OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
			OUT_RING(ring, A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE);

			OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
			OUT_RING(ring, 0x80000);   /* GRAS_CL_CLIP_CNTL */

			break;
		default:
			/* Non-float can use a regular color write. It's split over 8-bit
			 * components, so half precision is always sufficient.
			 */
			emit.prog = &ctx->blit_prog[0];
			emit.key.half_precision = true;
			break;
		}
		emit.fp = NULL;      /* frag shader changed so clear cache */
		fd4_program_emit(ring, &emit, 1, &pfb->zsbuf);
		emit_mem2gmem_surf(ctx, gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w);
	}

	/* final state: same render mode, now with RASTER_MODE(0) and
	 * without the MSAA_DISABLE bit set earlier in this function
	 */
	OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
	OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
			A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
			A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));

	OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
	OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(gmem->bin_w) |
			A4XX_RB_MODE_CONTROL_HEIGHT(gmem->bin_h) |
			0x00010000);  /* XXX */
}
Ejemplo n.º 14
0
/* Per-tile resolve (gmem -> system memory).
 *
 * Programs depth/stencil/viewport/scissor state for a resolve draw, emits
 * the solid program and its vertex buffers, and then calls
 * emit_gmem2mem_surf() for each buffer flagged in ctx->resolve.  On exit
 * GRAS_SC_CONTROL is switched back to RB_RENDERING_PASS.
 *
 * The 'tile' argument is not referenced in the body; the viewport and
 * window scissor are set from the full framebuffer dimensions.
 */
static void
fd4_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
{
	struct fd4_context *fd4_ctx = fd4_context(ctx);
	struct fd_gmem_stateobj *gmem = &ctx->gmem;
	struct fd_ringbuffer *ring = ctx->ring;
	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
	struct fd4_emit emit = {
			.vtx = &fd4_ctx->solid_vbuf_state,
			.prog = &ctx->solid_prog,
			.key = {
				.half_precision = true,
			},
	};

	/* depth func NEVER, with neither Z_ENABLE nor Z_WRITE_ENABLE set */
	OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
	OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));

	/* stencil: func NEVER and all ops KEEP, for both front and back faces */
	OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
	OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
			A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
			A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
	OUT_RING(ring, 0x00000000); /* RB_STENCIL_CONTROL2 */

	/* two consecutive registers: front-face then back-face ref/mask */
	OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2);
	OUT_RING(ring, 0xff000000 |
			A4XX_RB_STENCILREFMASK_STENCILREF(0) |
			A4XX_RB_STENCILREFMASK_STENCILMASK(0) |
			A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
	OUT_RING(ring, 0xff000000 |
			A4XX_RB_STENCILREFMASK_BF_STENCILREF(0) |
			A4XX_RB_STENCILREFMASK_BF_STENCILMASK(0) |
			A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));

	OUT_PKT0(ring, REG_A4XX_GRAS_SU_MODE_CONTROL, 1);
	OUT_RING(ring, A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0));

	fd_wfi(ctx, ring);

	OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
	OUT_RING(ring, 0x80000);      /* GRAS_CL_CLIP_CNTL */

	/* viewport spans the whole framebuffer; the Y scale is negated */
	OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6);
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0((float)pfb->width/2.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0((float)pfb->width/2.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0((float)pfb->height/2.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(-(float)pfb->height/2.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(0.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(1.0));

	OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
	OUT_RING(ring, A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
			0xa);       /* XXX */

	/* enter resolve-pass render mode (undone at the end of this function) */
	OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
	OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
			A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
			A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
			A4XX_GRAS_SC_CONTROL_RASTER_MODE(1));

	OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 1);
	OUT_RING(ring, A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);

	OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
	OUT_RING(ring, 0x00000002);

	/* window scissor covers the whole framebuffer; BR is written first,
	 * then TL, matching the two-register packet starting at ..._BR
	 */
	OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2);
	OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) |
			A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1));
	OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
			A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));

	OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
	OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
	OUT_RING(ring, 0);            /* ??? UNKNOWN_2209 */

	/* program is emitted with zero render targets (count 0, NULL array) */
	fd4_program_emit(ring, &emit, 0, NULL);
	fd4_emit_vertex_bufs(ring, &emit);

	/* depth/stencil: if the resource has no rsc->stencil, a single resolve
	 * from zsbuf_base[0] is always emitted.  If it has one, the
	 * zsbuf_base[0] resolve is emitted only when FD_BUFFER_DEPTH is
	 * flagged, and a second resolve from zsbuf_base[1] only when
	 * FD_BUFFER_STENCIL is flagged.
	 * NOTE(review): rsc->stencil presumably denotes a separate stencil
	 * resource, and the bool argument selects it -- confirm against
	 * emit_gmem2mem_surf().
	 */
	if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
		struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
		if (!rsc->stencil || (ctx->resolve & FD_BUFFER_DEPTH))
			emit_gmem2mem_surf(ctx, false, ctx->gmem.zsbuf_base[0], pfb->zsbuf);
		if (rsc->stencil && (ctx->resolve & FD_BUFFER_STENCIL))
			emit_gmem2mem_surf(ctx, true, ctx->gmem.zsbuf_base[1], pfb->zsbuf);
	}

	/* color: resolve each bound cbuf whose per-buffer bit
	 * (PIPE_CLEAR_COLOR0 << i) is set in ctx->resolve; unbound (NULL)
	 * slots are skipped
	 */
	if (ctx->resolve & FD_BUFFER_COLOR) {
		unsigned i;
		for (i = 0; i < pfb->nr_cbufs; i++) {
			if (!pfb->cbufs[i])
				continue;
			if (!(ctx->resolve & (PIPE_CLEAR_COLOR0 << i)))
				continue;
			emit_gmem2mem_surf(ctx, false, gmem->cbuf_base[i], pfb->cbufs[i]);
		}
	}

	/* back to rendering-pass mode with raster mode 0 */
	OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
	OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
			A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
			A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
			A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
}
Ejemplo n.º 15
0
/* Emit setup at the beginning of a new cmdstream buffer.  Nothing here
 * relies on previously emitted state, since there could have been a
 * context switch between ioctls.
 *
 * Writes a fixed sequence of register values to ctx->ring and, as its
 * last step, sets ctx->needs_rb_fbd.  Many of the registers are named
 * UNKNOWN_xxxx; their values are written as opaque constants and their
 * meaning is not described here.
 */
void
fd4_emit_restore(struct fd_context *ctx)
{
	struct fd4_context *fd4_ctx = fd4_context(ctx);
	struct fd_ringbuffer *ring = ctx->ring;

	OUT_PKT0(ring, REG_A4XX_RBBM_PERFCTR_CTL, 1);
	OUT_RING(ring, 0x00000001);

	OUT_PKT0(ring, REG_A4XX_GRAS_DEBUG_ECO_CONTROL, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0EC3, 1);
	OUT_RING(ring, 0x00000006);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0F03, 1);
	OUT_RING(ring, 0x0000003a);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0D01, 1);
	OUT_RING(ring, 0x00000001);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0E42, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT0(ring, REG_A4XX_UCHE_CACHE_WAYS_VFD, 1);
	OUT_RING(ring, 0x00000007);

	OUT_PKT0(ring, REG_A4XX_UCHE_CACHE_MODE_CONTROL, 1);
	OUT_RING(ring, 0x00000000);

	/* two consecutive registers starting at UCHE_INVALIDATE0 */
	OUT_PKT0(ring, REG_A4XX_UCHE_INVALIDATE0, 2);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000012);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0E05, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0CC5, 1);
	OUT_RING(ring, 0x00000006);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0CC6, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0EC2, 1);
	OUT_RING(ring, 0x00040000);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_2001, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
	OUT_RING(ring, 0x00001000);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_20EF, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F0, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F1, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F2, 1);
	OUT_RING(ring, 0x00000000);

	/* blend constant: R/G/B = 0 (both UINT and FLOAT fields),
	 * alpha = 1.0 with 0x7fff in the UINT field
	 */
	OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 4);
	OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(0) |
			A4XX_RB_BLEND_RED_FLOAT(0.0));
	OUT_RING(ring, A4XX_RB_BLEND_GREEN_UINT(0) |
			A4XX_RB_BLEND_GREEN_FLOAT(0.0));
	OUT_RING(ring, A4XX_RB_BLEND_BLUE_UINT(0) |
			A4XX_RB_BLEND_BLUE_FLOAT(0.0));
	OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(0x7fff) |
			A4XX_RB_BLEND_ALPHA_FLOAT(1.0));

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F7, 1);
	OUT_RING(ring, 0x3f800000);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_2152, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_2153, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_2154, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_2155, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_2156, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_2157, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_21C3, 1);
	OUT_RING(ring, 0x0000001d);

	OUT_PKT0(ring, REG_A4XX_PC_GS_PARAM, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_21E6, 1);
	OUT_RING(ring, 0x00000001);

	OUT_PKT0(ring, REG_A4XX_PC_HS_PARAM, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_22D7, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT0(ring, REG_A4XX_TPL1_TP_TEX_OFFSET, 1);
	OUT_RING(ring, 0x00000000);

	/* TP_TEX_COUNT: VS = 16, HS/DS/GS = 0; TP_FS_TEX_COUNT below = 16 */
	OUT_PKT0(ring, REG_A4XX_TPL1_TP_TEX_COUNT, 1);
	OUT_RING(ring, A4XX_TPL1_TP_TEX_COUNT_VS(16) |
			A4XX_TPL1_TP_TEX_COUNT_HS(0) |
			A4XX_TPL1_TP_TEX_COUNT_DS(0) |
			A4XX_TPL1_TP_TEX_COUNT_GS(0));

	OUT_PKT0(ring, REG_A4XX_TPL1_TP_FS_TEX_COUNT, 1);
	OUT_RING(ring, 16);

	/* we don't use this yet.. probably best to disable.. */
	OUT_PKT3(ring, CP_SET_DRAW_STATE, 2);
	OUT_RING(ring, CP_SET_DRAW_STATE_0_COUNT(0) |
			CP_SET_DRAW_STATE_0_DISABLE_ALL_GROUPS |
			CP_SET_DRAW_STATE_0_GROUP_ID(0));
	OUT_RING(ring, CP_SET_DRAW_STATE_1_ADDR(0));

	/* shader private memory: a param word followed by a reloc to the
	 * per-context vs_pvt_mem / fs_pvt_mem buffer
	 */
	OUT_PKT0(ring, REG_A4XX_SP_VS_PVT_MEM_PARAM, 2);
	OUT_RING(ring, 0x08000001);                  /* SP_VS_PVT_MEM_PARAM */
	OUT_RELOC(ring, fd4_ctx->vs_pvt_mem, 0,0,0); /* SP_VS_PVT_MEM_ADDR */

	OUT_PKT0(ring, REG_A4XX_SP_FS_PVT_MEM_PARAM, 2);
	OUT_RING(ring, 0x08000001);                  /* SP_FS_PVT_MEM_PARAM */
	OUT_RELOC(ring, fd4_ctx->fs_pvt_mem, 0,0,0); /* SP_FS_PVT_MEM_ADDR */

	/* rendering-pass mode; MSAA disabled with MSAA_ONE in both
	 * GRAS_SC_CONTROL and RB_MSAA_CONTROL
	 */
	OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
	OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
			A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
			A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
			A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));

	OUT_PKT0(ring, REG_A4XX_RB_MSAA_CONTROL, 1);
	OUT_RING(ring, A4XX_RB_MSAA_CONTROL_DISABLE |
			A4XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE));

	OUT_PKT0(ring, REG_A4XX_GRAS_CL_GB_CLIP_ADJ, 1);
	OUT_RING(ring, A4XX_GRAS_CL_GB_CLIP_ADJ_HORZ(0) |
			A4XX_GRAS_CL_GB_CLIP_ADJ_VERT(0));

	OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1);
	OUT_RING(ring, A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(FUNC_ALWAYS));

	OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1);
	OUT_RING(ring, A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff));

	/* only the RT0 field is set (0xf) */
	OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1);
	OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(0xf));

	OUT_PKT0(ring, REG_A4XX_GRAS_CLEAR_CNTL, 1);
	OUT_RING(ring, A4XX_GRAS_CLEAR_CNTL_NOT_FASTCLEAR);

	OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
	OUT_RING(ring, 0x0);

	/* flag for later code; presumably requests a re-emit of
	 * RB_FRAME_BUFFER_DIMENSION -- confirm against the readers of
	 * needs_rb_fbd
	 */
	ctx->needs_rb_fbd = true;
}
Ejemplo n.º 16
0
			}
		} else {
			OUT_RING(ring, 0xbad00000 | (i << 16));
		}
	}
}

static void
emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
		enum adreno_state_block sb, struct fd_texture_stateobj *tex)
{
	static const uint32_t bcolor_reg[] = {
			[SB_VERT_TEX] = REG_A4XX_TPL1_TP_VS_BORDER_COLOR_BASE_ADDR,
			[SB_FRAG_TEX] = REG_A4XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR,
	};
	struct fd4_context *fd4_ctx = fd4_context(ctx);
	unsigned i, off;
	void *ptr;

	u_upload_alloc(fd4_ctx->border_color_uploader,
			0, BORDER_COLOR_UPLOAD_SIZE,
		       BORDER_COLOR_UPLOAD_SIZE, &off,
			&fd4_ctx->border_color_buf,
			&ptr);

	fd_setup_border_colors(tex, ptr, 0);

	if (tex->num_samplers > 0) {
		int num_samplers;

		/* not sure if this is an a420.0 workaround, but we seem
Ejemplo n.º 17
0
/* Per-tile resolve (gmem -> system memory), single render target variant.
 *
 * Programs depth/stencil/viewport/scissor state for a resolve draw, emits
 * the solid program and its vertex buffers, then resolves the
 * depth/stencil buffer and/or cbufs[0] depending on ctx->resolve.  On exit
 * GRAS_SC_CONTROL is switched back to RB_RENDERING_PASS.
 *
 * The 'tile' argument is not referenced in the body.  'key' used in the
 * emit initializer is not declared in this function; it comes from the
 * enclosing scope.
 */
static void
fd4_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
{
	struct fd4_context *fd4_ctx = fd4_context(ctx);
	struct fd_ringbuffer *ring = ctx->ring;
	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
	struct fd4_emit emit = {
			.vtx = &fd4_ctx->solid_vbuf_state,
			.prog = &ctx->solid_prog,
			.key = key,
			.format = fd4_emit_format(pfb->cbufs[0]),
	};

	/* depth func NEVER, with neither Z_ENABLE nor Z_WRITE_ENABLE set */
	OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
	OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));

	/* stencil: func NEVER and all ops KEEP, for both front and back faces */
	OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
	OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
			A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
			A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
	OUT_RING(ring, 0x00000000); /* RB_STENCIL_CONTROL2 */

	/* two consecutive registers: front-face then back-face ref/mask */
	OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2);
	OUT_RING(ring, 0xff000000 |
			A4XX_RB_STENCILREFMASK_STENCILREF(0) |
			A4XX_RB_STENCILREFMASK_STENCILMASK(0) |
			A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
	OUT_RING(ring, 0xff000000 |
			A4XX_RB_STENCILREFMASK_BF_STENCILREF(0) |
			A4XX_RB_STENCILREFMASK_BF_STENCILMASK(0) |
			A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));

	OUT_PKT0(ring, REG_A4XX_GRAS_SU_MODE_CONTROL, 1);
	OUT_RING(ring, A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0));

	fd_wfi(ctx, ring);

	OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
	OUT_RING(ring, 0x80000);      /* GRAS_CL_CLIP_CNTL */

	/* viewport spans the whole framebuffer; the Y scale is negated */
	OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6);
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0((float)pfb->width/2.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0((float)pfb->width/2.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0((float)pfb->height/2.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(-(float)pfb->height/2.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(0.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(1.0));

	OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
	OUT_RING(ring, A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
			0xa);       /* XXX */

	/* enter resolve-pass render mode (undone at the end of this function) */
	OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
	OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
			A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
			A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
			A4XX_GRAS_SC_CONTROL_RASTER_MODE(1));

	OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 1);
	OUT_RING(ring, A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);

	OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
	OUT_RING(ring, 0x00000002);

	/* window scissor covers the whole framebuffer; BR is written first,
	 * then TL, matching the two-register packet starting at ..._BR
	 */
	OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2);
	OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) |
			A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1));
	OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
			A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));

	OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
	OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
	OUT_RING(ring, 0);            /* ??? UNKNOWN_2209 */

	fd4_program_emit(ring, &emit);
	fd4_emit_vertex_bufs(ring, &emit);

	/* depth/stencil is resolved from the gmem offset given by depth_base() */
	if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
		uint32_t base = depth_base(ctx);
		emit_gmem2mem_surf(ctx, base, pfb->zsbuf);
	}

	/* color is resolved from gmem offset 0; only cbufs[0] is handled */
	if (ctx->resolve & FD_BUFFER_COLOR) {
		emit_gmem2mem_surf(ctx, 0, pfb->cbufs[0]);
	}

	/* back to rendering-pass mode with raster mode 0 */
	OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
	OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
			A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
			A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
			A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
}

/* transfer from system memory to gmem */

/* Restore one surface into gmem: point a single MRT at 'base' with the
 * given bin width, bind 'psurf' as the restore texture, and issue a
 * two-vertex RECTLIST draw that ignores visibility.
 */
static void
emit_mem2gmem_surf(struct fd_context *ctx, uint32_t base,
		struct pipe_surface *psurf, uint32_t bin_w)
{
	struct fd_ringbuffer *rb = ctx->ring;

	/* destination: exactly one render target, located at 'base' */
	emit_mrt(rb, 1, &psurf, &base, bin_w);

	/* source: the same surface, sampled as a texture */
	fd4_emit_gmem_restore_tex(rb, psurf);

	fd4_draw(ctx, rb,
			DI_PT_RECTLIST, IGNORE_VISIBILITY, DI_SRC_SEL_AUTO_INDEX,
			2, 1, INDEX_SIZE_IGN, 0, 0, NULL);
}

/* Per-tile restore (system memory -> gmem).
 *
 * Writes the tile's texture coordinates into the blit texcoord vertex
 * buffer, programs blit state sized to the tile, emits the blit program
 * and vertex buffers, and then restores depth/stencil and/or cbufs[0]
 * when fd_gmem_needs_restore() reports that they need it.  On exit
 * GRAS_SC_CONTROL and RB_MODE_CONTROL are rewritten for rendering.
 *
 * 'key' used in the emit initializer is not declared in this function;
 * it comes from the enclosing scope.
 */
static void
fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
{
	struct fd4_context *fd4_ctx = fd4_context(ctx);
	struct fd_gmem_stateobj *gmem = &ctx->gmem;
	struct fd_ringbuffer *ring = ctx->ring;
	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
	struct fd4_emit emit = {
			.vtx = &fd4_ctx->blit_vbuf_state,
			.prog = &ctx->blit_prog[0],
			.key = key,
			.format = fd4_emit_format(pfb->cbufs[0]),
	};
	float x0, y0, x1, y1;
	/* start with this tile's own size; switched to gmem->bin_w/bin_h
	 * further down for the pitch/base calculations
	 */
	unsigned bin_w = tile->bin_w;
	unsigned bin_h = tile->bin_h;
	unsigned i;

	/* write texture coordinates to vertexbuf: */
	/* the tile rectangle expressed as fractions of the framebuffer size */
	x0 = ((float)tile->xoff) / ((float)pfb->width);
	x1 = ((float)tile->xoff + bin_w) / ((float)pfb->width);
	y0 = ((float)tile->yoff) / ((float)pfb->height);
	y1 = ((float)tile->yoff + bin_h) / ((float)pfb->height);

	/* 5 dwords: destination address, then x0, y0, x1, y1 (via fui()) */
	OUT_PKT3(ring, CP_MEM_WRITE, 5);
	OUT_RELOCW(ring, fd_resource(fd4_ctx->blit_texcoord_vbuf)->bo, 0, 0, 0);
	OUT_RING(ring, fui(x0));
	OUT_RING(ring, fui(y0));
	OUT_RING(ring, fui(x1));
	OUT_RING(ring, fui(y1));

	/* identical control/blend setup for all 8 MRT slots: all four
	 * components enabled, src factor ONE and dst factor ZERO for both
	 * RGB and alpha
	 */
	for (i = 0; i < 8; i++) {
		OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
		OUT_RING(ring, A4XX_RB_MRT_CONTROL_FASTCLEAR |
				A4XX_RB_MRT_CONTROL_B11 |
				A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf));

		OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
		OUT_RING(ring, A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) |
				A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
				A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(FACTOR_ZERO) |
				A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(FACTOR_ONE) |
				A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
				A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO));
	}

	OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
	OUT_RING(ring, 0x8);          /* XXX RB_RENDER_CONTROL */

	OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
	OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_LESS));

	OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
	OUT_RING(ring, 0x280000);     /* XXX GRAS_CL_CLIP_CNTL */

	OUT_PKT0(ring, REG_A4XX_GRAS_SU_MODE_CONTROL, 1);
	OUT_RING(ring, A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0) |
			A4XX_GRAS_SU_MODE_CONTROL_RENDERING_PASS);

	/* viewport covers the tile (tile->bin_w x tile->bin_h); Y scale is
	 * negated
	 */
	OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6);
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0((float)bin_w/2.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0((float)bin_w/2.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0((float)bin_h/2.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(-(float)bin_h/2.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(0.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(1.0));

	/* window and screen scissors both span (0,0)..(bin_w-1,bin_h-1);
	 * note the window packet is written BR-then-TL and the screen packet
	 * TL-then-BR, following the register each packet starts at
	 */
	OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2);
	OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(bin_w - 1) |
			A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(bin_h - 1));
	OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
			A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));

	OUT_PKT0(ring, REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
	OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
			A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
	OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(bin_w - 1) |
			A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(bin_h - 1));

	/* RB_MODE_CONTROL uses the gmem-wide bin size, not the tile's */
	OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
	OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(gmem->bin_w) |
			A4XX_RB_MODE_CONTROL_HEIGHT(gmem->bin_h));

	/* stencil: func ALWAYS and all ops KEEP, for both faces */
	OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
	OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_ALWAYS) |
			A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_ALWAYS) |
			A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
	OUT_RING(ring, 0x00000000); /* RB_STENCIL_CONTROL2 */

	OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
	OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
			A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
			A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
			A4XX_GRAS_SC_CONTROL_RASTER_MODE(1));

	OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 1);
	OUT_RING(ring, A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST |
			A4XX_PC_PRIM_VTX_CNTL_VAROUT(1));

	OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
	OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
	OUT_RING(ring, 0);            /* ??? UNKNOWN_2209 */

	fd4_program_emit(ring, &emit);
	fd4_emit_vertex_bufs(ring, &emit);

	/* for gmem pitch/base calculations, we need to use the non-
	 * truncated tile sizes:
	 */
	bin_w = gmem->bin_w;
	/* bin_h is updated for symmetry but is not read again below */
	bin_h = gmem->bin_h;

	/* depth/stencil is restored at the gmem offset from depth_base() */
	if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
		emit_mem2gmem_surf(ctx, depth_base(ctx), pfb->zsbuf, bin_w);

	/* color is restored at gmem offset 0; only cbufs[0] is handled */
	if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_COLOR))
		emit_mem2gmem_surf(ctx, 0, pfb->cbufs[0], bin_w);

	/* leave raster mode 0 selected (MSAA_DISABLE is not set here) */
	OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
	OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
			A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
			A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));

	OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
	OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(gmem->bin_w) |
			A4XX_RB_MODE_CONTROL_HEIGHT(gmem->bin_h) |
			0x00010000);  /* XXX */
}

/* Apply 'vismode' to every recorded draw patch: each patched cmdstream
 * dword becomes the saved value OR'd with DRAW4(0, 0, 0, vismode).  The
 * patch list is emptied afterwards.
 */
static void
patch_draws(struct fd_context *ctx, enum pc_di_vis_cull_mode vismode)
{
	unsigned idx = 0;

	while (idx < fd_patch_num_elements(&ctx->draw_patches)) {
		struct fd_cs_patch *entry =
				fd_patch_element(&ctx->draw_patches, idx);

		*entry->cs = entry->val | DRAW4(0, 0, 0, vismode);
		idx++;
	}

	/* all pending patches have been applied */
	util_dynarray_resize(&ctx->draw_patches, 0);
}

/* Apply 'val' to every recorded rbrc patch: each patched cmdstream dword
 * becomes the saved value OR'd with 'val'.  The patch list is emptied
 * afterwards.
 */
static void
patch_rbrc(struct fd_context *ctx, uint32_t val)
{
	struct fd4_context *a4xx = fd4_context(ctx);
	unsigned idx = 0;

	while (idx < fd_patch_num_elements(&a4xx->rbrc_patches)) {
		struct fd_cs_patch *entry =
				fd_patch_element(&a4xx->rbrc_patches, idx);

		*entry->cs = entry->val | val;
		idx++;
	}

	/* all pending patches have been applied */
	util_dynarray_resize(&a4xx->rbrc_patches, 0);
}

/* Setup for rendering directly to system memory (no tiling): restore the
 * baseline state, then program the framebuffer dimensions, render targets,
 * a zero bin offset and a screen scissor covering the whole framebuffer.
 * Pending draw and rbrc patches are resolved at the end.
 */
static void
fd4_emit_sysmem_prep(struct fd_context *ctx)
{
	struct fd_ringbuffer *rb = ctx->ring;
	struct pipe_framebuffer_state *fb = &ctx->framebuffer;

	fd4_emit_restore(ctx);

	OUT_PKT0(rb, REG_A4XX_RB_FRAME_BUFFER_DIMENSION, 1);
	OUT_RING(rb,
			A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(fb->width) |
			A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(fb->height));

	/* no gmem bases and a bin width of 0 */
	emit_mrt(rb, fb->nr_cbufs, fb->cbufs, NULL, 0);

	/* there is only one "tile": the whole framebuffer at offset (0,0) */
	OUT_PKT0(rb, REG_A4XX_RB_BIN_OFFSET, 1);
	OUT_RING(rb, A4XX_RB_BIN_OFFSET_X(0) | A4XX_RB_BIN_OFFSET_Y(0));

	OUT_PKT0(rb, REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
	OUT_RING(rb,
			A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
			A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
	OUT_RING(rb,
			A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(fb->width - 1) |
			A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(fb->height - 1));

	OUT_PKT0(rb, REG_A4XX_RB_MODE_CONTROL, 1);
	OUT_RING(rb,
			A4XX_RB_MODE_CONTROL_WIDTH(0) |
			A4XX_RB_MODE_CONTROL_HEIGHT(0) |
			0x00c00000);  /* XXX */

	patch_draws(ctx, IGNORE_VISIBILITY);
	patch_rbrc(ctx, 0);  /* XXX */
}

/* Program the VSC size address and the config, data address and data
 * length registers for the 8 VSC pipes.  A pipe's buffer object is
 * allocated on first use (0x40000 bytes).
 *
 * Each register group is one packet of 8 consecutive registers, so the
 * three loops must stay separate.
 */
static void
update_vsc_pipe(struct fd_context *ctx)
{
	struct fd4_context *a4xx = fd4_context(ctx);
	struct fd_ringbuffer *rb = ctx->ring;
	struct fd_vsc_pipe *vp;
	int n;

	OUT_PKT0(rb, REG_A4XX_VSC_SIZE_ADDRESS, 1);
	OUT_RELOCW(rb, a4xx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */

	/* VSC_PIPE_CONFIG[0..7] */
	OUT_PKT0(rb, REG_A4XX_VSC_PIPE_CONFIG_REG(0), 8);
	for (n = 0; n < 8; n++) {
		vp = &ctx->pipe[n];
		OUT_RING(rb,
				A4XX_VSC_PIPE_CONFIG_REG_X(vp->x) |
				A4XX_VSC_PIPE_CONFIG_REG_Y(vp->y) |
				A4XX_VSC_PIPE_CONFIG_REG_W(vp->w) |
				A4XX_VSC_PIPE_CONFIG_REG_H(vp->h));
	}

	/* VSC_PIPE_DATA_ADDRESS[0..7], allocating missing buffers */
	OUT_PKT0(rb, REG_A4XX_VSC_PIPE_DATA_ADDRESS_REG(0), 8);
	for (n = 0; n < 8; n++) {
		vp = &ctx->pipe[n];
		if (vp->bo == NULL)
			vp->bo = fd_bo_new(ctx->dev, 0x40000,
					DRM_FREEDRENO_GEM_TYPE_KMEM);
		OUT_RELOCW(rb, vp->bo, 0, 0, 0);
	}

	/* VSC_PIPE_DATA_LENGTH[0..7]: buffer size minus 32 */
	OUT_PKT0(rb, REG_A4XX_VSC_PIPE_DATA_LENGTH_REG(0), 8);
	for (n = 0; n < 8; n++) {
		vp = &ctx->pipe[n];
		OUT_RING(rb, fd_bo_size(vp->bo) - 32);
	}
}

/* Before the first tile: restore baseline state, program the bin size
 * into VSC_BIN_SIZE and RB_MODE_CONTROL, set up the VSC pipes, and resolve
 * the pending draw and rbrc patches.
 */
static void
fd4_emit_tile_init(struct fd_context *ctx)
{
	struct fd_gmem_stateobj *bins = &ctx->gmem;
	struct fd_ringbuffer *rb = ctx->ring;

	fd4_emit_restore(ctx);

	OUT_PKT0(rb, REG_A4XX_VSC_BIN_SIZE, 1);
	OUT_RING(rb,
			A4XX_VSC_BIN_SIZE_WIDTH(bins->bin_w) |
			A4XX_VSC_BIN_SIZE_HEIGHT(bins->bin_h));

	OUT_PKT0(rb, REG_A4XX_RB_MODE_CONTROL, 1);
	OUT_RING(rb,
			A4XX_RB_MODE_CONTROL_WIDTH(bins->bin_w) |
			A4XX_RB_MODE_CONTROL_HEIGHT(bins->bin_h) |
			0x00010000);  /* XXX */

	update_vsc_pipe(ctx);
	patch_draws(ctx, IGNORE_VISIBILITY);

	/* XXX or BINNING_PASS.. but maybe we can emit only from gmem */
	patch_rbrc(ctx, 0);
}

/* Before mem2gmem: program the depth buffer location/format/pitch in
 * gmem and, if flagged by needs_rb_fbd, the framebuffer dimensions.
 * With no zsbuf bound, the format field is left unset in RB_DEPTH_INFO,
 * both pitch registers are zero and GRAS_DEPTH_CONTROL gets DEPTH4_NONE.
 */
static void
fd4_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile)
{
	struct fd_gmem_stateobj *bins = &ctx->gmem;
	struct pipe_framebuffer_state *fb = &ctx->framebuffer;
	struct fd_ringbuffer *rb = ctx->ring;

	/* RB_DEPTH_INFO, RB_DEPTH_PITCH, RB_DEPTH_PITCH2 */
	OUT_PKT0(rb, REG_A4XX_RB_DEPTH_INFO, 3);
	if (fb->zsbuf) {
		uint32_t bytes_pp;

		OUT_RING(rb, A4XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base(ctx)) |
				A4XX_RB_DEPTH_INFO_DEPTH_FORMAT(
					fd4_pipe2depth(fb->zsbuf->format)));

		bytes_pp = util_format_get_blocksize(fb->zsbuf->format);
		OUT_RING(rb, A4XX_RB_DEPTH_PITCH(bytes_pp * bins->bin_w));
		OUT_RING(rb, A4XX_RB_DEPTH_PITCH2(bytes_pp * bins->bin_w));
	} else {
		OUT_RING(rb, A4XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base(ctx)));
		OUT_RING(rb, 0x00000000);
		OUT_RING(rb, 0x00000000);
	}

	OUT_PKT0(rb, REG_A4XX_GRAS_DEPTH_CONTROL, 1);
	if (fb->zsbuf)
		OUT_RING(rb, A4XX_GRAS_DEPTH_CONTROL_FORMAT(
				fd4_pipe2depth(fb->zsbuf->format)));
	else
		OUT_RING(rb, A4XX_GRAS_DEPTH_CONTROL_FORMAT(DEPTH4_NONE));

	/* framebuffer dimensions are only emitted when requested */
	if (!ctx->needs_rb_fbd)
		return;

	fd_wfi(ctx, rb);
	OUT_PKT0(rb, REG_A4XX_RB_FRAME_BUFFER_DIMENSION, 1);
	OUT_RING(rb,
			A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(fb->width) |
			A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(fb->height));
	ctx->needs_rb_fbd = false;
}

/* Before the IB to the rendering cmds: select the current bin with
 * CP_SET_BIN, program the render targets for the gmem bin width, and set
 * the bin offset and screen scissor to this tile's rectangle.
 */
static void
fd4_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile)
{
	struct pipe_framebuffer_state *fb = &ctx->framebuffer;
	struct fd_gmem_stateobj *bins = &ctx->gmem;
	struct fd_ringbuffer *rb = ctx->ring;

	/* inclusive tile rectangle in framebuffer coordinates */
	uint32_t left = tile->xoff;
	uint32_t top = tile->yoff;
	uint32_t right = tile->xoff + tile->bin_w - 1;
	uint32_t bottom = tile->yoff + tile->bin_h - 1;

	OUT_PKT3(rb, CP_SET_BIN, 3);
	OUT_RING(rb, 0x00000000);
	OUT_RING(rb, CP_SET_BIN_1_X1(left) | CP_SET_BIN_1_Y1(top));
	OUT_RING(rb, CP_SET_BIN_2_X2(right) | CP_SET_BIN_2_Y2(bottom));

	emit_mrt(rb, fb->nr_cbufs, fb->cbufs, NULL, bins->bin_w);

	/* bin offset is the tile's top-left corner */
	OUT_PKT0(rb, REG_A4XX_RB_BIN_OFFSET, 1);
	OUT_RING(rb,
			A4XX_RB_BIN_OFFSET_X(tile->xoff) |
			A4XX_RB_BIN_OFFSET_Y(tile->yoff));

	/* screen scissor matches the tile rectangle */
	OUT_PKT0(rb, REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
	OUT_RING(rb,
			A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(left) |
			A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(top));
	OUT_RING(rb,
			A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(right) |
			A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(bottom));
}

/* Install the a4xx gmem/tiling callbacks on the context. */
void
fd4_gmem_init(struct pipe_context *pctx)
{
	struct fd_context *fdctx = fd_context(pctx);

	/* direct-to-sysmem path */
	fdctx->emit_sysmem_prep = fd4_emit_sysmem_prep;

	/* tiled (gmem) path */
	fdctx->emit_tile_init       = fd4_emit_tile_init;
	fdctx->emit_tile_prep       = fd4_emit_tile_prep;
	fdctx->emit_tile_mem2gmem   = fd4_emit_tile_mem2gmem;
	fdctx->emit_tile_renderprep = fd4_emit_tile_renderprep;
	fdctx->emit_tile_gmem2mem   = fd4_emit_tile_gmem2mem;
}