Beispiel #1
0
void
fd2_emit_vertex_bufs(struct fd_ringbuffer *ring, uint32_t val,
		struct fd2_vertex_buf *vbufs, uint32_t n)
{
	unsigned i;

	OUT_PKT3(ring, CP_SET_CONSTANT, 1 + (2 * n));
	OUT_RING(ring, (0x1 << 16) | (val & 0xffff));
	for (i = 0; i < n; i++) {
		struct fd_resource *rsc = fd_resource(vbufs[i].prsc);
		OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 3, 0);
		OUT_RING (ring, vbufs[i].size);
	}
}
Beispiel #2
0
static struct fd_hw_sample *
occlusion_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring)
{
	struct fd_hw_sample *samp =
			fd_hw_sample_init(ctx, sizeof(struct fd_rb_samp_ctrs));

	/* Set RB_SAMPLE_COUNT_ADDR to samp->offset plus value of
	 * HW_QUERY_BASE_REG register:
	 */
	OUT_PKT3(ring, CP_SET_CONSTANT, 3);
	OUT_RING(ring, CP_REG(REG_A3XX_RB_SAMPLE_COUNT_ADDR) | 0x80000000);
	OUT_RING(ring, HW_QUERY_BASE_REG);
	OUT_RING(ring, samp->offset);

	OUT_PKT0(ring, REG_A3XX_RB_SAMPLE_COUNT_CONTROL, 1);
	OUT_RING(ring, A3XX_RB_SAMPLE_COUNT_CONTROL_COPY);

	OUT_PKT3(ring, CP_DRAW_INDX, 3);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, DRAW(DI_PT_POINTLIST_A2XX, DI_SRC_SEL_AUTO_INDEX,
			INDEX_SIZE_IGN, USE_VISIBILITY));
	OUT_RING(ring, 0);             /* NumIndices */

	fd_event_write(ctx, ring, ZPASS_DONE);

	OUT_PKT0(ring, REG_A3XX_RBBM_PERFCTR_CTL, 1);
	OUT_RING(ring, A3XX_RBBM_PERFCTR_CTL_ENABLE);

	OUT_PKT0(ring, REG_A3XX_VBIF_PERF_CNT_EN, 1);
	OUT_RING(ring, A3XX_VBIF_PERF_CNT_EN_CNT0 |
			A3XX_VBIF_PERF_CNT_EN_CNT1 |
			A3XX_VBIF_PERF_CNT_EN_PWRCNT0 |
			A3XX_VBIF_PERF_CNT_EN_PWRCNT1 |
			A3XX_VBIF_PERF_CNT_EN_PWRCNT2);

	return samp;
}
Beispiel #3
0
static void
fd2_draw(struct fd_context *ctx, const struct pipe_draw_info *info)
{
	struct fd_ringbuffer *ring = ctx->ring;

	if (ctx->dirty & FD_DIRTY_VTXBUF)
		emit_vertexbufs(ctx);

	fd2_emit_state(ctx, ctx->dirty);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
	OUT_RING(ring, info->start);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
	OUT_RING(ring, 0x0000003b);

	OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
	OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);

	OUT_WFI (ring);

	OUT_PKT3(ring, CP_SET_CONSTANT, 3);
	OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
	OUT_RING(ring, info->max_index);        /* VGT_MAX_VTX_INDX */
	OUT_RING(ring, info->min_index);        /* VGT_MIN_VTX_INDX */

	fd_draw_emit(ctx, ring, IGNORE_VISIBILITY, info);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_UNKNOWN_2010));
	OUT_RING(ring, 0x00000000);

	emit_cacheflush(ring);
}
Beispiel #4
0
static void
emit_shader(struct fd_ringbuffer *ring, struct fd_shader *shader,
		enum adreno_state_block state_block)
{
	uint32_t i;
	OUT_PKT3(ring, CP_LOAD_STATE, 2 + shader->sizedwords);
	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
			CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
			CP_LOAD_STATE_0_STATE_BLOCK(state_block) |
			CP_LOAD_STATE_0_NUM_UNIT(instrlen(shader)));
	OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
			CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
	for (i = 0; i < shader->sizedwords; i++)
		OUT_RING(ring, shader->bin[i]);
}
Beispiel #5
0
static void emit_uniconst(struct fd_ringbuffer *ring,
		struct fd_shader *shader, struct fd_parameters *uniforms,
		enum adreno_state_block state_block)
{
	static uint32_t buf[512]; /* cheesy, but test code isn't multithreaded */
	uint32_t i, j, k, sz = 0;

	memset(buf, 0, sizeof(buf));

	for (i = 0; i < shader->ir->consts_count; i++) {
		struct ir3_const *c = shader->ir->consts[i];
		uint32_t off = c->cstart->num;
		memcpy(&buf[off], c->val, sizeof(c->val));
		sz = max(sz, off + (sizeof(c->val) / sizeof(buf[0])));
	}

	for (i = 0; i < shader->ir->uniforms_count; i++) {
		struct ir3_uniform *u = shader->ir->uniforms[i];
		struct fd_param *p = find_param(uniforms, u->name);
		const uint32_t *dwords = p->data;
		uint32_t off = u->cstart->num;

		for (j = 0; j < p->count; j++) {
			for (k = 0; k < p->size; k++)
				buf[off++] = *(dwords++);
			/* zero pad if needed: */
			for (; k < ALIGN(p->size, 4); k++)
				buf[off++] = 0;
		}

		sz = max(sz, off);
	}

	/* if no constants, don't emit the CP_LOAD_STATE */
	if (sz == 0)
		return;

	OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
			CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
			CP_LOAD_STATE_0_STATE_BLOCK(state_block) |
			CP_LOAD_STATE_0_NUM_UNIT(sz/2));
	OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
			CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
	for (i = 0; i < sz; i++)
		OUT_RING(ring, buf[i]);
}
Beispiel #6
0
static void
emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so)
{
	const struct ir3_info *si = &so->info;
	enum adreno_state_block sb;
	enum adreno_state_src src;
	uint32_t i, sz, *bin;

	if (so->type == SHADER_VERTEX) {
		sb = SB_VERT_SHADER;
	} else {
		sb = SB_FRAG_SHADER;
	}

	if (fd_mesa_debug & FD_DBG_DIRECT) {
		sz = si->sizedwords;
		src = SS_DIRECT;
		bin = fd_bo_map(so->bo);
	} else {
		sz = 0;
		src = 2;  // enums different on a4xx..
		bin = NULL;
	}

	OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
			CP_LOAD_STATE_0_STATE_SRC(src) |
			CP_LOAD_STATE_0_STATE_BLOCK(sb) |
			CP_LOAD_STATE_0_NUM_UNIT(so->instrlen));
	if (bin) {
		OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
				CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER));
	} else {
		OUT_RELOC(ring, so->bo, 0,
				CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER), 0);
	}

	/* for how clever coverity is, it is sometimes rather dull, and
	 * doesn't realize that the only case where bin==NULL, sz==0:
	 */
	assume(bin || (sz == 0));

	for (i = 0; i < sz; i++) {
		OUT_RING(ring, bin[i]);
	}
}
Beispiel #7
0
/* regid:          base const register
 * prsc or dwords: buffer containing constant values
 * sizedwords:     size of const value buffer
 */
void
fd3_emit_constant(struct fd_ringbuffer *ring,
		enum adreno_state_block sb,
		uint32_t regid, uint32_t offset, uint32_t sizedwords,
		const uint32_t *dwords, struct pipe_resource *prsc)
{
	uint32_t i, sz;
	enum adreno_state_src src;

	if (prsc) {
		sz = 0;
		src = SS_INDIRECT;
	} else {
		sz = sizedwords;
		src = SS_DIRECT;
	}

	/* we have this sometimes, not others.. perhaps we could be clever
	 * and figure out actually when we need to invalidate cache:
	 */
	OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
	OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(0));
	OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(0) |
			A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(INVALIDATE) |
			A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE);

	OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) |
			CP_LOAD_STATE_0_STATE_SRC(src) |
			CP_LOAD_STATE_0_STATE_BLOCK(sb) |
			CP_LOAD_STATE_0_NUM_UNIT(sizedwords/2));
	if (prsc) {
		struct fd_bo *bo = fd_resource(prsc)->bo;
		OUT_RELOC(ring, bo, offset,
				CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS), 0);
	} else {
		OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
				CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
		dwords = (uint32_t *)&((uint8_t *)dwords)[offset];
	}
	for (i = 0; i < sz; i++) {
		OUT_RING(ring, dwords[i]);
	}
}
Beispiel #8
0
static texmask
emit_texture(struct fd_ringbuffer *ring, struct fd_context *ctx,
		struct fd_texture_stateobj *tex, unsigned samp_id, texmask emitted)
{
	unsigned const_idx = fd2_get_const_idx(ctx, tex, samp_id);
	static const struct fd2_sampler_stateobj dummy_sampler = {};
	static const struct fd2_pipe_sampler_view dummy_view = {};
	const struct fd2_sampler_stateobj *sampler;
	const struct fd2_pipe_sampler_view *view;
	struct fd_resource *rsc;

	if (emitted & (1 << const_idx))
		return 0;

	sampler = tex->samplers[samp_id] ?
			fd2_sampler_stateobj(tex->samplers[samp_id]) :
			&dummy_sampler;
	view = tex->textures[samp_id] ?
			fd2_pipe_sampler_view(tex->textures[samp_id]) :
			&dummy_view;

	rsc = view->base.texture ? fd_resource(view->base.texture) : NULL;

	OUT_PKT3(ring, CP_SET_CONSTANT, 7);
	OUT_RING(ring, 0x00010000 + (0x6 * const_idx));

	OUT_RING(ring, sampler->tex0 | view->tex0);
	if (rsc)
		OUT_RELOC(ring, rsc->bo, 0, view->tex1, 0);
	else
		OUT_RING(ring, 0);

	OUT_RING(ring, view->tex2);
	OUT_RING(ring, sampler->tex3 | view->tex3);
	OUT_RING(ring, sampler->tex4 | view->tex4);

	if (rsc && rsc->base.last_level)
		OUT_RELOC(ring, rsc->bo, fd_resource_offset(rsc, 1, 0), view->tex5, 0);
	else
		OUT_RING(ring, view->tex5);

	return (1 << const_idx);
}
Beispiel #9
0
static void
emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so)
{
	const struct ir3_info *si = &so->info;
	enum adreno_state_block sb;
	enum adreno_state_src src;
	uint32_t i, sz, *bin;

	if (so->type == SHADER_VERTEX) {
		sb = SB_VERT_SHADER;
	} else {
		sb = SB_FRAG_SHADER;
	}

	if (fd_mesa_debug & FD_DBG_DIRECT) {
		sz = si->sizedwords;
		src = SS_DIRECT;
		bin = fd_bo_map(so->bo);
	} else {
		sz = 0;
		src = SS_INDIRECT;
		bin = NULL;
	}

	OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
			CP_LOAD_STATE_0_STATE_SRC(src) |
			CP_LOAD_STATE_0_STATE_BLOCK(sb) |
			CP_LOAD_STATE_0_NUM_UNIT(so->instrlen));
	if (bin) {
		OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
				CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER));
	} else {
		OUT_RELOC(ring, so->bo, 0,
				CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER), 0);
	}
	for (i = 0; i < sz; i++) {
		OUT_RING(ring, bin[i]);
	}
}
Beispiel #10
0
/* regid:          base const register
 * prsc or dwords: buffer containing constant values
 * sizedwords:     size of const value buffer
 */
void
fd4_emit_const(struct fd_ringbuffer *ring, enum shader_t type,
		uint32_t regid, uint32_t offset, uint32_t sizedwords,
		const uint32_t *dwords, struct pipe_resource *prsc)
{
	uint32_t i, sz;
	enum adreno_state_src src;

	debug_assert((regid % 4) == 0);
	debug_assert((sizedwords % 4) == 0);

	if (prsc) {
		sz = 0;
		src = 0x2;  // TODO ??
	} else {
		sz = sizedwords;
		src = SS_DIRECT;
	}

	OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) |
			CP_LOAD_STATE_0_STATE_SRC(src) |
			CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
			CP_LOAD_STATE_0_NUM_UNIT(sizedwords/4));
	if (prsc) {
		struct fd_bo *bo = fd_resource(prsc)->bo;
		OUT_RELOC(ring, bo, offset,
				CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS), 0);
	} else {
		OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
				CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
		dwords = (uint32_t *)&((uint8_t *)dwords)[offset];
	}
	for (i = 0; i < sz; i++) {
		OUT_RING(ring, dwords[i]);
	}
}
Beispiel #11
0
/* this is same for a2xx/a3xx, so split into helper: */
void
fd_draw_emit(struct fd_context *ctx, const struct pipe_draw_info *info)
{
	struct fd_ringbuffer *ring = ctx->ring;
	struct pipe_index_buffer *idx = &ctx->indexbuf;
	struct fd_bo *idx_bo = NULL;
	enum pc_di_index_size idx_type = INDEX_SIZE_IGN;
	enum pc_di_src_sel src_sel;
	uint32_t idx_size, idx_offset;

	if (info->indexed) {
		assert(!idx->user_buffer);

		idx_bo = fd_resource(idx->buffer)->bo;
		idx_type = size2indextype(idx->index_size);
		idx_size = idx->index_size * info->count;
		idx_offset = idx->offset;
		src_sel = DI_SRC_SEL_DMA;
	} else {
		idx_bo = NULL;
		idx_type = INDEX_SIZE_IGN;
		idx_size = 0;
		idx_offset = 0;
		src_sel = DI_SRC_SEL_AUTO_INDEX;
	}

	OUT_PKT3(ring, CP_DRAW_INDX, info->indexed ? 5 : 3);
	OUT_RING(ring, 0x00000000);        /* viz query info. */
	OUT_RING(ring, DRAW(mode2primtype(info->mode),
			src_sel, idx_type, IGNORE_VISIBILITY));
	OUT_RING(ring, info->count);       /* NumIndices */
	if (info->indexed) {
		OUT_RELOC(ring, idx_bo, idx_offset, 0);
		OUT_RING (ring, idx_size);
	}
}
Beispiel #12
0
void
fd_state_emit(struct pipe_context *pctx, uint32_t dirty)
{
	struct fd_context *ctx = fd_context(pctx);
	struct fd_ringbuffer *ring = ctx->ring;

	/* NOTE: we probably want to eventually refactor this so each state
	 * object handles emitting it's own state..  although the mapping of
	 * state to registers is not always orthogonal, sometimes a single
	 * register contains bitfields coming from multiple state objects,
	 * so not sure the best way to deal with that yet.
	 */

	if (dirty & FD_DIRTY_SAMPLE_MASK) {
		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
		OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
		OUT_RING(ring, ctx->sample_mask);
	}

	if (dirty & FD_DIRTY_ZSA) {
		struct pipe_stencil_ref *sr = &ctx->stencil_ref;

		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
		OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
		OUT_RING(ring, ctx->zsa->rb_depthcontrol);

		OUT_PKT3(ring, CP_SET_CONSTANT, 4);
		OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
		OUT_RING(ring, ctx->zsa->rb_stencilrefmask_bf |
				A2XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[1]));
		OUT_RING(ring, ctx->zsa->rb_stencilrefmask |
				A2XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0]));
		OUT_RING(ring, ctx->zsa->rb_alpha_ref);
	}

	if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_FRAMEBUFFER)) {
		OUT_PKT3(ring, CP_SET_CONSTANT, 3);
		OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
		OUT_RING(ring, ctx->rasterizer->pa_cl_clip_cntl);
		OUT_RING(ring, ctx->rasterizer->pa_su_sc_mode_cntl |
				A2XX_PA_SU_SC_MODE_CNTL_VTX_WINDOW_OFFSET_ENABLE);

		OUT_PKT3(ring, CP_SET_CONSTANT, 5);
		OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_POINT_SIZE));
		OUT_RING(ring, ctx->rasterizer->pa_su_point_size);
		OUT_RING(ring, ctx->rasterizer->pa_su_point_minmax);
		OUT_RING(ring, ctx->rasterizer->pa_su_line_cntl);
		OUT_RING(ring, ctx->rasterizer->pa_sc_line_stipple);

		OUT_PKT3(ring, CP_SET_CONSTANT, 6);
		OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_VTX_CNTL));
		OUT_RING(ring, ctx->rasterizer->pa_su_vtx_cntl);
		OUT_RING(ring, fui(1.0));                /* PA_CL_GB_VERT_CLIP_ADJ */
		OUT_RING(ring, fui(1.0));                /* PA_CL_GB_VERT_DISC_ADJ */
		OUT_RING(ring, fui(1.0));                /* PA_CL_GB_HORZ_CLIP_ADJ */
		OUT_RING(ring, fui(1.0));                /* PA_CL_GB_HORZ_DISC_ADJ */
	}

	if (dirty & FD_DIRTY_SCISSOR) {
		OUT_PKT3(ring, CP_SET_CONSTANT, 3);
		OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
		OUT_RING(ring, xy2d(ctx->scissor.minx,   /* PA_SC_WINDOW_SCISSOR_TL */
				ctx->scissor.miny));
		OUT_RING(ring, xy2d(ctx->scissor.maxx,   /* PA_SC_WINDOW_SCISSOR_BR */
				ctx->scissor.maxy));

		ctx->max_scissor.minx = MIN2(ctx->max_scissor.minx, ctx->scissor.minx);
		ctx->max_scissor.miny = MIN2(ctx->max_scissor.miny, ctx->scissor.miny);
		ctx->max_scissor.maxx = MAX2(ctx->max_scissor.maxx, ctx->scissor.maxx);
		ctx->max_scissor.maxy = MAX2(ctx->max_scissor.maxy, ctx->scissor.maxy);
	}

	if (dirty & FD_DIRTY_VIEWPORT) {
		OUT_PKT3(ring, CP_SET_CONSTANT, 7);
		OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
		OUT_RING(ring, fui(ctx->viewport.scale[0]));       /* PA_CL_VPORT_XSCALE */
		OUT_RING(ring, fui(ctx->viewport.translate[0]));   /* PA_CL_VPORT_XOFFSET */
		OUT_RING(ring, fui(ctx->viewport.scale[1]));       /* PA_CL_VPORT_YSCALE */
		OUT_RING(ring, fui(ctx->viewport.translate[1]));   /* PA_CL_VPORT_YOFFSET */
		OUT_RING(ring, fui(ctx->viewport.scale[2]));       /* PA_CL_VPORT_ZSCALE */
		OUT_RING(ring, fui(ctx->viewport.translate[2]));   /* PA_CL_VPORT_ZOFFSET */

		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
		OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL));
		OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_W0_FMT |
				A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
				A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
				A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
				A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA |
				A2XX_PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA |
				A2XX_PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA);
	}

	if (dirty & (FD_DIRTY_PROG | FD_DIRTY_VTXSTATE | FD_DIRTY_TEXSTATE)) {
		fd_program_validate(ctx);
		fd_program_emit(ring, &ctx->prog);
	}

	if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) {
		emit_constants(ring,  VS_CONST_BASE * 4,
				&ctx->constbuf[PIPE_SHADER_VERTEX],
				(dirty & FD_DIRTY_PROG) ? ctx->prog.vp : NULL);
		emit_constants(ring, PS_CONST_BASE * 4,
				&ctx->constbuf[PIPE_SHADER_FRAGMENT],
				(dirty & FD_DIRTY_PROG) ? ctx->prog.fp : NULL);
	}

	if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_ZSA)) {
		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
		OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL));
		OUT_RING(ring, ctx->zsa->rb_colorcontrol | ctx->blend->rb_colorcontrol);
	}

	if (dirty & FD_DIRTY_BLEND) {
		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
		OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
		OUT_RING(ring, ctx->blend->rb_blendcontrol);

		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
		OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK));
		OUT_RING(ring, ctx->blend->rb_colormask);
	}

	if (dirty & (FD_DIRTY_VERTTEX | FD_DIRTY_FRAGTEX | FD_DIRTY_PROG))
		emit_textures(ring, ctx);

	ctx->dirty &= ~dirty;
}
Beispiel #13
0
static void
emit_constants(struct fd_ringbuffer *ring, uint32_t base,
		struct fd_constbuf_stateobj *constbuf,
		struct fd_shader_stateobj *shader)
{
	uint32_t enabled_mask = constbuf->enabled_mask;
	uint32_t start_base = base;
	unsigned i;

	// XXX TODO only emit dirty consts.. but we need to keep track if
	// they are clobbered by a clear, gmem2mem, or mem2gmem..
	constbuf->dirty_mask = enabled_mask;

	/* emit user constants: */
	while (enabled_mask) {
		unsigned index = ffs(enabled_mask) - 1;
		struct pipe_constant_buffer *cb = &constbuf->cb[index];
		unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */

		// I expect that size should be a multiple of vec4's:
		assert(size == align(size, 4));

		/* hmm, sometimes we still seem to end up with consts bound,
		 * even if shader isn't using them, which ends up overwriting
		 * const reg's used for immediates.. this is a hack to work
		 * around that:
		 */
		if (shader && ((base - start_base) >= (shader->first_immediate * 4)))
			break;

		if (constbuf->dirty_mask & (1 << index)) {
			const uint32_t *dwords;

			if (cb->user_buffer) {
				dwords = cb->user_buffer;
			} else {
				struct fd_resource *rsc = fd_resource(cb->buffer);
				dwords = fd_bo_map(rsc->bo);
			}

			dwords = (uint32_t *)(((uint8_t *)dwords) + cb->buffer_offset);

			OUT_PKT3(ring, CP_SET_CONSTANT, size + 1);
			OUT_RING(ring, base);
			for (i = 0; i < size; i++)
				OUT_RING(ring, *(dwords++));

			constbuf->dirty_mask &= ~(1 << index);
		}

		base += size;
		enabled_mask &= ~(1 << index);
	}

	/* emit shader immediates: */
	if (shader) {
		for (i = 0; i < shader->num_immediates; i++) {
			OUT_PKT3(ring, CP_SET_CONSTANT, 5);
			OUT_RING(ring, start_base + (4 * (shader->first_immediate + i)));
			OUT_RING(ring, shader->immediates[i].val[0]);
			OUT_RING(ring, shader->immediates[i].val[1]);
			OUT_RING(ring, shader->immediates[i].val[2]);
			OUT_RING(ring, shader->immediates[i].val[3]);
			base += 4;
		}
	}
}
Beispiel #14
0
static void
emit_constants(struct fd_ringbuffer *ring,
		enum adreno_state_block sb,
		struct fd_constbuf_stateobj *constbuf,
		struct ir3_shader_variant *shader,
		bool emit_immediates)
{
	uint32_t enabled_mask = constbuf->enabled_mask;
	uint32_t max_const;
	int i;

	// XXX TODO only emit dirty consts.. but we need to keep track if
	// they are clobbered by a clear, gmem2mem, or mem2gmem..
	constbuf->dirty_mask = enabled_mask;

	/* in particular, with binning shader we may end up with unused
	 * consts, ie. we could end up w/ constlen that is smaller
	 * than first_immediate.  In that case truncate the user consts
	 * early to avoid HLSQ lockup caused by writing too many consts
	 */
	max_const = MIN2(shader->first_driver_param, shader->constlen);

	/* emit user constants: */
	if (enabled_mask & 1) {
		const unsigned index = 0;
		struct pipe_constant_buffer *cb = &constbuf->cb[index];
		unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */

		// I expect that size should be a multiple of vec4's:
		assert(size == align(size, 4));

		/* and even if the start of the const buffer is before
		 * first_immediate, the end may not be:
		 */
		size = MIN2(size, 4 * max_const);

		if (size && (constbuf->dirty_mask & (1 << index))) {
			fd4_emit_constant(ring, sb, 0,
					cb->buffer_offset, size,
					cb->user_buffer, cb->buffer);
			constbuf->dirty_mask &= ~(1 << index);
		}

		enabled_mask &= ~(1 << index);
	}

	/* emit ubos: */
	if (shader->constlen > shader->first_driver_param) {
		uint32_t params = MIN2(4, shader->constlen - shader->first_driver_param);
		OUT_PKT3(ring, CP_LOAD_STATE, 2 + params * 4);
		OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(shader->first_driver_param) |
				CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
				CP_LOAD_STATE_0_STATE_BLOCK(sb) |
				CP_LOAD_STATE_0_NUM_UNIT(params));
		OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
				CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));

		for (i = 1; i <= params * 4; i++) {
			struct pipe_constant_buffer *cb = &constbuf->cb[i];
			assert(!cb->user_buffer);
			if ((enabled_mask & (1 << i)) && cb->buffer)
				OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset, 0, 0);
			else
				OUT_RING(ring, 0xbad00000 | ((i - 1) << 16));
		}
	}

	/* emit shader immediates: */
	if (shader && emit_immediates) {
		int size = shader->immediates_count;
		uint32_t base = shader->first_immediate;

		/* truncate size to avoid writing constants that shader
		 * does not use:
		 */
		size = MIN2(size + base, shader->constlen) - base;

		/* convert out of vec4: */
		base *= 4;
		size *= 4;

		if (size > 0) {
			fd4_emit_constant(ring, sb, base,
				0, size, shader->immediates[0].val, NULL);
		}
	}
}
Beispiel #15
0
/* emit setup at begin of new cmdstream buffer (don't rely on previous
 * state, there could have been a context switch between ioctls):
 */
void
fd4_emit_restore(struct fd_context *ctx)
{
	struct fd4_context *fd4_ctx = fd4_context(ctx);
	struct fd_ringbuffer *ring = ctx->ring;

	OUT_PKT0(ring, REG_A4XX_RBBM_PERFCTR_CTL, 1);
	OUT_RING(ring, 0x00000001);

	OUT_PKT0(ring, REG_A4XX_GRAS_DEBUG_ECO_CONTROL, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0EC3, 1);
	OUT_RING(ring, 0x00000006);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0F03, 1);
	OUT_RING(ring, 0x0000003a);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0D01, 1);
	OUT_RING(ring, 0x00000001);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0E42, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT0(ring, REG_A4XX_UCHE_CACHE_WAYS_VFD, 1);
	OUT_RING(ring, 0x00000007);

	OUT_PKT0(ring, REG_A4XX_UCHE_CACHE_MODE_CONTROL, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT0(ring, REG_A4XX_UCHE_INVALIDATE0, 2);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000012);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0E05, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0CC5, 1);
	OUT_RING(ring, 0x00000006);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0CC6, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0EC2, 1);
	OUT_RING(ring, 0x00040000);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_2001, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
	OUT_RING(ring, 0x00001000);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_20EF, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F0, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F1, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F2, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 4);
	OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(0) |
			A4XX_RB_BLEND_RED_FLOAT(0.0));
	OUT_RING(ring, A4XX_RB_BLEND_GREEN_UINT(0) |
			A4XX_RB_BLEND_GREEN_FLOAT(0.0));
	OUT_RING(ring, A4XX_RB_BLEND_BLUE_UINT(0) |
			A4XX_RB_BLEND_BLUE_FLOAT(0.0));
	OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(0x7fff) |
			A4XX_RB_BLEND_ALPHA_FLOAT(1.0));

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F7, 1);
	OUT_RING(ring, 0x3f800000);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_2152, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_2153, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_2154, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_2155, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_2156, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_2157, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_21C3, 1);
	OUT_RING(ring, 0x0000001d);

	OUT_PKT0(ring, REG_A4XX_PC_GS_PARAM, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_21E6, 1);
	OUT_RING(ring, 0x00000001);

	OUT_PKT0(ring, REG_A4XX_PC_HS_PARAM, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT0(ring, REG_A4XX_UNKNOWN_22D7, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT0(ring, REG_A4XX_TPL1_TP_TEX_OFFSET, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT0(ring, REG_A4XX_TPL1_TP_TEX_COUNT, 1);
	OUT_RING(ring, A4XX_TPL1_TP_TEX_COUNT_VS(16) |
			A4XX_TPL1_TP_TEX_COUNT_HS(0) |
			A4XX_TPL1_TP_TEX_COUNT_DS(0) |
			A4XX_TPL1_TP_TEX_COUNT_GS(0));

	OUT_PKT0(ring, REG_A4XX_TPL1_TP_FS_TEX_COUNT, 1);
	OUT_RING(ring, 16);

	/* we don't use this yet.. probably best to disable.. */
	OUT_PKT3(ring, CP_SET_DRAW_STATE, 2);
	OUT_RING(ring, CP_SET_DRAW_STATE_0_COUNT(0) |
			CP_SET_DRAW_STATE_0_DISABLE_ALL_GROUPS |
			CP_SET_DRAW_STATE_0_GROUP_ID(0));
	OUT_RING(ring, CP_SET_DRAW_STATE_1_ADDR(0));

	OUT_PKT0(ring, REG_A4XX_SP_VS_PVT_MEM_PARAM, 2);
	OUT_RING(ring, 0x08000001);                  /* SP_VS_PVT_MEM_PARAM */
	OUT_RELOC(ring, fd4_ctx->vs_pvt_mem, 0,0,0); /* SP_VS_PVT_MEM_ADDR */

	OUT_PKT0(ring, REG_A4XX_SP_FS_PVT_MEM_PARAM, 2);
	OUT_RING(ring, 0x08000001);                  /* SP_FS_PVT_MEM_PARAM */
	OUT_RELOC(ring, fd4_ctx->fs_pvt_mem, 0,0,0); /* SP_FS_PVT_MEM_ADDR */

	OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
	OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
			A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
			A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
			A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));

	OUT_PKT0(ring, REG_A4XX_RB_MSAA_CONTROL, 1);
	OUT_RING(ring, A4XX_RB_MSAA_CONTROL_DISABLE |
			A4XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE));

	OUT_PKT0(ring, REG_A4XX_GRAS_CL_GB_CLIP_ADJ, 1);
	OUT_RING(ring, A4XX_GRAS_CL_GB_CLIP_ADJ_HORZ(0) |
			A4XX_GRAS_CL_GB_CLIP_ADJ_VERT(0));

	OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1);
	OUT_RING(ring, A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(FUNC_ALWAYS));

	OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1);
	OUT_RING(ring, A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff));

	OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1);
	OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(0xf));

	OUT_PKT0(ring, REG_A4XX_GRAS_CLEAR_CNTL, 1);
	OUT_RING(ring, A4XX_GRAS_CLEAR_CNTL_NOT_FASTCLEAR);

	OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
	OUT_RING(ring, 0x0);

	ctx->needs_rb_fbd = true;
}
Beispiel #16
0
static void
emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
		enum adreno_state_block sb, struct fd_texture_stateobj *tex)
{
	unsigned i;

	if (tex->num_samplers > 0) {
		int num_samplers;

		/* not sure if this is an a420.0 workaround, but we seem
		 * to need to emit these in pairs.. emit a final dummy
		 * entry if odd # of samplers:
		 */
		num_samplers = align(tex->num_samplers, 2);

		/* output sampler state: */
		OUT_PKT3(ring, CP_LOAD_STATE, 2 + (2 * num_samplers));
		OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
				CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
				CP_LOAD_STATE_0_STATE_BLOCK(sb) |
				CP_LOAD_STATE_0_NUM_UNIT(num_samplers));
		OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
				CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
		for (i = 0; i < tex->num_samplers; i++) {
			static const struct fd4_sampler_stateobj dummy_sampler = {};
			const struct fd4_sampler_stateobj *sampler = tex->samplers[i] ?
					fd4_sampler_stateobj(tex->samplers[i]) :
					&dummy_sampler;
			OUT_RING(ring, sampler->texsamp0);
			OUT_RING(ring, sampler->texsamp1);
		}

		for (; i < num_samplers; i++) {
			OUT_RING(ring, 0x00000000);
			OUT_RING(ring, 0x00000000);
		}
	}

	if (tex->num_textures > 0) {
		/* emit texture state: */
		OUT_PKT3(ring, CP_LOAD_STATE, 2 + (8 * tex->num_textures));
		OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
				CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
				CP_LOAD_STATE_0_STATE_BLOCK(sb) |
				CP_LOAD_STATE_0_NUM_UNIT(tex->num_textures));
		OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
				CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
		for (i = 0; i < tex->num_textures; i++) {
			static const struct fd4_pipe_sampler_view dummy_view = {};
			const struct fd4_pipe_sampler_view *view = tex->textures[i] ?
					fd4_pipe_sampler_view(tex->textures[i]) :
					&dummy_view;
			unsigned start = view->base.u.tex.first_level;

			OUT_RING(ring, view->texconst0);
			OUT_RING(ring, view->texconst1);
			OUT_RING(ring, view->texconst2);
			OUT_RING(ring, view->texconst3);
			if (view->base.texture) {
				struct fd_resource *rsc = fd_resource(view->base.texture);
				uint32_t offset = fd_resource_offset(rsc, start, 0);
				OUT_RELOC(ring, rsc->bo, offset, view->textconst4, 0);
			} else {
				OUT_RING(ring, 0x00000000);
			}
			OUT_RING(ring, 0x00000000);
			OUT_RING(ring, 0x00000000);
			OUT_RING(ring, 0x00000000);
		}
	}
}
Beispiel #17
0
/* emit per-context initialization:
 */
void
fd_state_emit_setup(struct pipe_context *pctx)
{
	struct fd_context *ctx = fd_context(pctx);
	struct fd_ringbuffer *ring = ctx->ring;

	OUT_PKT0(ring, REG_A2XX_TP0_CHICKEN, 1);
	OUT_RING(ring, 0x00000002);

	OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
	OUT_RING(ring, 0x00007fff);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_SQ_VS_CONST));
	OUT_RING(ring, A2XX_SQ_VS_CONST_BASE(VS_CONST_BASE) |
			A2XX_SQ_VS_CONST_SIZE(0x100));

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_SQ_PS_CONST));
	OUT_RING(ring, A2XX_SQ_PS_CONST_BASE(PS_CONST_BASE) |
			A2XX_SQ_PS_CONST_SIZE(0xe0));

	OUT_PKT3(ring, CP_SET_CONSTANT, 3);
	OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
	OUT_RING(ring, 0xffffffff);        /* VGT_MAX_VTX_INDX */
	OUT_RING(ring, 0x00000000);        /* VGT_MIN_VTX_INDX */

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
	OUT_RING(ring, 0x00000000);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
	OUT_RING(ring, 0x0000003b);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_SQ_CONTEXT_MISC));
	OUT_RING(ring, A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL(CENTERS_ONLY));

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_SQ_INTERPOLATOR_CNTL));
	OUT_RING(ring, 0xffffffff);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_CONFIG));
	OUT_RING(ring, 0x00000000);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_LINE_CNTL));
	OUT_RING(ring, 0x00000000);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
	OUT_RING(ring, 0x00000000);

	// XXX we change this dynamically for draw/clear.. vs gmem<->mem..
	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL));
	OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(COLOR_DEPTH));

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_SAMPLE_POS));
	OUT_RING(ring, 0x88888888);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_DEST_MASK));
	OUT_RING(ring, 0xffffffff);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_DEST_INFO));
	OUT_RING(ring, A2XX_RB_COPY_DEST_INFO_FORMAT(COLORX_4_4_4_4) |
			A2XX_RB_COPY_DEST_INFO_WRITE_RED |
			A2XX_RB_COPY_DEST_INFO_WRITE_GREEN |
			A2XX_RB_COPY_DEST_INFO_WRITE_BLUE |
			A2XX_RB_COPY_DEST_INFO_WRITE_ALPHA);

	OUT_PKT3(ring, CP_SET_CONSTANT, 3);
	OUT_RING(ring, CP_REG(REG_A2XX_SQ_WRAPPING_0));
	OUT_RING(ring, 0x00000000);        /* SQ_WRAPPING_0 */
	OUT_RING(ring, 0x00000000);        /* SQ_WRAPPING_1 */

	OUT_PKT3(ring, CP_SET_DRAW_INIT_FLAGS, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT3(ring, CP_WAIT_REG_EQ, 4);
	OUT_RING(ring, 0x000005d0);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x5f601000);
	OUT_RING(ring, 0x00000001);

	OUT_PKT0(ring, REG_A2XX_SQ_INST_STORE_MANAGMENT, 1);
	OUT_RING(ring, 0x00000180);

	OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
	OUT_RING(ring, 0x00000300);

	OUT_PKT3(ring, CP_SET_SHADER_BASES, 1);
	OUT_RING(ring, 0x80000180);

	/* not sure what this form of CP_SET_CONSTANT is.. */
	OUT_PKT3(ring, CP_SET_CONSTANT, 13);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x469c4000);
	OUT_RING(ring, 0x3f800000);
	OUT_RING(ring, 0x3f000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x40000000);
	OUT_RING(ring, 0x3f400000);
	OUT_RING(ring, 0x3ec00000);
	OUT_RING(ring, 0x3e800000);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK));
	OUT_RING(ring, A2XX_RB_COLOR_MASK_WRITE_RED |
			A2XX_RB_COLOR_MASK_WRITE_GREEN |
			A2XX_RB_COLOR_MASK_WRITE_BLUE |
			A2XX_RB_COLOR_MASK_WRITE_ALPHA);

	OUT_PKT3(ring, CP_SET_CONSTANT, 5);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_RED));
	OUT_RING(ring, 0x00000000);        /* RB_BLEND_RED */
	OUT_RING(ring, 0x00000000);        /* RB_BLEND_GREEN */
	OUT_RING(ring, 0x00000000);        /* RB_BLEND_BLUE */
	OUT_RING(ring, 0x000000ff);        /* RB_BLEND_ALPHA */

	fd_ringbuffer_flush(ring);
	fd_ringmarker_mark(ctx->draw_start);
}
Beispiel #18
0
/* emit per-context initialization:
 */
void
fd2_emit_restore(struct fd_context *ctx, struct fd_ringbuffer *ring)
{
	if (is_a20x(ctx->screen)) {
		OUT_PKT0(ring, REG_A2XX_RB_BC_CONTROL, 1);
		OUT_RING(ring,
			A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT(3) |
			A2XX_RB_BC_CONTROL_DISABLE_LZ_NULL_ZCMD_DROP |
			A2XX_RB_BC_CONTROL_ENABLE_CRC_UPDATE |
			A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT(8) |
			A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT(3));

		/* not sure why this is required */
		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
		OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_VIZ_QUERY));
		OUT_RING(ring, A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID(16));
	}

	OUT_PKT0(ring, REG_A2XX_TP0_CHICKEN, 1);
	OUT_RING(ring, 0x00000002);

	OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
	OUT_RING(ring, 0x00007fff);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_SQ_VS_CONST));
	OUT_RING(ring, A2XX_SQ_VS_CONST_BASE(VS_CONST_BASE) |
			A2XX_SQ_VS_CONST_SIZE(0x100));

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_SQ_PS_CONST));
	OUT_RING(ring, A2XX_SQ_PS_CONST_BASE(PS_CONST_BASE) |
			A2XX_SQ_PS_CONST_SIZE(0xe0));

	OUT_PKT3(ring, CP_SET_CONSTANT, 3);
	OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
	OUT_RING(ring, 0xffffffff);        /* VGT_MAX_VTX_INDX */
	OUT_RING(ring, 0x00000000);        /* VGT_MIN_VTX_INDX */

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
	OUT_RING(ring, 0x00000000);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
	OUT_RING(ring, 0x0000003b);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_SQ_CONTEXT_MISC));
	OUT_RING(ring, A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL(CENTERS_ONLY));

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_SQ_INTERPOLATOR_CNTL));
	OUT_RING(ring, 0xffffffff);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_CONFIG));
	OUT_RING(ring, 0x00000000);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_LINE_CNTL));
	OUT_RING(ring, 0x00000000);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
	OUT_RING(ring, 0x00000000);

	// XXX we change this dynamically for draw/clear.. vs gmem<->mem..
	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL));
	OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(COLOR_DEPTH));

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_SAMPLE_POS));
	OUT_RING(ring, 0x88888888);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_DEST_MASK));
	OUT_RING(ring, 0xffffffff);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_DEST_INFO));
	OUT_RING(ring, A2XX_RB_COPY_DEST_INFO_FORMAT(COLORX_4_4_4_4) |
			A2XX_RB_COPY_DEST_INFO_WRITE_RED |
			A2XX_RB_COPY_DEST_INFO_WRITE_GREEN |
			A2XX_RB_COPY_DEST_INFO_WRITE_BLUE |
			A2XX_RB_COPY_DEST_INFO_WRITE_ALPHA);

	OUT_PKT3(ring, CP_SET_CONSTANT, 3);
	OUT_RING(ring, CP_REG(REG_A2XX_SQ_WRAPPING_0));
	OUT_RING(ring, 0x00000000);        /* SQ_WRAPPING_0 */
	OUT_RING(ring, 0x00000000);        /* SQ_WRAPPING_1 */

	OUT_PKT3(ring, CP_SET_DRAW_INIT_FLAGS, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT3(ring, CP_WAIT_REG_EQ, 4);
	OUT_RING(ring, 0x000005d0);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x5f601000);
	OUT_RING(ring, 0x00000001);

	OUT_PKT0(ring, REG_A2XX_SQ_INST_STORE_MANAGMENT, 1);
	OUT_RING(ring, 0x00000180);

	OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
	OUT_RING(ring, 0x00000300);

	OUT_PKT3(ring, CP_SET_SHADER_BASES, 1);
	OUT_RING(ring, 0x80000180);

	/* not sure what this form of CP_SET_CONSTANT is.. */
	OUT_PKT3(ring, CP_SET_CONSTANT, 13);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x469c4000);
	OUT_RING(ring, 0x3f800000);
	OUT_RING(ring, 0x3f000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x40000000);
	OUT_RING(ring, 0x3f400000);
	OUT_RING(ring, 0x3ec00000);
	OUT_RING(ring, 0x3e800000);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK));
	OUT_RING(ring, A2XX_RB_COLOR_MASK_WRITE_RED |
			A2XX_RB_COLOR_MASK_WRITE_GREEN |
			A2XX_RB_COLOR_MASK_WRITE_BLUE |
			A2XX_RB_COLOR_MASK_WRITE_ALPHA);

	OUT_PKT3(ring, CP_SET_CONSTANT, 5);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_RED));
	OUT_RING(ring, 0x00000000);        /* RB_BLEND_RED */
	OUT_RING(ring, 0x00000000);        /* RB_BLEND_GREEN */
	OUT_RING(ring, 0x00000000);        /* RB_BLEND_BLUE */
	OUT_RING(ring, 0x000000ff);        /* RB_BLEND_ALPHA */
}
Beispiel #19
0
void
fd2_emit_state(struct fd_context *ctx, const enum fd_dirty_3d_state dirty)
{
	struct fd2_blend_stateobj *blend = fd2_blend_stateobj(ctx->blend);
	struct fd2_zsa_stateobj *zsa = fd2_zsa_stateobj(ctx->zsa);
	struct fd_ringbuffer *ring = ctx->batch->draw;

	/* NOTE: we probably want to eventually refactor this so each state
	 * object handles emitting it's own state..  although the mapping of
	 * state to registers is not always orthogonal, sometimes a single
	 * register contains bitfields coming from multiple state objects,
	 * so not sure the best way to deal with that yet.
	 */

	if (dirty & FD_DIRTY_SAMPLE_MASK) {
		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
		OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
		OUT_RING(ring, ctx->sample_mask);
	}

	if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) {
		struct pipe_stencil_ref *sr = &ctx->stencil_ref;

		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
		OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
		OUT_RING(ring, zsa->rb_depthcontrol);

		OUT_PKT3(ring, CP_SET_CONSTANT, 4);
		OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
		OUT_RING(ring, zsa->rb_stencilrefmask_bf |
				A2XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[1]));
		OUT_RING(ring, zsa->rb_stencilrefmask |
				A2XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0]));
		OUT_RING(ring, zsa->rb_alpha_ref);
	}

	if (ctx->rasterizer && dirty & FD_DIRTY_RASTERIZER) {
		struct fd2_rasterizer_stateobj *rasterizer =
				fd2_rasterizer_stateobj(ctx->rasterizer);
		OUT_PKT3(ring, CP_SET_CONSTANT, 3);
		OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
		OUT_RING(ring, rasterizer->pa_cl_clip_cntl);
		OUT_RING(ring, rasterizer->pa_su_sc_mode_cntl |
				A2XX_PA_SU_SC_MODE_CNTL_VTX_WINDOW_OFFSET_ENABLE);

		OUT_PKT3(ring, CP_SET_CONSTANT, 5);
		OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_POINT_SIZE));
		OUT_RING(ring, rasterizer->pa_su_point_size);
		OUT_RING(ring, rasterizer->pa_su_point_minmax);
		OUT_RING(ring, rasterizer->pa_su_line_cntl);
		OUT_RING(ring, rasterizer->pa_sc_line_stipple);

		OUT_PKT3(ring, CP_SET_CONSTANT, 6);
		OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_VTX_CNTL));
		OUT_RING(ring, rasterizer->pa_su_vtx_cntl);
		OUT_RING(ring, fui(1.0));                /* PA_CL_GB_VERT_CLIP_ADJ */
		OUT_RING(ring, fui(1.0));                /* PA_CL_GB_VERT_DISC_ADJ */
		OUT_RING(ring, fui(1.0));                /* PA_CL_GB_HORZ_CLIP_ADJ */
		OUT_RING(ring, fui(1.0));                /* PA_CL_GB_HORZ_DISC_ADJ */
	}

	/* NOTE: scissor enabled bit is part of rasterizer state: */
	if (dirty & (FD_DIRTY_SCISSOR | FD_DIRTY_RASTERIZER)) {
		struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);

		OUT_PKT3(ring, CP_SET_CONSTANT, 3);
		OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
		OUT_RING(ring, xy2d(scissor->minx,       /* PA_SC_WINDOW_SCISSOR_TL */
				scissor->miny));
		OUT_RING(ring, xy2d(scissor->maxx,       /* PA_SC_WINDOW_SCISSOR_BR */
				scissor->maxy));

		ctx->batch->max_scissor.minx = MIN2(ctx->batch->max_scissor.minx, scissor->minx);
		ctx->batch->max_scissor.miny = MIN2(ctx->batch->max_scissor.miny, scissor->miny);
		ctx->batch->max_scissor.maxx = MAX2(ctx->batch->max_scissor.maxx, scissor->maxx);
		ctx->batch->max_scissor.maxy = MAX2(ctx->batch->max_scissor.maxy, scissor->maxy);
	}

	if (dirty & FD_DIRTY_VIEWPORT) {
		OUT_PKT3(ring, CP_SET_CONSTANT, 7);
		OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
		OUT_RING(ring, fui(ctx->viewport.scale[0]));       /* PA_CL_VPORT_XSCALE */
		OUT_RING(ring, fui(ctx->viewport.translate[0]));   /* PA_CL_VPORT_XOFFSET */
		OUT_RING(ring, fui(ctx->viewport.scale[1]));       /* PA_CL_VPORT_YSCALE */
		OUT_RING(ring, fui(ctx->viewport.translate[1]));   /* PA_CL_VPORT_YOFFSET */
		OUT_RING(ring, fui(ctx->viewport.scale[2]));       /* PA_CL_VPORT_ZSCALE */
		OUT_RING(ring, fui(ctx->viewport.translate[2]));   /* PA_CL_VPORT_ZOFFSET */

		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
		OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL));
		OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_W0_FMT |
				A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
				A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
				A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
				A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA |
				A2XX_PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA |
				A2XX_PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA);
	}

	if (dirty & (FD_DIRTY_PROG | FD_DIRTY_VTXSTATE | FD_DIRTY_TEXSTATE)) {
		fd2_program_validate(ctx);
		fd2_program_emit(ring, &ctx->prog);
	}

	if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONST)) {
		emit_constants(ring,  VS_CONST_BASE * 4,
				&ctx->constbuf[PIPE_SHADER_VERTEX],
				(dirty & FD_DIRTY_PROG) ? ctx->prog.vp : NULL);
		emit_constants(ring, PS_CONST_BASE * 4,
				&ctx->constbuf[PIPE_SHADER_FRAGMENT],
				(dirty & FD_DIRTY_PROG) ? ctx->prog.fp : NULL);
	}

	if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_ZSA)) {
		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
		OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL));
		OUT_RING(ring, blend ? zsa->rb_colorcontrol | blend->rb_colorcontrol : 0);
	}

	if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) {
		enum pipe_format format =
			pipe_surface_format(ctx->batch->framebuffer.cbufs[0]);
		bool has_alpha = util_format_has_alpha(format);

		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
		OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
		OUT_RING(ring, blend ? blend->rb_blendcontrol_alpha |
			COND(has_alpha, blend->rb_blendcontrol_rgb) |
			COND(!has_alpha, blend->rb_blendcontrol_no_alpha_rgb) : 0);

		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
		OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK));
		OUT_RING(ring, blend ? blend->rb_colormask : 0xf);
	}

	if (dirty & FD_DIRTY_BLEND_COLOR) {
		OUT_PKT3(ring, CP_SET_CONSTANT, 5);
		OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_RED));
		OUT_RING(ring, float_to_ubyte(ctx->blend_color.color[0]));
		OUT_RING(ring, float_to_ubyte(ctx->blend_color.color[1]));
		OUT_RING(ring, float_to_ubyte(ctx->blend_color.color[2]));
		OUT_RING(ring, float_to_ubyte(ctx->blend_color.color[3]));
	}

	if (dirty & (FD_DIRTY_TEX | FD_DIRTY_PROG))
		emit_textures(ring, ctx);
}
Beispiel #20
0
static void
fd4_clear(struct fd_context *ctx, unsigned buffers,
		const union pipe_color_union *color, double depth, unsigned stencil)
{
	struct fd4_context *fd4_ctx = fd4_context(ctx);
	struct fd_ringbuffer *ring = ctx->ring;
	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
	unsigned dirty = ctx->dirty;
	unsigned ce, i;
	struct fd4_emit emit = {
		.vtx  = &fd4_ctx->solid_vbuf_state,
		.prog = &ctx->solid_prog,
		.key = {
			.half_precision = true,
		},
	};
	uint32_t colr = 0;

	if ((buffers & PIPE_CLEAR_COLOR) && pfb->nr_cbufs)
		colr  = pack_rgba(pfb->cbufs[0]->format, color->f);

	dirty &= FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR;
	dirty |= FD_DIRTY_PROG;
	emit.dirty = dirty;

	OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 1);
	OUT_RING(ring, A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);

	/* emit generic state now: */
	fd4_emit_state(ctx, ring, &emit);
	reset_viewport(ring, pfb);

	if (buffers & PIPE_CLEAR_DEPTH) {
		OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
		OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE |
				A4XX_RB_DEPTH_CONTROL_Z_ENABLE |
				A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS));

		fd_wfi(ctx, ring);
		OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_ZOFFSET_0, 2);
		OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(0.0));
		OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(depth));
		ctx->dirty |= FD_DIRTY_VIEWPORT;
	} else {
		OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
		OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
	}

	if (buffers & PIPE_CLEAR_STENCIL) {
		OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2);
		OUT_RING(ring, A4XX_RB_STENCILREFMASK_STENCILREF(stencil) |
				A4XX_RB_STENCILREFMASK_STENCILMASK(stencil) |
				A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
		OUT_RING(ring, A4XX_RB_STENCILREFMASK_STENCILREF(0) |
				A4XX_RB_STENCILREFMASK_STENCILMASK(0) |
				0xff000000 | // XXX ???
				A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));

		OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
		OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
				A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_ALWAYS) |
				A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
				A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_REPLACE) |
				A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
				A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
				A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
				A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
				A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
		OUT_RING(ring, 0x00000000); /* RB_STENCIL_CONTROL2 */
	} else {
		OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2);
		OUT_RING(ring, A4XX_RB_STENCILREFMASK_STENCILREF(0) |
				A4XX_RB_STENCILREFMASK_STENCILMASK(0) |
				A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(0));
		OUT_RING(ring, A4XX_RB_STENCILREFMASK_BF_STENCILREF(0) |
				A4XX_RB_STENCILREFMASK_BF_STENCILMASK(0) |
				A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0));

		OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
		OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
				A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
				A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
				A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
				A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
				A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
				A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
				A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
		OUT_RING(ring, 0x00000000); /* RB_STENCIL_CONTROL2 */
	}

	if (buffers & PIPE_CLEAR_COLOR) {
		OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1);
		OUT_RING(ring, A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER));
		ce = 0xf;
	} else {
		ce = 0x0;
	}

	for (i = 0; i < 8; i++) {
		OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
		OUT_RING(ring, A4XX_RB_MRT_CONTROL_FASTCLEAR |
				A4XX_RB_MRT_CONTROL_B11 |
				A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(ce));

		OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
		OUT_RING(ring, A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) |
				A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
				A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(FACTOR_ZERO) |
				A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(FACTOR_ONE) |
				A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
				A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO));
	}

	fd4_emit_vertex_bufs(ring, &emit);

	OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
	OUT_RING(ring, 0x0);          /* XXX GRAS_ALPHA_CONTROL */

	OUT_PKT0(ring, REG_A4XX_GRAS_CLEAR_CNTL, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT0(ring, REG_A4XX_RB_CLEAR_COLOR_DW0, 4);
	OUT_RING(ring, colr);         /* RB_CLEAR_COLOR_DW0 */
	OUT_RING(ring, colr);         /* RB_CLEAR_COLOR_DW1 */
	OUT_RING(ring, colr);         /* RB_CLEAR_COLOR_DW2 */
	OUT_RING(ring, colr);         /* RB_CLEAR_COLOR_DW3 */

	/* until fastclear works: */
	fd4_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL);

	OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
	OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
	OUT_RING(ring, 0);            /* ??? UNKNOWN_2209 */

	OUT_PKT0(ring, REG_A4XX_PC_RESTART_INDEX, 1);
	OUT_RING(ring, 0xffffffff);   /* PC_RESTART_INDEX */

	OUT_PKT3(ring, CP_UNKNOWN_1A, 1);
	OUT_RING(ring, 0x00000001);

	fd4_draw(ctx, ring, DI_PT_RECTLIST, USE_VISIBILITY,
			DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL);

	OUT_PKT3(ring, CP_UNKNOWN_1A, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT0(ring, REG_A4XX_GRAS_CLEAR_CNTL, 1);
	OUT_RING(ring, A4XX_GRAS_CLEAR_CNTL_NOT_FASTCLEAR);

	OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
	OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
			A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
			A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
			A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
}
Beispiel #21
0
static bool
fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
{
	struct fd4_context *fd4_ctx = fd4_context(ctx);
	struct fd4_emit emit = {
		.debug = &ctx->debug,
		.vtx  = &ctx->vtx,
		.prog = &ctx->prog,
		.info = info,
		.key = {
			.color_two_side = ctx->rasterizer->light_twoside,
			.vclamp_color = ctx->rasterizer->clamp_vertex_color,
			.fclamp_color = ctx->rasterizer->clamp_fragment_color,
			.rasterflat = ctx->rasterizer->flatshade,
			// TODO set .half_precision based on render target format,
			// ie. float16 and smaller use half, float32 use full..
			.half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
			.ucp_enables = ctx->rasterizer->clip_plane_enable,
			.has_per_samp = (fd4_ctx->fsaturate || fd4_ctx->vsaturate ||
					fd4_ctx->fastc_srgb || fd4_ctx->vastc_srgb),
			.vsaturate_s = fd4_ctx->vsaturate_s,
			.vsaturate_t = fd4_ctx->vsaturate_t,
			.vsaturate_r = fd4_ctx->vsaturate_r,
			.fsaturate_s = fd4_ctx->fsaturate_s,
			.fsaturate_t = fd4_ctx->fsaturate_t,
			.fsaturate_r = fd4_ctx->fsaturate_r,
			.vastc_srgb = fd4_ctx->vastc_srgb,
			.fastc_srgb = fd4_ctx->fastc_srgb,
		},
		.rasterflat = ctx->rasterizer->flatshade,
		.sprite_coord_enable = ctx->rasterizer->sprite_coord_enable,
		.sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
	};

	fixup_shader_state(ctx, &emit.key);

	unsigned dirty = ctx->dirty;

	/* do regular pass first, since that is more likely to fail compiling: */

	if (!(fd4_emit_get_vp(&emit) && fd4_emit_get_fp(&emit)))
		return false;

	emit.key.binning_pass = false;
	emit.dirty = dirty;

	struct fd_ringbuffer *ring = ctx->batch->draw;

	if (ctx->rasterizer->rasterizer_discard) {
		fd_wfi(ctx->batch, ring);
		OUT_PKT3(ring, CP_REG_RMW, 3);
		OUT_RING(ring, REG_A4XX_RB_RENDER_CONTROL);
		OUT_RING(ring, ~A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE);
		OUT_RING(ring, A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE);
	}

	draw_impl(ctx, ctx->batch->draw, &emit);

	if (ctx->rasterizer->rasterizer_discard) {
		fd_wfi(ctx->batch, ring);
		OUT_PKT3(ring, CP_REG_RMW, 3);
		OUT_RING(ring, REG_A4XX_RB_RENDER_CONTROL);
		OUT_RING(ring, ~A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE);
		OUT_RING(ring, 0);
	}

	/* and now binning pass: */
	emit.key.binning_pass = true;
	emit.dirty = dirty & ~(FD_DIRTY_BLEND);
	emit.vp = NULL;   /* we changed key so need to refetch vp */
	emit.fp = NULL;
	draw_impl(ctx, ctx->batch->binning, &emit);

	return true;
}
Beispiel #22
0
static void
fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
{
	struct fd4_context *fd4_ctx = fd4_context(ctx);
	struct fd_gmem_stateobj *gmem = &ctx->gmem;
	struct fd_ringbuffer *ring = ctx->ring;
	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
	struct fd4_emit emit = {
			.vtx = &fd4_ctx->blit_vbuf_state,
			.sprite_coord_enable = 1,
			/* NOTE: They all use the same VP, this is for vtx bufs. */
			.prog = &ctx->blit_prog[0],
			.key = {
				.half_precision = fd_half_precision(pfb),
			},
			.no_decode_srgb = true,
	};
	unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS] = {0};
	float x0, y0, x1, y1;
	unsigned bin_w = tile->bin_w;
	unsigned bin_h = tile->bin_h;
	unsigned i;

	/* write texture coordinates to vertexbuf: */
	x0 = ((float)tile->xoff) / ((float)pfb->width);
	x1 = ((float)tile->xoff + bin_w) / ((float)pfb->width);
	y0 = ((float)tile->yoff) / ((float)pfb->height);
	y1 = ((float)tile->yoff + bin_h) / ((float)pfb->height);

	OUT_PKT3(ring, CP_MEM_WRITE, 5);
	OUT_RELOCW(ring, fd_resource(fd4_ctx->blit_texcoord_vbuf)->bo, 0, 0, 0);
	OUT_RING(ring, fui(x0));
	OUT_RING(ring, fui(y0));
	OUT_RING(ring, fui(x1));
	OUT_RING(ring, fui(y1));

	for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
		mrt_comp[i] = ((i < pfb->nr_cbufs) && pfb->cbufs[i]) ? 0xf : 0;

		OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
		OUT_RING(ring, A4XX_RB_MRT_CONTROL_FASTCLEAR |
				A4XX_RB_MRT_CONTROL_B11 |
				A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf));

		OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
		OUT_RING(ring, A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) |
				A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
				A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(FACTOR_ZERO) |
				A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(FACTOR_ONE) |
				A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
				A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO));
	}

	OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1);
	OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
			A4XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
			A4XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
			A4XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
			A4XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
			A4XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
			A4XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
			A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));

	OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
	OUT_RING(ring, 0x8);          /* XXX RB_RENDER_CONTROL */

	OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
	OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_LESS));

	OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
	OUT_RING(ring, 0x280000);     /* XXX GRAS_CL_CLIP_CNTL */

	OUT_PKT0(ring, REG_A4XX_GRAS_SU_MODE_CONTROL, 1);
	OUT_RING(ring, A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0) |
			A4XX_GRAS_SU_MODE_CONTROL_RENDERING_PASS);

	OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6);
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0((float)bin_w/2.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0((float)bin_w/2.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0((float)bin_h/2.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(-(float)bin_h/2.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(0.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(1.0));

	OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2);
	OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(bin_w - 1) |
			A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(bin_h - 1));
	OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
			A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));

	OUT_PKT0(ring, REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
	OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
			A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
	OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(bin_w - 1) |
			A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(bin_h - 1));

	OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
	OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(gmem->bin_w) |
			A4XX_RB_MODE_CONTROL_HEIGHT(gmem->bin_h));

	OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
	OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_ALWAYS) |
			A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_ALWAYS) |
			A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
	OUT_RING(ring, 0x00000000); /* RB_STENCIL_CONTROL2 */

	OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
	OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
			A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
			A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
			A4XX_GRAS_SC_CONTROL_RASTER_MODE(1));

	OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 1);
	OUT_RING(ring, A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST |
			A4XX_PC_PRIM_VTX_CNTL_VAROUT(1));

	OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
	OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
	OUT_RING(ring, 0);            /* ??? UNKNOWN_2209 */

	fd4_emit_vertex_bufs(ring, &emit);

	/* for gmem pitch/base calculations, we need to use the non-
	 * truncated tile sizes:
	 */
	bin_w = gmem->bin_w;
	bin_h = gmem->bin_h;

	if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_COLOR)) {
		emit.prog = &ctx->blit_prog[pfb->nr_cbufs - 1];
		emit.fp = NULL;      /* frag shader changed so clear cache */
		fd4_program_emit(ring, &emit, pfb->nr_cbufs, pfb->cbufs);
		emit_mem2gmem_surf(ctx, gmem->cbuf_base, pfb->cbufs, pfb->nr_cbufs, bin_w);
	}

	if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
		switch (pfb->zsbuf->format) {
		case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
		case PIPE_FORMAT_Z32_FLOAT:
			emit.prog = (pfb->zsbuf->format == PIPE_FORMAT_Z32_FLOAT) ?
					&ctx->blit_z : &ctx->blit_zs;
			emit.key.half_precision = false;

			OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
			OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_Z_ENABLE |
					A4XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE |
					A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS) |
					A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE);

			OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
			OUT_RING(ring, A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE);

			OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
			OUT_RING(ring, 0x80000);   /* GRAS_CL_CLIP_CNTL */

			break;
		default:
			/* Non-float can use a regular color write. It's split over 8-bit
			 * components, so half precision is always sufficient.
			 */
			emit.prog = &ctx->blit_prog[0];
			emit.key.half_precision = true;
			break;
		}
		emit.fp = NULL;      /* frag shader changed so clear cache */
		fd4_program_emit(ring, &emit, 1, &pfb->zsbuf);
		emit_mem2gmem_surf(ctx, gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w);
	}

	OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
	OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
			A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
			A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));

	OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
	OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(gmem->bin_w) |
			A4XX_RB_MODE_CONTROL_HEIGHT(gmem->bin_h) |
			0x00010000);  /* XXX */
}
Beispiel #23
0
static void
fd3_draw(struct fd_context *ctx, const struct pipe_draw_info *info)
{
	unsigned dirty = ctx->dirty;
	struct fd3_shader_key key = {
			/* do binning pass first: */
			.binning_pass = true,
			.color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false,
			// TODO set .half_precision based on render target format,
			// ie. float16 and smaller use half, float32 use full..
			.half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
	};
	draw_impl(ctx, info, ctx->binning_ring,
			dirty & ~(FD_DIRTY_BLEND), key);
	/* and now regular (non-binning) pass: */
	key.binning_pass = false;
	draw_impl(ctx, info, ctx->ring, dirty, key);
}

/* binning pass cmds for a clear:
 * NOTE: newer blob drivers don't use binning for clear, which is probably
 * preferable since it is low vtx count.  However that doesn't seem to
 * actually work for me.  Not sure if it is depending on support for
 * clear pass (rather than using solid-fill shader), or something else
 * that newer blob is doing differently.  Once that is figured out, we
 * can remove fd3_clear_binning().
 */
static void
fd3_clear_binning(struct fd_context *ctx, unsigned dirty)
{
	struct fd3_context *fd3_ctx = fd3_context(ctx);
	struct fd_ringbuffer *ring = ctx->binning_ring;
	struct fd3_shader_key key = {
			.binning_pass = true,
			.half_precision = true,
	};

	fd3_emit_state(ctx, ring, &ctx->solid_prog, dirty, key);

	fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key),
			(struct fd3_vertex_buf[]) {{
				.prsc = fd3_ctx->solid_vbuf,
				.stride = 12,
				.format = PIPE_FORMAT_R32G32B32_FLOAT,
			}}, 1);

	OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
	OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
			A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
			A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |
			A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
	OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
	OUT_RING(ring, 0);            /* VFD_INDEX_MIN */
	OUT_RING(ring, 2);            /* VFD_INDEX_MAX */
	OUT_RING(ring, 0);            /* VFD_INSTANCEID_OFFSET */
	OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
	OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1);
	OUT_RING(ring, 0xffffffff);   /* PC_RESTART_INDEX */

	OUT_PKT3(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, PERFCOUNTER_STOP);

	fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
			DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL);
}

static void
fd3_clear(struct fd_context *ctx, unsigned buffers,
		const union pipe_color_union *color, double depth, unsigned stencil)
{
	struct fd3_context *fd3_ctx = fd3_context(ctx);
	struct fd_ringbuffer *ring = ctx->ring;
	unsigned dirty = ctx->dirty;
	unsigned ce, i;
	struct fd3_shader_key key = {
			.half_precision = true,
	};

	dirty &= FD_DIRTY_VIEWPORT | FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR;
	dirty |= FD_DIRTY_PROG;

	fd3_clear_binning(ctx, dirty);

	/* emit generic state now: */
	fd3_emit_state(ctx, ring, &ctx->solid_prog, dirty, key);

	OUT_PKT0(ring, REG_A3XX_RB_BLEND_ALPHA, 1);
	OUT_RING(ring, A3XX_RB_BLEND_ALPHA_UINT(0xff) |
			A3XX_RB_BLEND_ALPHA_FLOAT(1.0));

	OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
	OUT_RINGP(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER),
			&fd3_ctx->rbrc_patches);

	if (buffers & PIPE_CLEAR_DEPTH) {
		OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
		OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE |
				A3XX_RB_DEPTH_CONTROL_Z_ENABLE |
				A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS));

		OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_ZOFFSET, 2);
		OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0));
		OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(depth));
		ctx->dirty |= FD_DIRTY_VIEWPORT;
	} else {
		OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
		OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
	}

	if (buffers & PIPE_CLEAR_STENCIL) {
		OUT_PKT0(ring, REG_A3XX_RB_STENCILREFMASK, 2);
		OUT_RING(ring, A3XX_RB_STENCILREFMASK_STENCILREF(stencil) |
				A3XX_RB_STENCILREFMASK_STENCILMASK(stencil) |
				A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
		OUT_RING(ring, A3XX_RB_STENCILREFMASK_STENCILREF(0) |
				A3XX_RB_STENCILREFMASK_STENCILMASK(0) |
				0xff000000 | // XXX ???
				A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));

		OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
		OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
				A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_ALWAYS) |
				A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
				A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_REPLACE) |
				A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
				A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
				A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
				A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
				A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
	} else {
		OUT_PKT0(ring, REG_A3XX_RB_STENCILREFMASK, 2);
		OUT_RING(ring, A3XX_RB_STENCILREFMASK_STENCILREF(0) |
				A3XX_RB_STENCILREFMASK_STENCILMASK(0) |
				A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0));
		OUT_RING(ring, A3XX_RB_STENCILREFMASK_BF_STENCILREF(0) |
				A3XX_RB_STENCILREFMASK_BF_STENCILMASK(0) |
				A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0));

		OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
		OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
				A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
				A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
				A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
				A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
				A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
				A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
				A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
	}

	if (buffers & PIPE_CLEAR_COLOR) {
		ce = 0xf;
	} else {
		ce = 0x0;
	}

	for (i = 0; i < 4; i++) {
		OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
		OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) |
				A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_ALWAYS) |
				A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(ce));

		OUT_PKT0(ring, REG_A3XX_RB_MRT_BLEND_CONTROL(i), 1);
		OUT_RING(ring, A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) |
				A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
				A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(FACTOR_ZERO) |
				A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(FACTOR_ONE) |
				A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
				A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO) |
				A3XX_RB_MRT_BLEND_CONTROL_CLAMP_ENABLE);
	}

	OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1);
	OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0));

	fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key),
			(struct fd3_vertex_buf[]) {{
				.prsc = fd3_ctx->solid_vbuf,
				.stride = 12,
				.format = PIPE_FORMAT_R32G32B32_FLOAT,
			}}, 1);

	fd_wfi(ctx, ring);
	fd3_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL);

	OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
	OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
			A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
			A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |
			A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
	OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
	OUT_RING(ring, 0);            /* VFD_INDEX_MIN */
	OUT_RING(ring, 2);            /* VFD_INDEX_MAX */
	OUT_RING(ring, 0);            /* VFD_INSTANCEID_OFFSET */
	OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
	OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1);
	OUT_RING(ring, 0xffffffff);   /* PC_RESTART_INDEX */

	OUT_PKT3(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, PERFCOUNTER_STOP);

	fd_draw(ctx, ring, DI_PT_RECTLIST, USE_VISIBILITY,
			DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL);
}

void
fd3_draw_init(struct pipe_context *pctx)
{
	struct fd_context *ctx = fd_context(pctx);
	ctx->draw = fd3_draw;
	ctx->clear = fd3_clear;
}
Beispiel #24
0
static void
fd4_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
{
	struct fd4_context *fd4_ctx = fd4_context(ctx);
	struct fd_ringbuffer *ring = ctx->ring;
	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
	struct fd4_emit emit = {
			.vtx = &fd4_ctx->solid_vbuf_state,
			.prog = &ctx->solid_prog,
			.key = key,
			.format = fd4_emit_format(pfb->cbufs[0]),
	};

	OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
	OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));

	OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
	OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
			A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
			A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
	OUT_RING(ring, 0x00000000); /* RB_STENCIL_CONTROL2 */

	OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2);
	OUT_RING(ring, 0xff000000 |
			A4XX_RB_STENCILREFMASK_STENCILREF(0) |
			A4XX_RB_STENCILREFMASK_STENCILMASK(0) |
			A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
	OUT_RING(ring, 0xff000000 |
			A4XX_RB_STENCILREFMASK_BF_STENCILREF(0) |
			A4XX_RB_STENCILREFMASK_BF_STENCILMASK(0) |
			A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));

	OUT_PKT0(ring, REG_A4XX_GRAS_SU_MODE_CONTROL, 1);
	OUT_RING(ring, A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0));

	fd_wfi(ctx, ring);

	OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
	OUT_RING(ring, 0x80000);      /* GRAS_CL_CLIP_CNTL */

	OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6);
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0((float)pfb->width/2.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0((float)pfb->width/2.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0((float)pfb->height/2.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(-(float)pfb->height/2.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(0.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(1.0));

	OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
	OUT_RING(ring, A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
			0xa);       /* XXX */

	OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
	OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
			A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
			A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
			A4XX_GRAS_SC_CONTROL_RASTER_MODE(1));

	OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 1);
	OUT_RING(ring, A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);

	OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
	OUT_RING(ring, 0x00000002);

	OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2);
	OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) |
			A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1));
	OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
			A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));

	OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
	OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
	OUT_RING(ring, 0);            /* ??? UNKNOWN_2209 */

	fd4_program_emit(ring, &emit);
	fd4_emit_vertex_bufs(ring, &emit);

	if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
		uint32_t base = depth_base(ctx);
		emit_gmem2mem_surf(ctx, base, pfb->zsbuf);
	}

	if (ctx->resolve & FD_BUFFER_COLOR) {
		emit_gmem2mem_surf(ctx, 0, pfb->cbufs[0]);
	}

	OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
	OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
			A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
			A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
			A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
}

/* transfer from system memory to gmem */

static void
emit_mem2gmem_surf(struct fd_context *ctx, uint32_t base,
		struct pipe_surface *psurf, uint32_t bin_w)
{
	struct fd_ringbuffer *ring = ctx->ring;

	emit_mrt(ring, 1, &psurf, &base, bin_w);

	fd4_emit_gmem_restore_tex(ring, psurf);

	fd4_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
			DI_SRC_SEL_AUTO_INDEX, 2, 1, INDEX_SIZE_IGN, 0, 0, NULL);
}

static void
fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
{
	struct fd4_context *fd4_ctx = fd4_context(ctx);
	struct fd_gmem_stateobj *gmem = &ctx->gmem;
	struct fd_ringbuffer *ring = ctx->ring;
	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
	struct fd4_emit emit = {
			.vtx = &fd4_ctx->blit_vbuf_state,
			.prog = &ctx->blit_prog[0],
			.key = key,
			.format = fd4_emit_format(pfb->cbufs[0]),
	};
	float x0, y0, x1, y1;
	unsigned bin_w = tile->bin_w;
	unsigned bin_h = tile->bin_h;
	unsigned i;

	/* write texture coordinates to vertexbuf: */
	x0 = ((float)tile->xoff) / ((float)pfb->width);
	x1 = ((float)tile->xoff + bin_w) / ((float)pfb->width);
	y0 = ((float)tile->yoff) / ((float)pfb->height);
	y1 = ((float)tile->yoff + bin_h) / ((float)pfb->height);

	OUT_PKT3(ring, CP_MEM_WRITE, 5);
	OUT_RELOCW(ring, fd_resource(fd4_ctx->blit_texcoord_vbuf)->bo, 0, 0, 0);
	OUT_RING(ring, fui(x0));
	OUT_RING(ring, fui(y0));
	OUT_RING(ring, fui(x1));
	OUT_RING(ring, fui(y1));

	for (i = 0; i < 8; i++) {
		OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
		OUT_RING(ring, A4XX_RB_MRT_CONTROL_FASTCLEAR |
				A4XX_RB_MRT_CONTROL_B11 |
				A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf));

		OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
		OUT_RING(ring, A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) |
				A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
				A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(FACTOR_ZERO) |
				A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(FACTOR_ONE) |
				A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
				A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO));
	}

	OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
	OUT_RING(ring, 0x8);          /* XXX RB_RENDER_CONTROL */

	OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
	OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_LESS));

	OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
	OUT_RING(ring, 0x280000);     /* XXX GRAS_CL_CLIP_CNTL */

	OUT_PKT0(ring, REG_A4XX_GRAS_SU_MODE_CONTROL, 1);
	OUT_RING(ring, A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0) |
			A4XX_GRAS_SU_MODE_CONTROL_RENDERING_PASS);

	OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6);
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0((float)bin_w/2.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0((float)bin_w/2.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0((float)bin_h/2.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(-(float)bin_h/2.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(0.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(1.0));

	OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2);
	OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(bin_w - 1) |
			A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(bin_h - 1));
	OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
			A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));

	OUT_PKT0(ring, REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
	OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
			A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
	OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(bin_w - 1) |
			A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(bin_h - 1));

	OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
	OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(gmem->bin_w) |
			A4XX_RB_MODE_CONTROL_HEIGHT(gmem->bin_h));

	OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
	OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_ALWAYS) |
			A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_ALWAYS) |
			A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
	OUT_RING(ring, 0x00000000); /* RB_STENCIL_CONTROL2 */

	OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
	OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
			A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
			A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
			A4XX_GRAS_SC_CONTROL_RASTER_MODE(1));

	OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 1);
	OUT_RING(ring, A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST |
			A4XX_PC_PRIM_VTX_CNTL_VAROUT(1));

	OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
	OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
	OUT_RING(ring, 0);            /* ??? UNKNOWN_2209 */

	fd4_program_emit(ring, &emit);
	fd4_emit_vertex_bufs(ring, &emit);

	/* for gmem pitch/base calculations, we need to use the non-
	 * truncated tile sizes:
	 */
	bin_w = gmem->bin_w;
	bin_h = gmem->bin_h;

	if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
		emit_mem2gmem_surf(ctx, depth_base(ctx), pfb->zsbuf, bin_w);

	if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_COLOR))
		emit_mem2gmem_surf(ctx, 0, pfb->cbufs[0], bin_w);

	OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
	OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
			A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
			A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));

	OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
	OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(gmem->bin_w) |
			A4XX_RB_MODE_CONTROL_HEIGHT(gmem->bin_h) |
			0x00010000);  /* XXX */
}

static void
patch_draws(struct fd_context *ctx, enum pc_di_vis_cull_mode vismode)
{
	unsigned i;
	for (i = 0; i < fd_patch_num_elements(&ctx->draw_patches); i++) {
		struct fd_cs_patch *patch = fd_patch_element(&ctx->draw_patches, i);
		*patch->cs = patch->val | DRAW4(0, 0, 0, vismode);
	}
	util_dynarray_resize(&ctx->draw_patches, 0);
}

static void
patch_rbrc(struct fd_context *ctx, uint32_t val)
{
	struct fd4_context *fd4_ctx = fd4_context(ctx);
	unsigned i;
	for (i = 0; i < fd_patch_num_elements(&fd4_ctx->rbrc_patches); i++) {
		struct fd_cs_patch *patch = fd_patch_element(&fd4_ctx->rbrc_patches, i);
		*patch->cs = patch->val | val;
	}
	util_dynarray_resize(&fd4_ctx->rbrc_patches, 0);
}

/* for rendering directly to system memory: */
static void
fd4_emit_sysmem_prep(struct fd_context *ctx)
{
	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
	struct fd_ringbuffer *ring = ctx->ring;

	fd4_emit_restore(ctx);

	OUT_PKT0(ring, REG_A4XX_RB_FRAME_BUFFER_DIMENSION, 1);
	OUT_RING(ring, A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
			A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));

	emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0);

	/* setup scissor/offset for current tile: */
	OUT_PKT0(ring, REG_A4XX_RB_BIN_OFFSET, 1);
	OUT_RING(ring, A4XX_RB_BIN_OFFSET_X(0) |
			A4XX_RB_BIN_OFFSET_Y(0));

	OUT_PKT0(ring, REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
	OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
			A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
	OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(pfb->width - 1) |
			A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(pfb->height - 1));

	OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
	OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(0) |
			A4XX_RB_MODE_CONTROL_HEIGHT(0) |
			0x00c00000);  /* XXX */

	patch_draws(ctx, IGNORE_VISIBILITY);
	patch_rbrc(ctx, 0);  // XXX
}

static void
update_vsc_pipe(struct fd_context *ctx)
{
	struct fd4_context *fd4_ctx = fd4_context(ctx);
	struct fd_ringbuffer *ring = ctx->ring;
	int i;

	OUT_PKT0(ring, REG_A4XX_VSC_SIZE_ADDRESS, 1);
	OUT_RELOCW(ring, fd4_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */

	OUT_PKT0(ring, REG_A4XX_VSC_PIPE_CONFIG_REG(0), 8);
	for (i = 0; i < 8; i++) {
		struct fd_vsc_pipe *pipe = &ctx->pipe[i];
		OUT_RING(ring, A4XX_VSC_PIPE_CONFIG_REG_X(pipe->x) |
				A4XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) |
				A4XX_VSC_PIPE_CONFIG_REG_W(pipe->w) |
				A4XX_VSC_PIPE_CONFIG_REG_H(pipe->h));
	}

	OUT_PKT0(ring, REG_A4XX_VSC_PIPE_DATA_ADDRESS_REG(0), 8);
	for (i = 0; i < 8; i++) {
		struct fd_vsc_pipe *pipe = &ctx->pipe[i];
		if (!pipe->bo) {
			pipe->bo = fd_bo_new(ctx->dev, 0x40000,
					DRM_FREEDRENO_GEM_TYPE_KMEM);
		}
		OUT_RELOCW(ring, pipe->bo, 0, 0, 0);       /* VSC_PIPE_DATA_ADDRESS[i] */
	}

	OUT_PKT0(ring, REG_A4XX_VSC_PIPE_DATA_LENGTH_REG(0), 8);
	for (i = 0; i < 8; i++) {
		struct fd_vsc_pipe *pipe = &ctx->pipe[i];
		OUT_RING(ring, fd_bo_size(pipe->bo) - 32); /* VSC_PIPE_DATA_LENGTH[i] */
	}
}

/* before first tile */
static void
fd4_emit_tile_init(struct fd_context *ctx)
{
	struct fd_ringbuffer *ring = ctx->ring;
	struct fd_gmem_stateobj *gmem = &ctx->gmem;
	uint32_t rb_render_control;

	fd4_emit_restore(ctx);

	OUT_PKT0(ring, REG_A4XX_VSC_BIN_SIZE, 1);
	OUT_RING(ring, A4XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
			A4XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));

	OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
	OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(gmem->bin_w) |
			A4XX_RB_MODE_CONTROL_HEIGHT(gmem->bin_h) |
			0x00010000);  /* XXX */

	update_vsc_pipe(ctx);
	patch_draws(ctx, IGNORE_VISIBILITY);

	rb_render_control = 0; // XXX or BINNING_PASS.. but maybe we can emit only from gmem
	patch_rbrc(ctx, rb_render_control);
}

/* before mem2gmem */
static void
fd4_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile)
{
	struct fd_ringbuffer *ring = ctx->ring;
	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
	struct fd_gmem_stateobj *gmem = &ctx->gmem;
	uint32_t reg;

	OUT_PKT0(ring, REG_A4XX_RB_DEPTH_INFO, 3);
	reg = A4XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base(ctx));
	if (pfb->zsbuf) {
		reg |= A4XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd4_pipe2depth(pfb->zsbuf->format));
	}
	OUT_RING(ring, reg);
	if (pfb->zsbuf) {
		uint32_t cpp = util_format_get_blocksize(pfb->zsbuf->format);
		OUT_RING(ring, A4XX_RB_DEPTH_PITCH(cpp * gmem->bin_w));
		OUT_RING(ring, A4XX_RB_DEPTH_PITCH2(cpp * gmem->bin_w));
	} else {
		OUT_RING(ring, 0x00000000);
		OUT_RING(ring, 0x00000000);
	}

	OUT_PKT0(ring, REG_A4XX_GRAS_DEPTH_CONTROL, 1);
	if (pfb->zsbuf) {
		OUT_RING(ring, A4XX_GRAS_DEPTH_CONTROL_FORMAT(
				fd4_pipe2depth(pfb->zsbuf->format)));
	} else {
		OUT_RING(ring, A4XX_GRAS_DEPTH_CONTROL_FORMAT(DEPTH4_NONE));
	}

	if (ctx->needs_rb_fbd) {
		fd_wfi(ctx, ring);
		OUT_PKT0(ring, REG_A4XX_RB_FRAME_BUFFER_DIMENSION, 1);
		OUT_RING(ring, A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
				A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
		ctx->needs_rb_fbd = false;
	}
}

/* before IB to rendering cmds: */
static void
fd4_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile)
{
	struct fd_ringbuffer *ring = ctx->ring;
	struct fd_gmem_stateobj *gmem = &ctx->gmem;
	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;

	uint32_t x1 = tile->xoff;
	uint32_t y1 = tile->yoff;
	uint32_t x2 = tile->xoff + tile->bin_w - 1;
	uint32_t y2 = tile->yoff + tile->bin_h - 1;

	OUT_PKT3(ring, CP_SET_BIN, 3);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, CP_SET_BIN_1_X1(x1) | CP_SET_BIN_1_Y1(y1));
	OUT_RING(ring, CP_SET_BIN_2_X2(x2) | CP_SET_BIN_2_Y2(y2));

	emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, gmem->bin_w);

	/* setup scissor/offset for current tile: */
	OUT_PKT0(ring, REG_A4XX_RB_BIN_OFFSET, 1);
	OUT_RING(ring, A4XX_RB_BIN_OFFSET_X(tile->xoff) |
			A4XX_RB_BIN_OFFSET_Y(tile->yoff));

	OUT_PKT0(ring, REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
	OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(x1) |
			A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(y1));
	OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(x2) |
			A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(y2));
}

void
fd4_gmem_init(struct pipe_context *pctx)
{
	struct fd_context *ctx = fd_context(pctx);

	ctx->emit_sysmem_prep = fd4_emit_sysmem_prep;
	ctx->emit_tile_init = fd4_emit_tile_init;
	ctx->emit_tile_prep = fd4_emit_tile_prep;
	ctx->emit_tile_mem2gmem = fd4_emit_tile_mem2gmem;
	ctx->emit_tile_renderprep = fd4_emit_tile_renderprep;
	ctx->emit_tile_gmem2mem = fd4_emit_tile_gmem2mem;
}
Beispiel #25
0
void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
		struct msm_file_private *ctx)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct msm_drm_private *priv = gpu->dev->dev_private;
	struct msm_ringbuffer *ring = gpu->rb;
	unsigned i;

	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			/* ignore IB-targets */
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			/* ignore if there has not been a ctx switch: */
			if (priv->lastctx == ctx)
				break;
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT3(ring, adreno_is_a430(adreno_gpu) ?
				CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD, 2);
			OUT_RING(ring, submit->cmd[i].iova);
			OUT_RING(ring, submit->cmd[i].size);
			OUT_PKT2(ring);
			break;
		}
	}

	OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
	OUT_RING(ring, submit->fence->seqno);

	if (adreno_is_a3xx(adreno_gpu) || adreno_is_a4xx(adreno_gpu)) {
		/* Flush HLSQ lazy updates to make sure there is nothing
		 * pending for indirect loads after the timestamp has
		 * passed:
		 */
		OUT_PKT3(ring, CP_EVENT_WRITE, 1);
		OUT_RING(ring, HLSQ_FLUSH);

		OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
		OUT_RING(ring, 0x00000000);
	}

	OUT_PKT3(ring, CP_EVENT_WRITE, 3);
	OUT_RING(ring, CACHE_FLUSH_TS);
	OUT_RING(ring, rbmemptr(adreno_gpu, fence));
	OUT_RING(ring, submit->fence->seqno);

	/* we could maybe be clever and only CP_COND_EXEC the interrupt: */
	OUT_PKT3(ring, CP_INTERRUPT, 1);
	OUT_RING(ring, 0x80000000);

	/* Workaround for missing irq issue on 8x16/a306.  Unsure if the
	 * root cause is a platform issue or some a306 quirk, but this
	 * keeps things humming along:
	 */
	if (adreno_is_a306(adreno_gpu)) {
		OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
		OUT_RING(ring, 0x00000000);
		OUT_PKT3(ring, CP_INTERRUPT, 1);
		OUT_RING(ring, 0x80000000);
	}

#if 0
	if (adreno_is_a3xx(adreno_gpu)) {
		/* Dummy set-constant to trigger context rollover */
		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
		OUT_RING(ring, CP_REG(REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG));
		OUT_RING(ring, 0x00000000);
	}
#endif

	gpu->funcs->flush(gpu);
}
Beispiel #26
0
int main(int argc, char *argv[])
{
	struct fd_device *dev;
	struct fd_pipe *pipe;
	struct fd_ringbuffer *ring;
	struct fd_bo *bo;
	uint32_t i = 0;
	uint32_t *ptr;
	int fd, ret;

	fd = drmOpen("msm", NULL);
	if (fd < 0) {
		printf("failed to initialize DRM\n");
		return fd;
	}

	dev = fd_device_new(fd);
	if (!dev) {
		printf("failed to initialize freedreno device\n");
		return -1;
	}

	pipe = fd_pipe_new(dev, FD_PIPE_3D);
	if (!pipe) {
		printf("failed to initialize freedreno pipe\n");
		return -1;
	}

	ring = fd_ringbuffer_new(pipe, 4096);
	if (!ring) {
		printf("failed to initialize freedreno ring\n");
		return -1;
	}

	bo = fd_bo_new(dev, 4096, 0);

#define BASE REG_A3XX_GRAS_CL_VPORT_XOFFSET
#define SIZE 6

	OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG4, 1);
	OUT_RING(ring, 0x123);

	OUT_PKT0(ring, BASE, SIZE);
	for (i = 0; i < SIZE; i++)
		OUT_RING(ring, i);

	/* this adds the value of CP_SCRATCH_REG4 to 0x111 and writes
	 * to REG_BASE+2
	 */
	OUT_PKT3(ring, CP_SET_CONSTANT, 3);
	OUT_RING(ring, 0x80000000 | CP_REG(BASE + 2));
	OUT_RING(ring, REG_AXXX_CP_SCRATCH_REG4);
	OUT_RING(ring, 0x111);

	/* read back all the regs: */
	for (i = 0; i < SIZE; i++) {
		OUT_PKT3(ring, CP_REG_TO_MEM, 2);
		OUT_RING(ring, BASE + i);
		OUT_RELOCW(ring, bo, i * 4, 0, 0);
	}

	fd_ringbuffer_flush(ring);

	/* and read back the values: */
	fd_bo_cpu_prep(bo, pipe, DRM_FREEDRENO_PREP_READ);
	ptr = fd_bo_map(bo);
	for (i = 0; i < SIZE; i++) {
		printf("%02x: %08x\n", i, ptr[i]);
	}
	fd_bo_cpu_fini(bo);

	return 0;
}