Example #1
static void
emit_constants(struct fd_ringbuffer *ring,
		enum adreno_state_block sb,
		struct fd_constbuf_stateobj *constbuf,
		struct fd3_shader_stateobj *shader)
{
	uint32_t enabled_mask = constbuf->enabled_mask;
	uint32_t base = 0;
	unsigned i;

	// XXX TODO only emit dirty consts.. but we need to keep track of
	// whether they are clobbered by a clear, gmem2mem, or mem2gmem..
	constbuf->dirty_mask = enabled_mask;

	/* emit user constants: */
	while (enabled_mask) {
		unsigned index = ffs(enabled_mask) - 1;
		struct pipe_constant_buffer *cb = &constbuf->cb[index];
		unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */

		// I expect that size should be a multiple of vec4s:
		assert(size == align(size, 4));

		/* gallium could have a const buffer still bound, even though
		 * the shader is not using it.  Writing consts above constlen
		 * (or rather, above HLSQ_{VS,FS}_CONTROL_REG.CONSTLENGTH) will
		 * cause a hang.
		 */
		if ((base / 4) >= shader->constlen)
			break;

		if (constbuf->dirty_mask & (1 << index)) {
			fd3_emit_constant(ring, sb, base,
					cb->buffer_offset, size,
					cb->user_buffer, cb->buffer);
			constbuf->dirty_mask &= ~(1 << index);
		}

		base += size;
		enabled_mask &= ~(1 << index);
	}

	/* emit shader immediates: */
	if (shader) {
		for (i = 0; i < shader->immediates_count; i++) {
			fd3_emit_constant(ring, sb,
					4 * (shader->first_immediate + i),
					0, 4, shader->immediates[i].val, NULL);
		}
	}
}
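
The loop above visits only the const buffers that are actually bound, peeling one set bit of enabled_mask at a time with ffs().  A minimal, self-contained sketch of the same idiom (standalone C; process() is a hypothetical stand-in for fd3_emit_constant):

#include <stdint.h>
#include <stdio.h>
#include <strings.h>   /* ffs() */

static void
process(unsigned index)
{
	printf("const buffer %u is bound\n", index);
}

int
main(void)
{
	uint32_t enabled_mask = 0x15;   /* buffers 0, 2 and 4 bound */

	while (enabled_mask) {
		unsigned index = ffs(enabled_mask) - 1;   /* lowest set bit */
		process(index);
		enabled_mask &= ~(1u << index);           /* clear it, move on */
	}
	return 0;
}

Clearing each bit after it is handled guarantees termination, and ffs() keeps the walk proportional to the number of bound buffers rather than the mask width.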
Example #2
static void
fd3_draw(struct fd_context *ctx, const struct pipe_draw_info *info)
{
	unsigned dirty = ctx->dirty;
	struct fd3_shader_key key = {
			/* do binning pass first: */
			.binning_pass = true,
			.color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false,
			// TODO set .half_precision based on render target format,
			// ie. float16 and smaller use half, float32 use full..
			.half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
	};
	draw_impl(ctx, info, ctx->binning_ring,
			dirty & ~(FD_DIRTY_BLEND), key);
	/* and now regular (non-binning) pass: */
	key.binning_pass = false;
	draw_impl(ctx, info, ctx->ring, dirty, key);
}

/* binning pass cmds for a clear:
 * NOTE: newer blob drivers don't use binning for clear, which is
 * probably preferable since the vertex count is so low.  However that
 * doesn't seem to actually work for me.  Not sure if it depends on
 * support for a clear pass (rather than using the solid-fill shader),
 * or on something else the newer blob is doing differently.  Once that
 * is figured out, we can remove fd3_clear_binning().
 */
static void
fd3_clear_binning(struct fd_context *ctx, unsigned dirty)
{
	struct fd3_context *fd3_ctx = fd3_context(ctx);
	struct fd_ringbuffer *ring = ctx->binning_ring;
	struct fd3_shader_key key = {
			.binning_pass = true,
			.half_precision = true,
	};

	fd3_emit_state(ctx, ring, &ctx->solid_prog, dirty, key);

	fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key),
			(struct fd3_vertex_buf[]) {{
				.prsc = fd3_ctx->solid_vbuf,
				.stride = 12,
				.format = PIPE_FORMAT_R32G32B32_FLOAT,
			}}, 1);

	OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
	OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
			A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
			A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |
			A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
	OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
	OUT_RING(ring, 0);            /* VFD_INDEX_MIN */
	OUT_RING(ring, 2);            /* VFD_INDEX_MAX */
	OUT_RING(ring, 0);            /* VFD_INSTANCEID_OFFSET */
	OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
	OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1);
	OUT_RING(ring, 0xffffffff);   /* PC_RESTART_INDEX */

	OUT_PKT3(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, PERFCOUNTER_STOP);

	fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
			DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL);
}

static void
fd3_clear(struct fd_context *ctx, unsigned buffers,
		const union pipe_color_union *color, double depth, unsigned stencil)
{
	struct fd3_context *fd3_ctx = fd3_context(ctx);
	struct fd_ringbuffer *ring = ctx->ring;
	unsigned dirty = ctx->dirty;
	unsigned ce, i;
	struct fd3_shader_key key = {
			.half_precision = true,
	};

	dirty &= FD_DIRTY_VIEWPORT | FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR;
	dirty |= FD_DIRTY_PROG;

	fd3_clear_binning(ctx, dirty);

	/* emit generic state now: */
	fd3_emit_state(ctx, ring, &ctx->solid_prog, dirty, key);

	OUT_PKT0(ring, REG_A3XX_RB_BLEND_ALPHA, 1);
	OUT_RING(ring, A3XX_RB_BLEND_ALPHA_UINT(0xff) |
			A3XX_RB_BLEND_ALPHA_FLOAT(1.0));

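	/* RB_RENDER_CONTROL is emitted with OUT_RINGP so its location is
	 * recorded in fd3_ctx->rbrc_patches and the value can be patched
	 * later, once the tile-dependent bits are known:
	 */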
	OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
	OUT_RINGP(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER),
			&fd3_ctx->rbrc_patches);

	if (buffers & PIPE_CLEAR_DEPTH) {
		OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
		OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE |
				A3XX_RB_DEPTH_CONTROL_Z_ENABLE |
				A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS));

		OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_ZOFFSET, 2);
		OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0));
		OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(depth));
		ctx->dirty |= FD_DIRTY_VIEWPORT;
	} else {
		OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
		OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
	}

	if (buffers & PIPE_CLEAR_STENCIL) {
		OUT_PKT0(ring, REG_A3XX_RB_STENCILREFMASK, 2);
		OUT_RING(ring, A3XX_RB_STENCILREFMASK_STENCILREF(stencil) |
				A3XX_RB_STENCILREFMASK_STENCILMASK(stencil) |
				A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
		OUT_RING(ring, A3XX_RB_STENCILREFMASK_STENCILREF(0) |
				A3XX_RB_STENCILREFMASK_STENCILMASK(0) |
				0xff000000 | // XXX ???
				A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));

		OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
		OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
				A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_ALWAYS) |
				A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
				A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_REPLACE) |
				A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
				A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
				A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
				A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
				A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
	} else {
		OUT_PKT0(ring, REG_A3XX_RB_STENCILREFMASK, 2);
		OUT_RING(ring, A3XX_RB_STENCILREFMASK_STENCILREF(0) |
				A3XX_RB_STENCILREFMASK_STENCILMASK(0) |
				A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0));
		OUT_RING(ring, A3XX_RB_STENCILREFMASK_BF_STENCILREF(0) |
				A3XX_RB_STENCILREFMASK_BF_STENCILMASK(0) |
				A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0));

		OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
		OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
				A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
				A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
				A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
				A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
				A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
				A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
				A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
	}

	if (buffers & PIPE_CLEAR_COLOR) {
		ce = 0xf;
	} else {
		ce = 0x0;
	}

	for (i = 0; i < 4; i++) {
		OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
		OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) |
				A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_ALWAYS) |
				A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(ce));

		OUT_PKT0(ring, REG_A3XX_RB_MRT_BLEND_CONTROL(i), 1);
		OUT_RING(ring, A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) |
				A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
				A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(FACTOR_ZERO) |
				A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(FACTOR_ONE) |
				A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
				A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO) |
				A3XX_RB_MRT_BLEND_CONTROL_CLAMP_ENABLE);
	}

	OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1);
	OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0));

	fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key),
			(struct fd3_vertex_buf[]) {{
				.prsc = fd3_ctx->solid_vbuf,
				.stride = 12,
				.format = PIPE_FORMAT_R32G32B32_FLOAT,
			}}, 1);

	fd_wfi(ctx, ring);
	fd3_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL);

	OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
	OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
			A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
			A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |
			A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
	OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
	OUT_RING(ring, 0);            /* VFD_INDEX_MIN */
	OUT_RING(ring, 2);            /* VFD_INDEX_MAX */
	OUT_RING(ring, 0);            /* VFD_INSTANCEID_OFFSET */
	OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
	OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1);
	OUT_RING(ring, 0xffffffff);   /* PC_RESTART_INDEX */

	OUT_PKT3(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, PERFCOUNTER_STOP);

	fd_draw(ctx, ring, DI_PT_RECTLIST, USE_VISIBILITY,
			DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL);
}

void
fd3_draw_init(struct pipe_context *pctx)
{
	struct fd_context *ctx = fd_context(pctx);
	ctx->draw = fd3_draw;
	ctx->clear = fd3_clear;
}
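
Both the binning and non-binning clear paths program registers through OUT_PKT0()/OUT_RING() pairs: a type-0 packet header that names the first register and the payload length, followed by the raw register values.  A rough, self-contained sketch of what such helpers could look like, under the assumption that the header packs a packet type, a length field and the base register index (the real macros live in the freedreno winsys and may differ in detail):

#include <stdint.h>

/* minimal stand-in for the real ringbuffer struct: */
struct fd_ringbuffer { uint32_t *cur; };

#define CP_TYPE0_PKT	(0x0u << 30)   /* assumed type-0 marker */

static inline void
OUT_RING(struct fd_ringbuffer *ring, uint32_t data)
{
	*(ring->cur++) = data;
}

static inline void
OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
{
	/* header: packet type, payload length minus one, base register */
	OUT_RING(ring, CP_TYPE0_PKT | ((uint32_t)(cnt - 1) << 16) |
			(regindx & 0x7fff));
}

Each OUT_PKT0(ring, REG, n) in the listing is therefore followed by exactly n OUT_RING() calls, one per consecutive register starting at REG.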
Example #3
static void
emit_constants(struct fd_ringbuffer *ring,
		enum adreno_state_block sb,
		struct fd_constbuf_stateobj *constbuf,
		struct fd3_shader_variant *shader)
{
	uint32_t enabled_mask = constbuf->enabled_mask;
	uint32_t first_immediate;
	uint32_t base = 0;
	unsigned i;

	// XXX TODO only emit dirty consts.. but we need to keep track of
	// whether they are clobbered by a clear, gmem2mem, or mem2gmem..
	constbuf->dirty_mask = enabled_mask;

	/* in particular, with the binning shader variant, consts that are
	 * no longer referenced can leave us w/ a constlen that is smaller
	 * than first_immediate.  In that case truncate the user consts
	 * early to avoid a HLSQ lockup caused by writing too many consts.
	 */
	first_immediate = MIN2(shader->first_immediate, shader->constlen);

	/* emit user constants: */
	while (enabled_mask) {
		unsigned index = ffs(enabled_mask) - 1;
		struct pipe_constant_buffer *cb = &constbuf->cb[index];
		unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */

		// I expect that size should be a multiple of vec4s:
		assert(size == align(size, 4));

		/* gallium could leave const buffers bound above what the
		 * current shader uses.. don't let that confuse us.
		 */
		if (base >= (4 * first_immediate))
			break;

		if (constbuf->dirty_mask & (1 << index)) {
			/* and even if the start of the const buffer is before
			 * first_immediate, the end may not be:
			 */
			size = MIN2(size, (4 * first_immediate) - base);
			fd3_emit_constant(ring, sb, base,
					cb->buffer_offset, size,
					cb->user_buffer, cb->buffer);
			constbuf->dirty_mask &= ~(1 << index);
		}

		base += size;
		enabled_mask &= ~(1 << index);
	}

	/* emit shader immediates: */
	if (shader) {
		for (i = 0; i < shader->immediates_count; i++) {
			base = 4 * (shader->first_immediate + i);
			if (base >= (4 * shader->constlen))
				break;
			fd3_emit_constant(ring, sb, base,
				0, 4, shader->immediates[i].val, NULL);
		}
	}
}
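
The fix in this version hinges on unit bookkeeping: constlen and first_immediate count vec4s while base and size count dwords, hence the 4 * conversions.  A small self-contained sketch of the clamping, using a hypothetical clamp_const_dwords() helper (MIN2 defined locally for illustration):

#define MIN2(a, b) ((a) < (b) ? (a) : (b))

/* Clamp a const-buffer write of 'size' dwords at dword offset 'base'
 * so that it never crosses 'limit_vec4' vec4s.  Returns the number of
 * dwords that are safe to emit; 0 means the loop should stop.
 */
static unsigned
clamp_const_dwords(unsigned base, unsigned size, unsigned limit_vec4)
{
	unsigned limit = 4 * limit_vec4;   /* convert vec4s to dwords */
	if (base >= limit)
		return 0;
	return MIN2(size, limit - base);
}

For example, with a limit of 4 vec4s (16 dwords) and a 20-dword buffer at base 0 this returns 16; at base 16 it returns 0, which is exactly where the loop above breaks.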
Example #4
static void
emit_constants(struct fd_ringbuffer *ring,
		enum adreno_state_block sb,
		struct fd_constbuf_stateobj *constbuf,
		struct ir3_shader_variant *shader,
		bool emit_immediates)
{
	uint32_t enabled_mask = constbuf->enabled_mask;
	uint32_t max_const;
	int i;

	// XXX TODO only emit dirty consts.. but we need to keep track of
	// whether they are clobbered by a clear, gmem2mem, or mem2gmem..
	constbuf->dirty_mask = enabled_mask;

	/* in particular, with the binning shader we may end up with unused
	 * consts, ie. we could end up w/ a constlen that is smaller than
	 * first_driver_param.  In that case truncate the user consts early
	 * to avoid a HLSQ lockup caused by writing too many consts.
	 */
	max_const = MIN2(shader->first_driver_param, shader->constlen);

	/* emit user constants: */
	if (enabled_mask & 1) {
		const unsigned index = 0;
		struct pipe_constant_buffer *cb = &constbuf->cb[index];
		unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */

		// I expect that size should be a multiple of vec4s:
		assert(size == align(size, 4));

		/* and even if the start of the const buffer is below
		 * max_const, the end may not be:
		 */
		size = MIN2(size, 4 * max_const);

		if (size && constbuf->dirty_mask & (1 << index)) {
			fd3_emit_constant(ring, sb, 0,
							  cb->buffer_offset, size,
							  cb->user_buffer, cb->buffer);
			constbuf->dirty_mask &= ~(1 << index);
		}

		enabled_mask &= ~(1 << index);
	}

	if (shader->constlen > shader->first_driver_param) {
		uint32_t params = MIN2(4, shader->constlen - shader->first_driver_param);
		/* emit ubos: */
		OUT_PKT3(ring, CP_LOAD_STATE, 2 + params * 4);
		OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(shader->first_driver_param * 2) |
				 CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
				 CP_LOAD_STATE_0_STATE_BLOCK(sb) |
				 CP_LOAD_STATE_0_NUM_UNIT(params * 2));
		OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
				 CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));

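		/* one address dword per UBO slot; unbound slots get a poison
		 * value that is easy to recognize in a GPU hang dump:
		 */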
		for (i = 1; i <= params * 4; i++) {
			struct pipe_constant_buffer *cb = &constbuf->cb[i];
			assert(!cb->user_buffer);
			if ((enabled_mask & (1 << i)) && cb->buffer)
				OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset, 0, 0);
			else
				OUT_RING(ring, 0xbad00000 | ((i - 1) << 16));
		}
	}

	/* emit shader immediates: */
	if (shader && emit_immediates) {
		int size = shader->immediates_count;
		uint32_t base = shader->first_immediate;

		/* truncate size to avoid writing constants that the shader
		 * does not use:
		 */
		size = MIN2(size + base, shader->constlen) - base;

		/* convert out of vec4: */
		base *= 4;
		size *= 4;

		if (size > 0) {
			fd3_emit_constant(ring, sb, base,
				0, size, shader->immediates[0].val, NULL);
		}
	}
}
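
The UBO block in this version replaces per-buffer const uploads with a single CP_LOAD_STATE packet whose payload is a table of buffer addresses.  A hedged sketch of the dword accounting, assuming (as the * 2 factors in the listing suggest) that DST_OFF and NUM_UNIT count units of two dwords:

#include <stdint.h>

#define MIN2(a, b) ((a) < (b) ? (a) : (b))

/* Hypothetical helper mirroring the arithmetic in the UBO block above;
 * all values are in the units the hardware fields appear to expect.
 */
static void
ubo_packet_sizes(uint32_t first_driver_param, uint32_t constlen,
		uint32_t *pkt_dwords, uint32_t *dst_off, uint32_t *num_unit)
{
	uint32_t params = MIN2(4, constlen - first_driver_param); /* vec4s */

	*pkt_dwords = 2 + params * 4;          /* 2 header dwords + 1 dword per UBO slot */
	*dst_off    = first_driver_param * 2;  /* vec4 offset -> 2-dword units */
	*num_unit   = params * 2;              /* vec4 count  -> 2-dword units */
}

With first_driver_param = 8 and constlen = 10, for instance, params is 2 and the packet body carries 8 pointer dwords, one for each of UBO slots 1..8.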