/**
 * Run the copy-propagation (CP) pass over a shader.
 *
 * ir3_clear_mark() is called first (presumably resetting the visited marks
 * that instr_cp() tests via ir3_instr_check_mark()).  instr_cp() is then
 * started from every root known here: each non-NULL shader output, each
 * block's condition (when set) and each entry of a block's keeps[] array.
 * Once a root has been processed, its slot is overwritten with whatever
 * eliminate_output_mov() returns for it.
 *
 * @param ir  shader IR to run the pass on
 * @param so  shader variant, stored in the pass context
 */
void
ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so)
{
	struct ir3_cp_ctx ctx = { .shader = ir, .so = so };
	unsigned idx;

	ir3_clear_mark(ir);

	/* roots #1: the shader outputs (unused slots are NULL) */
	for (idx = 0; idx < ir->noutputs; idx++) {
		if (!ir->outputs[idx])
			continue;

		instr_cp(&ctx, ir->outputs[idx]);
		ir->outputs[idx] = eliminate_output_mov(ir->outputs[idx]);
	}

	/* roots #2: per-block branch condition and kept instructions */
	list_for_each_entry (struct ir3_block, blk, &ir->block_list, node) {
		if (blk->condition) {
			instr_cp(&ctx, blk->condition);
			blk->condition = eliminate_output_mov(blk->condition);
		}

		for (idx = 0; idx < blk->keeps_count; idx++) {
			instr_cp(&ctx, blk->keeps[idx]);
			blk->keeps[idx] = eliminate_output_mov(blk->keeps[idx]);
		}
	}
}
예제 #2
0
/**
 * Run the copy-propagation (CP) pass over a shader.
 *
 * Steps, in order:
 *   1. bump use_count on the producer of every SSA source of every
 *      instruction in every block,
 *   2. call ir3_clear_mark() (presumably resetting the visited marks that
 *      instr_cp() tests),
 *   3. start instr_cp() from each non-NULL shader output, each block's
 *      condition (when set) and each entry of a block's keeps[] array,
 *      overwriting the slot with the result of eliminate_output_mov().
 *
 * @param ir  shader IR to run the pass on
 * @param so  shader variant, stored in the pass context
 */
void
ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so)
{
	struct ir3_cp_ctx ctx = { .shader = ir, .so = so };
	unsigned idx;

	/* There is no reverse link from a producing instruction to its
	 * consumers, yet the pass has to notice when the last consumer of a
	 * mov has been eliminated.  So count the consumers up front.
	 */
	list_for_each_entry (struct ir3_block, blk, &ir->block_list, node) {
		list_for_each_entry (struct ir3_instruction, consumer, &blk->instr_list, node) {
			struct ir3_instruction *producer;

			/* false-deps are not part of the count, so this pass has
			 * to run before any of them are inserted:
			 */
			debug_assert(consumer->deps_count == 0);

			foreach_ssa_src(producer, consumer) {
				producer->use_count += 1;
			}
		}
	}

	ir3_clear_mark(ir);

	/* roots #1: the shader outputs (unused slots are NULL) */
	for (idx = 0; idx < ir->noutputs; idx++) {
		if (!ir->outputs[idx])
			continue;

		instr_cp(&ctx, ir->outputs[idx]);
		ir->outputs[idx] = eliminate_output_mov(ir->outputs[idx]);
	}

	/* roots #2: per-block branch condition and kept instructions */
	list_for_each_entry (struct ir3_block, blk, &ir->block_list, node) {
		if (blk->condition) {
			instr_cp(&ctx, blk->condition);
			blk->condition = eliminate_output_mov(blk->condition);
		}

		for (idx = 0; idx < blk->keeps_count; idx++) {
			instr_cp(&ctx, blk->keeps[idx]);
			blk->keeps[idx] = eliminate_output_mov(blk->keeps[idx]);
		}
	}
}
/**
 * Find instruction src's which are mov's that can be collapsed, replacing
 * the mov dst with the mov src
 */
static void
instr_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr)
{
	struct ir3_register *reg;

	if (instr->regs_count == 0)
		return;

	if (ir3_instr_check_mark(instr))
		return;

	/* walk down the graph from each src: */
	foreach_src_n(reg, n, instr) {
		struct ir3_instruction *src = ssa(reg);

		if (!src)
			continue;

		instr_cp(ctx, src);

		/* TODO non-indirect access we could figure out which register
		 * we actually want and allow cp..
		 */
		if (reg->flags & IR3_REG_ARRAY)
			continue;

		reg_cp(ctx, instr, reg, n);
	}

	if (instr->regs[0]->flags & IR3_REG_ARRAY) {
		struct ir3_instruction *src = ssa(instr->regs[0]);
		if (src)
			instr_cp(ctx, src);
	}

	if (instr->address) {
		instr_cp(ctx, instr->address);
		ir3_instr_set_address(instr, eliminate_output_mov(instr->address));
	}

	/* we can end up with extra cmps.s from frontend, which uses a
	 *
	 *    cmps.s p0.x, cond, 0
	 *
	 * as a way to mov into the predicate register.  But frequently 'cond'
	 * is itself a cmps.s/cmps.f/cmps.u.  So detect this special case and
	 * just re-write the instruction writing predicate register to get rid
	 * of the double cmps.
	 */
	if ((instr->opc == OPC_CMPS_S) &&
			(instr->regs[0]->num == regid(REG_P0, 0)) &&
			ssa(instr->regs[1]) &&
			(instr->regs[2]->flags & IR3_REG_IMMED) &&
			(instr->regs[2]->iim_val == 0)) {
		struct ir3_instruction *cond = ssa(instr->regs[1]);
		switch (cond->opc) {
		case OPC_CMPS_S:
		case OPC_CMPS_F:
		case OPC_CMPS_U:
			instr->opc   = cond->opc;
			instr->flags = cond->flags;
			instr->cat2  = cond->cat2;
			instr->address = cond->address;
			instr->regs[1] = cond->regs[1];
			instr->regs[2] = cond->regs[2];
			break;
		default:
			break;
		}
	}
}
예제 #4
0
/**
 * Find instruction src's which are mov's that can be collapsed, replacing
 * the mov dst with the mov src
 */
static void
instr_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr)
{
	struct ir3_register *reg;

	if (instr->regs_count == 0)
		return;

	if (ir3_instr_check_mark(instr))
		return;

	/* walk down the graph from each src: */
	foreach_src_n(reg, n, instr) {
		struct ir3_instruction *src = ssa(reg);

		if (!src)
			continue;

		instr_cp(ctx, src);

		/* TODO non-indirect access we could figure out which register
		 * we actually want and allow cp..
		 */
		if (reg->flags & IR3_REG_ARRAY)
			continue;

		/* Don't CP absneg into meta instructions, that won't end well: */
		if (is_meta(instr) && (src->opc != OPC_MOV))
			continue;

		reg_cp(ctx, instr, reg, n);
	}

	if (instr->regs[0]->flags & IR3_REG_ARRAY) {
		struct ir3_instruction *src = ssa(instr->regs[0]);
		if (src)
			instr_cp(ctx, src);
	}

	if (instr->address) {
		instr_cp(ctx, instr->address);
		ir3_instr_set_address(instr, eliminate_output_mov(instr->address));
	}

	/* we can end up with extra cmps.s from frontend, which uses a
	 *
	 *    cmps.s p0.x, cond, 0
	 *
	 * as a way to mov into the predicate register.  But frequently 'cond'
	 * is itself a cmps.s/cmps.f/cmps.u.  So detect this special case and
	 * just re-write the instruction writing predicate register to get rid
	 * of the double cmps.
	 */
	if ((instr->opc == OPC_CMPS_S) &&
			(instr->regs[0]->num == regid(REG_P0, 0)) &&
			ssa(instr->regs[1]) &&
			(instr->regs[2]->flags & IR3_REG_IMMED) &&
			(instr->regs[2]->iim_val == 0)) {
		struct ir3_instruction *cond = ssa(instr->regs[1]);
		switch (cond->opc) {
		case OPC_CMPS_S:
		case OPC_CMPS_F:
		case OPC_CMPS_U:
			instr->opc   = cond->opc;
			instr->flags = cond->flags;
			instr->cat2  = cond->cat2;
			instr->address = cond->address;
			instr->regs[1] = cond->regs[1];
			instr->regs[2] = cond->regs[2];
			instr->barrier_class |= cond->barrier_class;
			instr->barrier_conflict |= cond->barrier_conflict;
			unuse(cond);
			break;
		default:
			break;
		}
	}

	/* Handle converting a sam.s2en (taking samp/tex idx params via
	 * register) into a normal sam (encoding immediate samp/tex idx)
	 * if they are immediate.  This saves some instructions and regs
	 * in the common case where we know samp/tex at compile time:
	 */
	if (is_tex(instr) && (instr->flags & IR3_INSTR_S2EN) &&
			!(ir3_shader_debug & IR3_DBG_FORCES2EN)) {
		/* The first src will be a fan-in (collect), if both of it's
		 * two sources are mov from imm, then we can
		 */
		struct ir3_instruction *samp_tex = ssa(instr->regs[1]);

		debug_assert(samp_tex->opc == OPC_META_FI);

		struct ir3_instruction *samp = ssa(samp_tex->regs[1]);
		struct ir3_instruction *tex  = ssa(samp_tex->regs[2]);

		if ((samp->opc == OPC_MOV) &&
				(samp->regs[1]->flags & IR3_REG_IMMED) &&
				(tex->opc == OPC_MOV) &&
				(tex->regs[1]->flags & IR3_REG_IMMED)) {
			instr->flags &= ~IR3_INSTR_S2EN;
			instr->cat5.samp = samp->regs[1]->iim_val;
			instr->cat5.tex  = tex->regs[1]->iim_val;
			instr->regs[1]->instr = NULL;
		}
	}
}