Ejemplo n.º 1
0
/*
 * Recompute instruction depths for a block by walking from each of its
 * outputs, then flag every instruction of the block that the walk did
 * not visit as DEPTH_UNUSED and drop unused block inputs.
 */
void ir3_block_depth(struct ir3_block *block)
{
	unsigned idx;

	block->head = NULL;

	/* clear the marks, then visit everything reachable from the outputs: */
	ir3_clear_mark(block->shader);
	for (idx = 0; idx < block->noutputs; idx++) {
		if (!block->outputs[idx])
			continue;
		ir3_instr_depth(block->outputs[idx]);
	}

	/* anything belonging to this block that is still unvisited is unused;
	 * instructions owned by other blocks are left untouched:
	 */
	for (idx = 0; idx < block->shader->instrs_count; idx++) {
		struct ir3_instruction *cur = block->shader->instrs[idx];

		if ((cur->block == block) && !ir3_instr_check_mark(cur))
			cur->depth = DEPTH_UNUSED;
	}

	/* forget about block inputs that turned out to be unused: */
	for (idx = 0; idx < block->ninputs; idx++) {
		struct ir3_instruction *input = block->inputs[idx];

		if (!input)
			continue;

		if (input->depth == DEPTH_UNUSED)
			block->inputs[idx] = NULL;
	}
}
Ejemplo n.º 2
0
/*
 * Recursively compute instr->depth from the depths of its SSA sources.
 *
 * instr:    instruction whose depth is (re)computed
 * boost:    constant added to every source's contribution to the depth,
 *           and passed unchanged to the recursive calls
 * falsedep: when false, IR3_INSTR_UNUSED is cleared on instr; when true
 *           the flag is left as it is
 *
 * NOTE(review): this excerpt stops right after the source loop; whatever
 * follows in the original function is not visible here.
 */
static void
ir3_instr_depth(struct ir3_instruction *instr, unsigned boost, bool falsedep)
{
	struct ir3_instruction *src;

	/* don't mark falsedep's as used, but otherwise process them normally: */
	if (!falsedep)
		instr->flags &= ~IR3_INSTR_UNUSED;

	/* if we've already visited this instruction, bail now: */
	if (ir3_instr_check_mark(instr))
		return;

	instr->depth = 0;

	/* the index variable 'i' is not declared in this function, so it is
	 * presumably introduced by the foreach_ssa_src_n() macro
	 */
	foreach_ssa_src_n(src, i, instr) {
		unsigned sd;

		/* visit child to compute its depth: */
		ir3_instr_depth(src, boost, __is_false_dep(instr, i));

		/* for array writes, no need to delay on previous write: */
		if (i == 0)
			continue;

		/* depth contributed by this source: delay slots between src and
		 * instr, plus the source's own depth, plus the boost
		 */
		sd = ir3_delayslots(src, instr, i) + src->depth;
		sd += boost;

		/* instr ends up as deep as its deepest source path: */
		instr->depth = MAX2(instr->depth, sd);
	}
Ejemplo n.º 3
0
/*
 * Recursively compute instr->depth as the maximum, over its SSA sources,
 * of (delay slots between source and instr + depth of the source).
 *
 * NOTE(review): this excerpt stops right after the source loop; whatever
 * follows in the original function is not visible here.
 */
static void
ir3_instr_depth(struct ir3_instruction *instr)
{
	struct ir3_instruction *src;

	/* if we've already visited this instruction, bail now: */
	if (ir3_instr_check_mark(instr))
		return;

	instr->depth = 0;

	/* the index variable 'i' is not declared in this function, so it is
	 * presumably introduced by the foreach_ssa_src_n() macro
	 */
	foreach_ssa_src_n(src, i, instr) {
		unsigned sd;

		/* visit child to compute its depth: */
		ir3_instr_depth(src);

		/* for array writes, no need to delay on previous write: */
		if (i == 0)
			continue;

		/* depth contributed by this source: */
		sd = ir3_delayslots(src, instr, i) + src->depth;

		/* instr ends up as deep as its deepest source path: */
		instr->depth = MAX2(instr->depth, sd);
	}
Ejemplo n.º 4
0
static void
instr_find_neighbors(struct ir3_instruction *instr)
{
	struct ir3_instruction *src;

	if (ir3_instr_check_mark(instr))
		return;

	if (instr->opc == OPC_META_FI)
		group_n(&instr_ops, instr, instr->regs_count - 1);

	foreach_ssa_src(src, instr)
		instr_find_neighbors(src);
}
Ejemplo n.º 5
0
/*
 * Recompute instruction depths for a block by walking from each of its
 * outputs, then drop every block input the walk never visited.
 */
void ir3_block_depth(struct ir3_block *block)
{
	unsigned idx;

	block->head = NULL;

	/* clear the marks, then visit everything reachable from the outputs: */
	ir3_clear_mark(block->shader);
	for (idx = 0; idx < block->noutputs; idx++) {
		if (!block->outputs[idx])
			continue;
		ir3_instr_depth(block->outputs[idx]);
	}

	/* an input that is still unvisited at this point is unused: */
	for (idx = 0; idx < block->ninputs; idx++) {
		struct ir3_instruction *input = block->inputs[idx];

		if (!input)
			continue;

		if (!ir3_instr_check_mark(input))
			block->inputs[idx] = NULL;
	}
}
Ejemplo n.º 6
0
/*
 * Copy-propagation step for a single instruction.
 *
 * instr: instruction to process
 * keep:  when true, an eligible mov is not skipped over
 *
 * Returns the instruction that should be used in place of instr: instr
 * itself, the result of instr_cp_fanin() for a fan-in, or the result of
 * recursing into the source of an eligible mov.
 */
static struct ir3_instruction *
instr_cp(struct ir3_instruction *instr, bool keep)
{
	/* already visited: hand the instruction back unchanged */
	if (ir3_instr_check_mark(instr))
		return instr;

	/* fan-ins get their own handling: */
	if (is_meta(instr)) {
		if (instr->opc == OPC_META_FI)
			return instr_cp_fanin(instr);
	}

	if (is_eligible_mov(instr)) {
		if (!keep) {
			/* skip the mov and continue from whatever it copies: */
			return instr_cp(instr->regs[1]->instr, false);
		}
	}

	walk_children(instr, false);
	return instr;
}
Ejemplo n.º 7
0
/*
 * Recursively compute instr->depth from its SSA sources, then insert
 * the instruction via insert_by_depth().
 */
static void ir3_instr_depth(struct ir3_instruction *instr)
{
	unsigned n;

	/* nothing to do for an instruction that was visited before: */
	if (ir3_instr_check_mark(instr))
		return;

	instr->depth = 0;

	/* regs[0] is skipped, only regs[1..] are examined: */
	for (n = 1; n < instr->regs_count; n++) {
		struct ir3_register *reg = instr->regs[n];
		struct ir3_instruction *producer;
		unsigned cost;

		if (!(reg->flags & IR3_REG_SSA))
			continue;

		producer = reg->instr;

		/* the producer's depth has to be known first: */
		ir3_instr_depth(producer);

		cost = ir3_delayslots(producer, instr, n - 1) + producer->depth;

		instr->depth = MAX2(instr->depth, cost);
	}

	/* Meta-instructions are not counted as a cycle.  That is a
	 * simplification: PHI might translate to a real instruction, and
	 * fan-in/out may need extra mov's in edge cases, so it may be
	 * worth considering the worst case for those here.
	 */
	if (!is_meta(instr))
		instr->depth += 1;

	insert_by_depth(instr);
}
Ejemplo n.º 8
0
/**
 * Find instruction src's which are mov's that can be collapsed, replacing
 * the mov dst with the mov src
 */
static void
instr_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr)
{
	struct ir3_register *reg;

	if (instr->regs_count == 0)
		return;

	if (ir3_instr_check_mark(instr))
		return;

	/* walk down the graph from each src: */
	foreach_src_n(reg, n, instr) {
		struct ir3_instruction *src = ssa(reg);

		if (!src)
			continue;

		instr_cp(ctx, src);

		/* TODO non-indirect access we could figure out which register
		 * we actually want and allow cp..
		 */
		if (reg->flags & IR3_REG_ARRAY)
			continue;

		reg_cp(ctx, instr, reg, n);
	}

	if (instr->regs[0]->flags & IR3_REG_ARRAY) {
		struct ir3_instruction *src = ssa(instr->regs[0]);
		if (src)
			instr_cp(ctx, src);
	}

	if (instr->address) {
		instr_cp(ctx, instr->address);
		ir3_instr_set_address(instr, eliminate_output_mov(instr->address));
	}

	/* we can end up with extra cmps.s from frontend, which uses a
	 *
	 *    cmps.s p0.x, cond, 0
	 *
	 * as a way to mov into the predicate register.  But frequently 'cond'
	 * is itself a cmps.s/cmps.f/cmps.u.  So detect this special case and
	 * just re-write the instruction writing predicate register to get rid
	 * of the double cmps.
	 */
	if ((instr->opc == OPC_CMPS_S) &&
			(instr->regs[0]->num == regid(REG_P0, 0)) &&
			ssa(instr->regs[1]) &&
			(instr->regs[2]->flags & IR3_REG_IMMED) &&
			(instr->regs[2]->iim_val == 0)) {
		struct ir3_instruction *cond = ssa(instr->regs[1]);
		switch (cond->opc) {
		case OPC_CMPS_S:
		case OPC_CMPS_F:
		case OPC_CMPS_U:
			instr->opc   = cond->opc;
			instr->flags = cond->flags;
			instr->cat2  = cond->cat2;
			instr->address = cond->address;
			instr->regs[1] = cond->regs[1];
			instr->regs[2] = cond->regs[2];
			break;
		default:
			break;
		}
	}
}
Ejemplo n.º 9
0
/**
 * Find instruction src's which are mov's that can be collapsed, replacing
 * the mov dst with the mov src.
 *
 * The producers of all sources are processed first (depth-first), and
 * each instruction is processed at most once.  On top of the plain mov
 * collapsing, two special cases are handled at the end: folding a
 * "cmps.s p0.x, cond, 0" into the comparison that produced 'cond', and
 * turning a sam.s2en with immediate samp/tex indices into a normal sam.
 *
 * @param ctx    copy-propagation context, passed through to reg_cp()
 * @param instr  instruction to process; modified in place
 */
static void
instr_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr)
{
	struct ir3_register *reg;

	if (instr->regs_count == 0)
		return;

	/* if we've already visited this instruction, bail now: */
	if (ir3_instr_check_mark(instr))
		return;

	/* walk down the graph from each src: */
	foreach_src_n(reg, n, instr) {
		struct ir3_instruction *src = ssa(reg);

		if (!src)
			continue;

		instr_cp(ctx, src);

		/* TODO non-indirect access we could figure out which register
		 * we actually want and allow cp..
		 */
		if (reg->flags & IR3_REG_ARRAY)
			continue;

		/* Don't CP absneg into meta instructions, that won't end well: */
		if (is_meta(instr) && (src->opc != OPC_MOV))
			continue;

		reg_cp(ctx, instr, reg, n);
	}

	/* an array dst can carry an ssa link of its own, follow that too: */
	if (instr->regs[0]->flags & IR3_REG_ARRAY) {
		struct ir3_instruction *src = ssa(instr->regs[0]);
		if (src)
			instr_cp(ctx, src);
	}

	if (instr->address) {
		instr_cp(ctx, instr->address);
		ir3_instr_set_address(instr, eliminate_output_mov(instr->address));
	}

	/* we can end up with extra cmps.s from frontend, which uses a
	 *
	 *    cmps.s p0.x, cond, 0
	 *
	 * as a way to mov into the predicate register.  But frequently 'cond'
	 * is itself a cmps.s/cmps.f/cmps.u.  So detect this special case and
	 * just re-write the instruction writing predicate register to get rid
	 * of the double cmps.
	 */
	if ((instr->opc == OPC_CMPS_S) &&
			(instr->regs[0]->num == regid(REG_P0, 0)) &&
			ssa(instr->regs[1]) &&
			(instr->regs[2]->flags & IR3_REG_IMMED) &&
			(instr->regs[2]->iim_val == 0)) {
		struct ir3_instruction *cond = ssa(instr->regs[1]);
		switch (cond->opc) {
		case OPC_CMPS_S:
		case OPC_CMPS_F:
		case OPC_CMPS_U:
			instr->opc   = cond->opc;
			instr->flags = cond->flags;
			instr->cat2  = cond->cat2;
			instr->address = cond->address;
			instr->regs[1] = cond->regs[1];
			instr->regs[2] = cond->regs[2];
			/* instr now stands in for cond, so it inherits cond's
			 * barrier bits, and cond loses this use:
			 */
			instr->barrier_class |= cond->barrier_class;
			instr->barrier_conflict |= cond->barrier_conflict;
			unuse(cond);
			break;
		default:
			break;
		}
	}

	/* Handle converting a sam.s2en (taking samp/tex idx params via
	 * register) into a normal sam (encoding immediate samp/tex idx)
	 * if they are immediate.  This saves some instructions and regs
	 * in the common case where we know samp/tex at compile time:
	 */
	if (is_tex(instr) && (instr->flags & IR3_INSTR_S2EN) &&
			!(ir3_shader_debug & IR3_DBG_FORCES2EN)) {
		/* The first src is expected to be a fan-in (collect).  If both
		 * of its two sources are mov's from an immediate, the values
		 * can be encoded directly in the instruction.
		 */
		struct ir3_instruction *samp_tex = ssa(instr->regs[1]);

		debug_assert(samp_tex && (samp_tex->opc == OPC_META_FI));

		/* ssa() can return NULL (see the checks on its result above), and
		 * debug_assert may be compiled out in non-debug builds (TODO
		 * confirm), so do not dereference anything unchecked here.  If
		 * the expected shape is not found, the instruction is simply
		 * left in its s2en form.
		 */
		if (samp_tex && (samp_tex->opc == OPC_META_FI) &&
				(samp_tex->regs_count > 2)) {
			struct ir3_instruction *samp = ssa(samp_tex->regs[1]);
			struct ir3_instruction *tex  = ssa(samp_tex->regs[2]);

			if (samp && tex &&
					(samp->opc == OPC_MOV) &&
					(samp->regs[1]->flags & IR3_REG_IMMED) &&
					(tex->opc == OPC_MOV) &&
					(tex->regs[1]->flags & IR3_REG_IMMED)) {
				instr->flags &= ~IR3_INSTR_S2EN;
				instr->cat5.samp = samp->regs[1]->iim_val;
				instr->cat5.tex  = tex->regs[1]->iim_val;
				/* the fan-in is no longer referenced by this src: */
				instr->regs[1]->instr = NULL;
			}
		}
	}
}