void ir3_block_depth(struct ir3_block *block) { unsigned i; block->head = NULL; ir3_clear_mark(block->shader); for (i = 0; i < block->noutputs; i++) if (block->outputs[i]) ir3_instr_depth(block->outputs[i]); /* mark un-used instructions: */ for (i = 0; i < block->shader->instrs_count; i++) { struct ir3_instruction *instr = block->shader->instrs[i]; /* just consider instructions within this block: */ if (instr->block != block) continue; if (!ir3_instr_check_mark(instr)) instr->depth = DEPTH_UNUSED; } /* cleanup unused inputs: */ for (i = 0; i < block->ninputs; i++) { struct ir3_instruction *in = block->inputs[i]; if (in && (in->depth == DEPTH_UNUSED)) block->inputs[i] = NULL; } }
/*
 * Recursively compute instr->depth from its SSA sources.
 *
 * instr:    instruction whose depth is computed.
 * boost:    extra amount added to every per-source delay; passed unchanged
 *           to the recursive calls.
 * falsedep: when false, IR3_INSTR_UNUSED is cleared from instr->flags; when
 *           true the flag is left alone but the walk still proceeds.
 *
 * If ir3_instr_check_mark(instr) returns true the function returns without
 * touching instr->depth.  Otherwise depth starts at 0 and becomes the
 * maximum, over sources with index != 0, of
 *     ir3_delayslots(src, instr, i) + src->depth + boost.
 * The source at index 0 is still visited recursively but does not
 * contribute to the depth.
 *
 * NOTE(review): the function's closing brace is not visible here, so any
 * statements that follow the source loop are not shown -- confirm against
 * the complete definition.
 */
static void ir3_instr_depth(struct ir3_instruction *instr, unsigned boost, bool falsedep) { struct ir3_instruction *src; /* don't mark falsedeps as used, but otherwise process them normally: */ if (!falsedep) instr->flags &= ~IR3_INSTR_UNUSED; if (ir3_instr_check_mark(instr)) return; instr->depth = 0; foreach_ssa_src_n(src, i, instr) { unsigned sd; /* visit child to compute its depth: */ ir3_instr_depth(src, boost, __is_false_dep(instr, i)); /* for array writes, no need to delay on previous write: */ if (i == 0) continue; sd = ir3_delayslots(src, instr, i) + src->depth; sd += boost; instr->depth = MAX2(instr->depth, sd); }
/*
 * Recursively compute instr->depth from its SSA sources.
 *
 * If ir3_instr_check_mark(instr) returns true the function returns without
 * touching instr->depth.  Otherwise depth starts at 0 and becomes the
 * maximum, over sources with index != 0, of
 *     ir3_delayslots(src, instr, i) + src->depth.
 * The source at index 0 is still visited recursively but does not
 * contribute to the depth.
 *
 * NOTE(review): the function's closing brace is not visible here, so any
 * statements that follow the source loop are not shown -- confirm against
 * the complete definition.
 */
static void ir3_instr_depth(struct ir3_instruction *instr) { struct ir3_instruction *src; /* if we've already visited this instruction, bail now: */ if (ir3_instr_check_mark(instr)) return; instr->depth = 0; foreach_ssa_src_n(src, i, instr) { unsigned sd; /* visit child to compute its depth: */ ir3_instr_depth(src); /* for array writes, no need to delay on previous write: */ if (i == 0) continue; sd = ir3_delayslots(src, instr, i) + src->depth; instr->depth = MAX2(instr->depth, sd); }
static void instr_find_neighbors(struct ir3_instruction *instr) { struct ir3_instruction *src; if (ir3_instr_check_mark(instr)) return; if (instr->opc == OPC_META_FI) group_n(&instr_ops, instr, instr->regs_count - 1); foreach_ssa_src(src, instr) instr_find_neighbors(src); }
/**
 * Recompute instruction depths for a block and discard inputs the depth
 * walk did not mark.
 *
 * The walk starts from every non-NULL entry of block->outputs.  Any
 * non-NULL block input for which ir3_instr_check_mark() then returns
 * false is replaced by NULL.
 *
 * @param block  block to process; block->head is reset to NULL first.
 */
void ir3_block_depth(struct ir3_block *block)
{
	unsigned slot;

	block->head = NULL;
	ir3_clear_mark(block->shader);

	/* compute depths starting from each populated output slot: */
	for (slot = 0; slot < block->noutputs; slot++) {
		struct ir3_instruction *out = block->outputs[slot];

		if (out)
			ir3_instr_depth(out);
	}

	/* whatever input is still unmarked after the walk is not used: */
	for (slot = 0; slot < block->ninputs; slot++) {
		struct ir3_instruction *in = block->inputs[slot];

		if (!in)
			continue;

		if (ir3_instr_check_mark(in))
			continue;

		block->inputs[slot] = NULL;
	}
}
/*
 * Copy-propagation step for a single instruction.
 *
 * instr: instruction to process.
 * keep:  when true, instr is not collapsed even if it is an eligible mov.
 *
 * Returns the instruction that should be used in place of instr:
 *  - instr itself if ir3_instr_check_mark() says it was already visited;
 *  - the result of instr_cp_fanin() for a meta OPC_META_FI instruction;
 *  - for an eligible mov with keep == false, the result of running
 *    instr_cp() on the instruction behind the mov's regs[1];
 *  - otherwise instr, after walk_children() has processed its sources.
 */
static struct ir3_instruction *
instr_cp(struct ir3_instruction *instr, bool keep)
{
	struct ir3_instruction *replacement = instr;

	if (!ir3_instr_check_mark(instr)) {
		if (is_meta(instr) && (instr->opc == OPC_META_FI)) {
			replacement = instr_cp_fanin(instr);
		} else if (is_eligible_mov(instr) && !keep) {
			/* look through the mov at whatever feeds it: */
			struct ir3_register *mov_src = instr->regs[1];

			replacement = instr_cp(mov_src->instr, false);
		} else {
			walk_children(instr, false);
		}
	}

	return replacement;
}
/*
 * Recursively compute the depth of an instruction and queue it via
 * insert_by_depth().
 *
 * Nothing happens if ir3_instr_check_mark() reports the instruction as
 * already visited.  Otherwise the depth is the maximum, over all SSA
 * sources, of the source's own depth plus ir3_delayslots() for that
 * source slot; non-meta instructions then add one on top.
 */
static void ir3_instr_depth(struct ir3_instruction *instr)
{
	unsigned n;

	if (ir3_instr_check_mark(instr))
		return;

	instr->depth = 0;

	/* n is the source index; regs[0] is skipped, so source n is regs[n + 1]: */
	for (n = 0; n + 1 < instr->regs_count; n++) {
		struct ir3_register *reg = instr->regs[n + 1];
		unsigned ready;

		if (!(reg->flags & IR3_REG_SSA))
			continue;

		/* the producer's depth has to be known before ours: */
		ir3_instr_depth(reg->instr);

		ready = ir3_delayslots(reg->instr, instr, n) + reg->instr->depth;
		instr->depth = MAX2(instr->depth, ready);
	}

	/* Meta instructions are treated as free here.  That is not entirely
	 * accurate: a PHI may become a real instruction, and fan-in/out may
	 * need extra movs in edge cases, so a worst-case estimate for those
	 * might be preferable.
	 */
	if (!is_meta(instr))
		instr->depth += 1;

	insert_by_depth(instr);
}
/** * Find instruction src's which are mov's that can be collapsed, replacing * the mov dst with the mov src */ static void instr_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr) { struct ir3_register *reg; if (instr->regs_count == 0) return; if (ir3_instr_check_mark(instr)) return; /* walk down the graph from each src: */ foreach_src_n(reg, n, instr) { struct ir3_instruction *src = ssa(reg); if (!src) continue; instr_cp(ctx, src); /* TODO non-indirect access we could figure out which register * we actually want and allow cp.. */ if (reg->flags & IR3_REG_ARRAY) continue; reg_cp(ctx, instr, reg, n); } if (instr->regs[0]->flags & IR3_REG_ARRAY) { struct ir3_instruction *src = ssa(instr->regs[0]); if (src) instr_cp(ctx, src); } if (instr->address) { instr_cp(ctx, instr->address); ir3_instr_set_address(instr, eliminate_output_mov(instr->address)); } /* we can end up with extra cmps.s from frontend, which uses a * * cmps.s p0.x, cond, 0 * * as a way to mov into the predicate register. But frequently 'cond' * is itself a cmps.s/cmps.f/cmps.u. So detect this special case and * just re-write the instruction writing predicate register to get rid * of the double cmps. */ if ((instr->opc == OPC_CMPS_S) && (instr->regs[0]->num == regid(REG_P0, 0)) && ssa(instr->regs[1]) && (instr->regs[2]->flags & IR3_REG_IMMED) && (instr->regs[2]->iim_val == 0)) { struct ir3_instruction *cond = ssa(instr->regs[1]); switch (cond->opc) { case OPC_CMPS_S: case OPC_CMPS_F: case OPC_CMPS_U: instr->opc = cond->opc; instr->flags = cond->flags; instr->cat2 = cond->cat2; instr->address = cond->address; instr->regs[1] = cond->regs[1]; instr->regs[2] = cond->regs[2]; break; default: break; } } }
/** * Find instruction src's which are mov's that can be collapsed, replacing * the mov dst with the mov src */ static void instr_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr) { struct ir3_register *reg; if (instr->regs_count == 0) return; if (ir3_instr_check_mark(instr)) return; /* walk down the graph from each src: */ foreach_src_n(reg, n, instr) { struct ir3_instruction *src = ssa(reg); if (!src) continue; instr_cp(ctx, src); /* TODO non-indirect access we could figure out which register * we actually want and allow cp.. */ if (reg->flags & IR3_REG_ARRAY) continue; /* Don't CP absneg into meta instructions, that won't end well: */ if (is_meta(instr) && (src->opc != OPC_MOV)) continue; reg_cp(ctx, instr, reg, n); } if (instr->regs[0]->flags & IR3_REG_ARRAY) { struct ir3_instruction *src = ssa(instr->regs[0]); if (src) instr_cp(ctx, src); } if (instr->address) { instr_cp(ctx, instr->address); ir3_instr_set_address(instr, eliminate_output_mov(instr->address)); } /* we can end up with extra cmps.s from frontend, which uses a * * cmps.s p0.x, cond, 0 * * as a way to mov into the predicate register. But frequently 'cond' * is itself a cmps.s/cmps.f/cmps.u. So detect this special case and * just re-write the instruction writing predicate register to get rid * of the double cmps. 
*/ if ((instr->opc == OPC_CMPS_S) && (instr->regs[0]->num == regid(REG_P0, 0)) && ssa(instr->regs[1]) && (instr->regs[2]->flags & IR3_REG_IMMED) && (instr->regs[2]->iim_val == 0)) { struct ir3_instruction *cond = ssa(instr->regs[1]); switch (cond->opc) { case OPC_CMPS_S: case OPC_CMPS_F: case OPC_CMPS_U: instr->opc = cond->opc; instr->flags = cond->flags; instr->cat2 = cond->cat2; instr->address = cond->address; instr->regs[1] = cond->regs[1]; instr->regs[2] = cond->regs[2]; instr->barrier_class |= cond->barrier_class; instr->barrier_conflict |= cond->barrier_conflict; unuse(cond); break; default: break; } } /* Handle converting a sam.s2en (taking samp/tex idx params via * register) into a normal sam (encoding immediate samp/tex idx) * if they are immediate. This saves some instructions and regs * in the common case where we know samp/tex at compile time: */ if (is_tex(instr) && (instr->flags & IR3_INSTR_S2EN) && !(ir3_shader_debug & IR3_DBG_FORCES2EN)) { /* The first src will be a fan-in (collect), if both of it's * two sources are mov from imm, then we can */ struct ir3_instruction *samp_tex = ssa(instr->regs[1]); debug_assert(samp_tex->opc == OPC_META_FI); struct ir3_instruction *samp = ssa(samp_tex->regs[1]); struct ir3_instruction *tex = ssa(samp_tex->regs[2]); if ((samp->opc == OPC_MOV) && (samp->regs[1]->flags & IR3_REG_IMMED) && (tex->opc == OPC_MOV) && (tex->regs[1]->flags & IR3_REG_IMMED)) { instr->flags &= ~IR3_INSTR_S2EN; instr->cat5.samp = samp->regs[1]->iim_val; instr->cat5.tex = tex->regs[1]->iim_val; instr->regs[1]->instr = NULL; } } }