void ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so) { struct ir3_cp_ctx ctx = { .shader = ir, .so = so, }; ir3_clear_mark(ir); for (unsigned i = 0; i < ir->noutputs; i++) { if (ir->outputs[i]) { instr_cp(&ctx, ir->outputs[i]); ir->outputs[i] = eliminate_output_mov(ir->outputs[i]); } } list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { if (block->condition) { instr_cp(&ctx, block->condition); block->condition = eliminate_output_mov(block->condition); } for (unsigned i = 0; i < block->keeps_count; i++) { instr_cp(&ctx, block->keeps[i]); block->keeps[i] = eliminate_output_mov(block->keeps[i]); } } }
void ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so) { struct ir3_cp_ctx ctx = { .shader = ir, .so = so, }; /* This is a bit annoying, and probably wouldn't be necessary if we * tracked a reverse link from producing instruction to consumer. * But we need to know when we've eliminated the last consumer of * a mov, so we need to do a pass to first count consumers of a * mov. */ list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { struct ir3_instruction *src; /* by the way, we don't account for false-dep's, so the CP * pass should always happen before false-dep's are inserted */ debug_assert(instr->deps_count == 0); foreach_ssa_src(src, instr) { src->use_count++; } } } ir3_clear_mark(ir); for (unsigned i = 0; i < ir->noutputs; i++) { if (ir->outputs[i]) { instr_cp(&ctx, ir->outputs[i]); ir->outputs[i] = eliminate_output_mov(ir->outputs[i]); } } list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { if (block->condition) { instr_cp(&ctx, block->condition); block->condition = eliminate_output_mov(block->condition); } for (unsigned i = 0; i < block->keeps_count; i++) { instr_cp(&ctx, block->keeps[i]); block->keeps[i] = eliminate_output_mov(block->keeps[i]); } } }
/** * Find instruction src's which are mov's that can be collapsed, replacing * the mov dst with the mov src */ static void instr_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr) { struct ir3_register *reg; if (instr->regs_count == 0) return; if (ir3_instr_check_mark(instr)) return; /* walk down the graph from each src: */ foreach_src_n(reg, n, instr) { struct ir3_instruction *src = ssa(reg); if (!src) continue; instr_cp(ctx, src); /* TODO non-indirect access we could figure out which register * we actually want and allow cp.. */ if (reg->flags & IR3_REG_ARRAY) continue; reg_cp(ctx, instr, reg, n); } if (instr->regs[0]->flags & IR3_REG_ARRAY) { struct ir3_instruction *src = ssa(instr->regs[0]); if (src) instr_cp(ctx, src); } if (instr->address) { instr_cp(ctx, instr->address); ir3_instr_set_address(instr, eliminate_output_mov(instr->address)); } /* we can end up with extra cmps.s from frontend, which uses a * * cmps.s p0.x, cond, 0 * * as a way to mov into the predicate register. But frequently 'cond' * is itself a cmps.s/cmps.f/cmps.u. So detect this special case and * just re-write the instruction writing predicate register to get rid * of the double cmps. */ if ((instr->opc == OPC_CMPS_S) && (instr->regs[0]->num == regid(REG_P0, 0)) && ssa(instr->regs[1]) && (instr->regs[2]->flags & IR3_REG_IMMED) && (instr->regs[2]->iim_val == 0)) { struct ir3_instruction *cond = ssa(instr->regs[1]); switch (cond->opc) { case OPC_CMPS_S: case OPC_CMPS_F: case OPC_CMPS_U: instr->opc = cond->opc; instr->flags = cond->flags; instr->cat2 = cond->cat2; instr->address = cond->address; instr->regs[1] = cond->regs[1]; instr->regs[2] = cond->regs[2]; break; default: break; } } }
/** * Find instruction src's which are mov's that can be collapsed, replacing * the mov dst with the mov src */ static void instr_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr) { struct ir3_register *reg; if (instr->regs_count == 0) return; if (ir3_instr_check_mark(instr)) return; /* walk down the graph from each src: */ foreach_src_n(reg, n, instr) { struct ir3_instruction *src = ssa(reg); if (!src) continue; instr_cp(ctx, src); /* TODO non-indirect access we could figure out which register * we actually want and allow cp.. */ if (reg->flags & IR3_REG_ARRAY) continue; /* Don't CP absneg into meta instructions, that won't end well: */ if (is_meta(instr) && (src->opc != OPC_MOV)) continue; reg_cp(ctx, instr, reg, n); } if (instr->regs[0]->flags & IR3_REG_ARRAY) { struct ir3_instruction *src = ssa(instr->regs[0]); if (src) instr_cp(ctx, src); } if (instr->address) { instr_cp(ctx, instr->address); ir3_instr_set_address(instr, eliminate_output_mov(instr->address)); } /* we can end up with extra cmps.s from frontend, which uses a * * cmps.s p0.x, cond, 0 * * as a way to mov into the predicate register. But frequently 'cond' * is itself a cmps.s/cmps.f/cmps.u. So detect this special case and * just re-write the instruction writing predicate register to get rid * of the double cmps. */ if ((instr->opc == OPC_CMPS_S) && (instr->regs[0]->num == regid(REG_P0, 0)) && ssa(instr->regs[1]) && (instr->regs[2]->flags & IR3_REG_IMMED) && (instr->regs[2]->iim_val == 0)) { struct ir3_instruction *cond = ssa(instr->regs[1]); switch (cond->opc) { case OPC_CMPS_S: case OPC_CMPS_F: case OPC_CMPS_U: instr->opc = cond->opc; instr->flags = cond->flags; instr->cat2 = cond->cat2; instr->address = cond->address; instr->regs[1] = cond->regs[1]; instr->regs[2] = cond->regs[2]; instr->barrier_class |= cond->barrier_class; instr->barrier_conflict |= cond->barrier_conflict; unuse(cond); break; default: break; } } /* Handle converting a sam.s2en (taking samp/tex idx params via * register) into a normal sam (encoding immediate samp/tex idx) * if they are immediate. This saves some instructions and regs * in the common case where we know samp/tex at compile time: */ if (is_tex(instr) && (instr->flags & IR3_INSTR_S2EN) && !(ir3_shader_debug & IR3_DBG_FORCES2EN)) { /* The first src will be a fan-in (collect), if both of it's * two sources are mov from imm, then we can */ struct ir3_instruction *samp_tex = ssa(instr->regs[1]); debug_assert(samp_tex->opc == OPC_META_FI); struct ir3_instruction *samp = ssa(samp_tex->regs[1]); struct ir3_instruction *tex = ssa(samp_tex->regs[2]); if ((samp->opc == OPC_MOV) && (samp->regs[1]->flags & IR3_REG_IMMED) && (tex->opc == OPC_MOV) && (tex->regs[1]->flags & IR3_REG_IMMED)) { instr->flags &= ~IR3_INSTR_S2EN; instr->cat5.samp = samp->regs[1]->iim_val; instr->cat5.tex = tex->regs[1]->iim_val; instr->regs[1]->instr = NULL; } } }