static bool
is_zero(struct vc4_compile *c, struct qreg reg)
{
        reg = qir_follow_movs(c, reg);
        return is_constant_value(c, reg, 0);
}
static bool
is_1f(struct vc4_compile *c, struct qreg reg)
{
        reg = qir_follow_movs(c, reg);
        return is_constant_value(c, reg, fui(1.0));
}
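/* is_zero()/is_1f() above rely on is_constant_value(), which is not part of
 * this excerpt.  The following is a plausible sketch only, not necessarily
 * the driver's actual helper: it assumes a QFILE_UNIF constant keeps its
 * value in c->uniform_data[] and a QFILE_SMALL_IMM keeps it directly in
 * reg.index, matching how the pass below rewrites sources.  In a real file
 * it would have to be defined before its users.
 */
static bool
is_constant_value(struct vc4_compile *c, struct qreg reg, uint32_t val)
{
        if (reg.file == QFILE_UNIF &&
            !reg.pack &&
            c->uniform_contents[reg.index] == QUNIFORM_CONSTANT &&
            c->uniform_data[reg.index] == val) {
                return true;
        }

        if (reg.file == QFILE_SMALL_IMM && reg.index == val)
                return true;

        return false;
}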
bool
qir_opt_small_immediates(struct vc4_compile *c)
{
        bool progress = false;

        qir_for_each_inst_inorder(inst, c) {
                /* The small immediate value sits in the raddr B field, so we
                 * can't have 2 small immediates in one instruction (unless
                 * they're the same value, but that should be optimized away
                 * elsewhere).
                 */
                bool uses_small_imm = false;
                for (int i = 0; i < qir_get_nsrc(inst); i++) {
                        if (inst->src[i].file == QFILE_SMALL_IMM)
                                uses_small_imm = true;
                }
                if (uses_small_imm)
                        continue;

                /* Don't propagate small immediates into the top-end bounds
                 * checking for indirect UBO loads.  The kernel doesn't parse
                 * small immediates and rejects the shader in this case.  UBO
                 * loads are much more expensive than the uniform load, and
                 * indirect UBO regions are usually much larger than a small
                 * immediate, so it's not worth updating the kernel to allow
                 * optimizing it.
                 */
                if (inst->op == QOP_MIN_NOIMM)
                        continue;

                for (int i = 0; i < qir_get_nsrc(inst); i++) {
                        struct qreg src = qir_follow_movs(c, inst->src[i]);

                        if (src.file != QFILE_UNIF ||
                            src.pack ||
                            c->uniform_contents[src.index] !=
                            QUNIFORM_CONSTANT) {
                                continue;
                        }

                        if (qir_is_tex(inst) &&
                            i == qir_get_tex_uniform_src(inst)) {
                                /* No turning the implicit uniform read into
                                 * an immediate.
                                 */
                                continue;
                        }

                        uint32_t imm = c->uniform_data[src.index];
                        uint32_t small_imm = qpu_encode_small_immediate(imm);
                        if (small_imm == ~0)
                                continue;

                        if (debug) {
                                fprintf(stderr, "opt_small_immediate() from: ");
                                qir_dump_inst(c, inst);
                                fprintf(stderr, "\n");
                        }

                        inst->src[i].file = QFILE_SMALL_IMM;
                        inst->src[i].index = imm;

                        if (debug) {
                                fprintf(stderr, "to: ");
                                qir_dump_inst(c, inst);
                                fprintf(stderr, "\n");
                        }

                        progress = true;
                        break;
                }
        }

        return progress;
}
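/* For reference, a sketch of the encoder the pass depends on.  The real
 * qpu_encode_small_immediate() lives in the QPU backend and is not shown
 * here; the version below is an assumption for illustration, based on the
 * QPU's small-immediate field covering the integers -16..15 and the
 * power-of-two floats 1.0..128.0 and 1/256..1/2.  Anything else yields ~0,
 * which the pass above treats as "not encodable".
 */
static uint32_t
sketch_encode_small_immediate(uint32_t i)
{
        if (i <= 15)
                return i;                       /* ints 0..15 -> 0..15 */
        if ((int)i >= -16 && (int)i < 0)
                return 32 + i;                  /* ints -16..-1 -> 16..31 */

        /* Power-of-two floats 1.0, 2.0, ..., 128.0 -> 32..39 */
        for (int j = 0; j < 8; j++) {
                if (i == fui(1 << j))
                        return 32 + j;
        }

        /* Power-of-two floats 1/256, 1/128, ..., 1/2 -> 40..47 */
        for (int j = 0; j < 8; j++) {
                if (i == fui(1.0 / (256 >> j)))
                        return 40 + j;
        }

        return ~0;
}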