コード例 #1
0
ファイル: vc4_opt_algebraic.c プロジェクト: etnaviv/mesa
/*
 * Rewrites inst in place into a single-source move of arg.
 *
 * The move opcode (QOP_MMOV, QOP_FMOV or QOP_MOV) is selected from
 * qir_is_mul() / qir_is_float_input() while inst still carries its original
 * opcode.  The second source is reset to c->undef.  dump_from() and dump_to()
 * are called before and after the rewrite respectively.
 */
static void
replace_with_mov(struct vc4_compile *c, struct qinst *inst, struct qreg arg)
{
        dump_from(c, inst);

        /* Both predicates are evaluated against the pre-rewrite opcode. */
        if (qir_is_mul(inst)) {
                inst->op = QOP_MMOV;
        } else {
                inst->op = qir_is_float_input(inst) ? QOP_FMOV : QOP_MOV;
        }

        inst->src[0] = arg;
        inst->src[1] = c->undef;

        dump_to(c, inst);
}
コード例 #2
0
static bool
try_copy_prop(struct vc4_compile *c, struct qinst *inst, struct qinst **movs)
{
        bool debug = false;
        bool progress = false;

	for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
                if (inst->src[i].file != QFILE_TEMP)
                        continue;

                /* We have two ways of finding MOVs we can copy propagate
                 * from.  One is if it's an SSA def: then we can reuse it from
                 * any block in the program, as long as its source is also an
                 * SSA def.  Alternatively, if it's in the "movs" array
                 * tracked within the block, then we know the sources for it
                 * haven't been changed since we saw the instruction within
                 * our block.
                 */
                struct qinst *mov = movs[inst->src[i].index];
                if (!mov) {
                        if (!is_copy_mov(c->defs[inst->src[i].index]))
                                continue;
                        mov = c->defs[inst->src[i].index];

                        if (mov->src[0].file == QFILE_TEMP &&
                            !c->defs[mov->src[0].index])
                                continue;
                }

                /* Mul rotation's source needs to be in an r0-r3 accumulator,
                 * so no uniforms or regfile-a/r4 unpacking allowed.
                 */
                if (inst->op == QOP_ROT_MUL &&
                    (mov->src[0].file != QFILE_TEMP ||
                     mov->src[0].pack))
                        continue;

                uint8_t unpack;
                if (mov->src[0].pack) {
                        /* Make sure that the meaning of the unpack
                         * would be the same between the two
                         * instructions.
                         */
                        if (qir_is_float_input(inst) !=
                            qir_is_float_input(mov)) {
                                continue;
                        }

                        /* There's only one unpack field, so make sure
                         * this instruction doesn't already use it.
                         */
                        bool already_has_unpack = false;
                        for (int j = 0; j < qir_get_op_nsrc(inst->op); j++) {
                                if (inst->src[j].pack)
                                        already_has_unpack = true;
                        }
                        if (already_has_unpack)
                                continue;

                        /* A destination pack requires the PM bit to
                         * be set to a specific value already, which
                         * may be different from ours.
                         */
                        if (inst->dst.pack)
                                continue;

                        unpack = mov->src[0].pack;
                } else {
                        unpack = inst->src[i].pack;
                }

                if (debug) {
                        fprintf(stderr, "Copy propagate: ");
                        qir_dump_inst(c, inst);
                        fprintf(stderr, "\n");
                }

                inst->src[i] = mov->src[0];
                inst->src[i].pack = unpack;

                if (debug) {
                        fprintf(stderr, "to: ");
                        qir_dump_inst(c, inst);
                        fprintf(stderr, "\n");
                }

                progress = true;
        }

        return progress;
}
コード例 #3
0
ファイル: vc4_register_allocate.c プロジェクト: djdeath/mesa
/**
 * Returns a mapping from QFILE_TEMP indices to struct qpu_regs.
 *
 * On success the returned array has one entry per temp (c->num_temps) and
 * should be freed by the caller with free().
 *
 * If the result array cannot be allocated, or the register allocator cannot
 * find an assignment, c->failed is set and NULL is returned; every resource
 * acquired by this function has been released in that case.
 */
struct qpu_reg *
vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
{
        struct node_to_temp_map map[c->num_temps];
        uint32_t temp_to_node[c->num_temps];
        uint8_t class_bits[c->num_temps];
        struct qpu_reg *temp_registers = calloc(c->num_temps,
                                                sizeof(*temp_registers));

        /* calloc() may legitimately return NULL for a zero-element request,
         * so only treat NULL as an error when there are temps to map.
         */
        if (!temp_registers && c->num_temps > 0) {
                fprintf(stderr, "Failed to allocate temp register map\n");
                c->failed = true;
                return NULL;
        }

        /* If things aren't ever written (undefined values), just read from
         * r0.
         */
        for (uint32_t i = 0; i < c->num_temps; i++)
                temp_registers[i] = qpu_rn(0);

        vc4_alloc_reg_set(vc4);

        struct ra_graph *g = ra_alloc_interference_graph(vc4->regs,
                                                         c->num_temps);

        /* Compute the live ranges so we can figure out interference. */
        qir_calculate_live_intervals(c);

        /* Order the graph nodes by node_to_temp_priority(), using each temp's
         * live-range length as its priority, and record the resulting
         * temp -> node mapping.
         */
        for (uint32_t i = 0; i < c->num_temps; i++) {
                map[i].temp = i;
                map[i].priority = c->temp_end[i] - c->temp_start[i];
        }
        qsort(map, c->num_temps, sizeof(map[0]), node_to_temp_priority);
        for (uint32_t i = 0; i < c->num_temps; i++) {
                temp_to_node[map[i].temp] = i;
        }

        /* Figure out our register classes and preallocated registers.  We
         * start with any temp being able to be in any file, then instructions
         * incrementally remove bits that the temp definitely can't be in.
         */
        memset(class_bits,
               CLASS_BIT_A | CLASS_BIT_B_OR_ACC | CLASS_BIT_R4,
               sizeof(class_bits));

        int ip = 0;
        qir_for_each_inst_inorder(inst, c) {
                if (qir_writes_r4(inst)) {
                        /* This instruction writes r4 (and optionally moves
                         * its result to a temp), so nothing else can be
                         * stored in r4 across it.
                         */
                        for (uint32_t i = 0; i < c->num_temps; i++) {
                                if (c->temp_start[i] < ip && c->temp_end[i] > ip)
                                        class_bits[i] &= ~CLASS_BIT_R4;
                        }
                } else {
                        /* R4 can't be written as a general purpose
                         * register. (it's TMU_NOSWAP as a write address).
                         */
                        if (inst->dst.file == QFILE_TEMP)
                                class_bits[inst->dst.index] &= ~CLASS_BIT_R4;
                }

                switch (inst->op) {
                case QOP_FRAG_Z:
                        ra_set_node_reg(g, temp_to_node[inst->dst.index],
                                        AB_INDEX + QPU_R_FRAG_PAYLOAD_ZW * 2 + 1);
                        break;

                case QOP_FRAG_W:
                        ra_set_node_reg(g, temp_to_node[inst->dst.index],
                                        AB_INDEX + QPU_R_FRAG_PAYLOAD_ZW * 2);
                        break;

                case QOP_ROT_MUL:
                        assert(inst->src[0].file == QFILE_TEMP);
                        /* NOTE(review): this clears the R0_R3 bit instead of
                         * restricting the temp to it, while class_bits is
                         * initialized without CLASS_BIT_R0_R3 and the class
                         * switch below has a CLASS_BIT_R0_R3-only case.  The
                         * mask looks inverted; confirm against the
                         * CLASS_BIT_* definitions before changing it.
                         */
                        class_bits[inst->src[0].index] &= ~CLASS_BIT_R0_R3;
                        break;

                default:
                        break;
                }

                /* dst.index only names a temp when dst.file is QFILE_TEMP;
                 * without that check an unrelated temp's class could be
                 * narrowed, or class_bits indexed out of range.
                 */
                if (inst->dst.file == QFILE_TEMP &&
                    inst->dst.pack && !qir_is_mul(inst)) {
                        /* The non-MUL pack flags require an A-file dst
                         * register.
                         */
                        class_bits[inst->dst.index] &= CLASS_BIT_A;
                }

                /* Apply restrictions for src unpacks.  The integer unpacks
                 * can only be done from regfile A, while float unpacks can be
                 * either A or R4.
                 */
                for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
                        if (inst->src[i].file == QFILE_TEMP &&
                            inst->src[i].pack) {
                                if (qir_is_float_input(inst)) {
                                        class_bits[inst->src[i].index] &=
                                                CLASS_BIT_A | CLASS_BIT_R4;
                                } else {
                                        class_bits[inst->src[i].index] &=
                                                CLASS_BIT_A;
                                }
                        }
                }

                ip++;
        }

        /* Translate each temp's surviving class bits into a register class
         * on its graph node.  Combinations without a matching class are
         * treated as a fatal internal error.
         */
        for (uint32_t i = 0; i < c->num_temps; i++) {
                int node = temp_to_node[i];

                switch (class_bits[i]) {
                case CLASS_BIT_A | CLASS_BIT_B_OR_ACC | CLASS_BIT_R4:
                        ra_set_node_class(g, node, vc4->reg_class_any);
                        break;
                case CLASS_BIT_A | CLASS_BIT_B_OR_ACC:
                        ra_set_node_class(g, node, vc4->reg_class_a_or_b_or_acc);
                        break;
                case CLASS_BIT_A | CLASS_BIT_R4:
                        ra_set_node_class(g, node, vc4->reg_class_r4_or_a);
                        break;
                case CLASS_BIT_A:
                        ra_set_node_class(g, node, vc4->reg_class_a);
                        break;
                case CLASS_BIT_R0_R3:
                        ra_set_node_class(g, node, vc4->reg_class_r0_r3);
                        break;
                default:
                        fprintf(stderr, "temp %d: bad class bits: 0x%x\n",
                                i, class_bits[i]);
                        abort();
                        break;
                }
        }

        /* Two temps interfere unless one's live range starts at or after the
         * other's ends.
         */
        for (uint32_t i = 0; i < c->num_temps; i++) {
                for (uint32_t j = i + 1; j < c->num_temps; j++) {
                        if (!(c->temp_start[i] >= c->temp_end[j] ||
                              c->temp_start[j] >= c->temp_end[i])) {
                                ra_add_node_interference(g,
                                                         temp_to_node[i],
                                                         temp_to_node[j]);
                        }
                }
        }

        bool ok = ra_allocate(g);
        if (!ok) {
                fprintf(stderr, "Failed to register allocate:\n");
                qir_dump(c);
                c->failed = true;

                /* Release everything acquired above; the caller only ever
                 * sees NULL on this path and so cannot free it.
                 */
                ralloc_free(g);
                free(temp_registers);
                return NULL;
        }

        for (uint32_t i = 0; i < c->num_temps; i++) {
                temp_registers[i] = vc4_regs[ra_get_node_reg(g, temp_to_node[i])];

                /* If the value's never used, just write to the NOP register
                 * for clarity in debug output.
                 */
                if (c->temp_start[i] == c->temp_end[i])
                        temp_registers[i] = qpu_ra(QPU_W_NOP);
        }

        ralloc_free(g);

        return temp_registers;
}