Exemple #1
0
/**
 * Tries to combine two 64-bit QPU instruction words into a single one.
 *
 * The candidate result starts out as the bitwise OR of both words; each
 * group of fields is then checked for conflicts between a and b.
 *
 * NOTE(review): merge_fields(), convert_mov() and try_swap_ra_file() are
 * defined elsewhere.  From their use here, merge_fields() appears to report
 * whether the masked field of a and b can coexist (equal, or one of them
 * holding the given "ignore" value) and to update the merged word
 * accordingly -- confirm against its definition.
 *
 * @param a  First instruction word.
 * @param b  Second instruction word.
 * @return   The merged instruction word, or 0 when the two instructions
 *           cannot be merged.
 */
uint64_t
qpu_merge_inst(uint64_t a, uint64_t b)
{
        uint64_t merge = a | b;
        /* Accumulates the results of the field merges that do not bail out
         * immediately.  Once it becomes false, the remaining
         * "ok = ok && merge_fields(...)" calls are skipped by short-circuit
         * evaluation and the function returns 0 at the end.
         */
        bool ok = true;
        uint32_t a_sig = QPU_GET_FIELD(a, QPU_SIG);
        uint32_t b_sig = QPU_GET_FIELD(b, QPU_SIG);

        /* Both instructions use the ADD op.  The only way forward is for
         * neither to use the MUL op and for convert_mov() to succeed on one
         * of them (presumably rewriting an ADD-unit mov as a MUL-unit one --
         * TODO confirm).
         */
        if (QPU_GET_FIELD(a, QPU_OP_ADD) != QPU_A_NOP &&
            QPU_GET_FIELD(b, QPU_OP_ADD) != QPU_A_NOP) {
                if (QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP ||
                    QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP ||
                    !(convert_mov(&a) || convert_mov(&b))) {
                        return 0;
                } else {
                        /* a or b was passed by pointer to convert_mov(), so
                         * recompute the candidate from the current values.
                         */
                        merge = a | b;
                }
        }

        /* Both instructions use the MUL op: no merge possible. */
        if (QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP &&
            QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
                return 0;

        /* Refuse to merge when qpu_num_sf_accesses() is non-zero for both
         * instructions.
         */
        if (qpu_num_sf_accesses(a) && qpu_num_sf_accesses(b))
                return 0;

        /* Instructions carrying a load-immediate, small-immediate or branch
         * signal are never merged.
         */
        if (a_sig == QPU_SIG_LOAD_IMM ||
            b_sig == QPU_SIG_LOAD_IMM ||
            a_sig == QPU_SIG_SMALL_IMM ||
            b_sig == QPU_SIG_SMALL_IMM ||
            a_sig == QPU_SIG_BRANCH ||
            b_sig == QPU_SIG_BRANCH) {
                return 0;
        }

        /* Signal field, with QPU_SIG_NONE passed as the ignore value. */
        ok = ok && merge_fields(&merge, a, b, QPU_SIG_MASK,
                                QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG));

        /* Misc fields that have to match exactly. */
        ok = ok && merge_fields(&merge, a, b, QPU_SF, ~0);

        /* Note that this merge is attempted regardless of the current value
         * of ok.
         */
        if (!merge_fields(&merge, a, b, QPU_RADDR_A_MASK,
                          QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A))) {
                /* Since we tend to use regfile A by default both for register
                 * allocation and for our special values (uniforms and
                 * varyings), try swapping uniforms and varyings to regfile B
                 * to resolve raddr A conflicts.
                 */
                if (!try_swap_ra_file(&merge, &a, &b) &&
                    !try_swap_ra_file(&merge, &b, &a)) {
                        return 0;
                }
        }

        /* Read address B and both write addresses, each with its NOP value
         * passed as the ignore value.
         */
        ok = ok && merge_fields(&merge, a, b, QPU_RADDR_B_MASK,
                                QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B));

        ok = ok && merge_fields(&merge, a, b, QPU_WADDR_ADD_MASK,
                                QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD));
        ok = ok && merge_fields(&merge, a, b, QPU_WADDR_MUL_MASK,
                                QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL));

        /* Allow disagreement on WS (swapping A vs B physical reg file as the
         * destination for ADD/MUL) if one of the original instructions
         * ignores it (probably because it's just writing to accumulators).
         * In that case the merged word takes the WS bit of the other
         * instruction; otherwise both WS bits must be equal.
         */
        if (qpu_waddr_ignores_ws(QPU_GET_FIELD(a, QPU_WADDR_ADD)) &&
            qpu_waddr_ignores_ws(QPU_GET_FIELD(a, QPU_WADDR_MUL))) {
                merge = (merge & ~QPU_WS) | (b & QPU_WS);
        } else if (qpu_waddr_ignores_ws(QPU_GET_FIELD(b, QPU_WADDR_ADD)) &&
                   qpu_waddr_ignores_ws(QPU_GET_FIELD(b, QPU_WADDR_MUL))) {
                merge = (merge & ~QPU_WS) | (a & QPU_WS);
        } else {
                if ((a & QPU_WS) != (b & QPU_WS))
                        return 0;
        }

        if (!merge_fields(&merge, a, b, QPU_PM, ~0)) {
                /* If one instruction has PM bit set and the other not, the
                 * one without PM shouldn't do packing/unpacking, and we
                 * have to make sure non-NOP packing/unpacking from PM
                 * instruction aren't added to it.
                 */
                uint64_t temp;

                /* Let a be the one with PM bit.  Only the local copies are
                 * swapped; they are used solely for the checks below.
                 */
                if (!(a & QPU_PM)) {
                        temp = a;
                        a = b;
                        b = temp;
                }

                /* The instruction without PM must not pack or unpack. */
                if ((b & (QPU_PACK_MASK | QPU_UNPACK_MASK)) != 0)
                        return 0;

                /* The PM instruction's pack must not meet a MUL op in the
                 * other instruction.
                 */
                if ((a & QPU_PACK_MASK) != 0 &&
                    QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
                        return 0;

                /* The PM instruction's unpack must not meet an r4 read in
                 * the other instruction.
                 */
                if ((a & QPU_UNPACK_MASK) != 0 && reads_r4(b))
                        return 0;
        } else {
                /* packing: Make sure that non-NOP packs agree, then deal with
                 * special-case failing of adding a non-NOP pack to something
                 * with a NOP pack.
                 */
                if (!merge_fields(&merge, a, b, QPU_PACK_MASK, 0))
                        return 0;
                /* True when the merged pack field differs from the one the
                 * respective instruction had originally.
                 */
                bool new_a_pack = (QPU_GET_FIELD(a, QPU_PACK) !=
                                QPU_GET_FIELD(merge, QPU_PACK));
                bool new_b_pack = (QPU_GET_FIELD(b, QPU_PACK) !=
                                QPU_GET_FIELD(merge, QPU_PACK));
                if (!(merge & QPU_PM)) {
                        /* Make sure we're not going to be putting a new
                         * a-file packing on either half.
                         */
                        if (new_a_pack && writes_a_file(a))
                                return 0;

                        if (new_b_pack && writes_a_file(b))
                                return 0;
                } else {
                        /* Make sure we're not going to be putting new MUL
                         * packing on either half.
                         */
                        if (new_a_pack &&
                            QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP)
                                return 0;

                        if (new_b_pack &&
                            QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
                                return 0;
                }

                /* unpacking: Make sure that non-NOP unpacks agree, then deal
                 * with special-case failing of adding a non-NOP unpack to
                 * something with a NOP unpack.
                 */
                if (!merge_fields(&merge, a, b, QPU_UNPACK_MASK, 0))
                        return 0;
                /* True when the merged unpack field differs from the one the
                 * respective instruction had originally.
                 */
                bool new_a_unpack = (QPU_GET_FIELD(a, QPU_UNPACK) !=
                                QPU_GET_FIELD(merge, QPU_UNPACK));
                bool new_b_unpack = (QPU_GET_FIELD(b, QPU_UNPACK) !=
                                QPU_GET_FIELD(merge, QPU_UNPACK));
                if (!(merge & QPU_PM)) {
                        /* Make sure we're not going to be putting a new
                         * a-file unpacking on either half: a half that gets
                         * a new unpack must not use raddr A.
                         */
                        if (new_a_unpack &&
                            QPU_GET_FIELD(a, QPU_RADDR_A) != QPU_R_NOP)
                                return 0;

                        if (new_b_unpack &&
                            QPU_GET_FIELD(b, QPU_RADDR_A) != QPU_R_NOP)
                                return 0;
                } else {
                        /* Make sure we're not going to be putting new r4
                         * unpack on either half.
                         */
                        if (new_a_unpack && reads_r4(a))
                                return 0;

                        if (new_b_unpack && reads_r4(b))
                                return 0;
                }
        }

        /* Any failed merge_fields() recorded in ok rejects the merge. */
        if (ok)
                return merge;
        else
                return 0;
}
Exemple #2
0
/**
 * Checks for the instruction restrictions from page 37 ("Summary of
 * Instruction Restrictions").
 *
 * The program is scanned once per restriction and every violation is
 * reported through fail_instr().  The checks only run when DEBUG is
 * defined; otherwise the function returns immediately (the code is still
 * compiled so that it does not bit-rot).
 *
 * All lookups of neighbouring instructions are bounds-checked, so programs
 * of any length (including zero instructions) can be passed in.
 *
 * @param insts     Array of 64-bit QPU instruction words.
 * @param num_inst  Number of entries in insts.
 */
void
vc4_qpu_validate(uint64_t *insts, uint32_t num_inst)
{
        bool scoreboard_locked = false;

        /* We don't want to do validation in release builds, but we want to
         * keep compiling the validation code to make sure it doesn't get
         * broken.
         */
#ifndef DEBUG
        return;
#endif

        for (uint32_t i = 0; i < num_inst; i++) {
                uint64_t inst = insts[i];

                if (QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_PROG_END) {
                        if (qpu_inst_is_tlb(inst))
                                scoreboard_locked = true;

                        continue;
                }

                /* "The Thread End instruction must not write to either physical
                 *  regfile A or B."
                 */
                if (QPU_GET_FIELD(inst, QPU_WADDR_ADD) < 32 ||
                    QPU_GET_FIELD(inst, QPU_WADDR_MUL) < 32) {
                        fail_instr(inst, "write to phys reg in thread end");
                }

                /* Can't trigger an implicit wait on scoreboard in the program
                 * end instruction.
                 */
                if (qpu_inst_is_tlb(inst) && !scoreboard_locked)
                        fail_instr(inst, "implicit sb wait in program end");

                /* Two delay slots will be executed, so insts[i + 1] and
                 * insts[i + 2] must both exist (i < num_inst holds here, so
                 * the subtraction cannot wrap).
                 */
                assert(num_inst - i > 2);

                /* Thread End plus both delay slots: three instructions. */
                for (uint32_t j = i; j <= i + 2; j++) {
                        /* "The last three instructions of any program
                         *  (Thread End plus the following two delay-slot
                         *  instructions) must not do varyings read, uniforms
                         *  read or any kind of VPM, VDR, or VDW read or
                         *  write."
                         */
                        if (writes_reg(insts[j], QPU_W_VPM) ||
                            reads_reg(insts[j], QPU_R_VARY) ||
                            reads_reg(insts[j], QPU_R_UNIF) ||
                            reads_reg(insts[j], QPU_R_VPM)) {
                                fail_instr(insts[j], "last 3 instructions "
                                           "using fixed functions");
                        }

                        /* "The Thread End instruction and the following two
                         *  delay slot instructions must not write or read
                         *  address 14 in either regfile A or B."
                         */
                        if (writes_reg(insts[j], 14) ||
                            reads_reg(insts[j], 14)) {
                                fail_instr(insts[j], "last 3 instructions "
                                           "must not use r14");
                        }
                }

                /* "The final program instruction (the second delay slot
                 *  instruction) must not do a TLB Z write."
                 */
                if (writes_reg(insts[i + 2], QPU_W_TLB_Z)) {
                        fail_instr(insts[i + 2], "final instruction doing "
                                   "Z write");
                }
        }

        /* "A scoreboard wait must not occur in the first two instructions of
         *  a fragment shader. This is either the explicit Wait for Scoreboard
         *  signal or an implicit wait with the first tile-buffer read or
         *  write instruction."
         */
        for (uint32_t i = 0; i < 2 && i < num_inst; i++) {
                uint64_t inst = insts[i];

                if (qpu_inst_is_tlb(inst))
                        fail_instr(inst, "sb wait in first two insts");
        }

        /* "If TMU_NOSWAP is written, the write must be three instructions
         *  before the first TMU write instruction.  For example, if
         *  TMU_NOSWAP is written in the first shader instruction, the first
         *  TMU write cannot occur before the 4th shader instruction."
         */
        int64_t last_tmu_noswap = -10; /* far enough back to never trigger */
        for (uint32_t i = 0; i < num_inst; i++) {
                uint64_t inst = insts[i];

                if (((int64_t)i - last_tmu_noswap) <= 3 &&
                    (writes_reg(inst, QPU_W_TMU0_S) ||
                     writes_reg(inst, QPU_W_TMU1_S))) {
                        fail_instr(inst, "TMU write too soon after TMU_NOSWAP");
                }

                if (writes_reg(inst, QPU_W_TMU_NOSWAP))
                        last_tmu_noswap = i;
        }

        /* "An instruction must not read from a location in physical regfile A
         *  or B that was written to by the previous instruction."
         *
         * The bound is written as i + 1 < num_inst so that it cannot wrap
         * around when num_inst is 0.
         */
        for (uint32_t i = 0; i + 1 < num_inst; i++) {
                uint64_t inst = insts[i];
                uint32_t add_waddr = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
                uint32_t mul_waddr = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
                uint32_t waddr_a, waddr_b;

                /* WS swaps which regfile the ADD and MUL results go to. */
                if (inst & QPU_WS) {
                        waddr_b = add_waddr;
                        waddr_a = mul_waddr;
                } else {
                        waddr_a = add_waddr;
                        waddr_b = mul_waddr;
                }

                if ((waddr_a < 32 && reads_a_reg(insts[i + 1], waddr_a)) ||
                    (waddr_b < 32 && reads_b_reg(insts[i + 1], waddr_b))) {
                        fail_instr(insts[i + 1],
                                   "Reads physical reg too soon after write");
                }
        }

        /* "After an SFU lookup instruction, accumulator r4 must not be read
         *  in the following two instructions. Any other instruction that
         *  results in r4 being written (that is, TMU read, TLB read, SFU
         *  lookup) cannot occur in the two instructions following an SFU
         *  lookup."
         *
         * Only the second sentence (r4 writes) is checked here.
         */
        int64_t last_sfu_inst = -10; /* far enough back to never trigger */
        for (uint32_t i = 0; i < num_inst; i++) {
                uint64_t inst = insts[i];
                uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

                if ((int64_t)i - last_sfu_inst <= 2 &&
                    (writes_sfu(inst) ||
                     sig == QPU_SIG_LOAD_TMU0 ||
                     sig == QPU_SIG_LOAD_TMU1 ||
                     sig == QPU_SIG_COLOR_LOAD)) {
                        fail_instr(inst, "R4 write too soon after SFU write");
                }

                if (writes_sfu(inst))
                        last_sfu_inst = i;
        }

        for (uint32_t i = 0; i < num_inst; i++) {
                uint64_t inst = insts[i];

                if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_SMALL_IMM &&
                    QPU_GET_FIELD(inst, QPU_SMALL_IMM) >=
                    QPU_SMALL_IMM_MUL_ROT) {
                        uint32_t mux_a = QPU_GET_FIELD(inst, QPU_MUL_A);
                        uint32_t mux_b = QPU_GET_FIELD(inst, QPU_MUL_B);

                        /* "The full horizontal vector rotate is only
                         *  available when both of the mul ALU input arguments
                         *  are taken from accumulators r0-r3."
                         */
                        if (mux_a > QPU_MUX_R3 || mux_b > QPU_MUX_R3) {
                                fail_instr(inst,
                                           "MUL rotate using non-accumulator "
                                           "input");
                        }

                        /* The remaining rules concern the preceding
                         * instruction, which does not exist for the first
                         * instruction of the program.
                         */
                        if (i == 0)
                                continue;

                        if (QPU_GET_FIELD(inst, QPU_SMALL_IMM) ==
                            QPU_SMALL_IMM_MUL_ROT) {
                                /* "An instruction that does a vector rotate
                                 *  by r5 must not immediately follow an
                                 *  instruction that writes to r5."
                                 */
                                if (writes_reg(insts[i - 1], QPU_W_ACC5)) {
                                        fail_instr(inst,
                                                   "vector rotate by r5 "
                                                   "immediately after r5 write");
                                }
                        }

                        /* "An instruction that does a vector rotate must not
                         *  immediately follow an instruction that writes to the
                         *  accumulator that is being rotated."
                         */
                        if (writes_reg(insts[i - 1], QPU_W_ACC0 + mux_a) ||
                            writes_reg(insts[i - 1], QPU_W_ACC0 + mux_b)) {
                                fail_instr(inst,
                                           "vector rotate of value "
                                           "written in previous instruction");
                        }
                }
        }

        /* "After an instruction that does a TLB Z write, the multisample mask
         *  must not be read as an instruction input argument in the following
         *  two instruction. The TLB Z write instruction can, however, be
         *  followed immediately by a TLB color write."
         */
        for (uint32_t i = 0; i < num_inst; i++) {
                uint64_t inst = insts[i];

                if (!writes_reg(inst, QPU_W_TLB_Z))
                        continue;

                /* Look at up to two following instructions, stopping at the
                 * end of the program.
                 */
                for (uint32_t j = i + 1; j < num_inst && j <= i + 2; j++) {
                        if (reads_a_reg(insts[j], QPU_R_MS_REV_FLAGS)) {
                                fail_instr(inst, "TLB Z write followed by MS mask read");
                                break;
                        }
                }
        }

        /*
         * "A single instruction can only perform a maximum of one of the
         *  following closely coupled peripheral accesses in a single
         *  instruction: TMU write, TMU read, TLB write, TLB read, TLB
         *  combined color read and write, SFU write, Mutex read or Semaphore
         *  access."
         */
        for (uint32_t i = 0; i < num_inst; i++) {
                uint64_t inst = insts[i];

                if (qpu_num_sf_accesses(inst) > 1)
                        fail_instr(inst, "Single instruction writes SFU twice");
        }

        /* "The uniform base pointer can be written (from SIMD element 0) by
         *  the processor to reset the stream, there must be at least two
         *  nonuniform-accessing instructions following a pointer change
         *  before uniforms can be accessed once more."
         */
        int64_t last_unif_pointer_update = -3; /* never triggers at i == 0 */
        for (uint32_t i = 0; i < num_inst; i++) {
                uint64_t inst = insts[i];
                uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
                uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);

                if (reads_reg(inst, QPU_R_UNIF) &&
                    (int64_t)i - last_unif_pointer_update <= 2) {
                        fail_instr(inst,
                                   "uniform read too soon after pointer update");
                }

                if (waddr_add == QPU_W_UNIFORMS_ADDRESS ||
                    waddr_mul == QPU_W_UNIFORMS_ADDRESS)
                        last_unif_pointer_update = i;
        }
}