/**
 * Returns whether the instruction writes any of the four SFU (Special
 * Function Unit) register addresses, which would trigger an SFU lookup.
 */
static bool
writes_sfu(uint64_t inst)
{
        static const uint32_t sfu_regs[] = {
                QPU_W_SFU_RECIP,
                QPU_W_SFU_RECIPSQRT,
                QPU_W_SFU_EXP,
                QPU_W_SFU_LOG,
        };

        for (unsigned i = 0; i < ARRAY_SIZE(sfu_regs); i++) {
                if (writes_reg(inst, sfu_regs[i]))
                        return true;
        }

        return false;
}
/* Whether i depends on j */
/*
 * Conservative pairwise dependence test between the low-level
 * instructions at indices i and j of the global linst[] array.
 * Returns 1 when the pair must not be reordered, 0 when they are
 * independent.
 *
 * NOTE(review): any instruction with a FLOAT-typed argument is treated
 * as depending on everything (both loops below return 1 on the first
 * FLOAT argument) — presumably because FP register/flag interactions
 * aren't modeled precisely here; confirm before tightening.
 */
STATIC_INLINE int depends_on(int i, int j)
{
        int n;

        /* First, check memory */
        if (writes_mem(i) && uses_mem(j)) return 1;
        if (reads_mem(i) && writes_mem(j)) return 1;

        /* Next, check flags */
        if (writes_flags(i) && uses_flags(j)) return 1;
        if (reads_flags(i) && writes_flags(j)) return 1;

        /* Any FLOAT-argument instruction is conservatively ordered
         * against everything else.
         */
        for (n=0;n<linst[i].nargs;n++) {
                if (linst[i].argtype[n] & FLOAT) return 1;
        }
        for (n=0;n<linst[j].nargs;n++) {
                if (linst[j].argtype[n] & FLOAT) return 1;
        }

        /* Register dependencies: i writes a register that j reads or
         * writes, or i reads a register that j writes.
         */
        for (n=0;n<linst[i].nargs;n++) {
                if ((linst[i].argtype[n] & WRITE) &&
                    !(linst[i].argtype[n] & FLOAT)) {
                        if (uses_reg(j,linst[i].args[n])) return 1;
                } else if ((linst[i].argtype[n] & READ) &&
                           !(linst[i].argtype[n] & FLOAT)) {
                        if (writes_reg(j,linst[i].args[n])) return 1;
                }
        }

        /* The need for this indicates a problem somewhere in the
         * LOWFUNC definitions --- I think. FIXME!
         */
        if (uses_flags(j) && uses_flags(i)) return 1;
        if (linst[i].func==do_raw_fflags_save) return 1;
        if (linst[j].func==do_raw_fflags_save) return 1;

        return 0;
}
/** * Checks for the instruction restrictions from page 37 ("Summary of * Instruction Restrictions"). */ void vc4_qpu_validate(uint64_t *insts, uint32_t num_inst) { for (int i = 0; i < num_inst; i++) { uint64_t inst = insts[i]; if (QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_PROG_END) continue; /* "The Thread End instruction must not write to either physical * regfile A or B." */ assert(QPU_GET_FIELD(inst, QPU_WADDR_ADD) >= 32); assert(QPU_GET_FIELD(inst, QPU_WADDR_MUL) >= 32); /* Two delay slots will be executed. */ assert(i + 2 <= num_inst); for (int j = i; j < i + 2; j++) { /* "The last three instructions of any program * (Thread End plus the following two delay-slot * instructions) must not do varyings read, uniforms * read or any kind of VPM, VDR, or VDW read or * write." */ assert(!writes_reg(insts[j], QPU_W_VPM)); assert(!reads_reg(insts[j], QPU_R_VARY)); assert(!reads_reg(insts[j], QPU_R_UNIF)); assert(!reads_reg(insts[j], QPU_R_VPM)); /* "The Thread End instruction and the following two * delay slot instructions must not write or read * address 14 in either regfile A or B." */ assert(!writes_reg(insts[j], 14)); assert(!reads_reg(insts[j], 14)); } /* "The final program instruction (the second delay slot * instruction) must not do a TLB Z write." */ assert(!writes_reg(insts[i + 2], QPU_W_TLB_Z)); } /* "A scoreboard wait must not occur in the first two instructions of * a fragment shader. This is either the explicit Wait for Scoreboard * signal or an implicit wait with the first tile-buffer read or * write instruction." */ for (int i = 0; i < 2; i++) { uint64_t inst = insts[i]; assert(QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_COLOR_LOAD); assert(QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_WAIT_FOR_SCOREBOARD); assert(!writes_reg(inst, QPU_W_TLB_COLOR_MS)); assert(!writes_reg(inst, QPU_W_TLB_COLOR_ALL)); assert(!writes_reg(inst, QPU_W_TLB_Z)); } /* "If TMU_NOSWAP is written, the write must be three instructions * before the first TMU write instruction. 
For example, if * TMU_NOSWAP is written in the first shader instruction, the first * TMU write cannot occur before the 4th shader instruction." */ int last_tmu_noswap = -10; for (int i = 0; i < num_inst; i++) { uint64_t inst = insts[i]; assert((i - last_tmu_noswap) > 3 || (!writes_reg(inst, QPU_W_TMU0_S) && !writes_reg(inst, QPU_W_TMU1_S))); if (writes_reg(inst, QPU_W_TMU_NOSWAP)) last_tmu_noswap = i; } /* "An instruction must not read from a location in physical regfile A * or B that was written to by the previous instruction." */ for (int i = 0; i < num_inst - 1; i++) { uint64_t inst = insts[i]; uint32_t add_waddr = QPU_GET_FIELD(inst, QPU_WADDR_ADD); uint32_t mul_waddr = QPU_GET_FIELD(inst, QPU_WADDR_MUL); uint32_t waddr_a, waddr_b; if (inst & QPU_WS) { waddr_b = add_waddr; waddr_a = mul_waddr; } else { waddr_a = add_waddr; waddr_b = mul_waddr; } assert(waddr_a >= 32 || !reads_a_reg(insts[i + 1], waddr_a)); assert(waddr_b >= 32 || !reads_b_reg(insts[i + 1], waddr_b)); } /* "After an SFU lookup instruction, accumulator r4 must not be read * in the following two instructions. Any other instruction that * results in r4 being written (that is, TMU read, TLB read, SFU * lookup) cannot occur in the two instructions following an SFU * lookup." */ int last_sfu_inst = -10; for (int i = 0; i < num_inst - 1; i++) { uint64_t inst = insts[i]; assert(i - last_sfu_inst > 2 || (!writes_sfu(inst) && !writes_reg(inst, QPU_W_TMU0_S) && !writes_reg(inst, QPU_W_TMU1_S) && QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_COLOR_LOAD)); if (writes_sfu(inst)) last_sfu_inst = i; } int last_r5_write = -10; for (int i = 0; i < num_inst - 1; i++) { uint64_t inst = insts[i]; /* "An instruction that does a vector rotate by r5 must not * immediately follow an instruction that writes to r5." 
*/ assert(last_r5_write != i - 1 || QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM || QPU_GET_FIELD(inst, QPU_SMALL_IMM) != 48); } /* "An instruction that does a vector rotate must not immediately * follow an instruction that writes to the accumulator that is being * rotated. * * XXX: TODO. */ /* "After an instruction that does a TLB Z write, the multisample mask * must not be read as an instruction input argument in the following * two instruction. The TLB Z write instruction can, however, be * followed immediately by a TLB color write." */ for (int i = 0; i < num_inst - 1; i++) { uint64_t inst = insts[i]; if (writes_reg(inst, QPU_W_TLB_Z)) { assert(!reads_a_reg(insts[i + 1], QPU_R_MS_REV_FLAGS)); assert(!reads_a_reg(insts[i + 2], QPU_R_MS_REV_FLAGS)); } } /* * "A single instruction can only perform a maximum of one of the * following closely coupled peripheral accesses in a single * instruction: TMU write, TMU read, TLB write, TLB read, TLB * combined color read and write, SFU write, Mutex read or Semaphore * access." */ for (int i = 0; i < num_inst - 1; i++) { uint64_t inst = insts[i]; int accesses = 0; static const uint32_t specials[] = { QPU_W_TLB_COLOR_MS, QPU_W_TLB_COLOR_ALL, QPU_W_TLB_Z, QPU_W_TMU0_S, QPU_W_TMU0_T, QPU_W_TMU0_R, QPU_W_TMU0_B, QPU_W_TMU1_S, QPU_W_TMU1_T, QPU_W_TMU1_R, QPU_W_TMU1_B, QPU_W_SFU_RECIP, QPU_W_SFU_RECIPSQRT, QPU_W_SFU_EXP, QPU_W_SFU_LOG, }; for (int j = 0; j < ARRAY_SIZE(specials); j++) { if (writes_reg(inst, specials[j])) accesses++; } if (reads_reg(inst, QPU_R_MUTEX_ACQUIRE)) accesses++; /* XXX: semaphore, combined color read/write? */ switch (QPU_GET_FIELD(inst, QPU_SIG)) { case QPU_SIG_COLOR_LOAD: case QPU_SIG_COLOR_LOAD_END: case QPU_SIG_LOAD_TMU0: case QPU_SIG_LOAD_TMU1: accesses++; } assert(accesses <= 1); } }
/** * Checks for the instruction restrictions from page 37 ("Summary of * Instruction Restrictions"). */ void vc4_qpu_validate(uint64_t *insts, uint32_t num_inst) { bool scoreboard_locked = false; /* We don't want to do validation in release builds, but we want to * keep compiling the validation code to make sure it doesn't get * broken. */ #ifndef DEBUG return; #endif for (int i = 0; i < num_inst; i++) { uint64_t inst = insts[i]; if (QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_PROG_END) { if (qpu_inst_is_tlb(inst)) scoreboard_locked = true; continue; } /* "The Thread End instruction must not write to either physical * regfile A or B." */ if (QPU_GET_FIELD(inst, QPU_WADDR_ADD) < 32 || QPU_GET_FIELD(inst, QPU_WADDR_MUL) < 32) { fail_instr(inst, "write to phys reg in thread end"); } /* Can't trigger an implicit wait on scoreboard in the program * end instruction. */ if (qpu_inst_is_tlb(inst) && !scoreboard_locked) fail_instr(inst, "implicit sb wait in program end"); /* Two delay slots will be executed. */ assert(i + 2 <= num_inst); for (int j = i; j < i + 2; j++) { /* "The last three instructions of any program * (Thread End plus the following two delay-slot * instructions) must not do varyings read, uniforms * read or any kind of VPM, VDR, or VDW read or * write." */ if (writes_reg(insts[j], QPU_W_VPM) || reads_reg(insts[j], QPU_R_VARY) || reads_reg(insts[j], QPU_R_UNIF) || reads_reg(insts[j], QPU_R_VPM)) { fail_instr(insts[j], "last 3 instructions " "using fixed functions"); } /* "The Thread End instruction and the following two * delay slot instructions must not write or read * address 14 in either regfile A or B." */ if (writes_reg(insts[j], 14) || reads_reg(insts[j], 14)) { fail_instr(insts[j], "last 3 instructions " "must not use r14"); } } /* "The final program instruction (the second delay slot * instruction) must not do a TLB Z write." 
*/ if (writes_reg(insts[i + 2], QPU_W_TLB_Z)) { fail_instr(insts[i + 2], "final instruction doing " "Z write"); } } /* "A scoreboard wait must not occur in the first two instructions of * a fragment shader. This is either the explicit Wait for Scoreboard * signal or an implicit wait with the first tile-buffer read or * write instruction." */ for (int i = 0; i < 2; i++) { uint64_t inst = insts[i]; if (qpu_inst_is_tlb(inst)) fail_instr(inst, "sb wait in first two insts"); } /* "If TMU_NOSWAP is written, the write must be three instructions * before the first TMU write instruction. For example, if * TMU_NOSWAP is written in the first shader instruction, the first * TMU write cannot occur before the 4th shader instruction." */ int last_tmu_noswap = -10; for (int i = 0; i < num_inst; i++) { uint64_t inst = insts[i]; if ((i - last_tmu_noswap) <= 3 && (writes_reg(inst, QPU_W_TMU0_S) || writes_reg(inst, QPU_W_TMU1_S))) { fail_instr(inst, "TMU write too soon after TMU_NOSWAP"); } if (writes_reg(inst, QPU_W_TMU_NOSWAP)) last_tmu_noswap = i; } /* "An instruction must not read from a location in physical regfile A * or B that was written to by the previous instruction." */ for (int i = 0; i < num_inst - 1; i++) { uint64_t inst = insts[i]; uint32_t add_waddr = QPU_GET_FIELD(inst, QPU_WADDR_ADD); uint32_t mul_waddr = QPU_GET_FIELD(inst, QPU_WADDR_MUL); uint32_t waddr_a, waddr_b; if (inst & QPU_WS) { waddr_b = add_waddr; waddr_a = mul_waddr; } else { waddr_a = add_waddr; waddr_b = mul_waddr; } if ((waddr_a < 32 && reads_a_reg(insts[i + 1], waddr_a)) || (waddr_b < 32 && reads_b_reg(insts[i + 1], waddr_b))) { fail_instr(insts[i + 1], "Reads physical reg too soon after write"); } } /* "After an SFU lookup instruction, accumulator r4 must not be read * in the following two instructions. Any other instruction that * results in r4 being written (that is, TMU read, TLB read, SFU * lookup) cannot occur in the two instructions following an SFU * lookup." 
*/ int last_sfu_inst = -10; for (int i = 0; i < num_inst - 1; i++) { uint64_t inst = insts[i]; uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); if (i - last_sfu_inst <= 2 && (writes_sfu(inst) || sig == QPU_SIG_LOAD_TMU0 || sig == QPU_SIG_LOAD_TMU1 || sig == QPU_SIG_COLOR_LOAD)) { fail_instr(inst, "R4 write too soon after SFU write"); } if (writes_sfu(inst)) last_sfu_inst = i; } for (int i = 0; i < num_inst - 1; i++) { uint64_t inst = insts[i]; if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_SMALL_IMM && QPU_GET_FIELD(inst, QPU_SMALL_IMM) >= QPU_SMALL_IMM_MUL_ROT) { uint32_t mux_a = QPU_GET_FIELD(inst, QPU_MUL_A); uint32_t mux_b = QPU_GET_FIELD(inst, QPU_MUL_B); /* "The full horizontal vector rotate is only * available when both of the mul ALU input arguments * are taken from accumulators r0-r3." */ if (mux_a > QPU_MUX_R3 || mux_b > QPU_MUX_R3) { fail_instr(inst, "MUL rotate using non-accumulator " "input"); } if (QPU_GET_FIELD(inst, QPU_SMALL_IMM) == QPU_SMALL_IMM_MUL_ROT) { /* "An instruction that does a vector rotate * by r5 must not immediately follow an * instruction that writes to r5." */ if (writes_reg(insts[i - 1], QPU_W_ACC5)) { fail_instr(inst, "vector rotate by r5 " "immediately after r5 write"); } } /* "An instruction that does a vector rotate must not * immediately follow an instruction that writes to the * accumulator that is being rotated." */ if (writes_reg(insts[i - 1], QPU_W_ACC0 + mux_a) || writes_reg(insts[i - 1], QPU_W_ACC0 + mux_b)) { fail_instr(inst, "vector rotate of value " "written in previous instruction"); } } } /* "An instruction that does a vector rotate must not immediately * follow an instruction that writes to the accumulator that is being * rotated. * * XXX: TODO. */ /* "After an instruction that does a TLB Z write, the multisample mask * must not be read as an instruction input argument in the following * two instruction. The TLB Z write instruction can, however, be * followed immediately by a TLB color write." 
*/ for (int i = 0; i < num_inst - 1; i++) { uint64_t inst = insts[i]; if (writes_reg(inst, QPU_W_TLB_Z) && (reads_a_reg(insts[i + 1], QPU_R_MS_REV_FLAGS) || reads_a_reg(insts[i + 2], QPU_R_MS_REV_FLAGS))) { fail_instr(inst, "TLB Z write followed by MS mask read"); } } /* * "A single instruction can only perform a maximum of one of the * following closely coupled peripheral accesses in a single * instruction: TMU write, TMU read, TLB write, TLB read, TLB * combined color read and write, SFU write, Mutex read or Semaphore * access." */ for (int i = 0; i < num_inst - 1; i++) { uint64_t inst = insts[i]; if (qpu_num_sf_accesses(inst) > 1) fail_instr(inst, "Single instruction writes SFU twice"); } /* "The uniform base pointer can be written (from SIMD element 0) by * the processor to reset the stream, there must be at least two * nonuniform-accessing instructions following a pointer change * before uniforms can be accessed once more." */ int last_unif_pointer_update = -3; for (int i = 0; i < num_inst; i++) { uint64_t inst = insts[i]; uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD); uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL); if (reads_reg(inst, QPU_R_UNIF) && i - last_unif_pointer_update <= 2) { fail_instr(inst, "uniform read too soon after pointer update"); } if (waddr_add == QPU_W_UNIFORMS_ADDRESS || waddr_mul == QPU_W_UNIFORMS_ADDRESS) last_unif_pointer_update = i; } }