static bool
check_instruction_reads(uint64_t inst,
			struct vc4_validated_shader_info *validated_shader)
{
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);

	if (raddr_a == QPU_R_UNIF ||
	    raddr_b == QPU_R_UNIF) {
		if (is_tmu_write(waddr_add) || is_tmu_write(waddr_mul)) {
			DRM_ERROR("uniform read in the same instruction as "
				  "texture setup");
			return false;
		}

		/* This can't overflow the uint32_t, because we're reading 8
		 * bytes of instruction to increment by 4 here, so we'd
		 * already be OOM.
		 */
		validated_shader->uniforms_size += 4;
	}

	return true;
}
Esempio n. 2
0
void
vc4_qpu_disasm(const uint64_t *instructions, int num_instructions)
{
        for (int i = 0; i < num_instructions; i++) {
                uint64_t inst = instructions[i];
                uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

                switch (sig) {
                case QPU_SIG_BRANCH:
                        fprintf(stderr, "branch");
                        vc4_qpu_disasm_cond_branch(stderr,
                                                   QPU_GET_FIELD(inst,
                                                                 QPU_BRANCH_COND));

                        fprintf(stderr, " %d", (uint32_t)inst);
                        break;

                case QPU_SIG_LOAD_IMM:
                        print_load_imm(inst);
                        break;
                default:
                        if (sig != QPU_SIG_NONE)
                                fprintf(stderr, "%s ", DESC(qpu_sig, sig));
                        print_add_op(inst);
                        fprintf(stderr, " ; ");
                        print_mul_op(inst);
                        break;
                }

                if (num_instructions != 1)
                        fprintf(stderr, "\n");
        }
}
Esempio n. 3
0
static bool
writes_a_file(uint64_t inst)
{
        if (!(inst & QPU_WS))
                return QPU_GET_FIELD(inst, QPU_WADDR_ADD) < 32;
        else
                return QPU_GET_FIELD(inst, QPU_WADDR_MUL) < 32;
}
Esempio n. 4
0
static bool
reads_r4(uint64_t inst)
{
        return (QPU_GET_FIELD(inst, QPU_ADD_A) == QPU_MUX_R4 ||
                QPU_GET_FIELD(inst, QPU_ADD_B) == QPU_MUX_R4 ||
                QPU_GET_FIELD(inst, QPU_MUL_A) == QPU_MUX_R4 ||
                QPU_GET_FIELD(inst, QPU_MUL_B) == QPU_MUX_R4);
}
Esempio n. 5
0
bool
qpu_inst_is_tlb(uint64_t inst)
{
        uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

        return (qpu_waddr_is_tlb(QPU_GET_FIELD(inst, QPU_WADDR_ADD)) ||
                qpu_waddr_is_tlb(QPU_GET_FIELD(inst, QPU_WADDR_MUL)) ||
                sig == QPU_SIG_COLOR_LOAD ||
                sig == QPU_SIG_WAIT_FOR_SCOREBOARD);
}
Esempio n. 6
0
static uint64_t
set_src_raddr(uint64_t inst, struct qpu_reg src)
{
        if (src.mux == QPU_MUX_A) {
                assert(QPU_GET_FIELD(inst, QPU_RADDR_A) == QPU_R_NOP ||
                       QPU_GET_FIELD(inst, QPU_RADDR_A) == src.addr);
                return QPU_UPDATE_FIELD(inst, src.addr, QPU_RADDR_A);
        }

        if (src.mux == QPU_MUX_B) {
                assert((QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_NOP ||
                        QPU_GET_FIELD(inst, QPU_RADDR_B) == src.addr) &&
                       QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM);
                return QPU_UPDATE_FIELD(inst, src.addr, QPU_RADDR_B);
        }

        if (src.mux == QPU_MUX_SMALL_IMM) {
                if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_SMALL_IMM) {
                        assert(QPU_GET_FIELD(inst, QPU_RADDR_B) == src.addr);
                } else {
                        inst = qpu_set_sig(inst, QPU_SIG_SMALL_IMM);
                        assert(QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_NOP);
                }
                return ((inst & ~QPU_RADDR_B_MASK) |
                        QPU_SET_FIELD(src.addr, QPU_RADDR_B));
        }

        return inst;
}
Esempio n. 7
0
static bool
_reads_reg(uint64_t inst, uint32_t r, bool ignore_a, bool ignore_b)
{
        struct {
                uint32_t mux, addr;
        } src_regs[] = {
                { QPU_GET_FIELD(inst, QPU_ADD_A) },
                { QPU_GET_FIELD(inst, QPU_ADD_B) },
                { QPU_GET_FIELD(inst, QPU_MUL_A) },
                { QPU_GET_FIELD(inst, QPU_MUL_B) },
        };

        /* Branches only reference raddr_a (no mux), and we don't use that
         * feature of branching.
         */
        if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_BRANCH)
                return false;

        for (int i = 0; i < ARRAY_SIZE(src_regs); i++) {
                if (!ignore_a &&
                    src_regs[i].mux == QPU_MUX_A &&
                    (QPU_GET_FIELD(inst, QPU_RADDR_A) == r))
                        return true;

                if (!ignore_b &&
                    QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM &&
                    src_regs[i].mux == QPU_MUX_B &&
                    (QPU_GET_FIELD(inst, QPU_RADDR_B) == r))
                        return true;
        }

        return false;
}
static bool
check_instruction_writes(uint64_t inst,
			 struct vc4_validated_shader_info *validated_shader,
			 struct vc4_shader_validation_state *validation_state)
{
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);

	if (is_tmu_write(waddr_add) && is_tmu_write(waddr_mul)) {
		DRM_ERROR("ADD and MUL both set up textures\n");
		return false;
	}

	return (check_register_write(validated_shader, validation_state, waddr_add) &&
		check_register_write(validated_shader, validation_state, waddr_mul));
}
Esempio n. 9
0
static uint32_t
raddr_add_a_to_live_reg_index(uint64_t inst)
{
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
	uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);

	if (add_a == QPU_MUX_A)
		return raddr_a;
	else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM)
		return 32 + raddr_b;
	else if (add_a <= QPU_MUX_R3)
		return 64 + add_a;
	else
		return ~0;
}
Esempio n. 10
0
static void
print_mul_op(uint64_t inst)
{
        uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
        uint32_t op_mul = QPU_GET_FIELD(inst, QPU_OP_MUL);
        uint32_t cond = QPU_GET_FIELD(inst, QPU_COND_MUL);
        bool is_mov = (op_mul == QPU_M_V8MIN &&
                       QPU_GET_FIELD(inst, QPU_MUL_A) ==
                       QPU_GET_FIELD(inst, QPU_MUL_B));

        fprintf(stderr, "%s%s%s ",
                is_mov ? "mov" : DESC(qpu_mul_opcodes, op_mul),
                ((inst & QPU_SF) && op_add == QPU_A_NOP) ? ".sf" : "",
                op_mul != QPU_M_NOP ? DESC(qpu_condflags, cond) : "");

        print_alu_dst(inst, true);
        fprintf(stderr, ", ");

        print_alu_src(inst, QPU_GET_FIELD(inst, QPU_MUL_A));

        if (!is_mov) {
                fprintf(stderr, ", ");
                print_alu_src(inst, QPU_GET_FIELD(inst, QPU_MUL_B));
        }
}
Esempio n. 11
0
static void
print_mul_op(uint64_t inst)
{
        uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
        uint32_t op_mul = QPU_GET_FIELD(inst, QPU_OP_MUL);
        uint32_t cond = QPU_GET_FIELD(inst, QPU_COND_MUL);
        bool is_mov = (op_mul == QPU_M_V8MIN &&
                       QPU_GET_FIELD(inst, QPU_MUL_A) ==
                       QPU_GET_FIELD(inst, QPU_MUL_B));

        if (is_mov)
                fprintf(stderr, "mov");
        else
                fprintf(stderr, "%s", DESC(qpu_mul_opcodes, op_mul));

        if ((inst & QPU_SF) && op_add == QPU_A_NOP)
                fprintf(stderr, ".sf");

        if (op_mul != QPU_M_NOP)
                vc4_qpu_disasm_cond(stderr, cond);

        fprintf(stderr, " ");
        print_alu_dst(inst, true);
        fprintf(stderr, ", ");

        print_alu_src(inst, QPU_GET_FIELD(inst, QPU_MUL_A), true);

        if (!is_mov) {
                fprintf(stderr, ", ");
                print_alu_src(inst, QPU_GET_FIELD(inst, QPU_MUL_B), true);
        }
}
Esempio n. 12
0
static void
print_load_imm(uint64_t inst)
{
        uint32_t imm = inst;
        uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
        uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
        uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
        uint32_t cond_mul = QPU_GET_FIELD(inst, QPU_COND_MUL);

        fprintf(stderr, "load_imm ");
        print_alu_dst(inst, false);
        fprintf(stderr, "%s, ", (waddr_add != QPU_W_NOP ?
                                 DESC(qpu_condflags, cond_add) : ""));
        print_alu_dst(inst, true);
        fprintf(stderr, "%s, ", (waddr_mul != QPU_W_NOP ?
                                 DESC(qpu_condflags, cond_mul) : ""));
        fprintf(stderr, "0x%08x (%f)", imm, uif(imm));
}
Esempio n. 13
0
static bool
check_instruction_reads(uint64_t inst,
			struct vc4_validated_shader_info *validated_shader)
{
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

	if (raddr_a == QPU_R_UNIF ||
	    (raddr_b == QPU_R_UNIF && sig != QPU_SIG_SMALL_IMM)) {
		/* This can't overflow the uint32_t, because we're reading 8
		 * bytes of instruction to increment by 4 here, so we'd
		 * already be OOM.
		 */
		validated_shader->uniforms_size += 4;
	}

	return true;
}
Esempio n. 14
0
static void
print_alu_src(uint64_t inst, uint32_t mux, bool is_mul)
{
        bool is_a = mux != QPU_MUX_B;
        const char *file = is_a ? "a" : "b";
        uint32_t raddr = (is_a ?
                          QPU_GET_FIELD(inst, QPU_RADDR_A) :
                          QPU_GET_FIELD(inst, QPU_RADDR_B));
        uint32_t unpack = QPU_GET_FIELD(inst, QPU_UNPACK);
        bool has_si = QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_SMALL_IMM;
        uint32_t si = QPU_GET_FIELD(inst, QPU_SMALL_IMM);

        if (mux <= QPU_MUX_R5) {
                fprintf(stderr, "r%d", mux);
                if (has_si && is_mul && si >= QPU_SMALL_IMM_MUL_ROT + 1)
                        fprintf(stderr, "+%d", si - QPU_SMALL_IMM_MUL_ROT);
        } else if (!is_a && has_si) {
                if (si <= 15)
                        fprintf(stderr, "%d", si);
                else if (si <= 31)
                        fprintf(stderr, "%d", -16 + (si - 16));
                else if (si <= 39)
                        fprintf(stderr, "%.1f", (float)(1 << (si - 32)));
                else if (si <= 47)
                        fprintf(stderr, "%f", 1.0f / (1 << (48 - si)));
                else
                        fprintf(stderr, "<bad imm %d>", si);
        } else if (raddr <= 31)
                fprintf(stderr, "r%s%d", file, raddr);
        else {
                if (is_a)
                        fprintf(stderr, "%s", DESC(special_read_a, raddr - 32));
                else
                        fprintf(stderr, "%s", DESC(special_read_b, raddr - 32));
        }

        if (((mux == QPU_MUX_A && !(inst & QPU_PM)) ||
             (mux == QPU_MUX_R4 && (inst & QPU_PM)))) {
                vc4_qpu_disasm_unpack(stderr, unpack);
        }
}
static void
print_alu_src(uint64_t inst, uint32_t mux)
{
        bool is_a = mux != QPU_MUX_B;
        const char *file = is_a ? "a" : "b";
        uint32_t raddr = (is_a ?
                          QPU_GET_FIELD(inst, QPU_RADDR_A) :
                          QPU_GET_FIELD(inst, QPU_RADDR_B));
        uint32_t unpack = QPU_GET_FIELD(inst, QPU_UNPACK);

        if (mux <= QPU_MUX_R5)
                fprintf(stderr, "r%d", mux);
        else if (!is_a &&
                 QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_SMALL_IMM) {
                uint32_t si = QPU_GET_FIELD(inst, QPU_SMALL_IMM);
                if (si <= 15)
                        fprintf(stderr, "%d", si);
                else if (si <= 31)
                        fprintf(stderr, "%d", -16 + (si - 16));
                else if (si <= 39)
                        fprintf(stderr, "%.1f", (float)(1 << (si - 32)));
                else if (si <= 47)
                        fprintf(stderr, "%f", 1.0f / (256 / (si - 39)));
                else
                        fprintf(stderr, "???");
        } else if (raddr <= 31)
                fprintf(stderr, "r%s%d", file, raddr);
        else {
                if (is_a)
                        fprintf(stderr, "%s", DESC(special_read_a, raddr - 32));
                else
                        fprintf(stderr, "%s", DESC(special_read_b, raddr - 32));
        }

        if (unpack != QPU_UNPACK_NOP &&
            ((mux == QPU_MUX_A && !(inst & QPU_PM)) ||
             (mux == QPU_MUX_R4 && (inst & QPU_PM)))) {
                fprintf(stderr, ".%s", DESC(qpu_unpack, unpack));
        }
}
Esempio n. 16
0
static void
print_add_op(uint64_t inst)
{
        uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
        uint32_t cond = QPU_GET_FIELD(inst, QPU_COND_ADD);
        bool is_mov = (op_add == QPU_A_OR &&
                       QPU_GET_FIELD(inst, QPU_ADD_A) ==
                       QPU_GET_FIELD(inst, QPU_ADD_B));

        fprintf(stderr, "%s%s%s ",
                is_mov ? "mov" : DESC(qpu_add_opcodes, op_add),
                ((inst & QPU_SF) && op_add != QPU_A_NOP) ? ".sf" : "",
                op_add != QPU_A_NOP ? DESC(qpu_condflags, cond) : "");

        print_alu_dst(inst, false);
        fprintf(stderr, ", ");

        print_alu_src(inst, QPU_GET_FIELD(inst, QPU_ADD_A));

        if (!is_mov) {
                fprintf(stderr, ", ");

                print_alu_src(inst, QPU_GET_FIELD(inst, QPU_ADD_B));
        }
}
Esempio n. 17
0
static bool
_reads_reg(uint64_t inst, uint32_t r, bool ignore_a, bool ignore_b)
{
        struct {
                uint32_t mux, addr;
        } src_regs[] = {
                { QPU_GET_FIELD(inst, QPU_ADD_A) },
                { QPU_GET_FIELD(inst, QPU_ADD_B) },
                { QPU_GET_FIELD(inst, QPU_MUL_A) },
                { QPU_GET_FIELD(inst, QPU_MUL_B) },
        };

        for (int i = 0; i < ARRAY_SIZE(src_regs); i++) {
                if (!ignore_a &&
                    src_regs[i].mux == QPU_MUX_A &&
                    (QPU_GET_FIELD(inst, QPU_RADDR_A) == r))
                        return true;

                if (!ignore_b &&
                    src_regs[i].mux == QPU_MUX_B &&
                    (QPU_GET_FIELD(inst, QPU_RADDR_B) == r))
                        return true;
        }

        return false;
}
Esempio n. 18
0
static void
print_add_op(uint64_t inst)
{
        uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
        uint32_t cond = QPU_GET_FIELD(inst, QPU_COND_ADD);
        bool is_mov = (op_add == QPU_A_OR &&
                       QPU_GET_FIELD(inst, QPU_ADD_A) ==
                       QPU_GET_FIELD(inst, QPU_ADD_B));

        if (is_mov)
                fprintf(stderr, "mov");
        else
                fprintf(stderr, "%s", DESC(qpu_add_opcodes, op_add));

        if ((inst & QPU_SF) && op_add != QPU_A_NOP)
                fprintf(stderr, ".sf");

        if (op_add != QPU_A_NOP)
                vc4_qpu_disasm_cond(stderr, cond);

        fprintf(stderr, " ");
        print_alu_dst(inst, false);
        fprintf(stderr, ", ");

        print_alu_src(inst, QPU_GET_FIELD(inst, QPU_ADD_A), false);

        if (!is_mov) {
                fprintf(stderr, ", ");

                print_alu_src(inst, QPU_GET_FIELD(inst, QPU_ADD_B), false);
        }
}
Esempio n. 19
0
static bool
try_swap_ra_file(uint64_t *merge, uint64_t *a, uint64_t *b)
{
        uint32_t raddr_a_a = QPU_GET_FIELD(*a, QPU_RADDR_A);
        uint32_t raddr_a_b = QPU_GET_FIELD(*a, QPU_RADDR_B);
        uint32_t raddr_b_a = QPU_GET_FIELD(*b, QPU_RADDR_A);
        uint32_t raddr_b_b = QPU_GET_FIELD(*b, QPU_RADDR_B);

        if (raddr_a_b != QPU_R_NOP)
                return false;

        switch (raddr_a_a) {
        case QPU_R_UNIF:
        case QPU_R_VARY:
                break;
        default:
                return false;
        }

        if (!(*merge & QPU_PM) &&
            QPU_GET_FIELD(*merge, QPU_UNPACK) != QPU_UNPACK_NOP) {
                return false;
        }

        if (raddr_b_b != QPU_R_NOP &&
            raddr_b_b != raddr_a_a)
                return false;

        /* Move raddr A to B in instruction a. */
        *a = (*a & ~QPU_RADDR_A_MASK) | QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
        *a = (*a & ~QPU_RADDR_B_MASK) | QPU_SET_FIELD(raddr_a_a, QPU_RADDR_B);
        *merge = QPU_UPDATE_FIELD(*merge, raddr_b_a, QPU_RADDR_A);
        *merge = QPU_UPDATE_FIELD(*merge, raddr_a_a, QPU_RADDR_B);
        swap_ra_file_mux_helper(merge, a, QPU_ADD_A_SHIFT);
        swap_ra_file_mux_helper(merge, a, QPU_ADD_B_SHIFT);
        swap_ra_file_mux_helper(merge, a, QPU_MUL_A_SHIFT);
        swap_ra_file_mux_helper(merge, a, QPU_MUL_B_SHIFT);

        return true;
}
Esempio n. 20
0
static void
print_alu_dst(uint64_t inst, bool is_mul)
{
        bool is_a = is_mul == ((inst & QPU_WS) != 0);
        uint32_t waddr = (is_mul ?
                          QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
                          QPU_GET_FIELD(inst, QPU_WADDR_ADD));
        const char *file = is_a ? "a" : "b";
        uint32_t pack = QPU_GET_FIELD(inst, QPU_PACK);

        if (waddr <= 31)
                fprintf(stderr, "r%s%d", file, waddr);
        else if (get_special_write_desc(waddr, is_a))
                fprintf(stderr, "%s", get_special_write_desc(waddr, is_a));
        else
                fprintf(stderr, "%s%d?", file, waddr);

        if (is_mul && (inst & QPU_PM)) {
                vc4_qpu_disasm_pack_mul(stderr, pack);
        } else if (is_a && !(inst & QPU_PM)) {
                vc4_qpu_disasm_pack_a(stderr, pack);
        }
}
Esempio n. 21
0
static bool
check_instruction_writes(uint64_t inst,
			 struct vc4_validated_shader_info *validated_shader,
			 struct vc4_shader_validation_state *validation_state)
{
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
	bool ok;

	if (is_tmu_write(waddr_add) && is_tmu_write(waddr_mul)) {
		DRM_ERROR("ADD and MUL both set up textures\n");
		return false;
	}

	ok = (check_reg_write(inst, validated_shader, validation_state,
			      false) &&
	      check_reg_write(inst, validated_shader, validation_state,
			      true));

	track_live_clamps(inst, validated_shader, validation_state);

	return ok;
}
Esempio n. 22
0
static void
print_load_imm(uint64_t inst)
{
        uint32_t imm = inst;
        uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
        uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
        uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
        uint32_t cond_mul = QPU_GET_FIELD(inst, QPU_COND_MUL);

        fprintf(stderr, "load_imm ");

        print_alu_dst(inst, false);
        if (waddr_add != QPU_W_NOP)
                vc4_qpu_disasm_cond(stderr, cond_add);
        fprintf(stderr, ", ");

        print_alu_dst(inst, true);
        if (waddr_mul != QPU_W_NOP)
                vc4_qpu_disasm_cond(stderr, cond_mul);
        fprintf(stderr, ", ");

        fprintf(stderr, "0x%08x (%f)", imm, uif(imm));
}
Esempio n. 23
0
static void
parse_shaders(void)
{
        list_for_each_entry(struct vc4_mem_area_rec, rec, &dump.mem_areas,
                            link) {
                const char *type = NULL;

                switch (rec->type) {
                case VC4_MEM_AREA_CS:
                        type = "CS";
                        break;
                case VC4_MEM_AREA_VS:
                        type = "VS";
                        break;
                case VC4_MEM_AREA_FS:
                        type = "FS";
                        break;
                default:
                        continue;
                }

                printf("%s at 0x%08x:\n", type, rec->paddr);

                if (!rec->addr) {
                        printf("    No mapping found\n");
                        continue;
                }

                uint32_t end_offset = ~0;
                for (uint32_t offset = 0;
                     offset < end_offset;
                     offset += sizeof(uint64_t)) {
                        uint64_t inst = *(uint64_t *)(rec->addr + offset);

                        printf("0x%08x: ", rec->paddr + offset);
                        vc4_qpu_disasm(stdout, &inst, 1);
                        printf("\n");

                        if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_PROG_END) {
                                /* Parse two more instructions (the delay
                                 * slots), then stop.
                                 */
                                end_offset = offset + 12;
                        }
                }
                printf("\n");
        }
}
Esempio n. 24
0
int
qpu_num_sf_accesses(uint64_t inst)
{
        int accesses = 0;
        static const uint32_t specials[] = {
                QPU_W_TLB_COLOR_MS,
                QPU_W_TLB_COLOR_ALL,
                QPU_W_TLB_Z,
                QPU_W_TMU0_S,
                QPU_W_TMU0_T,
                QPU_W_TMU0_R,
                QPU_W_TMU0_B,
                QPU_W_TMU1_S,
                QPU_W_TMU1_T,
                QPU_W_TMU1_R,
                QPU_W_TMU1_B,
                QPU_W_SFU_RECIP,
                QPU_W_SFU_RECIPSQRT,
                QPU_W_SFU_EXP,
                QPU_W_SFU_LOG,
        };
        uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
        uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
        uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
        uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);

        for (int j = 0; j < ARRAY_SIZE(specials); j++) {
                if (waddr_add == specials[j])
                        accesses++;
                if (waddr_mul == specials[j])
                        accesses++;
        }

        if (raddr_a == QPU_R_MUTEX_ACQUIRE)
                accesses++;
        if (raddr_b == QPU_R_MUTEX_ACQUIRE &&
            QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM)
                accesses++;

        /* XXX: semaphore, combined color read/write? */
        switch (QPU_GET_FIELD(inst, QPU_SIG)) {
        case QPU_SIG_COLOR_LOAD:
        case QPU_SIG_COLOR_LOAD_END:
        case QPU_SIG_LOAD_TMU0:
        case QPU_SIG_LOAD_TMU1:
                accesses++;
        }

        return accesses;
}
Esempio n. 25
0
static bool
convert_mov(uint64_t *inst)
{
        uint32_t add_a = QPU_GET_FIELD(*inst, QPU_ADD_A);
        uint32_t waddr_add = QPU_GET_FIELD(*inst, QPU_WADDR_ADD);
        uint32_t cond_add = QPU_GET_FIELD(*inst, QPU_COND_ADD);

        /* Is it a MOV? */
        if (QPU_GET_FIELD(*inst, QPU_OP_ADD) != QPU_A_OR ||
            (add_a != QPU_GET_FIELD(*inst, QPU_ADD_B))) {
                return false;
        }

        if (QPU_GET_FIELD(*inst, QPU_SIG) != QPU_SIG_NONE)
                return false;

        /* We could maybe support this in the .8888 and .8a-.8d cases. */
        if (*inst & QPU_PM)
                return false;

        *inst = QPU_UPDATE_FIELD(*inst, QPU_A_NOP, QPU_OP_ADD);
        *inst = QPU_UPDATE_FIELD(*inst, QPU_M_V8MIN, QPU_OP_MUL);

        *inst = QPU_UPDATE_FIELD(*inst, add_a, QPU_MUL_A);
        *inst = QPU_UPDATE_FIELD(*inst, add_a, QPU_MUL_B);
        *inst = QPU_UPDATE_FIELD(*inst, QPU_MUX_R0, QPU_ADD_A);
        *inst = QPU_UPDATE_FIELD(*inst, QPU_MUX_R0, QPU_ADD_B);

        *inst = QPU_UPDATE_FIELD(*inst, waddr_add, QPU_WADDR_MUL);
        *inst = QPU_UPDATE_FIELD(*inst, QPU_W_NOP, QPU_WADDR_ADD);

        *inst = QPU_UPDATE_FIELD(*inst, cond_add, QPU_COND_MUL);
        *inst = QPU_UPDATE_FIELD(*inst, QPU_COND_NEVER, QPU_COND_ADD);

        if (!qpu_waddr_ignores_ws(waddr_add))
                *inst ^= QPU_WS;

        return true;
}
Esempio n. 26
0
void
vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
{
        struct qpu_reg *temp_registers = vc4_register_allocate(vc4, c);
        bool discard = false;
        uint32_t inputs_remaining = c->num_inputs;
        uint32_t vpm_read_fifo_count = 0;
        uint32_t vpm_read_offset = 0;
        int last_vpm_read_index = -1;
        /* Map from the QIR ops enum order to QPU unpack bits. */
        static const uint32_t unpack_map[] = {
                QPU_UNPACK_8A,
                QPU_UNPACK_8B,
                QPU_UNPACK_8C,
                QPU_UNPACK_8D,
                QPU_UNPACK_16A_TO_F32,
                QPU_UNPACK_16B_TO_F32,
        };

        list_inithead(&c->qpu_inst_list);

        switch (c->stage) {
        case QSTAGE_VERT:
        case QSTAGE_COORD:
                /* There's a 4-entry FIFO for VPMVCD reads, each of which can
                 * load up to 16 dwords (4 vec4s) per vertex.
                 */
                while (inputs_remaining) {
                        uint32_t num_entries = MIN2(inputs_remaining, 16);
                        queue(c, qpu_load_imm_ui(qpu_vrsetup(),
                                                 vpm_read_offset |
                                                 0x00001a00 |
                                                 ((num_entries & 0xf) << 20)));
                        inputs_remaining -= num_entries;
                        vpm_read_offset += num_entries;
                        vpm_read_fifo_count++;
                }
                assert(vpm_read_fifo_count <= 4);

                queue(c, qpu_load_imm_ui(qpu_vwsetup(), 0x00001a00));
                break;
        case QSTAGE_FRAG:
                break;
        }

        list_for_each_entry(struct qinst, qinst, &c->instructions, link) {
#if 0
                fprintf(stderr, "translating qinst to qpu: ");
                qir_dump_inst(qinst);
                fprintf(stderr, "\n");
#endif

                static const struct {
                        uint32_t op;
                } translate[] = {
#define A(name) [QOP_##name] = {QPU_A_##name}
#define M(name) [QOP_##name] = {QPU_M_##name}
                        A(FADD),
                        A(FSUB),
                        A(FMIN),
                        A(FMAX),
                        A(FMINABS),
                        A(FMAXABS),
                        A(FTOI),
                        A(ITOF),
                        A(ADD),
                        A(SUB),
                        A(SHL),
                        A(SHR),
                        A(ASR),
                        A(MIN),
                        A(MAX),
                        A(AND),
                        A(OR),
                        A(XOR),
                        A(NOT),

                        M(FMUL),
                        M(MUL24),
                };

                struct qpu_reg src[4];
                for (int i = 0; i < qir_get_op_nsrc(qinst->op); i++) {
                        int index = qinst->src[i].index;
                        switch (qinst->src[i].file) {
                        case QFILE_NULL:
                                src[i] = qpu_rn(0);
                                break;
                        case QFILE_TEMP:
                                src[i] = temp_registers[index];
                                break;
                        case QFILE_UNIF:
                                src[i] = qpu_unif();
                                break;
                        case QFILE_VARY:
                                src[i] = qpu_vary();
                                break;
                        case QFILE_SMALL_IMM:
                                src[i].mux = QPU_MUX_SMALL_IMM;
                                src[i].addr = qpu_encode_small_immediate(qinst->src[i].index);
                                /* This should only have returned a valid
                                 * small immediate field, not ~0 for failure.
                                 */
                                assert(src[i].addr <= 47);
                                break;
                        case QFILE_VPM:
                                assert((int)qinst->src[i].index >=
                                       last_vpm_read_index);
                                (void)last_vpm_read_index;
                                last_vpm_read_index = qinst->src[i].index;
                                src[i] = qpu_ra(QPU_R_VPM);
                                break;
                        }
                }

                struct qpu_reg dst;
                switch (qinst->dst.file) {
                case QFILE_NULL:
                        dst = qpu_ra(QPU_W_NOP);
                        break;
                case QFILE_TEMP:
                        dst = temp_registers[qinst->dst.index];
                        break;
                case QFILE_VPM:
                        dst = qpu_ra(QPU_W_VPM);
                        break;
                case QFILE_VARY:
                case QFILE_UNIF:
                case QFILE_SMALL_IMM:
                        assert(!"not reached");
                        break;
                }

                switch (qinst->op) {
                case QOP_MOV:
                        /* Skip emitting the MOV if it's a no-op. */
                        if (dst.mux == QPU_MUX_A || dst.mux == QPU_MUX_B ||
                            dst.mux != src[0].mux || dst.addr != src[0].addr) {
                                queue(c, qpu_a_MOV(dst, src[0]));
                        }
                        break;

                case QOP_SEL_X_0_ZS:
                case QOP_SEL_X_0_ZC:
                case QOP_SEL_X_0_NS:
                case QOP_SEL_X_0_NC:
                case QOP_SEL_X_0_CS:
                case QOP_SEL_X_0_CC:
                        queue(c, qpu_a_MOV(dst, src[0]));
                        set_last_cond_add(c, qinst->op - QOP_SEL_X_0_ZS +
                                          QPU_COND_ZS);

                        queue(c, qpu_a_XOR(dst, qpu_r0(), qpu_r0()));
                        set_last_cond_add(c, ((qinst->op - QOP_SEL_X_0_ZS) ^
                                              1) + QPU_COND_ZS);
                        break;

                case QOP_SEL_X_Y_ZS:
                case QOP_SEL_X_Y_ZC:
                case QOP_SEL_X_Y_NS:
                case QOP_SEL_X_Y_NC:
                case QOP_SEL_X_Y_CS:
                case QOP_SEL_X_Y_CC:
                        queue(c, qpu_a_MOV(dst, src[0]));
                        set_last_cond_add(c, qinst->op - QOP_SEL_X_Y_ZS +
                                          QPU_COND_ZS);

                        queue(c, qpu_a_MOV(dst, src[1]));
                        set_last_cond_add(c, ((qinst->op - QOP_SEL_X_Y_ZS) ^
                                              1) + QPU_COND_ZS);

                        break;

                case QOP_RCP:
                case QOP_RSQ:
                case QOP_EXP2:
                case QOP_LOG2:
                        switch (qinst->op) {
                        case QOP_RCP:
                                queue(c, qpu_a_MOV(qpu_rb(QPU_W_SFU_RECIP),
                                                   src[0]));
                                break;
                        case QOP_RSQ:
                                queue(c, qpu_a_MOV(qpu_rb(QPU_W_SFU_RECIPSQRT),
                                                   src[0]));
                                break;
                        case QOP_EXP2:
                                queue(c, qpu_a_MOV(qpu_rb(QPU_W_SFU_EXP),
                                                   src[0]));
                                break;
                        case QOP_LOG2:
                                queue(c, qpu_a_MOV(qpu_rb(QPU_W_SFU_LOG),
                                                   src[0]));
                                break;
                        default:
                                abort();
                        }

                        if (dst.mux != QPU_MUX_R4)
                                queue(c, qpu_a_MOV(dst, qpu_r4()));

                        break;

                case QOP_PACK_8888_F:
                        queue(c, qpu_m_MOV(dst, src[0]));
                        *last_inst(c) |= QPU_PM;
                        *last_inst(c) |= QPU_SET_FIELD(QPU_PACK_MUL_8888,
                                                       QPU_PACK);
                        break;

                case QOP_PACK_8A_F:
                case QOP_PACK_8B_F:
                case QOP_PACK_8C_F:
                case QOP_PACK_8D_F:
                        queue(c,
                              qpu_m_MOV(dst, src[0]) |
                              QPU_PM |
                              QPU_SET_FIELD(QPU_PACK_MUL_8A +
                                            qinst->op - QOP_PACK_8A_F,
                                            QPU_PACK));
                        break;

                case QOP_FRAG_X:
                        queue(c, qpu_a_ITOF(dst,
                                            qpu_ra(QPU_R_XY_PIXEL_COORD)));
                        break;

                case QOP_FRAG_Y:
                        queue(c, qpu_a_ITOF(dst,
                                            qpu_rb(QPU_R_XY_PIXEL_COORD)));
                        break;

                case QOP_FRAG_REV_FLAG:
                        queue(c, qpu_a_ITOF(dst,
                                            qpu_rb(QPU_R_MS_REV_FLAGS)));
                        break;

                case QOP_FRAG_Z:
                case QOP_FRAG_W:
                        /* QOP_FRAG_Z/W don't emit instructions, just allocate
                         * the register to the Z/W payload.
                         */
                        break;

                case QOP_TLB_DISCARD_SETUP:
                        discard = true;
                        queue(c, qpu_a_MOV(src[0], src[0]));
                        *last_inst(c) |= QPU_SF;
                        break;

                case QOP_TLB_STENCIL_SETUP:
                        queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_STENCIL_SETUP), src[0]));
                        break;

                case QOP_TLB_Z_WRITE:
                        queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_Z), src[0]));
                        if (discard) {
                                set_last_cond_add(c, QPU_COND_ZS);
                        }
                        break;

                case QOP_TLB_COLOR_READ:
                        queue(c, qpu_NOP());
                        *last_inst(c) = qpu_set_sig(*last_inst(c),
                                                    QPU_SIG_COLOR_LOAD);

                        if (dst.mux != QPU_MUX_R4)
                                queue(c, qpu_a_MOV(dst, qpu_r4()));
                        break;

                case QOP_TLB_COLOR_WRITE:
                        queue(c, qpu_a_MOV(qpu_tlbc(), src[0]));
                        if (discard) {
                                set_last_cond_add(c, QPU_COND_ZS);
                        }
                        break;

                case QOP_VARY_ADD_C:
                        queue(c, qpu_a_FADD(dst, src[0], qpu_r5()));
                        break;

                case QOP_TEX_S:
                case QOP_TEX_T:
                case QOP_TEX_R:
                case QOP_TEX_B:
                        queue(c, qpu_a_MOV(qpu_rb(QPU_W_TMU0_S +
                                                  (qinst->op - QOP_TEX_S)),
                                           src[0]));
                        break;

                case QOP_TEX_DIRECT:
                        fixup_raddr_conflict(c, dst, &src[0], &src[1]);
                        queue(c, qpu_a_ADD(qpu_rb(QPU_W_TMU0_S), src[0], src[1]));
                        break;

                case QOP_TEX_RESULT:
                        queue(c, qpu_NOP());
                        *last_inst(c) = qpu_set_sig(*last_inst(c),
                                                    QPU_SIG_LOAD_TMU0);
                        if (dst.mux != QPU_MUX_R4)
                                queue(c, qpu_a_MOV(dst, qpu_r4()));
                        break;

                case QOP_UNPACK_8A_F:
                case QOP_UNPACK_8B_F:
                case QOP_UNPACK_8C_F:
                case QOP_UNPACK_8D_F:
                case QOP_UNPACK_16A_F:
                case QOP_UNPACK_16B_F: {
                        if (src[0].mux == QPU_MUX_R4) {
                                queue(c, qpu_a_MOV(dst, src[0]));
                                *last_inst(c) |= QPU_PM;
                                *last_inst(c) |= QPU_SET_FIELD(QPU_UNPACK_8A +
                                                               (qinst->op -
                                                                QOP_UNPACK_8A_F),
                                                               QPU_UNPACK);
                        } else {
                                assert(src[0].mux == QPU_MUX_A);

                                /* Since we're setting the pack bits, if the
                                 * destination is in A it would get re-packed.
                                 */
                                queue(c, qpu_a_FMAX((dst.mux == QPU_MUX_A ?
                                                     qpu_rb(31) : dst),
                                                    src[0], src[0]));
                                *last_inst(c) |=
                                        QPU_SET_FIELD(unpack_map[qinst->op -
                                                                 QOP_UNPACK_8A_F],
                                                      QPU_UNPACK);

                                if (dst.mux == QPU_MUX_A) {
                                        queue(c, qpu_a_MOV(dst, qpu_rb(31)));
                                }
                        }
                }
                        break;

                case QOP_UNPACK_8A_I:
                case QOP_UNPACK_8B_I:
                case QOP_UNPACK_8C_I:
                case QOP_UNPACK_8D_I:
                case QOP_UNPACK_16A_I:
                case QOP_UNPACK_16B_I: {
                        assert(src[0].mux == QPU_MUX_A);

                        /* Since we're setting the pack bits, if the
                         * destination is in A it would get re-packed.
                         */
                        queue(c, qpu_a_MOV((dst.mux == QPU_MUX_A ?
                                            qpu_rb(31) : dst), src[0]));
                        *last_inst(c) |= QPU_SET_FIELD(unpack_map[qinst->op -
                                                                  QOP_UNPACK_8A_I],
                                                       QPU_UNPACK);

                        if (dst.mux == QPU_MUX_A) {
                                queue(c, qpu_a_MOV(dst, qpu_rb(31)));
                        }
                }
                        break;

                default:
                        assert(qinst->op < ARRAY_SIZE(translate));
                        assert(translate[qinst->op].op != 0); /* NOPs */

                        /* If we have only one source, put it in the second
                         * argument slot as well so that we don't take up
                         * another raddr just to get unused data.
                         */
                        if (qir_get_op_nsrc(qinst->op) == 1)
                                src[1] = src[0];

                        fixup_raddr_conflict(c, dst, &src[0], &src[1]);

                        if (qir_is_mul(qinst)) {
                                queue(c, qpu_m_alu2(translate[qinst->op].op,
                                                    dst,
                                                    src[0], src[1]));
                                if (qinst->dst.pack) {
                                        *last_inst(c) |= QPU_PM;
                                        *last_inst(c) |= QPU_SET_FIELD(qinst->dst.pack,
                                                                       QPU_PACK);
                                }
                        } else {
                                queue(c, qpu_a_alu2(translate[qinst->op].op,
                                                    dst,
                                                    src[0], src[1]));
                                if (qinst->dst.pack) {
                                        assert(dst.mux == QPU_MUX_A);
                                        *last_inst(c) |= QPU_SET_FIELD(qinst->dst.pack,
                                                                       QPU_PACK);
                                }
                        }

                        break;
                }

                if (qinst->sf) {
                        assert(!qir_is_multi_instruction(qinst));
                        *last_inst(c) |= QPU_SF;
                }
        }

        qpu_schedule_instructions(c);

        /* thread end can't have VPM write or read */
        if (QPU_GET_FIELD(c->qpu_insts[c->qpu_inst_count - 1],
                          QPU_WADDR_ADD) == QPU_W_VPM ||
            QPU_GET_FIELD(c->qpu_insts[c->qpu_inst_count - 1],
                          QPU_WADDR_MUL) == QPU_W_VPM ||
            QPU_GET_FIELD(c->qpu_insts[c->qpu_inst_count - 1],
                          QPU_RADDR_A) == QPU_R_VPM ||
            QPU_GET_FIELD(c->qpu_insts[c->qpu_inst_count - 1],
                          QPU_RADDR_B) == QPU_R_VPM) {
                qpu_serialize_one_inst(c, qpu_NOP());
        }

        /* thread end can't have uniform read */
        if (QPU_GET_FIELD(c->qpu_insts[c->qpu_inst_count - 1],
                          QPU_RADDR_A) == QPU_R_UNIF ||
            QPU_GET_FIELD(c->qpu_insts[c->qpu_inst_count - 1],
                          QPU_RADDR_B) == QPU_R_UNIF) {
                qpu_serialize_one_inst(c, qpu_NOP());
        }

        /* thread end can't have TLB operations */
        if (qpu_inst_is_tlb(c->qpu_insts[c->qpu_inst_count - 1]))
                qpu_serialize_one_inst(c, qpu_NOP());

        c->qpu_insts[c->qpu_inst_count - 1] =
                qpu_set_sig(c->qpu_insts[c->qpu_inst_count - 1],
                            QPU_SIG_PROG_END);
        qpu_serialize_one_inst(c, qpu_NOP());
        qpu_serialize_one_inst(c, qpu_NOP());

        switch (c->stage) {
        case QSTAGE_VERT:
        case QSTAGE_COORD:
                break;
        case QSTAGE_FRAG:
                c->qpu_insts[c->qpu_inst_count - 1] =
                        qpu_set_sig(c->qpu_insts[c->qpu_inst_count - 1],
                                    QPU_SIG_SCOREBOARD_UNLOCK);
                break;
        }

        if (vc4_debug & VC4_DEBUG_QPU)
                vc4_dump_program(c);

        vc4_qpu_validate(c->qpu_insts, c->qpu_inst_count);

        free(temp_registers);
}
Esempio n. 27
0
/**
 * Checks for the instruction restrictions from page 37 ("Summary of
 * Instruction Restrictions").
 */
void
vc4_qpu_validate(uint64_t *insts, uint32_t num_inst)
{
        for (int i = 0; i < num_inst; i++) {
                uint64_t inst = insts[i];

                if (QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_PROG_END)
                        continue;

                /* "The Thread End instruction must not write to either physical
                 *  regfile A or B."
                 */
                assert(QPU_GET_FIELD(inst, QPU_WADDR_ADD) >= 32);
                assert(QPU_GET_FIELD(inst, QPU_WADDR_MUL) >= 32);

                /* Two delay slots will be executed. */
                assert(i + 2 <= num_inst);

                 for (int j = i; j < i + 2; j++) {
                         /* "The last three instructions of any program
                          *  (Thread End plus the following two delay-slot
                          *  instructions) must not do varyings read, uniforms
                          *  read or any kind of VPM, VDR, or VDW read or
                          *  write."
                          */
                         assert(!writes_reg(insts[j], QPU_W_VPM));
                         assert(!reads_reg(insts[j], QPU_R_VARY));
                         assert(!reads_reg(insts[j], QPU_R_UNIF));
                         assert(!reads_reg(insts[j], QPU_R_VPM));

                         /* "The Thread End instruction and the following two
                          *  delay slot instructions must not write or read
                          *  address 14 in either regfile A or B."
                          */
                         assert(!writes_reg(insts[j], 14));
                         assert(!reads_reg(insts[j], 14));

                 }

                 /* "The final program instruction (the second delay slot
                  *  instruction) must not do a TLB Z write."
                  */
                 assert(!writes_reg(insts[i + 2], QPU_W_TLB_Z));
        }

        /* "A scoreboard wait must not occur in the first two instructions of
         *  a fragment shader. This is either the explicit Wait for Scoreboard
         *  signal or an implicit wait with the first tile-buffer read or
         *  write instruction."
         */
        for (int i = 0; i < 2; i++) {
                uint64_t inst = insts[i];

                assert(QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_COLOR_LOAD);
                assert(QPU_GET_FIELD(inst, QPU_SIG) !=
                       QPU_SIG_WAIT_FOR_SCOREBOARD);
                assert(!writes_reg(inst, QPU_W_TLB_COLOR_MS));
                assert(!writes_reg(inst, QPU_W_TLB_COLOR_ALL));
                assert(!writes_reg(inst, QPU_W_TLB_Z));

        }

        /* "If TMU_NOSWAP is written, the write must be three instructions
         *  before the first TMU write instruction.  For example, if
         *  TMU_NOSWAP is written in the first shader instruction, the first
         *  TMU write cannot occur before the 4th shader instruction."
         */
        int last_tmu_noswap = -10;
        for (int i = 0; i < num_inst; i++) {
                uint64_t inst = insts[i];

                assert((i - last_tmu_noswap) > 3 ||
                       (!writes_reg(inst, QPU_W_TMU0_S) &&
                        !writes_reg(inst, QPU_W_TMU1_S)));

                if (writes_reg(inst, QPU_W_TMU_NOSWAP))
                    last_tmu_noswap = i;
        }

        /* "An instruction must not read from a location in physical regfile A
         *  or B that was written to by the previous instruction."
         */
        for (int i = 0; i < num_inst - 1; i++) {
                uint64_t inst = insts[i];
                uint32_t add_waddr = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
                uint32_t mul_waddr = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
                uint32_t waddr_a, waddr_b;

                if (inst & QPU_WS) {
                        waddr_b = add_waddr;
                        waddr_a = mul_waddr;
                } else {
                        waddr_a = add_waddr;
                        waddr_b = mul_waddr;
                }

                assert(waddr_a >= 32 || !reads_a_reg(insts[i + 1], waddr_a));
                assert(waddr_b >= 32 || !reads_b_reg(insts[i + 1], waddr_b));
        }

        /* "After an SFU lookup instruction, accumulator r4 must not be read
         *  in the following two instructions. Any other instruction that
         *  results in r4 being written (that is, TMU read, TLB read, SFU
         *  lookup) cannot occur in the two instructions following an SFU
         *  lookup."
         */
        int last_sfu_inst = -10;
        for (int i = 0; i < num_inst - 1; i++) {
                uint64_t inst = insts[i];

                assert(i - last_sfu_inst > 2 ||
                       (!writes_sfu(inst) &&
                        !writes_reg(inst, QPU_W_TMU0_S) &&
                        !writes_reg(inst, QPU_W_TMU1_S) &&
                        QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_COLOR_LOAD));

                if (writes_sfu(inst))
                        last_sfu_inst = i;
        }

        int last_r5_write = -10;
        for (int i = 0; i < num_inst - 1; i++) {
                uint64_t inst = insts[i];

                /* "An instruction that does a vector rotate by r5 must not
                 *  immediately follow an instruction that writes to r5."
                 */
                assert(last_r5_write != i - 1 ||
                       QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM ||
                       QPU_GET_FIELD(inst, QPU_SMALL_IMM) != 48);
        }

        /* "An instruction that does a vector rotate must not immediately
         *  follow an instruction that writes to the accumulator that is being
         *  rotated.
         *
         * XXX: TODO.
         */

        /* "After an instruction that does a TLB Z write, the multisample mask
         *  must not be read as an instruction input argument in the following
         *  two instruction. The TLB Z write instruction can, however, be
         *  followed immediately by a TLB color write."
         */
        for (int i = 0; i < num_inst - 1; i++) {
                uint64_t inst = insts[i];
                if (writes_reg(inst, QPU_W_TLB_Z)) {
                        assert(!reads_a_reg(insts[i + 1], QPU_R_MS_REV_FLAGS));
                        assert(!reads_a_reg(insts[i + 2], QPU_R_MS_REV_FLAGS));
                }
        }

        /*
         * "A single instruction can only perform a maximum of one of the
         *  following closely coupled peripheral accesses in a single
         *  instruction: TMU write, TMU read, TLB write, TLB read, TLB
         *  combined color read and write, SFU write, Mutex read or Semaphore
         *  access."
         */
        for (int i = 0; i < num_inst - 1; i++) {
                uint64_t inst = insts[i];
                int accesses = 0;
                static const uint32_t specials[] = {
                        QPU_W_TLB_COLOR_MS,
                        QPU_W_TLB_COLOR_ALL,
                        QPU_W_TLB_Z,
                        QPU_W_TMU0_S,
                        QPU_W_TMU0_T,
                        QPU_W_TMU0_R,
                        QPU_W_TMU0_B,
                        QPU_W_TMU1_S,
                        QPU_W_TMU1_T,
                        QPU_W_TMU1_R,
                        QPU_W_TMU1_B,
                        QPU_W_SFU_RECIP,
                        QPU_W_SFU_RECIPSQRT,
                        QPU_W_SFU_EXP,
                        QPU_W_SFU_LOG,
                };

                for (int j = 0; j < ARRAY_SIZE(specials); j++) {
                        if (writes_reg(inst, specials[j]))
                                accesses++;
                }

                if (reads_reg(inst, QPU_R_MUTEX_ACQUIRE))
                        accesses++;

                /* XXX: semaphore, combined color read/write? */
                switch (QPU_GET_FIELD(inst, QPU_SIG)) {
                case QPU_SIG_COLOR_LOAD:
                case QPU_SIG_COLOR_LOAD_END:
                case QPU_SIG_LOAD_TMU0:
                case QPU_SIG_LOAD_TMU1:
                        accesses++;
                }

                assert(accesses <= 1);
        }
}
Esempio n. 28
0
static bool
writes_reg(uint64_t inst, uint32_t w)
{
        return (QPU_GET_FIELD(inst, QPU_WADDR_ADD) == w ||
                QPU_GET_FIELD(inst, QPU_WADDR_MUL) == w);
}
Esempio n. 29
0
uint64_t
qpu_set_cond_mul(uint64_t inst, uint32_t cond)
{
        assert(QPU_GET_FIELD(inst, QPU_COND_MUL) == QPU_COND_ALWAYS);
        return QPU_UPDATE_FIELD(inst, cond, QPU_COND_MUL);
}
struct vc4_validated_shader_info *
vc4_validate_shader(struct drm_gem_cma_object *shader_obj,
		    uint32_t start_offset)
{
	bool found_shader_end = false;
	int shader_end_ip = 0;
	uint32_t ip, max_ip;
	uint64_t *shader;
	struct vc4_validated_shader_info *validated_shader;
	struct vc4_shader_validation_state validation_state;

	memset(&validation_state, 0, sizeof(validation_state));

	if (start_offset + sizeof(uint64_t) > shader_obj->base.size) {
		DRM_ERROR("shader starting at %d outside of BO sized %d\n",
			  start_offset,
			  shader_obj->base.size);
		return NULL;
	}
	shader = shader_obj->vaddr + start_offset;
	max_ip = (shader_obj->base.size - start_offset) / sizeof(uint64_t);

	validated_shader = kcalloc(sizeof(*validated_shader), 1, GFP_KERNEL);
	if (!validated_shader)
		return NULL;

	for (ip = 0; ip < max_ip; ip++) {
		uint64_t inst = shader[ip];
		uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

		switch (sig) {
		case QPU_SIG_NONE:
		case QPU_SIG_WAIT_FOR_SCOREBOARD:
		case QPU_SIG_SCOREBOARD_UNLOCK:
		case QPU_SIG_COLOR_LOAD:
		case QPU_SIG_LOAD_TMU0:
		case QPU_SIG_LOAD_TMU1:
			if (!check_instruction_writes(inst, validated_shader,
						      &validation_state)) {
				DRM_ERROR("Bad write at ip %d\n", ip);
				goto fail;
			}

			if (!check_instruction_reads(inst, validated_shader))
				goto fail;

			break;

		case QPU_SIG_LOAD_IMM:
			if (!check_instruction_writes(inst, validated_shader,
						      &validation_state)) {
				DRM_ERROR("Bad LOAD_IMM write at ip %d\n", ip);
				goto fail;
			}
			break;

		case QPU_SIG_PROG_END:
			found_shader_end = true;
			shader_end_ip = ip;
			break;

		default:
			DRM_ERROR("Unsupported QPU signal %d at "
				  "instruction %d\n", sig, ip);
			goto fail;
		}

		/* There are two delay slots after program end is signaled
		 * that are still executed, then we're finished.
		 */
		if (found_shader_end && ip == shader_end_ip + 2)
			break;
	}

	if (ip == max_ip) {
		DRM_ERROR("shader starting at %d failed to terminate before "
			  "shader BO end at %d\n",
			  start_offset,
			  shader_obj->base.size);
		goto fail;
	}

	/* Again, no chance of integer overflow here because the worst case
	 * scenario is 8 bytes of uniforms plus handles per 8-byte
	 * instruction.
	 */
	validated_shader->uniforms_src_size =
		(validated_shader->uniforms_size +
		 4 * validated_shader->num_texture_samples);

	return validated_shader;

fail:
	kfree(validated_shader);
	return NULL;
}