/*
 * Round-trip a single instruction through the compactor.
 *
 * Returns true when either
 *   - the instruction compacts and uncompacting the result reproduces the
 *     original bytes exactly, or
 *   - compaction is refused and the destination buffer still holds the 0xd0
 *     fill pattern it was primed with.
 * On failure a diagnostic is emitted and false is returned.
 */
static bool
test_compact_instruction(struct brw_codegen *p, brw_inst src)
{
   brw_compact_inst dst;
   memset(&dst, 0xd0, sizeof(dst));

   if (!brw_try_compact_instruction(p->devinfo, &dst, &src)) {
      /* It's not supposed to change dst unless it compacted. */
      brw_compact_inst pristine;
      memset(&pristine, 0xd0, sizeof(pristine));

      if (memcmp(&pristine, &dst, sizeof(dst)) == 0)
         return true;

      fprintf(stderr, "Failed to compact, but dst changed\n");
      fprintf(stderr, " Instruction: ");
      brw_disassemble_inst(stderr, p->devinfo, &src, false);
      return false;
   }

   /* Compaction succeeded: expanding it again must give back the input. */
   brw_inst expanded;
   brw_uncompact_instruction(p->devinfo, &expanded, &dst);

   if (memcmp(&expanded, &src, sizeof(src)) == 0)
      return true;

   brw_debug_compact_uncompact(p->devinfo, &src, &expanded);
   return false;
}
static bool test_compact_instruction(struct brw_compile *p, struct brw_instruction src) { struct brw_context *brw = p->brw; struct brw_compact_instruction dst; memset(&dst, 0xd0, sizeof(dst)); if (brw_try_compact_instruction(p, &dst, &src)) { struct brw_instruction uncompacted; brw_uncompact_instruction(brw, &uncompacted, &dst); if (memcmp(&uncompacted, &src, sizeof(src))) { brw_debug_compact_uncompact(brw, &src, &uncompacted); return false; } } else { struct brw_compact_instruction unchanged; memset(&unchanged, 0xd0, sizeof(unchanged)); /* It's not supposed to change dst unless it compacted. */ if (memcmp(&unchanged, &dst, sizeof(dst))) { fprintf(stderr, "Failed to compact, but dst changed\n"); fprintf(stderr, " Instruction: "); brw_disassemble_inst(stderr, &src, brw->gen, false); return false; } } return true; }
/*
 * Disassemble the instructions stored in `assembly` between byte offsets
 * `start` (inclusive) and `end` (exclusive), writing the text to `out`.
 *
 * An instruction for which brw_inst_cmpt_control() reports true is treated
 * as an 8-byte compacted encoding and expanded into a temporary before it is
 * printed; every other instruction is treated as a 16-byte native encoding.
 * The raw-dword hex dump is compiled in but switched off in this function.
 */
void
brw_disassemble(struct brw_context *brw, void *assembly, int start, int end,
                FILE *out)
{
   bool dump_hex = false;
   int offset = start;

   while (offset < end) {
      brw_inst *insn = assembly + offset;
      /* Raw dwords of the encoded instruction, captured before `insn` is
       * possibly redirected to the expanded copy below.
       */
      const uint32_t *dw = (const uint32_t *)insn;
      brw_inst expanded;
      bool is_compact = brw_inst_cmpt_control(brw, insn);

      if (0)
         fprintf(out, "0x%08x: ", offset);

      if (!is_compact) {
         if (dump_hex) {
            fprintf(out, "0x%08x 0x%08x 0x%08x 0x%08x ",
                    dw[3], dw[2], dw[1], dw[0]);
         }
         offset += 16;
      } else {
         brw_compact_inst *packed = (void *)insn;

         if (dump_hex) {
            fprintf(out, "0x%08x 0x%08x ", dw[1], dw[0]);
         }

         brw_uncompact_instruction(brw, &expanded, packed);
         insn = &expanded;
         offset += 8;
      }

      brw_disassemble_inst(out, brw, insn, is_compact);
   }
}
/*
 * Print a disassembly of the byte range [start, end) of `assembly` to `out`.
 *
 * Each step inspects the instruction at the current offset: when
 * brw_inst_cmpt_control() reports true it is taken to be an 8-byte compacted
 * encoding and is expanded into a temporary first; otherwise it is taken to
 * be a 16-byte native encoding.  When the DEBUG_HEX bit is set in
 * INTEL_DEBUG, the raw dwords are printed (most significant first) ahead of
 * the disassembled text.
 */
void
brw_disassemble(const struct brw_device_info *devinfo, void *assembly,
                int start, int end, FILE *out)
{
   bool dump_hex = (INTEL_DEBUG & DEBUG_HEX) != 0;
   int offset = start;

   while (offset < end) {
      brw_inst *insn = assembly + offset;
      /* Points at the encoded bytes; taken before `insn` may be switched to
       * the expanded temporary.
       */
      const uint32_t *raw = (const uint32_t *)insn;
      brw_inst expanded;
      bool is_compact = brw_inst_cmpt_control(devinfo, insn);

      if (0)
         fprintf(out, "0x%08x: ", offset);

      if (is_compact) {
         brw_compact_inst *packed = (void *)insn;

         if (dump_hex) {
            fprintf(out, "0x%08x 0x%08x ", raw[1], raw[0]);
         }

         brw_uncompact_instruction(devinfo, &expanded, packed);
         insn = &expanded;
         offset += 8;
      } else {
         if (dump_hex) {
            fprintf(out, "0x%08x 0x%08x 0x%08x 0x%08x ",
                    raw[3], raw[2], raw[1], raw[0]);
         }
         offset += 16;
      }

      brw_disassemble_inst(out, devinfo, insn, is_compact);
   }
}
void brw_dump_compile(struct brw_compile *p, FILE *out, int start, int end) { struct brw_context *brw = p->brw; struct intel_context *intel = &brw->intel; void *store = p->store; bool dump_hex = false; for (int offset = start; offset < end;) { struct brw_instruction *insn = store + offset; struct brw_instruction uncompacted; printf("0x%08x: ", offset); if (insn->header.cmpt_control) { struct brw_compact_instruction *compacted = (void *)insn; if (dump_hex) { printf("0x%08x 0x%08x ", ((uint32_t *)insn)[1], ((uint32_t *)insn)[0]); } brw_uncompact_instruction(intel, &uncompacted, compacted); insn = &uncompacted; offset += 8; } else { if (dump_hex) { printf("0x%08x 0x%08x 0x%08x 0x%08x ", ((uint32_t *)insn)[3], ((uint32_t *)insn)[2], ((uint32_t *)insn)[1], ((uint32_t *)insn)[0]); } offset += 16; } brw_disasm(stdout, insn, p->brw->intel.gen); } }
/*
 * Compact the instruction stream held in p->store, in place.
 *
 * Pass 1 walks the stream and tries to replace each not-yet-compacted
 * 16-byte instruction with its 8-byte compacted form, sliding everything
 * down so the stream stays contiguous.  Pass 2 revisits the result and
 * adjusts the jump fields of control-flow instructions to account for the
 * bytes that were removed.  Finally p->next_insn_offset and p->nr_insn are
 * updated to describe the new, shorter stream.
 *
 * Does nothing when intel->gen is below 6.
 */
void brw_compact_instructions(struct brw_compile *p) {
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   void *store = p->store;
   /* For an instruction at byte offset 8*i before compaction, this is the number
    * of compacted instructions that preceded it.
    */
   int compacted_counts[p->next_insn_offset / 8];
   /* For an instruction at byte offset 8*i after compaction, this is the
    * 8-byte offset it was at before compaction.
    */
   int old_ip[p->next_insn_offset / 8];

   if (intel->gen < 6)
      return;

   /* src_offset: read cursor in the original layout (bytes).
    * offset:     write cursor in the compacted layout (bytes).
    */
   int src_offset;
   int offset = 0;
   int compacted_count = 0;
   for (src_offset = 0; src_offset < p->nr_insn * 16;) {
      struct brw_instruction *src = store + src_offset;
      void *dst = store + offset;

      old_ip[offset / 8] = src_offset / 8;
      compacted_counts[src_offset / 8] = compacted_count;

      /* Keep a copy of the source: dst is the same address as src whenever
       * offset == src_offset, so a successful compaction may overwrite it.
       */
      struct brw_instruction saved = *src;

      if (!src->header.cmpt_control &&
          brw_try_compact_instruction(p, dst, src)) {
         compacted_count++;

         /* When INTEL_DEBUG is nonzero, verify that uncompacting the result
          * reproduces the saved original and report any mismatch.
          */
         if (INTEL_DEBUG) {
            struct brw_instruction uncompacted;
            brw_uncompact_instruction(intel, &uncompacted, dst);
            if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) {
               brw_debug_compact_uncompact(intel, &saved, &uncompacted);
            }
         }

         offset += 8;
         src_offset += 16;
      } else {
         /* Either the instruction was already compacted (8 bytes) or it
          * could not be compacted (16 bytes); it is copied through as-is.
          */
         int size = src->header.cmpt_control ? 8 : 16;

         /* It appears that the end of thread SEND instruction needs to be
          * aligned, or the GPU hangs.
          */
         if ((src->header.opcode == BRW_OPCODE_SEND ||
              src->header.opcode == BRW_OPCODE_SENDC) &&
             src->bits3.generic.end_of_thread &&
             (offset & 8) != 0) {
            /* Emit a compacted NOP as 8 bytes of padding so the SEND lands
             * on a 16-byte boundary, then re-record the mapping for the
             * slot the SEND will now occupy.
             */
            struct brw_compact_instruction *align = store + offset;
            memset(align, 0, sizeof(*align));
            align->dw0.opcode = BRW_OPCODE_NOP;
            align->dw0.cmpt_ctrl = 1;
            offset += 8;
            old_ip[offset / 8] = src_offset / 8;
            dst = store + offset;
         }

         /* If we didn't compact this instruction, we need to move it down into
          * place.
          */
         if (offset != src_offset) {
            memmove(dst, src, size);
         }
         offset += size;
         src_offset += size;
      }
   }

   /* Fix up control flow offsets. */
   p->next_insn_offset = offset;
   for (offset = 0; offset < p->next_insn_offset;) {
      struct brw_instruction *insn = store + offset;
      int this_old_ip = old_ip[offset / 8];
      int this_compacted_count = compacted_counts[this_old_ip];
      int target_old_ip, target_compacted_count;

      switch (insn->header.opcode) {
      case BRW_OPCODE_BREAK:
      case BRW_OPCODE_CONTINUE:
      case BRW_OPCODE_HALT:
         /* NOTE(review): update_uip_jip() is defined elsewhere; presumably
          * it rewrites the instruction's UIP/JIP fields -- confirm.
          */
         update_uip_jip(insn, this_old_ip, compacted_counts);
         break;

      case BRW_OPCODE_IF:
      case BRW_OPCODE_ELSE:
      case BRW_OPCODE_ENDIF:
      case BRW_OPCODE_WHILE:
         if (intel->gen == 6) {
            /* The jump count is added to an 8-byte-unit old IP to find the
             * target, then reduced by the number of instructions compacted
             * between this instruction and that target.
             */
            target_old_ip = this_old_ip + insn->bits1.branch_gen6.jump_count;
            target_compacted_count = compacted_counts[target_old_ip];
            insn->bits1.branch_gen6.jump_count -= (target_compacted_count -
                                                   this_compacted_count);
         } else {
            update_uip_jip(insn, this_old_ip, compacted_counts);
         }
         break;
      }

      if (insn->header.cmpt_control) {
         offset += 8;
      } else {
         offset += 16;
      }
   }

   /* p->nr_insn is counting the number of uncompacted instructions still, so
    * divide.  We do want to be sure there's a valid instruction in any
    * alignment padding, so that the next compression pass (for the FS 8/16
    * compile passes) parses correctly.
    *
    * NOTE(review): `offset` is expected to equal p->next_insn_offset here,
    * since the loop above steps by exactly each instruction's size --
    * confirm.
    */
   if (p->next_insn_offset & 8) {
      struct brw_compact_instruction *align = store + offset;
      memset(align, 0, sizeof(*align));
      align->dw0.opcode = BRW_OPCODE_NOP;
      align->dw0.cmpt_ctrl = 1;
      p->next_insn_offset += 8;
   }
   p->nr_insn = p->next_insn_offset / 16;

   /* Disabled debugging aid: dumps the compacted program and reports how
    * many bytes compaction saved.
    */
   if (0) {
      fprintf(stdout, "dumping compacted program\n");
      brw_dump_compile(p, stdout, 0, p->next_insn_offset);

      int cmp = 0;
      for (offset = 0; offset < p->next_insn_offset;) {
         struct brw_instruction *insn = store + offset;

         if (insn->header.cmpt_control) {
            offset += 8;
            cmp++;
         } else {
            offset += 16;
         }
      }
      fprintf(stderr, "%db/%db saved (%d%%)\n", cmp * 8, offset + cmp * 8,
              cmp * 8 * 100 / (offset + cmp * 8));
   }
}