void
vec4_generator::generate_code(exec_list *instructions)
{
   int last_native_insn_offset = 0;
   const char *last_annotation_string = NULL;
   const void *last_annotation_ir = NULL;

   if (unlikely(debug_flag)) {
      if (shader_prog) {
         fprintf(stderr, "Native code for %s vertex shader %d:\n",
                 shader_prog->Label ? shader_prog->Label : "unnamed",
                 shader_prog->Name);
      } else {
         fprintf(stderr, "Native code for vertex program %d:\n", prog->Id);
      }
   }

   foreach_list(node, instructions) {
      vec4_instruction *inst = (vec4_instruction *)node;
      struct brw_reg src[3], dst;

      if (unlikely(debug_flag)) {
         if (last_annotation_ir != inst->ir) {
            last_annotation_ir = inst->ir;
            if (last_annotation_ir) {
               fprintf(stderr, "   ");
               if (shader_prog) {
                  ((ir_instruction *) last_annotation_ir)->fprint(stderr);
               } else {
                  const prog_instruction *vpi;
                  vpi = (const prog_instruction *) inst->ir;
                  fprintf(stderr, "%d: ", (int)(vpi - prog->Instructions));
                  _mesa_fprint_instruction_opt(stderr, vpi, 0,
                                               PROG_PRINT_DEBUG, NULL);
               }
               fprintf(stderr, "\n");
            }
         }
         if (last_annotation_string != inst->annotation) {
            last_annotation_string = inst->annotation;
            if (last_annotation_string)
               fprintf(stderr, "   %s\n", last_annotation_string);
         }
      }

      for (unsigned int i = 0; i < 3; i++) {
         src[i] = inst->get_src(this->prog_data, i);
      }
      dst = inst->get_dst();

      brw_set_conditionalmod(p, inst->conditional_mod);
      brw_set_predicate_control(p, inst->predicate);
      brw_set_predicate_inverse(p, inst->predicate_inverse);
      brw_set_saturate(p, inst->saturate);
      brw_set_mask_control(p, inst->force_writemask_all);

      unsigned pre_emit_nr_insn = p->nr_insn;

      generate_vec4_instruction(inst, dst, src);

      /* The no_dd_clear/no_dd_check flags apply to the single native
       * instruction just emitted; patch its dependency-control bits
       * directly.
       */
      if (inst->no_dd_clear || inst->no_dd_check) {
         assert(p->nr_insn == pre_emit_nr_insn + 1 ||
                !"no_dd_check or no_dd_clear set for IR emitting more "
                 "than 1 instruction");

         struct brw_instruction *last = &p->store[pre_emit_nr_insn];

         if (inst->no_dd_clear)
            last->header.dependency_control |= BRW_DEPENDENCY_NOTCLEARED;
         if (inst->no_dd_check)
            last->header.dependency_control |= BRW_DEPENDENCY_NOTCHECKED;
      }

      if (unlikely(debug_flag)) {
         brw_dump_compile(p, stderr,
                          last_native_insn_offset, p->next_insn_offset);
      }

      last_native_insn_offset = p->next_insn_offset;
   }
}
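/* A minimal sketch (not part of the original source) of the pattern the
 * no_dd_clear/no_dd_check flags above encode: two instructions writing
 * disjoint channels of the same register can be marked NoDDClr/NoDDChk so
 * the second does not stall on a false scoreboard dependency on the first.
 * The helper name is hypothetical; brw_MOV(), writemask(), and the
 * BRW_DEPENDENCY_* flags are the same ones used by generate_code() above.
 */
static void
emit_disjoint_channel_writes_sketch(struct brw_compile *p,
                                    struct brw_reg dst,
                                    struct brw_reg a, struct brw_reg b)
{
   struct brw_instruction *first, *second;

   /* NoDDClr: leave the destination dependency pending for the next write. */
   first = brw_MOV(p, writemask(dst, WRITEMASK_XY), a);
   first->header.dependency_control |= BRW_DEPENDENCY_NOTCLEARED;

   /* NoDDChk: don't stall checking the (false) dependency on 'first'. */
   second = brw_MOV(p, writemask(dst, WRITEMASK_ZW), b);
   second->header.dependency_control |= BRW_DEPENDENCY_NOTCHECKED;
}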
void
brw_compact_instructions(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   void *store = p->store;
   /* For an instruction at byte offset 8*i before compaction, this is the
    * number of compacted instructions that preceded it.
    */
   int compacted_counts[p->next_insn_offset / 8];
   /* For an instruction at byte offset 8*i after compaction, this is the
    * 8-byte offset it was at before compaction.
    */
   int old_ip[p->next_insn_offset / 8];

   if (intel->gen < 6)
      return;

   int src_offset;
   int offset = 0;
   int compacted_count = 0;
   for (src_offset = 0; src_offset < p->nr_insn * 16;) {
      struct brw_instruction *src = store + src_offset;
      void *dst = store + offset;

      old_ip[offset / 8] = src_offset / 8;
      compacted_counts[src_offset / 8] = compacted_count;

      struct brw_instruction saved = *src;

      if (!src->header.cmpt_control &&
          brw_try_compact_instruction(p, dst, src)) {
         compacted_count++;

         if (INTEL_DEBUG) {
            struct brw_instruction uncompacted;
            brw_uncompact_instruction(intel, &uncompacted, dst);
            if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) {
               brw_debug_compact_uncompact(intel, &saved, &uncompacted);
            }
         }

         offset += 8;
         src_offset += 16;
      } else {
         int size = src->header.cmpt_control ? 8 : 16;

         /* It appears that the end of thread SEND instruction needs to be
          * aligned, or the GPU hangs.
          */
         if ((src->header.opcode == BRW_OPCODE_SEND ||
              src->header.opcode == BRW_OPCODE_SENDC) &&
             src->bits3.generic.end_of_thread &&
             (offset & 8) != 0) {
            struct brw_compact_instruction *align = store + offset;
            memset(align, 0, sizeof(*align));
            align->dw0.opcode = BRW_OPCODE_NOP;
            align->dw0.cmpt_ctrl = 1;
            offset += 8;

            old_ip[offset / 8] = src_offset / 8;

            dst = store + offset;
         }

         /* If we didn't compact this instruction, we need to move it down
          * into place.
          */
         if (offset != src_offset) {
            memmove(dst, src, size);
         }
         offset += size;
         src_offset += size;
      }
   }

   /* Fix up control flow offsets. */
   p->next_insn_offset = offset;
   for (offset = 0; offset < p->next_insn_offset;) {
      struct brw_instruction *insn = store + offset;
      int this_old_ip = old_ip[offset / 8];
      int this_compacted_count = compacted_counts[this_old_ip];
      int target_old_ip, target_compacted_count;

      switch (insn->header.opcode) {
      case BRW_OPCODE_BREAK:
      case BRW_OPCODE_CONTINUE:
      case BRW_OPCODE_HALT:
         update_uip_jip(insn, this_old_ip, compacted_counts);
         break;

      case BRW_OPCODE_IF:
      case BRW_OPCODE_ELSE:
      case BRW_OPCODE_ENDIF:
      case BRW_OPCODE_WHILE:
         if (intel->gen == 6) {
            target_old_ip = this_old_ip +
               insn->bits1.branch_gen6.jump_count;
            target_compacted_count = compacted_counts[target_old_ip];
            insn->bits1.branch_gen6.jump_count -=
               (target_compacted_count - this_compacted_count);
         } else {
            update_uip_jip(insn, this_old_ip, compacted_counts);
         }
         break;
      }

      if (insn->header.cmpt_control) {
         offset += 8;
      } else {
         offset += 16;
      }
   }

   /* p->nr_insn is counting the number of uncompacted instructions still,
    * so divide.  We do want to be sure there's a valid instruction in any
    * alignment padding, so that the next compression pass (for the FS 8/16
    * compile passes) parses correctly.
    */
   if (p->next_insn_offset & 8) {
      struct brw_compact_instruction *align = store + offset;
      memset(align, 0, sizeof(*align));
      align->dw0.opcode = BRW_OPCODE_NOP;
      align->dw0.cmpt_ctrl = 1;
      p->next_insn_offset += 8;
   }
   p->nr_insn = p->next_insn_offset / 16;

   if (0) {
      fprintf(stdout, "dumping compacted program\n");
      brw_dump_compile(p, stdout, 0, p->next_insn_offset);

      int cmp = 0;
      for (offset = 0; offset < p->next_insn_offset;) {
         struct brw_instruction *insn = store + offset;

         if (insn->header.cmpt_control) {
            offset += 8;
            cmp++;
         } else {
            offset += 16;
         }
      }
      fprintf(stderr, "%db/%db saved (%d%%)\n", cmp * 8, offset + cmp * 8,
              cmp * 8 * 100 / (offset + cmp * 8));
   }
}
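/* Worked example for the gen6 jump_count fixup above (illustrative only):
 * before compaction every instruction is 16 bytes, so instruction starts
 * fall on even old IPs (8-byte units).  Suppose an IF at old_ip 0 has
 * jump_count +4 (two full instructions ahead, i.e. old_ip 4), and the one
 * instruction between them compacts, saving 8 bytes (one unit).  Then
 * compacted_counts[0] == 0 and compacted_counts[4] == 1, so the fixup
 * computes 4 - (1 - 0) = 3, matching the compacted layout: the IF (16B,
 * flow control is never compacted) ends at new offset 16, the compacted
 * middle instruction at 24, so the target sits 24 bytes = 3 units away.
 */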
const GLuint *
brw_blorp_const_color_program::compile(struct brw_context *brw,
                                       GLuint *program_size)
{
   /* Set up prog_data */
   memset(&prog_data, 0, sizeof(prog_data));
   prog_data.persample_msaa_dispatch = false;

   alloc_regs();

   brw_set_compression_control(&func, BRW_COMPRESSION_NONE);

   struct brw_reg mrf_rt_write =
      retype(vec16(brw_message_reg(base_mrf)), BRW_REGISTER_TYPE_F);

   uint32_t mlen, msg_type;
   if (key->use_simd16_replicated_data) {
      /* The message payload is a single register with the low 4 floats/ints
       * filled with the constant clear color.
       */
      brw_set_mask_control(&func, BRW_MASK_DISABLE);
      brw_MOV(&func, vec4(brw_message_reg(base_mrf)), clear_rgba);
      brw_set_mask_control(&func, BRW_MASK_ENABLE);

      msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED;
      mlen = 1;
   } else {
      for (int i = 0; i < 4; i++) {
         /* The message payload is pairs of registers for 16 pixels each of
          * r, g, b, and a.
          */
         brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED);
         brw_MOV(&func, brw_message_reg(base_mrf + i * 2),
                 brw_vec1_grf(clear_rgba.nr, i));
         brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
      }

      msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
      mlen = 8;
   }

   /* Now write to the render target and terminate the thread */
   brw_fb_WRITE(&func,
                16 /* dispatch_width */,
                base_mrf /* msg_reg_nr */,
                mrf_rt_write /* src0 */,
                msg_type,
                BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX,
                mlen,
                0 /* response_length */,
                true /* eot */,
                false /* header present */);

   if (unlikely(INTEL_DEBUG & DEBUG_BLORP)) {
      fprintf(stderr, "Native code for BLORP clear:\n");
      brw_dump_compile(&func, stderr, 0, func.next_insn_offset);
      fprintf(stderr, "\n");
   }
   return brw_get_program(&func, program_size);
}
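/* Payload-size note (derived from the two paths above): the replicated
 * message sends a single MRF (mlen == 1) carrying the constant color in
 * its low four channels, while the general path sends two compressed MRFs
 * per channel for 16 pixels (4 channels * 2 registers, mlen == 8).  When
 * the clear color is uniform across pixels, the replicated message thus
 * shrinks each render-target write payload by 7 registers.
 */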
void
vec4_generator::generate_code(exec_list *instructions)
{
   int last_native_insn_offset = 0;
   const char *last_annotation_string = NULL;
   const void *last_annotation_ir = NULL;

   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
      if (shader) {
         printf("Native code for vertex shader %d:\n", prog->Name);
      } else {
         printf("Native code for vertex program %d:\n",
                c->vp->program.Base.Id);
      }
   }

   foreach_list(node, instructions) {
      vec4_instruction *inst = (vec4_instruction *)node;
      struct brw_reg src[3], dst;

      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
         if (last_annotation_ir != inst->ir) {
            last_annotation_ir = inst->ir;
            if (last_annotation_ir) {
               printf("   ");
               if (shader) {
                  ((ir_instruction *) last_annotation_ir)->print();
               } else {
                  const prog_instruction *vpi;
                  vpi = (const prog_instruction *) inst->ir;
                  printf("%d: ", (int)(vpi - vp->Base.Instructions));
                  _mesa_fprint_instruction_opt(stdout, vpi, 0,
                                               PROG_PRINT_DEBUG, NULL);
               }
               printf("\n");
            }
         }
         if (last_annotation_string != inst->annotation) {
            last_annotation_string = inst->annotation;
            if (last_annotation_string)
               printf("   %s\n", last_annotation_string);
         }
      }

      for (unsigned int i = 0; i < 3; i++) {
         src[i] = inst->get_src(i);
      }
      dst = inst->get_dst();

      brw_set_conditionalmod(p, inst->conditional_mod);
      brw_set_predicate_control(p, inst->predicate);
      brw_set_predicate_inverse(p, inst->predicate_inverse);
      brw_set_saturate(p, inst->saturate);

      switch (inst->opcode) {
      case BRW_OPCODE_MOV:
         brw_MOV(p, dst, src[0]);
         break;
      case BRW_OPCODE_ADD:
         brw_ADD(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_MUL:
         brw_MUL(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_MACH:
         brw_set_acc_write_control(p, 1);
         brw_MACH(p, dst, src[0], src[1]);
         brw_set_acc_write_control(p, 0);
         break;

      case BRW_OPCODE_FRC:
         brw_FRC(p, dst, src[0]);
         break;
      case BRW_OPCODE_RNDD:
         brw_RNDD(p, dst, src[0]);
         break;
      case BRW_OPCODE_RNDE:
         brw_RNDE(p, dst, src[0]);
         break;
      case BRW_OPCODE_RNDZ:
         brw_RNDZ(p, dst, src[0]);
         break;

      case BRW_OPCODE_AND:
         brw_AND(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_OR:
         brw_OR(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_XOR:
         brw_XOR(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_NOT:
         brw_NOT(p, dst, src[0]);
         break;
      case BRW_OPCODE_ASR:
         brw_ASR(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_SHR:
         brw_SHR(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_SHL:
         brw_SHL(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_CMP:
         brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
         break;
      case BRW_OPCODE_SEL:
         brw_SEL(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_DPH:
         brw_DPH(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_DP4:
         brw_DP4(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_DP3:
         brw_DP3(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_DP2:
         brw_DP2(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_IF:
         if (inst->src[0].file != BAD_FILE) {
            /* The instruction has an embedded compare (only allowed on
             * gen6).
             */
            assert(intel->gen == 6);
            gen6_IF(p, inst->conditional_mod, src[0], src[1]);
         } else {
            struct brw_instruction *brw_inst = brw_IF(p, BRW_EXECUTE_8);
            brw_inst->header.predicate_control = inst->predicate;
         }
         break;

      case BRW_OPCODE_ELSE:
         brw_ELSE(p);
         break;
      case BRW_OPCODE_ENDIF:
         brw_ENDIF(p);
         break;

      case BRW_OPCODE_DO:
         brw_DO(p, BRW_EXECUTE_8);
         break;

      case BRW_OPCODE_BREAK:
         brw_BREAK(p);
         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
         break;
      case BRW_OPCODE_CONTINUE:
         /* FINISHME: We need to write the loop instruction support still. */
         if (intel->gen >= 6)
            gen6_CONT(p);
         else
            brw_CONT(p);
         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
         break;

      case BRW_OPCODE_WHILE:
         brw_WHILE(p);
         break;

      default:
         generate_vs_instruction(inst, dst, src);
         break;
      }

      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
         brw_dump_compile(p, stdout,
                          last_native_insn_offset, p->next_insn_offset);
      }

      last_native_insn_offset = p->next_insn_offset;
   }
}
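/* Note on the two BRW_OPCODE_IF forms above: on gen6 the IF instruction
 * can carry an embedded compare (a conditional mod plus two sources), so
 * no separate CMP is needed; on other generations the visitor emits a CMP
 * first and the IF simply consumes the resulting predicate, which is why
 * the brw_IF() path copies inst->predicate into predicate_control.
 */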