/** * Generate assembly for a Vec4 IR instruction. * * \param instruction The Vec4 IR instruction to generate code for. * \param dst The destination register. * \param src An array of up to three source registers. */ void vec4_generator::generate_vec4_instruction(vec4_instruction *instruction, struct brw_reg dst, struct brw_reg *src) { vec4_instruction *inst = (vec4_instruction *) instruction; if (dst.width == BRW_WIDTH_4) { /* This happens in attribute fixups for "dual instanced" geometry * shaders, since they use attributes that are vec4's. Since the exec * width is only 4, it's essential that the caller set * force_writemask_all in order to make sure the instruction is executed * regardless of which channels are enabled. */ assert(inst->force_writemask_all); /* Fix up any <8;8,1> or <0;4,1> source registers to <4;4,1> to satisfy * the following register region restrictions (from Graphics BSpec: * 3D-Media-GPGPU Engine > EU Overview > Registers and Register Regions * > Register Region Restrictions) * * 1. ExecSize must be greater than or equal to Width. * * 2. If ExecSize = Width and HorzStride != 0, VertStride must be set * to Width * HorzStride." */ for (int i = 0; i < 3; i++) { if (src[i].file == BRW_GENERAL_REGISTER_FILE) src[i] = stride(src[i], 4, 4, 1); } } switch (inst->opcode) { case BRW_OPCODE_MOV: brw_MOV(p, dst, src[0]); break; case BRW_OPCODE_ADD: brw_ADD(p, dst, src[0], src[1]); break; case BRW_OPCODE_MUL: brw_MUL(p, dst, src[0], src[1]); break; case BRW_OPCODE_MACH: brw_set_acc_write_control(p, 1); brw_MACH(p, dst, src[0], src[1]); brw_set_acc_write_control(p, 0); break; case BRW_OPCODE_MAD: assert(brw->gen >= 6); brw_MAD(p, dst, src[0], src[1], src[2]); break; case BRW_OPCODE_FRC: brw_FRC(p, dst, src[0]); break; case BRW_OPCODE_RNDD: brw_RNDD(p, dst, src[0]); break; case BRW_OPCODE_RNDE: brw_RNDE(p, dst, src[0]); break; case BRW_OPCODE_RNDZ: brw_RNDZ(p, dst, src[0]); break; case BRW_OPCODE_AND: brw_AND(p, dst, src[0], src[1]); break; case BRW_OPCODE_OR: brw_OR(p, dst, src[0], src[1]); break; case BRW_OPCODE_XOR: brw_XOR(p, dst, src[0], src[1]); break; case BRW_OPCODE_NOT: brw_NOT(p, dst, src[0]); break; case BRW_OPCODE_ASR: brw_ASR(p, dst, src[0], src[1]); break; case BRW_OPCODE_SHR: brw_SHR(p, dst, src[0], src[1]); break; case BRW_OPCODE_SHL: brw_SHL(p, dst, src[0], src[1]); break; case BRW_OPCODE_CMP: brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); break; case BRW_OPCODE_SEL: brw_SEL(p, dst, src[0], src[1]); break; case BRW_OPCODE_DPH: brw_DPH(p, dst, src[0], src[1]); break; case BRW_OPCODE_DP4: brw_DP4(p, dst, src[0], src[1]); break; case BRW_OPCODE_DP3: brw_DP3(p, dst, src[0], src[1]); break; case BRW_OPCODE_DP2: brw_DP2(p, dst, src[0], src[1]); break; case BRW_OPCODE_F32TO16: assert(brw->gen >= 7); brw_F32TO16(p, dst, src[0]); break; case BRW_OPCODE_F16TO32: assert(brw->gen >= 7); brw_F16TO32(p, dst, src[0]); break; case BRW_OPCODE_LRP: assert(brw->gen >= 6); brw_LRP(p, dst, src[0], src[1], src[2]); break; case BRW_OPCODE_BFREV: assert(brw->gen >= 7); /* BFREV only supports UD type for src and dst. */ brw_BFREV(p, retype(dst, BRW_REGISTER_TYPE_UD), retype(src[0], BRW_REGISTER_TYPE_UD)); break; case BRW_OPCODE_FBH: assert(brw->gen >= 7); /* FBH only supports UD type for dst. */ brw_FBH(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]); break; case BRW_OPCODE_FBL: assert(brw->gen >= 7); /* FBL only supports UD type for dst. */ brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]); break; case BRW_OPCODE_CBIT: assert(brw->gen >= 7); /* CBIT only supports UD type for dst. */ brw_CBIT(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]); break; case BRW_OPCODE_ADDC: assert(brw->gen >= 7); brw_set_acc_write_control(p, 1); brw_ADDC(p, dst, src[0], src[1]); brw_set_acc_write_control(p, 0); break; case BRW_OPCODE_SUBB: assert(brw->gen >= 7); brw_set_acc_write_control(p, 1); brw_SUBB(p, dst, src[0], src[1]); brw_set_acc_write_control(p, 0); break; case BRW_OPCODE_BFE: assert(brw->gen >= 7); brw_BFE(p, dst, src[0], src[1], src[2]); break; case BRW_OPCODE_BFI1: assert(brw->gen >= 7); brw_BFI1(p, dst, src[0], src[1]); break; case BRW_OPCODE_BFI2: assert(brw->gen >= 7); brw_BFI2(p, dst, src[0], src[1], src[2]); break; case BRW_OPCODE_IF: if (inst->src[0].file != BAD_FILE) { /* The instruction has an embedded compare (only allowed on gen6) */ assert(brw->gen == 6); gen6_IF(p, inst->conditional_mod, src[0], src[1]); } else { struct brw_instruction *brw_inst = brw_IF(p, BRW_EXECUTE_8); brw_inst->header.predicate_control = inst->predicate; } break; case BRW_OPCODE_ELSE: brw_ELSE(p); break; case BRW_OPCODE_ENDIF: brw_ENDIF(p); break; case BRW_OPCODE_DO: brw_DO(p, BRW_EXECUTE_8); break; case BRW_OPCODE_BREAK: brw_BREAK(p); brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; case BRW_OPCODE_CONTINUE: /* FINISHME: We need to write the loop instruction support still. */ if (brw->gen >= 6) gen6_CONT(p); else brw_CONT(p); brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; case BRW_OPCODE_WHILE: brw_WHILE(p); break; case SHADER_OPCODE_RCP: case SHADER_OPCODE_RSQ: case SHADER_OPCODE_SQRT: case SHADER_OPCODE_EXP2: case SHADER_OPCODE_LOG2: case SHADER_OPCODE_SIN: case SHADER_OPCODE_COS: if (brw->gen == 6) { generate_math1_gen6(inst, dst, src[0]); } else { /* Also works for Gen7. */ generate_math1_gen4(inst, dst, src[0]); } break; case SHADER_OPCODE_POW: case SHADER_OPCODE_INT_QUOTIENT: case SHADER_OPCODE_INT_REMAINDER: if (brw->gen >= 7) { generate_math2_gen7(inst, dst, src[0], src[1]); } else if (brw->gen == 6) { generate_math2_gen6(inst, dst, src[0], src[1]); } else { generate_math2_gen4(inst, dst, src[0], src[1]); } break; case SHADER_OPCODE_TEX: case SHADER_OPCODE_TXD: case SHADER_OPCODE_TXF: case SHADER_OPCODE_TXF_CMS: case SHADER_OPCODE_TXF_MCS: case SHADER_OPCODE_TXL: case SHADER_OPCODE_TXS: case SHADER_OPCODE_TG4: case SHADER_OPCODE_TG4_OFFSET: generate_tex(inst, dst, src[0]); break; case VS_OPCODE_URB_WRITE: generate_vs_urb_write(inst); break; case SHADER_OPCODE_GEN4_SCRATCH_READ: generate_scratch_read(inst, dst, src[0]); break; case SHADER_OPCODE_GEN4_SCRATCH_WRITE: generate_scratch_write(inst, dst, src[0], src[1]); break; case VS_OPCODE_PULL_CONSTANT_LOAD: generate_pull_constant_load(inst, dst, src[0], src[1]); break; case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: generate_pull_constant_load_gen7(inst, dst, src[0], src[1]); break; case GS_OPCODE_URB_WRITE: generate_gs_urb_write(inst); break; case GS_OPCODE_THREAD_END: generate_gs_thread_end(inst); break; case GS_OPCODE_SET_WRITE_OFFSET: generate_gs_set_write_offset(dst, src[0], src[1]); break; case GS_OPCODE_SET_VERTEX_COUNT: generate_gs_set_vertex_count(dst, src[0]); break; case GS_OPCODE_SET_DWORD_2_IMMED: generate_gs_set_dword_2_immed(dst, src[0]); break; case GS_OPCODE_PREPARE_CHANNEL_MASKS: generate_gs_prepare_channel_masks(dst); break; case GS_OPCODE_SET_CHANNEL_MASKS: generate_gs_set_channel_masks(dst, src[0]); break; case GS_OPCODE_GET_INSTANCE_ID: generate_gs_get_instance_id(dst); break; case SHADER_OPCODE_SHADER_TIME_ADD: brw_shader_time_add(p, src[0], prog_data->base.binding_table.shader_time_start); brw_mark_surface_used(&prog_data->base, prog_data->base.binding_table.shader_time_start); break; case SHADER_OPCODE_UNTYPED_ATOMIC: generate_untyped_atomic(inst, dst, src[0], src[1]); break; case SHADER_OPCODE_UNTYPED_SURFACE_READ: generate_untyped_surface_read(inst, dst, src[0]); break; case VS_OPCODE_UNPACK_FLAGS_SIMD4X2: generate_unpack_flags(inst, dst); break; default: if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) { _mesa_problem(&brw->ctx, "Unsupported opcode in `%s' in vec4\n", opcode_descs[inst->opcode].name); } else { _mesa_problem(&brw->ctx, "Unsupported opcode %d in vec4", inst->opcode); } abort(); } }
/* Emit the fragment program instructions here. */ void brw_wm_emit( struct brw_wm_compile *c ) { struct brw_compile *p = &c->func; struct intel_context *intel = &p->brw->intel; GLuint insn; brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); if (intel->gen >= 6) brw_set_acc_write_control(p, 1); /* Check if any of the payload regs need to be spilled: */ spill_values(c, c->payload.depth, 4); spill_values(c, c->creg, c->nr_creg); spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX); for (insn = 0; insn < c->nr_insns; insn++) { struct brw_wm_instruction *inst = &c->instruction[insn]; struct brw_reg args[3][4], dst[4]; GLuint i, dst_flags; /* Get argument regs: */ for (i = 0; i < 3; i++) get_argument_regs(c, inst->src[i], args[i]); /* Get dest regs: */ for (i = 0; i < 4; i++) if (inst->dst[i]) dst[i] = inst->dst[i]->hw_reg; else dst[i] = brw_null_reg(); /* Flags */ dst_flags = inst->writemask; if (inst->saturate) dst_flags |= SATURATE; switch (inst->opcode) { /* Generated instructions for calculating triangle interpolants: */ case WM_PIXELXY: emit_pixel_xy(c, dst, dst_flags); break; case WM_DELTAXY: emit_delta_xy(p, dst, dst_flags, args[0]); break; case WM_WPOSXY: emit_wpos_xy(c, dst, dst_flags, args[0]); break; case WM_PIXELW: emit_pixel_w(c, dst, dst_flags, args[0], args[1]); break; case WM_LINTERP: emit_linterp(p, dst, dst_flags, args[0], args[1]); break; case WM_PINTERP: emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]); break; case WM_CINTERP: emit_cinterp(p, dst, dst_flags, args[0]); break; case WM_FB_WRITE: emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot); break; case WM_FRONTFACING: emit_frontfacing(p, dst, dst_flags); break; /* Straightforward arithmetic: */ case OPCODE_ADD: emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]); break; case OPCODE_FRC: emit_alu1(p, brw_FRC, dst, dst_flags, args[0]); break; case OPCODE_FLR: emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]); break; case OPCODE_DDX: emit_ddxy(p, dst, dst_flags, true, args[0]); break; case OPCODE_DDY: emit_ddxy(p, dst, dst_flags, false, args[0]); break; case OPCODE_DP2: emit_dp2(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_DP3: emit_dp3(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_DP4: emit_dp4(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_DPH: emit_dph(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_TRUNC: for (i = 0; i < 4; i++) { if (dst_flags & (1<<i)) { brw_RNDZ(p, dst[i], args[0][i]); } } break; case OPCODE_LRP: emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]); break; case OPCODE_MAD: emit_mad(p, dst, dst_flags, args[0], args[1], args[2]); break; case OPCODE_MOV: case OPCODE_SWZ: emit_alu1(p, brw_MOV, dst, dst_flags, args[0]); break; case OPCODE_MUL: emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]); break; case OPCODE_XPD: emit_xpd(p, dst, dst_flags, args[0], args[1]); break; /* Higher math functions: */ case OPCODE_RCP: emit_math1(c, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]); break; case OPCODE_RSQ: emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]); break; case OPCODE_SIN: emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]); break; case OPCODE_COS: emit_math1(c, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]); break; case OPCODE_EX2: emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]); break; case OPCODE_LG2: emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]); break; case OPCODE_SCS: /* There is an scs math function, but it would need some * fixup for 16-element execution. */ if (dst_flags & WRITEMASK_X) emit_math1(c, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]); if (dst_flags & WRITEMASK_Y) emit_math1(c, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]); break; case OPCODE_POW: emit_math2(c, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]); break; /* Comparisons: */ case OPCODE_CMP: emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]); break; case OPCODE_MAX: emit_max(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_MIN: emit_min(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_SLT: emit_slt(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_SLE: emit_sle(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_SGT: emit_sgt(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_SGE: emit_sge(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_SEQ: emit_seq(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_SNE: emit_sne(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_SSG: emit_sign(p, dst, dst_flags, args[0]); break; case OPCODE_LIT: emit_lit(c, dst, dst_flags, args[0]); break; /* Texturing operations: */ case OPCODE_TEX: emit_tex(c, dst, dst_flags, args[0], c->payload.depth[0].hw_reg, inst->tex_idx, inst->tex_unit, inst->tex_shadow); break; case OPCODE_TXB: emit_txb(c, dst, dst_flags, args[0], c->payload.depth[0].hw_reg, inst->tex_idx, inst->tex_unit); break; case OPCODE_KIL: emit_kil(c, args[0]); break; default: printf("Unsupported opcode %i (%s) in fragment shader\n", inst->opcode, inst->opcode < MAX_OPCODE ? _mesa_opcode_string(inst->opcode) : "unknown"); } for (i = 0; i < 4; i++) if (inst->dst[i] && inst->dst[i]->spill_slot) emit_spill(c, inst->dst[i]->hw_reg, inst->dst[i]->spill_slot); } /* Only properly tested on ILK */ if (p->brw->intel.gen == 5) { brw_remove_duplicate_mrf_moves(p); if (c->dispatch_width == 16) brw_remove_grf_to_mrf_moves(p); } if (unlikely(INTEL_DEBUG & DEBUG_WM)) { int i; printf("wm-native:\n"); for (i = 0; i < p->nr_insn; i++) brw_disasm(stdout, &p->store[i], p->brw->intel.gen); printf("\n"); } }
void vec4_generator::generate_code(exec_list *instructions) { int last_native_insn_offset = 0; const char *last_annotation_string = NULL; const void *last_annotation_ir = NULL; if (unlikely(INTEL_DEBUG & DEBUG_VS)) { if (shader) { printf("Native code for vertex shader %d:\n", prog->Name); } else { printf("Native code for vertex program %d:\n", c->vp->program.Base.Id); } } foreach_list(node, instructions) { vec4_instruction *inst = (vec4_instruction *)node; struct brw_reg src[3], dst; if (unlikely(INTEL_DEBUG & DEBUG_VS)) { if (last_annotation_ir != inst->ir) { last_annotation_ir = inst->ir; if (last_annotation_ir) { printf(" "); if (shader) { ((ir_instruction *) last_annotation_ir)->print(); } else { const prog_instruction *vpi; vpi = (const prog_instruction *) inst->ir; printf("%d: ", (int)(vpi - vp->Base.Instructions)); _mesa_fprint_instruction_opt(stdout, vpi, 0, PROG_PRINT_DEBUG, NULL); } printf("\n"); } } if (last_annotation_string != inst->annotation) { last_annotation_string = inst->annotation; if (last_annotation_string) printf(" %s\n", last_annotation_string); } } for (unsigned int i = 0; i < 3; i++) { src[i] = inst->get_src(i); } dst = inst->get_dst(); brw_set_conditionalmod(p, inst->conditional_mod); brw_set_predicate_control(p, inst->predicate); brw_set_predicate_inverse(p, inst->predicate_inverse); brw_set_saturate(p, inst->saturate); switch (inst->opcode) { case BRW_OPCODE_MOV: brw_MOV(p, dst, src[0]); break; case BRW_OPCODE_ADD: brw_ADD(p, dst, src[0], src[1]); break; case BRW_OPCODE_MUL: brw_MUL(p, dst, src[0], src[1]); break; case BRW_OPCODE_MACH: brw_set_acc_write_control(p, 1); brw_MACH(p, dst, src[0], src[1]); brw_set_acc_write_control(p, 0); break; case BRW_OPCODE_FRC: brw_FRC(p, dst, src[0]); break; case BRW_OPCODE_RNDD: brw_RNDD(p, dst, src[0]); break; case BRW_OPCODE_RNDE: brw_RNDE(p, dst, src[0]); break; case BRW_OPCODE_RNDZ: brw_RNDZ(p, dst, src[0]); break; case BRW_OPCODE_AND: brw_AND(p, dst, src[0], src[1]); break; case BRW_OPCODE_OR: brw_OR(p, dst, src[0], src[1]); break; case BRW_OPCODE_XOR: brw_XOR(p, dst, src[0], src[1]); break; case BRW_OPCODE_NOT: brw_NOT(p, dst, src[0]); break; case BRW_OPCODE_ASR: brw_ASR(p, dst, src[0], src[1]); break; case BRW_OPCODE_SHR: brw_SHR(p, dst, src[0], src[1]); break; case BRW_OPCODE_SHL: brw_SHL(p, dst, src[0], src[1]); break; case BRW_OPCODE_CMP: brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); break; case BRW_OPCODE_SEL: brw_SEL(p, dst, src[0], src[1]); break; case BRW_OPCODE_DPH: brw_DPH(p, dst, src[0], src[1]); break; case BRW_OPCODE_DP4: brw_DP4(p, dst, src[0], src[1]); break; case BRW_OPCODE_DP3: brw_DP3(p, dst, src[0], src[1]); break; case BRW_OPCODE_DP2: brw_DP2(p, dst, src[0], src[1]); break; case BRW_OPCODE_IF: if (inst->src[0].file != BAD_FILE) { /* The instruction has an embedded compare (only allowed on gen6) */ assert(intel->gen == 6); gen6_IF(p, inst->conditional_mod, src[0], src[1]); } else { struct brw_instruction *brw_inst = brw_IF(p, BRW_EXECUTE_8); brw_inst->header.predicate_control = inst->predicate; } break; case BRW_OPCODE_ELSE: brw_ELSE(p); break; case BRW_OPCODE_ENDIF: brw_ENDIF(p); break; case BRW_OPCODE_DO: brw_DO(p, BRW_EXECUTE_8); break; case BRW_OPCODE_BREAK: brw_BREAK(p); brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; case BRW_OPCODE_CONTINUE: /* FINISHME: We need to write the loop instruction support still. */ if (intel->gen >= 6) gen6_CONT(p); else brw_CONT(p); brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; case BRW_OPCODE_WHILE: brw_WHILE(p); break; default: generate_vs_instruction(inst, dst, src); break; } if (unlikely(INTEL_DEBUG & DEBUG_VS)) { brw_dump_compile(p, stdout, last_native_insn_offset, p->next_insn_offset); } last_native_insn_offset = p->next_insn_offset; }