static void emit_lit( struct brw_compile *p, const struct brw_reg *dst, GLuint mask, const struct brw_reg *arg0 ) { assert((mask & WRITEMASK_XW) == 0); if (mask & WRITEMASK_Y) { brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); brw_MOV(p, dst[1], arg0[0]); brw_set_saturate(p, 0); } if (mask & WRITEMASK_Z) { emit_math2(p, BRW_MATH_FUNCTION_POW, &dst[2], WRITEMASK_X | (mask & SATURATE), &arg0[1], &arg0[3]); } /* Ordinarily you'd use an iff statement to skip or shortcircuit * some of the POW calculations above, but 16-wide iff statements * seem to lock c1 hardware, so this is a nasty workaround: */ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0)); { if (mask & WRITEMASK_Y) brw_MOV(p, dst[1], brw_imm_f(0)); if (mask & WRITEMASK_Z) brw_MOV(p, dst[2], brw_imm_f(0)); } brw_set_predicate_control(p, BRW_PREDICATE_NONE); }
/* Emit the fragment program instructions here. */ void brw_wm_emit( struct brw_wm_compile *c ) { struct brw_compile *p = &c->func; struct intel_context *intel = &p->brw->intel; GLuint insn; brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); if (intel->gen >= 6) brw_set_acc_write_control(p, 1); /* Check if any of the payload regs need to be spilled: */ spill_values(c, c->payload.depth, 4); spill_values(c, c->creg, c->nr_creg); spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX); for (insn = 0; insn < c->nr_insns; insn++) { struct brw_wm_instruction *inst = &c->instruction[insn]; struct brw_reg args[3][4], dst[4]; GLuint i, dst_flags; /* Get argument regs: */ for (i = 0; i < 3; i++) get_argument_regs(c, inst->src[i], args[i]); /* Get dest regs: */ for (i = 0; i < 4; i++) if (inst->dst[i]) dst[i] = inst->dst[i]->hw_reg; else dst[i] = brw_null_reg(); /* Flags */ dst_flags = inst->writemask; if (inst->saturate) dst_flags |= SATURATE; switch (inst->opcode) { /* Generated instructions for calculating triangle interpolants: */ case WM_PIXELXY: emit_pixel_xy(c, dst, dst_flags); break; case WM_DELTAXY: emit_delta_xy(p, dst, dst_flags, args[0]); break; case WM_WPOSXY: emit_wpos_xy(c, dst, dst_flags, args[0]); break; case WM_PIXELW: emit_pixel_w(c, dst, dst_flags, args[0], args[1]); break; case WM_LINTERP: emit_linterp(p, dst, dst_flags, args[0], args[1]); break; case WM_PINTERP: emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]); break; case WM_CINTERP: emit_cinterp(p, dst, dst_flags, args[0]); break; case WM_FB_WRITE: emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot); break; case WM_FRONTFACING: emit_frontfacing(p, dst, dst_flags); break; /* Straightforward arithmetic: */ case OPCODE_ADD: emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]); break; case OPCODE_FRC: emit_alu1(p, brw_FRC, dst, dst_flags, args[0]); break; case OPCODE_FLR: emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]); break; case OPCODE_DDX: emit_ddxy(p, dst, dst_flags, true, args[0]); break; case OPCODE_DDY: emit_ddxy(p, dst, dst_flags, false, args[0]); break; case OPCODE_DP2: emit_dp2(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_DP3: emit_dp3(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_DP4: emit_dp4(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_DPH: emit_dph(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_TRUNC: for (i = 0; i < 4; i++) { if (dst_flags & (1<<i)) { brw_RNDZ(p, dst[i], args[0][i]); } } break; case OPCODE_LRP: emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]); break; case OPCODE_MAD: emit_mad(p, dst, dst_flags, args[0], args[1], args[2]); break; case OPCODE_MOV: case OPCODE_SWZ: emit_alu1(p, brw_MOV, dst, dst_flags, args[0]); break; case OPCODE_MUL: emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]); break; case OPCODE_XPD: emit_xpd(p, dst, dst_flags, args[0], args[1]); break; /* Higher math functions: */ case OPCODE_RCP: emit_math1(c, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]); break; case OPCODE_RSQ: emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]); break; case OPCODE_SIN: emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]); break; case OPCODE_COS: emit_math1(c, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]); break; case OPCODE_EX2: emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]); break; case OPCODE_LG2: emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]); break; case OPCODE_SCS: /* There is an scs math function, but it would need some * fixup for 16-element execution. */ if (dst_flags & WRITEMASK_X) emit_math1(c, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]); if (dst_flags & WRITEMASK_Y) emit_math1(c, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]); break; case OPCODE_POW: emit_math2(c, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]); break; /* Comparisons: */ case OPCODE_CMP: emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]); break; case OPCODE_MAX: emit_max(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_MIN: emit_min(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_SLT: emit_slt(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_SLE: emit_sle(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_SGT: emit_sgt(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_SGE: emit_sge(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_SEQ: emit_seq(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_SNE: emit_sne(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_SSG: emit_sign(p, dst, dst_flags, args[0]); break; case OPCODE_LIT: emit_lit(c, dst, dst_flags, args[0]); break; /* Texturing operations: */ case OPCODE_TEX: emit_tex(c, dst, dst_flags, args[0], c->payload.depth[0].hw_reg, inst->tex_idx, inst->tex_unit, inst->tex_shadow); break; case OPCODE_TXB: emit_txb(c, dst, dst_flags, args[0], c->payload.depth[0].hw_reg, inst->tex_idx, inst->tex_unit); break; case OPCODE_KIL: emit_kil(c, args[0]); break; default: printf("Unsupported opcode %i (%s) in fragment shader\n", inst->opcode, inst->opcode < MAX_OPCODE ? _mesa_opcode_string(inst->opcode) : "unknown"); } for (i = 0; i < 4; i++) if (inst->dst[i] && inst->dst[i]->spill_slot) emit_spill(c, inst->dst[i]->hw_reg, inst->dst[i]->spill_slot); } /* Only properly tested on ILK */ if (p->brw->intel.gen == 5) { brw_remove_duplicate_mrf_moves(p); if (c->dispatch_width == 16) brw_remove_grf_to_mrf_moves(p); } if (unlikely(INTEL_DEBUG & DEBUG_WM)) { int i; printf("wm-native:\n"); for (i = 0; i < p->nr_insn; i++) brw_disasm(stdout, &p->store[i], p->brw->intel.gen); printf("\n"); } }
/* Emit the fragment program instructions here. */ void brw_wm_emit( struct brw_wm_compile *c ) { struct brw_compile *p = &c->func; GLuint insn; brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); /* Check if any of the payload regs need to be spilled: */ spill_values(c, c->payload.depth, 4); spill_values(c, c->creg, c->nr_creg); spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX); for (insn = 0; insn < c->nr_insns; insn++) { struct brw_wm_instruction *inst = &c->instruction[insn]; struct brw_reg args[3][4], dst[4]; GLuint i, dst_flags; /* Get argument regs: */ for (i = 0; i < 3; i++) get_argument_regs(c, inst->src[i], args[i]); /* Get dest regs: */ for (i = 0; i < 4; i++) if (inst->dst[i]) dst[i] = inst->dst[i]->hw_reg; else dst[i] = brw_null_reg(); /* Flags */ dst_flags = inst->writemask; if (inst->saturate) dst_flags |= SATURATE; switch (inst->opcode) { /* Generated instructions for calculating triangle interpolants: */ case WM_PIXELXY: emit_pixel_xy(p, dst, dst_flags, args[0]); break; case WM_DELTAXY: emit_delta_xy(p, dst, dst_flags, args[0], args[1]); break; case WM_WPOSXY: emit_wpos_xy(c, dst, dst_flags, args[0]); break; case WM_PIXELW: emit_pixel_w(p, dst, dst_flags, args[0], args[1]); break; case WM_LINTERP: emit_linterp(p, dst, dst_flags, args[0], args[1]); break; case WM_PINTERP: emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]); break; case WM_CINTERP: emit_cinterp(p, dst, dst_flags, args[0]); break; case WM_FB_WRITE: emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot); break; /* Straightforward arithmetic: */ case OPCODE_ADD: emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]); break; case OPCODE_FRC: emit_alu1(p, brw_FRC, dst, dst_flags, args[0]); break; case OPCODE_FLR: emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]); break; case OPCODE_DP3: /* */ emit_dp3(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_DP4: emit_dp4(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_DPH: emit_dph(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_LRP: /* */ emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]); break; case OPCODE_MAD: emit_mad(p, dst, dst_flags, args[0], args[1], args[2]); break; case OPCODE_MOV: case OPCODE_SWZ: emit_alu1(p, brw_MOV, dst, dst_flags, args[0]); break; case OPCODE_MUL: emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]); break; case OPCODE_XPD: emit_xpd(p, dst, dst_flags, args[0], args[1]); break; /* Higher math functions: */ case OPCODE_RCP: emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]); break; case OPCODE_RSQ: emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]); break; case OPCODE_SIN: emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]); break; case OPCODE_COS: emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]); break; case OPCODE_EX2: emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]); break; case OPCODE_LG2: emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]); break; case OPCODE_SCS: /* There is an scs math function, but it would need some * fixup for 16-element execution. */ if (dst_flags & WRITEMASK_X) emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]); if (dst_flags & WRITEMASK_Y) emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]); break; case OPCODE_POW: emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]); break; /* Comparisons: */ case OPCODE_CMP: emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]); break; case OPCODE_MAX: emit_max(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_MIN: emit_min(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_SLT: emit_slt(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_SLE: emit_sle(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_SGT: emit_sgt(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_SGE: emit_sge(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_SEQ: emit_seq(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_SNE: emit_sne(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_LIT: emit_lit(p, dst, dst_flags, args[0]); break; /* Texturing operations: */ case OPCODE_TEX: emit_tex(c, inst, dst, dst_flags, args[0]); break; case OPCODE_TXB: emit_txb(c, inst, dst, dst_flags, args[0]); break; case OPCODE_KIL: emit_kil(c, args[0]); break; default: _mesa_printf("unsupport opcode %d in fragment program\n", inst->opcode); } for (i = 0; i < 4; i++) if (inst->dst[i] && inst->dst[i]->spill_slot) emit_spill(c, inst->dst[i]->hw_reg, inst->dst[i]->spill_slot); } }