static void precalc_lit( struct brw_wm_compile *c, const struct prog_instruction *inst ) { struct prog_src_register src0 = inst->SrcReg[0]; struct prog_dst_register dst = inst->DstReg; if (dst.WriteMask & WRITEMASK_XW) { struct prog_instruction *swz; /* dst.xw = swz src0.1111 */ swz = emit_op(c, OPCODE_SWZ, dst_mask(dst, WRITEMASK_XW), 0, src_swizzle1(src0, SWIZZLE_ONE), src_undef(), src_undef()); /* Avoid letting the negation flag of src0 affect our 1 constant. */ swz->SrcReg[0].Negate = NEGATE_NONE; } if (dst.WriteMask & WRITEMASK_YZ) { emit_op(c, OPCODE_LIT, dst_mask(dst, WRITEMASK_YZ), inst->SaturateMode, src0, src_undef(), src_undef()); } }
static void precalc_lit( struct brw_wm_compile *c, const struct prog_instruction *inst ) { struct prog_src_register src0 = inst->SrcReg[0]; struct prog_dst_register dst = inst->DstReg; if (dst.WriteMask & WRITEMASK_XW) { /* dst.xw = swz src0.1111 */ emit_op(c, OPCODE_SWZ, dst_mask(dst, WRITEMASK_XW), 0, 0, 0, src_swizzle1(src0, SWIZZLE_ONE), src_undef(), src_undef()); } if (dst.WriteMask & WRITEMASK_YZ) { emit_op(c, OPCODE_LIT, dst_mask(dst, WRITEMASK_YZ), inst->SaturateMode, 0, 0, src0, src_undef(), src_undef()); } }
/* Many Mesa opcodes produce the same value across all the result channels. * We'd rather not have to support that splatting in the opcode implementations, * and brw_wm_pass*.c wants to optimize them out by shuffling references around * anyway. We can easily get both by emitting the opcode to one channel, and * then MOVing it to the others, which brw_wm_pass*.c already understands. */ static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c, const struct prog_instruction *inst0) { struct prog_instruction *inst; unsigned int dst_chan; unsigned int other_channel_mask; if (inst0->DstReg.WriteMask == 0) return NULL; dst_chan = _mesa_ffs(inst0->DstReg.WriteMask) - 1; inst = get_fp_inst(c); *inst = *inst0; inst->DstReg.WriteMask = 1 << dst_chan; other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan); if (other_channel_mask != 0) { inst = emit_op(c, OPCODE_MOV, dst_mask(inst0->DstReg, other_channel_mask), 0, src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan), src_undef(), src_undef()); } return inst; }
static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c ) { if (src_is_undef(c->pixel_xy)) { struct prog_dst_register pixel_xy = get_temp(c); struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); /* Emit the out calculations, and hold onto the results. Use * two instructions as a temporary is required. */ /* pixel_xy.xy = PIXELXY payload[0]; */ emit_op(c, WM_PIXELXY, dst_mask(pixel_xy, WRITEMASK_XY), 0, payload_r0_depth, src_undef(), src_undef()); c->pixel_xy = src_reg_from_dst(pixel_xy); } return c->pixel_xy; }
/**
 * Emit a single-source operation.
 *
 * Forwards to emit_tex_op(), passing zeros for the three arguments that
 * sit between the destination and the sources, and undefined registers
 * for the second and third sources.
 *
 * \param c     compile context
 * \param op    opcode to emit
 * \param dest  destination register
 * \param src0  the only source operand
 */
static INLINE void emit_op1(struct brw_wm_compile *c,
                            GLuint op,
                            struct brw_fp_dst dest,
                            struct brw_fp_src src0)
{
   emit_tex_op(c, op, dest, 0, 0, 0, src0, src_undef(), src_undef());
}
static struct brw_fp_src get_pixel_w( struct brw_wm_compile *c ) { if (src_is_undef(c->fp_pixel_w)) { struct brw_fp_dst pixel_w = get_temp(c); struct brw_fp_src deltas = get_delta_xy(c); /* XXX: assuming position is always first -- valid? */ struct brw_fp_src interp_wpos = src_reg(BRW_FILE_PAYLOAD, 0); /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x */ emit_op3(c, WM_PIXELW, dst_mask(pixel_w, BRW_WRITEMASK_W), interp_wpos, deltas, src_undef()); c->fp_pixel_w = src_reg_from_dst(pixel_w); } return c->fp_pixel_w; }
/*********************************************************************** * Expand various instructions here to simpler forms. */ static void precalc_dst( struct brw_wm_compile *c, const struct prog_instruction *inst ) { struct prog_src_register src0 = inst->SrcReg[0]; struct prog_src_register src1 = inst->SrcReg[1]; struct prog_dst_register dst = inst->DstReg; if (dst.WriteMask & WRITEMASK_Y) { /* dst.y = mul src0.y, src1.y */ emit_op(c, OPCODE_MUL, dst_mask(dst, WRITEMASK_Y), inst->SaturateMode, src0, src1, src_undef()); } if (dst.WriteMask & WRITEMASK_XZ) { struct prog_instruction *swz; GLuint z = GET_SWZ(src0.Swizzle, Z); /* dst.xz = swz src0.1zzz */ swz = emit_op(c, OPCODE_SWZ, dst_mask(dst, WRITEMASK_XZ), inst->SaturateMode, src_swizzle(src0, SWIZZLE_ONE, z, z, z), src_undef(), src_undef()); /* Avoid letting negation flag of src0 affect our 1 constant. */ swz->SrcReg[0].Negate &= ~NEGATE_X; } if (dst.WriteMask & WRITEMASK_W) { /* dst.w = mov src1.w */ emit_op(c, OPCODE_MOV, dst_mask(dst, WRITEMASK_W), inst->SaturateMode, src1, src_undef(), src_undef()); } }
/** * Emit code for TXP. */ static void precalc_txp( struct brw_wm_compile *c, const struct prog_instruction *inst ) { struct prog_src_register src0 = inst->SrcReg[0]; if (projtex(c, inst)) { struct prog_dst_register tmp = get_temp(c); struct prog_instruction tmp_inst; /* tmp0.w = RCP inst.arg[0][3] */ emit_op(c, OPCODE_RCP, dst_mask(tmp, WRITEMASK_W), 0, src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)), src_undef(), src_undef()); /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww */ emit_op(c, OPCODE_MUL, dst_mask(tmp, WRITEMASK_XYZ), 0, src0, src_swizzle1(src_reg_from_dst(tmp), W), src_undef()); /* dst = precalc(TEX tmp0) */ tmp_inst = *inst; tmp_inst.SrcReg[0] = src_reg_from_dst(tmp); precalc_tex(c, &tmp_inst); release_temp(c, tmp); } else { /* dst = precalc(TEX src0) */ precalc_tex(c, inst); } }
/* Internally generated immediates: overkill... */ static struct brw_fp_src src_imm( struct brw_wm_compile *c, const GLfloat *v, unsigned nr) { unsigned i, j; unsigned swizzle; /* Could do a first pass where we examine all existing immediates * without expanding. */ for (i = 0; i < c->nr_immediates; i++) { if (match_or_expand_immediate( v, nr, c->immediate[i].v, &c->immediate[i].nr, &swizzle )) goto out; } if (c->nr_immediates < Elements(c->immediate)) { i = c->nr_immediates++; if (match_or_expand_immediate( v, nr, c->immediate[i].v, &c->immediate[i].nr, &swizzle )) goto out; } c->error = 1; return src_undef(); out: /* Make sure that all referenced elements are from this immediate. * Has the effect of making size-one immediates into scalars. */ for (j = nr; j < 4; j++) swizzle |= (swizzle & 0x3) << (j * 2); return src_swizzle( src_reg( TGSI_FILE_IMMEDIATE, i ), BRW_GET_SWZ(swizzle, X), BRW_GET_SWZ(swizzle, Y), BRW_GET_SWZ(swizzle, Z), BRW_GET_SWZ(swizzle, W) ); }
static struct brw_fp_src get_delta_xy( struct brw_wm_compile *c ) { if (src_is_undef(c->fp_delta_xy)) { struct brw_fp_dst delta_xy = get_temp(c); struct brw_fp_src pixel_xy = get_pixel_xy(c); struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH); /* deltas.xy = DELTAXY pixel_xy, payload[0] */ emit_op3(c, WM_DELTAXY, dst_mask(delta_xy, BRW_WRITEMASK_XY), pixel_xy, payload_r0_depth, src_undef()); c->fp_delta_xy = src_reg_from_dst(delta_xy); } return c->fp_delta_xy; }
static struct prog_src_register get_delta_xy( struct brw_wm_compile *c ) { if (src_is_undef(c->delta_xy)) { struct prog_dst_register delta_xy = get_temp(c); struct prog_src_register pixel_xy = get_pixel_xy(c); struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); /* deltas.xy = DELTAXY pixel_xy, payload[0] */ emit_op(c, WM_DELTAXY, dst_mask(delta_xy, WRITEMASK_XY), 0, pixel_xy, payload_r0_depth, src_undef()); c->delta_xy = src_reg_from_dst(delta_xy); } return c->delta_xy; }
static struct prog_src_register get_pixel_w( struct brw_wm_compile *c ) { if (src_is_undef(c->pixel_w)) { struct prog_dst_register pixel_w = get_temp(c); struct prog_src_register deltas = get_delta_xy(c); struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS); /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x */ emit_op(c, WM_PIXELW, dst_mask(pixel_w, WRITEMASK_W), 0, interp_wpos, deltas, src_undef()); c->pixel_w = src_reg_from_dst(pixel_w); } return c->pixel_w; }
/**
 * Emit the instructions that compute fragment input \p idx from the
 * payload registers, then record the attribute in c->fp_interp_emitted.
 *
 * PINTERP is used on attributes which have been multiplied by 1/W in the
 * SF program, and LINTERP on those which have not.
 *
 * \param c    compile context the instructions are appended to
 * \param idx  fragment attribute index (FRAG_ATTRIB_*)
 */
static void emit_interp( struct brw_wm_compile *c,
                         GLuint idx )
{
   struct prog_dst_register attr = dst_reg(PROGRAM_INPUT, idx);
   struct prog_src_register payload = src_reg(PROGRAM_PAYLOAD, idx);
   struct prog_src_register deltas = get_delta_xy(c);

   switch (idx) {
   case FRAG_ATTRIB_WPOS:
      /* wpos.xy gets special treatment: it comes from the pixel X/Y. */
      emit_op(c,
              WM_WPOSXY,
              dst_mask(attr, WRITEMASK_XY),
              0,
              get_pixel_xy(c),
              src_undef(),
              src_undef());

      /* PROGRAM_INPUT.attr.zw = LINTERP payload.interp[attr], deltas */
      emit_op(c,
              WM_LINTERP,
              dst_mask(attr, WRITEMASK_ZW),
              0,
              payload,
              deltas,
              src_undef());
      break;

   case FRAG_ATTRIB_COL0:
   case FRAG_ATTRIB_COL1:
      if (c->key.flat_shade) {
         /* Flat shading: constant across the primitive. */
         emit_op(c, WM_CINTERP, attr, 0,
                 payload, src_undef(), src_undef());
      }
      else if (c->key.linear_color) {
         emit_op(c, WM_LINTERP, attr, 0,
                 payload, deltas, src_undef());
      }
      else {
         /* perspective-corrected color interpolation */
         emit_op(c, WM_PINTERP, attr, 0,
                 payload, deltas, get_pixel_w(c));
      }
      break;

   case FRAG_ATTRIB_FOGC:
      /* Interpolate the fog coordinate into X ... */
      emit_op(c,
              WM_PINTERP,
              dst_mask(attr, WRITEMASK_X),
              0,
              payload,
              deltas,
              get_pixel_w(c));

      /* ... and fill the remaining channels with (0, 0, 1). */
      emit_op(c,
              OPCODE_MOV,
              dst_mask(attr, WRITEMASK_YZW),
              0,
              src_swizzle(payload,
                          SWIZZLE_ZERO, SWIZZLE_ZERO,
                          SWIZZLE_ZERO, SWIZZLE_ONE),
              src_undef(),
              src_undef());
      break;

   case FRAG_ATTRIB_FACE:
      emit_op(c,
              WM_FRONTFACING,
              dst_mask(attr, WRITEMASK_X),
              0,
              src_undef(),
              src_undef(),
              src_undef());
      break;

   case FRAG_ATTRIB_PNTC:
      /* XXX review/test this case */
      emit_op(c,
              WM_PINTERP,
              dst_mask(attr, WRITEMASK_XY),
              0,
              payload,
              deltas,
              get_pixel_w(c));

      /* Z and W are filled with the constants 0 and 1. */
      emit_op(c,
              OPCODE_MOV,
              dst_mask(attr, WRITEMASK_ZW),
              0,
              src_swizzle(payload,
                          SWIZZLE_ZERO, SWIZZLE_ZERO,
                          SWIZZLE_ZERO, SWIZZLE_ONE),
              src_undef(),
              src_undef());
      break;

   default:
      emit_op(c, WM_PINTERP, attr, 0,
              payload, deltas, get_pixel_w(c));
      break;
   }

   c->fp_interp_emitted |= 1<<idx;
}
static void precalc_tex( struct brw_wm_compile *c, const struct prog_instruction *inst ) { struct prog_src_register coord; struct prog_dst_register tmpcoord; if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) { struct prog_src_register scale = search_or_add_param5( c, STATE_INTERNAL, STATE_TEXRECT_SCALE, inst->TexSrcUnit, 0,0 ); tmpcoord = get_temp(c); /* coord.xy = MUL inst->SrcReg[0], { 1/width, 1/height } */ emit_op(c, OPCODE_MUL, tmpcoord, 0, 0, 0, inst->SrcReg[0], scale, src_undef()); coord = src_reg_from_dst(tmpcoord); } else { coord = inst->SrcReg[0]; } /* Need to emit YUV texture conversions by hand. Probably need to * do this here - the alternative is in brw_wm_emit.c, but the * conversion requires allocating a temporary variable which we * don't have the facility to do that late in the compilation. */ if (!(c->key.yuvtex_mask & (1<<inst->TexSrcUnit))) { emit_op(c, OPCODE_TEX, inst->DstReg, inst->SaturateMode, inst->TexSrcUnit, inst->TexSrcTarget, coord, src_undef(), src_undef()); } else { /* CONST C0 = { -.5, -.0625, -.5, 1.164 } CONST C1 = { 1.596, -0.813, 2.018, -.391 } UYV = TEX ... UYV.xyz = ADD UYV, C0 UYV.y = MUL UYV.y, C0.w RGB.xyz = MAD UYV.xxz, C1, UYV.y RGB.y = MAD UYV.z, C1.w, RGB.y */ struct prog_dst_register dst = inst->DstReg; struct prog_src_register src0 = inst->SrcReg[0]; struct prog_dst_register tmp = get_temp(c); struct prog_src_register tmpsrc = src_reg_from_dst(tmp); struct prog_src_register C0 = search_or_add_const4f( c, -.5, -.0625, -.5, 1.164 ); struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 ); /* tmp = TEX ... 
*/ emit_op(c, OPCODE_TEX, tmp, inst->SaturateMode, inst->TexSrcUnit, inst->TexSrcTarget, src0, src_undef(), src_undef()); /* tmp.xyz = ADD TMP, C0 */ emit_op(c, OPCODE_ADD, dst_mask(tmp, WRITEMASK_XYZ), 0, 0, 0, tmpsrc, C0, src_undef()); /* YUV.y = MUL YUV.y, C0.w */ emit_op(c, OPCODE_MUL, dst_mask(tmp, WRITEMASK_Y), 0, 0, 0, tmpsrc, src_swizzle1(C0, W), src_undef()); /* RGB.xyz = MAD YUV.xxz, C1, YUV.y */ emit_op(c, OPCODE_MAD, dst_mask(dst, WRITEMASK_XYZ), 0, 0, 0, src_swizzle(tmpsrc, X,X,Z,Z), C1, src_swizzle1(tmpsrc, Y)); /* RGB.y = MAD YUV.z, C1.w, RGB.y */ emit_op(c, OPCODE_MAD, dst_mask(dst, WRITEMASK_Y), 0, 0, 0, src_swizzle1(tmpsrc, Z), src_swizzle1(C1, W), src_swizzle1(src_reg_from_dst(dst), Y)); release_temp(c, tmp); } if (inst->TexSrcTarget == GL_TEXTURE_RECTANGLE_NV) release_temp(c, tmpcoord); }
void brw_wm_pass_fp( struct brw_wm_compile *c ) { struct brw_fragment_program *fp = c->fp; GLuint insn; if (INTEL_DEBUG & DEBUG_WM) { _mesa_printf("\n\n\npre-fp:\n"); _mesa_print_program(&fp->program.Base); _mesa_printf("\n"); } c->pixel_xy = src_undef(); c->delta_xy = src_undef(); c->pixel_w = src_undef(); c->nr_fp_insns = 0; /* Emit preamble instructions: */ for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) { const struct prog_instruction *inst = &fp->program.Base.Instructions[insn]; struct prog_instruction *out; /* Check for INPUT values, emit INTERP instructions where * necessary: */ validate_src_regs(c, inst); switch (inst->Opcode) { case OPCODE_SWZ: out = emit_insn(c, inst); out->Opcode = OPCODE_MOV; break; case OPCODE_ABS: out = emit_insn(c, inst); out->Opcode = OPCODE_MOV; out->SrcReg[0].NegateBase = 0; out->SrcReg[0].Abs = 1; break; case OPCODE_SUB: out = emit_insn(c, inst); out->Opcode = OPCODE_ADD; out->SrcReg[1].NegateBase ^= 0xf; break; case OPCODE_SCS: out = emit_insn(c, inst); /* This should probably be done in the parser. */ out->DstReg.WriteMask &= WRITEMASK_XY; break; case OPCODE_DST: precalc_dst(c, inst); break; case OPCODE_LIT: precalc_lit(c, inst); break; case OPCODE_TXP: precalc_txp(c, inst); break; case OPCODE_XPD: out = emit_insn(c, inst); /* This should probably be done in the parser. */ out->DstReg.WriteMask &= WRITEMASK_XYZ; break; case OPCODE_KIL: out = emit_insn(c, inst); /* This should probably be done in the parser. */ out->DstReg.WriteMask = 0; break; case OPCODE_END: case OPCODE_PRINT: break; default: emit_insn(c, inst); break; } } emit_fog(c); emit_fb_write(c); if (INTEL_DEBUG & DEBUG_WM) { _mesa_printf("\n\n\npass_fp:\n"); print_insns( c->prog_instructions, c->nr_fp_insns ); _mesa_printf("\n"); } }
/** * Initial pass for fragment program code generation. * This function is used by both the GLSL and non-GLSL paths. */ int brw_wm_pass_fp( struct brw_wm_compile *c ) { struct brw_fragment_shader *fs = c->fp; struct tgsi_parse_context parse; struct tgsi_full_instruction *inst; struct tgsi_full_declaration *decl; const float *imm; GLuint size; GLuint i; if (BRW_DEBUG & DEBUG_WM) { debug_printf("pre-fp:\n"); tgsi_dump(fs->tokens, 0); } c->fp_pixel_xy = src_undef(); c->fp_delta_xy = src_undef(); c->fp_pixel_w = src_undef(); c->nr_fp_insns = 0; c->nr_immediates = 0; /* Loop over all instructions doing assorted simplifications and * transformations. */ tgsi_parse_init( &parse, fs->tokens ); while( !tgsi_parse_end_of_tokens( &parse ) ) { tgsi_parse_token( &parse ); switch( parse.FullToken.Token.Type ) { case TGSI_TOKEN_TYPE_DECLARATION: /* Turn intput declarations into special WM_* instructions. * * XXX: For non-branching shaders, consider deferring variable * initialization as late as possible to minimize register * usage. This is how the original BRW driver worked. * * In a branching shader, must preamble instructions at decl * time, as instruction order in the shader does not * correspond to the order instructions are executed in the * wild. * * This is where special instructions such as WM_CINTERP, * WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to * compute shader inputs from the payload registers and pixel * position. 
*/ decl = &parse.FullToken.FullDeclaration; if( decl->Declaration.File == TGSI_FILE_INPUT ) { unsigned first, last, mask; unsigned attrib; first = decl->Range.First; last = decl->Range.Last; mask = decl->Declaration.UsageMask; for (attrib = first; attrib <= last; attrib++) { emit_interp(c, attrib, decl->Semantic.Name, decl->Declaration.Interpolate ); } } break; case TGSI_TOKEN_TYPE_IMMEDIATE: /* Unlike VS programs we can probably manage fine encoding * immediate values directly into the emitted EU * instructions, as we probably only need to reference one * float value per instruction. Just save the data for now * and use directly later. */ i = c->nr_immediates++; imm = &parse.FullToken.FullImmediate.u[i].Float; size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; if (c->nr_immediates >= BRW_WM_MAX_CONST) return PIPE_ERROR_OUT_OF_MEMORY; for (i = 0; i < size; i++) c->immediate[c->nr_immediates].v[i] = imm[i]; for (; i < 4; i++) c->immediate[c->nr_immediates].v[i] = 0.0; c->immediate[c->nr_immediates].nr = size; c->nr_immediates++; break; case TGSI_TOKEN_TYPE_INSTRUCTION: inst = &parse.FullToken.FullInstruction; emit_insn(c, inst); break; } } if (BRW_DEBUG & DEBUG_WM) { brw_wm_print_fp_program( c, "pass_fp" ); debug_printf("\n"); } return c->error; }
/**
 * Initial pass for fragment program code generation.
 * This function is used by both the GLSL and non-GLSL paths.
 *
 * Runs in two sweeps over c->fp's instruction list.  The first sweep
 * calls validate_src_regs() and validate_dst_regs() on every instruction;
 * the second emits each instruction into the compile context, either
 * unchanged, rewritten to a simpler opcode, or expanded by a helper.
 * When DEBUG_WM is set the program is printed before and after.
 */
void brw_wm_pass_fp( struct brw_wm_compile *c )
{
   struct brw_fragment_program *fp = c->fp;
   GLuint insn;

   if (INTEL_DEBUG & DEBUG_WM) {
      printf("pre-fp:\n");
      _mesa_print_program(&fp->program.Base);
      printf("\n");
   }

   /* Reset the lazily computed registers and the output counters. */
   c->pixel_xy = src_undef();
   c->delta_xy = src_undef();
   c->pixel_w = src_undef();
   c->nr_fp_insns = 0;
   c->fp->tex_units_used = 0x0;

   /* Emit preamble instructions.  This is where special instructions such as
    * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
    * compute shader inputs from varying vars.
    *
    * NOTE(review): the emission presumably happens inside the validate_*()
    * helpers, which are not visible here -- confirm.
    */
   for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
      const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
      validate_src_regs(c, inst);
      validate_dst_regs(c, inst);
   }

   /* Loop over all instructions doing assorted simplifications and
    * transformations.
    */
   for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
      const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
      struct prog_instruction *out;

      /* Register validation was already done for every instruction in the
       * loop above; here each opcode is only translated.
       */
      switch (inst->Opcode) {
      case OPCODE_SWZ:
         /* SWZ is emitted as a plain MOV. */
         out = emit_insn(c, inst);
         out->Opcode = OPCODE_MOV;
         break;

      case OPCODE_ABS:
         /* ABS becomes a MOV with the absolute-value source modifier. */
         out = emit_insn(c, inst);
         out->Opcode = OPCODE_MOV;
         out->SrcReg[0].Negate = NEGATE_NONE;
         out->SrcReg[0].Abs = 1;
         break;

      case OPCODE_SUB:
         /* SUB becomes an ADD with the second source negated. */
         out = emit_insn(c, inst);
         out->Opcode = OPCODE_ADD;
         out->SrcReg[1].Negate ^= NEGATE_XYZW;
         break;

      case OPCODE_SCS:
         out = emit_insn(c, inst);
         /* This should probably be done in the parser.
          */
         out->DstReg.WriteMask &= WRITEMASK_XY;
         break;

      case OPCODE_DST:
         precalc_dst(c, inst);
         break;

      case OPCODE_LIT:
         precalc_lit(c, inst);
         break;

      case OPCODE_TEX:
         precalc_tex(c, inst);
         break;

      case OPCODE_TXP:
         precalc_txp(c, inst);
         break;

      case OPCODE_TXB:
         /* Replace the instruction's sampler index with the texture unit
          * looked up in the program's SamplerUnits table.
          */
         out = emit_insn(c, inst);
         out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
         assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT);
         break;

      case OPCODE_XPD:
         out = emit_insn(c, inst);
         /* This should probably be done in the parser.
          */
         out->DstReg.WriteMask &= WRITEMASK_XYZ;
         break;

      case OPCODE_KIL:
         out = emit_insn(c, inst);
         /* This should probably be done in the parser.
          */
         out->DstReg.WriteMask = 0;
         break;

      case OPCODE_END:
         /* END itself is not emitted; the render-target writes are. */
         emit_render_target_writes(c);
         break;

      case OPCODE_PRINT:
         /* Produces no output instruction. */
         break;

      default:
         /* Opcodes whose result is the same in every channel go through
          * emit_scalar_insn(); everything else is copied straight over.
          */
         if (brw_wm_is_scalar_result(inst->Opcode))
            emit_scalar_insn(c, inst);
         else
            emit_insn(c, inst);
         break;
      }
   }

   if (INTEL_DEBUG & DEBUG_WM) {
      printf("pass_fp:\n");
      print_insns( c->prog_instructions, c->nr_fp_insns );
      printf("\n");
   }
}
/** * Some TEX instructions require extra code, cube map coordinate * normalization, or coordinate scaling for RECT textures, etc. * This function emits those extra instructions and the TEX * instruction itself. */ static void precalc_tex( struct brw_wm_compile *c, const struct prog_instruction *inst ) { struct prog_src_register coord; struct prog_dst_register tmpcoord; const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; assert(unit < BRW_MAX_TEX_UNIT); if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) { struct prog_instruction *out; struct prog_dst_register tmp0 = get_temp(c); struct prog_src_register tmp0src = src_reg_from_dst(tmp0); struct prog_dst_register tmp1 = get_temp(c); struct prog_src_register tmp1src = src_reg_from_dst(tmp1); struct prog_src_register src0 = inst->SrcReg[0]; /* find longest component of coord vector and normalize it */ tmpcoord = get_temp(c); coord = src_reg_from_dst(tmpcoord); /* tmpcoord = src0 (i.e.: coord = src0) */ out = emit_op(c, OPCODE_MOV, tmpcoord, 0, src0, src_undef(), src_undef()); out->SrcReg[0].Negate = NEGATE_NONE; out->SrcReg[0].Abs = 1; /* tmp0 = MAX(coord.X, coord.Y) */ emit_op(c, OPCODE_MAX, tmp0, 0, src_swizzle1(coord, X), src_swizzle1(coord, Y), src_undef()); /* tmp1 = MAX(tmp0, coord.Z) */ emit_op(c, OPCODE_MAX, tmp1, 0, tmp0src, src_swizzle1(coord, Z), src_undef()); /* tmp0 = 1 / tmp1 */ emit_op(c, OPCODE_RCP, dst_mask(tmp0, WRITEMASK_X), 0, tmp1src, src_undef(), src_undef()); /* tmpCoord = src0 * tmp0 */ emit_op(c, OPCODE_MUL, tmpcoord, 0, src0, src_swizzle1(tmp0src, SWIZZLE_X), src_undef()); release_temp(c, tmp0); release_temp(c, tmp1); } else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) { struct prog_src_register scale = search_or_add_param5( c, STATE_INTERNAL, STATE_TEXRECT_SCALE, unit, 0,0 ); tmpcoord = get_temp(c); /* coord.xy = MUL inst->SrcReg[0], { 1/width, 1/height } */ emit_op(c, OPCODE_MUL, tmpcoord, 0, inst->SrcReg[0], src_swizzle(scale, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_ONE), 
src_undef()); coord = src_reg_from_dst(tmpcoord); } else { coord = inst->SrcReg[0]; } /* Need to emit YUV texture conversions by hand. Probably need to * do this here - the alternative is in brw_wm_emit.c, but the * conversion requires allocating a temporary variable which we * don't have the facility to do that late in the compilation. */ if (c->key.yuvtex_mask & (1 << unit)) { /* convert ycbcr to RGBA */ GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<<unit); /* CONST C0 = { -.5, -.0625, -.5, 1.164 } CONST C1 = { 1.596, -0.813, 2.018, -.391 } UYV = TEX ... UYV.xyz = ADD UYV, C0 UYV.y = MUL UYV.y, C0.w if (UV swaped) RGB.xyz = MAD UYV.zzx, C1, UYV.y else RGB.xyz = MAD UYV.xxz, C1, UYV.y RGB.y = MAD UYV.z, C1.w, RGB.y */ struct prog_dst_register dst = inst->DstReg; struct prog_dst_register tmp = get_temp(c); struct prog_src_register tmpsrc = src_reg_from_dst(tmp); struct prog_src_register C0 = search_or_add_const4f( c, -.5, -.0625, -.5, 1.164 ); struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 ); /* tmp = TEX ... 
*/ emit_tex_op(c, OPCODE_TEX, tmp, inst->SaturateMode, unit, inst->TexSrcTarget, inst->TexShadow, coord, src_undef(), src_undef()); /* tmp.xyz = ADD TMP, C0 */ emit_op(c, OPCODE_ADD, dst_mask(tmp, WRITEMASK_XYZ), 0, tmpsrc, C0, src_undef()); /* YUV.y = MUL YUV.y, C0.w */ emit_op(c, OPCODE_MUL, dst_mask(tmp, WRITEMASK_Y), 0, tmpsrc, src_swizzle1(C0, W), src_undef()); /* * if (UV swaped) * RGB.xyz = MAD YUV.zzx, C1, YUV.y * else * RGB.xyz = MAD YUV.xxz, C1, YUV.y */ emit_op(c, OPCODE_MAD, dst_mask(dst, WRITEMASK_XYZ), 0, swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z), C1, src_swizzle1(tmpsrc, Y)); /* RGB.y = MAD YUV.z, C1.w, RGB.y */ emit_op(c, OPCODE_MAD, dst_mask(dst, WRITEMASK_Y), 0, src_swizzle1(tmpsrc, Z), src_swizzle1(C1, W), src_swizzle1(src_reg_from_dst(dst), Y)); release_temp(c, tmp); } else { /* ordinary RGBA tex instruction */ emit_tex_op(c, OPCODE_TEX, inst->DstReg, inst->SaturateMode, unit, inst->TexSrcTarget, inst->TexShadow, coord, src_undef(), src_undef()); } /* For GL_EXT_texture_swizzle: */ if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) { /* swizzle the result of the TEX instruction */ struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg); emit_op(c, OPCODE_SWZ, inst->DstReg, SATURATE_OFF, /* saturate already done above */ src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]), src_undef(), src_undef()); } if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) || (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)) release_temp(c, tmpcoord); }
/** * Some TEX instructions require extra code, cube map coordinate * normalization, or coordinate scaling for RECT textures, etc. * This function emits those extra instructions and the TEX * instruction itself. */ static void precalc_tex( struct brw_wm_compile *c, struct brw_fp_dst dst, unsigned target, unsigned unit, struct brw_fp_src src0, struct brw_fp_src sampler ) { struct brw_fp_src coord; struct brw_fp_dst tmp = dst_undef(); assert(unit < BRW_MAX_TEX_UNIT); /* Cubemap: find longest component of coord vector and normalize * it. */ if (target == TGSI_TEXTURE_CUBE) { struct brw_fp_src tmpsrc; tmp = get_temp(c); tmpsrc = src_reg_from_dst(tmp); /* tmp = abs(src0) */ emit_op1(c, TGSI_OPCODE_MOV, tmp, src_abs(src0)); /* tmp.X = MAX(tmp.X, tmp.Y) */ emit_op2(c, TGSI_OPCODE_MAX, dst_mask(tmp, BRW_WRITEMASK_X), src_scalar(tmpsrc, X), src_scalar(tmpsrc, Y)); /* tmp.X = MAX(tmp.X, tmp.Z) */ emit_op2(c, TGSI_OPCODE_MAX, dst_mask(tmp, BRW_WRITEMASK_X), tmpsrc, src_scalar(tmpsrc, Z)); /* tmp.X = 1 / tmp.X */ emit_op1(c, TGSI_OPCODE_RCP, dst_mask(tmp, BRW_WRITEMASK_X), tmpsrc); /* tmp = src0 * tmp.xxxx */ emit_op2(c, TGSI_OPCODE_MUL, tmp, src0, src_scalar(tmpsrc, X)); coord = tmpsrc; } else if (target == TGSI_TEXTURE_RECT || target == TGSI_TEXTURE_SHADOWRECT) { /* XXX: need a mechanism for internally generated constants. */ coord = src0; } else { coord = src0; } /* Need to emit YUV texture conversions by hand. Probably need to * do this here - the alternative is in brw_wm_emit.c, but the * conversion requires allocating a temporary variable which we * don't have the facility to do that late in the compilation. */ if (c->key.yuvtex_mask & (1 << unit)) { /* convert ycbcr to RGBA */ GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<<unit); struct brw_fp_dst tmp = get_temp(c); struct brw_fp_src tmpsrc = src_reg_from_dst(tmp); struct brw_fp_src C0 = src_imm4f( c, -.5, -.0625, -.5, 1.164 ); struct brw_fp_src C1 = src_imm4f( c, 1.596, -0.813, 2.018, -.391 ); /* tmp = TEX ... 
*/ emit_tex_op(c, TGSI_OPCODE_TEX, dst_saturate(tmp, dst.saturate), unit, target, sampler.index, coord, src_undef(), src_undef()); /* tmp.xyz = ADD TMP, C0 */ emit_op2(c, TGSI_OPCODE_ADD, dst_mask(tmp, BRW_WRITEMASK_XYZ), tmpsrc, C0); /* YUV.y = MUL YUV.y, C0.w */ emit_op2(c, TGSI_OPCODE_MUL, dst_mask(tmp, BRW_WRITEMASK_Y), tmpsrc, src_scalar(C0, W)); /* * if (UV swaped) * RGB.xyz = MAD YUV.zzx, C1, YUV.y * else * RGB.xyz = MAD YUV.xxz, C1, YUV.y */ emit_op3(c, TGSI_OPCODE_MAD, dst_mask(dst, BRW_WRITEMASK_XYZ), ( swap_uv ? src_swizzle(tmpsrc, Z,Z,X,X) : src_swizzle(tmpsrc, X,X,Z,Z)), C1, src_scalar(tmpsrc, Y)); /* RGB.y = MAD YUV.z, C1.w, RGB.y */ emit_op3(c, TGSI_OPCODE_MAD, dst_mask(dst, BRW_WRITEMASK_Y), src_scalar(tmpsrc, Z), src_scalar(C1, W), src_scalar(src_reg_from_dst(dst), Y)); release_temp(c, tmp); } else { /* ordinary RGBA tex instruction */ emit_tex_op(c, TGSI_OPCODE_TEX, dst, unit, target, sampler.index, coord, src_undef(), src_undef()); } /* XXX: add GL_EXT_texture_swizzle support to gallium -- by * generating shader variants in mesa state tracker. */ /* Release this temp if we ended up allocating it: */ if (!dst_is_undef(tmp)) release_temp(c, tmp); }
/**
 * Emit the instructions that compute fragment input \p idx from the
 * payload registers, then record the attribute in c->fp_interp_emitted.
 *
 * PINTERP is used on attributes which have been multiplied by 1/W in the
 * SF program, and LINTERP on those which have not.
 *
 * Cleanup relative to the previous revision: the local 'opcode' was
 * assigned but never read, and 'arg2' only ever held src_undef(); both
 * are removed.  The emitted instructions are unchanged.
 *
 * \param c    compile context the instructions are appended to
 * \param idx  fragment attribute index (FRAG_ATTRIB_*)
 */
static void emit_interp( struct brw_wm_compile *c,
                         GLuint idx )
{
   struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
   struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
   struct prog_src_register deltas = get_delta_xy(c);

   switch (idx) {
   case FRAG_ATTRIB_WPOS:
      /* Have to treat wpos.xy specially:
       */
      emit_op(c,
              WM_WPOSXY,
              dst_mask(dst, WRITEMASK_XY),
              0, 0, 0,
              get_pixel_xy(c),
              src_undef(),
              src_undef());

      dst = dst_mask(dst, WRITEMASK_ZW);

      /* PROGRAM_INPUT.attr.zw = LINTERP payload.interp[attr], deltas */
      emit_op(c,
              WM_LINTERP,
              dst,
              0, 0, 0,
              interp,
              deltas,
              src_undef());
      break;

   case FRAG_ATTRIB_COL0:
   case FRAG_ATTRIB_COL1:
      if (c->key.flat_shade) {
         /* Flat shading: constant across the primitive. */
         emit_op(c,
                 WM_CINTERP,
                 dst,
                 0, 0, 0,
                 interp,
                 src_undef(),
                 src_undef());
      }
      else {
         emit_op(c,
                 WM_LINTERP,
                 dst,
                 0, 0, 0,
                 interp,
                 deltas,
                 src_undef());
      }
      break;

   default:
      emit_op(c,
              WM_PINTERP,
              dst,
              0, 0, 0,
              interp,
              deltas,
              get_pixel_w(c));
      break;
   }

   c->fp_interp_emitted |= 1<<idx;
}