/* dst = ROUND(src) : * add = src + .5 * frac = FRC(add) * dst = add - frac * * According to the GLSL spec, the implementor can decide which way to round * when the fraction is .5. We round down for .5. * */ static void transform_ROUND(struct radeon_compiler* c, struct rc_instruction* inst) { unsigned int mask = inst->U.I.DstReg.WriteMask; unsigned int frac_index, add_index; struct rc_dst_register frac_dst, add_dst; struct rc_src_register frac_src, add_src; /* add = src + .5 */ add_index = rc_find_free_temporary(c); add_dst = dstregtmpmask(add_index, mask); emit2(c, inst->Prev, RC_OPCODE_ADD, 0, add_dst, inst->U.I.SrcReg[0], builtin_half); add_src = srcreg(RC_FILE_TEMPORARY, add_dst.Index); /* frac = FRC(add) */ frac_index = rc_find_free_temporary(c); frac_dst = dstregtmpmask(frac_index, mask); emit1(c, inst->Prev, RC_OPCODE_FRC, 0, frac_dst, add_src); frac_src = srcreg(RC_FILE_TEMPORARY, frac_dst.Index); /* dst = add - frac */ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, inst->U.I.DstReg, add_src, negate(frac_src)); rc_remove_instruction(inst); }
static void lower_texture_rect(struct r300_fragment_program_compiler *compiler, struct rc_instruction *inst) { struct rc_instruction *inst_rect; unsigned temp = rc_find_free_temporary(&compiler->Base); if (inst->U.I.TexSrcTarget == RC_TEXTURE_RECT || compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords) { inst_rect = rc_insert_new_instruction(&compiler->Base, inst->Prev); inst_rect->U.I.Opcode = RC_OPCODE_MUL; inst_rect->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_rect->U.I.DstReg.Index = temp; inst_rect->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; inst_rect->U.I.SrcReg[1].File = RC_FILE_CONSTANT; inst_rect->U.I.SrcReg[1].Index = rc_constants_add_state(&compiler->Base.Program.Constants, RC_STATE_R300_TEXRECT_FACTOR, inst->U.I.TexSrcUnit); reset_srcreg(&inst->U.I.SrcReg[0]); inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst->U.I.SrcReg[0].Index = temp; inst->U.I.TexSrcTarget = RC_TEXTURE_2D; } }
static void projective_divide(struct r300_fragment_program_compiler *compiler, struct rc_instruction *inst) { struct rc_instruction *inst_mul, *inst_rcp; unsigned temp = rc_find_free_temporary(&compiler->Base); inst_rcp = rc_insert_new_instruction(&compiler->Base, inst->Prev); inst_rcp->U.I.Opcode = RC_OPCODE_RCP; inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_rcp->U.I.DstReg.Index = temp; inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; /* Because the input can be arbitrarily swizzled, * read the component mapped to W. */ inst_rcp->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3)); inst_mul = rc_insert_new_instruction(&compiler->Base, inst->Prev); inst_mul->U.I.Opcode = RC_OPCODE_MUL; inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mul->U.I.DstReg.Index = temp; inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; inst_mul->U.I.SrcReg[1].Index = temp; inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; reset_srcreg(&inst->U.I.SrcReg[0]); inst->U.I.Opcode = RC_OPCODE_TEX; inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst->U.I.SrcReg[0].Index = temp; }
static void transform_r300_vertex_SSG(struct radeon_compiler* c, struct rc_instruction* inst) { /* result = sign(x) * * SLT tmp0, 0, x; * SLT tmp1, x, 0; * ADD result, tmp0, -tmp1; */ struct rc_dst_register dst0 = try_to_reuse_dst(c, inst); unsigned tmp1; /* 0 < x */ dst0 = try_to_reuse_dst(c, inst); emit2(c, inst->Prev, RC_OPCODE_SLT, 0, dst0, builtin_zero, inst->U.I.SrcReg[0]); /* x < 0 */ tmp1 = rc_find_free_temporary(c); emit2(c, inst->Prev, RC_OPCODE_SLT, 0, dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask), inst->U.I.SrcReg[0], builtin_zero); /* Either both are zero, or one of them is one and the other is zero. */ /* result = tmp0 - tmp1 */ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, inst->U.I.DstReg, srcreg(RC_FILE_TEMPORARY, dst0.Index), negate(srcreg(RC_FILE_TEMPORARY, tmp1))); rc_remove_instruction(inst); }
static void transform_r300_vertex_SNE(struct radeon_compiler *c, struct rc_instruction *inst) { /* x != y <==> x < y || y < x */ int tmp = rc_find_free_temporary(c); /* x < y */ emit2(c, inst->Prev, RC_OPCODE_SLT, 0, dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask), inst->U.I.SrcReg[0], inst->U.I.SrcReg[1]); /* y < x */ emit2(c, inst->Prev, RC_OPCODE_SLT, 0, inst->U.I.DstReg, inst->U.I.SrcReg[1], inst->U.I.SrcReg[0]); /* x || y = max(x, y) */ emit2(c, inst->Prev, RC_OPCODE_MAX, 0, inst->U.I.DstReg, srcreg(RC_FILE_TEMPORARY, tmp), srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index)); rc_remove_instruction(inst); }
static void transform_r300_vertex_SEQ(struct radeon_compiler *c, struct rc_instruction *inst) { /* x = y <==> x >= y && y >= x */ int tmp = rc_find_free_temporary(c); /* x <= y */ emit2(c, inst->Prev, RC_OPCODE_SGE, 0, dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask), inst->U.I.SrcReg[0], inst->U.I.SrcReg[1]); /* y <= x */ emit2(c, inst->Prev, RC_OPCODE_SGE, 0, inst->U.I.DstReg, inst->U.I.SrcReg[1], inst->U.I.SrcReg[0]); /* x && y = x * y */ emit2(c, inst->Prev, RC_OPCODE_MUL, 0, inst->U.I.DstReg, srcreg(RC_FILE_TEMPORARY, tmp), srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index)); rc_remove_instruction(inst); }
/** * R3xx-R4xx vertex engine does not support the Absolute source operand modifier * and the Saturate opcode modifier. Only Absolute is currently transformed. */ static int transform_nonnative_modifiers( struct radeon_compiler *c, struct rc_instruction *inst, void* unused) { const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); unsigned i; /* Transform ABS(a) to MAX(a, -a). */ for (i = 0; i < opcode->NumSrcRegs; i++) { if (inst->U.I.SrcReg[i].Abs) { struct rc_instruction *new_inst; unsigned temp; inst->U.I.SrcReg[i].Abs = 0; temp = rc_find_free_temporary(c); new_inst = rc_insert_new_instruction(c, inst->Prev); new_inst->U.I.Opcode = RC_OPCODE_MAX; new_inst->U.I.DstReg.File = RC_FILE_TEMPORARY; new_inst->U.I.DstReg.Index = temp; new_inst->U.I.SrcReg[0] = inst->U.I.SrcReg[i]; new_inst->U.I.SrcReg[1] = inst->U.I.SrcReg[i]; new_inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; memset(&inst->U.I.SrcReg[i], 0, sizeof(inst->U.I.SrcReg[i])); inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; inst->U.I.SrcReg[i].Index = temp; inst->U.I.SrcReg[i].Swizzle = RC_SWIZZLE_XYZW; } } return 1; }
static void transform_negative_addressing(struct r300_vertex_program_compiler *c, struct rc_instruction *arl, struct rc_instruction *end, int min_offset) { struct rc_instruction *inst, *add; unsigned const_swizzle; /* Transform ARL */ add = rc_insert_new_instruction(&c->Base, arl->Prev); add->U.I.Opcode = RC_OPCODE_ADD; add->U.I.DstReg.File = RC_FILE_TEMPORARY; add->U.I.DstReg.Index = rc_find_free_temporary(&c->Base); add->U.I.DstReg.WriteMask = RC_MASK_X; add->U.I.SrcReg[0] = arl->U.I.SrcReg[0]; add->U.I.SrcReg[1].File = RC_FILE_CONSTANT; add->U.I.SrcReg[1].Index = rc_constants_add_immediate_scalar(&c->Base.Program.Constants, min_offset, &const_swizzle); add->U.I.SrcReg[1].Swizzle = const_swizzle; arl->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; arl->U.I.SrcReg[0].Index = add->U.I.DstReg.Index; arl->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XXXX; /* Rewrite offsets up to and excluding inst. */ for (inst = arl->Next; inst != end; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); for (unsigned i = 0; i < opcode->NumSrcRegs; i++) if (inst->U.I.SrcReg[i].RelAddr) inst->U.I.SrcReg[i].Index -= min_offset; } }
/** * Transform the trigonometric functions COS, SIN, and SCS * so that the input to COS and SIN is always in the range [-PI, PI]. * SCS is replaced by one COS and one SIN instruction. */ int r300_transform_trig_scale_vertex(struct radeon_compiler *c, struct rc_instruction *inst, void *unused) { static const float cons[4] = {0.15915494309189535, 0.5, 6.28318530717959, -3.14159265358979}; unsigned int temp; unsigned int constant; if (inst->U.I.Opcode != RC_OPCODE_COS && inst->U.I.Opcode != RC_OPCODE_SIN && inst->U.I.Opcode != RC_OPCODE_SCS) return 0; /* Repeat x in the range [-PI, PI]: * * repeat(x) = frac(x / 2PI + 0.5) * 2PI - PI */ temp = rc_find_free_temporary(c); constant = rc_constants_add_immediate_vec4(&c->Program.Constants, cons); emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W), swizzle_xxxx(inst->U.I.SrcReg[0]), srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_XXXX), srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_YYYY)); emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W), srcreg(RC_FILE_TEMPORARY, temp)); emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W), srcreg(RC_FILE_TEMPORARY, temp), srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_ZZZZ), srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_WWWW)); r300_transform_SIN_COS_SCS(c, inst, temp); return 1; }
/** * Transform the trigonometric functions COS, SIN, and SCS * to include pre-scaling by 1/(2*PI) and taking the fractional * part, so that the input to COS and SIN is always in the range [0,1). * SCS is replaced by one COS and one SIN instruction. * * @warning This transformation implicitly changes the semantics of SIN and COS! */ int radeonTransformTrigScale(struct radeon_compiler* c, struct rc_instruction* inst, void* unused) { static const float RCP_2PI = 0.15915494309189535; unsigned int temp; unsigned int constant; unsigned int constant_swizzle; if (inst->U.I.Opcode != RC_OPCODE_COS && inst->U.I.Opcode != RC_OPCODE_SIN && inst->U.I.Opcode != RC_OPCODE_SCS) return 0; temp = rc_find_free_temporary(c); constant = rc_constants_add_immediate_scalar(&c->Program.Constants, RCP_2PI, &constant_swizzle); emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(temp, RC_MASK_W), swizzle_xxxx(inst->U.I.SrcReg[0]), srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)); emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W), srcreg(RC_FILE_TEMPORARY, temp)); r300_transform_SIN_COS_SCS(c, inst, temp); return 1; }
/** * The FACE input in hardware contains 1 if it's a back face, 0 otherwise. * Gallium and OpenGL define it the other way around. * * So let's just negate FACE at the beginning of the shader and rewrite the rest * of the shader to read from the newly allocated temporary. */ void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face) { unsigned tempregi = rc_find_free_temporary(c); struct rc_instruction *inst_add; struct rc_instruction *inst; /* perspective divide */ inst_add = rc_insert_new_instruction(c, &c->Program.Instructions); inst_add->U.I.Opcode = RC_OPCODE_ADD; inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_add->U.I.DstReg.Index = tempregi; inst_add->U.I.DstReg.WriteMask = RC_MASK_X; inst_add->U.I.SrcReg[0].File = RC_FILE_NONE; inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT; inst_add->U.I.SrcReg[1].Index = face; inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX; inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW; for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); unsigned i; for(i = 0; i < opcode->NumSrcRegs; i++) { if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == face) { inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; inst->U.I.SrcReg[i].Index = tempregi; } } } }
static void rewrite_source(struct radeon_compiler * c, struct rc_instruction * inst, unsigned src) { struct rc_swizzle_split split; unsigned int tempreg = rc_find_free_temporary(c); unsigned int usemask; usemask = 0; for(unsigned int chan = 0; chan < 4; ++chan) { if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED) usemask |= 1 << chan; } c->SwizzleCaps->Split(inst->U.I.SrcReg[src], usemask, &split); for(unsigned int phase = 0; phase < split.NumPhases; ++phase) { struct rc_instruction * mov = rc_insert_new_instruction(c, inst->Prev); unsigned int phase_refmask; unsigned int masked_negate; mov->U.I.Opcode = RC_OPCODE_MOV; mov->U.I.DstReg.File = RC_FILE_TEMPORARY; mov->U.I.DstReg.Index = tempreg; mov->U.I.DstReg.WriteMask = split.Phase[phase]; mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src]; mov->U.I.PreSub = inst->U.I.PreSub; phase_refmask = 0; for(unsigned int chan = 0; chan < 4; ++chan) { if (!GET_BIT(split.Phase[phase], chan)) SET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED); else phase_refmask |= 1 << GET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan); } phase_refmask &= RC_MASK_XYZW; masked_negate = split.Phase[phase] & mov->U.I.SrcReg[0].Negate; if (masked_negate == 0) mov->U.I.SrcReg[0].Negate = 0; else if (masked_negate == split.Phase[phase]) mov->U.I.SrcReg[0].Negate = RC_MASK_XYZW; } inst->U.I.SrcReg[src].File = RC_FILE_TEMPORARY; inst->U.I.SrcReg[src].Index = tempreg; inst->U.I.SrcReg[src].Swizzle = 0; inst->U.I.SrcReg[src].Negate = RC_MASK_NONE; inst->U.I.SrcReg[src].Abs = 0; for(unsigned int chan = 0; chan < 4; ++chan) { SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, GET_BIT(usemask, chan) ? chan : RC_SWIZZLE_UNUSED); } }
/**
 * Build a temporary destination carrying the write mask of \p inst.
 *
 * The index is the instruction's own destination index when
 * is_dst_safe_to_reuse() approves, otherwise a newly found free temporary.
 */
static struct rc_dst_register try_to_reuse_dst(struct radeon_compiler *c,
					       struct rc_instruction *inst)
{
	unsigned index = is_dst_safe_to_reuse(inst)
		? inst->U.I.DstReg.Index
		: rc_find_free_temporary(c);

	return dstregtmpmask(index, inst->U.I.DstReg.WriteMask);
}
/** * Vertex engine cannot read two inputs or two constants at the same time. * Introduce intermediate MOVs to temporary registers to account for this. */ static int transform_source_conflicts( struct radeon_compiler *c, struct rc_instruction* inst, void* unused) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); if (opcode->NumSrcRegs == 3) { if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[2]) || t_src_conflict(inst->U.I.SrcReg[0], inst->U.I.SrcReg[2])) { int tmpreg = rc_find_free_temporary(c); struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); inst_mov->U.I.Opcode = RC_OPCODE_MOV; inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mov->U.I.DstReg.Index = tmpreg; inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; reset_srcreg(&inst->U.I.SrcReg[2]); inst->U.I.SrcReg[2].File = RC_FILE_TEMPORARY; inst->U.I.SrcReg[2].Index = tmpreg; } } if (opcode->NumSrcRegs >= 2) { if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[0])) { int tmpreg = rc_find_free_temporary(c); struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); inst_mov->U.I.Opcode = RC_OPCODE_MOV; inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mov->U.I.DstReg.Index = tmpreg; inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; reset_srcreg(&inst->U.I.SrcReg[1]); inst->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; inst->U.I.SrcReg[1].Index = tmpreg; } } return 1; }
/** * Approximate sin(x), where x is clamped to (-pi/2, pi/2). * * MUL tmp.xy, src, { 4/PI, -4/(PI^2) } * MAD tmp.x, tmp.y, |src|, tmp.x * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x * MAD dest, tmp.y, weight, tmp.x */ static void sin_approx( struct radeon_compiler* c, struct rc_instruction * inst, struct rc_dst_register dst, struct rc_src_register src, const unsigned int* constants) { unsigned int tempreg = rc_find_free_temporary(c); emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(tempreg, RC_MASK_XY), swizzle_xxxx(src), srcreg(RC_FILE_CONSTANT, constants[0])); emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_X), swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)), absolute(swizzle_xxxx(src)), swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))); emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_Y), swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)), absolute(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))), negate(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)))); emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dst, swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)), swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[0])), swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))); }
static void scale_texcoords(struct r300_fragment_program_compiler *compiler, struct rc_instruction *inst, unsigned state_constant) { struct rc_instruction *inst_mov; unsigned temp = rc_find_free_temporary(&compiler->Base); inst_mov = rc_insert_new_instruction(&compiler->Base, inst->Prev); inst_mov->U.I.Opcode = RC_OPCODE_MUL; inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mov->U.I.DstReg.Index = temp; inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; inst_mov->U.I.SrcReg[1].File = RC_FILE_CONSTANT; inst_mov->U.I.SrcReg[1].Index = rc_constants_add_state(&compiler->Base.Program.Constants, state_constant, inst->U.I.TexSrcUnit); reset_srcreg(&inst->U.I.SrcReg[0]); inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst->U.I.SrcReg[0].Index = temp; }
/** * Rewrite the program such that a given output is duplicated. */ void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output) { unsigned tempreg = rc_find_free_temporary(c); struct rc_instruction * inst; for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); if (opcode->HasDstReg) { if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) { inst->U.I.DstReg.File = RC_FILE_TEMPORARY; inst->U.I.DstReg.Index = tempreg; } } } inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); inst->U.I.Opcode = RC_OPCODE_MOV; inst->U.I.DstReg.File = RC_FILE_OUTPUT; inst->U.I.DstReg.Index = output; inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst->U.I.SrcReg[0].Index = tempreg; inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); inst->U.I.Opcode = RC_OPCODE_MOV; inst->U.I.DstReg.File = RC_FILE_OUTPUT; inst->U.I.DstReg.Index = dup_output; inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst->U.I.SrcReg[0].Index = tempreg; inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; c->Program.OutputsWritten |= 1 << dup_output; }
/** * Transform TEX, TXP, TXB, and KIL instructions in the following ways: * - implement texture compare (shadow extensions) * - extract non-native source / destination operands * - premultiply texture coordinates for RECT * - extract operand swizzles * - introduce a temporary register when write masks are needed */ int radeonTransformTEX( struct radeon_compiler * c, struct rc_instruction * inst, void* data) { struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)data; if (inst->U.I.Opcode != RC_OPCODE_TEX && inst->U.I.Opcode != RC_OPCODE_TXB && inst->U.I.Opcode != RC_OPCODE_TXP && inst->U.I.Opcode != RC_OPCODE_KIL) return 0; /* ARB_shadow & EXT_shadow_funcs */ if (inst->U.I.Opcode != RC_OPCODE_KIL && ((c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) || (compiler->state.unit[inst->U.I.TexSrcUnit].compare_mode_enabled))) { rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) { inst->U.I.Opcode = RC_OPCODE_MOV; if (comparefunc == RC_COMPARE_FUNC_ALWAYS) { inst->U.I.SrcReg[0].File = RC_FILE_NONE; inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; } else { inst->U.I.SrcReg[0] = shadow_ambient(compiler, inst->U.I.TexSrcUnit); } return 1; } else { rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; struct rc_instruction * inst_rcp = NULL; struct rc_instruction * inst_mad; struct rc_instruction * inst_cmp; unsigned tmp_texsample; unsigned tmp_sum; unsigned tmp_recip_w = 0; int pass, fail, tex; /* Save the output register. */ struct rc_dst_register output_reg = inst->U.I.DstReg; /* Redirect TEX to a new temp. 
*/ tmp_texsample = rc_find_free_temporary(c); inst->U.I.DstReg.File = RC_FILE_TEMPORARY; inst->U.I.DstReg.Index = tmp_texsample; inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; if (inst->U.I.Opcode == RC_OPCODE_TXP) { tmp_recip_w = rc_find_free_temporary(c); /* Compute 1/W. */ inst_rcp = rc_insert_new_instruction(c, inst); inst_rcp->U.I.Opcode = RC_OPCODE_RCP; inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_rcp->U.I.DstReg.Index = tmp_recip_w; inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; inst_rcp->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3)); } /* Perspective-divide Z by W (if it's TXP) and add the texture sample (see below). */ tmp_sum = rc_find_free_temporary(c); inst_mad = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst); inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mad->U.I.DstReg.Index = tmp_sum; inst_mad->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; inst_mad->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 2)); if (inst->U.I.Opcode == RC_OPCODE_TXP) { inst_mad->U.I.Opcode = RC_OPCODE_MAD; inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; inst_mad->U.I.SrcReg[1].Index = tmp_recip_w; inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; tex = 2; } else { inst_mad->U.I.Opcode = RC_OPCODE_ADD; tex = 1; } inst_mad->U.I.SrcReg[tex].File = RC_FILE_TEMPORARY; inst_mad->U.I.SrcReg[tex].Index = tmp_texsample; inst_mad->U.I.SrcReg[tex].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_swizzle; /* Fake EQUAL/NOTEQUAL, it seems to pass some tests suprisingly. 
*/ if (comparefunc == RC_COMPARE_FUNC_EQUAL) { comparefunc = RC_COMPARE_FUNC_GEQUAL; } else if (comparefunc == RC_COMPARE_FUNC_NOTEQUAL) { comparefunc = RC_COMPARE_FUNC_LESS; } /* Recall that SrcReg[0] is r, SrcReg[tex] is tex and: * LESS: r < tex <=> -tex+r < 0 * GEQUAL: r >= tex <=> not (-tex+r < 0) * GREATER: r > tex <=> tex-r < 0 * LEQUAL: r <= tex <=> not ( tex-r < 0) * * This negates either r or tex: */ if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL) inst_mad->U.I.SrcReg[tex].Negate = inst_mad->U.I.SrcReg[tex].Negate ^ RC_MASK_XYZW; else inst_mad->U.I.SrcReg[0].Negate = inst_mad->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW; /* This negates the whole expresion: */ if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER) { pass = 1; fail = 2; } else { pass = 2; fail = 1; } inst_cmp = rc_insert_new_instruction(c, inst_mad); inst_cmp->U.I.Opcode = RC_OPCODE_CMP; inst_cmp->U.I.DstReg = output_reg; inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst_cmp->U.I.SrcReg[0].Index = tmp_sum; inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE; inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111; inst_cmp->U.I.SrcReg[fail] = shadow_ambient(compiler, inst->U.I.TexSrcUnit); assert(tmp_texsample != tmp_sum && tmp_sum != tmp_recip_w); } } /* Divide by W if needed. */ if (inst->U.I.Opcode == RC_OPCODE_TXP && (compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode == RC_WRAP_REPEAT || compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode == RC_WRAP_MIRRORED_REPEAT)) { projective_divide(compiler, inst); } /* Texture wrap modes don't work on NPOT textures or texrects. * * The game plan is simple. We have two flags, fake_npot and * non_normalized_coords, as well as a tex target. The RECT tex target * will make the emitted code use non-scaled texcoords. * * Non-wrapped/clamped texcoords with NPOT are free in HW. Repeat and * mirroring are not. 
If we need to repeat, we do: * * MUL temp, texcoord, <scaling factor constant> * FRC temp, temp ; Discard integer portion of coords * * This gives us coords in [0, 1]. * * Mirroring is trickier. We're going to start out like repeat: * * MUL temp, texcoord, <scaling factor constant> ; De-mirror across axes * MUL temp, temp, 0.5 ; Pattern repeats in [0, 2] * ; so scale to [0, 1] * FRC temp, temp ; Make the pattern repeat * MAD temp, temp, 2, -1 ; Move the pattern to [-1, 1] * ADD temp, 1, -abs(temp) ; Now comes a neat trick: use abs to mirror the pattern. * ; The pattern is backwards, so reverse it (1-x). * * This gives us coords in [0, 1]. * * ~ C & M. ;) */ if (inst->U.I.Opcode != RC_OPCODE_KIL && (inst->U.I.TexSrcTarget == RC_TEXTURE_RECT || compiler->state.unit[inst->U.I.TexSrcUnit].fake_npot || compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords)) { rc_wrap_mode wrapmode = compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode; /* R300 cannot sample from rectangles. */ if (!c->is_r500) { lower_texture_rect(compiler, inst); } if (compiler->state.unit[inst->U.I.TexSrcUnit].fake_npot && wrapmode != RC_WRAP_NONE) { struct rc_instruction *inst_mov; unsigned temp = rc_find_free_temporary(c); /* For NPOT fallback, we need normalized coordinates anyway. */ if (c->is_r500) { lower_texture_rect(compiler, inst); } if (wrapmode == RC_WRAP_REPEAT) { /* Both instructions will be paired up. 
*/ struct rc_instruction *inst_frc = rc_insert_new_instruction(c, inst->Prev); inst_frc->U.I.Opcode = RC_OPCODE_FRC; inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_frc->U.I.DstReg.Index = temp; inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ; inst_frc->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; } else if (wrapmode == RC_WRAP_MIRRORED_REPEAT) { /* * Function: * f(v) = 1 - abs(frac(v * 0.5) * 2 - 1) * * Code: * MUL temp, src0, 0.5 * FRC temp, temp * MAD temp, temp, 2, -1 * ADD temp, 1, -abs(temp) */ struct rc_instruction *inst_mul, *inst_frc, *inst_mad, *inst_add; unsigned two, two_swizzle; inst_mul = rc_insert_new_instruction(c, inst->Prev); inst_mul->U.I.Opcode = RC_OPCODE_MUL; inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mul->U.I.DstReg.Index = temp; inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ; inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_HHHH; inst_frc = rc_insert_new_instruction(c, inst->Prev); inst_frc->U.I.Opcode = RC_OPCODE_FRC; inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_frc->U.I.DstReg.Index = temp; inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ; inst_frc->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst_frc->U.I.SrcReg[0].Index = temp; inst_frc->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle); inst_mad = rc_insert_new_instruction(c, inst->Prev); inst_mad->U.I.Opcode = RC_OPCODE_MAD; inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mad->U.I.DstReg.Index = temp; inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ; inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst_mad->U.I.SrcReg[0].Index = temp; inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT; inst_mad->U.I.SrcReg[1].Index = two; inst_mad->U.I.SrcReg[1].Swizzle = two_swizzle; inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_1111; inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZ; inst_add = rc_insert_new_instruction(c, inst->Prev); 
inst_add->U.I.Opcode = RC_OPCODE_ADD; inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_add->U.I.DstReg.Index = temp; inst_add->U.I.DstReg.WriteMask = RC_MASK_XYZ; inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; inst_add->U.I.SrcReg[1].Index = temp; inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0; inst_add->U.I.SrcReg[1].Abs = 1; inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZ; } else if (wrapmode == RC_WRAP_MIRRORED_CLAMP) { /* * Mirrored clamp modes are bloody simple, we just use abs * to mirror [0, 1] into [-1, 0]. This works for * all modes i.e. CLAMP, CLAMP_TO_EDGE, and CLAMP_TO_BORDER. */ struct rc_instruction *inst_mov; inst_mov = rc_insert_new_instruction(c, inst->Prev); inst_mov->U.I.Opcode = RC_OPCODE_MOV; inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mov->U.I.DstReg.Index = temp; inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ; inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; inst_mov->U.I.SrcReg[0].Abs = 1; } /* Preserve W for TXP/TXB. 
*/ inst_mov = rc_insert_new_instruction(c, inst->Prev); inst_mov->U.I.Opcode = RC_OPCODE_MOV; inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mov->U.I.DstReg.Index = temp; inst_mov->U.I.DstReg.WriteMask = RC_MASK_W; inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; reset_srcreg(&inst->U.I.SrcReg[0]); inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst->U.I.SrcReg[0].Index = temp; } } /* Cannot write texture to output registers (all chips) or with masks (non-r500) */ if (inst->U.I.Opcode != RC_OPCODE_KIL && (inst->U.I.DstReg.File != RC_FILE_TEMPORARY || (!c->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) { struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst); inst_mov->U.I.Opcode = RC_OPCODE_MOV; inst_mov->U.I.DstReg = inst->U.I.DstReg; inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c); inst->U.I.DstReg.File = RC_FILE_TEMPORARY; inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; } /* Cannot read texture coordinate from constants file */ if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) { struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); inst_mov->U.I.Opcode = RC_OPCODE_MOV; inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c); inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; reset_srcreg(&inst->U.I.SrcReg[0]); inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index; } return 1; }
/** * Introduce standard code fragment to deal with fragment.position. */ void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input, int full_vtransform) { unsigned tempregi = rc_find_free_temporary(c); struct rc_instruction * inst_rcp; struct rc_instruction * inst_mul; struct rc_instruction * inst_mad; struct rc_instruction * inst; c->Program.InputsRead &= ~(1 << wpos); c->Program.InputsRead |= 1 << new_input; /* perspective divide */ inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions); inst_rcp->U.I.Opcode = RC_OPCODE_RCP; inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_rcp->U.I.DstReg.Index = tempregi; inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT; inst_rcp->U.I.SrcReg[0].Index = new_input; inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; inst_mul = rc_insert_new_instruction(c, inst_rcp); inst_mul->U.I.Opcode = RC_OPCODE_MUL; inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mul->U.I.DstReg.Index = tempregi; inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ; inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT; inst_mul->U.I.SrcReg[0].Index = new_input; inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; inst_mul->U.I.SrcReg[1].Index = tempregi; inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; /* viewport transformation */ inst_mad = rc_insert_new_instruction(c, inst_mul); inst_mad->U.I.Opcode = RC_OPCODE_MAD; inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mad->U.I.DstReg.Index = tempregi; inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ; inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst_mad->U.I.SrcReg[0].Index = tempregi; inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT; inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0; inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT; inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0; if (full_vtransform) { inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, 
RC_STATE_R300_VIEWPORT_SCALE, 0); inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0); } else { inst_mad->U.I.SrcReg[1].Index = inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0); } for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); unsigned i; for(i = 0; i < opcode->NumSrcRegs; i++) { if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == wpos) { inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; inst->U.I.SrcReg[i].Index = tempregi; } } } }
/** * Definition of LIT (from ARB_fragment_program): * * tmp = VectorLoad(op0); * if (tmp.x < 0) tmp.x = 0; * if (tmp.y < 0) tmp.y = 0; * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; * result.x = 1.0; * result.y = tmp.x; * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0; * result.w = 1.0; * * The longest path of computation is the one leading to result.z, * consisting of 5 operations. This implementation of LIT takes * 5 slots, if the subsequent optimization passes are clever enough * to pair instructions correctly. */ static void transform_LIT(struct radeon_compiler* c, struct rc_instruction* inst) { unsigned int constant; unsigned int constant_swizzle; unsigned int temp; struct rc_src_register srctemp; constant = rc_constants_add_immediate_scalar(&c->Program.Constants, -127.999999, &constant_swizzle); if (inst->U.I.DstReg.WriteMask != RC_MASK_XYZW || inst->U.I.DstReg.File != RC_FILE_TEMPORARY) { struct rc_instruction * inst_mov; inst_mov = emit1(c, inst, RC_OPCODE_MOV, 0, inst->U.I.DstReg, srcreg(RC_FILE_TEMPORARY, rc_find_free_temporary(c))); inst->U.I.DstReg.File = RC_FILE_TEMPORARY; inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; } temp = inst->U.I.DstReg.Index; srctemp = srcreg(RC_FILE_TEMPORARY, temp); /* tmp.x = max(0.0, Src.x); */ /* tmp.y = max(0.0, Src.y); */ /* tmp.w = clamp(Src.z, -128+eps, 128-eps); */ emit2(c, inst->Prev, RC_OPCODE_MAX, 0, dstregtmpmask(temp, RC_MASK_XYW), inst->U.I.SrcReg[0], swizzle(srcreg(RC_FILE_CONSTANT, constant), RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, constant_swizzle&3)); emit2(c, inst->Prev, RC_OPCODE_MIN, 0, dstregtmpmask(temp, RC_MASK_Z), swizzle_wwww(srctemp), negate(srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle))); /* tmp.w = Pow(tmp.y, tmp.w) */ emit1(c, inst->Prev, RC_OPCODE_LG2, 0, dstregtmpmask(temp, RC_MASK_W), swizzle_yyyy(srctemp)); emit2(c, inst->Prev, 
RC_OPCODE_MUL, 0, dstregtmpmask(temp, RC_MASK_W), swizzle_wwww(srctemp), swizzle_zzzz(srctemp)); emit1(c, inst->Prev, RC_OPCODE_EX2, 0, dstregtmpmask(temp, RC_MASK_W), swizzle_wwww(srctemp)); /* tmp.z = (tmp.x > 0) ? tmp.w : 0.0 */ emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, dstregtmpmask(temp, RC_MASK_Z), negate(swizzle_xxxx(srctemp)), swizzle_wwww(srctemp), builtin_zero); /* tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 */ emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, dstregtmpmask(temp, RC_MASK_XYW), swizzle(srctemp, RC_SWIZZLE_ONE, RC_SWIZZLE_X, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE)); rc_remove_instruction(inst); }
/** * Transform TEX, TXP, TXB, and KIL instructions in the following ways: * - implement texture compare (shadow extensions) * - extract non-native source / destination operands * - premultiply texture coordinates for RECT * - extract operand swizzles * - introduce a temporary register when write masks are needed */ int radeonTransformTEX( struct radeon_compiler * c, struct rc_instruction * inst, void* data) { struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)data; rc_wrap_mode wrapmode = compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode; int is_rect = inst->U.I.TexSrcTarget == RC_TEXTURE_RECT || compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords; if (inst->U.I.Opcode != RC_OPCODE_TEX && inst->U.I.Opcode != RC_OPCODE_TXB && inst->U.I.Opcode != RC_OPCODE_TXP && inst->U.I.Opcode != RC_OPCODE_TXD && inst->U.I.Opcode != RC_OPCODE_TXL && inst->U.I.Opcode != RC_OPCODE_KIL) return 0; /* ARB_shadow & EXT_shadow_funcs */ if (inst->U.I.Opcode != RC_OPCODE_KIL && ((c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) || (compiler->state.unit[inst->U.I.TexSrcUnit].compare_mode_enabled))) { rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) { inst->U.I.Opcode = RC_OPCODE_MOV; if (comparefunc == RC_COMPARE_FUNC_ALWAYS) { inst->U.I.SrcReg[0] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit); } else { inst->U.I.SrcReg[0] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit); } return 1; } else { struct rc_instruction * inst_rcp = NULL; struct rc_instruction *inst_mul, *inst_add, *inst_cmp; unsigned tmp_texsample; unsigned tmp_sum; int pass, fail; /* Save the output register. */ struct rc_dst_register output_reg = inst->U.I.DstReg; unsigned saturate_mode = inst->U.I.SaturateMode; /* Redirect TEX to a new temp. 
*/ tmp_texsample = rc_find_free_temporary(c); inst->U.I.SaturateMode = 0; inst->U.I.DstReg.File = RC_FILE_TEMPORARY; inst->U.I.DstReg.Index = tmp_texsample; inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; tmp_sum = rc_find_free_temporary(c); if (inst->U.I.Opcode == RC_OPCODE_TXP) { /* Compute 1/W. */ inst_rcp = rc_insert_new_instruction(c, inst); inst_rcp->U.I.Opcode = RC_OPCODE_RCP; inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_rcp->U.I.DstReg.Index = tmp_sum; inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; inst_rcp->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3)); } /* Divide Z by W (if it's TXP) and saturate. */ inst_mul = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst); inst_mul->U.I.Opcode = inst->U.I.Opcode == RC_OPCODE_TXP ? RC_OPCODE_MUL : RC_OPCODE_MOV; inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mul->U.I.DstReg.Index = tmp_sum; inst_mul->U.I.DstReg.WriteMask = RC_MASK_W; inst_mul->U.I.SaturateMode = RC_SATURATE_ZERO_ONE; inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; inst_mul->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 2)); if (inst->U.I.Opcode == RC_OPCODE_TXP) { inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; inst_mul->U.I.SrcReg[1].Index = tmp_sum; inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; } /* Add the depth texture value. 
*/ inst_add = rc_insert_new_instruction(c, inst_mul); inst_add->U.I.Opcode = RC_OPCODE_ADD; inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_add->U.I.DstReg.Index = tmp_sum; inst_add->U.I.DstReg.WriteMask = RC_MASK_W; inst_add->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst_add->U.I.SrcReg[0].Index = tmp_sum; inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; inst_add->U.I.SrcReg[1].Index = tmp_texsample; inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX; /* Note that SrcReg[0] is r, SrcReg[1] is tex and: * LESS: r < tex <=> -tex+r < 0 * GEQUAL: r >= tex <=> not (-tex+r < 0) * GREATER: r > tex <=> tex-r < 0 * LEQUAL: r <= tex <=> not ( tex-r < 0) * EQUAL: GEQUAL * NOTEQUAL:LESS */ /* This negates either r or tex: */ if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL || comparefunc == RC_COMPARE_FUNC_EQUAL || comparefunc == RC_COMPARE_FUNC_NOTEQUAL) inst_add->U.I.SrcReg[1].Negate = inst_add->U.I.SrcReg[1].Negate ^ RC_MASK_XYZW; else inst_add->U.I.SrcReg[0].Negate = inst_add->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW; /* This negates the whole expresion: */ if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER || comparefunc == RC_COMPARE_FUNC_NOTEQUAL) { pass = 1; fail = 2; } else { pass = 2; fail = 1; } inst_cmp = rc_insert_new_instruction(c, inst_add); inst_cmp->U.I.Opcode = RC_OPCODE_CMP; inst_cmp->U.I.SaturateMode = saturate_mode; inst_cmp->U.I.DstReg = output_reg; inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst_cmp->U.I.SrcReg[0].Index = tmp_sum; inst_cmp->U.I.SrcReg[0].Swizzle = combine_swizzles(RC_SWIZZLE_WWWW, compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle); inst_cmp->U.I.SrcReg[pass] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit); inst_cmp->U.I.SrcReg[fail] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit); assert(tmp_texsample != tmp_sum); } } /* R300 cannot sample from rectangles and the wrap mode fallback needs * normalized 
coordinates anyway. */ if (inst->U.I.Opcode != RC_OPCODE_KIL && is_rect && (!c->is_r500 || wrapmode != RC_WRAP_NONE)) { scale_texcoords(compiler, inst, RC_STATE_R300_TEXRECT_FACTOR); inst->U.I.TexSrcTarget = RC_TEXTURE_2D; } /* Divide by W if needed. */ if (inst->U.I.Opcode == RC_OPCODE_TXP && (wrapmode == RC_WRAP_REPEAT || wrapmode == RC_WRAP_MIRRORED_REPEAT || compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch)) { projective_divide(compiler, inst); } /* Texture wrap modes don't work on NPOT textures. * * Non-wrapped/clamped texcoords with NPOT are free in HW. Repeat and * mirroring are not. If we need to repeat, we do: * * MUL temp, texcoord, <scaling factor constant> * FRC temp, temp ; Discard integer portion of coords * * This gives us coords in [0, 1]. * * Mirroring is trickier. We're going to start out like repeat: * * MUL temp, texcoord, <scaling factor constant> ; De-mirror across axes * MUL temp, temp, 0.5 ; Pattern repeats in [0, 2] * ; so scale to [0, 1] * FRC temp, temp ; Make the pattern repeat * MAD temp, temp, 2, -1 ; Move the pattern to [-1, 1] * ADD temp, 1, -abs(temp) ; Now comes a neat trick: use abs to mirror the pattern. * ; The pattern is backwards, so reverse it (1-x). * * This gives us coords in [0, 1]. * * ~ C & M. ;) */ if (inst->U.I.Opcode != RC_OPCODE_KIL && wrapmode != RC_WRAP_NONE) { struct rc_instruction *inst_mov; unsigned temp = rc_find_free_temporary(c); if (wrapmode == RC_WRAP_REPEAT) { /* Both instructions will be paired up. 
*/ struct rc_instruction *inst_frc = rc_insert_new_instruction(c, inst->Prev); inst_frc->U.I.Opcode = RC_OPCODE_FRC; inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_frc->U.I.DstReg.Index = temp; inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ; inst_frc->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; } else if (wrapmode == RC_WRAP_MIRRORED_REPEAT) { /* * Function: * f(v) = 1 - abs(frac(v * 0.5) * 2 - 1) * * Code: * MUL temp, src0, 0.5 * FRC temp, temp * MAD temp, temp, 2, -1 * ADD temp, 1, -abs(temp) */ struct rc_instruction *inst_mul, *inst_frc, *inst_mad, *inst_add; unsigned two, two_swizzle; inst_mul = rc_insert_new_instruction(c, inst->Prev); inst_mul->U.I.Opcode = RC_OPCODE_MUL; inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mul->U.I.DstReg.Index = temp; inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ; inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_HHHH; inst_frc = rc_insert_new_instruction(c, inst->Prev); inst_frc->U.I.Opcode = RC_OPCODE_FRC; inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_frc->U.I.DstReg.Index = temp; inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ; inst_frc->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst_frc->U.I.SrcReg[0].Index = temp; inst_frc->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle); inst_mad = rc_insert_new_instruction(c, inst->Prev); inst_mad->U.I.Opcode = RC_OPCODE_MAD; inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mad->U.I.DstReg.Index = temp; inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ; inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst_mad->U.I.SrcReg[0].Index = temp; inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT; inst_mad->U.I.SrcReg[1].Index = two; inst_mad->U.I.SrcReg[1].Swizzle = two_swizzle; inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_1111; inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZ; inst_add = rc_insert_new_instruction(c, inst->Prev); 
inst_add->U.I.Opcode = RC_OPCODE_ADD; inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_add->U.I.DstReg.Index = temp; inst_add->U.I.DstReg.WriteMask = RC_MASK_XYZ; inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; inst_add->U.I.SrcReg[1].Index = temp; inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0; inst_add->U.I.SrcReg[1].Abs = 1; inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZ; } else if (wrapmode == RC_WRAP_MIRRORED_CLAMP) { /* * Mirrored clamp modes are bloody simple, we just use abs * to mirror [0, 1] into [-1, 0]. This works for * all modes i.e. CLAMP, CLAMP_TO_EDGE, and CLAMP_TO_BORDER. */ struct rc_instruction *inst_mov; inst_mov = rc_insert_new_instruction(c, inst->Prev); inst_mov->U.I.Opcode = RC_OPCODE_MOV; inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mov->U.I.DstReg.Index = temp; inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ; inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; inst_mov->U.I.SrcReg[0].Abs = 1; } /* Preserve W for TXP/TXB. */ inst_mov = rc_insert_new_instruction(c, inst->Prev); inst_mov->U.I.Opcode = RC_OPCODE_MOV; inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mov->U.I.DstReg.Index = temp; inst_mov->U.I.DstReg.WriteMask = RC_MASK_W; inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; reset_srcreg(&inst->U.I.SrcReg[0]); inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst->U.I.SrcReg[0].Index = temp; } /* NPOT -> POT conversion for 3D textures. */ if (inst->U.I.Opcode != RC_OPCODE_KIL && compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch) { struct rc_instruction *inst_mov; unsigned temp = rc_find_free_temporary(c); /* Saturate XYZ. */ inst_mov = rc_insert_new_instruction(c, inst->Prev); inst_mov->U.I.Opcode = RC_OPCODE_MOV; inst_mov->U.I.SaturateMode = RC_SATURATE_ZERO_ONE; inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mov->U.I.DstReg.Index = temp; inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ; inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; /* Copy W. 
*/ inst_mov = rc_insert_new_instruction(c, inst->Prev); inst_mov->U.I.Opcode = RC_OPCODE_MOV; inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mov->U.I.DstReg.Index = temp; inst_mov->U.I.DstReg.WriteMask = RC_MASK_W; inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; reset_srcreg(&inst->U.I.SrcReg[0]); inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst->U.I.SrcReg[0].Index = temp; scale_texcoords(compiler, inst, RC_STATE_R300_TEXSCALE_FACTOR); } /* Convert SNORM-encoded ATI1N sampled as UNORM to SNORM. * Formula: dst = tex > 0.5 ? tex*2-2 : tex*2 */ if (inst->U.I.Opcode != RC_OPCODE_KIL && compiler->state.unit[inst->U.I.TexSrcUnit].convert_unorm_to_snorm) { unsigned two, two_swizzle; struct rc_instruction *inst_mul, *inst_mad, *inst_cnd; two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2.35, &two_swizzle); inst_mul = rc_insert_new_instruction(c, inst); inst_mul->U.I.Opcode = RC_OPCODE_MUL; inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mul->U.I.DstReg.Index = rc_find_free_temporary(c); inst_mul->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst_mul->U.I.SrcReg[0].Index = rc_find_free_temporary(c); /* redirected TEX output */ inst_mul->U.I.SrcReg[1].File = RC_FILE_CONSTANT; /* 2 */ inst_mul->U.I.SrcReg[1].Index = two; inst_mul->U.I.SrcReg[1].Swizzle = two_swizzle; inst_mad = rc_insert_new_instruction(c, inst_mul); inst_mad->U.I.Opcode = RC_OPCODE_MAD; inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c); inst_mad->U.I.SrcReg[0] = inst_mul->U.I.SrcReg[0]; /* redirected TEX output */ inst_mad->U.I.SrcReg[1] = inst_mul->U.I.SrcReg[1]; /* 2 */ inst_mad->U.I.SrcReg[2] = inst_mul->U.I.SrcReg[1]; /* 2 */ inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZW; inst_cnd = rc_insert_new_instruction(c, inst_mad); inst_cnd->U.I.Opcode = RC_OPCODE_CND; inst_cnd->U.I.SaturateMode = inst->U.I.SaturateMode; inst_cnd->U.I.DstReg = inst->U.I.DstReg; inst_cnd->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst_cnd->U.I.SrcReg[0].Index = 
inst_mad->U.I.DstReg.Index; inst_cnd->U.I.SrcReg[0].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle; inst_cnd->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; inst_cnd->U.I.SrcReg[1].Index = inst_mul->U.I.DstReg.Index; inst_cnd->U.I.SrcReg[1].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle; inst_cnd->U.I.SrcReg[2] = inst_mul->U.I.SrcReg[0]; /* redirected TEX output */ inst->U.I.SaturateMode = 0; inst->U.I.DstReg.File = RC_FILE_TEMPORARY; inst->U.I.DstReg.Index = inst_mul->U.I.SrcReg[0].Index; inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; } /* Cannot write texture to output registers or with saturate (all chips), * or with masks (non-r500). */ if (inst->U.I.Opcode != RC_OPCODE_KIL && (inst->U.I.DstReg.File != RC_FILE_TEMPORARY || inst->U.I.SaturateMode || (!c->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) { struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst); inst_mov->U.I.Opcode = RC_OPCODE_MOV; inst_mov->U.I.SaturateMode = inst->U.I.SaturateMode; inst_mov->U.I.DstReg = inst->U.I.DstReg; inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c); inst->U.I.SaturateMode = 0; inst->U.I.DstReg.File = RC_FILE_TEMPORARY; inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; } /* Cannot read texture coordinate from constants file */ if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) { struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); inst_mov->U.I.Opcode = RC_OPCODE_MOV; inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c); inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; reset_srcreg(&inst->U.I.SrcReg[0]); inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index; } return 1; }
/** * Translate the trigonometric functions COS, SIN, and SCS * using only the basic instructions * MOV, ADD, MUL, MAD, FRC */ int r300_transform_trig_simple(struct radeon_compiler* c, struct rc_instruction* inst, void* unused) { unsigned int constants[2]; unsigned int tempreg; if (inst->U.I.Opcode != RC_OPCODE_COS && inst->U.I.Opcode != RC_OPCODE_SIN && inst->U.I.Opcode != RC_OPCODE_SCS) return 0; tempreg = rc_find_free_temporary(c); sincos_constants(c, constants); if (inst->U.I.Opcode == RC_OPCODE_COS) { /* MAD tmp.x, src, 1/(2*PI), 0.75 */ /* FRC tmp.x, tmp.x */ /* MAD tmp.z, tmp.x, 2*PI, -PI */ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), swizzle_xxxx(inst->U.I.SrcReg[0]), swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])), swizzle_xxxx(srcreg(RC_FILE_CONSTANT, constants[1]))); emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W), swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg))); emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])), negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0])))); sin_approx(c, inst, inst->U.I.DstReg, swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), constants); } else if (inst->U.I.Opcode == RC_OPCODE_SIN) { emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), swizzle_xxxx(inst->U.I.SrcReg[0]), swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])), swizzle_yyyy(srcreg(RC_FILE_CONSTANT, constants[1]))); emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W), swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg))); emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])), negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0])))); sin_approx(c, inst, inst->U.I.DstReg, swizzle_wwww(srcreg(RC_FILE_TEMPORARY, 
tempreg)), constants); } else { struct rc_dst_register dst; emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY), swizzle_xxxx(inst->U.I.SrcReg[0]), swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])), swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W)); emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_XY), srcreg(RC_FILE_TEMPORARY, tempreg)); emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY), srcreg(RC_FILE_TEMPORARY, tempreg), swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])), negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0])))); dst = inst->U.I.DstReg; dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_X; sin_approx(c, inst, dst, swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)), constants); dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_Y; sin_approx(c, inst, dst, swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)), constants); } rc_remove_instruction(inst); return 1; }