static void transform_r300_vertex_SSG(struct radeon_compiler* c, struct rc_instruction* inst) { /* result = sign(x) * * SLT tmp0, 0, x; * SLT tmp1, x, 0; * ADD result, tmp0, -tmp1; */ struct rc_dst_register dst0 = try_to_reuse_dst(c, inst); unsigned tmp1; /* 0 < x */ dst0 = try_to_reuse_dst(c, inst); emit2(c, inst->Prev, RC_OPCODE_SLT, 0, dst0, builtin_zero, inst->U.I.SrcReg[0]); /* x < 0 */ tmp1 = rc_find_free_temporary(c); emit2(c, inst->Prev, RC_OPCODE_SLT, 0, dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask), inst->U.I.SrcReg[0], builtin_zero); /* Either both are zero, or one of them is one and the other is zero. */ /* result = tmp0 - tmp1 */ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, inst->U.I.DstReg, srcreg(RC_FILE_TEMPORARY, dst0.Index), negate(srcreg(RC_FILE_TEMPORARY, tmp1))); rc_remove_instruction(inst); }
static void transform_r300_vertex_CMP(struct radeon_compiler* c, struct rc_instruction* inst) { /* There is no decent CMP available, so let's rig one up. * CMP is defined as dst = src0 < 0.0 ? src1 : src2 * The following sequence consumes zero to two temps and two extra slots * (the second temp and the second slot is consumed by transform_LRP), * but should be equivalent: * * SLT tmp0, src0, 0.0 * LRP dst, tmp0, src1, src2 * * Yes, I know, I'm a mad scientist. ~ C. & M. */ struct rc_dst_register dst = try_to_reuse_dst(c, inst); /* SLT tmp0, src0, 0.0 */ emit2(c, inst->Prev, RC_OPCODE_SLT, 0, dst, inst->U.I.SrcReg[0], builtin_zero); /* LRP dst, tmp0, src1, src2 */ transform_LRP(c, emit3(c, inst->Prev, RC_OPCODE_LRP, 0, inst->U.I.DstReg, srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])); rc_remove_instruction(inst); }
static void transform_FLR(struct radeon_compiler* c, struct rc_instruction* inst) { struct rc_dst_register dst = try_to_reuse_dst(c, inst); emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, inst->U.I.SrcReg[0]); emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg, inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, dst.Index))); rc_remove_instruction(inst); }
static void transform_SNE(struct radeon_compiler* c, struct rc_instruction* inst) { struct rc_dst_register dst = try_to_reuse_dst(c, inst); emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_one, builtin_zero); rc_remove_instruction(inst); }
static void transform_SLE(struct radeon_compiler* c, struct rc_instruction* inst) { struct rc_dst_register dst = try_to_reuse_dst(c, inst); emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg, srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one); rc_remove_instruction(inst); }
static void transform_POW(struct radeon_compiler* c, struct rc_instruction* inst) { struct rc_dst_register tempdst = try_to_reuse_dst(c, inst); struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempdst.Index); tempdst.WriteMask = RC_MASK_W; tempsrc.Swizzle = RC_SWIZZLE_WWWW; emit1(c, inst->Prev, RC_OPCODE_LG2, 0, tempdst, swizzle_xxxx(inst->U.I.SrcReg[0])); emit2(c, inst->Prev, RC_OPCODE_MUL, 0, tempdst, tempsrc, swizzle_xxxx(inst->U.I.SrcReg[1])); emit1(c, inst->Prev, RC_OPCODE_EX2, inst->U.I.SaturateMode, inst->U.I.DstReg, tempsrc); rc_remove_instruction(inst); }
static void transform_LRP(struct radeon_compiler* c, struct rc_instruction* inst) { struct rc_dst_register dst = try_to_reuse_dst(c, inst); emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[1], negate(inst->U.I.SrcReg[2])); emit3(c, inst->Prev, RC_OPCODE_MAD, &inst->U.I, inst->U.I.DstReg, inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[2]); rc_remove_instruction(inst); }
static void transform_XPD(struct radeon_compiler* c, struct rc_instruction* inst) { struct rc_dst_register dst = try_to_reuse_dst(c, inst); emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dst, swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W)); emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg, swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W), swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), negate(srcreg(RC_FILE_TEMPORARY, dst.Index))); rc_remove_instruction(inst); }
static void transform_CLAMP(struct radeon_compiler *c, struct rc_instruction *inst) { /* CLAMP dst, src, min, max * into: * MIN tmp, src, max * MAX dst, tmp, min */ struct rc_dst_register dst = try_to_reuse_dst(c, inst); emit2(c, inst->Prev, RC_OPCODE_MIN, 0, dst, inst->U.I.SrcReg[0], inst->U.I.SrcReg[2]); emit2(c, inst->Prev, RC_OPCODE_MAX, inst->U.I.SaturateMode, inst->U.I.DstReg, srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1]); rc_remove_instruction(inst); }
static void transform_TRUNC(struct radeon_compiler* c, struct rc_instruction* inst) { /* Definition of trunc: * trunc(x) = (abs(x) - fract(abs(x))) * sgn(x) * * The multiplication by sgn(x) can be simplified using CMP: * y * sgn(x) = (x < 0 ? -y : y) */ struct rc_dst_register dst = try_to_reuse_dst(c, inst); emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, absolute(inst->U.I.SrcReg[0])); emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, absolute(inst->U.I.SrcReg[0]), negate(srcreg(RC_FILE_TEMPORARY, dst.Index))); emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg, inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, dst.Index)), srcreg(RC_FILE_TEMPORARY, dst.Index)); rc_remove_instruction(inst); }
static void transform_CEIL(struct radeon_compiler* c, struct rc_instruction* inst) { /* Assuming: * ceil(x) = -floor(-x) * * After inlining floor: * ceil(x) = -(-x-frac(-x)) * * After simplification: * ceil(x) = x+frac(-x) */ struct rc_dst_register dst = try_to_reuse_dst(c, inst); emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, negate(inst->U.I.SrcReg[0])); emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg, inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index)); rc_remove_instruction(inst); }
static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c, struct rc_instruction* inst) { struct rc_dst_register dst = try_to_reuse_dst(c, inst); unsigned constant_swizzle; int constant = rc_constants_add_immediate_scalar(&c->Program.Constants, 0.0000000000000000001, &constant_swizzle); /* MOV dst, src */ dst.WriteMask = RC_MASK_XYZW; emit1(c, inst->Prev, RC_OPCODE_MOV, 0, dst, inst->U.I.SrcReg[0]); /* MAX dst.y, src, 0.00...001 */ emit2(c, inst->Prev, RC_OPCODE_MAX, 0, dstregtmpmask(dst.Index, RC_MASK_Y), srcreg(RC_FILE_TEMPORARY, dst.Index), srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)); inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, dst.Index); }