static void ei_mad(struct r300_vertex_program_code *vp, struct rc_sub_instruction *vpi, unsigned int * inst) { /* Remarks about hardware limitations of MAD * (please preserve this comment, as this information is _NOT_ * in the documentation provided by AMD). * * As described in the documentation, MAD with three unique temporary * source registers requires the use of the macro version. * * However (and this is not mentioned in the documentation), apparently * the macro version is _NOT_ a full superset of the normal version. * In particular, the macro version does not always work when relative * addressing is used in the source operands. * * This limitation caused incorrect rendering in Sauerbraten's OpenGL * assembly shader path when using medium quality animations * (i.e. animations with matrix blending instead of quaternion blending). * * Unfortunately, I (nha) have been unable to extract a Piglit regression * test for this issue - for some reason, it is possible to have vertex * programs whose prefix is *exactly* the same as the prefix of the * offending program in Sauerbraten up to the offending instruction * without causing any trouble. * * Bottom line: Only use the macro version only when really necessary; * according to AMD docs, this should improve performance by one clock * as a nice side bonus. */ if (vpi->SrcReg[0].File == RC_FILE_TEMPORARY && vpi->SrcReg[1].File == RC_FILE_TEMPORARY && vpi->SrcReg[2].File == RC_FILE_TEMPORARY && vpi->SrcReg[0].Index != vpi->SrcReg[1].Index && vpi->SrcReg[0].Index != vpi->SrcReg[2].Index && vpi->SrcReg[1].Index != vpi->SrcReg[2].Index) { inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD, 0, 1, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); } else { inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, 0, 0, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); } inst[1] = t_src(vp, &vpi->SrcReg[0]); inst[2] = t_src(vp, &vpi->SrcReg[1]); inst[3] = t_src(vp, &vpi->SrcReg[2]); }
static void ei_lit(struct r300_vertex_program_code *vp, struct rc_sub_instruction *vpi, unsigned int * inst) { //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX, 1, 0, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); /* NOTE: Users swizzling might not work. */ inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W PVS_SRC_SELECT_FORCE_0, // Z t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y t_src_class(vpi->SrcReg[0].File), vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | (vpi->SrcReg[0].RelAddr << 4); inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W PVS_SRC_SELECT_FORCE_0, // Z t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X t_src_class(vpi->SrcReg[0].File), vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | (vpi->SrcReg[0].RelAddr << 4); inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X PVS_SRC_SELECT_FORCE_0, // Z t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W t_src_class(vpi->SrcReg[0].File), vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | (vpi->SrcReg[0].RelAddr << 4); }
static unsigned long t_pred_dst(struct r300_vertex_program_compiler * compiler, unsigned int hw_opcode, int is_math) { return PVS_OP_DST_OPERAND(hw_opcode, is_math, 0, compiler->PredicateIndex, RC_MASK_W, t_dst_class(RC_FILE_TEMPORARY)); }
static void ei_pow(struct r300_vertex_program_code *vp, struct rc_sub_instruction *vpi, unsigned int * inst) { inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF, 1, 0, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]); inst[2] = __CONST(0, RC_SWIZZLE_ZERO); inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]); }
static void ei_vector2(struct r300_vertex_program_code *vp, unsigned int hw_opcode, struct rc_sub_instruction *vpi, unsigned int * inst) { inst[0] = PVS_OP_DST_OPERAND(hw_opcode, 0, 0, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); inst[1] = t_src(vp, &vpi->SrcReg[0]); inst[2] = t_src(vp, &vpi->SrcReg[1]); inst[3] = __CONST(1, RC_SWIZZLE_ZERO); }
static void ei_math1(struct r300_vertex_program_code *vp, unsigned int hw_opcode, struct rc_sub_instruction *vpi, unsigned int * inst) { inst[0] = PVS_OP_DST_OPERAND(hw_opcode, 1, 0, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File), vpi->SaturateMode == RC_SATURATE_ZERO_ONE); inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]); inst[2] = __CONST(0, RC_SWIZZLE_ZERO); inst[3] = __CONST(0, RC_SWIZZLE_ZERO); }