コード例 #1
0
/* dst = ROUND(src) :
 *   add = src + .5
 *   frac = FRC(add)
 *   dst = add - frac
 *
 * According to the GLSL spec, the implementor can decide which way to round
 * when the fraction is .5.  We round down for .5.
 *
 */
static void transform_ROUND(struct radeon_compiler* c,
	struct rc_instruction* inst)
{
	unsigned int mask = inst->U.I.DstReg.WriteMask;
	unsigned int frac_index, add_index;
	struct rc_dst_register frac_dst, add_dst;
	struct rc_src_register frac_src, add_src;

	/* add = src + .5 */
	add_index = rc_find_free_temporary(c);
	add_dst = dstregtmpmask(add_index, mask);
	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, add_dst, inst->U.I.SrcReg[0],
								builtin_half);
	add_src = srcreg(RC_FILE_TEMPORARY, add_dst.Index);


	/* frac = FRC(add) */
	frac_index = rc_find_free_temporary(c);
	frac_dst = dstregtmpmask(frac_index, mask);
	emit1(c, inst->Prev, RC_OPCODE_FRC, 0, frac_dst, add_src);
	frac_src = srcreg(RC_FILE_TEMPORARY, frac_dst.Index);

	/* dst = add - frac */
	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, inst->U.I.DstReg,
						add_src, negate(frac_src));
	rc_remove_instruction(inst);
}
コード例 #2
0
/**
 * Rescale the coordinate of a RECT / non-normalized texture lookup.
 *
 * If the instruction samples a RECT target, or its texture unit has
 * non_normalized_coords set, a MUL by the RC_STATE_R300_TEXRECT_FACTOR state
 * constant of that unit is inserted in front of the instruction, the
 * instruction is changed to read the product from a temporary, and its
 * target becomes RC_TEXTURE_2D. Otherwise the instruction is left untouched.
 */
static void lower_texture_rect(struct r300_fragment_program_compiler *compiler,
							   struct rc_instruction *inst)
{
	/* The temporary is looked up unconditionally, before the early exit. */
	const unsigned scaled = rc_find_free_temporary(&compiler->Base);
	struct rc_instruction *inst_scale;

	if (inst->U.I.TexSrcTarget != RC_TEXTURE_RECT &&
	    !compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords) {
		return;
	}

	/* MUL scaled, coord, <texrect factor of this unit> */
	inst_scale = rc_insert_new_instruction(&compiler->Base, inst->Prev);
	inst_scale->U.I.Opcode = RC_OPCODE_MUL;

	inst_scale->U.I.DstReg.File = RC_FILE_TEMPORARY;
	inst_scale->U.I.DstReg.Index = scaled;

	inst_scale->U.I.SrcReg[0] = inst->U.I.SrcReg[0];

	inst_scale->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
	inst_scale->U.I.SrcReg[1].Index =
		rc_constants_add_state(&compiler->Base.Program.Constants,
				       RC_STATE_R300_TEXRECT_FACTOR,
				       inst->U.I.TexSrcUnit);

	/* The lookup now reads the scaled coordinate ... */
	reset_srcreg(&inst->U.I.SrcReg[0]);
	inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
	inst->U.I.SrcReg[0].Index = scaled;

	/* ... and is treated as a 2D target from here on. */
	inst->U.I.TexSrcTarget = RC_TEXTURE_2D;
}
コード例 #3
0
/**
 * Turn a projective lookup into a plain TEX by doing the divide explicitly.
 *
 * Two instructions are inserted in front of inst:
 *
 *   RCP temp.w, coord.<component mapped to W>
 *   MUL temp, coord, temp.wwww
 *
 * and inst is rewritten into a TEX that reads temp.
 */
static void projective_divide(struct r300_fragment_program_compiler *compiler,
			      struct rc_instruction *inst)
{
	const unsigned quotient = rc_find_free_temporary(&compiler->Base);
	struct rc_instruction *recip;
	struct rc_instruction *scale;

	/* RCP quotient.w, coord.<W> */
	recip = rc_insert_new_instruction(&compiler->Base, inst->Prev);
	recip->U.I.Opcode = RC_OPCODE_RCP;
	recip->U.I.DstReg.File = RC_FILE_TEMPORARY;
	recip->U.I.DstReg.Index = quotient;
	recip->U.I.DstReg.WriteMask = RC_MASK_W;
	recip->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
	/* The coordinate may carry an arbitrary swizzle, so smear whichever
	 * component it maps to W instead of assuming it is W itself. */
	recip->U.I.SrcReg[0].Swizzle =
		RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3));

	/* MUL quotient, coord, quotient.wwww -- inserted after the RCP */
	scale = rc_insert_new_instruction(&compiler->Base, inst->Prev);
	scale->U.I.Opcode = RC_OPCODE_MUL;
	scale->U.I.DstReg.File = RC_FILE_TEMPORARY;
	scale->U.I.DstReg.Index = quotient;
	scale->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
	scale->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
	scale->U.I.SrcReg[1].Index = quotient;
	scale->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;

	/* The lookup itself becomes a non-projective TEX of the quotient. */
	inst->U.I.Opcode = RC_OPCODE_TEX;
	reset_srcreg(&inst->U.I.SrcReg[0]);
	inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
	inst->U.I.SrcReg[0].Index = quotient;
}
コード例 #4
0
ファイル: radeon_program_alu.c プロジェクト: nikai3d/mesa
/**
 * Lower SSG (sign) into two SLT instructions and one ADD.
 *
 * The three replacement instructions are inserted in front of inst, and
 * inst itself is removed afterwards.
 */
static void transform_r300_vertex_SSG(struct radeon_compiler* c,
	struct rc_instruction* inst)
{
	/* result = sign(x)
	 *
	 *   SLT tmp0, 0, x;
	 *   SLT tmp1, x, 0;
	 *   ADD result, tmp0, -tmp1;
	 */
	struct rc_dst_register dst0;
	unsigned tmp1;

	/* 0 < x
	 *
	 * try_to_reuse_dst() is called exactly once; an earlier duplicate call
	 * whose result was immediately overwritten has been dropped. */
	dst0 = try_to_reuse_dst(c, inst);
	emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
	      dst0,
	      builtin_zero,
	      inst->U.I.SrcReg[0]);

	/* x < 0
	 *
	 * The second temporary is looked up only after the first SLT has been
	 * emitted. */
	tmp1 = rc_find_free_temporary(c);
	emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
	      dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask),
	      inst->U.I.SrcReg[0],
	      builtin_zero);

	/* Either both are zero, or one of them is one and the other is zero. */
	/* result = tmp0 - tmp1 */
	emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
	      inst->U.I.DstReg,
	      srcreg(RC_FILE_TEMPORARY, dst0.Index),
	      negate(srcreg(RC_FILE_TEMPORARY, tmp1)));

	rc_remove_instruction(inst);
}
コード例 #5
0
ファイル: radeon_program_alu.c プロジェクト: nikai3d/mesa
/**
 * Lower SNE (set on not-equal) into two SLT instructions and one MAX.
 *
 * Both partial comparisons are written to temporaries. The destination of
 * inst is written exactly once, by the final MAX, and is never read back.
 * NOTE(review): reading the destination back is presumably invalid when it
 * is an output register -- confirm against the vertex engine constraints.
 *
 * The replacement instructions are inserted in front of inst, and inst is
 * removed afterwards.
 */
static void transform_r300_vertex_SNE(struct radeon_compiler *c,
	struct rc_instruction *inst)
{
	/* x != y  <==>  x < y || y < x */
	unsigned lt_xy;
	unsigned lt_yx;

	/* x < y */
	lt_xy = rc_find_free_temporary(c);
	emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
	      dstregtmpmask(lt_xy, inst->U.I.DstReg.WriteMask),
	      inst->U.I.SrcReg[0],
	      inst->U.I.SrcReg[1]);

	/* y < x
	 *
	 * Looked up after the first SLT has been emitted, so that the first
	 * temporary is already referenced by the program. */
	lt_yx = rc_find_free_temporary(c);
	emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
	      dstregtmpmask(lt_yx, inst->U.I.DstReg.WriteMask),
	      inst->U.I.SrcReg[1],
	      inst->U.I.SrcReg[0]);

	/* a || b  =  max(a, b) */
	emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
	      inst->U.I.DstReg,
	      srcreg(RC_FILE_TEMPORARY, lt_xy),
	      srcreg(RC_FILE_TEMPORARY, lt_yx));

	rc_remove_instruction(inst);
}
コード例 #6
0
ファイル: radeon_program_alu.c プロジェクト: nikai3d/mesa
/**
 * Lower SEQ (set on equal) into two SGE instructions and one MUL.
 *
 * Both partial comparisons are written to temporaries. The destination of
 * inst is written exactly once, by the final MUL, and is never read back.
 * NOTE(review): reading the destination back is presumably invalid when it
 * is an output register -- confirm against the vertex engine constraints.
 *
 * The replacement instructions are inserted in front of inst, and inst is
 * removed afterwards.
 */
static void transform_r300_vertex_SEQ(struct radeon_compiler *c,
	struct rc_instruction *inst)
{
	/* x == y  <==>  x >= y && y >= x */
	unsigned ge_xy;
	unsigned ge_yx;

	/* x >= y */
	ge_xy = rc_find_free_temporary(c);
	emit2(c, inst->Prev, RC_OPCODE_SGE, 0,
	      dstregtmpmask(ge_xy, inst->U.I.DstReg.WriteMask),
	      inst->U.I.SrcReg[0],
	      inst->U.I.SrcReg[1]);

	/* y >= x
	 *
	 * Looked up after the first SGE has been emitted, so that the first
	 * temporary is already referenced by the program. */
	ge_yx = rc_find_free_temporary(c);
	emit2(c, inst->Prev, RC_OPCODE_SGE, 0,
	      dstregtmpmask(ge_yx, inst->U.I.DstReg.WriteMask),
	      inst->U.I.SrcReg[1],
	      inst->U.I.SrcReg[0]);

	/* a && b  =  a * b */
	emit2(c, inst->Prev, RC_OPCODE_MUL, 0,
	      inst->U.I.DstReg,
	      srcreg(RC_FILE_TEMPORARY, ge_xy),
	      srcreg(RC_FILE_TEMPORARY, ge_yx));

	rc_remove_instruction(inst);
}
コード例 #7
0
ファイル: r3xx_vertprog.c プロジェクト: ChillyWillyGuru/RSXGL
/**
 * R3xx-R4xx vertex engine does not support the Absolute source operand modifier
 * and the Saturate opcode modifier. Only Absolute is currently transformed.
 *
 * Every source operand of inst that has Abs set is replaced by a temporary
 * that is filled by a MAX(a, -a) inserted in front of inst.
 *
 * The Negate mask of the operand is carried over to the rewritten operand.
 * Elsewhere in the compiler an operand with both Abs and Negate set is used
 * to express -abs(x), so dropping the mask would turn -|a| into |a|.
 *
 * @param c      compiler the new instruction and temporary belong to
 * @param inst   instruction whose sources are rewritten in place
 * @param unused not referenced
 * @return always 1
 */
static int transform_nonnative_modifiers(
	struct radeon_compiler *c,
	struct rc_instruction *inst,
	void* unused)
{
	const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
	unsigned i;

	/* Transform ABS(a) to MAX(a, -a). */
	for (i = 0; i < opcode->NumSrcRegs; i++) {
		if (inst->U.I.SrcReg[i].Abs) {
			struct rc_instruction *new_inst;
			unsigned temp;
			/* Remember the negation before the operand is wiped below. */
			const unsigned negate_mask = inst->U.I.SrcReg[i].Negate;

			inst->U.I.SrcReg[i].Abs = 0;

			temp = rc_find_free_temporary(c);

			/* Whatever the Negate mask is, the two MAX operands are
			 * the exact negation of each other in every channel, so
			 * the result is the absolute value. */
			new_inst = rc_insert_new_instruction(c, inst->Prev);
			new_inst->U.I.Opcode = RC_OPCODE_MAX;
			new_inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
			new_inst->U.I.DstReg.Index = temp;
			new_inst->U.I.SrcReg[0] = inst->U.I.SrcReg[i];
			new_inst->U.I.SrcReg[1] = inst->U.I.SrcReg[i];
			new_inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;

			/* The swizzle has already been applied by the MAX, so the
			 * temporary is read back unswizzled; only the negation
			 * still has to be applied on top of the absolute value. */
			memset(&inst->U.I.SrcReg[i], 0, sizeof(inst->U.I.SrcReg[i]));
			inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
			inst->U.I.SrcReg[i].Index = temp;
			inst->U.I.SrcReg[i].Swizzle = RC_SWIZZLE_XYZW;
			inst->U.I.SrcReg[i].Negate = negate_mask;
		}
	}
	return 1;
}
コード例 #8
0
ファイル: r3xx_vertprog.c プロジェクト: ChillyWillyGuru/RSXGL
/**
 * Shift an address-register load and the relative accesses that follow it.
 *
 * An "ADD temp.x, <ARL source>, min_offset" is inserted in front of the ARL,
 * the ARL is changed to read temp.xxxx, and min_offset is subtracted from
 * the Index of every relatively addressed source operand of the
 * instructions after the ARL, up to but excluding end.
 */
static void transform_negative_addressing(struct r300_vertex_program_compiler *c,
					  struct rc_instruction *arl,
					  struct rc_instruction *end,
					  int min_offset)
{
	struct rc_instruction *cur, *bias;
	unsigned bias_swizzle;

	/* ADD temp.x, <ARL source>, min_offset
	 *
	 * The statement order below is kept exactly as it was: the temporary
	 * is looked up after the new instruction has been inserted and its
	 * destination file has been set. */
	bias = rc_insert_new_instruction(&c->Base, arl->Prev);
	bias->U.I.Opcode = RC_OPCODE_ADD;
	bias->U.I.DstReg.File = RC_FILE_TEMPORARY;
	bias->U.I.DstReg.Index = rc_find_free_temporary(&c->Base);
	bias->U.I.DstReg.WriteMask = RC_MASK_X;
	bias->U.I.SrcReg[0] = arl->U.I.SrcReg[0];
	bias->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
	bias->U.I.SrcReg[1].Index = rc_constants_add_immediate_scalar(&c->Base.Program.Constants,
								      min_offset, &bias_swizzle);
	bias->U.I.SrcReg[1].Swizzle = bias_swizzle;

	/* The ARL now loads the sum from temp.x. */
	arl->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
	arl->U.I.SrcReg[0].Index = bias->U.I.DstReg.Index;
	arl->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XXXX;

	/* Rewrite offsets up to and excluding end. */
	cur = arl->Next;
	while (cur != end) {
		const struct rc_opcode_info * info = rc_get_opcode_info(cur->U.I.Opcode);
		unsigned s;

		for (s = 0; s < info->NumSrcRegs; s++) {
			if (!cur->U.I.SrcReg[s].RelAddr)
				continue;
			cur->U.I.SrcReg[s].Index -= min_offset;
		}

		cur = cur->Next;
	}
}
コード例 #9
0
ファイル: radeon_program_alu.c プロジェクト: nikai3d/mesa
/**
 * Range-reduce the argument of COS, SIN and SCS for the vertex engine.
 *
 * The first source component is wrapped into [-PI, PI] in the W channel of a
 * temporary, and the instruction is then handed to
 * r300_transform_SIN_COS_SCS() together with that temporary.
 *
 * @return 0 if inst is not a COS, SIN or SCS (nothing is changed), else 1
 */
int r300_transform_trig_scale_vertex(struct radeon_compiler *c,
	struct rc_instruction *inst,
	void *unused)
{
	/* { 1/(2*PI), 0.5, 2*PI, -PI } */
	static const float repeat_consts[4] = {0.15915494309189535, 0.5, 6.28318530717959, -3.14159265358979};
	const unsigned int opcode = inst->U.I.Opcode;
	unsigned int reduced;
	unsigned int vec;

	if (!(opcode == RC_OPCODE_COS ||
	      opcode == RC_OPCODE_SIN ||
	      opcode == RC_OPCODE_SCS)) {
		return 0;
	}

	/* Repeat x in the range [-PI, PI]:
	 *
	 *   repeat(x) = frac(x / 2PI + 0.5) * 2PI - PI
	 */

	reduced = rc_find_free_temporary(c);
	vec = rc_constants_add_immediate_vec4(&c->Program.Constants, repeat_consts);

	/* reduced.w = x * 1/(2*PI) + 0.5 */
	emit3(c, inst->Prev, RC_OPCODE_MAD, 0,
	      dstregtmpmask(reduced, RC_MASK_W),
	      swizzle_xxxx(inst->U.I.SrcReg[0]),
	      srcregswz(RC_FILE_CONSTANT, vec, RC_SWIZZLE_XXXX),
	      srcregswz(RC_FILE_CONSTANT, vec, RC_SWIZZLE_YYYY));

	/* reduced.w = frac(reduced.w) */
	emit1(c, inst->Prev, RC_OPCODE_FRC, 0,
	      dstregtmpmask(reduced, RC_MASK_W),
	      srcreg(RC_FILE_TEMPORARY, reduced));

	/* reduced.w = reduced.w * 2*PI - PI */
	emit3(c, inst->Prev, RC_OPCODE_MAD, 0,
	      dstregtmpmask(reduced, RC_MASK_W),
	      srcreg(RC_FILE_TEMPORARY, reduced),
	      srcregswz(RC_FILE_CONSTANT, vec, RC_SWIZZLE_ZZZZ),
	      srcregswz(RC_FILE_CONSTANT, vec, RC_SWIZZLE_WWWW));

	r300_transform_SIN_COS_SCS(c, inst, reduced);
	return 1;
}
コード例 #10
0
ファイル: radeon_program_alu.c プロジェクト: nikai3d/mesa
/**
 * Pre-scale the argument of COS, SIN and SCS.
 *
 * The first source component is multiplied by 1/(2*PI) and reduced to its
 * fractional part, so that the input to COS and SIN is always in the range
 * [0,1). The result lives in the W channel of a temporary that is handed to
 * r300_transform_SIN_COS_SCS() together with the instruction.
 * SCS is replaced by one COS and one SIN instruction.
 *
 * @warning This transformation implicitly changes the semantics of SIN and COS!
 *
 * @return 0 if inst is not a COS, SIN or SCS (nothing is changed), else 1
 */
int radeonTransformTrigScale(struct radeon_compiler* c,
	struct rc_instruction* inst,
	void* unused)
{
	static const float RCP_2PI = 0.15915494309189535;
	const unsigned int opcode = inst->U.I.Opcode;
	unsigned int scaled;
	unsigned int scale_index;
	unsigned int scale_swizzle;

	if (!(opcode == RC_OPCODE_COS ||
	      opcode == RC_OPCODE_SIN ||
	      opcode == RC_OPCODE_SCS)) {
		return 0;
	}

	scaled = rc_find_free_temporary(c);
	scale_index = rc_constants_add_immediate_scalar(&c->Program.Constants, RCP_2PI, &scale_swizzle);

	/* scaled.w = x * 1/(2*PI) */
	emit2(c, inst->Prev, RC_OPCODE_MUL, 0,
	      dstregtmpmask(scaled, RC_MASK_W),
	      swizzle_xxxx(inst->U.I.SrcReg[0]),
	      srcregswz(RC_FILE_CONSTANT, scale_index, scale_swizzle));

	/* scaled.w = frac(scaled.w) */
	emit1(c, inst->Prev, RC_OPCODE_FRC, 0,
	      dstregtmpmask(scaled, RC_MASK_W),
	      srcreg(RC_FILE_TEMPORARY, scaled));

	r300_transform_SIN_COS_SCS(c, inst, scaled);
	return 1;
}
コード例 #11
0
/**
 * The FACE input in hardware contains 1 if it's a back face, 0 otherwise.
 * Gallium and OpenGL define it the other way around.
 *
 * So an "ADD temp.x, 1, -FACE.x" is inserted at the very beginning of the
 * shader, and every later source operand that reads input 'face' is
 * redirected to the newly allocated temporary.
 */
void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face)
{
	const unsigned flipped = rc_find_free_temporary(c);
	struct rc_instruction *inst_flip;
	struct rc_instruction *cur;

	/* flipped.x = 1 - FACE.x */
	inst_flip = rc_insert_new_instruction(c, &c->Program.Instructions);
	inst_flip->U.I.Opcode = RC_OPCODE_ADD;

	inst_flip->U.I.DstReg.File = RC_FILE_TEMPORARY;
	inst_flip->U.I.DstReg.Index = flipped;
	inst_flip->U.I.DstReg.WriteMask = RC_MASK_X;

	/* First operand: the constant 1 */
	inst_flip->U.I.SrcReg[0].File = RC_FILE_NONE;
	inst_flip->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;

	/* Second operand: -FACE.x */
	inst_flip->U.I.SrcReg[1].File = RC_FILE_INPUT;
	inst_flip->U.I.SrcReg[1].Index = face;
	inst_flip->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;
	inst_flip->U.I.SrcReg[1].Negate = RC_MASK_XYZW;

	/* Redirect all reads of the FACE input that follow the ADD. */
	cur = inst_flip->Next;
	while (cur != &c->Program.Instructions) {
		const struct rc_opcode_info * info = rc_get_opcode_info(cur->U.I.Opcode);
		unsigned s;

		for (s = 0; s < info->NumSrcRegs; s++) {
			if (cur->U.I.SrcReg[s].File != RC_FILE_INPUT ||
			    cur->U.I.SrcReg[s].Index != face)
				continue;

			cur->U.I.SrcReg[s].File = RC_FILE_TEMPORARY;
			cur->U.I.SrcReg[s].Index = flipped;
		}

		cur = cur->Next;
	}
}
コード例 #12
0
/**
 * Replace source operand 'src' of inst by a temporary register.
 *
 * The set of channels the operand actually uses (every channel whose swizzle
 * is not RC_SWIZZLE_UNUSED) is handed to the Split callback of the
 * compiler's swizzle capabilities. For each phase it returns, one MOV into
 * the temporary is inserted in front of inst, writing only the channels of
 * that phase. Finally the operand is changed to read the temporary with an
 * identity swizzle on the used channels and without Negate or Abs.
 *
 * @param c    compiler providing the swizzle capabilities and temporaries
 * @param inst instruction whose operand is rewritten in place
 * @param src  index of the source operand to rewrite
 */
static void rewrite_source(struct radeon_compiler * c,
		struct rc_instruction * inst, unsigned src)
{
	struct rc_swizzle_split split;
	unsigned int tempreg = rc_find_free_temporary(c);
	unsigned int usemask;

	/* Collect the channels that the operand actually reads. */
	usemask = 0;
	for(unsigned int chan = 0; chan < 4; ++chan) {
		if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED)
			usemask |= 1 << chan;
	}

	c->SwizzleCaps->Split(inst->U.I.SrcReg[src], usemask, &split);

	for(unsigned int phase = 0; phase < split.NumPhases; ++phase) {
		struct rc_instruction * mov = rc_insert_new_instruction(c, inst->Prev);
		unsigned int masked_negate;

		mov->U.I.Opcode = RC_OPCODE_MOV;
		mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
		mov->U.I.DstReg.Index = tempreg;
		mov->U.I.DstReg.WriteMask = split.Phase[phase];
		mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src];
		mov->U.I.PreSub = inst->U.I.PreSub;

		/* Channels outside this phase are not read by this MOV. */
		for(unsigned int chan = 0; chan < 4; ++chan) {
			if (!GET_BIT(split.Phase[phase], chan)) {
				SET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED);
			}
		}

		/* Normalize the negate mask when the channels of this phase
		 * agree: none negated -> no negation at all, all negated ->
		 * negate every channel. A mixed mask is left as it is. */
		masked_negate = split.Phase[phase] & mov->U.I.SrcReg[0].Negate;
		if (masked_negate == 0)
			mov->U.I.SrcReg[0].Negate = 0;
		else if (masked_negate == split.Phase[phase])
			mov->U.I.SrcReg[0].Negate = RC_MASK_XYZW;

	}

	/* Swizzle, negation and absolute value were all applied by the MOVs,
	 * so the operand reads the temporary straight through. */
	inst->U.I.SrcReg[src].File = RC_FILE_TEMPORARY;
	inst->U.I.SrcReg[src].Index = tempreg;
	inst->U.I.SrcReg[src].Swizzle = 0;
	inst->U.I.SrcReg[src].Negate = RC_MASK_NONE;
	inst->U.I.SrcReg[src].Abs = 0;
	for(unsigned int chan = 0; chan < 4; ++chan) {
		SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan,
				GET_BIT(usemask, chan) ? chan : RC_SWIZZLE_UNUSED);
	}
}
コード例 #13
0
ファイル: radeon_program_alu.c プロジェクト: nikai3d/mesa
/**
 * Build a temporary destination register for an intermediate result.
 *
 * The register index is the destination index of inst when
 * is_dst_safe_to_reuse() accepts it, and a free temporary otherwise. The
 * write mask is always the one of inst's destination.
 */
static struct rc_dst_register try_to_reuse_dst(struct radeon_compiler *c,
					       struct rc_instruction *inst)
{
	const unsigned index = is_dst_safe_to_reuse(inst)
		? inst->U.I.DstReg.Index
		: rc_find_free_temporary(c);

	return dstregtmpmask(index, inst->U.I.DstReg.WriteMask);
}
コード例 #14
0
ファイル: r3xx_vertprog.c プロジェクト: ChillyWillyGuru/RSXGL
/**
 * Copy source operand 'idx' of inst into a fresh temporary with a MOV that
 * is inserted in front of inst, and make inst read that temporary instead.
 */
static void spill_source_to_temp(struct radeon_compiler *c,
				 struct rc_instruction *inst,
				 unsigned idx)
{
	int tmpreg = rc_find_free_temporary(c);
	struct rc_instruction *inst_mov = rc_insert_new_instruction(c, inst->Prev);

	inst_mov->U.I.Opcode = RC_OPCODE_MOV;
	inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
	inst_mov->U.I.DstReg.Index = tmpreg;
	inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[idx];

	reset_srcreg(&inst->U.I.SrcReg[idx]);
	inst->U.I.SrcReg[idx].File = RC_FILE_TEMPORARY;
	inst->U.I.SrcReg[idx].Index = tmpreg;
}

/**
 * Vertex engine cannot read two inputs or two constants at the same time.
 * Introduce intermediate MOVs to temporary registers to account for this.
 *
 * For three-operand instructions the third operand is moved when it
 * conflicts with either of the other two; afterwards the second operand is
 * moved when it conflicts with the first.
 *
 * @return always 1
 */
static int transform_source_conflicts(
	struct radeon_compiler *c,
	struct rc_instruction* inst,
	void* unused)
{
	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
	const unsigned num_srcs = opcode->NumSrcRegs;

	/* Third operand against the second and the first. */
	if (num_srcs == 3 &&
	    (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[2]) ||
	     t_src_conflict(inst->U.I.SrcReg[0], inst->U.I.SrcReg[2]))) {
		spill_source_to_temp(c, inst, 2);
	}

	/* Second operand against the first. */
	if (num_srcs >= 2 &&
	    t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[0])) {
		spill_source_to_temp(c, inst, 1);
	}

	return 1;
}
コード例 #15
0
ファイル: radeon_program_alu.c プロジェクト: nikai3d/mesa
/**
 * Approximate sin(x), where x is clamped to (-pi/2, pi/2).
 *
 * MUL tmp.xy, src, { 4/PI, -4/(PI^2) }
 * MAD tmp.x, tmp.y, |src|, tmp.x
 * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x
 * MAD dest, tmp.y, weight, tmp.x
 */
static void sin_approx(
	struct radeon_compiler* c, struct rc_instruction * inst,
	struct rc_dst_register dst, struct rc_src_register src, const unsigned int* constants)
{
	unsigned int tempreg = rc_find_free_temporary(c);

	emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(tempreg, RC_MASK_XY),
		swizzle_xxxx(src),
		srcreg(RC_FILE_CONSTANT, constants[0]));
	emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_X),
		swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)),
		absolute(swizzle_xxxx(src)),
		swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)));
	emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_Y),
		swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)),
		absolute(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))),
		negate(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))));
	emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dst,
		swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)),
		swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[0])),
		swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)));
}
コード例 #16
0
/**
 * Multiply the texture coordinate of inst by a per-unit state constant.
 *
 * A "MUL temp, coord, <state_constant of inst's texture unit>" is inserted
 * in front of inst, and inst is changed to read temp.
 */
static void scale_texcoords(struct r300_fragment_program_compiler *compiler,
			    struct rc_instruction *inst,
			    unsigned state_constant)
{
	const unsigned scaled = rc_find_free_temporary(&compiler->Base);
	struct rc_instruction *inst_scale;

	/* MUL scaled, coord, <state constant> */
	inst_scale = rc_insert_new_instruction(&compiler->Base, inst->Prev);
	inst_scale->U.I.Opcode = RC_OPCODE_MUL;

	inst_scale->U.I.DstReg.File = RC_FILE_TEMPORARY;
	inst_scale->U.I.DstReg.Index = scaled;

	inst_scale->U.I.SrcReg[0] = inst->U.I.SrcReg[0];

	inst_scale->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
	inst_scale->U.I.SrcReg[1].Index =
		rc_constants_add_state(&compiler->Base.Program.Constants,
				       state_constant, inst->U.I.TexSrcUnit);

	/* The lookup reads the scaled coordinate from now on. */
	reset_srcreg(&inst->U.I.SrcReg[0]);
	inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
	inst->U.I.SrcReg[0].Index = scaled;
}
コード例 #17
0
/**
 * Rewrite the program such that a given output is duplicated.
 *
 * Every instruction that writes 'output' is redirected to a temporary, and
 * two MOVs are appended at the end of the program that copy the temporary
 * to 'output' and to 'dup_output'. 'dup_output' is then recorded in
 * Program.OutputsWritten.
 *
 * @param c          compiler whose program is rewritten
 * @param output     index of the output to duplicate
 * @param dup_output index of the output that receives the copy
 */
void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output)
{
	const unsigned tempreg = rc_find_free_temporary(c);
	const unsigned targets[2] = { output, dup_output };
	struct rc_instruction * inst;
	unsigned i;

	/* Redirect all writes of 'output' to the temporary. */
	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);

		if (opcode->HasDstReg) {
			if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
				inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
				inst->U.I.DstReg.Index = tempreg;
			}
		}
	}

	/* Append "MOV output, temp" followed by "MOV dup_output, temp". */
	for (i = 0; i < 2; i++) {
		inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
		inst->U.I.Opcode = RC_OPCODE_MOV;
		inst->U.I.DstReg.File = RC_FILE_OUTPUT;
		inst->U.I.DstReg.Index = targets[i];

		inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
		inst->U.I.SrcReg[0].Index = tempreg;
		inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
	}

	/* Shift an unsigned one: left-shifting a signed 1 into the sign bit
	 * (dup_output == 31) would be undefined behaviour. */
	c->Program.OutputsWritten |= 1U << dup_output;
}
コード例 #18
0
/**
 * Transform TEX, TXP, TXB, and KIL instructions in the following ways:
 *  - implement texture compare (shadow extensions)
 *  - extract non-native source / destination operands
 *  - premultiply texture coordinates for RECT
 *  - extract operand swizzles
 *  - introduce a temporary register when write masks are needed
 *
 * @param c    compiler the instruction belongs to
 * @param inst instruction to transform in place; helper instructions are
 *             inserted before and after it
 * @param data cast to the struct r300_fragment_program_compiler that holds
 *             the per-unit texture state
 * @return 0 if inst is not a TEX, TXB, TXP or KIL (nothing is changed),
 *         1 otherwise
 */
int radeonTransformTEX(
	struct radeon_compiler * c,
	struct rc_instruction * inst,
	void* data)
{
	struct r300_fragment_program_compiler *compiler =
		(struct r300_fragment_program_compiler*)data;

	if (inst->U.I.Opcode != RC_OPCODE_TEX &&
		inst->U.I.Opcode != RC_OPCODE_TXB &&
		inst->U.I.Opcode != RC_OPCODE_TXP &&
		inst->U.I.Opcode != RC_OPCODE_KIL)
		return 0;

	/* ARB_shadow & EXT_shadow_funcs */
	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
		((c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) ||
		 (compiler->state.unit[inst->U.I.TexSrcUnit].compare_mode_enabled))) {
		rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;

		if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) {
			/* The result does not depend on the sample: replace the
			 * lookup by a MOV of 1 (ALWAYS) or of the shadow ambient
			 * value (NEVER) and stop here. */
			inst->U.I.Opcode = RC_OPCODE_MOV;

			if (comparefunc == RC_COMPARE_FUNC_ALWAYS) {
				inst->U.I.SrcReg[0].File = RC_FILE_NONE;
				inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
			} else {
				inst->U.I.SrcReg[0] = shadow_ambient(compiler, inst->U.I.TexSrcUnit);
			}

			return 1;
		} else {
			/* NOTE(review): this declaration shadows the comparefunc
			 * of the enclosing scope; it is initialized from the same
			 * state field. */
			rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;
			struct rc_instruction * inst_rcp = NULL;
			struct rc_instruction * inst_mad;
			struct rc_instruction * inst_cmp;
			unsigned tmp_texsample;
			unsigned tmp_sum;
			unsigned tmp_recip_w = 0;
			int pass, fail, tex;

			/* Save the output register. */
			struct rc_dst_register output_reg = inst->U.I.DstReg;

			/* Redirect TEX to a new temp. */
			tmp_texsample = rc_find_free_temporary(c);
			inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst->U.I.DstReg.Index = tmp_texsample;
			inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;

			if (inst->U.I.Opcode == RC_OPCODE_TXP) {
				tmp_recip_w = rc_find_free_temporary(c);

				/* Compute 1/W. */
				inst_rcp = rc_insert_new_instruction(c, inst);
				inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
				inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
				inst_rcp->U.I.DstReg.Index = tmp_recip_w;
				inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
				inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
				inst_rcp->U.I.SrcReg[0].Swizzle =
					RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3));
			}

			/* Perspective-divide Z by W (if it's TXP) and add the texture sample (see below). */
			tmp_sum = rc_find_free_temporary(c);
			inst_mad = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst);
			inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_mad->U.I.DstReg.Index = tmp_sum;
			inst_mad->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
			inst_mad->U.I.SrcReg[0].Swizzle =
				RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 2));
			/* 'tex' is the operand slot that receives the texture
			 * sample: slot 2 of the MAD, slot 1 of the ADD. */
			if (inst->U.I.Opcode == RC_OPCODE_TXP) {
				inst_mad->U.I.Opcode = RC_OPCODE_MAD;
				inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
				inst_mad->U.I.SrcReg[1].Index = tmp_recip_w;
				inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
				tex = 2;
			} else {
				inst_mad->U.I.Opcode = RC_OPCODE_ADD;
				tex = 1;
			}
			inst_mad->U.I.SrcReg[tex].File = RC_FILE_TEMPORARY;
			inst_mad->U.I.SrcReg[tex].Index = tmp_texsample;
			inst_mad->U.I.SrcReg[tex].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_swizzle;

			/* Fake EQUAL/NOTEQUAL, it seems to pass some tests surprisingly. */
			if (comparefunc == RC_COMPARE_FUNC_EQUAL) {
				comparefunc = RC_COMPARE_FUNC_GEQUAL;
			} else if (comparefunc == RC_COMPARE_FUNC_NOTEQUAL) {
				comparefunc = RC_COMPARE_FUNC_LESS;
			}

			/* Recall that SrcReg[0] is r, SrcReg[tex] is tex and:
			 *   LESS:    r  < tex  <=>      -tex+r < 0
			 *   GEQUAL:  r >= tex  <=> not (-tex+r < 0)
			 *   GREATER: r  > tex  <=>       tex-r < 0
			 *   LEQUAL:  r <= tex  <=> not ( tex-r < 0)
			 *
			 * This negates either r or tex: */
			if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL)
				inst_mad->U.I.SrcReg[tex].Negate = inst_mad->U.I.SrcReg[tex].Negate ^ RC_MASK_XYZW;
			else
				inst_mad->U.I.SrcReg[0].Negate = inst_mad->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW;

			/* This negates the whole expression: */
			if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER) {
				pass = 1;
				fail = 2;
			} else {
				pass = 2;
				fail = 1;
			}

			/* CMP on the sign of the sum: operand slot 'pass' holds
			 * the constant 1, slot 'fail' the shadow ambient value. */
			inst_cmp = rc_insert_new_instruction(c, inst_mad);
			inst_cmp->U.I.Opcode = RC_OPCODE_CMP;
			inst_cmp->U.I.DstReg = output_reg;
			inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
			inst_cmp->U.I.SrcReg[0].Index = tmp_sum;
			inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE;
			inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111;
			inst_cmp->U.I.SrcReg[fail] = shadow_ambient(compiler, inst->U.I.TexSrcUnit);

			assert(tmp_texsample != tmp_sum && tmp_sum != tmp_recip_w);
		}
	}

	/* Divide by W if needed. */
	if (inst->U.I.Opcode == RC_OPCODE_TXP &&
	    (compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode == RC_WRAP_REPEAT ||
             compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode == RC_WRAP_MIRRORED_REPEAT)) {
		projective_divide(compiler, inst);
	}

	/* Texture wrap modes don't work on NPOT textures or texrects.
	 *
	 * The game plan is simple. We have two flags, fake_npot and
	 * non_normalized_coords, as well as a tex target. The RECT tex target
	 * will make the emitted code use non-scaled texcoords.
	 *
	 * Non-wrapped/clamped texcoords with NPOT are free in HW. Repeat and
	 * mirroring are not. If we need to repeat, we do:
	 *
	 * MUL temp, texcoord, <scaling factor constant>
	 * FRC temp, temp ; Discard integer portion of coords
	 *
	 * This gives us coords in [0, 1].
	 *
	 * Mirroring is trickier. We're going to start out like repeat:
	 *
	 * MUL temp, texcoord, <scaling factor constant> ; De-mirror across axes
	 * MUL temp, temp, 0.5 ; Pattern repeats in [0, 2]
	 *                            ; so scale to [0, 1]
	 * FRC temp, temp ; Make the pattern repeat
	 * MAD temp, temp, 2, -1 ; Move the pattern to [-1, 1]
	 * ADD temp, 1, -abs(temp) ; Now comes a neat trick: use abs to mirror the pattern.
	 *				; The pattern is backwards, so reverse it (1-x).
	 *
	 * This gives us coords in [0, 1].
	 *
	 * ~ C & M. ;)
	 */
	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
		(inst->U.I.TexSrcTarget == RC_TEXTURE_RECT ||
			compiler->state.unit[inst->U.I.TexSrcUnit].fake_npot ||
			compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords)) {
		rc_wrap_mode wrapmode = compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode;

		/* R300 cannot sample from rectangles. */
		if (!c->is_r500) {
			lower_texture_rect(compiler, inst);
		}

		if (compiler->state.unit[inst->U.I.TexSrcUnit].fake_npot &&
			wrapmode != RC_WRAP_NONE) {
			struct rc_instruction *inst_mov;
			unsigned temp = rc_find_free_temporary(c);

			/* For NPOT fallback, we need normalized coordinates anyway. */
			if (c->is_r500) {
				lower_texture_rect(compiler, inst);
			}

			if (wrapmode == RC_WRAP_REPEAT) {
				/* Both instructions will be paired up. */
				struct rc_instruction *inst_frc = rc_insert_new_instruction(c, inst->Prev);

				inst_frc->U.I.Opcode = RC_OPCODE_FRC;
				inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
				inst_frc->U.I.DstReg.Index = temp;
				inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
				inst_frc->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
			} else if (wrapmode == RC_WRAP_MIRRORED_REPEAT) {
				/*
				 * Function:
				 *   f(v) = 1 - abs(frac(v * 0.5) * 2 - 1)
				 *
				 * Code:
				 *   MUL temp, src0, 0.5
				 *   FRC temp, temp
				 *   MAD temp, temp, 2, -1
				 *   ADD temp, 1, -abs(temp)
				 */

				struct rc_instruction *inst_mul, *inst_frc, *inst_mad, *inst_add;
				unsigned two, two_swizzle;

				inst_mul = rc_insert_new_instruction(c, inst->Prev);

				inst_mul->U.I.Opcode = RC_OPCODE_MUL;
				inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
				inst_mul->U.I.DstReg.Index = temp;
				inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
				inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
				inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_HHHH;

				inst_frc = rc_insert_new_instruction(c, inst->Prev);

				inst_frc->U.I.Opcode = RC_OPCODE_FRC;
				inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
				inst_frc->U.I.DstReg.Index = temp;
				inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
				inst_frc->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
				inst_frc->U.I.SrcReg[0].Index = temp;
				inst_frc->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;

				two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle);
				inst_mad = rc_insert_new_instruction(c, inst->Prev);

				inst_mad->U.I.Opcode = RC_OPCODE_MAD;
				inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
				inst_mad->U.I.DstReg.Index = temp;
				inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
				inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
				inst_mad->U.I.SrcReg[0].Index = temp;
				inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
				inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
				inst_mad->U.I.SrcReg[1].Index = two;
				inst_mad->U.I.SrcReg[1].Swizzle = two_swizzle;
				inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_1111;
				inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZ;

				inst_add = rc_insert_new_instruction(c, inst->Prev);

				inst_add->U.I.Opcode = RC_OPCODE_ADD;
				inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
				inst_add->U.I.DstReg.Index = temp;
				inst_add->U.I.DstReg.WriteMask = RC_MASK_XYZ;
				inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
				inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
				inst_add->U.I.SrcReg[1].Index = temp;
				inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
				inst_add->U.I.SrcReg[1].Abs = 1;
				inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZ;
			} else if (wrapmode == RC_WRAP_MIRRORED_CLAMP) {
				/*
				 * Mirrored clamp modes are bloody simple, we just use abs
				 * to fold negative coordinates onto their positive
				 * mirror image. This works for
				 * all modes i.e. CLAMP, CLAMP_TO_EDGE, and CLAMP_TO_BORDER.
				 */
				/* NOTE(review): this declaration shadows the inst_mov
				 * declared at the top of the enclosing block. */
				struct rc_instruction *inst_mov;

				inst_mov = rc_insert_new_instruction(c, inst->Prev);

				inst_mov->U.I.Opcode = RC_OPCODE_MOV;
				inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
				inst_mov->U.I.DstReg.Index = temp;
				inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ;
				inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
				inst_mov->U.I.SrcReg[0].Abs = 1;
			}

			/* Preserve W for TXP/TXB. */
			inst_mov = rc_insert_new_instruction(c, inst->Prev);

			inst_mov->U.I.Opcode = RC_OPCODE_MOV;
			inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_mov->U.I.DstReg.Index = temp;
			inst_mov->U.I.DstReg.WriteMask = RC_MASK_W;
			inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];

			/* The lookup reads the wrapped coordinate from temp. */
			reset_srcreg(&inst->U.I.SrcReg[0]);
			inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
			inst->U.I.SrcReg[0].Index = temp;
		}
	}

	/* Cannot write texture to output registers (all chips) or with masks (non-r500) */
	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
		(inst->U.I.DstReg.File != RC_FILE_TEMPORARY ||
		 (!c->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) {
		/* The MOV inserted after the lookup takes over the original
		 * destination; the lookup itself writes all four channels of
		 * a fresh temporary. */
		struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);

		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
		inst_mov->U.I.DstReg = inst->U.I.DstReg;
		inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
		inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c);

		inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
		inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;
		inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
	}

	/* Cannot read texture coordinate from constants file */
	if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) {
		/* Copy the coordinate into a temporary in front of the lookup. */
		struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);

		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
		inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
		inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c);
		inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];

		reset_srcreg(&inst->U.I.SrcReg[0]);
		inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
		inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index;
	}

	return 1;
}
コード例 #19
0
/**
 * Introduce standard code fragment to deal with fragment.position.
 */
void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input,
                                int full_vtransform)
{
	unsigned tempregi = rc_find_free_temporary(c);
	struct rc_instruction * inst_rcp;
	struct rc_instruction * inst_mul;
	struct rc_instruction * inst_mad;
	struct rc_instruction * inst;

	c->Program.InputsRead &= ~(1 << wpos);
	c->Program.InputsRead |= 1 << new_input;

	/* perspective divide */
	inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions);
	inst_rcp->U.I.Opcode = RC_OPCODE_RCP;

	inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
	inst_rcp->U.I.DstReg.Index = tempregi;
	inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;

	inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT;
	inst_rcp->U.I.SrcReg[0].Index = new_input;
	inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;

	inst_mul = rc_insert_new_instruction(c, inst_rcp);
	inst_mul->U.I.Opcode = RC_OPCODE_MUL;

	inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
	inst_mul->U.I.DstReg.Index = tempregi;
	inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;

	inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT;
	inst_mul->U.I.SrcReg[0].Index = new_input;

	inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
	inst_mul->U.I.SrcReg[1].Index = tempregi;
	inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;

	/* viewport transformation */
	inst_mad = rc_insert_new_instruction(c, inst_mul);
	inst_mad->U.I.Opcode = RC_OPCODE_MAD;

	inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
	inst_mad->U.I.DstReg.Index = tempregi;
	inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;

	inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
	inst_mad->U.I.SrcReg[0].Index = tempregi;
	inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;

	inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
	inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;

	inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT;
	inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0;

	if (full_vtransform) {
		inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0);
		inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0);
	} else {
		inst_mad->U.I.SrcReg[1].Index =
		inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0);
	}

	for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) {
		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
		unsigned i;

		for(i = 0; i < opcode->NumSrcRegs; i++) {
			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
			    inst->U.I.SrcReg[i].Index == wpos) {
				inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
				inst->U.I.SrcReg[i].Index = tempregi;
			}
		}
	}
}
コード例 #20
0
ファイル: radeon_program_alu.c プロジェクト: nikai3d/mesa
/**
 * Definition of LIT (from ARB_fragment_program):
 *
 *  tmp = VectorLoad(op0);
 *  if (tmp.x < 0) tmp.x = 0;
 *  if (tmp.y < 0) tmp.y = 0;
 *  if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
 *  else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
 *  result.x = 1.0;
 *  result.y = tmp.x;
 *  result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
 *  result.w = 1.0;
 *
 * The longest path of computation is the one leading to result.z,
 * consisting of 5 operations. This implementation of LIT takes
 * 5 slots, if the subsequent optimization passes are clever enough
 * to pair instructions correctly.
 */
static void transform_LIT(struct radeon_compiler* c,
	struct rc_instruction* inst)
{
	unsigned int constant;
	unsigned int constant_swizzle;
	unsigned int temp;
	struct rc_src_register srctemp;

	constant = rc_constants_add_immediate_scalar(&c->Program.Constants, -127.999999, &constant_swizzle);

	if (inst->U.I.DstReg.WriteMask != RC_MASK_XYZW || inst->U.I.DstReg.File != RC_FILE_TEMPORARY) {
		struct rc_instruction * inst_mov;

		inst_mov = emit1(c, inst,
			RC_OPCODE_MOV, 0, inst->U.I.DstReg,
			srcreg(RC_FILE_TEMPORARY, rc_find_free_temporary(c)));

		inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
		inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;
		inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
	}

	temp = inst->U.I.DstReg.Index;
	srctemp = srcreg(RC_FILE_TEMPORARY, temp);

	/* tmp.x = max(0.0, Src.x); */
	/* tmp.y = max(0.0, Src.y); */
	/* tmp.w = clamp(Src.z, -128+eps, 128-eps); */
	emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
		dstregtmpmask(temp, RC_MASK_XYW),
		inst->U.I.SrcReg[0],
		swizzle(srcreg(RC_FILE_CONSTANT, constant),
			RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, constant_swizzle&3));
	emit2(c, inst->Prev, RC_OPCODE_MIN, 0,
		dstregtmpmask(temp, RC_MASK_Z),
		swizzle_wwww(srctemp),
		negate(srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)));

	/* tmp.w = Pow(tmp.y, tmp.w) */
	emit1(c, inst->Prev, RC_OPCODE_LG2, 0,
		dstregtmpmask(temp, RC_MASK_W),
		swizzle_yyyy(srctemp));
	emit2(c, inst->Prev, RC_OPCODE_MUL, 0,
		dstregtmpmask(temp, RC_MASK_W),
		swizzle_wwww(srctemp),
		swizzle_zzzz(srctemp));
	emit1(c, inst->Prev, RC_OPCODE_EX2, 0,
		dstregtmpmask(temp, RC_MASK_W),
		swizzle_wwww(srctemp));

	/* tmp.z = (tmp.x > 0) ? tmp.w : 0.0 */
	emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode,
		dstregtmpmask(temp, RC_MASK_Z),
		negate(swizzle_xxxx(srctemp)),
		swizzle_wwww(srctemp),
		builtin_zero);

	/* tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 */
	emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode,
		dstregtmpmask(temp, RC_MASK_XYW),
		swizzle(srctemp, RC_SWIZZLE_ONE, RC_SWIZZLE_X, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE));

	rc_remove_instruction(inst);
}
コード例 #21
0
/**
 * Transform TEX, TXP, TXB, TXD, TXL and KIL instructions in the following ways:
 *  - implement texture compare (shadow extensions)
 *  - extract non-native source / destination operands
 *  - premultiply texture coordinates for RECT
 *  - extract operand swizzles
 *  - introduce a temporary register when write masks are needed
 *
 * Every stage except the final "coordinate must not come from a constant"
 * fixup is skipped for KIL.
 *
 * c:    compiler state; helper instructions are inserted into its program.
 * inst: instruction to examine and, when it is a texture op, rewrite in place.
 * data: opaque pointer, cast to struct r300_fragment_program_compiler *.
 *
 * Returns 0 (instruction untouched) when the opcode is none of
 * TEX/TXB/TXP/TXD/TXL/KIL, and 1 otherwise.
 */
int radeonTransformTEX(
	struct radeon_compiler * c,
	struct rc_instruction * inst,
	void* data)
{
	struct r300_fragment_program_compiler *compiler =
		(struct r300_fragment_program_compiler*)data;
	/* Per-unit state is looked up by the instruction's texture unit. */
	rc_wrap_mode wrapmode = compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode;
	int is_rect = inst->U.I.TexSrcTarget == RC_TEXTURE_RECT ||
		      compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords;

	/* Only texture instructions and KIL are handled here. */
	if (inst->U.I.Opcode != RC_OPCODE_TEX &&
		inst->U.I.Opcode != RC_OPCODE_TXB &&
		inst->U.I.Opcode != RC_OPCODE_TXP &&
		inst->U.I.Opcode != RC_OPCODE_TXD &&
		inst->U.I.Opcode != RC_OPCODE_TXL &&
		inst->U.I.Opcode != RC_OPCODE_KIL)
		return 0;

	/* ARB_shadow & EXT_shadow_funcs */
	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
		((c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) ||
		 (compiler->state.unit[inst->U.I.TexSrcUnit].compare_mode_enabled))) {
		rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;

		if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) {
			/* The result is known statically: the texture op itself
			 * becomes a MOV of the pass/fail value and no further
			 * lowering is applied. */
			inst->U.I.Opcode = RC_OPCODE_MOV;

			if (comparefunc == RC_COMPARE_FUNC_ALWAYS) {
				inst->U.I.SrcReg[0] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit);
			} else {
				inst->U.I.SrcReg[0] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit);
			}

			return 1;
		} else {
			/* The compare is done in the shader.  All helper
			 * instructions below are chained AFTER the texture op:
			 *   [RCP] -> MUL/MOV -> ADD -> CMP */
			struct rc_instruction * inst_rcp = NULL;
			struct rc_instruction *inst_mul, *inst_add, *inst_cmp;
			unsigned tmp_texsample;
			unsigned tmp_sum;
			int pass, fail;

			/* Save the output register. */
			struct rc_dst_register output_reg = inst->U.I.DstReg;
			unsigned saturate_mode = inst->U.I.SaturateMode;

			/* Redirect TEX to a new temp. */
			tmp_texsample = rc_find_free_temporary(c);
			inst->U.I.SaturateMode = 0;
			inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst->U.I.DstReg.Index = tmp_texsample;
			inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;

			/* Allocated after inst already references tmp_texsample;
			 * the assert at the end of this branch checks that the
			 * two temporaries are distinct. */
			tmp_sum = rc_find_free_temporary(c);

			if (inst->U.I.Opcode == RC_OPCODE_TXP) {
				/* Compute 1/W. */
				inst_rcp = rc_insert_new_instruction(c, inst);
				inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
				inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
				inst_rcp->U.I.DstReg.Index = tmp_sum;
				inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
				inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
				/* Smear whatever the coordinate's swizzle selects for W. */
				inst_rcp->U.I.SrcReg[0].Swizzle =
					RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3));
			}

			/* Divide Z by W (if it's TXP) and saturate. */
			inst_mul = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst);
			/* Without projection this is a plain saturated MOV of Z. */
			inst_mul->U.I.Opcode = inst->U.I.Opcode == RC_OPCODE_TXP ? RC_OPCODE_MUL : RC_OPCODE_MOV;
			inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_mul->U.I.DstReg.Index = tmp_sum;
			inst_mul->U.I.DstReg.WriteMask = RC_MASK_W;
			inst_mul->U.I.SaturateMode = RC_SATURATE_ZERO_ONE;
			inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
			/* Smear whatever the coordinate's swizzle selects for Z. */
			inst_mul->U.I.SrcReg[0].Swizzle =
				RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 2));
			if (inst->U.I.Opcode == RC_OPCODE_TXP) {
				inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
				inst_mul->U.I.SrcReg[1].Index = tmp_sum;
				inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
			}

			/* Add the depth texture value. */
			inst_add = rc_insert_new_instruction(c, inst_mul);
			inst_add->U.I.Opcode = RC_OPCODE_ADD;
			inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_add->U.I.DstReg.Index = tmp_sum;
			inst_add->U.I.DstReg.WriteMask = RC_MASK_W;
			inst_add->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
			inst_add->U.I.SrcReg[0].Index = tmp_sum;
			inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
			inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
			inst_add->U.I.SrcReg[1].Index = tmp_texsample;
			inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;

			/* Note that SrcReg[0] is r, SrcReg[1] is tex and:
			 *   LESS:    r  < tex  <=>      -tex+r < 0
			 *   GEQUAL:  r >= tex  <=> not (-tex+r < 0)
			 *   GREATER: r  > tex  <=>       tex-r < 0
			 *   LEQUAL:  r <= tex  <=> not ( tex-r < 0)
			 *   EQUAL:   GEQUAL
			 *   NOTEQUAL:LESS
			 */

			/* This negates either r or tex: */
			if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL ||
			    comparefunc == RC_COMPARE_FUNC_EQUAL || comparefunc == RC_COMPARE_FUNC_NOTEQUAL)
				inst_add->U.I.SrcReg[1].Negate = inst_add->U.I.SrcReg[1].Negate ^ RC_MASK_XYZW;
			else
				inst_add->U.I.SrcReg[0].Negate = inst_add->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW;

			/* This negates the whole expression by swapping which CMP
			 * operand slot receives the pass and the fail value: */
			if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER ||
			    comparefunc == RC_COMPARE_FUNC_NOTEQUAL) {
				pass = 1;
				fail = 2;
			} else {
				pass = 2;
				fail = 1;
			}

			/* The CMP writes the originally requested destination with
			 * the originally requested saturate mode. */
			inst_cmp = rc_insert_new_instruction(c, inst_add);
			inst_cmp->U.I.Opcode = RC_OPCODE_CMP;
			inst_cmp->U.I.SaturateMode = saturate_mode;
			inst_cmp->U.I.DstReg = output_reg;
			inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
			inst_cmp->U.I.SrcReg[0].Index = tmp_sum;
			inst_cmp->U.I.SrcReg[0].Swizzle =
					combine_swizzles(RC_SWIZZLE_WWWW,
							 compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle);
			inst_cmp->U.I.SrcReg[pass] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit);
			inst_cmp->U.I.SrcReg[fail] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit);

			assert(tmp_texsample != tmp_sum);
		}
	}

	/* R300 cannot sample from rectangles and the wrap mode fallback needs
	 * normalized coordinates anyway. */
	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
	    is_rect && (!c->is_r500 || wrapmode != RC_WRAP_NONE)) {
		scale_texcoords(compiler, inst, RC_STATE_R300_TEXRECT_FACTOR);
		/* From here on the instruction samples a regular 2D target. */
		inst->U.I.TexSrcTarget = RC_TEXTURE_2D;
	}

	/* Divide by W if needed. */
	if (inst->U.I.Opcode == RC_OPCODE_TXP &&
	    (wrapmode == RC_WRAP_REPEAT || wrapmode == RC_WRAP_MIRRORED_REPEAT ||
	     compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch)) {
		projective_divide(compiler, inst);
	}

	/* Texture wrap modes don't work on NPOT textures.
	 *
	 * Non-wrapped/clamped texcoords with NPOT are free in HW. Repeat and
	 * mirroring are not. If we need to repeat, we do:
	 *
	 * MUL temp, texcoord, <scaling factor constant>
	 * FRC temp, temp ; Discard integer portion of coords
	 *
	 * This gives us coords in [0, 1].
	 *
	 * Mirroring is trickier. We're going to start out like repeat:
	 *
	 * MUL temp, texcoord, <scaling factor constant> ; De-mirror across axes
	 * MUL temp, temp, 0.5 ; Pattern repeats in [0, 2]
	 *                            ; so scale to [0, 1]
	 * FRC temp, temp ; Make the pattern repeat
	 * MAD temp, temp, 2, -1 ; Move the pattern to [-1, 1]
	 * ADD temp, 1, -abs(temp) ; Now comes a neat trick: use abs to mirror the pattern.
	 *				; The pattern is backwards, so reverse it (1-x).
	 *
	 * This gives us coords in [0, 1].
	 *
	 * ~ C & M. ;)
	 */
	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
	    wrapmode != RC_WRAP_NONE) {
		struct rc_instruction *inst_mov;
		/* All wrap-emulation instructions are inserted BEFORE the
		 * texture op and write XYZ of this one temporary. */
		unsigned temp = rc_find_free_temporary(c);

		if (wrapmode == RC_WRAP_REPEAT) {
			/* Both instructions will be paired up. */
			struct rc_instruction *inst_frc = rc_insert_new_instruction(c, inst->Prev);

			inst_frc->U.I.Opcode = RC_OPCODE_FRC;
			inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_frc->U.I.DstReg.Index = temp;
			inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
			inst_frc->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
		} else if (wrapmode == RC_WRAP_MIRRORED_REPEAT) {
			/*
			 * Function:
			 *   f(v) = 1 - abs(frac(v * 0.5) * 2 - 1)
			 *
			 * Code:
			 *   MUL temp, src0, 0.5
			 *   FRC temp, temp
			 *   MAD temp, temp, 2, -1
			 *   ADD temp, 1, -abs(temp)
			 */

			struct rc_instruction *inst_mul, *inst_frc, *inst_mad, *inst_add;
			unsigned two, two_swizzle;

			inst_mul = rc_insert_new_instruction(c, inst->Prev);

			inst_mul->U.I.Opcode = RC_OPCODE_MUL;
			inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_mul->U.I.DstReg.Index = temp;
			inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
			inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
			/* Only the swizzle of the second operand is set; per the
			 * code listing above it supplies the 0.5 factor. */
			inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_HHHH;

			inst_frc = rc_insert_new_instruction(c, inst->Prev);

			inst_frc->U.I.Opcode = RC_OPCODE_FRC;
			inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_frc->U.I.DstReg.Index = temp;
			inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
			inst_frc->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
			inst_frc->U.I.SrcReg[0].Index = temp;
			inst_frc->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;

			two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle);
			inst_mad = rc_insert_new_instruction(c, inst->Prev);

			inst_mad->U.I.Opcode = RC_OPCODE_MAD;
			inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_mad->U.I.DstReg.Index = temp;
			inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
			inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
			inst_mad->U.I.SrcReg[0].Index = temp;
			inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
			inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
			inst_mad->U.I.SrcReg[1].Index = two;
			inst_mad->U.I.SrcReg[1].Swizzle = two_swizzle;
			/* Third operand: negated "one" swizzle, i.e. the -1 term. */
			inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_1111;
			inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZ;

			inst_add = rc_insert_new_instruction(c, inst->Prev);

			inst_add->U.I.Opcode = RC_OPCODE_ADD;
			inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_add->U.I.DstReg.Index = temp;
			inst_add->U.I.DstReg.WriteMask = RC_MASK_XYZ;
			inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
			inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
			inst_add->U.I.SrcReg[1].Index = temp;
			inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
			/* Abs and Negate together form the -abs(temp) operand. */
			inst_add->U.I.SrcReg[1].Abs = 1;
			inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZ;
		} else if (wrapmode == RC_WRAP_MIRRORED_CLAMP) {
			/*
			 * Mirrored clamp modes are simple: taking abs of the
			 * coordinate mirrors [-1, 0] onto [0, 1]. This works for
			 * all modes i.e. CLAMP, CLAMP_TO_EDGE, and CLAMP_TO_BORDER.
			 */
			/* NOTE(review): this declaration shadows the inst_mov
			 * declared at the top of the enclosing block. */
			struct rc_instruction *inst_mov;

			inst_mov = rc_insert_new_instruction(c, inst->Prev);

			inst_mov->U.I.Opcode = RC_OPCODE_MOV;
			inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_mov->U.I.DstReg.Index = temp;
			inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ;
			inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
			inst_mov->U.I.SrcReg[0].Abs = 1;
		}

		/* Preserve W for TXP/TXB: the branches above only wrote XYZ. */
		inst_mov = rc_insert_new_instruction(c, inst->Prev);

		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
		inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
		inst_mov->U.I.DstReg.Index = temp;
		inst_mov->U.I.DstReg.WriteMask = RC_MASK_W;
		inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];

		/* The texture op now reads its coordinate from the temporary. */
		reset_srcreg(&inst->U.I.SrcReg[0]);
		inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
		inst->U.I.SrcReg[0].Index = temp;
	}

	/* NPOT -> POT conversion for 3D textures. */
	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
	    compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch) {
		struct rc_instruction *inst_mov;
		unsigned temp = rc_find_free_temporary(c);

		/* Saturate XYZ. */
		inst_mov = rc_insert_new_instruction(c, inst->Prev);
		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
		inst_mov->U.I.SaturateMode = RC_SATURATE_ZERO_ONE;
		inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
		inst_mov->U.I.DstReg.Index = temp;
		inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ;
		inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];

		/* Copy W (unsaturated). */
		inst_mov = rc_insert_new_instruction(c, inst->Prev);
		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
		inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
		inst_mov->U.I.DstReg.Index = temp;
		inst_mov->U.I.DstReg.WriteMask = RC_MASK_W;
		inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];

		reset_srcreg(&inst->U.I.SrcReg[0]);
		inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
		inst->U.I.SrcReg[0].Index = temp;

		/* Scaling is applied after the clamp, on the redirected source. */
		scale_texcoords(compiler, inst, RC_STATE_R300_TEXSCALE_FACTOR);
	}

	/* Convert SNORM-encoded ATI1N sampled as UNORM to SNORM.
	 * Formula: dst = tex > 0.5 ? tex*2-2 : tex*2
	 */
	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
	    compiler->state.unit[inst->U.I.TexSrcUnit].convert_unorm_to_snorm) {
		unsigned two, two_swizzle;
		struct rc_instruction *inst_mul, *inst_mad, *inst_cnd;

		/* NOTE(review): the immediate registered here is 2.35, while the
		 * formula above, the variable name and the "2" annotations below
		 * all say 2 -- confirm whether this value is intentional. */
		two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2.35, &two_swizzle);

		/* Helpers are chained AFTER the texture op: MUL -> MAD -> CND.
		 * Each rc_find_free_temporary() call runs after the previous
		 * result has been stored in an already-inserted instruction. */
		inst_mul = rc_insert_new_instruction(c, inst);
		inst_mul->U.I.Opcode = RC_OPCODE_MUL;
		inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
		inst_mul->U.I.DstReg.Index = rc_find_free_temporary(c);
		inst_mul->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
		inst_mul->U.I.SrcReg[0].Index = rc_find_free_temporary(c); /* redirected TEX output */
		inst_mul->U.I.SrcReg[1].File = RC_FILE_CONSTANT; /* 2 */
		inst_mul->U.I.SrcReg[1].Index = two;
		inst_mul->U.I.SrcReg[1].Swizzle = two_swizzle;

		inst_mad = rc_insert_new_instruction(c, inst_mul);
		inst_mad->U.I.Opcode = RC_OPCODE_MAD;
		inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
		inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c);
		inst_mad->U.I.SrcReg[0] = inst_mul->U.I.SrcReg[0]; /* redirected TEX output */
		inst_mad->U.I.SrcReg[1] = inst_mul->U.I.SrcReg[1]; /* 2 */
		inst_mad->U.I.SrcReg[2] = inst_mul->U.I.SrcReg[1]; /* 2 */
		inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZW;

		/* CND writes the original destination: operand 0 is the MAD
		 * result, operand 1 the MUL result, operand 2 the raw sample
		 * used as the selector (see the formula above). */
		inst_cnd = rc_insert_new_instruction(c, inst_mad);
		inst_cnd->U.I.Opcode = RC_OPCODE_CND;
		inst_cnd->U.I.SaturateMode = inst->U.I.SaturateMode;
		inst_cnd->U.I.DstReg = inst->U.I.DstReg;
		inst_cnd->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
		inst_cnd->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index;
		inst_cnd->U.I.SrcReg[0].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle;
		inst_cnd->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
		inst_cnd->U.I.SrcReg[1].Index = inst_mul->U.I.DstReg.Index;
		inst_cnd->U.I.SrcReg[1].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle;
		inst_cnd->U.I.SrcReg[2] = inst_mul->U.I.SrcReg[0]; /* redirected TEX output */

		/* Finally point the texture op at the temporary the helpers read. */
		inst->U.I.SaturateMode = 0;
		inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
		inst->U.I.DstReg.Index = inst_mul->U.I.SrcReg[0].Index;
		inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
	}

	/* Cannot write texture to output registers or with saturate (all chips),
	 * or with masks (non-r500). */
	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
		(inst->U.I.DstReg.File != RC_FILE_TEMPORARY ||
		 inst->U.I.SaturateMode ||
		 (!c->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) {
		/* A MOV after the texture op takes over the original
		 * destination, write mask and saturate mode. */
		struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);

		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
		inst_mov->U.I.SaturateMode = inst->U.I.SaturateMode;
		inst_mov->U.I.DstReg = inst->U.I.DstReg;
		inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
		inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c);

		inst->U.I.SaturateMode = 0;
		inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
		inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;
		inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
	}

	/* Cannot read texture coordinate from constants file (or anything
	 * other than a temporary or an input).  This check also applies to KIL. */
	if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) {
		/* Copy the operand into a temporary just before the instruction. */
		struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);

		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
		inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
		inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c);
		inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];

		reset_srcreg(&inst->U.I.SrcReg[0]);
		inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
		inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index;
	}

	return 1;
}
コード例 #22
0
ファイル: radeon_program_alu.c プロジェクト: nikai3d/mesa
/**
 * Translate the trigonometric functions COS, SIN, and SCS
 * using only the basic instructions
 *  MOV, ADD, MUL, MAD, FRC
 */
int r300_transform_trig_simple(struct radeon_compiler* c,
	struct rc_instruction* inst,
	void* unused)
{
	unsigned int constants[2];
	unsigned int tempreg;

	if (inst->U.I.Opcode != RC_OPCODE_COS &&
	    inst->U.I.Opcode != RC_OPCODE_SIN &&
	    inst->U.I.Opcode != RC_OPCODE_SCS)
		return 0;

	tempreg = rc_find_free_temporary(c);

	sincos_constants(c, constants);

	if (inst->U.I.Opcode == RC_OPCODE_COS) {
		/* MAD tmp.x, src, 1/(2*PI), 0.75 */
		/* FRC tmp.x, tmp.x */
		/* MAD tmp.z, tmp.x, 2*PI, -PI */
		emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
			swizzle_xxxx(inst->U.I.SrcReg[0]),
			swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])),
			swizzle_xxxx(srcreg(RC_FILE_CONSTANT, constants[1])));
		emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W),
			swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)));
		emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
			swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
			swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])),
			negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0]))));

		sin_approx(c, inst, inst->U.I.DstReg,
			swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
			constants);
	} else if (inst->U.I.Opcode == RC_OPCODE_SIN) {
		emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
			swizzle_xxxx(inst->U.I.SrcReg[0]),
			swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])),
			swizzle_yyyy(srcreg(RC_FILE_CONSTANT, constants[1])));
		emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W),
			swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)));
		emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
			swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
			swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])),
			negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0]))));

		sin_approx(c, inst, inst->U.I.DstReg,
			swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
			constants);
	} else {
		struct rc_dst_register dst;

		emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY),
			swizzle_xxxx(inst->U.I.SrcReg[0]),
			swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])),
			swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W));
		emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_XY),
			srcreg(RC_FILE_TEMPORARY, tempreg));
		emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY),
			srcreg(RC_FILE_TEMPORARY, tempreg),
			swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])),
			negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0]))));

		dst = inst->U.I.DstReg;

		dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_X;
		sin_approx(c, inst, dst,
			swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)),
			constants);

		dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_Y;
		sin_approx(c, inst, dst,
			swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)),
			constants);
	}

	rc_remove_instruction(inst);

	return 1;
}