コード例 #1
0
// Generates the pixel-shader code for TEV stage `n` and records, in
// uid_data.stagehash[n], the register state that the generated code depends on.
//
// out           - sink for the generated shader text; every direct out.Write() call
//                 in this function is gated on Write_Code.
// uid_data      - shader UID under construction; the entry for stage `n` is filled in
//                 and SetTevindrefTexmap() is called when the stage samples a texture.
// n             - TEV stage index.
// swapModeTable - swizzle strings, indexed by the stage's rswap / tswap selectors.
//
// Besides bpmem, this relies on names declared outside this function (Write_Code,
// ApiType, TevOverflowState, tevRascolor0_Expanded, tevRascolor1_Expanded and the
// tev*Table lookup tables). TevOverflowState and the *_Expanded flags are updated
// here, so the emitted text depends on the stages generated before this one.
static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, const char swapModeTable[4][5])
{
	const auto& ind = bpmem.tevind[n];
	auto& stage_uid = uid_data.stagehash[n];

	int texcoord = bpmem.tevorders[n / 2].getTexCoord(n & 1);
	const bool has_texcoord = static_cast<u32>(texcoord) < bpmem.genMode.numtexgens;
	const bool has_indstage = ind.bt < bpmem.genMode.numindstages;

	// HACK: fall back to texcoord 0 when the referenced tex gen is not enabled.
	if (!has_texcoord)
		texcoord = 0;

	if (Write_Code)
		out.Write("// TEV stage %d\n", n);

	stage_uid.hasindstage = has_indstage;
	stage_uid.tevorders_texcoord = texcoord;
	if (has_indstage)
		stage_uid.tevind = ind.hex & 0x7FFFFF;

	if (has_indstage && Write_Code)
	{
		out.Write("// indirect op\n");

		// Apply the indirect op to the regular coordinates, using indtex%d as the offset.
		if (ind.bs != ITBA_OFF)
		{
			// a_bump keeps only the upper bits of the selected indirect component
			// (the equivalent integer masks would be 0xE0 / 0xF0 / 0xF8 for 3 / 4 / 5 bits).
			// The emitted code avoids integer masking and uses float math instead:
			//   trunc(v * (1 / 2^k)) * 2^k
			// which clears the low k bits of a non-negative integer-valued float.
			// Both tables are indexed by fmt and hold the precomputed 1/2^k and 2^k.
			static const char *bump_component[] = { "", "x", "y", "z" };
			static const char *bump_scale[] = { "(1.0/8.0)", "(1.0/32.0)", "(1.0/16.0)", "(1.0/8.0)" };
			static const char *bump_norm[] =
			{
				"8.0",	// 5 bits kept
				"32.0",	// 3 bits kept
				"16.0",	// 4 bits kept
				"8.0"	// 5 bits kept
			};

			out.Write("a_bump = trunc(indtex%d.%s * %s) * %s;\n",
				ind.bt,
				bump_component[ind.bs],
				bump_scale[ind.fmt],
				bump_norm[ind.fmt]);
		}

		if (ind.mid == 0)
		{
			// No matrix selected: the indirect offset is zero.
			out.Write("float2 indtevtrans%d = float2(0.0,0.0);\n", n);
		}
		else
		{
			// Format: keep only the low nb bits of each indirect component.
			// Float emulation of (v & (2^nb - 1)) is frac(v / 2^nb) * 2^nb.
			static const char *fmt_scale[] =
			{
				"(1.0/256.0)",	// 8 bits (& 0xFF)
				"(1.0/32.0)",	// 5 bits (& 0x1F)
				"(1.0/16.0)",	// 4 bits (& 0x0F)
				"(1.0/8.0)"		// 3 bits (& 0x07)
			};
			static const char *fmt_norm[] =
			{
				"256.0",// 8 bits
				"32.0",	// 5 bits
				"16.0",	// 4 bits
				"8.0"	// 3 bits
			};

			if (ind.fmt > 0)
			{
				out.Write("float3 indtevcrd%d = round(frac(indtex%d * %s) * %s);\n",
					n,
					ind.bt,
					fmt_scale[ind.fmt],
					fmt_norm[ind.fmt]);
			}
			else
			{
				// fmt 0 uses the indirect sample unmasked
				out.Write("float3 indtevcrd%d = indtex%d;\n",
					n,
					ind.bt);
			}

			// Bias: the swizzle is selected by the bias field, the added value by fmt.
			static const char *bias_swizzle[] = { "", "x", "y", "xy", "z", "xz", "yz", "xyz" };
			static const char *bias_value[] = { "-128.0", "1.0", "1.0", "1.0" };
			if (ind.bias == ITB_S || ind.bias == ITB_T || ind.bias == ITB_U)
			{
				out.Write("indtevcrd%d.%s += %s;\n", n, bias_swizzle[ind.bias], bias_value[ind.fmt]);
			}
			else if (ind.bias == ITB_ST || ind.bias == ITB_SU || ind.bias == ITB_TU)
			{
				out.Write("indtevcrd%d.%s += float2(%s, %s);\n", n, bias_swizzle[ind.bias],
					bias_value[ind.fmt], bias_value[ind.fmt]);
			}
			else if (ind.bias == ITB_STU)
			{
				out.Write("indtevcrd%d.%s += float3(%s, %s, %s);\n", n, bias_swizzle[ind.bias],
					bias_value[ind.fmt], bias_value[ind.fmt], bias_value[ind.fmt]);
			}

			// Multiply by the offset matrix and apply the scale.
			if (ind.mid <= 3)
			{
				// regular matrix: two rows starting at 2 * (mid - 1)
				const int row = 2 * (ind.mid - 1);
				out.Write("float2 indtevtrans%d = round(float2(dot(" I_INDTEXMTX "[%d].xyz, indtevcrd%d), dot(" I_INDTEXMTX "[%d].xyz, indtevcrd%d)));\n",
					n, row, n, row + 1, n);
				out.Write("indtevtrans%d = BSH(indtevtrans%d, float2(3.0,3.0));\n", n, n);
				out.Write("indtevtrans%d = BSH(indtevtrans%d, " I_INDTEXMTX "[%d].ww);\n", n, n, row);
			}
			else if (ind.mid <= 7 && has_texcoord)
			{
				// s matrix
				_assert_(ind.mid >= 5);
				const int row = 2 * (ind.mid - 5);
				out.Write("float2 indtevtrans%d = uv%d.xy * indtevcrd%d.xx;\n", n, texcoord, n);
				out.Write("indtevtrans%d = BSH(indtevtrans%d, float2(8.0,8.0));\n", n, n);
				out.Write("indtevtrans%d = BSH(indtevtrans%d, " I_INDTEXMTX "[%d].ww);\n", n, n, row);
			}
			else if (ind.mid <= 11 && has_texcoord)
			{
				// t matrix
				_assert_(ind.mid >= 9);
				const int row = 2 * (ind.mid - 9);
				out.Write("float2 indtevtrans%d = uv%d.xy * indtevcrd%d.yy;\n", n, texcoord, n);
				out.Write("indtevtrans%d = BSH(indtevtrans%d, float2(8.0,8.0));\n", n, n);
				out.Write("indtevtrans%d = BSH(indtevtrans%d, " I_INDTEXMTX "[%d].ww);\n", n, n, row);
			}
			else
			{
				out.Write("float2 indtevtrans%d = float2(0.0,0.0);\n", n);
			}
		}

		// ---------
		// Wrapping
		// ---------
		// Wrap moduli, indexed by the sw / tw wrap selector.
		static const char *wrap_modulus[] = { "0.0", "(256.0*128.0)", "(128.0*128.0)", "(64.0*128.0)", "(32.0*128.0)", "(16.0*128.0)", "1.0" };

		// wrap S
		if (ind.sw == ITW_OFF)
			out.Write("wrappedcoord.x = uv%d.x;\n", texcoord);
		else if (ind.sw == ITW_0)
			out.Write("wrappedcoord.x = 0.0;\n");
		else
			out.Write("wrappedcoord.x = remainder(uv%d.x, %s);\n", texcoord, wrap_modulus[ind.sw]);

		// wrap T
		if (ind.tw == ITW_OFF)
			out.Write("wrappedcoord.y = uv%d.y;\n", texcoord);
		else if (ind.tw == ITW_0)
			out.Write("wrappedcoord.y = 0.0;\n");
		else
			out.Write("wrappedcoord.y = remainder(uv%d.y, %s);\n", texcoord, wrap_modulus[ind.tw]);

		// fb_addprev accumulates onto the previous stage's tevcoord instead of replacing it.
		out.Write(ind.fb_addprev
			? "tevcoord.xy += wrappedcoord + indtevtrans%d;\n"
			: "tevcoord.xy = wrappedcoord + indtevtrans%d;\n", n);
	}

	TevStageCombiner::ColorCombiner &cc = bpmem.combiners[n].colorC;
	TevStageCombiner::AlphaCombiner &ac = bpmem.combiners[n].alphaC;

	stage_uid.cc = cc.hex & 0xFFFFFF;
	stage_uid.ac = ac.hex & 0xFFFFF0; // the low bits (rswap / tswap) are OR-ed in below only when used

	// Rasterized colour input: only set up when some combiner input reads it.
	if (cc.UsedAsInput(TEVCOLORARG_RASA) || cc.UsedAsInput(TEVCOLORARG_RASC) || ac.UsedAsInput(TEVALPHAARG_RASA))
	{
		const int rswap = ac.rswap;
		stage_uid.ac |= ac.rswap;
		stage_uid.tevksel_swap1a = bpmem.tevksel[rswap * 2].swap1;
		stage_uid.tevksel_swap2a = bpmem.tevksel[rswap * 2].swap2;
		stage_uid.tevksel_swap1b = bpmem.tevksel[rswap * 2 + 1].swap1;
		stage_uid.tevksel_swap2b = bpmem.tevksel[rswap * 2 + 1].swap2;
		stage_uid.tevorders_colorchan = bpmem.tevorders[n / 2].getColorChan(n & 1);

		const char *ras_swizzle = swapModeTable[ac.rswap];
		if (Write_Code)
		{
			const int ras_channel = bpmem.tevorders[n / 2].getColorChan(n & 1);

			// Each vertex colour is expanded to the 0..255 range at most once per shader.
			if (ras_channel == 0 && !tevRascolor0_Expanded)
			{
				out.Write("colors_0 = round(colors_0 * 255.0);\n");
				tevRascolor0_Expanded = true;
			}
			if (ras_channel == 1 && !tevRascolor1_Expanded)
			{
				out.Write("colors_1 = round(colors_1 * 255.0);\n");
				tevRascolor1_Expanded = true;
			}

			out.Write("ras_t = %s.%s;\n", tevRasTable[ras_channel], ras_swizzle);

			const bool ras_may_overflow = ras_channel < 2;
			TevOverflowState[tevSources::RASC] = ras_may_overflow;
			TevOverflowState[tevSources::RASA] = ras_may_overflow;
		}
	}

	// Texture input.
	stage_uid.tevorders_enable = bpmem.tevorders[n / 2].getEnable(n & 1);
	if (bpmem.tevorders[n / 2].getEnable(n & 1))
	{
		// Without an indirect stage tevcoord has not been written yet for this stage.
		if (Write_Code && !has_indstage)
		{
			if (has_texcoord)
				out.Write("tevcoord.xy = uv%d.xy;\n", texcoord);
			else
				out.Write("tevcoord.xy = float2(0.0,0.0);\n");
		}

		const int tswap = ac.tswap;
		stage_uid.ac |= ac.tswap << 2;
		stage_uid.tevksel_swap1c = bpmem.tevksel[tswap * 2].swap1;
		stage_uid.tevksel_swap2c = bpmem.tevksel[tswap * 2].swap2;
		stage_uid.tevksel_swap1d = bpmem.tevksel[tswap * 2 + 1].swap1;
		stage_uid.tevksel_swap2d = bpmem.tevksel[tswap * 2 + 1].swap2;

		stage_uid.tevorders_texmap = bpmem.tevorders[n / 2].getTexMap(n & 1);

		const char *tex_swizzle = swapModeTable[ac.tswap];
		const int texmap = bpmem.tevorders[n / 2].getTexMap(n & 1);
		// NOTE(review): the slot passed here is the tswap selector, not an indirect
		// stage index - confirm this is what SetTevindrefTexmap expects.
		uid_data.SetTevindrefTexmap(tswap, texmap);

		if (Write_Code)
			out.Write("tex_t = ");

		SampleTexture<T, Write_Code, ApiType>(out, "(round(tevcoord) * (1.0/128.0))", tex_swizzle, texmap);
	}
	else if (Write_Code)
	{
		// texturing disabled for this stage: feed a constant 255 in every channel
		out.Write("tex_t = float4(255.0,255.0,255.0,255.0);\n");
	}

	// Konst input: only set up when some combiner input reads it.
	if (cc.UsedAsInput(TEVCOLORARG_KONST) || ac.UsedAsInput(TEVALPHAARG_KONST))
	{
		const int kc = bpmem.tevksel[n / 2].getKC(n & 1);
		const int ka = bpmem.tevksel[n / 2].getKA(n & 1);
		stage_uid.tevksel_kc = kc;
		stage_uid.tevksel_ka = ka;
		if (Write_Code)
		{
			out.Write("konst_t = float4(%s,%s);\n", tevKSelTableC[kc], tevKSelTableA[ka]);
			TevOverflowState[tevSources::KONST] = kc > 7 || ka > 7;
		}
	}

	// Everything below only emits code.
	if (!Write_Code)
		return;

	// Inputs a, b and c are wrapped in CHK_O_U8 when either of their sources is
	// flagged in TevOverflowState.
	out.Write("tin_a = %s(float4(%s,%s));\n",
		(TevOverflowState[cc.a] || TevOverflowState[AInputSourceMap[ac.a]]) ? "CHK_O_U8" : "",
		tevCInputTable[cc.a], tevAInputTable[ac.a]);
	out.Write("tin_b = %s(float4(%s,%s));\n",
		(TevOverflowState[cc.b] || TevOverflowState[AInputSourceMap[ac.b]]) ? "CHK_O_U8" : "",
		tevCInputTable[cc.b], tevAInputTable[ac.b]);
	out.Write("tin_c = %s(float4(%s,%s));\n",
		(TevOverflowState[cc.c] || TevOverflowState[AInputSourceMap[ac.c]]) ? "CHK_O_U8" : "",
		tevCInputTable[cc.c], tevAInputTable[ac.c]);

	// Input c is rescaled (c + trunc(c / 128)) except when it is ZERO or the
	// combiner is in compare mode.
	const bool rescale_c_rgb = cc.c != TEVCOLORARG_ZERO && cc.bias != TevBias_COMPARE;
	const bool rescale_c_a = ac.c != TEVALPHAARG_ZERO && ac.bias != TevBias_COMPARE;
	if (rescale_c_rgb && rescale_c_a)
		out.Write("tin_c = tin_c+trunc(tin_c*(1.0/128.0));\n");
	else if (rescale_c_rgb)
		out.Write("tin_c.rgb = tin_c.rgb+trunc(tin_c.rgb*(1.0/128.0));\n");
	else if (rescale_c_a)
		out.Write("tin_c.a = tin_c.a+trunc(tin_c.a*(1.0/128.0));\n");

	// tin_d is skipped only when both combiners have d == ZERO, op == ADD and the
	// low two shift bits clear.
	const bool color_d_trivial = cc.d == TEVCOLORARG_ZERO && cc.op == TEVOP_ADD && !(cc.shift & 3);
	const bool alpha_d_trivial = ac.d == TEVALPHAARG_ZERO && ac.op == TEVOP_ADD && !(ac.shift & 3);
	if (!(color_d_trivial && alpha_d_trivial))
		out.Write("tin_d = float4(%s,%s);\n", tevCInputTable[cc.d], tevAInputTable[ac.d]);

	// An unclamped destination is flagged so that later reads of it get checked.
	TevOverflowState[tevCOutputSourceMap[cc.dest]] = !cc.clamp;
	TevOverflowState[tevAOutputSourceMap[ac.dest]] = !ac.clamp;

	// Colour channel.
	out.Write("// color combine\n");
	out.Write("%s = round(clamp(", tevCOutputTable[cc.dest]);
	if (cc.bias == TevBias_COMPARE)
	{
		// in compare mode the shift and op fields together encode the comparison
		const int cmp = (cc.shift << 1) | cc.op;
		WriteTevCompare(out, 1, cmp);
	}
	else
	{
		WriteTevRegular<TEVCOLORARG_ZERO, TEVCOLORARG_ONE>(out, ".rgb", cc.bias, cc.op, cc.clamp, cc.shift, cc.a, cc.b, cc.c, cc.d);
	}
	out.Write(cc.clamp ? ",0.0,255.0));\n" : ",-1024.0,1023.0));\n");

	// Alpha channel.
	out.Write("// alpha combine\n");
	out.Write("%s = round(clamp(", tevAOutputTable[ac.dest]);
	if (ac.bias == TevBias_COMPARE)
	{
		const int cmp = (ac.shift << 1) | ac.op;
		WriteTevCompare(out, 0, cmp);
	}
	else
	{
		// The alpha combiner has no ONE input, so 8 (outside the input range) is
		// passed in its place.
		WriteTevRegular<TEVALPHAARG_ZERO, 8>(out, ".a", ac.bias, ac.op, ac.clamp, ac.shift, ac.a, ac.b, ac.c, ac.d);
	}
	out.Write(ac.clamp ? ",0.0,255.0));\n\n" : ",-1024.0,1023.0));\n\n");

	out.Write("// TEV done\n");
}
コード例 #2
0
ファイル: PixelShaderGen.cpp プロジェクト: galop1n/dolphin
// Emits the integer-math shader code for TEV stage `n` into `out`, records the
// register state that code depends on in uid_data.stagehash[n], and reports the
// shader constants the stage reads through out.SetConstantsUsed().
//
// out           - sink for generated shader text and constant-usage information.
// uid_data      - shader UID under construction; the entry for stage `n` is filled in
//                 and SetTevindrefTexmap() is called when the stage samples a texture.
// n             - TEV stage index.
// ApiType       - forwarded unchanged to SampleTexture().
// swapModeTable - swizzle strings, indexed by the stage's rswap / tswap selectors.
//
// Relies on bpmem and on lookup tables (tevRasTable, tevKSelTableC/A,
// tevCInputTable, tevAInputTable, tevCOutputTable, tevAOutputTable) declared
// outside this function.
static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE ApiType, const char swapModeTable[4][5])
{
	int texcoord = bpmem.tevorders[n/2].getTexCoord(n&1);
	bool bHasTexCoord = (u32)texcoord < bpmem.genMode.numtexgens;
	bool bHasIndStage = bpmem.tevind[n].bt < bpmem.genMode.numindstages;
	// HACK to handle cases where the tex gen is not enabled
	if (!bHasTexCoord)
		texcoord = 0;

	out.Write("\t// TEV stage %d\n", n);

	uid_data.stagehash[n].hasindstage = bHasIndStage;
	uid_data.stagehash[n].tevorders_texcoord = texcoord;
	if (bHasIndStage)
	{
		uid_data.stagehash[n].tevind = bpmem.tevind[n].hex & 0x7FFFFF;

		out.Write("\t// indirect op\n");
		// perform the indirect op on the incoming regular coordinates using iindtex%d as the offset coords
		if (bpmem.tevind[n].bs != ITBA_OFF)
		{
			// alphabump keeps only the upper bits of the selected indirect component;
			// the mask is chosen by fmt.
			static const char *tevIndAlphaSel[]   = {"", "x", "y", "z"};
			static const char *tevIndAlphaMask[] = {"248", "224", "240", "248"}; // 0b11111000, 0b11100000, 0b11110000, 0b11111000
			out.Write("alphabump = iindtex%d.%s & %s;\n",
					bpmem.tevind[n].bt,
					tevIndAlphaSel[bpmem.tevind[n].bs],
					tevIndAlphaMask[bpmem.tevind[n].fmt]);
		}
		else
		{
			// TODO: Should we reset alphabump to 0 here?
		}

		if (bpmem.tevind[n].mid != 0)
		{
			// format: keep only the low bits of each indirect component, mask chosen by fmt
			static const char *tevIndFmtMask[] = { "255", "31", "15", "7" };
			out.Write("\tint3 iindtevcrd%d = iindtex%d & %s;\n", n, bpmem.tevind[n].bt, tevIndFmtMask[bpmem.tevind[n].fmt]);

			// bias - TODO: Check if this needs to be this complicated..
			static const char *tevIndBiasField[] = { "", "x", "y", "xy", "z", "xz", "yz", "xyz" }; // indexed by bias
			static const char *tevIndBiasAdd[] = { "-128", "1", "1", "1" }; // indexed by fmt
			if (bpmem.tevind[n].bias == ITB_S || bpmem.tevind[n].bias == ITB_T || bpmem.tevind[n].bias == ITB_U)
				out.Write("\tiindtevcrd%d.%s += int(%s);\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt]);
			else if (bpmem.tevind[n].bias == ITB_ST || bpmem.tevind[n].bias == ITB_SU || bpmem.tevind[n].bias == ITB_TU)
				out.Write("\tiindtevcrd%d.%s += int2(%s, %s);\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt], tevIndBiasAdd[bpmem.tevind[n].fmt]);
			else if (bpmem.tevind[n].bias == ITB_STU)
				out.Write("\tiindtevcrd%d.%s += int3(%s, %s, %s);\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt], tevIndBiasAdd[bpmem.tevind[n].fmt], tevIndBiasAdd[bpmem.tevind[n].fmt]);

			// multiply by offset matrix and scale - calculations are likely to overflow badly,
			// yet it works out since we only care about the lower 23 bits (+1 sign bit) of the result
			if (bpmem.tevind[n].mid <= 3)
			{
				int mtxidx = 2*(bpmem.tevind[n].mid-1);
				// The code emitted below reads both matrix rows (mtxidx and mtxidx+1),
				// so both must be reported as used. This assumes SetConstantsUsed takes
				// an inclusive (first, last) range, as the single-constant calls in
				// this function suggest.
				out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx+1);

				out.Write("\tint2 indtevtrans%d = int2(idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d), idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d)) >> 3;\n", n, mtxidx, n, mtxidx+1, n);

				// TODO: should use a shader uid branch for this for better performance
				out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx);
				out.Write("\telse indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx);
			}
			else if (bpmem.tevind[n].mid <= 7 && bHasTexCoord)
			{ // s matrix
				_assert_(bpmem.tevind[n].mid >= 5);
				int mtxidx = 2*(bpmem.tevind[n].mid-5);
				// only the .w (scale) component of row mtxidx is read here
				out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx);
				out.Write("\tint2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.xx) >> 8;\n", n, texcoord, n);

				out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx);
				out.Write("\telse indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx);
			}
			else if (bpmem.tevind[n].mid <= 11 && bHasTexCoord)
			{ // t matrix
				_assert_(bpmem.tevind[n].mid >= 9);
				int mtxidx = 2*(bpmem.tevind[n].mid-9);
				// only the .w (scale) component of row mtxidx is read here
				out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx);
				out.Write("\tint2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.yy) >> 8;\n", n, texcoord, n);

				out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx);
				out.Write("\telse indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx);
			}
			else
			{
				out.Write("\tint2 indtevtrans%d = int2(0, 0);\n", n);
			}
		}
		else
		{
			// no matrix selected: the indirect offset is zero
			out.Write("\tint2 indtevtrans%d = int2(0, 0);\n", n);
		}

		// ---------
		// Wrapping
		// ---------
		// wrap moduli, indexed by the sw / tw wrap selector
		static const char *tevIndWrapStart[]  = {"0", "(256<<7)", "(128<<7)", "(64<<7)", "(32<<7)", "(16<<7)", "1" }; // TODO: Should the last one be 1 or (1<<7)?

		// wrap S
		if (bpmem.tevind[n].sw == ITW_OFF)
			out.Write("\twrappedcoord.x = fixpoint_uv%d.x;\n", texcoord);
		else if (bpmem.tevind[n].sw == ITW_0)
			out.Write("\twrappedcoord.x = 0;\n");
		else
			out.Write("\twrappedcoord.x = fixpoint_uv%d.x %% %s;\n", texcoord, tevIndWrapStart[bpmem.tevind[n].sw]);

		// wrap T
		if (bpmem.tevind[n].tw == ITW_OFF)
			out.Write("\twrappedcoord.y = fixpoint_uv%d.y;\n", texcoord);
		else if (bpmem.tevind[n].tw == ITW_0)
			out.Write("\twrappedcoord.y = 0;\n");
		else
			out.Write("\twrappedcoord.y = fixpoint_uv%d.y %% %s;\n", texcoord, tevIndWrapStart[bpmem.tevind[n].tw]);

		if (bpmem.tevind[n].fb_addprev) // add previous tevcoord
			out.Write("\ttevcoord.xy += wrappedcoord + indtevtrans%d;\n", n);
		else
			out.Write("\ttevcoord.xy = wrappedcoord + indtevtrans%d;\n", n);

		// Emulate s24 overflows
		out.Write("\ttevcoord.xy = (tevcoord.xy << 8) >> 8;\n");
	}

	TevStageCombiner::ColorCombiner &cc = bpmem.combiners[n].colorC;
	TevStageCombiner::AlphaCombiner &ac = bpmem.combiners[n].alphaC;

	uid_data.stagehash[n].cc = cc.hex & 0xFFFFFF;
	uid_data.stagehash[n].ac = ac.hex & 0xFFFFF0; // Storing rswap and tswap later

	// Rasterized colour input: only set up when some combiner input reads it.
	if (cc.a == TEVCOLORARG_RASA || cc.a == TEVCOLORARG_RASC ||
	   cc.b == TEVCOLORARG_RASA || cc.b == TEVCOLORARG_RASC ||
	   cc.c == TEVCOLORARG_RASA || cc.c == TEVCOLORARG_RASC ||
	   cc.d == TEVCOLORARG_RASA || cc.d == TEVCOLORARG_RASC ||
	   ac.a == TEVALPHAARG_RASA || ac.b == TEVALPHAARG_RASA ||
	   ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA)
	{
		const int i = bpmem.combiners[n].alphaC.rswap;
		uid_data.stagehash[n].ac |= bpmem.combiners[n].alphaC.rswap;
		uid_data.stagehash[n].tevksel_swap1a = bpmem.tevksel[i*2].swap1;
		uid_data.stagehash[n].tevksel_swap2a = bpmem.tevksel[i*2].swap2;
		uid_data.stagehash[n].tevksel_swap1b = bpmem.tevksel[i*2+1].swap1;
		uid_data.stagehash[n].tevksel_swap2b = bpmem.tevksel[i*2+1].swap2;
		uid_data.stagehash[n].tevorders_colorchan = bpmem.tevorders[n / 2].getColorChan(n & 1);

		const char *rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap];
		out.Write("\trastemp = %s.%s;\n", tevRasTable[bpmem.tevorders[n / 2].getColorChan(n & 1)], rasswap);
	}

	// Texture input.
	uid_data.stagehash[n].tevorders_enable = bpmem.tevorders[n / 2].getEnable(n & 1);
	if (bpmem.tevorders[n/2].getEnable(n&1))
	{
		int texmap = bpmem.tevorders[n/2].getTexMap(n&1);
		if (!bHasIndStage)
		{
			// calc tevcord (without an indirect stage it has not been written yet for this stage)
			if (bHasTexCoord)
				out.Write("\ttevcoord.xy = fixpoint_uv%d;\n", texcoord);
			else
				out.Write("\ttevcoord.xy = int2(0, 0);\n");
		}

		const int i = bpmem.combiners[n].alphaC.tswap;
		uid_data.stagehash[n].ac |= bpmem.combiners[n].alphaC.tswap << 2;
		uid_data.stagehash[n].tevksel_swap1c = bpmem.tevksel[i*2].swap1;
		uid_data.stagehash[n].tevksel_swap2c = bpmem.tevksel[i*2].swap2;
		uid_data.stagehash[n].tevksel_swap1d = bpmem.tevksel[i*2+1].swap1;
		uid_data.stagehash[n].tevksel_swap2d = bpmem.tevksel[i*2+1].swap2;

		uid_data.stagehash[n].tevorders_texmap= bpmem.tevorders[n/2].getTexMap(n&1);

		const char *texswap = swapModeTable[bpmem.combiners[n].alphaC.tswap];
		// NOTE(review): the slot passed here is the tswap selector, not an indirect
		// stage index - confirm this is what SetTevindrefTexmap expects.
		uid_data.SetTevindrefTexmap(i, texmap);

		out.Write("\ttextemp = ");
		SampleTexture<T>(out, "(float2(tevcoord.xy)/128.0)", texswap, texmap, ApiType);
	}
	else
	{
		// texturing disabled for this stage: feed a constant 255 in every channel
		out.Write("\ttextemp = int4(255, 255, 255, 255);\n");
	}


	// Konst input: only set up when some combiner input reads it.
	if (cc.a == TEVCOLORARG_KONST || cc.b == TEVCOLORARG_KONST ||
	    cc.c == TEVCOLORARG_KONST || cc.d == TEVCOLORARG_KONST ||
	    ac.a == TEVALPHAARG_KONST || ac.b == TEVALPHAARG_KONST ||
	    ac.c == TEVALPHAARG_KONST || ac.d == TEVALPHAARG_KONST)
	{
		int kc = bpmem.tevksel[n / 2].getKC(n & 1);
		int ka = bpmem.tevksel[n / 2].getKA(n & 1);
		uid_data.stagehash[n].tevksel_kc = kc;
		uid_data.stagehash[n].tevksel_ka = ka;
		out.Write("\tkonsttemp = int4(%s, %s);\n", tevKSelTableC[kc], tevKSelTableA[ka]);

		// Selectors above 7 are treated as reading one of four konst colour registers.
		// NOTE(review): for selectors 8..11 the expression (k-0xc)%4 is zero or
		// negative - confirm those selector values cannot occur.
		if (kc > 7)
			out.SetConstantsUsed(C_KCOLORS+((kc-0xc)%4),C_KCOLORS+((kc-0xc)%4));
		if (ka > 7)
			out.SetConstantsUsed(C_KCOLORS+((ka-0xc)%4),C_KCOLORS+((ka-0xc)%4));
	}

	// Report the colour registers read through the d input ...
	if (cc.d == TEVCOLORARG_C0 || cc.d == TEVCOLORARG_A0 || ac.d == TEVALPHAARG_A0)
		out.SetConstantsUsed(C_COLORS+1,C_COLORS+1);

	if (cc.d == TEVCOLORARG_C1 || cc.d == TEVCOLORARG_A1 || ac.d == TEVALPHAARG_A1)
		out.SetConstantsUsed(C_COLORS+2,C_COLORS+2);

	if (cc.d == TEVCOLORARG_C2 || cc.d == TEVCOLORARG_A2 || ac.d == TEVALPHAARG_A2)
		out.SetConstantsUsed(C_COLORS+3,C_COLORS+3);

	// ... and the ones used as combiner destinations.
	if (cc.dest >= GX_TEVREG0 && cc.dest <= GX_TEVREG2)
		out.SetConstantsUsed(C_COLORS+cc.dest, C_COLORS+cc.dest);

	if (ac.dest >= GX_TEVREG0 && ac.dest <= GX_TEVREG2)
		out.SetConstantsUsed(C_COLORS+ac.dest, C_COLORS+ac.dest);


	// Inputs a, b and c are truncated to their low 8 bits; d is used as-is.
	out.Write("tevin_a = int4(%s, %s)&255;\n", tevCInputTable[cc.a], tevAInputTable[ac.a]);
	out.Write("tevin_b = int4(%s, %s)&255;\n", tevCInputTable[cc.b], tevAInputTable[ac.b]);
	out.Write("tevin_c = int4(%s, %s)&255;\n", tevCInputTable[cc.c], tevAInputTable[ac.c]);
	out.Write("tevin_d = int4(%s, %s);\n", tevCInputTable[cc.d], tevAInputTable[ac.d]);

	out.Write("\t// color combine\n");
	out.Write("\t%s = clamp(", tevCOutputTable[cc.dest]);
	if (cc.bias != TevBias_COMPARE)
	{
		WriteTevRegular(out, "rgb", cc.bias, cc.op, cc.clamp, cc.shift);
	}
	else
	{
		// compare mode: the table is indexed by (shift << 1) | op
		static const char *function_table[] =
		{
			"((tevin_a.r > tevin_b.r) ? tevin_c.rgb : int3(0,0,0))", // TEVCMP_R8_GT
			"((tevin_a.r == tevin_b.r) ? tevin_c.rgb : int3(0,0,0))", // TEVCMP_R8_EQ
			"((idot(tevin_a.rgb, comp16) >  idot(tevin_b.rgb, comp16)) ? tevin_c.rgb : int3(0,0,0))", // TEVCMP_GR16_GT
			"((idot(tevin_a.rgb, comp16) == idot(tevin_b.rgb, comp16)) ? tevin_c.rgb : int3(0,0,0))", // TEVCMP_GR16_EQ
			"((idot(tevin_a.rgb, comp24) >  idot(tevin_b.rgb, comp24)) ? tevin_c.rgb : int3(0,0,0))", // TEVCMP_BGR24_GT
			"((idot(tevin_a.rgb, comp24) == idot(tevin_b.rgb, comp24)) ? tevin_c.rgb : int3(0,0,0))", // TEVCMP_BGR24_EQ
			"(max(sign(tevin_a.rgb - tevin_b.rgb), int3(0,0,0)) * tevin_c.rgb)", // TEVCMP_RGB8_GT
			"((int3(1,1,1) - sign(abs(tevin_a.rgb - tevin_b.rgb))) * tevin_c.rgb)" // TEVCMP_RGB8_EQ
		};

		int mode = (cc.shift<<1)|cc.op;
		out.Write("   tevin_d.rgb + ");
		out.Write(function_table[mode]);
	}
	// clamp range depends on the combiner's clamp bit
	if (cc.clamp)
		out.Write(", int3(0,0,0), int3(255,255,255))");
	else
		out.Write(", int3(-1024,-1024,-1024), int3(1023,1023,1023))");
	out.Write(";\n");

	out.Write("\t// alpha combine\n");
	out.Write("\t%s = clamp(", tevAOutputTable[ac.dest]);
	if (ac.bias != TevBias_COMPARE)
	{
		WriteTevRegular(out, "a", ac.bias, ac.op, ac.clamp, ac.shift);
	}
	else
	{
		// compare mode: the table is indexed by (shift << 1) | op
		static const char *function_table[] =
		{
			"((tevin_a.r > tevin_b.r) ? tevin_c.a : 0)", // TEVCMP_R8_GT
			"((tevin_a.r == tevin_b.r) ? tevin_c.a : 0)", // TEVCMP_R8_EQ
			"((idot(tevin_a.rgb, comp16) >  idot(tevin_b.rgb, comp16)) ? tevin_c.a : 0)", // TEVCMP_GR16_GT
			"((idot(tevin_a.rgb, comp16) == idot(tevin_b.rgb, comp16)) ? tevin_c.a : 0)", // TEVCMP_GR16_EQ
			"((idot(tevin_a.rgb, comp24) >  idot(tevin_b.rgb, comp24)) ? tevin_c.a : 0)", // TEVCMP_BGR24_GT
			"((idot(tevin_a.rgb, comp24) == idot(tevin_b.rgb, comp24)) ? tevin_c.a : 0)", // TEVCMP_BGR24_EQ
			"((tevin_a.a >  tevin_b.a) ? tevin_c.a : 0)", // TEVCMP_A8_GT
			"((tevin_a.a == tevin_b.a) ? tevin_c.a : 0)" // TEVCMP_A8_EQ
		};

		int mode = (ac.shift<<1)|ac.op;
		out.Write("   tevin_d.a + ");
		out.Write(function_table[mode]);
	}
	// clamp range depends on the combiner's clamp bit
	if (ac.clamp)
		out.Write(", 0, 255)");
	else
		out.Write(", -1024, 1023)");

	out.Write(";\n\n");
}