// Generates the vertex shader for the current XF/BP register state.
//
// Every piece of state that influences the generated text is also recorded in
// the vertex_shader_uid_data obtained from `out`, and the shader source itself
// is emitted through out.Write().
//
//   out        - generator object; must provide GetUidData<>(), SetBuffer(),
//                GetBuffer() and a format-string based Write().
//   components - bitmask tested against the VB_HAS_* flags to decide which
//                vertex inputs exist.
//   api_type   - API_OPENGL selects global "in"/"out" declarations and
//                gl_Position; any other value takes the path labelled D3D
//                below (inputs as parameters of main, result returned).
static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_type)
{
	// Non-uid template parameters will write to the dummy data (=> gets optimized out)
	vertex_shader_uid_data dummy_data;
	vertex_shader_uid_data* uid_data = out.template GetUidData<vertex_shader_uid_data>();
	if (uid_data == nullptr)
		uid_data = &dummy_data;

	out.SetBuffer(text);
	const bool is_writing_shadercode = (out.GetBuffer() != nullptr);

	// Mark the last byte of the text buffer; it is re-checked at the end of
	// this function to detect that the generated code overran the buffer.
	if (is_writing_shadercode)
		text[sizeof(text) - 1] = 0x7C;  // canary

	// The BP and XF register sets must agree on the texgen / colour channel counts.
	_assert_(bpmem.genMode.numtexgens == xfmem.numTexGen.numTexGens);
	_assert_(bpmem.genMode.numcolchans == xfmem.numChan.numColorChans);

	if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS))
	{
		// Add functions to do shifts on scalars and ivecs.
		// This is included in the vertex shader for lighting shader generation.
		out.Write("int ilshift(int a, int b) { return a << b; }\n"
		          "int irshift(int a, int b) { return a >> b; }\n"

		          "int2 ilshift(int2 a, int2 b) { return int2(a.x << b.x, a.y << b.y); }\n"
		          "int2 ilshift(int2 a, int b) { return int2(a.x << b, a.y << b); }\n"
		          "int2 irshift(int2 a, int2 b) { return int2(a.x >> b.x, a.y >> b.y); }\n"
		          "int2 irshift(int2 a, int b) { return int2(a.x >> b, a.y >> b); }\n"

		          "int3 ilshift(int3 a, int3 b) { return int3(a.x << b.x, a.y << b.y, a.z << b.z); }\n"
		          "int3 ilshift(int3 a, int b) { return int3(a.x << b, a.y << b, a.z << b); }\n"
		          "int3 irshift(int3 a, int3 b) { return int3(a.x >> b.x, a.y >> b.y, a.z >> b.z); }\n"
		          "int3 irshift(int3 a, int b) { return int3(a.x >> b, a.y >> b, a.z >> b); }\n"

		          "int4 ilshift(int4 a, int4 b) { return int4(a.x << b.x, a.y << b.y, a.z << b.z, a.w << b.w); }\n"
		          "int4 ilshift(int4 a, int b) { return int4(a.x << b, a.y << b, a.z << b, a.w << b); }\n"
		          "int4 irshift(int4 a, int4 b) { return int4(a.x >> b.x, a.y >> b.y, a.z >> b.z, a.w >> b.w); }\n"
		          "int4 irshift(int4 a, int b) { return int4(a.x >> b, a.y >> b, a.z >> b, a.w >> b); }\n\n");
	}

	out.Write("%s", s_lighting_struct);

	// uniforms
	if (api_type == API_OPENGL)
		out.Write("layout(std140%s) uniform VSBlock {\n", g_ActiveConfig.backend_info.bSupportsBindingLayout ? ", binding = 2" : "");
	else
		out.Write("cbuffer VSBlock {\n");
	out.Write(s_shader_uniforms);
	out.Write("};\n");

	out.Write("struct VS_OUTPUT {\n");
	GenerateVSOutputMembers<T>(out, api_type);
	out.Write("};\n");

	uid_data->numTexGens = xfmem.numTexGen.numTexGens;
	uid_data->components = components;
	uid_data->pixel_lighting = g_ActiveConfig.bEnablePixelLighting;

	if (api_type == API_OPENGL)
	{
		// Vertex inputs are declared as globals; only the attributes present in
		// `components` are declared (rawpos is always declared).
		out.Write("in float4 rawpos; // ATTR%d,\n", SHADER_POSITION_ATTRIB);
		if (components & VB_HAS_POSMTXIDX)
			out.Write("in int posmtx; // ATTR%d,\n", SHADER_POSMTX_ATTRIB);
		if (components & VB_HAS_NRM0)
			out.Write("in float3 rawnorm0; // ATTR%d,\n", SHADER_NORM0_ATTRIB);
		if (components & VB_HAS_NRM1)
			out.Write("in float3 rawnorm1; // ATTR%d,\n", SHADER_NORM1_ATTRIB);
		if (components & VB_HAS_NRM2)
			out.Write("in float3 rawnorm2; // ATTR%d,\n", SHADER_NORM2_ATTRIB);

		if (components & VB_HAS_COL0)
			out.Write("in float4 color0; // ATTR%d,\n", SHADER_COLOR0_ATTRIB);
		if (components & VB_HAS_COL1)
			out.Write("in float4 color1; // ATTR%d,\n", SHADER_COLOR1_ATTRIB);

		// A texcoord input becomes a float3 when it also carries a texture
		// matrix index (read back later from its .z component).
		for (int i = 0; i < 8; ++i)
		{
			u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0<<i));
			if ((components & (VB_HAS_UV0<<i)) || hastexmtx)
				out.Write("in float%d tex%d; // ATTR%d,\n", hastexmtx ? 3 : 2, i, SHADER_TEXTURE0_ATTRIB + i);
		}

		if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
		{
			out.Write("out VertexData {\n");
			GenerateVSOutputMembers<T>(out, api_type, g_ActiveConfig.backend_info.bSupportsBindingLayout ? "centroid" : "centroid out");
			out.Write("} vs;\n");
		}
		else
		{
			// Let's set up attributes
			// The index is an unsigned int rather than size_t because it is
			// handed to a "%d" conversion in the format string below.
			for (unsigned int i = 0; i < 8; ++i)
			{
				if (i < xfmem.numTexGen.numTexGens)
				{
					out.Write("centroid out float3 uv%d;\n", i);
				}
			}
			out.Write("centroid out float4 clipPos;\n");
			if (g_ActiveConfig.bEnablePixelLighting)
			{
				out.Write("centroid out float3 Normal;\n");
				out.Write("centroid out float3 WorldPos;\n");
			}
			out.Write("centroid out float4 colors_0;\n");
			out.Write("centroid out float4 colors_1;\n");
		}

		out.Write("void main()\n{\n");
	}
	else // D3D
	{
		// Vertex inputs are parameters of main, each tagged with a semantic.
		out.Write("VS_OUTPUT main(\n");

		// inputs
		if (components & VB_HAS_NRM0)
			out.Write("  float3 rawnorm0 : NORMAL0,\n");
		if (components & VB_HAS_NRM1)
			out.Write("  float3 rawnorm1 : NORMAL1,\n");
		if (components & VB_HAS_NRM2)
			out.Write("  float3 rawnorm2 : NORMAL2,\n");
		if (components & VB_HAS_COL0)
			out.Write("  float4 color0 : COLOR0,\n");
		if (components & VB_HAS_COL1)
			out.Write("  float4 color1 : COLOR1,\n");
		for (int i = 0; i < 8; ++i)
		{
			u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0<<i));
			if ((components & (VB_HAS_UV0<<i)) || hastexmtx)
				out.Write("  float%d tex%d : TEXCOORD%d,\n", hastexmtx ? 3 : 2, i, i);
		}
		if (components & VB_HAS_POSMTXIDX)
			out.Write("  int posmtx : BLENDINDICES,\n");
		out.Write("  float4 rawpos : POSITION) {\n");
	}

	out.Write("VS_OUTPUT o;\n");

	// transforms
	if (components & VB_HAS_POSMTXIDX)
	{
		// Per-vertex matrix index: position/normal matrices are looked up with
		// posmtx, unless the driver workaround below forces index 0.
		if (is_writing_shadercode && (DriverDetails::HasBug(DriverDetails::BUG_NODYNUBOACCESS) && !DriverDetails::HasBug(DriverDetails::BUG_ANNIHILATEDUBOS)))
		{
			// This'll cause issues, but  it can't be helped
			out.Write("float4 pos = float4(dot(" I_TRANSFORMMATRICES"[0], rawpos), dot(" I_TRANSFORMMATRICES"[1], rawpos), dot(" I_TRANSFORMMATRICES"[2], rawpos), 1);\n");
			if (components & VB_HAS_NRMALL)
				out.Write("float3 N0 = " I_NORMALMATRICES"[0].xyz, N1 = " I_NORMALMATRICES"[1].xyz, N2 = " I_NORMALMATRICES"[2].xyz;\n");
		}
		else
		{
			out.Write("float4 pos = float4(dot(" I_TRANSFORMMATRICES"[posmtx], rawpos), dot(" I_TRANSFORMMATRICES"[posmtx+1], rawpos), dot(" I_TRANSFORMMATRICES"[posmtx+2], rawpos), 1);\n");

			if (components & VB_HAS_NRMALL)
			{
				out.Write("int normidx = posmtx >= 32 ? (posmtx-32) : posmtx;\n");
				out.Write("float3 N0 = " I_NORMALMATRICES"[normidx].xyz, N1 = " I_NORMALMATRICES"[normidx+1].xyz, N2 = " I_NORMALMATRICES"[normidx+2].xyz;\n");
			}
		}

		// Only _norm0 is normalized; _norm1/_norm2 are transformed but left as-is.
		if (components & VB_HAS_NRM0)
			out.Write("float3 _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, rawnorm0)));\n");
		if (components & VB_HAS_NRM1)
			out.Write("float3 _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n");
		if (components & VB_HAS_NRM2)
			out.Write("float3 _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n");
	}
	else
	{
		// No per-vertex matrix index: rows 0-2 of I_POSNORMALMATRIX transform the
		// position, rows 3-5 transform the normals.
		out.Write("float4 pos = float4(dot(" I_POSNORMALMATRIX"[0], rawpos), dot(" I_POSNORMALMATRIX"[1], rawpos), dot(" I_POSNORMALMATRIX"[2], rawpos), 1.0);\n");
		if (components & VB_HAS_NRM0)
			out.Write("float3 _norm0 = normalize(float3(dot(" I_POSNORMALMATRIX"[3].xyz, rawnorm0), dot(" I_POSNORMALMATRIX"[4].xyz, rawnorm0), dot(" I_POSNORMALMATRIX"[5].xyz, rawnorm0)));\n");
		if (components & VB_HAS_NRM1)
			out.Write("float3 _norm1 = float3(dot(" I_POSNORMALMATRIX"[3].xyz, rawnorm1), dot(" I_POSNORMALMATRIX"[4].xyz, rawnorm1), dot(" I_POSNORMALMATRIX"[5].xyz, rawnorm1));\n");
		if (components & VB_HAS_NRM2)
			out.Write("float3 _norm2 = float3(dot(" I_POSNORMALMATRIX"[3].xyz, rawnorm2), dot(" I_POSNORMALMATRIX"[4].xyz, rawnorm2), dot(" I_POSNORMALMATRIX"[5].xyz, rawnorm2));\n");
	}

	// _norm0 is referenced by later generated code, so it must always be declared.
	if (!(components & VB_HAS_NRM0))
		out.Write("float3 _norm0 = float3(0.0, 0.0, 0.0);\n");


	out.Write("o.pos = float4(dot(" I_PROJECTION"[0], pos), dot(" I_PROJECTION"[1], pos), dot(" I_PROJECTION"[2], pos), dot(" I_PROJECTION"[3], pos));\n");

	// Temporaries used by the generated lighting code.
	out.Write("int4 lacc;\n"
			"float3 ldir, h, cosAttn, distAttn;\n"
			"float dist, dist2, attn;\n");

	uid_data->numColorChans = xfmem.numChan.numColorChans;
	if (xfmem.numChan.numColorChans == 0)
	{
		// No colour channels: pass the vertex colour through, or white if absent.
		if (components & VB_HAS_COL0)
			out.Write("o.colors_0 = color0;\n");
		else
			out.Write("o.colors_0 = float4(1.0, 1.0, 1.0, 1.0);\n");
	}

	GenerateLightingShader<T>(out, uid_data->lighting, components, "color", "o.colors_");

	if (xfmem.numChan.numColorChans < 2)
	{
		if (components & VB_HAS_COL1)
			out.Write("o.colors_1 = color1;\n");
		else
			out.Write("o.colors_1 = o.colors_0;\n");
	}
	// special case if only pos and tex coord 0 and tex coord input is AB11
	// donko - this has caused problems in some games. removed for now.
	bool texGenSpecialCase = false;
	/*bool texGenSpecialCase =
		((g_main_cp_state.vtx_desc.Hex & 0x60600L) == g_main_cp_state.vtx_desc.Hex) && // only pos and tex coord 0
		(g_main_cp_state.vtx_desc.Tex0Coord != NOT_PRESENT) &&
		(xfmem.texcoords[0].texmtxinfo.inputform == XF_TEXINPUT_AB11);
		*/

	// transform texcoords
	out.Write("float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n");
	for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i)
	{
		TexMtxInfo& texinfo = xfmem.texMtxInfo[i];

		// Each texgen gets its own scope in the generated code so per-texgen
		// temporaries (tmp, P0..P2) can be redeclared.
		out.Write("{\n");
		out.Write("coord = float4(0.0, 0.0, 1.0, 1.0);\n");
		uid_data->texMtxInfo[i].sourcerow = xfmem.texMtxInfo[i].sourcerow;
		// Select the source vector for this texgen; when the requested vertex
		// attribute is absent, coord keeps the default written above.
		switch (texinfo.sourcerow)
		{
		case XF_SRCGEOM_INROW:
			_assert_(texinfo.inputform == XF_TEXINPUT_ABC1);
			out.Write("coord = rawpos;\n"); // pos.w is 1
			break;
		case XF_SRCNORMAL_INROW:
			if (components & VB_HAS_NRM0)
			{
				_assert_(texinfo.inputform == XF_TEXINPUT_ABC1);
				out.Write("coord = float4(rawnorm0.xyz, 1.0);\n");
			}
			break;
		case XF_SRCCOLORS_INROW:
			_assert_(texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC0 || texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC1);
			break;
		case XF_SRCBINORMAL_T_INROW:
			if (components & VB_HAS_NRM1)
			{
				_assert_(texinfo.inputform == XF_TEXINPUT_ABC1);
				out.Write("coord = float4(rawnorm1.xyz, 1.0);\n");
			}
			break;
		case XF_SRCBINORMAL_B_INROW:
			if (components & VB_HAS_NRM2)
			{
				_assert_(texinfo.inputform == XF_TEXINPUT_ABC1);
				out.Write("coord = float4(rawnorm2.xyz, 1.0);\n");
			}
			break;
		default:
			_assert_(texinfo.sourcerow <= XF_SRCTEX7_INROW);
			if (components & (VB_HAS_UV0 << (texinfo.sourcerow - XF_SRCTEX0_INROW)))
				out.Write("coord = float4(tex%d.x, tex%d.y, 1.0, 1.0);\n", texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW);
			break;
		}

		// first transformation
		uid_data->texMtxInfo[i].texgentype = xfmem.texMtxInfo[i].texgentype;
		switch (texinfo.texgentype)
		{
			case XF_TEXGEN_EMBOSS_MAP: // calculate tex coords into bump map

				if (components & (VB_HAS_NRM1|VB_HAS_NRM2))
				{
					// transform the light dir into tangent space
					uid_data->texMtxInfo[i].embosslightshift = xfmem.texMtxInfo[i].embosslightshift;
					uid_data->texMtxInfo[i].embosssourceshift = xfmem.texMtxInfo[i].embosssourceshift;
					out.Write("ldir = normalize(" LIGHT_POS".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(texinfo.embosslightshift));
					out.Write("o.tex%d.xyz = o.tex%d.xyz + float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0);\n", i, texinfo.embosssourceshift);
				}
				else
				{
					_assert_(0); // should have normals
					uid_data->texMtxInfo[i].embosssourceshift = xfmem.texMtxInfo[i].embosssourceshift;
					out.Write("o.tex%d.xyz = o.tex%d.xyz;\n", i, texinfo.embosssourceshift);
				}

				break;
			case XF_TEXGEN_COLOR_STRGBC0:
				_assert_(texinfo.sourcerow == XF_SRCCOLORS_INROW);
				out.Write("o.tex%d.xyz = float3(o.colors_0.x, o.colors_0.y, 1);\n", i);
				break;
			case XF_TEXGEN_COLOR_STRGBC1:
				_assert_(texinfo.sourcerow == XF_SRCCOLORS_INROW);
				out.Write("o.tex%d.xyz = float3(o.colors_1.x, o.colors_1.y, 1);\n", i);
				break;
			case XF_TEXGEN_REGULAR:
			default:
				// One projection bit per texgen is packed into the uid.
				uid_data->texMtxInfo_n_projection |= xfmem.texMtxInfo[i].projection << i;
				if (components & (VB_HAS_TEXMTXIDX0<<i))
				{
					// Per-vertex texture matrix index, carried in tex<i>.z.
					out.Write("int tmp = int(tex%d.z);\n", i);
					if (texinfo.projection == XF_TEXPROJ_STQ)
						out.Write("o.tex%d.xyz = float3(dot(coord, " I_TRANSFORMMATRICES"[tmp]), dot(coord, " I_TRANSFORMMATRICES"[tmp+1]), dot(coord, " I_TRANSFORMMATRICES"[tmp+2]));\n", i);
					else
						out.Write("o.tex%d.xyz = float3(dot(coord, " I_TRANSFORMMATRICES"[tmp]), dot(coord, " I_TRANSFORMMATRICES"[tmp+1]), 1);\n", i);
				}
				else
				{
					// Fixed matrix: texgen i uses rows 3*i .. 3*i+2 of I_TEXMATRICES.
					if (texinfo.projection == XF_TEXPROJ_STQ)
						out.Write("o.tex%d.xyz = float3(dot(coord, " I_TEXMATRICES"[%d]), dot(coord, " I_TEXMATRICES"[%d]), dot(coord, " I_TEXMATRICES"[%d]));\n", i, 3*i, 3*i+1, 3*i+2);
					else
						out.Write("o.tex%d.xyz = float3(dot(coord, " I_TEXMATRICES"[%d]), dot(coord, " I_TEXMATRICES"[%d]), 1);\n", i, 3*i, 3*i+1);
				}
				break;
		}

		uid_data->dualTexTrans_enabled = xfmem.dualTexTrans.enabled;
		// CHECKME: does this only work for regular tex gen types?
		if (xfmem.dualTexTrans.enabled && texinfo.texgentype == XF_TEXGEN_REGULAR)
		{
			const PostMtxInfo& postInfo = xfmem.postMtxInfo[i];

			uid_data->postMtxInfo[i].index = xfmem.postMtxInfo[i].index;
			int postidx = postInfo.index;
			// Row indices are masked with 0x3f so they wrap within 0..63.
			out.Write("float4 P0 = " I_POSTTRANSFORMMATRICES"[%d];\n"
				"float4 P1 = " I_POSTTRANSFORMMATRICES"[%d];\n"
				"float4 P2 = " I_POSTTRANSFORMMATRICES"[%d];\n",
				postidx & 0x3f, (postidx + 1) & 0x3f, (postidx + 2) & 0x3f);

			// texGenSpecialCase is hard-wired to false above, so only the else
			// branch is ever taken.
			if (texGenSpecialCase)
			{
				// no normalization
				// q of input is 1
				// q of output is unknown

				// multiply by postmatrix
				out.Write("o.tex%d.xyz = float3(dot(P0.xy, o.tex%d.xy) + P0.z + P0.w, dot(P1.xy, o.tex%d.xy) + P1.z + P1.w, 0.0);\n", i, i, i);
			}
			else
			{
				uid_data->postMtxInfo[i].normalize = xfmem.postMtxInfo[i].normalize;
				if (postInfo.normalize)
					out.Write("o.tex%d.xyz = normalize(o.tex%d.xyz);\n", i, i);

				// multiply by postmatrix
				out.Write("o.tex%d.xyz = float3(dot(P0.xyz, o.tex%d.xyz) + P0.w, dot(P1.xyz, o.tex%d.xyz) + P1.w, dot(P2.xyz, o.tex%d.xyz) + P2.w);\n", i, i, i, i);
			}
		}

		out.Write("}\n");
	}

	// clipPos/w needs to be done in pixel shader, not here
	out.Write("o.clipPos = o.pos;\n");

	if (g_ActiveConfig.bEnablePixelLighting)
	{
		out.Write("o.Normal = _norm0;\n");
		out.Write("o.WorldPos = pos.xyz;\n");

		// With pixel lighting enabled the raw vertex colours overwrite whatever
		// was written to o.colors_* earlier.
		if (components & VB_HAS_COL0)
			out.Write("o.colors_0 = color0;\n");

		if (components & VB_HAS_COL1)
			out.Write("o.colors_1 = color1;\n");
	}

	//write the true depth value, if the game uses depth textures pixel shaders will override with the correct values
	//if not early z culling will improve speed
	// Note: the first two branches emit the same statement; only the final
	// OpenGL branch differs.
	if (g_ActiveConfig.backend_info.bSupportsClipControl)
	{
		out.Write("o.pos.z = -o.pos.z;\n");
	}
	else if (api_type == API_D3D)
	{
		out.Write("o.pos.z = -o.pos.z;\n");
	}
	else // OGL
	{
		// this results in a scale from -1..0 to -1..1 after perspective
		// divide
		out.Write("o.pos.z = o.pos.z * -2.0 - o.pos.w;\n");

		// the next steps of the OGL pipeline are:
		// (x_c,y_c,z_c,w_c) = o.pos  //switch to OGL spec terminology
		// clipping to -w_c <= (x_c,y_c,z_c) <= w_c
		// (x_d,y_d,z_d) = (x_c,y_c,z_c)/w_c//perspective divide
		// z_w = (f-n)/2*z_d + (n+f)/2
		// z_w now contains the value to go to the 0..1 depth buffer

		//trying to get the correct semantic while not using glDepthRange
		//seems to get rather complicated
	}

	// The console GPU places the pixel center at 7/12 in screen space unless
	// antialiasing is enabled, while D3D and OpenGL place it at 0.5. This results
	// in some primitives being placed one pixel too far to the bottom-right,
	// which in turn can be critical if it happens for clear quads.
	// Hence, we compensate for this pixel center difference so that primitives
	// get rasterized correctly.
	out.Write("o.pos.xy = o.pos.xy - o.pos.w * " I_PIXELCENTERCORRECTION".xy;\n");

	if (api_type == API_OPENGL)
	{
		// Copy the local VS_OUTPUT into the declared outputs.
		if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
		{
			AssignVSOutputMembers(out, "vs", "o");
		}
		else
		{
			// TODO: Pass interface blocks between shader stages even if geometry shaders
			// are not supported, however that will require at least OpenGL 3.2 support.
			for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i)
				out.Write("uv%d.xyz = o.tex%d;\n", i, i);
			out.Write("clipPos = o.clipPos;\n");
			if (g_ActiveConfig.bEnablePixelLighting)
			{
				out.Write("Normal = o.Normal;\n");
				out.Write("WorldPos = o.WorldPos;\n");
			}
			out.Write("colors_0 = o.colors_0;\n");
			out.Write("colors_1 = o.colors_1;\n");
		}

		out.Write("gl_Position = o.pos;\n");
	}
	else // D3D
	{
		out.Write("return o;\n");
	}
	out.Write("}\n");

	// Verify the canary written at the top survived code generation.
	if (is_writing_shadercode)
	{
		if (text[sizeof(text) - 1] != 0x7C)
			PanicAlert("VertexShader generator - buffer too small, canary has been eaten!");
	}
}
Beispiel #2
0
// Writes the shader statements that add the contribution of one light to the
// accumulator `lacc` for a single lighting channel.
//
//   object        - shader code writer that receives the generated text.
//   uid_data      - packed lighting state; attnfunc and diffusefunc each hold
//                   one 2-bit selector per channel.
//   index         - light number, forwarded to the LIGHT_*_PARAMS macros.
//   litchan_index - selects which 2-bit field of attnfunc/diffusefunc to decode.
//   alpha         - true: accumulate into lacc.a as a scalar;
//                   false: accumulate into lacc.rgb as a 3-component vector.
static void GenerateLightShader(ShaderCode& object, const LightingUidData& uid_data, int index,
                                int litchan_index, bool alpha)
{
  // Decode this channel's 2-bit attenuation and diffuse selectors.
  const int selector_shift = 2 * litchan_index;
  const int attn_mode = (uid_data.attnfunc >> selector_shift) & 0x3;
  const int diffuse_mode = (uid_data.diffusefunc >> selector_shift) & 0x3;

  // Destination swizzle on lacc, and the matching int/float vector-size suffix.
  const char* const chan = alpha ? "a" : "rgb";
  const char* const vec_suffix = alpha ? "" : "3";

  // Step 1: emit code computing the light direction `ldir` and attenuation `attn`.
  if (attn_mode == LIGHTATTN_NONE || attn_mode == LIGHTATTN_DIR)
  {
    object.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(index));
    object.Write("attn = 1.0;\n");
    object.Write("if (length(ldir) == 0.0)\n\t ldir = _norm0;\n");
  }
  else if (attn_mode == LIGHTATTN_SPEC)
  {
    // The distance-attenuation vector is normalized unless diffuse is disabled.
    const char* const dist_attn_wrapper = (diffuse_mode != LIGHTDIF_NONE) ? "normalize" : "";

    object.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(index));
    object.Write("attn = (dot(_norm0, ldir) >= 0.0) ? "
                 "max(0.0, dot(_norm0, " LIGHT_DIR ".xyz)) : 0.0;\n",
                 LIGHT_DIR_PARAMS(index));
    object.Write("cosAttn = " LIGHT_COSATT ".xyz;\n", LIGHT_COSATT_PARAMS(index));
    object.Write("distAttn = %s(" LIGHT_DISTATT ".xyz);\n", dist_attn_wrapper,
                 LIGHT_DISTATT_PARAMS(index));
    object.Write("attn = max(0.0f, dot(cosAttn, float3(1.0, attn, attn*attn)))"
                 " / dot(distAttn, float3(1.0, attn, attn*attn));\n");
  }
  else if (attn_mode == LIGHTATTN_SPOT)
  {
    object.Write("ldir = " LIGHT_POS ".xyz - pos.xyz;\n", LIGHT_POS_PARAMS(index));
    object.Write("dist2 = dot(ldir, ldir);\n"
                 "dist = sqrt(dist2);\n"
                 "ldir = ldir / dist;\n"
                 "attn = max(0.0, dot(ldir, " LIGHT_DIR ".xyz));\n",
                 LIGHT_DIR_PARAMS(index));
    // attn*attn may overflow
    object.Write("attn = max(0.0, " LIGHT_COSATT ".x + " LIGHT_COSATT ".y*attn + "
                 LIGHT_COSATT ".z*attn*attn)"
                 " / dot(" LIGHT_DISTATT ".xyz, float3(1.0,dist,dist2));\n",
                 LIGHT_COSATT_PARAMS(index), LIGHT_COSATT_PARAMS(index),
                 LIGHT_COSATT_PARAMS(index), LIGHT_DISTATT_PARAMS(index));
  }

  // Step 2: emit the accumulation into lacc, scaled by the diffuse term if any.
  switch (diffuse_mode)
  {
  case LIGHTDIF_NONE:
    object.Write("lacc.%s += int%s(round(attn * float%s(" LIGHT_COL ")));\n", chan, vec_suffix,
                 vec_suffix, LIGHT_COL_PARAMS(index, chan));
    break;
  case LIGHTDIF_SIGN:
  case LIGHTDIF_CLAMP:
  {
    // SIGN keeps the signed dot product; CLAMP clamps it to be non-negative.
    const char* const dot_prefix = (diffuse_mode == LIGHTDIF_SIGN) ? "(" : "max(0.0,";
    object.Write("lacc.%s += int%s(round(attn * %sdot(ldir, _norm0)) * float%s(" LIGHT_COL ")));\n",
                 chan, vec_suffix, dot_prefix, vec_suffix, LIGHT_COL_PARAMS(index, chan));
    break;
  }
  default:
    _assert_(0);
    break;
  }

  object.Write("\n");
}
Beispiel #3
0
inline void GenerateVertexShader(T& out, u32 components, const XFMemory &xfr, const BPMemory &bpm, bool use_integer_math)
{
    // Non-uid template parameters will write to the dummy data (=> gets optimized out)
    bool uidPresent = (&out.template GetUidData<vertex_shader_uid_data>() != NULL);
    vertex_shader_uid_data dummy_data;
    vertex_shader_uid_data& uid_data = uidPresent ? out.template GetUidData<vertex_shader_uid_data>() : dummy_data;
    if (uidPresent)
    {
        out.ClearUID();
    }
    if (Write_Code)
    {
        _dbg_assert_log_(VIDEO, bpm.genMode.numtexgens == xfr.numTexGen.numTexGens, "numTexGens mismatch bpmem: %u xfmem: %u", bpm.genMode.numtexgens.Value(), xfr.numTexGen.numTexGens);
        _dbg_assert_log_(VIDEO, bpm.genMode.numcolchans == xfr.numChan.numColorChans, "numColorChans mismatch bpmem: %u xfmem: %u", bpm.genMode.numcolchans.Value(), xfr.numChan.numColorChans);
    }
    uid_data.numTexGens = xfr.numTexGen.numTexGens;
    uid_data.components = components;
    bool lightingEnabled = xfr.numChan.numColorChans > 0;
    bool enable_pl = g_ActiveConfig.PixelLightingEnabled(xfr, components);
    bool needLightShader = lightingEnabled && !enable_pl;
    for (unsigned int i = 0; i < xfr.numTexGen.numTexGens; ++i)
    {
        const TexMtxInfo& texinfo = xfr.texMtxInfo[i];
        needLightShader = needLightShader || texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC0 || texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC1;
    }
    uid_data.pixel_lighting = enable_pl;
    uid_data.numColorChans = xfr.numChan.numColorChans;
    if (!(api_type & API_D3D9))
    {
        uid_data.msaa = g_ActiveConfig.iMultisamples > 1;
        uid_data.ssaa = g_ActiveConfig.iMultisamples > 1 && g_ActiveConfig.bSSAA;
    }
    char * buffer = nullptr;
    if (Write_Code)
    {
        buffer = out.GetBuffer();
        if (buffer == nullptr)
        {
            buffer = text;
            out.SetBuffer(text);
        }

        buffer[VERTEXSHADERGEN_BUFFERSIZE - 1] = 0x7C;  // canary
        // uniforms
        if (api_type == API_OPENGL)
            out.Write("layout(std140%s) uniform VSBlock {\n", g_ActiveConfig.backend_info.bSupportsBindingLayout ? ", binding = 2" : "");
        else if (api_type == API_D3D11)
            out.Write("cbuffer VSBlock : register(b0) {\n");

        DeclareUniform<T, api_type>(out, C_PROJECTION, "float4", I_PROJECTION"[4]");
        DeclareUniform<T, api_type>(out, C_DEPTHPARAMS, "float4", I_DEPTHPARAMS);
        DeclareUniform<T, api_type>(out, C_MATERIALS, "float4", I_MATERIALS"[4]");
        DeclareUniform<T, api_type>(out, C_LIGHTS, "float4", I_LIGHTS"[40]");
        DeclareUniform<T, api_type>(out, C_TEXMATRICES, "float4", I_TEXMATRICES"[24]");
        DeclareUniform<T, api_type>(out, C_TRANSFORMMATRICES, "float4", I_TRANSFORMMATRICES"[64]");
        DeclareUniform<T, api_type>(out, C_NORMALMATRICES, "float4", I_NORMALMATRICES"[32]");
        DeclareUniform<T, api_type>(out, C_POSTTRANSFORMMATRICES, "float4", I_POSTTRANSFORMMATRICES"[64]");
        DeclareUniform<T, api_type>(out, C_PLOFFSETPARAMS, "float4", I_PLOFFSETPARAMS"[13]");

        if (api_type == API_OPENGL || api_type == API_D3D11)
            out.Write("};\n");

        out.Write("struct VS_OUTPUT {\n");
        GenerateVSOutputMembers<T, api_type>(out, enable_pl, xfr);
        out.Write("};\n");

        if (api_type == API_OPENGL)
        {
            out.Write("in float4 rawpos; // ATTR%d,\n", SHADER_POSITION_ATTRIB);
            out.Write("in float fposmtx; // ATTR%d,\n", SHADER_POSMTX_ATTRIB);
            if (components & VB_HAS_NRM0)
                out.Write("in float3 rawnorm0; // ATTR%d,\n", SHADER_NORM0_ATTRIB);
            if (components & VB_HAS_NRM1)
                out.Write("in float3 rawnorm1; // ATTR%d,\n", SHADER_NORM1_ATTRIB);
            if (components & VB_HAS_NRM2)
                out.Write("in float3 rawnorm2; // ATTR%d,\n", SHADER_NORM2_ATTRIB);

            if (components & VB_HAS_COL0)
                out.Write("in float4 color0; // ATTR%d,\n", SHADER_COLOR0_ATTRIB);
            if (components & VB_HAS_COL1)
                out.Write("in float4 color1; // ATTR%d,\n", SHADER_COLOR1_ATTRIB);

            for (int i = 0; i < 8; ++i)
            {
                u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0 << i));
                if ((components & (VB_HAS_UV0 << i)) || hastexmtx)
                    out.Write("in float%d tex%d; // ATTR%d,\n", hastexmtx ? 3 : 2, i, SHADER_TEXTURE0_ATTRIB + i);
            }

            if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
            {
                out.Write("out VertexData {\n");
                GenerateVSOutputMembers<T, api_type>(out, enable_pl, xfr, GetInterpolationQualifier(api_type, false, true));
                out.Write("} vs;\n");
            }
            else
            {
                const char* optCentroid = GetInterpolationQualifier(api_type);

                // Let's set up attributes
                if (xfr.numTexGen.numTexGens < 7)
                {
                    for (int i = 0; i < 8; ++i)
                        out.Write("%s out float3 uv%d_2;\n", optCentroid, i);
                    out.Write("%s out float4 clipPos_2;\n", optCentroid);
                    if (enable_pl)
                        out.Write("%s out float4 Normal_2;\n", optCentroid);
                }
                else
                {
                    // wpos is in w of first 4 texcoords
                    if (enable_pl)
                    {
                        for (int i = 0; i < 8; ++i)
                            out.Write("%s out float4 uv%d_2;\n", optCentroid, i);
                    }
                    else
                    {
                        for (unsigned int i = 0; i < xfr.numTexGen.numTexGens; ++i)
                            out.Write("%s out float%d uv%d_2;\n", optCentroid, i < 4 ? 4 : 3, i);
                    }
                }
                out.Write("%s out float4 colors_0;\n", optCentroid);
                out.Write("%s out float4 colors_1;\n", optCentroid);
            }

            out.Write("void main()\n{\n");
        }
        else
        {
            out.Write("VS_OUTPUT main(\n");

            // inputs
            if (components & VB_HAS_NRM0)
                out.Write("  float3 rawnorm0 : NORMAL0,\n");
            if (components & VB_HAS_NRM1)
                out.Write("  float3 rawnorm1 : NORMAL1,\n");
            if (components & VB_HAS_NRM2)
                out.Write("  float3 rawnorm2 : NORMAL2,\n");
            if (components & VB_HAS_COL0)
                out.Write("  float4 color0 : COLOR0,\n");
            if (components & VB_HAS_COL1)
                out.Write("  float4 color1 : COLOR1,\n");
            for (int i = 0; i < 8; ++i)
            {
                u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0 << i));
                if ((components & (VB_HAS_UV0 << i)) || hastexmtx)
                    out.Write("  float%d tex%d : TEXCOORD%d,\n", hastexmtx ? 3 : 2, i, i);
            }
            out.Write("  float4 blend_indices : BLENDINDICES,\n");

            out.Write("  float4 rawpos : POSITION) {\n");
        }
        out.Write("VS_OUTPUT o;\n");
        if (api_type & API_D3D9)
        {
            out.Write("int4 indices = D3DCOLORtoUBYTE4(blend_indices);\n");
        }
        // transforms
        if (api_type & API_D3D9)
        {
            out.Write("int posmtx = indices.x;\n");
        }
        else if (api_type == API_D3D11)
        {
            out.Write("int posmtx = blend_indices.x * 255.0;\n");
        }
        else
        {
            out.Write("int posmtx = int(fposmtx);\n");
        }

        out.Write("float4 pos = float4(dot(" I_TRANSFORMMATRICES"[posmtx], rawpos), dot(" I_TRANSFORMMATRICES"[posmtx+1], rawpos), dot(" I_TRANSFORMMATRICES"[posmtx+2], rawpos), 1);\n");

        if (components & VB_HAS_NRMALL) {
            out.Write("int normidx = posmtx >= 32 ? (posmtx-32) : posmtx;\n");
            out.Write("float3 N0 = " I_NORMALMATRICES"[normidx].xyz, N1 = " I_NORMALMATRICES"[normidx+1].xyz, N2 = " I_NORMALMATRICES"[normidx+2].xyz;\n");
        }

        if (components & VB_HAS_NRM0)
            out.Write("float3 _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, rawnorm0)));\n");
        if (components & VB_HAS_NRM1)
            out.Write("float3 _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n");
        if (components & VB_HAS_NRM2)
            out.Write("float3 _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n");

        if (!(components & VB_HAS_NRM0))
            out.Write("float3 _norm0 = float3(0.0, 0.0, 0.0);\n");


        out.Write("o.pos = float4(dot(" I_PROJECTION"[0], pos), dot(" I_PROJECTION"[1], pos), dot(" I_PROJECTION"[2], pos), dot(" I_PROJECTION"[3], pos));\n");
        if (api_type & API_D3D9)
        {
            //Write Pos offset for Point/Line Rendering
            out.Write("o.pos.xy = o.pos.xy + " I_PLOFFSETPARAMS"[indices.z].xy * o.pos.w;\n");
        }
        if (needLightShader)
        {
            out.Write("float4 mat, lacc;\n"
                      "float3 ldir, h;\n"
                      "float dist, dist2, attn;\n");
            if (use_integer_math)
            {
                out.Write("int4 ilacc;\n");
            }
        }
        if (!lightingEnabled)
        {
            if (components & VB_HAS_COL0)
                out.Write("o.colors_0 = color0;\n");
            else
                out.Write("o.colors_0 = float4(1.0, 1.0, 1.0, 1.0);\n");

            if (components & VB_HAS_COL1)
                out.Write("o.colors_1 = color1;\n");
            else
                out.Write("o.colors_1 = o.colors_0;\n");
        }
    }
    if (needLightShader)
        GenerateLightingShader<T, Write_Code>(out, uid_data.lighting, components, I_MATERIALS, I_LIGHTS, "color", "o.colors_", xfr, use_integer_math);

    // special case if only pos and tex coord 0 and tex coord input is AB11
    // donko - this has caused problems in some games. removed for now.
    bool texGenSpecialCase = false;
    /*bool texGenSpecialCase =
    ((g_main_cp_state.vtx_desc.Hex & 0x60600L) == g_main_cp_state.vtx_desc.Hex) && // only pos and tex coord 0
    (g_main_cp_state.vtx_desc.Tex0Coord != NOT_PRESENT) &&
    (xfr.texcoords[0].texmtxinfo.inputform == XF_TEXINPUT_AB11);
    */
    if (Write_Code)
    {
        if (xfr.numChan.numColorChans < 2 && needLightShader)
        {
            if (components & VB_HAS_COL1)
                out.Write("o.colors_1 = color1;\n");
            else
                out.Write("o.colors_1 = o.colors_0;\n");
        }
        // transform texcoords
        out.Write("float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n");
    }


    for (unsigned int i = 0; i < xfr.numTexGen.numTexGens; ++i)
    {
        const TexMtxInfo& texinfo = xfr.texMtxInfo[i];
        uid_data.texMtxInfo[i].sourcerow = xfr.texMtxInfo[i].sourcerow;
        if (Write_Code)
        {
            out.Write("{\n");
            out.Write("coord = float4(0.0, 0.0, 1.0, 1.0);\n");
            switch (texinfo.sourcerow)
            {
            case XF_SRCGEOM_INROW:
                _dbg_assert_log_(VIDEO, texinfo.inputform == XF_TEXINPUT_ABC1, "Incorrect inputform sourcerow: XF_SRCGEOM_INROW inputform: %u", texinfo.inputform);
                out.Write("coord = rawpos;\n"); // pos.w is 1
                break;
            case XF_SRCNORMAL_INROW:
                if (components & VB_HAS_NRM0)
                {
                    _dbg_assert_log_(VIDEO, texinfo.inputform == XF_TEXINPUT_ABC1, "Incorrect inputform sourcerow: XF_SRCNORMAL_INROW inputform: %u", texinfo.inputform);
                    out.Write("coord = float4(rawnorm0.xyz, 1.0);\n");
                }
                break;
            case XF_SRCCOLORS_INROW:
                _dbg_assert_log_(VIDEO, texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC0 || texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC1, "texgentype missmatch: %u", texinfo.texgentype);
                break;
            case XF_SRCBINORMAL_T_INROW:
                if (components & VB_HAS_NRM1)
                {
                    _dbg_assert_log_(VIDEO, texinfo.inputform == XF_TEXINPUT_ABC1, "Incorrect inputform sourcerow: XF_SRCBINORMAL_T_INROW inputform: %u", texinfo.inputform);
                    out.Write("coord = float4(rawnorm1.xyz, 1.0);\n");
                }
                break;
            case XF_SRCBINORMAL_B_INROW:
                if (components & VB_HAS_NRM2)
                {
                    _dbg_assert_log_(VIDEO, texinfo.inputform == XF_TEXINPUT_ABC1, "Incorrect inputform sourcerow: XF_SRCBINORMAL_B_INROW inputform: %u", texinfo.inputform);
                    out.Write("coord = float4(rawnorm2.xyz, 1.0);\n");
                }
                break;
            default:
                _dbg_assert_log_(VIDEO, texinfo.sourcerow <= XF_SRCTEX7_INROW, "sourcerow missmatch: %u", texinfo.sourcerow);
                if (components & (VB_HAS_UV0 << (texinfo.sourcerow - XF_SRCTEX0_INROW)))
                    out.Write("coord = float4(tex%d.x, tex%d.y, 1.0, 1.0);\n", texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW);
                break;
            }
        }

        // first transformation
        uid_data.texMtxInfo[i].texgentype = xfr.texMtxInfo[i].texgentype;
        switch (texinfo.texgentype)
        {
        case XF_TEXGEN_EMBOSS_MAP: // calculate tex coords into bump map

            if (components & (VB_HAS_NRM1 | VB_HAS_NRM2))
            {
                // transform the light dir into tangent space
                uid_data.texMtxInfo[i].embosslightshift = xfr.texMtxInfo[i].embosslightshift;
                uid_data.texMtxInfo[i].embosssourceshift = xfr.texMtxInfo[i].embosssourceshift;
                if (Write_Code)
                {
                    out.Write("float3 eldir = normalize(" LIGHT_POS".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(I_LIGHTS, texinfo.embosslightshift));
                    out.Write("o.tex%d.xyz = o.tex%d.xyz + float3(dot(eldir, _norm1), dot(eldir, _norm2), 0.0);\n", i, texinfo.embosssourceshift);
                }
            }
            else
            {
                // The following assert was triggered in House of the Dead Overkill and Star Wars Rogue Squadron 2
                //
                uid_data.texMtxInfo[i].embosssourceshift = xfr.texMtxInfo[i].embosssourceshift;
                if (Write_Code)
                {
                    _dbg_assert_log_(VIDEO, 0, "vertex normals spected");
                    out.Write("o.tex%d.xyz = o.tex%d.xyz;\n", i, texinfo.embosssourceshift);
                }
            }

            break;
        case XF_TEXGEN_COLOR_STRGBC0:
            if (Write_Code)
            {
                _dbg_assert_log_(VIDEO, texinfo.sourcerow == XF_SRCCOLORS_INROW, "sourcerow missmatch spected: XF_SRCCOLORS_INROW found: %u", texinfo.sourcerow);
                out.Write("o.tex%d.xyz = float3(o.colors_0.x, o.colors_0.y, 1);\n", i);
            }
            break;
        case XF_TEXGEN_COLOR_STRGBC1:
            if (Write_Code)
            {
                _dbg_assert_log_(VIDEO, texinfo.sourcerow == XF_SRCCOLORS_INROW, "sourcerow missmatch spected: XF_SRCCOLORS_INROW found: %u", texinfo.sourcerow);
                out.Write("o.tex%d.xyz = float3(o.colors_1.x, o.colors_1.y, 1);\n", i);
            }
            break;
        case XF_TEXGEN_REGULAR:
        default:
            uid_data.texMtxInfo_n_projection |= xfr.texMtxInfo[i].projection << i;
            if (Write_Code)
            {
                if (components & (VB_HAS_TEXMTXIDX0 << i))
                {
                    out.Write("int tmp = int(tex%d.z);\n", i);
                    if (texinfo.projection == XF_TEXPROJ_STQ)
                        out.Write("o.tex%d.xyz = float3(dot(coord, " I_TRANSFORMMATRICES"[tmp]), dot(coord, " I_TRANSFORMMATRICES"[tmp+1]), dot(coord, " I_TRANSFORMMATRICES"[tmp+2]));\n", i);
                    else
                        out.Write("o.tex%d.xyz = float3(dot(coord, " I_TRANSFORMMATRICES"[tmp]), dot(coord, " I_TRANSFORMMATRICES"[tmp+1]), 1);\n", i);
                }
                else
                {
                    if (texinfo.projection == XF_TEXPROJ_STQ)
                        out.Write("o.tex%d.xyz = float3(dot(coord, " I_TEXMATRICES"[%d]), dot(coord, " I_TEXMATRICES"[%d]), dot(coord, " I_TEXMATRICES"[%d]));\n", i, 3 * i, 3 * i + 1, 3 * i + 2);
                    else
                        out.Write("o.tex%d.xyz = float3(dot(coord, " I_TEXMATRICES"[%d]), dot(coord, " I_TEXMATRICES"[%d]), 1);\n", i, 3 * i, 3 * i + 1);
                }
            }
            break;
        }

        uid_data.dualTexTrans_enabled = xfr.dualTexTrans.enabled;
        // CHECKME: does this only work for regular tex gen types?
        if (xfr.dualTexTrans.enabled && texinfo.texgentype == XF_TEXGEN_REGULAR)
        {
            const PostMtxInfo& postInfo = xfr.postMtxInfo[i];

            uid_data.postMtxInfo[i].index = xfr.postMtxInfo[i].index;
            int postidx = postInfo.index;
            if (Write_Code)
            {
                out.Write("float4 P0 = " I_POSTTRANSFORMMATRICES"[%d];\n"
                          "float4 P1 = " I_POSTTRANSFORMMATRICES"[%d];\n"
                          "float4 P2 = " I_POSTTRANSFORMMATRICES"[%d];\n",
                          postidx & 0x3f, (postidx + 1) & 0x3f, (postidx + 2) & 0x3f);
            }
            if (texGenSpecialCase)
            {
                // no normalization
                // q of input is 1
                // q of output is unknown

                // multiply by postmatrix
                if (Write_Code)
                    out.Write("o.tex%d.xyz = float3(dot(P0.xy, o.tex%d.xy) + P0.z + P0.w, dot(P1.xy, o.tex%d.xy) + P1.z + P1.w, 0.0);\n", i, i, i);
            }
            else
            {
                uid_data.postMtxInfo[i].normalize = xfr.postMtxInfo[i].normalize;
                if (Write_Code)
                {
                    if (postInfo.normalize)
                        out.Write("o.tex%d.xyz = normalize(o.tex%d.xyz);\n", i, i);

                    // multiply by postmatrix
                    out.Write("o.tex%d.xyz = float3(dot(P0.xyz, o.tex%d.xyz) + P0.w, dot(P1.xyz, o.tex%d.xyz) + P1.w, dot(P2.xyz, o.tex%d.xyz) + P2.w);\n", i, i, i, i);
                }
            }
        }
        if (Write_Code)
            out.Write("}\n");
    }
    if (Write_Code)
    {
        // clipPos/w needs to be done in pixel shader, not here
        if (xfr.numTexGen.numTexGens < 7)
        {
            out.Write("o.clipPos%s = float4(pos.x,pos.y,o.pos.z,o.pos.w);\n", (api_type == API_OPENGL) ? "_2" : "");
        }
        else
        {
            out.Write("o.tex0.w = pos.x;\n");
            out.Write("o.tex1.w = pos.y;\n");
            out.Write("o.tex2.w = o.pos.z;\n");
            out.Write("o.tex3.w = o.pos.w;\n");
        }

        if (enable_pl)
        {
            if (xfr.numTexGen.numTexGens < 7)
            {
                out.Write("o.Normal%s = float4(_norm0.x,_norm0.y,_norm0.z,pos.z);\n", (api_type == API_OPENGL) ? "_2" : "");
            }
            else
            {
                out.Write("o.tex4.w = _norm0.x;\n");
                out.Write("o.tex5.w = _norm0.y;\n");
                out.Write("o.tex6.w = _norm0.z;\n");
                if (xfr.numTexGen.numTexGens < 8)
                    out.Write("o.tex7 = pos.xyzz;\n");
                else
                    out.Write("o.tex7.w = pos.z;\n");
            }

            if (components & VB_HAS_COL0)
                out.Write("o.colors_0 = color0;\n");
            else
                out.Write("o.colors_0 = float4(1.0, 1.0, 1.0, 1.0);\n");

            if (components & VB_HAS_COL1)
                out.Write("o.colors_1 = color1;\n");
            else
                out.Write("o.colors_1 = o.colors_0;\n");
        }

        //write the true depth value, if the game uses depth textures pixel shaders will override with the correct values
        //if not early z culling will improve speed
        if (g_ActiveConfig.backend_info.bSupportsClipControl)
        {
            out.Write("o.pos.z = -o.pos.z;\n");
        }
        else if (api_type & API_D3D9 || api_type == API_D3D11)
        {
            out.Write("o.pos.z = -((" I_DEPTHPARAMS".x - 1.0) * o.pos.w + o.pos.z * " I_DEPTHPARAMS".y);\n");
        }
        else
        {
            // this results in a scale from -1..0 to -1..1 after perspective
            // divide
            out.Write("o.pos.z = o.pos.z * -2.0 - o.pos.w;\n");

            // the next steps of the OGL pipeline are:
            // (x_c,y_c,z_c,w_c) = o.pos  //switch to OGL spec terminology
            // clipping to -w_c <= (x_c,y_c,z_c) <= w_c
            // (x_d,y_d,z_d) = (x_c,y_c,z_c)/w_c//perspective divide
            // z_w = (f-n)/2*z_d + (n+f)/2
            // z_w now contains the value to go to the 0..1 depth buffer

            //trying to get the correct semantic while not using glDepthRange
            //seems to get rather complicated
        }

        // The console GPU places the pixel center at 7/12 in screen space unless
        // antialiasing is enabled, while D3D11 and OpenGL place it at 0.5, and D3D9 at 0. This results
        // in some primitives being placed one pixel too far to the bottom-right,
        // which in turn can be critical if it happens for clear quads.
        // Hence, we compensate for this pixel center difference so that primitives
        // get rasterized correctly.
        out.Write("o.pos.xy = o.pos.xy + o.pos.w * " I_DEPTHPARAMS".zw;\n");

        if (api_type & API_D3D9)
        {
            // Write Texture Offsets for Point/Line Rendering
            for (unsigned int i = 0; i < xfr.numTexGen.numTexGens; ++i)
            {
                out.Write("o.tex%d.xy = o.tex%d.xy + (" I_PLOFFSETPARAMS"[indices.w].zw * " I_PLOFFSETPARAMS"[indices.y + %d].%s );\n", i, i, ((i / 4) + 1), texOffsetMemberSelector[i % 4]);
            }
        }

        if (api_type == API_OPENGL)
        {
            if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
            {
                AssignVSOutputMembers<T, api_type>(out, "vs", "o", enable_pl, xfr);
            }
            else
            {

                if (xfr.numTexGen.numTexGens < 7)
                {
                    for (unsigned int i = 0; i < 8; ++i)
                    {
                        if (i < xfr.numTexGen.numTexGens)
                            out.Write(" uv%d_2.xyz =  o.tex%d;\n", i, i);
                        else
                            out.Write(" uv%d_2.xyz =  float3(0.0, 0.0, 0.0);\n", i);
                    }
                    out.Write("  clipPos_2 = o.clipPos;\n");
                    if (enable_pl)
                        out.Write("  Normal_2 = o.Normal;\n");
                }
                else
                {
                    // clip position is in w of first 4 texcoords
                    if (enable_pl)
                    {
                        for (int i = 0; i < 8; ++i)
                            out.Write(" uv%d_2 = o.tex%d;\n", i, i);
                    }
                    else
                    {
                        for (unsigned int i = 0; i < xfr.numTexGen.numTexGens; ++i)
                            out.Write("  uv%d_2%s = o.tex%d;\n", i, i < 4 ? ".xyzw" : ".xyz", i);
                    }
                }
                out.Write("colors_0 = o.colors_0;\n");
                out.Write("colors_1 = o.colors_1;\n");
            }
            out.Write("gl_Position = o.pos;\n");
            out.Write("}\n");
        }
        else
        {
            out.Write("return o;\n}\n");
        }

        if (buffer[VERTEXSHADERGEN_BUFFERSIZE - 1] != 0x7C)
            PanicAlert("VertexShader generator - buffer too small, canary has been eaten!");
    }
    if (uidPresent)
    {
        out.CalculateUIDHash();
    }
}
Beispiel #4
0
// Generates the complete vertex shader for the current vertex format and XF state.
//
// T is the generator object. This function relies on it providing Write(fmt, ...)
// and GetUidData<vertex_shader_uid_data>(); when GetUidData() returns nullptr the UID
// fields are written to a local scratch struct instead.
// NOTE(review): no `template <class T>` header is visible directly above this
// definition in this excerpt - confirm it is present in the full file.
//
// api_type  API_OPENGL emits the `in`/`out` + gl_Position flavour of the shader; every
//           other value emits the `VS_OUTPUT main(...)` flavour (the D3D path).
//
// Besides api_type the result depends on global state:
// VertexLoaderManager::g_current_components, xfmem and g_ActiveConfig
// (bpmem is only consulted by the sanity asserts).
//
// Returns the generator object by value once all shader text / UID fields are written.
static T GenerateVertexShader(API_TYPE api_type)
{
  T out;
  const u32 components = VertexLoaderManager::g_current_components;
  // Non-uid template parameters will write to the dummy data (=> gets optimized out).
  // The dummy is value-initialised because texMtxInfo_n_projection is accumulated with
  // |= further down, which reads the previous value even when the dummy is the target.
  vertex_shader_uid_data dummy_data = {};
  vertex_shader_uid_data* uid_data = out.template GetUidData<vertex_shader_uid_data>();
  // A real UID starts fully zeroed so that fields which are not written below have a
  // defined value.
  if (uid_data != nullptr)
    memset(uid_data, 0, sizeof(*uid_data));
  else
    uid_data = &dummy_data;

  // The BP and XF register copies have to agree on the texgen / color channel counts.
  _assert_(bpmem.genMode.numtexgens == xfmem.numTexGen.numTexGens);
  _assert_(bpmem.genMode.numcolchans == xfmem.numChan.numColorChans);

  out.Write("%s", s_lighting_struct);

  // uniforms
  if (api_type == API_OPENGL)
    out.Write("layout(std140%s) uniform VSBlock {\n",
              g_ActiveConfig.backend_info.bSupportsBindingLayout ? ", binding = 2" : "");
  else
    out.Write("cbuffer VSBlock {\n");
  // Passed as an argument instead of as the format string so that a '%' in the uniform
  // block can never be interpreted as a conversion (same form as s_lighting_struct).
  out.Write("%s", s_shader_uniforms);
  out.Write("};\n");

  // VS_OUTPUT is declared for both API flavours; the shader body fills a local
  // `VS_OUTPUT o` and then either returns it or copies it to the real outputs.
  out.Write("struct VS_OUTPUT {\n");
  GenerateVSOutputMembers<T>(out, api_type, "");
  out.Write("};\n");

  uid_data->numTexGens = xfmem.numTexGen.numTexGens;
  uid_data->components = components;
  uid_data->pixel_lighting = g_ActiveConfig.bEnablePixelLighting;

  if (api_type == API_OPENGL)
  {
    // Vertex inputs: only the attributes flagged in `components` are declared
    // (rawpos is always present).
    out.Write("in float4 rawpos; // ATTR%d,\n", SHADER_POSITION_ATTRIB);
    if (components & VB_HAS_POSMTXIDX)
      out.Write("in int posmtx; // ATTR%d,\n", SHADER_POSMTX_ATTRIB);
    if (components & VB_HAS_NRM0)
      out.Write("in float3 rawnorm0; // ATTR%d,\n", SHADER_NORM0_ATTRIB);
    if (components & VB_HAS_NRM1)
      out.Write("in float3 rawnorm1; // ATTR%d,\n", SHADER_NORM1_ATTRIB);
    if (components & VB_HAS_NRM2)
      out.Write("in float3 rawnorm2; // ATTR%d,\n", SHADER_NORM2_ATTRIB);

    if (components & VB_HAS_COL0)
      out.Write("in float4 color0; // ATTR%d,\n", SHADER_COLOR0_ATTRIB);
    if (components & VB_HAS_COL1)
      out.Write("in float4 color1; // ATTR%d,\n", SHADER_COLOR1_ATTRIB);

    // A texcoord input is float3 when it carries a texture matrix index (read back from
    // .z in the texgen code below) and float2 otherwise.
    for (int i = 0; i < 8; ++i)
    {
      u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0 << i));
      if ((components & (VB_HAS_UV0 << i)) || hastexmtx)
        out.Write("in float%d tex%d; // ATTR%d,\n", hastexmtx ? 3 : 2, i,
                  SHADER_TEXTURE0_ATTRIB + i);
    }

    // Outputs: a `vs` interface block when geometry shaders are supported, individual
    // `out` variables otherwise.
    if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
    {
      out.Write("out VertexData {\n");
      GenerateVSOutputMembers<T>(out, api_type, GetInterpolationQualifier(true, false));
      out.Write("} vs;\n");
    }
    else
    {
      // Let's set up attributes
      for (u32 i = 0; i < 8; ++i)
      {
        if (i < xfmem.numTexGen.numTexGens)
        {
          out.Write("%s out float3 uv%u;\n", GetInterpolationQualifier(), i);
        }
      }
      out.Write("%s out float4 clipPos;\n", GetInterpolationQualifier());
      if (g_ActiveConfig.bEnablePixelLighting)
      {
        out.Write("%s out float3 Normal;\n", GetInterpolationQualifier());
        out.Write("%s out float3 WorldPos;\n", GetInterpolationQualifier());
      }
      out.Write("%s out float4 colors_0;\n", GetInterpolationQualifier());
      out.Write("%s out float4 colors_1;\n", GetInterpolationQualifier());
    }

    out.Write("void main()\n{\n");
  }
  else  // D3D
  {
    out.Write("VS_OUTPUT main(\n");

    // inputs
    // rawpos is emitted last: it is the only unconditional parameter, so it is the one
    // that carries the closing parenthesis while every optional one ends with a comma.
    if (components & VB_HAS_NRM0)
      out.Write("  float3 rawnorm0 : NORMAL0,\n");
    if (components & VB_HAS_NRM1)
      out.Write("  float3 rawnorm1 : NORMAL1,\n");
    if (components & VB_HAS_NRM2)
      out.Write("  float3 rawnorm2 : NORMAL2,\n");
    if (components & VB_HAS_COL0)
      out.Write("  float4 color0 : COLOR0,\n");
    if (components & VB_HAS_COL1)
      out.Write("  float4 color1 : COLOR1,\n");
    for (int i = 0; i < 8; ++i)
    {
      u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0 << i));
      if ((components & (VB_HAS_UV0 << i)) || hastexmtx)
        out.Write("  float%d tex%d : TEXCOORD%d,\n", hastexmtx ? 3 : 2, i, i);
    }
    if (components & VB_HAS_POSMTXIDX)
      out.Write("  int posmtx : BLENDINDICES,\n");
    out.Write("  float4 rawpos : POSITION) {\n");
  }

  out.Write("VS_OUTPUT o;\n");

  // transforms
  if (components & VB_HAS_POSMTXIDX)
  {
    // Per-vertex matrix index: position rows come from I_TRANSFORMMATRICES[posmtx..+2],
    // normal rows from I_NORMALMATRICES[(posmtx & 31)..+2].
    out.Write("float4 pos = float4(dot(" I_TRANSFORMMATRICES
              "[posmtx], rawpos), dot(" I_TRANSFORMMATRICES
              "[posmtx+1], rawpos), dot(" I_TRANSFORMMATRICES "[posmtx+2], rawpos), 1);\n");

    if (components & VB_HAS_NRMALL)
    {
      out.Write("int normidx = posmtx & 31;\n");
      out.Write("float3 N0 = " I_NORMALMATRICES "[normidx].xyz, N1 = " I_NORMALMATRICES
                "[normidx+1].xyz, N2 = " I_NORMALMATRICES "[normidx+2].xyz;\n");
    }

    // Only _norm0 is normalized; _norm1/_norm2 are transformed as-is.
    if (components & VB_HAS_NRM0)
      out.Write("float3 _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, "
                "rawnorm0)));\n");
    if (components & VB_HAS_NRM1)
      out.Write(
          "float3 _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n");
    if (components & VB_HAS_NRM2)
      out.Write(
          "float3 _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n");
  }
  else
  {
    // No per-vertex index: rows 0..2 of I_POSNORMALMATRIX transform the position and
    // rows 3..5 transform the normals.
    out.Write("float4 pos = float4(dot(" I_POSNORMALMATRIX "[0], rawpos), dot(" I_POSNORMALMATRIX
              "[1], rawpos), dot(" I_POSNORMALMATRIX "[2], rawpos), 1.0);\n");
    if (components & VB_HAS_NRM0)
      out.Write("float3 _norm0 = normalize(float3(dot(" I_POSNORMALMATRIX
                "[3].xyz, rawnorm0), dot(" I_POSNORMALMATRIX
                "[4].xyz, rawnorm0), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm0)));\n");
    if (components & VB_HAS_NRM1)
      out.Write("float3 _norm1 = float3(dot(" I_POSNORMALMATRIX
                "[3].xyz, rawnorm1), dot(" I_POSNORMALMATRIX
                "[4].xyz, rawnorm1), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm1));\n");
    if (components & VB_HAS_NRM2)
      out.Write("float3 _norm2 = float3(dot(" I_POSNORMALMATRIX
                "[3].xyz, rawnorm2), dot(" I_POSNORMALMATRIX
                "[4].xyz, rawnorm2), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm2));\n");
  }

  // _norm0 has to exist even without a normal attribute because shader code emitted
  // further down (e.g. "o.Normal = _norm0") references it unconditionally.
  if (!(components & VB_HAS_NRM0))
    out.Write("float3 _norm0 = float3(0.0, 0.0, 0.0);\n");

  out.Write("o.pos = float4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION
            "[1], pos), dot(" I_PROJECTION "[2], pos), dot(" I_PROJECTION "[3], pos));\n");

  // Shader-side temporaries; `ldir` is reused by the emboss texgen below, the rest are
  // presumably consumed by the code GenerateLightingShader emits (not visible here).
  out.Write("int4 lacc;\n"
            "float3 ldir, h, cosAttn, distAttn;\n"
            "float dist, dist2, attn;\n");

  uid_data->numColorChans = xfmem.numChan.numColorChans;
  // With no color channel enabled, colors_0 is the vertex color or opaque white.
  if (xfmem.numChan.numColorChans == 0)
  {
    if (components & VB_HAS_COL0)
      out.Write("o.colors_0 = color0;\n");
    else
      out.Write("o.colors_0 = float4(1.0, 1.0, 1.0, 1.0);\n");
  }

  GenerateLightingShader<T>(out, uid_data->lighting, components, "color", "o.colors_");

  // With fewer than two color channels, colors_1 is the second vertex color or a copy
  // of colors_0.
  if (xfmem.numChan.numColorChans < 2)
  {
    if (components & VB_HAS_COL1)
      out.Write("o.colors_1 = color1;\n");
    else
      out.Write("o.colors_1 = o.colors_0;\n");
  }

  // transform texcoords
  out.Write("float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n");
  for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i)
  {
    // Register state is only read here, so bind it read-only (like postInfo below).
    const TexMtxInfo& texinfo = xfmem.texMtxInfo[i];

    // Every texgen gets its own shader scope so that `tmp` and `P0..P2` can be
    // declared again for the next texgen.
    out.Write("{\n");
    out.Write("coord = float4(0.0, 0.0, 1.0, 1.0);\n");
    uid_data->texMtxInfo[i].sourcerow = xfmem.texMtxInfo[i].sourcerow;
    // Select the source row of the texgen. If the selected attribute is missing from
    // the vertex format, `coord` keeps the default assigned above.
    switch (texinfo.sourcerow)
    {
    case XF_SRCGEOM_INROW:
      out.Write("coord.xyz = rawpos.xyz;\n");
      break;
    case XF_SRCNORMAL_INROW:
      if (components & VB_HAS_NRM0)
      {
        out.Write("coord.xyz = rawnorm0.xyz;\n");
      }
      break;
    case XF_SRCCOLORS_INROW:
      // Color sources are handled by the COLOR_STRGBC0/1 texgen types below.
      _assert_(texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC0 ||
               texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC1);
      break;
    case XF_SRCBINORMAL_T_INROW:
      if (components & VB_HAS_NRM1)
      {
        out.Write("coord.xyz = rawnorm1.xyz;\n");
      }
      break;
    case XF_SRCBINORMAL_B_INROW:
      if (components & VB_HAS_NRM2)
      {
        out.Write("coord.xyz = rawnorm2.xyz;\n");
      }
      break;
    default:
      // Remaining rows are the texture coordinate inputs tex0..tex7.
      _assert_(texinfo.sourcerow <= XF_SRCTEX7_INROW);
      if (components & (VB_HAS_UV0 << (texinfo.sourcerow - XF_SRCTEX0_INROW)))
        out.Write("coord = float4(tex%d.x, tex%d.y, 1.0, 1.0);\n",
                  texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW);
      break;
    }
    // Input form of AB11 sets z element to 1.0
    uid_data->texMtxInfo[i].inputform = xfmem.texMtxInfo[i].inputform;
    if (texinfo.inputform == XF_TEXINPUT_AB11)
      out.Write("coord.z = 1.0;\n");

    // first transformation
    uid_data->texMtxInfo[i].texgentype = xfmem.texMtxInfo[i].texgentype;
    switch (texinfo.texgentype)
    {
    case XF_TEXGEN_EMBOSS_MAP:  // calculate tex coords into bump map

      // The emboss result is derived from the already generated texcoord
      // o.tex[embosssourceshift], offset by the light direction when the vertex
      // carries tangent-space normals.
      if (components & (VB_HAS_NRM1 | VB_HAS_NRM2))
      {
        // transform the light dir into tangent space
        uid_data->texMtxInfo[i].embosslightshift = xfmem.texMtxInfo[i].embosslightshift;
        uid_data->texMtxInfo[i].embosssourceshift = xfmem.texMtxInfo[i].embosssourceshift;
        out.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n",
                  LIGHT_POS_PARAMS(texinfo.embosslightshift));
        out.Write(
            "o.tex%d.xyz = o.tex%d.xyz + float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0);\n", i,
            texinfo.embosssourceshift);
      }
      else
      {
        // The following assert was triggered in House of the Dead Overkill and Star Wars Rogue
        // Squadron 2
        //_assert_(0); // should have normals
        uid_data->texMtxInfo[i].embosssourceshift = xfmem.texMtxInfo[i].embosssourceshift;
        out.Write("o.tex%d.xyz = o.tex%d.xyz;\n", i, texinfo.embosssourceshift);
      }

      break;
    case XF_TEXGEN_COLOR_STRGBC0:
      out.Write("o.tex%d.xyz = float3(o.colors_0.x, o.colors_0.y, 1);\n", i);
      break;
    case XF_TEXGEN_COLOR_STRGBC1:
      out.Write("o.tex%d.xyz = float3(o.colors_1.x, o.colors_1.y, 1);\n", i);
      break;
    case XF_TEXGEN_REGULAR:
    default:
      // One projection bit per texgen, accumulated into a single UID field.
      uid_data->texMtxInfo_n_projection |= xfmem.texMtxInfo[i].projection << i;
      if (components & (VB_HAS_TEXMTXIDX0 << i))
      {
        // Per-vertex texture matrix: the row index travels in tex<i>.z.
        out.Write("int tmp = int(tex%d.z);\n", i);
        // STQ projection evaluates the third matrix row; otherwise the third output
        // component is the constant 1.
        if (texinfo.projection == XF_TEXPROJ_STQ)
          out.Write("o.tex%d.xyz = float3(dot(coord, " I_TRANSFORMMATRICES
                    "[tmp]), dot(coord, " I_TRANSFORMMATRICES
                    "[tmp+1]), dot(coord, " I_TRANSFORMMATRICES "[tmp+2]));\n",
                    i);
        else
          out.Write("o.tex%d.xyz = float3(dot(coord, " I_TRANSFORMMATRICES
                    "[tmp]), dot(coord, " I_TRANSFORMMATRICES "[tmp+1]), 1);\n",
                    i);
      }
      else
      {
        // Fixed texture matrix: texgen i uses rows 3*i .. 3*i+2 of I_TEXMATRICES.
        if (texinfo.projection == XF_TEXPROJ_STQ)
          out.Write("o.tex%d.xyz = float3(dot(coord, " I_TEXMATRICES
                    "[%d]), dot(coord, " I_TEXMATRICES "[%d]), dot(coord, " I_TEXMATRICES
                    "[%d]));\n",
                    i, 3 * i, 3 * i + 1, 3 * i + 2);
        else
          out.Write("o.tex%d.xyz = float3(dot(coord, " I_TEXMATRICES
                    "[%d]), dot(coord, " I_TEXMATRICES "[%d]), 1);\n",
                    i, 3 * i, 3 * i + 1);
      }
      break;
    }

    uid_data->dualTexTrans_enabled = xfmem.dualTexTrans.enabled;
    // CHECKME: does this only work for regular tex gen types?
    if (xfmem.dualTexTrans.enabled && texinfo.texgentype == XF_TEXGEN_REGULAR)
    {
      const PostMtxInfo& postInfo = xfmem.postMtxInfo[i];

      uid_data->postMtxInfo[i].index = xfmem.postMtxInfo[i].index;
      int postidx = postInfo.index;
      // The three post matrix row indices are masked to 0..63, so they wrap around
      // instead of indexing past row 63.
      out.Write("float4 P0 = " I_POSTTRANSFORMMATRICES "[%d];\n"
                "float4 P1 = " I_POSTTRANSFORMMATRICES "[%d];\n"
                "float4 P2 = " I_POSTTRANSFORMMATRICES "[%d];\n",
                postidx & 0x3f, (postidx + 1) & 0x3f, (postidx + 2) & 0x3f);

      uid_data->postMtxInfo[i].normalize = xfmem.postMtxInfo[i].normalize;
      if (postInfo.normalize)
        out.Write("o.tex%d.xyz = normalize(o.tex%d.xyz);\n", i, i);

      // multiply by postmatrix
      out.Write("o.tex%d.xyz = float3(dot(P0.xyz, o.tex%d.xyz) + P0.w, dot(P1.xyz, o.tex%d.xyz) + "
                "P1.w, dot(P2.xyz, o.tex%d.xyz) + P2.w);\n",
                i, i, i, i);
    }

    out.Write("}\n");
  }

  // clipPos/w needs to be done in pixel shader, not here
  out.Write("o.clipPos = o.pos;\n");

  if (g_ActiveConfig.bEnablePixelLighting)
  {
    out.Write("o.Normal = _norm0;\n");
    out.Write("o.WorldPos = pos.xyz;\n");

    // With pixel lighting the raw vertex colors are passed through, overwriting the
    // values written to o.colors_0/1 earlier in the shader.
    if (components & VB_HAS_COL0)
      out.Write("o.colors_0 = color0;\n");

    if (components & VB_HAS_COL1)
      out.Write("o.colors_1 = color1;\n");
  }

  // write the true depth value, if the game uses depth textures pixel shaders will override with
  // the correct values
  // if not early z culling will improve speed
  if (g_ActiveConfig.backend_info.bSupportsClipControl)
  {
    out.Write("o.pos.z = -o.pos.z;\n");
  }
  else  // OGL (no clip control available)
  {
    // this results in a scale from -1..0 to -1..1 after perspective
    // divide
    out.Write("o.pos.z = o.pos.z * -2.0 - o.pos.w;\n");

    // the next steps of the OGL pipeline are:
    // (x_c,y_c,z_c,w_c) = o.pos  //switch to OGL spec terminology
    // clipping to -w_c <= (x_c,y_c,z_c) <= w_c
    // (x_d,y_d,z_d) = (x_c,y_c,z_c)/w_c//perspective divide
    // z_w = (f-n)/2*z_d + (n+f)/2
    // z_w now contains the value to go to the 0..1 depth buffer

    // trying to get the correct semantic while not using glDepthRange
    // seems to get rather complicated
  }

  // The console GPU places the pixel center at 7/12 in screen space unless
  // antialiasing is enabled, while D3D and OpenGL place it at 0.5. This results
  // in some primitives being placed one pixel too far to the bottom-right,
  // which in turn can be critical if it happens for clear quads.
  // Hence, we compensate for this pixel center difference so that primitives
  // get rasterized correctly.
  out.Write("o.pos.xy = o.pos.xy - o.pos.w * " I_PIXELCENTERCORRECTION ".xy;\n");

  if (api_type == API_OPENGL)
  {
    // Copy the local VS_OUTPUT to the real shader outputs declared at the top.
    if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
    {
      AssignVSOutputMembers(out, "vs", "o");
    }
    else
    {
      // TODO: Pass interface blocks between shader stages even if geometry shaders
      // are not supported, however that will require at least OpenGL 3.2 support.
      for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i)
        out.Write("uv%d.xyz = o.tex%d;\n", i, i);
      out.Write("clipPos = o.clipPos;\n");
      if (g_ActiveConfig.bEnablePixelLighting)
      {
        out.Write("Normal = o.Normal;\n");
        out.Write("WorldPos = o.WorldPos;\n");
      }
      out.Write("colors_0 = o.colors_0;\n");
      out.Write("colors_1 = o.colors_1;\n");
    }

    out.Write("gl_Position = o.pos;\n");
  }
  else  // D3D
  {
    out.Write("return o;\n");
  }
  // Closes the main() body opened by either API path above.
  out.Write("}\n");

  return out;
}