static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) { // Non-uid template parameters will write to the dummy data (=> gets optimized out) vertex_shader_uid_data dummy_data; vertex_shader_uid_data* uid_data = out.template GetUidData<vertex_shader_uid_data>(); if (uid_data == nullptr) uid_data = &dummy_data; out.SetBuffer(text); const bool is_writing_shadercode = (out.GetBuffer() != nullptr); if (is_writing_shadercode) text[sizeof(text) - 1] = 0x7C; // canary _assert_(bpmem.genMode.numtexgens == xfmem.numTexGen.numTexGens); _assert_(bpmem.genMode.numcolchans == xfmem.numChan.numColorChans); if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS)) { // Add functions to do shifts on scalars and ivecs. // This is included in the vertex shader for lighting shader generation. out.Write("int ilshift(int a, int b) { return a << b; }\n" "int irshift(int a, int b) { return a >> b; }\n" "int2 ilshift(int2 a, int2 b) { return int2(a.x << b.x, a.y << b.y); }\n" "int2 ilshift(int2 a, int b) { return int2(a.x << b, a.y << b); }\n" "int2 irshift(int2 a, int2 b) { return int2(a.x >> b.x, a.y >> b.y); }\n" "int2 irshift(int2 a, int b) { return int2(a.x >> b, a.y >> b); }\n" "int3 ilshift(int3 a, int3 b) { return int3(a.x << b.x, a.y << b.y, a.z << b.z); }\n" "int3 ilshift(int3 a, int b) { return int3(a.x << b, a.y << b, a.z << b); }\n" "int3 irshift(int3 a, int3 b) { return int3(a.x >> b.x, a.y >> b.y, a.z >> b.z); }\n" "int3 irshift(int3 a, int b) { return int3(a.x >> b, a.y >> b, a.z >> b); }\n" "int4 ilshift(int4 a, int4 b) { return int4(a.x << b.x, a.y << b.y, a.z << b.z, a.w << b.w); }\n" "int4 ilshift(int4 a, int b) { return int4(a.x << b, a.y << b, a.z << b, a.w << b); }\n" "int4 irshift(int4 a, int4 b) { return int4(a.x >> b.x, a.y >> b.y, a.z >> b.z, a.w >> b.w); }\n" "int4 irshift(int4 a, int b) { return int4(a.x >> b, a.y >> b, a.z >> b, a.w >> b); }\n\n"); } out.Write("%s", s_lighting_struct); // uniforms if (api_type == API_OPENGL) 
out.Write("layout(std140%s) uniform VSBlock {\n", g_ActiveConfig.backend_info.bSupportsBindingLayout ? ", binding = 2" : ""); else out.Write("cbuffer VSBlock {\n"); out.Write(s_shader_uniforms); out.Write("};\n"); out.Write("struct VS_OUTPUT {\n"); GenerateVSOutputMembers<T>(out, api_type); out.Write("};\n"); uid_data->numTexGens = xfmem.numTexGen.numTexGens; uid_data->components = components; uid_data->pixel_lighting = g_ActiveConfig.bEnablePixelLighting; if (api_type == API_OPENGL) { out.Write("in float4 rawpos; // ATTR%d,\n", SHADER_POSITION_ATTRIB); if (components & VB_HAS_POSMTXIDX) out.Write("in int posmtx; // ATTR%d,\n", SHADER_POSMTX_ATTRIB); if (components & VB_HAS_NRM0) out.Write("in float3 rawnorm0; // ATTR%d,\n", SHADER_NORM0_ATTRIB); if (components & VB_HAS_NRM1) out.Write("in float3 rawnorm1; // ATTR%d,\n", SHADER_NORM1_ATTRIB); if (components & VB_HAS_NRM2) out.Write("in float3 rawnorm2; // ATTR%d,\n", SHADER_NORM2_ATTRIB); if (components & VB_HAS_COL0) out.Write("in float4 color0; // ATTR%d,\n", SHADER_COLOR0_ATTRIB); if (components & VB_HAS_COL1) out.Write("in float4 color1; // ATTR%d,\n", SHADER_COLOR1_ATTRIB); for (int i = 0; i < 8; ++i) { u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0<<i)); if ((components & (VB_HAS_UV0<<i)) || hastexmtx) out.Write("in float%d tex%d; // ATTR%d,\n", hastexmtx ? 3 : 2, i, SHADER_TEXTURE0_ATTRIB + i); } if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) { out.Write("out VertexData {\n"); GenerateVSOutputMembers<T>(out, api_type, g_ActiveConfig.backend_info.bSupportsBindingLayout ? 
"centroid" : "centroid out"); out.Write("} vs;\n"); } else { // Let's set up attributes for (size_t i = 0; i < 8; ++i) { if (i < xfmem.numTexGen.numTexGens) { out.Write("centroid out float3 uv%d;\n", i); } } out.Write("centroid out float4 clipPos;\n"); if (g_ActiveConfig.bEnablePixelLighting) { out.Write("centroid out float3 Normal;\n"); out.Write("centroid out float3 WorldPos;\n"); } out.Write("centroid out float4 colors_0;\n"); out.Write("centroid out float4 colors_1;\n"); } out.Write("void main()\n{\n"); } else // D3D { out.Write("VS_OUTPUT main(\n"); // inputs if (components & VB_HAS_NRM0) out.Write(" float3 rawnorm0 : NORMAL0,\n"); if (components & VB_HAS_NRM1) out.Write(" float3 rawnorm1 : NORMAL1,\n"); if (components & VB_HAS_NRM2) out.Write(" float3 rawnorm2 : NORMAL2,\n"); if (components & VB_HAS_COL0) out.Write(" float4 color0 : COLOR0,\n"); if (components & VB_HAS_COL1) out.Write(" float4 color1 : COLOR1,\n"); for (int i = 0; i < 8; ++i) { u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0<<i)); if ((components & (VB_HAS_UV0<<i)) || hastexmtx) out.Write(" float%d tex%d : TEXCOORD%d,\n", hastexmtx ? 
3 : 2, i, i); } if (components & VB_HAS_POSMTXIDX) out.Write(" int posmtx : BLENDINDICES,\n"); out.Write(" float4 rawpos : POSITION) {\n"); } out.Write("VS_OUTPUT o;\n"); // transforms if (components & VB_HAS_POSMTXIDX) { if (is_writing_shadercode && (DriverDetails::HasBug(DriverDetails::BUG_NODYNUBOACCESS) && !DriverDetails::HasBug(DriverDetails::BUG_ANNIHILATEDUBOS))) { // This'll cause issues, but it can't be helped out.Write("float4 pos = float4(dot(" I_TRANSFORMMATRICES"[0], rawpos), dot(" I_TRANSFORMMATRICES"[1], rawpos), dot(" I_TRANSFORMMATRICES"[2], rawpos), 1);\n"); if (components & VB_HAS_NRMALL) out.Write("float3 N0 = " I_NORMALMATRICES"[0].xyz, N1 = " I_NORMALMATRICES"[1].xyz, N2 = " I_NORMALMATRICES"[2].xyz;\n"); } else { out.Write("float4 pos = float4(dot(" I_TRANSFORMMATRICES"[posmtx], rawpos), dot(" I_TRANSFORMMATRICES"[posmtx+1], rawpos), dot(" I_TRANSFORMMATRICES"[posmtx+2], rawpos), 1);\n"); if (components & VB_HAS_NRMALL) { out.Write("int normidx = posmtx >= 32 ? (posmtx-32) : posmtx;\n"); out.Write("float3 N0 = " I_NORMALMATRICES"[normidx].xyz, N1 = " I_NORMALMATRICES"[normidx+1].xyz, N2 = " I_NORMALMATRICES"[normidx+2].xyz;\n"); } } if (components & VB_HAS_NRM0) out.Write("float3 _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, rawnorm0)));\n"); if (components & VB_HAS_NRM1) out.Write("float3 _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n"); if (components & VB_HAS_NRM2) out.Write("float3 _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n"); } else { out.Write("float4 pos = float4(dot(" I_POSNORMALMATRIX"[0], rawpos), dot(" I_POSNORMALMATRIX"[1], rawpos), dot(" I_POSNORMALMATRIX"[2], rawpos), 1.0);\n"); if (components & VB_HAS_NRM0) out.Write("float3 _norm0 = normalize(float3(dot(" I_POSNORMALMATRIX"[3].xyz, rawnorm0), dot(" I_POSNORMALMATRIX"[4].xyz, rawnorm0), dot(" I_POSNORMALMATRIX"[5].xyz, rawnorm0)));\n"); if (components & VB_HAS_NRM1) out.Write("float3 _norm1 
= float3(dot(" I_POSNORMALMATRIX"[3].xyz, rawnorm1), dot(" I_POSNORMALMATRIX"[4].xyz, rawnorm1), dot(" I_POSNORMALMATRIX"[5].xyz, rawnorm1));\n"); if (components & VB_HAS_NRM2) out.Write("float3 _norm2 = float3(dot(" I_POSNORMALMATRIX"[3].xyz, rawnorm2), dot(" I_POSNORMALMATRIX"[4].xyz, rawnorm2), dot(" I_POSNORMALMATRIX"[5].xyz, rawnorm2));\n"); } if (!(components & VB_HAS_NRM0)) out.Write("float3 _norm0 = float3(0.0, 0.0, 0.0);\n"); out.Write("o.pos = float4(dot(" I_PROJECTION"[0], pos), dot(" I_PROJECTION"[1], pos), dot(" I_PROJECTION"[2], pos), dot(" I_PROJECTION"[3], pos));\n"); out.Write("int4 lacc;\n" "float3 ldir, h, cosAttn, distAttn;\n" "float dist, dist2, attn;\n"); uid_data->numColorChans = xfmem.numChan.numColorChans; if (xfmem.numChan.numColorChans == 0) { if (components & VB_HAS_COL0) out.Write("o.colors_0 = color0;\n"); else out.Write("o.colors_0 = float4(1.0, 1.0, 1.0, 1.0);\n"); } GenerateLightingShader<T>(out, uid_data->lighting, components, "color", "o.colors_"); if (xfmem.numChan.numColorChans < 2) { if (components & VB_HAS_COL1) out.Write("o.colors_1 = color1;\n"); else out.Write("o.colors_1 = o.colors_0;\n"); } // special case if only pos and tex coord 0 and tex coord input is AB11 // donko - this has caused problems in some games. removed for now. 
bool texGenSpecialCase = false; /*bool texGenSpecialCase = ((g_main_cp_state.vtx_desc.Hex & 0x60600L) == g_main_cp_state.vtx_desc.Hex) && // only pos and tex coord 0 (g_main_cp_state.vtx_desc.Tex0Coord != NOT_PRESENT) && (xfmem.texcoords[0].texmtxinfo.inputform == XF_TEXINPUT_AB11); */ // transform texcoords out.Write("float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n"); for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i) { TexMtxInfo& texinfo = xfmem.texMtxInfo[i]; out.Write("{\n"); out.Write("coord = float4(0.0, 0.0, 1.0, 1.0);\n"); uid_data->texMtxInfo[i].sourcerow = xfmem.texMtxInfo[i].sourcerow; switch (texinfo.sourcerow) { case XF_SRCGEOM_INROW: _assert_(texinfo.inputform == XF_TEXINPUT_ABC1); out.Write("coord = rawpos;\n"); // pos.w is 1 break; case XF_SRCNORMAL_INROW: if (components & VB_HAS_NRM0) { _assert_(texinfo.inputform == XF_TEXINPUT_ABC1); out.Write("coord = float4(rawnorm0.xyz, 1.0);\n"); } break; case XF_SRCCOLORS_INROW: _assert_(texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC0 || texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC1); break; case XF_SRCBINORMAL_T_INROW: if (components & VB_HAS_NRM1) { _assert_(texinfo.inputform == XF_TEXINPUT_ABC1); out.Write("coord = float4(rawnorm1.xyz, 1.0);\n"); } break; case XF_SRCBINORMAL_B_INROW: if (components & VB_HAS_NRM2) { _assert_(texinfo.inputform == XF_TEXINPUT_ABC1); out.Write("coord = float4(rawnorm2.xyz, 1.0);\n"); } break; default: _assert_(texinfo.sourcerow <= XF_SRCTEX7_INROW); if (components & (VB_HAS_UV0 << (texinfo.sourcerow - XF_SRCTEX0_INROW))) out.Write("coord = float4(tex%d.x, tex%d.y, 1.0, 1.0);\n", texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW); break; } // first transformation uid_data->texMtxInfo[i].texgentype = xfmem.texMtxInfo[i].texgentype; switch (texinfo.texgentype) { case XF_TEXGEN_EMBOSS_MAP: // calculate tex coords into bump map if (components & (VB_HAS_NRM1|VB_HAS_NRM2)) { // transform the light dir into tangent space 
uid_data->texMtxInfo[i].embosslightshift = xfmem.texMtxInfo[i].embosslightshift; uid_data->texMtxInfo[i].embosssourceshift = xfmem.texMtxInfo[i].embosssourceshift; out.Write("ldir = normalize(" LIGHT_POS".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(texinfo.embosslightshift)); out.Write("o.tex%d.xyz = o.tex%d.xyz + float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0);\n", i, texinfo.embosssourceshift); } else { _assert_(0); // should have normals uid_data->texMtxInfo[i].embosssourceshift = xfmem.texMtxInfo[i].embosssourceshift; out.Write("o.tex%d.xyz = o.tex%d.xyz;\n", i, texinfo.embosssourceshift); } break; case XF_TEXGEN_COLOR_STRGBC0: _assert_(texinfo.sourcerow == XF_SRCCOLORS_INROW); out.Write("o.tex%d.xyz = float3(o.colors_0.x, o.colors_0.y, 1);\n", i); break; case XF_TEXGEN_COLOR_STRGBC1: _assert_(texinfo.sourcerow == XF_SRCCOLORS_INROW); out.Write("o.tex%d.xyz = float3(o.colors_1.x, o.colors_1.y, 1);\n", i); break; case XF_TEXGEN_REGULAR: default: uid_data->texMtxInfo_n_projection |= xfmem.texMtxInfo[i].projection << i; if (components & (VB_HAS_TEXMTXIDX0<<i)) { out.Write("int tmp = int(tex%d.z);\n", i); if (texinfo.projection == XF_TEXPROJ_STQ) out.Write("o.tex%d.xyz = float3(dot(coord, " I_TRANSFORMMATRICES"[tmp]), dot(coord, " I_TRANSFORMMATRICES"[tmp+1]), dot(coord, " I_TRANSFORMMATRICES"[tmp+2]));\n", i); else out.Write("o.tex%d.xyz = float3(dot(coord, " I_TRANSFORMMATRICES"[tmp]), dot(coord, " I_TRANSFORMMATRICES"[tmp+1]), 1);\n", i); } else { if (texinfo.projection == XF_TEXPROJ_STQ) out.Write("o.tex%d.xyz = float3(dot(coord, " I_TEXMATRICES"[%d]), dot(coord, " I_TEXMATRICES"[%d]), dot(coord, " I_TEXMATRICES"[%d]));\n", i, 3*i, 3*i+1, 3*i+2); else out.Write("o.tex%d.xyz = float3(dot(coord, " I_TEXMATRICES"[%d]), dot(coord, " I_TEXMATRICES"[%d]), 1);\n", i, 3*i, 3*i+1); } break; } uid_data->dualTexTrans_enabled = xfmem.dualTexTrans.enabled; // CHECKME: does this only work for regular tex gen types? 
if (xfmem.dualTexTrans.enabled && texinfo.texgentype == XF_TEXGEN_REGULAR) { const PostMtxInfo& postInfo = xfmem.postMtxInfo[i]; uid_data->postMtxInfo[i].index = xfmem.postMtxInfo[i].index; int postidx = postInfo.index; out.Write("float4 P0 = " I_POSTTRANSFORMMATRICES"[%d];\n" "float4 P1 = " I_POSTTRANSFORMMATRICES"[%d];\n" "float4 P2 = " I_POSTTRANSFORMMATRICES"[%d];\n", postidx & 0x3f, (postidx + 1) & 0x3f, (postidx + 2) & 0x3f); if (texGenSpecialCase) { // no normalization // q of input is 1 // q of output is unknown // multiply by postmatrix out.Write("o.tex%d.xyz = float3(dot(P0.xy, o.tex%d.xy) + P0.z + P0.w, dot(P1.xy, o.tex%d.xy) + P1.z + P1.w, 0.0);\n", i, i, i); } else { uid_data->postMtxInfo[i].normalize = xfmem.postMtxInfo[i].normalize; if (postInfo.normalize) out.Write("o.tex%d.xyz = normalize(o.tex%d.xyz);\n", i, i); // multiply by postmatrix out.Write("o.tex%d.xyz = float3(dot(P0.xyz, o.tex%d.xyz) + P0.w, dot(P1.xyz, o.tex%d.xyz) + P1.w, dot(P2.xyz, o.tex%d.xyz) + P2.w);\n", i, i, i, i); } } out.Write("}\n"); } // clipPos/w needs to be done in pixel shader, not here out.Write("o.clipPos = o.pos;\n"); if (g_ActiveConfig.bEnablePixelLighting) { out.Write("o.Normal = _norm0;\n"); out.Write("o.WorldPos = pos.xyz;\n"); if (components & VB_HAS_COL0) out.Write("o.colors_0 = color0;\n"); if (components & VB_HAS_COL1) out.Write("o.colors_1 = color1;\n"); } //write the true depth value, if the game uses depth textures pixel shaders will override with the correct values //if not early z culling will improve speed if (g_ActiveConfig.backend_info.bSupportsClipControl) { out.Write("o.pos.z = -o.pos.z;\n"); } else if (api_type == API_D3D) { out.Write("o.pos.z = -o.pos.z;\n"); } else // OGL { // this results in a scale from -1..0 to -1..1 after perspective // divide out.Write("o.pos.z = o.pos.z * -2.0 - o.pos.w;\n"); // the next steps of the OGL pipeline are: // (x_c,y_c,z_c,w_c) = o.pos //switch to OGL spec terminology // clipping to -w_c <= (x_c,y_c,z_c) <= w_c // 
(x_d,y_d,z_d) = (x_c,y_c,z_c)/w_c//perspective divide // z_w = (f-n)/2*z_d + (n+f)/2 // z_w now contains the value to go to the 0..1 depth buffer //trying to get the correct semantic while not using glDepthRange //seems to get rather complicated } // The console GPU places the pixel center at 7/12 in screen space unless // antialiasing is enabled, while D3D and OpenGL place it at 0.5. This results // in some primitives being placed one pixel too far to the bottom-right, // which in turn can be critical if it happens for clear quads. // Hence, we compensate for this pixel center difference so that primitives // get rasterized correctly. out.Write("o.pos.xy = o.pos.xy - o.pos.w * " I_PIXELCENTERCORRECTION".xy;\n"); if (api_type == API_OPENGL) { if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) { AssignVSOutputMembers(out, "vs", "o"); } else { // TODO: Pass interface blocks between shader stages even if geometry shaders // are not supported, however that will require at least OpenGL 3.2 support. for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i) out.Write("uv%d.xyz = o.tex%d;\n", i, i); out.Write("clipPos = o.clipPos;\n"); if (g_ActiveConfig.bEnablePixelLighting) { out.Write("Normal = o.Normal;\n"); out.Write("WorldPos = o.WorldPos;\n"); } out.Write("colors_0 = o.colors_0;\n"); out.Write("colors_1 = o.colors_1;\n"); } out.Write("gl_Position = o.pos;\n"); } else // D3D { out.Write("return o;\n"); } out.Write("}\n"); if (is_writing_shadercode) { if (text[sizeof(text) - 1] != 0x7C) PanicAlert("VertexShader generator - buffer too small, canary has been eaten!"); } }
// Writes the shader statements that add one light's contribution to `lacc`.
//
// object        - shader text sink.
// uid_data      - lighting state; attnfunc and diffusefunc hold one 2-bit
//                 selector per lighting channel.
// index         - light index, forwarded to the LIGHT_*_PARAMS macros.
// litchan_index - which 2-bit selector of attnfunc/diffusefunc to use.
// alpha         - true: accumulate into lacc.a as a scalar;
//                 false: accumulate into lacc.rgb as a 3-component vector.
static void GenerateLightShader(ShaderCode& object, const LightingUidData& uid_data, int index, int litchan_index, bool alpha)
{
	// Both selectors are extracted with the same shift.
	const int selector_shift = 2 * litchan_index;
	const int attn_mode = (uid_data.attnfunc >> selector_shift) & 0x3;
	const int diffuse_mode = (uid_data.diffusefunc >> selector_shift) & 0x3;

	// Channel swizzle and the matching vector-width suffix ("int3"/"float3"
	// for rgb, plain "int"/"float" for alpha).
	const char* channel = "rgb";
	const char* width_suffix = "3";
	if (alpha)
	{
		channel = "a";
		width_suffix = "";
	}

	// Attenuation: leaves `ldir` and `attn` set in the generated code.
	if (attn_mode == LIGHTATTN_NONE || attn_mode == LIGHTATTN_DIR)
	{
		object.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(index));
		object.Write("attn = 1.0;\n");
		object.Write("if (length(ldir) == 0.0)\n\t ldir = _norm0;\n");
	}
	else if (attn_mode == LIGHTATTN_SPEC)
	{
		// distAttn is only normalized when a diffuse function is active.
		const char* dist_wrapper = "normalize";
		if (diffuse_mode == LIGHTDIF_NONE)
			dist_wrapper = "";

		object.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(index));
		object.Write("attn = (dot(_norm0, ldir) >= 0.0) ? max(0.0, dot(_norm0, " LIGHT_DIR ".xyz)) : 0.0;\n", LIGHT_DIR_PARAMS(index));
		object.Write("cosAttn = " LIGHT_COSATT ".xyz;\n", LIGHT_COSATT_PARAMS(index));
		object.Write("distAttn = %s(" LIGHT_DISTATT ".xyz);\n", dist_wrapper, LIGHT_DISTATT_PARAMS(index));
		object.Write("attn = max(0.0f, dot(cosAttn, float3(1.0, attn, attn*attn))) / dot(distAttn, float3(1.0, attn, attn*attn));\n");
	}
	else if (attn_mode == LIGHTATTN_SPOT)
	{
		object.Write("ldir = " LIGHT_POS ".xyz - pos.xyz;\n", LIGHT_POS_PARAMS(index));
		object.Write("dist2 = dot(ldir, ldir);\n"
		             "dist = sqrt(dist2);\n"
		             "ldir = ldir / dist;\n"
		             "attn = max(0.0, dot(ldir, " LIGHT_DIR ".xyz));\n",
		             LIGHT_DIR_PARAMS(index));
		// attn*attn may overflow
		object.Write("attn = max(0.0, " LIGHT_COSATT ".x + " LIGHT_COSATT ".y*attn + " LIGHT_COSATT ".z*attn*attn) / dot(" LIGHT_DISTATT ".xyz, float3(1.0,dist,dist2));\n",
		             LIGHT_COSATT_PARAMS(index), LIGHT_COSATT_PARAMS(index), LIGHT_COSATT_PARAMS(index), LIGHT_DISTATT_PARAMS(index));
	}

	// Diffuse term: accumulates the (rounded) light color into lacc.
	if (diffuse_mode == LIGHTDIF_NONE)
	{
		object.Write("lacc.%s += int%s(round(attn * float%s(" LIGHT_COL ")));\n",
		             channel, width_suffix, width_suffix, LIGHT_COL_PARAMS(index, channel));
	}
	else if (diffuse_mode == LIGHTDIF_SIGN || diffuse_mode == LIGHTDIF_CLAMP)
	{
		// SIGN keeps the raw dot product, CLAMP clamps it at zero.
		const char* dot_prefix = (diffuse_mode == LIGHTDIF_SIGN) ? "(" : "max(0.0,";
		object.Write("lacc.%s += int%s(round(attn * %sdot(ldir, _norm0)) * float%s(" LIGHT_COL ")));\n",
		             channel, width_suffix, dot_prefix, width_suffix, LIGHT_COL_PARAMS(index, channel));
	}
	else
	{
		_assert_(0);
	}

	object.Write("\n");
}
inline void GenerateVertexShader(T& out, u32 components, const XFMemory &xfr, const BPMemory &bpm, bool use_integer_math) { // Non-uid template parameters will write to the dummy data (=> gets optimized out) bool uidPresent = (&out.template GetUidData<vertex_shader_uid_data>() != NULL); vertex_shader_uid_data dummy_data; vertex_shader_uid_data& uid_data = uidPresent ? out.template GetUidData<vertex_shader_uid_data>() : dummy_data; if (uidPresent) { out.ClearUID(); } if (Write_Code) { _dbg_assert_log_(VIDEO, bpm.genMode.numtexgens == xfr.numTexGen.numTexGens, "numTexGens mismatch bpmem: %u xfmem: %u", bpm.genMode.numtexgens.Value(), xfr.numTexGen.numTexGens); _dbg_assert_log_(VIDEO, bpm.genMode.numcolchans == xfr.numChan.numColorChans, "numColorChans mismatch bpmem: %u xfmem: %u", bpm.genMode.numcolchans.Value(), xfr.numChan.numColorChans); } uid_data.numTexGens = xfr.numTexGen.numTexGens; uid_data.components = components; bool lightingEnabled = xfr.numChan.numColorChans > 0; bool enable_pl = g_ActiveConfig.PixelLightingEnabled(xfr, components); bool needLightShader = lightingEnabled && !enable_pl; for (unsigned int i = 0; i < xfr.numTexGen.numTexGens; ++i) { const TexMtxInfo& texinfo = xfr.texMtxInfo[i]; needLightShader = needLightShader || texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC0 || texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC1; } uid_data.pixel_lighting = enable_pl; uid_data.numColorChans = xfr.numChan.numColorChans; if (!(api_type & API_D3D9)) { uid_data.msaa = g_ActiveConfig.iMultisamples > 1; uid_data.ssaa = g_ActiveConfig.iMultisamples > 1 && g_ActiveConfig.bSSAA; } char * buffer = nullptr; if (Write_Code) { buffer = out.GetBuffer(); if (buffer == nullptr) { buffer = text; out.SetBuffer(text); } buffer[VERTEXSHADERGEN_BUFFERSIZE - 1] = 0x7C; // canary // uniforms if (api_type == API_OPENGL) out.Write("layout(std140%s) uniform VSBlock {\n", g_ActiveConfig.backend_info.bSupportsBindingLayout ? 
", binding = 2" : ""); else if (api_type == API_D3D11) out.Write("cbuffer VSBlock : register(b0) {\n"); DeclareUniform<T, api_type>(out, C_PROJECTION, "float4", I_PROJECTION"[4]"); DeclareUniform<T, api_type>(out, C_DEPTHPARAMS, "float4", I_DEPTHPARAMS); DeclareUniform<T, api_type>(out, C_MATERIALS, "float4", I_MATERIALS"[4]"); DeclareUniform<T, api_type>(out, C_LIGHTS, "float4", I_LIGHTS"[40]"); DeclareUniform<T, api_type>(out, C_TEXMATRICES, "float4", I_TEXMATRICES"[24]"); DeclareUniform<T, api_type>(out, C_TRANSFORMMATRICES, "float4", I_TRANSFORMMATRICES"[64]"); DeclareUniform<T, api_type>(out, C_NORMALMATRICES, "float4", I_NORMALMATRICES"[32]"); DeclareUniform<T, api_type>(out, C_POSTTRANSFORMMATRICES, "float4", I_POSTTRANSFORMMATRICES"[64]"); DeclareUniform<T, api_type>(out, C_PLOFFSETPARAMS, "float4", I_PLOFFSETPARAMS"[13]"); if (api_type == API_OPENGL || api_type == API_D3D11) out.Write("};\n"); out.Write("struct VS_OUTPUT {\n"); GenerateVSOutputMembers<T, api_type>(out, enable_pl, xfr); out.Write("};\n"); if (api_type == API_OPENGL) { out.Write("in float4 rawpos; // ATTR%d,\n", SHADER_POSITION_ATTRIB); out.Write("in float fposmtx; // ATTR%d,\n", SHADER_POSMTX_ATTRIB); if (components & VB_HAS_NRM0) out.Write("in float3 rawnorm0; // ATTR%d,\n", SHADER_NORM0_ATTRIB); if (components & VB_HAS_NRM1) out.Write("in float3 rawnorm1; // ATTR%d,\n", SHADER_NORM1_ATTRIB); if (components & VB_HAS_NRM2) out.Write("in float3 rawnorm2; // ATTR%d,\n", SHADER_NORM2_ATTRIB); if (components & VB_HAS_COL0) out.Write("in float4 color0; // ATTR%d,\n", SHADER_COLOR0_ATTRIB); if (components & VB_HAS_COL1) out.Write("in float4 color1; // ATTR%d,\n", SHADER_COLOR1_ATTRIB); for (int i = 0; i < 8; ++i) { u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0 << i)); if ((components & (VB_HAS_UV0 << i)) || hastexmtx) out.Write("in float%d tex%d; // ATTR%d,\n", hastexmtx ? 
3 : 2, i, SHADER_TEXTURE0_ATTRIB + i); } if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) { out.Write("out VertexData {\n"); GenerateVSOutputMembers<T, api_type>(out, enable_pl, xfr, GetInterpolationQualifier(api_type, false, true)); out.Write("} vs;\n"); } else { const char* optCentroid = GetInterpolationQualifier(api_type); // Let's set up attributes if (xfr.numTexGen.numTexGens < 7) { for (int i = 0; i < 8; ++i) out.Write("%s out float3 uv%d_2;\n", optCentroid, i); out.Write("%s out float4 clipPos_2;\n", optCentroid); if (enable_pl) out.Write("%s out float4 Normal_2;\n", optCentroid); } else { // wpos is in w of first 4 texcoords if (enable_pl) { for (int i = 0; i < 8; ++i) out.Write("%s out float4 uv%d_2;\n", optCentroid, i); } else { for (unsigned int i = 0; i < xfr.numTexGen.numTexGens; ++i) out.Write("%s out float%d uv%d_2;\n", optCentroid, i < 4 ? 4 : 3, i); } } out.Write("%s out float4 colors_0;\n", optCentroid); out.Write("%s out float4 colors_1;\n", optCentroid); } out.Write("void main()\n{\n"); } else { out.Write("VS_OUTPUT main(\n"); // inputs if (components & VB_HAS_NRM0) out.Write(" float3 rawnorm0 : NORMAL0,\n"); if (components & VB_HAS_NRM1) out.Write(" float3 rawnorm1 : NORMAL1,\n"); if (components & VB_HAS_NRM2) out.Write(" float3 rawnorm2 : NORMAL2,\n"); if (components & VB_HAS_COL0) out.Write(" float4 color0 : COLOR0,\n"); if (components & VB_HAS_COL1) out.Write(" float4 color1 : COLOR1,\n"); for (int i = 0; i < 8; ++i) { u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0 << i)); if ((components & (VB_HAS_UV0 << i)) || hastexmtx) out.Write(" float%d tex%d : TEXCOORD%d,\n", hastexmtx ? 
3 : 2, i, i); } out.Write(" float4 blend_indices : BLENDINDICES,\n"); out.Write(" float4 rawpos : POSITION) {\n"); } out.Write("VS_OUTPUT o;\n"); if (api_type & API_D3D9) { out.Write("int4 indices = D3DCOLORtoUBYTE4(blend_indices);\n"); } // transforms if (api_type & API_D3D9) { out.Write("int posmtx = indices.x;\n"); } else if (api_type == API_D3D11) { out.Write("int posmtx = blend_indices.x * 255.0;\n"); } else { out.Write("int posmtx = int(fposmtx);\n"); } out.Write("float4 pos = float4(dot(" I_TRANSFORMMATRICES"[posmtx], rawpos), dot(" I_TRANSFORMMATRICES"[posmtx+1], rawpos), dot(" I_TRANSFORMMATRICES"[posmtx+2], rawpos), 1);\n"); if (components & VB_HAS_NRMALL) { out.Write("int normidx = posmtx >= 32 ? (posmtx-32) : posmtx;\n"); out.Write("float3 N0 = " I_NORMALMATRICES"[normidx].xyz, N1 = " I_NORMALMATRICES"[normidx+1].xyz, N2 = " I_NORMALMATRICES"[normidx+2].xyz;\n"); } if (components & VB_HAS_NRM0) out.Write("float3 _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, rawnorm0)));\n"); if (components & VB_HAS_NRM1) out.Write("float3 _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n"); if (components & VB_HAS_NRM2) out.Write("float3 _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n"); if (!(components & VB_HAS_NRM0)) out.Write("float3 _norm0 = float3(0.0, 0.0, 0.0);\n"); out.Write("o.pos = float4(dot(" I_PROJECTION"[0], pos), dot(" I_PROJECTION"[1], pos), dot(" I_PROJECTION"[2], pos), dot(" I_PROJECTION"[3], pos));\n"); if (api_type & API_D3D9) { //Write Pos offset for Point/Line Rendering out.Write("o.pos.xy = o.pos.xy + " I_PLOFFSETPARAMS"[indices.z].xy * o.pos.w;\n"); } if (needLightShader) { out.Write("float4 mat, lacc;\n" "float3 ldir, h;\n" "float dist, dist2, attn;\n"); if (use_integer_math) { out.Write("int4 ilacc;\n"); } } if (!lightingEnabled) { if (components & VB_HAS_COL0) out.Write("o.colors_0 = color0;\n"); else out.Write("o.colors_0 = float4(1.0, 1.0, 1.0, 1.0);\n"); if 
(components & VB_HAS_COL1) out.Write("o.colors_1 = color1;\n"); else out.Write("o.colors_1 = o.colors_0;\n"); } } if (needLightShader) GenerateLightingShader<T, Write_Code>(out, uid_data.lighting, components, I_MATERIALS, I_LIGHTS, "color", "o.colors_", xfr, use_integer_math); // special case if only pos and tex coord 0 and tex coord input is AB11 // donko - this has caused problems in some games. removed for now. bool texGenSpecialCase = false; /*bool texGenSpecialCase = ((g_main_cp_state.vtx_desc.Hex & 0x60600L) == g_main_cp_state.vtx_desc.Hex) && // only pos and tex coord 0 (g_main_cp_state.vtx_desc.Tex0Coord != NOT_PRESENT) && (xfr.texcoords[0].texmtxinfo.inputform == XF_TEXINPUT_AB11); */ if (Write_Code) { if (xfr.numChan.numColorChans < 2 && needLightShader) { if (components & VB_HAS_COL1) out.Write("o.colors_1 = color1;\n"); else out.Write("o.colors_1 = o.colors_0;\n"); } // transform texcoords out.Write("float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n"); } for (unsigned int i = 0; i < xfr.numTexGen.numTexGens; ++i) { const TexMtxInfo& texinfo = xfr.texMtxInfo[i]; uid_data.texMtxInfo[i].sourcerow = xfr.texMtxInfo[i].sourcerow; if (Write_Code) { out.Write("{\n"); out.Write("coord = float4(0.0, 0.0, 1.0, 1.0);\n"); switch (texinfo.sourcerow) { case XF_SRCGEOM_INROW: _dbg_assert_log_(VIDEO, texinfo.inputform == XF_TEXINPUT_ABC1, "Incorrect inputform sourcerow: XF_SRCGEOM_INROW inputform: %u", texinfo.inputform); out.Write("coord = rawpos;\n"); // pos.w is 1 break; case XF_SRCNORMAL_INROW: if (components & VB_HAS_NRM0) { _dbg_assert_log_(VIDEO, texinfo.inputform == XF_TEXINPUT_ABC1, "Incorrect inputform sourcerow: XF_SRCNORMAL_INROW inputform: %u", texinfo.inputform); out.Write("coord = float4(rawnorm0.xyz, 1.0);\n"); } break; case XF_SRCCOLORS_INROW: _dbg_assert_log_(VIDEO, texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC0 || texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC1, "texgentype missmatch: %u", texinfo.texgentype); break; case XF_SRCBINORMAL_T_INROW: if 
(components & VB_HAS_NRM1) { _dbg_assert_log_(VIDEO, texinfo.inputform == XF_TEXINPUT_ABC1, "Incorrect inputform sourcerow: XF_SRCBINORMAL_T_INROW inputform: %u", texinfo.inputform); out.Write("coord = float4(rawnorm1.xyz, 1.0);\n"); } break; case XF_SRCBINORMAL_B_INROW: if (components & VB_HAS_NRM2) { _dbg_assert_log_(VIDEO, texinfo.inputform == XF_TEXINPUT_ABC1, "Incorrect inputform sourcerow: XF_SRCBINORMAL_B_INROW inputform: %u", texinfo.inputform); out.Write("coord = float4(rawnorm2.xyz, 1.0);\n"); } break; default: _dbg_assert_log_(VIDEO, texinfo.sourcerow <= XF_SRCTEX7_INROW, "sourcerow missmatch: %u", texinfo.sourcerow); if (components & (VB_HAS_UV0 << (texinfo.sourcerow - XF_SRCTEX0_INROW))) out.Write("coord = float4(tex%d.x, tex%d.y, 1.0, 1.0);\n", texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW); break; } } // first transformation uid_data.texMtxInfo[i].texgentype = xfr.texMtxInfo[i].texgentype; switch (texinfo.texgentype) { case XF_TEXGEN_EMBOSS_MAP: // calculate tex coords into bump map if (components & (VB_HAS_NRM1 | VB_HAS_NRM2)) { // transform the light dir into tangent space uid_data.texMtxInfo[i].embosslightshift = xfr.texMtxInfo[i].embosslightshift; uid_data.texMtxInfo[i].embosssourceshift = xfr.texMtxInfo[i].embosssourceshift; if (Write_Code) { out.Write("float3 eldir = normalize(" LIGHT_POS".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(I_LIGHTS, texinfo.embosslightshift)); out.Write("o.tex%d.xyz = o.tex%d.xyz + float3(dot(eldir, _norm1), dot(eldir, _norm2), 0.0);\n", i, texinfo.embosssourceshift); } } else { // The following assert was triggered in House of the Dead Overkill and Star Wars Rogue Squadron 2 // uid_data.texMtxInfo[i].embosssourceshift = xfr.texMtxInfo[i].embosssourceshift; if (Write_Code) { _dbg_assert_log_(VIDEO, 0, "vertex normals spected"); out.Write("o.tex%d.xyz = o.tex%d.xyz;\n", i, texinfo.embosssourceshift); } } break; case XF_TEXGEN_COLOR_STRGBC0: if (Write_Code) { _dbg_assert_log_(VIDEO, 
texinfo.sourcerow == XF_SRCCOLORS_INROW, "sourcerow missmatch spected: XF_SRCCOLORS_INROW found: %u", texinfo.sourcerow); out.Write("o.tex%d.xyz = float3(o.colors_0.x, o.colors_0.y, 1);\n", i); } break; case XF_TEXGEN_COLOR_STRGBC1: if (Write_Code) { _dbg_assert_log_(VIDEO, texinfo.sourcerow == XF_SRCCOLORS_INROW, "sourcerow missmatch spected: XF_SRCCOLORS_INROW found: %u", texinfo.sourcerow); out.Write("o.tex%d.xyz = float3(o.colors_1.x, o.colors_1.y, 1);\n", i); } break; case XF_TEXGEN_REGULAR: default: uid_data.texMtxInfo_n_projection |= xfr.texMtxInfo[i].projection << i; if (Write_Code) { if (components & (VB_HAS_TEXMTXIDX0 << i)) { out.Write("int tmp = int(tex%d.z);\n", i); if (texinfo.projection == XF_TEXPROJ_STQ) out.Write("o.tex%d.xyz = float3(dot(coord, " I_TRANSFORMMATRICES"[tmp]), dot(coord, " I_TRANSFORMMATRICES"[tmp+1]), dot(coord, " I_TRANSFORMMATRICES"[tmp+2]));\n", i); else out.Write("o.tex%d.xyz = float3(dot(coord, " I_TRANSFORMMATRICES"[tmp]), dot(coord, " I_TRANSFORMMATRICES"[tmp+1]), 1);\n", i); } else { if (texinfo.projection == XF_TEXPROJ_STQ) out.Write("o.tex%d.xyz = float3(dot(coord, " I_TEXMATRICES"[%d]), dot(coord, " I_TEXMATRICES"[%d]), dot(coord, " I_TEXMATRICES"[%d]));\n", i, 3 * i, 3 * i + 1, 3 * i + 2); else out.Write("o.tex%d.xyz = float3(dot(coord, " I_TEXMATRICES"[%d]), dot(coord, " I_TEXMATRICES"[%d]), 1);\n", i, 3 * i, 3 * i + 1); } } break; } uid_data.dualTexTrans_enabled = xfr.dualTexTrans.enabled; // CHECKME: does this only work for regular tex gen types? 
if (xfr.dualTexTrans.enabled && texinfo.texgentype == XF_TEXGEN_REGULAR) { const PostMtxInfo& postInfo = xfr.postMtxInfo[i]; uid_data.postMtxInfo[i].index = xfr.postMtxInfo[i].index; int postidx = postInfo.index; if (Write_Code) { out.Write("float4 P0 = " I_POSTTRANSFORMMATRICES"[%d];\n" "float4 P1 = " I_POSTTRANSFORMMATRICES"[%d];\n" "float4 P2 = " I_POSTTRANSFORMMATRICES"[%d];\n", postidx & 0x3f, (postidx + 1) & 0x3f, (postidx + 2) & 0x3f); } if (texGenSpecialCase) { // no normalization // q of input is 1 // q of output is unknown // multiply by postmatrix if (Write_Code) out.Write("o.tex%d.xyz = float3(dot(P0.xy, o.tex%d.xy) + P0.z + P0.w, dot(P1.xy, o.tex%d.xy) + P1.z + P1.w, 0.0);\n", i, i, i); } else { uid_data.postMtxInfo[i].normalize = xfr.postMtxInfo[i].normalize; if (Write_Code) { if (postInfo.normalize) out.Write("o.tex%d.xyz = normalize(o.tex%d.xyz);\n", i, i); // multiply by postmatrix out.Write("o.tex%d.xyz = float3(dot(P0.xyz, o.tex%d.xyz) + P0.w, dot(P1.xyz, o.tex%d.xyz) + P1.w, dot(P2.xyz, o.tex%d.xyz) + P2.w);\n", i, i, i, i); } } } if (Write_Code) out.Write("}\n"); } if (Write_Code) { // clipPos/w needs to be done in pixel shader, not here if (xfr.numTexGen.numTexGens < 7) { out.Write("o.clipPos%s = float4(pos.x,pos.y,o.pos.z,o.pos.w);\n", (api_type == API_OPENGL) ? "_2" : ""); } else { out.Write("o.tex0.w = pos.x;\n"); out.Write("o.tex1.w = pos.y;\n"); out.Write("o.tex2.w = o.pos.z;\n"); out.Write("o.tex3.w = o.pos.w;\n"); } if (enable_pl) { if (xfr.numTexGen.numTexGens < 7) { out.Write("o.Normal%s = float4(_norm0.x,_norm0.y,_norm0.z,pos.z);\n", (api_type == API_OPENGL) ? 
"_2" : ""); } else { out.Write("o.tex4.w = _norm0.x;\n"); out.Write("o.tex5.w = _norm0.y;\n"); out.Write("o.tex6.w = _norm0.z;\n"); if (xfr.numTexGen.numTexGens < 8) out.Write("o.tex7 = pos.xyzz;\n"); else out.Write("o.tex7.w = pos.z;\n"); } if (components & VB_HAS_COL0) out.Write("o.colors_0 = color0;\n"); else out.Write("o.colors_0 = float4(1.0, 1.0, 1.0, 1.0);\n"); if (components & VB_HAS_COL1) out.Write("o.colors_1 = color1;\n"); else out.Write("o.colors_1 = o.colors_0;\n"); } //write the true depth value, if the game uses depth textures pixel shaders will override with the correct values //if not early z culling will improve speed if (g_ActiveConfig.backend_info.bSupportsClipControl) { out.Write("o.pos.z = -o.pos.z;\n"); } else if (api_type & API_D3D9 || api_type == API_D3D11) { out.Write("o.pos.z = -((" I_DEPTHPARAMS".x - 1.0) * o.pos.w + o.pos.z * " I_DEPTHPARAMS".y);\n"); } else { // this results in a scale from -1..0 to -1..1 after perspective // divide out.Write("o.pos.z = o.pos.z * -2.0 - o.pos.w;\n"); // the next steps of the OGL pipeline are: // (x_c,y_c,z_c,w_c) = o.pos //switch to OGL spec terminology // clipping to -w_c <= (x_c,y_c,z_c) <= w_c // (x_d,y_d,z_d) = (x_c,y_c,z_c)/w_c//perspective divide // z_w = (f-n)/2*z_d + (n+f)/2 // z_w now contains the value to go to the 0..1 depth buffer //trying to get the correct semantic while not using glDepthRange //seems to get rather complicated } // The console GPU places the pixel center at 7/12 in screen space unless // antialiasing is enabled, while D3D11 and OpenGL place it at 0.5, and D3D9 at 0. This results // in some primitives being placed one pixel too far to the bottom-right, // which in turn can be critical if it happens for clear quads. // Hence, we compensate for this pixel center difference so that primitives // get rasterized correctly. 
out.Write("o.pos.xy = o.pos.xy + o.pos.w * " I_DEPTHPARAMS".zw;\n"); if (api_type & API_D3D9) { // Write Texture Offsets for Point/Line Rendering for (unsigned int i = 0; i < xfr.numTexGen.numTexGens; ++i) { out.Write("o.tex%d.xy = o.tex%d.xy + (" I_PLOFFSETPARAMS"[indices.w].zw * " I_PLOFFSETPARAMS"[indices.y + %d].%s );\n", i, i, ((i / 4) + 1), texOffsetMemberSelector[i % 4]); } } if (api_type == API_OPENGL) { if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) { AssignVSOutputMembers<T, api_type>(out, "vs", "o", enable_pl, xfr); } else { if (xfr.numTexGen.numTexGens < 7) { for (unsigned int i = 0; i < 8; ++i) { if (i < xfr.numTexGen.numTexGens) out.Write(" uv%d_2.xyz = o.tex%d;\n", i, i); else out.Write(" uv%d_2.xyz = float3(0.0, 0.0, 0.0);\n", i); } out.Write(" clipPos_2 = o.clipPos;\n"); if (enable_pl) out.Write(" Normal_2 = o.Normal;\n"); } else { // clip position is in w of first 4 texcoords if (enable_pl) { for (int i = 0; i < 8; ++i) out.Write(" uv%d_2 = o.tex%d;\n", i, i); } else { for (unsigned int i = 0; i < xfr.numTexGen.numTexGens; ++i) out.Write(" uv%d_2%s = o.tex%d;\n", i, i < 4 ? ".xyzw" : ".xyz", i); } } out.Write("colors_0 = o.colors_0;\n"); out.Write("colors_1 = o.colors_1;\n"); } out.Write("gl_Position = o.pos;\n"); out.Write("}\n"); } else { out.Write("return o;\n}\n"); } if (buffer[VERTEXSHADERGEN_BUFFERSIZE - 1] != 0x7C) PanicAlert("VertexShader generator - buffer too small, canary has been eaten!"); } if (uidPresent) { out.CalculateUIDHash(); } }
static T GenerateVertexShader(API_TYPE api_type) { T out; const u32 components = VertexLoaderManager::g_current_components; // Non-uid template parameters will write to the dummy data (=> gets optimized out) vertex_shader_uid_data dummy_data; vertex_shader_uid_data* uid_data = out.template GetUidData<vertex_shader_uid_data>(); if (uid_data != nullptr) memset(uid_data, 0, sizeof(*uid_data)); else uid_data = &dummy_data; _assert_(bpmem.genMode.numtexgens == xfmem.numTexGen.numTexGens); _assert_(bpmem.genMode.numcolchans == xfmem.numChan.numColorChans); out.Write("%s", s_lighting_struct); // uniforms if (api_type == API_OPENGL) out.Write("layout(std140%s) uniform VSBlock {\n", g_ActiveConfig.backend_info.bSupportsBindingLayout ? ", binding = 2" : ""); else out.Write("cbuffer VSBlock {\n"); out.Write(s_shader_uniforms); out.Write("};\n"); out.Write("struct VS_OUTPUT {\n"); GenerateVSOutputMembers<T>(out, api_type, ""); out.Write("};\n"); uid_data->numTexGens = xfmem.numTexGen.numTexGens; uid_data->components = components; uid_data->pixel_lighting = g_ActiveConfig.bEnablePixelLighting; if (api_type == API_OPENGL) { out.Write("in float4 rawpos; // ATTR%d,\n", SHADER_POSITION_ATTRIB); if (components & VB_HAS_POSMTXIDX) out.Write("in int posmtx; // ATTR%d,\n", SHADER_POSMTX_ATTRIB); if (components & VB_HAS_NRM0) out.Write("in float3 rawnorm0; // ATTR%d,\n", SHADER_NORM0_ATTRIB); if (components & VB_HAS_NRM1) out.Write("in float3 rawnorm1; // ATTR%d,\n", SHADER_NORM1_ATTRIB); if (components & VB_HAS_NRM2) out.Write("in float3 rawnorm2; // ATTR%d,\n", SHADER_NORM2_ATTRIB); if (components & VB_HAS_COL0) out.Write("in float4 color0; // ATTR%d,\n", SHADER_COLOR0_ATTRIB); if (components & VB_HAS_COL1) out.Write("in float4 color1; // ATTR%d,\n", SHADER_COLOR1_ATTRIB); for (int i = 0; i < 8; ++i) { u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0 << i)); if ((components & (VB_HAS_UV0 << i)) || hastexmtx) out.Write("in float%d tex%d; // ATTR%d,\n", hastexmtx ? 
3 : 2, i, SHADER_TEXTURE0_ATTRIB + i); } if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) { out.Write("out VertexData {\n"); GenerateVSOutputMembers<T>(out, api_type, GetInterpolationQualifier(true, false)); out.Write("} vs;\n"); } else { // Let's set up attributes for (u32 i = 0; i < 8; ++i) { if (i < xfmem.numTexGen.numTexGens) { out.Write("%s out float3 uv%u;\n", GetInterpolationQualifier(), i); } } out.Write("%s out float4 clipPos;\n", GetInterpolationQualifier()); if (g_ActiveConfig.bEnablePixelLighting) { out.Write("%s out float3 Normal;\n", GetInterpolationQualifier()); out.Write("%s out float3 WorldPos;\n", GetInterpolationQualifier()); } out.Write("%s out float4 colors_0;\n", GetInterpolationQualifier()); out.Write("%s out float4 colors_1;\n", GetInterpolationQualifier()); } out.Write("void main()\n{\n"); } else // D3D { out.Write("VS_OUTPUT main(\n"); // inputs if (components & VB_HAS_NRM0) out.Write(" float3 rawnorm0 : NORMAL0,\n"); if (components & VB_HAS_NRM1) out.Write(" float3 rawnorm1 : NORMAL1,\n"); if (components & VB_HAS_NRM2) out.Write(" float3 rawnorm2 : NORMAL2,\n"); if (components & VB_HAS_COL0) out.Write(" float4 color0 : COLOR0,\n"); if (components & VB_HAS_COL1) out.Write(" float4 color1 : COLOR1,\n"); for (int i = 0; i < 8; ++i) { u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0 << i)); if ((components & (VB_HAS_UV0 << i)) || hastexmtx) out.Write(" float%d tex%d : TEXCOORD%d,\n", hastexmtx ? 
3 : 2, i, i); } if (components & VB_HAS_POSMTXIDX) out.Write(" int posmtx : BLENDINDICES,\n"); out.Write(" float4 rawpos : POSITION) {\n"); } out.Write("VS_OUTPUT o;\n"); // transforms if (components & VB_HAS_POSMTXIDX) { out.Write("float4 pos = float4(dot(" I_TRANSFORMMATRICES "[posmtx], rawpos), dot(" I_TRANSFORMMATRICES "[posmtx+1], rawpos), dot(" I_TRANSFORMMATRICES "[posmtx+2], rawpos), 1);\n"); if (components & VB_HAS_NRMALL) { out.Write("int normidx = posmtx & 31;\n"); out.Write("float3 N0 = " I_NORMALMATRICES "[normidx].xyz, N1 = " I_NORMALMATRICES "[normidx+1].xyz, N2 = " I_NORMALMATRICES "[normidx+2].xyz;\n"); } if (components & VB_HAS_NRM0) out.Write("float3 _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, " "rawnorm0)));\n"); if (components & VB_HAS_NRM1) out.Write( "float3 _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n"); if (components & VB_HAS_NRM2) out.Write( "float3 _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n"); } else { out.Write("float4 pos = float4(dot(" I_POSNORMALMATRIX "[0], rawpos), dot(" I_POSNORMALMATRIX "[1], rawpos), dot(" I_POSNORMALMATRIX "[2], rawpos), 1.0);\n"); if (components & VB_HAS_NRM0) out.Write("float3 _norm0 = normalize(float3(dot(" I_POSNORMALMATRIX "[3].xyz, rawnorm0), dot(" I_POSNORMALMATRIX "[4].xyz, rawnorm0), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm0)));\n"); if (components & VB_HAS_NRM1) out.Write("float3 _norm1 = float3(dot(" I_POSNORMALMATRIX "[3].xyz, rawnorm1), dot(" I_POSNORMALMATRIX "[4].xyz, rawnorm1), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm1));\n"); if (components & VB_HAS_NRM2) out.Write("float3 _norm2 = float3(dot(" I_POSNORMALMATRIX "[3].xyz, rawnorm2), dot(" I_POSNORMALMATRIX "[4].xyz, rawnorm2), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm2));\n"); } if (!(components & VB_HAS_NRM0)) out.Write("float3 _norm0 = float3(0.0, 0.0, 0.0);\n"); out.Write("o.pos = float4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION "[1], pos), 
dot(" I_PROJECTION "[2], pos), dot(" I_PROJECTION "[3], pos));\n"); out.Write("int4 lacc;\n" "float3 ldir, h, cosAttn, distAttn;\n" "float dist, dist2, attn;\n"); uid_data->numColorChans = xfmem.numChan.numColorChans; if (xfmem.numChan.numColorChans == 0) { if (components & VB_HAS_COL0) out.Write("o.colors_0 = color0;\n"); else out.Write("o.colors_0 = float4(1.0, 1.0, 1.0, 1.0);\n"); } GenerateLightingShader<T>(out, uid_data->lighting, components, "color", "o.colors_"); if (xfmem.numChan.numColorChans < 2) { if (components & VB_HAS_COL1) out.Write("o.colors_1 = color1;\n"); else out.Write("o.colors_1 = o.colors_0;\n"); } // transform texcoords out.Write("float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n"); for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i) { TexMtxInfo& texinfo = xfmem.texMtxInfo[i]; out.Write("{\n"); out.Write("coord = float4(0.0, 0.0, 1.0, 1.0);\n"); uid_data->texMtxInfo[i].sourcerow = xfmem.texMtxInfo[i].sourcerow; switch (texinfo.sourcerow) { case XF_SRCGEOM_INROW: out.Write("coord.xyz = rawpos.xyz;\n"); break; case XF_SRCNORMAL_INROW: if (components & VB_HAS_NRM0) { out.Write("coord.xyz = rawnorm0.xyz;\n"); } break; case XF_SRCCOLORS_INROW: _assert_(texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC0 || texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC1); break; case XF_SRCBINORMAL_T_INROW: if (components & VB_HAS_NRM1) { out.Write("coord.xyz = rawnorm1.xyz;\n"); } break; case XF_SRCBINORMAL_B_INROW: if (components & VB_HAS_NRM2) { out.Write("coord.xyz = rawnorm2.xyz;\n"); } break; default: _assert_(texinfo.sourcerow <= XF_SRCTEX7_INROW); if (components & (VB_HAS_UV0 << (texinfo.sourcerow - XF_SRCTEX0_INROW))) out.Write("coord = float4(tex%d.x, tex%d.y, 1.0, 1.0);\n", texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW); break; } // Input form of AB11 sets z element to 1.0 uid_data->texMtxInfo[i].inputform = xfmem.texMtxInfo[i].inputform; if (texinfo.inputform == XF_TEXINPUT_AB11) out.Write("coord.z = 1.0;\n"); // first 
transformation uid_data->texMtxInfo[i].texgentype = xfmem.texMtxInfo[i].texgentype; switch (texinfo.texgentype) { case XF_TEXGEN_EMBOSS_MAP: // calculate tex coords into bump map if (components & (VB_HAS_NRM1 | VB_HAS_NRM2)) { // transform the light dir into tangent space uid_data->texMtxInfo[i].embosslightshift = xfmem.texMtxInfo[i].embosslightshift; uid_data->texMtxInfo[i].embosssourceshift = xfmem.texMtxInfo[i].embosssourceshift; out.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(texinfo.embosslightshift)); out.Write( "o.tex%d.xyz = o.tex%d.xyz + float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0);\n", i, texinfo.embosssourceshift); } else { // The following assert was triggered in House of the Dead Overkill and Star Wars Rogue // Squadron 2 //_assert_(0); // should have normals uid_data->texMtxInfo[i].embosssourceshift = xfmem.texMtxInfo[i].embosssourceshift; out.Write("o.tex%d.xyz = o.tex%d.xyz;\n", i, texinfo.embosssourceshift); } break; case XF_TEXGEN_COLOR_STRGBC0: out.Write("o.tex%d.xyz = float3(o.colors_0.x, o.colors_0.y, 1);\n", i); break; case XF_TEXGEN_COLOR_STRGBC1: out.Write("o.tex%d.xyz = float3(o.colors_1.x, o.colors_1.y, 1);\n", i); break; case XF_TEXGEN_REGULAR: default: uid_data->texMtxInfo_n_projection |= xfmem.texMtxInfo[i].projection << i; if (components & (VB_HAS_TEXMTXIDX0 << i)) { out.Write("int tmp = int(tex%d.z);\n", i); if (texinfo.projection == XF_TEXPROJ_STQ) out.Write("o.tex%d.xyz = float3(dot(coord, " I_TRANSFORMMATRICES "[tmp]), dot(coord, " I_TRANSFORMMATRICES "[tmp+1]), dot(coord, " I_TRANSFORMMATRICES "[tmp+2]));\n", i); else out.Write("o.tex%d.xyz = float3(dot(coord, " I_TRANSFORMMATRICES "[tmp]), dot(coord, " I_TRANSFORMMATRICES "[tmp+1]), 1);\n", i); } else { if (texinfo.projection == XF_TEXPROJ_STQ) out.Write("o.tex%d.xyz = float3(dot(coord, " I_TEXMATRICES "[%d]), dot(coord, " I_TEXMATRICES "[%d]), dot(coord, " I_TEXMATRICES "[%d]));\n", i, 3 * i, 3 * i + 1, 3 * i + 2); else out.Write("o.tex%d.xyz 
= float3(dot(coord, " I_TEXMATRICES "[%d]), dot(coord, " I_TEXMATRICES "[%d]), 1);\n", i, 3 * i, 3 * i + 1); } break; } uid_data->dualTexTrans_enabled = xfmem.dualTexTrans.enabled; // CHECKME: does this only work for regular tex gen types? if (xfmem.dualTexTrans.enabled && texinfo.texgentype == XF_TEXGEN_REGULAR) { const PostMtxInfo& postInfo = xfmem.postMtxInfo[i]; uid_data->postMtxInfo[i].index = xfmem.postMtxInfo[i].index; int postidx = postInfo.index; out.Write("float4 P0 = " I_POSTTRANSFORMMATRICES "[%d];\n" "float4 P1 = " I_POSTTRANSFORMMATRICES "[%d];\n" "float4 P2 = " I_POSTTRANSFORMMATRICES "[%d];\n", postidx & 0x3f, (postidx + 1) & 0x3f, (postidx + 2) & 0x3f); uid_data->postMtxInfo[i].normalize = xfmem.postMtxInfo[i].normalize; if (postInfo.normalize) out.Write("o.tex%d.xyz = normalize(o.tex%d.xyz);\n", i, i); // multiply by postmatrix out.Write("o.tex%d.xyz = float3(dot(P0.xyz, o.tex%d.xyz) + P0.w, dot(P1.xyz, o.tex%d.xyz) + " "P1.w, dot(P2.xyz, o.tex%d.xyz) + P2.w);\n", i, i, i, i); } out.Write("}\n"); } // clipPos/w needs to be done in pixel shader, not here out.Write("o.clipPos = o.pos;\n"); if (g_ActiveConfig.bEnablePixelLighting) { out.Write("o.Normal = _norm0;\n"); out.Write("o.WorldPos = pos.xyz;\n"); if (components & VB_HAS_COL0) out.Write("o.colors_0 = color0;\n"); if (components & VB_HAS_COL1) out.Write("o.colors_1 = color1;\n"); } // write the true depth value, if the game uses depth textures pixel shaders will override with // the correct values // if not early z culling will improve speed if (g_ActiveConfig.backend_info.bSupportsClipControl) { out.Write("o.pos.z = -o.pos.z;\n"); } else // OGL { // this results in a scale from -1..0 to -1..1 after perspective // divide out.Write("o.pos.z = o.pos.z * -2.0 - o.pos.w;\n"); // the next steps of the OGL pipeline are: // (x_c,y_c,z_c,w_c) = o.pos //switch to OGL spec terminology // clipping to -w_c <= (x_c,y_c,z_c) <= w_c // (x_d,y_d,z_d) = (x_c,y_c,z_c)/w_c//perspective divide // z_w = (f-n)/2*z_d 
+ (n+f)/2 // z_w now contains the value to go to the 0..1 depth buffer // trying to get the correct semantic while not using glDepthRange // seems to get rather complicated } // The console GPU places the pixel center at 7/12 in screen space unless // antialiasing is enabled, while D3D and OpenGL place it at 0.5. This results // in some primitives being placed one pixel too far to the bottom-right, // which in turn can be critical if it happens for clear quads. // Hence, we compensate for this pixel center difference so that primitives // get rasterized correctly. out.Write("o.pos.xy = o.pos.xy - o.pos.w * " I_PIXELCENTERCORRECTION ".xy;\n"); if (api_type == API_OPENGL) { if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) { AssignVSOutputMembers(out, "vs", "o"); } else { // TODO: Pass interface blocks between shader stages even if geometry shaders // are not supported, however that will require at least OpenGL 3.2 support. for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i) out.Write("uv%d.xyz = o.tex%d;\n", i, i); out.Write("clipPos = o.clipPos;\n"); if (g_ActiveConfig.bEnablePixelLighting) { out.Write("Normal = o.Normal;\n"); out.Write("WorldPos = o.WorldPos;\n"); } out.Write("colors_0 = o.colors_0;\n"); out.Write("colors_1 = o.colors_1;\n"); } out.Write("gl_Position = o.pos;\n"); } else // D3D { out.Write("return o;\n"); } out.Write("}\n"); return out; }