Exemplo n.º 1
0
// Fills the vertex-shader UID data held by `out` and, when Write_Code is set,
// also emits the matching vertex shader source through out.Write().
//
// Parameters:
//   out              - generator sink; this function uses its GetUidData, ClearUID,
//                      GetBuffer, SetBuffer, Write and CalculateUIDHash members.
//   components       - bitmask of VB_HAS_* flags describing which vertex attributes exist.
//   xfr              - XF state; source of texgen, colour-channel and post-matrix settings.
//   bpm              - BP state; only read by the debug asserts that cross-check it against xfr.
//   use_integer_math - forwarded to GenerateLightingShader; also adds the `int4 ilacc` temporary.
//
// NOTE(review): T, api_type, Write_Code and `text` are used below but not declared in the
// visible lines - presumably template parameters / a file-level buffer declared elsewhere.
inline void GenerateVertexShader(T& out, u32 components, const XFMemory &xfr, const BPMemory &bpm, bool use_integer_math)
{
    // Non-uid template parameters will write to the dummy data (=> gets optimized out)
    // NOTE(review): this takes the address of the GetUidData() result and compares it with NULL,
    // i.e. it relies on non-uid generators handing back a null-bound reference - confirm.
    bool uidPresent = (&out.template GetUidData<vertex_shader_uid_data>() != NULL);
    vertex_shader_uid_data dummy_data;
    vertex_shader_uid_data& uid_data = uidPresent ? out.template GetUidData<vertex_shader_uid_data>() : dummy_data;
    if (uidPresent)
    {
        out.ClearUID();
    }
    if (Write_Code)
    {
        // Sanity checks only: the BP and XF copies of the texgen / colour-channel counts should agree.
        _dbg_assert_log_(VIDEO, bpm.genMode.numtexgens == xfr.numTexGen.numTexGens, "numTexGens mismatch bpmem: %u xfmem: %u", bpm.genMode.numtexgens.Value(), xfr.numTexGen.numTexGens);
        _dbg_assert_log_(VIDEO, bpm.genMode.numcolchans == xfr.numChan.numColorChans, "numColorChans mismatch bpmem: %u xfmem: %u", bpm.genMode.numcolchans.Value(), xfr.numChan.numColorChans);
    }
    uid_data.numTexGens = xfr.numTexGen.numTexGens;
    uid_data.components = components;
    bool lightingEnabled = xfr.numChan.numColorChans > 0;
    bool enable_pl = g_ActiveConfig.PixelLightingEnabled(xfr, components);
    // Vertex lighting code is needed when lighting is on and not done per pixel ...
    bool needLightShader = lightingEnabled && !enable_pl;
    for (unsigned int i = 0; i < xfr.numTexGen.numTexGens; ++i)
    {
        // ... or when any texgen derives its coordinates from a colour channel.
        const TexMtxInfo& texinfo = xfr.texMtxInfo[i];
        needLightShader = needLightShader || texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC0 || texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC1;
    }
    uid_data.pixel_lighting = enable_pl;
    uid_data.numColorChans = xfr.numChan.numColorChans;
    // The multisampling flags are only recorded when the API_D3D9 bit is not set in api_type.
    if (!(api_type & API_D3D9))
    {
        uid_data.msaa = g_ActiveConfig.iMultisamples > 1;
        uid_data.ssaa = g_ActiveConfig.iMultisamples > 1 && g_ActiveConfig.bSSAA;
    }
    char * buffer = nullptr;
    if (Write_Code)
    {
        // Use the buffer already attached to `out`, falling back to `text` when there is none.
        buffer = out.GetBuffer();
        if (buffer == nullptr)
        {
            buffer = text;
            out.SetBuffer(text);
        }

        // The last byte is a sentinel; it is re-checked at the end to detect an overrun.
        buffer[VERTEXSHADERGEN_BUFFERSIZE - 1] = 0x7C;  // canary
        // uniforms
        if (api_type == API_OPENGL)
            out.Write("layout(std140%s) uniform VSBlock {\n", g_ActiveConfig.backend_info.bSupportsBindingLayout ? ", binding = 2" : "");
        else if (api_type == API_D3D11)
            out.Write("cbuffer VSBlock : register(b0) {\n");

        DeclareUniform<T, api_type>(out, C_PROJECTION, "float4", I_PROJECTION"[4]");
        DeclareUniform<T, api_type>(out, C_DEPTHPARAMS, "float4", I_DEPTHPARAMS);
        DeclareUniform<T, api_type>(out, C_MATERIALS, "float4", I_MATERIALS"[4]");
        DeclareUniform<T, api_type>(out, C_LIGHTS, "float4", I_LIGHTS"[40]");
        DeclareUniform<T, api_type>(out, C_TEXMATRICES, "float4", I_TEXMATRICES"[24]");
        DeclareUniform<T, api_type>(out, C_TRANSFORMMATRICES, "float4", I_TRANSFORMMATRICES"[64]");
        DeclareUniform<T, api_type>(out, C_NORMALMATRICES, "float4", I_NORMALMATRICES"[32]");
        DeclareUniform<T, api_type>(out, C_POSTTRANSFORMMATRICES, "float4", I_POSTTRANSFORMMATRICES"[64]");
        DeclareUniform<T, api_type>(out, C_PLOFFSETPARAMS, "float4", I_PLOFFSETPARAMS"[13]");

        // Close the uniform block opened above (only OpenGL and D3D11 opened one).
        if (api_type == API_OPENGL || api_type == API_D3D11)
            out.Write("};\n");

        out.Write("struct VS_OUTPUT {\n");
        GenerateVSOutputMembers<T, api_type>(out, enable_pl, xfr);
        out.Write("};\n");

        if (api_type == API_OPENGL)
        {
            // GLSL path: vertex attributes are declared as global `in` variables.
            out.Write("in float4 rawpos; // ATTR%d,\n", SHADER_POSITION_ATTRIB);
            out.Write("in float fposmtx; // ATTR%d,\n", SHADER_POSMTX_ATTRIB);
            if (components & VB_HAS_NRM0)
                out.Write("in float3 rawnorm0; // ATTR%d,\n", SHADER_NORM0_ATTRIB);
            if (components & VB_HAS_NRM1)
                out.Write("in float3 rawnorm1; // ATTR%d,\n", SHADER_NORM1_ATTRIB);
            if (components & VB_HAS_NRM2)
                out.Write("in float3 rawnorm2; // ATTR%d,\n", SHADER_NORM2_ATTRIB);

            if (components & VB_HAS_COL0)
                out.Write("in float4 color0; // ATTR%d,\n", SHADER_COLOR0_ATTRIB);
            if (components & VB_HAS_COL1)
                out.Write("in float4 color1; // ATTR%d,\n", SHADER_COLOR1_ATTRIB);

            for (int i = 0; i < 8; ++i)
            {
                // A texcoord input is float3 when it carries a texture-matrix index, float2 otherwise.
                u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0 << i));
                if ((components & (VB_HAS_UV0 << i)) || hastexmtx)
                    out.Write("in float%d tex%d; // ATTR%d,\n", hastexmtx ? 3 : 2, i, SHADER_TEXTURE0_ATTRIB + i);
            }

            if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
            {
                // With geometry shaders the outputs go through a `VertexData` interface block named `vs`.
                out.Write("out VertexData {\n");
                GenerateVSOutputMembers<T, api_type>(out, enable_pl, xfr, GetInterpolationQualifier(api_type, false, true));
                out.Write("} vs;\n");
            }
            else
            {
                const char* optCentroid = GetInterpolationQualifier(api_type);

                // Let's set up attributes
                // Fewer than 7 texgens: clip position (and normal) get their own varyings.
                if (xfr.numTexGen.numTexGens < 7)
                {
                    for (int i = 0; i < 8; ++i)
                        out.Write("%s out float3 uv%d_2;\n", optCentroid, i);
                    out.Write("%s out float4 clipPos_2;\n", optCentroid);
                    if (enable_pl)
                        out.Write("%s out float4 Normal_2;\n", optCentroid);
                }
                else
                {
                    // wpos is in w of first 4 texcoords
                    if (enable_pl)
                    {
                        for (int i = 0; i < 8; ++i)
                            out.Write("%s out float4 uv%d_2;\n", optCentroid, i);
                    }
                    else
                    {
                        for (unsigned int i = 0; i < xfr.numTexGen.numTexGens; ++i)
                            out.Write("%s out float%d uv%d_2;\n", optCentroid, i < 4 ? 4 : 3, i);
                    }
                }
                out.Write("%s out float4 colors_0;\n", optCentroid);
                out.Write("%s out float4 colors_1;\n", optCentroid);
            }

            out.Write("void main()\n{\n");
        }
        else
        {
            // Non-OpenGL path: attributes are parameters of main() tagged with semantics.
            out.Write("VS_OUTPUT main(\n");

            // inputs
            if (components & VB_HAS_NRM0)
                out.Write("  float3 rawnorm0 : NORMAL0,\n");
            if (components & VB_HAS_NRM1)
                out.Write("  float3 rawnorm1 : NORMAL1,\n");
            if (components & VB_HAS_NRM2)
                out.Write("  float3 rawnorm2 : NORMAL2,\n");
            if (components & VB_HAS_COL0)
                out.Write("  float4 color0 : COLOR0,\n");
            if (components & VB_HAS_COL1)
                out.Write("  float4 color1 : COLOR1,\n");
            for (int i = 0; i < 8; ++i)
            {
                u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0 << i));
                if ((components & (VB_HAS_UV0 << i)) || hastexmtx)
                    out.Write("  float%d tex%d : TEXCOORD%d,\n", hastexmtx ? 3 : 2, i, i);
            }
            out.Write("  float4 blend_indices : BLENDINDICES,\n");

            out.Write("  float4 rawpos : POSITION) {\n");
        }
        out.Write("VS_OUTPUT o;\n");
        if (api_type & API_D3D9)
        {
            out.Write("int4 indices = D3DCOLORtoUBYTE4(blend_indices);\n");
        }
        // transforms
        // The position-matrix index comes from a different input depending on the API.
        if (api_type & API_D3D9)
        {
            out.Write("int posmtx = indices.x;\n");
        }
        else if (api_type == API_D3D11)
        {
            out.Write("int posmtx = blend_indices.x * 255.0;\n");
        }
        else
        {
            out.Write("int posmtx = int(fposmtx);\n");
        }

        out.Write("float4 pos = float4(dot(" I_TRANSFORMMATRICES"[posmtx], rawpos), dot(" I_TRANSFORMMATRICES"[posmtx+1], rawpos), dot(" I_TRANSFORMMATRICES"[posmtx+2], rawpos), 1);\n");

        if (components & VB_HAS_NRMALL) {
            // The generated code wraps posmtx values >= 32 back into the 32-entry normal matrix array.
            out.Write("int normidx = posmtx >= 32 ? (posmtx-32) : posmtx;\n");
            out.Write("float3 N0 = " I_NORMALMATRICES"[normidx].xyz, N1 = " I_NORMALMATRICES"[normidx+1].xyz, N2 = " I_NORMALMATRICES"[normidx+2].xyz;\n");
        }

        // Only _norm0 is normalized; _norm1/_norm2 are transformed but left unnormalized.
        if (components & VB_HAS_NRM0)
            out.Write("float3 _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, rawnorm0)));\n");
        if (components & VB_HAS_NRM1)
            out.Write("float3 _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n");
        if (components & VB_HAS_NRM2)
            out.Write("float3 _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n");

        // _norm0 is always declared so later generated code can reference it unconditionally.
        if (!(components & VB_HAS_NRM0))
            out.Write("float3 _norm0 = float3(0.0, 0.0, 0.0);\n");


        out.Write("o.pos = float4(dot(" I_PROJECTION"[0], pos), dot(" I_PROJECTION"[1], pos), dot(" I_PROJECTION"[2], pos), dot(" I_PROJECTION"[3], pos));\n");
        if (api_type & API_D3D9)
        {
            //Write Pos offset for Point/Line Rendering
            out.Write("o.pos.xy = o.pos.xy + " I_PLOFFSETPARAMS"[indices.z].xy * o.pos.w;\n");
        }
        if (needLightShader)
        {
            // Temporaries declared for the code emitted by GenerateLightingShader below.
            out.Write("float4 mat, lacc;\n"
                      "float3 ldir, h;\n"
                      "float dist, dist2, attn;\n");
            if (use_integer_math)
            {
                out.Write("int4 ilacc;\n");
            }
        }
        if (!lightingEnabled)
        {
            // No colour channels: pass vertex colours through, defaulting to white / colors_0.
            if (components & VB_HAS_COL0)
                out.Write("o.colors_0 = color0;\n");
            else
                out.Write("o.colors_0 = float4(1.0, 1.0, 1.0, 1.0);\n");

            if (components & VB_HAS_COL1)
                out.Write("o.colors_1 = color1;\n");
            else
                out.Write("o.colors_1 = o.colors_0;\n");
        }
    }
    // Called outside the Write_Code block and handed uid_data.lighting, so it also runs when
    // no code is written (presumably to fill the lighting part of the UID - confirm).
    if (needLightShader)
        GenerateLightingShader<T, Write_Code>(out, uid_data.lighting, components, I_MATERIALS, I_LIGHTS, "color", "o.colors_", xfr, use_integer_math);

    // special case if only pos and tex coord 0 and tex coord input is AB11
    // donko - this has caused problems in some games. removed for now.
    bool texGenSpecialCase = false;
    /*bool texGenSpecialCase =
    ((g_main_cp_state.vtx_desc.Hex & 0x60600L) == g_main_cp_state.vtx_desc.Hex) && // only pos and tex coord 0
    (g_main_cp_state.vtx_desc.Tex0Coord != NOT_PRESENT) &&
    (xfr.texcoords[0].texmtxinfo.inputform == XF_TEXINPUT_AB11);
    */
    if (Write_Code)
    {
        // With fewer than two colour channels, colors_1 is assigned here instead.
        if (xfr.numChan.numColorChans < 2 && needLightShader)
        {
            if (components & VB_HAS_COL1)
                out.Write("o.colors_1 = color1;\n");
            else
                out.Write("o.colors_1 = o.colors_0;\n");
        }
        // transform texcoords
        out.Write("float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n");
    }


    // One scoped { ... } section is generated per texgen; UID fields are filled regardless of Write_Code.
    for (unsigned int i = 0; i < xfr.numTexGen.numTexGens; ++i)
    {
        const TexMtxInfo& texinfo = xfr.texMtxInfo[i];
        uid_data.texMtxInfo[i].sourcerow = xfr.texMtxInfo[i].sourcerow;
        if (Write_Code)
        {
            out.Write("{\n");
            // Reset to the default; it is kept when the selected source attribute is absent.
            out.Write("coord = float4(0.0, 0.0, 1.0, 1.0);\n");
            switch (texinfo.sourcerow)
            {
            case XF_SRCGEOM_INROW:
                _dbg_assert_log_(VIDEO, texinfo.inputform == XF_TEXINPUT_ABC1, "Incorrect inputform sourcerow: XF_SRCGEOM_INROW inputform: %u", texinfo.inputform);
                out.Write("coord = rawpos;\n"); // pos.w is 1
                break;
            case XF_SRCNORMAL_INROW:
                if (components & VB_HAS_NRM0)
                {
                    _dbg_assert_log_(VIDEO, texinfo.inputform == XF_TEXINPUT_ABC1, "Incorrect inputform sourcerow: XF_SRCNORMAL_INROW inputform: %u", texinfo.inputform);
                    out.Write("coord = float4(rawnorm0.xyz, 1.0);\n");
                }
                break;
            case XF_SRCCOLORS_INROW:
                // Colour sources emit nothing here; the colour texgen cases below write o.tex directly.
                _dbg_assert_log_(VIDEO, texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC0 || texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC1, "texgentype missmatch: %u", texinfo.texgentype);
                break;
            case XF_SRCBINORMAL_T_INROW:
                if (components & VB_HAS_NRM1)
                {
                    _dbg_assert_log_(VIDEO, texinfo.inputform == XF_TEXINPUT_ABC1, "Incorrect inputform sourcerow: XF_SRCBINORMAL_T_INROW inputform: %u", texinfo.inputform);
                    out.Write("coord = float4(rawnorm1.xyz, 1.0);\n");
                }
                break;
            case XF_SRCBINORMAL_B_INROW:
                if (components & VB_HAS_NRM2)
                {
                    _dbg_assert_log_(VIDEO, texinfo.inputform == XF_TEXINPUT_ABC1, "Incorrect inputform sourcerow: XF_SRCBINORMAL_B_INROW inputform: %u", texinfo.inputform);
                    out.Write("coord = float4(rawnorm2.xyz, 1.0);\n");
                }
                break;
            default:
                // Every remaining source row is treated as texcoord N = sourcerow - XF_SRCTEX0_INROW.
                _dbg_assert_log_(VIDEO, texinfo.sourcerow <= XF_SRCTEX7_INROW, "sourcerow missmatch: %u", texinfo.sourcerow);
                if (components & (VB_HAS_UV0 << (texinfo.sourcerow - XF_SRCTEX0_INROW)))
                    out.Write("coord = float4(tex%d.x, tex%d.y, 1.0, 1.0);\n", texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW);
                break;
            }
        }

        // first transformation
        uid_data.texMtxInfo[i].texgentype = xfr.texMtxInfo[i].texgentype;
        switch (texinfo.texgentype)
        {
        case XF_TEXGEN_EMBOSS_MAP: // calculate tex coords into bump map

            // NOTE(review): this branch is taken when EITHER NRM1 or NRM2 is present, yet the
            // generated line references both _norm1 and _norm2, each of which is only declared
            // under its own flag above - confirm both are always present together.
            if (components & (VB_HAS_NRM1 | VB_HAS_NRM2))
            {
                // transform the light dir into tangent space
                uid_data.texMtxInfo[i].embosslightshift = xfr.texMtxInfo[i].embosslightshift;
                uid_data.texMtxInfo[i].embosssourceshift = xfr.texMtxInfo[i].embosssourceshift;
                if (Write_Code)
                {
                    out.Write("float3 eldir = normalize(" LIGHT_POS".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(I_LIGHTS, texinfo.embosslightshift));
                    out.Write("o.tex%d.xyz = o.tex%d.xyz + float3(dot(eldir, _norm1), dot(eldir, _norm2), 0.0);\n", i, texinfo.embosssourceshift);
                }
            }
            else
            {
                // The following assert was triggered in House of the Dead Overkill and Star Wars Rogue Squadron 2
                //
                // Without binormal/tangent data the source texcoord is copied through unchanged.
                uid_data.texMtxInfo[i].embosssourceshift = xfr.texMtxInfo[i].embosssourceshift;
                if (Write_Code)
                {
                    _dbg_assert_log_(VIDEO, 0, "vertex normals spected");
                    out.Write("o.tex%d.xyz = o.tex%d.xyz;\n", i, texinfo.embosssourceshift);
                }
            }

            break;
        case XF_TEXGEN_COLOR_STRGBC0:
            if (Write_Code)
            {
                _dbg_assert_log_(VIDEO, texinfo.sourcerow == XF_SRCCOLORS_INROW, "sourcerow missmatch spected: XF_SRCCOLORS_INROW found: %u", texinfo.sourcerow);
                out.Write("o.tex%d.xyz = float3(o.colors_0.x, o.colors_0.y, 1);\n", i);
            }
            break;
        case XF_TEXGEN_COLOR_STRGBC1:
            if (Write_Code)
            {
                _dbg_assert_log_(VIDEO, texinfo.sourcerow == XF_SRCCOLORS_INROW, "sourcerow missmatch spected: XF_SRCCOLORS_INROW found: %u", texinfo.sourcerow);
                out.Write("o.tex%d.xyz = float3(o.colors_1.x, o.colors_1.y, 1);\n", i);
            }
            break;
        case XF_TEXGEN_REGULAR:
        default:
            // The projection setting of texgen i is OR-ed into the UID shifted left by i.
            uid_data.texMtxInfo_n_projection |= xfr.texMtxInfo[i].projection << i;
            if (Write_Code)
            {
                if (components & (VB_HAS_TEXMTXIDX0 << i))
                {
                    // Per-vertex matrix index: taken from tex<i>.z and used to index the transform matrices.
                    out.Write("int tmp = int(tex%d.z);\n", i);
                    if (texinfo.projection == XF_TEXPROJ_STQ)
                        out.Write("o.tex%d.xyz = float3(dot(coord, " I_TRANSFORMMATRICES"[tmp]), dot(coord, " I_TRANSFORMMATRICES"[tmp+1]), dot(coord, " I_TRANSFORMMATRICES"[tmp+2]));\n", i);
                    else
                        out.Write("o.tex%d.xyz = float3(dot(coord, " I_TRANSFORMMATRICES"[tmp]), dot(coord, " I_TRANSFORMMATRICES"[tmp+1]), 1);\n", i);
                }
                else
                {
                    // Fixed matrix: texgen i uses rows 3*i .. 3*i+2 of the texture matrix array.
                    if (texinfo.projection == XF_TEXPROJ_STQ)
                        out.Write("o.tex%d.xyz = float3(dot(coord, " I_TEXMATRICES"[%d]), dot(coord, " I_TEXMATRICES"[%d]), dot(coord, " I_TEXMATRICES"[%d]));\n", i, 3 * i, 3 * i + 1, 3 * i + 2);
                    else
                        out.Write("o.tex%d.xyz = float3(dot(coord, " I_TEXMATRICES"[%d]), dot(coord, " I_TEXMATRICES"[%d]), 1);\n", i, 3 * i, 3 * i + 1);
                }
            }
            break;
        }

        uid_data.dualTexTrans_enabled = xfr.dualTexTrans.enabled;
        // CHECKME: does this only work for regular tex gen types?
        if (xfr.dualTexTrans.enabled && texinfo.texgentype == XF_TEXGEN_REGULAR)
        {
            const PostMtxInfo& postInfo = xfr.postMtxInfo[i];

            uid_data.postMtxInfo[i].index = xfr.postMtxInfo[i].index;
            int postidx = postInfo.index;
            if (Write_Code)
            {
                // & 0x3f keeps the three row indices inside the 64-entry post-transform array.
                out.Write("float4 P0 = " I_POSTTRANSFORMMATRICES"[%d];\n"
                          "float4 P1 = " I_POSTTRANSFORMMATRICES"[%d];\n"
                          "float4 P2 = " I_POSTTRANSFORMMATRICES"[%d];\n",
                          postidx & 0x3f, (postidx + 1) & 0x3f, (postidx + 2) & 0x3f);
            }
            // texGenSpecialCase is hard-coded to false above, so this first branch never runs.
            if (texGenSpecialCase)
            {
                // no normalization
                // q of input is 1
                // q of output is unknown

                // multiply by postmatrix
                if (Write_Code)
                    out.Write("o.tex%d.xyz = float3(dot(P0.xy, o.tex%d.xy) + P0.z + P0.w, dot(P1.xy, o.tex%d.xy) + P1.z + P1.w, 0.0);\n", i, i, i);
            }
            else
            {
                uid_data.postMtxInfo[i].normalize = xfr.postMtxInfo[i].normalize;
                if (Write_Code)
                {
                    if (postInfo.normalize)
                        out.Write("o.tex%d.xyz = normalize(o.tex%d.xyz);\n", i, i);

                    // multiply by postmatrix
                    out.Write("o.tex%d.xyz = float3(dot(P0.xyz, o.tex%d.xyz) + P0.w, dot(P1.xyz, o.tex%d.xyz) + P1.w, dot(P2.xyz, o.tex%d.xyz) + P2.w);\n", i, i, i, i);
                }
            }
        }
        // Close the per-texgen scope opened at the top of the loop body.
        if (Write_Code)
            out.Write("}\n");
    }
    if (Write_Code)
    {
        // clipPos/w needs to be done in pixel shader, not here
        // NOTE(review): for API_OPENGL this writes "o.clipPos_2" (and "o.Normal_2" below), while
        // the later copy-out code reads "o.clipPos" / "o.Normal". The VS_OUTPUT member names come
        // from GenerateVSOutputMembers, which is not visible here - confirm which spelling is right.
        if (xfr.numTexGen.numTexGens < 7)
        {
            out.Write("o.clipPos%s = float4(pos.x,pos.y,o.pos.z,o.pos.w);\n", (api_type == API_OPENGL) ? "_2" : "");
        }
        else
        {
            // 7 or more texgens: the clip position is packed into the w of tex0..tex3.
            out.Write("o.tex0.w = pos.x;\n");
            out.Write("o.tex1.w = pos.y;\n");
            out.Write("o.tex2.w = o.pos.z;\n");
            out.Write("o.tex3.w = o.pos.w;\n");
        }

        if (enable_pl)
        {
            // Per-pixel lighting: pass the normal and view-space z on to the pixel shader.
            if (xfr.numTexGen.numTexGens < 7)
            {
                out.Write("o.Normal%s = float4(_norm0.x,_norm0.y,_norm0.z,pos.z);\n", (api_type == API_OPENGL) ? "_2" : "");
            }
            else
            {
                out.Write("o.tex4.w = _norm0.x;\n");
                out.Write("o.tex5.w = _norm0.y;\n");
                out.Write("o.tex6.w = _norm0.z;\n");
                // With exactly 7 texgens tex7 is otherwise unused, so it is written in full.
                if (xfr.numTexGen.numTexGens < 8)
                    out.Write("o.tex7 = pos.xyzz;\n");
                else
                    out.Write("o.tex7.w = pos.z;\n");
            }

            // The raw vertex colours are forwarded (white / colors_0 when absent).
            if (components & VB_HAS_COL0)
                out.Write("o.colors_0 = color0;\n");
            else
                out.Write("o.colors_0 = float4(1.0, 1.0, 1.0, 1.0);\n");

            if (components & VB_HAS_COL1)
                out.Write("o.colors_1 = color1;\n");
            else
                out.Write("o.colors_1 = o.colors_0;\n");
        }

        //write the true depth value, if the game uses depth textures pixel shaders will override with the correct values
        //if not early z culling will improve speed
        if (g_ActiveConfig.backend_info.bSupportsClipControl)
        {
            out.Write("o.pos.z = -o.pos.z;\n");
        }
        else if (api_type & API_D3D9 || api_type == API_D3D11)
        {
            out.Write("o.pos.z = -((" I_DEPTHPARAMS".x - 1.0) * o.pos.w + o.pos.z * " I_DEPTHPARAMS".y);\n");
        }
        else
        {
            // this results in a scale from -1..0 to -1..1 after perspective
            // divide
            out.Write("o.pos.z = o.pos.z * -2.0 - o.pos.w;\n");

            // the next steps of the OGL pipeline are:
            // (x_c,y_c,z_c,w_c) = o.pos  //switch to OGL spec terminology
            // clipping to -w_c <= (x_c,y_c,z_c) <= w_c
            // (x_d,y_d,z_d) = (x_c,y_c,z_c)/w_c//perspective divide
            // z_w = (f-n)/2*z_d + (n+f)/2
            // z_w now contains the value to go to the 0..1 depth buffer

            //trying to get the correct semantic while not using glDepthRange
            //seems to get rather complicated
        }

        // The console GPU places the pixel center at 7/12 in screen space unless
        // antialiasing is enabled, while D3D11 and OpenGL place it at 0.5, and D3D9 at 0. This results
        // in some primitives being placed one pixel too far to the bottom-right,
        // which in turn can be critical if it happens for clear quads.
        // Hence, we compensate for this pixel center difference so that primitives
        // get rasterized correctly.
        out.Write("o.pos.xy = o.pos.xy + o.pos.w * " I_DEPTHPARAMS".zw;\n");

        if (api_type & API_D3D9)
        {
            // Write Texture Offsets for Point/Line Rendering
            for (unsigned int i = 0; i < xfr.numTexGen.numTexGens; ++i)
            {
                // Texgen i uses offset row (i / 4) + 1 and the component named by texOffsetMemberSelector[i % 4].
                out.Write("o.tex%d.xy = o.tex%d.xy + (" I_PLOFFSETPARAMS"[indices.w].zw * " I_PLOFFSETPARAMS"[indices.y + %d].%s );\n", i, i, ((i / 4) + 1), texOffsetMemberSelector[i % 4]);
            }
        }

        if (api_type == API_OPENGL)
        {
            // GLSL has no struct return from main(): copy `o` into the declared outputs.
            if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
            {
                AssignVSOutputMembers<T, api_type>(out, "vs", "o", enable_pl, xfr);
            }
            else
            {

                if (xfr.numTexGen.numTexGens < 7)
                {
                    // All 8 uv outputs were declared, so the unused ones are zero-filled.
                    for (unsigned int i = 0; i < 8; ++i)
                    {
                        if (i < xfr.numTexGen.numTexGens)
                            out.Write(" uv%d_2.xyz =  o.tex%d;\n", i, i);
                        else
                            out.Write(" uv%d_2.xyz =  float3(0.0, 0.0, 0.0);\n", i);
                    }
                    out.Write("  clipPos_2 = o.clipPos;\n");
                    if (enable_pl)
                        out.Write("  Normal_2 = o.Normal;\n");
                }
                else
                {
                    // clip position is in w of first 4 texcoords
                    if (enable_pl)
                    {
                        for (int i = 0; i < 8; ++i)
                            out.Write(" uv%d_2 = o.tex%d;\n", i, i);
                    }
                    else
                    {
                        for (unsigned int i = 0; i < xfr.numTexGen.numTexGens; ++i)
                            out.Write("  uv%d_2%s = o.tex%d;\n", i, i < 4 ? ".xyzw" : ".xyz", i);
                    }
                }
                out.Write("colors_0 = o.colors_0;\n");
                out.Write("colors_1 = o.colors_1;\n");
            }
            out.Write("gl_Position = o.pos;\n");
            out.Write("}\n");
        }
        else
        {
            out.Write("return o;\n}\n");
        }

        // If the sentinel byte changed, the generated text ran past the end of the buffer.
        if (buffer[VERTEXSHADERGEN_BUFFERSIZE - 1] != 0x7C)
            PanicAlert("VertexShader generator - buffer too small, canary has been eaten!");
    }
    if (uidPresent)
    {
        out.CalculateUIDHash();
    }
}
Exemplo n.º 2
0
// Fills the geometry-shader UID data held by `out` and writes the geometry shader source
// for the given primitive type through out.Write().
//
// Parameters:
//   out            - generator sink; uses its GetUidData, SetBuffer, GetBuffer and Write members.
//   primitive_type - compared against PRIMITIVE_TRIANGLES / PRIMITIVE_LINES / PRIMITIVE_POINTS
//                    and used to index primitives_ogl / primitives_d3d.
//   ApiType        - API_OPENGL selects the GLSL text; any other value takes the "D3D" branches.
//
// Note that out.Write() is called unconditionally; only the canary handling depends on
// whether `out` actually exposes a buffer.
// NOTE(review): T and `text` are used but not declared in the visible lines - presumably a
// template parameter and a file-level buffer declared elsewhere.
static inline void GenerateGeometryShader(T& out, u32 primitive_type, API_TYPE ApiType)
{
	// Non-uid template parameters will write to the dummy data (=> gets optimized out)
	geometry_shader_uid_data dummy_data;
	geometry_shader_uid_data* uid_data = out.template GetUidData<geometry_shader_uid_data>();
	if (uid_data == nullptr)
		uid_data = &dummy_data;

	out.SetBuffer(text);
	const bool is_writing_shadercode = (out.GetBuffer() != nullptr);

	// The last byte of `text` is a sentinel, re-checked at the end to detect an overrun.
	if (is_writing_shadercode)
		text[sizeof(text) - 1] = 0x7C;  // canary

	uid_data->primitive_type = primitive_type;
	// Input vertex count; assumes the primitive enum is ordered points=0, lines=1, triangles=2 - TODO confirm.
	const unsigned int vertex_in = primitive_type + 1;
	// Triangles emit 3 vertices; points and lines are expanded to 4.
	unsigned int vertex_out = primitive_type == PRIMITIVE_TRIANGLES ? 3 : 4;

	// Wireframe output gets one extra vertex (presumably to close the line loop - confirm in EmitVertex/EndPrimitive).
	uid_data->wireframe = g_ActiveConfig.bWireFrame;
	if (g_ActiveConfig.bWireFrame)
		vertex_out++;

	uid_data->stereo = g_ActiveConfig.iStereoMode > 0;
	if (ApiType == API_OPENGL)
	{
		// Insert layout parameters
		if (g_ActiveConfig.backend_info.bSupportsGSInstancing)
		{
			// With instancing, stereo runs two invocations instead of emitting twice the vertices.
			out.Write("layout(%s, invocations = %d) in;\n", primitives_ogl[primitive_type], g_ActiveConfig.iStereoMode > 0 ? 2 : 1);
			out.Write("layout(%s_strip, max_vertices = %d) out;\n", g_ActiveConfig.bWireFrame ? "line" : "triangle", vertex_out);
		}
		else
		{
			out.Write("layout(%s) in;\n", primitives_ogl[primitive_type]);
			out.Write("layout(%s_strip, max_vertices = %d) out;\n", g_ActiveConfig.bWireFrame ? "line" : "triangle", g_ActiveConfig.iStereoMode > 0 ? vertex_out * 2 : vertex_out);
		}
	}

	out.Write("%s", s_lighting_struct);

	// uniforms
	if (ApiType == API_OPENGL)
		out.Write("layout(std140%s) uniform GSBlock {\n", g_ActiveConfig.backend_info.bSupportsBindingLayout ? ", binding = 3" : "");
	else
		out.Write("cbuffer GSBlock {\n");
	out.Write(
		"\tfloat4 " I_STEREOPARAMS";\n"
		"\tfloat4 " I_LINEPTPARAMS";\n"
		"\tint4 " I_TEXOFFSET";\n"
		"};\n");

	// These come from the global xfmem / g_ActiveConfig rather than from parameters.
	uid_data->numTexGens = xfmem.numTexGen.numTexGens;
	uid_data->pixel_lighting = g_ActiveConfig.bEnablePixelLighting;

	out.Write("struct VS_OUTPUT {\n");
	GenerateVSOutputMembers<T>(out, ApiType);
	out.Write("};\n");

	if (ApiType == API_OPENGL)
	{
		// Lets the shared generated code use the name InstanceID on both APIs.
		if (g_ActiveConfig.backend_info.bSupportsGSInstancing)
			out.Write("#define InstanceID gl_InvocationID\n");

		// Input interface block: one entry per input vertex.
		out.Write("in VertexData {\n");
		GenerateVSOutputMembers<T>(out, ApiType, GetInterpolationQualifier(ApiType, true, true));
		out.Write("} vs[%d];\n", vertex_in);

		// Output interface block, named `ps`.
		out.Write("out VertexData {\n");
		GenerateVSOutputMembers<T>(out, ApiType, GetInterpolationQualifier(ApiType, false, true));

		if (g_ActiveConfig.iStereoMode > 0)
			out.Write("\tflat int layer;\n");

		out.Write("} ps;\n");

		out.Write("void main()\n{\n");
	}
	else // D3D
	{
		// The output vertex wraps VS_OUTPUT and, for stereo, adds the render-target layer.
		out.Write("struct VertexData {\n");
		out.Write("\tVS_OUTPUT o;\n");

		if (g_ActiveConfig.iStereoMode > 0)
			out.Write("\tuint layer : SV_RenderTargetArrayIndex;\n");

		out.Write("};\n");

		if (g_ActiveConfig.backend_info.bSupportsGSInstancing)
		{
			out.Write("[maxvertexcount(%d)]\n[instance(%d)]\n", vertex_out, g_ActiveConfig.iStereoMode > 0 ? 2 : 1);
			out.Write("void main(%s VS_OUTPUT o[%d], inout %sStream<VertexData> output, in uint InstanceID : SV_GSInstanceID)\n{\n", primitives_d3d[primitive_type], vertex_in, g_ActiveConfig.bWireFrame ? "Line" : "Triangle");
		}
		else
		{
			// Without instancing both eyes are emitted by one invocation, doubling the vertex budget.
			out.Write("[maxvertexcount(%d)]\n", g_ActiveConfig.iStereoMode > 0 ? vertex_out * 2 : vertex_out);
			out.Write("void main(%s VS_OUTPUT o[%d], inout %sStream<VertexData> output)\n{\n", primitives_d3d[primitive_type], vertex_in, g_ActiveConfig.bWireFrame ? "Line" : "Triangle");
		}

		out.Write("\tVertexData ps;\n");
	}

	// Per-primitive setup emitted once, before the vertex loop: computes `offset`.
	if (primitive_type == PRIMITIVE_LINES)
	{
		if (ApiType == API_OPENGL)
		{
			out.Write("\tVS_OUTPUT start, end;\n");
			AssignVSOutputMembers(out, "start", "vs[0]");
			AssignVSOutputMembers(out, "end", "vs[1]");
		}
		else
		{
			out.Write("\tVS_OUTPUT start = o[0];\n");
			out.Write("\tVS_OUTPUT end = o[1];\n");
		}

		// GameCube/Wii's line drawing algorithm is a little quirky. It does not
		// use the correct line caps. Instead, the line caps are vertical or
		// horizontal depending the slope of the line.
		out.Write(
			"\tfloat2 offset;\n"
			"\tfloat2 to = abs(end.pos.xy / end.pos.w - start.pos.xy / start.pos.w);\n"
			// FIXME: What does real hardware do when line is at a 45-degree angle?
			// FIXME: Lines aren't drawn at the correct width. See Twilight Princess map.
			"\tif (" I_LINEPTPARAMS".y * to.y > " I_LINEPTPARAMS".x * to.x) {\n"
			// Line is more tall. Extend geometry left and right.
			// Lerp LineWidth/2 from [0..VpWidth] to [-1..1]
			"\t\toffset = float2(" I_LINEPTPARAMS".z / " I_LINEPTPARAMS".x, 0);\n"
			"\t} else {\n"
			// Line is more wide. Extend geometry up and down.
			// Lerp LineWidth/2 from [0..VpHeight] to [1..-1]
			"\t\toffset = float2(0, -" I_LINEPTPARAMS".z / " I_LINEPTPARAMS".y);\n"
			"\t}\n");
	}
	else if (primitive_type == PRIMITIVE_POINTS)
	{
		if (ApiType == API_OPENGL)
		{
			out.Write("\tVS_OUTPUT center;\n");
			AssignVSOutputMembers(out, "center", "vs[0]");
		}
		else
		{
			out.Write("\tVS_OUTPUT center = o[0];\n");
		}

		// Offset from center to upper right vertex
		// Lerp PointSize/2 from [0,0..VpWidth,VpHeight] to [-1,1..1,-1]
		out.Write("\tfloat2 offset = float2(" I_LINEPTPARAMS".w / " I_LINEPTPARAMS".x, -" I_LINEPTPARAMS".w / " I_LINEPTPARAMS".y) * center.pos.w;\n");
	}

	if (g_ActiveConfig.iStereoMode > 0)
	{
		// If the GPU supports invocation we don't need a for loop and can simply use the
		// invocation identifier to determine which layer we're rendering.
		if (g_ActiveConfig.backend_info.bSupportsGSInstancing)
			out.Write("\tint eye = InstanceID;\n");
		else
			out.Write("\tfor (int eye = 0; eye < 2; ++eye) {\n");
	}

	// `first` is only declared here; its use is not visible in this function
	// (presumably inside EmitVertex / EndPrimitive - confirm).
	if (g_ActiveConfig.bWireFrame)
		out.Write("\tVS_OUTPUT first;\n");

	// Loop over the input vertices; `f` is the working copy of the current one.
	out.Write("\tfor (int i = 0; i < %d; ++i) {\n", vertex_in);

	if (ApiType == API_OPENGL)
	{
		out.Write("\tVS_OUTPUT f;\n");
		AssignVSOutputMembers(out, "f", "vs[i]");
	}
	else
	{
		out.Write("\tVS_OUTPUT f = o[i];\n");
	}

	if (g_ActiveConfig.iStereoMode > 0)
	{
		// Select the output layer
		out.Write("\tps.layer = eye;\n");
		if (ApiType == API_OPENGL)
			out.Write("\tgl_Layer = eye;\n");

		// For stereoscopy add a small horizontal offset in Normalized Device Coordinates proportional
		// to the depth of the vertex. We retrieve the depth value from the w-component of the projected
		// vertex which contains the negated z-component of the original vertex.
		// For negative parallax (out-of-screen effects) we subtract a convergence value from
		// the depth value. This results in objects at a distance smaller than the convergence
		// distance to seemingly appear in front of the screen.
		// This formula is based on page 13 of the "Nvidia 3D Vision Automatic, Best Practices Guide"
		out.Write("\tf.pos.x += " I_STEREOPARAMS"[eye] * (f.pos.w - " I_STEREOPARAMS"[2]);\n");
	}

	if (primitive_type == PRIMITIVE_LINES)
	{
		// Each line endpoint becomes two vertices, pushed apart by +/- offset scaled by w.
		out.Write("\tVS_OUTPUT l = f;\n"
		          "\tVS_OUTPUT r = f;\n");

		out.Write("\tl.pos.xy -= offset * l.pos.w;\n"
		          "\tr.pos.xy += offset * r.pos.w;\n");

		// Texture offsets apply only when element [2] of the offset params is non-zero;
		// bit i of element [0] enables the offset for texcoord i.
		out.Write("\tif (" I_TEXOFFSET"[2] != 0) {\n");
		out.Write("\tfloat texOffset = 1.0 / float(" I_TEXOFFSET"[2]);\n");

		for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i)
		{
			out.Write("\tif (((" I_TEXOFFSET"[0] >> %d) & 0x1) != 0)\n", i);
			out.Write("\t\tr.tex%d.x += texOffset;\n", i);
		}
		out.Write("\t}\n");

		EmitVertex<T>(out, "l", ApiType, true);
		EmitVertex<T>(out, "r", ApiType);
	}
	else if (primitive_type == PRIMITIVE_POINTS)
	{
		// A point becomes a quad: lower-left, lower-right, upper-left, upper-right.
		out.Write("\tVS_OUTPUT ll = f;\n"
		          "\tVS_OUTPUT lr = f;\n"
		          "\tVS_OUTPUT ul = f;\n"
		          "\tVS_OUTPUT ur = f;\n");

		out.Write("\tll.pos.xy += float2(-1,-1) * offset;\n"
		          "\tlr.pos.xy += float2(1,-1) * offset;\n"
		          "\tul.pos.xy += float2(-1,1) * offset;\n"
		          "\tur.pos.xy += offset;\n");

		// Same scheme as for lines, using element [3] as the divisor and element [1] as the bitmask.
		out.Write("\tif (" I_TEXOFFSET"[3] != 0) {\n");
		out.Write("\tfloat2 texOffset = float2(1.0 / float(" I_TEXOFFSET"[3]), 1.0 / float(" I_TEXOFFSET"[3]));\n");

		for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i)
		{
			out.Write("\tif (((" I_TEXOFFSET"[1] >> %d) & 0x1) != 0) {\n", i);
			out.Write("\t\tll.tex%d.xy += float2(0,1) * texOffset;\n", i);
			out.Write("\t\tlr.tex%d.xy += texOffset;\n", i);
			out.Write("\t\tur.tex%d.xy += float2(1,0) * texOffset;\n", i);
			out.Write("\t}\n");
		}
		out.Write("\t}\n");

		EmitVertex<T>(out, "ll", ApiType, true);
		EmitVertex<T>(out, "lr", ApiType);
		EmitVertex<T>(out, "ul", ApiType);
		EmitVertex<T>(out, "ur", ApiType);
	}
	else
	{
		// Any other primitive type: the vertex is passed through unchanged.
		EmitVertex<T>(out, "f", ApiType, true);
	}

	// Closes the per-vertex for loop.
	out.Write("\t}\n");

	EndPrimitive<T>(out, ApiType);

	// Closes the explicit eye loop, which was only opened when instancing is unavailable.
	if (g_ActiveConfig.iStereoMode > 0 && !g_ActiveConfig.backend_info.bSupportsGSInstancing)
		out.Write("\t}\n");

	// Closes main().
	out.Write("}\n");

	if (is_writing_shadercode)
	{
		// If the sentinel byte changed, the generated text ran past the end of `text`.
		if (text[sizeof(text) - 1] != 0x7C)
			PanicAlert("GeometryShader generator - buffer too small, canary has been eaten!");
	}
}
Exemplo n.º 3
0
// Generates the pixel shader for the current TEV/BP state into a generator object of type T.
// When T exposes pixel_shader_uid_data (GetUidData returns non-null) the uid fields are filled
// in as well; otherwise they are written to a local dummy object that is discarded.
//
// dstAlphaMode: DSTALPHA_ALPHA_PASS makes ocol0 take its alpha from I_ALPHA (and skips fog),
//               DSTALPHA_DUAL_SOURCE_BLEND adds a second color output (ocol1).
// ApiType:      API_OPENGL selects the GLSL form of each declaration; any other value takes
//               the D3D (HLSL) paths.
// Returns the populated generator object.
//
// Reads the globals bpmem, xfmem, g_ActiveConfig, BoundingBox::active and
// VertexLoaderManager::g_current_components.
static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType)
{
	T out;
	const u32 components = VertexLoaderManager::g_current_components;
	// Non-uid template parameters will write to the dummy data (=> gets optimized out)
	pixel_shader_uid_data dummy_data;
	pixel_shader_uid_data* uid_data = out.template GetUidData<pixel_shader_uid_data>();
	if (uid_data != nullptr)
		memset(uid_data, 0, sizeof(*uid_data));
	else
		uid_data = &dummy_data;

	unsigned int numStages = bpmem.genMode.numtevstages + 1;
	unsigned int numTexgen = bpmem.genMode.numtexgens;

	out.Write("//Pixel Shader for TEV stages\n");
	out.Write("//%i TEV stages, %i texgens, %i IND stages\n",
		numStages, numTexgen, bpmem.genMode.numindstages.Value());

	uid_data->dstAlphaMode = dstAlphaMode;
	uid_data->genMode_numindstages = bpmem.genMode.numindstages;
	uid_data->genMode_numtevstages = bpmem.genMode.numtevstages;
	uid_data->genMode_numtexgens = bpmem.genMode.numtexgens;

	// dot product for integer vectors
	out.Write("int idot(int3 x, int3 y)\n"
	          "{\n"
	          "\tint3 tmp = x * y;\n"
	          "\treturn tmp.x + tmp.y + tmp.z;\n"
	          "}\n");

	out.Write("int idot(int4 x, int4 y)\n"
	          "{\n"
	          "\tint4 tmp = x * y;\n"
	          "\treturn tmp.x + tmp.y + tmp.z + tmp.w;\n"
	          "}\n\n");

	// rounding + casting to integer at once in a single function
	out.Write("int  iround(float  x) { return int (round(x)); }\n"
	          "int2 iround(float2 x) { return int2(round(x)); }\n"
	          "int3 iround(float3 x) { return int3(round(x)); }\n"
	          "int4 iround(float4 x) { return int4(round(x)); }\n\n");

	// truncation + casting to integer at once in a single function
	out.Write("int  itrunc(float  x) { return int (trunc(x)); }\n"
	          "int2 itrunc(float2 x) { return int2(trunc(x)); }\n"
	          "int3 itrunc(float3 x) { return int3(trunc(x)); }\n"
	          "int4 itrunc(float4 x) { return int4(trunc(x)); }\n\n");

	// Texture samplers: eight array samplers for GLSL, eight SamplerState / Texture2DArray
	// entries for D3D.
	if (ApiType == API_OPENGL)
	{
		out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp[8];\n");
	}
	else // D3D
	{
		// Declare samplers
		out.Write("SamplerState samp[8] : register(s0);\n");
		out.Write("\n");
		out.Write("Texture2DArray Tex[8] : register(t0);\n");
	}
	out.Write("\n");

	// Pixel shader constant block. The member list below is emitted verbatim into the shader.
	if (ApiType == API_OPENGL)
	{
		out.Write("layout(std140%s) uniform PSBlock {\n", g_ActiveConfig.backend_info.bSupportsBindingLayout ? ", binding = 1" : "");
	}
	else
	{
		out.Write("cbuffer PSBlock : register(b0) {\n");
	}
	out.Write(
		"\tint4 " I_COLORS"[4];\n"
		"\tint4 " I_KCOLORS"[4];\n"
		"\tint4 " I_ALPHA";\n"
		"\tfloat4 " I_TEXDIMS"[8];\n"
		"\tint4 " I_ZBIAS"[2];\n"
		"\tint4 " I_INDTEXSCALE"[2];\n"
		"\tint4 " I_INDTEXMTX"[6];\n"
		"\tint4 " I_FOGCOLOR";\n"
		"\tint4 " I_FOGI";\n"
		"\tfloat4 " I_FOGF"[2];\n"
		"\tfloat4 " I_ZSLOPE";\n"
		"\tfloat4 " I_EFBSCALE";\n"
		"};\n");

	// Per-pixel lighting additionally declares the lighting struct and the VSBlock uniforms
	// inside the pixel shader.
	if (g_ActiveConfig.bEnablePixelLighting)
	{
		out.Write("%s", s_lighting_struct);

		if (ApiType == API_OPENGL)
		{
			out.Write("layout(std140%s) uniform VSBlock {\n", g_ActiveConfig.backend_info.bSupportsBindingLayout ? ", binding = 2" : "");
		}
		else
		{
			out.Write("cbuffer VSBlock : register(b1) {\n");
		}
		out.Write(s_shader_uniforms);
		out.Write("};\n");
	}

	// Bounding box storage is only declared when the backend supports it and it is enabled.
	if (g_ActiveConfig.backend_info.bSupportsBBox && g_ActiveConfig.bBBoxEnable)
	{
		if (ApiType == API_OPENGL)
		{
			out.Write(
				"layout(std140, binding = 3) buffer BBox {\n"
				"\tint4 bbox_data;\n"
				"};\n"
				);
		}
		else
		{
			out.Write(
				"globallycoherent RWBuffer<int> bbox_data : register(u2);\n"
				);
		}
	}

	out.Write("struct VS_OUTPUT {\n");
	GenerateVSOutputMembers<T>(out, ApiType);
	out.Write("};\n");

	const bool forced_early_z = g_ActiveConfig.backend_info.bSupportsEarlyZ && bpmem.UseEarlyDepthTest()
	                            && (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED)
	                            // We can't allow early_ztest for zfreeze because depth is overridden per-pixel.
	                            // This means it's impossible for zcomploc to be emulated on a zfrozen polygon.
	                            && !(bpmem.zmode.testenable && bpmem.genMode.zfreeze);
	const bool per_pixel_depth = (bpmem.ztex2.op != ZTEXTURE_DISABLE && bpmem.UseLateDepthTest())
	                             || (!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z)
	                             || (bpmem.zmode.testenable && bpmem.genMode.zfreeze);

	if (forced_early_z)
	{
		// Zcomploc (aka early_ztest) is a way to control whether depth test is done before
		// or after texturing and alpha test. PC graphics APIs used to provide no way to emulate
		// this feature properly until 2012: Depth tests were always done after alpha testing.
		// Most importantly, it was not possible to write to the depth buffer without also writing
		// a color value (unless color writing was disabled altogether).

		// OpenGL 4.2 actually provides two extensions which can force an early z test:
		//  * ARB_image_load_store has 'layout(early_fragment_tests)' which forces the driver to do z and stencil tests early.
		//  * ARB_conservative_depth has 'layout(depth_unchanged)' which signals to the driver that it can make optimisations
		//    which assume the pixel shader won't update the depth buffer.

		// early_fragment_tests is the best option, as it requires the driver to do early-z and defines early-z exactly as
		// we expect, with discard causing the shader to exit with only the depth buffer updated.

		// Conservative depth's 'depth_unchanged' only hints to the driver that an early-z optimisation can be made and
		// doesn't define what will happen if we discard the fragment. But the way modern graphics hardware is implemented
		// means it is not unreasonable to expect the same behaviour as early_fragment_tests.
		// We can also assume that if a driver has gone out of its way to support conservative depth and not image_load_store
		// as required by OpenGL 4.2 that it will be doing the optimisation.
		// If the driver doesn't actually do an early z optimisation, ZCompLoc will be broken and depth will only be written
		// if the alpha test passes.

		// We support Conservative as a fallback, because many drivers based on Mesa haven't implemented all of the
		// ARB_image_load_store extension yet.

		// D3D11 also has a way to force the driver to enable early-z, so we're fine here.
		if(ApiType == API_OPENGL)
		{
			 // This is a #define which signals whatever early-z method the driver supports.
			out.Write("FORCE_EARLY_Z; \n");
		}
		else
		{
			out.Write("[earlydepthstencil]\n");
		}
	}
	else if (bpmem.UseEarlyDepthTest() && (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED))
	{
		// Early z was requested but cannot be forced; log this only once per process.
		static bool warn_once = true;
		if (warn_once)
			WARN_LOG(VIDEO, "Early z test enabled but not possible to emulate with current configuration. Make sure to enable fast depth calculations. If this message still shows up your hardware isn't able to emulate the feature properly (a GPU with D3D 11.0 / OGL 4.2 support is required).");
		warn_once = false;
	}

	uid_data->msaa = g_ActiveConfig.iMultisamples > 1;
	uid_data->ssaa = g_ActiveConfig.iMultisamples > 1 && g_ActiveConfig.bSSAA;
	if (ApiType == API_OPENGL)
	{
		out.Write("out vec4 ocol0;\n");
		if (dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND)
			out.Write("out vec4 ocol1;\n");

		// The rest of the generator writes to "depth"; on GLSL that name maps to gl_FragDepth.
		if (per_pixel_depth)
			out.Write("#define depth gl_FragDepth\n");

		uid_data->stereo = g_ActiveConfig.iStereoMode > 0;
		// With geometry shader support the inputs are declared as a VertexData interface block,
		// otherwise as individual input variables.
		if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
		{
			out.Write("in VertexData {\n");
			GenerateVSOutputMembers<T>(out, ApiType, GetInterpolationQualifier(ApiType, true, true));

			if (g_ActiveConfig.iStereoMode > 0)
				out.Write("\tflat int layer;\n");

			out.Write("};\n");
		}
		else
		{
			out.Write("%s in float4 colors_0;\n", GetInterpolationQualifier(ApiType));
			out.Write("%s in float4 colors_1;\n", GetInterpolationQualifier(ApiType));
			// compute window position if needed because binding semantic WPOS is not widely supported
			// Let's set up attributes
			for (unsigned int i = 0; i < numTexgen; ++i)
			{
				out.Write("%s in float3 uv%d;\n", GetInterpolationQualifier(ApiType), i);
			}
			out.Write("%s in float4 clipPos;\n", GetInterpolationQualifier(ApiType));
			if (g_ActiveConfig.bEnablePixelLighting)
			{
				out.Write("%s in float3 Normal;\n", GetInterpolationQualifier(ApiType));
				out.Write("%s in float3 WorldPos;\n", GetInterpolationQualifier(ApiType));
			}
		}

		out.Write("void main()\n{\n");

		// In the interface-block path, copy each texN member into the uvN name used by the
		// code emitted below.
		if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
		{
			for (unsigned int i = 0; i < numTexgen; ++i)
				out.Write("\tfloat3 uv%d = tex%d;\n", i, i);
		}

		out.Write("\tfloat4 rawpos = gl_FragCoord;\n");
	}
	else // D3D
	{
		// On D3D all inputs and outputs are parameters of main().
		out.Write("void main(\n");
		out.Write("  out float4 ocol0 : SV_Target0,%s%s\n  in float4 rawpos : SV_Position,\n",
			dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "\n  out float4 ocol1 : SV_Target1," : "",
			per_pixel_depth ? "\n  out float depth : SV_Depth," : "");

		out.Write("  in %s float4 colors_0 : COLOR0,\n", GetInterpolationQualifier(ApiType));
		out.Write("  in %s float4 colors_1 : COLOR1\n", GetInterpolationQualifier(ApiType));

		// compute window position if needed because binding semantic WPOS is not widely supported
		for (unsigned int i = 0; i < numTexgen; ++i)
			out.Write(",\n  in %s float3 uv%d : TEXCOORD%d", GetInterpolationQualifier(ApiType), i, i);
		out.Write(",\n  in %s float4 clipPos : TEXCOORD%d", GetInterpolationQualifier(ApiType), numTexgen);
		if (g_ActiveConfig.bEnablePixelLighting)
		{
			out.Write(",\n  in %s float3 Normal : TEXCOORD%d", GetInterpolationQualifier(ApiType), numTexgen + 1);
			out.Write(",\n  in %s float3 WorldPos : TEXCOORD%d", GetInterpolationQualifier(ApiType), numTexgen + 2);
		}
		uid_data->stereo = g_ActiveConfig.iStereoMode > 0;
		if (g_ActiveConfig.iStereoMode > 0)
			out.Write(",\n  in uint layer : SV_RenderTargetArrayIndex\n");
		out.Write("        ) {\n");
	}

	// Local working variables shared by all emitted TEV stages.
	out.Write("\tint4 c0 = " I_COLORS"[1], c1 = " I_COLORS"[2], c2 = " I_COLORS"[3], prev = " I_COLORS"[0];\n"
	          "\tint4 rastemp = int4(0, 0, 0, 0), textemp = int4(0, 0, 0, 0), konsttemp = int4(0, 0, 0, 0);\n"
	          "\tint3 comp16 = int3(1, 256, 0), comp24 = int3(1, 256, 256*256);\n"
	          "\tint alphabump=0;\n"
	          "\tint3 tevcoord=int3(0, 0, 0);\n"
	          "\tint2 wrappedcoord=int2(0,0), tempcoord=int2(0,0);\n"
	          "\tint4 tevin_a=int4(0,0,0,0),tevin_b=int4(0,0,0,0),tevin_c=int4(0,0,0,0),tevin_d=int4(0,0,0,0);\n\n"); // tev combiner inputs

	// On GLSL, input variables must not be assigned to.
	// This is why we declare these variables locally instead.
	out.Write("\tfloat4 col0 = colors_0;\n");
	out.Write("\tfloat4 col1 = colors_1;\n");

	if (g_ActiveConfig.bEnablePixelLighting)
	{
		out.Write("\tfloat3 _norm0 = normalize(Normal.xyz);\n\n");
		out.Write("\tfloat3 pos = WorldPos;\n");

		out.Write("\tint4 lacc;\n"
				"\tfloat3 ldir, h, cosAttn, distAttn;\n"
				"\tfloat dist, dist2, attn;\n");

		// TODO: Our current constant usage code isn't able to handle more than one buffer.
		//       So we can't mark the VS constant as used here. But keep them here as reference.
		//out.SetConstantsUsed(C_PLIGHT_COLORS, C_PLIGHT_COLORS+7); // TODO: Can be optimized further
		//out.SetConstantsUsed(C_PLIGHTS, C_PLIGHTS+31); // TODO: Can be optimized further
		//out.SetConstantsUsed(C_PMATERIALS, C_PMATERIALS+3);
		uid_data->components = components;
		GenerateLightingShader<T>(out, uid_data->lighting, components, "colors_", "col");
	}

	// HACK to handle cases where the tex gen is not enabled
	if (numTexgen == 0)
	{
		out.Write("\tint2 fixpoint_uv0 = int2(0, 0);\n\n");
	}
	else
	{
		// Convert every texture coordinate to fixed point: scale by I_TEXDIMS[i].zw * 128
		// and truncate to integer.
		out.SetConstantsUsed(C_TEXDIMS, C_TEXDIMS+numTexgen-1);
		for (unsigned int i = 0; i < numTexgen; ++i)
		{
			out.Write("\tint2 fixpoint_uv%d = itrunc(", i);
			// optional perspective divides
			uid_data->texMtxInfo_n_projection |= xfmem.texMtxInfo[i].projection << i;
			if (xfmem.texMtxInfo[i].projection == XF_TEXPROJ_STQ)
			{
				out.Write("(uv%d.z == 0.0 ? uv%d.xy : uv%d.xy / uv%d.z)", i, i, i, i);
			}
			else
			{
				out.Write("uv%d.xy", i);
			}
			out.Write(" * " I_TEXDIMS"[%d].zw * 128.0);\n", i);
			// TODO: S24 overflows here?
		}
	}

	// indirect texture map lookup
	// First build a bitmask of the indirect stages that an active TEV stage actually references.
	int nIndirectStagesUsed = 0;
	if (bpmem.genMode.numindstages > 0)
	{
		for (unsigned int i = 0; i < numStages; ++i)
		{
			if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages)
				nIndirectStagesUsed |= 1 << bpmem.tevind[i].bt;
		}
	}

	// Then emit one texture sample (iindtexN) per referenced indirect stage.
	uid_data->nIndirectStagesUsed = nIndirectStagesUsed;
	for (u32 i = 0; i < bpmem.genMode.numindstages; ++i)
	{
		if (nIndirectStagesUsed & (1 << i))
		{
			unsigned int texcoord = bpmem.tevindref.getTexCoord(i);
			unsigned int texmap = bpmem.tevindref.getTexMap(i);

			uid_data->SetTevindrefValues(i, texcoord, texmap);
			if (texcoord < numTexgen)
			{
				// Two indirect stages share one I_INDTEXSCALE entry: even stages use .xy, odd use .zw.
				out.SetConstantsUsed(C_INDTEXSCALE+i/2,C_INDTEXSCALE+i/2);
				out.Write("\ttempcoord = fixpoint_uv%d >> " I_INDTEXSCALE"[%d].%s;\n", texcoord, i / 2, (i & 1) ? "zw" : "xy");
			}
			else
				out.Write("\ttempcoord = int2(0, 0);\n");

			out.Write("\tint3 iindtex%d = ", i);
			SampleTexture<T>(out, "(float2(tempcoord)/128.0)", "abg", texmap, ApiType);
		}
	}

	// Uid fields for BuildSwapModeTable are set in WriteStage
	// Each of the four swap tables is a NUL-terminated 4-character swizzle built from two
	// consecutive tevksel entries.
	char swapModeTable[4][5];
	const char* swapColors = "rgba";
	for (int i = 0; i < 4; i++)
	{
		swapModeTable[i][0] = swapColors[bpmem.tevksel[i*2].swap1];
		swapModeTable[i][1] = swapColors[bpmem.tevksel[i*2].swap2];
		swapModeTable[i][2] = swapColors[bpmem.tevksel[i*2+1].swap1];
		swapModeTable[i][3] = swapColors[bpmem.tevksel[i*2+1].swap2];
		swapModeTable[i][4] = '\0';
	}

	for (unsigned int i = 0; i < numStages; i++)
		WriteStage<T>(out, uid_data, i, ApiType, swapModeTable); // build the equation for this stage

	// num_values is the full struct size with pixel lighting enabled; otherwise it is the byte
	// offset of stagehash[numStages] within the uid struct.
#define MY_STRUCT_OFFSET(str,elem) ((u32)((u64)&(str).elem-(u64)&(str)))
	bool enable_pl = g_ActiveConfig.bEnablePixelLighting;
	uid_data->num_values = (enable_pl) ? sizeof(*uid_data) : MY_STRUCT_OFFSET(*uid_data,stagehash[numStages]);


	if (numStages)
	{
		// The results of the last texenv stage are put onto the screen,
		// regardless of the used destination register
		if (bpmem.combiners[numStages - 1].colorC.dest != 0)
		{
			out.Write("\tprev.rgb = %s;\n", tevCOutputTable[bpmem.combiners[numStages - 1].colorC.dest]);
		}
		if (bpmem.combiners[numStages - 1].alphaC.dest != 0)
		{
			out.Write("\tprev.a = %s;\n", tevAOutputTable[bpmem.combiners[numStages - 1].alphaC.dest]);
		}
	}
	// Keep only the low 8 bits of every channel of the final color.
	out.Write("\tprev = prev & 255;\n");

	AlphaTest::TEST_RESULT Pretest = bpmem.alpha_test.TestResult();
	uid_data->Pretest = Pretest;

	// NOTE: Fragment may not be discarded if alpha test always fails and early depth test is enabled
	// (in this case we need to write a depth value if depth test passes regardless of the alpha testing result)
	if (Pretest == AlphaTest::UNDETERMINED || (Pretest == AlphaTest::FAIL && bpmem.UseLateDepthTest()))
		WriteAlphaTest<T>(out, uid_data, ApiType, dstAlphaMode, per_pixel_depth);

	// Compute the integer depth value zCoord using one of three methods:
	// zfreeze plane equation, recomputation from clipPos, or the rasterizer's depth.
	if (bpmem.genMode.zfreeze)
	{
		out.SetConstantsUsed(C_ZSLOPE, C_ZSLOPE);
		out.SetConstantsUsed(C_EFBSCALE, C_EFBSCALE);

		out.Write("\tfloat2 screenpos = rawpos.xy * " I_EFBSCALE".xy;\n");

		// OpenGL has reversed vertical screenspace coordinates
		if (ApiType == API_OPENGL)
			out.Write("\tscreenpos.y = %i.0 - screenpos.y;\n", EFB_HEIGHT);

		out.Write("\tint zCoord = int(" I_ZSLOPE".z + " I_ZSLOPE".x * screenpos.x + " I_ZSLOPE".y * screenpos.y);\n");
	}
	else if (!g_ActiveConfig.bFastDepthCalc)
	{
		// FastDepth means to trust the depth generated in perspective division.
		// It should be correct, but it seems not to be as accurate as required. TODO: Find out why!
		// For disabled FastDepth we just calculate the depth value again.
		// The performance impact of this additional calculation doesn't matter, but it prevents
		// the host GPU driver from performing any early depth test optimizations.
		out.SetConstantsUsed(C_ZBIAS+1, C_ZBIAS+1);
		// the screen space depth value = far z + (clip z / clip w) * z range
		out.Write("\tint zCoord = " I_ZBIAS"[1].x + int((clipPos.z / clipPos.w) * float(" I_ZBIAS"[1].y));\n");
	}
	else
	{
		// D3D uses the inverted depth value (1.0 - z); 16777216 is 2^24.
		if (ApiType == API_D3D)
			out.Write("\tint zCoord = int((1.0 - rawpos.z) * 16777216.0);\n");
		else
			out.Write("\tint zCoord = int(rawpos.z * 16777216.0);\n");
	}
	out.Write("\tzCoord = clamp(zCoord, 0, 0xFFFFFF);\n");

	// depth texture can safely be ignored if the result won't be written to the depth buffer (early_ztest) and isn't used for fog either
	const bool skip_ztexture = !per_pixel_depth && !bpmem.fog.c_proj_fsel.fsel;

	uid_data->ztex_op = bpmem.ztex2.op;
	uid_data->per_pixel_depth = per_pixel_depth;
	uid_data->forced_early_z = forced_early_z;
	uid_data->fast_depth_calc = g_ActiveConfig.bFastDepthCalc;
	uid_data->early_ztest = bpmem.UseEarlyDepthTest();
	uid_data->fog_fsel = bpmem.fog.c_proj_fsel.fsel;
	uid_data->zfreeze = bpmem.genMode.zfreeze;

	// Note: z-textures are not written to depth buffer if early depth test is used
	if (per_pixel_depth && bpmem.UseEarlyDepthTest())
	{
		if (ApiType == API_D3D)
			out.Write("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n");
		else
			out.Write("\tdepth = float(zCoord) / 16777216.0;\n");
	}

	// Note: depth texture output is only written to depth buffer if late depth test is used
	// theoretical final depth value is used for fog calculation, though, so we have to emulate ztextures anyway
	if (bpmem.ztex2.op != ZTEXTURE_DISABLE && !skip_ztexture)
	{
		// use the texture input of the last texture stage (textemp), hopefully this has been read and is in correct format...
		out.SetConstantsUsed(C_ZBIAS, C_ZBIAS+1);
		out.Write("\tzCoord = idot(" I_ZBIAS"[0].xyzw, textemp.xyzw) + " I_ZBIAS"[1].w %s;\n",
									(bpmem.ztex2.op == ZTEXTURE_ADD) ? "+ zCoord" : "");
		out.Write("\tzCoord = zCoord & 0xFFFFFF;\n");
	}

	// With late depth test the depth output is written after the z-texture has been applied.
	if (per_pixel_depth && bpmem.UseLateDepthTest())
	{
		if (ApiType == API_D3D)
			out.Write("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n");
		else
			out.Write("\tdepth = float(zCoord) / 16777216.0;\n");
	}

	// Final color output. The alpha pass replaces alpha with the constant and skips fog.
	if (dstAlphaMode == DSTALPHA_ALPHA_PASS)
	{
		out.SetConstantsUsed(C_ALPHA, C_ALPHA);
		out.Write("\tocol0 = float4(float3(prev.rgb), float(" I_ALPHA".a)) / 255.0;\n");
	}
	else
	{
		WriteFog<T>(out, uid_data);
		out.Write("\tocol0 = float4(prev) / 255.0;\n");
	}

	// Use dual-source color blending to perform dst alpha in a single pass
	if (dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND)
	{
		out.SetConstantsUsed(C_ALPHA, C_ALPHA);

		// Colors will be blended against the alpha from ocol1 and
		// the alpha from ocol0 will be written to the framebuffer.
		out.Write("\tocol1 = float4(prev) / 255.0;\n");
		out.Write("\tocol0.a = float(" I_ALPHA".a) / 255.0;\n");
	}

	// Grow the bounding box to include this pixel, using the API's atomic min/max functions.
	if (g_ActiveConfig.backend_info.bSupportsBBox && g_ActiveConfig.bBBoxEnable && BoundingBox::active)
	{
		uid_data->bounding_box = true;
		const char* atomic_op = ApiType == API_OPENGL ? "atomic" : "Interlocked";
		out.Write(
			"\tif(bbox_data[0] > int(rawpos.x)) %sMin(bbox_data[0], int(rawpos.x));\n"
			"\tif(bbox_data[1] < int(rawpos.x)) %sMax(bbox_data[1], int(rawpos.x));\n"
			"\tif(bbox_data[2] > int(rawpos.y)) %sMin(bbox_data[2], int(rawpos.y));\n"
			"\tif(bbox_data[3] < int(rawpos.y)) %sMax(bbox_data[3], int(rawpos.y));\n",
			atomic_op, atomic_op, atomic_op, atomic_op);
	}

	out.Write("}\n");

	return out;
}
Exemplo n.º 4
0
ShaderCode GenerateGeometryShaderCode(APIType ApiType, const geometry_shader_uid_data* uid_data)
{
  // Builds the geometry shader source described by uid_data for the requested API.
  // OpenGL and Vulkan share the GLSL-style output; every other APIType takes the D3D path.
  // The emitted shader loops over the input vertices, emits two vertices per line endpoint
  // and four per point, and, with stereo enabled, runs once per eye.
  ShaderCode out;

  // Conditions that are consulted repeatedly while emitting.
  const bool glsl = (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan);
  const bool instanced = g_ActiveConfig.backend_info.bSupportsGSInstancing;
  const bool stereo = uid_data->stereo != 0;
  const bool wireframe = uid_data->wireframe != 0;
  const bool is_lines = (uid_data->primitive_type == PRIMITIVE_LINES);
  const bool is_points = (uid_data->primitive_type == PRIMITIVE_POINTS);

  // Input vertex count is primitive_type + 1; output is 3 for triangles and 4 otherwise,
  // plus one extra vertex when drawing wireframe.
  const unsigned int vertex_in = uid_data->primitive_type + 1;
  unsigned int vertex_out = 4;
  if (uid_data->primitive_type == PRIMITIVE_TRIANGLES)
    vertex_out = 3;
  if (wireframe)
    ++vertex_out;

  // Without GS instancing a stereo shader emits both eyes from a single invocation,
  // so its declared output limit doubles.
  const unsigned int max_vertices = (stereo && !instanced) ? vertex_out * 2 : vertex_out;
  const int invocations = stereo ? 2 : 1;

  if (glsl)
  {
    // Input/output layout qualifiers.
    if (instanced)
    {
      out.Write("layout(%s, invocations = %d) in;\n", primitives_ogl[uid_data->primitive_type],
                invocations);
    }
    else
    {
      out.Write("layout(%s) in;\n", primitives_ogl[uid_data->primitive_type]);
    }
    out.Write("layout(%s_strip, max_vertices = %d) out;\n", wireframe ? "line" : "triangle",
              max_vertices);
  }

  out.Write("%s", s_lighting_struct);

  // Uniform block header, followed by the members common to both APIs.
  if (!glsl)
    out.Write("cbuffer GSBlock {\n");
  else
    out.Write("UBO_BINDING(std140, 3) uniform GSBlock {\n");

  out.Write("\tfloat4 " I_STEREOPARAMS ";\n"
            "\tfloat4 " I_LINEPTPARAMS ";\n"
            "\tint4 " I_TEXOFFSET ";\n"
            "};\n");

  out.Write("struct VS_OUTPUT {\n");
  GenerateVSOutputMembers<ShaderCode>(out, ApiType, uid_data->numTexGens, uid_data->pixel_lighting,
                                      "");
  out.Write("};\n");

  if (!glsl)
  {
    // D3D: the output vertex wraps VS_OUTPUT and, for stereo, carries the target layer.
    out.Write("struct VertexData {\n");
    out.Write("\tVS_OUTPUT o;\n");
    if (stereo)
      out.Write("\tuint layer : SV_RenderTargetArrayIndex;\n");
    out.Write("};\n");

    const char* const stream_kind = wireframe ? "Line" : "Triangle";
    if (!instanced)
    {
      out.Write("[maxvertexcount(%d)]\n", max_vertices);
      out.Write("void main(%s VS_OUTPUT o[%d], inout %sStream<VertexData> output)\n{\n",
                primitives_d3d[uid_data->primitive_type], vertex_in, stream_kind);
    }
    else
    {
      out.Write("[maxvertexcount(%d)]\n[instance(%d)]\n", max_vertices, invocations);
      out.Write("void main(%s VS_OUTPUT o[%d], inout %sStream<VertexData> output, in uint "
                "InstanceID : SV_GSInstanceID)\n{\n",
                primitives_d3d[uid_data->primitive_type], vertex_in, stream_kind);
    }

    out.Write("\tVertexData ps;\n");
  }
  else
  {
    // GLSL: interface blocks for the incoming and outgoing vertices.
    if (instanced)
      out.Write("#define InstanceID gl_InvocationID\n");

    out.Write("VARYING_LOCATION(0) in VertexData {\n");
    GenerateVSOutputMembers<ShaderCode>(
        out, ApiType, uid_data->numTexGens, uid_data->pixel_lighting,
        GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa, true, true));
    out.Write("} vs[%d];\n", vertex_in);

    out.Write("VARYING_LOCATION(0) out VertexData {\n");
    GenerateVSOutputMembers<ShaderCode>(
        out, ApiType, uid_data->numTexGens, uid_data->pixel_lighting,
        GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa, false, true));
    if (stereo)
      out.Write("\tflat int layer;\n");
    out.Write("} ps;\n");

    out.Write("void main()\n{\n");
  }

  // Per-primitive setup emitted ahead of the vertex loop.
  if (is_lines)
  {
    if (!glsl)
    {
      out.Write("\tVS_OUTPUT start = o[0];\n");
      out.Write("\tVS_OUTPUT end = o[1];\n");
    }
    else
    {
      out.Write("\tVS_OUTPUT start, end;\n");
      AssignVSOutputMembers(out, "start", "vs[0]", uid_data->numTexGens, uid_data->pixel_lighting);
      AssignVSOutputMembers(out, "end", "vs[1]", uid_data->numTexGens, uid_data->pixel_lighting);
    }

    // The console does not draw proper line caps: they end up vertical or horizontal
    // depending on the slope of the line.
    // FIXME: What does real hardware do when line is at a 45-degree angle?
    // FIXME: Lines aren't drawn at the correct width. See Twilight Princess map.
    out.Write("\tfloat2 offset;\n"
              "\tfloat2 to = abs(end.pos.xy / end.pos.w - start.pos.xy / start.pos.w);\n"
              "\tif (" I_LINEPTPARAMS ".y * to.y > " I_LINEPTPARAMS ".x * to.x) {\n"
              // Taller than wide: widen the geometry horizontally.
              // Maps LineWidth/2 from [0..VpWidth] to [-1..1].
              "\t\toffset = float2(" I_LINEPTPARAMS ".z / " I_LINEPTPARAMS ".x, 0);\n"
              "\t} else {\n"
              // Wider than tall: widen the geometry vertically.
              // Maps LineWidth/2 from [0..VpHeight] to [1..-1].
              "\t\toffset = float2(0, -" I_LINEPTPARAMS ".z / " I_LINEPTPARAMS ".y);\n"
              "\t}\n");
  }
  else if (is_points)
  {
    if (!glsl)
    {
      out.Write("\tVS_OUTPUT center = o[0];\n");
    }
    else
    {
      out.Write("\tVS_OUTPUT center;\n");
      AssignVSOutputMembers(out, "center", "vs[0]", uid_data->numTexGens, uid_data->pixel_lighting);
    }

    // Vector from the point's center to its upper right corner.
    // Maps PointSize/2 from [0,0..VpWidth,VpHeight] to [-1,1..1,-1].
    out.Write("\tfloat2 offset = float2(" I_LINEPTPARAMS ".w / " I_LINEPTPARAMS
              ".x, -" I_LINEPTPARAMS ".w / " I_LINEPTPARAMS ".y) * center.pos.w;\n");
  }

  if (stereo)
  {
    // With GS instancing the invocation id selects the eye; otherwise the shader
    // loops over both eyes itself.
    if (!instanced)
      out.Write("\tfor (int eye = 0; eye < 2; ++eye) {\n");
    else
      out.Write("\tint eye = InstanceID;\n");
  }

  if (wireframe)
    out.Write("\tVS_OUTPUT first;\n");

  out.Write("\tfor (int i = 0; i < %d; ++i) {\n", vertex_in);

  // Load the current input vertex into "f".
  if (!glsl)
  {
    out.Write("\tVS_OUTPUT f = o[i];\n");
  }
  else
  {
    out.Write("\tVS_OUTPUT f;\n");
    AssignVSOutputMembers(out, "f", "vs[i]", uid_data->numTexGens, uid_data->pixel_lighting);

    if (g_ActiveConfig.backend_info.bSupportsDepthClamp &&
        DriverDetails::HasBug(DriverDetails::BUG_BROKENCLIPDISTANCE))
    {
      // Some GPUs corrupt the other vertex shader outputs unless the clip distances
      // written by the vertex shader are consumed here.
      out.Write("\tf.clipDist0 = gl_in[i].gl_ClipDistance[0];\n");
      out.Write("\tf.clipDist1 = gl_in[i].gl_ClipDistance[1];\n");
    }
  }

  if (stereo)
  {
    // Route the vertex to the layer belonging to this eye.
    out.Write("\tps.layer = eye;\n");
    if (glsl)
      out.Write("\tgl_Layer = eye;\n");

    // Stereoscopy shifts the vertex horizontally in Normalized Device Coordinates by an
    // amount proportional to its depth. The depth comes from the w-component of the
    // projected vertex, which holds the negated z-component of the original vertex.
    // Subtracting a convergence value from the depth gives negative parallax, so objects
    // nearer than the convergence distance appear in front of the screen.
    // Formula based on page 13 of the "Nvidia 3D Vision Automatic, Best Practices Guide".
    out.Write("\tfloat hoffset = (eye == 0) ? " I_STEREOPARAMS ".x : " I_STEREOPARAMS ".y;\n");
    out.Write("\tf.pos.x += hoffset * (f.pos.w - " I_STEREOPARAMS ".z);\n");
  }

  if (is_lines)
  {
    // Two output vertices per line endpoint, pushed apart by the cap offset.
    out.Write("\tVS_OUTPUT l = f;\n"
              "\tVS_OUTPUT r = f;\n");

    out.Write("\tl.pos.xy -= offset * l.pos.w;\n"
              "\tr.pos.xy += offset * r.pos.w;\n");

    out.Write("\tif (" I_TEXOFFSET "[2] != 0) {\n");
    out.Write("\tfloat texOffset = 1.0 / float(" I_TEXOFFSET "[2]);\n");

    for (unsigned int texgen = 0; texgen < uid_data->numTexGens; ++texgen)
    {
      out.Write("\tif (((" I_TEXOFFSET "[0] >> %d) & 0x1) != 0)\n", texgen);
      out.Write("\t\tr.tex%d.x += texOffset;\n", texgen);
    }
    out.Write("\t}\n");

    EmitVertex(out, uid_data, "l", ApiType, true);
    EmitVertex(out, uid_data, "r", ApiType);
  }
  else if (is_points)
  {
    // Four corner vertices around the point's center.
    out.Write("\tVS_OUTPUT ll = f;\n"
              "\tVS_OUTPUT lr = f;\n"
              "\tVS_OUTPUT ul = f;\n"
              "\tVS_OUTPUT ur = f;\n");

    out.Write("\tll.pos.xy += float2(-1,-1) * offset;\n"
              "\tlr.pos.xy += float2(1,-1) * offset;\n"
              "\tul.pos.xy += float2(-1,1) * offset;\n"
              "\tur.pos.xy += offset;\n");

    out.Write("\tif (" I_TEXOFFSET "[3] != 0) {\n");
    out.Write("\tfloat2 texOffset = float2(1.0 / float(" I_TEXOFFSET
              "[3]), 1.0 / float(" I_TEXOFFSET "[3]));\n");

    for (unsigned int texgen = 0; texgen < uid_data->numTexGens; ++texgen)
    {
      out.Write("\tif (((" I_TEXOFFSET "[1] >> %d) & 0x1) != 0) {\n", texgen);
      out.Write("\t\tll.tex%d.xy += float2(0,1) * texOffset;\n", texgen);
      out.Write("\t\tlr.tex%d.xy += texOffset;\n", texgen);
      out.Write("\t\tur.tex%d.xy += float2(1,0) * texOffset;\n", texgen);
      out.Write("\t}\n");
    }
    out.Write("\t}\n");

    EmitVertex(out, uid_data, "ll", ApiType, true);
    EmitVertex(out, uid_data, "lr", ApiType);
    EmitVertex(out, uid_data, "ul", ApiType);
    EmitVertex(out, uid_data, "ur", ApiType);
  }
  else
  {
    // Any other primitive type passes its vertices through.
    EmitVertex(out, uid_data, "f", ApiType, true);
  }

  // Close the per-vertex loop.
  out.Write("\t}\n");

  EndPrimitive(out, uid_data, ApiType);

  // Close the per-eye loop opened above when instancing is unavailable.
  if (stereo && !instanced)
    out.Write("\t}\n");

  out.Write("}\n");

  return out;
}
Exemplo n.º 5
0
// Generates the vertex shader matching the current xfmem/bpmem state and the
// currently loaded vertex components.
//
// T is the output sink. Shader source is emitted through its printf-style
// Write(); GetUidData<vertex_shader_uid_data>() may return nullptr, in which
// case the uid fields are written to a throw-away local instead.
//
// api_type: API_OPENGL emits `in`/`out` declarations and `void main()`;
//           any other value emits `VS_OUTPUT main(...)` with input semantics.
//
// Returns the populated T by value.
static T GenerateVertexShader(API_TYPE api_type)
{
  T out;
  const u32 components = VertexLoaderManager::g_current_components;
  // Read the setting once so the uid and every piece of generated code agree on it.
  const bool pixel_lighting = g_ActiveConfig.bEnablePixelLighting;

  // Non-uid template parameters will write to the dummy data (=> gets optimized out)
  vertex_shader_uid_data dummy_data;
  vertex_shader_uid_data* uid_data = out.template GetUidData<vertex_shader_uid_data>();
  if (uid_data != nullptr)
    memset(uid_data, 0, sizeof(*uid_data));
  else
    uid_data = &dummy_data;

  _assert_(bpmem.genMode.numtexgens == xfmem.numTexGen.numTexGens);
  _assert_(bpmem.genMode.numcolchans == xfmem.numChan.numColorChans);

  out.Write("%s", s_lighting_struct);

  // uniforms
  if (api_type == API_OPENGL)
    out.Write("layout(std140%s) uniform VSBlock {\n",
              g_ActiveConfig.backend_info.bSupportsBindingLayout ? ", binding = 2" : "");
  else
    out.Write("cbuffer VSBlock {\n");
  // Emit the uniform declarations as data via "%s" instead of using them as the
  // format string itself (same pattern as s_lighting_struct above).
  out.Write("%s", s_shader_uniforms);
  out.Write("};\n");

  out.Write("struct VS_OUTPUT {\n");
  GenerateVSOutputMembers<T>(out, api_type, "");
  out.Write("};\n");

  uid_data->numTexGens = xfmem.numTexGen.numTexGens;
  uid_data->components = components;
  uid_data->pixel_lighting = pixel_lighting;

  if (api_type == API_OPENGL)
  {
    // Vertex inputs: only the attributes present in the vertex format are declared.
    out.Write("in float4 rawpos; // ATTR%d,\n", SHADER_POSITION_ATTRIB);
    if (components & VB_HAS_POSMTXIDX)
      out.Write("in int posmtx; // ATTR%d,\n", SHADER_POSMTX_ATTRIB);
    if (components & VB_HAS_NRM0)
      out.Write("in float3 rawnorm0; // ATTR%d,\n", SHADER_NORM0_ATTRIB);
    if (components & VB_HAS_NRM1)
      out.Write("in float3 rawnorm1; // ATTR%d,\n", SHADER_NORM1_ATTRIB);
    if (components & VB_HAS_NRM2)
      out.Write("in float3 rawnorm2; // ATTR%d,\n", SHADER_NORM2_ATTRIB);

    if (components & VB_HAS_COL0)
      out.Write("in float4 color0; // ATTR%d,\n", SHADER_COLOR0_ATTRIB);
    if (components & VB_HAS_COL1)
      out.Write("in float4 color1; // ATTR%d,\n", SHADER_COLOR1_ATTRIB);

    for (int i = 0; i < 8; ++i)
    {
      // A texcoord carrying a matrix index is declared with a third component.
      u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0 << i));
      if ((components & (VB_HAS_UV0 << i)) || hastexmtx)
        out.Write("in float%d tex%d; // ATTR%d,\n", hastexmtx ? 3 : 2, i,
                  SHADER_TEXTURE0_ATTRIB + i);
    }

    if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
    {
      // Outputs are grouped into an interface block named "vs".
      out.Write("out VertexData {\n");
      GenerateVSOutputMembers<T>(out, api_type, GetInterpolationQualifier(true, false));
      out.Write("} vs;\n");
    }
    else
    {
      // Let's set up attributes
      for (u32 i = 0; i < 8; ++i)
      {
        if (i < xfmem.numTexGen.numTexGens)
        {
          out.Write("%s out float3 uv%u;\n", GetInterpolationQualifier(), i);
        }
      }
      out.Write("%s out float4 clipPos;\n", GetInterpolationQualifier());
      if (pixel_lighting)
      {
        out.Write("%s out float3 Normal;\n", GetInterpolationQualifier());
        out.Write("%s out float3 WorldPos;\n", GetInterpolationQualifier());
      }
      out.Write("%s out float4 colors_0;\n", GetInterpolationQualifier());
      out.Write("%s out float4 colors_1;\n", GetInterpolationQualifier());
    }

    out.Write("void main()\n{\n");
  }
  else  // D3D
  {
    out.Write("VS_OUTPUT main(\n");

    // inputs
    if (components & VB_HAS_NRM0)
      out.Write("  float3 rawnorm0 : NORMAL0,\n");
    if (components & VB_HAS_NRM1)
      out.Write("  float3 rawnorm1 : NORMAL1,\n");
    if (components & VB_HAS_NRM2)
      out.Write("  float3 rawnorm2 : NORMAL2,\n");
    if (components & VB_HAS_COL0)
      out.Write("  float4 color0 : COLOR0,\n");
    if (components & VB_HAS_COL1)
      out.Write("  float4 color1 : COLOR1,\n");
    for (int i = 0; i < 8; ++i)
    {
      u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0 << i));
      if ((components & (VB_HAS_UV0 << i)) || hastexmtx)
        out.Write("  float%d tex%d : TEXCOORD%d,\n", hastexmtx ? 3 : 2, i, i);
    }
    if (components & VB_HAS_POSMTXIDX)
      out.Write("  int posmtx : BLENDINDICES,\n");
    // rawpos is emitted last because it closes the parameter list.
    out.Write("  float4 rawpos : POSITION) {\n");
  }

  out.Write("VS_OUTPUT o;\n");

  // transforms
  if (components & VB_HAS_POSMTXIDX)
  {
    // Per-vertex matrix index: position and normal matrices are looked up via posmtx.
    out.Write("float4 pos = float4(dot(" I_TRANSFORMMATRICES
              "[posmtx], rawpos), dot(" I_TRANSFORMMATRICES
              "[posmtx+1], rawpos), dot(" I_TRANSFORMMATRICES "[posmtx+2], rawpos), 1);\n");

    if (components & VB_HAS_NRMALL)
    {
      out.Write("int normidx = posmtx & 31;\n");
      out.Write("float3 N0 = " I_NORMALMATRICES "[normidx].xyz, N1 = " I_NORMALMATRICES
                "[normidx+1].xyz, N2 = " I_NORMALMATRICES "[normidx+2].xyz;\n");
    }

    if (components & VB_HAS_NRM0)
      out.Write("float3 _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, "
                "rawnorm0)));\n");
    if (components & VB_HAS_NRM1)
      out.Write(
          "float3 _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n");
    if (components & VB_HAS_NRM2)
      out.Write(
          "float3 _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n");
  }
  else
  {
    // No per-vertex index: rows 0-2 of the pos/normal matrix transform the position,
    // rows 3-5 transform the normals.
    out.Write("float4 pos = float4(dot(" I_POSNORMALMATRIX "[0], rawpos), dot(" I_POSNORMALMATRIX
              "[1], rawpos), dot(" I_POSNORMALMATRIX "[2], rawpos), 1.0);\n");
    if (components & VB_HAS_NRM0)
      out.Write("float3 _norm0 = normalize(float3(dot(" I_POSNORMALMATRIX
                "[3].xyz, rawnorm0), dot(" I_POSNORMALMATRIX
                "[4].xyz, rawnorm0), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm0)));\n");
    if (components & VB_HAS_NRM1)
      out.Write("float3 _norm1 = float3(dot(" I_POSNORMALMATRIX
                "[3].xyz, rawnorm1), dot(" I_POSNORMALMATRIX
                "[4].xyz, rawnorm1), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm1));\n");
    if (components & VB_HAS_NRM2)
      out.Write("float3 _norm2 = float3(dot(" I_POSNORMALMATRIX
                "[3].xyz, rawnorm2), dot(" I_POSNORMALMATRIX
                "[4].xyz, rawnorm2), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm2));\n");
  }

  // _norm0 is referenced by later generated code, so declare it even without a normal input.
  if (!(components & VB_HAS_NRM0))
    out.Write("float3 _norm0 = float3(0.0, 0.0, 0.0);\n");

  out.Write("o.pos = float4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION
            "[1], pos), dot(" I_PROJECTION "[2], pos), dot(" I_PROJECTION "[3], pos));\n");

  // Scratch variables declared for the lighting code emitted below.
  out.Write("int4 lacc;\n"
            "float3 ldir, h, cosAttn, distAttn;\n"
            "float dist, dist2, attn;\n");

  uid_data->numColorChans = xfmem.numChan.numColorChans;
  if (xfmem.numChan.numColorChans == 0)
  {
    // No colour channels: pass the vertex colour through, or white if there is none.
    if (components & VB_HAS_COL0)
      out.Write("o.colors_0 = color0;\n");
    else
      out.Write("o.colors_0 = float4(1.0, 1.0, 1.0, 1.0);\n");
  }

  GenerateLightingShader<T>(out, uid_data->lighting, components, "color", "o.colors_");

  if (xfmem.numChan.numColorChans < 2)
  {
    if (components & VB_HAS_COL1)
      out.Write("o.colors_1 = color1;\n");
    else
      out.Write("o.colors_1 = o.colors_0;\n");
  }

  // transform texcoords
  out.Write("float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n");
  for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i)
  {
    const TexMtxInfo& texinfo = xfmem.texMtxInfo[i];

    // Each texgen lives in its own scope so per-texgen temporaries (tmp, P0..P2) can repeat.
    out.Write("{\n");
    out.Write("coord = float4(0.0, 0.0, 1.0, 1.0);\n");
    uid_data->texMtxInfo[i].sourcerow = texinfo.sourcerow;
    switch (texinfo.sourcerow)
    {
    case XF_SRCGEOM_INROW:
      out.Write("coord.xyz = rawpos.xyz;\n");
      break;
    case XF_SRCNORMAL_INROW:
      if (components & VB_HAS_NRM0)
      {
        out.Write("coord.xyz = rawnorm0.xyz;\n");
      }
      break;
    case XF_SRCCOLORS_INROW:
      _assert_(texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC0 ||
               texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC1);
      break;
    case XF_SRCBINORMAL_T_INROW:
      if (components & VB_HAS_NRM1)
      {
        out.Write("coord.xyz = rawnorm1.xyz;\n");
      }
      break;
    case XF_SRCBINORMAL_B_INROW:
      if (components & VB_HAS_NRM2)
      {
        out.Write("coord.xyz = rawnorm2.xyz;\n");
      }
      break;
    default:
      // Remaining source rows select one of the input texcoords.
      _assert_(texinfo.sourcerow <= XF_SRCTEX7_INROW);
      if (components & (VB_HAS_UV0 << (texinfo.sourcerow - XF_SRCTEX0_INROW)))
        out.Write("coord = float4(tex%d.x, tex%d.y, 1.0, 1.0);\n",
                  texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW);
      break;
    }
    // Input form of AB11 sets z element to 1.0
    uid_data->texMtxInfo[i].inputform = texinfo.inputform;
    if (texinfo.inputform == XF_TEXINPUT_AB11)
      out.Write("coord.z = 1.0;\n");

    // first transformation
    uid_data->texMtxInfo[i].texgentype = texinfo.texgentype;
    switch (texinfo.texgentype)
    {
    case XF_TEXGEN_EMBOSS_MAP:  // calculate tex coords into bump map

      if (components & (VB_HAS_NRM1 | VB_HAS_NRM2))
      {
        // transform the light dir into tangent space
        uid_data->texMtxInfo[i].embosslightshift = texinfo.embosslightshift;
        uid_data->texMtxInfo[i].embosssourceshift = texinfo.embosssourceshift;
        out.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n",
                  LIGHT_POS_PARAMS(texinfo.embosslightshift));
        out.Write(
            "o.tex%d.xyz = o.tex%d.xyz + float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0);\n", i,
            texinfo.embosssourceshift);
      }
      else
      {
        // The following assert was triggered in House of the Dead Overkill and Star Wars Rogue
        // Squadron 2
        //_assert_(0); // should have normals
        uid_data->texMtxInfo[i].embosssourceshift = texinfo.embosssourceshift;
        out.Write("o.tex%d.xyz = o.tex%d.xyz;\n", i, texinfo.embosssourceshift);
      }

      break;
    case XF_TEXGEN_COLOR_STRGBC0:
      out.Write("o.tex%d.xyz = float3(o.colors_0.x, o.colors_0.y, 1);\n", i);
      break;
    case XF_TEXGEN_COLOR_STRGBC1:
      out.Write("o.tex%d.xyz = float3(o.colors_1.x, o.colors_1.y, 1);\n", i);
      break;
    case XF_TEXGEN_REGULAR:
    default:
      // The projection flag of texgen i is packed into bit i of the uid field.
      uid_data->texMtxInfo_n_projection |= texinfo.projection << i;
      if (components & (VB_HAS_TEXMTXIDX0 << i))
      {
        // Matrix index comes from the third component of the input texcoord.
        out.Write("int tmp = int(tex%d.z);\n", i);
        if (texinfo.projection == XF_TEXPROJ_STQ)
          out.Write("o.tex%d.xyz = float3(dot(coord, " I_TRANSFORMMATRICES
                    "[tmp]), dot(coord, " I_TRANSFORMMATRICES
                    "[tmp+1]), dot(coord, " I_TRANSFORMMATRICES "[tmp+2]));\n",
                    i);
        else
          out.Write("o.tex%d.xyz = float3(dot(coord, " I_TRANSFORMMATRICES
                    "[tmp]), dot(coord, " I_TRANSFORMMATRICES "[tmp+1]), 1);\n",
                    i);
      }
      else
      {
        // Fixed matrix: texgen i uses rows 3*i .. 3*i+2 of the texture matrices.
        if (texinfo.projection == XF_TEXPROJ_STQ)
          out.Write("o.tex%d.xyz = float3(dot(coord, " I_TEXMATRICES
                    "[%d]), dot(coord, " I_TEXMATRICES "[%d]), dot(coord, " I_TEXMATRICES
                    "[%d]));\n",
                    i, 3 * i, 3 * i + 1, 3 * i + 2);
        else
          out.Write("o.tex%d.xyz = float3(dot(coord, " I_TEXMATRICES
                    "[%d]), dot(coord, " I_TEXMATRICES "[%d]), 1);\n",
                    i, 3 * i, 3 * i + 1);
      }
      break;
    }

    // NOTE: recorded inside the loop, so it is only written when at least one texgen exists.
    uid_data->dualTexTrans_enabled = xfmem.dualTexTrans.enabled;
    // CHECKME: does this only work for regular tex gen types?
    if (xfmem.dualTexTrans.enabled && texinfo.texgentype == XF_TEXGEN_REGULAR)
    {
      const PostMtxInfo& postInfo = xfmem.postMtxInfo[i];

      uid_data->postMtxInfo[i].index = postInfo.index;
      const int postidx = postInfo.index;
      // The three row indices are masked to 6 bits each.
      out.Write("float4 P0 = " I_POSTTRANSFORMMATRICES "[%d];\n"
                "float4 P1 = " I_POSTTRANSFORMMATRICES "[%d];\n"
                "float4 P2 = " I_POSTTRANSFORMMATRICES "[%d];\n",
                postidx & 0x3f, (postidx + 1) & 0x3f, (postidx + 2) & 0x3f);

      uid_data->postMtxInfo[i].normalize = postInfo.normalize;
      if (postInfo.normalize)
        out.Write("o.tex%d.xyz = normalize(o.tex%d.xyz);\n", i, i);

      // multiply by postmatrix
      out.Write("o.tex%d.xyz = float3(dot(P0.xyz, o.tex%d.xyz) + P0.w, dot(P1.xyz, o.tex%d.xyz) + "
                "P1.w, dot(P2.xyz, o.tex%d.xyz) + P2.w);\n",
                i, i, i, i);
    }

    out.Write("}\n");
  }

  // clipPos/w needs to be done in pixel shader, not here
  out.Write("o.clipPos = o.pos;\n");

  if (pixel_lighting)
  {
    out.Write("o.Normal = _norm0;\n");
    out.Write("o.WorldPos = pos.xyz;\n");

    // With pixel lighting the raw vertex colours replace the colours computed above.
    if (components & VB_HAS_COL0)
      out.Write("o.colors_0 = color0;\n");

    if (components & VB_HAS_COL1)
      out.Write("o.colors_1 = color1;\n");
  }

  // write the true depth value, if the game uses depth textures pixel shaders will override with
  // the correct values
  // if not early z culling will improve speed
  if (g_ActiveConfig.backend_info.bSupportsClipControl)
  {
    out.Write("o.pos.z = -o.pos.z;\n");
  }
  else  // OGL
  {
    // this results in a scale from -1..0 to -1..1 after perspective
    // divide
    out.Write("o.pos.z = o.pos.z * -2.0 - o.pos.w;\n");

    // the next steps of the OGL pipeline are:
    // (x_c,y_c,z_c,w_c) = o.pos  //switch to OGL spec terminology
    // clipping to -w_c <= (x_c,y_c,z_c) <= w_c
    // (x_d,y_d,z_d) = (x_c,y_c,z_c)/w_c//perspective divide
    // z_w = (f-n)/2*z_d + (n+f)/2
    // z_w now contains the value to go to the 0..1 depth buffer

    // trying to get the correct semantic while not using glDepthRange
    // seems to get rather complicated
  }

  // The console GPU places the pixel center at 7/12 in screen space unless
  // antialiasing is enabled, while D3D and OpenGL place it at 0.5. This results
  // in some primitives being placed one pixel too far to the bottom-right,
  // which in turn can be critical if it happens for clear quads.
  // Hence, we compensate for this pixel center difference so that primitives
  // get rasterized correctly.
  out.Write("o.pos.xy = o.pos.xy - o.pos.w * " I_PIXELCENTERCORRECTION ".xy;\n");

  if (api_type == API_OPENGL)
  {
    if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
    {
      AssignVSOutputMembers(out, "vs", "o");
    }
    else
    {
      // TODO: Pass interface blocks between shader stages even if geometry shaders
      // are not supported, however that will require at least OpenGL 3.2 support.
      for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i)
        out.Write("uv%d.xyz = o.tex%d;\n", i, i);
      out.Write("clipPos = o.clipPos;\n");
      if (pixel_lighting)
      {
        out.Write("Normal = o.Normal;\n");
        out.Write("WorldPos = o.WorldPos;\n");
      }
      out.Write("colors_0 = o.colors_0;\n");
      out.Write("colors_1 = o.colors_1;\n");
    }

    out.Write("gl_Position = o.pos;\n");
  }
  else  // D3D
  {
    out.Write("return o;\n");
  }
  out.Write("}\n");

  return out;
}