LinkedShader::LinkedShader(ShaderID VSID, Shader *vs, ShaderID FSID, Shader *fs, bool useHWTransform) : useHWTransform_(useHWTransform), program(0), dirtyUniforms(0) { PROFILE_THIS_SCOPE("shaderlink"); program = glCreateProgram(); vs_ = vs; glAttachShader(program, vs->shader); glAttachShader(program, fs->shader); // Bind attribute locations to fixed locations so that they're // the same in all shaders. We use this later to minimize the calls to // glEnableVertexAttribArray and glDisableVertexAttribArray. glBindAttribLocation(program, ATTR_POSITION, "position"); glBindAttribLocation(program, ATTR_TEXCOORD, "texcoord"); glBindAttribLocation(program, ATTR_NORMAL, "normal"); glBindAttribLocation(program, ATTR_W1, "w1"); glBindAttribLocation(program, ATTR_W2, "w2"); glBindAttribLocation(program, ATTR_COLOR0, "color0"); glBindAttribLocation(program, ATTR_COLOR1, "color1"); #if !defined(USING_GLES2) if (gstate_c.featureFlags & GPU_SUPPORTS_DUALSOURCE_BLEND) { // Dual source alpha glBindFragDataLocationIndexed(program, 0, 0, "fragColor0"); glBindFragDataLocationIndexed(program, 0, 1, "fragColor1"); } else if (gl_extensions.VersionGEThan(3, 3, 0)) { glBindFragDataLocation(program, 0, "fragColor0"); } #elif !defined(IOS) if (gl_extensions.GLES3) { if (gstate_c.featureFlags & GPU_SUPPORTS_DUALSOURCE_BLEND) { glBindFragDataLocationIndexedEXT(program, 0, 0, "fragColor0"); glBindFragDataLocationIndexedEXT(program, 0, 1, "fragColor1"); } } #endif glLinkProgram(program); GLint linkStatus = GL_FALSE; glGetProgramiv(program, GL_LINK_STATUS, &linkStatus); if (linkStatus != GL_TRUE) { GLint bufLength = 0; glGetProgramiv(program, GL_INFO_LOG_LENGTH, &bufLength); if (bufLength) { char* buf = new char[bufLength]; glGetProgramInfoLog(program, bufLength, NULL, buf); #ifdef ANDROID ELOG("Could not link program:\n %s", buf); #endif ERROR_LOG(G3D, "Could not link program:\n %s", buf); ERROR_LOG(G3D, "VS desc:\n%s\n", vs->GetShaderString(SHADER_STRING_SHORT_DESC, VSID).c_str()); ERROR_LOG(G3D, "FS desc:\n%s\n", fs->GetShaderString(SHADER_STRING_SHORT_DESC, FSID).c_str()); std::string vs_source = vs->GetShaderString(SHADER_STRING_SOURCE_CODE, VSID); std::string fs_source = fs->GetShaderString(SHADER_STRING_SOURCE_CODE, FSID); ERROR_LOG(G3D, "VS:\n%s\n", vs_source.c_str()); ERROR_LOG(G3D, "FS:\n%s\n", fs_source.c_str()); Reporting::ReportMessage("Error in shader program link: info: %s / fs: %s / vs: %s", buf, fs_source.c_str(), vs_source.c_str()); #ifdef SHADERLOG OutputDebugStringUTF8(buf); OutputDebugStringUTF8(vs_source.c_str()); OutputDebugStringUTF8(fs_source.c_str()); #endif delete [] buf; // we're dead! } // Prevent a buffer overflow. numBones = 0; return; } INFO_LOG(G3D, "Linked shader: vs %i fs %i", (int)vs->shader, (int)fs->shader); u_tex = glGetUniformLocation(program, "tex"); u_proj = glGetUniformLocation(program, "u_proj"); u_proj_through = glGetUniformLocation(program, "u_proj_through"); u_texenv = glGetUniformLocation(program, "u_texenv"); u_fogcolor = glGetUniformLocation(program, "u_fogcolor"); u_fogcoef = glGetUniformLocation(program, "u_fogcoef"); u_alphacolorref = glGetUniformLocation(program, "u_alphacolorref"); u_alphacolormask = glGetUniformLocation(program, "u_alphacolormask"); u_stencilReplaceValue = glGetUniformLocation(program, "u_stencilReplaceValue"); u_testtex = glGetUniformLocation(program, "testtex"); u_fbotex = glGetUniformLocation(program, "fbotex"); u_blendFixA = glGetUniformLocation(program, "u_blendFixA"); u_blendFixB = glGetUniformLocation(program, "u_blendFixB"); u_fbotexSize = glGetUniformLocation(program, "u_fbotexSize"); // Transform u_view = glGetUniformLocation(program, "u_view"); u_world = glGetUniformLocation(program, "u_world"); u_texmtx = glGetUniformLocation(program, "u_texmtx"); if (VSID.Bit(VS_BIT_ENABLE_BONES)) numBones = TranslateNumBones(VSID.Bits(VS_BIT_BONES, 3) + 1); else numBones = 0; u_depthRange = glGetUniformLocation(program, "u_depthRange"); #ifdef USE_BONE_ARRAY u_bone = glGetUniformLocation(program, "u_bone"); #else for (int i = 0; i < 8; i++) { char name[10]; sprintf(name, "u_bone%i", i); u_bone[i] = glGetUniformLocation(program, name); } #endif // Lighting, texturing u_ambient = glGetUniformLocation(program, "u_ambient"); u_matambientalpha = glGetUniformLocation(program, "u_matambientalpha"); u_matdiffuse = glGetUniformLocation(program, "u_matdiffuse"); u_matspecular = glGetUniformLocation(program, "u_matspecular"); u_matemissive = glGetUniformLocation(program, "u_matemissive"); u_uvscaleoffset = glGetUniformLocation(program, "u_uvscaleoffset"); u_texclamp = glGetUniformLocation(program, "u_texclamp"); u_texclampoff = glGetUniformLocation(program, "u_texclampoff"); for (int i = 0; i < 4; i++) { char temp[64]; sprintf(temp, "u_lightpos%i", i); u_lightpos[i] = glGetUniformLocation(program, temp); sprintf(temp, "u_lightdir%i", i); u_lightdir[i] = glGetUniformLocation(program, temp); sprintf(temp, "u_lightatt%i", i); u_lightatt[i] = glGetUniformLocation(program, temp); sprintf(temp, "u_lightangle%i", i); u_lightangle[i] = glGetUniformLocation(program, temp); sprintf(temp, "u_lightspotCoef%i", i); u_lightspotCoef[i] = glGetUniformLocation(program, temp); sprintf(temp, "u_lightambient%i", i); u_lightambient[i] = glGetUniformLocation(program, temp); sprintf(temp, "u_lightdiffuse%i", i); u_lightdiffuse[i] = glGetUniformLocation(program, temp); sprintf(temp, "u_lightspecular%i", i); u_lightspecular[i] = glGetUniformLocation(program, temp); } attrMask = 0; if (-1 != glGetAttribLocation(program, "position")) attrMask |= 1 << ATTR_POSITION; if (-1 != glGetAttribLocation(program, "texcoord")) attrMask |= 1 << ATTR_TEXCOORD; if (-1 != glGetAttribLocation(program, "normal")) attrMask |= 1 << ATTR_NORMAL; if (-1 != glGetAttribLocation(program, "w1")) attrMask |= 1 << ATTR_W1; if (-1 != glGetAttribLocation(program, "w2")) attrMask |= 1 << ATTR_W2; if (-1 != glGetAttribLocation(program, "color0")) attrMask |= 1 << ATTR_COLOR0; if (-1 != glGetAttribLocation(program, "color1")) attrMask |= 1 << ATTR_COLOR1; availableUniforms = 0; if (u_proj != -1) availableUniforms |= DIRTY_PROJMATRIX; if (u_proj_through != -1) availableUniforms |= DIRTY_PROJTHROUGHMATRIX; if (u_texenv != -1) availableUniforms |= DIRTY_TEXENV; if (u_alphacolorref != -1) availableUniforms |= DIRTY_ALPHACOLORREF; if (u_alphacolormask != -1) availableUniforms |= DIRTY_ALPHACOLORMASK; if (u_fogcolor != -1) availableUniforms |= DIRTY_FOGCOLOR; if (u_fogcoef != -1) availableUniforms |= DIRTY_FOGCOEF; if (u_texenv != -1) availableUniforms |= DIRTY_TEXENV; if (u_uvscaleoffset != -1) availableUniforms |= DIRTY_UVSCALEOFFSET; if (u_texclamp != -1) availableUniforms |= DIRTY_TEXCLAMP; if (u_world != -1) availableUniforms |= DIRTY_WORLDMATRIX; if (u_view != -1) availableUniforms |= DIRTY_VIEWMATRIX; if (u_texmtx != -1) availableUniforms |= DIRTY_TEXMATRIX; if (u_stencilReplaceValue != -1) availableUniforms |= DIRTY_STENCILREPLACEVALUE; if (u_blendFixA != -1 || u_blendFixB != -1 || u_fbotexSize != -1) availableUniforms |= DIRTY_SHADERBLEND; if (u_depthRange != -1) availableUniforms |= DIRTY_DEPTHRANGE; // Looping up to numBones lets us avoid checking u_bone[i] #ifdef USE_BONE_ARRAY if (u_bone != -1) { for (int i = 0; i < numBones; i++) { availableUniforms |= DIRTY_BONEMATRIX0 << i; } } #else for (int i = 0; i < numBones; i++) { if (u_bone[i] != -1) availableUniforms |= DIRTY_BONEMATRIX0 << i; } #endif if (u_ambient != -1) availableUniforms |= DIRTY_AMBIENT; if (u_matambientalpha != -1) availableUniforms |= DIRTY_MATAMBIENTALPHA; if (u_matdiffuse != -1) availableUniforms |= DIRTY_MATDIFFUSE; if (u_matemissive != -1) availableUniforms |= DIRTY_MATEMISSIVE; if (u_matspecular != -1) availableUniforms |= DIRTY_MATSPECULAR; for (int i = 0; i < 4; i++) { if (u_lightdir[i] != -1 || u_lightspecular[i] != -1 || u_lightpos[i] != -1) availableUniforms |= DIRTY_LIGHT0 << i; } glUseProgram(program); // Default uniform values glUniform1i(u_tex, 0); glUniform1i(u_fbotex, 1); glUniform1i(u_testtex, 2); // The rest, use the "dirty" mechanism. dirtyUniforms = DIRTY_ALL; }
bool GenerateVulkanGLSLVertexShader(const ShaderID &id, char *buffer, bool *usesLighting) { char *p = buffer; WRITE(p, "%s", vulkan_glsl_preamble); bool highpFog = false; bool highpTexcoord = false; bool isModeThrough = id.Bit(VS_BIT_IS_THROUGH); bool lmode = id.Bit(VS_BIT_LMODE) && !isModeThrough; // TODO: Different expression than in shaderIDgen bool doTexture = id.Bit(VS_BIT_DO_TEXTURE); bool doTextureTransform = id.Bit(VS_BIT_DO_TEXTURE_TRANSFORM); GETexMapMode uvGenMode = static_cast<GETexMapMode>(id.Bits(VS_BIT_UVGEN_MODE, 2)); // this is only valid for some settings of uvGenMode GETexProjMapMode uvProjMode = static_cast<GETexProjMapMode>(id.Bits(VS_BIT_UVPROJ_MODE, 2)); bool doShadeMapping = uvGenMode == GE_TEXMAP_ENVIRONMENT_MAP; bool doFlatShading = id.Bit(VS_BIT_FLATSHADE); bool useHWTransform = id.Bit(VS_BIT_USE_HW_TRANSFORM); bool hasColor = id.Bit(VS_BIT_HAS_COLOR) || !useHWTransform; bool hasNormal = id.Bit(VS_BIT_HAS_NORMAL) && useHWTransform; bool hasTexcoord = id.Bit(VS_BIT_HAS_TEXCOORD) || !useHWTransform; bool enableFog = id.Bit(VS_BIT_ENABLE_FOG); bool throughmode = id.Bit(VS_BIT_IS_THROUGH); bool flipNormal = id.Bit(VS_BIT_NORM_REVERSE); int ls0 = id.Bits(VS_BIT_LS0, 2); int ls1 = id.Bits(VS_BIT_LS1, 2); bool enableBones = id.Bit(VS_BIT_ENABLE_BONES); bool enableLighting = id.Bit(VS_BIT_LIGHTING_ENABLE); int matUpdate = id.Bits(VS_BIT_MATERIAL_UPDATE, 3); bool doBezier = id.Bit(VS_BIT_BEZIER); bool doSpline = id.Bit(VS_BIT_SPLINE); bool hasColorTess = id.Bit(VS_BIT_HAS_COLOR_TESS); bool hasTexcoordTess = id.Bit(VS_BIT_HAS_TEXCOORD_TESS); bool flipNormalTess = id.Bit(VS_BIT_NORM_REVERSE_TESS); // The uniforms are passed in as three "clumps" that may or may not be present. // We will memcpy the parts into place in a big buffer so we can be quite dynamic about what parts // are present and what parts aren't, but we will not be ultra detailed about it. *usesLighting = enableLighting || doShadeMapping; WRITE(p, "\n"); WRITE(p, "layout (std140, set = 0, binding = 2) uniform baseVars {\n%s} base;\n", ub_baseStr); if (enableLighting || doShadeMapping) WRITE(p, "layout (std140, set = 0, binding = 3) uniform lightVars {\n%s} light;\n", ub_vs_lightsStr); if (enableBones) WRITE(p, "layout (std140, set = 0, binding = 4) uniform boneVars {\n%s} bone;\n", ub_vs_bonesStr); const char *shading = doFlatShading ? "flat " : ""; DoLightComputation doLight[4] = { LIGHT_OFF, LIGHT_OFF, LIGHT_OFF, LIGHT_OFF }; if (useHWTransform) { int shadeLight0 = doShadeMapping ? ls0 : -1; int shadeLight1 = doShadeMapping ? ls1 : -1; for (int i = 0; i < 4; i++) { if (i == shadeLight0 || i == shadeLight1) doLight[i] = LIGHT_SHADE; if (id.Bit(VS_BIT_LIGHTING_ENABLE) && id.Bit(VS_BIT_LIGHT0_ENABLE + i)) doLight[i] = LIGHT_FULL; } } int numBoneWeights = 0; int boneWeightScale = id.Bits(VS_BIT_WEIGHT_FMTSCALE, 2); if (enableBones) { numBoneWeights = 1 + id.Bits(VS_BIT_BONES, 3); WRITE(p, "%s", boneWeightDecl[numBoneWeights]); } if (useHWTransform) WRITE(p, "layout (location = %d) in vec3 position;\n", PspAttributeLocation::POSITION); else // we pass the fog coord in w WRITE(p, "layout (location = %d) in vec4 position;\n", PspAttributeLocation::POSITION); if (useHWTransform && hasNormal) WRITE(p, "layout (location = %d) in vec3 normal;\n", PspAttributeLocation::NORMAL); bool texcoordInVec3 = false; if (doTexture && hasTexcoord) { if (!useHWTransform && doTextureTransform && !throughmode) { WRITE(p, "layout (location = %d) in vec3 texcoord;\n", PspAttributeLocation::TEXCOORD); texcoordInVec3 = true; } else WRITE(p, "layout (location = %d) in vec2 texcoord;\n", PspAttributeLocation::TEXCOORD); } if (hasColor) { WRITE(p, "layout (location = %d) in vec4 color0;\n", PspAttributeLocation::COLOR0); if (lmode && !useHWTransform) // only software transform supplies color1 as vertex data WRITE(p, "layout (location = %d) in vec3 color1;\n", PspAttributeLocation::COLOR1); } WRITE(p, "layout (location = 1) %sout vec4 v_color0;\n", shading); if (lmode) { WRITE(p, "layout (location = 2) %sout vec3 v_color1;\n", shading); } if (doTexture) { WRITE(p, "layout (location = 0) out vec3 v_texcoord;\n"); } if (enableFog) { // See the fragment shader generator WRITE(p, "layout (location = 3) out float v_fogdepth;\n"); } // See comment above this function (GenerateVertexShader). if (!isModeThrough && gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) { // Apply the projection and viewport to get the Z buffer value, floor to integer, undo the viewport and projection. WRITE(p, "\nvec4 depthRoundZVP(vec4 v) {\n"); WRITE(p, " float z = v.z / v.w;\n"); WRITE(p, " z = z * base.depthRange.x + base.depthRange.y;\n"); WRITE(p, " z = floor(z);\n"); WRITE(p, " z = (z - base.depthRange.z) * base.depthRange.w;\n"); WRITE(p, " return vec4(v.x, v.y, z * v.w, v.w);\n"); WRITE(p, "}\n\n"); } WRITE(p, "out gl_PerVertex { vec4 gl_Position; };\n"); if (doBezier || doSpline) { WRITE(p, "layout (binding = 5) uniform sampler2D u_tess_pos_tex;\n"); WRITE(p, "layout (binding = 6) uniform sampler2D u_tess_tex_tex;\n"); WRITE(p, "layout (binding = 7) uniform sampler2D u_tess_col_tex;\n"); for (int i = 2; i <= 4; i++) { // Define 3 types vec2, vec3, vec4 WRITE(p, "vec%d tess_sample(in vec%d points[16], in vec2 weights[4]) {\n", i, i); WRITE(p, " vec%d pos = vec%d(0);\n", i, i); WRITE(p, " for (int i = 0; i < 4; ++i) {\n"); WRITE(p, " for (int j = 0; j < 4; ++j) {\n"); WRITE(p, " float f = weights[j].x * weights[i].y;\n"); WRITE(p, " if (f != 0)\n"); WRITE(p, " pos = pos + f * points[i * 4 + j];\n"); WRITE(p, " }\n"); WRITE(p, " }\n"); WRITE(p, " return pos;\n"); WRITE(p, "}\n"); } if (doSpline) { WRITE(p, "void spline_knot(ivec2 num_patches, ivec2 type, out vec2 knot[6], ivec2 patch_pos) {\n"); WRITE(p, " for (int i = 0; i < 6; ++i) {\n"); WRITE(p, " knot[i] = vec2(i + patch_pos.x - 2, i + patch_pos.y - 2);\n"); WRITE(p, " }\n"); WRITE(p, " if ((type.x & 1) != 0) {\n"); WRITE(p, " if (patch_pos.x <= 2)\n"); WRITE(p, " knot[0].x = 0;\n"); WRITE(p, " if (patch_pos.x <= 1)\n"); WRITE(p, " knot[1].x = 0;\n"); WRITE(p, " }\n"); WRITE(p, " if ((type.x & 2) != 0) {\n"); WRITE(p, " if (patch_pos.x >= (num_patches.x - 2))\n"); WRITE(p, " knot[5].x = num_patches.x;\n"); WRITE(p, " if (patch_pos.x == (num_patches.x - 1))\n"); WRITE(p, " knot[4].x = num_patches.x;\n"); WRITE(p, " }\n"); WRITE(p, " if ((type.y & 1) != 0) {\n"); WRITE(p, " if (patch_pos.y <= 2)\n"); WRITE(p, " knot[0].y = 0;\n"); WRITE(p, " if (patch_pos.y <= 1)\n"); WRITE(p, " knot[1].y = 0;\n"); WRITE(p, " }\n"); WRITE(p, " if ((type.y & 2) != 0) {\n"); WRITE(p, " if (patch_pos.y >= (num_patches.y - 2))\n"); WRITE(p, " knot[5].y = num_patches.y;\n"); WRITE(p, " if (patch_pos.y == (num_patches.y - 1))\n"); WRITE(p, " knot[4].y = num_patches.y;\n"); WRITE(p, " }\n"); WRITE(p, "}\n"); WRITE(p, "void spline_weight(vec2 t, in vec2 knot[6], out vec2 weights[4]) {\n"); // TODO: Maybe compilers could be coaxed into vectorizing this code without the above explicitly... WRITE(p, " vec2 t0 = (t - knot[0]);\n"); WRITE(p, " vec2 t1 = (t - knot[1]);\n"); WRITE(p, " vec2 t2 = (t - knot[2]);\n"); // TODO: All our knots are integers so we should be able to get rid of these divisions (How?) WRITE(p, " vec2 f30 = t0 / (knot[3] - knot[0]);\n"); WRITE(p, " vec2 f41 = t1 / (knot[4] - knot[1]);\n"); WRITE(p, " vec2 f52 = t2 / (knot[5] - knot[2]);\n"); WRITE(p, " vec2 f31 = t1 / (knot[3] - knot[1]);\n"); WRITE(p, " vec2 f42 = t2 / (knot[4] - knot[2]);\n"); WRITE(p, " vec2 f32 = t2 / (knot[3] - knot[2]);\n"); WRITE(p, " vec2 a = (1 - f30)*(1 - f31);\n"); WRITE(p, " vec2 b = (f31*f41);\n"); WRITE(p, " vec2 c = (1 - f41)*(1 - f42);\n"); WRITE(p, " vec2 d = (f42*f52);\n"); WRITE(p, " weights[0] = a - (a*f32);\n"); WRITE(p, " weights[1] = 1 - a - b + ((a + b + c - 1)*f32);\n"); WRITE(p, " weights[2] = b + ((1 - b - c - d)*f32);\n"); WRITE(p, " weights[3] = d*f32;\n"); WRITE(p, "}\n"); } } WRITE(p, "void main() {\n"); if (!useHWTransform) { // Simple pass-through of vertex data to fragment shader if (doTexture) { if (texcoordInVec3) { WRITE(p, " v_texcoord = texcoord;\n"); } else { WRITE(p, " v_texcoord = vec3(texcoord, 1.0);\n"); } } if (hasColor) { WRITE(p, " v_color0 = color0;\n"); if (lmode) WRITE(p, " v_color1 = color1;\n"); } else { WRITE(p, " v_color0 = base.matambientalpha;\n"); if (lmode) WRITE(p, " v_color1 = vec3(0.0);\n"); } if (enableFog) { WRITE(p, " v_fogdepth = position.w;\n"); } if (isModeThrough) { WRITE(p, " gl_Position = base.proj_through_mtx * vec4(position.xyz, 1.0);\n"); } else { // The viewport is used in this case, so need to compensate for that. if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) { WRITE(p, " gl_Position = depthRoundZVP(base.proj_mtx * vec4(position.xyz, 1.0));\n"); } else { WRITE(p, " gl_Position = base.proj_mtx * vec4(position.xyz, 1.0);\n"); } } } else { // Step 1: World Transform / Skinning if (!enableBones) { if (doBezier || doSpline) { WRITE(p, " vec3 _pos[16];\n"); WRITE(p, " vec2 _tex[16];\n"); WRITE(p, " vec4 _col[16];\n"); WRITE(p, " int num_patches_u = %s;\n", doBezier ? "(base.spline_count_u - 1) / 3" : "base.spline_count_u - 3"); WRITE(p, " int u = int(mod(gl_InstanceIndex, num_patches_u));\n"); WRITE(p, " int v = gl_InstanceIndex / num_patches_u;\n"); WRITE(p, " ivec2 patch_pos = ivec2(u, v);\n"); WRITE(p, " for (int i = 0; i < 4; i++) {\n"); WRITE(p, " for (int j = 0; j < 4; j++) {\n"); WRITE(p, " int idx = (i + v%s) * base.spline_count_u + (j + u%s);\n", doBezier ? " * 3" : "", doBezier ? " * 3" : ""); WRITE(p, " ivec2 index = ivec2(idx, 0);\n"); WRITE(p, " _pos[i * 4 + j] = texelFetch(u_tess_pos_tex, index, 0).xyz;\n"); if (doTexture && hasTexcoord && hasTexcoordTess) WRITE(p, " _tex[i * 4 + j] = texelFetch(u_tess_tex_tex, index, 0).xy;\n"); if (hasColor && hasColorTess) WRITE(p, " _col[i * 4 + j] = texelFetch(u_tess_col_tex, index, 0).rgba;\n"); WRITE(p, " }\n"); WRITE(p, " }\n"); WRITE(p, " vec2 tess_pos = position.xy;\n"); WRITE(p, " vec2 weights[4];\n"); if (doBezier) { // Bernstein 3D WRITE(p, " weights[0] = (1 - tess_pos) * (1 - tess_pos) * (1 - tess_pos);\n"); WRITE(p, " weights[1] = 3 * tess_pos * (1 - tess_pos) * (1 - tess_pos);\n"); WRITE(p, " weights[2] = 3 * tess_pos * tess_pos * (1 - tess_pos);\n"); WRITE(p, " weights[3] = tess_pos * tess_pos * tess_pos;\n"); } else { // Spline WRITE(p, " ivec2 spline_num_patches = ivec2(base.spline_count_u - 3, base.spline_count_v - 3);\n"); WRITE(p, " ivec2 spline_type = ivec2(base.spline_type_u, base.spline_type_v);\n"); WRITE(p, " vec2 knots[6];\n"); WRITE(p, " spline_knot(spline_num_patches, spline_type, knots, patch_pos);\n"); WRITE(p, " spline_weight(tess_pos + patch_pos, knots, weights);\n"); } WRITE(p, " vec3 pos = tess_sample(_pos, weights);\n"); if (doTexture && hasTexcoord) { if (hasTexcoordTess) WRITE(p, " vec2 tex = tess_sample(_tex, weights);\n"); else WRITE(p, " vec2 tex = tess_pos + patch_pos;\n"); } if (hasColor) { if (hasColorTess) WRITE(p, " vec4 col = tess_sample(_col, weights);\n"); else WRITE(p, " vec4 col = texelFetch(u_tess_col_tex, ivec2(0, 0), 0).rgba;\n"); } if (hasNormal) { // Curved surface is probably always need to compute normal(not sampling from control points) if (doBezier) { // Bernstein derivative WRITE(p, " vec2 bernderiv[4];\n"); WRITE(p, " bernderiv[0] = -3 * (tess_pos - 1) * (tess_pos - 1); \n"); WRITE(p, " bernderiv[1] = 9 * tess_pos * tess_pos - 12 * tess_pos + 3; \n"); WRITE(p, " bernderiv[2] = 3 * (2 - 3 * tess_pos) * tess_pos; \n"); WRITE(p, " bernderiv[3] = 3 * tess_pos * tess_pos; \n"); WRITE(p, " vec2 bernderiv_u[4];\n"); WRITE(p, " vec2 bernderiv_v[4];\n"); WRITE(p, " for (int i = 0; i < 4; i++) {\n"); WRITE(p, " bernderiv_u[i] = vec2(bernderiv[i].x, weights[i].y);\n"); WRITE(p, " bernderiv_v[i] = vec2(weights[i].x, bernderiv[i].y);\n"); WRITE(p, " }\n"); WRITE(p, " vec3 du = tess_sample(_pos, bernderiv_u);\n"); WRITE(p, " vec3 dv = tess_sample(_pos, bernderiv_v);\n"); } else { // Spline WRITE(p, " vec2 tess_next_u = vec2(normal.x, 0);\n"); WRITE(p, " vec2 tess_next_v = vec2(0, normal.y);\n"); // Right WRITE(p, " vec2 tess_pos_r = tess_pos + tess_next_u;\n"); WRITE(p, " spline_weight(tess_pos_r + patch_pos, knots, weights);\n"); WRITE(p, " vec3 pos_r = tess_sample(_pos, weights);\n"); // Left WRITE(p, " vec2 tess_pos_l = tess_pos - tess_next_u;\n"); WRITE(p, " spline_weight(tess_pos_l + patch_pos, knots, weights);\n"); WRITE(p, " vec3 pos_l = tess_sample(_pos, weights);\n"); // Down WRITE(p, " vec2 tess_pos_d = tess_pos + tess_next_v;\n"); WRITE(p, " spline_weight(tess_pos_d + patch_pos, knots, weights);\n"); WRITE(p, " vec3 pos_d = tess_sample(_pos, weights);\n"); // Up WRITE(p, " vec2 tess_pos_u = tess_pos - tess_next_v;\n"); WRITE(p, " spline_weight(tess_pos_u + patch_pos, knots, weights);\n"); WRITE(p, " vec3 pos_u = tess_sample(_pos, weights);\n"); WRITE(p, " vec3 du = pos_r - pos_l;\n"); WRITE(p, " vec3 dv = pos_d - pos_u;\n"); } WRITE(p, " vec3 nrm = cross(du, dv);\n"); WRITE(p, " nrm = normalize(nrm);\n"); } WRITE(p, " vec3 worldpos = vec4(pos.xyz, 1.0) * base.world_mtx;\n"); if (hasNormal) { WRITE(p, " mediump vec3 worldnormal = normalize(vec4(%snrm, 0.0) * base.world_mtx);\n", flipNormalTess ? "-" : ""); } else { WRITE(p, " mediump vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n"); } } else { // No skinning, just standard T&L. WRITE(p, " vec3 worldpos = vec4(position.xyz, 1.0) * base.world_mtx;\n"); if (hasNormal) WRITE(p, " mediump vec3 worldnormal = normalize(vec4(%snormal, 0.0) * base.world_mtx);\n", flipNormal ? "-" : ""); else WRITE(p, " mediump vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n"); } } else { static const char *rescale[4] = { "", " * 1.9921875", " * 1.999969482421875", "" }; // 2*127.5f/128.f, 2*32767.5f/32768.f, 1.0f}; const char *factor = rescale[boneWeightScale]; static const char * const boneWeightAttr[8] = { "w1.x", "w1.y", "w1.z", "w1.w", "w2.x", "w2.y", "w2.z", "w2.w", }; WRITE(p, " mat3x4 skinMatrix = w1.x * bone.m[0];\n"); if (numBoneWeights > 1) { for (int i = 1; i < numBoneWeights; i++) { WRITE(p, " skinMatrix += %s * bone.m[%i];\n", boneWeightAttr[i], i); } } WRITE(p, ";\n"); // Trying to simplify this results in bugs in LBP... WRITE(p, " vec3 skinnedpos = (vec4(position, 1.0) * skinMatrix) %s;\n", factor); WRITE(p, " vec3 worldpos = vec4(skinnedpos, 1.0) * base.world_mtx;\n"); if (hasNormal) { WRITE(p, " mediump vec3 skinnednormal = vec4(%snormal, 0.0) * skinMatrix %s;\n", flipNormal ? "-" : "", factor); } else { WRITE(p, " mediump vec3 skinnednormal = vec4(0.0, 0.0, %s1.0, 0.0) * skinMatrix %s;\n", flipNormal ? "-" : "", factor); } WRITE(p, " mediump vec3 worldnormal = normalize(vec4(skinnednormal, 0.0) * base.world_mtx);\n"); } WRITE(p, " vec4 viewPos = vec4(vec4(worldpos, 1.0) * base.view_mtx, 1.0);\n"); // Final view and projection transforms. if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) { WRITE(p, " gl_Position = depthRoundZVP(base.proj_mtx * viewPos);\n"); } else { WRITE(p, " gl_Position = base.proj_mtx * viewPos;\n"); } // TODO: Declare variables for dots for shade mapping if needed. const char *ambientStr = ((matUpdate & 1) && hasColor) ? "color0" : "base.matambientalpha"; const char *diffuseStr = ((matUpdate & 2) && hasColor) ? "color0.rgb" : "light.matdiffuse"; const char *specularStr = ((matUpdate & 4) && hasColor) ? "color0.rgb" : "light.matspecular.rgb"; if (doBezier || doSpline) { ambientStr = (matUpdate & 1) && hasColor ? "col" : "base.matambientalpha"; diffuseStr = (matUpdate & 2) && hasColor ? "col.rgb" : "light.matdiffuse"; specularStr = (matUpdate & 4) && hasColor ? "col.rgb" : "light.matspecular.rgb"; } bool diffuseIsZero = true; bool specularIsZero = true; bool distanceNeeded = false; if (enableLighting) { WRITE(p, " vec4 lightSum0 = light.u_ambient * %s + vec4(light.matemissive, 0.0);\n", ambientStr); for (int i = 0; i < 4; i++) { GELightType type = static_cast<GELightType>(id.Bits(VS_BIT_LIGHT0_TYPE + 4 * i, 2)); GELightComputation comp = static_cast<GELightComputation>(id.Bits(VS_BIT_LIGHT0_COMP + 4 * i, 2)); if (doLight[i] != LIGHT_FULL) continue; diffuseIsZero = false; if (comp != GE_LIGHTCOMP_ONLYDIFFUSE) specularIsZero = false; if (type != GE_LIGHTTYPE_DIRECTIONAL) distanceNeeded = true; } if (!specularIsZero) { WRITE(p, " vec3 lightSum1 = vec3(0.0);\n"); } if (!diffuseIsZero) { WRITE(p, " vec3 toLight;\n"); WRITE(p, " vec3 diffuse;\n"); } if (distanceNeeded) { WRITE(p, " float distance;\n"); WRITE(p, " float lightScale;\n"); } } // Calculate lights if needed. If shade mapping is enabled, lights may need to be // at least partially calculated. for (int i = 0; i < 4; i++) { if (doLight[i] != LIGHT_FULL) continue; GELightType type = static_cast<GELightType>(id.Bits(VS_BIT_LIGHT0_TYPE + 4 * i, 2)); GELightComputation comp = static_cast<GELightComputation>(id.Bits(VS_BIT_LIGHT0_COMP + 4 * i, 2)); if (type == GE_LIGHTTYPE_DIRECTIONAL) { // We prenormalize light positions for directional lights. WRITE(p, " toLight = light.pos[%i];\n", i); } else { WRITE(p, " toLight = light.pos[%i] - worldpos;\n", i); WRITE(p, " distance = length(toLight);\n"); WRITE(p, " toLight /= distance;\n"); } bool doSpecular = comp != GE_LIGHTCOMP_ONLYDIFFUSE; bool poweredDiffuse = comp == GE_LIGHTCOMP_BOTHWITHPOWDIFFUSE; WRITE(p, " mediump float dot%i = max(dot(toLight, worldnormal), 0.0);\n", i); if (poweredDiffuse) { // pow(0.0, 0.0) may be undefined, but the PSP seems to treat it as 1.0. // Seen in Tales of the World: Radiant Mythology (#2424.) WRITE(p, " if (dot%i == 0.0 && light.matspecular.a == 0.0) {\n", i); WRITE(p, " dot%i = 1.0;\n", i); WRITE(p, " } else {\n"); WRITE(p, " dot%i = pow(dot%i, light.matspecular.a);\n", i, i); WRITE(p, " }\n"); } const char *timesLightScale = " * lightScale"; // Attenuation switch (type) { case GE_LIGHTTYPE_DIRECTIONAL: timesLightScale = ""; break; case GE_LIGHTTYPE_POINT: WRITE(p, " lightScale = clamp(1.0 / dot(light.att[%i], vec3(1.0, distance, distance*distance)), 0.0, 1.0);\n", i); break; case GE_LIGHTTYPE_SPOT: case GE_LIGHTTYPE_UNKNOWN: WRITE(p, " float angle%i = dot(normalize(light.dir[%i]), toLight);\n", i, i); WRITE(p, " if (angle%i >= light.angle[%i]) {\n", i, i); WRITE(p, " lightScale = clamp(1.0 / dot(light.att[%i], vec3(1.0, distance, distance*distance)), 0.0, 1.0) * pow(angle%i, light.spotCoef[%i]);\n", i, i, i); WRITE(p, " } else {\n"); WRITE(p, " lightScale = 0.0;\n"); WRITE(p, " }\n"); break; default: // ILLEGAL break; } WRITE(p, " diffuse = (light.diffuse[%i] * %s) * dot%i;\n", i, diffuseStr, i); if (doSpecular) { WRITE(p, " dot%i = dot(normalize(toLight + vec3(0.0, 0.0, 1.0)), worldnormal);\n", i); WRITE(p, " if (dot%i > 0.0)\n", i); WRITE(p, " lightSum1 += light.specular[%i] * %s * (pow(dot%i, light.matspecular.a) %s);\n", i, specularStr, i, timesLightScale); } WRITE(p, " lightSum0.rgb += (light.ambient[%i] * %s.rgb + diffuse)%s;\n", i, ambientStr, timesLightScale); } if (enableLighting) { // Sum up ambient, emissive here. if (lmode) { WRITE(p, " v_color0 = clamp(lightSum0, 0.0, 1.0);\n"); // v_color1 only exists when lmode = 1. if (specularIsZero) { WRITE(p, " v_color1 = vec3(0.0);\n"); } else { WRITE(p, " v_color1 = clamp(lightSum1, 0.0, 1.0);\n"); } } else { if (specularIsZero) { WRITE(p, " v_color0 = clamp(lightSum0, 0.0, 1.0);\n"); } else { WRITE(p, " v_color0 = clamp(clamp(lightSum0, 0.0, 1.0) + vec4(lightSum1, 0.0), 0.0, 1.0);\n"); } } } else { // Lighting doesn't affect color. if (hasColor) { if (doBezier || doSpline) WRITE(p, " v_color0 = col;\n"); else WRITE(p, " v_color0 = color0;\n"); } else { WRITE(p, " v_color0 = base.matambientalpha;\n"); } if (lmode) { WRITE(p, " v_color1 = vec3(0.0);\n"); } } bool scaleUV = !throughmode && (uvGenMode == GE_TEXMAP_TEXTURE_COORDS || uvGenMode == GE_TEXMAP_UNKNOWN); // Step 3: UV generation if (doTexture) { switch (uvGenMode) { case GE_TEXMAP_TEXTURE_COORDS: // Scale-offset. Easy. case GE_TEXMAP_UNKNOWN: // Not sure what this is, but Riviera uses it. Treating as coords works. if (scaleUV) { if (hasTexcoord) { if (doBezier || doSpline) WRITE(p, " v_texcoord = vec3(tex.xy * base.uvscaleoffset.xy + base.uvscaleoffset.zw, 0.0);\n"); else WRITE(p, " v_texcoord = vec3(texcoord.xy, 0.0);\n"); } else { WRITE(p, " v_texcoord = vec3(0.0);\n"); } } else { if (hasTexcoord) { if (doBezier || doSpline) WRITE(p, " v_texcoord = vec3(tex.xy * base.uvscaleoffset.xy + base.uvscaleoffset.zw, 0.0);\n"); else WRITE(p, " v_texcoord = vec3(texcoord.xy * base.uvscaleoffset.xy + base.uvscaleoffset.zw, 0.0);\n"); } else { WRITE(p, " v_texcoord = vec3(base.uvscaleoffset.zw, 0.0);\n"); } } break; case GE_TEXMAP_TEXTURE_MATRIX: // Projection mapping. { std::string temp_tc; switch (uvProjMode) { case GE_PROJMAP_POSITION: // Use model space XYZ as source temp_tc = "vec4(position.xyz, 1.0)"; break; case GE_PROJMAP_UV: // Use unscaled UV as source { // scaleUV is false here. if (hasTexcoord) { temp_tc = "vec4(texcoord.xy, 0.0, 1.0)"; } else { temp_tc = "vec4(0.0, 0.0, 0.0, 1.0)"; } } break; case GE_PROJMAP_NORMALIZED_NORMAL: // Use normalized transformed normal as source if (hasNormal) temp_tc = flipNormal ? "vec4(normalize(-normal), 1.0)" : "vec4(normalize(normal), 1.0)"; else temp_tc = "vec4(0.0, 0.0, 1.0, 1.0)"; break; case GE_PROJMAP_NORMAL: // Use non-normalized transformed normal as source if (hasNormal) temp_tc = flipNormal ? "vec4(-normal, 1.0)" : "vec4(normal, 1.0)"; else temp_tc = "vec4(0.0, 0.0, 1.0, 1.0)"; break; } // Transform by texture matrix. XYZ as we are doing projection mapping. WRITE(p, " v_texcoord = (%s * base.tex_mtx).xyz * vec3(base.uvscaleoffset.xy, 1.0);\n", temp_tc.c_str()); } break; case GE_TEXMAP_ENVIRONMENT_MAP: // Shade mapping - use dots from light sources. WRITE(p, " v_texcoord = vec3(base.uvscaleoffset.xy * vec2(1.0 + dot(normalize(light.pos[%i]), worldnormal), 1.0 + dot(normalize(light.pos[%i]), worldnormal)) * 0.5, 1.0);\n", ls0, ls1); break; default: // ILLEGAL break; } } // Compute fogdepth if (enableFog) WRITE(p, " v_fogdepth = (viewPos.z + base.fogcoef_stencilreplace.x) * base.fogcoef_stencilreplace.y;\n"); } WRITE(p, "}\n"); return true; }
bool GenerateVulkanGLSLVertexShader(const ShaderID &id, char *buffer, bool *usesLighting) { char *p = buffer; WRITE(p, "%s", vulkan_glsl_preamble); bool highpFog = false; bool highpTexcoord = false; bool isModeThrough = id.Bit(VS_BIT_IS_THROUGH); bool lmode = id.Bit(VS_BIT_LMODE) && !isModeThrough; // TODO: Different expression than in shaderIDgen bool doTexture = id.Bit(VS_BIT_DO_TEXTURE); bool doTextureTransform = id.Bit(VS_BIT_DO_TEXTURE_TRANSFORM); GETexMapMode uvGenMode = static_cast<GETexMapMode>(id.Bits(VS_BIT_UVGEN_MODE, 2)); // this is only valid for some settings of uvGenMode GETexProjMapMode uvProjMode = static_cast<GETexProjMapMode>(id.Bits(VS_BIT_UVPROJ_MODE, 2)); bool doShadeMapping = uvGenMode == GE_TEXMAP_ENVIRONMENT_MAP; bool doFlatShading = id.Bit(VS_BIT_FLATSHADE); bool useHWTransform = id.Bit(VS_BIT_USE_HW_TRANSFORM); bool hasColor = id.Bit(VS_BIT_HAS_COLOR) || !useHWTransform; bool hasNormal = id.Bit(VS_BIT_HAS_NORMAL) && useHWTransform; bool hasTexcoord = id.Bit(VS_BIT_HAS_TEXCOORD) || !useHWTransform; bool enableFog = id.Bit(VS_BIT_ENABLE_FOG); bool throughmode = id.Bit(VS_BIT_IS_THROUGH); bool flipNormal = id.Bit(VS_BIT_NORM_REVERSE); int ls0 = id.Bits(VS_BIT_LS0, 2); int ls1 = id.Bits(VS_BIT_LS1, 2); bool enableBones = id.Bit(VS_BIT_ENABLE_BONES); bool enableLighting = id.Bit(VS_BIT_LIGHTING_ENABLE); int matUpdate = id.Bits(VS_BIT_MATERIAL_UPDATE, 3); // The uniforms are passed in as three "clumps" that may or may not be present. // We will memcpy the parts into place in a big buffer so we can be quite dynamic about what parts // are present and what parts aren't, but we will not be ultra detailed about it. *usesLighting = enableLighting || doShadeMapping; WRITE(p, "\n"); WRITE(p, "layout (std140, set = 0, binding = 2) uniform baseVars {\n%s} base;\n", ub_baseStr); if (enableLighting || doShadeMapping) WRITE(p, "layout (std140, set = 0, binding = 3) uniform lightVars {\n%s} light;\n", ub_vs_lightsStr); if (enableBones) WRITE(p, "layout (std140, set = 0, binding = 4) uniform boneVars {\n%s} bone;\n", ub_vs_bonesStr); const char *shading = doFlatShading ? "flat " : ""; DoLightComputation doLight[4] = { LIGHT_OFF, LIGHT_OFF, LIGHT_OFF, LIGHT_OFF }; if (useHWTransform) { int shadeLight0 = doShadeMapping ? ls0 : -1; int shadeLight1 = doShadeMapping ? ls1 : -1; for (int i = 0; i < 4; i++) { if (i == shadeLight0 || i == shadeLight1) doLight[i] = LIGHT_SHADE; if (id.Bit(VS_BIT_LIGHTING_ENABLE) && id.Bit(VS_BIT_LIGHT0_ENABLE + i)) doLight[i] = LIGHT_FULL; } } int numBoneWeights = 0; int boneWeightScale = id.Bits(VS_BIT_WEIGHT_FMTSCALE, 2); if (enableBones) { numBoneWeights = 1 + id.Bits(VS_BIT_BONES, 3); WRITE(p, "%s", boneWeightDecl[numBoneWeights]); } if (useHWTransform) WRITE(p, "layout (location = %d) in vec3 position;\n", PspAttributeLocation::POSITION); else // we pass the fog coord in w WRITE(p, "layout (location = %d) in vec4 position;\n", PspAttributeLocation::POSITION); if (useHWTransform && hasNormal) WRITE(p, "layout (location = %d) in vec3 normal;\n", PspAttributeLocation::NORMAL); bool texcoordInVec3 = false; if (doTexture && hasTexcoord) { if (!useHWTransform && doTextureTransform && !throughmode) { WRITE(p, "layout (location = %d) in vec3 texcoord;\n", PspAttributeLocation::TEXCOORD); texcoordInVec3 = true; } else WRITE(p, "layout (location = %d) in vec2 texcoord;\n", PspAttributeLocation::TEXCOORD); } if (hasColor) { WRITE(p, "layout (location = %d) in vec4 color0;\n", PspAttributeLocation::COLOR0); if (lmode && !useHWTransform) // only software transform supplies color1 as vertex data WRITE(p, "layout (location = %d) in vec3 color1;\n", PspAttributeLocation::COLOR1); } WRITE(p, "layout (location = 1) %sout vec4 v_color0;\n", shading); if (lmode) { WRITE(p, "layout (location = 2) %sout vec3 v_color1;\n", shading); } if (doTexture) { WRITE(p, "layout (location = 0) out vec3 v_texcoord;\n"); } if (enableFog) { // See the fragment shader generator WRITE(p, "layout (location = 3) out float v_fogdepth;\n"); } // See comment above this function (GenerateVertexShader). if (!isModeThrough && gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) { // Apply the projection and viewport to get the Z buffer value, floor to integer, undo the viewport and projection. WRITE(p, "\nvec4 depthRoundZVP(vec4 v) {\n"); WRITE(p, " float z = v.z / v.w;\n"); WRITE(p, " z = z * base.depthRange.x + base.depthRange.y;\n"); WRITE(p, " z = floor(z);\n"); WRITE(p, " z = (z - base.depthRange.z) * base.depthRange.w;\n"); WRITE(p, " return vec4(v.x, v.y, z * v.w, v.w);\n"); WRITE(p, "}\n\n"); } WRITE(p, "out gl_PerVertex { vec4 gl_Position; };\n"); WRITE(p, "void main() {\n"); if (!useHWTransform) { // Simple pass-through of vertex data to fragment shader if (doTexture) { if (texcoordInVec3) { WRITE(p, " v_texcoord = texcoord;\n"); } else { WRITE(p, " v_texcoord = vec3(texcoord, 1.0);\n"); } } if (hasColor) { WRITE(p, " v_color0 = color0;\n"); if (lmode) WRITE(p, " v_color1 = color1;\n"); } else { WRITE(p, " v_color0 = base.matambientalpha;\n"); if (lmode) WRITE(p, " v_color1 = vec3(0.0);\n"); } if (enableFog) { WRITE(p, " v_fogdepth = position.w;\n"); } if (isModeThrough) { WRITE(p, " gl_Position = base.proj_through_mtx * vec4(position.xyz, 1.0);\n"); } else { // The viewport is used in this case, so need to compensate for that. if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) { WRITE(p, " gl_Position = depthRoundZVP(base.proj_mtx * vec4(position.xyz, 1.0));\n"); } else { WRITE(p, " gl_Position = base.proj_mtx * vec4(position.xyz, 1.0);\n"); } } } else { // Step 1: World Transform / Skinning if (!enableBones) { // No skinning, just standard T&L. WRITE(p, " vec3 worldpos = vec4(position.xyz, 1.0) * base.world_mtx;\n"); if (hasNormal) WRITE(p, " mediump vec3 worldnormal = normalize(vec4(%snormal, 0.0) * base.world_mtx);\n", flipNormal ? "-" : ""); else WRITE(p, " mediump vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n"); } else { static const char *rescale[4] = { "", " * 1.9921875", " * 1.999969482421875", "" }; // 2*127.5f/128.f, 2*32767.5f/32768.f, 1.0f}; const char *factor = rescale[boneWeightScale]; static const char * const boneWeightAttr[8] = { "w1.x", "w1.y", "w1.z", "w1.w", "w2.x", "w2.y", "w2.z", "w2.w", }; WRITE(p, " mat3x4 skinMatrix = w1.x * bone.m[0];\n"); if (numBoneWeights > 1) { for (int i = 1; i < numBoneWeights; i++) { WRITE(p, " skinMatrix += %s * bone.m[%i];\n", boneWeightAttr[i], i); } } WRITE(p, ";\n"); // Trying to simplify this results in bugs in LBP... WRITE(p, " vec3 skinnedpos = (vec4(position, 1.0) * skinMatrix) %s;\n", factor); WRITE(p, " vec3 worldpos = vec4(skinnedpos, 1.0) * base.world_mtx;\n"); if (hasNormal) { WRITE(p, " mediump vec3 skinnednormal = vec4(%snormal, 0.0) * skinMatrix %s;\n", flipNormal ? "-" : "", factor); } else { WRITE(p, " mediump vec3 skinnednormal = vec4(0.0, 0.0, %s1.0, 0.0) * skinMatrix %s;\n", flipNormal ? "-" : "", factor); } WRITE(p, " mediump vec3 worldnormal = normalize(vec4(skinnednormal, 0.0) * base.world_mtx);\n"); } WRITE(p, " vec4 viewPos = vec4(vec4(worldpos, 1.0) * base.view_mtx, 1.0);\n"); // Final view and projection transforms. if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) { WRITE(p, " gl_Position = depthRoundZVP(base.proj_mtx * viewPos);\n"); } else { WRITE(p, " gl_Position = base.proj_mtx * viewPos;\n"); } // TODO: Declare variables for dots for shade mapping if needed. const char *ambientStr = ((matUpdate & 1) && hasColor) ? "color0" : "base.matambientalpha"; const char *diffuseStr = ((matUpdate & 2) && hasColor) ? "color0.rgb" : "light.matdiffuse"; const char *specularStr = ((matUpdate & 4) && hasColor) ? "color0.rgb" : "light.matspecular.rgb"; bool diffuseIsZero = true; bool specularIsZero = true; bool distanceNeeded = false; if (enableLighting) { WRITE(p, " vec4 lightSum0 = light.u_ambient * %s + vec4(light.matemissive, 0.0);\n", ambientStr); for (int i = 0; i < 4; i++) { GELightType type = static_cast<GELightType>(id.Bits(VS_BIT_LIGHT0_TYPE + 4 * i, 2)); GELightComputation comp = static_cast<GELightComputation>(id.Bits(VS_BIT_LIGHT0_COMP + 4 * i, 2)); if (doLight[i] != LIGHT_FULL) continue; diffuseIsZero = false; if (comp != GE_LIGHTCOMP_ONLYDIFFUSE) specularIsZero = false; if (type != GE_LIGHTTYPE_DIRECTIONAL) distanceNeeded = true; } if (!specularIsZero) { WRITE(p, " vec3 lightSum1 = vec3(0.0);\n"); } if (!diffuseIsZero) { WRITE(p, " vec3 toLight;\n"); WRITE(p, " vec3 diffuse;\n"); } if (distanceNeeded) { WRITE(p, " float distance;\n"); WRITE(p, " float lightScale;\n"); } } // Calculate lights if needed. If shade mapping is enabled, lights may need to be // at least partially calculated. for (int i = 0; i < 4; i++) { if (doLight[i] != LIGHT_FULL) continue; GELightType type = static_cast<GELightType>(id.Bits(VS_BIT_LIGHT0_TYPE + 4 * i, 2)); GELightComputation comp = static_cast<GELightComputation>(id.Bits(VS_BIT_LIGHT0_COMP + 4 * i, 2)); if (type == GE_LIGHTTYPE_DIRECTIONAL) { // We prenormalize light positions for directional lights. WRITE(p, " toLight = light.pos[%i];\n", i); } else { WRITE(p, " toLight = light.pos[%i] - worldpos;\n", i); WRITE(p, " distance = length(toLight);\n"); WRITE(p, " toLight /= distance;\n"); } bool doSpecular = comp != GE_LIGHTCOMP_ONLYDIFFUSE; bool poweredDiffuse = comp == GE_LIGHTCOMP_BOTHWITHPOWDIFFUSE; WRITE(p, " mediump float dot%i = max(dot(toLight, worldnormal), 0.0);\n", i); if (poweredDiffuse) { // pow(0.0, 0.0) may be undefined, but the PSP seems to treat it as 1.0. // Seen in Tales of the World: Radiant Mythology (#2424.) WRITE(p, " if (dot%i == 0.0 && light.matspecular.a == 0.0) {\n", i); WRITE(p, " dot%i = 1.0;\n", i); WRITE(p, " } else {\n"); WRITE(p, " dot%i = pow(dot%i, light.matspecular.a);\n", i, i); WRITE(p, " }\n"); } const char *timesLightScale = " * lightScale"; // Attenuation switch (type) { case GE_LIGHTTYPE_DIRECTIONAL: timesLightScale = ""; break; case GE_LIGHTTYPE_POINT: WRITE(p, " lightScale = clamp(1.0 / dot(light.att[%i], vec3(1.0, distance, distance*distance)), 0.0, 1.0);\n", i); break; case GE_LIGHTTYPE_SPOT: case GE_LIGHTTYPE_UNKNOWN: WRITE(p, " float angle%i = dot(normalize(light.dir[%i]), toLight);\n", i, i); WRITE(p, " if (angle%i >= light.angle[%i]) {\n", i, i); WRITE(p, " lightScale = clamp(1.0 / dot(light.att[%i], vec3(1.0, distance, distance*distance)), 0.0, 1.0) * pow(angle%i, light.spotCoef[%i]);\n", i, i, i); WRITE(p, " } else {\n"); WRITE(p, " lightScale = 0.0;\n"); WRITE(p, " }\n"); break; default: // ILLEGAL break; } WRITE(p, " diffuse = (light.diffuse[%i] * %s) * dot%i;\n", i, diffuseStr, i); if (doSpecular) { WRITE(p, " dot%i = dot(normalize(toLight + vec3(0.0, 0.0, 1.0)), worldnormal);\n", i); WRITE(p, " if (dot%i > 0.0)\n", i); WRITE(p, " lightSum1 += light.specular[%i] * %s * (pow(dot%i, light.matspecular.a) %s);\n", i, specularStr, i, timesLightScale); } WRITE(p, " lightSum0.rgb += (light.ambient[%i] * %s.rgb + diffuse)%s;\n", i, ambientStr, timesLightScale); } if (enableLighting) { // Sum up ambient, emissive here. if (lmode) { WRITE(p, " v_color0 = clamp(lightSum0, 0.0, 1.0);\n"); // v_color1 only exists when lmode = 1. if (specularIsZero) { WRITE(p, " v_color1 = vec3(0.0);\n"); } else { WRITE(p, " v_color1 = clamp(lightSum1, 0.0, 1.0);\n"); } } else { if (specularIsZero) { WRITE(p, " v_color0 = clamp(lightSum0, 0.0, 1.0);\n"); } else { WRITE(p, " v_color0 = clamp(clamp(lightSum0, 0.0, 1.0) + vec4(lightSum1, 0.0), 0.0, 1.0);\n"); } } } else { // Lighting doesn't affect color. if (hasColor) { WRITE(p, " v_color0 = color0;\n"); } else { WRITE(p, " v_color0 = base.matambientalpha;\n"); } if (lmode) { WRITE(p, " v_color1 = vec3(0.0);\n"); } } bool scaleUV = !throughmode && (uvGenMode == GE_TEXMAP_TEXTURE_COORDS || uvGenMode == GE_TEXMAP_UNKNOWN); // Step 3: UV generation if (doTexture) { switch (uvGenMode) { case GE_TEXMAP_TEXTURE_COORDS: // Scale-offset. Easy. case GE_TEXMAP_UNKNOWN: // Not sure what this is, but Riviera uses it. Treating as coords works. if (scaleUV) { if (hasTexcoord) { WRITE(p, " v_texcoord = vec3(texcoord.xy, 0.0);\n"); } else { WRITE(p, " v_texcoord = vec3(0.0);\n"); } } else { if (hasTexcoord) { WRITE(p, " v_texcoord = vec3(texcoord.xy * base.uvscaleoffset.xy + base.uvscaleoffset.zw, 0.0);\n"); } else { WRITE(p, " v_texcoord = vec3(base.uvscaleoffset.zw, 0.0);\n"); } } break; case GE_TEXMAP_TEXTURE_MATRIX: // Projection mapping. { std::string temp_tc; switch (uvProjMode) { case GE_PROJMAP_POSITION: // Use model space XYZ as source temp_tc = "vec4(position.xyz, 1.0)"; break; case GE_PROJMAP_UV: // Use unscaled UV as source { // scaleUV is false here. if (hasTexcoord) { temp_tc = "vec4(texcoord.xy, 0.0, 1.0)"; } else { temp_tc = "vec4(0.0, 0.0, 0.0, 1.0)"; } } break; case GE_PROJMAP_NORMALIZED_NORMAL: // Use normalized transformed normal as source if (hasNormal) temp_tc = flipNormal ? "vec4(normalize(-normal), 1.0)" : "vec4(normalize(normal), 1.0)"; else temp_tc = "vec4(0.0, 0.0, 1.0, 1.0)"; break; case GE_PROJMAP_NORMAL: // Use non-normalized transformed normal as source if (hasNormal) temp_tc = flipNormal ? "vec4(-normal, 1.0)" : "vec4(normal, 1.0)"; else temp_tc = "vec4(0.0, 0.0, 1.0, 1.0)"; break; } // Transform by texture matrix. XYZ as we are doing projection mapping. WRITE(p, " v_texcoord = (%s * base.tex_mtx).xyz * vec3(base.uvscaleoffset.xy, 1.0);\n", temp_tc.c_str()); } break; case GE_TEXMAP_ENVIRONMENT_MAP: // Shade mapping - use dots from light sources. WRITE(p, " v_texcoord = vec3(base.uvscaleoffset.xy * vec2(1.0 + dot(normalize(light.pos[%i]), worldnormal), 1.0 + dot(normalize(light.pos[%i]), worldnormal)) * 0.5, 1.0);\n", ls0, ls1); break; default: // ILLEGAL break; } } // Compute fogdepth if (enableFog) WRITE(p, " v_fogdepth = (viewPos.z + base.fogcoef_stencilreplace.x) * base.fogcoef_stencilreplace.y;\n"); } WRITE(p, "}\n"); return true; }
// Missing: Z depth range bool GenerateFragmentShader(const ShaderID &id, char *buffer) { char *p = buffer; // In GLSL ES 3.0, you use "in" variables instead of varying. bool glslES30 = false; const char *varying = "varying"; const char *fragColor0 = "gl_FragColor"; const char *fragColor1 = "fragColor1"; const char *texture = "texture2D"; const char *texelFetch = NULL; bool highpFog = false; bool highpTexcoord = false; bool bitwiseOps = false; const char *lastFragData = nullptr; if (gl_extensions.IsGLES) { // ES doesn't support dual source alpha :( if (gstate_c.featureFlags & GPU_SUPPORTS_GLSL_ES_300) { WRITE(p, "#version 300 es\n"); // GLSL ES 3.0 fragColor0 = "fragColor0"; texture = "texture"; glslES30 = true; bitwiseOps = true; texelFetch = "texelFetch"; } else { WRITE(p, "#version 100\n"); // GLSL ES 1.0 if (gl_extensions.EXT_gpu_shader4) { WRITE(p, "#extension GL_EXT_gpu_shader4 : enable\n"); bitwiseOps = true; texelFetch = "texelFetch2D"; } if (gl_extensions.EXT_blend_func_extended) { // Oldy moldy GLES, so use the fixed output name. fragColor1 = "gl_SecondaryFragColorEXT"; } } // PowerVR needs highp to do the fog in MHU correctly. // Others don't, and some can't handle highp in the fragment shader. highpFog = (gl_extensions.bugs & BUG_PVR_SHADER_PRECISION_BAD) ? true : false; highpTexcoord = highpFog; if (gstate_c.featureFlags & GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH) { if (gl_extensions.GLES3 && gl_extensions.EXT_shader_framebuffer_fetch) { WRITE(p, "#extension GL_EXT_shader_framebuffer_fetch : require\n"); lastFragData = "fragColor0"; } else if (gl_extensions.EXT_shader_framebuffer_fetch) { WRITE(p, "#extension GL_EXT_shader_framebuffer_fetch : require\n"); lastFragData = "gl_LastFragData[0]"; } else if (gl_extensions.NV_shader_framebuffer_fetch) { // GL_NV_shader_framebuffer_fetch is available on mobile platform and ES 2.0 only but not on desktop. WRITE(p, "#extension GL_NV_shader_framebuffer_fetch : require\n"); lastFragData = "gl_LastFragData[0]"; } else if (gl_extensions.ARM_shader_framebuffer_fetch) { WRITE(p, "#extension GL_ARM_shader_framebuffer_fetch : require\n"); lastFragData = "gl_LastFragColorARM"; } } WRITE(p, "precision lowp float;\n"); } else { // TODO: Handle this in VersionGEThan? #if !defined(FORCE_OPENGL_2_0) if (gl_extensions.VersionGEThan(3, 3, 0)) { fragColor0 = "fragColor0"; texture = "texture"; glslES30 = true; bitwiseOps = true; texelFetch = "texelFetch"; WRITE(p, "#version 330\n"); } else if (gl_extensions.VersionGEThan(3, 0, 0)) { fragColor0 = "fragColor0"; bitwiseOps = true; texelFetch = "texelFetch"; WRITE(p, "#version 130\n"); if (gl_extensions.EXT_gpu_shader4) { WRITE(p, "#extension GL_EXT_gpu_shader4 : enable\n"); } } else { WRITE(p, "#version 110\n"); if (gl_extensions.EXT_gpu_shader4) { WRITE(p, "#extension GL_EXT_gpu_shader4 : enable\n"); bitwiseOps = true; texelFetch = "texelFetch2D"; } } #endif // We remove these everywhere - GL4, GL3, Mac-forced-GL2, etc. WRITE(p, "#define lowp\n"); WRITE(p, "#define mediump\n"); WRITE(p, "#define highp\n"); } if (glslES30) { varying = "in"; } bool lmode = id.Bit(FS_BIT_LMODE); bool doTexture = id.Bit(FS_BIT_DO_TEXTURE); bool enableFog = id.Bit(FS_BIT_ENABLE_FOG); bool enableAlphaTest = id.Bit(FS_BIT_ALPHA_TEST); bool alphaTestAgainstZero = id.Bit(FS_BIT_ALPHA_AGAINST_ZERO); bool enableColorTest = id.Bit(FS_BIT_COLOR_TEST); bool colorTestAgainstZero = id.Bit(FS_BIT_COLOR_AGAINST_ZERO); bool enableColorDoubling = id.Bit(FS_BIT_COLOR_DOUBLE); bool doTextureProjection = id.Bit(FS_BIT_DO_TEXTURE_PROJ); bool doTextureAlpha = id.Bit(FS_BIT_TEXALPHA); bool doFlatShading = id.Bit(FS_BIT_FLATSHADE); GEComparison alphaTestFunc = (GEComparison)id.Bits(FS_BIT_ALPHA_TEST_FUNC, 3); GEComparison colorTestFunc = (GEComparison)id.Bits(FS_BIT_COLOR_TEST_FUNC, 2); bool needShaderTexClamp = id.Bit(FS_BIT_SHADER_TEX_CLAMP); GETexFunc texFunc = (GETexFunc)id.Bits(FS_BIT_TEXFUNC, 3); bool textureAtOffset = id.Bit(FS_BIT_TEXTURE_AT_OFFSET); ReplaceBlendType replaceBlend = static_cast<ReplaceBlendType>(id.Bits(FS_BIT_REPLACE_BLEND, 3)); ReplaceAlphaType stencilToAlpha = static_cast<ReplaceAlphaType>(id.Bits(FS_BIT_STENCIL_TO_ALPHA, 2)); GEBlendSrcFactor replaceBlendFuncA = (GEBlendSrcFactor)id.Bits(FS_BIT_BLENDFUNC_A, 4); GEBlendDstFactor replaceBlendFuncB = (GEBlendDstFactor)id.Bits(FS_BIT_BLENDFUNC_B, 4); GEBlendMode replaceBlendEq = (GEBlendMode)id.Bits(FS_BIT_BLENDEQ, 3); bool isModeClear = id.Bit(FS_BIT_CLEARMODE); const char *shading = ""; if (glslES30) shading = doFlatShading ? "flat" : ""; if (doTexture) WRITE(p, "uniform sampler2D tex;\n"); if (!isModeClear && replaceBlend > REPLACE_BLEND_STANDARD) { if (!gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH) && replaceBlend == REPLACE_BLEND_COPY_FBO) { if (!texelFetch) { WRITE(p, "uniform vec2 u_fbotexSize;\n"); } WRITE(p, "uniform sampler2D fbotex;\n"); } if (replaceBlendFuncA >= GE_SRCBLEND_FIXA) { WRITE(p, "uniform vec3 u_blendFixA;\n"); } if (replaceBlendFuncB >= GE_DSTBLEND_FIXB) { WRITE(p, "uniform vec3 u_blendFixB;\n"); } } if (needShaderTexClamp && doTexture) { WRITE(p, "uniform vec4 u_texclamp;\n"); if (id.Bit(FS_BIT_TEXTURE_AT_OFFSET)) { WRITE(p, "uniform vec2 u_texclampoff;\n"); } } if (enableAlphaTest || enableColorTest) { if (g_Config.bFragmentTestCache) { WRITE(p, "uniform sampler2D testtex;\n"); } else { WRITE(p, "uniform vec4 u_alphacolorref;\n"); if (bitwiseOps && ((enableColorTest && !colorTestAgainstZero) || (enableAlphaTest && !alphaTestAgainstZero))) { WRITE(p, "uniform ivec4 u_alphacolormask;\n"); } } } StencilValueType replaceAlphaWithStencilType = (StencilValueType)id.Bits(FS_BIT_REPLACE_ALPHA_WITH_STENCIL_TYPE, 4); if (stencilToAlpha && replaceAlphaWithStencilType == STENCIL_VALUE_UNIFORM) { WRITE(p, "uniform float u_stencilReplaceValue;\n"); } if (doTexture && texFunc == GE_TEXFUNC_BLEND) WRITE(p, "uniform vec3 u_texenv;\n"); WRITE(p, "%s %s vec4 v_color0;\n", shading, varying); if (lmode) WRITE(p, "%s %s vec3 v_color1;\n", shading, varying); if (enableFog) { WRITE(p, "uniform vec3 u_fogcolor;\n"); WRITE(p, "%s %s float v_fogdepth;\n", varying, highpFog ? "highp" : "mediump"); } if (doTexture) { if (doTextureProjection) WRITE(p, "%s %s vec3 v_texcoord;\n", varying, highpTexcoord ? "highp" : "mediump"); else WRITE(p, "%s %s vec2 v_texcoord;\n", varying, highpTexcoord ? "highp" : "mediump"); } if (!g_Config.bFragmentTestCache) { if (enableAlphaTest && !alphaTestAgainstZero) { if (bitwiseOps) { WRITE(p, "int roundAndScaleTo255i(in float x) { return int(floor(x * 255.0 + 0.5)); }\n"); } else if (gl_extensions.gpuVendor == GPU_VENDOR_POWERVR) { WRITE(p, "float roundTo255thf(in mediump float x) { mediump float y = x + (0.5/255.0); return y - fract(y * 255.0) * (1.0 / 255.0); }\n"); } else { WRITE(p, "float roundAndScaleTo255f(in float x) { return floor(x * 255.0 + 0.5); }\n"); } } if (enableColorTest && !colorTestAgainstZero) { if (bitwiseOps) { WRITE(p, "ivec3 roundAndScaleTo255iv(in vec3 x) { return ivec3(floor(x * 255.0 + 0.5)); }\n"); } else if (gl_extensions.gpuVendor == GPU_VENDOR_POWERVR) { WRITE(p, "vec3 roundTo255thv(in vec3 x) { vec3 y = x + (0.5/255.0); return y - fract(y * 255.0) * (1.0 / 255.0); }\n"); } else { WRITE(p, "vec3 roundAndScaleTo255v(in vec3 x) { return floor(x * 255.0 + 0.5); }\n"); } } } if (!strcmp(fragColor0, "fragColor0")) { const char *qualifierColor0 = "out"; if (lastFragData && !strcmp(lastFragData, fragColor0)) { qualifierColor0 = "inout"; } // Output the output color definitions. if (stencilToAlpha == REPLACE_ALPHA_DUALSOURCE) { WRITE(p, "%s vec4 fragColor0;\n", qualifierColor0); WRITE(p, "out vec4 fragColor1;\n"); } else { WRITE(p, "%s vec4 fragColor0;\n", qualifierColor0); } } // PowerVR needs a custom modulo function. For some reason, this has far higher precision than the builtin one. if ((gl_extensions.bugs & BUG_PVR_SHADER_PRECISION_BAD) && needShaderTexClamp) { WRITE(p, "float mymod(float a, float b) { return a - b * floor(a / b); }\n"); } WRITE(p, "void main() {\n"); if (isModeClear) { // Clear mode does not allow any fancy shading. WRITE(p, " vec4 v = v_color0;\n"); } else { const char *secondary = ""; // Secondary color for specular on top of texture if (lmode) { WRITE(p, " vec4 s = vec4(v_color1, 0.0);\n"); secondary = " + s"; } else { secondary = ""; } if (doTexture) { const char *texcoord = "v_texcoord"; // TODO: Not sure the right way to do this for projection. // This path destroys resolution on older PowerVR no matter what I do, so we disable it on SGX 540 and lesser, and live with the consequences. if (needShaderTexClamp && !(gl_extensions.bugs & BUG_PVR_SHADER_PRECISION_TERRIBLE)) { // We may be clamping inside a larger surface (tex = 64x64, buffer=480x272). // We may also be wrapping in such a surface, or either one in a too-small surface. // Obviously, clamping to a smaller surface won't work. But better to clamp to something. std::string ucoord = "v_texcoord.x"; std::string vcoord = "v_texcoord.y"; if (doTextureProjection) { ucoord = "(v_texcoord.x / v_texcoord.z)"; vcoord = "(v_texcoord.y / v_texcoord.z)"; } std::string modulo = (gl_extensions.bugs & BUG_PVR_SHADER_PRECISION_BAD) ? "mymod" : "mod"; if (id.Bit(FS_BIT_CLAMP_S)) { ucoord = "clamp(" + ucoord + ", u_texclamp.z, u_texclamp.x - u_texclamp.z)"; } else { ucoord = modulo + "(" + ucoord + ", u_texclamp.x)"; } if (id.Bit(FS_BIT_CLAMP_T)) { vcoord = "clamp(" + vcoord + ", u_texclamp.w, u_texclamp.y - u_texclamp.w)"; } else { vcoord = modulo + "(" + vcoord + ", u_texclamp.y)"; } if (textureAtOffset) { ucoord = "(" + ucoord + " + u_texclampoff.x)"; vcoord = "(" + vcoord + " + u_texclampoff.y)"; } WRITE(p, " vec2 fixedcoord = vec2(%s, %s);\n", ucoord.c_str(), vcoord.c_str()); texcoord = "fixedcoord"; // We already projected it. doTextureProjection = false; } if (doTextureProjection) { WRITE(p, " vec4 t = %sProj(tex, %s);\n", texture, texcoord); } else { WRITE(p, " vec4 t = %s(tex, %s);\n", texture, texcoord); } WRITE(p, " vec4 p = v_color0;\n"); if (doTextureAlpha) { // texfmt == RGBA switch (texFunc) { case GE_TEXFUNC_MODULATE: WRITE(p, " vec4 v = p * t%s;\n", secondary); break; case GE_TEXFUNC_DECAL: WRITE(p, " vec4 v = vec4(mix(p.rgb, t.rgb, t.a), p.a)%s;\n", secondary); break; case GE_TEXFUNC_BLEND: WRITE(p, " vec4 v = vec4(mix(p.rgb, u_texenv.rgb, t.rgb), p.a * t.a)%s;\n", secondary); break; case GE_TEXFUNC_REPLACE: WRITE(p, " vec4 v = t%s;\n", secondary); break; case GE_TEXFUNC_ADD: case GE_TEXFUNC_UNKNOWN1: case GE_TEXFUNC_UNKNOWN2: case GE_TEXFUNC_UNKNOWN3: WRITE(p, " vec4 v = vec4(p.rgb + t.rgb, p.a * t.a)%s;\n", secondary); break; default: WRITE(p, " vec4 v = p;\n"); break; } } else { // texfmt == RGB switch (texFunc) { case GE_TEXFUNC_MODULATE: WRITE(p, " vec4 v = vec4(t.rgb * p.rgb, p.a)%s;\n", secondary); break; case GE_TEXFUNC_DECAL: WRITE(p, " vec4 v = vec4(t.rgb, p.a)%s;\n", secondary); break; case GE_TEXFUNC_BLEND: WRITE(p, " vec4 v = vec4(mix(p.rgb, u_texenv.rgb, t.rgb), p.a)%s;\n", secondary); break; case GE_TEXFUNC_REPLACE: WRITE(p, " vec4 v = vec4(t.rgb, p.a)%s;\n", secondary); break; case GE_TEXFUNC_ADD: case GE_TEXFUNC_UNKNOWN1: case GE_TEXFUNC_UNKNOWN2: case GE_TEXFUNC_UNKNOWN3: WRITE(p, " vec4 v = vec4(p.rgb + t.rgb, p.a)%s;\n", secondary); break; default: WRITE(p, " vec4 v = p;\n"); break; } } } else { // No texture mapping WRITE(p, " vec4 v = v_color0 %s;\n", secondary); } // Texture access is at half texels [0.5/256, 255.5/256], but colors are normalized [0, 255]. // So we have to scale to account for the difference. std::string alphaTestXCoord = "0"; if (g_Config.bFragmentTestCache) { if (enableColorTest && !colorTestAgainstZero) { WRITE(p, " vec4 vScale256 = v * %f + %f;\n", 255.0 / 256.0, 0.5 / 256.0); alphaTestXCoord = "vScale256.a"; } else if (enableAlphaTest && !alphaTestAgainstZero) { char temp[64]; snprintf(temp, sizeof(temp), "v.a * %f + %f", 255.0 / 256.0, 0.5 / 256.0); alphaTestXCoord = temp; } } if (enableAlphaTest) { if (alphaTestAgainstZero) { // When testing against 0 (extremely common), we can avoid some math. // 0.002 is approximately half of 1.0 / 255.0. if (alphaTestFunc == GE_COMP_NOTEQUAL || alphaTestFunc == GE_COMP_GREATER) { WRITE(p, " if (v.a < 0.002) discard;\n"); } else if (alphaTestFunc != GE_COMP_NEVER) { // Anything else is a test for == 0. Happens sometimes, actually... WRITE(p, " if (v.a > 0.002) discard;\n"); } else { // NEVER has been logged as used by games, although it makes little sense - statically failing. // Maybe we could discard the drawcall, but it's pretty rare. Let's just statically discard here. WRITE(p, " discard;\n"); } } else if (g_Config.bFragmentTestCache) { WRITE(p, " float aResult = %s(testtex, vec2(%s, 0)).a;\n", texture, alphaTestXCoord.c_str()); WRITE(p, " if (aResult < 0.5) discard;\n"); } else { const char *alphaTestFuncs[] = { "#", "#", " != ", " == ", " >= ", " > ", " <= ", " < " }; if (alphaTestFuncs[alphaTestFunc][0] != '#') { if (bitwiseOps) { WRITE(p, " if ((roundAndScaleTo255i(v.a) & u_alphacolormask.a) %s int(u_alphacolorref.a)) discard;\n", alphaTestFuncs[alphaTestFunc]); } else if (gl_extensions.gpuVendor == GPU_VENDOR_POWERVR) { // Work around bad PVR driver problem where equality check + discard just doesn't work. if (alphaTestFunc != GE_COMP_NOTEQUAL) { WRITE(p, " if (roundTo255thf(v.a) %s u_alphacolorref.a) discard;\n", alphaTestFuncs[alphaTestFunc]); } } else { WRITE(p, " if (roundAndScaleTo255f(v.a) %s u_alphacolorref.a) discard;\n", alphaTestFuncs[alphaTestFunc]); } } else { // This means NEVER. See above. WRITE(p, " discard;\n"); } } } if (enableColorTest) { if (colorTestAgainstZero) { // When testing against 0 (common), we can avoid some math. // 0.002 is approximately half of 1.0 / 255.0. if (colorTestFunc == GE_COMP_NOTEQUAL) { WRITE(p, " if (v.r < 0.002 && v.g < 0.002 && v.b < 0.002) discard;\n"); } else if (colorTestFunc != GE_COMP_NEVER) { // Anything else is a test for == 0. WRITE(p, " if (v.r > 0.002 || v.g > 0.002 || v.b > 0.002) discard;\n"); } else { // NEVER has been logged as used by games, although it makes little sense - statically failing. // Maybe we could discard the drawcall, but it's pretty rare. Let's just statically discard here. WRITE(p, " discard;\n"); } } else if (g_Config.bFragmentTestCache) { WRITE(p, " float rResult = %s(testtex, vec2(vScale256.r, 0)).r;\n", texture); WRITE(p, " float gResult = %s(testtex, vec2(vScale256.g, 0)).g;\n", texture); WRITE(p, " float bResult = %s(testtex, vec2(vScale256.b, 0)).b;\n", texture); if (colorTestFunc == GE_COMP_EQUAL) { // Equal means all parts must be equal. WRITE(p, " if (rResult < 0.5 || gResult < 0.5 || bResult < 0.5) discard;\n"); } else { // Not equal means any part must be not equal. WRITE(p, " if (rResult < 0.5 && gResult < 0.5 && bResult < 0.5) discard;\n"); } } else { const char *colorTestFuncs[] = { "#", "#", " != ", " == " }; if (colorTestFuncs[colorTestFunc][0] != '#') { if (bitwiseOps) { // Apparently GLES3 does not support vector bitwise ops. WRITE(p, " ivec3 v_scaled = roundAndScaleTo255iv(v.rgb);\n"); const char *maskedFragColor = "ivec3(v_scaled.r & u_alphacolormask.r, v_scaled.g & u_alphacolormask.g, v_scaled.b & u_alphacolormask.b)"; const char *maskedColorRef = "ivec3(int(u_alphacolorref.r) & u_alphacolormask.r, int(u_alphacolorref.g) & u_alphacolormask.g, int(u_alphacolorref.b) & u_alphacolormask.b)"; WRITE(p, " if (%s %s %s) discard;\n", maskedFragColor, colorTestFuncs[colorTestFunc], maskedColorRef); } else if (gl_extensions.gpuVendor == GPU_VENDOR_POWERVR) { WRITE(p, " if (roundTo255thv(v.rgb) %s u_alphacolorref.rgb) discard;\n", colorTestFuncs[colorTestFunc]); } else { WRITE(p, " if (roundAndScaleTo255v(v.rgb) %s u_alphacolorref.rgb) discard;\n", colorTestFuncs[colorTestFunc]); } } else { WRITE(p, " discard;\n"); } } } // Color doubling happens after the color test. if (enableColorDoubling && replaceBlend == REPLACE_BLEND_2X_SRC) { WRITE(p, " v.rgb = v.rgb * 4.0;\n"); } else if (enableColorDoubling || replaceBlend == REPLACE_BLEND_2X_SRC) { WRITE(p, " v.rgb = v.rgb * 2.0;\n"); } if (enableFog) { WRITE(p, " float fogCoef = clamp(v_fogdepth, 0.0, 1.0);\n"); WRITE(p, " v = mix(vec4(u_fogcolor, v.a), v, fogCoef);\n"); // WRITE(p, " v.x = v_depth;\n"); } if (replaceBlend == REPLACE_BLEND_PRE_SRC || replaceBlend == REPLACE_BLEND_PRE_SRC_2X_ALPHA) { const char *srcFactor = "ERROR"; switch (replaceBlendFuncA) { case GE_SRCBLEND_DSTCOLOR: srcFactor = "ERROR"; break; case GE_SRCBLEND_INVDSTCOLOR: srcFactor = "ERROR"; break; case GE_SRCBLEND_SRCALPHA: srcFactor = "vec3(v.a)"; break; case GE_SRCBLEND_INVSRCALPHA: srcFactor = "vec3(1.0 - v.a)"; break; case GE_SRCBLEND_DSTALPHA: srcFactor = "ERROR"; break; case GE_SRCBLEND_INVDSTALPHA: srcFactor = "ERROR"; break; case GE_SRCBLEND_DOUBLESRCALPHA: srcFactor = "vec3(v.a * 2.0)"; break; case GE_SRCBLEND_DOUBLEINVSRCALPHA: srcFactor = "vec3(1.0 - v.a * 2.0)"; break; // PRE_SRC for REPLACE_BLEND_PRE_SRC_2X_ALPHA means "double the src." // It's close to the same, but clamping can still be an issue. case GE_SRCBLEND_DOUBLEDSTALPHA: srcFactor = "vec3(2.0)"; break; case GE_SRCBLEND_DOUBLEINVDSTALPHA: srcFactor = "ERROR"; break; case GE_SRCBLEND_FIXA: srcFactor = "u_blendFixA"; break; default: srcFactor = "u_blendFixA"; break; } WRITE(p, " v.rgb = v.rgb * %s;\n", srcFactor); } if (replaceBlend == REPLACE_BLEND_COPY_FBO) { // If we have NV_shader_framebuffer_fetch / EXT_shader_framebuffer_fetch, we skip the blit. // We can just read the prev value more directly. if (gstate_c.featureFlags & GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH) { WRITE(p, " lowp vec4 destColor = %s;\n", lastFragData); } else if (!texelFetch) { WRITE(p, " lowp vec4 destColor = %s(fbotex, gl_FragCoord.xy * u_fbotexSize.xy);\n", texture); } else { WRITE(p, " lowp vec4 destColor = %s(fbotex, ivec2(gl_FragCoord.x, gl_FragCoord.y), 0);\n", texelFetch); } const char *srcFactor = "vec3(1.0)"; const char *dstFactor = "vec3(0.0)"; switch (replaceBlendFuncA) { case GE_SRCBLEND_DSTCOLOR: srcFactor = "destColor.rgb"; break; case GE_SRCBLEND_INVDSTCOLOR: srcFactor = "(vec3(1.0) - destColor.rgb)"; break; case GE_SRCBLEND_SRCALPHA: srcFactor = "vec3(v.a)"; break; case GE_SRCBLEND_INVSRCALPHA: srcFactor = "vec3(1.0 - v.a)"; break; case GE_SRCBLEND_DSTALPHA: srcFactor = "vec3(destColor.a)"; break; case GE_SRCBLEND_INVDSTALPHA: srcFactor = "vec3(1.0 - destColor.a)"; break; case GE_SRCBLEND_DOUBLESRCALPHA: srcFactor = "vec3(v.a * 2.0)"; break; case GE_SRCBLEND_DOUBLEINVSRCALPHA: srcFactor = "vec3(1.0 - v.a * 2.0)"; break; case GE_SRCBLEND_DOUBLEDSTALPHA: srcFactor = "vec3(destColor.a * 2.0)"; break; case GE_SRCBLEND_DOUBLEINVDSTALPHA: srcFactor = "vec3(1.0 - destColor.a * 2.0)"; break; case GE_SRCBLEND_FIXA: srcFactor = "u_blendFixA"; break; default: srcFactor = "u_blendFixA"; break; } switch (replaceBlendFuncB) { case GE_DSTBLEND_SRCCOLOR: dstFactor = "v.rgb"; break; case GE_DSTBLEND_INVSRCCOLOR: dstFactor = "(vec3(1.0) - v.rgb)"; break; case GE_DSTBLEND_SRCALPHA: dstFactor = "vec3(v.a)"; break; case GE_DSTBLEND_INVSRCALPHA: dstFactor = "vec3(1.0 - v.a)"; break; case GE_DSTBLEND_DSTALPHA: dstFactor = "vec3(destColor.a)"; break; case GE_DSTBLEND_INVDSTALPHA: dstFactor = "vec3(1.0 - destColor.a)"; break; case GE_DSTBLEND_DOUBLESRCALPHA: dstFactor = "vec3(v.a * 2.0)"; break; case GE_DSTBLEND_DOUBLEINVSRCALPHA: dstFactor = "vec3(1.0 - v.a * 2.0)"; break; case GE_DSTBLEND_DOUBLEDSTALPHA: dstFactor = "vec3(destColor.a * 2.0)"; break; case GE_DSTBLEND_DOUBLEINVDSTALPHA: dstFactor = "vec3(1.0 - destColor.a * 2.0)"; break; case GE_DSTBLEND_FIXB: dstFactor = "u_blendFixB"; break; default: srcFactor = "u_blendFixB"; break; } switch (replaceBlendEq) { case GE_BLENDMODE_MUL_AND_ADD: WRITE(p, " v.rgb = v.rgb * %s + destColor.rgb * %s;\n", srcFactor, dstFactor); break; case GE_BLENDMODE_MUL_AND_SUBTRACT: WRITE(p, " v.rgb = v.rgb * %s - destColor.rgb * %s;\n", srcFactor, dstFactor); break; case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE: WRITE(p, " v.rgb = destColor.rgb * %s - v.rgb * %s;\n", dstFactor, srcFactor); break; case GE_BLENDMODE_MIN: WRITE(p, " v.rgb = min(v.rgb, destColor.rgb);\n"); break; case GE_BLENDMODE_MAX: WRITE(p, " v.rgb = max(v.rgb, destColor.rgb);\n"); break; case GE_BLENDMODE_ABSDIFF: WRITE(p, " v.rgb = abs(v.rgb - destColor.rgb);\n"); break; } } if (replaceBlend == REPLACE_BLEND_2X_ALPHA || replaceBlend == REPLACE_BLEND_PRE_SRC_2X_ALPHA) { WRITE(p, " v.a = v.a * 2.0;\n"); } } std::string replacedAlpha = "0.0"; char replacedAlphaTemp[64] = ""; if (stencilToAlpha != REPLACE_ALPHA_NO) { switch (replaceAlphaWithStencilType) { case STENCIL_VALUE_UNIFORM: replacedAlpha = "u_stencilReplaceValue"; break; case STENCIL_VALUE_ZERO: replacedAlpha = "0.0"; break; case STENCIL_VALUE_ONE: case STENCIL_VALUE_INVERT: // In invert, we subtract by one, but we want to output one here. replacedAlpha = "1.0"; break; case STENCIL_VALUE_INCR_4: case STENCIL_VALUE_DECR_4: // We're adding/subtracting, just by the smallest value in 4-bit. snprintf(replacedAlphaTemp, sizeof(replacedAlphaTemp), "%f", 1.0 / 15.0); replacedAlpha = replacedAlphaTemp; break; case STENCIL_VALUE_INCR_8: case STENCIL_VALUE_DECR_8: // We're adding/subtracting, just by the smallest value in 8-bit. snprintf(replacedAlphaTemp, sizeof(replacedAlphaTemp), "%f", 1.0 / 255.0); replacedAlpha = replacedAlphaTemp; break; case STENCIL_VALUE_KEEP: // Do nothing. We'll mask out the alpha using color mask. break; } } switch (stencilToAlpha) { case REPLACE_ALPHA_DUALSOURCE: WRITE(p, " %s = vec4(v.rgb, %s);\n", fragColor0, replacedAlpha.c_str()); WRITE(p, " %s = vec4(0.0, 0.0, 0.0, v.a);\n", fragColor1); break; case REPLACE_ALPHA_YES: WRITE(p, " %s = vec4(v.rgb, %s);\n", fragColor0, replacedAlpha.c_str()); break; case REPLACE_ALPHA_NO: WRITE(p, " %s = v;\n", fragColor0); break; default: ERROR_LOG(G3D, "Bad stencil-to-alpha type, corrupt ID?"); return false; } LogicOpReplaceType replaceLogicOpType = (LogicOpReplaceType)id.Bits(FS_BIT_REPLACE_LOGIC_OP_TYPE, 2); switch (replaceLogicOpType) { case LOGICOPTYPE_ONE: WRITE(p, " %s.rgb = vec3(1.0, 1.0, 1.0);\n", fragColor0); break; case LOGICOPTYPE_INVERT: WRITE(p, " %s.rgb = vec3(1.0, 1.0, 1.0) - %s.rgb;\n", fragColor0, fragColor0); break; case LOGICOPTYPE_NORMAL: break; default: ERROR_LOG(G3D, "Bad logic op type, corrupt ID?"); return false; } #ifdef DEBUG_SHADER if (doTexture) { WRITE(p, " %s = texture2D(tex, v_texcoord.xy);\n", fragColor0); WRITE(p, " %s += vec4(0.3,0,0.3,0.3);\n", fragColor0); } else { WRITE(p, " %s = vec4(1,0,1,1);\n", fragColor0); } #endif if (gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT)) { WRITE(p, " highp float z = gl_FragCoord.z;\n"); WRITE(p, " z = (1.0/65535.0) * floor(z * 65535.0);\n"); WRITE(p, " gl_FragDepth = z;\n"); } WRITE(p, "}\n"); return true; }
// Missing: Z depth range bool GenerateVulkanGLSLFragmentShader(const ShaderID &id, char *buffer) { char *p = buffer; const char *lastFragData = nullptr; WRITE(p, "%s", vulkan_glsl_preamble); bool lmode = id.Bit(FS_BIT_LMODE); bool doTexture = id.Bit(FS_BIT_DO_TEXTURE); bool enableFog = id.Bit(FS_BIT_ENABLE_FOG); bool enableAlphaTest = id.Bit(FS_BIT_ALPHA_TEST); bool alphaTestAgainstZero = id.Bit(FS_BIT_ALPHA_AGAINST_ZERO); bool enableColorTest = id.Bit(FS_BIT_COLOR_TEST); bool colorTestAgainstZero = id.Bit(FS_BIT_COLOR_AGAINST_ZERO); bool enableColorDoubling = id.Bit(FS_BIT_COLOR_DOUBLE); bool doTextureProjection = id.Bit(FS_BIT_DO_TEXTURE_PROJ); bool doTextureAlpha = id.Bit(FS_BIT_TEXALPHA); bool doFlatShading = id.Bit(FS_BIT_FLATSHADE); GEComparison alphaTestFunc = (GEComparison)id.Bits(FS_BIT_ALPHA_TEST_FUNC, 3); GEComparison colorTestFunc = (GEComparison)id.Bits(FS_BIT_COLOR_TEST_FUNC, 2); bool needShaderTexClamp = id.Bit(FS_BIT_SHADER_TEX_CLAMP); GETexFunc texFunc = (GETexFunc)id.Bits(FS_BIT_TEXFUNC, 3); bool textureAtOffset = id.Bit(FS_BIT_TEXTURE_AT_OFFSET); ReplaceBlendType replaceBlend = static_cast<ReplaceBlendType>(id.Bits(FS_BIT_REPLACE_BLEND, 3)); ReplaceAlphaType stencilToAlpha = static_cast<ReplaceAlphaType>(id.Bits(FS_BIT_STENCIL_TO_ALPHA, 2)); GEBlendSrcFactor replaceBlendFuncA = (GEBlendSrcFactor)id.Bits(FS_BIT_BLENDFUNC_A, 4); GEBlendDstFactor replaceBlendFuncB = (GEBlendDstFactor)id.Bits(FS_BIT_BLENDFUNC_B, 4); GEBlendMode replaceBlendEq = (GEBlendMode)id.Bits(FS_BIT_BLENDEQ, 3); StencilValueType replaceAlphaWithStencilType = (StencilValueType)id.Bits(FS_BIT_REPLACE_ALPHA_WITH_STENCIL_TYPE, 4); bool isModeClear = id.Bit(FS_BIT_CLEARMODE); const char *shading = doFlatShading ? "flat" : ""; WRITE(p, "layout (std140, set = 0, binding = 2) uniform baseUBO {\n%s} base;\n", ub_baseStr); if (doTexture) { WRITE(p, "layout (binding = 0) uniform sampler2D tex;\n"); } if (!isModeClear && replaceBlend > REPLACE_BLEND_STANDARD) { if (replaceBlend == REPLACE_BLEND_COPY_FBO) { WRITE(p, "layout (binding = 1) uniform sampler2D fbotex;\n"); } } WRITE(p, "layout (location = 1) %s in vec4 v_color0;\n", shading); if (lmode) WRITE(p, "layout (location = 2) %s in vec3 v_color1;\n", shading); if (enableFog) { WRITE(p, "layout (location = 3) in float v_fogdepth;\n"); } if (doTexture) { WRITE(p, "layout (location = 0) in vec3 v_texcoord;\n"); } if (enableAlphaTest && !alphaTestAgainstZero) { WRITE(p, "int roundAndScaleTo255i(in float x) { return int(floor(x * 255.0 + 0.5)); }\n"); } if (enableColorTest && !colorTestAgainstZero) { WRITE(p, "ivec3 roundAndScaleTo255iv(in vec3 x) { return ivec3(floor(x * 255.0 + 0.5)); }\n"); } WRITE(p, "layout (location = 0, index = 0) out vec4 fragColor0;\n"); if (stencilToAlpha == REPLACE_ALPHA_DUALSOURCE) { WRITE(p, "layout (location = 0, index = 1) out vec4 fragColor1;\n"); } // PowerVR needs a custom modulo function. For some reason, this has far higher precision than the builtin one. if ((gl_extensions.bugs & BUG_PVR_SHADER_PRECISION_BAD) && needShaderTexClamp) { WRITE(p, "float mymod(float a, float b) { return a - b * floor(a / b); }\n"); } WRITE(p, "void main() {\n"); if (isModeClear) { // Clear mode does not allow any fancy shading. WRITE(p, " vec4 v = v_color0;\n"); } else { const char *secondary = ""; // Secondary color for specular on top of texture if (lmode) { WRITE(p, " vec4 s = vec4(v_color1, 0.0);\n"); secondary = " + s"; } else { secondary = ""; } if (doTexture) { const char *texcoord = "v_texcoord"; // TODO: Not sure the right way to do this for projection. // This path destroys resolution on older PowerVR no matter what I do, so we disable it on SGX 540 and lesser, and live with the consequences. if (needShaderTexClamp && !(gl_extensions.bugs & BUG_PVR_SHADER_PRECISION_TERRIBLE)) { // We may be clamping inside a larger surface (tex = 64x64, buffer=480x272). // We may also be wrapping in such a surface, or either one in a too-small surface. // Obviously, clamping to a smaller surface won't work. But better to clamp to something. std::string ucoord = "v_texcoord.x"; std::string vcoord = "v_texcoord.y"; if (doTextureProjection) { ucoord = "(v_texcoord.x / v_texcoord.z)"; vcoord = "(v_texcoord.y / v_texcoord.z)"; } std::string modulo = (gl_extensions.bugs & BUG_PVR_SHADER_PRECISION_BAD) ? "mymod" : "mod"; if (id.Bit(FS_BIT_CLAMP_S)) { ucoord = "clamp(" + ucoord + ", base.texclamp.z, base.texclamp.x - base.texclamp.z)"; } else { ucoord = modulo + "(" + ucoord + ", base.texclamp.x)"; } if (id.Bit(FS_BIT_CLAMP_T)) { vcoord = "clamp(" + vcoord + ", base.texclamp.w, base.texclamp.y - base.texclamp.w)"; } else { vcoord = modulo + "(" + vcoord + ", base.texclamp.y)"; } if (textureAtOffset) { ucoord = "(" + ucoord + " + base.texclampoff.x)"; vcoord = "(" + vcoord + " + base.texclampoff.y)"; } WRITE(p, " vec2 fixedcoord = vec2(%s, %s);\n", ucoord.c_str(), vcoord.c_str()); texcoord = "fixedcoord"; // We already projected it. doTextureProjection = false; } if (doTextureProjection) { WRITE(p, " vec4 t = textureProj(tex, %s);\n", texcoord); } else { WRITE(p, " vec4 t = texture(tex, %s.xy);\n", texcoord); } WRITE(p, " vec4 p = v_color0;\n"); if (doTextureAlpha) { // texfmt == RGBA switch (texFunc) { case GE_TEXFUNC_MODULATE: WRITE(p, " vec4 v = p * t%s;\n", secondary); break; case GE_TEXFUNC_DECAL: WRITE(p, " vec4 v = vec4(mix(p.rgb, t.rgb, t.a), p.a)%s;\n", secondary); break; case GE_TEXFUNC_BLEND: WRITE(p, " vec4 v = vec4(mix(p.rgb, base.texenv.rgb, t.rgb), p.a * t.a)%s;\n", secondary); break; case GE_TEXFUNC_REPLACE: WRITE(p, " vec4 v = t%s;\n", secondary); break; case GE_TEXFUNC_ADD: case GE_TEXFUNC_UNKNOWN1: case GE_TEXFUNC_UNKNOWN2: case GE_TEXFUNC_UNKNOWN3: WRITE(p, " vec4 v = vec4(p.rgb + t.rgb, p.a * t.a)%s;\n", secondary); break; default: WRITE(p, " vec4 v = p;\n"); break; } } else { // texfmt == RGB switch (texFunc) { case GE_TEXFUNC_MODULATE: WRITE(p, " vec4 v = vec4(t.rgb * p.rgb, p.a)%s;\n", secondary); break; case GE_TEXFUNC_DECAL: WRITE(p, " vec4 v = vec4(t.rgb, p.a)%s;\n", secondary); break; case GE_TEXFUNC_BLEND: WRITE(p, " vec4 v = vec4(mix(p.rgb, base.texenv.rgb, t.rgb), p.a)%s;\n", secondary); break; case GE_TEXFUNC_REPLACE: WRITE(p, " vec4 v = vec4(t.rgb, p.a)%s;\n", secondary); break; case GE_TEXFUNC_ADD: case GE_TEXFUNC_UNKNOWN1: case GE_TEXFUNC_UNKNOWN2: case GE_TEXFUNC_UNKNOWN3: WRITE(p, " vec4 v = vec4(p.rgb + t.rgb, p.a)%s;\n", secondary); break; default: WRITE(p, " vec4 v = p;\n"); break; } } } else { // No texture mapping WRITE(p, " vec4 v = v_color0 %s;\n", secondary); } // Texture access is at half texels [0.5/256, 255.5/256], but colors are normalized [0, 255]. // So we have to scale to account for the difference. std::string alphaTestXCoord = "0"; if (enableAlphaTest) { if (alphaTestAgainstZero) { // When testing against 0 (extremely common), we can avoid some math. // 0.002 is approximately half of 1.0 / 255.0. if (alphaTestFunc == GE_COMP_NOTEQUAL || alphaTestFunc == GE_COMP_GREATER) { WRITE(p, " if (v.a < 0.002) discard;\n"); } else if (alphaTestFunc != GE_COMP_NEVER) { // Anything else is a test for == 0. Happens sometimes, actually... WRITE(p, " if (v.a > 0.002) discard;\n"); } else { // NEVER has been logged as used by games, although it makes little sense - statically failing. // Maybe we could discard the drawcall, but it's pretty rare. Let's just statically discard here. WRITE(p, " discard;\n"); } } else { const char *alphaTestFuncs[] = { "#", "#", " != ", " == ", " >= ", " > ", " <= ", " < " }; if (alphaTestFuncs[alphaTestFunc][0] != '#') { WRITE(p, " if ((roundAndScaleTo255i(v.a) & base.alphacolormask.a) %s base.alphacolorref.a) discard;\n", alphaTestFuncs[alphaTestFunc]); } else { // This means NEVER. See above. WRITE(p, " discard;\n"); } } } if (enableColorTest) { if (colorTestAgainstZero) { // When testing against 0 (common), we can avoid some math. // 0.002 is approximately half of 1.0 / 255.0. if (colorTestFunc == GE_COMP_NOTEQUAL) { WRITE(p, " if (v.r < 0.002 && v.g < 0.002 && v.b < 0.002) discard;\n"); } else if (colorTestFunc != GE_COMP_NEVER) { // Anything else is a test for == 0. WRITE(p, " if (v.r > 0.002 || v.g > 0.002 || v.b > 0.002) discard;\n"); } else { // NEVER has been logged as used by games, although it makes little sense - statically failing. // Maybe we could discard the drawcall, but it's pretty rare. Let's just statically discard here. WRITE(p, " discard;\n"); } } else { const char *colorTestFuncs[] = { "#", "#", " != ", " == " }; if (colorTestFuncs[colorTestFunc][0] != '#') { WRITE(p, " ivec3 v_scaled = roundAndScaleTo255iv(v.rgb);\n"); WRITE(p, " if ((v_scaled & base.alphacolormask.rgb) %s (base.alphacolorref.rgb & base.alphacolormask.rgb)) discard;\n", colorTestFuncs[colorTestFunc]); } else { WRITE(p, " discard;\n"); } } } // Color doubling happens after the color test. if (enableColorDoubling && replaceBlend == REPLACE_BLEND_2X_SRC) { WRITE(p, " v.rgb = v.rgb * 4.0;\n"); } else if (enableColorDoubling || replaceBlend == REPLACE_BLEND_2X_SRC) { WRITE(p, " v.rgb = v.rgb * 2.0;\n"); } if (enableFog) { WRITE(p, " float fogCoef = clamp(v_fogdepth, 0.0, 1.0);\n"); WRITE(p, " v = mix(vec4(base.fogcolor, v.a), v, fogCoef);\n"); // WRITE(p, " v.x = v_depth;\n"); } if (replaceBlend == REPLACE_BLEND_PRE_SRC || replaceBlend == REPLACE_BLEND_PRE_SRC_2X_ALPHA) { const char *srcFactor = "ERROR"; switch (replaceBlendFuncA) { case GE_SRCBLEND_DSTCOLOR: srcFactor = "ERROR"; break; case GE_SRCBLEND_INVDSTCOLOR: srcFactor = "ERROR"; break; case GE_SRCBLEND_SRCALPHA: srcFactor = "vec3(v.a)"; break; case GE_SRCBLEND_INVSRCALPHA: srcFactor = "vec3(1.0 - v.a)"; break; case GE_SRCBLEND_DSTALPHA: srcFactor = "ERROR"; break; case GE_SRCBLEND_INVDSTALPHA: srcFactor = "ERROR"; break; case GE_SRCBLEND_DOUBLESRCALPHA: srcFactor = "vec3(v.a * 2.0)"; break; case GE_SRCBLEND_DOUBLEINVSRCALPHA: srcFactor = "vec3(1.0 - v.a * 2.0)"; break; case GE_SRCBLEND_DOUBLEDSTALPHA: srcFactor = "ERROR"; break; case GE_SRCBLEND_DOUBLEINVDSTALPHA: srcFactor = "ERROR"; break; case GE_SRCBLEND_FIXA: srcFactor = "base.blendFixA"; break; } WRITE(p, " v.rgb = v.rgb * %s;\n", srcFactor); } if (replaceBlend == REPLACE_BLEND_COPY_FBO) { WRITE(p, " lowp vec4 destColor = texelFetch(fbotex, ivec2(gl_FragCoord.x, gl_FragCoord.y), 0);\n"); const char *srcFactor = "vec3(1.0)"; const char *dstFactor = "vec3(0.0)"; switch (replaceBlendFuncA) { case GE_SRCBLEND_DSTCOLOR: srcFactor = "destColor.rgb"; break; case GE_SRCBLEND_INVDSTCOLOR: srcFactor = "(vec3(1.0) - destColor.rgb)"; break; case GE_SRCBLEND_SRCALPHA: srcFactor = "vec3(v.a)"; break; case GE_SRCBLEND_INVSRCALPHA: srcFactor = "vec3(1.0 - v.a)"; break; case GE_SRCBLEND_DSTALPHA: srcFactor = "vec3(destColor.a)"; break; case GE_SRCBLEND_INVDSTALPHA: srcFactor = "vec3(1.0 - destColor.a)"; break; case GE_SRCBLEND_DOUBLESRCALPHA: srcFactor = "vec3(v.a * 2.0)"; break; case GE_SRCBLEND_DOUBLEINVSRCALPHA: srcFactor = "vec3(1.0 - v.a * 2.0)"; break; case GE_SRCBLEND_DOUBLEDSTALPHA: srcFactor = "vec3(destColor.a * 2.0)"; break; case GE_SRCBLEND_DOUBLEINVDSTALPHA: srcFactor = "vec3(1.0 - destColor.a * 2.0)"; break; case GE_SRCBLEND_FIXA: srcFactor = "base.blendFixA"; break; } switch (replaceBlendFuncB) { case GE_DSTBLEND_SRCCOLOR: dstFactor = "v.rgb"; break; case GE_DSTBLEND_INVSRCCOLOR: dstFactor = "(vec3(1.0) - v.rgb)"; break; case GE_DSTBLEND_SRCALPHA: dstFactor = "vec3(v.a)"; break; case GE_DSTBLEND_INVSRCALPHA: dstFactor = "vec3(1.0 - v.a)"; break; case GE_DSTBLEND_DSTALPHA: dstFactor = "vec3(destColor.a)"; break; case GE_DSTBLEND_INVDSTALPHA: dstFactor = "vec3(1.0 - destColor.a)"; break; case GE_DSTBLEND_DOUBLESRCALPHA: dstFactor = "vec3(v.a * 2.0)"; break; case GE_DSTBLEND_DOUBLEINVSRCALPHA: dstFactor = "vec3(1.0 - v.a * 2.0)"; break; case GE_DSTBLEND_DOUBLEDSTALPHA: dstFactor = "vec3(destColor.a * 2.0)"; break; case GE_DSTBLEND_DOUBLEINVDSTALPHA: dstFactor = "vec3(1.0 - destColor.a * 2.0)"; break; case GE_DSTBLEND_FIXB: dstFactor = "base.blendFixB"; break; } switch (replaceBlendEq) { case GE_BLENDMODE_MUL_AND_ADD: WRITE(p, " v.rgb = v.rgb * %s + destColor.rgb * %s;\n", srcFactor, dstFactor); break; case GE_BLENDMODE_MUL_AND_SUBTRACT: WRITE(p, " v.rgb = v.rgb * %s - destColor.rgb * %s;\n", srcFactor, dstFactor); break; case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE: WRITE(p, " v.rgb = destColor.rgb * %s - v.rgb * %s;\n", dstFactor, srcFactor); break; case GE_BLENDMODE_MIN: WRITE(p, " v.rgb = min(v.rgb, destColor.rgb);\n"); break; case GE_BLENDMODE_MAX: WRITE(p, " v.rgb = max(v.rgb, destColor.rgb);\n"); break; case GE_BLENDMODE_ABSDIFF: WRITE(p, " v.rgb = abs(v.rgb - destColor.rgb);\n"); break; } } if (replaceBlend == REPLACE_BLEND_2X_ALPHA || replaceBlend == REPLACE_BLEND_PRE_SRC_2X_ALPHA) { WRITE(p, " v.a = v.a * 2.0;\n"); } } std::string replacedAlpha = "0.0"; char replacedAlphaTemp[64] = ""; if (stencilToAlpha != REPLACE_ALPHA_NO) { switch (replaceAlphaWithStencilType) { case STENCIL_VALUE_UNIFORM: replacedAlpha = "base.fogcoef_stencilreplace.z"; break; case STENCIL_VALUE_ZERO: replacedAlpha = "0.0"; break; case STENCIL_VALUE_ONE: case STENCIL_VALUE_INVERT: // In invert, we subtract by one, but we want to output one here. replacedAlpha = "1.0"; break; case STENCIL_VALUE_INCR_4: case STENCIL_VALUE_DECR_4: // We're adding/subtracting, just by the smallest value in 4-bit. snprintf(replacedAlphaTemp, sizeof(replacedAlphaTemp), "%f", 1.0 / 15.0); replacedAlpha = replacedAlphaTemp; break; case STENCIL_VALUE_INCR_8: case STENCIL_VALUE_DECR_8: // We're adding/subtracting, just by the smallest value in 8-bit. snprintf(replacedAlphaTemp, sizeof(replacedAlphaTemp), "%f", 1.0 / 255.0); replacedAlpha = replacedAlphaTemp; break; case STENCIL_VALUE_KEEP: // Do nothing. We'll mask out the alpha using color mask. break; } } switch (stencilToAlpha) { case REPLACE_ALPHA_DUALSOURCE: WRITE(p, " fragColor0 = vec4(v.rgb, %s);\n", replacedAlpha.c_str()); WRITE(p, " fragColor1 = vec4(0.0, 0.0, 0.0, v.a);\n"); break; case REPLACE_ALPHA_YES: WRITE(p, " fragColor0 = vec4(v.rgb, %s);\n", replacedAlpha.c_str()); break; case REPLACE_ALPHA_NO: WRITE(p, " fragColor0 = v;\n"); break; default: ERROR_LOG(G3D, "Bad stencil-to-alpha type, corrupt ID?"); return false; } LogicOpReplaceType replaceLogicOpType = (LogicOpReplaceType)id.Bits(FS_BIT_REPLACE_LOGIC_OP_TYPE, 2); switch (replaceLogicOpType) { case LOGICOPTYPE_ONE: WRITE(p, " fragColor0.rgb = vec3(1.0, 1.0, 1.0);\n"); break; case LOGICOPTYPE_INVERT: WRITE(p, " fragColor0.rgb = vec3(1.0, 1.0, 1.0) - fragColor0.rgb;\n"); break; case LOGICOPTYPE_NORMAL: break; default: ERROR_LOG(G3D, "Bad logic op type, corrupt ID?"); return false; } if (gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT)) { WRITE(p, " highp float z = gl_FragCoord.z;\n"); WRITE(p, " z = (1.0/65535.0) * floor(z * 65535.0);\n"); WRITE(p, " gl_FragDepth = z;\n"); } WRITE(p, "}\n"); return true; }