예제 #1
0
// Links a vertex shader and a fragment shader into one GL program and caches
// the location of every uniform and attribute that is looked up below.
//
// VSID / FSID     - shader IDs; used for error descriptions, and VSID also
//                   decides the bone count.
// vs / fs         - shader objects whose `shader` handles get attached.
// useHWTransform  - stored as-is in useHWTransform_.
//
// If linking fails, the error is logged and reported, numBones, attrMask and
// availableUniforms are zeroed, and the constructor returns early. None of
// the cached uniform locations are assigned in that case.
LinkedShader::LinkedShader(ShaderID VSID, Shader *vs, ShaderID FSID, Shader *fs, bool useHWTransform)
		: useHWTransform_(useHWTransform), program(0), dirtyUniforms(0) {
	PROFILE_THIS_SCOPE("shaderlink");

	program = glCreateProgram();
	vs_ = vs;
	glAttachShader(program, vs->shader);
	glAttachShader(program, fs->shader);

	// Bind attribute locations to fixed locations so that they're
	// the same in all shaders. We use this later to minimize the calls to
	// glEnableVertexAttribArray and glDisableVertexAttribArray.
	// These bindings only take effect at link time, so they must precede glLinkProgram.
	glBindAttribLocation(program, ATTR_POSITION, "position");
	glBindAttribLocation(program, ATTR_TEXCOORD, "texcoord");
	glBindAttribLocation(program, ATTR_NORMAL, "normal");
	glBindAttribLocation(program, ATTR_W1, "w1");
	glBindAttribLocation(program, ATTR_W2, "w2");
	glBindAttribLocation(program, ATTR_COLOR0, "color0");
	glBindAttribLocation(program, ATTR_COLOR1, "color1");

#if !defined(USING_GLES2)
	if (gstate_c.featureFlags & GPU_SUPPORTS_DUALSOURCE_BLEND) {
		// Dual source alpha
		glBindFragDataLocationIndexed(program, 0, 0, "fragColor0");
		glBindFragDataLocationIndexed(program, 0, 1, "fragColor1");
	} else if (gl_extensions.VersionGEThan(3, 3, 0)) {
		glBindFragDataLocation(program, 0, "fragColor0");
	}
#elif !defined(IOS)
	if (gl_extensions.GLES3) {
		if (gstate_c.featureFlags & GPU_SUPPORTS_DUALSOURCE_BLEND) {
			glBindFragDataLocationIndexedEXT(program, 0, 0, "fragColor0");
			glBindFragDataLocationIndexedEXT(program, 0, 1, "fragColor1");
		}
	}
#endif

	glLinkProgram(program);

	GLint linkStatus = GL_FALSE;
	glGetProgramiv(program, GL_LINK_STATUS, &linkStatus);
	if (linkStatus != GL_TRUE) {
		GLint bufLength = 0;
		glGetProgramiv(program, GL_INFO_LOG_LENGTH, &bufLength);
		if (bufLength > 0) {
			// The log lives in a std::string so it is released on every exit path,
			// including an exception thrown while building the strings below.
			std::string infoLog(static_cast<size_t>(bufLength), '\0');
			glGetProgramInfoLog(program, bufLength, NULL, &infoLog[0]);
			const char *buf = infoLog.c_str();
#ifdef ANDROID
			ELOG("Could not link program:\n %s", buf);
#endif
			ERROR_LOG(G3D, "Could not link program:\n %s", buf);
			ERROR_LOG(G3D, "VS desc:\n%s\n", vs->GetShaderString(SHADER_STRING_SHORT_DESC, VSID).c_str());
			ERROR_LOG(G3D, "FS desc:\n%s\n", fs->GetShaderString(SHADER_STRING_SHORT_DESC, FSID).c_str());
			std::string vs_source = vs->GetShaderString(SHADER_STRING_SOURCE_CODE, VSID);
			std::string fs_source = fs->GetShaderString(SHADER_STRING_SOURCE_CODE, FSID);
			ERROR_LOG(G3D, "VS:\n%s\n", vs_source.c_str());
			ERROR_LOG(G3D, "FS:\n%s\n", fs_source.c_str());
			Reporting::ReportMessage("Error in shader program link: info: %s / fs: %s / vs: %s", buf, fs_source.c_str(), vs_source.c_str());
#ifdef SHADERLOG
			OutputDebugStringUTF8(buf);
			OutputDebugStringUTF8(vs_source.c_str());
			OutputDebugStringUTF8(fs_source.c_str());
#endif
		}
		// Prevent a buffer overflow.
		numBones = 0;
		// An unlinked program exposes no attributes or uniforms. Zero the masks so
		// they are never read uninitialized.
		// NOTE(review): the individual u_* locations are still left unassigned here.
		attrMask = 0;
		availableUniforms = 0;
		return;
	}

	INFO_LOG(G3D, "Linked shader: vs %i fs %i", (int)vs->shader, (int)fs->shader);

	u_tex = glGetUniformLocation(program, "tex");
	u_proj = glGetUniformLocation(program, "u_proj");
	u_proj_through = glGetUniformLocation(program, "u_proj_through");
	u_texenv = glGetUniformLocation(program, "u_texenv");
	u_fogcolor = glGetUniformLocation(program, "u_fogcolor");
	u_fogcoef = glGetUniformLocation(program, "u_fogcoef");
	u_alphacolorref = glGetUniformLocation(program, "u_alphacolorref");
	u_alphacolormask = glGetUniformLocation(program, "u_alphacolormask");
	u_stencilReplaceValue = glGetUniformLocation(program, "u_stencilReplaceValue");
	u_testtex = glGetUniformLocation(program, "testtex");

	u_fbotex = glGetUniformLocation(program, "fbotex");
	u_blendFixA = glGetUniformLocation(program, "u_blendFixA");
	u_blendFixB = glGetUniformLocation(program, "u_blendFixB");
	u_fbotexSize = glGetUniformLocation(program, "u_fbotexSize");

	// Transform
	u_view = glGetUniformLocation(program, "u_view");
	u_world = glGetUniformLocation(program, "u_world");
	u_texmtx = glGetUniformLocation(program, "u_texmtx");
	if (VSID.Bit(VS_BIT_ENABLE_BONES))
		numBones = TranslateNumBones(VSID.Bits(VS_BIT_BONES, 3) + 1);
	else
		numBones = 0;
	u_depthRange = glGetUniformLocation(program, "u_depthRange");

#ifdef USE_BONE_ARRAY
	u_bone = glGetUniformLocation(program, "u_bone");
#else
	for (int i = 0; i < 8; i++) {
		// "u_bone" + one digit + terminator is 8 bytes, which fits in name[10].
		char name[10];
		sprintf(name, "u_bone%i", i);
		u_bone[i] = glGetUniformLocation(program, name);
	}
#endif

	// Lighting, texturing
	u_ambient = glGetUniformLocation(program, "u_ambient");
	u_matambientalpha = glGetUniformLocation(program, "u_matambientalpha");
	u_matdiffuse = glGetUniformLocation(program, "u_matdiffuse");
	u_matspecular = glGetUniformLocation(program, "u_matspecular");
	u_matemissive = glGetUniformLocation(program, "u_matemissive");
	u_uvscaleoffset = glGetUniformLocation(program, "u_uvscaleoffset");
	u_texclamp = glGetUniformLocation(program, "u_texclamp");
	u_texclampoff = glGetUniformLocation(program, "u_texclampoff");

	// Per-light uniforms are named with the light index appended (0..3).
	for (int i = 0; i < 4; i++) {
		char temp[64];
		sprintf(temp, "u_lightpos%i", i);
		u_lightpos[i] = glGetUniformLocation(program, temp);
		sprintf(temp, "u_lightdir%i", i);
		u_lightdir[i] = glGetUniformLocation(program, temp);
		sprintf(temp, "u_lightatt%i", i);
		u_lightatt[i] = glGetUniformLocation(program, temp);
		sprintf(temp, "u_lightangle%i", i);
		u_lightangle[i] = glGetUniformLocation(program, temp);
		sprintf(temp, "u_lightspotCoef%i", i);
		u_lightspotCoef[i] = glGetUniformLocation(program, temp);
		sprintf(temp, "u_lightambient%i", i);
		u_lightambient[i] = glGetUniformLocation(program, temp);
		sprintf(temp, "u_lightdiffuse%i", i);
		u_lightdiffuse[i] = glGetUniformLocation(program, temp);
		sprintf(temp, "u_lightspecular%i", i);
		u_lightspecular[i] = glGetUniformLocation(program, temp);
	}

	// Record which of the fixed attribute slots the linked program actually uses.
	attrMask = 0;
	if (-1 != glGetAttribLocation(program, "position")) attrMask |= 1 << ATTR_POSITION;
	if (-1 != glGetAttribLocation(program, "texcoord")) attrMask |= 1 << ATTR_TEXCOORD;
	if (-1 != glGetAttribLocation(program, "normal")) attrMask |= 1 << ATTR_NORMAL;
	if (-1 != glGetAttribLocation(program, "w1")) attrMask |= 1 << ATTR_W1;
	if (-1 != glGetAttribLocation(program, "w2")) attrMask |= 1 << ATTR_W2;
	if (-1 != glGetAttribLocation(program, "color0")) attrMask |= 1 << ATTR_COLOR0;
	if (-1 != glGetAttribLocation(program, "color1")) attrMask |= 1 << ATTR_COLOR1;

	// Build the mask of dirty flags that correspond to uniforms present in this program.
	availableUniforms = 0;
	if (u_proj != -1) availableUniforms |= DIRTY_PROJMATRIX;
	if (u_proj_through != -1) availableUniforms |= DIRTY_PROJTHROUGHMATRIX;
	if (u_texenv != -1) availableUniforms |= DIRTY_TEXENV;
	if (u_alphacolorref != -1) availableUniforms |= DIRTY_ALPHACOLORREF;
	if (u_alphacolormask != -1) availableUniforms |= DIRTY_ALPHACOLORMASK;
	if (u_fogcolor != -1) availableUniforms |= DIRTY_FOGCOLOR;
	if (u_fogcoef != -1) availableUniforms |= DIRTY_FOGCOEF;
	if (u_uvscaleoffset != -1) availableUniforms |= DIRTY_UVSCALEOFFSET;
	if (u_texclamp != -1) availableUniforms |= DIRTY_TEXCLAMP;
	if (u_world != -1) availableUniforms |= DIRTY_WORLDMATRIX;
	if (u_view != -1) availableUniforms |= DIRTY_VIEWMATRIX;
	if (u_texmtx != -1) availableUniforms |= DIRTY_TEXMATRIX;
	if (u_stencilReplaceValue != -1) availableUniforms |= DIRTY_STENCILREPLACEVALUE;
	if (u_blendFixA != -1 || u_blendFixB != -1 || u_fbotexSize != -1) availableUniforms |= DIRTY_SHADERBLEND;
	if (u_depthRange != -1)
		availableUniforms |= DIRTY_DEPTHRANGE;

	// Looping up to numBones lets us avoid checking u_bone[i]
#ifdef USE_BONE_ARRAY
	if (u_bone != -1) {
		for (int i = 0; i < numBones; i++) {
			availableUniforms |= DIRTY_BONEMATRIX0 << i;
		}
	}
#else
	for (int i = 0; i < numBones; i++) {
		if (u_bone[i] != -1)
			availableUniforms |= DIRTY_BONEMATRIX0 << i;
	}
#endif
	if (u_ambient != -1) availableUniforms |= DIRTY_AMBIENT;
	if (u_matambientalpha != -1) availableUniforms |= DIRTY_MATAMBIENTALPHA;
	if (u_matdiffuse != -1) availableUniforms |= DIRTY_MATDIFFUSE;
	if (u_matemissive != -1) availableUniforms |= DIRTY_MATEMISSIVE;
	if (u_matspecular != -1) availableUniforms |= DIRTY_MATSPECULAR;
	// A light counts as available if any of its dir/specular/pos uniforms is present.
	for (int i = 0; i < 4; i++) {
		if (u_lightdir[i] != -1 ||
				u_lightspecular[i] != -1 ||
				u_lightpos[i] != -1)
			availableUniforms |= DIRTY_LIGHT0 << i;
	}

	glUseProgram(program);

	// Default uniform values
	glUniform1i(u_tex, 0);
	glUniform1i(u_fbotex, 1);
	glUniform1i(u_testtex, 2);
	// The rest, use the "dirty" mechanism.
	dirtyUniforms = DIRTY_ALL;
}
// Writes the GLSL source of a Vulkan vertex shader into `buffer`, with the
// shape of the shader chosen entirely by the feature bits in `id`.
//
// id            - vertex shader ID; every branch below is keyed off its bits.
// buffer        - destination for the generated text. No size is passed in or
//                 checked in this function, so the caller must size it generously.
// usesLighting  - out: set to true when lighting is enabled or the UV generation
//                 mode is environment mapping (i.e. the light uniform block is declared).
//
// Returns true on every path visible here.
//
// The emitted text depends on the exact order of the WRITE calls, so statements
// must not be reordered casually.
bool GenerateVulkanGLSLVertexShader(const ShaderID &id, char *buffer, bool *usesLighting) {
	char *p = buffer;

	WRITE(p, "%s", vulkan_glsl_preamble);

	// NOTE(review): highpFog and highpTexcoord are not referenced again in this
	// function (unless the WRITE macro uses them) - candidates for removal.
	bool highpFog = false;
	bool highpTexcoord = false;

	bool isModeThrough = id.Bit(VS_BIT_IS_THROUGH);
	bool lmode = id.Bit(VS_BIT_LMODE) && !isModeThrough;  // TODO: Different expression than in shaderIDgen
	bool doTexture = id.Bit(VS_BIT_DO_TEXTURE);
	bool doTextureTransform = id.Bit(VS_BIT_DO_TEXTURE_TRANSFORM);

	GETexMapMode uvGenMode = static_cast<GETexMapMode>(id.Bits(VS_BIT_UVGEN_MODE, 2));

	// this is only valid for some settings of uvGenMode
	GETexProjMapMode uvProjMode = static_cast<GETexProjMapMode>(id.Bits(VS_BIT_UVPROJ_MODE, 2));
	bool doShadeMapping = uvGenMode == GE_TEXMAP_ENVIRONMENT_MAP;
	bool doFlatShading = id.Bit(VS_BIT_FLATSHADE);

	// Without hardware transform, color and texcoord are always treated as present
	// and normals are never used.
	bool useHWTransform = id.Bit(VS_BIT_USE_HW_TRANSFORM);
	bool hasColor = id.Bit(VS_BIT_HAS_COLOR) || !useHWTransform;
	bool hasNormal = id.Bit(VS_BIT_HAS_NORMAL) && useHWTransform;
	bool hasTexcoord = id.Bit(VS_BIT_HAS_TEXCOORD) || !useHWTransform;
	bool enableFog = id.Bit(VS_BIT_ENABLE_FOG);
	// Reads the same ID bit as isModeThrough above.
	bool throughmode = id.Bit(VS_BIT_IS_THROUGH);
	bool flipNormal = id.Bit(VS_BIT_NORM_REVERSE);
	// Indices of the two lights used as sources for shade (environment) mapping.
	int ls0 = id.Bits(VS_BIT_LS0, 2);
	int ls1 = id.Bits(VS_BIT_LS1, 2);
	bool enableBones = id.Bit(VS_BIT_ENABLE_BONES);
	bool enableLighting = id.Bit(VS_BIT_LIGHTING_ENABLE);
	// Bitmask: bit 0 -> ambient, bit 1 -> diffuse, bit 2 -> specular come from the
	// vertex color instead of the material uniforms (see ambientStr etc. below).
	int matUpdate = id.Bits(VS_BIT_MATERIAL_UPDATE, 3);

	// Hardware tessellation (Bezier / spline patches) flags.
	bool doBezier = id.Bit(VS_BIT_BEZIER);
	bool doSpline = id.Bit(VS_BIT_SPLINE);
	bool hasColorTess = id.Bit(VS_BIT_HAS_COLOR_TESS);
	bool hasTexcoordTess = id.Bit(VS_BIT_HAS_TEXCOORD_TESS);
	bool flipNormalTess = id.Bit(VS_BIT_NORM_REVERSE_TESS);

	// The uniforms are passed in as three "clumps" that may or may not be present.
	// We will memcpy the parts into place in a big buffer so we can be quite dynamic about what parts
	// are present and what parts aren't, but we will not be ultra detailed about it.
	*usesLighting = enableLighting || doShadeMapping;
	WRITE(p, "\n");
	WRITE(p, "layout (std140, set = 0, binding = 2) uniform baseVars {\n%s} base;\n", ub_baseStr);
	if (enableLighting || doShadeMapping)
		WRITE(p, "layout (std140, set = 0, binding = 3) uniform lightVars {\n%s} light;\n", ub_vs_lightsStr);
	if (enableBones)
		WRITE(p, "layout (std140, set = 0, binding = 4) uniform boneVars {\n%s} bone;\n", ub_vs_bonesStr);

	// Interpolation qualifier prefixed to the color outputs.
	const char *shading = doFlatShading ? "flat " : "";

	// Per-light level of work. Only filled in for hardware transform:
	// LIGHT_SHADE if the light is a shade-mapping source, then overridden by
	// LIGHT_FULL if lighting and that light are both enabled.
	DoLightComputation doLight[4] = { LIGHT_OFF, LIGHT_OFF, LIGHT_OFF, LIGHT_OFF };
	if (useHWTransform) {
		int shadeLight0 = doShadeMapping ? ls0 : -1;
		int shadeLight1 = doShadeMapping ? ls1 : -1;
		for (int i = 0; i < 4; i++) {
			if (i == shadeLight0 || i == shadeLight1)
				doLight[i] = LIGHT_SHADE;
			if (id.Bit(VS_BIT_LIGHTING_ENABLE) && id.Bit(VS_BIT_LIGHT0_ENABLE + i))
				doLight[i] = LIGHT_FULL;
		}
	}

	// Bone weight inputs. boneWeightDecl is indexed by the weight count
	// (presumably 1..8 from a 3-bit field + 1 - table is defined elsewhere).
	int numBoneWeights = 0;
	int boneWeightScale = id.Bits(VS_BIT_WEIGHT_FMTSCALE, 2);
	if (enableBones) {
		numBoneWeights = 1 + id.Bits(VS_BIT_BONES, 3);
		WRITE(p, "%s", boneWeightDecl[numBoneWeights]);
	}

	// Vertex inputs.
	if (useHWTransform)
		WRITE(p, "layout (location = %d) in vec3 position;\n", PspAttributeLocation::POSITION);
	else
		// we pass the fog coord in w
		WRITE(p, "layout (location = %d) in vec4 position;\n", PspAttributeLocation::POSITION);

	if (useHWTransform && hasNormal)
		WRITE(p, "layout (location = %d) in vec3 normal;\n", PspAttributeLocation::NORMAL);

	// Remembers whether the texcoord input was declared as vec3 so the
	// pass-through path can forward it without widening.
	bool texcoordInVec3 = false;
	if (doTexture && hasTexcoord) {
		if (!useHWTransform && doTextureTransform && !throughmode) {
			WRITE(p, "layout (location = %d) in vec3 texcoord;\n", PspAttributeLocation::TEXCOORD);
			texcoordInVec3 = true;
		}
		else
			WRITE(p, "layout (location = %d) in vec2 texcoord;\n", PspAttributeLocation::TEXCOORD);
	}
	if (hasColor) {
		WRITE(p, "layout (location = %d) in vec4 color0;\n", PspAttributeLocation::COLOR0);
		if (lmode && !useHWTransform)  // only software transform supplies color1 as vertex data
			WRITE(p, "layout (location = %d) in vec3 color1;\n", PspAttributeLocation::COLOR1);
	}

	// Outputs to the fragment stage (locations 0-3 are hard-coded here).
	WRITE(p, "layout (location = 1) %sout vec4 v_color0;\n", shading);
	if (lmode) {
		WRITE(p, "layout (location = 2) %sout vec3 v_color1;\n", shading);
	}

	if (doTexture) {
		WRITE(p, "layout (location = 0) out vec3 v_texcoord;\n");
	}

	if (enableFog) {
		// See the fragment shader generator
		WRITE(p, "layout (location = 3) out float v_fogdepth;\n");
	}

	// See comment above this function (GenerateVertexShader).
	if (!isModeThrough && gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) {
		// Apply the projection and viewport to get the Z buffer value, floor to integer, undo the viewport and projection.
		WRITE(p, "\nvec4 depthRoundZVP(vec4 v) {\n");
		WRITE(p, "  float z = v.z / v.w;\n");
		WRITE(p, "  z = z * base.depthRange.x + base.depthRange.y;\n");
		WRITE(p, "  z = floor(z);\n");
		WRITE(p, "  z = (z - base.depthRange.z) * base.depthRange.w;\n");
		WRITE(p, "  return vec4(v.x, v.y, z * v.w, v.w);\n");
		WRITE(p, "}\n\n");
	}
	WRITE(p, "out gl_PerVertex { vec4 gl_Position; };\n");

	// Tessellation helpers: control point textures plus GLSL helper functions.
	if (doBezier || doSpline) {
		WRITE(p, "layout (binding = 5) uniform sampler2D u_tess_pos_tex;\n");
		WRITE(p, "layout (binding = 6) uniform sampler2D u_tess_tex_tex;\n");
		WRITE(p, "layout (binding = 7) uniform sampler2D u_tess_col_tex;\n");

		// The C++ loop variable i selects the vector width; the i/j inside the
		// string literals are GLSL loop variables and unrelated to it.
		for (int i = 2; i <= 4; i++) {
			// Define 3 types vec2, vec3, vec4
			WRITE(p, "vec%d tess_sample(in vec%d points[16], in vec2 weights[4]) {\n", i, i);
			WRITE(p, "  vec%d pos = vec%d(0);\n", i, i);
			WRITE(p, "  for (int i = 0; i < 4; ++i) {\n");
			WRITE(p, "    for (int j = 0; j < 4; ++j) {\n");
			WRITE(p, "      float f = weights[j].x * weights[i].y;\n");
			WRITE(p, "      if (f != 0)\n");
			WRITE(p, "        pos = pos + f * points[i * 4 + j];\n");
			WRITE(p, "    }\n");
			WRITE(p, "  }\n");
			WRITE(p, "  return pos;\n");
			WRITE(p, "}\n");
		}
		if (doSpline) {
			// spline_knot: builds the 6 knot values per axis for the current patch,
			// adjusting the ends according to the type bits (bit 0 = start, bit 1 = end).
			WRITE(p, "void spline_knot(ivec2 num_patches, ivec2 type, out vec2 knot[6], ivec2 patch_pos) {\n");
			WRITE(p, "  for (int i = 0; i < 6; ++i) {\n");
			WRITE(p, "    knot[i] = vec2(i + patch_pos.x - 2, i + patch_pos.y - 2);\n");
			WRITE(p, "  }\n");
			WRITE(p, "  if ((type.x & 1) != 0) {\n");
			WRITE(p, "    if (patch_pos.x <= 2)\n");
			WRITE(p, "      knot[0].x = 0;\n");
			WRITE(p, "    if (patch_pos.x <= 1)\n");
			WRITE(p, "      knot[1].x = 0;\n");
			WRITE(p, "  }\n");
			WRITE(p, "  if ((type.x & 2) != 0) {\n");
			WRITE(p, "    if (patch_pos.x >= (num_patches.x - 2))\n");
			WRITE(p, "      knot[5].x = num_patches.x;\n");
			WRITE(p, "    if (patch_pos.x == (num_patches.x - 1))\n");
			WRITE(p, "      knot[4].x = num_patches.x;\n");
			WRITE(p, "  }\n");
			WRITE(p, "  if ((type.y & 1) != 0) {\n");
			WRITE(p, "    if (patch_pos.y <= 2)\n");
			WRITE(p, "      knot[0].y = 0;\n");
			WRITE(p, "    if (patch_pos.y <= 1)\n");
			WRITE(p, "      knot[1].y = 0;\n");
			WRITE(p, "  }\n");
			WRITE(p, "  if ((type.y & 2) != 0) {\n");
			WRITE(p, "    if (patch_pos.y >= (num_patches.y - 2))\n");
			WRITE(p, "      knot[5].y = num_patches.y;\n");
			WRITE(p, "    if (patch_pos.y == (num_patches.y - 1))\n");
			WRITE(p, "      knot[4].y = num_patches.y;\n");
			WRITE(p, "  }\n");
			WRITE(p, "}\n");

			// spline_weight: computes the 4 basis weights (u in .x, v in .y) at t.
			WRITE(p, "void spline_weight(vec2 t, in vec2 knot[6], out vec2 weights[4]) {\n");
			// TODO: Maybe compilers could be coaxed into vectorizing this code without the above explicitly...
			WRITE(p, "  vec2 t0 = (t - knot[0]);\n");
			WRITE(p, "  vec2 t1 = (t - knot[1]);\n");
			WRITE(p, "  vec2 t2 = (t - knot[2]);\n");
			// TODO: All our knots are integers so we should be able to get rid of these divisions (How?)
			WRITE(p, "  vec2 f30 = t0 / (knot[3] - knot[0]);\n");
			WRITE(p, "  vec2 f41 = t1 / (knot[4] - knot[1]);\n");
			WRITE(p, "  vec2 f52 = t2 / (knot[5] - knot[2]);\n");
			WRITE(p, "  vec2 f31 = t1 / (knot[3] - knot[1]);\n");
			WRITE(p, "  vec2 f42 = t2 / (knot[4] - knot[2]);\n");
			WRITE(p, "  vec2 f32 = t2 / (knot[3] - knot[2]);\n");
			WRITE(p, "  vec2 a = (1 - f30)*(1 - f31);\n");
			WRITE(p, "  vec2 b = (f31*f41);\n");
			WRITE(p, "  vec2 c = (1 - f41)*(1 - f42);\n");
			WRITE(p, "  vec2 d = (f42*f52);\n");
			WRITE(p, "  weights[0] = a - (a*f32);\n");
			WRITE(p, "  weights[1] = 1 - a - b + ((a + b + c - 1)*f32);\n");
			WRITE(p, "  weights[2] = b + ((1 - b - c - d)*f32);\n");
			WRITE(p, "  weights[3] = d*f32;\n");
			WRITE(p, "}\n");
		}
	}

	WRITE(p, "void main() {\n");

	if (!useHWTransform) {
		// Simple pass-through of vertex data to fragment shader
		if (doTexture) {
			if (texcoordInVec3) {
				WRITE(p, "  v_texcoord = texcoord;\n");
			} else {
				WRITE(p, "  v_texcoord = vec3(texcoord, 1.0);\n");
			}
		}
		if (hasColor) {
			WRITE(p, "  v_color0 = color0;\n");
			if (lmode)
				WRITE(p, "  v_color1 = color1;\n");
		} else {
			// NOTE(review): hasColor is always true when !useHWTransform (see its
			// definition above), so this branch looks unreachable.
			WRITE(p, "  v_color0 = base.matambientalpha;\n");
			if (lmode)
				WRITE(p, "  v_color1 = vec3(0.0);\n");
		}
		if (enableFog) {
			WRITE(p, "  v_fogdepth = position.w;\n");
		}
		if (isModeThrough) {
			WRITE(p, "  gl_Position = base.proj_through_mtx * vec4(position.xyz, 1.0);\n");
		} else {
			// The viewport is used in this case, so need to compensate for that.
			if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) {
				WRITE(p, "  gl_Position = depthRoundZVP(base.proj_mtx * vec4(position.xyz, 1.0));\n");
			} else {
				WRITE(p, "  gl_Position = base.proj_mtx * vec4(position.xyz, 1.0);\n");
			}
		}
	} else {
		// Step 1: World Transform / Skinning
		if (!enableBones) {
			if (doBezier || doSpline) {
				// Fetch the 4x4 control points of this instance's patch from the
				// tessellation textures, then evaluate position/texcoord/color.
				WRITE(p, "  vec3 _pos[16];\n");
				WRITE(p, "  vec2 _tex[16];\n");
				WRITE(p, "  vec4 _col[16];\n");
				WRITE(p, "  int num_patches_u = %s;\n", doBezier ? "(base.spline_count_u - 1) / 3" : "base.spline_count_u - 3");
				WRITE(p, "  int u = int(mod(gl_InstanceIndex, num_patches_u));\n");
				WRITE(p, "  int v = gl_InstanceIndex / num_patches_u;\n");
				WRITE(p, "  ivec2 patch_pos = ivec2(u, v);\n");
				WRITE(p, "  for (int i = 0; i < 4; i++) {\n");
				WRITE(p, "    for (int j = 0; j < 4; j++) {\n");
				WRITE(p, "      int idx = (i + v%s) * base.spline_count_u + (j + u%s);\n", doBezier ? " * 3" : "", doBezier ? " * 3" : "");
				WRITE(p, "      ivec2 index = ivec2(idx, 0);\n");
				WRITE(p, "      _pos[i * 4 + j] = texelFetch(u_tess_pos_tex, index, 0).xyz;\n");
				if (doTexture && hasTexcoord && hasTexcoordTess)
					WRITE(p, "      _tex[i * 4 + j] = texelFetch(u_tess_tex_tex, index, 0).xy;\n");
				if (hasColor && hasColorTess)
					WRITE(p, "      _col[i * 4 + j] = texelFetch(u_tess_col_tex, index, 0).rgba;\n");
				WRITE(p, "    }\n");
				WRITE(p, "  }\n");
				WRITE(p, "  vec2 tess_pos = position.xy;\n");
				WRITE(p, "  vec2 weights[4];\n");
				if (doBezier) {
					// Bernstein 3D
					WRITE(p, "  weights[0] = (1 - tess_pos) * (1 - tess_pos) * (1 - tess_pos);\n");
					WRITE(p, "  weights[1] = 3 * tess_pos * (1 - tess_pos) * (1 - tess_pos);\n");
					WRITE(p, "  weights[2] = 3 * tess_pos * tess_pos * (1 - tess_pos);\n");
					WRITE(p, "  weights[3] = tess_pos * tess_pos * tess_pos;\n");
				} else { // Spline
					WRITE(p, "  ivec2 spline_num_patches = ivec2(base.spline_count_u - 3, base.spline_count_v - 3);\n");
					WRITE(p, "  ivec2 spline_type = ivec2(base.spline_type_u, base.spline_type_v);\n");
					WRITE(p, "  vec2 knots[6];\n");
					WRITE(p, "  spline_knot(spline_num_patches, spline_type, knots, patch_pos);\n");
					WRITE(p, "  spline_weight(tess_pos + patch_pos, knots, weights);\n");
				}
				WRITE(p, "  vec3 pos = tess_sample(_pos, weights);\n");
				if (doTexture && hasTexcoord) {
					if (hasTexcoordTess)
						WRITE(p, "  vec2 tex = tess_sample(_tex, weights);\n");
					else
						WRITE(p, "  vec2 tex = tess_pos + patch_pos;\n");
				}
				if (hasColor) {
					if (hasColorTess)
						WRITE(p, "  vec4 col = tess_sample(_col, weights);\n");
					else
						WRITE(p, "  vec4 col = texelFetch(u_tess_col_tex, ivec2(0, 0), 0).rgba;\n");
				}
				if (hasNormal) {
					// Curved surface is probably always need to compute normal(not sampling from control points)
					if (doBezier) {
						// Bernstein derivative
						WRITE(p, "  vec2 bernderiv[4];\n");
						WRITE(p, "  bernderiv[0] = -3 * (tess_pos - 1) * (tess_pos - 1); \n");
						WRITE(p, "  bernderiv[1] = 9 * tess_pos * tess_pos - 12 * tess_pos + 3; \n");
						WRITE(p, "  bernderiv[2] = 3 * (2 - 3 * tess_pos) * tess_pos; \n");
						WRITE(p, "  bernderiv[3] = 3 * tess_pos * tess_pos; \n");

						WRITE(p, "  vec2 bernderiv_u[4];\n");
						WRITE(p, "  vec2 bernderiv_v[4];\n");
						WRITE(p, "  for (int i = 0; i < 4; i++) {\n");
						WRITE(p, "    bernderiv_u[i] = vec2(bernderiv[i].x, weights[i].y);\n");
						WRITE(p, "    bernderiv_v[i] = vec2(weights[i].x, bernderiv[i].y);\n");
						WRITE(p, "  }\n");

						WRITE(p, "  vec3 du = tess_sample(_pos, bernderiv_u);\n");
						WRITE(p, "  vec3 dv = tess_sample(_pos, bernderiv_v);\n");
					} else { // Spline
						// Finite differences: sample the surface one step to each side,
						// with the step size taken from the normal attribute's x/y.
						// Note that `weights` is overwritten by each spline_weight call.
						WRITE(p, "  vec2 tess_next_u = vec2(normal.x, 0);\n");
						WRITE(p, "  vec2 tess_next_v = vec2(0, normal.y);\n");
						// Right
						WRITE(p, "  vec2 tess_pos_r = tess_pos + tess_next_u;\n");
						WRITE(p, "  spline_weight(tess_pos_r + patch_pos, knots, weights);\n");
						WRITE(p, "  vec3 pos_r = tess_sample(_pos, weights);\n");
						// Left
						WRITE(p, "  vec2 tess_pos_l = tess_pos - tess_next_u;\n");
						WRITE(p, "  spline_weight(tess_pos_l + patch_pos, knots, weights);\n");
						WRITE(p, "  vec3 pos_l = tess_sample(_pos, weights);\n");
						// Down
						WRITE(p, "  vec2 tess_pos_d = tess_pos + tess_next_v;\n");
						WRITE(p, "  spline_weight(tess_pos_d + patch_pos, knots, weights);\n");
						WRITE(p, "  vec3 pos_d = tess_sample(_pos, weights);\n");
						// Up
						WRITE(p, "  vec2 tess_pos_u = tess_pos - tess_next_v;\n");
						WRITE(p, "  spline_weight(tess_pos_u + patch_pos, knots, weights);\n");
						WRITE(p, "  vec3 pos_u = tess_sample(_pos, weights);\n");

						WRITE(p, "  vec3 du = pos_r - pos_l;\n");
						WRITE(p, "  vec3 dv = pos_d - pos_u;\n");
					}
					WRITE(p, "  vec3 nrm = cross(du, dv);\n");
					WRITE(p, "  nrm = normalize(nrm);\n");
				}
				WRITE(p, "  vec3 worldpos = vec4(pos.xyz, 1.0) * base.world_mtx;\n");
				if (hasNormal) {
					WRITE(p, "  mediump vec3 worldnormal = normalize(vec4(%snrm, 0.0) * base.world_mtx);\n", flipNormalTess ? "-" : "");
				} else {
					WRITE(p, "  mediump vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n");
				}
			} else {
				// No skinning, just standard T&L.
				WRITE(p, "  vec3 worldpos = vec4(position.xyz, 1.0) * base.world_mtx;\n");
				if (hasNormal)
					WRITE(p, "  mediump vec3 worldnormal = normalize(vec4(%snormal, 0.0) * base.world_mtx);\n", flipNormal ? "-" : "");
				else
					WRITE(p, "  mediump vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n");
			}
		} else {
			// Skinning path. `factor` is a GLSL multiplier suffix chosen by the
			// weight format scale field; entries 0 and 3 apply no rescale.
			static const char *rescale[4] = { "", " * 1.9921875", " * 1.999969482421875", "" }; // 2*127.5f/128.f, 2*32767.5f/32768.f, 1.0f};
			const char *factor = rescale[boneWeightScale];

			static const char * const boneWeightAttr[8] = {
				"w1.x", "w1.y", "w1.z", "w1.w",
				"w2.x", "w2.y", "w2.z", "w2.w",
			};

			// Accumulate the weighted bone matrices, one GLSL statement per weight.
			WRITE(p, "  mat3x4 skinMatrix = w1.x * bone.m[0];\n");
			if (numBoneWeights > 1) {
				for (int i = 1; i < numBoneWeights; i++) {
					WRITE(p, "    skinMatrix += %s * bone.m[%i];\n", boneWeightAttr[i], i);
				}
			}

			// Emits a lone ';' line, i.e. an empty statement in the generated shader.
			WRITE(p, ";\n");

			// Trying to simplify this results in bugs in LBP...
			WRITE(p, "  vec3 skinnedpos = (vec4(position, 1.0) * skinMatrix) %s;\n", factor);
			WRITE(p, "  vec3 worldpos = vec4(skinnedpos, 1.0) * base.world_mtx;\n");

			if (hasNormal) {
				WRITE(p, "  mediump vec3 skinnednormal = vec4(%snormal, 0.0) * skinMatrix %s;\n", flipNormal ? "-" : "", factor);
			} else {
				WRITE(p, "  mediump vec3 skinnednormal = vec4(0.0, 0.0, %s1.0, 0.0) * skinMatrix %s;\n", flipNormal ? "-" : "", factor);
			}
			WRITE(p, "  mediump vec3 worldnormal = normalize(vec4(skinnednormal, 0.0) * base.world_mtx);\n");
		}

		WRITE(p, "  vec4 viewPos = vec4(vec4(worldpos, 1.0) * base.view_mtx, 1.0);\n");

		// Final view and projection transforms.
		// NOTE(review): depthRoundZVP is only declared above when !isModeThrough, but
		// this call is not guarded by it - presumably through mode never comes with
		// hardware transform; confirm against the shader ID generator.
		if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) {
			WRITE(p, "  gl_Position = depthRoundZVP(base.proj_mtx * viewPos);\n");
		} else {
			WRITE(p, "  gl_Position = base.proj_mtx * viewPos;\n");
		}

		// TODO: Declare variables for dots for shade mapping if needed.

		// GLSL expressions for the material colors: the vertex color when the matching
		// matUpdate bit is set (and a color exists), otherwise the uniform.
		// In the tessellation case the evaluated `col` replaces `color0`.
		const char *ambientStr = ((matUpdate & 1) && hasColor) ? "color0" : "base.matambientalpha";
		const char *diffuseStr = ((matUpdate & 2) && hasColor) ? "color0.rgb" : "light.matdiffuse";
		const char *specularStr = ((matUpdate & 4) && hasColor) ? "color0.rgb" : "light.matspecular.rgb";
		if (doBezier || doSpline) {
			ambientStr = (matUpdate & 1) && hasColor ? "col" : "base.matambientalpha";
			diffuseStr = (matUpdate & 2) && hasColor ? "col.rgb" : "light.matdiffuse";
			specularStr = (matUpdate & 4) && hasColor ? "col.rgb" : "light.matspecular.rgb";
		}

		bool diffuseIsZero = true;
		bool specularIsZero = true;
		bool distanceNeeded = false;

		if (enableLighting) {
			WRITE(p, "  vec4 lightSum0 = light.u_ambient * %s + vec4(light.matemissive, 0.0);\n", ambientStr);

			// First pass over the lights: only decide which temporaries the shader needs.
			for (int i = 0; i < 4; i++) {
				GELightType type = static_cast<GELightType>(id.Bits(VS_BIT_LIGHT0_TYPE + 4 * i, 2));
				GELightComputation comp = static_cast<GELightComputation>(id.Bits(VS_BIT_LIGHT0_COMP + 4 * i, 2));
				if (doLight[i] != LIGHT_FULL)
					continue;
				diffuseIsZero = false;
				if (comp != GE_LIGHTCOMP_ONLYDIFFUSE)
					specularIsZero = false;
				if (type != GE_LIGHTTYPE_DIRECTIONAL)
					distanceNeeded = true;
			}

			if (!specularIsZero) {
				WRITE(p, "  vec3 lightSum1 = vec3(0.0);\n");
			}
			if (!diffuseIsZero) {
				WRITE(p, "  vec3 toLight;\n");
				WRITE(p, "  vec3 diffuse;\n");
			}
			if (distanceNeeded) {
				WRITE(p, "  float distance;\n");
				WRITE(p, "  float lightScale;\n");
			}
		}

		// Calculate lights if needed. If shade mapping is enabled, lights may need to be
		// at least partially calculated.
		// Only LIGHT_FULL lights emit code here; LIGHT_FULL is only ever set when the
		// lighting-enable bit is on, so the temporaries declared above exist.
		for (int i = 0; i < 4; i++) {
			if (doLight[i] != LIGHT_FULL)
				continue;

			GELightType type = static_cast<GELightType>(id.Bits(VS_BIT_LIGHT0_TYPE + 4 * i, 2));
			GELightComputation comp = static_cast<GELightComputation>(id.Bits(VS_BIT_LIGHT0_COMP + 4 * i, 2));

			if (type == GE_LIGHTTYPE_DIRECTIONAL) {
				// We prenormalize light positions for directional lights.
				WRITE(p, "  toLight = light.pos[%i];\n", i);
			} else {
				WRITE(p, "  toLight = light.pos[%i] - worldpos;\n", i);
				WRITE(p, "  distance = length(toLight);\n");
				WRITE(p, "  toLight /= distance;\n");
			}

			bool doSpecular = comp != GE_LIGHTCOMP_ONLYDIFFUSE;
			bool poweredDiffuse = comp == GE_LIGHTCOMP_BOTHWITHPOWDIFFUSE;

			WRITE(p, "  mediump float dot%i = max(dot(toLight, worldnormal), 0.0);\n", i);
			if (poweredDiffuse) {
				// pow(0.0, 0.0) may be undefined, but the PSP seems to treat it as 1.0.
				// Seen in Tales of the World: Radiant Mythology (#2424.)
				WRITE(p, "  if (dot%i == 0.0 && light.matspecular.a == 0.0) {\n", i);
				WRITE(p, "    dot%i = 1.0;\n", i);
				WRITE(p, "  } else {\n");
				WRITE(p, "    dot%i = pow(dot%i, light.matspecular.a);\n", i, i);
				WRITE(p, "  }\n");
			}

			// Suffix appended to the color terms; cleared for directional lights,
			// which have no attenuation.
			const char *timesLightScale = " * lightScale";

			// Attenuation
			switch (type) {
			case GE_LIGHTTYPE_DIRECTIONAL:
				timesLightScale = "";
				break;
			case GE_LIGHTTYPE_POINT:
				WRITE(p, "  lightScale = clamp(1.0 / dot(light.att[%i], vec3(1.0, distance, distance*distance)), 0.0, 1.0);\n", i);
				break;
			case GE_LIGHTTYPE_SPOT:
			case GE_LIGHTTYPE_UNKNOWN:
				WRITE(p, "  float angle%i = dot(normalize(light.dir[%i]), toLight);\n", i, i);
				WRITE(p, "  if (angle%i >= light.angle[%i]) {\n", i, i);
				WRITE(p, "    lightScale = clamp(1.0 / dot(light.att[%i], vec3(1.0, distance, distance*distance)), 0.0, 1.0) * pow(angle%i, light.spotCoef[%i]);\n", i, i, i);
				WRITE(p, "  } else {\n");
				WRITE(p, "    lightScale = 0.0;\n");
				WRITE(p, "  }\n");
				break;
			default:
				// ILLEGAL
				break;
			}

			WRITE(p, "  diffuse = (light.diffuse[%i] * %s) * dot%i;\n", i, diffuseStr, i);
			if (doSpecular) {
				// dot%i is reused here for the specular half-vector term.
				WRITE(p, "  dot%i = dot(normalize(toLight + vec3(0.0, 0.0, 1.0)), worldnormal);\n", i);
				WRITE(p, "  if (dot%i > 0.0)\n", i);
				WRITE(p, "    lightSum1 += light.specular[%i] * %s * (pow(dot%i, light.matspecular.a) %s);\n", i, specularStr, i, timesLightScale);
			}
			WRITE(p, "  lightSum0.rgb += (light.ambient[%i] * %s.rgb + diffuse)%s;\n", i, ambientStr, timesLightScale);
		}

		// Step 2: write the color outputs.
		if (enableLighting) {
			// Sum up ambient, emissive here.
			if (lmode) {
				WRITE(p, "  v_color0 = clamp(lightSum0, 0.0, 1.0);\n");
				// v_color1 only exists when lmode = 1.
				if (specularIsZero) {
					WRITE(p, "  v_color1 = vec3(0.0);\n");
				} else {
					WRITE(p, "  v_color1 = clamp(lightSum1, 0.0, 1.0);\n");
				}
			} else {
				if (specularIsZero) {
					WRITE(p, "  v_color0 = clamp(lightSum0, 0.0, 1.0);\n");
				} else {
					WRITE(p, "  v_color0 = clamp(clamp(lightSum0, 0.0, 1.0) + vec4(lightSum1, 0.0), 0.0, 1.0);\n");
				}
			}
		} else {
			// Lighting doesn't affect color.
			if (hasColor) {
				if (doBezier || doSpline)
					WRITE(p, "  v_color0 = col;\n");
				else
					WRITE(p, "  v_color0 = color0;\n");
			} else {
				WRITE(p, "  v_color0 = base.matambientalpha;\n");
			}
			if (lmode) {
				WRITE(p, "  v_color1 = vec3(0.0);\n");
			}
		}

		bool scaleUV = !throughmode && (uvGenMode == GE_TEXMAP_TEXTURE_COORDS || uvGenMode == GE_TEXMAP_UNKNOWN);

		// Step 3: UV generation
		if (doTexture) {
			switch (uvGenMode) {
			case GE_TEXMAP_TEXTURE_COORDS:  // Scale-offset. Easy.
			case GE_TEXMAP_UNKNOWN: // Not sure what this is, but Riviera uses it.  Treating as coords works.
				// With scaleUV, plain texcoords are forwarded without applying
				// uvscaleoffset in the shader; tessellated texcoords get the
				// scale/offset applied in both branches.
				if (scaleUV) {
					if (hasTexcoord) {
						if (doBezier || doSpline)
							WRITE(p, "  v_texcoord = vec3(tex.xy * base.uvscaleoffset.xy + base.uvscaleoffset.zw, 0.0);\n");
						else
							WRITE(p, "  v_texcoord = vec3(texcoord.xy, 0.0);\n");
					} else {
						WRITE(p, "  v_texcoord = vec3(0.0);\n");
					}
				} else {
					if (hasTexcoord) {
						if (doBezier || doSpline)
							WRITE(p, "  v_texcoord = vec3(tex.xy * base.uvscaleoffset.xy + base.uvscaleoffset.zw, 0.0);\n");
						else
							WRITE(p, "  v_texcoord = vec3(texcoord.xy * base.uvscaleoffset.xy + base.uvscaleoffset.zw, 0.0);\n");
					} else {
						WRITE(p, "  v_texcoord = vec3(base.uvscaleoffset.zw, 0.0);\n");
					}
				}
				break;

			case GE_TEXMAP_TEXTURE_MATRIX:  // Projection mapping.
			{
				// temp_tc holds the GLSL expression used as the source vector.
				std::string temp_tc;
				switch (uvProjMode) {
				case GE_PROJMAP_POSITION:  // Use model space XYZ as source
					temp_tc = "vec4(position.xyz, 1.0)";
					break;
				case GE_PROJMAP_UV:  // Use unscaled UV as source
				{
					// scaleUV is false here.
					if (hasTexcoord) {
						temp_tc = "vec4(texcoord.xy, 0.0, 1.0)";
					} else {
						temp_tc = "vec4(0.0, 0.0, 0.0, 1.0)";
					}
				}
				break;
				case GE_PROJMAP_NORMALIZED_NORMAL:  // Use normalized transformed normal as source
					if (hasNormal)
						temp_tc = flipNormal ? "vec4(normalize(-normal), 1.0)" : "vec4(normalize(normal), 1.0)";
					else
						temp_tc = "vec4(0.0, 0.0, 1.0, 1.0)";
					break;
				case GE_PROJMAP_NORMAL:  // Use non-normalized transformed normal as source
					if (hasNormal)
						temp_tc = flipNormal ? "vec4(-normal, 1.0)" : "vec4(normal, 1.0)";
					else
						temp_tc = "vec4(0.0, 0.0, 1.0, 1.0)";
					break;
				}
				// Transform by texture matrix. XYZ as we are doing projection mapping.
				WRITE(p, "  v_texcoord = (%s * base.tex_mtx).xyz * vec3(base.uvscaleoffset.xy, 1.0);\n", temp_tc.c_str());
			}
			break;

			case GE_TEXMAP_ENVIRONMENT_MAP:  // Shade mapping - use dots from light sources.
				WRITE(p, "  v_texcoord = vec3(base.uvscaleoffset.xy * vec2(1.0 + dot(normalize(light.pos[%i]), worldnormal), 1.0 + dot(normalize(light.pos[%i]), worldnormal)) * 0.5, 1.0);\n", ls0, ls1);
				break;

			default:
				// ILLEGAL
				break;
			}
		}

		// Compute fogdepth
		if (enableFog)
			WRITE(p, "  v_fogdepth = (viewPos.z + base.fogcoef_stencilreplace.x) * base.fogcoef_stencilreplace.y;\n");
	}
	WRITE(p, "}\n");
	return true;
}
bool GenerateVulkanGLSLVertexShader(const ShaderID &id, char *buffer, bool *usesLighting) {
	char *p = buffer;

	WRITE(p, "%s", vulkan_glsl_preamble);

	bool highpFog = false;
	bool highpTexcoord = false;

	bool isModeThrough = id.Bit(VS_BIT_IS_THROUGH);
	bool lmode = id.Bit(VS_BIT_LMODE) && !isModeThrough;  // TODO: Different expression than in shaderIDgen
	bool doTexture = id.Bit(VS_BIT_DO_TEXTURE);
	bool doTextureTransform = id.Bit(VS_BIT_DO_TEXTURE_TRANSFORM);

	GETexMapMode uvGenMode = static_cast<GETexMapMode>(id.Bits(VS_BIT_UVGEN_MODE, 2));

	// this is only valid for some settings of uvGenMode
	GETexProjMapMode uvProjMode = static_cast<GETexProjMapMode>(id.Bits(VS_BIT_UVPROJ_MODE, 2));
	bool doShadeMapping = uvGenMode == GE_TEXMAP_ENVIRONMENT_MAP;
	bool doFlatShading = id.Bit(VS_BIT_FLATSHADE);

	bool useHWTransform = id.Bit(VS_BIT_USE_HW_TRANSFORM);
	bool hasColor = id.Bit(VS_BIT_HAS_COLOR) || !useHWTransform;
	bool hasNormal = id.Bit(VS_BIT_HAS_NORMAL) && useHWTransform;
	bool hasTexcoord = id.Bit(VS_BIT_HAS_TEXCOORD) || !useHWTransform;
	bool enableFog = id.Bit(VS_BIT_ENABLE_FOG);
	bool throughmode = id.Bit(VS_BIT_IS_THROUGH);
	bool flipNormal = id.Bit(VS_BIT_NORM_REVERSE);
	int ls0 = id.Bits(VS_BIT_LS0, 2);
	int ls1 = id.Bits(VS_BIT_LS1, 2);
	bool enableBones = id.Bit(VS_BIT_ENABLE_BONES);
	bool enableLighting = id.Bit(VS_BIT_LIGHTING_ENABLE);
	int matUpdate = id.Bits(VS_BIT_MATERIAL_UPDATE, 3);

	// The uniforms are passed in as three "clumps" that may or may not be present.
	// We will memcpy the parts into place in a big buffer so we can be quite dynamic about what parts
	// are present and what parts aren't, but we will not be ultra detailed about it.
	*usesLighting = enableLighting || doShadeMapping;
	WRITE(p, "\n");
	WRITE(p, "layout (std140, set = 0, binding = 2) uniform baseVars {\n%s} base;\n", ub_baseStr);
	if (enableLighting || doShadeMapping)
		WRITE(p, "layout (std140, set = 0, binding = 3) uniform lightVars {\n%s} light;\n", ub_vs_lightsStr);
	if (enableBones)
		WRITE(p, "layout (std140, set = 0, binding = 4) uniform boneVars {\n%s} bone;\n", ub_vs_bonesStr);

	const char *shading = doFlatShading ? "flat " : "";

	DoLightComputation doLight[4] = { LIGHT_OFF, LIGHT_OFF, LIGHT_OFF, LIGHT_OFF };
	if (useHWTransform) {
		int shadeLight0 = doShadeMapping ? ls0 : -1;
		int shadeLight1 = doShadeMapping ? ls1 : -1;
		for (int i = 0; i < 4; i++) {
			if (i == shadeLight0 || i == shadeLight1)
				doLight[i] = LIGHT_SHADE;
			if (id.Bit(VS_BIT_LIGHTING_ENABLE) && id.Bit(VS_BIT_LIGHT0_ENABLE + i))
				doLight[i] = LIGHT_FULL;
		}
	}

	int numBoneWeights = 0;
	int boneWeightScale = id.Bits(VS_BIT_WEIGHT_FMTSCALE, 2);
	if (enableBones) {
		numBoneWeights = 1 + id.Bits(VS_BIT_BONES, 3);
		WRITE(p, "%s", boneWeightDecl[numBoneWeights]);
	}

	if (useHWTransform)
		WRITE(p, "layout (location = %d) in vec3 position;\n", PspAttributeLocation::POSITION);
	else
		// we pass the fog coord in w
		WRITE(p, "layout (location = %d) in vec4 position;\n", PspAttributeLocation::POSITION);

	if (useHWTransform && hasNormal)
		WRITE(p, "layout (location = %d) in vec3 normal;\n", PspAttributeLocation::NORMAL);

	bool texcoordInVec3 = false;
	if (doTexture && hasTexcoord) {
		if (!useHWTransform && doTextureTransform && !throughmode) {
			WRITE(p, "layout (location = %d) in vec3 texcoord;\n", PspAttributeLocation::TEXCOORD);
			texcoordInVec3 = true;
		}
		else
			WRITE(p, "layout (location = %d) in vec2 texcoord;\n", PspAttributeLocation::TEXCOORD);
	}
	if (hasColor) {
		WRITE(p, "layout (location = %d) in vec4 color0;\n", PspAttributeLocation::COLOR0);
		if (lmode && !useHWTransform)  // only software transform supplies color1 as vertex data
			WRITE(p, "layout (location = %d) in vec3 color1;\n", PspAttributeLocation::COLOR1);
	}

	WRITE(p, "layout (location = 1) %sout vec4 v_color0;\n", shading);
	if (lmode) {
		WRITE(p, "layout (location = 2) %sout vec3 v_color1;\n", shading);
	}

	if (doTexture) {
		WRITE(p, "layout (location = 0) out vec3 v_texcoord;\n");
	}

	if (enableFog) {
		// See the fragment shader generator
		WRITE(p, "layout (location = 3) out float v_fogdepth;\n");
	}

	// See comment above this function (GenerateVertexShader).
	if (!isModeThrough && gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) {
		// Apply the projection and viewport to get the Z buffer value, floor to integer, undo the viewport and projection.
		WRITE(p, "\nvec4 depthRoundZVP(vec4 v) {\n");
		WRITE(p, "  float z = v.z / v.w;\n");
		WRITE(p, "  z = z * base.depthRange.x + base.depthRange.y;\n");
		WRITE(p, "  z = floor(z);\n");
		WRITE(p, "  z = (z - base.depthRange.z) * base.depthRange.w;\n");
		WRITE(p, "  return vec4(v.x, v.y, z * v.w, v.w);\n");
		WRITE(p, "}\n\n");
	}
	WRITE(p, "out gl_PerVertex { vec4 gl_Position; };\n");
	WRITE(p, "void main() {\n");

	if (!useHWTransform) {
		// Simple pass-through of vertex data to fragment shader
		if (doTexture) {
			if (texcoordInVec3) {
				WRITE(p, "  v_texcoord = texcoord;\n");
			} else {
				WRITE(p, "  v_texcoord = vec3(texcoord, 1.0);\n");
			}
		}
		if (hasColor) {
			WRITE(p, "  v_color0 = color0;\n");
			if (lmode)
				WRITE(p, "  v_color1 = color1;\n");
		} else {
			WRITE(p, "  v_color0 = base.matambientalpha;\n");
			if (lmode)
				WRITE(p, "  v_color1 = vec3(0.0);\n");
		}
		if (enableFog) {
			WRITE(p, "  v_fogdepth = position.w;\n");
		}
		if (isModeThrough) {
			WRITE(p, "  gl_Position = base.proj_through_mtx * vec4(position.xyz, 1.0);\n");
		} else {
			// The viewport is used in this case, so need to compensate for that.
			if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) {
				WRITE(p, "  gl_Position = depthRoundZVP(base.proj_mtx * vec4(position.xyz, 1.0));\n");
			} else {
				WRITE(p, "  gl_Position = base.proj_mtx * vec4(position.xyz, 1.0);\n");
			}
		}
	} else {
		// Step 1: World Transform / Skinning
		if (!enableBones) {
			// No skinning, just standard T&L.
			WRITE(p, "  vec3 worldpos = vec4(position.xyz, 1.0) * base.world_mtx;\n");
			if (hasNormal)
				WRITE(p, "  mediump vec3 worldnormal = normalize(vec4(%snormal, 0.0) * base.world_mtx);\n", flipNormal ? "-" : "");
			else
				WRITE(p, "  mediump vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n");
		} else {
			static const char *rescale[4] = { "", " * 1.9921875", " * 1.999969482421875", "" }; // 2*127.5f/128.f, 2*32767.5f/32768.f, 1.0f};
			const char *factor = rescale[boneWeightScale];

			static const char * const boneWeightAttr[8] = {
				"w1.x", "w1.y", "w1.z", "w1.w",
				"w2.x", "w2.y", "w2.z", "w2.w",
			};

			WRITE(p, "  mat3x4 skinMatrix = w1.x * bone.m[0];\n");
			if (numBoneWeights > 1) {
				for (int i = 1; i < numBoneWeights; i++) {
					WRITE(p, "    skinMatrix += %s * bone.m[%i];\n", boneWeightAttr[i], i);
				}
			}

			WRITE(p, ";\n");

			// Trying to simplify this results in bugs in LBP...
			WRITE(p, "  vec3 skinnedpos = (vec4(position, 1.0) * skinMatrix) %s;\n", factor);
			WRITE(p, "  vec3 worldpos = vec4(skinnedpos, 1.0) * base.world_mtx;\n");

			if (hasNormal) {
				WRITE(p, "  mediump vec3 skinnednormal = vec4(%snormal, 0.0) * skinMatrix %s;\n", flipNormal ? "-" : "", factor);
			} else {
				WRITE(p, "  mediump vec3 skinnednormal = vec4(0.0, 0.0, %s1.0, 0.0) * skinMatrix %s;\n", flipNormal ? "-" : "", factor);
			}
			WRITE(p, "  mediump vec3 worldnormal = normalize(vec4(skinnednormal, 0.0) * base.world_mtx);\n");
		}

		WRITE(p, "  vec4 viewPos = vec4(vec4(worldpos, 1.0) * base.view_mtx, 1.0);\n");

		// Final view and projection transforms.
		if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) {
			WRITE(p, "  gl_Position = depthRoundZVP(base.proj_mtx * viewPos);\n");
		} else {
			WRITE(p, "  gl_Position = base.proj_mtx * viewPos;\n");
		}

		// TODO: Declare variables for dots for shade mapping if needed.

		const char *ambientStr = ((matUpdate & 1) && hasColor) ? "color0" : "base.matambientalpha";
		const char *diffuseStr = ((matUpdate & 2) && hasColor) ? "color0.rgb" : "light.matdiffuse";
		const char *specularStr = ((matUpdate & 4) && hasColor) ? "color0.rgb" : "light.matspecular.rgb";

		bool diffuseIsZero = true;
		bool specularIsZero = true;
		bool distanceNeeded = false;

		if (enableLighting) {
			WRITE(p, "  vec4 lightSum0 = light.u_ambient * %s + vec4(light.matemissive, 0.0);\n", ambientStr);

			for (int i = 0; i < 4; i++) {
				GELightType type = static_cast<GELightType>(id.Bits(VS_BIT_LIGHT0_TYPE + 4 * i, 2));
				GELightComputation comp = static_cast<GELightComputation>(id.Bits(VS_BIT_LIGHT0_COMP + 4 * i, 2));
				if (doLight[i] != LIGHT_FULL)
					continue;
				diffuseIsZero = false;
				if (comp != GE_LIGHTCOMP_ONLYDIFFUSE)
					specularIsZero = false;
				if (type != GE_LIGHTTYPE_DIRECTIONAL)
					distanceNeeded = true;
			}

			if (!specularIsZero) {
				WRITE(p, "  vec3 lightSum1 = vec3(0.0);\n");
			}
			if (!diffuseIsZero) {
				WRITE(p, "  vec3 toLight;\n");
				WRITE(p, "  vec3 diffuse;\n");
			}
			if (distanceNeeded) {
				WRITE(p, "  float distance;\n");
				WRITE(p, "  float lightScale;\n");
			}
		}

		// Calculate lights if needed. If shade mapping is enabled, lights may need to be
		// at least partially calculated.
		for (int i = 0; i < 4; i++) {
			if (doLight[i] != LIGHT_FULL)
				continue;

			GELightType type = static_cast<GELightType>(id.Bits(VS_BIT_LIGHT0_TYPE + 4 * i, 2));
			GELightComputation comp = static_cast<GELightComputation>(id.Bits(VS_BIT_LIGHT0_COMP + 4 * i, 2));

			if (type == GE_LIGHTTYPE_DIRECTIONAL) {
				// We prenormalize light positions for directional lights.
				WRITE(p, "  toLight = light.pos[%i];\n", i);
			} else {
				WRITE(p, "  toLight = light.pos[%i] - worldpos;\n", i);
				WRITE(p, "  distance = length(toLight);\n");
				WRITE(p, "  toLight /= distance;\n");
			}

			bool doSpecular = comp != GE_LIGHTCOMP_ONLYDIFFUSE;
			bool poweredDiffuse = comp == GE_LIGHTCOMP_BOTHWITHPOWDIFFUSE;

			WRITE(p, "  mediump float dot%i = max(dot(toLight, worldnormal), 0.0);\n", i);
			if (poweredDiffuse) {
				// pow(0.0, 0.0) may be undefined, but the PSP seems to treat it as 1.0.
				// Seen in Tales of the World: Radiant Mythology (#2424.)
				WRITE(p, "  if (dot%i == 0.0 && light.matspecular.a == 0.0) {\n", i);
				WRITE(p, "    dot%i = 1.0;\n", i);
				WRITE(p, "  } else {\n");
				WRITE(p, "    dot%i = pow(dot%i, light.matspecular.a);\n", i, i);
				WRITE(p, "  }\n");
			}

			const char *timesLightScale = " * lightScale";

			// Attenuation
			switch (type) {
			case GE_LIGHTTYPE_DIRECTIONAL:
				timesLightScale = "";
				break;
			case GE_LIGHTTYPE_POINT:
				WRITE(p, "  lightScale = clamp(1.0 / dot(light.att[%i], vec3(1.0, distance, distance*distance)), 0.0, 1.0);\n", i);
				break;
			case GE_LIGHTTYPE_SPOT:
			case GE_LIGHTTYPE_UNKNOWN:
				WRITE(p, "  float angle%i = dot(normalize(light.dir[%i]), toLight);\n", i, i);
				WRITE(p, "  if (angle%i >= light.angle[%i]) {\n", i, i);
				WRITE(p, "    lightScale = clamp(1.0 / dot(light.att[%i], vec3(1.0, distance, distance*distance)), 0.0, 1.0) * pow(angle%i, light.spotCoef[%i]);\n", i, i, i);
				WRITE(p, "  } else {\n");
				WRITE(p, "    lightScale = 0.0;\n");
				WRITE(p, "  }\n");
				break;
			default:
				// ILLEGAL
				break;
			}

			WRITE(p, "  diffuse = (light.diffuse[%i] * %s) * dot%i;\n", i, diffuseStr, i);
			if (doSpecular) {
				WRITE(p, "  dot%i = dot(normalize(toLight + vec3(0.0, 0.0, 1.0)), worldnormal);\n", i);
				WRITE(p, "  if (dot%i > 0.0)\n", i);
				WRITE(p, "    lightSum1 += light.specular[%i] * %s * (pow(dot%i, light.matspecular.a) %s);\n", i, specularStr, i, timesLightScale);
			}
			WRITE(p, "  lightSum0.rgb += (light.ambient[%i] * %s.rgb + diffuse)%s;\n", i, ambientStr, timesLightScale);
		}

		if (enableLighting) {
			// Sum up ambient, emissive here.
			if (lmode) {
				WRITE(p, "  v_color0 = clamp(lightSum0, 0.0, 1.0);\n");
				// v_color1 only exists when lmode = 1.
				if (specularIsZero) {
					WRITE(p, "  v_color1 = vec3(0.0);\n");
				} else {
					WRITE(p, "  v_color1 = clamp(lightSum1, 0.0, 1.0);\n");
				}
			} else {
				if (specularIsZero) {
					WRITE(p, "  v_color0 = clamp(lightSum0, 0.0, 1.0);\n");
				} else {
					WRITE(p, "  v_color0 = clamp(clamp(lightSum0, 0.0, 1.0) + vec4(lightSum1, 0.0), 0.0, 1.0);\n");
				}
			}
		} else {
			// Lighting doesn't affect color.
			if (hasColor) {
				WRITE(p, "  v_color0 = color0;\n");
			} else {
				WRITE(p, "  v_color0 = base.matambientalpha;\n");
			}
			if (lmode) {
				WRITE(p, "  v_color1 = vec3(0.0);\n");
			}
		}

		bool scaleUV = !throughmode && (uvGenMode == GE_TEXMAP_TEXTURE_COORDS || uvGenMode == GE_TEXMAP_UNKNOWN);

		// Step 3: UV generation
		if (doTexture) {
			switch (uvGenMode) {
			case GE_TEXMAP_TEXTURE_COORDS:  // Scale-offset. Easy.
			case GE_TEXMAP_UNKNOWN: // Not sure what this is, but Riviera uses it.  Treating as coords works.
				if (scaleUV) {
					if (hasTexcoord) {
						WRITE(p, "  v_texcoord = vec3(texcoord.xy, 0.0);\n");
					} else {
						WRITE(p, "  v_texcoord = vec3(0.0);\n");
					}
				} else {
					if (hasTexcoord) {
						WRITE(p, "  v_texcoord = vec3(texcoord.xy * base.uvscaleoffset.xy + base.uvscaleoffset.zw, 0.0);\n");
					} else {
						WRITE(p, "  v_texcoord = vec3(base.uvscaleoffset.zw, 0.0);\n");
					}
				}
				break;

			case GE_TEXMAP_TEXTURE_MATRIX:  // Projection mapping.
			{
				std::string temp_tc;
				switch (uvProjMode) {
				case GE_PROJMAP_POSITION:  // Use model space XYZ as source
					temp_tc = "vec4(position.xyz, 1.0)";
					break;
				case GE_PROJMAP_UV:  // Use unscaled UV as source
				{
					// scaleUV is false here.
					if (hasTexcoord) {
						temp_tc = "vec4(texcoord.xy, 0.0, 1.0)";
					} else {
						temp_tc = "vec4(0.0, 0.0, 0.0, 1.0)";
					}
				}
				break;
				case GE_PROJMAP_NORMALIZED_NORMAL:  // Use normalized transformed normal as source
					if (hasNormal)
						temp_tc = flipNormal ? "vec4(normalize(-normal), 1.0)" : "vec4(normalize(normal), 1.0)";
					else
						temp_tc = "vec4(0.0, 0.0, 1.0, 1.0)";
					break;
				case GE_PROJMAP_NORMAL:  // Use non-normalized transformed normal as source
					if (hasNormal)
						temp_tc = flipNormal ? "vec4(-normal, 1.0)" : "vec4(normal, 1.0)";
					else
						temp_tc = "vec4(0.0, 0.0, 1.0, 1.0)";
					break;
				}
				// Transform by texture matrix. XYZ as we are doing projection mapping.
				WRITE(p, "  v_texcoord = (%s * base.tex_mtx).xyz * vec3(base.uvscaleoffset.xy, 1.0);\n", temp_tc.c_str());
			}
			break;

			case GE_TEXMAP_ENVIRONMENT_MAP:  // Shade mapping - use dots from light sources.
				WRITE(p, "  v_texcoord = vec3(base.uvscaleoffset.xy * vec2(1.0 + dot(normalize(light.pos[%i]), worldnormal), 1.0 + dot(normalize(light.pos[%i]), worldnormal)) * 0.5, 1.0);\n", ls0, ls1);
				break;

			default:
				// ILLEGAL
				break;
			}
		}

		// Compute fogdepth
		if (enableFog)
			WRITE(p, "  v_fogdepth = (viewPos.z + base.fogcoef_stencilreplace.x) * base.fogcoef_stencilreplace.y;\n");
	}
	WRITE(p, "}\n");
	return true;
}
// Missing: Z depth range
bool GenerateFragmentShader(const ShaderID &id, char *buffer) {
    char *p = buffer;

    // In GLSL ES 3.0, you use "in" variables instead of varying.

    bool glslES30 = false;
    const char *varying = "varying";
    const char *fragColor0 = "gl_FragColor";
    const char *fragColor1 = "fragColor1";
    const char *texture = "texture2D";
    const char *texelFetch = NULL;
    bool highpFog = false;
    bool highpTexcoord = false;
    bool bitwiseOps = false;
    const char *lastFragData = nullptr;

    if (gl_extensions.IsGLES) {
        // ES doesn't support dual source alpha :(
        if (gstate_c.featureFlags & GPU_SUPPORTS_GLSL_ES_300) {
            WRITE(p, "#version 300 es\n");  // GLSL ES 3.0
            fragColor0 = "fragColor0";
            texture = "texture";
            glslES30 = true;
            bitwiseOps = true;
            texelFetch = "texelFetch";
        } else {
            WRITE(p, "#version 100\n");  // GLSL ES 1.0
            if (gl_extensions.EXT_gpu_shader4) {
                WRITE(p, "#extension GL_EXT_gpu_shader4 : enable\n");
                bitwiseOps = true;
                texelFetch = "texelFetch2D";
            }
            if (gl_extensions.EXT_blend_func_extended) {
                // Oldy moldy GLES, so use the fixed output name.
                fragColor1 = "gl_SecondaryFragColorEXT";
            }
        }

        // PowerVR needs highp to do the fog in MHU correctly.
        // Others don't, and some can't handle highp in the fragment shader.
        highpFog = (gl_extensions.bugs & BUG_PVR_SHADER_PRECISION_BAD) ? true : false;
        highpTexcoord = highpFog;

        if (gstate_c.featureFlags & GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH) {
            if (gl_extensions.GLES3 && gl_extensions.EXT_shader_framebuffer_fetch) {
                WRITE(p, "#extension GL_EXT_shader_framebuffer_fetch : require\n");
                lastFragData = "fragColor0";
            } else if (gl_extensions.EXT_shader_framebuffer_fetch) {
                WRITE(p, "#extension GL_EXT_shader_framebuffer_fetch : require\n");
                lastFragData = "gl_LastFragData[0]";
            } else if (gl_extensions.NV_shader_framebuffer_fetch) {
                // GL_NV_shader_framebuffer_fetch is available on mobile platform and ES 2.0 only but not on desktop.
                WRITE(p, "#extension GL_NV_shader_framebuffer_fetch : require\n");
                lastFragData = "gl_LastFragData[0]";
            } else if (gl_extensions.ARM_shader_framebuffer_fetch) {
                WRITE(p, "#extension GL_ARM_shader_framebuffer_fetch : require\n");
                lastFragData = "gl_LastFragColorARM";
            }
        }

        WRITE(p, "precision lowp float;\n");
    } else {
        // TODO: Handle this in VersionGEThan?
#if !defined(FORCE_OPENGL_2_0)
        if (gl_extensions.VersionGEThan(3, 3, 0)) {
            fragColor0 = "fragColor0";
            texture = "texture";
            glslES30 = true;
            bitwiseOps = true;
            texelFetch = "texelFetch";
            WRITE(p, "#version 330\n");
        } else if (gl_extensions.VersionGEThan(3, 0, 0)) {
            fragColor0 = "fragColor0";
            bitwiseOps = true;
            texelFetch = "texelFetch";
            WRITE(p, "#version 130\n");
            if (gl_extensions.EXT_gpu_shader4) {
                WRITE(p, "#extension GL_EXT_gpu_shader4 : enable\n");
            }
        } else {
            WRITE(p, "#version 110\n");
            if (gl_extensions.EXT_gpu_shader4) {
                WRITE(p, "#extension GL_EXT_gpu_shader4 : enable\n");
                bitwiseOps = true;
                texelFetch = "texelFetch2D";
            }
        }
#endif

        // We remove these everywhere - GL4, GL3, Mac-forced-GL2, etc.
        WRITE(p, "#define lowp\n");
        WRITE(p, "#define mediump\n");
        WRITE(p, "#define highp\n");
    }

    if (glslES30) {
        varying = "in";
    }

    bool lmode = id.Bit(FS_BIT_LMODE);
    bool doTexture = id.Bit(FS_BIT_DO_TEXTURE);
    bool enableFog = id.Bit(FS_BIT_ENABLE_FOG);
    bool enableAlphaTest = id.Bit(FS_BIT_ALPHA_TEST);

    bool alphaTestAgainstZero = id.Bit(FS_BIT_ALPHA_AGAINST_ZERO);
    bool enableColorTest = id.Bit(FS_BIT_COLOR_TEST);
    bool colorTestAgainstZero = id.Bit(FS_BIT_COLOR_AGAINST_ZERO);
    bool enableColorDoubling = id.Bit(FS_BIT_COLOR_DOUBLE);
    bool doTextureProjection = id.Bit(FS_BIT_DO_TEXTURE_PROJ);
    bool doTextureAlpha = id.Bit(FS_BIT_TEXALPHA);
    bool doFlatShading = id.Bit(FS_BIT_FLATSHADE);

    GEComparison alphaTestFunc = (GEComparison)id.Bits(FS_BIT_ALPHA_TEST_FUNC, 3);
    GEComparison colorTestFunc = (GEComparison)id.Bits(FS_BIT_COLOR_TEST_FUNC, 2);
    bool needShaderTexClamp = id.Bit(FS_BIT_SHADER_TEX_CLAMP);

    GETexFunc texFunc = (GETexFunc)id.Bits(FS_BIT_TEXFUNC, 3);
    bool textureAtOffset = id.Bit(FS_BIT_TEXTURE_AT_OFFSET);

    ReplaceBlendType replaceBlend = static_cast<ReplaceBlendType>(id.Bits(FS_BIT_REPLACE_BLEND, 3));
    ReplaceAlphaType stencilToAlpha = static_cast<ReplaceAlphaType>(id.Bits(FS_BIT_STENCIL_TO_ALPHA, 2));

    GEBlendSrcFactor replaceBlendFuncA = (GEBlendSrcFactor)id.Bits(FS_BIT_BLENDFUNC_A, 4);
    GEBlendDstFactor replaceBlendFuncB = (GEBlendDstFactor)id.Bits(FS_BIT_BLENDFUNC_B, 4);
    GEBlendMode replaceBlendEq = (GEBlendMode)id.Bits(FS_BIT_BLENDEQ, 3);

    bool isModeClear = id.Bit(FS_BIT_CLEARMODE);

    const char *shading = "";
    if (glslES30)
        shading = doFlatShading ? "flat" : "";

    if (doTexture)
        WRITE(p, "uniform sampler2D tex;\n");

    if (!isModeClear && replaceBlend > REPLACE_BLEND_STANDARD) {
        if (!gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH) && replaceBlend == REPLACE_BLEND_COPY_FBO) {
            if (!texelFetch) {
                WRITE(p, "uniform vec2 u_fbotexSize;\n");
            }
            WRITE(p, "uniform sampler2D fbotex;\n");
        }
        if (replaceBlendFuncA >= GE_SRCBLEND_FIXA) {
            WRITE(p, "uniform vec3 u_blendFixA;\n");
        }
        if (replaceBlendFuncB >= GE_DSTBLEND_FIXB) {
            WRITE(p, "uniform vec3 u_blendFixB;\n");
        }
    }

    if (needShaderTexClamp && doTexture) {
        WRITE(p, "uniform vec4 u_texclamp;\n");
        if (id.Bit(FS_BIT_TEXTURE_AT_OFFSET)) {
            WRITE(p, "uniform vec2 u_texclampoff;\n");
        }
    }

    if (enableAlphaTest || enableColorTest) {
        if (g_Config.bFragmentTestCache) {
            WRITE(p, "uniform sampler2D testtex;\n");
        } else {
            WRITE(p, "uniform vec4 u_alphacolorref;\n");
            if (bitwiseOps && ((enableColorTest && !colorTestAgainstZero) || (enableAlphaTest && !alphaTestAgainstZero))) {
                WRITE(p, "uniform ivec4 u_alphacolormask;\n");
            }
        }
    }

    StencilValueType replaceAlphaWithStencilType = (StencilValueType)id.Bits(FS_BIT_REPLACE_ALPHA_WITH_STENCIL_TYPE, 4);
    if (stencilToAlpha && replaceAlphaWithStencilType == STENCIL_VALUE_UNIFORM) {
        WRITE(p, "uniform float u_stencilReplaceValue;\n");
    }
    if (doTexture && texFunc == GE_TEXFUNC_BLEND)
        WRITE(p, "uniform vec3 u_texenv;\n");

    WRITE(p, "%s %s vec4 v_color0;\n", shading, varying);
    if (lmode)
        WRITE(p, "%s %s vec3 v_color1;\n", shading, varying);
    if (enableFog) {
        WRITE(p, "uniform vec3 u_fogcolor;\n");
        WRITE(p, "%s %s float v_fogdepth;\n", varying, highpFog ? "highp" : "mediump");
    }
    if (doTexture) {
        if (doTextureProjection)
            WRITE(p, "%s %s vec3 v_texcoord;\n", varying, highpTexcoord ? "highp" : "mediump");
        else
            WRITE(p, "%s %s vec2 v_texcoord;\n", varying, highpTexcoord ? "highp" : "mediump");
    }

    if (!g_Config.bFragmentTestCache) {
        if (enableAlphaTest && !alphaTestAgainstZero) {
            if (bitwiseOps) {
                WRITE(p, "int roundAndScaleTo255i(in float x) { return int(floor(x * 255.0 + 0.5)); }\n");
            } else if (gl_extensions.gpuVendor == GPU_VENDOR_POWERVR) {
                WRITE(p, "float roundTo255thf(in mediump float x) { mediump float y = x + (0.5/255.0); return y - fract(y * 255.0) * (1.0 / 255.0); }\n");
            } else {
                WRITE(p, "float roundAndScaleTo255f(in float x) { return floor(x * 255.0 + 0.5); }\n");
            }
        }
        if (enableColorTest && !colorTestAgainstZero) {
            if (bitwiseOps) {
                WRITE(p, "ivec3 roundAndScaleTo255iv(in vec3 x) { return ivec3(floor(x * 255.0 + 0.5)); }\n");
            } else if (gl_extensions.gpuVendor == GPU_VENDOR_POWERVR) {
                WRITE(p, "vec3 roundTo255thv(in vec3 x) { vec3 y = x + (0.5/255.0); return y - fract(y * 255.0) * (1.0 / 255.0); }\n");
            } else {
                WRITE(p, "vec3 roundAndScaleTo255v(in vec3 x) { return floor(x * 255.0 + 0.5); }\n");
            }
        }
    }

    if (!strcmp(fragColor0, "fragColor0")) {
        const char *qualifierColor0 = "out";
        if (lastFragData && !strcmp(lastFragData, fragColor0)) {
            qualifierColor0 = "inout";
        }
        // Output the output color definitions.
        if (stencilToAlpha == REPLACE_ALPHA_DUALSOURCE) {
            WRITE(p, "%s vec4 fragColor0;\n", qualifierColor0);
            WRITE(p, "out vec4 fragColor1;\n");
        } else {
            WRITE(p, "%s vec4 fragColor0;\n", qualifierColor0);
        }
    }

    // PowerVR needs a custom modulo function. For some reason, this has far higher precision than the builtin one.
    if ((gl_extensions.bugs & BUG_PVR_SHADER_PRECISION_BAD) && needShaderTexClamp) {
        WRITE(p, "float mymod(float a, float b) { return a - b * floor(a / b); }\n");
    }

    WRITE(p, "void main() {\n");

    if (isModeClear) {
        // Clear mode does not allow any fancy shading.
        WRITE(p, "  vec4 v = v_color0;\n");
    } else {
        const char *secondary = "";
        // Secondary color for specular on top of texture
        if (lmode) {
            WRITE(p, "  vec4 s = vec4(v_color1, 0.0);\n");
            secondary = " + s";
        } else {
            secondary = "";
        }

        if (doTexture) {
            const char *texcoord = "v_texcoord";
            // TODO: Not sure the right way to do this for projection.
            // This path destroys resolution on older PowerVR no matter what I do, so we disable it on SGX 540 and lesser, and live with the consequences.
            if (needShaderTexClamp && !(gl_extensions.bugs & BUG_PVR_SHADER_PRECISION_TERRIBLE)) {
                // We may be clamping inside a larger surface (tex = 64x64, buffer=480x272).
                // We may also be wrapping in such a surface, or either one in a too-small surface.
                // Obviously, clamping to a smaller surface won't work.  But better to clamp to something.
                std::string ucoord = "v_texcoord.x";
                std::string vcoord = "v_texcoord.y";
                if (doTextureProjection) {
                    ucoord = "(v_texcoord.x / v_texcoord.z)";
                    vcoord = "(v_texcoord.y / v_texcoord.z)";
                }

                std::string modulo = (gl_extensions.bugs & BUG_PVR_SHADER_PRECISION_BAD) ? "mymod" : "mod";

                if (id.Bit(FS_BIT_CLAMP_S)) {
                    ucoord = "clamp(" + ucoord + ", u_texclamp.z, u_texclamp.x - u_texclamp.z)";
                } else {
                    ucoord = modulo + "(" + ucoord + ", u_texclamp.x)";
                }
                if (id.Bit(FS_BIT_CLAMP_T)) {
                    vcoord = "clamp(" + vcoord + ", u_texclamp.w, u_texclamp.y - u_texclamp.w)";
                } else {
                    vcoord = modulo + "(" + vcoord + ", u_texclamp.y)";
                }
                if (textureAtOffset) {
                    ucoord = "(" + ucoord + " + u_texclampoff.x)";
                    vcoord = "(" + vcoord + " + u_texclampoff.y)";
                }

                WRITE(p, "  vec2 fixedcoord = vec2(%s, %s);\n", ucoord.c_str(), vcoord.c_str());
                texcoord = "fixedcoord";
                // We already projected it.
                doTextureProjection = false;
            }

            if (doTextureProjection) {
                WRITE(p, "  vec4 t = %sProj(tex, %s);\n", texture, texcoord);
            } else {
                WRITE(p, "  vec4 t = %s(tex, %s);\n", texture, texcoord);
            }
            WRITE(p, "  vec4 p = v_color0;\n");

            if (doTextureAlpha) { // texfmt == RGBA
                switch (texFunc) {
                case GE_TEXFUNC_MODULATE:
                    WRITE(p, "  vec4 v = p * t%s;\n", secondary);
                    break;

                case GE_TEXFUNC_DECAL:
                    WRITE(p, "  vec4 v = vec4(mix(p.rgb, t.rgb, t.a), p.a)%s;\n", secondary);
                    break;

                case GE_TEXFUNC_BLEND:
                    WRITE(p, "  vec4 v = vec4(mix(p.rgb, u_texenv.rgb, t.rgb), p.a * t.a)%s;\n", secondary);
                    break;

                case GE_TEXFUNC_REPLACE:
                    WRITE(p, "  vec4 v = t%s;\n", secondary);
                    break;

                case GE_TEXFUNC_ADD:
                case GE_TEXFUNC_UNKNOWN1:
                case GE_TEXFUNC_UNKNOWN2:
                case GE_TEXFUNC_UNKNOWN3:
                    WRITE(p, "  vec4 v = vec4(p.rgb + t.rgb, p.a * t.a)%s;\n", secondary);
                    break;
                default:
                    WRITE(p, "  vec4 v = p;\n");
                    break;
                }
            } else { // texfmt == RGB
                switch (texFunc) {
                case GE_TEXFUNC_MODULATE:
                    WRITE(p, "  vec4 v = vec4(t.rgb * p.rgb, p.a)%s;\n", secondary);
                    break;

                case GE_TEXFUNC_DECAL:
                    WRITE(p, "  vec4 v = vec4(t.rgb, p.a)%s;\n", secondary);
                    break;

                case GE_TEXFUNC_BLEND:
                    WRITE(p, "  vec4 v = vec4(mix(p.rgb, u_texenv.rgb, t.rgb), p.a)%s;\n", secondary);
                    break;

                case GE_TEXFUNC_REPLACE:
                    WRITE(p, "  vec4 v = vec4(t.rgb, p.a)%s;\n", secondary);
                    break;

                case GE_TEXFUNC_ADD:
                case GE_TEXFUNC_UNKNOWN1:
                case GE_TEXFUNC_UNKNOWN2:
                case GE_TEXFUNC_UNKNOWN3:
                    WRITE(p, "  vec4 v = vec4(p.rgb + t.rgb, p.a)%s;\n", secondary);
                    break;
                default:
                    WRITE(p, "  vec4 v = p;\n");
                    break;
                }
            }
        } else {
            // No texture mapping
            WRITE(p, "  vec4 v = v_color0 %s;\n", secondary);
        }

        // Texture access is at half texels [0.5/256, 255.5/256], but colors are normalized [0, 255].
        // So we have to scale to account for the difference.
        std::string alphaTestXCoord = "0";
        if (g_Config.bFragmentTestCache) {
            if (enableColorTest && !colorTestAgainstZero) {
                WRITE(p, "  vec4 vScale256 = v * %f + %f;\n", 255.0 / 256.0, 0.5 / 256.0);
                alphaTestXCoord = "vScale256.a";
            } else if (enableAlphaTest && !alphaTestAgainstZero) {
                char temp[64];
                snprintf(temp, sizeof(temp), "v.a * %f + %f", 255.0 / 256.0, 0.5 / 256.0);
                alphaTestXCoord = temp;
            }
        }

        if (enableAlphaTest) {
            if (alphaTestAgainstZero) {
                // When testing against 0 (extremely common), we can avoid some math.
                // 0.002 is approximately half of 1.0 / 255.0.
                if (alphaTestFunc == GE_COMP_NOTEQUAL || alphaTestFunc == GE_COMP_GREATER) {
                    WRITE(p, "  if (v.a < 0.002) discard;\n");
                } else if (alphaTestFunc != GE_COMP_NEVER) {
                    // Anything else is a test for == 0.  Happens sometimes, actually...
                    WRITE(p, "  if (v.a > 0.002) discard;\n");
                } else {
                    // NEVER has been logged as used by games, although it makes little sense - statically failing.
                    // Maybe we could discard the drawcall, but it's pretty rare.  Let's just statically discard here.
                    WRITE(p, "  discard;\n");
                }
            } else if (g_Config.bFragmentTestCache) {
                WRITE(p, "  float aResult = %s(testtex, vec2(%s, 0)).a;\n", texture, alphaTestXCoord.c_str());
                WRITE(p, "  if (aResult < 0.5) discard;\n");
            } else {
                const char *alphaTestFuncs[] = { "#", "#", " != ", " == ", " >= ", " > ", " <= ", " < " };
                if (alphaTestFuncs[alphaTestFunc][0] != '#') {
                    if (bitwiseOps) {
                        WRITE(p, "  if ((roundAndScaleTo255i(v.a) & u_alphacolormask.a) %s int(u_alphacolorref.a)) discard;\n", alphaTestFuncs[alphaTestFunc]);
                    } else if (gl_extensions.gpuVendor == GPU_VENDOR_POWERVR) {
                        // Work around bad PVR driver problem where equality check + discard just doesn't work.
                        if (alphaTestFunc != GE_COMP_NOTEQUAL) {
                            WRITE(p, "  if (roundTo255thf(v.a) %s u_alphacolorref.a) discard;\n", alphaTestFuncs[alphaTestFunc]);
                        }
                    } else {
                        WRITE(p, "  if (roundAndScaleTo255f(v.a) %s u_alphacolorref.a) discard;\n", alphaTestFuncs[alphaTestFunc]);
                    }
                } else {
                    // This means NEVER.  See above.
                    WRITE(p, "  discard;\n");
                }
            }
        }

        if (enableColorTest) {
            if (colorTestAgainstZero) {
                // When testing against 0 (common), we can avoid some math.
                // 0.002 is approximately half of 1.0 / 255.0.
                if (colorTestFunc == GE_COMP_NOTEQUAL) {
                    WRITE(p, "  if (v.r < 0.002 && v.g < 0.002 && v.b < 0.002) discard;\n");
                } else if (colorTestFunc != GE_COMP_NEVER) {
                    // Anything else is a test for == 0.
                    WRITE(p, "  if (v.r > 0.002 || v.g > 0.002 || v.b > 0.002) discard;\n");
                } else {
                    // NEVER has been logged as used by games, although it makes little sense - statically failing.
                    // Maybe we could discard the drawcall, but it's pretty rare.  Let's just statically discard here.
                    WRITE(p, "  discard;\n");
                }
            } else if (g_Config.bFragmentTestCache) {
                WRITE(p, "  float rResult = %s(testtex, vec2(vScale256.r, 0)).r;\n", texture);
                WRITE(p, "  float gResult = %s(testtex, vec2(vScale256.g, 0)).g;\n", texture);
                WRITE(p, "  float bResult = %s(testtex, vec2(vScale256.b, 0)).b;\n", texture);
                if (colorTestFunc == GE_COMP_EQUAL) {
                    // Equal means all parts must be equal.
                    WRITE(p, "  if (rResult < 0.5 || gResult < 0.5 || bResult < 0.5) discard;\n");
                } else {
                    // Not equal means any part must be not equal.
                    WRITE(p, "  if (rResult < 0.5 && gResult < 0.5 && bResult < 0.5) discard;\n");
                }
            } else {
                const char *colorTestFuncs[] = { "#", "#", " != ", " == " };
                if (colorTestFuncs[colorTestFunc][0] != '#') {
                    if (bitwiseOps) {
                        // Apparently GLES3 does not support vector bitwise ops.
                        WRITE(p, "  ivec3 v_scaled = roundAndScaleTo255iv(v.rgb);\n");
                        const char *maskedFragColor = "ivec3(v_scaled.r & u_alphacolormask.r, v_scaled.g & u_alphacolormask.g, v_scaled.b & u_alphacolormask.b)";
                        const char *maskedColorRef = "ivec3(int(u_alphacolorref.r) & u_alphacolormask.r, int(u_alphacolorref.g) & u_alphacolormask.g, int(u_alphacolorref.b) & u_alphacolormask.b)";
                        WRITE(p, "  if (%s %s %s) discard;\n", maskedFragColor, colorTestFuncs[colorTestFunc], maskedColorRef);
                    } else if (gl_extensions.gpuVendor == GPU_VENDOR_POWERVR) {
                        WRITE(p, "  if (roundTo255thv(v.rgb) %s u_alphacolorref.rgb) discard;\n", colorTestFuncs[colorTestFunc]);
                    } else {
                        WRITE(p, "  if (roundAndScaleTo255v(v.rgb) %s u_alphacolorref.rgb) discard;\n", colorTestFuncs[colorTestFunc]);
                    }
                } else {
                    WRITE(p, "  discard;\n");
                }
            }
        }

        // Color doubling happens after the color test.
        if (enableColorDoubling && replaceBlend == REPLACE_BLEND_2X_SRC) {
            WRITE(p, "  v.rgb = v.rgb * 4.0;\n");
        } else if (enableColorDoubling || replaceBlend == REPLACE_BLEND_2X_SRC) {
            WRITE(p, "  v.rgb = v.rgb * 2.0;\n");
        }

        if (enableFog) {
            WRITE(p, "  float fogCoef = clamp(v_fogdepth, 0.0, 1.0);\n");
            WRITE(p, "  v = mix(vec4(u_fogcolor, v.a), v, fogCoef);\n");
            // WRITE(p, "  v.x = v_depth;\n");
        }

        if (replaceBlend == REPLACE_BLEND_PRE_SRC || replaceBlend == REPLACE_BLEND_PRE_SRC_2X_ALPHA) {
            const char *srcFactor = "ERROR";
            switch (replaceBlendFuncA) {
            case GE_SRCBLEND_DSTCOLOR:
                srcFactor = "ERROR";
                break;
            case GE_SRCBLEND_INVDSTCOLOR:
                srcFactor = "ERROR";
                break;
            case GE_SRCBLEND_SRCALPHA:
                srcFactor = "vec3(v.a)";
                break;
            case GE_SRCBLEND_INVSRCALPHA:
                srcFactor = "vec3(1.0 - v.a)";
                break;
            case GE_SRCBLEND_DSTALPHA:
                srcFactor = "ERROR";
                break;
            case GE_SRCBLEND_INVDSTALPHA:
                srcFactor = "ERROR";
                break;
            case GE_SRCBLEND_DOUBLESRCALPHA:
                srcFactor = "vec3(v.a * 2.0)";
                break;
            case GE_SRCBLEND_DOUBLEINVSRCALPHA:
                srcFactor = "vec3(1.0 - v.a * 2.0)";
                break;
            // PRE_SRC for REPLACE_BLEND_PRE_SRC_2X_ALPHA means "double the src."
            // It's close to the same, but clamping can still be an issue.
            case GE_SRCBLEND_DOUBLEDSTALPHA:
                srcFactor = "vec3(2.0)";
                break;
            case GE_SRCBLEND_DOUBLEINVDSTALPHA:
                srcFactor = "ERROR";
                break;
            case GE_SRCBLEND_FIXA:
                srcFactor = "u_blendFixA";
                break;
            default:
                srcFactor = "u_blendFixA";
                break;
            }

            WRITE(p, "  v.rgb = v.rgb * %s;\n", srcFactor);
        }

        if (replaceBlend == REPLACE_BLEND_COPY_FBO) {
            // If we have NV_shader_framebuffer_fetch / EXT_shader_framebuffer_fetch, we skip the blit.
            // We can just read the prev value more directly.
            if (gstate_c.featureFlags & GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH) {
                WRITE(p, "  lowp vec4 destColor = %s;\n", lastFragData);
            } else if (!texelFetch) {
                WRITE(p, "  lowp vec4 destColor = %s(fbotex, gl_FragCoord.xy * u_fbotexSize.xy);\n", texture);
            } else {
                WRITE(p, "  lowp vec4 destColor = %s(fbotex, ivec2(gl_FragCoord.x, gl_FragCoord.y), 0);\n", texelFetch);
            }

            const char *srcFactor = "vec3(1.0)";
            const char *dstFactor = "vec3(0.0)";

            switch (replaceBlendFuncA) {
            case GE_SRCBLEND_DSTCOLOR:
                srcFactor = "destColor.rgb";
                break;
            case GE_SRCBLEND_INVDSTCOLOR:
                srcFactor = "(vec3(1.0) - destColor.rgb)";
                break;
            case GE_SRCBLEND_SRCALPHA:
                srcFactor = "vec3(v.a)";
                break;
            case GE_SRCBLEND_INVSRCALPHA:
                srcFactor = "vec3(1.0 - v.a)";
                break;
            case GE_SRCBLEND_DSTALPHA:
                srcFactor = "vec3(destColor.a)";
                break;
            case GE_SRCBLEND_INVDSTALPHA:
                srcFactor = "vec3(1.0 - destColor.a)";
                break;
            case GE_SRCBLEND_DOUBLESRCALPHA:
                srcFactor = "vec3(v.a * 2.0)";
                break;
            case GE_SRCBLEND_DOUBLEINVSRCALPHA:
                srcFactor = "vec3(1.0 - v.a * 2.0)";
                break;
            case GE_SRCBLEND_DOUBLEDSTALPHA:
                srcFactor = "vec3(destColor.a * 2.0)";
                break;
            case GE_SRCBLEND_DOUBLEINVDSTALPHA:
                srcFactor = "vec3(1.0 - destColor.a * 2.0)";
                break;
            case GE_SRCBLEND_FIXA:
                srcFactor = "u_blendFixA";
                break;
            default:
                srcFactor = "u_blendFixA";
                break;
            }
            switch (replaceBlendFuncB) {
            case GE_DSTBLEND_SRCCOLOR:
                dstFactor = "v.rgb";
                break;
            case GE_DSTBLEND_INVSRCCOLOR:
                dstFactor = "(vec3(1.0) - v.rgb)";
                break;
            case GE_DSTBLEND_SRCALPHA:
                dstFactor = "vec3(v.a)";
                break;
            case GE_DSTBLEND_INVSRCALPHA:
                dstFactor = "vec3(1.0 - v.a)";
                break;
            case GE_DSTBLEND_DSTALPHA:
                dstFactor = "vec3(destColor.a)";
                break;
            case GE_DSTBLEND_INVDSTALPHA:
                dstFactor = "vec3(1.0 - destColor.a)";
                break;
            case GE_DSTBLEND_DOUBLESRCALPHA:
                dstFactor = "vec3(v.a * 2.0)";
                break;
            case GE_DSTBLEND_DOUBLEINVSRCALPHA:
                dstFactor = "vec3(1.0 - v.a * 2.0)";
                break;
            case GE_DSTBLEND_DOUBLEDSTALPHA:
                dstFactor = "vec3(destColor.a * 2.0)";
                break;
            case GE_DSTBLEND_DOUBLEINVDSTALPHA:
                dstFactor = "vec3(1.0 - destColor.a * 2.0)";
                break;
            case GE_DSTBLEND_FIXB:
                dstFactor = "u_blendFixB";
                break;
            default:
                srcFactor = "u_blendFixB";
                break;
            }

            switch (replaceBlendEq) {
            case GE_BLENDMODE_MUL_AND_ADD:
                WRITE(p, "  v.rgb = v.rgb * %s + destColor.rgb * %s;\n", srcFactor, dstFactor);
                break;
            case GE_BLENDMODE_MUL_AND_SUBTRACT:
                WRITE(p, "  v.rgb = v.rgb * %s - destColor.rgb * %s;\n", srcFactor, dstFactor);
                break;
            case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE:
                WRITE(p, "  v.rgb = destColor.rgb * %s - v.rgb * %s;\n", dstFactor, srcFactor);
                break;
            case GE_BLENDMODE_MIN:
                WRITE(p, "  v.rgb = min(v.rgb, destColor.rgb);\n");
                break;
            case GE_BLENDMODE_MAX:
                WRITE(p, "  v.rgb = max(v.rgb, destColor.rgb);\n");
                break;
            case GE_BLENDMODE_ABSDIFF:
                WRITE(p, "  v.rgb = abs(v.rgb - destColor.rgb);\n");
                break;
            }
        }

        if (replaceBlend == REPLACE_BLEND_2X_ALPHA || replaceBlend == REPLACE_BLEND_PRE_SRC_2X_ALPHA) {
            WRITE(p, "  v.a = v.a * 2.0;\n");
        }
    }

    std::string replacedAlpha = "0.0";
    char replacedAlphaTemp[64] = "";
    if (stencilToAlpha != REPLACE_ALPHA_NO) {
        switch (replaceAlphaWithStencilType) {
        case STENCIL_VALUE_UNIFORM:
            replacedAlpha = "u_stencilReplaceValue";
            break;

        case STENCIL_VALUE_ZERO:
            replacedAlpha = "0.0";
            break;

        case STENCIL_VALUE_ONE:
        case STENCIL_VALUE_INVERT:
            // In invert, we subtract by one, but we want to output one here.
            replacedAlpha = "1.0";
            break;

        case STENCIL_VALUE_INCR_4:
        case STENCIL_VALUE_DECR_4:
            // We're adding/subtracting, just by the smallest value in 4-bit.
            snprintf(replacedAlphaTemp, sizeof(replacedAlphaTemp), "%f", 1.0 / 15.0);
            replacedAlpha = replacedAlphaTemp;
            break;

        case STENCIL_VALUE_INCR_8:
        case STENCIL_VALUE_DECR_8:
            // We're adding/subtracting, just by the smallest value in 8-bit.
            snprintf(replacedAlphaTemp, sizeof(replacedAlphaTemp), "%f", 1.0 / 255.0);
            replacedAlpha = replacedAlphaTemp;
            break;

        case STENCIL_VALUE_KEEP:
            // Do nothing. We'll mask out the alpha using color mask.
            break;
        }
    }

    switch (stencilToAlpha) {
    case REPLACE_ALPHA_DUALSOURCE:
        WRITE(p, "  %s = vec4(v.rgb, %s);\n", fragColor0, replacedAlpha.c_str());
        WRITE(p, "  %s = vec4(0.0, 0.0, 0.0, v.a);\n", fragColor1);
        break;

    case REPLACE_ALPHA_YES:
        WRITE(p, "  %s = vec4(v.rgb, %s);\n", fragColor0, replacedAlpha.c_str());
        break;

    case REPLACE_ALPHA_NO:
        WRITE(p, "  %s = v;\n", fragColor0);
        break;

    default:
        ERROR_LOG(G3D, "Bad stencil-to-alpha type, corrupt ID?");
        return false;
    }

    LogicOpReplaceType replaceLogicOpType = (LogicOpReplaceType)id.Bits(FS_BIT_REPLACE_LOGIC_OP_TYPE, 2);
    switch (replaceLogicOpType) {
    case LOGICOPTYPE_ONE:
        WRITE(p, "  %s.rgb = vec3(1.0, 1.0, 1.0);\n", fragColor0);
        break;
    case LOGICOPTYPE_INVERT:
        WRITE(p, "  %s.rgb = vec3(1.0, 1.0, 1.0) - %s.rgb;\n", fragColor0, fragColor0);
        break;
    case LOGICOPTYPE_NORMAL:
        break;

    default:
        ERROR_LOG(G3D, "Bad logic op type, corrupt ID?");
        return false;
    }

#ifdef DEBUG_SHADER
    if (doTexture) {
        WRITE(p, "  %s = texture2D(tex, v_texcoord.xy);\n", fragColor0);
        WRITE(p, "  %s += vec4(0.3,0,0.3,0.3);\n", fragColor0);
    } else {
        WRITE(p, "  %s = vec4(1,0,1,1);\n", fragColor0);
    }
#endif

    if (gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT)) {
        WRITE(p, "  highp float z = gl_FragCoord.z;\n");
        WRITE(p, "  z = (1.0/65535.0) * floor(z * 65535.0);\n");
        WRITE(p, "  gl_FragDepth = z;\n");
    }

    WRITE(p, "}\n");

    return true;
}
// Missing: Z depth range
// Generates the Vulkan-flavored GLSL source of the fragment shader described by `id`.
//
// All decisions are driven by the bits stored in `id` (plus a few driver bug / feature
// flags read from gl_extensions and gstate_c).  The text is emitted sequentially into
// `buffer` through the WRITE macro, so the order of the statements below is the order
// of the lines in the generated shader.
//
// NOTE(review): no buffer size is passed in, so the caller presumably guarantees that
// `buffer` is large enough for the longest possible shader - confirm at the call sites.
//
// Returns true on success.  Returns false (after logging an error) when the
// stencil-to-alpha or logic op bits of `id` decode to a value that is not handled; in
// that case `buffer` holds a partially written shader.
bool GenerateVulkanGLSLFragmentShader(const ShaderID &id, char *buffer) {
	char *p = buffer;

	WRITE(p, "%s", vulkan_glsl_preamble);

	// Decode everything we need from the shader ID up front.
	bool lmode = id.Bit(FS_BIT_LMODE);
	bool doTexture = id.Bit(FS_BIT_DO_TEXTURE);
	bool enableFog = id.Bit(FS_BIT_ENABLE_FOG);
	bool enableAlphaTest = id.Bit(FS_BIT_ALPHA_TEST);

	bool alphaTestAgainstZero = id.Bit(FS_BIT_ALPHA_AGAINST_ZERO);
	bool enableColorTest = id.Bit(FS_BIT_COLOR_TEST);
	bool colorTestAgainstZero = id.Bit(FS_BIT_COLOR_AGAINST_ZERO);
	bool enableColorDoubling = id.Bit(FS_BIT_COLOR_DOUBLE);
	bool doTextureProjection = id.Bit(FS_BIT_DO_TEXTURE_PROJ);
	bool doTextureAlpha = id.Bit(FS_BIT_TEXALPHA);
	bool doFlatShading = id.Bit(FS_BIT_FLATSHADE);

	GEComparison alphaTestFunc = (GEComparison)id.Bits(FS_BIT_ALPHA_TEST_FUNC, 3);
	GEComparison colorTestFunc = (GEComparison)id.Bits(FS_BIT_COLOR_TEST_FUNC, 2);
	bool needShaderTexClamp = id.Bit(FS_BIT_SHADER_TEX_CLAMP);

	GETexFunc texFunc = (GETexFunc)id.Bits(FS_BIT_TEXFUNC, 3);
	bool textureAtOffset = id.Bit(FS_BIT_TEXTURE_AT_OFFSET);

	ReplaceBlendType replaceBlend = static_cast<ReplaceBlendType>(id.Bits(FS_BIT_REPLACE_BLEND, 3));
	ReplaceAlphaType stencilToAlpha = static_cast<ReplaceAlphaType>(id.Bits(FS_BIT_STENCIL_TO_ALPHA, 2));

	GEBlendSrcFactor replaceBlendFuncA = (GEBlendSrcFactor)id.Bits(FS_BIT_BLENDFUNC_A, 4);
	GEBlendDstFactor replaceBlendFuncB = (GEBlendDstFactor)id.Bits(FS_BIT_BLENDFUNC_B, 4);
	GEBlendMode replaceBlendEq = (GEBlendMode)id.Bits(FS_BIT_BLENDEQ, 3);
	StencilValueType replaceAlphaWithStencilType = (StencilValueType)id.Bits(FS_BIT_REPLACE_ALPHA_WITH_STENCIL_TYPE, 4);

	bool isModeClear = id.Bit(FS_BIT_CLEARMODE);

	// Interpolation qualifier applied to the color inputs.
	const char *shading = doFlatShading ? "flat" : "";

	// Resource declarations: uniform block, then the samplers that are actually used.
	WRITE(p, "layout (std140, set = 0, binding = 2) uniform baseUBO {\n%s} base;\n", ub_baseStr);
	if (doTexture) {
		WRITE(p, "layout (binding = 0) uniform sampler2D tex;\n");
	}

	if (!isModeClear && replaceBlend > REPLACE_BLEND_STANDARD) {
		if (replaceBlend == REPLACE_BLEND_COPY_FBO) {
			WRITE(p, "layout (binding = 1) uniform sampler2D fbotex;\n");
		}
	}

	// Inputs from the vertex stage.
	WRITE(p, "layout (location = 1) %s in vec4 v_color0;\n", shading);
	if (lmode)
		WRITE(p, "layout (location = 2) %s in vec3 v_color1;\n", shading);
	if (enableFog) {
		WRITE(p, "layout (location = 3) in float v_fogdepth;\n");
	}
	if (doTexture) {
		WRITE(p, "layout (location = 0) in vec3 v_texcoord;\n");
	}

	// Helpers are only emitted when the corresponding non-zero test below will call them.
	if (enableAlphaTest && !alphaTestAgainstZero) {
		WRITE(p, "int roundAndScaleTo255i(in float x) { return int(floor(x * 255.0 + 0.5)); }\n");
	}
	if (enableColorTest && !colorTestAgainstZero) {
		WRITE(p, "ivec3 roundAndScaleTo255iv(in vec3 x) { return ivec3(floor(x * 255.0 + 0.5)); }\n");
	}

	// Outputs.  The second output is only declared for dual source stencil-to-alpha.
	WRITE(p, "layout (location = 0, index = 0) out vec4 fragColor0;\n");
	if (stencilToAlpha == REPLACE_ALPHA_DUALSOURCE) {
		WRITE(p, "layout (location = 0, index = 1) out vec4 fragColor1;\n");
	}

	// PowerVR needs a custom modulo function. For some reason, this has far higher precision than the builtin one.
	if ((gl_extensions.bugs & BUG_PVR_SHADER_PRECISION_BAD) && needShaderTexClamp) {
		WRITE(p, "float mymod(float a, float b) { return a - b * floor(a / b); }\n");
	}

	WRITE(p, "void main() {\n");
	if (isModeClear) {
		// Clear mode does not allow any fancy shading.
		WRITE(p, "  vec4 v = v_color0;\n");
	} else {
		// Secondary color for specular on top of texture.  Stays empty when lmode is off.
		const char *secondary = "";
		if (lmode) {
			WRITE(p, "  vec4 s = vec4(v_color1, 0.0);\n");
			secondary = " + s";
		}

		if (doTexture) {
			const char *texcoord = "v_texcoord";
			// TODO: Not sure the right way to do this for projection.
			// This path destroys resolution on older PowerVR no matter what I do, so we disable it on SGX 540 and lesser, and live with the consequences.
			if (needShaderTexClamp && !(gl_extensions.bugs & BUG_PVR_SHADER_PRECISION_TERRIBLE)) {
				// We may be clamping inside a larger surface (tex = 64x64, buffer=480x272).
				// We may also be wrapping in such a surface, or either one in a too-small surface.
				// Obviously, clamping to a smaller surface won't work.  But better to clamp to something.
				std::string ucoord = "v_texcoord.x";
				std::string vcoord = "v_texcoord.y";
				if (doTextureProjection) {
					ucoord = "(v_texcoord.x / v_texcoord.z)";
					vcoord = "(v_texcoord.y / v_texcoord.z)";
				}

				// Must match the condition under which mymod was declared above.
				std::string modulo = (gl_extensions.bugs & BUG_PVR_SHADER_PRECISION_BAD) ? "mymod" : "mod";

				if (id.Bit(FS_BIT_CLAMP_S)) {
					ucoord = "clamp(" + ucoord + ", base.texclamp.z, base.texclamp.x - base.texclamp.z)";
				} else {
					ucoord = modulo + "(" + ucoord + ", base.texclamp.x)";
				}
				if (id.Bit(FS_BIT_CLAMP_T)) {
					vcoord = "clamp(" + vcoord + ", base.texclamp.w, base.texclamp.y - base.texclamp.w)";
				} else {
					vcoord = modulo + "(" + vcoord + ", base.texclamp.y)";
				}
				if (textureAtOffset) {
					ucoord = "(" + ucoord + " + base.texclampoff.x)";
					vcoord = "(" + vcoord + " + base.texclampoff.y)";
				}

				WRITE(p, "  vec2 fixedcoord = vec2(%s, %s);\n", ucoord.c_str(), vcoord.c_str());
				texcoord = "fixedcoord";
				// We already projected it.
				doTextureProjection = false;
			}

			if (doTextureProjection) {
				WRITE(p, "  vec4 t = textureProj(tex, %s);\n", texcoord);
			} else {
				WRITE(p, "  vec4 t = texture(tex, %s.xy);\n", texcoord);
			}
			WRITE(p, "  vec4 p = v_color0;\n");

			// Combine the texel (t) with the primary color (p) according to the texture function.
			if (doTextureAlpha) { // texfmt == RGBA
				switch (texFunc) {
				case GE_TEXFUNC_MODULATE:
					WRITE(p, "  vec4 v = p * t%s;\n", secondary);
					break;

				case GE_TEXFUNC_DECAL:
					WRITE(p, "  vec4 v = vec4(mix(p.rgb, t.rgb, t.a), p.a)%s;\n", secondary);
					break;

				case GE_TEXFUNC_BLEND:
					WRITE(p, "  vec4 v = vec4(mix(p.rgb, base.texenv.rgb, t.rgb), p.a * t.a)%s;\n", secondary);
					break;

				case GE_TEXFUNC_REPLACE:
					WRITE(p, "  vec4 v = t%s;\n", secondary);
					break;

				case GE_TEXFUNC_ADD:
				case GE_TEXFUNC_UNKNOWN1:
				case GE_TEXFUNC_UNKNOWN2:
				case GE_TEXFUNC_UNKNOWN3:
					WRITE(p, "  vec4 v = vec4(p.rgb + t.rgb, p.a * t.a)%s;\n", secondary);
					break;
				default:
					WRITE(p, "  vec4 v = p;\n"); break;
				}
			} else { // texfmt == RGB
				switch (texFunc) {
				case GE_TEXFUNC_MODULATE:
					WRITE(p, "  vec4 v = vec4(t.rgb * p.rgb, p.a)%s;\n", secondary);
					break;

				case GE_TEXFUNC_DECAL:
					WRITE(p, "  vec4 v = vec4(t.rgb, p.a)%s;\n", secondary);
					break;

				case GE_TEXFUNC_BLEND:
					WRITE(p, "  vec4 v = vec4(mix(p.rgb, base.texenv.rgb, t.rgb), p.a)%s;\n", secondary);
					break;

				case GE_TEXFUNC_REPLACE:
					WRITE(p, "  vec4 v = vec4(t.rgb, p.a)%s;\n", secondary);
					break;

				case GE_TEXFUNC_ADD:
				case GE_TEXFUNC_UNKNOWN1:
				case GE_TEXFUNC_UNKNOWN2:
				case GE_TEXFUNC_UNKNOWN3:
					WRITE(p, "  vec4 v = vec4(p.rgb + t.rgb, p.a)%s;\n", secondary); break;
				default:
					WRITE(p, "  vec4 v = p;\n"); break;
				}
			}
		} else {
			// No texture mapping
			WRITE(p, "  vec4 v = v_color0 %s;\n", secondary);
		}

		if (enableAlphaTest) {
			if (alphaTestAgainstZero) {
				// When testing against 0 (extremely common), we can avoid some math.
				// 0.002 is approximately half of 1.0 / 255.0.
				if (alphaTestFunc == GE_COMP_NOTEQUAL || alphaTestFunc == GE_COMP_GREATER) {
					WRITE(p, "  if (v.a < 0.002) discard;\n");
				} else if (alphaTestFunc != GE_COMP_NEVER) {
					// Anything else is a test for == 0.  Happens sometimes, actually...
					WRITE(p, "  if (v.a > 0.002) discard;\n");
				} else {
					// NEVER has been logged as used by games, although it makes little sense - statically failing.
					// Maybe we could discard the drawcall, but it's pretty rare.  Let's just statically discard here.
					WRITE(p, "  discard;\n");
				}
			} else {
				// Indexed by the 3-bit comparison function.  Each entry is the operator used
				// in the discard condition; "#" marks the entries that discard unconditionally.
				const char *alphaTestFuncs[] = { "#", "#", " != ", " == ", " >= ", " > ", " <= ", " < " };
				if (alphaTestFuncs[alphaTestFunc][0] != '#') {
					WRITE(p, "  if ((roundAndScaleTo255i(v.a) & base.alphacolormask.a) %s base.alphacolorref.a) discard;\n", alphaTestFuncs[alphaTestFunc]);
				} else {
					// This means NEVER.  See above.
					WRITE(p, "  discard;\n");
				}
			}
		}

		if (enableColorTest) {
			if (colorTestAgainstZero) {
				// When testing against 0 (common), we can avoid some math.
				// 0.002 is approximately half of 1.0 / 255.0.
				if (colorTestFunc == GE_COMP_NOTEQUAL) {
					WRITE(p, "  if (v.r < 0.002 && v.g < 0.002 && v.b < 0.002) discard;\n");
				} else if (colorTestFunc != GE_COMP_NEVER) {
					// Anything else is a test for == 0.
					WRITE(p, "  if (v.r > 0.002 || v.g > 0.002 || v.b > 0.002) discard;\n");
				} else {
					// NEVER has been logged as used by games, although it makes little sense - statically failing.
					// Maybe we could discard the drawcall, but it's pretty rare.  Let's just statically discard here.
					WRITE(p, "  discard;\n");
				}
			} else {
				// Indexed by the 2-bit comparison function, same "#" convention as the alpha test.
				const char *colorTestFuncs[] = { "#", "#", " != ", " == " };
				if (colorTestFuncs[colorTestFunc][0] != '#') {
					WRITE(p, "  ivec3 v_scaled = roundAndScaleTo255iv(v.rgb);\n");
					WRITE(p, "  if ((v_scaled & base.alphacolormask.rgb) %s (base.alphacolorref.rgb & base.alphacolormask.rgb)) discard;\n", colorTestFuncs[colorTestFunc]);
				} else {
					WRITE(p, "  discard;\n");
				}
			}
		}

		// Color doubling happens after the color test.
		if (enableColorDoubling && replaceBlend == REPLACE_BLEND_2X_SRC) {
			WRITE(p, "  v.rgb = v.rgb * 4.0;\n");
		} else if (enableColorDoubling || replaceBlend == REPLACE_BLEND_2X_SRC) {
			WRITE(p, "  v.rgb = v.rgb * 2.0;\n");
		}

		if (enableFog) {
			WRITE(p, "  float fogCoef = clamp(v_fogdepth, 0.0, 1.0);\n");
			WRITE(p, "  v = mix(vec4(base.fogcolor, v.a), v, fogCoef);\n");
			// WRITE(p, "  v.x = v_depth;\n");
		}

		if (replaceBlend == REPLACE_BLEND_PRE_SRC || replaceBlend == REPLACE_BLEND_PRE_SRC_2X_ALPHA) {
			// Factors that depend on the destination can't be premultiplied here, so they
			// emit the token "ERROR" (which will not compile as GLSL).
			const char *srcFactor = "ERROR";
			switch (replaceBlendFuncA) {
			case GE_SRCBLEND_DSTCOLOR:          srcFactor = "ERROR"; break;
			case GE_SRCBLEND_INVDSTCOLOR:       srcFactor = "ERROR"; break;
			case GE_SRCBLEND_SRCALPHA:          srcFactor = "vec3(v.a)"; break;
			case GE_SRCBLEND_INVSRCALPHA:       srcFactor = "vec3(1.0 - v.a)"; break;
			case GE_SRCBLEND_DSTALPHA:          srcFactor = "ERROR"; break;
			case GE_SRCBLEND_INVDSTALPHA:       srcFactor = "ERROR"; break;
			case GE_SRCBLEND_DOUBLESRCALPHA:    srcFactor = "vec3(v.a * 2.0)"; break;
			case GE_SRCBLEND_DOUBLEINVSRCALPHA: srcFactor = "vec3(1.0 - v.a * 2.0)"; break;
			// PRE_SRC for REPLACE_BLEND_PRE_SRC_2X_ALPHA means "double the src."
			// It's close to the same, but clamping can still be an issue.
			case GE_SRCBLEND_DOUBLEDSTALPHA:    srcFactor = "vec3(2.0)"; break;
			case GE_SRCBLEND_DOUBLEINVDSTALPHA: srcFactor = "ERROR"; break;
			case GE_SRCBLEND_FIXA:              srcFactor = "base.blendFixA"; break;
			// The factor is decoded from 4 bits, so values past FIXA can show up.
			// Fall back to the fixed color rather than emitting an invalid token.
			default:                            srcFactor = "base.blendFixA"; break;
			}

			WRITE(p, "  v.rgb = v.rgb * %s;\n", srcFactor);
		}

		if (replaceBlend == REPLACE_BLEND_COPY_FBO) {
			// Blending is done in the shader: read the destination pixel from fbotex.
			WRITE(p, "  lowp vec4 destColor = texelFetch(fbotex, ivec2(gl_FragCoord.x, gl_FragCoord.y), 0);\n");

			const char *srcFactor = "vec3(1.0)";
			const char *dstFactor = "vec3(0.0)";

			switch (replaceBlendFuncA) {
			case GE_SRCBLEND_DSTCOLOR:          srcFactor = "destColor.rgb"; break;
			case GE_SRCBLEND_INVDSTCOLOR:       srcFactor = "(vec3(1.0) - destColor.rgb)"; break;
			case GE_SRCBLEND_SRCALPHA:          srcFactor = "vec3(v.a)"; break;
			case GE_SRCBLEND_INVSRCALPHA:       srcFactor = "vec3(1.0 - v.a)"; break;
			case GE_SRCBLEND_DSTALPHA:          srcFactor = "vec3(destColor.a)"; break;
			case GE_SRCBLEND_INVDSTALPHA:       srcFactor = "vec3(1.0 - destColor.a)"; break;
			case GE_SRCBLEND_DOUBLESRCALPHA:    srcFactor = "vec3(v.a * 2.0)"; break;
			case GE_SRCBLEND_DOUBLEINVSRCALPHA: srcFactor = "vec3(1.0 - v.a * 2.0)"; break;
			case GE_SRCBLEND_DOUBLEDSTALPHA:    srcFactor = "vec3(destColor.a * 2.0)"; break;
			case GE_SRCBLEND_DOUBLEINVDSTALPHA: srcFactor = "vec3(1.0 - destColor.a * 2.0)"; break;
			case GE_SRCBLEND_FIXA:              srcFactor = "base.blendFixA"; break;
			// Values past FIXA (4-bit field) fall back to the fixed source color.
			default:                            srcFactor = "base.blendFixA"; break;
			}
			switch (replaceBlendFuncB) {
			case GE_DSTBLEND_SRCCOLOR:          dstFactor = "v.rgb"; break;
			case GE_DSTBLEND_INVSRCCOLOR:       dstFactor = "(vec3(1.0) - v.rgb)"; break;
			case GE_DSTBLEND_SRCALPHA:          dstFactor = "vec3(v.a)"; break;
			case GE_DSTBLEND_INVSRCALPHA:       dstFactor = "vec3(1.0 - v.a)"; break;
			case GE_DSTBLEND_DSTALPHA:          dstFactor = "vec3(destColor.a)"; break;
			case GE_DSTBLEND_INVDSTALPHA:       dstFactor = "vec3(1.0 - destColor.a)"; break;
			case GE_DSTBLEND_DOUBLESRCALPHA:    dstFactor = "vec3(v.a * 2.0)"; break;
			case GE_DSTBLEND_DOUBLEINVSRCALPHA: dstFactor = "vec3(1.0 - v.a * 2.0)"; break;
			case GE_DSTBLEND_DOUBLEDSTALPHA:    dstFactor = "vec3(destColor.a * 2.0)"; break;
			case GE_DSTBLEND_DOUBLEINVDSTALPHA: dstFactor = "vec3(1.0 - destColor.a * 2.0)"; break;
			case GE_DSTBLEND_FIXB:              dstFactor = "base.blendFixB"; break;
			// Values past FIXB (4-bit field) fall back to the fixed destination color.
			default:                            dstFactor = "base.blendFixB"; break;
			}

			// Blend equations outside this list emit nothing, leaving v.rgb untouched.
			switch (replaceBlendEq) {
			case GE_BLENDMODE_MUL_AND_ADD:
				WRITE(p, "  v.rgb = v.rgb * %s + destColor.rgb * %s;\n", srcFactor, dstFactor);
				break;
			case GE_BLENDMODE_MUL_AND_SUBTRACT:
				WRITE(p, "  v.rgb = v.rgb * %s - destColor.rgb * %s;\n", srcFactor, dstFactor);
				break;
			case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE:
				WRITE(p, "  v.rgb = destColor.rgb * %s - v.rgb * %s;\n", dstFactor, srcFactor);
				break;
			case GE_BLENDMODE_MIN:
				WRITE(p, "  v.rgb = min(v.rgb, destColor.rgb);\n");
				break;
			case GE_BLENDMODE_MAX:
				WRITE(p, "  v.rgb = max(v.rgb, destColor.rgb);\n");
				break;
			case GE_BLENDMODE_ABSDIFF:
				WRITE(p, "  v.rgb = abs(v.rgb - destColor.rgb);\n");
				break;
			}
		}

		if (replaceBlend == REPLACE_BLEND_2X_ALPHA || replaceBlend == REPLACE_BLEND_PRE_SRC_2X_ALPHA) {
			WRITE(p, "  v.a = v.a * 2.0;\n");
		}
	}

	// Pick the GLSL expression written to the alpha channel when stencil replaces alpha.
	std::string replacedAlpha = "0.0";
	char replacedAlphaTemp[64] = "";
	if (stencilToAlpha != REPLACE_ALPHA_NO) {
		switch (replaceAlphaWithStencilType) {
		case STENCIL_VALUE_UNIFORM:
			replacedAlpha = "base.fogcoef_stencilreplace.z";
			break;

		case STENCIL_VALUE_ZERO:
			replacedAlpha = "0.0";
			break;

		case STENCIL_VALUE_ONE:
		case STENCIL_VALUE_INVERT:
			// In invert, we subtract by one, but we want to output one here.
			replacedAlpha = "1.0";
			break;

		case STENCIL_VALUE_INCR_4:
		case STENCIL_VALUE_DECR_4:
			// We're adding/subtracting, just by the smallest value in 4-bit.
			snprintf(replacedAlphaTemp, sizeof(replacedAlphaTemp), "%f", 1.0 / 15.0);
			replacedAlpha = replacedAlphaTemp;
			break;

		case STENCIL_VALUE_INCR_8:
		case STENCIL_VALUE_DECR_8:
			// We're adding/subtracting, just by the smallest value in 8-bit.
			snprintf(replacedAlphaTemp, sizeof(replacedAlphaTemp), "%f", 1.0 / 255.0);
			replacedAlpha = replacedAlphaTemp;
			break;

		case STENCIL_VALUE_KEEP:
			// Do nothing. We'll mask out the alpha using color mask.
			break;
		}
	}

	// Write the final color(s).
	switch (stencilToAlpha) {
	case REPLACE_ALPHA_DUALSOURCE:
		WRITE(p, "  fragColor0 = vec4(v.rgb, %s);\n", replacedAlpha.c_str());
		WRITE(p, "  fragColor1 = vec4(0.0, 0.0, 0.0, v.a);\n");
		break;

	case REPLACE_ALPHA_YES:
		WRITE(p, "  fragColor0 = vec4(v.rgb, %s);\n", replacedAlpha.c_str());
		break;

	case REPLACE_ALPHA_NO:
		WRITE(p, "  fragColor0 = v;\n");
		break;

	default:
		ERROR_LOG(G3D, "Bad stencil-to-alpha type, corrupt ID?");
		return false;
	}

	// Some logic ops are simulated by overwriting or inverting the output color.
	LogicOpReplaceType replaceLogicOpType = (LogicOpReplaceType)id.Bits(FS_BIT_REPLACE_LOGIC_OP_TYPE, 2);
	switch (replaceLogicOpType) {
	case LOGICOPTYPE_ONE:
		WRITE(p, "  fragColor0.rgb = vec3(1.0, 1.0, 1.0);\n");
		break;
	case LOGICOPTYPE_INVERT:
		WRITE(p, "  fragColor0.rgb = vec3(1.0, 1.0, 1.0) - fragColor0.rgb;\n");
		break;
	case LOGICOPTYPE_NORMAL:
		break;

	default:
		ERROR_LOG(G3D, "Bad logic op type, corrupt ID?");
		return false;
	}

	// Quantize the written depth down to 16-bit steps when the feature flag asks for it.
	if (gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT)) {
		WRITE(p, "  highp float z = gl_FragCoord.z;\n");
		WRITE(p, "  z = (1.0/65535.0) * floor(z * 65535.0);\n");
		WRITE(p, "  gl_FragDepth = z;\n");
	}

	WRITE(p, "}\n");

	return true;
}