Exemplo n.º 1
0
Lighter::Lighter(int vertType) {
	if (!gstate.isLightingEnabled())
		return;

	doShadeMapping_ = gstate.getUVGenMode() == GE_TEXMAP_ENVIRONMENT_MAP;
	materialEmissive.GetFromRGB(gstate.materialemissive);
	materialEmissive.a = 0.0f;
	globalAmbient.GetFromRGB(gstate.ambientcolor);
	globalAmbient.GetFromA(gstate.ambientalpha);
	materialAmbient.GetFromRGB(gstate.materialambient);
	materialAmbient.GetFromA(gstate.materialalpha);
	materialDiffuse.GetFromRGB(gstate.materialdiffuse);
	materialDiffuse.a = 1.0f;
	materialSpecular.GetFromRGB(gstate.materialspecular);
	materialSpecular.a = 1.0f;
	specCoef_ = getFloat24(gstate.materialspecularcoef);
	// viewer_ = Vec3f(-gstate.viewMatrix[9], -gstate.viewMatrix[10], -gstate.viewMatrix[11]);
	bool hasColor = (vertType & GE_VTYPE_COL_MASK) != 0;
	materialUpdate_ = hasColor ? (gstate.materialupdate & 7) : 0;

	for (int l = 0; l < 4; l++) {
		int i = l * 3;
		if (gstate.isLightChanEnabled(l)) {
			lpos[i] = getFloat24(gstate.lpos[i]);
			lpos[i + 1] = getFloat24(gstate.lpos[i + 1]);
			lpos[i + 2] = getFloat24(gstate.lpos[i + 2]);
			ldir[i] = getFloat24(gstate.ldir[i]);
			ldir[i + 1] = getFloat24(gstate.ldir[i + 1]);
			ldir[i + 2] = getFloat24(gstate.ldir[i + 2]);
			latt[i] = getFloat24(gstate.latt[i]);
			latt[i + 1] = getFloat24(gstate.latt[i + 1]);
			latt[i + 2] = getFloat24(gstate.latt[i + 2]);
			for (int t = 0; t < 3; t++) {
				u32 data = gstate.lcolor[l * 3 + t] & 0xFFFFFF;
				float r = (float)(data & 0xff) * (1.0f / 255.0f);
				float g = (float)((data >> 8) & 0xff) * (1.0f / 255.0f);
				float b = (float)(data >> 16) * (1.0f / 255.0f);
				lcolor[t][l][0] = r;
				lcolor[t][l][1] = g;
				lcolor[t][l][2] = b;
			}
		}
	}
Exemplo n.º 2
0
void LightUpdateUniforms(UB_VS_Lights *ub, uint64_t dirtyUniforms) {
	// Lighting
	if (dirtyUniforms & DIRTY_AMBIENT) {
		Uint8x3ToFloat4_AlphaUint8(ub->ambientColor, gstate.ambientcolor, gstate.getAmbientA());
	}
	if (dirtyUniforms & DIRTY_MATDIFFUSE) {
		Uint8x3ToFloat4(ub->materialDiffuse, gstate.materialdiffuse);
	}
	if (dirtyUniforms & DIRTY_MATSPECULAR) {
		Uint8x3ToFloat4_Alpha(ub->materialSpecular, gstate.materialspecular, std::max(0.0f, getFloat24(gstate.materialspecularcoef)));
	}
	if (dirtyUniforms & DIRTY_MATEMISSIVE) {
		Uint8x3ToFloat4(ub->materialEmissive, gstate.materialemissive);
	}

	for (int i = 0; i < 4; i++) {
		if (dirtyUniforms & (DIRTY_LIGHT0 << i)) {
			if (gstate.isDirectionalLight(i)) {
				// Prenormalize
				float x = getFloat24(gstate.lpos[i * 3 + 0]);
				float y = getFloat24(gstate.lpos[i * 3 + 1]);
				float z = getFloat24(gstate.lpos[i * 3 + 2]);
				float len = sqrtf(x*x + y*y + z*z);
				if (len == 0.0f)
					len = 1.0f;
				else
					len = 1.0f / len;
				float vec[3] = { x * len, y * len, z * len };
				CopyFloat3To4(ub->lpos[i], vec);
			} else {
				ExpandFloat24x3ToFloat4(ub->lpos[i], &gstate.lpos[i * 3]);
			}
			ExpandFloat24x3ToFloat4(ub->ldir[i], &gstate.ldir[i * 3]);
			ExpandFloat24x3ToFloat4(ub->latt[i], &gstate.latt[i * 3]);
			CopyFloat1To4(ub->lightAngle[i], getFloat24(gstate.lcutoff[i]));
			CopyFloat1To4(ub->lightSpotCoef[i], getFloat24(gstate.lconv[i]));
			Uint8x3ToFloat4(ub->lightAmbient[i], gstate.lcolor[i * 3]);
			Uint8x3ToFloat4(ub->lightDiffuse[i], gstate.lcolor[i * 3 + 1]);
			Uint8x3ToFloat4(ub->lightSpecular[i], gstate.lcolor[i * 3 + 2]);
		}
	}
}
Exemplo n.º 3
0
// TODO: This is ugly
static inline ScreenCoords ClipToScreenInternal(const ClipCoords& coords, bool set_flag = true)
{
	ScreenCoords ret;
	// TODO: Check for invalid parameters (x2 < x1, etc)
	float vpx1 = getFloat24(gstate.viewportx1);
	float vpx2 = getFloat24(gstate.viewportx2);
	float vpy1 = getFloat24(gstate.viewporty1);
	float vpy2 = getFloat24(gstate.viewporty2);
	float vpz1 = getFloat24(gstate.viewportz1);
	float vpz2 = getFloat24(gstate.viewportz2);

	float retx = coords.x * vpx1 / coords.w + vpx2;
	float rety = coords.y * vpy1 / coords.w + vpy2;
	float retz = coords.z * vpz1 / coords.w + vpz2;

	if (gstate.clipEnable & 0x1) {
		if (retz < 0.f) retz = 0.f;
		if (retz > 65535.f) retz = 65535.f;
	}

	if (set_flag && (retx > 4095.9375f || rety > 4096.9375f || retx < 0 || rety < 0 || retz < 0 || retz > 65535.f))
		outside_range_flag = true;

	// 16 = 0xFFFF / 4095.9375
	return ScreenCoords(retx * 16, rety * 16, retz);
}
Exemplo n.º 4
0
Lighter::Lighter() {
	doShadeMapping_ = gstate.getUVGenMode() == GE_TEXMAP_ENVIRONMENT_MAP;
	materialEmissive.GetFromRGB(gstate.materialemissive);
	materialEmissive.a = 0.0f;
	globalAmbient.GetFromRGB(gstate.ambientcolor);
	globalAmbient.GetFromA(gstate.ambientalpha);
	materialAmbient.GetFromRGB(gstate.materialambient);
	materialAmbient.GetFromA(gstate.materialalpha);
	materialDiffuse.GetFromRGB(gstate.materialdiffuse);
	materialDiffuse.a = 1.0f;
	materialSpecular.GetFromRGB(gstate.materialspecular);
	materialSpecular.a = 1.0f;
	specCoef_ = getFloat24(gstate.materialspecularcoef);
	// viewer_ = Vec3f(-gstate.viewMatrix[9], -gstate.viewMatrix[10], -gstate.viewMatrix[11]);
	materialUpdate_ = gstate.materialupdate & 7;
}
Exemplo n.º 5
0
static Vec3f ClipToScreen(const Vec4f& coords) {
	// TODO: Check for invalid parameters (x2 < x1, etc)
	float vpx1 = getFloat24(gstate.viewportx1);
	float vpx2 = getFloat24(gstate.viewportx2);
	float vpy1 = getFloat24(gstate.viewporty1);
	float vpy2 = getFloat24(gstate.viewporty2);
	float vpz1 = getFloat24(gstate.viewportz1);
	float vpz2 = getFloat24(gstate.viewportz2);

	float retx = coords.x * vpx1 / coords.w + vpx2;
	float rety = coords.y * vpy1 / coords.w + vpy2;
	float retz = coords.z * vpz1 / coords.w + vpz2;

	// 16 = 0xFFFF / 4095.9375
	return Vec3f(retx * 16, rety * 16, retz);
}
Exemplo n.º 6
0
void ShaderManagerVulkan::LightUpdateUniforms(int dirtyUniforms) {
	// Lighting
	if (dirtyUniforms & DIRTY_AMBIENT) {
		Uint8x3ToFloat4_AlphaUint8(ub_lights.ambientColor, gstate.ambientcolor, gstate.getAmbientA());
	}
	if (dirtyUniforms & DIRTY_MATAMBIENTALPHA) {
		// Note - this one is not in lighting but in transformCommon as it has uses beyond lighting
		Uint8x3ToFloat4_AlphaUint8(ub_base.matAmbient, gstate.materialambient, gstate.getMaterialAmbientA());
	}
	if (dirtyUniforms & DIRTY_MATDIFFUSE) {
		Uint8x3ToFloat4(ub_lights.materialDiffuse, gstate.materialdiffuse);
	}
	if (dirtyUniforms & DIRTY_MATEMISSIVE) {
		Uint8x3ToFloat4(ub_lights.materialEmissive, gstate.materialemissive);
	}
	if (dirtyUniforms & DIRTY_MATSPECULAR) {
		Uint8x3ToFloat4_Alpha(ub_lights.materialSpecular, gstate.materialspecular, getFloat24(gstate.materialspecularcoef));
	}

	for (int i = 0; i < 4; i++) {
		if (dirtyUniforms & (DIRTY_LIGHT0 << i)) {
			if (gstate.isDirectionalLight(i)) {
				// Prenormalize
				float x = getFloat24(gstate.lpos[i * 3 + 0]);
				float y = getFloat24(gstate.lpos[i * 3 + 1]);
				float z = getFloat24(gstate.lpos[i * 3 + 2]);
				float len = sqrtf(x*x + y*y + z*z);
				if (len == 0.0f)
					len = 1.0f;
				else
					len = 1.0f / len;
				float vec[3] = { x * len, y * len, z * len };
				CopyFloat3To4(ub_lights.lpos[i], vec);
			} else {
				ExpandFloat24x3ToFloat4(ub_lights.lpos[i], &gstate.lpos[i * 3]);
			}
			ExpandFloat24x3ToFloat4(ub_lights.ldir[i], &gstate.ldir[i * 3]);
			ExpandFloat24x3ToFloat4(ub_lights.latt[i], &gstate.latt[i * 3]);
			CopyFloat1To4(ub_lights.lightAngle[i], getFloat24(gstate.lcutoff[i]));
			CopyFloat1To4(ub_lights.lightSpotCoef[i], getFloat24(gstate.lconv[i]));
			Uint8x3ToFloat4(ub_lights.lightAmbient[i], gstate.lcolor[i * 3]);
			Uint8x3ToFloat4(ub_lights.lightDiffuse[i], gstate.lcolor[i * 3 + 1]);
			Uint8x3ToFloat4(ub_lights.lightSpecular[i], gstate.lcolor[i * 3 + 2]);
		}
	}
}
Exemplo n.º 7
0
void TransformDrawEngine::SoftwareTransformAndDraw(
    int prim, u8 *decoded, LinkedShader *program, int vertexCount, u32 vertType, void *inds, int indexType, const DecVtxFormat &decVtxFormat, int maxIndex) {

    bool throughmode = (vertType & GE_VTYPE_THROUGH_MASK) != 0;
    bool lmode = gstate.isUsingSecondaryColor() && gstate.isLightingEnabled();

    // TODO: Split up into multiple draw calls for GLES 2.0 where you can't guarantee support for more than 0x10000 verts.

#if defined(MOBILE_DEVICE)
    if (vertexCount > 0x10000/3)
        vertexCount = 0x10000/3;
#endif

    float uscale = 1.0f;
    float vscale = 1.0f;
    bool scaleUV = false;
    if (throughmode) {
        uscale /= gstate_c.curTextureWidth;
        vscale /= gstate_c.curTextureHeight;
    } else {
        scaleUV = !g_Config.bPrescaleUV;
    }

    bool skinningEnabled = vertTypeIsSkinningEnabled(vertType);

    int w = gstate.getTextureWidth(0);
    int h = gstate.getTextureHeight(0);
    float widthFactor = (float) w / (float) gstate_c.curTextureWidth;
    float heightFactor = (float) h / (float) gstate_c.curTextureHeight;

    Lighter lighter(vertType);
    float fog_end = getFloat24(gstate.fog1);
    float fog_slope = getFloat24(gstate.fog2);

    VertexReader reader(decoded, decVtxFormat, vertType);
    for (int index = 0; index < maxIndex; index++) {
        reader.Goto(index);

        float v[3] = {0, 0, 0};
        float c0[4] = {1, 1, 1, 1};
        float c1[4] = {0, 0, 0, 0};
        float uv[3] = {0, 0, 1};
        float fogCoef = 1.0f;

        if (throughmode) {
            // Do not touch the coordinates or the colors. No lighting.
            reader.ReadPos(v);
            if (reader.hasColor0()) {
                reader.ReadColor0(c0);
                for (int j = 0; j < 4; j++) {
                    c1[j] = 0.0f;
                }
            } else {
                c0[0] = gstate.getMaterialAmbientR() / 255.f;
                c0[1] = gstate.getMaterialAmbientG() / 255.f;
                c0[2] = gstate.getMaterialAmbientB() / 255.f;
                c0[3] = gstate.getMaterialAmbientA() / 255.f;
            }

            if (reader.hasUV()) {
                reader.ReadUV(uv);

                uv[0] *= uscale;
                uv[1] *= vscale;
            }
            fogCoef = 1.0f;
            // Scale UV?
        } else {
            // We do software T&L for now
            float out[3], norm[3];
            float pos[3], nrm[3];
            Vec3f normal(0, 0, 1);
            reader.ReadPos(pos);
            if (reader.hasNormal())
                reader.ReadNrm(nrm);

            if (!skinningEnabled) {
                Vec3ByMatrix43(out, pos, gstate.worldMatrix);
                if (reader.hasNormal()) {
                    Norm3ByMatrix43(norm, nrm, gstate.worldMatrix);
                    normal = Vec3f(norm).Normalized();
                }
            } else {
                float weights[8];
                reader.ReadWeights(weights);
                // Skinning
                Vec3f psum(0,0,0);
                Vec3f nsum(0,0,0);
                for (int i = 0; i < vertTypeGetNumBoneWeights(vertType); i++) {
                    if (weights[i] != 0.0f) {
                        Vec3ByMatrix43(out, pos, gstate.boneMatrix+i*12);
                        Vec3f tpos(out);
                        psum += tpos * weights[i];
                        if (reader.hasNormal()) {
                            Norm3ByMatrix43(norm, nrm, gstate.boneMatrix+i*12);
                            Vec3f tnorm(norm);
                            nsum += tnorm * weights[i];
                        }
                    }
                }

                // Yes, we really must multiply by the world matrix too.
                Vec3ByMatrix43(out, psum.AsArray(), gstate.worldMatrix);
                if (reader.hasNormal()) {
                    Norm3ByMatrix43(norm, nsum.AsArray(), gstate.worldMatrix);
                    normal = Vec3f(norm).Normalized();
                }
            }

            // Perform lighting here if enabled. don't need to check through, it's checked above.
            float unlitColor[4] = {1, 1, 1, 1};
            if (reader.hasColor0()) {
                reader.ReadColor0(unlitColor);
            } else {
                unlitColor[0] = gstate.getMaterialAmbientR() / 255.f;
                unlitColor[1] = gstate.getMaterialAmbientG() / 255.f;
                unlitColor[2] = gstate.getMaterialAmbientB() / 255.f;
                unlitColor[3] = gstate.getMaterialAmbientA() / 255.f;
            }
            float litColor0[4];
            float litColor1[4];
            lighter.Light(litColor0, litColor1, unlitColor, out, normal);

            if (gstate.isLightingEnabled()) {
                // Don't ignore gstate.lmode - we should send two colors in that case
                for (int j = 0; j < 4; j++) {
                    c0[j] = litColor0[j];
                }
                if (lmode) {
                    // Separate colors
                    for (int j = 0; j < 4; j++) {
                        c1[j] = litColor1[j];
                    }
                } else {
                    // Summed color into c0
                    for (int j = 0; j < 4; j++) {
                        c0[j] = ((c0[j] + litColor1[j]) > 1.0f) ? 1.0f : (c0[j] + litColor1[j]);
                    }
                }
            } else {
                if (reader.hasColor0()) {
                    for (int j = 0; j < 4; j++) {
                        c0[j] = unlitColor[j];
                    }
                } else {
                    c0[0] = gstate.getMaterialAmbientR() / 255.f;
                    c0[1] = gstate.getMaterialAmbientG() / 255.f;
                    c0[2] = gstate.getMaterialAmbientB() / 255.f;
                    c0[3] = gstate.getMaterialAmbientA() / 255.f;
                }
                if (lmode) {
                    for (int j = 0; j < 4; j++) {
                        c1[j] = 0.0f;
                    }
                }
            }

            float ruv[2] = {0.0f, 0.0f};
            if (reader.hasUV())
                reader.ReadUV(ruv);

            // Perform texture coordinate generation after the transform and lighting - one style of UV depends on lights.
            switch (gstate.getUVGenMode()) {
            case GE_TEXMAP_TEXTURE_COORDS:	// UV mapping
            case GE_TEXMAP_UNKNOWN: // Seen in Riviera.  Unsure of meaning, but this works.
                // Texture scale/offset is only performed in this mode.
                if (scaleUV) {
                    uv[0] = ruv[0]*gstate_c.uv.uScale + gstate_c.uv.uOff;
                    uv[1] = ruv[1]*gstate_c.uv.vScale + gstate_c.uv.vOff;
                } else {
                    uv[0] = ruv[0];
                    uv[1] = ruv[1];
                }
                uv[2] = 1.0f;
                break;

            case GE_TEXMAP_TEXTURE_MATRIX:
            {
                // Projection mapping
                Vec3f source;
                switch (gstate.getUVProjMode())	{
                case GE_PROJMAP_POSITION: // Use model space XYZ as source
                    source = pos;
                    break;

                case GE_PROJMAP_UV: // Use unscaled UV as source
                    source = Vec3f(ruv[0], ruv[1], 0.0f);
                    break;

                case GE_PROJMAP_NORMALIZED_NORMAL: // Use normalized normal as source
                    if (reader.hasNormal()) {
                        source = Vec3f(norm).Normalized();
                    } else {
                        ERROR_LOG_REPORT(G3D, "Normal projection mapping without normal?");
                        source = Vec3f(0.0f, 0.0f, 1.0f);
                    }
                    break;

                case GE_PROJMAP_NORMAL: // Use non-normalized normal as source!
                    if (reader.hasNormal()) {
                        source = Vec3f(norm);
                    } else {
                        ERROR_LOG_REPORT(G3D, "Normal projection mapping without normal?");
                        source = Vec3f(0.0f, 0.0f, 1.0f);
                    }
                    break;
                }

                float uvw[3];
                Vec3ByMatrix43(uvw, &source.x, gstate.tgenMatrix);
                uv[0] = uvw[0];
                uv[1] = uvw[1];
                uv[2] = uvw[2];
            }
            break;

            case GE_TEXMAP_ENVIRONMENT_MAP:
                // Shade mapping - use two light sources to generate U and V.
            {
                Vec3f lightpos0 = Vec3f(gstate_c.lightpos[gstate.getUVLS0()]).Normalized();
                Vec3f lightpos1 = Vec3f(gstate_c.lightpos[gstate.getUVLS1()]).Normalized();

                uv[0] = (1.0f + Dot(lightpos0, normal))/2.0f;
                uv[1] = (1.0f - Dot(lightpos1, normal))/2.0f;
                uv[2] = 1.0f;
            }
            break;

            default:
                // Illegal
                ERROR_LOG_REPORT(G3D, "Impossible UV gen mode? %d", gstate.getUVGenMode());
                break;
            }

            uv[0] = uv[0] * widthFactor;
            uv[1] = uv[1] * heightFactor;

            // Transform the coord by the view matrix.
            Vec3ByMatrix43(v, out, gstate.viewMatrix);
            fogCoef = (v[2] + fog_end) * fog_slope;
        }

        // TODO: Write to a flexible buffer, we don't always need all four components.
        memcpy(&transformed[index].x, v, 3 * sizeof(float));
        transformed[index].fog = fogCoef;
        memcpy(&transformed[index].u, uv, 3 * sizeof(float));
        if (gstate_c.flipTexture) {
            transformed[index].v = 1.0f - transformed[index].v;
        }
        for (int i = 0; i < 4; i++) {
            transformed[index].color0[i] = c0[i] * 255.0f;
        }
        for (int i = 0; i < 3; i++) {
            transformed[index].color1[i] = c1[i] * 255.0f;
        }
    }

    // Here's the best opportunity to try to detect rectangles used to clear the screen, and
    // replace them with real OpenGL clears. This can provide a speedup on certain mobile chips.
    // Disabled for now - depth does not come out exactly the same.
    //
    // An alternative option is to simply ditch all the verts except the first and last to create a single
    // rectangle out of many. Quite a small optimization though.
    if (false && maxIndex > 1 && gstate.isModeClear() && prim == GE_PRIM_RECTANGLES && IsReallyAClear(maxIndex)) {
        u32 clearColor;
        memcpy(&clearColor, transformed[0].color0, 4);
        float clearDepth = transformed[0].z;
        const float col[4] = {
            ((clearColor & 0xFF)) / 255.0f,
            ((clearColor & 0xFF00) >> 8) / 255.0f,
            ((clearColor & 0xFF0000) >> 16) / 255.0f,
            ((clearColor & 0xFF000000) >> 24) / 255.0f,
        };

        bool colorMask = gstate.isClearModeColorMask();
        bool alphaMask = gstate.isClearModeAlphaMask();
        glstate.colorMask.set(colorMask, colorMask, colorMask, alphaMask);
        if (alphaMask) {
            glstate.stencilTest.set(true);
            // Clear stencil
            // TODO: extract the stencilValue properly, see below
            int stencilValue = 0;
            glstate.stencilFunc.set(GL_ALWAYS, stencilValue, 255);
        } else {
            // Don't touch stencil
            glstate.stencilTest.set(false);
        }
        glstate.scissorTest.set(false);
        bool depthMask = gstate.isClearModeDepthMask();

        int target = 0;
        if (colorMask || alphaMask) target |= GL_COLOR_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
        if (depthMask) target |= GL_DEPTH_BUFFER_BIT;

        glClearColor(col[0], col[1], col[2], col[3]);
#ifdef USING_GLES2
        glClearDepthf(clearDepth);
#else
        glClearDepth(clearDepth);
#endif
        glClearStencil(0);  // TODO - take from alpha?
        glClear(target);
        return;
    }
Exemplo n.º 8
0
void Process(VertexData& vertex)
{
	Vec3<int> mec = Vec3<int>(gstate.getMaterialEmissiveR(), gstate.getMaterialEmissiveG(), gstate.getMaterialEmissiveB());

	Vec3<int> mac = (gstate.materialupdate&1)
						? vertex.color0.rgb()
						: Vec3<int>(gstate.getMaterialAmbientR(), gstate.getMaterialAmbientG(), gstate.getMaterialAmbientB());
	Vec3<int> final_color = mec + mac * Vec3<int>(gstate.getAmbientR(), gstate.getAmbientG(), gstate.getAmbientB()) / 255;
	Vec3<int> specular_color(0, 0, 0);

	for (unsigned int light = 0; light < 4; ++light) {
		// Always calculate texture coords from lighting results if environment mapping is active
		// TODO: specular lighting should affect this, too!
		// TODO: Not sure if this really should be done even if lighting is disabled altogether
		if (gstate.getUVGenMode() == GE_TEXMAP_ENVIRONMENT_MAP) {
			Vec3<float> L = Vec3<float>(getFloat24(gstate.lpos[3*light]&0xFFFFFF), getFloat24(gstate.lpos[3*light+1]&0xFFFFFF),getFloat24(gstate.lpos[3*light+2]&0xFFFFFF));
			float diffuse_factor = Dot(L,vertex.worldnormal) / L.Length() / vertex.worldnormal.Length();

			if (gstate.getUVLS0() == light)
				vertex.texturecoords.s() = (diffuse_factor + 1.f) / 2.f;

			if (gstate.getUVLS1() == light)
				vertex.texturecoords.t() = (diffuse_factor + 1.f) / 2.f;
		}
	}

	if (!gstate.isLightingEnabled())
		return;

	for (unsigned int light = 0; light < 4; ++light) {
		if (!gstate.isLightChanEnabled(light))
			continue;

		// L =  vector from vertex to light source
		// TODO: Should transfer the light positions to world/view space for these calculations
		Vec3<float> L = Vec3<float>(getFloat24(gstate.lpos[3*light]&0xFFFFFF), getFloat24(gstate.lpos[3*light+1]&0xFFFFFF),getFloat24(gstate.lpos[3*light+2]&0xFFFFFF));
		L -= vertex.worldpos;
		float d = L.Length();

		float lka = getFloat24(gstate.latt[3*light]&0xFFFFFF);
		float lkb = getFloat24(gstate.latt[3*light+1]&0xFFFFFF);
		float lkc = getFloat24(gstate.latt[3*light+2]&0xFFFFFF);
		float att = 1.f;
		if (!gstate.isDirectionalLight(light)) {
			att = 1.f / (lka + lkb * d + lkc * d * d);
			if (att > 1.f) att = 1.f;
			if (att < 0.f) att = 0.f;
		}

		float spot = 1.f;
		if (gstate.isSpotLight(light)) {
			Vec3<float> dir = Vec3<float>(getFloat24(gstate.ldir[3*light]&0xFFFFFF), getFloat24(gstate.ldir[3*light+1]&0xFFFFFF),getFloat24(gstate.ldir[3*light+2]&0xFFFFFF));
			float _spot = Dot(-L,dir) / d / dir.Length();
			float cutoff = getFloat24(gstate.lcutoff[light]&0xFFFFFF);
			if (_spot > cutoff) {
				spot = _spot;
				float conv = getFloat24(gstate.lconv[light]&0xFFFFFF);
				spot = pow(_spot, conv);
			} else {
				spot = 0.f;
			}
		}

		// ambient lighting
		Vec3<int> lac = Vec3<int>(gstate.getLightAmbientColorR(light), gstate.getLightAmbientColorG(light), gstate.getLightAmbientColorB(light));
		final_color.r() += (int)(att * spot * lac.r() * mac.r() / 255);
		final_color.g() += (int)(att * spot * lac.g() * mac.g() / 255);
		final_color.b() += (int)(att * spot * lac.b() * mac.b() / 255);

		// diffuse lighting
		Vec3<int> ldc = Vec3<int>(gstate.getDiffuseColorR(light), gstate.getDiffuseColorG(light), gstate.getDiffuseColorB(light));
		Vec3<int> mdc = (gstate.materialupdate&2)
							? vertex.color0.rgb()
							: Vec3<int>(gstate.getMaterialDiffuseR(), gstate.getMaterialDiffuseG(), gstate.getMaterialDiffuseB());

		float diffuse_factor = Dot(L,vertex.worldnormal) / d / vertex.worldnormal.Length();
		if (gstate.isUsingPoweredDiffuseLight(light)) {
			float k = getFloat24(gstate.materialspecularcoef&0xFFFFFF);
			diffuse_factor = pow(diffuse_factor, k);
		}

		if (diffuse_factor > 0.f) {
			final_color.r() += (int)(att * spot * ldc.r() * mdc.r() * diffuse_factor / 255);
			final_color.g() += (int)(att * spot * ldc.g() * mdc.g() * diffuse_factor / 255);
			final_color.b() += (int)(att * spot * ldc.b() * mdc.b() * diffuse_factor / 255);
		}

		if (gstate.isUsingSpecularLight(light)) {
			Vec3<float> E(0.f, 0.f, 1.f);
			Mat3x3<float> view_matrix(gstate.viewMatrix);
			Vec3<float> worldE = view_matrix.Inverse() * (E - Vec3<float>(gstate.viewMatrix[9], gstate.viewMatrix[10], gstate.viewMatrix[11]));
			Vec3<float> H = worldE / worldE.Length() + L / L.Length();

			Vec3<int> lsc = Vec3<int>(gstate.getSpecularColorR(light), gstate.getSpecularColorG(light), gstate.getSpecularColorB(light));
			Vec3<int> msc = (gstate.materialupdate&4)
								? vertex.color0.rgb()
								: Vec3<int>(gstate.getMaterialSpecularR(), gstate.getMaterialSpecularG(), gstate.getMaterialSpecularB());

			float specular_factor = Dot(H,vertex.worldnormal) / H.Length() / vertex.worldnormal.Length();
			float k = getFloat24(gstate.materialspecularcoef&0xFFFFFF);
			specular_factor = pow(specular_factor, k);

			if (specular_factor > 0.f) {
				specular_color.r() += (int)(att * spot * lsc.r() * msc.r() * specular_factor / 255);
				specular_color.g() += (int)(att * spot * lsc.g() * msc.g() * specular_factor / 255);
				specular_color.b() += (int)(att * spot * lsc.b() * msc.b() * specular_factor / 255);
			}
		}
	}

	vertex.color0.r() = final_color.r();
	vertex.color0.g() = final_color.g();
	vertex.color0.b() = final_color.b();

	if (gstate.isUsingSecondaryColor())
	{
		vertex.color1 = specular_color;
	} else {
		vertex.color0.r() += specular_color.r();
		vertex.color0.g() += specular_color.g();
		vertex.color0.b() += specular_color.b();
		vertex.color1 = Vec3<int>(0, 0, 0);
	}

	int maa = (gstate.materialupdate&1) ? vertex.color0.a() : gstate.getMaterialAmbientA();
	vertex.color0.a() = gstate.getAmbientA() * maa / 255;

	if (vertex.color0.r() > 255) vertex.color0.r() = 255;
	if (vertex.color0.g() > 255) vertex.color0.g() = 255;
	if (vertex.color0.b() > 255) vertex.color0.b() = 255;
	if (vertex.color0.a() > 255) vertex.color0.a() = 255;
	if (vertex.color1.r() > 255) vertex.color1.r() = 255;
	if (vertex.color1.g() > 255) vertex.color1.g() = 255;
	if (vertex.color1.b() > 255) vertex.color1.b() = 255;
	if (vertex.color0.r() < 0) vertex.color0.r() = 0;
	if (vertex.color0.g() < 0) vertex.color0.g() = 0;
	if (vertex.color0.b() < 0) vertex.color0.b() = 0;
	if (vertex.color0.a() < 0) vertex.color0.a() = 0;
	if (vertex.color1.r() < 0) vertex.color1.r() = 0;
	if (vertex.color1.g() < 0) vertex.color1.g() = 0;
	if (vertex.color1.b() < 0) vertex.color1.b() = 0;
}
Exemplo n.º 9
0
void Light(float colorOut[4], const float colorIn[4], Vec3 pos, Vec3 normal, float dots[4])
{
	// could cache a lot of stuff, such as ambient, across vertices...

	bool doShadeMapping = (gstate.texmapmode & 0x3) == 2;
	if (!doShadeMapping && !(gstate.lightEnable[0]&1) && !(gstate.lightEnable[1]&1) && !(gstate.lightEnable[2]&1) && !(gstate.lightEnable[3]&1))
	{
		memcpy(colorOut, colorIn, sizeof(float) * 4);
		return;
	}

	Color4 emissive;
	emissive.GetFromRGB(gstate.materialemissive);
	Color4 globalAmbient;
	globalAmbient.GetFromRGB(gstate.ambientcolor);
	globalAmbient.GetFromA(gstate.ambientalpha);

	Vec3 norm = normal.Normalized();
	Color4 in(colorIn);

	Color4 ambient;
	if (gstate.materialupdate & 1)
	{
		ambient = in;
	}
	else
	{
		ambient.GetFromRGB(gstate.materialambient);
		ambient.a=1.0f;
	}

	Color4 diffuse;
	if (gstate.materialupdate & 2)
	{
		diffuse = in;
	}
	else
	{
		diffuse.GetFromRGB(gstate.materialdiffuse);
		diffuse.a=1.0f;
	}

	Color4 specular;
	if (gstate.materialupdate & 4)
	{
		specular = in;
	}
	else
	{
		specular.GetFromRGB(gstate.materialspecular);
		specular.a=1.0f;
	}

	float specCoef = getFloat24(gstate.materialspecularcoef);

	norm.Normalize();

	Vec3 viewer(gstate.viewMatrix[8], gstate.viewMatrix[9], gstate.viewMatrix[10]);

	Color4 lightSum = globalAmbient * ambient + emissive;


	// Try lights.elf - there's something wrong with the lighting

	for (int l = 0; l < 4; l++)
	{
		// can we skip this light?
		if ((gstate.lightEnable[l] & 1) == 0) // && !doShadeMapping)
			continue;

		GELightComputation comp = (GELightComputation)(gstate.ltype[l]&3);
		GELightType type = (GELightType)((gstate.ltype[l]>>8)&3);
		Vec3 toLight;

		if (type == GE_LIGHTTYPE_DIRECTIONAL)
			toLight = Vec3(gstate.lightpos[l]);
		else
			toLight = Vec3(gstate.lightpos[l]) - pos;

		Vec3 dir = Vec3(gstate.lightdir[l]);

		bool doSpecular = (comp != GE_LIGHTCOMP_ONLYDIFFUSE);
		bool poweredDiffuse = comp == GE_LIGHTCOMP_BOTHWITHPOWDIFFUSE;

		float distance = toLight.Normalize(); 

		float lightScale = 1.0f;
		if (type != GE_LIGHTTYPE_DIRECTIONAL)
		{
			lightScale = 1.0f / (gstate.lightatt[l][0] + gstate.lightatt[l][1]*distance + gstate.lightatt[l][2]*distance*distance);
			if (lightScale>1.0f) lightScale=1.0f;
		}

		float dot = toLight * norm;

		// Clamp dot to zero.
		if (dot < 0.0f) dot = 0.0f;

		if (poweredDiffuse)
			dot = powf(dot, specCoef);

		Color4 diff = (gstate.lightColor[1][l] * diffuse) * (dot*lightScale);	
		Color4 spec(0,0,0,0);

		if (doSpecular)
		{
			Vec3 halfVec = toLight;
			halfVec += viewer.Normalized();
			halfVec.Normalize();

			dot = halfVec * norm;
			if (dot >= 0)
			{
				spec += (gstate.lightColor[2][l] * specular * (powf(dot, specCoef)*lightScale));
			}	
		}
		dots[l] = dot;
		if (gstate.lightEnable[l] & 1)
		{
			lightSum += gstate.lightColor[0][l]*ambient + diff + spec;
		}
	}

	for (int i = 0; i < 3; i++)
		colorOut[i] = lightSum[i];
}
Exemplo n.º 10
0
void TransformDrawEngine::ApplyDrawState(int prim) {
	// TODO: All this setup is soon so expensive that we'll need dirty flags, or simply do it in the command writes where we detect dirty by xoring. Silly to do all this work on every drawcall.

	if (gstate_c.textureChanged != TEXCHANGE_UNCHANGED && !gstate.isModeClear() && gstate.isTextureMapEnabled()) {
		textureCache_->SetTexture();
		gstate_c.textureChanged = TEXCHANGE_UNCHANGED;
	}

	// TODO: The top bit of the alpha channel should be written to the stencil bit somehow. This appears to require very expensive multipass rendering :( Alternatively, one could do a
	// single fullscreen pass that converts alpha to stencil (or 2 passes, to set both the 0 and 1 values) very easily.

	// Set blend
	bool wantBlend = !gstate.isModeClear() && gstate.isAlphaBlendEnabled();
	glstate.blend.set(wantBlend);
	if (wantBlend) {
		// This can't be done exactly as there are several PSP blend modes that are impossible to do on OpenGL ES 2.0, and some even on regular OpenGL for desktop.
		// HOWEVER - we should be able to approximate the 2x modes in the shader, although they will clip wrongly.

		// Examples of seen unimplementable blend states:
		// Mortal Kombat Unchained: FixA=0000ff FixB=000080 FuncA=10 FuncB=10

		int blendFuncA  = gstate.getBlendFuncA();
		int blendFuncB  = gstate.getBlendFuncB();
		GEBlendMode blendFuncEq = gstate.getBlendEq();
		if (blendFuncA > GE_SRCBLEND_FIXA) blendFuncA = GE_SRCBLEND_FIXA;
		if (blendFuncB > GE_DSTBLEND_FIXB) blendFuncB = GE_DSTBLEND_FIXB;

		float constantAlpha = 1.0f;
		ReplaceAlphaType replaceAlphaWithStencil = ReplaceAlphaWithStencil();
		if (gstate.isStencilTestEnabled() && replaceAlphaWithStencil == REPLACE_ALPHA_NO) {
			if (ReplaceAlphaWithStencilType() == STENCIL_VALUE_UNIFORM) {
				constantAlpha = (float) gstate.getStencilTestRef() * (1.0f / 255.0f);
			}
		}

		// Shortcut by using GL_ONE where possible, no need to set blendcolor
		GLuint glBlendFuncA = blendFuncA == GE_SRCBLEND_FIXA ? blendColor2Func(gstate.getFixA()) : aLookup[blendFuncA];
		GLuint glBlendFuncB = blendFuncB == GE_DSTBLEND_FIXB ? blendColor2Func(gstate.getFixB()) : bLookup[blendFuncB];

		if (replaceAlphaWithStencil == REPLACE_ALPHA_DUALSOURCE) {
			glBlendFuncA = toDualSource(glBlendFuncA);
			glBlendFuncB = toDualSource(glBlendFuncB);
		}

		if (blendFuncA == GE_SRCBLEND_FIXA || blendFuncB == GE_DSTBLEND_FIXB) {
			Vec3f fixA = Vec3f::FromRGB(gstate.getFixA());
			Vec3f fixB = Vec3f::FromRGB(gstate.getFixB());
			if (glBlendFuncA == GL_INVALID_ENUM && glBlendFuncB != GL_INVALID_ENUM) {
				// Can use blendcolor trivially.
				const float blendColor[4] = {fixA.x, fixA.y, fixA.z, constantAlpha};
				glstate.blendColor.set(blendColor);
				glBlendFuncA = GL_CONSTANT_COLOR;
			} else if (glBlendFuncA != GL_INVALID_ENUM && glBlendFuncB == GL_INVALID_ENUM) {
				// Can use blendcolor trivially.
				const float blendColor[4] = {fixB.x, fixB.y, fixB.z, constantAlpha};
				glstate.blendColor.set(blendColor);
				glBlendFuncB = GL_CONSTANT_COLOR;
			} else if (glBlendFuncA == GL_INVALID_ENUM && glBlendFuncB == GL_INVALID_ENUM) {
				if (blendColorSimilar(fixA, Vec3f::AssignToAll(constantAlpha) - fixB)) {
					glBlendFuncA = GL_CONSTANT_COLOR;
					glBlendFuncB = GL_ONE_MINUS_CONSTANT_COLOR;
					const float blendColor[4] = {fixA.x, fixA.y, fixA.z, constantAlpha};
					glstate.blendColor.set(blendColor);
				} else if (blendColorSimilar(fixA, fixB)) {
					glBlendFuncA = GL_CONSTANT_COLOR;
					glBlendFuncB = GL_CONSTANT_COLOR;
					const float blendColor[4] = {fixA.x, fixA.y, fixA.z, constantAlpha};
					glstate.blendColor.set(blendColor);
				} else {
					static bool didReportBlend = false;
					if (!didReportBlend)
						Reporting::ReportMessage("ERROR INVALID blendcolorstate: FixA=%06x FixB=%06x FuncA=%i FuncB=%i", gstate.getFixA(), gstate.getFixB(), gstate.getBlendFuncA(), gstate.getBlendFuncB());
					didReportBlend = true;

					DEBUG_LOG(G3D, "ERROR INVALID blendcolorstate: FixA=%06x FixB=%06x FuncA=%i FuncB=%i", gstate.getFixA(), gstate.getFixB(), gstate.getBlendFuncA(), gstate.getBlendFuncB());
					// Let's approximate, at least.  Close is better than totally off.
					const bool nearZeroA = blendColorSimilar(fixA, Vec3f::AssignToAll(0.0f), 0.25f);
					const bool nearZeroB = blendColorSimilar(fixB, Vec3f::AssignToAll(0.0f), 0.25f);
					if (nearZeroA || blendColorSimilar(fixA, Vec3f::AssignToAll(1.0f), 0.25f)) {
						glBlendFuncA = nearZeroA ? GL_ZERO : GL_ONE;
						glBlendFuncB = GL_CONSTANT_COLOR;
						const float blendColor[4] = {fixB.x, fixB.y, fixB.z, constantAlpha};
						glstate.blendColor.set(blendColor);
					// We need to pick something.  Let's go with A as the fixed color.
					} else {
						glBlendFuncA = GL_CONSTANT_COLOR;
						glBlendFuncB = nearZeroB ? GL_ZERO : GL_ONE;
						const float blendColor[4] = {fixA.x, fixA.y, fixA.z, constantAlpha};
						glstate.blendColor.set(blendColor);
					}
				}
			} else {
				// We optimized both, but that's probably not necessary, so let's pick one to be constant.
				// For now let's just pick whichever was fixed instead of checking error.
				if (blendFuncA == GE_SRCBLEND_FIXA) {
					glBlendFuncA = GL_CONSTANT_COLOR;
					const float blendColor[4] = {fixA.x, fixA.y, fixA.z, constantAlpha};
					glstate.blendColor.set(blendColor);
				} else {
					glBlendFuncB = GL_CONSTANT_COLOR;
					const float blendColor[4] = {fixB.x, fixB.y, fixB.z, constantAlpha};
					glstate.blendColor.set(blendColor);
				}
			}
		} else if (constantAlpha < 1.0f) {
			const float blendColor[4] = {1.0f, 1.0f, 1.0f, constantAlpha};
			glstate.blendColor.set(blendColor);
		}

		// Some Android devices (especially Mali, it seems) composite badly if there's alpha in the backbuffer.
		// So in non-buffered rendering, we will simply consider the dest alpha to be zero in blending equations.
#ifdef ANDROID
		if (g_Config.iRenderingMode == FB_NON_BUFFERED_MODE) {
			if (glBlendFuncA == GL_DST_ALPHA) glBlendFuncA = GL_ZERO;
			if (glBlendFuncB == GL_DST_ALPHA) glBlendFuncB = GL_ZERO;
			if (glBlendFuncA == GL_ONE_MINUS_DST_ALPHA) glBlendFuncA = GL_ONE;
			if (glBlendFuncB == GL_ONE_MINUS_DST_ALPHA) glBlendFuncB = GL_ONE;
		}
#endif

		// At this point, through all paths above, glBlendFuncA and glBlendFuncB will be set right somehow.

		// The stencil-to-alpha in fragment shader doesn't apply here (blending is enabled), and we shouldn't
		// do any blending in the alpha channel as that doesn't seem to happen on PSP. So lacking a better option,
		// the only value we can set alpha to here without multipass and dual source alpha is zero (by setting
		// the factors to zero). So let's do that.
		if (replaceAlphaWithStencil != REPLACE_ALPHA_NO) {
			// Let the fragment shader take care of it.
			glstate.blendFuncSeparate.set(glBlendFuncA, glBlendFuncB, GL_ONE, GL_ZERO);
		} else if (gstate.isStencilTestEnabled()) {
			switch (ReplaceAlphaWithStencilType()) {
			case STENCIL_VALUE_KEEP:
				glstate.blendFuncSeparate.set(glBlendFuncA, glBlendFuncB, GL_ZERO, GL_ONE);
				break;
			case STENCIL_VALUE_ONE:
				// This won't give one but it's our best shot...
				glstate.blendFuncSeparate.set(glBlendFuncA, glBlendFuncB, GL_ONE, GL_ONE);
				break;
			case STENCIL_VALUE_ZERO:
				glstate.blendFuncSeparate.set(glBlendFuncA, glBlendFuncB, GL_ZERO, GL_ZERO);
				break;
			case STENCIL_VALUE_UNIFORM:
				// This won't give a correct value (it multiplies) but it may be better than random values.
				glstate.blendFuncSeparate.set(glBlendFuncA, glBlendFuncB, GL_CONSTANT_ALPHA, GL_ZERO);
				break;
			case STENCIL_VALUE_UNKNOWN:
				// For now, let's err at zero.  This is INVERT or INCR/DECR.
				glstate.blendFuncSeparate.set(glBlendFuncA, glBlendFuncB, GL_ZERO, GL_ZERO);
				break;
			}
		} else {
			// Retain the existing value when stencil testing is off.
			glstate.blendFuncSeparate.set(glBlendFuncA, glBlendFuncB, GL_ZERO, GL_ONE);
		}

		if (blendFuncEq == GE_BLENDMODE_ABSDIFF) {
			WARN_LOG_REPORT_ONCE(blendAbsdiff, G3D, "Unsupported absdiff blend mode");
		}

		if (((blendFuncEq >= GE_BLENDMODE_MIN) && gl_extensions.EXT_blend_minmax) || gl_extensions.GLES3) {
			if (blendFuncEq == GE_BLENDMODE_ABSDIFF && gl_extensions.EXT_shader_framebuffer_fetch) {
				// Handle GE_BLENDMODE_ABSDIFF in fragment shader and turn off regular alpha blending here.
				glstate.blend.set(false);
			} else {
				glstate.blendEquation.set(eqLookup[blendFuncEq]);
			}
		} else {
			glstate.blendEquation.set(eqLookupNoMinMax[blendFuncEq]);
		}
	}

	bool alwaysDepthWrite = g_Config.bAlwaysDepthWrite;
	bool enableStencilTest = !g_Config.bDisableStencilTest;

	// Dither
	if (gstate.isDitherEnabled()) {
		glstate.dither.enable();
		glstate.dither.set(GL_TRUE);
	} else
		glstate.dither.disable();

	if (gstate.isModeClear()) {
#if !defined(USING_GLES2)
		// Logic Ops
		glstate.colorLogicOp.disable();
#endif
		// Culling
		glstate.cullFace.disable();

		// Depth Test
		glstate.depthTest.enable();
		glstate.depthFunc.set(GL_ALWAYS);
		glstate.depthWrite.set(gstate.isClearModeDepthMask() || alwaysDepthWrite ? GL_TRUE : GL_FALSE);
		if (gstate.isClearModeDepthMask() || alwaysDepthWrite) {
			framebufferManager_->SetDepthUpdated();
		}

		// Color Test
		bool colorMask = gstate.isClearModeColorMask();
		bool alphaMask = gstate.isClearModeAlphaMask();
		glstate.colorMask.set(colorMask, colorMask, colorMask, alphaMask);

		// Stencil Test
		if (alphaMask && enableStencilTest) {
			glstate.stencilTest.enable();
			glstate.stencilOp.set(GL_REPLACE, GL_REPLACE, GL_REPLACE);
			// TODO: In clear mode, the stencil value is set to the alpha value of the vertex.
			// A normal clear will be 2 points, the second point has the color.
			// We should set "ref" to that value instead of 0.
			// In case of clear rectangles, we set it again once we know what the color is.
			glstate.stencilFunc.set(GL_ALWAYS, 255, 0xFF);
		} else {
			glstate.stencilTest.disable();
		}
	} else {
#if !defined(USING_GLES2)
		// Logic Ops
		if (gstate.isLogicOpEnabled() && gstate.getLogicOp() != GE_LOGIC_COPY) {
			glstate.colorLogicOp.enable();
			glstate.logicOp.set(logicOps[gstate.getLogicOp()]);
		} else {
			glstate.colorLogicOp.disable();
		}
#endif
		// Set cull
		bool cullEnabled = !gstate.isModeThrough() && prim != GE_PRIM_RECTANGLES && gstate.isCullEnabled();
		if (cullEnabled) {
			glstate.cullFace.enable();
			glstate.cullFaceMode.set(cullingMode[gstate.getCullMode()]);
		} else {
			glstate.cullFace.disable();
		}

		// Depth Test
		if (gstate.isDepthTestEnabled()) {
			glstate.depthTest.enable();
			glstate.depthFunc.set(ztests[gstate.getDepthTestFunction()]);
			glstate.depthWrite.set(gstate.isDepthWriteEnabled() || alwaysDepthWrite ? GL_TRUE : GL_FALSE);
			framebufferManager_->SetDepthUpdated();
		} else {
			glstate.depthTest.disable();
		}

		// PSP color/alpha mask is per bit but we can only support per byte.
		// But let's do that, at least. And let's try a threshold.
		bool rmask = (gstate.pmskc & 0xFF) < 128;
		bool gmask = ((gstate.pmskc >> 8) & 0xFF) < 128;
		bool bmask = ((gstate.pmskc >> 16) & 0xFF) < 128;
		bool amask = (gstate.pmska & 0xFF) < 128;

		// Let's not write to alpha if stencil isn't enabled.
		if (!gstate.isStencilTestEnabled()) {
			amask = false;
		} else {
			// If the stencil type is set to KEEP, we shouldn't write to the stencil/alpha channel.
			if (ReplaceAlphaWithStencilType() == STENCIL_VALUE_KEEP) {
				amask = false;
			}
		}
		if (g_Config.bAlphaMaskHack) {
			amask = true;  // Yes, this makes no sense, but it "fixes" the 3rd Birthday by popular demand.
		}

		glstate.colorMask.set(rmask, gmask, bmask, amask);

		// Stencil Test
		if (gstate.isStencilTestEnabled() && enableStencilTest) {
			glstate.stencilTest.enable();
			glstate.stencilFunc.set(ztests[gstate.getStencilTestFunction()],
				gstate.getStencilTestRef(),
				gstate.getStencilTestMask());
			glstate.stencilOp.set(stencilOps[gstate.getStencilOpSFail()],  // stencil fail
				stencilOps[gstate.getStencilOpZFail()],  // depth fail
				stencilOps[gstate.getStencilOpZPass()]); // depth pass
		} else {
			glstate.stencilTest.disable();
		}
	}

	float renderWidthFactor, renderHeightFactor;
	float renderWidth, renderHeight;
	float renderX, renderY;
	bool useBufferedRendering = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE;
	if (useBufferedRendering) {
		renderX = 0.0f;
		renderY = 0.0f;
		renderWidth = framebufferManager_->GetRenderWidth();
		renderHeight = framebufferManager_->GetRenderHeight();
	} else {
		// TODO: Aspect-ratio aware and centered
		float pixelW = PSP_CoreParameter().pixelWidth;
		float pixelH = PSP_CoreParameter().pixelHeight;
		CenterRect(&renderX, &renderY, &renderWidth, &renderHeight, 480, 272, pixelW, pixelH);
	}

	renderWidthFactor = (float)renderWidth / framebufferManager_->GetTargetWidth();
	renderHeightFactor = (float)renderHeight / framebufferManager_->GetTargetHeight();

	bool throughmode = gstate.isModeThrough();

	// Scissor
	int scissorX1 = gstate.getScissorX1();
	int scissorY1 = gstate.getScissorY1();
	int scissorX2 = gstate.getScissorX2() + 1;
	int scissorY2 = gstate.getScissorY2() + 1;

	// This is a bit of a hack as the render buffer isn't always that size
	if (scissorX1 == 0 && scissorY1 == 0 
		&& scissorX2 >= (int) gstate_c.curRTWidth
		&& scissorY2 >= (int) gstate_c.curRTHeight) {
		glstate.scissorTest.disable();
	} else {
		glstate.scissorTest.enable();
		glstate.scissorRect.set(
			renderX + scissorX1 * renderWidthFactor,
			renderY + renderHeight - (scissorY2 * renderHeightFactor),
			(scissorX2 - scissorX1) * renderWidthFactor,
			(scissorY2 - scissorY1) * renderHeightFactor);
	}

	/*
	int regionX1 = gstate.region1 & 0x3FF;
	int regionY1 = (gstate.region1 >> 10) & 0x3FF;
	int regionX2 = (gstate.region2 & 0x3FF) + 1;
	int regionY2 = ((gstate.region2 >> 10) & 0x3FF) + 1;
	*/
	int regionX1 = 0;
	int regionY1 = 0;
	int regionX2 = gstate_c.curRTWidth;
	int regionY2 = gstate_c.curRTHeight;

	float offsetX = gstate.getOffsetX();
	float offsetY = gstate.getOffsetY();

	if (throughmode) {
		// No viewport transform here. Let's experiment with using region.
		glstate.viewport.set(
			renderX + (0 + regionX1) * renderWidthFactor, 
			renderY + (0 - regionY1) * renderHeightFactor,
			(regionX2 - regionX1) * renderWidthFactor,
			(regionY2 - regionY1) * renderHeightFactor);
		glstate.depthRange.set(0.0f, 1.0f);
	} else {
		// These we can turn into a glViewport call, offset by offsetX and offsetY. Math after.
		float vpXa = getFloat24(gstate.viewportx1);
		float vpXb = getFloat24(gstate.viewportx2);
		float vpYa = getFloat24(gstate.viewporty1);
		float vpYb = getFloat24(gstate.viewporty2);

		// The viewport transform appears to go like this: 
		// Xscreen = -offsetX + vpXb + vpXa * Xview
		// Yscreen = -offsetY + vpYb + vpYa * Yview
		// Zscreen = vpZb + vpZa * Zview

		// This means that to get the analogue glViewport we must:
		float vpX0 = vpXb - offsetX - vpXa;
		float vpY0 = vpYb - offsetY + vpYa;   // Need to account for sign of Y
		gstate_c.vpWidth = vpXa * 2.0f;
		gstate_c.vpHeight = -vpYa * 2.0f;

		float vpWidth = fabsf(gstate_c.vpWidth);
		float vpHeight = fabsf(gstate_c.vpHeight);

		vpX0 *= renderWidthFactor;
		vpY0 *= renderHeightFactor;
		vpWidth *= renderWidthFactor;
		vpHeight *= renderHeightFactor;

		vpX0 = (vpXb - offsetX - fabsf(vpXa)) * renderWidthFactor;
		// Flip vpY0 to match the OpenGL coordinate system.
		vpY0 = renderHeight - (vpYb - offsetY + fabsf(vpYa)) * renderHeightFactor;		
		
		glstate.viewport.set(vpX0 + renderX, vpY0 + renderY, vpWidth, vpHeight);
		// Sadly, as glViewport takes integers, we will not be able to support sub pixel offsets this way. But meh.
		// shaderManager_->DirtyUniform(DIRTY_PROJMATRIX);

		float zScale = getFloat24(gstate.viewportz1) / 65535.0f;
		float zOff = getFloat24(gstate.viewportz2) / 65535.0f;
		float depthRangeMin = zOff - zScale;
		float depthRangeMax = zOff + zScale;
		glstate.depthRange.set(depthRangeMin, depthRangeMax);
	}
}
Exemplo n.º 11
0
void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipViewport) {
	if (dirtyUniforms & DIRTY_TEXENV) {
		Uint8x3ToFloat4(ub->texEnvColor, gstate.texenvcolor);
	}
	if (dirtyUniforms & DIRTY_ALPHACOLORREF) {
		Uint8x3ToInt4_Alpha(ub->alphaColorRef, gstate.getColorTestRef(), gstate.getAlphaTestRef() & gstate.getAlphaTestMask());
	}
	if (dirtyUniforms & DIRTY_ALPHACOLORMASK) {
		Uint8x3ToInt4_Alpha(ub->colorTestMask, gstate.getColorTestMask(), gstate.getAlphaTestMask());
	}
	if (dirtyUniforms & DIRTY_FOGCOLOR) {
		Uint8x3ToFloat4(ub->fogColor, gstate.fogcolor);
	}
	if (dirtyUniforms & DIRTY_SHADERBLEND) {
		Uint8x3ToFloat4(ub->blendFixA, gstate.getFixA());
		Uint8x3ToFloat4(ub->blendFixB, gstate.getFixB());
	}
	if (dirtyUniforms & DIRTY_TEXCLAMP) {
		const float invW = 1.0f / (float)gstate_c.curTextureWidth;
		const float invH = 1.0f / (float)gstate_c.curTextureHeight;
		const int w = gstate.getTextureWidth(0);
		const int h = gstate.getTextureHeight(0);
		const float widthFactor = (float)w * invW;
		const float heightFactor = (float)h * invH;

		// First wrap xy, then half texel xy (for clamp.)
		ub->texClamp[0] = widthFactor;
		ub->texClamp[1] = heightFactor;
		ub->texClamp[2] = invW * 0.5f;
		ub->texClamp[3] = invH * 0.5f;
		ub->texClampOffset[0] = gstate_c.curTextureXOffset * invW;
		ub->texClampOffset[1] = gstate_c.curTextureYOffset * invH;
	}

	if (dirtyUniforms & DIRTY_PROJMATRIX) {
		Matrix4x4 flippedMatrix;
		memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float));

		const bool invertedY = gstate_c.vpHeight < 0;
		if (invertedY) {
			flippedMatrix[1] = -flippedMatrix[1];
			flippedMatrix[5] = -flippedMatrix[5];
			flippedMatrix[9] = -flippedMatrix[9];
			flippedMatrix[13] = -flippedMatrix[13];
		}
		const bool invertedX = gstate_c.vpWidth < 0;
		if (invertedX) {
			flippedMatrix[0] = -flippedMatrix[0];
			flippedMatrix[4] = -flippedMatrix[4];
			flippedMatrix[8] = -flippedMatrix[8];
			flippedMatrix[12] = -flippedMatrix[12];
		}
		if (flipViewport) {
			ConvertProjMatrixToD3D11(flippedMatrix);
		} else {
			ConvertProjMatrixToVulkan(flippedMatrix);
		}

		if (g_Config.iRenderingMode == 0 && g_display_rotation != DisplayRotation::ROTATE_0) {
			flippedMatrix = flippedMatrix * g_display_rot_matrix;
		}

		CopyMatrix4x4(ub->proj, flippedMatrix.getReadPtr());
	}

	if (dirtyUniforms & DIRTY_PROJTHROUGHMATRIX) {
		Matrix4x4 proj_through;
		if (flipViewport) {
			proj_through.setOrthoD3D(0.0f, gstate_c.curRTWidth, gstate_c.curRTHeight, 0, 0, 1);
		} else {
			proj_through.setOrthoVulkan(0.0f, gstate_c.curRTWidth, 0, gstate_c.curRTHeight, 0, 1);
		}
		if (g_Config.iRenderingMode == 0 && g_display_rotation != DisplayRotation::ROTATE_0) {
			proj_through = proj_through * g_display_rot_matrix;
		}
		CopyMatrix4x4(ub->proj_through, proj_through.getReadPtr());
	}

	// Transform
	if (dirtyUniforms & DIRTY_WORLDMATRIX) {
		ConvertMatrix4x3To3x4Transposed(ub->world, gstate.worldMatrix);
	}
	if (dirtyUniforms & DIRTY_VIEWMATRIX) {
		ConvertMatrix4x3To3x4Transposed(ub->view, gstate.viewMatrix);
	}
	if (dirtyUniforms & DIRTY_TEXMATRIX) {
		ConvertMatrix4x3To3x4Transposed(ub->tex, gstate.tgenMatrix);
	}

	// Combined two small uniforms
	if (dirtyUniforms & (DIRTY_FOGCOEF | DIRTY_STENCILREPLACEVALUE)) {
		float fogcoef_stencil[3] = {
			getFloat24(gstate.fog1),
			getFloat24(gstate.fog2),
			(float)gstate.getStencilTestRef()/255.0f
		};
		if (my_isinf(fogcoef_stencil[1])) {
			// not really sure what a sensible value might be.
			fogcoef_stencil[1] = fogcoef_stencil[1] < 0.0f ? -10000.0f : 10000.0f;
		} else if (my_isnan(fogcoef_stencil[1])) {
			// Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988
			// Just put the fog far away at a large finite distance.
			// Infinities and NaNs are rather unpredictable in shaders on many GPUs
			// so it's best to just make it a sane calculation.
			fogcoef_stencil[0] = 100000.0f;
			fogcoef_stencil[1] = 1.0f;
		}
#ifndef MOBILE_DEVICE
		else if (my_isnanorinf(fogcoef_stencil[1]) || my_isnanorinf(fogcoef_stencil[0])) {
			ERROR_LOG_REPORT_ONCE(fognan, G3D, "Unhandled fog NaN/INF combo: %f %f", fogcoef_stencil[0], fogcoef_stencil[1]);
		}
#endif
		CopyFloat3(ub->fogCoef_stencil, fogcoef_stencil);
	}

	// Note - this one is not in lighting but in transformCommon as it has uses beyond lighting
	if (dirtyUniforms & DIRTY_MATAMBIENTALPHA) {
		Uint8x3ToFloat4_AlphaUint8(ub->matAmbient, gstate.materialambient, gstate.getMaterialAmbientA());
	}

	// Texturing
	if (dirtyUniforms & DIRTY_UVSCALEOFFSET) {
		const float invW = 1.0f / (float)gstate_c.curTextureWidth;
		const float invH = 1.0f / (float)gstate_c.curTextureHeight;
		const int w = gstate.getTextureWidth(0);
		const int h = gstate.getTextureHeight(0);
		const float widthFactor = (float)w * invW;
		const float heightFactor = (float)h * invH;
		if (gstate_c.bezier || gstate_c.spline) {
			// When we are generating UV coordinates through the bezier/spline, we need to apply the scaling.
			// However, this is missing a check that we're not getting our UV:s supplied for us in the vertices.
			ub->uvScaleOffset[0] = gstate_c.uv.uScale * widthFactor;
			ub->uvScaleOffset[1] = gstate_c.uv.vScale * heightFactor;
			ub->uvScaleOffset[2] = gstate_c.uv.uOff * widthFactor;
			ub->uvScaleOffset[3] = gstate_c.uv.vOff * heightFactor;
		} else {
			ub->uvScaleOffset[0] = widthFactor;
			ub->uvScaleOffset[1] = heightFactor;
			ub->uvScaleOffset[2] = 0.0f;
			ub->uvScaleOffset[3] = 0.0f;
		}
	}

	if (dirtyUniforms & DIRTY_DEPTHRANGE) {
		float viewZScale = gstate.getViewportZScale();
		float viewZCenter = gstate.getViewportZCenter();

		// We had to scale and translate Z to account for our clamped Z range.
		// Therefore, we also need to reverse this to round properly.
		//
		// Example: scale = 65535.0, center = 0.0
		// Resulting range = -65535 to 65535, clamped to [0, 65535]
		// gstate_c.vpDepthScale = 2.0f
		// gstate_c.vpZOffset = -1.0f
		//
		// The projection already accounts for those, so we need to reverse them.
		//
		// Additionally, D3D9 uses a range from [0, 1].  We double and move the center.
		viewZScale *= (1.0f / gstate_c.vpDepthScale) * 2.0f;
		viewZCenter -= 65535.0f * gstate_c.vpZOffset + 32768.5f;

		float viewZInvScale;
		if (viewZScale != 0.0) {
			viewZInvScale = 1.0f / viewZScale;
		} else {
			viewZInvScale = 0.0;
		}

		ub->depthRange[0] = viewZScale;
		ub->depthRange[1] = viewZCenter;
		ub->depthRange[2] = viewZCenter;
		ub->depthRange[3] = viewZInvScale;
	}

	if (dirtyUniforms & DIRTY_BEZIERSPLINE) {
		ub->spline_count_u = gstate_c.spline_count_u;
		ub->spline_count_v = gstate_c.spline_count_v;
		ub->spline_type_u = gstate_c.spline_type_u;
		ub->spline_type_v = gstate_c.spline_type_v;
	}
}
Exemplo n.º 12
0
VertexData TransformUnit::ReadVertex(VertexReader& vreader)
{
	VertexData vertex;

	float pos[3];
	// VertexDecoder normally scales z, but we want it unscaled.
	vreader.ReadPosThroughZ16(pos);

	if (!gstate.isModeClear() && gstate.isTextureMapEnabled() && vreader.hasUV()) {
		float uv[2];
		vreader.ReadUV(uv);
		vertex.texturecoords = Vec2<float>(uv[0], uv[1]);
	}

	if (vreader.hasNormal()) {
		float normal[3];
		vreader.ReadNrm(normal);
		vertex.normal = Vec3<float>(normal[0], normal[1], normal[2]);

		if (gstate.areNormalsReversed())
			vertex.normal = -vertex.normal;
	}

	if (vertTypeIsSkinningEnabled(gstate.vertType) && !gstate.isModeThrough()) {
		float W[8] = { 1.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f };
		vreader.ReadWeights(W);

		Vec3<float> tmppos(0.f, 0.f, 0.f);
		Vec3<float> tmpnrm(0.f, 0.f, 0.f);

		for (int i = 0; i < vertTypeGetNumBoneWeights(gstate.vertType); ++i) {
			Mat3x3<float> bone(&gstate.boneMatrix[12*i]);
			tmppos += (bone * ModelCoords(pos[0], pos[1], pos[2]) + Vec3<float>(gstate.boneMatrix[12*i+9], gstate.boneMatrix[12*i+10], gstate.boneMatrix[12*i+11])) * W[i];
			if (vreader.hasNormal())
				tmpnrm += (bone * vertex.normal) * W[i];
		}

		pos[0] = tmppos.x;
		pos[1] = tmppos.y;
		pos[2] = tmppos.z;
		if (vreader.hasNormal())
			vertex.normal = tmpnrm;
	}

	if (vreader.hasColor0()) {
		float col[4];
		vreader.ReadColor0(col);
		vertex.color0 = Vec4<int>(col[0]*255, col[1]*255, col[2]*255, col[3]*255);
	} else {
		vertex.color0 = Vec4<int>(gstate.getMaterialAmbientR(), gstate.getMaterialAmbientG(), gstate.getMaterialAmbientB(), gstate.getMaterialAmbientA());
	}

	if (vreader.hasColor1()) {
		float col[3];
		vreader.ReadColor1(col);
		vertex.color1 = Vec3<int>(col[0]*255, col[1]*255, col[2]*255);
	} else {
		vertex.color1 = Vec3<int>(0, 0, 0);
	}

	if (!gstate.isModeThrough()) {
		vertex.modelpos = ModelCoords(pos[0], pos[1], pos[2]);
		vertex.worldpos = WorldCoords(TransformUnit::ModelToWorld(vertex.modelpos));
		ModelCoords viewpos = TransformUnit::WorldToView(vertex.worldpos);
		vertex.clippos = ClipCoords(TransformUnit::ViewToClip(viewpos));
		if (gstate.isFogEnabled()) {
			float fog_end = getFloat24(gstate.fog1);
			float fog_slope = getFloat24(gstate.fog2);
			// Same fixup as in ShaderManagerGLES.cpp
			if (my_isnanorinf(fog_end)) {
				// Not really sure what a sensible value might be, but let's try 64k.
				fog_end = std::signbit(fog_end) ? -65535.0f : 65535.0f;
			}
			if (my_isnanorinf(fog_slope)) {
				fog_slope = std::signbit(fog_slope) ? -65535.0f : 65535.0f;
			}
			vertex.fogdepth = (viewpos.z + fog_end) * fog_slope;
		} else {
			vertex.fogdepth = 1.0f;
		}
		vertex.screenpos = ClipToScreenInternal(vertex.clippos, &outside_range_flag);

		if (vreader.hasNormal()) {
			vertex.worldnormal = TransformUnit::ModelToWorldNormal(vertex.normal);
			// TODO: Isn't there a flag that controls whether to normalize the normal?
			vertex.worldnormal /= vertex.worldnormal.Length();
		} else {
			vertex.worldnormal = Vec3<float>(0.0f, 0.0f, 1.0f);
		}

		Lighting::Process(vertex, vreader.hasColor0());
	} else {
		vertex.screenpos.x = (int)(pos[0] * 16) + gstate.getOffsetX16();
		vertex.screenpos.y = (int)(pos[1] * 16) + gstate.getOffsetY16();
		vertex.screenpos.z = pos[2];
		vertex.clippos.w = 1.f;
		vertex.fogdepth = 1.f;
	}

	return vertex;
}
Exemplo n.º 13
0
void TransformDrawEngine::ApplyDrawState(int prim) {
	// TODO: All this setup is soon so expensive that we'll need dirty flags, or simply do it in the command writes where we detect dirty by xoring. Silly to do all this work on every drawcall.

	if (gstate_c.textureChanged) {
		if (gstate.isTextureMapEnabled()) {
			textureCache_->SetTexture();
		}
		gstate_c.textureChanged = false;
	}

	// TODO: The top bit of the alpha channel should be written to the stencil bit somehow. This appears to require very expensive multipass rendering :( Alternatively, one could do a
	// single fullscreen pass that converts alpha to stencil (or 2 passes, to set both the 0 and 1 values) very easily.

	// Set blend
	bool wantBlend = !gstate.isModeClear() && gstate.isAlphaBlendEnabled();
	glstate.blend.set(wantBlend);
	if (wantBlend) {
		// This can't be done exactly as there are several PSP blend modes that are impossible to do on OpenGL ES 2.0, and some even on regular OpenGL for desktop.
		// HOWEVER - we should be able to approximate the 2x modes in the shader, although they will clip wrongly.

		// Examples of seen unimplementable blend states:
		// Mortal Kombat Unchained: FixA=0000ff FixB=000080 FuncA=10 FuncB=10

		int blendFuncA  = gstate.getBlendFuncA();
		int blendFuncB  = gstate.getBlendFuncB();
		int blendFuncEq = gstate.getBlendEq();
		if (blendFuncA > GE_SRCBLEND_FIXA) blendFuncA = GE_SRCBLEND_FIXA;
		if (blendFuncB > GE_DSTBLEND_FIXB) blendFuncB = GE_DSTBLEND_FIXB;

		// Shortcut by using GL_ONE where possible, no need to set blendcolor
		GLuint glBlendFuncA = blendFuncA == GE_SRCBLEND_FIXA ? blendColor2Func(gstate.getFixA()) : aLookup[blendFuncA];
		GLuint glBlendFuncB = blendFuncB == GE_DSTBLEND_FIXB ? blendColor2Func(gstate.getFixB()) : bLookup[blendFuncB];
		if (blendFuncA == GE_SRCBLEND_FIXA || blendFuncB == GE_DSTBLEND_FIXB) {
			Vec3f fixA = Vec3f::FromRGB(gstate.getFixA());
			Vec3f fixB = Vec3f::FromRGB(gstate.getFixB());
			if (glBlendFuncA == GL_INVALID_ENUM && glBlendFuncB != GL_INVALID_ENUM) {
				// Can use blendcolor trivially.
				const float blendColor[4] = {fixA.x, fixA.y, fixA.z, 1.0f};
				glstate.blendColor.set(blendColor);
				glBlendFuncA = GL_CONSTANT_COLOR;
			} else if (glBlendFuncA != GL_INVALID_ENUM && glBlendFuncB == GL_INVALID_ENUM) {
				// Can use blendcolor trivially.
				const float blendColor[4] = {fixB.x, fixB.y, fixB.z, 1.0f};
				glstate.blendColor.set(blendColor);
				glBlendFuncB = GL_CONSTANT_COLOR;
			} else if (glBlendFuncA == GL_INVALID_ENUM && glBlendFuncB == GL_INVALID_ENUM) {
				if (blendColorSimilar(fixA, Vec3f::AssignToAll(1.0f) - fixB)) {
					glBlendFuncA = GL_CONSTANT_COLOR;
					glBlendFuncB = GL_ONE_MINUS_CONSTANT_COLOR;
					const float blendColor[4] = {fixA.x, fixA.y, fixA.z, 1.0f};
					glstate.blendColor.set(blendColor);
				} else if (blendColorSimilar(fixA, fixB)) {
					glBlendFuncA = GL_CONSTANT_COLOR;
					glBlendFuncB = GL_CONSTANT_COLOR;
					const float blendColor[4] = {fixA.x, fixA.y, fixA.z, 1.0f};
					glstate.blendColor.set(blendColor);
				} else {
					static bool didReportBlend = false;
					if (!didReportBlend)
						Reporting::ReportMessage("ERROR INVALID blendcolorstate: FixA=%06x FixB=%06x FuncA=%i FuncB=%i", gstate.getFixA(), gstate.getFixB(), gstate.getBlendFuncA(), gstate.getBlendFuncB());
					didReportBlend = true;

					DEBUG_LOG(HLE, "ERROR INVALID blendcolorstate: FixA=%06x FixB=%06x FuncA=%i FuncB=%i", gstate.getFixA(), gstate.getFixB(), gstate.getBlendFuncA(), gstate.getBlendFuncB());
					// Let's approximate, at least.  Close is better than totally off.
					const bool nearZeroA = blendColorSimilar(fixA, Vec3f::AssignToAll(0.0f), 0.25f);
					const bool nearZeroB = blendColorSimilar(fixB, Vec3f::AssignToAll(0.0f), 0.25f);
					if (nearZeroA || blendColorSimilar(fixA, Vec3f::AssignToAll(1.0f), 0.25f)) {
						glBlendFuncA = nearZeroA ? GL_ZERO : GL_ONE;
						glBlendFuncB = GL_CONSTANT_COLOR;
						const float blendColor[4] = {fixB.x, fixB.y, fixB.z, 1.0f};
						glstate.blendColor.set(blendColor);
					// We need to pick something.  Let's go with A as the fixed color.
					} else {
						glBlendFuncA = GL_CONSTANT_COLOR;
						glBlendFuncB = nearZeroB ? GL_ZERO : GL_ONE;
						const float blendColor[4] = {fixA.x, fixA.y, fixA.z, 1.0f};
						glstate.blendColor.set(blendColor);
					}
				}
			}
		}

		// At this point, through all paths above, glBlendFuncA and glBlendFuncB will be set right somehow.
		if (!gstate.isStencilTestEnabled()) {
			// Fixes some Persona 2 issues, may be correct? (that is, don't change dest alpha at all if blending)
			// If this doesn't break anything else, it's likely to be right.
			// I guess an alternative solution would be to simply disable alpha writes if alpha blending is enabled.
			glstate.blendFuncSeparate.set(glBlendFuncA, glBlendFuncB, GL_ZERO, glBlendFuncB);
		} else {
			glstate.blendFuncSeparate.set(glBlendFuncA, glBlendFuncB, glBlendFuncA, glBlendFuncB);
		}
		glstate.blendEquation.set(eqLookup[blendFuncEq]);
	}

	// Dither
	if (gstate.isDitherEnabled()) {
		glstate.dither.enable();
		glstate.dither.set(GL_TRUE);
	} else
		glstate.dither.disable();

	if (gstate.isModeClear()) {

#if !defined(USING_GLES2)
		// Logic Ops
		glstate.colorLogicOp.disable();
#endif
		// Culling 
		glstate.cullFace.disable();
		
		// Depth Test
		glstate.depthTest.enable();
		glstate.depthFunc.set(GL_ALWAYS);
		glstate.depthWrite.set(gstate.isClearModeDepthWriteEnabled() ? GL_TRUE : GL_FALSE);

		// Color Test
		bool colorMask = gstate.isClearModeColorMask();
		bool alphaMask = gstate.isClearModeAlphaMask();
		glstate.colorMask.set(colorMask, colorMask, colorMask, alphaMask);

		// Stencil Test
		if (alphaMask) {
			glstate.stencilTest.enable();
			glstate.stencilOp.set(GL_REPLACE, GL_REPLACE, GL_REPLACE);
			glstate.stencilFunc.set(GL_ALWAYS, 0, 0xFF);
		} else 
			glstate.stencilTest.disable();
		
	} else {

#if !defined(USING_GLES2)
		// Logic Ops
		if (gstate.isLogicOpEnabled() && gstate.getLogicOp() != GE_LOGIC_COPY) {
			glstate.colorLogicOp.enable();
			glstate.logicOp.set(logicOps[gstate.getLogicOp()]);
		} else
			glstate.colorLogicOp.disable();
#endif		
		// Set cull
		bool cullEnabled = !gstate.isModeThrough() && prim != GE_PRIM_RECTANGLES && gstate.isCullEnabled();
		if (cullEnabled) {
			glstate.cullFace.enable();
			glstate.cullFaceMode.set(cullingMode[gstate.getCullMode()]);
		} else
			glstate.cullFace.disable();
	
		// Depth Test
		if (gstate.isDepthTestEnabled()) {
			glstate.depthTest.enable();
			glstate.depthFunc.set(ztests[gstate.getDepthTestFunction()]);
			glstate.depthWrite.set(gstate.isDepthWriteEnabled() ? GL_TRUE : GL_FALSE);
		} else 
			glstate.depthTest.disable();
		
		// PSP color/alpha mask is per bit but we can only support per byte.
		// But let's do that, at least. And let's try a threshold.
		bool rmask = (gstate.pmskc & 0xFF) < 128;
		bool gmask = ((gstate.pmskc >> 8) & 0xFF) < 128;
		bool bmask = ((gstate.pmskc >> 16) & 0xFF) < 128;
		bool amask = (gstate.pmska & 0xFF) < 128;
		glstate.colorMask.set(rmask, gmask, bmask, amask);
		
		// Stencil Test
		if (gstate.isStencilTestEnabled()) {
			glstate.stencilTest.enable();
			glstate.stencilFunc.set(ztests[gstate.getStencilTestFunction()],
				gstate.getStencilTestRef(),
				gstate.getStencilTestMask());
			glstate.stencilOp.set(stencilOps[gstate.getStencilOpSFail()],  // stencil fail
				stencilOps[gstate.getStencilOpZFail()],  // depth fail
				stencilOps[gstate.getStencilOpZPass()]); // depth pass
		} else 
			glstate.stencilTest.disable();
		
	}

	float renderWidthFactor, renderHeightFactor;
	float renderWidth, renderHeight;
	float renderX, renderY;
	bool useBufferedRendering = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE;
	if (useBufferedRendering) {
		renderX = 0.0f;
		renderY = 0.0f;
		renderWidth = framebufferManager_->GetRenderWidth();
		renderHeight = framebufferManager_->GetRenderHeight();
		renderWidthFactor = (float)renderWidth / framebufferManager_->GetTargetWidth();
		renderHeightFactor = (float)renderHeight / framebufferManager_->GetTargetHeight();
	} else {
		// TODO: Aspect-ratio aware and centered
		float pixelW = PSP_CoreParameter().pixelWidth;
		float pixelH = PSP_CoreParameter().pixelHeight;
		CenterRect(&renderX, &renderY, &renderWidth, &renderHeight, 480, 272, pixelW, pixelH);
		renderWidthFactor = renderWidth / 480.0f;
		renderHeightFactor = renderHeight / 272.0f;
	}

	bool throughmode = gstate.isModeThrough();

	// Scissor
	int scissorX1 = gstate.getScissorX1();
	int scissorY1 = gstate.getScissorY1();
	int scissorX2 = gstate.getScissorX2() + 1;
	int scissorY2 = gstate.getScissorY2() + 1;

	// This is a bit of a hack as the render buffer isn't always that size
	if (scissorX1 == 0 && scissorY1 == 0 
		&& scissorX2 >= (int) gstate_c.curRTWidth
		&& scissorY2 >= (int) gstate_c.curRTHeight) {
		glstate.scissorTest.disable();
	} else {
		glstate.scissorTest.enable();
		glstate.scissorRect.set(
			renderX + scissorX1 * renderWidthFactor,
			renderY + renderHeight - (scissorY2 * renderHeightFactor),
			(scissorX2 - scissorX1) * renderWidthFactor,
			(scissorY2 - scissorY1) * renderHeightFactor);
	}

	/*
	int regionX1 = gstate.region1 & 0x3FF;
	int regionY1 = (gstate.region1 >> 10) & 0x3FF;
	int regionX2 = (gstate.region2 & 0x3FF) + 1;
	int regionY2 = ((gstate.region2 >> 10) & 0x3FF) + 1;
	*/
	int regionX1 = 0;
	int regionY1 = 0;
	int regionX2 = gstate_c.curRTWidth;
	int regionY2 = gstate_c.curRTHeight;

	float offsetX = (float)(gstate.offsetx & 0xFFFF) / 16.0f;
	float offsetY = (float)(gstate.offsety & 0xFFFF) / 16.0f;

	if (throughmode) {
		// No viewport transform here. Let's experiment with using region.
		glstate.viewport.set(
			renderX + (0 + regionX1) * renderWidthFactor, 
			renderY + (0 - regionY1) * renderHeightFactor,
			(regionX2 - regionX1) * renderWidthFactor,
			(regionY2 - regionY1) * renderHeightFactor);
		glstate.depthRange.set(0.0f, 1.0f);
	} else {
		// These we can turn into a glViewport call, offset by offsetX and offsetY. Math after.
		float vpXa = getFloat24(gstate.viewportx1);
		float vpXb = getFloat24(gstate.viewportx2);
		float vpYa = getFloat24(gstate.viewporty1);
		float vpYb = getFloat24(gstate.viewporty2);

		// The viewport transform appears to go like this: 
		// Xscreen = -offsetX + vpXb + vpXa * Xview
		// Yscreen = -offsetY + vpYb + vpYa * Yview
		// Zscreen = vpZb + vpZa * Zview

		// This means that to get the analogue glViewport we must:
		float vpX0 = vpXb - offsetX - vpXa;
		float vpY0 = vpYb - offsetY + vpYa;   // Need to account for sign of Y
		gstate_c.vpWidth = vpXa * 2.0f;
		gstate_c.vpHeight = -vpYa * 2.0f;

		float vpWidth = fabsf(gstate_c.vpWidth);
		float vpHeight = fabsf(gstate_c.vpHeight);

		vpX0 *= renderWidthFactor;
		vpY0 *= renderHeightFactor;
		vpWidth *= renderWidthFactor;
		vpHeight *= renderHeightFactor;

		vpX0 = (vpXb - offsetX - fabsf(vpXa)) * renderWidthFactor;
		// Flip vpY0 to match the OpenGL coordinate system.
		vpY0 = renderHeight - (vpYb - offsetY + fabsf(vpYa)) * renderHeightFactor;		
		
		glstate.viewport.set(vpX0 + renderX, vpY0 + renderY, vpWidth, vpHeight);
		// Sadly, as glViewport takes integers, we will not be able to support sub pixel offsets this way. But meh.
		// shaderManager_->DirtyUniform(DIRTY_PROJMATRIX);

		float zScale = getFloat24(gstate.viewportz1) / 65535.0f;
		float zOff = getFloat24(gstate.viewportz2) / 65535.0f;
		float depthRangeMin = zOff - zScale;
		float depthRangeMax = zOff + zScale;
		glstate.depthRange.set(depthRangeMin, depthRangeMax);
	}
}
Exemplo n.º 14
0
void ShaderManagerDX9::VSUpdateUniforms(int dirtyUniforms) {
	// Update any dirty uniforms before we draw
	if (dirtyUniforms & DIRTY_PROJMATRIX) {
		Matrix4x4 flippedMatrix;
		memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float));

		const bool invertedY = gstate_c.vpHeight < 0;
		if (!invertedY) {
			flippedMatrix[1] = -flippedMatrix[1];
			flippedMatrix[5] = -flippedMatrix[5];
			flippedMatrix[9] = -flippedMatrix[9];
			flippedMatrix[13] = -flippedMatrix[13];
		}
		const bool invertedX = gstate_c.vpWidth < 0;
		if (invertedX) {
			flippedMatrix[0] = -flippedMatrix[0];
			flippedMatrix[4] = -flippedMatrix[4];
			flippedMatrix[8] = -flippedMatrix[8];
			flippedMatrix[12] = -flippedMatrix[12];
		}

		ConvertProjMatrixToD3D(flippedMatrix, invertedX, invertedY);

		VSSetMatrix(CONST_VS_PROJ, flippedMatrix.getReadPtr());
	}
	if (dirtyUniforms & DIRTY_PROJTHROUGHMATRIX) {
		Matrix4x4 proj_through;
		proj_through.setOrtho(0.0f, gstate_c.curRTWidth, gstate_c.curRTHeight, 0, 0, 1);

		ConvertProjMatrixToD3DThrough(proj_through);

		VSSetMatrix(CONST_VS_PROJ_THROUGH, proj_through.getReadPtr());
	}
	// Transform
	if (dirtyUniforms & DIRTY_WORLDMATRIX) {
		VSSetMatrix4x3_3(CONST_VS_WORLD, gstate.worldMatrix);
	}
	if (dirtyUniforms & DIRTY_VIEWMATRIX) {
		VSSetMatrix4x3_3(CONST_VS_VIEW, gstate.viewMatrix);
	}
	if (dirtyUniforms & DIRTY_TEXMATRIX) {
		VSSetMatrix4x3_3(CONST_VS_TEXMTX, gstate.tgenMatrix);
	}
	if (dirtyUniforms & DIRTY_FOGCOEF) {
		float fogcoef[2] = {
			getFloat24(gstate.fog1),
			getFloat24(gstate.fog2),
		};
		if (my_isinf(fogcoef[1])) {
			// not really sure what a sensible value might be.
			fogcoef[1] = fogcoef[1] < 0.0f ? -10000.0f : 10000.0f;
		} else if (my_isnan(fogcoef[1])) {
			// Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988
			// Just put the fog far away at a large finite distance.
			// Infinities and NaNs are rather unpredictable in shaders on many GPUs
			// so it's best to just make it a sane calculation.
			fogcoef[0] = 100000.0f;
			fogcoef[1] = 1.0f;
		}
#ifndef MOBILE_DEVICE
		else if (my_isnanorinf(fogcoef[1]) || my_isnanorinf(fogcoef[0])) {
			ERROR_LOG_REPORT_ONCE(fognan, G3D, "Unhandled fog NaN/INF combo: %f %f", fogcoef[0], fogcoef[1]);
		}
#endif
		VSSetFloatArray(CONST_VS_FOGCOEF, fogcoef, 2);
	}
	// TODO: Could even set all bones in one go if they're all dirty.
#ifdef USE_BONE_ARRAY
	if (u_bone != 0) {
		float allBones[8 * 16];

		bool allDirty = true;
		for (int i = 0; i < numBones; i++) {
			if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) {
				ConvertMatrix4x3To4x4(allBones + 16 * i, gstate.boneMatrix + 12 * i);
			} else {
				allDirty = false;
			}
		}
		if (allDirty) {
			// Set them all with one call
			//glUniformMatrix4fv(u_bone, numBones, GL_FALSE, allBones);
		} else {
			// Set them one by one. Could try to coalesce two in a row etc but too lazy.
			for (int i = 0; i < numBones; i++) {
				if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) {
					//glUniformMatrix4fv(u_bone + i, 1, GL_FALSE, allBones + 16 * i);
				}
			}
		}
	}
#else
	for (int i = 0; i < 8; i++) {
		if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) {
			VSSetMatrix4x3_3(CONST_VS_BONE0 + 3 * i, gstate.boneMatrix + 12 * i);
		}
	}
#endif

	// Texturing
	if (dirtyUniforms & DIRTY_UVSCALEOFFSET) {
		const float invW = 1.0f / (float)gstate_c.curTextureWidth;
		const float invH = 1.0f / (float)gstate_c.curTextureHeight;
		const int w = gstate.getTextureWidth(0);
		const int h = gstate.getTextureHeight(0);
		const float widthFactor = (float)w * invW;
		const float heightFactor = (float)h * invH;

		float uvscaleoff[4];

		switch (gstate.getUVGenMode()) {
		case GE_TEXMAP_TEXTURE_COORDS:
			// Not sure what GE_TEXMAP_UNKNOWN is, but seen in Riviera.  Treating the same as GE_TEXMAP_TEXTURE_COORDS works.
		case GE_TEXMAP_UNKNOWN:
			if (g_Config.bPrescaleUV) {
				// We are here but are prescaling UV in the decoder? Let's do the same as in the other case
				// except consider *Scale and *Off to be 1 and 0.
				uvscaleoff[0] = widthFactor;
				uvscaleoff[1] = heightFactor;
				uvscaleoff[2] = 0.0f;
				uvscaleoff[3] = 0.0f;
			} else {
				uvscaleoff[0] = gstate_c.uv.uScale * widthFactor;
				uvscaleoff[1] = gstate_c.uv.vScale * heightFactor;
				uvscaleoff[2] = gstate_c.uv.uOff * widthFactor;
				uvscaleoff[3] = gstate_c.uv.vOff * heightFactor;
			}
			break;

		// These two work the same whether or not we prescale UV.

		case GE_TEXMAP_TEXTURE_MATRIX:
			// We cannot bake the UV coord scale factor in here, as we apply a matrix multiplication
			// before this is applied, and the matrix multiplication may contain translation. In this case
			// the translation will be scaled which breaks faces in Hexyz Force for example.
			// So I've gone back to applying the scale factor in the shader.
			uvscaleoff[0] = widthFactor;
			uvscaleoff[1] = heightFactor;
			uvscaleoff[2] = 0.0f;
			uvscaleoff[3] = 0.0f;
			break;

		case GE_TEXMAP_ENVIRONMENT_MAP:
			// In this mode we only use uvscaleoff to scale to the texture size.
			uvscaleoff[0] = widthFactor;
			uvscaleoff[1] = heightFactor;
			uvscaleoff[2] = 0.0f;
			uvscaleoff[3] = 0.0f;
			break;

		default:
			ERROR_LOG_REPORT(G3D, "Unexpected UV gen mode: %d", gstate.getUVGenMode());
		}
		VSSetFloatArray(CONST_VS_UVSCALEOFFSET, uvscaleoff, 4);
	}

	if (dirtyUniforms & DIRTY_DEPTHRANGE)	{
		// Depth is [0, 1] mapping to [minz, maxz], not too hard.
		float vpZScale = gstate.getViewportZScale();
		float vpZCenter = gstate.getViewportZCenter();

		// These are just the reverse of the formulas in GPUStateUtils.
		float halfActualZRange = vpZScale / gstate_c.vpDepthScale;
		float minz = -((gstate_c.vpZOffset * halfActualZRange) - vpZCenter) - halfActualZRange;
		float viewZScale = halfActualZRange * 2.0f;
		// Account for the half pixel offset.
		float viewZCenter = minz + (DepthSliceFactor() / 256.0f) * 0.5f;
		float viewZInvScale;

		if (viewZScale != 0.0) {
			viewZInvScale = 1.0f / viewZScale;
		} else {
			viewZInvScale = 0.0;
		}

		float data[4] = { viewZScale, viewZCenter, viewZCenter, viewZInvScale };
		VSSetFloatUniform4(CONST_VS_DEPTHRANGE, data);
	}
	// Lighting
	if (dirtyUniforms & DIRTY_AMBIENT) {
		VSSetColorUniform3Alpha(CONST_VS_AMBIENT, gstate.ambientcolor, gstate.getAmbientA());
	}
	if (dirtyUniforms & DIRTY_MATAMBIENTALPHA) {
		VSSetColorUniform3Alpha(CONST_VS_MATAMBIENTALPHA, gstate.materialambient, gstate.getMaterialAmbientA());
	}
	if (dirtyUniforms & DIRTY_MATDIFFUSE) {
		VSSetColorUniform3(CONST_VS_MATDIFFUSE, gstate.materialdiffuse);
	}
	if (dirtyUniforms & DIRTY_MATEMISSIVE) {
		VSSetColorUniform3(CONST_VS_MATEMISSIVE, gstate.materialemissive);
	}
	if (dirtyUniforms & DIRTY_MATSPECULAR) {
		VSSetColorUniform3ExtraFloat(CONST_VS_MATSPECULAR, gstate.materialspecular, getFloat24(gstate.materialspecularcoef));
	}
	for (int i = 0; i < 4; i++) {
		if (dirtyUniforms & (DIRTY_LIGHT0 << i)) {
			if (gstate.isDirectionalLight(i)) {
				// Prenormalize
				float x = getFloat24(gstate.lpos[i * 3 + 0]);
				float y = getFloat24(gstate.lpos[i * 3 + 1]);
				float z = getFloat24(gstate.lpos[i * 3 + 2]);
				float len = sqrtf(x*x + y*y + z*z);
				if (len == 0.0f)
					len = 1.0f;
				else
					len = 1.0f / len;
				float vec[3] = { x * len, y * len, z * len };
				VSSetFloatArray(CONST_VS_LIGHTPOS + i, vec, 3);
			} else {
				VSSetFloat24Uniform3(CONST_VS_LIGHTPOS + i, &gstate.lpos[i * 3]);
			}
			VSSetFloat24Uniform3(CONST_VS_LIGHTDIR + i, &gstate.ldir[i * 3]);
			VSSetFloat24Uniform3(CONST_VS_LIGHTATT + i, &gstate.latt[i * 3]);
			VSSetFloat(CONST_VS_LIGHTANGLE + i, getFloat24(gstate.lcutoff[i]));
			VSSetFloat(CONST_VS_LIGHTSPOTCOEF + i, getFloat24(gstate.lconv[i]));
			VSSetColorUniform3(CONST_VS_LIGHTAMBIENT + i, gstate.lcolor[i * 3]);
			VSSetColorUniform3(CONST_VS_LIGHTDIFFUSE + i, gstate.lcolor[i * 3 + 1]);
			VSSetColorUniform3(CONST_VS_LIGHTSPECULAR + i, gstate.lcolor[i * 3 + 2]);
		}
	}
}
Exemplo n.º 15
0
void ShaderManagerVulkan::BaseUpdateUniforms(int dirtyUniforms) {
	if (dirtyUniforms & DIRTY_TEXENV) {
		Uint8x3ToFloat4(ub_base.texEnvColor, gstate.texenvcolor);
	}
	if (dirtyUniforms & DIRTY_ALPHACOLORREF) {
		Uint8x3ToInt4_Alpha(ub_base.alphaColorRef, gstate.getColorTestRef(), gstate.getAlphaTestRef() & gstate.getAlphaTestMask());
	}
	if (dirtyUniforms & DIRTY_ALPHACOLORMASK) {
		Uint8x3ToInt4_Alpha(ub_base.colorTestMask, gstate.getColorTestMask(), gstate.getAlphaTestMask());
	}
	if (dirtyUniforms & DIRTY_FOGCOLOR) {
		Uint8x3ToFloat4(ub_base.fogColor, gstate.fogcolor);
	}
	if (dirtyUniforms & DIRTY_SHADERBLEND) {
		Uint8x3ToFloat4(ub_base.blendFixA, gstate.getFixA());
		Uint8x3ToFloat4(ub_base.blendFixB, gstate.getFixB());
	}
	if (dirtyUniforms & DIRTY_TEXCLAMP) {
		const float invW = 1.0f / (float)gstate_c.curTextureWidth;
		const float invH = 1.0f / (float)gstate_c.curTextureHeight;
		const int w = gstate.getTextureWidth(0);
		const int h = gstate.getTextureHeight(0);
		const float widthFactor = (float)w * invW;
		const float heightFactor = (float)h * invH;

		// First wrap xy, then half texel xy (for clamp.)
		const float texclamp[4] = {
			widthFactor,
			heightFactor,
			invW * 0.5f,
			invH * 0.5f,
		};
		const float texclampoff[2] = {
			gstate_c.curTextureXOffset * invW,
			gstate_c.curTextureYOffset * invH,
		};
		CopyFloat4(ub_base.texClamp, texclamp);
		CopyFloat2(ub_base.texClampOffset, texclampoff);
	}

	if (dirtyUniforms & DIRTY_PROJMATRIX) {
		Matrix4x4 flippedMatrix;
		memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float));

		const bool invertedY = gstate_c.vpHeight < 0;
		if (invertedY) {
			flippedMatrix[1] = -flippedMatrix[1];
			flippedMatrix[5] = -flippedMatrix[5];
			flippedMatrix[9] = -flippedMatrix[9];
			flippedMatrix[13] = -flippedMatrix[13];
		}
		const bool invertedX = gstate_c.vpWidth < 0;
		if (invertedX) {
			flippedMatrix[0] = -flippedMatrix[0];
			flippedMatrix[4] = -flippedMatrix[4];
			flippedMatrix[8] = -flippedMatrix[8];
			flippedMatrix[12] = -flippedMatrix[12];
		}
		ConvertProjMatrixToVulkan(flippedMatrix, invertedX, invertedY);
		CopyMatrix4x4(ub_base.proj, flippedMatrix.getReadPtr());
	}

	if (dirtyUniforms & DIRTY_PROJTHROUGHMATRIX) {
		Matrix4x4 proj_through;
		proj_through.setOrthoVulkan(0.0f, gstate_c.curRTWidth, 0, gstate_c.curRTHeight, 0, 1);
		CopyMatrix4x4(ub_base.proj_through, proj_through.getReadPtr());
	}

	// Transform
	if (dirtyUniforms & DIRTY_WORLDMATRIX) {
		ConvertMatrix4x3To4x4(ub_base.world, gstate.worldMatrix);
	}
	if (dirtyUniforms & DIRTY_VIEWMATRIX) {
		ConvertMatrix4x3To4x4(ub_base.view, gstate.viewMatrix);
	}
	if (dirtyUniforms & DIRTY_TEXMATRIX) {
		ConvertMatrix4x3To4x4(ub_base.tex, gstate.tgenMatrix);
	}

	// Combined two small uniforms
	if (dirtyUniforms & (DIRTY_FOGCOEF | DIRTY_STENCILREPLACEVALUE)) {
		float fogcoef_stencil[3] = {
			getFloat24(gstate.fog1),
			getFloat24(gstate.fog2),
			(float)gstate.getStencilTestRef()
		};
		if (my_isinf(fogcoef_stencil[1])) {
			// not really sure what a sensible value might be.
			fogcoef_stencil[1] = fogcoef_stencil[1] < 0.0f ? -10000.0f : 10000.0f;
		} else if (my_isnan(fogcoef_stencil[1])) {
			// Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988
			// Just put the fog far away at a large finite distance.
			// Infinities and NaNs are rather unpredictable in shaders on many GPUs
			// so it's best to just make it a sane calculation.
			fogcoef_stencil[0] = 100000.0f;
			fogcoef_stencil[1] = 1.0f;
		}
#ifndef MOBILE_DEVICE
		else if (my_isnanorinf(fogcoef_stencil[1]) || my_isnanorinf(fogcoef_stencil[0])) {
			ERROR_LOG_REPORT_ONCE(fognan, G3D, "Unhandled fog NaN/INF combo: %f %f", fogcoef_stencil[0], fogcoef_stencil[1]);
		}
#endif
		CopyFloat3(ub_base.fogCoef_stencil, fogcoef_stencil);
	}

	// Texturing
	if (dirtyUniforms & DIRTY_UVSCALEOFFSET) {
		const float invW = 1.0f / (float)gstate_c.curTextureWidth;
		const float invH = 1.0f / (float)gstate_c.curTextureHeight;
		const int w = gstate.getTextureWidth(0);
		const int h = gstate.getTextureHeight(0);
		const float widthFactor = (float)w * invW;
		const float heightFactor = (float)h * invH;

		static const float rescale[4] = { 1.0f, 2 * 127.5f / 128.f, 2 * 32767.5f / 32768.f, 1.0f };
		const float factor = rescale[(gstate.vertType & GE_VTYPE_TC_MASK) >> GE_VTYPE_TC_SHIFT];

		float uvscaleoff[4];

		switch (gstate.getUVGenMode()) {
		case GE_TEXMAP_TEXTURE_COORDS:
			// Not sure what GE_TEXMAP_UNKNOWN is, but seen in Riviera.  Treating the same as GE_TEXMAP_TEXTURE_COORDS works.
		case GE_TEXMAP_UNKNOWN:
			if (g_Config.bPrescaleUV) {
				// We are here but are prescaling UV in the decoder? Let's do the same as in the other case
				// except consider *Scale and *Off to be 1 and 0.
				uvscaleoff[0] = widthFactor;
				uvscaleoff[1] = heightFactor;
				uvscaleoff[2] = 0.0f;
				uvscaleoff[3] = 0.0f;
			} else {
				uvscaleoff[0] = gstate_c.uv.uScale * factor * widthFactor;
				uvscaleoff[1] = gstate_c.uv.vScale * factor * heightFactor;
				uvscaleoff[2] = gstate_c.uv.uOff * widthFactor;
				uvscaleoff[3] = gstate_c.uv.vOff * heightFactor;
			}
			break;

			// These two work the same whether or not we prescale UV.

		case GE_TEXMAP_TEXTURE_MATRIX:
			// We cannot bake the UV coord scale factor in here, as we apply a matrix multiplication
			// before this is applied, and the matrix multiplication may contain translation. In this case
			// the translation will be scaled which breaks faces in Hexyz Force for example.
			// So I've gone back to applying the scale factor in the shader.
			uvscaleoff[0] = widthFactor;
			uvscaleoff[1] = heightFactor;
			uvscaleoff[2] = 0.0f;
			uvscaleoff[3] = 0.0f;
			break;

		case GE_TEXMAP_ENVIRONMENT_MAP:
			// In this mode we only use uvscaleoff to scale to the texture size.
			uvscaleoff[0] = widthFactor;
			uvscaleoff[1] = heightFactor;
			uvscaleoff[2] = 0.0f;
			uvscaleoff[3] = 0.0f;
			break;

		default:
			ERROR_LOG_REPORT(G3D, "Unexpected UV gen mode: %d", gstate.getUVGenMode());
		}
		CopyFloat4(ub_base.uvScaleOffset, uvscaleoff);
	}

	if (dirtyUniforms & DIRTY_DEPTHRANGE) {
		float viewZScale = gstate.getViewportZScale();
		float viewZCenter = gstate.getViewportZCenter();
		float viewZInvScale;

		// We had to scale and translate Z to account for our clamped Z range.
		// Therefore, we also need to reverse this to round properly.
		//
		// Example: scale = 65535.0, center = 0.0
		// Resulting range = -65535 to 65535, clamped to [0, 65535]
		// gstate_c.vpDepthScale = 2.0f
		// gstate_c.vpZOffset = -1.0f
		//
		// The projection already accounts for those, so we need to reverse them.
		//
		// Additionally, D3D9 uses a range from [0, 1].  We double and move the center.
		viewZScale *= (1.0f / gstate_c.vpDepthScale) * 2.0f;
		viewZCenter -= 65535.0f * gstate_c.vpZOffset + 32768.5f;

		if (viewZScale != 0.0) {
			viewZInvScale = 1.0f / viewZScale;
		} else {
			viewZInvScale = 0.0;
		}

		float data[4] = { viewZScale, viewZCenter, viewZCenter, viewZInvScale };
		CopyFloat4(ub_base.depthRange, data);
	}
}
Exemplo n.º 16
0
void ShaderManagerVulkan::BaseUpdateUniforms(int dirtyUniforms) {
	if (dirtyUniforms & DIRTY_TEXENV) {
		Uint8x3ToFloat4(ub_base.texEnvColor, gstate.texenvcolor);
	}
	if (dirtyUniforms & DIRTY_ALPHACOLORREF) {
		Uint8x3ToInt4_Alpha(ub_base.alphaColorRef, gstate.getColorTestRef(), gstate.getAlphaTestRef() & gstate.getAlphaTestMask());
	}
	if (dirtyUniforms & DIRTY_ALPHACOLORMASK) {
		Uint8x3ToInt4_Alpha(ub_base.colorTestMask, gstate.getColorTestMask(), gstate.getAlphaTestMask());
	}
	if (dirtyUniforms & DIRTY_FOGCOLOR) {
		Uint8x3ToFloat4(ub_base.fogColor, gstate.fogcolor);
	}
	if (dirtyUniforms & DIRTY_SHADERBLEND) {
		Uint8x3ToFloat4(ub_base.blendFixA, gstate.getFixA());
		Uint8x3ToFloat4(ub_base.blendFixB, gstate.getFixB());
	}
	if (dirtyUniforms & DIRTY_TEXCLAMP) {
		const float invW = 1.0f / (float)gstate_c.curTextureWidth;
		const float invH = 1.0f / (float)gstate_c.curTextureHeight;
		const int w = gstate.getTextureWidth(0);
		const int h = gstate.getTextureHeight(0);
		const float widthFactor = (float)w * invW;
		const float heightFactor = (float)h * invH;

		// First wrap xy, then half texel xy (for clamp.)
		ub_base.texClamp[0] = widthFactor;
		ub_base.texClamp[1] = heightFactor;
		ub_base.texClamp[2] = invW * 0.5f;
		ub_base.texClamp[3] = invH * 0.5f;
		ub_base.texClampOffset[0] = gstate_c.curTextureXOffset * invW;
		ub_base.texClampOffset[1] = gstate_c.curTextureYOffset * invH;
	}

	if (dirtyUniforms & DIRTY_PROJMATRIX) {
		Matrix4x4 flippedMatrix;
		memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float));

		const bool invertedY = gstate_c.vpHeight < 0;
		if (invertedY) {
			flippedMatrix[1] = -flippedMatrix[1];
			flippedMatrix[5] = -flippedMatrix[5];
			flippedMatrix[9] = -flippedMatrix[9];
			flippedMatrix[13] = -flippedMatrix[13];
		}
		const bool invertedX = gstate_c.vpWidth < 0;
		if (invertedX) {
			flippedMatrix[0] = -flippedMatrix[0];
			flippedMatrix[4] = -flippedMatrix[4];
			flippedMatrix[8] = -flippedMatrix[8];
			flippedMatrix[12] = -flippedMatrix[12];
		}
		ConvertProjMatrixToVulkan(flippedMatrix, invertedX, invertedY);
		CopyMatrix4x4(ub_base.proj, flippedMatrix.getReadPtr());
	}

	if (dirtyUniforms & DIRTY_PROJTHROUGHMATRIX) {
		Matrix4x4 proj_through;
		proj_through.setOrthoVulkan(0.0f, gstate_c.curRTWidth, 0, gstate_c.curRTHeight, 0, 1);
		CopyMatrix4x4(ub_base.proj_through, proj_through.getReadPtr());
	}

	// Transform
	if (dirtyUniforms & DIRTY_WORLDMATRIX) {
		ConvertMatrix4x3To4x4(ub_base.world, gstate.worldMatrix);
	}
	if (dirtyUniforms & DIRTY_VIEWMATRIX) {
		ConvertMatrix4x3To4x4(ub_base.view, gstate.viewMatrix);
	}
	if (dirtyUniforms & DIRTY_TEXMATRIX) {
		ConvertMatrix4x3To4x4(ub_base.tex, gstate.tgenMatrix);
	}

	// Combined two small uniforms
	if (dirtyUniforms & (DIRTY_FOGCOEF | DIRTY_STENCILREPLACEVALUE)) {
		float fogcoef_stencil[3] = {
			getFloat24(gstate.fog1),
			getFloat24(gstate.fog2),
			(float)gstate.getStencilTestRef()
		};
		if (my_isinf(fogcoef_stencil[1])) {
			// not really sure what a sensible value might be.
			fogcoef_stencil[1] = fogcoef_stencil[1] < 0.0f ? -10000.0f : 10000.0f;
		} else if (my_isnan(fogcoef_stencil[1])) {
			// Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988
			// Just put the fog far away at a large finite distance.
			// Infinities and NaNs are rather unpredictable in shaders on many GPUs
			// so it's best to just make it a sane calculation.
			fogcoef_stencil[0] = 100000.0f;
			fogcoef_stencil[1] = 1.0f;
		}
#ifndef MOBILE_DEVICE
		else if (my_isnanorinf(fogcoef_stencil[1]) || my_isnanorinf(fogcoef_stencil[0])) {
			ERROR_LOG_REPORT_ONCE(fognan, G3D, "Unhandled fog NaN/INF combo: %f %f", fogcoef_stencil[0], fogcoef_stencil[1]);
		}
#endif
		CopyFloat3(ub_base.fogCoef_stencil, fogcoef_stencil);
	}

	// Texturing
	if (dirtyUniforms & DIRTY_UVSCALEOFFSET) {
		const float invW = 1.0f / (float)gstate_c.curTextureWidth;
		const float invH = 1.0f / (float)gstate_c.curTextureHeight;
		const int w = gstate.getTextureWidth(0);
		const int h = gstate.getTextureHeight(0);
		const float widthFactor = (float)w * invW;
		const float heightFactor = (float)h * invH;
		ub_base.uvScaleOffset[0] = widthFactor;
		ub_base.uvScaleOffset[1] = heightFactor;
		ub_base.uvScaleOffset[2] = 0.0f;
		ub_base.uvScaleOffset[3] = 0.0f;
	}

	if (dirtyUniforms & DIRTY_DEPTHRANGE) {
		float viewZScale = gstate.getViewportZScale();
		float viewZCenter = gstate.getViewportZCenter();
		float viewZInvScale;

		// We had to scale and translate Z to account for our clamped Z range.
		// Therefore, we also need to reverse this to round properly.
		//
		// Example: scale = 65535.0, center = 0.0
		// Resulting range = -65535 to 65535, clamped to [0, 65535]
		// gstate_c.vpDepthScale = 2.0f
		// gstate_c.vpZOffset = -1.0f
		//
		// The projection already accounts for those, so we need to reverse them.
		//
		// Additionally, D3D9 uses a range from [0, 1].  We double and move the center.
		viewZScale *= (1.0f / gstate_c.vpDepthScale) * 2.0f;
		viewZCenter -= 65535.0f * gstate_c.vpZOffset + 32768.5f;

		if (viewZScale != 0.0) {
			viewZInvScale = 1.0f / viewZScale;
		} else {
			viewZInvScale = 0.0;
		}

		ub_base.depthRange[0] = viewZScale;
		ub_base.depthRange[1] = viewZCenter;
		ub_base.depthRange[2] = viewZCenter;
		ub_base.depthRange[3] = viewZInvScale;
	}
}
Exemplo n.º 17
0
void SoftwareTransform(
	int prim, int vertexCount, u32 vertType, u16 *&inds, int indexType,
	const DecVtxFormat &decVtxFormat, int &maxIndex, TransformedVertex *&drawBuffer, int &numTrans, bool &drawIndexed, const SoftwareTransformParams *params, SoftwareTransformResult *result) {
	u8 *decoded = params->decoded;
	FramebufferManagerCommon *fbman = params->fbman;
	TextureCacheCommon *texCache = params->texCache;
	TransformedVertex *transformed = params->transformed;
	TransformedVertex *transformedExpanded = params->transformedExpanded;
	float ySign = 1.0f;
	bool throughmode = (vertType & GE_VTYPE_THROUGH_MASK) != 0;
	bool lmode = gstate.isUsingSecondaryColor() && gstate.isLightingEnabled();

	// TODO: Split up into multiple draw calls for GLES 2.0 where you can't guarantee support for more than 0x10000 verts.

#if defined(MOBILE_DEVICE)
	if (vertexCount > 0x10000/3)
		vertexCount = 0x10000/3;
#endif

	float uscale = 1.0f;
	float vscale = 1.0f;
	if (throughmode) {
		uscale /= gstate_c.curTextureWidth;
		vscale /= gstate_c.curTextureHeight;
	}

	bool skinningEnabled = vertTypeIsSkinningEnabled(vertType);

	const int w = gstate.getTextureWidth(0);
	const int h = gstate.getTextureHeight(0);
	float widthFactor = (float) w / (float) gstate_c.curTextureWidth;
	float heightFactor = (float) h / (float) gstate_c.curTextureHeight;

	Lighter lighter(vertType);
	float fog_end = getFloat24(gstate.fog1);
	float fog_slope = getFloat24(gstate.fog2);
	// Same fixup as in ShaderManager.cpp
	if (my_isinf(fog_slope)) {
		// not really sure what a sensible value might be.
		fog_slope = fog_slope < 0.0f ? -10000.0f : 10000.0f;
	}
	if (my_isnan(fog_slope)) {
		// Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988
		// Just put the fog far away at a large finite distance.
		// Infinities and NaNs are rather unpredictable in shaders on many GPUs
		// so it's best to just make it a sane calculation.
		fog_end = 100000.0f;
		fog_slope = 1.0f;
	}

	VertexReader reader(decoded, decVtxFormat, vertType);
	if (throughmode) {
		for (int index = 0; index < maxIndex; index++) {
			// Do not touch the coordinates or the colors. No lighting.
			reader.Goto(index);
			// TODO: Write to a flexible buffer, we don't always need all four components.
			TransformedVertex &vert = transformed[index];
			reader.ReadPos(vert.pos);

			if (reader.hasColor0()) {
				reader.ReadColor0_8888(vert.color0);
			} else {
				vert.color0_32 = gstate.getMaterialAmbientRGBA();
			}

			if (reader.hasUV()) {
				reader.ReadUV(vert.uv);

				vert.u *= uscale;
				vert.v *= vscale;
			} else {
				vert.u = 0.0f;
				vert.v = 0.0f;
			}

			// Ignore color1 and fog, never used in throughmode anyway.
			// The w of uv is also never used (hardcoded to 1.0.)
		}
	} else {
		// Okay, need to actually perform the full transform.
		for (int index = 0; index < maxIndex; index++) {
			reader.Goto(index);

			float v[3] = {0, 0, 0};
			Vec4f c0 = Vec4f(1, 1, 1, 1);
			Vec4f c1 = Vec4f(0, 0, 0, 0);
			float uv[3] = {0, 0, 1};
			float fogCoef = 1.0f;

			// We do software T&L for now
			float out[3];
			float pos[3];
			Vec3f normal(0, 0, 1);
			Vec3f worldnormal(0, 0, 1);
			reader.ReadPos(pos);

			if (!skinningEnabled) {
				Vec3ByMatrix43(out, pos, gstate.worldMatrix);
				if (reader.hasNormal()) {
					reader.ReadNrm(normal.AsArray());
					if (gstate.areNormalsReversed()) {
						normal = -normal;
					}
					Norm3ByMatrix43(worldnormal.AsArray(), normal.AsArray(), gstate.worldMatrix);
					worldnormal = worldnormal.Normalized();
				}
			} else {
				float weights[8];
				reader.ReadWeights(weights);
				if (reader.hasNormal())
					reader.ReadNrm(normal.AsArray());

				// Skinning
				Vec3f psum(0, 0, 0);
				Vec3f nsum(0, 0, 0);
				for (int i = 0; i < vertTypeGetNumBoneWeights(vertType); i++) {
					if (weights[i] != 0.0f) {
						Vec3ByMatrix43(out, pos, gstate.boneMatrix+i*12);
						Vec3f tpos(out);
						psum += tpos * weights[i];
						if (reader.hasNormal()) {
							Vec3f norm;
							Norm3ByMatrix43(norm.AsArray(), normal.AsArray(), gstate.boneMatrix+i*12);
							nsum += norm * weights[i];
						}
					}
				}

				// Yes, we really must multiply by the world matrix too.
				Vec3ByMatrix43(out, psum.AsArray(), gstate.worldMatrix);
				if (reader.hasNormal()) {
					normal = nsum;
					if (gstate.areNormalsReversed()) {
						normal = -normal;
					}
					Norm3ByMatrix43(worldnormal.AsArray(), normal.AsArray(), gstate.worldMatrix);
					worldnormal = worldnormal.Normalized();
				}
			}

			// Perform lighting here if enabled. don't need to check through, it's checked above.
			Vec4f unlitColor = Vec4f(1, 1, 1, 1);
			if (reader.hasColor0()) {
				reader.ReadColor0(&unlitColor.x);
			} else {
				unlitColor = Vec4f::FromRGBA(gstate.getMaterialAmbientRGBA());
			}

			if (gstate.isLightingEnabled()) {
				float litColor0[4];
				float litColor1[4];
				lighter.Light(litColor0, litColor1, unlitColor.AsArray(), out, worldnormal);

				// Don't ignore gstate.lmode - we should send two colors in that case
				for (int j = 0; j < 4; j++) {
					c0[j] = litColor0[j];
				}
				if (lmode) {
					// Separate colors
					for (int j = 0; j < 4; j++) {
						c1[j] = litColor1[j];
					}
				} else {
					// Summed color into c0 (will clamp in ToRGBA().)
					for (int j = 0; j < 4; j++) {
						c0[j] += litColor1[j];
					}
				}
			} else {
				if (reader.hasColor0()) {
					for (int j = 0; j < 4; j++) {
						c0[j] = unlitColor[j];
					}
				} else {
					c0 = Vec4f::FromRGBA(gstate.getMaterialAmbientRGBA());
				}
				if (lmode) {
					// c1 is already 0.
				}
			}

			float ruv[2] = {0.0f, 0.0f};
			if (reader.hasUV())
				reader.ReadUV(ruv);

			// Perform texture coordinate generation after the transform and lighting - one style of UV depends on lights.
			switch (gstate.getUVGenMode()) {
			case GE_TEXMAP_TEXTURE_COORDS:	// UV mapping
			case GE_TEXMAP_UNKNOWN: // Seen in Riviera.  Unsure of meaning, but this works.
				// We always prescale in the vertex decoder now.
				uv[0] = ruv[0];
				uv[1] = ruv[1];
				uv[2] = 1.0f;
				break;

			case GE_TEXMAP_TEXTURE_MATRIX:
				{
					// Projection mapping
					Vec3f source;
					switch (gstate.getUVProjMode())	{
					case GE_PROJMAP_POSITION: // Use model space XYZ as source
						source = pos;
						break;

					case GE_PROJMAP_UV: // Use unscaled UV as source
						source = Vec3f(ruv[0], ruv[1], 0.0f);
						break;

					case GE_PROJMAP_NORMALIZED_NORMAL: // Use normalized normal as source
						source = normal.Normalized();
						if (!reader.hasNormal()) {
							ERROR_LOG_REPORT(G3D, "Normal projection mapping without normal?");
						}
						break;

					case GE_PROJMAP_NORMAL: // Use non-normalized normal as source!
						source = normal;
						if (!reader.hasNormal()) {
							ERROR_LOG_REPORT(G3D, "Normal projection mapping without normal?");
						}
						break;
					}

					float uvw[3];
					Vec3ByMatrix43(uvw, &source.x, gstate.tgenMatrix);
					uv[0] = uvw[0];
					uv[1] = uvw[1];
					uv[2] = uvw[2];
				}
				break;

			case GE_TEXMAP_ENVIRONMENT_MAP:
				// Shade mapping - use two light sources to generate U and V.
				{
					Vec3f lightpos0 = Vec3f(&lighter.lpos[gstate.getUVLS0() * 3]).Normalized();
					Vec3f lightpos1 = Vec3f(&lighter.lpos[gstate.getUVLS1() * 3]).Normalized();

					uv[0] = (1.0f + Dot(lightpos0, worldnormal))/2.0f;
					uv[1] = (1.0f + Dot(lightpos1, worldnormal))/2.0f;
					uv[2] = 1.0f;
				}
				break;

			default:
				// Illegal
				ERROR_LOG_REPORT(G3D, "Impossible UV gen mode? %d", gstate.getUVGenMode());
				break;
			}

			uv[0] = uv[0] * widthFactor;
			uv[1] = uv[1] * heightFactor;

			// Transform the coord by the view matrix.
			Vec3ByMatrix43(v, out, gstate.viewMatrix);
			fogCoef = (v[2] + fog_end) * fog_slope;

			// TODO: Write to a flexible buffer, we don't always need all four components.
			memcpy(&transformed[index].x, v, 3 * sizeof(float));
			transformed[index].fog = fogCoef;
			memcpy(&transformed[index].u, uv, 3 * sizeof(float));
			transformed[index].color0_32 = c0.ToRGBA();
			transformed[index].color1_32 = c1.ToRGBA();

			// The multiplication by the projection matrix is still performed in the vertex shader.
			// So is vertex depth rounding, to simulate the 16-bit depth buffer.
		}
	}

	// Here's the best opportunity to try to detect rectangles used to clear the screen, and
	// replace them with real clears. This can provide a speedup on certain mobile chips.
	//
	// An alternative option is to simply ditch all the verts except the first and last to create a single
	// rectangle out of many. Quite a small optimization though.
	// Experiment: Disable on PowerVR (see issue #6290)
	// TODO: This bleeds outside the play area in non-buffered mode. Big deal? Probably not.
	bool reallyAClear = false;
	if (maxIndex > 1 && prim == GE_PRIM_RECTANGLES && gstate.isModeClear()) {
		int scissorX2 = gstate.getScissorX2() + 1;
		int scissorY2 = gstate.getScissorY2() + 1;
		reallyAClear = IsReallyAClear(transformed, maxIndex, scissorX2, scissorY2);
	}
	if (reallyAClear && gl_extensions.gpuVendor != GPU_VENDOR_POWERVR) {  // && g_Config.iRenderingMode != FB_NON_BUFFERED_MODE) {
		// If alpha is not allowed to be separate, it must match for both depth/stencil and color.  Vulkan requires this.
		bool alphaMatchesColor = gstate.isClearModeColorMask() == gstate.isClearModeAlphaMask();
		bool depthMatchesStencil = gstate.isClearModeAlphaMask() == gstate.isClearModeDepthMask();
		if (params->allowSeparateAlphaClear || (alphaMatchesColor && depthMatchesStencil)) {
			result->color = transformed[1].color0_32;
			// Need to rescale from a [0, 1] float.  This is the final transformed value.
			result->depth = ToScaledDepth((s16)(int)(transformed[1].z * 65535.0f));
			result->action = SW_CLEAR;
			return;
		}
	}

	// This means we're using a framebuffer (and one that isn't big enough.)
	if (gstate_c.curTextureHeight < (u32)h && maxIndex >= 2) {
		// Even if not rectangles, this will detect if either of the first two are outside the framebuffer.
		// HACK: Adding one pixel margin to this detection fixes issues in Assassin's Creed : Bloodlines,
		// while still keeping BOF working (see below).
		const float invTexH = 1.0f / gstate_c.curTextureHeight; // size of one texel.
		bool tlOutside;
		bool tlAlmostOutside;
		bool brOutside;
		// If we're outside heightFactor, then v must be wrapping or clamping.  Avoid this workaround.
		// If we're <= 1.0f, we're inside the framebuffer (workaround not needed.)
		// We buffer that 1.0f a little more with a texel to avoid some false positives.
		tlOutside = transformed[0].v <= heightFactor && transformed[0].v > 1.0f + invTexH;
		brOutside = transformed[1].v <= heightFactor && transformed[1].v > 1.0f + invTexH;
		// Careful: if br is outside, but tl is well inside, this workaround still doesn't make sense.
		// We go with halfway, since we overestimate framebuffer heights sometimes but not by much.
		tlAlmostOutside = transformed[0].v <= heightFactor && transformed[0].v >= 0.5f;
		if (tlOutside || (brOutside && tlAlmostOutside)) {
			// Okay, so we're texturing from outside the framebuffer, but inside the texture height.
			// Breath of Fire 3 does this to access a render surface at an offset.
			const u32 bpp = fbman->GetTargetFormat() == GE_FORMAT_8888 ? 4 : 2;
			const u32 prevH = texCache->AttachedDrawingHeight();
			const u32 fb_size = bpp * fbman->GetTargetStride() * prevH;
			const u32 prevYOffset = gstate_c.curTextureYOffset;
			if (texCache->SetOffsetTexture(fb_size)) {
				const float oldWidthFactor = widthFactor;
				const float oldHeightFactor = heightFactor;
				widthFactor = (float) w / (float) gstate_c.curTextureWidth;
				heightFactor = (float) h / (float) gstate_c.curTextureHeight;

				// We've already baked in the old gstate_c.curTextureYOffset, so correct.
				const float yDiff = (float) (prevH + prevYOffset - gstate_c.curTextureYOffset) / (float) h;
				for (int index = 0; index < maxIndex; ++index) {
					transformed[index].u *= widthFactor / oldWidthFactor;
					// Inverse it back to scale to the new FBO, and add 1.0f to account for old FBO.
					transformed[index].v = (transformed[index].v / oldHeightFactor - yDiff) * heightFactor;
				}
			}
		}
	}

	// Step 2: expand rectangles.
	drawBuffer = transformed;
	numTrans = 0;
	drawIndexed = false;

	if (prim != GE_PRIM_RECTANGLES) {
		// We can simply draw the unexpanded buffer.
		numTrans = vertexCount;
		drawIndexed = true;
	} else {
		bool useBufferedRendering = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE;
		if (useBufferedRendering)
			ySign = -ySign;

		float flippedMatrix[16];
		if (!throughmode) {
			memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float));

			const bool invertedY = useBufferedRendering ? (gstate_c.vpHeight < 0) : (gstate_c.vpHeight > 0);
			if (invertedY) {
				flippedMatrix[1] = -flippedMatrix[1];
				flippedMatrix[5] = -flippedMatrix[5];
				flippedMatrix[9] = -flippedMatrix[9];
				flippedMatrix[13] = -flippedMatrix[13];
			}
			const bool invertedX = gstate_c.vpWidth < 0;
			if (invertedX) {
				flippedMatrix[0] = -flippedMatrix[0];
				flippedMatrix[4] = -flippedMatrix[4];
				flippedMatrix[8] = -flippedMatrix[8];
				flippedMatrix[12] = -flippedMatrix[12];
			}
		}

		//rectangles always need 2 vertices, disregard the last one if there's an odd number
		vertexCount = vertexCount & ~1;
		numTrans = 0;
		drawBuffer = transformedExpanded;
		TransformedVertex *trans = &transformedExpanded[0];
		const u16 *indsIn = (const u16 *)inds;
		u16 *newInds = inds + vertexCount;
		u16 *indsOut = newInds;
		maxIndex = 4 * vertexCount;
		for (int i = 0; i < vertexCount; i += 2) {
			const TransformedVertex &transVtxTL = transformed[indsIn[i + 0]];
			const TransformedVertex &transVtxBR = transformed[indsIn[i + 1]];

			// We have to turn the rectangle into two triangles, so 6 points.
			// This is 4 verts + 6 indices.

			// bottom right
			trans[0] = transVtxBR;

			// top right
			trans[1] = transVtxBR;
			trans[1].y = transVtxTL.y;
			trans[1].v = transVtxTL.v;

			// top left
			trans[2] = transVtxBR;
			trans[2].x = transVtxTL.x;
			trans[2].y = transVtxTL.y;
			trans[2].u = transVtxTL.u;
			trans[2].v = transVtxTL.v;

			// bottom left
			trans[3] = transVtxBR;
			trans[3].x = transVtxTL.x;
			trans[3].u = transVtxTL.u;

			// That's the four corners. Now process UV rotation.
			if (throughmode)
				RotateUVThrough(trans);
			else
				RotateUV(trans, flippedMatrix, ySign);

			// Triangle: BR-TR-TL
			indsOut[0] = i * 2 + 0;
			indsOut[1] = i * 2 + 1;
			indsOut[2] = i * 2 + 2;
			// Triangle: BL-BR-TL
			indsOut[3] = i * 2 + 3;
			indsOut[4] = i * 2 + 0;
			indsOut[5] = i * 2 + 2;
			trans += 4;
			indsOut += 6;

			numTrans += 6;
		}
		inds = newInds;
		drawIndexed = true;

		// We don't know the color until here, so we have to do it now, instead of in StateMapping.
		// Might want to reconsider the order of things later...
		if (gstate.isModeClear() && gstate.isClearModeAlphaMask()) {
			result->setStencil = true;
			if (vertexCount > 1) {
				// Take the bottom right alpha value of the first rect as the stencil value.
				// Technically, each rect could individually fill its stencil, but most of the
				// time they use the same one.
				result->stencilValue = transformed[indsIn[1]].color0[3];
			} else {
				result->stencilValue = 0;
			}
		}
	}

	result->action = SW_DRAW_PRIMITIVES;
}