// Captures the GE lighting state needed to light vertices in software.
//
// vertType: GE vertex type word. Only its color-format bits (GE_VTYPE_COL_MASK)
//           are inspected here, to decide whether the material-update bits apply.
//
// When lighting is disabled the constructor returns immediately and assigns
// none of the members.
Lighter::Lighter(int vertType) {
	if (!gstate.isLightingEnabled())
		return;

	doShadeMapping_ = gstate.getUVGenMode() == GE_TEXMAP_ENVIRONMENT_MAP;

	// Material and global colors.
	materialEmissive.GetFromRGB(gstate.materialemissive);
	materialEmissive.a = 0.0f;
	globalAmbient.GetFromRGB(gstate.ambientcolor);
	globalAmbient.GetFromA(gstate.ambientalpha);
	materialAmbient.GetFromRGB(gstate.materialambient);
	materialAmbient.GetFromA(gstate.materialalpha);
	materialDiffuse.GetFromRGB(gstate.materialdiffuse);
	materialDiffuse.a = 1.0f;
	materialSpecular.GetFromRGB(gstate.materialspecular);
	materialSpecular.a = 1.0f;
	specCoef_ = getFloat24(gstate.materialspecularcoef);
	// viewer_ = Vec3f(-gstate.viewMatrix[9], -gstate.viewMatrix[10], -gstate.viewMatrix[11]);

	// The material-update bits are only kept when the vertex format carries a color.
	bool hasColor = (vertType & GE_VTYPE_COL_MASK) != 0;
	materialUpdate_ = hasColor ? (gstate.materialupdate & 7) : 0;

	for (int l = 0; l < 4; l++) {
		// Each light owns three consecutive entries in lpos/ldir/latt/lcolor.
		int i = l * 3;
		if (gstate.isLightChanEnabled(l)) {
			lpos[i] = getFloat24(gstate.lpos[i]);
			lpos[i + 1] = getFloat24(gstate.lpos[i + 1]);
			lpos[i + 2] = getFloat24(gstate.lpos[i + 2]);
			ldir[i] = getFloat24(gstate.ldir[i]);
			ldir[i + 1] = getFloat24(gstate.ldir[i + 1]);
			ldir[i + 2] = getFloat24(gstate.ldir[i + 2]);
			latt[i] = getFloat24(gstate.latt[i]);
			latt[i + 1] = getFloat24(gstate.latt[i + 1]);
			latt[i + 2] = getFloat24(gstate.latt[i + 2]);
			// Unpack the three 24-bit colors of this light (low byte first) to 0..1 floats.
			for (int t = 0; t < 3; t++) {
				u32 data = gstate.lcolor[l * 3 + t] & 0xFFFFFF;
				float r = (float)(data & 0xff) * (1.0f / 255.0f);
				float g = (float)((data >> 8) & 0xff) * (1.0f / 255.0f);
				float b = (float)(data >> 16) * (1.0f / 255.0f);
				lcolor[t][l][0] = r;
				lcolor[t][l][1] = g;
				lcolor[t][l][2] = b;
			}
		}
	}
}
// Refreshes the lighting fields of the uniform block *ub from the current GE state.
// Only the groups whose bit is set in dirtyUniforms are rewritten; everything else
// in *ub is left as it was.
void LightUpdateUniforms(UB_VS_Lights *ub, uint64_t dirtyUniforms) {
	// Global ambient and material colors.
	if (dirtyUniforms & DIRTY_AMBIENT)
		Uint8x3ToFloat4_AlphaUint8(ub->ambientColor, gstate.ambientcolor, gstate.getAmbientA());

	if (dirtyUniforms & DIRTY_MATDIFFUSE)
		Uint8x3ToFloat4(ub->materialDiffuse, gstate.materialdiffuse);

	if (dirtyUniforms & DIRTY_MATSPECULAR) {
		// The specular coefficient is passed as the alpha argument, never below zero.
		const float specCoef = std::max(0.0f, getFloat24(gstate.materialspecularcoef));
		Uint8x3ToFloat4_Alpha(ub->materialSpecular, gstate.materialspecular, specCoef);
	}

	if (dirtyUniforms & DIRTY_MATEMISSIVE)
		Uint8x3ToFloat4(ub->materialEmissive, gstate.materialemissive);

	// Per-light data: each light has its own dirty bit, DIRTY_LIGHT0 << index.
	for (int light = 0; light < 4; ++light) {
		if (!(dirtyUniforms & (DIRTY_LIGHT0 << light)))
			continue;

		const int base = light * 3;

		if (gstate.isDirectionalLight(light)) {
			// Prenormalize. A zero-length vector is passed through unscaled.
			const float x = getFloat24(gstate.lpos[base + 0]);
			const float y = getFloat24(gstate.lpos[base + 1]);
			const float z = getFloat24(gstate.lpos[base + 2]);
			const float length = sqrtf(x*x + y*y + z*z);
			const float scale = (length == 0.0f) ? 1.0f : 1.0f / length;
			float vec[3] = { x * scale, y * scale, z * scale };
			CopyFloat3To4(ub->lpos[light], vec);
		} else {
			ExpandFloat24x3ToFloat4(ub->lpos[light], &gstate.lpos[base]);
		}

		ExpandFloat24x3ToFloat4(ub->ldir[light], &gstate.ldir[base]);
		ExpandFloat24x3ToFloat4(ub->latt[light], &gstate.latt[base]);
		CopyFloat1To4(ub->lightAngle[light], getFloat24(gstate.lcutoff[light]));
		CopyFloat1To4(ub->lightSpotCoef[light], getFloat24(gstate.lconv[light]));

		// lcolor holds ambient, diffuse, specular for each light, in that order.
		Uint8x3ToFloat4(ub->lightAmbient[light], gstate.lcolor[base]);
		Uint8x3ToFloat4(ub->lightDiffuse[light], gstate.lcolor[base + 1]);
		Uint8x3ToFloat4(ub->lightSpecular[light], gstate.lcolor[base + 2]);
	}
}
// TODO: This is ugly static inline ScreenCoords ClipToScreenInternal(const ClipCoords& coords, bool set_flag = true) { ScreenCoords ret; // TODO: Check for invalid parameters (x2 < x1, etc) float vpx1 = getFloat24(gstate.viewportx1); float vpx2 = getFloat24(gstate.viewportx2); float vpy1 = getFloat24(gstate.viewporty1); float vpy2 = getFloat24(gstate.viewporty2); float vpz1 = getFloat24(gstate.viewportz1); float vpz2 = getFloat24(gstate.viewportz2); float retx = coords.x * vpx1 / coords.w + vpx2; float rety = coords.y * vpy1 / coords.w + vpy2; float retz = coords.z * vpz1 / coords.w + vpz2; if (gstate.clipEnable & 0x1) { if (retz < 0.f) retz = 0.f; if (retz > 65535.f) retz = 65535.f; } if (set_flag && (retx > 4095.9375f || rety > 4096.9375f || retx < 0 || rety < 0 || retz < 0 || retz > 65535.f)) outside_range_flag = true; // 16 = 0xFFFF / 4095.9375 return ScreenCoords(retx * 16, rety * 16, retz); }
// Captures the material and global lighting colors from the current GE state.
// This overload reads no per-light data and always keeps the low three
// material-update bits.
Lighter::Lighter() {
	doShadeMapping_ = gstate.getUVGenMode() == GE_TEXMAP_ENVIRONMENT_MAP;
	materialUpdate_ = gstate.materialupdate & 7;
	specCoef_ = getFloat24(gstate.materialspecularcoef);

	// Emissive: RGB from the register, alpha forced to zero.
	materialEmissive.GetFromRGB(gstate.materialemissive);
	materialEmissive.a = 0.0f;

	// Ambient colors take their alpha from dedicated registers.
	globalAmbient.GetFromRGB(gstate.ambientcolor);
	globalAmbient.GetFromA(gstate.ambientalpha);
	materialAmbient.GetFromRGB(gstate.materialambient);
	materialAmbient.GetFromA(gstate.materialalpha);

	// Diffuse and specular: RGB from the register, alpha forced to one.
	materialDiffuse.GetFromRGB(gstate.materialdiffuse);
	materialDiffuse.a = 1.0f;
	materialSpecular.GetFromRGB(gstate.materialspecular);
	materialSpecular.a = 1.0f;

	// Disabled:
	// viewer_ = Vec3f(-gstate.viewMatrix[9], -gstate.viewMatrix[10], -gstate.viewMatrix[11]);
}
// Maps clip-space coords to screen space: each axis is multiplied by its
// viewport scale register, divided by w and shifted by its viewport offset
// register. Returns x and y multiplied by 16, and z unscaled.
static Vec3f ClipToScreen(const Vec4f& coords) {
	// TODO: Check for invalid parameters (x2 < x1, etc)
	const float scaleX = getFloat24(gstate.viewportx1);
	const float offsetX = getFloat24(gstate.viewportx2);
	const float scaleY = getFloat24(gstate.viewporty1);
	const float offsetY = getFloat24(gstate.viewporty2);
	const float scaleZ = getFloat24(gstate.viewportz1);
	const float offsetZ = getFloat24(gstate.viewportz2);

	const float screenX = coords.x * scaleX / coords.w + offsetX;
	const float screenY = coords.y * scaleY / coords.w + offsetY;
	const float screenZ = coords.z * scaleZ / coords.w + offsetZ;

	// 16 = 0xFFFF / 4095.9375
	return Vec3f(screenX * 16, screenY * 16, screenZ);
}
void ShaderManagerVulkan::LightUpdateUniforms(int dirtyUniforms) { // Lighting if (dirtyUniforms & DIRTY_AMBIENT) { Uint8x3ToFloat4_AlphaUint8(ub_lights.ambientColor, gstate.ambientcolor, gstate.getAmbientA()); } if (dirtyUniforms & DIRTY_MATAMBIENTALPHA) { // Note - this one is not in lighting but in transformCommon as it has uses beyond lighting Uint8x3ToFloat4_AlphaUint8(ub_base.matAmbient, gstate.materialambient, gstate.getMaterialAmbientA()); } if (dirtyUniforms & DIRTY_MATDIFFUSE) { Uint8x3ToFloat4(ub_lights.materialDiffuse, gstate.materialdiffuse); } if (dirtyUniforms & DIRTY_MATEMISSIVE) { Uint8x3ToFloat4(ub_lights.materialEmissive, gstate.materialemissive); } if (dirtyUniforms & DIRTY_MATSPECULAR) { Uint8x3ToFloat4_Alpha(ub_lights.materialSpecular, gstate.materialspecular, getFloat24(gstate.materialspecularcoef)); } for (int i = 0; i < 4; i++) { if (dirtyUniforms & (DIRTY_LIGHT0 << i)) { if (gstate.isDirectionalLight(i)) { // Prenormalize float x = getFloat24(gstate.lpos[i * 3 + 0]); float y = getFloat24(gstate.lpos[i * 3 + 1]); float z = getFloat24(gstate.lpos[i * 3 + 2]); float len = sqrtf(x*x + y*y + z*z); if (len == 0.0f) len = 1.0f; else len = 1.0f / len; float vec[3] = { x * len, y * len, z * len }; CopyFloat3To4(ub_lights.lpos[i], vec); } else { ExpandFloat24x3ToFloat4(ub_lights.lpos[i], &gstate.lpos[i * 3]); } ExpandFloat24x3ToFloat4(ub_lights.ldir[i], &gstate.ldir[i * 3]); ExpandFloat24x3ToFloat4(ub_lights.latt[i], &gstate.latt[i * 3]); CopyFloat1To4(ub_lights.lightAngle[i], getFloat24(gstate.lcutoff[i])); CopyFloat1To4(ub_lights.lightSpotCoef[i], getFloat24(gstate.lconv[i])); Uint8x3ToFloat4(ub_lights.lightAmbient[i], gstate.lcolor[i * 3]); Uint8x3ToFloat4(ub_lights.lightDiffuse[i], gstate.lcolor[i * 3 + 1]); Uint8x3ToFloat4(ub_lights.lightSpecular[i], gstate.lcolor[i * 3 + 2]); } } }
void TransformDrawEngine::SoftwareTransformAndDraw( int prim, u8 *decoded, LinkedShader *program, int vertexCount, u32 vertType, void *inds, int indexType, const DecVtxFormat &decVtxFormat, int maxIndex) { bool throughmode = (vertType & GE_VTYPE_THROUGH_MASK) != 0; bool lmode = gstate.isUsingSecondaryColor() && gstate.isLightingEnabled(); // TODO: Split up into multiple draw calls for GLES 2.0 where you can't guarantee support for more than 0x10000 verts. #if defined(MOBILE_DEVICE) if (vertexCount > 0x10000/3) vertexCount = 0x10000/3; #endif float uscale = 1.0f; float vscale = 1.0f; bool scaleUV = false; if (throughmode) { uscale /= gstate_c.curTextureWidth; vscale /= gstate_c.curTextureHeight; } else { scaleUV = !g_Config.bPrescaleUV; } bool skinningEnabled = vertTypeIsSkinningEnabled(vertType); int w = gstate.getTextureWidth(0); int h = gstate.getTextureHeight(0); float widthFactor = (float) w / (float) gstate_c.curTextureWidth; float heightFactor = (float) h / (float) gstate_c.curTextureHeight; Lighter lighter(vertType); float fog_end = getFloat24(gstate.fog1); float fog_slope = getFloat24(gstate.fog2); VertexReader reader(decoded, decVtxFormat, vertType); for (int index = 0; index < maxIndex; index++) { reader.Goto(index); float v[3] = {0, 0, 0}; float c0[4] = {1, 1, 1, 1}; float c1[4] = {0, 0, 0, 0}; float uv[3] = {0, 0, 1}; float fogCoef = 1.0f; if (throughmode) { // Do not touch the coordinates or the colors. No lighting. reader.ReadPos(v); if (reader.hasColor0()) { reader.ReadColor0(c0); for (int j = 0; j < 4; j++) { c1[j] = 0.0f; } } else { c0[0] = gstate.getMaterialAmbientR() / 255.f; c0[1] = gstate.getMaterialAmbientG() / 255.f; c0[2] = gstate.getMaterialAmbientB() / 255.f; c0[3] = gstate.getMaterialAmbientA() / 255.f; } if (reader.hasUV()) { reader.ReadUV(uv); uv[0] *= uscale; uv[1] *= vscale; } fogCoef = 1.0f; // Scale UV? 
} else { // We do software T&L for now float out[3], norm[3]; float pos[3], nrm[3]; Vec3f normal(0, 0, 1); reader.ReadPos(pos); if (reader.hasNormal()) reader.ReadNrm(nrm); if (!skinningEnabled) { Vec3ByMatrix43(out, pos, gstate.worldMatrix); if (reader.hasNormal()) { Norm3ByMatrix43(norm, nrm, gstate.worldMatrix); normal = Vec3f(norm).Normalized(); } } else { float weights[8]; reader.ReadWeights(weights); // Skinning Vec3f psum(0,0,0); Vec3f nsum(0,0,0); for (int i = 0; i < vertTypeGetNumBoneWeights(vertType); i++) { if (weights[i] != 0.0f) { Vec3ByMatrix43(out, pos, gstate.boneMatrix+i*12); Vec3f tpos(out); psum += tpos * weights[i]; if (reader.hasNormal()) { Norm3ByMatrix43(norm, nrm, gstate.boneMatrix+i*12); Vec3f tnorm(norm); nsum += tnorm * weights[i]; } } } // Yes, we really must multiply by the world matrix too. Vec3ByMatrix43(out, psum.AsArray(), gstate.worldMatrix); if (reader.hasNormal()) { Norm3ByMatrix43(norm, nsum.AsArray(), gstate.worldMatrix); normal = Vec3f(norm).Normalized(); } } // Perform lighting here if enabled. don't need to check through, it's checked above. float unlitColor[4] = {1, 1, 1, 1}; if (reader.hasColor0()) { reader.ReadColor0(unlitColor); } else { unlitColor[0] = gstate.getMaterialAmbientR() / 255.f; unlitColor[1] = gstate.getMaterialAmbientG() / 255.f; unlitColor[2] = gstate.getMaterialAmbientB() / 255.f; unlitColor[3] = gstate.getMaterialAmbientA() / 255.f; } float litColor0[4]; float litColor1[4]; lighter.Light(litColor0, litColor1, unlitColor, out, normal); if (gstate.isLightingEnabled()) { // Don't ignore gstate.lmode - we should send two colors in that case for (int j = 0; j < 4; j++) { c0[j] = litColor0[j]; } if (lmode) { // Separate colors for (int j = 0; j < 4; j++) { c1[j] = litColor1[j]; } } else { // Summed color into c0 for (int j = 0; j < 4; j++) { c0[j] = ((c0[j] + litColor1[j]) > 1.0f) ? 
1.0f : (c0[j] + litColor1[j]); } } } else { if (reader.hasColor0()) { for (int j = 0; j < 4; j++) { c0[j] = unlitColor[j]; } } else { c0[0] = gstate.getMaterialAmbientR() / 255.f; c0[1] = gstate.getMaterialAmbientG() / 255.f; c0[2] = gstate.getMaterialAmbientB() / 255.f; c0[3] = gstate.getMaterialAmbientA() / 255.f; } if (lmode) { for (int j = 0; j < 4; j++) { c1[j] = 0.0f; } } } float ruv[2] = {0.0f, 0.0f}; if (reader.hasUV()) reader.ReadUV(ruv); // Perform texture coordinate generation after the transform and lighting - one style of UV depends on lights. switch (gstate.getUVGenMode()) { case GE_TEXMAP_TEXTURE_COORDS: // UV mapping case GE_TEXMAP_UNKNOWN: // Seen in Riviera. Unsure of meaning, but this works. // Texture scale/offset is only performed in this mode. if (scaleUV) { uv[0] = ruv[0]*gstate_c.uv.uScale + gstate_c.uv.uOff; uv[1] = ruv[1]*gstate_c.uv.vScale + gstate_c.uv.vOff; } else { uv[0] = ruv[0]; uv[1] = ruv[1]; } uv[2] = 1.0f; break; case GE_TEXMAP_TEXTURE_MATRIX: { // Projection mapping Vec3f source; switch (gstate.getUVProjMode()) { case GE_PROJMAP_POSITION: // Use model space XYZ as source source = pos; break; case GE_PROJMAP_UV: // Use unscaled UV as source source = Vec3f(ruv[0], ruv[1], 0.0f); break; case GE_PROJMAP_NORMALIZED_NORMAL: // Use normalized normal as source if (reader.hasNormal()) { source = Vec3f(norm).Normalized(); } else { ERROR_LOG_REPORT(G3D, "Normal projection mapping without normal?"); source = Vec3f(0.0f, 0.0f, 1.0f); } break; case GE_PROJMAP_NORMAL: // Use non-normalized normal as source! if (reader.hasNormal()) { source = Vec3f(norm); } else { ERROR_LOG_REPORT(G3D, "Normal projection mapping without normal?"); source = Vec3f(0.0f, 0.0f, 1.0f); } break; } float uvw[3]; Vec3ByMatrix43(uvw, &source.x, gstate.tgenMatrix); uv[0] = uvw[0]; uv[1] = uvw[1]; uv[2] = uvw[2]; } break; case GE_TEXMAP_ENVIRONMENT_MAP: // Shade mapping - use two light sources to generate U and V. 
{ Vec3f lightpos0 = Vec3f(gstate_c.lightpos[gstate.getUVLS0()]).Normalized(); Vec3f lightpos1 = Vec3f(gstate_c.lightpos[gstate.getUVLS1()]).Normalized(); uv[0] = (1.0f + Dot(lightpos0, normal))/2.0f; uv[1] = (1.0f - Dot(lightpos1, normal))/2.0f; uv[2] = 1.0f; } break; default: // Illegal ERROR_LOG_REPORT(G3D, "Impossible UV gen mode? %d", gstate.getUVGenMode()); break; } uv[0] = uv[0] * widthFactor; uv[1] = uv[1] * heightFactor; // Transform the coord by the view matrix. Vec3ByMatrix43(v, out, gstate.viewMatrix); fogCoef = (v[2] + fog_end) * fog_slope; } // TODO: Write to a flexible buffer, we don't always need all four components. memcpy(&transformed[index].x, v, 3 * sizeof(float)); transformed[index].fog = fogCoef; memcpy(&transformed[index].u, uv, 3 * sizeof(float)); if (gstate_c.flipTexture) { transformed[index].v = 1.0f - transformed[index].v; } for (int i = 0; i < 4; i++) { transformed[index].color0[i] = c0[i] * 255.0f; } for (int i = 0; i < 3; i++) { transformed[index].color1[i] = c1[i] * 255.0f; } } // Here's the best opportunity to try to detect rectangles used to clear the screen, and // replace them with real OpenGL clears. This can provide a speedup on certain mobile chips. // Disabled for now - depth does not come out exactly the same. // // An alternative option is to simply ditch all the verts except the first and last to create a single // rectangle out of many. Quite a small optimization though. 
if (false && maxIndex > 1 && gstate.isModeClear() && prim == GE_PRIM_RECTANGLES && IsReallyAClear(maxIndex)) { u32 clearColor; memcpy(&clearColor, transformed[0].color0, 4); float clearDepth = transformed[0].z; const float col[4] = { ((clearColor & 0xFF)) / 255.0f, ((clearColor & 0xFF00) >> 8) / 255.0f, ((clearColor & 0xFF0000) >> 16) / 255.0f, ((clearColor & 0xFF000000) >> 24) / 255.0f, }; bool colorMask = gstate.isClearModeColorMask(); bool alphaMask = gstate.isClearModeAlphaMask(); glstate.colorMask.set(colorMask, colorMask, colorMask, alphaMask); if (alphaMask) { glstate.stencilTest.set(true); // Clear stencil // TODO: extract the stencilValue properly, see below int stencilValue = 0; glstate.stencilFunc.set(GL_ALWAYS, stencilValue, 255); } else { // Don't touch stencil glstate.stencilTest.set(false); } glstate.scissorTest.set(false); bool depthMask = gstate.isClearModeDepthMask(); int target = 0; if (colorMask || alphaMask) target |= GL_COLOR_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; if (depthMask) target |= GL_DEPTH_BUFFER_BIT; glClearColor(col[0], col[1], col[2], col[3]); #ifdef USING_GLES2 glClearDepthf(clearDepth); #else glClearDepth(clearDepth); #endif glClearStencil(0); // TODO - take from alpha? glClear(target); return; }
// Lights one vertex in software, in place.
//
// When the UV generation mode is environment mapping, texturecoords.s/t are
// first derived from the lights selected by getUVLS0()/getUVLS1(), whether or
// not lighting is enabled. If lighting is disabled nothing else is touched.
// Otherwise color0 receives emissive + ambient + diffuse (plus specular unless
// a secondary color is in use), color1 receives the specular sum or zero, and
// every written channel is clamped to [0, 255].
void Process(VertexData& vertex) {
	Vec3<int> materialEmissive(gstate.getMaterialEmissiveR(), gstate.getMaterialEmissiveG(), gstate.getMaterialEmissiveB());
	// materialupdate bit 0: the vertex color replaces the material ambient color.
	Vec3<int> materialAmbient = (gstate.materialupdate & 1)
		? vertex.color0.rgb()
		: Vec3<int>(gstate.getMaterialAmbientR(), gstate.getMaterialAmbientG(), gstate.getMaterialAmbientB());
	Vec3<int> globalAmbient(gstate.getAmbientR(), gstate.getAmbientG(), gstate.getAmbientB());

	Vec3<int> litColor = materialEmissive + materialAmbient * globalAmbient / 255;
	Vec3<int> specularSum(0, 0, 0);

	// Always calculate texture coords from lighting results if environment mapping is active
	// TODO: specular lighting should affect this, too!
	// TODO: Not sure if this really should be done even if lighting is disabled altogether
	if (gstate.getUVGenMode() == GE_TEXMAP_ENVIRONMENT_MAP) {
		for (unsigned int light = 0; light < 4; ++light) {
			Vec3<float> lightPos(getFloat24(gstate.lpos[3*light]&0xFFFFFF),
			                     getFloat24(gstate.lpos[3*light+1]&0xFFFFFF),
			                     getFloat24(gstate.lpos[3*light+2]&0xFFFFFF));
			const float cosine = Dot(lightPos, vertex.worldnormal) / lightPos.Length() / vertex.worldnormal.Length();
			// Remap from [-1, 1] to [0, 1].
			const float coord = (cosine + 1.f) / 2.f;
			if (gstate.getUVLS0() == light)
				vertex.texturecoords.s() = coord;
			if (gstate.getUVLS1() == light)
				vertex.texturecoords.t() = coord;
		}
	}

	if (!gstate.isLightingEnabled())
		return;

	for (unsigned int light = 0; light < 4; ++light) {
		if (!gstate.isLightChanEnabled(light))
			continue;

		// toLight = vector from vertex to light source
		// TODO: Should transfer the light positions to world/view space for these calculations
		Vec3<float> toLight(getFloat24(gstate.lpos[3*light]&0xFFFFFF),
		                    getFloat24(gstate.lpos[3*light+1]&0xFFFFFF),
		                    getFloat24(gstate.lpos[3*light+2]&0xFFFFFF));
		toLight -= vertex.worldpos;
		const float dist = toLight.Length();

		// Distance attenuation, skipped for directional lights.
		float att = 1.f;
		if (!gstate.isDirectionalLight(light)) {
			const float constantTerm = getFloat24(gstate.latt[3*light]&0xFFFFFF);
			const float linearTerm = getFloat24(gstate.latt[3*light+1]&0xFFFFFF);
			const float quadraticTerm = getFloat24(gstate.latt[3*light+2]&0xFFFFFF);
			att = 1.f / (constantTerm + linearTerm * dist + quadraticTerm * dist * dist);
			if (att > 1.f)
				att = 1.f;
			if (att < 0.f)
				att = 0.f;
		}

		// Spotlight factor: zero at or below the cutoff, otherwise cosine^conv.
		float spot = 1.f;
		if (gstate.isSpotLight(light)) {
			Vec3<float> spotDir(getFloat24(gstate.ldir[3*light]&0xFFFFFF),
			                    getFloat24(gstate.ldir[3*light+1]&0xFFFFFF),
			                    getFloat24(gstate.ldir[3*light+2]&0xFFFFFF));
			float spotCos = Dot(-toLight, spotDir) / dist / spotDir.Length();
			float cutoff = getFloat24(gstate.lcutoff[light]&0xFFFFFF);
			if (spotCos > cutoff) {
				float conv = getFloat24(gstate.lconv[light]&0xFFFFFF);
				spot = pow(spotCos, conv);
			} else {
				spot = 0.f;
			}
		}

		// ambient lighting
		Vec3<int> lightAmbient(gstate.getLightAmbientColorR(light), gstate.getLightAmbientColorG(light), gstate.getLightAmbientColorB(light));
		litColor.r() += static_cast<int>(att * spot * lightAmbient.r() * materialAmbient.r() / 255);
		litColor.g() += static_cast<int>(att * spot * lightAmbient.g() * materialAmbient.g() / 255);
		litColor.b() += static_cast<int>(att * spot * lightAmbient.b() * materialAmbient.b() / 255);

		// diffuse lighting
		Vec3<int> lightDiffuse(gstate.getDiffuseColorR(light), gstate.getDiffuseColorG(light), gstate.getDiffuseColorB(light));
		// materialupdate bit 1: the vertex color replaces the material diffuse color.
		Vec3<int> materialDiffuse = (gstate.materialupdate & 2)
			? vertex.color0.rgb()
			: Vec3<int>(gstate.getMaterialDiffuseR(), gstate.getMaterialDiffuseG(), gstate.getMaterialDiffuseB());

		float diffuseFactor = Dot(toLight, vertex.worldnormal) / dist / vertex.worldnormal.Length();
		if (gstate.isUsingPoweredDiffuseLight(light)) {
			float exponent = getFloat24(gstate.materialspecularcoef&0xFFFFFF);
			diffuseFactor = pow(diffuseFactor, exponent);
		}
		if (diffuseFactor > 0.f) {
			litColor.r() += static_cast<int>(att * spot * lightDiffuse.r() * materialDiffuse.r() * diffuseFactor / 255);
			litColor.g() += static_cast<int>(att * spot * lightDiffuse.g() * materialDiffuse.g() * diffuseFactor / 255);
			litColor.b() += static_cast<int>(att * spot * lightDiffuse.b() * materialDiffuse.b() * diffuseFactor / 255);
		}

		if (!gstate.isUsingSpecularLight(light))
			continue;

		// specular lighting: half vector between the eye direction and the light direction.
		Vec3<float> eye(0.f, 0.f, 1.f);
		Mat3x3<float> viewRotation(gstate.viewMatrix);
		Vec3<float> worldEye = viewRotation.Inverse() * (eye - Vec3<float>(gstate.viewMatrix[9], gstate.viewMatrix[10], gstate.viewMatrix[11]));
		Vec3<float> halfVec = worldEye / worldEye.Length() + toLight / toLight.Length();

		Vec3<int> lightSpecular(gstate.getSpecularColorR(light), gstate.getSpecularColorG(light), gstate.getSpecularColorB(light));
		// materialupdate bit 2: the vertex color replaces the material specular color.
		Vec3<int> materialSpecular = (gstate.materialupdate & 4)
			? vertex.color0.rgb()
			: Vec3<int>(gstate.getMaterialSpecularR(), gstate.getMaterialSpecularG(), gstate.getMaterialSpecularB());

		float specularFactor = Dot(halfVec, vertex.worldnormal) / halfVec.Length() / vertex.worldnormal.Length();
		float shininess = getFloat24(gstate.materialspecularcoef&0xFFFFFF);
		specularFactor = pow(specularFactor, shininess);

		if (specularFactor > 0.f) {
			specularSum.r() += static_cast<int>(att * spot * lightSpecular.r() * materialSpecular.r() * specularFactor / 255);
			specularSum.g() += static_cast<int>(att * spot * lightSpecular.g() * materialSpecular.g() * specularFactor / 255);
			specularSum.b() += static_cast<int>(att * spot * lightSpecular.b() * materialSpecular.b() * specularFactor / 255);
		}
	}

	vertex.color0.r() = litColor.r();
	vertex.color0.g() = litColor.g();
	vertex.color0.b() = litColor.b();

	// Specular goes to the secondary color when that is in use, otherwise it is
	// folded into the primary color.
	if (gstate.isUsingSecondaryColor()) {
		vertex.color1 = specularSum;
	} else {
		vertex.color0.r() += specularSum.r();
		vertex.color0.g() += specularSum.g();
		vertex.color0.b() += specularSum.b();
		vertex.color1 = Vec3<int>(0, 0, 0);
	}

	int materialAlpha = (gstate.materialupdate&1) ? vertex.color0.a() : gstate.getMaterialAmbientA();
	vertex.color0.a() = gstate.getAmbientA() * materialAlpha / 255;

	// Clamp every output channel to [0, 255].
	if (vertex.color0.r() > 255) vertex.color0.r() = 255;
	else if (vertex.color0.r() < 0) vertex.color0.r() = 0;
	if (vertex.color0.g() > 255) vertex.color0.g() = 255;
	else if (vertex.color0.g() < 0) vertex.color0.g() = 0;
	if (vertex.color0.b() > 255) vertex.color0.b() = 255;
	else if (vertex.color0.b() < 0) vertex.color0.b() = 0;
	if (vertex.color0.a() > 255) vertex.color0.a() = 255;
	else if (vertex.color0.a() < 0) vertex.color0.a() = 0;

	if (vertex.color1.r() > 255) vertex.color1.r() = 255;
	else if (vertex.color1.r() < 0) vertex.color1.r() = 0;
	if (vertex.color1.g() > 255) vertex.color1.g() = 255;
	else if (vertex.color1.g() < 0) vertex.color1.g() = 0;
	if (vertex.color1.b() > 255) vertex.color1.b() = 255;
	else if (vertex.color1.b() < 0) vertex.color1.b() = 0;
}
void Light(float colorOut[4], const float colorIn[4], Vec3 pos, Vec3 normal, float dots[4]) { // could cache a lot of stuff, such as ambient, across vertices... bool doShadeMapping = (gstate.texmapmode & 0x3) == 2; if (!doShadeMapping && !(gstate.lightEnable[0]&1) && !(gstate.lightEnable[1]&1) && !(gstate.lightEnable[2]&1) && !(gstate.lightEnable[3]&1)) { memcpy(colorOut, colorIn, sizeof(float) * 4); return; } Color4 emissive; emissive.GetFromRGB(gstate.materialemissive); Color4 globalAmbient; globalAmbient.GetFromRGB(gstate.ambientcolor); globalAmbient.GetFromA(gstate.ambientalpha); Vec3 norm = normal.Normalized(); Color4 in(colorIn); Color4 ambient; if (gstate.materialupdate & 1) { ambient = in; } else { ambient.GetFromRGB(gstate.materialambient); ambient.a=1.0f; } Color4 diffuse; if (gstate.materialupdate & 2) { diffuse = in; } else { diffuse.GetFromRGB(gstate.materialdiffuse); diffuse.a=1.0f; } Color4 specular; if (gstate.materialupdate & 4) { specular = in; } else { specular.GetFromRGB(gstate.materialspecular); specular.a=1.0f; } float specCoef = getFloat24(gstate.materialspecularcoef); norm.Normalize(); Vec3 viewer(gstate.viewMatrix[8], gstate.viewMatrix[9], gstate.viewMatrix[10]); Color4 lightSum = globalAmbient * ambient + emissive; // Try lights.elf - there's something wrong with the lighting for (int l = 0; l < 4; l++) { // can we skip this light? 
if ((gstate.lightEnable[l] & 1) == 0) // && !doShadeMapping) continue; GELightComputation comp = (GELightComputation)(gstate.ltype[l]&3); GELightType type = (GELightType)((gstate.ltype[l]>>8)&3); Vec3 toLight; if (type == GE_LIGHTTYPE_DIRECTIONAL) toLight = Vec3(gstate.lightpos[l]); else toLight = Vec3(gstate.lightpos[l]) - pos; Vec3 dir = Vec3(gstate.lightdir[l]); bool doSpecular = (comp != GE_LIGHTCOMP_ONLYDIFFUSE); bool poweredDiffuse = comp == GE_LIGHTCOMP_BOTHWITHPOWDIFFUSE; float distance = toLight.Normalize(); float lightScale = 1.0f; if (type != GE_LIGHTTYPE_DIRECTIONAL) { lightScale = 1.0f / (gstate.lightatt[l][0] + gstate.lightatt[l][1]*distance + gstate.lightatt[l][2]*distance*distance); if (lightScale>1.0f) lightScale=1.0f; } float dot = toLight * norm; // Clamp dot to zero. if (dot < 0.0f) dot = 0.0f; if (poweredDiffuse) dot = powf(dot, specCoef); Color4 diff = (gstate.lightColor[1][l] * diffuse) * (dot*lightScale); Color4 spec(0,0,0,0); if (doSpecular) { Vec3 halfVec = toLight; halfVec += viewer.Normalized(); halfVec.Normalize(); dot = halfVec * norm; if (dot >= 0) { spec += (gstate.lightColor[2][l] * specular * (powf(dot, specCoef)*lightScale)); } } dots[l] = dot; if (gstate.lightEnable[l] & 1) { lightSum += gstate.lightColor[0][l]*ambient + diff + spec; } } for (int i = 0; i < 3; i++) colorOut[i] = lightSum[i]; }
void TransformDrawEngine::ApplyDrawState(int prim) { // TODO: All this setup is soon so expensive that we'll need dirty flags, or simply do it in the command writes where we detect dirty by xoring. Silly to do all this work on every drawcall. if (gstate_c.textureChanged != TEXCHANGE_UNCHANGED && !gstate.isModeClear() && gstate.isTextureMapEnabled()) { textureCache_->SetTexture(); gstate_c.textureChanged = TEXCHANGE_UNCHANGED; } // TODO: The top bit of the alpha channel should be written to the stencil bit somehow. This appears to require very expensive multipass rendering :( Alternatively, one could do a // single fullscreen pass that converts alpha to stencil (or 2 passes, to set both the 0 and 1 values) very easily. // Set blend bool wantBlend = !gstate.isModeClear() && gstate.isAlphaBlendEnabled(); glstate.blend.set(wantBlend); if (wantBlend) { // This can't be done exactly as there are several PSP blend modes that are impossible to do on OpenGL ES 2.0, and some even on regular OpenGL for desktop. // HOWEVER - we should be able to approximate the 2x modes in the shader, although they will clip wrongly. // Examples of seen unimplementable blend states: // Mortal Kombat Unchained: FixA=0000ff FixB=000080 FuncA=10 FuncB=10 int blendFuncA = gstate.getBlendFuncA(); int blendFuncB = gstate.getBlendFuncB(); GEBlendMode blendFuncEq = gstate.getBlendEq(); if (blendFuncA > GE_SRCBLEND_FIXA) blendFuncA = GE_SRCBLEND_FIXA; if (blendFuncB > GE_DSTBLEND_FIXB) blendFuncB = GE_DSTBLEND_FIXB; float constantAlpha = 1.0f; ReplaceAlphaType replaceAlphaWithStencil = ReplaceAlphaWithStencil(); if (gstate.isStencilTestEnabled() && replaceAlphaWithStencil == REPLACE_ALPHA_NO) { if (ReplaceAlphaWithStencilType() == STENCIL_VALUE_UNIFORM) { constantAlpha = (float) gstate.getStencilTestRef() * (1.0f / 255.0f); } } // Shortcut by using GL_ONE where possible, no need to set blendcolor GLuint glBlendFuncA = blendFuncA == GE_SRCBLEND_FIXA ? 
blendColor2Func(gstate.getFixA()) : aLookup[blendFuncA]; GLuint glBlendFuncB = blendFuncB == GE_DSTBLEND_FIXB ? blendColor2Func(gstate.getFixB()) : bLookup[blendFuncB]; if (replaceAlphaWithStencil == REPLACE_ALPHA_DUALSOURCE) { glBlendFuncA = toDualSource(glBlendFuncA); glBlendFuncB = toDualSource(glBlendFuncB); } if (blendFuncA == GE_SRCBLEND_FIXA || blendFuncB == GE_DSTBLEND_FIXB) { Vec3f fixA = Vec3f::FromRGB(gstate.getFixA()); Vec3f fixB = Vec3f::FromRGB(gstate.getFixB()); if (glBlendFuncA == GL_INVALID_ENUM && glBlendFuncB != GL_INVALID_ENUM) { // Can use blendcolor trivially. const float blendColor[4] = {fixA.x, fixA.y, fixA.z, constantAlpha}; glstate.blendColor.set(blendColor); glBlendFuncA = GL_CONSTANT_COLOR; } else if (glBlendFuncA != GL_INVALID_ENUM && glBlendFuncB == GL_INVALID_ENUM) { // Can use blendcolor trivially. const float blendColor[4] = {fixB.x, fixB.y, fixB.z, constantAlpha}; glstate.blendColor.set(blendColor); glBlendFuncB = GL_CONSTANT_COLOR; } else if (glBlendFuncA == GL_INVALID_ENUM && glBlendFuncB == GL_INVALID_ENUM) { if (blendColorSimilar(fixA, Vec3f::AssignToAll(constantAlpha) - fixB)) { glBlendFuncA = GL_CONSTANT_COLOR; glBlendFuncB = GL_ONE_MINUS_CONSTANT_COLOR; const float blendColor[4] = {fixA.x, fixA.y, fixA.z, constantAlpha}; glstate.blendColor.set(blendColor); } else if (blendColorSimilar(fixA, fixB)) { glBlendFuncA = GL_CONSTANT_COLOR; glBlendFuncB = GL_CONSTANT_COLOR; const float blendColor[4] = {fixA.x, fixA.y, fixA.z, constantAlpha}; glstate.blendColor.set(blendColor); } else { static bool didReportBlend = false; if (!didReportBlend) Reporting::ReportMessage("ERROR INVALID blendcolorstate: FixA=%06x FixB=%06x FuncA=%i FuncB=%i", gstate.getFixA(), gstate.getFixB(), gstate.getBlendFuncA(), gstate.getBlendFuncB()); didReportBlend = true; DEBUG_LOG(G3D, "ERROR INVALID blendcolorstate: FixA=%06x FixB=%06x FuncA=%i FuncB=%i", gstate.getFixA(), gstate.getFixB(), gstate.getBlendFuncA(), gstate.getBlendFuncB()); // Let's approximate, 
at least. Close is better than totally off. const bool nearZeroA = blendColorSimilar(fixA, Vec3f::AssignToAll(0.0f), 0.25f); const bool nearZeroB = blendColorSimilar(fixB, Vec3f::AssignToAll(0.0f), 0.25f); if (nearZeroA || blendColorSimilar(fixA, Vec3f::AssignToAll(1.0f), 0.25f)) { glBlendFuncA = nearZeroA ? GL_ZERO : GL_ONE; glBlendFuncB = GL_CONSTANT_COLOR; const float blendColor[4] = {fixB.x, fixB.y, fixB.z, constantAlpha}; glstate.blendColor.set(blendColor); // We need to pick something. Let's go with A as the fixed color. } else { glBlendFuncA = GL_CONSTANT_COLOR; glBlendFuncB = nearZeroB ? GL_ZERO : GL_ONE; const float blendColor[4] = {fixA.x, fixA.y, fixA.z, constantAlpha}; glstate.blendColor.set(blendColor); } } } else { // We optimized both, but that's probably not necessary, so let's pick one to be constant. // For now let's just pick whichever was fixed instead of checking error. if (blendFuncA == GE_SRCBLEND_FIXA) { glBlendFuncA = GL_CONSTANT_COLOR; const float blendColor[4] = {fixA.x, fixA.y, fixA.z, constantAlpha}; glstate.blendColor.set(blendColor); } else { glBlendFuncB = GL_CONSTANT_COLOR; const float blendColor[4] = {fixB.x, fixB.y, fixB.z, constantAlpha}; glstate.blendColor.set(blendColor); } } } else if (constantAlpha < 1.0f) { const float blendColor[4] = {1.0f, 1.0f, 1.0f, constantAlpha}; glstate.blendColor.set(blendColor); } // Some Android devices (especially Mali, it seems) composite badly if there's alpha in the backbuffer. // So in non-buffered rendering, we will simply consider the dest alpha to be zero in blending equations. 
#ifdef ANDROID if (g_Config.iRenderingMode == FB_NON_BUFFERED_MODE) { if (glBlendFuncA == GL_DST_ALPHA) glBlendFuncA = GL_ZERO; if (glBlendFuncB == GL_DST_ALPHA) glBlendFuncB = GL_ZERO; if (glBlendFuncA == GL_ONE_MINUS_DST_ALPHA) glBlendFuncA = GL_ONE; if (glBlendFuncB == GL_ONE_MINUS_DST_ALPHA) glBlendFuncB = GL_ONE; } #endif // At this point, through all paths above, glBlendFuncA and glBlendFuncB will be set right somehow. // The stencil-to-alpha in fragment shader doesn't apply here (blending is enabled), and we shouldn't // do any blending in the alpha channel as that doesn't seem to happen on PSP. So lacking a better option, // the only value we can set alpha to here without multipass and dual source alpha is zero (by setting // the factors to zero). So let's do that. if (replaceAlphaWithStencil != REPLACE_ALPHA_NO) { // Let the fragment shader take care of it. glstate.blendFuncSeparate.set(glBlendFuncA, glBlendFuncB, GL_ONE, GL_ZERO); } else if (gstate.isStencilTestEnabled()) { switch (ReplaceAlphaWithStencilType()) { case STENCIL_VALUE_KEEP: glstate.blendFuncSeparate.set(glBlendFuncA, glBlendFuncB, GL_ZERO, GL_ONE); break; case STENCIL_VALUE_ONE: // This won't give one but it's our best shot... glstate.blendFuncSeparate.set(glBlendFuncA, glBlendFuncB, GL_ONE, GL_ONE); break; case STENCIL_VALUE_ZERO: glstate.blendFuncSeparate.set(glBlendFuncA, glBlendFuncB, GL_ZERO, GL_ZERO); break; case STENCIL_VALUE_UNIFORM: // This won't give a correct value (it multiplies) but it may be better than random values. glstate.blendFuncSeparate.set(glBlendFuncA, glBlendFuncB, GL_CONSTANT_ALPHA, GL_ZERO); break; case STENCIL_VALUE_UNKNOWN: // For now, let's err at zero. This is INVERT or INCR/DECR. glstate.blendFuncSeparate.set(glBlendFuncA, glBlendFuncB, GL_ZERO, GL_ZERO); break; } } else { // Retain the existing value when stencil testing is off. 
glstate.blendFuncSeparate.set(glBlendFuncA, glBlendFuncB, GL_ZERO, GL_ONE); } if (blendFuncEq == GE_BLENDMODE_ABSDIFF) { WARN_LOG_REPORT_ONCE(blendAbsdiff, G3D, "Unsupported absdiff blend mode"); } if (((blendFuncEq >= GE_BLENDMODE_MIN) && gl_extensions.EXT_blend_minmax) || gl_extensions.GLES3) { if (blendFuncEq == GE_BLENDMODE_ABSDIFF && gl_extensions.EXT_shader_framebuffer_fetch) { // Handle GE_BLENDMODE_ABSDIFF in fragment shader and turn off regular alpha blending here. glstate.blend.set(false); } else { glstate.blendEquation.set(eqLookup[blendFuncEq]); } } else { glstate.blendEquation.set(eqLookupNoMinMax[blendFuncEq]); } } bool alwaysDepthWrite = g_Config.bAlwaysDepthWrite; bool enableStencilTest = !g_Config.bDisableStencilTest; // Dither if (gstate.isDitherEnabled()) { glstate.dither.enable(); glstate.dither.set(GL_TRUE); } else glstate.dither.disable(); if (gstate.isModeClear()) { #if !defined(USING_GLES2) // Logic Ops glstate.colorLogicOp.disable(); #endif // Culling glstate.cullFace.disable(); // Depth Test glstate.depthTest.enable(); glstate.depthFunc.set(GL_ALWAYS); glstate.depthWrite.set(gstate.isClearModeDepthMask() || alwaysDepthWrite ? GL_TRUE : GL_FALSE); if (gstate.isClearModeDepthMask() || alwaysDepthWrite) { framebufferManager_->SetDepthUpdated(); } // Color Test bool colorMask = gstate.isClearModeColorMask(); bool alphaMask = gstate.isClearModeAlphaMask(); glstate.colorMask.set(colorMask, colorMask, colorMask, alphaMask); // Stencil Test if (alphaMask && enableStencilTest) { glstate.stencilTest.enable(); glstate.stencilOp.set(GL_REPLACE, GL_REPLACE, GL_REPLACE); // TODO: In clear mode, the stencil value is set to the alpha value of the vertex. // A normal clear will be 2 points, the second point has the color. // We should set "ref" to that value instead of 0. // In case of clear rectangles, we set it again once we know what the color is. 
glstate.stencilFunc.set(GL_ALWAYS, 255, 0xFF); } else { glstate.stencilTest.disable(); } } else { #if !defined(USING_GLES2) // Logic Ops if (gstate.isLogicOpEnabled() && gstate.getLogicOp() != GE_LOGIC_COPY) { glstate.colorLogicOp.enable(); glstate.logicOp.set(logicOps[gstate.getLogicOp()]); } else { glstate.colorLogicOp.disable(); } #endif // Set cull bool cullEnabled = !gstate.isModeThrough() && prim != GE_PRIM_RECTANGLES && gstate.isCullEnabled(); if (cullEnabled) { glstate.cullFace.enable(); glstate.cullFaceMode.set(cullingMode[gstate.getCullMode()]); } else { glstate.cullFace.disable(); } // Depth Test if (gstate.isDepthTestEnabled()) { glstate.depthTest.enable(); glstate.depthFunc.set(ztests[gstate.getDepthTestFunction()]); glstate.depthWrite.set(gstate.isDepthWriteEnabled() || alwaysDepthWrite ? GL_TRUE : GL_FALSE); framebufferManager_->SetDepthUpdated(); } else { glstate.depthTest.disable(); } // PSP color/alpha mask is per bit but we can only support per byte. // But let's do that, at least. And let's try a threshold. bool rmask = (gstate.pmskc & 0xFF) < 128; bool gmask = ((gstate.pmskc >> 8) & 0xFF) < 128; bool bmask = ((gstate.pmskc >> 16) & 0xFF) < 128; bool amask = (gstate.pmska & 0xFF) < 128; // Let's not write to alpha if stencil isn't enabled. if (!gstate.isStencilTestEnabled()) { amask = false; } else { // If the stencil type is set to KEEP, we shouldn't write to the stencil/alpha channel. if (ReplaceAlphaWithStencilType() == STENCIL_VALUE_KEEP) { amask = false; } } if (g_Config.bAlphaMaskHack) { amask = true; // Yes, this makes no sense, but it "fixes" the 3rd Birthday by popular demand. 
} glstate.colorMask.set(rmask, gmask, bmask, amask); // Stencil Test if (gstate.isStencilTestEnabled() && enableStencilTest) { glstate.stencilTest.enable(); glstate.stencilFunc.set(ztests[gstate.getStencilTestFunction()], gstate.getStencilTestRef(), gstate.getStencilTestMask()); glstate.stencilOp.set(stencilOps[gstate.getStencilOpSFail()], // stencil fail stencilOps[gstate.getStencilOpZFail()], // depth fail stencilOps[gstate.getStencilOpZPass()]); // depth pass } else { glstate.stencilTest.disable(); } } float renderWidthFactor, renderHeightFactor; float renderWidth, renderHeight; float renderX, renderY; bool useBufferedRendering = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE; if (useBufferedRendering) { renderX = 0.0f; renderY = 0.0f; renderWidth = framebufferManager_->GetRenderWidth(); renderHeight = framebufferManager_->GetRenderHeight(); } else { // TODO: Aspect-ratio aware and centered float pixelW = PSP_CoreParameter().pixelWidth; float pixelH = PSP_CoreParameter().pixelHeight; CenterRect(&renderX, &renderY, &renderWidth, &renderHeight, 480, 272, pixelW, pixelH); } renderWidthFactor = (float)renderWidth / framebufferManager_->GetTargetWidth(); renderHeightFactor = (float)renderHeight / framebufferManager_->GetTargetHeight(); bool throughmode = gstate.isModeThrough(); // Scissor int scissorX1 = gstate.getScissorX1(); int scissorY1 = gstate.getScissorY1(); int scissorX2 = gstate.getScissorX2() + 1; int scissorY2 = gstate.getScissorY2() + 1; // This is a bit of a hack as the render buffer isn't always that size if (scissorX1 == 0 && scissorY1 == 0 && scissorX2 >= (int) gstate_c.curRTWidth && scissorY2 >= (int) gstate_c.curRTHeight) { glstate.scissorTest.disable(); } else { glstate.scissorTest.enable(); glstate.scissorRect.set( renderX + scissorX1 * renderWidthFactor, renderY + renderHeight - (scissorY2 * renderHeightFactor), (scissorX2 - scissorX1) * renderWidthFactor, (scissorY2 - scissorY1) * renderHeightFactor); } /* int regionX1 = gstate.region1 & 0x3FF; 
int regionY1 = (gstate.region1 >> 10) & 0x3FF; int regionX2 = (gstate.region2 & 0x3FF) + 1; int regionY2 = ((gstate.region2 >> 10) & 0x3FF) + 1; */ int regionX1 = 0; int regionY1 = 0; int regionX2 = gstate_c.curRTWidth; int regionY2 = gstate_c.curRTHeight; float offsetX = gstate.getOffsetX(); float offsetY = gstate.getOffsetY(); if (throughmode) { // No viewport transform here. Let's experiment with using region. glstate.viewport.set( renderX + (0 + regionX1) * renderWidthFactor, renderY + (0 - regionY1) * renderHeightFactor, (regionX2 - regionX1) * renderWidthFactor, (regionY2 - regionY1) * renderHeightFactor); glstate.depthRange.set(0.0f, 1.0f); } else { // These we can turn into a glViewport call, offset by offsetX and offsetY. Math after. float vpXa = getFloat24(gstate.viewportx1); float vpXb = getFloat24(gstate.viewportx2); float vpYa = getFloat24(gstate.viewporty1); float vpYb = getFloat24(gstate.viewporty2); // The viewport transform appears to go like this: // Xscreen = -offsetX + vpXb + vpXa * Xview // Yscreen = -offsetY + vpYb + vpYa * Yview // Zscreen = vpZb + vpZa * Zview // This means that to get the analogue glViewport we must: float vpX0 = vpXb - offsetX - vpXa; float vpY0 = vpYb - offsetY + vpYa; // Need to account for sign of Y gstate_c.vpWidth = vpXa * 2.0f; gstate_c.vpHeight = -vpYa * 2.0f; float vpWidth = fabsf(gstate_c.vpWidth); float vpHeight = fabsf(gstate_c.vpHeight); vpX0 *= renderWidthFactor; vpY0 *= renderHeightFactor; vpWidth *= renderWidthFactor; vpHeight *= renderHeightFactor; vpX0 = (vpXb - offsetX - fabsf(vpXa)) * renderWidthFactor; // Flip vpY0 to match the OpenGL coordinate system. vpY0 = renderHeight - (vpYb - offsetY + fabsf(vpYa)) * renderHeightFactor; glstate.viewport.set(vpX0 + renderX, vpY0 + renderY, vpWidth, vpHeight); // Sadly, as glViewport takes integers, we will not be able to support sub pixel offsets this way. But meh. 
// shaderManager_->DirtyUniform(DIRTY_PROJMATRIX); float zScale = getFloat24(gstate.viewportz1) / 65535.0f; float zOff = getFloat24(gstate.viewportz2) / 65535.0f; float depthRangeMin = zOff - zScale; float depthRangeMax = zOff + zScale; glstate.depthRange.set(depthRangeMin, depthRangeMax); } }
// Refreshes the members of the shared vertex/fragment uniform block |ub| whose DIRTY_* bits
// are set in |dirtyUniforms|, reading the current values from gstate / gstate_c.
// |flipViewport| selects the projection convention: ConvertProjMatrixToD3D11 / setOrthoD3D
// when true, ConvertProjMatrixToVulkan / setOrthoVulkan otherwise.
void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipViewport) {
	// True when any of the given dirty bits is set.
	const auto isDirty = [dirtyUniforms](uint64_t flags) -> bool {
		return (dirtyUniforms & flags) != 0;
	};

	// Applies the display rotation matrix, but only in rendering mode 0 with a rotated display.
	const auto applyDisplayRotation = [](Matrix4x4 &m) {
		if (g_Config.iRenderingMode == 0 && g_display_rotation != DisplayRotation::ROTATE_0) {
			m = m * g_display_rot_matrix;
		}
	};

	if (isDirty(DIRTY_TEXENV)) {
		Uint8x3ToFloat4(ub->texEnvColor, gstate.texenvcolor);
	}

	if (isDirty(DIRTY_ALPHACOLORREF)) {
		// The alpha reference is pre-masked with the alpha test mask.
		Uint8x3ToInt4_Alpha(ub->alphaColorRef, gstate.getColorTestRef(), gstate.getAlphaTestRef() & gstate.getAlphaTestMask());
	}

	if (isDirty(DIRTY_ALPHACOLORMASK)) {
		Uint8x3ToInt4_Alpha(ub->colorTestMask, gstate.getColorTestMask(), gstate.getAlphaTestMask());
	}

	if (isDirty(DIRTY_FOGCOLOR)) {
		Uint8x3ToFloat4(ub->fogColor, gstate.fogcolor);
	}

	if (isDirty(DIRTY_SHADERBLEND)) {
		Uint8x3ToFloat4(ub->blendFixA, gstate.getFixA());
		Uint8x3ToFloat4(ub->blendFixB, gstate.getFixB());
	}

	if (isDirty(DIRTY_TEXCLAMP)) {
		const float recipTexW = 1.0f / (float)gstate_c.curTextureWidth;
		const float recipTexH = 1.0f / (float)gstate_c.curTextureHeight;
		const int texW = gstate.getTextureWidth(0);
		const int texH = gstate.getTextureHeight(0);

		// First wrap xy, then half texel xy (for clamp.)
		ub->texClamp[0] = (float)texW * recipTexW;
		ub->texClamp[1] = (float)texH * recipTexH;
		ub->texClamp[2] = recipTexW * 0.5f;
		ub->texClamp[3] = recipTexH * 0.5f;

		ub->texClampOffset[0] = gstate_c.curTextureXOffset * recipTexW;
		ub->texClampOffset[1] = gstate_c.curTextureYOffset * recipTexH;
	}

	if (isDirty(DIRTY_PROJMATRIX)) {
		Matrix4x4 proj;
		memcpy(&proj, gstate.projMatrix, 16 * sizeof(float));

		// Negative viewport height: negate elements 1, 5, 9 and 13.
		if (gstate_c.vpHeight < 0) {
			for (int i = 1; i < 16; i += 4) {
				proj[i] = -proj[i];
			}
		}
		// Negative viewport width: negate elements 0, 4, 8 and 12.
		if (gstate_c.vpWidth < 0) {
			for (int i = 0; i < 16; i += 4) {
				proj[i] = -proj[i];
			}
		}

		if (flipViewport) {
			ConvertProjMatrixToD3D11(proj);
		} else {
			ConvertProjMatrixToVulkan(proj);
		}

		applyDisplayRotation(proj);
		CopyMatrix4x4(ub->proj, proj.getReadPtr());
	}

	if (isDirty(DIRTY_PROJTHROUGHMATRIX)) {
		Matrix4x4 ortho;
		if (flipViewport) {
			ortho.setOrthoD3D(0.0f, gstate_c.curRTWidth, gstate_c.curRTHeight, 0, 0, 1);
		} else {
			ortho.setOrthoVulkan(0.0f, gstate_c.curRTWidth, 0, gstate_c.curRTHeight, 0, 1);
		}

		applyDisplayRotation(ortho);
		CopyMatrix4x4(ub->proj_through, ortho.getReadPtr());
	}

	// Transform
	if (isDirty(DIRTY_WORLDMATRIX)) {
		ConvertMatrix4x3To3x4Transposed(ub->world, gstate.worldMatrix);
	}
	if (isDirty(DIRTY_VIEWMATRIX)) {
		ConvertMatrix4x3To3x4Transposed(ub->view, gstate.viewMatrix);
	}
	if (isDirty(DIRTY_TEXMATRIX)) {
		ConvertMatrix4x3To3x4Transposed(ub->tex, gstate.tgenMatrix);
	}

	// Combined two small uniforms
	if (isDirty(DIRTY_FOGCOEF | DIRTY_STENCILREPLACEVALUE)) {
		float fogEnd = getFloat24(gstate.fog1);
		float fogSlope = getFloat24(gstate.fog2);
		const float stencilRef = (float)gstate.getStencilTestRef()/255.0f;

		if (my_isinf(fogSlope)) {
			// not really sure what a sensible value might be.
			fogSlope = fogSlope < 0.0f ? -10000.0f : 10000.0f;
		} else if (my_isnan(fogSlope)) {
			// Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988
			// Just put the fog far away at a large finite distance.
			// Infinities and NaNs are rather unpredictable in shaders on many GPUs
			// so it's best to just make it a sane calculation.
			fogEnd = 100000.0f;
			fogSlope = 1.0f;
		}
#ifndef MOBILE_DEVICE
		else if (my_isnanorinf(fogSlope) || my_isnanorinf(fogEnd)) {
			ERROR_LOG_REPORT_ONCE(fognan, G3D, "Unhandled fog NaN/INF combo: %f %f", fogEnd, fogSlope);
		}
#endif

		float packed[3] = { fogEnd, fogSlope, stencilRef };
		CopyFloat3(ub->fogCoef_stencil, packed);
	}

	// Note - this one is not in lighting but in transformCommon as it has uses beyond lighting
	if (isDirty(DIRTY_MATAMBIENTALPHA)) {
		Uint8x3ToFloat4_AlphaUint8(ub->matAmbient, gstate.materialambient, gstate.getMaterialAmbientA());
	}

	// Texturing
	if (isDirty(DIRTY_UVSCALEOFFSET)) {
		const float recipTexW = 1.0f / (float)gstate_c.curTextureWidth;
		const float recipTexH = 1.0f / (float)gstate_c.curTextureHeight;
		const int texW = gstate.getTextureWidth(0);
		const int texH = gstate.getTextureHeight(0);
		const float scaleU = (float)texW * recipTexW;
		const float scaleV = (float)texH * recipTexH;

		const bool generatedUV = gstate_c.bezier || gstate_c.spline;
		if (!generatedUV) {
			ub->uvScaleOffset[0] = scaleU;
			ub->uvScaleOffset[1] = scaleV;
			ub->uvScaleOffset[2] = 0.0f;
			ub->uvScaleOffset[3] = 0.0f;
		} else {
			// When we are generating UV coordinates through the bezier/spline, we need to apply the scaling.
			// However, this is missing a check that we're not getting our UV:s supplied for us in the vertices.
			ub->uvScaleOffset[0] = gstate_c.uv.uScale * scaleU;
			ub->uvScaleOffset[1] = gstate_c.uv.vScale * scaleV;
			ub->uvScaleOffset[2] = gstate_c.uv.uOff * scaleU;
			ub->uvScaleOffset[3] = gstate_c.uv.vOff * scaleV;
		}
	}

	if (isDirty(DIRTY_DEPTHRANGE)) {
		float zScale = gstate.getViewportZScale();
		float zCenter = gstate.getViewportZCenter();

		// We had to scale and translate Z to account for our clamped Z range.
		// Therefore, we also need to reverse this to round properly.
		//
		// Example: scale = 65535.0, center = 0.0
		// Resulting range = -65535 to 65535, clamped to [0, 65535]
		// gstate_c.vpDepthScale = 2.0f
		// gstate_c.vpZOffset = -1.0f
		//
		// The projection already accounts for those, so we need to reverse them.
		//
		// Additionally, D3D9 uses a range from [0, 1].  We double and move the center.
		zScale *= (1.0f / gstate_c.vpDepthScale) * 2.0f;
		zCenter -= 65535.0f * gstate_c.vpZOffset + 32768.5f;

		// Guard the reciprocal against a zero scale.
		const float zInvScale = (zScale != 0.0) ? 1.0f / zScale : 0.0f;

		ub->depthRange[0] = zScale;
		ub->depthRange[1] = zCenter;
		ub->depthRange[2] = zCenter;
		ub->depthRange[3] = zInvScale;
	}

	if (isDirty(DIRTY_BEZIERSPLINE)) {
		ub->spline_count_u = gstate_c.spline_count_u;
		ub->spline_count_v = gstate_c.spline_count_v;
		ub->spline_type_u = gstate_c.spline_type_u;
		ub->spline_type_v = gstate_c.spline_type_v;
	}
}
// Reads one vertex through |vreader| and returns it as a VertexData.
//
// Position, UV (only outside clear mode, with texture mapping enabled and UVs present),
// normal and colors are read from the reader. When skinning is enabled and we are not in
// through mode, position and normal are blended with the bone matrices. Outside through mode
// the vertex is then taken through the model -> world -> view -> clip -> screen transforms,
// gets a fog depth and a world normal, and is passed to Lighting::Process. In through mode the
// position is used directly as screen coordinates (in 1/16 units plus the screen offset).
VertexData TransformUnit::ReadVertex(VertexReader& vreader) {
	VertexData vertex;

	float pos[3];
	// VertexDecoder normally scales z, but we want it unscaled.
	vreader.ReadPosThroughZ16(pos);

	if (!gstate.isModeClear() && gstate.isTextureMapEnabled() && vreader.hasUV()) {
		float uv[2];
		vreader.ReadUV(uv);
		vertex.texturecoords = Vec2<float>(uv[0], uv[1]);
	}

	const bool hasNormal = vreader.hasNormal();
	if (hasNormal) {
		float normal[3];
		vreader.ReadNrm(normal);
		vertex.normal = Vec3<float>(normal[0], normal[1], normal[2]);

		if (gstate.areNormalsReversed())
			vertex.normal = -vertex.normal;
	}

	if (vertTypeIsSkinningEnabled(gstate.vertType) && !gstate.isModeThrough()) {
		float W[8] = { 1.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f };
		vreader.ReadWeights(W);

		Vec3<float> tmppos(0.f, 0.f, 0.f);
		Vec3<float> tmpnrm(0.f, 0.f, 0.f);

		// The weight count depends only on the vertex type, so query it once.
		const int numWeights = vertTypeGetNumBoneWeights(gstate.vertType);
		for (int i = 0; i < numWeights; ++i) {
			// Each bone occupies 12 floats: a 3x3 part followed by a 3-float translation.
			Mat3x3<float> bone(&gstate.boneMatrix[12*i]);
			tmppos += (bone * ModelCoords(pos[0], pos[1], pos[2]) + Vec3<float>(gstate.boneMatrix[12*i+9], gstate.boneMatrix[12*i+10], gstate.boneMatrix[12*i+11])) * W[i];
			if (hasNormal)
				tmpnrm += (bone * vertex.normal) * W[i];
		}

		pos[0] = tmppos.x;
		pos[1] = tmppos.y;
		pos[2] = tmppos.z;
		if (hasNormal)
			vertex.normal = tmpnrm;
	}

	if (vreader.hasColor0()) {
		float col[4];
		vreader.ReadColor0(col);
		vertex.color0 = Vec4<int>(col[0]*255, col[1]*255, col[2]*255, col[3]*255);
	} else {
		// No per-vertex color: fall back to the material ambient color.
		vertex.color0 = Vec4<int>(gstate.getMaterialAmbientR(), gstate.getMaterialAmbientG(), gstate.getMaterialAmbientB(), gstate.getMaterialAmbientA());
	}

	if (vreader.hasColor1()) {
		float col[3];
		vreader.ReadColor1(col);
		vertex.color1 = Vec3<int>(col[0]*255, col[1]*255, col[2]*255);
	} else {
		vertex.color1 = Vec3<int>(0, 0, 0);
	}

	if (!gstate.isModeThrough()) {
		vertex.modelpos = ModelCoords(pos[0], pos[1], pos[2]);
		vertex.worldpos = WorldCoords(TransformUnit::ModelToWorld(vertex.modelpos));
		ModelCoords viewpos = TransformUnit::WorldToView(vertex.worldpos);
		vertex.clippos = ClipCoords(TransformUnit::ViewToClip(viewpos));

		if (gstate.isFogEnabled()) {
			float fog_end = getFloat24(gstate.fog1);
			float fog_slope = getFloat24(gstate.fog2);
			// Same fixup as in ShaderManagerGLES.cpp
			if (my_isnanorinf(fog_end)) {
				// Not really sure what a sensible value might be, but let's try 64k.
				fog_end = std::signbit(fog_end) ? -65535.0f : 65535.0f;
			}
			if (my_isnanorinf(fog_slope)) {
				fog_slope = std::signbit(fog_slope) ? -65535.0f : 65535.0f;
			}
			vertex.fogdepth = (viewpos.z + fog_end) * fog_slope;
		} else {
			vertex.fogdepth = 1.0f;
		}

		vertex.screenpos = ClipToScreenInternal(vertex.clippos, &outside_range_flag);

		if (hasNormal) {
			vertex.worldnormal = TransformUnit::ModelToWorldNormal(vertex.normal);
			// TODO: Isn't there a flag that controls whether to normalize the normal?
			// Guard the division: a zero-length normal would otherwise turn into NaNs that
			// then get fed to lighting. Use the same default as the no-normal case instead.
			const float normalLength = vertex.worldnormal.Length();
			if (normalLength != 0.0f) {
				vertex.worldnormal /= normalLength;
			} else {
				vertex.worldnormal = Vec3<float>(0.0f, 0.0f, 1.0f);
			}
		} else {
			vertex.worldnormal = Vec3<float>(0.0f, 0.0f, 1.0f);
		}

		Lighting::Process(vertex, vreader.hasColor0());
	} else {
		// Through mode: no transform, positions are already in screen space.
		vertex.screenpos.x = (int)(pos[0] * 16) + gstate.getOffsetX16();
		vertex.screenpos.y = (int)(pos[1] * 16) + gstate.getOffsetY16();
		vertex.screenpos.z = pos[2];
		vertex.clippos.w = 1.f;
		vertex.fogdepth = 1.f;
	}

	return vertex;
}
void TransformDrawEngine::ApplyDrawState(int prim) { // TODO: All this setup is soon so expensive that we'll need dirty flags, or simply do it in the command writes where we detect dirty by xoring. Silly to do all this work on every drawcall. if (gstate_c.textureChanged) { if (gstate.isTextureMapEnabled()) { textureCache_->SetTexture(); } gstate_c.textureChanged = false; } // TODO: The top bit of the alpha channel should be written to the stencil bit somehow. This appears to require very expensive multipass rendering :( Alternatively, one could do a // single fullscreen pass that converts alpha to stencil (or 2 passes, to set both the 0 and 1 values) very easily. // Set blend bool wantBlend = !gstate.isModeClear() && gstate.isAlphaBlendEnabled(); glstate.blend.set(wantBlend); if (wantBlend) { // This can't be done exactly as there are several PSP blend modes that are impossible to do on OpenGL ES 2.0, and some even on regular OpenGL for desktop. // HOWEVER - we should be able to approximate the 2x modes in the shader, although they will clip wrongly. // Examples of seen unimplementable blend states: // Mortal Kombat Unchained: FixA=0000ff FixB=000080 FuncA=10 FuncB=10 int blendFuncA = gstate.getBlendFuncA(); int blendFuncB = gstate.getBlendFuncB(); int blendFuncEq = gstate.getBlendEq(); if (blendFuncA > GE_SRCBLEND_FIXA) blendFuncA = GE_SRCBLEND_FIXA; if (blendFuncB > GE_DSTBLEND_FIXB) blendFuncB = GE_DSTBLEND_FIXB; // Shortcut by using GL_ONE where possible, no need to set blendcolor GLuint glBlendFuncA = blendFuncA == GE_SRCBLEND_FIXA ? blendColor2Func(gstate.getFixA()) : aLookup[blendFuncA]; GLuint glBlendFuncB = blendFuncB == GE_DSTBLEND_FIXB ? 
blendColor2Func(gstate.getFixB()) : bLookup[blendFuncB]; if (blendFuncA == GE_SRCBLEND_FIXA || blendFuncB == GE_DSTBLEND_FIXB) { Vec3f fixA = Vec3f::FromRGB(gstate.getFixA()); Vec3f fixB = Vec3f::FromRGB(gstate.getFixB()); if (glBlendFuncA == GL_INVALID_ENUM && glBlendFuncB != GL_INVALID_ENUM) { // Can use blendcolor trivially. const float blendColor[4] = {fixA.x, fixA.y, fixA.z, 1.0f}; glstate.blendColor.set(blendColor); glBlendFuncA = GL_CONSTANT_COLOR; } else if (glBlendFuncA != GL_INVALID_ENUM && glBlendFuncB == GL_INVALID_ENUM) { // Can use blendcolor trivially. const float blendColor[4] = {fixB.x, fixB.y, fixB.z, 1.0f}; glstate.blendColor.set(blendColor); glBlendFuncB = GL_CONSTANT_COLOR; } else if (glBlendFuncA == GL_INVALID_ENUM && glBlendFuncB == GL_INVALID_ENUM) { if (blendColorSimilar(fixA, Vec3f::AssignToAll(1.0f) - fixB)) { glBlendFuncA = GL_CONSTANT_COLOR; glBlendFuncB = GL_ONE_MINUS_CONSTANT_COLOR; const float blendColor[4] = {fixA.x, fixA.y, fixA.z, 1.0f}; glstate.blendColor.set(blendColor); } else if (blendColorSimilar(fixA, fixB)) { glBlendFuncA = GL_CONSTANT_COLOR; glBlendFuncB = GL_CONSTANT_COLOR; const float blendColor[4] = {fixA.x, fixA.y, fixA.z, 1.0f}; glstate.blendColor.set(blendColor); } else { static bool didReportBlend = false; if (!didReportBlend) Reporting::ReportMessage("ERROR INVALID blendcolorstate: FixA=%06x FixB=%06x FuncA=%i FuncB=%i", gstate.getFixA(), gstate.getFixB(), gstate.getBlendFuncA(), gstate.getBlendFuncB()); didReportBlend = true; DEBUG_LOG(HLE, "ERROR INVALID blendcolorstate: FixA=%06x FixB=%06x FuncA=%i FuncB=%i", gstate.getFixA(), gstate.getFixB(), gstate.getBlendFuncA(), gstate.getBlendFuncB()); // Let's approximate, at least. Close is better than totally off. 
const bool nearZeroA = blendColorSimilar(fixA, Vec3f::AssignToAll(0.0f), 0.25f); const bool nearZeroB = blendColorSimilar(fixB, Vec3f::AssignToAll(0.0f), 0.25f); if (nearZeroA || blendColorSimilar(fixA, Vec3f::AssignToAll(1.0f), 0.25f)) { glBlendFuncA = nearZeroA ? GL_ZERO : GL_ONE; glBlendFuncB = GL_CONSTANT_COLOR; const float blendColor[4] = {fixB.x, fixB.y, fixB.z, 1.0f}; glstate.blendColor.set(blendColor); // We need to pick something. Let's go with A as the fixed color. } else { glBlendFuncA = GL_CONSTANT_COLOR; glBlendFuncB = nearZeroB ? GL_ZERO : GL_ONE; const float blendColor[4] = {fixA.x, fixA.y, fixA.z, 1.0f}; glstate.blendColor.set(blendColor); } } } } // At this point, through all paths above, glBlendFuncA and glBlendFuncB will be set right somehow. if (!gstate.isStencilTestEnabled()) { // Fixes some Persona 2 issues, may be correct? (that is, don't change dest alpha at all if blending) // If this doesn't break anything else, it's likely to be right. // I guess an alternative solution would be to simply disable alpha writes if alpha blending is enabled. glstate.blendFuncSeparate.set(glBlendFuncA, glBlendFuncB, GL_ZERO, glBlendFuncB); } else { glstate.blendFuncSeparate.set(glBlendFuncA, glBlendFuncB, glBlendFuncA, glBlendFuncB); } glstate.blendEquation.set(eqLookup[blendFuncEq]); } // Dither if (gstate.isDitherEnabled()) { glstate.dither.enable(); glstate.dither.set(GL_TRUE); } else glstate.dither.disable(); if (gstate.isModeClear()) { #if !defined(USING_GLES2) // Logic Ops glstate.colorLogicOp.disable(); #endif // Culling glstate.cullFace.disable(); // Depth Test glstate.depthTest.enable(); glstate.depthFunc.set(GL_ALWAYS); glstate.depthWrite.set(gstate.isClearModeDepthWriteEnabled() ? 
GL_TRUE : GL_FALSE); // Color Test bool colorMask = gstate.isClearModeColorMask(); bool alphaMask = gstate.isClearModeAlphaMask(); glstate.colorMask.set(colorMask, colorMask, colorMask, alphaMask); // Stencil Test if (alphaMask) { glstate.stencilTest.enable(); glstate.stencilOp.set(GL_REPLACE, GL_REPLACE, GL_REPLACE); glstate.stencilFunc.set(GL_ALWAYS, 0, 0xFF); } else glstate.stencilTest.disable(); } else { #if !defined(USING_GLES2) // Logic Ops if (gstate.isLogicOpEnabled() && gstate.getLogicOp() != GE_LOGIC_COPY) { glstate.colorLogicOp.enable(); glstate.logicOp.set(logicOps[gstate.getLogicOp()]); } else glstate.colorLogicOp.disable(); #endif // Set cull bool cullEnabled = !gstate.isModeThrough() && prim != GE_PRIM_RECTANGLES && gstate.isCullEnabled(); if (cullEnabled) { glstate.cullFace.enable(); glstate.cullFaceMode.set(cullingMode[gstate.getCullMode()]); } else glstate.cullFace.disable(); // Depth Test if (gstate.isDepthTestEnabled()) { glstate.depthTest.enable(); glstate.depthFunc.set(ztests[gstate.getDepthTestFunction()]); glstate.depthWrite.set(gstate.isDepthWriteEnabled() ? GL_TRUE : GL_FALSE); } else glstate.depthTest.disable(); // PSP color/alpha mask is per bit but we can only support per byte. // But let's do that, at least. And let's try a threshold. 
bool rmask = (gstate.pmskc & 0xFF) < 128; bool gmask = ((gstate.pmskc >> 8) & 0xFF) < 128; bool bmask = ((gstate.pmskc >> 16) & 0xFF) < 128; bool amask = (gstate.pmska & 0xFF) < 128; glstate.colorMask.set(rmask, gmask, bmask, amask); // Stencil Test if (gstate.isStencilTestEnabled()) { glstate.stencilTest.enable(); glstate.stencilFunc.set(ztests[gstate.getStencilTestFunction()], gstate.getStencilTestRef(), gstate.getStencilTestMask()); glstate.stencilOp.set(stencilOps[gstate.getStencilOpSFail()], // stencil fail stencilOps[gstate.getStencilOpZFail()], // depth fail stencilOps[gstate.getStencilOpZPass()]); // depth pass } else glstate.stencilTest.disable(); } float renderWidthFactor, renderHeightFactor; float renderWidth, renderHeight; float renderX, renderY; bool useBufferedRendering = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE; if (useBufferedRendering) { renderX = 0.0f; renderY = 0.0f; renderWidth = framebufferManager_->GetRenderWidth(); renderHeight = framebufferManager_->GetRenderHeight(); renderWidthFactor = (float)renderWidth / framebufferManager_->GetTargetWidth(); renderHeightFactor = (float)renderHeight / framebufferManager_->GetTargetHeight(); } else { // TODO: Aspect-ratio aware and centered float pixelW = PSP_CoreParameter().pixelWidth; float pixelH = PSP_CoreParameter().pixelHeight; CenterRect(&renderX, &renderY, &renderWidth, &renderHeight, 480, 272, pixelW, pixelH); renderWidthFactor = renderWidth / 480.0f; renderHeightFactor = renderHeight / 272.0f; } bool throughmode = gstate.isModeThrough(); // Scissor int scissorX1 = gstate.getScissorX1(); int scissorY1 = gstate.getScissorY1(); int scissorX2 = gstate.getScissorX2() + 1; int scissorY2 = gstate.getScissorY2() + 1; // This is a bit of a hack as the render buffer isn't always that size if (scissorX1 == 0 && scissorY1 == 0 && scissorX2 >= (int) gstate_c.curRTWidth && scissorY2 >= (int) gstate_c.curRTHeight) { glstate.scissorTest.disable(); } else { glstate.scissorTest.enable(); 
glstate.scissorRect.set( renderX + scissorX1 * renderWidthFactor, renderY + renderHeight - (scissorY2 * renderHeightFactor), (scissorX2 - scissorX1) * renderWidthFactor, (scissorY2 - scissorY1) * renderHeightFactor); } /* int regionX1 = gstate.region1 & 0x3FF; int regionY1 = (gstate.region1 >> 10) & 0x3FF; int regionX2 = (gstate.region2 & 0x3FF) + 1; int regionY2 = ((gstate.region2 >> 10) & 0x3FF) + 1; */ int regionX1 = 0; int regionY1 = 0; int regionX2 = gstate_c.curRTWidth; int regionY2 = gstate_c.curRTHeight; float offsetX = (float)(gstate.offsetx & 0xFFFF) / 16.0f; float offsetY = (float)(gstate.offsety & 0xFFFF) / 16.0f; if (throughmode) { // No viewport transform here. Let's experiment with using region. glstate.viewport.set( renderX + (0 + regionX1) * renderWidthFactor, renderY + (0 - regionY1) * renderHeightFactor, (regionX2 - regionX1) * renderWidthFactor, (regionY2 - regionY1) * renderHeightFactor); glstate.depthRange.set(0.0f, 1.0f); } else { // These we can turn into a glViewport call, offset by offsetX and offsetY. Math after. float vpXa = getFloat24(gstate.viewportx1); float vpXb = getFloat24(gstate.viewportx2); float vpYa = getFloat24(gstate.viewporty1); float vpYb = getFloat24(gstate.viewporty2); // The viewport transform appears to go like this: // Xscreen = -offsetX + vpXb + vpXa * Xview // Yscreen = -offsetY + vpYb + vpYa * Yview // Zscreen = vpZb + vpZa * Zview // This means that to get the analogue glViewport we must: float vpX0 = vpXb - offsetX - vpXa; float vpY0 = vpYb - offsetY + vpYa; // Need to account for sign of Y gstate_c.vpWidth = vpXa * 2.0f; gstate_c.vpHeight = -vpYa * 2.0f; float vpWidth = fabsf(gstate_c.vpWidth); float vpHeight = fabsf(gstate_c.vpHeight); vpX0 *= renderWidthFactor; vpY0 *= renderHeightFactor; vpWidth *= renderWidthFactor; vpHeight *= renderHeightFactor; vpX0 = (vpXb - offsetX - fabsf(vpXa)) * renderWidthFactor; // Flip vpY0 to match the OpenGL coordinate system. 
vpY0 = renderHeight - (vpYb - offsetY + fabsf(vpYa)) * renderHeightFactor; glstate.viewport.set(vpX0 + renderX, vpY0 + renderY, vpWidth, vpHeight); // Sadly, as glViewport takes integers, we will not be able to support sub pixel offsets this way. But meh. // shaderManager_->DirtyUniform(DIRTY_PROJMATRIX); float zScale = getFloat24(gstate.viewportz1) / 65535.0f; float zOff = getFloat24(gstate.viewportz2) / 65535.0f; float depthRangeMin = zOff - zScale; float depthRangeMax = zOff + zScale; glstate.depthRange.set(depthRangeMin, depthRangeMax); } }
void ShaderManagerDX9::VSUpdateUniforms(int dirtyUniforms) { // Update any dirty uniforms before we draw if (dirtyUniforms & DIRTY_PROJMATRIX) { Matrix4x4 flippedMatrix; memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float)); const bool invertedY = gstate_c.vpHeight < 0; if (!invertedY) { flippedMatrix[1] = -flippedMatrix[1]; flippedMatrix[5] = -flippedMatrix[5]; flippedMatrix[9] = -flippedMatrix[9]; flippedMatrix[13] = -flippedMatrix[13]; } const bool invertedX = gstate_c.vpWidth < 0; if (invertedX) { flippedMatrix[0] = -flippedMatrix[0]; flippedMatrix[4] = -flippedMatrix[4]; flippedMatrix[8] = -flippedMatrix[8]; flippedMatrix[12] = -flippedMatrix[12]; } ConvertProjMatrixToD3D(flippedMatrix, invertedX, invertedY); VSSetMatrix(CONST_VS_PROJ, flippedMatrix.getReadPtr()); } if (dirtyUniforms & DIRTY_PROJTHROUGHMATRIX) { Matrix4x4 proj_through; proj_through.setOrtho(0.0f, gstate_c.curRTWidth, gstate_c.curRTHeight, 0, 0, 1); ConvertProjMatrixToD3DThrough(proj_through); VSSetMatrix(CONST_VS_PROJ_THROUGH, proj_through.getReadPtr()); } // Transform if (dirtyUniforms & DIRTY_WORLDMATRIX) { VSSetMatrix4x3_3(CONST_VS_WORLD, gstate.worldMatrix); } if (dirtyUniforms & DIRTY_VIEWMATRIX) { VSSetMatrix4x3_3(CONST_VS_VIEW, gstate.viewMatrix); } if (dirtyUniforms & DIRTY_TEXMATRIX) { VSSetMatrix4x3_3(CONST_VS_TEXMTX, gstate.tgenMatrix); } if (dirtyUniforms & DIRTY_FOGCOEF) { float fogcoef[2] = { getFloat24(gstate.fog1), getFloat24(gstate.fog2), }; if (my_isinf(fogcoef[1])) { // not really sure what a sensible value might be. fogcoef[1] = fogcoef[1] < 0.0f ? -10000.0f : 10000.0f; } else if (my_isnan(fogcoef[1])) { // Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988 // Just put the fog far away at a large finite distance. // Infinities and NaNs are rather unpredictable in shaders on many GPUs // so it's best to just make it a sane calculation. 
fogcoef[0] = 100000.0f; fogcoef[1] = 1.0f; } #ifndef MOBILE_DEVICE else if (my_isnanorinf(fogcoef[1]) || my_isnanorinf(fogcoef[0])) { ERROR_LOG_REPORT_ONCE(fognan, G3D, "Unhandled fog NaN/INF combo: %f %f", fogcoef[0], fogcoef[1]); } #endif VSSetFloatArray(CONST_VS_FOGCOEF, fogcoef, 2); } // TODO: Could even set all bones in one go if they're all dirty. #ifdef USE_BONE_ARRAY if (u_bone != 0) { float allBones[8 * 16]; bool allDirty = true; for (int i = 0; i < numBones; i++) { if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) { ConvertMatrix4x3To4x4(allBones + 16 * i, gstate.boneMatrix + 12 * i); } else { allDirty = false; } } if (allDirty) { // Set them all with one call //glUniformMatrix4fv(u_bone, numBones, GL_FALSE, allBones); } else { // Set them one by one. Could try to coalesce two in a row etc but too lazy. for (int i = 0; i < numBones; i++) { if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) { //glUniformMatrix4fv(u_bone + i, 1, GL_FALSE, allBones + 16 * i); } } } } #else for (int i = 0; i < 8; i++) { if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) { VSSetMatrix4x3_3(CONST_VS_BONE0 + 3 * i, gstate.boneMatrix + 12 * i); } } #endif // Texturing if (dirtyUniforms & DIRTY_UVSCALEOFFSET) { const float invW = 1.0f / (float)gstate_c.curTextureWidth; const float invH = 1.0f / (float)gstate_c.curTextureHeight; const int w = gstate.getTextureWidth(0); const int h = gstate.getTextureHeight(0); const float widthFactor = (float)w * invW; const float heightFactor = (float)h * invH; float uvscaleoff[4]; switch (gstate.getUVGenMode()) { case GE_TEXMAP_TEXTURE_COORDS: // Not sure what GE_TEXMAP_UNKNOWN is, but seen in Riviera. Treating the same as GE_TEXMAP_TEXTURE_COORDS works. case GE_TEXMAP_UNKNOWN: if (g_Config.bPrescaleUV) { // We are here but are prescaling UV in the decoder? Let's do the same as in the other case // except consider *Scale and *Off to be 1 and 0. 
uvscaleoff[0] = widthFactor; uvscaleoff[1] = heightFactor; uvscaleoff[2] = 0.0f; uvscaleoff[3] = 0.0f; } else { uvscaleoff[0] = gstate_c.uv.uScale * widthFactor; uvscaleoff[1] = gstate_c.uv.vScale * heightFactor; uvscaleoff[2] = gstate_c.uv.uOff * widthFactor; uvscaleoff[3] = gstate_c.uv.vOff * heightFactor; } break; // These two work the same whether or not we prescale UV. case GE_TEXMAP_TEXTURE_MATRIX: // We cannot bake the UV coord scale factor in here, as we apply a matrix multiplication // before this is applied, and the matrix multiplication may contain translation. In this case // the translation will be scaled which breaks faces in Hexyz Force for example. // So I've gone back to applying the scale factor in the shader. uvscaleoff[0] = widthFactor; uvscaleoff[1] = heightFactor; uvscaleoff[2] = 0.0f; uvscaleoff[3] = 0.0f; break; case GE_TEXMAP_ENVIRONMENT_MAP: // In this mode we only use uvscaleoff to scale to the texture size. uvscaleoff[0] = widthFactor; uvscaleoff[1] = heightFactor; uvscaleoff[2] = 0.0f; uvscaleoff[3] = 0.0f; break; default: ERROR_LOG_REPORT(G3D, "Unexpected UV gen mode: %d", gstate.getUVGenMode()); } VSSetFloatArray(CONST_VS_UVSCALEOFFSET, uvscaleoff, 4); } if (dirtyUniforms & DIRTY_DEPTHRANGE) { // Depth is [0, 1] mapping to [minz, maxz], not too hard. float vpZScale = gstate.getViewportZScale(); float vpZCenter = gstate.getViewportZCenter(); // These are just the reverse of the formulas in GPUStateUtils. float halfActualZRange = vpZScale / gstate_c.vpDepthScale; float minz = -((gstate_c.vpZOffset * halfActualZRange) - vpZCenter) - halfActualZRange; float viewZScale = halfActualZRange * 2.0f; // Account for the half pixel offset. 
float viewZCenter = minz + (DepthSliceFactor() / 256.0f) * 0.5f; float viewZInvScale; if (viewZScale != 0.0) { viewZInvScale = 1.0f / viewZScale; } else { viewZInvScale = 0.0; } float data[4] = { viewZScale, viewZCenter, viewZCenter, viewZInvScale }; VSSetFloatUniform4(CONST_VS_DEPTHRANGE, data); } // Lighting if (dirtyUniforms & DIRTY_AMBIENT) { VSSetColorUniform3Alpha(CONST_VS_AMBIENT, gstate.ambientcolor, gstate.getAmbientA()); } if (dirtyUniforms & DIRTY_MATAMBIENTALPHA) { VSSetColorUniform3Alpha(CONST_VS_MATAMBIENTALPHA, gstate.materialambient, gstate.getMaterialAmbientA()); } if (dirtyUniforms & DIRTY_MATDIFFUSE) { VSSetColorUniform3(CONST_VS_MATDIFFUSE, gstate.materialdiffuse); } if (dirtyUniforms & DIRTY_MATEMISSIVE) { VSSetColorUniform3(CONST_VS_MATEMISSIVE, gstate.materialemissive); } if (dirtyUniforms & DIRTY_MATSPECULAR) { VSSetColorUniform3ExtraFloat(CONST_VS_MATSPECULAR, gstate.materialspecular, getFloat24(gstate.materialspecularcoef)); } for (int i = 0; i < 4; i++) { if (dirtyUniforms & (DIRTY_LIGHT0 << i)) { if (gstate.isDirectionalLight(i)) { // Prenormalize float x = getFloat24(gstate.lpos[i * 3 + 0]); float y = getFloat24(gstate.lpos[i * 3 + 1]); float z = getFloat24(gstate.lpos[i * 3 + 2]); float len = sqrtf(x*x + y*y + z*z); if (len == 0.0f) len = 1.0f; else len = 1.0f / len; float vec[3] = { x * len, y * len, z * len }; VSSetFloatArray(CONST_VS_LIGHTPOS + i, vec, 3); } else { VSSetFloat24Uniform3(CONST_VS_LIGHTPOS + i, &gstate.lpos[i * 3]); } VSSetFloat24Uniform3(CONST_VS_LIGHTDIR + i, &gstate.ldir[i * 3]); VSSetFloat24Uniform3(CONST_VS_LIGHTATT + i, &gstate.latt[i * 3]); VSSetFloat(CONST_VS_LIGHTANGLE + i, getFloat24(gstate.lcutoff[i])); VSSetFloat(CONST_VS_LIGHTSPOTCOEF + i, getFloat24(gstate.lconv[i])); VSSetColorUniform3(CONST_VS_LIGHTAMBIENT + i, gstate.lcolor[i * 3]); VSSetColorUniform3(CONST_VS_LIGHTDIFFUSE + i, gstate.lcolor[i * 3 + 1]); VSSetColorUniform3(CONST_VS_LIGHTSPECULAR + i, gstate.lcolor[i * 3 + 2]); } } }
void ShaderManagerVulkan::BaseUpdateUniforms(int dirtyUniforms) { if (dirtyUniforms & DIRTY_TEXENV) { Uint8x3ToFloat4(ub_base.texEnvColor, gstate.texenvcolor); } if (dirtyUniforms & DIRTY_ALPHACOLORREF) { Uint8x3ToInt4_Alpha(ub_base.alphaColorRef, gstate.getColorTestRef(), gstate.getAlphaTestRef() & gstate.getAlphaTestMask()); } if (dirtyUniforms & DIRTY_ALPHACOLORMASK) { Uint8x3ToInt4_Alpha(ub_base.colorTestMask, gstate.getColorTestMask(), gstate.getAlphaTestMask()); } if (dirtyUniforms & DIRTY_FOGCOLOR) { Uint8x3ToFloat4(ub_base.fogColor, gstate.fogcolor); } if (dirtyUniforms & DIRTY_SHADERBLEND) { Uint8x3ToFloat4(ub_base.blendFixA, gstate.getFixA()); Uint8x3ToFloat4(ub_base.blendFixB, gstate.getFixB()); } if (dirtyUniforms & DIRTY_TEXCLAMP) { const float invW = 1.0f / (float)gstate_c.curTextureWidth; const float invH = 1.0f / (float)gstate_c.curTextureHeight; const int w = gstate.getTextureWidth(0); const int h = gstate.getTextureHeight(0); const float widthFactor = (float)w * invW; const float heightFactor = (float)h * invH; // First wrap xy, then half texel xy (for clamp.) 
const float texclamp[4] = { widthFactor, heightFactor, invW * 0.5f, invH * 0.5f, }; const float texclampoff[2] = { gstate_c.curTextureXOffset * invW, gstate_c.curTextureYOffset * invH, }; CopyFloat4(ub_base.texClamp, texclamp); CopyFloat2(ub_base.texClampOffset, texclampoff); } if (dirtyUniforms & DIRTY_PROJMATRIX) { Matrix4x4 flippedMatrix; memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float)); const bool invertedY = gstate_c.vpHeight < 0; if (invertedY) { flippedMatrix[1] = -flippedMatrix[1]; flippedMatrix[5] = -flippedMatrix[5]; flippedMatrix[9] = -flippedMatrix[9]; flippedMatrix[13] = -flippedMatrix[13]; } const bool invertedX = gstate_c.vpWidth < 0; if (invertedX) { flippedMatrix[0] = -flippedMatrix[0]; flippedMatrix[4] = -flippedMatrix[4]; flippedMatrix[8] = -flippedMatrix[8]; flippedMatrix[12] = -flippedMatrix[12]; } ConvertProjMatrixToVulkan(flippedMatrix, invertedX, invertedY); CopyMatrix4x4(ub_base.proj, flippedMatrix.getReadPtr()); } if (dirtyUniforms & DIRTY_PROJTHROUGHMATRIX) { Matrix4x4 proj_through; proj_through.setOrthoVulkan(0.0f, gstate_c.curRTWidth, 0, gstate_c.curRTHeight, 0, 1); CopyMatrix4x4(ub_base.proj_through, proj_through.getReadPtr()); } // Transform if (dirtyUniforms & DIRTY_WORLDMATRIX) { ConvertMatrix4x3To4x4(ub_base.world, gstate.worldMatrix); } if (dirtyUniforms & DIRTY_VIEWMATRIX) { ConvertMatrix4x3To4x4(ub_base.view, gstate.viewMatrix); } if (dirtyUniforms & DIRTY_TEXMATRIX) { ConvertMatrix4x3To4x4(ub_base.tex, gstate.tgenMatrix); } // Combined two small uniforms if (dirtyUniforms & (DIRTY_FOGCOEF | DIRTY_STENCILREPLACEVALUE)) { float fogcoef_stencil[3] = { getFloat24(gstate.fog1), getFloat24(gstate.fog2), (float)gstate.getStencilTestRef() }; if (my_isinf(fogcoef_stencil[1])) { // not really sure what a sensible value might be. fogcoef_stencil[1] = fogcoef_stencil[1] < 0.0f ? 
-10000.0f : 10000.0f; } else if (my_isnan(fogcoef_stencil[1])) { // Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988 // Just put the fog far away at a large finite distance. // Infinities and NaNs are rather unpredictable in shaders on many GPUs // so it's best to just make it a sane calculation. fogcoef_stencil[0] = 100000.0f; fogcoef_stencil[1] = 1.0f; } #ifndef MOBILE_DEVICE else if (my_isnanorinf(fogcoef_stencil[1]) || my_isnanorinf(fogcoef_stencil[0])) { ERROR_LOG_REPORT_ONCE(fognan, G3D, "Unhandled fog NaN/INF combo: %f %f", fogcoef_stencil[0], fogcoef_stencil[1]); } #endif CopyFloat3(ub_base.fogCoef_stencil, fogcoef_stencil); } // Texturing if (dirtyUniforms & DIRTY_UVSCALEOFFSET) { const float invW = 1.0f / (float)gstate_c.curTextureWidth; const float invH = 1.0f / (float)gstate_c.curTextureHeight; const int w = gstate.getTextureWidth(0); const int h = gstate.getTextureHeight(0); const float widthFactor = (float)w * invW; const float heightFactor = (float)h * invH; static const float rescale[4] = { 1.0f, 2 * 127.5f / 128.f, 2 * 32767.5f / 32768.f, 1.0f }; const float factor = rescale[(gstate.vertType & GE_VTYPE_TC_MASK) >> GE_VTYPE_TC_SHIFT]; float uvscaleoff[4]; switch (gstate.getUVGenMode()) { case GE_TEXMAP_TEXTURE_COORDS: // Not sure what GE_TEXMAP_UNKNOWN is, but seen in Riviera. Treating the same as GE_TEXMAP_TEXTURE_COORDS works. case GE_TEXMAP_UNKNOWN: if (g_Config.bPrescaleUV) { // We are here but are prescaling UV in the decoder? Let's do the same as in the other case // except consider *Scale and *Off to be 1 and 0. uvscaleoff[0] = widthFactor; uvscaleoff[1] = heightFactor; uvscaleoff[2] = 0.0f; uvscaleoff[3] = 0.0f; } else { uvscaleoff[0] = gstate_c.uv.uScale * factor * widthFactor; uvscaleoff[1] = gstate_c.uv.vScale * factor * heightFactor; uvscaleoff[2] = gstate_c.uv.uOff * widthFactor; uvscaleoff[3] = gstate_c.uv.vOff * heightFactor; } break; // These two work the same whether or not we prescale UV. 
case GE_TEXMAP_TEXTURE_MATRIX: // We cannot bake the UV coord scale factor in here, as we apply a matrix multiplication // before this is applied, and the matrix multiplication may contain translation. In this case // the translation will be scaled which breaks faces in Hexyz Force for example. // So I've gone back to applying the scale factor in the shader. uvscaleoff[0] = widthFactor; uvscaleoff[1] = heightFactor; uvscaleoff[2] = 0.0f; uvscaleoff[3] = 0.0f; break; case GE_TEXMAP_ENVIRONMENT_MAP: // In this mode we only use uvscaleoff to scale to the texture size. uvscaleoff[0] = widthFactor; uvscaleoff[1] = heightFactor; uvscaleoff[2] = 0.0f; uvscaleoff[3] = 0.0f; break; default: ERROR_LOG_REPORT(G3D, "Unexpected UV gen mode: %d", gstate.getUVGenMode()); } CopyFloat4(ub_base.uvScaleOffset, uvscaleoff); } if (dirtyUniforms & DIRTY_DEPTHRANGE) { float viewZScale = gstate.getViewportZScale(); float viewZCenter = gstate.getViewportZCenter(); float viewZInvScale; // We had to scale and translate Z to account for our clamped Z range. // Therefore, we also need to reverse this to round properly. // // Example: scale = 65535.0, center = 0.0 // Resulting range = -65535 to 65535, clamped to [0, 65535] // gstate_c.vpDepthScale = 2.0f // gstate_c.vpZOffset = -1.0f // // The projection already accounts for those, so we need to reverse them. // // Additionally, D3D9 uses a range from [0, 1]. We double and move the center. viewZScale *= (1.0f / gstate_c.vpDepthScale) * 2.0f; viewZCenter -= 65535.0f * gstate_c.vpZOffset + 32768.5f; if (viewZScale != 0.0) { viewZInvScale = 1.0f / viewZScale; } else { viewZInvScale = 0.0; } float data[4] = { viewZScale, viewZCenter, viewZCenter, viewZInvScale }; CopyFloat4(ub_base.depthRange, data); } }
void ShaderManagerVulkan::BaseUpdateUniforms(int dirtyUniforms) { if (dirtyUniforms & DIRTY_TEXENV) { Uint8x3ToFloat4(ub_base.texEnvColor, gstate.texenvcolor); } if (dirtyUniforms & DIRTY_ALPHACOLORREF) { Uint8x3ToInt4_Alpha(ub_base.alphaColorRef, gstate.getColorTestRef(), gstate.getAlphaTestRef() & gstate.getAlphaTestMask()); } if (dirtyUniforms & DIRTY_ALPHACOLORMASK) { Uint8x3ToInt4_Alpha(ub_base.colorTestMask, gstate.getColorTestMask(), gstate.getAlphaTestMask()); } if (dirtyUniforms & DIRTY_FOGCOLOR) { Uint8x3ToFloat4(ub_base.fogColor, gstate.fogcolor); } if (dirtyUniforms & DIRTY_SHADERBLEND) { Uint8x3ToFloat4(ub_base.blendFixA, gstate.getFixA()); Uint8x3ToFloat4(ub_base.blendFixB, gstate.getFixB()); } if (dirtyUniforms & DIRTY_TEXCLAMP) { const float invW = 1.0f / (float)gstate_c.curTextureWidth; const float invH = 1.0f / (float)gstate_c.curTextureHeight; const int w = gstate.getTextureWidth(0); const int h = gstate.getTextureHeight(0); const float widthFactor = (float)w * invW; const float heightFactor = (float)h * invH; // First wrap xy, then half texel xy (for clamp.) 
ub_base.texClamp[0] = widthFactor; ub_base.texClamp[1] = heightFactor; ub_base.texClamp[2] = invW * 0.5f; ub_base.texClamp[3] = invH * 0.5f; ub_base.texClampOffset[0] = gstate_c.curTextureXOffset * invW; ub_base.texClampOffset[1] = gstate_c.curTextureYOffset * invH; } if (dirtyUniforms & DIRTY_PROJMATRIX) { Matrix4x4 flippedMatrix; memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float)); const bool invertedY = gstate_c.vpHeight < 0; if (invertedY) { flippedMatrix[1] = -flippedMatrix[1]; flippedMatrix[5] = -flippedMatrix[5]; flippedMatrix[9] = -flippedMatrix[9]; flippedMatrix[13] = -flippedMatrix[13]; } const bool invertedX = gstate_c.vpWidth < 0; if (invertedX) { flippedMatrix[0] = -flippedMatrix[0]; flippedMatrix[4] = -flippedMatrix[4]; flippedMatrix[8] = -flippedMatrix[8]; flippedMatrix[12] = -flippedMatrix[12]; } ConvertProjMatrixToVulkan(flippedMatrix, invertedX, invertedY); CopyMatrix4x4(ub_base.proj, flippedMatrix.getReadPtr()); } if (dirtyUniforms & DIRTY_PROJTHROUGHMATRIX) { Matrix4x4 proj_through; proj_through.setOrthoVulkan(0.0f, gstate_c.curRTWidth, 0, gstate_c.curRTHeight, 0, 1); CopyMatrix4x4(ub_base.proj_through, proj_through.getReadPtr()); } // Transform if (dirtyUniforms & DIRTY_WORLDMATRIX) { ConvertMatrix4x3To4x4(ub_base.world, gstate.worldMatrix); } if (dirtyUniforms & DIRTY_VIEWMATRIX) { ConvertMatrix4x3To4x4(ub_base.view, gstate.viewMatrix); } if (dirtyUniforms & DIRTY_TEXMATRIX) { ConvertMatrix4x3To4x4(ub_base.tex, gstate.tgenMatrix); } // Combined two small uniforms if (dirtyUniforms & (DIRTY_FOGCOEF | DIRTY_STENCILREPLACEVALUE)) { float fogcoef_stencil[3] = { getFloat24(gstate.fog1), getFloat24(gstate.fog2), (float)gstate.getStencilTestRef() }; if (my_isinf(fogcoef_stencil[1])) { // not really sure what a sensible value might be. fogcoef_stencil[1] = fogcoef_stencil[1] < 0.0f ? 
-10000.0f : 10000.0f; } else if (my_isnan(fogcoef_stencil[1])) { // Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988 // Just put the fog far away at a large finite distance. // Infinities and NaNs are rather unpredictable in shaders on many GPUs // so it's best to just make it a sane calculation. fogcoef_stencil[0] = 100000.0f; fogcoef_stencil[1] = 1.0f; } #ifndef MOBILE_DEVICE else if (my_isnanorinf(fogcoef_stencil[1]) || my_isnanorinf(fogcoef_stencil[0])) { ERROR_LOG_REPORT_ONCE(fognan, G3D, "Unhandled fog NaN/INF combo: %f %f", fogcoef_stencil[0], fogcoef_stencil[1]); } #endif CopyFloat3(ub_base.fogCoef_stencil, fogcoef_stencil); } // Texturing if (dirtyUniforms & DIRTY_UVSCALEOFFSET) { const float invW = 1.0f / (float)gstate_c.curTextureWidth; const float invH = 1.0f / (float)gstate_c.curTextureHeight; const int w = gstate.getTextureWidth(0); const int h = gstate.getTextureHeight(0); const float widthFactor = (float)w * invW; const float heightFactor = (float)h * invH; ub_base.uvScaleOffset[0] = widthFactor; ub_base.uvScaleOffset[1] = heightFactor; ub_base.uvScaleOffset[2] = 0.0f; ub_base.uvScaleOffset[3] = 0.0f; } if (dirtyUniforms & DIRTY_DEPTHRANGE) { float viewZScale = gstate.getViewportZScale(); float viewZCenter = gstate.getViewportZCenter(); float viewZInvScale; // We had to scale and translate Z to account for our clamped Z range. // Therefore, we also need to reverse this to round properly. // // Example: scale = 65535.0, center = 0.0 // Resulting range = -65535 to 65535, clamped to [0, 65535] // gstate_c.vpDepthScale = 2.0f // gstate_c.vpZOffset = -1.0f // // The projection already accounts for those, so we need to reverse them. // // Additionally, D3D9 uses a range from [0, 1]. We double and move the center. 
viewZScale *= (1.0f / gstate_c.vpDepthScale) * 2.0f; viewZCenter -= 65535.0f * gstate_c.vpZOffset + 32768.5f; if (viewZScale != 0.0) { viewZInvScale = 1.0f / viewZScale; } else { viewZInvScale = 0.0; } ub_base.depthRange[0] = viewZScale; ub_base.depthRange[1] = viewZCenter; ub_base.depthRange[2] = viewZCenter; ub_base.depthRange[3] = viewZInvScale; } }
void SoftwareTransform( int prim, int vertexCount, u32 vertType, u16 *&inds, int indexType, const DecVtxFormat &decVtxFormat, int &maxIndex, TransformedVertex *&drawBuffer, int &numTrans, bool &drawIndexed, const SoftwareTransformParams *params, SoftwareTransformResult *result) { u8 *decoded = params->decoded; FramebufferManagerCommon *fbman = params->fbman; TextureCacheCommon *texCache = params->texCache; TransformedVertex *transformed = params->transformed; TransformedVertex *transformedExpanded = params->transformedExpanded; float ySign = 1.0f; bool throughmode = (vertType & GE_VTYPE_THROUGH_MASK) != 0; bool lmode = gstate.isUsingSecondaryColor() && gstate.isLightingEnabled(); // TODO: Split up into multiple draw calls for GLES 2.0 where you can't guarantee support for more than 0x10000 verts. #if defined(MOBILE_DEVICE) if (vertexCount > 0x10000/3) vertexCount = 0x10000/3; #endif float uscale = 1.0f; float vscale = 1.0f; if (throughmode) { uscale /= gstate_c.curTextureWidth; vscale /= gstate_c.curTextureHeight; } bool skinningEnabled = vertTypeIsSkinningEnabled(vertType); const int w = gstate.getTextureWidth(0); const int h = gstate.getTextureHeight(0); float widthFactor = (float) w / (float) gstate_c.curTextureWidth; float heightFactor = (float) h / (float) gstate_c.curTextureHeight; Lighter lighter(vertType); float fog_end = getFloat24(gstate.fog1); float fog_slope = getFloat24(gstate.fog2); // Same fixup as in ShaderManager.cpp if (my_isinf(fog_slope)) { // not really sure what a sensible value might be. fog_slope = fog_slope < 0.0f ? -10000.0f : 10000.0f; } if (my_isnan(fog_slope)) { // Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988 // Just put the fog far away at a large finite distance. // Infinities and NaNs are rather unpredictable in shaders on many GPUs // so it's best to just make it a sane calculation. 
fog_end = 100000.0f; fog_slope = 1.0f; } VertexReader reader(decoded, decVtxFormat, vertType); if (throughmode) { for (int index = 0; index < maxIndex; index++) { // Do not touch the coordinates or the colors. No lighting. reader.Goto(index); // TODO: Write to a flexible buffer, we don't always need all four components. TransformedVertex &vert = transformed[index]; reader.ReadPos(vert.pos); if (reader.hasColor0()) { reader.ReadColor0_8888(vert.color0); } else { vert.color0_32 = gstate.getMaterialAmbientRGBA(); } if (reader.hasUV()) { reader.ReadUV(vert.uv); vert.u *= uscale; vert.v *= vscale; } else { vert.u = 0.0f; vert.v = 0.0f; } // Ignore color1 and fog, never used in throughmode anyway. // The w of uv is also never used (hardcoded to 1.0.) } } else { // Okay, need to actually perform the full transform. for (int index = 0; index < maxIndex; index++) { reader.Goto(index); float v[3] = {0, 0, 0}; Vec4f c0 = Vec4f(1, 1, 1, 1); Vec4f c1 = Vec4f(0, 0, 0, 0); float uv[3] = {0, 0, 1}; float fogCoef = 1.0f; // We do software T&L for now float out[3]; float pos[3]; Vec3f normal(0, 0, 1); Vec3f worldnormal(0, 0, 1); reader.ReadPos(pos); if (!skinningEnabled) { Vec3ByMatrix43(out, pos, gstate.worldMatrix); if (reader.hasNormal()) { reader.ReadNrm(normal.AsArray()); if (gstate.areNormalsReversed()) { normal = -normal; } Norm3ByMatrix43(worldnormal.AsArray(), normal.AsArray(), gstate.worldMatrix); worldnormal = worldnormal.Normalized(); } } else { float weights[8]; reader.ReadWeights(weights); if (reader.hasNormal()) reader.ReadNrm(normal.AsArray()); // Skinning Vec3f psum(0, 0, 0); Vec3f nsum(0, 0, 0); for (int i = 0; i < vertTypeGetNumBoneWeights(vertType); i++) { if (weights[i] != 0.0f) { Vec3ByMatrix43(out, pos, gstate.boneMatrix+i*12); Vec3f tpos(out); psum += tpos * weights[i]; if (reader.hasNormal()) { Vec3f norm; Norm3ByMatrix43(norm.AsArray(), normal.AsArray(), gstate.boneMatrix+i*12); nsum += norm * weights[i]; } } } // Yes, we really must multiply by the world 
matrix too. Vec3ByMatrix43(out, psum.AsArray(), gstate.worldMatrix); if (reader.hasNormal()) { normal = nsum; if (gstate.areNormalsReversed()) { normal = -normal; } Norm3ByMatrix43(worldnormal.AsArray(), normal.AsArray(), gstate.worldMatrix); worldnormal = worldnormal.Normalized(); } } // Perform lighting here if enabled. don't need to check through, it's checked above. Vec4f unlitColor = Vec4f(1, 1, 1, 1); if (reader.hasColor0()) { reader.ReadColor0(&unlitColor.x); } else { unlitColor = Vec4f::FromRGBA(gstate.getMaterialAmbientRGBA()); } if (gstate.isLightingEnabled()) { float litColor0[4]; float litColor1[4]; lighter.Light(litColor0, litColor1, unlitColor.AsArray(), out, worldnormal); // Don't ignore gstate.lmode - we should send two colors in that case for (int j = 0; j < 4; j++) { c0[j] = litColor0[j]; } if (lmode) { // Separate colors for (int j = 0; j < 4; j++) { c1[j] = litColor1[j]; } } else { // Summed color into c0 (will clamp in ToRGBA().) for (int j = 0; j < 4; j++) { c0[j] += litColor1[j]; } } } else { if (reader.hasColor0()) { for (int j = 0; j < 4; j++) { c0[j] = unlitColor[j]; } } else { c0 = Vec4f::FromRGBA(gstate.getMaterialAmbientRGBA()); } if (lmode) { // c1 is already 0. } } float ruv[2] = {0.0f, 0.0f}; if (reader.hasUV()) reader.ReadUV(ruv); // Perform texture coordinate generation after the transform and lighting - one style of UV depends on lights. switch (gstate.getUVGenMode()) { case GE_TEXMAP_TEXTURE_COORDS: // UV mapping case GE_TEXMAP_UNKNOWN: // Seen in Riviera. Unsure of meaning, but this works. // We always prescale in the vertex decoder now. 
uv[0] = ruv[0]; uv[1] = ruv[1]; uv[2] = 1.0f; break; case GE_TEXMAP_TEXTURE_MATRIX: { // Projection mapping Vec3f source; switch (gstate.getUVProjMode()) { case GE_PROJMAP_POSITION: // Use model space XYZ as source source = pos; break; case GE_PROJMAP_UV: // Use unscaled UV as source source = Vec3f(ruv[0], ruv[1], 0.0f); break; case GE_PROJMAP_NORMALIZED_NORMAL: // Use normalized normal as source source = normal.Normalized(); if (!reader.hasNormal()) { ERROR_LOG_REPORT(G3D, "Normal projection mapping without normal?"); } break; case GE_PROJMAP_NORMAL: // Use non-normalized normal as source! source = normal; if (!reader.hasNormal()) { ERROR_LOG_REPORT(G3D, "Normal projection mapping without normal?"); } break; } float uvw[3]; Vec3ByMatrix43(uvw, &source.x, gstate.tgenMatrix); uv[0] = uvw[0]; uv[1] = uvw[1]; uv[2] = uvw[2]; } break; case GE_TEXMAP_ENVIRONMENT_MAP: // Shade mapping - use two light sources to generate U and V. { Vec3f lightpos0 = Vec3f(&lighter.lpos[gstate.getUVLS0() * 3]).Normalized(); Vec3f lightpos1 = Vec3f(&lighter.lpos[gstate.getUVLS1() * 3]).Normalized(); uv[0] = (1.0f + Dot(lightpos0, worldnormal))/2.0f; uv[1] = (1.0f + Dot(lightpos1, worldnormal))/2.0f; uv[2] = 1.0f; } break; default: // Illegal ERROR_LOG_REPORT(G3D, "Impossible UV gen mode? %d", gstate.getUVGenMode()); break; } uv[0] = uv[0] * widthFactor; uv[1] = uv[1] * heightFactor; // Transform the coord by the view matrix. Vec3ByMatrix43(v, out, gstate.viewMatrix); fogCoef = (v[2] + fog_end) * fog_slope; // TODO: Write to a flexible buffer, we don't always need all four components. memcpy(&transformed[index].x, v, 3 * sizeof(float)); transformed[index].fog = fogCoef; memcpy(&transformed[index].u, uv, 3 * sizeof(float)); transformed[index].color0_32 = c0.ToRGBA(); transformed[index].color1_32 = c1.ToRGBA(); // The multiplication by the projection matrix is still performed in the vertex shader. // So is vertex depth rounding, to simulate the 16-bit depth buffer. 
} } // Here's the best opportunity to try to detect rectangles used to clear the screen, and // replace them with real clears. This can provide a speedup on certain mobile chips. // // An alternative option is to simply ditch all the verts except the first and last to create a single // rectangle out of many. Quite a small optimization though. // Experiment: Disable on PowerVR (see issue #6290) // TODO: This bleeds outside the play area in non-buffered mode. Big deal? Probably not. bool reallyAClear = false; if (maxIndex > 1 && prim == GE_PRIM_RECTANGLES && gstate.isModeClear()) { int scissorX2 = gstate.getScissorX2() + 1; int scissorY2 = gstate.getScissorY2() + 1; reallyAClear = IsReallyAClear(transformed, maxIndex, scissorX2, scissorY2); } if (reallyAClear && gl_extensions.gpuVendor != GPU_VENDOR_POWERVR) { // && g_Config.iRenderingMode != FB_NON_BUFFERED_MODE) { // If alpha is not allowed to be separate, it must match for both depth/stencil and color. Vulkan requires this. bool alphaMatchesColor = gstate.isClearModeColorMask() == gstate.isClearModeAlphaMask(); bool depthMatchesStencil = gstate.isClearModeAlphaMask() == gstate.isClearModeDepthMask(); if (params->allowSeparateAlphaClear || (alphaMatchesColor && depthMatchesStencil)) { result->color = transformed[1].color0_32; // Need to rescale from a [0, 1] float. This is the final transformed value. result->depth = ToScaledDepth((s16)(int)(transformed[1].z * 65535.0f)); result->action = SW_CLEAR; return; } } // This means we're using a framebuffer (and one that isn't big enough.) if (gstate_c.curTextureHeight < (u32)h && maxIndex >= 2) { // Even if not rectangles, this will detect if either of the first two are outside the framebuffer. // HACK: Adding one pixel margin to this detection fixes issues in Assassin's Creed : Bloodlines, // while still keeping BOF working (see below). const float invTexH = 1.0f / gstate_c.curTextureHeight; // size of one texel. 
bool tlOutside; bool tlAlmostOutside; bool brOutside; // If we're outside heightFactor, then v must be wrapping or clamping. Avoid this workaround. // If we're <= 1.0f, we're inside the framebuffer (workaround not needed.) // We buffer that 1.0f a little more with a texel to avoid some false positives. tlOutside = transformed[0].v <= heightFactor && transformed[0].v > 1.0f + invTexH; brOutside = transformed[1].v <= heightFactor && transformed[1].v > 1.0f + invTexH; // Careful: if br is outside, but tl is well inside, this workaround still doesn't make sense. // We go with halfway, since we overestimate framebuffer heights sometimes but not by much. tlAlmostOutside = transformed[0].v <= heightFactor && transformed[0].v >= 0.5f; if (tlOutside || (brOutside && tlAlmostOutside)) { // Okay, so we're texturing from outside the framebuffer, but inside the texture height. // Breath of Fire 3 does this to access a render surface at an offset. const u32 bpp = fbman->GetTargetFormat() == GE_FORMAT_8888 ? 4 : 2; const u32 prevH = texCache->AttachedDrawingHeight(); const u32 fb_size = bpp * fbman->GetTargetStride() * prevH; const u32 prevYOffset = gstate_c.curTextureYOffset; if (texCache->SetOffsetTexture(fb_size)) { const float oldWidthFactor = widthFactor; const float oldHeightFactor = heightFactor; widthFactor = (float) w / (float) gstate_c.curTextureWidth; heightFactor = (float) h / (float) gstate_c.curTextureHeight; // We've already baked in the old gstate_c.curTextureYOffset, so correct. const float yDiff = (float) (prevH + prevYOffset - gstate_c.curTextureYOffset) / (float) h; for (int index = 0; index < maxIndex; ++index) { transformed[index].u *= widthFactor / oldWidthFactor; // Inverse it back to scale to the new FBO, and add 1.0f to account for old FBO. transformed[index].v = (transformed[index].v / oldHeightFactor - yDiff) * heightFactor; } } } } // Step 2: expand rectangles. 
drawBuffer = transformed; numTrans = 0; drawIndexed = false; if (prim != GE_PRIM_RECTANGLES) { // We can simply draw the unexpanded buffer. numTrans = vertexCount; drawIndexed = true; } else { bool useBufferedRendering = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE; if (useBufferedRendering) ySign = -ySign; float flippedMatrix[16]; if (!throughmode) { memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float)); const bool invertedY = useBufferedRendering ? (gstate_c.vpHeight < 0) : (gstate_c.vpHeight > 0); if (invertedY) { flippedMatrix[1] = -flippedMatrix[1]; flippedMatrix[5] = -flippedMatrix[5]; flippedMatrix[9] = -flippedMatrix[9]; flippedMatrix[13] = -flippedMatrix[13]; } const bool invertedX = gstate_c.vpWidth < 0; if (invertedX) { flippedMatrix[0] = -flippedMatrix[0]; flippedMatrix[4] = -flippedMatrix[4]; flippedMatrix[8] = -flippedMatrix[8]; flippedMatrix[12] = -flippedMatrix[12]; } } //rectangles always need 2 vertices, disregard the last one if there's an odd number vertexCount = vertexCount & ~1; numTrans = 0; drawBuffer = transformedExpanded; TransformedVertex *trans = &transformedExpanded[0]; const u16 *indsIn = (const u16 *)inds; u16 *newInds = inds + vertexCount; u16 *indsOut = newInds; maxIndex = 4 * vertexCount; for (int i = 0; i < vertexCount; i += 2) { const TransformedVertex &transVtxTL = transformed[indsIn[i + 0]]; const TransformedVertex &transVtxBR = transformed[indsIn[i + 1]]; // We have to turn the rectangle into two triangles, so 6 points. // This is 4 verts + 6 indices. // bottom right trans[0] = transVtxBR; // top right trans[1] = transVtxBR; trans[1].y = transVtxTL.y; trans[1].v = transVtxTL.v; // top left trans[2] = transVtxBR; trans[2].x = transVtxTL.x; trans[2].y = transVtxTL.y; trans[2].u = transVtxTL.u; trans[2].v = transVtxTL.v; // bottom left trans[3] = transVtxBR; trans[3].x = transVtxTL.x; trans[3].u = transVtxTL.u; // That's the four corners. Now process UV rotation. 
if (throughmode) RotateUVThrough(trans); else RotateUV(trans, flippedMatrix, ySign); // Triangle: BR-TR-TL indsOut[0] = i * 2 + 0; indsOut[1] = i * 2 + 1; indsOut[2] = i * 2 + 2; // Triangle: BL-BR-TL indsOut[3] = i * 2 + 3; indsOut[4] = i * 2 + 0; indsOut[5] = i * 2 + 2; trans += 4; indsOut += 6; numTrans += 6; } inds = newInds; drawIndexed = true; // We don't know the color until here, so we have to do it now, instead of in StateMapping. // Might want to reconsider the order of things later... if (gstate.isModeClear() && gstate.isClearModeAlphaMask()) { result->setStencil = true; if (vertexCount > 1) { // Take the bottom right alpha value of the first rect as the stencil value. // Technically, each rect could individually fill its stencil, but most of the // time they use the same one. result->stencilValue = transformed[indsIn[1]].color0[3]; } else { result->stencilValue = 0; } } } result->action = SW_DRAW_PRIMITIVES; }