// Reads the current vertex from vreader and produces a fully processed VertexData.
//
// Steps, in order:
//  - position is read unscaled (ReadPosThroughZ16),
//  - UVs are read only when texturing is enabled, we're not in clear mode, and the format has UVs,
//  - the normal is read (and negated when gstate.areNormalsReversed()),
//  - outside through mode, skinning blends position/normal by the bone matrices,
//  - color0 falls back to the material ambient color, color1 falls back to black,
//  - outside through mode the vertex is run through model -> world -> view -> clip -> screen,
//    fog depth is computed and Lighting::Process() is applied;
//    in through mode the position is used as screen coordinates directly (x/y scaled by 16
//    plus the 16x offsets) and clip w / fog depth are forced to 1.
//
// Returns the processed vertex by value.
VertexData TransformUnit::ReadVertex(VertexReader& vreader)
{
	VertexData vertex;

	float pos[3];
	// VertexDecoder normally scales z, but we want it unscaled.
	vreader.ReadPosThroughZ16(pos);

	if (!gstate.isModeClear() && gstate.isTextureMapEnabled() && vreader.hasUV()) {
		float uv[2];
		vreader.ReadUV(uv);
		vertex.texturecoords = Vec2<float>(uv[0], uv[1]);
	}

	if (vreader.hasNormal()) {
		float normal[3];
		vreader.ReadNrm(normal);
		vertex.normal = Vec3<float>(normal[0], normal[1], normal[2]);

		if (gstate.areNormalsReversed())
			vertex.normal = -vertex.normal;
	}

	if (vertTypeIsSkinningEnabled(gstate.vertType) && !gstate.isModeThrough()) {
		// Weights that aren't supplied by the vertex keep these defaults (full weight on bone 0).
		float W[8] = { 1.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f };
		vreader.ReadWeights(W);

		Vec3<float> tmppos(0.f, 0.f, 0.f);
		Vec3<float> tmpnrm(0.f, 0.f, 0.f);

		for (int i = 0; i < vertTypeGetNumBoneWeights(gstate.vertType); ++i) {
			// Each bone is 12 floats: a 3x3 block followed by a 3 float translation.
			Mat3x3<float> bone(&gstate.boneMatrix[12*i]);
			tmppos += (bone * ModelCoords(pos[0], pos[1], pos[2]) + Vec3<float>(gstate.boneMatrix[12*i+9], gstate.boneMatrix[12*i+10], gstate.boneMatrix[12*i+11])) * W[i];
			if (vreader.hasNormal())
				tmpnrm += (bone * vertex.normal) * W[i];
		}

		pos[0] = tmppos.x;
		pos[1] = tmppos.y;
		pos[2] = tmppos.z;
		if (vreader.hasNormal())
			vertex.normal = tmpnrm;
	}

	if (vreader.hasColor0()) {
		float col[4];
		vreader.ReadColor0(col);
		vertex.color0 = Vec4<int>(col[0]*255, col[1]*255, col[2]*255, col[3]*255);
	} else {
		vertex.color0 = Vec4<int>(gstate.getMaterialAmbientR(), gstate.getMaterialAmbientG(), gstate.getMaterialAmbientB(), gstate.getMaterialAmbientA());
	}

	if (vreader.hasColor1()) {
		float col[3];
		vreader.ReadColor1(col);
		vertex.color1 = Vec3<int>(col[0]*255, col[1]*255, col[2]*255);
	} else {
		vertex.color1 = Vec3<int>(0, 0, 0);
	}

	if (!gstate.isModeThrough()) {
		vertex.modelpos = ModelCoords(pos[0], pos[1], pos[2]);
		vertex.worldpos = WorldCoords(TransformUnit::ModelToWorld(vertex.modelpos));
		ModelCoords viewpos = TransformUnit::WorldToView(vertex.worldpos);
		vertex.clippos = ClipCoords(TransformUnit::ViewToClip(viewpos));
		if (gstate.isFogEnabled()) {
			float fog_end = getFloat24(gstate.fog1);
			float fog_slope = getFloat24(gstate.fog2);
			// Same fixup as in ShaderManagerGLES.cpp
			if (my_isnanorinf(fog_end)) {
				// Not really sure what a sensible value might be, but let's try 64k.
				fog_end = std::signbit(fog_end) ? -65535.0f : 65535.0f;
			}
			if (my_isnanorinf(fog_slope)) {
				fog_slope = std::signbit(fog_slope) ? -65535.0f : 65535.0f;
			}
			vertex.fogdepth = (viewpos.z + fog_end) * fog_slope;
		} else {
			vertex.fogdepth = 1.0f;
		}
		vertex.screenpos = ClipToScreenInternal(vertex.clippos, &outside_range_flag);

		if (vreader.hasNormal()) {
			vertex.worldnormal = TransformUnit::ModelToWorldNormal(vertex.normal);
			// TODO: Isn't there a flag that controls whether to normalize the normal?
			// A zero-length normal must not be divided by its length: that would fill the
			// normal with NaN/Inf and feed it into lighting. Use the same default as the
			// "no normal" case below instead.
			const float worldNormalLength = vertex.worldnormal.Length();
			if (worldNormalLength != 0.0f) {
				vertex.worldnormal /= worldNormalLength;
			} else {
				vertex.worldnormal = Vec3<float>(0.0f, 0.0f, 1.0f);
			}
		} else {
			vertex.worldnormal = Vec3<float>(0.0f, 0.0f, 1.0f);
		}

		Lighting::Process(vertex, vreader.hasColor0());
	} else {
		// Through mode: positions are used as-is, only converted to the 16x screen coordinates.
		vertex.screenpos.x = (int)(pos[0] * 16) + gstate.getOffsetX16();
		vertex.screenpos.y = (int)(pos[1] * 16) + gstate.getOffsetY16();
		vertex.screenpos.z = pos[2];
		vertex.clippos.w = 1.f;
		vertex.fogdepth = 1.f;
	}

	return vertex;
}
// Refreshes the parts of the base uniform buffer whose DIRTY_* bits are set.
//
// ub:            uniform buffer to write; only members tied to a set dirty bit are touched.
// dirtyUniforms: bitmask of DIRTY_* flags selecting which groups to refresh from gstate/gstate_c.
// flipViewport:  when true the D3D11 projection helpers are used (ConvertProjMatrixToD3D11,
//                setOrthoD3D), otherwise the Vulkan ones.
void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipViewport) {
	if (dirtyUniforms & DIRTY_TEXENV) {
		Uint8x3ToFloat4(ub->texEnvColor, gstate.texenvcolor);
	}
	if (dirtyUniforms & DIRTY_ALPHACOLORREF) {
		Uint8x3ToInt4_Alpha(ub->alphaColorRef, gstate.getColorTestRef(), gstate.getAlphaTestRef() & gstate.getAlphaTestMask());
	}
	if (dirtyUniforms & DIRTY_ALPHACOLORMASK) {
		Uint8x3ToInt4_Alpha(ub->colorTestMask, gstate.getColorTestMask(), gstate.getAlphaTestMask());
	}
	if (dirtyUniforms & DIRTY_FOGCOLOR) {
		Uint8x3ToFloat4(ub->fogColor, gstate.fogcolor);
	}
	if (dirtyUniforms & DIRTY_SHADERBLEND) {
		Uint8x3ToFloat4(ub->blendFixA, gstate.getFixA());
		Uint8x3ToFloat4(ub->blendFixB, gstate.getFixB());
	}
	if (dirtyUniforms & DIRTY_TEXCLAMP) {
		const float invW = 1.0f / (float)gstate_c.curTextureWidth;
		const float invH = 1.0f / (float)gstate_c.curTextureHeight;
		const int w = gstate.getTextureWidth(0);
		const int h = gstate.getTextureHeight(0);
		const float widthFactor = (float)w * invW;
		const float heightFactor = (float)h * invH;

		// First wrap xy, then half texel xy (for clamp.)
		ub->texClamp[0] = widthFactor;
		ub->texClamp[1] = heightFactor;
		ub->texClamp[2] = invW * 0.5f;
		ub->texClamp[3] = invH * 0.5f;
		ub->texClampOffset[0] = gstate_c.curTextureXOffset * invW;
		ub->texClampOffset[1] = gstate_c.curTextureYOffset * invH;
	}

	if (dirtyUniforms & DIRTY_PROJMATRIX) {
		Matrix4x4 flippedMatrix;
		memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float));

		// A negative viewport size means that axis is mirrored; negate the matching matrix entries.
		const bool invertedY = gstate_c.vpHeight < 0;
		if (invertedY) {
			flippedMatrix[1] = -flippedMatrix[1];
			flippedMatrix[5] = -flippedMatrix[5];
			flippedMatrix[9] = -flippedMatrix[9];
			flippedMatrix[13] = -flippedMatrix[13];
		}
		const bool invertedX = gstate_c.vpWidth < 0;
		if (invertedX) {
			flippedMatrix[0] = -flippedMatrix[0];
			flippedMatrix[4] = -flippedMatrix[4];
			flippedMatrix[8] = -flippedMatrix[8];
			flippedMatrix[12] = -flippedMatrix[12];
		}
		if (flipViewport) {
			ConvertProjMatrixToD3D11(flippedMatrix);
		} else {
			ConvertProjMatrixToVulkan(flippedMatrix);
		}

		if (g_Config.iRenderingMode == 0 && g_display_rotation != DisplayRotation::ROTATE_0) {
			flippedMatrix = flippedMatrix * g_display_rot_matrix;
		}
		CopyMatrix4x4(ub->proj, flippedMatrix.getReadPtr());
	}

	if (dirtyUniforms & DIRTY_PROJTHROUGHMATRIX) {
		Matrix4x4 proj_through;
		if (flipViewport) {
			proj_through.setOrthoD3D(0.0f, gstate_c.curRTWidth, gstate_c.curRTHeight, 0, 0, 1);
		} else {
			proj_through.setOrthoVulkan(0.0f, gstate_c.curRTWidth, 0, gstate_c.curRTHeight, 0, 1);
		}
		if (g_Config.iRenderingMode == 0 && g_display_rotation != DisplayRotation::ROTATE_0) {
			proj_through = proj_through * g_display_rot_matrix;
		}
		CopyMatrix4x4(ub->proj_through, proj_through.getReadPtr());
	}

	// Transform
	if (dirtyUniforms & DIRTY_WORLDMATRIX) {
		ConvertMatrix4x3To3x4Transposed(ub->world, gstate.worldMatrix);
	}
	if (dirtyUniforms & DIRTY_VIEWMATRIX) {
		ConvertMatrix4x3To3x4Transposed(ub->view, gstate.viewMatrix);
	}
	if (dirtyUniforms & DIRTY_TEXMATRIX) {
		ConvertMatrix4x3To3x4Transposed(ub->tex, gstate.tgenMatrix);
	}

	// Combined two small uniforms
	if (dirtyUniforms & (DIRTY_FOGCOEF | DIRTY_STENCILREPLACEVALUE)) {
		float fogcoef_stencil[3] = {
			getFloat24(gstate.fog1),
			getFloat24(gstate.fog2),
			(float)gstate.getStencilTestRef()/255.0f
		};
		if (my_isinf(fogcoef_stencil[1])) {
			// not really sure what a sensible value might be.
			fogcoef_stencil[1] = fogcoef_stencil[1] < 0.0f ? -10000.0f : 10000.0f;
		} else if (my_isnan(fogcoef_stencil[1])) {
			// Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988
			// Just put the fog far away at a large finite distance.
			// Infinities and NaNs are rather unpredictable in shaders on many GPUs
			// so it's best to just make it a sane calculation.
			fogcoef_stencil[0] = 100000.0f;
			fogcoef_stencil[1] = 1.0f;
		}
#ifndef MOBILE_DEVICE
		// The slope is known to be finite here (both branches above were skipped), so only
		// the end coefficient can still be Inf/NaN.
		else if (my_isnanorinf(fogcoef_stencil[0])) {
			ERROR_LOG_REPORT_ONCE(fognan, G3D, "Unhandled fog NaN/INF combo: %f %f", fogcoef_stencil[0], fogcoef_stencil[1]);
		}
#endif
		// Never upload an Inf/NaN fog end: clamp it to a large finite value (64k, the same
		// magnitude the software transform code uses for its fog fixup). NaN maps to +64k.
		if (my_isnanorinf(fogcoef_stencil[0])) {
			fogcoef_stencil[0] = fogcoef_stencil[0] < 0.0f ? -65535.0f : 65535.0f;
		}
		CopyFloat3(ub->fogCoef_stencil, fogcoef_stencil);
	}

	// Note - this one is not in lighting but in transformCommon as it has uses beyond lighting
	if (dirtyUniforms & DIRTY_MATAMBIENTALPHA) {
		Uint8x3ToFloat4_AlphaUint8(ub->matAmbient, gstate.materialambient, gstate.getMaterialAmbientA());
	}

	// Texturing
	if (dirtyUniforms & DIRTY_UVSCALEOFFSET) {
		const float invW = 1.0f / (float)gstate_c.curTextureWidth;
		const float invH = 1.0f / (float)gstate_c.curTextureHeight;
		const int w = gstate.getTextureWidth(0);
		const int h = gstate.getTextureHeight(0);
		const float widthFactor = (float)w * invW;
		const float heightFactor = (float)h * invH;
		if (gstate_c.bezier || gstate_c.spline) {
			// When we are generating UV coordinates through the bezier/spline, we need to apply the scaling.
			// However, this is missing a check that we're not getting our UV:s supplied for us in the vertices.
			ub->uvScaleOffset[0] = gstate_c.uv.uScale * widthFactor;
			ub->uvScaleOffset[1] = gstate_c.uv.vScale * heightFactor;
			ub->uvScaleOffset[2] = gstate_c.uv.uOff * widthFactor;
			ub->uvScaleOffset[3] = gstate_c.uv.vOff * heightFactor;
		} else {
			ub->uvScaleOffset[0] = widthFactor;
			ub->uvScaleOffset[1] = heightFactor;
			ub->uvScaleOffset[2] = 0.0f;
			ub->uvScaleOffset[3] = 0.0f;
		}
	}

	if (dirtyUniforms & DIRTY_DEPTHRANGE) {
		float viewZScale = gstate.getViewportZScale();
		float viewZCenter = gstate.getViewportZCenter();

		// We had to scale and translate Z to account for our clamped Z range.
		// Therefore, we also need to reverse this to round properly.
		//
		// Example: scale = 65535.0, center = 0.0
		// Resulting range = -65535 to 65535, clamped to [0, 65535]
		// gstate_c.vpDepthScale = 2.0f
		// gstate_c.vpZOffset = -1.0f
		//
		// The projection already accounts for those, so we need to reverse them.
		//
		// Additionally, D3D9 uses a range from [0, 1].  We double and move the center.
		viewZScale *= (1.0f / gstate_c.vpDepthScale) * 2.0f;
		viewZCenter -= 65535.0f * gstate_c.vpZOffset + 32768.5f;

		// Avoid dividing by zero when the scale collapses; the inverse is then reported as 0.
		float viewZInvScale;
		if (viewZScale != 0.0) {
			viewZInvScale = 1.0f / viewZScale;
		} else {
			viewZInvScale = 0.0;
		}

		ub->depthRange[0] = viewZScale;
		ub->depthRange[1] = viewZCenter;
		ub->depthRange[2] = viewZCenter;
		ub->depthRange[3] = viewZInvScale;
	}

	if (dirtyUniforms & DIRTY_BEZIERSPLINE) {
		ub->spline_count_u = gstate_c.spline_count_u;
		ub->spline_count_v = gstate_c.spline_count_v;
		ub->spline_type_u = gstate_c.spline_type_u;
		ub->spline_type_v = gstate_c.spline_type_v;
	}
}
void ShaderManagerVulkan::BaseUpdateUniforms(int dirtyUniforms) { if (dirtyUniforms & DIRTY_TEXENV) { Uint8x3ToFloat4(ub_base.texEnvColor, gstate.texenvcolor); } if (dirtyUniforms & DIRTY_ALPHACOLORREF) { Uint8x3ToInt4_Alpha(ub_base.alphaColorRef, gstate.getColorTestRef(), gstate.getAlphaTestRef() & gstate.getAlphaTestMask()); } if (dirtyUniforms & DIRTY_ALPHACOLORMASK) { Uint8x3ToInt4_Alpha(ub_base.colorTestMask, gstate.getColorTestMask(), gstate.getAlphaTestMask()); } if (dirtyUniforms & DIRTY_FOGCOLOR) { Uint8x3ToFloat4(ub_base.fogColor, gstate.fogcolor); } if (dirtyUniforms & DIRTY_SHADERBLEND) { Uint8x3ToFloat4(ub_base.blendFixA, gstate.getFixA()); Uint8x3ToFloat4(ub_base.blendFixB, gstate.getFixB()); } if (dirtyUniforms & DIRTY_TEXCLAMP) { const float invW = 1.0f / (float)gstate_c.curTextureWidth; const float invH = 1.0f / (float)gstate_c.curTextureHeight; const int w = gstate.getTextureWidth(0); const int h = gstate.getTextureHeight(0); const float widthFactor = (float)w * invW; const float heightFactor = (float)h * invH; // First wrap xy, then half texel xy (for clamp.) 
ub_base.texClamp[0] = widthFactor; ub_base.texClamp[1] = heightFactor; ub_base.texClamp[2] = invW * 0.5f; ub_base.texClamp[3] = invH * 0.5f; ub_base.texClampOffset[0] = gstate_c.curTextureXOffset * invW; ub_base.texClampOffset[1] = gstate_c.curTextureYOffset * invH; } if (dirtyUniforms & DIRTY_PROJMATRIX) { Matrix4x4 flippedMatrix; memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float)); const bool invertedY = gstate_c.vpHeight < 0; if (invertedY) { flippedMatrix[1] = -flippedMatrix[1]; flippedMatrix[5] = -flippedMatrix[5]; flippedMatrix[9] = -flippedMatrix[9]; flippedMatrix[13] = -flippedMatrix[13]; } const bool invertedX = gstate_c.vpWidth < 0; if (invertedX) { flippedMatrix[0] = -flippedMatrix[0]; flippedMatrix[4] = -flippedMatrix[4]; flippedMatrix[8] = -flippedMatrix[8]; flippedMatrix[12] = -flippedMatrix[12]; } ConvertProjMatrixToVulkan(flippedMatrix, invertedX, invertedY); CopyMatrix4x4(ub_base.proj, flippedMatrix.getReadPtr()); } if (dirtyUniforms & DIRTY_PROJTHROUGHMATRIX) { Matrix4x4 proj_through; proj_through.setOrthoVulkan(0.0f, gstate_c.curRTWidth, 0, gstate_c.curRTHeight, 0, 1); CopyMatrix4x4(ub_base.proj_through, proj_through.getReadPtr()); } // Transform if (dirtyUniforms & DIRTY_WORLDMATRIX) { ConvertMatrix4x3To4x4(ub_base.world, gstate.worldMatrix); } if (dirtyUniforms & DIRTY_VIEWMATRIX) { ConvertMatrix4x3To4x4(ub_base.view, gstate.viewMatrix); } if (dirtyUniforms & DIRTY_TEXMATRIX) { ConvertMatrix4x3To4x4(ub_base.tex, gstate.tgenMatrix); } // Combined two small uniforms if (dirtyUniforms & (DIRTY_FOGCOEF | DIRTY_STENCILREPLACEVALUE)) { float fogcoef_stencil[3] = { getFloat24(gstate.fog1), getFloat24(gstate.fog2), (float)gstate.getStencilTestRef() }; if (my_isinf(fogcoef_stencil[1])) { // not really sure what a sensible value might be. fogcoef_stencil[1] = fogcoef_stencil[1] < 0.0f ? 
-10000.0f : 10000.0f; } else if (my_isnan(fogcoef_stencil[1])) { // Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988 // Just put the fog far away at a large finite distance. // Infinities and NaNs are rather unpredictable in shaders on many GPUs // so it's best to just make it a sane calculation. fogcoef_stencil[0] = 100000.0f; fogcoef_stencil[1] = 1.0f; } #ifndef MOBILE_DEVICE else if (my_isnanorinf(fogcoef_stencil[1]) || my_isnanorinf(fogcoef_stencil[0])) { ERROR_LOG_REPORT_ONCE(fognan, G3D, "Unhandled fog NaN/INF combo: %f %f", fogcoef_stencil[0], fogcoef_stencil[1]); } #endif CopyFloat3(ub_base.fogCoef_stencil, fogcoef_stencil); } // Texturing if (dirtyUniforms & DIRTY_UVSCALEOFFSET) { const float invW = 1.0f / (float)gstate_c.curTextureWidth; const float invH = 1.0f / (float)gstate_c.curTextureHeight; const int w = gstate.getTextureWidth(0); const int h = gstate.getTextureHeight(0); const float widthFactor = (float)w * invW; const float heightFactor = (float)h * invH; ub_base.uvScaleOffset[0] = widthFactor; ub_base.uvScaleOffset[1] = heightFactor; ub_base.uvScaleOffset[2] = 0.0f; ub_base.uvScaleOffset[3] = 0.0f; } if (dirtyUniforms & DIRTY_DEPTHRANGE) { float viewZScale = gstate.getViewportZScale(); float viewZCenter = gstate.getViewportZCenter(); float viewZInvScale; // We had to scale and translate Z to account for our clamped Z range. // Therefore, we also need to reverse this to round properly. // // Example: scale = 65535.0, center = 0.0 // Resulting range = -65535 to 65535, clamped to [0, 65535] // gstate_c.vpDepthScale = 2.0f // gstate_c.vpZOffset = -1.0f // // The projection already accounts for those, so we need to reverse them. // // Additionally, D3D9 uses a range from [0, 1]. We double and move the center. 
viewZScale *= (1.0f / gstate_c.vpDepthScale) * 2.0f; viewZCenter -= 65535.0f * gstate_c.vpZOffset + 32768.5f; if (viewZScale != 0.0) { viewZInvScale = 1.0f / viewZScale; } else { viewZInvScale = 0.0; } ub_base.depthRange[0] = viewZScale; ub_base.depthRange[1] = viewZCenter; ub_base.depthRange[2] = viewZCenter; ub_base.depthRange[3] = viewZInvScale; } }
void ShaderManagerVulkan::BaseUpdateUniforms(int dirtyUniforms) { if (dirtyUniforms & DIRTY_TEXENV) { Uint8x3ToFloat4(ub_base.texEnvColor, gstate.texenvcolor); } if (dirtyUniforms & DIRTY_ALPHACOLORREF) { Uint8x3ToInt4_Alpha(ub_base.alphaColorRef, gstate.getColorTestRef(), gstate.getAlphaTestRef() & gstate.getAlphaTestMask()); } if (dirtyUniforms & DIRTY_ALPHACOLORMASK) { Uint8x3ToInt4_Alpha(ub_base.colorTestMask, gstate.getColorTestMask(), gstate.getAlphaTestMask()); } if (dirtyUniforms & DIRTY_FOGCOLOR) { Uint8x3ToFloat4(ub_base.fogColor, gstate.fogcolor); } if (dirtyUniforms & DIRTY_SHADERBLEND) { Uint8x3ToFloat4(ub_base.blendFixA, gstate.getFixA()); Uint8x3ToFloat4(ub_base.blendFixB, gstate.getFixB()); } if (dirtyUniforms & DIRTY_TEXCLAMP) { const float invW = 1.0f / (float)gstate_c.curTextureWidth; const float invH = 1.0f / (float)gstate_c.curTextureHeight; const int w = gstate.getTextureWidth(0); const int h = gstate.getTextureHeight(0); const float widthFactor = (float)w * invW; const float heightFactor = (float)h * invH; // First wrap xy, then half texel xy (for clamp.) 
const float texclamp[4] = { widthFactor, heightFactor, invW * 0.5f, invH * 0.5f, }; const float texclampoff[2] = { gstate_c.curTextureXOffset * invW, gstate_c.curTextureYOffset * invH, }; CopyFloat4(ub_base.texClamp, texclamp); CopyFloat2(ub_base.texClampOffset, texclampoff); } if (dirtyUniforms & DIRTY_PROJMATRIX) { Matrix4x4 flippedMatrix; memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float)); const bool invertedY = gstate_c.vpHeight < 0; if (invertedY) { flippedMatrix[1] = -flippedMatrix[1]; flippedMatrix[5] = -flippedMatrix[5]; flippedMatrix[9] = -flippedMatrix[9]; flippedMatrix[13] = -flippedMatrix[13]; } const bool invertedX = gstate_c.vpWidth < 0; if (invertedX) { flippedMatrix[0] = -flippedMatrix[0]; flippedMatrix[4] = -flippedMatrix[4]; flippedMatrix[8] = -flippedMatrix[8]; flippedMatrix[12] = -flippedMatrix[12]; } ConvertProjMatrixToVulkan(flippedMatrix, invertedX, invertedY); CopyMatrix4x4(ub_base.proj, flippedMatrix.getReadPtr()); } if (dirtyUniforms & DIRTY_PROJTHROUGHMATRIX) { Matrix4x4 proj_through; proj_through.setOrthoVulkan(0.0f, gstate_c.curRTWidth, 0, gstate_c.curRTHeight, 0, 1); CopyMatrix4x4(ub_base.proj_through, proj_through.getReadPtr()); } // Transform if (dirtyUniforms & DIRTY_WORLDMATRIX) { ConvertMatrix4x3To4x4(ub_base.world, gstate.worldMatrix); } if (dirtyUniforms & DIRTY_VIEWMATRIX) { ConvertMatrix4x3To4x4(ub_base.view, gstate.viewMatrix); } if (dirtyUniforms & DIRTY_TEXMATRIX) { ConvertMatrix4x3To4x4(ub_base.tex, gstate.tgenMatrix); } // Combined two small uniforms if (dirtyUniforms & (DIRTY_FOGCOEF | DIRTY_STENCILREPLACEVALUE)) { float fogcoef_stencil[3] = { getFloat24(gstate.fog1), getFloat24(gstate.fog2), (float)gstate.getStencilTestRef() }; if (my_isinf(fogcoef_stencil[1])) { // not really sure what a sensible value might be. fogcoef_stencil[1] = fogcoef_stencil[1] < 0.0f ? 
-10000.0f : 10000.0f; } else if (my_isnan(fogcoef_stencil[1])) { // Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988 // Just put the fog far away at a large finite distance. // Infinities and NaNs are rather unpredictable in shaders on many GPUs // so it's best to just make it a sane calculation. fogcoef_stencil[0] = 100000.0f; fogcoef_stencil[1] = 1.0f; } #ifndef MOBILE_DEVICE else if (my_isnanorinf(fogcoef_stencil[1]) || my_isnanorinf(fogcoef_stencil[0])) { ERROR_LOG_REPORT_ONCE(fognan, G3D, "Unhandled fog NaN/INF combo: %f %f", fogcoef_stencil[0], fogcoef_stencil[1]); } #endif CopyFloat3(ub_base.fogCoef_stencil, fogcoef_stencil); } // Texturing if (dirtyUniforms & DIRTY_UVSCALEOFFSET) { const float invW = 1.0f / (float)gstate_c.curTextureWidth; const float invH = 1.0f / (float)gstate_c.curTextureHeight; const int w = gstate.getTextureWidth(0); const int h = gstate.getTextureHeight(0); const float widthFactor = (float)w * invW; const float heightFactor = (float)h * invH; static const float rescale[4] = { 1.0f, 2 * 127.5f / 128.f, 2 * 32767.5f / 32768.f, 1.0f }; const float factor = rescale[(gstate.vertType & GE_VTYPE_TC_MASK) >> GE_VTYPE_TC_SHIFT]; float uvscaleoff[4]; switch (gstate.getUVGenMode()) { case GE_TEXMAP_TEXTURE_COORDS: // Not sure what GE_TEXMAP_UNKNOWN is, but seen in Riviera. Treating the same as GE_TEXMAP_TEXTURE_COORDS works. case GE_TEXMAP_UNKNOWN: if (g_Config.bPrescaleUV) { // We are here but are prescaling UV in the decoder? Let's do the same as in the other case // except consider *Scale and *Off to be 1 and 0. uvscaleoff[0] = widthFactor; uvscaleoff[1] = heightFactor; uvscaleoff[2] = 0.0f; uvscaleoff[3] = 0.0f; } else { uvscaleoff[0] = gstate_c.uv.uScale * factor * widthFactor; uvscaleoff[1] = gstate_c.uv.vScale * factor * heightFactor; uvscaleoff[2] = gstate_c.uv.uOff * widthFactor; uvscaleoff[3] = gstate_c.uv.vOff * heightFactor; } break; // These two work the same whether or not we prescale UV. 
case GE_TEXMAP_TEXTURE_MATRIX: // We cannot bake the UV coord scale factor in here, as we apply a matrix multiplication // before this is applied, and the matrix multiplication may contain translation. In this case // the translation will be scaled which breaks faces in Hexyz Force for example. // So I've gone back to applying the scale factor in the shader. uvscaleoff[0] = widthFactor; uvscaleoff[1] = heightFactor; uvscaleoff[2] = 0.0f; uvscaleoff[3] = 0.0f; break; case GE_TEXMAP_ENVIRONMENT_MAP: // In this mode we only use uvscaleoff to scale to the texture size. uvscaleoff[0] = widthFactor; uvscaleoff[1] = heightFactor; uvscaleoff[2] = 0.0f; uvscaleoff[3] = 0.0f; break; default: ERROR_LOG_REPORT(G3D, "Unexpected UV gen mode: %d", gstate.getUVGenMode()); } CopyFloat4(ub_base.uvScaleOffset, uvscaleoff); } if (dirtyUniforms & DIRTY_DEPTHRANGE) { float viewZScale = gstate.getViewportZScale(); float viewZCenter = gstate.getViewportZCenter(); float viewZInvScale; // We had to scale and translate Z to account for our clamped Z range. // Therefore, we also need to reverse this to round properly. // // Example: scale = 65535.0, center = 0.0 // Resulting range = -65535 to 65535, clamped to [0, 65535] // gstate_c.vpDepthScale = 2.0f // gstate_c.vpZOffset = -1.0f // // The projection already accounts for those, so we need to reverse them. // // Additionally, D3D9 uses a range from [0, 1]. We double and move the center. viewZScale *= (1.0f / gstate_c.vpDepthScale) * 2.0f; viewZCenter -= 65535.0f * gstate_c.vpZOffset + 32768.5f; if (viewZScale != 0.0) { viewZInvScale = 1.0f / viewZScale; } else { viewZInvScale = 0.0; } float data[4] = { viewZScale, viewZCenter, viewZCenter, viewZInvScale }; CopyFloat4(ub_base.depthRange, data); } }
void ShaderManagerDX9::VSUpdateUniforms(int dirtyUniforms) { // Update any dirty uniforms before we draw if (dirtyUniforms & DIRTY_PROJMATRIX) { Matrix4x4 flippedMatrix; memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float)); const bool invertedY = gstate_c.vpHeight < 0; if (!invertedY) { flippedMatrix[1] = -flippedMatrix[1]; flippedMatrix[5] = -flippedMatrix[5]; flippedMatrix[9] = -flippedMatrix[9]; flippedMatrix[13] = -flippedMatrix[13]; } const bool invertedX = gstate_c.vpWidth < 0; if (invertedX) { flippedMatrix[0] = -flippedMatrix[0]; flippedMatrix[4] = -flippedMatrix[4]; flippedMatrix[8] = -flippedMatrix[8]; flippedMatrix[12] = -flippedMatrix[12]; } ConvertProjMatrixToD3D(flippedMatrix, invertedX, invertedY); VSSetMatrix(CONST_VS_PROJ, flippedMatrix.getReadPtr()); } if (dirtyUniforms & DIRTY_PROJTHROUGHMATRIX) { Matrix4x4 proj_through; proj_through.setOrtho(0.0f, gstate_c.curRTWidth, gstate_c.curRTHeight, 0, 0, 1); ConvertProjMatrixToD3DThrough(proj_through); VSSetMatrix(CONST_VS_PROJ_THROUGH, proj_through.getReadPtr()); } // Transform if (dirtyUniforms & DIRTY_WORLDMATRIX) { VSSetMatrix4x3_3(CONST_VS_WORLD, gstate.worldMatrix); } if (dirtyUniforms & DIRTY_VIEWMATRIX) { VSSetMatrix4x3_3(CONST_VS_VIEW, gstate.viewMatrix); } if (dirtyUniforms & DIRTY_TEXMATRIX) { VSSetMatrix4x3_3(CONST_VS_TEXMTX, gstate.tgenMatrix); } if (dirtyUniforms & DIRTY_FOGCOEF) { float fogcoef[2] = { getFloat24(gstate.fog1), getFloat24(gstate.fog2), }; if (my_isinf(fogcoef[1])) { // not really sure what a sensible value might be. fogcoef[1] = fogcoef[1] < 0.0f ? -10000.0f : 10000.0f; } else if (my_isnan(fogcoef[1])) { // Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988 // Just put the fog far away at a large finite distance. // Infinities and NaNs are rather unpredictable in shaders on many GPUs // so it's best to just make it a sane calculation. 
fogcoef[0] = 100000.0f; fogcoef[1] = 1.0f; } #ifndef MOBILE_DEVICE else if (my_isnanorinf(fogcoef[1]) || my_isnanorinf(fogcoef[0])) { ERROR_LOG_REPORT_ONCE(fognan, G3D, "Unhandled fog NaN/INF combo: %f %f", fogcoef[0], fogcoef[1]); } #endif VSSetFloatArray(CONST_VS_FOGCOEF, fogcoef, 2); } // TODO: Could even set all bones in one go if they're all dirty. #ifdef USE_BONE_ARRAY if (u_bone != 0) { float allBones[8 * 16]; bool allDirty = true; for (int i = 0; i < numBones; i++) { if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) { ConvertMatrix4x3To4x4(allBones + 16 * i, gstate.boneMatrix + 12 * i); } else { allDirty = false; } } if (allDirty) { // Set them all with one call //glUniformMatrix4fv(u_bone, numBones, GL_FALSE, allBones); } else { // Set them one by one. Could try to coalesce two in a row etc but too lazy. for (int i = 0; i < numBones; i++) { if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) { //glUniformMatrix4fv(u_bone + i, 1, GL_FALSE, allBones + 16 * i); } } } } #else for (int i = 0; i < 8; i++) { if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) { VSSetMatrix4x3_3(CONST_VS_BONE0 + 3 * i, gstate.boneMatrix + 12 * i); } } #endif // Texturing if (dirtyUniforms & DIRTY_UVSCALEOFFSET) { const float invW = 1.0f / (float)gstate_c.curTextureWidth; const float invH = 1.0f / (float)gstate_c.curTextureHeight; const int w = gstate.getTextureWidth(0); const int h = gstate.getTextureHeight(0); const float widthFactor = (float)w * invW; const float heightFactor = (float)h * invH; float uvscaleoff[4]; switch (gstate.getUVGenMode()) { case GE_TEXMAP_TEXTURE_COORDS: // Not sure what GE_TEXMAP_UNKNOWN is, but seen in Riviera. Treating the same as GE_TEXMAP_TEXTURE_COORDS works. case GE_TEXMAP_UNKNOWN: if (g_Config.bPrescaleUV) { // We are here but are prescaling UV in the decoder? Let's do the same as in the other case // except consider *Scale and *Off to be 1 and 0. 
uvscaleoff[0] = widthFactor; uvscaleoff[1] = heightFactor; uvscaleoff[2] = 0.0f; uvscaleoff[3] = 0.0f; } else { uvscaleoff[0] = gstate_c.uv.uScale * widthFactor; uvscaleoff[1] = gstate_c.uv.vScale * heightFactor; uvscaleoff[2] = gstate_c.uv.uOff * widthFactor; uvscaleoff[3] = gstate_c.uv.vOff * heightFactor; } break; // These two work the same whether or not we prescale UV. case GE_TEXMAP_TEXTURE_MATRIX: // We cannot bake the UV coord scale factor in here, as we apply a matrix multiplication // before this is applied, and the matrix multiplication may contain translation. In this case // the translation will be scaled which breaks faces in Hexyz Force for example. // So I've gone back to applying the scale factor in the shader. uvscaleoff[0] = widthFactor; uvscaleoff[1] = heightFactor; uvscaleoff[2] = 0.0f; uvscaleoff[3] = 0.0f; break; case GE_TEXMAP_ENVIRONMENT_MAP: // In this mode we only use uvscaleoff to scale to the texture size. uvscaleoff[0] = widthFactor; uvscaleoff[1] = heightFactor; uvscaleoff[2] = 0.0f; uvscaleoff[3] = 0.0f; break; default: ERROR_LOG_REPORT(G3D, "Unexpected UV gen mode: %d", gstate.getUVGenMode()); } VSSetFloatArray(CONST_VS_UVSCALEOFFSET, uvscaleoff, 4); } if (dirtyUniforms & DIRTY_DEPTHRANGE) { // Depth is [0, 1] mapping to [minz, maxz], not too hard. float vpZScale = gstate.getViewportZScale(); float vpZCenter = gstate.getViewportZCenter(); // These are just the reverse of the formulas in GPUStateUtils. float halfActualZRange = vpZScale / gstate_c.vpDepthScale; float minz = -((gstate_c.vpZOffset * halfActualZRange) - vpZCenter) - halfActualZRange; float viewZScale = halfActualZRange * 2.0f; // Account for the half pixel offset. 
float viewZCenter = minz + (DepthSliceFactor() / 256.0f) * 0.5f; float viewZInvScale; if (viewZScale != 0.0) { viewZInvScale = 1.0f / viewZScale; } else { viewZInvScale = 0.0; } float data[4] = { viewZScale, viewZCenter, viewZCenter, viewZInvScale }; VSSetFloatUniform4(CONST_VS_DEPTHRANGE, data); } // Lighting if (dirtyUniforms & DIRTY_AMBIENT) { VSSetColorUniform3Alpha(CONST_VS_AMBIENT, gstate.ambientcolor, gstate.getAmbientA()); } if (dirtyUniforms & DIRTY_MATAMBIENTALPHA) { VSSetColorUniform3Alpha(CONST_VS_MATAMBIENTALPHA, gstate.materialambient, gstate.getMaterialAmbientA()); } if (dirtyUniforms & DIRTY_MATDIFFUSE) { VSSetColorUniform3(CONST_VS_MATDIFFUSE, gstate.materialdiffuse); } if (dirtyUniforms & DIRTY_MATEMISSIVE) { VSSetColorUniform3(CONST_VS_MATEMISSIVE, gstate.materialemissive); } if (dirtyUniforms & DIRTY_MATSPECULAR) { VSSetColorUniform3ExtraFloat(CONST_VS_MATSPECULAR, gstate.materialspecular, getFloat24(gstate.materialspecularcoef)); } for (int i = 0; i < 4; i++) { if (dirtyUniforms & (DIRTY_LIGHT0 << i)) { if (gstate.isDirectionalLight(i)) { // Prenormalize float x = getFloat24(gstate.lpos[i * 3 + 0]); float y = getFloat24(gstate.lpos[i * 3 + 1]); float z = getFloat24(gstate.lpos[i * 3 + 2]); float len = sqrtf(x*x + y*y + z*z); if (len == 0.0f) len = 1.0f; else len = 1.0f / len; float vec[3] = { x * len, y * len, z * len }; VSSetFloatArray(CONST_VS_LIGHTPOS + i, vec, 3); } else { VSSetFloat24Uniform3(CONST_VS_LIGHTPOS + i, &gstate.lpos[i * 3]); } VSSetFloat24Uniform3(CONST_VS_LIGHTDIR + i, &gstate.ldir[i * 3]); VSSetFloat24Uniform3(CONST_VS_LIGHTATT + i, &gstate.latt[i * 3]); VSSetFloat(CONST_VS_LIGHTANGLE + i, getFloat24(gstate.lcutoff[i])); VSSetFloat(CONST_VS_LIGHTSPOTCOEF + i, getFloat24(gstate.lconv[i])); VSSetColorUniform3(CONST_VS_LIGHTAMBIENT + i, gstate.lcolor[i * 3]); VSSetColorUniform3(CONST_VS_LIGHTDIFFUSE + i, gstate.lcolor[i * 3 + 1]); VSSetColorUniform3(CONST_VS_LIGHTSPECULAR + i, gstate.lcolor[i * 3 + 2]); } } }
void SoftwareTransform( int prim, int vertexCount, u32 vertType, u16 *&inds, int indexType, const DecVtxFormat &decVtxFormat, int &maxIndex, TransformedVertex *&drawBuffer, int &numTrans, bool &drawIndexed, const SoftwareTransformParams *params, SoftwareTransformResult *result) { u8 *decoded = params->decoded; FramebufferManagerCommon *fbman = params->fbman; TextureCacheCommon *texCache = params->texCache; TransformedVertex *transformed = params->transformed; TransformedVertex *transformedExpanded = params->transformedExpanded; float ySign = 1.0f; bool throughmode = (vertType & GE_VTYPE_THROUGH_MASK) != 0; bool lmode = gstate.isUsingSecondaryColor() && gstate.isLightingEnabled(); float uscale = 1.0f; float vscale = 1.0f; if (throughmode) { uscale /= gstate_c.curTextureWidth; vscale /= gstate_c.curTextureHeight; } bool skinningEnabled = vertTypeIsSkinningEnabled(vertType); const int w = gstate.getTextureWidth(0); const int h = gstate.getTextureHeight(0); float widthFactor = (float) w / (float) gstate_c.curTextureWidth; float heightFactor = (float) h / (float) gstate_c.curTextureHeight; Lighter lighter(vertType); float fog_end = getFloat24(gstate.fog1); float fog_slope = getFloat24(gstate.fog2); // Same fixup as in ShaderManagerGLES.cpp if (my_isnanorinf(fog_end)) { // Not really sure what a sensible value might be, but let's try 64k. fog_end = std::signbit(fog_end) ? -65535.0f : 65535.0f; } if (my_isnanorinf(fog_slope)) { fog_slope = std::signbit(fog_slope) ? -65535.0f : 65535.0f; } int provokeIndOffset = 0; if (params->provokeFlatFirst) { provokeIndOffset = ColorIndexOffset(prim, gstate.getShadeMode(), gstate.isModeClear()); } VertexReader reader(decoded, decVtxFormat, vertType); if (throughmode) { for (int index = 0; index < maxIndex; index++) { // Do not touch the coordinates or the colors. No lighting. reader.Goto(index); // TODO: Write to a flexible buffer, we don't always need all four components. 
TransformedVertex &vert = transformed[index]; reader.ReadPos(vert.pos); if (reader.hasColor0()) { if (provokeIndOffset != 0 && index + provokeIndOffset < maxIndex) { reader.Goto(index + provokeIndOffset); reader.ReadColor0_8888(vert.color0); reader.Goto(index); } else { reader.ReadColor0_8888(vert.color0); } } else { vert.color0_32 = gstate.getMaterialAmbientRGBA(); } if (reader.hasUV()) { reader.ReadUV(vert.uv); vert.u *= uscale; vert.v *= vscale; } else { vert.u = 0.0f; vert.v = 0.0f; } // Ignore color1 and fog, never used in throughmode anyway. // The w of uv is also never used (hardcoded to 1.0.) } } else { // Okay, need to actually perform the full transform. for (int index = 0; index < maxIndex; index++) { reader.Goto(index); float v[3] = {0, 0, 0}; Vec4f c0 = Vec4f(1, 1, 1, 1); Vec4f c1 = Vec4f(0, 0, 0, 0); float uv[3] = {0, 0, 1}; float fogCoef = 1.0f; float out[3]; float pos[3]; Vec3f normal(0, 0, 1); Vec3f worldnormal(0, 0, 1); reader.ReadPos(pos); float ruv[2] = { 0.0f, 0.0f }; if (reader.hasUV()) reader.ReadUV(ruv); // Read all the provoking vertex values here. Vec4f unlitColor; if (provokeIndOffset != 0 && index + provokeIndOffset < maxIndex) reader.Goto(index + provokeIndOffset); if (reader.hasColor0()) reader.ReadColor0(unlitColor.AsArray()); else unlitColor = Vec4f::FromRGBA(gstate.getMaterialAmbientRGBA()); if (reader.hasNormal()) reader.ReadNrm(normal.AsArray()); if (!skinningEnabled) { Vec3ByMatrix43(out, pos, gstate.worldMatrix); if (reader.hasNormal()) { if (gstate.areNormalsReversed()) { normal = -normal; } Norm3ByMatrix43(worldnormal.AsArray(), normal.AsArray(), gstate.worldMatrix); worldnormal = worldnormal.Normalized(); } } else { float weights[8]; // TODO: For flat, are weights from the provoking used for color/normal? 
reader.Goto(index); reader.ReadWeights(weights); // Skinning Vec3f psum(0, 0, 0); Vec3f nsum(0, 0, 0); for (int i = 0; i < vertTypeGetNumBoneWeights(vertType); i++) { if (weights[i] != 0.0f) { Vec3ByMatrix43(out, pos, gstate.boneMatrix+i*12); Vec3f tpos(out); psum += tpos * weights[i]; if (reader.hasNormal()) { Vec3f norm; Norm3ByMatrix43(norm.AsArray(), normal.AsArray(), gstate.boneMatrix+i*12); nsum += norm * weights[i]; } } } // Yes, we really must multiply by the world matrix too. Vec3ByMatrix43(out, psum.AsArray(), gstate.worldMatrix); if (reader.hasNormal()) { normal = nsum; if (gstate.areNormalsReversed()) { normal = -normal; } Norm3ByMatrix43(worldnormal.AsArray(), normal.AsArray(), gstate.worldMatrix); worldnormal = worldnormal.Normalized(); } } // Perform lighting here if enabled. if (gstate.isLightingEnabled()) { float litColor0[4]; float litColor1[4]; lighter.Light(litColor0, litColor1, unlitColor.AsArray(), out, worldnormal); // Don't ignore gstate.lmode - we should send two colors in that case for (int j = 0; j < 4; j++) { c0[j] = litColor0[j]; } if (lmode) { // Separate colors for (int j = 0; j < 4; j++) { c1[j] = litColor1[j]; } } else { // Summed color into c0 (will clamp in ToRGBA().) for (int j = 0; j < 4; j++) { c0[j] += litColor1[j]; } } } else { if (reader.hasColor0()) { for (int j = 0; j < 4; j++) { c0[j] = unlitColor[j]; } } else { c0 = Vec4f::FromRGBA(gstate.getMaterialAmbientRGBA()); } if (lmode) { // c1 is already 0. } } // Perform texture coordinate generation after the transform and lighting - one style of UV depends on lights. switch (gstate.getUVGenMode()) { case GE_TEXMAP_TEXTURE_COORDS: // UV mapping case GE_TEXMAP_UNKNOWN: // Seen in Riviera. Unsure of meaning, but this works. // We always prescale in the vertex decoder now. uv[0] = ruv[0]; uv[1] = ruv[1]; uv[2] = 1.0f; break; case GE_TEXMAP_TEXTURE_MATRIX: { // TODO: What's the correct behavior with flat shading? Provoked normal or real normal? 
// Projection mapping Vec3f source; switch (gstate.getUVProjMode()) { case GE_PROJMAP_POSITION: // Use model space XYZ as source source = pos; break; case GE_PROJMAP_UV: // Use unscaled UV as source source = Vec3f(ruv[0], ruv[1], 0.0f); break; case GE_PROJMAP_NORMALIZED_NORMAL: // Use normalized normal as source source = normal.Normalized(); if (!reader.hasNormal()) { ERROR_LOG_REPORT(G3D, "Normal projection mapping without normal?"); } break; case GE_PROJMAP_NORMAL: // Use non-normalized normal as source! source = normal; if (!reader.hasNormal()) { ERROR_LOG_REPORT(G3D, "Normal projection mapping without normal?"); } break; } float uvw[3]; Vec3ByMatrix43(uvw, &source.x, gstate.tgenMatrix); uv[0] = uvw[0]; uv[1] = uvw[1]; uv[2] = uvw[2]; } break; case GE_TEXMAP_ENVIRONMENT_MAP: // Shade mapping - use two light sources to generate U and V. { auto getLPosFloat = [&](int l, int i) { return getFloat24(gstate.lpos[l * 3 + i]); }; auto getLPos = [&](int l) { return Vec3f(getLPosFloat(l, 0), getLPosFloat(l, 1), getLPosFloat(l, 2)); }; auto calcShadingLPos = [&](int l) { Vec3f pos = getLPos(l); if (pos.Length2() == 0.0f) { return Vec3f(0.0f, 0.0f, 1.0f); } else { return pos.Normalized(); } }; // Might not have lighting enabled, so don't use lighter. Vec3f lightpos0 = calcShadingLPos(gstate.getUVLS0()); Vec3f lightpos1 = calcShadingLPos(gstate.getUVLS1()); uv[0] = (1.0f + Dot(lightpos0, worldnormal))/2.0f; uv[1] = (1.0f + Dot(lightpos1, worldnormal))/2.0f; uv[2] = 1.0f; } break; default: // Illegal ERROR_LOG_REPORT(G3D, "Impossible UV gen mode? %d", gstate.getUVGenMode()); break; } uv[0] = uv[0] * widthFactor; uv[1] = uv[1] * heightFactor; // Transform the coord by the view matrix. Vec3ByMatrix43(v, out, gstate.viewMatrix); fogCoef = (v[2] + fog_end) * fog_slope; // TODO: Write to a flexible buffer, we don't always need all four components. 
memcpy(&transformed[index].x, v, 3 * sizeof(float)); transformed[index].fog = fogCoef; memcpy(&transformed[index].u, uv, 3 * sizeof(float)); transformed[index].color0_32 = c0.ToRGBA(); transformed[index].color1_32 = c1.ToRGBA(); // The multiplication by the projection matrix is still performed in the vertex shader. // So is vertex depth rounding, to simulate the 16-bit depth buffer. } } // Here's the best opportunity to try to detect rectangles used to clear the screen, and // replace them with real clears. This can provide a speedup on certain mobile chips. // // An alternative option is to simply ditch all the verts except the first and last to create a single // rectangle out of many. Quite a small optimization though. // Experiment: Disable on PowerVR (see issue #6290) // TODO: This bleeds outside the play area in non-buffered mode. Big deal? Probably not. // TODO: Allow creating a depth clear and a color draw. bool reallyAClear = false; if (maxIndex > 1 && prim == GE_PRIM_RECTANGLES && gstate.isModeClear() && params->allowClear) { int scissorX2 = gstate.getScissorX2() + 1; int scissorY2 = gstate.getScissorY2() + 1; reallyAClear = IsReallyAClear(transformed, maxIndex, scissorX2, scissorY2); } if (reallyAClear && gl_extensions.gpuVendor != GPU_VENDOR_IMGTEC) { // If alpha is not allowed to be separate, it must match for both depth/stencil and color. Vulkan requires this. bool alphaMatchesColor = gstate.isClearModeColorMask() == gstate.isClearModeAlphaMask(); bool depthMatchesStencil = gstate.isClearModeAlphaMask() == gstate.isClearModeDepthMask(); bool matchingComponents = params->allowSeparateAlphaClear || (alphaMatchesColor && depthMatchesStencil); bool stencilNotMasked = !gstate.isClearModeAlphaMask() || gstate.getStencilWriteMask() == 0x00; if (matchingComponents && stencilNotMasked) { result->color = transformed[1].color0_32; // Need to rescale from a [0, 1] float. This is the final transformed value. 
result->depth = ToScaledDepthFromIntegerScale((int)(transformed[1].z * 65535.0f)); result->action = SW_CLEAR; gpuStats.numClears++; return; } } // This means we're using a framebuffer (and one that isn't big enough.) if (gstate_c.curTextureHeight < (u32)h && maxIndex >= 2) { // Even if not rectangles, this will detect if either of the first two are outside the framebuffer. // HACK: Adding one pixel margin to this detection fixes issues in Assassin's Creed : Bloodlines, // while still keeping BOF working (see below). const float invTexH = 1.0f / gstate_c.curTextureHeight; // size of one texel. bool tlOutside; bool tlAlmostOutside; bool brOutside; // If we're outside heightFactor, then v must be wrapping or clamping. Avoid this workaround. // If we're <= 1.0f, we're inside the framebuffer (workaround not needed.) // We buffer that 1.0f a little more with a texel to avoid some false positives. tlOutside = transformed[0].v <= heightFactor && transformed[0].v > 1.0f + invTexH; brOutside = transformed[1].v <= heightFactor && transformed[1].v > 1.0f + invTexH; // Careful: if br is outside, but tl is well inside, this workaround still doesn't make sense. // We go with halfway, since we overestimate framebuffer heights sometimes but not by much. tlAlmostOutside = transformed[0].v <= heightFactor && transformed[0].v >= 0.5f; if (tlOutside || (brOutside && tlAlmostOutside)) { // Okay, so we're texturing from outside the framebuffer, but inside the texture height. // Breath of Fire 3 does this to access a render surface at an offset. const u32 bpp = fbman->GetTargetFormat() == GE_FORMAT_8888 ? 
4 : 2; const u32 prevH = texCache->AttachedDrawingHeight(); const u32 fb_size = bpp * fbman->GetTargetStride() * prevH; const u32 prevYOffset = gstate_c.curTextureYOffset; if (texCache->SetOffsetTexture(fb_size)) { const float oldWidthFactor = widthFactor; const float oldHeightFactor = heightFactor; widthFactor = (float) w / (float) gstate_c.curTextureWidth; heightFactor = (float) h / (float) gstate_c.curTextureHeight; // We've already baked in the old gstate_c.curTextureYOffset, so correct. const float yDiff = (float) (prevH + prevYOffset - gstate_c.curTextureYOffset) / (float) h; for (int index = 0; index < maxIndex; ++index) { transformed[index].u *= widthFactor / oldWidthFactor; // Inverse it back to scale to the new FBO, and add 1.0f to account for old FBO. transformed[index].v = (transformed[index].v / oldHeightFactor - yDiff) * heightFactor; } } } } // Step 2: expand rectangles. drawBuffer = transformed; numTrans = 0; drawIndexed = false; if (prim != GE_PRIM_RECTANGLES) { // We can simply draw the unexpanded buffer. numTrans = vertexCount; drawIndexed = true; } else { bool useBufferedRendering = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE; if (useBufferedRendering) ySign = -ySign; float flippedMatrix[16]; if (!throughmode) { memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float)); const bool invertedY = useBufferedRendering ? 
(gstate_c.vpHeight < 0) : (gstate_c.vpHeight > 0); if (invertedY) { flippedMatrix[1] = -flippedMatrix[1]; flippedMatrix[5] = -flippedMatrix[5]; flippedMatrix[9] = -flippedMatrix[9]; flippedMatrix[13] = -flippedMatrix[13]; } const bool invertedX = gstate_c.vpWidth < 0; if (invertedX) { flippedMatrix[0] = -flippedMatrix[0]; flippedMatrix[4] = -flippedMatrix[4]; flippedMatrix[8] = -flippedMatrix[8]; flippedMatrix[12] = -flippedMatrix[12]; } } //rectangles always need 2 vertices, disregard the last one if there's an odd number vertexCount = vertexCount & ~1; numTrans = 0; drawBuffer = transformedExpanded; TransformedVertex *trans = &transformedExpanded[0]; const u16 *indsIn = (const u16 *)inds; u16 *newInds = inds + vertexCount; u16 *indsOut = newInds; maxIndex = 4 * (vertexCount / 2); for (int i = 0; i < vertexCount; i += 2) { const TransformedVertex &transVtxTL = transformed[indsIn[i + 0]]; const TransformedVertex &transVtxBR = transformed[indsIn[i + 1]]; // We have to turn the rectangle into two triangles, so 6 points. // This is 4 verts + 6 indices. // bottom right trans[0] = transVtxBR; // top right trans[1] = transVtxBR; trans[1].y = transVtxTL.y; trans[1].v = transVtxTL.v; // top left trans[2] = transVtxBR; trans[2].x = transVtxTL.x; trans[2].y = transVtxTL.y; trans[2].u = transVtxTL.u; trans[2].v = transVtxTL.v; // bottom left trans[3] = transVtxBR; trans[3].x = transVtxTL.x; trans[3].u = transVtxTL.u; // That's the four corners. Now process UV rotation. if (throughmode) RotateUVThrough(trans); else RotateUV(trans, flippedMatrix, ySign); // Triangle: BR-TR-TL indsOut[0] = i * 2 + 0; indsOut[1] = i * 2 + 1; indsOut[2] = i * 2 + 2; // Triangle: BL-BR-TL indsOut[3] = i * 2 + 3; indsOut[4] = i * 2 + 0; indsOut[5] = i * 2 + 2; trans += 4; indsOut += 6; numTrans += 6; } inds = newInds; drawIndexed = true; // We don't know the color until here, so we have to do it now, instead of in StateMapping. // Might want to reconsider the order of things later... 
if (gstate.isModeClear() && gstate.isClearModeAlphaMask()) { result->setStencil = true; if (vertexCount > 1) { // Take the bottom right alpha value of the first rect as the stencil value. // Technically, each rect could individually fill its stencil, but most of the // time they use the same one. result->stencilValue = transformed[indsIn[1]].color0[3]; } else { result->stencilValue = 0; } } } if (gstate.isModeClear()) { gpuStats.numClears++; } result->action = SW_DRAW_PRIMITIVES; }