// Refreshes the bone matrices in ub_bones for every bone whose dirty bit is set in dirtyUniforms.
void ShaderManagerVulkan::BoneUpdateUniforms(int dirtyUniforms) {
	for (int bone = 0; bone < 8; ++bone) {
		// The per-bone dirty flags are consecutive bits starting at DIRTY_BONEMATRIX0.
		const auto boneDirtyBit = DIRTY_BONEMATRIX0 << bone;
		if ((dirtyUniforms & boneDirtyBit) == 0) {
			continue;
		}
		// Source bone matrices are packed 12 elements apart (4x3) and expanded to 4x4 on upload.
		ConvertMatrix4x3To4x4(ub_bones.bones[bone], gstate.boneMatrix + 12 * bone);
	}
}
// TODO: This probably is not the best interface. // Also, we should try to merge this into the similar function in DrawEngineCommon. bool TransformUnit::GetCurrentSimpleVertices(int count, std::vector<GPUDebugVertex> &vertices, std::vector<u16> &indices) { // This is always for the current vertices. u16 indexLowerBound = 0; u16 indexUpperBound = count - 1; if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) { const u8 *inds = Memory::GetPointer(gstate_c.indexAddr); const u16 *inds16 = (const u16 *)inds; const u32 *inds32 = (const u32 *)inds; if (inds) { GetIndexBounds(inds, count, gstate.vertType, &indexLowerBound, &indexUpperBound); indices.resize(count); switch (gstate.vertType & GE_VTYPE_IDX_MASK) { case GE_VTYPE_IDX_8BIT: for (int i = 0; i < count; ++i) { indices[i] = inds[i]; } break; case GE_VTYPE_IDX_16BIT: for (int i = 0; i < count; ++i) { indices[i] = inds16[i]; } break; case GE_VTYPE_IDX_32BIT: WARN_LOG_REPORT_ONCE(simpleIndexes32, G3D, "SimpleVertices: Decoding 32-bit indexes"); for (int i = 0; i < count; ++i) { // These aren't documented and should be rare. Let's bounds check each one. 
if (inds32[i] != (u16)inds32[i]) { ERROR_LOG_REPORT_ONCE(simpleIndexes32Bounds, G3D, "SimpleVertices: Index outside 16-bit range"); } indices[i] = (u16)inds32[i]; } break; } } else { indices.clear(); } } else { indices.clear(); } static std::vector<u32> temp_buffer; static std::vector<SimpleVertex> simpleVertices; temp_buffer.resize(65536 * 24 / sizeof(u32)); simpleVertices.resize(indexUpperBound + 1); VertexDecoder vdecoder; VertexDecoderOptions options{}; vdecoder.SetVertexType(gstate.vertType, options); DrawEngineCommon::NormalizeVertices((u8 *)(&simpleVertices[0]), (u8 *)(&temp_buffer[0]), Memory::GetPointer(gstate_c.vertexAddr), &vdecoder, indexLowerBound, indexUpperBound, gstate.vertType); float world[16]; float view[16]; float worldview[16]; float worldviewproj[16]; ConvertMatrix4x3To4x4(world, gstate.worldMatrix); ConvertMatrix4x3To4x4(view, gstate.viewMatrix); Matrix4ByMatrix4(worldview, world, view); Matrix4ByMatrix4(worldviewproj, worldview, gstate.projMatrix); vertices.resize(indexUpperBound + 1); for (int i = indexLowerBound; i <= indexUpperBound; ++i) { const SimpleVertex &vert = simpleVertices[i]; if (gstate.isModeThrough()) { if (gstate.vertType & GE_VTYPE_TC_MASK) { vertices[i].u = vert.uv[0]; vertices[i].v = vert.uv[1]; } else { vertices[i].u = 0.0f; vertices[i].v = 0.0f; } vertices[i].x = vert.pos.x; vertices[i].y = vert.pos.y; vertices[i].z = vert.pos.z; if (gstate.vertType & GE_VTYPE_COL_MASK) { memcpy(vertices[i].c, vert.color, sizeof(vertices[i].c)); } else { memset(vertices[i].c, 0, sizeof(vertices[i].c)); } } else { float clipPos[4]; Vec3ByMatrix44(clipPos, vert.pos.AsArray(), worldviewproj); ScreenCoords screenPos = ClipToScreen(clipPos); DrawingCoords drawPos = ScreenToDrawing(screenPos); if (gstate.vertType & GE_VTYPE_TC_MASK) { vertices[i].u = vert.uv[0] * (float)gstate.getTextureWidth(0); vertices[i].v = vert.uv[1] * (float)gstate.getTextureHeight(0); } else { vertices[i].u = 0.0f; vertices[i].v = 0.0f; } vertices[i].x = drawPos.x; 
vertices[i].y = drawPos.y; vertices[i].z = drawPos.z; if (gstate.vertType & GE_VTYPE_COL_MASK) { memcpy(vertices[i].c, vert.color, sizeof(vertices[i].c)); } else { memset(vertices[i].c, 0, sizeof(vertices[i].c)); } } } // The GE debugger expects these to be set. gstate_c.curTextureWidth = gstate.getTextureWidth(0); gstate_c.curTextureHeight = gstate.getTextureHeight(0); return true; }
// This code is HIGHLY unoptimized! // // It does the simplest and safest test possible: If all points of a bbox is outside a single of // our clipping planes, we reject the box. Tighter bounds would be desirable but would take more calculations. bool DrawEngineCommon::TestBoundingBox(void* control_points, int vertexCount, u32 vertType) { SimpleVertex *corners = (SimpleVertex *)(decoded + 65536 * 12); float *verts = (float *)(decoded + 65536 * 18); // Try to skip NormalizeVertices if it's pure positions. No need to bother with a vertex decoder // and a large vertex format. if ((vertType & 0xFFFFFF) == GE_VTYPE_POS_FLOAT) { // memcpy(verts, control_points, 12 * vertexCount); verts = (float *)control_points; } else if ((vertType & 0xFFFFFF) == GE_VTYPE_POS_8BIT) { const s8 *vtx = (const s8 *)control_points; for (int i = 0; i < vertexCount * 3; i++) { verts[i] = vtx[i] * (1.0f / 128.0f); } } else if ((vertType & 0xFFFFFF) == GE_VTYPE_POS_16BIT) { const s16 *vtx = (const s16*)control_points; for (int i = 0; i < vertexCount * 3; i++) { verts[i] = vtx[i] * (1.0f / 32768.0f); } } else { // Simplify away bones and morph before proceeding u8 *temp_buffer = decoded + 65536 * 24; NormalizeVertices((u8 *)corners, temp_buffer, (u8 *)control_points, 0, vertexCount, vertType); // Special case for float positions only. 
const float *ctrl = (const float *)control_points; for (int i = 0; i < vertexCount; i++) { verts[i * 3] = corners[i].pos.x; verts[i * 3 + 1] = corners[i].pos.y; verts[i * 3 + 2] = corners[i].pos.z; } } Plane planes[6]; float world[16]; float view[16]; float worldview[16]; float worldviewproj[16]; ConvertMatrix4x3To4x4(world, gstate.worldMatrix); ConvertMatrix4x3To4x4(view, gstate.viewMatrix); Matrix4ByMatrix4(worldview, world, view); Matrix4ByMatrix4(worldviewproj, worldview, gstate.projMatrix); PlanesFromMatrix(worldviewproj, planes); for (int plane = 0; plane < 6; plane++) { int inside = 0; int out = 0; for (int i = 0; i < vertexCount; i++) { // Here we can test against the frustum planes! float value = planes[plane].Test(verts + i * 3); if (value < 0) out++; else inside++; } if (inside == 0) { // All out return false; } // Any out. For testing that the planes are in the right locations. // if (out != 0) return false; } return true; }
void ShaderManagerVulkan::BaseUpdateUniforms(int dirtyUniforms) { if (dirtyUniforms & DIRTY_TEXENV) { Uint8x3ToFloat4(ub_base.texEnvColor, gstate.texenvcolor); } if (dirtyUniforms & DIRTY_ALPHACOLORREF) { Uint8x3ToInt4_Alpha(ub_base.alphaColorRef, gstate.getColorTestRef(), gstate.getAlphaTestRef() & gstate.getAlphaTestMask()); } if (dirtyUniforms & DIRTY_ALPHACOLORMASK) { Uint8x3ToInt4_Alpha(ub_base.colorTestMask, gstate.getColorTestMask(), gstate.getAlphaTestMask()); } if (dirtyUniforms & DIRTY_FOGCOLOR) { Uint8x3ToFloat4(ub_base.fogColor, gstate.fogcolor); } if (dirtyUniforms & DIRTY_SHADERBLEND) { Uint8x3ToFloat4(ub_base.blendFixA, gstate.getFixA()); Uint8x3ToFloat4(ub_base.blendFixB, gstate.getFixB()); } if (dirtyUniforms & DIRTY_TEXCLAMP) { const float invW = 1.0f / (float)gstate_c.curTextureWidth; const float invH = 1.0f / (float)gstate_c.curTextureHeight; const int w = gstate.getTextureWidth(0); const int h = gstate.getTextureHeight(0); const float widthFactor = (float)w * invW; const float heightFactor = (float)h * invH; // First wrap xy, then half texel xy (for clamp.) 
ub_base.texClamp[0] = widthFactor; ub_base.texClamp[1] = heightFactor; ub_base.texClamp[2] = invW * 0.5f; ub_base.texClamp[3] = invH * 0.5f; ub_base.texClampOffset[0] = gstate_c.curTextureXOffset * invW; ub_base.texClampOffset[1] = gstate_c.curTextureYOffset * invH; } if (dirtyUniforms & DIRTY_PROJMATRIX) { Matrix4x4 flippedMatrix; memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float)); const bool invertedY = gstate_c.vpHeight < 0; if (invertedY) { flippedMatrix[1] = -flippedMatrix[1]; flippedMatrix[5] = -flippedMatrix[5]; flippedMatrix[9] = -flippedMatrix[9]; flippedMatrix[13] = -flippedMatrix[13]; } const bool invertedX = gstate_c.vpWidth < 0; if (invertedX) { flippedMatrix[0] = -flippedMatrix[0]; flippedMatrix[4] = -flippedMatrix[4]; flippedMatrix[8] = -flippedMatrix[8]; flippedMatrix[12] = -flippedMatrix[12]; } ConvertProjMatrixToVulkan(flippedMatrix, invertedX, invertedY); CopyMatrix4x4(ub_base.proj, flippedMatrix.getReadPtr()); } if (dirtyUniforms & DIRTY_PROJTHROUGHMATRIX) { Matrix4x4 proj_through; proj_through.setOrthoVulkan(0.0f, gstate_c.curRTWidth, 0, gstate_c.curRTHeight, 0, 1); CopyMatrix4x4(ub_base.proj_through, proj_through.getReadPtr()); } // Transform if (dirtyUniforms & DIRTY_WORLDMATRIX) { ConvertMatrix4x3To4x4(ub_base.world, gstate.worldMatrix); } if (dirtyUniforms & DIRTY_VIEWMATRIX) { ConvertMatrix4x3To4x4(ub_base.view, gstate.viewMatrix); } if (dirtyUniforms & DIRTY_TEXMATRIX) { ConvertMatrix4x3To4x4(ub_base.tex, gstate.tgenMatrix); } // Combined two small uniforms if (dirtyUniforms & (DIRTY_FOGCOEF | DIRTY_STENCILREPLACEVALUE)) { float fogcoef_stencil[3] = { getFloat24(gstate.fog1), getFloat24(gstate.fog2), (float)gstate.getStencilTestRef() }; if (my_isinf(fogcoef_stencil[1])) { // not really sure what a sensible value might be. fogcoef_stencil[1] = fogcoef_stencil[1] < 0.0f ? 
-10000.0f : 10000.0f; } else if (my_isnan(fogcoef_stencil[1])) { // Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988 // Just put the fog far away at a large finite distance. // Infinities and NaNs are rather unpredictable in shaders on many GPUs // so it's best to just make it a sane calculation. fogcoef_stencil[0] = 100000.0f; fogcoef_stencil[1] = 1.0f; } #ifndef MOBILE_DEVICE else if (my_isnanorinf(fogcoef_stencil[1]) || my_isnanorinf(fogcoef_stencil[0])) { ERROR_LOG_REPORT_ONCE(fognan, G3D, "Unhandled fog NaN/INF combo: %f %f", fogcoef_stencil[0], fogcoef_stencil[1]); } #endif CopyFloat3(ub_base.fogCoef_stencil, fogcoef_stencil); } // Texturing if (dirtyUniforms & DIRTY_UVSCALEOFFSET) { const float invW = 1.0f / (float)gstate_c.curTextureWidth; const float invH = 1.0f / (float)gstate_c.curTextureHeight; const int w = gstate.getTextureWidth(0); const int h = gstate.getTextureHeight(0); const float widthFactor = (float)w * invW; const float heightFactor = (float)h * invH; ub_base.uvScaleOffset[0] = widthFactor; ub_base.uvScaleOffset[1] = heightFactor; ub_base.uvScaleOffset[2] = 0.0f; ub_base.uvScaleOffset[3] = 0.0f; } if (dirtyUniforms & DIRTY_DEPTHRANGE) { float viewZScale = gstate.getViewportZScale(); float viewZCenter = gstate.getViewportZCenter(); float viewZInvScale; // We had to scale and translate Z to account for our clamped Z range. // Therefore, we also need to reverse this to round properly. // // Example: scale = 65535.0, center = 0.0 // Resulting range = -65535 to 65535, clamped to [0, 65535] // gstate_c.vpDepthScale = 2.0f // gstate_c.vpZOffset = -1.0f // // The projection already accounts for those, so we need to reverse them. // // Additionally, D3D9 uses a range from [0, 1]. We double and move the center. 
viewZScale *= (1.0f / gstate_c.vpDepthScale) * 2.0f; viewZCenter -= 65535.0f * gstate_c.vpZOffset + 32768.5f; if (viewZScale != 0.0) { viewZInvScale = 1.0f / viewZScale; } else { viewZInvScale = 0.0; } ub_base.depthRange[0] = viewZScale; ub_base.depthRange[1] = viewZCenter; ub_base.depthRange[2] = viewZCenter; ub_base.depthRange[3] = viewZInvScale; } }
void ShaderManagerVulkan::BaseUpdateUniforms(int dirtyUniforms) { if (dirtyUniforms & DIRTY_TEXENV) { Uint8x3ToFloat4(ub_base.texEnvColor, gstate.texenvcolor); } if (dirtyUniforms & DIRTY_ALPHACOLORREF) { Uint8x3ToInt4_Alpha(ub_base.alphaColorRef, gstate.getColorTestRef(), gstate.getAlphaTestRef() & gstate.getAlphaTestMask()); } if (dirtyUniforms & DIRTY_ALPHACOLORMASK) { Uint8x3ToInt4_Alpha(ub_base.colorTestMask, gstate.getColorTestMask(), gstate.getAlphaTestMask()); } if (dirtyUniforms & DIRTY_FOGCOLOR) { Uint8x3ToFloat4(ub_base.fogColor, gstate.fogcolor); } if (dirtyUniforms & DIRTY_SHADERBLEND) { Uint8x3ToFloat4(ub_base.blendFixA, gstate.getFixA()); Uint8x3ToFloat4(ub_base.blendFixB, gstate.getFixB()); } if (dirtyUniforms & DIRTY_TEXCLAMP) { const float invW = 1.0f / (float)gstate_c.curTextureWidth; const float invH = 1.0f / (float)gstate_c.curTextureHeight; const int w = gstate.getTextureWidth(0); const int h = gstate.getTextureHeight(0); const float widthFactor = (float)w * invW; const float heightFactor = (float)h * invH; // First wrap xy, then half texel xy (for clamp.) 
const float texclamp[4] = { widthFactor, heightFactor, invW * 0.5f, invH * 0.5f, }; const float texclampoff[2] = { gstate_c.curTextureXOffset * invW, gstate_c.curTextureYOffset * invH, }; CopyFloat4(ub_base.texClamp, texclamp); CopyFloat2(ub_base.texClampOffset, texclampoff); } if (dirtyUniforms & DIRTY_PROJMATRIX) { Matrix4x4 flippedMatrix; memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float)); const bool invertedY = gstate_c.vpHeight < 0; if (invertedY) { flippedMatrix[1] = -flippedMatrix[1]; flippedMatrix[5] = -flippedMatrix[5]; flippedMatrix[9] = -flippedMatrix[9]; flippedMatrix[13] = -flippedMatrix[13]; } const bool invertedX = gstate_c.vpWidth < 0; if (invertedX) { flippedMatrix[0] = -flippedMatrix[0]; flippedMatrix[4] = -flippedMatrix[4]; flippedMatrix[8] = -flippedMatrix[8]; flippedMatrix[12] = -flippedMatrix[12]; } ConvertProjMatrixToVulkan(flippedMatrix, invertedX, invertedY); CopyMatrix4x4(ub_base.proj, flippedMatrix.getReadPtr()); } if (dirtyUniforms & DIRTY_PROJTHROUGHMATRIX) { Matrix4x4 proj_through; proj_through.setOrthoVulkan(0.0f, gstate_c.curRTWidth, 0, gstate_c.curRTHeight, 0, 1); CopyMatrix4x4(ub_base.proj_through, proj_through.getReadPtr()); } // Transform if (dirtyUniforms & DIRTY_WORLDMATRIX) { ConvertMatrix4x3To4x4(ub_base.world, gstate.worldMatrix); } if (dirtyUniforms & DIRTY_VIEWMATRIX) { ConvertMatrix4x3To4x4(ub_base.view, gstate.viewMatrix); } if (dirtyUniforms & DIRTY_TEXMATRIX) { ConvertMatrix4x3To4x4(ub_base.tex, gstate.tgenMatrix); } // Combined two small uniforms if (dirtyUniforms & (DIRTY_FOGCOEF | DIRTY_STENCILREPLACEVALUE)) { float fogcoef_stencil[3] = { getFloat24(gstate.fog1), getFloat24(gstate.fog2), (float)gstate.getStencilTestRef() }; if (my_isinf(fogcoef_stencil[1])) { // not really sure what a sensible value might be. fogcoef_stencil[1] = fogcoef_stencil[1] < 0.0f ? 
-10000.0f : 10000.0f; } else if (my_isnan(fogcoef_stencil[1])) { // Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988 // Just put the fog far away at a large finite distance. // Infinities and NaNs are rather unpredictable in shaders on many GPUs // so it's best to just make it a sane calculation. fogcoef_stencil[0] = 100000.0f; fogcoef_stencil[1] = 1.0f; } #ifndef MOBILE_DEVICE else if (my_isnanorinf(fogcoef_stencil[1]) || my_isnanorinf(fogcoef_stencil[0])) { ERROR_LOG_REPORT_ONCE(fognan, G3D, "Unhandled fog NaN/INF combo: %f %f", fogcoef_stencil[0], fogcoef_stencil[1]); } #endif CopyFloat3(ub_base.fogCoef_stencil, fogcoef_stencil); } // Texturing if (dirtyUniforms & DIRTY_UVSCALEOFFSET) { const float invW = 1.0f / (float)gstate_c.curTextureWidth; const float invH = 1.0f / (float)gstate_c.curTextureHeight; const int w = gstate.getTextureWidth(0); const int h = gstate.getTextureHeight(0); const float widthFactor = (float)w * invW; const float heightFactor = (float)h * invH; static const float rescale[4] = { 1.0f, 2 * 127.5f / 128.f, 2 * 32767.5f / 32768.f, 1.0f }; const float factor = rescale[(gstate.vertType & GE_VTYPE_TC_MASK) >> GE_VTYPE_TC_SHIFT]; float uvscaleoff[4]; switch (gstate.getUVGenMode()) { case GE_TEXMAP_TEXTURE_COORDS: // Not sure what GE_TEXMAP_UNKNOWN is, but seen in Riviera. Treating the same as GE_TEXMAP_TEXTURE_COORDS works. case GE_TEXMAP_UNKNOWN: if (g_Config.bPrescaleUV) { // We are here but are prescaling UV in the decoder? Let's do the same as in the other case // except consider *Scale and *Off to be 1 and 0. uvscaleoff[0] = widthFactor; uvscaleoff[1] = heightFactor; uvscaleoff[2] = 0.0f; uvscaleoff[3] = 0.0f; } else { uvscaleoff[0] = gstate_c.uv.uScale * factor * widthFactor; uvscaleoff[1] = gstate_c.uv.vScale * factor * heightFactor; uvscaleoff[2] = gstate_c.uv.uOff * widthFactor; uvscaleoff[3] = gstate_c.uv.vOff * heightFactor; } break; // These two work the same whether or not we prescale UV. 
case GE_TEXMAP_TEXTURE_MATRIX: // We cannot bake the UV coord scale factor in here, as we apply a matrix multiplication // before this is applied, and the matrix multiplication may contain translation. In this case // the translation will be scaled which breaks faces in Hexyz Force for example. // So I've gone back to applying the scale factor in the shader. uvscaleoff[0] = widthFactor; uvscaleoff[1] = heightFactor; uvscaleoff[2] = 0.0f; uvscaleoff[3] = 0.0f; break; case GE_TEXMAP_ENVIRONMENT_MAP: // In this mode we only use uvscaleoff to scale to the texture size. uvscaleoff[0] = widthFactor; uvscaleoff[1] = heightFactor; uvscaleoff[2] = 0.0f; uvscaleoff[3] = 0.0f; break; default: ERROR_LOG_REPORT(G3D, "Unexpected UV gen mode: %d", gstate.getUVGenMode()); } CopyFloat4(ub_base.uvScaleOffset, uvscaleoff); } if (dirtyUniforms & DIRTY_DEPTHRANGE) { float viewZScale = gstate.getViewportZScale(); float viewZCenter = gstate.getViewportZCenter(); float viewZInvScale; // We had to scale and translate Z to account for our clamped Z range. // Therefore, we also need to reverse this to round properly. // // Example: scale = 65535.0, center = 0.0 // Resulting range = -65535 to 65535, clamped to [0, 65535] // gstate_c.vpDepthScale = 2.0f // gstate_c.vpZOffset = -1.0f // // The projection already accounts for those, so we need to reverse them. // // Additionally, D3D9 uses a range from [0, 1]. We double and move the center. viewZScale *= (1.0f / gstate_c.vpDepthScale) * 2.0f; viewZCenter -= 65535.0f * gstate_c.vpZOffset + 32768.5f; if (viewZScale != 0.0) { viewZInvScale = 1.0f / viewZScale; } else { viewZInvScale = 0.0; } float data[4] = { viewZScale, viewZCenter, viewZCenter, viewZInvScale }; CopyFloat4(ub_base.depthRange, data); } }
void ShaderManagerDX9::VSUpdateUniforms(int dirtyUniforms) { // Update any dirty uniforms before we draw if (dirtyUniforms & DIRTY_PROJMATRIX) { Matrix4x4 flippedMatrix; memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float)); const bool invertedY = gstate_c.vpHeight < 0; if (!invertedY) { flippedMatrix[1] = -flippedMatrix[1]; flippedMatrix[5] = -flippedMatrix[5]; flippedMatrix[9] = -flippedMatrix[9]; flippedMatrix[13] = -flippedMatrix[13]; } const bool invertedX = gstate_c.vpWidth < 0; if (invertedX) { flippedMatrix[0] = -flippedMatrix[0]; flippedMatrix[4] = -flippedMatrix[4]; flippedMatrix[8] = -flippedMatrix[8]; flippedMatrix[12] = -flippedMatrix[12]; } ConvertProjMatrixToD3D(flippedMatrix, invertedX, invertedY); VSSetMatrix(CONST_VS_PROJ, flippedMatrix.getReadPtr()); } if (dirtyUniforms & DIRTY_PROJTHROUGHMATRIX) { Matrix4x4 proj_through; proj_through.setOrtho(0.0f, gstate_c.curRTWidth, gstate_c.curRTHeight, 0, 0, 1); ConvertProjMatrixToD3DThrough(proj_through); VSSetMatrix(CONST_VS_PROJ_THROUGH, proj_through.getReadPtr()); } // Transform if (dirtyUniforms & DIRTY_WORLDMATRIX) { VSSetMatrix4x3_3(CONST_VS_WORLD, gstate.worldMatrix); } if (dirtyUniforms & DIRTY_VIEWMATRIX) { VSSetMatrix4x3_3(CONST_VS_VIEW, gstate.viewMatrix); } if (dirtyUniforms & DIRTY_TEXMATRIX) { VSSetMatrix4x3_3(CONST_VS_TEXMTX, gstate.tgenMatrix); } if (dirtyUniforms & DIRTY_FOGCOEF) { float fogcoef[2] = { getFloat24(gstate.fog1), getFloat24(gstate.fog2), }; if (my_isinf(fogcoef[1])) { // not really sure what a sensible value might be. fogcoef[1] = fogcoef[1] < 0.0f ? -10000.0f : 10000.0f; } else if (my_isnan(fogcoef[1])) { // Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988 // Just put the fog far away at a large finite distance. // Infinities and NaNs are rather unpredictable in shaders on many GPUs // so it's best to just make it a sane calculation. 
fogcoef[0] = 100000.0f; fogcoef[1] = 1.0f; } #ifndef MOBILE_DEVICE else if (my_isnanorinf(fogcoef[1]) || my_isnanorinf(fogcoef[0])) { ERROR_LOG_REPORT_ONCE(fognan, G3D, "Unhandled fog NaN/INF combo: %f %f", fogcoef[0], fogcoef[1]); } #endif VSSetFloatArray(CONST_VS_FOGCOEF, fogcoef, 2); } // TODO: Could even set all bones in one go if they're all dirty. #ifdef USE_BONE_ARRAY if (u_bone != 0) { float allBones[8 * 16]; bool allDirty = true; for (int i = 0; i < numBones; i++) { if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) { ConvertMatrix4x3To4x4(allBones + 16 * i, gstate.boneMatrix + 12 * i); } else { allDirty = false; } } if (allDirty) { // Set them all with one call //glUniformMatrix4fv(u_bone, numBones, GL_FALSE, allBones); } else { // Set them one by one. Could try to coalesce two in a row etc but too lazy. for (int i = 0; i < numBones; i++) { if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) { //glUniformMatrix4fv(u_bone + i, 1, GL_FALSE, allBones + 16 * i); } } } } #else for (int i = 0; i < 8; i++) { if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) { VSSetMatrix4x3_3(CONST_VS_BONE0 + 3 * i, gstate.boneMatrix + 12 * i); } } #endif // Texturing if (dirtyUniforms & DIRTY_UVSCALEOFFSET) { const float invW = 1.0f / (float)gstate_c.curTextureWidth; const float invH = 1.0f / (float)gstate_c.curTextureHeight; const int w = gstate.getTextureWidth(0); const int h = gstate.getTextureHeight(0); const float widthFactor = (float)w * invW; const float heightFactor = (float)h * invH; float uvscaleoff[4]; switch (gstate.getUVGenMode()) { case GE_TEXMAP_TEXTURE_COORDS: // Not sure what GE_TEXMAP_UNKNOWN is, but seen in Riviera. Treating the same as GE_TEXMAP_TEXTURE_COORDS works. case GE_TEXMAP_UNKNOWN: if (g_Config.bPrescaleUV) { // We are here but are prescaling UV in the decoder? Let's do the same as in the other case // except consider *Scale and *Off to be 1 and 0. 
uvscaleoff[0] = widthFactor; uvscaleoff[1] = heightFactor; uvscaleoff[2] = 0.0f; uvscaleoff[3] = 0.0f; } else { uvscaleoff[0] = gstate_c.uv.uScale * widthFactor; uvscaleoff[1] = gstate_c.uv.vScale * heightFactor; uvscaleoff[2] = gstate_c.uv.uOff * widthFactor; uvscaleoff[3] = gstate_c.uv.vOff * heightFactor; } break; // These two work the same whether or not we prescale UV. case GE_TEXMAP_TEXTURE_MATRIX: // We cannot bake the UV coord scale factor in here, as we apply a matrix multiplication // before this is applied, and the matrix multiplication may contain translation. In this case // the translation will be scaled which breaks faces in Hexyz Force for example. // So I've gone back to applying the scale factor in the shader. uvscaleoff[0] = widthFactor; uvscaleoff[1] = heightFactor; uvscaleoff[2] = 0.0f; uvscaleoff[3] = 0.0f; break; case GE_TEXMAP_ENVIRONMENT_MAP: // In this mode we only use uvscaleoff to scale to the texture size. uvscaleoff[0] = widthFactor; uvscaleoff[1] = heightFactor; uvscaleoff[2] = 0.0f; uvscaleoff[3] = 0.0f; break; default: ERROR_LOG_REPORT(G3D, "Unexpected UV gen mode: %d", gstate.getUVGenMode()); } VSSetFloatArray(CONST_VS_UVSCALEOFFSET, uvscaleoff, 4); } if (dirtyUniforms & DIRTY_DEPTHRANGE) { // Depth is [0, 1] mapping to [minz, maxz], not too hard. float vpZScale = gstate.getViewportZScale(); float vpZCenter = gstate.getViewportZCenter(); // These are just the reverse of the formulas in GPUStateUtils. float halfActualZRange = vpZScale / gstate_c.vpDepthScale; float minz = -((gstate_c.vpZOffset * halfActualZRange) - vpZCenter) - halfActualZRange; float viewZScale = halfActualZRange * 2.0f; // Account for the half pixel offset. 
float viewZCenter = minz + (DepthSliceFactor() / 256.0f) * 0.5f; float viewZInvScale; if (viewZScale != 0.0) { viewZInvScale = 1.0f / viewZScale; } else { viewZInvScale = 0.0; } float data[4] = { viewZScale, viewZCenter, viewZCenter, viewZInvScale }; VSSetFloatUniform4(CONST_VS_DEPTHRANGE, data); } // Lighting if (dirtyUniforms & DIRTY_AMBIENT) { VSSetColorUniform3Alpha(CONST_VS_AMBIENT, gstate.ambientcolor, gstate.getAmbientA()); } if (dirtyUniforms & DIRTY_MATAMBIENTALPHA) { VSSetColorUniform3Alpha(CONST_VS_MATAMBIENTALPHA, gstate.materialambient, gstate.getMaterialAmbientA()); } if (dirtyUniforms & DIRTY_MATDIFFUSE) { VSSetColorUniform3(CONST_VS_MATDIFFUSE, gstate.materialdiffuse); } if (dirtyUniforms & DIRTY_MATEMISSIVE) { VSSetColorUniform3(CONST_VS_MATEMISSIVE, gstate.materialemissive); } if (dirtyUniforms & DIRTY_MATSPECULAR) { VSSetColorUniform3ExtraFloat(CONST_VS_MATSPECULAR, gstate.materialspecular, getFloat24(gstate.materialspecularcoef)); } for (int i = 0; i < 4; i++) { if (dirtyUniforms & (DIRTY_LIGHT0 << i)) { if (gstate.isDirectionalLight(i)) { // Prenormalize float x = getFloat24(gstate.lpos[i * 3 + 0]); float y = getFloat24(gstate.lpos[i * 3 + 1]); float z = getFloat24(gstate.lpos[i * 3 + 2]); float len = sqrtf(x*x + y*y + z*z); if (len == 0.0f) len = 1.0f; else len = 1.0f / len; float vec[3] = { x * len, y * len, z * len }; VSSetFloatArray(CONST_VS_LIGHTPOS + i, vec, 3); } else { VSSetFloat24Uniform3(CONST_VS_LIGHTPOS + i, &gstate.lpos[i * 3]); } VSSetFloat24Uniform3(CONST_VS_LIGHTDIR + i, &gstate.ldir[i * 3]); VSSetFloat24Uniform3(CONST_VS_LIGHTATT + i, &gstate.latt[i * 3]); VSSetFloat(CONST_VS_LIGHTANGLE + i, getFloat24(gstate.lcutoff[i])); VSSetFloat(CONST_VS_LIGHTSPOTCOEF + i, getFloat24(gstate.lconv[i])); VSSetColorUniform3(CONST_VS_LIGHTAMBIENT + i, gstate.lcolor[i * 3]); VSSetColorUniform3(CONST_VS_LIGHTDIFFUSE + i, gstate.lcolor[i * 3 + 1]); VSSetColorUniform3(CONST_VS_LIGHTSPECULAR + i, gstate.lcolor[i * 3 + 2]); } } }
// TODO: This probably is not the best interface. bool TransformUnit::GetCurrentSimpleVertices(int count, std::vector<GPUDebugVertex> &vertices, std::vector<u16> &indices) { // This is always for the current vertices. u16 indexLowerBound = 0; u16 indexUpperBound = count - 1; if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) { const u8 *inds = Memory::GetPointer(gstate_c.indexAddr); const u16 *inds16 = (const u16 *)inds; if (inds) { GetIndexBounds(inds, count, gstate.vertType, &indexLowerBound, &indexUpperBound); indices.resize(count); switch (gstate.vertType & GE_VTYPE_IDX_MASK) { case GE_VTYPE_IDX_16BIT: for (int i = 0; i < count; ++i) { indices[i] = inds16[i]; } break; case GE_VTYPE_IDX_8BIT: for (int i = 0; i < count; ++i) { indices[i] = inds[i]; } break; default: return false; } } else { indices.clear(); } } else { indices.clear(); } static std::vector<u32> temp_buffer; static std::vector<SimpleVertex> simpleVertices; temp_buffer.resize(65536 * 24 / sizeof(u32)); simpleVertices.resize(indexUpperBound + 1); VertexDecoder vdecoder; VertexDecoderOptions options; memset(&options, 0, sizeof(options)); options.expandAllUVtoFloat = false; // TODO: True should be fine here vdecoder.SetVertexType(gstate.vertType, options); DrawEngineCommon::NormalizeVertices((u8 *)(&simpleVertices[0]), (u8 *)(&temp_buffer[0]), Memory::GetPointer(gstate_c.vertexAddr), &vdecoder, indexLowerBound, indexUpperBound, gstate.vertType); float world[16]; float view[16]; float worldview[16]; float worldviewproj[16]; ConvertMatrix4x3To4x4(world, gstate.worldMatrix); ConvertMatrix4x3To4x4(view, gstate.viewMatrix); Matrix4ByMatrix4(worldview, world, view); Matrix4ByMatrix4(worldviewproj, worldview, gstate.projMatrix); vertices.resize(indexUpperBound + 1); for (int i = indexLowerBound; i <= indexUpperBound; ++i) { const SimpleVertex &vert = simpleVertices[i]; if (gstate.isModeThrough()) { if (gstate.vertType & GE_VTYPE_TC_MASK) { vertices[i].u = vert.uv[0]; vertices[i].v = vert.uv[1]; } 
else { vertices[i].u = 0.0f; vertices[i].v = 0.0f; } vertices[i].x = vert.pos.x; vertices[i].y = vert.pos.y; vertices[i].z = vert.pos.z; if (gstate.vertType & GE_VTYPE_COL_MASK) { memcpy(vertices[i].c, vert.color, sizeof(vertices[i].c)); } else { memset(vertices[i].c, 0, sizeof(vertices[i].c)); } } else { float clipPos[4]; Vec3ByMatrix44(clipPos, vert.pos.AsArray(), worldviewproj); ScreenCoords screenPos = ClipToScreen(clipPos); DrawingCoords drawPos = ScreenToDrawing(screenPos); if (gstate.vertType & GE_VTYPE_TC_MASK) { vertices[i].u = vert.uv[0]; vertices[i].v = vert.uv[1]; } else { vertices[i].u = 0.0f; vertices[i].v = 0.0f; } vertices[i].x = drawPos.x; vertices[i].y = drawPos.y; vertices[i].z = 1.0; if (gstate.vertType & GE_VTYPE_COL_MASK) { memcpy(vertices[i].c, vert.color, sizeof(vertices[i].c)); } else { memset(vertices[i].c, 0, sizeof(vertices[i].c)); } } } return true; }
// TODO: This probably is not the best interface. bool DrawEngineCommon::GetCurrentSimpleVertices(int count, std::vector<GPUDebugVertex> &vertices, std::vector<u16> &indices) { // This is always for the current vertices. u16 indexLowerBound = 0; u16 indexUpperBound = count - 1; if (!Memory::IsValidAddress(gstate_c.vertexAddr)) return false; bool savedVertexFullAlpha = gstate_c.vertexFullAlpha; if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) { const u8 *inds = Memory::GetPointer(gstate_c.indexAddr); const u16 *inds16 = (const u16 *)inds; if (inds) { GetIndexBounds(inds, count, gstate.vertType, &indexLowerBound, &indexUpperBound); indices.resize(count); switch (gstate.vertType & GE_VTYPE_IDX_MASK) { case GE_VTYPE_IDX_16BIT: for (int i = 0; i < count; ++i) { indices[i] = inds16[i]; } break; case GE_VTYPE_IDX_8BIT: for (int i = 0; i < count; ++i) { indices[i] = inds[i]; } break; default: return false; } } else { indices.clear(); } } else { indices.clear(); } static std::vector<u32> temp_buffer; static std::vector<SimpleVertex> simpleVertices; temp_buffer.resize(std::max((int)indexUpperBound, 8192) * 128 / sizeof(u32)); simpleVertices.resize(indexUpperBound + 1); NormalizeVertices((u8 *)(&simpleVertices[0]), (u8 *)(&temp_buffer[0]), Memory::GetPointer(gstate_c.vertexAddr), indexLowerBound, indexUpperBound, gstate.vertType); float world[16]; float view[16]; float worldview[16]; float worldviewproj[16]; ConvertMatrix4x3To4x4(world, gstate.worldMatrix); ConvertMatrix4x3To4x4(view, gstate.viewMatrix); Matrix4ByMatrix4(worldview, world, view); Matrix4ByMatrix4(worldviewproj, worldview, gstate.projMatrix); vertices.resize(indexUpperBound + 1); for (int i = indexLowerBound; i <= indexUpperBound; ++i) { const SimpleVertex &vert = simpleVertices[i]; if (gstate.isModeThrough()) { if (gstate.vertType & GE_VTYPE_TC_MASK) { vertices[i].u = vert.uv[0]; vertices[i].v = vert.uv[1]; } else { vertices[i].u = 0.0f; vertices[i].v = 0.0f; } vertices[i].x = vert.pos.x; 
vertices[i].y = vert.pos.y; vertices[i].z = vert.pos.z; if (gstate.vertType & GE_VTYPE_COL_MASK) { memcpy(vertices[i].c, vert.color, sizeof(vertices[i].c)); } else { memset(vertices[i].c, 0, sizeof(vertices[i].c)); } } else { float clipPos[4]; Vec3ByMatrix44(clipPos, vert.pos.AsArray(), worldviewproj); Vec3f screenPos = ClipToScreen(clipPos); Vec3f drawPos = ScreenToDrawing(screenPos); if (gstate.vertType & GE_VTYPE_TC_MASK) { vertices[i].u = vert.uv[0] * (float)gstate.getTextureWidth(0); vertices[i].v = vert.uv[1] * (float)gstate.getTextureHeight(0); } else { vertices[i].u = 0.0f; vertices[i].v = 0.0f; } vertices[i].x = drawPos.x; vertices[i].y = drawPos.y; vertices[i].z = drawPos.z; if (gstate.vertType & GE_VTYPE_COL_MASK) { memcpy(vertices[i].c, vert.color, sizeof(vertices[i].c)); } else { memset(vertices[i].c, 0, sizeof(vertices[i].c)); } } } gstate_c.vertexFullAlpha = savedVertexFullAlpha; return true; }