// Looks up the vertex loader registered for the (VtxDesc, VtxAttr) pair in
// s_vertex_loader_map and returns it. On a miss, a loader is created, stored in the
// map, given its native vertex format (as is its fallback loader, if it has one),
// and the numVertexLoaders statistic is incremented.
//
// The returned pointer is obtained via .get() on the map entry, so the map entry
// keeps holding the loader.
inline VertexLoaderBase *GetOrAddLoader(const TVtxDesc &VtxDesc, const VAT &VtxAttr)
{
  VertexLoaderUID uid(VtxDesc, VtxAttr);

  // Fast path: a loader for this UID already exists.
  const auto cached = s_vertex_loader_map.find(uid);
  if (cached != s_vertex_loader_map.end())
    return cached->second.get();

  // Slow path: create the loader, register it, then resolve its native format.
  s_vertex_loader_map[uid] = VertexLoaderBase::CreateVertexLoader(VtxDesc, VtxAttr);
  VertexLoaderBase *created = s_vertex_loader_map[uid].get();
  created->m_native_vertex_format = GetNativeVertexFormat(created->m_native_vtx_decl);

  // A loader may expose a fallback loader; it needs its own native format too.
  VertexLoaderBase *fallback_loader = created->GetFallback();
  if (fallback_loader != nullptr)
  {
    fallback_loader->m_native_vertex_format =
        GetNativeVertexFormat(fallback_loader->m_native_vtx_decl);
  }

  INCSTAT(stats.numVertexLoaders);
  return created;
}
int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bool skip_drawing, bool is_preprocess) { if (!count) return 0; VertexLoaderBase* loader = RefreshLoader(vtx_attr_group, is_preprocess); int size = count * loader->m_VertexSize; if ((int)src.size() < size) return -1; if (skip_drawing || is_preprocess) return size; // If the native vertex format changed, force a flush. if (loader->m_native_vertex_format != s_current_vtx_fmt || loader->m_native_components != g_current_components) { VertexManagerBase::Flush(); } s_current_vtx_fmt = loader->m_native_vertex_format; g_current_components = loader->m_native_components; // if cull mode is CULL_ALL, tell VertexManager to skip triangles and quads. // They still need to go through vertex loading, because we need to calculate a zfreeze refrence // slope. bool cullall = (bpmem.genMode.cullmode == GenMode::CULL_ALL && primitive < 5); DataReader dst = VertexManagerBase::PrepareForAdditionalData( primitive, count, loader->m_native_vtx_decl.stride, cullall); count = loader->RunVertices(src, dst, count); IndexGenerator::AddIndices(primitive, count); VertexManagerBase::FlushData(count, loader->m_native_vtx_decl.stride); ADDSTAT(stats.thisFrame.numPrims, count); INCSTAT(stats.thisFrame.numPrimitiveJoins); return size; }
// Speed test: repeatedly runs a direct U16 XY position loader over a large batch
// of vertices. Only the stride and input vertex size are asserted; the converted
// output is not checked.
TEST_F(VertexLoaderTest, PositionDirectU16XYSpeed)
{
  m_vtx_desc.Position = 1;          // Direct
  m_vtx_attr.g0.PosElements = 0;    // XY
  m_vtx_attr.g0.PosFormat = 2;      // U16

  VertexLoaderBase* loader = VertexLoaderBase::CreateVertexLoader(m_vtx_desc, m_vtx_attr);

  ASSERT_EQ(3 * sizeof(float), static_cast<u32>(loader->m_native_vtx_decl.stride));
  ASSERT_EQ(2 * sizeof(u16), static_cast<u32>(loader->m_VertexSize));

  const int passes = 1000;
  const int vertices_per_pass = 100000;

  for (int pass = 0; pass < passes; ++pass)
  {
    ResetPointers();
    // "7" -> primitive are points.
    const int converted = loader->RunVertices(7, vertices_per_pass, src, dst);
    src.Skip(vertices_per_pass * loader->m_VertexSize);
    dst.Skip(converted * loader->m_native_vtx_decl.stride);
  }

  delete loader;
}
// Checks that direct U16 XY positions are converted to three floats per vertex
// (x, y, and a 0.0f third component), and that PosFrac = 1 together with
// ByteDequant = 1 halves U16 inputs.
TEST_F(VertexLoaderTest, PositionDirectU16XY)
{
  m_vtx_desc.Position = 1;          // Direct
  m_vtx_attr.g0.PosElements = 0;    // XY
  m_vtx_attr.g0.PosFormat = 2;      // U16

  VertexLoaderBase* loader = VertexLoaderBase::CreateVertexLoader(m_vtx_desc, m_vtx_attr);

  ASSERT_EQ(3 * sizeof(float), static_cast<u32>(loader->m_native_vtx_decl.stride));
  ASSERT_EQ(2 * sizeof(u16), static_cast<u32>(loader->m_VertexSize));

  // Write some vertices: one (x, y) pair per row.
  const u16 raw_positions[] = {
      0,     0,
      1,     2,
      256,   257,
      65535, 65534,
      12345, 54321,
  };
  for (const u16 component : raw_positions)
    Input<u16>(component);

  // Convert 5 points. "7" -> primitive are points.
  const int point_count = 5;
  int count = loader->RunVertices(7, point_count, src, dst);
  src.Skip(point_count * loader->m_VertexSize);
  dst.Skip(count * loader->m_native_vtx_decl.stride);
  delete loader;

  // Expected output: one (x, y, z) triple per row, with z always 0.
  const float expected_positions[] = {
      0.0f,     0.0f,     0.0f,
      1.0f,     2.0f,     0.0f,
      256.0f,   257.0f,   0.0f,
      65535.0f, 65534.0f, 0.0f,
      12345.0f, 54321.0f, 0.0f,
  };
  for (const float component : expected_positions)
    ExpectOut(component);

  // Test that scale works on U16 inputs.
  Input<u16>(42);
  Input<u16>(24);
  m_vtx_attr.g0.PosFrac = 1;
  m_vtx_attr.g0.ByteDequant = 1;

  loader = VertexLoaderBase::CreateVertexLoader(m_vtx_desc, m_vtx_attr);
  count = loader->RunVertices(7, 1, src, dst);
  src.Skip(1 * loader->m_VertexSize);
  dst.Skip(count * loader->m_native_vtx_decl.stride);

  ExpectOut(21.0f);
  ExpectOut(12.0f);
  ExpectOut(0.0f);

  delete loader;
}
// Checks that direct float XYZ positions pass through the loader unchanged, and
// that setting PosFrac has no effect on floating point inputs.
TEST_F(VertexLoaderTest, PositionDirectFloatXYZ)
{
  m_vtx_desc.Position = 1;          // Direct
  m_vtx_attr.g0.PosElements = 1;    // XYZ
  m_vtx_attr.g0.PosFormat = 4;      // Float

  VertexLoaderBase* loader = VertexLoaderBase::CreateVertexLoader(m_vtx_desc, m_vtx_attr);

  ASSERT_EQ(3 * sizeof(float), static_cast<u32>(loader->m_native_vtx_decl.stride));
  ASSERT_EQ(3 * sizeof(float), static_cast<u32>(loader->m_VertexSize));

  // The same table serves as input and as expected output: one (x, y, z) per row.
  const float positions[] = {
      0.0f, 0.0f, 0.0f,
      1.0f, 0.0f, 0.0f,
      0.0f, 1.0f, 0.0f,
      0.0f, 0.0f, 1.0f,
  };

  // Write some vertices.
  for (const float component : positions)
    Input(component);

  // Convert 4 points. "7" -> primitive are points.
  const int point_count = 4;
  int count = loader->RunVertices(7, point_count, src, dst);
  src.Skip(point_count * loader->m_VertexSize);
  dst.Skip(count * loader->m_native_vtx_decl.stride);
  delete loader;

  for (const float component : positions)
    ExpectOut(component);

  // Test that scale does nothing for floating point inputs.
  Input(1.0f);
  Input(2.0f);
  Input(4.0f);
  m_vtx_attr.g0.PosFrac = 1;

  loader = VertexLoaderBase::CreateVertexLoader(m_vtx_desc, m_vtx_attr);
  count = loader->RunVertices(7, 1, src, dst);
  src.Skip(1 * loader->m_VertexSize);
  dst.Skip(count * loader->m_native_vtx_decl.stride);

  ExpectOut(1.0f);
  ExpectOut(2.0f);
  ExpectOut(4.0f);

  delete loader;
}
// Builds a vertex loader for the given descriptor/attribute pair.
//
// A platform-specific loader (selected at compile time) is tried first; if there
// is none for this platform, or it reports that it is not initialized, the generic
// VertexLoader is tried. Returns the first loader whose IsInitialized() is true.
// If none is, raises a PanicAlert and returns nullptr. The caller receives a raw
// pointer allocated with `new`.
VertexLoaderBase* VertexLoaderBase::CreateVertexLoader(const TVtxDesc& vtx_desc,
                                                       const VAT& vtx_attr)
{
  // Stays null on platforms without a specialized loader.
  VertexLoaderBase* candidate = nullptr;

//#define COMPARE_VERTEXLOADERS
#if defined(COMPARE_VERTEXLOADERS) && defined(_M_X86_64)
  // first try: Any new VertexLoader vs the old one
  candidate = new VertexLoaderTester(
      new VertexLoader(vtx_desc, vtx_attr),     // the software one
      new VertexLoaderX64(vtx_desc, vtx_attr),  // the new one to compare
      vtx_desc, vtx_attr);
#elif defined(_M_X86_64)
  candidate = new VertexLoaderX64(vtx_desc, vtx_attr);
#elif defined(_M_ARM_64)
  candidate = new VertexLoaderARM64(vtx_desc, vtx_attr);
#endif

  if (candidate != nullptr)
  {
    if (candidate->IsInitialized())
      return candidate;
    delete candidate;
  }

  // last try: The old VertexLoader
  candidate = new VertexLoader(vtx_desc, vtx_attr);
  if (candidate->IsInitialized())
    return candidate;
  delete candidate;

  PanicAlert("No Vertex Loader found.");
  return nullptr;
}
// Speed test: enables most attributes in floating point direct mode and runs the
// resulting loader over a large batch of vertices. The converted output is not
// checked.
TEST_F(VertexLoaderTest, LargeFloatVertexSpeed)
{
  // Matrix indices.
  m_vtx_desc.PosMatIdx = 1;
  m_vtx_desc.Tex0MatIdx = 1;
  m_vtx_desc.Tex1MatIdx = 1;
  m_vtx_desc.Tex2MatIdx = 1;
  m_vtx_desc.Tex3MatIdx = 1;
  m_vtx_desc.Tex4MatIdx = 1;
  m_vtx_desc.Tex5MatIdx = 1;
  m_vtx_desc.Tex6MatIdx = 1;
  m_vtx_desc.Tex7MatIdx = 1;

  // Vertex attributes.
  m_vtx_desc.Position = 1;
  m_vtx_desc.Normal = 1;
  m_vtx_desc.Color0 = 1;
  m_vtx_desc.Color1 = 1;
  m_vtx_desc.Tex0Coord = 1;
  m_vtx_desc.Tex1Coord = 1;
  m_vtx_desc.Tex2Coord = 1;
  m_vtx_desc.Tex3Coord = 1;
  m_vtx_desc.Tex4Coord = 1;
  m_vtx_desc.Tex5Coord = 1;
  m_vtx_desc.Tex6Coord = 1;
  m_vtx_desc.Tex7Coord = 1;

  // Attribute table, group 0.
  m_vtx_attr.g0.PosElements = 1;        // XYZ
  m_vtx_attr.g0.PosFormat = 4;          // Float
  m_vtx_attr.g0.NormalElements = 1;     // NBT
  m_vtx_attr.g0.NormalFormat = 4;       // Float
  m_vtx_attr.g0.Color0Elements = 1;     // Has Alpha
  m_vtx_attr.g0.Color0Comp = 5;         // RGBA8888
  m_vtx_attr.g0.Color1Elements = 1;     // Has Alpha
  m_vtx_attr.g0.Color1Comp = 5;         // RGBA8888
  m_vtx_attr.g0.Tex0CoordElements = 1;  // ST
  m_vtx_attr.g0.Tex0CoordFormat = 4;    // Float

  // Attribute table, group 1.
  m_vtx_attr.g1.Tex1CoordElements = 1;  // ST
  m_vtx_attr.g1.Tex1CoordFormat = 4;    // Float
  m_vtx_attr.g1.Tex2CoordElements = 1;  // ST
  m_vtx_attr.g1.Tex2CoordFormat = 4;    // Float
  m_vtx_attr.g1.Tex3CoordElements = 1;  // ST
  m_vtx_attr.g1.Tex3CoordFormat = 4;    // Float
  m_vtx_attr.g1.Tex4CoordElements = 1;  // ST
  m_vtx_attr.g1.Tex4CoordFormat = 4;    // Float

  // Attribute table, group 2.
  m_vtx_attr.g2.Tex5CoordElements = 1;  // ST
  m_vtx_attr.g2.Tex5CoordFormat = 4;    // Float
  m_vtx_attr.g2.Tex6CoordElements = 1;  // ST
  m_vtx_attr.g2.Tex6CoordFormat = 4;    // Float
  m_vtx_attr.g2.Tex7CoordElements = 1;  // ST
  m_vtx_attr.g2.Tex7CoordFormat = 4;    // Float

  VertexLoaderBase* loader = VertexLoaderBase::CreateVertexLoader(m_vtx_desc, m_vtx_attr);

  // This test is only done 100x in a row since it's ~20x slower using the
  // current vertex loader implementation.
  const int passes = 100;
  const int vertices_per_pass = 100000;

  for (int pass = 0; pass < passes; ++pass)
  {
    ResetPointers();
    // "7" -> primitive are points.
    const int converted = loader->RunVertices(7, vertices_per_pass, src, dst);
    src.Skip(vertices_per_pass * loader->m_VertexSize);
    dst.Skip(converted * loader->m_native_vtx_decl.stride);
  }

  delete loader;
}