Esempio n. 1
0
// Thread entry point for the game: initialises rendering, creates the swap
// chain for the given window, loads the core and then updates/presents forever.
// args: the output window's HWND, passed through the thread's void* parameter.
// The loop has no exit condition, so the trailing return is never reached.
DWORD __stdcall GameThreadMain(void* args)
{
	// The thread parameter carries the window handle to render into.
	HWND window = static_cast<HWND>(args);

	Render::Init();

	SwapChain swapChain;
	swapChain.Create(window, OUTPUT_SIZE_X, OUTPUT_SIZE_Y);

	Core::Load();
	Core::SetSwapChain(&swapChain);

	// Update the core, then present the swap chain, without end.
	while (true)
	{
		Core::Update();
		swapChain.Present();
	}
	return 0;
}
Esempio n. 2
0
  // Registers eight draw-call throughput benchmarks in a faze::TestWorks suite
  // named "advtests" and runs them.
  //
  // Every benchmark has the same shape: fill an upload-heap buffer with random
  // positions, copy it into a second buffer, build a graphics pipeline, then
  // render frames in a loop issuing one 3-vertex draw per triangle, shrinking
  // the triangle count while the measured time is above a threshold.
  // The variants differ in:
  //   - one shared command list (gfx) vs. one command list per LBS thread,
  //   - "baseline" (bind pipeline + SRV once, remaining draws through
  //     drawInstancedRaw) vs. "myapi" (bind + SRV + drawInstanced per draw),
  //   - loop exit once cpuTime <= 14 vs. once ten iterations have started
  //     with frameTime < 20 (both values are logged as ms).
  //
  // dev    - creates buffers, views, fences, pipelines and command lists.
  // queue  - receives the command lists and fences.
  // window - polled once per frame; a true return from simpleReadMessages()
  //          ends the current benchmark loop.
  // sc     - supplies the back buffers to render to; presented every frame.
  // port   - viewport set on each command list before drawing.
  // gfx    - shared command list; records the upload copy in every test and
  //          the draws in the single-thread tests.
  // log    - log.update() is called after each benchmark has logged its result.
  static void runTestsForDevice(GpuDevice& dev, GraphicsQueue& queue, Window& window, SwapChain& sc, ViewPort& port, GfxCommandList& gfx, faze::Logger& log)
  {
    faze::TestWorks t("advtests");
    // After every test: wait on a fresh fence inserted into the queue, close
    // gfx if it is still open, and reset it.
    t.setAfterTest([&]()
    {
      // clean up
      auto fence = dev.createFence();
      queue.insertFence(fence);
      fence.wait();
      if (!gfx.isClosed())
      {
        gfx.closeList();
      }
      gfx.resetList();

    });

    
    // Test 1: single command list, baseline. Starts at 1,000,000 triangles and
    // loops until cpuTime <= 14.
    t.addTest("Lots of drawcalls bench, (single thread), rough baseline", [&]()
    {
      using namespace faze;
      // Element layout of the position buffers.
      struct buf
      {
        float pos[4];
      };
      auto triangleCount = 1000000;
      auto currentTriangleCount = triangleCount;
      // srcdata lives on the upload heap and is filled from the CPU below;
      // dstdata is the buffer the shader resource view is created on.
	  auto srcdata = dev.createBuffer(ResourceDescriptor()
		  .Width(triangleCount)
		  .Format<buf>()
		  .Usage(ResourceUsage::UploadHeap));
	  auto dstdata = dev.createBuffer(ResourceDescriptor()
		  .Width(triangleCount)
		  .Format<buf>());
      auto dstdataSrv = dev.createBufferSRV(dstdata);

      // Non-deterministic seed: the positions differ from run to run.
      std::random_device rd;
      std::mt19937 gen(rd());
      std::uniform_real_distribution<> dis(-0.8f, 0.8f);
      std::uniform_real_distribution<> dis2(0.f, 1.f);

      {
        // Fill pos[0], pos[1] from [-0.8, 0.8) and pos[2] from [0, 1).
        // NOTE(review): pos[3] is not written here.
        auto tmp = srcdata.Map<buf>();
        for (int i = 0;i < triangleCount; ++i)
        {
          auto& it = tmp[i].pos;
          it[0] = static_cast<float>(dis(gen));
          it[1] = static_cast<float>(dis(gen));
          it[2] = static_cast<float>(dis2(gen));
        }
      }

      // Record and submit the upload copy, followed by a fence that is waited
      // on further down, after the pipeline has been created.
      gfx.CopyResource(dstdata, srcdata);
      GpuFence fence = dev.createFence();
      gfx.closeList();
      queue.submit(gfx);
      queue.insertFence(fence);

      auto pipeline = dev.createGraphicsPipeline(GraphicsPipelineDescriptor()
        .PixelShader("tests/stress/pixel")
        .VertexShader("tests/stress/vertex_triangle")
        .setRenderTargetCount(1)
        .RTVFormat(0, FormatType::R8G8B8A8_UNORM_SRGB)
        .DepthStencil(DepthStencilDescriptor().DepthEnable(false)));

      // Clear colour passed to ClearRenderTargetView every frame.
      auto vec = faze::vec4({ 0.2f, 0.2f, 0.2f, 1.0f });

      // Wait on the fence inserted after the copy, then reset gfx for drawing.
      fence.wait();
      gfx.resetList();

      WTime t;
      t.firstTick();
      // Both start above their thresholds so the loop body runs at least once.
      float frameTime = 30.f;
      float cpuTime = 30.f;
      Bentsumaakaa b;

      // Frame loop: runs until cpuTime is 14 or below, or until
      // window.simpleReadMessages() returns true.
      // NOTE(review): the triangle count is only reduced when cpuTime > 16, so
      // for 14 < cpuTime <= 16 the loop repeats with an unchanged workload.
      while (cpuTime > 14.f)
      //while(true)
      {
        if (window.simpleReadMessages())
          break;

        // Rendertarget
        // b.start()/b.stop() bracket the CPU section that cpuTime measures.
        b.start(false);
        gfx.setViewPort(port);
        auto backBufferIndex = sc->GetCurrentBackBufferIndex();
        gfx.ClearRenderTargetView(sc[backBufferIndex], vec);
        gfx.setRenderTarget(sc[backBufferIndex]);
        // graphics begin
        {
          // Bind pipeline and SRV once and draw triangle 0 through the binding;
          // every other triangle is drawn without passing the binding again.
          auto bind = gfx.bind(pipeline);
          bind.SRV(0, dstdataSrv);
          gfx.drawInstanced(bind, 3, 1, 0, 0);
          
          for (int i = 1; i < currentTriangleCount; ++i)
          {
            gfx.drawInstancedRaw(3, 1, 0, i); // this is the cheat.
          }
          
        }

        // submit all
        gfx.closeList();
        queue.submit(gfx);
        // cpuTime covers recording through submit; the log line labels it ms
        // (presumably b.stop() returns nanoseconds - confirm).
        cpuTime = b.stop(false) / 1000000.f;
        // present
        // NOTE(review): arguments look like DXGI Present(SyncInterval, Flags) - confirm.
        sc->Present(1, 0);
        // Wait on the fence inserted after this frame's work, then reset gfx.
        queue.insertFence(fence);
        fence.wait();
        gfx.resetList();

        t.tick();
        // Timer value from getCurrentNano() scaled by 1e-6; logged as ms.
        frameTime = static_cast<float>(t.getCurrentNano())*0.000001f;
        // Drop 1% of the triangles while the CPU section is above 16.
        if (cpuTime > 16.f)
        {
          currentTriangleCount -= currentTriangleCount/100;
        }
      }
      F_LOG("frametime: %.3fms CpuTime: %.3fms TriangleCount: %d\n", frameTime, cpuTime, currentTriangleCount);
      log.update();
      //fence.wait();
      return true;
    });

    // Test 2: single command list, "myapi" path (bind + SRV + drawInstanced for
    // every triangle). Starts at 200,000 triangles; loops until cpuTime <= 14.
    t.addTest("Lots of drawcalls bench, (single thread), myapi", [&]()
    {
      using namespace faze;
      struct buf
      {
        float pos[4];
      };
      auto triangleCount = 200000;
      auto currentTriangleCount = triangleCount;
      // Upload-heap source buffer and the destination buffer the SRV views.
	  auto srcdata = dev.createBuffer(ResourceDescriptor()
		  .Width(triangleCount)
		  .Format<buf>()
		  .Usage(ResourceUsage::UploadHeap));
	  auto dstdata = dev.createBuffer(ResourceDescriptor()
		  .Width(triangleCount)
		  .Format<buf>());
	  auto dstdataSrv = dev.createBufferSRV(dstdata);


      std::random_device rd;
      std::mt19937 gen(rd());
      std::uniform_real_distribution<> dis(-0.8f, 0.8f);
      std::uniform_real_distribution<> dis2(0.f, 1.f);

      {
        // Random pos[0..2]; pos[3] is not written.
        auto tmp = srcdata.Map<buf>();
        for (int i = 0;i < triangleCount; ++i)
        {
          auto& it = tmp[i].pos;
          it[0] = static_cast<float>(dis(gen));
          it[1] = static_cast<float>(dis(gen));
          it[2] = static_cast<float>(dis2(gen));
        }
      }

      // Submit the upload copy and a fence; the wait happens after pipeline creation.
      gfx.CopyResource(dstdata, srcdata);
      GpuFence fence = dev.createFence();
      gfx.closeList();
      queue.submit(gfx);
      queue.insertFence(fence);

      auto pipeline = dev.createGraphicsPipeline(GraphicsPipelineDescriptor()
        .PixelShader("tests/stress/pixel")
        .VertexShader("tests/stress/vertex_triangle")
        .setRenderTargetCount(1)
        .RTVFormat(0, FormatType::R8G8B8A8_UNORM_SRGB)
        .DepthStencil(DepthStencilDescriptor().DepthEnable(false)));

      auto vec = faze::vec4({ 0.2f, 0.2f, 0.2f, 1.0f });

      fence.wait();
      gfx.resetList();

      WTime t;
      t.firstTick();
      float frameTime = 30.f;
      float cpuTime = 30.f;
      Bentsumaakaa b;
      // NOTE(review): lbs is not referenced again in this test.
      LBS lbs;

      // Same exit rule as test 1: stop once cpuTime <= 14 or the window asks to.
      while (cpuTime > 14.f)
      //while (true)
      {
        if (window.simpleReadMessages())
          break;

        // Rendertarget
        b.start(false);
        gfx.setViewPort(port);
        auto backBufferIndex = sc->GetCurrentBackBufferIndex();
        gfx.ClearRenderTargetView(sc[backBufferIndex], vec);
        gfx.setRenderTarget(sc[backBufferIndex]);
        // graphics begin
        {
          // Full binding path for every single draw; this is what is measured.
          for (int i = 0; i < currentTriangleCount; ++i)
          {
            auto bind = gfx.bind(pipeline);
            bind.SRV(0, dstdataSrv);
            gfx.drawInstanced(bind, 3, 1, 0, i);
          }
        }

        // submit all
        gfx.closeList();
        queue.submit(gfx);
        cpuTime = b.stop(false) / 1000000.f;
        // present
        sc->Present(1, 0);
        queue.insertFence(fence);
        fence.wait();
        gfx.resetList();

        t.tick();
        // NOTE(review): the first assignment is overwritten by the next line,
        // so the logged frametime is analyzeFrames().x().
        frameTime = static_cast<float>(t.getCurrentNano())*0.000001f;
        frameTime = t.analyzeFrames().x();
        // Drop 1% of the triangles while the CPU section is above 16.
        if (cpuTime > 16.f)
        {
          currentTriangleCount -= currentTriangleCount / 100;
        }
      }
      //fence.wait();
      F_LOG("frametime: %.3fms CpuTime: %.3fms TriangleCount: %d\n", frameTime, cpuTime, currentTriangleCount);
      log.update();
      return true;
    });

    // Test 3: one command list per LBS thread, baseline path. Starts at
    // 4,000,000 triangles; loops until cpuTime <= 14.
    t.addTest("Lots of drawcalls bench, (Multithread), baseline", [&]()
    {
      using namespace faze;
      struct buf
      {
        float pos[4];
      };
      auto triangleCount = 4000000;
      auto currentTriangleCount = triangleCount;
      // Upload-heap source buffer and the destination buffer the SRV views.
	  auto srcdata = dev.createBuffer(ResourceDescriptor()
		  .Width(triangleCount)
		  .Format<buf>()
		  .Usage(ResourceUsage::UploadHeap));
	  auto dstdata = dev.createBuffer(ResourceDescriptor()
		  .Width(triangleCount)
		  .Format<buf>());
	  auto dstdataSrv = dev.createBufferSRV(dstdata);

      std::random_device rd;
      std::mt19937 gen(rd());
      std::uniform_real_distribution<> dis(-0.8f, 0.8f);
      std::uniform_real_distribution<> dis2(0.f, 1.f);

      {
        // Random pos[0..2]; pos[3] is not written.
        auto tmp = srcdata.Map<buf>();
        for (int i = 0;i < triangleCount; ++i)
        {
          auto& it = tmp[i].pos;
          it[0] = static_cast<float>(dis(gen));
          it[1] = static_cast<float>(dis(gen));
          it[2] = static_cast<float>(dis2(gen));
        }
      }

      // The upload copy still goes through the shared gfx list.
      gfx.CopyResource(dstdata, srcdata);
      GpuFence fence = dev.createFence();
      gfx.closeList();
      queue.submit(gfx);
      queue.insertFence(fence);

      auto pipeline = dev.createGraphicsPipeline(GraphicsPipelineDescriptor()
        .PixelShader("tests/stress/pixel")
        .VertexShader("tests/stress/vertex_triangle")
        .setRenderTargetCount(1)
        .RTVFormat(0, FormatType::R8G8B8A8_UNORM_SRGB)
        .DepthStencil(DepthStencilDescriptor().DepthEnable(false)));

      auto vec = faze::vec4({ 0.2f, 0.2f, 0.2f, 1.0f });

      fence.wait();
      gfx.resetList();

      WTime t;
      t.firstTick();
      float frameTime = 30.f;
      float cpuTime = 30.f;
      Bentsumaakaa b;
      LBS lbs;
      // One command list per lbs.threadCount(); indexed by threadIndex below.
      std::vector<GfxCommandList> m_cmds;
      for (size_t i = 0; i < lbs.threadCount(); ++i)
      {
        m_cmds.push_back(dev.createUniversalCommandList());
      }
      //while (true)
      while (cpuTime > 14.f)
      {
        if (window.simpleReadMessages())
          break;

        // Rendertarget
        b.start(false);
        // Only list 0 clears the back buffer; the other lists just set the
        // viewport and render target.
        m_cmds[0].setViewPort(port);
        auto backBufferIndex = sc->GetCurrentBackBufferIndex();
        m_cmds[0].ClearRenderTargetView(sc[backBufferIndex], vec);
        m_cmds[0].setRenderTarget(sc[backBufferIndex]);
        for (size_t i = 1; i < lbs.threadCount(); ++i)
        {
          m_cmds[i].setViewPort(port);
          //m_cmds[i].ClearRenderTargetView(sc[backBufferIndex], vec);
          m_cmds[i].setRenderTarget(sc[backBufferIndex]);
        }
        // graphics begin
        // Each task id records the draws [startIndex, startIndex + workAmount)
        // into the command list selected by threadIndex: one bound draw, then
        // raw draws for the rest of the slice.
        // NOTE(review): presumably the lambda runs for ids 0..99; if so, the
        // currentTriangleCount % 100 remainder is never drawn - confirm.
        lbs.addParallelFor<1>("fillCommands", {}, {}, 0, 100, [&](size_t id, size_t threadIndex)
        {
          auto& gfx2 = m_cmds[threadIndex];
          unsigned workAmount = currentTriangleCount / 100;
          unsigned startIndex = static_cast<unsigned>(workAmount * id);
          auto bind = gfx2.bind(pipeline);
          bind.SRV(0, dstdataSrv);
          gfx2.drawInstanced(bind, 3, 1, 0, startIndex);
          for (unsigned i = startIndex+1; i < startIndex + workAmount; ++i)
          {
            gfx2.drawInstancedRaw(3, 1, 0, i);
          }
        });
        // Presumably blocks until the "fillCommands" tasks are done - confirm.
        lbs.sleepTillKeywords({ "fillCommands" });
        // Close and submit the lists in index order, list 0 (the clear) first.
        for (size_t i = 0; i < lbs.threadCount(); ++i)
        {
          m_cmds[i].closeList();
          queue.submit(m_cmds[i]);
        }
        cpuTime = b.stop(false) / 1000000.f;
        // present
        sc->Present(1, 0);
        queue.insertFence(fence);
        fence.wait();
        for (size_t i = 0; i < lbs.threadCount(); ++i)
        {
          m_cmds[i].resetList();
        }

        t.tick();
        // NOTE(review): the first assignment is overwritten by the next line.
        frameTime = static_cast<float>(t.getCurrentNano())*0.000001f;
        frameTime = t.analyzeFrames().x();
        // Drop 1% of the triangles while the CPU section is above 16.
        if (cpuTime > 16.f)
        {
          currentTriangleCount -= currentTriangleCount / 100;
        }
      }
      F_LOG("frametime: %.3fms CpuTime: %.3fms TriangleCount: %d\n", frameTime, cpuTime, currentTriangleCount);
      log.update();
      return true;
    });

    // Test 4: one command list per LBS thread, "myapi" path. Starts at
    // 2,000,000 triangles; loops until cpuTime <= 14.
    t.addTest("Lots of drawcalls bench, (Multithread), myapi", [&]()
    {
      using namespace faze;
      struct buf
      {
        float pos[4];
      };
      auto triangleCount = 2000000;
      auto currentTriangleCount = triangleCount;
      // Upload-heap source buffer and the destination buffer the SRV views.
	  auto srcdata = dev.createBuffer(ResourceDescriptor()
		  .Width(triangleCount)
		  .Format<buf>()
		  .Usage(ResourceUsage::UploadHeap));
	  auto dstdata = dev.createBuffer(ResourceDescriptor()
		  .Width(triangleCount)
		  .Format<buf>());
	  auto dstdataSrv = dev.createBufferSRV(dstdata);

      std::random_device rd;
      std::mt19937 gen(rd());
      std::uniform_real_distribution<> dis(-0.8f, 0.8f);
      std::uniform_real_distribution<> dis2(0.f, 1.f);

      {
        // Random pos[0..2]; pos[3] is not written.
        auto tmp = srcdata.Map<buf>();
        for (int i = 0;i < triangleCount; ++i)
        {
          auto& it = tmp[i].pos;
          it[0] = static_cast<float>(dis(gen));
          it[1] = static_cast<float>(dis(gen));
          it[2] = static_cast<float>(dis2(gen));
        }
      }

      // The upload copy still goes through the shared gfx list.
      gfx.CopyResource(dstdata, srcdata);
      GpuFence fence = dev.createFence();
      gfx.closeList();
      queue.submit(gfx);
      queue.insertFence(fence);

      auto pipeline = dev.createGraphicsPipeline(GraphicsPipelineDescriptor()
        .PixelShader("tests/stress/pixel")
        .VertexShader("tests/stress/vertex_triangle")
        .setRenderTargetCount(1)
        .RTVFormat(0, FormatType::R8G8B8A8_UNORM_SRGB)
        .DepthStencil(DepthStencilDescriptor().DepthEnable(false)));

      auto vec = faze::vec4({ 0.2f, 0.2f, 0.2f, 1.0f });

      fence.wait();
      gfx.resetList();

      WTime t;
      t.firstTick();
      float frameTime = 30.f;
      float cpuTime = 30.f;
      Bentsumaakaa b;
      LBS lbs;
      // One command list per lbs.threadCount(); indexed by threadIndex below.
      std::vector<GfxCommandList> m_cmds;
      for (size_t i = 0; i < lbs.threadCount(); ++i)
      {
        m_cmds.push_back(dev.createUniversalCommandList());
      }
      //while (true)
      while (cpuTime > 14.f)
      {
        if (window.simpleReadMessages())
          break;

        // Rendertarget
        b.start(false);
        // Only list 0 clears the back buffer.
        m_cmds[0].setViewPort(port);
        auto backBufferIndex = sc->GetCurrentBackBufferIndex();
        m_cmds[0].ClearRenderTargetView(sc[backBufferIndex], vec);
        m_cmds[0].setRenderTarget(sc[backBufferIndex]);
        for (size_t i = 1; i < lbs.threadCount(); ++i)
        {
          m_cmds[i].setViewPort(port);
          //m_cmds[i].ClearRenderTargetView(sc[backBufferIndex], vec);
          m_cmds[i].setRenderTarget(sc[backBufferIndex]);
        }
        // graphics begin
        // Each task id records its slice of draws with a full bind + SRV per draw.
        lbs.addParallelFor<1>("fillCommands", {}, {}, 0, 100, [&](size_t id, size_t threadIndex)
        {
          auto& gfx2 = m_cmds[threadIndex];
          unsigned workAmount = currentTriangleCount / 100;
          unsigned startIndex = static_cast<unsigned>(workAmount * id);
          for (unsigned i = startIndex; i < startIndex + workAmount; ++i)
          {
            auto bind = gfx2.bind(pipeline);
            bind.SRV(0, dstdataSrv);
            gfx2.drawInstanced(bind, 3, 1, 0, i);
          }
        });
        lbs.sleepTillKeywords({ "fillCommands" });
        for (size_t i = 0; i < lbs.threadCount(); ++i)
        {
          m_cmds[i].closeList();
          queue.submit(m_cmds[i]);
        }
        cpuTime = b.stop(false) / 1000000.f;
        // present
        sc->Present(1, 0);
        queue.insertFence(fence);
        fence.wait();
        for (size_t i = 0; i < lbs.threadCount(); ++i)
        {
          m_cmds[i].resetList();
        }

        t.tick();
        // NOTE(review): the first assignment is overwritten by the next line.
        frameTime = static_cast<float>(t.getCurrentNano())*0.000001f;
        frameTime = t.analyzeFrames().x();
        // This test shrinks by 1/40 per step rather than 1/100.
        if (cpuTime > 16.f)
        {
          currentTriangleCount -= currentTriangleCount / 40;
        }
      }
      F_LOG("frametime: %.3fms CpuTime: %.3fms TriangleCount: %d\n", frameTime, cpuTime, currentTriangleCount);
      log.update();
      return true;
    });

    // Test 5: as test 1 (single list, baseline) but gated on frameTime: starts
    // at 800,000 triangles and ends once ten iterations began with frameTime < 20.
    t.addTest("Lots of drawcalls bench, (single thread), rough baseline, frametime 20ms", [&]()
    {
      using namespace faze;
      struct buf
      {
        float pos[4];
      };
      auto triangleCount = 800000;
      auto currentTriangleCount = triangleCount;
      // Upload-heap source buffer and the destination buffer the SRV views.
	  auto srcdata = dev.createBuffer(ResourceDescriptor()
		  .Width(triangleCount)
		  .Format<buf>()
		  .Usage(ResourceUsage::UploadHeap));
	  auto dstdata = dev.createBuffer(ResourceDescriptor()
		  .Width(triangleCount)
		  .Format<buf>());
	  auto dstdataSrv = dev.createBufferSRV(dstdata);

      std::random_device rd;
      std::mt19937 gen(rd());
      std::uniform_real_distribution<> dis(-0.8f, 0.8f);
      std::uniform_real_distribution<> dis2(0.f, 1.f);

      {
        // Random pos[0..2]; pos[3] is not written.
        auto tmp = srcdata.Map<buf>();
        for (int i = 0;i < triangleCount; ++i)
        {
          auto& it = tmp[i].pos;
          it[0] = static_cast<float>(dis(gen));
          it[1] = static_cast<float>(dis(gen));
          it[2] = static_cast<float>(dis2(gen));
        }
      }

      // Unlike the other tests, the fence is waited on and gfx reset right
      // after the copy is submitted, before the pipeline is created.
      gfx.CopyResource(dstdata, srcdata);
      GpuFence fence = dev.createFence();
      gfx.closeList();
      queue.submit(gfx);
      queue.insertFence(fence);
      fence.wait();
      gfx.resetList();

      auto pipeline = dev.createGraphicsPipeline(GraphicsPipelineDescriptor()
        .PixelShader("tests/stress/pixel")
        .VertexShader("tests/stress/vertex_triangle")
        .setRenderTargetCount(1)
        .RTVFormat(0, FormatType::R8G8B8A8_UNORM_SRGB)
        .DepthStencil(DepthStencilDescriptor().DepthEnable(false)));

      auto vec = faze::vec4({ 0.2f, 0.2f, 0.2f, 1.0f });


      WTime t;
      t.firstTick();
      float frameTime = 30.f;
      float cpuTime = 30.f;
      Bentsumaakaa b;

      // Counts iterations that begin with the previous frameTime below 20; the
      // loop ends when the count reaches 10 or the window asks to stop.
      int beenUnderLimit = 0;
      while (beenUnderLimit < 10)
      {
        if (frameTime < 20.f)
          beenUnderLimit++;
        if (window.simpleReadMessages())
          break;

        // Rendertarget
        b.start(false);
        gfx.setViewPort(port);
        auto backBufferIndex = sc->GetCurrentBackBufferIndex();
        gfx.ClearRenderTargetView(sc[backBufferIndex], vec);
        gfx.setRenderTarget(sc[backBufferIndex]);
        // graphics begin
        {
          // Bind once, draw triangle 0 through the binding, the rest raw.
          auto bind = gfx.bind(pipeline);
          bind.SRV(0, dstdataSrv);
          gfx.drawInstanced(bind, 3, 1, 0, 0);

          for (int i = 1; i < currentTriangleCount; ++i)
          {
            gfx.drawInstancedRaw(3, 1, 0, i); // this is the cheat.
          }

        }

        // submit all
        gfx.closeList();
        queue.submit(gfx);
        // cpuTime is only reported here; it does not steer this loop.
        cpuTime = b.stop(false) / 1000000.f;
        // present
        sc->Present(1, 0);
        queue.insertFence(fence);
        fence.wait();
        gfx.resetList();

        t.tick();
        frameTime = static_cast<float>(t.getCurrentNano())*0.000001f;
        // Drop 1% of the triangles while the frame time is above 20.
        if (frameTime > 20.f)
        {
          currentTriangleCount -= currentTriangleCount / 100;
        }
      }
      F_LOG("frametime: %.3fms CpuTime: %.3fms TriangleCount: %d\n", frameTime, cpuTime, currentTriangleCount);
      log.update();
      //fence.wait();
      return true;
    });

    // Test 6: as test 2 (single list, "myapi") but gated on frameTime: starts
    // at 100,000 triangles and ends once ten iterations began with frameTime < 20.
    t.addTest("Lots of drawcalls bench, (single thread), myapi, frametime 20ms", [&]()
    {
      using namespace faze;
      struct buf
      {
        float pos[4];
      };
      auto triangleCount = 100000;
      auto currentTriangleCount = triangleCount;
      // Upload-heap source buffer and the destination buffer the SRV views.
	  auto srcdata = dev.createBuffer(ResourceDescriptor()
		  .Width(triangleCount)
		  .Format<buf>()
		  .Usage(ResourceUsage::UploadHeap));
	  auto dstdata = dev.createBuffer(ResourceDescriptor()
		  .Width(triangleCount)
		  .Format<buf>());
	  auto dstdataSrv = dev.createBufferSRV(dstdata);

      std::random_device rd;
      std::mt19937 gen(rd());
      std::uniform_real_distribution<> dis(-0.8f, 0.8f);
      std::uniform_real_distribution<> dis2(0.f, 1.f);

      {
        // Random pos[0..2]; pos[3] is not written.
        auto tmp = srcdata.Map<buf>();
        for (int i = 0;i < triangleCount; ++i)
        {
          auto& it = tmp[i].pos;
          it[0] = static_cast<float>(dis(gen));
          it[1] = static_cast<float>(dis(gen));
          it[2] = static_cast<float>(dis2(gen));
        }
      }

      // Submit the upload copy and a fence; the wait happens after pipeline creation.
      gfx.CopyResource(dstdata, srcdata);
      GpuFence fence = dev.createFence();
      gfx.closeList();
      queue.submit(gfx);
      queue.insertFence(fence);

      auto pipeline = dev.createGraphicsPipeline(GraphicsPipelineDescriptor()
        .PixelShader("tests/stress/pixel")
        .VertexShader("tests/stress/vertex_triangle")
        .setRenderTargetCount(1)
        .RTVFormat(0, FormatType::R8G8B8A8_UNORM_SRGB)
        .DepthStencil(DepthStencilDescriptor().DepthEnable(false)));

      auto vec = faze::vec4({ 0.2f, 0.2f, 0.2f, 1.0f });

      fence.wait();
      gfx.resetList();

      WTime t;
      t.firstTick();
      float frameTime = 30.f;
      float cpuTime = 30.f;
      Bentsumaakaa b;
      // NOTE(review): lbs is not referenced again in this test.
      LBS lbs;

      // Ends after ten iterations that began with frameTime < 20.
      int beenUnderLimit = 0;
      while (beenUnderLimit < 10)
      {
        if (frameTime < 20.f)
          beenUnderLimit++;
        if (window.simpleReadMessages())
          break;

        // Rendertarget
        b.start(false);
        gfx.setViewPort(port);
        auto backBufferIndex = sc->GetCurrentBackBufferIndex();
        gfx.ClearRenderTargetView(sc[backBufferIndex], vec);
        gfx.setRenderTarget(sc[backBufferIndex]);
        // graphics begin
        {
          // Full binding path for every single draw.
          for (int i = 0; i < currentTriangleCount; ++i)
          {
            auto bind = gfx.bind(pipeline);
            bind.SRV(0, dstdataSrv);
            gfx.drawInstanced(bind, 3, 1, 0, i);
          }
        }

        // submit all
        gfx.closeList();
        queue.submit(gfx);
        cpuTime = b.stop(false) / 1000000.f;
        // present
        sc->Present(1, 0);
        queue.insertFence(fence);
        fence.wait();
        gfx.resetList();

        t.tick();
        frameTime = static_cast<float>(t.getCurrentNano())*0.000001f;
        //frameTime = t.analyzeFrames().x();
        // Drop 1% of the triangles while the frame time is above 20.
        if (frameTime > 20.f)
        {
          currentTriangleCount -= currentTriangleCount / 100;
        }
      }
      //fence.wait();
      F_LOG("frametime: %.3fms CpuTime: %.3fms TriangleCount: %d\n", frameTime, cpuTime, currentTriangleCount);
      log.update();
      return true;
    });

    // Test 7: as test 3 (per-thread lists, baseline) but gated on frameTime:
    // starts at 1,000,000 triangles and ends once ten iterations began with
    // frameTime < 20.
    t.addTest("Lots of drawcalls bench, (Multithread), baseline, frametime 20ms", [&]()
    {
      using namespace faze;
      struct buf
      {
        float pos[4];
      };
      auto triangleCount = 1000000;
      auto currentTriangleCount = triangleCount;
      // Upload-heap source buffer and the destination buffer the SRV views.
	  auto srcdata = dev.createBuffer(ResourceDescriptor()
		  .Width(triangleCount)
		  .Format<buf>()
		  .Usage(ResourceUsage::UploadHeap));
	  auto dstdata = dev.createBuffer(ResourceDescriptor()
		  .Width(triangleCount)
		  .Format<buf>());
	  auto dstdataSrv = dev.createBufferSRV(dstdata);

      std::random_device rd;
      std::mt19937 gen(rd());
      std::uniform_real_distribution<> dis(-0.8f, 0.8f);
      std::uniform_real_distribution<> dis2(0.f, 1.f);

      {
        // Random pos[0..2]; pos[3] is not written.
        auto tmp = srcdata.Map<buf>();
        for (int i = 0;i < triangleCount; ++i)
        {
          auto& it = tmp[i].pos;
          it[0] = static_cast<float>(dis(gen));
          it[1] = static_cast<float>(dis(gen));
          it[2] = static_cast<float>(dis2(gen));
        }
      }

      // The upload copy still goes through the shared gfx list.
      gfx.CopyResource(dstdata, srcdata);
      GpuFence fence = dev.createFence();
      gfx.closeList();
      queue.submit(gfx);
      queue.insertFence(fence);

      auto pipeline = dev.createGraphicsPipeline(GraphicsPipelineDescriptor()
        .PixelShader("tests/stress/pixel")
        .VertexShader("tests/stress/vertex_triangle")
        .setRenderTargetCount(1)
        .RTVFormat(0, FormatType::R8G8B8A8_UNORM_SRGB)
        .DepthStencil(DepthStencilDescriptor().DepthEnable(false)));

      auto vec = faze::vec4({ 0.2f, 0.2f, 0.2f, 1.0f });

      fence.wait();
      gfx.resetList();

      WTime t;
      t.firstTick();
      float frameTime = 30.f;
      float cpuTime = 30.f;
      Bentsumaakaa b;
      LBS lbs;
      // One command list per lbs.threadCount(); indexed by threadIndex below.
      std::vector<GfxCommandList> m_cmds;
      for (size_t i = 0; i < lbs.threadCount(); ++i)
      {
        m_cmds.push_back(dev.createUniversalCommandList());
      }
      //while (true)
      // Ends after ten iterations that began with frameTime < 20.
      int beenUnderLimit = 0;
      while (beenUnderLimit < 10)
      {
        if (frameTime < 20.f)
          beenUnderLimit++;
        if (window.simpleReadMessages())
          break;

        // Rendertarget
        b.start(false);
        // Only list 0 clears the back buffer.
        m_cmds[0].setViewPort(port);
        auto backBufferIndex = sc->GetCurrentBackBufferIndex();
        m_cmds[0].ClearRenderTargetView(sc[backBufferIndex], vec);
        m_cmds[0].setRenderTarget(sc[backBufferIndex]);
        for (size_t i = 1; i < lbs.threadCount(); ++i)
        {
          m_cmds[i].setViewPort(port);
          //m_cmds[i].ClearRenderTargetView(sc[backBufferIndex], vec);
          m_cmds[i].setRenderTarget(sc[backBufferIndex]);
        }
        // graphics begin
        // Each task id binds once, draws the first triangle of its slice
        // through the binding and the rest of the slice raw.
        lbs.addParallelFor<1>("fillCommands", {}, {}, 0, 100, [&](size_t id, size_t threadIndex)
        {
          auto& gfx2 = m_cmds[threadIndex];
          unsigned workAmount = static_cast<unsigned>(currentTriangleCount / 100);
          unsigned startIndex = static_cast<unsigned>(workAmount * id);
          auto bind = gfx2.bind(pipeline);
          bind.SRV(0, dstdataSrv);
          gfx2.drawInstanced(bind, 3, 1, 0, startIndex);
          for (unsigned i = startIndex + 1; i < startIndex + workAmount; ++i)
          {
            gfx2.drawInstancedRaw(3, 1, 0, i);
          }
        });
        lbs.sleepTillKeywords({ "fillCommands" });
        for (size_t i = 0; i < lbs.threadCount(); ++i)
        {
          m_cmds[i].closeList();
          queue.submit(m_cmds[i]);
        }
        cpuTime = b.stop(false) / 1000000.f;
        // present
        sc->Present(1, 0);
        queue.insertFence(fence);
        fence.wait();
        for (size_t i = 0; i < lbs.threadCount(); ++i)
        {
          m_cmds[i].resetList();
        }

        t.tick();
        frameTime = static_cast<float>(t.getCurrentNano())*0.000001f;
        //frameTime = t.analyzeFrames().x();
        // This test shrinks by 1/120 per step while the frame time is above 20.
        if (frameTime > 20.f)
        {
          currentTriangleCount -= currentTriangleCount / 120;
        }
      }
      F_LOG("frametime: %.3fms CpuTime: %.3fms TriangleCount: %d\n", frameTime, cpuTime, currentTriangleCount);
      log.update();
      return true;
    });

    // Test 8: as test 4 (per-thread lists, "myapi") but gated on frameTime:
    // starts at 400,000 triangles and ends once ten iterations began with
    // frameTime < 20.
    t.addTest("Lots of drawcalls bench, (Multithread), myapi, frametime 20ms", [&]()
    {
      using namespace faze;
      struct buf
      {
        float pos[4];
      };
      auto triangleCount = 400000;
      auto currentTriangleCount = triangleCount;
      // Upload-heap source buffer and the destination buffer the SRV views.
	  auto srcdata = dev.createBuffer(ResourceDescriptor()
		  .Width(triangleCount)
		  .Format<buf>()
		  .Usage(ResourceUsage::UploadHeap));
	  auto dstdata = dev.createBuffer(ResourceDescriptor()
		  .Width(triangleCount)
		  .Format<buf>());
	  auto dstdataSrv = dev.createBufferSRV(dstdata);

      std::random_device rd;
      std::mt19937 gen(rd());
      std::uniform_real_distribution<> dis(-0.8f, 0.8f);
      std::uniform_real_distribution<> dis2(0.f, 1.f);

      {
        // Random pos[0..2]; pos[3] is not written.
        auto tmp = srcdata.Map<buf>();
        for (int i = 0;i < triangleCount; ++i)
        {
          auto& it = tmp[i].pos;
          it[0] = static_cast<float>(dis(gen));
          it[1] = static_cast<float>(dis(gen));
          it[2] = static_cast<float>(dis2(gen));
        }
      }

      // The upload copy still goes through the shared gfx list.
      gfx.CopyResource(dstdata, srcdata);
      GpuFence fence = dev.createFence();
      gfx.closeList();
      queue.submit(gfx);
      queue.insertFence(fence);

      auto pipeline = dev.createGraphicsPipeline(GraphicsPipelineDescriptor()
        .PixelShader("tests/stress/pixel")
        .VertexShader("tests/stress/vertex_triangle")
        .setRenderTargetCount(1)
        .RTVFormat(0, FormatType::R8G8B8A8_UNORM_SRGB)
        .DepthStencil(DepthStencilDescriptor().DepthEnable(false)));

      auto vec = faze::vec4({ 0.2f, 0.2f, 0.2f, 1.0f });

      fence.wait();
      gfx.resetList();

      WTime t;
      t.firstTick();
      float frameTime = 30.f;
      float cpuTime = 30.f;
      Bentsumaakaa b;
      LBS lbs;
      // One command list per lbs.threadCount(); indexed by threadIndex below.
      std::vector<GfxCommandList> m_cmds;
      for (size_t i = 0; i < lbs.threadCount(); ++i)
      {
        m_cmds.push_back(dev.createUniversalCommandList());
      }
      //while (true)
      // Ends after ten iterations that began with frameTime < 20.
      int beenUnderLimit = 0;
      while (beenUnderLimit < 10)
      {
        if (frameTime < 20.f)
          beenUnderLimit++;
        if (window.simpleReadMessages())
          break;

        // Rendertarget
        b.start(false);
        // Only list 0 clears the back buffer.
        m_cmds[0].setViewPort(port);
        auto backBufferIndex = sc->GetCurrentBackBufferIndex();
        m_cmds[0].ClearRenderTargetView(sc[backBufferIndex], vec);
        m_cmds[0].setRenderTarget(sc[backBufferIndex]);
        for (size_t i = 1; i < lbs.threadCount(); ++i)
        {
          m_cmds[i].setViewPort(port);
          //m_cmds[i].ClearRenderTargetView(sc[backBufferIndex], vec);
          m_cmds[i].setRenderTarget(sc[backBufferIndex]);
        }
        // graphics begin
        // Each task id records its slice of draws with a full bind + SRV per draw.
        lbs.addParallelFor<1>("fillCommands", {}, {}, 0, 100, [&](size_t id, size_t threadIndex)
        {
          auto& gfx2 = m_cmds[threadIndex];
          size_t workAmount = currentTriangleCount / 100;
          size_t startIndex = workAmount * id;
          for (unsigned i = static_cast<unsigned>(startIndex); i < static_cast<unsigned>(startIndex + workAmount); ++i)
          {
            auto bind = gfx2.bind(pipeline);
            bind.SRV(0, dstdataSrv);
            gfx2.drawInstanced(bind, 3, 1, 0, i);
          }
        });
        lbs.sleepTillKeywords({ "fillCommands" });
        for (size_t i = 0; i < lbs.threadCount(); ++i)
        {
          m_cmds[i].closeList();
          queue.submit(m_cmds[i]);
        }
        cpuTime = b.stop(false) / 1000000.f;
        // present
        sc->Present(1, 0);
        queue.insertFence(fence);
        fence.wait();
        for (size_t i = 0; i < lbs.threadCount(); ++i)
        {
          m_cmds[i].resetList();
        }

        t.tick();
        frameTime = static_cast<float>(t.getCurrentNano())*0.000001f;
        //frameTime = t.analyzeFrames().x();
        // This test shrinks by 1/120 per step while the frame time is above 20.
        if (frameTime > 20.f)
        {
          currentTriangleCount -= currentTriangleCount / 120;
        }
      }
      F_LOG("frametime: %.3fms CpuTime: %.3fms TriangleCount: %d\n", frameTime, cpuTime, currentTriangleCount);
      log.update();
      return true;
    });

    // Execute every test registered above.
    t.runTests();
  }