Exemple #1
0
void StateCache::Init()
{
	// Root signature isn't available at time of StateCache construction, so fill it in now.
	gx_state_cache.m_current_pso_desc.pRootSignature = D3D::default_root_signature;

	// Multi-sample configuration isn't available at time of StateCache construction, so fille it in now.
	gx_state_cache.m_current_pso_desc.SampleDesc.Count = g_ActiveConfig.iMultisamples;
	gx_state_cache.m_current_pso_desc.SampleDesc.Quality = 0;

	if (!File::Exists(File::GetUserPath(D_SHADERCACHE_IDX)))
		File::CreateDir(File::GetUserPath(D_SHADERCACHE_IDX));

	std::string cache_filename = StringFromFormat("%sdx12-%s-pso.cache", File::GetUserPath(D_SHADERCACHE_IDX).c_str(),
		SConfig::GetInstance().m_strUniqueID.c_str());

	PipelineStateCacheInserter inserter;
	s_pso_disk_cache.OpenAndRead(cache_filename, inserter);

	if (s_cache_is_corrupted)
	{
		// If a PSO fails to create, that means either:
		// - The file itself is corrupt.
		// - A driver/HW change has occurred, causing the existing cache blobs to be invalid.
		//
		// In either case, we want to re-create the disk cache. This should not be a frequent occurence.

		s_pso_disk_cache.Close();

		for (auto it : gx_state_cache.m_small_pso_map)
		{
			SAFE_RELEASE(it.second);
		}
		gx_state_cache.m_small_pso_map.clear();

		File::Delete(cache_filename);

		s_pso_disk_cache.OpenAndRead(cache_filename, inserter);

		s_cache_is_corrupted = false;
	}
}
bool GeometryShaderCache::SetShader(u32 primitive_type)
{
  GeometryShaderUid uid = GetGeometryShaderUid(primitive_type);

  // Check if the shader is already set
  if (last_entry)
  {
    if (uid == last_uid)
    {
      GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
      return true;
    }
  }

  last_uid = uid;

  // Check if the shader is a pass-through shader
  if (uid.GetUidData()->IsPassthrough())
  {
    // Return the default pass-through shader
    last_entry = &pass_entry;
    return true;
  }

  // Check if the shader is already in the cache
  GSCache::iterator iter;
  iter = GeometryShaders.find(uid);
  if (iter != GeometryShaders.end())
  {
    const GSCacheEntry& entry = iter->second;
    last_entry = &entry;

    return (entry.shader != nullptr);
  }

  // Need to compile a new shader
  ShaderCode code = GenerateGeometryShaderCode(APIType::D3D, uid.GetUidData());

  D3DBlob* pbytecode;
  if (!D3D::CompileGeometryShader(code.GetBuffer(), &pbytecode))
  {
    GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
    return false;
  }

  // Insert the bytecode into the caches
  g_gs_disk_cache.Append(uid, pbytecode->Data(), pbytecode->Size());

  bool success = InsertByteCode(uid, pbytecode->Data(), pbytecode->Size());
  pbytecode->Release();

  return success;
}
bool VertexShaderCache::SetShader()
{
  VertexShaderUid uid = GetVertexShaderUid(API_D3D);
  if (g_ActiveConfig.bEnableShaderDebugging)
  {
    ShaderCode code = GenerateVertexShaderCode(API_D3D);
    vertex_uid_checker.AddToIndexAndCheck(code, uid, "Vertex", "v");
  }

  if (last_entry)
  {
    if (uid == last_uid)
    {
      GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
      return (last_entry->shader != nullptr);
    }
  }

  last_uid = uid;

  VSCache::iterator iter = vshaders.find(uid);
  if (iter != vshaders.end())
  {
    const VSCacheEntry& entry = iter->second;
    last_entry = &entry;

    GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
    return (entry.shader != nullptr);
  }

  ShaderCode code = GenerateVertexShaderCode(API_D3D);

  D3DBlob* pbytecode = nullptr;
  D3D::CompileVertexShader(code.GetBuffer(), &pbytecode);

  if (pbytecode == nullptr)
  {
    GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
    return false;
  }
  g_vs_disk_cache.Append(uid, pbytecode->Data(), pbytecode->Size());

  bool success = InsertByteCode(uid, pbytecode);
  pbytecode->Release();

  if (g_ActiveConfig.bEnableShaderDebugging && success)
  {
    vshaders[uid].code = code.GetBuffer();
  }

  GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
  return success;
}
void VertexShaderCache::Shutdown()
{
	if (Compiler)
	{
		Compiler->WaitForFinish();
	}
	for (int i = 0; i < MAX_SSAA_SHADERS; i++)
	{
		if (SimpleVertexShader[i])
			SimpleVertexShader[i]->Release();
		SimpleVertexShader[i] = NULL;
	}

	if (ClearVertexShader)
		ClearVertexShader->Release();
	ClearVertexShader = NULL;

	Clear();
	g_vs_disk_cache.Sync();
	g_vs_disk_cache.Close();
}
void ProgramShaderCache::Shutdown(void)
{
	// store all shaders in cache on disk
	if (g_ogl_config.bSupportsGLSLCache && !g_Config.bEnableShaderDebugging)
	{
		PCache::iterator iter = pshaders.begin();
		for (; iter != pshaders.end(); ++iter)
		{
			if(iter->second.in_cache) continue;

			GLint binary_size;
			glGetProgramiv(iter->second.shader.glprogid, GL_PROGRAM_BINARY_LENGTH, &binary_size);
			if(!binary_size) continue;

			u8 *data = new u8[binary_size+sizeof(GLenum)];
			u8 *binary = data + sizeof(GLenum);
			GLenum *prog_format = (GLenum*)data;
			glGetProgramBinary(iter->second.shader.glprogid, binary_size, NULL, prog_format, binary);

			g_program_disk_cache.Append(iter->first, data, binary_size+sizeof(GLenum));
			delete [] data;
		}

		g_program_disk_cache.Sync();
		g_program_disk_cache.Close();
	}

	glUseProgram(0);

	PCache::iterator iter = pshaders.begin();
	for (; iter != pshaders.end(); ++iter)
		iter->second.Destroy();
	pshaders.clear();

	pixel_uid_checker.Invalidate();
	vertex_uid_checker.Invalidate();

	delete s_buffer;
	s_buffer = 0;
}
Exemple #6
0
void PixelShaderCache::Init()
{
  unsigned int cbsize = Common::AlignUp(static_cast<unsigned int>(sizeof(PixelShaderConstants)),
                                        16);  // must be a multiple of 16
  D3D11_BUFFER_DESC cbdesc = CD3D11_BUFFER_DESC(cbsize, D3D11_BIND_CONSTANT_BUFFER,
                                                D3D11_USAGE_DYNAMIC, D3D11_CPU_ACCESS_WRITE);
  D3D::device->CreateBuffer(&cbdesc, nullptr, &pscbuf);
  CHECK(pscbuf != nullptr, "Create pixel shader constant buffer");
  D3D::SetDebugObjectName((ID3D11DeviceChild*)pscbuf,
                          "pixel shader constant buffer used to emulate the GX pipeline");

  // used when drawing clear quads
  s_ClearProgram = D3D::CompileAndCreatePixelShader(clear_program_code);
  CHECK(s_ClearProgram != nullptr, "Create clear pixel shader");
  D3D::SetDebugObjectName((ID3D11DeviceChild*)s_ClearProgram, "clear pixel shader");

  // used for anaglyph stereoscopy
  s_AnaglyphProgram = D3D::CompileAndCreatePixelShader(anaglyph_program_code);
  CHECK(s_AnaglyphProgram != nullptr, "Create anaglyph pixel shader");
  D3D::SetDebugObjectName((ID3D11DeviceChild*)s_AnaglyphProgram, "anaglyph pixel shader");

  // used when copying/resolving the color buffer
  s_ColorCopyProgram[0] = D3D::CompileAndCreatePixelShader(color_copy_program_code);
  CHECK(s_ColorCopyProgram[0] != nullptr, "Create color copy pixel shader");
  D3D::SetDebugObjectName((ID3D11DeviceChild*)s_ColorCopyProgram[0], "color copy pixel shader");

  // used for color conversion
  s_ColorMatrixProgram[0] = D3D::CompileAndCreatePixelShader(color_matrix_program_code);
  CHECK(s_ColorMatrixProgram[0] != nullptr, "Create color matrix pixel shader");
  D3D::SetDebugObjectName((ID3D11DeviceChild*)s_ColorMatrixProgram[0], "color matrix pixel shader");

  // used for depth copy
  s_DepthMatrixProgram[0] = D3D::CompileAndCreatePixelShader(depth_matrix_program);
  CHECK(s_DepthMatrixProgram[0] != nullptr, "Create depth matrix pixel shader");
  D3D::SetDebugObjectName((ID3D11DeviceChild*)s_DepthMatrixProgram[0], "depth matrix pixel shader");

  Clear();

  if (!File::Exists(File::GetUserPath(D_SHADERCACHE_IDX)))
    File::CreateDir(File::GetUserPath(D_SHADERCACHE_IDX));

  SETSTAT(stats.numPixelShadersCreated, 0);
  SETSTAT(stats.numPixelShadersAlive, 0);

  std::string cache_filename =
      StringFromFormat("%sdx11-%s-ps.cache", File::GetUserPath(D_SHADERCACHE_IDX).c_str(),
                       SConfig::GetInstance().m_strGameID.c_str());
  PixelShaderCacheInserter inserter;
  g_ps_disk_cache.OpenAndRead(cache_filename, inserter);

  last_entry = nullptr;
}
Exemple #7
0
void ShaderCache::Shutdown()
{
	if (s_compiler)
	{
		s_compiler->WaitForFinish();
	}

	for (auto& iter : s_shader_blob_list)
		SAFE_RELEASE(iter);

	s_shader_blob_list.clear();

	vs_bytecode_cache->Persist();
	delete vs_bytecode_cache;
	vs_bytecode_cache = nullptr;

	gs_bytecode_cache->Persist();
	delete gs_bytecode_cache;
	gs_bytecode_cache = nullptr;

	ts_bytecode_cache->Persist();
	delete ts_bytecode_cache;
	ts_bytecode_cache = nullptr;

	ps_bytecode_cache->Persist();
	delete ps_bytecode_cache;
	ps_bytecode_cache = nullptr;

	s_ds_disk_cache.Sync();
	s_ds_disk_cache.Close();
	s_hs_disk_cache.Sync();
	s_hs_disk_cache.Close();
	s_gs_disk_cache.Sync();
	s_gs_disk_cache.Close();
	s_ps_disk_cache.Sync();
	s_ps_disk_cache.Close();
	s_vs_disk_cache.Sync();
	s_vs_disk_cache.Close();
}
bool VertexShaderCache::SetShader(u32 components)
{
	VERTEXSHADERUID uid;
	GetVertexShaderId(&uid, components);
	if (last_entry)
	{
		if (uid == last_uid)
		{
			GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
			ValidateVertexShaderIDs(API_D3D11, last_entry->safe_uid, last_entry->code, components);
			return (last_entry->shader != NULL);
		}
	}

	last_uid = uid;

	VSCache::iterator iter = vshaders.find(uid);
	if (iter != vshaders.end())
	{
		const VSCacheEntry &entry = iter->second;
		last_entry = &entry;

		GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
		ValidateVertexShaderIDs(API_D3D11, entry.safe_uid, entry.code, components);
		return (entry.shader != NULL);
	}

	const char *code = GenerateVertexShaderCode(components, API_D3D11);

	D3DBlob* pbytecode = NULL;
	D3D::CompileVertexShader(code, (int)strlen(code), &pbytecode);

	if (pbytecode == NULL)
	{
		GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
		return false;
	}
	g_vs_disk_cache.Append(uid, pbytecode->Data(), pbytecode->Size());

	bool success = InsertByteCode(uid, pbytecode);
	pbytecode->Release();

	if (g_ActiveConfig.bEnableShaderDebugging && success)
	{
		vshaders[uid].code = code;
		GetSafeVertexShaderId(&vshaders[uid].safe_uid, components);
	}

	GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
	return success;
}
Exemple #9
0
void PixelShaderCache::Init()
{
	unsigned int cbsize = ((sizeof(PixelShaderConstants))&(~0xf))+0x10; // must be a multiple of 16
	D3D11_BUFFER_DESC cbdesc = CD3D11_BUFFER_DESC(cbsize, D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DYNAMIC, D3D11_CPU_ACCESS_WRITE);
	D3D::device->CreateBuffer(&cbdesc, NULL, &pscbuf);
	CHECK(pscbuf!=NULL, "Create pixel shader constant buffer");
	D3D::SetDebugObjectName((ID3D11DeviceChild*)pscbuf, "pixel shader constant buffer used to emulate the GX pipeline");

	// used when drawing clear quads
	s_ClearProgram = D3D::CompileAndCreatePixelShader(clear_program_code, sizeof(clear_program_code));
	CHECK(s_ClearProgram!=NULL, "Create clear pixel shader");
	D3D::SetDebugObjectName((ID3D11DeviceChild*)s_ClearProgram, "clear pixel shader");

	// used when copying/resolving the color buffer
	s_ColorCopyProgram[0] = D3D::CompileAndCreatePixelShader(color_copy_program_code, sizeof(color_copy_program_code));
	CHECK(s_ColorCopyProgram[0]!=NULL, "Create color copy pixel shader");
	D3D::SetDebugObjectName((ID3D11DeviceChild*)s_ColorCopyProgram[0], "color copy pixel shader");

	// used for color conversion
	s_ColorMatrixProgram[0] = D3D::CompileAndCreatePixelShader(color_matrix_program_code, sizeof(color_matrix_program_code));
	CHECK(s_ColorMatrixProgram[0]!=NULL, "Create color matrix pixel shader");
	D3D::SetDebugObjectName((ID3D11DeviceChild*)s_ColorMatrixProgram[0], "color matrix pixel shader");

	// used for depth copy
	s_DepthMatrixProgram[0] = D3D::CompileAndCreatePixelShader(depth_matrix_program, sizeof(depth_matrix_program));
	CHECK(s_DepthMatrixProgram[0]!=NULL, "Create depth matrix pixel shader");
	D3D::SetDebugObjectName((ID3D11DeviceChild*)s_DepthMatrixProgram[0], "depth matrix pixel shader");

	Clear();

	if (!File::Exists(File::GetUserPath(D_SHADERCACHE_IDX)))
		File::CreateDir(File::GetUserPath(D_SHADERCACHE_IDX).c_str());

	SETSTAT(stats.numPixelShadersCreated, 0);
	SETSTAT(stats.numPixelShadersAlive, 0);

	char cache_filename[MAX_PATH];
	sprintf(cache_filename, "%sdx11-%s-ps.cache", File::GetUserPath(D_SHADERCACHE_IDX).c_str(),
			SConfig::GetInstance().m_LocalCoreStartupParameter.m_strUniqueID.c_str());
	PixelShaderCacheInserter inserter;
	g_ps_disk_cache.OpenAndRead(cache_filename, inserter);

	if (g_Config.bEnableShaderDebugging)
		Clear();

	last_entry = NULL;
}
Exemple #10
0
bool PixelShaderCache::SetShader()
{
  PixelShaderUid uid = GetPixelShaderUid();

  // Check if the shader is already set
  if (last_entry)
  {
    if (uid == last_uid)
    {
      GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
      return (last_entry->shader != nullptr);
    }
  }

  last_uid = uid;

  // Check if the shader is already in the cache
  PSCache::iterator iter;
  iter = PixelShaders.find(uid);
  if (iter != PixelShaders.end())
  {
    const PSCacheEntry& entry = iter->second;
    last_entry = &entry;

    GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
    return (entry.shader != nullptr);
  }

  // Need to compile a new shader
  ShaderCode code = GeneratePixelShaderCode(APIType::D3D, uid.GetUidData());

  D3DBlob* pbytecode;
  if (!D3D::CompilePixelShader(code.GetBuffer(), &pbytecode))
  {
    GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
    return false;
  }

  // Insert the bytecode into the caches
  g_ps_disk_cache.Append(uid, pbytecode->Data(), pbytecode->Size());

  bool success = InsertByteCode(uid, pbytecode->Data(), pbytecode->Size());
  pbytecode->Release();

  GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
  return success;
}
Exemple #11
0
void ShaderCache::Shutdown()
{
  Clear();

  s_gs_disk_cache.Sync();
  s_gs_disk_cache.Close();
  s_ps_disk_cache.Sync();
  s_ps_disk_cache.Close();
  s_vs_disk_cache.Sync();
  s_vs_disk_cache.Close();
}
Exemple #12
0
bool VertexShaderCache::SetUberShader(D3DVertexFormat* vertex_format)
{
  D3DVertexFormat* uber_vertex_format = static_cast<D3DVertexFormat*>(
      VertexLoaderManager::GetUberVertexFormat(vertex_format->GetVertexDeclaration()));
  UberShader::VertexShaderUid uid = UberShader::GetVertexShaderUid();
  if (last_uber_entry && last_uber_uid == uid)
  {
    if (!last_uber_entry->shader)
      return false;

    uber_vertex_format->SetInputLayout(last_uber_entry->bytecode);
    D3D::stateman->SetVertexShader(last_uber_entry->shader);
    return true;
  }

  auto iter = ubervshaders.find(uid);
  if (iter != ubervshaders.end())
  {
    const VSCacheEntry& entry = iter->second;
    last_uber_uid = uid;
    last_uber_entry = &entry;

    GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
    if (!last_uber_entry->shader)
      return false;

    uber_vertex_format->SetInputLayout(last_uber_entry->bytecode);
    D3D::stateman->SetVertexShader(last_uber_entry->shader);
    return true;
  }

  // Need to compile a new shader
  D3DBlob* bytecode = nullptr;
  ShaderCode code =
      UberShader::GenVertexShader(APIType::D3D, ShaderHostConfig::GetCurrent(), uid.GetUidData());
  D3D::CompileVertexShader(code.GetBuffer(), &bytecode);
  if (!InsertByteCode(uid, bytecode))
  {
    SAFE_RELEASE(bytecode);
    return false;
  }

  g_uber_vs_disk_cache.Append(uid, bytecode->Data(), bytecode->Size());
  bytecode->Release();
  return SetUberShader(vertex_format);
}
void ProgramShaderCache::Init()
{
	// We have to get the UBO alignment here because
	// if we generate a buffer that isn't aligned
	// then the UBO will fail.
	glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &s_ubo_align);

	s_ubo_buffer_size = ROUND_UP(PixelShaderManager::ConstantBufferSize * sizeof(float), s_ubo_align)
	+ ROUND_UP(VertexShaderManager::ConstantBufferSize * sizeof(float), s_ubo_align)
	+ ROUND_UP(sizeof(GeometryShaderConstants), s_ubo_align);

	// We multiply by *4*4 because we need to get down to basic machine units.
	// So multiply by four to get how many floats we have from vec4s
	// Then once more to get bytes
	s_buffer = StreamBuffer::Create(GL_UNIFORM_BUFFER, UBO_LENGTH);

	// Read our shader cache, only if supported
	if (g_ogl_config.bSupportsGLSLCache && !g_Config.bEnableShaderDebugging)
	{
		GLint Supported;
		glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &Supported);
		if (!Supported)
		{
			ERROR_LOG(VIDEO, "GL_ARB_get_program_binary is supported, but no binary format is known. So disable shader cache.");
			g_ogl_config.bSupportsGLSLCache = false;
		}
		else
		{
			if (!File::Exists(File::GetUserPath(D_SHADERCACHE_IDX)))
				File::CreateDir(File::GetUserPath(D_SHADERCACHE_IDX));

			std::string cache_filename = StringFromFormat("%sIOGL-%s-shaders.cache", File::GetUserPath(D_SHADERCACHE_IDX).c_str(),
				SConfig::GetInstance().m_strUniqueID.c_str());

			ProgramShaderCacheInserter inserter;
			g_program_disk_cache.OpenAndRead(cache_filename, inserter);
		}
		SETSTAT(stats.numPixelShadersAlive, pshaders.size());
	}

	CreateHeader();

	CurrentProgram = 0;
	last_entry = nullptr;
}
Exemple #14
0
bool VertexShaderCache::SetShader()
{
  VertexShaderUid uid = GetVertexShaderUid();

  if (last_entry)
  {
    if (uid == last_uid)
    {
      GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
      return (last_entry->shader != nullptr);
    }
  }

  last_uid = uid;

  VSCache::iterator iter = vshaders.find(uid);
  if (iter != vshaders.end())
  {
    const VSCacheEntry& entry = iter->second;
    last_entry = &entry;

    GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
    return (entry.shader != nullptr);
  }

  ShaderCode code = GenerateVertexShaderCode(API_D3D, uid.GetUidData());

  D3DBlob* pbytecode = nullptr;
  D3D::CompileVertexShader(code.GetBuffer(), &pbytecode);

  if (pbytecode == nullptr)
  {
    GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
    return false;
  }
  g_vs_disk_cache.Append(uid, pbytecode->Data(), pbytecode->Size());

  bool success = InsertByteCode(uid, pbytecode);
  pbytecode->Release();

  GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
  return success;
}
void GeometryShaderCache::Init()
{
	unsigned int gscbuf12sizeInBytes = gscbuf12paddedSize * gscbuf12Slots;

	CheckHR(
		D3D::device12->CreateCommittedResource(
		&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD),
		D3D12_HEAP_FLAG_NONE,
		&CD3DX12_RESOURCE_DESC::Buffer(gscbuf12sizeInBytes),
		D3D12_RESOURCE_STATE_GENERIC_READ,
		nullptr,
		IID_PPV_ARGS(&gscbuf12)
		)
		);

	D3D::SetDebugObjectName12(gscbuf12, "vertex shader constant buffer used to emulate the GX pipeline");

	// Obtain persistent CPU pointer to GS Constant Buffer
	CheckHR(gscbuf12->Map(0, nullptr, &gscbuf12data));

	// used when drawing clear quads
	D3D::CompileGeometryShader(clear_shader_code, &ClearGeometryShaderBlob);

	// used for buffer copy
	D3D::CompileGeometryShader(copy_shader_code, &CopyGeometryShaderBlob);

	Clear();

	if (!File::Exists(File::GetUserPath(D_SHADERCACHE_IDX)))
		File::CreateDir(File::GetUserPath(D_SHADERCACHE_IDX));

	// Intentionally share the same cache as DX11, as the shaders are identical. Reduces recompilation when switching APIs.
	std::string cache_filename = StringFromFormat("%sdx11-%s-gs.cache", File::GetUserPath(D_SHADERCACHE_IDX).c_str(),
			SConfig::GetInstance().m_strUniqueID.c_str());
	GeometryShaderCacheInserter inserter;
	g_gs_disk_cache.OpenAndRead(cache_filename, inserter);

	if (g_Config.bEnableShaderDebugging)
		Clear();

	last_entry = nullptr;
	last_uid = {};
}
Exemple #16
0
void VertexShaderCache::UberVertexShaderCompilerWorkItem::Retrieve()
{
  if (InsertShader(m_uid, m_vs, m_bytecode))
    g_uber_vs_disk_cache.Append(m_uid, m_bytecode->Data(), m_bytecode->Size());
}
Exemple #17
0
namespace DX12
{
// Primitive topology type is always triangle, unless the GS stage is used. This is consumed
// by the PSO created in Renderer::ApplyState.
static D3D12_PRIMITIVE_TOPOLOGY_TYPE s_current_primitive_topology =
    D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;

using GsBytecodeCache = std::map<GeometryShaderUid, D3D12_SHADER_BYTECODE>;
using PsBytecodeCache = std::map<PixelShaderUid, D3D12_SHADER_BYTECODE>;
using VsBytecodeCache = std::map<VertexShaderUid, D3D12_SHADER_BYTECODE>;
GsBytecodeCache s_gs_bytecode_cache;
PsBytecodeCache s_ps_bytecode_cache;
VsBytecodeCache s_vs_bytecode_cache;

// Used to keep track of blobs to release at Shutdown time.
static std::vector<ID3DBlob*> s_shader_blob_list;

static LinearDiskCache<GeometryShaderUid, u8> s_gs_disk_cache;
static LinearDiskCache<PixelShaderUid, u8> s_ps_disk_cache;
static LinearDiskCache<VertexShaderUid, u8> s_vs_disk_cache;

static D3D12_SHADER_BYTECODE s_last_geometry_shader_bytecode;
static D3D12_SHADER_BYTECODE s_last_pixel_shader_bytecode;
static D3D12_SHADER_BYTECODE s_last_vertex_shader_bytecode;
static GeometryShaderUid s_last_geometry_shader_uid;
static PixelShaderUid s_last_pixel_shader_uid;
static VertexShaderUid s_last_vertex_shader_uid;

template <class UidType, class ShaderCacheType, ShaderCacheType* cache>
class ShaderCacheInserter final : public LinearDiskCacheReader<UidType, u8>
{
public:
  void Read(const UidType& key, const u8* value, u32 value_size)
  {
    ID3DBlob* blob = nullptr;
    CheckHR(d3d_create_blob(value_size, &blob));
    memcpy(blob->GetBufferPointer(), value, value_size);

    ShaderCache::InsertByteCode<UidType, ShaderCacheType>(key, cache, blob);
  }
};

void ShaderCache::Init()
{
  // This class intentionally shares its shader cache files with DX11, as the shaders are (right
  // now) identical.
  // Reduces unnecessary compilation when switching between APIs.

  s_last_geometry_shader_bytecode = {};
  s_last_pixel_shader_bytecode = {};
  s_last_vertex_shader_bytecode = {};
  s_last_geometry_shader_uid = {};
  s_last_pixel_shader_uid = {};
  s_last_vertex_shader_uid = {};

  // Ensure shader cache directory exists..
  std::string shader_cache_path = File::GetUserPath(D_SHADERCACHE_IDX);

  if (!File::Exists(shader_cache_path))
    File::CreateDir(File::GetUserPath(D_SHADERCACHE_IDX));

  std::string title_game_id = SConfig::GetInstance().m_strGameID.c_str();

  std::string gs_cache_filename =
      StringFromFormat("%sdx11-%s-gs.cache", shader_cache_path.c_str(), title_game_id.c_str());
  std::string ps_cache_filename =
      StringFromFormat("%sdx11-%s-ps.cache", shader_cache_path.c_str(), title_game_id.c_str());
  std::string vs_cache_filename =
      StringFromFormat("%sdx11-%s-vs.cache", shader_cache_path.c_str(), title_game_id.c_str());

  ShaderCacheInserter<GeometryShaderUid, GsBytecodeCache, &s_gs_bytecode_cache> gs_inserter;
  s_gs_disk_cache.OpenAndRead(gs_cache_filename, gs_inserter);

  ShaderCacheInserter<PixelShaderUid, PsBytecodeCache, &s_ps_bytecode_cache> ps_inserter;
  s_ps_disk_cache.OpenAndRead(ps_cache_filename, ps_inserter);

  ShaderCacheInserter<VertexShaderUid, VsBytecodeCache, &s_vs_bytecode_cache> vs_inserter;
  s_vs_disk_cache.OpenAndRead(vs_cache_filename, vs_inserter);

  SETSTAT(stats.numPixelShadersAlive, static_cast<int>(s_ps_bytecode_cache.size()));
  SETSTAT(stats.numPixelShadersCreated, static_cast<int>(s_ps_bytecode_cache.size()));
  SETSTAT(stats.numVertexShadersAlive, static_cast<int>(s_vs_bytecode_cache.size()));
  SETSTAT(stats.numVertexShadersCreated, static_cast<int>(s_vs_bytecode_cache.size()));
}

void ShaderCache::Clear()
{
  for (auto& iter : s_shader_blob_list)
    SAFE_RELEASE(iter);

  s_shader_blob_list.clear();

  s_gs_bytecode_cache.clear();
  s_ps_bytecode_cache.clear();
  s_vs_bytecode_cache.clear();

  s_last_geometry_shader_bytecode = {};
  s_last_geometry_shader_uid = {};

  s_last_pixel_shader_bytecode = {};
  s_last_pixel_shader_uid = {};

  s_last_vertex_shader_bytecode = {};
  s_last_vertex_shader_uid = {};
}

void ShaderCache::Shutdown()
{
  Clear();

  s_gs_disk_cache.Sync();
  s_gs_disk_cache.Close();
  s_ps_disk_cache.Sync();
  s_ps_disk_cache.Close();
  s_vs_disk_cache.Sync();
  s_vs_disk_cache.Close();
}

void ShaderCache::LoadAndSetActiveShaders(DSTALPHA_MODE ps_dst_alpha_mode, u32 gs_primitive_type)
{
  SetCurrentPrimitiveTopology(gs_primitive_type);

  GeometryShaderUid gs_uid = GetGeometryShaderUid(gs_primitive_type);
  PixelShaderUid ps_uid = GetPixelShaderUid(ps_dst_alpha_mode);
  VertexShaderUid vs_uid = GetVertexShaderUid();

  bool gs_changed = gs_uid != s_last_geometry_shader_uid;
  bool ps_changed = ps_uid != s_last_pixel_shader_uid;
  bool vs_changed = vs_uid != s_last_vertex_shader_uid;

  if (!gs_changed && !ps_changed && !vs_changed)
  {
    return;
  }

  if (gs_changed)
  {
    HandleGSUIDChange(gs_uid, gs_primitive_type);
  }

  if (ps_changed)
  {
    HandlePSUIDChange(ps_uid, ps_dst_alpha_mode);
  }

  if (vs_changed)
  {
    HandleVSUIDChange(vs_uid);
  }

  // A Uid has changed, so the PSO will need to be reset at next ApplyState.
  D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PSO, true);
}

void ShaderCache::SetCurrentPrimitiveTopology(u32 gs_primitive_type)
{
  switch (gs_primitive_type)
  {
  case PRIMITIVE_TRIANGLES:
    s_current_primitive_topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
    break;
  case PRIMITIVE_LINES:
    s_current_primitive_topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
    break;
  case PRIMITIVE_POINTS:
    s_current_primitive_topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT;
    break;
  default:
    CHECK(0, "Invalid primitive type.");
    break;
  }
}

void ShaderCache::HandleGSUIDChange(GeometryShaderUid gs_uid, u32 gs_primitive_type)
{
  s_last_geometry_shader_uid = gs_uid;

  if (gs_uid.GetUidData()->IsPassthrough())
  {
    s_last_geometry_shader_bytecode = {};
    return;
  }

  auto gs_iterator = s_gs_bytecode_cache.find(gs_uid);
  if (gs_iterator != s_gs_bytecode_cache.end())
  {
    s_last_geometry_shader_bytecode = gs_iterator->second;
  }
  else
  {
    ShaderCode gs_code = GenerateGeometryShaderCode(APIType::D3D, gs_uid.GetUidData());
    ID3DBlob* gs_bytecode = nullptr;

    if (!D3D::CompileGeometryShader(gs_code.GetBuffer(), &gs_bytecode))
    {
      GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
      return;
    }

    s_last_geometry_shader_bytecode = InsertByteCode(gs_uid, &s_gs_bytecode_cache, gs_bytecode);
    s_gs_disk_cache.Append(gs_uid, reinterpret_cast<u8*>(gs_bytecode->GetBufferPointer()),
                           static_cast<u32>(gs_bytecode->GetBufferSize()));
  }
}

void ShaderCache::HandlePSUIDChange(PixelShaderUid ps_uid, DSTALPHA_MODE ps_dst_alpha_mode)
{
  s_last_pixel_shader_uid = ps_uid;

  auto ps_iterator = s_ps_bytecode_cache.find(ps_uid);
  if (ps_iterator != s_ps_bytecode_cache.end())
  {
    s_last_pixel_shader_bytecode = ps_iterator->second;
    GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
  }
  else
  {
    ShaderCode ps_code = GeneratePixelShaderCode(APIType::D3D, ps_uid.GetUidData());
    ID3DBlob* ps_bytecode = nullptr;

    if (!D3D::CompilePixelShader(ps_code.GetBuffer(), &ps_bytecode))
    {
      GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
      return;
    }

    s_last_pixel_shader_bytecode = InsertByteCode(ps_uid, &s_ps_bytecode_cache, ps_bytecode);
    s_ps_disk_cache.Append(ps_uid, reinterpret_cast<u8*>(ps_bytecode->GetBufferPointer()),
                           static_cast<u32>(ps_bytecode->GetBufferSize()));

    SETSTAT(stats.numPixelShadersAlive, static_cast<int>(s_ps_bytecode_cache.size()));
    INCSTAT(stats.numPixelShadersCreated);
  }
}

void ShaderCache::HandleVSUIDChange(VertexShaderUid vs_uid)
{
  s_last_vertex_shader_uid = vs_uid;

  auto vs_iterator = s_vs_bytecode_cache.find(vs_uid);
  if (vs_iterator != s_vs_bytecode_cache.end())
  {
    s_last_vertex_shader_bytecode = vs_iterator->second;
    GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
  }
  else
  {
    ShaderCode vs_code = GenerateVertexShaderCode(APIType::D3D, vs_uid.GetUidData());
    ID3DBlob* vs_bytecode = nullptr;

    if (!D3D::CompileVertexShader(vs_code.GetBuffer(), &vs_bytecode))
    {
      GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
      return;
    }

    s_last_vertex_shader_bytecode = InsertByteCode(vs_uid, &s_vs_bytecode_cache, vs_bytecode);
    s_vs_disk_cache.Append(vs_uid, reinterpret_cast<u8*>(vs_bytecode->GetBufferPointer()),
                           static_cast<u32>(vs_bytecode->GetBufferSize()));

    SETSTAT(stats.numVertexShadersAlive, static_cast<int>(s_vs_bytecode_cache.size()));
    INCSTAT(stats.numVertexShadersCreated);
  }
}

template <class UidType, class ShaderCacheType>
D3D12_SHADER_BYTECODE ShaderCache::InsertByteCode(const UidType& uid, ShaderCacheType* shader_cache,
                                                  ID3DBlob* bytecode_blob)
{
  // Note: Don't release the incoming bytecode, we need it to stick around, since in D3D12
  // the raw bytecode itself is bound. It is released at Shutdown() time.

  s_shader_blob_list.push_back(bytecode_blob);

  D3D12_SHADER_BYTECODE shader_bytecode;
  shader_bytecode.pShaderBytecode = bytecode_blob->GetBufferPointer();
  shader_bytecode.BytecodeLength = bytecode_blob->GetBufferSize();

  (*shader_cache)[uid] = shader_bytecode;

  return shader_bytecode;
}

D3D12_PRIMITIVE_TOPOLOGY_TYPE ShaderCache::GetCurrentPrimitiveTopology()
{
  return s_current_primitive_topology;
}

D3D12_SHADER_BYTECODE ShaderCache::GetActiveGeometryShaderBytecode()
{
  return s_last_geometry_shader_bytecode;
}
D3D12_SHADER_BYTECODE ShaderCache::GetActivePixelShaderBytecode()
{
  return s_last_pixel_shader_bytecode;
}
D3D12_SHADER_BYTECODE ShaderCache::GetActiveVertexShaderBytecode()
{
  return s_last_vertex_shader_bytecode;
}

const GeometryShaderUid* ShaderCache::GetActiveGeometryShaderUid()
{
  return &s_last_geometry_shader_uid;
}
const PixelShaderUid* ShaderCache::GetActivePixelShaderUid()
{
  return &s_last_pixel_shader_uid;
}
const VertexShaderUid* ShaderCache::GetActiveVertexShaderUid()
{
  return &s_last_vertex_shader_uid;
}

D3D12_SHADER_BYTECODE ShaderCache::GetGeometryShaderFromUid(const GeometryShaderUid* uid)
{
  auto bytecode = s_gs_bytecode_cache.find(*uid);
  if (bytecode != s_gs_bytecode_cache.end())
    return bytecode->second;

  return D3D12_SHADER_BYTECODE();
}

D3D12_SHADER_BYTECODE ShaderCache::GetPixelShaderFromUid(const PixelShaderUid* uid)
{
  auto bytecode = s_ps_bytecode_cache.find(*uid);
  if (bytecode != s_ps_bytecode_cache.end())
    return bytecode->second;

  return D3D12_SHADER_BYTECODE();
}

D3D12_SHADER_BYTECODE ShaderCache::GetVertexShaderFromUid(const VertexShaderUid* uid)
{
  auto bytecode = s_vs_bytecode_cache.find(*uid);
  if (bytecode != s_vs_bytecode_cache.end())
    return bytecode->second;

  return D3D12_SHADER_BYTECODE();
}
}
void VertexShaderCache::Init()
{
	char* vProg = new char[2048];
	sprintf(vProg,"struct VSOUTPUT\n"
						"{\n"
							"float4 vPosition : POSITION;\n"
							"float2 vTexCoord : TEXCOORD0;\n"
							"float vTexCoord1 : TEXCOORD1;\n"
						"};\n"
						"VSOUTPUT main(float4 inPosition : POSITION,float2 inTEX0 : TEXCOORD0,float2 inTEX1 : TEXCOORD1,float inTEX2 : TEXCOORD2)\n"
						"{\n"
							"VSOUTPUT OUT;\n"
							"OUT.vPosition = inPosition;\n"
							"OUT.vTexCoord = inTEX0;\n"
							"OUT.vTexCoord1 = inTEX2;\n"
							"return OUT;\n"
						"}\n");

	SimpleVertexShader[0] = D3D::CompileAndCreateVertexShader(vProg, (int)strlen(vProg));

	sprintf(vProg,"struct VSOUTPUT\n"
						"{\n"
							"float4 vPosition   : POSITION;\n"
							"float4 vColor0   : COLOR0;\n"
						"};\n"
						"VSOUTPUT main(float4 inPosition : POSITION,float4 inColor0: COLOR0)\n"
						"{\n"
							"VSOUTPUT OUT;\n"
							"OUT.vPosition = inPosition;\n"
							"OUT.vColor0 = inColor0;\n"
							"return OUT;\n"
						"}\n");

	ClearVertexShader = D3D::CompileAndCreateVertexShader(vProg, (int)strlen(vProg));
	sprintf(vProg,	"struct VSOUTPUT\n"
						"{\n"
							"float4 vPosition   : POSITION;\n"
							"float2 vTexCoord   : TEXCOORD0;\n"
							"float vTexCoord1   : TEXCOORD1;\n"
						"};\n"
						"VSOUTPUT main(float4 inPosition : POSITION,float2 inTEX0 : TEXCOORD0,float2 inInvTexSize : TEXCOORD1,float inTEX2 : TEXCOORD2)\n"
						"{\n"
							"VSOUTPUT OUT;"
							"OUT.vPosition = inPosition;\n"
							// HACK: Scale the texture coordinate range from (0,width) to (0,width-1), otherwise the linear filter won't average our samples correctly
							"OUT.vTexCoord  = inTEX0 * (float2(1.f,1.f) / inInvTexSize - float2(1.f,1.f)) * inInvTexSize;\n"
							"OUT.vTexCoord1 = inTEX2;\n"
							"return OUT;\n"
						"}\n");
	SimpleVertexShader[1] = D3D::CompileAndCreateVertexShader(vProg, (int)strlen(vProg));

	sprintf(vProg,	"struct VSOUTPUT\n"
						"{\n"
							"float4 vPosition   : POSITION;\n"
							"float4 vTexCoord   : TEXCOORD0;\n"
							"float  vTexCoord1   : TEXCOORD1;\n"
							"float4 vTexCoord2   : TEXCOORD2;\n"   
							"float4 vTexCoord3   : TEXCOORD3;\n"
						"};\n"
						"VSOUTPUT main(float4 inPosition : POSITION,float2 inTEX0 : TEXCOORD0,float2 inTEX1 : TEXCOORD1,float inTEX2 : TEXCOORD2)\n"
						"{\n"
							"VSOUTPUT OUT;"
							"OUT.vPosition = inPosition;\n"
							"OUT.vTexCoord  = inTEX0.xyyx;\n"
							"OUT.vTexCoord1 = inTEX2.x;\n"
							"OUT.vTexCoord2 = inTEX0.xyyx + (float4(-1.0f,-0.5f, 1.0f,-0.5f) * inTEX1.xyyx);\n"
							"OUT.vTexCoord3 = inTEX0.xyyx + (float4( 1.0f, 0.5f,-1.0f, 0.5f) * inTEX1.xyyx);\n"	
							"return OUT;\n"
						"}\n");
	SimpleVertexShader[2] = D3D::CompileAndCreateVertexShader(vProg, (int)strlen(vProg));	
	
	Clear();
	delete [] vProg;

	if (!File::Exists(File::GetUserPath(D_SHADERCACHE_IDX)))
		File::CreateDir(File::GetUserPath(D_SHADERCACHE_IDX).c_str());

	SETSTAT(stats.numVertexShadersCreated, 0);
	SETSTAT(stats.numVertexShadersAlive, 0);

	char cache_filename[MAX_PATH];
	sprintf(cache_filename, "%sdx9-%s-vs.cache", File::GetUserPath(D_SHADERCACHE_IDX).c_str(),
			SConfig::GetInstance().m_LocalCoreStartupParameter.m_strUniqueID.c_str());
	VertexShaderCacheInserter inserter;
	g_vs_disk_cache.OpenAndRead(cache_filename, inserter);
}
namespace DX11
{

GeometryShaderCache::GSCache GeometryShaderCache::GeometryShaders;
const GeometryShaderCache::GSCacheEntry* GeometryShaderCache::last_entry;
GeometryShaderUid GeometryShaderCache::last_uid;
UidChecker<GeometryShaderUid,ShaderCode> GeometryShaderCache::geometry_uid_checker;
const GeometryShaderCache::GSCacheEntry GeometryShaderCache::pass_entry;

ID3D11GeometryShader* ClearGeometryShader = nullptr;
ID3D11GeometryShader* CopyGeometryShader = nullptr;

LinearDiskCache<GeometryShaderUid, u8> g_gs_disk_cache;

ID3D11GeometryShader* GeometryShaderCache::GetClearGeometryShader() { return (g_ActiveConfig.iStereoMode > 0) ? ClearGeometryShader: nullptr; }
ID3D11GeometryShader* GeometryShaderCache::GetCopyGeometryShader() { return (g_ActiveConfig.iStereoMode > 0) ? CopyGeometryShader : nullptr; }

ID3D11Buffer* gscbuf = nullptr;

ID3D11Buffer* &GeometryShaderCache::GetConstantBuffer()
{
	// TODO: divide the global variables of the generated shaders into about 5 constant buffers to speed this up
	if (GeometryShaderManager::dirty)
	{
		D3D11_MAPPED_SUBRESOURCE map;
		D3D::context->Map(gscbuf, 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
		memcpy(map.pData, &GeometryShaderManager::constants, sizeof(GeometryShaderConstants));
		D3D::context->Unmap(gscbuf, 0);
		GeometryShaderManager::dirty = false;

		ADDSTAT(stats.thisFrame.bytesUniformStreamed, sizeof(GeometryShaderConstants));
	}
	return gscbuf;
}

// this class will load the precompiled shaders into our cache
class GeometryShaderCacheInserter : public LinearDiskCacheReader<GeometryShaderUid, u8>
{
public:
	void Read(const GeometryShaderUid &key, const u8* value, u32 value_size)
	{
		GeometryShaderCache::InsertByteCode(key, value, value_size);
	}
};

const char clear_shader_code[] = {
	"struct VSOUTPUT\n"
	"{\n"
	"	float4 vPosition   : POSITION;\n"
	"	float4 vColor0   : COLOR0;\n"
	"};\n"
	"struct GSOUTPUT\n"
	"{\n"
	"	float4 vPosition   : POSITION;\n"
	"	float4 vColor0   : COLOR0;\n"
	"	uint slice    : SV_RenderTargetArrayIndex;\n"
	"};\n"
	"[maxvertexcount(6)]\n"
	"void main(triangle VSOUTPUT o[3], inout TriangleStream<GSOUTPUT> Output)\n"
	"{\n"
	"for(int slice = 0; slice < 2; slice++)\n"
	"{\n"
	"	for(int i = 0; i < 3; i++)\n"
	"	{\n"
	"		GSOUTPUT OUT;\n"
	"		OUT.vPosition = o[i].vPosition;\n"
	"		OUT.vColor0 = o[i].vColor0;\n"
	"		OUT.slice = slice;\n"
	"		Output.Append(OUT);\n"
	"	}\n"
	"	Output.RestartStrip();\n"
	"}\n"
	"}\n"
};

const char copy_shader_code[] = {
	"struct VSOUTPUT\n"
	"{\n"
	"	float4 vPosition : POSITION;\n"
	"	float3 vTexCoord : TEXCOORD0;\n"
	"	float  vTexCoord1 : TEXCOORD1;\n"
	"};\n"
	"struct GSOUTPUT\n"
	"{\n"
	"	float4 vPosition : POSITION;\n"
	"	float3 vTexCoord : TEXCOORD0;\n"
	"	float  vTexCoord1 : TEXCOORD1;\n"
	"	uint slice    : SV_RenderTargetArrayIndex;\n"
	"};\n"
	"[maxvertexcount(6)]\n"
	"void main(triangle VSOUTPUT o[3], inout TriangleStream<GSOUTPUT> Output)\n"
	"{\n"
	"for(int slice = 0; slice < 2; slice++)\n"
	"{\n"
	"	for(int i = 0; i < 3; i++)\n"
	"	{\n"
	"		GSOUTPUT OUT;\n"
	"		OUT.vPosition = o[i].vPosition;\n"
	"		OUT.vTexCoord = o[i].vTexCoord;\n"
	"		OUT.vTexCoord.z = slice;\n"
	"		OUT.vTexCoord1 = o[i].vTexCoord1;\n"
	"		OUT.slice = slice;\n"
	"		Output.Append(OUT);\n"
	"	}\n"
	"	Output.RestartStrip();\n"
	"}\n"
	"}\n"
};

void GeometryShaderCache::Init()
{
	unsigned int gbsize = ROUND_UP(sizeof(GeometryShaderConstants), 16); // must be a multiple of 16
	D3D11_BUFFER_DESC gbdesc = CD3D11_BUFFER_DESC(gbsize, D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DYNAMIC, D3D11_CPU_ACCESS_WRITE);
	HRESULT hr = D3D::device->CreateBuffer(&gbdesc, nullptr, &gscbuf);
	CHECK(hr == S_OK, "Create geometry shader constant buffer (size=%u)", gbsize);
	D3D::SetDebugObjectName((ID3D11DeviceChild*)gscbuf, "geometry shader constant buffer used to emulate the GX pipeline");

	// used when drawing clear quads
	ClearGeometryShader = D3D::CompileAndCreateGeometryShader(clear_shader_code);
	CHECK(ClearGeometryShader != nullptr, "Create clear geometry shader");
	D3D::SetDebugObjectName((ID3D11DeviceChild*)ClearGeometryShader, "clear geometry shader");

	// used for buffer copy
	CopyGeometryShader = D3D::CompileAndCreateGeometryShader(copy_shader_code);
	CHECK(CopyGeometryShader != nullptr, "Create copy geometry shader");
	D3D::SetDebugObjectName((ID3D11DeviceChild*)CopyGeometryShader, "copy geometry shader");

	Clear();

	if (!File::Exists(File::GetUserPath(D_SHADERCACHE_IDX)))
		File::CreateDir(File::GetUserPath(D_SHADERCACHE_IDX));

	std::string cache_filename = StringFromFormat("%sdx11-%s-gs.cache", File::GetUserPath(D_SHADERCACHE_IDX).c_str(),
			SConfig::GetInstance().m_strUniqueID.c_str());
	GeometryShaderCacheInserter inserter;
	g_gs_disk_cache.OpenAndRead(cache_filename, inserter);

	if (g_Config.bEnableShaderDebugging)
		Clear();

	last_entry = nullptr;
}

// ONLY to be used during shutdown.
void GeometryShaderCache::Clear()
{
	for (auto& iter : GeometryShaders)
		iter.second.Destroy();
	GeometryShaders.clear();
	geometry_uid_checker.Invalidate();

	last_entry = nullptr;
}

void GeometryShaderCache::Shutdown()
{
	SAFE_RELEASE(gscbuf);

	SAFE_RELEASE(ClearGeometryShader);
	SAFE_RELEASE(CopyGeometryShader);

	Clear();
	g_gs_disk_cache.Sync();
	g_gs_disk_cache.Close();
}

bool GeometryShaderCache::SetShader(u32 primitive_type)
{
	GeometryShaderUid uid = GetGeometryShaderUid(primitive_type, API_D3D);
	if (g_ActiveConfig.bEnableShaderDebugging)
	{
		ShaderCode code = GenerateGeometryShaderCode(primitive_type, API_D3D);
		geometry_uid_checker.AddToIndexAndCheck(code, uid, "Geometry", "g");
	}

	// Check if the shader is already set
	if (last_entry)
	{
		if (uid == last_uid)
		{
			GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE,true);
			return true;
		}
	}

	last_uid = uid;

	// Check if the shader is a pass-through shader
	if (uid.GetUidData()->IsPassthrough())
	{
		// Return the default pass-through shader
		last_entry = &pass_entry;
		return true;
	}

	// Check if the shader is already in the cache
	GSCache::iterator iter;
	iter = GeometryShaders.find(uid);
	if (iter != GeometryShaders.end())
	{
		const GSCacheEntry &entry = iter->second;
		last_entry = &entry;

		return (entry.shader != nullptr);
	}

	// Need to compile a new shader
	ShaderCode code = GenerateGeometryShaderCode(primitive_type, API_D3D);

	D3DBlob* pbytecode;
	if (!D3D::CompileGeometryShader(code.GetBuffer(), &pbytecode))
	{
		GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
		return false;
	}

	// Insert the bytecode into the caches
	g_gs_disk_cache.Append(uid, pbytecode->Data(), pbytecode->Size());

	bool success = InsertByteCode(uid, pbytecode->Data(), pbytecode->Size());
	pbytecode->Release();

	if (g_ActiveConfig.bEnableShaderDebugging && success)
	{
		GeometryShaders[uid].code = code.GetBuffer();
	}

	return success;
}

bool GeometryShaderCache::InsertByteCode(const GeometryShaderUid &uid, const void* bytecode, unsigned int bytecodelen)
{
	ID3D11GeometryShader* shader = D3D::CreateGeometryShaderFromByteCode(bytecode, bytecodelen);
	if (shader == nullptr)
		return false;

	// TODO: Somehow make the debug name a bit more specific
	D3D::SetDebugObjectName((ID3D11DeviceChild*)shader, "a pixel shader of GeometryShaderCache");

	// Make an entry in the table
	GSCacheEntry newentry;
	newentry.shader = shader;
	GeometryShaders[uid] = newentry;
	last_entry = &GeometryShaders[uid];

	if (!shader)
		return false;

	return true;
}

}  // DX11
Exemple #20
0
void ShaderCache::HandleTSUIDChange(
	const TessellationShaderUid& ts_uid,
	bool on_gpu_thread)
{
	s_shaders_lock.lock();
	std::pair<ByteCodeCacheEntry, ByteCodeCacheEntry>& entry = ts_bytecode_cache->GetOrAdd(ts_uid);
	s_shaders_lock.unlock();
	ByteCodeCacheEntry* dentry = &entry.first;
	ByteCodeCacheEntry* hentry = &entry.second;
	if (on_gpu_thread)
	{
		if (dentry->m_compiled && hentry->m_compiled)
		{
			s_last_domain_shader_bytecode = dentry;
			s_last_hull_shader_bytecode = hentry;
		}
		else
		{
			s_last_tessellation_shader_uid = {};
			s_last_domain_shader_bytecode = &s_pass_entry;
			s_last_hull_shader_bytecode = &s_pass_entry;
		}
	}
	if (dentry->m_initialized.test_and_set())
	{
		return;
	}
	hentry->m_initialized.test_and_set();

	// Need to compile a new shader
	ShaderCode code;
	ShaderCompilerWorkUnit *wunit = s_compiler->NewUnit(TESSELLATIONSHADERGEN_BUFFERSIZE);
	ShaderCompilerWorkUnit *wunitd = s_compiler->NewUnit(TESSELLATIONSHADERGEN_BUFFERSIZE);
	code.SetBuffer(wunit->code.data());
	GenerateTessellationShaderCode(code, API_D3D11, ts_uid.GetUidData());
	memcpy(wunitd->code.data(), wunit->code.data(), code.BufferSize());

	wunit->codesize = (u32)code.BufferSize();
	wunit->entrypoint = "HS_TFO";
	wunit->flags = D3DCOMPILE_SKIP_VALIDATION | D3DCOMPILE_SKIP_OPTIMIZATION;
	wunit->target = D3D::HullShaderVersionString();

	wunitd->codesize = (u32)code.BufferSize();
	wunitd->entrypoint = "DS_TFO";
	wunitd->flags = D3DCOMPILE_SKIP_VALIDATION | D3DCOMPILE_OPTIMIZATION_LEVEL3;
	wunitd->target = D3D::DomainShaderVersionString();

	wunitd->ResultHandler = [ts_uid, dentry](ShaderCompilerWorkUnit* wunit)
	{
		if (SUCCEEDED(wunit->cresult))
		{
			D3DBlob* shaderBuffer = new D3DBlob(wunit->shaderbytecode);
			s_ds_disk_cache.Append(ts_uid, shaderBuffer->Data(), shaderBuffer->Size());
			PushByteCode(dentry, shaderBuffer);
			wunit->shaderbytecode->Release();
			wunit->shaderbytecode = nullptr;
		}
		else
		{
			static int num_failures = 0;
			std::string filename = StringFromFormat("%sbad_ds_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++);
			std::ofstream file;
			OpenFStream(file, filename, std::ios_base::out);
			file << ((const char *)wunit->code.data());
			file << ((const char *)wunit->error->GetBufferPointer());
			file.close();

			PanicAlert("Failed to compile domain shader!\nThis usually happens when trying to use Dolphin with an outdated GPU or integrated GPU like the Intel GMA series.\n\nIf you're sure this is Dolphin's error anyway, post the contents of %s along with this error message at the forums.\n\nDebug info (%s):\n%s",
				filename,
				D3D::DomainShaderVersionString(),
				(char*)wunit->error->GetBufferPointer());
		}
	};

	wunit->ResultHandler = [ts_uid, hentry](ShaderCompilerWorkUnit* wunit)
	{
		if (SUCCEEDED(wunit->cresult))
		{
			D3DBlob* shaderBuffer = new D3DBlob(wunit->shaderbytecode);
			s_hs_disk_cache.Append(ts_uid, shaderBuffer->Data(), shaderBuffer->Size());
			PushByteCode(hentry, shaderBuffer);
			wunit->shaderbytecode->Release();
			wunit->shaderbytecode = nullptr;
		}
		else
		{
			static int num_failures = 0;
			std::string filename = StringFromFormat("%sbad_hs_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++);
			std::ofstream file;
			OpenFStream(file, filename, std::ios_base::out);
			file << ((const char *)wunit->code.data());
			file << ((const char *)wunit->error->GetBufferPointer());
			file.close();

			PanicAlert("Failed to compile hull shader!\nThis usually happens when trying to use Dolphin with an outdated GPU or integrated GPU like the Intel GMA series.\n\nIf you're sure this is Dolphin's error anyway, post the contents of %s along with this error message at the forums.\n\nDebug info (%s):\n%s",
				filename,
				D3D::HullShaderVersionString(),
				(char*)wunit->error->GetBufferPointer());
		}
	};

	s_compiler->CompileShaderAsync(wunit);
	s_compiler->CompileShaderAsync(wunitd);
}
namespace DX9
{

VertexShaderCache::VSCache VertexShaderCache::vshaders;
const VertexShaderCache::VSCacheEntry *VertexShaderCache::last_entry;

#define MAX_SSAA_SHADERS 3

static LPDIRECT3DVERTEXSHADER9 SimpleVertexShader[MAX_SSAA_SHADERS];
static LPDIRECT3DVERTEXSHADER9 ClearVertexShader;

LinearDiskCache<VERTEXSHADERUID, u8> g_vs_disk_cache;

LPDIRECT3DVERTEXSHADER9 VertexShaderCache::GetSimpleVertexShader(int level)
{
	return SimpleVertexShader[level % MAX_SSAA_SHADERS];
}

LPDIRECT3DVERTEXSHADER9 VertexShaderCache::GetClearVertexShader()
{
	return ClearVertexShader;
}

// this class will load the precompiled shaders into our cache
class VertexShaderCacheInserter : public LinearDiskCacheReader<VERTEXSHADERUID, u8>
{
public:
	void Read(const VERTEXSHADERUID &key, const u8 *value, u32 value_size)
	{
		VertexShaderCache::InsertByteCode(key, value, value_size, false);
	}
};

void VertexShaderCache::Init()
{
	char* vProg = new char[2048];
	sprintf(vProg,"struct VSOUTPUT\n"
						"{\n"
							"float4 vPosition : POSITION;\n"
							"float2 vTexCoord : TEXCOORD0;\n"
							"float vTexCoord1 : TEXCOORD1;\n"
						"};\n"
						"VSOUTPUT main(float4 inPosition : POSITION,float2 inTEX0 : TEXCOORD0,float2 inTEX1 : TEXCOORD1,float inTEX2 : TEXCOORD2)\n"
						"{\n"
							"VSOUTPUT OUT;\n"
							"OUT.vPosition = inPosition;\n"
							"OUT.vTexCoord = inTEX0;\n"
							"OUT.vTexCoord1 = inTEX2;\n"
							"return OUT;\n"
						"}\n");

	SimpleVertexShader[0] = D3D::CompileAndCreateVertexShader(vProg, (int)strlen(vProg));

	sprintf(vProg,"struct VSOUTPUT\n"
						"{\n"
							"float4 vPosition   : POSITION;\n"
							"float4 vColor0   : COLOR0;\n"
						"};\n"
						"VSOUTPUT main(float4 inPosition : POSITION,float4 inColor0: COLOR0)\n"
						"{\n"
							"VSOUTPUT OUT;\n"
							"OUT.vPosition = inPosition;\n"
							"OUT.vColor0 = inColor0;\n"
							"return OUT;\n"
						"}\n");

	ClearVertexShader = D3D::CompileAndCreateVertexShader(vProg, (int)strlen(vProg));
	sprintf(vProg,	"struct VSOUTPUT\n"
						"{\n"
							"float4 vPosition   : POSITION;\n"
							"float2 vTexCoord   : TEXCOORD0;\n"
							"float vTexCoord1   : TEXCOORD1;\n"
						"};\n"
						"VSOUTPUT main(float4 inPosition : POSITION,float2 inTEX0 : TEXCOORD0,float2 inInvTexSize : TEXCOORD1,float inTEX2 : TEXCOORD2)\n"
						"{\n"
							"VSOUTPUT OUT;"
							"OUT.vPosition = inPosition;\n"
							// HACK: Scale the texture coordinate range from (0,width) to (0,width-1), otherwise the linear filter won't average our samples correctly
							"OUT.vTexCoord  = inTEX0 * (float2(1.f,1.f) / inInvTexSize - float2(1.f,1.f)) * inInvTexSize;\n"
							"OUT.vTexCoord1 = inTEX2;\n"
							"return OUT;\n"
						"}\n");
	SimpleVertexShader[1] = D3D::CompileAndCreateVertexShader(vProg, (int)strlen(vProg));

	sprintf(vProg,	"struct VSOUTPUT\n"
						"{\n"
							"float4 vPosition   : POSITION;\n"
							"float4 vTexCoord   : TEXCOORD0;\n"
							"float  vTexCoord1   : TEXCOORD1;\n"
							"float4 vTexCoord2   : TEXCOORD2;\n"   
							"float4 vTexCoord3   : TEXCOORD3;\n"
						"};\n"
						"VSOUTPUT main(float4 inPosition : POSITION,float2 inTEX0 : TEXCOORD0,float2 inTEX1 : TEXCOORD1,float inTEX2 : TEXCOORD2)\n"
						"{\n"
							"VSOUTPUT OUT;"
							"OUT.vPosition = inPosition;\n"
							"OUT.vTexCoord  = inTEX0.xyyx;\n"
							"OUT.vTexCoord1 = inTEX2.x;\n"
							"OUT.vTexCoord2 = inTEX0.xyyx + (float4(-1.0f,-0.5f, 1.0f,-0.5f) * inTEX1.xyyx);\n"
							"OUT.vTexCoord3 = inTEX0.xyyx + (float4( 1.0f, 0.5f,-1.0f, 0.5f) * inTEX1.xyyx);\n"	
							"return OUT;\n"
						"}\n");
	SimpleVertexShader[2] = D3D::CompileAndCreateVertexShader(vProg, (int)strlen(vProg));	
	
	Clear();
	delete [] vProg;

	if (!File::Exists(File::GetUserPath(D_SHADERCACHE_IDX)))
		File::CreateDir(File::GetUserPath(D_SHADERCACHE_IDX).c_str());

	SETSTAT(stats.numVertexShadersCreated, 0);
	SETSTAT(stats.numVertexShadersAlive, 0);

	char cache_filename[MAX_PATH];
	sprintf(cache_filename, "%sdx9-%s-vs.cache", File::GetUserPath(D_SHADERCACHE_IDX).c_str(),
			SConfig::GetInstance().m_LocalCoreStartupParameter.m_strUniqueID.c_str());
	VertexShaderCacheInserter inserter;
	g_vs_disk_cache.OpenAndRead(cache_filename, inserter);
}

void VertexShaderCache::Clear()
{
	for (VSCache::iterator iter = vshaders.begin(); iter != vshaders.end(); ++iter)
		iter->second.Destroy();
	vshaders.clear();

	memset(&last_vertex_shader_uid, 0xFF, sizeof(last_vertex_shader_uid));
}

void VertexShaderCache::Shutdown()
{
	for (int i = 0; i < MAX_SSAA_SHADERS; i++)
	{
		if (SimpleVertexShader[i])
			SimpleVertexShader[i]->Release();
		SimpleVertexShader[i] = NULL;
	}

	if (ClearVertexShader)
		ClearVertexShader->Release();
	ClearVertexShader = NULL;
	
	Clear();
	g_vs_disk_cache.Sync();
	g_vs_disk_cache.Close();
}

bool VertexShaderCache::SetShader(u32 components)
{
	VERTEXSHADERUID uid;
	GetVertexShaderId(&uid, components);
	if (uid == last_vertex_shader_uid && vshaders[uid].frameCount == frameCount)
	{
		GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
		return (vshaders[uid].shader != NULL);
	}

	memcpy(&last_vertex_shader_uid, &uid, sizeof(VERTEXSHADERUID));

	VSCache::iterator iter = vshaders.find(uid);
	if (iter != vshaders.end())
	{
		iter->second.frameCount = frameCount;
		const VSCacheEntry &entry = iter->second;
		last_entry = &entry;

		if (entry.shader) D3D::SetVertexShader(entry.shader);
		GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
		return (entry.shader != NULL);
	}

	const char *code = GenerateVertexShaderCode(components, API_D3D9);
	u8 *bytecode;
	int bytecodelen;
	if (!D3D::CompileVertexShader(code, (int)strlen(code), &bytecode, &bytecodelen))
	{
		if (g_ActiveConfig.bShowShaderErrors)
		{
			PanicAlert("Failed to compile Vertex Shader:\n\n%s", code);
		}
		GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
		return false;
	}
	g_vs_disk_cache.Append(uid, bytecode, bytecodelen);
	g_vs_disk_cache.Sync();

	bool result = InsertByteCode(uid, bytecode, bytecodelen, true);
	delete [] bytecode;
	GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
	return result;
}

bool VertexShaderCache::InsertByteCode(const VERTEXSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate) {
	LPDIRECT3DVERTEXSHADER9 shader = D3D::CreateVertexShaderFromByteCode(bytecode, bytecodelen);

	// Make an entry in the table
	VSCacheEntry entry;
	entry.shader = shader;
	entry.frameCount = frameCount;

	vshaders[uid] = entry;
	last_entry = &vshaders[uid];
	if (!shader)
		return false;

	INCSTAT(stats.numVertexShadersCreated);
	SETSTAT(stats.numVertexShadersAlive, (int)vshaders.size());
	if (activate)
	{
		D3D::SetVertexShader(shader);
		return true;
	}
	return false;
}

void Renderer::SetVSConstant4f(unsigned int const_number, float f1, float f2, float f3, float f4)
{
	const float f[4] = { f1, f2, f3, f4 };
	DX9::D3D::dev->SetVertexShaderConstantF(const_number, f, 1);
}

void Renderer::SetVSConstant4fv(unsigned int const_number, const float *f)
{
	DX9::D3D::dev->SetVertexShaderConstantF(const_number, f, 1);
}

void Renderer::SetMultiVSConstant3fv(unsigned int const_number, unsigned int count, const float *f)
{
	float buf[4*C_VENVCONST_END];
	for (unsigned int i = 0; i < count; i++)
	{
		buf[4*i  ] = *f++;
		buf[4*i+1] = *f++;
		buf[4*i+2] = *f++;
		buf[4*i+3] = 0.f;
	}
	DX9::D3D::dev->SetVertexShaderConstantF(const_number, buf, count);
}

void Renderer::SetMultiVSConstant4fv(unsigned int const_number, unsigned int count, const float *f)
{
	DX9::D3D::dev->SetVertexShaderConstantF(const_number, f, count);
}

}  // namespace DX9
Exemple #22
0
void ShaderCache::HandleGSUIDChange(
	const GeometryShaderUid &gs_uid,
	bool on_gpu_thread)
{
	if (gs_uid.GetUidData().IsPassthrough())
	{
		s_last_geometry_shader_bytecode = &s_pass_entry;
		return;
	}

	s_shaders_lock.lock();
	ByteCodeCacheEntry* entry = &gs_bytecode_cache->GetOrAdd(gs_uid);
	s_shaders_lock.unlock();
	if (on_gpu_thread)
	{
		s_last_geometry_shader_bytecode = entry;
	}

	if (entry->m_initialized.test_and_set())
	{
		return;
	}

	// Need to compile a new shader
	ShaderCompilerWorkUnit *wunit = s_compiler->NewUnit(GEOMETRYSHADERGEN_BUFFERSIZE);
	wunit->GenerateCodeHandler = [gs_uid](ShaderCompilerWorkUnit* wunit)
	{
		ShaderCode code;
		code.SetBuffer(wunit->code.data());
		GenerateGeometryShaderCode(code, gs_uid.GetUidData(), API_D3D11);
		wunit->codesize = (u32)code.BufferSize();
	};

	wunit->entrypoint = "main";
	wunit->flags = D3DCOMPILE_SKIP_VALIDATION | D3DCOMPILE_OPTIMIZATION_LEVEL3;
	wunit->target = D3D::GeometryShaderVersionString();

	wunit->ResultHandler = [gs_uid, entry](ShaderCompilerWorkUnit* wunit)
	{
		if (SUCCEEDED(wunit->cresult))
		{
			D3DBlob* shaderBuffer = new D3DBlob(wunit->shaderbytecode);
			s_gs_disk_cache.Append(gs_uid, shaderBuffer->Data(), shaderBuffer->Size());
			PushByteCode(entry, shaderBuffer);
			wunit->shaderbytecode->Release();
			wunit->shaderbytecode = nullptr;
			SETSTAT(stats.numGeometryShadersAlive, static_cast<int>(ps_bytecode_cache->size()));
			INCSTAT(stats.numGeometryShadersCreated);
		}
		else
		{
			static int num_failures = 0;
			std::string filename = StringFromFormat("%sbad_gs_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++);
			std::ofstream file;
			OpenFStream(file, filename, std::ios_base::out);
			file << ((const char *)wunit->code.data());
			file << ((const char *)wunit->error->GetBufferPointer());
			file.close();

			PanicAlert("Failed to compile geometry shader!\nThis usually happens when trying to use Dolphin with an outdated GPU or integrated GPU like the Intel GMA series.\n\nIf you're sure this is Dolphin's error anyway, post the contents of %s along with this error message at the forums.\n\nDebug info (%s):\n%s",
				filename,
				D3D::GeometryShaderVersionString(),
				(char*)wunit->error->GetBufferPointer());
		}
	};
	s_compiler->CompileShaderAsync(wunit);
}
namespace OGL
{

static const u32 UBO_LENGTH = 32*1024*1024;

GLintptr ProgramShaderCache::s_vs_data_size;
GLintptr ProgramShaderCache::s_ps_data_size;
GLintptr ProgramShaderCache::s_vs_data_offset;
u8 *ProgramShaderCache::s_ubo_buffer;
u32 ProgramShaderCache::s_ubo_buffer_size;
bool ProgramShaderCache::s_ubo_dirty;

static StreamBuffer *s_buffer;
static int num_failures = 0;

LinearDiskCache<SHADERUID, u8> g_program_disk_cache;
static GLuint CurrentProgram = 0;
ProgramShaderCache::PCache ProgramShaderCache::pshaders;
ProgramShaderCache::PCacheEntry* ProgramShaderCache::last_entry;
SHADERUID ProgramShaderCache::last_uid;
UidChecker<PixelShaderUid,PixelShaderCode> ProgramShaderCache::pixel_uid_checker;
UidChecker<VertexShaderUid,VertexShaderCode> ProgramShaderCache::vertex_uid_checker;

static char s_glsl_header[1024] = "";

const char *UniformNames[NUM_UNIFORMS] =
{
	// PIXEL SHADER UNIFORMS
	I_COLORS,
	I_KCOLORS,
	I_ALPHA,
	I_TEXDIMS,
	I_ZBIAS ,
	I_INDTEXSCALE ,
	I_INDTEXMTX,
	I_FOG,
	I_PLIGHTS,
	I_PMATERIALS,
	// VERTEX SHADER UNIFORMS
	I_POSNORMALMATRIX,
	I_PROJECTION ,
	I_MATERIALS,
	I_LIGHTS,
	I_TEXMATRICES,
	I_TRANSFORMMATRICES ,
	I_NORMALMATRICES ,
	I_POSTTRANSFORMMATRICES,
	I_DEPTHPARAMS,
};

void SHADER::SetProgramVariables()
{
	// glsl shader must be bind to set samplers
	Bind();
	
	// Bind UBO 
	if (g_ActiveConfig.backend_info.bSupportsGLSLUBO)
	{
		GLint PSBlock_id = glGetUniformBlockIndex(glprogid, "PSBlock");
		GLint VSBlock_id = glGetUniformBlockIndex(glprogid, "VSBlock");
		
		if(PSBlock_id != -1)
			glUniformBlockBinding(glprogid, PSBlock_id, 1);
		if(VSBlock_id != -1)
			glUniformBlockBinding(glprogid, VSBlock_id, 2);
	}
	
	// We cache our uniform locations for now
	// Once we move up to a newer version of GLSL, ~1.30
	// We can remove this

	// (Sonicadvance): For some reason this fails on my hardware
	//glGetUniformIndices(glprogid, NUM_UNIFORMS, UniformNames, UniformLocations);
	// Got to do it this crappy way.
	UniformLocations[0] = glGetUniformLocation(glprogid, UniformNames[0]);
	if (!g_ActiveConfig.backend_info.bSupportsGLSLUBO)
		for (int a = 1; a < NUM_UNIFORMS; ++a)
			UniformLocations[a] = glGetUniformLocation(glprogid, UniformNames[a]);

	// Bind Texture Sampler
	for (int a = 0; a <= 9; ++a)
	{
		char name[8];
		snprintf(name, 8, "samp%d", a);
		
		// Still need to get sampler locations since we aren't binding them statically in the shaders
		int loc = glGetUniformLocation(glprogid, name);
		if (loc != -1)
			glUniform1i(loc, a);
	}

}

void SHADER::SetProgramBindings()
{
#ifndef USE_GLES3
	if (g_ActiveConfig.backend_info.bSupportsDualSourceBlend)
	{
		// So we do support extended blending
		// So we need to set a few more things here.
		// Bind our out locations
		glBindFragDataLocationIndexed(glprogid, 0, 0, "ocol0");
		glBindFragDataLocationIndexed(glprogid, 0, 1, "ocol1");
	}
	else if(g_ogl_config.eSupportedGLSLVersion > GLSL_120)
	{
		glBindFragDataLocation(glprogid, 0, "ocol0");
	}
	else
	{
		// ogl2 shaders don't need to bind output colors.
		// gl_FragColor already point to color channel
	}
#endif
	// Need to set some attribute locations
	glBindAttribLocation(glprogid, SHADER_POSITION_ATTRIB, "rawpos");
	
	glBindAttribLocation(glprogid, SHADER_POSMTX_ATTRIB,   "fposmtx");
	
	glBindAttribLocation(glprogid, SHADER_COLOR0_ATTRIB,   "color0");
	glBindAttribLocation(glprogid, SHADER_COLOR1_ATTRIB,   "color1");
	
	glBindAttribLocation(glprogid, SHADER_NORM0_ATTRIB,    "rawnorm0");
	glBindAttribLocation(glprogid, SHADER_NORM1_ATTRIB,    "rawnorm1");
	glBindAttribLocation(glprogid, SHADER_NORM2_ATTRIB,    "rawnorm2");
	
	for(int i=0; i<8; i++) {
		char attrib_name[8];
		snprintf(attrib_name, 8, "tex%d", i);
		glBindAttribLocation(glprogid, SHADER_TEXTURE0_ATTRIB+i, attrib_name);
	}
}

void SHADER::Bind()
{
	if(CurrentProgram != glprogid)
	{
		glUseProgram(glprogid);
		CurrentProgram = glprogid;
	}
}


void ProgramShaderCache::SetMultiPSConstant4fv(unsigned int offset, const float *f, unsigned int count)
{
	s_ubo_dirty = true;
	memcpy(s_ubo_buffer+(offset*4*sizeof(float)), f, count*4*sizeof(float));
}

void ProgramShaderCache::SetMultiVSConstant4fv(unsigned int offset, const float *f, unsigned int count)
{
	s_ubo_dirty = true;
	memcpy(s_ubo_buffer+(offset*4*sizeof(float))+s_vs_data_offset, f, count*4*sizeof(float));
}

void ProgramShaderCache::UploadConstants()
{
	if(s_ubo_dirty) {
		s_buffer->Alloc(s_ubo_buffer_size);
		size_t offset = s_buffer->Upload(s_ubo_buffer, s_ubo_buffer_size);
		glBindBufferRange(GL_UNIFORM_BUFFER, 1, s_buffer->getBuffer(), offset, s_ps_data_size);
		glBindBufferRange(GL_UNIFORM_BUFFER, 2, s_buffer->getBuffer(), offset + s_vs_data_offset, s_vs_data_size);
		s_ubo_dirty = false;
		
		ADDSTAT(stats.thisFrame.bytesUniformStreamed, s_ubo_buffer_size);
	}
}

GLuint ProgramShaderCache::GetCurrentProgram(void)
{
	return CurrentProgram;
}

SHADER* ProgramShaderCache::SetShader ( DSTALPHA_MODE dstAlphaMode, u32 components )
{
	SHADERUID uid;
	GetShaderId(&uid, dstAlphaMode, components);

	// Check if the shader is already set
	if (last_entry)
	{
		if (uid == last_uid)
		{
			GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
			last_entry->shader.Bind();
			return &last_entry->shader;
		}
	}

	last_uid = uid;

	// Check if shader is already in cache
	PCache::iterator iter = pshaders.find(uid);
	if (iter != pshaders.end())
	{
		PCacheEntry *entry = &iter->second;
		last_entry = entry;

		GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
		last_entry->shader.Bind();
		return &last_entry->shader;
	}

	// Make an entry in the table
	PCacheEntry& newentry = pshaders[uid];
	last_entry = &newentry;
	newentry.in_cache = 0;

	VertexShaderCode vcode;
	PixelShaderCode pcode;
	GenerateVertexShaderCode(vcode, components, API_OPENGL);
	GeneratePixelShaderCode(pcode, dstAlphaMode, API_OPENGL, components);

	if (g_ActiveConfig.bEnableShaderDebugging)
	{
		newentry.shader.strvprog = vcode.GetBuffer();
		newentry.shader.strpprog = pcode.GetBuffer();
	}

#if defined(_DEBUG) || defined(DEBUGFAST)
	if (g_ActiveConfig.iLog & CONF_SAVESHADERS) {
		static int counter = 0;
		char szTemp[MAX_PATH];
		sprintf(szTemp, "%svs_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++);
		SaveData(szTemp, vcode.GetBuffer());
		sprintf(szTemp, "%sps_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++);
		SaveData(szTemp, pcode.GetBuffer());
	}
#endif

	if (!CompileShader(newentry.shader, vcode.GetBuffer(), pcode.GetBuffer())) {
		GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
		return NULL;
	}

	INCSTAT(stats.numPixelShadersCreated);
	SETSTAT(stats.numPixelShadersAlive, pshaders.size());
	GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
	
	last_entry->shader.Bind();
	return &last_entry->shader;
}

bool ProgramShaderCache::CompileShader ( SHADER& shader, const char* vcode, const char* pcode )
{
	GLuint vsid = CompileSingleShader(GL_VERTEX_SHADER, vcode);
	GLuint psid = CompileSingleShader(GL_FRAGMENT_SHADER, pcode);

	if(!vsid || !psid)
	{
		glDeleteShader(vsid);
		glDeleteShader(psid);
		return false;
	}
	
	GLuint pid = shader.glprogid = glCreateProgram();;
	
	glAttachShader(pid, vsid);
	glAttachShader(pid, psid);

	if (g_ogl_config.bSupportsGLSLCache)
		glProgramParameteri(pid, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE);

	shader.SetProgramBindings();
	
	glLinkProgram(pid);
	
	// original shaders aren't needed any more
	glDeleteShader(vsid);
	glDeleteShader(psid);
	
	GLint linkStatus;
	glGetProgramiv(pid, GL_LINK_STATUS, &linkStatus);
	GLsizei length = 0;
	glGetProgramiv(pid, GL_INFO_LOG_LENGTH, &length);
	if (linkStatus != GL_TRUE || (length > 1 && DEBUG_GLSL))
	{
		GLsizei charsWritten;
		GLchar* infoLog = new GLchar[length];
		glGetProgramInfoLog(pid, length, &charsWritten, infoLog);
		ERROR_LOG(VIDEO, "Program info log:\n%s", infoLog);
		char szTemp[MAX_PATH];
		sprintf(szTemp, "%sbad_p_%d.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++);
		std::ofstream file;
		OpenFStream(file, szTemp, std::ios_base::out);
		file << infoLog << s_glsl_header << vcode << s_glsl_header << pcode;
		file.close();
		
		PanicAlert("Failed to link shaders!\nThis usually happens when trying to use Dolphin with an outdated GPU or integrated GPU like the Intel GMA series.\n\nIf you're sure this is Dolphin's error anyway, post the contents of %s along with this error message at the forums.\n\nDebug info (%s, %s, %s):\n%s",
			szTemp,
			g_ogl_config.gl_vendor,
			g_ogl_config.gl_renderer,
			g_ogl_config.gl_version,
			infoLog);
		
		delete [] infoLog;
	}
	if (linkStatus != GL_TRUE)
	{
		// Compile failed
		ERROR_LOG(VIDEO, "Program linking failed; see info log");

		// Don't try to use this shader
		glDeleteProgram(pid);
		return false;
	}

	shader.SetProgramVariables();
	
	return true;
}

GLuint ProgramShaderCache::CompileSingleShader (GLuint type, const char* code )
{
	GLuint result = glCreateShader(type);

	const char *src[] = {s_glsl_header, code};
	
	glShaderSource(result, 2, src, NULL);
	glCompileShader(result);
	GLint compileStatus;
	glGetShaderiv(result, GL_COMPILE_STATUS, &compileStatus);
	GLsizei length = 0;
	glGetShaderiv(result, GL_INFO_LOG_LENGTH, &length);

	if (DriverDetails::HasBug(DriverDetails::BUG_BROKENINFOLOG))
		length = 1024;

	if (compileStatus != GL_TRUE || (length > 1 && DEBUG_GLSL))
	{
		GLsizei charsWritten;
		GLchar* infoLog = new GLchar[length];
		glGetShaderInfoLog(result, length, &charsWritten, infoLog);
		ERROR_LOG(VIDEO, "PS Shader info log:\n%s", infoLog);
		char szTemp[MAX_PATH];
		sprintf(szTemp, 
			"%sbad_%s_%04i.txt", 
			File::GetUserPath(D_DUMP_IDX).c_str(), 
			type==GL_VERTEX_SHADER ? "vs" : "ps", 
			num_failures++);
		std::ofstream file;
		OpenFStream(file, szTemp, std::ios_base::out);
		file << infoLog << s_glsl_header << code;
		file.close();
		
		PanicAlert("Failed to compile %s shader!\nThis usually happens when trying to use Dolphin with an outdated GPU or integrated GPU like the Intel GMA series.\n\nIf you're sure this is Dolphin's error anyway, post the contents of %s along with this error message at the forums.\n\nDebug info (%s, %s, %s):\n%s",
			type==GL_VERTEX_SHADER ? "vertex" : "pixel",
			szTemp,
			g_ogl_config.gl_vendor,
			g_ogl_config.gl_renderer,
			g_ogl_config.gl_version,
			infoLog);
		
		delete[] infoLog;
	}
	if (compileStatus != GL_TRUE)
	{
		// Compile failed
		ERROR_LOG(VIDEO, "Shader compilation failed; see info log");

		// Don't try to use this shader
		glDeleteShader(result);
		return 0;
	}
	(void)GL_REPORT_ERROR();
	return result;
}

void ProgramShaderCache::GetShaderId(SHADERUID* uid, DSTALPHA_MODE dstAlphaMode, u32 components)
{
	GetPixelShaderUid(uid->puid, dstAlphaMode, API_OPENGL, components);
	GetVertexShaderUid(uid->vuid, components, API_OPENGL);

	if (g_ActiveConfig.bEnableShaderDebugging)
	{
		PixelShaderCode pcode;
		GeneratePixelShaderCode(pcode, dstAlphaMode, API_OPENGL, components);
		pixel_uid_checker.AddToIndexAndCheck(pcode, uid->puid, "Pixel", "p");

		VertexShaderCode vcode;
		GenerateVertexShaderCode(vcode, components, API_OPENGL);
		vertex_uid_checker.AddToIndexAndCheck(vcode, uid->vuid, "Vertex", "v");
	}
}

ProgramShaderCache::PCacheEntry ProgramShaderCache::GetShaderProgram(void)
{
	return *last_entry;
}

void ProgramShaderCache::Init(void)
{
	// We have to get the UBO alignment here because
	// if we generate a buffer that isn't aligned
	// then the UBO will fail.
	if (g_ActiveConfig.backend_info.bSupportsGLSLUBO)
	{
		GLint Align;
		glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &Align);

		s_ps_data_size = C_PENVCONST_END * sizeof(float) * 4;
		s_vs_data_size = C_VENVCONST_END * sizeof(float) * 4;
		s_vs_data_offset = ROUND_UP(s_ps_data_size, Align);
		s_ubo_buffer_size = ROUND_UP(s_ps_data_size, Align) + ROUND_UP(s_vs_data_size, Align);

		// We multiply by *4*4 because we need to get down to basic machine units.
		// So multiply by four to get how many floats we have from vec4s
		// Then once more to get bytes
		s_buffer = new StreamBuffer(GL_UNIFORM_BUFFER, UBO_LENGTH);
		
		s_ubo_buffer = new u8[s_ubo_buffer_size];
		memset(s_ubo_buffer, 0, s_ubo_buffer_size);
		s_ubo_dirty = true;
	}

	// Read our shader cache, only if supported
	if (g_ogl_config.bSupportsGLSLCache)
	{
		GLint Supported;
		glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &Supported);
		if(!Supported)
		{
			ERROR_LOG(VIDEO, "GL_ARB_get_program_binary is supported, but no binary format is known. So disable shader cache.");
			g_ogl_config.bSupportsGLSLCache = false;
		}
		else
		{
			if (!File::Exists(File::GetUserPath(D_SHADERCACHE_IDX)))
				File::CreateDir(File::GetUserPath(D_SHADERCACHE_IDX).c_str());
		
			char cache_filename[MAX_PATH];
			sprintf(cache_filename, "%sogl-%s-shaders.cache", File::GetUserPath(D_SHADERCACHE_IDX).c_str(),
				SConfig::GetInstance().m_LocalCoreStartupParameter.m_strUniqueID.c_str());
			
			ProgramShaderCacheInserter inserter;
			g_program_disk_cache.OpenAndRead(cache_filename, inserter);
		}
		SETSTAT(stats.numPixelShadersAlive, pshaders.size());
	}
	
	CreateHeader();
	
	CurrentProgram = 0;
	last_entry = NULL;
}

void ProgramShaderCache::Shutdown(void)
{
	// store all shaders in cache on disk
	if (g_ogl_config.bSupportsGLSLCache)
	{
		PCache::iterator iter = pshaders.begin();
		for (; iter != pshaders.end(); ++iter)
		{
			if(iter->second.in_cache) continue;
			
			GLint binary_size;
			glGetProgramiv(iter->second.shader.glprogid, GL_PROGRAM_BINARY_LENGTH, &binary_size);
			if(!binary_size) continue;
			
			u8 *data = new u8[binary_size+sizeof(GLenum)];
			u8 *binary = data + sizeof(GLenum);
			GLenum *prog_format = (GLenum*)data;
			glGetProgramBinary(iter->second.shader.glprogid, binary_size, NULL, prog_format, binary);
			
			g_program_disk_cache.Append(iter->first, data, binary_size+sizeof(GLenum));
			delete [] data;
		}

		g_program_disk_cache.Sync();
		g_program_disk_cache.Close();
	}

	glUseProgram(0);
	
	PCache::iterator iter = pshaders.begin();
	for (; iter != pshaders.end(); ++iter)
		iter->second.Destroy();
	pshaders.clear();

	pixel_uid_checker.Invalidate();
	vertex_uid_checker.Invalidate();

	if (g_ActiveConfig.backend_info.bSupportsGLSLUBO)
	{
		delete s_buffer;
		s_buffer = 0;
		delete [] s_ubo_buffer;
		s_ubo_buffer = 0;
	}
}

void ProgramShaderCache::CreateHeader ( void )
{
	GLSL_VERSION v = g_ogl_config.eSupportedGLSLVersion;
	snprintf(s_glsl_header, sizeof(s_glsl_header), 
		"#version %s\n"
		"%s\n" // default precision
		"%s\n" // ubo
		
		"\n"// A few required defines and ones that will make our lives a lot easier
		"#define ATTRIN %s\n"
		"#define ATTROUT %s\n"
		"#define VARYIN %s\n"
		"#define VARYOUT %s\n"

		// Silly differences
		"#define float2 vec2\n"
		"#define float3 vec3\n"
		"#define float4 vec4\n"

		// hlsl to glsl function translation
		"#define frac fract\n"
		"#define lerp mix\n"

		// glsl 120 hack
		"%s\n"
		"%s\n"
		"%s\n"
		"%s\n"
		"%s\n"
		"#define COLOROUT(name) %s\n"
		
		// texture2d hack
		"%s\n"
		"%s\n"
		"%s\n"
				
		, v==GLSLES3 ? "300 es" : v==GLSL_120 ? "120" : v==GLSL_130 ? "130" : "140"
		, v==GLSLES3 ? "precision highp float;" : ""
		, g_ActiveConfig.backend_info.bSupportsGLSLUBO && v<GLSL_140 ? "#extension GL_ARB_uniform_buffer_object : enable" : ""
		, v==GLSL_120 ? "attribute" : "in"
		, v==GLSL_120 ? "attribute" : "out"
		, DriverDetails::HasBug(DriverDetails::BUG_BROKENCENTROID) ? "in" : v==GLSL_120 ? "varying" : "centroid in"
		, DriverDetails::HasBug(DriverDetails::BUG_BROKENCENTROID) ? "out" : v==GLSL_120 ? "varying" : "centroid out"
		, v==GLSL_120 ? "#define texture texture2D" : ""
		, v==GLSL_120 ? "#define round(x) floor((x)+0.5f)" : ""
		, v==GLSL_120 ? "#define out " : ""
		, v==GLSL_120 ? "#define ocol0 gl_FragColor" : ""
		, v==GLSL_120 ? "#define ocol1 gl_FragColor" : "" //TODO: implement dual source blend
		, v==GLSL_120 ? "" : "out vec4 name;"
		, v==GLSL_120 ? "#extension GL_ARB_texture_rectangle : enable" : ""
		, v==GLSL_120 ? "" : "#define texture2DRect(samp, uv)  texelFetch(samp, ivec2(floor(uv)), 0)"
		, v==GLSL_120 ? "" : "#define sampler2DRect sampler2D"
	);
}


void ProgramShaderCache::ProgramShaderCacheInserter::Read ( const SHADERUID& key, const u8* value, u32 value_size )
{
	const u8 *binary = value+sizeof(GLenum);
	GLenum *prog_format = (GLenum*)value;
	GLint binary_size = value_size-sizeof(GLenum);
	
	PCacheEntry entry;
	entry.in_cache = 1;
	entry.shader.glprogid = glCreateProgram();
	glProgramBinary(entry.shader.glprogid, *prog_format, binary, binary_size);

	GLint success;
	glGetProgramiv(entry.shader.glprogid, GL_LINK_STATUS, &success);

	if (success)
	{
		pshaders[key] = entry;
		entry.shader.SetProgramVariables();
	}
	else
		glDeleteProgram(entry.shader.glprogid);
}


} // namespace OGL
Exemple #24
0
namespace DX12
{

// Primitive topology type is always triangle, unless the GS stage is used. This is consumed
// by the PSO created in Renderer::ApplyState.
static D3D12_PRIMITIVE_TOPOLOGY_TYPE s_current_primitive_topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
struct ByteCodeCacheEntry
{
	D3D12_SHADER_BYTECODE m_shader_bytecode;
	bool m_compiled;
	std::atomic_flag m_initialized;

	ByteCodeCacheEntry() : m_compiled(false), m_shader_bytecode({})
	{
		m_initialized.clear();
	}
	void Release()
	{
		if (m_shader_bytecode.pShaderBytecode)
			delete[] m_shader_bytecode.pShaderBytecode;
		m_shader_bytecode.pShaderBytecode = nullptr;
	}
};

static ByteCodeCacheEntry s_pass_entry;

using GsBytecodeCache = ObjectUsageProfiler<GeometryShaderUid, pKey_t, ByteCodeCacheEntry, GeometryShaderUid::ShaderUidHasher>;
using PsBytecodeCache = ObjectUsageProfiler<PixelShaderUid, pKey_t, ByteCodeCacheEntry, PixelShaderUid::ShaderUidHasher>;
using VsBytecodeCache = ObjectUsageProfiler<VertexShaderUid, pKey_t, ByteCodeCacheEntry, VertexShaderUid::ShaderUidHasher>;
using TsBytecodeCache = ObjectUsageProfiler<TessellationShaderUid, pKey_t, std::pair<ByteCodeCacheEntry, ByteCodeCacheEntry>, TessellationShaderUid::ShaderUidHasher>;

GsBytecodeCache* gs_bytecode_cache;
PsBytecodeCache* ps_bytecode_cache;
VsBytecodeCache* vs_bytecode_cache;
TsBytecodeCache* ts_bytecode_cache;

static std::vector<D3DBlob*> s_shader_blob_list;

static LinearDiskCache<TessellationShaderUid, u8> s_hs_disk_cache;
static LinearDiskCache<TessellationShaderUid, u8> s_ds_disk_cache;
static LinearDiskCache<GeometryShaderUid, u8> s_gs_disk_cache;
static LinearDiskCache<PixelShaderUid, u8> s_ps_disk_cache;
static LinearDiskCache<VertexShaderUid, u8> s_vs_disk_cache;

static ByteCodeCacheEntry* s_last_domain_shader_bytecode;
static ByteCodeCacheEntry* s_last_hull_shader_bytecode;
static ByteCodeCacheEntry* s_last_geometry_shader_bytecode;
static ByteCodeCacheEntry* s_last_pixel_shader_bytecode;
static ByteCodeCacheEntry* s_last_vertex_shader_bytecode;

static GeometryShaderUid s_last_geometry_shader_uid;
static PixelShaderUid s_last_pixel_shader_uid;
static VertexShaderUid s_last_vertex_shader_uid;
static TessellationShaderUid s_last_tessellation_shader_uid;

static GeometryShaderUid s_last_cpu_geometry_shader_uid;
static PixelShaderUid s_last_cpu_pixel_shader_uid;
static VertexShaderUid s_last_cpu_vertex_shader_uid;
static TessellationShaderUid s_last_cpu_tessellation_shader_uid;

static HLSLAsyncCompiler *s_compiler;
static Common::SpinLock<true> s_shaders_lock;

template<typename UidType, typename ShaderCacheType, ShaderCacheType** cache>
class ShaderCacheInserter final : public LinearDiskCacheReader<UidType, u8>
{
public:
	void Read(const UidType &key, const u8* value, u32 value_size)
	{
		D3DBlob* blob = new D3DBlob(value_size, value);
		ShaderCache::InsertByteCode<UidType, ShaderCacheType>(key, *cache, blob);
	}
};

class DShaderCacheInserter final : public LinearDiskCacheReader<TessellationShaderUid, u8>
{
public:
	void Read(const TessellationShaderUid &key, const u8* value, u32 value_size)
	{
		D3DBlob* blob = new D3DBlob(value_size, value);
		ShaderCache::InsertDSByteCode(key, blob);
	}
};

class HShaderCacheInserter final : public LinearDiskCacheReader<TessellationShaderUid, u8>
{
public:
	void Read(const TessellationShaderUid &key, const u8* value, u32 value_size)
	{
		D3DBlob* blob = new D3DBlob(value_size, value);
		ShaderCache::InsertHSByteCode(key, blob);
	}
};

void ShaderCache::Init()
{
	s_compiler = &HLSLAsyncCompiler::getInstance();
	s_shaders_lock.unlock();
	s_pass_entry.m_compiled = true;
	s_pass_entry.m_initialized.test_and_set();
	// This class intentionally shares its shader cache files with DX11, as the shaders are (right now) identical.
	// Reduces unnecessary compilation when switching between APIs.
	s_last_domain_shader_bytecode = &s_pass_entry;
	s_last_hull_shader_bytecode = &s_pass_entry;
	s_last_geometry_shader_bytecode = &s_pass_entry;
	s_last_pixel_shader_bytecode = nullptr;
	s_last_vertex_shader_bytecode = nullptr;

	s_last_geometry_shader_uid = {};
	s_last_pixel_shader_uid = {};
	s_last_vertex_shader_uid = {};
	s_last_tessellation_shader_uid = {};

	s_last_cpu_geometry_shader_uid = {};
	s_last_cpu_pixel_shader_uid = {};
	s_last_cpu_vertex_shader_uid = {};
	s_last_cpu_tessellation_shader_uid = {};

	// Ensure shader cache directory exists..
	std::string shader_cache_path = File::GetUserPath(D_SHADERCACHE_IDX);

	if (!File::Exists(shader_cache_path))
		File::CreateDir(File::GetUserPath(D_SHADERCACHE_IDX));

	std::string title_unique_id = SConfig::GetInstance().m_strGameID;

	std::string ds_cache_filename = StringFromFormat("%sIDX11-%s-ds.cache", shader_cache_path.c_str(), title_unique_id.c_str());
	std::string hs_cache_filename = StringFromFormat("%sIDX11-%s-hs.cache", shader_cache_path.c_str(), title_unique_id.c_str());
	std::string gs_cache_filename = StringFromFormat("%sIDX11-%s-gs.cache", shader_cache_path.c_str(), title_unique_id.c_str());
	std::string ps_cache_filename = StringFromFormat("%sIDX11-%s-ps.cache", shader_cache_path.c_str(), title_unique_id.c_str());
	std::string vs_cache_filename = StringFromFormat("%sIDX11-%s-vs.cache", shader_cache_path.c_str(), title_unique_id.c_str());

	pKey_t gameid = (pKey_t)GetMurmurHash3(reinterpret_cast<const u8*>(SConfig::GetInstance().m_strGameID.data()), (u32)SConfig::GetInstance().m_strGameID.size(), 0);

	vs_bytecode_cache = VsBytecodeCache::Create(
		gameid,
		VERTEXSHADERGEN_UID_VERSION,
		"Ishiiruka.vs",
		StringFromFormat("%s.vs", title_unique_id.c_str())
	);

	ps_bytecode_cache = PsBytecodeCache::Create(
		gameid,
		PIXELSHADERGEN_UID_VERSION,
		"Ishiiruka.ps",
		StringFromFormat("%s.ps", title_unique_id.c_str())
	);

	gs_bytecode_cache = GsBytecodeCache::Create(
		gameid,
		GEOMETRYSHADERGEN_UID_VERSION,
		"Ishiiruka.gs",
		StringFromFormat("%s.gs", title_unique_id.c_str())
	);

	ts_bytecode_cache = TsBytecodeCache::Create(
		gameid,
		TESSELLATIONSHADERGEN_UID_VERSION,
		"Ishiiruka.ts",
		StringFromFormat("%s.ts", title_unique_id.c_str())
	);

	DShaderCacheInserter ds_inserter;
	s_ds_disk_cache.OpenAndRead(ds_cache_filename, ds_inserter);

	HShaderCacheInserter hs_inserter;
	s_hs_disk_cache.OpenAndRead(hs_cache_filename, hs_inserter);

	ShaderCacheInserter<GeometryShaderUid, GsBytecodeCache, &gs_bytecode_cache> gs_inserter;
	s_gs_disk_cache.OpenAndRead(gs_cache_filename, gs_inserter);

	ShaderCacheInserter<PixelShaderUid, PsBytecodeCache, &ps_bytecode_cache> ps_inserter;
	s_ps_disk_cache.OpenAndRead(ps_cache_filename, ps_inserter);

	ShaderCacheInserter<VertexShaderUid, VsBytecodeCache, &vs_bytecode_cache> vs_inserter;
	s_vs_disk_cache.OpenAndRead(vs_cache_filename, vs_inserter);

	// Clear out disk cache when debugging shaders to ensure stale ones don't stick around..
	SETSTAT(stats.numGeometryShadersAlive, static_cast<int>(gs_bytecode_cache->size()));
	SETSTAT(stats.numGeometryShadersCreated, 0);
	SETSTAT(stats.numPixelShadersAlive, static_cast<int>(ps_bytecode_cache->size()));
	SETSTAT(stats.numPixelShadersCreated, 0);
	SETSTAT(stats.numVertexShadersAlive, static_cast<int>(vs_bytecode_cache->size()));
	SETSTAT(stats.numVertexShadersCreated, 0);
	if (g_ActiveConfig.bCompileShaderOnStartup)
	{
		size_t shader_count = 0;
		ps_bytecode_cache->ForEachMostUsedByCategory(gameid,
			[&](const PixelShaderUid& it, size_t total)
		{
			PixelShaderUid item = it;
			item.ClearHASH();
			item.CalculateUIDHash();
			HandlePSUIDChange(item, true);
			shader_count++;
			if ((shader_count & 7) == 0)
			{
				Host_UpdateTitle(StringFromFormat("Compiling Pixel Shaders %i %% (%i/%i)", (shader_count * 100) / total, shader_count, total));
				s_compiler->WaitForFinish();
			}
		},
			[](ByteCodeCacheEntry& entry)
		{
			return !entry.m_shader_bytecode.pShaderBytecode;
		}
		, true);
		shader_count = 0;
		vs_bytecode_cache->ForEachMostUsedByCategory(gameid,
			[&](const VertexShaderUid& it, size_t total)
		{
			VertexShaderUid item = it;
			item.ClearHASH();
			item.CalculateUIDHash();
			HandleVSUIDChange(item, true);
			shader_count++;
			if ((shader_count & 31) == 0)
			{
				Host_UpdateTitle(StringFromFormat("Compiling Vertex Shaders %i %% (%i/%i)", (shader_count * 100) / total, shader_count, total));
				s_compiler->WaitForFinish();
			}
		},
			[](ByteCodeCacheEntry& entry)
		{
			return !entry.m_shader_bytecode.pShaderBytecode;
		}
		, true);
		shader_count = 0;
		gs_bytecode_cache->ForEachMostUsedByCategory(gameid,
			[&](const GeometryShaderUid& it, size_t total)
		{
			GeometryShaderUid item = it;
			item.ClearHASH();
			item.CalculateUIDHash();
			HandleGSUIDChange(item, true);
			shader_count++;
			if ((shader_count & 7) == 0)
			{
				Host_UpdateTitle(StringFromFormat("Compiling Geometry Shaders %i %% (%i/%i)", (shader_count * 100) / total, shader_count, total));
				s_compiler->WaitForFinish();
			}
		},
			[](ByteCodeCacheEntry& entry)
		{
			return !entry.m_shader_bytecode.pShaderBytecode;
		}
		, true);
		shader_count = 0;
		ts_bytecode_cache->ForEachMostUsedByCategory(gameid,
			[&](const TessellationShaderUid& it, size_t total)
		{
			TessellationShaderUid item = it;
			item.ClearHASH();
			item.CalculateUIDHash();
			HandleTSUIDChange(item, true);
			shader_count++;
			if ((shader_count & 31) == 0)
			{
				Host_UpdateTitle(StringFromFormat("Compiling Tessellation Shaders %i %% (%i/%i)", (shader_count * 100) / total, shader_count, total));
				s_compiler->WaitForFinish();
			}
		},
			[](std::pair<ByteCodeCacheEntry, ByteCodeCacheEntry>& entry)
		{
			return !entry.first.m_shader_bytecode.pShaderBytecode;
		}
		, true);
		s_compiler->WaitForFinish();
	}
}

void ShaderCache::Clear()
{

}

void ShaderCache::Shutdown()
{
	if (s_compiler)
	{
		s_compiler->WaitForFinish();
	}

	for (auto& iter : s_shader_blob_list)
		SAFE_RELEASE(iter);

	s_shader_blob_list.clear();

	vs_bytecode_cache->Persist();
	delete vs_bytecode_cache;
	vs_bytecode_cache = nullptr;

	gs_bytecode_cache->Persist();
	delete gs_bytecode_cache;
	gs_bytecode_cache = nullptr;

	ts_bytecode_cache->Persist();
	delete ts_bytecode_cache;
	ts_bytecode_cache = nullptr;

	ps_bytecode_cache->Persist();
	delete ps_bytecode_cache;
	ps_bytecode_cache = nullptr;

	s_ds_disk_cache.Sync();
	s_ds_disk_cache.Close();
	s_hs_disk_cache.Sync();
	s_hs_disk_cache.Close();
	s_gs_disk_cache.Sync();
	s_gs_disk_cache.Close();
	s_ps_disk_cache.Sync();
	s_ps_disk_cache.Close();
	s_vs_disk_cache.Sync();
	s_vs_disk_cache.Close();
}

static void PushByteCode(ByteCodeCacheEntry* entry, D3DBlob* shaderBuffer)
{
	s_shader_blob_list.push_back(shaderBuffer);
	entry->m_shader_bytecode.pShaderBytecode = shaderBuffer->Data();
	entry->m_shader_bytecode.BytecodeLength = shaderBuffer->Size();
	entry->m_compiled = true;
}

void ShaderCache::SetCurrentPrimitiveTopology(u32 gs_primitive_type)
{
	switch (gs_primitive_type)
	{
	case PRIMITIVE_TRIANGLES:
		s_current_primitive_topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
		break;
	case PRIMITIVE_LINES:
		s_current_primitive_topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
		break;
	case PRIMITIVE_POINTS:
		s_current_primitive_topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT;
		break;
	default:
		CHECK(0, "Invalid primitive type.");
		break;
	}
}

void ShaderCache::HandleGSUIDChange(
	const GeometryShaderUid &gs_uid,
	bool on_gpu_thread)
{
	if (gs_uid.GetUidData().IsPassthrough())
	{
		s_last_geometry_shader_bytecode = &s_pass_entry;
		return;
	}

	s_shaders_lock.lock();
	ByteCodeCacheEntry* entry = &gs_bytecode_cache->GetOrAdd(gs_uid);
	s_shaders_lock.unlock();
	if (on_gpu_thread)
	{
		s_last_geometry_shader_bytecode = entry;
	}

	if (entry->m_initialized.test_and_set())
	{
		return;
	}

	// Need to compile a new shader
	ShaderCompilerWorkUnit *wunit = s_compiler->NewUnit(GEOMETRYSHADERGEN_BUFFERSIZE);
	wunit->GenerateCodeHandler = [gs_uid](ShaderCompilerWorkUnit* wunit)
	{
		ShaderCode code;
		code.SetBuffer(wunit->code.data());
		GenerateGeometryShaderCode(code, gs_uid.GetUidData(), API_D3D11);
		wunit->codesize = (u32)code.BufferSize();
	};

	wunit->entrypoint = "main";
	wunit->flags = D3DCOMPILE_SKIP_VALIDATION | D3DCOMPILE_OPTIMIZATION_LEVEL3;
	wunit->target = D3D::GeometryShaderVersionString();

	wunit->ResultHandler = [gs_uid, entry](ShaderCompilerWorkUnit* wunit)
	{
		if (SUCCEEDED(wunit->cresult))
		{
			D3DBlob* shaderBuffer = new D3DBlob(wunit->shaderbytecode);
			s_gs_disk_cache.Append(gs_uid, shaderBuffer->Data(), shaderBuffer->Size());
			PushByteCode(entry, shaderBuffer);
			wunit->shaderbytecode->Release();
			wunit->shaderbytecode = nullptr;
			SETSTAT(stats.numGeometryShadersAlive, static_cast<int>(ps_bytecode_cache->size()));
			INCSTAT(stats.numGeometryShadersCreated);
		}
		else
		{
			static int num_failures = 0;
			std::string filename = StringFromFormat("%sbad_gs_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++);
			std::ofstream file;
			OpenFStream(file, filename, std::ios_base::out);
			file << ((const char *)wunit->code.data());
			file << ((const char *)wunit->error->GetBufferPointer());
			file.close();

			PanicAlert("Failed to compile geometry shader!\nThis usually happens when trying to use Dolphin with an outdated GPU or integrated GPU like the Intel GMA series.\n\nIf you're sure this is Dolphin's error anyway, post the contents of %s along with this error message at the forums.\n\nDebug info (%s):\n%s",
				filename,
				D3D::GeometryShaderVersionString(),
				(char*)wunit->error->GetBufferPointer());
		}
	};
	s_compiler->CompileShaderAsync(wunit);
}

void ShaderCache::HandlePSUIDChange(
	const PixelShaderUid &ps_uid,
	bool on_gpu_thread)
{
	s_shaders_lock.lock();
	ByteCodeCacheEntry* entry = &ps_bytecode_cache->GetOrAdd(ps_uid);
	s_shaders_lock.unlock();
	if (on_gpu_thread)
	{
		s_last_pixel_shader_bytecode = entry;
	}
	if (entry->m_initialized.test_and_set())
	{
		return;
	}
	// Need to compile a new shader
	ShaderCompilerWorkUnit *wunit = s_compiler->NewUnit(PIXELSHADERGEN_BUFFERSIZE);
	wunit->GenerateCodeHandler = [ps_uid](ShaderCompilerWorkUnit* wunit)
	{
		ShaderCode code;
		code.SetBuffer(wunit->code.data());
		GeneratePixelShaderCodeD3D11(code, ps_uid.GetUidData());
		wunit->codesize = (u32)code.BufferSize();
	};

	wunit->entrypoint = "main";
	wunit->flags = D3DCOMPILE_SKIP_VALIDATION | D3DCOMPILE_OPTIMIZATION_LEVEL3;
	wunit->target = D3D::PixelShaderVersionString();
	wunit->ResultHandler = [ps_uid, entry](ShaderCompilerWorkUnit* wunit)
	{
		if (SUCCEEDED(wunit->cresult))
		{
			D3DBlob* shaderBuffer = new D3DBlob(wunit->shaderbytecode);
			s_ps_disk_cache.Append(ps_uid, shaderBuffer->Data(), shaderBuffer->Size());
			PushByteCode(entry, shaderBuffer);
			wunit->shaderbytecode->Release();
			wunit->shaderbytecode = nullptr;
		}
		else
		{
			static int num_failures = 0;
			std::string filename = StringFromFormat("%sbad_ps_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++);
			std::ofstream file;
			OpenFStream(file, filename, std::ios_base::out);
			file << ((const char *)wunit->code.data());
			file << ((const char *)wunit->error->GetBufferPointer());
			file.close();

			PanicAlert("Failed to compile pixel shader!\nThis usually happens when trying to use Dolphin with an outdated GPU or integrated GPU like the Intel GMA series.\n\nIf you're sure this is Dolphin's error anyway, post the contents of %s along with this error message at the forums.\n\nDebug info (%s):\n%s",
				filename,
				D3D::PixelShaderVersionString(),
				(char*)wunit->error->GetBufferPointer());
		}
	};
	s_compiler->CompileShaderAsync(wunit);
}

void ShaderCache::HandleVSUIDChange(
	const VertexShaderUid& vs_uid,
	bool on_gpu_thread)
{
	s_shaders_lock.lock();
	ByteCodeCacheEntry* entry = &vs_bytecode_cache->GetOrAdd(vs_uid);
	s_shaders_lock.unlock();
	if (on_gpu_thread)
	{
		s_last_vertex_shader_bytecode = entry;
	}
	// Compile only when we have a new instance
	if (entry->m_initialized.test_and_set())
	{
		return;
	}
	ShaderCompilerWorkUnit *wunit = s_compiler->NewUnit(VERTEXSHADERGEN_BUFFERSIZE);
	wunit->GenerateCodeHandler = [vs_uid](ShaderCompilerWorkUnit* wunit)
	{
		ShaderCode code;
		code.SetBuffer(wunit->code.data());
		GenerateVertexShaderCodeD3D11(code, vs_uid.GetUidData());
		wunit->codesize = (u32)code.BufferSize();
	};

	wunit->entrypoint = "main";
	wunit->flags = D3DCOMPILE_SKIP_VALIDATION | D3DCOMPILE_OPTIMIZATION_LEVEL3 | D3DCOMPILE_ENABLE_BACKWARDS_COMPATIBILITY;
	wunit->target = D3D::VertexShaderVersionString();
	wunit->ResultHandler = [vs_uid, entry](ShaderCompilerWorkUnit* wunit)
	{
		if (SUCCEEDED(wunit->cresult))
		{
			D3DBlob* shaderBuffer = new D3DBlob(wunit->shaderbytecode);
			s_vs_disk_cache.Append(vs_uid, shaderBuffer->Data(), shaderBuffer->Size());
			PushByteCode(entry, shaderBuffer);
			wunit->shaderbytecode->Release();
			wunit->shaderbytecode = nullptr;
		}
		else
		{
			static int num_failures = 0;
			std::string filename = StringFromFormat("%sbad_vs_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++);
			std::ofstream file;
			OpenFStream(file, filename, std::ios_base::out);
			file << ((const char*)wunit->code.data());
			file.close();

			PanicAlert("Failed to compile vertex shader!\nThis usually happens when trying to use Dolphin with an outdated GPU or integrated GPU like the Intel GMA series.\n\nIf you're sure this is Dolphin's error anyway, post the contents of %s along with this error message at the forums.\n\nDebug info (%s):\n%s",
				filename,
				D3D::VertexShaderVersionString(),
				(char*)wunit->error->GetBufferPointer());
		}
	};
	s_compiler->CompileShaderAsync(wunit);
}

void ShaderCache::HandleTSUIDChange(
	const TessellationShaderUid& ts_uid,
	bool on_gpu_thread)
{
	s_shaders_lock.lock();
	std::pair<ByteCodeCacheEntry, ByteCodeCacheEntry>& entry = ts_bytecode_cache->GetOrAdd(ts_uid);
	s_shaders_lock.unlock();
	ByteCodeCacheEntry* dentry = &entry.first;
	ByteCodeCacheEntry* hentry = &entry.second;
	if (on_gpu_thread)
	{
		if (dentry->m_compiled && hentry->m_compiled)
		{
			s_last_domain_shader_bytecode = dentry;
			s_last_hull_shader_bytecode = hentry;
		}
		else
		{
			s_last_tessellation_shader_uid = {};
			s_last_domain_shader_bytecode = &s_pass_entry;
			s_last_hull_shader_bytecode = &s_pass_entry;
		}
	}
	if (dentry->m_initialized.test_and_set())
	{
		return;
	}
	hentry->m_initialized.test_and_set();

	// Need to compile a new shader
	ShaderCode code;
	ShaderCompilerWorkUnit *wunit = s_compiler->NewUnit(TESSELLATIONSHADERGEN_BUFFERSIZE);
	ShaderCompilerWorkUnit *wunitd = s_compiler->NewUnit(TESSELLATIONSHADERGEN_BUFFERSIZE);
	code.SetBuffer(wunit->code.data());
	GenerateTessellationShaderCode(code, API_D3D11, ts_uid.GetUidData());
	memcpy(wunitd->code.data(), wunit->code.data(), code.BufferSize());

	wunit->codesize = (u32)code.BufferSize();
	wunit->entrypoint = "HS_TFO";
	wunit->flags = D3DCOMPILE_SKIP_VALIDATION | D3DCOMPILE_SKIP_OPTIMIZATION;
	wunit->target = D3D::HullShaderVersionString();

	wunitd->codesize = (u32)code.BufferSize();
	wunitd->entrypoint = "DS_TFO";
	wunitd->flags = D3DCOMPILE_SKIP_VALIDATION | D3DCOMPILE_OPTIMIZATION_LEVEL3;
	wunitd->target = D3D::DomainShaderVersionString();

	wunitd->ResultHandler = [ts_uid, dentry](ShaderCompilerWorkUnit* wunit)
	{
		if (SUCCEEDED(wunit->cresult))
		{
			D3DBlob* shaderBuffer = new D3DBlob(wunit->shaderbytecode);
			s_ds_disk_cache.Append(ts_uid, shaderBuffer->Data(), shaderBuffer->Size());
			PushByteCode(dentry, shaderBuffer);
			wunit->shaderbytecode->Release();
			wunit->shaderbytecode = nullptr;
		}
		else
		{
			static int num_failures = 0;
			std::string filename = StringFromFormat("%sbad_ds_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++);
			std::ofstream file;
			OpenFStream(file, filename, std::ios_base::out);
			file << ((const char *)wunit->code.data());
			file << ((const char *)wunit->error->GetBufferPointer());
			file.close();

			PanicAlert("Failed to compile domain shader!\nThis usually happens when trying to use Dolphin with an outdated GPU or integrated GPU like the Intel GMA series.\n\nIf you're sure this is Dolphin's error anyway, post the contents of %s along with this error message at the forums.\n\nDebug info (%s):\n%s",
				filename,
				D3D::DomainShaderVersionString(),
				(char*)wunit->error->GetBufferPointer());
		}
	};

	wunit->ResultHandler = [ts_uid, hentry](ShaderCompilerWorkUnit* wunit)
	{
		if (SUCCEEDED(wunit->cresult))
		{
			D3DBlob* shaderBuffer = new D3DBlob(wunit->shaderbytecode);
			s_hs_disk_cache.Append(ts_uid, shaderBuffer->Data(), shaderBuffer->Size());
			PushByteCode(hentry, shaderBuffer);
			wunit->shaderbytecode->Release();
			wunit->shaderbytecode = nullptr;
		}
		else
		{
			static int num_failures = 0;
			std::string filename = StringFromFormat("%sbad_hs_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++);
			std::ofstream file;
			OpenFStream(file, filename, std::ios_base::out);
			file << ((const char *)wunit->code.data());
			file << ((const char *)wunit->error->GetBufferPointer());
			file.close();

			PanicAlert("Failed to compile hull shader!\nThis usually happens when trying to use Dolphin with an outdated GPU or integrated GPU like the Intel GMA series.\n\nIf you're sure this is Dolphin's error anyway, post the contents of %s along with this error message at the forums.\n\nDebug info (%s):\n%s",
				filename,
				D3D::HullShaderVersionString(),
				(char*)wunit->error->GetBufferPointer());
		}
	};

	s_compiler->CompileShaderAsync(wunit);
	s_compiler->CompileShaderAsync(wunitd);
}

void ShaderCache::PrepareShaders(PIXEL_SHADER_RENDER_MODE render_mode,
	u32 gs_primitive_type,
	u32 components,
	const XFMemory &xfr,
	const BPMemory &bpm, bool on_gpu_thread)
{
	SetCurrentPrimitiveTopology(gs_primitive_type);
	GeometryShaderUid gs_uid;
	GetGeometryShaderUid(gs_uid, gs_primitive_type, xfr, components);
	PixelShaderUid ps_uid;
	GetPixelShaderUID(ps_uid, render_mode, components, xfr, bpm);
	VertexShaderUid vs_uid;
	GetVertexShaderUID(vs_uid, components, xfr, bpm);
	TessellationShaderUid ts_uid;
	bool tessellationenabled = false;
	if (gs_primitive_type == PrimitiveType::PRIMITIVE_TRIANGLES
		&& g_ActiveConfig.TessellationEnabled()
		&& xfr.projection.type == GX_PERSPECTIVE
		&& (g_ActiveConfig.bForcedLighting || g_ActiveConfig.PixelLightingEnabled(xfr, components)))
	{
		GetTessellationShaderUID(ts_uid, xfr, bpm, components);
		tessellationenabled = true;
	}

	bool gs_changed = false;
	bool ps_changed = false;
	bool vs_changed = false;
	bool ts_changed = false;

	if (on_gpu_thread)
	{
		s_compiler->ProcCompilationResults();
		gs_changed = gs_uid != s_last_geometry_shader_uid;
		ps_changed = ps_uid != s_last_pixel_shader_uid;
		vs_changed = vs_uid != s_last_vertex_shader_uid;
		ts_changed = tessellationenabled && ts_uid != s_last_tessellation_shader_uid;
	}
	else
	{
		gs_changed = gs_uid != s_last_cpu_geometry_shader_uid;
		ps_changed = ps_uid != s_last_cpu_pixel_shader_uid;
		vs_changed = vs_uid != s_last_cpu_vertex_shader_uid;
		ts_changed = tessellationenabled && ts_uid != s_last_cpu_tessellation_shader_uid;
	}

	if (!gs_changed && !ps_changed && !vs_changed && !ts_changed)
	{
		return;
	}

	if (on_gpu_thread)
	{
		if (gs_changed)
		{
			s_last_geometry_shader_uid = gs_uid;
		}

		if (ps_changed)
		{
			s_last_pixel_shader_uid = ps_uid;
		}

		if (vs_changed)
		{
			s_last_vertex_shader_uid = vs_uid;
		}
		if (ts_changed)
		{
			s_last_tessellation_shader_uid = ts_uid;
		}
		// A Uid has changed, so the PSO will need to be reset at next ApplyState.
		D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PSO, true);
	}
	else
	{
		if (gs_changed)
		{
			s_last_cpu_geometry_shader_uid = gs_uid;
		}

		if (ps_changed)
		{
			s_last_cpu_pixel_shader_uid = ps_uid;
		}

		if (vs_changed)
		{
			s_last_cpu_vertex_shader_uid = vs_uid;
		}

		if (ts_changed)
		{
			s_last_cpu_tessellation_shader_uid = ts_uid;
		}
	}

	if (vs_changed)
	{
		HandleVSUIDChange(vs_uid, on_gpu_thread);
	}

	if (ts_changed)
	{
		HandleTSUIDChange(ts_uid, on_gpu_thread);
	}
	else
	{
		if (on_gpu_thread)
		{
			s_last_domain_shader_bytecode = &s_pass_entry;
			s_last_hull_shader_bytecode = &s_pass_entry;
		}
	}

	if (gs_changed)
	{
		HandleGSUIDChange(gs_uid, on_gpu_thread);
	}

	if (ps_changed)
	{
		HandlePSUIDChange(ps_uid, on_gpu_thread);
	}
}

bool ShaderCache::TestShaders()
{
	if (s_last_geometry_shader_bytecode == nullptr
		|| s_last_pixel_shader_bytecode == nullptr
		|| s_last_vertex_shader_bytecode == nullptr)
	{
		return false;
	}
	int count = 0;
	while (!(s_last_geometry_shader_bytecode->m_compiled
		&& s_last_pixel_shader_bytecode->m_compiled
		&& s_last_vertex_shader_bytecode->m_compiled))
	{
		s_compiler->ProcCompilationResults();
		if (g_ActiveConfig.bFullAsyncShaderCompilation)
		{
			break;
		}
		Common::cYield(count++);
	}
	return s_last_geometry_shader_bytecode->m_compiled
		&& s_last_pixel_shader_bytecode->m_compiled
		&& s_last_vertex_shader_bytecode->m_compiled;
}

void ShaderCache::InsertDSByteCode(const TessellationShaderUid & uid, D3DBlob * bytecode_blob)
{
	s_shader_blob_list.push_back(bytecode_blob);
	s_shaders_lock.lock();
	ByteCodeCacheEntry* entry = &ts_bytecode_cache->GetOrAdd(uid).first;
	s_shaders_lock.unlock();
	entry->m_shader_bytecode.pShaderBytecode = bytecode_blob->Data();
	entry->m_shader_bytecode.BytecodeLength = bytecode_blob->Size();
	entry->m_compiled = true;
	entry->m_initialized.test_and_set();
}

void ShaderCache::InsertHSByteCode(const TessellationShaderUid & uid, D3DBlob * bytecode_blob)
{
	s_shader_blob_list.push_back(bytecode_blob);
	s_shaders_lock.lock();
	ByteCodeCacheEntry* entry = &ts_bytecode_cache->GetOrAdd(uid).second;
	s_shaders_lock.unlock();
	entry->m_shader_bytecode.pShaderBytecode = bytecode_blob->Data();
	entry->m_shader_bytecode.BytecodeLength = bytecode_blob->Size();
	entry->m_compiled = true;
	entry->m_initialized.test_and_set();
}

template<typename UidType, typename ShaderCacheType>
void ShaderCache::InsertByteCode(const UidType& uid, ShaderCacheType* shader_cache, D3DBlob* bytecode_blob)
{
	s_shader_blob_list.push_back(bytecode_blob);
	s_shaders_lock.lock();
	ByteCodeCacheEntry* entry = entry = &shader_cache->GetOrAdd(uid);
	s_shaders_lock.unlock();
	entry->m_shader_bytecode.pShaderBytecode = bytecode_blob->Data();
	entry->m_shader_bytecode.BytecodeLength = bytecode_blob->Size();
	entry->m_compiled = true;
	entry->m_initialized.test_and_set();
}

D3D12_PRIMITIVE_TOPOLOGY_TYPE ShaderCache::GetCurrentPrimitiveTopology()
{
	return s_current_primitive_topology;
}

D3D12_SHADER_BYTECODE ShaderCache::GetActiveDomainShaderBytecode()
{
	return s_current_primitive_topology == D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE
		&& s_last_hull_shader_bytecode->m_compiled
		&& s_last_domain_shader_bytecode->m_compiled ? s_last_domain_shader_bytecode->m_shader_bytecode : s_pass_entry.m_shader_bytecode;
}
D3D12_SHADER_BYTECODE ShaderCache::GetActiveHullShaderBytecode()
{
	return s_current_primitive_topology == D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE
		&& s_last_hull_shader_bytecode->m_compiled
		&& s_last_domain_shader_bytecode->m_compiled ? s_last_hull_shader_bytecode->m_shader_bytecode : s_pass_entry.m_shader_bytecode;
}
D3D12_SHADER_BYTECODE ShaderCache::GetActiveGeometryShaderBytecode()
{
	return s_last_geometry_shader_bytecode->m_shader_bytecode;
}
D3D12_SHADER_BYTECODE ShaderCache::GetActivePixelShaderBytecode()
{
	return s_last_pixel_shader_bytecode->m_shader_bytecode;
}
D3D12_SHADER_BYTECODE ShaderCache::GetActiveVertexShaderBytecode()
{
	return s_last_vertex_shader_bytecode->m_shader_bytecode;
}

const GeometryShaderUid* ShaderCache::GetActiveGeometryShaderUid()
{
	return &s_last_geometry_shader_uid;
}
const PixelShaderUid* ShaderCache::GetActivePixelShaderUid()
{
	return &s_last_pixel_shader_uid;
}
const VertexShaderUid* ShaderCache::GetActiveVertexShaderUid()
{
	return &s_last_vertex_shader_uid;
}
const TessellationShaderUid* ShaderCache::GetActiveTessellationShaderUid()
{
	return &s_last_tessellation_shader_uid;
}

static const D3D12_SHADER_BYTECODE empty = { 0 };

D3D12_SHADER_BYTECODE ShaderCache::GetDomainShaderFromUid(const TessellationShaderUid* uid)
{
	auto it = ts_bytecode_cache->GetInfoIfexists(*uid);
	if (it != nullptr)
		return it->first.m_shader_bytecode;

	return empty;
}
D3D12_SHADER_BYTECODE ShaderCache::GetHullShaderFromUid(const TessellationShaderUid* uid)
{
	auto it = ts_bytecode_cache->GetInfoIfexists(*uid);
	if (it != nullptr)
		return it->second.m_shader_bytecode;

	return empty;
}
D3D12_SHADER_BYTECODE ShaderCache::GetGeometryShaderFromUid(const GeometryShaderUid* uid)
{
	auto it = gs_bytecode_cache->GetInfoIfexists(*uid);
	if (it != nullptr)
		return it->m_shader_bytecode;

	return empty;
}
D3D12_SHADER_BYTECODE ShaderCache::GetPixelShaderFromUid(const PixelShaderUid* uid)
{
	auto it = ps_bytecode_cache->GetInfoIfexists(*uid);
	if (it != nullptr)
		return it->m_shader_bytecode;

	return empty;
}
D3D12_SHADER_BYTECODE ShaderCache::GetVertexShaderFromUid(const VertexShaderUid* uid)
{
	auto it = vs_bytecode_cache->GetInfoIfexists(*uid);
	if (it != nullptr)
		return it->m_shader_bytecode;

	return empty;
}

}
Exemple #25
0
namespace DX11
{
PixelShaderCache::PSCache PixelShaderCache::PixelShaders;
const PixelShaderCache::PSCacheEntry* PixelShaderCache::last_entry;
PixelShaderUid PixelShaderCache::last_uid;

LinearDiskCache<PixelShaderUid, u8> g_ps_disk_cache;

ID3D11PixelShader* s_ColorMatrixProgram[2] = {nullptr};
ID3D11PixelShader* s_ColorCopyProgram[2] = {nullptr};
ID3D11PixelShader* s_DepthMatrixProgram[2] = {nullptr};
ID3D11PixelShader* s_ClearProgram = nullptr;
ID3D11PixelShader* s_AnaglyphProgram = nullptr;
ID3D11PixelShader* s_DepthResolveProgram = nullptr;
ID3D11PixelShader* s_rgba6_to_rgb8[2] = {nullptr};
ID3D11PixelShader* s_rgb8_to_rgba6[2] = {nullptr};
ID3D11Buffer* pscbuf = nullptr;

const char clear_program_code[] = {"void main(\n"
                                   "out float4 ocol0 : SV_Target,\n"
                                   "in float4 pos : SV_Position,\n"
                                   "in float4 incol0 : COLOR0){\n"
                                   "ocol0 = incol0;\n"
                                   "}\n"};

// TODO: Find some way to avoid having separate shaders for non-MSAA and MSAA...
const char color_copy_program_code[] = {"sampler samp0 : register(s0);\n"
                                        "Texture2DArray Tex0 : register(t0);\n"
                                        "void main(\n"
                                        "out float4 ocol0 : SV_Target,\n"
                                        "in float4 pos : SV_Position,\n"
                                        "in float3 uv0 : TEXCOORD0){\n"
                                        "ocol0 = Tex0.Sample(samp0,uv0);\n"
                                        "}\n"};

// Anaglyph Red-Cyan shader based on Dubois algorithm
// Constants taken from the paper:
// "Conversion of a Stereo Pair to Anaglyph with
// the Least-Squares Projection Method"
// Eric Dubois, March 2009
const char anaglyph_program_code[] = {"sampler samp0 : register(s0);\n"
                                      "Texture2DArray Tex0 : register(t0);\n"
                                      "void main(\n"
                                      "out float4 ocol0 : SV_Target,\n"
                                      "in float4 pos : SV_Position,\n"
                                      "in float3 uv0 : TEXCOORD0){\n"
                                      "float4 c0 = Tex0.Sample(samp0, float3(uv0.xy, 0.0));\n"
                                      "float4 c1 = Tex0.Sample(samp0, float3(uv0.xy, 1.0));\n"
                                      "float3x3 l = float3x3( 0.437, 0.449, 0.164,\n"
                                      "                      -0.062,-0.062,-0.024,\n"
                                      "                      -0.048,-0.050,-0.017);\n"
                                      "float3x3 r = float3x3(-0.011,-0.032,-0.007,\n"
                                      "                       0.377, 0.761, 0.009,\n"
                                      "                      -0.026,-0.093, 1.234);\n"
                                      "ocol0 = float4(mul(l, c0.rgb) + mul(r, c1.rgb), c0.a);\n"
                                      "}\n"};

// TODO: Improve sampling algorithm!
const char color_copy_program_code_msaa[] = {
    "#define SAMPLES %d\n"
    "sampler samp0 : register(s0);\n"
    "Texture2DMSArray<float4, SAMPLES> Tex0 : register(t0);\n"
    "void main(\n"
    "out float4 ocol0 : SV_Target,\n"
    "in float4 pos : SV_Position,\n"
    "in float3 uv0 : TEXCOORD0){\n"
    "int width, height, slices, samples;\n"
    "Tex0.GetDimensions(width, height, slices, samples);\n"
    "ocol0 = 0;\n"
    "for(int i = 0; i < SAMPLES; ++i)\n"
    "	ocol0 += Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i);\n"
    "ocol0 /= SAMPLES;\n"
    "}\n"};

const char color_matrix_program_code[] = {"sampler samp0 : register(s0);\n"
                                          "Texture2DArray Tex0 : register(t0);\n"
                                          "uniform float4 cColMatrix[7] : register(c0);\n"
                                          "void main(\n"
                                          "out float4 ocol0 : SV_Target,\n"
                                          "in float4 pos : SV_Position,\n"
                                          "in float3 uv0 : TEXCOORD0){\n"
                                          "float4 texcol = Tex0.Sample(samp0,uv0);\n"
                                          "texcol = floor(texcol * cColMatrix[5])*cColMatrix[6];\n"
                                          "ocol0 = "
                                          "float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix["
                                          "1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3]))"
                                          " + cColMatrix[4];\n"
                                          "}\n"};

const char color_matrix_program_code_msaa[] = {
    "#define SAMPLES %d\n"
    "sampler samp0 : register(s0);\n"
    "Texture2DMSArray<float4, SAMPLES> Tex0 : register(t0);\n"
    "uniform float4 cColMatrix[7] : register(c0);\n"
    "void main(\n"
    "out float4 ocol0 : SV_Target,\n"
    "in float4 pos : SV_Position,\n"
    "in float3 uv0 : TEXCOORD0){\n"
    "int width, height, slices, samples;\n"
    "Tex0.GetDimensions(width, height, slices, samples);\n"
    "float4 texcol = 0;\n"
    "for(int i = 0; i < SAMPLES; ++i)\n"
    "	texcol += Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i);\n"
    "texcol /= SAMPLES;\n"
    "texcol = floor(texcol * cColMatrix[5])*cColMatrix[6];\n"
    "ocol0 = "
    "float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot("
    "texcol,cColMatrix[3])) + cColMatrix[4];\n"
    "}\n"};

const char depth_matrix_program[] = {"sampler samp0 : register(s0);\n"
                                     "Texture2DArray Tex0 : register(t0);\n"
                                     "uniform float4 cColMatrix[7] : register(c0);\n"
                                     "void main(\n"
                                     "out float4 ocol0 : SV_Target,\n"
                                     " in float4 pos : SV_Position,\n"
                                     " in float3 uv0 : TEXCOORD0){\n"
                                     "	float4 texcol = Tex0.Sample(samp0,uv0);\n"
                                     "	int depth = int((1.0 - texcol.x) * 16777216.0);\n"

                                     // Convert to Z24 format
                                     "	int4 workspace;\n"
                                     "	workspace.r = (depth >> 16) & 255;\n"
                                     "	workspace.g = (depth >> 8) & 255;\n"
                                     "	workspace.b = depth & 255;\n"

                                     // Convert to Z4 format
                                     "	workspace.a = (depth >> 16) & 0xF0;\n"

                                     // Normalize components to [0.0..1.0]
                                     "	texcol = float4(workspace) / 255.0;\n"

                                     // Apply color matrix
                                     "	ocol0 = "
                                     "float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),"
                                     "dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + "
                                     "cColMatrix[4];\n"
                                     "}\n"};

const char depth_matrix_program_msaa[] = {
    "#define SAMPLES %d\n"
    "sampler samp0 : register(s0);\n"
    "Texture2DMSArray<float4, SAMPLES> Tex0 : register(t0);\n"
    "uniform float4 cColMatrix[7] : register(c0);\n"
    "void main(\n"
    "out float4 ocol0 : SV_Target,\n"
    " in float4 pos : SV_Position,\n"
    " in float3 uv0 : TEXCOORD0){\n"
    "	int width, height, slices, samples;\n"
    "	Tex0.GetDimensions(width, height, slices, samples);\n"
    "	float4 texcol = 0;\n"
    "	for(int i = 0; i < SAMPLES; ++i)\n"
    "		texcol += Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i);\n"
    "	texcol /= SAMPLES;\n"
    "	int depth = int((1.0 - texcol.x) * 16777216.0);\n"

    // Convert to Z24 format
    "	int4 workspace;\n"
    "	workspace.r = (depth >> 16) & 255;\n"
    "	workspace.g = (depth >> 8) & 255;\n"
    "	workspace.b = depth & 255;\n"

    // Convert to Z4 format
    "	workspace.a = (depth >> 16) & 0xF0;\n"

    // Normalize components to [0.0..1.0]
    "	texcol = float4(workspace) / 255.0;\n"

    // Apply color matrix
    "	ocol0 = "
    "float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot("
    "texcol,cColMatrix[3])) + cColMatrix[4];\n"
    "}\n"};

const char depth_resolve_program[] = {
    "#define SAMPLES %d\n"
    "Texture2DMSArray<float4, SAMPLES> Tex0 : register(t0);\n"
    "void main(\n"
    "	 out float ocol0 : SV_Target,\n"
    "    in float4 pos : SV_Position,\n"
    "    in float3 uv0 : TEXCOORD0)\n"
    "{\n"
    "	int width, height, slices, samples;\n"
    "	Tex0.GetDimensions(width, height, slices, samples);\n"
    "	ocol0 = Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), 0).x;\n"
    "	for(int i = 1; i < SAMPLES; ++i)\n"
    "		ocol0 = min(ocol0, Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i).x);\n"
    "}\n"};

const char reint_rgba6_to_rgb8[] = {"sampler samp0 : register(s0);\n"
                                    "Texture2DArray Tex0 : register(t0);\n"
                                    "void main(\n"
                                    "	out float4 ocol0 : SV_Target,\n"
                                    "	in float4 pos : SV_Position,\n"
                                    "	in float3 uv0 : TEXCOORD0)\n"
                                    "{\n"
                                    "	int4 src6 = round(Tex0.Sample(samp0,uv0) * 63.f);\n"
                                    "	int4 dst8;\n"
                                    "	dst8.r = (src6.r << 2) | (src6.g >> 4);\n"
                                    "	dst8.g = ((src6.g & 0xF) << 4) | (src6.b >> 2);\n"
                                    "	dst8.b = ((src6.b & 0x3) << 6) | src6.a;\n"
                                    "	dst8.a = 255;\n"
                                    "	ocol0 = (float4)dst8 / 255.f;\n"
                                    "}"};

const char reint_rgba6_to_rgb8_msaa[] = {
    "#define SAMPLES %d\n"
    "sampler samp0 : register(s0);\n"
    "Texture2DMSArray<float4, SAMPLES> Tex0 : register(t0);\n"
    "void main(\n"
    "	out float4 ocol0 : SV_Target,\n"
    "	in float4 pos : SV_Position,\n"
    "	in float3 uv0 : TEXCOORD0)\n"
    "{\n"
    "	int width, height, slices, samples;\n"
    "	Tex0.GetDimensions(width, height, slices, samples);\n"
    "	float4 texcol = 0;\n"
    "	for (int i = 0; i < SAMPLES; ++i)\n"
    "		texcol += Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i);\n"
    "	texcol /= SAMPLES;\n"
    "	int4 src6 = round(texcol * 63.f);\n"
    "	int4 dst8;\n"
    "	dst8.r = (src6.r << 2) | (src6.g >> 4);\n"
    "	dst8.g = ((src6.g & 0xF) << 4) | (src6.b >> 2);\n"
    "	dst8.b = ((src6.b & 0x3) << 6) | src6.a;\n"
    "	dst8.a = 255;\n"
    "	ocol0 = (float4)dst8 / 255.f;\n"
    "}"};

const char reint_rgb8_to_rgba6[] = {"sampler samp0 : register(s0);\n"
                                    "Texture2DArray Tex0 : register(t0);\n"
                                    "void main(\n"
                                    "	out float4 ocol0 : SV_Target,\n"
                                    "	in float4 pos : SV_Position,\n"
                                    "	in float3 uv0 : TEXCOORD0)\n"
                                    "{\n"
                                    "	int4 src8 = round(Tex0.Sample(samp0,uv0) * 255.f);\n"
                                    "	int4 dst6;\n"
                                    "	dst6.r = src8.r >> 2;\n"
                                    "	dst6.g = ((src8.r & 0x3) << 4) | (src8.g >> 4);\n"
                                    "	dst6.b = ((src8.g & 0xF) << 2) | (src8.b >> 6);\n"
                                    "	dst6.a = src8.b & 0x3F;\n"
                                    "	ocol0 = (float4)dst6 / 63.f;\n"
                                    "}\n"};

const char reint_rgb8_to_rgba6_msaa[] = {
    "#define SAMPLES %d\n"
    "sampler samp0 : register(s0);\n"
    "Texture2DMSArray<float4, SAMPLES> Tex0 : register(t0);\n"
    "void main(\n"
    "	out float4 ocol0 : SV_Target,\n"
    "	in float4 pos : SV_Position,\n"
    "	in float3 uv0 : TEXCOORD0)\n"
    "{\n"
    "	int width, height, slices, samples;\n"
    "	Tex0.GetDimensions(width, height, slices, samples);\n"
    "	float4 texcol = 0;\n"
    "	for (int i = 0; i < SAMPLES; ++i)\n"
    "		texcol += Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i);\n"
    "	texcol /= SAMPLES;\n"
    "	int4 src8 = round(texcol * 255.f);\n"
    "	int4 dst6;\n"
    "	dst6.r = src8.r >> 2;\n"
    "	dst6.g = ((src8.r & 0x3) << 4) | (src8.g >> 4);\n"
    "	dst6.b = ((src8.g & 0xF) << 2) | (src8.b >> 6);\n"
    "	dst6.a = src8.b & 0x3F;\n"
    "	ocol0 = (float4)dst6 / 63.f;\n"
    "}\n"};

ID3D11PixelShader* PixelShaderCache::ReinterpRGBA6ToRGB8(bool multisampled)
{
  if (!multisampled || g_ActiveConfig.iMultisamples <= 1)
  {
    if (!s_rgba6_to_rgb8[0])
    {
      s_rgba6_to_rgb8[0] = D3D::CompileAndCreatePixelShader(reint_rgba6_to_rgb8);
      CHECK(s_rgba6_to_rgb8[0], "Create RGBA6 to RGB8 pixel shader");
      D3D::SetDebugObjectName(s_rgba6_to_rgb8[0], "RGBA6 to RGB8 pixel shader");
    }
    return s_rgba6_to_rgb8[0];
  }
  else if (!s_rgba6_to_rgb8[1])
  {
    // create MSAA shader for current AA mode
    std::string buf = StringFromFormat(reint_rgba6_to_rgb8_msaa, g_ActiveConfig.iMultisamples);
    s_rgba6_to_rgb8[1] = D3D::CompileAndCreatePixelShader(buf);

    CHECK(s_rgba6_to_rgb8[1], "Create RGBA6 to RGB8 MSAA pixel shader");
    D3D::SetDebugObjectName(s_rgba6_to_rgb8[1], "RGBA6 to RGB8 MSAA pixel shader");
  }
  return s_rgba6_to_rgb8[1];
}

ID3D11PixelShader* PixelShaderCache::ReinterpRGB8ToRGBA6(bool multisampled)
{
  if (!multisampled || g_ActiveConfig.iMultisamples <= 1)
  {
    if (!s_rgb8_to_rgba6[0])
    {
      s_rgb8_to_rgba6[0] = D3D::CompileAndCreatePixelShader(reint_rgb8_to_rgba6);
      CHECK(s_rgb8_to_rgba6[0], "Create RGB8 to RGBA6 pixel shader");
      D3D::SetDebugObjectName(s_rgb8_to_rgba6[0], "RGB8 to RGBA6 pixel shader");
    }
    return s_rgb8_to_rgba6[0];
  }
  else if (!s_rgb8_to_rgba6[1])
  {
    // create MSAA shader for current AA mode
    std::string buf = StringFromFormat(reint_rgb8_to_rgba6_msaa, g_ActiveConfig.iMultisamples);
    s_rgb8_to_rgba6[1] = D3D::CompileAndCreatePixelShader(buf);

    CHECK(s_rgb8_to_rgba6[1], "Create RGB8 to RGBA6 MSAA pixel shader");
    D3D::SetDebugObjectName(s_rgb8_to_rgba6[1], "RGB8 to RGBA6 MSAA pixel shader");
  }
  return s_rgb8_to_rgba6[1];
}

ID3D11PixelShader* PixelShaderCache::GetColorCopyProgram(bool multisampled)
{
  if (!multisampled || g_ActiveConfig.iMultisamples <= 1)
  {
    return s_ColorCopyProgram[0];
  }
  else if (s_ColorCopyProgram[1])
  {
    return s_ColorCopyProgram[1];
  }
  else
  {
    // create MSAA shader for current AA mode
    std::string buf = StringFromFormat(color_copy_program_code_msaa, g_ActiveConfig.iMultisamples);
    s_ColorCopyProgram[1] = D3D::CompileAndCreatePixelShader(buf);
    CHECK(s_ColorCopyProgram[1] != nullptr, "Create color copy MSAA pixel shader");
    D3D::SetDebugObjectName((ID3D11DeviceChild*)s_ColorCopyProgram[1],
                            "color copy MSAA pixel shader");
    return s_ColorCopyProgram[1];
  }
}

ID3D11PixelShader* PixelShaderCache::GetColorMatrixProgram(bool multisampled)
{
  if (!multisampled || g_ActiveConfig.iMultisamples <= 1)
  {
    return s_ColorMatrixProgram[0];
  }
  else if (s_ColorMatrixProgram[1])
  {
    return s_ColorMatrixProgram[1];
  }
  else
  {
    // create MSAA shader for current AA mode
    std::string buf =
        StringFromFormat(color_matrix_program_code_msaa, g_ActiveConfig.iMultisamples);
    s_ColorMatrixProgram[1] = D3D::CompileAndCreatePixelShader(buf);
    CHECK(s_ColorMatrixProgram[1] != nullptr, "Create color matrix MSAA pixel shader");
    D3D::SetDebugObjectName((ID3D11DeviceChild*)s_ColorMatrixProgram[1],
                            "color matrix MSAA pixel shader");
    return s_ColorMatrixProgram[1];
  }
}

ID3D11PixelShader* PixelShaderCache::GetDepthMatrixProgram(bool multisampled)
{
  if (!multisampled || g_ActiveConfig.iMultisamples <= 1)
  {
    return s_DepthMatrixProgram[0];
  }
  else if (s_DepthMatrixProgram[1])
  {
    return s_DepthMatrixProgram[1];
  }
  else
  {
    // create MSAA shader for current AA mode
    std::string buf = StringFromFormat(depth_matrix_program_msaa, g_ActiveConfig.iMultisamples);
    s_DepthMatrixProgram[1] = D3D::CompileAndCreatePixelShader(buf);
    CHECK(s_DepthMatrixProgram[1] != nullptr, "Create depth matrix MSAA pixel shader");
    D3D::SetDebugObjectName((ID3D11DeviceChild*)s_DepthMatrixProgram[1],
                            "depth matrix MSAA pixel shader");
    return s_DepthMatrixProgram[1];
  }
}

ID3D11PixelShader* PixelShaderCache::GetClearProgram()
{
  return s_ClearProgram;
}

ID3D11PixelShader* PixelShaderCache::GetAnaglyphProgram()
{
  return s_AnaglyphProgram;
}

ID3D11PixelShader* PixelShaderCache::GetDepthResolveProgram()
{
  if (s_DepthResolveProgram != nullptr)
    return s_DepthResolveProgram;

  // create MSAA shader for current AA mode
  std::string buf = StringFromFormat(depth_resolve_program, g_ActiveConfig.iMultisamples);
  s_DepthResolveProgram = D3D::CompileAndCreatePixelShader(buf);
  CHECK(s_DepthResolveProgram != nullptr, "Create depth matrix MSAA pixel shader");
  D3D::SetDebugObjectName((ID3D11DeviceChild*)s_DepthResolveProgram, "depth resolve pixel shader");
  return s_DepthResolveProgram;
}

ID3D11Buffer*& PixelShaderCache::GetConstantBuffer()
{
  // TODO: divide the global variables of the generated shaders into about 5 constant buffers to
  // speed this up
  if (PixelShaderManager::dirty)
  {
    D3D11_MAPPED_SUBRESOURCE map;
    D3D::context->Map(pscbuf, 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
    memcpy(map.pData, &PixelShaderManager::constants, sizeof(PixelShaderConstants));
    D3D::context->Unmap(pscbuf, 0);
    PixelShaderManager::dirty = false;

    ADDSTAT(stats.thisFrame.bytesUniformStreamed, sizeof(PixelShaderConstants));
  }
  return pscbuf;
}

// this class will load the precompiled shaders into our cache
class PixelShaderCacheInserter : public LinearDiskCacheReader<PixelShaderUid, u8>
{
public:
  void Read(const PixelShaderUid& key, const u8* value, u32 value_size)
  {
    PixelShaderCache::InsertByteCode(key, value, value_size);
  }
};

void PixelShaderCache::Init()
{
  unsigned int cbsize = Common::AlignUp(static_cast<unsigned int>(sizeof(PixelShaderConstants)),
                                        16);  // must be a multiple of 16
  D3D11_BUFFER_DESC cbdesc = CD3D11_BUFFER_DESC(cbsize, D3D11_BIND_CONSTANT_BUFFER,
                                                D3D11_USAGE_DYNAMIC, D3D11_CPU_ACCESS_WRITE);
  D3D::device->CreateBuffer(&cbdesc, nullptr, &pscbuf);
  CHECK(pscbuf != nullptr, "Create pixel shader constant buffer");
  D3D::SetDebugObjectName((ID3D11DeviceChild*)pscbuf,
                          "pixel shader constant buffer used to emulate the GX pipeline");

  // used when drawing clear quads
  s_ClearProgram = D3D::CompileAndCreatePixelShader(clear_program_code);
  CHECK(s_ClearProgram != nullptr, "Create clear pixel shader");
  D3D::SetDebugObjectName((ID3D11DeviceChild*)s_ClearProgram, "clear pixel shader");

  // used for anaglyph stereoscopy
  s_AnaglyphProgram = D3D::CompileAndCreatePixelShader(anaglyph_program_code);
  CHECK(s_AnaglyphProgram != nullptr, "Create anaglyph pixel shader");
  D3D::SetDebugObjectName((ID3D11DeviceChild*)s_AnaglyphProgram, "anaglyph pixel shader");

  // used when copying/resolving the color buffer
  s_ColorCopyProgram[0] = D3D::CompileAndCreatePixelShader(color_copy_program_code);
  CHECK(s_ColorCopyProgram[0] != nullptr, "Create color copy pixel shader");
  D3D::SetDebugObjectName((ID3D11DeviceChild*)s_ColorCopyProgram[0], "color copy pixel shader");

  // used for color conversion
  s_ColorMatrixProgram[0] = D3D::CompileAndCreatePixelShader(color_matrix_program_code);
  CHECK(s_ColorMatrixProgram[0] != nullptr, "Create color matrix pixel shader");
  D3D::SetDebugObjectName((ID3D11DeviceChild*)s_ColorMatrixProgram[0], "color matrix pixel shader");

  // used for depth copy
  s_DepthMatrixProgram[0] = D3D::CompileAndCreatePixelShader(depth_matrix_program);
  CHECK(s_DepthMatrixProgram[0] != nullptr, "Create depth matrix pixel shader");
  D3D::SetDebugObjectName((ID3D11DeviceChild*)s_DepthMatrixProgram[0], "depth matrix pixel shader");

  Clear();

  if (!File::Exists(File::GetUserPath(D_SHADERCACHE_IDX)))
    File::CreateDir(File::GetUserPath(D_SHADERCACHE_IDX));

  SETSTAT(stats.numPixelShadersCreated, 0);
  SETSTAT(stats.numPixelShadersAlive, 0);

  std::string cache_filename =
      StringFromFormat("%sdx11-%s-ps.cache", File::GetUserPath(D_SHADERCACHE_IDX).c_str(),
                       SConfig::GetInstance().m_strGameID.c_str());
  PixelShaderCacheInserter inserter;
  g_ps_disk_cache.OpenAndRead(cache_filename, inserter);

  last_entry = nullptr;
}

// ONLY to be used during shutdown.
void PixelShaderCache::Clear()
{
  for (auto& iter : PixelShaders)
    iter.second.Destroy();
  PixelShaders.clear();

  last_entry = nullptr;
}

// Used in Swap() when AA mode has changed
void PixelShaderCache::InvalidateMSAAShaders()
{
  SAFE_RELEASE(s_ColorCopyProgram[1]);
  SAFE_RELEASE(s_ColorMatrixProgram[1]);
  SAFE_RELEASE(s_DepthMatrixProgram[1]);
  SAFE_RELEASE(s_rgb8_to_rgba6[1]);
  SAFE_RELEASE(s_rgba6_to_rgb8[1]);
  SAFE_RELEASE(s_DepthResolveProgram);
}

void PixelShaderCache::Shutdown()
{
  SAFE_RELEASE(pscbuf);

  SAFE_RELEASE(s_ClearProgram);
  SAFE_RELEASE(s_AnaglyphProgram);
  SAFE_RELEASE(s_DepthResolveProgram);
  for (int i = 0; i < 2; ++i)
  {
    SAFE_RELEASE(s_ColorCopyProgram[i]);
    SAFE_RELEASE(s_ColorMatrixProgram[i]);
    SAFE_RELEASE(s_DepthMatrixProgram[i]);
    SAFE_RELEASE(s_rgba6_to_rgb8[i]);
    SAFE_RELEASE(s_rgb8_to_rgba6[i]);
  }

  Clear();
  g_ps_disk_cache.Sync();
  g_ps_disk_cache.Close();
}

bool PixelShaderCache::SetShader()
{
  PixelShaderUid uid = GetPixelShaderUid();

  // Check if the shader is already set
  if (last_entry)
  {
    if (uid == last_uid)
    {
      GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
      return (last_entry->shader != nullptr);
    }
  }

  last_uid = uid;

  // Check if the shader is already in the cache
  PSCache::iterator iter;
  iter = PixelShaders.find(uid);
  if (iter != PixelShaders.end())
  {
    const PSCacheEntry& entry = iter->second;
    last_entry = &entry;

    GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
    return (entry.shader != nullptr);
  }

  // Need to compile a new shader
  ShaderCode code = GeneratePixelShaderCode(APIType::D3D, uid.GetUidData());

  D3DBlob* pbytecode;
  if (!D3D::CompilePixelShader(code.GetBuffer(), &pbytecode))
  {
    GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
    return false;
  }

  // Insert the bytecode into the caches
  g_ps_disk_cache.Append(uid, pbytecode->Data(), pbytecode->Size());

  bool success = InsertByteCode(uid, pbytecode->Data(), pbytecode->Size());
  pbytecode->Release();

  GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
  return success;
}

bool PixelShaderCache::InsertByteCode(const PixelShaderUid& uid, const void* bytecode,
                                      unsigned int bytecodelen)
{
  ID3D11PixelShader* shader = D3D::CreatePixelShaderFromByteCode(bytecode, bytecodelen);
  if (shader == nullptr)
    return false;

  // TODO: Somehow make the debug name a bit more specific
  D3D::SetDebugObjectName((ID3D11DeviceChild*)shader, "a pixel shader of PixelShaderCache");

  // Make an entry in the table
  PSCacheEntry newentry;
  newentry.shader = shader;
  PixelShaders[uid] = newentry;
  last_entry = &PixelShaders[uid];

  if (!shader)
  {
    // INCSTAT(stats.numPixelShadersFailed);
    return false;
  }

  INCSTAT(stats.numPixelShadersCreated);
  SETSTAT(stats.numPixelShadersAlive, PixelShaders.size());
  return true;
}

}  // DX11
namespace OGL
{

static const u32 UBO_LENGTH = 32*1024*1024;

u32 ProgramShaderCache::s_ubo_buffer_size;
s32 ProgramShaderCache::s_ubo_align;

static StreamBuffer *s_buffer;
static int num_failures = 0;

LinearDiskCache<SHADERUID, u8> g_program_disk_cache;
static GLuint CurrentProgram = 0;
ProgramShaderCache::PCache ProgramShaderCache::pshaders;
ProgramShaderCache::PCacheEntry* ProgramShaderCache::last_entry;
SHADERUID ProgramShaderCache::last_uid;
UidChecker<PixelShaderUid,PixelShaderCode> ProgramShaderCache::pixel_uid_checker;
UidChecker<VertexShaderUid,VertexShaderCode> ProgramShaderCache::vertex_uid_checker;

static char s_glsl_header[1024] = "";

void SHADER::SetProgramVariables()
{
	// glsl shader must be bind to set samplers
	Bind();

	// Bind UBO
	if (!g_ActiveConfig.backend_info.bSupportShadingLanguage420pack)
	{
		GLint PSBlock_id = glGetUniformBlockIndex(glprogid, "PSBlock");
		GLint VSBlock_id = glGetUniformBlockIndex(glprogid, "VSBlock");

		if(PSBlock_id != -1)
			glUniformBlockBinding(glprogid, PSBlock_id, 1);
		if(VSBlock_id != -1)
			glUniformBlockBinding(glprogid, VSBlock_id, 2);
	}

	// Bind Texture Sampler
	for (int a = 0; a <= 9; ++a)
	{
		char name[8];
		snprintf(name, 8, "samp%d", a);

		// Still need to get sampler locations since we aren't binding them statically in the shaders
		int loc = glGetUniformLocation(glprogid, name);
		if (loc != -1)
			glUniform1i(loc, a);
	}

}

void SHADER::SetProgramBindings()
{
	if (g_ActiveConfig.backend_info.bSupportsDualSourceBlend)
	{
		// So we do support extended blending
		// So we need to set a few more things here.
		// Bind our out locations
		glBindFragDataLocationIndexed(glprogid, 0, 0, "ocol0");
		glBindFragDataLocationIndexed(glprogid, 0, 1, "ocol1");
	}
	// Need to set some attribute locations
	glBindAttribLocation(glprogid, SHADER_POSITION_ATTRIB, "rawpos");

	glBindAttribLocation(glprogid, SHADER_POSMTX_ATTRIB,   "fposmtx");

	glBindAttribLocation(glprogid, SHADER_COLOR0_ATTRIB,   "color0");
	glBindAttribLocation(glprogid, SHADER_COLOR1_ATTRIB,   "color1");

	glBindAttribLocation(glprogid, SHADER_NORM0_ATTRIB,    "rawnorm0");
	glBindAttribLocation(glprogid, SHADER_NORM1_ATTRIB,    "rawnorm1");
	glBindAttribLocation(glprogid, SHADER_NORM2_ATTRIB,    "rawnorm2");

	for(int i=0; i<8; i++) {
		char attrib_name[8];
		snprintf(attrib_name, 8, "tex%d", i);
		glBindAttribLocation(glprogid, SHADER_TEXTURE0_ATTRIB+i, attrib_name);
	}
}

void SHADER::Bind()
{
	if(CurrentProgram != glprogid)
	{
		glUseProgram(glprogid);
		CurrentProgram = glprogid;
	}
}

void ProgramShaderCache::UploadConstants()
{
	if(PixelShaderManager::dirty || VertexShaderManager::dirty)
	{
		auto buffer = s_buffer->Map(s_ubo_buffer_size, s_ubo_align);

		memcpy(buffer.first,
			&PixelShaderManager::constants, sizeof(PixelShaderConstants));

		memcpy(buffer.first + ROUND_UP(sizeof(PixelShaderConstants), s_ubo_align),
			&VertexShaderManager::constants, sizeof(VertexShaderConstants));

		s_buffer->Unmap(s_ubo_buffer_size);
		glBindBufferRange(GL_UNIFORM_BUFFER, 1, s_buffer->m_buffer, buffer.second,
					sizeof(PixelShaderConstants));
		glBindBufferRange(GL_UNIFORM_BUFFER, 2, s_buffer->m_buffer, buffer.second + ROUND_UP(sizeof(PixelShaderConstants), s_ubo_align),
					sizeof(VertexShaderConstants));

		PixelShaderManager::dirty = false;
		VertexShaderManager::dirty = false;

		ADDSTAT(stats.thisFrame.bytesUniformStreamed, s_ubo_buffer_size);
	}
}

GLuint ProgramShaderCache::GetCurrentProgram(void)
{
	return CurrentProgram;
}

SHADER* ProgramShaderCache::SetShader ( DSTALPHA_MODE dstAlphaMode, u32 components )
{
	SHADERUID uid;
	GetShaderId(&uid, dstAlphaMode, components);

	// Check if the shader is already set
	if (last_entry)
	{
		if (uid == last_uid)
		{
			GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
			last_entry->shader.Bind();
			return &last_entry->shader;
		}
	}

	last_uid = uid;

	// Check if shader is already in cache
	PCache::iterator iter = pshaders.find(uid);
	if (iter != pshaders.end())
	{
		PCacheEntry *entry = &iter->second;
		last_entry = entry;

		GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
		last_entry->shader.Bind();
		return &last_entry->shader;
	}

	// Make an entry in the table
	PCacheEntry& newentry = pshaders[uid];
	last_entry = &newentry;
	newentry.in_cache = 0;

	VertexShaderCode vcode;
	PixelShaderCode pcode;
	GenerateVertexShaderCode(vcode, components, API_OPENGL);
	GeneratePixelShaderCode(pcode, dstAlphaMode, API_OPENGL, components);

	if (g_ActiveConfig.bEnableShaderDebugging)
	{
		newentry.shader.strvprog = vcode.GetBuffer();
		newentry.shader.strpprog = pcode.GetBuffer();
	}

#if defined(_DEBUG) || defined(DEBUGFAST)
	if (g_ActiveConfig.iLog & CONF_SAVESHADERS) {
		static int counter = 0;
		char szTemp[MAX_PATH];
		sprintf(szTemp, "%svs_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++);
		SaveData(szTemp, vcode.GetBuffer());
		sprintf(szTemp, "%sps_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++);
		SaveData(szTemp, pcode.GetBuffer());
	}
#endif

	if (!CompileShader(newentry.shader, vcode.GetBuffer(), pcode.GetBuffer())) {
		GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
		return NULL;
	}

	INCSTAT(stats.numPixelShadersCreated);
	SETSTAT(stats.numPixelShadersAlive, pshaders.size());
	GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);

	last_entry->shader.Bind();
	return &last_entry->shader;
}

bool ProgramShaderCache::CompileShader ( SHADER& shader, const char* vcode, const char* pcode )
{
	GLuint vsid = CompileSingleShader(GL_VERTEX_SHADER, vcode);
	GLuint psid = CompileSingleShader(GL_FRAGMENT_SHADER, pcode);

	if(!vsid || !psid)
	{
		glDeleteShader(vsid);
		glDeleteShader(psid);
		return false;
	}

	GLuint pid = shader.glprogid = glCreateProgram();;

	glAttachShader(pid, vsid);
	glAttachShader(pid, psid);

	if (g_ogl_config.bSupportsGLSLCache)
		glProgramParameteri(pid, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE);

	shader.SetProgramBindings();

	glLinkProgram(pid);

	// original shaders aren't needed any more
	glDeleteShader(vsid);
	glDeleteShader(psid);

	GLint linkStatus;
	glGetProgramiv(pid, GL_LINK_STATUS, &linkStatus);
	GLsizei length = 0;
	glGetProgramiv(pid, GL_INFO_LOG_LENGTH, &length);
	if (linkStatus != GL_TRUE || (length > 1 && DEBUG_GLSL))
	{
		GLsizei charsWritten;
		GLchar* infoLog = new GLchar[length];
		glGetProgramInfoLog(pid, length, &charsWritten, infoLog);
		ERROR_LOG(VIDEO, "Program info log:\n%s", infoLog);
		char szTemp[MAX_PATH];
		sprintf(szTemp, "%sbad_p_%d.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++);
		std::ofstream file;
		OpenFStream(file, szTemp, std::ios_base::out);
		file << s_glsl_header << vcode << s_glsl_header << pcode << infoLog;
		file.close();

		if(linkStatus != GL_TRUE)
			PanicAlert("Failed to link shaders!\nThis usually happens when trying to use Dolphin with an outdated GPU or integrated GPU like the Intel GMA series.\n\nIf you're sure this is Dolphin's error anyway, post the contents of %s along with this error message at the forums.\n\nDebug info (%s, %s, %s):\n%s",
				szTemp,
				g_ogl_config.gl_vendor,
				g_ogl_config.gl_renderer,
				g_ogl_config.gl_version,
				infoLog);

		delete [] infoLog;
	}
	if (linkStatus != GL_TRUE)
	{
		// Compile failed
		ERROR_LOG(VIDEO, "Program linking failed; see info log");

		// Don't try to use this shader
		glDeleteProgram(pid);
		return false;
	}

	shader.SetProgramVariables();

	return true;
}

GLuint ProgramShaderCache::CompileSingleShader (GLuint type, const char* code )
{
	GLuint result = glCreateShader(type);

	const char *src[] = {s_glsl_header, code};

	glShaderSource(result, 2, src, NULL);
	glCompileShader(result);
	GLint compileStatus;
	glGetShaderiv(result, GL_COMPILE_STATUS, &compileStatus);
	GLsizei length = 0;
	glGetShaderiv(result, GL_INFO_LOG_LENGTH, &length);

	if (DriverDetails::HasBug(DriverDetails::BUG_BROKENINFOLOG))
		length = 1024;

	if (compileStatus != GL_TRUE || (length > 1 && DEBUG_GLSL))
	{
		GLsizei charsWritten;
		GLchar* infoLog = new GLchar[length];
		glGetShaderInfoLog(result, length, &charsWritten, infoLog);
		ERROR_LOG(VIDEO, "%s Shader info log:\n%s", type==GL_VERTEX_SHADER ? "VS" : "PS", infoLog);
		char szTemp[MAX_PATH];
		sprintf(szTemp,
			"%sbad_%s_%04i.txt",
			File::GetUserPath(D_DUMP_IDX).c_str(),
			type==GL_VERTEX_SHADER ? "vs" : "ps",
			num_failures++);
		std::ofstream file;
		OpenFStream(file, szTemp, std::ios_base::out);
		file << s_glsl_header << code << infoLog;
		file.close();

		if(compileStatus != GL_TRUE)
			PanicAlert("Failed to compile %s shader!\nThis usually happens when trying to use Dolphin with an outdated GPU or integrated GPU like the Intel GMA series.\n\nIf you're sure this is Dolphin's error anyway, post the contents of %s along with this error message at the forums.\n\nDebug info (%s, %s, %s):\n%s",
				type==GL_VERTEX_SHADER ? "vertex" : "pixel",
				szTemp,
				g_ogl_config.gl_vendor,
				g_ogl_config.gl_renderer,
				g_ogl_config.gl_version,
				infoLog);

		delete[] infoLog;
	}
	if (compileStatus != GL_TRUE)
	{
		// Compile failed
		ERROR_LOG(VIDEO, "Shader compilation failed; see info log");

		// Don't try to use this shader
		glDeleteShader(result);
		return 0;
	}
	(void)GL_REPORT_ERROR();
	return result;
}

void ProgramShaderCache::GetShaderId(SHADERUID* uid, DSTALPHA_MODE dstAlphaMode, u32 components)
{
	GetPixelShaderUid(uid->puid, dstAlphaMode, API_OPENGL, components);
	GetVertexShaderUid(uid->vuid, components, API_OPENGL);

	if (g_ActiveConfig.bEnableShaderDebugging)
	{
		PixelShaderCode pcode;
		GeneratePixelShaderCode(pcode, dstAlphaMode, API_OPENGL, components);
		pixel_uid_checker.AddToIndexAndCheck(pcode, uid->puid, "Pixel", "p");

		VertexShaderCode vcode;
		GenerateVertexShaderCode(vcode, components, API_OPENGL);
		vertex_uid_checker.AddToIndexAndCheck(vcode, uid->vuid, "Vertex", "v");
	}
}

ProgramShaderCache::PCacheEntry ProgramShaderCache::GetShaderProgram(void)
{
	return *last_entry;
}

void ProgramShaderCache::Init(void)
{
	// We have to get the UBO alignment here because
	// if we generate a buffer that isn't aligned
	// then the UBO will fail.
	glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &s_ubo_align);

	s_ubo_buffer_size = ROUND_UP(sizeof(PixelShaderConstants), s_ubo_align) + ROUND_UP(sizeof(VertexShaderConstants), s_ubo_align);

	// We multiply by *4*4 because we need to get down to basic machine units.
	// So multiply by four to get how many floats we have from vec4s
	// Then once more to get bytes
	s_buffer = StreamBuffer::Create(GL_UNIFORM_BUFFER, UBO_LENGTH);

	// Read our shader cache, only if supported
	if (g_ogl_config.bSupportsGLSLCache && !g_Config.bEnableShaderDebugging)
	{
		GLint Supported;
		glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &Supported);
		if(!Supported)
		{
			ERROR_LOG(VIDEO, "GL_ARB_get_program_binary is supported, but no binary format is known. So disable shader cache.");
			g_ogl_config.bSupportsGLSLCache = false;
		}
		else
		{
			if (!File::Exists(File::GetUserPath(D_SHADERCACHE_IDX)))
				File::CreateDir(File::GetUserPath(D_SHADERCACHE_IDX).c_str());

			char cache_filename[MAX_PATH];
			sprintf(cache_filename, "%sogl-%s-shaders.cache", File::GetUserPath(D_SHADERCACHE_IDX).c_str(),
				SConfig::GetInstance().m_LocalCoreStartupParameter.m_strUniqueID.c_str());

			ProgramShaderCacheInserter inserter;
			g_program_disk_cache.OpenAndRead(cache_filename, inserter);
		}
		SETSTAT(stats.numPixelShadersAlive, pshaders.size());
	}

	CreateHeader();

	CurrentProgram = 0;
	last_entry = NULL;
}

void ProgramShaderCache::Shutdown(void)
{
	// store all shaders in cache on disk
	if (g_ogl_config.bSupportsGLSLCache && !g_Config.bEnableShaderDebugging)
	{
		PCache::iterator iter = pshaders.begin();
		for (; iter != pshaders.end(); ++iter)
		{
			if(iter->second.in_cache) continue;

			GLint binary_size;
			glGetProgramiv(iter->second.shader.glprogid, GL_PROGRAM_BINARY_LENGTH, &binary_size);
			if(!binary_size) continue;

			u8 *data = new u8[binary_size+sizeof(GLenum)];
			u8 *binary = data + sizeof(GLenum);
			GLenum *prog_format = (GLenum*)data;
			glGetProgramBinary(iter->second.shader.glprogid, binary_size, NULL, prog_format, binary);

			g_program_disk_cache.Append(iter->first, data, binary_size+sizeof(GLenum));
			delete [] data;
		}

		g_program_disk_cache.Sync();
		g_program_disk_cache.Close();
	}

	glUseProgram(0);

	PCache::iterator iter = pshaders.begin();
	for (; iter != pshaders.end(); ++iter)
		iter->second.Destroy();
	pshaders.clear();

	pixel_uid_checker.Invalidate();
	vertex_uid_checker.Invalidate();

	delete s_buffer;
	s_buffer = 0;
}

void ProgramShaderCache::CreateHeader ( void )
{
	GLSL_VERSION v = g_ogl_config.eSupportedGLSLVersion;
	snprintf(s_glsl_header, sizeof(s_glsl_header),
		"%s\n"
		"%s\n" // ubo
		"%s\n" // early-z
		"%s\n" // 420pack

		// Precision defines for GLSLES3
		"%s\n"

		"\n"// A few required defines and ones that will make our lives a lot easier
		"#define ATTRIN in\n"
		"#define ATTROUT out\n"
		"#define VARYIN %s\n"
		"#define VARYOUT %s\n"

		// Silly differences
		"#define float2 vec2\n"
		"#define float3 vec3\n"
		"#define float4 vec4\n"
		"#define int2 ivec2\n"
		"#define int3 ivec3\n"
		"#define int4 ivec4\n"

		// hlsl to glsl function translation
		"#define frac fract\n"
		"#define lerp mix\n"

		// Terrible hack, look at DriverDetails.h
		"%s\n"

		, v==GLSLES3 ? "#version 300 es" : v==GLSL_130 ? "#version 130" : v==GLSL_140 ? "#version 140" : "#version 150"
		, v<GLSL_140 ? "#extension GL_ARB_uniform_buffer_object : enable" : ""
		, g_ActiveConfig.backend_info.bSupportsEarlyZ ? "#extension GL_ARB_shader_image_load_store : enable" : ""
		, g_ActiveConfig.backend_info.bSupportShadingLanguage420pack ? "#extension GL_ARB_shading_language_420pack : enable" : ""

		, v==GLSLES3 ? "precision highp float;" : ""

		, DriverDetails::HasBug(DriverDetails::BUG_BROKENCENTROID) ? "in" : "centroid in"
		, DriverDetails::HasBug(DriverDetails::BUG_BROKENCENTROID) ? "out" : "centroid out"
		, DriverDetails::HasBug(DriverDetails::BUG_BROKENTEXTURESIZE) ? "#define textureSize(x, y) ivec2(1, 1)" : ""
	);
}


void ProgramShaderCache::ProgramShaderCacheInserter::Read ( const SHADERUID& key, const u8* value, u32 value_size )
{
	const u8 *binary = value+sizeof(GLenum);
	GLenum *prog_format = (GLenum*)value;
	GLint binary_size = value_size-sizeof(GLenum);

	PCacheEntry entry;
	entry.in_cache = 1;
	entry.shader.glprogid = glCreateProgram();
	glProgramBinary(entry.shader.glprogid, *prog_format, binary, binary_size);

	GLint success;
	glGetProgramiv(entry.shader.glprogid, GL_LINK_STATUS, &success);

	if (success)
	{
		pshaders[key] = entry;
		entry.shader.SetProgramVariables();
	}
	else
		glDeleteProgram(entry.shader.glprogid);
}


} // namespace OGL
Exemple #27
0
namespace DX12
{

static bool s_cache_is_corrupted = false;
static LinearDiskCache<SmallPsoDiskDesc, u8> s_pso_disk_cache;

class PipelineStateCacheInserter : public LinearDiskCacheReader<SmallPsoDiskDesc, u8>
{
public:
	void Read(const SmallPsoDiskDesc& key, const u8* value, u32 value_size)
	{
		if (s_cache_is_corrupted)
			return;

		D3D12_GRAPHICS_PIPELINE_STATE_DESC desc = {};
		desc.pRootSignature = D3D::default_root_signature;
		desc.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM; // This state changes in PSTextureEncoder::Encode.
		desc.DSVFormat = DXGI_FORMAT_D32_FLOAT; // This state changes in PSTextureEncoder::Encode.
		desc.IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF;
		desc.NumRenderTargets = 1;
		desc.SampleMask = UINT_MAX;
		desc.SampleDesc.Count = 1;
		desc.SampleDesc.Quality = 0;

		desc.GS = ShaderCache::GetGeometryShaderFromUid(&key.gs_uid);
		desc.PS = ShaderCache::GetPixelShaderFromUid(&key.ps_uid);
		desc.VS = ShaderCache::GetVertexShaderFromUid(&key.vs_uid);

		if (!desc.PS.pShaderBytecode || !desc.VS.pShaderBytecode)
		{
			s_cache_is_corrupted = true;
			return;
		}

		BlendState blend_state = {};
		blend_state.hex = key.blend_state_hex;
		desc.BlendState = StateCache::GetDesc12(blend_state);

		ZMode depth_stencil_state = {};
		depth_stencil_state.hex = key.depth_stencil_state_hex;
		desc.DepthStencilState = StateCache::GetDesc12(depth_stencil_state);

		RasterizerState rasterizer_state = {};
		rasterizer_state.hex = key.rasterizer_state_hex;
		desc.RasterizerState = StateCache::GetDesc12(rasterizer_state);

		desc.PrimitiveTopologyType = key.topology;

		// search for a cached native vertex format
		const PortableVertexDeclaration& native_vtx_decl = key.vertex_declaration;
		std::unique_ptr<NativeVertexFormat>& native = (*VertexLoaderManager::GetNativeVertexFormatMap())[native_vtx_decl];

		if (!native)
		{
			native.reset(g_vertex_manager->CreateNativeVertexFormat(native_vtx_decl));
		}

		desc.InputLayout = reinterpret_cast<D3DVertexFormat*>(native.get())->GetActiveInputLayout12();

		desc.CachedPSO.CachedBlobSizeInBytes = value_size;
		desc.CachedPSO.pCachedBlob = value;

		ID3D12PipelineState* pso = nullptr;
		HRESULT hr = D3D::device12->CreateGraphicsPipelineState(&desc, IID_PPV_ARGS(&pso));

		if (FAILED(hr))
		{
			// Failure can occur if disk cache is corrupted, or a driver upgrade invalidates the existing blobs.
			// In this case, we need to clear the disk cache.

			s_cache_is_corrupted = true;
			return;
		}

		SmallPsoDesc small_desc = {};
		small_desc.blend_state.hex = key.blend_state_hex;
		small_desc.depth_stencil_state.hex = key.depth_stencil_state_hex;
		small_desc.rasterizer_state.hex = key.rasterizer_state_hex;
		small_desc.gs_bytecode = desc.GS;
		small_desc.ps_bytecode = desc.PS;
		small_desc.vs_bytecode = desc.VS;
		small_desc.input_layout = reinterpret_cast<D3DVertexFormat*>(native.get());

		gx_state_cache.m_small_pso_map[small_desc] = pso;
	}
};

StateCache::StateCache()
{
	m_current_pso_desc = {};

	m_current_pso_desc.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM; // This state changes in PSTextureEncoder::Encode.
	m_current_pso_desc.DSVFormat = DXGI_FORMAT_D32_FLOAT; // This state changes in PSTextureEncoder::Encode.
	m_current_pso_desc.IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF;
	m_current_pso_desc.NumRenderTargets = 1;
	m_current_pso_desc.SampleMask = UINT_MAX;
}

void StateCache::Init()
{
	// Root signature isn't available at time of StateCache construction, so fill it in now.
	gx_state_cache.m_current_pso_desc.pRootSignature = D3D::default_root_signature;

	// Multi-sample configuration isn't available at time of StateCache construction, so fille it in now.
	gx_state_cache.m_current_pso_desc.SampleDesc.Count = g_ActiveConfig.iMultisamples;
	gx_state_cache.m_current_pso_desc.SampleDesc.Quality = 0;

	if (!File::Exists(File::GetUserPath(D_SHADERCACHE_IDX)))
		File::CreateDir(File::GetUserPath(D_SHADERCACHE_IDX));

	std::string cache_filename = StringFromFormat("%sdx12-%s-pso.cache", File::GetUserPath(D_SHADERCACHE_IDX).c_str(),
		SConfig::GetInstance().m_strUniqueID.c_str());

	PipelineStateCacheInserter inserter;
	s_pso_disk_cache.OpenAndRead(cache_filename, inserter);

	if (s_cache_is_corrupted)
	{
		// If a PSO fails to create, that means either:
		// - The file itself is corrupt.
		// - A driver/HW change has occurred, causing the existing cache blobs to be invalid.
		//
		// In either case, we want to re-create the disk cache. This should not be a frequent occurence.

		s_pso_disk_cache.Close();

		for (auto it : gx_state_cache.m_small_pso_map)
		{
			SAFE_RELEASE(it.second);
		}
		gx_state_cache.m_small_pso_map.clear();

		File::Delete(cache_filename);

		s_pso_disk_cache.OpenAndRead(cache_filename, inserter);

		s_cache_is_corrupted = false;
	}
}

D3D12_SAMPLER_DESC StateCache::GetDesc12(SamplerState state)
{
	const unsigned int d3d_mip_filters[4] =
	{
		TexMode0::TEXF_NONE,
		TexMode0::TEXF_POINT,
		TexMode0::TEXF_LINEAR,
		TexMode0::TEXF_NONE, //reserved
	};
	const D3D12_TEXTURE_ADDRESS_MODE d3d_clamps[4] =
	{
		D3D12_TEXTURE_ADDRESS_MODE_CLAMP,
		D3D12_TEXTURE_ADDRESS_MODE_WRAP,
		D3D12_TEXTURE_ADDRESS_MODE_MIRROR,
		D3D12_TEXTURE_ADDRESS_MODE_WRAP //reserved
	};

	D3D12_SAMPLER_DESC sampdc;

	unsigned int mip = d3d_mip_filters[state.min_filter & 3];

	sampdc.MaxAnisotropy = 1;
	if (g_ActiveConfig.iMaxAnisotropy > 0 && !SamplerCommon::IsBpTexMode0PointFiltering(state))
	{
		sampdc.Filter = D3D12_FILTER_ANISOTROPIC;
		sampdc.MaxAnisotropy = 1 << g_ActiveConfig.iMaxAnisotropy;
	}
	else if (state.min_filter & 4) // linear min filter
	{
		if (state.mag_filter) // linear mag filter
		{
			if (mip == TexMode0::TEXF_NONE)
				sampdc.Filter = D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT;
			else if (mip == TexMode0::TEXF_POINT)
				sampdc.Filter = D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT;
			else if (mip == TexMode0::TEXF_LINEAR)
				sampdc.Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR;
		}
		else // point mag filter
		{
			if (mip == TexMode0::TEXF_NONE)
				sampdc.Filter = D3D12_FILTER_MIN_LINEAR_MAG_MIP_POINT;
			else if (mip == TexMode0::TEXF_POINT)
				sampdc.Filter = D3D12_FILTER_MIN_LINEAR_MAG_MIP_POINT;
			else if (mip == TexMode0::TEXF_LINEAR)
				sampdc.Filter = D3D12_FILTER_MIN_LINEAR_MAG_POINT_MIP_LINEAR;
		}
	}
	else // point min filter
	{
		if (state.mag_filter) // linear mag filter
		{
			if (mip == TexMode0::TEXF_NONE)
				sampdc.Filter = D3D12_FILTER_MIN_POINT_MAG_LINEAR_MIP_POINT;
			else if (mip == TexMode0::TEXF_POINT)
				sampdc.Filter = D3D12_FILTER_MIN_POINT_MAG_LINEAR_MIP_POINT;
			else if (mip == TexMode0::TEXF_LINEAR)
				sampdc.Filter = D3D12_FILTER_MIN_POINT_MAG_MIP_LINEAR;
		}
		else // point mag filter
		{
			if (mip == TexMode0::TEXF_NONE)
				sampdc.Filter = D3D12_FILTER_MIN_MAG_MIP_POINT;
			else if (mip == TexMode0::TEXF_POINT)
				sampdc.Filter = D3D12_FILTER_MIN_MAG_MIP_POINT;
			else if (mip == TexMode0::TEXF_LINEAR)
				sampdc.Filter = D3D12_FILTER_MIN_MAG_POINT_MIP_LINEAR;
		}
	}

	sampdc.AddressU = d3d_clamps[state.wrap_s];
	sampdc.AddressV = d3d_clamps[state.wrap_t];
	sampdc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;

	sampdc.ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER;

	sampdc.BorderColor[0] = sampdc.BorderColor[1] = sampdc.BorderColor[2] = sampdc.BorderColor[3] = 1.0f;

	sampdc.MaxLOD = SamplerCommon::AreBpTexMode0MipmapsEnabled(state) ? state.max_lod / 16.f : 0.f;
	sampdc.MinLOD = std::min(state.min_lod / 16.f, sampdc.MaxLOD);
	sampdc.MipLODBias = static_cast<s32>(state.lod_bias) / 32.0f;

	return sampdc;
}

D3D12_BLEND GetBlendingAlpha(D3D12_BLEND blend)
{
	switch (blend)
	{
	case D3D12_BLEND_SRC_COLOR:
		return D3D12_BLEND_SRC_ALPHA;
	case D3D12_BLEND_INV_SRC_COLOR:
		return D3D12_BLEND_INV_SRC_ALPHA;
	case D3D12_BLEND_DEST_COLOR:
		return D3D12_BLEND_DEST_ALPHA;
	case D3D12_BLEND_INV_DEST_COLOR:
		return D3D12_BLEND_INV_DEST_ALPHA;

	default:
		return blend;
	}
}

D3D12_BLEND_DESC StateCache::GetDesc12(BlendState state)
{
	if (!state.blend_enable)
	{
		state.src_blend = D3D12_BLEND_ONE;
		state.dst_blend = D3D12_BLEND_ZERO;
		state.blend_op = D3D12_BLEND_OP_ADD;
		state.use_dst_alpha = false;
	}

	D3D12_BLEND_DESC blenddc = {
		FALSE, // BOOL AlphaToCoverageEnable;
		FALSE, // BOOL IndependentBlendEnable;
		{
			state.blend_enable,   // BOOL BlendEnable;
			FALSE,                // BOOL LogicOpEnable;
			state.src_blend,      // D3D12_BLEND SrcBlend;
			state.dst_blend,      // D3D12_BLEND DestBlend;
			state.blend_op,       // D3D12_BLEND_OP BlendOp;
			state.src_blend,      // D3D12_BLEND SrcBlendAlpha;
			state.dst_blend,      // D3D12_BLEND DestBlendAlpha;
			state.blend_op,       // D3D12_BLEND_OP BlendOpAlpha;
			D3D12_LOGIC_OP_NOOP,  // D3D12_LOGIC_OP LogicOp
			state.write_mask      // UINT8 RenderTargetWriteMask;
		}
	};

	blenddc.RenderTarget[0].SrcBlendAlpha = GetBlendingAlpha(blenddc.RenderTarget[0].SrcBlend);
	blenddc.RenderTarget[0].DestBlendAlpha = GetBlendingAlpha(blenddc.RenderTarget[0].DestBlend);

	if (state.use_dst_alpha)
	{
		// Colors should blend against SRC1_ALPHA
		if (blenddc.RenderTarget[0].SrcBlend == D3D12_BLEND_SRC_ALPHA)
			blenddc.RenderTarget[0].SrcBlend = D3D12_BLEND_SRC1_ALPHA;
		else if (blenddc.RenderTarget[0].SrcBlend == D3D12_BLEND_INV_SRC_ALPHA)
			blenddc.RenderTarget[0].SrcBlend = D3D12_BLEND_INV_SRC1_ALPHA;

		// Colors should blend against SRC1_ALPHA
		if (blenddc.RenderTarget[0].DestBlend == D3D12_BLEND_SRC_ALPHA)
			blenddc.RenderTarget[0].DestBlend = D3D12_BLEND_SRC1_ALPHA;
		else if (blenddc.RenderTarget[0].DestBlend == D3D12_BLEND_INV_SRC_ALPHA)
			blenddc.RenderTarget[0].DestBlend = D3D12_BLEND_INV_SRC1_ALPHA;

		blenddc.RenderTarget[0].SrcBlendAlpha = D3D12_BLEND_ONE;
		blenddc.RenderTarget[0].DestBlendAlpha = D3D12_BLEND_ZERO;
		blenddc.RenderTarget[0].BlendOpAlpha = D3D12_BLEND_OP_ADD;
	}

	return blenddc;
}

D3D12_RASTERIZER_DESC StateCache::GetDesc12(RasterizerState state)
{
	return {
		D3D12_FILL_MODE_SOLID,
		state.cull_mode,
		false,
		0,
		0.f,
		0,
		true,
		true,
		false,
		0,
		D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF
	};
}

inline D3D12_DEPTH_STENCIL_DESC StateCache::GetDesc12(ZMode state)
{
	D3D12_DEPTH_STENCIL_DESC depthdc;

	depthdc.StencilEnable = FALSE;
	depthdc.StencilReadMask = D3D12_DEFAULT_STENCIL_READ_MASK;
	depthdc.StencilWriteMask = D3D12_DEFAULT_STENCIL_WRITE_MASK;

	D3D12_DEPTH_STENCILOP_DESC defaultStencilOp = { D3D12_STENCIL_OP_KEEP, D3D12_STENCIL_OP_KEEP, D3D12_STENCIL_OP_KEEP, D3D12_COMPARISON_FUNC_ALWAYS };
	depthdc.FrontFace = defaultStencilOp;
	depthdc.BackFace = defaultStencilOp;

	const D3D12_COMPARISON_FUNC d3dCmpFuncs[8] =
	{
		D3D12_COMPARISON_FUNC_NEVER,
		D3D12_COMPARISON_FUNC_GREATER,
		D3D12_COMPARISON_FUNC_EQUAL,
		D3D12_COMPARISON_FUNC_GREATER_EQUAL,
		D3D12_COMPARISON_FUNC_LESS,
		D3D12_COMPARISON_FUNC_NOT_EQUAL,
		D3D12_COMPARISON_FUNC_LESS_EQUAL,
		D3D12_COMPARISON_FUNC_ALWAYS
	};

	if (state.testenable)
	{
		depthdc.DepthEnable = TRUE;
		depthdc.DepthWriteMask = state.updateenable ? D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO;
		depthdc.DepthFunc = d3dCmpFuncs[state.func];
	}
	else
	{
		// if the test is disabled write is disabled too
		depthdc.DepthEnable = FALSE;
		depthdc.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
	}

	return depthdc;
}

HRESULT StateCache::GetPipelineStateObjectFromCache(D3D12_GRAPHICS_PIPELINE_STATE_DESC* pso_desc, ID3D12PipelineState** pso)
{
	auto it = m_pso_map.find(*pso_desc);

	if (it == m_pso_map.end())
	{
		// Not found, create new PSO.

		ID3D12PipelineState* new_pso = nullptr;
		HRESULT hr = D3D::device12->CreateGraphicsPipelineState(pso_desc, IID_PPV_ARGS(&new_pso));

		if (FAILED(hr))
		{
			return hr;
		}

		m_pso_map[*pso_desc] = new_pso;
		*pso = new_pso;
	}
	else
	{
		*pso = it->second;
	}

	return S_OK;
}

HRESULT StateCache::GetPipelineStateObjectFromCache(SmallPsoDesc* pso_desc, ID3D12PipelineState** pso, D3D12_PRIMITIVE_TOPOLOGY_TYPE topology, const GeometryShaderUid* gs_uid, const PixelShaderUid* ps_uid, const VertexShaderUid* vs_uid)
{
	auto it = m_small_pso_map.find(*pso_desc);

	if (it == m_small_pso_map.end())
	{
		// Not found, create new PSO.

		// RootSignature, SampleMask, SampleDesc, NumRenderTargets, RTVFormats, DSVFormat
		// never change so they are set in constructor and forgotten.
		m_current_pso_desc.GS = pso_desc->gs_bytecode;
		m_current_pso_desc.PS = pso_desc->ps_bytecode;
		m_current_pso_desc.VS = pso_desc->vs_bytecode;
		m_current_pso_desc.BlendState = GetDesc12(pso_desc->blend_state);
		m_current_pso_desc.DepthStencilState = GetDesc12(pso_desc->depth_stencil_state);
		m_current_pso_desc.RasterizerState = GetDesc12(pso_desc->rasterizer_state);
		m_current_pso_desc.PrimitiveTopologyType = topology;
		m_current_pso_desc.InputLayout = pso_desc->input_layout->GetActiveInputLayout12();

		ID3D12PipelineState* new_pso = nullptr;
		HRESULT hr = D3D::device12->CreateGraphicsPipelineState(&m_current_pso_desc, IID_PPV_ARGS(&new_pso));

		if (FAILED(hr))
		{
			return hr;
		}

		m_small_pso_map[*pso_desc] = new_pso;
		*pso = new_pso;

		// This contains all of the information needed to reconstruct a PSO at startup.
		SmallPsoDiskDesc disk_desc = {};
		disk_desc.blend_state_hex = pso_desc->blend_state.hex;
		disk_desc.depth_stencil_state_hex = pso_desc->depth_stencil_state.hex;
		disk_desc.rasterizer_state_hex = pso_desc->rasterizer_state.hex;
		disk_desc.ps_uid = *ps_uid;
		disk_desc.vs_uid = *vs_uid;
		disk_desc.gs_uid = *gs_uid;
		disk_desc.vertex_declaration = pso_desc->input_layout->GetVertexDeclaration();
		disk_desc.topology = topology;

		// This shouldn't fail.. but if it does, don't cache to disk.
		ID3DBlob* psoBlob = nullptr;
		hr = new_pso->GetCachedBlob(&psoBlob);

		if (SUCCEEDED(hr))
		{
			s_pso_disk_cache.Append(disk_desc, reinterpret_cast<const u8*>(psoBlob->GetBufferPointer()), static_cast<u32>(psoBlob->GetBufferSize()));
			psoBlob->Release();
		}
	}
	else
	{
		*pso = it->second;
	}

	return S_OK;
}

void StateCache::OnMSAASettingsChanged()
{
	for (auto& it : m_small_pso_map)
	{
		SAFE_RELEASE(it.second);
	}
	m_small_pso_map.clear();

	// Update sample count for new PSOs being created
	gx_state_cache.m_current_pso_desc.SampleDesc.Count = g_ActiveConfig.iMultisamples;
}

void StateCache::Clear()
{
	for (auto& it : m_pso_map)
	{
		SAFE_RELEASE(it.second);
	}
	m_pso_map.clear();

	for (auto& it : m_small_pso_map)
	{
		SAFE_RELEASE(it.second);
	}
	m_small_pso_map.clear();

	s_pso_disk_cache.Sync();
	s_pso_disk_cache.Close();
}

}  // namespace DX12
Exemple #28
0
void ShaderCache::Init()
{
	s_compiler = &HLSLAsyncCompiler::getInstance();
	s_shaders_lock.unlock();
	s_pass_entry.m_compiled = true;
	s_pass_entry.m_initialized.test_and_set();
	// This class intentionally shares its shader cache files with DX11, as the shaders are (right now) identical.
	// Reduces unnecessary compilation when switching between APIs.
	s_last_domain_shader_bytecode = &s_pass_entry;
	s_last_hull_shader_bytecode = &s_pass_entry;
	s_last_geometry_shader_bytecode = &s_pass_entry;
	s_last_pixel_shader_bytecode = nullptr;
	s_last_vertex_shader_bytecode = nullptr;

	s_last_geometry_shader_uid = {};
	s_last_pixel_shader_uid = {};
	s_last_vertex_shader_uid = {};
	s_last_tessellation_shader_uid = {};

	s_last_cpu_geometry_shader_uid = {};
	s_last_cpu_pixel_shader_uid = {};
	s_last_cpu_vertex_shader_uid = {};
	s_last_cpu_tessellation_shader_uid = {};

	// Ensure shader cache directory exists..
	std::string shader_cache_path = File::GetUserPath(D_SHADERCACHE_IDX);

	if (!File::Exists(shader_cache_path))
		File::CreateDir(File::GetUserPath(D_SHADERCACHE_IDX));

	std::string title_unique_id = SConfig::GetInstance().m_strGameID;

	std::string ds_cache_filename = StringFromFormat("%sIDX11-%s-ds.cache", shader_cache_path.c_str(), title_unique_id.c_str());
	std::string hs_cache_filename = StringFromFormat("%sIDX11-%s-hs.cache", shader_cache_path.c_str(), title_unique_id.c_str());
	std::string gs_cache_filename = StringFromFormat("%sIDX11-%s-gs.cache", shader_cache_path.c_str(), title_unique_id.c_str());
	std::string ps_cache_filename = StringFromFormat("%sIDX11-%s-ps.cache", shader_cache_path.c_str(), title_unique_id.c_str());
	std::string vs_cache_filename = StringFromFormat("%sIDX11-%s-vs.cache", shader_cache_path.c_str(), title_unique_id.c_str());

	pKey_t gameid = (pKey_t)GetMurmurHash3(reinterpret_cast<const u8*>(SConfig::GetInstance().m_strGameID.data()), (u32)SConfig::GetInstance().m_strGameID.size(), 0);

	vs_bytecode_cache = VsBytecodeCache::Create(
		gameid,
		VERTEXSHADERGEN_UID_VERSION,
		"Ishiiruka.vs",
		StringFromFormat("%s.vs", title_unique_id.c_str())
	);

	ps_bytecode_cache = PsBytecodeCache::Create(
		gameid,
		PIXELSHADERGEN_UID_VERSION,
		"Ishiiruka.ps",
		StringFromFormat("%s.ps", title_unique_id.c_str())
	);

	gs_bytecode_cache = GsBytecodeCache::Create(
		gameid,
		GEOMETRYSHADERGEN_UID_VERSION,
		"Ishiiruka.gs",
		StringFromFormat("%s.gs", title_unique_id.c_str())
	);

	ts_bytecode_cache = TsBytecodeCache::Create(
		gameid,
		TESSELLATIONSHADERGEN_UID_VERSION,
		"Ishiiruka.ts",
		StringFromFormat("%s.ts", title_unique_id.c_str())
	);

	DShaderCacheInserter ds_inserter;
	s_ds_disk_cache.OpenAndRead(ds_cache_filename, ds_inserter);

	HShaderCacheInserter hs_inserter;
	s_hs_disk_cache.OpenAndRead(hs_cache_filename, hs_inserter);

	ShaderCacheInserter<GeometryShaderUid, GsBytecodeCache, &gs_bytecode_cache> gs_inserter;
	s_gs_disk_cache.OpenAndRead(gs_cache_filename, gs_inserter);

	ShaderCacheInserter<PixelShaderUid, PsBytecodeCache, &ps_bytecode_cache> ps_inserter;
	s_ps_disk_cache.OpenAndRead(ps_cache_filename, ps_inserter);

	ShaderCacheInserter<VertexShaderUid, VsBytecodeCache, &vs_bytecode_cache> vs_inserter;
	s_vs_disk_cache.OpenAndRead(vs_cache_filename, vs_inserter);

	// Clear out disk cache when debugging shaders to ensure stale ones don't stick around..
	SETSTAT(stats.numGeometryShadersAlive, static_cast<int>(gs_bytecode_cache->size()));
	SETSTAT(stats.numGeometryShadersCreated, 0);
	SETSTAT(stats.numPixelShadersAlive, static_cast<int>(ps_bytecode_cache->size()));
	SETSTAT(stats.numPixelShadersCreated, 0);
	SETSTAT(stats.numVertexShadersAlive, static_cast<int>(vs_bytecode_cache->size()));
	SETSTAT(stats.numVertexShadersCreated, 0);
	if (g_ActiveConfig.bCompileShaderOnStartup)
	{
		size_t shader_count = 0;
		ps_bytecode_cache->ForEachMostUsedByCategory(gameid,
			[&](const PixelShaderUid& it, size_t total)
		{
			PixelShaderUid item = it;
			item.ClearHASH();
			item.CalculateUIDHash();
			HandlePSUIDChange(item, true);
			shader_count++;
			if ((shader_count & 7) == 0)
			{
				Host_UpdateTitle(StringFromFormat("Compiling Pixel Shaders %i %% (%i/%i)", (shader_count * 100) / total, shader_count, total));
				s_compiler->WaitForFinish();
			}
		},
			[](ByteCodeCacheEntry& entry)
		{
			return !entry.m_shader_bytecode.pShaderBytecode;
		}
		, true);
		shader_count = 0;
		vs_bytecode_cache->ForEachMostUsedByCategory(gameid,
			[&](const VertexShaderUid& it, size_t total)
		{
			VertexShaderUid item = it;
			item.ClearHASH();
			item.CalculateUIDHash();
			HandleVSUIDChange(item, true);
			shader_count++;
			if ((shader_count & 31) == 0)
			{
				Host_UpdateTitle(StringFromFormat("Compiling Vertex Shaders %i %% (%i/%i)", (shader_count * 100) / total, shader_count, total));
				s_compiler->WaitForFinish();
			}
		},
			[](ByteCodeCacheEntry& entry)
		{
			return !entry.m_shader_bytecode.pShaderBytecode;
		}
		, true);
		shader_count = 0;
		gs_bytecode_cache->ForEachMostUsedByCategory(gameid,
			[&](const GeometryShaderUid& it, size_t total)
		{
			GeometryShaderUid item = it;
			item.ClearHASH();
			item.CalculateUIDHash();
			HandleGSUIDChange(item, true);
			shader_count++;
			if ((shader_count & 7) == 0)
			{
				Host_UpdateTitle(StringFromFormat("Compiling Geometry Shaders %i %% (%i/%i)", (shader_count * 100) / total, shader_count, total));
				s_compiler->WaitForFinish();
			}
		},
			[](ByteCodeCacheEntry& entry)
		{
			return !entry.m_shader_bytecode.pShaderBytecode;
		}
		, true);
		shader_count = 0;
		ts_bytecode_cache->ForEachMostUsedByCategory(gameid,
			[&](const TessellationShaderUid& it, size_t total)
		{
			TessellationShaderUid item = it;
			item.ClearHASH();
			item.CalculateUIDHash();
			HandleTSUIDChange(item, true);
			shader_count++;
			if ((shader_count & 31) == 0)
			{
				Host_UpdateTitle(StringFromFormat("Compiling Tessellation Shaders %i %% (%i/%i)", (shader_count * 100) / total, shader_count, total));
				s_compiler->WaitForFinish();
			}
		},
			[](std::pair<ByteCodeCacheEntry, ByteCodeCacheEntry>& entry)
		{
			return !entry.first.m_shader_bytecode.pShaderBytecode;
		}
		, true);
		s_compiler->WaitForFinish();
	}
}
Exemple #29
0
namespace DX11
{
VertexShaderCache::VSCache VertexShaderCache::vshaders;
const VertexShaderCache::VSCacheEntry* VertexShaderCache::last_entry;
VertexShaderUid VertexShaderCache::last_uid;

static ID3D11VertexShader* SimpleVertexShader = nullptr;
static ID3D11VertexShader* ClearVertexShader = nullptr;
static ID3D11InputLayout* SimpleLayout = nullptr;
static ID3D11InputLayout* ClearLayout = nullptr;

LinearDiskCache<VertexShaderUid, u8> g_vs_disk_cache;

ID3D11VertexShader* VertexShaderCache::GetSimpleVertexShader()
{
  return SimpleVertexShader;
}
ID3D11VertexShader* VertexShaderCache::GetClearVertexShader()
{
  return ClearVertexShader;
}
ID3D11InputLayout* VertexShaderCache::GetSimpleInputLayout()
{
  return SimpleLayout;
}
ID3D11InputLayout* VertexShaderCache::GetClearInputLayout()
{
  return ClearLayout;
}

ID3D11Buffer* vscbuf = nullptr;

ID3D11Buffer*& VertexShaderCache::GetConstantBuffer()
{
  // TODO: divide the global variables of the generated shaders into about 5 constant buffers to
  // speed this up
  if (VertexShaderManager::dirty)
  {
    D3D11_MAPPED_SUBRESOURCE map;
    D3D::context->Map(vscbuf, 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
    memcpy(map.pData, &VertexShaderManager::constants, sizeof(VertexShaderConstants));
    D3D::context->Unmap(vscbuf, 0);
    VertexShaderManager::dirty = false;

    ADDSTAT(stats.thisFrame.bytesUniformStreamed, sizeof(VertexShaderConstants));
  }
  return vscbuf;
}

// this class will load the precompiled shaders into our cache
class VertexShaderCacheInserter : public LinearDiskCacheReader<VertexShaderUid, u8>
{
public:
  void Read(const VertexShaderUid& key, const u8* value, u32 value_size)
  {
    D3DBlob* blob = new D3DBlob(value_size, value);
    VertexShaderCache::InsertByteCode(key, blob);
    blob->Release();
  }
};

const char simple_shader_code[] = {
    "struct VSOUTPUT\n"
    "{\n"
    "float4 vPosition : POSITION;\n"
    "float3 vTexCoord : TEXCOORD0;\n"
    "float  vTexCoord1 : TEXCOORD1;\n"
    "};\n"
    "VSOUTPUT main(float4 inPosition : POSITION,float4 inTEX0 : TEXCOORD0)\n"
    "{\n"
    "VSOUTPUT OUT;\n"
    "OUT.vPosition = inPosition;\n"
    "OUT.vTexCoord = inTEX0.xyz;\n"
    "OUT.vTexCoord1 = inTEX0.w;\n"
    "return OUT;\n"
    "}\n"};

const char clear_shader_code[] = {
    "struct VSOUTPUT\n"
    "{\n"
    "float4 vPosition   : POSITION;\n"
    "float4 vColor0   : COLOR0;\n"
    "};\n"
    "VSOUTPUT main(float4 inPosition : POSITION,float4 inColor0: COLOR0)\n"
    "{\n"
    "VSOUTPUT OUT;\n"
    "OUT.vPosition = inPosition;\n"
    "OUT.vColor0 = inColor0;\n"
    "return OUT;\n"
    "}\n"};

void VertexShaderCache::Init()
{
  const D3D11_INPUT_ELEMENT_DESC simpleelems[2] = {
      {"POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0},
      {"TEXCOORD", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 12, D3D11_INPUT_PER_VERTEX_DATA, 0},

  };
  const D3D11_INPUT_ELEMENT_DESC clearelems[2] = {
      {"POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0},
      {"COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 12, D3D11_INPUT_PER_VERTEX_DATA, 0},
  };

  unsigned int cbsize = ROUND_UP(sizeof(VertexShaderConstants), 16);  // must be a multiple of 16
  D3D11_BUFFER_DESC cbdesc = CD3D11_BUFFER_DESC(cbsize, D3D11_BIND_CONSTANT_BUFFER,
                                                D3D11_USAGE_DYNAMIC, D3D11_CPU_ACCESS_WRITE);
  HRESULT hr = D3D::device->CreateBuffer(&cbdesc, nullptr, &vscbuf);
  CHECK(hr == S_OK, "Create vertex shader constant buffer (size=%u)", cbsize);
  D3D::SetDebugObjectName((ID3D11DeviceChild*)vscbuf,
                          "vertex shader constant buffer used to emulate the GX pipeline");

  D3DBlob* blob;
  D3D::CompileVertexShader(simple_shader_code, &blob);
  D3D::device->CreateInputLayout(simpleelems, 2, blob->Data(), blob->Size(), &SimpleLayout);
  SimpleVertexShader = D3D::CreateVertexShaderFromByteCode(blob);
  if (SimpleLayout == nullptr || SimpleVertexShader == nullptr)
    PanicAlert("Failed to create simple vertex shader or input layout at %s %d\n", __FILE__,
               __LINE__);
  blob->Release();
  D3D::SetDebugObjectName((ID3D11DeviceChild*)SimpleVertexShader, "simple vertex shader");
  D3D::SetDebugObjectName((ID3D11DeviceChild*)SimpleLayout, "simple input layout");

  D3D::CompileVertexShader(clear_shader_code, &blob);
  D3D::device->CreateInputLayout(clearelems, 2, blob->Data(), blob->Size(), &ClearLayout);
  ClearVertexShader = D3D::CreateVertexShaderFromByteCode(blob);
  if (ClearLayout == nullptr || ClearVertexShader == nullptr)
    PanicAlert("Failed to create clear vertex shader or input layout at %s %d\n", __FILE__,
               __LINE__);
  blob->Release();
  D3D::SetDebugObjectName((ID3D11DeviceChild*)ClearVertexShader, "clear vertex shader");
  D3D::SetDebugObjectName((ID3D11DeviceChild*)ClearLayout, "clear input layout");

  Clear();

  if (!File::Exists(File::GetUserPath(D_SHADERCACHE_IDX)))
    File::CreateDir(File::GetUserPath(D_SHADERCACHE_IDX));

  SETSTAT(stats.numVertexShadersCreated, 0);
  SETSTAT(stats.numVertexShadersAlive, 0);

  std::string cache_filename =
      StringFromFormat("%sdx11-%s-vs.cache", File::GetUserPath(D_SHADERCACHE_IDX).c_str(),
                       SConfig::GetInstance().m_strUniqueID.c_str());
  VertexShaderCacheInserter inserter;
  g_vs_disk_cache.OpenAndRead(cache_filename, inserter);

  last_entry = nullptr;
}

void VertexShaderCache::Clear()
{
  for (auto& iter : vshaders)
    iter.second.Destroy();
  vshaders.clear();

  last_entry = nullptr;
}

void VertexShaderCache::Shutdown()
{
  SAFE_RELEASE(vscbuf);

  SAFE_RELEASE(SimpleVertexShader);
  SAFE_RELEASE(ClearVertexShader);

  SAFE_RELEASE(SimpleLayout);
  SAFE_RELEASE(ClearLayout);

  Clear();
  g_vs_disk_cache.Sync();
  g_vs_disk_cache.Close();
}

bool VertexShaderCache::SetShader()
{
  VertexShaderUid uid = GetVertexShaderUid();

  if (last_entry)
  {
    if (uid == last_uid)
    {
      GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
      return (last_entry->shader != nullptr);
    }
  }

  last_uid = uid;

  VSCache::iterator iter = vshaders.find(uid);
  if (iter != vshaders.end())
  {
    const VSCacheEntry& entry = iter->second;
    last_entry = &entry;

    GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
    return (entry.shader != nullptr);
  }

  ShaderCode code = GenerateVertexShaderCode(API_D3D, uid.GetUidData());

  D3DBlob* pbytecode = nullptr;
  D3D::CompileVertexShader(code.GetBuffer(), &pbytecode);

  if (pbytecode == nullptr)
  {
    GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
    return false;
  }
  g_vs_disk_cache.Append(uid, pbytecode->Data(), pbytecode->Size());

  bool success = InsertByteCode(uid, pbytecode);
  pbytecode->Release();

  GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
  return success;
}

bool VertexShaderCache::InsertByteCode(const VertexShaderUid& uid, D3DBlob* bcodeblob)
{
  ID3D11VertexShader* shader = D3D::CreateVertexShaderFromByteCode(bcodeblob);
  if (shader == nullptr)
    return false;

  // TODO: Somehow make the debug name a bit more specific
  D3D::SetDebugObjectName((ID3D11DeviceChild*)shader, "a vertex shader of VertexShaderCache");

  // Make an entry in the table
  VSCacheEntry entry;
  entry.shader = shader;
  entry.SetByteCode(bcodeblob);

  vshaders[uid] = entry;
  last_entry = &vshaders[uid];

  INCSTAT(stats.numVertexShadersCreated);
  SETSTAT(stats.numVertexShadersAlive, (int)vshaders.size());

  return true;
}

}  // namespace DX11
namespace DX11
{

PixelShaderCache::PSCache PixelShaderCache::PixelShaders;
const PixelShaderCache::PSCacheEntry* PixelShaderCache::last_entry;
PixelShaderUid PixelShaderCache::last_uid;
UidChecker<PixelShaderUid,PixelShaderCode> PixelShaderCache::pixel_uid_checker;

LinearDiskCache<PixelShaderUid, u8> g_ps_disk_cache;

ID3D11PixelShader* s_ColorMatrixProgram[2] = {nullptr};
ID3D11PixelShader* s_ColorCopyProgram[2] = {nullptr};
ID3D11PixelShader* s_DepthMatrixProgram[2] = {nullptr};
ID3D11PixelShader* s_ClearProgram = nullptr;
ID3D11PixelShader* s_rgba6_to_rgb8[2] = {nullptr};
ID3D11PixelShader* s_rgb8_to_rgba6[2] = {nullptr};
ID3D11Buffer* pscbuf = nullptr;

const char clear_program_code[] = {
	"void main(\n"
	"out float4 ocol0 : SV_Target,\n"
	"in float4 pos : SV_Position,\n"
	"in float4 incol0 : COLOR0){\n"
	"ocol0 = incol0;\n"
	"}\n"
};

// TODO: Find some way to avoid having separate shaders for non-MSAA and MSAA...
const char color_copy_program_code[] = {
	"sampler samp0 : register(s0);\n"
	"Texture2D Tex0 : register(t0);\n"
	"void main(\n"
	"out float4 ocol0 : SV_Target,\n"
	"in float4 pos : SV_Position,\n"
	"in float2 uv0 : TEXCOORD0){\n"
	"ocol0 = Tex0.Sample(samp0,uv0);\n"
	"}\n"
};

// TODO: Improve sampling algorithm!
const char color_copy_program_code_msaa[] = {
	"sampler samp0 : register(s0);\n"
	"Texture2DMS<float4, %d> Tex0 : register(t0);\n"
	"void main(\n"
	"out float4 ocol0 : SV_Target,\n"
	"in float4 pos : SV_Position,\n"
	"in float2 uv0 : TEXCOORD0){\n"
	"int width, height, samples;\n"
	"Tex0.GetDimensions(width, height, samples);\n"
	"ocol0 = 0;\n"
	"for(int i = 0; i < samples; ++i)\n"
	"	ocol0 += Tex0.Load(int2(uv0.x*(width), uv0.y*(height)), i);\n"
	"ocol0 /= samples;\n"
	"}\n"
};

const char color_matrix_program_code[] = {
	"sampler samp0 : register(s0);\n"
	"Texture2D Tex0 : register(t0);\n"
	"uniform float4 cColMatrix[7] : register(c0);\n"
	"void main(\n"
	"out float4 ocol0 : SV_Target,\n"
	"in float4 pos : SV_Position,\n"
	" in float2 uv0 : TEXCOORD0){\n"
	"float4 texcol = Tex0.Sample(samp0,uv0);\n"
	"texcol = round(texcol * cColMatrix[5])*cColMatrix[6];\n"
	"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
	"}\n"
};

const char color_matrix_program_code_msaa[] = {
	"sampler samp0 : register(s0);\n"
	"Texture2DMS<float4, %d> Tex0 : register(t0);\n"
	"uniform float4 cColMatrix[7] : register(c0);\n"
	"void main(\n"
	"out float4 ocol0 : SV_Target,\n"
	"in float4 pos : SV_Position,\n"
	" in float2 uv0 : TEXCOORD0){\n"
	"int width, height, samples;\n"
	"Tex0.GetDimensions(width, height, samples);\n"
	"float4 texcol = 0;\n"
	"for(int i = 0; i < samples; ++i)\n"
	"	texcol += Tex0.Load(int2(uv0.x*(width), uv0.y*(height)), i);\n"
	"texcol /= samples;\n"
	"texcol = round(texcol * cColMatrix[5])*cColMatrix[6];\n"
	"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
	"}\n"
};

const char depth_matrix_program[] = {
	"sampler samp0 : register(s0);\n"
	"Texture2D Tex0 : register(t0);\n"
	"uniform float4 cColMatrix[7] : register(c0);\n"
	"void main(\n"
	"out float4 ocol0 : SV_Target,\n"
	" in float4 pos : SV_Position,\n"
	" in float2 uv0 : TEXCOORD0){\n"
	"	float4 texcol = Tex0.Sample(samp0,uv0);\n"

	// 255.99998474121 = 16777215/16777216*256
	"	float workspace = texcol.x * 255.99998474121;\n"

	"	texcol.x = floor(workspace);\n"         // x component

	"	workspace = workspace - texcol.x;\n"    // subtract x component out
	"	workspace = workspace * 256.0;\n"       // shift left 8 bits
	"	texcol.y = floor(workspace);\n"         // y component

	"	workspace = workspace - texcol.y;\n"    // subtract y component out
	"	workspace = workspace * 256.0;\n"       // shift left 8 bits
	"	texcol.z = floor(workspace);\n"         // z component

	"	texcol.w = texcol.x;\n"                 // duplicate x into w

	"	texcol = texcol / 255.0;\n"             // normalize components to [0.0..1.0]

	"	texcol.w = texcol.w * 15.0;\n"
	"	texcol.w = floor(texcol.w);\n"
	"	texcol.w = texcol.w / 15.0;\n"          // w component

	"	ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
	"}\n"
};

const char depth_matrix_program_msaa[] = {
	"sampler samp0 : register(s0);\n"
	"Texture2DMS<float4, %d> Tex0 : register(t0);\n"
	"uniform float4 cColMatrix[7] : register(c0);\n"
	"void main(\n"
	"out float4 ocol0 : SV_Target,\n"
	" in float4 pos : SV_Position,\n"
	" in float2 uv0 : TEXCOORD0){\n"
	"	int width, height, samples;\n"
	"	Tex0.GetDimensions(width, height, samples);\n"
	"	float4 texcol = 0;\n"
	"	for(int i = 0; i < samples; ++i)\n"
	"		texcol += Tex0.Load(int2(uv0.x*(width), uv0.y*(height)), i);\n"
	"	texcol /= samples;\n"

	// 255.99998474121 = 16777215/16777216*256
	"	float workspace = texcol.x * 255.99998474121;\n"

	"	texcol.x = floor(workspace);\n"         // x component

	"	workspace = workspace - texcol.x;\n"    // subtract x component out
	"	workspace = workspace * 256.0;\n"       // shift left 8 bits
	"	texcol.y = floor(workspace);\n"         // y component

	"	workspace = workspace - texcol.y;\n"    // subtract y component out
	"	workspace = workspace * 256.0;\n"       // shift left 8 bits
	"	texcol.z = floor(workspace);\n"         // z component

	"	texcol.w = texcol.x;\n"                 // duplicate x into w

	"	texcol = texcol / 255.0;\n"             // normalize components to [0.0..1.0]

	"	texcol.w = texcol.w * 15.0;\n"
	"	texcol.w = floor(texcol.w);\n"
	"	texcol.w = texcol.w / 15.0;\n"          // w component

	"	ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
	"}\n"
};

const char reint_rgba6_to_rgb8[] = {
	"sampler samp0 : register(s0);\n"
	"Texture2D Tex0 : register(t0);\n"
	"void main(\n"
	"	out float4 ocol0 : SV_Target,\n"
	"	in float4 pos : SV_Position,\n"
	"	in float2 uv0 : TEXCOORD0)\n"
	"{\n"
	"	int4 src6 = round(Tex0.Sample(samp0,uv0) * 63.f);\n"
	"	int4 dst8;\n"
	"	dst8.r = (src6.r << 2) | (src6.g >> 4);\n"
	"	dst8.g = ((src6.g & 0xF) << 4) | (src6.b >> 2);\n"
	"	dst8.b = ((src6.b & 0x3) << 6) | src6.a;\n"
	"	dst8.a = 255;\n"
	"	ocol0 = (float4)dst8 / 255.f;\n"
	"}"
};

const char reint_rgba6_to_rgb8_msaa[] = {
	"sampler samp0 : register(s0);\n"
	"Texture2DMS<float4, %d> Tex0 : register(t0);\n"
	"void main(\n"
	"	out float4 ocol0 : SV_Target,\n"
	"	in float4 pos : SV_Position,\n"
	"	in float2 uv0 : TEXCOORD0)\n"
	"{\n"
	"	int width, height, samples;\n"
	"	Tex0.GetDimensions(width, height, samples);\n"
	"	float4 texcol = 0;\n"
	"	for (int i = 0; i < samples; ++i)\n"
	"		texcol += Tex0.Load(int2(uv0.x*(width), uv0.y*(height)), i);\n"
	"	texcol /= samples;\n"
	"	int4 src6 = round(texcol * 63.f);\n"
	"	int4 dst8;\n"
	"	dst8.r = (src6.r << 2) | (src6.g >> 4);\n"
	"	dst8.g = ((src6.g & 0xF) << 4) | (src6.b >> 2);\n"
	"	dst8.b = ((src6.b & 0x3) << 6) | src6.a;\n"
	"	dst8.a = 255;\n"
	"	ocol0 = (float4)dst8 / 255.f;\n"
	"}"
};

const char reint_rgb8_to_rgba6[] = {
	"sampler samp0 : register(s0);\n"
	"Texture2D Tex0 : register(t0);\n"
	"void main(\n"
	"	out float4 ocol0 : SV_Target,\n"
	"	in float4 pos : SV_Position,\n"
	"	in float2 uv0 : TEXCOORD0)\n"
	"{\n"
	"	int4 src8 = round(Tex0.Sample(samp0,uv0) * 255.f);\n"
	"	int4 dst6;\n"
	"	dst6.r = src8.r >> 2;\n"
	"	dst6.g = ((src8.r & 0x3) << 4) | (src8.g >> 4);\n"
	"	dst6.b = ((src8.g & 0xF) << 2) | (src8.b >> 6);\n"
	"	dst6.a = src8.b & 0x3F;\n"
	"	ocol0 = (float4)dst6 / 63.f;\n"
	"}\n"
};

const char reint_rgb8_to_rgba6_msaa[] = {
	"sampler samp0 : register(s0);\n"
	"Texture2DMS<float4, %d> Tex0 : register(t0);\n"
	"void main(\n"
	"	out float4 ocol0 : SV_Target,\n"
	"	in float4 pos : SV_Position,\n"
	"	in float2 uv0 : TEXCOORD0)\n"
	"{\n"
	"	int width, height, samples;\n"
	"	Tex0.GetDimensions(width, height, samples);\n"
	"	float4 texcol = 0;\n"
	"	for (int i = 0; i < samples; ++i)\n"
	"		texcol += Tex0.Load(int2(uv0.x*(width), uv0.y*(height)), i);\n"
	"	texcol /= samples;\n"
	"	int4 src8 = round(texcol * 255.f);\n"
	"	int4 dst6;\n"
	"	dst6.r = src8.r >> 2;\n"
	"	dst6.g = ((src8.r & 0x3) << 4) | (src8.g >> 4);\n"
	"	dst6.b = ((src8.g & 0xF) << 2) | (src8.b >> 6);\n"
	"	dst6.a = src8.b & 0x3F;\n"
	"	ocol0 = (float4)dst6 / 63.f;\n"
	"}\n"
};

ID3D11PixelShader* PixelShaderCache::ReinterpRGBA6ToRGB8(bool multisampled)
{
	if (!multisampled || D3D::GetAAMode(g_ActiveConfig.iMultisampleMode).Count == 1)
	{
		if (!s_rgba6_to_rgb8[0])
		{
			s_rgba6_to_rgb8[0] = D3D::CompileAndCreatePixelShader(reint_rgba6_to_rgb8);
			CHECK(s_rgba6_to_rgb8[0], "Create RGBA6 to RGB8 pixel shader");
			D3D::SetDebugObjectName(s_rgba6_to_rgb8[0], "RGBA6 to RGB8 pixel shader");
		}
		return s_rgba6_to_rgb8[0];
	}
	else if (!s_rgba6_to_rgb8[1])
	{
		// create MSAA shader for current AA mode
		std::string buf = StringFromFormat(reint_rgba6_to_rgb8_msaa, D3D::GetAAMode(g_ActiveConfig.iMultisampleMode).Count);
		s_rgba6_to_rgb8[1] = D3D::CompileAndCreatePixelShader(buf);

		CHECK(s_rgba6_to_rgb8[1], "Create RGBA6 to RGB8 MSAA pixel shader");
		D3D::SetDebugObjectName(s_rgba6_to_rgb8[1], "RGBA6 to RGB8 MSAA pixel shader");
	}
	return s_rgba6_to_rgb8[1];
}

ID3D11PixelShader* PixelShaderCache::ReinterpRGB8ToRGBA6(bool multisampled)
{
	if (!multisampled || D3D::GetAAMode(g_ActiveConfig.iMultisampleMode).Count == 1)
	{
		if (!s_rgb8_to_rgba6[0])
		{
			s_rgb8_to_rgba6[0] = D3D::CompileAndCreatePixelShader(reint_rgb8_to_rgba6);
			CHECK(s_rgb8_to_rgba6[0], "Create RGB8 to RGBA6 pixel shader");
			D3D::SetDebugObjectName(s_rgb8_to_rgba6[0], "RGB8 to RGBA6 pixel shader");
		}
		return s_rgb8_to_rgba6[0];
	}
	else if (!s_rgb8_to_rgba6[1])
	{
		// create MSAA shader for current AA mode
		std::string buf = StringFromFormat(reint_rgb8_to_rgba6_msaa, D3D::GetAAMode(g_ActiveConfig.iMultisampleMode).Count);
		s_rgb8_to_rgba6[1] = D3D::CompileAndCreatePixelShader(buf);

		CHECK(s_rgb8_to_rgba6[1], "Create RGB8 to RGBA6 MSAA pixel shader");
		D3D::SetDebugObjectName(s_rgb8_to_rgba6[1], "RGB8 to RGBA6 MSAA pixel shader");
	}
	return s_rgb8_to_rgba6[1];
}

ID3D11PixelShader* PixelShaderCache::GetColorCopyProgram(bool multisampled)
{
	if (!multisampled || D3D::GetAAMode(g_ActiveConfig.iMultisampleMode).Count == 1)
	{
		return s_ColorCopyProgram[0];
	}
	else if (s_ColorCopyProgram[1])
	{
		return s_ColorCopyProgram[1];
	}
	else
	{
		// create MSAA shader for current AA mode
		std::string buf = StringFromFormat(color_copy_program_code_msaa, D3D::GetAAMode(g_ActiveConfig.iMultisampleMode).Count);
		s_ColorCopyProgram[1] = D3D::CompileAndCreatePixelShader(buf);
		CHECK(s_ColorCopyProgram[1]!=nullptr, "Create color copy MSAA pixel shader");
		D3D::SetDebugObjectName((ID3D11DeviceChild*)s_ColorCopyProgram[1], "color copy MSAA pixel shader");
		return s_ColorCopyProgram[1];
	}
}

ID3D11PixelShader* PixelShaderCache::GetColorMatrixProgram(bool multisampled)
{
	if (!multisampled || D3D::GetAAMode(g_ActiveConfig.iMultisampleMode).Count == 1)
	{
		return s_ColorMatrixProgram[0];
	}
	else if (s_ColorMatrixProgram[1])
	{
		return s_ColorMatrixProgram[1];
	}
	else
	{
		// create MSAA shader for current AA mode
		std::string buf = StringFromFormat(color_matrix_program_code_msaa, D3D::GetAAMode(g_ActiveConfig.iMultisampleMode).Count);
		s_ColorMatrixProgram[1] = D3D::CompileAndCreatePixelShader(buf);
		CHECK(s_ColorMatrixProgram[1]!=nullptr, "Create color matrix MSAA pixel shader");
		D3D::SetDebugObjectName((ID3D11DeviceChild*)s_ColorMatrixProgram[1], "color matrix MSAA pixel shader");
		return s_ColorMatrixProgram[1];
	}
}

ID3D11PixelShader* PixelShaderCache::GetDepthMatrixProgram(bool multisampled)
{
	if (!multisampled || D3D::GetAAMode(g_ActiveConfig.iMultisampleMode).Count == 1)
	{
		return s_DepthMatrixProgram[0];
	}
	else if (s_DepthMatrixProgram[1])
	{
		return s_DepthMatrixProgram[1];
	}
	else
	{
		// create MSAA shader for current AA mode
		std::string buf = StringFromFormat(depth_matrix_program_msaa, D3D::GetAAMode(g_ActiveConfig.iMultisampleMode).Count);
		s_DepthMatrixProgram[1] = D3D::CompileAndCreatePixelShader(buf);
		CHECK(s_DepthMatrixProgram[1]!=nullptr, "Create depth matrix MSAA pixel shader");
		D3D::SetDebugObjectName((ID3D11DeviceChild*)s_DepthMatrixProgram[1], "depth matrix MSAA pixel shader");
		return s_DepthMatrixProgram[1];
	}
}

ID3D11PixelShader* PixelShaderCache::GetClearProgram()
{
	return s_ClearProgram;
}

ID3D11Buffer* &PixelShaderCache::GetConstantBuffer()
{
	// TODO: divide the global variables of the generated shaders into about 5 constant buffers to speed this up
	if (PixelShaderManager::dirty)
	{
		D3D11_MAPPED_SUBRESOURCE map;
		D3D::context->Map(pscbuf, 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
		memcpy(map.pData, &PixelShaderManager::constants, sizeof(PixelShaderConstants));
		D3D::context->Unmap(pscbuf, 0);
		PixelShaderManager::dirty = false;

		ADDSTAT(stats.thisFrame.bytesUniformStreamed, sizeof(PixelShaderConstants));
	}
	return pscbuf;
}

// this class will load the precompiled shaders into our cache
class PixelShaderCacheInserter : public LinearDiskCacheReader<PixelShaderUid, u8>
{
public:
	void Read(const PixelShaderUid &key, const u8 *value, u32 value_size)
	{
		PixelShaderCache::InsertByteCode(key, value, value_size);
	}
};

void PixelShaderCache::Init()
{
	unsigned int cbsize = ((sizeof(PixelShaderConstants))&(~0xf))+0x10; // must be a multiple of 16
	D3D11_BUFFER_DESC cbdesc = CD3D11_BUFFER_DESC(cbsize, D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DYNAMIC, D3D11_CPU_ACCESS_WRITE);
	D3D::device->CreateBuffer(&cbdesc, nullptr, &pscbuf);
	CHECK(pscbuf!=nullptr, "Create pixel shader constant buffer");
	D3D::SetDebugObjectName((ID3D11DeviceChild*)pscbuf, "pixel shader constant buffer used to emulate the GX pipeline");

	// used when drawing clear quads
	s_ClearProgram = D3D::CompileAndCreatePixelShader(clear_program_code);
	CHECK(s_ClearProgram!=nullptr, "Create clear pixel shader");
	D3D::SetDebugObjectName((ID3D11DeviceChild*)s_ClearProgram, "clear pixel shader");

	// used when copying/resolving the color buffer
	s_ColorCopyProgram[0] = D3D::CompileAndCreatePixelShader(color_copy_program_code);
	CHECK(s_ColorCopyProgram[0]!=nullptr, "Create color copy pixel shader");
	D3D::SetDebugObjectName((ID3D11DeviceChild*)s_ColorCopyProgram[0], "color copy pixel shader");

	// used for color conversion
	s_ColorMatrixProgram[0] = D3D::CompileAndCreatePixelShader(color_matrix_program_code);
	CHECK(s_ColorMatrixProgram[0]!=nullptr, "Create color matrix pixel shader");
	D3D::SetDebugObjectName((ID3D11DeviceChild*)s_ColorMatrixProgram[0], "color matrix pixel shader");

	// used for depth copy
	s_DepthMatrixProgram[0] = D3D::CompileAndCreatePixelShader(depth_matrix_program);
	CHECK(s_DepthMatrixProgram[0]!=nullptr, "Create depth matrix pixel shader");
	D3D::SetDebugObjectName((ID3D11DeviceChild*)s_DepthMatrixProgram[0], "depth matrix pixel shader");

	Clear();

	if (!File::Exists(File::GetUserPath(D_SHADERCACHE_IDX)))
		File::CreateDir(File::GetUserPath(D_SHADERCACHE_IDX));

	SETSTAT(stats.numPixelShadersCreated, 0);
	SETSTAT(stats.numPixelShadersAlive, 0);

	std::string cache_filename = StringFromFormat("%sdx11-%s-ps.cache", File::GetUserPath(D_SHADERCACHE_IDX).c_str(),
			SConfig::GetInstance().m_LocalCoreStartupParameter.m_strUniqueID.c_str());
	PixelShaderCacheInserter inserter;
	g_ps_disk_cache.OpenAndRead(cache_filename, inserter);

	if (g_Config.bEnableShaderDebugging)
		Clear();

	last_entry = nullptr;
}

// ONLY to be used during shutdown.
void PixelShaderCache::Clear()
{
	for (auto& iter : PixelShaders)
		iter.second.Destroy();
	PixelShaders.clear();
	pixel_uid_checker.Invalidate();

	last_entry = nullptr;
}

// Used in Swap() when AA mode has changed
void PixelShaderCache::InvalidateMSAAShaders()
{
	SAFE_RELEASE(s_ColorCopyProgram[1]);
	SAFE_RELEASE(s_ColorMatrixProgram[1]);
	SAFE_RELEASE(s_DepthMatrixProgram[1]);
	SAFE_RELEASE(s_rgb8_to_rgba6[1]);
	SAFE_RELEASE(s_rgba6_to_rgb8[1]);
}

void PixelShaderCache::Shutdown()
{
	SAFE_RELEASE(pscbuf);

	SAFE_RELEASE(s_ClearProgram);
	for (int i = 0; i < 2; ++i)
	{
		SAFE_RELEASE(s_ColorCopyProgram[i]);
		SAFE_RELEASE(s_ColorMatrixProgram[i]);
		SAFE_RELEASE(s_DepthMatrixProgram[i]);
		SAFE_RELEASE(s_rgba6_to_rgb8[i]);
		SAFE_RELEASE(s_rgb8_to_rgba6[i]);
	}

	Clear();
	g_ps_disk_cache.Sync();
	g_ps_disk_cache.Close();
}

bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)
{
	PixelShaderUid uid;
	GetPixelShaderUid(uid, dstAlphaMode, API_D3D, components);
	if (g_ActiveConfig.bEnableShaderDebugging)
	{
		PixelShaderCode code;
		GeneratePixelShaderCode(code, dstAlphaMode, API_D3D, components);
		pixel_uid_checker.AddToIndexAndCheck(code, uid, "Pixel", "p");
	}

	// Check if the shader is already set
	if (last_entry)
	{
		if (uid == last_uid)
		{
			GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE,true);
			return (last_entry->shader != nullptr);
		}
	}

	last_uid = uid;

	// Check if the shader is already in the cache
	PSCache::iterator iter;
	iter = PixelShaders.find(uid);
	if (iter != PixelShaders.end())
	{
		const PSCacheEntry &entry = iter->second;
		last_entry = &entry;

		GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE,true);
		return (entry.shader != nullptr);
	}

	// Need to compile a new shader
	PixelShaderCode code;
	GeneratePixelShaderCode(code, dstAlphaMode, API_D3D, components);

	D3DBlob* pbytecode;
	if (!D3D::CompilePixelShader(code.GetBuffer(), &pbytecode))
	{
		GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
		return false;
	}

	// Insert the bytecode into the caches
	g_ps_disk_cache.Append(uid, pbytecode->Data(), pbytecode->Size());

	bool success = InsertByteCode(uid, pbytecode->Data(), pbytecode->Size());
	pbytecode->Release();

	if (g_ActiveConfig.bEnableShaderDebugging && success)
	{
		PixelShaders[uid].code = code.GetBuffer();
	}

	GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
	return success;
}

bool PixelShaderCache::InsertByteCode(const PixelShaderUid &uid, const void* bytecode, unsigned int bytecodelen)
{
	ID3D11PixelShader* shader = D3D::CreatePixelShaderFromByteCode(bytecode, bytecodelen);
	if (shader == nullptr)
		return false;

	// TODO: Somehow make the debug name a bit more specific
	D3D::SetDebugObjectName((ID3D11DeviceChild*)shader, "a pixel shader of PixelShaderCache");

	// Make an entry in the table
	PSCacheEntry newentry;
	newentry.shader = shader;
	PixelShaders[uid] = newentry;
	last_entry = &PixelShaders[uid];

	if (!shader)
	{
		// INCSTAT(stats.numPixelShadersFailed);
		return false;
	}

	INCSTAT(stats.numPixelShadersCreated);
	SETSTAT(stats.numPixelShadersAlive, PixelShaders.size());
	return true;
}

}  // DX11