// Selects the OpenGL pixel shader source matching the current pipeline state.
//
// The UID generated for dstAlphaMode is looked up in PixelShaders; on a miss the
// shader source is generated and stored there. last_uid / last_entry remember the
// most recent selection so a repeated call with an unchanged UID returns at once.
//
// dstAlphaMode - forwarded to UID generation and to shader code generation.
// Returns true on every path.
bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode)
{
    PixelShaderUid uid = GetPixelShaderUid(dstAlphaMode, API_OPENGL);

    // Check if the shader is already set
    if (last_entry && uid == last_uid)
    {
        return true;
    }

    last_uid = uid;

    // Check if the shader is already in the cache
    PSCache::iterator iter = PixelShaders.find(uid);
    if (iter != PixelShaders.end())
    {
        const std::string& entry = iter->second;
        last_entry = &entry;

        GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
        return true;
    }

    // Need to compile a new shader
    ShaderCode code = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL);
    iter = PixelShaders.insert(std::make_pair(uid, code.GetBuffer())).first;

    // Keep last_entry in step with last_uid. Without this, last_entry kept
    // pointing at the previously selected shader after a cache miss.
    last_entry = &iter->second;

    GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
    return true;
}
void VertexShaderCache::CompileVShader(const VertexShaderUid& uid, bool ongputhread) { s_vshaders_lock.lock(); VSCacheEntry* entry = &s_vshaders->GetOrAdd(uid); s_vshaders_lock.unlock(); if (ongputhread) { s_last_entry = entry; } // Compile only when we have a new instance if (entry->initialized.test_and_set()) { return; } ShaderCompilerWorkUnit *wunit = s_compiler->NewUnit(VERTEXSHADERGEN_BUFFERSIZE); wunit->GenerateCodeHandler = [uid](ShaderCompilerWorkUnit* wunit) { ShaderCode code; code.SetBuffer(wunit->code.data()); GenerateVertexShaderCodeD3D11(code, uid.GetUidData()); wunit->codesize = (u32)code.BufferSize(); }; wunit->entrypoint = "main"; #if defined(_DEBUG) || defined(DEBUGFAST) wunit->flags = D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION; #else wunit->flags = D3DCOMPILE_SKIP_VALIDATION | D3DCOMPILE_OPTIMIZATION_LEVEL3 | D3DCOMPILE_ENABLE_BACKWARDS_COMPATIBILITY; #endif wunit->target = D3D::VertexShaderVersionString(); wunit->ResultHandler = [uid, entry](ShaderCompilerWorkUnit* wunit) { if (SUCCEEDED(wunit->cresult)) { g_vs_disk_cache.Append(uid, (const u8*)wunit->shaderbytecode->GetBufferPointer(), (u32)wunit->shaderbytecode->GetBufferSize()); PushByteCode(D3DBlob(D3D::UniquePtr<ID3D10Blob>(wunit->shaderbytecode)), entry); wunit->shaderbytecode = nullptr; } else { static int num_failures = 0; std::string filename = StringFromFormat("%sbad_vs_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++); std::ofstream file; OpenFStream(file, filename, std::ios_base::out); file << ((const char*)wunit->code.data()); file.close(); PanicAlert("Failed to compile vertex shader!\nThis usually happens when trying to use Dolphin with an outdated GPU or integrated GPU like the Intel GMA series.\n\nIf you're sure this is Dolphin's error anyway, post the contents of %s along with this error message at the forums.\n\nDebug info (%s):\n%s", filename, D3D::VertexShaderVersionString(), (char*)wunit->error->GetBufferPointer()); } }; 
s_compiler->CompileShaderAsync(wunit); }
// Selects the D3D vertex shader for the given vertex components.
//
// The UID is checked against the last selection, then against the vshaders
// cache; on a miss the shader is generated, compiled, appended to the disk cache
// and inserted into the cache.
//
// components - vertex component mask used for UID and code generation.
// Returns true when a usable shader is available, false when the cached entry
// has no shader or compilation produced no bytecode.
bool VertexShaderCache::SetShader(u32 components)
{
    VertexShaderUid uid;
    GetVertexShaderUid(uid, components, API_D3D);

    if (g_ActiveConfig.bEnableShaderDebugging)
    {
        // Debug aid: regenerate the source and let the UID checker inspect it.
        ShaderCode debug_code;
        GenerateVertexShaderCode(debug_code, components, API_D3D);
        vertex_uid_checker.AddToIndexAndCheck(debug_code, uid, "Vertex", "v");
    }

    // Fast path: same UID as the previous call.
    if (last_entry && uid == last_uid)
    {
        GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
        return last_entry->shader != nullptr;
    }

    last_uid = uid;

    // Second chance: the in-memory cache.
    VSCache::iterator cached = vshaders.find(uid);
    if (cached != vshaders.end())
    {
        const VSCacheEntry& hit = cached->second;
        last_entry = &hit;

        GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
        return hit.shader != nullptr;
    }

    // Cache miss: generate and compile.
    ShaderCode source;
    GenerateVertexShaderCode(source, components, API_D3D);

    D3DBlob* blob = nullptr;
    D3D::CompileVertexShader(source.GetBuffer(), &blob);
    if (blob == nullptr)
    {
        GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
        return false;
    }

    g_vs_disk_cache.Append(uid, blob->Data(), blob->Size());
    const bool inserted = InsertByteCode(uid, blob);
    blob->Release();

    if (g_ActiveConfig.bEnableShaderDebugging && inserted)
    {
        // Keep the source text next to the compiled shader for debugging.
        vshaders[uid].code = source.GetBuffer();
    }

    GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
    return inserted;
}
void ShaderCache::HandlePSUIDChange( const PixelShaderUid &ps_uid, bool on_gpu_thread) { s_shaders_lock.lock(); ByteCodeCacheEntry* entry = &ps_bytecode_cache->GetOrAdd(ps_uid); s_shaders_lock.unlock(); if (on_gpu_thread) { s_last_pixel_shader_bytecode = entry; } if (entry->m_initialized.test_and_set()) { return; } // Need to compile a new shader ShaderCompilerWorkUnit *wunit = s_compiler->NewUnit(PIXELSHADERGEN_BUFFERSIZE); wunit->GenerateCodeHandler = [ps_uid](ShaderCompilerWorkUnit* wunit) { ShaderCode code; code.SetBuffer(wunit->code.data()); GeneratePixelShaderCodeD3D11(code, ps_uid.GetUidData()); wunit->codesize = (u32)code.BufferSize(); }; wunit->entrypoint = "main"; wunit->flags = D3DCOMPILE_SKIP_VALIDATION | D3DCOMPILE_OPTIMIZATION_LEVEL3; wunit->target = D3D::PixelShaderVersionString(); wunit->ResultHandler = [ps_uid, entry](ShaderCompilerWorkUnit* wunit) { if (SUCCEEDED(wunit->cresult)) { D3DBlob* shaderBuffer = new D3DBlob(wunit->shaderbytecode); s_ps_disk_cache.Append(ps_uid, shaderBuffer->Data(), shaderBuffer->Size()); PushByteCode(entry, shaderBuffer); wunit->shaderbytecode->Release(); wunit->shaderbytecode = nullptr; } else { static int num_failures = 0; std::string filename = StringFromFormat("%sbad_ps_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++); std::ofstream file; OpenFStream(file, filename, std::ios_base::out); file << ((const char *)wunit->code.data()); file << ((const char *)wunit->error->GetBufferPointer()); file.close(); PanicAlert("Failed to compile pixel shader!\nThis usually happens when trying to use Dolphin with an outdated GPU or integrated GPU like the Intel GMA series.\n\nIf you're sure this is Dolphin's error anyway, post the contents of %s along with this error message at the forums.\n\nDebug info (%s):\n%s", filename, D3D::PixelShaderVersionString(), (char*)wunit->error->GetBufferPointer()); } }; s_compiler->CompileShaderAsync(wunit); }
bool VertexShaderCache::UberVertexShaderCompilerWorkItem::Compile() { ShaderCode code = UberShader::GenVertexShader(APIType::D3D, ShaderHostConfig::GetCurrent(), m_uid.GetUidData()); if (D3D::CompileVertexShader(code.GetBuffer(), &m_bytecode)) m_vs = D3D::CreateVertexShaderFromByteCode(m_bytecode); return true; }
// Writes the statement that ends the current output primitive.
//
// In wireframe mode the vertex named "first" is emitted once more beforehand.
// OpenGL and Vulkan get EndPrimitive(); every other API gets output.RestartStrip().
static void EndPrimitive(ShaderCode& out, const geometry_shader_uid_data* uid_data, APIType ApiType)
{
    if (uid_data->wireframe)
    {
        EmitVertex(out, uid_data, "first", ApiType);
    }

    switch (ApiType)
    {
    case APIType::OpenGL:
    case APIType::Vulkan:
        out.Write("\tEndPrimitive();\n");
        break;

    default:
        out.Write("\toutput.RestartStrip();\n");
        break;
    }
}
// Selects the D3D geometry shader for the given primitive type.
//
// Order of checks: the last selection, the pass-through case (served by
// pass_entry), the GeometryShaders cache, and finally a fresh compile whose
// bytecode is appended to g_gs_disk_cache and inserted into the cache.
//
// Returns false when the cached entry has no shader, when compilation fails or
// when InsertByteCode reports failure; true otherwise.
bool GeometryShaderCache::SetShader(u32 primitive_type)
{
    GeometryShaderUid uid = GetGeometryShaderUid(primitive_type);

    // Nothing to do if the UID did not change since the previous call.
    if (last_entry && uid == last_uid)
    {
        GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
        return true;
    }

    last_uid = uid;

    // Pass-through UIDs always map to the default pass-through entry.
    if (uid.GetUidData()->IsPassthrough())
    {
        last_entry = &pass_entry;
        return true;
    }

    // Look for a previously compiled shader.
    GSCache::iterator cached = GeometryShaders.find(uid);
    if (cached != GeometryShaders.end())
    {
        const GSCacheEntry& hit = cached->second;
        last_entry = &hit;
        return hit.shader != nullptr;
    }

    // Cache miss: generate and compile.
    ShaderCode source =
        GenerateGeometryShaderCode(APIType::D3D, ShaderHostConfig::GetCurrent(), uid.GetUidData());

    D3DBlob* blob;
    if (!D3D::CompileGeometryShader(source.GetBuffer(), &blob))
    {
        GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
        return false;
    }

    // Store the bytecode on disk and in memory.
    g_gs_disk_cache.Append(uid, blob->Data(), blob->Size());
    const bool inserted = InsertByteCode(uid, blob->Data(), blob->Size());
    blob->Release();

    return inserted;
}
// Fills out_constants and out_samplers from the D3D11 reflection data of compiled
// shader bytecode.
//
// Both tables are cleared first. Every variable of every constant buffer is added
// to out_constants, with StartOffset and Size divided by 16 to give register index
// and count. Every bound resource of type D3D10_SIT_SAMPLER is added to
// out_samplers with its bind point and bind count.
// If the reflection interface cannot be obtained or its description cannot be
// read, both tables are left empty.
static void dx11_fill_constant_table(ShaderConstantTable& out_constants, ShaderConstantTable& out_samplers, const ShaderCode& bytecode)
{
    out_constants.clear();
    out_samplers.clear();

    ID3D11ShaderReflection* refl = NULL;
    D3DReflect( bytecode.data(), bytecode.size(), IID_ID3D11ShaderReflection, (void**)&refl);
    if( !refl )
    {
        return;
    }

    // Without a valid description the counts below would be uninitialized.
    D3D11_SHADER_DESC refl_desc;
    if( FAILED(refl->GetDesc(&refl_desc)) )
    {
        refl->Release();
        return;
    }

    for( uint32 i=0; i<refl_desc.ConstantBuffers; ++i )
    {
        ID3D11ShaderReflectionConstantBuffer* cb = refl->GetConstantBufferByIndex(i);
        D3D11_SHADER_BUFFER_DESC sb_desc;
        cb->GetDesc(&sb_desc);

        for( uint32 j=0; j<sb_desc.Variables; ++j )
        {
            ID3D11ShaderReflectionVariable* var = cb->GetVariableByIndex(j);
            D3D11_SHADER_VARIABLE_DESC var_desc;
            var->GetDesc(&var_desc);

            ShaderConstantDescr scd;
            scd.name = var_desc.Name;
            scd.register_index = var_desc.StartOffset/16;
            scd.register_count = var_desc.Size/16;
            out_constants.push_back(scd);
        }
    }

    for( uint32 i=0; i<refl_desc.BoundResources; ++i )
    {
        D3D11_SHADER_INPUT_BIND_DESC desc;
        refl->GetResourceBindingDesc(i, &desc);
        if( desc.Type == D3D10_SIT_SAMPLER )
        {
            ShaderConstantDescr scd;
            scd.name = desc.Name;
            scd.register_index = desc.BindPoint;
            scd.register_count = desc.BindCount;
            // Previously the descriptor was built and then dropped, so the
            // sampler table always came back empty.
            out_samplers.push_back(scd);
        }
    }

    refl->Release();
}
// Writes the shader statements that output one vertex.
//
// out          - shader source being generated.
// uid_data     - supplies wireframe, numTexGens and pixel_lighting.
// vertex       - name of the shader variable holding the vertex to emit.
// ApiType      - selects GLSL-style (OpenGL / Vulkan) or HLSL-style output.
// first_vertex - in wireframe mode, additionally writes code that stores the
//                vertex into "first" when i == 0.
static void EmitVertex(ShaderCode& out, const geometry_shader_uid_data* uid_data, const char* vertex, APIType ApiType, bool first_vertex)
{
    if (uid_data->wireframe && first_vertex)
    {
        out.Write("\tif (i == 0) first = %s;\n", vertex);
    }

    switch (ApiType)
    {
    case APIType::OpenGL:
        out.Write("\tgl_Position = %s.pos;\n", vertex);
        if (g_ActiveConfig.backend_info.bSupportsDepthClamp)
        {
            out.Write("\tgl_ClipDistance[0] = %s.clipDist0;\n", vertex);
            out.Write("\tgl_ClipDistance[1] = %s.clipDist1;\n", vertex);
        }
        AssignVSOutputMembers(out, "ps", vertex, uid_data->numTexGens, uid_data->pixel_lighting);
        out.Write("\tEmitVertex();\n");
        break;

    case APIType::Vulkan:
        // Vulkan NDC space has Y pointing down (right-handed NDC space).
        out.Write("\tgl_Position = %s.pos;\n", vertex);
        out.Write("\tgl_Position.y = -gl_Position.y;\n");
        AssignVSOutputMembers(out, "ps", vertex, uid_data->numTexGens, uid_data->pixel_lighting);
        out.Write("\tEmitVertex();\n");
        break;

    default:
        out.Write("\tps.o = %s;\n", vertex);
        out.Write("\toutput.Append(ps);\n");
        break;
    }
}
// Builds a program object from a vertex shader file and a pixel shader file.
//
// Creates the program handle, loads each file into a ShaderCode with
// CreateFromFile, attaches both to the program and then calls LinkProgram().
ShaderObject::ShaderObject( std::string VertexShaderFileName, std::string PixelShaderFileName )
{
    Compiled = false;
    Handle = glCreateProgramObjectARB();

    ShaderCode vertex_stage;
    ShaderCode fragment_stage;

    // Load both stages before attaching either of them.
    vertex_stage.CreateFromFile( VertexShaderFileName, GL_VERTEX_SHADER_ARB );
    fragment_stage.CreateFromFile( PixelShaderFileName, GL_FRAGMENT_SHADER_ARB );

    AttachShader( vertex_stage );
    AttachShader( fragment_stage );

    LinkProgram();
}
bool PixelShaderCache::SetShader() { PixelShaderUid uid = GetPixelShaderUid(); // Check if the shader is already set if (last_entry) { if (uid == last_uid) { GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true); return (last_entry->shader != nullptr); } } last_uid = uid; // Check if the shader is already in the cache PSCache::iterator iter; iter = PixelShaders.find(uid); if (iter != PixelShaders.end()) { const PSCacheEntry& entry = iter->second; last_entry = &entry; GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true); return (entry.shader != nullptr); } // Need to compile a new shader ShaderCode code = GeneratePixelShaderCode(APIType::D3D, uid.GetUidData()); D3DBlob* pbytecode; if (!D3D::CompilePixelShader(code.GetBuffer(), &pbytecode)) { GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true); return false; } // Insert the bytecode into the caches g_ps_disk_cache.Append(uid, pbytecode->Data(), pbytecode->Size()); bool success = InsertByteCode(uid, pbytecode->Data(), pbytecode->Size()); pbytecode->Release(); GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true); return success; }
// Binds the uber vertex shader for the current UID together with the input
// layout of the uber vertex format derived from `vertex_format`.
//
// The last selection is tried first, then the ubervshaders cache. On a miss the
// shader is generated and compiled, inserted into the cache, appended to
// g_uber_vs_disk_cache, and the function calls itself again to bind it.
//
// Returns false when the selected entry has no shader or InsertByteCode fails.
bool VertexShaderCache::SetUberShader(D3DVertexFormat* vertex_format)
{
    D3DVertexFormat* uber_vertex_format = static_cast<D3DVertexFormat*>(
        VertexLoaderManager::GetUberVertexFormat(vertex_format->GetVertexDeclaration()));
    UberShader::VertexShaderUid uid = UberShader::GetVertexShaderUid();

    // Binds whatever last_uber_entry currently points at; false if it has no shader.
    const auto bind_last_entry = [&]() -> bool {
        if (!last_uber_entry->shader)
            return false;

        uber_vertex_format->SetInputLayout(last_uber_entry->bytecode);
        D3D::stateman->SetVertexShader(last_uber_entry->shader);
        return true;
    };

    if (last_uber_entry && last_uber_uid == uid)
    {
        return bind_last_entry();
    }

    auto cached = ubervshaders.find(uid);
    if (cached != ubervshaders.end())
    {
        const VSCacheEntry& hit = cached->second;
        last_uber_uid = uid;
        last_uber_entry = &hit;

        GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
        return bind_last_entry();
    }

    // Need to compile a new shader
    D3DBlob* blob = nullptr;
    ShaderCode source =
        UberShader::GenVertexShader(APIType::D3D, ShaderHostConfig::GetCurrent(), uid.GetUidData());
    D3D::CompileVertexShader(source.GetBuffer(), &blob);

    if (!InsertByteCode(uid, blob))
    {
        SAFE_RELEASE(blob);
        return false;
    }

    g_uber_vs_disk_cache.Append(uid, blob->Data(), blob->Size());
    blob->Release();

    // The entry is cached now; go round again to bind it.
    return SetUberShader(vertex_format);
}
bool VertexShaderCache::SetShader() { VertexShaderUid uid = GetVertexShaderUid(); if (last_entry) { if (uid == last_uid) { GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true); return (last_entry->shader != nullptr); } } last_uid = uid; VSCache::iterator iter = vshaders.find(uid); if (iter != vshaders.end()) { const VSCacheEntry& entry = iter->second; last_entry = &entry; GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true); return (entry.shader != nullptr); } ShaderCode code = GenerateVertexShaderCode(API_D3D, uid.GetUidData()); D3DBlob* pbytecode = nullptr; D3D::CompileVertexShader(code.GetBuffer(), &pbytecode); if (pbytecode == nullptr) { GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true); return false; } g_vs_disk_cache.Append(uid, pbytecode->Data(), pbytecode->Size()); bool success = InsertByteCode(uid, pbytecode); pbytecode->Release(); GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true); return success; }
static bool dx11_compile_shader( ShaderCode& out_shader, const ShaderCode& source, const char* entry, const char* profile_str, FileSystem* /*fs*/, const char* /*include_dir*/ ) { LPD3DBLOB code_blob = NULL; LPD3DBLOB error_blob = NULL; uint32 flags = D3D10_SHADER_DEBUG | D3D10_SHADER_ENABLE_BACKWARDS_COMPATIBILITY; HRESULT hr = D3DCompile(source.data(), source.size(), "", NULL, NULL, entry, profile_str, flags, 0, &code_blob, &error_blob); out_shader.clear(); if( hr == S_OK ) { size_t size = code_blob->GetBufferSize(); out_shader.resize(size); memcpy(out_shader.data(), code_blob->GetBufferPointer(), size); } if( error_blob != NULL ) { char* error_str = (char*)error_blob->GetBufferPointer(); Log::error("Failed to compile shader\n%s", error_str); } SafeRelease(code_blob); SafeRelease(error_blob); if( hr == S_OK ) { return true; } else { return false; } }
void PixelShaderCache::PrepareShader( PIXEL_SHADER_RENDER_MODE render_mode, u32 components, const XFMemory &xfr, const BPMemory &bpm, bool ongputhread) { const API_TYPE api = ((D3D::GetCaps().PixelShaderVersion >> 8) & 0xFF) < 3 ? API_D3D9_SM20 : API_D3D9_SM30; PixelShaderUid uid; GetPixelShaderUID(uid, render_mode, components, xfr, bpm); if (ongputhread) { Compiler->ProcCompilationResults(); #if defined(_DEBUG) || defined(DEBUGFAST) if (g_ActiveConfig.bEnableShaderDebugging) { ShaderCode code; GeneratePixelShaderCodeD3D9(code, uid.GetUidData()); } #endif // Check if the shader is already set if (last_entry[render_mode]) { if (uid == last_uid[render_mode]) { return; } } last_uid[render_mode] = uid; GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true); } else { if (external_last_uid[render_mode] == uid) { return; } external_last_uid[render_mode] = uid; } PixelShadersLock.lock(); PSCacheEntry* entry = &PixelShaders[uid]; PixelShadersLock.unlock(); if (ongputhread) { last_entry[render_mode] = entry; } // Compile only when we have a new instance if (entry->initialized.test_and_set()) { return; } // Need to compile a new shader ShaderCompilerWorkUnit *wunit = Compiler->NewUnit(PIXELSHADERGEN_BUFFERSIZE); wunit->GenerateCodeHandler = [uid, api](ShaderCompilerWorkUnit* wunit) { ShaderCode code; code.SetBuffer(wunit->code.data()); if (api == API_D3D9_SM20) { GeneratePixelShaderCodeD3D9SM2(code, uid.GetUidData()); } else { GeneratePixelShaderCodeD3D9(code, uid.GetUidData()); } wunit->codesize = (u32)code.BufferSize(); }; wunit->entrypoint = "main"; wunit->flags = D3DCOMPILE_SKIP_VALIDATION | D3DCOMPILE_OPTIMIZATION_LEVEL3; wunit->target = D3D::PixelShaderVersionString(); wunit->ResultHandler = [uid, entry](ShaderCompilerWorkUnit* wunit) { if (SUCCEEDED(wunit->cresult)) { ID3DBlob* shaderBuffer = wunit->shaderbytecode; const u8* bytecode = (const u8*)shaderBuffer->GetBufferPointer(); u32 bytecodelen = (u32)shaderBuffer->GetBufferSize(); g_ps_disk_cache.Append(uid, bytecode, 
bytecodelen); PushByteCode(uid, bytecode, bytecodelen, entry); #if defined(_DEBUG) || defined(DEBUGFAST) if (g_ActiveConfig.bEnableShaderDebugging) { u32 code_hash = HashAdler32((const u8 *)wunit->code.data(), wunit->codesize); unique_shaders.insert(code_hash); entry->code = wunit->code.data(); } if (g_ActiveConfig.iLog & CONF_SAVESHADERS) { static int counter = 0; char szTemp[MAX_PATH]; sprintf(szTemp, "%sps_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++); SaveData(szTemp, wunit->code.data()); } #endif } else { static int num_failures = 0; std::string filename = StringFromFormat("%sbad_ps_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++); std::ofstream file; OpenFStream(file, filename, std::ios_base::out); file << ((const char *)wunit->code.data()); file << ((const char*)wunit->error->GetBufferPointer()); file.close(); PanicAlert("Failed to compile pixel shader!\nThis usually happens when trying to use Dolphin with an outdated GPU or integrated GPU like the Intel GMA series.\n\nIf you're sure this is Dolphin's error anyway, post the contents of %s along with this error message at the forums.\n\nDebug info (%s):\n%s", filename, D3D::VertexShaderVersionString(), (const char*)wunit->error->GetBufferPointer()); } }; Compiler->CompileShaderAsync(wunit); }
bool LineGeometryShader::SetShader(u32 components, float lineWidth, float texOffset, float vpWidth, float vpHeight, const bool* texOffsetEnable) { if (!m_ready) return false; // Make sure geometry shader for "components" is available ComboMap::iterator shaderIt = m_shaders.find(components); if (shaderIt == m_shaders.end()) { // Generate new shader. Warning: not thread-safe. static char buffer[16384]; ShaderCode code; code.SetBuffer(buffer); GenerateVSOutputStructForGS(code, API_D3D); code.Write("\n%s", LINE_GS_COMMON); std::stringstream numTexCoordsStream; numTexCoordsStream << xfmem.numTexGen.numTexGens; INFO_LOG(VIDEO, "Compiling line geometry shader for components 0x%.08X (num texcoords %d)", components, xfmem.numTexGen.numTexGens); const std::string& numTexCoordsStr = numTexCoordsStream.str(); D3D_SHADER_MACRO macros[] = { { "NUM_TEXCOORDS", numTexCoordsStr.c_str() }, { nullptr, nullptr } }; ID3D11GeometryShader* newShader = D3D::CompileAndCreateGeometryShader(code.GetBuffer(), macros); if (!newShader) { WARN_LOG(VIDEO, "Line geometry shader for components 0x%.08X failed to compile", components); // Add dummy shader to prevent trying to compile again m_shaders[components] = nullptr; return false; } shaderIt = m_shaders.insert(std::make_pair(components, newShader)).first; } if (shaderIt != m_shaders.end()) { if (shaderIt->second) { D3D11_MAPPED_SUBRESOURCE map; HRESULT hr = D3D::context->Map(m_paramsBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &map); if (SUCCEEDED(hr)) { LineGSParams* params = (LineGSParams*)map.pData; params->LineWidth = lineWidth; params->TexOffset = texOffset; params->VpWidth = vpWidth; params->VpHeight = vpHeight; for (int i = 0; i < 8; ++i) params->TexOffsetEnable[i] = texOffsetEnable[i] ? 
1.f : 0.f; D3D::context->Unmap(m_paramsBuffer, 0); } else ERROR_LOG(VIDEO, "Failed to map line gs params buffer"); DEBUG_LOG(VIDEO, "Line params: width %f, texOffset %f, vpWidth %f, vpHeight %f", lineWidth, texOffset, vpWidth, vpHeight); D3D::context->GSSetShader(shaderIt->second, nullptr, 0); D3D::context->GSSetConstantBuffers(0, 1, &m_paramsBuffer); return true; } else return false; } else return false; }
void GeometryShaderCache::CompileGShader(const GeometryShaderUid& uid, bool ongputhread) { s_geometry_shaders_lock.lock(); GSCacheEntry* entry = &s_geometry_shaders->GetOrAdd(uid); s_geometry_shaders_lock.unlock(); if (ongputhread) { s_last_entry = entry; } // Compile only when we have a new instance if (entry->initialized.test_and_set()) { return; } // Need to compile a new shader ShaderCompilerWorkUnit *wunit = s_compiler->NewUnit(GEOMETRYSHADERGEN_BUFFERSIZE); wunit->GenerateCodeHandler = [uid](ShaderCompilerWorkUnit* wunit) { ShaderCode code; code.SetBuffer(wunit->code.data()); GenerateGeometryShaderCode(code, uid.GetUidData(), API_D3D11); wunit->codesize = (u32)code.BufferSize(); }; wunit->entrypoint = "main"; #if defined(_DEBUG) || defined(DEBUGFAST) wunit->flags = D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION; #else wunit->flags = D3DCOMPILE_SKIP_VALIDATION | D3DCOMPILE_OPTIMIZATION_LEVEL3; #endif wunit->target = D3D::GeometryShaderVersionString(); wunit->ResultHandler = [uid, entry](ShaderCompilerWorkUnit* wunit) { if (SUCCEEDED(wunit->cresult)) { ID3DBlob* shaderBuffer = wunit->shaderbytecode; const u8* bytecode = (const u8*)shaderBuffer->GetBufferPointer(); u32 bytecodelen = (u32)shaderBuffer->GetBufferSize(); g_gs_disk_cache.Append(uid, bytecode, bytecodelen); PushByteCode(bytecode, bytecodelen, entry); } else { static int num_failures = 0; char szTemp[MAX_PATH]; sprintf(szTemp, "%sbad_gs_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++); std::ofstream file; OpenFStream(file, szTemp, std::ios_base::out); file << ((const char *)wunit->code.data()); file << ((const char *)wunit->error->GetBufferPointer()); file.close(); PanicAlert("Failed to compile geometry shader!\nThis usually happens when trying to use Dolphin with an outdated GPU or integrated GPU like the Intel GMA series.\n\nIf you're sure this is Dolphin's error anyway, post the contents of %s along with this error message at the forums.\n\nDebug info (%s):\n%s", szTemp, 
D3D::GeometryShaderVersionString(), (char*)wunit->error->GetBufferPointer()); } }; s_compiler->CompileShaderAsync(wunit); }
// Writes the shader code that accumulates one light into "lacc".
//
// object        - shader source being generated.
// uid_data      - attnfunc and diffusefunc hold 2 bits per lighting channel.
// index         - light index, passed to the LIGHT_*_PARAMS macros.
// litchan_index - lighting channel whose 2-bit attenuation / diffuse function
//                 fields are read from uid_data.
// alpha         - true to accumulate into lacc.a, false for lacc.rgb.
//
// The first part writes the light direction and attenuation according to the
// attenuation function; the second part adds the light colour according to the
// diffuse function. An unknown diffuse function trips _assert_(0).
static void GenerateLightShader(ShaderCode& object, const LightingUidData& uid_data, int index, int litchan_index, bool alpha)
{
    const char* channels;
    const char* vec_suffix;
    if (alpha)
    {
        channels = "a";
        vec_suffix = "";
    }
    else
    {
        channels = "rgb";
        vec_suffix = "3";
    }

    const int field_shift = 2 * litchan_index;
    const int attn_mode = (uid_data.attnfunc >> field_shift) & 0x3;
    const int diffuse_mode = (uid_data.diffusefunc >> field_shift) & 0x3;

    // --- attenuation ---
    switch (attn_mode)
    {
    case LIGHTATTN_NONE:
    case LIGHTATTN_DIR:
        object.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(index));
        object.Write("attn = 1.0;\n");
        object.Write("if (length(ldir) == 0.0)\n\t ldir = _norm0;\n");
        break;

    case LIGHTATTN_SPEC:
    {
        const char* dist_fn = (diffuse_mode == LIGHTDIF_NONE) ? "" : "normalize";

        object.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(index));
        object.Write("attn = (dot(_norm0, ldir) >= 0.0) ? max(0.0, dot(_norm0, " LIGHT_DIR ".xyz)) : 0.0;\n",
                     LIGHT_DIR_PARAMS(index));
        object.Write("cosAttn = " LIGHT_COSATT ".xyz;\n", LIGHT_COSATT_PARAMS(index));
        object.Write("distAttn = %s(" LIGHT_DISTATT ".xyz);\n", dist_fn, LIGHT_DISTATT_PARAMS(index));
        object.Write("attn = max(0.0f, dot(cosAttn, float3(1.0, attn, attn*attn))) / dot(distAttn, "
                     "float3(1.0, attn, attn*attn));\n");
        break;
    }

    case LIGHTATTN_SPOT:
        object.Write("ldir = " LIGHT_POS ".xyz - pos.xyz;\n", LIGHT_POS_PARAMS(index));
        object.Write("dist2 = dot(ldir, ldir);\n"
                     "dist = sqrt(dist2);\n"
                     "ldir = ldir / dist;\n"
                     "attn = max(0.0, dot(ldir, " LIGHT_DIR ".xyz));\n",
                     LIGHT_DIR_PARAMS(index));
        // attn*attn may overflow
        object.Write("attn = max(0.0, " LIGHT_COSATT ".x + " LIGHT_COSATT ".y*attn + " LIGHT_COSATT ".z*attn*attn) / dot(" LIGHT_DISTATT ".xyz, float3(1.0,dist,dist2));\n",
                     LIGHT_COSATT_PARAMS(index), LIGHT_COSATT_PARAMS(index),
                     LIGHT_COSATT_PARAMS(index), LIGHT_DISTATT_PARAMS(index));
        break;
    }

    // --- diffuse ---
    switch (diffuse_mode)
    {
    case LIGHTDIF_NONE:
        object.Write("lacc.%s += int%s(round(attn * float%s(" LIGHT_COL ")));\n",
                     channels, vec_suffix, vec_suffix, LIGHT_COL_PARAMS(index, channels));
        break;

    case LIGHTDIF_SIGN:
    case LIGHTDIF_CLAMP:
    {
        // CLAMP clamps the dot product at zero; SIGN keeps its sign.
        const char* dot_prefix = (diffuse_mode != LIGHTDIF_SIGN) ? "max(0.0," : "(";
        object.Write("lacc.%s += int%s(round(attn * %sdot(ldir, _norm0)) * float%s(" LIGHT_COL ")));\n",
                     channels, vec_suffix, dot_prefix, vec_suffix, LIGHT_COL_PARAMS(index, channels));
        break;
    }

    default:
        _assert_(0);
    }

    object.Write("\n");
}
void ShaderCache::HandleTSUIDChange( const TessellationShaderUid& ts_uid, bool on_gpu_thread) { s_shaders_lock.lock(); std::pair<ByteCodeCacheEntry, ByteCodeCacheEntry>& entry = ts_bytecode_cache->GetOrAdd(ts_uid); s_shaders_lock.unlock(); ByteCodeCacheEntry* dentry = &entry.first; ByteCodeCacheEntry* hentry = &entry.second; if (on_gpu_thread) { if (dentry->m_compiled && hentry->m_compiled) { s_last_domain_shader_bytecode = dentry; s_last_hull_shader_bytecode = hentry; } else { s_last_tessellation_shader_uid = {}; s_last_domain_shader_bytecode = &s_pass_entry; s_last_hull_shader_bytecode = &s_pass_entry; } } if (dentry->m_initialized.test_and_set()) { return; } hentry->m_initialized.test_and_set(); // Need to compile a new shader ShaderCode code; ShaderCompilerWorkUnit *wunit = s_compiler->NewUnit(TESSELLATIONSHADERGEN_BUFFERSIZE); ShaderCompilerWorkUnit *wunitd = s_compiler->NewUnit(TESSELLATIONSHADERGEN_BUFFERSIZE); code.SetBuffer(wunit->code.data()); GenerateTessellationShaderCode(code, API_D3D11, ts_uid.GetUidData()); memcpy(wunitd->code.data(), wunit->code.data(), code.BufferSize()); wunit->codesize = (u32)code.BufferSize(); wunit->entrypoint = "HS_TFO"; wunit->flags = D3DCOMPILE_SKIP_VALIDATION | D3DCOMPILE_SKIP_OPTIMIZATION; wunit->target = D3D::HullShaderVersionString(); wunitd->codesize = (u32)code.BufferSize(); wunitd->entrypoint = "DS_TFO"; wunitd->flags = D3DCOMPILE_SKIP_VALIDATION | D3DCOMPILE_OPTIMIZATION_LEVEL3; wunitd->target = D3D::DomainShaderVersionString(); wunitd->ResultHandler = [ts_uid, dentry](ShaderCompilerWorkUnit* wunit) { if (SUCCEEDED(wunit->cresult)) { D3DBlob* shaderBuffer = new D3DBlob(wunit->shaderbytecode); s_ds_disk_cache.Append(ts_uid, shaderBuffer->Data(), shaderBuffer->Size()); PushByteCode(dentry, shaderBuffer); wunit->shaderbytecode->Release(); wunit->shaderbytecode = nullptr; } else { static int num_failures = 0; std::string filename = StringFromFormat("%sbad_ds_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), 
num_failures++); std::ofstream file; OpenFStream(file, filename, std::ios_base::out); file << ((const char *)wunit->code.data()); file << ((const char *)wunit->error->GetBufferPointer()); file.close(); PanicAlert("Failed to compile domain shader!\nThis usually happens when trying to use Dolphin with an outdated GPU or integrated GPU like the Intel GMA series.\n\nIf you're sure this is Dolphin's error anyway, post the contents of %s along with this error message at the forums.\n\nDebug info (%s):\n%s", filename, D3D::DomainShaderVersionString(), (char*)wunit->error->GetBufferPointer()); } }; wunit->ResultHandler = [ts_uid, hentry](ShaderCompilerWorkUnit* wunit) { if (SUCCEEDED(wunit->cresult)) { D3DBlob* shaderBuffer = new D3DBlob(wunit->shaderbytecode); s_hs_disk_cache.Append(ts_uid, shaderBuffer->Data(), shaderBuffer->Size()); PushByteCode(hentry, shaderBuffer); wunit->shaderbytecode->Release(); wunit->shaderbytecode = nullptr; } else { static int num_failures = 0; std::string filename = StringFromFormat("%sbad_hs_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++); std::ofstream file; OpenFStream(file, filename, std::ios_base::out); file << ((const char *)wunit->code.data()); file << ((const char *)wunit->error->GetBufferPointer()); file.close(); PanicAlert("Failed to compile hull shader!\nThis usually happens when trying to use Dolphin with an outdated GPU or integrated GPU like the Intel GMA series.\n\nIf you're sure this is Dolphin's error anyway, post the contents of %s along with this error message at the forums.\n\nDebug info (%s):\n%s", filename, D3D::HullShaderVersionString(), (char*)wunit->error->GetBufferPointer()); } }; s_compiler->CompileShaderAsync(wunit); s_compiler->CompileShaderAsync(wunitd); }
void PixelShaderCache::CompilePShader(const PixelShaderUid& uid, PIXEL_SHADER_RENDER_MODE render_mode, bool ongputhread) { const API_TYPE api = ((D3D::GetCaps().PixelShaderVersion >> 8) & 0xFF) < 3 ? API_D3D9_SM20 : API_D3D9_SM30; s_pixel_shaders_lock.lock(); PSCacheEntry* entry = &s_pshaders->GetOrAdd(uid); s_pixel_shaders_lock.unlock(); if (ongputhread) { s_last_entry[render_mode] = entry; } // Compile only when we have a new instance if (entry->initialized.test_and_set()) { return; } // Need to compile a new shader ShaderCompilerWorkUnit *wunit = s_compiler->NewUnit(PIXELSHADERGEN_BUFFERSIZE); wunit->GenerateCodeHandler = [uid, api](ShaderCompilerWorkUnit* wunit) { ShaderCode code; code.SetBuffer(wunit->code.data()); if (api == API_D3D9_SM20) { GeneratePixelShaderCodeD3D9SM2(code, uid.GetUidData()); } else { GeneratePixelShaderCodeD3D9(code, uid.GetUidData()); } wunit->codesize = (u32)code.BufferSize(); }; wunit->entrypoint = "main"; wunit->flags = D3DCOMPILE_SKIP_VALIDATION | D3DCOMPILE_OPTIMIZATION_LEVEL3; wunit->target = D3D::PixelShaderVersionString(); wunit->ResultHandler = [uid, entry](ShaderCompilerWorkUnit* wunit) { if (SUCCEEDED(wunit->cresult)) { ID3DBlob* shaderBuffer = wunit->shaderbytecode; const u8* bytecode = (const u8*)shaderBuffer->GetBufferPointer(); u32 bytecodelen = (u32)shaderBuffer->GetBufferSize(); g_ps_disk_cache.Append(uid, bytecode, bytecodelen); PushByteCode(uid, bytecode, bytecodelen, entry); } else { static int num_failures = 0; std::string filename = StringFromFormat("%sbad_ps_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++); std::ofstream file; OpenFStream(file, filename, std::ios_base::out); file << ((const char *)wunit->code.data()); file << ((const char*)wunit->error->GetBufferPointer()); file.close(); PanicAlert("Failed to compile pixel shader!\nThis usually happens when trying to use Dolphin with an outdated GPU or integrated GPU like the Intel GMA series.\n\nIf you're sure this is Dolphin's error anyway, post 
the contents of %s along with this error message at the forums.\n\nDebug info (%s):\n%s", filename, D3D::VertexShaderVersionString(), (const char*)wunit->error->GetBufferPointer()); } }; s_compiler->CompileShaderAsync(wunit); }
// vertex shader // lights/colors // materials name is I_MATERIALS in vs and I_PMATERIALS in ps // inColorName is color in vs and colors_ in ps // dest is o.colors_ in vs and colors_ in ps void GenerateLightingShaderCode(ShaderCode& object, const LightingUidData& uid_data, int components, const char* inColorName, const char* dest) { for (unsigned int j = 0; j < xfmem.numChan.numColorChans; j++) { object.Write("{\n"); bool colormatsource = !!(uid_data.matsource & (1 << j)); if (colormatsource) // from vertex { if (components & (VB_HAS_COL0 << j)) object.Write("int4 mat = int4(round(%s%d * 255.0));\n", inColorName, j); else if (components & VB_HAS_COL0) object.Write("int4 mat = int4(round(%s0 * 255.0));\n", inColorName); else object.Write("int4 mat = int4(255, 255, 255, 255);\n"); } else // from color { object.Write("int4 mat = %s[%d];\n", I_MATERIALS, j + 2); } if (uid_data.enablelighting & (1 << j)) { if (uid_data.ambsource & (1 << j)) // from vertex { if (components & (VB_HAS_COL0 << j)) object.Write("lacc = int4(round(%s%d * 255.0));\n", inColorName, j); else if (components & VB_HAS_COL0) object.Write("lacc = int4(round(%s0 * 255.0));\n", inColorName); else // TODO: this isn't verified. Here we want to read the ambient from the vertex, // but the vertex itself has no color. So we don't know which value to read. 
// Returning 1.0 is the same as disabled lightning, so this could be fine object.Write("lacc = int4(255, 255, 255, 255);\n"); } else // from color { object.Write("lacc = %s[%d];\n", I_MATERIALS, j); } } else { object.Write("lacc = int4(255, 255, 255, 255);\n"); } // check if alpha is different bool alphamatsource = !!(uid_data.matsource & (1 << (j + 2))); if (alphamatsource != colormatsource) { if (alphamatsource) // from vertex { if (components & (VB_HAS_COL0 << j)) object.Write("mat.w = int(round(%s%d.w * 255.0));\n", inColorName, j); else if (components & VB_HAS_COL0) object.Write("mat.w = int(round(%s0.w * 255.0));\n", inColorName); else object.Write("mat.w = 255;\n"); } else // from color { object.Write("mat.w = %s[%d].w;\n", I_MATERIALS, j + 2); } } if (uid_data.enablelighting & (1 << (j + 2))) { if (uid_data.ambsource & (1 << (j + 2))) // from vertex { if (components & (VB_HAS_COL0 << j)) object.Write("lacc.w = int(round(%s%d.w * 255.0));\n", inColorName, j); else if (components & VB_HAS_COL0) object.Write("lacc.w = int(round(%s0.w * 255.0));\n", inColorName); else // TODO: The same for alpha: We want to read from vertex, but the vertex has no color object.Write("lacc.w = 255;\n"); } else // from color { object.Write("lacc.w = %s[%d].w;\n", I_MATERIALS, j); } } else { object.Write("lacc.w = 255;\n"); } if (uid_data.enablelighting & (1 << j)) // Color lights { for (int i = 0; i < 8; ++i) if (uid_data.light_mask & (1 << (i + 8 * j))) GenerateLightShader(object, uid_data, i, j, false); } if (uid_data.enablelighting & (1 << (j + 2))) // Alpha lights { for (int i = 0; i < 8; ++i) if (uid_data.light_mask & (1 << (i + 8 * (j + 2)))) GenerateLightShader(object, uid_data, i, j + 2, true); } object.Write("lacc = clamp(lacc, 0, 255);\n"); object.Write("%s%d = float4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;\n", dest, j); object.Write("}\n"); } }
// Binds the program for the given state and returns it.
//
// dstAlphaMode   - forwarded to the uid and pixel shader generators.
// components     - vertex component mask, forwarded to the generators.
// primitive_type - forwarded to the uid and geometry shader generators.
//
// Returns a pointer to the bound SHADER, or nullptr when a newly generated
// program fails to compile.
//
// Lookup order: the most recently used entry (last_entry / last_uid), then
// the pshaders cache, and finally a fresh entry that is generated and compiled.
SHADER* ProgramShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components, u32 primitive_type)
{
	SHADERUID uid;
	GetShaderId(&uid, dstAlphaMode, components, primitive_type);

	// Check if the shader is already set
	if (last_entry)
	{
		if (uid == last_uid)
		{
			GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
			last_entry->shader.Bind();
			return &last_entry->shader;
		}
	}

	last_uid = uid;

	// Check if shader is already in cache
	PCache::iterator iter = pshaders.find(uid);
	if (iter != pshaders.end())
	{
		PCacheEntry *entry = &iter->second;
		last_entry = entry;

		GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
		last_entry->shader.Bind();
		return &last_entry->shader;
	}

	// Make an entry in the table
	PCacheEntry& newentry = pshaders[uid];
	last_entry = &newentry;
	newentry.in_cache = 0;

	VertexShaderCode vcode;
	PixelShaderCode pcode;
	ShaderCode gcode;
	GenerateVertexShaderCode(vcode, components, API_OPENGL);
	GeneratePixelShaderCode(pcode, dstAlphaMode, API_OPENGL, components);

	// The geometry stage is optional: gcode is left untouched when geometry
	// shaders are unsupported or the uid describes a pass-through shader.
	if (g_ActiveConfig.backend_info.bSupportsGeometryShaders && !uid.guid.GetUidData()->IsPassthrough())
		GenerateGeometryShaderCode(gcode, primitive_type, API_OPENGL);

	if (g_ActiveConfig.bEnableShaderDebugging)
	{
		newentry.shader.strvprog = vcode.GetBuffer();
		newentry.shader.strpprog = pcode.GetBuffer();
		// gcode may have no buffer (see above); never assign a null pointer
		// to the stored source string.
		if (gcode.GetBuffer() != nullptr)
			newentry.shader.strgprog = gcode.GetBuffer();
	}

#if defined(_DEBUG) || defined(DEBUGFAST)
	if (g_ActiveConfig.iLog & CONF_SAVESHADERS)
	{
		static int counter = 0;
		std::string filename = StringFromFormat("%svs_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++);
		SaveData(filename, vcode.GetBuffer());

		filename = StringFromFormat("%sps_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++);
		SaveData(filename, pcode.GetBuffer());

		if (gcode.GetBuffer() != nullptr)
		{
			filename = StringFromFormat("%sgs_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++);
			SaveData(filename, gcode.GetBuffer());
		}
	}
#endif

	if (!CompileShader(newentry.shader, vcode.GetBuffer(), pcode.GetBuffer(), gcode.GetBuffer()))
	{
		GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
		return nullptr;
	}

	INCSTAT(stats.numPixelShadersCreated);
	SETSTAT(stats.numPixelShadersAlive, pshaders.size());
	GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);

	last_entry->shader.Bind();
	return &last_entry->shader;
}
// Builds the source text of a geometry shader and returns it.
//
// ApiType  - OpenGL and Vulkan take the GLSL-flavoured paths below; every
//            other value takes the path marked "D3D".
// uid_data - describes the shader: primitive_type, wireframe, stereo,
//            numTexGens, pixel_lighting, msaa and ssaa are read here.
//
// Shape of the generated shader:
//  - vertex_in input vertices (primitive_type + 1) are processed in a loop.
//  - Triangles pass each vertex through; lines emit two vertices per input
//    vertex ("l"/"r"); points emit four ("ll"/"lr"/"ul"/"ur").
//  - With stereo enabled, each eye is handled either by a separate geometry
//    shader invocation (when bSupportsGSInstancing) or by an extra loop.
//  - With wireframe enabled the output is a line strip and one more output
//    vertex is reserved.
ShaderCode GenerateGeometryShaderCode(APIType ApiType, const geometry_shader_uid_data* uid_data)
{
	ShaderCode out;
	// Non-uid template parameters will write to the dummy data (=> gets optimized out)
	// NOTE(review): this function is not a template in its visible form, so the
	// comment above looks like a leftover from an earlier version -- confirm.

	// Number of input vertices per primitive, and the maximum number of
	// vertices written per primitive (per eye).
	const unsigned int vertex_in = uid_data->primitive_type + 1;
	unsigned int vertex_out = uid_data->primitive_type == PRIMITIVE_TRIANGLES ? 3 : 4;

	// Wireframe reserves one additional output vertex.
	if (uid_data->wireframe)
		vertex_out++;

	if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
	{
		// Insert layout parameters
		if (g_ActiveConfig.backend_info.bSupportsGSInstancing)
		{
			// One invocation per eye; each invocation writes vertex_out vertices.
			out.Write("layout(%s, invocations = %d) in;\n", primitives_ogl[uid_data->primitive_type], uid_data->stereo ? 2 : 1);
			out.Write("layout(%s_strip, max_vertices = %d) out;\n", uid_data->wireframe ? "line" : "triangle", vertex_out);
		}
		else
		{
			// Without instancing a single invocation writes both eyes, so the
			// vertex budget doubles for stereo.
			out.Write("layout(%s) in;\n", primitives_ogl[uid_data->primitive_type]);
			out.Write("layout(%s_strip, max_vertices = %d) out;\n", uid_data->wireframe ? "line" : "triangle", uid_data->stereo ? vertex_out * 2 : vertex_out);
		}
	}

	out.Write("%s", s_lighting_struct);

	// uniforms
	if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
		out.Write("UBO_BINDING(std140, 3) uniform GSBlock {\n");
	else
		out.Write("cbuffer GSBlock {\n");

	out.Write("\tfloat4 " I_STEREOPARAMS ";\n" "\tfloat4 " I_LINEPTPARAMS ";\n" "\tint4 " I_TEXOFFSET ";\n" "};\n");

	// Plain struct used for the working copies of each vertex.
	out.Write("struct VS_OUTPUT {\n");
	GenerateVSOutputMembers<ShaderCode>(out, ApiType, uid_data->numTexGens, uid_data->pixel_lighting, "");
	out.Write("};\n");

	if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
	{
		if (g_ActiveConfig.backend_info.bSupportsGSInstancing)
			out.Write("#define InstanceID gl_InvocationID\n");

		// Input interface block: one element per input vertex.
		out.Write("VARYING_LOCATION(0) in VertexData {\n");
		GenerateVSOutputMembers<ShaderCode>( out, ApiType, uid_data->numTexGens, uid_data->pixel_lighting, GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa, true, true));
		out.Write("} vs[%d];\n", vertex_in);

		// Output interface block, plus the layer index when rendering stereo.
		out.Write("VARYING_LOCATION(0) out VertexData {\n");
		GenerateVSOutputMembers<ShaderCode>( out, ApiType, uid_data->numTexGens, uid_data->pixel_lighting, GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa, false, true));

		if (uid_data->stereo)
			out.Write("\tflat int layer;\n");

		out.Write("} ps;\n");

		out.Write("void main()\n{\n");
	}
	else // D3D
	{
		out.Write("struct VertexData {\n");
		out.Write("\tVS_OUTPUT o;\n");

		if (uid_data->stereo)
			out.Write("\tuint layer : SV_RenderTargetArrayIndex;\n");

		out.Write("};\n");

		if (g_ActiveConfig.backend_info.bSupportsGSInstancing)
		{
			out.Write("[maxvertexcount(%d)]\n[instance(%d)]\n", vertex_out, uid_data->stereo ? 2 : 1);
			out.Write("void main(%s VS_OUTPUT o[%d], inout %sStream<VertexData> output, in uint " "InstanceID : SV_GSInstanceID)\n{\n", primitives_d3d[uid_data->primitive_type], vertex_in, uid_data->wireframe ? "Line" : "Triangle");
		}
		else
		{
			out.Write("[maxvertexcount(%d)]\n", uid_data->stereo ? vertex_out * 2 : vertex_out);
			out.Write("void main(%s VS_OUTPUT o[%d], inout %sStream<VertexData> output)\n{\n", primitives_d3d[uid_data->primitive_type], vertex_in, uid_data->wireframe ? "Line" : "Triangle");
		}

		out.Write("\tVertexData ps;\n");
	}

	// Per-primitive setup emitted once, before the vertex loop: the "offset"
	// used to widen lines / points into quads.
	if (uid_data->primitive_type == PRIMITIVE_LINES)
	{
		if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
		{
			out.Write("\tVS_OUTPUT start, end;\n");
			AssignVSOutputMembers(out, "start", "vs[0]", uid_data->numTexGens, uid_data->pixel_lighting);
			AssignVSOutputMembers(out, "end", "vs[1]", uid_data->numTexGens, uid_data->pixel_lighting);
		}
		else
		{
			out.Write("\tVS_OUTPUT start = o[0];\n");
			out.Write("\tVS_OUTPUT end = o[1];\n");
		}

		// GameCube/Wii's line drawing algorithm is a little quirky. It does not
		// use the correct line caps. Instead, the line caps are vertical or
		// horizontal depending the slope of the line.
		out.Write("\tfloat2 offset;\n"
			"\tfloat2 to = abs(end.pos.xy / end.pos.w - start.pos.xy / start.pos.w);\n"
			// FIXME: What does real hardware do when line is at a 45-degree angle?
			// FIXME: Lines aren't drawn at the correct width. See Twilight Princess map.
			"\tif (" I_LINEPTPARAMS ".y * to.y > " I_LINEPTPARAMS ".x * to.x) {\n"
			// Line is more tall. Extend geometry left and right.
			// Lerp LineWidth/2 from [0..VpWidth] to [-1..1]
			"\t\toffset = float2(" I_LINEPTPARAMS ".z / " I_LINEPTPARAMS ".x, 0);\n"
			"\t} else {\n"
			// Line is more wide. Extend geometry up and down.
			// Lerp LineWidth/2 from [0..VpHeight] to [1..-1]
			"\t\toffset = float2(0, -" I_LINEPTPARAMS ".z / " I_LINEPTPARAMS ".y);\n"
			"\t}\n");
	}
	else if (uid_data->primitive_type == PRIMITIVE_POINTS)
	{
		if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
		{
			out.Write("\tVS_OUTPUT center;\n");
			AssignVSOutputMembers(out, "center", "vs[0]", uid_data->numTexGens, uid_data->pixel_lighting);
		}
		else
		{
			out.Write("\tVS_OUTPUT center = o[0];\n");
		}

		// Offset from center to upper right vertex
		// Lerp PointSize/2 from [0,0..VpWidth,VpHeight] to [-1,1..1,-1]
		out.Write("\tfloat2 offset = float2(" I_LINEPTPARAMS ".w / " I_LINEPTPARAMS ".x, -" I_LINEPTPARAMS ".w / " I_LINEPTPARAMS ".y) * center.pos.w;\n");
	}

	if (uid_data->stereo)
	{
		// If the GPU supports invocation we don't need a for loop and can simply use the
		// invocation identifier to determine which layer we're rendering.
		if (g_ActiveConfig.backend_info.bSupportsGSInstancing)
			out.Write("\tint eye = InstanceID;\n");
		else
			out.Write("\tfor (int eye = 0; eye < 2; ++eye) {\n");
	}

	if (uid_data->wireframe)
		out.Write("\tVS_OUTPUT first;\n");

	// Main loop over the input vertices; closed by the "\t}\n" written just
	// before EndPrimitive below.
	out.Write("\tfor (int i = 0; i < %d; ++i) {\n", vertex_in);

	if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
	{
		out.Write("\tVS_OUTPUT f;\n");
		AssignVSOutputMembers(out, "f", "vs[i]", uid_data->numTexGens, uid_data->pixel_lighting);

		if (g_ActiveConfig.backend_info.bSupportsDepthClamp && DriverDetails::HasBug(DriverDetails::BUG_BROKENCLIPDISTANCE))
		{
			// On certain GPUs we have to consume the clip distance from the vertex shader
			// or else the other vertex shader outputs will get corrupted.
			out.Write("\tf.clipDist0 = gl_in[i].gl_ClipDistance[0];\n");
			out.Write("\tf.clipDist1 = gl_in[i].gl_ClipDistance[1];\n");
		}
	}
	else
	{
		out.Write("\tVS_OUTPUT f = o[i];\n");
	}

	if (uid_data->stereo)
	{
		// Select the output layer
		out.Write("\tps.layer = eye;\n");
		if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
			out.Write("\tgl_Layer = eye;\n");

		// For stereoscopy add a small horizontal offset in Normalized Device Coordinates proportional
		// to the depth of the vertex. We retrieve the depth value from the w-component of the projected
		// vertex which contains the negated z-component of the original vertex.
		// For negative parallax (out-of-screen effects) we subtract a convergence value from
		// the depth value. This results in objects at a distance smaller than the convergence
		// distance to seemingly appear in front of the screen.
		// This formula is based on page 13 of the "Nvidia 3D Vision Automatic, Best Practices Guide"
		out.Write("\tfloat hoffset = (eye == 0) ? " I_STEREOPARAMS ".x : " I_STEREOPARAMS ".y;\n");
		out.Write("\tf.pos.x += hoffset * (f.pos.w - " I_STEREOPARAMS ".z);\n");
	}

	if (uid_data->primitive_type == PRIMITIVE_LINES)
	{
		// Each line end point becomes two vertices displaced by +/- offset.
		out.Write("\tVS_OUTPUT l = f;\n" "\tVS_OUTPUT r = f;\n");

		out.Write("\tl.pos.xy -= offset * l.pos.w;\n" "\tr.pos.xy += offset * r.pos.w;\n");

		// Optional texture coordinate offset, enabled per texgen by the bits
		// of I_TEXOFFSET[0] and scaled by I_TEXOFFSET[2].
		out.Write("\tif (" I_TEXOFFSET "[2] != 0) {\n");
		out.Write("\tfloat texOffset = 1.0 / float(" I_TEXOFFSET "[2]);\n");

		for (unsigned int i = 0; i < uid_data->numTexGens; ++i)
		{
			out.Write("\tif (((" I_TEXOFFSET "[0] >> %d) & 0x1) != 0)\n", i);
			out.Write("\t\tr.tex%d.x += texOffset;\n", i);
		}
		out.Write("\t}\n");

		EmitVertex(out, uid_data, "l", ApiType, true);
		EmitVertex(out, uid_data, "r", ApiType);
	}
	else if (uid_data->primitive_type == PRIMITIVE_POINTS)
	{
		// A point becomes a quad: lower-left, lower-right, upper-left, upper-right.
		out.Write("\tVS_OUTPUT ll = f;\n" "\tVS_OUTPUT lr = f;\n" "\tVS_OUTPUT ul = f;\n" "\tVS_OUTPUT ur = f;\n");

		out.Write("\tll.pos.xy += float2(-1,-1) * offset;\n" "\tlr.pos.xy += float2(1,-1) * offset;\n" "\tul.pos.xy += float2(-1,1) * offset;\n" "\tur.pos.xy += offset;\n");

		// Optional texture coordinate offset, enabled per texgen by the bits
		// of I_TEXOFFSET[1] and scaled by I_TEXOFFSET[3].
		out.Write("\tif (" I_TEXOFFSET "[3] != 0) {\n");
		out.Write("\tfloat2 texOffset = float2(1.0 / float(" I_TEXOFFSET "[3]), 1.0 / float(" I_TEXOFFSET "[3]));\n");

		for (unsigned int i = 0; i < uid_data->numTexGens; ++i)
		{
			out.Write("\tif (((" I_TEXOFFSET "[1] >> %d) & 0x1) != 0) {\n", i);
			out.Write("\t\tll.tex%d.xy += float2(0,1) * texOffset;\n", i);
			out.Write("\t\tlr.tex%d.xy += texOffset;\n", i);
			out.Write("\t\tur.tex%d.xy += float2(1,0) * texOffset;\n", i);
			out.Write("\t}\n");
		}
		out.Write("\t}\n");

		EmitVertex(out, uid_data, "ll", ApiType, true);
		EmitVertex(out, uid_data, "lr", ApiType);
		EmitVertex(out, uid_data, "ul", ApiType);
		EmitVertex(out, uid_data, "ur", ApiType);
	}
	else
	{
		// Triangles: pass the (possibly stereo-shifted) vertex straight through.
		EmitVertex(out, uid_data, "f", ApiType, true);
	}

	// Close the vertex loop.
	out.Write("\t}\n");

	EndPrimitive(out, uid_data, ApiType);

	// Close the per-eye loop, which only exists for stereo without instancing.
	if (uid_data->stereo && !g_ActiveConfig.backend_info.bSupportsGSInstancing)
		out.Write("\t}\n");

	// Close main().
	out.Write("}\n");

	return out;
}
// Selects the geometry shader for `primitive_type`.
//
// Updates currentPrimitiveTopology, then resolves the shader for the
// resulting uid: unchanged uid, pass-through shader, cached entry, or a fresh
// compile whose bytecode is appended to the disk cache and inserted into the
// in-memory cache.
//
// Returns true when the uid is unchanged, when the pass-through shader is
// selected, when a cached entry has bytecode, or when a new shader was
// inserted successfully; false otherwise.
bool GeometryShaderCache::SetShader(u32 primitive_type)
{
	// Keep the topology type in step with the primitive being drawn.
	switch (primitive_type)
	{
	case PRIMITIVE_TRIANGLES:
		currentPrimitiveTopology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
		break;
	case PRIMITIVE_LINES:
		currentPrimitiveTopology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
		break;
	case PRIMITIVE_POINTS:
		currentPrimitiveTopology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT;
		break;
	default:
		CHECK(0, "Invalid primitive type.");
		break;
	}

	GeometryShaderUid uid = GetGeometryShaderUid(primitive_type, API_D3D);

	// Same uid as the previous call: nothing to do.
	if (uid == last_uid)
	{
		GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE,true);
		return true;
	}

	// The shader is changing, so the pipeline state has to be rebuilt.
	last_uid = uid;
	D3D::commandListMgr->dirtyPso = true;

	if (g_ActiveConfig.bEnableShaderDebugging)
	{
		ShaderCode debug_code = GenerateGeometryShaderCode(primitive_type, API_D3D);
		geometry_uid_checker.AddToIndexAndCheck(debug_code, uid, "Geometry", "g");
	}

	// Pass-through uids share the default pass-through entry.
	if (uid.GetUidData()->IsPassthrough())
	{
		last_entry = &pass_entry;
		return true;
	}

	// Already compiled earlier?
	const auto cached = GeometryShaders.find(uid);
	if (cached != GeometryShaders.end())
	{
		last_entry = &cached->second;
		return last_entry->shader12.pShaderBytecode != nullptr;
	}

	// Not cached: generate the source and compile it now.
	ShaderCode source = GenerateGeometryShaderCode(primitive_type, API_D3D);

	D3DBlob* blob = nullptr;
	if (!D3D::CompileGeometryShader(source.GetBuffer(), &blob))
	{
		GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
		return false;
	}

	// Store the bytecode on disk and in memory, then drop our reference.
	g_gs_disk_cache.Append(uid, blob->Data(), blob->Size());
	const bool inserted = InsertByteCode(uid, blob->Data(), blob->Size());
	blob->Release();

	// Keep the source text next to the entry when shader debugging is on.
	if (inserted && g_ActiveConfig.bEnableShaderDebugging)
	{
		GeometryShaders[uid].code = source.GetBuffer();
	}

	return inserted;
}
// Binds the specialized vertex shader for the current state, falling back to
// the ubershader while a specialized shader is unavailable.
//
// vertex_format - receives the input layout derived from the shader bytecode.
//
// Returns the result of SetUberShader() when specialized shaders are disabled,
// when the matching entry is still pending, or when a background compile was
// just queued. Otherwise returns true if a compiled shader was bound and
// false if the entry has no shader or inserting new bytecode failed.
bool VertexShaderCache::SetShader(D3DVertexFormat* vertex_format)
{
	if (g_ActiveConfig.bDisableSpecializedShaders)
		return SetUberShader(vertex_format);

	// Binds a non-pending entry; fails when the entry holds no shader.
	const auto bind_compiled = [vertex_format](const VSCacheEntry& compiled) -> bool
	{
		if (!compiled.shader)
			return false;

		vertex_format->SetInputLayout(compiled.bytecode);
		D3D::stateman->SetVertexShader(compiled.shader);
		return true;
	};

	VertexShaderUid uid = GetVertexShaderUid();

	// Fast path: same uid as the entry used last time.
	if (last_entry && uid == last_uid)
	{
		return last_entry->pending ? SetUberShader(vertex_format) : bind_compiled(*last_entry);
	}

	// Known uid: use the cached entry unless it is still being compiled.
	const auto found = vshaders.find(uid);
	if (found != vshaders.end())
	{
		const VSCacheEntry& cached = found->second;
		if (cached.pending)
			return SetUberShader(vertex_format);

		last_uid = uid;
		last_entry = &cached;

		GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
		return bind_compiled(*last_entry);
	}

	// Background compiling?
	if (g_ActiveConfig.CanBackgroundCompileShaders())
	{
		// Register a pending placeholder so later calls see the compile in flight.
		VSCacheEntry placeholder;
		placeholder.pending = true;
		vshaders[uid] = placeholder;

		// Queue normal shader compiling and use ubershader
		g_async_compiler->QueueWorkItem(g_async_compiler->CreateWorkItem<VertexShaderCompilerWorkItem>(uid));
		return SetUberShader(vertex_format);
	}

	// Synchronous path: generate, compile and insert the shader right now.
	D3DBlob* blob = nullptr;
	ShaderCode source = GenerateVertexShaderCode(APIType::D3D, ShaderHostConfig::GetCurrent(), uid.GetUidData());
	D3D::CompileVertexShader(source.GetBuffer(), &blob);
	if (!InsertByteCode(uid, blob))
	{
		SAFE_RELEASE(blob);
		return false;
	}

	g_vs_disk_cache.Append(uid, blob->Data(), blob->Size());
	blob->Release();

	// The entry now exists in vshaders; re-run the lookup to bind it.
	return SetShader(vertex_format);
}
// Selects the geometry shader for `primitive_type`.
//
// Resolves the uid against, in order: the previously selected shader, the
// default pass-through entry, the in-memory cache, and finally a fresh compile
// whose bytecode is appended to the disk cache and inserted into the
// in-memory cache.
//
// Returns true when the uid is unchanged, when the pass-through shader is
// selected, when a cached entry holds a shader, or when a new shader was
// inserted successfully; false otherwise.
bool GeometryShaderCache::SetShader(u32 primitive_type)
{
	GeometryShaderUid uid;
	GetGeometryShaderUid(uid, primitive_type, API_D3D);

	if (g_ActiveConfig.bEnableShaderDebugging)
	{
		ShaderCode debug_code;
		GenerateGeometryShaderCode(debug_code, primitive_type, API_D3D);
		geometry_uid_checker.AddToIndexAndCheck(debug_code, uid, "Geometry", "g");
	}

	// Same uid as the shader that is already selected: nothing to do.
	if (last_entry && uid == last_uid)
	{
		GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE,true);
		return true;
	}

	last_uid = uid;

	// Pass-through uids share the default pass-through entry.
	if (uid.GetUidData()->IsPassthrough())
	{
		last_entry = &pass_entry;
		return true;
	}

	// Already compiled earlier?
	const auto cached = GeometryShaders.find(uid);
	if (cached != GeometryShaders.end())
	{
		last_entry = &cached->second;
		return last_entry->shader != nullptr;
	}

	// Not cached: generate the source and compile it now.
	ShaderCode source;
	GenerateGeometryShaderCode(source, primitive_type, API_D3D);

	D3DBlob* blob = nullptr;
	if (!D3D::CompileGeometryShader(source.GetBuffer(), &blob))
	{
		GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
		return false;
	}

	// Store the bytecode on disk and in memory, then drop our reference.
	g_gs_disk_cache.Append(uid, blob->Data(), blob->Size());
	const bool inserted = InsertByteCode(uid, blob->Data(), blob->Size());
	blob->Release();

	// Keep the source text next to the entry when shader debugging is on.
	if (inserted && g_ActiveConfig.bEnableShaderDebugging)
	{
		GeometryShaders[uid].code = source.GetBuffer();
	}

	return inserted;
}
static bool dx9_compile_shader(ShaderCode& out_shader, const ShaderCode& source, ShaderType::type type, FileSystem* fs=NULL, const char* include_dir=NULL) { R_ASSERT(source.type == ShaderCodeType_Text); HRESULT hr = S_OK; ID3DXBuffer* error_buffer = NULL; ID3DXBuffer* shader_buffer = NULL; ID3DXConstantTable* constant_table = NULL; uint32 flags = D3DXSHADER_DEBUG; const char* profile = shader_profile_string(type); static const char dummy_vertex_shader[] = "float4 vertex() : POSITION { return 1; }"; static const char dummy_pixel_shader[] = "float4 pixel() : COLOR { return float4(1,0,1,1); }"; DX9ShaderIncluder includer(fs, include_dir); hr = D3DXCompileShader(source.data(), (UINT)source.size(), NULL, &includer, "main", profile, flags, &shader_buffer, &error_buffer, &constant_table ); if( hr!=S_OK && error_buffer && error_buffer->GetBufferPointer() ) { char* tmp = (char*)error_buffer->GetBufferPointer(); R_ERROR_MESSAGE_BOX(tmp, "Shader compile error"); shader_buffer = NULL; error_buffer = NULL; //create dummy shader if( type == ShaderType::Pixel ) { hr = D3DXCompileShader( dummy_pixel_shader, uint32(strlen(dummy_pixel_shader)), NULL, NULL, "pixel", "ps_2_0", flags, &shader_buffer, &error_buffer, NULL ); validate_d3d_result(hr, true); } else if( type == ShaderType::Vertex ) { hr = D3DXCompileShader( dummy_vertex_shader, uint32(strlen(dummy_vertex_shader)), NULL, NULL, "vertex", "vs_2_0", flags, &shader_buffer, &error_buffer, NULL ); validate_d3d_result(hr, true); } } out_shader.type = ShaderCodeType_Binary; out_shader.clear(); uint32 size = shader_buffer->GetBufferSize(); if( size ) { out_shader.resize(size); memcpy(out_shader.data(), shader_buffer->GetBufferPointer(), size); } if(error_buffer) error_buffer->Release(); if(shader_buffer) shader_buffer->Release(); if(constant_table) constant_table->Release(); return true; }
void VertexShaderCache::PrepareShader(u32 components, const XFMemory &xfr, const BPMemory &bpm, bool ongputhread) { VertexShaderUid uid; GetVertexShaderUID(uid, components, xfr, bpm); if (ongputhread) { Compiler->ProcCompilationResults(); #if defined(_DEBUG) || defined(DEBUGFAST) if (g_ActiveConfig.bEnableShaderDebugging) { ShaderCode code; GenerateVertexShaderCodeD3D9(code, uid.GetUidData()); } #endif if (last_entry) { if (uid == last_uid) { return; } } last_uid = uid; GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true); } else { if (external_last_uid == uid) { return; } external_last_uid = uid; } vshaderslock.lock(); VSCacheEntry *entry = &vshaders[uid]; vshaderslock.unlock(); if (ongputhread) { last_entry = entry; } // Compile only when we have a new instance if (entry->initialized.test_and_set()) { return; } ShaderCompilerWorkUnit *wunit = Compiler->NewUnit(VERTEXSHADERGEN_BUFFERSIZE); wunit->GenerateCodeHandler = [uid](ShaderCompilerWorkUnit* wunit) { ShaderCode code; code.SetBuffer(wunit->code.data()); GenerateVertexShaderCodeD3D9(code, uid.GetUidData()); wunit->codesize = (u32)code.BufferSize(); }; wunit->entrypoint = "main"; wunit->flags = D3DCOMPILE_SKIP_VALIDATION | D3DCOMPILE_OPTIMIZATION_LEVEL3; wunit->target = D3D::VertexShaderVersionString(); wunit->ResultHandler = [uid, entry](ShaderCompilerWorkUnit* wunit) { if (SUCCEEDED(wunit->cresult)) { ID3DBlob* shaderBuffer = wunit->shaderbytecode; const u8* bytecode = (const u8*)shaderBuffer->GetBufferPointer(); u32 bytecodelen = (u32)shaderBuffer->GetBufferSize(); g_vs_disk_cache.Append(uid, bytecode, bytecodelen); PushByteCode(uid, bytecode, bytecodelen, entry); #if defined(_DEBUG) || defined(DEBUGFAST) if (g_ActiveConfig.bEnableShaderDebugging) { entry->code = wunit->code.data(); } #endif } else { static int num_failures = 0; std::string filename = StringFromFormat("%sbad_vs_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++); std::ofstream file; OpenFStream(file, filename, 
std::ios_base::out); file << ((const char*)wunit->code.data()); file << ((const char*)wunit->error->GetBufferPointer()); file.close(); PanicAlert("Failed to compile vertex shader!\nThis usually happens when trying to use Dolphin with an outdated GPU or integrated GPU like the Intel GMA series.\n\nIf you're sure this is Dolphin's error anyway, post the contents of %s along with this error message at the forums.\n\nDebug info (%s):\n%s", filename, D3D::VertexShaderVersionString(), (char*)wunit->error->GetBufferPointer()); } }; Compiler->CompileShaderAsync(wunit); }