Exemple #1
0
void Routine::InitProgram(std::initializer_list<const char *> source) {

  // Determines the identifier for this particular routine call
  auto routine_info = routine_name_;
  for (const auto &kernel_name : kernel_names_) {
    routine_info += "_" + kernel_name + db_(kernel_name).GetValuesString();
  }
  log_debug(routine_info);

  // Queries the cache to see whether or not the program (context-specific) is already there
  bool has_program;
  program_ = ProgramCache::Instance().Get(ProgramKeyRef{ context_(), device_(), precision_, routine_info },
                                          &has_program);
  if (has_program) { return; }

  // Sets the build options from an environmental variable (if set)
  auto options = std::vector<std::string>();
  const auto environment_variable = std::getenv("CLBLAST_BUILD_OPTIONS");
  if (environment_variable != nullptr) {
    options.push_back(std::string(environment_variable));
  }

  // Queries the cache to see whether or not the binary (device-specific) is already there. If it
  // is, a program is created and stored in the cache
  const auto device_name = GetDeviceName(device_);
  const auto platform_id = device_.PlatformID();
  bool has_binary;
  auto binary = BinaryCache::Instance().Get(BinaryKeyRef{platform_id,  precision_, routine_info, device_name },
                                            &has_binary);
  if (has_binary) {
    program_ = std::make_shared<Program>(device_, context_, binary);
    program_->Build(device_, options);
    ProgramCache::Instance().Store(ProgramKey{ context_(), device_(), precision_, routine_info },
                                    std::shared_ptr<Program>{program_});
    return;
  }

  // Otherwise, the kernel will be compiled and program will be built. Both the binary and the
  // program will be added to the cache.

  // Inspects whether or not FP64 is supported in case of double precision
  if ((precision_ == Precision::kDouble && !PrecisionSupported<double>(device_)) ||
      (precision_ == Precision::kComplexDouble && !PrecisionSupported<double2>(device_))) {
    throw RuntimeErrorCode(StatusCode::kNoDoublePrecision);
  }

  // As above, but for FP16 (half precision)
  if (precision_ == Precision::kHalf && !PrecisionSupported<half>(device_)) {
    throw RuntimeErrorCode(StatusCode::kNoHalfPrecision);
  }

  // Collects the parameters for this device in the form of defines
  auto source_string = std::string{""};
  for (const auto &kernel_name : kernel_names_) {
    source_string += db_(kernel_name).GetDefines();
  }

  // Adds routine-specific code to the constructed source string
  for (const char *s: source) {
    source_string += s;
  }

  // Completes the source and compiles the kernel
  program_ = CompileFromSource(source_string, precision_, routine_name_,
                               device_, context_, options, 0);


  // Store the compiled binary and program in the cache
  BinaryCache::Instance().Store(BinaryKey{platform_id, precision_, routine_info, device_name},
                                program_->GetIR());

  ProgramCache::Instance().Store(ProgramKey{context_(), device_(), precision_, routine_info},
                                 std::shared_ptr<Program>{program_});
}
Exemple #2
0
//-------------------------------------------------------------------------------------------------------------
// UTILITY FUNCTIONS
//-------------------------------------------------------------------------------------------------------------
// Compiles (or loads from the on-disk shader cache) every stage listed in 'desc', creates the
// stage objects on 'device', and then builds the reflection-driven state of this shader: the
// vertex input layout, the CPU/GPU constant buffers and the texture/sampler/UAV binding lookups.
//
// device : D3D11 device used to create the shader stages, the input layout and the constant buffers.
// desc   : shader description; stages whose fileName is empty are skipped.
//
// Returns true on success. Returns false when a stage cannot be loaded or compiled, or when the
// input layout or a constant buffer cannot be created. The shader blobs acquired during the call
// are released on every return path.
bool Shader::CompileShaders(ID3D11Device* device, const ShaderDesc& desc)
{
	constexpr const char * SHADER_BINARY_EXTENSION = ".bin";
	mDescriptor = desc;
	ShaderBlobs blobs;
	bool bPrinted = false;

	// Releases every blob held in 'blobs'. Called before each return so that the failure paths
	// do not leak the blobs of the stages that were already processed.
	auto releaseBlobs = [&blobs]()
	{
		for (unsigned type = EShaderStage::VS; type < EShaderStage::COUNT; ++type)
		{
			if (blobs.of[type])
			{
				blobs.of[type]->Release();
				blobs.of[type] = nullptr;
			}
		}
	};

	PerfTimer timer;
	timer.Start();

	// COMPILE SHADER STAGES
	//----------------------------------------------------------------------------
	for (const ShaderStageDesc& stageDesc : desc.stages)
	{
		if (stageDesc.fileName.empty())
			continue;

		// stage.macros
		const std::string sourceFilePath = std::string(Renderer::sShaderRoot + stageDesc.fileName);
		
		const EShaderStage stage = GetShaderTypeFromSourceFilePath(sourceFilePath);

		// USE SHADER CACHE
		//
		// The cache file name is the source file name, plus a hash of the preprocessor
		// definitions when the stage has any macros, plus the binary extension.
		const size_t ShaderHash = GeneratePreprocessorDefinitionsHash(stageDesc.macros);
		const std::string cacheFileName = stageDesc.macros.empty()
			? DirectoryUtil::GetFileNameFromPath(sourceFilePath) + SHADER_BINARY_EXTENSION
			: DirectoryUtil::GetFileNameFromPath(sourceFilePath) + "_" + std::to_string(ShaderHash) + SHADER_BINARY_EXTENSION;
		const std::string cacheFilePath = Application::s_ShaderCacheDirectory + "\\" + cacheFileName;
		const bool bUseCachedShaders =
			DirectoryUtil::FileExists(cacheFilePath)
			&& !IsCacheDirty(sourceFilePath, cacheFilePath);
		//---------------------------------------------------------------------------------
		if (!bPrinted)	// quick status print here (once per shader, based on the first stage)
		{
			const char* pMsgLoad = bUseCachedShaders ? "Loading cached shader binaries" : "Compiling shader from source";
			Log::Info("\t%s %s...", pMsgLoad, mName.c_str());
			bPrinted = true;
		}
		//---------------------------------------------------------------------------------
		if (bUseCachedShaders)
		{
			blobs.of[stage] = CompileFromCachedBinary(cacheFilePath);
			if (!blobs.of[stage])
			{
				// the blob is dereferenced right below; bail out instead of crashing
				Log::Error("Failed to load cached shader binary: " + cacheFilePath);
				releaseBlobs();
				return false;
			}
		}
		else
		{
			std::string errMsg;
			ID3D10Blob* pBlob = nullptr;
			if (CompileFromSource(sourceFilePath, stage, pBlob, errMsg, stageDesc.macros))
			{
				blobs.of[stage] = pBlob;
				CacheShaderBinary(cacheFilePath, blobs.of[stage]);
			}
			else
			{
				Log::Error(errMsg);
				releaseBlobs();
				return false;
			}
		}

		CreateShaderStage(device, stage, blobs.of[stage]->GetBufferPointer(), blobs.of[stage]->GetBufferSize());
		SetReflections(blobs);
		//CheckSignatures();

		// remember where this stage came from and when its source was last written
		ShaderLoadDesc loadDesc = {};
		loadDesc.fullPath = sourceFilePath;
		loadDesc.lastWriteTime = std::experimental::filesystem::last_write_time(sourceFilePath);
		mDirectories[stage] = loadDesc;
	}

	// INPUT LAYOUT (VS)
	//---------------------------------------------------------------------------
	// src: https://stackoverflow.com/questions/42388979/directx-11-vertex-shader-reflection
	// setup the layout of the data that goes into the shader
	//
	if(mReflections.vsRefl)
	{

		D3D11_SHADER_DESC shaderDesc = {};
		mReflections.vsRefl->GetDesc(&shaderDesc);
		std::vector<D3D11_INPUT_ELEMENT_DESC> inputLayout(shaderDesc.InputParameters);

		for (unsigned i = 0; i < shaderDesc.InputParameters; ++i)
		{
			D3D11_SIGNATURE_PARAMETER_DESC paramDesc = {};
			mReflections.vsRefl->GetInputParameterDesc(i, &paramDesc);

			// fill out input element desc
			// Zero-initialized so that Format has a defined value (0, i.e. DXGI_FORMAT_UNKNOWN)
			// when none of the mask / component-type combinations below matches.
			D3D11_INPUT_ELEMENT_DESC elementDesc = {};
			elementDesc.SemanticName = paramDesc.SemanticName;
			elementDesc.SemanticIndex = paramDesc.SemanticIndex;
			elementDesc.InputSlot = 0;
			elementDesc.AlignedByteOffset = D3D11_APPEND_ALIGNED_ELEMENT;
			elementDesc.InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
			elementDesc.InstanceDataStepRate = 0;

			// determine DXGI format from the component mask (1 to 4 components) and component type
			if (paramDesc.Mask == 1)
			{
				if      (paramDesc.ComponentType == D3D_REGISTER_COMPONENT_UINT32)  elementDesc.Format = DXGI_FORMAT_R32_UINT;
				else if (paramDesc.ComponentType == D3D_REGISTER_COMPONENT_SINT32)  elementDesc.Format = DXGI_FORMAT_R32_SINT;
				else if (paramDesc.ComponentType == D3D_REGISTER_COMPONENT_FLOAT32) elementDesc.Format = DXGI_FORMAT_R32_FLOAT;
			}
			else if (paramDesc.Mask <= 3)
			{
				if      (paramDesc.ComponentType == D3D_REGISTER_COMPONENT_UINT32)  elementDesc.Format = DXGI_FORMAT_R32G32_UINT;
				else if (paramDesc.ComponentType == D3D_REGISTER_COMPONENT_SINT32)  elementDesc.Format = DXGI_FORMAT_R32G32_SINT;
				else if (paramDesc.ComponentType == D3D_REGISTER_COMPONENT_FLOAT32) elementDesc.Format = DXGI_FORMAT_R32G32_FLOAT;
			}
			else if (paramDesc.Mask <= 7)
			{
				if      (paramDesc.ComponentType == D3D_REGISTER_COMPONENT_UINT32)  elementDesc.Format = DXGI_FORMAT_R32G32B32_UINT;
				else if (paramDesc.ComponentType == D3D_REGISTER_COMPONENT_SINT32)  elementDesc.Format = DXGI_FORMAT_R32G32B32_SINT;
				else if (paramDesc.ComponentType == D3D_REGISTER_COMPONENT_FLOAT32) elementDesc.Format = DXGI_FORMAT_R32G32B32_FLOAT;
			}
			else if (paramDesc.Mask <= 15)
			{
				if      (paramDesc.ComponentType == D3D_REGISTER_COMPONENT_UINT32)  elementDesc.Format = DXGI_FORMAT_R32G32B32A32_UINT;
				else if (paramDesc.ComponentType == D3D_REGISTER_COMPONENT_SINT32)  elementDesc.Format = DXGI_FORMAT_R32G32B32A32_SINT;
				else if (paramDesc.ComponentType == D3D_REGISTER_COMPONENT_FLOAT32) elementDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
			}

			inputLayout[i] = elementDesc; //save element desc
		}

		// Try to create Input Layout (skipped when the vertex shader has no input parameters,
		// in which case the vector's data pointer may be null)
		const auto* pData = inputLayout.data();
		if (pData)
		{
			const HRESULT result = device->CreateInputLayout(
				pData,
				shaderDesc.InputParameters,
				blobs.vs->GetBufferPointer(),
				blobs.vs->GetBufferSize(),
				&mpInputLayout);

			if (FAILED(result))
			{
				OutputDebugString("Error creating input layout");
				releaseBlobs();
				return false;
			}
		}
	}

	// CONSTANT BUFFERS 
	//---------------------------------------------------------------------------
	// Obtain cbuffer layout information
	for (EShaderStage type = EShaderStage::VS; type < EShaderStage::COUNT; type = static_cast<EShaderStage>(type + 1))
	{
		if (mReflections.of[type])
		{
			ReflectConstantBufferLayouts(mReflections.of[type], type);
		}
	}

	// Create CPU & GPU constant buffers
	// CPU CBuffers: one zero-filled CPUConstant per reflected variable, paired with the index
	// of the constant buffer layout it belongs to
	int constantBufferSlot = 0;
	for (const ConstantBufferLayout& cbLayout : m_CBLayouts)
	{
		for (const D3D11_SHADER_VARIABLE_DESC& varDesc : cbLayout.variables)
		{
			CPUConstant c;
			CPUConstantID c_id = static_cast<CPUConstantID>(mCPUConstantBuffers.size());

			c._name = varDesc.Name;
			c._size = varDesc.Size;
			c._data = new char[c._size];
			memset(c._data, 0, c._size);
			m_constants.push_back(std::make_pair(constantBufferSlot, c_id));
			mCPUConstantBuffers.push_back(c);
		}
		++constantBufferSlot;
	}

	// GPU CBuffers: one dynamic, CPU-writable buffer per reflected constant buffer layout
	D3D11_BUFFER_DESC cBufferDesc = {};
	cBufferDesc.Usage = D3D11_USAGE_DYNAMIC;
	cBufferDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
	cBufferDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
	cBufferDesc.MiscFlags = 0;
	cBufferDesc.StructureByteStride = 0;
	for (const ConstantBufferLayout& cbLayout : m_CBLayouts)
	{
		ConstantBufferBinding cBuffer;
		cBufferDesc.ByteWidth = cbLayout.desc.Size;
		if (FAILED(device->CreateBuffer(&cBufferDesc, nullptr, &cBuffer.data)))
		{
			OutputDebugString("Error creating constant buffer");
			releaseBlobs();
			return false;
		}
		cBuffer.dirty = true;
		cBuffer.shaderStage = cbLayout.stage;
		cBuffer.bufferSlot = cbLayout.bufSlot;
		mConstantBuffers.push_back(cBuffer);
	}


	// TEXTURES & SAMPLERS
	//---------------------------------------------------------------------------
	// Slots are numbered per stage, in the order the resources are reported by reflection.
	// Typed UAVs are recorded as texture bindings but use their own slot counter.
	for (int shaderStage = 0; shaderStage < EShaderStage::COUNT; ++shaderStage)
	{
		unsigned texSlot = 0;	unsigned smpSlot = 0;
		unsigned uavSlot = 0;
		auto& sRefl = mReflections.of[shaderStage];
		if (sRefl)
		{
			D3D11_SHADER_DESC stageShaderDesc = {};	// named so it does not shadow the 'desc' parameter
			sRefl->GetDesc(&stageShaderDesc);

			for (unsigned i = 0; i < stageShaderDesc.BoundResources; ++i)
			{
				D3D11_SHADER_INPUT_BIND_DESC shdInpDesc;
				sRefl->GetResourceBindingDesc(i, &shdInpDesc);

				switch (shdInpDesc.Type)
				{
					case D3D_SIT_SAMPLER:
					{
						SamplerBinding smp;
						smp.shaderStage = static_cast<EShaderStage>(shaderStage);
						smp.samplerSlot = smpSlot++;
						mSamplerBindings.push_back(smp);
						mShaderSamplerLookup[shdInpDesc.Name] = static_cast<int>(mSamplerBindings.size() - 1);
					} break;

					case D3D_SIT_TEXTURE:
					{
						TextureBinding tex;
						tex.shaderStage = static_cast<EShaderStage>(shaderStage);
						tex.textureSlot = texSlot++;
						mTextureBindings.push_back(tex);
						mShaderTextureLookup[shdInpDesc.Name] = static_cast<int>(mTextureBindings.size() - 1);
					} break;

					case D3D_SIT_UAV_RWTYPED:
					{
						TextureBinding tex;
						tex.shaderStage = static_cast<EShaderStage>(shaderStage);
						tex.textureSlot = uavSlot++;
						mTextureBindings.push_back(tex);
						mShaderTextureLookup[shdInpDesc.Name] = static_cast<int>(mTextureBindings.size() - 1);
					} break;

					case D3D_SIT_CBUFFER: break;	// constant buffers are handled above


					default:
						Log::Warning("Unhandled shader input bind type in shader reflection");
						break;

				} // switch shader input type
			} // bound resource
		} // sRefl
	} // shaderStage

	// release blobs
	releaseBlobs();

	return true;
}