void Vc4Shader::Emit_Mov(CInstruction &Inst)
{
    assert(this->uShaderType == D3D10_SB_PIXEL_SHADER ||
           this->uShaderType == D3D10_SB_VERTEX_SHADER);

    VC4_ASSERT(Inst.m_NumOperands == 2);

    for (uint8_t i = 0, aCurrent = D3D10_SB_OPERAND_4_COMPONENT_MASK_X; i < 4; i++)
    {
        if (Inst.m_Operands[0].m_WriteMask & aCurrent)
        {
            Vc4Register dst = Find_Vc4Register_M(Inst.m_Operands[0], aCurrent);

            Vc4Register src[1];
            Setup_SourceRegisters(Inst, 1, ARRAYSIZE(src), i, src);

            {
                Vc4Instruction Vc4Inst;
                Vc4Inst.Vc4_m_MOV(dst, src[0]);
                Vc4Inst.Vc4_m_Pack(dst.GetPack(i));
                Vc4Inst.Emit(CurrentStorage);
            }
        }
        aCurrent <<= 1;
    }

    { // Emit a NOP
        Vc4Instruction Vc4Inst;
        Vc4Inst.Emit(CurrentStorage);
    }
}
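// Illustrative sketch of Emit_Mov's expansion (assumed DXBC input, not taken
// from a real shader): "mov r0.xy, v0.yxxx" with r0 mapped to temps starting
// at ra16 becomes one mul-pipe move per enabled write-mask component, plus
// the trailing NOP that every Emit_* helper appends:
//
//   mov ra16, <v0.y>   ; component x
//   mov ra17, <v0.x>   ; component y
//   nop
//
// dst.GetPack(i) only selects a byte-lane pack when the destination is a
// packed color register; judging from the packed/unpacked split in the other
// emitters, it is a no-op for plain temps and the move stays 32-bit.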
void Vc4Shader::Emit_with_Mul_pipe(CInstruction &Inst)
{
    assert(this->uShaderType == D3D10_SB_PIXEL_SHADER ||
           this->uShaderType == D3D10_SB_VERTEX_SHADER);

    VC4_ASSERT(Inst.m_NumOperands == 3);

    for (uint8_t i = 0, aCurrent = D3D10_SB_OPERAND_4_COMPONENT_MASK_X; i < 4; i++)
    {
        if (Inst.m_Operands[0].m_WriteMask & aCurrent)
        {
            Vc4Register dst = Find_Vc4Register_M(Inst.m_Operands[0], aCurrent);

            uint8_t pack = VC4_QPU_PACK_A_32;
            if (dst.GetFlags().packed)
            {
                pack = VC4_QPU_PACK_MUL_8a + i;
            }

            Vc4Register src[2];
            this->Setup_SourceRegisters(Inst, 1, ARRAYSIZE(src), i, src);

            {
                Vc4Instruction Vc4Inst;
                switch (Inst.m_OpCode)
                {
                case D3D10_SB_OPCODE_MUL:
                    Vc4Inst.Vc4_m_FMUL(dst, src[0], src[1]);
                    break;
                default:
                    VC4_ASSERT(false);
                }
                Vc4Inst.Vc4_m_Pack(pack);
                Vc4Inst.Emit(CurrentStorage);
            }
        }
        aCurrent <<= 1;
    }

    { // Emit a NOP
        Vc4Instruction Vc4Inst;
        Vc4Inst.Emit(CurrentStorage);
    }
}
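// Packing note: VC4_QPU_PACK_MUL_8a + i converts the mul-pipe float result to
// a saturated byte and deposits it in byte lane i of the destination, so a
// packed RGBA color register is assembled one component per instruction as
// the write-mask loop walks x, y, z, w. Unpacked destinations keep the full
// 32-bit result (VC4_QPU_PACK_A_32).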
HRESULT Vc4Shader::Translate_VS()
{
    assert(this->uShaderType == D3D10_SB_VERTEX_SHADER);

    this->SetCurrentStorage(this->ShaderStorage, this->ShaderUniform);
    this->HLSL_ParseDecl();
    this->HLSL_Link_PS();
    this->Emit_Prologue_VS();

    {
        CInstruction Inst;
        assert(Inst.m_bSaturate == false); // saturate is not supported.
        while (HLSL_GetShaderInstruction(this->HLSLParser, Inst))
        {
            // Need to add support for D3D10_SB_OPCODE_IADD - Issue #38
            switch (Inst.m_OpCode)
            {
            case D3D10_SB_OPCODE_ADD:
            case D3D10_SB_OPCODE_MAX:
            case D3D10_SB_OPCODE_MIN:
            case D3D10_SB_OPCODE_IADD:
                this->Emit_with_Add_pipe(Inst);
                break;
            case D3D10_SB_OPCODE_DP2:
            case D3D10_SB_OPCODE_DP3:
            case D3D10_SB_OPCODE_DP4:
                this->Emit_DPx(Inst);
                break;
            case D3D10_SB_OPCODE_MAD:
                this->Emit_Mad(Inst);
                break;
            case D3D10_SB_OPCODE_MOV:
                this->Emit_Mov(Inst);
                break;
            case D3D10_SB_OPCODE_MUL:
                this->Emit_with_Mul_pipe(Inst);
                break;
            case D3D10_SB_OPCODE_RET:
                break;
            default:
                VC4_ASSERT(false);
            }
        }
    }

    this->ShaderStorageAux->CopyFrom(*this->ShaderStorage); // Copy VS to CS.
    this->ShaderUniformAux->CopyFrom(*this->ShaderUniform); // Copy VS uniform to CS.

    this->Emit_ShaderOutput_VS(true);  // VS
    this->Emit_Epilogue();             // VS

    this->SetCurrentStorage(this->ShaderStorageAux, this->ShaderUniformAux); // switch to CS storage.
    this->Emit_ShaderOutput_VS(false); // CS
    this->Emit_Epilogue();             // CS

    return S_OK;
}
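// Note on the VS/CS double emission above: on VC4 the binner runs its own
// "coordinate shader" (CS) over the vertices before the renderer runs the
// vertex shader proper. Rather than translating twice, the translated body
// is copied into the auxiliary storage and finished with two different
// output stages: Emit_ShaderOutput_VS(true) writes the full VS output
// layout, while Emit_ShaderOutput_VS(false) writes the reduced output the
// binner needs (essentially the position data).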
HRESULT Vc4Shader::Translate_PS()
{
    assert(this->uShaderType == D3D10_SB_PIXEL_SHADER);

    this->SetCurrentStorage(this->ShaderStorage, this->ShaderUniform);
    this->HLSL_ParseDecl();
    this->Emit_Prologue_PS();

    {
        CInstruction Inst;
        assert(Inst.m_bSaturate == false); // saturate is not supported.
        while (HLSL_GetShaderInstruction(this->HLSLParser, Inst))
        {
            switch (Inst.m_OpCode)
            {
            case D3D10_SB_OPCODE_ADD:
            case D3D10_SB_OPCODE_MAX:
            case D3D10_SB_OPCODE_MIN:
                this->Emit_with_Add_pipe(Inst);
                break;
            case D3D10_SB_OPCODE_DP2:
            case D3D10_SB_OPCODE_DP3:
            case D3D10_SB_OPCODE_DP4:
                this->Emit_DPx(Inst);
                break;
            case D3D10_SB_OPCODE_MAD:
                this->Emit_Mad(Inst);
                break;
            case D3D10_SB_OPCODE_MOV:
                this->Emit_Mov(Inst);
                break;
            case D3D10_SB_OPCODE_MUL:
                this->Emit_with_Mul_pipe(Inst);
                break;
            case D3D10_SB_OPCODE_RET:
                break;
            case D3D10_SB_OPCODE_SAMPLE:
                this->Emit_Sample(Inst);
                break;
            default:
                VC4_ASSERT(false);
            }
        }
    }

    this->Emit_Epilogue();

    return S_OK;
}
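// For reference, a minimal shader model 4.0 pixel shader that stays inside
// the opcode set handled above might disassemble to something like this
// (illustrative DXBC, not taken from a real shader):
//
//   ps_4_0
//   dcl_sampler s0, mode_default
//   dcl_resource_texture2d (float,float,float,float) t0
//   dcl_input_ps linear v0.xy
//   dcl_output o0.xyzw
//   sample o0.xyzw, v0.xyxx, t0.xyzw, s0
//   ret
//
// Anything outside these cases (flow control, integer ALU, etc.) hits the
// VC4_ASSERT(false) in the default arm.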
void Vc4Shader::HLSL_Link_PS()
{
    assert(this->uShaderType == D3D10_SB_VERTEX_SHADER);
    assert(this->HLSLDownstreamParser.IsValid());

    boolean bDone = false, bFound = false;
    CInstruction Inst;
    while (HLSL_GetShaderInstruction(this->HLSLDownstreamParser, Inst) && !bDone)
    {
        switch (Inst.m_OpCode)
        {
        case D3D10_SB_OPCODE_DCL_INPUT_PS:
            VC4_ASSERT(Inst.m_InputPSDecl.InterpolationMode == D3D10_SB_INTERPOLATION_LINEAR); // PS input must be linear.
            VC4_ASSERT(Inst.m_NumOperands == 1);
            VC4_ASSERT(Inst.m_Operands[0].m_ComponentSelection == D3D10_SB_OPERAND_4_COMPONENT_MASK_MODE);
            VC4_ASSERT(Inst.m_Operands[0].m_IndexDimension == D3D10_SB_OPERAND_INDEX_1D);
            VC4_ASSERT(Inst.m_Operands[0].m_IndexType[0] == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
            VC4_ASSERT(Inst.m_Operands[0].m_Index[0].m_RegIndex < 8);
            VC4_ASSERT(Inst.m_Operands[0].m_WriteMask & D3D10_SB_OPERAND_4_COMPONENT_MASK_MASK);
            for (uint8_t i = 0, aCurrent = D3D10_SB_OPERAND_4_COMPONENT_MASK_X; i < 4; i++)
            {
                if (Inst.m_Operands[0].m_WriteMask & aCurrent)
                {
                    VC4_ASSERT(this->OutputRegister[Inst.m_Operands[0].m_Index[0].m_RegIndex][i].flags.valid);
                    this->OutputRegister[Inst.m_Operands[0].m_Index[0].m_RegIndex][i].flags.linkage = true;
                }
                aCurrent <<= 1;
            }
            bFound = true;
            break;
        default:
            if (bFound)
            {
                bDone = true;
            }
        }
    }
}
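// Linkage sketch: HLSL_Link_PS pairs VS outputs with the downstream pixel
// shader's inputs purely by register index and component, e.g.
//
//   VS: dcl_output o1.xy          allocates OutputRegister[1][0..1]
//   PS: dcl_input_ps linear v1.xy marks OutputRegister[1][0..1].flags.linkage
//
// so the later VS output stage can emit only the varyings the pixel shader
// actually reads.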
void Vc4Shader::Emit_Prologue_VS()
{
    assert(this->uShaderType == D3D10_SB_VERTEX_SHADER);

    VC4_ASSERT(cInput < 16); // VR_SETUP:NUM limitation; the VPM can only read up to 16 values.

    {
        Vc4Instruction Vc4Inst(vc4_load_immediate_32);
        Vc4Register vr_setup(VC4_QPU_ALU_REG_A, VC4_QPU_WADDR_VPMVCD_RD_SETUP);
        Vc4Register value;
        value.SetImmediateI(MAKE_VR_SETUP(cInput, 1, true, false, VC4_QPU_32BIT_VECTOR, 0));
        Vc4Inst.Vc4_a_LOAD32(vr_setup, value);
        Vc4Inst.Emit(CurrentStorage);
    }

    {
        Vc4Instruction Vc4Inst(vc4_load_immediate_32);
        Vc4Register vw_setup(VC4_QPU_ALU_REG_B, VC4_QPU_WADDR_VPMVCD_WR_SETUP);
        Vc4Register value;
        value.SetImmediateI(MAKE_VW_SETUP(1, true, false, VC4_QPU_32BIT_VECTOR, 0));
        Vc4Inst.Vc4_a_LOAD32(vw_setup, value);
        Vc4Inst.Emit(CurrentStorage);
    }

    for (uint8_t iRegUsed = 0, iRegIndex = 0; iRegUsed < this->cInput; iRegIndex++)
    {
        Vc4Instruction Vc4Inst;
        Vc4Register raX = this->InputRegister[iRegIndex / 4][iRegIndex % 4];
        if (raX.GetFlags().valid)
        {
            assert(raX.GetMux() == VC4_QPU_ALU_REG_A || raX.GetMux() == VC4_QPU_ALU_REG_B);
            Vc4Register vpm(raX.GetMux(), VC4_QPU_RADDR_VPM);
            Vc4Inst.Vc4_a_MOV(raX, vpm);
            Vc4Inst.Emit(CurrentStorage);
            iRegUsed++;
        }
    }

    { // Emit a NOP
        Vc4Instruction Vc4Inst;
        Vc4Inst.Emit(CurrentStorage);
    }
}
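// VPM setup sketch: assuming MAKE_VR_SETUP/MAKE_VW_SETUP mirror the VPM
// generic block read/write setup fields (NUM, STRIDE, HORIZ, LANED, SIZE,
// ADDR), the read setup above requests cInput horizontal 32-bit vectors
// starting at VPM address 0 with stride 1, and the write setup programs the
// matching layout for the outputs. Each subsequent read of VC4_QPU_RADDR_VPM
// in the loop then pops the next input attribute into its assigned ra/rb
// register.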
void Vc4Shader::HLSL_ParseDecl()
{
    assert(this->uShaderType == D3D10_SB_PIXEL_SHADER ||
           this->uShaderType == D3D10_SB_VERTEX_SHADER);
    assert(this->HLSLParser.IsValid());

    boolean bDone = false;
    D3D10_SB_OPCODE_TYPE OpCode;
    while (HLSL_PeekShaderInstructionOpCode(this->HLSLParser, OpCode) && !bDone)
    {
        CInstruction Inst;
        switch (OpCode)
        {
        case D3D10_SB_OPCODE_DCL_RESOURCE:
            HLSL_GetShaderInstruction(this->HLSLParser, Inst);
            VC4_ASSERT(Inst.m_Operands[0].m_Index[0].m_RegIndex < ARRAYSIZE(this->ResourceDimension));
            this->ResourceDimension[Inst.m_Operands[0].m_Index[0].m_RegIndex] = Inst.m_ResourceDecl.SRVInfo.Dimension;
            cResources++;
            break;
        case D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER:
            HLSL_GetShaderInstruction(this->HLSLParser, Inst);
            // Issue 37: VC4 Find_Vc4Register_I needs to implement dynamic index support for constant buffers.
            VC4_ASSERT(Inst.m_ResourceDecl.CBInfo.AccessPattern == D3D10_SB_CONSTANT_BUFFER_IMMEDIATE_INDEXED);
            cConstants++;
            break;
        case D3D10_SB_OPCODE_DCL_SAMPLER:
            HLSL_GetShaderInstruction(this->HLSLParser, Inst);
            cSampler++;
            break;
        case D3D10_SB_OPCODE_DCL_INPUT:
        case D3D10_SB_OPCODE_DCL_INPUT_PS:
            HLSL_GetShaderInstruction(this->HLSLParser, Inst);
            if (Inst.m_OpCode == D3D10_SB_OPCODE_DCL_INPUT_PS)
            {
                VC4_ASSERT(Inst.m_InputPSDecl.InterpolationMode == D3D10_SB_INTERPOLATION_LINEAR); // PS input must be linear.
            }
            VC4_ASSERT(Inst.m_NumOperands == 1);
            VC4_ASSERT(Inst.m_Operands[0].m_ComponentSelection == D3D10_SB_OPERAND_4_COMPONENT_MASK_MODE);
            VC4_ASSERT(Inst.m_Operands[0].m_IndexDimension == D3D10_SB_OPERAND_INDEX_1D);
            VC4_ASSERT(Inst.m_Operands[0].m_IndexType[0] == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
            VC4_ASSERT(Inst.m_Operands[0].m_Index[0].m_RegIndex < 8);
            VC4_ASSERT(Inst.m_Operands[0].m_WriteMask & D3D10_SB_OPERAND_4_COMPONENT_MASK_MASK);
            for (uint8_t i = 0, aCurrent = D3D10_SB_OPERAND_4_COMPONENT_MASK_X; i < 4; i++)
            {
                if (Inst.m_Operands[0].m_WriteMask & aCurrent)
                {
                    VC4_ASSERT((ROS_VC4_INPUT_REGISTER_FILE_START + cInput) <= ROS_VC4_INPUT_REGISTER_FILE_END);
                    this->InputRegister[Inst.m_Operands[0].m_Index[0].m_RegIndex][i].flags.valid = true;
                    this->InputRegister[Inst.m_Operands[0].m_Index[0].m_RegIndex][i].flags.require_linear_conversion =
                        (Inst.m_OpCode == D3D10_SB_OPCODE_DCL_INPUT_PS ? true : false);
                    this->InputRegister[Inst.m_Operands[0].m_Index[0].m_RegIndex][i].addr = ROS_VC4_INPUT_REGISTER_FILE_START + cInput++;
                    this->InputRegister[Inst.m_Operands[0].m_Index[0].m_RegIndex][i].mux = ROS_VC4_INPUT_REGISTER_FILE;
                    this->InputRegister[Inst.m_Operands[0].m_Index[0].m_RegIndex][i].swizzleMask = aCurrent;
                }
                aCurrent <<= 1;
            }
            break;
        case D3D10_SB_OPCODE_DCL_OUTPUT:
        case D3D10_SB_OPCODE_DCL_OUTPUT_SIV:
            HLSL_GetShaderInstruction(this->HLSLParser, Inst);
            VC4_ASSERT(Inst.m_NumOperands == 1);
            VC4_ASSERT(Inst.m_Operands[0].m_ComponentSelection == D3D10_SB_OPERAND_4_COMPONENT_MASK_MODE);
            VC4_ASSERT(Inst.m_Operands[0].m_IndexDimension == D3D10_SB_OPERAND_INDEX_1D);
            VC4_ASSERT(Inst.m_Operands[0].m_IndexType[0] == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
            if (this->uShaderType == D3D10_SB_PIXEL_SHADER)
            {
                // Only a single 8888 RGBA colour output from the pixel shader is supported.
                VC4_ASSERT(Inst.m_Operands[0].m_WriteMask == (D3D10_SB_OPERAND_4_COMPONENT_MASK_R |
                                                              D3D10_SB_OPERAND_4_COMPONENT_MASK_G |
                                                              D3D10_SB_OPERAND_4_COMPONENT_MASK_B |
                                                              D3D10_SB_OPERAND_4_COMPONENT_MASK_A));
                VC4_ASSERT(Inst.m_Operands[0].m_Index[0].m_RegIndex == 0);
                VC4_ASSERT(cOutput == 0);
                this->OutputRegister[0][0].flags.valid = true;
                this->OutputRegister[0][0].flags.color = true;
                this->OutputRegister[0][0].flags.packed = true; // RGBA components are packed in a single register (see the WriteMask assert above).
                this->OutputRegister[0][0].addr = ROS_VC4_OUTPUT_REGISTER_FILE_START;
                this->OutputRegister[0][0].mux = ROS_VC4_OUTPUT_REGISTER_FILE;
                this->OutputRegister[0][0].swizzleMask = (uint8_t)(Inst.m_Operands[0].m_WriteMask & D3D10_SB_OPERAND_4_COMPONENT_MASK_MASK);

                // TODO: more generic color channel swizzle support.
                DXGI_FORMAT texFormat = UmdCompiler->GetRenderTargetFormat(0);
                VC4_ASSERT((texFormat == DXGI_FORMAT_B8G8R8A8_UNORM) || (texFormat == DXGI_FORMAT_R8G8B8A8_UNORM));
                this->OutputRegister[0][0].flags.swap_color_channel = (texFormat != DXGI_FORMAT_R8G8B8A8_UNORM);

                cOutput++;
            }
            else
            {
                VC4_ASSERT(this->uShaderType == D3D10_SB_VERTEX_SHADER);
                VC4_ASSERT(Inst.m_Operands[0].m_Index[0].m_RegIndex < 8);
                bool bPos;
                uint8_t aMask;
                if ((Inst.m_OpCode == D3D10_SB_OPCODE_DCL_OUTPUT_SIV) && (Inst.m_InputDeclSIV.Name == D3D10_SB_NAME_POSITION))
                {
                    bPos = true;
                    aMask = D3D10_SB_OPERAND_4_COMPONENT_MASK_MASK;
                }
                else
                {
                    bPos = false;
                    aMask = (Inst.m_Operands[0].m_WriteMask & D3D10_SB_OPERAND_4_COMPONENT_MASK_MASK);
                    VC4_ASSERT(aMask);
                }
                for (uint8_t i = 0, aCurrent = D3D10_SB_OPERAND_4_COMPONENT_MASK_X; i < 4; i++)
                {
                    if (aMask & aCurrent)
                    {
                        VC4_ASSERT((ROS_VC4_OUTPUT_REGISTER_FILE_START + cOutput) <= ROS_VC4_OUTPUT_REGISTER_FILE_END);
                        this->OutputRegister[Inst.m_Operands[0].m_Index[0].m_RegIndex][i].flags.valid = true;
                        this->OutputRegister[Inst.m_Operands[0].m_Index[0].m_RegIndex][i].flags.position = bPos;
                        this->OutputRegister[Inst.m_Operands[0].m_Index[0].m_RegIndex][i].addr = ROS_VC4_OUTPUT_REGISTER_FILE_START + cOutput++;
                        this->OutputRegister[Inst.m_Operands[0].m_Index[0].m_RegIndex][i].mux = ROS_VC4_OUTPUT_REGISTER_FILE;
                        this->OutputRegister[Inst.m_Operands[0].m_Index[0].m_RegIndex][i].swizzleMask = aCurrent;
                    }
                    aCurrent <<= 1;
                }
            }
            break;
        case D3D10_SB_OPCODE_DCL_TEMPS:
            HLSL_GetShaderInstruction(this->HLSLParser, Inst);
            // Temp registers don't carry a swizzle mask, so assume all 4 components are used.
            // TODO: AllocateRegister(); temps are currently reserved at ra16~ra31,
            // so only up to 4 temps are allowed.
            VC4_ASSERT(Inst.m_TempsDecl.NumTemps <= 4);
            for (uint8_t i = 0; i < Inst.m_TempsDecl.NumTemps * 4; i++)
            {
                VC4_ASSERT((ROS_VC4_TEMP_REGISTER_FILE_START + cTemp) <= ROS_VC4_TEMP_REGISTER_FILE_END);
                this->TempRegister[i / 4][i % 4].flags.valid = true;
                this->TempRegister[i / 4][i % 4].flags.temp = true;
                this->TempRegister[i / 4][i % 4].addr = ROS_VC4_TEMP_REGISTER_FILE_START + cTemp++;
                this->TempRegister[i / 4][i % 4].mux = ROS_VC4_TEMP_REGISTER_FILE;
                this->TempRegister[i / 4][i % 4].swizzleMask = D3D10_SB_OPERAND_4_COMPONENT_MASK_X << (i % 4);
            }
            break;
        case D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS:
            HLSL_GetShaderInstruction(this->HLSLParser, Inst);
            // TODO:
            break;
        case D3D10_SB_OPCODE_DCL_INDEX_RANGE:
        case D3D10_SB_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY:
        case D3D10_SB_OPCODE_DCL_GS_INPUT_PRIMITIVE:
        case D3D10_SB_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT:
        case D3D10_SB_OPCODE_DCL_INPUT_SGV:
        case D3D10_SB_OPCODE_DCL_INPUT_SIV:
        case D3D10_SB_OPCODE_DCL_INPUT_PS_SGV:
        case D3D10_SB_OPCODE_DCL_INPUT_PS_SIV:
        case D3D10_SB_OPCODE_DCL_OUTPUT_SGV:
        case D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP:
            HLSL_GetShaderInstruction(this->HLSLParser, Inst);
            VC4_ASSERT(false); // TODO
            __fallthrough;
        default:
            if (OpCode >= D3D10_SB_OPCODE_RESERVED0)
            {
                VC4_ASSERT(false); // only 10.0 opcodes are supported.
            }
            bDone = true;
        }
    }
}
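// Allocation summary for the declarations parsed above. This translator uses
// a fixed carve-up of the register files rather than a real allocator:
// inputs take consecutive slots from ROS_VC4_INPUT_REGISTER_FILE_START,
// outputs from ROS_VC4_OUTPUT_REGISTER_FILE_START, and temps from
// ROS_VC4_TEMP_REGISTER_FILE_START (ra16~ra31, hence NumTemps <= 4, since
// 4 temps x 4 components = 16 registers). For example, "dcl_input v0.xyz"
// followed by "dcl_input v1.xy" consumes five consecutive input slots, one
// per enabled component.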
void Vc4Shader::Emit_Sample(CInstruction &Inst)
{
    assert(this->uShaderType == D3D10_SB_PIXEL_SHADER);

    VC4_ASSERT(Inst.m_NumOperands == 4);

    boolean bUnpack = false;

    Vc4Register o[4];

    VC4_ASSERT(Inst.m_Operands[0].m_NumComponents == D3D10_SB_OPERAND_4_COMPONENT);
    VC4_ASSERT(Inst.m_Operands[0].m_IndexDimension == D3D10_SB_OPERAND_INDEX_1D);
    VC4_ASSERT(Inst.m_Operands[0].m_IndexType[0] == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
    VC4_ASSERT(Inst.m_Operands[0].m_ComponentSelection == D3D10_SB_OPERAND_4_COMPONENT_MASK_MODE);
    VC4_ASSERT(Inst.m_Operands[0].m_WriteMask == (D3D10_SB_OPERAND_4_COMPONENT_MASK_R |
                                                  D3D10_SB_OPERAND_4_COMPONENT_MASK_G |
                                                  D3D10_SB_OPERAND_4_COMPONENT_MASK_B |
                                                  D3D10_SB_OPERAND_4_COMPONENT_MASK_A));

    switch (Inst.m_Operands[0].m_Type)
    {
    case D3D10_SB_OPERAND_TYPE_OUTPUT:
        o[0] = Find_Vc4Register_M(Inst.m_Operands[0], (Inst.m_Operands[0].m_WriteMask & D3D10_SB_OPERAND_4_COMPONENT_MASK_MASK));
        VC4_ASSERT(o[0].GetFlags().packed);
        break;
    case D3D10_SB_OPERAND_TYPE_TEMP:
        for (uint8_t i = 0, aCurrent = D3D10_SB_OPERAND_4_COMPONENT_MASK_X; i < 4; i++)
        {
            if (Inst.m_Operands[0].m_WriteMask & aCurrent)
            {
                o[i] = Find_Vc4Register_M(Inst.m_Operands[0], aCurrent);
            }
            aCurrent <<= 1;
        }
        bUnpack = true;
        break;
    default:
        VC4_ASSERT(false);
    }

    // Resource
    VC4_ASSERT(Inst.m_Operands[2].m_Type == D3D10_SB_OPERAND_TYPE_RESOURCE);
    VC4_ASSERT(Inst.m_Operands[2].m_NumComponents == D3D10_SB_OPERAND_4_COMPONENT);
    VC4_ASSERT(Inst.m_Operands[2].m_IndexDimension == D3D10_SB_OPERAND_INDEX_1D);
    VC4_ASSERT(Inst.m_Operands[2].m_IndexType[0] == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
    uint32_t resourceIndex = Inst.m_Operands[2].m_Index[0].m_RegIndex;
    uint32_t texDimension = this->ResourceDimension[resourceIndex];
    DXGI_FORMAT texFormat = UmdCompiler->GetShaderResourceFormat((uint8_t)resourceIndex);
    VC4_ASSERT((texFormat == DXGI_FORMAT_B8G8R8A8_UNORM) || (texFormat == DXGI_FORMAT_R8G8B8A8_UNORM)); // TODO: more generic color channel swizzle support.
    boolean bSwapColorChannel = (texFormat != DXGI_FORMAT_R8G8B8A8_UNORM);

    // Texture coordinate
    VC4_ASSERT(Inst.m_Operands[1].m_NumComponents == D3D10_SB_OPERAND_4_COMPONENT);
    VC4_ASSERT(Inst.m_Operands[1].m_IndexDimension == D3D10_SB_OPERAND_INDEX_1D);
    VC4_ASSERT(Inst.m_Operands[1].m_IndexType[0] == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
    VC4_ASSERT(Inst.m_Operands[1].m_ComponentSelection == D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE);

    Vc4Register s;
    Vc4Register t;
    Vc4Register r;

    switch (texDimension)
    {
    case D3D10_SB_RESOURCE_DIMENSION_TEXTURECUBE:
        r = Find_Vc4Register_M(Inst.m_Operands[1], D3D10_SB_OPERAND_4_COMPONENT_MASK(Inst.m_Operands[1].m_Swizzle[2]));
        __fallthrough;
    case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2D:
        t = Find_Vc4Register_M(Inst.m_Operands[1], D3D10_SB_OPERAND_4_COMPONENT_MASK(Inst.m_Operands[1].m_Swizzle[1]));
        __fallthrough;
    case D3D10_SB_RESOURCE_DIMENSION_TEXTURE1D:
        s = Find_Vc4Register_M(Inst.m_Operands[1], D3D10_SB_OPERAND_4_COMPONENT_MASK(Inst.m_Operands[1].m_Swizzle[0]));
        break;
    case D3D10_SB_RESOURCE_DIMENSION_BUFFER:
    case D3D10_SB_RESOURCE_DIMENSION_TEXTURE3D:
    case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DMS:
    case D3D10_SB_RESOURCE_DIMENSION_TEXTURE1DARRAY:
    case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DARRAY:
    case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DMSARRAY:
    default:
        assert(false);
    }

    // Sampler
    VC4_ASSERT(Inst.m_Operands[3].m_Type == D3D10_SB_OPERAND_TYPE_SAMPLER);
    VC4_ASSERT(Inst.m_Operands[3].m_IndexDimension == D3D10_SB_OPERAND_INDEX_1D);
    VC4_ASSERT(Inst.m_Operands[3].m_IndexType[0] == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
    uint32_t samplerIndex = Inst.m_Operands[3].m_Index[0].m_RegIndex;

    // texture address : z
    if (r.GetFlags().valid)
    {
        Vc4Instruction Vc4Inst;
        Vc4Register tmu0_r(VC4_QPU_ALU_REG_A, VC4_QPU_WADDR_TMU0_R);
        Vc4Inst.Vc4_a_MOV(tmu0_r, r);
        Vc4Inst.Emit(CurrentStorage);
    }

    // texture address : y
    if (t.GetFlags().valid)
    {
        Vc4Instruction Vc4Inst;
        Vc4Register tmu0_t(VC4_QPU_ALU_REG_A, VC4_QPU_WADDR_TMU0_T);
        Vc4Inst.Vc4_a_MOV(tmu0_t, t);
        Vc4Inst.Emit(CurrentStorage);
    }

    // texture address : x. 's' must be written last.
    assert(s.GetFlags().valid);
    {
        Vc4Instruction Vc4Inst;
        Vc4Register tmu0_s(VC4_QPU_ALU_REG_A, VC4_QPU_WADDR_TMU0_S);
        Vc4Inst.Vc4_a_MOV(tmu0_s, s);
        Vc4Inst.Emit(CurrentStorage);
    }

    // add uniform references.
    {
        {
            VC4_UNIFORM_FORMAT u;
            u.Type = VC4_UNIFORM_TYPE_SAMPLER_CONFIG_P0;
            u.samplerConfiguration.samplerIndex = samplerIndex;
            u.samplerConfiguration.resourceIndex = resourceIndex;
            this->AddUniformReference(u);
        }
        {
            VC4_UNIFORM_FORMAT u;
            u.Type = VC4_UNIFORM_TYPE_SAMPLER_CONFIG_P1;
            u.samplerConfiguration.samplerIndex = samplerIndex;
            u.samplerConfiguration.resourceIndex = resourceIndex;
            this->AddUniformReference(u);
        }
        if (r.GetFlags().valid) // only cube needs the P2 config.
        {
            VC4_UNIFORM_FORMAT u;
            u.Type = VC4_UNIFORM_TYPE_SAMPLER_CONFIG_P2;
            u.samplerConfiguration.samplerIndex = samplerIndex;
            u.samplerConfiguration.resourceIndex = resourceIndex;
            this->AddUniformReference(u);
        }
    }

    // Sample the texture; the result comes back in r4.
    {
        Vc4Instruction Vc4Inst;
        Vc4Inst.Vc4_Sig(VC4_QPU_SIG_LOAD_TMU0);
        Vc4Inst.Emit(CurrentStorage);
    }

    Vc4Register r4(VC4_QPU_ALU_R4);

    // Move the result in r4 to the output register(s).
    if (Inst.m_Operands[0].m_Type == D3D10_SB_OPERAND_TYPE_OUTPUT)
    {
        if (bSwapColorChannel == o[0].GetFlags().swap_color_channel)
        {
            // Channel orders already agree; a plain 32-bit move suffices.
            Vc4Instruction Vc4Inst;
            Vc4Inst.Vc4_a_MOV(o[0], r4);
            Vc4Inst.Emit(CurrentStorage);
        }
        else
        {
            // R, G, B channels
            for (uint8_t i = 0; i < 3; i++)
            {
                Vc4Instruction Vc4Inst;
                Vc4Inst.Vc4_m_MOV(o[0], r4);
                Vc4Inst.Vc4_m_Pack(VC4_QPU_PACK_MUL_8c - i);
                Vc4Inst.Vc4_m_Unpack(VC4_QPU_UNPACK_8a + i, true); // Use R4 unpack.
                Vc4Inst.Emit(CurrentStorage);
            }
            // A channel
            {
                Vc4Instruction Vc4Inst;
                Vc4Inst.Vc4_m_MOV(o[0], r4);
                Vc4Inst.Vc4_m_Pack(VC4_QPU_PACK_MUL_8d);
                Vc4Inst.Vc4_m_Unpack(VC4_QPU_UNPACK_8d, true); // Use R4 unpack.
                Vc4Inst.Emit(CurrentStorage);
            }
        }
    }
    else
    {
        // Move each color channel in r4 to o[i].
        // R, G, B channels
        for (uint8_t i = 0; i < 3; i++)
        {
            Vc4Register out = bSwapColorChannel ? o[2 - i] : o[i];
            if (out.GetFlags().valid)
            {
                Vc4Instruction Vc4Inst;
                Vc4Inst.Vc4_m_MOV(out, r4);
                Vc4Inst.Vc4_m_Unpack(VC4_QPU_UNPACK_8a + i, true); // Use R4 unpack.
                Vc4Inst.Emit(CurrentStorage);
            }
        }
        // A channel
        if (o[3].GetFlags().valid)
        {
            Vc4Instruction Vc4Inst;
            Vc4Inst.Vc4_m_MOV(o[3], r4);
            Vc4Inst.Vc4_m_Unpack(VC4_QPU_UNPACK_8d, true); // Use R4 unpack.
            Vc4Inst.Emit(CurrentStorage);
        }
    }

    { // Emit a NOP
        Vc4Instruction Vc4Inst;
        Vc4Inst.Emit(CurrentStorage);
    }
}
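// TMU notes for Emit_Sample: the coordinate writes are ordered r, t, then s
// because the write to TMU0_S is what fires the lookup; the LOAD_TMU0 signal
// afterwards retires the fetched texel into accumulator r4, and the hardware
// consumes the queued P0/P1 (and P2 for cubes) sampler-config uniforms, one
// set per lookup. The channel-swap path works because the byte-lane codes
// line up: unpack 8a+i reads lane i of r4 while pack MUL_8c-i writes lane
// 2-i, exchanging R and B for BGRA render targets while G (and A via 8d)
// stay put.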
void Vc4Shader::Emit_with_Add_pipe(CInstruction &Inst)
{
    assert(this->uShaderType == D3D10_SB_PIXEL_SHADER ||
           this->uShaderType == D3D10_SB_VERTEX_SHADER);

    VC4_ASSERT(Inst.m_NumOperands == 3);

    for (uint8_t i = 0, aCurrent = D3D10_SB_OPERAND_4_COMPONENT_MASK_X; i < 4; i++)
    {
        if (Inst.m_Operands[0].m_WriteMask & aCurrent)
        {
            Vc4Register dst = Find_Vc4Register_M(Inst.m_Operands[0], aCurrent);

            Vc4Register src[2];
            this->Setup_SourceRegisters(Inst, 1, ARRAYSIZE(src), i, src);

            Vc4Register _dst;
            if (dst.GetFlags().packed)
            {
                // pack has to be done on the mul pipe, so send the result to r3,
                // then use the mul pipe to move it to the final dst (with pack).
                Vc4Register r3(VC4_QPU_ALU_R3, VC4_QPU_WADDR_ACC3);
                _dst = r3;
            }
            else
            {
                _dst = dst;
            }

            {
                Vc4Instruction Vc4Inst;
                switch (Inst.m_OpCode)
                {
                case D3D10_SB_OPCODE_ADD:
                    Vc4Inst.Vc4_a_FADD(_dst, src[0], src[1]);
                    break;
                case D3D10_SB_OPCODE_MAX:
                    Vc4Inst.Vc4_a_FMAX(_dst, src[0], src[1]);
                    break;
                case D3D10_SB_OPCODE_MIN:
                    Vc4Inst.Vc4_a_FMIN(_dst, src[0], src[1]);
                    break;
                case D3D10_SB_OPCODE_IADD:
                    Vc4Inst.Vc4_a_IADD(_dst, src[0], src[1]);
                    break;
                default:
                    VC4_ASSERT(false);
                }
                Vc4Inst.Emit(CurrentStorage);
            }

            if (dst.GetFlags().packed)
            {
                Vc4Instruction Vc4Inst;
                Vc4Inst.Vc4_m_MOV(dst, _dst);
                Vc4Inst.Vc4_m_Pack(dst.GetPack(i));
                Vc4Inst.Emit(CurrentStorage);
            }
        }
        aCurrent <<= 1;
    }

    { // Emit a NOP
        Vc4Instruction Vc4Inst;
        Vc4Inst.Emit(CurrentStorage);
    }
}
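// Why the r3 detour in Emit_with_Add_pipe: the 8-bit saturating packs used
// for packed color destinations are a mul-pipe feature (the add pipe only
// has the regfile-A pack modes), so an add-pipe result headed for a packed
// register is staged in accumulator r3 and moved out by a mul-pipe MOV that
// applies dst.GetPack(i).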
void Vc4Shader::Emit_DPx(CInstruction &Inst)
{
    assert(this->uShaderType == D3D10_SB_PIXEL_SHADER ||
           this->uShaderType == D3D10_SB_VERTEX_SHADER);

    VC4_ASSERT(Inst.m_NumOperands == 3);

    {
        // DP2 loops 2 times, DP3 loops 3 times, DP4 loops 4 times.
        // DP2/DP3/DP4 are consecutive opcode values, so the component
        // count falls out of the opcode directly.
        uint8_t c = (uint8_t)(Inst.m_OpCode - D3D10_SB_OPCODE_DP2 + 2);

        // where to accumulate the result of each mul.
        Vc4Register accum(VC4_QPU_ALU_R3, VC4_QPU_WADDR_ACC3);

        {
            Vc4Register zero(VC4_QPU_ALU_REG_B, 0); // 0 as small immediate in raddr_b
            Vc4Instruction Vc4Inst(vc4_alu_small_immediate);
            Vc4Inst.Vc4_m_MOV(accum, zero);
            Vc4Inst.Emit(CurrentStorage);
        }

        for (uint8_t i = 0; i < c; i++)
        {
            Vc4Register temp(VC4_QPU_ALU_R1, VC4_QPU_WADDR_ACC1);

            Vc4Register src[2];
            Setup_SourceRegisters(Inst, 1, ARRAYSIZE(src), i, src);

            {
                Vc4Instruction Vc4Inst;
                Vc4Inst.Vc4_m_FMUL(temp, src[0], src[1]);
                if (i > 0)
                {
                    Vc4Inst.Vc4_a_FADD(accum, accum, temp);
                }
                Vc4Inst.Emit(CurrentStorage);
            }

            if (i + 1 == c)
            {
                Vc4Instruction Vc4Inst;
                Vc4Inst.Vc4_a_FADD(accum, accum, temp);
                Vc4Inst.Emit(CurrentStorage);
            }
        }

        // replicate the output where specified.
        for (uint8_t i = 0, aCurrent = D3D10_SB_OPERAND_4_COMPONENT_MASK_X; i < 4; i++)
        {
            if (Inst.m_Operands[0].m_WriteMask & aCurrent)
            {
                Vc4Register dst = Find_Vc4Register_M(Inst.m_Operands[0], aCurrent);

                Vc4Instruction Vc4Inst;
                Vc4Inst.Vc4_m_MOV(dst, accum);
                Vc4Inst.Vc4_m_Pack(dst.GetPack(i));
                Vc4Inst.Emit(CurrentStorage);
            }
            aCurrent <<= 1;
        }
    }

    { // Emit a NOP
        Vc4Instruction Vc4Inst;
        Vc4Inst.Emit(CurrentStorage);
    }
}
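// Accumulation schedule, worked for DP2 (c == 2). Within one QPU instruction
// both pipes read registers before either writes, so the FADD paired with a
// FMUL sees the r1 produced by the *previous* instruction; the add pipe sums
// one product while the mul pipe computes the next:
//
//   setup:  r3 = 0
//   i = 0:  r1 = a.x * b.x                      (mul pipe only)
//   i = 1:  r1 = a.y * b.y ; r3 = r3 + r1       (r1 still holds a.x * b.x)
//   tail:                    r3 = r3 + r1       (now a.y * b.y)
//
// The final r3 is then broadcast to every component enabled in the write
// mask.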
void Vc4Shader::Emit_Mad(CInstruction &Inst)
{
    assert(this->uShaderType == D3D10_SB_PIXEL_SHADER ||
           this->uShaderType == D3D10_SB_VERTEX_SHADER);

    VC4_ASSERT(Inst.m_NumOperands == 4);

    for (uint8_t i = 0, aCurrent = D3D10_SB_OPERAND_4_COMPONENT_MASK_X; i < 4; i++)
    {
        if (Inst.m_Operands[0].m_WriteMask & aCurrent)
        {
            Vc4Register accum(VC4_QPU_ALU_R3, VC4_QPU_WADDR_ACC3);
            Vc4Register dst = Find_Vc4Register_M(Inst.m_Operands[0], aCurrent);

            // multiply the first two source operands.
            {
                Vc4Register src[2];
                this->Setup_SourceRegisters(Inst, 1, ARRAYSIZE(src), i, src);

                {
                    Vc4Instruction Vc4Inst;
                    Vc4Inst.Vc4_m_FMUL(accum, src[0], src[1]);
                    Vc4Inst.Emit(CurrentStorage);
                }
            }

            Vc4Register _dst;
            if (dst.GetFlags().packed)
            {
                // pack has to be done on the mul pipe, so keep the result in r3,
                // then use the mul pipe to move it to the final dst (with pack).
                _dst = accum;
            }
            else
            {
                _dst = dst;
            }

            // add the 3rd source operand.
            {
                Vc4Register src[1];
                this->Setup_SourceRegisters(Inst, 3, ARRAYSIZE(src), i, src);

                {
                    Vc4Instruction Vc4Inst;
                    Vc4Inst.Vc4_a_FADD(_dst, accum, src[0]);
                    Vc4Inst.Emit(CurrentStorage);
                }
            }

            // move to the destination (with packing).
            if (dst.GetFlags().packed)
            {
                Vc4Instruction Vc4Inst;
                Vc4Inst.Vc4_m_MOV(dst, accum);
                Vc4Inst.Vc4_m_Pack(dst.GetPack(i));
                Vc4Inst.Emit(CurrentStorage);
            }
        }
        aCurrent <<= 1;
    }

    { // Emit a NOP
        Vc4Instruction Vc4Inst;
        Vc4Inst.Emit(CurrentStorage);
    }
}
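// Emit_Mad splits "dst = src0 * src1 + src2" across both pipes: the product
// lands in accumulator r3 via the mul pipe, the add pipe then folds in the
// third operand, and, for packed color destinations, a final mul-pipe MOV
// applies the byte pack, the same r3 detour used by Emit_with_Add_pipe.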