void Vc4Shader::Emit_Mov(CInstruction &Inst)
{
    assert(this->uShaderType == D3D10_SB_PIXEL_SHADER ||
           this->uShaderType == D3D10_SB_VERTEX_SHADER);

    VC4_ASSERT(Inst.m_NumOperands == 2);

    {
        for (uint8_t i = 0, aCurrent = D3D10_SB_OPERAND_4_COMPONENT_MASK_X; i < 4; i++)
        {
            if (Inst.m_Operands[0].m_WriteMask & aCurrent)
            {
                Vc4Register dst = Find_Vc4Register_M(Inst.m_Operands[0], aCurrent);

                Vc4Register src[1];
                Setup_SourceRegisters(Inst, 1, ARRAYSIZE(src), i, src);

                {
                    Vc4Instruction Vc4Inst;
                    Vc4Inst.Vc4_m_MOV(dst, src[0]);
                    Vc4Inst.Vc4_m_Pack(dst.GetPack(i));
                    Vc4Inst.Emit(CurrentStorage);
                }
            }

            aCurrent <<= 1;
        }
    }

    { // Emit a NOP
        Vc4Instruction Vc4Inst;
        Vc4Inst.Emit(CurrentStorage);
    }
}
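// Editor's note (illustrative sketch, not part of the emitted code): a masked DXBC
// move such as
//
//     mov o0.xy, v1
//
// is translated by Emit_Mov above into one mul-pipe MOV per enabled write-mask
// component, roughly:
//
//     mov o0, v1.x    ; pack into byte lane 'a' when o0 is a packed 8888 color register
//     mov o0, v1.y    ; pack into byte lane 'b'
//     nop
//
// The exact pack mode comes from dst.GetPack(i). A trailing NOP is emitted after every
// translated instruction in this file; presumably this keeps the generated code clear of
// the QPU restriction that a register-file location written by one instruction cannot be
// read by the immediately following instruction.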
void Vc4Shader::Emit_with_Mul_pipe(CInstruction &Inst)
{
    assert(this->uShaderType == D3D10_SB_PIXEL_SHADER ||
           this->uShaderType == D3D10_SB_VERTEX_SHADER);

    VC4_ASSERT(Inst.m_NumOperands == 3);

    {
        for (uint8_t i = 0, aCurrent = D3D10_SB_OPERAND_4_COMPONENT_MASK_X; i < 4; i++)
        {
            if (Inst.m_Operands[0].m_WriteMask & aCurrent)
            {
                Vc4Register dst = Find_Vc4Register_M(Inst.m_Operands[0], aCurrent);

                uint8_t pack = VC4_QPU_PACK_A_32;
                if (dst.GetFlags().packed)
                {
                    pack = VC4_QPU_PACK_MUL_8a + i;
                }

                Vc4Register src[2];
                this->Setup_SourceRegisters(Inst, 1, ARRAYSIZE(src), i, src);

                {
                    Vc4Instruction Vc4Inst;
                    switch (Inst.m_OpCode)
                    {
                    case D3D10_SB_OPCODE_MUL:
                        Vc4Inst.Vc4_m_FMUL(dst, src[0], src[1]);
                        break;
                    default:
                        VC4_ASSERT(false);
                    }
                    Vc4Inst.Vc4_m_Pack(pack);
                    Vc4Inst.Emit(CurrentStorage);
                }
            }

            aCurrent <<= 1;
        }
    }

    { // Emit a NOP
        Vc4Instruction Vc4Inst;
        Vc4Inst.Emit(CurrentStorage);
    }
}
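// Editor's note (illustrative sketch): Emit_with_Mul_pipe above handles opcodes that map
// directly onto the QPU mul pipe (currently only D3D10_SB_OPCODE_MUL). For a packed
// destination (e.g. an 8888 color output), component i is packed straight into byte lane
// i of the destination via VC4_QPU_PACK_MUL_8a + i, so
//
//     mul o0.xyzw, r0, r1
//
// becomes, roughly, four FMULs that each write one byte of o0, followed by a NOP; an
// unpacked destination uses VC4_QPU_PACK_A_32 (no packing) instead.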
void Vc4Shader::Emit_Sample(CInstruction &Inst)
{
    assert(this->uShaderType == D3D10_SB_PIXEL_SHADER);

    VC4_ASSERT(Inst.m_NumOperands == 4);

    boolean bUnpack = false;

    Vc4Register o[4];

    VC4_ASSERT(Inst.m_Operands[0].m_NumComponents == D3D10_SB_OPERAND_4_COMPONENT);
    VC4_ASSERT(Inst.m_Operands[0].m_IndexDimension == D3D10_SB_OPERAND_INDEX_1D);
    VC4_ASSERT(Inst.m_Operands[0].m_IndexType[0] == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
    VC4_ASSERT(Inst.m_Operands[0].m_ComponentSelection == D3D10_SB_OPERAND_4_COMPONENT_MASK_MODE);
    VC4_ASSERT(Inst.m_Operands[0].m_WriteMask == (D3D10_SB_OPERAND_4_COMPONENT_MASK_R |
                                                  D3D10_SB_OPERAND_4_COMPONENT_MASK_G |
                                                  D3D10_SB_OPERAND_4_COMPONENT_MASK_B |
                                                  D3D10_SB_OPERAND_4_COMPONENT_MASK_A));

    switch (Inst.m_Operands[0].m_Type)
    {
    case D3D10_SB_OPERAND_TYPE_OUTPUT:
        o[0] = Find_Vc4Register_M(Inst.m_Operands[0], (Inst.m_Operands[0].m_WriteMask & D3D10_SB_OPERAND_4_COMPONENT_MASK_MASK));
        VC4_ASSERT(o[0].GetFlags().packed);
        break;
    case D3D10_SB_OPERAND_TYPE_TEMP:
        for (uint8_t i = 0, aCurrent = D3D10_SB_OPERAND_4_COMPONENT_MASK_X; i < 4; i++)
        {
            if (Inst.m_Operands[0].m_WriteMask & aCurrent)
            {
                o[i] = Find_Vc4Register_M(Inst.m_Operands[0], aCurrent);
            }
            aCurrent <<= 1;
        }
        bUnpack = true;
        break;
    default:
        VC4_ASSERT(false);
    }

    // Resource
    VC4_ASSERT(Inst.m_Operands[2].m_Type == D3D10_SB_OPERAND_TYPE_RESOURCE);
    VC4_ASSERT(Inst.m_Operands[2].m_NumComponents == D3D10_SB_OPERAND_4_COMPONENT);
    VC4_ASSERT(Inst.m_Operands[2].m_IndexDimension == D3D10_SB_OPERAND_INDEX_1D);
    VC4_ASSERT(Inst.m_Operands[2].m_IndexType[0] == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);

    uint32_t resourceIndex = Inst.m_Operands[2].m_Index[0].m_RegIndex;
    uint32_t texDimension = this->ResourceDimension[resourceIndex];
    DXGI_FORMAT texFormat = UmdCompiler->GetShaderResourceFormat((uint8_t)resourceIndex);
    VC4_ASSERT((texFormat == DXGI_FORMAT_B8G8R8A8_UNORM) || (texFormat == DXGI_FORMAT_R8G8B8A8_UNORM));

    // TODO: more generic color channel swizzle support.
    boolean bSwapColorChannel = (texFormat != DXGI_FORMAT_R8G8B8A8_UNORM);

    // Texture coordinate
    VC4_ASSERT(Inst.m_Operands[1].m_NumComponents == D3D10_SB_OPERAND_4_COMPONENT);
    VC4_ASSERT(Inst.m_Operands[1].m_IndexDimension == D3D10_SB_OPERAND_INDEX_1D);
    VC4_ASSERT(Inst.m_Operands[1].m_IndexType[0] == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
    VC4_ASSERT(Inst.m_Operands[1].m_ComponentSelection == D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE);

    Vc4Register s;
    Vc4Register t;
    Vc4Register r;

    switch (texDimension)
    {
    case D3D10_SB_RESOURCE_DIMENSION_TEXTURECUBE:
        r = Find_Vc4Register_M(Inst.m_Operands[1], D3D10_SB_OPERAND_4_COMPONENT_MASK(Inst.m_Operands[1].m_Swizzle[2]));
        __fallthrough;
    case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2D:
        t = Find_Vc4Register_M(Inst.m_Operands[1], D3D10_SB_OPERAND_4_COMPONENT_MASK(Inst.m_Operands[1].m_Swizzle[1]));
        __fallthrough;
    case D3D10_SB_RESOURCE_DIMENSION_TEXTURE1D:
        s = Find_Vc4Register_M(Inst.m_Operands[1], D3D10_SB_OPERAND_4_COMPONENT_MASK(Inst.m_Operands[1].m_Swizzle[0]));
        break;
    case D3D10_SB_RESOURCE_DIMENSION_BUFFER:
    case D3D10_SB_RESOURCE_DIMENSION_TEXTURE3D:
    case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DMS:
    case D3D10_SB_RESOURCE_DIMENSION_TEXTURE1DARRAY:
    case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DARRAY:
    case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DMSARRAY:
    default:
        assert(false);
    }

    // Sampler
    VC4_ASSERT(Inst.m_Operands[3].m_Type == D3D10_SB_OPERAND_TYPE_SAMPLER);
    VC4_ASSERT(Inst.m_Operands[3].m_IndexDimension == D3D10_SB_OPERAND_INDEX_1D);
    VC4_ASSERT(Inst.m_Operands[3].m_IndexType[0] == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);

    uint32_t samplerIndex = Inst.m_Operands[3].m_Index[0].m_RegIndex;

    // texture address : z
    if (r.GetFlags().valid)
    {
        Vc4Instruction Vc4Inst;
        Vc4Register tmu0_r(VC4_QPU_ALU_REG_A, VC4_QPU_WADDR_TMU0_R);
        Vc4Inst.Vc4_a_MOV(tmu0_r, r);
        Vc4Inst.Emit(CurrentStorage);
    }

    // texture address : y
    if (t.GetFlags().valid)
    {
        Vc4Instruction Vc4Inst;
        Vc4Register tmu0_t(VC4_QPU_ALU_REG_A, VC4_QPU_WADDR_TMU0_T);
        Vc4Inst.Vc4_a_MOV(tmu0_t, t);
        Vc4Inst.Emit(CurrentStorage);
    }

    // texture address : x - 's' must be written last.
    assert(s.GetFlags().valid);
    {
        Vc4Instruction Vc4Inst;
        Vc4Register tmu0_s(VC4_QPU_ALU_REG_A, VC4_QPU_WADDR_TMU0_S);
        Vc4Inst.Vc4_a_MOV(tmu0_s, s);
        Vc4Inst.Emit(CurrentStorage);
    }

    // Add uniform references.
    {
        {
            VC4_UNIFORM_FORMAT u;
            u.Type = VC4_UNIFORM_TYPE_SAMPLER_CONFIG_P0;
            u.samplerConfiguration.samplerIndex = samplerIndex;
            u.samplerConfiguration.resourceIndex = resourceIndex;
            this->AddUniformReference(u);
        }

        {
            VC4_UNIFORM_FORMAT u;
            u.Type = VC4_UNIFORM_TYPE_SAMPLER_CONFIG_P1;
            u.samplerConfiguration.samplerIndex = samplerIndex;
            u.samplerConfiguration.resourceIndex = resourceIndex;
            this->AddUniformReference(u);
        }

        if (r.GetFlags().valid) // only cube maps need the P2 config.
        {
            VC4_UNIFORM_FORMAT u;
            u.Type = VC4_UNIFORM_TYPE_SAMPLER_CONFIG_P2;
            u.samplerConfiguration.samplerIndex = samplerIndex;
            u.samplerConfiguration.resourceIndex = resourceIndex;
            this->AddUniformReference(u);
        }
    }

    // Sample the texture; the result comes back in r4.
    {
        Vc4Instruction Vc4Inst;
        Vc4Inst.Vc4_Sig(VC4_QPU_SIG_LOAD_TMU0);
        Vc4Inst.Emit(CurrentStorage);
    }

    // Sample result is now in r4.
    Vc4Register r4(VC4_QPU_ALU_R4);

    // Move the result in r4 to the output register(s).
    if (Inst.m_Operands[0].m_Type == D3D10_SB_OPERAND_TYPE_OUTPUT)
    {
        if (bSwapColorChannel == o[0].GetFlags().swap_color_channel)
        {
            Vc4Instruction Vc4Inst;
            Vc4Inst.Vc4_a_MOV(o[0], r4);
            Vc4Inst.Emit(CurrentStorage);
        }
        else
        {
            // R, G, B channels
            for (uint8_t i = 0; i < 3; i++)
            {
                Vc4Instruction Vc4Inst;
                Vc4Inst.Vc4_m_MOV(o[0], r4);
                Vc4Inst.Vc4_m_Pack(VC4_QPU_PACK_MUL_8c - i);
                Vc4Inst.Vc4_m_Unpack(VC4_QPU_UNPACK_8a + i, true); // Use R4 unpack.
                Vc4Inst.Emit(CurrentStorage);
            }

            // A channel
            {
                Vc4Instruction Vc4Inst;
                Vc4Inst.Vc4_m_MOV(o[0], r4);
                Vc4Inst.Vc4_m_Pack(VC4_QPU_PACK_MUL_8d);
                Vc4Inst.Vc4_m_Unpack(VC4_QPU_UNPACK_8d, true); // Use R4 unpack.
                Vc4Inst.Emit(CurrentStorage);
            }
        }
    }
    else
    {
        // Move each color channel in r4 to o[i].

        // R, G, B channels
        for (uint8_t i = 0; i < 3; i++)
        {
            Vc4Register out = bSwapColorChannel ? o[2 - i] : o[i];
            if (out.GetFlags().valid)
            {
                Vc4Instruction Vc4Inst;
                Vc4Inst.Vc4_m_MOV(out, r4);
                Vc4Inst.Vc4_m_Unpack(VC4_QPU_UNPACK_8a + i, true); // Use R4 unpack.
                Vc4Inst.Emit(CurrentStorage);
            }
        }

        // A channel
        if (o[3].GetFlags().valid)
        {
            Vc4Instruction Vc4Inst;
            Vc4Inst.Vc4_m_MOV(o[3], r4);
            Vc4Inst.Vc4_m_Unpack(VC4_QPU_UNPACK_8d, true); // Use R4 unpack.
            Vc4Inst.Emit(CurrentStorage);
        }
    }

    { // Emit a NOP
        Vc4Instruction Vc4Inst;
        Vc4Inst.Emit(CurrentStorage);
    }
}
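// Editor's note (illustrative sketch of the sequence Emit_Sample above generates for a
// 2D sample into a packed color output):
//
//     mov tmu0_t, <t coordinate>   ; y
//     mov tmu0_s, <s coordinate>   ; x, written last - on VC4 the write to TMU0_S is
//                                  ; what actually issues the texture request
//     ldtmu0                       ; signal; the filtered texel arrives in r4
//     mov o0, r4                   ; or per-byte pack/unpack moves when the channel
//                                  ; order has to be swapped (BGRA vs RGBA)
//     nop
//
// The sampler/resource state is not encoded in the instructions themselves; the TMU reads
// its configuration from the uniform FIFO, which is why the P0/P1 (and, for cube maps, P2)
// uniform references are appended in that order for every sample. The claim about the S
// write triggering the request reflects the VideoCore IV documentation as the editor
// understands it; the rest mirrors the code above.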
void Vc4Shader::Emit_with_Add_pipe(CInstruction &Inst)
{
    assert(this->uShaderType == D3D10_SB_PIXEL_SHADER ||
           this->uShaderType == D3D10_SB_VERTEX_SHADER);

    VC4_ASSERT(Inst.m_NumOperands == 3);

    {
        for (uint8_t i = 0, aCurrent = D3D10_SB_OPERAND_4_COMPONENT_MASK_X; i < 4; i++)
        {
            if (Inst.m_Operands[0].m_WriteMask & aCurrent)
            {
                Vc4Register dst = Find_Vc4Register_M(Inst.m_Operands[0], aCurrent);

                Vc4Register src[2];
                this->Setup_SourceRegisters(Inst, 1, ARRAYSIZE(src), i, src);

                Vc4Register _dst;
                if (dst.GetFlags().packed)
                {
                    // pack has to be done at mul pipe, so result to r3,
                    // then use mul pipe to move to final dst (with pack).
                    Vc4Register r3(VC4_QPU_ALU_R3, VC4_QPU_WADDR_ACC3);
                    _dst = r3;
                }
                else
                {
                    _dst = dst;
                }

                {
                    Vc4Instruction Vc4Inst;
                    switch (Inst.m_OpCode)
                    {
                    case D3D10_SB_OPCODE_ADD:
                        Vc4Inst.Vc4_a_FADD(_dst, src[0], src[1]);
                        break;
                    case D3D10_SB_OPCODE_MAX:
                        Vc4Inst.Vc4_a_FMAX(_dst, src[0], src[1]);
                        break;
                    case D3D10_SB_OPCODE_MIN:
                        Vc4Inst.Vc4_a_FMIN(_dst, src[0], src[1]);
                        break;
                    case D3D10_SB_OPCODE_IADD:
                        Vc4Inst.Vc4_a_IADD(_dst, src[0], src[1]);
                        break;
                    default:
                        VC4_ASSERT(false);
                    }
                    Vc4Inst.Emit(CurrentStorage);
                }

                if (dst.GetFlags().packed)
                {
                    Vc4Instruction Vc4Inst;
                    Vc4Inst.Vc4_m_MOV(dst, _dst);
                    Vc4Inst.Vc4_m_Pack(dst.GetPack(i));
                    Vc4Inst.Emit(CurrentStorage);
                }
            }

            aCurrent <<= 1;
        }
    }

    { // Emit a NOP
        Vc4Instruction Vc4Inst;
        Vc4Inst.Emit(CurrentStorage);
    }
}
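// Editor's note (illustrative sketch): Emit_with_Add_pipe above covers ADD/MAX/MIN/IADD,
// which all execute on the QPU add pipe. Because this compiler only applies byte packing
// through the mul-pipe pack field, a packed destination takes a two-step form, roughly:
//
//     fadd r3, <src0>, <src1>      ; add-pipe op into the r3 accumulator
//     mov  o0, r3                  ; mul-pipe move with pack into byte lane i
//
// whereas an unpacked destination is written directly by the add-pipe op.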
void Vc4Shader::Emit_DPx(CInstruction &Inst)
{
    assert(this->uShaderType == D3D10_SB_PIXEL_SHADER ||
           this->uShaderType == D3D10_SB_VERTEX_SHADER);

    VC4_ASSERT(Inst.m_NumOperands == 3);

    {
        // DP2 loops 2 times, DP3 loops 3 times, DP4 loops 4 times.
        uint8_t c = (uint8_t)(Inst.m_OpCode - D3D10_SB_OPCODE_DP2 + 2);

        // where to accumulate the result of each mul.
        Vc4Register accum(VC4_QPU_ALU_R3, VC4_QPU_WADDR_ACC3);

        {
            Vc4Register zero(VC4_QPU_ALU_REG_B, 0); // 0 as small immediate in raddr_b
            Vc4Instruction Vc4Inst(vc4_alu_small_immediate);
            Vc4Inst.Vc4_m_MOV(accum, zero);
            Vc4Inst.Emit(CurrentStorage);
        }

        for (uint8_t i = 0; i < c; i++)
        {
            Vc4Register temp(VC4_QPU_ALU_R1, VC4_QPU_WADDR_ACC1);

            Vc4Register src[2];
            Setup_SourceRegisters(Inst, 1, ARRAYSIZE(src), i, src);

            {
                Vc4Instruction Vc4Inst;
                Vc4Inst.Vc4_m_FMUL(temp, src[0], src[1]);
                if (i > 0)
                {
                    // Fold the previous component's product into the accumulator on the
                    // add pipe of the same instruction.
                    Vc4Inst.Vc4_a_FADD(accum, accum, temp);
                }
                Vc4Inst.Emit(CurrentStorage);
            }

            if (i + 1 == c)
            {
                // Fold in the last component's product.
                Vc4Instruction Vc4Inst;
                Vc4Inst.Vc4_a_FADD(accum, accum, temp);
                Vc4Inst.Emit(CurrentStorage);
            }
        }

        // Replicate the output where specified.
        for (uint8_t i = 0, aCurrent = D3D10_SB_OPERAND_4_COMPONENT_MASK_X; i < 4; i++)
        {
            if (Inst.m_Operands[0].m_WriteMask & aCurrent)
            {
                Vc4Register dst = Find_Vc4Register_M(Inst.m_Operands[0], aCurrent);

                Vc4Instruction Vc4Inst;
                Vc4Inst.Vc4_m_MOV(dst, accum);
                Vc4Inst.Vc4_m_Pack(dst.GetPack(i));
                Vc4Inst.Emit(CurrentStorage);
            }

            aCurrent <<= 1;
        }
    }

    { // Emit a NOP
        Vc4Instruction Vc4Inst;
        Vc4Inst.Emit(CurrentStorage);
    }
}
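// Editor's note (illustrative sketch): for dp3 the loop above produces, roughly,
//
//     mov  r3, 0                            ; clear the accumulator (small immediate)
//     fmul r1, a.x, b.x                     ; product of component 0
//     fmul r1, a.y, b.y ; fadd r3, r3, r1   ; next product on the mul pipe while the
//     fmul r1, a.z, b.z ; fadd r3, r3, r1   ; add pipe folds in the previous one
//     fadd r3, r3, r1                       ; fold in the final product
//     mov  <dst.i>, r3                      ; replicate into each write-mask component
//     nop
//
// The pairing works because the add pipe of an instruction still sees the value of r1
// from the previous instruction; the FMUL issued alongside it only updates r1 afterwards.
// The names a/b stand for the two source operands and are purely for illustration.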
void Vc4Shader::Emit_Mad(CInstruction &Inst)
{
    assert(this->uShaderType == D3D10_SB_PIXEL_SHADER ||
           this->uShaderType == D3D10_SB_VERTEX_SHADER);

    VC4_ASSERT(Inst.m_NumOperands == 4);

    {
        for (uint8_t i = 0, aCurrent = D3D10_SB_OPERAND_4_COMPONENT_MASK_X; i < 4; i++)
        {
            if (Inst.m_Operands[0].m_WriteMask & aCurrent)
            {
                Vc4Register accum(VC4_QPU_ALU_R3, VC4_QPU_WADDR_ACC3);
                Vc4Register dst = Find_Vc4Register_M(Inst.m_Operands[0], aCurrent);

                // Perform the multiply on the first 2 source operands.
                {
                    Vc4Register src[2];
                    this->Setup_SourceRegisters(Inst, 1, ARRAYSIZE(src), i, src);

                    {
                        Vc4Instruction Vc4Inst;
                        Vc4Inst.Vc4_m_FMUL(accum, src[0], src[1]);
                        Vc4Inst.Emit(CurrentStorage);
                    }
                }

                Vc4Register _dst;
                if (dst.GetFlags().packed)
                {
                    // Pack has to be done on the mul pipe, so keep the result in r3,
                    // then use the mul pipe to move it to the final dst (with pack).
                    _dst = accum;
                }
                else
                {
                    _dst = dst;
                }

                // Perform the add with the 3rd source operand.
                {
                    Vc4Register src[1];
                    this->Setup_SourceRegisters(Inst, 3, ARRAYSIZE(src), i, src);

                    {
                        Vc4Instruction Vc4Inst;
                        Vc4Inst.Vc4_a_FADD(_dst, accum, src[0]);
                        Vc4Inst.Emit(CurrentStorage);
                    }
                }

                // Move to the destination (with packing).
                if (dst.GetFlags().packed)
                {
                    Vc4Instruction Vc4Inst;
                    Vc4Inst.Vc4_m_MOV(dst, accum);
                    Vc4Inst.Vc4_m_Pack(dst.GetPack(i));
                    Vc4Inst.Emit(CurrentStorage);
                }
            }

            aCurrent <<= 1;
        }
    }

    { // Emit a NOP
        Vc4Instruction Vc4Inst;
        Vc4Inst.Emit(CurrentStorage);
    }
}
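// Editor's note (illustrative sketch): Emit_Mad above splits each mad component into a
// mul-pipe multiply followed by an add-pipe add, roughly:
//
//     fmul r3, <src1>, <src2>      ; src1 * src2 into the r3 accumulator
//     fadd <dst>, r3, <src3>       ; + src3 (written back to r3 instead when dst is packed)
//     mov  <dst>, r3               ; packed destinations only: mul-pipe move with pack
//     nop
//
// <src1>..<src3> refer to DXBC operands 1..3 of the mad instruction; operand 0 is the
// destination.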