void Vc4Shader::Emit_Mov(CInstruction &Inst)
{
    assert(this->uShaderType == D3D10_SB_PIXEL_SHADER ||
           this->uShaderType == D3D10_SB_VERTEX_SHADER);

    VC4_ASSERT(Inst.m_NumOperands == 2);

    {
        for (uint8_t i = 0, aCurrent = D3D10_SB_OPERAND_4_COMPONENT_MASK_X; i < 4; i++)
        {
            if (Inst.m_Operands[0].m_WriteMask & aCurrent)
            {
                Vc4Register dst = Find_Vc4Register_M(Inst.m_Operands[0], aCurrent);

                Vc4Register src[1];
                Setup_SourceRegisters(Inst, 1, ARRAYSIZE(src), i, src);

                {
                    Vc4Instruction Vc4Inst;
                    Vc4Inst.Vc4_m_MOV(dst, src[0]);
                    Vc4Inst.Vc4_m_Pack(dst.GetPack(i));
                    Vc4Inst.Emit(CurrentStorage);
                }
            }

            aCurrent <<= 1;
        }
    }

    { // Emit a NOP
        Vc4Instruction Vc4Inst;
        Vc4Inst.Emit(CurrentStorage);
    }
}
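// Editor's note (illustrative sketch, not part of the emitted code): a masked DXBC
// move such as
//
//     mov o0.xy, v1
//
// is translated by Emit_Mov above into one mul-pipe MOV per enabled write-mask
// component, roughly:
//
//     mov o0, v1.x    ; pack into byte lane 'a' when o0 is a packed 8888 color register
//     mov o0, v1.y    ; pack into byte lane 'b'
//     nop
//
// The exact pack mode comes from dst.GetPack(i). A trailing NOP is emitted after every
// translated instruction in this file; presumably this keeps the generated code clear of
// the QPU restriction that a register-file location written by one instruction cannot be
// read by the immediately following instruction.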
void Vc4Shader::Emit_with_Mul_pipe(CInstruction &Inst)
{
    assert(this->uShaderType == D3D10_SB_PIXEL_SHADER ||
           this->uShaderType == D3D10_SB_VERTEX_SHADER);

    VC4_ASSERT(Inst.m_NumOperands == 3);

    {
        for (uint8_t i = 0, aCurrent = D3D10_SB_OPERAND_4_COMPONENT_MASK_X; i < 4; i++)
        {
            if (Inst.m_Operands[0].m_WriteMask & aCurrent)
            {
                Vc4Register dst = Find_Vc4Register_M(Inst.m_Operands[0], aCurrent);

                uint8_t pack = VC4_QPU_PACK_A_32;
                if (dst.GetFlags().packed)
                {
                    pack = VC4_QPU_PACK_MUL_8a + i;
                }

                Vc4Register src[2];
                this->Setup_SourceRegisters(Inst, 1, ARRAYSIZE(src), i, src);

                {
                    Vc4Instruction Vc4Inst;
                    switch (Inst.m_OpCode)
                    {
                    case D3D10_SB_OPCODE_MUL:
                        Vc4Inst.Vc4_m_FMUL(dst, src[0], src[1]);
                        break;
                    default:
                        VC4_ASSERT(false);
                    }
                    Vc4Inst.Vc4_m_Pack(pack);
                    Vc4Inst.Emit(CurrentStorage);
                }
            }

            aCurrent <<= 1;
        }
    }

    { // Emit a NOP
        Vc4Instruction Vc4Inst;
        Vc4Inst.Emit(CurrentStorage);
    }
}
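// Editor's note (illustrative sketch): Emit_with_Mul_pipe above handles opcodes that map
// directly onto the QPU mul pipe (currently only D3D10_SB_OPCODE_MUL). For a packed
// destination (e.g. an 8888 color output), component i is packed straight into byte lane
// i of the destination via VC4_QPU_PACK_MUL_8a + i, so
//
//     mul o0.xyzw, r0, r1
//
// becomes, roughly, four FMULs that each write one byte of o0, followed by a NOP; an
// unpacked destination uses VC4_QPU_PACK_A_32 (no packing) instead.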
void Vc4Shader::Emit_Sample(CInstruction &Inst)
{
    assert(this->uShaderType == D3D10_SB_PIXEL_SHADER);

    VC4_ASSERT(Inst.m_NumOperands == 4);

    boolean bUnpack = false;

    Vc4Register o[4];

    VC4_ASSERT(Inst.m_Operands[0].m_NumComponents == D3D10_SB_OPERAND_4_COMPONENT);
    VC4_ASSERT(Inst.m_Operands[0].m_IndexDimension == D3D10_SB_OPERAND_INDEX_1D);
    VC4_ASSERT(Inst.m_Operands[0].m_IndexType[0] == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
    VC4_ASSERT(Inst.m_Operands[0].m_ComponentSelection == D3D10_SB_OPERAND_4_COMPONENT_MASK_MODE);
    VC4_ASSERT(Inst.m_Operands[0].m_WriteMask == (D3D10_SB_OPERAND_4_COMPONENT_MASK_R |
                                                  D3D10_SB_OPERAND_4_COMPONENT_MASK_G |
                                                  D3D10_SB_OPERAND_4_COMPONENT_MASK_B |
                                                  D3D10_SB_OPERAND_4_COMPONENT_MASK_A));

    switch (Inst.m_Operands[0].m_Type)
    {
    case D3D10_SB_OPERAND_TYPE_OUTPUT:
        o[0] = Find_Vc4Register_M(Inst.m_Operands[0], (Inst.m_Operands[0].m_WriteMask & D3D10_SB_OPERAND_4_COMPONENT_MASK_MASK));
        VC4_ASSERT(o[0].GetFlags().packed);
        break;
    case D3D10_SB_OPERAND_TYPE_TEMP:
        for (uint8_t i = 0, aCurrent = D3D10_SB_OPERAND_4_COMPONENT_MASK_X; i < 4; i++)
        {
            if (Inst.m_Operands[0].m_WriteMask & aCurrent)
            {
                o[i] = Find_Vc4Register_M(Inst.m_Operands[0], aCurrent);
            }
            aCurrent <<= 1;
        }
        bUnpack = true;
        break;
    default:
        VC4_ASSERT(false);
    }

    // Resource
    VC4_ASSERT(Inst.m_Operands[2].m_Type == D3D10_SB_OPERAND_TYPE_RESOURCE);
    VC4_ASSERT(Inst.m_Operands[2].m_NumComponents == D3D10_SB_OPERAND_4_COMPONENT);
    VC4_ASSERT(Inst.m_Operands[2].m_IndexDimension == D3D10_SB_OPERAND_INDEX_1D);
    VC4_ASSERT(Inst.m_Operands[2].m_IndexType[0] == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);

    uint32_t resourceIndex = Inst.m_Operands[2].m_Index[0].m_RegIndex;
    uint32_t texDimension = this->ResourceDimension[resourceIndex];
    DXGI_FORMAT texFormat = UmdCompiler->GetShaderResourceFormat((uint8_t)resourceIndex);
    VC4_ASSERT((texFormat == DXGI_FORMAT_B8G8R8A8_UNORM) || (texFormat == DXGI_FORMAT_R8G8B8A8_UNORM));

    // TODO: more generic color channel swizzle support.
    boolean bSwapColorChannel = (texFormat != DXGI_FORMAT_R8G8B8A8_UNORM);

    // Texture coordinate
    VC4_ASSERT(Inst.m_Operands[1].m_NumComponents == D3D10_SB_OPERAND_4_COMPONENT);
    VC4_ASSERT(Inst.m_Operands[1].m_IndexDimension == D3D10_SB_OPERAND_INDEX_1D);
    VC4_ASSERT(Inst.m_Operands[1].m_IndexType[0] == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
    VC4_ASSERT(Inst.m_Operands[1].m_ComponentSelection == D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE);

    Vc4Register s;
    Vc4Register t;
    Vc4Register r;

    switch (texDimension)
    {
    case D3D10_SB_RESOURCE_DIMENSION_TEXTURECUBE:
        r = Find_Vc4Register_M(Inst.m_Operands[1], D3D10_SB_OPERAND_4_COMPONENT_MASK(Inst.m_Operands[1].m_Swizzle[2]));
        __fallthrough;
    case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2D:
        t = Find_Vc4Register_M(Inst.m_Operands[1], D3D10_SB_OPERAND_4_COMPONENT_MASK(Inst.m_Operands[1].m_Swizzle[1]));
        __fallthrough;
    case D3D10_SB_RESOURCE_DIMENSION_TEXTURE1D:
        s = Find_Vc4Register_M(Inst.m_Operands[1], D3D10_SB_OPERAND_4_COMPONENT_MASK(Inst.m_Operands[1].m_Swizzle[0]));
        break;
    case D3D10_SB_RESOURCE_DIMENSION_BUFFER:
    case D3D10_SB_RESOURCE_DIMENSION_TEXTURE3D:
    case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DMS:
    case D3D10_SB_RESOURCE_DIMENSION_TEXTURE1DARRAY:
    case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DARRAY:
    case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DMSARRAY:
    default:
        assert(false);
    }

    // Sampler
    VC4_ASSERT(Inst.m_Operands[3].m_Type == D3D10_SB_OPERAND_TYPE_SAMPLER);
    VC4_ASSERT(Inst.m_Operands[3].m_IndexDimension == D3D10_SB_OPERAND_INDEX_1D);
    VC4_ASSERT(Inst.m_Operands[3].m_IndexType[0] == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);

    uint32_t samplerIndex = Inst.m_Operands[3].m_Index[0].m_RegIndex;

    // texture address : z
    if (r.GetFlags().valid)
    {
        Vc4Instruction Vc4Inst;
        Vc4Register tmu0_r(VC4_QPU_ALU_REG_A, VC4_QPU_WADDR_TMU0_R);
        Vc4Inst.Vc4_a_MOV(tmu0_r, r);
        Vc4Inst.Emit(CurrentStorage);
    }

    // texture address : y
    if (t.GetFlags().valid)
    {
        Vc4Instruction Vc4Inst;
        Vc4Register tmu0_t(VC4_QPU_ALU_REG_A, VC4_QPU_WADDR_TMU0_T);
        Vc4Inst.Vc4_a_MOV(tmu0_t, t);
        Vc4Inst.Emit(CurrentStorage);
    }

    // texture address : x - 's' must be written last.
    assert(s.GetFlags().valid);
    {
        Vc4Instruction Vc4Inst;
        Vc4Register tmu0_s(VC4_QPU_ALU_REG_A, VC4_QPU_WADDR_TMU0_S);
        Vc4Inst.Vc4_a_MOV(tmu0_s, s);
        Vc4Inst.Emit(CurrentStorage);
    }

    // Add uniform references.
    {
        {
            VC4_UNIFORM_FORMAT u;
            u.Type = VC4_UNIFORM_TYPE_SAMPLER_CONFIG_P0;
            u.samplerConfiguration.samplerIndex = samplerIndex;
            u.samplerConfiguration.resourceIndex = resourceIndex;
            this->AddUniformReference(u);
        }

        {
            VC4_UNIFORM_FORMAT u;
            u.Type = VC4_UNIFORM_TYPE_SAMPLER_CONFIG_P1;
            u.samplerConfiguration.samplerIndex = samplerIndex;
            u.samplerConfiguration.resourceIndex = resourceIndex;
            this->AddUniformReference(u);
        }

        if (r.GetFlags().valid) // only cube maps need the P2 config.
        {
            VC4_UNIFORM_FORMAT u;
            u.Type = VC4_UNIFORM_TYPE_SAMPLER_CONFIG_P2;
            u.samplerConfiguration.samplerIndex = samplerIndex;
            u.samplerConfiguration.resourceIndex = resourceIndex;
            this->AddUniformReference(u);
        }
    }

    // Sample the texture; the result comes back in r4.
    {
        Vc4Instruction Vc4Inst;
        Vc4Inst.Vc4_Sig(VC4_QPU_SIG_LOAD_TMU0);
        Vc4Inst.Emit(CurrentStorage);
    }

    // Sample result is now in r4.
    Vc4Register r4(VC4_QPU_ALU_R4);

    // Move the result in r4 to the output register(s).
    if (Inst.m_Operands[0].m_Type == D3D10_SB_OPERAND_TYPE_OUTPUT)
    {
        if (bSwapColorChannel == o[0].GetFlags().swap_color_channel)
        {
            Vc4Instruction Vc4Inst;
            Vc4Inst.Vc4_a_MOV(o[0], r4);
            Vc4Inst.Emit(CurrentStorage);
        }
        else
        {
            // R, G, B channels
            for (uint8_t i = 0; i < 3; i++)
            {
                Vc4Instruction Vc4Inst;
                Vc4Inst.Vc4_m_MOV(o[0], r4);
                Vc4Inst.Vc4_m_Pack(VC4_QPU_PACK_MUL_8c - i);
                Vc4Inst.Vc4_m_Unpack(VC4_QPU_UNPACK_8a + i, true); // Use R4 unpack.
                Vc4Inst.Emit(CurrentStorage);
            }

            // A channel
            {
                Vc4Instruction Vc4Inst;
                Vc4Inst.Vc4_m_MOV(o[0], r4);
                Vc4Inst.Vc4_m_Pack(VC4_QPU_PACK_MUL_8d);
                Vc4Inst.Vc4_m_Unpack(VC4_QPU_UNPACK_8d, true); // Use R4 unpack.
                Vc4Inst.Emit(CurrentStorage);
            }
        }
    }
    else
    {
        // Move each color channel in r4 to o[i].

        // R, G, B channels
        for (uint8_t i = 0; i < 3; i++)
        {
            Vc4Register out = bSwapColorChannel ? o[2 - i] : o[i];
            if (out.GetFlags().valid)
            {
                Vc4Instruction Vc4Inst;
                Vc4Inst.Vc4_m_MOV(out, r4);
                Vc4Inst.Vc4_m_Unpack(VC4_QPU_UNPACK_8a + i, true); // Use R4 unpack.
                Vc4Inst.Emit(CurrentStorage);
            }
        }

        // A channel
        if (o[3].GetFlags().valid)
        {
            Vc4Instruction Vc4Inst;
            Vc4Inst.Vc4_m_MOV(o[3], r4);
            Vc4Inst.Vc4_m_Unpack(VC4_QPU_UNPACK_8d, true); // Use R4 unpack.
            Vc4Inst.Emit(CurrentStorage);
        }
    }

    { // Emit a NOP
        Vc4Instruction Vc4Inst;
        Vc4Inst.Emit(CurrentStorage);
    }
}
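// Editor's note (illustrative sketch of the sequence Emit_Sample above generates for a
// 2D sample into a packed color output):
//
//     mov tmu0_t, <t coordinate>   ; y
//     mov tmu0_s, <s coordinate>   ; x, written last - on VC4 the write to TMU0_S is
//                                  ; what actually issues the texture request
//     ldtmu0                       ; signal; the filtered texel arrives in r4
//     mov o0, r4                   ; or per-byte pack/unpack moves when the channel
//                                  ; order has to be swapped (BGRA vs RGBA)
//     nop
//
// The sampler/resource state is not encoded in the instructions themselves; the TMU reads
// its configuration from the uniform FIFO, which is why the P0/P1 (and, for cube maps, P2)
// uniform references are appended in that order for every sample. The claim about the S
// write triggering the request reflects the VideoCore IV documentation as the editor
// understands it; the rest mirrors the code above.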
void Vc4Shader::Emit_with_Add_pipe(CInstruction &Inst)
{
    assert(this->uShaderType == D3D10_SB_PIXEL_SHADER ||
           this->uShaderType == D3D10_SB_VERTEX_SHADER);

    VC4_ASSERT(Inst.m_NumOperands == 3);

    {
        for (uint8_t i = 0, aCurrent = D3D10_SB_OPERAND_4_COMPONENT_MASK_X; i < 4; i++)
        {
            if (Inst.m_Operands[0].m_WriteMask & aCurrent)
            {
                Vc4Register dst = Find_Vc4Register_M(Inst.m_Operands[0], aCurrent);

                Vc4Register src[2];
                this->Setup_SourceRegisters(Inst, 1, ARRAYSIZE(src), i, src);

                Vc4Register _dst;
                if (dst.GetFlags().packed)
                {
                    // pack has to be done at mul pipe, so result to r3,
                    // then use mul pipe to move to final dst (with pack).
                    Vc4Register r3(VC4_QPU_ALU_R3, VC4_QPU_WADDR_ACC3);
                    _dst = r3;
                }
                else
                {
                    _dst = dst;
                }

                {
                    Vc4Instruction Vc4Inst;
                    switch (Inst.m_OpCode)
                    {
                    case D3D10_SB_OPCODE_ADD:
                        Vc4Inst.Vc4_a_FADD(_dst, src[0], src[1]);
                        break;
                    case D3D10_SB_OPCODE_MAX:
                        Vc4Inst.Vc4_a_FMAX(_dst, src[0], src[1]);
                        break;
                    case D3D10_SB_OPCODE_MIN:
                        Vc4Inst.Vc4_a_FMIN(_dst, src[0], src[1]);
                        break;
                    case D3D10_SB_OPCODE_IADD:
                        Vc4Inst.Vc4_a_IADD(_dst, src[0], src[1]);
                        break;
                    default:
                        VC4_ASSERT(false);
                    }
                    Vc4Inst.Emit(CurrentStorage);
                }

                if (dst.GetFlags().packed)
                {
                    Vc4Instruction Vc4Inst;
                    Vc4Inst.Vc4_m_MOV(dst, _dst);
                    Vc4Inst.Vc4_m_Pack(dst.GetPack(i));
                    Vc4Inst.Emit(CurrentStorage);
                }
            }

            aCurrent <<= 1;
        }
    }

    { // Emit a NOP
        Vc4Instruction Vc4Inst;
        Vc4Inst.Emit(CurrentStorage);
    }
}
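// Editor's note (illustrative sketch): Emit_with_Add_pipe above covers ADD/MAX/MIN/IADD,
// which all execute on the QPU add pipe. Because this compiler only applies byte packing
// through the mul-pipe pack field, a packed destination takes a two-step form, roughly:
//
//     fadd r3, <src0>, <src1>      ; add-pipe op into the r3 accumulator
//     mov  o0, r3                  ; mul-pipe move with pack into byte lane i
//
// whereas an unpacked destination is written directly by the add-pipe op.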
void Vc4Shader::Emit_DPx(CInstruction &Inst)
{
    assert(this->uShaderType == D3D10_SB_PIXEL_SHADER ||
           this->uShaderType == D3D10_SB_VERTEX_SHADER);

    VC4_ASSERT(Inst.m_NumOperands == 3);

    {
        // DP2 loops 2 times, DP3 loops 3 times, DP4 loops 4 times.
        uint8_t c = (uint8_t)(Inst.m_OpCode - D3D10_SB_OPCODE_DP2 + 2);

        // where to accumulate the result of each mul.
        Vc4Register accum(VC4_QPU_ALU_R3, VC4_QPU_WADDR_ACC3);

        {
            Vc4Register zero(VC4_QPU_ALU_REG_B, 0); // 0 as small immediate in raddr_b
            Vc4Instruction Vc4Inst(vc4_alu_small_immediate);
            Vc4Inst.Vc4_m_MOV(accum, zero);
            Vc4Inst.Emit(CurrentStorage);
        }

        for (uint8_t i = 0; i < c; i++)
        {
            Vc4Register temp(VC4_QPU_ALU_R1, VC4_QPU_WADDR_ACC1);

            Vc4Register src[2];
            Setup_SourceRegisters(Inst, 1, ARRAYSIZE(src), i, src);

            {
                Vc4Instruction Vc4Inst;
                Vc4Inst.Vc4_m_FMUL(temp, src[0], src[1]);
                if (i > 0)
                {
                    // Fold the previous component's product into the accumulator on the
                    // add pipe of the same instruction.
                    Vc4Inst.Vc4_a_FADD(accum, accum, temp);
                }
                Vc4Inst.Emit(CurrentStorage);
            }

            if (i + 1 == c)
            {
                // Fold in the last component's product.
                Vc4Instruction Vc4Inst;
                Vc4Inst.Vc4_a_FADD(accum, accum, temp);
                Vc4Inst.Emit(CurrentStorage);
            }
        }

        // Replicate the output where specified.
        for (uint8_t i = 0, aCurrent = D3D10_SB_OPERAND_4_COMPONENT_MASK_X; i < 4; i++)
        {
            if (Inst.m_Operands[0].m_WriteMask & aCurrent)
            {
                Vc4Register dst = Find_Vc4Register_M(Inst.m_Operands[0], aCurrent);

                Vc4Instruction Vc4Inst;
                Vc4Inst.Vc4_m_MOV(dst, accum);
                Vc4Inst.Vc4_m_Pack(dst.GetPack(i));
                Vc4Inst.Emit(CurrentStorage);
            }

            aCurrent <<= 1;
        }
    }

    { // Emit a NOP
        Vc4Instruction Vc4Inst;
        Vc4Inst.Emit(CurrentStorage);
    }
}
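// Editor's note (illustrative sketch): for dp3 the loop above produces, roughly,
//
//     mov  r3, 0                            ; clear the accumulator (small immediate)
//     fmul r1, a.x, b.x                     ; product of component 0
//     fmul r1, a.y, b.y ; fadd r3, r3, r1   ; next product on the mul pipe while the
//     fmul r1, a.z, b.z ; fadd r3, r3, r1   ; add pipe folds in the previous one
//     fadd r3, r3, r1                       ; fold in the final product
//     mov  <dst.i>, r3                      ; replicate into each write-mask component
//     nop
//
// The pairing works because the add pipe of an instruction still sees the value of r1
// from the previous instruction; the FMUL issued alongside it only updates r1 afterwards.
// The names a/b stand for the two source operands and are purely for illustration.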
void Vc4Shader::Emit_Mad(CInstruction &Inst)
{
    assert(this->uShaderType == D3D10_SB_PIXEL_SHADER ||
           this->uShaderType == D3D10_SB_VERTEX_SHADER);

    VC4_ASSERT(Inst.m_NumOperands == 4);

    {
        for (uint8_t i = 0, aCurrent = D3D10_SB_OPERAND_4_COMPONENT_MASK_X; i < 4; i++)
        {
            if (Inst.m_Operands[0].m_WriteMask & aCurrent)
            {
                Vc4Register accum(VC4_QPU_ALU_R3, VC4_QPU_WADDR_ACC3);
                Vc4Register dst = Find_Vc4Register_M(Inst.m_Operands[0], aCurrent);

                // Perform the multiply on the first 2 source operands.
                {
                    Vc4Register src[2];
                    this->Setup_SourceRegisters(Inst, 1, ARRAYSIZE(src), i, src);

                    {
                        Vc4Instruction Vc4Inst;
                        Vc4Inst.Vc4_m_FMUL(accum, src[0], src[1]);
                        Vc4Inst.Emit(CurrentStorage);
                    }
                }

                Vc4Register _dst;
                if (dst.GetFlags().packed)
                {
                    // Pack has to be done on the mul pipe, so keep the result in r3,
                    // then use the mul pipe to move it to the final dst (with pack).
                    _dst = accum;
                }
                else
                {
                    _dst = dst;
                }

                // Perform the add with the 3rd source operand.
                {
                    Vc4Register src[1];
                    this->Setup_SourceRegisters(Inst, 3, ARRAYSIZE(src), i, src);

                    {
                        Vc4Instruction Vc4Inst;
                        Vc4Inst.Vc4_a_FADD(_dst, accum, src[0]);
                        Vc4Inst.Emit(CurrentStorage);
                    }
                }

                // Move to the destination (with packing).
                if (dst.GetFlags().packed)
                {
                    Vc4Instruction Vc4Inst;
                    Vc4Inst.Vc4_m_MOV(dst, accum);
                    Vc4Inst.Vc4_m_Pack(dst.GetPack(i));
                    Vc4Inst.Emit(CurrentStorage);
                }
            }

            aCurrent <<= 1;
        }
    }

    { // Emit a NOP
        Vc4Instruction Vc4Inst;
        Vc4Inst.Emit(CurrentStorage);
    }
}
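// Editor's note (illustrative sketch): Emit_Mad above splits each mad component into a
// mul-pipe multiply followed by an add-pipe add, roughly:
//
//     fmul r3, <src1>, <src2>      ; src1 * src2 into the r3 accumulator
//     fadd <dst>, r3, <src3>       ; + src3 (written back to r3 instead when dst is packed)
//     mov  <dst>, r3               ; packed destinations only: mul-pipe move with pack
//     nop
//
// <src1>..<src3> refer to DXBC operands 1..3 of the mad instruction; operand 0 is the
// destination.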