Ejemplo n.º 1
0
void CodeGen::genHWIntrinsicJumpTableFallback(NamedIntrinsic            intrinsic,
                                              regNumber                 nonConstImmReg,
                                              regNumber                 baseReg,
                                              regNumber                 offsReg,
                                              HWIntrinsicSwitchCaseBody emitSwCase)
{
    assert(nonConstImmReg != REG_NA);
    emitter* emit = getEmitter();

    const unsigned maxByte = (unsigned)Compiler::immUpperBoundOfHWIntrinsic(intrinsic) + 1;
    assert(maxByte <= 256);
    BasicBlock* jmpTable[256];

    unsigned jmpTableBase = emit->emitBBTableDataGenBeg(maxByte, true);
    unsigned jmpTableOffs = 0;

    // Emit the jump table
    for (unsigned i = 0; i < maxByte; i++)
    {
        jmpTable[i] = genCreateTempLabel();
        emit->emitDataGenData(i, jmpTable[i]);
    }

    emit->emitDataGenEnd();

    // Compute and jump to the appropriate offset in the switch table
    emit->emitIns_R_C(INS_lea, emitTypeSize(TYP_I_IMPL), offsReg, compiler->eeFindJitDataOffs(jmpTableBase), 0);

    emit->emitIns_R_ARX(INS_mov, EA_4BYTE, offsReg, offsReg, nonConstImmReg, 4, 0);
    emit->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, compiler->fgFirstBB, baseReg);
    emit->emitIns_R_R(INS_add, EA_PTRSIZE, offsReg, baseReg);
    emit->emitIns_R(INS_i_jmp, emitTypeSize(TYP_I_IMPL), offsReg);

    // Emit the switch table entries

    BasicBlock* switchTableBeg = genCreateTempLabel();
    BasicBlock* switchTableEnd = genCreateTempLabel();

    genDefineTempLabel(switchTableBeg);

    for (unsigned i = 0; i < maxByte; i++)
    {
        genDefineTempLabel(jmpTable[i]);
        emitSwCase(i);
        emit->emitIns_J(INS_jmp, switchTableEnd);
    }

    genDefineTempLabel(switchTableEnd);
}
Ejemplo n.º 2
0
//------------------------------------------------------------------------
// genSSE42Intrinsic: Generates the code for an SSE4.2 hardware intrinsic node
//
// Arguments:
//    node - The hardware intrinsic node
//
void CodeGen::genSSE42Intrinsic(GenTreeHWIntrinsic* node)
{
    NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
    GenTree*       op1         = node->gtGetOp1();
    GenTree*       op2         = node->gtGetOp2();
    regNumber      targetReg   = node->gtRegNum;
    assert(targetReg != REG_NA);
    var_types targetType = node->TypeGet();
    var_types baseType   = node->gtSIMDBaseType;

    regNumber op1Reg = op1->gtRegNum;
    regNumber op2Reg = op2->gtRegNum;
    genConsumeOperands(node);

    switch (intrinsicID)
    {
        case NI_SSE42_Crc32:
            if (op1Reg != targetReg)
            {
                inst_RV_RV(INS_mov, targetReg, op1Reg, targetType, emitTypeSize(targetType));
            }

            if (baseType == TYP_UBYTE || baseType == TYP_USHORT) // baseType is the type of the second argument
            {
                assert(targetType == TYP_INT);
                inst_RV_RV(INS_crc32, targetReg, op2Reg, baseType, emitTypeSize(baseType));
            }
            else
            {
                assert(op1->TypeGet() == op2->TypeGet());
                assert(targetType == TYP_INT || targetType == TYP_LONG);
                inst_RV_RV(INS_crc32, targetReg, op2Reg, targetType, emitTypeSize(targetType));
            }

            break;
        default:
            unreached();
            break;
    }
    genProduceReg(node);
}
Ejemplo n.º 3
0
//------------------------------------------------------------------------
// genPOPCNTIntrinsic: Generates the code for a POPCNT hardware intrinsic node
//
// Arguments:
//    node - The hardware intrinsic node
//
void CodeGen::genPOPCNTIntrinsic(GenTreeHWIntrinsic* node)
{
    NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
    GenTree*       op1         = node->gtGetOp1();
    regNumber      targetReg   = node->gtRegNum;
    assert(targetReg != REG_NA);
    var_types targetType = node->TypeGet();
    regNumber op1Reg     = op1->gtRegNum;
    genConsumeOperands(node);

    assert(intrinsicID == NI_POPCNT_PopCount);

    inst_RV_RV(INS_popcnt, targetReg, op1Reg, targetType, emitTypeSize(targetType));

    genProduceReg(node);
}
Ejemplo n.º 4
0
//------------------------------------------------------------------------
// TreeNodeInfoInitIndir: Specify register requirements for address expression
//                       of an indirection operation.
//
// Arguments:
//    indirTree - GT_IND, GT_STOREIND, block node or GT_NULLCHECK gentree node
//
void Lowering::TreeNodeInfoInitIndir(GenTreeIndir* indirTree)
{
    ContainCheckIndir(indirTree);

    // If this is the rhs of a block copy (i.e. non-enregisterable struct),
    // it has no register requirements.
    if (indirTree->TypeGet() == TYP_STRUCT)
    {
        return;
    }

    TreeNodeInfo* info    = &(indirTree->gtLsraInfo);
    bool          isStore = (indirTree->gtOper == GT_STOREIND);
    info->srcCount        = GetIndirSourceCount(indirTree);

    GenTree* addr  = indirTree->Addr();
    GenTree* index = nullptr;
    unsigned cns   = 0;

#ifdef _TARGET_ARM_
    // Unaligned loads/stores for floating point values must first be loaded into integer register(s)
    if (indirTree->gtFlags & GTF_IND_UNALIGNED)
    {
        var_types type = TYP_UNDEF;
        if (indirTree->OperGet() == GT_STOREIND)
        {
            type = indirTree->AsStoreInd()->Data()->TypeGet();
        }
        else if (indirTree->OperGet() == GT_IND)
        {
            type = indirTree->TypeGet();
        }

        if (type == TYP_FLOAT)
        {
            info->internalIntCount = 1;
        }
        else if (type == TYP_DOUBLE)
        {
            info->internalIntCount = 2;
        }
    }
#endif

    if (addr->isContained())
    {
        assert(addr->OperGet() == GT_LEA);
        GenTreeAddrMode* lea = addr->AsAddrMode();
        index                = lea->Index();
        cns                  = lea->gtOffset;

        // On ARM we may need a single internal register
        // (when both conditions are true then we still only need a single internal register)
        if ((index != nullptr) && (cns != 0))
        {
            // ARM does not support both Index and offset so we need an internal register
            info->internalIntCount++;
        }
        else if (!emitter::emitIns_valid_imm_for_ldst_offset(cns, emitTypeSize(indirTree)))
        {
            // This offset can't be contained in the ldr/str instruction, so we need an internal register
            info->internalIntCount++;
        }
    }
}
Ejemplo n.º 5
0
//------------------------------------------------------------------------
// TreeNodeInfoInitIndir: Specify register requirements for address expression
//                       of an indirection operation.
//
// Arguments:
//    indirTree - GT_IND, GT_STOREIND, block node or GT_NULLCHECK gentree node
//
void Lowering::TreeNodeInfoInitIndir(GenTreePtr indirTree)
{
    assert(indirTree->OperIsIndir());
    // If this is the rhs of a block copy (i.e. non-enregisterable struct),
    // it has no register requirements.
    if (indirTree->TypeGet() == TYP_STRUCT)
    {
        return;
    }

    GenTreePtr    addr = indirTree->gtGetOp1();
    TreeNodeInfo* info = &(indirTree->gtLsraInfo);

    GenTreePtr base  = nullptr;
    GenTreePtr index = nullptr;
    unsigned   cns   = 0;
    unsigned   mul;
    bool       rev;
    bool       modifiedSources = false;
    bool       makeContained   = true;

    if ((addr->OperGet() == GT_LEA) && IsSafeToContainMem(indirTree, addr))
    {
        GenTreeAddrMode* lea = addr->AsAddrMode();
        base                 = lea->Base();
        index                = lea->Index();
        cns                  = lea->gtOffset;

#ifdef _TARGET_ARM_
        // ARM floating-point load/store doesn't support a form similar to integer
        // ldr Rdst, [Rbase + Roffset] with offset in a register. The only supported
        // form is vldr Rdst, [Rbase + imm] with a more limited constraint on the imm.
        if (lea->HasIndex() || !emitter::emitIns_valid_imm_for_vldst_offset(cns))
        {
            if (indirTree->OperGet() == GT_STOREIND)
            {
                if (varTypeIsFloating(indirTree->AsStoreInd()->Data()))
                {
                    makeContained = false;
                }
            }
            else if (indirTree->OperGet() == GT_IND)
            {
                if (varTypeIsFloating(indirTree))
                {
                    makeContained = false;
                }
            }
        }
#endif

        if (makeContained)
        {
            m_lsra->clearOperandCounts(addr);
            addr->SetContained();
            // The srcCount is decremented because addr is now "contained",
            // then we account for the base and index below, if they are non-null.
            info->srcCount--;
        }
    }
    else if (comp->codeGen->genCreateAddrMode(addr, -1, true, 0, &rev, &base, &index, &mul, &cns, true /*nogen*/) &&
             !(modifiedSources = AreSourcesPossiblyModifiedLocals(indirTree, base, index)))
    {
        // An addressing mode will be constructed that may cause some
        // nodes to not need a register, and cause others' lifetimes to be extended
        // to the GT_IND or even its parent if it's an assignment

        assert(base != addr);
        m_lsra->clearOperandCounts(addr);
        addr->SetContained();

        // Traverse the computation below GT_IND to find the operands
        // for the addressing mode, marking the various constants and
        // intermediate results as not consuming/producing.
        // If the traversal were more complex, we might consider using
        // a traversal function, but the addressing mode is only made
        // up of simple arithmetic operators, and the code generator
        // only traverses one leg of each node.

        bool       foundBase  = (base == nullptr);
        bool       foundIndex = (index == nullptr);
        GenTreePtr nextChild  = nullptr;
        for (GenTreePtr child = addr; child != nullptr && !child->OperIsLeaf(); child = nextChild)
        {
            nextChild      = nullptr;
            GenTreePtr op1 = child->gtOp.gtOp1;
            GenTreePtr op2 = (child->OperIsBinary()) ? child->gtOp.gtOp2 : nullptr;

            if (op1 == base)
            {
                foundBase = true;
            }
            else if (op1 == index)
            {
                foundIndex = true;
            }
            else
            {
                m_lsra->clearOperandCounts(op1);
                op1->SetContained();
                if (!op1->OperIsLeaf())
                {
                    nextChild = op1;
                }
            }

            if (op2 != nullptr)
            {
                if (op2 == base)
                {
                    foundBase = true;
                }
                else if (op2 == index)
                {
                    foundIndex = true;
                }
                else
                {
                    m_lsra->clearOperandCounts(op2);
                    op2->SetContained();
                    if (!op2->OperIsLeaf())
                    {
                        assert(nextChild == nullptr);
                        nextChild = op2;
                    }
                }
            }
        }
        assert(foundBase && foundIndex);
        info->srcCount--; // it gets incremented below.
    }
    else if (addr->gtOper == GT_ARR_ELEM)
    {
        // The GT_ARR_ELEM consumes all the indices and produces the offset.
        // The array object lives until the mem access.
        // We also consume the target register to which the address is
        // computed

        info->srcCount++;
        assert(addr->gtLsraInfo.srcCount >= 2);
        addr->gtLsraInfo.srcCount -= 1;
    }
    else
    {
        // it is nothing but a plain indir
        info->srcCount--; // base gets added in below
        base = addr;
    }

    if (!makeContained)
    {
        return;
    }

    if (base != nullptr)
    {
        info->srcCount++;
    }
    if (index != nullptr && !modifiedSources)
    {
        info->srcCount++;
    }

    // On ARM we may need a single internal register
    // (when both conditions are true then we still only need a single internal register)
    if ((index != nullptr) && (cns != 0))
    {
        // ARM does not support both Index and offset so we need an internal register
        info->internalIntCount = 1;
    }
    else if (!emitter::emitIns_valid_imm_for_ldst_offset(cns, emitTypeSize(indirTree)))
    {
        // This offset can't be contained in the ldr/str instruction, so we need an internal register
        info->internalIntCount = 1;
    }
}
Ejemplo n.º 6
0
void CodeGen::genLoadFloat(GenTreePtr tree, regNumber reg)
{
    if (tree->IsRegVar())
    {
        // if it has been spilled, unspill it.%
        LclVarDsc * varDsc = &compiler->lvaTable[tree->gtLclVarCommon.gtLclNum];
        if (varDsc->lvSpilled)
        {
            UnspillFloat(varDsc);
        }

        inst_RV_RV(ins_FloatCopy(tree->TypeGet()), reg, tree->gtRegNum, tree->TypeGet());
    }
    else
    {
        bool unalignedLoad = false;
        switch (tree->OperGet())
        {
        case GT_IND:
        case GT_CLS_VAR:
            if  (tree->gtFlags & GTF_IND_UNALIGNED)
                unalignedLoad = true;
            break;
        case GT_LCL_FLD:
            // Check for a misalignment on a Floating Point field
            //
            if (varTypeIsFloating(tree->TypeGet()))
            {                
                if ((tree->gtLclFld.gtLclOffs % emitTypeSize(tree->TypeGet())) != 0)
                {
                    unalignedLoad = true;
                }
            }
            break;
        default:
            break;
        }

        if (unalignedLoad)
        {
            // Make the target addressable
            //
            regMaskTP   addrReg = genMakeAddressable(tree, 0, RegSet::KEEP_REG, true);
            regSet.rsLockUsedReg(addrReg);  // Must prevent regSet.rsGrabReg from choosing an addrReg

            var_types loadType = tree->TypeGet();
            assert(loadType == TYP_DOUBLE || loadType == TYP_FLOAT);

            // Unaligned Floating-Point Loads must be loaded into integer register(s)
            // and then moved over to the Floating-Point register
            regNumber  intRegLo    = regSet.rsGrabReg(RBM_ALLINT);
            regNumber  intRegHi    = REG_NA;
            regMaskTP  tmpLockMask = genRegMask(intRegLo);

            if (loadType == TYP_DOUBLE)
            {
                intRegHi     = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(intRegLo));
                tmpLockMask |= genRegMask(intRegHi);
            }
            
            regSet.rsLockReg(tmpLockMask);     // Temporarily lock the intRegs 
            tree->gtType = TYP_INT;     // Temporarily change the type to TYP_INT

            inst_RV_TT(ins_Load(TYP_INT), intRegLo, tree);
            regTracker.rsTrackRegTrash(intRegLo);

            if (loadType == TYP_DOUBLE)
            {
                inst_RV_TT(ins_Load(TYP_INT), intRegHi, tree, 4);
                regTracker.rsTrackRegTrash(intRegHi);
            }

            tree->gtType = loadType;    // Change the type back to the floating point type
            regSet.rsUnlockReg(tmpLockMask);   // Unlock the intRegs

            // move the integer register(s) over to the FP register
            //
            if  (loadType == TYP_DOUBLE)
                getEmitter()->emitIns_R_R_R(INS_vmov_i2d, EA_8BYTE, reg, intRegLo, intRegHi);
            else 
                getEmitter()->emitIns_R_R(INS_vmov_i2f, EA_4BYTE, reg, intRegLo);

            // Free up anything that was tied up by genMakeAddressable
            //
            regSet.rsUnlockUsedReg(addrReg);
            genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
        }
        else
        {
            inst_RV_TT(ins_FloatLoad(tree->TypeGet()), reg, tree);
        }
        if (((tree->OperGet() == GT_CLS_VAR) || (tree->OperGet() == GT_IND)) && 
            (tree->gtFlags & GTF_IND_VOLATILE))
        {
            // Emit a memory barrier instruction after the load 
            instGen_MemoryBarrier();
        }
    }
}
Ejemplo n.º 7
0
void CodeGen::genFloatAssign(GenTree *tree)
{
    var_types   type       = tree->TypeGet();
    GenTreePtr  op1        = tree->gtGetOp1();
    GenTreePtr  op2        = tree->gtGetOp2();

    regMaskTP   needRegOp1 = RBM_ALLINT;
    regMaskTP   addrReg    = RBM_NONE;
    bool        volat      = false;        // Is this a volatile store
    bool        unaligned  = false;        // Is this an unaligned store
    regNumber   op2reg     = REG_NA;

#ifdef DEBUGGING_SUPPORT
    unsigned    lclVarNum  = compiler->lvaCount;
    unsigned    lclILoffs  = DUMMY_INIT(0);
#endif

    noway_assert(tree->OperGet() == GT_ASG);

    // Is the target a floating-point local variable?
    //  possibly even an enregistered floating-point local variable?
    //
    switch (op1->gtOper)
    {
        unsigned        varNum;
        LclVarDsc   *   varDsc;

    case GT_LCL_FLD:
        // Check for a misalignment on a Floating Point field
        //
        if (varTypeIsFloating(op1->TypeGet()))
        {                
            if ((op1->gtLclFld.gtLclOffs % emitTypeSize(op1->TypeGet())) != 0)
            {
                unaligned = true;
            }
        }
        break;

    case GT_LCL_VAR:
        varNum = op1->gtLclVarCommon.gtLclNum;
        noway_assert(varNum < compiler->lvaCount);
        varDsc = compiler->lvaTable + varNum;

 #ifdef DEBUGGING_SUPPORT
        // For non-debuggable code, every definition of a lcl-var has
        // to be checked to see if we need to open a new scope for it.
        // Remember the local var info to call siCheckVarScope
        // AFTER code generation of the assignment.
        //
        if (compiler->opts.compScopeInfo && !compiler->opts.compDbgCode && (compiler->info.compVarScopesCount > 0))
        {
            lclVarNum = varNum;
            lclILoffs = op1->gtLclVar.gtLclILoffs;
        }
 #endif

        // Dead Store assert (with min opts we may have dead stores)
        //
        noway_assert(!varDsc->lvTracked || compiler->opts.MinOpts() ||  !(op1->gtFlags & GTF_VAR_DEATH));

        // Does this variable live in a register?
        //
        if (genMarkLclVar(op1))
        {
             noway_assert(!compiler->opts.compDbgCode);   // We don't enregister any floats with debug codegen

            // Get hold of the target register
            //
            regNumber op1Reg = op1->gtRegVar.gtRegNum;

            // the variable being assigned should be dead in op2
            assert(!varDsc->lvTracked || !VarSetOps::IsMember(compiler, genUpdateLiveSetForward(op2), varDsc->lvVarIndex));

            // Setup register preferencing, so that we try to target the op1 enregistered variable
            //
            regMaskTP bestMask = genRegMask(op1Reg);
            if (type==TYP_DOUBLE)
            {
                assert((bestMask & RBM_DBL_REGS) != 0);
                bestMask |= genRegMask(REG_NEXT(op1Reg));
            }
            RegSet::RegisterPreference pref(RBM_ALLFLOAT, bestMask);

            // Evaluate op2 into a floating point register 
            //
            genCodeForTreeFloat(op2, &pref);
    
            noway_assert(op2->gtFlags & GTF_REG_VAL);

            // Make sure the value ends up in the right place ...
            // For example if op2 is a call that returns a result 
            // in REG_F0, we will need to do a move instruction here
            //
            if ((op2->gtRegNum != op1Reg) || (op2->TypeGet() != type))
            {
                regMaskTP spillRegs = regSet.rsMaskUsed & genRegMaskFloat(op1Reg, op1->TypeGet());
                if  (spillRegs != 0)
                    regSet.rsSpillRegs(spillRegs);

                assert(type == op1->TypeGet());

                inst_RV_RV(ins_FloatConv(type, op2->TypeGet()), op1Reg, op2->gtRegNum, type);
            }
            genUpdateLife(op1);
            goto DONE_ASG;
        }
        break;

    case GT_CLS_VAR:
    case GT_IND:
        // Check for a volatile/unaligned store
        //
        assert((op1->OperGet() == GT_CLS_VAR) || (op1->OperGet() == GT_IND));   // Required for GTF_IND_VOLATILE flag to be valid
        if (op1->gtFlags & GTF_IND_VOLATILE)
            volat = true;
        if (op1->gtFlags & GTF_IND_UNALIGNED)
            unaligned = true;
        break;

    default:
        break;
    }

    // Is the value being assigned an enregistered floating-point local variable?
    //
    switch (op2->gtOper)
    {
    case GT_LCL_VAR:

        if  (!genMarkLclVar(op2))
            break;

        __fallthrough;

    case GT_REG_VAR:

        // We must honor the order evalauation in case op1 reassigns our op2 register
        //
        if  (tree->gtFlags & GTF_REVERSE_OPS)
            break;

        // Is there an implicit conversion that we have to insert?
        // Handle this case with the normal cases below.
        //
        if (type != op2->TypeGet())
            break;

        // Make the target addressable
        //
        addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true);

        noway_assert(op2->gtFlags & GTF_REG_VAL);
        noway_assert(op2->IsRegVar());
        
        op2reg = op2->gtRegVar.gtRegNum; 
        genUpdateLife(op2);

        goto CHK_VOLAT_UNALIGN;
    default:
        break;
    }

    // Is the op2 (RHS) more complex than op1 (LHS)?
    //
    if  (tree->gtFlags & GTF_REVERSE_OPS)
    {
        regMaskTP bestRegs = regSet.rsNarrowHint(RBM_ALLFLOAT, ~op1->gtRsvdRegs);
        RegSet::RegisterPreference pref(RBM_ALLFLOAT, bestRegs);

        // Generate op2 (RHS) into a floating point register
        //
        genCodeForTreeFloat(op2, &pref);
        regSet.SetUsedRegFloat(op2, true);

        // Make the target addressable
        //
        addrReg = genMakeAddressable(op1,
                                     needRegOp1,
                                     RegSet::KEEP_REG, true);

        genRecoverReg(op2, RBM_ALLFLOAT, RegSet::KEEP_REG);
        noway_assert(op2->gtFlags & GTF_REG_VAL);
        regSet.SetUsedRegFloat(op2, false);
    }
    else
    {
        needRegOp1 = regSet.rsNarrowHint(needRegOp1, ~op2->gtRsvdRegs);

        // Make the target addressable
        //
        addrReg = genMakeAddressable(op1,
                                     needRegOp1,
                                     RegSet::KEEP_REG, true);
        

        // Generate the RHS into any floating point register
        genCodeForTreeFloat(op2);

    }
    noway_assert(op2->gtFlags & GTF_REG_VAL);

    op2reg = op2->gtRegNum;

    // Is there an implicit conversion that we have to insert?
    //
    if (type != op2->TypeGet())
    {
        regMaskTP bestMask = genRegMask(op2reg);
        if (type==TYP_DOUBLE)
        {
            if (bestMask & RBM_DBL_REGS)
            {
                bestMask |= genRegMask(REG_NEXT(op2reg));
            }
            else
            {
                bestMask |= genRegMask(REG_PREV(op2reg));
            }
        }
        RegSet::RegisterPreference op2Pref(RBM_ALLFLOAT, bestMask);
        op2reg = regSet.PickRegFloat(type, &op2Pref);

        inst_RV_RV(ins_FloatConv(type, op2->TypeGet()), op2reg, op2->gtRegNum, type);
    }

    // Make sure the LHS is still addressable
    //
    addrReg = genKeepAddressable(op1, addrReg);

CHK_VOLAT_UNALIGN:
        
    regSet.rsLockUsedReg(addrReg);  // Must prevent unaligned regSet.rsGrabReg from choosing an addrReg
        
    if (volat)
    {
        // Emit a memory barrier instruction before the store
        instGen_MemoryBarrier();
    }
    if (unaligned)
    {
        var_types storeType = op1->TypeGet();
        assert(storeType == TYP_DOUBLE || storeType == TYP_FLOAT);

        // Unaligned Floating-Point Stores must be done using the integer register(s)
        regNumber  intRegLo    = regSet.rsGrabReg(RBM_ALLINT);
        regNumber  intRegHi    = REG_NA;
        regMaskTP  tmpLockMask = genRegMask(intRegLo);

        if (storeType == TYP_DOUBLE)
        {
            intRegHi     = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(intRegLo));
            tmpLockMask |= genRegMask(intRegHi);
        }

        // move the FP register over to the integer register(s)
        //
        if  (storeType == TYP_DOUBLE)
        {
            getEmitter()->emitIns_R_R_R(INS_vmov_d2i, EA_8BYTE, intRegLo, intRegHi, op2reg);
            regTracker.rsTrackRegTrash(intRegHi);
        }
        else 
        {
            getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, intRegLo, op2reg);
        }
        regTracker.rsTrackRegTrash(intRegLo);

        regSet.rsLockReg(tmpLockMask);     // Temporarily lock the intRegs 
        op1->gtType = TYP_INT;      // Temporarily change the type to TYP_INT
        
        inst_TT_RV(ins_Store(TYP_INT), op1, intRegLo);

        if (storeType == TYP_DOUBLE)
        {
            inst_TT_RV(ins_Store(TYP_INT), op1, intRegHi, 4);
        }
        
        op1->gtType = storeType;    // Change the type back to the floating point type
        regSet.rsUnlockReg(tmpLockMask);   // Unlock the intRegs
    }
    else
    {
        // Move the value into the target
        //        
        inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2reg);
    }

    // Free up anything that was tied up by the LHS
    //
    regSet.rsUnlockUsedReg(addrReg);
    genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);

DONE_ASG:

    genUpdateLife(tree);

#ifdef DEBUGGING_SUPPORT
    /* For non-debuggable code, every definition of a lcl-var has
     * to be checked to see if we need to open a new scope for it.
     */
    if (lclVarNum < compiler->lvaCount)
        siCheckVarScope(lclVarNum, lclILoffs);
#endif
}
Ejemplo n.º 8
0
//------------------------------------------------------------------------
// genSSE2Intrinsic: Generates the code for an SSE2 hardware intrinsic node
//
// Arguments:
//    node - The hardware intrinsic node
//
void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
{
    NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
    GenTree*       op1         = node->gtGetOp1();
    GenTree*       op2         = node->gtGetOp2();
    regNumber      targetReg   = node->gtRegNum;
    var_types      targetType  = node->TypeGet();
    var_types      baseType    = node->gtSIMDBaseType;
    regNumber      op1Reg      = REG_NA;
    regNumber      op2Reg      = REG_NA;
    emitter*       emit        = getEmitter();
    int            ival        = -1;

    if ((op1 != nullptr) && !op1->OperIsList())
    {
        op1Reg = op1->gtRegNum;
        genConsumeOperands(node);
    }

    switch (intrinsicID)
    {
        // All integer overloads are handled by table codegen
        case NI_SSE2_CompareLessThan:
        {
            assert(op1 != nullptr);
            assert(op2 != nullptr);
            assert(baseType == TYP_DOUBLE);
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            op2Reg          = op2->gtRegNum;
            ival            = Compiler::ivalOfHWIntrinsic(intrinsicID);
            assert(ival != -1);
            emit->emitIns_SIMD_R_R_R_I(ins, emitTypeSize(TYP_SIMD16), targetReg, op1Reg, op2Reg, ival);

            break;
        }

        case NI_SSE2_CompareEqualOrderedScalar:
        case NI_SSE2_CompareEqualUnorderedScalar:
        {
            assert(baseType == TYP_DOUBLE);
            op2Reg             = op2->gtRegNum;
            regNumber   tmpReg = node->GetSingleTempReg();
            instruction ins    = Compiler::insOfHWIntrinsic(intrinsicID, baseType);

            // Ensure we aren't overwriting targetReg
            assert(tmpReg != targetReg);

            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
            emit->emitIns_R(INS_setpo, EA_1BYTE, targetReg);
            emit->emitIns_R(INS_sete, EA_1BYTE, tmpReg);
            emit->emitIns_R_R(INS_and, EA_1BYTE, tmpReg, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, tmpReg);
            break;
        }

        case NI_SSE2_CompareGreaterThanOrderedScalar:
        case NI_SSE2_CompareGreaterThanUnorderedScalar:
        {
            assert(baseType == TYP_DOUBLE);
            op2Reg          = op2->gtRegNum;
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);

            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
            emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
            break;
        }

        case NI_SSE2_CompareGreaterThanOrEqualOrderedScalar:
        case NI_SSE2_CompareGreaterThanOrEqualUnorderedScalar:
        {
            assert(baseType == TYP_DOUBLE);
            op2Reg          = op2->gtRegNum;
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);

            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
            emit->emitIns_R(INS_setae, EA_1BYTE, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
            break;
        }

        case NI_SSE2_CompareLessThanOrderedScalar:
        case NI_SSE2_CompareLessThanUnorderedScalar:
        {
            assert(baseType == TYP_DOUBLE);
            op2Reg          = op2->gtRegNum;
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);

            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op2Reg, op1Reg);
            emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
            break;
        }

        case NI_SSE2_CompareLessThanOrEqualOrderedScalar:
        case NI_SSE2_CompareLessThanOrEqualUnorderedScalar:
        {
            assert(baseType == TYP_DOUBLE);
            op2Reg          = op2->gtRegNum;
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);

            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op2Reg, op1Reg);
            emit->emitIns_R(INS_setae, EA_1BYTE, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
            break;
        }

        case NI_SSE2_CompareNotEqualOrderedScalar:
        case NI_SSE2_CompareNotEqualUnorderedScalar:
        {
            assert(baseType == TYP_DOUBLE);
            op2Reg             = op2->gtRegNum;
            instruction ins    = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            regNumber   tmpReg = node->GetSingleTempReg();

            // Ensure we aren't overwriting targetReg
            assert(tmpReg != targetReg);

            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
            emit->emitIns_R(INS_setpe, EA_1BYTE, targetReg);
            emit->emitIns_R(INS_setne, EA_1BYTE, tmpReg);
            emit->emitIns_R_R(INS_or, EA_1BYTE, tmpReg, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, tmpReg);
            break;
        }

        case NI_SSE2_ConvertScalarToVector128Double:
        case NI_SSE2_ConvertScalarToVector128Single:
        {
            assert(baseType == TYP_INT || baseType == TYP_LONG || baseType == TYP_FLOAT || baseType == TYP_DOUBLE);
            assert(op1 != nullptr);
            assert(op2 != nullptr);
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            genHWIntrinsic_R_R_RM(node, ins);
            break;
        }

        case NI_SSE2_ConvertScalarToVector128Int64:
        case NI_SSE2_ConvertScalarToVector128UInt64:
        {
            assert(baseType == TYP_LONG || baseType == TYP_ULONG);
            assert(op1 != nullptr);
            assert(op2 == nullptr);
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            // TODO-XArch-CQ -> use of type size of TYP_SIMD16 leads to
            // instruction register encoding errors for SSE legacy encoding
            emit->emitIns_R_R(ins, emitTypeSize(baseType), targetReg, op1Reg);
            break;
        }

        case NI_SSE2_ConvertToDouble:
        {
            assert(op2 == nullptr);
            if (op1Reg != targetReg)
            {
                instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
                emit->emitIns_R_R(ins, emitTypeSize(targetType), targetReg, op1Reg);
            }
            break;
        }

        case NI_SSE2_ConvertToInt32:
        case NI_SSE2_ConvertToInt64:
        case NI_SSE2_ConvertToUInt32:
        case NI_SSE2_ConvertToUInt64:
        {
            assert(op2 == nullptr);
            assert(baseType == TYP_DOUBLE || baseType == TYP_FLOAT || baseType == TYP_INT || baseType == TYP_UINT ||
                   baseType == TYP_LONG || baseType == TYP_ULONG);
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            if (baseType == TYP_DOUBLE || baseType == TYP_FLOAT)
            {
                emit->emitIns_R_R(ins, emitTypeSize(targetType), targetReg, op1Reg);
            }
            else
            {
                emit->emitIns_R_R(ins, emitActualTypeSize(baseType), op1Reg, targetReg);
            }
            break;
        }

        case NI_SSE2_LoadFence:
        {
            assert(baseType == TYP_VOID);
            assert(op1 == nullptr);
            assert(op2 == nullptr);
            emit->emitIns(INS_lfence);
            break;
        }

        case NI_SSE2_MemoryFence:
        {
            assert(baseType == TYP_VOID);
            assert(op1 == nullptr);
            assert(op2 == nullptr);
            emit->emitIns(INS_mfence);
            break;
        }

        case NI_SSE2_MoveMask:
        {
            assert(op2 == nullptr);
            assert(baseType == TYP_BYTE || baseType == TYP_UBYTE || baseType == TYP_DOUBLE);

            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            emit->emitIns_R_R(ins, emitTypeSize(TYP_INT), targetReg, op1Reg);
            break;
        }

        case NI_SSE2_SetScalarVector128:
        {
            assert(baseType == TYP_DOUBLE);
            assert(op2 == nullptr);

            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
            if (op1Reg == targetReg)
            {
                regNumber tmpReg = node->GetSingleTempReg();

                // Ensure we aren't overwriting targetReg
                assert(tmpReg != targetReg);

                emit->emitIns_R_R(INS_movapd, emitTypeSize(TYP_SIMD16), tmpReg, op1Reg);
                op1Reg = tmpReg;
            }

            emit->emitIns_SIMD_R_R_R(INS_xorpd, emitTypeSize(TYP_SIMD16), targetReg, targetReg, targetReg);
            emit->emitIns_SIMD_R_R_R(ins, emitTypeSize(TYP_SIMD16), targetReg, targetReg, op1Reg);
            break;
        }

        case NI_SSE2_SetZeroVector128:
        {
            assert(baseType != TYP_FLOAT);
            assert(baseType >= TYP_BYTE && baseType <= TYP_DOUBLE);
            assert(op1 == nullptr);
            assert(op2 == nullptr);

            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            emit->emitIns_SIMD_R_R_R(ins, emitTypeSize(TYP_SIMD16), targetReg, targetReg, targetReg);
            break;
        }

        default:
            unreached();
            break;
    }

    genProduceReg(node);
}
Ejemplo n.º 9
0
//------------------------------------------------------------------------
// genSSEIntrinsic: Generates the code for an SSE hardware intrinsic node
//
// Arguments:
//    node - The hardware intrinsic node
//
void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node)
{
    NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
    GenTree*       op1         = node->gtGetOp1();
    GenTree*       op2         = node->gtGetOp2();
    GenTree*       op3         = nullptr;
    GenTree*       op4         = nullptr;
    regNumber      targetReg   = node->gtRegNum;
    var_types      targetType  = node->TypeGet();
    var_types      baseType    = node->gtSIMDBaseType;

    regNumber op1Reg = REG_NA;
    regNumber op2Reg = REG_NA;
    regNumber op3Reg = REG_NA;
    regNumber op4Reg = REG_NA;
    emitter*  emit   = getEmitter();

    if ((op1 != nullptr) && !op1->OperIsList())
    {
        op1Reg = op1->gtRegNum;
        genConsumeOperands(node);
    }

    switch (intrinsicID)
    {
        case NI_SSE_CompareEqualOrderedScalar:
        case NI_SSE_CompareEqualUnorderedScalar:
        {
            assert(baseType == TYP_FLOAT);
            op2Reg             = op2->gtRegNum;
            regNumber   tmpReg = node->GetSingleTempReg();
            instruction ins    = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);

            // Ensure we aren't overwriting targetReg
            assert(tmpReg != targetReg);

            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
            emit->emitIns_R(INS_setpo, EA_1BYTE, targetReg);
            emit->emitIns_R(INS_sete, EA_1BYTE, tmpReg);
            emit->emitIns_R_R(INS_and, EA_1BYTE, tmpReg, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, tmpReg);
            break;
        }

        case NI_SSE_CompareGreaterThanOrderedScalar:
        case NI_SSE_CompareGreaterThanUnorderedScalar:
        {
            assert(baseType == TYP_FLOAT);
            op2Reg = op2->gtRegNum;

            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
            emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
            break;
        }

        case NI_SSE_CompareGreaterThanOrEqualOrderedScalar:
        case NI_SSE_CompareGreaterThanOrEqualUnorderedScalar:
        {
            assert(baseType == TYP_FLOAT);
            op2Reg = op2->gtRegNum;

            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
            emit->emitIns_R(INS_setae, EA_1BYTE, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
            break;
        }

        case NI_SSE_CompareLessThanOrderedScalar:
        case NI_SSE_CompareLessThanUnorderedScalar:
        {
            assert(baseType == TYP_FLOAT);
            op2Reg = op2->gtRegNum;

            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op2Reg, op1Reg);
            emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
            break;
        }

        case NI_SSE_CompareLessThanOrEqualOrderedScalar:
        case NI_SSE_CompareLessThanOrEqualUnorderedScalar:
        {
            assert(baseType == TYP_FLOAT);
            op2Reg = op2->gtRegNum;

            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op2Reg, op1Reg);
            emit->emitIns_R(INS_setae, EA_1BYTE, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
            break;
        }

        case NI_SSE_CompareNotEqualOrderedScalar:
        case NI_SSE_CompareNotEqualUnorderedScalar:
        {
            assert(baseType == TYP_FLOAT);
            op2Reg             = op2->gtRegNum;
            regNumber   tmpReg = node->GetSingleTempReg();
            instruction ins    = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);

            // Ensure we aren't overwriting targetReg
            assert(tmpReg != targetReg);

            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
            emit->emitIns_R(INS_setpe, EA_1BYTE, targetReg);
            emit->emitIns_R(INS_setne, EA_1BYTE, tmpReg);
            emit->emitIns_R_R(INS_or, EA_1BYTE, tmpReg, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, tmpReg);
            break;
        }

        case NI_SSE_ConvertToSingle:
        case NI_SSE_StaticCast:
        {
            assert(op2 == nullptr);
            if (op1Reg != targetReg)
            {
                instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
                emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), targetReg, op1Reg);
            }
            break;
        }

        case NI_SSE_MoveMask:
        {
            assert(baseType == TYP_FLOAT);
            assert(op2 == nullptr);

            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
            emit->emitIns_R_R(ins, emitTypeSize(TYP_INT), targetReg, op1Reg);
            break;
        }

        case NI_SSE_Prefetch0:
        case NI_SSE_Prefetch1:
        case NI_SSE_Prefetch2:
        case NI_SSE_PrefetchNonTemporal:
        {
            assert(baseType == TYP_UBYTE);
            assert(op2 == nullptr);

            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
            emit->emitIns_AR(ins, emitTypeSize(baseType), op1Reg, 0);
            break;
        }

        case NI_SSE_SetScalarVector128:
        {
            assert(baseType == TYP_FLOAT);
            assert(op2 == nullptr);

            if (op1Reg == targetReg)
            {
                regNumber tmpReg = node->GetSingleTempReg();

                // Ensure we aren't overwriting targetReg
                assert(tmpReg != targetReg);

                emit->emitIns_R_R(INS_movaps, emitTypeSize(TYP_SIMD16), tmpReg, op1Reg);
                op1Reg = tmpReg;
            }

            emit->emitIns_SIMD_R_R_R(INS_xorps, emitTypeSize(TYP_SIMD16), targetReg, targetReg, targetReg);
            emit->emitIns_SIMD_R_R_R(INS_movss, emitTypeSize(TYP_SIMD16), targetReg, targetReg, op1Reg);
            break;
        }

        case NI_SSE_SetZeroVector128:
        {
            assert(baseType == TYP_FLOAT);
            assert(op1 == nullptr);
            assert(op2 == nullptr);
            emit->emitIns_SIMD_R_R_R(INS_xorps, emitTypeSize(TYP_SIMD16), targetReg, targetReg, targetReg);
            break;
        }

        case NI_SSE_StoreFence:
        {
            assert(baseType == TYP_VOID);
            assert(op1 == nullptr);
            assert(op2 == nullptr);
            emit->emitIns(INS_sfence);
            break;
        }

        default:
            unreached();
            break;
    }

    genProduceReg(node);
}
Ejemplo n.º 10
0
//------------------------------------------------------------------------
// genHWIntrinsic: Generates the code for a given hardware intrinsic node.
//
// Arguments:
//    node - The hardware intrinsic node
//
void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
{
    NamedIntrinsic      intrinsicID = node->gtHWIntrinsicId;
    InstructionSet      isa         = Compiler::isaOfHWIntrinsic(intrinsicID);
    HWIntrinsicCategory category    = Compiler::categoryOfHWIntrinsic(intrinsicID);
    HWIntrinsicFlag     flags       = Compiler::flagsOfHWIntrinsic(intrinsicID);
    int                 ival        = Compiler::ivalOfHWIntrinsic(intrinsicID);
    int                 numArgs     = Compiler::numArgsOfHWIntrinsic(node);

    assert((flags & HW_Flag_NoCodeGen) == 0);

    if (genIsTableDrivenHWIntrinsic(category, flags))
    {
        GenTree*  op1        = node->gtGetOp1();
        GenTree*  op2        = node->gtGetOp2();
        regNumber targetReg  = node->gtRegNum;
        var_types targetType = node->TypeGet();
        var_types baseType   = node->gtSIMDBaseType;

        regNumber op1Reg = REG_NA;
        regNumber op2Reg = REG_NA;
        emitter*  emit   = getEmitter();

        assert(numArgs >= 0);
        instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
        assert(ins != INS_invalid);
        emitAttr simdSize = EA_ATTR(node->gtSIMDSize);
        assert(simdSize != 0);

        switch (numArgs)
        {
            case 1:
                genConsumeOperands(node);
                op1Reg = op1->gtRegNum;
                if (category == HW_Category_MemoryLoad)
                {
                    emit->emitIns_R_AR(ins, simdSize, targetReg, op1Reg, 0);
                }
                else if (category == HW_Category_SIMDScalar && (flags & HW_Flag_CopyUpperBits) != 0)
                {
                    emit->emitIns_SIMD_R_R_R(ins, simdSize, targetReg, op1Reg, op1Reg);
                }
                else if ((ival != -1) && varTypeIsFloating(baseType))
                {
                    emit->emitIns_R_R_I(ins, simdSize, targetReg, op1Reg, ival);
                }
                else
                {
                    emit->emitIns_R_R(ins, simdSize, targetReg, op1Reg);
                }
                break;

            case 2:
                genConsumeOperands(node);
                op1Reg = op1->gtRegNum;
                op2Reg = op2->gtRegNum;
                if (category == HW_Category_MemoryStore)
                {
                    emit->emitIns_AR_R(ins, simdSize, op2Reg, op1Reg, 0);
                }
                else if ((ival != -1) && varTypeIsFloating(baseType))
                {
                    genHWIntrinsic_R_R_RM_I(node, ins);
                }
                else if (category == HW_Category_MemoryLoad)
                {
                    emit->emitIns_SIMD_R_R_AR(ins, simdSize, targetReg, op1Reg, op2Reg);
                }
                else if (Compiler::isImmHWIntrinsic(intrinsicID, op2))
                {
                    if (intrinsicID == NI_SSE2_Extract)
                    {
                        // extract instructions return to GP-registers, so it needs int size as the emitsize
                        simdSize = emitTypeSize(TYP_INT);
                    }
                    auto emitSwCase = [&](unsigned i) {
                        emit->emitIns_SIMD_R_R_I(ins, simdSize, targetReg, op1Reg, (int)i);
                    };

                    if (op2->IsCnsIntOrI())
                    {
                        ssize_t ival = op2->AsIntCon()->IconValue();
                        emitSwCase((unsigned)ival);
                    }
                    else
                    {
                        // We emit a fallback case for the scenario when the imm-op is not a constant. This should
                        // normally happen when the intrinsic is called indirectly, such as via Reflection. However, it
                        // can also occur if the consumer calls it directly and just doesn't pass a constant value.
                        regNumber baseReg = node->ExtractTempReg();
                        regNumber offsReg = node->GetSingleTempReg();
                        genHWIntrinsicJumpTableFallback(intrinsicID, op2Reg, baseReg, offsReg, emitSwCase);
                    }
                }
                else
                {
                    genHWIntrinsic_R_R_RM(node, ins);
                }
                break;
            case 3:
            {
                assert(op1->OperIsList());
                assert(op1->gtGetOp2()->OperIsList());
                assert(op1->gtGetOp2()->gtGetOp2()->OperIsList());

                GenTreeArgList* argList = op1->AsArgList();
                op1                     = argList->Current();
                genConsumeRegs(op1);
                op1Reg = op1->gtRegNum;

                argList = argList->Rest();
                op2     = argList->Current();
                genConsumeRegs(op2);
                op2Reg = op2->gtRegNum;

                argList      = argList->Rest();
                GenTree* op3 = argList->Current();
                genConsumeRegs(op3);
                regNumber op3Reg = op3->gtRegNum;

                if (Compiler::isImmHWIntrinsic(intrinsicID, op3))
                {
                    auto emitSwCase = [&](unsigned i) {
                        emit->emitIns_SIMD_R_R_R_I(ins, simdSize, targetReg, op1Reg, op2Reg, (int)i);
                    };
                    if (op3->IsCnsIntOrI())
                    {
                        ssize_t ival = op3->AsIntCon()->IconValue();
                        emitSwCase((unsigned)ival);
                    }
                    else
                    {
                        // We emit a fallback case for the scenario when the imm-op is not a constant. This should
                        // normally happen when the intrinsic is called indirectly, such as via Reflection. However, it
                        // can also occur if the consumer calls it directly and just doesn't pass a constant value.
                        regNumber baseReg = node->ExtractTempReg();
                        regNumber offsReg = node->GetSingleTempReg();
                        genHWIntrinsicJumpTableFallback(intrinsicID, op3Reg, baseReg, offsReg, emitSwCase);
                    }
                }
                else if (category == HW_Category_MemoryStore)
                {
                    assert(intrinsicID == NI_SSE2_MaskMove);
                    assert(targetReg == REG_NA);

                    // SSE2 MaskMove hardcodes the destination (op3) in DI/EDI/RDI
                    if (op3Reg != REG_EDI)
                    {
                        emit->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_EDI, op3Reg);
                    }
                    emit->emitIns_R_R(ins, simdSize, op1Reg, op2Reg);
                }
                else
                {
                    emit->emitIns_SIMD_R_R_R_R(ins, simdSize, targetReg, op1Reg, op2Reg, op3Reg);
                }
                break;
            }

            default:
                unreached();
                break;
        }
        genProduceReg(node);
        return;
    }

    switch (isa)
    {
        case InstructionSet_SSE:
            genSSEIntrinsic(node);
            break;
        case InstructionSet_SSE2:
            genSSE2Intrinsic(node);
            break;
        case InstructionSet_SSE41:
            genSSE41Intrinsic(node);
            break;
        case InstructionSet_SSE42:
            genSSE42Intrinsic(node);
            break;
        case InstructionSet_AVX:
            genAVXIntrinsic(node);
            break;
        case InstructionSet_AVX2:
            genAVX2Intrinsic(node);
            break;
        case InstructionSet_AES:
            genAESIntrinsic(node);
            break;
        case InstructionSet_BMI1:
            genBMI1Intrinsic(node);
            break;
        case InstructionSet_BMI2:
            genBMI2Intrinsic(node);
            break;
        case InstructionSet_FMA:
            genFMAIntrinsic(node);
            break;
        case InstructionSet_LZCNT:
            genLZCNTIntrinsic(node);
            break;
        case InstructionSet_PCLMULQDQ:
            genPCLMULQDQIntrinsic(node);
            break;
        case InstructionSet_POPCNT:
            genPOPCNTIntrinsic(node);
            break;
        default:
            unreached();
            break;
    }
}
Ejemplo n.º 11
0
//------------------------------------------------------------------------
// genSSE41Intrinsic: Generates the code for an SSE4.1 hardware intrinsic node
//
// Arguments:
//    node - The hardware intrinsic node
//
void CodeGen::genSSE41Intrinsic(GenTreeHWIntrinsic* node)
{
    NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
    GenTree*       op1         = node->gtGetOp1();
    GenTree*       op2         = node->gtGetOp2();
    GenTree*       op3         = nullptr;
    GenTree*       op4         = nullptr;
    regNumber      targetReg   = node->gtRegNum;
    var_types      targetType  = node->TypeGet();
    var_types      baseType    = node->gtSIMDBaseType;

    regNumber op1Reg = REG_NA;
    regNumber op2Reg = REG_NA;
    regNumber op3Reg = REG_NA;
    regNumber op4Reg = REG_NA;
    emitter*  emit   = getEmitter();

    if ((op1 != nullptr) && !op1->OperIsList())
    {
        op1Reg = op1->gtRegNum;
        genConsumeOperands(node);
    }

    switch (intrinsicID)
    {
        case NI_SSE41_TestAllOnes:
        {
            regNumber tmpReg = node->GetSingleTempReg();
            assert(Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType) == INS_ptest);
            emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, emitTypeSize(TYP_SIMD16), tmpReg, tmpReg, tmpReg);
            emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
            emit->emitIns_R_R(INS_ptest, emitTypeSize(TYP_SIMD16), op1Reg, tmpReg);
            emit->emitIns_R(INS_setb, EA_1BYTE, targetReg);
            break;
        }

        case NI_SSE41_TestAllZeros:
        case NI_SSE41_TestZ:
        {
            assert(Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType) == INS_ptest);
            emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
            emit->emitIns_R_R(INS_ptest, emitTypeSize(TYP_SIMD16), op1Reg, op2->gtRegNum);
            emit->emitIns_R(INS_sete, EA_1BYTE, targetReg);
            break;
        }

        case NI_SSE41_TestC:
        {
            assert(Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType) == INS_ptest);
            emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
            emit->emitIns_R_R(INS_ptest, emitTypeSize(TYP_SIMD16), op1Reg, op2->gtRegNum);
            emit->emitIns_R(INS_setb, EA_1BYTE, targetReg);
            break;
        }

        case NI_SSE41_TestMixOnesZeros:
        case NI_SSE41_TestNotZAndNotC:
        {
            assert(Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType) == INS_ptest);
            emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
            emit->emitIns_R_R(INS_ptest, emitTypeSize(TYP_SIMD16), op1Reg, op2->gtRegNum);
            emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
            break;
        }

        case NI_SSE41_Extract:
        {
            regNumber   tmpTargetReg = REG_NA;
            instruction ins          = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            if (baseType == TYP_FLOAT)
            {
                tmpTargetReg = node->ExtractTempReg();
            }
            auto emitSwCase = [&](unsigned i) {
                if (baseType == TYP_FLOAT)
                {
                    // extract instructions return to GP-registers, so it needs int size as the emitsize
                    emit->emitIns_SIMD_R_R_I(ins, emitTypeSize(TYP_INT), op1Reg, tmpTargetReg, (int)i);
                    emit->emitIns_R_R(INS_mov_i2xmm, EA_4BYTE, targetReg, tmpTargetReg);
                }
                else
                {
                    emit->emitIns_SIMD_R_R_I(ins, emitTypeSize(TYP_INT), targetReg, op1Reg, (int)i);
                }
            };

            if (op2->IsCnsIntOrI())
            {
                ssize_t ival = op2->AsIntCon()->IconValue();
                emitSwCase((unsigned)ival);
            }
            else
            {
                // We emit a fallback case for the scenario when the imm-op is not a constant. This should
                // normally happen when the intrinsic is called indirectly, such as via Reflection. However, it
                // can also occur if the consumer calls it directly and just doesn't pass a constant value.
                regNumber baseReg = node->ExtractTempReg();
                regNumber offsReg = node->GetSingleTempReg();
                genHWIntrinsicJumpTableFallback(intrinsicID, op2->gtRegNum, baseReg, offsReg, emitSwCase);
            }
            break;
        }

        default:
            unreached();
            break;
    }

    genProduceReg(node);
}