Example #1
0
//------------------------------------------------------------------------
// ContainCheckIndir: Determine whether operands of an indir should be contained.
//
// Arguments:
//    indirNode - The indirection node of interest
//
// Notes:
//    This is called for both store and load indirections.
//    The address is contained only when it is a GT_LEA for which
//    IsSafeToContainMem() succeeds. On ARM, a floating-point load/store whose
//    address mode has an index, or an offset that is not valid for vldr/vstr,
//    keeps its address uncontained.
//
// Return Value:
//    None.
//
void Lowering::ContainCheckIndir(GenTreeIndir* indirNode)
{
    // If this is the rhs of a block copy it will be handled when we handle the store.
    if (indirNode->TypeGet() == TYP_STRUCT)
    {
        return;
    }

#ifdef FEATURE_SIMD
    // If indirTree is of TYP_SIMD12, don't mark addr as contained
    // so that it always get computed to a register.  This would
    // mean codegen side logic doesn't need to handle all possible
    // addr expressions that could be contained.
    //
    // TODO-ARM64-CQ: handle other addr mode expressions that could be marked
    // as contained.
    if (indirNode->TypeGet() == TYP_SIMD12)
    {
        return;
    }
#endif // FEATURE_SIMD

    // Only an address mode (GT_LEA) that is safe to fold into this indirection is a candidate.
    GenTree* addr = indirNode->Addr();
    if ((addr->OperGet() != GT_LEA) || !IsSafeToContainMem(indirNode, addr))
    {
        return;
    }

    bool makeContained = true;

#ifdef _TARGET_ARM_
    GenTreeAddrMode* lea = addr->AsAddrMode();
    int              cns = lea->Offset();

    // ARM floating-point load/store doesn't support a form similar to integer
    // ldr Rdst, [Rbase + Roffset] with offset in a register. The only supported
    // form is vldr Rdst, [Rbase + imm] with a more limited constraint on the imm.
    if (lea->HasIndex() || !emitter::emitIns_valid_imm_for_vldst_offset(cns))
    {
        if (indirNode->OperGet() == GT_STOREIND)
        {
            // For a store, the type that matters is the type of the value being stored.
            makeContained = !varTypeIsFloating(indirNode->AsStoreInd()->Data());
        }
        else if (indirNode->OperGet() == GT_IND)
        {
            makeContained = !varTypeIsFloating(indirNode);
        }
    }
#endif // _TARGET_ARM_

    if (makeContained)
    {
        MakeSrcContained(indirNode, addr);
    }
}
Example #2
0
//------------------------------------------------------------------------
// LowerCast: Lower GT_CAST(srcType, DstType) nodes.
//
// Arguments:
//    tree - GT_CAST node to be lowered
//
// Return Value:
//    None.
//
// Notes:
//    Casts from float/double to a small int type are not expected here:
//    fgMorphCast creates the intermediate cast when converting from float to
//    a small int, i.e. such casts arrive in the form
//    GT_CAST(float/double, byte)     =   GT_CAST(GT_CAST(float/double, int32), byte)
//    GT_CAST(float/double, sbyte)    =   GT_CAST(GT_CAST(float/double, int32), sbyte)
//    GT_CAST(float/double, int16)    =   GT_CAST(GT_CAST(float/double, int32), int16)
//    GT_CAST(float/double, uint16)   =   GT_CAST(GT_CAST(float/double, int32), uint16)
//
//    Note that for the overflow conversions we still depend on helper calls and
//    don't expect to see them here.
//    i) GT_CAST(float/double, int type with overflow detection)
//
//    Consequently this function only validates its input and then decides
//    operand containment.
//
void Lowering::LowerCast(GenTree* tree)
{
    assert(tree->OperGet() == GT_CAST);

    JITDUMP("LowerCast for: ");
    DISPNODE(tree);
    JITDUMP("\n");

    GenTree*  op1     = tree->gtOp.gtOp1;
    var_types dstType = tree->CastToType();
    var_types srcType = genActualType(op1->TypeGet());

    if (varTypeIsFloating(srcType))
    {
        noway_assert(!tree->gtOverflow());
        assert(!varTypeIsSmall(dstType)); // fgMorphCast creates intermediate casts when converting from float to small
                                          // int.
    }

    assert(!varTypeIsSmall(srcType));

    // Now determine if we have operands that should be contained.
    ContainCheckCast(tree->AsCast());
}
//------------------------------------------------------------------------
// alignReg: Advance the argument register counter for 'type' so that it
//           satisfies the requested register alignment.
//
// Arguments:
//    type                 - argument type; selects which register counter is used
//    requiredRegAlignment - required alignment, in registers (1 or 2)
//
// Return Value:
//    The number of registers that were skipped to reach alignment (0 or 1).
//
unsigned InitVarDscInfo::alignReg(var_types type, unsigned requiredRegAlignment)
{
    NYI_ARM64("alignReg");

    assert(requiredRegAlignment > 0);
    if (requiredRegAlignment == 1)
    {
        // Every register number is trivially "1" aligned.
        return 0;
    }

    // Nothing other than 2-register alignment is expected at the moment.
    assert(requiredRegAlignment == 2);

    const unsigned misalignment = regArgNum(type) & (requiredRegAlignment - 1);
    if (misalignment == 0)
    {
        // The next register is already suitably aligned.
        return 0;
    }

    // With alignment limited to 1 or 2, the only possible misalignment is a single register.
    const unsigned skippedRegs = requiredRegAlignment - misalignment;
    assert(skippedRegs == 1);

#ifdef _TARGET_ARM_
    if (varTypeIsFloating(type))
    {
        // Remember the float register being skipped.
        fltArgSkippedRegMask |= genMapFloatRegArgNumToRegMask(floatRegArgNum);
    }
#endif // _TARGET_ARM_

    // If the two sides are equal we aligned past the last slot, and the arg can't be enregistered.
    assert(regArgNum(type) + skippedRegs <= maxRegArgNum(type));
    regArgNum(type) += skippedRegs;

    return skippedRegs;
}
Example #4
0
//------------------------------------------------------------------------
// TreeNodeInfoInitPutArgReg: Set the NodeInfo for a PUTARG_REG.
//
// Arguments:
//    node                - The PUTARG_REG node.
//    argReg              - The register in which to pass the argument.
//    info                - The info for the node's using call; its srcCount is incremented.
//    isVarArgs           - True if the call uses a varargs calling convention (not read here).
//    callHasFloatRegArgs - Set to true if this PUTARG_REG has a floating-point type;
//                          otherwise left unchanged.
//
// Return Value:
//    None.
//
void Lowering::TreeNodeInfoInitPutArgReg(
    GenTreeUnOp* node, regNumber argReg, TreeNodeInfo& info, bool isVarArgs, bool* callHasFloatRegArgs)
{
    assert(node != nullptr);
    assert(node->OperIsPutArgReg());
    assert(argReg != REG_NA);

    // The call consumes one source per register argument.
    info.srcCount++;

    // Build the set of registers this node is constrained to.
    regMaskTP candidateRegs = genRegMask(argReg);

#ifdef ARM_SOFTFP
    // A PUTARG_REG typed `long` is really a `double` here: genuine `long` values
    // must already have been transformed into a field list with two fields.
    // Such a value occupies argReg and the register following it.
    if (node->TypeGet() == TYP_LONG)
    {
        info.srcCount++;
        assert(genRegArgNext(argReg) == REG_NEXT(argReg));
        candidateRegs |= genRegMask(REG_NEXT(argReg));
    }
#endif // ARM_SOFTFP

    node->gtLsraInfo.setDstCandidates(m_lsra, candidateRegs);
    node->gtLsraInfo.setSrcCandidates(m_lsra, candidateRegs);

    // Ask for the operand to be computed directly into the argument register,
    // which avoids a redundant move.
    GenTree* operand = node->gtOp.gtOp1;
    operand->gtLsraInfo.setSrcCandidates(m_lsra, m_lsra->getUseCandidates(node));

    if (varTypeIsFloating(node->TypeGet()))
    {
        *callHasFloatRegArgs = true;
    }
}
Example #5
0
//------------------------------------------------------------------------
// SetLockedRegFloat: Lock or unlock the floating-point register(s) holding 'tree'.
//
// Arguments:
//    tree   - floating-point typed node whose gtRegNum identifies the register
//    bValue - true to add the register(s) to the lock mask, false to remove them
//
// Notes:
//    In both directions the register(s) are asserted to be marked as used.
//    Locking asserts they are not yet locked; unlocking asserts they are.
//
void RegSet::SetLockedRegFloat(GenTree * tree, bool bValue)
{
    regNumber  treeReg  = tree->gtRegNum;
    var_types  treeType = tree->TypeGet();
    assert(varTypeIsFloating(treeType));

    regMaskTP  treeMask = genRegMaskFloat(treeReg, tree->TypeGet());

    if (!bValue)
    {
        JITDUMP("unlocking register %s\n", getRegNameFloat(treeReg, treeType));

        assert((rsGetMaskUsed() & treeMask) == treeMask);
        assert((rsGetMaskLock() & treeMask) == treeMask);

        rsSetMaskLock(rsGetMaskLock() & ~treeMask);
        return;
    }

    JITDUMP("locking register %s\n", getRegNameFloat(treeReg, treeType));

    assert((rsGetMaskUsed() & treeMask) == treeMask);
    assert((rsGetMaskLock() & treeMask) == 0);

    rsSetMaskLock(rsGetMaskLock() | treeMask);
}
Example #6
0
//------------------------------------------------------------------------
// ContainCheckIndir: Determine whether operands of an indir should be contained.
//
// Arguments:
//    indirNode - The indirection node of interest
//
// Notes:
//    This is called for both store and load indirections.
//    The address is contained only when it is a GT_LEA for which
//    IsSafeToContainMem() succeeds. On ARM, a floating-point load/store whose
//    address mode has an index, or an offset that is not valid for vldr/vstr,
//    keeps its address uncontained.
//
// Return Value:
//    None.
//
void Lowering::ContainCheckIndir(GenTreeIndir* indirNode)
{
    // If this is the rhs of a block copy it will be handled when we handle the store.
    if (indirNode->TypeGet() == TYP_STRUCT)
    {
        return;
    }

    // Only an address mode (GT_LEA) that is safe to fold into this indirection is a candidate.
    GenTree* addr = indirNode->Addr();
    if ((addr->OperGet() != GT_LEA) || !IsSafeToContainMem(indirNode, addr))
    {
        return;
    }

    bool makeContained = true;

#ifdef _TARGET_ARM_
    GenTreeAddrMode* lea = addr->AsAddrMode();
    int              cns = lea->Offset();

    // ARM floating-point load/store doesn't support a form similar to integer
    // ldr Rdst, [Rbase + Roffset] with offset in a register. The only supported
    // form is vldr Rdst, [Rbase + imm] with a more limited constraint on the imm.
    if (lea->HasIndex() || !emitter::emitIns_valid_imm_for_vldst_offset(cns))
    {
        if (indirNode->OperGet() == GT_STOREIND)
        {
            // For a store, the type that matters is the type of the value being stored.
            makeContained = !varTypeIsFloating(indirNode->AsStoreInd()->Data());
        }
        else if (indirNode->OperGet() == GT_IND)
        {
            makeContained = !varTypeIsFloating(indirNode);
        }
    }
#endif // _TARGET_ARM_

    if (makeContained)
    {
        MakeSrcContained(indirNode, addr);
    }
}
Example #7
0
//------------------------------------------------------------------------
// IsLockedRegFloat: Check whether the floating-point register(s) holding
//                   'tree' are all present in the lock mask.
//
// Arguments:
//    tree - floating-point typed node that is flagged as living in a register
//
// Return Value:
//    true if every register in the node's float register mask is locked.
//
bool RegSet::IsLockedRegFloat(GenTreePtr tree)
{
    // The value must be sitting in a (floating-point) register.
    assert(tree);
    assert(tree->gtFlags & GTF_REG_VAL);
    assert(varTypeIsFloating(tree->TypeGet()));

    regMaskTP  treeRegs   = genRegMaskFloat(tree->gtRegNum, tree->TypeGet());
    regMaskTP  lockedRegs = rsGetMaskLock() & treeRegs;

    return lockedRegs == treeRegs;
}
Example #8
0
//------------------------------------------------------------------------
// genCodeForTreeCastFloat: Dispatch code generation for a cast that involves
//                          a floating-point type.
//
// Arguments:
//    tree - the cast node; its first operand is the value being cast
//    pref - register preference, forwarded unchanged to the selected helper
//
// Notes:
//    The decision is based solely on the operand's type: a floating-point
//    operand goes to genCodeForTreeCastFromFloat, anything else to
//    genCodeForTreeCastToFloat.
//
void CodeGen::genCodeForTreeCastFloat(GenTree *tree, RegSet::RegisterPreference *pref)
{
    GenTreePtr op1  = tree->gtOp.gtOp1;
    var_types  from = op1->gtType;

    if (varTypeIsFloating(from))
    {
        genCodeForTreeCastFromFloat(tree, pref);
    }
    else
    {
        genCodeForTreeCastToFloat(tree, pref);
    }
}
Example #9
0
//------------------------------------------------------------------------
// ContainCheckStoreIndir: determine whether the sources of a STOREIND node should be contained.
//
// Arguments:
//    node - pointer to the node
//
// Notes:
//    On ARM64 a non-floating-point integral constant zero being stored is
//    contained. The address is then handled by ContainCheckIndir.
//
void Lowering::ContainCheckStoreIndir(GenTreeIndir* node)
{
#ifdef _TARGET_ARM64_
    GenTree* storedValue = node->gtOp.gtOp2;

    const bool isIntegerZero = !varTypeIsFloating(storedValue->TypeGet()) && storedValue->IsIntegralConst(0);
    if (isIntegerZero)
    {
        // An integer zero being stored can be contained.
        MakeSrcContained(node, storedValue);
    }
#endif // _TARGET_ARM64_

    ContainCheckIndir(node);
}
Example #10
0
//------------------------------------------------------------------------
// LowerCast: Lower GT_CAST(srcType, DstType) nodes.
//
// Arguments:
//    tree - GT_CAST node to be lowered
//
// Return Value:
//    None.
//
// Notes:
//    A cast from float/double to a small int type is split in two by
//    inserting an intermediate cast to int32:
//    GT_CAST(float/double, byte)     =   GT_CAST(GT_CAST(float/double, int32), byte)
//    GT_CAST(float/double, sbyte)    =   GT_CAST(GT_CAST(float/double, int32), sbyte)
//    GT_CAST(float/double, int16)    =   GT_CAST(GT_CAST(float/double, int32), int16)
//    GT_CAST(float/double, uint16)   =   GT_CAST(GT_CAST(float/double, int32), uint16)
//
//    Overflow-checking conversions from float/double still rely on helper
//    calls and are not expected to reach this function:
//    i) GT_CAST(float/double, int type with overflow detection)
//
void Lowering::LowerCast(GenTree* tree)
{
    assert(tree->OperGet() == GT_CAST);

    JITDUMP("LowerCast for: ");
    DISPNODE(tree);
    JITDUMP("\n");

    GenTreePtr operand       = tree->gtOp.gtOp1;
    var_types  dstType       = tree->CastToType();
    var_types  srcType       = genActualType(operand->TypeGet());
    const bool srcIsFloating = varTypeIsFloating(srcType);

    if (srcIsFloating)
    {
        noway_assert(!tree->gtOverflow());
    }

    assert(!varTypeIsSmall(srcType));

    // Floating-point source with a small destination type: go through TYP_INT first.
    if (srcIsFloating && varTypeIsSmall(dstType))
    {
        NYI_ARM("Lowering for cast from float to small type"); // Not tested yet.

        GenTreePtr intCast = comp->gtNewCastNode(TYP_INT, operand, TYP_INT);

        // The new inner cast takes over the unsigned/overflow/exception flags;
        // the outer cast then drops its unsigned flag.
        intCast->gtFlags |= (tree->gtFlags & (GTF_UNSIGNED | GTF_OVERFLOW | GTF_EXCEPT));
        tree->gtFlags &= ~GTF_UNSIGNED;

        tree->gtOp.gtOp1 = intCast;
        BlockRange().InsertAfter(operand, intCast);
    }

    // Now determine if we have operands that should be contained.
    ContainCheckCast(tree->AsCast());
}
//------------------------------------------------------------------------
// enoughAvailRegs: Check whether 'numRegs' argument registers of the kind
//                  used for 'type' are still available.
//
// Arguments:
//    type    - argument type; selects which register counter/limit is consulted
//    numRegs - number of registers requested (must be > 0)
//
// Return Value:
//    true if the request fits within maxRegArgNum(type). On ARM a single
//    float register request is discounted when a back-fill slot exists.
//
bool InitVarDscInfo::enoughAvailRegs(var_types type, unsigned numRegs /* = 1 */)
{
    assert(numRegs > 0);

    unsigned backFillCount = 0;

#ifdef _TARGET_ARM_
    // Back-filling applies only when all of the following hold:
    const bool canBackFill = varTypeIsFloating(type) &&            // only float registers are back-filled
                             !anyFloatStackArgs &&                 // no FP arg has gone to the stack yet
                             (numRegs == 1) &&                     // only a single register can be back-filled
                             (fltArgSkippedRegMask != RBM_NONE);   // a skipped slot is available
    if (canBackFill)
    {
        backFillCount = 1;
    }
#endif // _TARGET_ARM_

    const unsigned regsAfterAlloc = regArgNum(type) + numRegs - backFillCount;
    return regsAfterAlloc <= maxRegArgNum(type);
}
Example #12
0
void CodeGen::genCodeForTreeCastFromFloat(GenTree *tree, RegSet::RegisterPreference *pref)
{
    GenTreePtr op1          = tree->gtOp.gtOp1;
    var_types  from         = op1->gtType;
    var_types  final        = tree->gtType;
    var_types  intermediate = tree->CastToType();

    regNumber srcReg;
    regNumber dstReg;

    assert(varTypeIsFloating(from));

    // Evaluate op1 into a floating point register 
    //
    if (varTypeIsFloating(final))
    {
        genCodeForTreeFloat(op1, pref);
    }
    else
    {
//------------------------------------------------------------------------
// allocRegArg: Allocate 'numRegs' argument registers for an argument of 'type'.
//
// Arguments:
//    type    - argument type; selects which register counter is used
//    numRegs - number of registers to allocate (must be > 0)
//
// Return Value:
//    The argument register number assigned to the argument. On ARM this may
//    be a previously skipped float register (back-fill), in which case the
//    register counters are not advanced.
//
unsigned InitVarDscInfo::allocRegArg(var_types type, unsigned numRegs /* = 1 */)
{
    assert(numRegs > 0);

#ifdef _TARGET_ARM_
    // Back-filling applies only when all of the following hold:
    const bool canBackFill = varTypeIsFloating(type) &&            // only float registers are back-filled
                             !anyFloatStackArgs &&                 // no FP arg has gone to the stack yet
                             (numRegs == 1) &&                     // only a single register can be back-filled
                             (fltArgSkippedRegMask != RBM_NONE);   // a skipped slot is available
    if (canBackFill)
    {
        // We will never back-fill something greater than a single register
        // (TYP_FLOAT, or TYP_STRUCT HFA with a single float). This is because
        // we don't have any types that require > 2 register alignment, so we
        // can't create a > 1 register alignment hole to back-fill.

        // Take the lowest skipped register and remove it from the skipped mask.
        regMaskTP slotMask = genFindLowestBit(fltArgSkippedRegMask);
        fltArgSkippedRegMask &= ~slotMask;

        unsigned backFilledArgNum = genMapFloatRegNumToRegArgNum(genRegNumFromMask(slotMask));
        assert(backFilledArgNum < MAX_FLOAT_REG_ARG);
        return backFilledArgNum;
    }
#endif // _TARGET_ARM_

    // No back-fill happened: hand out the next register and advance past what was allocated.
    const unsigned allocatedArgNum = regArgNum(type);

#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI) // For System V the reg type counters should be independent.
    nextReg(TYP_INT, numRegs);
    nextReg(TYP_FLOAT, numRegs);
#else
    nextReg(type, numRegs);
#endif

    return allocatedArgNum;
}
Example #14
0
//------------------------------------------------------------------------
// TreeNodeInfoInitPutArgReg: Set the NodeInfo for a PUTARG_REG.
//
// Arguments:
//    node                - The PUTARG_REG node.
//    argReg              - The register in which to pass the argument.
//    info                - The info for the node's using call; its srcCount is incremented.
//    isVarArgs           - True if the call uses a varargs calling convention (not read here).
//    callHasFloatRegArgs - Set to true if this PUTARG_REG has a floating-point type;
//                          otherwise left unchanged.
//
// Return Value:
//    None.
//
void Lowering::TreeNodeInfoInitPutArgReg(
    GenTreeUnOp* node, regNumber argReg, TreeNodeInfo& info, bool isVarArgs, bool* callHasFloatRegArgs)
{
    assert(node != nullptr);
    assert(node->OperIsPutArgReg());
    assert(argReg != REG_NA);

    // The call consumes one source per register argument.
    info.srcCount++;

    // Constrain both the definition and the use of this node to the argument register.
    const regMaskTP candidateRegs = genRegMask(argReg);
    node->gtLsraInfo.setDstCandidates(m_lsra, candidateRegs);
    node->gtLsraInfo.setSrcCandidates(m_lsra, candidateRegs);

    // Ask for the operand to be computed directly into the argument register,
    // which avoids a redundant move.
    GenTree* operand = node->gtOp.gtOp1;
    operand->gtLsraInfo.setSrcCandidates(m_lsra, m_lsra->getUseCandidates(node));

    if (varTypeIsFloating(node->TypeGet()))
    {
        *callHasFloatRegArgs = true;
    }
}
Example #15
0
//------------------------------------------------------------------------
// genComputeAddressableFloat: Bring a still-addressable floating-point tree
//                             into a floating-point register.
//
// Arguments:
//    tree       - floating-point typed tree that is still addressable
//    addrRegInt - forwarded to genDoneAddressableFloat
//    addrRegFlt - forwarded to genDoneAddressableFloat
//    keptReg    - forwarded to genDoneAddressableFloat
//    needReg    - register preference mask used if a register has to be picked
//    keepReg    - RegSet::KEEP_REG to mark the resulting register as used
//    freeOnly   - when true, a value already in a register is reloaded into a
//                 newly picked register unless its current register is free
//
void        CodeGen::genComputeAddressableFloat(GenTreePtr tree, 
                                                regMaskTP addrRegInt,
                                                regMaskTP addrRegFlt,
                                                RegSet::KeepReg keptReg,
                                                regMaskTP needReg,
                                                RegSet::KeepReg keepReg,
                                                bool freeOnly /* = false */)
{
    noway_assert(genStillAddressable(tree));
    noway_assert(varTypeIsFloating(tree->TypeGet()));

    genDoneAddressableFloat(tree, addrRegInt, addrRegFlt, keptReg);

    regNumber  reg;
    const bool alreadyInReg = (tree->gtFlags & GTF_REG_VAL) != 0;
    bool       mustLoad     = !alreadyInReg;

    if (alreadyInReg)
    {
        reg = tree->gtRegNum;

        // The current register is only acceptable under 'freeOnly' if it is free.
        mustLoad = freeOnly && !(genRegMaskFloat(reg, tree->TypeGet()) & regSet.RegFreeFloat());
    }

    if (mustLoad)
    {
        RegSet::RegisterPreference pref(needReg, RBM_NONE);
        reg = regSet.PickRegFloat(tree->TypeGet(), &pref);
        genLoadFloat(tree, reg);
    }

    genMarkTreeInReg(tree, reg);

    if (keepReg == RegSet::KEEP_REG)
    {
        regSet.SetUsedRegFloat(tree, true);
    }
}
Example #16
0
//------------------------------------------------------------------------
// TreeNodeInfoInitCall: Set the NodeInfo for a call.
//
// Arguments:
//    call - The call node of interest
//
// Return Value:
//    None.
//
// Notes:
//    The work is done in this order:
//      1. Initialize srcCount/dstCount from the call's return type.
//      2. Account for the call target (gtControlExpr, or gtCallAddr for indirect calls).
//      3. Choose the destination register candidates for the return value.
//      4. Handle an explicit 'this' pointer node (gtCallObjp).
//      5. Walk the late args list (register args, field lists, split args, late stack args).
//      6. Walk the regular args list (stack args and args that are not late args).
//      7. Apply the varargs/float-register and ARM null-check adjustments.
//
void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
{
    TreeNodeInfo*   info              = &(call->gtLsraInfo);
    LinearScan*     l                 = m_lsra;
    Compiler*       compiler          = comp;
    bool            hasMultiRegRetVal = false;
    ReturnTypeDesc* retTypeDesc       = nullptr;

    // Sources are counted incrementally below; the destination count depends on the return type.
    info->srcCount = 0;
    if (call->TypeGet() != TYP_VOID)
    {
        hasMultiRegRetVal = call->HasMultiRegRetVal();
        if (hasMultiRegRetVal)
        {
            // dst count = number of registers in which the value is returned by call
            retTypeDesc    = call->GetReturnTypeDesc();
            info->dstCount = retTypeDesc->GetReturnRegCount();
        }
        else
        {
            info->dstCount = 1;
        }
    }
    else
    {
        info->dstCount = 0;
    }

    GenTree* ctrlExpr = call->gtControlExpr;
    if (call->gtCallType == CT_INDIRECT)
    {
        // either gtControlExpr != null or gtCallAddr != null.
        // Both cannot be non-null at the same time.
        assert(ctrlExpr == nullptr);
        assert(call->gtCallAddr != nullptr);
        ctrlExpr = call->gtCallAddr;
    }

    // set reg requirements on call target represented as control sequence.
    if (ctrlExpr != nullptr)
    {
        // we should never see a gtControlExpr whose type is void.
        assert(ctrlExpr->TypeGet() != TYP_VOID);

        info->srcCount++;

        // In case of fast tail implemented as jmp, make sure that gtControlExpr is
        // computed into a register.
        if (call->IsFastTailCall())
        {
            NYI_ARM("tail call");

#ifdef _TARGET_ARM64_
            // Fast tail call - make sure that call target is always computed in IP0
            // so that epilog sequence can generate "br xip0" to achieve fast tail call.
            ctrlExpr->gtLsraInfo.setSrcCandidates(l, genRegMask(REG_IP0));
#endif // _TARGET_ARM64_
        }
    }
#ifdef _TARGET_ARM_
    else
    {
        // No control expression: one internal integer register is reserved on ARM.
        // NOTE(review): presumably this is used to materialize the call target - confirm against codegen.
        info->internalIntCount = 1;
    }
#endif // _TARGET_ARM_

    RegisterType registerType = call->TypeGet();

// Set destination candidates for return value of the call.

#ifdef _TARGET_ARM_
    if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME))
    {
        // The ARM CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with
        // TCB in REG_PINVOKE_TCB. fgMorphCall() sets the correct argument registers.
        info->setDstCandidates(l, RBM_PINVOKE_TCB);
    }
    else
#endif // _TARGET_ARM_
        if (hasMultiRegRetVal)
    {
        assert(retTypeDesc != nullptr);
        info->setDstCandidates(l, retTypeDesc->GetABIReturnRegs());
    }
    else if (varTypeIsFloating(registerType))
    {
        info->setDstCandidates(l, RBM_FLOATRET);
    }
    else if (registerType == TYP_LONG)
    {
        info->setDstCandidates(l, RBM_LNGRET);
    }
    else
    {
        info->setDstCandidates(l, RBM_INTRET);
    }

    // If there is an explicit this pointer, we don't want that node to produce anything
    // as it is redundant
    if (call->gtCallObjp != nullptr)
    {
        GenTreePtr thisPtrNode = call->gtCallObjp;

        if (thisPtrNode->gtOper == GT_PUTARG_REG)
        {
            l->clearOperandCounts(thisPtrNode);
            thisPtrNode->SetContained();
            l->clearDstCount(thisPtrNode->gtOp.gtOp1);
        }
        else
        {
            l->clearDstCount(thisPtrNode);
        }
    }

    // First, count reg args
    bool callHasFloatRegArgs = false;

    for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
    {
        assert(list->OperIsList());

        GenTreePtr argNode = list->Current();

        fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode);
        assert(curArgTabEntry);

        if (curArgTabEntry->regNum == REG_STK)
        {
            // late arg that is not passed in a register
            assert(argNode->gtOper == GT_PUTARG_STK);

            TreeNodeInfoInitPutArgStk(argNode->AsPutArgStk(), curArgTabEntry);
            continue;
        }

        // A GT_FIELD_LIST has a TYP_VOID, but is used to represent a multireg struct
        if (argNode->OperGet() == GT_FIELD_LIST)
        {
            argNode->SetContained();

            // There could be up to 2-4 PUTARG_REGs in the list (3 or 4 can only occur for HFAs)
            regNumber argReg = curArgTabEntry->regNum;
            for (GenTreeFieldList* entry = argNode->AsFieldList(); entry != nullptr; entry = entry->Rest())
            {
                TreeNodeInfoInitPutArgReg(entry->Current()->AsUnOp(), argReg, *info, false, &callHasFloatRegArgs);

                // Update argReg for the next putarg_reg (if any)
                argReg = genRegArgNext(argReg);

#if defined(_TARGET_ARM_)
                // A double register is modelled as an even-numbered single one
                if (entry->Current()->TypeGet() == TYP_DOUBLE)
                {
                    argReg = genRegArgNext(argReg);
                }
#endif // _TARGET_ARM_
            }
        }
#ifdef _TARGET_ARM_
        else if (argNode->OperGet() == GT_PUTARG_SPLIT)
        {
            // NOTE(review): this declaration shadows the outer 'curArgTabEntry' and repeats the same
            // gtArgEntryByNode(call, argNode) lookup made at the top of the loop body.
            fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode);
            TreeNodeInfoInitPutArgSplit(argNode->AsPutArgSplit(), *info, curArgTabEntry);
        }
#endif
        else
        {
            TreeNodeInfoInitPutArgReg(argNode->AsUnOp(), curArgTabEntry->regNum, *info, false, &callHasFloatRegArgs);
        }
    }

    // Now, count stack args
    // Note that these need to be computed into a register, but then
    // they're just stored to the stack - so the reg doesn't
    // need to remain live until the call.  In fact, it must not
    // because the code generator doesn't actually consider it live,
    // so it can't be spilled.

    GenTreePtr args = call->gtCallArgs;
    while (args)
    {
        GenTreePtr arg = args->gtOp.gtOp1;

        // Skip arguments that have been moved to the Late Arg list
        if (!(args->gtFlags & GTF_LATE_ARG))
        {
            if (arg->gtOper == GT_PUTARG_STK)
            {
                fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg);
                assert(curArgTabEntry);

                assert(curArgTabEntry->regNum == REG_STK);

                TreeNodeInfoInitPutArgStk(arg->AsPutArgStk(), curArgTabEntry);
            }
#ifdef _TARGET_ARM_
            else if (arg->OperGet() == GT_PUTARG_SPLIT)
            {
                fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg);
                TreeNodeInfoInitPutArgSplit(arg->AsPutArgSplit(), *info, curArgTabEntry);
            }
#endif
            else
            {
                // Any other non-late arg: if it defines a value, flag it as a local def/use,
                // and in all cases reset its destination count to zero.
                TreeNodeInfo* argInfo = &(arg->gtLsraInfo);
                if (argInfo->dstCount != 0)
                {
                    argInfo->isLocalDefUse = true;
                }

                argInfo->dstCount = 0;
            }
        }
        args = args->gtOp.gtOp2;
    }

    // If it is a fast tail call, it is already preferenced to use IP0.
    // Therefore, no need set src candidates on call tgt again.
    if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExpr != nullptr))
    {
        NYI_ARM("float reg varargs");

        // Don't assign the call target to any of the argument registers because
        // we will use them to also pass floating point arguments as required
        // by Arm64 ABI.
        ctrlExpr->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_ARG_REGS));
    }

#ifdef _TARGET_ARM_

    // On ARM a call that needs a null check gets one additional internal integer register.
    if (call->NeedsNullCheck())
    {
        info->internalIntCount++;
    }

#endif // _TARGET_ARM_
}
Example #17
0
//------------------------------------------------------------------------
// IsContainableImmed: Is an immediate encodable in-place?
//
// Arguments:
//    parentNode - the node that would consume the immediate; its type and
//                 operator select which encoding check is applied
//    childNode  - the candidate constant operand
//
// Return Value:
//    True if the immediate can be folded into an instruction,
//    for example small enough and non-relocatable.
//
// Notes:
//    The set of supported parent operators and the emitter validity check
//    used for each differ between _TARGET_ARM64_ and _TARGET_ARM_; any
//    operator not listed for the current target yields false.
bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode)
{
    if (varTypeIsFloating(parentNode->TypeGet()))
    {
        // We can contain a floating point 0.0 constant in a compare instruction
        switch (parentNode->OperGet())
        {
            default:
                return false;

            case GT_EQ:
            case GT_NE:
            case GT_LT:
            case GT_LE:
            case GT_GE:
            case GT_GT:
                // NOTE(review): the comment above speaks of a floating point 0.0 constant, but the
                // check below is IsIntegralConst(0) - confirm it matches the intended constant kind.
                if (childNode->IsIntegralConst(0))
                {
                    // TODO-ARM-Cleanup: not tested yet.
                    NYI_ARM("ARM IsContainableImmed for floating point type");

                    return true;
                }
                break;
        }
    }
    else
    {
        // Make sure we have an actual immediate
        if (!childNode->IsCnsIntOrI())
            return false;
        // A handle constant is rejected when compiling with relocations enabled.
        if (childNode->IsIconHandle() && comp->opts.compReloc)
            return false;

        ssize_t  immVal = childNode->gtIntCon.gtIconVal;
        emitAttr attr   = emitActualTypeSize(childNode->TypeGet());
        emitAttr size   = EA_SIZE(attr);
#ifdef _TARGET_ARM_
        // On ARM the add-immediate check below also takes into account whether the parent sets flags.
        insFlags flags = parentNode->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
#endif

        switch (parentNode->OperGet())
        {
            default:
                return false;

            // Add/sub (and, on ARM64 only, the atomic operators listed below) use the add-immediate check.
            case GT_ADD:
            case GT_SUB:
#ifdef _TARGET_ARM64_
            case GT_CMPXCHG:
            case GT_LOCKADD:
            case GT_XADD:
                return emitter::emitIns_valid_imm_for_add(immVal, size);
#elif defined(_TARGET_ARM_)
                return emitter::emitIns_valid_imm_for_add(immVal, flags);
#endif
                break;

#ifdef _TARGET_ARM64_
            // ARM64: compares use the cmp-immediate check; logical and test operators use the alu check.
            case GT_EQ:
            case GT_NE:
            case GT_LT:
            case GT_LE:
            case GT_GE:
            case GT_GT:
                return emitter::emitIns_valid_imm_for_cmp(immVal, size);
                break;
            case GT_AND:
            case GT_OR:
            case GT_XOR:
            case GT_TEST_EQ:
            case GT_TEST_NE:
                return emitter::emitIns_valid_imm_for_alu(immVal, size);
                break;
            case GT_JCMP:
                // The immediate is always contained; the assert checks it is zero when
                // GTF_JCMP_TST is clear and a power of two when it is set.
                assert(((parentNode->gtFlags & GTF_JCMP_TST) == 0) ? (immVal == 0) : isPow2(immVal));
                return true;
                break;
#elif defined(_TARGET_ARM_)
            // ARM: compares and logical operators all share the alu-immediate check.
            case GT_EQ:
            case GT_NE:
            case GT_LT:
            case GT_LE:
            case GT_GE:
            case GT_GT:
            case GT_CMP:
            case GT_AND:
            case GT_OR:
            case GT_XOR:
                return emitter::emitIns_valid_imm_for_alu(immVal);
                break;
#endif // _TARGET_ARM_

#ifdef _TARGET_ARM64_
            // ARM64: only a zero immediate is contained for a local variable store.
            case GT_STORE_LCL_VAR:
                if (immVal == 0)
                    return true;
                break;
#endif
        }
    }

    return false;
}
Example #18
0
//------------------------------------------------------------------------
// TreeNodeInfoInitIndir: Set the register requirements for the address
//                        expression of an indirection.
//
// Arguments:
//    indirTree - GT_IND, GT_STOREIND, block node or GT_NULLCHECK gentree node
//
// Notes:
//    The address is handled in one of four ways:
//      - a GT_LEA that is safe to contain is marked contained (unless the
//        ARM floating-point restrictions below apply);
//      - an address for which genCreateAddrMode can form an addressing mode
//        (and whose sources are not possibly-modified locals) is contained,
//        along with the intermediate nodes that compute it;
//      - a GT_ARR_ELEM address has its source counts adjusted;
//      - anything else is treated as a plain base register.
//    The source count and internal register count on the indirection's
//    gtLsraInfo are updated accordingly.
//
void Lowering::TreeNodeInfoInitIndir(GenTreePtr indirTree)
{
    assert(indirTree->OperIsIndir());

    // A TYP_STRUCT indirection is the rhs of a block copy (i.e. a
    // non-enregisterable struct); it has no register requirements.
    if (indirTree->TypeGet() == TYP_STRUCT)
    {
        return;
    }

    GenTreePtr    addr     = indirTree->gtGetOp1();
    TreeNodeInfo* nodeInfo = &(indirTree->gtLsraInfo);

    // Components of the addressing mode, filled in by the cases below.
    GenTreePtr base  = nullptr;
    GenTreePtr index = nullptr;
    unsigned   cns   = 0;
    unsigned   scale;
    bool       reversed;
    bool       modifiedSources = false;
    bool       makeContained   = true;

    if ((addr->OperGet() == GT_LEA) && IsSafeToContainMem(indirTree, addr))
    {
        GenTreeAddrMode* lea = addr->AsAddrMode();

        base  = lea->Base();
        index = lea->Index();
        cns   = lea->gtOffset;

#ifdef _TARGET_ARM_
        // ARM floating-point load/store doesn't support a form similar to integer
        // ldr Rdst, [Rbase + Roffset] with offset in a register. The only supported
        // form is vldr Rdst, [Rbase + imm] with a more limited constraint on the imm.
        if (lea->HasIndex() || !emitter::emitIns_valid_imm_for_vldst_offset(cns))
        {
            bool isFloatAccess = false;
            switch (indirTree->OperGet())
            {
                case GT_STOREIND:
                    // For a store it is the type of the stored data that matters.
                    isFloatAccess = varTypeIsFloating(indirTree->AsStoreInd()->Data());
                    break;

                case GT_IND:
                    isFloatAccess = varTypeIsFloating(indirTree);
                    break;

                default:
                    break;
            }

            if (isFloatAccess)
            {
                makeContained = false;
            }
        }
#endif

        if (makeContained)
        {
            m_lsra->clearOperandCounts(addr);
            addr->SetContained();
            // addr is now "contained", so it no longer counts as a source;
            // the base and index (if non-null) are accounted for below.
            nodeInfo->srcCount--;
        }
    }
    else if (comp->codeGen->genCreateAddrMode(addr, -1, true, 0, &reversed, &base, &index, &scale, &cns,
                                              true /*nogen*/) &&
             !(modifiedSources = AreSourcesPossiblyModifiedLocals(indirTree, base, index)))
    {
        // An addressing mode will be constructed that may cause some
        // nodes to not need a register, and cause others' lifetimes to be extended
        // to the GT_IND or even its parent if it's an assignment

        assert(base != addr);
        m_lsra->clearOperandCounts(addr);
        addr->SetContained();

        // Walk the computation below the indirection looking for the base and
        // index operands of the addressing mode. Every other node encountered
        // (constants and intermediate results) is marked as contained, i.e. as
        // neither consuming nor producing a register.
        // A simple walk is sufficient: the addressing mode consists only of
        // simple arithmetic operators, and at most one operand of each node
        // needs to be descended into.

        bool sawBase  = (base == nullptr);
        bool sawIndex = (index == nullptr);

        GenTreePtr cursor = addr;
        while ((cursor != nullptr) && !cursor->OperIsLeaf())
        {
            GenTreePtr const operands[2] = {cursor->gtOp.gtOp1,
                                            cursor->OperIsBinary() ? cursor->gtOp.gtOp2 : nullptr};
            GenTreePtr descendInto = nullptr;

            for (unsigned i = 0; i < 2; i++)
            {
                GenTreePtr operand = operands[i];

                // Only the second operand may legitimately be absent.
                if ((i == 1) && (operand == nullptr))
                {
                    break;
                }

                if (operand == base)
                {
                    sawBase = true;
                }
                else if (operand == index)
                {
                    sawIndex = true;
                }
                else
                {
                    m_lsra->clearOperandCounts(operand);
                    operand->SetContained();
                    if (!operand->OperIsLeaf())
                    {
                        // At most one operand per node is expected to need further traversal.
                        assert(descendInto == nullptr);
                        descendInto = operand;
                    }
                }
            }

            cursor = descendInto;
        }

        assert(sawBase && sawIndex);
        nodeInfo->srcCount--; // it gets incremented below.
    }
    else if (addr->gtOper == GT_ARR_ELEM)
    {
        // The GT_ARR_ELEM consumes all the indices and produces the offset.
        // The array object lives until the mem access.
        // We also consume the target register to which the address is
        // computed

        nodeInfo->srcCount++;
        assert(addr->gtLsraInfo.srcCount >= 2);
        addr->gtLsraInfo.srcCount--;
    }
    else
    {
        // Nothing but a plain indir: the address itself serves as the base.
        nodeInfo->srcCount--; // base gets added in below
        base = addr;
    }

    if (!makeContained)
    {
        return;
    }

    // Account for the registers consumed by the addressing mode.
    if (base != nullptr)
    {
        nodeInfo->srcCount++;
    }
    if ((index != nullptr) && !modifiedSources)
    {
        nodeInfo->srcCount++;
    }

    // On ARM we may need a single internal register, either because ARM does not
    // support both an index and an offset, or because the offset can't be contained
    // in the ldr/str instruction. When both conditions hold, one register still suffices.
    const bool hasIndexAndOffset = (index != nullptr) && (cns != 0);
    if (hasIndexAndOffset || !emitter::emitIns_valid_imm_for_ldst_offset(cns, emitTypeSize(indirTree)))
    {
        nodeInfo->internalIntCount = 1;
    }
}
Example #19
0
//------------------------------------------------------------------------
// BuildNode: Build the RefPositions for a node
//
// Arguments:
//    tree - the node of interest
//
// Return Value:
//    The number of sources consumed by this node.
//
// Notes:
// Preconditions:
//    LSRA has been initialized.
//    'tree' is not contained.
//
// Postconditions:
//    RefPositions have been built for all the register defs and uses required
//    for this node.
//
int LinearScan::BuildNode(GenTree* tree)
{
    assert(!tree->isContained());
    int       srcCount;
    int       dstCount      = 0;
    regMaskTP dstCandidates = RBM_NONE;
    regMaskTP killMask      = RBM_NONE;
    bool      isLocalDefUse = false;

    // Reset the build-related members of LinearScan.
    clearBuildState();

    // Set the default dstCount. This may be modified below.
    if (tree->IsValue())
    {
        dstCount = 1;
        if (tree->IsUnusedValue())
        {
            isLocalDefUse = true;
        }
    }
    else
    {
        dstCount = 0;
    }

    switch (tree->OperGet())
    {
        default:
            srcCount = BuildSimple(tree);
            break;

        case GT_LCL_VAR:
        case GT_LCL_FLD:
        {
            // We handle tracked variables differently from non-tracked ones.  If it is tracked,
            // we will simply add a use of the tracked variable at its parent/consumer.
            // Otherwise, for a use we need to actually add the appropriate references for loading
            // or storing the variable.
            //
            // A tracked variable won't actually get used until the appropriate ancestor tree node
            // is processed, unless this is marked "isLocalDefUse" because it is a stack-based argument
            // to a call or an orphaned dead node.
            //
            LclVarDsc* const varDsc = &compiler->lvaTable[tree->AsLclVarCommon()->gtLclNum];
            if (isCandidateVar(varDsc))
            {
                INDEBUG(dumpNodeInfo(tree, dstCandidates, 0, 1));
                return 0;
            }
            srcCount = 0;
#ifdef FEATURE_SIMD
            // Need an additional register to read upper 4 bytes of Vector3.
            if (tree->TypeGet() == TYP_SIMD12)
            {
                // We need an internal register different from targetReg in which 'tree' produces its result
                // because both targetReg and internal reg will be in use at the same time.
                buildInternalFloatRegisterDefForNode(tree, allSIMDRegs());
                setInternalRegsDelayFree = true;
                buildInternalRegisterUses();
            }
#endif
            BuildDef(tree);
        }
        break;

        case GT_STORE_LCL_FLD:
        case GT_STORE_LCL_VAR:
            assert(dstCount == 0);
            srcCount = BuildStoreLoc(tree->AsLclVarCommon());
            break;

        case GT_FIELD_LIST:
            // These should always be contained. We don't correctly allocate or
            // generate code for a non-contained GT_FIELD_LIST.
            noway_assert(!"Non-contained GT_FIELD_LIST");
            srcCount = 0;
            break;

        case GT_LIST:
        case GT_ARGPLACE:
        case GT_NO_OP:
        case GT_START_NONGC:
        case GT_PROF_HOOK:
            srcCount = 0;
            assert(dstCount == 0);
            break;

        case GT_START_PREEMPTGC:
            // This kills GC refs in callee save regs
            srcCount = 0;
            assert(dstCount == 0);
            BuildDefsWithKills(tree, 0, RBM_NONE, RBM_NONE);
            break;

        case GT_CNS_DBL:
        {
            GenTreeDblCon* dblConst   = tree->AsDblCon();
            double         constValue = dblConst->gtDblCon.gtDconVal;

            if (emitter::emitIns_valid_imm_for_fmov(constValue))
            {
                // Directly encode constant to instructions.
            }
            else
            {
                // Reserve int to load constant from memory (IF_LARGELDC)
                buildInternalIntRegisterDefForNode(tree);
                buildInternalRegisterUses();
            }
        }
            __fallthrough;

        case GT_CNS_INT:
        {
            srcCount = 0;
            assert(dstCount == 1);
            RefPosition* def               = BuildDef(tree);
            def->getInterval()->isConstant = true;
        }
        break;

        case GT_BOX:
        case GT_COMMA:
        case GT_QMARK:
        case GT_COLON:
            srcCount = 0;
            assert(dstCount == 0);
            unreached();
            break;

        case GT_RETURN:
            srcCount = BuildReturn(tree);
            break;

        case GT_RETFILT:
            assert(dstCount == 0);
            if (tree->TypeGet() == TYP_VOID)
            {
                srcCount = 0;
            }
            else
            {
                assert(tree->TypeGet() == TYP_INT);
                srcCount = 1;
                BuildUse(tree->gtGetOp1(), RBM_INTRET);
            }
            break;

        case GT_NOP:
            // A GT_NOP is either a passthrough (if it is void, or if it has
            // a child), but must be considered to produce a dummy value if it
            // has a type but no child.
            srcCount = 0;
            if (tree->TypeGet() != TYP_VOID && tree->gtGetOp1() == nullptr)
            {
                assert(dstCount == 1);
                BuildDef(tree);
            }
            else
            {
                assert(dstCount == 0);
            }
            break;

        case GT_JTRUE:
            srcCount = 0;
            assert(dstCount == 0);
            break;

        case GT_JMP:
            srcCount = 0;
            assert(dstCount == 0);
            break;

        case GT_SWITCH:
            // This should never occur since switch nodes must not be visible at this
            // point in the JIT.
            srcCount = 0;
            noway_assert(!"Switch must be lowered at this point");
            break;

        case GT_JMPTABLE:
            srcCount = 0;
            assert(dstCount == 1);
            BuildDef(tree);
            break;

        case GT_SWITCH_TABLE:
            buildInternalIntRegisterDefForNode(tree);
            srcCount = BuildBinaryUses(tree->AsOp());
            assert(dstCount == 0);
            break;

        case GT_ASG:
            noway_assert(!"We should never hit any assignment operator in lowering");
            srcCount = 0;
            break;

        case GT_ADD:
        case GT_SUB:
            if (varTypeIsFloating(tree->TypeGet()))
            {
                // overflow operations aren't supported on float/double types.
                assert(!tree->gtOverflow());

                // No implicit conversions at this stage as the expectation is that
                // everything is made explicit by adding casts.
                assert(tree->gtGetOp1()->TypeGet() == tree->gtGetOp2()->TypeGet());
            }

            __fallthrough;

        case GT_AND:
        case GT_OR:
        case GT_XOR:
        case GT_LSH:
        case GT_RSH:
        case GT_RSZ:
        case GT_ROR:
            srcCount = BuildBinaryUses(tree->AsOp());
            assert(dstCount == 1);
            BuildDef(tree);
            break;

        case GT_RETURNTRAP:
            // this just turns into a compare of its child with an int
            // + a conditional call
            BuildUse(tree->gtGetOp1());
            srcCount = 1;
            assert(dstCount == 0);
            killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC);
            BuildDefsWithKills(tree, 0, RBM_NONE, killMask);
            break;

        case GT_MOD:
        case GT_UMOD:
            NYI_IF(varTypeIsFloating(tree->TypeGet()), "FP Remainder in ARM64");
            assert(!"Shouldn't see an integer typed GT_MOD node in ARM64");
            srcCount = 0;
            break;

        case GT_MUL:
            if (tree->gtOverflow())
            {
                // Need a register different from target reg to check for overflow.
                buildInternalIntRegisterDefForNode(tree);
                setInternalRegsDelayFree = true;
            }
            __fallthrough;

        case GT_DIV:
        case GT_MULHI:
        case GT_UDIV:
        {
            srcCount = BuildBinaryUses(tree->AsOp());
            buildInternalRegisterUses();
            assert(dstCount == 1);
            BuildDef(tree);
        }
        break;

        case GT_INTRINSIC:
        {
            noway_assert((tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Abs) ||
                         (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Ceiling) ||
                         (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Floor) ||
                         (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round) ||
                         (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Sqrt));

            // Both operand and its result must be of the same floating point type.
            GenTree* op1 = tree->gtGetOp1();
            assert(varTypeIsFloating(op1));
            assert(op1->TypeGet() == tree->TypeGet());

            BuildUse(op1);
            srcCount = 1;
            assert(dstCount == 1);
            BuildDef(tree);
        }
        break;

#ifdef FEATURE_SIMD
        case GT_SIMD:
            srcCount = BuildSIMD(tree->AsSIMD());
            break;
#endif // FEATURE_SIMD

#ifdef FEATURE_HW_INTRINSICS
        case GT_HWIntrinsic:
            srcCount = BuildHWIntrinsic(tree->AsHWIntrinsic());
            break;
#endif // FEATURE_HW_INTRINSICS

        case GT_CAST:
            assert(dstCount == 1);
            srcCount = BuildCast(tree->AsCast());
            break;

        case GT_NEG:
        case GT_NOT:
            BuildUse(tree->gtGetOp1());
            srcCount = 1;
            assert(dstCount == 1);
            BuildDef(tree);
            break;

        case GT_EQ:
        case GT_NE:
        case GT_LT:
        case GT_LE:
        case GT_GE:
        case GT_GT:
        case GT_TEST_EQ:
        case GT_TEST_NE:
        case GT_JCMP:
            srcCount = BuildCmp(tree);
            break;

        case GT_CKFINITE:
            srcCount = 1;
            assert(dstCount == 1);
            buildInternalIntRegisterDefForNode(tree);
            BuildUse(tree->gtGetOp1());
            BuildDef(tree);
            buildInternalRegisterUses();
            break;

        case GT_CMPXCHG:
        {
            GenTreeCmpXchg* cmpXchgNode = tree->AsCmpXchg();
            srcCount                    = cmpXchgNode->gtOpComparand->isContained() ? 2 : 3;
            assert(dstCount == 1);

            if (!compiler->compSupports(InstructionSet_Atomics))
            {
                // For ARMv8 exclusives requires a single internal register
                buildInternalIntRegisterDefForNode(tree);
            }

            // For ARMv8 exclusives the lifetime of the addr and data must be extended because
            // they may be used multiple times during retries

            // For ARMv8.1 atomic cas the lifetime of the addr and data must be extended to prevent
            // them being reused as the target register which must be destroyed early

            RefPosition* locationUse = BuildUse(cmpXchgNode->gtOpLocation);
            setDelayFree(locationUse);
            RefPosition* valueUse = BuildUse(cmpXchgNode->gtOpValue);
            setDelayFree(valueUse);
            if (!cmpXchgNode->gtOpComparand->isContained())
            {
                RefPosition* comparandUse = BuildUse(cmpXchgNode->gtOpComparand);

                // For ARMv8 exclusives the lifetime of the comparand must be extended because
                // it may be used multiple times during retries
                if (!compiler->compSupports(InstructionSet_Atomics))
                {
                    setDelayFree(comparandUse);
                }
            }

            // Internals may not collide with target
            setInternalRegsDelayFree = true;
            buildInternalRegisterUses();
            BuildDef(tree);
        }
        break;

        case GT_LOCKADD:
        case GT_XADD:
        case GT_XCHG:
        {
            // Note the parentheses around the conditional expression: '==' binds more tightly
            // than '?:', so without them the assert would not check what is intended.
            assert(dstCount == ((tree->TypeGet() == TYP_VOID) ? 0 : 1));
            srcCount = tree->gtGetOp2()->isContained() ? 1 : 2;

            if (!compiler->compSupports(InstructionSet_Atomics))
            {
                // GT_XCHG requires a single internal register; the others require two.
                buildInternalIntRegisterDefForNode(tree);
                if (tree->OperGet() != GT_XCHG)
                {
                    buildInternalIntRegisterDefForNode(tree);
                }
            }

            assert(!tree->gtGetOp1()->isContained());
            RefPosition* op1Use = BuildUse(tree->gtGetOp1());
            RefPosition* op2Use = nullptr;
            if (!tree->gtGetOp2()->isContained())
            {
                op2Use = BuildUse(tree->gtGetOp2());
            }

            // For ARMv8 exclusives the lifetime of the addr and data must be extended because
            // they may be used multiple times during retries
            if (!compiler->compSupports(InstructionSet_Atomics))
            {
                // Internals may not collide with target
                if (dstCount == 1)
                {
                    setDelayFree(op1Use);
                    if (op2Use != nullptr)
                    {
                        setDelayFree(op2Use);
                    }
                    setInternalRegsDelayFree = true;
                }
                buildInternalRegisterUses();
            }
            if (dstCount == 1)
            {
                BuildDef(tree);
            }
        }
        break;

#if FEATURE_ARG_SPLIT
        case GT_PUTARG_SPLIT:
            srcCount = BuildPutArgSplit(tree->AsPutArgSplit());
            dstCount = tree->AsPutArgSplit()->gtNumRegs;
            break;
#endif // FEATURE_ARG_SPLIT

        case GT_PUTARG_STK:
            srcCount = BuildPutArgStk(tree->AsPutArgStk());
            break;

        case GT_PUTARG_REG:
            srcCount = BuildPutArgReg(tree->AsUnOp());
            break;

        case GT_CALL:
            srcCount = BuildCall(tree->AsCall());
            if (tree->AsCall()->HasMultiRegRetVal())
            {
                dstCount = tree->AsCall()->GetReturnTypeDesc()->GetReturnRegCount();
            }
            break;

        case GT_ADDR:
        {
            // For a GT_ADDR, the child node should not be evaluated into a register
            GenTree* child = tree->gtGetOp1();
            assert(!isCandidateLocalRef(child));
            assert(child->isContained());
            assert(dstCount == 1);
            srcCount = 0;
            BuildDef(tree);
        }
        break;

        case GT_BLK:
        case GT_DYN_BLK:
            // These should all be eliminated prior to Lowering.
            assert(!"Non-store block node in Lowering");
            srcCount = 0;
            break;

        case GT_STORE_BLK:
        case GT_STORE_OBJ:
        case GT_STORE_DYN_BLK:
            srcCount = BuildBlockStore(tree->AsBlk());
            break;

        case GT_INIT_VAL:
            // Always a passthrough of its child's value.
            assert(!"INIT_VAL should always be contained");
            srcCount = 0;
            break;

        case GT_LCLHEAP:
        {
            assert(dstCount == 1);

            // Need a variable number of temp regs (see genLclHeap() in codegenamd64.cpp):
            // Here '-' means don't care.
            //
            //  Size?                   Init Memory?    # temp regs
            //   0                          -               0
            //   const and <=6 ptr words    -               0
            //   const and <PageSize        No              0
            //   >6 ptr words               Yes             0
            //   Non-const                  Yes             0
            //   Non-const                  No              2
            //

            GenTree* size = tree->gtGetOp1();
            if (size->IsCnsIntOrI())
            {
                assert(size->isContained());
                srcCount = 0;

                size_t sizeVal = size->gtIntCon.gtIconVal;

                if (sizeVal != 0)
                {
                    // Compute the amount of memory to properly STACK_ALIGN.
                    // Note: The Gentree node is not updated here as it is cheap to recompute stack aligned size.
                    // This should also help in debugging as we can examine the original size specified with
                    // localloc.
                    sizeVal         = AlignUp(sizeVal, STACK_ALIGN);
                    size_t stpCount = sizeVal / (REGSIZE_BYTES * 2);

                    // For small allocations up to 4 'stp' instructions (i.e. 16 to 64 bytes of localloc)
                    //
                    if (stpCount <= 4)
                    {
                        // Need no internal registers
                    }
                    else if (!compiler->info.compInitMem)
                    {
                        // No need to initialize allocated stack space.
                        if (sizeVal < compiler->eeGetPageSize())
                        {
                            // Need no internal registers
                        }
                        else
                        {
                            // We need two registers: regCnt and RegTmp
                            buildInternalIntRegisterDefForNode(tree);
                            buildInternalIntRegisterDefForNode(tree);
                        }
                    }
                }
            }
            else
            {
                srcCount = 1;
                if (!compiler->info.compInitMem)
                {
                    buildInternalIntRegisterDefForNode(tree);
                    buildInternalIntRegisterDefForNode(tree);
                }
            }

            if (!size->isContained())
            {
                BuildUse(size);
            }
            buildInternalRegisterUses();
            BuildDef(tree);
        }
        break;

        case GT_ARR_BOUNDS_CHECK:
#ifdef FEATURE_SIMD
        case GT_SIMD_CHK:
#endif // FEATURE_SIMD
        {
            GenTreeBoundsChk* node = tree->AsBoundsChk();
            // Consumes arrLen & index - has no result
            assert(dstCount == 0);

            srcCount = BuildOperandUses(node->gtIndex);
            srcCount += BuildOperandUses(node->gtArrLen);
        }
        break;

        case GT_ARR_ELEM:
            // These must have been lowered to GT_ARR_INDEX
            noway_assert(!"We should never see a GT_ARR_ELEM in lowering");
            srcCount = 0;
            assert(dstCount == 0);
            break;

        case GT_ARR_INDEX:
        {
            srcCount = 2;
            assert(dstCount == 1);
            buildInternalIntRegisterDefForNode(tree);
            setInternalRegsDelayFree = true;

            // For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple
            // times while the result is being computed.
            RefPosition* arrObjUse = BuildUse(tree->AsArrIndex()->ArrObj());
            setDelayFree(arrObjUse);
            BuildUse(tree->AsArrIndex()->IndexExpr());
            buildInternalRegisterUses();
            BuildDef(tree);
        }
        break;

        case GT_ARR_OFFSET:
            // This consumes the offset, if any, the arrObj and the effective index,
            // and produces the flattened offset for this dimension.
            srcCount = 2;
            if (!tree->AsArrOffs()->gtOffset->isContained())
            {
                BuildUse(tree->AsArrOffs()->gtOffset);
                srcCount++;
            }
            BuildUse(tree->AsArrOffs()->gtIndex);
            BuildUse(tree->AsArrOffs()->gtArrObj);
            assert(dstCount == 1);
            buildInternalIntRegisterDefForNode(tree);
            buildInternalRegisterUses();
            BuildDef(tree);
            break;

        case GT_LEA:
        {
            GenTreeAddrMode* lea = tree->AsAddrMode();

            GenTree* base  = lea->Base();
            GenTree* index = lea->Index();
            int      cns   = lea->Offset();

            // This LEA is instantiating an address, so we set up the srcCount here.
            srcCount = 0;
            if (base != nullptr)
            {
                srcCount++;
                BuildUse(base);
            }
            if (index != nullptr)
            {
                srcCount++;
                BuildUse(index);
            }
            assert(dstCount == 1);

            // On ARM64 we may need a single internal register
            // (when both conditions are true then we still only need a single internal register)
            if ((index != nullptr) && (cns != 0))
            {
                // ARM64 does not support both Index and offset so we need an internal register
                buildInternalIntRegisterDefForNode(tree);
            }
            else if (!emitter::emitIns_valid_imm_for_add(cns, EA_8BYTE))
            {
                // This offset can't be contained in the add instruction, so we need an internal register
                buildInternalIntRegisterDefForNode(tree);
            }
            buildInternalRegisterUses();
            BuildDef(tree);
        }
        break;

        case GT_STOREIND:
        {
            assert(dstCount == 0);

            if (compiler->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(tree))
            {
                srcCount = BuildGCWriteBarrier(tree);
                break;
            }

            srcCount = BuildIndir(tree->AsIndir());
            if (!tree->gtGetOp2()->isContained())
            {
                BuildUse(tree->gtGetOp2());
                srcCount++;
            }
        }
        break;

        case GT_NULLCHECK:
            // Unlike ARM, ARM64 implements NULLCHECK as a load to REG_ZR, so no internal register
            // is required, and it is not a localDefUse.
            assert(dstCount == 0);
            assert(!tree->gtGetOp1()->isContained());
            BuildUse(tree->gtGetOp1());
            srcCount = 1;
            break;

        case GT_IND:
            assert(dstCount == 1);
            srcCount = BuildIndir(tree->AsIndir());
            break;

        case GT_CATCH_ARG:
            srcCount = 0;
            assert(dstCount == 1);
            BuildDef(tree, RBM_EXCEPTION_OBJECT);
            break;

        case GT_CLS_VAR:
            srcCount = 0;
            // GT_CLS_VAR, by the time we reach the backend, must always
            // be a pure use.
            // It will produce a result of the type of the
            // node, and use an internal register for the address.

            assert(dstCount == 1);
            assert((tree->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEASG)) == 0);
            buildInternalIntRegisterDefForNode(tree);
            buildInternalRegisterUses();
            BuildDef(tree);
            break;

        case GT_INDEX_ADDR:
            assert(dstCount == 1);
            srcCount = BuildBinaryUses(tree->AsOp());
            buildInternalIntRegisterDefForNode(tree);
            buildInternalRegisterUses();
            BuildDef(tree);
            break;

    } // end switch (tree->OperGet())

    if (tree->IsUnusedValue() && (dstCount != 0))
    {
        isLocalDefUse = true;
    }
    // We need to be sure that we've set srcCount and dstCount appropriately
    assert((dstCount < 2) || tree->IsMultiRegCall());
    assert(isLocalDefUse == (tree->IsValue() && tree->IsUnusedValue()));
    assert(!tree->IsUnusedValue() || (dstCount != 0));
    assert(dstCount == tree->GetRegisterDstCount());
    INDEBUG(dumpNodeInfo(tree, dstCandidates, srcCount, dstCount));
    return srcCount;
}
Example #20
0
//------------------------------------------------------------------------
// BuildSIMD: Set the NodeInfo for a GT_SIMD tree.
//
// Arguments:
//    simdTree   - The GT_SIMD node of interest
//
// Return Value:
//    The number of sources consumed by this node.
//
int LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
{
    int srcCount = 0;
    // Only SIMDIntrinsicInit can be contained
    if (simdTree->isContained())
    {
        assert(simdTree->gtSIMDIntrinsicID == SIMDIntrinsicInit);
    }
    int dstCount = simdTree->IsValue() ? 1 : 0;
    assert(dstCount == 1);

    bool buildUses = true;

    GenTree* op1 = simdTree->gtGetOp1();
    GenTree* op2 = simdTree->gtGetOp2();

    switch (simdTree->gtSIMDIntrinsicID)
    {
        case SIMDIntrinsicInit:
        case SIMDIntrinsicCast:
        case SIMDIntrinsicSqrt:
        case SIMDIntrinsicAbs:
        case SIMDIntrinsicConvertToSingle:
        case SIMDIntrinsicConvertToInt32:
        case SIMDIntrinsicConvertToDouble:
        case SIMDIntrinsicConvertToInt64:
        case SIMDIntrinsicWidenLo:
        case SIMDIntrinsicWidenHi:
            // No special handling required.
            break;

        case SIMDIntrinsicGetItem:
        {
            op1 = simdTree->gtGetOp1();
            op2 = simdTree->gtGetOp2();

            // We have an object and an index, either of which may be contained.
            bool setOp2DelayFree = false;
            if (!op2->IsCnsIntOrI() && (!op1->isContained() || op1->OperIsLocal()))
            {
                // If the index is not a constant and the object is not contained or is a local
                // we will need a general purpose register to calculate the address
                // internal register must not clobber input index
                // TODO-Cleanup: An internal register will never clobber a source; this code actually
                // ensures that the index (op2) doesn't interfere with the target.
                buildInternalIntRegisterDefForNode(simdTree);
                setOp2DelayFree = true;
            }
            srcCount += BuildOperandUses(op1);
            if (!op2->isContained())
            {
                RefPosition* op2Use = BuildUse(op2);
                if (setOp2DelayFree)
                {
                    setDelayFree(op2Use);
                }
                srcCount++;
            }

            if (!op2->IsCnsIntOrI() && (!op1->isContained()))
            {
                // If vector is not already in memory (contained) and the index is not a constant,
                // we will use the SIMD temp location to store the vector.
                compiler->getSIMDInitTempVarNum();
            }
            buildUses = false;
        }
        break;

        case SIMDIntrinsicAdd:
        case SIMDIntrinsicSub:
        case SIMDIntrinsicMul:
        case SIMDIntrinsicDiv:
        case SIMDIntrinsicBitwiseAnd:
        case SIMDIntrinsicBitwiseAndNot:
        case SIMDIntrinsicBitwiseOr:
        case SIMDIntrinsicBitwiseXor:
        case SIMDIntrinsicMin:
        case SIMDIntrinsicMax:
        case SIMDIntrinsicEqual:
        case SIMDIntrinsicLessThan:
        case SIMDIntrinsicGreaterThan:
        case SIMDIntrinsicLessThanOrEqual:
        case SIMDIntrinsicGreaterThanOrEqual:
            // No special handling required.
            break;

        case SIMDIntrinsicSetX:
        case SIMDIntrinsicSetY:
        case SIMDIntrinsicSetZ:
        case SIMDIntrinsicSetW:
        case SIMDIntrinsicNarrow:
        {
            // Op1 will write to dst before Op2 is free
            BuildUse(op1);
            RefPosition* op2Use = BuildUse(op2);
            setDelayFree(op2Use);
            srcCount  = 2;
            buildUses = false;
            break;
        }

        case SIMDIntrinsicInitN:
        {
            var_types baseType = simdTree->gtSIMDBaseType;
            srcCount           = (short)(simdTree->gtSIMDSize / genTypeSize(baseType));
            if (varTypeIsFloating(simdTree->gtSIMDBaseType))
            {
                // Need an internal register to stitch together all the values into a single vector in a SIMD reg.
                buildInternalFloatRegisterDefForNode(simdTree);
            }

            int initCount = 0;
            for (GenTree* list = op1; list != nullptr; list = list->gtGetOp2())
            {
                assert(list->OperGet() == GT_LIST);
                GenTree* listItem = list->gtGetOp1();
                assert(listItem->TypeGet() == baseType);
                assert(!listItem->isContained());
                BuildUse(listItem);
                initCount++;
            }
            assert(initCount == srcCount);
            buildUses = false;

            break;
        }

        case SIMDIntrinsicInitArray:
            // We have an array and an index, which may be contained.
            break;

        case SIMDIntrinsicOpEquality:
        case SIMDIntrinsicOpInEquality:
            buildInternalFloatRegisterDefForNode(simdTree);
            break;

        case SIMDIntrinsicDotProduct:
            buildInternalFloatRegisterDefForNode(simdTree);
            break;

        case SIMDIntrinsicSelect:
            // TODO-ARM64-CQ Allow lowering to see SIMDIntrinsicSelect so we can generate BSL VC, VA, VB
            // bsl target register must be VC.  Reserve a temp in case we need to shuffle things.
            // This will require a different approach, as GenTreeSIMD has only two operands.
            assert(!"SIMDIntrinsicSelect not yet supported");
            buildInternalFloatRegisterDefForNode(simdTree);
            break;

        case SIMDIntrinsicInitArrayX:
        case SIMDIntrinsicInitFixed:
        case SIMDIntrinsicCopyToArray:
        case SIMDIntrinsicCopyToArrayX:
        case SIMDIntrinsicNone:
        case SIMDIntrinsicGetCount:
        case SIMDIntrinsicGetOne:
        case SIMDIntrinsicGetZero:
        case SIMDIntrinsicGetAllOnes:
        case SIMDIntrinsicGetX:
        case SIMDIntrinsicGetY:
        case SIMDIntrinsicGetZ:
        case SIMDIntrinsicGetW:
        case SIMDIntrinsicInstEquals:
        case SIMDIntrinsicHWAccel:
        case SIMDIntrinsicWiden:
        case SIMDIntrinsicInvalid:
            assert(!"These intrinsics should not be seen during register allocation");
            __fallthrough;

        default:
            noway_assert(!"Unimplemented SIMD node type.");
            unreached();
    }
    if (buildUses)
    {
        assert(!op1->OperIs(GT_LIST));
        assert(srcCount == 0);
        srcCount = BuildOperandUses(op1);
        if ((op2 != nullptr) && !op2->isContained())
        {
            srcCount += BuildOperandUses(op2);
        }
    }
    assert(internalCount <= MaxInternalCount);
    buildInternalRegisterUses();
    if (dstCount == 1)
    {
        BuildDef(simdTree);
    }
    else
    {
        assert(dstCount == 0);
    }
    return srcCount;
}
Example #21
0
//------------------------------------------------------------------------
// IsContainableImmed: Is an immediate encodable in-place?
//
// Arguments:
//    parentNode - The node that would consume the immediate
//    childNode  - The candidate immediate operand of parentNode
//
// Return Value:
//    True if the immediate can be folded into an instruction,
//    for example small enough and non-relocatable.
//
// Notes:
//    Floating-point parents never contain an immediate here.
//
//    On ARM64 the interlocked operations (GT_CMPXCHG, GT_LOCKADD, GT_XADD)
//    never contain an immediate when InstructionSet_Atomics is supported.
//    That restriction is specific to those operations: GT_ADD and GT_SUB are
//    always checked against the add-immediate encoding, independent of
//    whether the Atomics instruction set is available.
//
// TODO-CQ: we can contain a floating point 0.0 constant in a compare instruction
// (vcmp on arm, fcmp on arm64).
//
bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode)
{
    if (!varTypeIsFloating(parentNode->TypeGet()))
    {
        // Make sure we have an actual immediate
        if (!childNode->IsCnsIntOrI())
            return false;
        if (childNode->gtIntCon.ImmedValNeedsReloc(comp))
            return false;

        // TODO-CrossBitness: we wouldn't need the cast below if GenTreeIntCon::gtIconVal had target_ssize_t type.
        target_ssize_t immVal = (target_ssize_t)childNode->gtIntCon.gtIconVal;
        emitAttr       attr   = emitActualTypeSize(childNode->TypeGet());
        emitAttr       size   = EA_SIZE(attr);
#ifdef _TARGET_ARM_
        insFlags flags = parentNode->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
#endif

        switch (parentNode->OperGet())
        {
#ifdef _TARGET_ARM64_
            case GT_CMPXCHG:
            case GT_LOCKADD:
            case GT_XADD:
                // Only the interlocked operations depend on the Atomics instruction set:
                // when it is available they never contain an immediate; otherwise they
                // follow the same rule as an add.
                if (comp->compSupports(InstructionSet_Atomics))
                {
                    return false;
                }
                return emitter::emitIns_valid_imm_for_add(immVal, size);
#endif // _TARGET_ARM64_

            case GT_ADD:
            case GT_SUB:
#ifdef _TARGET_ARM64_
                return emitter::emitIns_valid_imm_for_add(immVal, size);
#elif defined(_TARGET_ARM_)
                return emitter::emitIns_valid_imm_for_add(immVal, flags);
#endif
                break;

#ifdef _TARGET_ARM64_
            case GT_EQ:
            case GT_NE:
            case GT_LT:
            case GT_LE:
            case GT_GE:
            case GT_GT:
                return emitter::emitIns_valid_imm_for_cmp(immVal, size);
            case GT_AND:
            case GT_OR:
            case GT_XOR:
            case GT_TEST_EQ:
            case GT_TEST_NE:
                return emitter::emitIns_valid_imm_for_alu(immVal, size);
            case GT_JCMP:
                // The immediate of a GT_JCMP is expected to be zero, or a power of two
                // when GTF_JCMP_TST is set; it is always contained.
                assert(((parentNode->gtFlags & GTF_JCMP_TST) == 0) ? (immVal == 0) : isPow2(immVal));
                return true;
#elif defined(_TARGET_ARM_)
            case GT_EQ:
            case GT_NE:
            case GT_LT:
            case GT_LE:
            case GT_GE:
            case GT_GT:
            case GT_CMP:
            case GT_AND:
            case GT_OR:
            case GT_XOR:
                return emitter::emitIns_valid_imm_for_alu(immVal);
#endif // _TARGET_ARM_

#ifdef _TARGET_ARM64_
            case GT_STORE_LCL_FLD:
            case GT_STORE_LCL_VAR:
                // Only a zero constant is contained for local stores.
                if (immVal == 0)
                    return true;
                break;
#endif

            default:
                break;
        }
    }

    return false;
}
Example #22
0
//------------------------------------------------------------------------
// genLoadFloat: Load the floating-point value of 'tree' into 'reg'.
//
// Arguments:
//    tree - The node whose value is to be loaded
//    reg  - The floating-point register that receives the value
//
// Notes:
//    An enregistered variable is unspilled if needed and copied into 'reg'.
//    Otherwise the value is loaded from memory. A load that is flagged
//    unaligned (or a misaligned floating-point local field) is performed
//    with integer loads and then moved over to 'reg'. A memory barrier is
//    emitted after a volatile GT_IND / GT_CLS_VAR load.
//
void CodeGen::genLoadFloat(GenTreePtr tree, regNumber reg)
{
    if (tree->IsRegVar())
    {
        // If the variable has been spilled, unspill it before copying.
        LclVarDsc* regVarDsc = &compiler->lvaTable[tree->gtLclVarCommon.gtLclNum];
        if (regVarDsc->lvSpilled)
        {
            UnspillFloat(regVarDsc);
        }

        inst_RV_RV(ins_FloatCopy(tree->TypeGet()), reg, tree->gtRegNum, tree->TypeGet());
        return;
    }

    // Decide whether the load has to go through the integer registers.
    bool loadViaIntRegs = false;
    if ((tree->OperGet() == GT_IND) || (tree->OperGet() == GT_CLS_VAR))
    {
        if (tree->gtFlags & GTF_IND_UNALIGNED)
        {
            loadViaIntRegs = true;
        }
    }
    else if (tree->OperGet() == GT_LCL_FLD)
    {
        // Check for a misalignment on a Floating Point field
        if (varTypeIsFloating(tree->TypeGet()) && ((tree->gtLclFld.gtLclOffs % emitTypeSize(tree->TypeGet())) != 0))
        {
            loadViaIntRegs = true;
        }
    }

    if (!loadViaIntRegs)
    {
        inst_RV_TT(ins_FloatLoad(tree->TypeGet()), reg, tree);
    }
    else
    {
        // Make the target addressable
        regMaskTP addrMask = genMakeAddressable(tree, 0, RegSet::KEEP_REG, true);
        regSet.rsLockUsedReg(addrMask); // Must prevent regSet.rsGrabReg from choosing an addrReg

        var_types fpType = tree->TypeGet();
        assert(fpType == TYP_DOUBLE || fpType == TYP_FLOAT);
        const bool isDouble = (fpType == TYP_DOUBLE);

        // Unaligned Floating-Point Loads must be loaded into integer register(s)
        // and then moved over to the Floating-Point register
        regNumber loReg       = regSet.rsGrabReg(RBM_ALLINT);
        regNumber hiReg       = REG_NA;
        regMaskTP scratchMask = genRegMask(loReg);

        if (isDouble)
        {
            // A double needs a second integer register, distinct from the first.
            hiReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(loReg));
            scratchMask |= genRegMask(hiReg);
        }

        regSet.rsLockReg(scratchMask); // Temporarily lock the integer registers
        tree->gtType = TYP_INT;        // Temporarily retype the node so the loads are integer loads

        inst_RV_TT(ins_Load(TYP_INT), loReg, tree);
        regTracker.rsTrackRegTrash(loReg);

        if (isDouble)
        {
            // Second integer load, at offset 4 from the first.
            inst_RV_TT(ins_Load(TYP_INT), hiReg, tree, 4);
            regTracker.rsTrackRegTrash(hiReg);
        }

        tree->gtType = fpType;           // Restore the floating point type
        regSet.rsUnlockReg(scratchMask); // Unlock the integer registers

        // Move the integer register(s) over to the FP register
        if (isDouble)
        {
            getEmitter()->emitIns_R_R_R(INS_vmov_i2d, EA_8BYTE, reg, loReg, hiReg);
        }
        else
        {
            getEmitter()->emitIns_R_R(INS_vmov_i2f, EA_4BYTE, reg, loReg);
        }

        // Free up anything that was tied up by genMakeAddressable
        regSet.rsUnlockUsedReg(addrMask);
        genDoneAddressable(tree, addrMask, RegSet::KEEP_REG);
    }

    const bool isIndirOrClsVar = (tree->OperGet() == GT_CLS_VAR) || (tree->OperGet() == GT_IND);
    if (isIndirOrClsVar && (tree->gtFlags & GTF_IND_VOLATILE))
    {
        // Emit a memory barrier instruction after the load
        instGen_MemoryBarrier();
    }
}
Example #23
0
//------------------------------------------------------------------------
// genFloatAssign: Generate code for a floating-point assignment (GT_ASG).
//
// Arguments:
//    tree - The GT_ASG node; op1 is the target and op2 is the value assigned
//
// Notes:
//    The function has three exits through two labels:
//      - If the target is an enregistered local, op2 is evaluated (preferring
//        the target register), moved/converted into it if necessary, and
//        control jumps straight to DONE_ASG.
//      - If op2 is an enregistered local of the same type and the operands are
//        not evaluated in reverse order, the target is made addressable and
//        control jumps to CHK_VOLAT_UNALIGN to store directly from op2's register.
//      - Otherwise both operands are evaluated in the order selected by
//        GTF_REVERSE_OPS, an implicit conversion is inserted if the types
//        differ, and control falls into CHK_VOLAT_UNALIGN.
//    At CHK_VOLAT_UNALIGN a memory barrier is emitted before a volatile store,
//    and an unaligned store is performed through integer register(s).
//
void CodeGen::genFloatAssign(GenTree *tree)
{
    var_types   type       = tree->TypeGet();
    GenTreePtr  op1        = tree->gtGetOp1();
    GenTreePtr  op2        = tree->gtGetOp2();

    regMaskTP   needRegOp1 = RBM_ALLINT;   // Register mask passed to genMakeAddressable for op1
    regMaskTP   addrReg    = RBM_NONE;     // Registers tied up by making op1 addressable
    bool        volat      = false;        // Is this a volatile store
    bool        unaligned  = false;        // Is this an unaligned store
    regNumber   op2reg     = REG_NA;       // Register holding the value to store

#ifdef DEBUGGING_SUPPORT
    // lclVarNum starts out as lvaCount, i.e. not a valid local number; it is only
    // replaced by a real local number when siCheckVarScope must be called at the end.
    unsigned    lclVarNum  = compiler->lvaCount;
    unsigned    lclILoffs  = DUMMY_INIT(0);
#endif

    noway_assert(tree->OperGet() == GT_ASG);

    // Is the target a floating-point local variable?
    //  possibly even an enregistered floating-point local variable?
    //
    switch (op1->gtOper)
    {
        unsigned        varNum;
        LclVarDsc   *   varDsc;

    case GT_LCL_FLD:
        // Check for a misalignment on a Floating Point field
        //
        if (varTypeIsFloating(op1->TypeGet()))
        {                
            if ((op1->gtLclFld.gtLclOffs % emitTypeSize(op1->TypeGet())) != 0)
            {
                unaligned = true;
            }
        }
        break;

    case GT_LCL_VAR:
        varNum = op1->gtLclVarCommon.gtLclNum;
        noway_assert(varNum < compiler->lvaCount);
        varDsc = compiler->lvaTable + varNum;

 #ifdef DEBUGGING_SUPPORT
        // For non-debuggable code, every definition of a lcl-var has
        // to be checked to see if we need to open a new scope for it.
        // Remember the local var info to call siCheckVarScope
        // AFTER code generation of the assignment.
        //
        if (compiler->opts.compScopeInfo && !compiler->opts.compDbgCode && (compiler->info.compVarScopesCount > 0))
        {
            lclVarNum = varNum;
            lclILoffs = op1->gtLclVar.gtLclILoffs;
        }
 #endif

        // Dead Store assert (with min opts we may have dead stores)
        //
        noway_assert(!varDsc->lvTracked || compiler->opts.MinOpts() ||  !(op1->gtFlags & GTF_VAR_DEATH));

        // Does this variable live in a register?
        //
        if (genMarkLclVar(op1))
        {
             noway_assert(!compiler->opts.compDbgCode);   // We don't enregister any floats with debug codegen

            // Get hold of the target register
            //
            regNumber op1Reg = op1->gtRegVar.gtRegNum;

            // the variable being assigned should be dead in op2
            assert(!varDsc->lvTracked || !VarSetOps::IsMember(compiler, genUpdateLiveSetForward(op2), varDsc->lvVarIndex));

            // Setup register preferencing, so that we try to target the op1 enregistered variable
            //
            regMaskTP bestMask = genRegMask(op1Reg);
            if (type==TYP_DOUBLE)
            {
                // For a double the preference also includes the register following op1Reg.
                assert((bestMask & RBM_DBL_REGS) != 0);
                bestMask |= genRegMask(REG_NEXT(op1Reg));
            }
            RegSet::RegisterPreference pref(RBM_ALLFLOAT, bestMask);

            // Evaluate op2 into a floating point register 
            //
            genCodeForTreeFloat(op2, &pref);
    
            noway_assert(op2->gtFlags & GTF_REG_VAL);

            // Make sure the value ends up in the right place ...
            // For example if op2 is a call that returns a result 
            // in REG_F0, we will need to do a move instruction here
            //
            if ((op2->gtRegNum != op1Reg) || (op2->TypeGet() != type))
            {
                // Spill anything currently using the target register(s) before overwriting them.
                regMaskTP spillRegs = regSet.rsMaskUsed & genRegMaskFloat(op1Reg, op1->TypeGet());
                if  (spillRegs != 0)
                    regSet.rsSpillRegs(spillRegs);

                assert(type == op1->TypeGet());

                inst_RV_RV(ins_FloatConv(type, op2->TypeGet()), op1Reg, op2->gtRegNum, type);
            }
            genUpdateLife(op1);
            // The value is now in the target register; no store to memory is needed.
            goto DONE_ASG;
        }
        break;

    case GT_CLS_VAR:
    case GT_IND:
        // Check for a volatile/unaligned store
        //
        assert((op1->OperGet() == GT_CLS_VAR) || (op1->OperGet() == GT_IND));   // Required for GTF_IND_VOLATILE flag to be valid
        if (op1->gtFlags & GTF_IND_VOLATILE)
            volat = true;
        if (op1->gtFlags & GTF_IND_UNALIGNED)
            unaligned = true;
        break;

    default:
        break;
    }

    // Is the value being assigned an enregistered floating-point local variable?
    //
    switch (op2->gtOper)
    {
    case GT_LCL_VAR:

        if  (!genMarkLclVar(op2))
            break;

        __fallthrough;

    case GT_REG_VAR:

        // We must honor the order of evaluation in case op1 reassigns our op2 register
        //
        if  (tree->gtFlags & GTF_REVERSE_OPS)
            break;

        // Is there an implicit conversion that we have to insert?
        // Handle this case with the normal cases below.
        //
        if (type != op2->TypeGet())
            break;

        // Make the target addressable
        //
        addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true);

        noway_assert(op2->gtFlags & GTF_REG_VAL);
        noway_assert(op2->IsRegVar());
        
        op2reg = op2->gtRegVar.gtRegNum; 
        genUpdateLife(op2);

        // op2 is already in a register of the right type; skip evaluation and conversion.
        goto CHK_VOLAT_UNALIGN;
    default:
        break;
    }

    // Is the op2 (RHS) more complex than op1 (LHS)?
    //
    if  (tree->gtFlags & GTF_REVERSE_OPS)
    {
        // Prefer float registers that op1 has not reserved.
        regMaskTP bestRegs = regSet.rsNarrowHint(RBM_ALLFLOAT, ~op1->gtRsvdRegs);
        RegSet::RegisterPreference pref(RBM_ALLFLOAT, bestRegs);

        // Generate op2 (RHS) into a floating point register
        //
        genCodeForTreeFloat(op2, &pref);
        // NOTE(review): presumably marks op2's register as in use while the address
        // of op1 is computed - confirm against RegSet::SetUsedRegFloat.
        regSet.SetUsedRegFloat(op2, true);

        // Make the target addressable
        //
        addrReg = genMakeAddressable(op1,
                                     needRegOp1,
                                     RegSet::KEEP_REG, true);

        // NOTE(review): presumably brings op2 back into a float register if making op1
        // addressable displaced it - confirm against genRecoverReg.
        genRecoverReg(op2, RBM_ALLFLOAT, RegSet::KEEP_REG);
        noway_assert(op2->gtFlags & GTF_REG_VAL);
        regSet.SetUsedRegFloat(op2, false);
    }
    else
    {
        // Steer the address computation away from registers that op2 has reserved.
        needRegOp1 = regSet.rsNarrowHint(needRegOp1, ~op2->gtRsvdRegs);

        // Make the target addressable
        //
        addrReg = genMakeAddressable(op1,
                                     needRegOp1,
                                     RegSet::KEEP_REG, true);
        

        // Generate the RHS into any floating point register
        genCodeForTreeFloat(op2);

    }
    noway_assert(op2->gtFlags & GTF_REG_VAL);

    op2reg = op2->gtRegNum;

    // Is there an implicit conversion that we have to insert?
    //
    if (type != op2->TypeGet())
    {
        // Prefer a destination that overlaps op2's register; for a double the
        // preference is extended to the adjacent register.
        regMaskTP bestMask = genRegMask(op2reg);
        if (type==TYP_DOUBLE)
        {
            if (bestMask & RBM_DBL_REGS)
            {
                bestMask |= genRegMask(REG_NEXT(op2reg));
            }
            else
            {
                bestMask |= genRegMask(REG_PREV(op2reg));
            }
        }
        RegSet::RegisterPreference op2Pref(RBM_ALLFLOAT, bestMask);
        op2reg = regSet.PickRegFloat(type, &op2Pref);

        inst_RV_RV(ins_FloatConv(type, op2->TypeGet()), op2reg, op2->gtRegNum, type);
    }

    // Make sure the LHS is still addressable
    //
    addrReg = genKeepAddressable(op1, addrReg);

CHK_VOLAT_UNALIGN:
        
    regSet.rsLockUsedReg(addrReg);  // Must prevent unaligned regSet.rsGrabReg from choosing an addrReg
        
    if (volat)
    {
        // Emit a memory barrier instruction before the store
        instGen_MemoryBarrier();
    }
    if (unaligned)
    {
        var_types storeType = op1->TypeGet();
        assert(storeType == TYP_DOUBLE || storeType == TYP_FLOAT);

        // Unaligned Floating-Point Stores must be done using the integer register(s)
        regNumber  intRegLo    = regSet.rsGrabReg(RBM_ALLINT);
        regNumber  intRegHi    = REG_NA;
        regMaskTP  tmpLockMask = genRegMask(intRegLo);

        if (storeType == TYP_DOUBLE)
        {
            // A double needs a second integer register, distinct from the first.
            intRegHi     = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(intRegLo));
            tmpLockMask |= genRegMask(intRegHi);
        }

        // move the FP register over to the integer register(s)
        //
        if  (storeType == TYP_DOUBLE)
        {
            getEmitter()->emitIns_R_R_R(INS_vmov_d2i, EA_8BYTE, intRegLo, intRegHi, op2reg);
            regTracker.rsTrackRegTrash(intRegHi);
        }
        else 
        {
            getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, intRegLo, op2reg);
        }
        regTracker.rsTrackRegTrash(intRegLo);

        regSet.rsLockReg(tmpLockMask);     // Temporarily lock the intRegs 
        op1->gtType = TYP_INT;      // Temporarily change the type to TYP_INT
        
        inst_TT_RV(ins_Store(TYP_INT), op1, intRegLo);

        if (storeType == TYP_DOUBLE)
        {
            // Second integer store, at offset 4 from the first.
            inst_TT_RV(ins_Store(TYP_INT), op1, intRegHi, 4);
        }
        
        op1->gtType = storeType;    // Change the type back to the floating point type
        regSet.rsUnlockReg(tmpLockMask);   // Unlock the intRegs
    }
    else
    {
        // Move the value into the target
        //        
        inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2reg);
    }

    // Free up anything that was tied up by the LHS
    //
    regSet.rsUnlockUsedReg(addrReg);
    genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);

DONE_ASG:

    genUpdateLife(tree);

#ifdef DEBUGGING_SUPPORT
    /* For non-debuggable code, every definition of a lcl-var has
     * to be checked to see if we need to open a new scope for it.
     * lclVarNum is only a valid local number if it was recorded above.
     */
    if (lclVarNum < compiler->lvaCount)
        siCheckVarScope(lclVarNum, lclILoffs);
#endif
}
Example #24
0
//------------------------------------------------------------------------
// genHWIntrinsic: Generates the code for a given hardware intrinsic node.
//
// Arguments:
//    node - The hardware intrinsic node
//
// Notes:
//    Intrinsics that are not table-driven are forwarded to the generator for
//    their instruction set. Table-driven intrinsics are emitted here, with
//    the instruction form selected from the argument count, the intrinsic
//    category and flags, and whether an operand is an immediate.
//
void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
{
    NamedIntrinsic      intrinsicID    = node->gtHWIntrinsicId;
    InstructionSet      instructionSet = Compiler::isaOfHWIntrinsic(intrinsicID);
    HWIntrinsicCategory category       = Compiler::categoryOfHWIntrinsic(intrinsicID);
    HWIntrinsicFlag     flags          = Compiler::flagsOfHWIntrinsic(intrinsicID);
    int                 tableImm       = Compiler::ivalOfHWIntrinsic(intrinsicID);
    int                 numArgs        = Compiler::numArgsOfHWIntrinsic(node);

    assert((flags & HW_Flag_NoCodeGen) == 0);

    if (!genIsTableDrivenHWIntrinsic(category, flags))
    {
        // Not table-driven: hand off to the per-instruction-set generator.
        switch (instructionSet)
        {
            case InstructionSet_SSE:
                genSSEIntrinsic(node);
                break;
            case InstructionSet_SSE2:
                genSSE2Intrinsic(node);
                break;
            case InstructionSet_SSE41:
                genSSE41Intrinsic(node);
                break;
            case InstructionSet_SSE42:
                genSSE42Intrinsic(node);
                break;
            case InstructionSet_AVX:
                genAVXIntrinsic(node);
                break;
            case InstructionSet_AVX2:
                genAVX2Intrinsic(node);
                break;
            case InstructionSet_AES:
                genAESIntrinsic(node);
                break;
            case InstructionSet_BMI1:
                genBMI1Intrinsic(node);
                break;
            case InstructionSet_BMI2:
                genBMI2Intrinsic(node);
                break;
            case InstructionSet_FMA:
                genFMAIntrinsic(node);
                break;
            case InstructionSet_LZCNT:
                genLZCNTIntrinsic(node);
                break;
            case InstructionSet_PCLMULQDQ:
                genPCLMULQDQIntrinsic(node);
                break;
            case InstructionSet_POPCNT:
                genPOPCNTIntrinsic(node);
                break;
            default:
                unreached();
                break;
        }
        return;
    }

    // Table-driven intrinsic.
    GenTree*  op1       = node->gtGetOp1();
    GenTree*  op2       = node->gtGetOp2();
    regNumber targetReg = node->gtRegNum;
    var_types baseType  = node->gtSIMDBaseType;

    regNumber op1Reg = REG_NA;
    regNumber op2Reg = REG_NA;
    emitter*  emit   = getEmitter();

    assert(numArgs >= 0);
    instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
    assert(ins != INS_invalid);
    emitAttr simdSize = EA_ATTR(node->gtSIMDSize);
    assert(simdSize != 0);

    // True when the intrinsic table supplies an immediate (anything other than -1)
    // and the base type is floating-point.
    const bool hasFloatTableImm = (tableImm != -1) && varTypeIsFloating(baseType);

    switch (numArgs)
    {
        case 1:
        {
            genConsumeOperands(node);
            op1Reg = op1->gtRegNum;

            const bool copiesUpperBits =
                (category == HW_Category_SIMDScalar) && ((flags & HW_Flag_CopyUpperBits) != 0);

            if (category == HW_Category_MemoryLoad)
            {
                emit->emitIns_R_AR(ins, simdSize, targetReg, op1Reg, 0);
            }
            else if (copiesUpperBits)
            {
                // op1 is passed for both source operands.
                emit->emitIns_SIMD_R_R_R(ins, simdSize, targetReg, op1Reg, op1Reg);
            }
            else if (hasFloatTableImm)
            {
                emit->emitIns_R_R_I(ins, simdSize, targetReg, op1Reg, tableImm);
            }
            else
            {
                emit->emitIns_R_R(ins, simdSize, targetReg, op1Reg);
            }
            break;
        }

        case 2:
        {
            genConsumeOperands(node);
            op1Reg = op1->gtRegNum;
            op2Reg = op2->gtRegNum;

            if (category == HW_Category_MemoryStore)
            {
                emit->emitIns_AR_R(ins, simdSize, op2Reg, op1Reg, 0);
            }
            else if (hasFloatTableImm)
            {
                genHWIntrinsic_R_R_RM_I(node, ins);
            }
            else if (category == HW_Category_MemoryLoad)
            {
                emit->emitIns_SIMD_R_R_AR(ins, simdSize, targetReg, op1Reg, op2Reg);
            }
            else if (Compiler::isImmHWIntrinsic(intrinsicID, op2))
            {
                if (intrinsicID == NI_SSE2_Extract)
                {
                    // extract instructions return to GP-registers, so it needs int size as the emitsize
                    simdSize = emitTypeSize(TYP_INT);
                }

                // Emits the instruction for one specific immediate value.
                auto emitForImm = [&](unsigned imm) {
                    emit->emitIns_SIMD_R_R_I(ins, simdSize, targetReg, op1Reg, static_cast<int>(imm));
                };

                if (op2->IsCnsIntOrI())
                {
                    ssize_t immValue = op2->AsIntCon()->IconValue();
                    emitForImm(static_cast<unsigned>(immValue));
                }
                else
                {
                    // We emit a fallback case for the scenario when the imm-op is not a constant. This should
                    // normally happen when the intrinsic is called indirectly, such as via Reflection. However, it
                    // can also occur if the consumer calls it directly and just doesn't pass a constant value.
                    regNumber baseReg = node->ExtractTempReg();
                    regNumber offsReg = node->GetSingleTempReg();
                    genHWIntrinsicJumpTableFallback(intrinsicID, op2Reg, baseReg, offsReg, emitForImm);
                }
            }
            else
            {
                genHWIntrinsic_R_R_RM(node, ins);
            }
            break;
        }

        case 3:
        {
            // The three operands are held in a list hanging off op1.
            assert(op1->OperIsList());
            assert(op1->gtGetOp2()->OperIsList());
            assert(op1->gtGetOp2()->gtGetOp2()->OperIsList());

            GenTreeArgList* args = op1->AsArgList();
            op1                  = args->Current();
            genConsumeRegs(op1);
            op1Reg = op1->gtRegNum;

            args = args->Rest();
            op2  = args->Current();
            genConsumeRegs(op2);
            op2Reg = op2->gtRegNum;

            args         = args->Rest();
            GenTree* op3 = args->Current();
            genConsumeRegs(op3);
            regNumber op3Reg = op3->gtRegNum;

            if (Compiler::isImmHWIntrinsic(intrinsicID, op3))
            {
                // Emits the instruction for one specific immediate value.
                auto emitForImm = [&](unsigned imm) {
                    emit->emitIns_SIMD_R_R_R_I(ins, simdSize, targetReg, op1Reg, op2Reg, static_cast<int>(imm));
                };

                if (op3->IsCnsIntOrI())
                {
                    ssize_t immValue = op3->AsIntCon()->IconValue();
                    emitForImm(static_cast<unsigned>(immValue));
                }
                else
                {
                    // We emit a fallback case for the scenario when the imm-op is not a constant. This should
                    // normally happen when the intrinsic is called indirectly, such as via Reflection. However, it
                    // can also occur if the consumer calls it directly and just doesn't pass a constant value.
                    regNumber baseReg = node->ExtractTempReg();
                    regNumber offsReg = node->GetSingleTempReg();
                    genHWIntrinsicJumpTableFallback(intrinsicID, op3Reg, baseReg, offsReg, emitForImm);
                }
            }
            else if (category == HW_Category_MemoryStore)
            {
                assert(intrinsicID == NI_SSE2_MaskMove);
                assert(targetReg == REG_NA);

                // SSE2 MaskMove hardcodes the destination (op3) in DI/EDI/RDI
                if (op3Reg != REG_EDI)
                {
                    emit->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_EDI, op3Reg);
                }
                emit->emitIns_R_R(ins, simdSize, op1Reg, op2Reg);
            }
            else
            {
                emit->emitIns_SIMD_R_R_R_R(ins, simdSize, targetReg, op1Reg, op2Reg, op3Reg);
            }
            break;
        }

        default:
            unreached();
            break;
    }

    genProduceReg(node);
}