Exemplo n.º 1
0
void CodeGen::genFloatMath(GenTree *tree, RegSet::RegisterPreference *pref)
{
    assert(tree->OperGet() == GT_INTRINSIC);

    GenTreePtr op1 = tree->gtOp.gtOp1;

    // get tree into a register
    genCodeForTreeFloat(op1, pref);

    instruction ins;

    switch (tree->gtIntrinsic.gtIntrinsicId)
    {
    case CORINFO_INTRINSIC_Sin:
        ins = INS_invalid; 
        break;
    case CORINFO_INTRINSIC_Cos:
        ins = INS_invalid; 
        break;
    case CORINFO_INTRINSIC_Sqrt:
        ins = INS_vsqrt; 
        break;
    case CORINFO_INTRINSIC_Abs:
        ins = INS_vabs; 
        break;
    case CORINFO_INTRINSIC_Round:
    {
        regNumber reg = regSet.PickRegFloat(tree->TypeGet(), pref);
        genMarkTreeInReg(tree, reg);
        // convert it to a long and back
        inst_RV_RV(ins_FloatConv(TYP_LONG,tree->TypeGet()), 
                   reg, op1->gtRegNum, tree->TypeGet());
        inst_RV_RV(ins_FloatConv(tree->TypeGet(), TYP_LONG), reg, reg);
        genCodeForTreeFloat_DONE(tree, op1->gtRegNum);
        return;
    }
    break;
    default:
        unreached();
    }

    if (ins != INS_invalid)
    {
        regNumber reg = regSet.PickRegFloat(tree->TypeGet(), pref);
        genMarkTreeInReg(tree, reg);
        inst_RV_RV(ins, reg, op1->gtRegNum, tree->TypeGet());
        // mark register that holds tree
        genCodeForTreeFloat_DONE(tree, reg);
    }
    else
    {
        unreached();
        // If unreached is removed, mark register that holds tree
        // genCodeForTreeFloat_DONE(tree, op1->gtRegNum);
    }
            
    return;            
}
Exemplo n.º 2
0
void Rationalizer::RewriteAssignment(LIR::Use& use)
{
    assert(use.IsInitialized());

    GenTreeOp* assignment = use.Def()->AsOp();
    assert(assignment->OperGet() == GT_ASG);

    GenTree* location = assignment->gtGetOp1();
    GenTree* value    = assignment->gtGetOp2();

    genTreeOps locationOp = location->OperGet();
    switch (locationOp)
    {
        case GT_LCL_VAR:
        case GT_LCL_FLD:
        case GT_REG_VAR:
        case GT_PHI_ARG:
            RewriteAssignmentIntoStoreLclCore(assignment, location, value, locationOp);
            BlockRange().Remove(location);
            break;

        case GT_IND:
        {
            GenTreeStoreInd* store =
                new (comp, GT_STOREIND) GenTreeStoreInd(location->TypeGet(), location->gtGetOp1(), value);

            copyFlags(store, assignment, GTF_ALL_EFFECT);
            copyFlags(store, location, GTF_IND_FLAGS);

            if (assignment->IsReverseOp())
            {
                store->gtFlags |= GTF_REVERSE_OPS;
            }

            // TODO: JIT dump

            // Remove the GT_IND node and replace the assignment node with the store
            BlockRange().Remove(location);
            BlockRange().InsertBefore(assignment, store);
            use.ReplaceWith(comp, store);
            BlockRange().Remove(assignment);
        }
        break;

        case GT_CLS_VAR:
        {
            location->SetOper(GT_CLS_VAR_ADDR);
            location->gtType = TYP_BYREF;

            assignment->SetOper(GT_STOREIND);

            // TODO: JIT dump
        }
        break;

        default:
            unreached();
            break;
    }
}
Exemplo n.º 3
0
// return op that is the store equivalent of the given load opcode
genTreeOps storeForm(genTreeOps loadForm)
{
    switch (loadForm)
    {
        case GT_LCL_VAR:
            return GT_STORE_LCL_VAR;
        case GT_LCL_FLD:
            return GT_STORE_LCL_FLD;
        case GT_REG_VAR:
            noway_assert(!"reg vars only supported in classic backend\n");
            unreached();
        default:
            noway_assert(!"not a data load opcode\n");
            unreached();
    }
}
Exemplo n.º 4
0
void LegacyPolicy::SetFailure(InlineObservation obs)
{
    // Expect a valid observation
    assert(InlIsValidObservation(obs));

    switch (m_Decision)
    {
    case InlineDecision::FAILURE:
        // Repeated failure only ok if evaluating a prejit root
        // (since we can't fail fast because we're not inlining)
        // or if inlining and the observation is CALLSITE_TOO_MANY_LOCALS
        // (since we can't fail fast from lvaGrabTemp).
        assert(m_IsPrejitRoot ||
               (obs == InlineObservation::CALLSITE_TOO_MANY_LOCALS));
        break;
    case InlineDecision::UNDECIDED:
    case InlineDecision::CANDIDATE:
        m_Decision = InlineDecision::FAILURE;
        m_Observation = obs;
        break;
    default:
        // SUCCESS, NEVER, or ??
        assert(!"Unexpected m_Decision");
        unreached();
    }
}
Exemplo n.º 5
0
// return op that is the load equivalent of the given addr opcode
genTreeOps loadForm(genTreeOps addrForm)
{
    switch (addrForm)
    {
        case GT_LCL_VAR_ADDR:
            return GT_LCL_VAR;
        case GT_LCL_FLD_ADDR:
            return GT_LCL_FLD;
        default:
            noway_assert(!"not a local address opcode\n");
            unreached();
    }
}
Exemplo n.º 6
0
//--------------------------------------------------------------------------------------------------
// ToGenTree - Convert an "expression" into a gentree node.
//
// Arguments:
//      comp    Compiler instance to allocate trees
//
// Return Values:
//      Returns the gen tree representation for either a constant or a variable or an arrLen operation
//      defined by the "type" member
//
GenTreePtr LC_Expr::ToGenTree(Compiler* comp)
{
    // Convert to GenTree nodes.
    switch (type)
    {
        case Ident:
            return ident.ToGenTree(comp);
        default:
            assert(!"Could not convert LC_Expr to GenTree");
            unreached();
            break;
    }
}
Exemplo n.º 7
0
// LIR helpers
void BasicBlock::MakeLIR(GenTree* firstNode, GenTree* lastNode)
{
#ifdef LEGACY_BACKEND
    unreached();
#else  // !LEGACY_BACKEND
    assert(!IsLIR());
    assert((firstNode == nullptr) == (lastNode == nullptr));
    assert((firstNode == lastNode) || firstNode->Precedes(lastNode));

    m_firstNode = firstNode;
    m_lastNode  = lastNode;
    bbFlags |= BBF_IS_LIR;
#endif // LEGACY_BACKEND
}
Exemplo n.º 8
0
CorInfoInline InlGetCorInfoInlineDecision(InlineDecision d)
{
    switch (d) {
    case InlineDecision::SUCCESS:
        return INLINE_PASS;
    case InlineDecision::FAILURE:
        return INLINE_FAIL;
    case InlineDecision::NEVER:
        return INLINE_NEVER;
    default:
        assert(!"Unexpected InlineDecision");
        unreached();
    }
}
Exemplo n.º 9
0
bool InlDecisionIsDecided(InlineDecision d)
{
    switch (d) {
    case InlineDecision::NEVER:
    case InlineDecision::FAILURE:
    case InlineDecision::SUCCESS:
        return true;
    case InlineDecision::UNDECIDED:
    case InlineDecision::CANDIDATE:
        return false;
    default:
        assert(!"Unexpected InlineDecision");
        unreached();
    }
}
Exemplo n.º 10
0
//------------------------------------------------------------------------
// genAVX2Intrinsic: Generates the code for an AVX2 hardware intrinsic node
//
// Arguments:
//    node - The hardware intrinsic node
//
void CodeGen::genAVX2Intrinsic(GenTreeHWIntrinsic* node)
{
    NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
    var_types      baseType    = node->gtSIMDBaseType;
    instruction    ins         = INS_invalid;

    genConsumeOperands(node);

    switch (intrinsicID)
    {
        default:
            unreached();
            break;
    }

    genProduceReg(node);
}
Exemplo n.º 11
0
const char* InlGetDecisionString(InlineDecision d)
{
    switch (d) {
    case InlineDecision::SUCCESS:
        return "success";
    case InlineDecision::FAILURE:
        return "failed this call site";
    case InlineDecision::NEVER:
        return "failed this callee";
    case InlineDecision::CANDIDATE:
        return "candidate";
    case InlineDecision::UNDECIDED:
        return "undecided";
    default:
        assert(!"Unexpected InlineDecision");
        unreached();
    }
}
Exemplo n.º 12
0
// Rewrite GT_OBJ of SIMD Vector as GT_IND(GT_LEA(obj.op1)) of a SIMD type.
//
// Arguments:
//    ppTree      - A pointer-to-a-pointer for the GT_OBJ
//    fgWalkData  - A pointer to tree walk data providing the context
//
// Return Value:
//    None.
//
// TODO-Cleanup: Once SIMD types are plumbed through the frontend, this will no longer
// be required.
//
void Rationalizer::RewriteObj(LIR::Use& use)
{
#ifdef FEATURE_SIMD
    GenTreeObj* obj = use.Def()->AsObj();

// For UNIX struct passing, we can have Obj nodes for arguments.
// For other cases, we should never see a non-SIMD type here.
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
    if (!varTypeIsSIMD(obj))
    {
        return;
    }
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING

    // Should come here only if featureSIMD is enabled
    noway_assert(comp->featureSIMD);

    // We should only call this with a SIMD type.
    noway_assert(varTypeIsSIMD(obj));
    var_types simdType = obj->TypeGet();

    // If the operand of obj is a GT_ADDR(GT_LCL_VAR) and LclVar is known to be a SIMD type,
    // replace obj by GT_LCL_VAR.
    GenTree* srcAddr = obj->gtGetOp1();
    if (srcAddr->OperIsLocalAddr() && comp->isAddrOfSIMDType(srcAddr))
    {
        BlockRange().Remove(obj);

        srcAddr->SetOper(loadForm(srcAddr->OperGet()));
        srcAddr->gtType = simdType;
        use.ReplaceWith(comp, srcAddr);
    }
    else
    {
        obj->SetOper(GT_IND);
        obj->gtType = simdType;
    }
#else
    // we should never reach without feature SIMD
    assert(!"Unexpected obj during rationalization\n");
    unreached();
#endif
}
Exemplo n.º 13
0
/*-----------------------------------------------------------------------------
 * ppu_finish_dt
 *
 * Finishes processing for a PPU data transfer command after the SPU command
 * that handles the other end has completed.
 *---------------------------------------------------------------------------*/
void
ppu_finish_dt(PPU_DT_PARAMS *cmd)
{
  switch (cmd->type) {
    // case PPU_CMD_DT_IN_FRONT:

  case PPU_CMD_DT_IN_BACK:
    ppu_finish_dt_in_back(cmd);
    break;

  case PPU_CMD_DT_OUT_FRONT:
    ppu_finish_dt_out_front(cmd);
    break;

    // case PPU_CMD_DT_OUT_BACK:

  default:
    unreached();
  }
}
Exemplo n.º 14
0
//--------------------------------------------------------------------------------------------------
// ToGenTree - Convert an "identifier" into a gentree node.
//
// Arguments:
//      comp    Compiler instance to allocate trees
//
// Return Values:
//      Returns the gen tree representation for either a constant or a variable or an arrLen operation
//      defined by the "type" member
//
GenTreePtr LC_Ident::ToGenTree(Compiler* comp)
{
    // Convert to GenTree nodes.
    switch (type)
    {
        case Const:
            assert(constant <= INT32_MAX);
            return comp->gtNewIconNode(constant);
        case Var:
            return comp->gtNewLclvNode(constant, comp->lvaTable[constant].lvType);
        case ArrLen:
            return arrLen.ToGenTree(comp);
        case Null:
            return comp->gtNewIconNode(0, TYP_REF);
        default:
            assert(!"Could not convert LC_Ident to GenTree");
            unreached();
            break;
    }
}
Exemplo n.º 15
0
//------------------------------------------------------------------------
// genSSE42Intrinsic: Generates the code for an SSE4.2 hardware intrinsic node
//
// Arguments:
//    node - The hardware intrinsic node
//
void CodeGen::genSSE42Intrinsic(GenTreeHWIntrinsic* node)
{
    NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
    GenTree*       op1         = node->gtGetOp1();
    GenTree*       op2         = node->gtGetOp2();
    regNumber      targetReg   = node->gtRegNum;
    assert(targetReg != REG_NA);
    var_types targetType = node->TypeGet();
    var_types baseType   = node->gtSIMDBaseType;

    regNumber op1Reg = op1->gtRegNum;
    regNumber op2Reg = op2->gtRegNum;
    genConsumeOperands(node);

    switch (intrinsicID)
    {
        case NI_SSE42_Crc32:
            if (op1Reg != targetReg)
            {
                inst_RV_RV(INS_mov, targetReg, op1Reg, targetType, emitTypeSize(targetType));
            }

            if (baseType == TYP_UBYTE || baseType == TYP_USHORT) // baseType is the type of the second argument
            {
                assert(targetType == TYP_INT);
                inst_RV_RV(INS_crc32, targetReg, op2Reg, baseType, emitTypeSize(baseType));
            }
            else
            {
                assert(op1->TypeGet() == op2->TypeGet());
                assert(targetType == TYP_INT || targetType == TYP_LONG);
                inst_RV_RV(INS_crc32, targetReg, op2Reg, targetType, emitTypeSize(targetType));
            }

            break;
        default:
            unreached();
            break;
    }
    genProduceReg(node);
}
Exemplo n.º 16
0
void LegacyPolicy::SetNever(InlineObservation obs)
{
    // Expect a valid observation
    assert(InlIsValidObservation(obs));

    switch (m_Decision)
    {
    case InlineDecision::NEVER:
        // Repeated never only ok if evaluating a prejit root
        assert(m_IsPrejitRoot);
        break;
    case InlineDecision::UNDECIDED:
    case InlineDecision::CANDIDATE:
        m_Decision = InlineDecision::NEVER;
        m_Observation = obs;
        break;
    default:
        // SUCCESS, FAILURE or ??
        assert(!"Unexpected m_Decision");
        unreached();
    }
}
Exemplo n.º 17
0
//------------------------------------------------------------------------
// genAVXIntrinsic: Generates the code for an AVX hardware intrinsic node
//
// Arguments:
//    node - The hardware intrinsic node
//
void CodeGen::genAVXIntrinsic(GenTreeHWIntrinsic* node)
{
    NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
    var_types      baseType    = node->gtSIMDBaseType;
    emitAttr       attr        = EA_ATTR(node->gtSIMDSize);
    var_types      targetType  = node->TypeGet();
    instruction    ins         = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
    GenTree*       op1         = node->gtGetOp1();
    GenTree*       op2         = node->gtGetOp2();
    regNumber      targetReg   = node->gtRegNum;
    emitter*       emit        = getEmitter();

    genConsumeOperands(node);

    switch (intrinsicID)
    {
        case NI_AVX_SetZeroVector256:
        {
            assert(op1 == nullptr);
            assert(op2 == nullptr);
            // SetZeroVector256 will generate pxor with integral base-typ, but pxor is a AVX2 instruction, so we
            // generate xorps on AVX machines.
            if (!compiler->compSupports(InstructionSet_AVX2) && varTypeIsIntegral(baseType))
            {
                emit->emitIns_SIMD_R_R_R(INS_xorps, attr, targetReg, targetReg, targetReg);
            }
            else
            {
                emit->emitIns_SIMD_R_R_R(ins, attr, targetReg, targetReg, targetReg);
            }
            break;
        }

        case NI_AVX_TestC:
        {
            emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
            emit->emitIns_R_R(ins, attr, op1->gtRegNum, op2->gtRegNum);
            emit->emitIns_R(INS_setb, EA_1BYTE, targetReg);
            break;
        }

        case NI_AVX_TestNotZAndNotC:
        {
            emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
            emit->emitIns_R_R(ins, attr, op1->gtRegNum, op2->gtRegNum);
            emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
            break;
        }

        case NI_AVX_TestZ:
        {
            emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
            emit->emitIns_R_R(ins, attr, op1->gtRegNum, op2->gtRegNum);
            emit->emitIns_R(INS_sete, EA_1BYTE, targetReg);
            break;
        }

        default:
            unreached();
            break;
    }

    genProduceReg(node);
}
Exemplo n.º 18
0
void Rationalizer::RewriteAssignment(LIR::Use& use)
{
    assert(use.IsInitialized());

    GenTreeOp* assignment = use.Def()->AsOp();
    assert(assignment->OperGet() == GT_ASG);

    GenTree* location = assignment->gtGetOp1();
    GenTree* value    = assignment->gtGetOp2();

    genTreeOps locationOp = location->OperGet();

    if (assignment->OperIsBlkOp())
    {
#ifdef FEATURE_SIMD
        if (varTypeIsSIMD(location) && assignment->OperIsInitBlkOp())
        {
            if (location->OperGet() == GT_LCL_VAR)
            {
                var_types simdType = location->TypeGet();
                GenTree*  initVal  = assignment->gtOp.gtOp2;
                var_types baseType = comp->getBaseTypeOfSIMDLocal(location);
                if (baseType != TYP_UNKNOWN)
                {
                    GenTreeSIMD* simdTree = new (comp, GT_SIMD)
                        GenTreeSIMD(simdType, initVal, SIMDIntrinsicInit, baseType, genTypeSize(simdType));
                    assignment->gtOp.gtOp2 = simdTree;
                    value                  = simdTree;
                    initVal->gtNext        = simdTree;
                    simdTree->gtPrev       = initVal;

                    simdTree->gtNext = location;
                    location->gtPrev = simdTree;
                }
            }
        }
#endif // FEATURE_SIMD
        if ((location->TypeGet() == TYP_STRUCT) && !assignment->IsPhiDefn() && !value->IsMultiRegCall())
        {
            if ((location->OperGet() == GT_LCL_VAR))
            {
                // We need to construct a block node for the location.
                // Modify lcl to be the address form.
                location->SetOper(addrForm(locationOp));
                LclVarDsc* varDsc     = &(comp->lvaTable[location->AsLclVarCommon()->gtLclNum]);
                location->gtType      = TYP_BYREF;
                GenTreeBlk*  storeBlk = nullptr;
                unsigned int size     = varDsc->lvExactSize;

                if (varDsc->lvStructGcCount != 0)
                {
                    CORINFO_CLASS_HANDLE structHnd = varDsc->lvVerTypeInfo.GetClassHandle();
                    GenTreeObj*          objNode   = comp->gtNewObjNode(structHnd, location)->AsObj();
                    unsigned int         slots = (unsigned)(roundUp(size, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE);

                    objNode->SetGCInfo(varDsc->lvGcLayout, varDsc->lvStructGcCount, slots);
                    objNode->ChangeOper(GT_STORE_OBJ);
                    objNode->SetData(value);
                    comp->fgMorphUnsafeBlk(objNode);
                    storeBlk = objNode;
                }
                else
                {
                    storeBlk = new (comp, GT_STORE_BLK) GenTreeBlk(GT_STORE_BLK, TYP_STRUCT, location, value, size);
                }
                storeBlk->gtFlags |= (GTF_REVERSE_OPS | GTF_ASG);
                storeBlk->gtFlags |= ((location->gtFlags | value->gtFlags) & GTF_ALL_EFFECT);

                GenTree* insertionPoint = location->gtNext;
                BlockRange().InsertBefore(insertionPoint, storeBlk);
                use.ReplaceWith(comp, storeBlk);
                BlockRange().Remove(assignment);
                JITDUMP("After transforming local struct assignment into a block op:\n");
                DISPTREERANGE(BlockRange(), use.Def());
                JITDUMP("\n");
                return;
            }
            else
            {
                assert(location->OperIsBlk());
            }
        }
    }

    switch (locationOp)
    {
        case GT_LCL_VAR:
        case GT_LCL_FLD:
        case GT_REG_VAR:
        case GT_PHI_ARG:
            RewriteAssignmentIntoStoreLclCore(assignment, location, value, locationOp);
            BlockRange().Remove(location);
            break;

        case GT_IND:
        {
            GenTreeStoreInd* store =
                new (comp, GT_STOREIND) GenTreeStoreInd(location->TypeGet(), location->gtGetOp1(), value);

            copyFlags(store, assignment, GTF_ALL_EFFECT);
            copyFlags(store, location, GTF_IND_FLAGS);

            if (assignment->IsReverseOp())
            {
                store->gtFlags |= GTF_REVERSE_OPS;
            }

            // TODO: JIT dump

            // Remove the GT_IND node and replace the assignment node with the store
            BlockRange().Remove(location);
            BlockRange().InsertBefore(assignment, store);
            use.ReplaceWith(comp, store);
            BlockRange().Remove(assignment);
        }
        break;

        case GT_CLS_VAR:
        {
            location->SetOper(GT_CLS_VAR_ADDR);
            location->gtType = TYP_BYREF;

            assignment->SetOper(GT_STOREIND);

            // TODO: JIT dump
        }
        break;

        case GT_BLK:
        case GT_OBJ:
        case GT_DYN_BLK:
        {
            assert(varTypeIsStruct(location));
            GenTreeBlk* storeBlk = location->AsBlk();
            genTreeOps  storeOper;
            switch (location->gtOper)
            {
                case GT_BLK:
                    storeOper = GT_STORE_BLK;
                    break;
                case GT_OBJ:
                    storeOper = GT_STORE_OBJ;
                    break;
                case GT_DYN_BLK:
                    storeOper = GT_STORE_DYN_BLK;
                    break;
                default:
                    unreached();
            }
            JITDUMP("Rewriting GT_ASG(%s(X), Y) to %s(X,Y):\n", GenTree::NodeName(location->gtOper),
                    GenTree::NodeName(storeOper));
            storeBlk->SetOperRaw(storeOper);
            storeBlk->gtFlags &= ~GTF_DONT_CSE;
            storeBlk->gtFlags |= (assignment->gtFlags & (GTF_ALL_EFFECT | GTF_REVERSE_OPS | GTF_BLK_VOLATILE |
                                                         GTF_BLK_UNALIGNED | GTF_DONT_CSE));
            storeBlk->gtBlk.Data() = value;

            // Replace the assignment node with the store
            use.ReplaceWith(comp, storeBlk);
            BlockRange().Remove(assignment);
            DISPTREERANGE(BlockRange(), use.Def());
            JITDUMP("\n");
        }
        break;

        default:
            unreached();
            break;
    }
}
Exemplo n.º 19
0
//------------------------------------------------------------------------
// BuildNode: Build the RefPositions for for a node
//
// Arguments:
//    treeNode - the node of interest
//
// Return Value:
//    The number of sources consumed by this node.
//
// Notes:
// Preconditions:
//    LSRA Has been initialized.
//
// Postconditions:
//    RefPositions have been built for all the register defs and uses required
//    for this node.
//
int LinearScan::BuildNode(GenTree* tree)
{
    assert(!tree->isContained());
    Interval* prefSrcInterval = nullptr;
    int       srcCount;
    int       dstCount      = 0;
    regMaskTP dstCandidates = RBM_NONE;
    regMaskTP killMask      = RBM_NONE;
    bool      isLocalDefUse = false;

    // Reset the build-related members of LinearScan.
    clearBuildState();

    RegisterType registerType = TypeGet(tree);

    // Set the default dstCount. This may be modified below.
    if (tree->IsValue())
    {
        dstCount = 1;
        if (tree->IsUnusedValue())
        {
            isLocalDefUse = true;
        }
    }
    else
    {
        dstCount = 0;
    }

    switch (tree->OperGet())
    {
        default:
            srcCount = BuildSimple(tree);
            break;

        case GT_LCL_VAR:
        case GT_LCL_FLD:
        {
            // We handle tracked variables differently from non-tracked ones.  If it is tracked,
            // we will simply add a use of the tracked variable at its parent/consumer.
            // Otherwise, for a use we need to actually add the appropriate references for loading
            // or storing the variable.
            //
            // A tracked variable won't actually get used until the appropriate ancestor tree node
            // is processed, unless this is marked "isLocalDefUse" because it is a stack-based argument
            // to a call or an orphaned dead node.
            //
            LclVarDsc* const varDsc = &compiler->lvaTable[tree->AsLclVarCommon()->gtLclNum];
            if (isCandidateVar(varDsc))
            {
                INDEBUG(dumpNodeInfo(tree, dstCandidates, 0, 1));
                return 0;
            }
            srcCount = 0;
#ifdef FEATURE_SIMD
            // Need an additional register to read upper 4 bytes of Vector3.
            if (tree->TypeGet() == TYP_SIMD12)
            {
                // We need an internal register different from targetReg in which 'tree' produces its result
                // because both targetReg and internal reg will be in use at the same time.
                buildInternalFloatRegisterDefForNode(tree, allSIMDRegs());
                setInternalRegsDelayFree = true;
                buildInternalRegisterUses();
            }
#endif
            BuildDef(tree);
        }
        break;

        case GT_STORE_LCL_FLD:
        case GT_STORE_LCL_VAR:
            srcCount = 1;
            assert(dstCount == 0);
            srcCount = BuildStoreLoc(tree->AsLclVarCommon());
            break;

        case GT_FIELD_LIST:
            // These should always be contained. We don't correctly allocate or
            // generate code for a non-contained GT_FIELD_LIST.
            noway_assert(!"Non-contained GT_FIELD_LIST");
            srcCount = 0;
            break;

        case GT_LIST:
        case GT_ARGPLACE:
        case GT_NO_OP:
        case GT_START_NONGC:
        case GT_PROF_HOOK:
            srcCount = 0;
            assert(dstCount == 0);
            break;

        case GT_START_PREEMPTGC:
            // This kills GC refs in callee save regs
            srcCount = 0;
            assert(dstCount == 0);
            BuildDefsWithKills(tree, 0, RBM_NONE, RBM_NONE);
            break;

        case GT_CNS_DBL:
        {
            GenTreeDblCon* dblConst   = tree->AsDblCon();
            double         constValue = dblConst->gtDblCon.gtDconVal;

            if (emitter::emitIns_valid_imm_for_fmov(constValue))
            {
                // Directly encode constant to instructions.
            }
            else
            {
                // Reserve int to load constant from memory (IF_LARGELDC)
                buildInternalIntRegisterDefForNode(tree);
                buildInternalRegisterUses();
            }
        }
            __fallthrough;

        case GT_CNS_INT:
        {
            srcCount = 0;
            assert(dstCount == 1);
            RefPosition* def               = BuildDef(tree);
            def->getInterval()->isConstant = true;
        }
        break;

        case GT_BOX:
        case GT_COMMA:
        case GT_QMARK:
        case GT_COLON:
            srcCount = 0;
            assert(dstCount == 0);
            unreached();
            break;

        case GT_RETURN:
            srcCount = BuildReturn(tree);
            break;

        case GT_RETFILT:
            assert(dstCount == 0);
            if (tree->TypeGet() == TYP_VOID)
            {
                srcCount = 0;
            }
            else
            {
                assert(tree->TypeGet() == TYP_INT);
                srcCount = 1;
                BuildUse(tree->gtGetOp1(), RBM_INTRET);
            }
            break;

        case GT_NOP:
            // A GT_NOP is either a passthrough (if it is void, or if it has
            // a child), but must be considered to produce a dummy value if it
            // has a type but no child.
            srcCount = 0;
            if (tree->TypeGet() != TYP_VOID && tree->gtGetOp1() == nullptr)
            {
                assert(dstCount == 1);
                BuildDef(tree);
            }
            else
            {
                assert(dstCount == 0);
            }
            break;

        case GT_JTRUE:
            srcCount = 0;
            assert(dstCount == 0);
            break;

        case GT_JMP:
            srcCount = 0;
            assert(dstCount == 0);
            break;

        case GT_SWITCH:
            // This should never occur since switch nodes must not be visible at this
            // point in the JIT.
            srcCount = 0;
            noway_assert(!"Switch must be lowered at this point");
            break;

        case GT_JMPTABLE:
            srcCount = 0;
            assert(dstCount == 1);
            BuildDef(tree);
            break;

        case GT_SWITCH_TABLE:
            buildInternalIntRegisterDefForNode(tree);
            srcCount = BuildBinaryUses(tree->AsOp());
            assert(dstCount == 0);
            break;

        case GT_ASG:
            noway_assert(!"We should never hit any assignment operator in lowering");
            srcCount = 0;
            break;

        case GT_ADD:
        case GT_SUB:
            if (varTypeIsFloating(tree->TypeGet()))
            {
                // overflow operations aren't supported on float/double types.
                assert(!tree->gtOverflow());

                // No implicit conversions at this stage as the expectation is that
                // everything is made explicit by adding casts.
                assert(tree->gtGetOp1()->TypeGet() == tree->gtGetOp2()->TypeGet());
            }

            __fallthrough;

        case GT_AND:
        case GT_OR:
        case GT_XOR:
        case GT_LSH:
        case GT_RSH:
        case GT_RSZ:
        case GT_ROR:
            srcCount = BuildBinaryUses(tree->AsOp());
            assert(dstCount == 1);
            BuildDef(tree);
            break;

        case GT_RETURNTRAP:
            // this just turns into a compare of its child with an int
            // + a conditional call
            BuildUse(tree->gtGetOp1());
            srcCount = 1;
            assert(dstCount == 0);
            killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC);
            BuildDefsWithKills(tree, 0, RBM_NONE, killMask);
            break;

        case GT_MOD:
        case GT_UMOD:
            NYI_IF(varTypeIsFloating(tree->TypeGet()), "FP Remainder in ARM64");
            assert(!"Shouldn't see an integer typed GT_MOD node in ARM64");
            srcCount = 0;
            break;

        case GT_MUL:
            if (tree->gtOverflow())
            {
                // Need a register different from target reg to check for overflow.
                buildInternalIntRegisterDefForNode(tree);
                setInternalRegsDelayFree = true;
            }
            __fallthrough;

        case GT_DIV:
        case GT_MULHI:
        case GT_UDIV:
        {
            srcCount = BuildBinaryUses(tree->AsOp());
            buildInternalRegisterUses();
            assert(dstCount == 1);
            BuildDef(tree);
        }
        break;

        case GT_INTRINSIC:
        {
            noway_assert((tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Abs) ||
                         (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Ceiling) ||
                         (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Floor) ||
                         (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round) ||
                         (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Sqrt));

            // Both operand and its result must be of the same floating point type.
            GenTree* op1 = tree->gtGetOp1();
            assert(varTypeIsFloating(op1));
            assert(op1->TypeGet() == tree->TypeGet());

            BuildUse(op1);
            srcCount = 1;
            assert(dstCount == 1);
            BuildDef(tree);
        }
        break;

#ifdef FEATURE_SIMD
        case GT_SIMD:
            srcCount = BuildSIMD(tree->AsSIMD());
            break;
#endif // FEATURE_SIMD

#ifdef FEATURE_HW_INTRINSICS
        case GT_HWIntrinsic:
            srcCount = BuildHWIntrinsic(tree->AsHWIntrinsic());
            break;
#endif // FEATURE_HW_INTRINSICS

        case GT_CAST:
            assert(dstCount == 1);
            srcCount = BuildCast(tree->AsCast());
            break;

        case GT_NEG:
        case GT_NOT:
            BuildUse(tree->gtGetOp1());
            srcCount = 1;
            assert(dstCount == 1);
            BuildDef(tree);
            break;

        case GT_EQ:
        case GT_NE:
        case GT_LT:
        case GT_LE:
        case GT_GE:
        case GT_GT:
        case GT_TEST_EQ:
        case GT_TEST_NE:
        case GT_JCMP:
            srcCount = BuildCmp(tree);
            break;

        case GT_CKFINITE:
            srcCount = 1;
            assert(dstCount == 1);
            buildInternalIntRegisterDefForNode(tree);
            BuildUse(tree->gtGetOp1());
            BuildDef(tree);
            buildInternalRegisterUses();
            break;

        case GT_CMPXCHG:
        {
            GenTreeCmpXchg* cmpXchgNode = tree->AsCmpXchg();
            srcCount                    = cmpXchgNode->gtOpComparand->isContained() ? 2 : 3;
            assert(dstCount == 1);

            if (!compiler->compSupports(InstructionSet_Atomics))
            {
                // For ARMv8 exclusives requires a single internal register
                buildInternalIntRegisterDefForNode(tree);
            }

            // For ARMv8 exclusives the lifetime of the addr and data must be extended because
            // it may be used used multiple during retries

            // For ARMv8.1 atomic cas the lifetime of the addr and data must be extended to prevent
            // them being reused as the target register which must be destroyed early

            RefPosition* locationUse = BuildUse(tree->gtCmpXchg.gtOpLocation);
            setDelayFree(locationUse);
            RefPosition* valueUse = BuildUse(tree->gtCmpXchg.gtOpValue);
            setDelayFree(valueUse);
            if (!cmpXchgNode->gtOpComparand->isContained())
            {
                RefPosition* comparandUse = BuildUse(tree->gtCmpXchg.gtOpComparand);

                // For ARMv8 exclusives the lifetime of the comparand must be extended because
                // it may be used used multiple during retries
                if (!compiler->compSupports(InstructionSet_Atomics))
                {
                    setDelayFree(comparandUse);
                }
            }

            // Internals may not collide with target
            setInternalRegsDelayFree = true;
            buildInternalRegisterUses();
            BuildDef(tree);
        }
        break;

        case GT_LOCKADD:
        case GT_XADD:
        case GT_XCHG:
        {
            assert(dstCount == (tree->TypeGet() == TYP_VOID) ? 0 : 1);
            srcCount = tree->gtGetOp2()->isContained() ? 1 : 2;

            if (!compiler->compSupports(InstructionSet_Atomics))
            {
                // GT_XCHG requires a single internal register; the others require two.
                buildInternalIntRegisterDefForNode(tree);
                if (tree->OperGet() != GT_XCHG)
                {
                    buildInternalIntRegisterDefForNode(tree);
                }
            }

            assert(!tree->gtGetOp1()->isContained());
            RefPosition* op1Use = BuildUse(tree->gtGetOp1());
            RefPosition* op2Use = nullptr;
            if (!tree->gtGetOp2()->isContained())
            {
                op2Use = BuildUse(tree->gtGetOp2());
            }

            // For ARMv8 exclusives the lifetime of the addr and data must be extended because
            // it may be used used multiple during retries
            if (!compiler->compSupports(InstructionSet_Atomics))
            {
                // Internals may not collide with target
                if (dstCount == 1)
                {
                    setDelayFree(op1Use);
                    if (op2Use != nullptr)
                    {
                        setDelayFree(op2Use);
                    }
                    setInternalRegsDelayFree = true;
                }
                buildInternalRegisterUses();
            }
            if (dstCount == 1)
            {
                BuildDef(tree);
            }
        }
        break;

#if FEATURE_ARG_SPLIT
        case GT_PUTARG_SPLIT:
            srcCount = BuildPutArgSplit(tree->AsPutArgSplit());
            dstCount = tree->AsPutArgSplit()->gtNumRegs;
            break;
#endif // FEATURE _SPLIT_ARG

        case GT_PUTARG_STK:
            srcCount = BuildPutArgStk(tree->AsPutArgStk());
            break;

        case GT_PUTARG_REG:
            srcCount = BuildPutArgReg(tree->AsUnOp());
            break;

        case GT_CALL:
            srcCount = BuildCall(tree->AsCall());
            if (tree->AsCall()->HasMultiRegRetVal())
            {
                dstCount = tree->AsCall()->GetReturnTypeDesc()->GetReturnRegCount();
            }
            break;

        case GT_ADDR:
        {
            // For a GT_ADDR, the child node should not be evaluated into a register
            GenTree* child = tree->gtGetOp1();
            assert(!isCandidateLocalRef(child));
            assert(child->isContained());
            assert(dstCount == 1);
            srcCount = 0;
            BuildDef(tree);
        }
        break;

        case GT_BLK:
        case GT_DYN_BLK:
            // These should all be eliminated prior to Lowering.
            assert(!"Non-store block node in Lowering");
            srcCount = 0;
            break;

        case GT_STORE_BLK:
        case GT_STORE_OBJ:
        case GT_STORE_DYN_BLK:
            srcCount = BuildBlockStore(tree->AsBlk());
            break;

        case GT_INIT_VAL:
            // Always a passthrough of its child's value.
            assert(!"INIT_VAL should always be contained");
            srcCount = 0;
            break;

        case GT_LCLHEAP:
        {
            assert(dstCount == 1);

            // Need a variable number of temp regs (see genLclHeap() in codegenamd64.cpp):
            // Here '-' means don't care.
            //
            //  Size?                   Init Memory?    # temp regs
            //   0                          -               0
            //   const and <=6 ptr words    -               0
            //   const and <PageSize        No              0
            //   >6 ptr words               Yes             0
            //   Non-const                  Yes             0
            //   Non-const                  No              2
            //

            GenTree* size = tree->gtGetOp1();
            if (size->IsCnsIntOrI())
            {
                assert(size->isContained());
                srcCount = 0;

                size_t sizeVal = size->gtIntCon.gtIconVal;

                if (sizeVal != 0)
                {
                    // Compute the amount of memory to properly STACK_ALIGN.
                    // Note: The Gentree node is not updated here as it is cheap to recompute stack aligned size.
                    // This should also help in debugging as we can examine the original size specified with
                    // localloc.
                    sizeVal         = AlignUp(sizeVal, STACK_ALIGN);
                    size_t stpCount = sizeVal / (REGSIZE_BYTES * 2);

                    // For small allocations up to 4 'stp' instructions (i.e. 16 to 64 bytes of localloc)
                    //
                    if (stpCount <= 4)
                    {
                        // Need no internal registers
                    }
                    else if (!compiler->info.compInitMem)
                    {
                        // No need to initialize allocated stack space.
                        if (sizeVal < compiler->eeGetPageSize())
                        {
                            // Need no internal registers
                        }
                        else
                        {
                            // We need two registers: regCnt and RegTmp
                            buildInternalIntRegisterDefForNode(tree);
                            buildInternalIntRegisterDefForNode(tree);
                        }
                    }
                }
            }
            else
            {
                srcCount = 1;
                if (!compiler->info.compInitMem)
                {
                    buildInternalIntRegisterDefForNode(tree);
                    buildInternalIntRegisterDefForNode(tree);
                }
            }

            if (!size->isContained())
            {
                BuildUse(size);
            }
            buildInternalRegisterUses();
            BuildDef(tree);
        }
        break;

        case GT_ARR_BOUNDS_CHECK:
#ifdef FEATURE_SIMD
        case GT_SIMD_CHK:
#endif // FEATURE_SIMD
        {
            GenTreeBoundsChk* node = tree->AsBoundsChk();
            // Consumes arrLen & index - has no result
            assert(dstCount == 0);

            GenTree* intCns = nullptr;
            GenTree* other  = nullptr;
            srcCount        = BuildOperandUses(tree->AsBoundsChk()->gtIndex);
            srcCount += BuildOperandUses(tree->AsBoundsChk()->gtArrLen);
        }
        break;

        case GT_ARR_ELEM:
            // These must have been lowered to GT_ARR_INDEX
            noway_assert(!"We should never see a GT_ARR_ELEM in lowering");
            srcCount = 0;
            assert(dstCount == 0);
            break;

        case GT_ARR_INDEX:
        {
            srcCount = 2;
            assert(dstCount == 1);
            buildInternalIntRegisterDefForNode(tree);
            setInternalRegsDelayFree = true;

            // For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple
            // times while the result is being computed.
            RefPosition* arrObjUse = BuildUse(tree->AsArrIndex()->ArrObj());
            setDelayFree(arrObjUse);
            BuildUse(tree->AsArrIndex()->IndexExpr());
            buildInternalRegisterUses();
            BuildDef(tree);
        }
        break;

        case GT_ARR_OFFSET:
            // This consumes the offset, if any, the arrObj and the effective index,
            // and produces the flattened offset for this dimension.
            srcCount = 2;
            if (!tree->gtArrOffs.gtOffset->isContained())
            {
                BuildUse(tree->AsArrOffs()->gtOffset);
                srcCount++;
            }
            BuildUse(tree->AsArrOffs()->gtIndex);
            BuildUse(tree->AsArrOffs()->gtArrObj);
            assert(dstCount == 1);
            buildInternalIntRegisterDefForNode(tree);
            buildInternalRegisterUses();
            BuildDef(tree);
            break;

        case GT_LEA:
        {
            GenTreeAddrMode* lea = tree->AsAddrMode();

            GenTree* base  = lea->Base();
            GenTree* index = lea->Index();
            int      cns   = lea->Offset();

            // This LEA is instantiating an address, so we set up the srcCount here.
            srcCount = 0;
            if (base != nullptr)
            {
                srcCount++;
                BuildUse(base);
            }
            if (index != nullptr)
            {
                srcCount++;
                BuildUse(index);
            }
            assert(dstCount == 1);

            // On ARM64 we may need a single internal register
            // (when both conditions are true then we still only need a single internal register)
            if ((index != nullptr) && (cns != 0))
            {
                // ARM64 does not support both Index and offset so we need an internal register
                buildInternalIntRegisterDefForNode(tree);
            }
            else if (!emitter::emitIns_valid_imm_for_add(cns, EA_8BYTE))
            {
                // This offset can't be contained in the add instruction, so we need an internal register
                buildInternalIntRegisterDefForNode(tree);
            }
            buildInternalRegisterUses();
            BuildDef(tree);
        }
        break;

        case GT_STOREIND:
        {
            assert(dstCount == 0);

            if (compiler->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(tree))
            {
                srcCount = BuildGCWriteBarrier(tree);
                break;
            }

            srcCount = BuildIndir(tree->AsIndir());
            if (!tree->gtGetOp2()->isContained())
            {
                BuildUse(tree->gtGetOp2());
                srcCount++;
            }
        }
        break;

        case GT_NULLCHECK:
            // Unlike ARM, ARM64 implements NULLCHECK as a load to REG_ZR, so no internal register
            // is required, and it is not a localDefUse.
            assert(dstCount == 0);
            assert(!tree->gtGetOp1()->isContained());
            BuildUse(tree->gtGetOp1());
            srcCount = 1;
            break;

        case GT_IND:
            assert(dstCount == 1);
            srcCount = BuildIndir(tree->AsIndir());
            break;

        case GT_CATCH_ARG:
            srcCount = 0;
            assert(dstCount == 1);
            BuildDef(tree, RBM_EXCEPTION_OBJECT);
            break;

        case GT_CLS_VAR:
            srcCount = 0;
            // GT_CLS_VAR, by the time we reach the backend, must always
            // be a pure use.
            // It will produce a result of the type of the
            // node, and use an internal register for the address.

            assert(dstCount == 1);
            assert((tree->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEASG)) == 0);
            buildInternalIntRegisterDefForNode(tree);
            buildInternalRegisterUses();
            BuildDef(tree);
            break;

        case GT_INDEX_ADDR:
            assert(dstCount == 1);
            srcCount = BuildBinaryUses(tree->AsOp());
            buildInternalIntRegisterDefForNode(tree);
            buildInternalRegisterUses();
            BuildDef(tree);
            break;

    } // end switch (tree->OperGet())

    if (tree->IsUnusedValue() && (dstCount != 0))
    {
        isLocalDefUse = true;
    }
    // We need to be sure that we've set srcCount and dstCount appropriately
    assert((dstCount < 2) || tree->IsMultiRegCall());
    assert(isLocalDefUse == (tree->IsValue() && tree->IsUnusedValue()));
    assert(!tree->IsUnusedValue() || (dstCount != 0));
    assert(dstCount == tree->GetRegisterDstCount());
    INDEBUG(dumpNodeInfo(tree, dstCandidates, srcCount, dstCount));
    return srcCount;
}
Exemplo n.º 20
0
void Rationalizer::RewriteAssignment(LIR::Use& use)
{
    assert(use.IsInitialized());

    GenTreeOp* assignment = use.Def()->AsOp();
    assert(assignment->OperGet() == GT_ASG);

    GenTree* location = assignment->gtGetOp1();
    GenTree* value    = assignment->gtGetOp2();

    genTreeOps locationOp = location->OperGet();

#ifdef FEATURE_SIMD
    if (varTypeIsSIMD(location) && assignment->OperIsInitBlkOp())
    {
        if (location->OperGet() == GT_LCL_VAR)
        {
            var_types simdType = location->TypeGet();
            GenTree*  initVal  = assignment->gtOp.gtOp2;
            var_types baseType = comp->getBaseTypeOfSIMDLocal(location);
            if (baseType != TYP_UNKNOWN)
            {
                GenTreeSIMD* simdTree = new (comp, GT_SIMD)
                    GenTreeSIMD(simdType, initVal, SIMDIntrinsicInit, baseType, genTypeSize(simdType));
                assignment->gtOp.gtOp2 = simdTree;
                value                  = simdTree;
                initVal->gtNext        = simdTree;
                simdTree->gtPrev       = initVal;

                simdTree->gtNext = location;
                location->gtPrev = simdTree;
            }
        }
        else
        {
            assert(location->OperIsBlk());
        }
    }
#endif // FEATURE_SIMD

    switch (locationOp)
    {
        case GT_LCL_VAR:
        case GT_LCL_FLD:
        case GT_REG_VAR:
        case GT_PHI_ARG:
            RewriteAssignmentIntoStoreLclCore(assignment, location, value, locationOp);
            BlockRange().Remove(location);
            break;

        case GT_IND:
        {
            GenTreeStoreInd* store =
                new (comp, GT_STOREIND) GenTreeStoreInd(location->TypeGet(), location->gtGetOp1(), value);

            copyFlags(store, assignment, GTF_ALL_EFFECT);
            copyFlags(store, location, GTF_IND_FLAGS);

            if (assignment->IsReverseOp())
            {
                store->gtFlags |= GTF_REVERSE_OPS;
            }

            // TODO: JIT dump

            // Remove the GT_IND node and replace the assignment node with the store
            BlockRange().Remove(location);
            BlockRange().InsertBefore(assignment, store);
            use.ReplaceWith(comp, store);
            BlockRange().Remove(assignment);
        }
        break;

        case GT_CLS_VAR:
        {
            location->SetOper(GT_CLS_VAR_ADDR);
            location->gtType = TYP_BYREF;

            assignment->SetOper(GT_STOREIND);

            // TODO: JIT dump
        }
        break;

        case GT_BLK:
        case GT_OBJ:
        case GT_DYN_BLK:
        {
            assert(varTypeIsStruct(location));
            GenTreeBlk* storeBlk = location->AsBlk();
            genTreeOps  storeOper;
            switch (location->gtOper)
            {
                case GT_BLK:
                    storeOper = GT_STORE_BLK;
                    break;
                case GT_OBJ:
                    storeOper = GT_STORE_OBJ;
                    break;
                case GT_DYN_BLK:
                    storeOper = GT_STORE_DYN_BLK;
                    break;
                default:
                    unreached();
            }
            JITDUMP("Rewriting GT_ASG(%s(X), Y) to %s(X,Y):\n", GenTree::NodeName(location->gtOper),
                    GenTree::NodeName(storeOper));
            storeBlk->SetOperRaw(storeOper);
            storeBlk->gtFlags &= ~GTF_DONT_CSE;
            storeBlk->gtFlags |= (assignment->gtFlags & (GTF_ALL_EFFECT | GTF_REVERSE_OPS | GTF_BLK_VOLATILE |
                                                         GTF_BLK_UNALIGNED | GTF_BLK_INIT | GTF_DONT_CSE));
            storeBlk->gtBlk.Data() = value;

            // Replace the assignment node with the store
            use.ReplaceWith(comp, storeBlk);
            BlockRange().Remove(assignment);
            DISPTREERANGE(BlockRange(), use.Def());
            JITDUMP("\n");
        }
        break;

        default:
            unreached();
            break;
    }
}
Exemplo n.º 21
0
//------------------------------------------------------------------------
// DecomposeShift: Decompose GT_LSH, GT_RSH, GT_RSZ. For shift nodes, we need to use
// the shift helper functions, so we here convert the shift into a helper call by
// pulling its arguments out of linear order and making them the args to a call, then
// replacing the original node with the new call.
//
// Arguments:
//    use - the LIR::Use object for the def that needs to be decomposed.
//
// Return Value:
//    The next node to process.
//
GenTree* DecomposeLongs::DecomposeShift(LIR::Use& use)
{
    assert(use.IsInitialized());

    GenTree* tree   = use.Def();
    GenTree* gtLong = tree->gtGetOp1();
    genTreeOps oper = tree->OperGet();

    assert((oper == GT_LSH) || (oper == GT_RSH) || (oper == GT_RSZ));

    LIR::Use loOp1Use(Range(), &gtLong->gtOp.gtOp1, gtLong);
    loOp1Use.ReplaceWithLclVar(m_compiler, m_blockWeight);

    LIR::Use hiOp1Use(Range(), &gtLong->gtOp.gtOp2, gtLong);
    hiOp1Use.ReplaceWithLclVar(m_compiler, m_blockWeight);

    LIR::Use shiftWidthUse(Range(), &tree->gtOp.gtOp2, tree);
    shiftWidthUse.ReplaceWithLclVar(m_compiler, m_blockWeight);

    GenTree* loOp1 = gtLong->gtGetOp1();
    GenTree* hiOp1 = gtLong->gtGetOp2();

    GenTree* shiftWidthOp = tree->gtGetOp2();

    Range().Remove(gtLong);
    Range().Remove(loOp1);
    Range().Remove(hiOp1);

    Range().Remove(shiftWidthOp);

    // TODO-X86-CQ: If the shift operand is a GT_CNS_INT, we should pipe the instructions through to codegen
    // and generate the shift instructions ourselves there, rather than replacing it with a helper call.

    unsigned helper;

    switch (oper)
    {
    case GT_LSH:
        helper = CORINFO_HELP_LLSH;
        break;
    case GT_RSH:
        helper = CORINFO_HELP_LRSH;
        break;
    case GT_RSZ:
        helper = CORINFO_HELP_LRSZ;
        break;
    default:
        unreached();
    }

    GenTreeArgList* argList = m_compiler->gtNewArgList(loOp1, hiOp1, shiftWidthOp);

    GenTree* call = m_compiler->gtNewHelperCallNode(helper, TYP_LONG, 0, argList);

    GenTreeCall*    callNode    = call->AsCall();
    ReturnTypeDesc* retTypeDesc = callNode->GetReturnTypeDesc();
    retTypeDesc->InitializeLongReturnType(m_compiler);

    call = m_compiler->fgMorphArgs(callNode);
    Range().InsertAfter(tree, LIR::SeqTree(m_compiler, call));

    Range().Remove(tree);
    use.ReplaceWith(m_compiler, call);
    return call;
}
Exemplo n.º 22
0
// Display a compact representation of the bbJumpKind, that is, where this block branches.
// This is similar to code in Compiler::fgTableDispBasicBlock(), but doesn't have that code's requirements to align things strictly.
void                BasicBlock::dspJumpKind()
{
    switch (bbJumpKind)
    {
    case BBJ_EHFINALLYRET:
        printf(" (finret)");
        break;

    case BBJ_EHFILTERRET:
        printf(" (fltret)");
        break;

    case BBJ_EHCATCHRET:
        printf(" -> BB%02u (cret)", bbJumpDest->bbNum);
        break;

    case BBJ_THROW:
        printf(" (throw)");
        break;

    case BBJ_RETURN:
        printf(" (return)");
        break;

    case BBJ_NONE:
        // For fall-through blocks, print nothing.
        break;

    case BBJ_ALWAYS:
        if (bbFlags & BBF_KEEP_BBJ_ALWAYS)
        {
            printf(" -> BB%02u (ALWAYS)", bbJumpDest->bbNum);
        }
        else
        {
            printf(" -> BB%02u (always)", bbJumpDest->bbNum);
        }
        break;

    case BBJ_LEAVE:
        printf(" -> BB%02u (leave)", bbJumpDest->bbNum);
        break;

    case BBJ_CALLFINALLY:
        printf(" -> BB%02u (callf)", bbJumpDest->bbNum);
        break;

    case BBJ_COND:
        printf(" -> BB%02u (cond)", bbJumpDest->bbNum);
        break;

    case BBJ_SWITCH:
        printf(" ->");

        unsigned        jumpCnt;
                        jumpCnt = bbJumpSwt->bbsCount;
        BasicBlock**    jumpTab;
                        jumpTab = bbJumpSwt->bbsDstTab;
        do
        {
            printf("%cBB%02u",
                   (jumpTab == bbJumpSwt->bbsDstTab) ? ' ' : ',',
                   (*jumpTab)->bbNum);
        }
        while (++jumpTab, --jumpCnt);

        printf(" (switch)");
        break;

    default:
        unreached();
        break;
    }
}
Exemplo n.º 23
0
//------------------------------------------------------------------------
// BuildSIMD: Set the NodeInfo for a GT_SIMD tree.
//
// Arguments:
//    tree       - The GT_SIMD node of interest
//
// Return Value:
//    The number of sources consumed by this node.
//
int LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
{
    int srcCount = 0;
    // Only SIMDIntrinsicInit can be contained
    if (simdTree->isContained())
    {
        assert(simdTree->gtSIMDIntrinsicID == SIMDIntrinsicInit);
    }
    int dstCount = simdTree->IsValue() ? 1 : 0;
    assert(dstCount == 1);

    bool buildUses = true;

    GenTree* op1 = simdTree->gtGetOp1();
    GenTree* op2 = simdTree->gtGetOp2();

    switch (simdTree->gtSIMDIntrinsicID)
    {
        case SIMDIntrinsicInit:
        case SIMDIntrinsicCast:
        case SIMDIntrinsicSqrt:
        case SIMDIntrinsicAbs:
        case SIMDIntrinsicConvertToSingle:
        case SIMDIntrinsicConvertToInt32:
        case SIMDIntrinsicConvertToDouble:
        case SIMDIntrinsicConvertToInt64:
        case SIMDIntrinsicWidenLo:
        case SIMDIntrinsicWidenHi:
            // No special handling required.
            break;

        case SIMDIntrinsicGetItem:
        {
            op1 = simdTree->gtGetOp1();
            op2 = simdTree->gtGetOp2();

            // We have an object and an index, either of which may be contained.
            bool setOp2DelayFree = false;
            if (!op2->IsCnsIntOrI() && (!op1->isContained() || op1->OperIsLocal()))
            {
                // If the index is not a constant and the object is not contained or is a local
                // we will need a general purpose register to calculate the address
                // internal register must not clobber input index
                // TODO-Cleanup: An internal register will never clobber a source; this code actually
                // ensures that the index (op2) doesn't interfere with the target.
                buildInternalIntRegisterDefForNode(simdTree);
                setOp2DelayFree = true;
            }
            srcCount += BuildOperandUses(op1);
            if (!op2->isContained())
            {
                RefPosition* op2Use = BuildUse(op2);
                if (setOp2DelayFree)
                {
                    setDelayFree(op2Use);
                }
                srcCount++;
            }

            if (!op2->IsCnsIntOrI() && (!op1->isContained()))
            {
                // If vector is not already in memory (contained) and the index is not a constant,
                // we will use the SIMD temp location to store the vector.
                compiler->getSIMDInitTempVarNum();
            }
            buildUses = false;
        }
        break;

        case SIMDIntrinsicAdd:
        case SIMDIntrinsicSub:
        case SIMDIntrinsicMul:
        case SIMDIntrinsicDiv:
        case SIMDIntrinsicBitwiseAnd:
        case SIMDIntrinsicBitwiseAndNot:
        case SIMDIntrinsicBitwiseOr:
        case SIMDIntrinsicBitwiseXor:
        case SIMDIntrinsicMin:
        case SIMDIntrinsicMax:
        case SIMDIntrinsicEqual:
        case SIMDIntrinsicLessThan:
        case SIMDIntrinsicGreaterThan:
        case SIMDIntrinsicLessThanOrEqual:
        case SIMDIntrinsicGreaterThanOrEqual:
            // No special handling required.
            break;

        case SIMDIntrinsicSetX:
        case SIMDIntrinsicSetY:
        case SIMDIntrinsicSetZ:
        case SIMDIntrinsicSetW:
        case SIMDIntrinsicNarrow:
        {
            // Op1 will write to dst before Op2 is free
            BuildUse(op1);
            RefPosition* op2Use = BuildUse(op2);
            setDelayFree(op2Use);
            srcCount  = 2;
            buildUses = false;
            break;
        }

        case SIMDIntrinsicInitN:
        {
            var_types baseType = simdTree->gtSIMDBaseType;
            srcCount           = (short)(simdTree->gtSIMDSize / genTypeSize(baseType));
            if (varTypeIsFloating(simdTree->gtSIMDBaseType))
            {
                // Need an internal register to stitch together all the values into a single vector in a SIMD reg.
                buildInternalFloatRegisterDefForNode(simdTree);
            }

            int initCount = 0;
            for (GenTree* list = op1; list != nullptr; list = list->gtGetOp2())
            {
                assert(list->OperGet() == GT_LIST);
                GenTree* listItem = list->gtGetOp1();
                assert(listItem->TypeGet() == baseType);
                assert(!listItem->isContained());
                BuildUse(listItem);
                initCount++;
            }
            assert(initCount == srcCount);
            buildUses = false;

            break;
        }

        case SIMDIntrinsicInitArray:
            // We have an array and an index, which may be contained.
            break;

        case SIMDIntrinsicOpEquality:
        case SIMDIntrinsicOpInEquality:
            buildInternalFloatRegisterDefForNode(simdTree);
            break;

        case SIMDIntrinsicDotProduct:
            buildInternalFloatRegisterDefForNode(simdTree);
            break;

        case SIMDIntrinsicSelect:
            // TODO-ARM64-CQ Allow lowering to see SIMDIntrinsicSelect so we can generate BSL VC, VA, VB
            // bsl target register must be VC.  Reserve a temp in case we need to shuffle things.
            // This will require a different approach, as GenTreeSIMD has only two operands.
            assert(!"SIMDIntrinsicSelect not yet supported");
            buildInternalFloatRegisterDefForNode(simdTree);
            break;

        case SIMDIntrinsicInitArrayX:
        case SIMDIntrinsicInitFixed:
        case SIMDIntrinsicCopyToArray:
        case SIMDIntrinsicCopyToArrayX:
        case SIMDIntrinsicNone:
        case SIMDIntrinsicGetCount:
        case SIMDIntrinsicGetOne:
        case SIMDIntrinsicGetZero:
        case SIMDIntrinsicGetAllOnes:
        case SIMDIntrinsicGetX:
        case SIMDIntrinsicGetY:
        case SIMDIntrinsicGetZ:
        case SIMDIntrinsicGetW:
        case SIMDIntrinsicInstEquals:
        case SIMDIntrinsicHWAccel:
        case SIMDIntrinsicWiden:
        case SIMDIntrinsicInvalid:
            assert(!"These intrinsics should not be seen during register allocation");
            __fallthrough;

        default:
            noway_assert(!"Unimplemented SIMD node type.");
            unreached();
    }
    if (buildUses)
    {
        assert(!op1->OperIs(GT_LIST));
        assert(srcCount == 0);
        srcCount = BuildOperandUses(op1);
        if ((op2 != nullptr) && !op2->isContained())
        {
            srcCount += BuildOperandUses(op2);
        }
    }
    assert(internalCount <= MaxInternalCount);
    buildInternalRegisterUses();
    if (dstCount == 1)
    {
        BuildDef(simdTree);
    }
    else
    {
        assert(dstCount == 0);
    }
    return srcCount;
}
Exemplo n.º 24
0
//------------------------------------------------------------------------
// genHWIntrinsic: Generates the code for a given hardware intrinsic node.
//
// Arguments:
//    node - The hardware intrinsic node
//
void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
{
    NamedIntrinsic      intrinsicID = node->gtHWIntrinsicId;
    InstructionSet      isa         = Compiler::isaOfHWIntrinsic(intrinsicID);
    HWIntrinsicCategory category    = Compiler::categoryOfHWIntrinsic(intrinsicID);
    HWIntrinsicFlag     flags       = Compiler::flagsOfHWIntrinsic(intrinsicID);
    int                 ival        = Compiler::ivalOfHWIntrinsic(intrinsicID);
    int                 numArgs     = Compiler::numArgsOfHWIntrinsic(node);

    assert((flags & HW_Flag_NoCodeGen) == 0);

    if (genIsTableDrivenHWIntrinsic(category, flags))
    {
        GenTree*  op1        = node->gtGetOp1();
        GenTree*  op2        = node->gtGetOp2();
        regNumber targetReg  = node->gtRegNum;
        var_types targetType = node->TypeGet();
        var_types baseType   = node->gtSIMDBaseType;

        regNumber op1Reg = REG_NA;
        regNumber op2Reg = REG_NA;
        emitter*  emit   = getEmitter();

        assert(numArgs >= 0);
        instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
        assert(ins != INS_invalid);
        emitAttr simdSize = EA_ATTR(node->gtSIMDSize);
        assert(simdSize != 0);

        switch (numArgs)
        {
            case 1:
                genConsumeOperands(node);
                op1Reg = op1->gtRegNum;
                if (category == HW_Category_MemoryLoad)
                {
                    emit->emitIns_R_AR(ins, simdSize, targetReg, op1Reg, 0);
                }
                else if (category == HW_Category_SIMDScalar && (flags & HW_Flag_CopyUpperBits) != 0)
                {
                    emit->emitIns_SIMD_R_R_R(ins, simdSize, targetReg, op1Reg, op1Reg);
                }
                else if ((ival != -1) && varTypeIsFloating(baseType))
                {
                    emit->emitIns_R_R_I(ins, simdSize, targetReg, op1Reg, ival);
                }
                else
                {
                    emit->emitIns_R_R(ins, simdSize, targetReg, op1Reg);
                }
                break;

            case 2:
                genConsumeOperands(node);
                op1Reg = op1->gtRegNum;
                op2Reg = op2->gtRegNum;
                if (category == HW_Category_MemoryStore)
                {
                    emit->emitIns_AR_R(ins, simdSize, op2Reg, op1Reg, 0);
                }
                else if ((ival != -1) && varTypeIsFloating(baseType))
                {
                    genHWIntrinsic_R_R_RM_I(node, ins);
                }
                else if (category == HW_Category_MemoryLoad)
                {
                    emit->emitIns_SIMD_R_R_AR(ins, simdSize, targetReg, op1Reg, op2Reg);
                }
                else if (Compiler::isImmHWIntrinsic(intrinsicID, op2))
                {
                    if (intrinsicID == NI_SSE2_Extract)
                    {
                        // extract instructions return to GP-registers, so it needs int size as the emitsize
                        simdSize = emitTypeSize(TYP_INT);
                    }
                    auto emitSwCase = [&](unsigned i) {
                        emit->emitIns_SIMD_R_R_I(ins, simdSize, targetReg, op1Reg, (int)i);
                    };

                    if (op2->IsCnsIntOrI())
                    {
                        ssize_t ival = op2->AsIntCon()->IconValue();
                        emitSwCase((unsigned)ival);
                    }
                    else
                    {
                        // We emit a fallback case for the scenario when the imm-op is not a constant. This should
                        // normally happen when the intrinsic is called indirectly, such as via Reflection. However, it
                        // can also occur if the consumer calls it directly and just doesn't pass a constant value.
                        regNumber baseReg = node->ExtractTempReg();
                        regNumber offsReg = node->GetSingleTempReg();
                        genHWIntrinsicJumpTableFallback(intrinsicID, op2Reg, baseReg, offsReg, emitSwCase);
                    }
                }
                else
                {
                    genHWIntrinsic_R_R_RM(node, ins);
                }
                break;
            case 3:
            {
                assert(op1->OperIsList());
                assert(op1->gtGetOp2()->OperIsList());
                assert(op1->gtGetOp2()->gtGetOp2()->OperIsList());

                GenTreeArgList* argList = op1->AsArgList();
                op1                     = argList->Current();
                genConsumeRegs(op1);
                op1Reg = op1->gtRegNum;

                argList = argList->Rest();
                op2     = argList->Current();
                genConsumeRegs(op2);
                op2Reg = op2->gtRegNum;

                argList      = argList->Rest();
                GenTree* op3 = argList->Current();
                genConsumeRegs(op3);
                regNumber op3Reg = op3->gtRegNum;

                if (Compiler::isImmHWIntrinsic(intrinsicID, op3))
                {
                    auto emitSwCase = [&](unsigned i) {
                        emit->emitIns_SIMD_R_R_R_I(ins, simdSize, targetReg, op1Reg, op2Reg, (int)i);
                    };
                    if (op3->IsCnsIntOrI())
                    {
                        ssize_t ival = op3->AsIntCon()->IconValue();
                        emitSwCase((unsigned)ival);
                    }
                    else
                    {
                        // We emit a fallback case for the scenario when the imm-op is not a constant. This should
                        // normally happen when the intrinsic is called indirectly, such as via Reflection. However, it
                        // can also occur if the consumer calls it directly and just doesn't pass a constant value.
                        regNumber baseReg = node->ExtractTempReg();
                        regNumber offsReg = node->GetSingleTempReg();
                        genHWIntrinsicJumpTableFallback(intrinsicID, op3Reg, baseReg, offsReg, emitSwCase);
                    }
                }
                else if (category == HW_Category_MemoryStore)
                {
                    assert(intrinsicID == NI_SSE2_MaskMove);
                    assert(targetReg == REG_NA);

                    // SSE2 MaskMove hardcodes the destination (op3) in DI/EDI/RDI
                    if (op3Reg != REG_EDI)
                    {
                        emit->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_EDI, op3Reg);
                    }
                    emit->emitIns_R_R(ins, simdSize, op1Reg, op2Reg);
                }
                else
                {
                    emit->emitIns_SIMD_R_R_R_R(ins, simdSize, targetReg, op1Reg, op2Reg, op3Reg);
                }
                break;
            }

            default:
                unreached();
                break;
        }
        genProduceReg(node);
        return;
    }

    switch (isa)
    {
        case InstructionSet_SSE:
            genSSEIntrinsic(node);
            break;
        case InstructionSet_SSE2:
            genSSE2Intrinsic(node);
            break;
        case InstructionSet_SSE41:
            genSSE41Intrinsic(node);
            break;
        case InstructionSet_SSE42:
            genSSE42Intrinsic(node);
            break;
        case InstructionSet_AVX:
            genAVXIntrinsic(node);
            break;
        case InstructionSet_AVX2:
            genAVX2Intrinsic(node);
            break;
        case InstructionSet_AES:
            genAESIntrinsic(node);
            break;
        case InstructionSet_BMI1:
            genBMI1Intrinsic(node);
            break;
        case InstructionSet_BMI2:
            genBMI2Intrinsic(node);
            break;
        case InstructionSet_FMA:
            genFMAIntrinsic(node);
            break;
        case InstructionSet_LZCNT:
            genLZCNTIntrinsic(node);
            break;
        case InstructionSet_PCLMULQDQ:
            genPCLMULQDQIntrinsic(node);
            break;
        case InstructionSet_POPCNT:
            genPOPCNTIntrinsic(node);
            break;
        default:
            unreached();
            break;
    }
}
Exemplo n.º 25
0
//------------------------------------------------------------------------
// genSSE41Intrinsic: Generates the code for an SSE4.1 hardware intrinsic node
//
// Arguments:
//    node - The hardware intrinsic node
//
void CodeGen::genSSE41Intrinsic(GenTreeHWIntrinsic* node)
{
    NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
    GenTree*       op1         = node->gtGetOp1();
    GenTree*       op2         = node->gtGetOp2();
    GenTree*       op3         = nullptr;
    GenTree*       op4         = nullptr;
    regNumber      targetReg   = node->gtRegNum;
    var_types      targetType  = node->TypeGet();
    var_types      baseType    = node->gtSIMDBaseType;

    regNumber op1Reg = REG_NA;
    regNumber op2Reg = REG_NA;
    regNumber op3Reg = REG_NA;
    regNumber op4Reg = REG_NA;
    emitter*  emit   = getEmitter();

    if ((op1 != nullptr) && !op1->OperIsList())
    {
        op1Reg = op1->gtRegNum;
        genConsumeOperands(node);
    }

    switch (intrinsicID)
    {
        case NI_SSE41_TestAllOnes:
        {
            regNumber tmpReg = node->GetSingleTempReg();
            assert(Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType) == INS_ptest);
            emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, emitTypeSize(TYP_SIMD16), tmpReg, tmpReg, tmpReg);
            emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
            emit->emitIns_R_R(INS_ptest, emitTypeSize(TYP_SIMD16), op1Reg, tmpReg);
            emit->emitIns_R(INS_setb, EA_1BYTE, targetReg);
            break;
        }

        case NI_SSE41_TestAllZeros:
        case NI_SSE41_TestZ:
        {
            assert(Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType) == INS_ptest);
            emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
            emit->emitIns_R_R(INS_ptest, emitTypeSize(TYP_SIMD16), op1Reg, op2->gtRegNum);
            emit->emitIns_R(INS_sete, EA_1BYTE, targetReg);
            break;
        }

        case NI_SSE41_TestC:
        {
            assert(Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType) == INS_ptest);
            emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
            emit->emitIns_R_R(INS_ptest, emitTypeSize(TYP_SIMD16), op1Reg, op2->gtRegNum);
            emit->emitIns_R(INS_setb, EA_1BYTE, targetReg);
            break;
        }

        case NI_SSE41_TestMixOnesZeros:
        case NI_SSE41_TestNotZAndNotC:
        {
            assert(Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType) == INS_ptest);
            emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
            emit->emitIns_R_R(INS_ptest, emitTypeSize(TYP_SIMD16), op1Reg, op2->gtRegNum);
            emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
            break;
        }

        case NI_SSE41_Extract:
        {
            regNumber   tmpTargetReg = REG_NA;
            instruction ins          = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            if (baseType == TYP_FLOAT)
            {
                tmpTargetReg = node->ExtractTempReg();
            }
            auto emitSwCase = [&](unsigned i) {
                if (baseType == TYP_FLOAT)
                {
                    // extract instructions return to GP-registers, so it needs int size as the emitsize
                    emit->emitIns_SIMD_R_R_I(ins, emitTypeSize(TYP_INT), op1Reg, tmpTargetReg, (int)i);
                    emit->emitIns_R_R(INS_mov_i2xmm, EA_4BYTE, targetReg, tmpTargetReg);
                }
                else
                {
                    emit->emitIns_SIMD_R_R_I(ins, emitTypeSize(TYP_INT), targetReg, op1Reg, (int)i);
                }
            };

            if (op2->IsCnsIntOrI())
            {
                ssize_t ival = op2->AsIntCon()->IconValue();
                emitSwCase((unsigned)ival);
            }
            else
            {
                // We emit a fallback case for the scenario when the imm-op is not a constant. This should
                // normally happen when the intrinsic is called indirectly, such as via Reflection. However, it
                // can also occur if the consumer calls it directly and just doesn't pass a constant value.
                regNumber baseReg = node->ExtractTempReg();
                regNumber offsReg = node->GetSingleTempReg();
                genHWIntrinsicJumpTableFallback(intrinsicID, op2->gtRegNum, baseReg, offsReg, emitSwCase);
            }
            break;
        }

        default:
            unreached();
            break;
    }

    genProduceReg(node);
}
Exemplo n.º 26
0
//------------------------------------------------------------------------
// genSSEIntrinsic: Generates the code for an SSE hardware intrinsic node
//
// Arguments:
//    node - The hardware intrinsic node
//
void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node)
{
    NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
    GenTree*       op1         = node->gtGetOp1();
    GenTree*       op2         = node->gtGetOp2();
    GenTree*       op3         = nullptr;
    GenTree*       op4         = nullptr;
    regNumber      targetReg   = node->gtRegNum;
    var_types      targetType  = node->TypeGet();
    var_types      baseType    = node->gtSIMDBaseType;

    regNumber op1Reg = REG_NA;
    regNumber op2Reg = REG_NA;
    regNumber op3Reg = REG_NA;
    regNumber op4Reg = REG_NA;
    emitter*  emit   = getEmitter();

    if ((op1 != nullptr) && !op1->OperIsList())
    {
        op1Reg = op1->gtRegNum;
        genConsumeOperands(node);
    }

    switch (intrinsicID)
    {
        case NI_SSE_CompareEqualOrderedScalar:
        case NI_SSE_CompareEqualUnorderedScalar:
        {
            assert(baseType == TYP_FLOAT);
            op2Reg             = op2->gtRegNum;
            regNumber   tmpReg = node->GetSingleTempReg();
            instruction ins    = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);

            // Ensure we aren't overwriting targetReg
            assert(tmpReg != targetReg);

            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
            emit->emitIns_R(INS_setpo, EA_1BYTE, targetReg);
            emit->emitIns_R(INS_sete, EA_1BYTE, tmpReg);
            emit->emitIns_R_R(INS_and, EA_1BYTE, tmpReg, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, tmpReg);
            break;
        }

        case NI_SSE_CompareGreaterThanOrderedScalar:
        case NI_SSE_CompareGreaterThanUnorderedScalar:
        {
            assert(baseType == TYP_FLOAT);
            op2Reg = op2->gtRegNum;

            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
            emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
            break;
        }

        case NI_SSE_CompareGreaterThanOrEqualOrderedScalar:
        case NI_SSE_CompareGreaterThanOrEqualUnorderedScalar:
        {
            assert(baseType == TYP_FLOAT);
            op2Reg = op2->gtRegNum;

            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
            emit->emitIns_R(INS_setae, EA_1BYTE, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
            break;
        }

        case NI_SSE_CompareLessThanOrderedScalar:
        case NI_SSE_CompareLessThanUnorderedScalar:
        {
            assert(baseType == TYP_FLOAT);
            op2Reg = op2->gtRegNum;

            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op2Reg, op1Reg);
            emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
            break;
        }

        case NI_SSE_CompareLessThanOrEqualOrderedScalar:
        case NI_SSE_CompareLessThanOrEqualUnorderedScalar:
        {
            assert(baseType == TYP_FLOAT);
            op2Reg = op2->gtRegNum;

            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op2Reg, op1Reg);
            emit->emitIns_R(INS_setae, EA_1BYTE, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
            break;
        }

        case NI_SSE_CompareNotEqualOrderedScalar:
        case NI_SSE_CompareNotEqualUnorderedScalar:
        {
            assert(baseType == TYP_FLOAT);
            op2Reg             = op2->gtRegNum;
            regNumber   tmpReg = node->GetSingleTempReg();
            instruction ins    = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);

            // Ensure we aren't overwriting targetReg
            assert(tmpReg != targetReg);

            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
            emit->emitIns_R(INS_setpe, EA_1BYTE, targetReg);
            emit->emitIns_R(INS_setne, EA_1BYTE, tmpReg);
            emit->emitIns_R_R(INS_or, EA_1BYTE, tmpReg, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, tmpReg);
            break;
        }

        case NI_SSE_ConvertToSingle:
        case NI_SSE_StaticCast:
        {
            assert(op2 == nullptr);
            if (op1Reg != targetReg)
            {
                instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
                emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), targetReg, op1Reg);
            }
            break;
        }

        case NI_SSE_MoveMask:
        {
            assert(baseType == TYP_FLOAT);
            assert(op2 == nullptr);

            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
            emit->emitIns_R_R(ins, emitTypeSize(TYP_INT), targetReg, op1Reg);
            break;
        }

        case NI_SSE_Prefetch0:
        case NI_SSE_Prefetch1:
        case NI_SSE_Prefetch2:
        case NI_SSE_PrefetchNonTemporal:
        {
            assert(baseType == TYP_UBYTE);
            assert(op2 == nullptr);

            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
            emit->emitIns_AR(ins, emitTypeSize(baseType), op1Reg, 0);
            break;
        }

        case NI_SSE_SetScalarVector128:
        {
            assert(baseType == TYP_FLOAT);
            assert(op2 == nullptr);

            if (op1Reg == targetReg)
            {
                regNumber tmpReg = node->GetSingleTempReg();

                // Ensure we aren't overwriting targetReg
                assert(tmpReg != targetReg);

                emit->emitIns_R_R(INS_movaps, emitTypeSize(TYP_SIMD16), tmpReg, op1Reg);
                op1Reg = tmpReg;
            }

            emit->emitIns_SIMD_R_R_R(INS_xorps, emitTypeSize(TYP_SIMD16), targetReg, targetReg, targetReg);
            emit->emitIns_SIMD_R_R_R(INS_movss, emitTypeSize(TYP_SIMD16), targetReg, targetReg, op1Reg);
            break;
        }

        case NI_SSE_SetZeroVector128:
        {
            assert(baseType == TYP_FLOAT);
            assert(op1 == nullptr);
            assert(op2 == nullptr);
            emit->emitIns_SIMD_R_R_R(INS_xorps, emitTypeSize(TYP_SIMD16), targetReg, targetReg, targetReg);
            break;
        }

        case NI_SSE_StoreFence:
        {
            assert(baseType == TYP_VOID);
            assert(op1 == nullptr);
            assert(op2 == nullptr);
            emit->emitIns(INS_sfence);
            break;
        }

        default:
            unreached();
            break;
    }

    genProduceReg(node);
}
Exemplo n.º 27
0
//------------------------------------------------------------------------
// genHWIntrinsic_R_R_RM_I: Generates the code for a hardware intrinsic node that takes a register operand, a
//                        register/memory operand, an immediate operand, and that returns a value in register
//
// Arguments:
//    node - The hardware intrinsic node
//    ins  - The instruction being generated
//
void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins)
{
    var_types targetType = node->TypeGet();
    regNumber targetReg  = node->gtRegNum;
    GenTree*  op1        = node->gtGetOp1();
    GenTree*  op2        = node->gtGetOp2();
    emitAttr  simdSize   = EA_ATTR(node->gtSIMDSize);
    int       ival       = Compiler::ivalOfHWIntrinsic(node->gtHWIntrinsicId);
    emitter*  emit       = getEmitter();

    // TODO-XArch-CQ: Commutative operations can have op1 be contained
    // TODO-XArch-CQ: Non-VEX encoded instructions can have both ops contained

    regNumber op1Reg = op1->gtRegNum;

    assert(targetReg != REG_NA);
    assert(op1Reg != REG_NA);

    if (op2->isContained() || op2->isUsedFromSpillTemp())
    {
        assert((Compiler::flagsOfHWIntrinsic(node->gtHWIntrinsicId) & HW_Flag_NoContainment) == 0);
        assert(compiler->m_pLowering->IsContainableHWIntrinsicOp(node, op2) || op2->IsRegOptional());

        TempDsc* tmpDsc = nullptr;
        unsigned varNum = BAD_VAR_NUM;
        unsigned offset = (unsigned)-1;

        if (op2->isUsedFromSpillTemp())
        {
            assert(op2->IsRegOptional());

            tmpDsc = getSpillTempDsc(op2);
            varNum = tmpDsc->tdTempNum();
            offset = 0;

            compiler->tmpRlsTemp(tmpDsc);
        }
        else if (op2->OperIsHWIntrinsic())
        {
            emit->emitIns_SIMD_R_R_AR_I(ins, simdSize, targetReg, op1Reg, op2->gtGetOp1()->gtRegNum, ival);
            return;
        }
        else if (op2->isIndir())
        {
            GenTreeIndir* memIndir = op2->AsIndir();
            GenTree*      memBase  = memIndir->gtOp1;

            switch (memBase->OperGet())
            {
                case GT_LCL_VAR_ADDR:
                {
                    varNum = memBase->AsLclVarCommon()->GetLclNum();
                    offset = 0;

                    // Ensure that all the GenTreeIndir values are set to their defaults.
                    assert(!memIndir->HasIndex());
                    assert(memIndir->Scale() == 1);
                    assert(memIndir->Offset() == 0);

                    break;
                }

                case GT_CLS_VAR_ADDR:
                {
                    emit->emitIns_SIMD_R_R_C_I(ins, simdSize, targetReg, op1Reg, memBase->gtClsVar.gtClsVarHnd, 0,
                                               ival);
                    return;
                }

                default:
                {
                    emit->emitIns_SIMD_R_R_A_I(ins, simdSize, targetReg, op1Reg, memIndir, ival);
                    return;
                }
            }
        }
        else
        {
            switch (op2->OperGet())
            {
                case GT_LCL_FLD:
                {
                    GenTreeLclFld* lclField = op2->AsLclFld();

                    varNum = lclField->GetLclNum();
                    offset = lclField->gtLclFld.gtLclOffs;
                    break;
                }

                case GT_LCL_VAR:
                {
                    assert(op2->IsRegOptional() || !compiler->lvaTable[op2->gtLclVar.gtLclNum].lvIsRegCandidate());
                    varNum = op2->AsLclVar()->GetLclNum();
                    offset = 0;
                    break;
                }

                default:
                    unreached();
                    break;
            }
        }

        // Ensure we got a good varNum and offset.
        // We also need to check for `tmpDsc != nullptr` since spill temp numbers
        // are negative and start with -1, which also happens to be BAD_VAR_NUM.
        assert((varNum != BAD_VAR_NUM) || (tmpDsc != nullptr));
        assert(offset != (unsigned)-1);

        emit->emitIns_SIMD_R_R_S_I(ins, simdSize, targetReg, op1Reg, varNum, offset, ival);
    }
    else
    {
        emit->emitIns_SIMD_R_R_R_I(ins, simdSize, targetReg, op1Reg, op2->gtRegNum, ival);
    }
}
Exemplo n.º 28
0
//------------------------------------------------------------------------
// genSSE2Intrinsic: Generates the code for an SSE2 hardware intrinsic node
//
// Arguments:
//    node - The hardware intrinsic node
//
void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
{
    NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
    GenTree*       op1         = node->gtGetOp1();
    GenTree*       op2         = node->gtGetOp2();
    regNumber      targetReg   = node->gtRegNum;
    var_types      targetType  = node->TypeGet();
    var_types      baseType    = node->gtSIMDBaseType;
    regNumber      op1Reg      = REG_NA;
    regNumber      op2Reg      = REG_NA;
    emitter*       emit        = getEmitter();
    int            ival        = -1;

    if ((op1 != nullptr) && !op1->OperIsList())
    {
        op1Reg = op1->gtRegNum;
        genConsumeOperands(node);
    }

    switch (intrinsicID)
    {
        // All integer overloads are handled by table codegen
        case NI_SSE2_CompareLessThan:
        {
            assert(op1 != nullptr);
            assert(op2 != nullptr);
            assert(baseType == TYP_DOUBLE);
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            op2Reg          = op2->gtRegNum;
            ival            = Compiler::ivalOfHWIntrinsic(intrinsicID);
            assert(ival != -1);
            emit->emitIns_SIMD_R_R_R_I(ins, emitTypeSize(TYP_SIMD16), targetReg, op1Reg, op2Reg, ival);

            break;
        }

        case NI_SSE2_CompareEqualOrderedScalar:
        case NI_SSE2_CompareEqualUnorderedScalar:
        {
            assert(baseType == TYP_DOUBLE);
            op2Reg             = op2->gtRegNum;
            regNumber   tmpReg = node->GetSingleTempReg();
            instruction ins    = Compiler::insOfHWIntrinsic(intrinsicID, baseType);

            // Ensure we aren't overwriting targetReg
            assert(tmpReg != targetReg);

            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
            emit->emitIns_R(INS_setpo, EA_1BYTE, targetReg);
            emit->emitIns_R(INS_sete, EA_1BYTE, tmpReg);
            emit->emitIns_R_R(INS_and, EA_1BYTE, tmpReg, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, tmpReg);
            break;
        }

        case NI_SSE2_CompareGreaterThanOrderedScalar:
        case NI_SSE2_CompareGreaterThanUnorderedScalar:
        {
            assert(baseType == TYP_DOUBLE);
            op2Reg          = op2->gtRegNum;
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);

            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
            emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
            break;
        }

        case NI_SSE2_CompareGreaterThanOrEqualOrderedScalar:
        case NI_SSE2_CompareGreaterThanOrEqualUnorderedScalar:
        {
            assert(baseType == TYP_DOUBLE);
            op2Reg          = op2->gtRegNum;
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);

            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
            emit->emitIns_R(INS_setae, EA_1BYTE, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
            break;
        }

        case NI_SSE2_CompareLessThanOrderedScalar:
        case NI_SSE2_CompareLessThanUnorderedScalar:
        {
            assert(baseType == TYP_DOUBLE);
            op2Reg          = op2->gtRegNum;
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);

            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op2Reg, op1Reg);
            emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
            break;
        }

        case NI_SSE2_CompareLessThanOrEqualOrderedScalar:
        case NI_SSE2_CompareLessThanOrEqualUnorderedScalar:
        {
            assert(baseType == TYP_DOUBLE);
            op2Reg          = op2->gtRegNum;
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);

            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op2Reg, op1Reg);
            emit->emitIns_R(INS_setae, EA_1BYTE, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
            break;
        }

        case NI_SSE2_CompareNotEqualOrderedScalar:
        case NI_SSE2_CompareNotEqualUnorderedScalar:
        {
            assert(baseType == TYP_DOUBLE);
            op2Reg             = op2->gtRegNum;
            instruction ins    = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            regNumber   tmpReg = node->GetSingleTempReg();

            // Ensure we aren't overwriting targetReg
            assert(tmpReg != targetReg);

            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
            emit->emitIns_R(INS_setpe, EA_1BYTE, targetReg);
            emit->emitIns_R(INS_setne, EA_1BYTE, tmpReg);
            emit->emitIns_R_R(INS_or, EA_1BYTE, tmpReg, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, tmpReg);
            break;
        }

        case NI_SSE2_ConvertScalarToVector128Double:
        case NI_SSE2_ConvertScalarToVector128Single:
        {
            assert(baseType == TYP_INT || baseType == TYP_LONG || baseType == TYP_FLOAT || baseType == TYP_DOUBLE);
            assert(op1 != nullptr);
            assert(op2 != nullptr);
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            genHWIntrinsic_R_R_RM(node, ins);
            break;
        }

        case NI_SSE2_ConvertScalarToVector128Int64:
        case NI_SSE2_ConvertScalarToVector128UInt64:
        {
            assert(baseType == TYP_LONG || baseType == TYP_ULONG);
            assert(op1 != nullptr);
            assert(op2 == nullptr);
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            // TODO-XArch-CQ -> use of type size of TYP_SIMD16 leads to
            // instruction register encoding errors for SSE legacy encoding
            emit->emitIns_R_R(ins, emitTypeSize(baseType), targetReg, op1Reg);
            break;
        }

        case NI_SSE2_ConvertToDouble:
        {
            assert(op2 == nullptr);
            if (op1Reg != targetReg)
            {
                instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
                emit->emitIns_R_R(ins, emitTypeSize(targetType), targetReg, op1Reg);
            }
            break;
        }

        case NI_SSE2_ConvertToInt32:
        case NI_SSE2_ConvertToInt64:
        case NI_SSE2_ConvertToUInt32:
        case NI_SSE2_ConvertToUInt64:
        {
            assert(op2 == nullptr);
            assert(baseType == TYP_DOUBLE || baseType == TYP_FLOAT || baseType == TYP_INT || baseType == TYP_UINT ||
                   baseType == TYP_LONG || baseType == TYP_ULONG);
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            if (baseType == TYP_DOUBLE || baseType == TYP_FLOAT)
            {
                emit->emitIns_R_R(ins, emitTypeSize(targetType), targetReg, op1Reg);
            }
            else
            {
                emit->emitIns_R_R(ins, emitActualTypeSize(baseType), op1Reg, targetReg);
            }
            break;
        }

        case NI_SSE2_LoadFence:
        {
            assert(baseType == TYP_VOID);
            assert(op1 == nullptr);
            assert(op2 == nullptr);
            emit->emitIns(INS_lfence);
            break;
        }

        case NI_SSE2_MemoryFence:
        {
            assert(baseType == TYP_VOID);
            assert(op1 == nullptr);
            assert(op2 == nullptr);
            emit->emitIns(INS_mfence);
            break;
        }

        case NI_SSE2_MoveMask:
        {
            assert(op2 == nullptr);
            assert(baseType == TYP_BYTE || baseType == TYP_UBYTE || baseType == TYP_DOUBLE);

            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            emit->emitIns_R_R(ins, emitTypeSize(TYP_INT), targetReg, op1Reg);
            break;
        }

        case NI_SSE2_SetScalarVector128:
        {
            assert(baseType == TYP_DOUBLE);
            assert(op2 == nullptr);

            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
            if (op1Reg == targetReg)
            {
                regNumber tmpReg = node->GetSingleTempReg();

                // Ensure we aren't overwriting targetReg
                assert(tmpReg != targetReg);

                emit->emitIns_R_R(INS_movapd, emitTypeSize(TYP_SIMD16), tmpReg, op1Reg);
                op1Reg = tmpReg;
            }

            emit->emitIns_SIMD_R_R_R(INS_xorpd, emitTypeSize(TYP_SIMD16), targetReg, targetReg, targetReg);
            emit->emitIns_SIMD_R_R_R(ins, emitTypeSize(TYP_SIMD16), targetReg, targetReg, op1Reg);
            break;
        }

        case NI_SSE2_SetZeroVector128:
        {
            assert(baseType != TYP_FLOAT);
            assert(baseType >= TYP_BYTE && baseType <= TYP_DOUBLE);
            assert(op1 == nullptr);
            assert(op2 == nullptr);

            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            emit->emitIns_SIMD_R_R_R(ins, emitTypeSize(TYP_SIMD16), targetReg, targetReg, targetReg);
            break;
        }

        default:
            unreached();
            break;
    }

    genProduceReg(node);
}