//------------------------------------------------------------------------ // DecomposeCast: Decompose GT_CAST. // // Arguments: // use - the LIR::Use object for the def that needs to be decomposed. // // Return Value: // The next node to process. // GenTree* DecomposeLongs::DecomposeCast(LIR::Use& use) { assert(use.IsInitialized()); assert(use.Def()->OperGet() == GT_CAST); GenTree* tree = use.Def(); GenTree* loResult = nullptr; GenTree* hiResult = nullptr; assert(tree->gtPrev == tree->gtGetOp1()); NYI_IF(tree->gtOverflow(), "TYP_LONG cast with overflow"); switch (tree->AsCast()->CastFromType()) { case TYP_INT: if (tree->gtFlags & GTF_UNSIGNED) { loResult = tree->gtGetOp1(); Range().Remove(tree); hiResult = new (m_compiler, GT_CNS_INT) GenTreeIntCon(TYP_INT, 0); Range().InsertAfter(loResult, hiResult); } else { NYI("Lowering of signed cast TYP_INT->TYP_LONG"); } break; default: NYI("Unimplemented type for Lowering of cast to TYP_LONG"); break; } return FinalizeDecomposition(use, loResult, hiResult); }
//------------------------------------------------------------------------ // DecomposeCast: Decompose GT_CAST. // // Arguments: // ppTree - the tree to decompose // data - tree walk context // // Return Value: // None. // void DecomposeLongs::DecomposeCast(GenTree** ppTree, Compiler::fgWalkData* data) { assert(ppTree != nullptr); assert(*ppTree != nullptr); assert(data != nullptr); assert((*ppTree)->OperGet() == GT_CAST); assert(m_compiler->compCurStmt != nullptr); GenTree* tree = *ppTree; GenTree* loResult = nullptr; GenTree* hiResult = nullptr; GenTreeStmt* curStmt = m_compiler->compCurStmt->AsStmt(); assert(tree->gtPrev == tree->gtGetOp1()); NYI_IF(tree->gtOverflow(), "TYP_LONG cast with overflow"); switch (tree->AsCast()->CastFromType()) { case TYP_INT: if (tree->gtFlags & GTF_UNSIGNED) { loResult = tree->gtGetOp1(); hiResult = new (m_compiler, GT_CNS_INT) GenTreeIntCon(TYP_INT, 0); m_compiler->fgSnipNode(curStmt, tree); } else { NYI("Lowering of signed cast TYP_INT->TYP_LONG"); } break; default: NYI("Unimplemented type for Lowering of cast to TYP_LONG"); break; } FinalizeDecomposition(ppTree, data, loResult, hiResult); }
//------------------------------------------------------------------------ // DecomposeArith: Decompose GT_ADD, GT_SUB, GT_OR, GT_XOR, GT_AND. // // Arguments: // ppTree - the tree to decompose // data - tree walk context // // Return Value: // None. // void DecomposeLongs::DecomposeArith(GenTree** ppTree, Compiler::fgWalkData* data) { assert(ppTree != nullptr); assert(*ppTree != nullptr); assert(data != nullptr); assert(m_compiler->compCurStmt != nullptr); GenTreeStmt* curStmt = m_compiler->compCurStmt->AsStmt(); GenTree* tree = *ppTree; genTreeOps oper = tree->OperGet(); assert((oper == GT_ADD) || (oper == GT_SUB) || (oper == GT_OR) || (oper == GT_XOR) || (oper == GT_AND)); NYI_IF((tree->gtFlags & GTF_REVERSE_OPS) != 0, "Binary operator with GTF_REVERSE_OPS"); GenTree* op1 = tree->gtGetOp1(); GenTree* op2 = tree->gtGetOp2(); // Both operands must have already been decomposed into GT_LONG operators. noway_assert((op1->OperGet() == GT_LONG) && (op2->OperGet() == GT_LONG)); // Capture the lo and hi halves of op1 and op2. GenTree* loOp1 = op1->gtGetOp1(); GenTree* hiOp1 = op1->gtGetOp2(); GenTree* loOp2 = op2->gtGetOp1(); GenTree* hiOp2 = op2->gtGetOp2(); // We don't have support to decompose a TYP_LONG node that already has a child that has // been decomposed into parts, where the high part depends on the value generated by the // low part (via the flags register). For example, if we have: // +(gt_long(+(lo3, lo4), +Hi(hi3, hi4)), gt_long(lo2, hi2)) // We would decompose it here to: // gt_long(+(+(lo3, lo4), lo2), +Hi(+Hi(hi3, hi4), hi2)) // But this would generate incorrect code, because the "+Hi(hi3, hi4)" code generation // needs to immediately follow the "+(lo3, lo4)" part. Also, if this node is one that // requires a unique high operator, and the child nodes are not simple locals (e.g., // they are decomposed nodes), then we also can't decompose the node, as we aren't // guaranteed the high and low parts will be executed immediately after each other. NYI_IF(hiOp1->OperIsHigh() || hiOp2->OperIsHigh() || (GenTree::OperIsHigh(GetHiOper(oper)) && (!loOp1->OperIsLeaf() || !hiOp1->OperIsLeaf() || !loOp1->OperIsLeaf() || !hiOp2->OperIsLeaf())), "Can't decompose expression tree TYP_LONG node"); // Now, remove op1 and op2 from the node list. m_compiler->fgSnipNode(curStmt, op1); m_compiler->fgSnipNode(curStmt, op2); // We will reuse "tree" for the loResult, which will now be of TYP_INT, and its operands // will be the lo halves of op1 from above. GenTree* loResult = tree; loResult->SetOper(GetLoOper(loResult->OperGet())); loResult->gtType = TYP_INT; loResult->gtOp.gtOp1 = loOp1; loResult->gtOp.gtOp2 = loOp2; // The various halves will be correctly threaded internally. We simply need to // relink them into the proper order, i.e. loOp1 is followed by loOp2, and then // the loResult node. // (This rethreading, and that below, are where we need to address the reverse ops case). // The current order is (after snipping op1 and op2): // ... loOp1-> ... hiOp1->loOp2First ... loOp2->hiOp2First ... hiOp2 // The order we want is: // ... loOp1->loOp2First ... loOp2->loResult // ... hiOp1->hiOp2First ... hiOp2->hiResult // i.e. we swap hiOp1 and loOp2, and create (for now) separate loResult and hiResult trees GenTree* loOp2First = hiOp1->gtNext; GenTree* hiOp2First = loOp2->gtNext; // First, we will NYI if both hiOp1 and loOp2 have side effects. NYI_IF(((loOp2->gtFlags & GTF_ALL_EFFECT) != 0) && ((hiOp1->gtFlags & GTF_ALL_EFFECT) != 0), "Binary long operator with non-reorderable sub expressions"); // Now, we reorder the loOps and the loResult. loOp1->gtNext = loOp2First; loOp2First->gtPrev = loOp1; loOp2->gtNext = loResult; loResult->gtPrev = loOp2; // Next, reorder the hiOps and the hiResult. GenTree* hiResult = new (m_compiler, oper) GenTreeOp(GetHiOper(oper), TYP_INT, hiOp1, hiOp2); hiOp1->gtNext = hiOp2First; hiOp2First->gtPrev = hiOp1; hiOp2->gtNext = hiResult; hiResult->gtPrev = hiOp2; if ((oper == GT_ADD) || (oper == GT_SUB)) { if (loResult->gtOverflow()) { hiResult->gtFlags |= GTF_OVERFLOW; loResult->gtFlags &= ~GTF_OVERFLOW; } if (loResult->gtFlags & GTF_UNSIGNED) { hiResult->gtFlags |= GTF_UNSIGNED; } } FinalizeDecomposition(ppTree, data, loResult, hiResult); }
//------------------------------------------------------------------------ // BuildNode: Build the RefPositions for for a node // // Arguments: // treeNode - the node of interest // // Return Value: // The number of sources consumed by this node. // // Notes: // Preconditions: // LSRA Has been initialized. // // Postconditions: // RefPositions have been built for all the register defs and uses required // for this node. // int LinearScan::BuildNode(GenTree* tree) { assert(!tree->isContained()); Interval* prefSrcInterval = nullptr; int srcCount; int dstCount = 0; regMaskTP dstCandidates = RBM_NONE; regMaskTP killMask = RBM_NONE; bool isLocalDefUse = false; // Reset the build-related members of LinearScan. clearBuildState(); RegisterType registerType = TypeGet(tree); // Set the default dstCount. This may be modified below. if (tree->IsValue()) { dstCount = 1; if (tree->IsUnusedValue()) { isLocalDefUse = true; } } else { dstCount = 0; } switch (tree->OperGet()) { default: srcCount = BuildSimple(tree); break; case GT_LCL_VAR: case GT_LCL_FLD: { // We handle tracked variables differently from non-tracked ones. If it is tracked, // we will simply add a use of the tracked variable at its parent/consumer. // Otherwise, for a use we need to actually add the appropriate references for loading // or storing the variable. // // A tracked variable won't actually get used until the appropriate ancestor tree node // is processed, unless this is marked "isLocalDefUse" because it is a stack-based argument // to a call or an orphaned dead node. // LclVarDsc* const varDsc = &compiler->lvaTable[tree->AsLclVarCommon()->gtLclNum]; if (isCandidateVar(varDsc)) { INDEBUG(dumpNodeInfo(tree, dstCandidates, 0, 1)); return 0; } srcCount = 0; #ifdef FEATURE_SIMD // Need an additional register to read upper 4 bytes of Vector3. if (tree->TypeGet() == TYP_SIMD12) { // We need an internal register different from targetReg in which 'tree' produces its result // because both targetReg and internal reg will be in use at the same time. buildInternalFloatRegisterDefForNode(tree, allSIMDRegs()); setInternalRegsDelayFree = true; buildInternalRegisterUses(); } #endif BuildDef(tree); } break; case GT_STORE_LCL_FLD: case GT_STORE_LCL_VAR: srcCount = 1; assert(dstCount == 0); srcCount = BuildStoreLoc(tree->AsLclVarCommon()); break; case GT_FIELD_LIST: // These should always be contained. We don't correctly allocate or // generate code for a non-contained GT_FIELD_LIST. noway_assert(!"Non-contained GT_FIELD_LIST"); srcCount = 0; break; case GT_LIST: case GT_ARGPLACE: case GT_NO_OP: case GT_START_NONGC: case GT_PROF_HOOK: srcCount = 0; assert(dstCount == 0); break; case GT_START_PREEMPTGC: // This kills GC refs in callee save regs srcCount = 0; assert(dstCount == 0); BuildDefsWithKills(tree, 0, RBM_NONE, RBM_NONE); break; case GT_CNS_DBL: { GenTreeDblCon* dblConst = tree->AsDblCon(); double constValue = dblConst->gtDblCon.gtDconVal; if (emitter::emitIns_valid_imm_for_fmov(constValue)) { // Directly encode constant to instructions. } else { // Reserve int to load constant from memory (IF_LARGELDC) buildInternalIntRegisterDefForNode(tree); buildInternalRegisterUses(); } } __fallthrough; case GT_CNS_INT: { srcCount = 0; assert(dstCount == 1); RefPosition* def = BuildDef(tree); def->getInterval()->isConstant = true; } break; case GT_BOX: case GT_COMMA: case GT_QMARK: case GT_COLON: srcCount = 0; assert(dstCount == 0); unreached(); break; case GT_RETURN: srcCount = BuildReturn(tree); break; case GT_RETFILT: assert(dstCount == 0); if (tree->TypeGet() == TYP_VOID) { srcCount = 0; } else { assert(tree->TypeGet() == TYP_INT); srcCount = 1; BuildUse(tree->gtGetOp1(), RBM_INTRET); } break; case GT_NOP: // A GT_NOP is either a passthrough (if it is void, or if it has // a child), but must be considered to produce a dummy value if it // has a type but no child. srcCount = 0; if (tree->TypeGet() != TYP_VOID && tree->gtGetOp1() == nullptr) { assert(dstCount == 1); BuildDef(tree); } else { assert(dstCount == 0); } break; case GT_JTRUE: srcCount = 0; assert(dstCount == 0); break; case GT_JMP: srcCount = 0; assert(dstCount == 0); break; case GT_SWITCH: // This should never occur since switch nodes must not be visible at this // point in the JIT. srcCount = 0; noway_assert(!"Switch must be lowered at this point"); break; case GT_JMPTABLE: srcCount = 0; assert(dstCount == 1); BuildDef(tree); break; case GT_SWITCH_TABLE: buildInternalIntRegisterDefForNode(tree); srcCount = BuildBinaryUses(tree->AsOp()); assert(dstCount == 0); break; case GT_ASG: noway_assert(!"We should never hit any assignment operator in lowering"); srcCount = 0; break; case GT_ADD: case GT_SUB: if (varTypeIsFloating(tree->TypeGet())) { // overflow operations aren't supported on float/double types. assert(!tree->gtOverflow()); // No implicit conversions at this stage as the expectation is that // everything is made explicit by adding casts. assert(tree->gtGetOp1()->TypeGet() == tree->gtGetOp2()->TypeGet()); } __fallthrough; case GT_AND: case GT_OR: case GT_XOR: case GT_LSH: case GT_RSH: case GT_RSZ: case GT_ROR: srcCount = BuildBinaryUses(tree->AsOp()); assert(dstCount == 1); BuildDef(tree); break; case GT_RETURNTRAP: // this just turns into a compare of its child with an int // + a conditional call BuildUse(tree->gtGetOp1()); srcCount = 1; assert(dstCount == 0); killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC); BuildDefsWithKills(tree, 0, RBM_NONE, killMask); break; case GT_MOD: case GT_UMOD: NYI_IF(varTypeIsFloating(tree->TypeGet()), "FP Remainder in ARM64"); assert(!"Shouldn't see an integer typed GT_MOD node in ARM64"); srcCount = 0; break; case GT_MUL: if (tree->gtOverflow()) { // Need a register different from target reg to check for overflow. buildInternalIntRegisterDefForNode(tree); setInternalRegsDelayFree = true; } __fallthrough; case GT_DIV: case GT_MULHI: case GT_UDIV: { srcCount = BuildBinaryUses(tree->AsOp()); buildInternalRegisterUses(); assert(dstCount == 1); BuildDef(tree); } break; case GT_INTRINSIC: { noway_assert((tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Abs) || (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Ceiling) || (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Floor) || (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round) || (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Sqrt)); // Both operand and its result must be of the same floating point type. GenTree* op1 = tree->gtGetOp1(); assert(varTypeIsFloating(op1)); assert(op1->TypeGet() == tree->TypeGet()); BuildUse(op1); srcCount = 1; assert(dstCount == 1); BuildDef(tree); } break; #ifdef FEATURE_SIMD case GT_SIMD: srcCount = BuildSIMD(tree->AsSIMD()); break; #endif // FEATURE_SIMD #ifdef FEATURE_HW_INTRINSICS case GT_HWIntrinsic: srcCount = BuildHWIntrinsic(tree->AsHWIntrinsic()); break; #endif // FEATURE_HW_INTRINSICS case GT_CAST: assert(dstCount == 1); srcCount = BuildCast(tree->AsCast()); break; case GT_NEG: case GT_NOT: BuildUse(tree->gtGetOp1()); srcCount = 1; assert(dstCount == 1); BuildDef(tree); break; case GT_EQ: case GT_NE: case GT_LT: case GT_LE: case GT_GE: case GT_GT: case GT_TEST_EQ: case GT_TEST_NE: case GT_JCMP: srcCount = BuildCmp(tree); break; case GT_CKFINITE: srcCount = 1; assert(dstCount == 1); buildInternalIntRegisterDefForNode(tree); BuildUse(tree->gtGetOp1()); BuildDef(tree); buildInternalRegisterUses(); break; case GT_CMPXCHG: { GenTreeCmpXchg* cmpXchgNode = tree->AsCmpXchg(); srcCount = cmpXchgNode->gtOpComparand->isContained() ? 2 : 3; assert(dstCount == 1); if (!compiler->compSupports(InstructionSet_Atomics)) { // For ARMv8 exclusives requires a single internal register buildInternalIntRegisterDefForNode(tree); } // For ARMv8 exclusives the lifetime of the addr and data must be extended because // it may be used used multiple during retries // For ARMv8.1 atomic cas the lifetime of the addr and data must be extended to prevent // them being reused as the target register which must be destroyed early RefPosition* locationUse = BuildUse(tree->gtCmpXchg.gtOpLocation); setDelayFree(locationUse); RefPosition* valueUse = BuildUse(tree->gtCmpXchg.gtOpValue); setDelayFree(valueUse); if (!cmpXchgNode->gtOpComparand->isContained()) { RefPosition* comparandUse = BuildUse(tree->gtCmpXchg.gtOpComparand); // For ARMv8 exclusives the lifetime of the comparand must be extended because // it may be used used multiple during retries if (!compiler->compSupports(InstructionSet_Atomics)) { setDelayFree(comparandUse); } } // Internals may not collide with target setInternalRegsDelayFree = true; buildInternalRegisterUses(); BuildDef(tree); } break; case GT_LOCKADD: case GT_XADD: case GT_XCHG: { assert(dstCount == (tree->TypeGet() == TYP_VOID) ? 0 : 1); srcCount = tree->gtGetOp2()->isContained() ? 1 : 2; if (!compiler->compSupports(InstructionSet_Atomics)) { // GT_XCHG requires a single internal register; the others require two. buildInternalIntRegisterDefForNode(tree); if (tree->OperGet() != GT_XCHG) { buildInternalIntRegisterDefForNode(tree); } } assert(!tree->gtGetOp1()->isContained()); RefPosition* op1Use = BuildUse(tree->gtGetOp1()); RefPosition* op2Use = nullptr; if (!tree->gtGetOp2()->isContained()) { op2Use = BuildUse(tree->gtGetOp2()); } // For ARMv8 exclusives the lifetime of the addr and data must be extended because // it may be used used multiple during retries if (!compiler->compSupports(InstructionSet_Atomics)) { // Internals may not collide with target if (dstCount == 1) { setDelayFree(op1Use); if (op2Use != nullptr) { setDelayFree(op2Use); } setInternalRegsDelayFree = true; } buildInternalRegisterUses(); } if (dstCount == 1) { BuildDef(tree); } } break; #if FEATURE_ARG_SPLIT case GT_PUTARG_SPLIT: srcCount = BuildPutArgSplit(tree->AsPutArgSplit()); dstCount = tree->AsPutArgSplit()->gtNumRegs; break; #endif // FEATURE _SPLIT_ARG case GT_PUTARG_STK: srcCount = BuildPutArgStk(tree->AsPutArgStk()); break; case GT_PUTARG_REG: srcCount = BuildPutArgReg(tree->AsUnOp()); break; case GT_CALL: srcCount = BuildCall(tree->AsCall()); if (tree->AsCall()->HasMultiRegRetVal()) { dstCount = tree->AsCall()->GetReturnTypeDesc()->GetReturnRegCount(); } break; case GT_ADDR: { // For a GT_ADDR, the child node should not be evaluated into a register GenTree* child = tree->gtGetOp1(); assert(!isCandidateLocalRef(child)); assert(child->isContained()); assert(dstCount == 1); srcCount = 0; BuildDef(tree); } break; case GT_BLK: case GT_DYN_BLK: // These should all be eliminated prior to Lowering. assert(!"Non-store block node in Lowering"); srcCount = 0; break; case GT_STORE_BLK: case GT_STORE_OBJ: case GT_STORE_DYN_BLK: srcCount = BuildBlockStore(tree->AsBlk()); break; case GT_INIT_VAL: // Always a passthrough of its child's value. assert(!"INIT_VAL should always be contained"); srcCount = 0; break; case GT_LCLHEAP: { assert(dstCount == 1); // Need a variable number of temp regs (see genLclHeap() in codegenamd64.cpp): // Here '-' means don't care. // // Size? Init Memory? # temp regs // 0 - 0 // const and <=6 ptr words - 0 // const and <PageSize No 0 // >6 ptr words Yes 0 // Non-const Yes 0 // Non-const No 2 // GenTree* size = tree->gtGetOp1(); if (size->IsCnsIntOrI()) { assert(size->isContained()); srcCount = 0; size_t sizeVal = size->gtIntCon.gtIconVal; if (sizeVal != 0) { // Compute the amount of memory to properly STACK_ALIGN. // Note: The Gentree node is not updated here as it is cheap to recompute stack aligned size. // This should also help in debugging as we can examine the original size specified with // localloc. sizeVal = AlignUp(sizeVal, STACK_ALIGN); size_t stpCount = sizeVal / (REGSIZE_BYTES * 2); // For small allocations up to 4 'stp' instructions (i.e. 16 to 64 bytes of localloc) // if (stpCount <= 4) { // Need no internal registers } else if (!compiler->info.compInitMem) { // No need to initialize allocated stack space. if (sizeVal < compiler->eeGetPageSize()) { // Need no internal registers } else { // We need two registers: regCnt and RegTmp buildInternalIntRegisterDefForNode(tree); buildInternalIntRegisterDefForNode(tree); } } } } else { srcCount = 1; if (!compiler->info.compInitMem) { buildInternalIntRegisterDefForNode(tree); buildInternalIntRegisterDefForNode(tree); } } if (!size->isContained()) { BuildUse(size); } buildInternalRegisterUses(); BuildDef(tree); } break; case GT_ARR_BOUNDS_CHECK: #ifdef FEATURE_SIMD case GT_SIMD_CHK: #endif // FEATURE_SIMD { GenTreeBoundsChk* node = tree->AsBoundsChk(); // Consumes arrLen & index - has no result assert(dstCount == 0); GenTree* intCns = nullptr; GenTree* other = nullptr; srcCount = BuildOperandUses(tree->AsBoundsChk()->gtIndex); srcCount += BuildOperandUses(tree->AsBoundsChk()->gtArrLen); } break; case GT_ARR_ELEM: // These must have been lowered to GT_ARR_INDEX noway_assert(!"We should never see a GT_ARR_ELEM in lowering"); srcCount = 0; assert(dstCount == 0); break; case GT_ARR_INDEX: { srcCount = 2; assert(dstCount == 1); buildInternalIntRegisterDefForNode(tree); setInternalRegsDelayFree = true; // For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple // times while the result is being computed. RefPosition* arrObjUse = BuildUse(tree->AsArrIndex()->ArrObj()); setDelayFree(arrObjUse); BuildUse(tree->AsArrIndex()->IndexExpr()); buildInternalRegisterUses(); BuildDef(tree); } break; case GT_ARR_OFFSET: // This consumes the offset, if any, the arrObj and the effective index, // and produces the flattened offset for this dimension. srcCount = 2; if (!tree->gtArrOffs.gtOffset->isContained()) { BuildUse(tree->AsArrOffs()->gtOffset); srcCount++; } BuildUse(tree->AsArrOffs()->gtIndex); BuildUse(tree->AsArrOffs()->gtArrObj); assert(dstCount == 1); buildInternalIntRegisterDefForNode(tree); buildInternalRegisterUses(); BuildDef(tree); break; case GT_LEA: { GenTreeAddrMode* lea = tree->AsAddrMode(); GenTree* base = lea->Base(); GenTree* index = lea->Index(); int cns = lea->Offset(); // This LEA is instantiating an address, so we set up the srcCount here. srcCount = 0; if (base != nullptr) { srcCount++; BuildUse(base); } if (index != nullptr) { srcCount++; BuildUse(index); } assert(dstCount == 1); // On ARM64 we may need a single internal register // (when both conditions are true then we still only need a single internal register) if ((index != nullptr) && (cns != 0)) { // ARM64 does not support both Index and offset so we need an internal register buildInternalIntRegisterDefForNode(tree); } else if (!emitter::emitIns_valid_imm_for_add(cns, EA_8BYTE)) { // This offset can't be contained in the add instruction, so we need an internal register buildInternalIntRegisterDefForNode(tree); } buildInternalRegisterUses(); BuildDef(tree); } break; case GT_STOREIND: { assert(dstCount == 0); if (compiler->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(tree)) { srcCount = BuildGCWriteBarrier(tree); break; } srcCount = BuildIndir(tree->AsIndir()); if (!tree->gtGetOp2()->isContained()) { BuildUse(tree->gtGetOp2()); srcCount++; } } break; case GT_NULLCHECK: // Unlike ARM, ARM64 implements NULLCHECK as a load to REG_ZR, so no internal register // is required, and it is not a localDefUse. assert(dstCount == 0); assert(!tree->gtGetOp1()->isContained()); BuildUse(tree->gtGetOp1()); srcCount = 1; break; case GT_IND: assert(dstCount == 1); srcCount = BuildIndir(tree->AsIndir()); break; case GT_CATCH_ARG: srcCount = 0; assert(dstCount == 1); BuildDef(tree, RBM_EXCEPTION_OBJECT); break; case GT_CLS_VAR: srcCount = 0; // GT_CLS_VAR, by the time we reach the backend, must always // be a pure use. // It will produce a result of the type of the // node, and use an internal register for the address. assert(dstCount == 1); assert((tree->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEASG)) == 0); buildInternalIntRegisterDefForNode(tree); buildInternalRegisterUses(); BuildDef(tree); break; case GT_INDEX_ADDR: assert(dstCount == 1); srcCount = BuildBinaryUses(tree->AsOp()); buildInternalIntRegisterDefForNode(tree); buildInternalRegisterUses(); BuildDef(tree); break; } // end switch (tree->OperGet()) if (tree->IsUnusedValue() && (dstCount != 0)) { isLocalDefUse = true; } // We need to be sure that we've set srcCount and dstCount appropriately assert((dstCount < 2) || tree->IsMultiRegCall()); assert(isLocalDefUse == (tree->IsValue() && tree->IsUnusedValue())); assert(!tree->IsUnusedValue() || (dstCount != 0)); assert(dstCount == tree->GetRegisterDstCount()); INDEBUG(dumpNodeInfo(tree, dstCandidates, srcCount, dstCount)); return srcCount; }
//------------------------------------------------------------------------ // DecomposeArith: Decompose GT_ADD, GT_SUB, GT_OR, GT_XOR, GT_AND. // // Arguments: // use - the LIR::Use object for the def that needs to be decomposed. // // Return Value: // The next node to process. // GenTree* DecomposeLongs::DecomposeArith(LIR::Use& use) { assert(use.IsInitialized()); GenTree* tree = use.Def(); genTreeOps oper = tree->OperGet(); assert((oper == GT_ADD) || (oper == GT_SUB) || (oper == GT_OR) || (oper == GT_XOR) || (oper == GT_AND)); GenTree* op1 = tree->gtGetOp1(); GenTree* op2 = tree->gtGetOp2(); // Both operands must have already been decomposed into GT_LONG operators. noway_assert((op1->OperGet() == GT_LONG) && (op2->OperGet() == GT_LONG)); // Capture the lo and hi halves of op1 and op2. GenTree* loOp1 = op1->gtGetOp1(); GenTree* hiOp1 = op1->gtGetOp2(); GenTree* loOp2 = op2->gtGetOp1(); GenTree* hiOp2 = op2->gtGetOp2(); // We don't have support to decompose a TYP_LONG node that already has a child that has // been decomposed into parts, where the high part depends on the value generated by the // low part (via the flags register). For example, if we have: // +(gt_long(+(lo3, lo4), +Hi(hi3, hi4)), gt_long(lo2, hi2)) // We would decompose it here to: // gt_long(+(+(lo3, lo4), lo2), +Hi(+Hi(hi3, hi4), hi2)) // But this would generate incorrect code, because the "+Hi(hi3, hi4)" code generation // needs to immediately follow the "+(lo3, lo4)" part. Also, if this node is one that // requires a unique high operator, and the child nodes are not simple locals (e.g., // they are decomposed nodes), then we also can't decompose the node, as we aren't // guaranteed the high and low parts will be executed immediately after each other. NYI_IF(hiOp1->OperIsHigh() || hiOp2->OperIsHigh() || (GenTree::OperIsHigh(GetHiOper(oper)) && (!loOp1->OperIsLeaf() || !hiOp1->OperIsLeaf() || !loOp1->OperIsLeaf() || !hiOp2->OperIsLeaf())), "Can't decompose expression tree TYP_LONG node"); // Now, remove op1 and op2 from the node list. BlockRange().Remove(op1); BlockRange().Remove(op2); // We will reuse "tree" for the loResult, which will now be of TYP_INT, and its operands // will be the lo halves of op1 from above. GenTree* loResult = tree; loResult->SetOper(GetLoOper(loResult->OperGet())); loResult->gtType = TYP_INT; loResult->gtOp.gtOp1 = loOp1; loResult->gtOp.gtOp2 = loOp2; GenTree* hiResult = new (m_compiler, oper) GenTreeOp(GetHiOper(oper), TYP_INT, hiOp1, hiOp2); hiResult->CopyCosts(loResult); BlockRange().InsertAfter(loResult, hiResult); if ((oper == GT_ADD) || (oper == GT_SUB)) { if (loResult->gtOverflow()) { hiResult->gtFlags |= GTF_OVERFLOW; loResult->gtFlags &= ~GTF_OVERFLOW; } if (loResult->gtFlags & GTF_UNSIGNED) { hiResult->gtFlags |= GTF_UNSIGNED; } } return FinalizeDecomposition(use, loResult, hiResult); }