/// Initialize SU->NumRegDefsLeft from the nodes glued into this SUnit.
///
/// Walks the glue chain starting at SU->getNode(). A machine node sets the
/// running count to min(number of values, number of MCInstrDesc defs),
/// replacing whatever was accumulated so far. CopyFromReg and INLINEASM nodes
/// each add one. An IMPLICIT_DEF resets the count to zero and ends the walk.
void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) {
  unsigned DefCount = 0;
  for (SDNode *Cur = SU->getNode(); Cur; Cur = Cur->getGluedNode()) {
    if (!Cur->isMachineOpcode()) {
      // Only these two target-independent opcodes contribute a def.
      const unsigned Opc = Cur->getOpcode();
      if (Opc == ISD::CopyFromReg || Opc == ISD::INLINEASM)
        ++DefCount;
      continue;
    }

    const MCInstrDesc &Desc = TII->get(Cur->getMachineOpcode());
    // No register need be allocated for an IMPLICIT_DEF.
    if (Cur->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
      DefCount = 0;
      break;
    }
    DefCount = std::min(Cur->getNumValues(), Desc.getNumDefs());
  }
  SU->NumRegDefsLeft = DefCount;
}
/// Compute the latency of \p SU and store it in SU->Latency.
///
/// In order of precedence the result is:
///  - 0 when the SUnit's node is a TokenFactor,
///  - 1 when forceUnitLatencies() is true,
///  - HighLatencyCycles or 1 when no itinerary data is available, depending
///    on whether TII reports the node as a high-latency def,
///  - otherwise the sum of TII->getInstrLatency() over every machine node
///    glued into the SUnit.
void ScheduleDAGSDNodes::computeLatency(SUnit *SU) {
  SDNode *Head = SU->getNode();

  // TokenFactor operands are considered zero latency, and some schedulers
  // (e.g. Top-Down list) may rely on the fact that operand latency is nonzero
  // whenever node latency is nonzero.
  if (Head && Head->getOpcode() == ISD::TokenFactor) {
    SU->Latency = 0;
    return;
  }

  // The scheduler does not care about latencies.
  if (forceUnitLatencies()) {
    SU->Latency = 1;
    return;
  }

  // Without itineraries, only distinguish high-latency defs from the rest.
  if (!InstrItins || InstrItins->isEmpty()) {
    if (Head && Head->isMachineOpcode() &&
        TII->isHighLatencyDef(Head->getMachineOpcode()))
      SU->Latency = HighLatencyCycles;
    else
      SU->Latency = 1;
    return;
  }

  // Accumulate the latency of every machine node glued into this SUnit.
  SU->Latency = 0;
  for (SDNode *Cur = Head; Cur; Cur = Cur->getGluedNode()) {
    if (Cur->isMachineOpcode())
      SU->Latency += TII->getInstrLatency(InstrItins, Cur);
  }
}
/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay /// scheduling of the given node to satisfy live physical register dependencies. /// If the specific node is the last one that's available to schedule, do /// whatever is necessary (i.e. backtracking or cloning) to make it possible. bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs){ if (NumLiveRegs == 0) return false; SmallSet<unsigned, 4> RegAdded; // If this node would clobber any "live" register, then it's not ready. for (SDep &Pred : SU->Preds) { if (Pred.isAssignedRegDep()) { CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs, RegAdded, LRegs, TRI); } } for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) { if (Node->getOpcode() == ISD::INLINEASM) { // Inline asm can clobber physical defs. unsigned NumOps = Node->getNumOperands(); if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) --NumOps; // Ignore the glue operand. for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { unsigned Flags = cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); ++i; // Skip the ID value. if (InlineAsm::isRegDefKind(Flags) || InlineAsm::isRegDefEarlyClobberKind(Flags) || InlineAsm::isClobberKind(Flags)) { // Check for def of register or earlyclobber register. for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); } } else i += NumVals; } continue; } if (!Node->isMachineOpcode()) continue; const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); if (!MCID.ImplicitDefs) continue; for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) { CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); } } return !LRegs.empty(); }
void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { if (!SU->getNode()) { dbgs() << "PHYS REG COPY\n"; return; } SU->getNode()->dump(DAG); dbgs() << "\n"; SmallVector<SDNode *, 4> GluedNodes; for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode()) GluedNodes.push_back(N); while (!GluedNodes.empty()) { dbgs() << " "; GluedNodes.back()->dump(DAG); dbgs() << "\n"; GluedNodes.pop_back(); } }
/// Compute the latency of \p SU and store it in SU->Latency.
///
/// The latency is 1 when ForceUnitLatencies() is true or when no itinerary
/// data is available. Otherwise it is the sum of TII->getInstrLatency() over
/// every machine node glued into the SUnit.
void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
  // Either the scheduler does not care about latencies, or there is no
  // itinerary to look them up in.
  if (ForceUnitLatencies() || !InstrItins || InstrItins->isEmpty()) {
    SU->Latency = 1;
    return;
  }

  // Accumulate the latency of every machine node glued into this SUnit.
  SU->Latency = 0;
  for (SDNode *Cur = SU->getNode(); Cur; Cur = Cur->getGluedNode()) {
    if (!Cur->isMachineOpcode())
      continue;
    SU->Latency += TII->getInstrLatency(InstrItins, Cur);
  }
}
void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) if (!SU->getNode()) { dbgs() << "PHYS REG COPY\n"; return; } SU->getNode()->dump(DAG); dbgs() << "\n"; SmallVector<SDNode *, 4> GluedNodes; for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode()) GluedNodes.push_back(N); while (!GluedNodes.empty()) { dbgs() << " "; GluedNodes.back()->dump(DAG); dbgs() << "\n"; GluedNodes.pop_back(); } #endif }
std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const { std::string s; raw_string_ostream O(s); O << "SU(" << SU->NodeNum << "): "; if (SU->getNode()) { SmallVector<SDNode *, 4> GluedNodes; for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) GluedNodes.push_back(N); while (!GluedNodes.empty()) { O << DOTGraphTraits<SelectionDAG*> ::getSimpleNodeLabel(GluedNodes.back(), DAG); GluedNodes.pop_back(); if (!GluedNodes.empty()) O << "\n "; } } else { O << "CROSS RC COPY"; } return O.str(); }
/// EmitSchedule - Emit the machine code in scheduled order. Return the new /// InsertPos and MachineBasicBlock that contains this insertion /// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does /// not necessarily refer to returned BB. The emitter may split blocks. MachineBasicBlock *ScheduleDAGSDNodes:: EmitSchedule(MachineBasicBlock::iterator &InsertPos) { InstrEmitter Emitter(BB, InsertPos); DenseMap<SDValue, unsigned> VRBaseMap; DenseMap<SUnit*, unsigned> CopyVRBaseMap; SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders; SmallSet<unsigned, 8> Seen; bool HasDbg = DAG->hasDebugValues(); // If this is the first BB, emit byval parameter dbg_value's. if (HasDbg && BB->getParent()->begin() == MachineFunction::iterator(BB)) { SDDbgInfo::DbgIterator PDI = DAG->ByvalParmDbgBegin(); SDDbgInfo::DbgIterator PDE = DAG->ByvalParmDbgEnd(); for (; PDI != PDE; ++PDI) { MachineInstr *DbgMI= Emitter.EmitDbgValue(*PDI, VRBaseMap); if (DbgMI) BB->insert(InsertPos, DbgMI); } } for (unsigned i = 0, e = Sequence.size(); i != e; i++) { SUnit *SU = Sequence[i]; if (!SU) { // Null SUnit* is a noop. TII->insertNoop(*Emitter.getBlock(), InsertPos); continue; } // For pre-regalloc scheduling, create instructions corresponding to the // SDNode and any glued SDNodes and append them to the block. if (!SU->getNode()) { // Emit a copy. EmitPhysRegCopy(SU, CopyVRBaseMap, InsertPos); continue; } SmallVector<SDNode *, 4> GluedNodes; for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode()) GluedNodes.push_back(N); while (!GluedNodes.empty()) { SDNode *N = GluedNodes.back(); Emitter.EmitNode(GluedNodes.back(), SU->OrigNode != SU, SU->isCloned, VRBaseMap); // Remember the source order of the inserted instruction. if (HasDbg) ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen); GluedNodes.pop_back(); } Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned, VRBaseMap); // Remember the source order of the inserted instruction. 
if (HasDbg) ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders, Seen); } // Insert all the dbg_values which have not already been inserted in source // order sequence. if (HasDbg) { MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI(); // Sort the source order instructions and use the order to insert debug // values. std::sort(Orders.begin(), Orders.end(), OrderSorter()); SDDbgInfo::DbgIterator DI = DAG->DbgBegin(); SDDbgInfo::DbgIterator DE = DAG->DbgEnd(); // Now emit the rest according to source order. unsigned LastOrder = 0; for (unsigned i = 0, e = Orders.size(); i != e && DI != DE; ++i) { unsigned Order = Orders[i].first; MachineInstr *MI = Orders[i].second; // Insert all SDDbgValue's whose order(s) are before "Order". if (!MI) continue; for (; DI != DE && (*DI)->getOrder() >= LastOrder && (*DI)->getOrder() < Order; ++DI) { if ((*DI)->isInvalidated()) continue; MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap); if (DbgMI) { if (!LastOrder) // Insert to start of the BB (after PHIs). BB->insert(BBBegin, DbgMI); else { // Insert at the instruction, which may be in a different // block, if the block was split by a custom inserter. MachineBasicBlock::iterator Pos = MI; MI->getParent()->insert(llvm::next(Pos), DbgMI); } } } LastOrder = Order; } // Add trailing DbgValue's before the terminator. FIXME: May want to add // some of them before one or more conditional branches? SmallVector<MachineInstr*, 8> DbgMIs; while (DI != DE) { if (!(*DI)->isInvalidated()) if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap)) DbgMIs.push_back(DbgMI); ++DI; } MachineBasicBlock *InsertBB = Emitter.getBlock(); MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator(); InsertBB->insert(Pos, DbgMIs.begin(), DbgMIs.end()); } InsertPos = Emitter.getInsertPos(); return Emitter.getBlock(); }
void ScheduleDAGSDNodes::AddSchedEdges() { const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); // Check to see if the scheduler cares about latencies. bool UnitLatencies = forceUnitLatencies(); // Pass 2: add the preds, succs, etc. for (unsigned su = 0, e = SUnits.size(); su != e; ++su) { SUnit *SU = &SUnits[su]; SDNode *MainNode = SU->getNode(); if (MainNode->isMachineOpcode()) { unsigned Opc = MainNode->getMachineOpcode(); const MCInstrDesc &MCID = TII->get(Opc); for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { SU->isTwoAddress = true; break; } } if (MCID.isCommutable()) SU->isCommutable = true; } // Find all predecessors and successors of the group. for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) { if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).getImplicitDefs()) { SU->hasPhysRegClobbers = true; unsigned NumUsed = InstrEmitter::CountResults(N); while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1)) --NumUsed; // Skip over unused values at the end. if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs()) SU->hasPhysRegDefs = true; } for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { SDNode *OpN = N->getOperand(i).getNode(); if (isPassiveNode(OpN)) continue; // Not scheduled. SUnit *OpSU = &SUnits[OpN->getNodeId()]; assert(OpSU && "Node has no SUnit!"); if (OpSU == SU) continue; // In the same group. EVT OpVT = N->getOperand(i).getValueType(); assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!"); bool isChain = OpVT == MVT::Other; unsigned PhysReg = 0; int Cost = 1; // Determine if this is a physical register dependency. CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost); assert((PhysReg == 0 || !isChain) && "Chain dependence via physreg data?"); // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. 
For now, scheduler // emits a copy from the physical register to a virtual register unless // it requires a cross class copy (cost < 0). That means we are only // treating "expensive to copy" register dependency as physical register // dependency. This may change in the future though. if (Cost >= 0 && !StressSched) PhysReg = 0; // If this is a ctrl dep, latency is 1. unsigned OpLatency = isChain ? 1 : OpSU->Latency; // Special-case TokenFactor chains as zero-latency. if(isChain && OpN->getOpcode() == ISD::TokenFactor) OpLatency = 0; const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data, OpLatency, PhysReg); if (!isChain && !UnitLatencies) { computeOperandLatency(OpN, N, i, const_cast<SDep &>(dep)); ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep)); } if (!SU->addPred(dep) && !dep.isCtrl() && OpSU->NumRegDefsLeft > 1) { // Multiple register uses are combined in the same SUnit. For example, // we could have a set of glued nodes with all their defs consumed by // another set of glued nodes. Register pressure tracking sees this as // a single use, so to keep pressure balanced we reduce the defs. // // We can't tell (without more book-keeping) if this results from // glued nodes or duplicate operands. As long as we don't reduce // NumRegDefsLeft to zero, we handle the common cases well. --OpSU->NumRegDefsLeft; } } } } }
/// Returns single number reflecting benefit of scheduling SU /// in the current cycle. signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) { // Initial trivial priority. signed ResCount = 1; // Do not waste time on a node that is already scheduled. if (SU->isScheduled) return ResCount; // Forced priority is high. if (SU->isScheduleHigh) ResCount += PriorityOne; // Adaptable scheduling // A small, but very parallel // region, where reg pressure is an issue. if (HorizontalVerticalBalance > RegPressureThreshold) { // Critical path first ResCount += (SU->getHeight() * ScaleTwo); // If resources are available for it, multiply the // chance of scheduling. if (isResourceAvailable(SU)) ResCount <<= FactorOne; // Consider change to reg pressure from scheduling // this SU. ResCount -= (regPressureDelta(SU,true) * ScaleOne); } // Default heuristic, greeady and // critical path driven. else { // Critical path first. ResCount += (SU->getHeight() * ScaleTwo); // Now see how many instructions is blocked by this SU. ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo); // If resources are available for it, multiply the // chance of scheduling. if (isResourceAvailable(SU)) ResCount <<= FactorOne; ResCount -= (regPressureDelta(SU) * ScaleTwo); } // These are platform specific things. // Will need to go into the back end // and accessed from here via a hook. for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) { if (N->isMachineOpcode()) { const MCInstrDesc &TID = TII->get(N->getMachineOpcode()); if (TID.isCall()) ResCount += (PriorityThree + (ScaleThree*N->getNumValues())); } else switch (N->getOpcode()) { default: break; case ISD::TokenFactor: case ISD::CopyFromReg: case ISD::CopyToReg: ResCount += PriorityFive; break; case ISD::INLINEASM: ResCount += PriorityFour; break; } } return ResCount; }
void ScheduleDAGSDNodes::AddSchedEdges() { const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>(); // Check to see if the scheduler cares about latencies. bool UnitLatencies = ForceUnitLatencies(); // Pass 2: add the preds, succs, etc. for (unsigned su = 0, e = SUnits.size(); su != e; ++su) { SUnit *SU = &SUnits[su]; SDNode *MainNode = SU->getNode(); if (MainNode->isMachineOpcode()) { unsigned Opc = MainNode->getMachineOpcode(); const TargetInstrDesc &TID = TII->get(Opc); for (unsigned i = 0; i != TID.getNumOperands(); ++i) { if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { SU->isTwoAddress = true; break; } } if (TID.isCommutable()) SU->isCommutable = true; } // Find all predecessors and successors of the group. for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) { if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).getImplicitDefs()) { SU->hasPhysRegClobbers = true; unsigned NumUsed = InstrEmitter::CountResults(N); while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1)) --NumUsed; // Skip over unused values at the end. if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs()) SU->hasPhysRegDefs = true; } for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { SDNode *OpN = N->getOperand(i).getNode(); if (isPassiveNode(OpN)) continue; // Not scheduled. SUnit *OpSU = &SUnits[OpN->getNodeId()]; assert(OpSU && "Node has no SUnit!"); if (OpSU == SU) continue; // In the same group. EVT OpVT = N->getOperand(i).getValueType(); assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!"); bool isChain = OpVT == MVT::Other; unsigned PhysReg = 0; int Cost = 1; // Determine if this is a physical register dependency. CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost); assert((PhysReg == 0 || !isChain) && "Chain dependence via physreg data?"); // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. 
For now, scheduler // emits a copy from the physical register to a virtual register unless // it requires a cross class copy (cost < 0). That means we are only // treating "expensive to copy" register dependency as physical register // dependency. This may change in the future though. if (Cost >= 0) PhysReg = 0; // If this is a ctrl dep, latency is 1. unsigned OpLatency = isChain ? 1 : OpSU->Latency; const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data, OpLatency, PhysReg); if (!isChain && !UnitLatencies) { ComputeOperandLatency(OpN, N, i, const_cast<SDep &>(dep)); ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep)); } SU->addPred(dep); } } } }
/// Lower the statepoint \p ISP into a STATEPOINT machine node.
///
/// The basic scheme here is that information about both the original call
/// and the safepoint is encoded in the CallInst. A temporary call is lowered
/// first and its operands are then reused to build the STATEPOINT node (plus
/// GC_TRANSITION_START / GC_TRANSITION_END nodes when the GCTransition flag
/// is set). Finally all uses of the temporary call node are redirected to the
/// new node and the call node is deleted.
void SelectionDAGBuilder::LowerStatepoint(
    ImmutableStatepoint ISP, MachineBasicBlock *LandingPad /*=nullptr*/) {
  NumOfStatepoints++;

  // Clear state
  StatepointLowering.startNewStatepoint(*this);

  ImmutableCallSite CS(ISP.getCallSite());

#ifndef NDEBUG
  // Consistency check. Don't do this for invokes. It would be too
  // expensive to preserve this information across different basic blocks
  if (!CS.isInvoke()) {
    for (const User *U : CS->users()) {
      const CallInst *Call = cast<CallInst>(U);
      if (isGCRelocate(Call))
        StatepointLowering.scheduleRelocCall(*Call);
    }
  }

  // If this is a malformed statepoint, report it early to simplify debugging.
  // This should catch any IR level mistake that's made when constructing or
  // transforming statepoints.
  ISP.verify();

  // Check that the associated GCStrategy expects to encounter statepoints.
  assert(GFI->getStrategy().useStatepoints() &&
         "GCStrategy does not expect to encounter statepoints");
#endif

  // Lower statepoint vmstate and gcstate arguments
  SmallVector<SDValue, 10> LoweredMetaArgs;
  lowerStatepointMetaArgs(LoweredMetaArgs, ISP, *this);

  // Get call node, we will replace it later with statepoint
  SDNode *CallNode =
      lowerCallFromStatepoint(ISP, LandingPad, *this, PendingExports);

  // Construct the actual GC_TRANSITION_START, STATEPOINT, and GC_TRANSITION_END
  // nodes with all the appropriate arguments and return values.

  // Call Node: Chain, Target, {Args}, RegMask, [Glue]
  SDValue Chain = CallNode->getOperand(0);

  SDValue Glue;
  const bool CallHasIncomingGlue = CallNode->getGluedNode();
  if (CallHasIncomingGlue) {
    // Glue is always last operand
    Glue = CallNode->getOperand(CallNode->getNumOperands() - 1);
  }

  // The operands to the GC_TRANSITION_{START,END} nodes are laid out in the
  // order in which they appear in the call to the statepoint intrinsic. If
  // any of the operands is a pointer-typed, that operand is immediately
  // followed by a SRCVALUE for the pointer that may be used during lowering
  // (e.g. to form MachinePointerInfo values for loads/stores).
  auto AppendGCTransitionArgs = [&](SmallVector<SDValue, 8> &Dest) {
    for (const Value *V : ISP.gc_transition_args()) {
      Dest.push_back(getValue(V));
      if (V->getType()->isPointerTy())
        Dest.push_back(DAG.getSrcValue(V));
    }
  };

  const bool IsGCTransition =
      (ISP.getFlags() & (uint64_t)StatepointFlags::GCTransition) ==
      (uint64_t)StatepointFlags::GCTransition;

  // Build the GC_TRANSITION_START node if necessary.
  if (IsGCTransition) {
    SmallVector<SDValue, 8> StartOps;

    // Chain first, then the transition arguments, then glue if necessary.
    StartOps.push_back(Chain);
    AppendGCTransitionArgs(StartOps);
    if (CallHasIncomingGlue)
      StartOps.push_back(Glue);

    SDVTList StartTys = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue GCTransitionStart = DAG.getNode(ISD::GC_TRANSITION_START,
                                            getCurSDLoc(), StartTys, StartOps);

    Chain = GCTransitionStart.getValue(0);
    Glue = GCTransitionStart.getValue(1);
  }

  // TODO: Currently, all of these operands are being marked as read/write in
  // PrologEpilogInserter.cpp, we should special case the VMState arguments
  // and flags to be read-only.
  SmallVector<SDValue, 40> Ops;

  // Add the <id> and <numBytes> constants.
  Ops.push_back(DAG.getTargetConstant(ISP.getID(), getCurSDLoc(), MVT::i64));
  Ops.push_back(
      DAG.getTargetConstant(ISP.getNumPatchBytes(), getCurSDLoc(), MVT::i32));

  // Calculate and push starting position of vmstate arguments
  // Get number of arguments incoming directly into call node
  unsigned NumCallRegArgs =
      CallNode->getNumOperands() - (CallHasIncomingGlue ? 4 : 3);
  Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, getCurSDLoc(), MVT::i32));

  // Add call target
  Ops.push_back(SDValue(CallNode->getOperand(1).getNode(), 0));

  // Add call arguments
  // The register mask is the last operand, or the one before the glue.
  SDNode::op_iterator RegMaskIt =
      CallNode->op_end() - (CallHasIncomingGlue ? 2 : 1);
  Ops.insert(Ops.end(), CallNode->op_begin() + 2, RegMaskIt);

  // Add a constant argument for the calling convention
  pushStackMapConstant(Ops, *this, CS.getCallingConv());

  // Add a constant argument for the flags
  uint64_t Flags = ISP.getFlags();
  assert(
      ((Flags & ~(uint64_t)StatepointFlags::MaskAll) == 0)
          && "unknown flag used");
  pushStackMapConstant(Ops, *this, Flags);

  // Insert all vmstate and gcstate arguments
  Ops.insert(Ops.end(), LoweredMetaArgs.begin(), LoweredMetaArgs.end());

  // Add register mask from call node
  Ops.push_back(*RegMaskIt);

  // Add chain
  Ops.push_back(Chain);

  // Same for the glue, but we add it only if original call had it
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Compute return values. Provide a glue output since we consume one as
  // input. This allows someone else to chain off us as needed.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

  SDNode *StatepointMCNode =
      DAG.getMachineNode(TargetOpcode::STATEPOINT, getCurSDLoc(), NodeTys, Ops);

  SDNode *SinkNode = StatepointMCNode;

  // Build the GC_TRANSITION_END node if necessary. Its operands use the same
  // layout as those of GC_TRANSITION_START.
  if (IsGCTransition) {
    SmallVector<SDValue, 8> EndOps;

    // Chain from the statepoint, the transition arguments, then its glue.
    EndOps.push_back(SDValue(StatepointMCNode, 0));
    AppendGCTransitionArgs(EndOps);
    EndOps.push_back(SDValue(StatepointMCNode, 1));

    SDVTList EndTys = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue GCTransitionEnd = DAG.getNode(ISD::GC_TRANSITION_END,
                                          getCurSDLoc(), EndTys, EndOps);

    SinkNode = GCTransitionEnd.getNode();
  }

  // Replace original call
  DAG.ReplaceAllUsesWith(CallNode, SinkNode); // This may update Root
  // Remove original call node
  DAG.DeleteNode(CallNode);

  // DON'T set the root - under the assumption that it's already set past the
  // inserted node we created.

  // TODO: A better future implementation would be to emit a single variable
  // argument, variable return value STATEPOINT node here and then hookup the
  // return value of each gc.relocate to the respective output of the
  // previously emitted STATEPOINT value. Unfortunately, this doesn't appear
  // to actually be possible today.
}
/// EmitSchedule - Emit the machine code in scheduled order. Return the new /// InsertPos and MachineBasicBlock that contains this insertion /// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does /// not necessarily refer to returned BB. The emitter may split blocks. MachineBasicBlock *ScheduleDAGSDNodes:: EmitSchedule(MachineBasicBlock::iterator &InsertPos) { InstrEmitter Emitter(BB, InsertPos); DenseMap<SDValue, unsigned> VRBaseMap; DenseMap<SUnit*, unsigned> CopyVRBaseMap; SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders; SmallSet<unsigned, 8> Seen; bool HasDbg = DAG->hasDebugValues(); // Emit a node, and determine where its first instruction is for debuginfo. // Zero, one, or multiple instructions can be created when emitting a node. auto EmitNode = [&](SDNode *Node, bool IsClone, bool IsCloned, DenseMap<SDValue, unsigned> &VRBaseMap) -> MachineInstr * { // Fetch instruction prior to this, or end() if nonexistant. auto GetPrevInsn = [&](MachineBasicBlock::iterator I) { if (I == BB->begin()) return BB->end(); else return std::prev(Emitter.getInsertPos()); }; MachineBasicBlock::iterator Before = GetPrevInsn(Emitter.getInsertPos()); Emitter.EmitNode(Node, IsClone, IsCloned, VRBaseMap); MachineBasicBlock::iterator After = GetPrevInsn(Emitter.getInsertPos()); // If the iterator did not change, no instructions were inserted. if (Before == After) return nullptr; if (Before == BB->end()) { // There were no prior instructions; the new ones must start at the // beginning of the block. return &Emitter.getBlock()->instr_front(); } else { // Return first instruction after the pre-existing instructions. return &*std::next(Before); } }; // If this is the first BB, emit byval parameter dbg_value's. 
if (HasDbg && BB->getParent()->begin() == MachineFunction::iterator(BB)) { SDDbgInfo::DbgIterator PDI = DAG->ByvalParmDbgBegin(); SDDbgInfo::DbgIterator PDE = DAG->ByvalParmDbgEnd(); for (; PDI != PDE; ++PDI) { MachineInstr *DbgMI= Emitter.EmitDbgValue(*PDI, VRBaseMap); if (DbgMI) { BB->insert(InsertPos, DbgMI); // We re-emit the dbg_value closer to its use, too, after instructions // are emitted to the BB. (*PDI)->clearIsEmitted(); } } } for (unsigned i = 0, e = Sequence.size(); i != e; i++) { SUnit *SU = Sequence[i]; if (!SU) { // Null SUnit* is a noop. TII->insertNoop(*Emitter.getBlock(), InsertPos); continue; } // For pre-regalloc scheduling, create instructions corresponding to the // SDNode and any glued SDNodes and append them to the block. if (!SU->getNode()) { // Emit a copy. EmitPhysRegCopy(SU, CopyVRBaseMap, InsertPos); continue; } SmallVector<SDNode *, 4> GluedNodes; for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode()) GluedNodes.push_back(N); while (!GluedNodes.empty()) { SDNode *N = GluedNodes.back(); auto NewInsn = EmitNode(N, SU->OrigNode != SU, SU->isCloned, VRBaseMap); // Remember the source order of the inserted instruction. if (HasDbg) ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn); GluedNodes.pop_back(); } auto NewInsn = EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned, VRBaseMap); // Remember the source order of the inserted instruction. if (HasDbg) ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn); } // Insert all the dbg_values which have not already been inserted in source // order sequence. if (HasDbg) { MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI(); // Sort the source order instructions and use the order to insert debug // values. Use stable_sort so that DBG_VALUEs are inserted in the same order // regardless of the host's implementation fo std::sort. 
std::stable_sort(Orders.begin(), Orders.end(), less_first()); std::stable_sort(DAG->DbgBegin(), DAG->DbgEnd(), [](const SDDbgValue *LHS, const SDDbgValue *RHS) { return LHS->getOrder() < RHS->getOrder(); }); SDDbgInfo::DbgIterator DI = DAG->DbgBegin(); SDDbgInfo::DbgIterator DE = DAG->DbgEnd(); // Now emit the rest according to source order. unsigned LastOrder = 0; for (unsigned i = 0, e = Orders.size(); i != e && DI != DE; ++i) { unsigned Order = Orders[i].first; MachineInstr *MI = Orders[i].second; // Insert all SDDbgValue's whose order(s) are before "Order". assert(MI); for (; DI != DE; ++DI) { if ((*DI)->getOrder() < LastOrder || (*DI)->getOrder() >= Order) break; if ((*DI)->isEmitted()) continue; MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap); if (DbgMI) { if (!LastOrder) // Insert to start of the BB (after PHIs). BB->insert(BBBegin, DbgMI); else { // Insert at the instruction, which may be in a different // block, if the block was split by a custom inserter. MachineBasicBlock::iterator Pos = MI; MI->getParent()->insert(Pos, DbgMI); } } } LastOrder = Order; } // Add trailing DbgValue's before the terminator. FIXME: May want to add // some of them before one or more conditional branches? SmallVector<MachineInstr*, 8> DbgMIs; for (; DI != DE; ++DI) { if ((*DI)->isEmitted()) continue; assert((*DI)->getOrder() >= LastOrder && "emitting DBG_VALUE out of order"); if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap)) DbgMIs.push_back(DbgMI); } MachineBasicBlock *InsertBB = Emitter.getBlock(); MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator(); InsertBB->insert(Pos, DbgMIs.begin(), DbgMIs.end()); SDDbgInfo::DbgLabelIterator DLI = DAG->DbgLabelBegin(); SDDbgInfo::DbgLabelIterator DLE = DAG->DbgLabelEnd(); // Now emit the rest according to source order. 
LastOrder = 0; for (const auto &InstrOrder : Orders) { unsigned Order = InstrOrder.first; MachineInstr *MI = InstrOrder.second; if (!MI) continue; // Insert all SDDbgLabel's whose order(s) are before "Order". for (; DLI != DLE && (*DLI)->getOrder() >= LastOrder && (*DLI)->getOrder() < Order; ++DLI) { MachineInstr *DbgMI = Emitter.EmitDbgLabel(*DLI); if (DbgMI) { if (!LastOrder) // Insert to start of the BB (after PHIs). BB->insert(BBBegin, DbgMI); else { // Insert at the instruction, which may be in a different // block, if the block was split by a custom inserter. MachineBasicBlock::iterator Pos = MI; MI->getParent()->insert(Pos, DbgMI); } } } if (DLI == DLE) break; LastOrder = Order; } } InsertPos = Emitter.getInsertPos(); return Emitter.getBlock(); }
/// Lower the call described by \p SI into a STATEPOINT machine node and
/// return the value produced by lowering the underlying call.
///
/// The basic scheme here is that information about both the original call
/// and the safepoint is encoded in the CallInst. A temporary call is lowered
/// first and its operands are then reused to build the STATEPOINT node (plus
/// GC_TRANSITION_START / GC_TRANSITION_END nodes when the GCTransition flag
/// is set). Finally all uses of the temporary call node are redirected to the
/// new node and the call node is deleted.
SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
    SelectionDAGBuilder::StatepointLoweringInfo &SI) {
  NumOfStatepoints++;

  // Clear state
  StatepointLowering.startNewStatepoint(*this);

#ifndef NDEBUG
  // We schedule gc relocates before removeDuplicateGCPtrs since we _will_
  // encounter the duplicate gc relocates we elide in removeDuplicateGCPtrs.
  for (auto *Reloc : SI.GCRelocates) {
    if (Reloc->getParent() == SI.StatepointInstr->getParent())
      StatepointLowering.scheduleRelocCall(*Reloc);
  }
#endif

  // Remove any redundant llvm::Values which map to the same SDValue as another
  // input. Also has the effect of removing duplicates in the original
  // llvm::Value input list as well. This is a useful optimization for
  // reducing the size of the StackMap section. It has no other impact.
  removeDuplicateGCPtrs(SI.Bases, SI.Ptrs, SI.GCRelocates, *this,
                        FuncInfo.StatepointSpillMaps[SI.StatepointInstr]);
  assert(SI.Bases.size() == SI.Ptrs.size() &&
         SI.Ptrs.size() == SI.GCRelocates.size());

  // Lower statepoint vmstate and gcstate arguments
  SmallVector<SDValue, 10> LoweredMetaArgs;
  lowerStatepointMetaArgs(LoweredMetaArgs, SI, *this);

  // Now that we've emitted the spills, we need to update the root so that the
  // call sequence is ordered correctly.
  SI.CLI.setChain(getRoot());

  // Get call node, we will replace it later with statepoint
  SDValue ReturnVal;
  SDNode *CallNode;
  std::tie(ReturnVal, CallNode) =
      lowerCallFromStatepointLoweringInfo(SI, *this, PendingExports);

  // Construct the actual GC_TRANSITION_START, STATEPOINT, and GC_TRANSITION_END
  // nodes with all the appropriate arguments and return values.

  // Call Node: Chain, Target, {Args}, RegMask, [Glue]
  SDValue Chain = CallNode->getOperand(0);

  SDValue Glue;
  const bool CallHasIncomingGlue = CallNode->getGluedNode();
  if (CallHasIncomingGlue) {
    // Glue is always last operand
    Glue = CallNode->getOperand(CallNode->getNumOperands() - 1);
  }

  // The operands to the GC_TRANSITION_{START,END} nodes are laid out in the
  // order in which they appear in the call to the statepoint intrinsic. If
  // any of the operands is a pointer-typed, that operand is immediately
  // followed by a SRCVALUE for the pointer that may be used during lowering
  // (e.g. to form MachinePointerInfo values for loads/stores).
  auto AppendGCTransitionArgs = [&](SmallVector<SDValue, 8> &Dest) {
    for (const Value *V : SI.GCTransitionArgs) {
      Dest.push_back(getValue(V));
      if (V->getType()->isPointerTy())
        Dest.push_back(DAG.getSrcValue(V));
    }
  };

  const bool IsGCTransition =
      (SI.StatepointFlags & (uint64_t)StatepointFlags::GCTransition) ==
      (uint64_t)StatepointFlags::GCTransition;

  // Build the GC_TRANSITION_START node if necessary.
  if (IsGCTransition) {
    SmallVector<SDValue, 8> StartOps;

    // Chain first, then the transition arguments, then glue if necessary.
    StartOps.push_back(Chain);
    AppendGCTransitionArgs(StartOps);
    if (CallHasIncomingGlue)
      StartOps.push_back(Glue);

    SDVTList StartTys = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue GCTransitionStart = DAG.getNode(ISD::GC_TRANSITION_START,
                                            getCurSDLoc(), StartTys, StartOps);

    Chain = GCTransitionStart.getValue(0);
    Glue = GCTransitionStart.getValue(1);
  }

  // TODO: Currently, all of these operands are being marked as read/write in
  // PrologEpilogInserter.cpp, we should special case the VMState arguments
  // and flags to be read-only.
  SmallVector<SDValue, 40> Ops;

  // Add the <id> and <numBytes> constants.
  Ops.push_back(DAG.getTargetConstant(SI.ID, getCurSDLoc(), MVT::i64));
  Ops.push_back(
      DAG.getTargetConstant(SI.NumPatchBytes, getCurSDLoc(), MVT::i32));

  // Calculate and push starting position of vmstate arguments
  // Get number of arguments incoming directly into call node
  unsigned NumCallRegArgs =
      CallNode->getNumOperands() - (CallHasIncomingGlue ? 4 : 3);
  Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, getCurSDLoc(), MVT::i32));

  // Add call target
  Ops.push_back(SDValue(CallNode->getOperand(1).getNode(), 0));

  // Add call arguments
  // The register mask is the last operand, or the one before the glue.
  SDNode::op_iterator RegMaskIt =
      CallNode->op_end() - (CallHasIncomingGlue ? 2 : 1);
  Ops.insert(Ops.end(), CallNode->op_begin() + 2, RegMaskIt);

  // Add a constant argument for the calling convention
  pushStackMapConstant(Ops, *this, SI.CLI.CallConv);

  // Add a constant argument for the flags
  uint64_t Flags = SI.StatepointFlags;
  assert(((Flags & ~(uint64_t)StatepointFlags::MaskAll) == 0) &&
         "Unknown flag used");
  pushStackMapConstant(Ops, *this, Flags);

  // Insert all vmstate and gcstate arguments
  Ops.insert(Ops.end(), LoweredMetaArgs.begin(), LoweredMetaArgs.end());

  // Add register mask from call node
  Ops.push_back(*RegMaskIt);

  // Add chain
  Ops.push_back(Chain);

  // Same for the glue, but we add it only if original call had it
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Compute return values. Provide a glue output since we consume one as
  // input. This allows someone else to chain off us as needed.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

  SDNode *StatepointMCNode =
      DAG.getMachineNode(TargetOpcode::STATEPOINT, getCurSDLoc(), NodeTys, Ops);

  SDNode *SinkNode = StatepointMCNode;

  // Build the GC_TRANSITION_END node if necessary. Its operands use the same
  // layout as those of GC_TRANSITION_START.
  if (IsGCTransition) {
    SmallVector<SDValue, 8> EndOps;

    // Chain from the statepoint, the transition arguments, then its glue.
    EndOps.push_back(SDValue(StatepointMCNode, 0));
    AppendGCTransitionArgs(EndOps);
    EndOps.push_back(SDValue(StatepointMCNode, 1));

    SDVTList EndTys = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue GCTransitionEnd = DAG.getNode(ISD::GC_TRANSITION_END,
                                          getCurSDLoc(), EndTys, EndOps);

    SinkNode = GCTransitionEnd.getNode();
  }

  // Replace original call
  DAG.ReplaceAllUsesWith(CallNode, SinkNode); // This may update Root
  // Remove original call node
  DAG.DeleteNode(CallNode);

  // DON'T set the root - under the assumption that it's already set past the
  // inserted node we created.

  // TODO: A better future implementation would be to emit a single variable
  // argument, variable return value STATEPOINT node here and then hookup the
  // return value of each gc.relocate to the respective output of the
  // previously emitted STATEPOINT value. Unfortunately, this doesn't appear
  // to actually be possible today.

  return ReturnVal;
}