void ScheduleDAGSDNodes::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, const TargetInstrDesc &II, bool IsClone, bool IsCloned, DenseMap<SDValue, unsigned> &VRBaseMap) { assert(Node->getMachineOpcode() != TargetInstrInfo::IMPLICIT_DEF && "IMPLICIT_DEF should have been handled as a special case elsewhere!"); for (unsigned i = 0; i < II.getNumDefs(); ++i) { // If the specific node value is only used by a CopyToReg and the dest reg // is a vreg in the same register class, use the CopyToReg'd destination // register instead of creating a new vreg. unsigned VRBase = 0; const TargetRegisterClass *RC = getInstrOperandRegClass(TRI, II, i); if (!IsClone && !IsCloned) for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); UI != E; ++UI) { SDNode *User = *UI; if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node && User->getOperand(2).getResNo() == i) { unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) { const TargetRegisterClass *RegRC = MRI.getRegClass(Reg); if (RegRC == RC) { VRBase = Reg; MI->addOperand(MachineOperand::CreateReg(Reg, true)); break; } } } } // Create the result registers for this node and add the result regs to // the machine instruction. if (VRBase == 0) { assert(RC && "Isn't a register operand!"); VRBase = MRI.createVirtualRegister(RC); MI->addOperand(MachineOperand::CreateReg(VRBase, true)); } SDValue Op(Node, i); if (IsClone) VRBaseMap.erase(Op); bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; isNew = isNew; // Silence compiler warning. assert(isNew && "Node emitted out of order - early"); } }
/// getDstOfCopyToRegUse - If the only use of the specified result number of /// node is a CopyToReg, return its destination register. Return 0 otherwise. unsigned InstrEmitter::getDstOfOnlyCopyToRegUse(SDNode *Node, unsigned ResNo) const { if (!Node->hasOneUse()) return 0; SDNode *User = *Node->use_begin(); if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node && User->getOperand(2).getResNo() == ResNo) { unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) return Reg; } return 0; }
void AMDGPUDAGToDAGISel::PostprocessISelDAG() { if (Subtarget.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) { return; } // Go over all selected nodes and try to fold them a bit more const AMDGPUTargetLowering& Lowering = (*(const AMDGPUTargetLowering*)getTargetLowering()); for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); I != E; ++I) { SDNode *Node = I; switch (Node->getOpcode()) { // Fix the register class in copy to CopyToReg nodes - ISel will always // use SReg classes for 64-bit copies, but this is not always what we want. case ISD::CopyToReg: { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); SDValue Val = Node->getOperand(2); const TargetRegisterClass *RC = RegInfo->getRegClass(Reg); if (RC != &AMDGPU::SReg_64RegClass) { continue; } if (!Val.getNode()->isMachineOpcode() || Val.getNode()->getMachineOpcode() == AMDGPU::IMPLICIT_DEF) { continue; } const MCInstrDesc Desc = TM.getInstrInfo()->get(Val.getNode()->getMachineOpcode()); const TargetRegisterInfo *TRI = TM.getRegisterInfo(); RegInfo->setRegClass(Reg, TRI->getRegClass(Desc.OpInfo[0].RegClass)); continue; } } MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I); if (!MachineNode) continue; SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG); if (ResNode != Node) { ReplaceUses(Node, ResNode); } } }
void ScheduleDAGLinearize::Schedule() { LLVM_DEBUG(dbgs() << "********** DAG Linearization **********\n"); SmallVector<SDNode*, 8> Glues; unsigned DAGSize = 0; for (SDNode &Node : DAG->allnodes()) { SDNode *N = &Node; // Use node id to record degree. unsigned Degree = N->use_size(); N->setNodeId(Degree); unsigned NumVals = N->getNumValues(); if (NumVals && N->getValueType(NumVals-1) == MVT::Glue && N->hasAnyUseOfValue(NumVals-1)) { SDNode *User = findGluedUser(N); if (User) { Glues.push_back(N); GluedMap.insert(std::make_pair(N, User)); } } if (N->isMachineOpcode() || (N->getOpcode() != ISD::EntryToken && !isPassiveNode(N))) ++DAGSize; } for (unsigned i = 0, e = Glues.size(); i != e; ++i) { SDNode *Glue = Glues[i]; SDNode *GUser = GluedMap[Glue]; unsigned Degree = Glue->getNodeId(); unsigned UDegree = GUser->getNodeId(); // Glue user must be scheduled together with the glue operand. So other // users of the glue operand must be treated as its users. SDNode *ImmGUser = Glue->getGluedUser(); for (const SDNode *U : Glue->uses()) if (U == ImmGUser) --Degree; GUser->setNodeId(UDegree + Degree); Glue->setNodeId(1); } Sequence.reserve(DAGSize); ScheduleNode(DAG->getRoot().getNode()); }
// Select - Convert the specified operand from a target-independent to a
// target-specific node if it hasn't already been changed.
//
// Returns NULL when N is already a machine node. The BRCALL and
// TargetConstantFP cases rewire users themselves through ReplaceUses and
// return NULL/0; the other hand-written cases return the node they build or
// morph (via getTargetNode / SelectNodeTo). Any opcode without a case here
// falls out of the switch and is handed to SelectCode.
SDNode *IA64DAGToDAGISel::Select(SDValue Op) {
  SDNode *N = Op.getNode();
  if (N->isMachineOpcode())
    return NULL;   // Already selected.
  DebugLoc dl = Op.getDebugLoc();

  switch (N->getOpcode()) {
  default: break;

  case IA64ISD::BRCALL: { // XXX: this is also a hack!
    SDValue Chain = N->getOperand(0);
    SDValue InFlag;  // Null incoming flag value.

    if(N->getNumOperands()==3) { // we have an incoming chain, callee and flag
      InFlag = N->getOperand(2);
    }

    unsigned CallOpcode;
    SDValue CallOperand;

    // if we can call directly, do so
    if (GlobalAddressSDNode *GASD =
          dyn_cast<GlobalAddressSDNode>(N->getOperand(1))) {
      CallOpcode = IA64::BRCALL_IPREL_GA;
      CallOperand = CurDAG->getTargetGlobalAddress(GASD->getGlobal(), MVT::i64);
    } else if (isa<ExternalSymbolSDNode>(N->getOperand(1))) {
      // FIXME: we currently NEED this case for correctness, to avoid
      // "non-pic code with imm reloc.n against dynamic symbol" errors
      CallOpcode = IA64::BRCALL_IPREL_ES;
      CallOperand = N->getOperand(1);
    } else {
      // otherwise we need to load the function descriptor,
      // load the branch target (function)'s entry point and GP,
      // branch (call) then restore the GP
      SDValue FnDescriptor = N->getOperand(1);

      // load the branch target's entry point [mem] and
      // GP value [mem+8]
      SDValue targetEntryPoint=
        SDValue(CurDAG->getTargetNode(IA64::LD8, dl, MVT::i64, MVT::Other,
                                      FnDescriptor, CurDAG->getEntryNode()), 0);
      Chain = targetEntryPoint.getValue(1);
      // NOTE(review): the ADDS node below is created with a single result
      // type (MVT::i64), yet value #1 is taken from it as the chain - confirm.
      SDValue targetGPAddr=
        SDValue(CurDAG->getTargetNode(IA64::ADDS, dl, MVT::i64,
                                      FnDescriptor,
                                      CurDAG->getConstant(8, MVT::i64)), 0);
      Chain = targetGPAddr.getValue(1);
      SDValue targetGP =
        SDValue(CurDAG->getTargetNode(IA64::LD8, dl, MVT::i64,MVT::Other,
                                      targetGPAddr, CurDAG->getEntryNode()), 0);
      Chain = targetGP.getValue(1);

      // Copy the loaded GP into r1 and the entry point into B6, threading the
      // flag from one copy to the next.
      Chain = CurDAG->getCopyToReg(Chain, dl, IA64::r1, targetGP, InFlag);
      InFlag = Chain.getValue(1);
      Chain = CurDAG->getCopyToReg(Chain, dl, IA64::B6,
                                   targetEntryPoint, InFlag); // FLAG these?
      InFlag = Chain.getValue(1);

      CallOperand = CurDAG->getRegister(IA64::B6, MVT::i64);
      CallOpcode = IA64::BRCALL_INDIRECT;
    }

    // Finally, once everything is setup, emit the call itself
    // NOTE(review): when a flag is present the call node receives the flag
    // but not the chain as an operand - confirm this is intended.
    if (InFlag.getNode())
      Chain = SDValue(CurDAG->getTargetNode(CallOpcode, dl, MVT::Other,
                                            MVT::Flag, CallOperand, InFlag), 0);
    else // there might be no arguments
      Chain = SDValue(CurDAG->getTargetNode(CallOpcode, dl, MVT::Other,
                                            MVT::Flag, CallOperand, Chain), 0);
    InFlag = Chain.getValue(1);

    // Redirect users of the original node's results 0 and 1 to the new call's
    // chain and flag.
    std::vector<SDValue> CallResults;
    CallResults.push_back(Chain);
    CallResults.push_back(InFlag);

    for (unsigned i = 0, e = CallResults.size(); i != e; ++i)
      ReplaceUses(Op.getValue(i), CallResults[i]);
    return NULL;
  }

  case IA64ISD::GETFD: {
    SDValue Input = N->getOperand(0);
    return CurDAG->getTargetNode(IA64::GETFD, dl, MVT::i64, Input);
  }

  // All divides and remainders listed here are expanded by SelectDIV.
  case ISD::FDIV:
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::SREM:
  case ISD::UREM:
    return SelectDIV(Op);

  case ISD::TargetConstantFP: {
    SDValue Chain = CurDAG->getEntryNode(); // this is a constant, so..

    // Only +0.0 (read from F0) and +1.0 (read from F1) are handled.
    // NOTE(review): with assertions disabled, any other constant leaves V
    // null and still reaches ReplaceUses below.
    SDValue V;
    ConstantFPSDNode* N2 = cast<ConstantFPSDNode>(N);
    if (N2->getValueAPF().isPosZero()) {
      V = CurDAG->getCopyFromReg(Chain, dl, IA64::F0, MVT::f64);
    } else if (N2->isExactlyValue(N2->getValueType(0) == MVT::f32 ?
                                  APFloat(+1.0f) : APFloat(+1.0))) {
      V = CurDAG->getCopyFromReg(Chain, dl, IA64::F1, MVT::f64);
    } else
      assert(0 && "Unexpected FP constant!");

    ReplaceUses(SDValue(N, 0), V);
    return 0;
  }

  case ISD::FrameIndex: { // TODO: reduce creepyness
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // With a single use the node is morphed in place; otherwise a new MOV
    // node is created.
    if (N->hasOneUse())
      return CurDAG->SelectNodeTo(N, IA64::MOV, MVT::i64,
                                  CurDAG->getTargetFrameIndex(FI, MVT::i64));
    else
      return CurDAG->getTargetNode(IA64::MOV, dl, MVT::i64,
                                   CurDAG->getTargetFrameIndex(FI, MVT::i64));
  }

  case ISD::ConstantPool: { // TODO: nuke the constant pool
    // (ia64 doesn't need one)
    ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N);
    Constant *C = CP->getConstVal();
    SDValue CPI = CurDAG->getTargetConstantPool(C, MVT::i64,
                                                CP->getAlignment());
    return CurDAG->getTargetNode(IA64::ADDL_GA, dl, MVT::i64, // ?
                                 CurDAG->getRegister(IA64::r1, MVT::i64), CPI);
  }

  case ISD::GlobalAddress: {
    GlobalValue *GV = cast<GlobalAddressSDNode>(N)->getGlobal();
    SDValue GA = CurDAG->getTargetGlobalAddress(GV, MVT::i64);
    // ADDL_GA of r1 and the global's target address, followed by an LD8
    // through the result.
    SDValue Tmp =
      SDValue(CurDAG->getTargetNode(IA64::ADDL_GA, dl, MVT::i64,
                                    CurDAG->getRegister(IA64::r1,
                                                        MVT::i64), GA), 0);
    return CurDAG->getTargetNode(IA64::LD8, dl, MVT::i64, MVT::Other, Tmp,
                                 CurDAG->getEntryNode());
  }

/* XXX
  case ISD::ExternalSymbol: {
    SDValue EA = CurDAG->getTargetExternalSymbol(
      cast<ExternalSymbolSDNode>(N)->getSymbol(),
      MVT::i64);
    SDValue Tmp = CurDAG->getTargetNode(IA64::ADDL_EA, dl, MVT::i64,
                                        CurDAG->getRegister(IA64::r1,
                                                            MVT::i64),
                                        EA);
    return CurDAG->getTargetNode(IA64::LD8, dl, MVT::i64, Tmp);
  }
*/

  case ISD::LOAD: { // FIXME: load -1, not 1, for bools?
    LoadSDNode *LD = cast<LoadSDNode>(N);
    SDValue Chain = LD->getChain();
    SDValue Address = LD->getBasePtr();

    // Pick the load opcode from the in-memory type.
    MVT TypeBeingLoaded = LD->getMemoryVT();
    unsigned Opc;
    switch (TypeBeingLoaded.getSimpleVT()) {
    default:
#ifndef NDEBUG
      N->dump(CurDAG);
#endif
      assert(0 && "Cannot load this type!");
      // NOTE(review): with assertions disabled, control falls into the
      // MVT::i1 case below.
    case MVT::i1: { // this is a bool
      Opc = IA64::LD1; // first we load a byte, then compare for != 0
      if(N->getValueType(0) == MVT::i1) { // XXX: early exit!
        return CurDAG->SelectNodeTo(N, IA64::CMPNE, MVT::i1, MVT::Other,
                    SDValue(CurDAG->getTargetNode(Opc, dl,
                                                  MVT::i64,
                                                  Address), 0),
                                    CurDAG->getRegister(IA64::r0, MVT::i64),
                                    Chain);
      }
      /* otherwise, we want to load a bool into something bigger: LD1
         will do that for us, so we just fall through */
    }
    case MVT::i8:  Opc = IA64::LD1; break;
    case MVT::i16: Opc = IA64::LD2; break;
    case MVT::i32: Opc = IA64::LD4; break;
    case MVT::i64: Opc = IA64::LD8; break;

    case MVT::f32: Opc = IA64::LDF4; break;
    case MVT::f64: Opc = IA64::LDF8; break;
    }

    // Morph N in place into the chosen load: it produces the loaded value
    // plus a chain, and takes the address and the incoming chain.
    return CurDAG->SelectNodeTo(N, Opc, N->getValueType(0), MVT::Other,
                                Address, Chain);
  }

  case ISD::STORE: {
    StoreSDNode *ST = cast<StoreSDNode>(N);
    SDValue Address = ST->getBasePtr();
    SDValue Chain = ST->getChain();

    // Non-truncating stores select on the type of the stored value;
    // truncating stores select on the in-memory type.
    unsigned Opc;
    if (ISD::isNON_TRUNCStore(N)) {
      switch (N->getOperand(1).getValueType().getSimpleVT()) {
      default: assert(0 && "unknown type in store");
      case MVT::i1: { // this is a bool
        Opc = IA64::ST1; // we store either 0 or 1 as a byte
        // first load zero!
        SDValue Initial = CurDAG->getCopyFromReg(Chain, dl, IA64::r0, MVT::i64);
        Chain = Initial.getValue(1);
        // then load 1 into the same reg iff the predicate to store is 1
        SDValue Tmp = ST->getValue();
        Tmp =
          SDValue(CurDAG->getTargetNode(IA64::TPCADDS, dl, MVT::i64, Initial,
                                        CurDAG->getTargetConstant(1, MVT::i64),
                                        Tmp), 0);
        return CurDAG->SelectNodeTo(N, Opc, MVT::Other, Address, Tmp, Chain);
      }
      case MVT::i64: Opc = IA64::ST8;  break;
      case MVT::f64: Opc = IA64::STF8; break;
      }
    } else { // Truncating store
      switch(ST->getMemoryVT().getSimpleVT()) {
      default: assert(0 && "unknown type in truncstore");
      case MVT::i8:  Opc = IA64::ST1;  break;
      case MVT::i16: Opc = IA64::ST2;  break;
      case MVT::i32: Opc = IA64::ST4;  break;
      case MVT::f32: Opc = IA64::STF4; break;
      }
    }

    // The machine store takes operand 2 of N first, then operand 1, then the
    // chain.
    SDValue N1 = N->getOperand(1);
    SDValue N2 = N->getOperand(2);
    return CurDAG->SelectNodeTo(N, Opc, MVT::Other, N2, N1, Chain);
  }

  case ISD::BRCOND: {
    SDValue Chain = N->getOperand(0);
    SDValue CC = N->getOperand(1);
    MachineBasicBlock *Dest =
      cast<BasicBlockSDNode>(N->getOperand(2))->getBasicBlock();
    //FIXME - we do NOT need long branches all the time
    return CurDAG->SelectNodeTo(N, IA64::BRLCOND_NOTCALL, MVT::Other, CC,
                                CurDAG->getBasicBlock(Dest), Chain);
  }

  case ISD::CALLSEQ_START:
  case ISD::CALLSEQ_END: {
    int64_t Amt = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
    unsigned Opc = N->getOpcode() == ISD::CALLSEQ_START ?
      IA64::ADJUSTCALLSTACKDOWN : IA64::ADJUSTCALLSTACKUP;
    SDValue N0 = N->getOperand(0);
    return CurDAG->SelectNodeTo(N, Opc, MVT::Other, getI64Imm(Amt), N0);
  }

  case ISD::BR:
    // FIXME: we don't need long branches all the time!
    SDValue N0 = N->getOperand(0);
    return CurDAG->SelectNodeTo(N, IA64::BRL_NOTCALL, MVT::Other,
                                N->getOperand(1), N0);
  }

  return SelectCode(Op);
}
/// EmitSubregNode - Generate machine code for subreg nodes. /// void InstrEmitter::EmitSubregNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap, bool IsClone, bool IsCloned) { unsigned VRBase = 0; unsigned Opc = Node->getMachineOpcode(); // If the node is only used by a CopyToReg and the dest reg is a vreg, use // the CopyToReg'd destination register instead of creating a new vreg. for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); UI != E; ++UI) { SDNode *User = *UI; if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node) { unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); if (TargetRegisterInfo::isVirtualRegister(DestReg)) { VRBase = DestReg; break; } } } if (Opc == TargetOpcode::EXTRACT_SUBREG) { // EXTRACT_SUBREG is lowered as %dst = COPY %src:sub. There are no // constraints on the %dst register, COPY can target all legal register // classes. unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); const TargetRegisterClass *TRC = TLI->getRegClassFor(Node->getValueType(0)); unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); MachineInstr *DefMI = MRI->getVRegDef(VReg); unsigned SrcReg, DstReg, DefSubIdx; if (DefMI && TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) && SubIdx == DefSubIdx) { // Optimize these: // r1025 = s/zext r1024, 4 // r1026 = extract_subreg r1025, 4 // to a copy // r1026 = copy r1024 VRBase = MRI->createVirtualRegister(TRC); BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg); } else { // VReg may not support a SubIdx sub-register, and we may need to // constrain its register class or issue a COPY to a compatible register // class. VReg = ConstrainForSubReg(VReg, SubIdx, Node->getOperand(0).getValueType(), Node->getDebugLoc()); // Create the destreg if it is missing. if (VRBase == 0) VRBase = MRI->createVirtualRegister(TRC); // Create the extract_subreg machine instruction. 
BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), VRBase).addReg(VReg, 0, SubIdx); } } else if (Opc == TargetOpcode::INSERT_SUBREG || Opc == TargetOpcode::SUBREG_TO_REG) { SDValue N0 = Node->getOperand(0); SDValue N1 = Node->getOperand(1); SDValue N2 = Node->getOperand(2); unsigned SubIdx = cast<ConstantSDNode>(N2)->getZExtValue(); // Figure out the register class to create for the destreg. It should be // the largest legal register class supporting SubIdx sub-registers. // RegisterCoalescer will constrain it further if it decides to eliminate // the INSERT_SUBREG instruction. // // %dst = INSERT_SUBREG %src, %sub, SubIdx // // is lowered by TwoAddressInstructionPass to: // // %dst = COPY %src // %dst:SubIdx = COPY %sub // // There is no constraint on the %src register class. // const TargetRegisterClass *SRC = TLI->getRegClassFor(Node->getValueType(0)); SRC = TRI->getSubClassWithSubReg(SRC, SubIdx); assert(SRC && "No register class supports VT and SubIdx for INSERT_SUBREG"); if (VRBase == 0 || !SRC->hasSubClassEq(MRI->getRegClass(VRBase))) VRBase = MRI->createVirtualRegister(SRC); // Create the insert_subreg or subreg_to_reg machine instruction. 
MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), TII->get(Opc)); MI->addOperand(MachineOperand::CreateReg(VRBase, true)); // If creating a subreg_to_reg, then the first input operand // is an implicit value immediate, otherwise it's a register if (Opc == TargetOpcode::SUBREG_TO_REG) { const ConstantSDNode *SD = cast<ConstantSDNode>(N0); MI->addOperand(MachineOperand::CreateImm(SD->getZExtValue())); } else AddOperand(MI, N0, 0, 0, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); // Add the subregster being inserted AddOperand(MI, N1, 0, 0, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); MI->addOperand(MachineOperand::CreateImm(SubIdx)); MBB->insert(InsertPos, MI); } else llvm_unreachable("Node is not insert_subreg, extract_subreg, or subreg_to_reg"); SDValue Op(Node, 0); bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; (void)isNew; // Silence compiler warning. assert(isNew && "Node emitted out of order - early"); }
/// Returns single number reflecting benefit of scheduling SU /// in the current cycle. signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) { // Initial trivial priority. signed ResCount = 1; // Do not waste time on a node that is already scheduled. if (SU->isScheduled) return ResCount; // Forced priority is high. if (SU->isScheduleHigh) ResCount += PriorityOne; // Adaptable scheduling // A small, but very parallel // region, where reg pressure is an issue. if (HorizontalVerticalBalance > RegPressureThreshold) { // Critical path first ResCount += (SU->getHeight() * ScaleTwo); // If resources are available for it, multiply the // chance of scheduling. if (isResourceAvailable(SU)) ResCount <<= FactorOne; // Consider change to reg pressure from scheduling // this SU. ResCount -= (regPressureDelta(SU,true) * ScaleOne); } // Default heuristic, greeady and // critical path driven. else { // Critical path first. ResCount += (SU->getHeight() * ScaleTwo); // Now see how many instructions is blocked by this SU. ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo); // If resources are available for it, multiply the // chance of scheduling. if (isResourceAvailable(SU)) ResCount <<= FactorOne; ResCount -= (regPressureDelta(SU) * ScaleTwo); } // These are platform specific things. // Will need to go into the back end // and accessed from here via a hook. for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) { if (N->isMachineOpcode()) { const MCInstrDesc &TID = TII->get(N->getMachineOpcode()); if (TID.isCall()) ResCount += (PriorityThree + (ScaleThree*N->getNumValues())); } else switch (N->getOpcode()) { default: break; case ISD::TokenFactor: case ISD::CopyFromReg: case ISD::CopyToReg: ResCount += PriorityFive; break; case ISD::INLINEASM: ResCount += PriorityFour; break; } } return ResCount; }
// SelectDIV - Expand a divide or remainder node (operands 0 and 1 of Op's
// node) into a sequence of target nodes built around a reciprocal
// approximation (FRCPAS0 for FP inputs, FRCPAS1 otherwise) that is refined by
// predicated CFMAS1/CFNMAS1 steps. Returns the final node of the sequence.
//
// For non-FP inputs the operands are first moved with SETFSIG and converted
// (FCVTXF when signed, FCVTXUFS1 when unsigned), and the result is converted
// back (FCVTFXTRUNCS1 / FCVTFXUTRUNCS1) and extracted with GETFSIG; remainders
// additionally go through XMAL with the negated second operand. FP remainders
// are not supported (assert).
//
// NOTE(review): Chain is reassigned after almost every node built here but is
// never passed to any node or returned, so those assignments have no visible
// effect in this function.
SDNode *IA64DAGToDAGISel::SelectDIV(SDValue Op) {
  SDNode *N = Op.getNode();
  SDValue Chain = N->getOperand(0);
  SDValue Tmp1 = N->getOperand(0);
  SDValue Tmp2 = N->getOperand(1);
  DebugLoc dl = N->getDebugLoc();

  bool isFP=false;

  if(Tmp1.getValueType().isFloatingPoint())
    isFP=true;

  bool isModulus=false; // is it a division or a modulus?
  bool isSigned=false;

  // Opcodes not listed leave both flags false.
  switch(N->getOpcode()) {
    case ISD::FDIV:
    case ISD::SDIV:  isModulus=false; isSigned=true;  break;
    case ISD::UDIV:  isModulus=false; isSigned=false; break;
    case ISD::FREM:
    case ISD::SREM:  isModulus=true;  isSigned=true;  break;
    case ISD::UREM:  isModulus=true;  isSigned=false; break;
  }

  // TODO: check for integer divides by powers of 2 (or other simple patterns?)

  SDValue TmpPR, TmpPR2;
  SDValue TmpF1, TmpF2, TmpF3, TmpF4, TmpF5, TmpF6, TmpF7, TmpF8;
  SDValue TmpF9, TmpF10,TmpF11,TmpF12,TmpF13,TmpF14,TmpF15;
  SDNode *Result;

  // we'll need copies of F0 and F1
  SDValue F0 = CurDAG->getRegister(IA64::F0, MVT::f64);
  SDValue F1 = CurDAG->getRegister(IA64::F1, MVT::f64);

  // OK, emit some code:

  if(!isFP) {
    // first, load the inputs into FP regs.
    TmpF1 =
      SDValue(CurDAG->getTargetNode(IA64::SETFSIG, dl, MVT::f64, Tmp1), 0);
    Chain = TmpF1.getValue(1);
    TmpF2 =
      SDValue(CurDAG->getTargetNode(IA64::SETFSIG, dl, MVT::f64, Tmp2), 0);
    Chain = TmpF2.getValue(1);

    // next, convert the inputs to FP
    if(isSigned) {
      TmpF3 =
        SDValue(CurDAG->getTargetNode(IA64::FCVTXF, dl, MVT::f64, TmpF1), 0);
      Chain = TmpF3.getValue(1);
      TmpF4 =
        SDValue(CurDAG->getTargetNode(IA64::FCVTXF, dl, MVT::f64, TmpF2), 0);
      Chain = TmpF4.getValue(1);
    } else { // is unsigned
      TmpF3 =
        SDValue(CurDAG->getTargetNode(IA64::FCVTXUFS1, dl, MVT::f64, TmpF1),
                0);
      Chain = TmpF3.getValue(1);
      TmpF4 =
        SDValue(CurDAG->getTargetNode(IA64::FCVTXUFS1, dl, MVT::f64, TmpF2),
                0);
      Chain = TmpF4.getValue(1);
    }

  } else { // this is an FP divide/remainder, so we 'leak' some temp
           // regs and assign TmpF3=Tmp1, TmpF4=Tmp2
    TmpF3=Tmp1;
    TmpF4=Tmp2;
  }

  // we start by computing an approximate reciprocal (good to 9 bits?)
  // note, this instruction writes _both_ TmpF5 (answer) and TmpPR (predicate)
  if(isFP)
    TmpF5 = SDValue(CurDAG->getTargetNode(IA64::FRCPAS0, dl, MVT::f64,
                                          MVT::i1, TmpF3, TmpF4), 0);
  else
    TmpF5 = SDValue(CurDAG->getTargetNode(IA64::FRCPAS1, dl, MVT::f64,
                                          MVT::i1, TmpF3, TmpF4), 0);

  TmpPR = TmpF5.getValue(1);
  Chain = TmpF5.getValue(2);

  SDValue minusB;
  if(isModulus) { // for remainders, it'll be handy to have
                  // copies of -input_b
    minusB = SDValue(CurDAG->getTargetNode(IA64::SUB, dl, MVT::i64,
                                           CurDAG->getRegister(IA64::r0,
                                                               MVT::i64),
                                           Tmp2), 0);
    Chain = minusB.getValue(1);
  }

  SDValue TmpE0, TmpY1, TmpE1, TmpY2;

  // Refinement steps shared by the FP and integer paths; every node takes
  // the predicate TmpPR as its last operand.
  SDValue OpsE0[] = { TmpF4, TmpF5, F1, TmpPR };
  TmpE0 = SDValue(CurDAG->getTargetNode(IA64::CFNMAS1, dl, MVT::f64,
                                        OpsE0, 4), 0);
  Chain = TmpE0.getValue(1);
  SDValue OpsY1[] = { TmpF5, TmpE0, TmpF5, TmpPR };
  TmpY1 = SDValue(CurDAG->getTargetNode(IA64::CFMAS1, dl, MVT::f64,
                                        OpsY1, 4), 0);
  Chain = TmpY1.getValue(1);
  SDValue OpsE1[] = { TmpE0, TmpE0, F0, TmpPR };
  TmpE1 = SDValue(CurDAG->getTargetNode(IA64::CFMAS1, dl, MVT::f64,
                                        OpsE1, 4), 0);
  Chain = TmpE1.getValue(1);
  SDValue OpsY2[] = { TmpY1, TmpE1, TmpY1, TmpPR };
  TmpY2 = SDValue(CurDAG->getTargetNode(IA64::CFMAS1, dl, MVT::f64,
                                        OpsY2, 4), 0);
  Chain = TmpY2.getValue(1);

  if(isFP) { // if this is an FP divide, we finish up here and exit early
    if(isModulus)
      assert(0 && "Sorry, try another FORTRAN compiler.");

    SDValue TmpE2, TmpY3, TmpQ0, TmpR0;

    SDValue OpsE2[] = { TmpE1, TmpE1, F0, TmpPR };
    TmpE2 = SDValue(CurDAG->getTargetNode(IA64::CFMAS1, dl, MVT::f64,
                                          OpsE2, 4), 0);
    Chain = TmpE2.getValue(1);
    SDValue OpsY3[] = { TmpY2, TmpE2, TmpY2, TmpPR };
    TmpY3 = SDValue(CurDAG->getTargetNode(IA64::CFMAS1, dl, MVT::f64,
                                          OpsY3, 4), 0);
    Chain = TmpY3.getValue(1);
    SDValue OpsQ0[] = { Tmp1, TmpY3, F0, TmpPR };
    TmpQ0 =
      SDValue(CurDAG->getTargetNode(IA64::CFMADS1, dl,  // double prec!
                                    MVT::f64, OpsQ0, 4), 0);
    Chain = TmpQ0.getValue(1);
    SDValue OpsR0[] = { Tmp2, TmpQ0, Tmp1, TmpPR };
    TmpR0 =
      SDValue(CurDAG->getTargetNode(IA64::CFNMADS1, dl, // double prec!
                                    MVT::f64, OpsR0, 4), 0);
    Chain = TmpR0.getValue(1);

    // we want Result to have the same target register as the frcpa, so
    // we two-address hack it. See the comment "for this to work..." on
    // page 48 of Intel application note #245415
    SDValue Ops[] = { TmpF5, TmpY3, TmpR0, TmpQ0, TmpPR };
    Result = CurDAG->getTargetNode(IA64::TCFMADS0, dl, // d.p. s0 rndg!
                                   MVT::f64, Ops, 5);
    Chain = SDValue(Result, 1);
    return Result; // XXX: early exit!
  } else { // this is *not* an FP divide, so there's a bit left to do:

    SDValue TmpQ2, TmpR2, TmpQ3, TmpQ;

    SDValue OpsQ2[] = { TmpF3, TmpY2, F0, TmpPR };
    TmpQ2 = SDValue(CurDAG->getTargetNode(IA64::CFMAS1, dl, MVT::f64,
                                          OpsQ2, 4), 0);
    Chain = TmpQ2.getValue(1);
    SDValue OpsR2[] = { TmpF4, TmpQ2, TmpF3, TmpPR };
    TmpR2 = SDValue(CurDAG->getTargetNode(IA64::CFNMAS1, dl, MVT::f64,
                                          OpsR2, 4), 0);
    Chain = TmpR2.getValue(1);

    // we want TmpQ3 to have the same target register as the frcpa? maybe we
    // should two-address hack it. See the comment "for this to work..." on page
    // 48 of Intel application note #245415
    SDValue OpsQ3[] = { TmpF5, TmpR2, TmpY2, TmpQ2, TmpPR };
    TmpQ3 = SDValue(CurDAG->getTargetNode(IA64::TCFMAS1, dl, MVT::f64,
                                          OpsQ3, 5), 0);
    Chain = TmpQ3.getValue(1);

    // STORY: without these two-address instructions (TCFMAS1 and TCFMADS0)
    // the FPSWA won't be able to help out in the case of large/tiny
    // arguments. Other fun bugs may also appear, e.g. 0/x = x, not 0.

    if(isSigned)
      TmpQ = SDValue(CurDAG->getTargetNode(IA64::FCVTFXTRUNCS1, dl,
                                           MVT::f64, TmpQ3), 0);
    else
      TmpQ = SDValue(CurDAG->getTargetNode(IA64::FCVTFXUTRUNCS1, dl,
                                           MVT::f64, TmpQ3), 0);

    Chain = TmpQ.getValue(1);

    if(isModulus) {
      // Remainder: XMAL of the quotient, the negated second operand (moved
      // with SETFSIG) and TmpF1, then extracted with GETFSIG.
      SDValue FPminusB =
        SDValue(CurDAG->getTargetNode(IA64::SETFSIG, dl, MVT::f64, minusB),
                0);
      Chain = FPminusB.getValue(1);
      SDValue Remainder =
        SDValue(CurDAG->getTargetNode(IA64::XMAL, dl, MVT::f64,
                                      TmpQ, FPminusB, TmpF1), 0);
      Chain = Remainder.getValue(1);
      Result = CurDAG->getTargetNode(IA64::GETFSIG, dl, MVT::i64, Remainder);
      Chain = SDValue(Result, 1);
    } else { // just an integer divide
      Result = CurDAG->getTargetNode(IA64::GETFSIG, dl, MVT::i64, TmpQ);
      Chain = SDValue(Result, 1);
    }

    return Result;
  } // wasn't an FP divide
}
/// EmitSubregNode - Generate machine code for subreg nodes. /// void ScheduleDAGSDNodes::EmitSubregNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap) { unsigned VRBase = 0; unsigned Opc = Node->getMachineOpcode(); // If the node is only used by a CopyToReg and the dest reg is a vreg, use // the CopyToReg'd destination register instead of creating a new vreg. for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); UI != E; ++UI) { SDNode *User = *UI; if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node) { unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); if (TargetRegisterInfo::isVirtualRegister(DestReg)) { VRBase = DestReg; break; } } } if (Opc == TargetInstrInfo::EXTRACT_SUBREG) { unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); // Create the extract_subreg machine instruction. MachineInstr *MI = BuildMI(MF, Node->getDebugLoc(), TII->get(TargetInstrInfo::EXTRACT_SUBREG)); // Figure out the register class to create for the destreg. unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); const TargetRegisterClass *TRC = MRI.getRegClass(VReg); const TargetRegisterClass *SRC = getSubRegisterRegClass(TRC, SubIdx); // Figure out the register class to create for the destreg. // Note that if we're going to directly use an existing register, // it must be precisely the required class, and not a subclass // thereof. 
if (VRBase == 0 || SRC != MRI.getRegClass(VRBase)) { // Create the reg assert(SRC && "Couldn't find source register class"); VRBase = MRI.createVirtualRegister(SRC); } // Add def, source, and subreg index MI->addOperand(MachineOperand::CreateReg(VRBase, true)); AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap); MI->addOperand(MachineOperand::CreateImm(SubIdx)); BB->insert(InsertPos, MI); } else if (Opc == TargetInstrInfo::INSERT_SUBREG || Opc == TargetInstrInfo::SUBREG_TO_REG) { SDValue N0 = Node->getOperand(0); SDValue N1 = Node->getOperand(1); SDValue N2 = Node->getOperand(2); unsigned SubReg = getVR(N1, VRBaseMap); unsigned SubIdx = cast<ConstantSDNode>(N2)->getZExtValue(); const TargetRegisterClass *TRC = MRI.getRegClass(SubReg); const TargetRegisterClass *SRC = getSuperRegisterRegClass(TRC, SubIdx, Node->getValueType(0)); // Figure out the register class to create for the destreg. // Note that if we're going to directly use an existing register, // it must be precisely the required class, and not a subclass // thereof. if (VRBase == 0 || SRC != MRI.getRegClass(VRBase)) { // Create the reg assert(SRC && "Couldn't find source register class"); VRBase = MRI.createVirtualRegister(SRC); } // Create the insert_subreg or subreg_to_reg machine instruction. 
MachineInstr *MI = BuildMI(MF, Node->getDebugLoc(), TII->get(Opc)); MI->addOperand(MachineOperand::CreateReg(VRBase, true)); // If creating a subreg_to_reg, then the first input operand // is an implicit value immediate, otherwise it's a register if (Opc == TargetInstrInfo::SUBREG_TO_REG) { const ConstantSDNode *SD = cast<ConstantSDNode>(N0); MI->addOperand(MachineOperand::CreateImm(SD->getZExtValue())); } else AddOperand(MI, N0, 0, 0, VRBaseMap); // Add the subregster being inserted AddOperand(MI, N1, 0, 0, VRBaseMap); MI->addOperand(MachineOperand::CreateImm(SubIdx)); BB->insert(InsertPos, MI); } else assert(0 && "Node is not insert_subreg, extract_subreg, or subreg_to_reg"); SDValue Op(Node, 0); bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; isNew = isNew; // Silence compiler warning. assert(isNew && "Node emitted out of order - early"); }
/// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an /// implicit physical register output. void ScheduleDAGSDNodes::EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, unsigned SrcReg, DenseMap<SDValue, unsigned> &VRBaseMap) { unsigned VRBase = 0; if (TargetRegisterInfo::isVirtualRegister(SrcReg)) { // Just use the input register directly! SDValue Op(Node, ResNo); if (IsClone) VRBaseMap.erase(Op); bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second; isNew = isNew; // Silence compiler warning. assert(isNew && "Node emitted out of order - early"); return; } // If the node is only used by a CopyToReg and the dest reg is a vreg, use // the CopyToReg'd destination register instead of creating a new vreg. bool MatchReg = true; const TargetRegisterClass *UseRC = NULL; if (!IsClone && !IsCloned) for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); UI != E; ++UI) { SDNode *User = *UI; bool Match = true; if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node && User->getOperand(2).getResNo() == ResNo) { unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); if (TargetRegisterInfo::isVirtualRegister(DestReg)) { VRBase = DestReg; Match = false; } else if (DestReg != SrcReg) Match = false; } else { for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { SDValue Op = User->getOperand(i); if (Op.getNode() != Node || Op.getResNo() != ResNo) continue; MVT VT = Node->getValueType(Op.getResNo()); if (VT == MVT::Other || VT == MVT::Flag) continue; Match = false; if (User->isMachineOpcode()) { const TargetInstrDesc &II = TII->get(User->getMachineOpcode()); const TargetRegisterClass *RC = getInstrOperandRegClass(TRI, II, i+II.getNumDefs()); if (!UseRC) UseRC = RC; else if (RC) { if (UseRC->hasSuperClass(RC)) UseRC = RC; else assert((UseRC == RC || RC->hasSuperClass(UseRC)) && "Multiple uses expecting different register classes!"); } } } } MatchReg &= Match; if 
(VRBase) break; } MVT VT = Node->getValueType(ResNo); const TargetRegisterClass *SrcRC = 0, *DstRC = 0; SrcRC = TRI->getPhysicalRegisterRegClass(SrcReg, VT); // Figure out the register class to create for the destreg. if (VRBase) { DstRC = MRI.getRegClass(VRBase); } else if (UseRC) { assert(UseRC->hasType(VT) && "Incompatible phys register def and uses!"); DstRC = UseRC; } else { DstRC = TLI->getRegClassFor(VT); } // If all uses are reading from the src physical register and copying the // register is either impossible or very expensive, then don't create a copy. if (MatchReg && SrcRC->getCopyCost() < 0) { VRBase = SrcReg; } else { // Create the reg, emit the copy. VRBase = MRI.createVirtualRegister(DstRC); bool Emitted = TII->copyRegToReg(*BB, InsertPos, VRBase, SrcReg, DstRC, SrcRC); // If the target didn't handle the copy with different register // classes and the destination is a subset of the source, // try a normal same-RC copy. if (!Emitted && DstRC->hasSuperClass(SrcRC)) Emitted = TII->copyRegToReg(*BB, InsertPos, VRBase, SrcReg, SrcRC, SrcRC); assert(Emitted && "Unable to issue a copy instruction!\n"); } SDValue Op(Node, ResNo); if (IsClone) VRBaseMap.erase(Op); bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; isNew = isNew; // Silence compiler warning. assert(isNew && "Node emitted out of order - early"); }
/// Return true if this node is so simple that we should just print it inline /// if it appears as an operand. static bool shouldPrintInline(const SDNode &Node) { if (Node.getOpcode() == ISD::EntryToken) return false; return Node.getNumOperands() == 0; }
/// Select - Hand-written selection for a handful of i32 nodes. Returns the
/// replacement node, NULL when the uses were already rewritten in place, or
/// whatever SelectCode produces for everything not handled here.
SDNode *XCoreDAGToDAGISel::Select(SDValue Op) {
  SDNode *N = Op.getNode();
  DebugLoc dl = N->getDebugLoc();
  EVT NVT = N->getValueType(0);

  // Only i32 results are special-cased.
  if (NVT != MVT::i32)
    return SelectCode(Op);

  switch (N->getOpcode()) {
  case ISD::Constant: {
    if (Predicate_immMskBitp(N)) {
      // immMskBitp matched: emit MKMSK with the transformed operand.
      SDValue MaskOperand = Transform_msksize_xform(N);
      return CurDAG->getTargetNode(XCore::MKMSK_rus, dl,
                                   MVT::i32, MaskOperand);
    }
    if (Predicate_immU16(N))
      break; // Left to the autogenerated matcher.

    // Otherwise load the value from the constant pool.
    unsigned Imm = cast<ConstantSDNode>(N)->getZExtValue();
    SDValue PoolEntry = CurDAG->getTargetConstantPool(
        ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Imm),
        TLI.getPointerTy());
    return CurDAG->getTargetNode(XCore::LDWCP_lru6, dl, MVT::i32,
                                 MVT::Other, PoolEntry,
                                 CurDAG->getEntryNode());
  }

  case ISD::SMUL_LOHI: {
    // FIXME fold addition into the macc instruction
    if (Subtarget.isXS1A())
      break;
    SDNode *ZeroNode =
        CurDAG->getTargetNode(XCore::LDC_ru6, dl, MVT::i32,
                              CurDAG->getTargetConstant(0, MVT::i32));
    SDValue Zero(ZeroNode, 0);
    SDValue MaccOps[] = { Zero, Zero, Op.getOperand(0), Op.getOperand(1) };
    SDNode *Macc = CurDAG->getTargetNode(XCore::MACCS_l4r, dl,
                                         MVT::i32, MVT::i32, MaccOps, 4);
    // The two results of the machine node are in the opposite order.
    ReplaceUses(SDValue(N, 0), SDValue(Macc, 1));
    ReplaceUses(SDValue(N, 1), SDValue(Macc, 0));
    return NULL;
  }

  case ISD::UMUL_LOHI: {
    // FIXME fold addition into the macc / lmul instruction
    SDNode *ZeroNode =
        CurDAG->getTargetNode(XCore::LDC_ru6, dl, MVT::i32,
                              CurDAG->getTargetConstant(0, MVT::i32));
    SDValue Zero(ZeroNode, 0);
    SDValue LmulOps[] = { Op.getOperand(0), Op.getOperand(1), Zero, Zero };
    SDNode *Lmul = CurDAG->getTargetNode(XCore::LMUL_l6r, dl,
                                         MVT::i32, MVT::i32, LmulOps, 4);
    // The two results of the machine node are in the opposite order.
    ReplaceUses(SDValue(N, 0), SDValue(Lmul, 1));
    ReplaceUses(SDValue(N, 1), SDValue(Lmul, 0));
    return NULL;
  }

  case XCoreISD::LADD:
  case XCoreISD::LSUB: {
    if (Subtarget.isXS1A())
      break;
    // Both nodes map onto a three-operand machine instruction; only the
    // opcode differs.
    unsigned MachineOpc = N->getOpcode() == XCoreISD::LADD
                              ? XCore::LADD_l5r
                              : XCore::LSUB_l5r;
    SDValue LongOps[] = { Op.getOperand(0), Op.getOperand(1),
                          Op.getOperand(2) };
    return CurDAG->getTargetNode(MachineOpc, dl, MVT::i32, MVT::i32,
                                 LongOps, 3);
  }

  default:
    // Other cases are autogenerated.
    break;
  }

  return SelectCode(Op);
}
/// EmitMachineNode - Generate machine code for a target-specific node and
/// needed dependencies.
///
/// Subreg, COPY_TO_REGCLASS and REG_SEQUENCE nodes are handed to their
/// dedicated emitters and IMPLICIT_DEF emits nothing here. Every other node
/// is built as a MachineInstr, receives its result registers and operands,
/// is inserted at InsertPos, and finally has its unused physreg defs marked
/// dead.
///
/// \param Node      machine node to emit.
/// \param IsClone   forwarded unchanged to the helper emitters.
/// \param IsCloned  forwarded unchanged to the helper emitters.
/// \param VRBaseMap value-to-register map, passed to every helper that
///                  creates or looks up registers.
void InstrEmitter::
EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
                DenseMap<SDValue, unsigned> &VRBaseMap) {
  unsigned Opc = Node->getMachineOpcode();

  // Handle subreg insert/extract specially
  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
      Opc == TargetOpcode::INSERT_SUBREG ||
      Opc == TargetOpcode::SUBREG_TO_REG) {
    EmitSubregNode(Node, VRBaseMap, IsClone, IsCloned);
    return;
  }

  // Handle COPY_TO_REGCLASS specially.
  if (Opc == TargetOpcode::COPY_TO_REGCLASS) {
    EmitCopyToRegClassNode(Node, VRBaseMap);
    return;
  }

  // Handle REG_SEQUENCE specially.
  if (Opc == TargetOpcode::REG_SEQUENCE) {
    EmitRegSequence(Node, VRBaseMap, IsClone, IsCloned);
    return;
  }

  if (Opc == TargetOpcode::IMPLICIT_DEF)
    // We want a unique VR for each IMPLICIT_DEF use.
    return;

  const MCInstrDesc &II = TII->get(Opc);
  unsigned NumResults = CountResults(Node);
  unsigned NumDefs = II.getNumDefs();
  const MCPhysReg *ScratchRegs = nullptr;

  // Handle STACKMAP and PATCHPOINT specially and then use the generic code.
  if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
    // Stackmaps do not have arguments and do not preserve their calling
    // convention. However, to simplify runtime support, they clobber the same
    // scratch registers as AnyRegCC.
    unsigned CC = CallingConv::AnyReg;
    if (Opc == TargetOpcode::PATCHPOINT) {
      // A patchpoint carries its own calling convention as an operand, and
      // its def count is taken from the node's results.
      CC = Node->getConstantOperandVal(PatchPointOpers::CCPos);
      NumDefs = NumResults;
    }
    ScratchRegs = TLI->getScratchRegisters((CallingConv::ID) CC);
  }

  unsigned NumImpUses = 0;
  unsigned NodeOperands =
    countOperands(Node, II.getNumOperands() - NumDefs, NumImpUses);
  // Results beyond the explicit defs can only come from implicit defs.
  bool HasPhysRegOuts = NumResults > NumDefs && II.getImplicitDefs()!=nullptr;
#ifndef NDEBUG
  unsigned NumMIOperands = NodeOperands + NumResults;
  if (II.isVariadic())
    assert(NumMIOperands >= II.getNumOperands() &&
           "Too few operands for a variadic node!");
  else
    assert(NumMIOperands >= II.getNumOperands() &&
           NumMIOperands <= II.getNumOperands() + II.getNumImplicitDefs() +
                            NumImpUses &&
           "#operands for dag node doesn't match .td file!");
#endif

  // Create the new machine instruction.
  MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II);

  // Add result register values for things that are defined by this
  // instruction.
  if (NumResults) {
    CreateVirtualRegisters(Node, MIB, II, IsClone, IsCloned, VRBaseMap);

    // Transfer any IR flags from the SDNode to the MachineInstr
    // (note: this only happens for nodes that have results).
    MachineInstr *MI = MIB.getInstr();
    const SDNodeFlags Flags = Node->getFlags();
    if (Flags.hasNoSignedZeros())
      MI->setFlag(MachineInstr::MIFlag::FmNsz);

    if (Flags.hasAllowReciprocal())
      MI->setFlag(MachineInstr::MIFlag::FmArcp);

    if (Flags.hasNoNaNs())
      MI->setFlag(MachineInstr::MIFlag::FmNoNans);

    if (Flags.hasNoInfs())
      MI->setFlag(MachineInstr::MIFlag::FmNoInfs);

    if (Flags.hasAllowContract())
      MI->setFlag(MachineInstr::MIFlag::FmContract);

    if (Flags.hasApproximateFuncs())
      MI->setFlag(MachineInstr::MIFlag::FmAfn);

    if (Flags.hasAllowReassociation())
      MI->setFlag(MachineInstr::MIFlag::FmReassoc);
  }

  // Emit all of the actual operands of this instruction, adding them to the
  // instruction as appropriate.
  bool HasOptPRefs = NumDefs > NumResults;
  assert((!HasOptPRefs || !HasPhysRegOuts) &&
         "Unable to cope with optional defs and phys regs defs!");
  // Skip the leading node operands that stand for defs without a result.
  unsigned NumSkip = HasOptPRefs ? NumDefs - NumResults : 0;
  for (unsigned i = NumSkip; i != NodeOperands; ++i)
    AddOperand(MIB, Node->getOperand(i), i-NumSkip+NumDefs, &II,
               VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned);

  // Add scratch registers as implicit def and early clobber
  // (the list is walked until its zero terminator).
  if (ScratchRegs)
    for (unsigned i = 0; ScratchRegs[i]; ++i)
      MIB.addReg(ScratchRegs[i], RegState::ImplicitDefine |
                                 RegState::EarlyClobber);

  // Transfer all of the memory reference descriptions of this instruction.
  MIB.setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
                 cast<MachineSDNode>(Node)->memoperands_end());

  // Insert the instruction into position in the block. This needs to
  // happen before any custom inserter hook is called so that the
  // hook knows where in the block to insert the replacement code.
  MBB->insert(InsertPos, MIB);

  // The MachineInstr may also define physregs instead of virtregs.  These
  // physreg values can reach other instructions in different ways:
  //
  // 1. When there is a use of a Node value beyond the explicitly defined
  //    virtual registers, we emit a CopyFromReg for one of the implicitly
  //    defined physregs.  This only happens when HasPhysRegOuts is true.
  //
  // 2. A CopyFromReg reading a physreg may be glued to this instruction.
  //
  // 3. A glued instruction may implicitly use a physreg.
  //
  // 4. A glued instruction may use a RegisterSDNode operand.
  //
  // Collect all the used physreg defs, and make sure that any unused physreg
  // defs are marked as dead.
  SmallVector<unsigned, 8> UsedRegs;

  // Additional results must be physical register defs.
  if (HasPhysRegOuts) {
    for (unsigned i = NumDefs; i < NumResults; ++i) {
      unsigned Reg = II.getImplicitDefs()[i - NumDefs];
      if (!Node->hasAnyUseOfValue(i))
        continue;
      // This implicitly defined physreg has a use.
      UsedRegs.push_back(Reg);
      EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
    }
  }

  // Scan the glue chain for any used physregs.
  if (Node->getValueType(Node->getNumValues()-1) == MVT::Glue) {
    for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) {
      if (F->getOpcode() == ISD::CopyFromReg) {
        UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
        continue;
      } else if (F->getOpcode() == ISD::CopyToReg) {
        // Skip CopyToReg nodes that are internal to the glue chain.
        continue;
      }
      // Collect declared implicit uses.
      const MCInstrDesc &MCID = TII->get(F->getMachineOpcode());
      UsedRegs.append(MCID.getImplicitUses(),
                      MCID.getImplicitUses() + MCID.getNumImplicitUses());
      // In addition to declared implicit uses, we must also check for
      // direct RegisterSDNode operands.
      for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
        if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
          unsigned Reg = R->getReg();
          if (TargetRegisterInfo::isPhysicalRegister(Reg))
            UsedRegs.push_back(Reg);
        }
    }
  }

  // Finally mark unused registers as dead.
  if (!UsedRegs.empty() || II.getImplicitDefs())
    MIB->setPhysRegsDeadExcept(UsedRegs, *TRI);

  // Run post-isel target hook to adjust this instruction if needed.
  if (II.hasPostISelHook())
    TLI->AdjustInstrPostInstrSelection(*MIB, Node);
}
void ScheduleDAGSDNodes::AddSchedEdges() { const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); // Check to see if the scheduler cares about latencies. bool UnitLatencies = forceUnitLatencies(); // Pass 2: add the preds, succs, etc. for (unsigned su = 0, e = SUnits.size(); su != e; ++su) { SUnit *SU = &SUnits[su]; SDNode *MainNode = SU->getNode(); if (MainNode->isMachineOpcode()) { unsigned Opc = MainNode->getMachineOpcode(); const MCInstrDesc &MCID = TII->get(Opc); for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { SU->isTwoAddress = true; break; } } if (MCID.isCommutable()) SU->isCommutable = true; } // Find all predecessors and successors of the group. for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) { if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).getImplicitDefs()) { SU->hasPhysRegClobbers = true; unsigned NumUsed = InstrEmitter::CountResults(N); while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1)) --NumUsed; // Skip over unused values at the end. if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs()) SU->hasPhysRegDefs = true; } for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { SDNode *OpN = N->getOperand(i).getNode(); if (isPassiveNode(OpN)) continue; // Not scheduled. SUnit *OpSU = &SUnits[OpN->getNodeId()]; assert(OpSU && "Node has no SUnit!"); if (OpSU == SU) continue; // In the same group. EVT OpVT = N->getOperand(i).getValueType(); assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!"); bool isChain = OpVT == MVT::Other; unsigned PhysReg = 0; int Cost = 1; // Determine if this is a physical register dependency. CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost); assert((PhysReg == 0 || !isChain) && "Chain dependence via physreg data?"); // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. 
For now, scheduler // emits a copy from the physical register to a virtual register unless // it requires a cross class copy (cost < 0). That means we are only // treating "expensive to copy" register dependency as physical register // dependency. This may change in the future though. if (Cost >= 0 && !StressSched) PhysReg = 0; // If this is a ctrl dep, latency is 1. unsigned OpLatency = isChain ? 1 : OpSU->Latency; // Special-case TokenFactor chains as zero-latency. if(isChain && OpN->getOpcode() == ISD::TokenFactor) OpLatency = 0; const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data, OpLatency, PhysReg); if (!isChain && !UnitLatencies) { computeOperandLatency(OpN, N, i, const_cast<SDep &>(dep)); ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep)); } if (!SU->addPred(dep) && !dep.isCtrl() && OpSU->NumRegDefsLeft > 1) { // Multiple register uses are combined in the same SUnit. For example, // we could have a set of glued nodes with all their defs consumed by // another set of glued nodes. Register pressure tracking sees this as // a single use, so to keep pressure balanced we reduce the defs. // // We can't tell (without more book-keeping) if this results from // glued nodes or duplicate operands. As long as we don't reduce // NumRegDefsLeft to zero, we handle the common cases well. --OpSU->NumRegDefsLeft; } } } } }
/// Extract call from statepoint, lower it and return pointer to the /// call node. Also update NodeMap so that getValue(statepoint) will /// reference lowered call result static SDNode * lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad, SelectionDAGBuilder &Builder, SmallVectorImpl<SDValue> &PendingExports) { ImmutableCallSite CS(ISP.getCallSite()); SDValue ActualCallee = Builder.getValue(ISP.getCalledValue()); assert(CS.getCallingConv() != CallingConv::AnyReg && "anyregcc is not supported on statepoints!"); Type *DefTy = ISP.getActualReturnType(); bool HasDef = !DefTy->isVoidTy(); SDValue ReturnValue, CallEndVal; std::tie(ReturnValue, CallEndVal) = Builder.lowerCallOperands( ISP.getCallSite(), ImmutableStatepoint::CallArgsBeginPos, ISP.getNumCallArgs(), ActualCallee, DefTy, LandingPad, false /* IsPatchPoint */); SDNode *CallEnd = CallEndVal.getNode(); // Get a call instruction from the call sequence chain. Tail calls are not // allowed. The following code is essentially reverse engineering X86's // LowerCallTo. // // We are expecting DAG to have the following form: // // ch = eh_label (only in case of invoke statepoint) // ch, glue = callseq_start ch // ch, glue = X86::Call ch, glue // ch, glue = callseq_end ch, glue // get_return_value ch, glue // // get_return_value can either be a CopyFromReg to grab the return value from // %RAX, or it can be a LOAD to load a value returned by reference via a stack // slot. if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg || CallEnd->getOpcode() == ISD::LOAD)) CallEnd = CallEnd->getOperand(0).getNode(); assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && "expected!"); if (HasDef) { if (CS.isInvoke()) { // Result value will be used in different basic block for invokes // so we need to export it now. But statepoint call has a different type // than the actuall call. It means that standart exporting mechanism will // create register of the wrong type. 
So instead we need to create // register with correct type and save value into it manually. // TODO: To eliminate this problem we can remove gc.result intrinsics // completelly and make statepoint call to return a tuple. unsigned Reg = Builder.FuncInfo.CreateRegs(ISP.getActualReturnType()); RegsForValue RFV( *Builder.DAG.getContext(), Builder.DAG.getTargetLoweringInfo(), Builder.DAG.getDataLayout(), Reg, ISP.getActualReturnType()); SDValue Chain = Builder.DAG.getEntryNode(); RFV.getCopyToRegs(ReturnValue, Builder.DAG, Builder.getCurSDLoc(), Chain, nullptr); PendingExports.push_back(Chain); Builder.FuncInfo.ValueMap[CS.getInstruction()] = Reg; } else { // The value of the statepoint itself will be the value of call itself. // We'll replace the actually call node shortly. gc_result will grab // this value. Builder.setValue(CS.getInstruction(), ReturnValue); } } else { // The token value is never used from here on, just generate a poison value Builder.setValue(CS.getInstruction(), Builder.DAG.getIntPtrConstant(-1, Builder.getCurSDLoc())); } return CallEnd->getOperand(0).getNode(); }
/// EmitMachineNode - Generate machine code for a target-specific node and
/// needed dependencies.
///
/// Subreg, COPY_TO_REGCLASS and REG_SEQUENCE nodes are handed to their
/// dedicated emitters and IMPLICIT_DEF emits nothing here. Every other node
/// is built as a MachineInstr, gets its dead implicit defs marked, receives
/// its result registers and operands, and is inserted at InsertPos.
///
/// \param Node      machine node to emit.
/// \param IsClone   forwarded unchanged to the helper emitters.
/// \param IsCloned  forwarded unchanged to the helper emitters.
/// \param VRBaseMap value-to-register map, passed to every helper that
///                  creates or looks up registers.
void InstrEmitter::
EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
                DenseMap<SDValue, unsigned> &VRBaseMap) {
  unsigned Opc = Node->getMachineOpcode();

  // Handle subreg insert/extract specially
  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
      Opc == TargetOpcode::INSERT_SUBREG ||
      Opc == TargetOpcode::SUBREG_TO_REG) {
    EmitSubregNode(Node, VRBaseMap, IsClone, IsCloned);
    return;
  }

  // Handle COPY_TO_REGCLASS specially.
  if (Opc == TargetOpcode::COPY_TO_REGCLASS) {
    EmitCopyToRegClassNode(Node, VRBaseMap);
    return;
  }

  // Handle REG_SEQUENCE specially.
  if (Opc == TargetOpcode::REG_SEQUENCE) {
    EmitRegSequence(Node, VRBaseMap, IsClone, IsCloned);
    return;
  }

  if (Opc == TargetOpcode::IMPLICIT_DEF)
    // We want a unique VR for each IMPLICIT_DEF use.
    return;

  const TargetInstrDesc &II = TII->get(Opc);
  unsigned NumResults = CountResults(Node);
  unsigned NodeOperands = CountOperands(Node);
  // Results beyond the explicit defs can only come from implicit defs.
  bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0;
#ifndef NDEBUG
  unsigned NumMIOperands = NodeOperands + NumResults;
  if (II.isVariadic())
    assert(NumMIOperands >= II.getNumOperands() &&
           "Too few operands for a variadic node!");
  else
    assert(NumMIOperands >= II.getNumOperands() &&
           NumMIOperands <= II.getNumOperands()+II.getNumImplicitDefs() &&
           "#operands for dag node doesn't match .td file!");
#endif

  // Create the new machine instruction.
  MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II);

  // The MachineInstr constructor adds implicit-def operands. Scan through
  // these to determine which are dead.
  if (MI->getNumOperands() != 0 &&
      Node->getValueType(Node->getNumValues()-1) == MVT::Glue) {
    // First, collect all used registers.
    SmallVector<unsigned, 8> UsedRegs;
    for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser())
      if (F->getOpcode() == ISD::CopyFromReg)
        UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
      else {
        // Collect declared implicit uses.
        const TargetInstrDesc &TID = TII->get(F->getMachineOpcode());
        UsedRegs.append(TID.getImplicitUses(),
                        TID.getImplicitUses() + TID.getNumImplicitUses());
        // In addition to declared implicit uses, we must also check for
        // direct RegisterSDNode operands.
        for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
          if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
            unsigned Reg = R->getReg();
            if (TargetRegisterInfo::isPhysicalRegister(Reg))
              UsedRegs.push_back(Reg);
          }
      }
    // Then mark unused registers as dead.
    MI->setPhysRegsDeadExcept(UsedRegs, *TRI);
  }

  // Add result register values for things that are defined by this
  // instruction.
  if (NumResults)
    CreateVirtualRegisters(Node, MI, II, IsClone, IsCloned, VRBaseMap);

  // Emit all of the actual operands of this instruction, adding them to the
  // instruction as appropriate.
  bool HasOptPRefs = II.getNumDefs() > NumResults;
  assert((!HasOptPRefs || !HasPhysRegOuts) &&
         "Unable to cope with optional defs and phys regs defs!");
  // Skip the leading node operands that stand for defs without a result.
  unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0;
  for (unsigned i = NumSkip; i != NodeOperands; ++i)
    AddOperand(MI, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II,
               VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned);

  // Transfer all of the memory reference descriptions of this instruction.
  MI->setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
                 cast<MachineSDNode>(Node)->memoperands_end());

  // Insert the instruction into position in the block. This needs to
  // happen before any custom inserter hook is called so that the
  // hook knows where in the block to insert the replacement code.
  MBB->insert(InsertPos, MI);

  // Additional results must be physical register defs.
  if (HasPhysRegOuts) {
    for (unsigned i = II.getNumDefs(); i < NumResults; ++i) {
      unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()];
      if (Node->hasAnyUseOfValue(i))
        EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
      // If there are no uses, mark the register as dead now, so that
      // MachineLICM/Sink can see that it's dead. Don't do this if the
      // node has a Glue value, for the benefit of targets still using
      // Glue for values in physregs.
      else if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue)
        MI->addRegisterDead(Reg, TRI);
    }
  }

  // If the instruction has implicit defs and the node doesn't, mark the
  // implicit def as dead.  If the node has any glue outputs, we don't do this
  // because we don't know what implicit defs are being used by glued nodes.
  //
  // NOTE(review): the loop starts at NumResults and indexes IDList with
  // i - getNumDefs(); if NumResults can be smaller than getNumDefs() here
  // (the HasOptPRefs case) that unsigned subtraction would wrap - confirm
  // that such nodes never carry implicit defs.
  if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue)
    if (const unsigned *IDList = II.getImplicitDefs()) {
      for (unsigned i = NumResults,
             e = II.getNumDefs()+II.getNumImplicitDefs();
           i != e; ++i)
        MI->addRegisterDead(IDList[i-II.getNumDefs()], TRI);
    }
}
/// EmitSubregNode - Generate machine code for subreg nodes. /// void InstrEmitter::EmitSubregNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap, bool IsClone, bool IsCloned) { unsigned VRBase = 0; unsigned Opc = Node->getMachineOpcode(); // If the node is only used by a CopyToReg and the dest reg is a vreg, use // the CopyToReg'd destination register instead of creating a new vreg. for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); UI != E; ++UI) { SDNode *User = *UI; if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node) { unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); if (TargetRegisterInfo::isVirtualRegister(DestReg)) { VRBase = DestReg; break; } } } if (Opc == TargetOpcode::EXTRACT_SUBREG) { // EXTRACT_SUBREG is lowered as %dst = COPY %src:sub unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); // Figure out the register class to create for the destreg. unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); MachineInstr *DefMI = MRI->getVRegDef(VReg); unsigned SrcReg, DstReg, DefSubIdx; if (DefMI && TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) && SubIdx == DefSubIdx) { // Optimize these: // r1025 = s/zext r1024, 4 // r1026 = extract_subreg r1025, 4 // to a copy // r1026 = copy r1024 const TargetRegisterClass *TRC = MRI->getRegClass(SrcReg); VRBase = MRI->createVirtualRegister(TRC); BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg); } else { const TargetRegisterClass *TRC = MRI->getRegClass(VReg); const TargetRegisterClass *SRC = TRC->getSubRegisterRegClass(SubIdx); assert(SRC && "Invalid subregister index in EXTRACT_SUBREG"); // Figure out the register class to create for the destreg. // Note that if we're going to directly use an existing register, // it must be precisely the required class, and not a subclass // thereof. 
if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) { // Create the reg assert(SRC && "Couldn't find source register class"); VRBase = MRI->createVirtualRegister(SRC); } // Create the extract_subreg machine instruction. MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), VRBase); // Add source, and subreg index AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); assert(TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg())&& "Cannot yet extract from physregs"); MI->getOperand(1).setSubReg(SubIdx); MBB->insert(InsertPos, MI); } } else if (Opc == TargetOpcode::INSERT_SUBREG || Opc == TargetOpcode::SUBREG_TO_REG) { SDValue N0 = Node->getOperand(0); SDValue N1 = Node->getOperand(1); SDValue N2 = Node->getOperand(2); unsigned SubReg = getVR(N1, VRBaseMap); unsigned SubIdx = cast<ConstantSDNode>(N2)->getZExtValue(); const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); const TargetRegisterClass *SRC = getSuperRegisterRegClass(TRC, SubIdx, Node->getValueType(0)); // Figure out the register class to create for the destreg. // Note that if we're going to directly use an existing register, // it must be precisely the required class, and not a subclass // thereof. if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) { // Create the reg assert(SRC && "Couldn't find source register class"); VRBase = MRI->createVirtualRegister(SRC); } // Create the insert_subreg or subreg_to_reg machine instruction. 
MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), TII->get(Opc)); MI->addOperand(MachineOperand::CreateReg(VRBase, true)); // If creating a subreg_to_reg, then the first input operand // is an implicit value immediate, otherwise it's a register if (Opc == TargetOpcode::SUBREG_TO_REG) { const ConstantSDNode *SD = cast<ConstantSDNode>(N0); MI->addOperand(MachineOperand::CreateImm(SD->getZExtValue())); } else AddOperand(MI, N0, 0, 0, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); // Add the subregster being inserted AddOperand(MI, N1, 0, 0, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); MI->addOperand(MachineOperand::CreateImm(SubIdx)); MBB->insert(InsertPos, MI); } else llvm_unreachable("Node is not insert_subreg, extract_subreg, or subreg_to_reg"); SDValue Op(Node, 0); bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; (void)isNew; // Silence compiler warning. assert(isNew && "Node emitted out of order - early"); }
void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstrBuilder &MIB, const MCInstrDesc &II, bool IsClone, bool IsCloned, DenseMap<SDValue, unsigned> &VRBaseMap) { assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF && "IMPLICIT_DEF should have been handled as a special case elsewhere!"); unsigned NumResults = CountResults(Node); for (unsigned i = 0; i < II.getNumDefs(); ++i) { // If the specific node value is only used by a CopyToReg and the dest reg // is a vreg in the same register class, use the CopyToReg'd destination // register instead of creating a new vreg. unsigned VRBase = 0; const TargetRegisterClass *RC = TRI->getAllocatableClass(TII->getRegClass(II, i, TRI, *MF)); // Always let the value type influence the used register class. The // constraints on the instruction may be too lax to represent the value // type correctly. For example, a 64-bit float (X86::FR64) can't live in // the 32-bit float super-class (X86::FR32). if (i < NumResults && TLI->isTypeLegal(Node->getSimpleValueType(i))) { const TargetRegisterClass *VTRC = TLI->getRegClassFor(Node->getSimpleValueType(i)); if (RC) VTRC = TRI->getCommonSubClass(RC, VTRC); if (VTRC) RC = VTRC; } if (II.OpInfo[i].isOptionalDef()) { // Optional def must be a physical register. 
unsigned NumResults = CountResults(Node); VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg(); assert(TargetRegisterInfo::isPhysicalRegister(VRBase)); MIB.addReg(VRBase, RegState::Define); } if (!VRBase && !IsClone && !IsCloned) for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); UI != E; ++UI) { SDNode *User = *UI; if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node && User->getOperand(2).getResNo() == i) { unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) { const TargetRegisterClass *RegRC = MRI->getRegClass(Reg); if (RegRC == RC) { VRBase = Reg; MIB.addReg(VRBase, RegState::Define); break; } } } } // Create the result registers for this node and add the result regs to // the machine instruction. if (VRBase == 0) { assert(RC && "Isn't a register operand!"); VRBase = MRI->createVirtualRegister(RC); MIB.addReg(VRBase, RegState::Define); } SDValue Op(Node, i); if (IsClone) VRBaseMap.erase(Op); bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; (void)isNew; // Silence compiler warning. assert(isNew && "Node emitted out of order - early"); } }
// selectMSUB - // Transforms a subgraph in CurDAG if the following pattern is found: // (addc Lo0, multLo), (sube Hi0, multHi), // where, // multHi/Lo: product of multiplication // Lo0: initial value of Lo register // Hi0: initial value of Hi register // Return true if pattern matching was successful. static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) { // SUBENode's second operand must be a flag output of an SUBC node in order // for the matching to be successful. SDNode *SUBCNode = SUBENode->getOperand(2).getNode(); if (SUBCNode->getOpcode() != ISD::SUBC) return false; SDValue MultHi = SUBENode->getOperand(1); SDValue MultLo = SUBCNode->getOperand(1); SDNode *MultNode = MultHi.getNode(); unsigned MultOpc = MultHi.getOpcode(); // MultHi and MultLo must be generated by the same node, if (MultLo.getNode() != MultNode) return false; // and it must be a multiplication. if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI) return false; // MultLo amd MultHi must be the first and second output of MultNode // respectively. if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0) return false; // Transform this to a MSUB only if SUBENode and SUBCNode are the only users // of the values of MultNode, in which case MultNode will be removed in later // phases. // If there exist users other than SUBENode or SUBCNode, this function returns // here, which will result in MultNode being mapped to a single MULT // instruction node rather than a pair of MULT and MSUB instructions being // produced. if (!MultHi.hasOneUse() || !MultLo.hasOneUse()) return false; SDLoc DL(SUBENode); // Initialize accumulator. SDValue ACCIn = CurDAG->getNode(MipsISD::InsertLOHI, DL, MVT::Untyped, SUBCNode->getOperand(0), SUBENode->getOperand(0)); // create MipsSub(u) node MultOpc = MultOpc == ISD::UMUL_LOHI ? 
MipsISD::MSubu : MipsISD::MSub; SDValue MSub = CurDAG->getNode(MultOpc, DL, MVT::Glue, MultNode->getOperand(0),// Factor 0 MultNode->getOperand(1),// Factor 1 ACCIn); // replace uses of sube and subc here if (!SDValue(SUBCNode, 0).use_empty()) { SDValue LoIdx = CurDAG->getConstant(Mips::sub_lo, MVT::i32); SDValue LoOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MSub, LoIdx); CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), LoOut); } if (!SDValue(SUBENode, 0).use_empty()) { SDValue HiIdx = CurDAG->getConstant(Mips::sub_hi, MVT::i32); SDValue HiOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MSub, HiIdx); CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), HiOut); } return true; }
/// BuildSchedUnits - Create one SUnit per group of glued nodes reachable
/// from the DAG root, storing the SUnit index in each member's NodeId, then
/// flag the SUnits whose values are copied into registers by a call group.
void ScheduleDAGSDNodes::BuildSchedUnits() {
  // During scheduling, the NodeId field of SDNode is used to map SDNodes
  // to their associated SUnits by holding SUnits table indices. A value
  // of -1 means the SDNode does not yet have an associated SUnit.
  unsigned NumNodes = 0;
  for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
       E = DAG->allnodes_end(); NI != E; ++NI) {
    NI->setNodeId(-1);
    ++NumNodes;
  }

  // Reserve entries in the vector for each of the SUnits we are creating.  This
  // ensure that reallocation of the vector won't happen, so SUnit*'s won't get
  // invalidated.
  // FIXME: Multiply by 2 because we may clone nodes during scheduling.
  // This is a temporary workaround.
  SUnits.reserve(NumNodes * 2);

  // Add all nodes in depth first order.
  SmallVector<SDNode*, 64> Worklist;
  SmallPtrSet<SDNode*, 64> Visited;
  Worklist.push_back(DAG->getRoot().getNode());
  Visited.insert(DAG->getRoot().getNode());

  // SUnits found to contain a call; their operands are tagged after the walk.
  SmallVector<SUnit*, 8> CallSUnits;
  while (!Worklist.empty()) {
    SDNode *NI = Worklist.pop_back_val();

    // Add all operands to the worklist unless they've already been added.
    for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i)
      if (Visited.insert(NI->getOperand(i).getNode()))
        Worklist.push_back(NI->getOperand(i).getNode());

    if (isPassiveNode(NI))  // Leaf node, e.g. a TargetImmediate.
      continue;

    // If this node has already been processed, stop now.
    if (NI->getNodeId() != -1) continue;

    SUnit *NodeSUnit = newSUnit(NI);

    // See if anything is glued to this node, if so, add them to glued
    // nodes.  Nodes can have at most one glue input and one glue output.  Glue
    // is required to be the last operand and result of a node.

    // Scan up to find glued preds.
    SDNode *N = NI;
    while (N->getNumOperands() &&
           N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {
      N = N->getOperand(N->getNumOperands()-1).getNode();
      assert(N->getNodeId() == -1 && "Node already inserted!");
      N->setNodeId(NodeSUnit->NodeNum);
      if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
        NodeSUnit->isCall = true;
    }

    // Scan down to find any glued succs.
    N = NI;
    while (N->getValueType(N->getNumValues()-1) == MVT::Glue) {
      SDValue GlueVal(N, N->getNumValues()-1);

      // There are either zero or one users of the Glue result.
      bool HasGlueUse = false;
      for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
           UI != E; ++UI)
        if (GlueVal.isOperandOf(*UI)) {
          HasGlueUse = true;
          // Claim the current node for this SUnit, then step down to the
          // user that consumes its glue result.
          assert(N->getNodeId() == -1 && "Node already inserted!");
          N->setNodeId(NodeSUnit->NodeNum);
          N = *UI;
          if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
            NodeSUnit->isCall = true;
          break;
        }
      if (!HasGlueUse) break;
    }

    if (NodeSUnit->isCall)
      CallSUnits.push_back(NodeSUnit);

    // Schedule zero-latency TokenFactor below any nodes that may increase the
    // schedule height. Otherwise, ancestors of the TokenFactor may appear to
    // have false stalls.
    if (NI->getOpcode() == ISD::TokenFactor)
      NodeSUnit->isScheduleLow = true;

    // If there are glue operands involved, N is now the bottom-most node
    // of the sequence of nodes that are glued together.
    // Update the SUnit.
    NodeSUnit->setNode(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(NodeSUnit->NodeNum);

    // Compute NumRegDefsLeft. This must be done before AddSchedEdges.
    InitNumRegDefsLeft(NodeSUnit);

    // Assign the Latency field of NodeSUnit using target-provided information.
    computeLatency(NodeSUnit);
  }

  // Find all call operands: for every CopyToReg in a call's glued group, mark
  // the SUnit that produces the copied value.
  while (!CallSUnits.empty()) {
    SUnit *SU = CallSUnits.pop_back_val();
    for (const SDNode *SUNode = SU->getNode(); SUNode;
         SUNode = SUNode->getGluedNode()) {
      if (SUNode->getOpcode() != ISD::CopyToReg)
        continue;
      SDNode *SrcN = SUNode->getOperand(2).getNode();
      if (isPassiveNode(SrcN)) continue;   // Not scheduled.
      SUnit *SrcSU = &SUnits[SrcN->getNodeId()];
      SrcSU->isCallOp = true;
    }
  }
}
/// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an /// implicit physical register output. void InstrEmitter:: EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, unsigned SrcReg, DenseMap<SDValue, unsigned> &VRBaseMap) { unsigned VRBase = 0; if (TargetRegisterInfo::isVirtualRegister(SrcReg)) { // Just use the input register directly! SDValue Op(Node, ResNo); if (IsClone) VRBaseMap.erase(Op); bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second; (void)isNew; // Silence compiler warning. assert(isNew && "Node emitted out of order - early"); return; } // If the node is only used by a CopyToReg and the dest reg is a vreg, use // the CopyToReg'd destination register instead of creating a new vreg. bool MatchReg = true; const TargetRegisterClass *UseRC = NULL; EVT VT = Node->getValueType(ResNo); // Stick to the preferred register classes for legal types. if (TLI->isTypeLegal(VT)) UseRC = TLI->getRegClassFor(VT); if (!IsClone && !IsCloned) for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); UI != E; ++UI) { SDNode *User = *UI; bool Match = true; if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node && User->getOperand(2).getResNo() == ResNo) { unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); if (TargetRegisterInfo::isVirtualRegister(DestReg)) { VRBase = DestReg; Match = false; } else if (DestReg != SrcReg) Match = false; } else { for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { SDValue Op = User->getOperand(i); if (Op.getNode() != Node || Op.getResNo() != ResNo) continue; EVT VT = Node->getValueType(Op.getResNo()); if (VT == MVT::Other || VT == MVT::Glue) continue; Match = false; if (User->isMachineOpcode()) { const MCInstrDesc &II = TII->get(User->getMachineOpcode()); const TargetRegisterClass *RC = 0; if (i+II.getNumDefs() < II.getNumOperands()) RC = TII->getRegClass(II, i+II.getNumDefs(), TRI); if (!UseRC) UseRC = RC; else if 
(RC) { const TargetRegisterClass *ComRC = TRI->getCommonSubClass(UseRC, RC); // If multiple uses expect disjoint register classes, we emit // copies in AddRegisterOperand. if (ComRC) UseRC = ComRC; } } } } MatchReg &= Match; if (VRBase) break; } const TargetRegisterClass *SrcRC = 0, *DstRC = 0; SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT); // Figure out the register class to create for the destreg. if (VRBase) { DstRC = MRI->getRegClass(VRBase); } else if (UseRC) { assert(UseRC->hasType(VT) && "Incompatible phys register def and uses!"); DstRC = UseRC; } else { DstRC = TLI->getRegClassFor(VT); } // If all uses are reading from the src physical register and copying the // register is either impossible or very expensive, then don't create a copy. if (MatchReg && SrcRC->getCopyCost() < 0) { VRBase = SrcReg; } else { // Create the reg, emit the copy. VRBase = MRI->createVirtualRegister(DstRC); BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg); } SDValue Op(Node, ResNo); if (IsClone) VRBaseMap.erase(Op); bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; (void)isNew; // Silence compiler warning. assert(isNew && "Node emitted out of order - early"); }
/// EmitMachineNode - Generate machine code for a target-specific node and /// needed dependencies. /// void InstrEmitter:: EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, DenseMap<SDValue, unsigned> &VRBaseMap) { unsigned Opc = Node->getMachineOpcode(); // Handle subreg insert/extract specially if (Opc == TargetOpcode::EXTRACT_SUBREG || Opc == TargetOpcode::INSERT_SUBREG || Opc == TargetOpcode::SUBREG_TO_REG) { EmitSubregNode(Node, VRBaseMap, IsClone, IsCloned); return; } // Handle COPY_TO_REGCLASS specially. if (Opc == TargetOpcode::COPY_TO_REGCLASS) { EmitCopyToRegClassNode(Node, VRBaseMap); return; } // Handle REG_SEQUENCE specially. if (Opc == TargetOpcode::REG_SEQUENCE) { EmitRegSequence(Node, VRBaseMap, IsClone, IsCloned); return; } if (Opc == TargetOpcode::IMPLICIT_DEF) // We want a unique VR for each IMPLICIT_DEF use. return; const MCInstrDesc &II = TII->get(Opc); unsigned NumResults = CountResults(Node); unsigned NodeOperands = CountOperands(Node); bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0; #ifndef NDEBUG unsigned NumMIOperands = NodeOperands + NumResults; if (II.isVariadic()) assert(NumMIOperands >= II.getNumOperands() && "Too few operands for a variadic node!"); else assert(NumMIOperands >= II.getNumOperands() && NumMIOperands <= II.getNumOperands()+II.getNumImplicitDefs() && "#operands for dag node doesn't match .td file!"); #endif // Create the new machine instruction. MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II); // Add result register values for things that are defined by this // instruction. if (NumResults) CreateVirtualRegisters(Node, MI, II, IsClone, IsCloned, VRBaseMap); // Emit all of the actual operands of this instruction, adding them to the // instruction as appropriate. bool HasOptPRefs = II.getNumDefs() > NumResults; assert((!HasOptPRefs || !HasPhysRegOuts) && "Unable to cope with optional defs and phys regs defs!"); unsigned NumSkip = HasOptPRefs ? 
II.getNumDefs() - NumResults : 0; for (unsigned i = NumSkip; i != NodeOperands; ++i) AddOperand(MI, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); // Transfer all of the memory reference descriptions of this instruction. MI->setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(), cast<MachineSDNode>(Node)->memoperands_end()); // Insert the instruction into position in the block. This needs to // happen before any custom inserter hook is called so that the // hook knows where in the block to insert the replacement code. MBB->insert(InsertPos, MI); // The MachineInstr may also define physregs instead of virtregs. These // physreg values can reach other instructions in different ways: // // 1. When there is a use of a Node value beyond the explicitly defined // virtual registers, we emit a CopyFromReg for one of the implicitly // defined physregs. This only happens when HasPhysRegOuts is true. // // 2. A CopyFromReg reading a physreg may be glued to this instruction. // // 3. A glued instruction may implicitly use a physreg. // // 4. A glued instruction may use a RegisterSDNode operand. // // Collect all the used physreg defs, and make sure that any unused physreg // defs are marked as dead. SmallVector<unsigned, 8> UsedRegs; // Additional results must be physical register defs. if (HasPhysRegOuts) { for (unsigned i = II.getNumDefs(); i < NumResults; ++i) { unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()]; if (!Node->hasAnyUseOfValue(i)) continue; // This implicitly defined physreg has a use. UsedRegs.push_back(Reg); EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap); } } // Scan the glue chain for any used physregs. 
if (Node->getValueType(Node->getNumValues()-1) == MVT::Glue) { for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) { if (F->getOpcode() == ISD::CopyFromReg) { UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg()); continue; } else if (F->getOpcode() == ISD::CopyToReg) { // Skip CopyToReg nodes that are internal to the glue chain. continue; } // Collect declared implicit uses. const MCInstrDesc &MCID = TII->get(F->getMachineOpcode()); UsedRegs.append(MCID.getImplicitUses(), MCID.getImplicitUses() + MCID.getNumImplicitUses()); // In addition to declared implicit uses, we must also check for // direct RegisterSDNode operands. for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i) if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) { unsigned Reg = R->getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) UsedRegs.push_back(Reg); } } } // Finally mark unused registers as dead. if (!UsedRegs.empty() || II.getImplicitDefs()) MI->setPhysRegsDeadExcept(UsedRegs, *TRI); // Run post-isel target hook to adjust this instruction if needed. #ifdef NDEBUG if (II.hasPostISelHook()) #endif TLI->AdjustInstrPostInstrSelection(MI, Node); }
/// Extract call from statepoint, lower it and return pointer to the /// call node. Also update NodeMap so that getValue(statepoint) will /// reference lowered call result static SDNode * lowerCallFromStatepoint(ImmutableStatepoint ISP, const BasicBlock *EHPadBB, SelectionDAGBuilder &Builder, SmallVectorImpl<SDValue> &PendingExports) { ImmutableCallSite CS(ISP.getCallSite()); SDValue ActualCallee; if (ISP.getNumPatchBytes() > 0) { // If we've been asked to emit a nop sequence instead of a call instruction // for this statepoint then don't lower the call target, but use a constant // `null` instead. Not lowering the call target lets statepoint clients get // away without providing a physical address for the symbolic call target at // link time. const auto &TLI = Builder.DAG.getTargetLoweringInfo(); const auto &DL = Builder.DAG.getDataLayout(); unsigned AS = ISP.getCalledValue()->getType()->getPointerAddressSpace(); ActualCallee = Builder.DAG.getConstant(0, Builder.getCurSDLoc(), TLI.getPointerTy(DL, AS)); } else ActualCallee = Builder.getValue(ISP.getCalledValue()); assert(CS.getCallingConv() != CallingConv::AnyReg && "anyregcc is not supported on statepoints!"); Type *DefTy = ISP.getActualReturnType(); bool HasDef = !DefTy->isVoidTy(); SDValue ReturnValue, CallEndVal; std::tie(ReturnValue, CallEndVal) = Builder.lowerCallOperands( ISP.getCallSite(), ImmutableStatepoint::CallArgsBeginPos, ISP.getNumCallArgs(), ActualCallee, DefTy, EHPadBB, false /* IsPatchPoint */); SDNode *CallEnd = CallEndVal.getNode(); // Get a call instruction from the call sequence chain. Tail calls are not // allowed. The following code is essentially reverse engineering X86's // LowerCallTo. 
// // We are expecting DAG to have the following form: // // ch = eh_label (only in case of invoke statepoint) // ch, glue = callseq_start ch // ch, glue = X86::Call ch, glue // ch, glue = callseq_end ch, glue // get_return_value ch, glue // // get_return_value can either be a sequence of CopyFromReg instructions // to grab the return value from the return register(s), or it can be a LOAD // to load a value returned by reference via a stack slot. if (HasDef) { if (CallEnd->getOpcode() == ISD::LOAD) CallEnd = CallEnd->getOperand(0).getNode(); else while (CallEnd->getOpcode() == ISD::CopyFromReg) CallEnd = CallEnd->getOperand(0).getNode(); } assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && "expected!"); // Export the result value if needed const Instruction *GCResult = ISP.getGCResult(); if (HasDef && GCResult) { if (GCResult->getParent() != CS.getParent()) { // Result value will be used in a different basic block so we need to // export it now. // Default exporting mechanism will not work here because statepoint call // has a different type than the actual call. It means that by default // llvm will create export register of the wrong type (always i32 in our // case). So instead we need to create export register with correct type // manually. // TODO: To eliminate this problem we can remove gc.result intrinsics // completely and make statepoint call to return a tuple. unsigned Reg = Builder.FuncInfo.CreateRegs(ISP.getActualReturnType()); RegsForValue RFV( *Builder.DAG.getContext(), Builder.DAG.getTargetLoweringInfo(), Builder.DAG.getDataLayout(), Reg, ISP.getActualReturnType()); SDValue Chain = Builder.DAG.getEntryNode(); RFV.getCopyToRegs(ReturnValue, Builder.DAG, Builder.getCurSDLoc(), Chain, nullptr); PendingExports.push_back(Chain); Builder.FuncInfo.ValueMap[CS.getInstruction()] = Reg; } else { // Result value will be used in a same basic block. Don't export it or // perform any explicit register copies. // We'll replace the actuall call node shortly. 
gc_result will grab // this value. Builder.setValue(CS.getInstruction(), ReturnValue); } } else { // The token value is never used from here on, just generate a poison value Builder.setValue(CS.getInstruction(), Builder.DAG.getIntPtrConstant(-1, Builder.getCurSDLoc())); } return CallEnd->getOperand(0).getNode(); }