/// shrinkToUses - After removing some uses of a register, shrink its live /// range to just the remaining uses. This method does not compute reaching /// defs for new uses, and it doesn't remove dead defs. bool LiveIntervals::shrinkToUses(LiveInterval *li, SmallVectorImpl<MachineInstr*> *dead) { DEBUG(dbgs() << "Shrink: " << *li << '\n'); assert(TargetRegisterInfo::isVirtualRegister(li->reg) && "Can only shrink virtual registers"); // Find all the values used, including PHI kills. SmallVector<std::pair<SlotIndex, VNInfo*>, 16> WorkList; // Blocks that have already been added to WorkList as live-out. SmallPtrSet<MachineBasicBlock*, 16> LiveOut; // Visit all instructions reading li->reg. for (MachineRegisterInfo::reg_instr_iterator I = MRI->reg_instr_begin(li->reg), E = MRI->reg_instr_end(); I != E; ) { MachineInstr *UseMI = &*(I++); if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg)) continue; SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot(); LiveQueryResult LRQ = li->Query(Idx); VNInfo *VNI = LRQ.valueIn(); if (!VNI) { // This shouldn't happen: readsVirtualRegister returns true, but there is // no live value. It is likely caused by a target getting <undef> flags // wrong. DEBUG(dbgs() << Idx << '\t' << *UseMI << "Warning: Instr claims to read non-existent value in " << *li << '\n'); continue; } // Special case: An early-clobber tied operand reads and writes the // register one slot early. if (VNInfo *DefVNI = LRQ.valueDefined()) Idx = DefVNI->def; WorkList.push_back(std::make_pair(Idx, VNI)); } // Create new live ranges with only minimal live segments per def. LiveRange NewLR; for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end(); I != E; ++I) { VNInfo *VNI = *I; if (VNI->isUnused()) continue; NewLR.addSegment(LiveRange::Segment(VNI->def, VNI->def.getDeadSlot(), VNI)); } // Keep track of the PHIs that are in use. SmallPtrSet<VNInfo*, 8> UsedPHIs; // Extend intervals to reach all uses in WorkList. 
while (!WorkList.empty()) { SlotIndex Idx = WorkList.back().first; VNInfo *VNI = WorkList.back().second; WorkList.pop_back(); const MachineBasicBlock *MBB = getMBBFromIndex(Idx.getPrevSlot()); SlotIndex BlockStart = getMBBStartIdx(MBB); // Extend the live range for VNI to be live at Idx. if (VNInfo *ExtVNI = NewLR.extendInBlock(BlockStart, Idx)) { (void)ExtVNI; assert(ExtVNI == VNI && "Unexpected existing value number"); // Is this a PHIDef we haven't seen before? if (!VNI->isPHIDef() || VNI->def != BlockStart || !UsedPHIs.insert(VNI)) continue; // The PHI is live, make sure the predecessors are live-out. for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) { if (!LiveOut.insert(*PI)) continue; SlotIndex Stop = getMBBEndIdx(*PI); // A predecessor is not required to have a live-out value for a PHI. if (VNInfo *PVNI = li->getVNInfoBefore(Stop)) WorkList.push_back(std::make_pair(Stop, PVNI)); } continue; } // VNI is live-in to MBB. DEBUG(dbgs() << " live-in at " << BlockStart << '\n'); NewLR.addSegment(LiveRange::Segment(BlockStart, Idx, VNI)); // Make sure VNI is live-out from the predecessors. for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) { if (!LiveOut.insert(*PI)) continue; SlotIndex Stop = getMBBEndIdx(*PI); assert(li->getVNInfoBefore(Stop) == VNI && "Wrong value out of predecessor"); WorkList.push_back(std::make_pair(Stop, VNI)); } } // Handle dead values. bool CanSeparate = false; for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end(); I != E; ++I) { VNInfo *VNI = *I; if (VNI->isUnused()) continue; LiveRange::iterator LRI = NewLR.FindSegmentContaining(VNI->def); assert(LRI != NewLR.end() && "Missing segment for PHI"); if (LRI->end != VNI->def.getDeadSlot()) continue; if (VNI->isPHIDef()) { // This is a dead PHI. Remove it. 
VNI->markUnused(); NewLR.removeSegment(LRI->start, LRI->end); DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n"); CanSeparate = true; } else { // This is a dead def. Make sure the instruction knows. MachineInstr *MI = getInstructionFromIndex(VNI->def); assert(MI && "No instruction defining live value"); MI->addRegisterDead(li->reg, TRI); if (dead && MI->allDefsAreDead()) { DEBUG(dbgs() << "All defs dead: " << VNI->def << '\t' << *MI); dead->push_back(MI); } } } // Move the trimmed segments back. li->segments.swap(NewLR.segments); DEBUG(dbgs() << "Shrunk: " << *li << '\n'); return CanSeparate; }
unsigned PPCCodeEmitter::getCondBrEncoding(const MachineInstr &MI, unsigned OpNo) const { const MachineOperand &MO = MI.getOperand(OpNo); MCE.addRelocation(GetRelocation(MO, PPC::reloc_pcrel_bcx)); return 0; }
// Branch analysis. // Note: If the condition register is set to CTR or CTR8 then this is a // BDNZ (imm == 1) or BDZ (imm == 0) branch. bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const { bool isPPC64 = TM.getSubtargetImpl()->isPPC64(); // If the block has no terminators, it just falls into the block after it. MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin()) return false; --I; while (I->isDebugValue()) { if (I == MBB.begin()) return false; --I; } if (!isUnpredicatedTerminator(I)) return false; // Get the last instruction in the block. MachineInstr *LastInst = I; // If there is only one terminator instruction, process it. if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { if (LastInst->getOpcode() == PPC::B) { if (!LastInst->getOperand(0).isMBB()) return true; TBB = LastInst->getOperand(0).getMBB(); return false; } else if (LastInst->getOpcode() == PPC::BCC) { if (!LastInst->getOperand(2).isMBB()) return true; // Block ends with fall-through condbranch. TBB = LastInst->getOperand(2).getMBB(); Cond.push_back(LastInst->getOperand(0)); Cond.push_back(LastInst->getOperand(1)); return false; } else if (LastInst->getOpcode() == PPC::BDNZ8 || LastInst->getOpcode() == PPC::BDNZ) { if (!LastInst->getOperand(0).isMBB()) return true; if (DisableCTRLoopAnal) return true; TBB = LastInst->getOperand(0).getMBB(); Cond.push_back(MachineOperand::CreateImm(1)); Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR, true)); return false; } else if (LastInst->getOpcode() == PPC::BDZ8 || LastInst->getOpcode() == PPC::BDZ) { if (!LastInst->getOperand(0).isMBB()) return true; if (DisableCTRLoopAnal) return true; TBB = LastInst->getOperand(0).getMBB(); Cond.push_back(MachineOperand::CreateImm(0)); Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR, true)); return false; } // Otherwise, don't know what this is. 
return true; } // Get the instruction before it if it's a terminator. MachineInstr *SecondLastInst = I; // If there are three terminators, we don't know what sort of block this is. if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) return true; // If the block ends with PPC::B and PPC:BCC, handle it. if (SecondLastInst->getOpcode() == PPC::BCC && LastInst->getOpcode() == PPC::B) { if (!SecondLastInst->getOperand(2).isMBB() || !LastInst->getOperand(0).isMBB()) return true; TBB = SecondLastInst->getOperand(2).getMBB(); Cond.push_back(SecondLastInst->getOperand(0)); Cond.push_back(SecondLastInst->getOperand(1)); FBB = LastInst->getOperand(0).getMBB(); return false; } else if ((SecondLastInst->getOpcode() == PPC::BDNZ8 || SecondLastInst->getOpcode() == PPC::BDNZ) && LastInst->getOpcode() == PPC::B) { if (!SecondLastInst->getOperand(0).isMBB() || !LastInst->getOperand(0).isMBB()) return true; if (DisableCTRLoopAnal) return true; TBB = SecondLastInst->getOperand(0).getMBB(); Cond.push_back(MachineOperand::CreateImm(1)); Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR, true)); FBB = LastInst->getOperand(0).getMBB(); return false; } else if ((SecondLastInst->getOpcode() == PPC::BDZ8 || SecondLastInst->getOpcode() == PPC::BDZ) && LastInst->getOpcode() == PPC::B) { if (!SecondLastInst->getOperand(0).isMBB() || !LastInst->getOperand(0).isMBB()) return true; if (DisableCTRLoopAnal) return true; TBB = SecondLastInst->getOperand(0).getMBB(); Cond.push_back(MachineOperand::CreateImm(0)); Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR, true)); FBB = LastInst->getOperand(0).getMBB(); return false; } // If the block ends with two PPC:Bs, handle it. The second one is not // executed, so remove it. 
if (SecondLastInst->getOpcode() == PPC::B && LastInst->getOpcode() == PPC::B) { if (!SecondLastInst->getOperand(0).isMBB()) return true; TBB = SecondLastInst->getOperand(0).getMBB(); I = LastInst; if (AllowModify) I->eraseFromParent(); return false; } // Otherwise, can't handle this. return true; }
void MCS51FrameLowering:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { const MCS51InstrInfo &TII = *static_cast<const MCS51InstrInfo*>(MF.getTarget().getInstrInfo()); unsigned StackAlign = getStackAlignment(); if (!hasReservedCallFrame(MF)) { // If the stack pointer can be changed after prologue, turn the // adjcallstackup instruction into a 'sub SPW, <amt>' and the // adjcallstackdown instruction into 'add SPW, <amt>' // TODO: consider using push / pop instead of sub + store / add MachineInstr *Old = I; uint64_t Amount = Old->getOperand(0).getImm(); if (Amount != 0) { // We need to keep the stack aligned properly. To do this, we round the // amount of space needed for the outgoing arguments up to the next // alignment boundary. Amount = (Amount+StackAlign-1)/StackAlign*StackAlign; MachineInstr *New = 0; if (Old->getOpcode() == TII.getCallFrameSetupOpcode()) { New = BuildMI(MF, Old->getDebugLoc(), TII.get(MCS51::SUB16ri), MCS51::SPW) .addReg(MCS51::SPW).addImm(Amount); } else { assert(Old->getOpcode() == TII.getCallFrameDestroyOpcode()); // factor out the amount the callee already popped. uint64_t CalleeAmt = Old->getOperand(1).getImm(); Amount -= CalleeAmt; if (Amount) New = BuildMI(MF, Old->getDebugLoc(), TII.get(MCS51::ADD16ri), MCS51::SPW) .addReg(MCS51::SPW).addImm(Amount); } if (New) { // The SRW implicit def is dead. New->getOperand(3).setIsDead(); // Replace the pseudo instruction with a new instruction... MBB.insert(I, New); } } } else if (I->getOpcode() == TII.getCallFrameDestroyOpcode()) { // If we are performing frame pointer elimination and if the callee pops // something off the stack pointer, add it back. if (uint64_t CalleeAmt = I->getOperand(1).getImm()) { MachineInstr *Old = I; MachineInstr *New = BuildMI(MF, Old->getDebugLoc(), TII.get(MCS51::SUB16ri), MCS51::SPW).addReg(MCS51::SPW).addImm(CalleeAmt); // The SRW implicit def is dead. 
New->getOperand(3).setIsDead(); MBB.insert(I, New); } } MBB.erase(I); }
/// Return true if the instruction is a register to register move and /// leave the source and dest operands in the passed parameters. bool MipsInstrInfo:: isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, unsigned &SrcSubIdx, unsigned &DstSubIdx) const { SrcSubIdx = DstSubIdx = 0; // No sub-registers. // addu $dst, $src, $zero || addu $dst, $zero, $src // or $dst, $src, $zero || or $dst, $zero, $src if ((MI.getOpcode() == Mips::ADDu) || (MI.getOpcode() == Mips::OR)) { if (MI.getOperand(1).getReg() == Mips::ZERO) { DstReg = MI.getOperand(0).getReg(); SrcReg = MI.getOperand(2).getReg(); return true; } else if (MI.getOperand(2).getReg() == Mips::ZERO) { DstReg = MI.getOperand(0).getReg(); SrcReg = MI.getOperand(1).getReg(); return true; } } // mov $fpDst, $fpSrc // mfc $gpDst, $fpSrc // mtc $fpDst, $gpSrc if (MI.getOpcode() == Mips::FMOV_S32 || MI.getOpcode() == Mips::FMOV_D32 || MI.getOpcode() == Mips::MFC1 || MI.getOpcode() == Mips::MTC1 ) { DstReg = MI.getOperand(0).getReg(); SrcReg = MI.getOperand(1).getReg(); return true; } // addiu $dst, $src, 0 if (MI.getOpcode() == Mips::ADDiu) { if ((MI.getOperand(1).isReg()) && (isZeroImm(MI.getOperand(2)))) { DstReg = MI.getOperand(0).getReg(); SrcReg = MI.getOperand(1).getReg(); return true; } } return false; }
// Determine whether MI reads memory, writes memory, has side effects, // and/or uses the stack pointer value. static void Query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read, bool &Write, bool &Effects, bool &StackPointer) { assert(!MI.isPosition()); assert(!MI.isTerminator()); if (MI.isDebugValue()) return; // Check for loads. if (MI.mayLoad() && !MI.isDereferenceableInvariantLoad(&AA)) Read = true; // Check for stores. if (MI.mayStore()) { Write = true; const MachineFunction &MF = *MI.getParent()->getParent(); if (MF.getSubtarget<WebAssemblySubtarget>() .getTargetTriple().isOSBinFormatELF()) { // Check for stores to __stack_pointer. for (auto MMO : MI.memoperands()) { const MachinePointerInfo &MPI = MMO->getPointerInfo(); if (MPI.V.is<const PseudoSourceValue *>()) { auto PSV = MPI.V.get<const PseudoSourceValue *>(); if (const ExternalSymbolPseudoSourceValue *EPSV = dyn_cast<ExternalSymbolPseudoSourceValue>(PSV)) if (StringRef(EPSV->getSymbol()) == "__stack_pointer") StackPointer = true; } } } else { // Check for sets of the stack pointer. 
const MachineModuleInfoWasm &MMIW = MF.getMMI().getObjFileInfo<MachineModuleInfoWasm>(); if ((MI.getOpcode() == WebAssembly::SET_LOCAL_I32 || MI.getOpcode() == WebAssembly::SET_LOCAL_I64) && MI.getOperand(0).getImm() == MMIW.getStackPointerGlobal()) { StackPointer = true; } } } else if (MI.hasOrderedMemoryRef()) { switch (MI.getOpcode()) { case WebAssembly::DIV_S_I32: case WebAssembly::DIV_S_I64: case WebAssembly::REM_S_I32: case WebAssembly::REM_S_I64: case WebAssembly::DIV_U_I32: case WebAssembly::DIV_U_I64: case WebAssembly::REM_U_I32: case WebAssembly::REM_U_I64: case WebAssembly::I32_TRUNC_S_F32: case WebAssembly::I64_TRUNC_S_F32: case WebAssembly::I32_TRUNC_S_F64: case WebAssembly::I64_TRUNC_S_F64: case WebAssembly::I32_TRUNC_U_F32: case WebAssembly::I64_TRUNC_U_F32: case WebAssembly::I32_TRUNC_U_F64: case WebAssembly::I64_TRUNC_U_F64: // These instruction have hasUnmodeledSideEffects() returning true // because they trap on overflow and invalid so they can't be arbitrarily // moved, however hasOrderedMemoryRef() interprets this plus their lack // of memoperands as having a potential unknown memory reference. break; default: // Record volatile accesses, unless it's a call, as calls are handled // specially below. if (!MI.isCall()) { Write = true; Effects = true; } break; } } // Check for side effects. 
if (MI.hasUnmodeledSideEffects()) { switch (MI.getOpcode()) { case WebAssembly::DIV_S_I32: case WebAssembly::DIV_S_I64: case WebAssembly::REM_S_I32: case WebAssembly::REM_S_I64: case WebAssembly::DIV_U_I32: case WebAssembly::DIV_U_I64: case WebAssembly::REM_U_I32: case WebAssembly::REM_U_I64: case WebAssembly::I32_TRUNC_S_F32: case WebAssembly::I64_TRUNC_S_F32: case WebAssembly::I32_TRUNC_S_F64: case WebAssembly::I64_TRUNC_S_F64: case WebAssembly::I32_TRUNC_U_F32: case WebAssembly::I64_TRUNC_U_F32: case WebAssembly::I32_TRUNC_U_F64: case WebAssembly::I64_TRUNC_U_F64: // These instructions have hasUnmodeledSideEffects() returning true // because they trap on overflow and invalid so they can't be arbitrarily // moved, however in the specific case of register stackifying, it is safe // to move them because overflow and invalid are Undefined Behavior. break; default: Effects = true; break; } } // Analyze calls. if (MI.isCall()) { switch (MI.getOpcode()) { case WebAssembly::CALL_VOID: case WebAssembly::CALL_INDIRECT_VOID: QueryCallee(MI, 0, Read, Write, Effects, StackPointer); break; case WebAssembly::CALL_I32: case WebAssembly::CALL_I64: case WebAssembly::CALL_F32: case WebAssembly::CALL_F64: case WebAssembly::CALL_INDIRECT_I32: case WebAssembly::CALL_INDIRECT_I64: case WebAssembly::CALL_INDIRECT_F32: case WebAssembly::CALL_INDIRECT_F64: QueryCallee(MI, 1, Read, Write, Effects, StackPointer); break; default: llvm_unreachable("unexpected call opcode"); } } }
/// EmitSchedule - Emit the machine code in scheduled order. Return the new /// InsertPos and MachineBasicBlock that contains this insertion /// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does /// not necessarily refer to returned BB. The emitter may split blocks. MachineBasicBlock *ScheduleDAGSDNodes:: EmitSchedule(MachineBasicBlock::iterator &InsertPos) { InstrEmitter Emitter(BB, InsertPos); DenseMap<SDValue, unsigned> VRBaseMap; DenseMap<SUnit*, unsigned> CopyVRBaseMap; SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders; SmallSet<unsigned, 8> Seen; bool HasDbg = DAG->hasDebugValues(); // If this is the first BB, emit byval parameter dbg_value's. if (HasDbg && BB->getParent()->begin() == MachineFunction::iterator(BB)) { SDDbgInfo::DbgIterator PDI = DAG->ByvalParmDbgBegin(); SDDbgInfo::DbgIterator PDE = DAG->ByvalParmDbgEnd(); for (; PDI != PDE; ++PDI) { MachineInstr *DbgMI= Emitter.EmitDbgValue(*PDI, VRBaseMap); if (DbgMI) BB->insert(InsertPos, DbgMI); } } for (unsigned i = 0, e = Sequence.size(); i != e; i++) { SUnit *SU = Sequence[i]; if (!SU) { // Null SUnit* is a noop. TII->insertNoop(*Emitter.getBlock(), InsertPos); continue; } // For pre-regalloc scheduling, create instructions corresponding to the // SDNode and any glued SDNodes and append them to the block. if (!SU->getNode()) { // Emit a copy. EmitPhysRegCopy(SU, CopyVRBaseMap, InsertPos); continue; } SmallVector<SDNode *, 4> GluedNodes; for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode()) GluedNodes.push_back(N); while (!GluedNodes.empty()) { SDNode *N = GluedNodes.back(); Emitter.EmitNode(GluedNodes.back(), SU->OrigNode != SU, SU->isCloned, VRBaseMap); // Remember the source order of the inserted instruction. if (HasDbg) ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen); GluedNodes.pop_back(); } Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned, VRBaseMap); // Remember the source order of the inserted instruction. 
if (HasDbg) ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders, Seen); } // Insert all the dbg_values which have not already been inserted in source // order sequence. if (HasDbg) { MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI(); // Sort the source order instructions and use the order to insert debug // values. std::sort(Orders.begin(), Orders.end(), less_first()); SDDbgInfo::DbgIterator DI = DAG->DbgBegin(); SDDbgInfo::DbgIterator DE = DAG->DbgEnd(); // Now emit the rest according to source order. unsigned LastOrder = 0; for (unsigned i = 0, e = Orders.size(); i != e && DI != DE; ++i) { unsigned Order = Orders[i].first; MachineInstr *MI = Orders[i].second; // Insert all SDDbgValue's whose order(s) are before "Order". if (!MI) continue; for (; DI != DE && (*DI)->getOrder() >= LastOrder && (*DI)->getOrder() < Order; ++DI) { if ((*DI)->isInvalidated()) continue; MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap); if (DbgMI) { if (!LastOrder) // Insert to start of the BB (after PHIs). BB->insert(BBBegin, DbgMI); else { // Insert at the instruction, which may be in a different // block, if the block was split by a custom inserter. MachineBasicBlock::iterator Pos = MI; MI->getParent()->insert(Pos, DbgMI); } } } LastOrder = Order; } // Add trailing DbgValue's before the terminator. FIXME: May want to add // some of them before one or more conditional branches? SmallVector<MachineInstr*, 8> DbgMIs; while (DI != DE) { if (!(*DI)->isInvalidated()) if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap)) DbgMIs.push_back(DbgMI); ++DI; } MachineBasicBlock *InsertBB = Emitter.getBlock(); MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator(); InsertBB->insert(Pos, DbgMIs.begin(), DbgMIs.end()); } InsertPos = Emitter.getInsertPos(); return Emitter.getBlock(); }
/// EmitMachineNode - Generate machine code for a target-specific node and /// needed dependencies. /// void InstrEmitter:: EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, DenseMap<SDValue, unsigned> &VRBaseMap) { unsigned Opc = Node->getMachineOpcode(); // Handle subreg insert/extract specially if (Opc == TargetOpcode::EXTRACT_SUBREG || Opc == TargetOpcode::INSERT_SUBREG || Opc == TargetOpcode::SUBREG_TO_REG) { EmitSubregNode(Node, VRBaseMap, IsClone, IsCloned); return; } // Handle COPY_TO_REGCLASS specially. if (Opc == TargetOpcode::COPY_TO_REGCLASS) { EmitCopyToRegClassNode(Node, VRBaseMap); return; } // Handle REG_SEQUENCE specially. if (Opc == TargetOpcode::REG_SEQUENCE) { EmitRegSequence(Node, VRBaseMap, IsClone, IsCloned); return; } if (Opc == TargetOpcode::IMPLICIT_DEF) // We want a unique VR for each IMPLICIT_DEF use. return; const MCInstrDesc &II = TII->get(Opc); unsigned NumResults = CountResults(Node); unsigned NodeOperands = CountOperands(Node); bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0; #ifndef NDEBUG unsigned NumMIOperands = NodeOperands + NumResults; if (II.isVariadic()) assert(NumMIOperands >= II.getNumOperands() && "Too few operands for a variadic node!"); else assert(NumMIOperands >= II.getNumOperands() && NumMIOperands <= II.getNumOperands()+II.getNumImplicitDefs() && "#operands for dag node doesn't match .td file!"); #endif // Create the new machine instruction. MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II); // Add result register values for things that are defined by this // instruction. if (NumResults) CreateVirtualRegisters(Node, MI, II, IsClone, IsCloned, VRBaseMap); // Emit all of the actual operands of this instruction, adding them to the // instruction as appropriate. bool HasOptPRefs = II.getNumDefs() > NumResults; assert((!HasOptPRefs || !HasPhysRegOuts) && "Unable to cope with optional defs and phys regs defs!"); unsigned NumSkip = HasOptPRefs ? 
II.getNumDefs() - NumResults : 0; for (unsigned i = NumSkip; i != NodeOperands; ++i) AddOperand(MI, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); // Transfer all of the memory reference descriptions of this instruction. MI->setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(), cast<MachineSDNode>(Node)->memoperands_end()); // Insert the instruction into position in the block. This needs to // happen before any custom inserter hook is called so that the // hook knows where in the block to insert the replacement code. MBB->insert(InsertPos, MI); // The MachineInstr may also define physregs instead of virtregs. These // physreg values can reach other instructions in different ways: // // 1. When there is a use of a Node value beyond the explicitly defined // virtual registers, we emit a CopyFromReg for one of the implicitly // defined physregs. This only happens when HasPhysRegOuts is true. // // 2. A CopyFromReg reading a physreg may be glued to this instruction. // // 3. A glued instruction may implicitly use a physreg. // // 4. A glued instruction may use a RegisterSDNode operand. // // Collect all the used physreg defs, and make sure that any unused physreg // defs are marked as dead. SmallVector<unsigned, 8> UsedRegs; // Additional results must be physical register defs. if (HasPhysRegOuts) { for (unsigned i = II.getNumDefs(); i < NumResults; ++i) { unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()]; if (!Node->hasAnyUseOfValue(i)) continue; // This implicitly defined physreg has a use. UsedRegs.push_back(Reg); EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap); } } // Scan the glue chain for any used physregs. 
if (Node->getValueType(Node->getNumValues()-1) == MVT::Glue) { for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) { if (F->getOpcode() == ISD::CopyFromReg) { UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg()); continue; } else if (F->getOpcode() == ISD::CopyToReg) { // Skip CopyToReg nodes that are internal to the glue chain. continue; } // Collect declared implicit uses. const MCInstrDesc &MCID = TII->get(F->getMachineOpcode()); UsedRegs.append(MCID.getImplicitUses(), MCID.getImplicitUses() + MCID.getNumImplicitUses()); // In addition to declared implicit uses, we must also check for // direct RegisterSDNode operands. for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i) if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) { unsigned Reg = R->getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) UsedRegs.push_back(Reg); } } } // Finally mark unused registers as dead. if (!UsedRegs.empty() || II.getImplicitDefs()) MI->setPhysRegsDeadExcept(UsedRegs, *TRI); // Run post-isel target hook to adjust this instruction if needed. #ifdef NDEBUG if (II.hasPostISelHook()) #endif TLI->AdjustInstrPostInstrSelection(MI, Node); }
/// EmitSpecialNode - Generate machine code for a target-independent node and /// needed dependencies. void InstrEmitter:: EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, DenseMap<SDValue, unsigned> &VRBaseMap) { switch (Node->getOpcode()) { default: #ifndef NDEBUG Node->dump(); #endif llvm_unreachable("This target-independent node should have been selected!"); case ISD::EntryToken: llvm_unreachable("EntryToken should have been excluded from the schedule!"); case ISD::MERGE_VALUES: case ISD::TokenFactor: // fall thru break; case ISD::CopyToReg: { unsigned SrcReg; SDValue SrcVal = Node->getOperand(2); if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(SrcVal)) SrcReg = R->getReg(); else SrcReg = getVR(SrcVal, VRBaseMap); unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); if (SrcReg == DestReg) // Coalesced away the copy? Ignore. break; BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), DestReg).addReg(SrcReg); break; } case ISD::CopyFromReg: { unsigned SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap); break; } case ISD::EH_LABEL: { MCSymbol *S = cast<EHLabelSDNode>(Node)->getLabel(); BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::EH_LABEL)).addSym(S); break; } case ISD::INLINEASM: { unsigned NumOps = Node->getNumOperands(); if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) --NumOps; // Ignore the glue operand. // Create the inline asm machine instruction. MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), TII->get(TargetOpcode::INLINEASM)); // Add the asm string as an external symbol operand. SDValue AsmStrV = Node->getOperand(InlineAsm::Op_AsmString); const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol(); MI->addOperand(MachineOperand::CreateES(AsmStr)); // Add the HasSideEffect and isAlignStack bits. 
int64_t ExtraInfo = cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_ExtraInfo))-> getZExtValue(); MI->addOperand(MachineOperand::CreateImm(ExtraInfo)); // Add all of the operand registers to the instruction. for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { unsigned Flags = cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); MI->addOperand(MachineOperand::CreateImm(Flags)); ++i; // Skip the ID value. switch (InlineAsm::getKind(Flags)) { default: llvm_unreachable("Bad flags!"); case InlineAsm::Kind_RegDef: for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); // FIXME: Add dead flags for physical and virtual registers defined. // For now, mark physical register defs as implicit to help fast // regalloc. This makes inline asm look a lot like calls. MI->addOperand(MachineOperand::CreateReg(Reg, true, /*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg))); } break; case InlineAsm::Kind_RegDefEarlyClobber: case InlineAsm::Kind_Clobber: for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); MI->addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/ true, /*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg), /*isKill=*/ false, /*isDead=*/ false, /*isUndef=*/false, /*isEarlyClobber=*/ true)); } break; case InlineAsm::Kind_RegUse: // Use of register. case InlineAsm::Kind_Imm: // Immediate. case InlineAsm::Kind_Mem: // Addressing mode. // The addressing mode has been selected, just add all of the // operands to the machine instruction. for (; NumVals; --NumVals, ++i) AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); break; } } // Get the mdnode from the asm if it exists and add it to the instruction. 
SDValue MDV = Node->getOperand(InlineAsm::Op_MDNode); const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD(); if (MD) MI->addOperand(MachineOperand::CreateMetadata(MD)); MBB->insert(InsertPos, MI); break; } } }
// transformInstruction - Perform the transformation of an instruction // to its equivalant AdvSIMD scalar instruction. Update inputs and outputs // to be the correct register class, minimizing cross-class copies. void AArch64AdvSIMDScalar::transformInstruction(MachineInstr &MI) { DEBUG(dbgs() << "Scalar transform: " << MI); MachineBasicBlock *MBB = MI.getParent(); unsigned OldOpc = MI.getOpcode(); unsigned NewOpc = getTransformOpcode(OldOpc); assert(OldOpc != NewOpc && "transform an instruction to itself?!"); // Check if we need a copy for the source registers. unsigned OrigSrc0 = MI.getOperand(1).getReg(); unsigned OrigSrc1 = MI.getOperand(2).getReg(); unsigned Src0 = 0, SubReg0; unsigned Src1 = 0, SubReg1; bool KillSrc0 = false, KillSrc1 = false; if (!MRI->def_empty(OrigSrc0)) { MachineRegisterInfo::def_instr_iterator Def = MRI->def_instr_begin(OrigSrc0); assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!"); MachineOperand *MOSrc0 = getSrcFromCopy(&*Def, MRI, SubReg0); // If there are no other users of the original source, we can delete // that instruction. if (MOSrc0) { Src0 = MOSrc0->getReg(); KillSrc0 = MOSrc0->isKill(); // Src0 is going to be reused, thus, it cannot be killed anymore. MOSrc0->setIsKill(false); if (MRI->hasOneNonDBGUse(OrigSrc0)) { assert(MOSrc0 && "Can't delete copy w/o a valid original source!"); Def->eraseFromParent(); ++NumCopiesDeleted; } } } if (!MRI->def_empty(OrigSrc1)) { MachineRegisterInfo::def_instr_iterator Def = MRI->def_instr_begin(OrigSrc1); assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!"); MachineOperand *MOSrc1 = getSrcFromCopy(&*Def, MRI, SubReg1); // If there are no other users of the original source, we can delete // that instruction. if (MOSrc1) { Src1 = MOSrc1->getReg(); KillSrc1 = MOSrc1->isKill(); // Src0 is going to be reused, thus, it cannot be killed anymore. 
MOSrc1->setIsKill(false); if (MRI->hasOneNonDBGUse(OrigSrc1)) { assert(MOSrc1 && "Can't delete copy w/o a valid original source!"); Def->eraseFromParent(); ++NumCopiesDeleted; } } } // If we weren't able to reference the original source directly, create a // copy. if (!Src0) { SubReg0 = 0; Src0 = MRI->createVirtualRegister(&AArch64::FPR64RegClass); insertCopy(TII, MI, Src0, OrigSrc0, KillSrc0); KillSrc0 = true; } if (!Src1) { SubReg1 = 0; Src1 = MRI->createVirtualRegister(&AArch64::FPR64RegClass); insertCopy(TII, MI, Src1, OrigSrc1, KillSrc1); KillSrc1 = true; } // Create a vreg for the destination. // FIXME: No need to do this if the ultimate user expects an FPR64. // Check for that and avoid the copy if possible. unsigned Dst = MRI->createVirtualRegister(&AArch64::FPR64RegClass); // For now, all of the new instructions have the same simple three-register // form, so no need to special case based on what instruction we're // building. BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), Dst) .addReg(Src0, getKillRegState(KillSrc0), SubReg0) .addReg(Src1, getKillRegState(KillSrc1), SubReg1); // Now copy the result back out to a GPR. // FIXME: Try to avoid this if all uses could actually just use the FPR64 // directly. insertCopy(TII, MI, MI.getOperand(0).getReg(), Dst, true); // Erase the old instruction. MI.eraseFromParent(); ++NumScalarInsnsUsed; }
/// EmitSubregNode - Generate machine code for subreg nodes. /// void InstrEmitter::EmitSubregNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap, bool IsClone, bool IsCloned) { unsigned VRBase = 0; unsigned Opc = Node->getMachineOpcode(); // If the node is only used by a CopyToReg and the dest reg is a vreg, use // the CopyToReg'd destination register instead of creating a new vreg. for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); UI != E; ++UI) { SDNode *User = *UI; if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node) { unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); if (TargetRegisterInfo::isVirtualRegister(DestReg)) { VRBase = DestReg; break; } } } if (Opc == TargetOpcode::EXTRACT_SUBREG) { // EXTRACT_SUBREG is lowered as %dst = COPY %src:sub. There are no // constraints on the %dst register, COPY can target all legal register // classes. unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); const TargetRegisterClass *TRC = TLI->getRegClassFor(Node->getValueType(0)); unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); MachineInstr *DefMI = MRI->getVRegDef(VReg); unsigned SrcReg, DstReg, DefSubIdx; if (DefMI && TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) && SubIdx == DefSubIdx) { // Optimize these: // r1025 = s/zext r1024, 4 // r1026 = extract_subreg r1025, 4 // to a copy // r1026 = copy r1024 VRBase = MRI->createVirtualRegister(TRC); BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg); } else { // VReg may not support a SubIdx sub-register, and we may need to // constrain its register class or issue a COPY to a compatible register // class. VReg = ConstrainForSubReg(VReg, SubIdx, Node->getOperand(0).getValueType(), Node->getDebugLoc()); // Create the destreg if it is missing. if (VRBase == 0) VRBase = MRI->createVirtualRegister(TRC); // Create the extract_subreg machine instruction. 
BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), VRBase).addReg(VReg, 0, SubIdx); } } else if (Opc == TargetOpcode::INSERT_SUBREG || Opc == TargetOpcode::SUBREG_TO_REG) { SDValue N0 = Node->getOperand(0); SDValue N1 = Node->getOperand(1); SDValue N2 = Node->getOperand(2); unsigned SubIdx = cast<ConstantSDNode>(N2)->getZExtValue(); // Figure out the register class to create for the destreg. It should be // the largest legal register class supporting SubIdx sub-registers. // RegisterCoalescer will constrain it further if it decides to eliminate // the INSERT_SUBREG instruction. // // %dst = INSERT_SUBREG %src, %sub, SubIdx // // is lowered by TwoAddressInstructionPass to: // // %dst = COPY %src // %dst:SubIdx = COPY %sub // // There is no constraint on the %src register class. // const TargetRegisterClass *SRC = TLI->getRegClassFor(Node->getValueType(0)); SRC = TRI->getSubClassWithSubReg(SRC, SubIdx); assert(SRC && "No register class supports VT and SubIdx for INSERT_SUBREG"); if (VRBase == 0 || !SRC->hasSubClassEq(MRI->getRegClass(VRBase))) VRBase = MRI->createVirtualRegister(SRC); // Create the insert_subreg or subreg_to_reg machine instruction. 
MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), TII->get(Opc)); MI->addOperand(MachineOperand::CreateReg(VRBase, true)); // If creating a subreg_to_reg, then the first input operand // is an implicit value immediate, otherwise it's a register if (Opc == TargetOpcode::SUBREG_TO_REG) { const ConstantSDNode *SD = cast<ConstantSDNode>(N0); MI->addOperand(MachineOperand::CreateImm(SD->getZExtValue())); } else AddOperand(MI, N0, 0, 0, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); // Add the subregster being inserted AddOperand(MI, N1, 0, 0, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); MI->addOperand(MachineOperand::CreateImm(SubIdx)); MBB->insert(InsertPos, MI); } else llvm_unreachable("Node is not insert_subreg, extract_subreg, or subreg_to_reg"); SDValue Op(Node, 0); bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; (void)isNew; // Silence compiler warning. assert(isNew && "Node emitted out of order - early"); }
// isProfitableToTransform - Predicate function to determine whether an // instruction should be transformed to its equivalent AdvSIMD scalar // instruction. "add Xd, Xn, Xm" ==> "add Dd, Da, Db", for example. bool AArch64AdvSIMDScalar::isProfitableToTransform( const MachineInstr &MI) const { // If this instruction isn't eligible to be transformed (no SIMD equivalent), // early exit since that's the common case. if (!isTransformable(MI)) return false; // Count the number of copies we'll need to add and approximate the number // of copies that a transform will enable us to remove. unsigned NumNewCopies = 3; unsigned NumRemovableCopies = 0; unsigned OrigSrc0 = MI.getOperand(1).getReg(); unsigned OrigSrc1 = MI.getOperand(2).getReg(); unsigned SubReg0; unsigned SubReg1; if (!MRI->def_empty(OrigSrc0)) { MachineRegisterInfo::def_instr_iterator Def = MRI->def_instr_begin(OrigSrc0); assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!"); MachineOperand *MOSrc0 = getSrcFromCopy(&*Def, MRI, SubReg0); // If the source was from a copy, we don't need to insert a new copy. if (MOSrc0) --NumNewCopies; // If there are no other users of the original source, we can delete // that instruction. if (MOSrc0 && MRI->hasOneNonDBGUse(OrigSrc0)) ++NumRemovableCopies; } if (!MRI->def_empty(OrigSrc1)) { MachineRegisterInfo::def_instr_iterator Def = MRI->def_instr_begin(OrigSrc1); assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!"); MachineOperand *MOSrc1 = getSrcFromCopy(&*Def, MRI, SubReg1); if (MOSrc1) --NumNewCopies; // If there are no other users of the original source, we can delete // that instruction. if (MOSrc1 && MRI->hasOneNonDBGUse(OrigSrc1)) ++NumRemovableCopies; } // If any of the uses of the original instructions is a cross class copy, // that's a copy that will be removable if we transform. Likewise, if // any of the uses is a transformable instruction, it's likely the tranforms // will chain, enabling us to save a copy there, too. 
This is an aggressive // heuristic that approximates the graph based cost analysis described above. unsigned Dst = MI.getOperand(0).getReg(); bool AllUsesAreCopies = true; for (MachineRegisterInfo::use_instr_nodbg_iterator Use = MRI->use_instr_nodbg_begin(Dst), E = MRI->use_instr_nodbg_end(); Use != E; ++Use) { unsigned SubReg; if (getSrcFromCopy(&*Use, MRI, SubReg) || isTransformable(*Use)) ++NumRemovableCopies; // If the use is an INSERT_SUBREG, that's still something that can // directly use the FPR64, so we don't invalidate AllUsesAreCopies. It's // preferable to have it use the FPR64 in most cases, as if the source // vector is an IMPLICIT_DEF, the INSERT_SUBREG just goes away entirely. // Ditto for a lane insert. else if (Use->getOpcode() == AArch64::INSERT_SUBREG || Use->getOpcode() == AArch64::INSvi64gpr) ; else AllUsesAreCopies = false; } // If all of the uses of the original destination register are copies to // FPR64, then we won't end up having a new copy back to GPR64 either. if (AllUsesAreCopies) --NumNewCopies; // If a transform will not increase the number of cross-class copies required, // return true. if (NumNewCopies <= NumRemovableCopies) return true; // Finally, even if we otherwise wouldn't transform, check if we're forcing // transformation of everything. return TransformAll; }
// isTransformable - An instruction is transformable when getTransformOpcode
// maps its opcode to a different opcode.
static bool isTransformable(const MachineInstr &MI) {
  const unsigned OldOpc = MI.getOpcode();
  const unsigned NewOpc = getTransformOpcode(OldOpc);
  return NewOpc != OldOpc;
}
void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { // Keep track of regunit ranges. SmallVector<std::pair<LiveRange*, LiveRange::iterator>, 8> RU; for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); if (MRI->reg_nodbg_empty(Reg)) continue; LiveInterval *LI = &getInterval(Reg); if (LI->empty()) continue; // Find the regunit intervals for the assigned register. They may overlap // the virtual register live range, cancelling any kills. RU.clear(); for (MCRegUnitIterator Units(VRM->getPhys(Reg), TRI); Units.isValid(); ++Units) { LiveRange &RURanges = getRegUnit(*Units); if (RURanges.empty()) continue; RU.push_back(std::make_pair(&RURanges, RURanges.find(LI->begin()->end))); } // Every instruction that kills Reg corresponds to a segment range end // point. for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE; ++RI) { // A block index indicates an MBB edge. if (RI->end.isBlock()) continue; MachineInstr *MI = getInstructionFromIndex(RI->end); if (!MI) continue; // Check if any of the regunits are live beyond the end of RI. That could // happen when a physreg is defined as a copy of a virtreg: // // %EAX = COPY %vreg5 // FOO %vreg5 <--- MI, cancel kill because %EAX is live. // BAR %EAX<kill> // // There should be no kill flag on FOO when %vreg5 is rewritten as %EAX. bool CancelKill = false; for (unsigned u = 0, e = RU.size(); u != e; ++u) { LiveRange &RRanges = *RU[u].first; LiveRange::iterator &I = RU[u].second; if (I == RRanges.end()) continue; I = RRanges.advanceTo(I, RI->end); if (I == RRanges.end() || I->start >= RI->end) continue; // I is overlapping RI. CancelKill = true; break; } if (CancelKill) MI->clearRegisterKills(Reg, nullptr); else MI->addRegisterKilled(Reg, nullptr); } } }
/// traceSiblingValue - Trace a value that is about to be spilled back to the
/// real defining instructions by looking through sibling copies. Always stay
/// within the range of OrigVNI so the registers are known to carry the same
/// value.
///
/// Determine if the value is defined by all reloads, so spilling isn't
/// necessary - the value is already in the stack slot.
///
/// Results are cached in SibValues, keyed by value number, so a repeated
/// query for the same UseVNI returns the previously recorded DefMI.
///
/// Return a defining instruction that may be a candidate for rematerialization.
///
MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
                                               VNInfo *OrigVNI) {
  // Check if a cached value already exists. The insertion doubles as the
  // lookup: it only succeeds the first time UseVNI is seen.
  SibValueMap::iterator SVI;
  bool Inserted;
  tie(SVI, Inserted) =
    SibValues.insert(std::make_pair(UseVNI, SibValueInfo(UseReg, UseVNI)));
  if (!Inserted) {
    DEBUG(dbgs() << "Cached value " << PrintReg(UseReg) << ':'
                 << UseVNI->id << '@' << UseVNI->def << ' ' << SVI->second);
    return SVI->second.DefMI;
  }

  DEBUG(dbgs() << "Tracing value " << PrintReg(UseReg) << ':'
               << UseVNI->id << '@' << UseVNI->def << '\n');

  // List of (Reg, VNI) that have been inserted into SibValues, but need to be
  // processed.
  SmallVector<std::pair<unsigned, VNInfo*>, 8> WorkList;
  WorkList.push_back(std::make_pair(UseReg, UseVNI));

  do {
    unsigned Reg;
    VNInfo *VNI;
    tie(Reg, VNI) = WorkList.pop_back_val();
    DEBUG(dbgs() << " " << PrintReg(Reg) << ':' << VNI->id << '@' << VNI->def
                 << ":\t");

    // First check if this value has already been computed.
    SVI = SibValues.find(VNI);
    assert(SVI != SibValues.end() && "Missing SibValues entry");

    // Trace through PHI-defs created by live range splitting.
    if (VNI->isPHIDef()) {
      // Stop at original PHIs.  We don't know the value at the predecessors.
      if (VNI->def == OrigVNI->def) {
        DEBUG(dbgs() << "orig phi value\n");
        SVI->second.DefByOrigPHI = true;
        SVI->second.AllDefsAreReloads = false;
        propagateSiblingValue(SVI);
        continue;
      }

      // This is a PHI inserted by live range splitting.  We could trace the
      // live-out value from predecessor blocks, but that search can be very
      // expensive if there are many predecessors and many more PHIs as
      // generated by tail-dup when it sees an indirectbr.  Instead, look at
      // all the non-PHI defs that have the same value as OrigVNI.  They must
      // jointly dominate VNI->def.  This is not optimal since VNI may actually
      // be jointly dominated by a smaller subset of defs, so there is a chance
      // we will miss an AllDefsAreReloads optimization.

      // Separate all values dominated by OrigVNI into PHIs and non-PHIs.
      SmallVector<VNInfo*, 8> PHIs, NonPHIs;
      LiveInterval &LI = LIS.getInterval(Reg);
      LiveInterval &OrigLI = LIS.getInterval(Original);

      for (LiveInterval::vni_iterator VI = LI.vni_begin(), VE = LI.vni_end();
           VI != VE; ++VI) {
        VNInfo *VNI2 = *VI;
        if (VNI2->isUnused())
          continue;
        // Skip values whose def does not carry OrigVNI in the original
        // interval. The lookup is skipped when the original interval has
        // only one value.
        if (!OrigLI.containsOneValue() &&
            OrigLI.getVNInfoAt(VNI2->def) != OrigVNI)
          continue;
        // A PHI-def located at OrigVNI's def is grouped with the non-PHIs
        // (the "orig defs" in the debug output below).
        if (VNI2->isPHIDef() && VNI2->def != OrigVNI->def)
          PHIs.push_back(VNI2);
        else
          NonPHIs.push_back(VNI2);
      }
      DEBUG(dbgs() << "split phi value, checking " << PHIs.size()
                   << " phi-defs, and " << NonPHIs.size()
                   << " non-phi/orig defs\n");

      // Create entries for all the PHIs.  Don't add them to the worklist, we
      // are processing all of them in one go here.
      for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
        SibValues.insert(std::make_pair(PHIs[i], SibValueInfo(Reg, PHIs[i])));

      // Add every PHI as a dependent of all the non-PHIs.
      for (unsigned i = 0, e = NonPHIs.size(); i != e; ++i) {
        VNInfo *NonPHI = NonPHIs[i];
        // Known value? Try an insertion.
        tie(SVI, Inserted) =
          SibValues.insert(std::make_pair(NonPHI, SibValueInfo(Reg, NonPHI)));
        // Add all the PHIs as dependents of NonPHI.
        for (unsigned pi = 0, pe = PHIs.size(); pi != pe; ++pi)
          SVI->second.Deps.push_back(PHIs[pi]);
        // This is the first time we see NonPHI, add it to the worklist.
        if (Inserted)
          WorkList.push_back(std::make_pair(Reg, NonPHI));
        else
          // Propagate to all inserted PHIs, not just VNI.
          propagateSiblingValue(SVI);
      }

      // Next work list item.
      continue;
    }

    // VNI is not a PHI-def, so it is expected to have a defining instruction.
    MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
    assert(MI && "Missing def");

    // Trace through sibling copies.
    if (unsigned SrcReg = isFullCopyOf(MI, Reg)) {
      if (isSibling(SrcReg)) {
        LiveInterval &SrcLI = LIS.getInterval(SrcReg);
        LiveRangeQuery SrcQ(SrcLI, VNI->def);
        assert(SrcQ.valueIn() && "Copy from non-existing value");
        // Check if this COPY kills its source.
        SVI->second.KillsSource = SrcQ.isKill();
        VNInfo *SrcVNI = SrcQ.valueIn();
        DEBUG(dbgs() << "copy of " << PrintReg(SrcReg) << ':'
                     << SrcVNI->id << '@' << SrcVNI->def
                     << " kill=" << unsigned(SVI->second.KillsSource) << '\n');
        // Known sibling source value? Try an insertion.
        // Note that SVI refers to the source value's entry from here on.
        tie(SVI, Inserted) = SibValues.insert(std::make_pair(SrcVNI,
                                                 SibValueInfo(SrcReg, SrcVNI)));
        // This is the first time we see Src, add it to the worklist.
        if (Inserted)
          WorkList.push_back(std::make_pair(SrcReg, SrcVNI));
        propagateSiblingValue(SVI, VNI);
        // Next work list item.
        continue;
      }
    }

    // Track reachable reloads.
    SVI->second.DefMI = MI;
    SVI->second.SpillMBB = MI->getParent();
    int FI;
    // MI counts as a reload when it loads Reg from this spiller's StackSlot.
    if (Reg == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot) {
      DEBUG(dbgs() << "reload\n");
      propagateSiblingValue(SVI);
      // Next work list item.
      continue;
    }

    // Potential remat candidate.
    DEBUG(dbgs() << "def " << *MI);
    SVI->second.AllDefsAreReloads = false;
    propagateSiblingValue(SVI);
  } while (!WorkList.empty());

  // Look up the value we were looking for.  We already did this lookup at the
  // top of the function, but SibValues may have been invalidated.
  SVI = SibValues.find(UseVNI);
  assert(SVI != SibValues.end() && "Didn't compute requested info");
  DEBUG(dbgs() << " traced to:\t" << SVI->second);
  return SVI->second.DefMI;
}
/// Return the alternative register bank mappings for \p MI.
///
/// Target-specific alternatives are provided for:
///  - 32/64-bit G_OR: all operands on GPR, or all operands on FPR.
///  - 32/64-bit G_BITCAST: GPR->GPR, FPR->FPR, GPR->FPR and FPR->GPR.
///  - 64-bit G_LOAD: result on GPR or FPR, address always on GPR.
/// Instructions carrying extra (e.g. implicit) operands, and every other
/// opcode or size, are forwarded to the generic RegisterBankInfo
/// implementation.
RegisterBankInfo::InstructionMappings
AArch64RegisterBankInfo::getInstrAlternativeMappings(
    const MachineInstr &MI) const {
  const MachineFunction &MF = *MI.getParent()->getParent();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  switch (MI.getOpcode()) {
  case TargetOpcode::G_OR: {
    // 32 and 64-bit or can be mapped on either FPR or
    // GPR for the same cost.
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 32 && Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 3)
      break;

    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1, getValueMapping(PMI_FirstGPR, Size),
        /*NumOperands*/ 3);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1, getValueMapping(PMI_FirstFPR, Size),
        /*NumOperands*/ 3);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    return AltMappings;
  }
  case TargetOpcode::G_BITCAST: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 32 && Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 2)
      break;

    // Both copyCost and getCopyMapping take the destination bank first and
    // the source bank second, so each cost below is queried with the same
    // bank order as the operand mapping it is paired with.
    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1,
        getCopyMapping(AArch64::GPRRegBankID, AArch64::GPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1,
        getCopyMapping(AArch64::FPRRegBankID, AArch64::FPRRegBankID, Size),
        /*NumOperands*/ 2);
    // NOTE(review): the two cross-bank mappings below share ID 3. The ID is
    // kept as is because the code consuming these IDs is not visible here;
    // confirm whether distinct IDs are expected.
    // Destination on FPR, source on GPR.
    const InstructionMapping &GPRToFPRMapping = getInstructionMapping(
        /*ID*/ 3,
        /*Cost*/ copyCost(AArch64::FPRRegBank, AArch64::GPRRegBank, Size),
        getCopyMapping(AArch64::FPRRegBankID, AArch64::GPRRegBankID, Size),
        /*NumOperands*/ 2);
    // Destination on GPR, source on FPR.
    const InstructionMapping &FPRToGPRMapping = getInstructionMapping(
        /*ID*/ 3,
        /*Cost*/ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size),
        getCopyMapping(AArch64::GPRRegBankID, AArch64::FPRRegBankID, Size),
        /*NumOperands*/ 2);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    AltMappings.push_back(&GPRToFPRMapping);
    AltMappings.push_back(&FPRToGPRMapping);
    return AltMappings;
  }
  case TargetOpcode::G_LOAD: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 2)
      break;

    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1,
        getOperandsMapping({getValueMapping(PMI_FirstGPR, Size),
                            // Addresses are GPR 64-bit.
                            getValueMapping(PMI_FirstGPR, 64)}),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1,
        getOperandsMapping({getValueMapping(PMI_FirstFPR, Size),
                            // Addresses are GPR 64-bit.
                            getValueMapping(PMI_FirstGPR, 64)}),
        /*NumOperands*/ 2);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    return AltMappings;
  }
  default:
    break;
  }

  // Nothing target specific applies; use the generic alternatives.
  return RegisterBankInfo::getInstrAlternativeMappings(MI);
}
/// reMaterializeAll - Try to rematerialize as many uses as possible, /// and trim the live ranges after. void InlineSpiller::reMaterializeAll() { // analyzeSiblingValues has already tested all relevant defining instructions. if (!Edit->anyRematerializable(AA)) return; UsedValues.clear(); // Try to remat before all uses of snippets. bool anyRemat = false; for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { unsigned Reg = RegsToSpill[i]; LiveInterval &LI = LIS.getInterval(Reg); for (MachineRegisterInfo::use_nodbg_iterator RI = MRI.use_nodbg_begin(Reg); MachineInstr *MI = RI.skipBundle();) anyRemat |= reMaterializeFor(LI, MI); } if (!anyRemat) return; // Remove any values that were completely rematted. for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { unsigned Reg = RegsToSpill[i]; LiveInterval &LI = LIS.getInterval(Reg); for (LiveInterval::vni_iterator I = LI.vni_begin(), E = LI.vni_end(); I != E; ++I) { VNInfo *VNI = *I; if (VNI->isUnused() || VNI->isPHIDef() || UsedValues.count(VNI)) continue; MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def); MI->addRegisterDead(Reg, &TRI); if (!MI->allDefsAreDead()) continue; DEBUG(dbgs() << "All defs dead: " << *MI); DeadDefs.push_back(MI); } } // Eliminate dead code after remat. Note that some snippet copies may be // deleted here. if (DeadDefs.empty()) return; DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n"); Edit->eliminateDeadDefs(DeadDefs, RegsToSpill); // Get rid of deleted and empty intervals. unsigned ResultPos = 0; for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { unsigned Reg = RegsToSpill[i]; if (!LIS.hasInterval(Reg)) continue; LiveInterval &LI = LIS.getInterval(Reg); if (LI.empty()) { Edit->eraseVirtReg(Reg); continue; } RegsToSpill[ResultPos++] = Reg; } RegsToSpill.erase(RegsToSpill.begin() + ResultPos, RegsToSpill.end()); DEBUG(dbgs() << RegsToSpill.size() << " registers to spill after remat.\n"); }
/// Compute the default register bank mapping for \p MI.
///
/// The mapping is chosen in three stages:
///  1. Non-generic instructions and G_PHI first try getInstrMappingImpl.
///  2. Plain arithmetic/bitwise/shift/FP binary operations use
///     getSameKindOfOperandsMapping; G_BITCAST is mapped as a (possibly
///     cross-bank) copy.
///  3. Everything else gets a per-operand guess (vectors, floating-point
///     opcodes and values wider than 64 bits on FPR, the rest on GPR) that is
///     then fine-tuned for a handful of opcodes.
///
/// \returns the computed mapping, or the invalid instruction mapping when one
/// of the operands has no valid value mapping.
const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  // Try the default logic for non-generic instructions that are either copies
  // or already have some operands assigned to banks.
  if (!isPreISelGenericOpcode(Opc) || Opc == TargetOpcode::G_PHI) {
    const RegisterBankInfo::InstructionMapping &Mapping =
        getInstrMappingImpl(MI);
    if (Mapping.isValid())
      return Mapping;
  }

  switch (Opc) {
    // G_{F|S|U}REM are not listed because they are not legal.
    // Arithmetic ops.
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_GEP:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
    // Bitwise ops.
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
    // Shifts.
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    // Floating point ops.
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
    return getSameKindOfOperandsMapping(MI);
  case TargetOpcode::G_BITCAST: {
    // Non-vector types live on GPR, vectors on FPR; the bitcast is then a
    // copy between the two chosen banks, priced accordingly.
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    unsigned Size = DstTy.getSizeInBits();
    bool DstIsGPR = !DstTy.isVector();
    bool SrcIsGPR = !SrcTy.isVector();
    const RegisterBank &DstRB =
        DstIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
    const RegisterBank &SrcRB =
        SrcIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
    return getInstructionMapping(
        DefaultMappingID, copyCost(DstRB, SrcRB, Size),
        getCopyMapping(DstRB.getID(), SrcRB.getID(), Size),
        /*NumOperands*/ 2);
  }
  default:
    break;
  }

  unsigned NumOperands = MI.getNumOperands();

  // Track the size and bank of each register.  We don't do partial mappings.
  SmallVector<unsigned, 4> OpSize(NumOperands);
  SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands);
  for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
    auto &MO = MI.getOperand(Idx);
    if (!MO.isReg() || !MO.getReg())
      continue;

    LLT Ty = MRI.getType(MO.getReg());
    OpSize[Idx] = Ty.getSizeInBits();

    // As a top-level guess, vectors go in FPRs, scalars and pointers in GPRs.
    // For floating-point instructions, scalars go in FPRs.
    if (Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc) ||
        Ty.getSizeInBits() > 64)
      OpRegBankIdx[Idx] = PMI_FirstFPR;
    else
      OpRegBankIdx[Idx] = PMI_FirstGPR;
  }

  unsigned Cost = 1;
  // Some of the floating-point instructions have mixed GPR and FPR operands:
  // fine-tune the computed mapping.
  // G_BITCAST needs no entry here: the switch above always returns for it.
  switch (Opc) {
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP:
    OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
    break;
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
    OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_FCMP:
    OpRegBankIdx = {PMI_FirstGPR,
                    /* Predicate */ PMI_None, PMI_FirstFPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_LOAD:
    // Loading in vector unit is slightly more expensive.
    // This is actually only true for the LD1R and co instructions,
    // but anyway for the fast mode this number does not matter and
    // for the greedy mode the cost of the cross bank copy will
    // offset this number.
    // FIXME: Should be derived from the scheduling model.
    if (OpRegBankIdx[0] != PMI_FirstGPR) {
      Cost = 2;
    } else {
      // Check if that load feeds fp instructions.
      // In that case, we want the default mapping to be on FPR
      // instead of blind map every scalar to GPR.
      for (const MachineInstr &UseMI :
           MRI.use_instructions(MI.getOperand(0).getReg())) {
        // If we have at least one direct use in a FP instruction,
        // assume this was a floating point load in the IR.
        // If it was not, we would have had a bitcast before
        // reaching that instruction.
        if (isPreISelGenericFloatingPointOpcode(UseMI.getOpcode())) {
          OpRegBankIdx[0] = PMI_FirstFPR;
          break;
        }
      }
    }
    break;
  case TargetOpcode::G_STORE:
    // Check if that store is fed by fp instructions.
    if (OpRegBankIdx[0] == PMI_FirstGPR) {
      unsigned VReg = MI.getOperand(0).getReg();
      if (!VReg)
        break;
      // getVRegDef may return null when the register has no definition;
      // keep the GPR guess in that case instead of dereferencing null.
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (DefMI && isPreISelGenericFloatingPointOpcode(DefMI->getOpcode()))
        OpRegBankIdx[0] = PMI_FirstFPR;
    }
    break;
  default:
    break;
  }

  // Finally construct the computed mapping. Operands that are not (non-zero)
  // registers keep a null value mapping.
  SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
  for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
    if (MI.getOperand(Idx).isReg() && MI.getOperand(Idx).getReg()) {
      auto Mapping = getValueMapping(OpRegBankIdx[Idx], OpSize[Idx]);
      if (!Mapping->isValid())
        return getInvalidInstructionMapping();

      OpdsMapping[Idx] = Mapping;
    }
  }

  return getInstructionMapping(DefaultMappingID, Cost,
                               getOperandsMapping(OpdsMapping), NumOperands);
}
// Test whether Def is safe and profitable to rematerialize. static bool ShouldRematerialize(const MachineInstr &Def, AliasAnalysis &AA, const WebAssemblyInstrInfo *TII) { return Def.isAsCheapAsAMove() && TII->isTriviallyReMaterializable(Def, &AA); }
/// SplitCriticalEdge - Split the edge from this block to Succ by inserting a
/// new block (NMBB) on it. The new block is placed right after this block in
/// the function layout and branches to Succ.
///
/// Any of LiveIntervals, SlotIndexes, LiveVariables, MachineDominatorTree and
/// MachineLoopInfo that is available from pass P is updated for the new
/// block.
///
/// Returns the new block, or nullptr when the edge is not split: Succ is an
/// EH pad, the target requires a structured CFG, AnalyzeBranch fails on this
/// block, or both branch targets of this block are the same block.
MachineBasicBlock *
MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
  // Splitting the critical edge to a landing pad block is non-trivial. Don't do
  // it in this generic function.
  if (Succ->isEHPad())
    return nullptr;

  MachineFunction *MF = getParent();
  DebugLoc DL;  // FIXME: this is nowhere

  // Performance might be harmed on HW that implements branching using exec mask
  // where both sides of the branches are always executed.
  if (MF->getTarget().requiresStructuredCFG())
    return nullptr;

  // We may need to update this's terminator, but we can't do that if
  // AnalyzeBranch fails. If this uses a jump table, we won't touch it.
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
  SmallVector<MachineOperand, 4> Cond;
  if (TII->AnalyzeBranch(*this, TBB, FBB, Cond))
    return nullptr;

  // Avoid bugpoint weirdness: A block may end with a conditional branch but
  // jumps to the same MBB in either case. We have duplicate CFG edges in that
  // case that we can't handle. Since this never happens in properly optimized
  // code, just skip those edges.
  if (TBB && TBB == FBB) {
    DEBUG(dbgs() << "Won't split critical edge after degenerate BB#"
                 << getNumber() << '\n');
    return nullptr;
  }

  // Create the new block and place it immediately after this block.
  MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
  MF->insert(std::next(MachineFunction::iterator(this)), NMBB);
  DEBUG(dbgs() << "Splitting critical edge:"
        " BB#" << getNumber()
        << " -- BB#" << NMBB->getNumber()
        << " -- BB#" << Succ->getNumber() << '\n');

  // Register the new block with LiveIntervals, or with SlotIndexes directly
  // when LiveIntervals is not available.
  LiveIntervals *LIS = P->getAnalysisIfAvailable<LiveIntervals>();
  SlotIndexes *Indexes = P->getAnalysisIfAvailable<SlotIndexes>();
  if (LIS)
    LIS->insertMBBInMaps(NMBB);
  else if (Indexes)
    Indexes->insertMBBInMaps(NMBB);

  // On some targets like Mips, branches may kill virtual registers. Make sure
  // that LiveVariables is properly updated after updateTerminator replaces the
  // terminators.
  LiveVariables *LV = P->getAnalysisIfAvailable<LiveVariables>();

  // Collect a list of virtual registers killed by the terminators.
  // The kill flags are cleared here and restored after the terminators have
  // been updated.
  SmallVector<unsigned, 4> KilledRegs;
  if (LV)
    for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
         I != E; ++I) {
      MachineInstr *MI = &*I;
      for (MachineInstr::mop_iterator OI = MI->operands_begin(),
           OE = MI->operands_end(); OI != OE; ++OI) {
        if (!OI->isReg() || OI->getReg() == 0 ||
            !OI->isUse() || !OI->isKill() || OI->isUndef())
          continue;
        unsigned Reg = OI->getReg();
        if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
            LV->getVarInfo(Reg).removeKill(MI)) {
          KilledRegs.push_back(Reg);
          DEBUG(dbgs() << "Removing terminator kill: " << *MI);
          OI->setIsKill(false);
        }
      }
    }

  // Remember every distinct register mentioned by the current terminators so
  // that their live intervals can be repaired once the terminators changed.
  SmallVector<unsigned, 4> UsedRegs;
  if (LIS) {
    for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
         I != E; ++I) {
      MachineInstr *MI = &*I;

      for (MachineInstr::mop_iterator OI = MI->operands_begin(),
           OE = MI->operands_end(); OI != OE; ++OI) {
        if (!OI->isReg() || OI->getReg() == 0)
          continue;

        unsigned Reg = OI->getReg();
        if (std::find(UsedRegs.begin(), UsedRegs.end(), Reg) == UsedRegs.end())
          UsedRegs.push_back(Reg);
      }
    }
  }

  // Redirect this block's uses of Succ to the new block.
  ReplaceUsesOfBlockWith(Succ, NMBB);

  // If updateTerminator() removes instructions, we need to remove them from
  // SlotIndexes.
  SmallVector<MachineInstr*, 4> Terminators;
  if (Indexes) {
    for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
         I != E; ++I)
      Terminators.push_back(&*I);
  }

  updateTerminator();

  // Drop from SlotIndexes every old terminator that is no longer among the
  // terminators of this block.
  if (Indexes) {
    SmallVector<MachineInstr*, 4> NewTerminators;
    for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
         I != E; ++I)
      NewTerminators.push_back(&*I);

    for (SmallVectorImpl<MachineInstr*>::iterator I = Terminators.begin(),
        E = Terminators.end(); I != E; ++I) {
      if (std::find(NewTerminators.begin(), NewTerminators.end(), *I) ==
          NewTerminators.end())
       Indexes->removeMachineInstrFromMaps(*I);
    }
  }

  // Insert unconditional "jump Succ" instruction in NMBB if necessary.
  NMBB->addSuccessor(Succ);
  if (!NMBB->isLayoutSuccessor(Succ)) {
    Cond.clear();
    TII->InsertBranch(*NMBB, Succ, nullptr, Cond, DL);

    if (Indexes) {
      for (instr_iterator I = NMBB->instr_begin(), E = NMBB->instr_end();
           I != E; ++I) {
        // Some instructions may have been moved to NMBB by updateTerminator(),
        // so we first remove any instruction that already has an index.
        if (Indexes->hasIndex(&*I))
          Indexes->removeMachineInstrFromMaps(&*I);
        Indexes->insertMachineInstrInMaps(&*I);
      }
    }
  }

  // Fix PHI nodes in Succ so they refer to NMBB instead of this
  // PHI operands come in (value, block) pairs starting at operand 1.
  for (MachineBasicBlock::instr_iterator
         i = Succ->instr_begin(),e = Succ->instr_end();
       i != e && i->isPHI(); ++i)
    for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2)
      if (i->getOperand(ni+1).getMBB() == this)
        i->getOperand(ni+1).setMBB(NMBB);

  // Inherit live-ins from the successor
  for (const auto &LI : Succ->liveins())
    NMBB->addLiveIn(LI);

  // Update LiveVariables.
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  if (LV) {
    // Restore kills of virtual registers that were killed by the terminators.
    // The block is scanned backwards and the kill is placed on the first
    // instruction that accepts it.
    while (!KilledRegs.empty()) {
      unsigned Reg = KilledRegs.pop_back_val();
      for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) {
        if (!(--I)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false))
          continue;
        if (TargetRegisterInfo::isVirtualRegister(Reg))
          LV->getVarInfo(Reg).Kills.push_back(&*I);
        DEBUG(dbgs() << "Restored terminator kill: " << *I);
        break;
      }
    }
    // Update relevant live-through information.
    LV->addNewBlock(NMBB, this, Succ);
  }

  if (LIS) {
    // After splitting the edge and updating SlotIndexes, live intervals may be
    // in one of two situations, depending on whether this block was the last in
    // the function. If the original block was the last in the function, all
    // live intervals will end prior to the beginning of the new split block. If
    // the original block was not at the end of the function, all live intervals
    // will extend to the end of the new split block.

    bool isLastMBB =
      std::next(MachineFunction::iterator(NMBB)) == getParent()->end();

    // [StartIndex, EndIndex) is the range added to or removed from intervals
    // below; PrevIndex is the last slot before StartIndex.
    SlotIndex StartIndex = Indexes->getMBBEndIdx(this);
    SlotIndex PrevIndex = StartIndex.getPrevSlot();
    SlotIndex EndIndex = Indexes->getMBBEndIdx(NMBB);

    // Find the registers used from NMBB in PHIs in Succ.
    SmallSet<unsigned, 8> PHISrcRegs;
    for (MachineBasicBlock::instr_iterator
         I = Succ->instr_begin(), E = Succ->instr_end();
         I != E && I->isPHI(); ++I) {
      for (unsigned ni = 1, ne = I->getNumOperands(); ni != ne; ni += 2) {
        if (I->getOperand(ni+1).getMBB() == NMBB) {
          MachineOperand &MO = I->getOperand(ni);
          unsigned Reg = MO.getReg();
          PHISrcRegs.insert(Reg);
          // An undef PHI source has no live value to extend.
          if (MO.isUndef())
            continue;

          LiveInterval &LI = LIS->getInterval(Reg);
          VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
          assert(VNI &&
                 "PHI sources should be live out of their predecessors.");
          LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI));
        }
      }
    }

    // Fix up every other virtual register that is live at PrevIndex,
    // depending on whether it is live at the start of Succ.
    MachineRegisterInfo *MRI = &getParent()->getRegInfo();
    for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
      unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
      if (PHISrcRegs.count(Reg) || !LIS->hasInterval(Reg))
        continue;

      LiveInterval &LI = LIS->getInterval(Reg);
      if (!LI.liveAt(PrevIndex))
        continue;

      bool isLiveOut = LI.liveAt(LIS->getMBBStartIdx(Succ));
      if (isLiveOut && isLastMBB) {
        VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
        assert(VNI && "LiveInterval should have VNInfo where it is live.");
        LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI));
      } else if (!isLiveOut && !isLastMBB) {
        LI.removeSegment(StartIndex, EndIndex);
      }
    }

    // Update all intervals for registers whose uses may have been modified by
    // updateTerminator().
    LIS->repairIntervalsInRange(this, getFirstTerminator(), end(), UsedRegs);
  }

  if (MachineDominatorTree *MDT =
      P->getAnalysisIfAvailable<MachineDominatorTree>())
    MDT->recordSplitCriticalEdge(this, Succ, NMBB);

  if (MachineLoopInfo *MLI = P->getAnalysisIfAvailable<MachineLoopInfo>())
    if (MachineLoop *TIL = MLI->getLoopFor(this)) {
      // If one or the other blocks were not in a loop, the new block is not
      // either, and thus LI doesn't need to be updated.
      if (MachineLoop *DestLoop = MLI->getLoopFor(Succ)) {
        if (TIL == DestLoop) {
          // Both in the same loop, the NMBB joins loop.
          DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
        } else if (TIL->contains(DestLoop)) {
          // Edge from an outer loop to an inner loop.  Add to the outer loop.
          TIL->addBasicBlockToLoop(NMBB, MLI->getBase());
        } else if (DestLoop->contains(TIL)) {
          // Edge from an inner loop to an outer loop.  Add to the outer loop.
          DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
        } else {
          // Edge from two loops with no containment relation.  Because these
          // are natural loops, we know that the destination block must be the
          // header of its loop (adding a branch into a loop elsewhere would
          // create an irreducible loop).
          assert(DestLoop->getHeader() == Succ &&
                 "Should not create irreducible loops!");
          // Note: this local P shadows the Pass parameter P in this scope.
          if (MachineLoop *P = DestLoop->getParentLoop())
            P->addBasicBlockToLoop(NMBB, MLI->getBase());
        }
      }
    }

  return NMBB;
}
void MCS51FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); MCS51MachineFunctionInfo *MCS51FI = MF.getInfo<MCS51MachineFunctionInfo>(); const MCS51InstrInfo &TII = *static_cast<const MCS51InstrInfo*>(MF.getTarget().getInstrInfo()); MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); unsigned RetOpcode = MBBI->getOpcode(); DebugLoc DL = MBBI->getDebugLoc(); switch (RetOpcode) { case MCS51::RET: case MCS51::RETI: break; // These are ok default: llvm_unreachable("Can only insert epilog into returning blocks"); } // Get the number of bytes to allocate from the FrameInfo uint64_t StackSize = MFI->getStackSize(); unsigned CSSize = MCS51FI->getCalleeSavedFrameSize(); uint64_t NumBytes = 0; if (hasFP(MF)) { // Calculate required stack adjustment uint64_t FrameSize = StackSize - 2; NumBytes = FrameSize - CSSize; // pop FPW. BuildMI(MBB, MBBI, DL, TII.get(MCS51::POP16r), MCS51::FPW); } else NumBytes = StackSize - CSSize; // Skip the callee-saved pop instructions. while (MBBI != MBB.begin()) { MachineBasicBlock::iterator PI = prior(MBBI); unsigned Opc = PI->getOpcode(); if (Opc != MCS51::POP16r && !PI->isTerminator()) break; --MBBI; } DL = MBBI->getDebugLoc(); // If there is an ADD16ri or SUB16ri of SPW immediately before this // instruction, merge the two instructions. //if (NumBytes || MFI->hasVarSizedObjects()) // mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes); if (MFI->hasVarSizedObjects()) { BuildMI(MBB, MBBI, DL, TII.get(MCS51::MOV16rr), MCS51::SPW).addReg(MCS51::FPW); if (CSSize) { MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(MCS51::SUB16ri), MCS51::SPW) .addReg(MCS51::SPW).addImm(CSSize); // The SRW implicit def is dead. MI->getOperand(3).setIsDead(); } } else { // adjust stack pointer back: SPW += numbytes if (NumBytes) { MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(MCS51::ADD16ri), MCS51::SPW) .addReg(MCS51::SPW).addImm(NumBytes); // The SRW implicit def is dead. 
MI->getOperand(3).setIsDead(); } } }
/// Insert Thumb2 IT instructions in front of the predicated instructions of
/// \p MBB.
///
/// Every instruction whose IT predicate is not ARMCC::AL starts a new IT
/// block. The block is then grown with up to three following instructions
/// predicated on the same or the opposite condition; an unpredicated
/// instruction accepted by MoveCopyOutOfITBlock() is hoisted in front of the
/// IT instruction instead of terminating the block.
///
/// \returns true if at least one IT instruction was inserted.
bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
  bool Changed = false;

  SmallSet<unsigned, 4> Defs;
  SmallSet<unsigned, 4> Uses;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineInstr *MI = &*MBBI;
    DebugLoc dl = MI->getDebugLoc();
    unsigned PredReg = 0;
    const ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg);

    // Unpredicated instructions do not start an IT block.
    if (CC == ARMCC::AL) {
      ++MBBI;
      continue;
    }

    // Start tracking the registers touched inside this IT block.
    Defs.clear();
    Uses.clear();
    TrackDefUses(MI, Defs, Uses, TRI);

    // Insert an IT instruction; its mask operand is appended once the block
    // has been formed.
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, dl, TII->get(ARM::t2IT)).addImm(CC);

    // Add implicit use of ITSTATE to IT block instructions.
    MI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false /*ifDef*/,
                                             true /*isImp*/,
                                             false /*isKill*/));

    MachineInstr *LastITMI = MI;
    MachineBasicBlock::iterator InsertPos = MIB;
    ++MBBI;

    // Form IT block.
    const ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
    unsigned Mask = 0;
    unsigned Pos = 3;

    // Branches, including tricky ones like LDM_RET, need to end an IT
    // block so check the instruction we just put in the block.
    for (; MBBI != E && Pos &&
           !(MI->getDesc().isBranch() || MI->getDesc().isReturn());
         ++MBBI) {
      if (MBBI->isDebugValue())
        continue;

      MachineInstr *NMI = &*MBBI;
      MI = NMI;

      unsigned NPredReg = 0;
      const ARMCC::CondCodes NCC = llvm::getITInstrPredicate(NMI, NPredReg);
      const bool JoinsBlock = (NCC == CC || NCC == OCC);

      if (!JoinsBlock) {
        // Anything that can neither join the block nor be moved out of the
        // way terminates it.
        if (NCC != ARMCC::AL ||
            !MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses))
          break;

        // Step back first so the loop increment lands on the instruction
        // that followed NMI, then hoist NMI in front of the IT instruction.
        --MBBI;
        MBB.remove(NMI);
        MBB.insert(InsertPos, NMI);
        ++NumMovedInsts;
        continue;
      }

      Mask |= (NCC & 1) << Pos;
      // Add implicit use of ITSTATE.
      NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false /*ifDef*/,
                                                true /*isImp*/,
                                                false /*isKill*/));
      LastITMI = NMI;

      TrackDefUses(NMI, Defs, Uses, TRI);
      --Pos;
    }

    // Finalize IT mask.
    Mask |= (1 << Pos);
    // Tag along (firstcond[0] << 4) with the mask.
    Mask |= (CC & 1) << 4;
    MIB.addImm(Mask);

    // Last instruction in IT block kills ITSTATE.
    LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill();

    Changed = true;
    ++NumITs;
  }

  return Changed;
}
/// Emit the function prologue into the entry block of \p MF.
///
/// When a frame pointer is used, FPW is pushed, reloaded from SPW, and marked
/// live-in to every non-entry block. The stack pointer adjustment is then
/// inserted after the leading PUSH16r instructions.
void MCS51FrameLowering::emitPrologue(MachineFunction &MF) const {
  MachineBasicBlock &EntryMBB = MF.front(); // Prolog goes in entry BB
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MCS51MachineFunctionInfo *MCS51FI = MF.getInfo<MCS51MachineFunctionInfo>();
  const MCS51InstrInfo &TII =
      *static_cast<const MCS51InstrInfo *>(MF.getTarget().getInstrInfo());

  MachineBasicBlock::iterator InsertPt = EntryMBB.begin();
  DebugLoc DL =
      InsertPt != EntryMBB.end() ? InsertPt->getDebugLoc() : DebugLoc();

  // Get the number of bytes to allocate from the FrameInfo.
  const uint64_t StackSize = MFI->getStackSize();
  const bool UsesFP = hasFP(MF);

  // With a frame pointer the frame size is two bytes smaller than the stack
  // size (presumably the slot holding the saved FPW).
  uint64_t NumBytes = (UsesFP ? StackSize - 2 : StackSize) -
                      MCS51FI->getCalleeSavedFrameSize();

  if (UsesFP) {
    // Update the frame offset adjustment.
    MFI->setOffsetAdjustment(-NumBytes);

    // Save FPW into the appropriate stack slot...
    BuildMI(EntryMBB, InsertPt, DL, TII.get(MCS51::PUSH16r))
        .addReg(MCS51::FPW, RegState::Kill);

    // Update FPW with the new base value...
    BuildMI(EntryMBB, InsertPt, DL, TII.get(MCS51::MOV16rr), MCS51::FPW)
        .addReg(MCS51::SPW);

    // Mark the FramePtr as live-in in every block except the entry.
    for (MachineFunction::iterator BB = llvm::next(MF.begin()),
                                   BBEnd = MF.end();
         BB != BBEnd; ++BB)
      BB->addLiveIn(MCS51::FPW);
  }

  // Skip the callee-saved push instructions.
  while (InsertPt != EntryMBB.end() &&
         InsertPt->getOpcode() == MCS51::PUSH16r)
    ++InsertPt;

  if (InsertPt != EntryMBB.end())
    DL = InsertPt->getDebugLoc();

  // No local frame to allocate.
  if (!NumBytes)
    return;

  // NOTE: merging with an adjacent ADD16ri/SUB16ri of SPW (mergeSPUpdates /
  // mergeSPUpdatesDown) is not implemented.

  // adjust stack pointer: SPW -= numbytes
  MachineInstr *Adjust =
      BuildMI(EntryMBB, InsertPt, DL, TII.get(MCS51::SUB16ri), MCS51::SPW)
          .addReg(MCS51::SPW)
          .addImm(NumBytes);
  // The SRW implicit def is dead.
  Adjust->getOperand(3).setIsDead();
}
/// Eliminate every PHI instruction in \p MF.
///
/// The pass proceeds in the following steps:
///   1. Build PHI congruence classes by unioning each PHI destination with all
///      of its sources, and record the defining instructions per block.
///   2. Walk the dominator tree depth-first and split interferences inside
///      each congruence class.
///   3. Insert the copies required for the PHI sources and destinations.
///   4. Recompute the congruence classes, merge the live intervals of each
///      class into a single register and erase the PHIs.
///   5. Fold the inserted PHI destination copies into the merged register and
///      trim the live intervals of the inserted source copies.
///
/// \returns true if at least one PHI instruction was removed.
bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) {
  MRI = &MF.getRegInfo();
  TII = MF.getTarget().getInstrInfo();
  DT = &getAnalysis<MachineDominatorTree>();
  LI = &getAnalysis<LiveIntervals>();

  // Build the initial congruence classes and collect, per basic block, the
  // PHIs and the instructions defining PHI sources.
  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
    for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
         BBI != BBE && BBI->isPHI(); ++BBI) {
      unsigned DestReg = BBI->getOperand(0).getReg();
      addReg(DestReg);
      PHISrcDefs[I].push_back(BBI);

      // PHI operands come in (register, predecessor block) pairs starting at
      // index 1.
      for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
        MachineOperand &SrcMO = BBI->getOperand(i);
        unsigned SrcReg = SrcMO.getReg();
        addReg(SrcReg);
        unionRegs(DestReg, SrcReg);

        MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
        if (DefMI)
          PHISrcDefs[DefMI->getParent()].push_back(DefMI);
      }
    }
  }

  // Perform a depth-first traversal of the dominator tree, splitting
  // interferences amongst PHI-congruence classes.
  DenseMap<unsigned, unsigned> CurrentDominatingParent;
  DenseMap<unsigned, unsigned> ImmediateDominatingParent;
  for (df_iterator<MachineDomTreeNode*> DI = df_begin(DT->getRootNode()),
                                        DE = df_end(DT->getRootNode());
       DI != DE; ++DI) {
    SplitInterferencesForBasicBlock(*DI->getBlock(),
                                    CurrentDominatingParent,
                                    ImmediateDominatingParent);
  }

  // Insert copies for all PHI source and destination registers.
  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
    for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
         BBI != BBE && BBI->isPHI(); ++BBI) {
      InsertCopiesForPHI(BBI, I);
    }
  }

  // FIXME: Preserve the equivalence classes during copy insertion and use
  // the preserved equivalence classes instead of recomputing them.
  RegNodeMap.clear();
  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
    for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
         BBI != BBE && BBI->isPHI(); ++BBI) {
      unsigned DestReg = BBI->getOperand(0).getReg();
      addReg(DestReg);

      for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
        unsigned SrcReg = BBI->getOperand(i).getReg();
        addReg(SrcReg);
        unionRegs(DestReg, SrcReg);
      }
    }
  }

  // Rename every member of a congruence class to one representative register
  // (the first PHI source seen for that color) and erase the PHIs.
  DenseMap<unsigned, unsigned> RegRenamingMap;
  bool Changed = false;
  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
    MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
    while (BBI != BBE && BBI->isPHI()) {
      MachineInstr *PHI = BBI;

      // Operand 1 is read unconditionally below, so the PHI needs a
      // destination and at least one source operand.
      assert(PHI->getNumOperands() > 1 &&
             "PHI must have a destination and at least one source");

      unsigned SrcReg = PHI->getOperand(1).getReg();
      unsigned SrcColor = getRegColor(SrcReg);
      unsigned NewReg = RegRenamingMap[SrcColor];
      if (!NewReg) {
        NewReg = SrcReg;
        RegRenamingMap[SrcColor] = SrcReg;
      }
      MergeLIsAndRename(SrcReg, NewReg);

      // Destinations that received a copy are merged separately below.
      unsigned DestReg = PHI->getOperand(0).getReg();
      if (!InsertedDestCopies.count(DestReg))
        MergeLIsAndRename(DestReg, NewReg);

      for (unsigned i = 3; i < PHI->getNumOperands(); i += 2) {
        unsigned OtherSrcReg = PHI->getOperand(i).getReg();
        MergeLIsAndRename(OtherSrcReg, NewReg);
      }

      // Advance before erasing so the iterator stays valid.
      ++BBI;
      LI->RemoveMachineInstrFromMaps(PHI);
      PHI->eraseFromParent();
      Changed = true;
    }
  }

  // Due to the insertion of copies to split live ranges, the live intervals are
  // guaranteed to not overlap, except in one case: an original PHI source and a
  // PHI destination copy. In this case, they have the same value and thus don't
  // truly intersect, so we merge them into the value live at that point.
  // FIXME: Is there some better way we can handle this?
  for (DestCopyMap::iterator I = InsertedDestCopies.begin(),
                             E = InsertedDestCopies.end();
       I != E; ++I) {
    unsigned DestReg = I->first;
    unsigned DestColor = getRegColor(DestReg);
    unsigned NewReg = RegRenamingMap[DestColor];

    LiveInterval &DestLI = LI->getInterval(DestReg);
    LiveInterval &NewLI = LI->getInterval(NewReg);

    assert(DestLI.ranges.size() == 1
           && "PHI destination copy's live interval should be a single live "
              "range from the beginning of the BB to the copy instruction.");
    LiveRange *DestLR = DestLI.begin();
    VNInfo *NewVNI = NewLI.getVNInfoAt(DestLR->start);
    if (!NewVNI) {
      // The merged register is not live here yet: copy the value number over
      // and mark the copy's source operand as killed.
      NewVNI = NewLI.createValueCopy(DestLR->valno, LI->getVNInfoAllocator());
      MachineInstr *CopyInstr = I->second;
      CopyInstr->getOperand(1).setIsKill(true);
    }

    LiveRange NewLR(DestLR->start, DestLR->end, NewVNI);
    NewLI.addRange(NewLR);

    LI->removeInterval(DestReg);
    MRI->replaceRegWith(DestReg, NewReg);
  }

  // Adjust the live intervals of all PHI source registers to handle the case
  // where the PHIs in successor blocks were the only later uses of the source
  // register.
  for (SrcCopySet::iterator I = InsertedSrcCopySet.begin(),
                            E = InsertedSrcCopySet.end();
       I != E; ++I) {
    MachineBasicBlock *MBB = I->first;
    unsigned SrcReg = I->second;
    if (unsigned RenamedRegister = RegRenamingMap[getRegColor(SrcReg)])
      SrcReg = RenamedRegister;

    LiveInterval &SrcLI = LI->getInterval(SrcReg);

    bool isLiveOut = false;
    for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
                                          SE = MBB->succ_end();
         SI != SE; ++SI) {
      if (SrcLI.liveAt(LI->getMBBStartIdx(*SI))) {
        isLiveOut = true;
        break;
      }
    }

    if (isLiveOut)
      continue;

    // The register dies inside MBB: end its interval at the last use.
    MachineOperand *LastUse = findLastUse(MBB, SrcReg);
    assert(LastUse);
    SlotIndex LastUseIndex = LI->getInstructionIndex(LastUse->getParent());
    SrcLI.removeRange(LastUseIndex.getDefIndex(), LI->getMBBEndIdx(MBB));
    LastUse->setIsKill(true);
  }

  LI->renumber();

  // Release all per-function state.
  Allocator.Reset();
  RegNodeMap.clear();
  PHISrcDefs.clear();
  InsertedSrcCopySet.clear();
  InsertedSrcCopyMap.clear();
  InsertedDestCopies.clear();

  return Changed;
}
bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const { // If the block has no terminators, it just falls into the block after it. MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) return false; // Get the last instruction in the block. MachineInstr *LastInst = I; // If there is only one terminator instruction, process it. unsigned LastOpc = LastInst->getOpcode(); if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { if (!LastInst->getDesc().isBranch()) return true; // Unconditional branch if (LastOpc == Mips::J) { TBB = LastInst->getOperand(0).getMBB(); return false; } Mips::CondCode BranchCode = GetCondFromBranchOpc(LastInst->getOpcode()); if (BranchCode == Mips::COND_INVALID) return true; // Can't handle indirect branch. // Conditional branch // Block ends with fall-through condbranch. if (LastOpc != Mips::COND_INVALID) { int LastNumOp = LastInst->getNumOperands(); TBB = LastInst->getOperand(LastNumOp-1).getMBB(); Cond.push_back(MachineOperand::CreateImm(BranchCode)); for (int i=0; i<LastNumOp-1; i++) { Cond.push_back(LastInst->getOperand(i)); } return false; } } // Get the instruction before it if it is a terminator. MachineInstr *SecondLastInst = I; // If there are three terminators, we don't know what sort of block this is. if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) return true; // If the block ends with Mips::J and a Mips::BNE/Mips::BEQ, handle it. 
unsigned SecondLastOpc = SecondLastInst->getOpcode(); Mips::CondCode BranchCode = GetCondFromBranchOpc(SecondLastOpc); if (BranchCode != Mips::COND_INVALID && LastOpc == Mips::J) { int SecondNumOp = SecondLastInst->getNumOperands(); TBB = SecondLastInst->getOperand(SecondNumOp-1).getMBB(); Cond.push_back(MachineOperand::CreateImm(BranchCode)); for (int i=0; i<SecondNumOp-1; i++) { Cond.push_back(SecondLastInst->getOperand(i)); } FBB = LastInst->getOperand(0).getMBB(); return false; } // If the block ends with two unconditional branches, handle it. The last // one is not executed, so remove it. if ((SecondLastOpc == Mips::J) && (LastOpc == Mips::J)) { TBB = SecondLastInst->getOperand(0).getMBB(); I = LastInst; if (AllowModify) I->eraseFromParent(); return false; } // Otherwise, can't handle this. return true; }
/// SplitInterferencesForBasicBlock - traverses a basic block, splitting any /// interferences found between registers in the same congruence class. It /// takes two DenseMaps as arguments that it also updates: /// /// 1) CurrentDominatingParent, which maps a color to the register in that /// congruence class whose definition was most recently seen. /// /// 2) ImmediateDominatingParent, which maps a register to the register in the /// same congruence class that most immediately dominates it. /// /// This function assumes that it is being called in a depth-first traversal /// of the dominator tree. /// /// The algorithm used here is a generalization of the dominance-based SSA test /// for two variables. If there are variables a_1, ..., a_n such that /// /// def(a_1) dom ... dom def(a_n), /// /// then we can test for an interference between any two a_i by only using O(n) /// interference tests between pairs of variables. If i < j and a_i and a_j /// interfere, then a_i is alive at def(a_j), so it is also alive at def(a_i+1). /// Thus, in order to test for an interference involving a_i, we need only check /// for a potential interference with a_i+1. /// /// This method can be generalized to arbitrary sets of variables by performing /// a depth-first traversal of the dominator tree. As we traverse down a branch /// of the dominator tree, we keep track of the current dominating variable and /// only perform an interference test with that variable. However, when we go to /// another branch of the dominator tree, the definition of the current dominating /// variable may no longer dominate the current block. In order to correct this, /// we need to use a stack of past choices of the current dominating variable /// and pop from this stack until we find a variable whose definition actually /// dominates the current block. 
/// /// There will be one push on this stack for each variable that has become the /// current dominating variable, so instead of using an explicit stack we can /// simply associate the previous choice for a current dominating variable with /// the new choice. This works better in our implementation, where we test for /// interference in multiple distinct sets at once. void StrongPHIElimination::SplitInterferencesForBasicBlock( MachineBasicBlock &MBB, DenseMap<unsigned, unsigned> &CurrentDominatingParent, DenseMap<unsigned, unsigned> &ImmediateDominatingParent) { // Sort defs by their order in the original basic block, as the code below // assumes that it is processing definitions in dominance order. std::vector<MachineInstr*> &DefInstrs = PHISrcDefs[&MBB]; std::sort(DefInstrs.begin(), DefInstrs.end(), MIIndexCompare(LI)); for (std::vector<MachineInstr*>::const_iterator BBI = DefInstrs.begin(), BBE = DefInstrs.end(); BBI != BBE; ++BBI) { for (MachineInstr::const_mop_iterator I = (*BBI)->operands_begin(), E = (*BBI)->operands_end(); I != E; ++I) { const MachineOperand &MO = *I; // FIXME: This would be faster if it were possible to bail out of checking // an instruction's operands after the explicit defs, but this is incorrect // for variadic instructions, which may appear before register allocation // in the future. if (!MO.isReg() || !MO.isDef()) continue; unsigned DestReg = MO.getReg(); if (!DestReg || !TargetRegisterInfo::isVirtualRegister(DestReg)) continue; // If the virtual register being defined is not used in any PHI or has // already been isolated, then there are no more interferences to check. unsigned DestColor = getRegColor(DestReg); if (!DestColor) continue; // The input to this pass sometimes is not in SSA form in every basic // block, as some virtual registers have redefinitions. 
We could eliminate // this by fixing the passes that generate the non-SSA code, or we could // handle it here by tracking defining machine instructions rather than // virtual registers. For now, we just handle the situation conservatively // in a way that will possibly lead to false interferences. unsigned &CurrentParent = CurrentDominatingParent[DestColor]; unsigned NewParent = CurrentParent; if (NewParent == DestReg) continue; // Pop registers from the stack represented by ImmediateDominatingParent // until we find a parent that dominates the current instruction. while (NewParent && (!DT->dominates(MRI->getVRegDef(NewParent), *BBI) || !getRegColor(NewParent))) NewParent = ImmediateDominatingParent[NewParent]; // If NewParent is nonzero, then its definition dominates the current // instruction, so it is only necessary to check for the liveness of // NewParent in order to check for an interference. if (NewParent && LI->getInterval(NewParent).liveAt(LI->getInstructionIndex(*BBI))) { // If there is an interference, always isolate the new register. This // could be improved by using a heuristic that decides which of the two // registers to isolate. isolateReg(DestReg); CurrentParent = NewParent; } else { // If there is no interference, update ImmediateDominatingParent and set // the CurrentDominatingParent for this color to the current register. ImmediateDominatingParent[DestReg] = NewParent; CurrentParent = DestReg; } } } // We now walk the PHIs in successor blocks and check for interferences. This // is necessary because the use of a PHI's operands are logically contained in // the predecessor block. The def of a PHI's destination register is processed // along with the other defs in a basic block. 
CurrentPHIForColor.clear(); for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI) { for (MachineBasicBlock::iterator BBI = (*SI)->begin(), BBE = (*SI)->end(); BBI != BBE && BBI->isPHI(); ++BBI) { MachineInstr *PHI = BBI; // If a PHI is already isolated, either by being isolated directly or // having all of its operands isolated, ignore it. unsigned Color = getPHIColor(PHI); if (!Color) continue; // Find the index of the PHI operand that corresponds to this basic block. unsigned PredIndex; for (PredIndex = 1; PredIndex < PHI->getNumOperands(); PredIndex += 2) { if (PHI->getOperand(PredIndex + 1).getMBB() == &MBB) break; } assert(PredIndex < PHI->getNumOperands()); unsigned PredOperandReg = PHI->getOperand(PredIndex).getReg(); // Pop registers from the stack represented by ImmediateDominatingParent // until we find a parent that dominates the current instruction. unsigned &CurrentParent = CurrentDominatingParent[Color]; unsigned NewParent = CurrentParent; while (NewParent && (!DT->dominates(MRI->getVRegDef(NewParent)->getParent(), &MBB) || !getRegColor(NewParent))) NewParent = ImmediateDominatingParent[NewParent]; CurrentParent = NewParent; // If there is an interference with a register, always isolate the // register rather than the PHI. It is also possible to isolate the // PHI, but that introduces copies for all of the registers involved // in that PHI. if (NewParent && LI->isLiveOutOfMBB(LI->getInterval(NewParent), &MBB) && NewParent != PredOperandReg) isolateReg(NewParent); std::pair<MachineInstr*, unsigned> &CurrentPHI = CurrentPHIForColor[Color]; // If two PHIs have the same operand from every shared predecessor, then // they don't actually interfere. Otherwise, isolate the current PHI. This // could possibly be improved, e.g. we could isolate the PHI with the // fewest operands. if (CurrentPHI.first && CurrentPHI.second != PredOperandReg) isolatePHI(PHI); else CurrentPHI = std::make_pair(PHI, PredOperandReg); } } }
/// Try to narrow the instructions of \p MBB, tracking CPSR liveness and the
/// most recent CPSR definition along the way.
///
/// The per-block summary (HighLatencyCPSR, Visited) is stored in BlockInfo
/// once the block has been processed.
///
/// \returns true if any instruction was reduced.
bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
  bool Changed = false;

  // Yes, CPSR could be livein.
  bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
  MachineInstr *BundleMI = nullptr;

  CPSRDef = nullptr;
  HighLatencyCPSR = false;

  // Check predecessors for the latest CPSRDef. Blocks are visited in RPO, so
  // an unvisited predecessor must be reached through a back-edge and is
  // ignored.
  for (auto *Pred : MBB.predecessors()) {
    const MBBInfo &PredInfo = BlockInfo[Pred->getNumber()];
    if (PredInfo.Visited && PredInfo.HighLatencyCPSR) {
      HighLatencyCPSR = true;
      break;
    }
  }

  // If this BB loops back to itself, conservatively avoid narrowing the
  // first instruction that does partial flag update.
  bool IsSelfLoop = MBB.isSuccessor(&MBB);

  MachineBasicBlock::instr_iterator End = MBB.instr_end();
  MachineBasicBlock::instr_iterator NextMII;
  for (MachineBasicBlock::instr_iterator MII = MBB.instr_begin(); MII != End;
       MII = NextMII) {
    // Capture the successor up front; ReduceMI may replace the current
    // instruction.
    NextMII = std::next(MII);

    MachineInstr *MI = &*MII;
    if (MI->isBundle()) {
      BundleMI = MI;
      continue;
    }
    if (MI->isDebugValue())
      continue;

    LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);

    // Does NextMII belong to the same bundle as MI?
    const bool NextInSameBundle =
        NextMII != End && NextMII->isBundledWithPred();

    if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop)) {
      Changed = true;
      // Continue with whatever instruction now precedes NextMII.
      MI = &*std::prev(NextMII);
      // Removing and reinserting the first instruction in a bundle will break
      // up the bundle. Fix the bundling if it was broken.
      if (NextInSameBundle && !NextMII->isBundledWithPred())
        NextMII->bundleWithPred();
    }

    const bool LastInBundle =
        BundleMI && !NextInSameBundle && MI->isInsideBundle();
    if (LastInBundle) {
      // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
      // marker is only on the BUNDLE instruction. Process the BUNDLE
      // instruction as we finish with the bundled instruction to work around
      // the inconsistency.
      if (BundleMI->killsRegister(ARM::CPSR))
        LiveCPSR = false;

      MachineOperand *CPSRDefMO = BundleMI->findRegisterDefOperand(ARM::CPSR);
      if (CPSRDefMO && !CPSRDefMO->isDead())
        LiveCPSR = true;

      MachineOperand *CPSRUseMO = BundleMI->findRegisterUseOperand(ARM::CPSR);
      if (CPSRUseMO && !CPSRUseMO->isKill())
        LiveCPSR = true;
    }

    bool DefCPSR = false;
    LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);

    if (MI->isCall()) {
      // Calls don't really set CPSR.
      CPSRDef = nullptr;
      HighLatencyCPSR = false;
      IsSelfLoop = false;
    } else if (DefCPSR) {
      // This is the last CPSR defining instruction.
      CPSRDef = MI;
      HighLatencyCPSR = isHighLatencyCPSR(CPSRDef);
      IsSelfLoop = false;
    }
  }

  // Publish the summary for successors visited later.
  MBBInfo &Summary = BlockInfo[MBB.getNumber()];
  Summary.HighLatencyCPSR = HighLatencyCPSR;
  Summary.Visited = true;
  return Changed;
}
/// foldMemoryOperand - Try folding stack slot references in Ops into their /// instructions. /// /// @param Ops Operand indices from analyzeVirtReg(). /// @param LoadMI Load instruction to use instead of stack slot when non-null. /// @return True on success. bool InlineSpiller:: foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, MachineInstr *LoadMI) { if (Ops.empty()) return false; // Don't attempt folding in bundles. MachineInstr *MI = Ops.front().first; if (Ops.back().first != MI || MI->isBundled()) return false; bool WasCopy = MI->isCopy(); unsigned ImpReg = 0; // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied // operands. SmallVector<unsigned, 8> FoldOps; for (unsigned i = 0, e = Ops.size(); i != e; ++i) { unsigned Idx = Ops[i].second; MachineOperand &MO = MI->getOperand(Idx); if (MO.isImplicit()) { ImpReg = MO.getReg(); continue; } // FIXME: Teach targets to deal with subregs. if (MO.getSubReg()) return false; // We cannot fold a load instruction into a def. if (LoadMI && MO.isDef()) return false; // Tied use operands should not be passed to foldMemoryOperand. if (!MI->isRegTiedToDefOperand(Idx)) FoldOps.push_back(Idx); } MachineInstr *FoldMI = LoadMI ? TII.foldMemoryOperand(MI, FoldOps, LoadMI) : TII.foldMemoryOperand(MI, FoldOps, StackSlot); if (!FoldMI) return false; // Remove LIS for any dead defs in the original MI not in FoldMI. for (MIBundleOperands MO(MI); MO.isValid(); ++MO) { if (!MO->isReg()) continue; unsigned Reg = MO->getReg(); if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || MRI.isReserved(Reg)) { continue; } MIBundleOperands::PhysRegInfo RI = MIBundleOperands(FoldMI).analyzePhysReg(Reg, &TRI); if (MO->readsReg()) { assert(RI.Reads && "Cannot fold physreg reader"); continue; } if (RI.Defines) continue; // FoldMI does not define this physreg. Remove the LI segment. 
assert(MO->isDead() && "Cannot fold physreg def"); for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) { if (LiveInterval *LI = LIS.getCachedRegUnit(*Units)) { SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); if (VNInfo *VNI = LI->getVNInfoAt(Idx)) LI->removeValNo(VNI); } } } LIS.ReplaceMachineInstrInMaps(MI, FoldMI); MI->eraseFromParent(); // TII.foldMemoryOperand may have left some implicit operands on the // instruction. Strip them. if (ImpReg) for (unsigned i = FoldMI->getNumOperands(); i; --i) { MachineOperand &MO = FoldMI->getOperand(i - 1); if (!MO.isReg() || !MO.isImplicit()) break; if (MO.getReg() == ImpReg) FoldMI->RemoveOperand(i - 1); } DEBUG(dbgs() << "\tfolded: " << LIS.getInstructionIndex(FoldMI) << '\t' << *FoldMI); if (!WasCopy) ++NumFolded; else if (Ops.front().second == 0) ++NumSpills; else ++NumReloads; return true; }
unsigned CriticalAntiDepBreaker:: BreakAntiDependencies(std::vector<SUnit>& SUnits, MachineBasicBlock::iterator& Begin, MachineBasicBlock::iterator& End, unsigned InsertPosIndex) { // The code below assumes that there is at least one instruction, // so just duck out immediately if the block is empty. if (SUnits.empty()) return 0; // Find the node at the bottom of the critical path. SUnit *Max = 0; for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { SUnit *SU = &SUnits[i]; if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency) Max = SU; } #ifndef NDEBUG { DEBUG(errs() << "Critical path has total latency " << (Max->getDepth() + Max->Latency) << "\n"); DEBUG(errs() << "Available regs:"); for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) { if (KillIndices[Reg] == ~0u) DEBUG(errs() << " " << TRI->getName(Reg)); } DEBUG(errs() << '\n'); } #endif // Track progress along the critical path through the SUnit graph as we walk // the instructions. SUnit *CriticalPathSU = Max; MachineInstr *CriticalPathMI = CriticalPathSU->getInstr(); // Consider this pattern: // A = ... // ... = A // A = ... // ... = A // A = ... // ... = A // A = ... // ... = A // There are three anti-dependencies here, and without special care, // we'd break all of them using the same register: // A = ... // ... = A // B = ... // ... = B // B = ... // ... = B // B = ... // ... = B // because at each anti-dependence, B is the first register that // isn't A which is free. This re-introduces anti-dependencies // at all but one of the original anti-dependencies that we were // trying to break. To avoid this, keep track of the most recent // register that each register was replaced with, avoid // using it to repair an anti-dependence on the same register. // This lets us produce this: // A = ... // ... = A // B = ... // ... = B // C = ... // ... = C // B = ... // ... = B // This still has an anti-dependence on B, but at least it isn't on the // original critical path. 
// // TODO: If we tracked more than one register here, we could potentially // fix that remaining critical edge too. This is a little more involved, // because unlike the most recent register, less recent registers should // still be considered, though only if no other registers are available. unsigned LastNewReg[TargetRegisterInfo::FirstVirtualRegister] = {}; // Attempt to break anti-dependence edges on the critical path. Walk the // instructions from the bottom up, tracking information about liveness // as we go to help determine which registers are available. unsigned Broken = 0; unsigned Count = InsertPosIndex - 1; for (MachineBasicBlock::iterator I = End, E = Begin; I != E; --Count) { MachineInstr *MI = --I; // Check if this instruction has a dependence on the critical path that // is an anti-dependence that we may be able to break. If it is, set // AntiDepReg to the non-zero register associated with the anti-dependence. // // We limit our attention to the critical path as a heuristic to avoid // breaking anti-dependence edges that aren't going to significantly // impact the overall schedule. There are a limited number of registers // and we want to save them for the important edges. // // TODO: Instructions with multiple defs could have multiple // anti-dependencies. The current code here only knows how to break one // edge per instruction. Note that we'd have to be able to break all of // the anti-dependencies in an instruction in order to be effective. unsigned AntiDepReg = 0; if (MI == CriticalPathMI) { if (SDep *Edge = CriticalPathStep(CriticalPathSU)) { SUnit *NextSU = Edge->getSUnit(); // Only consider anti-dependence edges. if (Edge->getKind() == SDep::Anti) { AntiDepReg = Edge->getReg(); assert(AntiDepReg != 0 && "Anti-dependence on reg0?"); if (!AllocatableSet.test(AntiDepReg)) // Don't break anti-dependencies on non-allocatable registers. 
AntiDepReg = 0; else if (KeepRegs.count(AntiDepReg)) // Don't break anti-dependencies if an use down below requires // this exact register. AntiDepReg = 0; else { // If the SUnit has other dependencies on the SUnit that it // anti-depends on, don't bother breaking the anti-dependency // since those edges would prevent such units from being // scheduled past each other regardless. // // Also, if there are dependencies on other SUnits with the // same register as the anti-dependency, don't attempt to // break it. for (SUnit::pred_iterator P = CriticalPathSU->Preds.begin(), PE = CriticalPathSU->Preds.end(); P != PE; ++P) if (P->getSUnit() == NextSU ? (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) : (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) { AntiDepReg = 0; break; } } } CriticalPathSU = NextSU; CriticalPathMI = CriticalPathSU->getInstr(); } else { // We've reached the end of the critical path. CriticalPathSU = 0; CriticalPathMI = 0; } } PrescanInstruction(MI); if (MI->getDesc().hasExtraDefRegAllocReq()) // If this instruction's defs have special allocation requirement, don't // break this anti-dependency. AntiDepReg = 0; else if (AntiDepReg) { // If this instruction has a use of AntiDepReg, breaking it // is invalid. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (Reg == 0) continue; if (MO.isUse() && AntiDepReg == Reg) { AntiDepReg = 0; break; } } } // Determine AntiDepReg's register class, if it is live and is // consistently used within a single class. const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg] : 0; assert((AntiDepReg == 0 || RC != NULL) && "Register should be live if it's causing an anti-dependence!"); if (RC == reinterpret_cast<TargetRegisterClass *>(-1)) AntiDepReg = 0; // Look for a suitable register to use to break the anti-depenence. 
// // TODO: Instead of picking the first free register, consider which might // be the best. if (AntiDepReg != 0) { if (unsigned NewReg = findSuitableFreeRegister(AntiDepReg, LastNewReg[AntiDepReg], RC)) { DEBUG(errs() << "Breaking anti-dependence edge on " << TRI->getName(AntiDepReg) << " with " << RegRefs.count(AntiDepReg) << " references" << " using " << TRI->getName(NewReg) << "!\n"); // Update the references to the old register to refer to the new // register. std::pair<std::multimap<unsigned, MachineOperand *>::iterator, std::multimap<unsigned, MachineOperand *>::iterator> Range = RegRefs.equal_range(AntiDepReg); for (std::multimap<unsigned, MachineOperand *>::iterator Q = Range.first, QE = Range.second; Q != QE; ++Q) Q->second->setReg(NewReg); // We just went back in time and modified history; the // liveness information for the anti-depenence reg is now // inconsistent. Set the state as if it were dead. Classes[NewReg] = Classes[AntiDepReg]; DefIndices[NewReg] = DefIndices[AntiDepReg]; KillIndices[NewReg] = KillIndices[AntiDepReg]; assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u)) && "Kill and Def maps aren't consistent for NewReg!"); Classes[AntiDepReg] = 0; DefIndices[AntiDepReg] = KillIndices[AntiDepReg]; KillIndices[AntiDepReg] = ~0u; assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u)) && "Kill and Def maps aren't consistent for AntiDepReg!"); RegRefs.erase(AntiDepReg); LastNewReg[AntiDepReg] = NewReg; ++Broken; } } ScanInstruction(MI, Count); } return Broken; }
unsigned MipsCodeEmitter::getSizeInsEncoding(const MachineInstr &MI, unsigned OpNo) const { // size is encoded as pos+size-1. return getMachineOpValue(MI, MI.getOperand(OpNo-1)) + getMachineOpValue(MI, MI.getOperand(OpNo)) - 1; }