/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a /// common expression that defines Reg. bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, MachineInstr *CSMI, MachineInstr *MI) { // FIXME: Heuristics that works around the lack the live range splitting. // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in // an immediate predecessor. We don't want to increase register pressure and // end up causing other computation to be spilled. if (MI->getDesc().isAsCheapAsAMove()) { MachineBasicBlock *CSBB = CSMI->getParent(); MachineBasicBlock *BB = MI->getParent(); if (CSBB != BB && !CSBB->isSuccessor(BB)) return false; } // Heuristics #2: If the expression doesn't not use a vr and the only use // of the redundant computation are copies, do not cse. bool HasVRegUse = false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && MO.isUse() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) { HasVRegUse = true; break; } } if (!HasVRegUse) { bool HasNonCopyUse = false; for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E; ++I) { MachineInstr *Use = &*I; // Ignore copies. if (!Use->isCopyLike()) { HasNonCopyUse = true; break; } } if (!HasNonCopyUse) return false; } // Heuristics #3: If the common subexpression is used by PHIs, do not reuse // it unless the defined value is already used in the BB of the new use. bool HasPHI = false; SmallPtrSet<MachineBasicBlock*, 4> CSBBs; for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(CSReg), E = MRI->use_nodbg_end(); I != E; ++I) { MachineInstr *Use = &*I; HasPHI |= Use->isPHI(); CSBBs.insert(Use->getParent()); } if (!HasPHI) return true; return CSBBs.count(MI->getParent()); }
/// \returns true if the specified basic block can fallthrough /// into the block immediately after it. static bool hasFallthrough(const MachineBasicBlock &MBB) { // Get the next machine basic block in the function. MachineFunction::const_iterator MBBI(MBB); // Can't fall off end of function. auto NextBB = std::next(MBBI); if (NextBB == MBB.getParent()->end()) return false; return MBB.isSuccessor(&*NextBB); }
/// shouldTailDuplicate - Determine if it is profitable to duplicate this block. bool TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, MachineBasicBlock &TailBB) { // Only duplicate blocks that end with unconditional branches. if (TailBB.canFallThrough()) return false; // Don't try to tail-duplicate single-block loops. if (TailBB.isSuccessor(&TailBB)) return false; // Set the limit on the cost to duplicate. When optimizing for size, // duplicate only one, because one branch instruction can be eliminated to // compensate for the duplication. unsigned MaxDuplicateCount; if (TailDuplicateSize.getNumOccurrences() == 0 && MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) MaxDuplicateCount = 1; else MaxDuplicateCount = TailDuplicateSize; // If the target has hardware branch prediction that can handle indirect // branches, duplicating them can often make them predictable when there // are common paths through the code. The limit needs to be high enough // to allow undoing the effects of tail merging and other optimizations // that rearrange the predecessors of the indirect branch. if (PreRegAlloc && !TailBB.empty()) { const TargetInstrDesc &TID = TailBB.back().getDesc(); if (TID.isIndirectBranch()) MaxDuplicateCount = 20; } // Check the instructions in the block to determine whether tail-duplication // is invalid or unlikely to be profitable. unsigned InstrCount = 0; for (MachineBasicBlock::const_iterator I = TailBB.begin(); I != TailBB.end(); ++I) { // Non-duplicable things shouldn't be tail-duplicated. if (I->getDesc().isNotDuplicable()) return false; // Do not duplicate 'return' instructions if this is a pre-regalloc run. // A return may expand into a lot more instructions (e.g. reload of callee // saved registers) after PEI. if (PreRegAlloc && I->getDesc().isReturn()) return false; // Avoid duplicating calls before register allocation. Calls presents a // barrier to register allocation so duplicating them may end up increasing // spills. 
if (PreRegAlloc && I->getDesc().isCall()) return false; if (!I->isPHI() && !I->isDebugValue()) InstrCount += 1; if (InstrCount > MaxDuplicateCount) return false; } return true; }
/// UpdateSuccessorsPHIs - After FromBB is tail duplicated into its predecessor
/// blocks, the successors have gained new predecessors. Update the PHI
/// instructions in them accordingly.
void TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
                                             SmallVector<MachineBasicBlock*, 8> &TDBBs,
                                             SmallSetVector<MachineBasicBlock*,8> &Succs) {
  for (SmallSetVector<MachineBasicBlock*, 8>::iterator SI = Succs.begin(),
         SE = Succs.end(); SI != SE; ++SI) {
    MachineBasicBlock *SuccBB = *SI;
    // PHIs are grouped at the front of each block; stop at the first non-PHI.
    for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end();
         II != EE; ++II) {
      if (!II->isPHI())
        break;
      // Find the (register, block) operand pair for FromBB. PHI operands are
      // laid out as: def, then (reg, MBB) pairs starting at index 1.
      unsigned Idx = 0;
      for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) {
        MachineOperand &MO = II->getOperand(i+1);
        if (MO.getMBB() == FromBB) {
          Idx = i;
          break;
        }
      }
      // Every PHI in a successor must have an entry for FromBB.
      assert(Idx != 0);
      MachineOperand &MO0 = II->getOperand(Idx);
      unsigned Reg = MO0.getReg();
      if (isDead) {
        // Folded into the previous BB.
        // There could be duplicate phi source entries. FIXME: Should sdisel
        // or earlier pass fixed this?
        // Walk backwards so RemoveOperand does not shift indices we have yet
        // to visit; the entry at Idx itself is kept for reuse below.
        for (unsigned i = II->getNumOperands()-2; i != Idx; i -= 2) {
          MachineOperand &MO = II->getOperand(i+1);
          if (MO.getMBB() == FromBB) {
            II->RemoveOperand(i+1);
            II->RemoveOperand(i);
          }
        }
      } else
        // FromBB survives, so its PHI entry stays; clear Idx so nothing
        // below overwrites or removes it.
        Idx = 0;
      // If Idx is set, the operands at Idx and Idx+1 must be removed.
      // We reuse the location to avoid expensive RemoveOperand calls.
      DenseMap<unsigned,AvailableValsTy>::iterator LI=SSAUpdateVals.find(Reg);
      if (LI != SSAUpdateVals.end()) {
        // This register is defined in the tail block.
        for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
          MachineBasicBlock *SrcBB = LI->second[j].first;
          // If we didn't duplicate a bb into a particular predecessor, we
          // might still have added an entry to SSAUpdateVals to correctly
          // recompute SSA. If that case, avoid adding a dummy extra argument
          // this PHI.
          if (!SrcBB->isSuccessor(SuccBB))
            continue;
          unsigned SrcReg = LI->second[j].second;
          if (Idx != 0) {
            // Overwrite FromBB's old entry in place for the first source.
            II->getOperand(Idx).setReg(SrcReg);
            II->getOperand(Idx+1).setMBB(SrcBB);
            Idx = 0;
          } else {
            // Subsequent sources get fresh (reg, MBB) operand pairs.
            II->addOperand(MachineOperand::CreateReg(SrcReg, false));
            II->addOperand(MachineOperand::CreateMBB(SrcBB));
          }
        }
      } else {
        // Live in tail block, must also be live in predecessors.
        for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) {
          MachineBasicBlock *SrcBB = TDBBs[j];
          if (Idx != 0) {
            II->getOperand(Idx).setReg(Reg);
            II->getOperand(Idx+1).setMBB(SrcBB);
            Idx = 0;
          } else {
            II->addOperand(MachineOperand::CreateReg(Reg, false));
            II->addOperand(MachineOperand::CreateMBB(SrcBB));
          }
        }
      }
      // If the slot was never reused (no sources applied), drop the stale
      // FromBB entry now.
      if (Idx != 0) {
        II->RemoveOperand(Idx+1);
        II->RemoveOperand(Idx);
      }
    }
  }
}
///
/// Analyze the branch statement to determine if it can be coalesced. This
/// method analyses the branch statement for the given candidate to determine
/// if it can be coalesced. If the branch can be coalesced, then the
/// BranchTargetBlock and the FallThroughBlock are recorded in the specified
/// Candidate.
///
///\param[in,out] Cand The coalescing candidate to analyze
///\return true if and only if the branch can be coalesced, false otherwise
///
bool PPCBranchCoalescing::canCoalesceBranch(CoalescingCandidateInfo &Cand) {
  DEBUG(dbgs() << "Determine if branch block " << Cand.BranchBlock->getNumber()
               << " can be coalesced:");
  MachineBasicBlock *FalseMBB = nullptr;

  // Let the target decode the branch; failure means we cannot reason about it.
  if (TII->analyzeBranch(*Cand.BranchBlock, Cand.BranchTargetBlock, FalseMBB,
                         Cand.Cond)) {
    DEBUG(dbgs() << "TII unable to Analyze Branch - skip\n");
    return false;
  }

  for (auto &I : Cand.BranchBlock->terminators()) {
    DEBUG(dbgs() << "Looking at terminator : " << I << "\n");
    if (!I.isBranch())
      continue;

    // The analyzeBranch method does not include any implicit operands.
    // This is not an issue on PPC but must be handled on other targets.
    // For this pass to be made target-independent, the analyzeBranch API
    // would need to be updated to support implicit operands and there would
    // need to be a way to verify that any implicit operands would not be
    // clobbered by merging blocks. This would include identifying the
    // implicit operands as well as the basic block they are defined in.
    // This could be done by changing the analyzeBranch API to have it also
    // record and return the implicit operands and the blocks where they are
    // defined. Alternatively, the BranchCoalescing code would need to be
    // extended to identify the implicit operands. The analysis in canMerge
    // must then be extended to prove that none of the implicit operands are
    // changed in the blocks that are combined during coalescing.
    if (I.getNumOperands() != I.getNumExplicitOperands()) {
      DEBUG(dbgs() << "Terminator contains implicit operands - skip : " << I
                   << "\n");
      return false;
    }
  }

  // Exception-handling pads have CFG edges this pass does not model.
  if (Cand.BranchBlock->isEHPad() || Cand.BranchBlock->hasEHPadSuccessor()) {
    DEBUG(dbgs() << "EH Pad - skip\n");
    return false;
  }

  // For now only consider triangles (i.e, BranchTargetBlock is set,
  // FalseMBB is null, and BranchTargetBlock is a successor to BranchBlock)
  if (!Cand.BranchTargetBlock || FalseMBB ||
      !Cand.BranchBlock->isSuccessor(Cand.BranchTargetBlock)) {
    DEBUG(dbgs() << "Does not form a triangle - skip\n");
    return false;
  }

  // Ensure there are only two successors
  if (Cand.BranchBlock->succ_size() != 2) {
    DEBUG(dbgs() << "Does not have 2 successors - skip\n");
    return false;
  }

  // Sanity check - the block must be able to fall through
  assert(Cand.BranchBlock->canFallThrough() &&
         "Expecting the block to fall through!");

  // We have already ensured there are exactly two successors to
  // BranchBlock and that BranchTargetBlock is a successor to BranchBlock.
  // Ensure the single fall though block is empty.
  MachineBasicBlock *Succ =
    (*Cand.BranchBlock->succ_begin() == Cand.BranchTargetBlock)
    ? *Cand.BranchBlock->succ_rbegin()
    : *Cand.BranchBlock->succ_begin();

  assert(Succ && "Expecting a valid fall-through block\n");

  if (!Succ->empty()) {
    DEBUG(dbgs() << "Fall-through block contains code -- skip\n");
    return false;
  }

  // The fall-through block must itself branch to the taken target, closing
  // the triangle.
  if (!Succ->isSuccessor(Cand.BranchTargetBlock)) {
    DEBUG(dbgs()
          << "Successor of fall through block is not branch taken block\n");
    return false;
  }

  Cand.FallThroughBlock = Succ;
  DEBUG(dbgs() << "Valid Candidate\n");
  return true;
}
/// Reduce the 32-bit Thumb2 instructions of one basic block to their 16-bit
/// forms where possible, tracking CPSR liveness across the walk. Returns true
/// if any instruction was changed.
bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  // Yes, CPSR could be livein.
  bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
  MachineInstr *BundleMI = 0;

  CPSRDef = 0;
  HighLatencyCPSR = false;

  // Check predecessors for the latest CPSRDef.
  for (MachineBasicBlock::pred_iterator
       I = MBB.pred_begin(), E = MBB.pred_end(); I != E; ++I) {
    const MBBInfo &PInfo = BlockInfo[(*I)->getNumber()];
    if (!PInfo.Visited) {
      // Since blocks are visited in RPO, this must be a back-edge.
      continue;
    }
    if (PInfo.HighLatencyCPSR) {
      HighLatencyCPSR = true;
      break;
    }
  }

  // If this BB loops back to itself, conservatively avoid narrowing the
  // first instruction that does partial flag update.
  bool IsSelfLoop = MBB.isSuccessor(&MBB);
  MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),E = MBB.instr_end();
  MachineBasicBlock::instr_iterator NextMII;
  // NextMII is captured before each reduction so the walk survives MI being
  // replaced in place.
  for (; MII != E; MII = NextMII) {
    NextMII = llvm::next(MII);

    MachineInstr *MI = &*MII;
    if (MI->isBundle()) {
      // Remember the enclosing BUNDLE header; its CPSR kill/def markers are
      // consulted when we leave the bundle below.
      BundleMI = MI;
      continue;
    }
    if (MI->isDebugValue())
      continue;

    LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);

    // Does NextMII belong to the same bundle as MI?
    bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred();

    if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop)) {
      Modified = true;
      // ReduceMI replaced MI; re-point MI at the new instruction.
      MachineBasicBlock::instr_iterator I = prior(NextMII);
      MI = &*I;
      // Removing and reinserting the first instruction in a bundle will break
      // up the bundle. Fix the bundling if it was broken.
      if (NextInSameBundle && !NextMII->isBundledWithPred())
        NextMII->bundleWithPred();
    }

    if (!NextInSameBundle && MI->isInsideBundle()) {
      // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
      // marker is only on the BUNDLE instruction. Process the BUNDLE
      // instruction as we finish with the bundled instruction to work around
      // the inconsistency.
      if (BundleMI->killsRegister(ARM::CPSR))
        LiveCPSR = false;
      MachineOperand *MO = BundleMI->findRegisterDefOperand(ARM::CPSR);
      if (MO && !MO->isDead())
        LiveCPSR = true;
    }

    bool DefCPSR = false;
    LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
    if (MI->isCall()) {
      // Calls don't really set CPSR.
      CPSRDef = 0;
      HighLatencyCPSR = false;
      IsSelfLoop = false;
    } else if (DefCPSR) {
      // This is the last CPSR defining instruction.
      CPSRDef = MI;
      HighLatencyCPSR = isHighLatencyCPSR(CPSRDef);
      IsSelfLoop = false;
    }
  }

  // Publish this block's CPSR summary for its successors' RPO visits.
  MBBInfo &Info = BlockInfo[MBB.getNumber()];
  Info.HighLatencyCPSR = HighLatencyCPSR;
  Info.Visited = true;
  return Modified;
}
/// Reduce the 32-bit Thumb2 instructions of one basic block to their 16-bit
/// forms where possible. Returns true if any instruction was changed.
bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  // Yes, CPSR could be livein.
  bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
  MachineInstr *CPSRDef = 0;
  MachineInstr *BundleMI = 0;

  // If this BB loops back to itself, conservatively avoid narrowing the
  // first instruction that does partial flag update.
  bool IsSelfLoop = MBB.isSuccessor(&MBB);
  MachineBasicBlock::instr_iterator MII = MBB.instr_begin(), E = MBB.instr_end();
  MachineBasicBlock::instr_iterator NextMII;
  // NextMII is captured before any reduction so the walk survives MI being
  // replaced in place.
  for (; MII != E; MII = NextMII) {
    NextMII = llvm::next(MII);

    MachineInstr *MI = &*MII;
    if (MI->isBundle()) {
      // Remember the BUNDLE header; its CPSR markers are consulted when we
      // leave the bundle below.
      BundleMI = MI;
      continue;
    }

    LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);

    unsigned Opcode = MI->getOpcode();
    DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
    if (OPI != ReduceOpcodeMap.end()) {
      const ReduceEntry &Entry = ReduceTable[OPI->second];
      // Ignore "special" cases for now.
      if (Entry.Special) {
        if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
          Modified = true;
          MachineBasicBlock::instr_iterator I = prior(NextMII);
          MI = &*I;
        }
        goto ProcessNext;
      }

      // Try to transform to a 16-bit two-address instruction.
      if (Entry.NarrowOpc2 &&
          ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
        Modified = true;
        MachineBasicBlock::instr_iterator I = prior(NextMII);
        MI = &*I;
        goto ProcessNext;
      }

      // Try to transform to a 16-bit non-two-address instruction.
      if (Entry.NarrowOpc1 &&
          ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
        Modified = true;
        MachineBasicBlock::instr_iterator I = prior(NextMII);
        MI = &*I;
      }
    }

  ProcessNext:
    if (NextMII != E && MI->isInsideBundle() && !NextMII->isInsideBundle()) {
      // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
      // marker is only on the BUNDLE instruction. Process the BUNDLE
      // instruction as we finish with the bundled instruction to work around
      // the inconsistency.
      if (BundleMI->killsRegister(ARM::CPSR))
        LiveCPSR = false;
      MachineOperand *MO = BundleMI->findRegisterDefOperand(ARM::CPSR);
      if (MO && !MO->isDead())
        LiveCPSR = true;
    }

    bool DefCPSR = false;
    LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
    if (MI->isCall()) {
      // Calls don't really set CPSR.
      CPSRDef = 0;
      IsSelfLoop = false;
    } else if (DefCPSR) {
      // This is the last CPSR defining instruction.
      CPSRDef = MI;
      IsSelfLoop = false;
    }
  }

  return Modified;
}
/// Determine if it is profitable to duplicate this block.
bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
                                         MachineBasicBlock &TailBB) {
  // When doing tail-duplication during layout, the block ordering is in flux,
  // so canFallThrough returns a result based on incorrect information and
  // should just be ignored.
  if (!LayoutMode && TailBB.canFallThrough())
    return false;

  // Don't try to tail-duplicate single-block loops.
  if (TailBB.isSuccessor(&TailBB))
    return false;

  // Set the limit on the cost to duplicate. When optimizing for size,
  // duplicate only one, because one branch instruction can be eliminated to
  // compensate for the duplication.
  // Precedence: caller-supplied TailDupSize, then the command-line flag /
  // optsize default.
  unsigned MaxDuplicateCount;
  if (TailDupSize == 0 &&
      TailDuplicateSize.getNumOccurrences() == 0 &&
      MF->getFunction()->optForSize())
    MaxDuplicateCount = 1;
  else if (TailDupSize == 0)
    MaxDuplicateCount = TailDuplicateSize;
  else
    MaxDuplicateCount = TailDupSize;

  // If the block to be duplicated ends in an unanalyzable fallthrough, don't
  // duplicate it.
  // A similar check is necessary in MachineBlockPlacement to make sure pairs of
  // blocks with unanalyzable fallthrough get layed out contiguously.
  MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
  SmallVector<MachineOperand, 4> PredCond;
  if (TII->analyzeBranch(TailBB, PredTBB, PredFBB, PredCond) &&
      TailBB.canFallThrough())
    return false;

  // If the target has hardware branch prediction that can handle indirect
  // branches, duplicating them can often make them predictable when there
  // are common paths through the code. The limit needs to be high enough
  // to allow undoing the effects of tail merging and other optimizations
  // that rearrange the predecessors of the indirect branch.
  bool HasIndirectbr = false;
  if (!TailBB.empty())
    HasIndirectbr = TailBB.back().isIndirectBranch();

  if (HasIndirectbr && PreRegAlloc)
    MaxDuplicateCount = TailDupIndirectBranchSize;

  // Check the instructions in the block to determine whether tail-duplication
  // is invalid or unlikely to be profitable.
  unsigned InstrCount = 0;
  for (MachineInstr &MI : TailBB) {
    // Non-duplicable things shouldn't be tail-duplicated.
    if (MI.isNotDuplicable())
      return false;

    // Convergent instructions can be duplicated only if doing so doesn't add
    // new control dependencies, which is what we're going to do here.
    if (MI.isConvergent())
      return false;

    // Do not duplicate 'return' instructions if this is a pre-regalloc run.
    // A return may expand into a lot more instructions (e.g. reload of callee
    // saved registers) after PEI.
    if (PreRegAlloc && MI.isReturn())
      return false;

    // Avoid duplicating calls before register allocation. Calls presents a
    // barrier to register allocation so duplicating them may end up increasing
    // spills.
    if (PreRegAlloc && MI.isCall())
      return false;

    // PHIs and debug values do not count toward the size budget.
    if (!MI.isPHI() && !MI.isDebugValue())
      InstrCount += 1;

    if (InstrCount > MaxDuplicateCount)
      return false;
  }

  // Check if any of the successors of TailBB has a PHI node in which the
  // value corresponding to TailBB uses a subregister.
  // If a phi node uses a register paired with a subregister, the actual
  // "value type" of the phi may differ from the type of the register without
  // any subregisters. Due to a bug, tail duplication may add a new operand
  // without a necessary subregister, producing an invalid code. This is
  // demonstrated by test/CodeGen/Hexagon/tail-dup-subreg-abort.ll.
  // Disable tail duplication for this case for now, until the problem is
  // fixed.
  for (auto SB : TailBB.successors()) {
    for (auto &I : *SB) {
      if (!I.isPHI())
        break;
      unsigned Idx = getPHISrcRegOpIdx(&I, &TailBB);
      assert(Idx != 0);
      MachineOperand &PU = I.getOperand(Idx);
      if (PU.getSubReg() != 0)
        return false;
    }
  }

  if (HasIndirectbr && PreRegAlloc)
    return true;

  if (IsSimple)
    return true;

  if (!PreRegAlloc)
    return true;

  // Pre-regalloc non-simple blocks need the full duplicability check.
  return canCompletelyDuplicateBB(TailBB);
}
/// converToCTRLoop - check if the loop is a candidate for
/// converting to a CTR loop. If so, then perform the
/// transformation.
///
/// This function works on innermost loops first. A loop can
/// be converted if it is a counting loop; either a register
/// value or an immediate.
///
/// The code makes several assumptions about the representation
/// of the loop in llvm.
/// NOTE(review): this definition is truncated in this chunk of the file;
/// the tail of the function is not visible here.
bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) {
  bool Changed = false;

  // Process nested loops first.
  for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
    Changed |= convertToCTRLoop(*I);
  }

  // If a nested loop has been converted, then we can't convert this loop.
  if (Changed) {
    return Changed;
  }

  SmallVector<MachineInstr *, 2> OldInsts;
  // Are we able to determine the trip count for the loop?
  CountValue *TripCount = getTripCount(L, OldInsts);
  if (TripCount == 0) {
    DEBUG(dbgs() << "failed to get trip count!\n");
    return false;
  }

  if (TripCount->isImm()) {
    DEBUG(dbgs() << "constant trip count: " << TripCount->getImm() << "\n");

    // FIXME: We currently can't form 64-bit constants
    // (including 32-bit unsigned constants)
    if (!isInt<32>(TripCount->getImm()))
      return false;
  }

  // Does the loop contain any invalid instructions?
  if (containsInvalidInstruction(L)) {
    return false;
  }

  MachineBasicBlock *Preheader = L->getLoopPreheader();
  // No preheader means there's no place for the loop instr.
  if (Preheader == 0) {
    return false;
  }
  MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();

  DebugLoc dl;
  if (InsertPos != Preheader->end())
    dl = InsertPos->getDebugLoc();

  MachineBasicBlock *LastMBB = L->getExitingBlock();
  // Don't generate CTR loop if the loop has more than one exit.
  if (LastMBB == 0) {
    return false;
  }
  MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();

  // Determine the loop start.
  MachineBasicBlock *LoopStart = L->getTopBlock();
  if (L->getLoopLatch() != LastMBB) {
    // When the exit and latch are not the same, use the latch block as the
    // start.
    // The loop start address is used only after the 1st iteration, and the loop
    // latch may contains instrs. that need to be executed after the 1st iter.
    LoopStart = L->getLoopLatch();
    // Make sure the latch is a successor of the exit, otherwise it won't work.
    if (!LastMBB->isSuccessor(LoopStart)) {
      return false;
    }
  }

  // Convert the loop to a CTR loop
  DEBUG(dbgs() << "Change to CTR loop at "; L->dump());

  MachineFunction *MF = LastMBB->getParent();
  const PPCSubtarget &Subtarget = MF->getTarget().getSubtarget<PPCSubtarget>();
  bool isPPC64 = Subtarget.isPPC64();

  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
  const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
  const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC;

  unsigned CountReg;
  if (TripCount->isReg()) {
    // Create a copy of the loop count register.
    const TargetRegisterClass *SrcRC =
      MF->getRegInfo().getRegClass(TripCount->getReg());
    CountReg = MF->getRegInfo().createVirtualRegister(RC);
    // Sign-extend to 64 bits when the source class needs it on ppc64;
    // otherwise a plain COPY suffices.
    unsigned CopyOp = (isPPC64 && GPRC->hasSubClassEq(SrcRC)) ?
                        (unsigned) PPC::EXTSW_32_64 :
                        (unsigned) TargetOpcode::COPY;
    BuildMI(*Preheader, InsertPos, dl,
            TII->get(CopyOp), CountReg).addReg(TripCount->getReg());
    if (TripCount->isNeg()) {
      unsigned CountReg1 = CountReg;
      CountReg = MF->getRegInfo().createVirtualRegister(RC);
      BuildMI(*Preheader, InsertPos, dl,
              TII->get(isPPC64 ? PPC::NEG8 : PPC::NEG),
              CountReg).addReg(CountReg1);
    }
  } else {
    assert(TripCount->isImm() && "Expecting immedate vaule for trip count");
    // Put the trip count in a register for transfer into the count register.
    int64_t CountImm = TripCount->getImm();
    if (TripCount->isNeg())
      CountImm = -CountImm;

    CountReg = MF->getRegInfo().createVirtualRegister(RC);
    if (abs64(CountImm) > 0x7FFF) {
      // Materialize a constant outside the signed 16-bit range with LIS+ORI.
      BuildMI(*Preheader, InsertPos, dl,
              TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS),
              CountReg).addImm((CountImm >> 16) & 0xFFFF);
      unsigned CountReg1 = CountReg;
      CountReg = MF->getRegInfo().createVirtualRegister(RC);
      BuildMI(*Preheader, InsertPos, dl,
              TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
              CountReg).addReg(CountReg1).addImm(CountImm & 0xFFFF);
    } else {
/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a /// common expression that defines Reg. bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, MachineInstr *CSMI, MachineInstr *MI) { // FIXME: Heuristics that works around the lack the live range splitting. // If CSReg is used at all uses of Reg, CSE should not increase register // pressure of CSReg. bool MayIncreasePressure = true; if (TargetRegisterInfo::isVirtualRegister(CSReg) && TargetRegisterInfo::isVirtualRegister(Reg)) { MayIncreasePressure = false; SmallPtrSet<MachineInstr*, 8> CSUses; for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) { CSUses.insert(&MI); } for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) { if (!CSUses.count(&MI)) { MayIncreasePressure = true; break; } } } if (!MayIncreasePressure) return true; // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in // an immediate predecessor. We don't want to increase register pressure and // end up causing other computation to be spilled. if (TII->isAsCheapAsAMove(*MI)) { MachineBasicBlock *CSBB = CSMI->getParent(); MachineBasicBlock *BB = MI->getParent(); if (CSBB != BB && !CSBB->isSuccessor(BB)) return false; } // Heuristics #2: If the expression doesn't not use a vr and the only use // of the redundant computation are copies, do not cse. bool HasVRegUse = false; for (const MachineOperand &MO : MI->operands()) { if (MO.isReg() && MO.isUse() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) { HasVRegUse = true; break; } } if (!HasVRegUse) { bool HasNonCopyUse = false; for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) { // Ignore copies. if (!MI.isCopyLike()) { HasNonCopyUse = true; break; } } if (!HasNonCopyUse) return false; } // Heuristics #3: If the common subexpression is used by PHIs, do not reuse // it unless the defined value is already used in the BB of the new use. 
bool HasPHI = false; SmallPtrSet<MachineBasicBlock*, 4> CSBBs; for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) { HasPHI |= MI.isPHI(); CSBBs.insert(MI.getParent()); } if (!HasPHI) return true; return CSBBs.count(MI->getParent()); }
/// converToHardwareLoop - check if the loop is a candidate for
/// converting to a hardware loop. If so, then perform the
/// transformation.
///
/// This function works on innermost loops first. A loop can
/// be converted if it is a counting loop; either a register
/// value or an immediate.
///
/// The code makes several assumptions about the representation
/// of the loop in llvm.
bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
  bool Changed = false;
  // Process nested loops first.
  for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
    Changed |= convertToHardwareLoop(*I);
  }
  // If a nested loop has been converted, then we can't convert this loop.
  if (Changed) {
    return Changed;
  }
  // Are we able to determine the trip count for the loop?
  CountValue *TripCount = getTripCount(L);
  if (TripCount == 0) {
    return false;
  }
  // Does the loop contain any invalid instructions?
  if (containsInvalidInstruction(L)) {
    return false;
  }
  MachineBasicBlock *Preheader = L->getLoopPreheader();
  // No preheader means there's no place for the loop instr.
  if (Preheader == 0) {
    return false;
  }
  MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();

  MachineBasicBlock *LastMBB = L->getExitingBlock();
  // Don't generate hw loop if the loop has more than one exit.
  if (LastMBB == 0) {
    return false;
  }
  MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();

  // Determine the loop start.
  MachineBasicBlock *LoopStart = L->getTopBlock();
  if (L->getLoopLatch() != LastMBB) {
    // When the exit and latch are not the same, use the latch block as the
    // start.
    // The loop start address is used only after the 1st iteration, and the loop
    // latch may contains instrs. that need to be executed after the 1st iter.
    LoopStart = L->getLoopLatch();
    // Make sure the latch is a successor of the exit, otherwise it won't work.
    if (!LastMBB->isSuccessor(LoopStart)) {
      return false;
    }
  }

  // Convert the loop to a hardware loop
  DEBUG(dbgs() << "Change to hardware loop at "; L->dump());

  if (TripCount->isReg()) {
    // Create a copy of the loop count register.
    MachineFunction *MF = LastMBB->getParent();
    const TargetRegisterClass *RC =
      MF->getRegInfo().getRegClass(TripCount->getReg());
    unsigned CountReg = MF->getRegInfo().createVirtualRegister(RC);
    BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
            TII->get(TargetOpcode::COPY), CountReg).addReg(TripCount->getReg());
    if (TripCount->isNeg()) {
      // Negate the count so LOOP0_r sees a positive trip count.
      unsigned CountReg1 = CountReg;
      CountReg = MF->getRegInfo().createVirtualRegister(RC);
      BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
              TII->get(Hexagon::NEG), CountReg).addReg(CountReg1);
    }

    // Add the Loop instruction to the begining of the loop.
    BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
            TII->get(Hexagon::LOOP0_r)).addMBB(LoopStart).addReg(CountReg);
  } else {
    assert(TripCount->isImm() && "Expecting immedate vaule for trip count");
    // Add the Loop immediate instruction to the beginning of the loop.
    int64_t CountImm = TripCount->getImm();
    BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
            TII->get(Hexagon::LOOP0_i)).addMBB(LoopStart).addImm(CountImm);
  }

  // Make sure the loop start always has a reference in the CFG. We need to
  // create a BlockAddress operand to get this mechanism to work both the
  // MachineBasicBlock and BasicBlock objects need the flag set.
  LoopStart->setHasAddressTaken();
  // This line is needed to set the hasAddressTaken flag on the BasicBlock
  // object
  BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock()));

  // Replace the loop branch with an endloop instruction.
  DebugLoc dl = LastI->getDebugLoc();
  BuildMI(*LastMBB, LastI, dl, TII->get(Hexagon::ENDLOOP0)).addMBB(LoopStart);

  // The loop ends with either:
  //  - a conditional branch followed by an unconditional branch, or
  //  - a conditional branch to the loop start.
  if (LastI->getOpcode() == Hexagon::JMP_c ||
      LastI->getOpcode() == Hexagon::JMP_cNot) {
    // delete one and change/add an uncond. branch to out of the loop
    MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB();
    LastI = LastMBB->erase(LastI);
    if (!L->contains(BranchTarget)) {
      if (LastI != LastMBB->end()) {
        TII->RemoveBranch(*LastMBB);
      }
      SmallVector<MachineOperand, 0> Cond;
      TII->InsertBranch(*LastMBB, BranchTarget, 0, Cond, dl);
    }
  } else {
    // Conditional branch to loop start; just delete it.
    LastMBB->erase(LastI);
  }
  delete TripCount;

  ++NumHWLoops;
  return true;
}
/// /// Analyze the branch statement to determine if it can be coalesced. This /// method analyses the branch statement for the given candidate to determine /// if it can be coalesced. If the branch can be coalesced, then the /// BranchTargetBlock and the FallThroughBlock are recorded in the specified /// Candidate. /// ///\param[in,out] Cand The coalescing candidate to analyze ///\return true if and only if the branch can be coalesced, false otherwise /// bool BranchCoalescing::canCoalesceBranch(CoalescingCandidateInfo &Cand) { DEBUG(dbgs() << "Determine if branch block " << Cand.BranchBlock->getNumber() << " can be coalesced:"); MachineBasicBlock *FalseMBB = nullptr; if (TII->analyzeBranch(*Cand.BranchBlock, Cand.BranchTargetBlock, FalseMBB, Cand.Cond)) { DEBUG(dbgs() << "TII unable to Analyze Branch - skip\n"); return false; } for (auto &I : Cand.BranchBlock->terminators()) { DEBUG(dbgs() << "Looking at terminator : " << I << "\n"); if (!I.isBranch()) continue; if (I.getNumOperands() != I.getNumExplicitOperands()) { DEBUG(dbgs() << "Terminator contains implicit operands - skip : " << I << "\n"); return false; } } if (Cand.BranchBlock->isEHPad() || Cand.BranchBlock->hasEHPadSuccessor()) { DEBUG(dbgs() << "EH Pad - skip\n"); return false; } // For now only consider triangles (i.e, BranchTargetBlock is set, // FalseMBB is null, and BranchTargetBlock is a successor to BranchBlock) if (!Cand.BranchTargetBlock || FalseMBB || !Cand.BranchBlock->isSuccessor(Cand.BranchTargetBlock)) { DEBUG(dbgs() << "Does not form a triangle - skip\n"); return false; } // Ensure there are only two successors if (Cand.BranchBlock->succ_size() != 2) { DEBUG(dbgs() << "Does not have 2 successors - skip\n"); return false; } // Sanity check - the block must be able to fall through assert(Cand.BranchBlock->canFallThrough() && "Expecting the block to fall through!"); // We have already ensured there are exactly two successors to // BranchBlock and that BranchTargetBlock is a successor to 
BranchBlock. // Ensure the single fall though block is empty. MachineBasicBlock *Succ = (*Cand.BranchBlock->succ_begin() == Cand.BranchTargetBlock) ? *Cand.BranchBlock->succ_rbegin() : *Cand.BranchBlock->succ_begin(); assert(Succ && "Expecting a valid fall-through block\n"); if (!Succ->empty()) { DEBUG(dbgs() << "Fall-through block contains code -- skip\n"); return false; } if (!Succ->isSuccessor(Cand.BranchTargetBlock)) { DEBUG(dbgs() << "Successor of fall through block is not branch taken block\n"); return false; } Cand.FallThroughBlock = Succ; DEBUG(dbgs() << "Valid Candidate\n"); return true; }