// Split MBB if it has two direct jumps/branches. void MipsLongBranch::splitMBB(MachineBasicBlock *MBB) { ReverseIter End = MBB->rend(); ReverseIter LastBr = getNonDebugInstr(MBB->rbegin(), End); // Return if MBB has no branch instructions. if ((LastBr == End) || (!LastBr->isConditionalBranch() && !LastBr->isUnconditionalBranch())) return; ReverseIter FirstBr = getNonDebugInstr(std::next(LastBr), End); // MBB has only one branch instruction if FirstBr is not a branch // instruction. if ((FirstBr == End) || (!FirstBr->isConditionalBranch() && !FirstBr->isUnconditionalBranch())) return; assert(!FirstBr->isIndirectBranch() && "Unexpected indirect branch found."); // Create a new MBB. Move instructions in MBB to the newly created MBB. MachineBasicBlock *NewMBB = MF->CreateMachineBasicBlock(MBB->getBasicBlock()); // Insert NewMBB and fix control flow. MachineBasicBlock *Tgt = getTargetMBB(*FirstBr); NewMBB->transferSuccessors(MBB); NewMBB->removeSuccessor(Tgt, true); MBB->addSuccessor(NewMBB); MBB->addSuccessor(Tgt); MF->insert(std::next(MachineFunction::iterator(MBB)), NewMBB); NewMBB->splice(NewMBB->end(), MBB, (++LastBr).base(), MBB->end()); }
/// Split the basic block containing MI into two blocks, which are joined by /// an unconditional branch. Update data structures and renumber blocks to /// account for this change and returns the newly created block. MachineBasicBlock *BranchRelaxation::splitBlockBeforeInstr(MachineInstr &MI, MachineBasicBlock *DestBB) { MachineBasicBlock *OrigBB = MI.getParent(); // Create a new MBB for the code after the OrigBB. MachineBasicBlock *NewBB = MF->CreateMachineBasicBlock(OrigBB->getBasicBlock()); MF->insert(++OrigBB->getIterator(), NewBB); // Splice the instructions starting with MI over to NewBB. NewBB->splice(NewBB->end(), OrigBB, MI.getIterator(), OrigBB->end()); // Add an unconditional branch from OrigBB to NewBB. // Note the new unconditional branch is not being recorded. // There doesn't seem to be meaningful DebugInfo available; this doesn't // correspond to anything in the source. TII->insertUnconditionalBranch(*OrigBB, NewBB, DebugLoc()); // Insert an entry into BlockInfo to align it properly with the block numbers. BlockInfo.insert(BlockInfo.begin() + NewBB->getNumber(), BasicBlockInfo()); NewBB->transferSuccessors(OrigBB); OrigBB->addSuccessor(NewBB); OrigBB->addSuccessor(DestBB); // Cleanup potential unconditional branch to successor block. // Note that updateTerminator may change the size of the blocks. NewBB->updateTerminator(); OrigBB->updateTerminator(); // Figure out how large the OrigBB is. As the first half of the original // block, it cannot contain a tablejump. The size includes // the new jump we added. (It should be possible to do this without // recounting everything, but it's very confusing, and this is rarely // executed.) BlockInfo[OrigBB->getNumber()].Size = computeBlockSize(*OrigBB); // Figure out how large the NewMBB is. As the second half of the original // block, it may contain a tablejump. BlockInfo[NewBB->getNumber()].Size = computeBlockSize(*NewBB); // All BBOffsets following these blocks must be modified. adjustBlockOffsets(*OrigBB); // Need to fix live-in lists if we track liveness. if (TRI->trackLivenessAfterRegAlloc(*MF)) computeLiveIns(LiveRegs, *TRI, *NewBB); ++NumSplit; return NewBB; }
bool AArch64BranchRelaxation::relaxBranchInstructions() { bool Changed = false; // Relaxing branches involves creating new basic blocks, so re-eval // end() for termination. for (MachineFunction::iterator I = MF->begin(); I != MF->end(); ++I) { MachineBasicBlock &MBB = *I; MachineBasicBlock::iterator J = MBB.getFirstTerminator(); if (J == MBB.end()) continue; MachineBasicBlock::iterator Next; for (MachineBasicBlock::iterator J = MBB.getFirstTerminator(); J != MBB.end(); J = Next) { Next = std::next(J); MachineInstr &MI = *J; if (MI.isConditionalBranch()) { MachineBasicBlock *DestBB = getDestBlock(MI); if (!isBlockInRange(MI, *DestBB)) { if (Next != MBB.end() && Next->isConditionalBranch()) { // If there are multiple conditional branches, this isn't an // analyzable block. Split later terminators into a new block so // each one will be analyzable. MachineBasicBlock *NewBB = splitBlockBeforeInstr(*Next); NewBB->transferSuccessors(&MBB); MBB.addSuccessor(NewBB); MBB.addSuccessor(DestBB); // Cleanup potential unconditional branch to successor block. NewBB->updateTerminator(); MBB.updateTerminator(); } else { fixupConditionalBranch(MI); ++NumConditionalRelaxed; } Changed = true; // This may have modified all of the terminators, so start over. Next = MBB.getFirstTerminator(); } } } } return Changed; }
// MI is a load-register-on-condition pseudo instruction that could not be // handled as a single hardware instruction. Replace it by a branch sequence. bool SystemZExpandPseudo::expandLOCRMux(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator &NextMBBI) { MachineFunction &MF = *MBB.getParent(); const BasicBlock *BB = MBB.getBasicBlock(); MachineInstr &MI = *MBBI; DebugLoc DL = MI.getDebugLoc(); unsigned DestReg = MI.getOperand(0).getReg(); unsigned SrcReg = MI.getOperand(2).getReg(); unsigned CCValid = MI.getOperand(3).getImm(); unsigned CCMask = MI.getOperand(4).getImm(); LivePhysRegs LiveRegs(TII->getRegisterInfo()); LiveRegs.addLiveOuts(MBB); for (auto I = std::prev(MBB.end()); I != MBBI; --I) LiveRegs.stepBackward(*I); // Splice MBB at MI, moving the rest of the block into RestMBB. MachineBasicBlock *RestMBB = MF.CreateMachineBasicBlock(BB); MF.insert(std::next(MachineFunction::iterator(MBB)), RestMBB); RestMBB->splice(RestMBB->begin(), &MBB, MI, MBB.end()); RestMBB->transferSuccessors(&MBB); for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I) RestMBB->addLiveIn(*I); // Create a new block MoveMBB to hold the move instruction. MachineBasicBlock *MoveMBB = MF.CreateMachineBasicBlock(BB); MF.insert(std::next(MachineFunction::iterator(MBB)), MoveMBB); MoveMBB->addLiveIn(SrcReg); for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I) MoveMBB->addLiveIn(*I); // At the end of MBB, create a conditional branch to RestMBB if the // condition is false, otherwise fall through to MoveMBB. BuildMI(&MBB, DL, TII->get(SystemZ::BRC)) .addImm(CCValid).addImm(CCMask ^ CCValid).addMBB(RestMBB); MBB.addSuccessor(RestMBB); MBB.addSuccessor(MoveMBB); // In MoveMBB, emit an instruction to move SrcReg into DestReg, // then fall through to RestMBB. TII->copyPhysReg(*MoveMBB, MoveMBB->end(), DL, DestReg, SrcReg, MI.getOperand(2).isKill()); MoveMBB->addSuccessor(RestMBB); NextMBBI = MBB.end(); MI.eraseFromParent(); return true; }
/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each /// of its predecessors. bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, SmallVector<MachineBasicBlock*, 8> &TDBBs, SmallVector<MachineInstr*, 16> &Copies) { if (!shouldTailDuplicate(MF, *TailBB)) return false; DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); // Iterate through all the unique predecessors and tail-duplicate this // block into them, if possible. Copying the list ahead of time also // avoids trouble with the predecessor list reallocating. bool Changed = false; SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(), TailBB->pred_end()); DenseSet<unsigned> UsedByPhi; getRegsUsedByPHIs(*TailBB, &UsedByPhi); for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), PE = Preds.end(); PI != PE; ++PI) { MachineBasicBlock *PredBB = *PI; assert(TailBB != PredBB && "Single-block loop should have been rejected earlier!"); // EH edges are ignored by AnalyzeBranch. if (PredBB->succ_size() > 1) continue; MachineBasicBlock *PredTBB, *PredFBB; SmallVector<MachineOperand, 4> PredCond; if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) continue; if (!PredCond.empty()) continue; // Don't duplicate into a fall-through predecessor (at least for now). if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) continue; DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB << "From Succ: " << *TailBB); TDBBs.push_back(PredBB); // Remove PredBB's unconditional branch. TII->RemoveBranch(*PredBB); // Clone the contents of TailBB into PredBB. DenseMap<unsigned, unsigned> LocalVRMap; SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; MachineBasicBlock::iterator I = TailBB->begin(); while (I != TailBB->end()) { MachineInstr *MI = &*I; ++I; if (MI->isPHI()) { // Replace the uses of the def of the PHI with the register coming // from PredBB. ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true); } else { // Replace def of virtual registers with new registers, and update // uses with PHI source register or the new registers. DuplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap, UsedByPhi); } } MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(), TII->get(TargetOpcode::COPY), CopyInfos[i].first).addReg(CopyInfos[i].second)); } // Simplify TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true); NumInstrDups += TailBB->size() - 1; // subtract one for removed branch // Update the CFG. PredBB->removeSuccessor(PredBB->succ_begin()); assert(PredBB->succ_empty() && "TailDuplicate called on block with multiple successors!"); for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(), E = TailBB->succ_end(); I != E; ++I) PredBB->addSuccessor(*I); Changed = true; ++NumTailDups; } // If TailBB was duplicated into all its predecessors except for the prior // block, which falls through unconditionally, move the contents of this // block into the prior block. MachineBasicBlock *PrevBB = prior(MachineFunction::iterator(TailBB)); MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0; SmallVector<MachineOperand, 4> PriorCond; // This has to check PrevBB->succ_size() because EH edges are ignored by // AnalyzeBranch. if (PrevBB->succ_size() == 1 && !TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) && PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 && !TailBB->hasAddressTaken()) { DEBUG(dbgs() << "\nMerging into block: " << *PrevBB << "From MBB: " << *TailBB); if (PreRegAlloc) { DenseMap<unsigned, unsigned> LocalVRMap; SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; MachineBasicBlock::iterator I = TailBB->begin(); // Process PHI instructions first. while (I != TailBB->end() && I->isPHI()) { // Replace the uses of the def of the PHI with the register coming // from PredBB. MachineInstr *MI = &*I++; ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true); if (MI->getParent()) MI->eraseFromParent(); } // Now copy the non-PHI instructions. while (I != TailBB->end()) { // Replace def of virtual registers with new registers, and update // uses with PHI source register or the new registers. MachineInstr *MI = &*I++; DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi); MI->eraseFromParent(); } MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator(); for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { Copies.push_back(BuildMI(*PrevBB, Loc, DebugLoc(), TII->get(TargetOpcode::COPY), CopyInfos[i].first) .addReg(CopyInfos[i].second)); } } else { // No PHIs to worry about, just splice the instructions over. PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end()); } PrevBB->removeSuccessor(PrevBB->succ_begin()); assert(PrevBB->succ_empty()); PrevBB->transferSuccessors(TailBB); TDBBs.push_back(PrevBB); Changed = true; } // If this is after register allocation, there are no phis to fix. if (!PreRegAlloc) return Changed; // If we made no changes so far, we are safe. if (!Changed) return Changed; // Handle the nasty case in that we duplicated a block that is part of a loop // into some but not all of its predecessors. For example: // 1 -> 2 <-> 3 | // \ | // \---> rest | // if we duplicate 2 into 1 but not into 3, we end up with // 12 -> 3 <-> 2 -> rest | // \ / | // \----->-----/ | // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced // with a phi in 3 (which now dominates 2). // What we do here is introduce a copy in 3 of the register defined by the // phi, just like when we are duplicating 2 into 3, but we don't copy any // real instructions or remove the 3 -> 2 edge from the phi in 2. for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), PE = Preds.end(); PI != PE; ++PI) { MachineBasicBlock *PredBB = *PI; if (std::find(TDBBs.begin(), TDBBs.end(), PredBB) != TDBBs.end()) continue; // EH edges if (PredBB->succ_size() != 1) continue; DenseMap<unsigned, unsigned> LocalVRMap; SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; MachineBasicBlock::iterator I = TailBB->begin(); // Process PHI instructions first. while (I != TailBB->end() && I->isPHI()) { // Replace the uses of the def of the PHI with the register coming // from PredBB. MachineInstr *MI = &*I++; ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false); } MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(), TII->get(TargetOpcode::COPY), CopyInfos[i].first).addReg(CopyInfos[i].second)); } } return Changed; }
/// Splits a MachineBasicBlock to branch before \p SplitBefore. The original /// branch is \p OrigBranch. The target of the new branch can either be the same /// as the target of the original branch or the fallthrough successor of the /// original block as determined by \p BranchToFallThrough. The branch /// conditions will be inverted according to \p InvertNewBranch and /// \p InvertOrigBranch. If an instruction that previously fed the branch is to /// be deleted, it is provided in \p MIToDelete and \p NewCond will be used as /// the branch condition. The branch probabilities will be set if the /// MachineBranchProbabilityInfo isn't null. static bool splitMBB(BlockSplitInfo &BSI) { assert(BSI.allInstrsInSameMBB() && "All instructions must be in the same block."); MachineBasicBlock *ThisMBB = BSI.OrigBranch->getParent(); MachineFunction *MF = ThisMBB->getParent(); MachineRegisterInfo *MRI = &MF->getRegInfo(); assert(MRI->isSSA() && "Can only do this while the function is in SSA form."); if (ThisMBB->succ_size() != 2) { LLVM_DEBUG( dbgs() << "Don't know how to handle blocks that don't have exactly" << " two successors.\n"); return false; } const PPCInstrInfo *TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo(); unsigned OrigBROpcode = BSI.OrigBranch->getOpcode(); unsigned InvertedOpcode = OrigBROpcode == PPC::BC ? PPC::BCn : OrigBROpcode == PPC::BCn ? PPC::BC : OrigBROpcode == PPC::BCLR ? PPC::BCLRn : PPC::BCLR; unsigned NewBROpcode = BSI.InvertNewBranch ? InvertedOpcode : OrigBROpcode; MachineBasicBlock *OrigTarget = BSI.OrigBranch->getOperand(1).getMBB(); MachineBasicBlock *OrigFallThrough = OrigTarget == *ThisMBB->succ_begin() ? *ThisMBB->succ_rbegin() : *ThisMBB->succ_begin(); MachineBasicBlock *NewBRTarget = BSI.BranchToFallThrough ? OrigFallThrough : OrigTarget; BranchProbability ProbToNewTarget = !BSI.MBPI ? BranchProbability::getUnknown() : BSI.MBPI->getEdgeProbability(ThisMBB, NewBRTarget); // Create a new basic block. MachineBasicBlock::iterator InsertPoint = BSI.SplitBefore; const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock(); MachineFunction::iterator It = ThisMBB->getIterator(); MachineBasicBlock *NewMBB = MF->CreateMachineBasicBlock(LLVM_BB); MF->insert(++It, NewMBB); // Move everything after SplitBefore into the new block. NewMBB->splice(NewMBB->end(), ThisMBB, InsertPoint, ThisMBB->end()); NewMBB->transferSuccessors(ThisMBB); // Add the two successors to ThisMBB. The probabilities come from the // existing blocks if available. ThisMBB->addSuccessor(NewBRTarget, ProbToNewTarget); ThisMBB->addSuccessor(NewMBB, ProbToNewTarget.getCompl()); // Add the branches to ThisMBB. BuildMI(*ThisMBB, ThisMBB->end(), BSI.SplitBefore->getDebugLoc(), TII->get(NewBROpcode)) .addReg(BSI.SplitCond->getOperand(0).getReg()) .addMBB(NewBRTarget); BuildMI(*ThisMBB, ThisMBB->end(), BSI.SplitBefore->getDebugLoc(), TII->get(PPC::B)) .addMBB(NewMBB); if (BSI.MIToDelete) BSI.MIToDelete->eraseFromParent(); // Change the condition on the original branch and invert it if requested. auto FirstTerminator = NewMBB->getFirstTerminator(); if (BSI.NewCond) { assert(FirstTerminator->getOperand(0).isReg() && "Can't update condition of unconditional branch."); FirstTerminator->getOperand(0).setReg(BSI.NewCond->getOperand(0).getReg()); } if (BSI.InvertOrigBranch) FirstTerminator->setDesc(TII->get(InvertedOpcode)); // If any of the PHIs in the successors of NewMBB reference values that // now come from NewMBB, they need to be updated. for (auto *Succ : NewMBB->successors()) { updatePHIs(Succ, ThisMBB, NewMBB, MRI); } addIncomingValuesToPHIs(NewBRTarget, ThisMBB, NewMBB, MRI); LLVM_DEBUG(dbgs() << "After splitting, ThisMBB:\n"; ThisMBB->dump()); LLVM_DEBUG(dbgs() << "NewMBB:\n"; NewMBB->dump()); LLVM_DEBUG(dbgs() << "New branch-to block:\n"; NewBRTarget->dump()); return true; }
MachineBasicBlock * AlphaTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); assert((MI->getOpcode() == Alpha::CAS32 || MI->getOpcode() == Alpha::CAS64 || MI->getOpcode() == Alpha::LAS32 || MI->getOpcode() == Alpha::LAS64 || MI->getOpcode() == Alpha::SWAP32 || MI->getOpcode() == Alpha::SWAP64) && "Unexpected instr type to insert"); bool is32 = MI->getOpcode() == Alpha::CAS32 || MI->getOpcode() == Alpha::LAS32 || MI->getOpcode() == Alpha::SWAP32; //Load locked store conditional for atomic ops take on the same form //start: //ll //do stuff (maybe branch to exit) //sc //test sc and maybe branck to start //exit: const BasicBlock *LLVM_BB = BB->getBasicBlock(); DebugLoc dl = MI->getDebugLoc(); MachineFunction::iterator It = BB; ++It; MachineBasicBlock *thisMBB = BB; MachineFunction *F = BB->getParent(); MachineBasicBlock *llscMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); sinkMBB->transferSuccessors(thisMBB); F->insert(It, llscMBB); F->insert(It, sinkMBB); BuildMI(thisMBB, dl, TII->get(Alpha::BR)).addMBB(llscMBB); unsigned reg_res = MI->getOperand(0).getReg(), reg_ptr = MI->getOperand(1).getReg(), reg_v2 = MI->getOperand(2).getReg(), reg_store = F->getRegInfo().createVirtualRegister(&Alpha::GPRCRegClass); BuildMI(llscMBB, dl, TII->get(is32 ? Alpha::LDL_L : Alpha::LDQ_L), reg_res).addImm(0).addReg(reg_ptr); switch (MI->getOpcode()) { case Alpha::CAS32: case Alpha::CAS64: { unsigned reg_cmp = F->getRegInfo().createVirtualRegister(&Alpha::GPRCRegClass); BuildMI(llscMBB, dl, TII->get(Alpha::CMPEQ), reg_cmp) .addReg(reg_v2).addReg(reg_res); BuildMI(llscMBB, dl, TII->get(Alpha::BEQ)) .addImm(0).addReg(reg_cmp).addMBB(sinkMBB); BuildMI(llscMBB, dl, TII->get(Alpha::BISr), reg_store) .addReg(Alpha::R31).addReg(MI->getOperand(3).getReg()); break; } case Alpha::LAS32: case Alpha::LAS64: { BuildMI(llscMBB, dl,TII->get(is32 ? Alpha::ADDLr : Alpha::ADDQr), reg_store) .addReg(reg_res).addReg(reg_v2); break; } case Alpha::SWAP32: case Alpha::SWAP64: { BuildMI(llscMBB, dl, TII->get(Alpha::BISr), reg_store) .addReg(reg_v2).addReg(reg_v2); break; } } BuildMI(llscMBB, dl, TII->get(is32 ? Alpha::STL_C : Alpha::STQ_C), reg_store) .addReg(reg_store).addImm(0).addReg(reg_ptr); BuildMI(llscMBB, dl, TII->get(Alpha::BEQ)) .addImm(0).addReg(reg_store).addMBB(llscMBB); BuildMI(llscMBB, dl, TII->get(Alpha::BR)).addMBB(sinkMBB); thisMBB->addSuccessor(llscMBB); llscMBB->addSuccessor(llscMBB); llscMBB->addSuccessor(sinkMBB); F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. return sinkMBB; }
/// If it is profitable, duplicate TailBB's contents in each /// of its predecessors. /// \p IsSimple result of isSimpleBB /// \p TailBB Block to be duplicated. /// \p ForcedLayoutPred When non-null, use this block as the layout predecessor /// instead of the previous block in MF's order. /// \p TDBBs A vector to keep track of all blocks tail-duplicated /// into. /// \p Copies A vector of copy instructions inserted. Used later to /// walk all the inserted copies and remove redundant ones. bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl<MachineBasicBlock *> &TDBBs, SmallVectorImpl<MachineInstr *> &Copies) { DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); DenseSet<unsigned> UsedByPhi; getRegsUsedByPHIs(*TailBB, &UsedByPhi); if (IsSimple) return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies); // Iterate through all the unique predecessors and tail-duplicate this // block into them, if possible. Copying the list ahead of time also // avoids trouble with the predecessor list reallocating. bool Changed = false; SmallSetVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(), TailBB->pred_end()); for (MachineBasicBlock *PredBB : Preds) { assert(TailBB != PredBB && "Single-block loop should have been rejected earlier!"); if (!canTailDuplicate(TailBB, PredBB)) continue; // Don't duplicate into a fall-through predecessor (at least for now). bool IsLayoutSuccessor = false; if (ForcedLayoutPred) IsLayoutSuccessor = (ForcedLayoutPred == PredBB); else if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) IsLayoutSuccessor = true; if (IsLayoutSuccessor) continue; DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB << "From Succ: " << *TailBB); TDBBs.push_back(PredBB); // Remove PredBB's unconditional branch. TII->removeBranch(*PredBB); // Clone the contents of TailBB into PredBB. DenseMap<unsigned, RegSubRegPair> LocalVRMap; SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos; for (MachineBasicBlock::iterator I = TailBB->begin(), E = TailBB->end(); I != E; /* empty */) { MachineInstr *MI = &*I; ++I; if (MI->isPHI()) { // Replace the uses of the def of the PHI with the register coming // from PredBB. processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true); } else { // Replace def of virtual registers with new registers, and update // uses with PHI source register or the new registers. duplicateInstruction(MI, TailBB, PredBB, LocalVRMap, UsedByPhi); } } appendCopies(PredBB, CopyInfos, Copies); // Simplify MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; SmallVector<MachineOperand, 4> PredCond; TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond); NumTailDupAdded += TailBB->size() - 1; // subtract one for removed branch // Update the CFG. PredBB->removeSuccessor(PredBB->succ_begin()); assert(PredBB->succ_empty() && "TailDuplicate called on block with multiple successors!"); for (MachineBasicBlock *Succ : TailBB->successors()) PredBB->addSuccessor(Succ, MBPI->getEdgeProbability(TailBB, Succ)); Changed = true; ++NumTailDups; } // If TailBB was duplicated into all its predecessors except for the prior // block, which falls through unconditionally, move the contents of this // block into the prior block. MachineBasicBlock *PrevBB = ForcedLayoutPred; if (!PrevBB) PrevBB = &*std::prev(TailBB->getIterator()); MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr; SmallVector<MachineOperand, 4> PriorCond; // This has to check PrevBB->succ_size() because EH edges are ignored by // analyzeBranch. if (PrevBB->succ_size() == 1 && // Layout preds are not always CFG preds. Check. *PrevBB->succ_begin() == TailBB && !TII->analyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond) && PriorCond.empty() && (!PriorTBB || PriorTBB == TailBB) && TailBB->pred_size() == 1 && !TailBB->hasAddressTaken()) { DEBUG(dbgs() << "\nMerging into block: " << *PrevBB << "From MBB: " << *TailBB); // There may be a branch to the layout successor. This is unlikely but it // happens. The correct thing to do is to remove the branch before // duplicating the instructions in all cases. TII->removeBranch(*PrevBB); if (PreRegAlloc) { DenseMap<unsigned, RegSubRegPair> LocalVRMap; SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos; MachineBasicBlock::iterator I = TailBB->begin(); // Process PHI instructions first. while (I != TailBB->end() && I->isPHI()) { // Replace the uses of the def of the PHI with the register coming // from PredBB. MachineInstr *MI = &*I++; processPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true); } // Now copy the non-PHI instructions. while (I != TailBB->end()) { // Replace def of virtual registers with new registers, and update // uses with PHI source register or the new registers. MachineInstr *MI = &*I++; assert(!MI->isBundle() && "Not expecting bundles before regalloc!"); duplicateInstruction(MI, TailBB, PrevBB, LocalVRMap, UsedByPhi); MI->eraseFromParent(); } appendCopies(PrevBB, CopyInfos, Copies); } else { TII->removeBranch(*PrevBB); // No PHIs to worry about, just splice the instructions over. PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end()); } PrevBB->removeSuccessor(PrevBB->succ_begin()); assert(PrevBB->succ_empty()); PrevBB->transferSuccessors(TailBB); TDBBs.push_back(PrevBB); Changed = true; } // If this is after register allocation, there are no phis to fix. if (!PreRegAlloc) return Changed; // If we made no changes so far, we are safe. if (!Changed) return Changed; // Handle the nasty case in that we duplicated a block that is part of a loop // into some but not all of its predecessors. For example: // 1 -> 2 <-> 3 | // \ | // \---> rest | // if we duplicate 2 into 1 but not into 3, we end up with // 12 -> 3 <-> 2 -> rest | // \ / | // \----->-----/ | // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced // with a phi in 3 (which now dominates 2). // What we do here is introduce a copy in 3 of the register defined by the // phi, just like when we are duplicating 2 into 3, but we don't copy any // real instructions or remove the 3 -> 2 edge from the phi in 2. for (MachineBasicBlock *PredBB : Preds) { if (is_contained(TDBBs, PredBB)) continue; // EH edges if (PredBB->succ_size() != 1) continue; DenseMap<unsigned, RegSubRegPair> LocalVRMap; SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos; MachineBasicBlock::iterator I = TailBB->begin(); // Process PHI instructions first. while (I != TailBB->end() && I->isPHI()) { // Replace the uses of the def of the PHI with the register coming // from PredBB. MachineInstr *MI = &*I++; processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false); } appendCopies(PredBB, CopyInfos, Copies); } return Changed; }
/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each /// of its predecessors. bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, SmallVector<MachineBasicBlock*, 8> &TDBBs, SmallVector<MachineInstr*, 16> &Copies) { // Set the limit on the number of instructions to duplicate, with a default // of one less than the tail-merge threshold. When optimizing for size, // duplicate only one, because one branch instruction can be eliminated to // compensate for the duplication. unsigned MaxDuplicateCount; if (TailDuplicateSize.getNumOccurrences() == 0 && MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) MaxDuplicateCount = 1; else MaxDuplicateCount = TailDuplicateSize; if (PreRegAlloc) { if (TailBB->empty()) return false; const TargetInstrDesc &TID = TailBB->back().getDesc(); // Pre-regalloc tail duplication hurts compile time and doesn't help // much except for indirect branches and returns. if (!TID.isIndirectBranch() && !TID.isReturn()) return false; // If the target has hardware branch prediction that can handle indirect // branches, duplicating them can often make them predictable when there // are common paths through the code. The limit needs to be high enough // to allow undoing the effects of tail merging and other optimizations // that rearrange the predecessors of the indirect branch. MaxDuplicateCount = 20; } // Don't try to tail-duplicate single-block loops. if (TailBB->isSuccessor(TailBB)) return false; // Check the instructions in the block to determine whether tail-duplication // is invalid or unlikely to be profitable. unsigned InstrCount = 0; bool HasCall = false; for (MachineBasicBlock::iterator I = TailBB->begin(); I != TailBB->end(); ++I) { // Non-duplicable things shouldn't be tail-duplicated. if (I->getDesc().isNotDuplicable()) return false; // Do not duplicate 'return' instructions if this is a pre-regalloc run. // A return may expand into a lot more instructions (e.g. reload of callee // saved registers) after PEI. if (PreRegAlloc && I->getDesc().isReturn()) return false; // Don't duplicate more than the threshold. if (InstrCount == MaxDuplicateCount) return false; // Remember if we saw a call. if (I->getDesc().isCall()) HasCall = true; if (!I->isPHI() && !I->isDebugValue()) InstrCount += 1; } // Don't tail-duplicate calls before register allocation. Calls presents a // barrier to register allocation so duplicating them may end up increasing // spills. if (InstrCount > 1 && (PreRegAlloc && HasCall)) return false; DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); // Iterate through all the unique predecessors and tail-duplicate this // block into them, if possible. Copying the list ahead of time also // avoids trouble with the predecessor list reallocating. bool Changed = false; SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(), TailBB->pred_end()); for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), PE = Preds.end(); PI != PE; ++PI) { MachineBasicBlock *PredBB = *PI; assert(TailBB != PredBB && "Single-block loop should have been rejected earlier!"); if (PredBB->succ_size() > 1) continue; MachineBasicBlock *PredTBB, *PredFBB; SmallVector<MachineOperand, 4> PredCond; if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) continue; if (!PredCond.empty()) continue; // EH edges are ignored by AnalyzeBranch. if (PredBB->succ_size() != 1) continue; // Don't duplicate into a fall-through predecessor (at least for now). if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) continue; DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB << "From Succ: " << *TailBB); TDBBs.push_back(PredBB); // Remove PredBB's unconditional branch. TII->RemoveBranch(*PredBB); // Clone the contents of TailBB into PredBB. DenseMap<unsigned, unsigned> LocalVRMap; SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; MachineBasicBlock::iterator I = TailBB->begin(); while (I != TailBB->end()) { MachineInstr *MI = &*I; ++I; if (MI->isPHI()) { // Replace the uses of the def of the PHI with the register coming // from PredBB. ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos); } else { // Replace def of virtual registers with new registers, and update // uses with PHI source register or the new registers. DuplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap); } } MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(), TII->get(TargetOpcode::COPY), CopyInfos[i].first).addReg(CopyInfos[i].second)); } NumInstrDups += TailBB->size() - 1; // subtract one for removed branch // Update the CFG. PredBB->removeSuccessor(PredBB->succ_begin()); assert(PredBB->succ_empty() && "TailDuplicate called on block with multiple successors!"); for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(), E = TailBB->succ_end(); I != E; ++I) PredBB->addSuccessor(*I); Changed = true; ++NumTailDups; } // If TailBB was duplicated into all its predecessors except for the prior // block, which falls through unconditionally, move the contents of this // block into the prior block. MachineBasicBlock *PrevBB = prior(MachineFunction::iterator(TailBB)); MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0; SmallVector<MachineOperand, 4> PriorCond; bool PriorUnAnalyzable = TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true); // This has to check PrevBB->succ_size() because EH edges are ignored by // AnalyzeBranch. if (!PriorUnAnalyzable && PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 && PrevBB->succ_size() == 1 && !TailBB->hasAddressTaken()) { DEBUG(dbgs() << "\nMerging into block: " << *PrevBB << "From MBB: " << *TailBB); if (PreRegAlloc) { DenseMap<unsigned, unsigned> LocalVRMap; SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; MachineBasicBlock::iterator I = TailBB->begin(); // Process PHI instructions first. while (I != TailBB->end() && I->isPHI()) { // Replace the uses of the def of the PHI with the register coming // from PredBB. MachineInstr *MI = &*I++; ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos); if (MI->getParent()) MI->eraseFromParent(); } // Now copy the non-PHI instructions. while (I != TailBB->end()) { // Replace def of virtual registers with new registers, and update // uses with PHI source register or the new registers. MachineInstr *MI = &*I++; DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap); MI->eraseFromParent(); } MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator(); for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { Copies.push_back(BuildMI(*PrevBB, Loc, DebugLoc(), TII->get(TargetOpcode::COPY), CopyInfos[i].first) .addReg(CopyInfos[i].second)); } } else { // No PHIs to worry about, just splice the instructions over. PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end()); } PrevBB->removeSuccessor(PrevBB->succ_begin()); assert(PrevBB->succ_empty()); PrevBB->transferSuccessors(TailBB); TDBBs.push_back(PrevBB); Changed = true; } return Changed; }
MachineBasicBlock* MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI, MachineBasicBlock *BB) const { MachineFunction *F = BB->getParent(); MachineRegisterInfo &RI = F->getRegInfo(); DebugLoc dl = MI->getDebugLoc(); const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo(); unsigned Opc; const TargetRegisterClass * RC; switch (MI->getOpcode()) { default: assert(0 && "Invalid shift opcode!"); case MSP430::Shl8: Opc = MSP430::SHL8r1; RC = MSP430::GR8RegisterClass; break; case MSP430::Shl16: Opc = MSP430::SHL16r1; RC = MSP430::GR16RegisterClass; break; case MSP430::Sra8: Opc = MSP430::SAR8r1; RC = MSP430::GR8RegisterClass; break; case MSP430::Sra16: Opc = MSP430::SAR16r1; RC = MSP430::GR16RegisterClass; break; case MSP430::Srl8: Opc = MSP430::SAR8r1c; RC = MSP430::GR8RegisterClass; break; case MSP430::Srl16: Opc = MSP430::SAR16r1c; RC = MSP430::GR16RegisterClass; break; } const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction::iterator I = BB; ++I; // Create loop block MachineBasicBlock *LoopBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *RemBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(I, LoopBB); F->insert(I, RemBB); // Update machine-CFG edges by transferring all successors of the current // block to the block containing instructions after shift. RemBB->transferSuccessors(BB); // Add adges BB => LoopBB => RemBB, BB => RemBB, LoopBB => LoopBB BB->addSuccessor(LoopBB); BB->addSuccessor(RemBB); LoopBB->addSuccessor(RemBB); LoopBB->addSuccessor(LoopBB); unsigned ShiftAmtReg = RI.createVirtualRegister(MSP430::GR8RegisterClass); unsigned ShiftAmtReg2 = RI.createVirtualRegister(MSP430::GR8RegisterClass); unsigned ShiftReg = RI.createVirtualRegister(RC); unsigned ShiftReg2 = RI.createVirtualRegister(RC); unsigned ShiftAmtSrcReg = MI->getOperand(2).getReg(); unsigned SrcReg = MI->getOperand(1).getReg(); unsigned DstReg = MI->getOperand(0).getReg(); // BB: // cmp 0, N // je RemBB BuildMI(BB, dl, TII.get(MSP430::CMP8ri)) .addReg(ShiftAmtSrcReg).addImm(0); BuildMI(BB, dl, TII.get(MSP430::JCC)) .addMBB(RemBB) .addImm(MSP430CC::COND_E); // LoopBB: // ShiftReg = phi [%SrcReg, BB], [%ShiftReg2, LoopBB] // ShiftAmt = phi [%N, BB], [%ShiftAmt2, LoopBB] // ShiftReg2 = shift ShiftReg // ShiftAmt2 = ShiftAmt - 1; BuildMI(LoopBB, dl, TII.get(MSP430::PHI), ShiftReg) .addReg(SrcReg).addMBB(BB) .addReg(ShiftReg2).addMBB(LoopBB); BuildMI(LoopBB, dl, TII.get(MSP430::PHI), ShiftAmtReg) .addReg(ShiftAmtSrcReg).addMBB(BB) .addReg(ShiftAmtReg2).addMBB(LoopBB); BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2) .addReg(ShiftReg); BuildMI(LoopBB, dl, TII.get(MSP430::SUB8ri), ShiftAmtReg2) .addReg(ShiftAmtReg).addImm(1); BuildMI(LoopBB, dl, TII.get(MSP430::JCC)) .addMBB(LoopBB) .addImm(MSP430CC::COND_NE); // RemBB: // DestReg = phi [%SrcReg, BB], [%ShiftReg, LoopBB] BuildMI(RemBB, dl, TII.get(MSP430::PHI), DstReg) .addReg(SrcReg).addMBB(BB) .addReg(ShiftReg2).addMBB(LoopBB); F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. return RemBB; }
// Returns true if a new block was inserted. bool SILowerControlFlow::loadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset) { MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = MI.getDebugLoc(); MachineBasicBlock::iterator I(&MI); const MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx); if (AMDGPU::SReg_32RegClass.contains(Idx->getReg())) { if (Offset) { BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0) .addReg(Idx->getReg(), getUndefRegState(Idx->isUndef())) .addImm(Offset); } else { BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) .addReg(Idx->getReg(), getUndefRegState(Idx->isUndef())); } MBB.insert(I, MovRel); MI.eraseFromParent(); return false; } MachineFunction &MF = *MBB.getParent(); MachineOperand *SaveOp = TII->getNamedOperand(MI, AMDGPU::OpName::sdst); SaveOp->setIsDead(false); unsigned Save = SaveOp->getReg(); // Reading from a VGPR requires looping over all workitems in the wavefront. assert(AMDGPU::SReg_64RegClass.contains(Save) && AMDGPU::VGPR_32RegClass.contains(Idx->getReg())); // Save the EXEC mask BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B64), Save) .addReg(AMDGPU::EXEC); // To insert the loop we need to split the block. Move everything after this // point to a new block, and insert a new empty block between the two. MachineBasicBlock *LoopBB = MF.CreateMachineBasicBlock(); MachineBasicBlock *RemainderBB = MF.CreateMachineBasicBlock(); MachineFunction::iterator MBBI(MBB); ++MBBI; MF.insert(MBBI, LoopBB); MF.insert(MBBI, RemainderBB); LoopBB->addSuccessor(LoopBB); LoopBB->addSuccessor(RemainderBB); splitBlockLiveIns(MBB, MI, *LoopBB, *RemainderBB, Save, *Idx); // Move the rest of the block into a new block. RemainderBB->transferSuccessors(&MBB); RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end()); emitLoadM0FromVGPRLoop(*LoopBB, DL, MovRel, *Idx, Offset); MachineBasicBlock::iterator First = RemainderBB->begin(); BuildMI(*RemainderBB, First, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC) .addReg(Save); MI.eraseFromParent(); return true; }
MachineBasicBlock * SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo(); unsigned BROpcode; unsigned CC; DebugLoc dl = MI->getDebugLoc(); // Figure out the conditional branch opcode to use for this select_cc. switch (MI->getOpcode()) { default: assert(0 && "Unknown SELECT_CC!"); case SP::SELECT_CC_Int_ICC: case SP::SELECT_CC_FP_ICC: case SP::SELECT_CC_DFP_ICC: BROpcode = SP::BCOND; break; case SP::SELECT_CC_Int_FCC: case SP::SELECT_CC_FP_FCC: case SP::SELECT_CC_DFP_FCC: BROpcode = SP::FBCOND; break; } CC = (SPCC::CondCodes)MI->getOperand(3).getImm(); // To "insert" a SELECT_CC instruction, we actually have to insert the diamond // control-flow pattern. The incoming instruction knows the destination vreg // to set, the condition code register to branch on, the true/false values to // select between, and a branch opcode to use. const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction::iterator It = BB; ++It; // thisMBB: // ... // TrueVal = ... // [f]bCC copy1MBB // fallthrough --> copy0MBB MachineBasicBlock *thisMBB = BB; MachineFunction *F = BB->getParent(); MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); BuildMI(BB, dl, TII.get(BROpcode)).addMBB(sinkMBB).addImm(CC); F->insert(It, copy0MBB); F->insert(It, sinkMBB); // Update machine-CFG edges by transferring all successors of the current // block to the new block which will contain the Phi node for the select. sinkMBB->transferSuccessors(BB); // Next, add the true and fallthrough blocks as its successors. BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); // copy0MBB: // %FalseValue = ... // # fallthrough to sinkMBB BB = copy0MBB; // Update machine-CFG edges BB->addSuccessor(sinkMBB); // sinkMBB: // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... BB = sinkMBB; BuildMI(BB, dl, TII.get(SP::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB) .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB); F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. return BB; }