bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
  if (skipOptnoneFunction(*MF.getFunction()))
    return false;

  DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
  DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n');

  if (DisablePeephole)
    return false;

  TM = &MF.getTarget();
  TII = TM->getInstrInfo();
  MRI = &MF.getRegInfo();
  DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : nullptr;

  bool Changed = false;

  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
    MachineBasicBlock *MBB = &*I;

    bool SeenMoveImm = false;
    SmallPtrSet<MachineInstr*, 8> LocalMIs;
    SmallSet<unsigned, 4> ImmDefRegs;
    DenseMap<unsigned, MachineInstr*> ImmDefMIs;
    SmallSet<unsigned, 16> FoldAsLoadDefCandidates;

    for (MachineBasicBlock::iterator
           MII = I->begin(), MIE = I->end(); MII != MIE; ) {
      MachineInstr *MI = &*MII;
      // We may be erasing MI below, increment MII now.
      ++MII;
      LocalMIs.insert(MI);

      // Skip debug values. They should not affect this peephole optimization.
      if (MI->isDebugValue())
        continue;

      // If there exists an instruction which belongs to the following
      // categories, we will discard the load candidates.
      if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() ||
          MI->isKill() || MI->isInlineAsm() ||
          MI->hasUnmodeledSideEffects()) {
        FoldAsLoadDefCandidates.clear();
        continue;
      }
      if (MI->mayStore() || MI->isCall())
        FoldAsLoadDefCandidates.clear();

      if (((MI->isBitcast() || MI->isCopy()) && optimizeCopyOrBitcast(MI)) ||
          (MI->isCompare() && optimizeCmpInstr(MI, MBB)) ||
          (MI->isSelect() && optimizeSelect(MI))) {
        // MI is deleted.
        LocalMIs.erase(MI);
        Changed = true;
        continue;
      }

      if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
        SeenMoveImm = true;
      } else {
        Changed |= optimizeExtInstr(MI, MBB, LocalMIs);
        // optimizeExtInstr might have created new instructions after MI
        // and before the already incremented MII. Adjust MII so that the
        // next iteration sees the new instructions.
        MII = MI;
        ++MII;
        if (SeenMoveImm)
          Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
      }

      // Check whether MI is a load candidate for folding into a later
      // instruction. If MI is not a candidate, check whether we can fold an
      // earlier load into MI.
      if (!isLoadFoldable(MI, FoldAsLoadDefCandidates) &&
          !FoldAsLoadDefCandidates.empty()) {
        const MCInstrDesc &MIDesc = MI->getDesc();
        for (unsigned i = MIDesc.getNumDefs(); i != MIDesc.getNumOperands();
             ++i) {
          const MachineOperand &MOp = MI->getOperand(i);
          if (!MOp.isReg())
            continue;
          unsigned FoldAsLoadDefReg = MOp.getReg();
          if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) {
            // We need to fold load after optimizeCmpInstr, since
            // optimizeCmpInstr can enable folding by converting SUB to CMP.
            // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and
            // we need it for markUsesInDebugValueAsUndef().
            unsigned FoldedReg = FoldAsLoadDefReg;
            MachineInstr *DefMI = nullptr;
            MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
                                                          FoldAsLoadDefReg,
                                                          DefMI);
            if (FoldMI) {
              // Update LocalMIs since we replaced MI with FoldMI and deleted
              // DefMI.
              DEBUG(dbgs() << "Replacing: " << *MI);
              DEBUG(dbgs() << "     With: " << *FoldMI);
              LocalMIs.erase(MI);
              LocalMIs.erase(DefMI);
              LocalMIs.insert(FoldMI);
              MI->eraseFromParent();
              DefMI->eraseFromParent();
              MRI->markUsesInDebugValueAsUndef(FoldedReg);
              FoldAsLoadDefCandidates.erase(FoldedReg);
              ++NumLoadFold;
              // MI is replaced with FoldMI.
              Changed = true;
              break;
            }
          }
        }
      }
    }
  }

  return Changed;
}
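// A minimal, self-contained sketch (standard C++, hypothetical names) of the
// advance-before-erase idiom the loop above relies on: the iterator is moved
// past the current element before that element may be erased, so erasure
// never invalidates the loop's position.
#include <list>

void eraseEvens(std::list<int> &L) {
  for (auto It = L.begin(), End = L.end(); It != End;) {
    auto Cur = It;
    ++It; // Advance first, mirroring "++MII" above.
    if (*Cur % 2 == 0)
      L.erase(Cur); // Safe: It already points past Cur.
  }
}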
bool ARMInstructionSelector::select(MachineInstr &I) const {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  auto &MBB = *I.getParent();
  auto &MF = *MBB.getParent();
  auto &MRI = MF.getRegInfo();

  if (!isPreISelGenericOpcode(I.getOpcode())) {
    if (I.isCopy())
      return selectCopy(I, TII, MRI, TRI, RBI);

    return true;
  }

  if (selectImpl(I))
    return true;

  MachineInstrBuilder MIB{MF, I};
  bool isSExt = false;

  using namespace TargetOpcode;
  switch (I.getOpcode()) {
  case G_SEXT:
    isSExt = true;
    LLVM_FALLTHROUGH;
  case G_ZEXT: {
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    // FIXME: Smaller destination sizes coming soon!
    if (DstTy.getSizeInBits() != 32) {
      DEBUG(dbgs() << "Unsupported destination size for extension");
      return false;
    }

    LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
    unsigned SrcSize = SrcTy.getSizeInBits();
    switch (SrcSize) {
    case 1: {
      // ZExt boils down to & 0x1; for SExt we also subtract that from 0.
      I.setDesc(TII.get(ARM::ANDri));
      MIB.addImm(1).add(predOps(ARMCC::AL)).add(condCodeOp());

      if (isSExt) {
        unsigned SExtResult = I.getOperand(0).getReg();

        // Use a new virtual register for the result of the AND.
        unsigned AndResult = MRI.createVirtualRegister(&ARM::GPRRegClass);
        I.getOperand(0).setReg(AndResult);

        auto InsertBefore = std::next(I.getIterator());
        auto SubI =
            BuildMI(MBB, InsertBefore, I.getDebugLoc(), TII.get(ARM::RSBri))
                .addDef(SExtResult)
                .addUse(AndResult)
                .addImm(0)
                .add(predOps(ARMCC::AL))
                .add(condCodeOp());
        if (!constrainSelectedInstRegOperands(*SubI, TII, TRI, RBI))
          return false;
      }
      break;
    }
    case 8:
    case 16: {
      unsigned NewOpc = selectSimpleExtOpc(I.getOpcode(), SrcSize);
      if (NewOpc == I.getOpcode())
        return false;
      I.setDesc(TII.get(NewOpc));
      MIB.addImm(0).add(predOps(ARMCC::AL));
      break;
    }
    default:
      DEBUG(dbgs() << "Unsupported source size for extension");
      return false;
    }
    break;
  }
  case G_ANYEXT:
  case G_TRUNC: {
    // The high bits are undefined, so there's nothing special to do, just
    // treat it as a copy.
    auto SrcReg = I.getOperand(1).getReg();
    auto DstReg = I.getOperand(0).getReg();

    const auto &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
    const auto &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);

    if (SrcRegBank.getID() != DstRegBank.getID()) {
      DEBUG(dbgs()
            << "G_TRUNC/G_ANYEXT operands on different register banks\n");
      return false;
    }

    if (SrcRegBank.getID() != ARM::GPRRegBankID) {
      DEBUG(dbgs() << "G_TRUNC/G_ANYEXT on non-GPR not supported yet\n");
      return false;
    }

    I.setDesc(TII.get(COPY));
    return selectCopy(I, TII, MRI, TRI, RBI);
  }
  case G_SELECT:
    return selectSelect(MIB, MRI);
  case G_ICMP: {
    CmpConstants Helper(ARM::CMPrr, ARM::INSTRUCTION_LIST_END,
                        ARM::GPRRegBankID, 32);
    return selectCmp(Helper, MIB, MRI);
  }
  case G_FCMP: {
    assert(TII.getSubtarget().hasVFP2() && "Can't select fcmp without VFP");

    unsigned OpReg = I.getOperand(2).getReg();
    unsigned Size = MRI.getType(OpReg).getSizeInBits();

    if (Size == 64 && TII.getSubtarget().isFPOnlySP()) {
      DEBUG(dbgs() << "Subtarget only supports single precision");
      return false;
    }
    if (Size != 32 && Size != 64) {
      DEBUG(dbgs() << "Unsupported size for G_FCMP operand");
      return false;
    }

    CmpConstants Helper(Size == 32 ? ARM::VCMPS : ARM::VCMPD, ARM::FMSTAT,
                        ARM::FPRRegBankID, Size);
    return selectCmp(Helper, MIB, MRI);
  }
  case G_GEP:
    I.setDesc(TII.get(ARM::ADDrr));
    MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
    break;
  case G_FRAME_INDEX:
    // Add 0 to the given frame index and hope it will eventually be folded
    // into the user(s).
    I.setDesc(TII.get(ARM::ADDri));
    MIB.addImm(0).add(predOps(ARMCC::AL)).add(condCodeOp());
    break;
  case G_CONSTANT: {
    unsigned Reg = I.getOperand(0).getReg();
    if (!validReg(MRI, Reg, 32, ARM::GPRRegBankID))
      return false;

    I.setDesc(TII.get(ARM::MOVi));
    MIB.add(predOps(ARMCC::AL)).add(condCodeOp());

    auto &Val = I.getOperand(1);
    if (Val.isCImm()) {
      if (Val.getCImm()->getBitWidth() > 32)
        return false;
      Val.ChangeToImmediate(Val.getCImm()->getZExtValue());
    }

    if (!Val.isImm()) {
      return false;
    }

    break;
  }
  case G_GLOBAL_VALUE:
    return selectGlobal(MIB, MRI);
  case G_STORE:
  case G_LOAD: {
    const auto &MemOp = **I.memoperands_begin();
    if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
      DEBUG(dbgs() << "Atomic load/store not supported yet\n");
      return false;
    }

    unsigned Reg = I.getOperand(0).getReg();
    unsigned RegBank = RBI.getRegBank(Reg, MRI, TRI)->getID();

    LLT ValTy = MRI.getType(Reg);
    const auto ValSize = ValTy.getSizeInBits();

    assert((ValSize != 64 || TII.getSubtarget().hasVFP2()) &&
           "Don't know how to load/store 64-bit value without VFP");

    const auto NewOpc = selectLoadStoreOpCode(I.getOpcode(), RegBank, ValSize);
    if (NewOpc == G_LOAD || NewOpc == G_STORE)
      return false;

    I.setDesc(TII.get(NewOpc));

    if (NewOpc == ARM::LDRH || NewOpc == ARM::STRH)
      // LDRH has a funny addressing mode (there's already a FIXME for it).
      MIB.addReg(0);
    MIB.addImm(0).add(predOps(ARMCC::AL));
    break;
  }
  case G_MERGE_VALUES: {
    if (!selectMergeValues(MIB, TII, MRI, TRI, RBI))
      return false;
    break;
  }
  case G_UNMERGE_VALUES: {
    if (!selectUnmergeValues(MIB, TII, MRI, TRI, RBI))
      return false;
    break;
  }
  case G_BRCOND: {
    if (!validReg(MRI, I.getOperand(0).getReg(), 1, ARM::GPRRegBankID)) {
      DEBUG(dbgs() << "Unsupported condition register for G_BRCOND");
      return false;
    }

    // Set the flags.
    auto Test = BuildMI(*I.getParent(), I, I.getDebugLoc(),
                        TII.get(ARM::TSTri))
                    .addReg(I.getOperand(0).getReg())
                    .addImm(1)
                    .add(predOps(ARMCC::AL));
    if (!constrainSelectedInstRegOperands(*Test, TII, TRI, RBI))
      return false;

    // Branch conditionally.
    auto Branch = BuildMI(*I.getParent(), I, I.getDebugLoc(),
                          TII.get(ARM::Bcc))
                      .add(I.getOperand(1))
                      .add(predOps(ARMCC::EQ, ARM::CPSR));
    if (!constrainSelectedInstRegOperands(*Branch, TII, TRI, RBI))
      return false;
    I.eraseFromParent();
    return true;
  }
  default:
    return false;
  }

  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
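// A small standalone illustration (plain C++, hypothetical function name) of
// the i1 sign-extension lowering in the G_SEXT case above: sext of a 1-bit
// value is computed as (x & 1) followed by a reverse-subtract from zero, so
// 1 becomes -1 and 0 stays 0 -- exactly the ANDri + RSBri pair the selector
// emits.
#include <cstdint>

int32_t signExtendFromBit0(uint32_t X) {
  uint32_t And = X & 1u;      // ANDri: isolate the low bit.
  return (int32_t)(0u - And); // RSBri: 0 - And -> all-ones if the bit is set.
}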
bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
  bool AnyChanges = false;
  MRI = &MF.getRegInfo();
  TRI = MF.getTarget().getRegisterInfo();
  TII = MF.getTarget().getInstrInfo();

  // Treat reserved registers as always live.
  BitVector ReservedRegs = TRI->getReservedRegs(MF);

  // Loop over all instructions in all blocks, from bottom to top, so that it's
  // more likely that chains of dependent but ultimately dead instructions will
  // be cleaned up.
  for (MachineFunction::reverse_iterator I = MF.rbegin(), E = MF.rend();
       I != E; ++I) {
    MachineBasicBlock *MBB = &*I;

    // Start out assuming that reserved registers are live out of this block.
    LivePhysRegs = ReservedRegs;

    // Also add any explicit live-out physregs for this block.
    if (!MBB->empty() && MBB->back().getDesc().isReturn())
      for (MachineRegisterInfo::liveout_iterator LOI = MRI->liveout_begin(),
           LOE = MRI->liveout_end(); LOI != LOE; ++LOI) {
        unsigned Reg = *LOI;
        if (TargetRegisterInfo::isPhysicalRegister(Reg))
          LivePhysRegs.set(Reg);
      }

    // FIXME: Add live-ins from successors to LivePhysRegs. Normally, physregs
    // are not live across blocks, but some targets (x86) can have flags live
    // out of a block.

    // Now scan the instructions and delete dead ones, tracking physreg
    // liveness as we go.
    for (MachineBasicBlock::reverse_iterator MII = MBB->rbegin(),
         MIE = MBB->rend(); MII != MIE; ) {
      MachineInstr *MI = &*MII;

      // If the instruction is dead, delete it!
      if (isDead(MI)) {
        DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI);
        // It is possible that some DBG_VALUE instructions refer to this
        // instruction. Examine each def operand for such references;
        // if found, mark the DBG_VALUE as undef (but don't delete it).
        for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
          const MachineOperand &MO = MI->getOperand(i);
          if (!MO.isReg() || !MO.isDef())
            continue;
          unsigned Reg = MO.getReg();
          if (!TargetRegisterInfo::isVirtualRegister(Reg))
            continue;
          MachineRegisterInfo::use_iterator nextI;
          for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg),
               E = MRI->use_end(); I != E; I = nextI) {
            nextI = llvm::next(I);  // I is invalidated by the setReg.
            MachineOperand &Use = I.getOperand();
            MachineInstr *UseMI = Use.getParent();
            if (UseMI == MI)
              continue;
            assert(Use.isDebug());
            UseMI->getOperand(0).setReg(0U);
          }
        }
        AnyChanges = true;
        MI->eraseFromParent();
        ++NumDeletes;
        MIE = MBB->rend();
        // MII is now pointing to the next instruction to process,
        // so don't increment it.
        continue;
      }

      // Record the physreg defs.
      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
        const MachineOperand &MO = MI->getOperand(i);
        if (MO.isReg() && MO.isDef()) {
          unsigned Reg = MO.getReg();
          if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
            LivePhysRegs.reset(Reg);
            // Check the subreg set, not the alias set, because a def
            // of a super-register may still be partially live after
            // this def.
            for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
                 *SubRegs; ++SubRegs)
              LivePhysRegs.reset(*SubRegs);
          }
        }
      }

      // Record the physreg uses, after the defs, in case a physreg is
      // both defined and used in the same instruction.
      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
        const MachineOperand &MO = MI->getOperand(i);
        if (MO.isReg() && MO.isUse()) {
          unsigned Reg = MO.getReg();
          if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
            LivePhysRegs.set(Reg);
            for (const unsigned *AliasSet = TRI->getAliasSet(Reg);
                 *AliasSet; ++AliasSet)
              LivePhysRegs.set(*AliasSet);
          }
        }
      }

      // We didn't delete the current instruction, so increment MII to
      // the next one.
      ++MII;
    }
  }

  LivePhysRegs.clear();
  return AnyChanges;
}
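// A compact sketch (standard C++, hypothetical instruction type) of the
// bottom-up liveness bookkeeping above: scanning in reverse, each
// instruction's defs are cleared from the live set before its uses are
// added, so a register that is both defined and used by one instruction is
// still correctly live before it.
#include <bitset>
#include <vector>

struct Instr {
  std::vector<unsigned> Defs, Uses; // Physical register numbers.
};

std::bitset<256> liveAtBlockEntry(const std::vector<Instr> &Block,
                                  std::bitset<256> LiveOut) {
  for (auto I = Block.rbegin(), E = Block.rend(); I != E; ++I) {
    for (unsigned D : I->Defs) LiveOut.reset(D); // Kill defs first...
    for (unsigned U : I->Uses) LiveOut.set(U);   // ...then gen uses.
  }
  return LiveOut;
}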
/// fixupConditionalBranch - Fix up a conditional branch whose destination is
/// too far away to fit in its displacement field. It is converted to an
/// inverse conditional branch + an unconditional branch to the destination.
bool AArch64BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) {
  MachineBasicBlock *DestBB = getDestBlock(MI);

  // Add an unconditional branch to the destination and invert the branch
  // condition to jump over it:
  // tbz L1
  // =>
  // tbnz L2
  // b   L1
  // L2:

  // If the branch is at the end of its MBB and that has a fall-through block,
  // direct the updated conditional branch to the fall-through block.
  // Otherwise, split the MBB before the next instruction.
  MachineBasicBlock *MBB = MI.getParent();
  MachineInstr *BMI = &MBB->back();
  bool NeedSplit = (BMI != &MI) || !hasFallthrough(*MBB);

  if (BMI != &MI) {
    if (std::next(MachineBasicBlock::iterator(MI)) ==
            std::prev(MBB->getLastNonDebugInstr()) &&
        BMI->isUnconditionalBranch()) {
      // Last MI in the BB is an unconditional branch. We can simply invert the
      // condition and swap destinations:
      // beq L1
      // b   L2
      // =>
      // bne L2
      // b   L1
      MachineBasicBlock *NewDest = getDestBlock(*BMI);
      if (isBlockInRange(MI, *NewDest)) {
        DEBUG(dbgs() << "  Invert condition and swap its destination with "
                     << *BMI);
        changeBranchDestBlock(*BMI, *DestBB);

        int NewSize =
            insertInvertedConditionalBranch(*MBB, MI.getIterator(),
                                            MI.getDebugLoc(), MI, *NewDest);
        int OldSize = TII->getInstSizeInBytes(MI);
        BlockInfo[MBB->getNumber()].Size += (NewSize - OldSize);
        MI.eraseFromParent();
        return true;
      }
    }
  }

  if (NeedSplit) {
    // Analyze the branch so we know how to update the successor lists.
    MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
    SmallVector<MachineOperand, 2> Cond;
    bool Fail = TII->analyzeBranch(*MBB, TBB, FBB, Cond, false);
    assert(!Fail && "branches to relax should be analyzable");
    (void)Fail;

    MachineBasicBlock *NewBB = splitBlockBeforeInstr(MI);
    // No need for the branch to the next block. We're adding an unconditional
    // branch to the destination.
    int delta = TII->getInstSizeInBytes(MBB->back());
    BlockInfo[MBB->getNumber()].Size -= delta;
    MBB->back().eraseFromParent();
    // BlockInfo[SplitBB].Offset is wrong temporarily, fixed below.

    // Update the successor lists according to the transformation to follow.
    // Do it here since if there's no split, no update is needed.
    MBB->replaceSuccessor(FBB, NewBB);
    NewBB->addSuccessor(FBB);
  }

  MachineBasicBlock &NextBB = *std::next(MachineFunction::iterator(MBB));

  DEBUG(dbgs() << "  Insert B to BB#" << DestBB->getNumber()
               << ", invert condition and change dest. to BB#"
               << NextBB.getNumber() << '\n');

  unsigned &MBBSize = BlockInfo[MBB->getNumber()].Size;

  // Insert a new conditional branch and a new unconditional branch.
  MBBSize += insertInvertedConditionalBranch(*MBB, MBB->end(),
                                             MI.getDebugLoc(), MI, NextBB);

  MBBSize += insertUnconditionalBranch(*MBB, *DestBB, MI.getDebugLoc());

  // Remove the old conditional branch. It may or may not still be in MBB.
  MBBSize -= TII->getInstSizeInBytes(MI);
  MI.eraseFromParent();

  // Finally, keep the block offsets up to date.
  adjustBlockOffsets(*MBB);
  return true;
}
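// A tiny sketch (standard C++, hypothetical helper name) of the range test
// that decides whether relaxation like the above is needed: a conditional
// branch with an N-bit, 4-byte-scaled displacement field can only reach
// offsets representable as a signed N-bit count of words.
#include <cstdint>

bool fitsInBranchRange(int64_t ByteOffset, unsigned DispBits) {
  int64_t Words = ByteOffset / 4;               // Displacements are scaled.
  int64_t Limit = int64_t(1) << (DispBits - 1); // Signed field bound.
  return Words >= -Limit && Words < Limit;
}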
/// TailDuplicateAndUpdate - Tail duplicate the block and cleanup.
bool
TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB,
                                          bool IsSimple,
                                          MachineFunction &MF) {
  // Save the successors list.
  SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(),
                                              MBB->succ_end());

  SmallVector<MachineBasicBlock*, 8> TDBBs;
  SmallVector<MachineInstr*, 16> Copies;
  if (!TailDuplicate(MBB, IsSimple, MF, TDBBs, Copies))
    return false;

  ++NumTails;

  SmallVector<MachineInstr*, 8> NewPHIs;
  MachineSSAUpdater SSAUpdate(MF, &NewPHIs);

  // TailBB's immediate successors are now successors of those predecessors
  // which duplicated TailBB. Add the predecessors as sources to the PHI
  // instructions.
  bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken();
  if (PreRegAlloc)
    UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs);

  // If it is dead, remove it.
  if (isDead) {
    NumInstrDups -= MBB->size();
    RemoveDeadBlock(MBB);
    ++NumDeadBlocks;
  }

  // Update SSA form.
  if (!SSAUpdateVRs.empty()) {
    for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) {
      unsigned VReg = SSAUpdateVRs[i];
      SSAUpdate.Initialize(VReg);

      // If the original definition is still around, add it as an available
      // value.
      MachineInstr *DefMI = MRI->getVRegDef(VReg);
      MachineBasicBlock *DefBB = 0;
      if (DefMI) {
        DefBB = DefMI->getParent();
        SSAUpdate.AddAvailableValue(DefBB, VReg);
      }

      // Add the new vregs as available values.
      DenseMap<unsigned, AvailableValsTy>::iterator LI =
        SSAUpdateVals.find(VReg);
      for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
        MachineBasicBlock *SrcBB = LI->second[j].first;
        unsigned SrcReg = LI->second[j].second;
        SSAUpdate.AddAvailableValue(SrcBB, SrcReg);
      }

      // Rewrite uses that are outside of the original def's block.
      MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg);
      while (UI != MRI->use_end()) {
        MachineOperand &UseMO = UI.getOperand();
        MachineInstr *UseMI = &*UI;
        ++UI;
        if (UseMI->isDebugValue()) {
          // SSAUpdate can replace the use with an undef. That creates
          // a debug instruction that is a kill.
          // FIXME: Should it be SSAUpdate's job to delete debug instructions
          // instead of replacing the use with undef?
          UseMI->eraseFromParent();
          continue;
        }
        if (UseMI->getParent() == DefBB && !UseMI->isPHI())
          continue;
        SSAUpdate.RewriteUse(UseMO);
      }
    }

    SSAUpdateVRs.clear();
    SSAUpdateVals.clear();
  }

  // Eliminate some of the copies inserted by tail duplication to maintain
  // SSA form.
  for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
    MachineInstr *Copy = Copies[i];
    if (!Copy->isCopy())
      continue;
    unsigned Dst = Copy->getOperand(0).getReg();
    unsigned Src = Copy->getOperand(1).getReg();
    if (MRI->hasOneNonDBGUse(Src) &&
        MRI->constrainRegClass(Src, MRI->getRegClass(Dst))) {
      // Copy is the only use. Do trivial copy propagation here.
      MRI->replaceRegWith(Dst, Src);
      Copy->eraseFromParent();
    }
  }

  if (NewPHIs.size())
    NumAddedPHIs += NewPHIs.size();

  return true;
}
bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) {
  MRI = &MF.getRegInfo();
  TII = MF.getTarget().getInstrInfo();
  DT = &getAnalysis<MachineDominatorTree>();
  LI = &getAnalysis<LiveIntervals>();

  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
       I != E; ++I) {
    for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
         BBI != BBE && BBI->isPHI(); ++BBI) {
      unsigned DestReg = BBI->getOperand(0).getReg();
      addReg(DestReg);
      PHISrcDefs[I].push_back(BBI);

      for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
        MachineOperand &SrcMO = BBI->getOperand(i);
        unsigned SrcReg = SrcMO.getReg();
        addReg(SrcReg);
        unionRegs(DestReg, SrcReg);

        MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
        if (DefMI)
          PHISrcDefs[DefMI->getParent()].push_back(DefMI);
      }
    }
  }

  // Perform a depth-first traversal of the dominator tree, splitting
  // interferences amongst PHI-congruence classes.
  DenseMap<unsigned, unsigned> CurrentDominatingParent;
  DenseMap<unsigned, unsigned> ImmediateDominatingParent;
  for (df_iterator<MachineDomTreeNode*> DI = df_begin(DT->getRootNode()),
       DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
    SplitInterferencesForBasicBlock(*DI->getBlock(),
                                    CurrentDominatingParent,
                                    ImmediateDominatingParent);
  }

  // Insert copies for all PHI source and destination registers.
  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
       I != E; ++I) {
    for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
         BBI != BBE && BBI->isPHI(); ++BBI) {
      InsertCopiesForPHI(BBI, I);
    }
  }

  // FIXME: Preserve the equivalence classes during copy insertion and use
  // the preserved equivalence classes instead of recomputing them.
  RegNodeMap.clear();
  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
       I != E; ++I) {
    for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
         BBI != BBE && BBI->isPHI(); ++BBI) {
      unsigned DestReg = BBI->getOperand(0).getReg();
      addReg(DestReg);

      for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
        unsigned SrcReg = BBI->getOperand(i).getReg();
        addReg(SrcReg);
        unionRegs(DestReg, SrcReg);
      }
    }
  }

  DenseMap<unsigned, unsigned> RegRenamingMap;
  bool Changed = false;
  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
       I != E; ++I) {
    MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
    while (BBI != BBE && BBI->isPHI()) {
      MachineInstr *PHI = BBI;

      assert(PHI->getNumOperands() > 0);

      unsigned SrcReg = PHI->getOperand(1).getReg();
      unsigned SrcColor = getRegColor(SrcReg);
      unsigned NewReg = RegRenamingMap[SrcColor];
      if (!NewReg) {
        NewReg = SrcReg;
        RegRenamingMap[SrcColor] = SrcReg;
      }
      MergeLIsAndRename(SrcReg, NewReg);

      unsigned DestReg = PHI->getOperand(0).getReg();
      if (!InsertedDestCopies.count(DestReg))
        MergeLIsAndRename(DestReg, NewReg);

      for (unsigned i = 3; i < PHI->getNumOperands(); i += 2) {
        unsigned SrcReg = PHI->getOperand(i).getReg();
        MergeLIsAndRename(SrcReg, NewReg);
      }

      ++BBI;
      LI->RemoveMachineInstrFromMaps(PHI);
      PHI->eraseFromParent();
      Changed = true;
    }
  }

  // Due to the insertion of copies to split live ranges, the live intervals
  // are guaranteed to not overlap, except in one case: an original PHI source
  // and a PHI destination copy. In this case, they have the same value and
  // thus don't truly intersect, so we merge them into the value live at that
  // point.
  // FIXME: Is there some better way we can handle this?
  for (DestCopyMap::iterator I = InsertedDestCopies.begin(),
       E = InsertedDestCopies.end(); I != E; ++I) {
    unsigned DestReg = I->first;
    unsigned DestColor = getRegColor(DestReg);
    unsigned NewReg = RegRenamingMap[DestColor];

    LiveInterval &DestLI = LI->getInterval(DestReg);
    LiveInterval &NewLI = LI->getInterval(NewReg);

    assert(DestLI.ranges.size() == 1
           && "PHI destination copy's live interval should be a single live "
              "range from the beginning of the BB to the copy instruction.");
    LiveRange *DestLR = DestLI.begin();
    VNInfo *NewVNI = NewLI.getVNInfoAt(DestLR->start);
    if (!NewVNI) {
      NewVNI = NewLI.createValueCopy(DestLR->valno, LI->getVNInfoAllocator());
      MachineInstr *CopyInstr = I->second;
      CopyInstr->getOperand(1).setIsKill(true);
    }

    LiveRange NewLR(DestLR->start, DestLR->end, NewVNI);
    NewLI.addRange(NewLR);

    LI->removeInterval(DestReg);
    MRI->replaceRegWith(DestReg, NewReg);
  }

  // Adjust the live intervals of all PHI source registers to handle the case
  // where the PHIs in successor blocks were the only later uses of the source
  // register.
  for (SrcCopySet::iterator I = InsertedSrcCopySet.begin(),
       E = InsertedSrcCopySet.end(); I != E; ++I) {
    MachineBasicBlock *MBB = I->first;
    unsigned SrcReg = I->second;
    if (unsigned RenamedRegister = RegRenamingMap[getRegColor(SrcReg)])
      SrcReg = RenamedRegister;

    LiveInterval &SrcLI = LI->getInterval(SrcReg);

    bool isLiveOut = false;
    for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
         SE = MBB->succ_end(); SI != SE; ++SI) {
      if (SrcLI.liveAt(LI->getMBBStartIdx(*SI))) {
        isLiveOut = true;
        break;
      }
    }

    if (isLiveOut)
      continue;

    MachineOperand *LastUse = findLastUse(MBB, SrcReg);
    assert(LastUse);
    SlotIndex LastUseIndex = LI->getInstructionIndex(LastUse->getParent());
    SrcLI.removeRange(LastUseIndex.getDefIndex(), LI->getMBBEndIdx(MBB));
    LastUse->setIsKill(true);
  }

  LI->renumber();

  Allocator.Reset();
  RegNodeMap.clear();
  PHISrcDefs.clear();
  InsertedSrcCopySet.clear();
  InsertedSrcCopyMap.clear();
  InsertedDestCopies.clear();

  return Changed;
}
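// A compact sketch (standard C++, hypothetical names) of the union-find
// structure behind addReg/unionRegs above: PHI sources and destinations are
// merged into congruence classes, and getRegColor-style queries reduce to
// finding a class representative.
#include <unordered_map>

struct RegUnionFind {
  std::unordered_map<unsigned, unsigned> Parent;

  void addReg(unsigned R) { Parent.emplace(R, R); }

  unsigned find(unsigned R) {
    while (Parent[R] != R) {
      Parent[R] = Parent[Parent[R]]; // Path halving.
      R = Parent[R];
    }
    return R;
  }

  void unionRegs(unsigned A, unsigned B) { Parent[find(A)] = find(B); }
};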
void SILowerControlFlow::emitIf(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();
  MachineBasicBlock::iterator I(&MI);

  MachineOperand &SaveExec = MI.getOperand(0);
  MachineOperand &Cond = MI.getOperand(1);
  assert(SaveExec.getSubReg() == AMDGPU::NoSubRegister &&
         Cond.getSubReg() == AMDGPU::NoSubRegister);

  unsigned SaveExecReg = SaveExec.getReg();

  MachineOperand &ImpDefSCC = MI.getOperand(4);
  assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef());

  // Add an implicit def of exec to discourage scheduling VALU after this which
  // will interfere with trying to form s_and_saveexec_b64 later.
  MachineInstr *CopyExec =
    BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), SaveExecReg)
    .addReg(AMDGPU::EXEC)
    .addReg(AMDGPU::EXEC, RegState::ImplicitDefine);

  unsigned Tmp = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);

  MachineInstr *And =
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_AND_B64), Tmp)
    .addReg(SaveExecReg)
    //.addReg(AMDGPU::EXEC)
    .addReg(Cond.getReg());
  setImpSCCDefDead(*And, true);

  MachineInstr *Xor =
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_XOR_B64), SaveExecReg)
    .addReg(Tmp)
    .addReg(SaveExecReg);
  setImpSCCDefDead(*Xor, ImpDefSCC.isDead());

  // Use a copy that is a terminator to get correct spill code placement with
  // fast regalloc.
  MachineInstr *SetExec =
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B64_term), AMDGPU::EXEC)
    .addReg(Tmp, RegState::Kill);

  // Insert a pseudo terminator to help keep the verifier happy. This will also
  // be used later when inserting skips.
  MachineInstr *NewBr =
    BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
    .addOperand(MI.getOperand(2));

  if (!LIS) {
    MI.eraseFromParent();
    return;
  }

  LIS->InsertMachineInstrInMaps(*CopyExec);

  // Replace with and so we don't need to fix the live interval for condition
  // register.
  LIS->ReplaceMachineInstrInMaps(MI, *And);

  LIS->InsertMachineInstrInMaps(*Xor);
  LIS->InsertMachineInstrInMaps(*SetExec);
  LIS->InsertMachineInstrInMaps(*NewBr);

  LIS->removeRegUnit(*MCRegUnitIterator(AMDGPU::EXEC, TRI));
  MI.eraseFromParent();

  // FIXME: Is there a better way of adjusting the liveness? It shouldn't be
  // hard to add another def here but I'm not sure how to correctly update the
  // valno.
  LIS->removeInterval(SaveExecReg);
  LIS->createAndComputeVirtRegInterval(SaveExecReg);
  LIS->createAndComputeVirtRegInterval(Tmp);
}
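// For reference, the sequence emitted above looks roughly like this in MIR
// (register names illustrative, not taken from the source):
//
//   %save = COPY $exec, implicit-def $exec
//   %tmp  = S_AND_B64 %save, %cond       ; lanes taking the "then" side
//   %save = S_XOR_B64 %tmp, %save        ; remaining lanes, saved for the join
//   $exec = S_MOV_B64_term killed %tmp   ; terminator copy for spill placement
//   SI_MASK_BRANCH %endif_block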
void HexagonExpandCondsets::removeInstr(MachineInstr &MI) {
  LIS->RemoveMachineInstrFromMaps(MI);
  MI.eraseFromParent();
}
bool LanaiInstrInfo::optimizeCompareInstr(
    MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int /*CmpMask*/,
    int CmpValue, const MachineRegisterInfo *MRI) const {
  // Get the unique definition of SrcReg.
  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
  if (!MI)
    return false;

  // Get ready to iterate backward from CmpInstr.
  MachineBasicBlock::iterator I = CmpInstr, E = MI,
                              B = CmpInstr.getParent()->begin();

  // Early exit if CmpInstr is at the beginning of the BB.
  if (I == B)
    return false;

  // There are two possible candidates which can be changed to set SR:
  // One is MI, the other is a SUB instruction.
  // * For SFSUB_F_RR(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
  // * For SFSUB_F_RI(r1, CmpValue), we are looking for SUB(r1, CmpValue).
  MachineInstr *Sub = nullptr;
  if (SrcReg2 != 0)
    // MI is not a candidate to transform into a flag setting instruction.
    MI = nullptr;
  else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) {
    // Conservatively refuse to convert an instruction which isn't in the same
    // BB as the comparison. Don't return if SFSUB_F_RI and CmpValue != 0 as
    // Sub may still be a candidate.
    if (CmpInstr.getOpcode() == Lanai::SFSUB_F_RI_LO)
      MI = nullptr;
    else
      return false;
  }

  // Check that SR isn't set between the comparison instruction and the
  // instruction we want to change while searching for Sub.
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  for (--I; I != E; --I) {
    const MachineInstr &Instr = *I;

    if (Instr.modifiesRegister(Lanai::SR, TRI) ||
        Instr.readsRegister(Lanai::SR, TRI))
      // This instruction modifies or uses SR after the one we want to change.
      // We can't do this transformation.
      return false;

    // Check whether CmpInstr can be made redundant by the current instruction.
    if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)) {
      Sub = &*I;
      break;
    }

    // Don't search outside the containing basic block.
    if (I == B)
      return false;
  }

  // Return false if no candidates exist.
  if (!MI && !Sub)
    return false;

  // The single candidate is called MI.
  if (!MI)
    MI = Sub;

  if (flagSettingOpcodeVariant(MI->getOpcode()) != Lanai::NOP) {
    bool isSafe = false;

    SmallVector<std::pair<MachineOperand *, LPCC::CondCode>, 4>
        OperandsToUpdate;
    I = CmpInstr;
    E = CmpInstr.getParent()->end();
    while (!isSafe && ++I != E) {
      const MachineInstr &Instr = *I;
      for (unsigned IO = 0, EO = Instr.getNumOperands(); !isSafe && IO != EO;
           ++IO) {
        const MachineOperand &MO = Instr.getOperand(IO);
        if (MO.isRegMask() && MO.clobbersPhysReg(Lanai::SR)) {
          isSafe = true;
          break;
        }
        if (!MO.isReg() || MO.getReg() != Lanai::SR)
          continue;
        if (MO.isDef()) {
          isSafe = true;
          break;
        }
        // Condition code is after the operand before SR.
        LPCC::CondCode CC;
        CC = (LPCC::CondCode)Instr.getOperand(IO - 1).getImm();

        if (Sub) {
          LPCC::CondCode NewCC = getOppositeCondition(CC);
          if (NewCC == LPCC::ICC_T)
            return false;
          // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
          // on CMP needs to be updated to be based on SUB. Push the condition
          // code operands to OperandsToUpdate. If it is safe to remove
          // CmpInstr, the condition code of these operands will be modified.
          if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
              Sub->getOperand(2).getReg() == SrcReg) {
            OperandsToUpdate.push_back(
                std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
          }
        } else {
          // No Sub, so this is x = <op> y, z; cmp x, 0.
          switch (CC) {
          case LPCC::ICC_EQ: // Z
          case LPCC::ICC_NE: // Z
          case LPCC::ICC_MI: // N
          case LPCC::ICC_PL: // N
          case LPCC::ICC_F:  // none
          case LPCC::ICC_T:  // none
            // SR can be used multiple times, we should continue.
            break;
          case LPCC::ICC_CS: // C
          case LPCC::ICC_CC: // C
          case LPCC::ICC_VS: // V
          case LPCC::ICC_VC: // V
          case LPCC::ICC_HI: // C Z
          case LPCC::ICC_LS: // C Z
          case LPCC::ICC_GE: // N V
          case LPCC::ICC_LT: // N V
          case LPCC::ICC_GT: // Z N V
          case LPCC::ICC_LE: // Z N V
            // The instruction uses the V bit or C bit which is not safe.
            return false;
          case LPCC::UNKNOWN:
            return false;
          }
        }
      }
    }

    // If SR is not killed nor re-defined, we should check whether it is
    // live-out. If it is live-out, do not optimize.
    if (!isSafe) {
      MachineBasicBlock *MBB = CmpInstr.getParent();
      for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
                                            SE = MBB->succ_end();
           SI != SE; ++SI)
        if ((*SI)->isLiveIn(Lanai::SR))
          return false;
    }

    // Toggle the optional operand to SR.
    MI->setDesc(get(flagSettingOpcodeVariant(MI->getOpcode())));
    MI->addRegisterDefined(Lanai::SR);
    CmpInstr.eraseFromParent();
    return true;
  }

  return false;
}
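// A minimal sketch (standard C++, hypothetical names) of the classification
// driving the switch above: each condition code is mapped to the set of flag
// bits it reads, and reusing an arithmetic instruction's flags in place of an
// explicit compare is only allowed when the condition reads Z and/or N alone.
enum FlagBits { Z = 1, N = 2, C = 4, V = 8 };

bool safeToReuseFlags(unsigned CondReads) {
  // Safe only if the condition depends on no bits outside Z and N.
  return (CondReads & (C | V)) == 0;
}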
/// foldMemoryOperand - Try folding stack slot references in Ops into their
/// instructions.
///
/// @param Ops    Operand indices from analyzeVirtReg().
/// @param LoadMI Load instruction to use instead of stack slot when non-null.
/// @return       True on success.
bool InlineSpiller::
foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops,
                  MachineInstr *LoadMI) {
  if (Ops.empty())
    return false;
  // Don't attempt folding in bundles.
  MachineInstr *MI = Ops.front().first;
  if (Ops.back().first != MI || MI->isBundled())
    return false;

  bool WasCopy = MI->isCopy();
  unsigned ImpReg = 0;

  bool SpillSubRegs = (MI->getOpcode() == TargetOpcode::STATEPOINT ||
                       MI->getOpcode() == TargetOpcode::PATCHPOINT ||
                       MI->getOpcode() == TargetOpcode::STACKMAP);

  // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied
  // operands.
  SmallVector<unsigned, 8> FoldOps;
  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
    unsigned Idx = Ops[i].second;
    assert(MI == Ops[i].first && "Instruction conflict during operand folding");
    MachineOperand &MO = MI->getOperand(Idx);
    if (MO.isImplicit()) {
      ImpReg = MO.getReg();
      continue;
    }
    // FIXME: Teach targets to deal with subregs.
    if (!SpillSubRegs && MO.getSubReg())
      return false;
    // We cannot fold a load instruction into a def.
    if (LoadMI && MO.isDef())
      return false;
    // Tied use operands should not be passed to foldMemoryOperand.
    if (!MI->isRegTiedToDefOperand(Idx))
      FoldOps.push_back(Idx);
  }

  MachineInstrSpan MIS(MI);

  MachineInstr *FoldMI =
                LoadMI ? TII.foldMemoryOperand(MI, FoldOps, LoadMI)
                       : TII.foldMemoryOperand(MI, FoldOps, StackSlot);
  if (!FoldMI)
    return false;

  // Remove LIS for any dead defs in the original MI not in FoldMI.
  for (MIBundleOperands MO(MI); MO.isValid(); ++MO) {
    if (!MO->isReg())
      continue;
    unsigned Reg = MO->getReg();
    if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) ||
        MRI.isReserved(Reg)) {
      continue;
    }
    // Skip non-Defs, including undef uses and internal reads.
    if (MO->isUse())
      continue;
    MIBundleOperands::PhysRegInfo RI =
      MIBundleOperands(FoldMI).analyzePhysReg(Reg, &TRI);
    if (RI.Defines)
      continue;
    // FoldMI does not define this physreg. Remove the LI segment.
    assert(MO->isDead() && "Cannot fold physreg def");
    SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot();
    LIS.removePhysRegDefAt(Reg, Idx);
  }

  LIS.ReplaceMachineInstrInMaps(MI, FoldMI);
  MI->eraseFromParent();

  // Insert any new instructions other than FoldMI into the LIS maps.
  assert(!MIS.empty() && "Unexpected empty span of instructions!");
  for (MachineBasicBlock::iterator MII = MIS.begin(), End = MIS.end();
       MII != End; ++MII)
    if (&*MII != FoldMI)
      LIS.InsertMachineInstrInMaps(&*MII);

  // TII.foldMemoryOperand may have left some implicit operands on the
  // instruction. Strip them.
  if (ImpReg)
    for (unsigned i = FoldMI->getNumOperands(); i; --i) {
      MachineOperand &MO = FoldMI->getOperand(i - 1);
      if (!MO.isReg() || !MO.isImplicit())
        break;
      if (MO.getReg() == ImpReg)
        FoldMI->RemoveOperand(i - 1);
    }

  DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MIS.end(), LIS,
                                           "folded"));

  if (!WasCopy)
    ++NumFolded;
  else if (Ops.front().second == 0)
    ++NumSpills;
  else
    ++NumReloads;
  return true;
}
bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {

  DEBUG(dbgs() << "********** Hexagon New Value Jump **********\n"
               << "********** Function: " << MF.getName() << "\n");

#if 0
  // for now disable this, if we move NewValueJump before register
  // allocation we need this information.
  LiveVariables &LVs = getAnalysis<LiveVariables>();
#endif

  QII = static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo());
  QRI = static_cast<const HexagonRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  MBPI = &getAnalysis<MachineBranchProbabilityInfo>();

  if (!QRI->Subtarget.hasV4TOps() || DisableNewValueJumps) {
    return false;
  }

  int nvjCount = DbgNVJCount;
  int nvjGenerated = 0;

  // Loop through all the bb's of the function.
  for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end();
       MBBb != MBBe; ++MBBb) {
    MachineBasicBlock* MBB = MBBb;

    DEBUG(dbgs() << "** dumping bb ** " << MBB->getNumber() << "\n");
    DEBUG(MBB->dump());
    DEBUG(dbgs() << "\n" << "********** dumping instr bottom up **********\n");
    bool foundJump    = false;
    bool foundCompare = false;
    bool invertPredicate = false;
    unsigned predReg = 0; // predicate reg of the jump.
    unsigned cmpReg1 = 0;
    int cmpOp2 = 0;
    bool MO1IsKill = false;
    bool MO2IsKill = false;
    MachineBasicBlock::iterator jmpPos;
    MachineBasicBlock::iterator cmpPos;
    MachineInstr *cmpInstr = nullptr, *jmpInstr = nullptr;
    MachineBasicBlock *jmpTarget = nullptr;
    bool afterRA = false;
    bool isSecondOpReg = false;
    bool isSecondOpNewified = false;
    // Traverse the basic block - bottom up.
    for (MachineBasicBlock::iterator MII = MBB->end(), E = MBB->begin();
         MII != E;) {
      MachineInstr *MI = --MII;
      if (MI->isDebugValue()) {
        continue;
      }

      if ((nvjCount == 0) || (nvjCount > -1 && nvjCount <= nvjGenerated))
        break;

      DEBUG(dbgs() << "Instr: "; MI->dump(); dbgs() << "\n");

      if (!foundJump &&
          (MI->getOpcode() == Hexagon::JMP_t ||
           MI->getOpcode() == Hexagon::JMP_f ||
           MI->getOpcode() == Hexagon::JMP_tnew_t ||
           MI->getOpcode() == Hexagon::JMP_tnew_nt ||
           MI->getOpcode() == Hexagon::JMP_fnew_t ||
           MI->getOpcode() == Hexagon::JMP_fnew_nt)) {
        // This is where you would insert your compare and
        // instr that feeds compare.
        jmpPos = MII;
        jmpInstr = MI;
        predReg = MI->getOperand(0).getReg();
        afterRA = TargetRegisterInfo::isPhysicalRegister(predReg);

        // If ifconverter had not messed up with the kill flags of the
        // operands, the following check on the kill flag would suffice.
        // if(!jmpInstr->getOperand(0).isKill()) break;

        // This predicate register is live out of the BB.
        // This would only work if we could actually use live
        // variable analysis on phys regs - but LLVM does not
        // provide LV analysis on phys regs.
        //if(LVs.isLiveOut(predReg, *MBB)) break;

        // Get all the successors of this block - which will always
        // be 2. Check if the predicate register is live in to those
        // successors. If yes, we cannot delete the predicate -
        // I am doing this only because LLVM does not provide LiveOut
        // at the BB level.
        bool predLive = false;
        for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
             SIE = MBB->succ_end(); SI != SIE; ++SI) {
          MachineBasicBlock* succMBB = *SI;
          if (succMBB->isLiveIn(predReg)) {
            predLive = true;
          }
        }
        if (predLive)
          break;

        jmpTarget = MI->getOperand(1).getMBB();
        foundJump = true;
        if (MI->getOpcode() == Hexagon::JMP_f ||
            MI->getOpcode() == Hexagon::JMP_fnew_t ||
            MI->getOpcode() == Hexagon::JMP_fnew_nt) {
          invertPredicate = true;
        }
        continue;
      }

      // No new value jump if there is a barrier. A barrier has to be in its
      // own packet. A barrier has zero operands. We conservatively bail out
      // here if we see any instruction with zero operands.
      if (foundJump && MI->getNumOperands() == 0)
        break;

      if (foundJump &&
          !foundCompare &&
          MI->getOperand(0).isReg() &&
          MI->getOperand(0).getReg() == predReg) {

        // Not all compares can be new value compare. Arch Spec: 7.6.1.1
        if (QII->isNewValueJumpCandidate(MI)) {

          assert((MI->getDesc().isCompare()) &&
                 "Only compare instruction can be collapsed into New Value "
                 "Jump");
          isSecondOpReg = MI->getOperand(2).isReg();

          if (!canCompareBeNewValueJump(QII, QRI, MII, predReg, isSecondOpReg,
                                        afterRA, jmpPos, MF))
            break;

          cmpInstr = MI;
          cmpPos = MII;
          foundCompare = true;

          // We need cmpReg1 and cmpOp2 (imm or reg) while building the
          // new value jump instruction.
          cmpReg1 = MI->getOperand(1).getReg();
          if (MI->getOperand(1).isKill())
            MO1IsKill = true;

          if (isSecondOpReg) {
            cmpOp2 = MI->getOperand(2).getReg();
            if (MI->getOperand(2).isKill())
              MO2IsKill = true;
          } else
            cmpOp2 = MI->getOperand(2).getImm();
          continue;
        }
      }

      if (foundCompare && foundJump) {

        // If "common" checks fail, bail out on this BB.
        if (!commonChecksToProhibitNewValueJump(afterRA, MII))
          break;

        bool foundFeeder = false;
        MachineBasicBlock::iterator feederPos = MII;
        if (MI->getOperand(0).isReg() &&
            MI->getOperand(0).isDef() &&
            (MI->getOperand(0).getReg() == cmpReg1 ||
             (isSecondOpReg &&
              MI->getOperand(0).getReg() == (unsigned) cmpOp2))) {

          unsigned feederReg = MI->getOperand(0).getReg();

          // First try to see if we can get the feeder from the first operand
          // of the compare. If we cannot, and if secondOpReg is true
          // (second operand of the compare is also a register), try that one.
          // TODO: Try to come up with some heuristic to figure out which
          // feeder would benefit.

          if (feederReg == cmpReg1) {
            if (!canBeFeederToNewValueJump(QII, QRI, MII, jmpPos, cmpPos, MF)) {
              if (!isSecondOpReg)
                break;
              else
                continue;
            } else
              foundFeeder = true;
          }

          if (!foundFeeder &&
              isSecondOpReg &&
              feederReg == (unsigned) cmpOp2)
            if (!canBeFeederToNewValueJump(QII, QRI, MII, jmpPos, cmpPos, MF))
              break;

          if (isSecondOpReg) {
            // In case of CMPLT, or CMPLTU, or EQ with the second register
            // to newify, swap the operands.
            if (cmpInstr->getOpcode() == Hexagon::C2_cmpeq &&
                feederReg == (unsigned) cmpOp2) {
              unsigned tmp = cmpReg1;
              bool tmpIsKill = MO1IsKill;
              cmpReg1 = cmpOp2;
              MO1IsKill = MO2IsKill;
              cmpOp2 = tmp;
              MO2IsKill = tmpIsKill;
            }

            // Now we have swapped the operands, all we need to check is
            // whether the second operand (after swap) is the feeder.
            // And if it is, make a note.
            if (feederReg == (unsigned)cmpOp2)
              isSecondOpNewified = true;
          }

          // Now that we are moving the feeder closer to the jump,
          // make sure we are respecting the kill values of
          // the operands of the feeder.

          bool updatedIsKill = false;
          for (unsigned i = 0; i < MI->getNumOperands(); i++) {
            MachineOperand &MO = MI->getOperand(i);
            if (MO.isReg() && MO.isUse()) {
              unsigned feederReg = MO.getReg();
              for (MachineBasicBlock::iterator localII = feederPos,
                   end = jmpPos; localII != end; localII++) {
                MachineInstr *localMI = localII;
                for (unsigned j = 0; j < localMI->getNumOperands(); j++) {
                  MachineOperand &localMO = localMI->getOperand(j);
                  if (localMO.isReg() && localMO.isUse() &&
                      localMO.isKill() && feederReg == localMO.getReg()) {
                    // We found a kill of the use register between the feeder
                    // and the jump; move the kill flag onto the feeder's
                    // operand instead.
                    localMO.setIsKill(false);
                    MO.setIsKill();
                    updatedIsKill = true;
                    break;
                  }
                }
                if (updatedIsKill) break;
              }
            }
            if (updatedIsKill) break;
          }

          MBB->splice(jmpPos, MI->getParent(), MI);
          MBB->splice(jmpPos, MI->getParent(), cmpInstr);
          DebugLoc dl = MI->getDebugLoc();
          MachineInstr *NewMI;

          assert((QII->isNewValueJumpCandidate(cmpInstr)) &&
                 "This compare is not a New Value Jump candidate.");
          unsigned opc = getNewValueJumpOpcode(cmpInstr, cmpOp2,
                                               isSecondOpNewified,
                                               jmpTarget, MBPI);
          if (invertPredicate)
            opc = QII->getInvertedPredicatedOpcode(opc);

          if (isSecondOpReg)
            NewMI = BuildMI(*MBB, jmpPos, dl, QII->get(opc))
                        .addReg(cmpReg1, getKillRegState(MO1IsKill))
                        .addReg(cmpOp2, getKillRegState(MO2IsKill))
                        .addMBB(jmpTarget);
          else if ((cmpInstr->getOpcode() == Hexagon::C2_cmpeqi ||
                    cmpInstr->getOpcode() == Hexagon::C2_cmpgti) &&
                   cmpOp2 == -1)
            // Corresponding new-value compare jump instructions don't have the
            // operand for -1 immediate value.
            NewMI = BuildMI(*MBB, jmpPos, dl, QII->get(opc))
                        .addReg(cmpReg1, getKillRegState(MO1IsKill))
                        .addMBB(jmpTarget);
          else
            NewMI = BuildMI(*MBB, jmpPos, dl, QII->get(opc))
                        .addReg(cmpReg1, getKillRegState(MO1IsKill))
                        .addImm(cmpOp2)
                        .addMBB(jmpTarget);

          assert(NewMI && "New Value Jump Instruction Not created!");
          (void)NewMI;
          if (cmpInstr->getOperand(0).isReg() &&
              cmpInstr->getOperand(0).isKill())
            cmpInstr->getOperand(0).setIsKill(false);
          if (cmpInstr->getOperand(1).isReg() &&
              cmpInstr->getOperand(1).isKill())
            cmpInstr->getOperand(1).setIsKill(false);
          cmpInstr->eraseFromParent();
          jmpInstr->eraseFromParent();
          ++nvjGenerated;
          ++NumNVJGenerated;
          break;
        }
      }
    }
  }

  return true;
}
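// Roughly, the transformation above collapses a feeder/compare/jump sequence
// into a single packet with a new-value compare-and-jump (illustrative
// assembly, register names hypothetical):
//
//   r1 = add(r2, r3)                 {  r1 = add(r2, r3)
//   p0 = cmp.eq(r1, r4)       =>        if (cmp.eq(r1.new, r4)) jump:t target
//   if (p0) jump target              }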
/// reMaterializeAll - Try to rematerialize as many uses of li_ as possible,
/// and trim the live ranges after.
void InlineSpiller::reMaterializeAll() {
  // Do a quick scan of the interval values to find if any are remattable.
  reMattable_.clear();
  usedValues_.clear();
  for (LiveInterval::const_vni_iterator I = li_->vni_begin(),
       E = li_->vni_end(); I != E; ++I) {
    VNInfo *VNI = *I;
    if (VNI->isUnused() || !VNI->isDefAccurate())
      continue;
    MachineInstr *DefMI = lis_.getInstructionFromIndex(VNI->def);
    if (!DefMI || !tii_.isTriviallyReMaterializable(DefMI))
      continue;
    reMattable_.insert(VNI);
  }

  // Often, no defs are remattable.
  if (reMattable_.empty())
    return;

  // Try to remat before all uses of li_->reg.
  bool anyRemat = false;
  for (MachineRegisterInfo::use_nodbg_iterator
       RI = mri_.use_nodbg_begin(li_->reg);
       MachineInstr *MI = RI.skipInstruction();)
    anyRemat |= reMaterializeFor(MI);

  if (!anyRemat)
    return;

  // Remove any values that were completely rematted.
  bool anyRemoved = false;
  for (SmallPtrSet<VNInfo*, 8>::iterator I = reMattable_.begin(),
       E = reMattable_.end(); I != E; ++I) {
    VNInfo *VNI = *I;
    if (VNI->hasPHIKill() || usedValues_.count(VNI))
      continue;
    MachineInstr *DefMI = lis_.getInstructionFromIndex(VNI->def);
    DEBUG(dbgs() << "\tremoving dead def: " << VNI->def << '\t' << *DefMI);
    lis_.RemoveMachineInstrFromMaps(DefMI);
    vrm_.RemoveMachineInstrFromMaps(DefMI);
    DefMI->eraseFromParent();
    VNI->setIsDefAccurate(false);
    anyRemoved = true;
  }

  if (!anyRemoved)
    return;

  // Removing values may cause debug uses where li_ is not live.
  for (MachineRegisterInfo::use_iterator RI = mri_.use_begin(li_->reg);
       MachineInstr *MI = RI.skipInstruction();) {
    if (!MI->isDebugValue())
      continue;
    // Try to preserve the debug value if li_ is live immediately after it.
    MachineBasicBlock::iterator NextMI = MI;
    ++NextMI;
    if (NextMI != MI->getParent()->end() && !lis_.isNotInMIMap(NextMI)) {
      VNInfo *VNI = li_->getVNInfoAt(lis_.getInstructionIndex(NextMI));
      if (VNI && (VNI->hasPHIKill() || usedValues_.count(VNI)))
        continue;
    }
    DEBUG(dbgs() << "Removing debug info due to remat:" << "\t" << *MI);
    MI->eraseFromParent();
  }
}
bool rvexInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                  MachineBasicBlock *&TBB,
                                  MachineBasicBlock *&FBB,
                                  SmallVectorImpl<MachineOperand> &Cond,
                                  bool AllowModify) const {
  MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend();

  // Skip all the debug instructions.
  while (I != REnd && I->isDebugValue())
    ++I;

  if (I == REnd || !isUnpredicatedTerminator(&*I)) {
    // If this block ends with no branches (it just falls through to its
    // successor), just return false, leaving TBB/FBB null.
    TBB = FBB = NULL;
    return false;
  }

  MachineInstr *LastInst = &*I;
  unsigned LastOpc = LastInst->getOpcode();

  // Not an analyzable branch (must be an indirect jump).
  if (!GetAnalyzableBrOpc(LastOpc)) {
    return true;
  }

  // Get the second to last instruction in the block.
  unsigned SecondLastOpc = 0;
  MachineInstr *SecondLastInst = NULL;

  if (++I != REnd) {
    SecondLastInst = &*I;
    SecondLastOpc = GetAnalyzableBrOpc(SecondLastInst->getOpcode());

    // Not an analyzable branch (must be an indirect jump).
    if (isUnpredicatedTerminator(SecondLastInst) && !SecondLastOpc) {
      return true;
    }
  }

  // If there is only one terminator instruction, process it.
  if (!SecondLastOpc) {
    // Unconditional branch.
    if (LastOpc == rvex::JMP) {
      TBB = LastInst->getOperand(0).getMBB();
      // If the target basic block is next in layout, remove the jump.
      if (MBB.isLayoutSuccessor(TBB)) {
        LastInst->eraseFromParent();
      }
      return false;
    }

    // Conditional branch.
    AnalyzeCondBr(LastInst, LastOpc, TBB, Cond);
    return false;
  }

  // If we reached here, there are two branches.
  // If there are three terminators, we don't know what sort of block this is.
  if (++I != REnd && isUnpredicatedTerminator(&*I)) {
    return true;
  }

  // If the second to last instruction is an unconditional branch,
  // analyze it and remove the last instruction.
  if (SecondLastOpc == rvex::JMP) {
    // Return if the last instruction cannot be removed.
    if (!AllowModify) {
      return true;
    }

    TBB = SecondLastInst->getOperand(0).getMBB();
    LastInst->eraseFromParent();
    return false;
  }

  // Conditional branch followed by an unconditional branch.
  // The last one must be unconditional.
  if (LastOpc != rvex::JMP) {
    return true;
  }

  AnalyzeCondBr(SecondLastInst, SecondLastOpc, TBB, Cond);
  FBB = LastInst->getOperand(0).getMBB();

  return false;
}
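// A hedged summary (comments only) of the AnalyzeBranch contract this
// implementation follows; callers must handle four successful outcomes:
//
//   1. Fallthrough only:           return false, TBB == FBB == NULL.
//   2. Unconditional branch:       return false, TBB set, FBB == NULL.
//   3. Conditional + fallthrough:  return false, TBB set, Cond non-empty.
//   4. Conditional + unconditional: return false, TBB and FBB set,
//      Cond non-empty.
//
// Returning true means "could not analyze"; the block is left unmodified
// except for simplifications explicitly permitted by AllowModify.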
DeadMemOpElimination::instr_iterator
DeadMemOpElimination::handleMemOp(instr_iterator I, DefMapTy &Defs,
                                  AliasSetTracker &AST) {
  MachineInstr *MI = I;

  MachineMemOperand *MO = *MI->memoperands_begin();
  // AliasAnalysis cannot handle offset right now, so we pretend to write a
  // big enough size to the location pointed to by the base pointer.
  uint64_t Size = MO->getSize() + MO->getOffset();
  AliasSet *ASet = &AST.getAliasSetForPointer(
      const_cast<Value*>(MO->getValue()), Size, 0);

  MachineInstr *&LastMI = Defs[ASet];

  bool canHandleLastStore = LastMI && ASet->isMustAlias()
                            && LastMI->getOpcode() != VTM::VOpInternalCall
                            // FIXME: We may need to remember the last
                            // definition for all predicates.
                            && isPredIdentical(LastMI, MI);

  if (canHandleLastStore) {
    MachineMemOperand *LastMO = *LastMI->memoperands_begin();
    // We can only handle the last store if and only if its memory operand has
    // the must-alias address and the same size.
    canHandleLastStore = LastMO->getSize() == MO->getSize()
                         && !LastMO->isVolatile()
                         && MachineMemOperandAlias(MO, LastMO, AA, SE)
                            == AliasAnalysis::MustAlias;
  }

  // FIXME: These eliminations are only valid if we are in single-thread mode!
  if (VInstrInfo::mayStore(MI)) {
    if (canHandleLastStore) {
      // Dead store found; remove it.
      LastMI->eraseFromParent();
      ++DeadStoreEliminated;
    }

    // Update the definition.
    LastMI = MI;
    return I;
  }

  // Now MI is a load.
  if (!canHandleLastStore)
    return I;

  // The load reads the value that was just stored, so the load is not
  // necessary.
  MachineOperand LoadedMO = MI->getOperand(0);
  MachineOperand StoredMO = LastMI->getOperand(2);

  // Simply replace the load by a copy.
  DebugLoc dl = MI->getDebugLoc();
  I = *BuildMI(*MI->getParent(), I, dl, VInstrInfo::getDesc(VTM::VOpMove))
         .addOperand(LoadedMO).addOperand(StoredMO)
         .addOperand(*VInstrInfo::getPredOperand(MI))
         .addOperand(*VInstrInfo::getTraceOperand(MI));

  MI->eraseFromParent();
  ++DeadLoadEliminated;
  return I;
}
bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
                                                  const CallContext &Context) {
  // Ok, we can in fact do the transformation for this call.
  // Do not remove the FrameSetup instruction, but adjust the parameters.
  // PEI will end up finalizing the handling of this.
  MachineBasicBlock::iterator FrameSetup = Context.FrameSetup;
  MachineBasicBlock &MBB = *(FrameSetup->getParent());
  FrameSetup->getOperand(1).setImm(Context.ExpectedDist);

  DebugLoc DL = FrameSetup->getDebugLoc();
  // Now, iterate through the vector in reverse order, and replace the movs
  // with pushes. MOVmi/MOVmr don't have any defs, so no need to
  // replace uses.
  for (int Idx = (Context.ExpectedDist / 4) - 1; Idx >= 0; --Idx) {
    MachineBasicBlock::iterator MOV = *Context.MovVector[Idx];
    MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands);
    MachineBasicBlock::iterator Push = nullptr;
    if (MOV->getOpcode() == X86::MOV32mi) {
      unsigned PushOpcode = X86::PUSHi32;
      // If the operand is a small (8-bit) immediate, we can use a
      // PUSH instruction with a shorter encoding.
      // Note that isImm() may fail even though this is a MOVmi, because
      // the operand can also be a symbol.
      if (PushOp.isImm()) {
        int64_t Val = PushOp.getImm();
        if (isInt<8>(Val))
          PushOpcode = X86::PUSH32i8;
      }
      Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode))
          .addOperand(PushOp);
    } else {
      unsigned int Reg = PushOp.getReg();

      // If PUSHrmm is not slow on this target, try to fold the source of the
      // push into the instruction.
      bool SlowPUSHrmm = STI->isAtom() || STI->isSLM();

      // Check that this is legal to fold. Right now, we're extremely
      // conservative about that.
      MachineInstr *DefMov = nullptr;
      if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) {
        Push = BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32rmm));

        unsigned NumOps = DefMov->getDesc().getNumOperands();
        for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
          Push->addOperand(DefMov->getOperand(i));

        DefMov->eraseFromParent();
      } else {
        Push = BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32r))
            .addReg(Reg)
            .getInstr();
      }
    }

    // For debugging, when using SP-based CFA, we need to adjust the CFA
    // offset after each push.
    // TODO: This is needed only if we require precise CFA.
    if (!TFL->hasFP(MF))
      TFL->BuildCFI(MBB, std::next(Push), DL,
                    MCCFIInstruction::createAdjustCfaOffset(nullptr, 4));

    MBB.erase(MOV);
  }

  // The stack-pointer copy is no longer used in the call sequences.
  // There should not be any other users, but we can't commit to that, so:
  if (MRI->use_empty(Context.SPCopy->getOperand(0).getReg()))
    Context.SPCopy->eraseFromParent();

  // Once we've done this, we need to make sure PEI doesn't assume a reserved
  // frame.
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  FuncInfo->setHasPushSequences(true);

  return true;
}
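// Illustrative before/after for the rewrite above (AT&T syntax, values
// hypothetical): argument stores through pre-reserved stack slots become
// pushes, so the explicit stack adjustment disappears. The movs are visited
// in reverse order, so the highest-offset argument is pushed first.
//
//   subl $8, %esp                  pushl $42
//   movl $42, 4(%esp)       =>     pushl %eax
//   movl %eax, (%esp)              calll callee
//   calll callee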
bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
  bool Changed = false;

  SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
  SmallVector<unsigned, 2> ImplicitDefsToUpdate;
  SmallVector<unsigned, 2> ImplicitDefs;
  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
    MachineInstr *MI = &*I;
    ++I;

    if (!isCSECandidate(MI))
      continue;

    bool FoundCSE = VNT.count(MI);
    if (!FoundCSE) {
      // Using trivial copy propagation to find more CSE opportunities.
      if (PerformTrivialCopyPropagation(MI, MBB)) {
        Changed = true;

        // After coalescing MI itself may become a copy.
        if (MI->isCopyLike())
          continue;

        // Try again to see if CSE is possible.
        FoundCSE = VNT.count(MI);
      }
    }

    // Commute commutable instructions.
    bool Commuted = false;
    if (!FoundCSE && MI->isCommutable()) {
      MachineInstr *NewMI = TII->commuteInstruction(MI);
      if (NewMI) {
        Commuted = true;
        FoundCSE = VNT.count(NewMI);
        if (NewMI != MI) {
          // New instruction. It doesn't need to be kept.
          NewMI->eraseFromParent();
          Changed = true;
        } else if (!FoundCSE)
          // MI was changed but it didn't help, commute it back!
          (void)TII->commuteInstruction(MI);
      }
    }

    // If the instruction defines physical registers and the values *may* be
    // used, then it's not safe to replace it with a common subexpression.
    // It's also not safe if the instruction uses physical registers.
    bool CrossMBBPhysDef = false;
    SmallSet<unsigned, 8> PhysRefs;
    SmallVector<unsigned, 2> PhysDefs;
    bool PhysUseDef = false;
    if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs,
                                          PhysDefs, PhysUseDef)) {
      FoundCSE = false;

      // ... Unless the CS is local or is in the sole predecessor block
      // and it also defines the physical register which is not clobbered
      // in between and the physical register uses were not clobbered.
      // This can never be the case if the instruction both uses and
      // defines the same physical register, which was detected above.
      if (!PhysUseDef) {
        unsigned CSVN = VNT.lookup(MI);
        MachineInstr *CSMI = Exps[CSVN];
        if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
          FoundCSE = true;
      }
    }

    if (!FoundCSE) {
      VNT.insert(MI, CurrVN++);
      Exps.push_back(MI);
      continue;
    }

    // Found a common subexpression, eliminate it.
    unsigned CSVN = VNT.lookup(MI);
    MachineInstr *CSMI = Exps[CSVN];
    DEBUG(dbgs() << "Examining: " << *MI);
    DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);

    // Check if it's profitable to perform this CSE.
    bool DoCSE = true;
    unsigned NumDefs = MI->getDesc().getNumDefs() +
                       MI->getDesc().getNumImplicitDefs();

    for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg() || !MO.isDef())
        continue;
      unsigned OldReg = MO.getReg();
      unsigned NewReg = CSMI->getOperand(i).getReg();

      // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
      // we should make sure it is not dead at CSMI.
      if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
        ImplicitDefsToUpdate.push_back(i);

      // Keep track of implicit defs of CSMI and MI, to clear possibly
      // made-redundant kill flags.
      if (MO.isImplicit() && !MO.isDead() && OldReg == NewReg)
        ImplicitDefs.push_back(OldReg);

      if (OldReg == NewReg) {
        --NumDefs;
        continue;
      }

      assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
             TargetRegisterInfo::isVirtualRegister(NewReg) &&
             "Do not CSE physical register defs!");

      if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) {
        DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
        DoCSE = false;
        break;
      }

      // Don't perform CSE if the result of the old instruction cannot exist
      // within the register class of the new instruction.
      const TargetRegisterClass *OldRC = MRI->getRegClass(OldReg);
      if (!MRI->constrainRegClass(NewReg, OldRC)) {
        DEBUG(dbgs() << "*** Not the same register class, avoid CSE!\n");
        DoCSE = false;
        break;
      }

      CSEPairs.push_back(std::make_pair(OldReg, NewReg));
      --NumDefs;
    }

    // Actually perform the elimination.
    if (DoCSE) {
      for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) {
        MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second);
        MRI->clearKillFlags(CSEPairs[i].second);
      }

      // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
      // we should make sure it is not dead at CSMI.
      for (unsigned i = 0, e = ImplicitDefsToUpdate.size(); i != e; ++i)
        CSMI->getOperand(ImplicitDefsToUpdate[i]).setIsDead(false);

      // Go through implicit defs of CSMI and MI, and clear the kill flags on
      // their uses in all the instructions between CSMI and MI.
      // We might have made some of the kill flags redundant, consider:
      //   subs  ... %NZCV<imp-def>        <- CSMI
      //   csinc ... %NZCV<imp-use,kill>   <- this kill flag isn't valid anymore
      //   subs  ... %NZCV<imp-def>        <- MI, to be eliminated
      //   csinc ... %NZCV<imp-use,kill>
      // Since we eliminated MI, and reused a register imp-def'd by CSMI
      // (here %NZCV), that register, if it was killed before MI, should have
      // that kill flag removed, because its lifetime was extended.
      if (CSMI->getParent() == MI->getParent()) {
        for (MachineBasicBlock::iterator II = CSMI, IE = MI; II != IE; ++II)
          for (auto ImplicitDef : ImplicitDefs)
            if (MachineOperand *MO = II->findRegisterUseOperand(
                    ImplicitDef, /*isKill=*/true, TRI))
              MO->setIsKill(false);
      } else {
        // If the instructions aren't in the same BB, bail out and clear the
        // kill flag on all uses of the imp-def'd register.
        for (auto ImplicitDef : ImplicitDefs)
          MRI->clearKillFlags(ImplicitDef);
      }

      if (CrossMBBPhysDef) {
        // Add physical register defs now coming in from a predecessor to MBB
        // livein list.
        while (!PhysDefs.empty()) {
          unsigned LiveIn = PhysDefs.pop_back_val();
          if (!MBB->isLiveIn(LiveIn))
            MBB->addLiveIn(LiveIn);
        }
        ++NumCrossBBCSEs;
      }

      MI->eraseFromParent();
      ++NumCSEs;
      if (!PhysRefs.empty())
        ++NumPhysCSEs;
      if (Commuted)
        ++NumCommutes;
      Changed = true;
    } else {
      VNT.insert(MI, CurrVN++);
      Exps.push_back(MI);
    }
    CSEPairs.clear();
    ImplicitDefsToUpdate.clear();
    ImplicitDefs.clear();
  }

  return Changed;
}
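// A toy sketch (standard C++, hypothetical types) of the value-numbering
// scheme that VNT/Exps implement above: identical expression keys map to the
// same value number, and the first instruction producing a number is the
// canonical one that later duplicates are replaced with.
#include <map>
#include <string>
#include <vector>

struct Expr { std::string Key; }; // e.g. "add %a, %b" after normalization.

int lookupOrInsert(const Expr &E, std::map<std::string, int> &VNT,
                   std::vector<const Expr *> &Exps) {
  auto It = VNT.find(E.Key);
  if (It != VNT.end())
    return It->second;        // CSE hit: reuse the canonical definition.
  int VN = static_cast<int>(Exps.size());
  VNT.emplace(E.Key, VN);     // First sighting becomes canonical.
  Exps.push_back(&E);
  return VN;
}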
// transformInstruction - Perform the transformation of an instruction // to its equivalent AdvSIMD scalar instruction. Update inputs and outputs // to be the correct register class, minimizing cross-class copies. void AArch64AdvSIMDScalar::transformInstruction(MachineInstr &MI) { DEBUG(dbgs() << "Scalar transform: " << MI); MachineBasicBlock *MBB = MI.getParent(); unsigned OldOpc = MI.getOpcode(); unsigned NewOpc = getTransformOpcode(OldOpc); assert(OldOpc != NewOpc && "transform an instruction to itself?!"); // Check if we need a copy for the source registers. unsigned OrigSrc0 = MI.getOperand(1).getReg(); unsigned OrigSrc1 = MI.getOperand(2).getReg(); unsigned Src0 = 0, SubReg0; unsigned Src1 = 0, SubReg1; bool KillSrc0 = false, KillSrc1 = false; if (!MRI->def_empty(OrigSrc0)) { MachineRegisterInfo::def_instr_iterator Def = MRI->def_instr_begin(OrigSrc0); assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!"); MachineOperand *MOSrc0 = getSrcFromCopy(&*Def, MRI, SubReg0); // If there are no other users of the original source, we can delete // that instruction. if (MOSrc0) { Src0 = MOSrc0->getReg(); KillSrc0 = MOSrc0->isKill(); // Src0 is going to be reused, thus, it cannot be killed anymore. MOSrc0->setIsKill(false); if (MRI->hasOneNonDBGUse(OrigSrc0)) { assert(MOSrc0 && "Can't delete copy w/o a valid original source!"); Def->eraseFromParent(); ++NumCopiesDeleted; } } } if (!MRI->def_empty(OrigSrc1)) { MachineRegisterInfo::def_instr_iterator Def = MRI->def_instr_begin(OrigSrc1); assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!"); MachineOperand *MOSrc1 = getSrcFromCopy(&*Def, MRI, SubReg1); // If there are no other users of the original source, we can delete // that instruction. if (MOSrc1) { Src1 = MOSrc1->getReg(); KillSrc1 = MOSrc1->isKill(); // Src1 is going to be reused, thus, it cannot be killed anymore. MOSrc1->setIsKill(false); if (MRI->hasOneNonDBGUse(OrigSrc1)) { assert(MOSrc1 && "Can't delete copy w/o a valid original source!"); Def->eraseFromParent(); ++NumCopiesDeleted; } } } // If we weren't able to reference the original source directly, create a // copy. if (!Src0) { SubReg0 = 0; Src0 = MRI->createVirtualRegister(&AArch64::FPR64RegClass); insertCopy(TII, MI, Src0, OrigSrc0, KillSrc0); KillSrc0 = true; } if (!Src1) { SubReg1 = 0; Src1 = MRI->createVirtualRegister(&AArch64::FPR64RegClass); insertCopy(TII, MI, Src1, OrigSrc1, KillSrc1); KillSrc1 = true; } // Create a vreg for the destination. // FIXME: No need to do this if the ultimate user expects an FPR64. // Check for that and avoid the copy if possible. unsigned Dst = MRI->createVirtualRegister(&AArch64::FPR64RegClass); // For now, all of the new instructions have the same simple three-register // form, so no need to special case based on what instruction we're // building. BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), Dst) .addReg(Src0, getKillRegState(KillSrc0), SubReg0) .addReg(Src1, getKillRegState(KillSrc1), SubReg1); // Now copy the result back out to a GPR. // FIXME: Try to avoid this if all uses could actually just use the FPR64 // directly. insertCopy(TII, MI, MI.getOperand(0).getReg(), Dst, true); // Erase the old instruction. MI.eraseFromParent(); ++NumScalarInsnsUsed; }
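// A hedged before/after sketch of this transform (register names and the
// ADDXrr -> ADDv1i64 mapping are illustrative; getTransformOpcode defines
// the real mapping):
//   %gpr0 = COPY %fpr0            <- found by getSrcFromCopy, deleted if dead
//   %gpr1 = COPY %fpr1
//   %gpr2 = ADDXrr %gpr0, %gpr1
// becomes, roughly,
//   %fpr2 = ADDv1i64 %fpr0, %fpr1
//   %gpr2 = COPY %fpr2            <- copy back out; see the FIXME above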
/// runOnMachineFunction - Reduce two-address instructions to two operands. /// bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { DEBUG(errs() << "Machine Function\n"); const TargetMachine &TM = MF.getTarget(); MRI = &MF.getRegInfo(); TII = TM.getInstrInfo(); TRI = TM.getRegisterInfo(); LV = getAnalysisIfAvailable<LiveVariables>(); AA = &getAnalysis<AliasAnalysis>(); bool MadeChange = false; DEBUG(errs() << "********** REWRITING TWO-ADDR INSTRS **********\n"); DEBUG(errs() << "********** Function: " << MF.getFunction()->getName() << '\n'); // ReMatRegs - Keep track of the registers whose defs are remat'ed. BitVector ReMatRegs; ReMatRegs.resize(MRI->getLastVirtReg()+1); typedef DenseMap<unsigned, SmallVector<std::pair<unsigned, unsigned>, 4> > TiedOperandMap; TiedOperandMap TiedOperands(4); SmallPtrSet<MachineInstr*, 8> Processed; for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end(); mbbi != mbbe; ++mbbi) { unsigned Dist = 0; DistanceMap.clear(); SrcRegMap.clear(); DstRegMap.clear(); Processed.clear(); for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end(); mi != me; ) { MachineBasicBlock::iterator nmi = next(mi); const TargetInstrDesc &TID = mi->getDesc(); bool FirstTied = true; DistanceMap.insert(std::make_pair(mi, ++Dist)); ProcessCopy(&*mi, &*mbbi, Processed); // First scan through all the tied register uses in this instruction // and record a list of pairs of tied operands for each register. unsigned NumOps = (mi->getOpcode() == TargetInstrInfo::INLINEASM) ? mi->getNumOperands() : TID.getNumOperands(); for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) { unsigned DstIdx = 0; if (!mi->isRegTiedToDefOperand(SrcIdx, &DstIdx)) continue; if (FirstTied) { FirstTied = false; ++NumTwoAddressInstrs; DEBUG(errs() << '\t' << *mi); } assert(mi->getOperand(SrcIdx).isReg() && mi->getOperand(SrcIdx).getReg() && mi->getOperand(SrcIdx).isUse() && "two address instruction invalid"); unsigned regB = mi->getOperand(SrcIdx).getReg(); TiedOperandMap::iterator OI = TiedOperands.find(regB); if (OI == TiedOperands.end()) { SmallVector<std::pair<unsigned, unsigned>, 4> TiedPair; OI = TiedOperands.insert(std::make_pair(regB, TiedPair)).first; } OI->second.push_back(std::make_pair(SrcIdx, DstIdx)); } // Now iterate over the information collected above. for (TiedOperandMap::iterator OI = TiedOperands.begin(), OE = TiedOperands.end(); OI != OE; ++OI) { SmallVector<std::pair<unsigned, unsigned>, 4> &TiedPairs = OI->second; // If the instruction has a single pair of tied operands, try some // transformations that may either eliminate the tied operands or // improve the opportunities for coalescing away the register copy. if (TiedOperands.size() == 1 && TiedPairs.size() == 1) { unsigned SrcIdx = TiedPairs[0].first; unsigned DstIdx = TiedPairs[0].second; // If the registers are already equal, nothing needs to be done. if (mi->getOperand(SrcIdx).getReg() == mi->getOperand(DstIdx).getReg()) break; // Done with this instruction. if (TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist)) break; // The tied operands have been eliminated. } bool RemovedKillFlag = false; bool AllUsesCopied = true; unsigned LastCopiedReg = 0; unsigned regB = OI->first; for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { unsigned SrcIdx = TiedPairs[tpi].first; unsigned DstIdx = TiedPairs[tpi].second; unsigned regA = mi->getOperand(DstIdx).getReg(); // Grab regB from the instruction because it may have changed if the // instruction was commuted.
regB = mi->getOperand(SrcIdx).getReg(); if (regA == regB) { // The register is tied to multiple destinations (or else we would // not have continued this far), but this use of the register // already matches the tied destination. Leave it. AllUsesCopied = false; continue; } LastCopiedReg = regA; assert(TargetRegisterInfo::isVirtualRegister(regB) && "cannot make instruction into two-address form"); #ifndef NDEBUG // First, verify that we don't have a use of "a" in the instruction // (a = b + a for example) because our transformation will not // work. This should never occur because we are in SSA form. for (unsigned i = 0; i != mi->getNumOperands(); ++i) assert(i == DstIdx || !mi->getOperand(i).isReg() || mi->getOperand(i).getReg() != regA); #endif // Emit a copy or rematerialize the definition. const TargetRegisterClass *rc = MRI->getRegClass(regB); MachineInstr *DefMI = MRI->getVRegDef(regB); // If it's safe and profitable, remat the definition instead of // copying it. if (DefMI && DefMI->getDesc().isAsCheapAsAMove() && DefMI->isSafeToReMat(TII, regB, AA) && isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){ DEBUG(errs() << "2addr: REMATTING : " << *DefMI << "\n"); unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg(); TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, TRI); ReMatRegs.set(regB); ++NumReMats; } else { bool Emitted = TII->copyRegToReg(*mbbi, mi, regA, regB, rc, rc); (void)Emitted; assert(Emitted && "Unable to issue a copy instruction!\n"); } MachineBasicBlock::iterator prevMI = prior(mi); // Update DistanceMap. DistanceMap.insert(std::make_pair(prevMI, Dist)); DistanceMap[mi] = ++Dist; DEBUG(errs() << "\t\tprepend:\t" << *prevMI); MachineOperand &MO = mi->getOperand(SrcIdx); assert(MO.isReg() && MO.getReg() == regB && MO.isUse() && "inconsistent operand info for 2-reg pass"); if (MO.isKill()) { MO.setIsKill(false); RemovedKillFlag = true; } MO.setReg(regA); } if (AllUsesCopied) { // Replace other (un-tied) uses of regB with LastCopiedReg. for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { MachineOperand &MO = mi->getOperand(i); if (MO.isReg() && MO.getReg() == regB && MO.isUse()) { if (MO.isKill()) { MO.setIsKill(false); RemovedKillFlag = true; } MO.setReg(LastCopiedReg); } } // Update live variables for regB. if (RemovedKillFlag && LV && LV->getVarInfo(regB).removeKill(mi)) LV->addVirtualRegisterKilled(regB, prior(mi)); } else if (RemovedKillFlag) { // Some tied uses of regB matched their destination registers, so // regB is still used in this instruction, but a kill flag was // removed from a different tied use of regB, so now we need to add // a kill flag to one of the remaining uses of regB. for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { MachineOperand &MO = mi->getOperand(i); if (MO.isReg() && MO.getReg() == regB && MO.isUse()) { MO.setIsKill(true); break; } } } MadeChange = true; DEBUG(errs() << "\t\trewrite to:\t" << *mi); } // Clear TiedOperands here instead of at the top of the loop // since most instructions do not have tied operands. TiedOperands.clear(); mi = nmi; } } // Some remat'ed instructions are dead. int VReg = ReMatRegs.find_first(); while (VReg != -1) { if (MRI->use_empty(VReg)) { MachineInstr *DefMI = MRI->getVRegDef(VReg); DefMI->eraseFromParent(); } VReg = ReMatRegs.find_next(VReg); } return MadeChange; }
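// The core rewrite this pass performs, sketched on hypothetical vregs:
//   %a = ADD %b, %c      <- the use %b (SrcIdx) is tied to the def %a (DstIdx)
// becomes
//   %a = COPY %b         <- copy (or rematerialization of %b's def) prepended
//   %a = ADD %a, %c      <- the tied use now matches its destination
// The inserted copy is expected to be coalesced away later where possible.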
/// foldMemoryOperand - Try folding stack slot references in Ops into their /// instructions. /// /// @param Ops Operand indices from analyzeVirtReg(). /// @param LoadMI Load instruction to use instead of stack slot when non-null. /// @return True on success. bool InlineSpiller:: foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, MachineInstr *LoadMI) { if (Ops.empty()) return false; // Don't attempt folding in bundles. MachineInstr *MI = Ops.front().first; if (Ops.back().first != MI || MI->isBundled()) return false; bool WasCopy = MI->isCopy(); unsigned ImpReg = 0; // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied // operands. SmallVector<unsigned, 8> FoldOps; for (unsigned i = 0, e = Ops.size(); i != e; ++i) { unsigned Idx = Ops[i].second; MachineOperand &MO = MI->getOperand(Idx); if (MO.isImplicit()) { ImpReg = MO.getReg(); continue; } // FIXME: Teach targets to deal with subregs. if (MO.getSubReg()) return false; // We cannot fold a load instruction into a def. if (LoadMI && MO.isDef()) return false; // Tied use operands should not be passed to foldMemoryOperand. if (!MI->isRegTiedToDefOperand(Idx)) FoldOps.push_back(Idx); } MachineInstr *FoldMI = LoadMI ? TII.foldMemoryOperand(MI, FoldOps, LoadMI) : TII.foldMemoryOperand(MI, FoldOps, StackSlot); if (!FoldMI) return false; // Remove LIS for any dead defs in the original MI not in FoldMI. for (MIBundleOperands MO(MI); MO.isValid(); ++MO) { if (!MO->isReg()) continue; unsigned Reg = MO->getReg(); if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || MRI.isReserved(Reg)) { continue; } MIBundleOperands::PhysRegInfo RI = MIBundleOperands(FoldMI).analyzePhysReg(Reg, &TRI); if (MO->readsReg()) { assert(RI.Reads && "Cannot fold physreg reader"); continue; } if (RI.Defines) continue; // FoldMI does not define this physreg. Remove the LI segment. assert(MO->isDead() && "Cannot fold physreg def"); for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) { if (LiveInterval *LI = LIS.getCachedRegUnit(*Units)) { SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); if (VNInfo *VNI = LI->getVNInfoAt(Idx)) LI->removeValNo(VNI); } } } LIS.ReplaceMachineInstrInMaps(MI, FoldMI); MI->eraseFromParent(); // TII.foldMemoryOperand may have left some implicit operands on the // instruction. Strip them. if (ImpReg) for (unsigned i = FoldMI->getNumOperands(); i; --i) { MachineOperand &MO = FoldMI->getOperand(i - 1); if (!MO.isReg() || !MO.isImplicit()) break; if (MO.getReg() == ImpReg) FoldMI->RemoveOperand(i - 1); } DEBUG(dbgs() << "\tfolded: " << LIS.getInstructionIndex(FoldMI) << '\t' << *FoldMI); if (!WasCopy) ++NumFolded; else if (Ops.front().second == 0) ++NumSpills; else ++NumReloads; return true; }
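// An illustrative fold (x86-flavored, purely a sketch): a reload feeding a
// single use such as
//   %v = MOV32rm <fi#N>        <- LoadMI or a reload from StackSlot
//   ADD32rr %d, %v
// may be rewritten by the target hook into
//   ADD32rm %d, <fi#N>
// after which the code above fixes up LiveIntervals, strips stray implicit
// operands, and erases the original instruction.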
void SILowerControlFlow::Branch(MachineInstr &MI) { if (MI.getOperand(0).getMBB() == MI.getParent()->getNextNode()) MI.eraseFromParent(); // If these aren't equal, this is probably an infinite loop. }
void SILowerControlFlow::emitElse(MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); const DebugLoc &DL = MI.getDebugLoc(); unsigned DstReg = MI.getOperand(0).getReg(); assert(MI.getOperand(0).getSubReg() == AMDGPU::NoSubRegister); bool ExecModified = MI.getOperand(3).getImm() != 0; MachineBasicBlock::iterator Start = MBB.begin(); // We are running before TwoAddressInstructions, and si_else's operands are // tied. In order to correctly tie the registers, split this into a copy of // the src, as that pass would. BuildMI(MBB, Start, DL, TII->get(AMDGPU::COPY), DstReg) .addOperand(MI.getOperand(1)); // Saved EXEC // This must be inserted before phis and any spill code inserted before the // else. MachineInstr *OrSaveExec = BuildMI(MBB, Start, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), DstReg) .addReg(DstReg); MachineBasicBlock *DestBB = MI.getOperand(2).getMBB(); MachineBasicBlock::iterator ElsePt(MI); if (ExecModified) { MachineInstr *And = BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_AND_B64), DstReg) .addReg(AMDGPU::EXEC) .addReg(DstReg); if (LIS) LIS->InsertMachineInstrInMaps(*And); } MachineInstr *Xor = BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_XOR_B64_term), AMDGPU::EXEC) .addReg(AMDGPU::EXEC) .addReg(DstReg); MachineInstr *Branch = BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::SI_MASK_BRANCH)) .addMBB(DestBB); if (!LIS) { MI.eraseFromParent(); return; } LIS->RemoveMachineInstrFromMaps(MI); MI.eraseFromParent(); LIS->InsertMachineInstrInMaps(*OrSaveExec); LIS->InsertMachineInstrInMaps(*Xor); LIS->InsertMachineInstrInMaps(*Branch); // src reg is tied to dst reg. LIS->removeInterval(DstReg); LIS->createAndComputeVirtRegInterval(DstReg); // Let this be recomputed. LIS->removeRegUnit(*MCRegUnitIterator(AMDGPU::EXEC, TRI)); }
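// The sequence emitted above looks roughly like this (DstReg and the
// destination block are placeholders):
//   %dst = COPY <saved-exec>              <- split off the tied source
//   %dst = S_OR_SAVEEXEC_B64 %dst
//   [%dst = S_AND_B64 $exec, %dst]        <- only when EXEC was modified
//   $exec = S_XOR_B64_term $exec, %dst
//   SI_MASK_BRANCH <dest-MBB>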
void SILowerControlFlow::LoadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset) { MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = MI.getDebugLoc(); MachineBasicBlock::iterator I = MI; unsigned Save = MI.getOperand(1).getReg(); unsigned Idx = MI.getOperand(3).getReg(); if (AMDGPU::SReg_32RegClass.contains(Idx)) { if (Offset) { BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0) .addReg(Idx) .addImm(Offset); } else { BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) .addReg(Idx); } MBB.insert(I, MovRel); } else { assert(AMDGPU::SReg_64RegClass.contains(Save)); assert(AMDGPU::VGPR_32RegClass.contains(Idx)); // Save the EXEC mask BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), Save) .addReg(AMDGPU::EXEC); // Read the next variant into VCC (lower 32 bits) <- also loop target BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), AMDGPU::VCC_LO) .addReg(Idx); // Move index from VCC into M0 BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) .addReg(AMDGPU::VCC_LO); // Compare the just read M0 value to all possible Idx values BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32)) .addReg(AMDGPU::M0) .addReg(Idx); // Update EXEC, save the original EXEC value to VCC BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC) .addReg(AMDGPU::VCC); if (Offset) { BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0) .addReg(AMDGPU::M0) .addImm(Offset); } // Do the actual move MBB.insert(I, MovRel); // Update EXEC, switch all done bits to 0 and all todo bits to 1 BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC) .addReg(AMDGPU::EXEC) .addReg(AMDGPU::VCC); // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) .addImm(-7); // Restore EXEC BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC) .addReg(Save); } MI.eraseFromParent(); }
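// For a VGPR index, the BuildMI calls above emit a "waterfall" loop of
// roughly this shape (operand names are placeholders):
//   s_mov_b64 <save>, exec
// loop:
//   v_readfirstlane_b32 vcc_lo, <idx>
//   s_mov_b32 m0, vcc_lo
//   v_cmp_eq_u32 m0, <idx>
//   s_and_saveexec_b64 vcc, vcc
//   [s_add_i32 m0, m0, <offset>]          <- only when Offset is non-zero
//   <MovRel>
//   s_xor_b64 exec, exec, vcc
//   s_cbranch_execnz loop                 <- the -7 immediate above
//   s_mov_b64 exec, <save>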
// Erase the nodes given in the Nodes set from DFG. In addition to removing // them from the DFG, if a node corresponds to a statement, the corresponding // machine instruction is erased from the function. bool DeadCodeElimination::erase(const SetVector<NodeId> &Nodes) { if (Nodes.empty()) return false; // Prepare the actual set of ref nodes to remove: ref nodes from Nodes // are included directly, for each InstrNode in Nodes, include the set // of all RefNodes from it. NodeList DRNs, DINs; for (auto I : Nodes) { auto BA = DFG.addr<NodeBase*>(I); uint16_t Type = BA.Addr->getType(); if (Type == NodeAttrs::Ref) { DRNs.push_back(DFG.addr<RefNode*>(I)); continue; } // If it's a code node, add all ref nodes from it. uint16_t Kind = BA.Addr->getKind(); if (Kind == NodeAttrs::Stmt || Kind == NodeAttrs::Phi) { for (auto N : NodeAddr<CodeNode*>(BA).Addr->members(DFG)) DRNs.push_back(N); DINs.push_back(DFG.addr<InstrNode*>(I)); } else { llvm_unreachable("Unexpected code node"); return false; } } // Sort the list so that use nodes are removed first. This makes the // "unlink" functions a bit faster. auto UsesFirst = [] (NodeAddr<RefNode*> A, NodeAddr<RefNode*> B) -> bool { uint16_t KindA = A.Addr->getKind(), KindB = B.Addr->getKind(); if (KindA == NodeAttrs::Use && KindB == NodeAttrs::Def) return true; if (KindA == NodeAttrs::Def && KindB == NodeAttrs::Use) return false; return A.Id < B.Id; }; std::sort(DRNs.begin(), DRNs.end(), UsesFirst); if (trace()) dbgs() << "Removing dead ref nodes:\n"; for (NodeAddr<RefNode*> RA : DRNs) { if (trace()) dbgs() << " " << PrintNode<RefNode*>(RA, DFG) << '\n'; if (DFG.IsUse(RA)) DFG.unlinkUse(RA); else if (DFG.IsDef(RA)) DFG.unlinkDef(RA); } // Now, remove all dead instruction nodes. for (NodeAddr<InstrNode*> IA : DINs) { NodeAddr<BlockNode*> BA = IA.Addr->getOwner(DFG); BA.Addr->removeMember(IA, DFG); if (!DFG.IsCode<NodeAttrs::Stmt>(IA)) continue; MachineInstr *MI = NodeAddr<StmtNode*>(IA).Addr->getCode(); if (trace()) dbgs() << "erasing: " << *MI; MI->eraseFromParent(); } return true; }
bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { SmallSetVector<MachineInstr*, 8> MaybeDeadCopies; // Candidates for deletion DenseMap<unsigned, MachineInstr*> AvailCopyMap; // Def -> available copies map DenseMap<unsigned, MachineInstr*> CopyMap; // Def -> copies map DenseMap<unsigned, unsigned> SrcMap; // Src -> Def map bool Changed = false; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) { MachineInstr *MI = &*I; ++I; if (MI->isCopy()) { unsigned Def = MI->getOperand(0).getReg(); unsigned Src = MI->getOperand(1).getReg(); if (TargetRegisterInfo::isVirtualRegister(Def) || TargetRegisterInfo::isVirtualRegister(Src)) report_fatal_error("MachineCopyPropagation should be run after" " register allocation!"); DenseMap<unsigned, MachineInstr*>::iterator CI = AvailCopyMap.find(Src); if (CI != AvailCopyMap.end()) { MachineInstr *CopyMI = CI->second; unsigned SrcSrc = CopyMI->getOperand(1).getReg(); if (!ReservedRegs.test(Def) && (!ReservedRegs.test(Src) || NoInterveningSideEffect(CopyMI, MI)) && (SrcSrc == Def || TRI->isSubRegister(SrcSrc, Def))) { // The two copies cancel out and the source of the first copy // hasn't been overridden, eliminate the second one. e.g. // %ECX<def> = COPY %EAX<kill> // ... nothing clobbered EAX. // %EAX<def> = COPY %ECX // => // %ECX<def> = COPY %EAX // // Also avoid eliminating a copy from reserved registers unless the // definition is proven not clobbered. e.g. // %RSP<def> = COPY %RAX // CALL // %RAX<def> = COPY %RSP CopyMI->getOperand(1).setIsKill(false); MI->eraseFromParent(); Changed = true; ++NumDeletes; continue; } } // If Src is defined by a previous copy, it cannot be eliminated. CI = CopyMap.find(Src); if (CI != CopyMap.end()) MaybeDeadCopies.remove(CI->second); for (const unsigned *AS = TRI->getAliasSet(Src); *AS; ++AS) { CI = CopyMap.find(*AS); if (CI != CopyMap.end()) MaybeDeadCopies.remove(CI->second); } // Copy is now a candidate for deletion. MaybeDeadCopies.insert(MI); // If 'Src' is previously source of another copy, then this earlier copy's // source is no longer available. e.g. // %xmm9<def> = copy %xmm2 // ... // %xmm2<def> = copy %xmm0 // ... // %xmm2<def> = copy %xmm9 SourceNoLongerAvailable(Def, SrcMap, AvailCopyMap); // Remember Def is defined by the copy. CopyMap[Def] = MI; AvailCopyMap[Def] = MI; for (const unsigned *SR = TRI->getSubRegisters(Def); *SR; ++SR) { CopyMap[*SR] = MI; AvailCopyMap[*SR] = MI; } // Remember source that's copied to Def. Once it's clobbered, then // it's no longer available for copy propagation. SrcMap[Src] = Def; continue; } // Not a copy. SmallVector<unsigned, 2> Defs; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!Reg) continue; if (TargetRegisterInfo::isVirtualRegister(Reg)) report_fatal_error("MachineCopyPropagation should be run after" " register allocation!"); if (MO.isDef()) { Defs.push_back(Reg); continue; } // If 'Reg' is defined by a copy, the copy is no longer a candidate // for elimination. DenseMap<unsigned, MachineInstr*>::iterator CI = CopyMap.find(Reg); if (CI != CopyMap.end()) MaybeDeadCopies.remove(CI->second); for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) { CI = CopyMap.find(*AS); if (CI != CopyMap.end()) MaybeDeadCopies.remove(CI->second); } } for (unsigned i = 0, e = Defs.size(); i != e; ++i) { unsigned Reg = Defs[i]; // No longer defined by a copy. 
CopyMap.erase(Reg); AvailCopyMap.erase(Reg); for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) { CopyMap.erase(*AS); AvailCopyMap.erase(*AS); } // If 'Reg' is previously source of a copy, it is no longer available for // copy propagation. SourceNoLongerAvailable(Reg, SrcMap, AvailCopyMap); } } // If MBB doesn't have successors, delete the copies whose defs are not used. // If MBB does have successors, then conservatively assume the defs are // live-out since we don't want to trust live-in lists. if (MBB.succ_empty()) { for (SmallSetVector<MachineInstr*, 8>::iterator DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end(); DI != DE; ++DI) { if (!ReservedRegs.test((*DI)->getOperand(0).getReg())) { (*DI)->eraseFromParent(); Changed = true; ++NumDeletes; } } } return Changed; }
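// End-of-block cleanup sketch (register names hypothetical): in a block
// with no successors such as
//   %ecx = COPY %eax     <- sits in MaybeDeadCopies
//   ... no reads of %ecx before the block ends ...
// the copied value is never used, so the COPY is deleted above, provided
// %ecx is not a reserved register.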
/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each /// of its predecessors. bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, bool IsSimple, MachineFunction &MF, SmallVector<MachineBasicBlock*, 8> &TDBBs, SmallVector<MachineInstr*, 16> &Copies) { DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); DenseSet<unsigned> UsedByPhi; getRegsUsedByPHIs(*TailBB, &UsedByPhi); if (IsSimple) return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies); // Iterate through all the unique predecessors and tail-duplicate this // block into them, if possible. Copying the list ahead of time also // avoids trouble with the predecessor list reallocating. bool Changed = false; SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(), TailBB->pred_end()); for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), PE = Preds.end(); PI != PE; ++PI) { MachineBasicBlock *PredBB = *PI; assert(TailBB != PredBB && "Single-block loop should have been rejected earlier!"); // EH edges are ignored by AnalyzeBranch. if (PredBB->succ_size() > 1) continue; MachineBasicBlock *PredTBB, *PredFBB; SmallVector<MachineOperand, 4> PredCond; if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) continue; if (!PredCond.empty()) continue; // Don't duplicate into a fall-through predecessor (at least for now). if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) continue; DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB << "From Succ: " << *TailBB); TDBBs.push_back(PredBB); // Remove PredBB's unconditional branch. TII->RemoveBranch(*PredBB); if (RS && !TailBB->livein_empty()) { // Update PredBB livein. RS->enterBasicBlock(PredBB); if (!PredBB->empty()) RS->forward(prior(PredBB->end())); BitVector RegsLiveAtExit(TRI->getNumRegs()); RS->getRegsUsed(RegsLiveAtExit, false); for (MachineBasicBlock::livein_iterator I = TailBB->livein_begin(), E = TailBB->livein_end(); I != E; ++I) { if (!RegsLiveAtExit[*I]) // If a register is previously livein to the tail but it's not live // at the end of predecessor BB, then it should be added to its // livein list. PredBB->addLiveIn(*I); } } // Clone the contents of TailBB into PredBB. DenseMap<unsigned, unsigned> LocalVRMap; SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; // Use instr_iterator here to properly handle bundles, e.g. // ARM Thumb2 IT block. MachineBasicBlock::instr_iterator I = TailBB->instr_begin(); while (I != TailBB->instr_end()) { MachineInstr *MI = &*I; ++I; if (MI->isPHI()) { // Replace the uses of the def of the PHI with the register coming // from PredBB. ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true); } else { // Replace def of virtual registers with new registers, and update // uses with PHI source register or the new registers. DuplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap, UsedByPhi); } } MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(), TII->get(TargetOpcode::COPY), CopyInfos[i].first).addReg(CopyInfos[i].second)); } // Simplify TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true); NumInstrDups += TailBB->size() - 1; // subtract one for removed branch // Update the CFG. 
PredBB->removeSuccessor(PredBB->succ_begin()); assert(PredBB->succ_empty() && "TailDuplicate called on block with multiple successors!"); for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(), E = TailBB->succ_end(); I != E; ++I) PredBB->addSuccessor(*I); Changed = true; ++NumTailDups; } // If TailBB was duplicated into all its predecessors except for the prior // block, which falls through unconditionally, move the contents of this // block into the prior block. MachineBasicBlock *PrevBB = prior(MachineFunction::iterator(TailBB)); MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0; SmallVector<MachineOperand, 4> PriorCond; // This has to check PrevBB->succ_size() because EH edges are ignored by // AnalyzeBranch. if (PrevBB->succ_size() == 1 && !TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) && PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 && !TailBB->hasAddressTaken()) { DEBUG(dbgs() << "\nMerging into block: " << *PrevBB << "From MBB: " << *TailBB); if (PreRegAlloc) { DenseMap<unsigned, unsigned> LocalVRMap; SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; MachineBasicBlock::iterator I = TailBB->begin(); // Process PHI instructions first. while (I != TailBB->end() && I->isPHI()) { // Replace the uses of the def of the PHI with the register coming // from PredBB. MachineInstr *MI = &*I++; ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true); if (MI->getParent()) MI->eraseFromParent(); } // Now copy the non-PHI instructions. while (I != TailBB->end()) { // Replace def of virtual registers with new registers, and update // uses with PHI source register or the new registers. MachineInstr *MI = &*I++; assert(!MI->isBundle() && "Not expecting bundles before regalloc!"); DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi); MI->eraseFromParent(); } MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator(); for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { Copies.push_back(BuildMI(*PrevBB, Loc, DebugLoc(), TII->get(TargetOpcode::COPY), CopyInfos[i].first) .addReg(CopyInfos[i].second)); } } else { // No PHIs to worry about, just splice the instructions over. PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end()); } PrevBB->removeSuccessor(PrevBB->succ_begin()); assert(PrevBB->succ_empty()); PrevBB->transferSuccessors(TailBB); TDBBs.push_back(PrevBB); Changed = true; } // If this is after register allocation, there are no phis to fix. if (!PreRegAlloc) return Changed; // If we made no changes so far, we are safe. if (!Changed) return Changed; // Handle the nasty case in which we duplicated a block that is part of a // loop into some but not all of its predecessors. For example: // 1 -> 2 <-> 3 | // \ | // \---> rest | // if we duplicate 2 into 1 but not into 3, we end up with // 12 -> 3 <-> 2 -> rest | // \ / | // \----->-----/ | // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced // with a phi in 3 (which now dominates 2). // What we do here is introduce a copy in 3 of the register defined by the // phi, just like when we are duplicating 2 into 3, but we don't copy any // real instructions or remove the 3 -> 2 edge from the phi in 2.
for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), PE = Preds.end(); PI != PE; ++PI) { MachineBasicBlock *PredBB = *PI; if (std::find(TDBBs.begin(), TDBBs.end(), PredBB) != TDBBs.end()) continue; // EH edges if (PredBB->succ_size() != 1) continue; DenseMap<unsigned, unsigned> LocalVRMap; SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; MachineBasicBlock::iterator I = TailBB->begin(); // Process PHI instructions first. while (I != TailBB->end() && I->isPHI()) { // Replace the uses of the def of the PHI with the register coming // from PredBB. MachineInstr *MI = &*I++; ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false); } MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(), TII->get(TargetOpcode::COPY), CopyInfos[i].first).addReg(CopyInfos[i].second)); } } return Changed; }
bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { const PPCInstrInfo *TII = static_cast<const PPCInstrInfo*>(Fn.getTarget().getInstrInfo()); // Give the blocks of the function a dense, in-order, numbering. Fn.RenumberBlocks(); BlockSizes.resize(Fn.getNumBlockIDs()); // Measure each MBB and compute a size for the entire function. unsigned FuncSize = 0; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; ++MFI) { MachineBasicBlock *MBB = MFI; unsigned BlockSize = 0; for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end(); MBBI != EE; ++MBBI) BlockSize += TII->GetInstSizeInBytes(MBBI); BlockSizes[MBB->getNumber()] = BlockSize; FuncSize += BlockSize; } // If the entire function is smaller than the displacement of a branch field, // we know we don't need to shrink any branches in this function. This is a // common case. if (FuncSize < (1 << 15)) { BlockSizes.clear(); return false; } // For each conditional branch, if the offset to its destination is larger // than the offset field allows, transform it into a long branch sequence // like this: // short branch: // bCC MBB // long branch: // b!CC $PC+8 // b MBB // bool MadeChange = true; bool EverMadeChange = false; while (MadeChange) { // Iteratively expand branches until we reach a fixed point. MadeChange = false; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; ++MFI) { MachineBasicBlock &MBB = *MFI; unsigned MBBStartOffset = 0; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { MachineBasicBlock *Dest = nullptr; if (I->getOpcode() == PPC::BCC && !I->getOperand(2).isImm()) Dest = I->getOperand(2).getMBB(); else if ((I->getOpcode() == PPC::BC || I->getOpcode() == PPC::BCn) && !I->getOperand(1).isImm()) Dest = I->getOperand(1).getMBB(); else if ((I->getOpcode() == PPC::BDNZ8 || I->getOpcode() == PPC::BDNZ || I->getOpcode() == PPC::BDZ8 || I->getOpcode() == PPC::BDZ) && !I->getOperand(0).isImm()) Dest = I->getOperand(0).getMBB(); if (!Dest) { MBBStartOffset += TII->GetInstSizeInBytes(I); continue; } // Determine the offset from the current branch to the destination // block. int BranchSize; if (Dest->getNumber() <= MBB.getNumber()) { // If this is a backwards branch, the delta is the offset from the // start of this block to this branch, plus the sizes of all blocks // from this block to the dest. BranchSize = MBBStartOffset; for (unsigned i = Dest->getNumber(), e = MBB.getNumber(); i != e; ++i) BranchSize += BlockSizes[i]; } else { // Otherwise, add the size of the blocks between this block and the // dest to the number of bytes left in this block. BranchSize = -MBBStartOffset; for (unsigned i = MBB.getNumber(), e = Dest->getNumber(); i != e; ++i) BranchSize += BlockSizes[i]; } // If this branch is in range, ignore it. if (isInt<16>(BranchSize)) { MBBStartOffset += 4; continue; } // Otherwise, we have to expand it to a long branch. MachineInstr *OldBranch = I; DebugLoc dl = OldBranch->getDebugLoc(); if (I->getOpcode() == PPC::BCC) { // The BCC operands are: // 0. PPC branch predicate // 1. CR register // 2. Target MBB PPC::Predicate Pred = (PPC::Predicate)I->getOperand(0).getImm(); unsigned CRReg = I->getOperand(1).getReg(); // Jump over the uncond branch inst (i.e. $PC+8) on opposite condition. 
BuildMI(MBB, I, dl, TII->get(PPC::BCC)) .addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2); } else if (I->getOpcode() == PPC::BC) { unsigned CRBit = I->getOperand(0).getReg(); BuildMI(MBB, I, dl, TII->get(PPC::BCn)).addReg(CRBit).addImm(2); } else if (I->getOpcode() == PPC::BCn) { unsigned CRBit = I->getOperand(0).getReg(); BuildMI(MBB, I, dl, TII->get(PPC::BC)).addReg(CRBit).addImm(2); } else if (I->getOpcode() == PPC::BDNZ) { BuildMI(MBB, I, dl, TII->get(PPC::BDZ)).addImm(2); } else if (I->getOpcode() == PPC::BDNZ8) { BuildMI(MBB, I, dl, TII->get(PPC::BDZ8)).addImm(2); } else if (I->getOpcode() == PPC::BDZ) { BuildMI(MBB, I, dl, TII->get(PPC::BDNZ)).addImm(2); } else if (I->getOpcode() == PPC::BDZ8) { BuildMI(MBB, I, dl, TII->get(PPC::BDNZ8)).addImm(2); } else { llvm_unreachable("Unhandled branch type!"); } // Uncond branch to the real destination. I = BuildMI(MBB, I, dl, TII->get(PPC::B)).addMBB(Dest); // Remove the old branch from the function. OldBranch->eraseFromParent(); // Remember that this instruction is 8 bytes; increase the size of the // block by 4 and remember to iterate. BlockSizes[MBB.getNumber()] += 4; MBBStartOffset += 8; ++NumExpanded; MadeChange = true; } } EverMadeChange |= MadeChange; } BlockSizes.clear(); return true; }
MipsInstrInfo::BranchType MipsInstrInfo:: AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, bool AllowModify, SmallVectorImpl<MachineInstr*> &BranchInstrs) const { MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend(); // Skip all the debug instructions. while (I != REnd && I->isDebugValue()) ++I; if (I == REnd || !isUnpredicatedTerminator(&*I)) { // This block ends with no branches (it just falls through to its succ). // Leave TBB/FBB null. TBB = FBB = NULL; return BT_NoBranch; } MachineInstr *LastInst = &*I; unsigned LastOpc = LastInst->getOpcode(); BranchInstrs.push_back(LastInst); // Not an analyzable branch (e.g., indirect jump). if (!GetAnalyzableBrOpc(LastOpc)) return LastInst->isIndirectBranch() ? BT_Indirect : BT_None; // Get the second to last instruction in the block. unsigned SecondLastOpc = 0; MachineInstr *SecondLastInst = NULL; if (++I != REnd) { SecondLastInst = &*I; SecondLastOpc = GetAnalyzableBrOpc(SecondLastInst->getOpcode()); // Not an analyzable branch (must be an indirect jump). if (isUnpredicatedTerminator(SecondLastInst) && !SecondLastOpc) return BT_None; } // If there is only one terminator instruction, process it. if (!SecondLastOpc) { // Unconditional branch if (LastOpc == UncondBrOpc) { TBB = LastInst->getOperand(0).getMBB(); return BT_Uncond; } // Conditional branch AnalyzeCondBr(LastInst, LastOpc, TBB, Cond); return BT_Cond; } // If we reached here, there are two branches. // If there are three terminators, we don't know what sort of block this is. if (++I != REnd && isUnpredicatedTerminator(&*I)) return BT_None; BranchInstrs.insert(BranchInstrs.begin(), SecondLastInst); // If second to last instruction is an unconditional branch, // analyze it and remove the last instruction. if (SecondLastOpc == UncondBrOpc) { // Return if the last instruction cannot be removed. if (!AllowModify) return BT_None; TBB = SecondLastInst->getOperand(0).getMBB(); LastInst->eraseFromParent(); BranchInstrs.pop_back(); return BT_Uncond; } // Conditional branch followed by an unconditional branch. // The last one must be unconditional. if (LastOpc != UncondBrOpc) return BT_None; AnalyzeCondBr(SecondLastInst, SecondLastOpc, TBB, Cond); FBB = LastInst->getOperand(0).getMBB(); return BT_CondUncond; }
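// The block shapes recognized above, by returned BranchType (the opcodes
// are illustrative):
//   (fallthrough, no terminator)      -> BT_NoBranch
//   B <tbb>                           -> BT_Uncond     (TBB set)
//   BEQ ..., <tbb>                    -> BT_Cond       (TBB, Cond set)
//   BEQ ..., <tbb>; B <fbb>           -> BT_CondUncond (TBB, FBB, Cond set)
// An unanalyzable indirect jump yields BT_Indirect; anything else BT_None.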
bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** Register Stackifying **********\n" "********** Function: " << MF.getName() << '\n'); bool Changed = false; MachineRegisterInfo &MRI = MF.getRegInfo(); WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>(); const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); const auto *TRI = MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo(); AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); LiveIntervals &LIS = getAnalysis<LiveIntervals>(); // Walk the instructions from the bottom up. Currently we don't look past // block boundaries, and the blocks aren't ordered so the block visitation // order isn't significant, but we may want to change this in the future. for (MachineBasicBlock &MBB : MF) { // Don't use a range-based for loop, because we modify the list as we're // iterating over it and the end iterator may change. for (auto MII = MBB.rbegin(); MII != MBB.rend(); ++MII) { MachineInstr *Insert = &*MII; // Don't nest anything inside a phi. if (Insert->getOpcode() == TargetOpcode::PHI) break; // Don't nest anything inside an inline asm, because we don't have // constraints for $push inputs. if (Insert->getOpcode() == TargetOpcode::INLINEASM) break; // Iterate through the inputs in reverse order, since we'll be pulling // operands off the stack in LIFO order. bool AnyStackified = false; for (MachineOperand &Op : reverse(Insert->uses())) { // We're only interested in explicit virtual register operands. if (!Op.isReg() || Op.isImplicit() || !Op.isUse()) continue; unsigned Reg = Op.getReg(); // Only consider registers with a single definition. // TODO: Eventually we may relax this, to stackify phi transfers. MachineInstr *Def = MRI.getUniqueVRegDef(Reg); if (!Def) continue; // Don't nest an INLINE_ASM def into anything, because we don't have // constraints for $pop outputs. if (Def->getOpcode() == TargetOpcode::INLINEASM) continue; // Don't nest PHIs inside of anything. if (Def->getOpcode() == TargetOpcode::PHI) continue; // Argument instructions represent live-in registers and not real // instructions. if (Def->getOpcode() == WebAssembly::ARGUMENT_I32 || Def->getOpcode() == WebAssembly::ARGUMENT_I64 || Def->getOpcode() == WebAssembly::ARGUMENT_F32 || Def->getOpcode() == WebAssembly::ARGUMENT_F64) continue; if (MRI.hasOneUse(Reg) && Def->getParent() == &MBB && IsSafeToMove(Def, Insert, AA, LIS, MRI)) { // A single-use def in the same block with no intervening memory or // register dependencies; move the def down and nest it with the // current instruction. // TODO: Stackify multiple-use values, taking advantage of set_local // returning its result. Changed = true; AnyStackified = true; MBB.splice(Insert, &MBB, Def); LIS.handleMove(Def); MFI.stackifyVReg(Reg); ImposeStackOrdering(Def); Insert = Def; } else if (Def->isAsCheapAsAMove() && TII->isTriviallyReMaterializable(Def, &AA)) { // A trivially cloneable instruction; clone it and nest the new copy // with the current instruction. 
Changed = true; AnyStackified = true; unsigned OldReg = Def->getOperand(0).getReg(); unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); TII->reMaterialize(MBB, Insert, NewReg, 0, Def, *TRI); Op.setReg(NewReg); MachineInstr *Clone = &*std::prev(MachineBasicBlock::instr_iterator(Insert)); LIS.InsertMachineInstrInMaps(Clone); LIS.createAndComputeVirtRegInterval(NewReg); MFI.stackifyVReg(NewReg); ImposeStackOrdering(Clone); Insert = Clone; // If that was the last use of the original, delete the original. // Otherwise shrink the LiveInterval. if (MRI.use_empty(OldReg)) { SlotIndex Idx = LIS.getInstructionIndex(Def).getRegSlot(); LIS.removePhysRegDefAt(WebAssembly::ARGUMENTS, Idx); LIS.removeVRegDefAt(LIS.getInterval(OldReg), Idx); LIS.removeInterval(OldReg); LIS.RemoveMachineInstrFromMaps(Def); Def->eraseFromParent(); } else { LIS.shrinkToUses(&LIS.getInterval(OldReg)); } } } if (AnyStackified) ImposeStackOrdering(&*MII); } } // If we used EXPR_STACK anywhere, add it to the live-in sets everywhere // so that it never looks like a use-before-def. if (Changed) { MF.getRegInfo().addLiveIn(WebAssembly::EXPR_STACK); for (MachineBasicBlock &MBB : MF) MBB.addLiveIn(WebAssembly::EXPR_STACK); } #ifndef NDEBUG // Verify that pushes and pops are performed in LIFO order. SmallVector<unsigned, 0> Stack; for (MachineBasicBlock &MBB : MF) { for (MachineInstr &MI : MBB) { for (MachineOperand &MO : reverse(MI.explicit_operands())) { if (!MO.isReg()) continue; unsigned VReg = MO.getReg(); // Don't stackify physregs like SP or FP. if (!TargetRegisterInfo::isVirtualRegister(VReg)) continue; if (MFI.isVRegStackified(VReg)) { if (MO.isDef()) Stack.push_back(VReg); else assert(Stack.pop_back_val() == VReg); } } } // TODO: Generalize this code to support keeping values on the stack across // basic block boundaries. assert(Stack.empty()); } #endif return Changed; }
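// A stackification sketch (hypothetical vregs): starting from
//   %1 = CONST_I32 0
//   ...
//   %2 = ADD_I32 %0, %1
// the single-use def of %1 is either moved down (splice + handleMove) or
// rematerialized next to the ADD_I32, and the register is marked stackified
// in MFI, so later passes treat it as a value on the implicit expression
// stack rather than a named local.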
bool AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const { // If the block has no terminators, it just falls into the block after it. MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin()) return false; --I; while (I->isDebugValue()) { if (I == MBB.begin()) return false; --I; } if (!isUnpredicatedTerminator(I)) return false; // Get the last instruction in the block. MachineInstr *LastInst = I; // If there is only one terminator instruction, process it. unsigned LastOpc = LastInst->getOpcode(); if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { if (LastOpc == AArch64::Bimm) { TBB = LastInst->getOperand(0).getMBB(); return false; } if (isCondBranch(LastOpc)) { classifyCondBranch(LastInst, TBB, Cond); return false; } return true; // Can't handle indirect branch. } // Get the instruction before it if it is a terminator. MachineInstr *SecondLastInst = I; unsigned SecondLastOpc = SecondLastInst->getOpcode(); // If AllowModify is true and the block ends with two or more unconditional // branches, delete all but the first unconditional branch. if (AllowModify && LastOpc == AArch64::Bimm) { while (SecondLastOpc == AArch64::Bimm) { LastInst->eraseFromParent(); LastInst = SecondLastInst; LastOpc = LastInst->getOpcode(); if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { // Return now; the only terminator is an unconditional branch. TBB = LastInst->getOperand(0).getMBB(); return false; } else { SecondLastInst = I; SecondLastOpc = SecondLastInst->getOpcode(); } } } // If there are three terminators, we don't know what sort of block this is. if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) return true; // If the block ends with a B and a Bcc, handle it. if (LastOpc == AArch64::Bimm) { if (SecondLastOpc == AArch64::Bcc) { TBB = SecondLastInst->getOperand(1).getMBB(); Cond.push_back(MachineOperand::CreateImm(AArch64::Bcc)); Cond.push_back(SecondLastInst->getOperand(0)); FBB = LastInst->getOperand(0).getMBB(); return false; } else if (isCondBranch(SecondLastOpc)) { classifyCondBranch(SecondLastInst, TBB, Cond); FBB = LastInst->getOperand(0).getMBB(); return false; } } // If the block ends with two unconditional branches, handle it. The second // one is not executed, so remove it. if (SecondLastOpc == AArch64::Bimm && LastOpc == AArch64::Bimm) { TBB = SecondLastInst->getOperand(0).getMBB(); I = LastInst; if (AllowModify) I->eraseFromParent(); return false; } // Otherwise, can't handle this. return true; }
bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder) const { using namespace TargetOpcode; MIRBuilder.setInstr(MI); switch (MI.getOpcode()) { default: return false; case G_SREM: case G_UREM: { unsigned OriginalResult = MI.getOperand(0).getReg(); auto Size = MRI.getType(OriginalResult).getSizeInBits(); if (Size != 32) return false; auto Libcall = MI.getOpcode() == G_SREM ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; // Our divmod libcalls return a struct containing the quotient and the // remainder. We need to create a virtual register for it. auto &Ctx = MIRBuilder.getMF().getFunction()->getContext(); Type *ArgTy = Type::getInt32Ty(Ctx); StructType *RetTy = StructType::get(Ctx, {ArgTy, ArgTy}, /* Packed */ true); auto RetVal = MRI.createGenericVirtualRegister( getLLTForType(*RetTy, MIRBuilder.getMF().getDataLayout())); auto Status = createLibcall(MIRBuilder, Libcall, {RetVal, RetTy}, {{MI.getOperand(1).getReg(), ArgTy}, {MI.getOperand(2).getReg(), ArgTy}}); if (Status != LegalizerHelper::Legalized) return false; // The remainder is the second result of divmod. Split the return value into // a new, unused register for the quotient and the destination of the // original instruction for the remainder. MIRBuilder.buildUnmerge( {MRI.createGenericVirtualRegister(LLT::scalar(32)), OriginalResult}, RetVal); break; } case G_FCMP: { assert(MRI.getType(MI.getOperand(2).getReg()) == MRI.getType(MI.getOperand(3).getReg()) && "Mismatched operands for G_FCMP"); auto OpSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); auto OriginalResult = MI.getOperand(0).getReg(); auto Predicate = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()); auto Libcalls = getFCmpLibcalls(Predicate, OpSize); if (Libcalls.empty()) { assert((Predicate == CmpInst::FCMP_TRUE || Predicate == CmpInst::FCMP_FALSE) && "Predicate needs libcalls, but none specified"); MIRBuilder.buildConstant(OriginalResult, Predicate == CmpInst::FCMP_TRUE ? 1 : 0); MI.eraseFromParent(); return true; } auto &Ctx = MIRBuilder.getMF().getFunction()->getContext(); assert((OpSize == 32 || OpSize == 64) && "Unsupported operand size"); auto *ArgTy = OpSize == 32 ? Type::getFloatTy(Ctx) : Type::getDoubleTy(Ctx); auto *RetTy = Type::getInt32Ty(Ctx); SmallVector<unsigned, 2> Results; for (auto Libcall : Libcalls) { auto LibcallResult = MRI.createGenericVirtualRegister(LLT::scalar(32)); auto Status = createLibcall(MIRBuilder, Libcall.LibcallID, {LibcallResult, RetTy}, {{MI.getOperand(2).getReg(), ArgTy}, {MI.getOperand(3).getReg(), ArgTy}}); if (Status != LegalizerHelper::Legalized) return false; auto ProcessedResult = Libcalls.size() == 1 ? OriginalResult : MRI.createGenericVirtualRegister(MRI.getType(OriginalResult)); // We have a result, but we need to transform it into a proper 1-bit 0 or // 1, taking into account the different peculiarities of the values // returned by the comparison functions. CmpInst::Predicate ResultPred = Libcall.Predicate; if (ResultPred == CmpInst::BAD_ICMP_PREDICATE) { // We have a nice 0 or 1, and we just need to truncate it back to 1 bit // to keep the types consistent. MIRBuilder.buildTrunc(ProcessedResult, LibcallResult); } else { // We need to compare against 0. 
assert(CmpInst::isIntPredicate(ResultPred) && "Unsupported predicate"); auto Zero = MRI.createGenericVirtualRegister(LLT::scalar(32)); MIRBuilder.buildConstant(Zero, 0); MIRBuilder.buildICmp(ResultPred, ProcessedResult, LibcallResult, Zero); } Results.push_back(ProcessedResult); } if (Results.size() != 1) { assert(Results.size() == 2 && "Unexpected number of results"); MIRBuilder.buildOr(OriginalResult, Results[0], Results[1]); } break; } } MI.eraseFromParent(); return true; }
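// A sketch of the G_SREM lowering above (on AAPCS targets RTLIB::SDIVREM_I32
// is typically the __aeabi_idivmod libcall; the generic MIR below is
// illustrative):
//   %r:_(s32) = G_SREM %a, %b
// becomes, roughly,
//   %qr:_(s64) = <call __aeabi_idivmod>(%a, %b)   <- {quotient, remainder}
//   %q:_(s32), %r:_(s32) = G_UNMERGE_VALUES %qr
// where %q is a fresh, unused vreg and %r is the original result register.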