/// LowerAtomicPHINode - Lower the PHI node at the top of the specified block, /// under the assuption that it needs to be lowered in a way that supports /// atomic execution of PHIs. This lowering method is always correct all of the /// time. /// void llvm::PHIElimination::LowerAtomicPHINode( MachineBasicBlock &MBB, MachineBasicBlock::iterator AfterPHIsIt) { ++NumAtomic; // Unlink the PHI node from the basic block, but don't delete the PHI yet. MachineInstr *MPhi = MBB.remove(MBB.begin()); unsigned NumSrcs = (MPhi->getNumOperands() - 1) / 2; unsigned DestReg = MPhi->getOperand(0).getReg(); assert(MPhi->getOperand(0).getSubReg() == 0 && "Can't handle sub-reg PHIs"); bool isDead = MPhi->getOperand(0).isDead(); // Create a new register for the incoming PHI arguments. MachineFunction &MF = *MBB.getParent(); unsigned IncomingReg = 0; bool reusedIncoming = false; // Is IncomingReg reused from an earlier PHI? // Insert a register to register copy at the top of the current block (but // after any remaining phi nodes) which copies the new incoming register // into the phi node destination. const TargetInstrInfo *TII = MF.getTarget().getInstrInfo(); if (isSourceDefinedByImplicitDef(MPhi, MRI)) // If all sources of a PHI node are implicit_def, just emit an // implicit_def instead of a copy. BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF), DestReg); else { // Can we reuse an earlier PHI node? This only happens for critical edges, // typically those created by tail duplication. unsigned &entry = LoweredPHIs[MPhi]; if (entry) { // An identical PHI node was already lowered. Reuse the incoming register. IncomingReg = entry; reusedIncoming = true; ++NumReused; DEBUG(dbgs() << "Reusing %reg" << IncomingReg << " for " << *MPhi); } else { const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg); entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC); } BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(), TII->get(TargetOpcode::COPY), DestReg) .addReg(IncomingReg); } // Update live variable information if there is any. LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>(); if (LV) { MachineInstr *PHICopy = prior(AfterPHIsIt); if (IncomingReg) { LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg); // Increment use count of the newly created virtual register. VI.NumUses++; LV->setPHIJoin(IncomingReg); // When we are reusing the incoming register, it may already have been // killed in this block. The old kill will also have been inserted at // AfterPHIsIt, so it appears before the current PHICopy. if (reusedIncoming) if (MachineInstr *OldKill = VI.findKill(&MBB)) { DEBUG(dbgs() << "Remove old kill from " << *OldKill); LV->removeVirtualRegisterKilled(IncomingReg, OldKill); DEBUG(MBB.dump()); } // Add information to LiveVariables to know that the incoming value is // killed. Note that because the value is defined in several places (once // each for each incoming block), the "def" block and instruction fields // for the VarInfo is not filled in. LV->addVirtualRegisterKilled(IncomingReg, PHICopy); } // Since we are going to be deleting the PHI node, if it is the last use of // any registers, or if the value itself is dead, we need to move this // information over to the new copy we just inserted. LV->removeVirtualRegistersKilled(MPhi); // If the result is dead, update LV. 
if (isDead) { LV->addVirtualRegisterDead(DestReg, PHICopy); LV->removeVirtualRegisterDead(DestReg, MPhi); } } // Adjust the VRegPHIUseCount map to account for the removal of this PHI node. for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) --VRegPHIUseCount[BBVRegPair(MPhi->getOperand(i+1).getMBB()->getNumber(), MPhi->getOperand(i).getReg())]; // Now loop over all of the incoming arguments, changing them to copy into the // IncomingReg register in the corresponding predecessor basic block. SmallPtrSet<MachineBasicBlock*, 8> MBBsInsertedInto; for (int i = NumSrcs - 1; i >= 0; --i) { unsigned SrcReg = MPhi->getOperand(i*2+1).getReg(); unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg(); assert(TargetRegisterInfo::isVirtualRegister(SrcReg) && "Machine PHI Operands must all be virtual registers!"); // Get the MachineBasicBlock equivalent of the BasicBlock that is the source // path the PHI. MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB(); // If source is defined by an implicit def, there is no need to insert a // copy. MachineInstr *DefMI = MRI->getVRegDef(SrcReg); if (DefMI->isImplicitDef()) { ImpDefs.insert(DefMI); continue; } // Check to make sure we haven't already emitted the copy for this block. // This can happen because PHI nodes may have multiple entries for the same // basic block. if (!MBBsInsertedInto.insert(&opBlock)) continue; // If the copy has already been emitted, we're done. // Find a safe location to insert the copy, this may be the first terminator // in the block (or end()). MachineBasicBlock::iterator InsertPos = FindCopyInsertPoint(opBlock, MBB, SrcReg); // Insert the copy. if (!reusedIncoming && IncomingReg) BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(), TII->get(TargetOpcode::COPY), IncomingReg).addReg(SrcReg, 0, SrcSubReg); // Now update live variable information if we have it. Otherwise we're done if (!LV) continue; // We want to be able to insert a kill of the register if this PHI (aka, the // copy we just inserted) is the last use of the source value. Live // variable analysis conservatively handles this by saying that the value is // live until the end of the block the PHI entry lives in. If the value // really is dead at the PHI copy, there will be no successor blocks which // have the value live-in. // Also check to see if this register is in use by another PHI node which // has not yet been eliminated. If so, it will be killed at an appropriate // point later. // Is it used by any PHI instructions in this block? bool ValueIsUsed = VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)]; // Okay, if we now know that the value is not live out of the block, we can // add a kill marker in this block saying that it kills the incoming value! if (!ValueIsUsed && !LV->isLiveOut(SrcReg, opBlock)) { // In our final twist, we have to decide which instruction kills the // register. In most cases this is the copy, however, the first // terminator instruction at the end of the block may also use the value. // In this case, we should mark *it* as being the killing block, not the // copy. MachineBasicBlock::iterator KillInst; MachineBasicBlock::iterator Term = opBlock.getFirstTerminator(); if (Term != opBlock.end() && Term->readsRegister(SrcReg)) { KillInst = Term; // Check that no other terminators use values. 
#ifndef NDEBUG
        for (MachineBasicBlock::iterator TI = llvm::next(Term);
             TI != opBlock.end(); ++TI) {
          assert(!TI->readsRegister(SrcReg) &&
                 "Terminator instructions cannot use virtual registers unless "
                 "they are the first terminator in a block!");
        }
#endif
      } else if (reusedIncoming || !IncomingReg) {
        // We may have to rewind a bit if we didn't insert a copy this time.
        KillInst = Term;
        while (KillInst != opBlock.begin())
          if ((--KillInst)->readsRegister(SrcReg))
            break;
      } else {
        // We just inserted this copy.
        KillInst = prior(InsertPos);
      }
      assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction");

      // Finally, mark it killed.
      LV->addVirtualRegisterKilled(SrcReg, KillInst);

      // This vreg no longer lives all of the way through opBlock.
      unsigned opBlockNum = opBlock.getNumber();
      LV->getVarInfo(SrcReg).AliveBlocks.reset(opBlockNum);
    }
  }

  // Really delete the PHI instruction now, if it is not in the LoweredPHIs map.
  if (reusedIncoming || !IncomingReg)
    MF.DeleteMachineInstr(MPhi);
}
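// The net effect of the lowering above, stripped of the LiveVariables
// bookkeeping, is small: the PHI destination becomes a copy from a fresh
// "incoming" vreg at the top of the block, and every predecessor gets a copy
// into that vreg. The following is a minimal, self-contained sketch of just
// that rewrite (plain C++, made-up block/register names, not LLVM's API); the
// real pass additionally places each predecessor copy before any terminators.
#include <cstdio>
#include <string>
#include <utility>
#include <vector>

struct Block {
  std::string name;
  std::vector<std::string> insts; // textual pseudo-instructions
};

// Lower "dest = PHI [srcN, predN]..." sitting at the top of `join`.
static void lowerPHI(Block &join,
                     std::vector<std::pair<std::string, Block *>> srcs,
                     const std::string &dest) {
  const std::string incoming = dest + ".in"; // fresh virtual register
  join.insts.insert(join.insts.begin(), dest + " = COPY " + incoming);
  for (auto &s : srcs) // one copy per predecessor edge
    s.second->insts.push_back(incoming + " = COPY " + s.first);
}

int main() {
  Block pred1{"bb1", {"v1 = ..."}}, pred2{"bb2", {"v2 = ..."}}, join{"bb3", {}};
  lowerPHI(join, {{"v1", &pred1}, {"v2", &pred2}}, "v3");
  for (const Block *b : {&pred1, &pred2, &join}) {
    std::printf("%s:\n", b->name.c_str());
    for (const std::string &i : b->insts)
      std::printf("  %s\n", i.c_str());
  }
  return 0;
}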
bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) { bool Modified = false; SmallSet<unsigned, 4> Defs; SmallSet<unsigned, 4> Uses; MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); while (MBBI != E) { MachineInstr *MI = &*MBBI; DebugLoc dl = MI->getDebugLoc(); unsigned PredReg = 0; ARMCC::CondCodes CC = getITInstrPredicate(MI, PredReg); if (CC == ARMCC::AL) { ++MBBI; continue; } Defs.clear(); Uses.clear(); TrackDefUses(MI, Defs, Uses, TRI); // Insert an IT instruction. MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(ARM::t2IT)) .addImm(CC); // Add implicit use of ITSTATE to IT block instructions. MI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*ifDef*/, true/*isImp*/, false/*isKill*/)); MachineInstr *LastITMI = MI; MachineBasicBlock::iterator InsertPos = MIB.getInstr(); ++MBBI; // Form IT block. ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC); unsigned Mask = 0, Pos = 3; // v8 IT blocks are limited to one conditional op unless -arm-no-restrict-it // is set: skip the loop if (!restrictIT) { // Branches, including tricky ones like LDM_RET, need to end an IT // block so check the instruction we just put in the block. for (; MBBI != E && Pos && (!MI->isBranch() && !MI->isReturn()) ; ++MBBI) { if (MBBI->isDebugValue()) continue; MachineInstr *NMI = &*MBBI; MI = NMI; unsigned NPredReg = 0; ARMCC::CondCodes NCC = getITInstrPredicate(NMI, NPredReg); if (NCC == CC || NCC == OCC) { Mask |= (NCC & 1) << Pos; // Add implicit use of ITSTATE. NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*ifDef*/, true/*isImp*/, false/*isKill*/)); LastITMI = NMI; } else { if (NCC == ARMCC::AL && MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses)) { --MBBI; MBB.remove(NMI); MBB.insert(InsertPos, NMI); ++NumMovedInsts; continue; } break; } TrackDefUses(NMI, Defs, Uses, TRI); --Pos; } } // Finalize IT mask. Mask |= (1 << Pos); // Tag along (firstcond[0] << 4) with the mask. Mask |= (CC & 1) << 4; MIB.addImm(Mask); // Last instruction in IT block kills ITSTATE. LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill(); // Finalize the bundle. MachineBasicBlock::instr_iterator LI = LastITMI; finalizeBundle(MBB, InsertPos.getInstrIterator(), std::next(LI)); Modified = true; ++NumITs; } return Modified; }
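// The mask arithmetic above is compact enough to be easy to misread. A
// standalone model of it (assuming, as the pass does, that only the low bit
// of each condition code matters for the then/else decision) is:
#include <cassert>
#include <cstdio>
#include <vector>

// firstCondLSB: CC & 1 for the IT block's first condition.
// followers:    NCC & 1 for each later instruction folded into the block
//               (same bit as the first condition means "then", the opposite
//               bit means "else"); at most three of them.
static unsigned computeITMask(unsigned firstCondLSB,
                              const std::vector<unsigned> &followers) {
  assert(followers.size() <= 3 && "an IT block holds at most 4 instructions");
  unsigned Mask = 0, Pos = 3;
  for (unsigned bit : followers)
    Mask |= (bit & 1) << Pos--;    // then/else bit for this slot
  Mask |= 1u << Pos;               // trailing 1 encodes the block length
  Mask |= (firstCondLSB & 1) << 4; // tag along firstcond[0], as the pass does
  return Mask;
}

int main() {
  // Two extra instructions: the first matches the block condition ("then"),
  // the second uses the opposite condition ("else").
  std::printf("mask = 0x%x\n", computeITMask(0, {0, 1}));
  return 0;
}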
/// OptimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads /// a single register and writes a single register and it does not modify the /// source, and if the source value is preserved as a sub-register of the /// result, then replace all reachable uses of the source with the subreg of the /// result. /// /// Do not generate an EXTRACT that is used only in a debug use, as this changes /// the code. Since this code does not currently share EXTRACTs, just ignore all /// debug uses. bool PeepholeOptimizer:: OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, SmallPtrSet<MachineInstr*, 8> &LocalMIs) { unsigned SrcReg, DstReg, SubIdx; if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) return false; if (TargetRegisterInfo::isPhysicalRegister(DstReg) || TargetRegisterInfo::isPhysicalRegister(SrcReg)) return false; MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SrcReg); if (++UI == MRI->use_nodbg_end()) // No other uses. return false; // The source has other uses. See if we can replace the other uses with use of // the result of the extension. SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs; UI = MRI->use_nodbg_begin(DstReg); for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end(); UI != UE; ++UI) ReachedBBs.insert(UI->getParent()); // Uses that are in the same BB of uses of the result of the instruction. SmallVector<MachineOperand*, 8> Uses; // Uses that the result of the instruction can reach. SmallVector<MachineOperand*, 8> ExtendedUses; bool ExtendLife = true; UI = MRI->use_nodbg_begin(SrcReg); for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end(); UI != UE; ++UI) { MachineOperand &UseMO = UI.getOperand(); MachineInstr *UseMI = &*UI; if (UseMI == MI) continue; if (UseMI->isPHI()) { ExtendLife = false; continue; } // It's an error to translate this: // // %reg1025 = <sext> %reg1024 // ... // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4 // // into this: // // %reg1025 = <sext> %reg1024 // ... // %reg1027 = COPY %reg1025:4 // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4 // // The problem here is that SUBREG_TO_REG is there to assert that an // implicit zext occurs. It doesn't insert a zext instruction. If we allow // the COPY here, it will give us the value after the <sext>, not the // original value of %reg1024 before <sext>. if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) continue; MachineBasicBlock *UseMBB = UseMI->getParent(); if (UseMBB == MBB) { // Local uses that come after the extension. if (!LocalMIs.count(UseMI)) Uses.push_back(&UseMO); } else if (ReachedBBs.count(UseMBB)) { // Non-local uses where the result of the extension is used. Always // replace these unless it's a PHI. Uses.push_back(&UseMO); } else if (Aggressive && DT->dominates(MBB, UseMBB)) { // We may want to extend the live range of the extension result in order // to replace these uses. ExtendedUses.push_back(&UseMO); } else { // Both will be live out of the def MBB anyway. Don't extend live range of // the extension result. ExtendLife = false; break; } } if (ExtendLife && !ExtendedUses.empty()) // Extend the liveness of the extension result. std::copy(ExtendedUses.begin(), ExtendedUses.end(), std::back_inserter(Uses)); // Now replace all uses. bool Changed = false; if (!Uses.empty()) { SmallPtrSet<MachineBasicBlock*, 4> PHIBBs; // Look for PHI uses of the extended result, we don't want to extend the // liveness of a PHI input. It breaks all kinds of assumptions down // stream. A PHI use is expected to be the kill of its source values. 
UI = MRI->use_nodbg_begin(DstReg); for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end(); UI != UE; ++UI) if (UI->isPHI()) PHIBBs.insert(UI->getParent()); const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); for (unsigned i = 0, e = Uses.size(); i != e; ++i) { MachineOperand *UseMO = Uses[i]; MachineInstr *UseMI = UseMO->getParent(); MachineBasicBlock *UseMBB = UseMI->getParent(); if (PHIBBs.count(UseMBB)) continue; // About to add uses of DstReg, clear DstReg's kill flags. if (!Changed) MRI->clearKillFlags(DstReg); unsigned NewVR = MRI->createVirtualRegister(RC); BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), TII->get(TargetOpcode::COPY), NewVR) .addReg(DstReg, 0, SubIdx); UseMO->setReg(NewVR); ++NumReuse; Changed = true; } } return Changed; }
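// The classification loop earlier in this function sorts the other uses of
// SrcReg into three buckets. A compact standalone restatement of that
// decision (hypothetical minimal types, not LLVM's; the SUBREG_TO_REG special
// case above is simply skipped here) may make the control flow easier to see:
#include <cstdio>
#include <set>
#include <string>

enum class UseKind { ReplaceNow, ReplaceIfLifeExtended, LeaveAlone };

struct Use {
  std::string block;    // block containing the use
  bool isPHI;
  bool isLocalAndLater; // same block as the extension and not in LocalMIs
};

static UseKind classify(const Use &U, const std::string &extBlock,
                        const std::set<std::string> &reachedBBs, // already use DstReg
                        const std::set<std::string> &dominatedByExt,
                        bool aggressive) {
  if (U.isPHI)
    return UseKind::LeaveAlone;            // a PHI expects to kill its inputs
  if (U.block == extBlock)
    return U.isLocalAndLater ? UseKind::ReplaceNow : UseKind::LeaveAlone;
  if (reachedBBs.count(U.block))
    return UseKind::ReplaceNow;            // DstReg is already live there
  if (aggressive && dominatedByExt.count(U.block))
    return UseKind::ReplaceIfLifeExtended; // needs the live range extended
  return UseKind::LeaveAlone;              // don't extend the live range
}

int main() {
  std::set<std::string> reached{"bb2"}, dominated{"bb2", "bb3"};
  Use u{"bb3", /*isPHI=*/false, /*isLocalAndLater=*/false};
  std::printf("%d\n",
              static_cast<int>(classify(u, "bb1", reached, dominated, true)));
  return 0;
}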
bool MSP430BSel::runOnMachineFunction(MachineFunction &Fn) { const MSP430InstrInfo *TII = static_cast<const MSP430InstrInfo*>(Fn.getTarget().getInstrInfo()); // Give the blocks of the function a dense, in-order, numbering. Fn.RenumberBlocks(); BlockSizes.resize(Fn.getNumBlockIDs()); // Measure each MBB and compute a size for the entire function. unsigned FuncSize = 0; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; ++MFI) { MachineBasicBlock *MBB = MFI; unsigned BlockSize = 0; for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end(); MBBI != EE; ++MBBI) BlockSize += TII->GetInstSizeInBytes(MBBI); BlockSizes[MBB->getNumber()] = BlockSize; FuncSize += BlockSize; } // If the entire function is smaller than the displacement of a branch field, // we know we don't need to shrink any branches in this function. This is a // common case. if (FuncSize < (1 << 9)) { BlockSizes.clear(); return false; } // For each conditional branch, if the offset to its destination is larger // than the offset field allows, transform it into a long branch sequence // like this: // short branch: // bCC MBB // long branch: // b!CC $PC+6 // b MBB // bool MadeChange = true; bool EverMadeChange = false; while (MadeChange) { // Iteratively expand branches until we reach a fixed point. MadeChange = false; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; ++MFI) { MachineBasicBlock &MBB = *MFI; unsigned MBBStartOffset = 0; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { if ((I->getOpcode() != MSP430::JCC || I->getOperand(0).isImm()) && I->getOpcode() != MSP430::JMP) { MBBStartOffset += TII->GetInstSizeInBytes(I); continue; } // Determine the offset from the current branch to the destination // block. MachineBasicBlock *Dest = I->getOperand(0).getMBB(); int BranchSize; if (Dest->getNumber() <= MBB.getNumber()) { // If this is a backwards branch, the delta is the offset from the // start of this block to this branch, plus the sizes of all blocks // from this block to the dest. BranchSize = MBBStartOffset; for (unsigned i = Dest->getNumber(), e = MBB.getNumber(); i != e; ++i) BranchSize += BlockSizes[i]; } else { // Otherwise, add the size of the blocks between this block and the // dest to the number of bytes left in this block. BranchSize = -MBBStartOffset; for (unsigned i = MBB.getNumber(), e = Dest->getNumber(); i != e; ++i) BranchSize += BlockSizes[i]; } // If this branch is in range, ignore it. if (isInt<10>(BranchSize)) { MBBStartOffset += 2; continue; } // Otherwise, we have to expand it to a long branch. unsigned NewSize; MachineInstr *OldBranch = I; DebugLoc dl = OldBranch->getDebugLoc(); if (I->getOpcode() == MSP430::JMP) { NewSize = 4; } else { // The BCC operands are: // 0. MSP430 branch predicate // 1. Target MBB SmallVector<MachineOperand, 1> Cond; Cond.push_back(I->getOperand(1)); // Jump over the uncond branch inst (i.e. $+6) on opposite condition. TII->ReverseBranchCondition(Cond); BuildMI(MBB, I, dl, TII->get(MSP430::JCC)) .addImm(4).addOperand(Cond[0]); NewSize = 6; } // Uncond branch to the real destination. I = BuildMI(MBB, I, dl, TII->get(MSP430::Bi)).addMBB(Dest); // Remove the old branch from the function. OldBranch->eraseFromParent(); // Remember that this instruction is NewSize bytes, increase the size of the // block by NewSize-2, remember to iterate. 
        BlockSizes[MBB.getNumber()] += NewSize-2;
        MBBStartOffset += NewSize;
        ++NumExpanded;
        MadeChange = true;
      }
    }
    EverMadeChange |= MadeChange;
  }

  BlockSizes.clear();
  return true;
}
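// The displacement computation above is the heart of the pass. A standalone
// version of just that arithmetic (illustrative block sizes, no LLVM types):
#include <cstdint>
#include <cstdio>
#include <vector>

static bool fitsSigned(int64_t v, unsigned bits) {
  return v >= -(int64_t(1) << (bits - 1)) && v < (int64_t(1) << (bits - 1));
}

// Displacement of a branch located `offsetInBlock` bytes into block `src`,
// targeting the start of block `dst`, given per-block byte sizes.
static int branchDisplacement(const std::vector<unsigned> &blockSizes,
                              unsigned src, unsigned offsetInBlock,
                              unsigned dst) {
  int d;
  if (dst <= src) {                       // backwards: dst start -> the branch
    d = static_cast<int>(offsetInBlock);
    for (unsigned i = dst; i != src; ++i)
      d += blockSizes[i];
  } else {                                // forwards: rest of src + blocks between
    d = -static_cast<int>(offsetInBlock);
    for (unsigned i = src; i != dst; ++i)
      d += blockSizes[i];
  }
  return d;
}

int main() {
  std::vector<unsigned> sizes = {40, 600, 20, 8};
  int d = branchDisplacement(sizes, /*src=*/0, /*offsetInBlock=*/10, /*dst=*/3);
  // The pass above checks isInt<10> on this byte distance; 650 is out of
  // range, so this branch would get the long-branch expansion.
  std::printf("disp=%d, fits 10-bit=%d\n", d, fitsSigned(d, 10));
  return 0;
}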
// Distribute an SGPR->VGPR copy of a REG_SEQUENCE into a VGPR REG_SEQUENCE. // // SGPRx = ... // SGPRy = REG_SEQUENCE SGPRx, sub0 ... // VGPRz = COPY SGPRy // // ==> // // VGPRx = COPY SGPRx // VGPRz = REG_SEQUENCE VGPRx, sub0 // // This exposes immediate folding opportunities when materializing 64-bit // immediates. static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI, const SIRegisterInfo *TRI, const SIInstrInfo *TII, MachineRegisterInfo &MRI) { assert(MI.isRegSequence()); unsigned DstReg = MI.getOperand(0).getReg(); if (!TRI->isSGPRClass(MRI.getRegClass(DstReg))) return false; if (!MRI.hasOneUse(DstReg)) return false; MachineInstr &CopyUse = *MRI.use_instr_begin(DstReg); if (!CopyUse.isCopy()) return false; const TargetRegisterClass *SrcRC, *DstRC; std::tie(SrcRC, DstRC) = getCopyRegClasses(CopyUse, *TRI, MRI); if (!isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) return false; // TODO: Could have multiple extracts? unsigned SubReg = CopyUse.getOperand(1).getSubReg(); if (SubReg != AMDGPU::NoSubRegister) return false; MRI.setRegClass(DstReg, DstRC); // SGPRx = ... // SGPRy = REG_SEQUENCE SGPRx, sub0 ... // VGPRz = COPY SGPRy // => // VGPRx = COPY SGPRx // VGPRz = REG_SEQUENCE VGPRx, sub0 MI.getOperand(0).setReg(CopyUse.getOperand(0).getReg()); for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) { unsigned SrcReg = MI.getOperand(I).getReg(); unsigned SrcSubReg = MI.getOperand(I).getSubReg(); const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg); assert(TRI->isSGPRClass(SrcRC) && "Expected SGPR REG_SEQUENCE to only have SGPR inputs"); SrcRC = TRI->getSubRegClass(SrcRC, SrcSubReg); const TargetRegisterClass *NewSrcRC = TRI->getEquivalentVGPRClass(SrcRC); unsigned TmpReg = MRI.createVirtualRegister(NewSrcRC); BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY), TmpReg) .addOperand(MI.getOperand(I)); MI.getOperand(I).setReg(TmpReg); } CopyUse.eraseFromParent(); return true; }
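// At the string level, the rewrite above just pushes the SGPR->VGPR copy
// through each REG_SEQUENCE input. A tiny illustration (made-up register
// names, not LLVM's API):
#include <cstdio>
#include <string>
#include <vector>

struct Input { std::string reg, subIdx; };

int main() {
  // Before:  %sy = REG_SEQUENCE %s0,sub0, %s1,sub1
  //          %vz = COPY %sy
  std::vector<Input> sgprInputs = {{"%s0", "sub0"}, {"%s1", "sub1"}};
  std::string newSeq = "%vz = REG_SEQUENCE";
  for (size_t i = 0; i < sgprInputs.size(); ++i) {
    std::string tmp = "%vtmp" + std::to_string(i);  // fresh VGPR per input
    std::printf("%s = COPY %s\n", tmp.c_str(), sgprInputs[i].reg.c_str());
    newSeq += " " + tmp + "," + sgprInputs[i].subIdx;
  }
  std::printf("%s\n", newSeq.c_str()); // VGPR REG_SEQUENCE replaces the COPY
  return 0;
}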
/// LowerPHINode - Lower the PHI node at the top of the specified block, /// void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, MachineBasicBlock::iterator LastPHIIt) { ++NumLowered; MachineBasicBlock::iterator AfterPHIsIt = std::next(LastPHIIt); // Unlink the PHI node from the basic block, but don't delete the PHI yet. MachineInstr *MPhi = MBB.remove(MBB.begin()); unsigned NumSrcs = (MPhi->getNumOperands() - 1) / 2; unsigned DestReg = MPhi->getOperand(0).getReg(); assert(MPhi->getOperand(0).getSubReg() == 0 && "Can't handle sub-reg PHIs"); bool isDead = MPhi->getOperand(0).isDead(); // Create a new register for the incoming PHI arguments. MachineFunction &MF = *MBB.getParent(); unsigned IncomingReg = 0; bool reusedIncoming = false; // Is IncomingReg reused from an earlier PHI? // Insert a register to register copy at the top of the current block (but // after any remaining phi nodes) which copies the new incoming register // into the phi node destination. const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); if (isSourceDefinedByImplicitDef(MPhi, MRI)) // If all sources of a PHI node are implicit_def, just emit an // implicit_def instead of a copy. BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF), DestReg); else { // Can we reuse an earlier PHI node? This only happens for critical edges, // typically those created by tail duplication. unsigned &entry = LoweredPHIs[MPhi]; if (entry) { // An identical PHI node was already lowered. Reuse the incoming register. IncomingReg = entry; reusedIncoming = true; ++NumReused; DEBUG(dbgs() << "Reusing " << PrintReg(IncomingReg) << " for " << *MPhi); } else { const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg); entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC); } BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(), TII->get(TargetOpcode::COPY), DestReg) .addReg(IncomingReg); } // Update live variable information if there is any. if (LV) { MachineInstr *PHICopy = std::prev(AfterPHIsIt); if (IncomingReg) { LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg); // Increment use count of the newly created virtual register. LV->setPHIJoin(IncomingReg); // When we are reusing the incoming register, it may already have been // killed in this block. The old kill will also have been inserted at // AfterPHIsIt, so it appears before the current PHICopy. if (reusedIncoming) if (MachineInstr *OldKill = VI.findKill(&MBB)) { DEBUG(dbgs() << "Remove old kill from " << *OldKill); LV->removeVirtualRegisterKilled(IncomingReg, OldKill); DEBUG(MBB.dump()); } // Add information to LiveVariables to know that the incoming value is // killed. Note that because the value is defined in several places (once // each for each incoming block), the "def" block and instruction fields // for the VarInfo is not filled in. LV->addVirtualRegisterKilled(IncomingReg, PHICopy); } // Since we are going to be deleting the PHI node, if it is the last use of // any registers, or if the value itself is dead, we need to move this // information over to the new copy we just inserted. LV->removeVirtualRegistersKilled(MPhi); // If the result is dead, update LV. if (isDead) { LV->addVirtualRegisterDead(DestReg, PHICopy); LV->removeVirtualRegisterDead(DestReg, MPhi); } } // Update LiveIntervals for the new copy or implicit def. 
if (LIS) { MachineInstr *NewInstr = std::prev(AfterPHIsIt); SlotIndex DestCopyIndex = LIS->InsertMachineInstrInMaps(NewInstr); SlotIndex MBBStartIndex = LIS->getMBBStartIdx(&MBB); if (IncomingReg) { // Add the region from the beginning of MBB to the copy instruction to // IncomingReg's live interval. LiveInterval &IncomingLI = LIS->createEmptyInterval(IncomingReg); VNInfo *IncomingVNI = IncomingLI.getVNInfoAt(MBBStartIndex); if (!IncomingVNI) IncomingVNI = IncomingLI.getNextValue(MBBStartIndex, LIS->getVNInfoAllocator()); IncomingLI.addSegment(LiveInterval::Segment(MBBStartIndex, DestCopyIndex.getRegSlot(), IncomingVNI)); } LiveInterval &DestLI = LIS->getInterval(DestReg); assert(DestLI.begin() != DestLI.end() && "PHIs should have nonempty LiveIntervals."); if (DestLI.endIndex().isDead()) { // A dead PHI's live range begins and ends at the start of the MBB, but // the lowered copy, which will still be dead, needs to begin and end at // the copy instruction. VNInfo *OrigDestVNI = DestLI.getVNInfoAt(MBBStartIndex); assert(OrigDestVNI && "PHI destination should be live at block entry."); DestLI.removeSegment(MBBStartIndex, MBBStartIndex.getDeadSlot()); DestLI.createDeadDef(DestCopyIndex.getRegSlot(), LIS->getVNInfoAllocator()); DestLI.removeValNo(OrigDestVNI); } else { // Otherwise, remove the region from the beginning of MBB to the copy // instruction from DestReg's live interval. DestLI.removeSegment(MBBStartIndex, DestCopyIndex.getRegSlot()); VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getRegSlot()); assert(DestVNI && "PHI destination should be live at its definition."); DestVNI->def = DestCopyIndex.getRegSlot(); } } // Adjust the VRegPHIUseCount map to account for the removal of this PHI node. for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) --VRegPHIUseCount[BBVRegPair(MPhi->getOperand(i+1).getMBB()->getNumber(), MPhi->getOperand(i).getReg())]; // Now loop over all of the incoming arguments, changing them to copy into the // IncomingReg register in the corresponding predecessor basic block. SmallPtrSet<MachineBasicBlock*, 8> MBBsInsertedInto; for (int i = NumSrcs - 1; i >= 0; --i) { unsigned SrcReg = MPhi->getOperand(i*2+1).getReg(); unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg(); bool SrcUndef = MPhi->getOperand(i*2+1).isUndef() || isImplicitlyDefined(SrcReg, MRI); assert(TargetRegisterInfo::isVirtualRegister(SrcReg) && "Machine PHI Operands must all be virtual registers!"); // Get the MachineBasicBlock equivalent of the BasicBlock that is the source // path the PHI. MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB(); // Check to make sure we haven't already emitted the copy for this block. // This can happen because PHI nodes may have multiple entries for the same // basic block. if (!MBBsInsertedInto.insert(&opBlock)) continue; // If the copy has already been emitted, we're done. // Find a safe location to insert the copy, this may be the first terminator // in the block (or end()). MachineBasicBlock::iterator InsertPos = findPHICopyInsertPoint(&opBlock, &MBB, SrcReg); // Insert the copy. MachineInstr *NewSrcInstr = nullptr; if (!reusedIncoming && IncomingReg) { if (SrcUndef) { // The source register is undefined, so there is no need for a real // COPY, but we still need to ensure joint dominance by defs. // Insert an IMPLICIT_DEF instruction. NewSrcInstr = BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF), IncomingReg); // Clean up the old implicit-def, if there even was one. 
if (MachineInstr *DefMI = MRI->getVRegDef(SrcReg)) if (DefMI->isImplicitDef()) ImpDefs.insert(DefMI); } else { NewSrcInstr = BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(), TII->get(TargetOpcode::COPY), IncomingReg) .addReg(SrcReg, 0, SrcSubReg); } } // We only need to update the LiveVariables kill of SrcReg if this was the // last PHI use of SrcReg to be lowered on this CFG edge and it is not live // out of the predecessor. We can also ignore undef sources. if (LV && !SrcUndef && !VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)] && !LV->isLiveOut(SrcReg, opBlock)) { // We want to be able to insert a kill of the register if this PHI (aka, // the copy we just inserted) is the last use of the source value. Live // variable analysis conservatively handles this by saying that the value // is live until the end of the block the PHI entry lives in. If the value // really is dead at the PHI copy, there will be no successor blocks which // have the value live-in. // Okay, if we now know that the value is not live out of the block, we // can add a kill marker in this block saying that it kills the incoming // value! // In our final twist, we have to decide which instruction kills the // register. In most cases this is the copy, however, terminator // instructions at the end of the block may also use the value. In this // case, we should mark the last such terminator as being the killing // block, not the copy. MachineBasicBlock::iterator KillInst = opBlock.end(); MachineBasicBlock::iterator FirstTerm = opBlock.getFirstTerminator(); for (MachineBasicBlock::iterator Term = FirstTerm; Term != opBlock.end(); ++Term) { if (Term->readsRegister(SrcReg)) KillInst = Term; } if (KillInst == opBlock.end()) { // No terminator uses the register. if (reusedIncoming || !IncomingReg) { // We may have to rewind a bit if we didn't insert a copy this time. KillInst = FirstTerm; while (KillInst != opBlock.begin()) { --KillInst; if (KillInst->isDebugValue()) continue; if (KillInst->readsRegister(SrcReg)) break; } } else { // We just inserted this copy. KillInst = std::prev(InsertPos); } } assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction"); // Finally, mark it killed. LV->addVirtualRegisterKilled(SrcReg, KillInst); // This vreg no longer lives all of the way through opBlock. unsigned opBlockNum = opBlock.getNumber(); LV->getVarInfo(SrcReg).AliveBlocks.reset(opBlockNum); } if (LIS) { if (NewSrcInstr) { LIS->InsertMachineInstrInMaps(NewSrcInstr); LIS->addSegmentToEndOfBlock(IncomingReg, NewSrcInstr); } if (!SrcUndef && !VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)]) { LiveInterval &SrcLI = LIS->getInterval(SrcReg); bool isLiveOut = false; for (MachineBasicBlock::succ_iterator SI = opBlock.succ_begin(), SE = opBlock.succ_end(); SI != SE; ++SI) { SlotIndex startIdx = LIS->getMBBStartIdx(*SI); VNInfo *VNI = SrcLI.getVNInfoAt(startIdx); // Definitions by other PHIs are not truly live-in for our purposes. if (VNI && VNI->def != startIdx) { isLiveOut = true; break; } } if (!isLiveOut) { MachineBasicBlock::iterator KillInst = opBlock.end(); MachineBasicBlock::iterator FirstTerm = opBlock.getFirstTerminator(); for (MachineBasicBlock::iterator Term = FirstTerm; Term != opBlock.end(); ++Term) { if (Term->readsRegister(SrcReg)) KillInst = Term; } if (KillInst == opBlock.end()) { // No terminator uses the register. if (reusedIncoming || !IncomingReg) { // We may have to rewind a bit if we didn't just insert a copy. 
KillInst = FirstTerm; while (KillInst != opBlock.begin()) { --KillInst; if (KillInst->isDebugValue()) continue; if (KillInst->readsRegister(SrcReg)) break; } } else { // We just inserted this copy. KillInst = std::prev(InsertPos); } } assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction"); SlotIndex LastUseIndex = LIS->getInstructionIndex(KillInst); SrcLI.removeSegment(LastUseIndex.getRegSlot(), LIS->getMBBEndIdx(&opBlock)); } } } } // Really delete the PHI instruction now, if it is not in the LoweredPHIs map. if (reusedIncoming || !IncomingReg) { if (LIS) LIS->RemoveMachineInstrFromMaps(MPhi); MF.DeleteMachineInstr(MPhi); } }
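// The live-interval updates above are easier to see with concrete numbers.
// The segments and slot indices below are invented purely for illustration:
// IncomingReg covers the span from the block start to the new COPY, DestReg's
// definition moves from "the PHI" (block start) onto that COPY, and a source
// that is no longer live out has its range trimmed back to its last use.
#include <cstdio>
#include <utility>
#include <vector>

using Seg = std::pair<int, int>; // half-open [start, end) slot ranges

int main() {
  const int mbbStart = 100, destCopySlot = 104;            // join block
  const int srcLastUse = 52, predEnd = 60;                 // predecessor block
  std::vector<Seg> incomingLI = {{mbbStart, destCopySlot}}; // new vreg
  std::vector<Seg> destLI     = {{destCopySlot, 140}};      // def now on COPY
  std::vector<Seg> srcLI      = {{40, predEnd}};
  srcLI[0].second = srcLastUse; // removeSegment(last use, block end)
  std::printf("IncomingReg [%d,%d)  DestReg [%d,%d)  SrcReg [%d,%d)\n",
              incomingLI[0].first, incomingLI[0].second,
              destLI[0].first, destLI[0].second,
              srcLI[0].first, srcLI[0].second);
  return 0;
}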
bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { const PPCInstrInfo *TII = static_cast<const PPCInstrInfo*>(Fn.getTarget().getInstrInfo()); // Give the blocks of the function a dense, in-order, numbering. Fn.RenumberBlocks(); BlockSizes.resize(Fn.getNumBlockIDs()); // Measure each MBB and compute a size for the entire function. unsigned FuncSize = 0; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; ++MFI) { MachineBasicBlock *MBB = MFI; unsigned BlockSize = 0; for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end(); MBBI != EE; ++MBBI) BlockSize += TII->GetInstSizeInBytes(MBBI); BlockSizes[MBB->getNumber()] = BlockSize; FuncSize += BlockSize; } // If the entire function is smaller than the displacement of a branch field, // we know we don't need to shrink any branches in this function. This is a // common case. if (FuncSize < (1 << 15)) { BlockSizes.clear(); return false; } // For each conditional branch, if the offset to its destination is larger // than the offset field allows, transform it into a long branch sequence // like this: // short branch: // bCC MBB // long branch: // b!CC $PC+8 // b MBB // bool MadeChange = true; bool EverMadeChange = false; while (MadeChange) { // Iteratively expand branches until we reach a fixed point. MadeChange = false; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; ++MFI) { MachineBasicBlock &MBB = *MFI; unsigned MBBStartOffset = 0; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { MachineBasicBlock *Dest = 0; if (I->getOpcode() == PPC::BCC && !I->getOperand(2).isImm()) Dest = I->getOperand(2).getMBB(); else if ((I->getOpcode() == PPC::BC || I->getOpcode() == PPC::BCn) && !I->getOperand(1).isImm()) Dest = I->getOperand(1).getMBB(); else if ((I->getOpcode() == PPC::BDNZ8 || I->getOpcode() == PPC::BDNZ || I->getOpcode() == PPC::BDZ8 || I->getOpcode() == PPC::BDZ) && !I->getOperand(0).isImm()) Dest = I->getOperand(0).getMBB(); if (!Dest) { MBBStartOffset += TII->GetInstSizeInBytes(I); continue; } // Determine the offset from the current branch to the destination // block. int BranchSize; if (Dest->getNumber() <= MBB.getNumber()) { // If this is a backwards branch, the delta is the offset from the // start of this block to this branch, plus the sizes of all blocks // from this block to the dest. BranchSize = MBBStartOffset; for (unsigned i = Dest->getNumber(), e = MBB.getNumber(); i != e; ++i) BranchSize += BlockSizes[i]; } else { // Otherwise, add the size of the blocks between this block and the // dest to the number of bytes left in this block. BranchSize = -MBBStartOffset; for (unsigned i = MBB.getNumber(), e = Dest->getNumber(); i != e; ++i) BranchSize += BlockSizes[i]; } // If this branch is in range, ignore it. if (isInt<16>(BranchSize)) { MBBStartOffset += 4; continue; } // Otherwise, we have to expand it to a long branch. MachineInstr *OldBranch = I; DebugLoc dl = OldBranch->getDebugLoc(); if (I->getOpcode() == PPC::BCC) { // The BCC operands are: // 0. PPC branch predicate // 1. CR register // 2. Target MBB PPC::Predicate Pred = (PPC::Predicate)I->getOperand(0).getImm(); unsigned CRReg = I->getOperand(1).getReg(); // Jump over the uncond branch inst (i.e. $PC+8) on opposite condition. 
BuildMI(MBB, I, dl, TII->get(PPC::BCC)) .addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2); } else if (I->getOpcode() == PPC::BC) { unsigned CRBit = I->getOperand(0).getReg(); BuildMI(MBB, I, dl, TII->get(PPC::BCn)).addReg(CRBit).addImm(2); } else if (I->getOpcode() == PPC::BCn) { unsigned CRBit = I->getOperand(0).getReg(); BuildMI(MBB, I, dl, TII->get(PPC::BC)).addReg(CRBit).addImm(2); } else if (I->getOpcode() == PPC::BDNZ) { BuildMI(MBB, I, dl, TII->get(PPC::BDZ)).addImm(2); } else if (I->getOpcode() == PPC::BDNZ8) { BuildMI(MBB, I, dl, TII->get(PPC::BDZ8)).addImm(2); } else if (I->getOpcode() == PPC::BDZ) { BuildMI(MBB, I, dl, TII->get(PPC::BDNZ)).addImm(2); } else if (I->getOpcode() == PPC::BDZ8) { BuildMI(MBB, I, dl, TII->get(PPC::BDNZ8)).addImm(2); } else { llvm_unreachable("Unhandled branch type!"); } // Uncond branch to the real destination. I = BuildMI(MBB, I, dl, TII->get(PPC::B)).addMBB(Dest); // Remove the old branch from the function. OldBranch->eraseFromParent(); // Remember that this instruction is 8-bytes, increase the size of the // block by 4, remember to iterate. BlockSizes[MBB.getNumber()] += 4; MBBStartOffset += 8; ++NumExpanded; MadeChange = true; } } EverMadeChange |= MadeChange; } BlockSizes.clear(); return true; }
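// Each long-branch expansion above follows the same shape: emit the opposite
// short branch that hops over a following unconditional branch to the real
// destination. A standalone sketch of the opcode pairing (names only, not
// LLVM's API; BCC keeps its opcode and inverts its predicate operand instead):
#include <cstdio>
#include <string>

static std::string oppositeShortBranch(const std::string &opc) {
  if (opc == "BDNZ")  return "BDZ";
  if (opc == "BDNZ8") return "BDZ8";
  if (opc == "BDZ")   return "BDNZ";
  if (opc == "BDZ8")  return "BDNZ8";
  if (opc == "BC")    return "BCn";
  if (opc == "BCn")   return "BC";
  return "BCC"; // with PPC::InvertPredicate applied to its predicate
}

int main() {
  std::string opc = "BDNZ";
  // BDNZ far_target   =>   BDZ $PC+8 ; B far_target
  std::printf("%s far_target  =>  %s $PC+8 ; B far_target\n",
              opc.c_str(), oppositeShortBranch(opc).c_str());
  return 0;
}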
bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) { bool Modified = false; SmallSet<unsigned, 4> Defs; SmallSet<unsigned, 4> Uses; MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); while (MBBI != E) { MachineInstr *MI = &*MBBI; DebugLoc dl = MI->getDebugLoc(); unsigned PredReg = 0; ARMCC::CondCodes CC = getPredicate(MI, PredReg); if (CC == ARMCC::AL) { ++MBBI; continue; } Defs.clear(); Uses.clear(); TrackDefUses(MI, Defs, Uses); // Insert an IT instruction. MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(ARM::t2IT)) .addImm(CC); MachineBasicBlock::iterator InsertPos = MIB; ++MBBI; // Finalize IT mask. ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC); unsigned Mask = 0, Pos = 3; // Branches, including tricky ones like LDM_RET, need to end an IT // block so check the instruction we just put in the block. for (; MBBI != E && Pos && (!MI->getDesc().isBranch() && !MI->getDesc().isReturn()) ; ++MBBI) { if (MBBI->isDebugValue()) continue; MachineInstr *NMI = &*MBBI; MI = NMI; unsigned NPredReg = 0; ARMCC::CondCodes NCC = getPredicate(NMI, NPredReg); if (NCC == CC || NCC == OCC) Mask |= (NCC & 1) << Pos; else { unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; if (NCC == ARMCC::AL && TII->isMoveInstr(*NMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { assert(SrcSubIdx == 0 && DstSubIdx == 0 && "Sub-register indices still around?"); // llvm models select's as two-address instructions. That means a copy // is inserted before a t2MOVccr, etc. If the copy is scheduled in // between selects we would end up creating multiple IT blocks. if (!Uses.count(DstReg) && !Defs.count(SrcReg)) { --MBBI; MBB.remove(NMI); MBB.insert(InsertPos, NMI); ++NumMovedInsts; continue; } } break; } TrackDefUses(NMI, Defs, Uses); --Pos; } Mask |= (1 << Pos); // Tag along (firstcond[0] << 4) with the mask. Mask |= (CC & 1) << 4; MIB.addImm(Mask); Modified = true; ++NumITs; } return Modified; }
void SIInsertSkips::kill(MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = MI.getDebugLoc(); switch (MI.getOpcode()) { case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR: { unsigned Opcode = 0; // The opcodes are inverted because the inline immediate has to be // the first operand, e.g. from "x < imm" to "imm > x" switch (MI.getOperand(2).getImm()) { case ISD::SETOEQ: case ISD::SETEQ: Opcode = AMDGPU::V_CMPX_EQ_F32_e64; break; case ISD::SETOGT: case ISD::SETGT: Opcode = AMDGPU::V_CMPX_LT_F32_e64; break; case ISD::SETOGE: case ISD::SETGE: Opcode = AMDGPU::V_CMPX_LE_F32_e64; break; case ISD::SETOLT: case ISD::SETLT: Opcode = AMDGPU::V_CMPX_GT_F32_e64; break; case ISD::SETOLE: case ISD::SETLE: Opcode = AMDGPU::V_CMPX_GE_F32_e64; break; case ISD::SETONE: case ISD::SETNE: Opcode = AMDGPU::V_CMPX_LG_F32_e64; break; case ISD::SETO: Opcode = AMDGPU::V_CMPX_O_F32_e64; break; case ISD::SETUO: Opcode = AMDGPU::V_CMPX_U_F32_e64; break; case ISD::SETUEQ: Opcode = AMDGPU::V_CMPX_NLG_F32_e64; break; case ISD::SETUGT: Opcode = AMDGPU::V_CMPX_NGE_F32_e64; break; case ISD::SETUGE: Opcode = AMDGPU::V_CMPX_NGT_F32_e64; break; case ISD::SETULT: Opcode = AMDGPU::V_CMPX_NLE_F32_e64; break; case ISD::SETULE: Opcode = AMDGPU::V_CMPX_NLT_F32_e64; break; case ISD::SETUNE: Opcode = AMDGPU::V_CMPX_NEQ_F32_e64; break; default: llvm_unreachable("invalid ISD:SET cond code"); } assert(MI.getOperand(0).isReg()); if (TRI->isVGPR(MBB.getParent()->getRegInfo(), MI.getOperand(0).getReg())) { Opcode = AMDGPU::getVOPe32(Opcode); BuildMI(MBB, &MI, DL, TII->get(Opcode)) .add(MI.getOperand(1)) .add(MI.getOperand(0)); } else { BuildMI(MBB, &MI, DL, TII->get(Opcode)) .addReg(AMDGPU::VCC, RegState::Define) .addImm(0) // src0 modifiers .add(MI.getOperand(1)) .addImm(0) // src1 modifiers .add(MI.getOperand(0)) .addImm(0); // omod } break; } case AMDGPU::SI_KILL_I1_TERMINATOR: { const MachineOperand &Op = MI.getOperand(0); int64_t KillVal = MI.getOperand(1).getImm(); assert(KillVal == 0 || KillVal == -1); // Kill all threads if Op0 is an immediate and equal to the Kill value. if (Op.isImm()) { int64_t Imm = Op.getImm(); assert(Imm == 0 || Imm == -1); if (Imm == KillVal) BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC) .addImm(0); break; } unsigned Opcode = KillVal ? AMDGPU::S_ANDN2_B64 : AMDGPU::S_AND_B64; BuildMI(MBB, &MI, DL, TII->get(Opcode), AMDGPU::EXEC) .addReg(AMDGPU::EXEC) .add(Op); break; } default: llvm_unreachable("invalid opcode, expected SI_KILL_*_TERMINATOR"); } }
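// The opcode table above looks inverted only because the operands are: the
// inline immediate has to be the first V_CMPX source, so "x < imm" is emitted
// as "imm > x". A standalone restatement of that operand-swap rule on plain
// predicates (EQ/NE and the ordered/unordered flavors keep their sense):
#include <cstdio>

enum class Pred { EQ, NE, LT, LE, GT, GE };

static Pred swapOperands(Pred p) {
  switch (p) {
  case Pred::LT: return Pred::GT;
  case Pred::GT: return Pred::LT;
  case Pred::LE: return Pred::GE;
  case Pred::GE: return Pred::LE;
  default:       return p; // EQ and NE are symmetric in their operands
  }
}

int main() {
  // "x < 3" and "3 > x" select the same lanes.
  std::printf("LT swaps to GT: %s\n",
              swapOperands(Pred::LT) == Pred::GT ? "yes" : "no");
  return 0;
}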
void SILowerControlFlow::emitIf(MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); const DebugLoc &DL = MI.getDebugLoc(); MachineBasicBlock::iterator I(&MI); MachineOperand &SaveExec = MI.getOperand(0); MachineOperand &Cond = MI.getOperand(1); assert(SaveExec.getSubReg() == AMDGPU::NoSubRegister && Cond.getSubReg() == AMDGPU::NoSubRegister); unsigned SaveExecReg = SaveExec.getReg(); MachineOperand &ImpDefSCC = MI.getOperand(4); assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef()); // Add an implicit def of exec to discourage scheduling VALU after this which // will interfere with trying to form s_and_saveexec_b64 later. MachineInstr *CopyExec = BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), SaveExecReg) .addReg(AMDGPU::EXEC) .addReg(AMDGPU::EXEC, RegState::ImplicitDefine); unsigned Tmp = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass); MachineInstr *And = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_AND_B64), Tmp) .addReg(SaveExecReg) //.addReg(AMDGPU::EXEC) .addReg(Cond.getReg()); setImpSCCDefDead(*And, true); MachineInstr *Xor = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_XOR_B64), SaveExecReg) .addReg(Tmp) .addReg(SaveExecReg); setImpSCCDefDead(*Xor, ImpDefSCC.isDead()); // Use a copy that is a terminator to get correct spill code placement it with // fast regalloc. MachineInstr *SetExec = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B64_term), AMDGPU::EXEC) .addReg(Tmp, RegState::Kill); // Insert a pseudo terminator to help keep the verifier happy. This will also // be used later when inserting skips. MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_MASK_BRANCH)) .addOperand(MI.getOperand(2)); if (!LIS) { MI.eraseFromParent(); return; } LIS->InsertMachineInstrInMaps(*CopyExec); // Replace with and so we don't need to fix the live interval for condition // register. LIS->ReplaceMachineInstrInMaps(MI, *And); LIS->InsertMachineInstrInMaps(*Xor); LIS->InsertMachineInstrInMaps(*SetExec); LIS->InsertMachineInstrInMaps(*NewBr); LIS->removeRegUnit(*MCRegUnitIterator(AMDGPU::EXEC, TRI)); MI.eraseFromParent(); // FIXME: Is there a better way of adjusting the liveness? It shouldn't be // hard to add another def here but I'm not sure how to correctly update the // valno. LIS->removeInterval(SaveExecReg); LIS->createAndComputeVirtRegInterval(SaveExecReg); LIS->createAndComputeVirtRegInterval(Tmp); }
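// A bit-mask model of what the emitted sequence computes (uint64_t stands in
// for the 64-lane EXEC mask; this is an illustration, not GPU code): after the
// "if", EXEC holds the lanes taking the then-side and the saved register holds
// the lanes that skipped it.
#include <cstdint>
#include <cstdio>

struct IfMasks { uint64_t execAfter, savedExec; };

// Mirrors: COPY exec; tmp = saved & cond; saved = tmp ^ saved; exec = tmp.
static IfMasks emitIfMasks(uint64_t exec, uint64_t cond) {
  uint64_t saved = exec;       // COPY of EXEC
  uint64_t tmp = saved & cond; // S_AND_B64: lanes entering the then-block
  saved = tmp ^ saved;         // S_XOR_B64: lanes that do not
  return {tmp, saved};         // S_MOV_B64_term: EXEC := tmp
}

int main() {
  IfMasks m = emitIfMasks(/*exec=*/0xFFu, /*cond=*/0x0Fu);
  std::printf("exec=0x%llx saved=0x%llx\n",
              static_cast<unsigned long long>(m.execAfter),
              static_cast<unsigned long long>(m.savedExec));
  return 0;
}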
void SILowerControlFlow::emitElse(MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); const DebugLoc &DL = MI.getDebugLoc(); unsigned DstReg = MI.getOperand(0).getReg(); assert(MI.getOperand(0).getSubReg() == AMDGPU::NoSubRegister); bool ExecModified = MI.getOperand(3).getImm() != 0; MachineBasicBlock::iterator Start = MBB.begin(); // We are running before TwoAddressInstructions, and si_else's operands are // tied. In order to correctly tie the registers, split this into a copy of // the src like it does. BuildMI(MBB, Start, DL, TII->get(AMDGPU::COPY), DstReg) .addOperand(MI.getOperand(1)); // Saved EXEC // This must be inserted before phis and any spill code inserted before the // else. MachineInstr *OrSaveExec = BuildMI(MBB, Start, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), DstReg) .addReg(DstReg); MachineBasicBlock *DestBB = MI.getOperand(2).getMBB(); MachineBasicBlock::iterator ElsePt(MI); if (ExecModified) { MachineInstr *And = BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_AND_B64), DstReg) .addReg(AMDGPU::EXEC) .addReg(DstReg); if (LIS) LIS->InsertMachineInstrInMaps(*And); } MachineInstr *Xor = BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_XOR_B64_term), AMDGPU::EXEC) .addReg(AMDGPU::EXEC) .addReg(DstReg); MachineInstr *Branch = BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::SI_MASK_BRANCH)) .addMBB(DestBB); if (!LIS) { MI.eraseFromParent(); return; } LIS->RemoveMachineInstrFromMaps(MI); MI.eraseFromParent(); LIS->InsertMachineInstrInMaps(*OrSaveExec); LIS->InsertMachineInstrInMaps(*Xor); LIS->InsertMachineInstrInMaps(*Branch); // src reg is tied to dst reg. LIS->removeInterval(DstReg); LIS->createAndComputeVirtRegInterval(DstReg); // Let this be recomputed. LIS->removeRegUnit(*MCRegUnitIterator(AMDGPU::EXEC, TRI)); }
// transformInstruction - Perform the transformation of an instruction // to its equivalant AdvSIMD scalar instruction. Update inputs and outputs // to be the correct register class, minimizing cross-class copies. void AArch64AdvSIMDScalar::transformInstruction(MachineInstr &MI) { DEBUG(dbgs() << "Scalar transform: " << MI); MachineBasicBlock *MBB = MI.getParent(); unsigned OldOpc = MI.getOpcode(); unsigned NewOpc = getTransformOpcode(OldOpc); assert(OldOpc != NewOpc && "transform an instruction to itself?!"); // Check if we need a copy for the source registers. unsigned OrigSrc0 = MI.getOperand(1).getReg(); unsigned OrigSrc1 = MI.getOperand(2).getReg(); unsigned Src0 = 0, SubReg0; unsigned Src1 = 0, SubReg1; bool KillSrc0 = false, KillSrc1 = false; if (!MRI->def_empty(OrigSrc0)) { MachineRegisterInfo::def_instr_iterator Def = MRI->def_instr_begin(OrigSrc0); assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!"); MachineOperand *MOSrc0 = getSrcFromCopy(&*Def, MRI, SubReg0); // If there are no other users of the original source, we can delete // that instruction. if (MOSrc0) { Src0 = MOSrc0->getReg(); KillSrc0 = MOSrc0->isKill(); // Src0 is going to be reused, thus, it cannot be killed anymore. MOSrc0->setIsKill(false); if (MRI->hasOneNonDBGUse(OrigSrc0)) { assert(MOSrc0 && "Can't delete copy w/o a valid original source!"); Def->eraseFromParent(); ++NumCopiesDeleted; } } } if (!MRI->def_empty(OrigSrc1)) { MachineRegisterInfo::def_instr_iterator Def = MRI->def_instr_begin(OrigSrc1); assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!"); MachineOperand *MOSrc1 = getSrcFromCopy(&*Def, MRI, SubReg1); // If there are no other users of the original source, we can delete // that instruction. if (MOSrc1) { Src1 = MOSrc1->getReg(); KillSrc1 = MOSrc1->isKill(); // Src0 is going to be reused, thus, it cannot be killed anymore. MOSrc1->setIsKill(false); if (MRI->hasOneNonDBGUse(OrigSrc1)) { assert(MOSrc1 && "Can't delete copy w/o a valid original source!"); Def->eraseFromParent(); ++NumCopiesDeleted; } } } // If we weren't able to reference the original source directly, create a // copy. if (!Src0) { SubReg0 = 0; Src0 = MRI->createVirtualRegister(&AArch64::FPR64RegClass); insertCopy(TII, MI, Src0, OrigSrc0, KillSrc0); KillSrc0 = true; } if (!Src1) { SubReg1 = 0; Src1 = MRI->createVirtualRegister(&AArch64::FPR64RegClass); insertCopy(TII, MI, Src1, OrigSrc1, KillSrc1); KillSrc1 = true; } // Create a vreg for the destination. // FIXME: No need to do this if the ultimate user expects an FPR64. // Check for that and avoid the copy if possible. unsigned Dst = MRI->createVirtualRegister(&AArch64::FPR64RegClass); // For now, all of the new instructions have the same simple three-register // form, so no need to special case based on what instruction we're // building. BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), Dst) .addReg(Src0, getKillRegState(KillSrc0), SubReg0) .addReg(Src1, getKillRegState(KillSrc1), SubReg1); // Now copy the result back out to a GPR. // FIXME: Try to avoid this if all uses could actually just use the FPR64 // directly. insertCopy(TII, MI, MI.getOperand(0).getReg(), Dst, true); // Erase the old instruction. MI.eraseFromParent(); ++NumScalarInsnsUsed; }
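// The two symmetric blocks above implement one decision per source operand:
// if the operand is produced by a copy out of an FPR64, feed the new scalar
// instruction from that FPR64 directly (deleting the copy when it had a
// single non-debug use); otherwise materialize a fresh FPR64 copy. A
// standalone restatement with hypothetical stand-in fields, not the pass's
// real MRI queries:
#include <cstdio>

struct SrcDef {
  bool isCopyFromFPR64; // getSrcFromCopy(...) succeeded
  bool hasOneNonDbgUse; // MRI->hasOneNonDBGUse(OrigSrc)
  bool srcKillFlag;     // kill flag on the copy's source operand
};

struct SrcChoice { bool reuseCopySource, deleteCopy, killOnNewUse; };

static SrcChoice pickSource(const SrcDef &d) {
  if (d.isCopyFromFPR64)
    // Reuse the FPR64 the copy read from; the old kill flag migrates to the
    // new use, and the copy itself dies if nothing else used its result.
    return {true, d.hasOneNonDbgUse, d.srcKillFlag};
  // No reusable source: create a new FPR64 vreg, copy into it, kill the temp.
  return {false, false, true};
}

int main() {
  SrcChoice c = pickSource({true, true, false});
  std::printf("reuse=%d deleteCopy=%d kill=%d\n",
              c.reuseCopySource, c.deleteCopy, c.killOnNewUse);
  return 0;
}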
bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** Hexagon New Value Jump **********\n" << "********** Function: " << MF.getName() << "\n"); #if 0 // for now disable this, if we move NewValueJump before register // allocation we need this information. LiveVariables &LVs = getAnalysis<LiveVariables>(); #endif QII = static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo()); QRI = static_cast<const HexagonRegisterInfo *>( MF.getSubtarget().getRegisterInfo()); MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); if (!QRI->Subtarget.hasV4TOps() || DisableNewValueJumps) { return false; } int nvjCount = DbgNVJCount; int nvjGenerated = 0; // Loop through all the bb's of the function for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end(); MBBb != MBBe; ++MBBb) { MachineBasicBlock* MBB = MBBb; DEBUG(dbgs() << "** dumping bb ** " << MBB->getNumber() << "\n"); DEBUG(MBB->dump()); DEBUG(dbgs() << "\n" << "********** dumping instr bottom up **********\n"); bool foundJump = false; bool foundCompare = false; bool invertPredicate = false; unsigned predReg = 0; // predicate reg of the jump. unsigned cmpReg1 = 0; int cmpOp2 = 0; bool MO1IsKill = false; bool MO2IsKill = false; MachineBasicBlock::iterator jmpPos; MachineBasicBlock::iterator cmpPos; MachineInstr *cmpInstr = nullptr, *jmpInstr = nullptr; MachineBasicBlock *jmpTarget = nullptr; bool afterRA = false; bool isSecondOpReg = false; bool isSecondOpNewified = false; // Traverse the basic block - bottom up for (MachineBasicBlock::iterator MII = MBB->end(), E = MBB->begin(); MII != E;) { MachineInstr *MI = --MII; if (MI->isDebugValue()) { continue; } if ((nvjCount == 0) || (nvjCount > -1 && nvjCount <= nvjGenerated)) break; DEBUG(dbgs() << "Instr: "; MI->dump(); dbgs() << "\n"); if (!foundJump && (MI->getOpcode() == Hexagon::JMP_t || MI->getOpcode() == Hexagon::JMP_f || MI->getOpcode() == Hexagon::JMP_tnew_t || MI->getOpcode() == Hexagon::JMP_tnew_nt || MI->getOpcode() == Hexagon::JMP_fnew_t || MI->getOpcode() == Hexagon::JMP_fnew_nt)) { // This is where you would insert your compare and // instr that feeds compare jmpPos = MII; jmpInstr = MI; predReg = MI->getOperand(0).getReg(); afterRA = TargetRegisterInfo::isPhysicalRegister(predReg); // If ifconverter had not messed up with the kill flags of the // operands, the following check on the kill flag would suffice. // if(!jmpInstr->getOperand(0).isKill()) break; // This predicate register is live out out of BB // this would only work if we can actually use Live // variable analysis on phy regs - but LLVM does not // provide LV analysis on phys regs. //if(LVs.isLiveOut(predReg, *MBB)) break; // Get all the successors of this block - which will always // be 2. Check if the predicate register is live in in those // successor. If yes, we can not delete the predicate - // I am doing this only because LLVM does not provide LiveOut // at the BB level. bool predLive = false; for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), SIE = MBB->succ_end(); SI != SIE; ++SI) { MachineBasicBlock* succMBB = *SI; if (succMBB->isLiveIn(predReg)) { predLive = true; } } if (predLive) break; jmpTarget = MI->getOperand(1).getMBB(); foundJump = true; if (MI->getOpcode() == Hexagon::JMP_f || MI->getOpcode() == Hexagon::JMP_fnew_t || MI->getOpcode() == Hexagon::JMP_fnew_nt) { invertPredicate = true; } continue; } // No new value jump if there is a barrier. A barrier has to be in its // own packet. A barrier has zero operands. 
We conservatively bail out // here if we see any instruction with zero operands. if (foundJump && MI->getNumOperands() == 0) break; if (foundJump && !foundCompare && MI->getOperand(0).isReg() && MI->getOperand(0).getReg() == predReg) { // Not all compares can be new value compare. Arch Spec: 7.6.1.1 if (QII->isNewValueJumpCandidate(MI)) { assert((MI->getDesc().isCompare()) && "Only compare instruction can be collapsed into New Value Jump"); isSecondOpReg = MI->getOperand(2).isReg(); if (!canCompareBeNewValueJump(QII, QRI, MII, predReg, isSecondOpReg, afterRA, jmpPos, MF)) break; cmpInstr = MI; cmpPos = MII; foundCompare = true; // We need cmpReg1 and cmpOp2(imm or reg) while building // new value jump instruction. cmpReg1 = MI->getOperand(1).getReg(); if (MI->getOperand(1).isKill()) MO1IsKill = true; if (isSecondOpReg) { cmpOp2 = MI->getOperand(2).getReg(); if (MI->getOperand(2).isKill()) MO2IsKill = true; } else cmpOp2 = MI->getOperand(2).getImm(); continue; } } if (foundCompare && foundJump) { // If "common" checks fail, bail out on this BB. if (!commonChecksToProhibitNewValueJump(afterRA, MII)) break; bool foundFeeder = false; MachineBasicBlock::iterator feederPos = MII; if (MI->getOperand(0).isReg() && MI->getOperand(0).isDef() && (MI->getOperand(0).getReg() == cmpReg1 || (isSecondOpReg && MI->getOperand(0).getReg() == (unsigned) cmpOp2))) { unsigned feederReg = MI->getOperand(0).getReg(); // First try to see if we can get the feeder from the first operand // of the compare. If we can not, and if secondOpReg is true // (second operand of the compare is also register), try that one. // TODO: Try to come up with some heuristic to figure out which // feeder would benefit. if (feederReg == cmpReg1) { if (!canBeFeederToNewValueJump(QII, QRI, MII, jmpPos, cmpPos, MF)) { if (!isSecondOpReg) break; else continue; } else foundFeeder = true; } if (!foundFeeder && isSecondOpReg && feederReg == (unsigned) cmpOp2) if (!canBeFeederToNewValueJump(QII, QRI, MII, jmpPos, cmpPos, MF)) break; if (isSecondOpReg) { // In case of CMPLT, or CMPLTU, or EQ with the second register // to newify, swap the operands. if (cmpInstr->getOpcode() == Hexagon::C2_cmpeq && feederReg == (unsigned) cmpOp2) { unsigned tmp = cmpReg1; bool tmpIsKill = MO1IsKill; cmpReg1 = cmpOp2; MO1IsKill = MO2IsKill; cmpOp2 = tmp; MO2IsKill = tmpIsKill; } // Now we have swapped the operands, all we need to check is, // if the second operand (after swap) is the feeder. // And if it is, make a note. if (feederReg == (unsigned)cmpOp2) isSecondOpNewified = true; } // Now that we are moving feeder close the jump, // make sure we are respecting the kill values of // the operands of the feeder. 
bool updatedIsKill = false; for (unsigned i = 0; i < MI->getNumOperands(); i++) { MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && MO.isUse()) { unsigned feederReg = MO.getReg(); for (MachineBasicBlock::iterator localII = feederPos, end = jmpPos; localII != end; localII++) { MachineInstr *localMI = localII; for (unsigned j = 0; j < localMI->getNumOperands(); j++) { MachineOperand &localMO = localMI->getOperand(j); if (localMO.isReg() && localMO.isUse() && localMO.isKill() && feederReg == localMO.getReg()) { // We found that there is kill of a use register // Set up a kill flag on the register localMO.setIsKill(false); MO.setIsKill(); updatedIsKill = true; break; } } if (updatedIsKill) break; } } if (updatedIsKill) break; } MBB->splice(jmpPos, MI->getParent(), MI); MBB->splice(jmpPos, MI->getParent(), cmpInstr); DebugLoc dl = MI->getDebugLoc(); MachineInstr *NewMI; assert((QII->isNewValueJumpCandidate(cmpInstr)) && "This compare is not a New Value Jump candidate."); unsigned opc = getNewValueJumpOpcode(cmpInstr, cmpOp2, isSecondOpNewified, jmpTarget, MBPI); if (invertPredicate) opc = QII->getInvertedPredicatedOpcode(opc); if (isSecondOpReg) NewMI = BuildMI(*MBB, jmpPos, dl, QII->get(opc)) .addReg(cmpReg1, getKillRegState(MO1IsKill)) .addReg(cmpOp2, getKillRegState(MO2IsKill)) .addMBB(jmpTarget); else if ((cmpInstr->getOpcode() == Hexagon::C2_cmpeqi || cmpInstr->getOpcode() == Hexagon::C2_cmpgti) && cmpOp2 == -1 ) // Corresponding new-value compare jump instructions don't have the // operand for -1 immediate value. NewMI = BuildMI(*MBB, jmpPos, dl, QII->get(opc)) .addReg(cmpReg1, getKillRegState(MO1IsKill)) .addMBB(jmpTarget); else NewMI = BuildMI(*MBB, jmpPos, dl, QII->get(opc)) .addReg(cmpReg1, getKillRegState(MO1IsKill)) .addImm(cmpOp2) .addMBB(jmpTarget); assert(NewMI && "New Value Jump Instruction Not created!"); (void)NewMI; if (cmpInstr->getOperand(0).isReg() && cmpInstr->getOperand(0).isKill()) cmpInstr->getOperand(0).setIsKill(false); if (cmpInstr->getOperand(1).isReg() && cmpInstr->getOperand(1).isKill()) cmpInstr->getOperand(1).setIsKill(false); cmpInstr->eraseFromParent(); jmpInstr->eraseFromParent(); ++nvjGenerated; ++NumNVJGenerated; break; } } } } return true; }
DeadMemOpElimination::instr_iterator
DeadMemOpElimination::handleMemOp(instr_iterator I, DefMapTy &Defs,
                                  AliasSetTracker &AST) {
  MachineInstr *MI = I;
  MachineMemOperand *MO = *MI->memoperands_begin();
  // AliasAnalysis cannot handle offsets right now, so we pretend to write a
  // big enough size to the location pointed to by the base pointer.
  uint64_t Size = MO->getSize() + MO->getOffset();
  AliasSet *ASet =
    &AST.getAliasSetForPointer(const_cast<Value*>(MO->getValue()), Size, 0);
  MachineInstr *&LastMI = Defs[ASet];
  bool canHandleLastStore =
    LastMI && ASet->isMustAlias() &&
    LastMI->getOpcode() != VTM::VOpInternalCall
    // FIXME: We may need to remember the last definition for all predicates.
    && isPredIdentical(LastMI, MI);

  if (canHandleLastStore) {
    MachineMemOperand *LastMO = *LastMI->memoperands_begin();
    // We can only handle the last store if its memory operand has a
    // must-alias address and the same size.
    canHandleLastStore =
      LastMO->getSize() == MO->getSize() && !LastMO->isVolatile() &&
      MachineMemOperandAlias(MO, LastMO, AA, SE) == AliasAnalysis::MustAlias;
  }

  // FIXME: This elimination is only valid if we are in single-thread mode!
  if (VInstrInfo::mayStore(MI)) {
    if (canHandleLastStore) {
      // Dead store found, remove it.
      LastMI->eraseFromParent();
      ++DeadStoreEliminated;
    }
    // Update the definition.
    LastMI = MI;
    return I;
  }

  // Now MI is a load.
  if (!canHandleLastStore)
    return I;

  // The load reads the value that was just stored, so the load is not
  // necessary. Simply replace the load by a copy.
  MachineOperand LoadedMO = MI->getOperand(0);
  MachineOperand StoredMO = LastMI->getOperand(2);
  DebugLoc dl = MI->getDebugLoc();
  I = *BuildMI(*MI->getParent(), I, dl, VInstrInfo::getDesc(VTM::VOpMove))
        .addOperand(LoadedMO).addOperand(StoredMO)
        .addOperand(*VInstrInfo::getPredOperand(MI))
        .addOperand(*VInstrInfo::getTraceOperand(MI));
  MI->eraseFromParent();
  ++DeadLoadEliminated;
  return I;
}
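// A standalone model of the store/load forwarding above: one "last store" is
// remembered per address; a matching later store kills it, and a matching
// later load is turned into a copy of the stored value. Illustrative only --
// the real pass keys on alias sets and also checks predicates and volatility.
#include <cstdio>
#include <map>
#include <optional>
#include <string>

struct MemOp { bool isStore; std::string addr; unsigned size; std::string value; };

struct Forwarder {
  std::map<std::string, MemOp> lastStore; // address -> pending store
  unsigned deadStores = 0, deadLoads = 0;

  // Returns the value a load can be replaced with, if any.
  std::optional<std::string> visit(const MemOp &op) {
    auto it = lastStore.find(op.addr);
    bool matches = it != lastStore.end() && it->second.size == op.size;
    if (op.isStore) {
      if (matches)
        ++deadStores;          // earlier store is fully overwritten
      lastStore[op.addr] = op;
      return std::nullopt;
    }
    if (!matches)
      return std::nullopt;
    ++deadLoads;               // load of the just-stored value
    return it->second.value;   // the load becomes a plain copy
  }
};

int main() {
  Forwarder F;
  F.visit({true, "p", 4, "v1"});
  F.visit({true, "p", 4, "v2"});          // kills the first store
  auto v = F.visit({false, "p", 4, ""});  // forwarded from v2
  std::printf("deadStores=%u forwarded=%s\n",
              F.deadStores, v ? v->c_str() : "none");
  return 0;
}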
void MIPrinter::print(const MachineInstr &MI) {
  const auto *MF = MI.getParent()->getParent();
  const auto &MRI = MF->getRegInfo();
  const auto &SubTarget = MF->getSubtarget();
  const auto *TRI = SubTarget.getRegisterInfo();
  assert(TRI && "Expected target register info");
  const auto *TII = SubTarget.getInstrInfo();
  assert(TII && "Expected target instruction info");
  if (MI.isCFIInstruction())
    assert(MI.getNumOperands() == 1 && "Expected 1 operand in CFI instruction");

  bool ShouldPrintRegisterTies = hasComplexRegisterTies(MI);
  unsigned I = 0, E = MI.getNumOperands();
  for (; I < E && MI.getOperand(I).isReg() && MI.getOperand(I).isDef() &&
         !MI.getOperand(I).isImplicit();
       ++I) {
    if (I)
      OS << ", ";
    print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies, &MRI,
          /*IsDef=*/true);
  }

  if (I)
    OS << " = ";
  if (MI.getFlag(MachineInstr::FrameSetup))
    OS << "frame-setup ";
  OS << TII->getName(MI.getOpcode());
  if (isPreISelGenericOpcode(MI.getOpcode())) {
    assert(MI.getType() && "Generic instructions must have a type");
    OS << ' ';
    MI.getType()->print(OS, /*IsForDebug*/ false, /*NoDetails*/ true);
  }
  if (I < E)
    OS << ' ';

  bool NeedComma = false;
  for (; I < E; ++I) {
    if (NeedComma)
      OS << ", ";
    print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies);
    NeedComma = true;
  }

  if (MI.getDebugLoc()) {
    if (NeedComma)
      OS << ',';
    OS << " debug-location ";
    MI.getDebugLoc()->printAsOperand(OS, MST);
  }

  if (!MI.memoperands_empty()) {
    OS << " :: ";
    bool NeedComma = false;
    for (const auto *Op : MI.memoperands()) {
      if (NeedComma)
        OS << ", ";
      print(*Op);
      NeedComma = true;
    }
  }
}
MachineInstr *
ARMBSISimplifyIndexMemOpsPass::convertToSimpleInstrs(MachineFunction::iterator &MFI,
                                                     MachineBasicBlock::iterator &MBBI,
                                                     LiveVariables *LV) const {
  // FIXME: Thumb2 support.
  llvm::errs() << "about to convert to three address\n";
  llvm::errs() << "arg passed to convert to three address\n";
  MachineInstr *MI = MBBI;
  MachineFunction &MF = *MI->getParent()->getParent();
  uint64_t TSFlags = MI->getDesc().TSFlags;
  bool isPre = false;
  switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
  default: return NULL;
  case ARMII::IndexModePre:
    isPre = true;
    break;
  case ARMII::IndexModePost:
    break;
  }

  // Try splitting an indexed load/store to an un-indexed one plus an add/sub
  // operation.
  unsigned MemOpc = TII->getUnindexedOpcode(MI->getOpcode());
  if (MemOpc == 0)
    return NULL;

  MachineInstr *UpdateMI = NULL;
  MachineInstr *MemMI = NULL;
  MachineInstrBuilder MemMIB;
  unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
  const MCInstrDesc &MCID = MI->getDesc();
  unsigned NumOps = MCID.getNumOperands();
  bool isLoad = !MI->mayStore();
  const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
  const MachineOperand &Base = MI->getOperand(2);
  const MachineOperand &Offset = MI->getOperand(NumOps-3);
  unsigned WBReg = WB.getReg();
  unsigned BaseReg = Base.getReg();
  unsigned OffReg = -1;
  unsigned OffImm = -1;
  if (Offset.isReg())
    OffReg = Offset.getReg();
  else if (MI->getOperand(NumOps-3).isImm()) {
    OffImm = MI->getOperand(NumOps-3).getImm();
    OffReg = 0;
  } else {
    llvm::errs() << "FAIL\n";
    return NULL; // FAIL
  }

  ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-2).getImm();
  switch (AddrMode) {
  default: llvm_unreachable("Unknown indexed op!");
  case ARMII::AddrMode2: {
    bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
    uint64_t Amt = ARM_AM::getAM2Offset(OffImm);
    if (isSub) {
      Amt = ~Amt;
      Amt++;
      Amt &= 0x0fff;
      return NULL;
    }
    if (OffReg == 0) {
      if (ARM_AM::getSOImmVal(Amt) == -1) {
        // Can't encode it in a so_imm operand. This transformation will
        // add more than 1 instruction. Abandon!
        llvm::errs() << "here amt = " << Amt << " is sub: " << isSub << "\n";
        return NULL;
      }
      llvm::errs() << "NONFAIL offImm amt " << OffImm << " pred = " << Pred
                   << "here amt = " << Amt << " is sub: " << isSub << "\n";
      // Immediate offset: use the register/immediate form of ADD/SUB.
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         TII->get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
        .addReg(BaseReg).addImm(Amt)
        .addImm(Pred).addReg(0).addReg(0);
    } else if (Amt != 0) {
      ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
      unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
      // Shifted register offset: use the register/shifted-register form.
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         TII->get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
        .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc)
        .addImm(Pred).addReg(0).addReg(0);
    } else
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         TII->get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
        .addReg(BaseReg).addReg(OffReg)
        .addImm(Pred).addReg(0).addReg(0);
    break;
  }
  case ARMII::AddrMode3: {
    bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
    unsigned Amt = ARM_AM::getAM3Offset(OffImm);
    llvm::errs() << " in addressing mode 3\n";
    if (OffReg == 0)
      // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         TII->get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
        .addReg(BaseReg).addImm(Amt)
        .addImm(Pred).addReg(0).addReg(0);
    else
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         TII->get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
        .addReg(BaseReg).addReg(OffReg)
        .addImm(Pred).addReg(0).addReg(0);
    break;
  }
  }

  std::vector<MachineInstr*> NewMIs;
  if (isPre) {
    if (isLoad)
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      TII->get(MemOpc), MI->getOperand(0).getReg())
        .addReg(WBReg).addImm(0).addImm(Pred).addReg(0);
    else {
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      TII->get(MemOpc)).addReg(MI->getOperand(1).getReg())
        .addReg(WBReg).addImm(0).addImm(Pred).addReg(0);
      //AddDefaultPred(MemMIB);
    }
    NewMIs.push_back(MemMI);
    // The base-update instruction must also be emitted; NewMIs is kept in
    // reverse program order, so it is inserted before the memory access
    // below.
    NewMIs.push_back(UpdateMI);
  } else {
    if (isLoad)
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      TII->get(MemOpc), MI->getOperand(0).getReg())
        .addReg(BaseReg).addImm(0).addImm(Pred).addReg(0);
    else {
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      TII->get(MemOpc)).addReg(MI->getOperand(1).getReg())
        .addReg(BaseReg).addImm(0).addImm(Pred).addReg(0);
      //AddDefaultPred(MemMIB);
    }
    if (WB.isDead())
      UpdateMI->getOperand(0).setIsDead();
    NewMIs.push_back(UpdateMI);
    NewMIs.push_back(MemMI);
  }

  // Transfer LiveVariables states, kill / dead info.
  if (LV) {
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
        unsigned Reg = MO.getReg();

        LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
        if (MO.isDef()) {
          MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
          if (MO.isDead())
            LV->addVirtualRegisterDead(Reg, NewMI);
        }
        if (MO.isUse() && MO.isKill()) {
          for (unsigned j = 0; j < 2; ++j) {
            // Look at the two new MI's in reverse order.
            MachineInstr *NewMI = NewMIs[j];
            if (!NewMI->readsRegister(Reg))
              continue;
            LV->addVirtualRegisterKilled(Reg, NewMI);
            if (VI.removeKill(MI))
              VI.Kills.push_back(NewMI);
            break;
          }
        }
      }
    }
  }

  MFI->insert(MBBI, NewMIs[1]);
  MFI->insert(MBBI, NewMIs[0]);
  return NewMIs[0];
}
/// fixupConditionalBranch - Fix up a conditional branch whose destination is /// too far away to fit in its displacement field. It is converted to an inverse /// conditional branch + an unconditional branch to the destination. bool AArch64BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) { DebugLoc DL = MI.getDebugLoc(); MachineBasicBlock *MBB = MI.getParent(); MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 4> Cond; bool Fail = TII->analyzeBranch(*MBB, TBB, FBB, Cond); assert(!Fail && "branches to be relaxed must be analyzable"); (void)Fail; // Add an unconditional branch to the destination and invert the branch // condition to jump over it: // tbz L1 // => // tbnz L2 // b L1 // L2: if (FBB && isBlockInRange(MI, *FBB)) { // Last MI in the BB is an unconditional branch. We can simply invert the // condition and swap destinations: // beq L1 // b L2 // => // bne L2 // b L1 DEBUG(dbgs() << " Invert condition and swap " "its destination with " << MBB->back()); TII->reverseBranchCondition(Cond); int OldSize = 0, NewSize = 0; TII->removeBranch(*MBB, &OldSize); TII->insertBranch(*MBB, FBB, TBB, Cond, DL, &NewSize); BlockInfo[MBB->getNumber()].Size += (NewSize - OldSize); return true; } else if (FBB) { // We need to split the basic block here to obtain two long-range // unconditional branches. auto &NewBB = *MF->CreateMachineBasicBlock(MBB->getBasicBlock()); MF->insert(++MBB->getIterator(), &NewBB); // Insert an entry into BlockInfo to align it properly with the block // numbers. BlockInfo.insert(BlockInfo.begin() + NewBB.getNumber(), BasicBlockInfo()); unsigned &NewBBSize = BlockInfo[NewBB.getNumber()].Size; int NewBrSize; TII->insertUnconditionalBranch(NewBB, FBB, DL, &NewBrSize); NewBBSize += NewBrSize; // Update the successor lists according to the transformation to follow. // Do it here since if there's no split, no update is needed. MBB->replaceSuccessor(FBB, &NewBB); NewBB.addSuccessor(FBB); } // We now have an appropriate fall-through block in place (either naturally or // just created), so we can invert the condition. MachineBasicBlock &NextBB = *std::next(MachineFunction::iterator(MBB)); DEBUG(dbgs() << " Insert B to BB#" << TBB->getNumber() << ", invert condition and change dest. to BB#" << NextBB.getNumber() << '\n'); unsigned &MBBSize = BlockInfo[MBB->getNumber()].Size; // Insert a new conditional branch and a new unconditional branch. int RemovedSize = 0; TII->reverseBranchCondition(Cond); TII->removeBranch(*MBB, &RemovedSize); MBBSize -= RemovedSize; int AddedSize = 0; TII->insertBranch(*MBB, &NextBB, TBB, Cond, DL, &AddedSize); MBBSize += AddedSize; // Finally, keep the block offsets up to date. adjustBlockOffsets(*MBB); return true; }
bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** Lowering br_unless **********\n" "********** Function: " << MF.getName() << '\n'); auto &MFI = *MF.getInfo<WebAssemblyFunctionInfo>(); const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); auto &MRI = MF.getRegInfo(); for (auto &MBB : MF) { for (auto MII = MBB.begin(); MII != MBB.end();) { MachineInstr *MI = &*MII++; if (MI->getOpcode() != WebAssembly::BR_UNLESS) continue; unsigned Cond = MI->getOperand(1).getReg(); bool Inverted = false; // Attempt to invert the condition in place. if (MFI.isVRegStackified(Cond)) { assert(MRI.hasOneDef(Cond)); MachineInstr *Def = MRI.getVRegDef(Cond); switch (Def->getOpcode()) { using namespace WebAssembly; case EQ_I32: Def->setDesc(TII.get(NE_I32)); Inverted = true; break; case NE_I32: Def->setDesc(TII.get(EQ_I32)); Inverted = true; break; case GT_S_I32: Def->setDesc(TII.get(LE_S_I32)); Inverted = true; break; case GE_S_I32: Def->setDesc(TII.get(LT_S_I32)); Inverted = true; break; case LT_S_I32: Def->setDesc(TII.get(GE_S_I32)); Inverted = true; break; case LE_S_I32: Def->setDesc(TII.get(GT_S_I32)); Inverted = true; break; case GT_U_I32: Def->setDesc(TII.get(LE_U_I32)); Inverted = true; break; case GE_U_I32: Def->setDesc(TII.get(LT_U_I32)); Inverted = true; break; case LT_U_I32: Def->setDesc(TII.get(GE_U_I32)); Inverted = true; break; case LE_U_I32: Def->setDesc(TII.get(GT_U_I32)); Inverted = true; break; case EQ_I64: Def->setDesc(TII.get(NE_I64)); Inverted = true; break; case NE_I64: Def->setDesc(TII.get(EQ_I64)); Inverted = true; break; case GT_S_I64: Def->setDesc(TII.get(LE_S_I64)); Inverted = true; break; case GE_S_I64: Def->setDesc(TII.get(LT_S_I64)); Inverted = true; break; case LT_S_I64: Def->setDesc(TII.get(GE_S_I64)); Inverted = true; break; case LE_S_I64: Def->setDesc(TII.get(GT_S_I64)); Inverted = true; break; case GT_U_I64: Def->setDesc(TII.get(LE_U_I64)); Inverted = true; break; case GE_U_I64: Def->setDesc(TII.get(LT_U_I64)); Inverted = true; break; case LT_U_I64: Def->setDesc(TII.get(GE_U_I64)); Inverted = true; break; case LE_U_I64: Def->setDesc(TII.get(GT_U_I64)); Inverted = true; break; case EQ_F32: Def->setDesc(TII.get(NE_F32)); Inverted = true; break; case NE_F32: Def->setDesc(TII.get(EQ_F32)); Inverted = true; break; case EQ_F64: Def->setDesc(TII.get(NE_F64)); Inverted = true; break; case NE_F64: Def->setDesc(TII.get(EQ_F64)); Inverted = true; break; default: break; } } // If we weren't able to invert the condition in place. Insert an // instruction to invert it. if (!Inverted) { unsigned Tmp = MRI.createVirtualRegister(&WebAssembly::I32RegClass); BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::EQZ_I32), Tmp) .addReg(Cond); MFI.stackifyVReg(Tmp); Cond = Tmp; Inverted = true; } // The br_unless condition has now been inverted. Insert a br_if and // delete the br_unless. assert(Inverted); BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::BR_IF)) .addOperand(MI->getOperand(0)) .addReg(Cond); MBB.erase(MI); } } return true; }
/// Attempt the reassociation transformation to reduce critical path length. /// See the above comments before getMachineCombinerPatterns(). void TargetInstrInfo::reassociateOps( MachineInstr &Root, MachineInstr &Prev, MachineCombinerPattern Pattern, SmallVectorImpl<MachineInstr *> &InsInstrs, SmallVectorImpl<MachineInstr *> &DelInstrs, DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const { MachineFunction *MF = Root.getParent()->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); const TargetRegisterClass *RC = Root.getRegClassConstraint(0, TII, TRI); // This array encodes the operand index for each parameter because the // operands may be commuted. Each row corresponds to a pattern value, // and each column specifies the index of A, B, X, Y. unsigned OpIdx[4][4] = { { 1, 1, 2, 2 }, { 1, 2, 2, 1 }, { 2, 1, 1, 2 }, { 2, 2, 1, 1 } }; int Row; switch (Pattern) { case MachineCombinerPattern::REASSOC_AX_BY: Row = 0; break; case MachineCombinerPattern::REASSOC_AX_YB: Row = 1; break; case MachineCombinerPattern::REASSOC_XA_BY: Row = 2; break; case MachineCombinerPattern::REASSOC_XA_YB: Row = 3; break; default: llvm_unreachable("unexpected MachineCombinerPattern"); } MachineOperand &OpA = Prev.getOperand(OpIdx[Row][0]); MachineOperand &OpB = Root.getOperand(OpIdx[Row][1]); MachineOperand &OpX = Prev.getOperand(OpIdx[Row][2]); MachineOperand &OpY = Root.getOperand(OpIdx[Row][3]); MachineOperand &OpC = Root.getOperand(0); unsigned RegA = OpA.getReg(); unsigned RegB = OpB.getReg(); unsigned RegX = OpX.getReg(); unsigned RegY = OpY.getReg(); unsigned RegC = OpC.getReg(); if (TargetRegisterInfo::isVirtualRegister(RegA)) MRI.constrainRegClass(RegA, RC); if (TargetRegisterInfo::isVirtualRegister(RegB)) MRI.constrainRegClass(RegB, RC); if (TargetRegisterInfo::isVirtualRegister(RegX)) MRI.constrainRegClass(RegX, RC); if (TargetRegisterInfo::isVirtualRegister(RegY)) MRI.constrainRegClass(RegY, RC); if (TargetRegisterInfo::isVirtualRegister(RegC)) MRI.constrainRegClass(RegC, RC); // Create a new virtual register for the result of (X op Y) instead of // recycling RegB because the MachineCombiner's computation of the critical // path requires a new register definition rather than an existing one. unsigned NewVR = MRI.createVirtualRegister(RC); InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); unsigned Opcode = Root.getOpcode(); bool KillA = OpA.isKill(); bool KillX = OpX.isKill(); bool KillY = OpY.isKill(); // Create new instructions for insertion. MachineInstrBuilder MIB1 = BuildMI(*MF, Prev.getDebugLoc(), TII->get(Opcode), NewVR) .addReg(RegX, getKillRegState(KillX)) .addReg(RegY, getKillRegState(KillY)); MachineInstrBuilder MIB2 = BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC) .addReg(RegA, getKillRegState(KillA)) .addReg(NewVR, getKillRegState(true)); setSpecialOperandAttr(Root, Prev, *MIB1, *MIB2); // Record new instructions for insertion and old instructions for deletion. InsInstrs.push_back(MIB1); InsInstrs.push_back(MIB2); DelInstrs.push_back(&Prev); DelInstrs.push_back(&Root); }
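// Illustration (not part of the code above): a scalar analogue, in plain C++,
// of why the reassociation pays off. If A is produced by a long dependence
// chain, rewriting (A op X) op Y as A op (X op Y) lets X op Y execute while A
// is still in flight; the intermediate N plays the role of the fresh virtual
// register NewVR created above. This is a sketch for exposition only.
#include <cstdio>

// Before: T = A + X; R = T + Y;   // the whole expression waits on A
// After:  N = X + Y; R = A + N;   // N is independent of A
int reassociated(int A, int X, int Y) {
  int N = X + Y;   // can start before A is available
  return A + N;
}

int original(int A, int X, int Y) {
  int T = A + X;   // must wait for A
  return T + Y;
}

int main() {
  // Both forms compute the same value; only the dependence structure differs.
  std::printf("%d %d\n", original(7, 3, 4), reassociated(7, 3, 4));
  return 0;
}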
bool ARMInstructionSelector::select(MachineInstr &I) const { assert(I.getParent() && "Instruction should be in a basic block!"); assert(I.getParent()->getParent() && "Instruction should be in a function!"); auto &MBB = *I.getParent(); auto &MF = *MBB.getParent(); auto &MRI = MF.getRegInfo(); if (!isPreISelGenericOpcode(I.getOpcode())) { if (I.isCopy()) return selectCopy(I, TII, MRI, TRI, RBI); return true; } if (selectImpl(I)) return true; MachineInstrBuilder MIB{MF, I}; bool isSExt = false; using namespace TargetOpcode; switch (I.getOpcode()) { case G_SEXT: isSExt = true; LLVM_FALLTHROUGH; case G_ZEXT: { LLT DstTy = MRI.getType(I.getOperand(0).getReg()); // FIXME: Smaller destination sizes coming soon! if (DstTy.getSizeInBits() != 32) { DEBUG(dbgs() << "Unsupported destination size for extension"); return false; } LLT SrcTy = MRI.getType(I.getOperand(1).getReg()); unsigned SrcSize = SrcTy.getSizeInBits(); switch (SrcSize) { case 1: { // ZExt boils down to & 0x1; for SExt we also subtract that from 0 I.setDesc(TII.get(ARM::ANDri)); MIB.addImm(1).add(predOps(ARMCC::AL)).add(condCodeOp()); if (isSExt) { unsigned SExtResult = I.getOperand(0).getReg(); // Use a new virtual register for the result of the AND unsigned AndResult = MRI.createVirtualRegister(&ARM::GPRRegClass); I.getOperand(0).setReg(AndResult); auto InsertBefore = std::next(I.getIterator()); auto SubI = BuildMI(MBB, InsertBefore, I.getDebugLoc(), TII.get(ARM::RSBri)) .addDef(SExtResult) .addUse(AndResult) .addImm(0) .add(predOps(ARMCC::AL)) .add(condCodeOp()); if (!constrainSelectedInstRegOperands(*SubI, TII, TRI, RBI)) return false; } break; } case 8: case 16: { unsigned NewOpc = selectSimpleExtOpc(I.getOpcode(), SrcSize); if (NewOpc == I.getOpcode()) return false; I.setDesc(TII.get(NewOpc)); MIB.addImm(0).add(predOps(ARMCC::AL)); break; } default: DEBUG(dbgs() << "Unsupported source size for extension"); return false; } break; } case G_ANYEXT: case G_TRUNC: { // The high bits are undefined, so there's nothing special to do, just // treat it as a copy. auto SrcReg = I.getOperand(1).getReg(); auto DstReg = I.getOperand(0).getReg(); const auto &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI); const auto &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); if (SrcRegBank.getID() != DstRegBank.getID()) { DEBUG(dbgs() << "G_TRUNC/G_ANYEXT operands on different register banks\n"); return false; } if (SrcRegBank.getID() != ARM::GPRRegBankID) { DEBUG(dbgs() << "G_TRUNC/G_ANYEXT on non-GPR not supported yet\n"); return false; } I.setDesc(TII.get(COPY)); return selectCopy(I, TII, MRI, TRI, RBI); } case G_SELECT: return selectSelect(MIB, MRI); case G_ICMP: { CmpConstants Helper(ARM::CMPrr, ARM::INSTRUCTION_LIST_END, ARM::GPRRegBankID, 32); return selectCmp(Helper, MIB, MRI); } case G_FCMP: { assert(TII.getSubtarget().hasVFP2() && "Can't select fcmp without VFP"); unsigned OpReg = I.getOperand(2).getReg(); unsigned Size = MRI.getType(OpReg).getSizeInBits(); if (Size == 64 && TII.getSubtarget().isFPOnlySP()) { DEBUG(dbgs() << "Subtarget only supports single precision"); return false; } if (Size != 32 && Size != 64) { DEBUG(dbgs() << "Unsupported size for G_FCMP operand"); return false; } CmpConstants Helper(Size == 32 ? ARM::VCMPS : ARM::VCMPD, ARM::FMSTAT, ARM::FPRRegBankID, Size); return selectCmp(Helper, MIB, MRI); } case G_GEP: I.setDesc(TII.get(ARM::ADDrr)); MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); break; case G_FRAME_INDEX: // Add 0 to the given frame index and hope it will eventually be folded into // the user(s). 
I.setDesc(TII.get(ARM::ADDri)); MIB.addImm(0).add(predOps(ARMCC::AL)).add(condCodeOp()); break; case G_CONSTANT: { unsigned Reg = I.getOperand(0).getReg(); if (!validReg(MRI, Reg, 32, ARM::GPRRegBankID)) return false; I.setDesc(TII.get(ARM::MOVi)); MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); auto &Val = I.getOperand(1); if (Val.isCImm()) { if (Val.getCImm()->getBitWidth() > 32) return false; Val.ChangeToImmediate(Val.getCImm()->getZExtValue()); } if (!Val.isImm()) { return false; } break; } case G_GLOBAL_VALUE: return selectGlobal(MIB, MRI); case G_STORE: case G_LOAD: { const auto &MemOp = **I.memoperands_begin(); if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) { DEBUG(dbgs() << "Atomic load/store not supported yet\n"); return false; } unsigned Reg = I.getOperand(0).getReg(); unsigned RegBank = RBI.getRegBank(Reg, MRI, TRI)->getID(); LLT ValTy = MRI.getType(Reg); const auto ValSize = ValTy.getSizeInBits(); assert((ValSize != 64 || TII.getSubtarget().hasVFP2()) && "Don't know how to load/store 64-bit value without VFP"); const auto NewOpc = selectLoadStoreOpCode(I.getOpcode(), RegBank, ValSize); if (NewOpc == G_LOAD || NewOpc == G_STORE) return false; I.setDesc(TII.get(NewOpc)); if (NewOpc == ARM::LDRH || NewOpc == ARM::STRH) // LDRH has a funny addressing mode (there's already a FIXME for it). MIB.addReg(0); MIB.addImm(0).add(predOps(ARMCC::AL)); break; } case G_MERGE_VALUES: { if (!selectMergeValues(MIB, TII, MRI, TRI, RBI)) return false; break; } case G_UNMERGE_VALUES: { if (!selectUnmergeValues(MIB, TII, MRI, TRI, RBI)) return false; break; } case G_BRCOND: { if (!validReg(MRI, I.getOperand(0).getReg(), 1, ARM::GPRRegBankID)) { DEBUG(dbgs() << "Unsupported condition register for G_BRCOND"); return false; } // Set the flags. auto Test = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(ARM::TSTri)) .addReg(I.getOperand(0).getReg()) .addImm(1) .add(predOps(ARMCC::AL)); if (!constrainSelectedInstRegOperands(*Test, TII, TRI, RBI)) return false; // Branch conditionally. auto Branch = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(ARM::Bcc)) .add(I.getOperand(1)) .add(predOps(ARMCC::EQ, ARM::CPSR)); if (!constrainSelectedInstRegOperands(*Branch, TII, TRI, RBI)) return false; I.eraseFromParent(); return true; } default: return false; } return constrainSelectedInstRegOperands(I, TII, TRI, RBI); }
/// optimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads /// a single register and writes a single register and it does not modify the /// source, and if the source value is preserved as a sub-register of the /// result, then replace all reachable uses of the source with the subreg of the /// result. /// /// Do not generate an EXTRACT that is used only in a debug use, as this changes /// the code. Since this code does not currently share EXTRACTs, just ignore all /// debug uses. bool PeepholeOptimizer:: optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, SmallPtrSet<MachineInstr*, 8> &LocalMIs) { unsigned SrcReg, DstReg, SubIdx; if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) return false; if (TargetRegisterInfo::isPhysicalRegister(DstReg) || TargetRegisterInfo::isPhysicalRegister(SrcReg)) return false; if (MRI->hasOneNonDBGUse(SrcReg)) // No other uses. return false; // Ensure DstReg can get a register class that actually supports // sub-registers. Don't change the class until we commit. const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg); DstRC = TM->getRegisterInfo()->getSubClassWithSubReg(DstRC, SubIdx); if (!DstRC) return false; // The ext instr may be operating on a sub-register of SrcReg as well. // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit // register. // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of // SrcReg:SubIdx should be replaced. bool UseSrcSubIdx = TM->getRegisterInfo()-> getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != 0; // The source has other uses. See if we can replace the other uses with use of // the result of the extension. SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs; for (MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(DstReg), UE = MRI->use_nodbg_end(); UI != UE; ++UI) ReachedBBs.insert(UI->getParent()); // Uses that are in the same BB of uses of the result of the instruction. SmallVector<MachineOperand*, 8> Uses; // Uses that the result of the instruction can reach. SmallVector<MachineOperand*, 8> ExtendedUses; bool ExtendLife = true; for (MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SrcReg), UE = MRI->use_nodbg_end(); UI != UE; ++UI) { MachineOperand &UseMO = UI.getOperand(); MachineInstr *UseMI = &*UI; if (UseMI == MI) continue; if (UseMI->isPHI()) { ExtendLife = false; continue; } // Only accept uses of SrcReg:SubIdx. if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx) continue; // It's an error to translate this: // // %reg1025 = <sext> %reg1024 // ... // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4 // // into this: // // %reg1025 = <sext> %reg1024 // ... // %reg1027 = COPY %reg1025:4 // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4 // // The problem here is that SUBREG_TO_REG is there to assert that an // implicit zext occurs. It doesn't insert a zext instruction. If we allow // the COPY here, it will give us the value after the <sext>, not the // original value of %reg1024 before <sext>. if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) continue; MachineBasicBlock *UseMBB = UseMI->getParent(); if (UseMBB == MBB) { // Local uses that come after the extension. if (!LocalMIs.count(UseMI)) Uses.push_back(&UseMO); } else if (ReachedBBs.count(UseMBB)) { // Non-local uses where the result of the extension is used. Always // replace these unless it's a PHI. 
Uses.push_back(&UseMO); } else if (Aggressive && DT->dominates(MBB, UseMBB)) { // We may want to extend the live range of the extension result in order // to replace these uses. ExtendedUses.push_back(&UseMO); } else { // Both will be live out of the def MBB anyway. Don't extend live range of // the extension result. ExtendLife = false; break; } } if (ExtendLife && !ExtendedUses.empty()) // Extend the liveness of the extension result. std::copy(ExtendedUses.begin(), ExtendedUses.end(), std::back_inserter(Uses)); // Now replace all uses. bool Changed = false; if (!Uses.empty()) { SmallPtrSet<MachineBasicBlock*, 4> PHIBBs; // Look for PHI uses of the extended result, we don't want to extend the // liveness of a PHI input. It breaks all kinds of assumptions down // stream. A PHI use is expected to be the kill of its source values. for (MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(DstReg), UE = MRI->use_nodbg_end(); UI != UE; ++UI) if (UI->isPHI()) PHIBBs.insert(UI->getParent()); const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); for (unsigned i = 0, e = Uses.size(); i != e; ++i) { MachineOperand *UseMO = Uses[i]; MachineInstr *UseMI = UseMO->getParent(); MachineBasicBlock *UseMBB = UseMI->getParent(); if (PHIBBs.count(UseMBB)) continue; // About to add uses of DstReg, clear DstReg's kill flags. if (!Changed) { MRI->clearKillFlags(DstReg); MRI->constrainRegClass(DstReg, DstRC); } unsigned NewVR = MRI->createVirtualRegister(RC); MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), TII->get(TargetOpcode::COPY), NewVR) .addReg(DstReg, 0, SubIdx); // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set. if (UseSrcSubIdx) { Copy->getOperand(0).setSubReg(SubIdx); Copy->getOperand(0).setIsUndef(); } UseMO->setReg(NewVR); ++NumReuse; Changed = true; } } return Changed; }
MachineBasicBlock::iterator MSP430FrameLowering::eliminateCallFramePseudoInstr( MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { const MSP430InstrInfo &TII = *static_cast<const MSP430InstrInfo *>(MF.getSubtarget().getInstrInfo()); unsigned StackAlign = getStackAlignment(); if (!hasReservedCallFrame(MF)) { // If the stack pointer can be changed after prologue, turn the // adjcallstackup instruction into a 'sub SP, <amt>' and the // adjcallstackdown instruction into 'add SP, <amt>' // TODO: consider using push / pop instead of sub + store / add MachineInstr *Old = I; uint64_t Amount = Old->getOperand(0).getImm(); if (Amount != 0) { // We need to keep the stack aligned properly. To do this, we round the // amount of space needed for the outgoing arguments up to the next // alignment boundary. Amount = (Amount+StackAlign-1)/StackAlign*StackAlign; MachineInstr *New = nullptr; if (Old->getOpcode() == TII.getCallFrameSetupOpcode()) { New = BuildMI(MF, Old->getDebugLoc(), TII.get(MSP430::SUB16ri), MSP430::SP) .addReg(MSP430::SP).addImm(Amount); } else { assert(Old->getOpcode() == TII.getCallFrameDestroyOpcode()); // factor out the amount the callee already popped. uint64_t CalleeAmt = Old->getOperand(1).getImm(); Amount -= CalleeAmt; if (Amount) New = BuildMI(MF, Old->getDebugLoc(), TII.get(MSP430::ADD16ri), MSP430::SP) .addReg(MSP430::SP).addImm(Amount); } if (New) { // The SRW implicit def is dead. New->getOperand(3).setIsDead(); // Replace the pseudo instruction with a new instruction... MBB.insert(I, New); } } } else if (I->getOpcode() == TII.getCallFrameDestroyOpcode()) { // If we are performing frame pointer elimination and if the callee pops // something off the stack pointer, add it back. if (uint64_t CalleeAmt = I->getOperand(1).getImm()) { MachineInstr *Old = I; MachineInstr *New = BuildMI(MF, Old->getDebugLoc(), TII.get(MSP430::SUB16ri), MSP430::SP).addReg(MSP430::SP).addImm(CalleeAmt); // The SRW implicit def is dead. New->getOperand(3).setIsDead(); MBB.insert(I, New); } } return MBB.erase(I); }
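// Illustration (not part of the code above): the rounding of Amount is the
// usual round-up-to-alignment idiom. A minimal standalone sketch, assuming
// only that the alignment is non-zero, as the code above does:
#include <cassert>
#include <cstdint>

// Round Amount up to the next multiple of StackAlign, exactly as done when
// turning the call-frame pseudos into explicit SP adjustments above.
static uint64_t roundUpToAlignment(uint64_t Amount, uint64_t StackAlign) {
  assert(StackAlign != 0 && "alignment must be non-zero");
  return (Amount + StackAlign - 1) / StackAlign * StackAlign;
}

int main() {
  // e.g. with a 2-byte aligned stack, a 5-byte argument area reserves 6 bytes.
  assert(roundUpToAlignment(5, 2) == 6);
  assert(roundUpToAlignment(8, 2) == 8);
  return 0;
}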
/// spillAroundUses - insert spill code around each use of Reg. void InlineSpiller::spillAroundUses(unsigned Reg) { DEBUG(dbgs() << "spillAroundUses " << PrintReg(Reg) << '\n'); LiveInterval &OldLI = LIS.getInterval(Reg); // Iterate over instructions using Reg. for (MachineRegisterInfo::reg_bundle_iterator RegI = MRI.reg_bundle_begin(Reg), E = MRI.reg_bundle_end(); RegI != E; ) { MachineInstr *MI = &*(RegI++); // Debug values are not allowed to affect codegen. if (MI->isDebugValue()) { // Modify DBG_VALUE now that the value is in a spill slot. bool IsIndirect = MI->isIndirectDebugValue(); uint64_t Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; const MDNode *Var = MI->getDebugVariable(); const MDNode *Expr = MI->getDebugExpression(); DebugLoc DL = MI->getDebugLoc(); DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); MachineBasicBlock *MBB = MI->getParent(); assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); BuildMI(*MBB, MBB->erase(MI), DL, TII.get(TargetOpcode::DBG_VALUE)) .addFrameIndex(StackSlot) .addImm(Offset) .addMetadata(Var) .addMetadata(Expr); continue; } // Ignore copies to/from snippets. We'll delete them. if (SnippetCopies.count(MI)) continue; // Stack slot accesses may coalesce away. if (coalesceStackAccess(MI, Reg)) continue; // Analyze instruction. SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops; MIBundleOperands::VirtRegInfo RI = MIBundleOperands(MI).analyzeVirtReg(Reg, &Ops); // Find the slot index where this instruction reads and writes OldLI. // This is usually the def slot, except for tied early clobbers. SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getRegSlot(true))) if (SlotIndex::isSameInstr(Idx, VNI->def)) Idx = VNI->def; // Check for a sibling copy. unsigned SibReg = isFullCopyOf(MI, Reg); if (SibReg && isSibling(SibReg)) { // This may actually be a copy between snippets. if (isRegToSpill(SibReg)) { DEBUG(dbgs() << "Found new snippet copy: " << *MI); SnippetCopies.insert(MI); continue; } if (RI.Writes) { // Hoist the spill of a sib-reg copy. if (hoistSpill(OldLI, MI)) { // This COPY is now dead, the value is already in the stack slot. MI->getOperand(0).setIsDead(); DeadDefs.push_back(MI); continue; } } else { // This is a reload for a sib-reg copy. Drop spills downstream. LiveInterval &SibLI = LIS.getInterval(SibReg); eliminateRedundantSpills(SibLI, SibLI.getVNInfoAt(Idx)); // The COPY will fold to a reload below. } } // Attempt to fold memory ops. if (foldMemoryOperand(Ops)) continue; // Create a new virtual register for spill/fill. // FIXME: Infer regclass from instruction alone. unsigned NewVReg = Edit->createFrom(Reg); if (RI.Reads) insertReload(NewVReg, Idx, MI); // Rewrite instruction operands. bool hasLiveDef = false; for (unsigned i = 0, e = Ops.size(); i != e; ++i) { MachineOperand &MO = Ops[i].first->getOperand(Ops[i].second); MO.setReg(NewVReg); if (MO.isUse()) { if (!Ops[i].first->isRegTiedToDefOperand(Ops[i].second)) MO.setIsKill(); } else { if (!MO.isDead()) hasLiveDef = true; } } DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI << '\n'); // FIXME: Use a second vreg if instruction has no tied ops. if (RI.Writes) if (hasLiveDef) insertSpill(NewVReg, true, MI); } }
void X86ExpandPseudo::ExpandICallBranchFunnel( MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI) { MachineBasicBlock *JTMBB = MBB; MachineInstr *JTInst = &*MBBI; MachineFunction *MF = MBB->getParent(); const BasicBlock *BB = MBB->getBasicBlock(); auto InsPt = MachineFunction::iterator(MBB); ++InsPt; std::vector<std::pair<MachineBasicBlock *, unsigned>> TargetMBBs; DebugLoc DL = JTInst->getDebugLoc(); MachineOperand Selector = JTInst->getOperand(0); const GlobalValue *CombinedGlobal = JTInst->getOperand(1).getGlobal(); auto CmpTarget = [&](unsigned Target) { BuildMI(*MBB, MBBI, DL, TII->get(X86::LEA64r), X86::R11) .addReg(X86::RIP) .addImm(1) .addReg(0) .addGlobalAddress(CombinedGlobal, JTInst->getOperand(2 + 2 * Target).getImm()) .addReg(0); BuildMI(*MBB, MBBI, DL, TII->get(X86::CMP64rr)) .add(Selector) .addReg(X86::R11); }; auto CreateMBB = [&]() { auto *NewMBB = MF->CreateMachineBasicBlock(BB); MBB->addSuccessor(NewMBB); return NewMBB; }; auto EmitCondJump = [&](unsigned Opcode, MachineBasicBlock *ThenMBB) { BuildMI(*MBB, MBBI, DL, TII->get(Opcode)).addMBB(ThenMBB); auto *ElseMBB = CreateMBB(); MF->insert(InsPt, ElseMBB); MBB = ElseMBB; MBBI = MBB->end(); }; auto EmitCondJumpTarget = [&](unsigned Opcode, unsigned Target) { auto *ThenMBB = CreateMBB(); TargetMBBs.push_back({ThenMBB, Target}); EmitCondJump(Opcode, ThenMBB); }; auto EmitTailCall = [&](unsigned Target) { BuildMI(*MBB, MBBI, DL, TII->get(X86::TAILJMPd64)) .add(JTInst->getOperand(3 + 2 * Target)); }; std::function<void(unsigned, unsigned)> EmitBranchFunnel = [&](unsigned FirstTarget, unsigned NumTargets) { if (NumTargets == 1) { EmitTailCall(FirstTarget); return; } if (NumTargets == 2) { CmpTarget(FirstTarget + 1); EmitCondJumpTarget(X86::JB_1, FirstTarget); EmitTailCall(FirstTarget + 1); return; } if (NumTargets < 6) { CmpTarget(FirstTarget + 1); EmitCondJumpTarget(X86::JB_1, FirstTarget); EmitCondJumpTarget(X86::JE_1, FirstTarget + 1); EmitBranchFunnel(FirstTarget + 2, NumTargets - 2); return; } auto *ThenMBB = CreateMBB(); CmpTarget(FirstTarget + (NumTargets / 2)); EmitCondJump(X86::JB_1, ThenMBB); EmitCondJumpTarget(X86::JE_1, FirstTarget + (NumTargets / 2)); EmitBranchFunnel(FirstTarget + (NumTargets / 2) + 1, NumTargets - (NumTargets / 2) - 1); MF->insert(InsPt, ThenMBB); MBB = ThenMBB; MBBI = MBB->end(); EmitBranchFunnel(FirstTarget, NumTargets / 2); }; EmitBranchFunnel(0, (JTInst->getNumOperands() - 2) / 2); for (auto P : TargetMBBs) { MF->insert(InsPt, P.first); BuildMI(P.first, DL, TII->get(X86::TAILJMPd64)) .add(JTInst->getOperand(3 + 2 * P.second)); } JTMBB->erase(JTInst); }
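// Illustration (not part of the code above): the recursive EmitBranchFunnel
// lambda lays out a comparison tree over the jump-table targets. The
// self-contained sketch below (plain C++, hypothetical Target callbacks in
// place of the TAILJMPd64s) mirrors the same case split: one target is a
// direct call, two targets need a single compare, small ranges peel off two
// targets at a time, and larger ranges split around a midpoint.
#include <cstdio>
#include <functional>
#include <vector>

using Target = std::function<void()>;

// Dispatch Sel (an index into a sorted table of targets) using the same shape
// of comparison tree that ExpandICallBranchFunnel emits.
static void branchFunnel(unsigned Sel, unsigned First, unsigned NumTargets,
                         const std::vector<Target> &Targets) {
  if (NumTargets == 1)
    return Targets[First]();
  if (NumTargets == 2) {
    if (Sel < First + 1)                   // the "JB" side
      return Targets[First]();
    return Targets[First + 1]();
  }
  if (NumTargets < 6) {
    if (Sel < First + 1)                   // "JB"
      return Targets[First]();
    if (Sel == First + 1)                  // "JE"
      return Targets[First + 1]();
    return branchFunnel(Sel, First + 2, NumTargets - 2, Targets);
  }
  unsigned Mid = First + NumTargets / 2;
  if (Sel < Mid)                           // below the midpoint
    return branchFunnel(Sel, First, NumTargets / 2, Targets);
  if (Sel == Mid)
    return Targets[Mid]();
  return branchFunnel(Sel, Mid + 1, NumTargets - NumTargets / 2 - 1, Targets);
}

int main() {
  std::vector<Target> Targets;
  for (unsigned i = 0; i < 8; ++i)
    Targets.push_back([i] { std::printf("target %u\n", i); });
  for (unsigned Sel = 0; Sel < 8; ++Sel)
    branchFunnel(Sel, 0, Targets.size(), Targets);
  return 0;
}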
/// fixupConditionalBranch - Fix up a conditional branch whose destination is /// too far away to fit in its displacement field. It is converted to an inverse /// conditional branch + an unconditional branch to the destination. bool AArch64BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) { MachineBasicBlock *DestBB = getDestBlock(MI); // Add an unconditional branch to the destination and invert the branch // condition to jump over it: // tbz L1 // => // tbnz L2 // b L1 // L2: // If the branch is at the end of its MBB and that has a fall-through block, // direct the updated conditional branch to the fall-through block. Otherwise, // split the MBB before the next instruction. MachineBasicBlock *MBB = MI.getParent(); MachineInstr *BMI = &MBB->back(); bool NeedSplit = (BMI != &MI) || !hasFallthrough(*MBB); if (BMI != &MI) { if (std::next(MachineBasicBlock::iterator(MI)) == std::prev(MBB->getLastNonDebugInstr()) && BMI->isUnconditionalBranch()) { // Last MI in the BB is an unconditional branch. We can simply invert the // condition and swap destinations: // beq L1 // b L2 // => // bne L2 // b L1 MachineBasicBlock *NewDest = getDestBlock(*BMI); if (isBlockInRange(MI, *NewDest)) { DEBUG(dbgs() << " Invert condition and swap its destination with " << *BMI); changeBranchDestBlock(*BMI, *DestBB); int NewSize = insertInvertedConditionalBranch(*MBB, MI.getIterator(), MI.getDebugLoc(), MI, *NewDest); int OldSize = TII->getInstSizeInBytes(MI); BlockInfo[MBB->getNumber()].Size += (NewSize - OldSize); MI.eraseFromParent(); return true; } } } if (NeedSplit) { // Analyze the branch so we know how to update the successor lists. MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 2> Cond; bool Fail = TII->analyzeBranch(*MBB, TBB, FBB, Cond, false); assert(!Fail && "branches to relax should be analyzable"); (void)Fail; MachineBasicBlock *NewBB = splitBlockBeforeInstr(MI); // No need for the branch to the next block. We're adding an unconditional // branch to the destination. int delta = TII->getInstSizeInBytes(MBB->back()); BlockInfo[MBB->getNumber()].Size -= delta; MBB->back().eraseFromParent(); // BlockInfo[SplitBB].Offset is wrong temporarily, fixed below // Update the successor lists according to the transformation to follow. // Do it here since if there's no split, no update is needed. MBB->replaceSuccessor(FBB, NewBB); NewBB->addSuccessor(FBB); } MachineBasicBlock &NextBB = *std::next(MachineFunction::iterator(MBB)); DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber() << ", invert condition and change dest. to BB#" << NextBB.getNumber() << '\n'); unsigned &MBBSize = BlockInfo[MBB->getNumber()].Size; // Insert a new conditional branch and a new unconditional branch. MBBSize += insertInvertedConditionalBranch(*MBB, MBB->end(), MI.getDebugLoc(), MI, NextBB); MBBSize += insertUnconditionalBranch(*MBB, *DestBB, MI.getDebugLoc()); // Remove the old conditional branch. It may or may not still be in MBB. MBBSize -= TII->getInstSizeInBytes(MI); MI.eraseFromParent(); // Finally, keep the block offsets up to date. adjustBlockOffsets(*MBB); return true; }
void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) { MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = MI.getDebugLoc(); MachineBasicBlock::iterator I = MI; unsigned Save = MI.getOperand(1).getReg(); unsigned Idx = MI.getOperand(3).getReg(); if (AMDGPU::SReg_32RegClass.contains(Idx)) { BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) .addReg(Idx); MBB.insert(I, MovRel); } else { assert(AMDGPU::SReg_64RegClass.contains(Save)); assert(AMDGPU::VReg_32RegClass.contains(Idx)); // Save the EXEC mask BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), Save) .addReg(AMDGPU::EXEC); // Read the next variant into VCC (lower 32 bits) <- also loop target BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), AMDGPU::VCC_LO) .addReg(Idx); // Move index from VCC into M0 BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) .addReg(AMDGPU::VCC_LO); // Compare the just read M0 value to all possible Idx values BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC) .addReg(AMDGPU::M0) .addReg(Idx); // Update EXEC, save the original EXEC value to VCC BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC) .addReg(AMDGPU::VCC); // Do the actual move MBB.insert(I, MovRel); // Update EXEC, switch all done bits to 0 and all todo bits to 1 BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC) .addReg(AMDGPU::EXEC) .addReg(AMDGPU::VCC); // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) .addImm(-7) .addReg(AMDGPU::EXEC); // Restore EXEC BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC) .addReg(Save); } // FIXME: Are there any values other than the LDS address clamp that need to // be stored in the m0 register and may be live for more than a few // instructions? If so, we should save the m0 register at the beginning // of this function and restore it here. // FIXME: Add support for LDS direct loads. InitM0ForLDS(&MI); MI.eraseFromParent(); }
void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset) { MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = MI.getDebugLoc(); MachineBasicBlock::iterator I = MI; unsigned Save = MI.getOperand(1).getReg(); unsigned Idx = MI.getOperand(3).getReg(); if (AMDGPU::SReg_32RegClass.contains(Idx)) { if (Offset) { BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0) .addReg(Idx) .addImm(Offset); } else { BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) .addReg(Idx); } MBB.insert(I, MovRel); } else { assert(AMDGPU::SReg_64RegClass.contains(Save)); assert(AMDGPU::VGPR_32RegClass.contains(Idx)); // Save the EXEC mask BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), Save) .addReg(AMDGPU::EXEC); // Read the next variant into VCC (lower 32 bits) <- also loop target BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), AMDGPU::VCC_LO) .addReg(Idx); // Move index from VCC into M0 BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) .addReg(AMDGPU::VCC_LO); // Compare the just read M0 value to all possible Idx values BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32)) .addReg(AMDGPU::M0) .addReg(Idx); // Update EXEC, save the original EXEC value to VCC BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC) .addReg(AMDGPU::VCC); if (Offset) { BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0) .addReg(AMDGPU::M0) .addImm(Offset); } // Do the actual move MBB.insert(I, MovRel); // Update EXEC, switch all done bits to 0 and all todo bits to 1 BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC) .addReg(AMDGPU::EXEC) .addReg(AMDGPU::VCC); // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) .addImm(-7); // Restore EXEC BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC) .addReg(Save); } MI.eraseFromParent(); }
bool HexagonOptAddrMode::changeAddAsl(NodeAddr<UseNode *> AddAslUN, MachineInstr *AddAslMI, const MachineOperand &ImmOp, unsigned ImmOpNum) { NodeAddr<StmtNode *> SA = AddAslUN.Addr->getOwner(*DFG); DEBUG(dbgs() << "Processing addasl :" << *AddAslMI << "\n"); NodeList UNodeList; getAllRealUses(SA, UNodeList); for (auto I = UNodeList.rbegin(), E = UNodeList.rend(); I != E; ++I) { NodeAddr<UseNode *> UseUN = *I; assert(!(UseUN.Addr->getFlags() & NodeAttrs::PhiRef) && "Can't transform this 'AddAsl' instruction!"); NodeAddr<StmtNode *> UseIA = UseUN.Addr->getOwner(*DFG); DEBUG(dbgs() << "[InstrNode]: " << Print<NodeAddr<InstrNode *>>(UseIA, *DFG) << "\n"); MachineInstr *UseMI = UseIA.Addr->getCode(); DEBUG(dbgs() << "[MI <BB#" << UseMI->getParent()->getNumber() << ">]: " << *UseMI << "\n"); const MCInstrDesc &UseMID = UseMI->getDesc(); assert(HII->getAddrMode(UseMI) == HexagonII::BaseImmOffset); auto UsePos = MachineBasicBlock::iterator(UseMI); MachineBasicBlock::instr_iterator InsertPt = UsePos.getInstrIterator(); short NewOpCode = getBaseWithLongOffset(UseMI); assert(NewOpCode >= 0 && "Invalid New opcode\n"); unsigned OpStart; unsigned OpEnd = UseMI->getNumOperands(); MachineBasicBlock *BB = UseMI->getParent(); MachineInstrBuilder MIB = BuildMI(*BB, InsertPt, UseMI->getDebugLoc(), HII->get(NewOpCode)); // change mem(Rs + # ) -> mem(Rt << # + ##) if (UseMID.mayLoad()) { MIB.addOperand(UseMI->getOperand(0)); MIB.addOperand(AddAslMI->getOperand(2)); MIB.addOperand(AddAslMI->getOperand(3)); const GlobalValue *GV = ImmOp.getGlobal(); MIB.addGlobalAddress(GV, UseMI->getOperand(2).getImm(), ImmOp.getTargetFlags()); OpStart = 3; } else if (UseMID.mayStore()) { MIB.addOperand(AddAslMI->getOperand(2)); MIB.addOperand(AddAslMI->getOperand(3)); const GlobalValue *GV = ImmOp.getGlobal(); MIB.addGlobalAddress(GV, UseMI->getOperand(1).getImm(), ImmOp.getTargetFlags()); MIB.addOperand(UseMI->getOperand(2)); OpStart = 3; } else llvm_unreachable("Unhandled instruction"); for (unsigned i = OpStart; i < OpEnd; ++i) MIB.addOperand(UseMI->getOperand(i)); Deleted.insert(UseMI); } return true; }