bool MipsExpandPseudo::expandAtomicCmpSwapSubword(
    MachineBasicBlock &BB, MachineBasicBlock::iterator I,
    MachineBasicBlock::iterator &NMBBI) {

  MachineFunction *MF = BB.getParent();

  const bool ArePtrs64bit = STI->getABI().ArePtrs64bit();
  DebugLoc DL = I->getDebugLoc();
  unsigned LL, SC;
  unsigned ZERO = Mips::ZERO;
  unsigned BNE = Mips::BNE;
  unsigned BEQ = Mips::BEQ;
  unsigned SEOp =
      I->getOpcode() == Mips::ATOMIC_CMP_SWAP_I8_POSTRA ? Mips::SEB : Mips::SEH;

  if (STI->inMicroMipsMode()) {
    LL = STI->hasMips32r6() ? Mips::LL_MMR6 : Mips::LL_MM;
    SC = STI->hasMips32r6() ? Mips::SC_MMR6 : Mips::SC_MM;
    BNE = STI->hasMips32r6() ? Mips::BNEC_MMR6 : Mips::BNE_MM;
    BEQ = STI->hasMips32r6() ? Mips::BEQC_MMR6 : Mips::BEQ_MM;
  } else {
    LL = STI->hasMips32r6() ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6)
                            : (ArePtrs64bit ? Mips::LL64 : Mips::LL);
    SC = STI->hasMips32r6() ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6)
                            : (ArePtrs64bit ? Mips::SC64 : Mips::SC);
  }

  unsigned Dest = I->getOperand(0).getReg();
  unsigned Ptr = I->getOperand(1).getReg();
  unsigned Mask = I->getOperand(2).getReg();
  unsigned ShiftCmpVal = I->getOperand(3).getReg();
  unsigned Mask2 = I->getOperand(4).getReg();
  unsigned ShiftNewVal = I->getOperand(5).getReg();
  unsigned ShiftAmnt = I->getOperand(6).getReg();
  unsigned Scratch = I->getOperand(7).getReg();
  unsigned Scratch2 = I->getOperand(8).getReg();

  // Insert new blocks after the current block.
  const BasicBlock *LLVM_BB = BB.getBasicBlock();
  MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineFunction::iterator It = ++BB.getIterator();
  MF->insert(It, loop1MBB);
  MF->insert(It, loop2MBB);
  MF->insert(It, sinkMBB);
  MF->insert(It, exitMBB);

  // Transfer the remainder of BB and its successor edges to exitMBB.
  exitMBB->splice(exitMBB->begin(), &BB,
                  std::next(MachineBasicBlock::iterator(I)), BB.end());
  exitMBB->transferSuccessorsAndUpdatePHIs(&BB);

  // thisMBB:
  //   ...
  //   fallthrough --> loop1MBB
  BB.addSuccessor(loop1MBB, BranchProbability::getOne());
  loop1MBB->addSuccessor(sinkMBB);
  loop1MBB->addSuccessor(loop2MBB);
  loop1MBB->normalizeSuccProbs();
  loop2MBB->addSuccessor(loop1MBB);
  loop2MBB->addSuccessor(sinkMBB);
  loop2MBB->normalizeSuccProbs();
  sinkMBB->addSuccessor(exitMBB, BranchProbability::getOne());

  // loop1MBB:
  //   ll dest, 0(ptr)
  //   and Mask', dest, Mask
  //   bne Mask', ShiftCmpVal, sinkMBB
  BuildMI(loop1MBB, DL, TII->get(LL), Scratch).addReg(Ptr).addImm(0);
  BuildMI(loop1MBB, DL, TII->get(Mips::AND), Scratch2)
      .addReg(Scratch)
      .addReg(Mask);
  BuildMI(loop1MBB, DL, TII->get(BNE))
      .addReg(Scratch2).addReg(ShiftCmpVal).addMBB(sinkMBB);

  // loop2MBB:
  //   and dest, dest, mask2
  //   or dest, dest, ShiftNewVal
  //   sc dest, dest, 0(ptr)
  //   beq dest, $0, loop1MBB
  BuildMI(loop2MBB, DL, TII->get(Mips::AND), Scratch)
      .addReg(Scratch, RegState::Kill)
      .addReg(Mask2);
  BuildMI(loop2MBB, DL, TII->get(Mips::OR), Scratch)
      .addReg(Scratch, RegState::Kill)
      .addReg(ShiftNewVal);
  BuildMI(loop2MBB, DL, TII->get(SC), Scratch)
      .addReg(Scratch, RegState::Kill)
      .addReg(Ptr)
      .addImm(0);
  BuildMI(loop2MBB, DL, TII->get(BEQ))
      .addReg(Scratch, RegState::Kill)
      .addReg(ZERO)
      .addMBB(loop1MBB);

  // sinkMBB:
  //   srl srlres, Mask', shiftamt
  //   sign_extend dest, srlres
  BuildMI(sinkMBB, DL, TII->get(Mips::SRLV), Dest)
      .addReg(Scratch2)
      .addReg(ShiftAmnt);
  if (STI->hasMips32r2()) {
    BuildMI(sinkMBB, DL, TII->get(SEOp), Dest).addReg(Dest);
  } else {
    const unsigned ShiftImm =
        I->getOpcode() == Mips::ATOMIC_CMP_SWAP_I16_POSTRA ? 16 : 24;
    BuildMI(sinkMBB, DL, TII->get(Mips::SLL), Dest)
        .addReg(Dest, RegState::Kill)
        .addImm(ShiftImm);
    BuildMI(sinkMBB, DL, TII->get(Mips::SRA), Dest)
        .addReg(Dest, RegState::Kill)
        .addImm(ShiftImm);
  }

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *loop1MBB);
  computeAndAddLiveIns(LiveRegs, *loop2MBB);
  computeAndAddLiveIns(LiveRegs, *sinkMBB);
  computeAndAddLiveIns(LiveRegs, *exitMBB);

  NMBBI = BB.end();
  I->eraseFromParent();
  return true;
}
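// For reference, a hedged sketch of the sequence the expansion above emits
// for a masked 8/16-bit compare-and-swap. It is assembled from the block
// comments in the code; the register names are illustrative, not the
// registers the expansion actually picks:
//
//   loop1:  ll    $scratch,  0($ptr)            # load-linked the whole word
//           and   $scratch2, $scratch, $mask    # isolate the byte/halfword
//           bne   $scratch2, $shiftedcmp, sink  # compare failed
//   loop2:  and   $scratch,  $scratch, $mask2   # clear the field
//           or    $scratch,  $scratch, $shiftednew
//           sc    $scratch,  0($ptr)            # store-conditional
//           beq   $scratch,  $zero, loop1       # retry if SC failed
//   sink:   srlv  $dest,     $scratch2, $shiftamt
//           seb/seh (or sll+sra pre-r2)         # sign-extend the old value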
// hasFP - Return true if the specified function should have a dedicated frame
// pointer register. This is true if the function has variable sized allocas or
// if frame pointer elimination is disabled.
bool MipsRegisterInfo::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  return NoFramePointerElim || MFI->hasVarSizedObjects();
}
/// MoveDiscontiguousLoopBlocks - Move any loop blocks that are not in the
/// portion of the loop contiguous with the header. This usually makes the loop
/// contiguous, provided that AnalyzeBranch can handle all the relevant
/// branching. See the @cfg_islands case in test/CodeGen/X86/loop_blocks.ll
/// for an example of this.
bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF,
                                                   MachineLoop *L) {
  bool Changed = false;
  MachineBasicBlock *TopMBB = L->getTopBlock();
  MachineBasicBlock *BotMBB = L->getBottomBlock();

  // Determine a position to move orphaned loop blocks to. If TopMBB is not
  // entered via fallthrough and BotMBB is exited via fallthrough, prepend them
  // to the top of the loop to avoid losing that fallthrough. Otherwise append
  // them to the bottom, even if it previously had a fallthrough, on the theory
  // that it's worth an extra branch to keep the loop contiguous.
  MachineFunction::iterator InsertPt =
    llvm::next(MachineFunction::iterator(BotMBB));
  bool InsertAtTop = false;
  if (TopMBB != MF.begin() &&
      !HasFallthrough(prior(MachineFunction::iterator(TopMBB))) &&
      HasFallthrough(BotMBB)) {
    InsertPt = TopMBB;
    InsertAtTop = true;
  }

  // Keep a record of which blocks are in the portion of the loop contiguous
  // with the loop header.
  SmallPtrSet<MachineBasicBlock *, 8> ContiguousBlocks;
  for (MachineFunction::iterator I = TopMBB,
       E = llvm::next(MachineFunction::iterator(BotMBB)); I != E; ++I)
    ContiguousBlocks.insert(I);

  // Find non-contiguous blocks and fix them.
  if (InsertPt != MF.begin() && HasAnalyzableTerminator(prior(InsertPt)))
    for (MachineLoop::block_iterator BI = L->block_begin(), BE = L->block_end();
         BI != BE; ++BI) {
      MachineBasicBlock *BB = *BI;

      // Verify that we can analyze all the loop entry edges before beginning
      // any changes which will require us to be able to analyze them.
      if (!HasAnalyzableTerminator(BB))
        continue;
      if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(BB))))
        continue;

      // If the layout predecessor is part of the loop, this block will be
      // processed along with it. This keeps them in their relative order.
      if (BB != MF.begin() &&
          L->contains(prior(MachineFunction::iterator(BB))))
        continue;

      // Check to see if this block is already contiguous with the main
      // portion of the loop.
      if (!ContiguousBlocks.insert(BB))
        continue;

      // Move the block.
      DEBUG(dbgs() << "CGP: Moving blocks starting at BB#" << BB->getNumber()
                   << " to be contiguous with loop.\n");
      Changed = true;

      // Process this block and all loop blocks contiguous with it, to keep
      // them in their relative order.
      MachineFunction::iterator Begin = BB;
      MachineFunction::iterator End =
        llvm::next(MachineFunction::iterator(BB));
      for (; End != MF.end(); ++End) {
        if (!L->contains(End)) break;
        if (!HasAnalyzableTerminator(End)) break;
        ContiguousBlocks.insert(End);
        ++NumIntraMoved;
      }

      // If we're inserting at the bottom of the loop, and the code we're
      // moving originally had fall-through successors, bring the successors
      // up with the loop blocks to preserve the fall-through edges.
      if (!InsertAtTop)
        for (; End != MF.end(); ++End) {
          if (L->contains(End)) break;
          if (!HasAnalyzableTerminator(End)) break;
          if (!HasFallthrough(prior(End))) break;
        }

      // Move the blocks. This may invalidate TopMBB and/or BotMBB, but
      // we don't need them anymore at this point.
      Splice(MF, InsertPt, Begin, End);
    }

  return Changed;
}
bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  std::vector<unsigned> I1Defs;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
      if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
        const MachineOperand &Src = MI.getOperand(1);

        if (Src.isImm()) {
          if (isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src, 4))
            MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
        }

        continue;
      }

      if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
        continue;

      if (!canShrink(MI, TII, TRI, MRI)) {
        // Try commuting the instruction and see if that enables us to shrink
        // it.
        if (!MI.isCommutable() || !TII->commuteInstruction(&MI) ||
            !canShrink(MI, TII, TRI, MRI))
          continue;
      }

      // getVOPe32 could be -1 here if we started with an instruction that had
      // a 32-bit encoding and then commuted it to an instruction that did not.
      if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
        continue;

      int Op32 = AMDGPU::getVOPe32(MI.getOpcode());

      if (TII->isVOPC(Op32)) {
        unsigned DstReg = MI.getOperand(0).getReg();
        if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
          // VOPC instructions can only write to the VCC register. We can't
          // force them to use VCC here, because this is only one register and
          // cannot deal with sequences which would require multiple copies of
          // VCC, e.g. S_AND_B64 (vcc = V_CMP_...), (vcc = V_CMP_...)
          //
          // So, instead of forcing the instruction to write to VCC, we provide
          // a hint to the register allocator to use VCC and then we will run
          // this pass again after RA and shrink it if it outputs to VCC.
          MRI.setRegAllocationHint(MI.getOperand(0).getReg(), 0, AMDGPU::VCC);
          continue;
        }
        if (DstReg != AMDGPU::VCC)
          continue;
      }

      if (Op32 == AMDGPU::V_CNDMASK_B32_e32) {
        // We shrink V_CNDMASK_B32_e64 using regalloc hints like we do for VOPC
        // instructions.
        const MachineOperand *Src2 =
            TII->getNamedOperand(MI, AMDGPU::OpName::src2);
        if (!Src2->isReg())
          continue;
        unsigned SReg = Src2->getReg();
        if (TargetRegisterInfo::isVirtualRegister(SReg)) {
          MRI.setRegAllocationHint(SReg, 0, AMDGPU::VCC);
          continue;
        }
        if (SReg != AMDGPU::VCC)
          continue;
      }

      // We can shrink this instruction.
      DEBUG(dbgs() << "Shrinking " << MI);

      MachineInstrBuilder Inst32 =
          BuildMI(MBB, I, MI.getDebugLoc(), TII->get(Op32));

      // Add the dst operand if the 32-bit encoding also has an explicit $dst.
      // For VOPC instructions, this is replaced by an implicit def of vcc.
      int Op32DstIdx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::dst);
      if (Op32DstIdx != -1) {
        // dst
        Inst32.addOperand(MI.getOperand(0));
      } else {
        assert(MI.getOperand(0).getReg() == AMDGPU::VCC &&
               "Unexpected case");
      }

      Inst32.addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));

      const MachineOperand *Src1 =
          TII->getNamedOperand(MI, AMDGPU::OpName::src1);
      if (Src1)
        Inst32.addOperand(*Src1);

      const MachineOperand *Src2 =
          TII->getNamedOperand(MI, AMDGPU::OpName::src2);
      if (Src2) {
        int Op32Src2Idx =
            AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2);
        if (Op32Src2Idx != -1) {
          Inst32.addOperand(*Src2);
        } else {
          // In the case of V_CNDMASK_B32_e32, the explicit operand src2 is
          // replaced with an implicit read of vcc.
          assert(Src2->getReg() == AMDGPU::VCC &&
                 "Unexpected missing register operand");
          Inst32.addOperand(copyRegOperandAsImplicit(*Src2));
        }
      }

      ++NumInstructionsShrunk;
      MI.eraseFromParent();

      foldImmediates(*Inst32, TII, MRI);
      DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
    }
  }
  return false;
}
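// A minimal standalone sketch (hypothetical helper name, not part of the
// pass) of the immediate test used for the S_MOVK_I32 rewrite above:
// isInt<16>() from llvm/Support/MathExtras.h accepts exactly the values that
// fit the instruction's signed 16-bit immediate field.
#include "llvm/Support/MathExtras.h"

static bool fitsSMovK32(int64_t Imm) {
  // e.g. 32767 -> true, -32768 -> true, 32768 -> false (needs full S_MOV_B32)
  return llvm::isInt<16>(Imm);
}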
// hasFP - Return true if the specified function should have a dedicated frame
// pointer register. This is true if the function has variable sized allocas or
// if frame pointer elimination is disabled.
bool MBlazeFrameInfo::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects();
}
void PatmosFrameLowering::processFunctionBeforeCalleeSavedScan(
    MachineFunction &MF, RegScavenger *RS) const {
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  PatmosMachineFunctionInfo &PMFI = *MF.getInfo<PatmosMachineFunctionInfo>();

  // Insert instructions at the beginning of the entry block;
  // callee-saved-reg spills are inserted at the front afterwards.
  MachineBasicBlock &EntryMBB = MF.front();
  DebugLoc DL;

  // Do not emit anything for naked functions.
  if (MF.getFunction()->hasFnAttribute(Attribute::Naked)) {
    return;
  }

  if (hasFP(MF)) {
    // If the frame pointer is enabled, set it to point to the stack pointer:
    // FP = SP.
    AddDefaultPred(BuildMI(EntryMBB, EntryMBB.begin(), DL,
                           TII->get(Patmos::MOV), Patmos::RFP))
        .addReg(Patmos::RSP);

    // Mark RFP as used.
    MRI.setPhysRegUsed(Patmos::RFP);
  }

  // Load the current function base if it needs to be passed to call sites.
  if (MFI.hasCalls()) {
    // If we have calls, we need to spill the call link registers.
    MRI.setPhysRegUsed(Patmos::SRB);
    MRI.setPhysRegUsed(Patmos::SRO);
  } else {
    MRI.setPhysRegUnused(Patmos::SRB);
    MRI.setPhysRegUnused(Patmos::SRO);
  }

  // Mark all predicate registers as used, for single-path support.
  // S0 is saved/restored as a whole anyway.
  if (PatmosSinglePathInfo::isEnabled(MF)) {
    MRI.setPhysRegUsed(Patmos::S0);
    MRI.setPhysRegUsed(Patmos::R26);
  }

  // If we need to spill S0, try to find an unused scratch register that we can
  // use instead. This only works if we do not have calls that may clobber
  // the register though.
  // It also makes no sense if we single-path convert the function,
  // because the SP converter introduces spill slots anyway.
  if (MRI.isPhysRegUsed(Patmos::S0) && !MF.getFrameInfo()->hasCalls() &&
      !PatmosSinglePathInfo::isEnabled(MF)) {
    unsigned SpillReg = 0;
    BitVector Reserved = MRI.getReservedRegs();
    BitVector CalleeSaved(TRI->getNumRegs());
    const uint16_t *saved = TRI->getCalleeSavedRegs(&MF);
    while (*saved) {
      CalleeSaved.set(*saved++);
    }
    for (TargetRegisterClass::iterator i = Patmos::RRegsRegClass.begin(),
         e = Patmos::RRegsRegClass.end(); i != e; ++i) {
      if (MRI.isPhysRegUsed(*i) || *i == Patmos::R9)
        continue;
      if (Reserved[*i] || CalleeSaved[*i])
        continue;
      SpillReg = *i;
      break;
    }
    if (SpillReg) {
      // Remember the register for the prologue emitter and mark it as used.
      PMFI.setS0SpillReg(SpillReg);
      MRI.setPhysRegUsed(SpillReg);
    }
  }

  if (TRI->requiresRegisterScavenging(MF)) {
    const TargetRegisterClass *RC = &Patmos::RRegsRegClass;
    int fi = MFI.CreateStackObject(RC->getSize(), RC->getAlignment(), false);
    RS->addScavengingFrameIndex(fi);
    PMFI.setRegScavengingFI(fi);
  }
}
bool XCoreRegisterInfo::hasFP(const MachineFunction &MF) const {
  return NoFramePointerElim || MF.getFrameInfo()->hasVarSizedObjects();
}
bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  std::vector<unsigned> I1Defs;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
      if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
        const MachineOperand &Src = MI.getOperand(1);

        if (Src.isImm()) {
          if (isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src, 4))
            MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
        }

        continue;
      }

      if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
        continue;

      if (!canShrink(MI, TII, TRI, MRI)) {
        // Try commuting the instruction and see if that enables us to shrink
        // it.
        if (!MI.isCommutable() || !TII->commuteInstruction(&MI) ||
            !canShrink(MI, TII, TRI, MRI))
          continue;
      }

      // getVOPe32 could be -1 here if we started with an instruction that had
      // a 32-bit encoding and then commuted it to an instruction that did not.
      if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
        continue;

      int Op32 = AMDGPU::getVOPe32(MI.getOpcode());

      if (TII->isVOPC(Op32)) {
        unsigned DstReg = MI.getOperand(0).getReg();
        if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
          // VOPC instructions can only write to the VCC register. We can't
          // force them to use VCC here, because the register allocator has
          // trouble with sequences like this, which cause the allocator to run
          // out of registers if vreg0 and vreg1 belong to the VCCReg register
          // class:
          //   vreg0 = VOPC;
          //   vreg1 = VOPC;
          //   S_AND_B64 vreg0, vreg1
          //
          // So, instead of forcing the instruction to write to VCC, we provide
          // a hint to the register allocator to use VCC and then we will run
          // this pass again after RA and shrink it if it outputs to VCC.
          MRI.setRegAllocationHint(MI.getOperand(0).getReg(), 0, AMDGPU::VCC);
          continue;
        }
        if (DstReg != AMDGPU::VCC)
          continue;
      }

      if (Op32 == AMDGPU::V_CNDMASK_B32_e32) {
        // We shrink V_CNDMASK_B32_e64 using regalloc hints like we do for VOPC
        // instructions.
        const MachineOperand *Src2 =
            TII->getNamedOperand(MI, AMDGPU::OpName::src2);
        if (!Src2->isReg())
          continue;
        unsigned SReg = Src2->getReg();
        if (TargetRegisterInfo::isVirtualRegister(SReg)) {
          MRI.setRegAllocationHint(SReg, 0, AMDGPU::VCC);
          continue;
        }
        if (SReg != AMDGPU::VCC)
          continue;
      }

      // We can shrink this instruction.
      DEBUG(dbgs() << "Shrinking "; MI.dump(); dbgs() << '\n';);

      MachineInstrBuilder Inst32 =
          BuildMI(MBB, I, MI.getDebugLoc(), TII->get(Op32));

      // dst
      Inst32.addOperand(MI.getOperand(0));

      Inst32.addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));

      const MachineOperand *Src1 =
          TII->getNamedOperand(MI, AMDGPU::OpName::src1);
      if (Src1)
        Inst32.addOperand(*Src1);

      const MachineOperand *Src2 =
          TII->getNamedOperand(MI, AMDGPU::OpName::src2);
      if (Src2)
        Inst32.addOperand(*Src2);

      ++NumInstructionsShrunk;
      MI.eraseFromParent();

      foldImmediates(*Inst32, TII, MRI);
      DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
    }
  }
  return false;
}
bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo *MRI = &MF.getRegInfo();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  const TargetMachine &TM = MF.getTarget();

  LLVM_DEBUG(dbgs() << " -------------------- " << getPassName()
                    << " -------------------- \n");
  LLVM_DEBUG(dbgs() << "Function Name : " << MF.getName() << "\n");

  std::vector<uint32_t> RegMask;

  // Compute the size of the bit vector to represent all the registers.
  // The bit vector is broken into 32-bit chunks, thus takes the ceil of
  // the number of registers divided by 32 for the size.
  unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
  RegMask.resize(RegMaskSize, ~((uint32_t)0));

  const Function &F = MF.getFunction();

  PhysicalRegisterUsageInfo &PRUI = getAnalysis<PhysicalRegisterUsageInfo>();
  PRUI.setTargetMachine(TM);

  LLVM_DEBUG(dbgs() << "Clobbered Registers: ");

  BitVector SavedRegs;
  computeCalleeSavedRegs(SavedRegs, MF);

  const BitVector &UsedPhysRegsMask = MRI->getUsedPhysRegsMask();
  auto SetRegAsDefined = [&RegMask] (unsigned Reg) {
    RegMask[Reg / 32] &= ~(1u << Reg % 32);
  };
  // Scan all the physical registers. When a register is defined in the current
  // function set it and all the aliasing registers as defined in the regmask.
  for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) {
    // Don't count registers that are saved and restored.
    if (SavedRegs.test(PReg))
      continue;
    // If a register is defined by an instruction mark it as defined together
    // with all its unsaved aliases.
    if (!MRI->def_empty(PReg)) {
      for (MCRegAliasIterator AI(PReg, TRI, true); AI.isValid(); ++AI)
        if (!SavedRegs.test(*AI))
          SetRegAsDefined(*AI);
      continue;
    }
    // If a register is in the UsedPhysRegsMask set then mark it as defined.
    // All clobbered aliases will also be in the set, so we can skip setting
    // as defined all the aliases here.
    if (UsedPhysRegsMask.test(PReg))
      SetRegAsDefined(PReg);
  }

  if (TargetFrameLowering::isSafeForNoCSROpt(F)) {
    ++NumCSROpt;
    LLVM_DEBUG(dbgs() << MF.getName()
                      << " function optimized for not having CSR.\n");
  }

  for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg)
    if (MachineOperand::clobbersPhysReg(&(RegMask[0]), PReg))
      LLVM_DEBUG(dbgs() << printReg(PReg, TRI) << " ");

  LLVM_DEBUG(dbgs() << " \n----------------------------------------\n");

  PRUI.storeUpdateRegUsageInfo(F, RegMask);

  return false;
}
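// A hedged sketch (hypothetical free function, mirroring what
// MachineOperand::clobbersPhysReg does) of the regmask convention the lambda
// above relies on: a set bit means "preserved", so SetRegAsDefined clears the
// bit and the query below then reports the register as clobbered.
#include <cstdint>

static bool clobbersPhysRegSketch(const uint32_t *RegMask, unsigned PhysReg) {
  // Bit PhysReg%32 of word PhysReg/32 is clear iff the register is clobbered.
  return !(RegMask[PhysReg / 32] & (1u << (PhysReg % 32)));
}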
bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) {
  MRI = &MF.getRegInfo();
  TII = MF.getTarget().getInstrInfo();
  DT = &getAnalysis<MachineDominatorTree>();
  LI = &getAnalysis<LiveIntervals>();

  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
       I != E; ++I) {
    for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
         BBI != BBE && BBI->isPHI(); ++BBI) {
      unsigned DestReg = BBI->getOperand(0).getReg();
      addReg(DestReg);
      PHISrcDefs[I].push_back(BBI);

      for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
        MachineOperand &SrcMO = BBI->getOperand(i);
        unsigned SrcReg = SrcMO.getReg();
        addReg(SrcReg);
        unionRegs(DestReg, SrcReg);

        MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
        if (DefMI)
          PHISrcDefs[DefMI->getParent()].push_back(DefMI);
      }
    }
  }

  // Perform a depth-first traversal of the dominator tree, splitting
  // interferences amongst PHI-congruence classes.
  DenseMap<unsigned, unsigned> CurrentDominatingParent;
  DenseMap<unsigned, unsigned> ImmediateDominatingParent;
  for (df_iterator<MachineDomTreeNode*> DI = df_begin(DT->getRootNode()),
       DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
    SplitInterferencesForBasicBlock(*DI->getBlock(),
                                    CurrentDominatingParent,
                                    ImmediateDominatingParent);
  }

  // Insert copies for all PHI source and destination registers.
  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
       I != E; ++I) {
    for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
         BBI != BBE && BBI->isPHI(); ++BBI) {
      InsertCopiesForPHI(BBI, I);
    }
  }

  // FIXME: Preserve the equivalence classes during copy insertion and use
  // the preserved equivalence classes instead of recomputing them.
  RegNodeMap.clear();
  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
       I != E; ++I) {
    for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
         BBI != BBE && BBI->isPHI(); ++BBI) {
      unsigned DestReg = BBI->getOperand(0).getReg();
      addReg(DestReg);

      for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
        unsigned SrcReg = BBI->getOperand(i).getReg();
        addReg(SrcReg);
        unionRegs(DestReg, SrcReg);
      }
    }
  }

  DenseMap<unsigned, unsigned> RegRenamingMap;
  bool Changed = false;
  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
       I != E; ++I) {
    MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
    while (BBI != BBE && BBI->isPHI()) {
      MachineInstr *PHI = BBI;

      assert(PHI->getNumOperands() > 0);

      unsigned SrcReg = PHI->getOperand(1).getReg();
      unsigned SrcColor = getRegColor(SrcReg);
      unsigned NewReg = RegRenamingMap[SrcColor];
      if (!NewReg) {
        NewReg = SrcReg;
        RegRenamingMap[SrcColor] = SrcReg;
      }
      MergeLIsAndRename(SrcReg, NewReg);

      unsigned DestReg = PHI->getOperand(0).getReg();
      if (!InsertedDestCopies.count(DestReg))
        MergeLIsAndRename(DestReg, NewReg);

      for (unsigned i = 3; i < PHI->getNumOperands(); i += 2) {
        unsigned SrcReg = PHI->getOperand(i).getReg();
        MergeLIsAndRename(SrcReg, NewReg);
      }

      ++BBI;
      LI->RemoveMachineInstrFromMaps(PHI);
      PHI->eraseFromParent();
      Changed = true;
    }
  }

  // Due to the insertion of copies to split live ranges, the live intervals
  // are guaranteed to not overlap, except in one case: an original PHI source
  // and a PHI destination copy. In this case, they have the same value and
  // thus don't truly intersect, so we merge them into the value live at that
  // point.
  // FIXME: Is there some better way we can handle this?
  for (DestCopyMap::iterator I = InsertedDestCopies.begin(),
       E = InsertedDestCopies.end(); I != E; ++I) {
    unsigned DestReg = I->first;
    unsigned DestColor = getRegColor(DestReg);
    unsigned NewReg = RegRenamingMap[DestColor];

    LiveInterval &DestLI = LI->getInterval(DestReg);
    LiveInterval &NewLI = LI->getInterval(NewReg);

    assert(DestLI.ranges.size() == 1 &&
           "PHI destination copy's live interval should be a single live "
           "range from the beginning of the BB to the copy instruction.");
    LiveRange *DestLR = DestLI.begin();
    VNInfo *NewVNI = NewLI.getVNInfoAt(DestLR->start);
    if (!NewVNI) {
      NewVNI = NewLI.createValueCopy(DestLR->valno, LI->getVNInfoAllocator());
      MachineInstr *CopyInstr = I->second;
      CopyInstr->getOperand(1).setIsKill(true);
    }

    LiveRange NewLR(DestLR->start, DestLR->end, NewVNI);
    NewLI.addRange(NewLR);

    LI->removeInterval(DestReg);
    MRI->replaceRegWith(DestReg, NewReg);
  }

  // Adjust the live intervals of all PHI source registers to handle the case
  // where the PHIs in successor blocks were the only later uses of the source
  // register.
  for (SrcCopySet::iterator I = InsertedSrcCopySet.begin(),
       E = InsertedSrcCopySet.end(); I != E; ++I) {
    MachineBasicBlock *MBB = I->first;
    unsigned SrcReg = I->second;
    if (unsigned RenamedRegister = RegRenamingMap[getRegColor(SrcReg)])
      SrcReg = RenamedRegister;

    LiveInterval &SrcLI = LI->getInterval(SrcReg);

    bool isLiveOut = false;
    for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
         SE = MBB->succ_end(); SI != SE; ++SI) {
      if (SrcLI.liveAt(LI->getMBBStartIdx(*SI))) {
        isLiveOut = true;
        break;
      }
    }

    if (isLiveOut)
      continue;

    MachineOperand *LastUse = findLastUse(MBB, SrcReg);
    assert(LastUse);
    SlotIndex LastUseIndex = LI->getInstructionIndex(LastUse->getParent());
    SrcLI.removeRange(LastUseIndex.getDefIndex(), LI->getMBBEndIdx(MBB));
    LastUse->setIsKill(true);
  }

  LI->renumber();

  Allocator.Reset();
  RegNodeMap.clear();
  PHISrcDefs.clear();
  InsertedSrcCopySet.clear();
  InsertedSrcCopyMap.clear();
  InsertedDestCopies.clear();

  return Changed;
}
void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
                               SelectionDAG *DAG) {
  const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();

  Fn = &fn;
  MF = &mf;
  RegInfo = &MF->getRegInfo();

  // Check whether the function can return without sret-demotion.
  SmallVector<ISD::OutputArg, 4> Outs;
  GetReturnInfo(Fn->getReturnType(), Fn->getAttributes(), Outs, *TLI);
  CanLowerReturn = TLI->CanLowerReturn(Fn->getCallingConv(), *MF,
                                       Fn->isVarArg(), Outs, Fn->getContext());

  // Initialize the mapping of values to registers. This is only set up for
  // instruction values that are used outside of the block that defines
  // them.
  Function::const_iterator BB = Fn->begin(), EB = Fn->end();
  for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I)
    if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
      // Don't fold inalloca allocas or other dynamic allocas into the initial
      // stack frame allocation, even if they are in the entry block.
      if (!AI->isStaticAlloca())
        continue;

      if (const ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) {
        Type *Ty = AI->getAllocatedType();
        uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty);
        unsigned Align =
            std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty),
                     AI->getAlignment());

        TySize *= CUI->getZExtValue();   // Get total allocated size.
        if (TySize == 0) TySize = 1;     // Don't create zero-sized stack objects.

        StaticAllocaMap[AI] =
            MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI);
      }
    }

  for (; BB != EB; ++BB)
    for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
         I != E; ++I) {
      // Look for dynamic allocas.
      if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
        if (!AI->isStaticAlloca()) {
          unsigned Align = std::max(
              (unsigned)TLI->getDataLayout()->getPrefTypeAlignment(
                  AI->getAllocatedType()),
              AI->getAlignment());
          unsigned StackAlign =
              TM.getSubtargetImpl()->getFrameLowering()->getStackAlignment();
          if (Align <= StackAlign)
            Align = 0;
          // Inform the Frame Information that we have variable-sized objects.
          MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1, AI);
        }
      }

      // Look for inline asm that clobbers the SP register.
      if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
        ImmutableCallSite CS(I);
        if (isa<InlineAsm>(CS.getCalledValue())) {
          unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
          std::vector<TargetLowering::AsmOperandInfo> Ops =
              TLI->ParseConstraints(CS);
          for (size_t I = 0, E = Ops.size(); I != E; ++I) {
            TargetLowering::AsmOperandInfo &Op = Ops[I];
            if (Op.Type == InlineAsm::isClobber) {
              // Clobbers don't have SDValue operands, hence SDValue().
              TLI->ComputeConstraintToUse(Op, SDValue(), DAG);
              std::pair<unsigned, const TargetRegisterClass*> PhysReg =
                  TLI->getRegForInlineAsmConstraint(Op.ConstraintCode,
                                                    Op.ConstraintVT);
              if (PhysReg.first == SP)
                MF->getFrameInfo()->setHasInlineAsmWithSPAdjust(true);
            }
          }
        }
      }

      // Mark values used outside their block as exported, by allocating
      // a virtual register for them.
      if (isUsedOutsideOfDefiningBlock(I))
        if (!isa<AllocaInst>(I) ||
            !StaticAllocaMap.count(cast<AllocaInst>(I)))
          InitializeRegForValue(I);

      // Collect llvm.dbg.declare information. This is done now instead of
      // during the initial isel pass through the IR so that it is done
      // in a predictable order.
      if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(I)) {
        MachineModuleInfo &MMI = MF->getMMI();
        DIVariable DIVar(DI->getVariable());
        assert((!DIVar || DIVar.isVariable()) &&
               "Variable in DbgDeclareInst should be either null or a "
               "DIVariable.");
        if (MMI.hasDebugInfo() && DIVar && !DI->getDebugLoc().isUnknown()) {
          // Don't handle byval struct arguments or VLAs, for example.
          // Non-byval arguments are handled here (they refer to the stack
          // temporary alloca at this point).
          const Value *Address = DI->getAddress();
          if (Address) {
            if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
              Address = BCI->getOperand(0);
            if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) {
              DenseMap<const AllocaInst *, int>::iterator SI =
                  StaticAllocaMap.find(AI);
              if (SI != StaticAllocaMap.end()) { // Check for VLAs.
                int FI = SI->second;
                MMI.setVariableDbgInfo(DI->getVariable(), FI,
                                       DI->getDebugLoc());
              }
            }
          }
        }
      }
    }

  // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This
  // also creates the initial PHI MachineInstrs, though none of the input
  // operands are populated.
  for (BB = Fn->begin(); BB != EB; ++BB) {
    MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(BB);
    MBBMap[BB] = MBB;
    MF->push_back(MBB);

    // Transfer the address-taken flag. This is necessary because there could
    // be multiple MachineBasicBlocks corresponding to one BasicBlock, and only
    // the first one should be marked.
    if (BB->hasAddressTaken())
      MBB->setHasAddressTaken();

    // Create Machine PHI nodes for LLVM PHI nodes, lowering them as
    // appropriate.
    for (BasicBlock::const_iterator I = BB->begin();
         const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
      if (PN->use_empty()) continue;

      // Skip empty types.
      if (PN->getType()->isEmptyTy())
        continue;

      DebugLoc DL = PN->getDebugLoc();
      unsigned PHIReg = ValueMap[PN];
      assert(PHIReg && "PHI node does not have an assigned virtual register!");

      SmallVector<EVT, 4> ValueVTs;
      ComputeValueVTs(*TLI, PN->getType(), ValueVTs);
      for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
        EVT VT = ValueVTs[vti];
        unsigned NumRegisters = TLI->getNumRegisters(Fn->getContext(), VT);
        const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
        for (unsigned i = 0; i != NumRegisters; ++i)
          BuildMI(MBB, DL, TII->get(TargetOpcode::PHI), PHIReg + i);
        PHIReg += NumRegisters;
      }
    }
  }

  // Mark landing pad blocks.
  for (BB = Fn->begin(); BB != EB; ++BB)
    if (const InvokeInst *Invoke = dyn_cast<InvokeInst>(BB->getTerminator()))
      MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad();
}
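// Illustrative sizing for the static-alloca loop above (assumed IR, not taken
// from any particular test): for
//   %buf = alloca [16 x i32]
// getTypeAllocSize returns 64 bytes, the ConstantInt array-size multiplier is
// 1, and the stack object is created with
// max(preferred type alignment, explicit alloca alignment).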
MachineBasicBlock *
MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
  // Splitting the critical edge to a landing pad block is non-trivial. Don't
  // do it in this generic function.
  if (Succ->isEHPad())
    return nullptr;

  MachineFunction *MF = getParent();
  DebugLoc DL;  // FIXME: this is nowhere

  // Performance might be harmed on HW that implements branching using exec
  // mask where both sides of the branches are always executed.
  if (MF->getTarget().requiresStructuredCFG())
    return nullptr;

  // We may need to update this block's terminator, but we can't do that if
  // AnalyzeBranch fails. If this block uses a jump table, we won't touch it.
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
  SmallVector<MachineOperand, 4> Cond;
  if (TII->AnalyzeBranch(*this, TBB, FBB, Cond))
    return nullptr;

  // Avoid bugpoint weirdness: A block may end with a conditional branch that
  // jumps to the same MBB in either case. We have duplicate CFG edges in that
  // case that we can't handle. Since this never happens in properly optimized
  // code, just skip those edges.
  if (TBB && TBB == FBB) {
    DEBUG(dbgs() << "Won't split critical edge after degenerate BB#"
                 << getNumber() << '\n');
    return nullptr;
  }

  MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
  MF->insert(std::next(MachineFunction::iterator(this)), NMBB);
  DEBUG(dbgs() << "Splitting critical edge:"
        " BB#" << getNumber()
        << " -- BB#" << NMBB->getNumber()
        << " -- BB#" << Succ->getNumber() << '\n');

  LiveIntervals *LIS = P->getAnalysisIfAvailable<LiveIntervals>();
  SlotIndexes *Indexes = P->getAnalysisIfAvailable<SlotIndexes>();
  if (LIS)
    LIS->insertMBBInMaps(NMBB);
  else if (Indexes)
    Indexes->insertMBBInMaps(NMBB);

  // On some targets like Mips, branches may kill virtual registers. Make sure
  // that LiveVariables is properly updated after updateTerminator replaces the
  // terminators.
  LiveVariables *LV = P->getAnalysisIfAvailable<LiveVariables>();

  // Collect a list of virtual registers killed by the terminators.
  SmallVector<unsigned, 4> KilledRegs;
  if (LV)
    for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
         I != E; ++I) {
      MachineInstr *MI = &*I;
      for (MachineInstr::mop_iterator OI = MI->operands_begin(),
           OE = MI->operands_end(); OI != OE; ++OI) {
        if (!OI->isReg() || OI->getReg() == 0 ||
            !OI->isUse() || !OI->isKill() || OI->isUndef())
          continue;
        unsigned Reg = OI->getReg();
        if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
            LV->getVarInfo(Reg).removeKill(MI)) {
          KilledRegs.push_back(Reg);
          DEBUG(dbgs() << "Removing terminator kill: " << *MI);
          OI->setIsKill(false);
        }
      }
    }

  SmallVector<unsigned, 4> UsedRegs;
  if (LIS) {
    for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
         I != E; ++I) {
      MachineInstr *MI = &*I;

      for (MachineInstr::mop_iterator OI = MI->operands_begin(),
           OE = MI->operands_end(); OI != OE; ++OI) {
        if (!OI->isReg() || OI->getReg() == 0)
          continue;

        unsigned Reg = OI->getReg();
        if (std::find(UsedRegs.begin(), UsedRegs.end(), Reg) == UsedRegs.end())
          UsedRegs.push_back(Reg);
      }
    }
  }

  ReplaceUsesOfBlockWith(Succ, NMBB);

  // If updateTerminator() removes instructions, we need to remove them from
  // SlotIndexes.
  SmallVector<MachineInstr*, 4> Terminators;
  if (Indexes) {
    for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
         I != E; ++I)
      Terminators.push_back(&*I);
  }

  updateTerminator();

  if (Indexes) {
    SmallVector<MachineInstr*, 4> NewTerminators;
    for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
         I != E; ++I)
      NewTerminators.push_back(&*I);

    for (SmallVectorImpl<MachineInstr*>::iterator I = Terminators.begin(),
         E = Terminators.end(); I != E; ++I) {
      if (std::find(NewTerminators.begin(), NewTerminators.end(), *I) ==
          NewTerminators.end())
        Indexes->removeMachineInstrFromMaps(*I);
    }
  }

  // Insert unconditional "jump Succ" instruction in NMBB if necessary.
  NMBB->addSuccessor(Succ);
  if (!NMBB->isLayoutSuccessor(Succ)) {
    Cond.clear();
    TII->InsertBranch(*NMBB, Succ, nullptr, Cond, DL);

    if (Indexes) {
      for (instr_iterator I = NMBB->instr_begin(), E = NMBB->instr_end();
           I != E; ++I) {
        // Some instructions may have been moved to NMBB by updateTerminator(),
        // so we first remove any instruction that already has an index.
        if (Indexes->hasIndex(&*I))
          Indexes->removeMachineInstrFromMaps(&*I);
        Indexes->insertMachineInstrInMaps(&*I);
      }
    }
  }

  // Fix PHI nodes in Succ so they refer to NMBB instead of this.
  for (MachineBasicBlock::instr_iterator
         i = Succ->instr_begin(), e = Succ->instr_end();
       i != e && i->isPHI(); ++i)
    for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2)
      if (i->getOperand(ni+1).getMBB() == this)
        i->getOperand(ni+1).setMBB(NMBB);

  // Inherit live-ins from the successor.
  for (const auto &LI : Succ->liveins())
    NMBB->addLiveIn(LI);

  // Update LiveVariables.
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  if (LV) {
    // Restore kills of virtual registers that were killed by the terminators.
    while (!KilledRegs.empty()) {
      unsigned Reg = KilledRegs.pop_back_val();
      for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) {
        if (!(--I)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false))
          continue;
        if (TargetRegisterInfo::isVirtualRegister(Reg))
          LV->getVarInfo(Reg).Kills.push_back(&*I);
        DEBUG(dbgs() << "Restored terminator kill: " << *I);
        break;
      }
    }
    // Update relevant live-through information.
    LV->addNewBlock(NMBB, this, Succ);
  }

  if (LIS) {
    // After splitting the edge and updating SlotIndexes, live intervals may be
    // in one of two situations, depending on whether this block was the last
    // in the function. If the original block was the last in the function, all
    // live intervals will end prior to the beginning of the new split block.
    // If the original block was not at the end of the function, all live
    // intervals will extend to the end of the new split block.

    bool isLastMBB =
      std::next(MachineFunction::iterator(NMBB)) == getParent()->end();

    SlotIndex StartIndex = Indexes->getMBBEndIdx(this);
    SlotIndex PrevIndex = StartIndex.getPrevSlot();
    SlotIndex EndIndex = Indexes->getMBBEndIdx(NMBB);

    // Find the registers used from NMBB in PHIs in Succ.
    SmallSet<unsigned, 8> PHISrcRegs;
    for (MachineBasicBlock::instr_iterator
         I = Succ->instr_begin(), E = Succ->instr_end();
         I != E && I->isPHI(); ++I) {
      for (unsigned ni = 1, ne = I->getNumOperands(); ni != ne; ni += 2) {
        if (I->getOperand(ni+1).getMBB() == NMBB) {
          MachineOperand &MO = I->getOperand(ni);
          unsigned Reg = MO.getReg();
          PHISrcRegs.insert(Reg);
          if (MO.isUndef())
            continue;

          LiveInterval &LI = LIS->getInterval(Reg);
          VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
          assert(VNI &&
                 "PHI sources should be live out of their predecessors.");
          LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI));
        }
      }
    }

    MachineRegisterInfo *MRI = &getParent()->getRegInfo();
    for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
      unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
      if (PHISrcRegs.count(Reg) || !LIS->hasInterval(Reg))
        continue;

      LiveInterval &LI = LIS->getInterval(Reg);
      if (!LI.liveAt(PrevIndex))
        continue;

      bool isLiveOut = LI.liveAt(LIS->getMBBStartIdx(Succ));
      if (isLiveOut && isLastMBB) {
        VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
        assert(VNI && "LiveInterval should have VNInfo where it is live.");
        LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI));
      } else if (!isLiveOut && !isLastMBB) {
        LI.removeSegment(StartIndex, EndIndex);
      }
    }

    // Update all intervals for registers whose uses may have been modified by
    // updateTerminator().
    LIS->repairIntervalsInRange(this, getFirstTerminator(), end(), UsedRegs);
  }

  if (MachineDominatorTree *MDT =
      P->getAnalysisIfAvailable<MachineDominatorTree>())
    MDT->recordSplitCriticalEdge(this, Succ, NMBB);

  if (MachineLoopInfo *MLI = P->getAnalysisIfAvailable<MachineLoopInfo>())
    if (MachineLoop *TIL = MLI->getLoopFor(this)) {
      // If one or the other blocks were not in a loop, the new block is not
      // either, and thus LI doesn't need to be updated.
      if (MachineLoop *DestLoop = MLI->getLoopFor(Succ)) {
        if (TIL == DestLoop) {
          // Both in the same loop, the NMBB joins loop.
          DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
        } else if (TIL->contains(DestLoop)) {
          // Edge from an outer loop to an inner loop. Add to the outer loop.
          TIL->addBasicBlockToLoop(NMBB, MLI->getBase());
        } else if (DestLoop->contains(TIL)) {
          // Edge from an inner loop to an outer loop. Add to the outer loop.
          DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
        } else {
          // Edge from two loops with no containment relation. Because these
          // are natural loops, we know that the destination block must be the
          // header of its loop (adding a branch into a loop elsewhere would
          // create an irreducible loop).
          assert(DestLoop->getHeader() == Succ &&
                 "Should not create irreducible loops!");
          if (MachineLoop *P = DestLoop->getParentLoop())
            P->addBasicBlockToLoop(NMBB, MLI->getBase());
        }
      }
    }

  return NMBB;
}
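// A hedged usage sketch: from a pass that holds the edge MBB -> Succ, calling
// the function above and handling the nullptr cases it can return (landing
// pad, structured CFG, unanalyzable branch, or a degenerate duplicate edge):
//   if (MachineBasicBlock *NewBB = MBB->SplitCriticalEdge(Succ, this)) {
//     // NewBB now sits between MBB and Succ; PHIs in Succ were rewritten and
//     // LiveVariables/LiveIntervals/SlotIndexes were updated if available.
//   } else {
//     // The edge was left alone; the caller must cope without the split.
//   }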
bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
  Instructions.clear();
  Blocks.clear();
  LiveMaskQueries.clear();
  LowerToCopyInstrs.clear();
  CallingConv = MF.getFunction().getCallingConv();

  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();

  TII = ST.getInstrInfo();
  TRI = &TII->getRegisterInfo();
  MRI = &MF.getRegInfo();
  LIS = &getAnalysis<LiveIntervals>();

  char GlobalFlags = analyzeFunction(MF);
  unsigned LiveMaskReg = 0;
  if (!(GlobalFlags & StateWQM)) {
    lowerLiveMaskQueries(AMDGPU::EXEC);
    if (!(GlobalFlags & StateWWM))
      return !LiveMaskQueries.empty();
  } else {
    // Store a copy of the original live mask when required.
    MachineBasicBlock &Entry = MF.front();
    MachineBasicBlock::iterator EntryMI = Entry.getFirstNonPHI();

    if (GlobalFlags & StateExact || !LiveMaskQueries.empty()) {
      LiveMaskReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
      MachineInstr *MI = BuildMI(Entry, EntryMI, DebugLoc(),
                                 TII->get(AMDGPU::COPY), LiveMaskReg)
                             .addReg(AMDGPU::EXEC);
      LIS->InsertMachineInstrInMaps(*MI);
    }

    lowerLiveMaskQueries(LiveMaskReg);

    if (GlobalFlags == StateWQM) {
      // For a shader that needs only WQM, we can just set it once.
      BuildMI(Entry, EntryMI, DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
              AMDGPU::EXEC)
          .addReg(AMDGPU::EXEC);

      lowerCopyInstrs();
      // EntryMI may become invalid here.
      return true;
    }
  }

  LLVM_DEBUG(printInfo());

  lowerCopyInstrs();

  // Handle the general case.
  for (auto BII : Blocks)
    processBlock(*BII.first, LiveMaskReg, BII.first == &*MF.begin());

  // Physical registers like SCC aren't tracked by default anyway, so just
  // removing the ranges we computed is the simplest option for maintaining
  // the analysis results.
  LIS->removeRegUnit(*MCRegUnitIterator(AMDGPU::SCC, TRI));

  return true;
}
// Scan instructions to determine which ones require an Exact execmask and
// which ones seed WQM requirements.
char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
                                       std::vector<WorkItem> &Worklist) {
  char GlobalFlags = 0;
  bool WQMOutputs = MF.getFunction().hasFnAttribute("amdgpu-ps-wqm-outputs");
  SmallVector<MachineInstr *, 4> SetInactiveInstrs;

  // We need to visit the basic blocks in reverse post-order so that we visit
  // defs before uses, in particular so that we don't accidentally mark an
  // instruction as needing e.g. WQM before visiting it and realizing it needs
  // WQM disabled.
  ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
  for (auto BI = RPOT.begin(), BE = RPOT.end(); BI != BE; ++BI) {
    MachineBasicBlock &MBB = **BI;
    BlockInfo &BBI = Blocks[&MBB];

    for (auto II = MBB.begin(), IE = MBB.end(); II != IE; ++II) {
      MachineInstr &MI = *II;
      InstrInfo &III = Instructions[&MI];
      unsigned Opcode = MI.getOpcode();
      char Flags = 0;

      if (TII->isWQM(Opcode)) {
        // Sampling instructions don't need to produce results for all pixels
        // in a quad, they just require all inputs of a quad to have been
        // computed for derivatives.
        markInstructionUses(MI, StateWQM, Worklist);
        GlobalFlags |= StateWQM;
        continue;
      } else if (Opcode == AMDGPU::WQM) {
        // The WQM intrinsic requires its output to have all the helper lanes
        // correct, so we need it to be in WQM.
        Flags = StateWQM;
        LowerToCopyInstrs.push_back(&MI);
      } else if (Opcode == AMDGPU::WWM) {
        // The WWM intrinsic doesn't make the same guarantee, and it also needs
        // to be executed in WQM or Exact so that its copy doesn't clobber
        // inactive lanes.
        markInstructionUses(MI, StateWWM, Worklist);
        GlobalFlags |= StateWWM;
        LowerToCopyInstrs.push_back(&MI);
        continue;
      } else if (Opcode == AMDGPU::V_SET_INACTIVE_B32 ||
                 Opcode == AMDGPU::V_SET_INACTIVE_B64) {
        III.Disabled = StateWWM;
        MachineOperand &Inactive = MI.getOperand(2);
        if (Inactive.isReg()) {
          if (Inactive.isUndef()) {
            LowerToCopyInstrs.push_back(&MI);
          } else {
            unsigned Reg = Inactive.getReg();
            if (TargetRegisterInfo::isVirtualRegister(Reg)) {
              for (MachineInstr &DefMI : MRI->def_instructions(Reg))
                markInstruction(DefMI, StateWWM, Worklist);
            }
          }
        }
        SetInactiveInstrs.push_back(&MI);
        continue;
      } else if (TII->isDisableWQM(MI)) {
        BBI.Needs |= StateExact;
        if (!(BBI.InNeeds & StateExact)) {
          BBI.InNeeds |= StateExact;
          Worklist.push_back(&MBB);
        }
        GlobalFlags |= StateExact;
        III.Disabled = StateWQM | StateWWM;
        continue;
      } else {
        if (Opcode == AMDGPU::SI_PS_LIVE) {
          LiveMaskQueries.push_back(&MI);
        } else if (WQMOutputs) {
          // The function is in machine SSA form, which means that physical
          // VGPRs correspond to shader inputs and outputs. Inputs are
          // only used, outputs are only defined.
          for (const MachineOperand &MO : MI.defs()) {
            if (!MO.isReg())
              continue;

            unsigned Reg = MO.getReg();

            if (!TRI->isVirtualRegister(Reg) &&
                TRI->hasVGPRs(TRI->getPhysRegClass(Reg))) {
              Flags = StateWQM;
              break;
            }
          }
        }

        if (!Flags)
          continue;
      }

      markInstruction(MI, Flags, Worklist);
      GlobalFlags |= Flags;
    }
  }

  // Make sure that any SET_INACTIVE instructions are computed in WQM if WQM is
  // ever used anywhere in the function. This implements the corresponding
  // semantics of @llvm.amdgcn.set.inactive.
  if (GlobalFlags & StateWQM) {
    for (MachineInstr *MI : SetInactiveInstrs)
      markInstruction(*MI, StateWQM, Worklist);
  }

  return GlobalFlags;
}
MipsMCInstLower::MipsMCInstLower(Mangler *mang, const MachineFunction &mf,
                                 MipsAsmPrinter &asmprinter)
    : Ctx(mf.getContext()), Mang(mang), AsmPrinter(asmprinter) {}
BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());
  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();

  // Set the stack-pointer register and its aliases as reserved.
  for (MCSubRegIterator I(X86::RSP, this, /*IncludeSelf=*/true); I.isValid();
       ++I)
    Reserved.set(*I);

  // Set the instruction pointer register and its aliases as reserved.
  for (MCSubRegIterator I(X86::RIP, this, /*IncludeSelf=*/true); I.isValid();
       ++I)
    Reserved.set(*I);

  // Set the frame-pointer register and its aliases as reserved if needed.
  if (TFI->hasFP(MF)) {
    for (MCSubRegIterator I(X86::RBP, this, /*IncludeSelf=*/true); I.isValid();
         ++I)
      Reserved.set(*I);
  }

  // Set the base-pointer register and its aliases as reserved if needed.
  if (hasBasePointer(MF)) {
    CallingConv::ID CC = MF.getFunction()->getCallingConv();
    const uint32_t *RegMask = getCallPreservedMask(MF, CC);
    if (MachineOperand::clobbersPhysReg(RegMask, getBaseRegister()))
      report_fatal_error(
        "Stack realignment in presence of dynamic allocas is not supported "
        "with this calling convention.");

    unsigned BasePtr = getX86SubSuperRegister(getBaseRegister(), MVT::i64,
                                              false);
    for (MCSubRegIterator I(BasePtr, this, /*IncludeSelf=*/true); I.isValid();
         ++I)
      Reserved.set(*I);
  }

  // Mark the segment registers as reserved.
  Reserved.set(X86::CS);
  Reserved.set(X86::SS);
  Reserved.set(X86::DS);
  Reserved.set(X86::ES);
  Reserved.set(X86::FS);
  Reserved.set(X86::GS);

  // Mark the floating point stack registers as reserved.
  for (unsigned n = 0; n != 8; ++n)
    Reserved.set(X86::ST0 + n);

  // Reserve the registers that only exist in 64-bit mode.
  if (!Is64Bit) {
    // These 8-bit registers are part of the x86-64 extension even though their
    // super-registers are old 32-bits.
    Reserved.set(X86::SIL);
    Reserved.set(X86::DIL);
    Reserved.set(X86::BPL);
    Reserved.set(X86::SPL);

    for (unsigned n = 0; n != 8; ++n) {
      // R8, R9, ...
      for (MCRegAliasIterator AI(X86::R8 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);

      // XMM8, XMM9, ...
      for (MCRegAliasIterator AI(X86::XMM8 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);
    }
  }
  if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasAVX512()) {
    for (unsigned n = 16; n != 32; ++n) {
      for (MCRegAliasIterator AI(X86::XMM0 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);
    }
  }

  // @LOCALMOD-START
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  const bool RestrictR15 = FlagRestrictR15;
  assert(FlagUseZeroBasedSandbox || RestrictR15);
  if (Subtarget.isTargetNaCl64()) {
    if (RestrictR15) {
      Reserved.set(X86::R15);
      Reserved.set(X86::R15D);
      Reserved.set(X86::R15W);
      Reserved.set(X86::R15B);
    }
    Reserved.set(X86::RBP);
    Reserved.set(X86::EBP);
    Reserved.set(X86::BP);
    Reserved.set(X86::BPL);
    const bool RestrictR11 = FlagHideSandboxBase && !FlagUseZeroBasedSandbox;
    if (RestrictR11) {
      // Restrict r11 so that it can be used for indirect jump
      // sequences that don't leak the sandbox base address onto the
      // stack.
      Reserved.set(X86::R11);
      Reserved.set(X86::R11D);
      Reserved.set(X86::R11W);
      Reserved.set(X86::R11B);
    }
  }
  // @LOCALMOD-END
  return Reserved;
}
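// Hedged usage note: clients normally consult the cached reservation state in
// MachineRegisterInfo rather than calling getReservedRegs() directly, e.g.:
//   const MachineRegisterInfo &MRI = MF.getRegInfo();
//   if (MRI.isReserved(X86::RSP)) { /* never allocate the stack pointer */ }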
unsigned PatmosFrameLowering::assignFrameObjects(MachineFunction &MF,
                                                 bool UseStackCache) const {
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  PatmosMachineFunctionInfo &PMFI = *MF.getInfo<PatmosMachineFunctionInfo>();

  unsigned maxFrameSize = MFI.getMaxCallFrameSize();

  // Defaults to false (all objects are assigned to the shadow stack).
  BitVector SCFIs(MFI.getObjectIndexEnd());

  if (UseStackCache) {
    assignFIsToStackCache(MF, SCFIs);
  }

  // Assign new offsets to FIs.

  // Next stack slot in the stack cache.
  unsigned int SCOffset = 0;
  // Next stack slot on the shadow stack.
  // Also reserve space for the call frame if we do not use a frame pointer.
  // This must be in sync with PatmosRegisterInfo::eliminateCallFramePseudoInstr.
  unsigned int SSOffset = (hasFP(MF) ? 0 : maxFrameSize);

  DEBUG(dbgs() << "PatmosSC: " << MF.getFunction()->getName() << "\n");
  DEBUG(MFI.print(MF, dbgs()));
  for (unsigned FI = 0, FIe = MFI.getObjectIndexEnd(); FI != FIe; FI++) {
    if (MFI.isDeadObjectIndex(FI))
      continue;

    unsigned FIalignment = MFI.getObjectAlignment(FI);
    int64_t FIsize = MFI.getObjectSize(FI);

    if (FIsize > INT_MAX) {
      report_fatal_error("Frame objects with size > INT_MAX not supported.");
    }

    // Be sure to catch some special stack objects not expected for Patmos.
    assert(!MFI.isFixedObjectIndex(FI) && !MFI.isObjectPreAllocated(FI));

    // Assigned to the stack cache or the shadow stack?
    if (SCFIs[FI]) {
      // Alignment.
      unsigned int next_SCOffset = align(SCOffset, FIalignment);

      // Check if the FI still fits into the SC.
      if (align(next_SCOffset + FIsize, getEffectiveStackCacheBlockSize()) <=
          getEffectiveStackCacheSize()) {
        DEBUG(dbgs() << "PatmosSC: FI: " << FI << " on SC: " << next_SCOffset
                     << "(" << MFI.getObjectOffset(FI) << ")\n");

        // Reassign the stack offset.
        MFI.setObjectOffset(FI, next_SCOffset);

        // Reserve space on the stack cache.
        SCOffset = next_SCOffset + FIsize;

        // The FI is assigned to the SC, process the next FI.
        continue;
      } else {
        // The FI did not fit in the SC -- fall through and put it on the
        // shadow stack.
        SCFIs[FI] = false;
        FIsNotFitSC++;
      }
    }

    // Assign the FI to the shadow stack.
    {
      assert(!SCFIs[FI]);

      // Alignment.
      SSOffset = align(SSOffset, FIalignment);

      DEBUG(dbgs() << "PatmosSC: FI: " << FI << " on SS: " << SSOffset
                   << "(" << MFI.getObjectOffset(FI) << ")\n");

      // Reassign the stack offset.
      MFI.setObjectOffset(FI, SSOffset);

      // Reserve space on the shadow stack.
      SSOffset += FIsize;
    }
  }

  // Align the stack frame on the stack cache.
  unsigned stackCacheSize = align(SCOffset,
                                  getEffectiveStackCacheBlockSize());
  assert(stackCacheSize <= getEffectiveStackCacheSize());

  // Align the shadow stack. Call arguments are already included in SSOffset.
  unsigned stackSize = align(SSOffset, getStackAlignment());

  // Update the offsets of fixed objects.
  for (unsigned FI = MFI.getObjectIndexBegin(), FIe = 0; FI != FIe; FI++) {
    // Reassign the stack offset.
    MFI.setObjectOffset(FI, MFI.getObjectOffset(FI) + stackSize);
  }

  DEBUG(MFI.print(MF, dbgs()));

  // Store the assignment information.
  PMFI.setStackCacheReservedBytes(stackCacheSize);
  PMFI.setStackCacheFIs(SCFIs);
  PMFI.setStackReservedBytes(stackSize);
  MFI.setStackSize(stackSize);

  return stackSize;
}
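// A minimal sketch (an assumption; the Patmos backend defines the real helper
// elsewhere) of the align() used above. It matches the round-up formula
// written out inline in adjustMipsStackFrame further below:
static unsigned align(unsigned Offset, unsigned Alignment) {
  // Round Offset up to the next multiple of Alignment.
  return (Offset + Alignment - 1) / Alignment * Alignment;
}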
unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
  return TFI->hasFP(MF) ? FramePtr : StackPtr;
}
bool PatmosFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  return !MF.getFrameInfo()->hasVarSizedObjects();
}
ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
    : ScheduleDAG(mf), BB(nullptr), DAG(nullptr),
      InstrItins(mf.getSubtarget().getInstrItineraryData()) {}
void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const {
  MachineBasicBlock &MBB = MF.front();   // Prolog goes in entry BB
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
  XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
  DebugLoc dl = (MBBI != MBB.end() ?
                 MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());

  bool FP = hasFP(MF);

  // Work out frame sizes.
  int FrameSize = MFI->getStackSize();

  assert(FrameSize % 4 == 0 && "Misaligned frame size");

  FrameSize /= 4;

  bool isU6 = isImmU6(FrameSize);
  if (!isU6 && !isImmU16(FrameSize)) {
    // FIXME could emit multiple instructions.
    cerr << "emitPrologue Frame size too big: " << FrameSize << "\n";
    abort();
  }
  bool emitFrameMoves = needsFrameMoves(MF);

  // Do we need to allocate space on the stack?
  if (FrameSize) {
    bool saveLR = XFI->getUsesLR();
    bool LRSavedOnEntry = false;
    int Opcode;
    if (saveLR && (MFI->getObjectOffset(XFI->getLRSpillSlot()) == 0)) {
      Opcode = (isU6) ? XCore::ENTSP_u6 : XCore::ENTSP_lu6;
      MBB.addLiveIn(XCore::LR);
      saveLR = false;
      LRSavedOnEntry = true;
    } else {
      Opcode = (isU6) ? XCore::EXTSP_u6 : XCore::EXTSP_lu6;
    }
    BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize);

    if (emitFrameMoves) {
      std::vector<MachineMove> &Moves = MMI->getFrameMoves();

      // Show update of SP.
      unsigned FrameLabelId = MMI->NextLabelID();
      BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addImm(FrameLabelId);

      MachineLocation SPDst(MachineLocation::VirtualFP);
      MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize * 4);
      Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));

      if (LRSavedOnEntry) {
        MachineLocation CSDst(MachineLocation::VirtualFP, 0);
        MachineLocation CSSrc(XCore::LR);
        Moves.push_back(MachineMove(FrameLabelId, CSDst, CSSrc));
      }
    }
    if (saveLR) {
      int LRSpillOffset = MFI->getObjectOffset(XFI->getLRSpillSlot());
      storeToStack(MBB, MBBI, XCore::LR, LRSpillOffset + FrameSize*4, dl);
      MBB.addLiveIn(XCore::LR);

      if (emitFrameMoves) {
        unsigned SaveLRLabelId = MMI->NextLabelID();
        BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addImm(SaveLRLabelId);
        MachineLocation CSDst(MachineLocation::VirtualFP, LRSpillOffset);
        MachineLocation CSSrc(XCore::LR);
        MMI->getFrameMoves().push_back(MachineMove(SaveLRLabelId,
                                                   CSDst, CSSrc));
      }
    }
  }

  if (FP) {
    // Save R10 to the stack.
    int FPSpillOffset = MFI->getObjectOffset(XFI->getFPSpillSlot());
    storeToStack(MBB, MBBI, XCore::R10, FPSpillOffset + FrameSize*4, dl);
    // R10 is live-in. It is killed at the spill.
    MBB.addLiveIn(XCore::R10);
    if (emitFrameMoves) {
      unsigned SaveR10LabelId = MMI->NextLabelID();
      BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addImm(SaveR10LabelId);
      MachineLocation CSDst(MachineLocation::VirtualFP, FPSpillOffset);
      MachineLocation CSSrc(XCore::R10);
      MMI->getFrameMoves().push_back(MachineMove(SaveR10LabelId,
                                                 CSDst, CSSrc));
    }
    // Set the FP from the SP.
    unsigned FramePtr = XCore::R10;
    BuildMI(MBB, MBBI, dl, TII.get(XCore::LDAWSP_ru6), FramePtr)
      .addImm(0);
    if (emitFrameMoves) {
      // Show FP is now valid.
      unsigned FrameLabelId = MMI->NextLabelID();
      BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addImm(FrameLabelId);
      MachineLocation SPDst(FramePtr);
      MachineLocation SPSrc(MachineLocation::VirtualFP);
      MMI->getFrameMoves().push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
    }
  }

  if (emitFrameMoves) {
    // Frame moves for callee saved.
    std::vector<MachineMove> &Moves = MMI->getFrameMoves();
    std::vector<std::pair<unsigned, CalleeSavedInfo> > &SpillLabels =
        XFI->getSpillLabels();
    for (unsigned I = 0, E = SpillLabels.size(); I != E; ++I) {
      unsigned SpillLabel = SpillLabels[I].first;
      CalleeSavedInfo &CSI = SpillLabels[I].second;
      int Offset = MFI->getObjectOffset(CSI.getFrameIdx());
      unsigned Reg = CSI.getReg();
      MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
      MachineLocation CSSrc(Reg);
      Moves.push_back(MachineMove(SpillLabel, CSDst, CSSrc));
    }
  }
}
bool MSP430RegisterInfo::hasReservedCallFrame(const MachineFunction &MF) const {
  return !MF.getFrameInfo()->hasVarSizedObjects();
}
bool IRTranslator::runOnMachineFunction(MachineFunction &MF) {
  const Function &F = *MF.getFunction();
  if (F.empty())
    return false;
  CLI = MF.getSubtarget().getCallLowering();
  MIRBuilder.setMF(MF);
  EntryBuilder.setMF(MF);
  MRI = &MF.getRegInfo();
  DL = &F.getParent()->getDataLayout();
  TPC = &getAnalysis<TargetPassConfig>();

  assert(PendingPHIs.empty() && "stale PHIs");

  // Setup the arguments.
  MachineBasicBlock &MBB = getOrCreateBB(F.front());
  MIRBuilder.setMBB(MBB);
  SmallVector<unsigned, 8> VRegArgs;
  for (const Argument &Arg: F.args())
    VRegArgs.push_back(getOrCreateVReg(Arg));
  bool Succeeded = CLI->lowerFormalArguments(MIRBuilder, F, VRegArgs);
  if (!Succeeded) {
    if (!TPC->isGlobalISelAbortEnabled()) {
      MIRBuilder.getMF().getProperties().set(
          MachineFunctionProperties::Property::FailedISel);
      return false;
    }
    report_fatal_error("Unable to lower arguments");
  }

  // Now that we've got the ABI handling code, it's safe to set a location for
  // any Constants we find in the IR.
  if (MBB.empty())
    EntryBuilder.setMBB(MBB);
  else
    EntryBuilder.setInstr(MBB.back(), /* Before */ false);

  for (const BasicBlock &BB: F) {
    MachineBasicBlock &MBB = getOrCreateBB(BB);
    // Set the insertion point of all the following translations to
    // the end of this basic block.
    MIRBuilder.setMBB(MBB);

    for (const Instruction &Inst: BB) {
      bool Succeeded = translate(Inst);
      if (!Succeeded) {
        if (TPC->isGlobalISelAbortEnabled())
          reportTranslationError(Inst, "unable to translate instruction");
        MF.getProperties().set(
            MachineFunctionProperties::Property::FailedISel);
        break;
      }
    }
  }

  finalizeFunction();

  // Now that the MachineFrameInfo has been configured, no further changes to
  // the reserved registers are possible.
  MRI->freezeReservedRegs(MF);

  return false;
}
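// Hedged note on the FailedISel property set above: when GlobalISel abort is
// disabled (isGlobalISelAbortEnabled() returns false), marking the function
// with MachineFunctionProperties::Property::FailedISel is what lets later
// pipeline stages detect the failure and fall back, rather than crashing here.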
void MCS51FrameLowering::emitEpilogue(MachineFunction &MF,
                                      MachineBasicBlock &MBB) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  MCS51MachineFunctionInfo *MCS51FI = MF.getInfo<MCS51MachineFunctionInfo>();
  const MCS51InstrInfo &TII =
      *static_cast<const MCS51InstrInfo *>(MF.getTarget().getInstrInfo());

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  unsigned RetOpcode = MBBI->getOpcode();
  DebugLoc DL = MBBI->getDebugLoc();

  switch (RetOpcode) {
  case MCS51::RET:
  case MCS51::RETI:
    break;  // These are ok
  default:
    llvm_unreachable("Can only insert epilog into returning blocks");
  }

  // Get the number of bytes to allocate from the FrameInfo
  uint64_t StackSize = MFI->getStackSize();
  unsigned CSSize = MCS51FI->getCalleeSavedFrameSize();
  uint64_t NumBytes = 0;

  if (hasFP(MF)) {
    // Calculate required stack adjustment
    uint64_t FrameSize = StackSize - 2;
    NumBytes = FrameSize - CSSize;

    // pop FPW.
    BuildMI(MBB, MBBI, DL, TII.get(MCS51::POP16r), MCS51::FPW);
  } else
    NumBytes = StackSize - CSSize;

  // Skip the callee-saved pop instructions.
  while (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PI = prior(MBBI);
    unsigned Opc = PI->getOpcode();
    if (Opc != MCS51::POP16r && !PI->isTerminator())
      break;
    --MBBI;
  }

  DL = MBBI->getDebugLoc();

  // If there is an ADD16ri or SUB16ri of SPW immediately before this
  // instruction, merge the two instructions.
  //if (NumBytes || MFI->hasVarSizedObjects())
  //  mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);

  if (MFI->hasVarSizedObjects()) {
    BuildMI(MBB, MBBI, DL, TII.get(MCS51::MOV16rr), MCS51::SPW)
        .addReg(MCS51::FPW);
    if (CSSize) {
      MachineInstr *MI =
          BuildMI(MBB, MBBI, DL, TII.get(MCS51::SUB16ri), MCS51::SPW)
              .addReg(MCS51::SPW).addImm(CSSize);
      // The SRW implicit def is dead.
      MI->getOperand(3).setIsDead();
    }
  } else {
    // adjust stack pointer back: SPW += numbytes
    if (NumBytes) {
      MachineInstr *MI =
          BuildMI(MBB, MBBI, DL, TII.get(MCS51::ADD16ri), MCS51::SPW)
              .addReg(MCS51::SPW).addImm(NumBytes);
      // The SRW implicit def is dead.
      MI->getOperand(3).setIsDead();
    }
  }
}
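// Illustrative sketch (not from the source): for a frame-pointer function
// with an 8-byte local area and no variable-sized objects, the epilogue
// built above corresponds roughly to:
//
//   pop  FPW           ; restore the saved frame pointer
//   ...                ; callee-saved POP16r instructions (skipped over)
//   add  SPW, #8       ; SPW += NumBytes, releasing the local area
//   ret
//
// With variable-sized objects the stack pointer is instead recovered from
// FPW (mov SPW, FPW), and only the callee-saved area is then released.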
void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const {
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
  unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
  unsigned RegSize = Subtarget.isGP32bit() ? 4 : 8;
  bool HasGP = MipsFI->needGPSaveRestore();

  // Min and Max CSI FrameIndex.
  int MinCSFI = -1, MaxCSFI = -1;

  // See the description at MipsMachineFunction.h
  int TopCPUSavedRegOff = -1, TopFPUSavedRegOff = -1;

  // Replace the dummy '0' SPOffset by the negative offsets, as explained on
  // LowerFormalArguments. Leaving '0' for now is necessary to keep
  // calculateFrameObjectOffsets from applying its own layout to these
  // objects.
  MipsFI->adjustLoadArgsFI(MFI);
  MipsFI->adjustStoreVarArgsFI(MFI);

  // It happens that the default stack frame allocation order does not directly
  // map to the convention used for mips. So we must fix it. We move the callee
  // save register slots after the local variables area, as described in the
  // stack frame above.
  unsigned CalleeSavedAreaSize = 0;
  if (!CSI.empty()) {
    MinCSFI = CSI[0].getFrameIdx();
    MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
  }
  for (unsigned i = 0, e = CSI.size(); i != e; ++i)
    CalleeSavedAreaSize += MFI->getObjectAlignment(CSI[i].getFrameIdx());

  unsigned StackOffset = HasGP ? (MipsFI->getGPStackOffset() + RegSize)
                               : (Subtarget.isABI_O32() ? 16 : 0);

  // Adjust local variables. They should come on the stack right
  // after the arguments.
  int LastOffsetFI = -1;
  for (int i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
    if (i >= MinCSFI && i <= MaxCSFI)
      continue;
    if (MFI->isDeadObjectIndex(i))
      continue;
    unsigned Offset =
        StackOffset + MFI->getObjectOffset(i) - CalleeSavedAreaSize;
    if (LastOffsetFI == -1)
      LastOffsetFI = i;
    if (Offset > MFI->getObjectOffset(LastOffsetFI))
      LastOffsetFI = i;
    MFI->setObjectOffset(i, Offset);
  }

  // Adjust CPU Callee Saved Registers Area. Registers RA and FP must
  // be saved in this CPU Area. This whole area must be aligned to the
  // default Stack Alignment requirements.
  if (LastOffsetFI >= 0)
    StackOffset = MFI->getObjectOffset(LastOffsetFI) +
                  MFI->getObjectSize(LastOffsetFI);
  StackOffset = ((StackOffset + StackAlign - 1) / StackAlign * StackAlign);

  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    if (CSI[i].getRegClass() != Mips::CPURegsRegisterClass)
      break;
    MFI->setObjectOffset(CSI[i].getFrameIdx(), StackOffset);
    TopCPUSavedRegOff = StackOffset;
    StackOffset += MFI->getObjectAlignment(CSI[i].getFrameIdx());
  }

  // Stack locations for FP and RA. If only one of them is used,
  // the space must be allocated for both, otherwise no space at all.
  if (hasFP(MF) || MFI->hasCalls()) {
    // FP stack location
    MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true),
                         StackOffset);
    MipsFI->setFPStackOffset(StackOffset);
    TopCPUSavedRegOff = StackOffset;
    StackOffset += RegSize;

    // RA stack location
    MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true),
                         StackOffset);
    MipsFI->setRAStackOffset(StackOffset);
    StackOffset += RegSize;

    if (MFI->hasCalls())
      TopCPUSavedRegOff += RegSize;
  }

  StackOffset = ((StackOffset + StackAlign - 1) / StackAlign * StackAlign);

  // Adjust FPU Callee Saved Registers Area. This Area must be
  // aligned to the default Stack Alignment requirements.
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    if (CSI[i].getRegClass() == Mips::CPURegsRegisterClass)
      continue;
    MFI->setObjectOffset(CSI[i].getFrameIdx(), StackOffset);
    TopFPUSavedRegOff = StackOffset;
    StackOffset += MFI->getObjectAlignment(CSI[i].getFrameIdx());
  }
  StackOffset = ((StackOffset + StackAlign - 1) / StackAlign * StackAlign);

  // Update frame info
  MFI->setStackSize(StackOffset);

  // Recalculate the final tops offset. The final values must be '0'
  // if there isn't a callee saved register for CPU or FPU, otherwise
  // a negative offset is needed.
  if (TopCPUSavedRegOff >= 0)
    MipsFI->setCPUTopSavedRegOff(TopCPUSavedRegOff - StackOffset);
  if (TopFPUSavedRegOff >= 0)
    MipsFI->setFPUTopSavedRegOff(TopFPUSavedRegOff - StackOffset);
}
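// Illustrative only: the frame layout this function produces, from the
// stack pointer upward, is (each region rounded up to StackAlign):
//
//   [ argument / GP save area ]   16 bytes for O32, or GP offset + RegSize
//   [ local variables         ]
//   [ CPU callee-saved regs   ]
//   [ FP and RA slots         ]   allocated together when either is needed
//   [ FPU callee-saved regs   ]
//
// The Top*SavedRegOff values are then rebased to negative offsets from the
// final stack size, or left at '0' when the corresponding area is empty.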
void MCS51FrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I) const {
  const MCS51InstrInfo &TII =
      *static_cast<const MCS51InstrInfo *>(MF.getTarget().getInstrInfo());
  unsigned StackAlign = getStackAlignment();

  if (!hasReservedCallFrame(MF)) {
    // If the stack pointer can be changed after prologue, turn the
    // adjcallstackdown instruction into a 'sub SPW, <amt>' and the
    // adjcallstackup instruction into 'add SPW, <amt>'
    // TODO: consider using push / pop instead of sub + store / add
    MachineInstr *Old = I;
    uint64_t Amount = Old->getOperand(0).getImm();
    if (Amount != 0) {
      // We need to keep the stack aligned properly. To do this, we round the
      // amount of space needed for the outgoing arguments up to the next
      // alignment boundary.
      Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;

      MachineInstr *New = 0;
      if (Old->getOpcode() == TII.getCallFrameSetupOpcode()) {
        New = BuildMI(MF, Old->getDebugLoc(), TII.get(MCS51::SUB16ri),
                      MCS51::SPW)
                  .addReg(MCS51::SPW).addImm(Amount);
      } else {
        assert(Old->getOpcode() == TII.getCallFrameDestroyOpcode());
        // factor out the amount the callee already popped.
        uint64_t CalleeAmt = Old->getOperand(1).getImm();
        Amount -= CalleeAmt;
        if (Amount)
          New = BuildMI(MF, Old->getDebugLoc(), TII.get(MCS51::ADD16ri),
                        MCS51::SPW)
                    .addReg(MCS51::SPW).addImm(Amount);
      }

      if (New) {
        // The SRW implicit def is dead.
        New->getOperand(3).setIsDead();

        // Replace the pseudo instruction with a new instruction...
        MBB.insert(I, New);
      }
    }
  } else if (I->getOpcode() == TII.getCallFrameDestroyOpcode()) {
    // If we are performing frame pointer elimination and if the callee pops
    // something off the stack pointer, add it back.
    if (uint64_t CalleeAmt = I->getOperand(1).getImm()) {
      MachineInstr *Old = I;
      MachineInstr *New =
          BuildMI(MF, Old->getDebugLoc(), TII.get(MCS51::SUB16ri), MCS51::SPW)
              .addReg(MCS51::SPW).addImm(CalleeAmt);
      // The SRW implicit def is dead.
      New->getOperand(3).setIsDead();

      MBB.insert(I, New);
    }
  }

  MBB.erase(I);
}
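// Illustrative sketch (not from the source): with no reserved call frame and
// a 2-byte stack alignment, a call needing 6 bytes of outgoing arguments is
// bracketed by the call-frame pseudos, which this hook rewrites as:
//
//   adjcallstackdown 6     -->   sub SPW, #6
//   call ...
//   adjcallstackup 6, 0    -->   add SPW, #6
//
// If the callee itself pops N bytes, N is folded out of the add here, or,
// when the call frame is reserved, re-subtracted from SPW instead.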
/// EliminateUnconditionalJumpsToTop - Move blocks which unconditionally jump
/// to the loop top to the top of the loop so that they have a fall through.
/// This can introduce a branch on entry to the loop, but it can eliminate a
/// branch within the loop. See the @simple case in
/// test/CodeGen/X86/loop_blocks.ll for an example of this.
bool CodePlacementOpt::EliminateUnconditionalJumpsToTop(MachineFunction &MF,
                                                        MachineLoop *L) {
  bool Changed = false;
  MachineBasicBlock *TopMBB = L->getTopBlock();

  bool BotHasFallthrough = HasFallthrough(L->getBottomBlock());

  if (TopMBB == MF.begin() ||
      HasAnalyzableTerminator(prior(MachineFunction::iterator(TopMBB)))) {
  new_top:
    for (MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin(),
         PE = TopMBB->pred_end(); PI != PE; ++PI) {
      MachineBasicBlock *Pred = *PI;
      if (Pred == TopMBB) continue;
      if (HasFallthrough(Pred)) continue;
      if (!L->contains(Pred)) continue;

      // Verify that we can analyze all the loop entry edges before beginning
      // any changes which will require us to be able to analyze them.
      if (Pred == MF.begin())
        continue;
      if (!HasAnalyzableTerminator(Pred))
        continue;
      if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(Pred))))
        continue;

      // Move the block.
      DEBUG(dbgs() << "CGP: Moving blocks starting at BB#" << Pred->getNumber()
                   << " to top of loop.\n");
      Changed = true;

      // Move it and all the blocks that can reach it via fallthrough edges
      // exclusively, to keep existing fallthrough edges intact.
      MachineFunction::iterator Begin = Pred;
      MachineFunction::iterator End = llvm::next(Begin);
      while (Begin != MF.begin()) {
        MachineFunction::iterator Prior = prior(Begin);
        if (Prior == MF.begin())
          break;
        // Stop when a non-fallthrough edge is found.
        if (!HasFallthrough(Prior))
          break;
        // Stop if a block which could fall-through out of the loop is found.
        if (Prior->isSuccessor(End))
          break;
        // If we've reached the top, stop scanning.
        if (Prior == MachineFunction::iterator(TopMBB)) {
          // We know top currently has a fall through (because we just checked
          // it) which would be lost if we do the transformation, so it isn't
          // worthwhile to do the transformation unless it would expose a new
          // fallthrough edge.
          if (!Prior->isSuccessor(End))
            goto next_pred;
          // Otherwise we can stop scanning and proceed to move the blocks.
          break;
        }
        // If we hit a switch or something complicated, don't move anything
        // for this predecessor.
        if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(Prior))))
          break;
        // Ok, the block prior to Begin will be moved along with the rest.
        // Extend the range to include it.
        Begin = Prior;
        ++NumIntraMoved;
      }

      // Move the blocks.
      Splice(MF, TopMBB, Begin, End);

      // Update TopMBB.
      TopMBB = L->getTopBlock();

      // We have a new loop top. Iterate on it. We shouldn't have to do this
      // too many times if BranchFolding has done a reasonable job.
      goto new_top;
    next_pred:;
    }
  }

  // If the loop previously didn't exit with a fall-through and it now does,
  // we eliminated a branch.
  if (Changed && !BotHasFallthrough && HasFallthrough(L->getBottomBlock())) {
    ++NumIntraElim;
  }

  return Changed;
}
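// Illustrative only (block names invented): a layout this transformation
// improves, where B3 ends in an unconditional jump back to the loop top B1:
//
//   before:  B1 (top) -> B2 -> B3 --jmp--> B1    ; branch on every iteration
//   after:   B3 -> B1 (top) -> B2                ; B3 now falls through to B1
//
// The jump from B3 becomes a fallthrough; entering the loop may now require
// a branch around B3, trading a branch per iteration for one at loop entry.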
void MCS51FrameLowering::emitPrologue(MachineFunction &MF) const {
  MachineBasicBlock &MBB = MF.front();   // Prolog goes in entry BB
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MCS51MachineFunctionInfo *MCS51FI = MF.getInfo<MCS51MachineFunctionInfo>();
  const MCS51InstrInfo &TII =
      *static_cast<const MCS51InstrInfo *>(MF.getTarget().getInstrInfo());

  MachineBasicBlock::iterator MBBI = MBB.begin();
  DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();

  // Get the number of bytes to allocate from the FrameInfo.
  uint64_t StackSize = MFI->getStackSize();

  uint64_t NumBytes = 0;
  if (hasFP(MF)) {
    // Calculate required stack adjustment
    uint64_t FrameSize = StackSize - 2;
    NumBytes = FrameSize - MCS51FI->getCalleeSavedFrameSize();

    // The stack slot for the FPW register is guaranteed to be the last slot
    // by processFunctionBeforeFrameFinalized.
    // Update the frame offset adjustment.
    MFI->setOffsetAdjustment(-NumBytes);

    // Save FPW into the appropriate stack slot...
    BuildMI(MBB, MBBI, DL, TII.get(MCS51::PUSH16r))
        .addReg(MCS51::FPW, RegState::Kill);

    // Update FPW with the new base value...
    BuildMI(MBB, MBBI, DL, TII.get(MCS51::MOV16rr), MCS51::FPW)
        .addReg(MCS51::SPW);

    // Mark the FramePtr as live-in in every block except the entry.
    for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
         I != E; ++I)
      I->addLiveIn(MCS51::FPW);

  } else
    NumBytes = StackSize - MCS51FI->getCalleeSavedFrameSize();

  // Skip the callee-saved push instructions.
  while (MBBI != MBB.end() && (MBBI->getOpcode() == MCS51::PUSH16r))
    ++MBBI;

  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();

  if (NumBytes) { // adjust stack pointer: SPW -= numbytes
    // If there is an SUB16ri of SPW immediately before this instruction, merge
    // the two.
    //NumBytes -= mergeSPUpdates(MBB, MBBI, true);
    // If there is an ADD16ri or SUB16ri of SPW immediately after this
    // instruction, merge the two instructions.
    // mergeSPUpdatesDown(MBB, MBBI, &NumBytes);

    if (NumBytes) {
      MachineInstr *MI =
          BuildMI(MBB, MBBI, DL, TII.get(MCS51::SUB16ri), MCS51::SPW)
              .addReg(MCS51::SPW).addImm(NumBytes);
      // The SRW implicit def is dead.
      MI->getOperand(3).setIsDead();
    }
  }
}
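// Illustrative sketch (not from the source): for a frame-pointer function
// with an 8-byte local area, the prologue built above corresponds roughly
// to the mirror image of the epilogue shown earlier:
//
//   push FPW           ; spill the incoming frame pointer
//   mov  FPW, SPW      ; establish the new frame base
//   ...                ; callee-saved PUSH16r instructions (skipped over)
//   sub  SPW, #8       ; SPW -= NumBytes, allocating the local area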
bool AArch64ConditionOptimizer::runOnMachineFunction(MachineFunction &MF) {
  DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
               << "********** Function: " << MF.getName() << '\n');
  TII = MF.getSubtarget().getInstrInfo();
  DomTree = &getAnalysis<MachineDominatorTree>();

  bool Changed = false;

  // Visit blocks in dominator tree pre-order. The pre-order enables multiple
  // cmp-conversions from the same head block.
  // Note that updateDomTree() modifies the children of the DomTree node
  // currently being visited. The df_iterator supports that; it doesn't look at
  // child_begin() / child_end() until after a node has been visited.
  for (MachineDomTreeNode *I : depth_first(DomTree)) {
    MachineBasicBlock *HBB = I->getBlock();

    SmallVector<MachineOperand, 4> HeadCond;
    MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
    if (TII->AnalyzeBranch(*HBB, TBB, FBB, HeadCond)) {
      continue;
    }

    // Equivalence check is to skip loops.
    if (!TBB || TBB == HBB) {
      continue;
    }

    SmallVector<MachineOperand, 4> TrueCond;
    MachineBasicBlock *TBB_TBB = nullptr, *TBB_FBB = nullptr;
    if (TII->AnalyzeBranch(*TBB, TBB_TBB, TBB_FBB, TrueCond)) {
      continue;
    }

    MachineInstr *HeadCmpMI = findSuitableCompare(HBB);
    if (!HeadCmpMI) {
      continue;
    }

    MachineInstr *TrueCmpMI = findSuitableCompare(TBB);
    if (!TrueCmpMI) {
      continue;
    }

    AArch64CC::CondCode HeadCmp;
    if (HeadCond.empty() || !parseCond(HeadCond, HeadCmp)) {
      continue;
    }

    AArch64CC::CondCode TrueCmp;
    if (TrueCond.empty() || !parseCond(TrueCond, TrueCmp)) {
      continue;
    }

    const int HeadImm = (int)HeadCmpMI->getOperand(2).getImm();
    const int TrueImm = (int)TrueCmpMI->getOperand(2).getImm();

    DEBUG(dbgs() << "Head branch:\n");
    DEBUG(dbgs() << "\tcondition: " << AArch64CC::getCondCodeName(HeadCmp)
                 << '\n');
    DEBUG(dbgs() << "\timmediate: " << HeadImm << '\n');

    DEBUG(dbgs() << "True branch:\n");
    DEBUG(dbgs() << "\tcondition: " << AArch64CC::getCondCodeName(TrueCmp)
                 << '\n');
    DEBUG(dbgs() << "\timmediate: " << TrueImm << '\n');

    if (((HeadCmp == AArch64CC::GT && TrueCmp == AArch64CC::LT) ||
         (HeadCmp == AArch64CC::LT && TrueCmp == AArch64CC::GT)) &&
        std::abs(TrueImm - HeadImm) == 2) {
      // This branch transforms machine instructions that correspond to
      //
      // 1) (a > {TrueImm} && ...) || (a < {HeadImm} && ...)
      // 2) (a < {TrueImm} && ...) || (a > {HeadImm} && ...)
      //
      // into
      //
      // 1) (a >= {NewImm} && ...) || (a <= {NewImm} && ...)
      // 2) (a <= {NewImm} && ...) || (a >= {NewImm} && ...)

      CmpInfo HeadCmpInfo = adjustCmp(HeadCmpMI, HeadCmp);
      CmpInfo TrueCmpInfo = adjustCmp(TrueCmpMI, TrueCmp);
      if (std::get<0>(HeadCmpInfo) == std::get<0>(TrueCmpInfo) &&
          std::get<1>(HeadCmpInfo) == std::get<1>(TrueCmpInfo)) {
        modifyCmp(HeadCmpMI, HeadCmpInfo);
        modifyCmp(TrueCmpMI, TrueCmpInfo);
        Changed = true;
      }
    } else if (((HeadCmp == AArch64CC::GT && TrueCmp == AArch64CC::GT) ||
                (HeadCmp == AArch64CC::LT && TrueCmp == AArch64CC::LT)) &&
               std::abs(TrueImm - HeadImm) == 1) {
      // This branch transforms machine instructions that correspond to
      //
      // 1) (a > {TrueImm} && ...) || (a > {HeadImm} && ...)
      // 2) (a < {TrueImm} && ...) || (a < {HeadImm} && ...)
      //
      // into
      //
      // 1) (a >= {NewImm} && ...) || (a > {NewImm} && ...)
      // 2) (a <= {NewImm} && ...) || (a < {NewImm} && ...)
      //
      // The GT -> GE transformation increases the immediate value, so pick
      // the compare with the smaller immediate; LT -> LE decreases the
      // immediate value, so invert the choice.
      bool adjustHeadCond = (HeadImm < TrueImm);
      if (HeadCmp == AArch64CC::LT) {
        adjustHeadCond = !adjustHeadCond;
      }

      if (adjustHeadCond) {
        Changed |= adjustTo(HeadCmpMI, HeadCmp, TrueCmpMI, TrueImm);
      } else {
        Changed |= adjustTo(TrueCmpMI, TrueCmp, HeadCmpMI, HeadImm);
      }
    }
    // Other transformation cases almost never occur due to generation of < or
    // > comparisons instead of <= and >=.
  }

  return Changed;
}
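// Illustrative only (registers and immediates invented): for the GT/LT case,
// suppose the head block compares for a < 3 and the true block for a > 1;
// the immediates differ by 2, so both compares can be rewritten against the
// midpoint:
//
//   cmp w0, #3 ; b.lt ...   -->   cmp w0, #2 ; b.le ...   // a < 3 == a <= 2
//   cmp w0, #1 ; b.gt ...   -->   cmp w0, #2 ; b.ge ...   // a > 1 == a >= 2
//
// The two compares are now identical, so a later pass can CSE the
// subtraction and keep only the two conditional branches.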
bool MipsExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB,
                                         MachineBasicBlock::iterator I,
                                         MachineBasicBlock::iterator &NMBBI,
                                         unsigned Size) {
  MachineFunction *MF = BB.getParent();

  const bool ArePtrs64bit = STI->getABI().ArePtrs64bit();
  DebugLoc DL = I->getDebugLoc();

  unsigned LL, SC, ZERO, BEQ;

  if (Size == 4) {
    if (STI->inMicroMipsMode()) {
      LL = STI->hasMips32r6() ? Mips::LL_MMR6 : Mips::LL_MM;
      SC = STI->hasMips32r6() ? Mips::SC_MMR6 : Mips::SC_MM;
      BEQ = STI->hasMips32r6() ? Mips::BEQC_MMR6 : Mips::BEQ_MM;
    } else {
      LL = STI->hasMips32r6()
               ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6)
               : (ArePtrs64bit ? Mips::LL64 : Mips::LL);
      SC = STI->hasMips32r6()
               ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6)
               : (ArePtrs64bit ? Mips::SC64 : Mips::SC);
      BEQ = Mips::BEQ;
    }
    ZERO = Mips::ZERO;
  } else {
    LL = STI->hasMips64r6() ? Mips::LLD_R6 : Mips::LLD;
    SC = STI->hasMips64r6() ? Mips::SCD_R6 : Mips::SCD;
    ZERO = Mips::ZERO_64;
    BEQ = Mips::BEQ64;
  }

  unsigned OldVal = I->getOperand(0).getReg();
  unsigned Ptr = I->getOperand(1).getReg();
  unsigned Incr = I->getOperand(2).getReg();
  unsigned Scratch = I->getOperand(3).getReg();

  unsigned Opcode = 0;
  unsigned OR = 0;
  unsigned AND = 0;
  unsigned NOR = 0;
  bool IsNand = false;
  switch (I->getOpcode()) {
  case Mips::ATOMIC_LOAD_ADD_I32_POSTRA:
    Opcode = Mips::ADDu;
    break;
  case Mips::ATOMIC_LOAD_SUB_I32_POSTRA:
    Opcode = Mips::SUBu;
    break;
  case Mips::ATOMIC_LOAD_AND_I32_POSTRA:
    Opcode = Mips::AND;
    break;
  case Mips::ATOMIC_LOAD_OR_I32_POSTRA:
    Opcode = Mips::OR;
    break;
  case Mips::ATOMIC_LOAD_XOR_I32_POSTRA:
    Opcode = Mips::XOR;
    break;
  case Mips::ATOMIC_LOAD_NAND_I32_POSTRA:
    IsNand = true;
    AND = Mips::AND;
    NOR = Mips::NOR;
    break;
  case Mips::ATOMIC_SWAP_I32_POSTRA:
    OR = Mips::OR;
    break;
  case Mips::ATOMIC_LOAD_ADD_I64_POSTRA:
    Opcode = Mips::DADDu;
    break;
  case Mips::ATOMIC_LOAD_SUB_I64_POSTRA:
    Opcode = Mips::DSUBu;
    break;
  case Mips::ATOMIC_LOAD_AND_I64_POSTRA:
    Opcode = Mips::AND64;
    break;
  case Mips::ATOMIC_LOAD_OR_I64_POSTRA:
    Opcode = Mips::OR64;
    break;
  case Mips::ATOMIC_LOAD_XOR_I64_POSTRA:
    Opcode = Mips::XOR64;
    break;
  case Mips::ATOMIC_LOAD_NAND_I64_POSTRA:
    IsNand = true;
    AND = Mips::AND64;
    NOR = Mips::NOR64;
    break;
  case Mips::ATOMIC_SWAP_I64_POSTRA:
    OR = Mips::OR64;
    break;
  default:
    llvm_unreachable("Unknown pseudo atomic!");
  }

  const BasicBlock *LLVM_BB = BB.getBasicBlock();
  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineFunction::iterator It = ++BB.getIterator();
  MF->insert(It, loopMBB);
  MF->insert(It, exitMBB);

  exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end());
  exitMBB->transferSuccessorsAndUpdatePHIs(&BB);

  BB.addSuccessor(loopMBB, BranchProbability::getOne());
  loopMBB->addSuccessor(exitMBB);
  loopMBB->addSuccessor(loopMBB);
  loopMBB->normalizeSuccProbs();

  BuildMI(loopMBB, DL, TII->get(LL), OldVal).addReg(Ptr).addImm(0);
  assert((OldVal != Ptr) && "Clobbered the wrong ptr reg!");
  assert((OldVal != Incr) && "Clobbered the wrong reg!");
  if (Opcode) {
    BuildMI(loopMBB, DL, TII->get(Opcode), Scratch).addReg(OldVal).addReg(Incr);
  } else if (IsNand) {
    assert(AND && NOR &&
           "Unknown nand instruction for atomic pseudo expansion");
    BuildMI(loopMBB, DL, TII->get(AND), Scratch).addReg(OldVal).addReg(Incr);
    BuildMI(loopMBB, DL, TII->get(NOR), Scratch).addReg(ZERO).addReg(Scratch);
  } else {
    assert(OR && "Unknown instruction for atomic pseudo expansion!");
    BuildMI(loopMBB, DL, TII->get(OR), Scratch).addReg(Incr).addReg(ZERO);
  }

  BuildMI(loopMBB, DL, TII->get(SC), Scratch)
      .addReg(Scratch).addReg(Ptr).addImm(0);
  BuildMI(loopMBB, DL, TII->get(BEQ))
      .addReg(Scratch).addReg(ZERO).addMBB(loopMBB);

  NMBBI = BB.end();
  I->eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *loopMBB);
  computeAndAddLiveIns(LiveRegs, *exitMBB);

  return true;
}
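// Illustrative only (register names invented): for ATOMIC_LOAD_ADD_I32_POSTRA
// on a pre-R6 target, the loop built above corresponds to the classic MIPS
// LL/SC retry sequence:
//
// loop:
//   ll    $oldval, 0($ptr)          // load-linked the current value
//   addu  $scratch, $oldval, $incr  // compute the updated value
//   sc    $scratch, 0($ptr)         // store-conditional; scratch = 0 on failure
//   beq   $scratch, $zero, loop     // retry if another write intervened
//
// The swap and nand cases differ only in the middle instruction(s): a move
// via 'or $scratch, $incr, $zero', or an and followed by a nor.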