/// Emit the function prologue for \p MF, inserting at the start of \p MBB.
///
/// What the code below does, in order:
///  - Returns immediately for functions using the GHC calling convention.
///  - If AFI->hasStackFrame() is false, the whole stack size is recorded as
///    local stack; SP is decremented and a def_cfa_offset CFI instruction is
///    emitted unless the size is zero or canUseRedZone(MF) holds. The function
///    then returns.
///  - Otherwise it steps past the leading instructions flagged FrameSetup,
///    sets up FP when hasFP(MF) holds, allocates the remaining frame
///    (going through a scratch register and an AND when the stack needs
///    realignment), copies SP into the base register when one is required,
///    and finally emits CFI instructions when needsFrameMoves is set.
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const Function *Fn = MF.getFunction();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  // CFI instructions are only emitted when there is debug info or the
  // function needs an unwind table entry.
  bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
  bool HasFP = hasFP(MF);

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc DL;

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  int NumBytes = (int)MFI->getStackSize();
  if (!AFI->hasStackFrame()) {
    assert(!HasFP && "unexpected function without stack frame but with FP");

    // All of the stack allocation is for locals.
    AFI->setLocalStackSize(NumBytes);

    // Label used to tie together the PROLOG_LABEL and the MachineMoves.
    // Note: the symbol is created unconditionally but is only consumed by the
    // CFI instruction in the branch below.
    MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();

    // REDZONE: If the stack size is less than 128 bytes, we don't need
    // to actually allocate.
    if (NumBytes && !canUseRedZone(MF)) {
      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);

      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    } else if (NumBytes) {
      // Non-empty frame that lives entirely in the red zone: only count it.
      ++NumRedZoneFunctions;
    }

    return;
  }

  // Only set up FP if we actually need to.
  int FPOffset = 0;
  if (HasFP)
    // Frame pointer is fp = sp - 16.
    FPOffset = AFI->getCalleeSavedStackSize() - 16;

  // Move past the saves of the callee-saved registers.
  MachineBasicBlock::iterator End = MBB.end();
  while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup))
    ++MBBI;

  // From here on NumBytes only covers what is left after the callee-saved
  // area.
  NumBytes -= AFI->getCalleeSavedStackSize();
  assert(NumBytes >= 0 && "Negative stack allocation size!?");
  if (HasFP) {
    // Issue    sub fp, sp, FPOffset or
    //          mov fp,sp          when FPOffset is zero.
    // Note: All stores of callee-saved registers are marked as "FrameSetup".
    // This code marks the instruction(s) that set the FP also.
    emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
                    MachineInstr::FrameSetup);
  }

  // All of the remaining stack allocations are for locals.
  AFI->setLocalStackSize(NumBytes);

  // Allocate space for the rest of the frame.

  const unsigned Alignment = MFI->getMaxAlignment();
  const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
  // When realigning, the decremented SP is first computed into a scratch
  // register and only written to SP by the AND below.
  unsigned scratchSPReg = AArch64::SP;
  if (NumBytes && NeedsRealignment) {
    scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
    assert(scratchSPReg != AArch64::NoRegister);
  }

  // If we're a leaf function, try using the red zone.
  if (NumBytes && !canUseRedZone(MF))
    // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
    // the correct value here, as NumBytes also includes padding bytes,
    // which shouldn't be counted here.
    emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
                    MachineInstr::FrameSetup);

  if (NumBytes && NeedsRealignment) {
    const unsigned NrBitsToZero = countTrailingZeros(Alignment);
    assert(NrBitsToZero > 1);
    assert(scratchSPReg != AArch64::SP);

    // SUB X9, SP, NumBytes
    //   -- X9 is temporary register, so shouldn't contain any live data here,
    //   -- free to use. This is already produced by emitFrameOffset above.
    // AND SP, X9, 0b11111...0000
    // The logical immediates have a non-trivial encoding. The following
    // formula computes the encoded immediate with all ones but
    // NrBitsToZero zero bits as least significant bits.
    uint32_t andMaskEncoded =
        (1                   <<12) // = N
      | ((64-NrBitsToZero)   << 6) // immr
      | ((64-NrBitsToZero-1) << 0) // imms
      ;
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
        .addReg(scratchSPReg, RegState::Kill)
        .addImm(andMaskEncoded);
  }

  // If we need a base pointer, set it up here. It's whatever the value of the
  // stack pointer is at this point. Any variable size objects will be allocated
  // after this, so we can still use the base pointer to reference locals.
  //
  // FIXME: Clarify FrameSetup flags here.
  // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
  // needed.
  if (RegInfo->hasBasePointer(MF)) {
    TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
                     false);
  }

  if (needsFrameMoves) {
    const DataLayout &TD = MF.getDataLayout();
    const int StackGrowth = -TD.getPointerSize(0);
    unsigned FramePtr = RegInfo->getFrameRegister(MF);
    // An example of the prologue:
    //
    //     .globl __foo
    //     .align 2
    //  __foo:
    // Ltmp0:
    //     .cfi_startproc
    //     .cfi_personality 155, ___gxx_personality_v0
    // Leh_func_begin:
    //     .cfi_lsda 16, Lexception33
    //
    //     stp  xa,bx, [sp, -#offset]!
    //     ...
    //     stp  x28, x27, [sp, #offset-32]
    //     stp  fp, lr, [sp, #offset-16]
    //     add  fp, sp, #offset - 16
    //     sub  sp, sp, #1360
    //
    // The Stack:
    //       +-------------------------------------------+
    // 10000 | ........ | ........ | ........ | ........ |
    // 10004 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10008 | ........ | ........ | ........ | ........ |
    // 1000c | ........ | ........ | ........ | ........ |
    //       +===========================================+
    // 10010 |                X28 Register               |
    // 10014 |                X28 Register               |
    //       +-------------------------------------------+
    // 10018 |                X27 Register               |
    // 1001c |                X27 Register               |
    //       +===========================================+
    // 10020 |                Frame Pointer              |
    // 10024 |                Frame Pointer              |
    //       +-------------------------------------------+
    // 10028 |                Link Register              |
    // 1002c |                Link Register              |
    //       +===========================================+
    // 10030 | ........ | ........ | ........ | ........ |
    // 10034 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10038 | ........ | ........ | ........ | ........ |
    // 1003c | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    //
    //     [sp] = 10030        ::    >>initial value<<
    //     sp = 10020          ::  stp fp, lr, [sp, #-16]!
    //     fp = sp == 10020    ::  mov fp, sp
    //     [sp] == 10020       ::  stp x28, x27, [sp, #-16]!
    //     sp == 10010         ::    >>final value<<
    //
    // The frame pointer (w29) points to address 10020. If we use an offset of
    // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
    // for w27, and -32 for w28:
    //
    //  Ltmp1:
    //     .cfi_def_cfa w29, 16
    //  Ltmp2:
    //     .cfi_offset w30, -8
    //  Ltmp3:
    //     .cfi_offset w29, -16
    //  Ltmp4:
    //     .cfi_offset w27, -24
    //  Ltmp5:
    //     .cfi_offset w28, -32

    if (HasFP) {
      // Define the current CFA rule to use the provided FP.
      unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    } else {
      // Encode the stack size of the leaf function.
      // Note: this uses the full MFI stack size, not the reduced NumBytes.
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize()));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }

    // Now emit the moves for whatever callee saved regs we have (including FP,
    // LR if those are saved).
    emitCalleeSavedFrameMoves(MBB, MBBI);
  }
}
/// LowerAtomicPHINode - Lower the PHI node at the top of the specified block,
/// under the assumption that it needs to be lowered in a way that supports
/// atomic execution of PHIs.  This lowering method is always correct all of the
/// time.
///
/// \param MBB          Block whose first instruction is the PHI to lower; the
///                     PHI is unlinked from the block by this function.
/// \param AfterPHIsIt  Insertion point for the replacement COPY or
///                     IMPLICIT_DEF that defines the PHI's destination.
///
/// Besides inserting copies in the predecessor blocks, this keeps
/// VRegPHIUseCount up to date and, when LiveVariables is available, updates
/// its kill/dead information.
void PHIElimination::LowerAtomicPHINode(
                                      MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator AfterPHIsIt) {
  ++NumAtomic;
  // Unlink the PHI node from the basic block, but don't delete the PHI yet.
  MachineInstr *MPhi = MBB.remove(MBB.begin());

  // Operand 0 is the destination; the remaining operands come in
  // (register, predecessor block) pairs.
  unsigned NumSrcs = (MPhi->getNumOperands() - 1) / 2;
  unsigned DestReg = MPhi->getOperand(0).getReg();
  assert(MPhi->getOperand(0).getSubReg() == 0 && "Can't handle sub-reg PHIs");
  bool isDead = MPhi->getOperand(0).isDead();

  // Create a new register for the incoming PHI arguments.
  MachineFunction &MF = *MBB.getParent();
  unsigned IncomingReg = 0;
  bool reusedIncoming = false;  // Is IncomingReg reused from an earlier PHI?

  // Insert a register to register copy at the top of the current block (but
  // after any remaining phi nodes) which copies the new incoming register
  // into the phi node destination.
  const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
  if (isSourceDefinedByImplicitDef(MPhi, MRI))
    // If all sources of a PHI node are implicit_def, just emit an
    // implicit_def instead of a copy.
    BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
            TII->get(TargetOpcode::IMPLICIT_DEF), DestReg);
  else {
    // Can we reuse an earlier PHI node? This only happens for critical edges,
    // typically those created by tail duplication.
    unsigned &entry = LoweredPHIs[MPhi];
    if (entry) {
      // An identical PHI node was already lowered. Reuse the incoming register.
      IncomingReg = entry;
      reusedIncoming = true;
      ++NumReused;
      DEBUG(dbgs() << "Reusing " << PrintReg(IncomingReg) << " for " << *MPhi);
    } else {
      // First time this PHI is seen: allocate a fresh vreg of the
      // destination's class and record it in LoweredPHIs through `entry`.
      const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);
      entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
    }
    BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
            TII->get(TargetOpcode::COPY), DestReg)
      .addReg(IncomingReg);
  }

  // Update live variable information if there is any.
  LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>();
  if (LV) {
    // The instruction just inserted before AfterPHIsIt (COPY or IMPLICIT_DEF).
    MachineInstr *PHICopy = prior(AfterPHIsIt);

    if (IncomingReg) {
      LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg);

      // Increment use count of the newly created virtual register.
      VI.NumUses++;
      LV->setPHIJoin(IncomingReg);

      // When we are reusing the incoming register, it may already have been
      // killed in this block. The old kill will also have been inserted at
      // AfterPHIsIt, so it appears before the current PHICopy.
      if (reusedIncoming)
        if (MachineInstr *OldKill = VI.findKill(&MBB)) {
          DEBUG(dbgs() << "Remove old kill from " << *OldKill);
          LV->removeVirtualRegisterKilled(IncomingReg, OldKill);
          DEBUG(MBB.dump());
        }

      // Add information to LiveVariables to know that the incoming value is
      // killed.  Note that because the value is defined in several places (once
      // each for each incoming block), the "def" block and instruction fields
      // for the VarInfo is not filled in.
      LV->addVirtualRegisterKilled(IncomingReg, PHICopy);
    }

    // Since we are going to be deleting the PHI node, if it is the last use of
    // any registers, or if the value itself is dead, we need to move this
    // information over to the new copy we just inserted.
    LV->removeVirtualRegistersKilled(MPhi);

    // If the result is dead, update LV.
    if (isDead) {
      LV->addVirtualRegisterDead(DestReg, PHICopy);
      LV->removeVirtualRegisterDead(DestReg, MPhi);
    }
  }

  // Adjust the VRegPHIUseCount map to account for the removal of this PHI node.
  for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2)
    --VRegPHIUseCount[BBVRegPair(MPhi->getOperand(i+1).getMBB()->getNumber(),
                                 MPhi->getOperand(i).getReg())];

  // Now loop over all of the incoming arguments, changing them to copy into the
  // IncomingReg register in the corresponding predecessor basic block.
  SmallPtrSet<MachineBasicBlock*, 8> MBBsInsertedInto;
  for (int i = NumSrcs - 1; i >= 0; --i) {
    unsigned SrcReg = MPhi->getOperand(i*2+1).getReg();
    unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg();

    assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
           "Machine PHI Operands must all be virtual registers!");

    // Get the MachineBasicBlock equivalent of the BasicBlock that is the source
    // path the PHI.
    MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB();

    // If source is defined by an implicit def, there is no need to insert a
    // copy.
    MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
    if (DefMI->isImplicitDef()) {
      ImpDefs.insert(DefMI);
      continue;
    }

    // Check to make sure we haven't already emitted the copy for this block.
    // This can happen because PHI nodes may have multiple entries for the same
    // basic block.
    if (!MBBsInsertedInto.insert(&opBlock))
      continue;  // If the copy has already been emitted, we're done.

    // Find a safe location to insert the copy, this may be the first terminator
    // in the block (or end()).
    MachineBasicBlock::iterator InsertPos =
      findPHICopyInsertPoint(&opBlock, &MBB, SrcReg);

    // Insert the copy.
    // No copy is emitted when the incoming register was reused from an
    // earlier PHI or when no incoming register exists (implicit_def case).
    if (!reusedIncoming && IncomingReg)
      BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
              TII->get(TargetOpcode::COPY), IncomingReg).addReg(SrcReg, 0, SrcSubReg);

    // Now update live variable information if we have it.  Otherwise we're done
    if (!LV) continue;

    // We want to be able to insert a kill of the register if this PHI (aka, the
    // copy we just inserted) is the last use of the source value.  Live
    // variable analysis conservatively handles this by saying that the value is
    // live until the end of the block the PHI entry lives in.  If the value
    // really is dead at the PHI copy, there will be no successor blocks which
    // have the value live-in.

    // Also check to see if this register is in use by another PHI node which
    // has not yet been eliminated.  If so, it will be killed at an appropriate
    // point later.

    // Is it used by any PHI instructions in this block?
    bool ValueIsUsed = VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)];

    // Okay, if we now know that the value is not live out of the block, we can
    // add a kill marker in this block saying that it kills the incoming value!
    if (!ValueIsUsed && !LV->isLiveOut(SrcReg, opBlock)) {
      // In our final twist, we have to decide which instruction kills the
      // register.  In most cases this is the copy, however, the first
      // terminator instruction at the end of the block may also use the value.
      // In this case, we should mark *it* as being the killing block, not the
      // copy.
      MachineBasicBlock::iterator KillInst;
      MachineBasicBlock::iterator Term = opBlock.getFirstTerminator();
      if (Term != opBlock.end() && Term->readsRegister(SrcReg)) {
        KillInst = Term;

        // Check that no other terminators use values.
        // NOTE(review): the two string literals below concatenate without a
        // separating space ("unlessthey").
#ifndef NDEBUG
        for (MachineBasicBlock::iterator TI = llvm::next(Term);
             TI != opBlock.end(); ++TI) {
          if (TI->isDebugValue())
            continue;
          assert(!TI->readsRegister(SrcReg) &&
                 "Terminator instructions cannot use virtual registers unless"
                 "they are the first terminator in a block!");
        }
#endif
      } else if (reusedIncoming || !IncomingReg) {
        // We may have to rewind a bit if we didn't insert a copy this time.
        KillInst = Term;
        while (KillInst != opBlock.begin()) {
          --KillInst;
          if (KillInst->isDebugValue())
            continue;
          if (KillInst->readsRegister(SrcReg))
            break;
        }
      } else {
        // We just inserted this copy.
        KillInst = prior(InsertPos);
      }
      assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction");

      // Finally, mark it killed.
      LV->addVirtualRegisterKilled(SrcReg, KillInst);

      // This vreg no longer lives all of the way through opBlock.
      unsigned opBlockNum = opBlock.getNumber();
      LV->getVarInfo(SrcReg).AliveBlocks.reset(opBlockNum);
    }
  }

  // Really delete the PHI instruction now, if it is not in the LoweredPHIs map.
  if (reusedIncoming || !IncomingReg)
    MF.DeleteMachineInstr(MPhi);
}
/// insertCSRSpillsAndRestores - Insert spill and restore code for /// callee saved registers used in the function, handling shrink wrapping. /// void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { // Get callee saved register information. MachineFrameInfo *MFI = Fn.getFrameInfo(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); MFI->setCalleeSavedInfoValid(true); // Early exit if no callee saved registers are modified! if (CSI.empty()) return; const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering(); const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); MachineBasicBlock::iterator I; if (!ShrinkWrapThisFunction) { // Spill using target interface. I = EntryBlock->begin(); if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) { for (unsigned i = 0, e = CSI.size(); i != e; ++i) { // Add the callee-saved register as live-in. // It's killed at the spill. EntryBlock->addLiveIn(CSI[i].getReg()); // Insert the spill to the stack frame. unsigned Reg = CSI[i].getReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); TII.storeRegToStackSlot(*EntryBlock, I, Reg, true, CSI[i].getFrameIdx(), RC, TRI); } } // Restore using target interface. for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) { MachineBasicBlock* MBB = ReturnBlocks[ri]; I = MBB->end(); --I; // Skip over all terminator instructions, which are part of the return // sequence. MachineBasicBlock::iterator I2 = I; while (I2 != MBB->begin() && (--I2)->isTerminator()) I = I2; bool AtStart = I == MBB->begin(); MachineBasicBlock::iterator BeforeI = I; if (!AtStart) --BeforeI; // Restore all registers immediately before the return and any // terminators that precede it. 
if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) { for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); TII.loadRegFromStackSlot(*MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI); assert(I != MBB->begin() && "loadRegFromStackSlot didn't insert any code!"); // Insert in reverse order. loadRegFromStackSlot can insert // multiple instructions. if (AtStart) I = MBB->begin(); else { I = BeforeI; ++I; } } } } return; } // Insert spills. std::vector<CalleeSavedInfo> blockCSI; for (CSRegBlockMap::iterator BI = CSRSave.begin(), BE = CSRSave.end(); BI != BE; ++BI) { MachineBasicBlock* MBB = BI->first; CSRegSet save = BI->second; if (save.empty()) continue; blockCSI.clear(); for (CSRegSet::iterator RI = save.begin(), RE = save.end(); RI != RE; ++RI) { blockCSI.push_back(CSI[*RI]); } assert(blockCSI.size() > 0 && "Could not collect callee saved register info"); I = MBB->begin(); // When shrink wrapping, use stack slot stores/loads. for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) { // Add the callee-saved register as live-in. // It's killed at the spill. MBB->addLiveIn(blockCSI[i].getReg()); // Insert the spill to the stack frame. unsigned Reg = blockCSI[i].getReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); TII.storeRegToStackSlot(*MBB, I, Reg, true, blockCSI[i].getFrameIdx(), RC, TRI); } } for (CSRegBlockMap::iterator BI = CSRRestore.begin(), BE = CSRRestore.end(); BI != BE; ++BI) { MachineBasicBlock* MBB = BI->first; CSRegSet restore = BI->second; if (restore.empty()) continue; blockCSI.clear(); for (CSRegSet::iterator RI = restore.begin(), RE = restore.end(); RI != RE; ++RI) { blockCSI.push_back(CSI[*RI]); } assert(blockCSI.size() > 0 && "Could not find callee saved register info"); // If MBB is empty and needs restores, insert at the _beginning_. 
if (MBB->empty()) { I = MBB->begin(); } else { I = MBB->end(); --I; // Skip over all terminator instructions, which are part of the // return sequence. if (! I->isTerminator()) { ++I; } else { MachineBasicBlock::iterator I2 = I; while (I2 != MBB->begin() && (--I2)->isTerminator()) I = I2; } } bool AtStart = I == MBB->begin(); MachineBasicBlock::iterator BeforeI = I; if (!AtStart) --BeforeI; // Restore all registers immediately before the return and any // terminators that precede it. for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) { unsigned Reg = blockCSI[i].getReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); TII.loadRegFromStackSlot(*MBB, I, Reg, blockCSI[i].getFrameIdx(), RC, TRI); assert(I != MBB->begin() && "loadRegFromStackSlot didn't insert any code!"); // Insert in reverse order. loadRegFromStackSlot can insert // multiple instructions. if (AtStart) I = MBB->begin(); else { I = BeforeI; ++I; } } } }
// Branch analysis. // Note: If the condition register is set to CTR or CTR8 then this is a // BDNZ (imm == 1) or BDZ (imm == 0) branch. bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const { bool isPPC64 = TM.getSubtargetImpl()->isPPC64(); // If the block has no terminators, it just falls into the block after it. MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin()) return false; --I; while (I->isDebugValue()) { if (I == MBB.begin()) return false; --I; } if (!isUnpredicatedTerminator(I)) return false; // Get the last instruction in the block. MachineInstr *LastInst = I; // If there is only one terminator instruction, process it. if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { if (LastInst->getOpcode() == PPC::B) { if (!LastInst->getOperand(0).isMBB()) return true; TBB = LastInst->getOperand(0).getMBB(); return false; } else if (LastInst->getOpcode() == PPC::BCC) { if (!LastInst->getOperand(2).isMBB()) return true; // Block ends with fall-through condbranch. TBB = LastInst->getOperand(2).getMBB(); Cond.push_back(LastInst->getOperand(0)); Cond.push_back(LastInst->getOperand(1)); return false; } else if (LastInst->getOpcode() == PPC::BDNZ8 || LastInst->getOpcode() == PPC::BDNZ) { if (!LastInst->getOperand(0).isMBB()) return true; if (DisableCTRLoopAnal) return true; TBB = LastInst->getOperand(0).getMBB(); Cond.push_back(MachineOperand::CreateImm(1)); Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR, true)); return false; } else if (LastInst->getOpcode() == PPC::BDZ8 || LastInst->getOpcode() == PPC::BDZ) { if (!LastInst->getOperand(0).isMBB()) return true; if (DisableCTRLoopAnal) return true; TBB = LastInst->getOperand(0).getMBB(); Cond.push_back(MachineOperand::CreateImm(0)); Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR, true)); return false; } // Otherwise, don't know what this is. 
return true; } // Get the instruction before it if it's a terminator. MachineInstr *SecondLastInst = I; // If there are three terminators, we don't know what sort of block this is. if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) return true; // If the block ends with PPC::B and PPC:BCC, handle it. if (SecondLastInst->getOpcode() == PPC::BCC && LastInst->getOpcode() == PPC::B) { if (!SecondLastInst->getOperand(2).isMBB() || !LastInst->getOperand(0).isMBB()) return true; TBB = SecondLastInst->getOperand(2).getMBB(); Cond.push_back(SecondLastInst->getOperand(0)); Cond.push_back(SecondLastInst->getOperand(1)); FBB = LastInst->getOperand(0).getMBB(); return false; } else if ((SecondLastInst->getOpcode() == PPC::BDNZ8 || SecondLastInst->getOpcode() == PPC::BDNZ) && LastInst->getOpcode() == PPC::B) { if (!SecondLastInst->getOperand(0).isMBB() || !LastInst->getOperand(0).isMBB()) return true; if (DisableCTRLoopAnal) return true; TBB = SecondLastInst->getOperand(0).getMBB(); Cond.push_back(MachineOperand::CreateImm(1)); Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR, true)); FBB = LastInst->getOperand(0).getMBB(); return false; } else if ((SecondLastInst->getOpcode() == PPC::BDZ8 || SecondLastInst->getOpcode() == PPC::BDZ) && LastInst->getOpcode() == PPC::B) { if (!SecondLastInst->getOperand(0).isMBB() || !LastInst->getOperand(0).isMBB()) return true; if (DisableCTRLoopAnal) return true; TBB = SecondLastInst->getOperand(0).getMBB(); Cond.push_back(MachineOperand::CreateImm(0)); Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR, true)); FBB = LastInst->getOperand(0).getMBB(); return false; } // If the block ends with two PPC:Bs, handle it. The second one is not // executed, so remove it. 
if (SecondLastInst->getOpcode() == PPC::B && LastInst->getOpcode() == PPC::B) { if (!SecondLastInst->getOperand(0).isMBB()) return true; TBB = SecondLastInst->getOperand(0).getMBB(); I = LastInst; if (AllowModify) I->eraseFromParent(); return false; } // Otherwise, can't handle this. return true; }
bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { bool Modified = false; MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); while (MBBI != E) { MachineInstr &MI = *MBBI; MachineBasicBlock::iterator NMBBI = llvm::next(MBBI); unsigned Opcode = MI.getOpcode(); switch (Opcode) { default: break; case ARM::tLDRpci_pic: case ARM::t2LDRpci_pic: { unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic) ? ARM::tLDRpci : ARM::t2LDRpci; unsigned DstReg = MI.getOperand(0).getReg(); if (!MI.getOperand(0).isDead()) { MachineInstr *NewMI = AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewLdOpc), DstReg) .addOperand(MI.getOperand(1))); NewMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD)) .addReg(DstReg, getDefRegState(true)) .addReg(DstReg) .addOperand(MI.getOperand(2)); } MI.eraseFromParent(); Modified = true; break; } case ARM::t2MOVi32imm: { unsigned DstReg = MI.getOperand(0).getReg(); if (!MI.getOperand(0).isDead()) { const MachineOperand &MO = MI.getOperand(1); MachineInstrBuilder LO16, HI16; LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVi16), DstReg); HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVTi16)) .addReg(DstReg, getDefRegState(true)).addReg(DstReg); if (MO.isImm()) { unsigned Imm = MO.getImm(); unsigned Lo16 = Imm & 0xffff; unsigned Hi16 = (Imm >> 16) & 0xffff; LO16 = LO16.addImm(Lo16); HI16 = HI16.addImm(Hi16); } else { GlobalValue *GV = MO.getGlobal(); unsigned TF = MO.getTargetFlags(); LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16); HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16); // FIXME: What's about memoperands? } AddDefaultPred(LO16); AddDefaultPred(HI16); } MI.eraseFromParent(); Modified = true; } // FIXME: expand t2MOVi32imm }
/// Walk \p MBB and place a t2IT instruction in front of every run of
/// predicated instructions, forming IT blocks.
///
/// For each instruction whose predicate (as reported by getITInstrPredicate)
/// is not AL, a t2IT is built before it and following instructions are
/// absorbed while their predicate equals the first one or its opposite, up to
/// the limit tracked by Pos. An unpredicated instruction accepted by
/// MoveCopyOutOfITBlock is moved in front of the IT instead of ending the
/// block. Every instruction in the block gets an implicit ITSTATE use, and
/// the last one is marked as killing it.
///
/// \returns true if at least one IT instruction was inserted.
bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
  bool Modified = false;

  // Registers defined / used by the instructions of the IT block being
  // formed; reset for every block and consulted by MoveCopyOutOfITBlock.
  SmallSet<unsigned, 4> Defs;
  SmallSet<unsigned, 4> Uses;
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineInstr *MI = &*MBBI;
    DebugLoc dl = MI->getDebugLoc();
    unsigned PredReg = 0;
    ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg);
    // Instructions with the AL predicate do not start an IT block.
    if (CC == ARMCC::AL) {
      ++MBBI;
      continue;
    }

    Defs.clear();
    Uses.clear();
    TrackDefUses(MI, Defs, Uses, TRI);

    // Insert an IT instruction.
    // The mask operand is appended further down, once the block is complete.
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(ARM::t2IT))
      .addImm(CC);

    // Add implicit use of ITSTATE to IT block instructions.
    MI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*ifDef*/,
                                             true/*isImp*/, false/*isKill*/));

    MachineInstr *LastITMI = MI;
    // Instructions moved out of the block are re-inserted before the IT.
    MachineBasicBlock::iterator InsertPos = MIB;
    ++MBBI;

    // Form IT block.
    ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
    // Pos counts the remaining slots (three after the first instruction) and
    // doubles as the bit position written into Mask.
    unsigned Mask = 0, Pos = 3;
    // Branches, including tricky ones like LDM_RET, need to end an IT
    // block so check the instruction we just put in the block.
    for (; MBBI != E && Pos &&
           (!MI->getDesc().isBranch() && !MI->getDesc().isReturn()) ; ++MBBI) {
      if (MBBI->isDebugValue())
        continue;

      MachineInstr *NMI = &*MBBI;
      MI = NMI;

      unsigned NPredReg = 0;
      ARMCC::CondCodes NCC = llvm::getITInstrPredicate(NMI, NPredReg);
      if (NCC == CC || NCC == OCC) {
        // Record the low bit of this instruction's condition at slot Pos.
        Mask |= (NCC & 1) << Pos;
        // Add implicit use of ITSTATE.
        NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*ifDef*/,
                                               true/*isImp*/, false/*isKill*/));
        LastITMI = NMI;
      } else {
        if (NCC == ARMCC::AL &&
            MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses)) {
          // Step the iterator back before unlinking NMI so that the loop's
          // ++MBBI lands on the instruction that followed it.
          --MBBI;
          MBB.remove(NMI);
          MBB.insert(InsertPos, NMI);
          ++NumMovedInsts;
          continue;
        }
        break;
      }
      TrackDefUses(NMI, Defs, Uses, TRI);
      --Pos;
    }

    // Finalize IT mask.
    Mask |= (1 << Pos);
    // Tag along (firstcond[0] << 4) with the mask.
    Mask |= (CC & 1) << 4;
    MIB.addImm(Mask);

    // Last instruction in IT block kills ITSTATE.
    LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill();

    Modified = true;
    ++NumITs;
  }

  return Modified;
}
void MCS51FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); MCS51MachineFunctionInfo *MCS51FI = MF.getInfo<MCS51MachineFunctionInfo>(); const MCS51InstrInfo &TII = *static_cast<const MCS51InstrInfo*>(MF.getTarget().getInstrInfo()); MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); unsigned RetOpcode = MBBI->getOpcode(); DebugLoc DL = MBBI->getDebugLoc(); switch (RetOpcode) { case MCS51::RET: case MCS51::RETI: break; // These are ok default: llvm_unreachable("Can only insert epilog into returning blocks"); } // Get the number of bytes to allocate from the FrameInfo uint64_t StackSize = MFI->getStackSize(); unsigned CSSize = MCS51FI->getCalleeSavedFrameSize(); uint64_t NumBytes = 0; if (hasFP(MF)) { // Calculate required stack adjustment uint64_t FrameSize = StackSize - 2; NumBytes = FrameSize - CSSize; // pop FPW. BuildMI(MBB, MBBI, DL, TII.get(MCS51::POP16r), MCS51::FPW); } else NumBytes = StackSize - CSSize; // Skip the callee-saved pop instructions. while (MBBI != MBB.begin()) { MachineBasicBlock::iterator PI = prior(MBBI); unsigned Opc = PI->getOpcode(); if (Opc != MCS51::POP16r && !PI->isTerminator()) break; --MBBI; } DL = MBBI->getDebugLoc(); // If there is an ADD16ri or SUB16ri of SPW immediately before this // instruction, merge the two instructions. //if (NumBytes || MFI->hasVarSizedObjects()) // mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes); if (MFI->hasVarSizedObjects()) { BuildMI(MBB, MBBI, DL, TII.get(MCS51::MOV16rr), MCS51::SPW).addReg(MCS51::FPW); if (CSSize) { MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(MCS51::SUB16ri), MCS51::SPW) .addReg(MCS51::SPW).addImm(CSSize); // The SRW implicit def is dead. MI->getOperand(3).setIsDead(); } } else { // adjust stack pointer back: SPW += numbytes if (NumBytes) { MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(MCS51::ADD16ri), MCS51::SPW) .addReg(MCS51::SPW).addImm(NumBytes); // The SRW implicit def is dead. 
MI->getOperand(3).setIsDead(); } } }
/// Expand conditional branches whose destination is too far away for the
/// 16-bit displacement check below into an inverted short branch over an
/// unconditional branch.
///
/// The blocks are renumbered and measured first. When the whole function is
/// smaller than 1 << 15 bytes nothing can be out of range and the function
/// returns false right away. Otherwise branches are expanded repeatedly until
/// a full sweep makes no change, because each expansion grows its block by
/// four bytes and can push other branches out of range.
///
/// \returns true if at least one branch was expanded, false otherwise. Block
/// renumbering alone is not reported as a change, matching the early exit.
bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
  const PPCInstrInfo *TII =
                static_cast<const PPCInstrInfo*>(Fn.getTarget().getInstrInfo());
  // Give the blocks of the function a dense, in-order, numbering.
  Fn.RenumberBlocks();
  BlockSizes.resize(Fn.getNumBlockIDs());

  // Measure each MBB and compute a size for the entire function.
  unsigned FuncSize = 0;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock *MBB = MFI;

    unsigned BlockSize = 0;
    for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end();
         MBBI != EE; ++MBBI)
      BlockSize += TII->GetInstSizeInBytes(MBBI);

    BlockSizes[MBB->getNumber()] = BlockSize;
    FuncSize += BlockSize;
  }

  // If the entire function is smaller than the displacement of a branch field,
  // we know we don't need to shrink any branches in this function.  This is a
  // common case.
  if (FuncSize < (1 << 15)) {
    BlockSizes.clear();
    return false;
  }

  // For each conditional branch, if the offset to its destination is larger
  // than the offset field allows, transform it into a long branch sequence
  // like this:
  //   short branch:
  //     bCC MBB
  //   long branch:
  //     b!CC $PC+8
  //     b MBB
  //
  bool MadeChange = true;
  bool EverMadeChange = false;
  while (MadeChange) {
    // Iteratively expand branches until we reach a fixed point.
    MadeChange = false;

    for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
         ++MFI) {
      MachineBasicBlock &MBB = *MFI;
      // Byte offset of the current instruction from the start of MBB.
      unsigned MBBStartOffset = 0;
      for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
           I != IE; ++I) {
        // Find the destination block of a conditional branch; the operand
        // index of the target depends on the branch opcode. Branches whose
        // target operand is already an immediate are left alone.
        MachineBasicBlock *Dest = nullptr;
        if (I->getOpcode() == PPC::BCC && !I->getOperand(2).isImm())
          Dest = I->getOperand(2).getMBB();
        else if ((I->getOpcode() == PPC::BC || I->getOpcode() == PPC::BCn) &&
                 !I->getOperand(1).isImm())
          Dest = I->getOperand(1).getMBB();
        else if ((I->getOpcode() == PPC::BDNZ8 || I->getOpcode() == PPC::BDNZ ||
                  I->getOpcode() == PPC::BDZ8  || I->getOpcode() == PPC::BDZ) &&
                 !I->getOperand(0).isImm())
          Dest = I->getOperand(0).getMBB();

        if (!Dest) {
          MBBStartOffset += TII->GetInstSizeInBytes(I);
          continue;
        }

        // Determine the offset from the current branch to the destination
        // block.
        int BranchSize;
        if (Dest->getNumber() <= MBB.getNumber()) {
          // If this is a backwards branch, the delta is the offset from the
          // start of this block to this branch, plus the sizes of all blocks
          // from this block to the dest.
          BranchSize = MBBStartOffset;

          for (unsigned i = Dest->getNumber(), e = MBB.getNumber(); i != e; ++i)
            BranchSize += BlockSizes[i];
        } else {
          // Otherwise, add the size of the blocks between this block and the
          // dest to the number of bytes left in this block.
          BranchSize = -MBBStartOffset;

          for (unsigned i = MBB.getNumber(), e = Dest->getNumber(); i != e; ++i)
            BranchSize += BlockSizes[i];
        }

        // If this branch is in range, ignore it.
        if (isInt<16>(BranchSize)) {
          MBBStartOffset += 4;
          continue;
        }

        // Otherwise, we have to expand it to a long branch.
        MachineInstr *OldBranch = I;
        DebugLoc dl = OldBranch->getDebugLoc();

        // Emit the inverted form of the branch with an immediate operand of
        // 2, i.e. skipping the unconditional branch that follows.
        if (I->getOpcode() == PPC::BCC) {
          // The BCC operands are:
          // 0. PPC branch predicate
          // 1. CR register
          // 2. Target MBB
          PPC::Predicate Pred = (PPC::Predicate)I->getOperand(0).getImm();
          unsigned CRReg = I->getOperand(1).getReg();

          // Jump over the uncond branch inst (i.e. $PC+8) on opposite condition.
          BuildMI(MBB, I, dl, TII->get(PPC::BCC))
            .addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2);
        } else if (I->getOpcode() == PPC::BC) {
          unsigned CRBit = I->getOperand(0).getReg();
          BuildMI(MBB, I, dl, TII->get(PPC::BCn)).addReg(CRBit).addImm(2);
        } else if (I->getOpcode() == PPC::BCn) {
          unsigned CRBit = I->getOperand(0).getReg();
          BuildMI(MBB, I, dl, TII->get(PPC::BC)).addReg(CRBit).addImm(2);
        } else if (I->getOpcode() == PPC::BDNZ) {
          BuildMI(MBB, I, dl, TII->get(PPC::BDZ)).addImm(2);
        } else if (I->getOpcode() == PPC::BDNZ8) {
          BuildMI(MBB, I, dl, TII->get(PPC::BDZ8)).addImm(2);
        } else if (I->getOpcode() == PPC::BDZ) {
          BuildMI(MBB, I, dl, TII->get(PPC::BDNZ)).addImm(2);
        } else if (I->getOpcode() == PPC::BDZ8) {
          BuildMI(MBB, I, dl, TII->get(PPC::BDNZ8)).addImm(2);
        } else {
           llvm_unreachable("Unhandled branch type!");
        }

        // Uncond branch to the real destination.
        // I now refers to the new branch, so the loop's ++I continues after
        // it.
        I = BuildMI(MBB, I, dl, TII->get(PPC::B)).addMBB(Dest);

        // Remove the old branch from the function.
        OldBranch->eraseFromParent();

        // Remember that this instruction is 8-bytes, increase the size of the
        // block by 4, remember to iterate.
        BlockSizes[MBB.getNumber()] += 4;
        MBBStartOffset += 8;
        ++NumExpanded;
        MadeChange = true;
      }
    }
    EverMadeChange |= MadeChange;
  }

  BlockSizes.clear();
  // Report a modification only if some branch was actually expanded.
  return EverMadeChange;
}
/// insertCSRSpillsAndRestores - Insert spill and restore code for /// callee saved registers used in the function. /// void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { // Get callee saved register information. MachineFrameInfo *MFI = Fn.getFrameInfo(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); MFI->setCalleeSavedInfoValid(true); // Early exit if no callee saved registers are modified! if (CSI.empty()) return; const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo(); const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); MachineBasicBlock::iterator I; // Spill using target interface. I = EntryBlock->begin(); if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) { for (unsigned i = 0, e = CSI.size(); i != e; ++i) { // Add the callee-saved register as live-in. // It's killed at the spill. EntryBlock->addLiveIn(CSI[i].getReg()); // Insert the spill to the stack frame. unsigned Reg = CSI[i].getReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); TII.storeRegToStackSlot(*EntryBlock, I, Reg, true, CSI[i].getFrameIdx(), RC, TRI); } } // Restore using target interface. for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) { MachineBasicBlock *MBB = ReturnBlocks[ri]; I = MBB->end(); --I; // Skip over all terminator instructions, which are part of the return // sequence. MachineBasicBlock::iterator I2 = I; while (I2 != MBB->begin() && (--I2)->isTerminator()) I = I2; bool AtStart = I == MBB->begin(); MachineBasicBlock::iterator BeforeI = I; if (!AtStart) --BeforeI; // Restore all registers immediately before the return and any // terminators that precede it. 
if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) { for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); TII.loadRegFromStackSlot(*MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI); assert(I != MBB->begin() && "loadRegFromStackSlot didn't insert any code!"); // Insert in reverse order. loadRegFromStackSlot can insert // multiple instructions. if (AtStart) I = MBB->begin(); else { I = BeforeI; ++I; } } } } }
/// Walk the specified loop in the CFG (defined by all blocks dominated by the /// specified header block, and that are in the current loop) in depth first /// order w.r.t the DominatorTree. This allows us to visit definitions before /// uses, allowing us to hoist a loop body in one pass without iteration. /// void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { MachineBasicBlock *Preheader = getCurPreheader(); if (!Preheader) return; SmallVector<MachineDomTreeNode*, 32> Scopes; SmallVector<MachineDomTreeNode*, 8> WorkList; DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap; DenseMap<MachineDomTreeNode*, unsigned> OpenChildren; // Perform a DFS walk to determine the order of visit. WorkList.push_back(HeaderN); while (!WorkList.empty()) { MachineDomTreeNode *Node = WorkList.pop_back_val(); assert(Node && "Null dominator tree node?"); MachineBasicBlock *BB = Node->getBlock(); // If the header of the loop containing this basic block is a landing pad, // then don't try to hoist instructions out of this loop. const MachineLoop *ML = MLI->getLoopFor(BB); if (ML && ML->getHeader()->isEHPad()) continue; // If this subregion is not in the top level loop at all, exit. if (!CurLoop->contains(BB)) continue; Scopes.push_back(Node); const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); unsigned NumChildren = Children.size(); // Don't hoist things out of a large switch statement. This often causes // code to be hoisted that wasn't going to be executed, and increases // register pressure in a situation where it's likely to matter. if (BB->succ_size() >= 25) NumChildren = 0; OpenChildren[Node] = NumChildren; // Add children in reverse order as then the next popped worklist node is // the first child of this node. This means we ultimately traverse the // DOM tree in exactly the same order as if we'd recursed. 
for (int i = (int)NumChildren-1; i >= 0; --i) { MachineDomTreeNode *Child = Children[i]; ParentMap[Child] = Node; WorkList.push_back(Child); } } if (Scopes.size() == 0) return; // Compute registers which are livein into the loop headers. RegSeen.clear(); BackTrace.clear(); InitRegPressure(Preheader); // Now perform LICM. for (MachineDomTreeNode *Node : Scopes) { MachineBasicBlock *MBB = Node->getBlock(); EnterScope(MBB); // Process the block SpeculationState = SpeculateUnknown; for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); MII != E; ) { MachineBasicBlock::iterator NextMII = MII; ++NextMII; MachineInstr *MI = &*MII; if (!Hoist(MI, Preheader)) UpdateRegPressure(MI); MII = NextMII; } // If it's a leaf node, it's done. Traverse upwards to pop ancestors. ExitScopeIfDone(Node, OpenChildren, ParentMap); } }
/// Scan \p MBB from top to bottom and delete COPY instructions that are
/// redundant within the block.
///
/// Two kinds of copies are removed:
///  * a copy that merely undoes an earlier, still-available copy
///    (B = COPY A ... A = COPY B), and
///  * when the block has no successors, copies whose destination is never
///    read afterwards in the block and is not a reserved register.
///
/// The pass must run after register allocation: encountering a virtual
/// register is a fatal error.
///
/// \returns true if at least one instruction was erased.
bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
  SmallSetVector<MachineInstr*, 8> MaybeDeadCopies;  // Candidates for deletion
  DenseMap<unsigned, MachineInstr*> AvailCopyMap;    // Def -> available copies map
  DenseMap<unsigned, MachineInstr*> CopyMap;         // Def -> copies map
  DenseMap<unsigned, unsigned> SrcMap;               // Src -> Def map

  bool Changed = false;
  for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) {
    // Advance before touching MI: MI may be erased below.
    MachineInstr *MI = &*I;
    ++I;

    if (MI->isCopy()) {
      unsigned Def = MI->getOperand(0).getReg();
      unsigned Src = MI->getOperand(1).getReg();

      if (TargetRegisterInfo::isVirtualRegister(Def) ||
          TargetRegisterInfo::isVirtualRegister(Src))
        report_fatal_error("MachineCopyPropagation should be run after"
                           " register allocation!");

      DenseMap<unsigned, MachineInstr*>::iterator CI = AvailCopyMap.find(Src);
      if (CI != AvailCopyMap.end()) {
        MachineInstr *CopyMI = CI->second;
        unsigned SrcSrc = CopyMI->getOperand(1).getReg();
        if (!ReservedRegs.test(Def) &&
            (!ReservedRegs.test(Src) || NoInterveningSideEffect(CopyMI, MI)) &&
            (SrcSrc == Def || TRI->isSubRegister(SrcSrc, Def))) {
          // The two copies cancel out and the source of the first copy
          // hasn't been overridden, eliminate the second one. e.g.
          //  %ECX<def> = COPY %EAX<kill>
          //  ... nothing clobbered EAX.
          //  %EAX<def> = COPY %ECX
          // =>
          //  %ECX<def> = COPY %EAX
          //
          // Also avoid eliminating a copy from reserved registers unless the
          // definition is proven not clobbered. e.g.
          // %RSP<def> = COPY %RAX
          // CALL
          // %RAX<def> = COPY %RSP

          // The first copy's source stays live past it now, so it can no
          // longer be marked as killed there.
          CopyMI->getOperand(1).setIsKill(false);
          MI->eraseFromParent();
          Changed = true;
          ++NumDeletes;
          continue;
        }
      }

      // If Src is defined by a previous copy, that previous copy is read
      // here and therefore cannot be eliminated. The same holds for copies
      // that define any register aliasing Src.
      CI = CopyMap.find(Src);
      if (CI != CopyMap.end())
        MaybeDeadCopies.remove(CI->second);
      for (const unsigned *AS = TRI->getAliasSet(Src); *AS; ++AS) {
        CI = CopyMap.find(*AS);
        if (CI != CopyMap.end())
          MaybeDeadCopies.remove(CI->second);
      }

      // Copy is now a candidate for deletion.
      MaybeDeadCopies.insert(MI);

      // If 'Src' is previously source of another copy, then this earlier copy's
      // source is no longer available. e.g.
      // %xmm9<def> = copy %xmm2
      // ...
      // %xmm2<def> = copy %xmm0
      // ...
      // %xmm2<def> = copy %xmm9
      SourceNoLongerAvailable(Def, SrcMap, AvailCopyMap);

      // Remember Def is defined by the copy. Sub-registers of Def are
      // recorded as defined by the same copy.
      CopyMap[Def] = MI;
      AvailCopyMap[Def] = MI;
      for (const unsigned *SR = TRI->getSubRegisters(Def); *SR; ++SR) {
        CopyMap[*SR] = MI;
        AvailCopyMap[*SR] = MI;
      }

      // Remember source that's copied to Def. Once it's clobbered, then
      // it's no longer available for copy propagation.
      SrcMap[Src] = Def;

      continue;
    }

    // Not a copy. Collect the registers it defines; every register it reads
    // keeps the copy that produced it (or an alias of it) alive.
    SmallVector<unsigned, 2> Defs;
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (!Reg)
        continue;

      if (TargetRegisterInfo::isVirtualRegister(Reg))
        report_fatal_error("MachineCopyPropagation should be run after"
                           " register allocation!");

      if (MO.isDef()) {
        Defs.push_back(Reg);
        continue;
      }

      // If 'Reg' is defined by a copy, the copy is no longer a candidate
      // for elimination.
      DenseMap<unsigned, MachineInstr*>::iterator CI = CopyMap.find(Reg);
      if (CI != CopyMap.end())
        MaybeDeadCopies.remove(CI->second);
      for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
        CI = CopyMap.find(*AS);
        if (CI != CopyMap.end())
          MaybeDeadCopies.remove(CI->second);
      }
    }

    // Defs are processed only after all operands have been scanned, so the
    // uses of this instruction are seen against the maps as they were
    // before it executed.
    for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
      unsigned Reg = Defs[i];

      // No longer defined by a copy.
      CopyMap.erase(Reg);
      AvailCopyMap.erase(Reg);
      for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
        CopyMap.erase(*AS);
        AvailCopyMap.erase(*AS);
      }

      // If 'Reg' is previously source of a copy, it is no longer available for
      // copy propagation.
      SourceNoLongerAvailable(Reg, SrcMap, AvailCopyMap);
    }
  }

  // If MBB doesn't have successors, delete the copies whose defs are not used.
  // If MBB does have successors, then conservative assume the defs are live-out
  // since we don't want to trust live-in lists.
  if (MBB.succ_empty()) {
    for (SmallSetVector<MachineInstr*, 8>::iterator
           DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
         DI != DE; ++DI) {
      // Copies into reserved registers are always kept.
      if (!ReservedRegs.test((*DI)->getOperand(0).getReg())) {
        (*DI)->eraseFromParent();
        Changed = true;
        ++NumDeletes;
      }
    }
  }

  return Changed;
}
bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const { // Most of the code and comments here are boilerplate. // Start from the bottom of the block and work up, examining the // terminator instructions. MachineBasicBlock::iterator I = MBB.end(); while (I != MBB.begin()) { --I; if (I->isDebugValue()) continue; // Working from the bottom, when we see a non-terminator instruction, we're // done. if (!isUnpredicatedTerminator(I)) break; // A terminator that isn't a branch can't easily be handled by this // analysis. unsigned ThisCond; const MachineOperand *ThisTarget; if (!isBranch(I, ThisCond, ThisTarget)) return true; // Can't handle indirect branches. if (!ThisTarget->isMBB()) return true; if (ThisCond == SystemZ::CCMASK_ANY) { // Handle unconditional branches. if (!AllowModify) { TBB = ThisTarget->getMBB(); continue; } // If the block has any instructions after a JMP, delete them. while (llvm::next(I) != MBB.end()) llvm::next(I)->eraseFromParent(); Cond.clear(); FBB = 0; // Delete the JMP if it's equivalent to a fall-through. if (MBB.isLayoutSuccessor(ThisTarget->getMBB())) { TBB = 0; I->eraseFromParent(); I = MBB.end(); continue; } // TBB is used to indicate the unconditinal destination. TBB = ThisTarget->getMBB(); continue; } // Working from the bottom, handle the first conditional branch. if (Cond.empty()) { // FIXME: add X86-style branch swap FBB = TBB; TBB = ThisTarget->getMBB(); Cond.push_back(MachineOperand::CreateImm(ThisCond)); continue; } // Handle subsequent conditional branches. assert(Cond.size() == 1); assert(TBB); // Only handle the case where all conditional branches branch to the same // destination. if (TBB != ThisTarget->getMBB()) return true; // If the conditions are the same, we can leave them alone. unsigned OldCond = Cond[0].getImm(); if (OldCond == ThisCond) continue; // FIXME: Try combining conditions like X86 does. 
Should be easy on Z! } return false; }
bool mprocInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const { // Start from the bottom of the block and work up, examining the // terminator instructions. MachineBasicBlock::iterator I = MBB.end(); while (I != MBB.begin()) { --I; if (I->isDebugValue()) continue; // Working from the bottom, when we see a non-terminator // instruction, we're done. if (!isUnpredicatedTerminator(I)) break; // A terminator that isn't a branch can't easily be handled // by this analysis. if (!I->isBranch()) return true; // Cannot handle indirect branches. /*if (I->getOpcode() == mproc::Br || I->getOpcode() == mproc::Bm) return true; // Handle unconditional branches. if (I->getOpcode() == mproc::JMP) { if (!AllowModify) { TBB = I->getOperand(0).getMBB(); continue; } // If the block has any instructions after a JMP, delete them. while (llvm::next(I) != MBB.end()) llvm::next(I)->eraseFromParent(); Cond.clear(); FBB = 0; // Delete the JMP if it's equivalent to a fall-through. if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { TBB = 0; I->eraseFromParent(); I = MBB.end(); continue; } // TBB is used to indicate the unconditinal destination. TBB = I->getOperand(0).getMBB(); continue; } */ // Handle conditional branches. // assert(I->getOpcode() == mproc::JCC && "Invalid conditional branch"); mprocCC::CondCodes BranchCode = static_cast<mprocCC::CondCodes>(I->getOperand(1).getImm()); if (BranchCode == mprocCC::COND_INVALID) return true; // Can't handle weird stuff. // Working from the bottom, handle the first conditional branch. if (Cond.empty()) { FBB = TBB; TBB = I->getOperand(0).getMBB(); Cond.push_back(MachineOperand::CreateImm(BranchCode)); continue; } // Handle subsequent conditional branches. Only handle the case where all // conditional branches branch to the same destination. 
assert(Cond.size() == 1); assert(TBB); // Only handle the case where all conditional branches branch to // the same destination. if (TBB != I->getOperand(0).getMBB()) return true; mprocCC::CondCodes OldBranchCode = (mprocCC::CondCodes)Cond[0].getImm(); // If the conditions are the same, we can leave them alone. if (OldBranchCode == BranchCode) continue; return true; } return false; }
/// Emit the epilogue for \p MBB: release the local stack area, restore SP
/// (from FP when the function has a frame pointer) and pop any argument area
/// this function is responsible for.
///
/// The callee-saved register restores are expected to be present in the block
/// already, marked with MachineInstr::FrameDestroy; the SP adjustments
/// emitted here are placed relative to them. Functions using the GHC calling
/// convention get no epilogue.
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL;
  bool IsTailCallReturn = false;
  // The block may contain no non-debug instruction at all; only inspect MBBI
  // when it is valid.
  if (MBB.end() != MBBI) {
    DL = MBBI->getDebugLoc();
    unsigned RetOpcode = MBBI->getOpcode();
    IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
      RetOpcode == AArch64::TCRETURNri;
  }
  int NumBytes = MFI->getStackSize();
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  // Initial and residual are named for consistency with the prologue. Note that
  // in the epilogue, the residual adjustment is executed first.
  // NOTE(review): no variables named "initial"/"residual" exist in this
  // function; the remark above looks like a leftover - confirm and drop.
  uint64_t ArgumentPopSize = 0;
  if (IsTailCallReturn) {
    MachineOperand &StackAdjust = MBBI->getOperand(1);

    // For a tail-call in a callee-pops-arguments environment, some or all of
    // the stack may actually be in use for the call's arguments, this is
    // calculated during LowerCall and consumed here...
    ArgumentPopSize = StackAdjust.getImm();
  } else {
    // ... otherwise the amount to pop is *all* of the argument space,
    // conveniently stored in the MachineFunctionInfo by
    // LowerFormalArguments. This will, of course, be zero for the C calling
    // convention.
    ArgumentPopSize = AFI->getArgumentStackToRestore();
  }

  // The stack frame should be like below,
  //
  //      ----------------------                     ---
  //      |                    |                      |
  //      | BytesInStackArgArea|              CalleeArgStackSize
  //      | (NumReusableBytes) |                (of tail call)
  //      |                    |                     ---
  //      |                    |                      |
  //      ---------------------|        ---           |
  //      |                    |         |            |
  //      |   CalleeSavedReg   |         |            |
  //      | (CalleeSavedStackSize)|      |            |
  //      |                    |         |            |
  //      ---------------------|         |         NumBytes
  //      |                    |     StackSize  (StackAdjustUp)
  //      |   LocalStackSize   |         |            |
  //      | (covering callee   |         |            |
  //      |   args)            |         |            |
  //      |                    |         |            |
  //      ----------------------        ---          ---
  //
  // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
  //             = StackSize + ArgumentPopSize
  //
  // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
  // it as the 2nd argument of AArch64ISD::TC_RETURN.

  // Move past the restores of the callee-saved registers.
  // Walk backwards from the first terminator over the contiguous run of
  // FrameDestroy instructions; LastPopI ends up on the first of them (or on
  // the first terminator if there are none).
  MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
  MachineBasicBlock::iterator Begin = MBB.begin();
  while (LastPopI != Begin) {
    --LastPopI;
    if (!LastPopI->getFlag(MachineInstr::FrameDestroy)) {
      ++LastPopI;
      break;
    }
  }
  // From here on NumBytes is the size of the local area only.
  NumBytes -= AFI->getCalleeSavedStackSize();
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  if (!hasFP(MF)) {
    bool RedZone = canUseRedZone(MF);
    // If this was a redzone leaf function, we don't need to restore the
    // stack pointer (but we may need to pop stack args for fastcc).
    if (RedZone && ArgumentPopSize == 0)
      return;

    // Without callee-save restores between the two adjustments, the local
    // pop and the argument pop can be folded into a single SP update.
    bool NoCalleeSaveRestore = AFI->getCalleeSavedStackSize() == 0;
    int StackRestoreBytes = RedZone ? 0 : NumBytes;
    if (NoCalleeSaveRestore)
      StackRestoreBytes += ArgumentPopSize;
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
                    StackRestoreBytes, TII, MachineInstr::FrameDestroy);
    // If we were able to combine the local stack pop with the argument pop,
    // then we're done.
    if (NoCalleeSaveRestore || ArgumentPopSize == 0)
      return;
    // The locals are gone; skip the FP-based restore below and fall through
    // to the argument pop.
    NumBytes = 0;
  }

  // Restore the original stack pointer.
  // FIXME: Rather than doing the math here, we should instead just use
  // non-post-indexed loads for the restores if we aren't actually going to
  // be able to save any instructions.
  if (NumBytes || MFI->hasVarSizedObjects())
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
                    -AFI->getCalleeSavedStackSize() + 16, TII,
                    MachineInstr::FrameDestroy);

  // This must be placed after the callee-save restore code because that code
  // assumes the SP is at the same location as it was after the callee-save save
  // code in the prologue.
  if (ArgumentPopSize)
    emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP,
                    AArch64::SP, ArgumentPopSize, TII,
                    MachineInstr::FrameDestroy);
}
bool MipsExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB, MachineBasicBlock::iterator I, MachineBasicBlock::iterator &NMBBI, unsigned Size) { MachineFunction *MF = BB.getParent(); const bool ArePtrs64bit = STI->getABI().ArePtrs64bit(); DebugLoc DL = I->getDebugLoc(); unsigned LL, SC, ZERO, BEQ; if (Size == 4) { if (STI->inMicroMipsMode()) { LL = STI->hasMips32r6() ? Mips::LL_MMR6 : Mips::LL_MM; SC = STI->hasMips32r6() ? Mips::SC_MMR6 : Mips::SC_MM; BEQ = STI->hasMips32r6() ? Mips::BEQC_MMR6 : Mips::BEQ_MM; } else { LL = STI->hasMips32r6() ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6) : (ArePtrs64bit ? Mips::LL64 : Mips::LL); SC = STI->hasMips32r6() ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6) : (ArePtrs64bit ? Mips::SC64 : Mips::SC); BEQ = Mips::BEQ; } ZERO = Mips::ZERO; } else { LL = STI->hasMips64r6() ? Mips::LLD_R6 : Mips::LLD; SC = STI->hasMips64r6() ? Mips::SCD_R6 : Mips::SCD; ZERO = Mips::ZERO_64; BEQ = Mips::BEQ64; } unsigned OldVal = I->getOperand(0).getReg(); unsigned Ptr = I->getOperand(1).getReg(); unsigned Incr = I->getOperand(2).getReg(); unsigned Scratch = I->getOperand(3).getReg(); unsigned Opcode = 0; unsigned OR = 0; unsigned AND = 0; unsigned NOR = 0; bool IsNand = false; switch (I->getOpcode()) { case Mips::ATOMIC_LOAD_ADD_I32_POSTRA: Opcode = Mips::ADDu; break; case Mips::ATOMIC_LOAD_SUB_I32_POSTRA: Opcode = Mips::SUBu; break; case Mips::ATOMIC_LOAD_AND_I32_POSTRA: Opcode = Mips::AND; break; case Mips::ATOMIC_LOAD_OR_I32_POSTRA: Opcode = Mips::OR; break; case Mips::ATOMIC_LOAD_XOR_I32_POSTRA: Opcode = Mips::XOR; break; case Mips::ATOMIC_LOAD_NAND_I32_POSTRA: IsNand = true; AND = Mips::AND; NOR = Mips::NOR; break; case Mips::ATOMIC_SWAP_I32_POSTRA: OR = Mips::OR; break; case Mips::ATOMIC_LOAD_ADD_I64_POSTRA: Opcode = Mips::DADDu; break; case Mips::ATOMIC_LOAD_SUB_I64_POSTRA: Opcode = Mips::DSUBu; break; case Mips::ATOMIC_LOAD_AND_I64_POSTRA: Opcode = Mips::AND64; break; case Mips::ATOMIC_LOAD_OR_I64_POSTRA: Opcode = Mips::OR64; break; 
case Mips::ATOMIC_LOAD_XOR_I64_POSTRA: Opcode = Mips::XOR64; break; case Mips::ATOMIC_LOAD_NAND_I64_POSTRA: IsNand = true; AND = Mips::AND64; NOR = Mips::NOR64; break; case Mips::ATOMIC_SWAP_I64_POSTRA: OR = Mips::OR64; break; default: llvm_unreachable("Unknown pseudo atomic!"); } const BasicBlock *LLVM_BB = BB.getBasicBlock(); MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineFunction::iterator It = ++BB.getIterator(); MF->insert(It, loopMBB); MF->insert(It, exitMBB); exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end()); exitMBB->transferSuccessorsAndUpdatePHIs(&BB); BB.addSuccessor(loopMBB, BranchProbability::getOne()); loopMBB->addSuccessor(exitMBB); loopMBB->addSuccessor(loopMBB); loopMBB->normalizeSuccProbs(); BuildMI(loopMBB, DL, TII->get(LL), OldVal).addReg(Ptr).addImm(0); assert((OldVal != Ptr) && "Clobbered the wrong ptr reg!"); assert((OldVal != Incr) && "Clobbered the wrong reg!"); if (Opcode) { BuildMI(loopMBB, DL, TII->get(Opcode), Scratch).addReg(OldVal).addReg(Incr); } else if (IsNand) { assert(AND && NOR && "Unknown nand instruction for atomic pseudo expansion"); BuildMI(loopMBB, DL, TII->get(AND), Scratch).addReg(OldVal).addReg(Incr); BuildMI(loopMBB, DL, TII->get(NOR), Scratch).addReg(ZERO).addReg(Scratch); } else { assert(OR && "Unknown instruction for atomic pseudo expansion!"); BuildMI(loopMBB, DL, TII->get(OR), Scratch).addReg(Incr).addReg(ZERO); } BuildMI(loopMBB, DL, TII->get(SC), Scratch).addReg(Scratch).addReg(Ptr).addImm(0); BuildMI(loopMBB, DL, TII->get(BEQ)).addReg(Scratch).addReg(ZERO).addMBB(loopMBB); NMBBI = BB.end(); I->eraseFromParent(); LivePhysRegs LiveRegs; computeAndAddLiveIns(LiveRegs, *loopMBB); computeAndAddLiveIns(LiveRegs, *exitMBB); return true; }
/// SinkInstruction - Determine whether it is safe to sink the specified machine
/// instruction out of its current block into a successor, and do so if it is.
///
/// \param MI            the instruction to try to sink.
/// \param SawStore      passed through to MachineInstr::isSafeToMove for the
///                      initial safety check.
/// \param AllSuccessors cache handed to FindSuccToSinkTo.
///
/// \returns true if \p MI (together with the debug values collected for it)
/// was moved into a successor block; false if it was left in place. When the
/// move would need a critical edge to be split, the split is only requested
/// via PostponeSplitCriticalEdge and false is returned for this attempt.
bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore,
                                     AllSuccsCache &AllSuccessors) {
  // Don't sink instructions that the target prefers not to sink.
  if (!TII->shouldSink(*MI))
    return false;

  // Check if it's safe to move the instruction.
  if (!MI->isSafeToMove(AA, SawStore))
    return false;

  // Convergent operations may not be made control-dependent on additional
  // values.
  if (MI->isConvergent())
    return false;

  // Don't break implicit null checks.  This is a performance heuristic, and not
  // required for correctness.
  if (SinkingPreventsImplicitNullCheck(MI, TII, TRI))
    return false;

  // FIXME: This should include support for sinking instructions within the
  // block they are currently in to shorten the live ranges.  We often get
  // instructions sunk into the top of a large block, but it would be better to
  // also sink them down before their first use in the block.  This xform has to
  // be careful not to *increase* register pressure though, e.g. sinking
  // "x = y + z" down if it kills y and z would increase the live ranges of y
  // and z and only shrink the live range of x.

  bool BreakPHIEdge = false;
  MachineBasicBlock *ParentBlock = MI->getParent();
  MachineBasicBlock *SuccToSinkTo =
      FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge, AllSuccessors);

  // If there are no outputs, it must have side-effects.
  if (!SuccToSinkTo)
    return false;

  // If the instruction to move defines a dead physical register which is live
  // when leaving the basic block, don't move it because it could turn into a
  // "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>)
  // The loop below rejects the move for any physical-register operand that
  // is a live-in of the destination block.
  for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
    const MachineOperand &MO = MI->getOperand(I);
    if (!MO.isReg()) continue;
    unsigned Reg = MO.getReg();
    if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
    if (SuccToSinkTo->isLiveIn(Reg))
      return false;
  }

  DEBUG(dbgs() << "Sink instr " << *MI << "\tinto block " << *SuccToSinkTo);

  // If the block has multiple predecessors, this is a critical edge.
  // Decide if we can sink along it or need to break the edge.
  if (SuccToSinkTo->pred_size() > 1) {
    // We cannot sink a load across a critical edge - there may be stores in
    // other code paths.
    bool TryBreak = false;
    // Re-run the safety check pretending a store has been seen, which is the
    // conservative assumption for the other incoming paths.
    bool store = true;
    if (!MI->isSafeToMove(AA, store)) {
      DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n");
      TryBreak = true;
    }

    // We don't want to sink across a critical edge if we don't dominate the
    // successor. We could be introducing calculations to new code paths.
    if (!TryBreak && !DT->dominates(ParentBlock, SuccToSinkTo)) {
      DEBUG(dbgs() << " *** NOTE: Critical edge found\n");
      TryBreak = true;
    }

    // Don't sink instructions into a loop.
    if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) {
      DEBUG(dbgs() << " *** NOTE: Loop header found\n");
      TryBreak = true;
    }

    // Otherwise we are OK with sinking along a critical edge.
    if (!TryBreak)
      DEBUG(dbgs() << "Sinking along critical edge.\n");
    else {
      // Mark this edge as to be split.
      // If the edge can actually be split, the next iteration of the main loop
      // will sink MI in the newly created block.
      bool Status =
        PostponeSplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge);
      if (!Status)
        DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
              "break critical edge\n");
      // The instruction will not be sunk this time.
      return false;
    }
  }

  if (BreakPHIEdge) {
    // BreakPHIEdge is true if all the uses are in the successor MBB being
    // sunken into and they are all PHI nodes. In this case, machine-sink must
    // break the critical edge first.
    bool Status = PostponeSplitCriticalEdge(MI, ParentBlock,
                                            SuccToSinkTo, BreakPHIEdge);
    if (!Status)
      DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
            "break critical edge\n");
    // The instruction will not be sunk this time.
    return false;
  }

  // Determine where to insert into. Skip phi nodes.
  MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin();
  while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI())
    ++InsertPos;

  // collect matching debug values.
  // This is done before MI is moved.
  SmallVector<MachineInstr *, 2> DbgValuesToSink;
  collectDebugValues(MI, DbgValuesToSink);

  // Move the instruction.
  SuccToSinkTo->splice(InsertPos, ParentBlock, MI,
                       ++MachineBasicBlock::iterator(MI));

  // Move debug values.
  // Each is spliced in front of the same InsertPos, i.e. after MI.
  for (SmallVectorImpl<MachineInstr *>::iterator DBI = DbgValuesToSink.begin(),
         DBE = DbgValuesToSink.end(); DBI != DBE; ++DBI) {
    MachineInstr *DbgMI = *DBI;
    SuccToSinkTo->splice(InsertPos, ParentBlock,  DbgMI,
                         ++MachineBasicBlock::iterator(DbgMI));
  }

  // Conservatively, clear any kill flags, since it's possible that they are no
  // longer correct.
  // Note that we have to clear the kill flags for any register this instruction
  // uses as we may sink over another instruction which currently kills the
  // used registers.
  for (MachineOperand &MO : MI->operands()) {
    if (MO.isReg() && MO.isUse())
      RegsToClearKillFlags.set(MO.getReg()); // Remember to clear kill flags.
  }

  return true;
}
bool MipsExpandPseudo::expandAtomicCmpSwapSubword( MachineBasicBlock &BB, MachineBasicBlock::iterator I, MachineBasicBlock::iterator &NMBBI) { MachineFunction *MF = BB.getParent(); const bool ArePtrs64bit = STI->getABI().ArePtrs64bit(); DebugLoc DL = I->getDebugLoc(); unsigned LL, SC; unsigned ZERO = Mips::ZERO; unsigned BNE = Mips::BNE; unsigned BEQ = Mips::BEQ; unsigned SEOp = I->getOpcode() == Mips::ATOMIC_CMP_SWAP_I8_POSTRA ? Mips::SEB : Mips::SEH; if (STI->inMicroMipsMode()) { LL = STI->hasMips32r6() ? Mips::LL_MMR6 : Mips::LL_MM; SC = STI->hasMips32r6() ? Mips::SC_MMR6 : Mips::SC_MM; BNE = STI->hasMips32r6() ? Mips::BNEC_MMR6 : Mips::BNE_MM; BEQ = STI->hasMips32r6() ? Mips::BEQC_MMR6 : Mips::BEQ_MM; } else { LL = STI->hasMips32r6() ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6) : (ArePtrs64bit ? Mips::LL64 : Mips::LL); SC = STI->hasMips32r6() ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6) : (ArePtrs64bit ? Mips::SC64 : Mips::SC); } unsigned Dest = I->getOperand(0).getReg(); unsigned Ptr = I->getOperand(1).getReg(); unsigned Mask = I->getOperand(2).getReg(); unsigned ShiftCmpVal = I->getOperand(3).getReg(); unsigned Mask2 = I->getOperand(4).getReg(); unsigned ShiftNewVal = I->getOperand(5).getReg(); unsigned ShiftAmnt = I->getOperand(6).getReg(); unsigned Scratch = I->getOperand(7).getReg(); unsigned Scratch2 = I->getOperand(8).getReg(); // insert new blocks after the current block const BasicBlock *LLVM_BB = BB.getBasicBlock(); MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineFunction::iterator It = ++BB.getIterator(); MF->insert(It, loop1MBB); MF->insert(It, loop2MBB); MF->insert(It, sinkMBB); MF->insert(It, exitMBB); // Transfer the remainder of BB and its successor edges to exitMBB. 
exitMBB->splice(exitMBB->begin(), &BB, std::next(MachineBasicBlock::iterator(I)), BB.end()); exitMBB->transferSuccessorsAndUpdatePHIs(&BB); // thisMBB: // ... // fallthrough --> loop1MBB BB.addSuccessor(loop1MBB, BranchProbability::getOne()); loop1MBB->addSuccessor(sinkMBB); loop1MBB->addSuccessor(loop2MBB); loop1MBB->normalizeSuccProbs(); loop2MBB->addSuccessor(loop1MBB); loop2MBB->addSuccessor(sinkMBB); loop2MBB->normalizeSuccProbs(); sinkMBB->addSuccessor(exitMBB, BranchProbability::getOne()); // loop1MBB: // ll dest, 0(ptr) // and Mask', dest, Mask // bne Mask', ShiftCmpVal, exitMBB BuildMI(loop1MBB, DL, TII->get(LL), Scratch).addReg(Ptr).addImm(0); BuildMI(loop1MBB, DL, TII->get(Mips::AND), Scratch2) .addReg(Scratch) .addReg(Mask); BuildMI(loop1MBB, DL, TII->get(BNE)) .addReg(Scratch2).addReg(ShiftCmpVal).addMBB(sinkMBB); // loop2MBB: // and dest, dest, mask2 // or dest, dest, ShiftNewVal // sc dest, dest, 0(ptr) // beq dest, $0, loop1MBB BuildMI(loop2MBB, DL, TII->get(Mips::AND), Scratch) .addReg(Scratch, RegState::Kill) .addReg(Mask2); BuildMI(loop2MBB, DL, TII->get(Mips::OR), Scratch) .addReg(Scratch, RegState::Kill) .addReg(ShiftNewVal); BuildMI(loop2MBB, DL, TII->get(SC), Scratch) .addReg(Scratch, RegState::Kill) .addReg(Ptr) .addImm(0); BuildMI(loop2MBB, DL, TII->get(BEQ)) .addReg(Scratch, RegState::Kill) .addReg(ZERO) .addMBB(loop1MBB); // sinkMBB: // srl srlres, Mask', shiftamt // sign_extend dest,srlres BuildMI(sinkMBB, DL, TII->get(Mips::SRLV), Dest) .addReg(Scratch2) .addReg(ShiftAmnt); if (STI->hasMips32r2()) { BuildMI(sinkMBB, DL, TII->get(SEOp), Dest).addReg(Dest); } else { const unsigned ShiftImm = I->getOpcode() == Mips::ATOMIC_CMP_SWAP_I16_POSTRA ? 
16 : 24; BuildMI(sinkMBB, DL, TII->get(Mips::SLL), Dest) .addReg(Dest, RegState::Kill) .addImm(ShiftImm); BuildMI(sinkMBB, DL, TII->get(Mips::SRA), Dest) .addReg(Dest, RegState::Kill) .addImm(ShiftImm); } LivePhysRegs LiveRegs; computeAndAddLiveIns(LiveRegs, *loop1MBB); computeAndAddLiveIns(LiveRegs, *loop2MBB); computeAndAddLiveIns(LiveRegs, *sinkMBB); computeAndAddLiveIns(LiveRegs, *exitMBB); NMBBI = BB.end(); I->eraseFromParent(); return true; }
/// Emit the epilogue for a returning block.
///
/// The last non-debug instruction of \p MBB is treated as the return. A
/// TC_RETURNdi / TC_RETURNxi pseudo is replaced by a real TAIL_Bimm / TAIL_BRx
/// branch, and the stack-pointer adjustments are inserted around the run of
/// callee-saved register restores that precedes the return.
///
/// \param MF  function whose frame information drives the adjustments.
/// \param MBB returning block that receives the epilogue instructions.
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  AArch64MachineFunctionInfo *FuncInfo =
      MF.getInfo<AArch64MachineFunctionInfo>();

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  DebugLoc DL = MBBI->getDebugLoc();
  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned RetOpcode = MBBI->getOpcode();

  // Initial and residual are named for consistency with the prologue. Note
  // that in the epilogue, the residual adjustment is executed first.
  uint64_t NumInitialBytes = FuncInfo->getInitialStackAdjust();
  uint64_t NumResidualBytes = MFI.getStackSize() - NumInitialBytes;
  uint64_t ArgumentPopSize = 0;
  if (RetOpcode == AArch64::TC_RETURNdi ||
      RetOpcode == AArch64::TC_RETURNxi) {
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    MachineOperand &StackAdjust = MBBI->getOperand(1);

    // For a tail-call in a callee-pops-arguments environment, some or all of
    // the stack may actually be in use for the call's arguments, this is
    // calculated during LowerCall and consumed here...
    //
    // The immediate must be read now: StackAdjust refers to an operand of the
    // TC_RETURN pseudo, which is erased below, so reading it afterwards would
    // touch a deleted instruction.
    ArgumentPopSize = StackAdjust.getImm();

    MachineInstrBuilder MIB;
    if (RetOpcode == AArch64::TC_RETURNdi) {
      MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_Bimm));
      if (JumpTarget.isGlobal()) {
        MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                             JumpTarget.getTargetFlags());
      } else {
        assert(JumpTarget.isSymbol() && "unexpected tail call destination");
        MIB.addExternalSymbol(JumpTarget.getSymbolName(),
                              JumpTarget.getTargetFlags());
      }
    } else {
      assert(RetOpcode == AArch64::TC_RETURNxi && JumpTarget.isReg() &&
             "Unexpected tail call");

      MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_BRx));
      MIB.addReg(JumpTarget.getReg(), RegState::Kill);
    }

    // Add the extra operands onto the new tail call instruction even though
    // they're not used directly (so that liveness is tracked properly etc).
    for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
      MIB->addOperand(MBBI->getOperand(i));

    // Delete the pseudo instruction TC_RETURN and continue from the real
    // branch that was just inserted in front of it.
    MachineInstr *NewMI = prior(MBBI);
    MBB.erase(MBBI);
    MBBI = NewMI;
  } else {
    // ... otherwise the amount to pop is *all* of the argument space,
    // conveniently stored in the MachineFunctionInfo by
    // LowerFormalArguments. This will, of course, be zero for the C calling
    // convention.
    ArgumentPopSize = FuncInfo->getArgumentStackToRestore();
  }

  assert(NumInitialBytes % 16 == 0 && NumResidualBytes % 16 == 0 &&
         "refusing to adjust stack by misaligned amt");

  // We may need to address callee-saved registers differently, so find out the
  // bound on the frame indices.
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  int MinCSFI = 0;
  int MaxCSFI = -1;

  if (CSI.size()) {
    MinCSFI = CSI[0].getFrameIdx();
    MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
  }

  // The "residual" stack update comes first from this direction and guarantees
  // that SP is NumInitialBytes below its value on function entry, either by a
  // direct update or restoring it from the frame pointer.
  if (NumInitialBytes + ArgumentPopSize != 0) {
    emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16,
                 NumInitialBytes + ArgumentPopSize);
    --MBBI;
  }

  // MBBI now points to the instruction just past the last callee-saved
  // restoration (either RET/B if NumInitialBytes == 0, or the "ADD sp, sp"
  // otherwise).

  // Now we need to find out where to put the bulk of the stack adjustment.
  // Walk backwards over every instruction that references a callee-saved
  // frame index; FirstEpilogue ends up at the earliest of them.
  MachineBasicBlock::iterator FirstEpilogue = MBBI;
  while (MBBI != MBB.begin()) {
    --MBBI;

    unsigned FrameOp;
    for (FrameOp = 0; FrameOp < MBBI->getNumOperands(); ++FrameOp) {
      if (MBBI->getOperand(FrameOp).isFI())
        break;
    }

    // If this instruction doesn't have a frame index we've reached the end of
    // the callee-save restoration.
    if (FrameOp == MBBI->getNumOperands())
      break;

    // Likewise if it *is* a local reference, but not to a callee-saved object.
    int FrameIdx = MBBI->getOperand(FrameOp).getIndex();
    if (FrameIdx < MinCSFI || FrameIdx > MaxCSFI)
      break;

    FirstEpilogue = MBBI;
  }

  if (MF.getFrameInfo()->hasVarSizedObjects()) {
    // With variable-sized objects SP is recomputed from X29 rather than
    // adjusted by a fixed amount.
    int64_t StaticFrameBase;
    StaticFrameBase = -(NumInitialBytes + FuncInfo->getFramePointerOffset());
    emitRegUpdate(MBB, FirstEpilogue, DL, TII,
                  AArch64::XSP, AArch64::X29, AArch64::NoRegister,
                  StaticFrameBase);
  } else {
    emitSPUpdate(MBB, FirstEpilogue, DL, TII, AArch64::X16, NumResidualBytes);
  }
}
/// hoistSpill - Given a sibling copy that defines a value to be spilled, insert /// a spill at a better location. bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) { SlotIndex Idx = LIS.getInstructionIndex(CopyMI); VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getRegSlot()); assert(VNI && VNI->def == Idx.getRegSlot() && "Not defined by copy"); SibValueMap::iterator I = SibValues.find(VNI); if (I == SibValues.end()) return false; const SibValueInfo &SVI = I->second; // Let the normal folding code deal with the boring case. if (!SVI.AllDefsAreReloads && SVI.SpillVNI == VNI) return false; // SpillReg may have been deleted by remat and DCE. if (!LIS.hasInterval(SVI.SpillReg)) { DEBUG(dbgs() << "Stale interval: " << PrintReg(SVI.SpillReg) << '\n'); SibValues.erase(I); return false; } LiveInterval &SibLI = LIS.getInterval(SVI.SpillReg); if (!SibLI.containsValue(SVI.SpillVNI)) { DEBUG(dbgs() << "Stale value: " << PrintReg(SVI.SpillReg) << '\n'); SibValues.erase(I); return false; } // Conservatively extend the stack slot range to the range of the original // value. We may be able to do better with stack slot coloring by being more // careful here. assert(StackInt && "No stack slot assigned yet."); LiveInterval &OrigLI = LIS.getInterval(Original); VNInfo *OrigVNI = OrigLI.getVNInfoAt(Idx); StackInt->MergeValueInAsValue(OrigLI, OrigVNI, StackInt->getValNumInfo(0)); DEBUG(dbgs() << "\tmerged orig valno " << OrigVNI->id << ": " << *StackInt << '\n'); // Already spilled everywhere. if (SVI.AllDefsAreReloads) { DEBUG(dbgs() << "\tno spill needed: " << SVI); ++NumOmitReloadSpill; return true; } // We are going to spill SVI.SpillVNI immediately after its def, so clear out // any later spills of the same value. 
eliminateRedundantSpills(SibLI, SVI.SpillVNI); MachineBasicBlock *MBB = LIS.getMBBFromIndex(SVI.SpillVNI->def); MachineBasicBlock::iterator MII; if (SVI.SpillVNI->isPHIDef()) MII = MBB->SkipPHIsAndLabels(MBB->begin()); else { MachineInstr *DefMI = LIS.getInstructionFromIndex(SVI.SpillVNI->def); assert(DefMI && "Defining instruction disappeared"); MII = DefMI; ++MII; } // Insert spill without kill flag immediately after def. TII.storeRegToStackSlot(*MBB, MII, SVI.SpillReg, false, StackSlot, MRI.getRegClass(SVI.SpillReg), &TRI); --MII; // Point to store instruction. LIS.InsertMachineInstrInMaps(MII); DEBUG(dbgs() << "\thoisted: " << SVI.SpillVNI->def << '\t' << *MII); ++NumSpills; ++NumHoists; return true; }
/// ExpandMBB - Walk the instructions of MBB and expand the ARM pseudo
/// instructions listed in the switch below. Each handled case builds its
/// replacement instruction(s) in front of the pseudo with BuildMI, or
/// delegates to ExpandVLD / ExpandVST / ExpandLaneOp / ExpandVTBL.
/// ModifiedOp is cleared only by the default case, i.e. for opcodes that are
/// not expanded here.
/// NOTE(review): in this file the definition looks truncated -- the switch is
/// closed, but the enclosing while loop and the function body are not closed
/// before the next definition starts. Confirm against the upstream source.
bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { bool Modified = false; MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); while (MBBI != E) { MachineInstr &MI = *MBBI; MachineBasicBlock::iterator NMBBI = llvm::next(MBBI); bool ModifiedOp = true; unsigned Opcode = MI.getOpcode(); switch (Opcode) { default: ModifiedOp = false; break; case ARM::tLDRpci_pic: case ARM::t2LDRpci_pic: { unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic) ? ARM::tLDRpci : ARM::t2LDRpci; unsigned DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); MachineInstrBuilder MIB1 = AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewLdOpc), DstReg) .addOperand(MI.getOperand(1))); (*MIB1).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD)) .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead)) .addReg(DstReg) .addOperand(MI.getOperand(2)); TransferImpOps(MI, MIB1, MIB2); MI.eraseFromParent(); break; } case ARM::MOVi32imm: case ARM::t2MOVi32imm: { unsigned PredReg = 0; ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg); unsigned DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); const MachineOperand &MO = MI.getOperand(1); MachineInstrBuilder LO16, HI16; LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode == ARM::MOVi32imm ? ARM::MOVi16 : ARM::t2MOVi16), DstReg); HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode == ARM::MOVi32imm ? 
ARM::MOVTi16 : ARM::t2MOVTi16)) .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead)) .addReg(DstReg); if (MO.isImm()) { unsigned Imm = MO.getImm(); unsigned Lo16 = Imm & 0xffff; unsigned Hi16 = (Imm >> 16) & 0xffff; LO16 = LO16.addImm(Lo16); HI16 = HI16.addImm(Hi16); } else { const GlobalValue *GV = MO.getGlobal(); unsigned TF = MO.getTargetFlags(); LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16); HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16); } (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); LO16.addImm(Pred).addReg(PredReg); HI16.addImm(Pred).addReg(PredReg); TransferImpOps(MI, LO16, HI16); MI.eraseFromParent(); break; } case ARM::VMOVQQ: { unsigned DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); unsigned EvenDst = TRI->getSubReg(DstReg, ARM::qsub_0); unsigned OddDst = TRI->getSubReg(DstReg, ARM::qsub_1); unsigned SrcReg = MI.getOperand(1).getReg(); bool SrcIsKill = MI.getOperand(1).isKill(); unsigned EvenSrc = TRI->getSubReg(SrcReg, ARM::qsub_0); unsigned OddSrc = TRI->getSubReg(SrcReg, ARM::qsub_1); MachineInstrBuilder Even = AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::VMOVQ)) .addReg(EvenDst, getDefRegState(true) | getDeadRegState(DstIsDead)) .addReg(EvenSrc, getKillRegState(SrcIsKill))); MachineInstrBuilder Odd = AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::VMOVQ)) .addReg(OddDst, getDefRegState(true) | getDeadRegState(DstIsDead)) .addReg(OddSrc, getKillRegState(SrcIsKill))); TransferImpOps(MI, Even, Odd); MI.eraseFromParent(); break; } case ARM::VLDMQ: { MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::VLDMD)); unsigned OpIdx = 0; // Grab the Q register destination. bool DstIsDead = MI.getOperand(OpIdx).isDead(); unsigned DstReg = MI.getOperand(OpIdx++).getReg(); // Copy the addrmode4 operands. 
MIB.addOperand(MI.getOperand(OpIdx++)); MIB.addOperand(MI.getOperand(OpIdx++)); // Copy the predicate operands. MIB.addOperand(MI.getOperand(OpIdx++)); MIB.addOperand(MI.getOperand(OpIdx++)); // Add the destination operands (D subregs). unsigned D0 = TRI->getSubReg(DstReg, ARM::dsub_0); unsigned D1 = TRI->getSubReg(DstReg, ARM::dsub_1); MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead)) .addReg(D1, RegState::Define | getDeadRegState(DstIsDead)); // Add an implicit def for the super-register. MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead)); TransferImpOps(MI, MIB, MIB); MI.eraseFromParent(); break; } case ARM::VSTMQ: { MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::VSTMD)); unsigned OpIdx = 0; // Grab the Q register source. bool SrcIsKill = MI.getOperand(OpIdx).isKill(); unsigned SrcReg = MI.getOperand(OpIdx++).getReg(); // Copy the addrmode4 operands. MIB.addOperand(MI.getOperand(OpIdx++)); MIB.addOperand(MI.getOperand(OpIdx++)); // Copy the predicate operands. MIB.addOperand(MI.getOperand(OpIdx++)); MIB.addOperand(MI.getOperand(OpIdx++)); // Add the source operands (D subregs). unsigned D0 = TRI->getSubReg(SrcReg, ARM::dsub_0); unsigned D1 = TRI->getSubReg(SrcReg, ARM::dsub_1); MIB.addReg(D0).addReg(D1); if (SrcIsKill) // Add an implicit kill for the Q register. (*MIB).addRegisterKilled(SrcReg, TRI, true); TransferImpOps(MI, MIB, MIB); MI.eraseFromParent(); break; } case ARM::VDUPfqf: case ARM::VDUPfdf: { unsigned NewOpc = Opcode == ARM::VDUPfqf ? ARM::VDUPLNfq : ARM::VDUPLNfd; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)); unsigned OpIdx = 0; unsigned SrcReg = MI.getOperand(1).getReg(); unsigned Lane = getARMRegisterNumbering(SrcReg) & 1; unsigned DReg = TRI->getMatchingSuperReg(SrcReg, Lane & 1 ? ARM::ssub_1 : ARM::ssub_0, &ARM::DPR_VFP2RegClass); // The lane is [0,1] for the containing DReg superregister. // Copy the dst/src register operands. 
MIB.addOperand(MI.getOperand(OpIdx++)); MIB.addReg(DReg); ++OpIdx; // Add the lane select operand. MIB.addImm(Lane); // Add the predicate operands. MIB.addOperand(MI.getOperand(OpIdx++)); MIB.addOperand(MI.getOperand(OpIdx++)); TransferImpOps(MI, MIB, MIB); MI.eraseFromParent(); break; } case ARM::VLD1q8Pseudo: case ARM::VLD1q16Pseudo: case ARM::VLD1q32Pseudo: case ARM::VLD1q64Pseudo: case ARM::VLD1q8Pseudo_UPD: case ARM::VLD1q16Pseudo_UPD: case ARM::VLD1q32Pseudo_UPD: case ARM::VLD1q64Pseudo_UPD: case ARM::VLD2d8Pseudo: case ARM::VLD2d16Pseudo: case ARM::VLD2d32Pseudo: case ARM::VLD2q8Pseudo: case ARM::VLD2q16Pseudo: case ARM::VLD2q32Pseudo: case ARM::VLD2d8Pseudo_UPD: case ARM::VLD2d16Pseudo_UPD: case ARM::VLD2d32Pseudo_UPD: case ARM::VLD2q8Pseudo_UPD: case ARM::VLD2q16Pseudo_UPD: case ARM::VLD2q32Pseudo_UPD: case ARM::VLD3d8Pseudo: case ARM::VLD3d16Pseudo: case ARM::VLD3d32Pseudo: case ARM::VLD1d64TPseudo: case ARM::VLD3d8Pseudo_UPD: case ARM::VLD3d16Pseudo_UPD: case ARM::VLD3d32Pseudo_UPD: case ARM::VLD1d64TPseudo_UPD: case ARM::VLD3q8Pseudo_UPD: case ARM::VLD3q16Pseudo_UPD: case ARM::VLD3q32Pseudo_UPD: case ARM::VLD3q8oddPseudo_UPD: case ARM::VLD3q16oddPseudo_UPD: case ARM::VLD3q32oddPseudo_UPD: case ARM::VLD4d8Pseudo: case ARM::VLD4d16Pseudo: case ARM::VLD4d32Pseudo: case ARM::VLD1d64QPseudo: case ARM::VLD4d8Pseudo_UPD: case ARM::VLD4d16Pseudo_UPD: case ARM::VLD4d32Pseudo_UPD: case ARM::VLD1d64QPseudo_UPD: case ARM::VLD4q8Pseudo_UPD: case ARM::VLD4q16Pseudo_UPD: case ARM::VLD4q32Pseudo_UPD: case ARM::VLD4q8oddPseudo_UPD: case ARM::VLD4q16oddPseudo_UPD: case ARM::VLD4q32oddPseudo_UPD: ExpandVLD(MBBI); break; case ARM::VST1q8Pseudo: case ARM::VST1q16Pseudo: case ARM::VST1q32Pseudo: case ARM::VST1q64Pseudo: case ARM::VST1q8Pseudo_UPD: case ARM::VST1q16Pseudo_UPD: case ARM::VST1q32Pseudo_UPD: case ARM::VST1q64Pseudo_UPD: case ARM::VST2d8Pseudo: case ARM::VST2d16Pseudo: case ARM::VST2d32Pseudo: case ARM::VST2q8Pseudo: case ARM::VST2q16Pseudo: case 
ARM::VST2q32Pseudo: case ARM::VST2d8Pseudo_UPD: case ARM::VST2d16Pseudo_UPD: case ARM::VST2d32Pseudo_UPD: case ARM::VST2q8Pseudo_UPD: case ARM::VST2q16Pseudo_UPD: case ARM::VST2q32Pseudo_UPD: case ARM::VST3d8Pseudo: case ARM::VST3d16Pseudo: case ARM::VST3d32Pseudo: case ARM::VST1d64TPseudo: case ARM::VST3d8Pseudo_UPD: case ARM::VST3d16Pseudo_UPD: case ARM::VST3d32Pseudo_UPD: case ARM::VST1d64TPseudo_UPD: case ARM::VST3q8Pseudo_UPD: case ARM::VST3q16Pseudo_UPD: case ARM::VST3q32Pseudo_UPD: case ARM::VST3q8oddPseudo_UPD: case ARM::VST3q16oddPseudo_UPD: case ARM::VST3q32oddPseudo_UPD: case ARM::VST4d8Pseudo: case ARM::VST4d16Pseudo: case ARM::VST4d32Pseudo: case ARM::VST1d64QPseudo: case ARM::VST4d8Pseudo_UPD: case ARM::VST4d16Pseudo_UPD: case ARM::VST4d32Pseudo_UPD: case ARM::VST1d64QPseudo_UPD: case ARM::VST4q8Pseudo_UPD: case ARM::VST4q16Pseudo_UPD: case ARM::VST4q32Pseudo_UPD: case ARM::VST4q8oddPseudo_UPD: case ARM::VST4q16oddPseudo_UPD: case ARM::VST4q32oddPseudo_UPD: ExpandVST(MBBI); break; case ARM::VLD2LNd8Pseudo: case ARM::VLD2LNd16Pseudo: case ARM::VLD2LNd32Pseudo: case ARM::VLD2LNq16Pseudo: case ARM::VLD2LNq32Pseudo: case ARM::VLD2LNd8Pseudo_UPD: case ARM::VLD2LNd16Pseudo_UPD: case ARM::VLD2LNd32Pseudo_UPD: case ARM::VLD2LNq16Pseudo_UPD: case ARM::VLD2LNq32Pseudo_UPD: case ARM::VLD3LNd8Pseudo: case ARM::VLD3LNd16Pseudo: case ARM::VLD3LNd32Pseudo: case ARM::VLD3LNq16Pseudo: case ARM::VLD3LNq32Pseudo: case ARM::VLD3LNd8Pseudo_UPD: case ARM::VLD3LNd16Pseudo_UPD: case ARM::VLD3LNd32Pseudo_UPD: case ARM::VLD3LNq16Pseudo_UPD: case ARM::VLD3LNq32Pseudo_UPD: case ARM::VLD4LNd8Pseudo: case ARM::VLD4LNd16Pseudo: case ARM::VLD4LNd32Pseudo: case ARM::VLD4LNq16Pseudo: case ARM::VLD4LNq32Pseudo: case ARM::VLD4LNd8Pseudo_UPD: case ARM::VLD4LNd16Pseudo_UPD: case ARM::VLD4LNd32Pseudo_UPD: case ARM::VLD4LNq16Pseudo_UPD: case ARM::VLD4LNq32Pseudo_UPD: case ARM::VST2LNd8Pseudo: case ARM::VST2LNd16Pseudo: case ARM::VST2LNd32Pseudo: case ARM::VST2LNq16Pseudo: case 
ARM::VST2LNq32Pseudo: case ARM::VST2LNd8Pseudo_UPD: case ARM::VST2LNd16Pseudo_UPD: case ARM::VST2LNd32Pseudo_UPD: case ARM::VST2LNq16Pseudo_UPD: case ARM::VST2LNq32Pseudo_UPD: case ARM::VST3LNd8Pseudo: case ARM::VST3LNd16Pseudo: case ARM::VST3LNd32Pseudo: case ARM::VST3LNq16Pseudo: case ARM::VST3LNq32Pseudo: case ARM::VST3LNd8Pseudo_UPD: case ARM::VST3LNd16Pseudo_UPD: case ARM::VST3LNd32Pseudo_UPD: case ARM::VST3LNq16Pseudo_UPD: case ARM::VST3LNq32Pseudo_UPD: case ARM::VST4LNd8Pseudo: case ARM::VST4LNd16Pseudo: case ARM::VST4LNd32Pseudo: case ARM::VST4LNq16Pseudo: case ARM::VST4LNq32Pseudo: case ARM::VST4LNd8Pseudo_UPD: case ARM::VST4LNd16Pseudo_UPD: case ARM::VST4LNd32Pseudo_UPD: case ARM::VST4LNq16Pseudo_UPD: case ARM::VST4LNq32Pseudo_UPD: ExpandLaneOp(MBBI); break; case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false, 2); break; case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false, 3); break; case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false, 4); break; case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true, 2); break; case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true, 3); break; case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true, 4); break; }
bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const { // If the block has no terminators, it just falls into the block after it. MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) return false; // Get the last instruction in the block. MachineInstr *LastInst = I; // If there is only one terminator instruction, process it. unsigned LastOpc = LastInst->getOpcode(); if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { if (!LastInst->getDesc().isBranch()) return true; // Unconditional branch if (LastOpc == Mips::J) { TBB = LastInst->getOperand(0).getMBB(); return false; } Mips::CondCode BranchCode = GetCondFromBranchOpc(LastInst->getOpcode()); if (BranchCode == Mips::COND_INVALID) return true; // Can't handle indirect branch. // Conditional branch // Block ends with fall-through condbranch. if (LastOpc != Mips::COND_INVALID) { int LastNumOp = LastInst->getNumOperands(); TBB = LastInst->getOperand(LastNumOp-1).getMBB(); Cond.push_back(MachineOperand::CreateImm(BranchCode)); for (int i=0; i<LastNumOp-1; i++) { Cond.push_back(LastInst->getOperand(i)); } return false; } } // Get the instruction before it if it is a terminator. MachineInstr *SecondLastInst = I; // If there are three terminators, we don't know what sort of block this is. if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) return true; // If the block ends with Mips::J and a Mips::BNE/Mips::BEQ, handle it. 
unsigned SecondLastOpc = SecondLastInst->getOpcode(); Mips::CondCode BranchCode = GetCondFromBranchOpc(SecondLastOpc); if (BranchCode != Mips::COND_INVALID && LastOpc == Mips::J) { int SecondNumOp = SecondLastInst->getNumOperands(); TBB = SecondLastInst->getOperand(SecondNumOp-1).getMBB(); Cond.push_back(MachineOperand::CreateImm(BranchCode)); for (int i=0; i<SecondNumOp-1; i++) { Cond.push_back(SecondLastInst->getOperand(i)); } FBB = LastInst->getOperand(0).getMBB(); return false; } // If the block ends with two unconditional branches, handle it. The last // one is not executed, so remove it. if ((SecondLastOpc == Mips::J) && (LastOpc == Mips::J)) { TBB = SecondLastInst->getOperand(0).getMBB(); I = LastInst; if (AllowModify) I->eraseFromParent(); return false; } // Otherwise, can't handle this. return true; }
/// SinkInstruction - Determine whether it is safe to sink the specified machine /// instruction out of its current block into a successor. bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // Don't sink insert_subreg, subreg_to_reg, reg_sequence. These are meant to // be close to the source to make it easier to coalesce. if (AvoidsSinking(MI, MRI)) return false; // Check if it's safe to move the instruction. if (!MI->isSafeToMove(TII, AA, SawStore)) return false; // FIXME: This should include support for sinking instructions within the // block they are currently in to shorten the live ranges. We often get // instructions sunk into the top of a large block, but it would be better to // also sink them down before their first use in the block. This xform has to // be careful not to *increase* register pressure though, e.g. sinking // "x = y + z" down if it kills y and z would increase the live ranges of y // and z and only shrink the live range of x. bool BreakPHIEdge = false; MachineBasicBlock *ParentBlock = MI->getParent(); MachineBasicBlock *SuccToSinkTo = FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge); // If there are no outputs, it must have side-effects. if (SuccToSinkTo == 0) return false; // If the instruction to move defines a dead physical register which is live // when leaving the basic block, don't move it because it could turn into a // "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>) for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { const MachineOperand &MO = MI->getOperand(I); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; if (SuccToSinkTo->isLiveIn(Reg)) return false; } DEBUG(dbgs() << "Sink instr " << *MI << "\tinto block " << *SuccToSinkTo); // If the block has multiple predecessors, this would introduce computation on // a path that it doesn't already exist. 
We could split the critical edge, // but for now we just punt. if (SuccToSinkTo->pred_size() > 1) { // We cannot sink a load across a critical edge - there may be stores in // other code paths. bool TryBreak = false; bool store = true; if (!MI->isSafeToMove(TII, AA, store)) { DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n"); TryBreak = true; } // We don't want to sink across a critical edge if we don't dominate the // successor. We could be introducing calculations to new code paths. if (!TryBreak && !DT->dominates(ParentBlock, SuccToSinkTo)) { DEBUG(dbgs() << " *** NOTE: Critical edge found\n"); TryBreak = true; } // Don't sink instructions into a loop. if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) { DEBUG(dbgs() << " *** NOTE: Loop header found\n"); TryBreak = true; } // Otherwise we are OK with sinking along a critical edge. if (!TryBreak) DEBUG(dbgs() << "Sinking along critical edge.\n"); else { MachineBasicBlock *NewSucc = SplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge); if (!NewSucc) { DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to " "break critical edge\n"); return false; } else { DEBUG(dbgs() << " *** Splitting critical edge:" " BB#" << ParentBlock->getNumber() << " -- BB#" << NewSucc->getNumber() << " -- BB#" << SuccToSinkTo->getNumber() << '\n'); SuccToSinkTo = NewSucc; ++NumSplit; BreakPHIEdge = false; } } } if (BreakPHIEdge) { // BreakPHIEdge is true if all the uses are in the successor MBB being // sunken into and they are all PHI nodes. In this case, machine-sink must // break the critical edge first. 
MachineBasicBlock *NewSucc = SplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge); if (!NewSucc) { DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to " "break critical edge\n"); return false; } DEBUG(dbgs() << " *** Splitting critical edge:" " BB#" << ParentBlock->getNumber() << " -- BB#" << NewSucc->getNumber() << " -- BB#" << SuccToSinkTo->getNumber() << '\n'); SuccToSinkTo = NewSucc; ++NumSplit; } // Determine where to insert into. Skip phi nodes. MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin(); while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI()) ++InsertPos; // collect matching debug values. SmallVector<MachineInstr *, 2> DbgValuesToSink; collectDebugValues(MI, DbgValuesToSink); // Move the instruction. SuccToSinkTo->splice(InsertPos, ParentBlock, MI, ++MachineBasicBlock::iterator(MI)); // Move debug values. for (SmallVector<MachineInstr *, 2>::iterator DBI = DbgValuesToSink.begin(), DBE = DbgValuesToSink.end(); DBI != DBE; ++DBI) { MachineInstr *DbgMI = *DBI; SuccToSinkTo->splice(InsertPos, ParentBlock, DbgMI, ++MachineBasicBlock::iterator(DbgMI)); } // Conservatively, clear any kill flags, since it's possible that they are no // longer correct. MI->clearKillInfo(); return true; }
/// Look for pairs of conditional transfers in block \p B that define the same
/// register under the same predicate register (one if-true, one if-false) and
/// replace each such pair with a single MUX instruction.
///
/// The first loop only collects candidates into a MuxInfo list; the second
/// loop builds the MUX instructions and erases the two original definitions.
///
/// \returns true if at least one MUX was generated.
bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) {
  InstrIndexMap I2X;
  DefUseInfoMap DUM;
  buildMaps(B, I2X, DUM);

  typedef DenseMap<unsigned,CondsetInfo> CondsetMap;
  CondsetMap CM;
  MuxInfoList ML;

  const MachineBasicBlock::iterator BlockEnd = B.end();
  for (MachineBasicBlock::iterator Cur = B.begin(); Cur != BlockEnd; ++Cur) {
    MachineInstr *MI = &*Cur;
    const unsigned Opc = MI->getOpcode();
    if (!isCondTransfer(Opc))
      continue;

    const unsigned DR = MI->getOperand(0).getReg();
    if (isRegPair(DR))
      continue;

    const unsigned PR = MI->getOperand(1).getReg();
    const unsigned Idx = I2X.lookup(MI);
    CondsetMap::iterator F = CM.find(DR);
    const bool IfTrue = HII->isPredicatedTrue(Opc);

    // If there is no record of a conditional transfer for this register,
    // or the predicate register differs, create a new record for it.
    if (F != CM.end() && F->second.PredR != PR) {
      CM.erase(F);
      F = CM.end();
    }
    if (F == CM.end()) {
      F = CM.insert(std::make_pair(DR, CondsetInfo())).first;
      F->second.PredR = PR;
    }

    CondsetInfo &CI = F->second;
    (IfTrue ? CI.TrueX : CI.FalseX) = Idx;
    if (CI.TrueX == UINT_MAX || CI.FalseX == UINT_MAX)
      continue;

    // There is now a complete definition of DR, i.e. we have the predicate
    // register, the definition if-true, and definition if-false.

    // First, check if both definitions are far enough from the definition
    // of the predicate register: scan the (up to) four positions in front of
    // the later definition.
    const unsigned MinX = std::min(CI.TrueX, CI.FalseX);
    const unsigned MaxX = std::max(CI.TrueX, CI.FalseX);
    const unsigned SearchX = (MaxX > 4) ? MaxX-4 : 0;
    bool NearDef = false;
    for (unsigned X = SearchX; X < MaxX; ++X) {
      const DefUseInfo &DU = DUM.lookup(X);
      if (DU.Defs[PR]) {
        NearDef = true;
        break;
      }
    }
    if (NearDef)
      continue;

    // The predicate register is not defined in the last few instructions.
    // Check if the conversion to MUX is possible (either "up", i.e. at the
    // place of the earlier partial definition, or "down", where the later
    // definition is located). Examine all defs and uses between these two
    // definitions.
    // SR1, SR2 - source registers from the first and the second definition.
    MachineBasicBlock::iterator It1 = B.begin();
    MachineBasicBlock::iterator It2 = B.begin();
    std::advance(It1, MinX);
    std::advance(It2, MaxX);
    MachineInstr *Def1 = It1;
    MachineInstr *Def2 = It2;
    MachineOperand *Src1 = &Def1->getOperand(2);
    MachineOperand *Src2 = &Def2->getOperand(2);
    const unsigned SR1 = Src1->isReg() ? Src1->getReg() : 0;
    const unsigned SR2 = Src2->isReg() ? Src2->getReg() : 0;

    bool Failure = false;
    bool CanUp = true;
    bool CanDown = true;
    for (unsigned X = MinX+1; X < MaxX; X++) {
      const DefUseInfo &DU = DUM.lookup(X);
      // Anything touching the predicate or the destination in between makes
      // the pair unusable.
      if (DU.Defs[PR] || DU.Defs[DR] || DU.Uses[DR]) {
        Failure = true;
        break;
      }
      // A redefinition of the first source forbids moving down; a
      // redefinition of the second source forbids moving up.
      if (CanDown && DU.Defs[SR1])
        CanDown = false;
      if (CanUp && DU.Defs[SR2])
        CanUp = false;
    }
    if (Failure)
      continue;
    if (!CanUp && !CanDown)
      continue;

    MachineOperand *SrcT = (MinX == CI.TrueX) ? Src1 : Src2;
    MachineOperand *SrcF = (MinX == CI.FalseX) ? Src1 : Src2;
    // Prefer "down", since this will move the MUX farther away from the
    // predicate definition.
    MachineBasicBlock::iterator At = CanDown ? Def2 : Def1;
    ML.push_back(MuxInfo(At, DR, PR, SrcT, SrcF, Def1, Def2));
  }

  // Materialize the recorded candidates.
  bool Changed = false;
  for (unsigned MuxNo = 0, NumMux = ML.size(); MuxNo < NumMux; ++MuxNo) {
    MuxInfo &MX = ML[MuxNo];
    MachineBasicBlock &MuxBB = *MX.At->getParent();
    DebugLoc DL = MX.At->getDebugLoc();

    // A zero opcode means there is no MUX form for this operand combination.
    unsigned MxOpc = getMuxOpcode(*MX.SrcT, *MX.SrcF);
    if (!MxOpc)
      continue;

    BuildMI(MuxBB, MX.At, DL, HII->get(MxOpc), MX.DefR)
        .addReg(MX.PredR)
        .addOperand(*MX.SrcT)
        .addOperand(*MX.SrcF);
    MuxBB.erase(MX.Def1);
    MuxBB.erase(MX.Def2);
    Changed = true;
  }

  return Changed;
}
/// Emit the Thumb1 epilogue into the returning block \p MBB.
///
/// Restores SP (either by a plain SP update or from the frame pointer) ahead
/// of the callee-saved register restores, and, for functions with a vararg
/// register save area, replaces the return with a sequence that pops the
/// return address into R3, releases the save area and returns through R3.
void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
                                       MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  assert((MBBI->getOpcode() == ARM::tBX_RET ||
          MBBI->getOpcode() == ARM::tPOP_RET) &&
         "Can only insert epilog into returning blocks");
  DebugLoc Loc = MBBI->getDebugLoc();
  MachineFrameInfo *FrameInfo = MF.getFrameInfo();
  ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
  const Thumb1RegisterInfo *RegInfo =
    static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo());
  const Thumb1InstrInfo &TII =
    *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());

  unsigned VARegSaveSize = FuncInfo->getVarArgsRegSaveSize();
  int StackBytes = (int)FrameInfo->getStackSize();
  const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
  unsigned FramePtr = RegInfo->getFrameRegister(MF);

  if (FuncInfo->hasStackFrame()) {
    // Unwind MBBI to point to first LDR / VLDRD.
    if (MBBI != MBB.begin()) {
      do {
        --MBBI;
      } while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs));
      // Stepped one too far unless the block starts with a restore.
      if (!isCSRestore(MBBI, CSRegs))
        ++MBBI;
    }

    // Move SP to start of FP callee save spill area.
    StackBytes -= (FuncInfo->getGPRCalleeSavedArea1Size() +
                   FuncInfo->getGPRCalleeSavedArea2Size() +
                   FuncInfo->getDPRCalleeSavedAreaSize());

    if (!FuncInfo->shouldRestoreSPFromFP()) {
      // Plain SP adjustment. When the return is a tBX_RET directly preceded
      // by a tPOP, the update goes in front of that tPOP.
      const bool PopBeforeRet = MBBI->getOpcode() == ARM::tBX_RET &&
                                &MBB.front() != MBBI &&
                                prior(MBBI)->getOpcode() == ARM::tPOP;
      if (PopBeforeRet) {
        MachineBasicBlock::iterator PopI = prior(MBBI);
        emitSPUpdate(MBB, PopI, TII, Loc, *RegInfo, StackBytes);
      } else {
        emitSPUpdate(MBB, MBBI, TII, Loc, *RegInfo, StackBytes);
      }
    } else {
      StackBytes = FuncInfo->getFramePtrSpillOffset() - StackBytes;
      // Reset SP based on frame pointer only if the stack frame extends beyond
      // frame pointer stack slot, the target is ELF and the function has FP, or
      // the target uses var sized objects.
      if (StackBytes == 0) {
        // SP is exactly the frame pointer: a register move suffices.
        AddDefaultPred(BuildMI(MBB, MBBI, Loc, TII.get(ARM::tMOVr), ARM::SP)
          .addReg(FramePtr));
      } else {
        // Compute FP - StackBytes into R4, then move it into SP.
        assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) &&
               "No scratch register to restore SP from FP!");
        emitThumbRegPlusImmediate(MBB, MBBI, Loc, ARM::R4, FramePtr,
                                  -StackBytes, TII, *RegInfo);
        AddDefaultPred(BuildMI(MBB, MBBI, Loc, TII.get(ARM::tMOVr), ARM::SP)
          .addReg(ARM::R4));
      }
    }
  } else if (StackBytes != 0) {
    // No stack frame: just release the locals.
    emitSPUpdate(MBB, MBBI, TII, Loc, *RegInfo, StackBytes);
  }

  if (VARegSaveSize) {
    // Unlike T2 and ARM mode, the T1 pop instruction cannot restore
    // to LR, and we can't pop the value directly to the PC since
    // we need to update the SP after popping the value. Therefore, we
    // pop the old LR into R3 as a temporary.

    // Move back past the callee-saved register restoration
    for (; MBBI != MBB.end() && isCSRestore(MBBI, CSRegs); ++MBBI)
      ;

    // Epilogue for vararg functions: pop LR to R3 and branch off it.
    AddDefaultPred(BuildMI(MBB, MBBI, Loc, TII.get(ARM::tPOP)))
      .addReg(ARM::R3, RegState::Define);

    emitSPUpdate(MBB, MBBI, TII, Loc, *RegInfo, VARegSaveSize);

    AddDefaultPred(BuildMI(MBB, MBBI, Loc, TII.get(ARM::tBX_RET_vararg))
      .addReg(ARM::R3, RegState::Kill));

    // erase the old tBX_RET instruction
    MBB.erase(MBBI);
  }
}
/// Emit the ARM64 epilogue into the returning block \p MBB.
///
/// Computes how many bytes remain to be released once the callee-saved
/// restores in front of the return are accounted for, then restores SP either
/// by a plain SP adjustment (no frame pointer) or relative to FP.
void ARM64FrameLowering::emitEpilogue(MachineFunction &MF,
                                      MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator RetI = MBB.getLastNonDebugInstr();
  assert(RetI->isReturn() && "Can only insert epilog into returning blocks");
  MachineFrameInfo *FrameInfo = MF.getFrameInfo();
  const ARM64InstrInfo *TII =
      static_cast<const ARM64InstrInfo *>(MF.getTarget().getInstrInfo());
  const ARM64RegisterInfo *TRI = static_cast<const ARM64RegisterInfo *>(
      MF.getTarget().getRegisterInfo());
  DebugLoc Loc = RetI->getDebugLoc();
  unsigned RetOpcode = RetI->getOpcode();

  int NumBytes = FrameInfo->getStackSize();
  const ARM64FunctionInfo *FuncInfo = MF.getInfo<ARM64FunctionInfo>();

  // Initial and residual are named for consistency with the prologue. Note
  // that in the epilogue, the residual adjustment is executed first.
  uint64_t ArgumentPopSize = 0;
  if (RetOpcode == ARM64::TCRETURNdi || RetOpcode == ARM64::TCRETURNri) {
    // For a tail-call in a callee-pops-arguments environment, some or all of
    // the stack may actually be in use for the call's arguments, this is
    // calculated during LowerCall and consumed here...
    MachineOperand &StackAdjust = RetI->getOperand(1);
    ArgumentPopSize = StackAdjust.getImm();
  } else {
    // ... otherwise the amount to pop is *all* of the argument space,
    // conveniently stored in the MachineFunctionInfo by
    // LowerFormalArguments. This will, of course, be zero for the C calling
    // convention.
    ArgumentPopSize = FuncInfo->getArgumentStackToRestore();
  }

  // The stack frame should be like below,
  //
  //      ----------------------                     ---
  //      |                    |                      |
  //      | BytesInStackArgArea|              CalleeArgStackSize
  //      | (NumReusableBytes) |                (of tail call)
  //      |                    |                     ---
  //      |                    |                      |
  //      ---------------------|        ---           |
  //      |                    |         |            |
  //      |   CalleeSavedReg   |         |            |
  //      | (NumRestores * 16) |         |            |
  //      |                    |         |            |
  //      ---------------------|         |         NumBytes
  //      |                    |     StackSize  (StackAdjustUp)
  //      |   LocalStackSize   |         |            |
  //      | (covering callee   |         |            |
  //      |   args)            |         |            |
  //      |                    |         |            |
  //      ----------------------        ---          ---
  //
  // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
  //             = StackSize + ArgumentPopSize
  //
  // ARM64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
  // it as the 2nd argument of ARM64ISD::TC_RETURN.
  NumBytes += ArgumentPopSize;

  // Walk backwards over the callee-saved restores that precede the return,
  // counting them as we go.
  unsigned NumRestores = 0;
  MachineBasicBlock::iterator LastPopI = RetI;
  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
  if (LastPopI != MBB.begin()) {
    for (;;) {
      ++NumRestores;
      --LastPopI;
      if (LastPopI == MBB.begin() || !isCSRestore(LastPopI, CSRegs))
        break;
    }
    // The walk may have gone one instruction past the first restore.
    if (!isCSRestore(LastPopI, CSRegs)) {
      ++LastPopI;
      --NumRestores;
    }
  }

  // Each counted restore accounts for 16 bytes of the frame.
  NumBytes -= NumRestores * 16;
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  if (hasFP(MF)) {
    // Restore the original stack pointer.
    // FIXME: Rather than doing the math here, we should instead just use
    // non-post-indexed loads for the restores if we aren't actually going to
    // be able to save any instructions.
    if (NumBytes || FrameInfo->hasVarSizedObjects())
      emitFrameOffset(MBB, LastPopI, Loc, ARM64::SP, ARM64::FP,
                      -(NumRestores - 1) * 16, TII, MachineInstr::NoFlags);
    return;
  }

  // If this was a redzone leaf function, we don't need to restore the
  // stack pointer.
  if (!canUseRedZone(MF))
    emitFrameOffset(MBB, LastPopI, Loc, ARM64::SP, ARM64::SP, NumBytes, TII);
}
bool MipsExpandPseudo::expandAtomicCmpSwap(MachineBasicBlock &BB, MachineBasicBlock::iterator I, MachineBasicBlock::iterator &NMBBI) { const unsigned Size = I->getOpcode() == Mips::ATOMIC_CMP_SWAP_I32_POSTRA ? 4 : 8; MachineFunction *MF = BB.getParent(); const bool ArePtrs64bit = STI->getABI().ArePtrs64bit(); DebugLoc DL = I->getDebugLoc(); unsigned LL, SC, ZERO, BNE, BEQ, MOVE; if (Size == 4) { if (STI->inMicroMipsMode()) { LL = STI->hasMips32r6() ? Mips::LL_MMR6 : Mips::LL_MM; SC = STI->hasMips32r6() ? Mips::SC_MMR6 : Mips::SC_MM; BNE = STI->hasMips32r6() ? Mips::BNEC_MMR6 : Mips::BNE_MM; BEQ = STI->hasMips32r6() ? Mips::BEQC_MMR6 : Mips::BEQ_MM; } else { LL = STI->hasMips32r6() ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6) : (ArePtrs64bit ? Mips::LL64 : Mips::LL); SC = STI->hasMips32r6() ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6) : (ArePtrs64bit ? Mips::SC64 : Mips::SC); BNE = Mips::BNE; BEQ = Mips::BEQ; } ZERO = Mips::ZERO; MOVE = Mips::OR; } else { LL = STI->hasMips64r6() ? Mips::LLD_R6 : Mips::LLD; SC = STI->hasMips64r6() ? Mips::SCD_R6 : Mips::SCD; ZERO = Mips::ZERO_64; BNE = Mips::BNE64; BEQ = Mips::BEQ64; MOVE = Mips::OR64; } unsigned Dest = I->getOperand(0).getReg(); unsigned Ptr = I->getOperand(1).getReg(); unsigned OldVal = I->getOperand(2).getReg(); unsigned NewVal = I->getOperand(3).getReg(); unsigned Scratch = I->getOperand(4).getReg(); // insert new blocks after the current block const BasicBlock *LLVM_BB = BB.getBasicBlock(); MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineFunction::iterator It = ++BB.getIterator(); MF->insert(It, loop1MBB); MF->insert(It, loop2MBB); MF->insert(It, exitMBB); // Transfer the remainder of BB and its successor edges to exitMBB. 
exitMBB->splice(exitMBB->begin(), &BB, std::next(MachineBasicBlock::iterator(I)), BB.end()); exitMBB->transferSuccessorsAndUpdatePHIs(&BB); // thisMBB: // ... // fallthrough --> loop1MBB BB.addSuccessor(loop1MBB, BranchProbability::getOne()); loop1MBB->addSuccessor(exitMBB); loop1MBB->addSuccessor(loop2MBB); loop1MBB->normalizeSuccProbs(); loop2MBB->addSuccessor(loop1MBB); loop2MBB->addSuccessor(exitMBB); loop2MBB->normalizeSuccProbs(); // loop1MBB: // ll dest, 0(ptr) // bne dest, oldval, exitMBB BuildMI(loop1MBB, DL, TII->get(LL), Dest).addReg(Ptr).addImm(0); BuildMI(loop1MBB, DL, TII->get(BNE)) .addReg(Dest, RegState::Kill).addReg(OldVal).addMBB(exitMBB); // loop2MBB: // move scratch, NewVal // sc Scratch, Scratch, 0(ptr) // beq Scratch, $0, loop1MBB BuildMI(loop2MBB, DL, TII->get(MOVE), Scratch).addReg(NewVal).addReg(ZERO); BuildMI(loop2MBB, DL, TII->get(SC), Scratch) .addReg(Scratch).addReg(Ptr).addImm(0); BuildMI(loop2MBB, DL, TII->get(BEQ)) .addReg(Scratch, RegState::Kill).addReg(ZERO).addMBB(loop1MBB); LivePhysRegs LiveRegs; computeAndAddLiveIns(LiveRegs, *loop1MBB); computeAndAddLiveIns(LiveRegs, *loop2MBB); computeAndAddLiveIns(LiveRegs, *exitMBB); NMBBI = BB.end(); I->eraseFromParent(); return true; }
/// Expand a variable-amount shift pseudo (Shl8/16, Sra8/16, Srl8/16) into a
/// loop that applies a single-step shift instruction once per iteration.
///
/// \p BB is split after \p MI: a loop block and a remainder block are
/// inserted after it, the pseudo is erased, and the remainder block (which now
/// holds the instructions that followed the pseudo) is returned.
MachineBasicBlock*
MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI,
                                     MachineBasicBlock *BB) const {
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc Loc = MI->getDebugLoc();
  const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();

  // Select the single-step opcode and the register class of the value.
  // NOTE: when assertions are compiled out, an unexpected opcode falls
  // through into the Shl8 case below.
  unsigned StepOpc;
  const TargetRegisterClass * ValueRC;
  switch (MI->getOpcode()) {
  default:
    assert(0 && "Invalid shift opcode!");
  case MSP430::Shl8:
    StepOpc = MSP430::SHL8r1;
    ValueRC = MSP430::GR8RegisterClass;
    break;
  case MSP430::Shl16:
    StepOpc = MSP430::SHL16r1;
    ValueRC = MSP430::GR16RegisterClass;
    break;
  case MSP430::Sra8:
    StepOpc = MSP430::SAR8r1;
    ValueRC = MSP430::GR8RegisterClass;
    break;
  case MSP430::Sra16:
    StepOpc = MSP430::SAR16r1;
    ValueRC = MSP430::GR16RegisterClass;
    break;
  case MSP430::Srl8:
    StepOpc = MSP430::SAR8r1c;
    ValueRC = MSP430::GR8RegisterClass;
    break;
  case MSP430::Srl16:
    StepOpc = MSP430::SAR16r1c;
    ValueRC = MSP430::GR16RegisterClass;
    break;
  }

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator InsertPos = BB;
  ++InsertPos;

  // Create the loop block and the block for the remaining instructions.
  MachineBasicBlock *LoopBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *TailBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MF->insert(InsertPos, LoopBB);
  MF->insert(InsertPos, TailBB);

  // Update machine-CFG edges by transferring all successors of the current
  // block to the block containing instructions after shift.
  TailBB->splice(TailBB->begin(), BB,
                 llvm::next(MachineBasicBlock::iterator(MI)),
                 BB->end());
  TailBB->transferSuccessorsAndUpdatePHIs(BB);

  // Add edges BB => LoopBB => TailBB, BB => TailBB, LoopBB => LoopBB
  BB->addSuccessor(LoopBB);
  BB->addSuccessor(TailBB);
  LoopBB->addSuccessor(TailBB);
  LoopBB->addSuccessor(LoopBB);

  // Virtual registers for the loop-carried shift amount and value.
  unsigned CountPhiReg = MRI.createVirtualRegister(MSP430::GR8RegisterClass);
  unsigned CountNextReg = MRI.createVirtualRegister(MSP430::GR8RegisterClass);
  unsigned ValuePhiReg = MRI.createVirtualRegister(ValueRC);
  unsigned ValueNextReg = MRI.createVirtualRegister(ValueRC);

  // Operands of the pseudo: 0 = result, 1 = value, 2 = shift amount.
  unsigned CountSrcReg = MI->getOperand(2).getReg();
  unsigned SrcReg = MI->getOperand(1).getReg();
  unsigned DstReg = MI->getOperand(0).getReg();

  // BB:
  // cmp 0, N
  // je TailBB
  BuildMI(BB, Loc, TII.get(MSP430::CMP8ri))
    .addReg(CountSrcReg)
    .addImm(0);
  BuildMI(BB, Loc, TII.get(MSP430::JCC))
    .addMBB(TailBB)
    .addImm(MSP430CC::COND_E);

  // LoopBB:
  // ValuePhi  = phi [%SrcReg, BB], [%ValueNext, LoopBB]
  // CountPhi  = phi [%N,      BB], [%CountNext, LoopBB]
  // ValueNext = shift ValuePhi
  // CountNext = CountPhi - 1;
  BuildMI(LoopBB, Loc, TII.get(MSP430::PHI), ValuePhiReg)
    .addReg(SrcReg).addMBB(BB)
    .addReg(ValueNextReg).addMBB(LoopBB);
  BuildMI(LoopBB, Loc, TII.get(MSP430::PHI), CountPhiReg)
    .addReg(CountSrcReg).addMBB(BB)
    .addReg(CountNextReg).addMBB(LoopBB);
  BuildMI(LoopBB, Loc, TII.get(StepOpc), ValueNextReg)
    .addReg(ValuePhiReg);
  BuildMI(LoopBB, Loc, TII.get(MSP430::SUB8ri), CountNextReg)
    .addReg(CountPhiReg)
    .addImm(1);
  BuildMI(LoopBB, Loc, TII.get(MSP430::JCC))
    .addMBB(LoopBB)
    .addImm(MSP430CC::COND_NE);

  // TailBB:
  // DestReg = phi [%SrcReg, BB], [%ValueNext, LoopBB]
  BuildMI(*TailBB, TailBB->begin(), Loc, TII.get(MSP430::PHI), DstReg)
    .addReg(SrcReg).addMBB(BB)
    .addReg(ValueNextReg).addMBB(LoopBB);

  MI->eraseFromParent();   // The pseudo instruction is gone now.
  return TailBB;
}
bool MipsExpandPseudo::expandAtomicBinOpSubword( MachineBasicBlock &BB, MachineBasicBlock::iterator I, MachineBasicBlock::iterator &NMBBI) { MachineFunction *MF = BB.getParent(); const bool ArePtrs64bit = STI->getABI().ArePtrs64bit(); DebugLoc DL = I->getDebugLoc(); unsigned LL, SC; unsigned BEQ = Mips::BEQ; unsigned SEOp = Mips::SEH; if (STI->inMicroMipsMode()) { LL = STI->hasMips32r6() ? Mips::LL_MMR6 : Mips::LL_MM; SC = STI->hasMips32r6() ? Mips::SC_MMR6 : Mips::SC_MM; BEQ = STI->hasMips32r6() ? Mips::BEQC_MMR6 : Mips::BEQ_MM; } else { LL = STI->hasMips32r6() ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6) : (ArePtrs64bit ? Mips::LL64 : Mips::LL); SC = STI->hasMips32r6() ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6) : (ArePtrs64bit ? Mips::SC64 : Mips::SC); } bool IsSwap = false; bool IsNand = false; unsigned Opcode = 0; switch (I->getOpcode()) { case Mips::ATOMIC_LOAD_NAND_I8_POSTRA: SEOp = Mips::SEB; LLVM_FALLTHROUGH; case Mips::ATOMIC_LOAD_NAND_I16_POSTRA: IsNand = true; break; case Mips::ATOMIC_SWAP_I8_POSTRA: SEOp = Mips::SEB; LLVM_FALLTHROUGH; case Mips::ATOMIC_SWAP_I16_POSTRA: IsSwap = true; break; case Mips::ATOMIC_LOAD_ADD_I8_POSTRA: SEOp = Mips::SEB; LLVM_FALLTHROUGH; case Mips::ATOMIC_LOAD_ADD_I16_POSTRA: Opcode = Mips::ADDu; break; case Mips::ATOMIC_LOAD_SUB_I8_POSTRA: SEOp = Mips::SEB; LLVM_FALLTHROUGH; case Mips::ATOMIC_LOAD_SUB_I16_POSTRA: Opcode = Mips::SUBu; break; case Mips::ATOMIC_LOAD_AND_I8_POSTRA: SEOp = Mips::SEB; LLVM_FALLTHROUGH; case Mips::ATOMIC_LOAD_AND_I16_POSTRA: Opcode = Mips::AND; break; case Mips::ATOMIC_LOAD_OR_I8_POSTRA: SEOp = Mips::SEB; LLVM_FALLTHROUGH; case Mips::ATOMIC_LOAD_OR_I16_POSTRA: Opcode = Mips::OR; break; case Mips::ATOMIC_LOAD_XOR_I8_POSTRA: SEOp = Mips::SEB; LLVM_FALLTHROUGH; case Mips::ATOMIC_LOAD_XOR_I16_POSTRA: Opcode = Mips::XOR; break; default: llvm_unreachable("Unknown subword atomic pseudo for expansion!"); } unsigned Dest = I->getOperand(0).getReg(); unsigned Ptr = I->getOperand(1).getReg(); unsigned 
Incr = I->getOperand(2).getReg(); unsigned Mask = I->getOperand(3).getReg(); unsigned Mask2 = I->getOperand(4).getReg(); unsigned ShiftAmnt = I->getOperand(5).getReg(); unsigned OldVal = I->getOperand(6).getReg(); unsigned BinOpRes = I->getOperand(7).getReg(); unsigned StoreVal = I->getOperand(8).getReg(); const BasicBlock *LLVM_BB = BB.getBasicBlock(); MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineFunction::iterator It = ++BB.getIterator(); MF->insert(It, loopMBB); MF->insert(It, sinkMBB); MF->insert(It, exitMBB); exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end()); exitMBB->transferSuccessorsAndUpdatePHIs(&BB); BB.addSuccessor(loopMBB, BranchProbability::getOne()); loopMBB->addSuccessor(sinkMBB); loopMBB->addSuccessor(loopMBB); loopMBB->normalizeSuccProbs(); BuildMI(loopMBB, DL, TII->get(LL), OldVal).addReg(Ptr).addImm(0); if (IsNand) { // and andres, oldval, incr2 // nor binopres, $0, andres // and newval, binopres, mask BuildMI(loopMBB, DL, TII->get(Mips::AND), BinOpRes) .addReg(OldVal) .addReg(Incr); BuildMI(loopMBB, DL, TII->get(Mips::NOR), BinOpRes) .addReg(Mips::ZERO) .addReg(BinOpRes); BuildMI(loopMBB, DL, TII->get(Mips::AND), BinOpRes) .addReg(BinOpRes) .addReg(Mask); } else if (!IsSwap) { // <binop> binopres, oldval, incr2 // and newval, binopres, mask BuildMI(loopMBB, DL, TII->get(Opcode), BinOpRes) .addReg(OldVal) .addReg(Incr); BuildMI(loopMBB, DL, TII->get(Mips::AND), BinOpRes) .addReg(BinOpRes) .addReg(Mask); } else { // atomic.swap // and newval, incr2, mask BuildMI(loopMBB, DL, TII->get(Mips::AND), BinOpRes) .addReg(Incr) .addReg(Mask); } // and StoreVal, OlddVal, Mask2 // or StoreVal, StoreVal, BinOpRes // StoreVal<tied1> = sc StoreVal, 0(Ptr) // beq StoreVal, zero, loopMBB BuildMI(loopMBB, DL, TII->get(Mips::AND), StoreVal) .addReg(OldVal).addReg(Mask2); BuildMI(loopMBB, DL, 
TII->get(Mips::OR), StoreVal) .addReg(StoreVal).addReg(BinOpRes); BuildMI(loopMBB, DL, TII->get(SC), StoreVal) .addReg(StoreVal).addReg(Ptr).addImm(0); BuildMI(loopMBB, DL, TII->get(BEQ)) .addReg(StoreVal).addReg(Mips::ZERO).addMBB(loopMBB); // sinkMBB: // and maskedoldval1,oldval,mask // srl srlres,maskedoldval1,shiftamt // sign_extend dest,srlres sinkMBB->addSuccessor(exitMBB, BranchProbability::getOne()); BuildMI(sinkMBB, DL, TII->get(Mips::AND), Dest) .addReg(OldVal).addReg(Mask); BuildMI(sinkMBB, DL, TII->get(Mips::SRLV), Dest) .addReg(Dest).addReg(ShiftAmnt); if (STI->hasMips32r2()) { BuildMI(sinkMBB, DL, TII->get(SEOp), Dest).addReg(Dest); } else { const unsigned ShiftImm = SEOp == Mips::SEH ? 16 : 24; BuildMI(sinkMBB, DL, TII->get(Mips::SLL), Dest) .addReg(Dest, RegState::Kill) .addImm(ShiftImm); BuildMI(sinkMBB, DL, TII->get(Mips::SRA), Dest) .addReg(Dest, RegState::Kill) .addImm(ShiftImm); } LivePhysRegs LiveRegs; computeAndAddLiveIns(LiveRegs, *loopMBB); computeAndAddLiveIns(LiveRegs, *sinkMBB); computeAndAddLiveIns(LiveRegs, *exitMBB); NMBBI = BB.end(); I->eraseFromParent(); return true; }
/// Run the execution-dependency fix over \p mf for the register class RC.
///
/// Visits every block in reverse post-order, revisits the blocks recorded as
/// having an unknown back-edge, then releases all per-block live-out state.
/// Returns false in every case.
bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
  MF = &mf;
  TII = MF->getSubtarget().getInstrInfo();
  TRI = MF->getSubtarget().getRegisterInfo();
  LiveRegs = nullptr;
  assert(NumRegs == RC->getNumRegs() && "Bad regclass");

  DEBUG(dbgs() << "********** FIX EXECUTION DEPENDENCIES: "
               << TRI->getRegClassName(RC) << " **********\n");

  // If no relevant registers are used in the function, we can skip it
  // completely.
  bool AnyRegUsed = false;
  for (unsigned Reg : *RC) {
    if (MF->getRegInfo().isPhysRegUsed(Reg)) {
      AnyRegUsed = true;
      break;
    }
  }
  if (!AnyRegUsed)
    return false;

  // Initialize the AliasMap on the first use.
  if (AliasMap.empty()) {
    // Given a PhysReg, AliasMap[PhysReg] returns a list of indices into RC and
    // therefore the LiveRegs array.
    AliasMap.resize(TRI->getNumRegs());
    const unsigned NumClassRegs = RC->getNumRegs();
    for (unsigned Idx = 0; Idx != NumClassRegs; ++Idx) {
      for (MCRegAliasIterator AI(RC->getRegister(Idx), TRI, true);
           AI.isValid(); ++AI)
        AliasMap[*AI].push_back(Idx);
    }
  }

  MachineBasicBlock *Entry = MF->begin();
  ReversePostOrderTraversal<MachineBasicBlock*> RPOT(Entry);
  SmallVector<MachineBasicBlock*, 16> Loops;

  // First pass: every block in reverse post-order. Blocks for which
  // enterBasicBlock left SeenUnknownBackEdge set are queued for a revisit.
  for (MachineBasicBlock *MBB : RPOT) {
    enterBasicBlock(MBB);
    if (SeenUnknownBackEdge)
      Loops.push_back(MBB);
    for (MachineInstr &MI : *MBB)
      visitInstr(&MI);
    processUndefReads(MBB);
    leaveBasicBlock(MBB);
  }

  // Visit all the loop blocks again in order to merge DomainValues from
  // back-edges.
  for (MachineBasicBlock *MBB : Loops) {
    enterBasicBlock(MBB);
    for (MachineInstr &MI : *MBB) {
      if (MI.isDebugValue())
        continue;
      processDefs(&MI, false);
    }
    processUndefReads(MBB);
    leaveBasicBlock(MBB);
  }

  // Clear the LiveOuts vectors and collapse any remaining DomainValues.
  for (MachineBasicBlock *MBB : RPOT) {
    LiveOutMap::const_iterator FI = LiveOuts.find(MBB);
    if (FI != LiveOuts.end() && FI->second) {
      for (unsigned Idx = 0; Idx != NumRegs; ++Idx) {
        if (FI->second[Idx].Value)
          release(FI->second[Idx].Value);
      }
      delete[] FI->second;
    }
  }

  // Drop all remaining per-function state.
  LiveOuts.clear();
  UndefReads.clear();
  Avail.clear();
  Allocator.DestroyAll();

  return false;
}
/// Analyze the terminators at the end of \p MBB.
///
/// Returns false when the branching was understood and true when it was not.
/// On a false return the outputs describe the block as follows:
///  - no unpredicated terminator (or empty block): nothing is written;
///  - a single unconditional branch (AArch64::Bimm): \p TBB is its target;
///  - a single conditional branch: \p TBB and \p Cond are filled in by
///    classifyCondBranch;
///  - a conditional branch followed by an unconditional branch: \p TBB and
///    \p Cond describe the conditional branch and \p FBB is the target of the
///    unconditional one.
///
/// When \p AllowModify is set, redundant trailing unconditional branches are
/// erased from the block.
bool AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  // Skip trailing debug values; a block made up only of them is treated
  // like an empty block.
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  if (!isUnpredicatedTerminator(I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  // Note that the condition below steps I back by one as a side effect when
  // I is not already at the start of the block.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
    if (LastOpc == AArch64::Bimm) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranch(LastOpc)) {
      classifyCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && LastOpc == AArch64::Bimm) {
    while (SecondLastOpc == AArch64::Bimm) {
      // The later branch is erased; the earlier one becomes the new
      // "last" instruction.
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
        // Return now the only terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (LastOpc == AArch64::Bimm) {
    if (SecondLastOpc == AArch64::Bcc) {
      // Cond receives the Bcc opcode as an immediate, followed by operand 0
      // of the conditional branch; operand 1 is its target block.
      TBB = SecondLastInst->getOperand(1).getMBB();
      Cond.push_back(MachineOperand::CreateImm(AArch64::Bcc));
      Cond.push_back(SecondLastInst->getOperand(0));
      FBB = LastInst->getOperand(0).getMBB();
      return false;
    } else if (isCondBranch(SecondLastOpc)) {
      classifyCondBranch(SecondLastInst, TBB, Cond);
      FBB = LastInst->getOperand(0).getMBB();
      return false;
    }
  }

  // If the block ends with two unconditional branches, handle it. The second
  // one is not executed, so remove it.
  // NOTE(review): when AllowModify is set, the loop above appears to have
  // already removed every such pair, so the erase below looks unreachable in
  // practice - confirm before relying on it.
  if (SecondLastOpc == AArch64::Bimm && LastOpc == AArch64::Bimm) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}