bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
                                                  const CallContext &Context) {
  // Ok, we can in fact do the transformation for this call.
  // Do not remove the FrameSetup instruction, but adjust the parameters.
  // PEI will end up finalizing the handling of this.
  MachineBasicBlock::iterator FrameSetup = Context.FrameSetup;
  MachineBasicBlock &MBB = *(FrameSetup->getParent());
  FrameSetup->getOperand(1).setImm(Context.ExpectedDist);

  DebugLoc DL = FrameSetup->getDebugLoc();
  // Now, iterate through the vector in reverse order, and replace the movs
  // with pushes. MOVmi/MOVmr don't have any defs, so no need to
  // replace uses.
  for (int Idx = (Context.ExpectedDist / 4) - 1; Idx >= 0; --Idx) {
    MachineBasicBlock::iterator MOV = *Context.MovVector[Idx];
    MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands);
    MachineBasicBlock::iterator Push = nullptr;
    if (MOV->getOpcode() == X86::MOV32mi) {
      unsigned PushOpcode = X86::PUSHi32;
      // If the operand is a small (8-bit) immediate, we can use a
      // PUSH instruction with a shorter encoding.
      // Note that isImm() may fail even though this is a MOVmi, because
      // the operand can also be a symbol.
      if (PushOp.isImm()) {
        int64_t Val = PushOp.getImm();
        if (isInt<8>(Val))
          PushOpcode = X86::PUSH32i8;
      }
      Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode))
                 .addOperand(PushOp);
    } else {
      unsigned int Reg = PushOp.getReg();

      // If PUSHrmm is not slow on this target, try to fold the source of the
      // push into the instruction.
      bool SlowPUSHrmm = STI->isAtom() || STI->isSLM();

      // Check that this is legal to fold. Right now, we're extremely
      // conservative about that.
      MachineInstr *DefMov = nullptr;
      if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) {
        Push = BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32rmm));

        unsigned NumOps = DefMov->getDesc().getNumOperands();
        for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
          Push->addOperand(DefMov->getOperand(i));

        DefMov->eraseFromParent();
      } else {
        Push = BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32r))
                   .addReg(Reg)
                   .getInstr();
      }
    }

    // For debugging, when using SP-based CFA, we need to adjust the CFA
    // offset after each push.
    // TODO: This is needed only if we require precise CFA.
    if (!TFL->hasFP(MF))
      TFL->BuildCFI(MBB, std::next(Push), DL,
                    MCCFIInstruction::createAdjustCfaOffset(nullptr, 4));

    MBB.erase(MOV);
  }

  // The stack-pointer copy is no longer used in the call sequences.
  // There should not be any other users, but we can't commit to that, so:
  if (MRI->use_empty(Context.SPCopy->getOperand(0).getReg()))
    Context.SPCopy->eraseFromParent();

  // Once we've done this, we need to make sure PEI doesn't assume a reserved
  // frame.
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  FuncInfo->setHasPushSequences(true);

  return true;
}
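// Illustrative before/after for the rewrite above (operand encodings
// abbreviated; the exact MI operand order follows the X86 memory-operand
// layout). For a call passing two 32-bit stack arguments:
//
//   Before:                               After:
//     ADJCALLSTACKDOWN32 8                  ADJCALLSTACKDOWN32 8  ; kept
//     MOV32mi [%esp], 42                    PUSH32r  %eax
//     MOV32mr [%esp+4], %eax                PUSH32i8 42
//     CALLpcrel32 @f                        CALLpcrel32 @f
//
// Note the reversed order: the highest-offset slot is pushed first, and the
// small immediate uses the shorter PUSH32i8 encoding.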
/// findSurvivorReg - Return the candidate register that is unused for the
/// longest after StartMI. UseMI is set to the instruction where the search
/// stopped.
///
/// No more than InstrLimit instructions are inspected.
///
unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
                                       BitVector &Candidates,
                                       unsigned InstrLimit,
                                       MachineBasicBlock::iterator &UseMI) {
  int Survivor = Candidates.find_first();
  assert(Survivor > 0 && "No candidates for scavenging");

  MachineBasicBlock::iterator ME = MBB->getFirstTerminator();
  assert(StartMI != ME && "MI already at terminator");

  MachineBasicBlock::iterator RestorePointMI = StartMI;
  MachineBasicBlock::iterator MI = StartMI;

  bool inVirtLiveRange = false;
  for (++MI; InstrLimit > 0 && MI != ME; ++MI, --InstrLimit) {
    if (MI->isDebugValue()) {
      ++InstrLimit; // Don't count debug instructions
      continue;
    }
    bool isVirtKillInsn = false;
    bool isVirtDefInsn = false;
    // Remove any candidates touched by instruction.
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = MI->getOperand(i);
      if (MO.isRegMask())
        Candidates.clearBitsNotInMask(MO.getRegMask());
      if (!MO.isReg() || MO.isUndef() || !MO.getReg())
        continue;
      if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
        if (MO.isDef())
          isVirtDefInsn = true;
        else if (MO.isKill())
          isVirtKillInsn = true;
        continue;
      }
      for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
        Candidates.reset(*AI);
    }
    // If we're not in a virtual reg's live range, this is a valid
    // restore point.
    if (!inVirtLiveRange) RestorePointMI = MI;

    // Update whether we're in the live range of a virtual register.
    if (isVirtKillInsn) inVirtLiveRange = false;
    if (isVirtDefInsn) inVirtLiveRange = true;

    // Was our survivor untouched by this instruction?
    if (Candidates.test(Survivor))
      continue;

    // All candidates gone?
    if (Candidates.none())
      break;

    Survivor = Candidates.find_first();
  }
  // If we ran off the end, that's where we want to restore.
  if (MI == ME) RestorePointMI = ME;
  assert(RestorePointMI != StartMI &&
         "No available scavenger restore location!");

  // We ran out of candidates, so stop the search.
  UseMI = RestorePointMI;
  return Survivor;
}
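// A minimal usage sketch (assuming the surrounding RegScavenger interface;
// the limit of 25 instructions is illustrative, not mandated): the caller
// computes a candidate set, asks for the longest-surviving register, and
// arranges a spill/restore around the scavenged range if the survivor is
// actually live:
//
//   BitVector Candidates = ...;              // allocatable, non-reserved
//   MachineBasicBlock::iterator UseMI;
//   unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI);
//   // spill SReg after I, restore it immediately before UseMI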
X86CallFrameOptimization::InstClassification
X86CallFrameOptimization::classifyInstruction(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    const X86RegisterInfo &RegInfo, DenseSet<unsigned int> &UsedRegs) {
  if (MI == MBB.end())
    return Exit;

  // The instructions we actually care about are movs onto the stack or
  // special cases of constant-stores to stack.
  switch (MI->getOpcode()) {
  case X86::AND16mi8:
  case X86::AND32mi8:
  case X86::AND64mi8: {
    MachineOperand ImmOp = MI->getOperand(X86::AddrNumOperands);
    return ImmOp.getImm() == 0 ? Convert : Exit;
  }
  case X86::OR16mi8:
  case X86::OR32mi8:
  case X86::OR64mi8: {
    MachineOperand ImmOp = MI->getOperand(X86::AddrNumOperands);
    return ImmOp.getImm() == -1 ? Convert : Exit;
  }
  case X86::MOV32mi:
  case X86::MOV32mr:
  case X86::MOV64mi32:
  case X86::MOV64mr:
    return Convert;
  }

  // Not all calling conventions have only stack MOVs between the stack
  // adjust and the call.
  // We want to tolerate other instructions, to cover more cases.
  // In particular:
  // a) PCrel calls, where we expect an additional COPY of the basereg.
  // b) Passing frame-index addresses.
  // c) Calling conventions that have inreg parameters. These generate
  //    both copies and movs into registers.
  // To avoid creating lots of special cases, allow any instruction
  // that does not write into memory, does not def or use the stack
  // pointer, and does not def any register that was used by a preceding
  // push.
  // (Reading from memory is allowed, even if referenced through a
  // frame index, since these will get adjusted properly in PEI.)

  // The reason for the last condition is that the pushes can't replace
  // the movs in place, because the order must be reversed.
  // So if we have a MOV32mr that uses EDX, then an instruction that defs
  // EDX, and then the call, after the transformation the push will use
  // the modified version of EDX, and not the original one.
  // Since we are still in SSA form at this point, we only need to
  // make sure we don't clobber any *physical* registers that were
  // used by an earlier mov that will become a push.

  if (MI->isCall() || MI->mayStore())
    return Exit;

  for (const MachineOperand &MO : MI->operands()) {
    if (!MO.isReg())
      continue;
    unsigned int Reg = MO.getReg();
    if (!RegInfo.isPhysicalRegister(Reg))
      continue;
    if (RegInfo.regsOverlap(Reg, RegInfo.getStackRegister()))
      return Exit;
    if (MO.isDef()) {
      for (unsigned int U : UsedRegs)
        if (RegInfo.regsOverlap(Reg, U))
          return Exit;
    }
  }

  return Skip;
}
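// Worked example of the special cases above: an 8-bit-immediate AND with 0
// or OR with -1 writes a known constant regardless of the previous memory
// contents, so both are classified exactly like constant MOV stores:
//
//   and dword ptr [esp], 0      ; equivalent to  mov dword ptr [esp], 0
//   or  dword ptr [esp+4], -1   ; equivalent to  mov dword ptr [esp+4], -1
//
// and can later become "push $0" / "push $-1". Any other immediate makes the
// result depend on the old memory value, hence Exit.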
void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
  AArch64MachineFunctionInfo *FuncInfo =
    MF.getInfo<AArch64MachineFunctionInfo>();
  MachineBasicBlock &MBB = MF.front();
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
  DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();

  MachineModuleInfo &MMI = MF.getMMI();
  std::vector<MachineMove> &Moves = MMI.getFrameMoves();
  bool NeedsFrameMoves = MMI.hasDebugInfo()
    || MF.getFunction()->needsUnwindTableEntry();

  uint64_t NumInitialBytes, NumResidualBytes;

  // Currently we expect the stack to be laid out by
  //     sub sp, sp, #initial
  //     stp x29, x30, [sp, #offset]
  //     ...
  //     str xxx, [sp, #offset]
  //     sub sp, sp, #rest (possibly via extra instructions).
  if (MFI->getCalleeSavedInfo().size()) {
    // If there are callee-saved registers, we want to store them efficiently
    // as a block, and virtual base assignment happens too early to do it for
    // us so we adjust the stack in two phases: first just for callee-saved
    // fiddling, then to allocate the rest of the frame.
    splitSPAdjustments(MFI->getStackSize(), NumInitialBytes, NumResidualBytes);
  } else {
    // If there aren't any callee-saved registers, two-phase adjustment is
    // inefficient. It's more efficient to adjust with NumInitialBytes too
    // because when we're in a "callee pops argument space" situation, that
    // pop must be tacked onto Initial for correctness.
    NumInitialBytes = MFI->getStackSize();
    NumResidualBytes = 0;
  }

  // Tell everyone else how much adjustment we're expecting them to use. In
  // particular if an adjustment is required for a tail call the epilogue
  // could have a different view of things.
  FuncInfo->setInitialStackAdjust(NumInitialBytes);

  emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumInitialBytes,
               MachineInstr::FrameSetup);

  if (NeedsFrameMoves && NumInitialBytes) {
    // We emit this update even if the CFA is set from a frame pointer later
    // so that the CFA is valid in the interim.
    MCSymbol *SPLabel = MMI.getContext().CreateTempSymbol();
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
      .addSym(SPLabel);

    MachineLocation Dst(MachineLocation::VirtualFP);
    MachineLocation Src(AArch64::XSP, NumInitialBytes);
    Moves.push_back(MachineMove(SPLabel, Dst, Src));
  }

  // Otherwise we need to set the frame pointer and/or add a second stack
  // adjustment.

  bool FPNeedsSetting = hasFP(MF);
  for (; MBBI != MBB.end(); ++MBBI) {
    // Note that this search makes strong assumptions about the operation
    // used to store the frame-pointer: it must be "STP x29, x30, ...". This
    // could change in future, but until then there's no point in
    // implementing untestable more generic cases.
    if (FPNeedsSetting && MBBI->getOpcode() == AArch64::LSPair64_STR
                       && MBBI->getOperand(0).getReg() == AArch64::X29) {
      int64_t X29FrameIdx = MBBI->getOperand(2).getIndex();
      FuncInfo->setFramePointerOffset(MFI->getObjectOffset(X29FrameIdx));

      ++MBBI;
      emitRegUpdate(MBB, MBBI, DL, TII, AArch64::X29, AArch64::XSP,
                    AArch64::X29,
                    NumInitialBytes + MFI->getObjectOffset(X29FrameIdx),
                    MachineInstr::FrameSetup);

      // The offset adjustment used when emitting debugging locations relative
      // to whatever frame base is set. AArch64 uses the default frame base
      // (FP or SP) and this adjusts the calculations to be correct.
      MFI->setOffsetAdjustment(- MFI->getObjectOffset(X29FrameIdx)
                               - MFI->getStackSize());

      if (NeedsFrameMoves) {
        MCSymbol *FPLabel = MMI.getContext().CreateTempSymbol();
        BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
          .addSym(FPLabel);
        MachineLocation Dst(MachineLocation::VirtualFP);
        MachineLocation Src(AArch64::X29, -MFI->getObjectOffset(X29FrameIdx));
        Moves.push_back(MachineMove(FPLabel, Dst, Src));
      }

      FPNeedsSetting = false;
    }

    if (!MBBI->getFlag(MachineInstr::FrameSetup))
      break;
  }

  assert(!FPNeedsSetting && "Frame pointer couldn't be set");

  emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumResidualBytes,
               MachineInstr::FrameSetup);

  // Now we emit the rest of the frame setup information, if necessary: we've
  // already noted the FP and initial SP moves so we're left with the
  // prologue's final SP update and callee-saved register locations.
  if (!NeedsFrameMoves)
    return;

  // Reuse the label if appropriate, so create it in this outer scope.
  MCSymbol *CSLabel = 0;

  // The rest of the stack adjustment.
  if (!hasFP(MF) && NumResidualBytes) {
    CSLabel = MMI.getContext().CreateTempSymbol();
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
      .addSym(CSLabel);

    MachineLocation Dst(MachineLocation::VirtualFP);
    MachineLocation Src(AArch64::XSP, NumResidualBytes + NumInitialBytes);
    Moves.push_back(MachineMove(CSLabel, Dst, Src));
  }

  // And any callee-saved registers (it's fine to leave them to the end here,
  // because the old values are still valid at this point).
  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
  if (CSI.size()) {
    if (!CSLabel) {
      CSLabel = MMI.getContext().CreateTempSymbol();
      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
        .addSym(CSLabel);
    }

    for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
           E = CSI.end(); I != E; ++I) {
      MachineLocation Dst(MachineLocation::VirtualFP,
                          MFI->getObjectOffset(I->getFrameIdx()));
      MachineLocation Src(I->getReg());
      Moves.push_back(MachineMove(CSLabel, Dst, Src));
    }
  }
}
void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const Counters &Increment) {
  // Get the hardware counter increments and sum them up.
  Counters Limit = ZeroCounts;
  unsigned Sum = 0;

  if (TII->mayAccessFlatAddressSpace(*I))
    IsFlatOutstanding = true;

  for (unsigned i = 0; i < 3; ++i) {
    LastIssued.Array[i] += Increment.Array[i];
    if (Increment.Array[i])
      Limit.Array[i] = LastIssued.Array[i];
    Sum += Increment.Array[i];
  }

  // If we don't increase anything then that's it.
  if (Sum == 0) {
    LastOpcodeType = OTHER;
    return;
  }

  if (ST->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
    // Any occurrence of consecutive VMEM or SMEM instructions forms a VMEM
    // or SMEM clause, respectively.
    //
    // The temporary workaround is to break the clauses with S_NOP.
    //
    // The proper solution would be to allocate registers such that all source
    // and destination registers don't overlap, e.g. this is illegal:
    //   r0 = load r2
    //   r2 = load r0
    if (LastOpcodeType == VMEM && Increment.Named.VM) {
      // Insert a NOP to break the clause.
      BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP))
          .addImm(0);
      LastInstWritesM0 = false;
    }

    if (TII->isSMRD(*I))
      LastOpcodeType = SMEM;
    else if (Increment.Named.VM)
      LastOpcodeType = VMEM;
  }

  // Remember which export instructions we have seen.
  if (Increment.Named.EXP) {
    ExpInstrTypesSeen |= TII->isEXP(*I) ? 1 : 2;
  }

  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
    MachineOperand &Op = I->getOperand(i);
    if (!isOpRelevant(Op))
      continue;

    const TargetRegisterClass *RC = TII->getOpRegClass(*I, i);
    RegInterval Interval = getRegInterval(RC, Op);
    for (unsigned j = Interval.first; j < Interval.second; ++j) {
      // Remember which registers we define...
      if (Op.isDef())
        DefinedRegs[j] = Limit;

      // ...and which ones we are using.
      if (Op.isUse())
        UsedRegs[j] = Limit;
    }
  }
}
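// Illustrative effect of the clause-breaking workaround above (register
// choices hypothetical): two back-to-back VMEM loads with overlapping
// operands would otherwise form a clause on VI:
//
//   buffer_load_dword v0, v2, ...
//   s_nop 0                       ; inserted between consecutive VMEM ops
//   buffer_load_dword v2, v0, ...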
// Finds the compare instruction that corresponds to supported types of
// branching. Returns the instruction or nullptr on failure or when an
// unsupported instruction is detected.
MachineInstr *AArch64ConditionOptimizer::findSuitableCompare(
    MachineBasicBlock *MBB) {
  MachineBasicBlock::iterator I = MBB->getFirstTerminator();
  if (I == MBB->end())
    return nullptr;

  if (I->getOpcode() != AArch64::Bcc)
    return nullptr;

  // Since we may modify the cmp of this MBB, make sure NZCV does not live
  // out.
  for (auto SuccBB : MBB->successors())
    if (SuccBB->isLiveIn(AArch64::NZCV))
      return nullptr;

  // Now find the instruction controlling the terminator.
  for (MachineBasicBlock::iterator B = MBB->begin(); I != B;) {
    --I;
    assert(!I->isTerminator() && "Spurious terminator");
    // Check if there is any use of NZCV between CMP and Bcc.
    if (I->readsRegister(AArch64::NZCV))
      return nullptr;
    switch (I->getOpcode()) {
    // cmp is an alias for subs with a dead destination register.
    case AArch64::SUBSWri:
    case AArch64::SUBSXri:
    // cmn is an alias for adds with a dead destination register.
    case AArch64::ADDSWri:
    case AArch64::ADDSXri: {
      unsigned ShiftAmt = AArch64_AM::getShiftValue(I->getOperand(3).getImm());
      if (!I->getOperand(2).isImm()) {
        DEBUG(dbgs() << "Immediate of cmp is symbolic, " << *I << '\n');
        return nullptr;
      } else if (I->getOperand(2).getImm() << ShiftAmt >= 0xfff) {
        DEBUG(dbgs() << "Immediate of cmp may be out of range, " << *I
                     << '\n');
        return nullptr;
      } else if (!MRI->use_empty(I->getOperand(0).getReg())) {
        DEBUG(dbgs() << "Destination of cmp is not dead, " << *I << '\n');
        return nullptr;
      }
      return I;
    }
    // Prevent false positive case like:
    //   cmp  w19, #0
    //   cinc w0, w19, gt
    //   ...
    //   fcmp d8, #0.0
    //   b.gt .LBB0_5
    case AArch64::FCMPDri:
    case AArch64::FCMPSri:
    case AArch64::FCMPESri:
    case AArch64::FCMPEDri:

    case AArch64::SUBSWrr:
    case AArch64::SUBSXrr:
    case AArch64::ADDSWrr:
    case AArch64::ADDSXrr:

    case AArch64::FCMPSrr:
    case AArch64::FCMPDrr:
    case AArch64::FCMPESrr:
    case AArch64::FCMPEDrr:
      // Skip comparison instructions without immediate operands.
      return nullptr;
    }
  }
  DEBUG(dbgs() << "Flags not defined in BB#" << MBB->getNumber() << '\n');
  return nullptr;
}
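// Shape of the pattern accepted above (illustrative): an immediate subs/adds
// whose destination is dead, with no NZCV reads between it and the branch:
//
//   SUBSWri %w1, %w0, 42, 0      ; "cmp w0, #42", %w1 has no uses
//   ...                          ; nothing reads or clobbers NZCV here
//   Bcc gt, %bb.5
//
// Register-register compares, FP compares, out-of-range immediates, or a
// live destination all make the search bail out with nullptr.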
static void analyzeFrameIndexes(MachineFunction &MF) {
  if (MBDisableStackAdjust)
    return;

  MachineFrameInfo *MFI = MF.getFrameInfo();
  MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  MachineRegisterInfo::livein_iterator LII = MRI.livein_begin();
  MachineRegisterInfo::livein_iterator LIE = MRI.livein_end();
  const SmallVector<int, 16> &LiveInFI = MBlazeFI->getLiveIn();
  SmallVector<MachineInstr*, 16> EraseInstr;
  SmallVector<std::pair<int,int64_t>, 16> FrameRelocate;

  MachineBasicBlock *MBB = MF.getBlockNumbered(0);
  MachineBasicBlock::iterator MIB = MBB->begin();
  MachineBasicBlock::iterator MIE = MBB->end();

  int StackAdjust = 0;
  int StackOffset = -28;

  // In this loop we are searching for frame indexes that correspond to
  // incoming arguments that are already in the stack. We look for
  // instruction sequences like the following:
  //
  //    LWI REG, FI1, 0
  //    ...
  //    SWI REG, FI2, 0
  //
  // As long as there are no defs of REG in the ... part, we can eliminate
  // the SWI instruction because the value has already been stored to the
  // stack by the caller. All we need to do is locate FI at the correct
  // stack location according to the calling conventions.
  //
  // Additionally, if the SWI operation kills the def of REG then we don't
  // need the LWI operation so we can erase it as well.
  for (unsigned i = 0, e = LiveInFI.size(); i < e; ++i) {
    for (MachineBasicBlock::iterator I=MIB; I != MIE; ++I) {
      if (I->getOpcode() != MBlaze::LWI || I->getNumOperands() != 3 ||
          !I->getOperand(1).isFI() || !I->getOperand(0).isReg() ||
          I->getOperand(1).getIndex() != LiveInFI[i]) continue;

      unsigned FIReg = I->getOperand(0).getReg();
      MachineBasicBlock::iterator SI = I;
      for (SI++; SI != MIE; ++SI) {
        if (!SI->getOperand(0).isReg() ||
            !SI->getOperand(1).isFI() ||
            SI->getOpcode() != MBlaze::SWI) continue;

        int FI = SI->getOperand(1).getIndex();
        if (SI->getOperand(0).getReg() != FIReg ||
            MFI->isFixedObjectIndex(FI) ||
            MFI->getObjectSize(FI) != 4) continue;

        if (SI->getOperand(0).isDef()) break;

        if (SI->getOperand(0).isKill()) {
          DEBUG(dbgs() << "LWI for FI#" << I->getOperand(1).getIndex()
                       << " removed\n");
          EraseInstr.push_back(I);
        }

        EraseInstr.push_back(SI);
        DEBUG(dbgs() << "SWI for FI#" << FI << " removed\n");

        FrameRelocate.push_back(std::make_pair(FI,StackOffset));
        DEBUG(dbgs() << "FI#" << FI << " relocated to " << StackOffset
                     << "\n");

        StackOffset -= 4;
        StackAdjust += 4;
        break;
      }
    }
  }

  // In this loop we are searching for frame indexes that correspond to
  // incoming arguments that are in registers. We look for instruction
  // sequences like the following:
  //
  //    ...  SWI REG, FI, 0
  //
  // As long as the ... part does not define REG and if REG is an incoming
  // parameter register then we know that, according to ABI conventions, the
  // caller has allocated stack space for it already. Instead of allocating
  // stack space on our frame, we record the correct location in the
  // caller's frame.
  for (MachineRegisterInfo::livein_iterator LI = LII; LI != LIE; ++LI) {
    for (MachineBasicBlock::iterator I=MIB; I != MIE; ++I) {
      if (I->definesRegister(LI->first))
        break;

      if (I->getOpcode() != MBlaze::SWI || I->getNumOperands() != 3 ||
          !I->getOperand(1).isFI() || !I->getOperand(0).isReg() ||
          I->getOperand(1).getIndex() < 0) continue;

      if (I->getOperand(0).getReg() == LI->first) {
        int FI = I->getOperand(1).getIndex();
        MBlazeFI->recordLiveIn(FI);

        int FILoc = 0;
        switch (LI->first) {
        default: llvm_unreachable("invalid incoming parameter!");
        case MBlaze::R5:  FILoc = -4; break;
        case MBlaze::R6:  FILoc = -8; break;
        case MBlaze::R7:  FILoc = -12; break;
        case MBlaze::R8:  FILoc = -16; break;
        case MBlaze::R9:  FILoc = -20; break;
        case MBlaze::R10: FILoc = -24; break;
        }

        StackAdjust += 4;
        FrameRelocate.push_back(std::make_pair(FI,FILoc));
        DEBUG(dbgs() << "FI#" << FI << " relocated to " << FILoc << "\n");
        break;
      }
    }
  }

  // Go ahead and erase all of the instructions that we determined were
  // no longer needed.
  for (int i = 0, e = EraseInstr.size(); i < e; ++i)
    MBB->erase(EraseInstr[i]);

  // Replace all of the frame indexes that we have relocated with new
  // fixed object frame indexes.
  replaceFrameIndexes(MF, FrameRelocate);
}
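// Worked example for the second loop (hypothetical frame index): if incoming
// parameter R6 reaches a store "SWI R6, FI#3, 0" with no earlier def of R6,
// the switch above picks FILoc = -8, so the pass records
//
//   FrameRelocate += (FI#3, -8);   StackAdjust += 4;
//
// i.e. FI#3 is remapped onto the slot the caller already allocated instead
// of reserving fresh space in this function's frame.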
MachineBasicBlock *
MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
  MachineFunction *MF = getParent();
  DebugLoc dl;  // FIXME: this is nowhere

  // We may need to update this's terminator, but we can't do that if
  // AnalyzeBranch fails. If this uses a jump table, we won't touch it.
  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
  MachineBasicBlock *TBB = 0, *FBB = 0;
  SmallVector<MachineOperand, 4> Cond;
  if (TII->AnalyzeBranch(*this, TBB, FBB, Cond))
    return NULL;

  // Avoid bugpoint weirdness: A block may end with a conditional branch but
  // jump to the same MBB in either case. We have duplicate CFG edges in that
  // case that we can't handle. Since this never happens in properly
  // optimized code, just skip those edges.
  if (TBB && TBB == FBB) {
    DEBUG(dbgs() << "Won't split critical edge after degenerate BB#"
                 << getNumber() << '\n');
    return NULL;
  }

  MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
  MF->insert(llvm::next(MachineFunction::iterator(this)), NMBB);
  DEBUG(dbgs() << "Splitting critical edge:"
        " BB#" << getNumber()
        << " -- BB#" << NMBB->getNumber()
        << " -- BB#" << Succ->getNumber() << '\n');

  // On some targets like Mips, branches may kill virtual registers. Make
  // sure that LiveVariables is properly updated after updateTerminator
  // replaces the terminators.
  LiveVariables *LV = P->getAnalysisIfAvailable<LiveVariables>();

  // Collect a list of virtual registers killed by the terminators.
  SmallVector<unsigned, 4> KilledRegs;
  if (LV)
    for (iterator I = getFirstTerminator(), E = end(); I != E; ++I) {
      MachineInstr *MI = I;
      for (MachineInstr::mop_iterator OI = MI->operands_begin(),
           OE = MI->operands_end(); OI != OE; ++OI) {
        if (!OI->isReg() || !OI->isUse() || !OI->isKill() || OI->isUndef())
          continue;
        unsigned Reg = OI->getReg();
        if (TargetRegisterInfo::isVirtualRegister(Reg) &&
            LV->getVarInfo(Reg).removeKill(MI)) {
          KilledRegs.push_back(Reg);
          DEBUG(dbgs() << "Removing terminator kill: " << *MI);
          OI->setIsKill(false);
        }
      }
    }

  ReplaceUsesOfBlockWith(Succ, NMBB);
  updateTerminator();

  // Insert unconditional "jump Succ" instruction in NMBB if necessary.
  NMBB->addSuccessor(Succ);
  if (!NMBB->isLayoutSuccessor(Succ)) {
    Cond.clear();
    MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, Succ, NULL, Cond, dl);
  }

  // Fix PHI nodes in Succ so they refer to NMBB instead of this.
  for (MachineBasicBlock::iterator i = Succ->begin(), e = Succ->end();
       i != e && i->isPHI(); ++i)
    for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2)
      if (i->getOperand(ni+1).getMBB() == this)
        i->getOperand(ni+1).setMBB(NMBB);

  // Inherit live-ins from the successor.
  for (MachineBasicBlock::livein_iterator I = Succ->livein_begin(),
         E = Succ->livein_end(); I != E; ++I)
    NMBB->addLiveIn(*I);

  // Update LiveVariables.
  if (LV) {
    // Restore kills of virtual registers that were killed by the
    // terminators.
    while (!KilledRegs.empty()) {
      unsigned Reg = KilledRegs.pop_back_val();
      for (iterator I = end(), E = begin(); I != E;) {
        if (!(--I)->addRegisterKilled(Reg, NULL, /* addIfNotFound= */ false))
          continue;
        LV->getVarInfo(Reg).Kills.push_back(I);
        DEBUG(dbgs() << "Restored terminator kill: " << *I);
        break;
      }
    }
    // Update relevant live-through information.
    LV->addNewBlock(NMBB, this, Succ);
  }

  if (MachineDominatorTree *MDT =
      P->getAnalysisIfAvailable<MachineDominatorTree>()) {
    // Update dominator information.
    MachineDomTreeNode *SuccDTNode = MDT->getNode(Succ);

    bool IsNewIDom = true;
    for (const_pred_iterator PI = Succ->pred_begin(), E = Succ->pred_end();
         PI != E; ++PI) {
      MachineBasicBlock *PredBB = *PI;
      if (PredBB == NMBB)
        continue;
      if (!MDT->dominates(SuccDTNode, MDT->getNode(PredBB))) {
        IsNewIDom = false;
        break;
      }
    }

    // We know "this" dominates the newly created basic block.
    MachineDomTreeNode *NewDTNode = MDT->addNewBlock(NMBB, this);

    // If all the other predecessors of "Succ" are dominated by "Succ" itself
    // then the new block is the new immediate dominator of "Succ".
    // Otherwise, the new block doesn't dominate anything.
    if (IsNewIDom)
      MDT->changeImmediateDominator(SuccDTNode, NewDTNode);
  }

  if (MachineLoopInfo *MLI = P->getAnalysisIfAvailable<MachineLoopInfo>())
    if (MachineLoop *TIL = MLI->getLoopFor(this)) {
      // If one or the other blocks were not in a loop, the new block is not
      // either, and thus LI doesn't need to be updated.
      if (MachineLoop *DestLoop = MLI->getLoopFor(Succ)) {
        if (TIL == DestLoop) {
          // Both in the same loop, so NMBB joins that loop.
          DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
        } else if (TIL->contains(DestLoop)) {
          // Edge from an outer loop to an inner loop. Add to the outer loop.
          TIL->addBasicBlockToLoop(NMBB, MLI->getBase());
        } else if (DestLoop->contains(TIL)) {
          // Edge from an inner loop to an outer loop. Add to the outer loop.
          DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
        } else {
          // Edge from two loops with no containment relation. Because these
          // are natural loops, we know that the destination block must be
          // the header of its loop (adding a branch into a loop elsewhere
          // would create an irreducible loop).
          assert(DestLoop->getHeader() == Succ &&
                 "Should not create irreducible loops!");
          if (MachineLoop *P = DestLoop->getParentLoop())
            P->addBasicBlockToLoop(NMBB, MLI->getBase());
        }
      }
    }

  return NMBB;
}
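// Illustrative CFG effect (block numbers hypothetical): splitting the
// critical edge BB#1 -> BB#3, where BB#1 has another successor and BB#3 has
// another predecessor:
//
//   BB#1 -> BB#2                    BB#1 -> BB#2
//   BB#1 -> BB#3        =>          BB#1 -> NMBB -> BB#3
//   BB#0 -> BB#3                    BB#0 -> BB#3
//
// NMBB gets at most an unconditional branch, inherits BB#3's live-ins, and
// the dominator/loop updates above keep the analyses consistent.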
void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
  MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
  MachineBasicBlock::iterator MBBI = MBB.begin();
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const Function *Fn = MF.getFunction();
  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
  bool HasFP = hasFP(MF);
  DebugLoc DL = MBB.findDebugLoc(MBBI);

  int NumBytes = (int)MFI->getStackSize();
  if (!AFI->hasStackFrame()) {
    assert(!HasFP && "unexpected function without stack frame but with FP");

    // All of the stack allocation is for locals.
    AFI->setLocalStackSize(NumBytes);

    // Label used to tie together the PROLOG_LABEL and the MachineMoves.
    MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();

    // REDZONE: If the stack size is less than 128 bytes, we don't need
    // to actually allocate.
    if (NumBytes && !canUseRedZone(MF)) {
      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);

      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    } else if (NumBytes) {
      ++NumRedZoneFunctions;
    }

    return;
  }

  // Only set up FP if we actually need to.
  int FPOffset = 0;
  if (HasFP) {
    // First instruction must a) allocate the stack and b) have an immediate
    // that is a multiple of -2.
    assert((MBBI->getOpcode() == AArch64::STPXpre ||
            MBBI->getOpcode() == AArch64::STPDpre) &&
           MBBI->getOperand(3).getReg() == AArch64::SP &&
           MBBI->getOperand(4).getImm() < 0 &&
           (MBBI->getOperand(4).getImm() & 1) == 0);

    // Frame pointer is fp = sp - 16. Since the STPXpre subtracts the space
    // required for the callee saved register area we get the frame pointer
    // by adding that offset - 16 = -getImm()*8 - 2*8 = -(getImm() + 2) * 8.
    FPOffset = -(MBBI->getOperand(4).getImm() + 2) * 8;
    assert(FPOffset >= 0 && "Bad Framepointer Offset");
  }

  // Move past the saves of the callee-saved registers.
  while (MBBI->getOpcode() == AArch64::STPXi ||
         MBBI->getOpcode() == AArch64::STPDi ||
         MBBI->getOpcode() == AArch64::STPXpre ||
         MBBI->getOpcode() == AArch64::STPDpre) {
    ++MBBI;
    NumBytes -= 16;
  }
  assert(NumBytes >= 0 && "Negative stack allocation size!?");
  if (HasFP) {
    // Issue    sub fp, sp, FPOffset   or
    //          mov fp, sp             when FPOffset is zero.
    // Note: All stores of callee-saved registers are marked as "FrameSetup".
    // This code marks the instruction(s) that set the FP also.
    emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
                    MachineInstr::FrameSetup);
  }

  // All of the remaining stack allocations are for locals.
  AFI->setLocalStackSize(NumBytes);

  // Allocate space for the rest of the frame.
  if (NumBytes) {
    // If we're a leaf function, try using the red zone.
    if (!canUseRedZone(MF))
      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);
  }

  // If we need a base pointer, set it up here. It's whatever the value of
  // the stack pointer is at this point. Any variable size objects will be
  // allocated after this, so we can still use the base pointer to reference
  // locals.
  //
  // FIXME: Clarify FrameSetup flags here.
  // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
  // needed.
  if (RegInfo->hasBasePointer(MF))
    TII->copyPhysReg(MBB, MBBI, DL, AArch64::X19, AArch64::SP, false);

  if (needsFrameMoves) {
    const DataLayout *TD = MF.getSubtarget().getDataLayout();
    const int StackGrowth = -TD->getPointerSize(0);
    unsigned FramePtr = RegInfo->getFrameRegister(MF);

    // An example of the prologue:
    //
    //     .globl __foo
    //     .align 2
    //  __foo:
    // Ltmp0:
    //     .cfi_startproc
    //     .cfi_personality 155, ___gxx_personality_v0
    // Leh_func_begin:
    //     .cfi_lsda 16, Lexception33
    //
    //     stp  xa,bx, [sp, #-offset]!
    //     ...
    //     stp  x28, x27, [sp, #offset-32]
    //     stp  fp, lr, [sp, #offset-16]
    //     add  fp, sp, #offset - 16
    //     sub  sp, sp, #1360
    //
    // The Stack:
    //       +-------------------------------------------+
    // 10000 | ........ | ........ | ........ | ........ |
    // 10004 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10008 | ........ | ........ | ........ | ........ |
    // 1000c | ........ | ........ | ........ | ........ |
    //       +===========================================+
    // 10010 |                X28 Register               |
    // 10014 |                X28 Register               |
    //       +-------------------------------------------+
    // 10018 |                X27 Register               |
    // 1001c |                X27 Register               |
    //       +===========================================+
    // 10020 |               Frame Pointer               |
    // 10024 |               Frame Pointer               |
    //       +-------------------------------------------+
    // 10028 |               Link Register               |
    // 1002c |               Link Register               |
    //       +===========================================+
    // 10030 | ........ | ........ | ........ | ........ |
    // 10034 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10038 | ........ | ........ | ........ | ........ |
    // 1003c | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    //
    //     [sp] = 10030        ::    >>initial value<<
    //     sp = 10020          ::  stp fp, lr, [sp, #-16]!
    //     fp = sp == 10020    ::  mov fp, sp
    //     [sp] == 10020       ::  stp x28, x27, [sp, #-16]!
    //     sp == 10010         ::    >>final value<<
    //
    // The frame pointer (w29) points to address 10020. If we use an offset
    // of '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29,
    // -24 for w27, and -32 for w28:
    //
    //  Ltmp1:
    //     .cfi_def_cfa w29, 16
    //  Ltmp2:
    //     .cfi_offset w30, -8
    //  Ltmp3:
    //     .cfi_offset w29, -16
    //  Ltmp4:
    //     .cfi_offset w27, -24
    //  Ltmp5:
    //     .cfi_offset w28, -32

    if (HasFP) {
      // Define the current CFA rule to use the provided FP.
      unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);

      // Record the location of the stored LR.
      unsigned LR = RegInfo->getDwarfRegNum(AArch64::LR, true);
      CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, LR, StackGrowth));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);

      // Record the location of the stored FP.
      CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, Reg, 2 * StackGrowth));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    } else {
      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize()));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

    // Now emit the moves for whatever callee saved regs we have.
    emitCalleeSavedFrameMoves(MBB, MBBI, FramePtr);
  }
}
void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
                                             MachineBasicBlock::iterator mi,
                                             SlotIndex MIIdx,
                                             MachineOperand& MO,
                                             unsigned MOIdx,
                                             LiveInterval &interval) {
  DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, tri_));

  // Virtual registers may be defined multiple times (due to phi
  // elimination and 2-addr elimination). Much of what we do only has to be
  // done once for the vreg. We use an empty interval to detect the first
  // time we see a vreg.
  LiveVariables::VarInfo& vi = lv_->getVarInfo(interval.reg);
  if (interval.empty()) {
    // Get the Idx of the defining instructions.
    SlotIndex defIndex = MIIdx.getRegSlot(MO.isEarlyClobber());

    // Make sure the first definition is not a partial redefinition. Add an
    // <imp-def> of the full register.
    // FIXME: LiveIntervals shouldn't modify the code like this. Whoever
    // created the machine instruction should annotate it with <undef> flags
    // as needed. Then we can simply assert here. The REG_SEQUENCE lowering
    // is the main suspect.
    if (MO.getSubReg()) {
      mi->addRegisterDefined(interval.reg);
      // Mark all defs of interval.reg on this instruction as reading
      // <undef>.
      for (unsigned i = MOIdx, e = mi->getNumOperands(); i != e; ++i) {
        MachineOperand &MO2 = mi->getOperand(i);
        if (MO2.isReg() && MO2.getReg() == interval.reg && MO2.getSubReg())
          MO2.setIsUndef();
      }
    }

    MachineInstr *CopyMI = NULL;
    if (mi->isCopyLike()) {
      CopyMI = mi;
    }

    VNInfo *ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator);
    assert(ValNo->id == 0 && "First value in interval is not 0?");

    // Loop over all of the blocks that the vreg is defined in. There are
    // two cases we have to handle here. The most common case is a vreg
    // whose lifetime is contained within a basic block. In this case there
    // will be a single kill, in MBB, which comes after the definition.
    if (vi.Kills.size() == 1 && vi.Kills[0]->getParent() == mbb) {
      // FIXME: what about dead vars?
      SlotIndex killIdx;
      if (vi.Kills[0] != mi)
        killIdx = getInstructionIndex(vi.Kills[0]).getRegSlot();
      else
        killIdx = defIndex.getDeadSlot();

      // If the kill happens after the definition, we have an intra-block
      // live range.
      if (killIdx > defIndex) {
        assert(vi.AliveBlocks.empty() &&
               "Shouldn't be alive across any blocks!");
        LiveRange LR(defIndex, killIdx, ValNo);
        interval.addRange(LR);
        DEBUG(dbgs() << " +" << LR << "\n");
        return;
      }
    }

    // The other case we handle is when a virtual register lives to the end
    // of the defining block, potentially live across some blocks, then is
    // live into some number of blocks, but gets killed. Start by adding a
    // range that goes from this definition to the end of the defining block.
    LiveRange NewLR(defIndex, getMBBEndIdx(mbb), ValNo);
    DEBUG(dbgs() << " +" << NewLR);
    interval.addRange(NewLR);

    bool PHIJoin = lv_->isPHIJoin(interval.reg);

    if (PHIJoin) {
      // A phi join register is killed at the end of the MBB and revived as a
      // new valno in the killing blocks.
      assert(vi.AliveBlocks.empty() && "Phi join can't pass through blocks");
      DEBUG(dbgs() << " phi-join");
      ValNo->setHasPHIKill(true);
    } else {
      // Iterate over all of the blocks that the variable is completely
      // live in, adding [instrIndex(begin), instrIndex(end)+4) to the
      // live interval.
      for (SparseBitVector<>::iterator I = vi.AliveBlocks.begin(),
               E = vi.AliveBlocks.end(); I != E; ++I) {
        MachineBasicBlock *aliveBlock = mf_->getBlockNumbered(*I);
        LiveRange LR(getMBBStartIdx(aliveBlock), getMBBEndIdx(aliveBlock),
                     ValNo);
        interval.addRange(LR);
        DEBUG(dbgs() << " +" << LR);
      }
    }

    // Finally, this virtual register is live from the start of any killing
    // block to the 'use' slot of the killing instruction.
    for (unsigned i = 0, e = vi.Kills.size(); i != e; ++i) {
      MachineInstr *Kill = vi.Kills[i];
      SlotIndex Start = getMBBStartIdx(Kill->getParent());
      SlotIndex killIdx = getInstructionIndex(Kill).getRegSlot();

      // Create an interval with a NEW value number. Note that this value
      // number isn't actually defined by an instruction, weird huh? :)
      if (PHIJoin) {
        assert(getInstructionFromIndex(Start) == 0 &&
               "PHI def index points at actual instruction.");
        ValNo = interval.getNextValue(Start, 0, VNInfoAllocator);
        ValNo->setIsPHIDef(true);
      }
      LiveRange LR(Start, killIdx, ValNo);
      interval.addRange(LR);
      DEBUG(dbgs() << " +" << LR);
    }

  } else {
    if (MultipleDefsBySameMI(*mi, MOIdx))
      // Multiple defs of the same virtual register by the same instruction.
      // e.g. %reg1031:5<def>, %reg1031:6<def> = VLD1q16 %reg1024<kill>, ...
      // This is likely due to elimination of REG_SEQUENCE instructions.
      // Return here since there is nothing to do.
      return;

    // If this is the second time we see a virtual register definition, it
    // must be due to phi elimination or two addr elimination. If this is
    // the result of two address elimination, then the vreg is one of the
    // def-and-use register operands.
    // It may also be a partial redef like this:
    //   80 %reg1041:6<def> = VSHRNv4i16 %reg1034<kill>, 12, pred:14, pred:%reg0
    //  120 %reg1041:5<def> = VSHRNv4i16 %reg1039<kill>, 12, pred:14, pred:%reg0
    bool PartReDef = isPartialRedef(MIIdx, MO, interval);
    if (PartReDef || mi->isRegTiedToUseOperand(MOIdx)) {
      // If this is a two-address definition, then we have already processed
      // the live range. The only problem is that we didn't realize there
      // are actually two values in the live interval. Because of this we
      // need to take the live range that defines this register and split it
      // into two values.
      SlotIndex RedefIndex = MIIdx.getRegSlot(MO.isEarlyClobber());

      const LiveRange *OldLR =
        interval.getLiveRangeContaining(RedefIndex.getRegSlot(true));
      VNInfo *OldValNo = OldLR->valno;
      SlotIndex DefIndex = OldValNo->def.getRegSlot();

      // Delete the previous value, which should be short and continuous,
      // because the 2-addr copy must be in the same MBB as the redef.
      interval.removeRange(DefIndex, RedefIndex);

      // The new value number (#1) is defined by the instruction we claimed
      // defined value #0.
      VNInfo *ValNo = interval.createValueCopy(OldValNo, VNInfoAllocator);

      // Value#0 is now defined by the 2-addr instruction.
      OldValNo->def = RedefIndex;
      OldValNo->setCopy(0);

      // A re-def may be a copy. e.g. %reg1030:6<def> = VMOVD %reg1026, ...
      if (PartReDef && mi->isCopyLike())
        OldValNo->setCopy(&*mi);

      // Add the new live interval which replaces the range for the input
      // copy.
      LiveRange LR(DefIndex, RedefIndex, ValNo);
      DEBUG(dbgs() << " replace range with " << LR);
      interval.addRange(LR);

      // If this redefinition is dead, we need to add a dummy unit live
      // range covering the def slot.
      if (MO.isDead())
        interval.addRange(LiveRange(RedefIndex, RedefIndex.getDeadSlot(),
                                    OldValNo));

      DEBUG({
          dbgs() << " RESULT: ";
          interval.print(dbgs(), tri_);
        });
    } else if (lv_->isPHIJoin(interval.reg)) {
MachineInstrBuilder
MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc,
                                  MachineBasicBlock::iterator I) const {
  MachineInstrBuilder MIB;

  // Certain branches have two forms: e.g. beq $1, $zero, dest vs beqz $1,
  // dest. Pick the zero form of the branch for readable assembly and for
  // greater branch distance in non-microMIPS mode.
  // Additionally, MIPSR6 does not permit the use of register $zero for
  // compact branches.
  // FIXME: Certain atomic sequences on mips64 generate 32bit references to
  // Mips::ZERO, which is incorrect. This test should be updated to use
  // Subtarget.getABI().GetZeroReg() when those atomic sequences and others
  // are fixed.
  int ZeroOperandPosition = -1;
  bool BranchWithZeroOperand = false;
  if (I->isBranch() && !I->isPseudo()) {
    auto TRI = I->getParent()->getParent()->getSubtarget().getRegisterInfo();
    ZeroOperandPosition = I->findRegisterUseOperandIdx(Mips::ZERO, false, TRI);
    BranchWithZeroOperand = ZeroOperandPosition != -1;
  }

  if (BranchWithZeroOperand) {
    switch (NewOpc) {
    case Mips::BEQC:
      NewOpc = Mips::BEQZC;
      break;
    case Mips::BNEC:
      NewOpc = Mips::BNEZC;
      break;
    case Mips::BGEC:
      NewOpc = Mips::BGEZC;
      break;
    case Mips::BLTC:
      NewOpc = Mips::BLTZC;
      break;
    case Mips::BEQC64:
      NewOpc = Mips::BEQZC64;
      break;
    case Mips::BNEC64:
      NewOpc = Mips::BNEZC64;
      break;
    }
  }

  MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), get(NewOpc));

  // For MIPSR6, JI*C requires an immediate 0 as an operand; JIALC(64) also
  // requires an immediate 0 as an operand and the removal of its
  // %RA<imp-def> implicit operand, as copying the implicit operands of the
  // instruction we're looking at will give us the correct flags.
  if (NewOpc == Mips::JIC || NewOpc == Mips::JIALC || NewOpc == Mips::JIC64 ||
      NewOpc == Mips::JIALC64) {

    if (NewOpc == Mips::JIALC || NewOpc == Mips::JIALC64)
      MIB->RemoveOperand(0);

    for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) {
      MIB.add(I->getOperand(J));
    }

    MIB.addImm(0);
  } else {
    for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) {
      if (BranchWithZeroOperand && (unsigned)ZeroOperandPosition == J)
        continue;

      MIB.add(I->getOperand(J));
    }
  }

  MIB.copyImplicitOps(*I);
  MIB.setMemRefs(I->memoperands_begin(), I->memoperands_end());
  return MIB;
}
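// Example of the zero-form rewrite performed above (illustrative operands):
//
//   beqc $a0, $zero, dest    =>    beqzc $a0, dest
//
// The $zero operand is dropped when copying explicit operands, which reads
// better in assembly and gives a larger branch range in non-microMIPS mode.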
/// ComputeLocalLiveness - Computes liveness of registers within a basic
/// block, setting the killed/dead flags as appropriate.
void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  // Keep track of the most recently seen previous use or def of each reg,
  // so that we can update them with dead/kill markers.
  DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > LastUseDef;
  for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
       I != E; ++I) {
    if (I->isDebugValue())
      continue;

    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = I->getOperand(i);
      // Uses don't trigger any flags, but we need to save
      // them for later. Also, we have to process these
      // _before_ processing the defs, since an instr
      // uses regs before it defs them.
      if (!MO.isReg() || !MO.getReg() || !MO.isUse())
        continue;

      LastUseDef[MO.getReg()] = std::make_pair(I, i);

      if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
        continue;

      const unsigned *Aliases = TRI->getAliasSet(MO.getReg());
      if (Aliases == 0)
        continue;

      while (*Aliases) {
        DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
          alias = LastUseDef.find(*Aliases);

        if (alias != LastUseDef.end() && alias->second.first != I)
          LastUseDef[*Aliases] = std::make_pair(I, i);

        ++Aliases;
      }
    }

    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = I->getOperand(i);
      // Defs other than 2-addr redefs _do_ trigger flag changes:
      //   - A def followed by a def is dead
      //   - A use followed by a def is a kill
      if (!MO.isReg() || !MO.getReg() || !MO.isDef())
        continue;

      DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
        last = LastUseDef.find(MO.getReg());
      if (last != LastUseDef.end()) {
        // Check if this is a two address instruction. If so, then
        // the def does not kill the use.
        if (last->second.first == I && I->isRegTiedToUseOperand(i))
          continue;

        MachineOperand &lastUD =
                    last->second.first->getOperand(last->second.second);
        if (lastUD.isDef())
          lastUD.setIsDead(true);
        else
          lastUD.setIsKill(true);
      }

      LastUseDef[MO.getReg()] = std::make_pair(I, i);
    }
  }

  // Live-out (of the function) registers contain return values of the
  // function, so we need to make sure they are alive at return time.
  MachineBasicBlock::iterator Ret = MBB.getFirstTerminator();
  bool BBEndsInReturn = (Ret != MBB.end() && Ret->getDesc().isReturn());

  if (BBEndsInReturn)
    for (MachineRegisterInfo::liveout_iterator
         I = MF->getRegInfo().liveout_begin(),
         E = MF->getRegInfo().liveout_end(); I != E; ++I)
      if (!Ret->readsRegister(*I)) {
        Ret->addOperand(MachineOperand::CreateReg(*I, false, true));
        LastUseDef[*I] = std::make_pair(Ret, Ret->getNumOperands()-1);
      }

  // Finally, loop over the final use/def of each reg
  // in the block and determine if it is dead.
  for (DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
       I = LastUseDef.begin(), E = LastUseDef.end(); I != E; ++I) {
    MachineInstr *MI = I->second.first;
    unsigned idx = I->second.second;
    MachineOperand &MO = MI->getOperand(idx);

    bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(MO.getReg());

    // A crude approximation of "live-out" calculation.
    bool usedOutsideBlock = isPhysReg ? false :
          UsedInMultipleBlocks.test(MO.getReg() -
                                    TargetRegisterInfo::FirstVirtualRegister);

    // If the machine BB ends in a return instruction, then the value isn't
    // used outside of the BB.
    if (!isPhysReg && (!usedOutsideBlock || BBEndsInReturn)) {
      // DBG_VALUE complicates this: if the only refs of a register outside
      // this block are DBG_VALUE, we can't keep the reg live just for that,
      // as it will cause the reg to be spilled at the end of this block when
      // it wouldn't have been otherwise. Nullify the DBG_VALUEs when that
      // happens.
      bool UsedByDebugValueOnly = false;
      for (MachineRegisterInfo::reg_iterator UI = MRI.reg_begin(MO.getReg()),
           UE = MRI.reg_end(); UI != UE; ++UI) {
        // Two cases:
        // - used in another block
        // - used in the same block before it is defined (loop)
        if (UI->getParent() == &MBB &&
            !(MO.isDef() && UI.getOperand().isUse() && precedes(&*UI, MI)))
          continue;

        if (UI->isDebugValue()) {
          UsedByDebugValueOnly = true;
          continue;
        }

        // A non-DBG_VALUE use means we can leave DBG_VALUE uses alone.
        UsedInMultipleBlocks.set(MO.getReg() -
                                 TargetRegisterInfo::FirstVirtualRegister);
        usedOutsideBlock = true;
        UsedByDebugValueOnly = false;
        break;
      }

      if (UsedByDebugValueOnly)
        for (MachineRegisterInfo::reg_iterator UI = MRI.reg_begin(MO.getReg()),
             UE = MRI.reg_end(); UI != UE; ++UI)
          if (UI->isDebugValue() &&
              (UI->getParent() != &MBB ||
               (MO.isDef() && precedes(&*UI, MI))))
            UI.getOperand().setReg(0U);
    }

    // Physical registers and those that are not live-out of the block are
    // killed/dead at their last use/def within this block.
    if (isPhysReg || !usedOutsideBlock || BBEndsInReturn) {
      if (MO.isUse()) {
        // Don't mark uses that are tied to defs as kills.
        if (!MI->isRegTiedToDefOperand(idx))
          MO.setIsKill(true);
      } else {
        MO.setIsDead(true);
      }
    }
  }
}
// Try to fuse comparison instruction Compare into a later branch.
// Return true on success and if Compare is therefore redundant.
bool SystemZElimCompare::fuseCompareOperations(
    MachineInstr &Compare, SmallVectorImpl<MachineInstr *> &CCUsers) {
  // See whether we have a single branch with which to fuse.
  if (CCUsers.size() != 1)
    return false;
  MachineInstr *Branch = CCUsers[0];
  SystemZII::FusedCompareType Type;
  switch (Branch->getOpcode()) {
  case SystemZ::BRC:
    Type = SystemZII::CompareAndBranch;
    break;
  case SystemZ::CondReturn:
    Type = SystemZII::CompareAndReturn;
    break;
  case SystemZ::CallBCR:
    Type = SystemZII::CompareAndSibcall;
    break;
  case SystemZ::CondTrap:
    Type = SystemZII::CompareAndTrap;
    break;
  default:
    return false;
  }

  // See whether we have a comparison that can be fused.
  unsigned FusedOpcode =
      TII->getFusedCompare(Compare.getOpcode(), Type, &Compare);
  if (!FusedOpcode)
    return false;

  // Make sure that the operands are available at the branch.
  // SrcReg2 is the register if the source operand is a register,
  // 0 if the source operand is immediate, and the base register
  // if the source operand is memory (index is not supported).
  unsigned SrcReg = Compare.getOperand(0).getReg();
  unsigned SrcReg2 =
      Compare.getOperand(1).isReg() ? Compare.getOperand(1).getReg() : 0;
  MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch;
  for (++MBBI; MBBI != MBBE; ++MBBI)
    if (MBBI->modifiesRegister(SrcReg, TRI) ||
        (SrcReg2 && MBBI->modifiesRegister(SrcReg2, TRI)))
      return false;

  // Read the branch mask, target (if applicable), regmask (if applicable).
  MachineOperand CCMask(MBBI->getOperand(1));
  assert((CCMask.getImm() & ~SystemZ::CCMASK_ICMP) == 0 &&
         "Invalid condition-code mask for integer comparison");
  // This is only valid for CompareAndBranch.
  MachineOperand Target(MBBI->getOperand(
    Type == SystemZII::CompareAndBranch ? 2 : 0));
  const uint32_t *RegMask;
  if (Type == SystemZII::CompareAndSibcall)
    RegMask = MBBI->getOperand(2).getRegMask();

  // Clear out all current operands.
  int CCUse = MBBI->findRegisterUseOperandIdx(SystemZ::CC, false, TRI);
  assert(CCUse >= 0 && "BRC/BCR must use CC");
  Branch->RemoveOperand(CCUse);
  // Remove target (branch) or regmask (sibcall).
  if (Type == SystemZII::CompareAndBranch ||
      Type == SystemZII::CompareAndSibcall)
    Branch->RemoveOperand(2);
  Branch->RemoveOperand(1);
  Branch->RemoveOperand(0);

  // Rebuild Branch as a fused compare and branch.
  // SrcNOps is the number of MI operands of the compare instruction
  // that we need to copy over.
  unsigned SrcNOps = 2;
  if (FusedOpcode == SystemZ::CLT || FusedOpcode == SystemZ::CLGT)
    SrcNOps = 3;
  Branch->setDesc(TII->get(FusedOpcode));
  MachineInstrBuilder MIB(*Branch->getParent()->getParent(), Branch);
  for (unsigned I = 0; I < SrcNOps; I++)
    MIB.addOperand(Compare.getOperand(I));
  MIB.addOperand(CCMask);

  if (Type == SystemZII::CompareAndBranch) {
    // Only conditional branches define CC, as they may be converted back
    // to a non-fused branch because of a long displacement. Conditional
    // returns don't have that problem.
    MIB.addOperand(Target)
       .addReg(SystemZ::CC, RegState::ImplicitDefine | RegState::Dead);
  }

  if (Type == SystemZII::CompareAndSibcall)
    MIB.addRegMask(RegMask);

  // Clear any intervening kills of SrcReg and SrcReg2.
  MBBI = Compare;
  for (++MBBI; MBBI != MBBE; ++MBBI) {
    MBBI->clearRegisterKills(SrcReg, TRI);
    if (SrcReg2)
      MBBI->clearRegisterKills(SrcReg2, TRI);
  }

  FusedComparisons += 1;
  return true;
}
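// Conceptual before/after for the rebuild above (the fused mnemonic comes
// from getFusedCompare; CRJ is shown only as a plausible example):
//
//   CR   %r2, %r3            ; Compare, sets CC
//   BRC  mask, %bb.7         ; Branch, the sole CC user
//     =>
//   CRJ  %r2, %r3, mask, %bb.7, implicit-def dead %cc
//
// i.e. the branch is rewritten in place via setDesc and the original compare
// becomes redundant (the caller deletes it when this returns true).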
/// If \p MBBI is a pseudo instruction, this method expands
/// it to the corresponding (sequence of) actual instruction(s).
/// \returns true if \p MBBI has been expanded.
bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();
  DebugLoc DL = MBBI->getDebugLoc();
  switch (Opcode) {
  default:
    return false;
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64: {
    bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64;
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
    assert(StackAdjust.isImm() && "Expecting immediate value.");

    // Adjust stack pointer.
    int StackAdj = StackAdjust.getImm();

    if (StackAdj) {
      // Check for possible merge with preceding ADD instruction.
      StackAdj += X86FL->mergeSPUpdates(MBB, MBBI, true);
      X86FL->emitSPUpdate(MBB, MBBI, StackAdj, /*InEpilogue=*/true);
    }

    // Jump to label or value in register.
    bool IsWin64 = STI->isTargetWin64();
    if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdi64) {
      unsigned Op = (Opcode == X86::TCRETURNdi)
                        ? X86::TAILJMPd
                        : (IsWin64 ? X86::TAILJMPd64_REX : X86::TAILJMPd64);
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op));
      if (JumpTarget.isGlobal())
        MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                             JumpTarget.getTargetFlags());
      else {
        assert(JumpTarget.isSymbol());
        MIB.addExternalSymbol(JumpTarget.getSymbolName(),
                              JumpTarget.getTargetFlags());
      }
    } else if (Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64) {
      unsigned Op = (Opcode == X86::TCRETURNmi)
                        ? X86::TAILJMPm
                        : (IsWin64 ? X86::TAILJMPm64_REX : X86::TAILJMPm64);
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op));
      for (unsigned i = 0; i != 5; ++i)
        MIB.addOperand(MBBI->getOperand(i));
    } else if (Opcode == X86::TCRETURNri64) {
      BuildMI(MBB, MBBI, DL,
              TII->get(IsWin64 ? X86::TAILJMPr64_REX : X86::TAILJMPr64))
          .addReg(JumpTarget.getReg(), RegState::Kill);
    } else {
      BuildMI(MBB, MBBI, DL, TII->get(X86::TAILJMPr))
          .addReg(JumpTarget.getReg(), RegState::Kill);
    }

    MachineInstr *NewMI = std::prev(MBBI);
    NewMI->copyImplicitOps(*MBBI->getParent()->getParent(), *MBBI);

    // Delete the pseudo instruction TCRETURN.
    MBB.erase(MBBI);

    return true;
  }
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    MachineOperand &DestAddr = MBBI->getOperand(0);
    assert(DestAddr.isReg() && "Offset should be in register!");
    const bool Uses64BitFramePtr =
        STI->isTarget64BitLP64() || STI->isTargetNaCl64();
    unsigned StackPtr = TRI->getStackRegister();
    BuildMI(MBB, MBBI, DL,
            TII->get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
            StackPtr)
        .addReg(DestAddr.getReg());
    // The EH_RETURN pseudo is really removed during the MC Lowering.
    return true;
  }
  case X86::IRET: {
    // Adjust stack to erase error code.
    int64_t StackAdj = MBBI->getOperand(0).getImm();
    X86FL->emitSPUpdate(MBB, MBBI, StackAdj, true);
    // Replace pseudo with machine iret.
    BuildMI(MBB, MBBI, DL,
            TII->get(STI->is64Bit() ? X86::IRET64 : X86::IRET32));
    MBB.erase(MBBI);
    return true;
  }
  case X86::RET: {
    // Adjust stack to erase error code.
    int64_t StackAdj = MBBI->getOperand(0).getImm();
    MachineInstrBuilder MIB;
    if (StackAdj == 0) {
      MIB = BuildMI(MBB, MBBI, DL,
                    TII->get(STI->is64Bit() ? X86::RETQ : X86::RETL));
    } else if (isUInt<16>(StackAdj)) {
      MIB = BuildMI(MBB, MBBI, DL,
                    TII->get(STI->is64Bit() ? X86::RETIQ : X86::RETIL))
                .addImm(StackAdj);
    } else {
      assert(!STI->is64Bit() &&
             "shouldn't need to do this for x86_64 targets!");
      // A ret can only handle immediates as big as 2**16-1. If we need to
      // pop off bytes before the return address, we must do it manually.
      BuildMI(MBB, MBBI, DL, TII->get(X86::POP32r))
          .addReg(X86::ECX, RegState::Define);
      X86FL->emitSPUpdate(MBB, MBBI, StackAdj, /*InEpilogue=*/true);
      BuildMI(MBB, MBBI, DL, TII->get(X86::PUSH32r)).addReg(X86::ECX);
      MIB = BuildMI(MBB, MBBI, DL, TII->get(X86::RETL));
    }
    for (unsigned I = 1, E = MBBI->getNumOperands(); I != E; ++I)
      MIB.addOperand(MBBI->getOperand(I));
    MBB.erase(MBBI);
    return true;
  }
  case X86::EH_RESTORE: {
    // Restore ESP and EBP, and optionally ESI if required.
    bool IsSEH = isAsynchronousEHPersonality(classifyEHPersonality(
        MBB.getParent()->getFunction()->getPersonalityFn()));
    X86FL->restoreWin32EHStackPointers(MBB, MBBI, DL, /*RestoreSP=*/IsSEH);
    MBBI->eraseFromParent();
    return true;
  }
  }
  llvm_unreachable("Previous switch has a fallthrough?");
}
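// Example expansion for the tail-call case above (illustrative operands): a
// direct 64-bit tail call pseudo carrying a 16-byte stack adjustment
//
//   TCRETURNdi64 @callee, 16
//
// becomes an SP update followed by the actual tail jump, with the pseudo's
// implicit operands copied over and the pseudo erased:
//
//   ADD $rsp, 16          ; emitSPUpdate, possibly merged with a prior add
//   TAILJMPd64 @callee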
bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
                                   MachineBasicBlock &MBB,
                                   MachineLoopInfo *MLI) {
  if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad())
    return false;   // Quick exit for basic blocks without PHIs.

  const MachineLoop *CurLoop = MLI ? MLI->getLoopFor(&MBB) : 0;
  bool IsLoopHeader = CurLoop && &MBB == CurLoop->getHeader();

  bool Changed = false;
  for (MachineBasicBlock::iterator BBI = MBB.begin(), BBE = MBB.end();
       BBI != BBE && BBI->isPHI(); ++BBI) {
    for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
      unsigned Reg = BBI->getOperand(i).getReg();
      MachineBasicBlock *PreMBB = BBI->getOperand(i+1).getMBB();
      // Is there a critical edge from PreMBB to MBB?
      if (PreMBB->succ_size() == 1)
        continue;

      // Avoid splitting backedges of loops. It would introduce small
      // out-of-line blocks into the loop which is very bad for code
      // placement.
      if (PreMBB == &MBB && !SplitAllCriticalEdges)
        continue;
      const MachineLoop *PreLoop = MLI ? MLI->getLoopFor(PreMBB) : 0;
      if (IsLoopHeader && PreLoop == CurLoop && !SplitAllCriticalEdges)
        continue;

      // LV doesn't consider a phi use live-out, so isLiveOut only returns
      // true when the source register is live-out for some other reason than
      // a phi use. That means the copy we will insert in PreMBB won't be a
      // kill, and there is a risk it may not be coalesced away.
      //
      // If the copy would be a kill, there is no need to split the edge.
      if (!isLiveOutPastPHIs(Reg, PreMBB) && !SplitAllCriticalEdges)
        continue;

      DEBUG(dbgs() << PrintReg(Reg) << " live-out before critical edge BB#"
                   << PreMBB->getNumber() << " -> BB#" << MBB.getNumber()
                   << ": " << *BBI);

      // If Reg is not live-in to MBB, it means it must be live-in to some
      // other PreMBB successor, and we can avoid the interference by
      // splitting the edge.
      //
      // If Reg *is* live-in to MBB, the interference is inevitable and a
      // copy is likely to be left after coalescing. If we are looking at a
      // loop exiting edge, split it so we won't insert code in the loop,
      // otherwise don't bother.
      bool ShouldSplit = !isLiveIn(Reg, &MBB) || SplitAllCriticalEdges;

      // Check for a loop exiting edge.
      if (!ShouldSplit && CurLoop != PreLoop) {
        DEBUG({
          dbgs() << "Split wouldn't help, maybe avoid loop copies?\n";
          if (PreLoop) dbgs() << "PreLoop: " << *PreLoop;
          if (CurLoop) dbgs() << "CurLoop: " << *CurLoop;
        });
        // This edge could be entering a loop, exiting a loop, or it could be
        // both: Jumping directly from one loop to the header of a sibling
        // loop.
        // Split unless this edge is entering CurLoop from an outer loop.
        ShouldSplit = PreLoop && !PreLoop->contains(CurLoop);
      }
      if (!ShouldSplit)
        continue;
      if (!PreMBB->SplitCriticalEdge(&MBB, this)) {
        DEBUG(dbgs() << "Failed to split critical edge.\n");
        continue;
      }
      Changed = true;
      ++NumCriticalEdgesSplit;
    }
  }
  return Changed;
}
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL;
  bool IsTailCallReturn = false;
  if (MBB.end() != MBBI) {
    DL = MBBI->getDebugLoc();
    unsigned RetOpcode = MBBI->getOpcode();
    IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
      RetOpcode == AArch64::TCRETURNri;
  }
  int NumBytes = MFI->getStackSize();
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  // Initial and residual are named for consistency with the prologue. Note
  // that in the epilogue, the residual adjustment is executed first.
  uint64_t ArgumentPopSize = 0;
  if (IsTailCallReturn) {
    MachineOperand &StackAdjust = MBBI->getOperand(1);

    // For a tail-call in a callee-pops-arguments environment, some or all of
    // the stack may actually be in use for the call's arguments, this is
    // calculated during LowerCall and consumed here...
    ArgumentPopSize = StackAdjust.getImm();
  } else {
    // ... otherwise the amount to pop is *all* of the argument space,
    // conveniently stored in the MachineFunctionInfo by
    // LowerFormalArguments. This will, of course, be zero for the C calling
    // convention.
    ArgumentPopSize = AFI->getArgumentStackToRestore();
  }

  // The stack frame should be like below,
  //
  //      ----------------------                     ---
  //      |                    |                      |
  //      | BytesInStackArgArea|              CalleeArgStackSize
  //      | (NumReusableBytes) |                (of tail call)
  //      |                    |                     ---
  //      |                    |                      |
  //      ---------------------|        ---           |
  //      |                    |         |            |
  //      |   CalleeSavedReg   |         |            |
  //      | (NumRestores * 8)  |         |            |
  //      |                    |         |            |
  //      ---------------------|         |         NumBytes
  //      |                    |     StackSize  (StackAdjustUp)
  //      |   LocalStackSize   |         |            |
  //      | (covering callee   |         |            |
  //      |       args)        |         |            |
  //      |                    |         |            |
  //      ----------------------        ---          ---
  //
  // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
  //             = StackSize + ArgumentPopSize
  //
  // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
  // it as the 2nd argument of AArch64ISD::TC_RETURN.
  NumBytes += ArgumentPopSize;

  unsigned NumRestores = 0;
  // Move past the restores of the callee-saved registers.
  MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
  MachineBasicBlock::iterator Begin = MBB.begin();
  while (LastPopI != Begin) {
    --LastPopI;
    unsigned Restores = getNumCSRestores(*LastPopI, CSRegs);
    NumRestores += Restores;
    if (Restores == 0) {
      ++LastPopI;
      break;
    }
  }
  NumBytes -= NumRestores * 8;
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  if (!hasFP(MF)) {
    // If this was a redzone leaf function, we don't need to restore the
    // stack pointer.
    if (!canUseRedZone(MF))
      emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes,
                      TII);
    return;
  }

  // Restore the original stack pointer.
  // FIXME: Rather than doing the math here, we should instead just use
  // non-post-indexed loads for the restores if we aren't actually going to
  // be able to save any instructions.
  if (NumBytes || MFI->hasVarSizedObjects())
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
                    -(NumRestores - 2) * 8, TII, MachineInstr::NoFlags);
}
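// Worked example for the accounting above (illustrative numbers only): with
// MFI->getStackSize() == 48, ArgumentPopSize == 16 and three callee-saved
// register restores, the epilogue must free
//   NumBytes = 48 + 16 - 3 * 8 = 40
// bytes beyond what the restore instructions themselves pop.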
/// reMaterializeFor - Attempt to rematerialize before MI instead of reloading.
bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
                                     MachineBasicBlock::iterator MI) {
  SlotIndex UseIdx = LIS.getInstructionIndex(MI).getRegSlot(true);
  VNInfo *ParentVNI = VirtReg.getVNInfoAt(UseIdx.getBaseIndex());

  if (!ParentVNI) {
    DEBUG(dbgs() << "\tadding <undef> flags: ");
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg)
        MO.setIsUndef();
    }
    DEBUG(dbgs() << UseIdx << '\t' << *MI);
    return true;
  }

  if (SnippetCopies.count(MI))
    return false;

  // Use an OrigVNI from traceSiblingValue when ParentVNI is a sibling copy.
  LiveRangeEdit::Remat RM(ParentVNI);
  SibValueMap::const_iterator SibI = SibValues.find(ParentVNI);
  if (SibI != SibValues.end())
    RM.OrigMI = SibI->second.DefMI;
  if (!Edit->canRematerializeAt(RM, UseIdx, false)) {
    markValueUsed(&VirtReg, ParentVNI);
    DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << *MI);
    return false;
  }

  // If the instruction also writes VirtReg.reg, it had better not require the
  // same register for uses and defs.
  SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops;
  MIBundleOperands::VirtRegInfo RI =
    MIBundleOperands(MI).analyzeVirtReg(VirtReg.reg, &Ops);
  if (RI.Tied) {
    markValueUsed(&VirtReg, ParentVNI);
    DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI);
    return false;
  }

  // Before rematerializing into a register for a single instruction, try to
  // fold a load into the instruction. That avoids allocating a new register.
  if (RM.OrigMI->canFoldAsLoad() &&
      foldMemoryOperand(Ops, RM.OrigMI)) {
    Edit->markRematerialized(RM.ParentVNI);
    ++NumFoldedLoads;
    return true;
  }

  // Allocate a new register for the remat.
  unsigned NewVReg = Edit->createFrom(Original);

  // Finally we can rematerialize OrigMI before MI.
  SlotIndex DefIdx = Edit->rematerializeAt(*MI->getParent(), MI, NewVReg, RM,
                                           TRI);
  (void)DefIdx;
  DEBUG(dbgs() << "\tremat: " << DefIdx << '\t'
               << *LIS.getInstructionFromIndex(DefIdx));

  // Replace operands
  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(Ops[i].second);
    if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) {
      MO.setReg(NewVReg);
      MO.setIsKill();
    }
  }
  DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI << '\n');

  ++NumRemats;
  return true;
}
/// UpdateSuccessorsPHIs - After FromBB is tail duplicated into its predecessor
/// blocks, the successors have gained new predecessors. Update the PHI
/// instructions in them accordingly.
void
TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
                                  SmallVectorImpl<MachineBasicBlock *> &TDBBs,
                                  SmallSetVector<MachineBasicBlock*,8> &Succs) {
  for (SmallSetVector<MachineBasicBlock*, 8>::iterator SI = Succs.begin(),
         SE = Succs.end(); SI != SE; ++SI) {
    MachineBasicBlock *SuccBB = *SI;
    for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end();
         II != EE; ++II) {
      if (!II->isPHI())
        break;
      MachineInstrBuilder MIB(*FromBB->getParent(), II);
      unsigned Idx = 0;
      for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) {
        MachineOperand &MO = II->getOperand(i+1);
        if (MO.getMBB() == FromBB) {
          Idx = i;
          break;
        }
      }

      assert(Idx != 0);
      MachineOperand &MO0 = II->getOperand(Idx);
      unsigned Reg = MO0.getReg();
      if (isDead) {
        // Folded into the previous BB.
        // There could be duplicate phi source entries. FIXME: Should sdisel
        // or an earlier pass have fixed this?
        for (unsigned i = II->getNumOperands()-2; i != Idx; i -= 2) {
          MachineOperand &MO = II->getOperand(i+1);
          if (MO.getMBB() == FromBB) {
            II->RemoveOperand(i+1);
            II->RemoveOperand(i);
          }
        }
      } else
        Idx = 0;

      // If Idx is set, the operands at Idx and Idx+1 must be removed.
      // We reuse the location to avoid expensive RemoveOperand calls.

      DenseMap<unsigned,AvailableValsTy>::iterator LI=SSAUpdateVals.find(Reg);
      if (LI != SSAUpdateVals.end()) {
        // This register is defined in the tail block.
        for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
          MachineBasicBlock *SrcBB = LI->second[j].first;
          // If we didn't duplicate a bb into a particular predecessor, we
          // might still have added an entry to SSAUpdateVals to correctly
          // recompute SSA. In that case, avoid adding a dummy extra argument
          // to this PHI.
          if (!SrcBB->isSuccessor(SuccBB))
            continue;

          unsigned SrcReg = LI->second[j].second;
          if (Idx != 0) {
            II->getOperand(Idx).setReg(SrcReg);
            II->getOperand(Idx+1).setMBB(SrcBB);
            Idx = 0;
          } else {
            MIB.addReg(SrcReg).addMBB(SrcBB);
          }
        }
      } else {
        // Live in tail block, must also be live in predecessors.
        for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) {
          MachineBasicBlock *SrcBB = TDBBs[j];
          if (Idx != 0) {
            II->getOperand(Idx).setReg(Reg);
            II->getOperand(Idx+1).setMBB(SrcBB);
            Idx = 0;
          } else {
            MIB.addReg(Reg).addMBB(SrcBB);
          }
        }
      }
      if (Idx != 0) {
        II->RemoveOperand(Idx+1);
        II->RemoveOperand(Idx);
      }
    }
  }
}
void MipsSEInstrInfo::expandPseudoMFHiLo(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned NewOpc) const { BuildMI(MBB, I, I->getDebugLoc(), get(NewOpc), I->getOperand(0).getReg()); }
bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { df_iterator_default_set<MachineBasicBlock*> Reachable; bool ModifiedPHI = false; MMI = getAnalysisIfAvailable<MachineModuleInfo>(); MachineDominatorTree *MDT = getAnalysisIfAvailable<MachineDominatorTree>(); MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>(); // Mark all reachable blocks. for (MachineBasicBlock *BB : depth_first_ext(&F, Reachable)) (void)BB/* Mark all reachable blocks */; // Loop over all dead blocks, remembering them and deleting all instructions // in them. std::vector<MachineBasicBlock*> DeadBlocks; for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) { MachineBasicBlock *BB = &*I; // Test for deadness. if (!Reachable.count(BB)) { DeadBlocks.push_back(BB); // Update dominator and loop info. if (MLI) MLI->removeBlock(BB); if (MDT && MDT->getNode(BB)) MDT->eraseNode(BB); while (BB->succ_begin() != BB->succ_end()) { MachineBasicBlock* succ = *BB->succ_begin(); MachineBasicBlock::iterator start = succ->begin(); while (start != succ->end() && start->isPHI()) { for (unsigned i = start->getNumOperands() - 1; i >= 2; i-=2) if (start->getOperand(i).isMBB() && start->getOperand(i).getMBB() == BB) { start->RemoveOperand(i); start->RemoveOperand(i-1); } start++; } BB->removeSuccessor(BB->succ_begin()); } } } // Actually remove the blocks now. for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) DeadBlocks[i]->eraseFromParent(); // Cleanup PHI nodes. for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) { MachineBasicBlock *BB = &*I; // Prune unneeded PHI entries. SmallPtrSet<MachineBasicBlock*, 8> preds(BB->pred_begin(), BB->pred_end()); MachineBasicBlock::iterator phi = BB->begin(); while (phi != BB->end() && phi->isPHI()) { for (unsigned i = phi->getNumOperands() - 1; i >= 2; i-=2) if (!preds.count(phi->getOperand(i).getMBB())) { phi->RemoveOperand(i); phi->RemoveOperand(i-1); ModifiedPHI = true; } if (phi->getNumOperands() == 3) { const MachineOperand &Input = phi->getOperand(1); const MachineOperand &Output = phi->getOperand(0); unsigned InputReg = Input.getReg(); unsigned OutputReg = Output.getReg(); assert(Output.getSubReg() == 0 && "Cannot have output subregister"); ModifiedPHI = true; if (InputReg != OutputReg) { MachineRegisterInfo &MRI = F.getRegInfo(); unsigned InputSub = Input.getSubReg(); if (InputSub == 0 && MRI.constrainRegClass(InputReg, MRI.getRegClass(OutputReg))) { MRI.replaceRegWith(OutputReg, InputReg); } else { // The input register to the PHI has a subregister or it can't be // constrained to the proper register class: // insert a COPY instead of simply replacing the output // with the input. const TargetInstrInfo *TII = F.getSubtarget().getInstrInfo(); BuildMI(*BB, BB->getFirstNonPHI(), phi->getDebugLoc(), TII->get(TargetOpcode::COPY), OutputReg) .addReg(InputReg, getRegState(Input), InputSub); } phi++->eraseFromParent(); } continue; } ++phi; } } F.RenumberBlocks(); return (!DeadBlocks.empty() || ModifiedPHI); }
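// The depth_first_ext() walk above is ordinary DFS reachability from the
// entry block. A self-contained sketch of the same idea over a hypothetical
// integer-indexed CFG (not the MachineFunction API):
#include <set>
#include <vector>

static void markReachable(int BB, const std::vector<std::vector<int>> &Succs,
                          std::set<int> &Reachable) {
  if (!Reachable.insert(BB).second)
    return; // already visited
  for (int S : Succs[BB])
    markReachable(S, Succs, Reachable);
}
// After markReachable(0, Succs, Reachable), any block index missing from
// Reachable is dead and can be deleted, exactly as DeadBlocks is built above.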
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  AArch64MachineFunctionInfo *FuncInfo =
    MF.getInfo<AArch64MachineFunctionInfo>();

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  DebugLoc DL = MBBI->getDebugLoc();
  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned RetOpcode = MBBI->getOpcode();

  // Initial and residual are named for consistency with the prologue. Note
  // that in the epilogue, the residual adjustment is executed first.
  uint64_t NumInitialBytes = FuncInfo->getInitialStackAdjust();
  uint64_t NumResidualBytes = MFI.getStackSize() - NumInitialBytes;
  uint64_t ArgumentPopSize = 0;
  if (RetOpcode == AArch64::TC_RETURNdi ||
      RetOpcode == AArch64::TC_RETURNxi) {
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    MachineOperand &StackAdjust = MBBI->getOperand(1);

    MachineInstrBuilder MIB;
    if (RetOpcode == AArch64::TC_RETURNdi) {
      MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_Bimm));
      if (JumpTarget.isGlobal()) {
        MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                             JumpTarget.getTargetFlags());
      } else {
        assert(JumpTarget.isSymbol() && "unexpected tail call destination");
        MIB.addExternalSymbol(JumpTarget.getSymbolName(),
                              JumpTarget.getTargetFlags());
      }
    } else {
      assert(RetOpcode == AArch64::TC_RETURNxi && JumpTarget.isReg()
             && "Unexpected tail call");

      MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_BRx));
      MIB.addReg(JumpTarget.getReg(), RegState::Kill);
    }

    // Add the extra operands onto the new tail call instruction even though
    // they're not used directly (so that liveness is tracked properly etc).
    for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
      MIB->addOperand(MBBI->getOperand(i));

    // Delete the pseudo instruction TC_RETURN.
    MachineInstr *NewMI = prior(MBBI);
    MBB.erase(MBBI);
    MBBI = NewMI;

    // For a tail-call in a callee-pops-arguments environment, some or all of
    // the stack may actually be in use for the call's arguments, this is
    // calculated during LowerCall and consumed here...
    ArgumentPopSize = StackAdjust.getImm();
  } else {
    // ... otherwise the amount to pop is *all* of the argument space,
    // conveniently stored in the MachineFunctionInfo by
    // LowerFormalArguments. This will, of course, be zero for the C calling
    // convention.
    ArgumentPopSize = FuncInfo->getArgumentStackToRestore();
  }

  assert(NumInitialBytes % 16 == 0 && NumResidualBytes % 16 == 0
         && "refusing to adjust stack by misaligned amt");

  // We may need to address callee-saved registers differently, so find out the
  // bound on the frame indices.
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  int MinCSFI = 0;
  int MaxCSFI = -1;

  if (CSI.size()) {
    MinCSFI = CSI[0].getFrameIdx();
    MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
  }

  // The "residual" stack update comes first from this direction and guarantees
  // that SP is NumInitialBytes below its value on function entry, either by a
  // direct update or restoring it from the frame pointer.
  if (NumInitialBytes + ArgumentPopSize != 0) {
    emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16,
                 NumInitialBytes + ArgumentPopSize);
    --MBBI;
  }

  // MBBI now points to the instruction just past the last callee-saved
  // restoration (either RET/B if NumInitialBytes == 0, or the "ADD sp, sp"
  // otherwise).
  // Now we need to find out where to put the bulk of the stack adjustment.
  MachineBasicBlock::iterator FirstEpilogue = MBBI;
  while (MBBI != MBB.begin()) {
    --MBBI;

    unsigned FrameOp;
    for (FrameOp = 0; FrameOp < MBBI->getNumOperands(); ++FrameOp) {
      if (MBBI->getOperand(FrameOp).isFI())
        break;
    }

    // If this instruction doesn't have a frame index we've reached the end of
    // the callee-save restoration.
    if (FrameOp == MBBI->getNumOperands())
      break;

    // Likewise if it *is* a local reference, but not to a callee-saved object.
    int FrameIdx = MBBI->getOperand(FrameOp).getIndex();
    if (FrameIdx < MinCSFI || FrameIdx > MaxCSFI)
      break;

    FirstEpilogue = MBBI;
  }

  if (MF.getFrameInfo()->hasVarSizedObjects()) {
    int64_t StaticFrameBase;
    StaticFrameBase = -(NumInitialBytes + FuncInfo->getFramePointerOffset());
    emitRegUpdate(MBB, FirstEpilogue, DL, TII,
                  AArch64::XSP, AArch64::X29, AArch64::NoRegister,
                  StaticFrameBase);
  } else {
    emitSPUpdate(MBB, FirstEpilogue, DL, TII, AArch64::X16, NumResidualBytes);
  }
}
/// Return the corresponding compact (no delay slot) form of a branch. unsigned MipsInstrInfo::getEquivalentCompactForm( const MachineBasicBlock::iterator I) const { unsigned Opcode = I->getOpcode(); bool canUseShortMicroMipsCTI = false; if (Subtarget.inMicroMipsMode()) { switch (Opcode) { case Mips::BNE: case Mips::BEQ: // microMIPS has NE,EQ branches that do not have delay slots provided one // of the operands is zero. if (I->getOperand(1).getReg() == Subtarget.getABI().GetZeroReg()) canUseShortMicroMipsCTI = true; break; // For microMIPS the PseudoReturn and PseudoIndirectBranch are always // expanded to JR_MM, so they can be replaced with JRC16_MM. case Mips::JR: case Mips::PseudoReturn: case Mips::PseudoIndirectBranch: canUseShortMicroMipsCTI = true; break; } } // MIPSR6 forbids both operands being the zero register. if (Subtarget.hasMips32r6() && (I->getNumOperands() > 1) && (I->getOperand(0).isReg() && (I->getOperand(0).getReg() == Mips::ZERO || I->getOperand(0).getReg() == Mips::ZERO_64)) && (I->getOperand(1).isReg() && (I->getOperand(1).getReg() == Mips::ZERO || I->getOperand(1).getReg() == Mips::ZERO_64))) return 0; if (Subtarget.hasMips32r6() || canUseShortMicroMipsCTI) { switch (Opcode) { case Mips::B: return Mips::BC; case Mips::BAL: return Mips::BALC; case Mips::BEQ: if (canUseShortMicroMipsCTI) return Mips::BEQZC_MM; else if (I->getOperand(0).getReg() == I->getOperand(1).getReg()) return 0; return Mips::BEQC; case Mips::BNE: if (canUseShortMicroMipsCTI) return Mips::BNEZC_MM; else if (I->getOperand(0).getReg() == I->getOperand(1).getReg()) return 0; return Mips::BNEC; case Mips::BGE: if (I->getOperand(0).getReg() == I->getOperand(1).getReg()) return 0; return Mips::BGEC; case Mips::BGEU: if (I->getOperand(0).getReg() == I->getOperand(1).getReg()) return 0; return Mips::BGEUC; case Mips::BGEZ: return Mips::BGEZC; case Mips::BGTZ: return Mips::BGTZC; case Mips::BLEZ: return Mips::BLEZC; case Mips::BLT: if (I->getOperand(0).getReg() == I->getOperand(1).getReg()) return 0; return Mips::BLTC; case Mips::BLTU: if (I->getOperand(0).getReg() == I->getOperand(1).getReg()) return 0; return Mips::BLTUC; case Mips::BLTZ: return Mips::BLTZC; // For MIPSR6, the instruction 'jic' can be used for these cases. Some // tools will accept 'jrc reg' as an alias for 'jic 0, $reg'. case Mips::JR: case Mips::PseudoReturn: case Mips::PseudoIndirectBranch: if (canUseShortMicroMipsCTI) return Mips::JRC16_MM; return Mips::JIC; case Mips::JALRPseudo: return Mips::JIALC; case Mips::JR64: case Mips::PseudoReturn64: case Mips::PseudoIndirectBranch64: return Mips::JIC64; case Mips::JALR64Pseudo: return Mips::JIALC64; default: return 0; } } return 0; }
MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) { MachineBasicBlock::iterator I = MBB->getFirstTerminator(); if (I == MBB->end()) return nullptr; // The terminator must be controlled by the flags. if (!I->readsRegister(AArch64::NZCV)) { switch (I->getOpcode()) { case AArch64::CBZW: case AArch64::CBZX: case AArch64::CBNZW: case AArch64::CBNZX: // These can be converted into a ccmp against #0. return I; } ++NumCmpTermRejs; DEBUG(dbgs() << "Flags not used by terminator: " << *I); return nullptr; } // Now find the instruction controlling the terminator. for (MachineBasicBlock::iterator B = MBB->begin(); I != B;) { --I; assert(!I->isTerminator() && "Spurious terminator"); switch (I->getOpcode()) { // cmp is an alias for subs with a dead destination register. case AArch64::SUBSWri: case AArch64::SUBSXri: // cmn is an alias for adds with a dead destination register. case AArch64::ADDSWri: case AArch64::ADDSXri: // Check that the immediate operand is within range, ccmp wants a uimm5. // Rd = SUBSri Rn, imm, shift if (I->getOperand(3).getImm() || !isUInt<5>(I->getOperand(2).getImm())) { DEBUG(dbgs() << "Immediate out of range for ccmp: " << *I); ++NumImmRangeRejs; return nullptr; } // Fall through. case AArch64::SUBSWrr: case AArch64::SUBSXrr: case AArch64::ADDSWrr: case AArch64::ADDSXrr: if (isDeadDef(I->getOperand(0).getReg())) return I; DEBUG(dbgs() << "Can't convert compare with live destination: " << *I); ++NumLiveDstRejs; return nullptr; case AArch64::FCMPSrr: case AArch64::FCMPDrr: case AArch64::FCMPESrr: case AArch64::FCMPEDrr: return I; } // Check for flag reads and clobbers. MIOperands::PhysRegInfo PRI = MIOperands(I).analyzePhysReg(AArch64::NZCV, TRI); if (PRI.Reads) { // The ccmp doesn't produce exactly the same flags as the original // compare, so reject the transform if there are uses of the flags // besides the terminators. DEBUG(dbgs() << "Can't create ccmp with multiple uses: " << *I); ++NumMultNZCVUses; return nullptr; } if (PRI.Clobbers) { DEBUG(dbgs() << "Not convertible compare: " << *I); ++NumUnknNZCVDefs; return nullptr; } } DEBUG(dbgs() << "Flags not defined in BB#" << MBB->getNumber() << '\n'); return nullptr; }
MachineInstrBuilder
MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc,
                                  MachineBasicBlock::iterator I) const {
  MachineInstrBuilder MIB;

  // Certain branches have two forms: e.g. beq $1, $zero, dst vs beqz $1, dst
  // Pick the zero form of the branch for readable assembly and for greater
  // branch distance in non-microMIPS mode.
  // FIXME: Certain atomic sequences on mips64 generate 32bit references to
  // Mips::ZERO, which is incorrect. This test should be updated to use
  // Subtarget.getABI().GetZeroReg() when those atomic sequences and others
  // are fixed.
  bool BranchWithZeroOperand =
      (I->isBranch() && !I->isPseudo() && I->getOperand(1).isReg() &&
       (I->getOperand(1).getReg() == Mips::ZERO ||
        I->getOperand(1).getReg() == Mips::ZERO_64));

  if (BranchWithZeroOperand) {
    switch (NewOpc) {
    case Mips::BEQC:
      NewOpc = Mips::BEQZC;
      break;
    case Mips::BNEC:
      NewOpc = Mips::BNEZC;
      break;
    case Mips::BGEC:
      NewOpc = Mips::BGEZC;
      break;
    case Mips::BLTC:
      NewOpc = Mips::BLTZC;
      break;
    }
  }

  MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), get(NewOpc));

  // For MIPSR6, JI*C requires an immediate 0 as an operand; JIALC(64) also
  // requires the removal of its %RA<imp-def> implicit operand, as copying the
  // implicit operands of the instruction we're looking at will give us the
  // correct flags.
  if (NewOpc == Mips::JIC || NewOpc == Mips::JIALC || NewOpc == Mips::JIC64 ||
      NewOpc == Mips::JIALC64) {

    if (NewOpc == Mips::JIALC || NewOpc == Mips::JIALC64)
      MIB->RemoveOperand(0);

    for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) {
      MIB.addOperand(I->getOperand(J));
    }

    MIB.addImm(0);

  } else if (BranchWithZeroOperand) {
    // For MIPSR6 and microMIPS branches with an explicit zero operand, copy
    // everything after the zero.
    MIB.addOperand(I->getOperand(0));

    for (unsigned J = 2, E = I->getDesc().getNumOperands(); J < E; ++J) {
      MIB.addOperand(I->getOperand(J));
    }
  } else {
    // All other cases copy all other operands.
    for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) {
      MIB.addOperand(I->getOperand(J));
    }
  }

  MIB.copyImplicitOps(*I);

  MIB.setMemRefs(I->memoperands_begin(), I->memoperands_end());
  return MIB;
}
/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical /// register references and actual offsets. /// void PEI::replaceFrameIndices(MachineFunction &Fn) { if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do? const TargetMachine &TM = Fn.getTarget(); assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!"); const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); const TargetFrameLowering *TFI = TM.getFrameLowering(); bool StackGrowsDown = TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; int FrameSetupOpcode = TII.getCallFrameSetupOpcode(); int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { #ifndef NDEBUG int SPAdjCount = 0; // frame setup / destroy count. #endif int SPAdj = 0; // SP offset due to call frame setup / destroy. if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB); for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { if (I->getOpcode() == FrameSetupOpcode || I->getOpcode() == FrameDestroyOpcode) { #ifndef NDEBUG // Track whether we see even pairs of them SPAdjCount += I->getOpcode() == FrameSetupOpcode ? 1 : -1; #endif // Remember how much SP has been adjusted to create the call // frame. int Size = I->getOperand(0).getImm(); if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) || (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode)) Size = -Size; SPAdj += Size; MachineBasicBlock::iterator PrevI = BB->end(); if (I != BB->begin()) PrevI = prior(I); TRI.eliminateCallFramePseudoInstr(Fn, *BB, I); // Visit the instructions created by eliminateCallFramePseudoInstr(). if (PrevI == BB->end()) I = BB->begin(); // The replaced instr was the first in the block. else I = llvm::next(PrevI); continue; } MachineInstr *MI = I; bool DoIncr = true; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) if (MI->getOperand(i).isFI()) { // Some instructions (e.g. inline asm instructions) can have // multiple frame indices and/or cause eliminateFrameIndex // to insert more than one instruction. We need the register // scavenger to go through all of these instructions so that // it can update its register information. We keep the // iterator at the point before insertion so that we can // revisit them in full. bool AtBeginning = (I == BB->begin()); if (!AtBeginning) --I; // If this instruction has a FrameIndex operand, we need to // use that target machine register info object to eliminate // it. TRI.eliminateFrameIndex(MI, SPAdj, FrameIndexVirtualScavenging ? NULL : RS); // Reset the iterator if we were at the beginning of the BB. if (AtBeginning) { I = BB->begin(); DoIncr = false; } MI = 0; break; } if (DoIncr && I != BB->end()) ++I; // Update register states. if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI); } // If we have evenly matched pairs of frame setup / destroy instructions, // make sure the adjustments come out to zero. If we don't have matched // pairs, we can't be sure the missing bit isn't in another basic block // due to a custom inserter playing tricks, so just asserting SPAdj==0 // isn't sufficient. See tMOVCC on Thumb1, for example. assert((SPAdjCount || SPAdj == 0) && "Unbalanced call frame setup / destroy pairs?"); } }
bool MSP430InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                    MachineBasicBlock *&TBB,
                                    MachineBasicBlock *&FBB,
                                    SmallVectorImpl<MachineOperand> &Cond,
                                    bool AllowModify) const {
  // Start from the bottom of the block and work up, examining the
  // terminator instructions.
  MachineBasicBlock::iterator I = MBB.end();
  while (I != MBB.begin()) {
    --I;
    if (I->isDebugValue())
      continue;

    // Working from the bottom, when we see a non-terminator
    // instruction, we're done.
    if (!isUnpredicatedTerminator(I))
      break;

    // A terminator that isn't a branch can't easily be handled
    // by this analysis.
    if (!I->isBranch())
      return true;

    // Cannot handle indirect branches.
    if (I->getOpcode() == MSP430::Br ||
        I->getOpcode() == MSP430::Bm)
      return true;

    // Handle unconditional branches.
    if (I->getOpcode() == MSP430::JMP) {
      if (!AllowModify) {
        TBB = I->getOperand(0).getMBB();
        continue;
      }

      // If the block has any instructions after a JMP, delete them.
      while (std::next(I) != MBB.end())
        std::next(I)->eraseFromParent();
      Cond.clear();
      FBB = nullptr;

      // Delete the JMP if it's equivalent to a fall-through.
      if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
        TBB = nullptr;
        I->eraseFromParent();
        I = MBB.end();
        continue;
      }

      // TBB is used to indicate the unconditional destination.
      TBB = I->getOperand(0).getMBB();
      continue;
    }

    // Handle conditional branches.
    assert(I->getOpcode() == MSP430::JCC && "Invalid conditional branch");
    MSP430CC::CondCodes BranchCode =
      static_cast<MSP430CC::CondCodes>(I->getOperand(1).getImm());
    if (BranchCode == MSP430CC::COND_INVALID)
      return true;  // Can't handle weird stuff.

    // Working from the bottom, handle the first conditional branch.
    if (Cond.empty()) {
      FBB = TBB;
      TBB = I->getOperand(0).getMBB();
      Cond.push_back(MachineOperand::CreateImm(BranchCode));
      continue;
    }

    // Handle subsequent conditional branches. Only handle the case where all
    // conditional branches branch to the same destination.
    assert(Cond.size() == 1);
    assert(TBB);

    // Only handle the case where all conditional branches branch to
    // the same destination.
    if (TBB != I->getOperand(0).getMBB())
      return true;

    MSP430CC::CondCodes OldBranchCode = (MSP430CC::CondCodes)Cond[0].getImm();
    // If the conditions are the same, we can leave them alone.
    if (OldBranchCode == BranchCode)
      continue;

    return true;
  }

  return false;
}
unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
                                        MachineBasicBlock::iterator I,
                                        int SPAdj) {
  // Consider all allocatable registers in the register class initially
  BitVector Candidates =
    TRI->getAllocatableSet(*I->getParent()->getParent(), RC);

  // Exclude all the registers being used by the instruction.
  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = I->getOperand(i);
    if (MO.isReg() && MO.getReg() != 0 &&
        !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
      Candidates.reset(MO.getReg());
  }

  // Try to find a register that's unused if there is one, as then we won't
  // have to spill. Search explicitly rather than masking out based on
  // RegsAvailable, as RegsAvailable does not take aliases into account.
  // That's what getRegsAvailable() is for.
  BitVector Available = getRegsAvailable(RC);
  Available &= Candidates;
  if (Available.any())
    Candidates = Available;

  // Find the register whose use is furthest away.
  MachineBasicBlock::iterator UseMI;
  unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI);

  // If we found an unused register there is no reason to spill it.
  if (!isAliasUsed(SReg)) {
    DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n");
    return SReg;
  }

  assert(ScavengedReg == 0 &&
         "Scavenger slot is live, unable to scavenge another register!");

  // Avoid infinite regress
  ScavengedReg = SReg;

  // If the target knows how to save/restore the register, let it do so;
  // otherwise, use the emergency stack spill slot.
  if (!TRI->saveScavengerRegister(*MBB, I, UseMI, RC, SReg)) {
    // Spill the scavenged register before I.
    assert(ScavengingFrameIndex >= 0 &&
           "Cannot scavenge register without an emergency spill slot!");
    TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC,
                             TRI);
    MachineBasicBlock::iterator II = prior(I);

    unsigned FIOperandNum = getFrameIndexOperandNum(II);
    TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);

    // Restore the scavenged register before its use (or first terminator).
    TII->loadRegFromStackSlot(*MBB, UseMI, SReg, ScavengingFrameIndex, RC,
                              TRI);
    II = prior(UseMI);

    FIOperandNum = getFrameIndexOperandNum(II);
    TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);
  }

  ScavengeRestore = prior(UseMI);

  // Doing this here leads to infinite regress.
  // ScavengedReg = SReg;

  DEBUG(dbgs() << "Scavenged register (with spill): " << TRI->getName(SReg)
               << "\n");

  return SReg;
}
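// Note on the spill path above: the scavenged value is bracketed by a store
// emitted just before I and a reload emitted just before UseMI, so SReg is
// only repurposed on the range [I, UseMI) that findSurvivorReg proved safe.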
/// This function generates the sequence of instructions needed to get the
/// result of adding register REG and immediate IMM.
unsigned Mips16InstrInfo::loadImmediate(unsigned FrameReg, int64_t Imm,
                                        MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator II,
                                        const DebugLoc &DL,
                                        unsigned &NewImm) const {
  //
  // given original instruction is:
  // Instr rx, T[offset] where offset is too big.
  //
  // lo = offset & 0xFFFF
  // hi = ((offset >> 16) + (lo >> 15)) & 0xFFFF;
  //
  // let T = temporary register
  // li T, hi
  // shl T, 16
  // add T, Rx, T
  //
  RegScavenger rs;
  int32_t lo = Imm & 0xFFFF;
  NewImm = lo;
  int Reg = 0;
  int SpReg = 0;

  rs.enterBasicBlock(MBB);
  rs.forward(II);
  //
  // We need to know which registers can be used, in the case where there
  // are not enough free registers. We exclude all registers that
  // are used in the instruction that we are helping.
  //
  // Consider all allocatable registers in the register class initially
  BitVector Candidates =
      RI.getAllocatableSet(*II->getParent()->getParent(),
                           &Mips::CPU16RegsRegClass);
  // Exclude all the registers being used by the instruction.
  for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = II->getOperand(i);
    if (MO.isReg() && MO.getReg() != 0 && !MO.isDef() &&
        !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
      Candidates.reset(MO.getReg());
  }

  // If the same register was used and defined in an instruction, then
  // it will not be in the list of candidates.
  //
  // We need to analyze the instruction that we are helping. We need to know
  // if it defines register x but register x is not present as an operand of
  // the instruction. This tells us whether the register is live before the
  // instruction. If it's not, then we don't need to save it in case there
  // are no free registers.
  int DefReg = 0;
  for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = II->getOperand(i);
    if (MO.isReg() && MO.isDef()) {
      DefReg = MO.getReg();
      break;
    }
  }

  BitVector Available = rs.getRegsAvailable(&Mips::CPU16RegsRegClass);
  Available &= Candidates;
  //
  // We use T0 for the first register, if we need to save something away.
  // We use T1 for the second register, if we need to save something away.
  //
  unsigned FirstRegSaved = 0, SecondRegSaved = 0;
  unsigned FirstRegSavedTo = 0, SecondRegSavedTo = 0;

  Reg = Available.find_first();

  if (Reg == -1) {
    Reg = Candidates.find_first();
    Candidates.reset(Reg);
    if (DefReg != Reg) {
      FirstRegSaved = Reg;
      FirstRegSavedTo = Mips::T0;
      copyPhysReg(MBB, II, DL, FirstRegSavedTo, FirstRegSaved, true);
    }
  } else
    Available.reset(Reg);
  BuildMI(MBB, II, DL, get(Mips::LwConstant32), Reg).addImm(Imm).addImm(-1);
  NewImm = 0;
  if (FrameReg == Mips::SP) {
    SpReg = Available.find_first();
    if (SpReg == -1) {
      SpReg = Candidates.find_first();
      // Candidates.reset(SpReg); // not really needed
      if (DefReg != SpReg) {
        SecondRegSaved = SpReg;
        SecondRegSavedTo = Mips::T1;
      }
      if (SecondRegSaved)
        copyPhysReg(MBB, II, DL, SecondRegSavedTo, SecondRegSaved, true);
    } else
      Available.reset(SpReg);
    copyPhysReg(MBB, II, DL, SpReg, Mips::SP, false);
    BuildMI(MBB, II, DL, get(Mips::AdduRxRyRz16), Reg)
        .addReg(SpReg, RegState::Kill)
        .addReg(Reg);
  } else
    BuildMI(MBB, II, DL, get(Mips::AdduRxRyRz16), Reg)
        .addReg(FrameReg)
        .addReg(Reg, RegState::Kill);
  if (FirstRegSaved || SecondRegSaved) {
    II = std::next(II);
    if (FirstRegSaved)
      copyPhysReg(MBB, II, DL, FirstRegSaved, FirstRegSavedTo, true);
    if (SecondRegSaved)
      copyPhysReg(MBB, II, DL, SecondRegSaved, SecondRegSavedTo, true);
  }
  return Reg;
}
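// The hi/lo recipe in the comment above compensates for the sign extension of
// the low half: when bit 15 of lo is set, a 16-bit add effectively subtracts
// 0x10000, so hi is pre-incremented by (lo >> 15). A self-contained check of
// that identity (a standalone sketch, not part of the Mips16 backend):
#include <cassert>
#include <cstdint>

static void checkHiLoSplit(int32_t Imm) {
  uint32_t U = static_cast<uint32_t>(Imm);
  uint32_t Lo = U & 0xFFFF;
  uint32_t Hi = ((U >> 16) + (Lo >> 15)) & 0xFFFF;
  // What "li T, hi; shl T, 16" plus a sign-extending 16-bit add of lo
  // computes; unsigned arithmetic keeps the wraparound well-defined.
  int32_t SLo = static_cast<int16_t>(Lo);
  uint32_t Recombined = (Hi << 16) + static_cast<uint32_t>(SLo);
  assert(static_cast<int32_t>(Recombined) == Imm);
}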
void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, CallContext &Context) { // Check that this particular call sequence is amenable to the // transformation. const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>(STI->getRegisterInfo()); // We expect to enter this at the beginning of a call sequence assert(I->getOpcode() == TII->getCallFrameSetupOpcode()); MachineBasicBlock::iterator FrameSetup = I++; Context.FrameSetup = FrameSetup; // How much do we adjust the stack? This puts an upper bound on // the number of parameters actually passed on it. unsigned int MaxAdjust = TII->getFrameSize(*FrameSetup) >> Log2SlotSize; // A zero adjustment means no stack parameters if (!MaxAdjust) { Context.NoStackParams = true; return; } // Skip over DEBUG_VALUE. // For globals in PIC mode, we can have some LEAs here. Skip them as well. // TODO: Extend this to something that covers more cases. while (I->getOpcode() == X86::LEA32r || I->isDebugInstr()) ++I; unsigned StackPtr = RegInfo.getStackRegister(); auto StackPtrCopyInst = MBB.end(); // SelectionDAG (but not FastISel) inserts a copy of ESP into a virtual // register. If it's there, use that virtual register as stack pointer // instead. Also, we need to locate this instruction so that we can later // safely ignore it while doing the conservative processing of the call chain. // The COPY can be located anywhere between the call-frame setup // instruction and its first use. We use the call instruction as a boundary // because it is usually cheaper to check if an instruction is a call than // checking if an instruction uses a register. for (auto J = I; !J->isCall(); ++J) if (J->isCopy() && J->getOperand(0).isReg() && J->getOperand(1).isReg() && J->getOperand(1).getReg() == StackPtr) { StackPtrCopyInst = J; Context.SPCopy = &*J++; StackPtr = Context.SPCopy->getOperand(0).getReg(); break; } // Scan the call setup sequence for the pattern we're looking for. // We only handle a simple case - a sequence of store instructions that // push a sequence of stack-slot-aligned values onto the stack, with // no gaps between them. if (MaxAdjust > 4) Context.ArgStoreVector.resize(MaxAdjust, nullptr); DenseSet<unsigned int> UsedRegs; for (InstClassification Classification = Skip; Classification != Exit; ++I) { // If this is the COPY of the stack pointer, it's ok to ignore. if (I == StackPtrCopyInst) continue; Classification = classifyInstruction(MBB, I, RegInfo, UsedRegs); if (Classification != Convert) continue; // We know the instruction has a supported store opcode. // We only want movs of the form: // mov imm/reg, k(%StackPtr) // If we run into something else, bail. // Note that AddrBaseReg may, counter to its name, not be a register, // but rather a frame index. // TODO: Support the fi case. This should probably work now that we // have the infrastructure to track the stack pointer within a call // sequence. if (!I->getOperand(X86::AddrBaseReg).isReg() || (I->getOperand(X86::AddrBaseReg).getReg() != StackPtr) || !I->getOperand(X86::AddrScaleAmt).isImm() || (I->getOperand(X86::AddrScaleAmt).getImm() != 1) || (I->getOperand(X86::AddrIndexReg).getReg() != X86::NoRegister) || (I->getOperand(X86::AddrSegmentReg).getReg() != X86::NoRegister) || !I->getOperand(X86::AddrDisp).isImm()) return; int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm(); assert(StackDisp >= 0 && "Negative stack displacement when passing parameters"); // We really don't want to consider the unaligned case. 
if (StackDisp & (SlotSize - 1)) return; StackDisp >>= Log2SlotSize; assert((size_t)StackDisp < Context.ArgStoreVector.size() && "Function call has more parameters than the stack is adjusted for."); // If the same stack slot is being filled twice, something's fishy. if (Context.ArgStoreVector[StackDisp] != nullptr) return; Context.ArgStoreVector[StackDisp] = &*I; for (const MachineOperand &MO : I->uses()) { if (!MO.isReg()) continue; unsigned int Reg = MO.getReg(); if (RegInfo.isPhysicalRegister(Reg)) UsedRegs.insert(Reg); } } --I; // We now expect the end of the sequence. If we stopped early, // or reached the end of the block without finding a call, bail. if (I == MBB.end() || !I->isCall()) return; Context.Call = &*I; if ((++I)->getOpcode() != TII->getCallFrameDestroyOpcode()) return; // Now, go through the vector, and see that we don't have any gaps, // but only a series of storing instructions. auto MMI = Context.ArgStoreVector.begin(), MME = Context.ArgStoreVector.end(); for (; MMI != MME; ++MMI, Context.ExpectedDist += SlotSize) if (*MMI == nullptr) break; // If the call had no parameters, do nothing if (MMI == Context.ArgStoreVector.begin()) return; // We are either at the last parameter, or a gap. // Make sure it's not a gap for (; MMI != MME; ++MMI) if (*MMI != nullptr) return; Context.UsePush = true; }
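// The two trailing loops above implement a single property: the argument-store
// vector must be a non-empty run of filled slots followed only by empty ones.
// A standalone sketch of the same check over plain pointers (hypothetical
// helper, independent of the pass):
#include <cstddef>
#include <vector>

static bool isContiguousPrefix(const std::vector<void *> &Slots,
                               size_t &NumFilled) {
  size_t I = 0;
  while (I < Slots.size() && Slots[I] != nullptr)
    ++I; // walk the filled prefix; this mirrors the ExpectedDist accumulation
  NumFilled = I;
  if (I == 0)
    return false; // no stack parameters: nothing to transform
  for (; I < Slots.size(); ++I)
    if (Slots[I] != nullptr)
      return false; // a filled slot after a hole means a gap: bail
  return true;
}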
void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, CallContext &Context) { // Check that this particular call sequence is amenable to the // transformation. const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>( STI->getRegisterInfo()); unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode(); // We expect to enter this at the beginning of a call sequence assert(I->getOpcode() == TII->getCallFrameSetupOpcode()); MachineBasicBlock::iterator FrameSetup = I++; Context.FrameSetup = FrameSetup; // How much do we adjust the stack? This puts an upper bound on // the number of parameters actually passed on it. unsigned int MaxAdjust = FrameSetup->getOperand(0).getImm() / 4; // A zero adjustment means no stack parameters if (!MaxAdjust) { Context.NoStackParams = true; return; } // For globals in PIC mode, we can have some LEAs here. // Ignore them, they don't bother us. // TODO: Extend this to something that covers more cases. while (I->getOpcode() == X86::LEA32r) ++I; // We expect a copy instruction here. // TODO: The copy instruction is a lowering artifact. // We should also support a copy-less version, where the stack // pointer is used directly. if (!I->isCopy() || !I->getOperand(0).isReg()) return; Context.SPCopy = I++; unsigned StackPtr = Context.SPCopy->getOperand(0).getReg(); // Scan the call setup sequence for the pattern we're looking for. // We only handle a simple case - a sequence of MOV32mi or MOV32mr // instructions, that push a sequence of 32-bit values onto the stack, with // no gaps between them. if (MaxAdjust > 4) Context.MovVector.resize(MaxAdjust, nullptr); InstClassification Classification; DenseSet<unsigned int> UsedRegs; while ((Classification = classifyInstruction(MBB, I, RegInfo, UsedRegs)) != Exit) { if (Classification == Skip) { ++I; continue; } // We know the instruction is a MOV32mi/MOV32mr. // We only want movs of the form: // movl imm/r32, k(%esp) // If we run into something else, bail. // Note that AddrBaseReg may, counter to its name, not be a register, // but rather a frame index. // TODO: Support the fi case. This should probably work now that we // have the infrastructure to track the stack pointer within a call // sequence. if (!I->getOperand(X86::AddrBaseReg).isReg() || (I->getOperand(X86::AddrBaseReg).getReg() != StackPtr) || !I->getOperand(X86::AddrScaleAmt).isImm() || (I->getOperand(X86::AddrScaleAmt).getImm() != 1) || (I->getOperand(X86::AddrIndexReg).getReg() != X86::NoRegister) || (I->getOperand(X86::AddrSegmentReg).getReg() != X86::NoRegister) || !I->getOperand(X86::AddrDisp).isImm()) return; int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm(); assert(StackDisp >= 0 && "Negative stack displacement when passing parameters"); // We really don't want to consider the unaligned case. if (StackDisp % 4) return; StackDisp /= 4; assert((size_t)StackDisp < Context.MovVector.size() && "Function call has more parameters than the stack is adjusted for."); // If the same stack slot is being filled twice, something's fishy. if (Context.MovVector[StackDisp] != nullptr) return; Context.MovVector[StackDisp] = I; for (const MachineOperand &MO : I->uses()) { if (!MO.isReg()) continue; unsigned int Reg = MO.getReg(); if (RegInfo.isPhysicalRegister(Reg)) UsedRegs.insert(Reg); } ++I; } // We now expect the end of the sequence. If we stopped early, // or reached the end of the block without finding a call, bail. 
if (I == MBB.end() || !I->isCall()) return; Context.Call = I; if ((++I)->getOpcode() != FrameDestroyOpcode) return; // Now, go through the vector, and see that we don't have any gaps, // but only a series of 32-bit MOVs. auto MMI = Context.MovVector.begin(), MME = Context.MovVector.end(); for (; MMI != MME; ++MMI, Context.ExpectedDist += 4) if (*MMI == nullptr) break; // If the call had no parameters, do nothing if (MMI == Context.MovVector.begin()) return; // We are either at the last parameter, or a gap. // Make sure it's not a gap for (; MMI != MME; ++MMI) if (*MMI != nullptr) return; Context.UsePush = true; }