bool PTXInstrInfo::
AnalyzeBranch(MachineBasicBlock &MBB,
              MachineBasicBlock *&TBB,
              MachineBasicBlock *&FBB,
              SmallVectorImpl<MachineOperand> &Cond,
              bool AllowModify) const {
  // TODO implement cases when AllowModify is true
  if (MBB.empty())
    return true;

  MachineBasicBlock::const_iterator iter = MBB.end();
  const MachineInstr& instLast1 = *--iter;
  const MCInstrDesc &desc1 = instLast1.getDesc();
  // for the special case that MBB has only 1 instruction
  const bool IsSizeOne = MBB.size() == 1;
  // if IsSizeOne is true, *--iter and instLast2 are invalid
  // we put a dummy value in instLast2 and desc2 since they are used
  const MachineInstr& instLast2 = IsSizeOne ? instLast1 : *--iter;
  const MCInstrDesc &desc2 = IsSizeOne ? desc1 : instLast2.getDesc();

  DEBUG(dbgs() << "\n");
  DEBUG(dbgs() << "AnalyzeBranch: opcode: " << instLast1.getOpcode() << "\n");
  DEBUG(dbgs() << "AnalyzeBranch: MBB: " << MBB.getName().str() << "\n");
  DEBUG(dbgs() << "AnalyzeBranch: TBB: " << TBB << "\n");
  DEBUG(dbgs() << "AnalyzeBranch: FBB: " << FBB << "\n");

  // this block ends with no branches
  if (!IsAnyKindOfBranch(instLast1)) {
    DEBUG(dbgs() << "AnalyzeBranch: ends with no branch\n");
    return false;
  }

  // this block ends with only an unconditional branch
  if (desc1.isUnconditionalBranch() &&
      // when IsSizeOne is true, it "absorbs" the evaluation of instLast2
      (IsSizeOne || !IsAnyKindOfBranch(instLast2))) {
    DEBUG(dbgs() << "AnalyzeBranch: ends with only uncond branch\n");
    TBB = GetBranchTarget(instLast1);
    return false;
  }

  // this block ends with a conditional branch and
  // it falls through to a successor block
  if (desc1.isConditionalBranch() &&
      IsAnySuccessorAlsoLayoutSuccessor(MBB)) {
    DEBUG(dbgs() << "AnalyzeBranch: ends with cond branch and fall through\n");
    TBB = GetBranchTarget(instLast1);
    int i = instLast1.findFirstPredOperandIdx();
    Cond.push_back(instLast1.getOperand(i));
    Cond.push_back(instLast1.getOperand(i+1));
    return false;
  }

  // when IsSizeOne is true, we are done
  if (IsSizeOne)
    return true;

  // this block ends with a conditional branch
  // followed by an unconditional branch
  if (desc2.isConditionalBranch() &&
      desc1.isUnconditionalBranch()) {
    DEBUG(dbgs() << "AnalyzeBranch: ends with cond and uncond branch\n");
    TBB = GetBranchTarget(instLast2);
    FBB = GetBranchTarget(instLast1);
    int i = instLast2.findFirstPredOperandIdx();
    Cond.push_back(instLast2.getOperand(i));
    Cond.push_back(instLast2.getOperand(i+1));
    return false;
  }

  // branch cannot be understood
  DEBUG(dbgs() << "AnalyzeBranch: cannot be understood\n");
  return true;
}
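// Editor's note: illustrative sketch, not part of the original source. A
// hypothetical client (the helper name is invented) showing how the four-way
// AnalyzeBranch contract implemented above is conventionally interpreted.
static void describeTerminators(const TargetInstrInfo *TII,
                                MachineBasicBlock &MBB) {
  MachineBasicBlock *TBB = 0, *FBB = 0;
  SmallVector<MachineOperand, 4> Cond;
  if (TII->AnalyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/false)) {
    dbgs() << "terminators not understood\n";
    return;
  }
  if (!TBB)
    dbgs() << "block falls through\n";          // no branch at all
  else if (Cond.empty())
    dbgs() << "unconditional branch to TBB\n";  // TBB set, no condition
  else if (!FBB)
    dbgs() << "conditional branch to TBB, fall-through on false\n";
  else
    dbgs() << "conditional branch to TBB, unconditional branch to FBB\n";
}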
/// Walk the specified region of the CFG and hoist loop invariants out to the
/// preheader.
void MachineLICM::HoistRegionPostRA() {
  MachineBasicBlock *Preheader = getCurPreheader();
  if (!Preheader)
    return;

  unsigned NumRegs = TRI->getNumRegs();
  BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop.
  BitVector PhysRegClobbers(NumRegs); // Regs defined more than once.

  SmallVector<CandidateInfo, 32> Candidates;
  SmallSet<int, 32> StoredFIs;

  // Walk the entire region, count number of defs for each register, and
  // collect potential LICM candidates.
  const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
    MachineBasicBlock *BB = Blocks[i];

    // If the header of the loop containing this basic block is a landing pad,
    // then don't try to hoist instructions out of this loop.
    const MachineLoop *ML = MLI->getLoopFor(BB);
    if (ML && ML->getHeader()->isEHPad())
      continue;

    // Conservatively treat live-ins as an external def.
    // FIXME: That means a reload that's reused in successor block(s) will not
    // be LICM'ed.
    for (const auto &LI : BB->liveins()) {
      for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI)
        PhysRegDefs.set(*AI);
    }

    SpeculationState = SpeculateUnknown;
    for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end();
         MII != E; ++MII) {
      MachineInstr *MI = &*MII;
      ProcessMI(MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates);
    }
  }

  // Gather the registers read / clobbered by the terminator.
  BitVector TermRegs(NumRegs);
  MachineBasicBlock::iterator TI = Preheader->getFirstTerminator();
  if (TI != Preheader->end()) {
    for (unsigned i = 0, e = TI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = TI->getOperand(i);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (!Reg)
        continue;
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
        TermRegs.set(*AI);
    }
  }

  // Now evaluate whether the potential candidates qualify.
  // 1. Check if the candidate defined register is defined by another
  //    instruction in the loop.
  // 2. If the candidate is a load from stack slot (always true for now),
  //    check if the slot is stored anywhere in the loop.
  // 3. Make sure the candidate's def does not clobber registers read by the
  //    terminator, and is not itself clobbered by the terminator.
  for (unsigned i = 0, e = Candidates.size(); i != e; ++i) {
    if (Candidates[i].FI != INT_MIN &&
        StoredFIs.count(Candidates[i].FI))
      continue;

    unsigned Def = Candidates[i].Def;
    if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) {
      bool Safe = true;
      MachineInstr *MI = Candidates[i].MI;
      for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
        const MachineOperand &MO = MI->getOperand(j);
        if (!MO.isReg() || MO.isDef() || !MO.getReg())
          continue;
        unsigned Reg = MO.getReg();
        if (PhysRegDefs.test(Reg) ||
            PhysRegClobbers.test(Reg)) {
          // If it's using a non-loop-invariant register, then it's obviously
          // not safe to hoist.
          Safe = false;
          break;
        }
      }
      if (Safe)
        HoistPostRA(MI, Candidates[i].Def);
    }
  }
}
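// Editor's note: illustrative only, not from the original source. The post-RA
// hoisting above targets patterns like the following, where a reload from a
// stack slot that is never stored to inside the loop moves to the preheader
// (registers and offsets are made up):
//
//   preheader:                       preheader:
//   loop:                              ldr r1, [sp, #8]
//     ldr r1, [sp, #8]        =>     loop:
//     ... use r1 ...                   ... use r1 ...
//     b loop                           b loop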
void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
                                               MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator I,
                                               CallContext &Context) {
  // Check that this particular call sequence is amenable to the
  // transformation.
  const X86RegisterInfo &RegInfo =
      *static_cast<const X86RegisterInfo *>(STI->getRegisterInfo());
  unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();

  // We expect to enter this at the beginning of a call sequence
  assert(I->getOpcode() == TII->getCallFrameSetupOpcode());
  MachineBasicBlock::iterator FrameSetup = I++;
  Context.FrameSetup = FrameSetup;

  // How much do we adjust the stack? This puts an upper bound on
  // the number of parameters actually passed on it.
  unsigned int MaxAdjust = FrameSetup->getOperand(0).getImm() >> Log2SlotSize;

  // A zero adjustment means no stack parameters
  if (!MaxAdjust) {
    Context.NoStackParams = true;
    return;
  }

  // Skip over DEBUG_VALUE.
  // For globals in PIC mode, we can have some LEAs here. Skip them as well.
  // TODO: Extend this to something that covers more cases.
  while (I->getOpcode() == X86::LEA32r || I->isDebugValue())
    ++I;

  unsigned StackPtr = RegInfo.getStackRegister();
  // SelectionDAG (but not FastISel) inserts a copy of ESP into a virtual
  // register here. If it's there, use that virtual register as stack pointer
  // instead.
  if (I->isCopy() && I->getOperand(0).isReg() && I->getOperand(1).isReg() &&
      I->getOperand(1).getReg() == StackPtr) {
    Context.SPCopy = &*I++;
    StackPtr = Context.SPCopy->getOperand(0).getReg();
  }

  // Scan the call setup sequence for the pattern we're looking for.
  // We only handle a simple case - a sequence of store instructions that
  // push a sequence of stack-slot-aligned values onto the stack, with
  // no gaps between them.
  if (MaxAdjust > 4)
    Context.MovVector.resize(MaxAdjust, nullptr);

  InstClassification Classification;
  DenseSet<unsigned int> UsedRegs;

  while ((Classification = classifyInstruction(MBB, I, RegInfo, UsedRegs)) !=
         Exit) {
    if (Classification == Skip) {
      ++I;
      continue;
    }

    // We know the instruction has a supported store opcode.
    // We only want movs of the form:
    // mov imm/reg, k(%StackPtr)
    // If we run into something else, bail.
    // Note that AddrBaseReg may, counter to its name, not be a register,
    // but rather a frame index.
    // TODO: Support the fi case. This should probably work now that we
    // have the infrastructure to track the stack pointer within a call
    // sequence.
    if (!I->getOperand(X86::AddrBaseReg).isReg() ||
        (I->getOperand(X86::AddrBaseReg).getReg() != StackPtr) ||
        !I->getOperand(X86::AddrScaleAmt).isImm() ||
        (I->getOperand(X86::AddrScaleAmt).getImm() != 1) ||
        (I->getOperand(X86::AddrIndexReg).getReg() != X86::NoRegister) ||
        (I->getOperand(X86::AddrSegmentReg).getReg() != X86::NoRegister) ||
        !I->getOperand(X86::AddrDisp).isImm())
      return;

    int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm();
    assert(StackDisp >= 0 &&
           "Negative stack displacement when passing parameters");

    // We really don't want to consider the unaligned case.
    if (StackDisp & (SlotSize - 1))
      return;
    StackDisp >>= Log2SlotSize;

    assert((size_t)StackDisp < Context.MovVector.size() &&
           "Function call has more parameters than the stack is adjusted for.");

    // If the same stack slot is being filled twice, something's fishy.
    if (Context.MovVector[StackDisp] != nullptr)
      return;
    Context.MovVector[StackDisp] = &*I;

    for (const MachineOperand &MO : I->uses()) {
      if (!MO.isReg())
        continue;
      unsigned int Reg = MO.getReg();
      if (RegInfo.isPhysicalRegister(Reg))
        UsedRegs.insert(Reg);
    }

    ++I;
  }

  // We now expect the end of the sequence. If we stopped early,
  // or reached the end of the block without finding a call, bail.
  if (I == MBB.end() || !I->isCall())
    return;

  Context.Call = &*I;
  if ((++I)->getOpcode() != FrameDestroyOpcode)
    return;

  // Now, go through the vector, and see that we don't have any gaps,
  // but only a series of MOVs.
  auto MMI = Context.MovVector.begin(), MME = Context.MovVector.end();
  for (; MMI != MME; ++MMI, Context.ExpectedDist += SlotSize)
    if (*MMI == nullptr)
      break;

  // If the call had no parameters, do nothing
  if (MMI == Context.MovVector.begin())
    return;

  // We are either at the last parameter, or a gap.
  // Make sure it's not a gap
  for (; MMI != MME; ++MMI)
    if (*MMI != nullptr)
      return;

  Context.UsePush = true;
}
/// Return whether (physical) register "Reg" has been <def>ined and not
/// <kill>ed as of just before "MI".
///
/// Search is localised to a neighborhood of Neighborhood instructions before
/// (searching for defs or kills) and Neighborhood instructions after
/// (searching just for defs) MI.
MachineBasicBlock::LivenessQueryResult
MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
                                           unsigned Reg, MachineInstr *MI,
                                           unsigned Neighborhood) {
  unsigned N = Neighborhood;
  MachineBasicBlock *MBB = MI->getParent();

  // Start by searching backwards from MI, looking for kills, reads or defs.
  MachineBasicBlock::iterator I(MI);
  // If this is the first insn in the block, don't search backwards.
  if (I != MBB->begin()) {
    do {
      --I;

      MachineOperandIteratorBase::PhysRegInfo Analysis =
          MIOperands(I).analyzePhysReg(Reg, TRI);

      if (Analysis.Defines)
        // Outputs happen after inputs so they take precedence if both are
        // present.
        return Analysis.DefinesDead ? LQR_Dead : LQR_Live;

      if (Analysis.Kills || Analysis.Clobbers)
        // Register killed, so isn't live.
        return LQR_Dead;
      else if (Analysis.ReadsOverlap)
        // Defined or read without a previous kill - live.
        return Analysis.Reads ? LQR_Live : LQR_OverlappingLive;

    } while (I != MBB->begin() && --N > 0);
  }

  // Did we get to the start of the block?
  if (I == MBB->begin()) {
    // If so, the register's state is definitely defined by the live-in state.
    for (MCRegAliasIterator RAI(Reg, TRI, /*IncludeSelf=*/true);
         RAI.isValid(); ++RAI) {
      if (MBB->isLiveIn(*RAI))
        return (*RAI == Reg) ? LQR_Live : LQR_OverlappingLive;
    }

    return LQR_Dead;
  }

  N = Neighborhood;

  // Try searching forwards from MI, looking for reads or defs.
  I = MachineBasicBlock::iterator(MI);
  // If this is the last insn in the block, don't search forwards.
  if (I != MBB->end()) {
    for (++I; I != MBB->end() && N > 0; ++I, --N) {
      MachineOperandIteratorBase::PhysRegInfo Analysis =
          MIOperands(I).analyzePhysReg(Reg, TRI);

      if (Analysis.ReadsOverlap)
        // Used, therefore must have been live.
        return (Analysis.Reads) ? LQR_Live : LQR_OverlappingLive;
      else if (Analysis.Clobbers || Analysis.Defines)
        // Defined (but not read) therefore cannot have been live.
        return LQR_Dead;
    }
  }

  // At this point we have no idea of the liveness of the register.
  return LQR_Unknown;
}
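// Editor's note: illustrative sketch, not part of the original source. A
// hypothetical caller (the helper name is invented) using the query above to
// test whether a physreg can be used as a scratch register at MI. Only
// LQR_Dead is a definitive "safe" answer; LQR_Unknown must be treated
// conservatively.
static bool isSafeAsScratch(MachineBasicBlock &MBB,
                            const TargetRegisterInfo *TRI, unsigned Reg,
                            MachineInstr *MI) {
  return MBB.computeRegisterLiveness(TRI, Reg, MI, /*Neighborhood=*/10) ==
         MachineBasicBlock::LQR_Dead;
}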
/// After FromBB is tail duplicated into its predecessor blocks, the successors
/// have gained new predecessors. Update the PHI instructions in them
/// accordingly.
void
TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
                                  SmallVectorImpl<MachineBasicBlock *> &TDBBs,
                                  SmallSetVector<MachineBasicBlock*,8> &Succs) {
  for (SmallSetVector<MachineBasicBlock*, 8>::iterator SI = Succs.begin(),
         SE = Succs.end(); SI != SE; ++SI) {
    MachineBasicBlock *SuccBB = *SI;
    for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end();
         II != EE; ++II) {
      if (!II->isPHI())
        break;
      MachineInstrBuilder MIB(*FromBB->getParent(), II);
      unsigned Idx = 0;
      for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) {
        MachineOperand &MO = II->getOperand(i+1);
        if (MO.getMBB() == FromBB) {
          Idx = i;
          break;
        }
      }

      assert(Idx != 0);
      MachineOperand &MO0 = II->getOperand(Idx);
      unsigned Reg = MO0.getReg();
      if (isDead) {
        // Folded into the previous BB.
        // There could be duplicate phi source entries. FIXME: Should sdisel
        // or an earlier pass have fixed this?
        for (unsigned i = II->getNumOperands()-2; i != Idx; i -= 2) {
          MachineOperand &MO = II->getOperand(i+1);
          if (MO.getMBB() == FromBB) {
            II->RemoveOperand(i+1);
            II->RemoveOperand(i);
          }
        }
      } else
        Idx = 0;

      // If Idx is set, the operands at Idx and Idx+1 must be removed.
      // We reuse the location to avoid expensive RemoveOperand calls.
      DenseMap<unsigned,AvailableValsTy>::iterator LI=SSAUpdateVals.find(Reg);
      if (LI != SSAUpdateVals.end()) {
        // This register is defined in the tail block.
        for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
          MachineBasicBlock *SrcBB = LI->second[j].first;
          // If we didn't duplicate a bb into a particular predecessor, we
          // might still have added an entry to SSAUpdateVals to correctly
          // recompute SSA. In that case, avoid adding a dummy extra argument
          // to this PHI.
          if (!SrcBB->isSuccessor(SuccBB))
            continue;

          unsigned SrcReg = LI->second[j].second;
          if (Idx != 0) {
            II->getOperand(Idx).setReg(SrcReg);
            II->getOperand(Idx+1).setMBB(SrcBB);
            Idx = 0;
          } else {
            MIB.addReg(SrcReg).addMBB(SrcBB);
          }
        }
      } else {
        // Live in tail block, must also be live in predecessors.
        for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) {
          MachineBasicBlock *SrcBB = TDBBs[j];
          if (Idx != 0) {
            II->getOperand(Idx).setReg(Reg);
            II->getOperand(Idx+1).setMBB(SrcBB);
            Idx = 0;
          } else {
            MIB.addReg(Reg).addMBB(SrcBB);
          }
        }
      }
      if (Idx != 0) {
        II->RemoveOperand(Idx+1);
        II->RemoveOperand(Idx);
      }
    }
  }
}
/// SinkInstruction - Determine whether it is safe to sink the specified
/// machine instruction out of its current block into a successor.
bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
  // Check if it's safe to move the instruction.
  if (!MI->isSafeToMove(TII, AA, SawStore))
    return false;

  // FIXME: This should include support for sinking instructions within the
  // block they are currently in to shorten the live ranges.  We often get
  // instructions sunk into the top of a large block, but it would be better
  // to also sink them down before their first use in the block.  This xform
  // has to be careful not to *increase* register pressure though, e.g.
  // sinking "x = y + z" down if it kills y and z would increase the live
  // ranges of y and z and only shrink the live range of x.

  // Loop over all the operands of the specified instruction.  If there is
  // anything we can't handle, bail out.
  MachineBasicBlock *ParentBlock = MI->getParent();

  // SuccToSinkTo - This is the successor to sink this instruction to, once we
  // decide.
  MachineBasicBlock *SuccToSinkTo = 0;

  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg()) continue;  // Ignore non-register operands.

    unsigned Reg = MO.getReg();
    if (Reg == 0) continue;

    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
      if (MO.isUse()) {
        // If the physreg has no defs anywhere, it's just an ambient register
        // and we can freely move its uses. Alternatively, if it's allocatable,
        // it could get allocated to something with a def during allocation.
        if (!RegInfo->def_empty(Reg))
          return false;
        if (AllocatableSet.test(Reg))
          return false;
        // Check for a def among the register's aliases too.
        for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
          unsigned AliasReg = *Alias;
          if (!RegInfo->def_empty(AliasReg))
            return false;
          if (AllocatableSet.test(AliasReg))
            return false;
        }
      } else if (!MO.isDead()) {
        // A def that isn't dead. We can't move it.
        return false;
      }
    } else {
      // Virtual register uses are always safe to sink.
      if (MO.isUse()) continue;

      // If it's not safe to move defs of the register class, then abort.
      if (!TII->isSafeToMoveRegClassDefs(RegInfo->getRegClass(Reg)))
        return false;

      // FIXME: This picks a successor to sink into based on having one
      // successor that dominates all the uses.  However, there are cases
      // where sinking can happen but where the sink point isn't a successor.
      // For example:
      //   x = computation
      //   if () {} else {}
      //   use x
      // the instruction could be sunk over the whole diamond for the
      // if/then/else (or loop, etc), allowing it to be sunk into other blocks
      // after that.

      // Virtual register defs can only be sunk if all their uses are in
      // blocks dominated by one of the successors.
      if (SuccToSinkTo) {
        // If a previous operand picked a block to sink to, then this operand
        // must be sinkable to the same block.
        if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo))
          return false;
        continue;
      }

      // Otherwise, we should look at all the successors and decide which one
      // we should sink to.
      for (MachineBasicBlock::succ_iterator SI = ParentBlock->succ_begin(),
           E = ParentBlock->succ_end(); SI != E; ++SI) {
        if (AllUsesDominatedByBlock(Reg, *SI)) {
          SuccToSinkTo = *SI;
          break;
        }
      }

      // If we couldn't find a block to sink to, ignore this instruction.
      if (SuccToSinkTo == 0)
        return false;
    }
  }

  // If there are no outputs, it must have side-effects.
  if (SuccToSinkTo == 0)
    return false;

  // It's not safe to sink instructions to an EH landing pad.  Control flow
  // into a landing pad is implicitly defined.
  if (SuccToSinkTo->isLandingPad())
    return false;

  // It is not possible to sink an instruction into its own block.  This can
  // happen with loops.
  if (MI->getParent() == SuccToSinkTo)
    return false;

  DEBUG(dbgs() << "Sink instr " << *MI);
  DEBUG(dbgs() << "to block " << *SuccToSinkTo);

  // If the block has multiple predecessors, this would introduce computation
  // on a path where it didn't exist before.  We could split the critical
  // edge, but for now we just punt.
  // FIXME: Split critical edges if not backedges.
  if (SuccToSinkTo->pred_size() > 1) {
    DEBUG(dbgs() << " *** PUNTING: Critical edge found\n");
    return false;
  }

  // Determine where to insert into.  Skip phi nodes.
  MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin();
  while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI())
    ++InsertPos;

  // Move the instruction.
  SuccToSinkTo->splice(InsertPos, ParentBlock, MI,
                       ++MachineBasicBlock::iterator(MI));
  return true;
}
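// Editor's note: illustrative only, not from the original source. The kind of
// motion SinkInstruction performs: a def whose uses all sit in one successor
// moves past the branch, shrinking its live range (registers and blocks are
// made up):
//
//   bb0:                             bb0:
//     %x = add %y, %z                  bcc bb2
//     bcc bb2                =>      bb1:
//   bb1:                               %x = add %y, %z
//     use %x                           use %x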
bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
  MF = &mf;
  MRI = &mf.getRegInfo();
  TRI = MF->getTarget().getRegisterInfo();

  ReservedRegisters = TRI->getReservedRegs(mf);

  unsigned NumRegs = TRI->getNumRegs();
  PhysRegDef = new MachineInstr*[NumRegs];
  PhysRegUse = new MachineInstr*[NumRegs];
  PHIVarInfo = new SmallVector<unsigned, 4>[MF->getNumBlockIDs()];
  std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0);
  std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0);
  PHIJoins.clear();

  analyzePHINodes(mf);

  // Calculate live variable information in depth first order on the CFG of
  // the function.  This guarantees that we will see the definition of a
  // virtual register before its uses due to dominance properties of SSA
  // (except for PHI nodes, which are treated as a special case).
  MachineBasicBlock *Entry = MF->begin();
  SmallPtrSet<MachineBasicBlock*,16> Visited;

  for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
         DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
       DFI != E; ++DFI) {
    MachineBasicBlock *MBB = *DFI;

    // Mark live-in registers as live-in.
    SmallVector<unsigned, 4> Defs;
    for (MachineBasicBlock::livein_iterator II = MBB->livein_begin(),
           EE = MBB->livein_end(); II != EE; ++II) {
      assert(TargetRegisterInfo::isPhysicalRegister(*II) &&
             "Cannot have a live-in virtual register!");
      HandlePhysRegDef(*II, 0, Defs);
    }

    // Loop over all of the instructions, processing them.
    DistanceMap.clear();
    unsigned Dist = 0;
    for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
         I != E; ++I) {
      MachineInstr *MI = I;
      if (MI->isDebugValue())
        continue;
      DistanceMap.insert(std::make_pair(MI, Dist++));

      // Process all of the operands of the instruction...
      unsigned NumOperandsToProcess = MI->getNumOperands();

      // Unless it is a PHI node.  In this case, ONLY process the DEF, not any
      // of the uses.  They will be handled in other basic blocks.
      if (MI->isPHI())
        NumOperandsToProcess = 1;

      // Clear kill and dead markers. LV will recompute them.
      SmallVector<unsigned, 4> UseRegs;
      SmallVector<unsigned, 4> DefRegs;
      for (unsigned i = 0; i != NumOperandsToProcess; ++i) {
        MachineOperand &MO = MI->getOperand(i);
        if (!MO.isReg() || MO.getReg() == 0)
          continue;
        unsigned MOReg = MO.getReg();
        if (MO.isUse()) {
          MO.setIsKill(false);
          UseRegs.push_back(MOReg);
        } else /*MO.isDef()*/ {
          MO.setIsDead(false);
          DefRegs.push_back(MOReg);
        }
      }

      // Process all uses.
      for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) {
        unsigned MOReg = UseRegs[i];
        if (TargetRegisterInfo::isVirtualRegister(MOReg))
          HandleVirtRegUse(MOReg, MBB, MI);
        else if (!ReservedRegisters[MOReg])
          HandlePhysRegUse(MOReg, MI);
      }

      // Process all defs.
      for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) {
        unsigned MOReg = DefRegs[i];
        if (TargetRegisterInfo::isVirtualRegister(MOReg))
          HandleVirtRegDef(MOReg, MI);
        else if (!ReservedRegisters[MOReg])
          HandlePhysRegDef(MOReg, MI, Defs);
      }
      UpdatePhysRegDefs(MI, Defs);
    }

    // Handle any virtual assignments from PHI nodes which might be at the
    // bottom of this basic block.  We check all of our successor blocks to
    // see if they have PHI nodes, and if so, we simulate an assignment at
    // the end of the current block.
    if (!PHIVarInfo[MBB->getNumber()].empty()) {
      SmallVector<unsigned, 4>& VarInfoVec = PHIVarInfo[MBB->getNumber()];

      for (SmallVector<unsigned, 4>::iterator I = VarInfoVec.begin(),
             E = VarInfoVec.end(); I != E; ++I)
        // Mark it alive only in the block we are representing.
        MarkVirtRegAliveInBlock(getVarInfo(*I),
                                MRI->getVRegDef(*I)->getParent(), MBB);
    }

    // Finally, if the last instruction in the block is a return, make sure
    // to mark it as using all of the live-out values in the function.
    // Things marked both call and return are tail calls; do not do this for
    // them.  The tail callee need not take the same registers as input
    // that it produces as output, and there are dependencies for its input
    // registers elsewhere.
    if (!MBB->empty() && MBB->back().getDesc().isReturn() &&
        !MBB->back().getDesc().isCall()) {
      MachineInstr *Ret = &MBB->back();

      for (MachineRegisterInfo::liveout_iterator
             I = MF->getRegInfo().liveout_begin(),
             E = MF->getRegInfo().liveout_end(); I != E; ++I) {
        assert(TargetRegisterInfo::isPhysicalRegister(*I) &&
               "Cannot have a live-out virtual register!");
        HandlePhysRegUse(*I, Ret);

        // Add live-out registers as implicit uses.
        if (!Ret->readsRegister(*I))
          Ret->addOperand(MachineOperand::CreateReg(*I, false, true));
      }
    }

    // Loop over PhysRegDef / PhysRegUse, killing any registers that are
    // available at the end of the basic block.
    for (unsigned i = 0; i != NumRegs; ++i)
      if (PhysRegDef[i] || PhysRegUse[i])
        HandlePhysRegDef(i, 0, Defs);

    std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0);
    std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0);
  }

  // Convert and transfer the dead / killed information we have gathered into
  // VirtRegInfo onto MI's.
  for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i) {
    const unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
    for (unsigned j = 0, e2 = VirtRegInfo[Reg].Kills.size(); j != e2; ++j)
      if (VirtRegInfo[Reg].Kills[j] == MRI->getVRegDef(Reg))
        VirtRegInfo[Reg].Kills[j]->addRegisterDead(Reg, TRI);
      else
        VirtRegInfo[Reg].Kills[j]->addRegisterKilled(Reg, TRI);
  }

  // Check to make sure there are no unreachable blocks in the MC CFG for the
  // function.  If so, it is due to a bug in the instruction selector or some
  // other part of the code generator if this happens.
#ifndef NDEBUG
  for(MachineFunction::iterator i = MF->begin(), e = MF->end(); i != e; ++i)
    assert(Visited.count(&*i) != 0 && "unreachable basic block found");
#endif

  delete[] PhysRegDef;
  delete[] PhysRegUse;
  delete[] PHIVarInfo;

  return false;
}
bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I,
                               const Counters &Required) {
  // End of program? No need to wait on anything.
  // A function not returning void needs to wait, because other bytecode will
  // be appended after it and we don't know what it will be.
  if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM && ReturnsVoid)
    return false;

  // Figure out if the async instructions execute in order
  bool Ordered[3];

  // VM_CNT is always ordered except when there are flat instructions, which
  // can return out of order.
  Ordered[0] = !IsFlatOutstanding;

  // EXP_CNT is unordered if we have both EXP & VM-writes
  Ordered[1] = ExpInstrTypesSeen == 3;

  // LGKM_CNT is handled as always unordered. TODO: Handle LDS and GDS
  Ordered[2] = false;

  // The values we are going to put into the S_WAITCNT instruction
  Counters Counts = HardwareLimits;

  // Do we really need to wait?
  bool NeedWait = false;

  for (unsigned i = 0; i < 3; ++i) {
    if (Required.Array[i] <= WaitedOn.Array[i])
      continue;

    NeedWait = true;

    if (Ordered[i]) {
      unsigned Value = LastIssued.Array[i] - Required.Array[i];

      // Adjust the value to the real hardware possibilities.
      Counts.Array[i] = std::min(Value, HardwareLimits.Array[i]);
    } else
      Counts.Array[i] = 0;

    // Remember what we have waited on.
    WaitedOn.Array[i] = LastIssued.Array[i] - Counts.Array[i];
  }

  if (!NeedWait)
    return false;

  // Reset EXP_CNT instruction types
  if (Counts.Named.EXP == 0)
    ExpInstrTypesSeen = 0;

  // Build the wait instruction
  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
      .addImm(encodeWaitcnt(IV,
                            Counts.Named.VM,
                            Counts.Named.EXP,
                            Counts.Named.LGKM));

  LastOpcodeType = OTHER;
  LastInstWritesM0 = false;
  IsFlatOutstanding = false;
  return true;
}
bool AArch64RedundantCopyElimination::optimizeCopy(MachineBasicBlock *MBB) {
  // Check if the current basic block has a single predecessor.
  if (MBB->pred_size() != 1)
    return false;

  MachineBasicBlock *PredMBB = *MBB->pred_begin();
  MachineBasicBlock::iterator CompBr = PredMBB->getLastNonDebugInstr();
  if (CompBr == PredMBB->end() || PredMBB->succ_size() != 2)
    return false;

  ++CompBr;
  do {
    --CompBr;
    if (guaranteesZeroRegInBlock(*CompBr, MBB))
      break;
  } while (CompBr != PredMBB->begin() && CompBr->isTerminator());

  // We've not found a CBZ/CBNZ, time to bail out.
  if (!guaranteesZeroRegInBlock(*CompBr, MBB))
    return false;

  unsigned TargetReg = CompBr->getOperand(0).getReg();
  if (!TargetReg)
    return false;
  assert(TargetRegisterInfo::isPhysicalRegister(TargetReg) &&
         "Expect physical register");

  // Remember all registers aliasing with TargetReg.
  SmallSetVector<unsigned, 8> TargetRegs;
  for (MCRegAliasIterator AI(TargetReg, TRI, true); AI.isValid(); ++AI)
    TargetRegs.insert(*AI);

  bool Changed = false;
  MachineBasicBlock::iterator LastChange = MBB->begin();
  unsigned SmallestDef = TargetReg;
  // Remove redundant Copy instructions unless TargetReg is modified.
  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) {
    MachineInstr *MI = &*I;
    ++I;
    if (MI->isCopy() && MI->getOperand(0).isReg() &&
        MI->getOperand(1).isReg()) {

      unsigned DefReg = MI->getOperand(0).getReg();
      unsigned SrcReg = MI->getOperand(1).getReg();

      if ((SrcReg == AArch64::XZR || SrcReg == AArch64::WZR) &&
          !MRI->isReserved(DefReg) &&
          (TargetReg == DefReg || TRI->isSuperRegister(DefReg, TargetReg))) {
        DEBUG(dbgs() << "Remove redundant Copy : ");
        DEBUG((MI)->print(dbgs()));

        MI->eraseFromParent();
        Changed = true;
        LastChange = I;
        NumCopiesRemoved++;
        SmallestDef =
            TRI->isSubRegister(SmallestDef, DefReg) ? DefReg : SmallestDef;
        continue;
      }
    }

    if (MI->modifiesRegister(TargetReg, TRI))
      break;
  }

  if (!Changed)
    return false;

  // Otherwise, we have to fixup the use-def chain, starting with the
  // CBZ/CBNZ. Conservatively mark as much as we can as live.
  CompBr->clearRegisterKills(SmallestDef, TRI);

  if (std::none_of(TargetRegs.begin(), TargetRegs.end(),
                   [&](unsigned Reg) { return MBB->isLiveIn(Reg); }))
    MBB->addLiveIn(TargetReg);

  // Clear any kills of TargetReg between CompBr and the last removed COPY.
  for (MachineInstr &MMI :
       make_range(MBB->begin()->getIterator(), LastChange->getIterator()))
    MMI.clearRegisterKills(SmallestDef, TRI);

  return true;
}
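// Editor's note: illustrative only, not from the original source. The pattern
// optimizeCopy removes: after a CBZ on w0, w0 is known to be zero in the
// taken block, so a copy from WZR into w0 there is redundant (labels are made
// up):
//
//     cbz w0, .LBB0_2
//     ...
//   .LBB0_2:
//     mov w0, wzr    ; removed: w0 is already zero on this path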
void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
                                   bool isEntry) {
  auto BII = Blocks.find(&MBB);
  if (BII == Blocks.end())
    return;

  const BlockInfo &BI = BII->second;

  if (!(BI.InNeeds & StateWQM))
    return;

  // This is a non-entry block that is WQM throughout, so no need to do
  // anything.
  if (!isEntry && !(BI.Needs & StateExact) && BI.OutNeeds != StateExact)
    return;

  DEBUG(dbgs() << "\nProcessing block BB#" << MBB.getNumber() << ":\n");

  unsigned SavedWQMReg = 0;
  bool WQMFromExec = isEntry;
  char State = isEntry ? StateExact : StateWQM;

  auto II = MBB.getFirstNonPHI(), IE = MBB.end();
  if (isEntry)
    ++II; // Skip the instruction that saves LiveMask

  MachineBasicBlock::iterator First = IE;
  for (;;) {
    MachineBasicBlock::iterator Next = II;
    char Needs = 0;
    char OutNeeds = 0;

    if (First == IE)
      First = II;

    if (II != IE) {
      MachineInstr &MI = *II;

      if (requiresCorrectState(MI)) {
        auto III = Instructions.find(&MI);
        if (III != Instructions.end()) {
          Needs = III->second.Needs;
          OutNeeds = III->second.OutNeeds;
        }
      }

      if (MI.isTerminator() && !Needs && OutNeeds == StateExact)
        Needs = StateExact;

      if (MI.getOpcode() == AMDGPU::SI_ELSE && BI.OutNeeds == StateExact)
        MI.getOperand(3).setImm(1);

      ++Next;
    } else {
      // End of basic block
      if (BI.OutNeeds & StateWQM)
        Needs = StateWQM;
      else if (BI.OutNeeds == StateExact)
        Needs = StateExact;
    }

    if (Needs) {
      if (Needs != State) {
        MachineBasicBlock::iterator Before =
            prepareInsertion(MBB, First, II, Needs == StateWQM,
                             Needs == StateExact || WQMFromExec);

        if (Needs == StateExact) {
          if (!WQMFromExec && (OutNeeds & StateWQM))
            SavedWQMReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);

          toExact(MBB, Before, SavedWQMReg, LiveMaskReg);
        } else {
          assert(WQMFromExec == (SavedWQMReg == 0));

          toWQM(MBB, Before, SavedWQMReg);

          if (SavedWQMReg) {
            LIS->createAndComputeVirtRegInterval(SavedWQMReg);
            SavedWQMReg = 0;
          }
        }

        State = Needs;
      }

      First = IE;
    }

    if (II == IE)
      break;
    II = Next;
  }
}
/// shouldTailDuplicate - Determine if it is profitable to duplicate this
/// block.
bool
TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
                                       bool IsSimple,
                                       MachineBasicBlock &TailBB) {
  // Only duplicate blocks that end with unconditional branches.
  if (TailBB.canFallThrough())
    return false;

  // Don't try to tail-duplicate single-block loops.
  if (TailBB.isSuccessor(&TailBB))
    return false;

  // Set the limit on the cost to duplicate. When optimizing for size,
  // duplicate only one, because one branch instruction can be eliminated to
  // compensate for the duplication.
  unsigned MaxDuplicateCount;
  if (TailDuplicateSize.getNumOccurrences() == 0 &&
      MF.getFunction()->getAttributes().
        hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize))
    MaxDuplicateCount = 1;
  else
    MaxDuplicateCount = TailDuplicateSize;

  // If the target has hardware branch prediction that can handle indirect
  // branches, duplicating them can often make them predictable when there
  // are common paths through the code.  The limit needs to be high enough
  // to allow undoing the effects of tail merging and other optimizations
  // that rearrange the predecessors of the indirect branch.

  bool HasIndirectbr = false;
  if (!TailBB.empty())
    HasIndirectbr = TailBB.back().isIndirectBranch();

  if (HasIndirectbr && PreRegAlloc)
    MaxDuplicateCount = 20;

  // Check the instructions in the block to determine whether tail-duplication
  // is invalid or unlikely to be profitable.
  unsigned InstrCount = 0;
  for (MachineBasicBlock::iterator I = TailBB.begin(); I != TailBB.end();
       ++I) {
    // Non-duplicable things shouldn't be tail-duplicated.
    if (I->isNotDuplicable())
      return false;

    // Do not duplicate 'return' instructions if this is a pre-regalloc run.
    // A return may expand into a lot more instructions (e.g. reload of callee
    // saved registers) after PEI.
    if (PreRegAlloc && I->isReturn())
      return false;

    // Avoid duplicating calls before register allocation. Calls present a
    // barrier to register allocation, so duplicating them may end up
    // increasing spills.
    if (PreRegAlloc && I->isCall())
      return false;

    if (!I->isPHI() && !I->isDebugValue())
      InstrCount += 1;

    if (InstrCount > MaxDuplicateCount)
      return false;
  }

  if (HasIndirectbr && PreRegAlloc)
    return true;

  if (IsSimple)
    return true;

  if (!PreRegAlloc)
    return true;

  return canCompletelyDuplicateBB(TailBB);
}
bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
  bool Modified = false;
  // Two transformations to do here:
  // 1) Find loads and stores that can be merged into a single load or store
  //    pair instruction.
  //      e.g.,
  //        ldr x0, [x2]
  //        ldr x1, [x2, #8]
  //        ; becomes
  //        ldp x0, x1, [x2]
  // 2) Find base register updates that can be merged into the load or store
  //    as a base-reg writeback.
  //      e.g.,
  //        ldr x0, [x2]
  //        add x2, x2, #4
  //        ; becomes
  //        ldr x0, [x2], #4

  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
       MBBI != E;) {
    MachineInstr *MI = MBBI;
    switch (MI->getOpcode()) {
    default:
      // Just move on to the next instruction.
      ++MBBI;
      break;
    case AArch64::STRSui:
    case AArch64::STRDui:
    case AArch64::STRQui:
    case AArch64::STRXui:
    case AArch64::STRWui:
    case AArch64::LDRSui:
    case AArch64::LDRDui:
    case AArch64::LDRQui:
    case AArch64::LDRXui:
    case AArch64::LDRWui:
    case AArch64::LDRSWui:
    // do the unscaled versions as well
    case AArch64::STURSi:
    case AArch64::STURDi:
    case AArch64::STURQi:
    case AArch64::STURWi:
    case AArch64::STURXi:
    case AArch64::LDURSi:
    case AArch64::LDURDi:
    case AArch64::LDURQi:
    case AArch64::LDURWi:
    case AArch64::LDURXi:
    case AArch64::LDURSWi: {
      // If this is a volatile load/store, don't mess with it.
      if (MI->hasOrderedMemoryRef()) {
        ++MBBI;
        break;
      }
      // Make sure this is a reg+imm (as opposed to an address reloc).
      if (!MI->getOperand(2).isImm()) {
        ++MBBI;
        break;
      }
      // Check if this load/store has a hint to avoid pair formation.
      // MachineMemOperands hints are set by the AArch64StorePairSuppress
      // pass.
      if (TII->isLdStPairSuppressed(MI)) {
        ++MBBI;
        break;
      }
      // Look ahead up to ScanLimit instructions for a pairable instruction.
      bool MergeForward = false;
      int SExtIdx = -1;
      MachineBasicBlock::iterator Paired =
          findMatchingInsn(MBBI, MergeForward, SExtIdx, ScanLimit);
      if (Paired != E) {
        // Merge the loads into a pair. Keeping the iterator straight is a
        // pain, so we let the merge routine tell us what the next instruction
        // is after it's done mucking about.
        MBBI = mergePairedInsns(MBBI, Paired, MergeForward, SExtIdx);

        Modified = true;
        ++NumPairCreated;
        if (isUnscaledLdst(MI->getOpcode()))
          ++NumUnscaledPairCreated;
        break;
      }
      ++MBBI;
      break;
    }
      // FIXME: Do the other instructions.
    }
  }

  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
       MBBI != E;) {
    MachineInstr *MI = MBBI;
    // Do update merging. It's simpler to keep this separate from the above
    // switch, though not strictly necessary.
    int Opc = MI->getOpcode();
    switch (Opc) {
    default:
      // Just move on to the next instruction.
      ++MBBI;
      break;
    case AArch64::STRSui:
    case AArch64::STRDui:
    case AArch64::STRQui:
    case AArch64::STRXui:
    case AArch64::STRWui:
    case AArch64::LDRSui:
    case AArch64::LDRDui:
    case AArch64::LDRQui:
    case AArch64::LDRXui:
    case AArch64::LDRWui:
    // do the unscaled versions as well
    case AArch64::STURSi:
    case AArch64::STURDi:
    case AArch64::STURQi:
    case AArch64::STURWi:
    case AArch64::STURXi:
    case AArch64::LDURSi:
    case AArch64::LDURDi:
    case AArch64::LDURQi:
    case AArch64::LDURWi:
    case AArch64::LDURXi: {
      // Make sure this is a reg+imm (as opposed to an address reloc).
      if (!MI->getOperand(2).isImm()) {
        ++MBBI;
        break;
      }
      // Look ahead up to ScanLimit instructions for a mergable instruction.
      MachineBasicBlock::iterator Update =
          findMatchingUpdateInsnForward(MBBI, ScanLimit, 0);
      if (Update != E) {
        // Merge the update into the ld/st.
        MBBI = mergePostIdxUpdateInsn(MBBI, Update);
        Modified = true;
        ++NumPostFolded;
        break;
      }
      // Don't know how to handle pre/post-index versions, so move to the
      // next instruction.
      if (isUnscaledLdst(Opc)) {
        ++MBBI;
        break;
      }

      // Look back to try to find a pre-index instruction. For example,
      // add x0, x0, #8
      // ldr x1, [x0]
      // merged into:
      // ldr x1, [x0, #8]!
      Update = findMatchingUpdateInsnBackward(MBBI, ScanLimit);
      if (Update != E) {
        // Merge the update into the ld/st.
        MBBI = mergePreIdxUpdateInsn(MBBI, Update);
        Modified = true;
        ++NumPreFolded;
        break;
      }

      // Look forward to try to find a post-index instruction. For example,
      // ldr x1, [x0, #64]
      // add x0, x0, #64
      // merged into:
      // ldr x1, [x0, #64]!
      // The immediate in the load/store is scaled by the size of the register
      // being loaded. The immediate in the add we're looking for,
      // however, is not, so adjust here.
      int Value = MI->getOperand(2).getImm() *
                  TII->getRegClass(MI->getDesc(), 0, TRI, *(MBB.getParent()))
                      ->getSize();
      Update = findMatchingUpdateInsnForward(MBBI, ScanLimit, Value);
      if (Update != E) {
        // Merge the update into the ld/st.
        MBBI = mergePreIdxUpdateInsn(MBBI, Update);
        Modified = true;
        ++NumPreFolded;
        break;
      }

      // Nothing found. Just move to the next instruction.
      ++MBBI;
      break;
    }
      // FIXME: Do the other instructions.
    }
  }

  return Modified;
}
bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                  MachineBasicBlock *&TBB,
                                  MachineBasicBlock *&FBB,
                                  SmallVectorImpl<MachineOperand> &Cond,
                                  bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
    if (!LastInst->getDesc().isBranch())
      return true;

    // Unconditional branch
    if (LastOpc == Mips::J) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }

    Mips::CondCode BranchCode = GetCondFromBranchOpc(LastInst->getOpcode());
    if (BranchCode == Mips::COND_INVALID)
      return true; // Can't handle indirect branch.

    // Conditional branch
    // Block ends with fall-through condbranch.
    int LastNumOp = LastInst->getNumOperands();
    TBB = LastInst->getOperand(LastNumOp-1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(BranchCode));
    for (int i = 0; i < LastNumOp-1; i++)
      Cond.push_back(LastInst->getOperand(i));
    return false;
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
    return true;

  // If the block ends with Mips::J and a Mips::BNE/Mips::BEQ, handle it.
  unsigned SecondLastOpc = SecondLastInst->getOpcode();
  Mips::CondCode BranchCode = GetCondFromBranchOpc(SecondLastOpc);

  if (BranchCode != Mips::COND_INVALID && LastOpc == Mips::J) {
    int SecondNumOp = SecondLastInst->getNumOperands();
    TBB = SecondLastInst->getOperand(SecondNumOp-1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(BranchCode));
    for (int i = 0; i < SecondNumOp-1; i++)
      Cond.push_back(SecondLastInst->getOperand(i));
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it.  The last
  // one is not executed, so remove it.
  if ((SecondLastOpc == Mips::J) && (LastOpc == Mips::J)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}
bool MipsInstrInfo::
copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
             unsigned DestReg, unsigned SrcReg,
             const TargetRegisterClass *DestRC,
             const TargetRegisterClass *SrcRC) const {
  DebugLoc DL = DebugLoc::getUnknownLoc();
  const MachineFunction *MF = MBB.getParent();
  const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();

  if (I != MBB.end()) DL = I->getDebugLoc();

  if (DestRC != SrcRC) {

    // Copy to/from FCR31 condition register
    if ((DestRC == Mips::CPURegsRegisterClass) &&
        (SrcRC == Mips::CCRRegisterClass))
      BuildMI(MBB, I, DL, get(Mips::CFC1), DestReg).addReg(SrcReg);
    else if ((DestRC == Mips::CCRRegisterClass) &&
             (SrcRC == Mips::CPURegsRegisterClass))
      BuildMI(MBB, I, DL, get(Mips::CTC1), DestReg).addReg(SrcReg);

    // Moves between coprocessors and cpu
    else if ((DestRC == Mips::CPURegsRegisterClass) &&
             (SrcRC == Mips::FGR32RegisterClass))
      BuildMI(MBB, I, DL, get(Mips::MFC1), DestReg).addReg(SrcReg);
    else if ((DestRC == Mips::FGR32RegisterClass) &&
             (SrcRC == Mips::CPURegsRegisterClass))
      BuildMI(MBB, I, DL, get(Mips::MTC1), DestReg).addReg(SrcReg);
    else if ((DestRC == Mips::AFGR64RegisterClass) &&
             (SrcRC == Mips::CPURegsRegisterClass) &&
             (SrcReg == Mips::ZERO)) {
      const unsigned *AliasSet = TRI->getAliasSet(DestReg);
      BuildMI(MBB, I, DL, get(Mips::MTC1), AliasSet[0]).addReg(SrcReg);
      BuildMI(MBB, I, DL, get(Mips::MTC1), AliasSet[1]).addReg(SrcReg);
    }

    // Move from/to Hi/Lo registers
    else if ((DestRC == Mips::HILORegisterClass) &&
             (SrcRC == Mips::CPURegsRegisterClass)) {
      unsigned Opc = (DestReg == Mips::HI) ? Mips::MTHI : Mips::MTLO;
      BuildMI(MBB, I, DL, get(Opc), DestReg);
    } else if ((SrcRC == Mips::HILORegisterClass) &&
               (DestRC == Mips::CPURegsRegisterClass)) {
      unsigned Opc = (SrcReg == Mips::HI) ? Mips::MFHI : Mips::MFLO;
      BuildMI(MBB, I, DL, get(Opc), DestReg);
    } else
      // Can't copy this register
      return false;

    return true;
  }

  if (DestRC == Mips::CPURegsRegisterClass)
    BuildMI(MBB, I, DL, get(Mips::ADDu), DestReg).addReg(Mips::ZERO)
      .addReg(SrcReg);
  else if (DestRC == Mips::FGR32RegisterClass)
    BuildMI(MBB, I, DL, get(Mips::FMOV_S32), DestReg).addReg(SrcReg);
  else if (DestRC == Mips::AFGR64RegisterClass)
    BuildMI(MBB, I, DL, get(Mips::FMOV_D32), DestReg).addReg(SrcReg);
  else if (DestRC == Mips::CCRRegisterClass)
    BuildMI(MBB, I, DL, get(Mips::MOVCCRToCCR), DestReg).addReg(SrcReg);
  else
    // Can't copy this register
    return false;

  return true;
}
void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
                                            MachineBasicBlock &MBB) const {
  // TODO: Do ".setMIFlag(MachineInstr::FrameSetup)" on emitted instructions
  auto &MFI = MF.getFrameInfo();
  assert(MFI.getCalleeSavedInfo().empty() &&
         "WebAssembly should not have callee-saved registers");

  if (!needsSP(MF, MFI))
    return;
  uint64_t StackSize = MFI.getStackSize();

  const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
  auto &MRI = MF.getRegInfo();

  auto InsertPt = MBB.begin();
  while (InsertPt != MBB.end() && WebAssembly::isArgument(*InsertPt))
    ++InsertPt;
  DebugLoc DL;

  const TargetRegisterClass *PtrRC =
      MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
  unsigned Zero = MRI.createVirtualRegister(PtrRC);
  unsigned SPReg = WebAssembly::SP32;
  if (StackSize)
    SPReg = MRI.createVirtualRegister(PtrRC);
  const char *ES = "__stack_pointer";
  auto *SPSymbol = MF.createExternalSymbolName(ES);
  BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), Zero)
      .addImm(0);
  MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
      MachinePointerInfo(MF.getPSVManager().getExternalSymbolCallEntry(ES)),
      MachineMemOperand::MOLoad, 4, 4);
  // Load the SP value.
  BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::LOAD_I32), SPReg)
      .addImm(2)       // p2align
      .addExternalSymbol(SPSymbol)
      .addReg(Zero)    // addr
      .addMemOperand(LoadMMO);

  bool HasBP = hasBP(MF);
  if (HasBP) {
    auto FI = MF.getInfo<WebAssemblyFunctionInfo>();
    unsigned BasePtr = MRI.createVirtualRegister(PtrRC);
    FI->setBasePointerVreg(BasePtr);
    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY), BasePtr)
        .addReg(SPReg);
  }
  if (StackSize) {
    // Subtract the frame size
    unsigned OffsetReg = MRI.createVirtualRegister(PtrRC);
    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
        .addImm(StackSize);
    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::SUB_I32),
            WebAssembly::SP32)
        .addReg(SPReg)
        .addReg(OffsetReg);
  }
  if (HasBP) {
    unsigned BitmaskReg = MRI.createVirtualRegister(PtrRC);
    unsigned Alignment = MFI.getMaxAlignment();
    assert((1u << countTrailingZeros(Alignment)) == Alignment &&
           "Alignment must be a power of 2");
    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), BitmaskReg)
        .addImm((int)~(Alignment - 1));
    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::AND_I32),
            WebAssembly::SP32)
        .addReg(WebAssembly::SP32)
        .addReg(BitmaskReg);
  }
  if (hasFP(MF)) {
    // Unlike most conventional targets (where FP points to the saved FP),
    // FP points to the bottom of the fixed-size locals, so we can use
    // positive offsets in load/store instructions.
    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY), WebAssembly::FP32)
        .addReg(WebAssembly::SP32);
  }
  if (StackSize && needsSPWriteback(MF, MFI)) {
    writeSPToMemory(WebAssembly::SP32, MF, MBB, InsertPt, InsertPt, DL);
  }
}
bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
  bool Modified = false;

  SmallSet<unsigned, 4> Defs;
  SmallSet<unsigned, 4> Uses;
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineInstr *MI = &*MBBI;
    DebugLoc dl = MI->getDebugLoc();
    unsigned PredReg = 0;
    ARMCC::CondCodes CC = getITInstrPredicate(*MI, PredReg);
    if (CC == ARMCC::AL) {
      ++MBBI;
      continue;
    }

    Defs.clear();
    Uses.clear();
    TrackDefUses(MI, Defs, Uses, TRI);

    // Insert an IT instruction.
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(ARM::t2IT))
      .addImm(CC);

    // Add implicit use of ITSTATE to IT block instructions.
    MI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*isDef*/,
                                             true/*isImp*/, false/*isKill*/));

    MachineInstr *LastITMI = MI;
    MachineBasicBlock::iterator InsertPos = MIB.getInstr();
    ++MBBI;

    // Form IT block.
    ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
    unsigned Mask = 0, Pos = 3;

    // v8 IT blocks are limited to one conditional op unless
    // -arm-no-restrict-it is set: skip the loop
    if (!restrictIT) {
      // Branches, including tricky ones like LDM_RET, need to end an IT
      // block so check the instruction we just put in the block.
      for (; MBBI != E && Pos &&
             (!MI->isBranch() && !MI->isReturn()) ; ++MBBI) {
        if (MBBI->isDebugValue())
          continue;

        MachineInstr *NMI = &*MBBI;
        MI = NMI;

        unsigned NPredReg = 0;
        ARMCC::CondCodes NCC = getITInstrPredicate(*NMI, NPredReg);
        if (NCC == CC || NCC == OCC) {
          Mask |= (NCC & 1) << Pos;
          // Add implicit use of ITSTATE.
          NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE,
                                                    false/*isDef*/,
                                                    true/*isImp*/,
                                                    false/*isKill*/));
          LastITMI = NMI;
        } else {
          if (NCC == ARMCC::AL &&
              MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses)) {
            --MBBI;
            MBB.remove(NMI);
            MBB.insert(InsertPos, NMI);
            ClearKillFlags(MI, Uses);
            ++NumMovedInsts;
            continue;
          }
          break;
        }
        TrackDefUses(NMI, Defs, Uses, TRI);
        --Pos;
      }
    }

    // Finalize IT mask.
    Mask |= (1 << Pos);
    // Tag along (firstcond[0] << 4) with the mask.
    Mask |= (CC & 1) << 4;
    MIB.addImm(Mask);

    // Last instruction in IT block kills ITSTATE.
    LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill();

    // Finalize the bundle.
    finalizeBundle(MBB, InsertPos.getInstrIterator(),
                   ++LastITMI->getIterator());

    Modified = true;
    ++NumITs;
  }

  return Modified;
}
void X86FrameInfo::emitEpilogue(MachineFunction &MF,
                                MachineBasicBlock &MBB) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
  const X86InstrInfo &TII = *TM.getInstrInfo();
  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  unsigned RetOpcode = MBBI->getOpcode();
  DebugLoc DL = MBBI->getDebugLoc();
  bool Is64Bit = STI.is64Bit();
  unsigned StackAlign = getStackAlignment();
  unsigned SlotSize = RegInfo->getSlotSize();
  unsigned FramePtr = RegInfo->getFrameRegister(MF);
  unsigned StackPtr = RegInfo->getStackRegister();

  switch (RetOpcode) {
  default:
    llvm_unreachable("Can only insert epilog into returning blocks");
  case X86::RET:
  case X86::RETI:
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64:
  case X86::EH_RETURN:
  case X86::EH_RETURN64:
    break;  // These are ok
  }

  // Get the number of bytes to allocate from the FrameInfo.
  uint64_t StackSize = MFI->getStackSize();
  uint64_t MaxAlign = MFI->getMaxAlignment();
  unsigned CSSize = X86FI->getCalleeSavedFrameSize();
  uint64_t NumBytes = 0;

  // If we're forcing a stack realignment we can't rely on just the frame
  // info, we need to know the ABI stack alignment as well in case we
  // have a call out.  Otherwise just make sure we have some alignment - we'll
  // go with the minimum.
  if (ForceStackAlign) {
    if (MFI->hasCalls())
      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
    else
      MaxAlign = MaxAlign ? MaxAlign : 4;
  }

  if (hasFP(MF)) {
    // Calculate required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    if (RegInfo->needsStackRealignment(MF))
      FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;

    NumBytes = FrameSize - CSSize;

    // Pop EBP.
    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr);
  } else {
    NumBytes = StackSize - CSSize;
  }

  // Skip the callee-saved pop instructions.
  MachineBasicBlock::iterator LastCSPop = MBBI;
  while (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PI = prior(MBBI);
    unsigned Opc = PI->getOpcode();
    if (Opc != X86::POP32r && Opc != X86::POP64r &&
        !PI->getDesc().isTerminator())
      break;
    --MBBI;
  }

  DL = MBBI->getDebugLoc();

  // If there is an ADD32ri or SUB32ri of ESP immediately before this
  // instruction, merge the two instructions.
  if (NumBytes || MFI->hasVarSizedObjects())
    mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);

  // If dynamic alloca is used, then reset esp to point to the last
  // callee-saved slot before popping them off!  Same applies for the case,
  // when stack was realigned.
  if (RegInfo->needsStackRealignment(MF)) {
    // We cannot use LEA here, because stack pointer was realigned. We need
    // to deallocate local frame back.
    if (CSSize) {
      emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
      MBBI = prior(LastCSPop);
    }

    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
            StackPtr).addReg(FramePtr);
  } else if (MFI->hasVarSizedObjects()) {
    if (CSSize) {
      unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r;
      MachineInstr *MI =
        addRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr),
                     FramePtr, false, -CSSize);
      MBB.insert(MBBI, MI);
    } else {
      BuildMI(MBB, MBBI, DL,
              TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr)
        .addReg(FramePtr);
    }
  } else if (NumBytes) {
    // Adjust stack pointer back: ESP += numbytes.
    emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
  }

  // We're returning from function via eh_return.
  if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) {
    MBBI = prior(MBB.end());
    MachineOperand &DestAddr = MBBI->getOperand(0);
    assert(DestAddr.isReg() && "Offset should be in register!");
    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
            StackPtr).addReg(DestAddr.getReg());
  } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
             RetOpcode == X86::TCRETURNmi ||
             RetOpcode == X86::TCRETURNri64 ||
             RetOpcode == X86::TCRETURNdi64 ||
             RetOpcode == X86::TCRETURNmi64) {
    bool isMem =
      RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64;
    // Tail call return: adjust the stack pointer and jump to callee.
    MBBI = prior(MBB.end());
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
    assert(StackAdjust.isImm() && "Expecting immediate value.");

    // Adjust stack pointer.
    int StackAdj = StackAdjust.getImm();
    int MaxTCDelta = X86FI->getTCReturnAddrDelta();
    int Offset = 0;
    assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");

    // Incorporate the retaddr area.
    Offset = StackAdj-MaxTCDelta;
    assert(Offset >= 0 && "Offset should never be negative");

    if (Offset) {
      // Check for possible merge with preceding ADD instruction.
      Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
      emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII);
    }

    // Jump to label or value in register.
    if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) {
      MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi)
                                       ? X86::TAILJMPd : X86::TAILJMPd64));
      if (JumpTarget.isGlobal())
        MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                             JumpTarget.getTargetFlags());
      else {
        assert(JumpTarget.isSymbol());
        MIB.addExternalSymbol(JumpTarget.getSymbolName(),
                              JumpTarget.getTargetFlags());
      }
    } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) {
      MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi)
                                       ? X86::TAILJMPm : X86::TAILJMPm64));
      for (unsigned i = 0; i != 5; ++i)
        MIB.addOperand(MBBI->getOperand(i));
    } else if (RetOpcode == X86::TCRETURNri64) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)).
        addReg(JumpTarget.getReg(), RegState::Kill);
    } else {
      BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)).
        addReg(JumpTarget.getReg(), RegState::Kill);
    }

    MachineInstr *NewMI = prior(MBBI);
    for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
      NewMI->addOperand(MBBI->getOperand(i));

    // Delete the pseudo instruction TCRETURN.
    MBB.erase(MBBI);
  } else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) &&
             (X86FI->getTCReturnAddrDelta() < 0)) {
    // Add the return addr area delta back since we are not tail calling.
    int delta = -1*X86FI->getTCReturnAddrDelta();
    MBBI = prior(MBB.end());

    // Check for possible merge with preceding ADD instruction.
    delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
    emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII);
  }
}
bool R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                  MachineBasicBlock *&TBB,
                                  MachineBasicBlock *&FBB,
                                  SmallVectorImpl<MachineOperand> &Cond,
                                  bool AllowModify) const {
  // Most of the following comes from the ARM implementation of AnalyzeBranch

  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  if (static_cast<MachineInstr *>(I)->getOpcode() != AMDGPU::JUMP) {
    return false;
  }

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() ||
      static_cast<MachineInstr *>(--I)->getOpcode() != AMDGPU::JUMP) {
    if (LastOpc == AMDGPU::JUMP) {
      if (!isPredicated(LastInst)) {
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        MachineInstr *predSet = I;
        while (!isPredicateSetter(predSet->getOpcode())) {
          predSet = --I;
        }
        TBB = LastInst->getOperand(0).getMBB();
        Cond.push_back(predSet->getOperand(1));
        Cond.push_back(predSet->getOperand(2));
        Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
        return false;
      }
    }
    return true;  // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If the block ends with a B and a Bcc, handle it.
  if (SecondLastOpc == AMDGPU::JUMP &&
      isPredicated(SecondLastInst) &&
      LastOpc == AMDGPU::JUMP &&
      !isPredicated(LastInst)) {
    MachineInstr *predSet = --I;
    while (!isPredicateSetter(predSet->getOpcode())) {
      predSet = --I;
    }
    TBB = SecondLastInst->getOperand(0).getMBB();
    FBB = LastInst->getOperand(0).getMBB();
    Cond.push_back(predSet->getOperand(1));
    Cond.push_back(predSet->getOperand(2));
    Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}
/// ColorSlots - Color all spill stack slots and rewrite all frameindex
/// machine operands in the function.
bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
  unsigned NumObjs = MFI->getObjectIndexEnd();
  SmallVector<int, 16> SlotMapping(NumObjs, -1);
  SmallVector<float, 16> SlotWeights(NumObjs, 0.0);
  SmallVector<SmallVector<int, 4>, 16> RevMap(NumObjs);
  BitVector UsedColors(NumObjs);

  DEBUG(dbgs() << "Color spill slot intervals:\n");
  bool Changed = false;
  for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
    LiveInterval *li = SSIntervals[i];
    int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
    int NewSS = ColorSlot(li);
    assert(NewSS >= 0 && "Stack coloring failed?");
    SlotMapping[SS] = NewSS;
    RevMap[NewSS].push_back(SS);
    SlotWeights[NewSS] += li->weight;
    UsedColors.set(NewSS);
    Changed |= (SS != NewSS);
  }

  DEBUG(dbgs() << "\nSpill slots after coloring:\n");
  for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
    LiveInterval *li = SSIntervals[i];
    int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
    li->weight = SlotWeights[SS];
  }
  // Sort them by new weight.
  std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter());

#ifndef NDEBUG
  for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i)
    DEBUG(SSIntervals[i]->dump());
  DEBUG(dbgs() << '\n');
#endif

  if (!Changed)
    return false;

  // Rewrite all MachineMemOperands.
  for (unsigned SS = 0, SE = SSRefs.size(); SS != SE; ++SS) {
    int NewFI = SlotMapping[SS];
    if (NewFI == -1 || (NewFI == (int)SS))
      continue;

    const PseudoSourceValue *NewSV = MF.getPSVManager().getFixedStack(NewFI);
    SmallVectorImpl<MachineMemOperand *> &RefMMOs = SSRefs[SS];
    for (unsigned i = 0, e = RefMMOs.size(); i != e; ++i)
      RefMMOs[i]->setValue(NewSV);
  }

  // Rewrite all MO_FrameIndex operands.  Look for dead stores.
  for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
       MBBI != E; ++MBBI) {
    MachineBasicBlock *MBB = &*MBBI;
    for (MachineBasicBlock::iterator MII = MBB->begin(), EE = MBB->end();
         MII != EE; ++MII)
      RewriteInstruction(MII, SlotMapping, MF);
    RemoveDeadStores(MBB);
  }

  // Delete unused stack slots.
  while (NextColor != -1) {
    DEBUG(dbgs() << "Removing unused stack object fi#" << NextColor << "\n");
    MFI->RemoveStackObject(NextColor);
    NextColor = AllColors.find_next(NextColor);
  }

  return true;
}
/// computeIntervals - computes the live intervals for virtual /// registers. For some ordering of the machine instructions [1,N] a /// live interval is an interval [i, j) where 1 <= i <= j < N for /// which a variable is live void LiveIntervals::computeIntervals() { DEBUG(dbgs() << "********** COMPUTING LIVE INTERVALS **********\n" << "********** Function: " << MF->getName() << '\n'); RegMaskBlocks.resize(MF->getNumBlockIDs()); SmallVector<unsigned, 8> UndefUses; for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); MBBI != E; ++MBBI) { MachineBasicBlock *MBB = MBBI; RegMaskBlocks[MBB->getNumber()].first = RegMaskSlots.size(); if (MBB->empty()) continue; // Track the index of the current machine instr. SlotIndex MIIndex = getMBBStartIdx(MBB); DEBUG(dbgs() << "BB#" << MBB->getNumber() << ":\t\t# derived from " << MBB->getName() << "\n"); // Skip over empty initial indices. if (getInstructionFromIndex(MIIndex) == 0) MIIndex = Indexes->getNextNonNullIndex(MIIndex); for (MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end(); MI != miEnd; ++MI) { DEBUG(dbgs() << MIIndex << "\t" << *MI); if (MI->isDebugValue()) continue; assert(Indexes->getInstructionFromIndex(MIIndex) == MI && "Lost SlotIndex synchronization"); // Handle defs. for (int i = MI->getNumOperands() - 1; i >= 0; --i) { MachineOperand &MO = MI->getOperand(i); // Collect register masks. if (MO.isRegMask()) { RegMaskSlots.push_back(MIIndex.getRegSlot()); RegMaskBits.push_back(MO.getRegMask()); continue; } if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; // Handle register defs - build intervals. if (MO.isDef()) handleRegisterDef(MBB, MI, MIIndex, MO, i); else if (MO.isUndef()) UndefUses.push_back(MO.getReg()); } // Move to the next instr slot. MIIndex = Indexes->getNextNonNullIndex(MIIndex); } // Compute the number of register mask instructions in this block. std::pair<unsigned, unsigned> &RMB = RegMaskBlocks[MBB->getNumber()]; RMB.second = RegMaskSlots.size() - RMB.first; } // Create empty intervals for registers defined by implicit_defs (except // for those implicit_defs that define values which are live out of their // blocks). for (unsigned i = 0, e = UndefUses.size(); i != e; ++i) { unsigned UndefReg = UndefUses[i]; (void)getOrCreateInterval(UndefReg); } }
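// For orientation (hedged: this mirrors the shape of the accessor
// LiveIntervals exposes for these fields, assuming the member names used
// above): the (first index, count) pairs recorded in RegMaskBlocks delimit
// the per-block slice of the parallel RegMaskSlots/RegMaskBits arrays that
// computeIntervals fills in.
ArrayRef<SlotIndex>
LiveIntervals::getRegMaskSlotsInBlock(unsigned MBBNum) const {
  std::pair<unsigned, unsigned> P = RegMaskBlocks[MBBNum];
  return makeArrayRef(RegMaskSlots).slice(P.first, P.second);
}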
void MipsSEFrameLowering::emitInterruptPrologueStub( MachineFunction &MF, MachineBasicBlock &MBB) const { MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); MachineBasicBlock::iterator MBBI = MBB.begin(); DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); // Report an error if the target doesn't support Mips32r2 or later. // The epilogue relies on the use of the "ehb" to clear execution // hazards. Pre R2 Mips relies on an implementation-defined number // of "ssnop"s to clear the execution hazard. Support for ssnop hazard // clearing is not provided, so reject that configuration. if (!STI.hasMips32r2()) report_fatal_error( "\"interrupt\" attribute is not supported on pre-MIPS32R2 or " "MIPS16 targets."); // The GP register contains the "user" value, so we cannot perform // any gp relative loads until we restore the "kernel" or "system" gp // value. Until support is written we shall only accept the static // relocation model. if (STI.getRelocationModel() != Reloc::Static) report_fatal_error("\"interrupt\" attribute is only supported for the " "static relocation model on MIPS at the present time."); if (!STI.isABI_O32() || STI.hasMips64()) report_fatal_error("\"interrupt\" attribute is only supported for the " "O32 ABI on MIPS32R2+ at the present time."); // Perform ISR handling like GCC does. StringRef IntKind = MF.getFunction()->getFnAttribute("interrupt").getValueAsString(); const TargetRegisterClass *PtrRC = &Mips::GPR32RegClass; // EIC interrupt handling needs to read the Cause register to disable // interrupts. if (IntKind == "eic") { // Coprocessor registers are always live per se. MBB.addLiveIn(Mips::COP013); BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::MFC0), Mips::K0) .addReg(Mips::COP013) .addImm(0) .setMIFlag(MachineInstr::FrameSetup); BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::EXT), Mips::K0) .addReg(Mips::K0) .addImm(10) .addImm(6) .setMIFlag(MachineInstr::FrameSetup); } // Fetch and spill EPC MBB.addLiveIn(Mips::COP014); BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::MFC0), Mips::K1) .addReg(Mips::COP014) .addImm(0) .setMIFlag(MachineInstr::FrameSetup); STI.getInstrInfo()->storeRegToStack(MBB, MBBI, Mips::K1, false, MipsFI->getISRRegFI(0), PtrRC, STI.getRegisterInfo(), 0); // Fetch and spill Status MBB.addLiveIn(Mips::COP012); BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::MFC0), Mips::K1) .addReg(Mips::COP012) .addImm(0) .setMIFlag(MachineInstr::FrameSetup); STI.getInstrInfo()->storeRegToStack(MBB, MBBI, Mips::K1, false, MipsFI->getISRRegFI(1), PtrRC, STI.getRegisterInfo(), 0); // Build the configuration for disabling lower priority interrupts. Non-EIC // interrupts need to be masked off with zero, EIC from the Cause register. unsigned InsPosition = 8; unsigned InsSize = 0; unsigned SrcReg = Mips::ZERO; // If the interrupt we're tied to is the EIC, switch the source for the // masking off interrupts to the cause register. 
if (IntKind == "eic") { SrcReg = Mips::K0; InsPosition = 10; InsSize = 6; } else InsSize = StringSwitch<unsigned>(IntKind) .Case("sw0", 1) .Case("sw1", 2) .Case("hw0", 3) .Case("hw1", 4) .Case("hw2", 5) .Case("hw3", 6) .Case("hw4", 7) .Case("hw5", 8) .Default(0); assert(InsSize != 0 && "Unknown interrupt type!"); BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::INS), Mips::K1) .addReg(SrcReg) .addImm(InsPosition) .addImm(InsSize) .addReg(Mips::K1) .setMIFlag(MachineInstr::FrameSetup); // Mask off KSU, ERL, EXL BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::INS), Mips::K1) .addReg(Mips::ZERO) .addImm(1) .addImm(4) .addReg(Mips::K1) .setMIFlag(MachineInstr::FrameSetup); // Disable the FPU as we are not spilling those register sets. if (!STI.useSoftFloat()) BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::INS), Mips::K1) .addReg(Mips::ZERO) .addImm(29) .addImm(1) .addReg(Mips::K1) .setMIFlag(MachineInstr::FrameSetup); // Set the new status BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::MTC0), Mips::COP012) .addReg(Mips::K1) .addImm(0) .setMIFlag(MachineInstr::FrameSetup); }
void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); const ThumbRegisterInfo *RegInfo = static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); const Thumb1InstrInfo &TII = *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo()); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); int NumBytes = (int)MFI->getStackSize(); assert((unsigned)NumBytes >= ArgRegsSaveSize && "ArgRegsSaveSize is included in NumBytes"); const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); unsigned FramePtr = RegInfo->getFrameRegister(MF); if (!AFI->hasStackFrame()) { if (NumBytes - ArgRegsSaveSize != 0) emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes - ArgRegsSaveSize); } else { // Unwind MBBI to point to first LDR / VLDRD. if (MBBI != MBB.begin()) { do --MBBI; while (MBBI != MBB.begin() && isCSRestore(*MBBI, CSRegs)); if (!isCSRestore(*MBBI, CSRegs)) ++MBBI; } // Move SP to start of FP callee save spill area. NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() + AFI->getDPRCalleeSavedAreaSize() + ArgRegsSaveSize); if (AFI->shouldRestoreSPFromFP()) { NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; // Reset SP based on frame pointer only if the stack frame extends beyond // frame pointer stack slot, the target is ELF and the function has FP, or // the target uses var sized objects. if (NumBytes) { assert(!MFI->getPristineRegs(MF).test(ARM::R4) && "No scratch register to restore SP from FP!"); emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, TII, *RegInfo); AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) .addReg(ARM::R4)); } else AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) .addReg(FramePtr)); } else { if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET && &MBB.front() != &*MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) { MachineBasicBlock::iterator PMBBI = std::prev(MBBI); if (!tryFoldSPUpdateIntoPushPop(STI, MF, &*PMBBI, NumBytes)) emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes); } else if (!tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes)) emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes); } } if (needPopSpecialFixUp(MF)) { bool Done = emitPopSpecialFixUp(MBB, /* DoIt */ true); (void)Done; assert(Done && "Emission of the special fixup failed!?"); } }
bool SVMInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const { /* * Find the last instruction in the block. If there is no terminator, * it implicitly falls through to the next block. */ MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin()) return false; --I; // Skip debug values while (I->isDebugValue()) { if (I == MBB.begin()) return false; --I; } if (!isUnpredicatedTerminator(I)) return false; // Last and next-to-last branch instructions MachineInstr *lastI = I; MachineInstr *prevI = NULL; if (I != MBB.begin()) { --I; if (isUnpredicatedTerminator(I)) prevI = I; } if (prevI == NULL) { // Only a single terminator instruction if (isUncondNearBranchOpcode(lastI->getOpcode())) { // Unconditional branch TBB = lastI->getOperand(0).getMBB(); return false; } if (isCondNearBranchOpcode(lastI->getOpcode())) { // Conditional branch with fall-through TBB = lastI->getOperand(0).getMBB(); Cond.push_back(lastI->getOperand(1)); // CC Cond.push_back(lastI->getOperand(2)); // CPSR return false; } } else { // Multiple terminators if (isUncondNearBranchOpcode(lastI->getOpcode()) && isCondNearBranchOpcode(prevI->getOpcode())) { // Conditional branch followed by unconditional TBB = prevI->getOperand(0).getMBB(); FBB = lastI->getOperand(0).getMBB(); Cond.push_back(prevI->getOperand(1)); // CC Cond.push_back(prevI->getOperand(2)); // CPSR return false; } } // Unhandled return true; }
bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, bool DoIt) const { MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); const TargetInstrInfo &TII = *STI.getInstrInfo(); const ThumbRegisterInfo *RegInfo = static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); // If MBBI is a return instruction, or is a tPOP followed by a return // instruction in the successor BB, we may be able to directly restore // LR in the PC. // This is only possible with v5T ops (v4T can't change the Thumb bit via // a POP PC instruction), and only if we do not need to emit any SP update. // Otherwise, we need a temporary register to pop the value // and copy that value into LR. auto MBBI = MBB.getFirstTerminator(); bool CanRestoreDirectly = STI.hasV5TOps() && !ArgRegsSaveSize; if (CanRestoreDirectly) { if (MBBI != MBB.end() && MBBI->getOpcode() != ARM::tB) CanRestoreDirectly = (MBBI->getOpcode() == ARM::tBX_RET || MBBI->getOpcode() == ARM::tPOP_RET); else { auto MBBI_prev = MBBI; MBBI_prev--; assert(MBBI_prev->getOpcode() == ARM::tPOP); assert(MBB.succ_size() == 1); if ((*MBB.succ_begin())->begin()->getOpcode() == ARM::tBX_RET) MBBI = MBBI_prev; // Replace the final tPOP with a tPOP_RET. else CanRestoreDirectly = false; } } if (CanRestoreDirectly) { if (!DoIt || MBBI->getOpcode() == ARM::tPOP_RET) return true; MachineInstrBuilder MIB = AddDefaultPred( BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET))); // Copy implicit ops and popped registers, if any. for (auto MO: MBBI->operands()) if (MO.isReg() && (MO.isImplicit() || MO.isDef())) MIB.addOperand(MO); MIB.addReg(ARM::PC, RegState::Define); // Erase the old instruction (tBX_RET or tPOP). MBB.erase(MBBI); return true; } // Look for a temporary register to use. // First, compute the liveness information. LivePhysRegs UsedRegs(STI.getRegisterInfo()); UsedRegs.addLiveOuts(MBB); // The semantics of pristines changed recently, and now the callee-saved // registers that are touched in the function // are not part of the pristines set anymore. // Add those callee-saved registers now. const TargetRegisterInfo *TRI = STI.getRegisterInfo(); const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF); for (unsigned i = 0; CSRegs[i]; ++i) UsedRegs.addReg(CSRegs[i]); DebugLoc dl = DebugLoc(); if (MBBI != MBB.end()) { dl = MBBI->getDebugLoc(); auto InstUpToMBBI = MBB.end(); while (InstUpToMBBI != MBBI) // The pre-decrement is on purpose here. // We want to have the liveness right before MBBI. UsedRegs.stepBackward(*--InstUpToMBBI); } // Look for a register that can be directly used in the POP. unsigned PopReg = 0; // And some temporary register, just in case. unsigned TemporaryReg = 0; BitVector PopFriendly = TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::tGPRRegClassID)); assert(PopFriendly.any() && "No allocatable pop-friendly register?!"); // Rebuild the GPRs from the high registers because they are removed // from the GPR reg class for Thumb1. BitVector GPRsNoLRSP = TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::hGPRRegClassID)); GPRsNoLRSP |= PopFriendly; GPRsNoLRSP.reset(ARM::LR); GPRsNoLRSP.reset(ARM::SP); GPRsNoLRSP.reset(ARM::PC); for (int Register = GPRsNoLRSP.find_first(); Register != -1; Register = GPRsNoLRSP.find_next(Register)) { if (!UsedRegs.contains(Register)) { // Remember the first pop-friendly register and exit. 
if (PopFriendly.test(Register)) { PopReg = Register; TemporaryReg = 0; break; } // Otherwise, remember that the register will be available to // save a pop-friendly register. TemporaryReg = Register; } } if (!DoIt && !PopReg && !TemporaryReg) return false; assert((PopReg || TemporaryReg) && "Cannot get LR"); if (TemporaryReg) { assert(!PopReg && "Unnecessary MOV is about to be inserted"); PopReg = PopFriendly.find_first(); AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) .addReg(TemporaryReg, RegState::Define) .addReg(PopReg, RegState::Kill)); } if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPOP_RET) { // We couldn't use the direct restoration above, so // perform the opposite conversion: tPOP_RET to tPOP. MachineInstrBuilder MIB = AddDefaultPred( BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP))); bool Popped = false; for (auto MO: MBBI->operands()) if (MO.isReg() && (MO.isImplicit() || MO.isDef()) && MO.getReg() != ARM::PC) { MIB.addOperand(MO); if (!MO.isImplicit()) Popped = true; } // Is there anything left to pop? if (!Popped) MBB.erase(MIB.getInstr()); // Erase the old instruction. MBB.erase(MBBI); MBBI = AddDefaultPred(BuildMI(MBB, MBB.end(), dl, TII.get(ARM::tBX_RET))); } assert(PopReg && "Do not know how to get LR"); AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) .addReg(PopReg, RegState::Define); emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize); AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) .addReg(ARM::LR, RegState::Define) .addReg(PopReg, RegState::Kill)); if (TemporaryReg) AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) .addReg(PopReg, RegState::Define) .addReg(TemporaryReg, RegState::Kill)); return true; }
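// Distilled sketch of the liveness pattern emitPopSpecialFixUp uses above:
// compute the live set at a program point by stepping backward from the
// block end, then pick any allocatable register that is not live. The
// function name and signature here are illustrative assumptions.
static unsigned findFreeTempReg(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator Pos,
                                const TargetRegisterInfo *TRI,
                                const BitVector &Allocatable) {
  LivePhysRegs Live(TRI);
  Live.addLiveOuts(MBB);
  for (MachineBasicBlock::iterator I = MBB.end(); I != Pos;)
    Live.stepBackward(*--I); // Pre-decrement: liveness right before Pos.
  for (int R = Allocatable.find_first(); R != -1;
       R = Allocatable.find_next(R))
    if (!Live.contains(R))
      return R;
  return 0; // No free register available.
}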
/// If it is profitable, duplicate TailBB's contents in each /// of its predecessors. bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, bool IsSimple, MachineFunction &MF, SmallVectorImpl<MachineBasicBlock *> &TDBBs, SmallVectorImpl<MachineInstr *> &Copies) { DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); DenseSet<unsigned> UsedByPhi; getRegsUsedByPHIs(*TailBB, &UsedByPhi); if (IsSimple) return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies); // Iterate through all the unique predecessors and tail-duplicate this // block into them, if possible. Copying the list ahead of time also // avoids trouble with the predecessor list reallocating. bool Changed = false; SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(), TailBB->pred_end()); for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), PE = Preds.end(); PI != PE; ++PI) { MachineBasicBlock *PredBB = *PI; assert(TailBB != PredBB && "Single-block loop should have been rejected earlier!"); // EH edges are ignored by AnalyzeBranch. if (PredBB->succ_size() > 1) continue; MachineBasicBlock *PredTBB, *PredFBB; SmallVector<MachineOperand, 4> PredCond; if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) continue; if (!PredCond.empty()) continue; // Don't duplicate into a fall-through predecessor (at least for now). if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) continue; DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB << "From Succ: " << *TailBB); TDBBs.push_back(PredBB); // Remove PredBB's unconditional branch. TII->RemoveBranch(*PredBB); if (RS && !TailBB->livein_empty()) { // Update PredBB livein. RS->enterBasicBlock(PredBB); if (!PredBB->empty()) RS->forward(std::prev(PredBB->end())); for (const auto &LI : TailBB->liveins()) { if (!RS->isRegUsed(LI.PhysReg, false)) // If a register is previously livein to the tail but it's not live // at the end of predecessor BB, then it should be added to its // livein list. PredBB->addLiveIn(LI); } } // Clone the contents of TailBB into PredBB. DenseMap<unsigned, unsigned> LocalVRMap; SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; // Use instr_iterator here to properly handle bundles, e.g. // ARM Thumb2 IT block. MachineBasicBlock::instr_iterator I = TailBB->instr_begin(); while (I != TailBB->instr_end()) { MachineInstr *MI = &*I; ++I; if (MI->isPHI()) { // Replace the uses of the def of the PHI with the register coming // from PredBB. ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true); } else { // Replace def of virtual registers with new registers, and update // uses with PHI source register or the new registers. DuplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap, UsedByPhi); } } MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(), TII->get(TargetOpcode::COPY), CopyInfos[i].first).addReg(CopyInfos[i].second)); } // Simplify TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true); NumInstrDups += TailBB->size() - 1; // subtract one for removed branch // Update the CFG. 
PredBB->removeSuccessor(PredBB->succ_begin()); assert(PredBB->succ_empty() && "TailDuplicate called on block with multiple successors!"); for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(), E = TailBB->succ_end(); I != E; ++I) PredBB->addSuccessor(*I, MBPI->getEdgeWeight(TailBB, I)); Changed = true; ++NumTailDups; } // If TailBB was duplicated into all its predecessors except for the prior // block, which falls through unconditionally, move the contents of this // block into the prior block. MachineBasicBlock *PrevBB = &*std::prev(TailBB->getIterator()); MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr; SmallVector<MachineOperand, 4> PriorCond; // This has to check PrevBB->succ_size() because EH edges are ignored by // AnalyzeBranch. if (PrevBB->succ_size() == 1 && !TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) && PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 && !TailBB->hasAddressTaken()) { DEBUG(dbgs() << "\nMerging into block: " << *PrevBB << "From MBB: " << *TailBB); if (PreRegAlloc) { DenseMap<unsigned, unsigned> LocalVRMap; SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; MachineBasicBlock::iterator I = TailBB->begin(); // Process PHI instructions first. while (I != TailBB->end() && I->isPHI()) { // Replace the uses of the def of the PHI with the register coming // from PredBB. MachineInstr *MI = &*I++; ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true); if (MI->getParent()) MI->eraseFromParent(); } // Now copy the non-PHI instructions. while (I != TailBB->end()) { // Replace def of virtual registers with new registers, and update // uses with PHI source register or the new registers. MachineInstr *MI = &*I++; assert(!MI->isBundle() && "Not expecting bundles before regalloc!"); DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi); MI->eraseFromParent(); } MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator(); for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { Copies.push_back(BuildMI(*PrevBB, Loc, DebugLoc(), TII->get(TargetOpcode::COPY), CopyInfos[i].first) .addReg(CopyInfos[i].second)); } } else { // No PHIs to worry about, just splice the instructions over. PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end()); } PrevBB->removeSuccessor(PrevBB->succ_begin()); assert(PrevBB->succ_empty()); PrevBB->transferSuccessors(TailBB); TDBBs.push_back(PrevBB); Changed = true; } // If this is after register allocation, there are no phis to fix. if (!PreRegAlloc) return Changed; // If we made no changes so far, we are safe. if (!Changed) return Changed; // Handle the nasty case in that we duplicated a block that is part of a loop // into some but not all of its predecessors. For example: // 1 -> 2 <-> 3 | // \ | // \---> rest | // if we duplicate 2 into 1 but not into 3, we end up with // 12 -> 3 <-> 2 -> rest | // \ / | // \----->-----/ | // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced // with a phi in 3 (which now dominates 2). // What we do here is introduce a copy in 3 of the register defined by the // phi, just like when we are duplicating 2 into 3, but we don't copy any // real instructions or remove the 3 -> 2 edge from the phi in 2. 
for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), PE = Preds.end(); PI != PE; ++PI) { MachineBasicBlock *PredBB = *PI; if (std::find(TDBBs.begin(), TDBBs.end(), PredBB) != TDBBs.end()) continue; // EH edges if (PredBB->succ_size() != 1) continue; DenseMap<unsigned, unsigned> LocalVRMap; SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; MachineBasicBlock::iterator I = TailBB->begin(); // Process PHI instructions first. while (I != TailBB->end() && I->isPHI()) { // Replace the uses of the def of the PHI with the register coming // from PredBB. MachineInstr *MI = &*I++; ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false); } MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(), TII->get(TargetOpcode::COPY), CopyInfos[i].first).addReg(CopyInfos[i].second)); } } return Changed; }
void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); MachineModuleInfo &MMI = MF.getMMI(); const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); const ThumbRegisterInfo *RegInfo = static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); const Thumb1InstrInfo &TII = *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo()); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); unsigned NumBytes = MFI->getStackSize(); assert(NumBytes >= ArgRegsSaveSize && "ArgRegsSaveSize is included in NumBytes"); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); // Debug location must be unknown since the first debug location is used // to determine the end of the prologue. DebugLoc dl; unsigned FramePtr = RegInfo->getFrameRegister(MF); unsigned BasePtr = RegInfo->getBaseRegister(); int CFAOffset = 0; // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4. NumBytes = (NumBytes + 3) & ~3; MFI->setStackSize(NumBytes); // Determine the sizes of each callee-save spill areas and record which frame // belongs to which callee-save spill areas. unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; int FramePtrSpillFI = 0; if (ArgRegsSaveSize) { emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -ArgRegsSaveSize, MachineInstr::FrameSetup); CFAOffset -= ArgRegsSaveSize; unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } if (!AFI->hasStackFrame()) { if (NumBytes - ArgRegsSaveSize != 0) { emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -(NumBytes - ArgRegsSaveSize), MachineInstr::FrameSetup); CFAOffset -= NumBytes - ArgRegsSaveSize; unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } return; } for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); int FI = CSI[i].getFrameIdx(); switch (Reg) { case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11: if (STI.splitFramePushPop()) { GPRCS2Size += 4; break; } // fallthrough case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7: case ARM::LR: if (Reg == FramePtr) FramePtrSpillFI = FI; GPRCS1Size += 4; break; default: DPRCSSize += 8; } } if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { ++MBBI; } // Determine starting offsets of spill areas. 
unsigned DPRCSOffset = NumBytes - ArgRegsSaveSize - (GPRCS1Size + GPRCS2Size + DPRCSSize); unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; bool HasFP = hasFP(MF); if (HasFP) AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes); AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); NumBytes = DPRCSOffset; int FramePtrOffsetInBlock = 0; unsigned adjustedGPRCS1Size = GPRCS1Size; if (tryFoldSPUpdateIntoPushPop(STI, MF, &*std::prev(MBBI), NumBytes)) { FramePtrOffsetInBlock = NumBytes; adjustedGPRCS1Size += NumBytes; NumBytes = 0; } if (adjustedGPRCS1Size) { CFAOffset -= adjustedGPRCS1Size; unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), E = CSI.end(); I != E; ++I) { unsigned Reg = I->getReg(); int FI = I->getFrameIdx(); switch (Reg) { case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11: case ARM::R12: if (STI.splitFramePushPop()) break; // fallthrough case ARM::R0: case ARM::R1: case ARM::R2: case ARM::R3: case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7: case ARM::LR: unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( nullptr, MRI->getDwarfRegNum(Reg, true), MFI->getObjectOffset(FI))); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); break; } } // Adjust FP so it points to the stack slot that contains the previous FP. if (HasFP) { FramePtrOffsetInBlock += MFI->getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize; AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) .addReg(ARM::SP).addImm(FramePtrOffsetInBlock / 4) .setMIFlags(MachineInstr::FrameSetup)); if (FramePtrOffsetInBlock) { CFAOffset += FramePtrOffsetInBlock; unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa( nullptr, MRI->getDwarfRegNum(FramePtr, true), CFAOffset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } else { unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister( nullptr, MRI->getDwarfRegNum(FramePtr, true))); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } if (NumBytes > 508) // If the offset is > 508, sp cannot be adjusted in a single instruction, // so try restoring from fp instead. AFI->setShouldRestoreSPFromFP(true); } if (NumBytes) { // Insert it after all the callee-save spills. emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes, MachineInstr::FrameSetup); if (!HasFP) { CFAOffset -= NumBytes; unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } } if (STI.isTargetELF() && HasFP) MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - AFI->getFramePtrSpillOffset()); AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); AFI->setDPRCalleeSavedAreaSize(DPRCSSize); // Thumb1 does not currently support dynamic stack realignment. Report a // fatal error rather than silently generating bad code. 
if (RegInfo->needsStackRealignment(MF)) report_fatal_error("Dynamic stack realignment not supported for thumb1."); // If we need a base pointer, set it up here. It's whatever the value // of the stack pointer is at this point. Any variable size objects // will be allocated after this, so we can still use the base pointer // to reference locals. if (RegInfo->hasBasePointer(MF)) AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), BasePtr) .addReg(ARM::SP)); // If the frame has variable sized objects then the epilogue must restore // the sp from fp. We can assume there's an FP here since hasFP already // checks for hasVarSizedObjects. if (MFI->hasVarSizedObjects()) AFI->setShouldRestoreSPFromFP(true); }
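// Worked illustration (hedged; not compiler code) of the encoding limit
// behind the NumBytes > 508 check in emitPrologue above: Thumb1
// "add/sub sp, #imm" scales a 7-bit immediate by 4, so a single instruction
// can move SP by at most 127 * 4 = 508 bytes, and SP-relative offsets are
// emitted pre-divided by 4 (see FramePtrOffsetInBlock / 4 above).
static bool fitsInSingleSpAdjust(unsigned Bytes) {
  return (Bytes & 3u) == 0 && Bytes / 4 <= 127;
}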
/// Walk the specified loop in the CFG (defined by all blocks dominated by the /// specified header block, and that are in the current loop) in depth first /// order w.r.t the DominatorTree. This allows us to visit definitions before /// uses, allowing us to hoist a loop body in one pass without iteration. /// void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { MachineBasicBlock *Preheader = getCurPreheader(); if (!Preheader) return; SmallVector<MachineDomTreeNode*, 32> Scopes; SmallVector<MachineDomTreeNode*, 8> WorkList; DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap; DenseMap<MachineDomTreeNode*, unsigned> OpenChildren; // Perform a DFS walk to determine the order of visit. WorkList.push_back(HeaderN); while (!WorkList.empty()) { MachineDomTreeNode *Node = WorkList.pop_back_val(); assert(Node && "Null dominator tree node?"); MachineBasicBlock *BB = Node->getBlock(); // If the header of the loop containing this basic block is a landing pad, // then don't try to hoist instructions out of this loop. const MachineLoop *ML = MLI->getLoopFor(BB); if (ML && ML->getHeader()->isEHPad()) continue; // If this subregion is not in the top level loop at all, exit. if (!CurLoop->contains(BB)) continue; Scopes.push_back(Node); const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); unsigned NumChildren = Children.size(); // Don't hoist things out of a large switch statement. This often causes // code to be hoisted that wasn't going to be executed, and increases // register pressure in a situation where it's likely to matter. if (BB->succ_size() >= 25) NumChildren = 0; OpenChildren[Node] = NumChildren; // Add children in reverse order as then the next popped worklist node is // the first child of this node. This means we ultimately traverse the // DOM tree in exactly the same order as if we'd recursed. for (int i = (int)NumChildren-1; i >= 0; --i) { MachineDomTreeNode *Child = Children[i]; ParentMap[Child] = Node; WorkList.push_back(Child); } } if (Scopes.size() == 0) return; // Compute registers which are livein into the loop headers. RegSeen.clear(); BackTrace.clear(); InitRegPressure(Preheader); // Now perform LICM. for (unsigned i = 0, e = Scopes.size(); i != e; ++i) { MachineDomTreeNode *Node = Scopes[i]; MachineBasicBlock *MBB = Node->getBlock(); EnterScope(MBB); // Process the block SpeculationState = SpeculateUnknown; for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); MII != E; ) { MachineBasicBlock::iterator NextMII = MII; ++NextMII; MachineInstr *MI = &*MII; if (!Hoist(MI, Preheader)) UpdateRegPressure(MI); MII = NextMII; } // If it's a leaf node, it's done. Traverse upwards to pop ancestors. ExitScopeIfDone(Node, OpenChildren, ParentMap); } }
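// Standalone illustration of the worklist trick HoistOutOfLoop relies on:
// pushing children in reverse order makes the LIFO worklist visit nodes in
// exactly the same order as a recursive preorder walk of the dominator
// tree. The template is generic; only a getChildren() accessor is assumed.
#include <vector>
template <typename NodeT>
std::vector<NodeT *> preorder(NodeT *Root) {
  std::vector<NodeT *> Order, Work;
  Work.push_back(Root);
  while (!Work.empty()) {
    NodeT *N = Work.back();
    Work.pop_back();
    Order.push_back(N); // Visit before any of the children.
    const auto &Kids = N->getChildren();
    for (auto I = Kids.rbegin(), E = Kids.rend(); I != E; ++I)
      Work.push_back(*I); // Reversed, so the first child is popped next.
  }
  return Order;
}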
void MSP430RegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); MSP430MachineFunctionInfo *MSP430FI = MF.getInfo<MSP430MachineFunctionInfo>(); MachineBasicBlock::iterator MBBI = prior(MBB.end()); unsigned RetOpcode = MBBI->getOpcode(); DebugLoc DL = MBBI->getDebugLoc(); switch (RetOpcode) { case MSP430::RET: case MSP430::RETI: break; // These are ok default: llvm_unreachable("Can only insert epilog into returning blocks"); } // Get the number of bytes to allocate from the FrameInfo uint64_t StackSize = MFI->getStackSize(); unsigned CSSize = MSP430FI->getCalleeSavedFrameSize(); uint64_t NumBytes = 0; if (hasFP(MF)) { // Calculate required stack adjustment uint64_t FrameSize = StackSize - 2; NumBytes = FrameSize - CSSize; // pop FPW. BuildMI(MBB, MBBI, DL, TII.get(MSP430::POP16r), MSP430::FPW); } else NumBytes = StackSize - CSSize; // Skip the callee-saved pop instructions. while (MBBI != MBB.begin()) { MachineBasicBlock::iterator PI = prior(MBBI); unsigned Opc = PI->getOpcode(); if (Opc != MSP430::POP16r && !PI->getDesc().isTerminator()) break; --MBBI; } DL = MBBI->getDebugLoc(); // If there is an ADD16ri or SUB16ri of SPW immediately before this // instruction, merge the two instructions. //if (NumBytes || MFI->hasVarSizedObjects()) // mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes); if (MFI->hasVarSizedObjects()) { BuildMI(MBB, MBBI, DL, TII.get(MSP430::MOV16rr), MSP430::SPW).addReg(MSP430::FPW); if (CSSize) { MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(MSP430::SUB16ri), MSP430::SPW) .addReg(MSP430::SPW).addImm(CSSize); // The SRW implicit def is dead. MI->getOperand(3).setIsDead(); } } else { // adjust stack pointer back: SPW += numbytes if (NumBytes) { MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(MSP430::ADD16ri), MSP430::SPW) .addReg(MSP430::SPW).addImm(NumBytes); // The SRW implicit def is dead. MI->getOperand(3).setIsDead(); } } }
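// Worked numbers (hypothetical) for the adjustment computed above when
// hasFP(MF) is true: with StackSize == 12 and CSSize == 4, FrameSize is
// 12 - 2 == 10 (the 2 bytes are the saved FPW slot, restored by the POP16r
// emitted separately) and NumBytes == 10 - 4 == 6, the amount added back to
// SPW after the callee-saved pops.
#include <cstdint>
static uint64_t epilogueSpAdjust(uint64_t StackSize, unsigned CSSize,
                                 bool HasFP) {
  return HasFP ? (StackSize - 2) - CSSize : StackSize - CSSize;
}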
MachineBasicBlock * AlphaTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); assert((MI->getOpcode() == Alpha::CAS32 || MI->getOpcode() == Alpha::CAS64 || MI->getOpcode() == Alpha::LAS32 || MI->getOpcode() == Alpha::LAS64 || MI->getOpcode() == Alpha::SWAP32 || MI->getOpcode() == Alpha::SWAP64) && "Unexpected instr type to insert"); bool is32 = MI->getOpcode() == Alpha::CAS32 || MI->getOpcode() == Alpha::LAS32 || MI->getOpcode() == Alpha::SWAP32; // Load-locked/store-conditional atomic ops all take the same form: // start: // ll // do stuff (maybe branch to exit) // sc // test sc and maybe branch to start // exit: const BasicBlock *LLVM_BB = BB->getBasicBlock(); DebugLoc dl = MI->getDebugLoc(); MachineFunction::iterator It = BB; ++It; MachineBasicBlock *thisMBB = BB; MachineFunction *F = BB->getParent(); MachineBasicBlock *llscMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); sinkMBB->splice(sinkMBB->begin(), thisMBB, llvm::next(MachineBasicBlock::iterator(MI)), thisMBB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(thisMBB); F->insert(It, llscMBB); F->insert(It, sinkMBB); BuildMI(thisMBB, dl, TII->get(Alpha::BR)).addMBB(llscMBB); unsigned reg_res = MI->getOperand(0).getReg(), reg_ptr = MI->getOperand(1).getReg(), reg_v2 = MI->getOperand(2).getReg(), reg_store = F->getRegInfo().createVirtualRegister(&Alpha::GPRCRegClass); BuildMI(llscMBB, dl, TII->get(is32 ? Alpha::LDL_L : Alpha::LDQ_L), reg_res).addImm(0).addReg(reg_ptr); switch (MI->getOpcode()) { case Alpha::CAS32: case Alpha::CAS64: { unsigned reg_cmp = F->getRegInfo().createVirtualRegister(&Alpha::GPRCRegClass); BuildMI(llscMBB, dl, TII->get(Alpha::CMPEQ), reg_cmp) .addReg(reg_v2).addReg(reg_res); BuildMI(llscMBB, dl, TII->get(Alpha::BEQ)) .addImm(0).addReg(reg_cmp).addMBB(sinkMBB); BuildMI(llscMBB, dl, TII->get(Alpha::BISr), reg_store) .addReg(Alpha::R31).addReg(MI->getOperand(3).getReg()); break; } case Alpha::LAS32: case Alpha::LAS64: { BuildMI(llscMBB, dl, TII->get(is32 ? Alpha::ADDLr : Alpha::ADDQr), reg_store) .addReg(reg_res).addReg(reg_v2); break; } case Alpha::SWAP32: case Alpha::SWAP64: { BuildMI(llscMBB, dl, TII->get(Alpha::BISr), reg_store) .addReg(reg_v2).addReg(reg_v2); break; } } BuildMI(llscMBB, dl, TII->get(is32 ? Alpha::STL_C : Alpha::STQ_C), reg_store) .addReg(reg_store).addImm(0).addReg(reg_ptr); BuildMI(llscMBB, dl, TII->get(Alpha::BEQ)) .addImm(0).addReg(reg_store).addMBB(llscMBB); BuildMI(llscMBB, dl, TII->get(Alpha::BR)).addMBB(sinkMBB); thisMBB->addSuccessor(llscMBB); llscMBB->addSuccessor(llscMBB); llscMBB->addSuccessor(sinkMBB); MI->eraseFromParent(); // The pseudo instruction is gone now. return sinkMBB; }
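// C-level sketch (illustrative only; __ll and __sc are hypothetical
// stand-ins for Alpha's LDQ_L/STQ_C, declared here just so the sketch is
// self-contained) of the loop structure the custom inserter above builds
// for CAS: load-linked the current value, exit to the sink block on compare
// failure, otherwise attempt the store-conditional and retry from the load
// if it was interrupted.
extern long __ll(long *Ptr);           // hypothetical load-linked (LDQ_L)
extern int __sc(long *Ptr, long Val);  // hypothetical store-cond (STQ_C)
static long atomicCAS(long *Ptr, long Cmp, long New) {
  long Old;
  do {
    Old = __ll(Ptr);           // llscMBB: LDQ_L
    if (Old != Cmp)
      break;                   // BEQ reg_cmp -> sinkMBB
  } while (!__sc(Ptr, New));   // STQ_C; BEQ reg_store -> llscMBB on failure
  return Old;
}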
void MipsSEFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); MachineFrameInfo *MFI = MF.getFrameInfo(); MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); const MipsSEInstrInfo &TII = *static_cast<const MipsSEInstrInfo *>(STI.getInstrInfo()); const MipsRegisterInfo &RegInfo = *static_cast<const MipsRegisterInfo *>(STI.getRegisterInfo()); MachineBasicBlock::iterator MBBI = MBB.begin(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); MipsABIInfo ABI = STI.getABI(); unsigned SP = ABI.GetStackPtr(); unsigned FP = ABI.GetFramePtr(); unsigned ZERO = ABI.GetNullPtr(); unsigned ADDu = ABI.GetPtrAdduOp(); // First, compute final stack size. uint64_t StackSize = MFI->getStackSize(); // No need to allocate space on the stack. if (StackSize == 0 && !MFI->adjustsStack()) return; MachineModuleInfo &MMI = MF.getMMI(); const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); MachineLocation DstML, SrcML; // Adjust stack. TII.adjustStackPtr(SP, -StackSize, MBB, MBBI); // emit ".cfi_def_cfa_offset StackSize" unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, -StackSize)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); if (CSI.size()) { // Find the instruction past the last instruction that saves a callee-saved // register to the stack. for (unsigned i = 0; i < CSI.size(); ++i) ++MBBI; // Iterate over list of callee-saved registers and emit .cfi_offset // directives. for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), E = CSI.end(); I != E; ++I) { int64_t Offset = MFI->getObjectOffset(I->getFrameIdx()); unsigned Reg = I->getReg(); // If Reg is a double precision register, emit two cfa_offsets, // one for each of the paired single precision registers. if (Mips::AFGR64RegClass.contains(Reg)) { unsigned Reg0 = MRI->getDwarfRegNum(RegInfo.getSubReg(Reg, Mips::sub_lo), true); unsigned Reg1 = MRI->getDwarfRegNum(RegInfo.getSubReg(Reg, Mips::sub_hi), true); if (!STI.isLittle()) std::swap(Reg0, Reg1); unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, Reg0, Offset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, Reg1, Offset + 4)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } else if (Mips::FGR64RegClass.contains(Reg)) { unsigned Reg0 = MRI->getDwarfRegNum(Reg, true); unsigned Reg1 = MRI->getDwarfRegNum(Reg, true) + 1; if (!STI.isLittle()) std::swap(Reg0, Reg1); unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, Reg0, Offset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, Reg1, Offset + 4)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } else { // Reg is either in GPR32 or FGR32. unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( nullptr, MRI->getDwarfRegNum(Reg, 1), Offset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } } } if (MipsFI->callsEhReturn()) { const TargetRegisterClass *PtrRC = ABI.ArePtrs64bit() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; // Insert instructions that spill eh data registers. 
for (int I = 0; I < 4; ++I) { if (!MBB.isLiveIn(ABI.GetEhDataReg(I))) MBB.addLiveIn(ABI.GetEhDataReg(I)); TII.storeRegToStackSlot(MBB, MBBI, ABI.GetEhDataReg(I), false, MipsFI->getEhDataRegFI(I), PtrRC, &RegInfo); } // Emit .cfi_offset directives for eh data registers. for (int I = 0; I < 4; ++I) { int64_t Offset = MFI->getObjectOffset(MipsFI->getEhDataRegFI(I)); unsigned Reg = MRI->getDwarfRegNum(ABI.GetEhDataReg(I), true); unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, Reg, Offset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } } // If the frame pointer is enabled, set it to point to the stack pointer. if (hasFP(MF)) { // Insert instruction "move $fp, $sp" at this location. BuildMI(MBB, MBBI, dl, TII.get(ADDu), FP).addReg(SP).addReg(ZERO) .setMIFlag(MachineInstr::FrameSetup); // emit ".cfi_def_cfa_register $fp" unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister( nullptr, MRI->getDwarfRegNum(FP, true))); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } }