unsigned AlphaCodeEmitter::getMachineOpValue(const MachineInstr &MI, const MachineOperand &MO) { unsigned rv = 0; // Return value; defaults to 0 for unhandled cases // or things that get fixed up later by the JIT. if (MO.isReg()) { rv = getAlphaRegNumber(MO.getReg()); } else if (MO.isImm()) { rv = MO.getImm(); } else if (MO.isGlobal() || MO.isSymbol() || MO.isCPI()) { DEBUG(errs() << MO << " is a relocated op for " << MI << "\n"); unsigned Reloc = 0; int Offset = 0; bool useGOT = false; switch (MI.getOpcode()) { case Alpha::BSR: Reloc = Alpha::reloc_bsr; break; case Alpha::LDLr: case Alpha::LDQr: case Alpha::LDBUr: case Alpha::LDWUr: case Alpha::LDSr: case Alpha::LDTr: case Alpha::LDAr: case Alpha::STQr: case Alpha::STLr: case Alpha::STWr: case Alpha::STBr: case Alpha::STSr: case Alpha::STTr: Reloc = Alpha::reloc_gprellow; break; case Alpha::LDAHr: Reloc = Alpha::reloc_gprelhigh; break; case Alpha::LDQl: Reloc = Alpha::reloc_literal; useGOT = true; break; case Alpha::LDAg: case Alpha::LDAHg: Reloc = Alpha::reloc_gpdist; Offset = MI.getOperand(3).getImm(); break; default: llvm_unreachable("unknown relocatable instruction"); } if (MO.isGlobal()) MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, MO.getGlobal(), Offset, isa<Function>(MO.getGlobal()), useGOT)); else if (MO.isSymbol()) MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), Reloc, MO.getSymbolName(), Offset, true)); else MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(), Reloc, MO.getIndex(), Offset)); } else if (MO.isMBB()) { MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(), Alpha::reloc_bsr, MO.getMBB())); } else { #ifndef NDEBUG errs() << "ERROR: Unknown type of MachineOperand: " << MO << "\n"; #endif llvm_unreachable(0); } return rv; }
bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { MachineRegisterInfo &MRI = MF.getRegInfo(); const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo()); const SIRegisterInfo &TRI = TII->getRegisterInfo(); for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) { MachineBasicBlock &MBB = *BI; MachineBasicBlock::iterator I, Next; for (I = MBB.begin(); I != MBB.end(); I = Next) { Next = std::next(I); MachineInstr &MI = *I; if (!isSafeToFold(MI.getOpcode())) continue; unsigned OpSize = TII->getOpSize(MI, 1); MachineOperand &OpToFold = MI.getOperand(1); bool FoldingImm = OpToFold.isImm(); // FIXME: We could also be folding things like FrameIndexes and // TargetIndexes. if (!FoldingImm && !OpToFold.isReg()) continue; // Folding immediates with more than one use will increase program size. // FIXME: This will also reduce register usage, which may be better // in some cases. A better heuristic is needed. if (FoldingImm && !TII->isInlineConstant(OpToFold, OpSize) && !MRI.hasOneUse(MI.getOperand(0).getReg())) continue; // FIXME: Fold operands with subregs. if (OpToFold.isReg() && (!TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()) || OpToFold.getSubReg())) continue; std::vector<FoldCandidate> FoldList; for (MachineRegisterInfo::use_iterator Use = MRI.use_begin(MI.getOperand(0).getReg()), E = MRI.use_end(); Use != E; ++Use) { MachineInstr *UseMI = Use->getParent(); const MachineOperand &UseOp = UseMI->getOperand(Use.getOperandNo()); // FIXME: Fold operands with subregs. if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) || UseOp.isImplicit())) { continue; } APInt Imm; if (FoldingImm) { unsigned UseReg = UseOp.getReg(); const TargetRegisterClass *UseRC = TargetRegisterInfo::isVirtualRegister(UseReg) ? MRI.getRegClass(UseReg) : TRI.getRegClass(UseReg); Imm = APInt(64, OpToFold.getImm()); // Split 64-bit constants into 32-bits for folding. if (UseOp.getSubReg()) { if (UseRC->getSize() != 8) continue; if (UseOp.getSubReg() == AMDGPU::sub0) { Imm = Imm.getLoBits(32); } else { assert(UseOp.getSubReg() == AMDGPU::sub1); Imm = Imm.getHiBits(32); } } // In order to fold immediates into copies, we need to change the // copy to a MOV. if (UseMI->getOpcode() == AMDGPU::COPY) { unsigned DestReg = UseMI->getOperand(0).getReg(); const TargetRegisterClass *DestRC = TargetRegisterInfo::isVirtualRegister(DestReg) ? MRI.getRegClass(DestReg) : TRI.getRegClass(DestReg); unsigned MovOp = TII->getMovOpcode(DestRC); if (MovOp == AMDGPU::COPY) continue; UseMI->setDesc(TII->get(MovOp)); } } const MCInstrDesc &UseDesc = UseMI->getDesc(); // Don't fold into target independent nodes. Target independent opcodes // don't have defined register classes. if (UseDesc.isVariadic() || UseDesc.OpInfo[Use.getOperandNo()].RegClass == -1) continue; if (FoldingImm) { MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue()); tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &ImmOp, TII); continue; } tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &OpToFold, TII); // FIXME: We could try to change the instruction from 64-bit to 32-bit // to enable more folding opportunites. The shrink operands pass // already does this. } for (FoldCandidate &Fold : FoldList) { if (updateOperand(Fold, TRI)) { // Clear kill flags. 
if (!Fold.isImm()) { assert(Fold.OpToFold && Fold.OpToFold->isReg()); Fold.OpToFold->setIsKill(false); } DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " << Fold.UseOpNo << " of " << *Fold.UseMI << '\n'); } } } } return false; }
void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, int &SPAdj) { assert(Fn.getSubtarget().getRegisterInfo() && "getRegisterInfo() must be implemented!"); const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo(); const TargetRegisterInfo &TRI = *Fn.getSubtarget().getRegisterInfo(); const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode(); unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB); bool InsideCallSequence = false; for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { if (I->getOpcode() == FrameSetupOpcode || I->getOpcode() == FrameDestroyOpcode) { InsideCallSequence = (I->getOpcode() == FrameSetupOpcode); SPAdj += TII.getSPAdjust(I); MachineBasicBlock::iterator PrevI = BB->end(); if (I != BB->begin()) PrevI = std::prev(I); TFI->eliminateCallFramePseudoInstr(Fn, *BB, I); // Visit the instructions created by eliminateCallFramePseudoInstr(). if (PrevI == BB->end()) I = BB->begin(); // The replaced instr was the first in the block. else I = std::next(PrevI); continue; } MachineInstr *MI = I; bool DoIncr = true; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { if (!MI->getOperand(i).isFI()) continue; // Frame indicies in debug values are encoded in a target independent // way with simply the frame index and offset rather than any // target-specific addressing mode. if (MI->isDebugValue()) { assert(i == 0 && "Frame indicies can only appear as the first " "operand of a DBG_VALUE machine instruction"); unsigned Reg; MachineOperand &Offset = MI->getOperand(1); Offset.setImm(Offset.getImm() + TFI->getFrameIndexReference( Fn, MI->getOperand(0).getIndex(), Reg)); MI->getOperand(0).ChangeToRegister(Reg, false /*isDef*/); continue; } // TODO: This code should be commoned with the code for // PATCHPOINT. There's no good reason for the difference in // implementation other than historical accident. The only // remaining difference is the unconditional use of the stack // pointer as the base register. if (MI->getOpcode() == TargetOpcode::STATEPOINT) { assert((!MI->isDebugValue() || i == 0) && "Frame indicies can only appear as the first operand of a " "DBG_VALUE machine instruction"); unsigned Reg; MachineOperand &Offset = MI->getOperand(i + 1); const unsigned refOffset = TFI->getFrameIndexReferenceFromSP(Fn, MI->getOperand(i).getIndex(), Reg); Offset.setImm(Offset.getImm() + refOffset); MI->getOperand(i).ChangeToRegister(Reg, false /*isDef*/); continue; } // Some instructions (e.g. inline asm instructions) can have // multiple frame indices and/or cause eliminateFrameIndex // to insert more than one instruction. We need the register // scavenger to go through all of these instructions so that // it can update its register information. We keep the // iterator at the point before insertion so that we can // revisit them in full. bool AtBeginning = (I == BB->begin()); if (!AtBeginning) --I; // If this instruction has a FrameIndex operand, we need to // use that target machine register info object to eliminate // it. TRI.eliminateFrameIndex(MI, SPAdj, i, FrameIndexVirtualScavenging ? nullptr : RS); // Reset the iterator if we were at the beginning of the BB. if (AtBeginning) { I = BB->begin(); DoIncr = false; } MI = nullptr; break; } // If we are looking at a call sequence, we need to keep track of // the SP adjustment made by each instruction in the sequence. 
// This includes both the frame setup/destroy pseudos (handled above), // as well as other instructions that have side effects w.r.t the SP. // Note that this must come after eliminateFrameIndex, because // if I itself referred to a frame index, we shouldn't count its own // adjustment. if (MI && InsideCallSequence) SPAdj += TII.getSPAdjust(MI); if (DoIncr && I != BB->end()) ++I; // Update register states. if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI); } }
/// Attempt the reassociation transformation to reduce critical path length. /// See the above comments before getMachineCombinerPatterns(). void TargetInstrInfo::reassociateOps( MachineInstr &Root, MachineInstr &Prev, MachineCombinerPattern::MC_PATTERN Pattern, SmallVectorImpl<MachineInstr *> &InsInstrs, SmallVectorImpl<MachineInstr *> &DelInstrs, DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const { MachineFunction *MF = Root.getParent()->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); const TargetRegisterClass *RC = Root.getRegClassConstraint(0, TII, TRI); // This array encodes the operand index for each parameter because the // operands may be commuted. Each row corresponds to a pattern value, // and each column specifies the index of A, B, X, Y. unsigned OpIdx[4][4] = { { 1, 1, 2, 2 }, { 1, 2, 2, 1 }, { 2, 1, 1, 2 }, { 2, 2, 1, 1 } }; MachineOperand &OpA = Prev.getOperand(OpIdx[Pattern][0]); MachineOperand &OpB = Root.getOperand(OpIdx[Pattern][1]); MachineOperand &OpX = Prev.getOperand(OpIdx[Pattern][2]); MachineOperand &OpY = Root.getOperand(OpIdx[Pattern][3]); MachineOperand &OpC = Root.getOperand(0); unsigned RegA = OpA.getReg(); unsigned RegB = OpB.getReg(); unsigned RegX = OpX.getReg(); unsigned RegY = OpY.getReg(); unsigned RegC = OpC.getReg(); if (TargetRegisterInfo::isVirtualRegister(RegA)) MRI.constrainRegClass(RegA, RC); if (TargetRegisterInfo::isVirtualRegister(RegB)) MRI.constrainRegClass(RegB, RC); if (TargetRegisterInfo::isVirtualRegister(RegX)) MRI.constrainRegClass(RegX, RC); if (TargetRegisterInfo::isVirtualRegister(RegY)) MRI.constrainRegClass(RegY, RC); if (TargetRegisterInfo::isVirtualRegister(RegC)) MRI.constrainRegClass(RegC, RC); // Create a new virtual register for the result of (X op Y) instead of // recycling RegB because the MachineCombiner's computation of the critical // path requires a new register definition rather than an existing one. unsigned NewVR = MRI.createVirtualRegister(RC); InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); unsigned Opcode = Root.getOpcode(); bool KillA = OpA.isKill(); bool KillX = OpX.isKill(); bool KillY = OpY.isKill(); // Create new instructions for insertion. MachineInstrBuilder MIB1 = BuildMI(*MF, Prev.getDebugLoc(), TII->get(Opcode), NewVR) .addReg(RegX, getKillRegState(KillX)) .addReg(RegY, getKillRegState(KillY)); MachineInstrBuilder MIB2 = BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC) .addReg(RegA, getKillRegState(KillA)) .addReg(NewVR, getKillRegState(true)); setSpecialOperandAttr(Root, Prev, *MIB1, *MIB2); // Record new instructions for insertion and old instructions for deletion. InsInstrs.push_back(MIB1); InsInstrs.push_back(MIB2); DelInstrs.push_back(&Prev); DelInstrs.push_back(&Root); }
void UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo, const SmallVectorImpl<SlotIndex> &Kills, SmallVectorImpl<std::pair<SlotIndex, unsigned> > &NewDefs, MachineRegisterInfo &MRI, LiveIntervals &LIS) { if (Kills.empty()) return; // Don't track copies from physregs, there are too many uses. if (!TargetRegisterInfo::isVirtualRegister(LI->reg)) return; // Collect all the (vreg, valno) pairs that are copies of LI. SmallVector<std::pair<LiveInterval*, const VNInfo*>, 8> CopyValues; for (MachineOperand &MO : MRI.use_nodbg_operands(LI->reg)) { MachineInstr *MI = MO.getParent(); // Copies of the full value. if (MO.getSubReg() || !MI->isCopy()) continue; unsigned DstReg = MI->getOperand(0).getReg(); // Don't follow copies to physregs. These are usually setting up call // arguments, and the argument registers are always call clobbered. We are // better off in the source register which could be a callee-saved register, // or it could be spilled. if (!TargetRegisterInfo::isVirtualRegister(DstReg)) continue; // Is LocNo extended to reach this copy? If not, another def may be blocking // it, or we are looking at a wrong value of LI. SlotIndex Idx = LIS.getInstructionIndex(MI); LocMap::iterator I = locInts.find(Idx.getRegSlot(true)); if (!I.valid() || I.value() != LocNo) continue; if (!LIS.hasInterval(DstReg)) continue; LiveInterval *DstLI = &LIS.getInterval(DstReg); const VNInfo *DstVNI = DstLI->getVNInfoAt(Idx.getRegSlot()); assert(DstVNI && DstVNI->def == Idx.getRegSlot() && "Bad copy value"); CopyValues.push_back(std::make_pair(DstLI, DstVNI)); } if (CopyValues.empty()) return; DEBUG(dbgs() << "Got " << CopyValues.size() << " copies of " << *LI << '\n'); // Try to add defs of the copied values for each kill point. for (unsigned i = 0, e = Kills.size(); i != e; ++i) { SlotIndex Idx = Kills[i]; for (unsigned j = 0, e = CopyValues.size(); j != e; ++j) { LiveInterval *DstLI = CopyValues[j].first; const VNInfo *DstVNI = CopyValues[j].second; if (DstLI->getVNInfoAt(Idx) != DstVNI) continue; // Check that there isn't already a def at Idx LocMap::iterator I = locInts.find(Idx); if (I.valid() && I.start() <= Idx) continue; DEBUG(dbgs() << "Kill at " << Idx << " covered by valno #" << DstVNI->id << " in " << *DstLI << '\n'); MachineInstr *CopyMI = LIS.getInstructionFromIndex(DstVNI->def); assert(CopyMI && CopyMI->isCopy() && "Bad copy value"); unsigned LocNo = getLocationNo(CopyMI->getOperand(0)); I.insert(Idx, Idx.getNextSlot(), LocNo); NewDefs.push_back(std::make_pair(Idx, LocNo)); break; } } }
bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { MF = &mf; MRI = &mf.getRegInfo(); TRI = MF->getTarget().getRegisterInfo(); unsigned NumRegs = TRI->getNumRegs(); PhysRegDef = new MachineInstr*[NumRegs]; PhysRegUse = new MachineInstr*[NumRegs]; PHIVarInfo = new SmallVector<unsigned, 4>[MF->getNumBlockIDs()]; std::fill(PhysRegDef, PhysRegDef + NumRegs, nullptr); std::fill(PhysRegUse, PhysRegUse + NumRegs, nullptr); PHIJoins.clear(); // FIXME: LiveIntervals will be updated to remove its dependence on // LiveVariables to improve compilation time and eliminate bizarre pass // dependencies. Until then, we can't change much in -O0. if (!MRI->isSSA()) report_fatal_error("regalloc=... not currently supported with -O0"); analyzePHINodes(mf); // Calculate live variable information in depth first order on the CFG of the // function. This guarantees that we will see the definition of a virtual // register before its uses due to dominance properties of SSA (except for PHI // nodes, which are treated as a special case). MachineBasicBlock *Entry = MF->begin(); SmallPtrSet<MachineBasicBlock*,16> Visited; for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> > DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited); DFI != E; ++DFI) { MachineBasicBlock *MBB = *DFI; // Mark live-in registers as live-in. SmallVector<unsigned, 4> Defs; for (MachineBasicBlock::livein_iterator II = MBB->livein_begin(), EE = MBB->livein_end(); II != EE; ++II) { assert(TargetRegisterInfo::isPhysicalRegister(*II) && "Cannot have a live-in virtual register!"); HandlePhysRegDef(*II, nullptr, Defs); } // Loop over all of the instructions, processing them. DistanceMap.clear(); unsigned Dist = 0; for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) { MachineInstr *MI = I; if (MI->isDebugValue()) continue; DistanceMap.insert(std::make_pair(MI, Dist++)); // Process all of the operands of the instruction... unsigned NumOperandsToProcess = MI->getNumOperands(); // Unless it is a PHI node. In this case, ONLY process the DEF, not any // of the uses. They will be handled in other basic blocks. if (MI->isPHI()) NumOperandsToProcess = 1; // Clear kill and dead markers. LV will recompute them. SmallVector<unsigned, 4> UseRegs; SmallVector<unsigned, 4> DefRegs; SmallVector<unsigned, 1> RegMasks; for (unsigned i = 0; i != NumOperandsToProcess; ++i) { MachineOperand &MO = MI->getOperand(i); if (MO.isRegMask()) { RegMasks.push_back(i); continue; } if (!MO.isReg() || MO.getReg() == 0) continue; unsigned MOReg = MO.getReg(); if (MO.isUse()) { MO.setIsKill(false); if (MO.readsReg()) UseRegs.push_back(MOReg); } else /*MO.isDef()*/ { MO.setIsDead(false); DefRegs.push_back(MOReg); } } // Process all uses. for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) { unsigned MOReg = UseRegs[i]; if (TargetRegisterInfo::isVirtualRegister(MOReg)) HandleVirtRegUse(MOReg, MBB, MI); else if (!MRI->isReserved(MOReg)) HandlePhysRegUse(MOReg, MI); } // Process all masked registers. (Call clobbers). for (unsigned i = 0, e = RegMasks.size(); i != e; ++i) HandleRegMask(MI->getOperand(RegMasks[i])); // Process all defs. for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) { unsigned MOReg = DefRegs[i]; if (TargetRegisterInfo::isVirtualRegister(MOReg)) HandleVirtRegDef(MOReg, MI); else if (!MRI->isReserved(MOReg)) HandlePhysRegDef(MOReg, MI, Defs); } UpdatePhysRegDefs(MI, Defs); } // Handle any virtual assignments from PHI nodes which might be at the // bottom of this basic block. 
We check all of our successor blocks to see // if they have PHI nodes, and if so, we simulate an assignment at the end // of the current block. if (!PHIVarInfo[MBB->getNumber()].empty()) { SmallVectorImpl<unsigned> &VarInfoVec = PHIVarInfo[MBB->getNumber()]; for (SmallVectorImpl<unsigned>::iterator I = VarInfoVec.begin(), E = VarInfoVec.end(); I != E; ++I) // Mark it alive only in the block we are representing. MarkVirtRegAliveInBlock(getVarInfo(*I),MRI->getVRegDef(*I)->getParent(), MBB); } // MachineCSE may CSE instructions which write to non-allocatable physical // registers across MBBs. Remember if any reserved register is liveout. SmallSet<unsigned, 4> LiveOuts; for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) { MachineBasicBlock *SuccMBB = *SI; if (SuccMBB->isLandingPad()) continue; for (MachineBasicBlock::livein_iterator LI = SuccMBB->livein_begin(), LE = SuccMBB->livein_end(); LI != LE; ++LI) { unsigned LReg = *LI; if (!TRI->isInAllocatableClass(LReg)) // Ignore other live-ins, e.g. those that are live into landing pads. LiveOuts.insert(LReg); } } // Loop over PhysRegDef / PhysRegUse, killing any registers that are // available at the end of the basic block. for (unsigned i = 0; i != NumRegs; ++i) if ((PhysRegDef[i] || PhysRegUse[i]) && !LiveOuts.count(i)) HandlePhysRegDef(i, nullptr, Defs); std::fill(PhysRegDef, PhysRegDef + NumRegs, nullptr); std::fill(PhysRegUse, PhysRegUse + NumRegs, nullptr); } // Convert and transfer the dead / killed information we have gathered into // VirtRegInfo onto MI's. for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i) { const unsigned Reg = TargetRegisterInfo::index2VirtReg(i); for (unsigned j = 0, e2 = VirtRegInfo[Reg].Kills.size(); j != e2; ++j) if (VirtRegInfo[Reg].Kills[j] == MRI->getVRegDef(Reg)) VirtRegInfo[Reg].Kills[j]->addRegisterDead(Reg, TRI); else VirtRegInfo[Reg].Kills[j]->addRegisterKilled(Reg, TRI); } // Check to make sure there are no unreachable blocks in the MC CFG for the // function. If so, it is due to a bug in the instruction selector or some // other part of the code generator if this happens. #ifndef NDEBUG for(MachineFunction::iterator i = MF->begin(), e = MF->end(); i != e; ++i) assert(Visited.count(&*i) != 0 && "unreachable basic block found"); #endif delete[] PhysRegDef; delete[] PhysRegUse; delete[] PHIVarInfo; return false; }
bool AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const { // If the block has no terminators, it just falls into the block after it. MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin()) return false; --I; while (I->isDebugValue()) { if (I == MBB.begin()) return false; --I; } if (!isUnpredicatedTerminator(I)) return false; // Get the last instruction in the block. MachineInstr *LastInst = I; // If there is only one terminator instruction, process it. unsigned LastOpc = LastInst->getOpcode(); if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { if (LastOpc == AArch64::Bimm) { TBB = LastInst->getOperand(0).getMBB(); return false; } if (isCondBranch(LastOpc)) { classifyCondBranch(LastInst, TBB, Cond); return false; } return true; // Can't handle indirect branch. } // Get the instruction before it if it is a terminator. MachineInstr *SecondLastInst = I; unsigned SecondLastOpc = SecondLastInst->getOpcode(); // If AllowModify is true and the block ends with two or more unconditional // branches, delete all but the first unconditional branch. if (AllowModify && LastOpc == AArch64::Bimm) { while (SecondLastOpc == AArch64::Bimm) { LastInst->eraseFromParent(); LastInst = SecondLastInst; LastOpc = LastInst->getOpcode(); if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { // Return now the only terminator is an unconditional branch. TBB = LastInst->getOperand(0).getMBB(); return false; } else { SecondLastInst = I; SecondLastOpc = SecondLastInst->getOpcode(); } } } // If there are three terminators, we don't know what sort of block this is. if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) return true; // If the block ends with a B and a Bcc, handle it. if (LastOpc == AArch64::Bimm) { if (SecondLastOpc == AArch64::Bcc) { TBB = SecondLastInst->getOperand(1).getMBB(); Cond.push_back(MachineOperand::CreateImm(AArch64::Bcc)); Cond.push_back(SecondLastInst->getOperand(0)); FBB = LastInst->getOperand(0).getMBB(); return false; } else if (isCondBranch(SecondLastOpc)) { classifyCondBranch(SecondLastInst, TBB, Cond); FBB = LastInst->getOperand(0).getMBB(); return false; } } // If the block ends with two unconditional branches, handle it. The second // one is not executed, so remove it. if (SecondLastOpc == AArch64::Bimm && LastOpc == AArch64::Bimm) { TBB = SecondLastInst->getOperand(0).getMBB(); I = LastInst; if (AllowModify) I->eraseFromParent(); return false; } // Otherwise, can't handle this. return true; }
MachineInstr * ARMBSISimplifyIndexMemOpsPass::convertToSimpleInstrs(MachineFunction::iterator &MFI, MachineBasicBlock::iterator &MBBI, LiveVariables *LV) const { // FIXME: Thumb2 support. llvm::errs() << "about to convert to three address\n"; llvm::errs() << "arg passed to convert to three address\n"; MachineInstr *MI = MBBI; MachineFunction &MF = *MI->getParent()->getParent(); uint64_t TSFlags = MI->getDesc().TSFlags; bool isPre = false; switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) { default: return NULL; case ARMII::IndexModePre: isPre = true; break; case ARMII::IndexModePost: break; } // Try splitting an indexed load/store to an un-indexed one plus an add/sub // operation. unsigned MemOpc = TII->getUnindexedOpcode(MI->getOpcode()); if (MemOpc == 0) return NULL; MachineInstr *UpdateMI = NULL; MachineInstr *MemMI = NULL; MachineInstrBuilder MemMIB; unsigned AddrMode = (TSFlags & ARMII::AddrModeMask); const MCInstrDesc &MCID = MI->getDesc(); unsigned NumOps = MCID.getNumOperands(); bool isLoad = !MI->mayStore(); const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0); const MachineOperand &Base = MI->getOperand(2); const MachineOperand &Offset = MI->getOperand(NumOps-3); unsigned WBReg = WB.getReg(); unsigned BaseReg = Base.getReg(); unsigned OffReg = -1; unsigned OffImm = -1; if (Offset.isReg()) OffReg = Offset.getReg(); else if (MI->getOperand(NumOps-3).isImm()) { OffImm = MI->getOperand(NumOps-3).getImm(); OffReg = 0; } else { llvm::errs() << "FAIL\n"; return NULL;//FAIL } ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-2).getImm(); switch (AddrMode) { default: llvm_unreachable("Unknown indexed op!"); case ARMII::AddrMode2: { bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub; uint64_t Amt = ARM_AM::getAM2Offset(OffImm); if (isSub) { Amt = ~Amt; Amt++; Amt &= 0x0fff; return NULL; } if (OffReg == 0) { if (ARM_AM::getSOImmVal(Amt) == -1) { // Can't encode it in a so_imm operand. This transformation will // add more than 1 instruction. Abandon! llvm::errs() << "here amt = " << Amt << " is sub: " << isSub << "\n"; return NULL; } llvm::errs() << "NONFAIL offImm amt " << OffImm << " pred = " << Pred << "here amt = " << Amt << " is sub: " << isSub << "\n"; UpdateMI = BuildMI(MF, MI->getDebugLoc(), TII->get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg) .addReg(BaseReg).addImm(Amt) .addImm(Pred).addReg(0).addReg(0); } else if (Amt != 0) { ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm); unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt); UpdateMI = BuildMI(MF, MI->getDebugLoc(), TII->get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc) .addImm(Pred).addReg(0).addReg(0); } else UpdateMI = BuildMI(MF, MI->getDebugLoc(), TII->get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg) .addReg(BaseReg).addReg(OffReg) .addImm(Pred).addReg(0).addReg(0); break; } case ARMII::AddrMode3 : { bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub; unsigned Amt = ARM_AM::getAM3Offset(OffImm); llvm::errs() << " in addressing mode 3\n"; if (OffReg == 0) // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand. UpdateMI = BuildMI(MF, MI->getDebugLoc(), TII->get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) .addReg(BaseReg).addImm(Amt) .addImm(Pred).addReg(0).addReg(0); else UpdateMI = BuildMI(MF, MI->getDebugLoc(), TII->get(isSub ? 
ARM::SUBrr : ARM::ADDrr), WBReg) .addReg(BaseReg).addReg(OffReg) .addImm(Pred).addReg(0).addReg(0); break; } } std::vector<MachineInstr*> NewMIs; if (isPre) { if (isLoad) MemMI = BuildMI(MF, MI->getDebugLoc(), TII->get(MemOpc), MI->getOperand(0).getReg()) .addReg(WBReg).addImm(0).addImm(Pred).addReg(0); else { MemMI = BuildMI(MF, MI->getDebugLoc(), TII->get(MemOpc)).addReg(MI->getOperand(1).getReg()) .addReg(WBReg).addImm(0).addImm(Pred).addReg(0); //AddDefaultPred(MemMIB); } NewMIs.push_back(MemMI); } else { if (isLoad) MemMI = BuildMI(MF, MI->getDebugLoc(), TII->get(MemOpc), MI->getOperand(0).getReg()) .addReg(BaseReg).addImm(0).addImm(Pred).addReg(0); else { MemMI = BuildMI(MF, MI->getDebugLoc(), TII->get(MemOpc)).addReg(MI->getOperand(1).getReg()) .addReg(BaseReg).addImm(0).addImm(Pred).addReg(0); //AddDefaultPred(MemMIB); } if (WB.isDead()) UpdateMI->getOperand(0).setIsDead(); NewMIs.push_back(UpdateMI); NewMIs.push_back(MemMI); } // Transfer LiveVariables states, kill / dead info. if (LV) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) { unsigned Reg = MO.getReg(); LiveVariables::VarInfo &VI = LV->getVarInfo(Reg); if (MO.isDef()) { MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI; if (MO.isDead()) LV->addVirtualRegisterDead(Reg, NewMI); } if (MO.isUse() && MO.isKill()) { for (unsigned j = 0; j < 2; ++j) { // Look at the two new MI's in reverse order. MachineInstr *NewMI = NewMIs[j]; if (!NewMI->readsRegister(Reg)) continue; LV->addVirtualRegisterKilled(Reg, NewMI); if (VI.removeKill(MI)) VI.Kills.push_back(NewMI); break; } } } } } MFI->insert(MBBI, NewMIs[1]); MFI->insert(MBBI, NewMIs[0]); return NewMIs[0]; }
bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** Lowering br_unless **********\n" "********** Function: " << MF.getName() << '\n'); auto &MFI = *MF.getInfo<WebAssemblyFunctionInfo>(); const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); auto &MRI = MF.getRegInfo(); for (auto &MBB : MF) { for (auto MII = MBB.begin(); MII != MBB.end();) { MachineInstr *MI = &*MII++; if (MI->getOpcode() != WebAssembly::BR_UNLESS) continue; unsigned Cond = MI->getOperand(1).getReg(); bool Inverted = false; // Attempt to invert the condition in place. if (MFI.isVRegStackified(Cond)) { assert(MRI.hasOneDef(Cond)); MachineInstr *Def = MRI.getVRegDef(Cond); switch (Def->getOpcode()) { using namespace WebAssembly; case EQ_I32: Def->setDesc(TII.get(NE_I32)); Inverted = true; break; case NE_I32: Def->setDesc(TII.get(EQ_I32)); Inverted = true; break; case GT_S_I32: Def->setDesc(TII.get(LE_S_I32)); Inverted = true; break; case GE_S_I32: Def->setDesc(TII.get(LT_S_I32)); Inverted = true; break; case LT_S_I32: Def->setDesc(TII.get(GE_S_I32)); Inverted = true; break; case LE_S_I32: Def->setDesc(TII.get(GT_S_I32)); Inverted = true; break; case GT_U_I32: Def->setDesc(TII.get(LE_U_I32)); Inverted = true; break; case GE_U_I32: Def->setDesc(TII.get(LT_U_I32)); Inverted = true; break; case LT_U_I32: Def->setDesc(TII.get(GE_U_I32)); Inverted = true; break; case LE_U_I32: Def->setDesc(TII.get(GT_U_I32)); Inverted = true; break; case EQ_I64: Def->setDesc(TII.get(NE_I64)); Inverted = true; break; case NE_I64: Def->setDesc(TII.get(EQ_I64)); Inverted = true; break; case GT_S_I64: Def->setDesc(TII.get(LE_S_I64)); Inverted = true; break; case GE_S_I64: Def->setDesc(TII.get(LT_S_I64)); Inverted = true; break; case LT_S_I64: Def->setDesc(TII.get(GE_S_I64)); Inverted = true; break; case LE_S_I64: Def->setDesc(TII.get(GT_S_I64)); Inverted = true; break; case GT_U_I64: Def->setDesc(TII.get(LE_U_I64)); Inverted = true; break; case GE_U_I64: Def->setDesc(TII.get(LT_U_I64)); Inverted = true; break; case LT_U_I64: Def->setDesc(TII.get(GE_U_I64)); Inverted = true; break; case LE_U_I64: Def->setDesc(TII.get(GT_U_I64)); Inverted = true; break; case EQ_F32: Def->setDesc(TII.get(NE_F32)); Inverted = true; break; case NE_F32: Def->setDesc(TII.get(EQ_F32)); Inverted = true; break; case EQ_F64: Def->setDesc(TII.get(NE_F64)); Inverted = true; break; case NE_F64: Def->setDesc(TII.get(EQ_F64)); Inverted = true; break; default: break; } } // If we weren't able to invert the condition in place. Insert an // instruction to invert it. if (!Inverted) { unsigned Tmp = MRI.createVirtualRegister(&WebAssembly::I32RegClass); BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::EQZ_I32), Tmp) .addReg(Cond); MFI.stackifyVReg(Tmp); Cond = Tmp; Inverted = true; } // The br_unless condition has now been inverted. Insert a br_if and // delete the br_unless. assert(Inverted); BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::BR_IF)) .addOperand(MI->getOperand(0)) .addReg(Cond); MBB.erase(MI); } } return true; }
// Creates the more optimized patterns and generally does all the code // transformations in this pass. unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) { if (MI->isCopy()) { return optimizeAllLanesPattern(MI, MI->getOperand(1).getReg()); } if (MI->isInsertSubreg()) { unsigned DPRReg = MI->getOperand(1).getReg(); unsigned SPRReg = MI->getOperand(2).getReg(); if (TRI->isVirtualRegister(DPRReg) && TRI->isVirtualRegister(SPRReg)) { MachineInstr *DPRMI = MRI->getVRegDef(MI->getOperand(1).getReg()); MachineInstr *SPRMI = MRI->getVRegDef(MI->getOperand(2).getReg()); if (DPRMI && SPRMI) { // See if the first operand of this insert_subreg is IMPLICIT_DEF MachineInstr *ECDef = elideCopies(DPRMI); if (ECDef && ECDef->isImplicitDef()) { // Another corner case - if we're inserting something that is purely // a subreg copy of a DPR, just use that DPR. MachineInstr *EC = elideCopies(SPRMI); // Is it a subreg copy of ssub_0? if (EC && EC->isCopy() && EC->getOperand(1).getSubReg() == ARM::ssub_0) { LLVM_DEBUG(dbgs() << "Found a subreg copy: " << *SPRMI); // Find the thing we're subreg copying out of - is it of the same // regclass as DPRMI? (i.e. a DPR or QPR). unsigned FullReg = SPRMI->getOperand(1).getReg(); const TargetRegisterClass *TRC = MRI->getRegClass(MI->getOperand(1).getReg()); if (TRC->hasSuperClassEq(MRI->getRegClass(FullReg))) { LLVM_DEBUG(dbgs() << "Subreg copy is compatible - returning "); LLVM_DEBUG(dbgs() << printReg(FullReg) << "\n"); eraseInstrWithNoUses(MI); return FullReg; } } return optimizeAllLanesPattern(MI, MI->getOperand(2).getReg()); } } } return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg()); } if (MI->isRegSequence() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass)) { // See if all bar one of the operands are IMPLICIT_DEF and insert the // optimizer pattern accordingly. unsigned NumImplicit = 0, NumTotal = 0; unsigned NonImplicitReg = ~0U; for (unsigned I = 1; I < MI->getNumExplicitOperands(); ++I) { if (!MI->getOperand(I).isReg()) continue; ++NumTotal; unsigned OpReg = MI->getOperand(I).getReg(); if (!TRI->isVirtualRegister(OpReg)) break; MachineInstr *Def = MRI->getVRegDef(OpReg); if (!Def) break; if (Def->isImplicitDef()) ++NumImplicit; else NonImplicitReg = MI->getOperand(I).getReg(); } if (NumImplicit == NumTotal - 1) return optimizeAllLanesPattern(MI, NonImplicitReg); else return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg()); } llvm_unreachable("Unhandled update pattern!"); }
unsigned CriticalAntiDepBreaker:: BreakAntiDependencies(const std::vector<SUnit>& SUnits, MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned InsertPosIndex) { // The code below assumes that there is at least one instruction, // so just duck out immediately if the block is empty. if (SUnits.empty()) return 0; // Keep a map of the MachineInstr*'s back to the SUnit representing them. // This is used for updating debug information. DenseMap<MachineInstr*,const SUnit*> MISUnitMap; // Find the node at the bottom of the critical path. const SUnit *Max = 0; for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { const SUnit *SU = &SUnits[i]; MISUnitMap[SU->getInstr()] = SU; if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency) Max = SU; } #ifndef NDEBUG { DEBUG(dbgs() << "Critical path has total latency " << (Max->getDepth() + Max->Latency) << "\n"); DEBUG(dbgs() << "Available regs:"); for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) { if (KillIndices[Reg] == ~0u) DEBUG(dbgs() << " " << TRI->getName(Reg)); } DEBUG(dbgs() << '\n'); } #endif // Track progress along the critical path through the SUnit graph as we walk // the instructions. const SUnit *CriticalPathSU = Max; MachineInstr *CriticalPathMI = CriticalPathSU->getInstr(); // Consider this pattern: // A = ... // ... = A // A = ... // ... = A // A = ... // ... = A // A = ... // ... = A // There are three anti-dependencies here, and without special care, // we'd break all of them using the same register: // A = ... // ... = A // B = ... // ... = B // B = ... // ... = B // B = ... // ... = B // because at each anti-dependence, B is the first register that // isn't A which is free. This re-introduces anti-dependencies // at all but one of the original anti-dependencies that we were // trying to break. To avoid this, keep track of the most recent // register that each register was replaced with, avoid // using it to repair an anti-dependence on the same register. // This lets us produce this: // A = ... // ... = A // B = ... // ... = B // C = ... // ... = C // B = ... // ... = B // This still has an anti-dependence on B, but at least it isn't on the // original critical path. // // TODO: If we tracked more than one register here, we could potentially // fix that remaining critical edge too. This is a little more involved, // because unlike the most recent register, less recent registers should // still be considered, though only if no other registers are available. std::vector<unsigned> LastNewReg(TRI->getNumRegs(), 0); // Attempt to break anti-dependence edges on the critical path. Walk the // instructions from the bottom up, tracking information about liveness // as we go to help determine which registers are available. unsigned Broken = 0; unsigned Count = InsertPosIndex - 1; for (MachineBasicBlock::iterator I = End, E = Begin; I != E; --Count) { MachineInstr *MI = --I; if (MI->isDebugValue()) continue; // Check if this instruction has a dependence on the critical path that // is an anti-dependence that we may be able to break. If it is, set // AntiDepReg to the non-zero register associated with the anti-dependence. // // We limit our attention to the critical path as a heuristic to avoid // breaking anti-dependence edges that aren't going to significantly // impact the overall schedule. There are a limited number of registers // and we want to save them for the important edges. // // TODO: Instructions with multiple defs could have multiple // anti-dependencies. 
The current code here only knows how to break one // edge per instruction. Note that we'd have to be able to break all of // the anti-dependencies in an instruction in order to be effective. unsigned AntiDepReg = 0; if (MI == CriticalPathMI) { if (const SDep *Edge = CriticalPathStep(CriticalPathSU)) { const SUnit *NextSU = Edge->getSUnit(); // Only consider anti-dependence edges. if (Edge->getKind() == SDep::Anti) { AntiDepReg = Edge->getReg(); assert(AntiDepReg != 0 && "Anti-dependence on reg0?"); if (!AllocatableSet.test(AntiDepReg)) // Don't break anti-dependencies on non-allocatable registers. AntiDepReg = 0; else if (KeepRegs.count(AntiDepReg)) // Don't break anti-dependencies if an use down below requires // this exact register. AntiDepReg = 0; else { // If the SUnit has other dependencies on the SUnit that it // anti-depends on, don't bother breaking the anti-dependency // since those edges would prevent such units from being // scheduled past each other regardless. // // Also, if there are dependencies on other SUnits with the // same register as the anti-dependency, don't attempt to // break it. for (SUnit::const_pred_iterator P = CriticalPathSU->Preds.begin(), PE = CriticalPathSU->Preds.end(); P != PE; ++P) if (P->getSUnit() == NextSU ? (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) : (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) { AntiDepReg = 0; break; } } } CriticalPathSU = NextSU; CriticalPathMI = CriticalPathSU->getInstr(); } else { // We've reached the end of the critical path. CriticalPathSU = 0; CriticalPathMI = 0; } } PrescanInstruction(MI); // If MI's defs have a special allocation requirement, don't allow // any def registers to be changed. Also assume all registers // defined in a call must not be changed (ABI). if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() || TII->isPredicated(MI)) // If this instruction's defs have special allocation requirement, don't // break this anti-dependency. AntiDepReg = 0; else if (AntiDepReg) { // If this instruction has a use of AntiDepReg, breaking it // is invalid. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (Reg == 0) continue; if (MO.isUse() && TRI->regsOverlap(AntiDepReg, Reg)) { AntiDepReg = 0; break; } } } // Determine AntiDepReg's register class, if it is live and is // consistently used within a single class. const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg] : 0; assert((AntiDepReg == 0 || RC != NULL) && "Register should be live if it's causing an anti-dependence!"); if (RC == reinterpret_cast<TargetRegisterClass *>(-1)) AntiDepReg = 0; // Look for a suitable register to use to break the anti-depenence. // // TODO: Instead of picking the first free register, consider which might // be the best. if (AntiDepReg != 0) { std::pair<std::multimap<unsigned, MachineOperand *>::iterator, std::multimap<unsigned, MachineOperand *>::iterator> Range = RegRefs.equal_range(AntiDepReg); if (unsigned NewReg = findSuitableFreeRegister(Range.first, Range.second, AntiDepReg, LastNewReg[AntiDepReg], RC)) { DEBUG(dbgs() << "Breaking anti-dependence edge on " << TRI->getName(AntiDepReg) << " with " << RegRefs.count(AntiDepReg) << " references" << " using " << TRI->getName(NewReg) << "!\n"); // Update the references to the old register to refer to the new // register. 
for (std::multimap<unsigned, MachineOperand *>::iterator Q = Range.first, QE = Range.second; Q != QE; ++Q) { Q->second->setReg(NewReg); // If the SU for the instruction being updated has debug information // related to the anti-dependency register, make sure to update that // as well. const SUnit *SU = MISUnitMap[Q->second->getParent()]; if (!SU) continue; for (unsigned i = 0, e = SU->DbgInstrList.size() ; i < e ; ++i) { MachineInstr *DI = SU->DbgInstrList[i]; assert (DI->getNumOperands()==3 && DI->getOperand(0).isReg() && DI->getOperand(0).getReg() && "Non register dbg_value attached to SUnit!"); if (DI->getOperand(0).getReg() == AntiDepReg) DI->getOperand(0).setReg(NewReg); } } // We just went back in time and modified history; the // liveness information for the anti-dependence reg is now // inconsistent. Set the state as if it were dead. Classes[NewReg] = Classes[AntiDepReg]; DefIndices[NewReg] = DefIndices[AntiDepReg]; KillIndices[NewReg] = KillIndices[AntiDepReg]; assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u)) && "Kill and Def maps aren't consistent for NewReg!"); Classes[AntiDepReg] = 0; DefIndices[AntiDepReg] = KillIndices[AntiDepReg]; KillIndices[AntiDepReg] = ~0u; assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u)) && "Kill and Def maps aren't consistent for AntiDepReg!"); RegRefs.erase(AntiDepReg); LastNewReg[AntiDepReg] = NewReg; ++Broken; } } ScanInstruction(MI, Count); } return Broken; }
void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected SPAdj value"); MachineInstr &MI = *II; DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock &MBB = *MI.getParent(); const MachineFunction &MF = *MBB.getParent(); const AVRTargetMachine &TM = (const AVRTargetMachine &)MF.getTarget(); const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo(); const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetFrameLowering *TFI = TM.getSubtargetImpl()->getFrameLowering(); int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); int Offset = MFI.getObjectOffset(FrameIndex); // Add one to the offset because SP points to an empty slot. Offset += MFI.getStackSize() - TFI->getOffsetOfLocalArea() + 1; // Fold incoming offset. Offset += MI.getOperand(FIOperandNum + 1).getImm(); // This is actually "load effective address" of the stack slot // instruction. We have only two-address instructions, thus we need to // expand it into move + add. if (MI.getOpcode() == AVR::FRMIDX) { MI.setDesc(TII.get(AVR::MOVWRdRr)); MI.getOperand(FIOperandNum).ChangeToRegister(AVR::R29R28, false); assert(Offset > 0 && "Invalid offset"); // We need to materialize the offset via an add instruction. unsigned Opcode; unsigned DstReg = MI.getOperand(0).getReg(); assert(DstReg != AVR::R29R28 && "Dest reg cannot be the frame pointer"); // Generally, to load a frame address two add instructions are emitted that // could get folded into a single one: // movw r31:r30, r29:r28 // adiw r31:r30, 29 // adiw r31:r30, 16 // to: // movw r31:r30, r29:r28 // adiw r31:r30, 45 foldFrameOffset(*std::next(II), Offset, DstReg); // Select the best opcode based on DstReg and the offset size. switch (DstReg) { case AVR::R25R24: case AVR::R27R26: case AVR::R31R30: { if (isUInt<6>(Offset)) { Opcode = AVR::ADIWRdK; break; } LLVM_FALLTHROUGH; } default: { // This opcode will get expanded into a pair of subi/sbci. Opcode = AVR::SUBIWRdK; Offset = -Offset; break; } } MachineInstr *New = BuildMI(MBB, std::next(II), dl, TII.get(Opcode), DstReg) .addReg(DstReg, RegState::Kill) .addImm(Offset); New->getOperand(3).setIsDead(); return; } // If the offset is too big we have to adjust and restore the frame pointer // to materialize a valid load/store with displacement. //:TODO: consider using only one adiw/sbiw chain for more than one frame index if (Offset > 63) { unsigned AddOpc = AVR::ADIWRdK, SubOpc = AVR::SBIWRdK; int AddOffset = Offset - 63 + 1; // For huge offsets where adiw/sbiw cannot be used use a pair of subi/sbci. if ((Offset - 63 + 1) > 63) { AddOpc = AVR::SUBIWRdK; SubOpc = AVR::SUBIWRdK; AddOffset = -AddOffset; } // It is possible that the spiller places this frame instruction in between // a compare and branch, invalidating the contents of SREG set by the // compare instruction because of the add/sub pairs. Conservatively save and // restore SREG before and after each add/sub pair. BuildMI(MBB, II, dl, TII.get(AVR::INRdA), AVR::R0).addImm(0x3f); MachineInstr *New = BuildMI(MBB, II, dl, TII.get(AddOpc), AVR::R29R28) .addReg(AVR::R29R28, RegState::Kill) .addImm(AddOffset); New->getOperand(3).setIsDead(); // Restore SREG. BuildMI(MBB, std::next(II), dl, TII.get(AVR::OUTARr)) .addImm(0x3f) .addReg(AVR::R0, RegState::Kill); // No need to set SREG as dead here otherwise if the next instruction is a // cond branch it will be using a dead register. 
New = BuildMI(MBB, std::next(II), dl, TII.get(SubOpc), AVR::R29R28) .addReg(AVR::R29R28, RegState::Kill) .addImm(Offset - 63 + 1); Offset = 62; } MI.getOperand(FIOperandNum).ChangeToRegister(AVR::R29R28, false); assert(isUInt<6>(Offset) && "Offset is out of range"); MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); }
void X86RegisterInfo:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); bool reseveCallFrame = TFI->hasReservedCallFrame(MF); int Opcode = I->getOpcode(); bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode(); DebugLoc DL = I->getDebugLoc(); uint64_t Amount = !reseveCallFrame ? I->getOperand(0).getImm() : 0; uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0; I = MBB.erase(I); if (!reseveCallFrame) { // If the stack pointer can be changed after prologue, turn the // adjcallstackup instruction into a 'sub ESP, <amt>' and the // adjcallstackdown instruction into 'add ESP, <amt>' // TODO: consider using push / pop instead of sub + store / add if (Amount == 0) return; // We need to keep the stack aligned properly. To do this, we round the // amount of space needed for the outgoing arguments up to the next // alignment boundary. unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign; MachineInstr *New = 0; if (Opcode == TII.getCallFrameSetupOpcode()) { New = BuildMI(MF, DL, TII.get(getSUBriOpcode(Is64Bit, Amount)), StackPtr) .addReg(StackPtr) .addImm(Amount); } else { assert(Opcode == TII.getCallFrameDestroyOpcode()); // Factor out the amount the callee already popped. Amount -= CalleeAmt; if (Amount) { unsigned Opc = getADDriOpcode(Is64Bit, Amount); New = BuildMI(MF, DL, TII.get(Opc), StackPtr) .addReg(StackPtr).addImm(Amount); } } if (New) { // The EFLAGS implicit def is dead. New->getOperand(3).setIsDead(); // Replace the pseudo instruction with a new instruction. MBB.insert(I, New); } return; } if (Opcode == TII.getCallFrameDestroyOpcode() && CalleeAmt) { // If we are performing frame pointer elimination and if the callee pops // something off the stack pointer, add it back. We do this until we have // more advanced stack pointer tracking ability. unsigned Opc = getSUBriOpcode(Is64Bit, CalleeAmt); MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr) .addReg(StackPtr).addImm(CalleeAmt); // The EFLAGS implicit def is dead. New->getOperand(3).setIsDead(); // We are not tracking the stack pointer adjustment by the callee, so make // sure we restore the stack pointer immediately after the call, there may // be spill code inserted between the CALL and ADJCALLSTACKUP instructions. MachineBasicBlock::iterator B = MBB.begin(); while (I != B && !llvm::prior(I)->isCall()) --I; MBB.insert(I, New); } }
static void LowerTlsAddr(MCStreamer &OutStreamer, X86MCInstLower &MCInstLowering, const MachineInstr &MI) { bool is64Bits = MI.getOpcode() == X86::TLS_addr64 || MI.getOpcode() == X86::TLS_base_addr64; bool needsPadding = MI.getOpcode() == X86::TLS_addr64; MCContext &context = OutStreamer.getContext(); if (needsPadding) OutStreamer.EmitInstruction(MCInstBuilder(X86::DATA16_PREFIX)); MCSymbolRefExpr::VariantKind SRVK; switch (MI.getOpcode()) { case X86::TLS_addr32: case X86::TLS_addr64: SRVK = MCSymbolRefExpr::VK_TLSGD; break; case X86::TLS_base_addr32: SRVK = MCSymbolRefExpr::VK_TLSLDM; break; case X86::TLS_base_addr64: SRVK = MCSymbolRefExpr::VK_TLSLD; break; default: llvm_unreachable("unexpected opcode"); } MCSymbol *sym = MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)); const MCSymbolRefExpr *symRef = MCSymbolRefExpr::Create(sym, SRVK, context); MCInst LEA; if (is64Bits) { LEA.setOpcode(X86::LEA64r); LEA.addOperand(MCOperand::CreateReg(X86::RDI)); // dest LEA.addOperand(MCOperand::CreateReg(X86::RIP)); // base LEA.addOperand(MCOperand::CreateImm(1)); // scale LEA.addOperand(MCOperand::CreateReg(0)); // index LEA.addOperand(MCOperand::CreateExpr(symRef)); // disp LEA.addOperand(MCOperand::CreateReg(0)); // seg } else if (SRVK == MCSymbolRefExpr::VK_TLSLDM) { LEA.setOpcode(X86::LEA32r); LEA.addOperand(MCOperand::CreateReg(X86::EAX)); // dest LEA.addOperand(MCOperand::CreateReg(X86::EBX)); // base LEA.addOperand(MCOperand::CreateImm(1)); // scale LEA.addOperand(MCOperand::CreateReg(0)); // index LEA.addOperand(MCOperand::CreateExpr(symRef)); // disp LEA.addOperand(MCOperand::CreateReg(0)); // seg } else { LEA.setOpcode(X86::LEA32r); LEA.addOperand(MCOperand::CreateReg(X86::EAX)); // dest LEA.addOperand(MCOperand::CreateReg(0)); // base LEA.addOperand(MCOperand::CreateImm(1)); // scale LEA.addOperand(MCOperand::CreateReg(X86::EBX)); // index LEA.addOperand(MCOperand::CreateExpr(symRef)); // disp LEA.addOperand(MCOperand::CreateReg(0)); // seg } OutStreamer.EmitInstruction(LEA); if (needsPadding) { OutStreamer.EmitInstruction(MCInstBuilder(X86::DATA16_PREFIX)); OutStreamer.EmitInstruction(MCInstBuilder(X86::DATA16_PREFIX)); OutStreamer.EmitInstruction(MCInstBuilder(X86::REX64_PREFIX)); } StringRef name = is64Bits ? "__tls_get_addr" : "___tls_get_addr"; MCSymbol *tlsGetAddr = context.GetOrCreateSymbol(name); const MCSymbolRefExpr *tlsRef = MCSymbolRefExpr::Create(tlsGetAddr, MCSymbolRefExpr::VK_PLT, context); OutStreamer.EmitInstruction(MCInstBuilder(is64Bits ? X86::CALL64pcrel32 : X86::CALLpcrel32) .addExpr(tlsRef)); }
void MSP430FrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); MachineFrameInfo *MFI = MF.getFrameInfo(); MSP430MachineFunctionInfo *MSP430FI = MF.getInfo<MSP430MachineFunctionInfo>(); const MSP430InstrInfo &TII = *static_cast<const MSP430InstrInfo *>(MF.getSubtarget().getInstrInfo()); MachineBasicBlock::iterator MBBI = MBB.begin(); DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); // Get the number of bytes to allocate from the FrameInfo. uint64_t StackSize = MFI->getStackSize(); uint64_t NumBytes = 0; if (hasFP(MF)) { // Calculate required stack adjustment uint64_t FrameSize = StackSize - 2; NumBytes = FrameSize - MSP430FI->getCalleeSavedFrameSize(); // Get the offset of the stack slot for the EBP register... which is // guaranteed to be the last slot by processFunctionBeforeFrameFinalized. // Update the frame offset adjustment. MFI->setOffsetAdjustment(-NumBytes); // Save FP into the appropriate stack slot... BuildMI(MBB, MBBI, DL, TII.get(MSP430::PUSH16r)) .addReg(MSP430::FP, RegState::Kill); // Update FP with the new base value... BuildMI(MBB, MBBI, DL, TII.get(MSP430::MOV16rr), MSP430::FP) .addReg(MSP430::SP); // Mark the FramePtr as live-in in every block except the entry. for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end(); I != E; ++I) I->addLiveIn(MSP430::FP); } else NumBytes = StackSize - MSP430FI->getCalleeSavedFrameSize(); // Skip the callee-saved push instructions. while (MBBI != MBB.end() && (MBBI->getOpcode() == MSP430::PUSH16r)) ++MBBI; if (MBBI != MBB.end()) DL = MBBI->getDebugLoc(); if (NumBytes) { // adjust stack pointer: SP -= numbytes // If there is an SUB16ri of SP immediately before this instruction, merge // the two. //NumBytes -= mergeSPUpdates(MBB, MBBI, true); // If there is an ADD16ri or SUB16ri of SP immediately after this // instruction, merge the two instructions. // mergeSPUpdatesDown(MBB, MBBI, &NumBytes); if (NumBytes) { MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(MSP430::SUB16ri), MSP430::SP) .addReg(MSP430::SP).addImm(NumBytes); // The SRW implicit def is dead. MI->getOperand(3).setIsDead(); } } }
void RegScavenger::forward() { // Move ptr forward. if (!Tracking) { MBBI = MBB->begin(); Tracking = true; } else { assert(MBBI != MBB->end() && "Already at the end of the basic block!"); MBBI = next(MBBI); } MachineInstr *MI = MBBI; if (MI == ScavengeRestore) { ScavengedReg = 0; ScavengedRC = NULL; ScavengeRestore = NULL; } // Find out which registers are early clobbered, killed, defined, and marked // def-dead in this instruction. BitVector EarlyClobberRegs(NumPhysRegs); BitVector KillRegs(NumPhysRegs); BitVector DefRegs(NumPhysRegs); BitVector DeadRegs(NumPhysRegs); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || MO.isUndef()) continue; unsigned Reg = MO.getReg(); if (!Reg || isReserved(Reg)) continue; if (MO.isUse()) { // Two-address operands implicitly kill. if (MO.isKill() || MI->isRegTiedToDefOperand(i)) addRegWithSubRegs(KillRegs, Reg); } else { assert(MO.isDef()); if (MO.isDead()) addRegWithSubRegs(DeadRegs, Reg); else addRegWithSubRegs(DefRegs, Reg); if (MO.isEarlyClobber()) addRegWithAliases(EarlyClobberRegs, Reg); } } // Verify uses and defs. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || MO.isUndef()) continue; unsigned Reg = MO.getReg(); if (!Reg || isReserved(Reg)) continue; if (MO.isUse()) { assert(isUsed(Reg) && "Using an undefined register!"); assert((!EarlyClobberRegs.test(Reg) || MI->isRegTiedToDefOperand(i)) && "Using an early clobbered register!"); } else { assert(MO.isDef()); assert((KillRegs.test(Reg) || isUnused(Reg) || isLiveInButUnusedBefore(Reg, MI, MBB, TRI, MRI)) && "Re-defining a live register!"); } } // Commit the changes. setUnused(KillRegs); setUnused(DeadRegs); setUsed(DefRegs); }
/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical /// register references and actual offsets. /// void PEI::replaceFrameIndices(MachineFunction &Fn) { if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do? const TargetMachine &TM = Fn.getTarget(); assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!"); const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); const TargetFrameLowering *TFI = TM.getFrameLowering(); bool StackGrowsDown = TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; int FrameSetupOpcode = TII.getCallFrameSetupOpcode(); int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { #ifndef NDEBUG int SPAdjCount = 0; // frame setup / destroy count. #endif int SPAdj = 0; // SP offset due to call frame setup / destroy. if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB); for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { if (I->getOpcode() == FrameSetupOpcode || I->getOpcode() == FrameDestroyOpcode) { #ifndef NDEBUG // Track whether we see even pairs of them SPAdjCount += I->getOpcode() == FrameSetupOpcode ? 1 : -1; #endif // Remember how much SP has been adjusted to create the call // frame. int Size = I->getOperand(0).getImm(); if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) || (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode)) Size = -Size; SPAdj += Size; MachineBasicBlock::iterator PrevI = BB->end(); if (I != BB->begin()) PrevI = prior(I); TRI.eliminateCallFramePseudoInstr(Fn, *BB, I); // Visit the instructions created by eliminateCallFramePseudoInstr(). if (PrevI == BB->end()) I = BB->begin(); // The replaced instr was the first in the block. else I = llvm::next(PrevI); continue; } MachineInstr *MI = I; bool DoIncr = true; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { if (!MI->getOperand(i).isFI()) continue; // Some instructions (e.g. inline asm instructions) can have // multiple frame indices and/or cause eliminateFrameIndex // to insert more than one instruction. We need the register // scavenger to go through all of these instructions so that // it can update its register information. We keep the // iterator at the point before insertion so that we can // revisit them in full. bool AtBeginning = (I == BB->begin()); if (!AtBeginning) --I; // If this instruction has a FrameIndex operand, we need to // use that target machine register info object to eliminate // it. TRI.eliminateFrameIndex(MI, SPAdj, i, FrameIndexVirtualScavenging ? NULL : RS); // Reset the iterator if we were at the beginning of the BB. if (AtBeginning) { I = BB->begin(); DoIncr = false; } MI = 0; break; } if (DoIncr && I != BB->end()) ++I; // Update register states. if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI); } // If we have evenly matched pairs of frame setup / destroy instructions, // make sure the adjustments come out to zero. If we don't have matched // pairs, we can't be sure the missing bit isn't in another basic block // due to a custom inserter playing tricks, so just asserting SPAdj==0 // isn't sufficient. See tMOVCC on Thumb1, for example. assert((SPAdjCount || SPAdj == 0) && "Unbalanced call frame setup / destroy pairs?"); } }
bool ARMInstructionSelector::select(MachineInstr &I) const { assert(I.getParent() && "Instruction should be in a basic block!"); assert(I.getParent()->getParent() && "Instruction should be in a function!"); auto &MBB = *I.getParent(); auto &MF = *MBB.getParent(); auto &MRI = MF.getRegInfo(); if (!isPreISelGenericOpcode(I.getOpcode())) { if (I.isCopy()) return selectCopy(I, TII, MRI, TRI, RBI); return true; } if (selectImpl(I)) return true; MachineInstrBuilder MIB{MF, I}; bool isSExt = false; using namespace TargetOpcode; switch (I.getOpcode()) { case G_SEXT: isSExt = true; LLVM_FALLTHROUGH; case G_ZEXT: { LLT DstTy = MRI.getType(I.getOperand(0).getReg()); // FIXME: Smaller destination sizes coming soon! if (DstTy.getSizeInBits() != 32) { DEBUG(dbgs() << "Unsupported destination size for extension"); return false; } LLT SrcTy = MRI.getType(I.getOperand(1).getReg()); unsigned SrcSize = SrcTy.getSizeInBits(); switch (SrcSize) { case 1: { // ZExt boils down to & 0x1; for SExt we also subtract that from 0 I.setDesc(TII.get(ARM::ANDri)); MIB.addImm(1).add(predOps(ARMCC::AL)).add(condCodeOp()); if (isSExt) { unsigned SExtResult = I.getOperand(0).getReg(); // Use a new virtual register for the result of the AND unsigned AndResult = MRI.createVirtualRegister(&ARM::GPRRegClass); I.getOperand(0).setReg(AndResult); auto InsertBefore = std::next(I.getIterator()); auto SubI = BuildMI(MBB, InsertBefore, I.getDebugLoc(), TII.get(ARM::RSBri)) .addDef(SExtResult) .addUse(AndResult) .addImm(0) .add(predOps(ARMCC::AL)) .add(condCodeOp()); if (!constrainSelectedInstRegOperands(*SubI, TII, TRI, RBI)) return false; } break; } case 8: case 16: { unsigned NewOpc = selectSimpleExtOpc(I.getOpcode(), SrcSize); if (NewOpc == I.getOpcode()) return false; I.setDesc(TII.get(NewOpc)); MIB.addImm(0).add(predOps(ARMCC::AL)); break; } default: DEBUG(dbgs() << "Unsupported source size for extension"); return false; } break; } case G_ANYEXT: case G_TRUNC: { // The high bits are undefined, so there's nothing special to do, just // treat it as a copy. auto SrcReg = I.getOperand(1).getReg(); auto DstReg = I.getOperand(0).getReg(); const auto &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI); const auto &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); if (SrcRegBank.getID() != DstRegBank.getID()) { DEBUG(dbgs() << "G_TRUNC/G_ANYEXT operands on different register banks\n"); return false; } if (SrcRegBank.getID() != ARM::GPRRegBankID) { DEBUG(dbgs() << "G_TRUNC/G_ANYEXT on non-GPR not supported yet\n"); return false; } I.setDesc(TII.get(COPY)); return selectCopy(I, TII, MRI, TRI, RBI); } case G_SELECT: return selectSelect(MIB, MRI); case G_ICMP: { CmpConstants Helper(ARM::CMPrr, ARM::INSTRUCTION_LIST_END, ARM::GPRRegBankID, 32); return selectCmp(Helper, MIB, MRI); } case G_FCMP: { assert(TII.getSubtarget().hasVFP2() && "Can't select fcmp without VFP"); unsigned OpReg = I.getOperand(2).getReg(); unsigned Size = MRI.getType(OpReg).getSizeInBits(); if (Size == 64 && TII.getSubtarget().isFPOnlySP()) { DEBUG(dbgs() << "Subtarget only supports single precision"); return false; } if (Size != 32 && Size != 64) { DEBUG(dbgs() << "Unsupported size for G_FCMP operand"); return false; } CmpConstants Helper(Size == 32 ? ARM::VCMPS : ARM::VCMPD, ARM::FMSTAT, ARM::FPRRegBankID, Size); return selectCmp(Helper, MIB, MRI); } case G_GEP: I.setDesc(TII.get(ARM::ADDrr)); MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); break; case G_FRAME_INDEX: // Add 0 to the given frame index and hope it will eventually be folded into // the user(s). 
I.setDesc(TII.get(ARM::ADDri)); MIB.addImm(0).add(predOps(ARMCC::AL)).add(condCodeOp()); break; case G_CONSTANT: { unsigned Reg = I.getOperand(0).getReg(); if (!validReg(MRI, Reg, 32, ARM::GPRRegBankID)) return false; I.setDesc(TII.get(ARM::MOVi)); MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); auto &Val = I.getOperand(1); if (Val.isCImm()) { if (Val.getCImm()->getBitWidth() > 32) return false; Val.ChangeToImmediate(Val.getCImm()->getZExtValue()); } if (!Val.isImm()) { return false; } break; } case G_GLOBAL_VALUE: return selectGlobal(MIB, MRI); case G_STORE: case G_LOAD: { const auto &MemOp = **I.memoperands_begin(); if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) { DEBUG(dbgs() << "Atomic load/store not supported yet\n"); return false; } unsigned Reg = I.getOperand(0).getReg(); unsigned RegBank = RBI.getRegBank(Reg, MRI, TRI)->getID(); LLT ValTy = MRI.getType(Reg); const auto ValSize = ValTy.getSizeInBits(); assert((ValSize != 64 || TII.getSubtarget().hasVFP2()) && "Don't know how to load/store 64-bit value without VFP"); const auto NewOpc = selectLoadStoreOpCode(I.getOpcode(), RegBank, ValSize); if (NewOpc == G_LOAD || NewOpc == G_STORE) return false; I.setDesc(TII.get(NewOpc)); if (NewOpc == ARM::LDRH || NewOpc == ARM::STRH) // LDRH has a funny addressing mode (there's already a FIXME for it). MIB.addReg(0); MIB.addImm(0).add(predOps(ARMCC::AL)); break; } case G_MERGE_VALUES: { if (!selectMergeValues(MIB, TII, MRI, TRI, RBI)) return false; break; } case G_UNMERGE_VALUES: { if (!selectUnmergeValues(MIB, TII, MRI, TRI, RBI)) return false; break; } case G_BRCOND: { if (!validReg(MRI, I.getOperand(0).getReg(), 1, ARM::GPRRegBankID)) { DEBUG(dbgs() << "Unsupported condition register for G_BRCOND"); return false; } // Set the flags. auto Test = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(ARM::TSTri)) .addReg(I.getOperand(0).getReg()) .addImm(1) .add(predOps(ARMCC::AL)); if (!constrainSelectedInstRegOperands(*Test, TII, TRI, RBI)) return false; // Branch conditionally. auto Branch = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(ARM::Bcc)) .add(I.getOperand(1)) .add(predOps(ARMCC::EQ, ARM::CPSR)); if (!constrainSelectedInstRegOperands(*Branch, TII, TRI, RBI)) return false; I.eraseFromParent(); return true; } default: return false; } return constrainSelectedInstRegOperands(I, TII, TRI, RBI); }
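// A standalone illustration (plain C++, no LLVM types) of the i1 extension
// lowering selected above: G_ZEXT of an i1 becomes "and #1", and G_SEXT adds
// a reverse-subtract from zero, so a set bit becomes all-ones (-1).
#include <cassert>
#include <cstdint>
int main() {
  uint32_t In = 0xfffffff1u;        // only bit 0 is meaningful
  uint32_t Zext = In & 1;           // ANDri Rd, Rn, #1
  int32_t Sext = 0 - (int32_t)Zext; // RSBri Rd, Rzext, #0
  assert(Zext == 1 && Sext == -1);
  return 0;
}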
bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
  DEBUG(dbgs() << "********** Register Stackifying **********\n"
                  "********** Function: " << MF.getName() << '\n');

  bool Changed = false;
  MachineRegisterInfo &MRI = MF.getRegInfo();
  WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();

  // Walk the instructions from the bottom up. Currently we don't look past
  // block boundaries, and the blocks aren't ordered so the block visitation
  // order isn't significant, but we may want to change this in the future.
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : reverse(MBB)) {
      MachineInstr *Insert = &MI;

      // Don't nest anything inside a phi.
      if (Insert->getOpcode() == TargetOpcode::PHI)
        break;

      // Iterate through the inputs in reverse order, since we'll be pulling
      // operands off the stack in LIFO order.
      for (MachineOperand &Op : reverse(Insert->uses())) {
        // We're only interested in explicit virtual register operands.
        if (!Op.isReg() || Op.isImplicit())
          continue;
        unsigned Reg = Op.getReg();
        if (!TargetRegisterInfo::isVirtualRegister(Reg))
          continue;

        // Only consider registers with a single definition.
        // TODO: Eventually we may relax this, to stackify phi transfers.
        MachineInstr *Def = MRI.getVRegDef(Reg);
        if (!Def)
          continue;

        // There's no use in nesting implicit defs inside anything.
        if (Def->getOpcode() == TargetOpcode::IMPLICIT_DEF)
          continue;

        // Argument instructions represent live-in registers and not real
        // instructions.
        if (Def->getOpcode() == WebAssembly::ARGUMENT_I32 ||
            Def->getOpcode() == WebAssembly::ARGUMENT_I64 ||
            Def->getOpcode() == WebAssembly::ARGUMENT_F32 ||
            Def->getOpcode() == WebAssembly::ARGUMENT_F64)
          continue;

        // Single-use expression trees require defs that have one use, or that
        // they be trivially clonable.
        // TODO: Eventually we'll relax this, to take advantage of set_local
        // returning its result.
        bool OneUse = MRI.hasOneUse(Reg);
        if (!OneUse && !Def->isMoveImmediate())
          continue;

        // For now, be conservative and don't look across block boundaries,
        // unless we have something trivially clonable.
        // TODO: Be more aggressive.
        if (Def->getParent() != &MBB && !Def->isMoveImmediate())
          continue;

        // For now, be simple and don't reorder loads, stores, or side effects.
        // TODO: Be more aggressive.
        if (Def->mayLoad() || Def->mayStore() || Def->hasUnmodeledSideEffects())
          continue;

        Changed = true;
        if (OneUse) {
          // Move the def down and nest it in the current instruction.
          MBB.insert(MachineBasicBlock::instr_iterator(Insert),
                     Def->removeFromParent());
          MFI.stackifyVReg(Reg);
          Insert = Def;
        } else {
          // Clone the def down and nest it in the current instruction.
          MachineInstr *Clone = MF.CloneMachineInstr(Def);
          unsigned OldReg = Def->getOperand(0).getReg();
          unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
          assert(Op.getReg() == OldReg);
          assert(Clone->getOperand(0).getReg() == OldReg);
          Op.setReg(NewReg);
          Clone->getOperand(0).setReg(NewReg);
          MBB.insert(MachineBasicBlock::instr_iterator(Insert), Clone);
          MFI.stackifyVReg(Reg);
          Insert = Clone;
        }
      }
    }
  }

  return Changed;
}
// The CC users in CCUsers are testing the result of a comparison of some // value X against zero and we know that any CC value produced by MI // would also reflect the value of X. Try to adjust CCUsers so that // they test the result of MI directly, returning true on success. // Leave everything unchanged on failure. bool SystemZElimCompare:: adjustCCMasksForInstr(MachineInstr *MI, MachineInstr *Compare, SmallVectorImpl<MachineInstr *> &CCUsers) { int Opcode = MI->getOpcode(); const MCInstrDesc &Desc = TII->get(Opcode); unsigned MIFlags = Desc.TSFlags; // See which compare-style condition codes are available. unsigned ReusableCCMask = SystemZII::getCompareZeroCCMask(MIFlags); // For unsigned comparisons with zero, only equality makes sense. unsigned CompareFlags = Compare->getDesc().TSFlags; if (CompareFlags & SystemZII::IsLogical) ReusableCCMask &= SystemZ::CCMASK_CMP_EQ; if (ReusableCCMask == 0) return false; unsigned CCValues = SystemZII::getCCValues(MIFlags); assert((ReusableCCMask & ~CCValues) == 0 && "Invalid CCValues"); // Now check whether these flags are enough for all users. SmallVector<MachineOperand *, 4> AlterMasks; for (unsigned int I = 0, E = CCUsers.size(); I != E; ++I) { MachineInstr *MI = CCUsers[I]; // Fail if this isn't a use of CC that we understand. unsigned Flags = MI->getDesc().TSFlags; unsigned FirstOpNum; if (Flags & SystemZII::CCMaskFirst) FirstOpNum = 0; else if (Flags & SystemZII::CCMaskLast) FirstOpNum = MI->getNumExplicitOperands() - 2; else return false; // Check whether the instruction predicate treats all CC values // outside of ReusableCCMask in the same way. In that case it // doesn't matter what those CC values mean. unsigned CCValid = MI->getOperand(FirstOpNum).getImm(); unsigned CCMask = MI->getOperand(FirstOpNum + 1).getImm(); unsigned OutValid = ~ReusableCCMask & CCValid; unsigned OutMask = ~ReusableCCMask & CCMask; if (OutMask != 0 && OutMask != OutValid) return false; AlterMasks.push_back(&MI->getOperand(FirstOpNum)); AlterMasks.push_back(&MI->getOperand(FirstOpNum + 1)); } // All users are OK. Adjust the masks for MI. for (unsigned I = 0, E = AlterMasks.size(); I != E; I += 2) { AlterMasks[I]->setImm(CCValues); unsigned CCMask = AlterMasks[I + 1]->getImm(); if (CCMask & ~ReusableCCMask) AlterMasks[I + 1]->setImm((CCMask & ReusableCCMask) | (CCValues & ~ReusableCCMask)); } // CC is now live after MI. int CCDef = MI->findRegisterDefOperandIdx(SystemZ::CC, false, true, TRI); assert(CCDef >= 0 && "Couldn't find CC set"); MI->getOperand(CCDef).setIsDead(false); // Clear any intervening kills of CC. MachineBasicBlock::iterator MBBI = MI, MBBE = Compare; for (++MBBI; MBBI != MBBE; ++MBBI) MBBI->clearRegisterKills(SystemZ::CC, TRI); return true; }
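// A standalone illustration of the "do the users care about CC values outside
// ReusableCCMask" test above. The mask values mirror SystemZ's convention
// (CC n is represented by bit (1 << (3 - n))), but they are hard-coded here
// purely for illustration.
#include <cassert>
int main() {
  unsigned CCMASK_0 = 8, CCMASK_1 = 4, CCMASK_2 = 2;
  unsigned ReusableCCMask = CCMASK_0;            // only "equal" is reusable
  // A user that branches on "equal" within a compare's CC range 0..2:
  unsigned CCValid = CCMASK_0 | CCMASK_1 | CCMASK_2;
  unsigned CCMask = CCMASK_0;
  unsigned OutValid = ~ReusableCCMask & CCValid; // CC values we can't reuse
  unsigned OutMask = ~ReusableCCMask & CCMask;   // ...that the user tests
  // OutMask == 0 means the user treats every non-reusable CC value the same
  // way (it ignores them all), so its masks can be rewritten safely.
  assert(OutMask == 0 || OutMask == OutValid);
  return 0;
}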
/// getTripCount - Return a loop-invariant LLVM value indicating the
/// number of times the loop will be executed. The trip count can
/// be either a register or a constant value. If the trip count
/// cannot be determined, this returns null.
///
/// We find the trip count from the phi instruction that defines the
/// induction variable. We follow the links to the CMP instruction
/// to get the trip count.
///
/// Based upon getTripCount in LoopInfo.
///
CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
                                      SmallVector<MachineInstr *, 2> &OldInsts) const {
  MachineBasicBlock *LastMBB = L->getExitingBlock();
  // Don't generate a CTR loop if the loop has more than one exit.
  if (LastMBB == 0)
    return 0;

  MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
  if (LastI->getOpcode() != PPC::BCC)
    return 0;

  // We need to make sure that this compare is defining the condition
  // register actually used by the terminating branch.
  unsigned PredReg = LastI->getOperand(1).getReg();
  DEBUG(dbgs() << "Examining loop with first terminator: " << *LastI);

  unsigned PredCond = LastI->getOperand(0).getImm();
  if (PredCond != PPC::PRED_EQ && PredCond != PPC::PRED_NE)
    return 0;

  // Check that the loop has an induction variable.
  SmallVector<MachineInstr *, 4> IVars, IOps;
  getCanonicalInductionVariable(L, IVars, IOps);
  for (unsigned i = 0; i < IVars.size(); ++i) {
    MachineInstr *IOp = IOps[i];
    MachineInstr *IV_Inst = IVars[i];

    // Canonical loops will end with a 'cmpwi/cmpdi cr, IV, Imm',
    //  if Imm is 0, get the count from the PHI opnd
    //  if Imm is -M, then M is the count
    //  Otherwise, Imm is the count
    MachineOperand *IV_Opnd;
    const MachineOperand *InitialValue;
    if (!L->contains(IV_Inst->getOperand(2).getMBB())) {
      InitialValue = &IV_Inst->getOperand(1);
      IV_Opnd = &IV_Inst->getOperand(3);
    } else {
      InitialValue = &IV_Inst->getOperand(3);
      IV_Opnd = &IV_Inst->getOperand(1);
    }

    DEBUG(dbgs() << "Considering:\n");
    DEBUG(dbgs() << "  induction operation: " << *IOp);
    DEBUG(dbgs() << "  induction variable: " << *IV_Inst);
    DEBUG(dbgs() << "  initial value: " << *InitialValue << "\n");

    // Look for the cmp instruction to determine if we
    // can get a useful trip count. The trip count can
    // be either a register or an immediate. The location
    // of the value depends upon the type (reg or imm).
    for (MachineRegisterInfo::reg_iterator
         RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end();
         RI != RE; ++RI) {
      IV_Opnd = &RI.getOperand();
      bool SignedCmp;
      MachineInstr *MI = IV_Opnd->getParent();
      if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp) &&
          MI->getOperand(0).getReg() == PredReg) {
        OldInsts.push_back(MI);
        OldInsts.push_back(IOp);

        DEBUG(dbgs() << "  compare: " << *MI);

        const MachineOperand &MO = MI->getOperand(2);
        assert(MO.isImm() && "IV Cmp Operand should be an immediate");

        int64_t ImmVal;
        if (SignedCmp)
          ImmVal = (short) MO.getImm();
        else
          ImmVal = MO.getImm();

        const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg());
        assert(L->contains(IV_DefInstr->getParent()) &&
               "IV definition should occur in loop");
        int64_t iv_value = (short) IV_DefInstr->getOperand(2).getImm();

        assert(InitialValue->isReg() && "Expecting register for init value");
        unsigned InitialValueReg = InitialValue->getReg();

        const MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg);

        // Here we need to look for an immediate load (an li or lis/ori pair).
        if (DefInstr && (DefInstr->getOpcode() == PPC::ORI8 ||
                         DefInstr->getOpcode() == PPC::ORI)) {
          int64_t start = (short) DefInstr->getOperand(2).getImm();
          const MachineInstr *DefInstr2 =
            MRI->getVRegDef(DefInstr->getOperand(0).getReg());
          if (DefInstr2 && (DefInstr2->getOpcode() == PPC::LIS8 ||
                            DefInstr2->getOpcode() == PPC::LIS)) {
            DEBUG(dbgs() << "  initial constant: " << *DefInstr);
            DEBUG(dbgs() << "  initial constant: " << *DefInstr2);

            start |= int64_t(short(DefInstr2->getOperand(1).getImm())) << 16;

            int64_t count = ImmVal - start;
            if ((count % iv_value) != 0) {
              return 0;
            }
            return new CountValue(count/iv_value);
          }
        } else if (DefInstr && (DefInstr->getOpcode() == PPC::LI8 ||
                                DefInstr->getOpcode() == PPC::LI)) {
          DEBUG(dbgs() << "  initial constant: " << *DefInstr);

          int64_t count = ImmVal -
            int64_t(short(DefInstr->getOperand(1).getImm()));
          if ((count % iv_value) != 0) {
            return 0;
          }
          return new CountValue(count/iv_value);
        } else if (iv_value == 1 || iv_value == -1) {
          // We can't determine a constant starting value.
          if (ImmVal == 0) {
            return new CountValue(InitialValueReg, iv_value > 0);
          }
          // FIXME: handle non-zero end value.
        }
        // FIXME: handle non-unit increments (we might not want to introduce
        // division but we can handle some 2^n cases with shifts).
      }
    }
  }
  return 0;
}
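// A standalone illustration of the constant trip-count computation above:
// with "li r, start", an induction step of iv_value per iteration, and a
// terminating "cmpwi cr, IV, ImmVal", the count is (ImmVal - start) /
// iv_value, and a non-zero remainder means no constant count is derived.
// The numbers are invented for illustration.
#include <cassert>
#include <cstdint>
int main() {
  int64_t start = 0;    // from the li (or lis/ori pair) feeding the PHI
  int64_t iv_value = 4; // increment applied by the induction operation
  int64_t ImmVal = 100; // immediate the IV is compared against
  int64_t count = ImmVal - start;
  assert(count % iv_value == 0 && "no constant trip count in this case");
  count /= iv_value;
  assert(count == 25);
  return 0;
}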
MipsInstrInfo::BranchType MipsInstrInfo:: AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, bool AllowModify, SmallVectorImpl<MachineInstr*> &BranchInstrs) const { MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend(); // Skip all the debug instructions. while (I != REnd && I->isDebugValue()) ++I; if (I == REnd || !isUnpredicatedTerminator(&*I)) { // This block ends with no branches (it just falls through to its succ). // Leave TBB/FBB null. TBB = FBB = NULL; return BT_NoBranch; } MachineInstr *LastInst = &*I; unsigned LastOpc = LastInst->getOpcode(); BranchInstrs.push_back(LastInst); // Not an analyzable branch (e.g., indirect jump). if (!GetAnalyzableBrOpc(LastOpc)) return LastInst->isIndirectBranch() ? BT_Indirect : BT_None; // Get the second to last instruction in the block. unsigned SecondLastOpc = 0; MachineInstr *SecondLastInst = NULL; if (++I != REnd) { SecondLastInst = &*I; SecondLastOpc = GetAnalyzableBrOpc(SecondLastInst->getOpcode()); // Not an analyzable branch (must be an indirect jump). if (isUnpredicatedTerminator(SecondLastInst) && !SecondLastOpc) return BT_None; } // If there is only one terminator instruction, process it. if (!SecondLastOpc) { // Unconditional branch if (LastOpc == UncondBrOpc) { TBB = LastInst->getOperand(0).getMBB(); return BT_Uncond; } // Conditional branch AnalyzeCondBr(LastInst, LastOpc, TBB, Cond); return BT_Cond; } // If we reached here, there are two branches. // If there are three terminators, we don't know what sort of block this is. if (++I != REnd && isUnpredicatedTerminator(&*I)) return BT_None; BranchInstrs.insert(BranchInstrs.begin(), SecondLastInst); // If second to last instruction is an unconditional branch, // analyze it and remove the last instruction. if (SecondLastOpc == UncondBrOpc) { // Return if the last instruction cannot be removed. if (!AllowModify) return BT_None; TBB = SecondLastInst->getOperand(0).getMBB(); LastInst->eraseFromParent(); BranchInstrs.pop_back(); return BT_Uncond; } // Conditional branch followed by an unconditional branch. // The last one must be unconditional. if (LastOpc != UncondBrOpc) return BT_None; AnalyzeCondBr(SecondLastInst, SecondLastOpc, TBB, Cond); FBB = LastInst->getOperand(0).getMBB(); return BT_CondUncond; }
bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) { // Check that this particular call sequence is amenable to the // transformation. const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>( MF.getSubtarget().getRegisterInfo()); unsigned StackPtr = RegInfo.getStackRegister(); int FrameDestroyOpcode = TII->getCallFrameDestroyOpcode(); // We expect to enter this at the beginning of a call sequence assert(I->getOpcode() == TII->getCallFrameSetupOpcode()); MachineBasicBlock::iterator FrameSetup = I++; // For globals in PIC mode, we can have some LEAs here. // Ignore them, they don't bother us. // TODO: Extend this to something that covers more cases. while (I->getOpcode() == X86::LEA32r) ++I; // We expect a copy instruction here. // TODO: The copy instruction is a lowering artifact. // We should also support a copy-less version, where the stack // pointer is used directly. if (!I->isCopy() || !I->getOperand(0).isReg()) return false; MachineBasicBlock::iterator SPCopy = I++; StackPtr = SPCopy->getOperand(0).getReg(); // Scan the call setup sequence for the pattern we're looking for. // We only handle a simple case - a sequence of MOV32mi or MOV32mr // instructions, that push a sequence of 32-bit values onto the stack, with // no gaps between them. SmallVector<MachineInstr*, 4> MovVector(4, nullptr); unsigned int MaxAdjust = FrameSetup->getOperand(0).getImm() / 4; if (MaxAdjust > 4) MovVector.resize(MaxAdjust, nullptr); do { int Opcode = I->getOpcode(); if (Opcode != X86::MOV32mi && Opcode != X86::MOV32mr) break; // We only want movs of the form: // movl imm/r32, k(%esp) // If we run into something else, bail. // Note that AddrBaseReg may, counter to its name, not be a register, // but rather a frame index. // TODO: Support the fi case. This should probably work now that we // have the infrastructure to track the stack pointer within a call // sequence. if (!I->getOperand(X86::AddrBaseReg).isReg() || (I->getOperand(X86::AddrBaseReg).getReg() != StackPtr) || !I->getOperand(X86::AddrScaleAmt).isImm() || (I->getOperand(X86::AddrScaleAmt).getImm() != 1) || (I->getOperand(X86::AddrIndexReg).getReg() != X86::NoRegister) || (I->getOperand(X86::AddrSegmentReg).getReg() != X86::NoRegister) || !I->getOperand(X86::AddrDisp).isImm()) return false; int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm(); assert(StackDisp >= 0 && "Negative stack displacement when passing parameters"); // We really don't want to consider the unaligned case. if (StackDisp % 4) return false; StackDisp /= 4; assert((size_t)StackDisp < MovVector.size() && "Function call has more parameters than the stack is adjusted for."); // If the same stack slot is being filled twice, something's fishy. if (MovVector[StackDisp] != nullptr) return false; MovVector[StackDisp] = I; ++I; } while (I != MBB.end()); // We now expect the end of the sequence - a call and a stack adjust. if (I == MBB.end()) return false; // For PCrel calls, we expect an additional COPY of the basereg. // If we find one, skip it. if (I->isCopy()) { if (I->getOperand(1).getReg() == MF.getInfo<X86MachineFunctionInfo>()->getGlobalBaseReg()) ++I; else return false; } if (!I->isCall()) return false; MachineBasicBlock::iterator Call = I; if ((++I)->getOpcode() != FrameDestroyOpcode) return false; // Now, go through the vector, and see that we don't have any gaps, // but only a series of 32-bit MOVs. 
int64_t ExpectedDist = 0; auto MMI = MovVector.begin(), MME = MovVector.end(); for (; MMI != MME; ++MMI, ExpectedDist += 4) if (*MMI == nullptr) break; // If the call had no parameters, do nothing if (!ExpectedDist) return false; // We are either at the last parameter, or a gap. // Make sure it's not a gap for (; MMI != MME; ++MMI) if (*MMI != nullptr) return false; // Ok, we can in fact do the transformation for this call. // Do not remove the FrameSetup instruction, but adjust the parameters. // PEI will end up finalizing the handling of this. FrameSetup->getOperand(1).setImm(ExpectedDist); DebugLoc DL = I->getDebugLoc(); // Now, iterate through the vector in reverse order, and replace the movs // with pushes. MOVmi/MOVmr doesn't have any defs, so no need to // replace uses. for (int Idx = (ExpectedDist / 4) - 1; Idx >= 0; --Idx) { MachineBasicBlock::iterator MOV = *MovVector[Idx]; MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands); if (MOV->getOpcode() == X86::MOV32mi) { unsigned PushOpcode = X86::PUSHi32; // If the operand is a small (8-bit) immediate, we can use a // PUSH instruction with a shorter encoding. // Note that isImm() may fail even though this is a MOVmi, because // the operand can also be a symbol. if (PushOp.isImm()) { int64_t Val = PushOp.getImm(); if (isInt<8>(Val)) PushOpcode = X86::PUSH32i8; } BuildMI(MBB, Call, DL, TII->get(PushOpcode)).addOperand(PushOp); } else { unsigned int Reg = PushOp.getReg(); // If PUSHrmm is not slow on this target, try to fold the source of the // push into the instruction. const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>(); bool SlowPUSHrmm = ST.isAtom() || ST.isSLM(); // Check that this is legal to fold. Right now, we're extremely // conservative about that. MachineInstr *DefMov = nullptr; if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) { MachineInstr *Push = BuildMI(MBB, Call, DL, TII->get(X86::PUSH32rmm)); unsigned NumOps = DefMov->getDesc().getNumOperands(); for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i) Push->addOperand(DefMov->getOperand(i)); DefMov->eraseFromParent(); } else { BuildMI(MBB, Call, DL, TII->get(X86::PUSH32r)).addReg(Reg).getInstr(); } } MBB.erase(MOV); } // The stack-pointer copy is no longer used in the call sequences. // There should not be any other users, but we can't commit to that, so: if (MRI->use_empty(SPCopy->getOperand(0).getReg())) SPCopy->eraseFromParent(); // Once we've done this, we need to make sure PEI doesn't assume a reserved // frame. X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); FuncInfo->setHasPushSequences(true); return true; }
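// A standalone model of the displacement checks above: each MOV32mi/MOV32mr
// must target k(%esp) with k a non-negative multiple of 4, each 4-byte slot
// may be filled only once, and after the scan the filled slots must form a
// gapless prefix. The displacements below are invented for illustration.
#include <cassert>
#include <cstddef>
#include <vector>
int main() {
  size_t MaxAdjust = 4;                // call frame size / 4
  std::vector<bool> MovVector(MaxAdjust, false);
  int Disps[] = {0, 4, 8, 12};         // k(%esp) offsets seen in the sequence
  for (int Disp : Disps) {
    assert(Disp >= 0 && Disp % 4 == 0 && "unaligned store to the call frame");
    size_t Slot = Disp / 4;
    assert(Slot < MovVector.size() && !MovVector[Slot] && "slot filled twice");
    MovVector[Slot] = true;
  }
  // ExpectedDist is the length (in slots) of the gapless prefix.
  size_t ExpectedDist = 0;
  while (ExpectedDist < MovVector.size() && MovVector[ExpectedDist])
    ++ExpectedDist;
  for (size_t I = ExpectedDist; I < MovVector.size(); ++I)
    assert(!MovVector[I] && "gap in the pushed arguments");
  assert(ExpectedDist * 4 == 16);      // 16 bytes of arguments become pushes
  return 0;
}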
bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { bool AnyChanges = false; MRI = &MF.getRegInfo(); TRI = MF.getTarget().getRegisterInfo(); TII = MF.getTarget().getInstrInfo(); // Treat reserved registers as always live. BitVector ReservedRegs = TRI->getReservedRegs(MF); // Loop over all instructions in all blocks, from bottom to top, so that it's // more likely that chains of dependent but ultimately dead instructions will // be cleaned up. for (MachineFunction::reverse_iterator I = MF.rbegin(), E = MF.rend(); I != E; ++I) { MachineBasicBlock *MBB = &*I; // Start out assuming that reserved registers are live out of this block. LivePhysRegs = ReservedRegs; // Also add any explicit live-out physregs for this block. if (!MBB->empty() && MBB->back().getDesc().isReturn()) for (MachineRegisterInfo::liveout_iterator LOI = MRI->liveout_begin(), LOE = MRI->liveout_end(); LOI != LOE; ++LOI) { unsigned Reg = *LOI; if (TargetRegisterInfo::isPhysicalRegister(Reg)) LivePhysRegs.set(Reg); } // FIXME: Add live-ins from sucessors to LivePhysRegs. Normally, physregs // are not live across blocks, but some targets (x86) can have flags live // out of a block. // Now scan the instructions and delete dead ones, tracking physreg // liveness as we go. for (MachineBasicBlock::reverse_iterator MII = MBB->rbegin(), MIE = MBB->rend(); MII != MIE; ) { MachineInstr *MI = &*MII; // If the instruction is dead, delete it! if (isDead(MI)) { DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI); // It is possible that some DBG_VALUE instructions refer to this // instruction. Examine each def operand for such references; // if found, mark the DBG_VALUE as undef (but don't delete it). for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; MachineRegisterInfo::use_iterator nextI; for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg), E = MRI->use_end(); I!=E; I=nextI) { nextI = llvm::next(I); // I is invalidated by the setReg MachineOperand& Use = I.getOperand(); MachineInstr *UseMI = Use.getParent(); if (UseMI==MI) continue; assert(Use.isDebug()); UseMI->getOperand(0).setReg(0U); } } AnyChanges = true; MI->eraseFromParent(); ++NumDeletes; MIE = MBB->rend(); // MII is now pointing to the next instruction to process, // so don't increment it. continue; } // Record the physreg defs. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && MO.isDef()) { unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { LivePhysRegs.reset(Reg); // Check the subreg set, not the alias set, because a def // of a super-register may still be partially live after // this def. for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); *SubRegs; ++SubRegs) LivePhysRegs.reset(*SubRegs); } } } // Record the physreg uses, after the defs, in case a physreg is // both defined and used in the same instruction. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && MO.isUse()) { unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { LivePhysRegs.set(Reg); for (const unsigned *AliasSet = TRI->getAliasSet(Reg); *AliasSet; ++AliasSet) LivePhysRegs.set(*AliasSet); } } } // We didn't delete the current instruction, so increment MII to // the next one. 
++MII; } } LivePhysRegs.clear(); return AnyChanges; }
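// A standalone model (register numbers invented) of the bottom-up physreg
// liveness update order above: walking backwards over an instruction, the
// defs are cleared from the live set first and the uses are added afterwards,
// so a register that is both defined and used stays live above the
// instruction.
#include <bitset>
#include <cassert>
int main() {
  std::bitset<16> LivePhysRegs;
  LivePhysRegs.set(3);   // r3 is live below the instruction
  // Processing "r3 = add r3, r5" while walking upwards:
  LivePhysRegs.reset(3); // def of r3 clears it first
  LivePhysRegs.set(3);   // use of r3 re-adds it
  LivePhysRegs.set(5);   // use of r5
  assert(LivePhysRegs.test(3) && LivePhysRegs.test(5));
  return 0;
}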
RegisterBankInfo::InstructionMapping
RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const {
  // For copies we want to walk over the operands and try to find one
  // that has a register bank since the instruction itself will not get
  // us any constraint.
  bool IsCopyLike = isCopyLike(MI);
  // For copy-like instructions, only the mapping of the definition
  // is important. The rest is not constrained.
  unsigned NumOperandsForMapping = IsCopyLike ? 1 : MI.getNumOperands();

  RegisterBankInfo::InstructionMapping Mapping(DefaultMappingID, /*Cost*/ 1,
                                               /*OperandsMapping*/ nullptr,
                                               NumOperandsForMapping);
  const MachineFunction &MF = *MI.getParent()->getParent();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  // We may need to query the instruction encoding to guess the mapping.
  const TargetInstrInfo &TII = *STI.getInstrInfo();

  // Before doing anything complicated check whether the mapping is directly
  // available.
  bool CompleteMapping = true;

  SmallVector<const ValueMapping *, 8> OperandsMapping(NumOperandsForMapping);
  for (unsigned OpIdx = 0, EndIdx = MI.getNumOperands(); OpIdx != EndIdx;
       ++OpIdx) {
    const MachineOperand &MO = MI.getOperand(OpIdx);
    if (!MO.isReg())
      continue;
    unsigned Reg = MO.getReg();
    if (!Reg)
      continue;
    // The register bank of Reg is just a side effect of the current
    // execution and in particular, there is no reason to believe this
    // is the best default mapping for the current instruction. Keep
    // it as an alternative register bank if we cannot figure out
    // something.
    const RegisterBank *AltRegBank = getRegBank(Reg, MRI, TRI);
    // For copy-like instructions, we want to reuse the register bank
    // that is already set on Reg, if any, since those instructions do
    // not have any constraints.
    const RegisterBank *CurRegBank = IsCopyLike ? AltRegBank : nullptr;
    if (!CurRegBank) {
      // If this is a target specific instruction, we can deduce
      // the register bank from the encoding constraints.
      CurRegBank = getRegBankFromConstraints(MI, OpIdx, TII, TRI);
      if (!CurRegBank) {
        // All our attempts failed, give up.
        CompleteMapping = false;

        if (!IsCopyLike)
          // MI does not carry enough information to guess the mapping.
          return InstructionMapping();
        continue;
      }
    }
    const ValueMapping *ValMapping =
        &getValueMapping(0, getSizeInBits(Reg, MRI, TRI), *CurRegBank);
    if (IsCopyLike) {
      OperandsMapping[0] = ValMapping;
      CompleteMapping = true;
      break;
    }
    OperandsMapping[OpIdx] = ValMapping;
  }

  if (IsCopyLike && !CompleteMapping)
    // No way to deduce the type from what we have.
    return InstructionMapping();

  assert(CompleteMapping && "Setting an incomplete mapping");
  Mapping.setOperandsMapping(getOperandsMapping(OperandsMapping));
  return Mapping;
}
void MSP430FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); MSP430MachineFunctionInfo *MSP430FI = MF.getInfo<MSP430MachineFunctionInfo>(); const MSP430InstrInfo &TII = *static_cast<const MSP430InstrInfo *>(MF.getSubtarget().getInstrInfo()); MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); unsigned RetOpcode = MBBI->getOpcode(); DebugLoc DL = MBBI->getDebugLoc(); switch (RetOpcode) { case MSP430::RET: case MSP430::RETI: break; // These are ok default: llvm_unreachable("Can only insert epilog into returning blocks"); } // Get the number of bytes to allocate from the FrameInfo uint64_t StackSize = MFI->getStackSize(); unsigned CSSize = MSP430FI->getCalleeSavedFrameSize(); uint64_t NumBytes = 0; if (hasFP(MF)) { // Calculate required stack adjustment uint64_t FrameSize = StackSize - 2; NumBytes = FrameSize - CSSize; // pop FP. BuildMI(MBB, MBBI, DL, TII.get(MSP430::POP16r), MSP430::FP); } else NumBytes = StackSize - CSSize; // Skip the callee-saved pop instructions. while (MBBI != MBB.begin()) { MachineBasicBlock::iterator PI = std::prev(MBBI); unsigned Opc = PI->getOpcode(); if (Opc != MSP430::POP16r && !PI->isTerminator()) break; --MBBI; } DL = MBBI->getDebugLoc(); // If there is an ADD16ri or SUB16ri of SP immediately before this // instruction, merge the two instructions. //if (NumBytes || MFI->hasVarSizedObjects()) // mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes); if (MFI->hasVarSizedObjects()) { BuildMI(MBB, MBBI, DL, TII.get(MSP430::MOV16rr), MSP430::SP).addReg(MSP430::FP); if (CSSize) { MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(MSP430::SUB16ri), MSP430::SP) .addReg(MSP430::SP).addImm(CSSize); // The SRW implicit def is dead. MI->getOperand(3).setIsDead(); } } else { // adjust stack pointer back: SP += numbytes if (NumBytes) { MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(MSP430::ADD16ri), MSP430::SP) .addReg(MSP430::SP).addImm(NumBytes); // The SRW implicit def is dead. MI->getOperand(3).setIsDead(); } } }
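// A standalone illustration of the epilogue size computation above when a
// frame pointer is used: the saved FP occupies 2 bytes, the callee-saved
// registers occupy CSSize bytes, and the remainder is released with an
// "add #N, SP". The sizes below are invented for illustration.
#include <cassert>
#include <cstdint>
int main() {
  uint64_t StackSize = 20;            // MFI->getStackSize()
  uint64_t CSSize = 4;                // callee-saved frame size
  uint64_t FrameSize = StackSize - 2; // minus the slot holding the old FP
  uint64_t NumBytes = FrameSize - CSSize;
  assert(NumBytes == 14);             // emitted as ADD16ri SP, SP, 14
  return 0;
}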
void X86ExpandPseudo::ExpandICallBranchFunnel( MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI) { MachineBasicBlock *JTMBB = MBB; MachineInstr *JTInst = &*MBBI; MachineFunction *MF = MBB->getParent(); const BasicBlock *BB = MBB->getBasicBlock(); auto InsPt = MachineFunction::iterator(MBB); ++InsPt; std::vector<std::pair<MachineBasicBlock *, unsigned>> TargetMBBs; DebugLoc DL = JTInst->getDebugLoc(); MachineOperand Selector = JTInst->getOperand(0); const GlobalValue *CombinedGlobal = JTInst->getOperand(1).getGlobal(); auto CmpTarget = [&](unsigned Target) { BuildMI(*MBB, MBBI, DL, TII->get(X86::LEA64r), X86::R11) .addReg(X86::RIP) .addImm(1) .addReg(0) .addGlobalAddress(CombinedGlobal, JTInst->getOperand(2 + 2 * Target).getImm()) .addReg(0); BuildMI(*MBB, MBBI, DL, TII->get(X86::CMP64rr)) .add(Selector) .addReg(X86::R11); }; auto CreateMBB = [&]() { auto *NewMBB = MF->CreateMachineBasicBlock(BB); MBB->addSuccessor(NewMBB); return NewMBB; }; auto EmitCondJump = [&](unsigned Opcode, MachineBasicBlock *ThenMBB) { BuildMI(*MBB, MBBI, DL, TII->get(Opcode)).addMBB(ThenMBB); auto *ElseMBB = CreateMBB(); MF->insert(InsPt, ElseMBB); MBB = ElseMBB; MBBI = MBB->end(); }; auto EmitCondJumpTarget = [&](unsigned Opcode, unsigned Target) { auto *ThenMBB = CreateMBB(); TargetMBBs.push_back({ThenMBB, Target}); EmitCondJump(Opcode, ThenMBB); }; auto EmitTailCall = [&](unsigned Target) { BuildMI(*MBB, MBBI, DL, TII->get(X86::TAILJMPd64)) .add(JTInst->getOperand(3 + 2 * Target)); }; std::function<void(unsigned, unsigned)> EmitBranchFunnel = [&](unsigned FirstTarget, unsigned NumTargets) { if (NumTargets == 1) { EmitTailCall(FirstTarget); return; } if (NumTargets == 2) { CmpTarget(FirstTarget + 1); EmitCondJumpTarget(X86::JB_1, FirstTarget); EmitTailCall(FirstTarget + 1); return; } if (NumTargets < 6) { CmpTarget(FirstTarget + 1); EmitCondJumpTarget(X86::JB_1, FirstTarget); EmitCondJumpTarget(X86::JE_1, FirstTarget + 1); EmitBranchFunnel(FirstTarget + 2, NumTargets - 2); return; } auto *ThenMBB = CreateMBB(); CmpTarget(FirstTarget + (NumTargets / 2)); EmitCondJump(X86::JB_1, ThenMBB); EmitCondJumpTarget(X86::JE_1, FirstTarget + (NumTargets / 2)); EmitBranchFunnel(FirstTarget + (NumTargets / 2) + 1, NumTargets - (NumTargets / 2) - 1); MF->insert(InsPt, ThenMBB); MBB = ThenMBB; MBBI = MBB->end(); EmitBranchFunnel(FirstTarget, NumTargets / 2); }; EmitBranchFunnel(0, (JTInst->getNumOperands() - 2) / 2); for (auto P : TargetMBBs) { MF->insert(InsPt, P.first); BuildMI(P.first, DL, TII->get(X86::TAILJMPd64)) .add(JTInst->getOperand(3 + 2 * P.second)); } JTMBB->erase(JTInst); }
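// A standalone sketch of the comparison tree built by EmitBranchFunnel above:
// one or two targets are handled directly, up to five targets peel off two
// at a time, and larger ranges are split around a midpoint that is first
// range-checked (JB) and then tested for equality (JE). This just prints a
// textual trace of the shape; it emits no machine instructions.
#include <cstdio>
static void BranchFunnel(unsigned First, unsigned NumTargets) {
  if (NumTargets == 1) {
    std::printf("tailcall %u\n", First);
    return;
  }
  if (NumTargets == 2) {
    std::printf("cmp %u; jb -> tailcall %u\n", First + 1, First);
    std::printf("tailcall %u\n", First + 1);
    return;
  }
  if (NumTargets < 6) {
    std::printf("cmp %u; jb -> tailcall %u; je -> tailcall %u\n",
                First + 1, First, First + 1);
    BranchFunnel(First + 2, NumTargets - 2);
    return;
  }
  unsigned Mid = NumTargets / 2;
  std::printf("cmp %u; jb -> lower half; je -> tailcall %u\n", First + Mid,
              First + Mid);
  BranchFunnel(First + Mid + 1, NumTargets - Mid - 1); // upper half
  BranchFunnel(First, Mid);                            // lower half
}
int main() { BranchFunnel(0, 7); return 0; }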
MachineBasicBlock::iterator MSP430FrameLowering::eliminateCallFramePseudoInstr( MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { const MSP430InstrInfo &TII = *static_cast<const MSP430InstrInfo *>(MF.getSubtarget().getInstrInfo()); unsigned StackAlign = getStackAlignment(); if (!hasReservedCallFrame(MF)) { // If the stack pointer can be changed after prologue, turn the // adjcallstackup instruction into a 'sub SP, <amt>' and the // adjcallstackdown instruction into 'add SP, <amt>' // TODO: consider using push / pop instead of sub + store / add MachineInstr *Old = I; uint64_t Amount = Old->getOperand(0).getImm(); if (Amount != 0) { // We need to keep the stack aligned properly. To do this, we round the // amount of space needed for the outgoing arguments up to the next // alignment boundary. Amount = (Amount+StackAlign-1)/StackAlign*StackAlign; MachineInstr *New = nullptr; if (Old->getOpcode() == TII.getCallFrameSetupOpcode()) { New = BuildMI(MF, Old->getDebugLoc(), TII.get(MSP430::SUB16ri), MSP430::SP) .addReg(MSP430::SP).addImm(Amount); } else { assert(Old->getOpcode() == TII.getCallFrameDestroyOpcode()); // factor out the amount the callee already popped. uint64_t CalleeAmt = Old->getOperand(1).getImm(); Amount -= CalleeAmt; if (Amount) New = BuildMI(MF, Old->getDebugLoc(), TII.get(MSP430::ADD16ri), MSP430::SP) .addReg(MSP430::SP).addImm(Amount); } if (New) { // The SRW implicit def is dead. New->getOperand(3).setIsDead(); // Replace the pseudo instruction with a new instruction... MBB.insert(I, New); } } } else if (I->getOpcode() == TII.getCallFrameDestroyOpcode()) { // If we are performing frame pointer elimination and if the callee pops // something off the stack pointer, add it back. if (uint64_t CalleeAmt = I->getOperand(1).getImm()) { MachineInstr *Old = I; MachineInstr *New = BuildMI(MF, Old->getDebugLoc(), TII.get(MSP430::SUB16ri), MSP430::SP).addReg(MSP430::SP).addImm(CalleeAmt); // The SRW implicit def is dead. New->getOperand(3).setIsDead(); MBB.insert(I, New); } } return MBB.erase(I); }
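// A standalone illustration of the stack-alignment rounding used above: the
// outgoing-argument area is rounded up to the next multiple of the stack
// alignment before SP is adjusted. The values below are invented for
// illustration.
#include <cassert>
#include <cstdint>
int main() {
  uint64_t StackAlign = 2;
  uint64_t Amount = 7; // bytes of outgoing arguments requested
  Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
  assert(Amount == 8); // emitted as SUB16ri SP, SP, 8 at frame setup
  return 0;
}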
/// scavengeFrameVirtualRegs - Replace all frame index virtual registers /// with physical registers. Use the register scavenger to find an /// appropriate register to use. /// /// FIXME: Iterating over the instruction stream is unnecessary. We can simply /// iterate over the vreg use list, which at this point only contains machine /// operands for which eliminateFrameIndex need a new scratch reg. void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { // Run through the instructions and find any virtual registers. for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { RS->enterBasicBlock(BB); int SPAdj = 0; // The instruction stream may change in the loop, so check BB->end() // directly. for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { // We might end up here again with a NULL iterator if we scavenged a // register for which we inserted spill code for definition by what was // originally the first instruction in BB. if (I == MachineBasicBlock::iterator(nullptr)) I = BB->begin(); MachineInstr *MI = I; MachineBasicBlock::iterator J = std::next(I); MachineBasicBlock::iterator P = I == BB->begin() ? MachineBasicBlock::iterator(nullptr) : std::prev(I); // RS should process this instruction before we might scavenge at this // location. This is because we might be replacing a virtual register // defined by this instruction, and if so, registers killed by this // instruction are available, and defined registers are not. RS->forward(I); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { if (MI->getOperand(i).isReg()) { MachineOperand &MO = MI->getOperand(i); unsigned Reg = MO.getReg(); if (Reg == 0) continue; if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; // When we first encounter a new virtual register, it // must be a definition. assert(MI->getOperand(i).isDef() && "frame index virtual missing def!"); // Scavenge a new scratch register const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg); unsigned ScratchReg = RS->scavengeRegister(RC, J, SPAdj); ++NumScavengedRegs; // Replace this reference to the virtual register with the // scratch register. assert (ScratchReg && "Missing scratch register!"); MachineRegisterInfo &MRI = Fn.getRegInfo(); Fn.getRegInfo().replaceRegWith(Reg, ScratchReg); // Make sure MRI now accounts this register as used. MRI.setPhysRegUsed(ScratchReg); // Because this instruction was processed by the RS before this // register was allocated, make sure that the RS now records the // register as being used. RS->setRegUsed(ScratchReg); } } // If the scavenger needed to use one of its spill slots, the // spill code will have been inserted in between I and J. This is a // problem because we need the spill code before I: Move I to just // prior to J. if (I != std::prev(J)) { BB->splice(J, BB, I); // Before we move I, we need to prepare the RS to visit I again. // Specifically, RS will assert if it sees uses of registers that // it believes are undefined. Because we have already processed // register kills in I, when it visits I again, it will believe that // those registers are undefined. To avoid this situation, unprocess // the instruction I. assert(RS->getCurrentPosition() == I && "The register scavenger has an unexpected position"); I = P; RS->unprocess(P); } else ++I; } } }
/// TailDuplicateAndUpdate - Tail duplicate the block and cleanup. bool TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB, bool IsSimple, MachineFunction &MF) { // Save the successors list. SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(), MBB->succ_end()); SmallVector<MachineBasicBlock*, 8> TDBBs; SmallVector<MachineInstr*, 16> Copies; if (!TailDuplicate(MBB, IsSimple, MF, TDBBs, Copies)) return false; ++NumTails; SmallVector<MachineInstr*, 8> NewPHIs; MachineSSAUpdater SSAUpdate(MF, &NewPHIs); // TailBB's immediate successors are now successors of those predecessors // which duplicated TailBB. Add the predecessors as sources to the PHI // instructions. bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken(); if (PreRegAlloc) UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs); // If it is dead, remove it. if (isDead) { NumInstrDups -= MBB->size(); RemoveDeadBlock(MBB); ++NumDeadBlocks; } // Update SSA form. if (!SSAUpdateVRs.empty()) { for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) { unsigned VReg = SSAUpdateVRs[i]; SSAUpdate.Initialize(VReg); // If the original definition is still around, add it as an available // value. MachineInstr *DefMI = MRI->getVRegDef(VReg); MachineBasicBlock *DefBB = 0; if (DefMI) { DefBB = DefMI->getParent(); SSAUpdate.AddAvailableValue(DefBB, VReg); } // Add the new vregs as available values. DenseMap<unsigned, AvailableValsTy>::iterator LI = SSAUpdateVals.find(VReg); for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { MachineBasicBlock *SrcBB = LI->second[j].first; unsigned SrcReg = LI->second[j].second; SSAUpdate.AddAvailableValue(SrcBB, SrcReg); } // Rewrite uses that are outside of the original def's block. MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg); while (UI != MRI->use_end()) { MachineOperand &UseMO = UI.getOperand(); MachineInstr *UseMI = &*UI; ++UI; if (UseMI->isDebugValue()) { // SSAUpdate can replace the use with an undef. That creates // a debug instruction that is a kill. // FIXME: Should it SSAUpdate job to delete debug instructions // instead of replacing the use with undef? UseMI->eraseFromParent(); continue; } if (UseMI->getParent() == DefBB && !UseMI->isPHI()) continue; SSAUpdate.RewriteUse(UseMO); } } SSAUpdateVRs.clear(); SSAUpdateVals.clear(); } // Eliminate some of the copies inserted by tail duplication to maintain // SSA form. for (unsigned i = 0, e = Copies.size(); i != e; ++i) { MachineInstr *Copy = Copies[i]; if (!Copy->isCopy()) continue; unsigned Dst = Copy->getOperand(0).getReg(); unsigned Src = Copy->getOperand(1).getReg(); if (MRI->hasOneNonDBGUse(Src) && MRI->constrainRegClass(Src, MRI->getRegClass(Dst))) { // Copy is the only use. Do trivial copy propagation here. MRI->replaceRegWith(Dst, Src); Copy->eraseFromParent(); } } if (NewPHIs.size()) NumAddedPHIs += NewPHIs.size(); return true; }