/// TransferImplicitOperands - MI is a pseudo-instruction, and the lowered
/// replacement instructions immediately precede it. Copy any implicit
/// operands from MI to the replacement instruction.
void ExpandPostRA::TransferImplicitOperands(MachineInstr *MI) {
  MachineBasicBlock::iterator CopyMI = MI;
  --CopyMI;

  for (const MachineOperand &MO : MI->implicit_operands())
    if (MO.isReg())
      CopyMI->addOperand(MO);
}
/// TransferImplicitDefs - MI is a pseudo-instruction, and the lowered
/// replacement instructions immediately precede it. Copy any implicit-def
/// operands from MI to the replacement instruction.
void ExpandPostRA::TransferImplicitDefs(MachineInstr *MI) {
  MachineBasicBlock::iterator CopyMI = MI;
  --CopyMI;

  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || !MO.isImplicit() || MO.isUse())
      continue;
    CopyMI->addOperand(MachineOperand::CreateReg(MO.getReg(), true, true));
  }
}
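// A minimal sketch (hypothetical, not part of the pass above) of the calling
// pattern both helpers assume: the lowered replacement is built immediately
// before the pseudo, implicit operands are transferred, and only then is the
// pseudo erased. TransferImplicitDefs plays the same role for expansions that
// should only inherit the implicit defs (e.g. SUBREG_TO_REG lowering). The
// function name and 'Pass' exposing the helper publicly are assumptions made
// for illustration.
static void lowerCopyPseudoSketch(ExpandPostRA &Pass, MachineInstr *MI,
                                  const TargetInstrInfo *TII) {
  MachineOperand &DstMO = MI->getOperand(0);
  MachineOperand &SrcMO = MI->getOperand(1);

  // copyPhysReg inserts the real copy right before MI, which is exactly the
  // instruction TransferImplicitOperands expects to find at --MI.
  TII->copyPhysReg(*MI->getParent(), MI, MI->getDebugLoc(),
                   DstMO.getReg(), SrcMO.getReg(), SrcMO.isKill());

  // Only bother if the pseudo carried implicit operands beyond dst/src.
  if (MI->getNumOperands() > 2)
    Pass.TransferImplicitOperands(MI);
  MI->eraseFromParent();
}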
void TMS320C64XFrameLowering::emitEpilogue(MachineFunction &MF,
                                           MachineBasicBlock &MBB) const {
  DebugLoc DL;
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineBasicBlock::iterator MBBI = prior(MBB.end());

  if (MFI->hasVarSizedObjects())
    llvm_unreachable("Can't currently support varsize stack frame");

  if (MBBI->getOpcode() != TMS320C64X::ret)
    llvm_unreachable("Can't insert epilogue before non-ret insn");

  const TMS320C64XInstrInfo &TII = *TM.getInstrInfo();

  if (TM.getSubtarget<TMS320C64XSubtarget>().enablePostRAScheduler()) {
    // Unbundled epilogue instructions (woven into other bundles).

    // Restore the return address (B3).
    TMS320C64XInstrInfo::addFormOp(
        TMS320C64XInstrInfo::addDefaultPred(
            BuildMI(MBB, MBBI, DL, TII.get(TMS320C64X::word_load_1))
                .addReg(TMS320C64X::B3, RegState::Define)
                .addReg(TMS320C64X::A15)
                .addImm(-1)),
        TMS320C64XII::unit_d, true);

    // Add the (implicit) use of B3 to ret.
    MBBI->addOperand(MachineOperand::CreateReg(TMS320C64X::B3, false, true));

    // Reset the stack pointer.
    TMS320C64XInstrInfo::addDefaultPred(
        BuildMI(MBB, MBBI, DL, TII.get(TMS320C64X::mv))
            .addReg(TMS320C64X::B15, RegState::Define)
            .addReg(TMS320C64X::A15));

    // Restore the caller's frame pointer.
    TMS320C64XInstrInfo::addFormOp(
        TMS320C64XInstrInfo::addDefaultPred(
            BuildMI(MBB, MBBI, DL, TII.get(TMS320C64X::word_load_1))
                .addReg(TMS320C64X::A15, RegState::Define)
                .addReg(TMS320C64X::A15)
                .addImm(0)),
        TMS320C64XII::unit_d, false);
  } else {
    // Epilogue via pseudo instruction (is a scheduling boundary).
    TMS320C64XInstrInfo::addDefaultPred(
        BuildMI(MBB, MBBI, DL, TII.get(TMS320C64X::epilog)));
  }
}
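// Net effect of the scheduled variant above, written as pseudo-assembly
// (illustrative only; the exact C6x mnemonics, unit assignments and
// addressing syntax are the target's concern). A15 acts as the frame
// pointer, B15 as the stack pointer, and B3 holds the return address:
//   LDW *A15[-1], B3   ; restore return address           (unit D)
//   MV  A15,      B15  ; reset SP to FP, popping the frame
//   LDW *A15[0],  A15  ; restore the caller's frame pointer (unit D)
//   RET B3             ; 'ret', now carrying an implicit use of B3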
/// UpdateSuccessorsPHIs - After FromBB is tail duplicated into its predecessor
/// blocks, the successors have gained new predecessors. Update the PHI
/// instructions in them accordingly.
void
TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
                                  SmallVector<MachineBasicBlock*, 8> &TDBBs,
                                  SmallSetVector<MachineBasicBlock*,8> &Succs) {
  for (SmallSetVector<MachineBasicBlock*, 8>::iterator SI = Succs.begin(),
         SE = Succs.end(); SI != SE; ++SI) {
    MachineBasicBlock *SuccBB = *SI;
    for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end();
         II != EE; ++II) {
      if (!II->isPHI())
        break;
      unsigned Idx = 0;
      for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) {
        MachineOperand &MO = II->getOperand(i+1);
        if (MO.getMBB() == FromBB) {
          Idx = i;
          break;
        }
      }

      assert(Idx != 0);
      MachineOperand &MO0 = II->getOperand(Idx);
      unsigned Reg = MO0.getReg();
      if (isDead) {
        // Folded into the previous BB.
        // There could be duplicate phi source entries. FIXME: Should sdisel
        // or an earlier pass have fixed this?
        for (unsigned i = II->getNumOperands()-2; i != Idx; i -= 2) {
          MachineOperand &MO = II->getOperand(i+1);
          if (MO.getMBB() == FromBB) {
            II->RemoveOperand(i+1);
            II->RemoveOperand(i);
          }
        }
      } else
        Idx = 0;

      // If Idx is set, the operands at Idx and Idx+1 must be removed.
      // We reuse the location to avoid expensive RemoveOperand calls.

      DenseMap<unsigned,AvailableValsTy>::iterator LI=SSAUpdateVals.find(Reg);
      if (LI != SSAUpdateVals.end()) {
        // This register is defined in the tail block.
        for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
          MachineBasicBlock *SrcBB = LI->second[j].first;
          // If we didn't duplicate a bb into a particular predecessor, we
          // might still have added an entry to SSAUpdateVals to correctly
          // recompute SSA. In that case, avoid adding a dummy extra argument
          // to this PHI.
          if (!SrcBB->isSuccessor(SuccBB))
            continue;

          unsigned SrcReg = LI->second[j].second;
          if (Idx != 0) {
            II->getOperand(Idx).setReg(SrcReg);
            II->getOperand(Idx+1).setMBB(SrcBB);
            Idx = 0;
          } else {
            II->addOperand(MachineOperand::CreateReg(SrcReg, false));
            II->addOperand(MachineOperand::CreateMBB(SrcBB));
          }
        }
      } else {
        // Live in tail block, must also be live in predecessors.
        for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) {
          MachineBasicBlock *SrcBB = TDBBs[j];
          if (Idx != 0) {
            II->getOperand(Idx).setReg(Reg);
            II->getOperand(Idx+1).setMBB(SrcBB);
            Idx = 0;
          } else {
            II->addOperand(MachineOperand::CreateReg(Reg, false));
            II->addOperand(MachineOperand::CreateMBB(SrcBB));
          }
        }
      }
      if (Idx != 0) {
        II->RemoveOperand(Idx+1);
        II->RemoveOperand(Idx);
      }
    }
  }
}
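// For reference, a machine-level PHI lays out its operands as
//   %result = PHI %reg1, <bb#1>, %reg2, <bb#2>, ...
// operand 0 is the def and each incoming value is a (register, MBB) operand
// pair, which is why the loops above step by two and why Idx == 0 can double
// as "no pending pair to reuse". A standalone sketch of the inner search loop
// (later LLVM versions split out essentially this helper):
static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) {
  // Return the operand index of the register coming from SrcBB,
  // or 0 if SrcBB is not a source of this PHI.
  for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2)
    if (MI->getOperand(i + 1).getMBB() == SrcBB)
      return i;
  return 0;
}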
void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
                                                  const CallContext &Context) {
  // Ok, we can in fact do the transformation for this call.
  // Do not remove the FrameSetup instruction, but adjust the parameters.
  // PEI will end up finalizing the handling of this.
  MachineBasicBlock::iterator FrameSetup = Context.FrameSetup;
  MachineBasicBlock &MBB = *(FrameSetup->getParent());
  TII->setFrameAdjustment(*FrameSetup, Context.ExpectedDist);

  DebugLoc DL = FrameSetup->getDebugLoc();
  bool Is64Bit = STI->is64Bit();
  // Now, iterate through the vector in reverse order, and replace the stores
  // to the stack with pushes. MOVmi/MOVmr have no defs, so no need to
  // replace uses.
  for (int Idx = (Context.ExpectedDist >> Log2SlotSize) - 1; Idx >= 0; --Idx) {
    MachineBasicBlock::iterator Store = *Context.ArgStoreVector[Idx];
    MachineOperand PushOp = Store->getOperand(X86::AddrNumOperands);
    MachineBasicBlock::iterator Push = nullptr;
    unsigned PushOpcode;
    switch (Store->getOpcode()) {
    default:
      llvm_unreachable("Unexpected Opcode!");
    case X86::AND16mi8:
    case X86::AND32mi8:
    case X86::AND64mi8:
    case X86::OR16mi8:
    case X86::OR32mi8:
    case X86::OR64mi8:
    case X86::MOV32mi:
    case X86::MOV64mi32:
      PushOpcode = Is64Bit ? X86::PUSH64i32 : X86::PUSHi32;
      // If the operand is a small (8-bit) immediate, we can use a
      // PUSH instruction with a shorter encoding.
      // Note that isImm() may fail even though this is a MOVmi, because
      // the operand can also be a symbol.
      if (PushOp.isImm()) {
        int64_t Val = PushOp.getImm();
        if (isInt<8>(Val))
          PushOpcode = Is64Bit ? X86::PUSH64i8 : X86::PUSH32i8;
      }
      Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode)).add(PushOp);
      break;
    case X86::MOV32mr:
    case X86::MOV64mr: {
      unsigned int Reg = PushOp.getReg();

      // If storing a 32-bit vreg on 64-bit targets, extend to a 64-bit vreg
      // in preparation for the PUSH64. The upper 32 bits can be undef.
      if (Is64Bit && Store->getOpcode() == X86::MOV32mr) {
        unsigned UndefReg = MRI->createVirtualRegister(&X86::GR64RegClass);
        Reg = MRI->createVirtualRegister(&X86::GR64RegClass);
        BuildMI(MBB, Context.Call, DL, TII->get(X86::IMPLICIT_DEF), UndefReg);
        BuildMI(MBB, Context.Call, DL, TII->get(X86::INSERT_SUBREG), Reg)
            .addReg(UndefReg)
            .add(PushOp)
            .addImm(X86::sub_32bit);
      }

      // If PUSHrmm is not slow on this target, try to fold the source of the
      // push into the instruction.
      bool SlowPUSHrmm = STI->isAtom() || STI->isSLM();

      // Check that this is legal to fold. Right now, we're extremely
      // conservative about that.
      MachineInstr *DefMov = nullptr;
      if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) {
        PushOpcode = Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm;
        Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode));

        unsigned NumOps = DefMov->getDesc().getNumOperands();
        for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
          Push->addOperand(DefMov->getOperand(i));

        DefMov->eraseFromParent();
      } else {
        PushOpcode = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
        Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode))
                   .addReg(Reg)
                   .getInstr();
      }
      break;
    }
    }

    // For debugging, when using SP-based CFA, we need to adjust the CFA
    // offset after each push.
    // TODO: This is needed only if we require precise CFA.
    if (!TFL->hasFP(MF))
      TFL->BuildCFI(
          MBB, std::next(Push), DL,
          MCCFIInstruction::createAdjustCfaOffset(nullptr, SlotSize));

    MBB.erase(Store);
  }

  // The stack-pointer copy is no longer used in the call sequences.
  // There should not be any other users, but we can't commit to that, so:
  if (Context.SPCopy &&
      MRI->use_empty(Context.SPCopy->getOperand(0).getReg()))
    Context.SPCopy->eraseFromParent();

  // Once we've done this, we need to make sure PEI doesn't assume a reserved
  // frame.
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  FuncInfo->setHasPushSequences(true);
}
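// Roughly, for a 32-bit target (SlotSize == 4), the rewrite turns a call
// sequence like this (illustrative only; the frame setup is really the
// ADJCALLSTACKDOWN pseudo, finalized later by PEI):
//   ADJCALLSTACKDOWN 8          ; Context.FrameSetup, kept but re-annotated
//   MOV32mr [%esp + 4], %eax
//   MOV32mi [%esp],     42
//   CALL f
// into:
//   ADJCALLSTACKDOWN 8          ; PEI emits no SUB: the pushes adjust %esp
//   PUSH32r  %eax               ; highest slot is pushed first
//   PUSH32i8 42                 ; shrunk thanks to the 8-bit immediate check
//   CALL f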
/// ComputeLocalLiveness - Computes liveness of registers within a basic
/// block, setting the killed/dead flags as appropriate.
void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  // Keep track of the most recently seen previous use or def of each reg,
  // so that we can update them with dead/kill markers.
  DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > LastUseDef;
  for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
       I != E; ++I) {
    if (I->isDebugValue())
      continue;

    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = I->getOperand(i);
      // Uses don't trigger any flags, but we need to save
      // them for later. Also, we have to process these
      // _before_ processing the defs, since an instr
      // uses regs before it defs them.
      if (!MO.isReg() || !MO.getReg() || !MO.isUse())
        continue;

      LastUseDef[MO.getReg()] = std::make_pair(I, i);

      if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
        continue;

      const unsigned *Aliases = TRI->getAliasSet(MO.getReg());
      if (Aliases == 0)
        continue;

      while (*Aliases) {
        DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
          alias = LastUseDef.find(*Aliases);

        if (alias != LastUseDef.end() && alias->second.first != I)
          LastUseDef[*Aliases] = std::make_pair(I, i);

        ++Aliases;
      }
    }

    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = I->getOperand(i);
      // Defs other than 2-addr redefs _do_ trigger flag changes:
      //   - A def followed by a def is dead
      //   - A use followed by a def is a kill
      if (!MO.isReg() || !MO.getReg() || !MO.isDef())
        continue;

      DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
        last = LastUseDef.find(MO.getReg());
      if (last != LastUseDef.end()) {
        // Check if this is a two address instruction. If so, then
        // the def does not kill the use.
        if (last->second.first == I && I->isRegTiedToUseOperand(i))
          continue;

        MachineOperand &lastUD =
                    last->second.first->getOperand(last->second.second);
        if (lastUD.isDef())
          lastUD.setIsDead(true);
        else
          lastUD.setIsKill(true);
      }

      LastUseDef[MO.getReg()] = std::make_pair(I, i);
    }
  }

  // Live-out (of the function) registers contain return values of the
  // function, so we need to make sure they are alive at return time.
  MachineBasicBlock::iterator Ret = MBB.getFirstTerminator();
  bool BBEndsInReturn = (Ret != MBB.end() && Ret->getDesc().isReturn());

  if (BBEndsInReturn)
    for (MachineRegisterInfo::liveout_iterator
         I = MF->getRegInfo().liveout_begin(),
         E = MF->getRegInfo().liveout_end(); I != E; ++I)
      if (!Ret->readsRegister(*I)) {
        Ret->addOperand(MachineOperand::CreateReg(*I, false, true));
        LastUseDef[*I] = std::make_pair(Ret, Ret->getNumOperands()-1);
      }

  // Finally, loop over the final use/def of each reg
  // in the block and determine if it is dead.
  for (DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
       I = LastUseDef.begin(), E = LastUseDef.end(); I != E; ++I) {
    MachineInstr *MI = I->second.first;
    unsigned idx = I->second.second;
    MachineOperand &MO = MI->getOperand(idx);

    bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(MO.getReg());

    // A crude approximation of "live-out" calculation.
    bool usedOutsideBlock = isPhysReg ? false :
          UsedInMultipleBlocks.test(MO.getReg() -
                                    TargetRegisterInfo::FirstVirtualRegister);

    // If the machine BB ends in a return instruction, then the value isn't
    // used outside of the BB.
    if (!isPhysReg && (!usedOutsideBlock || BBEndsInReturn)) {
      // DBG_VALUE complicates this: if the only refs of a register outside
      // this block are DBG_VALUE, we can't keep the reg live just for that,
      // as it will cause the reg to be spilled at the end of this block when
      // it wouldn't have been otherwise. Nullify the DBG_VALUEs when that
      // happens.
      bool UsedByDebugValueOnly = false;
      for (MachineRegisterInfo::reg_iterator UI = MRI.reg_begin(MO.getReg()),
           UE = MRI.reg_end(); UI != UE; ++UI) {
        // Two cases:
        // - used in another block
        // - used in the same block before it is defined (loop)
        if (UI->getParent() == &MBB &&
            !(MO.isDef() && UI.getOperand().isUse() && precedes(&*UI, MI)))
          continue;

        if (UI->isDebugValue()) {
          UsedByDebugValueOnly = true;
          continue;
        }

        // A non-DBG_VALUE use means we can leave DBG_VALUE uses alone.
        UsedInMultipleBlocks.set(MO.getReg() -
                                 TargetRegisterInfo::FirstVirtualRegister);
        usedOutsideBlock = true;
        UsedByDebugValueOnly = false;
        break;
      }

      if (UsedByDebugValueOnly)
        for (MachineRegisterInfo::reg_iterator UI = MRI.reg_begin(MO.getReg()),
             UE = MRI.reg_end(); UI != UE; ++UI)
          if (UI->isDebugValue() &&
              (UI->getParent() != &MBB ||
               (MO.isDef() && precedes(&*UI, MI))))
            UI.getOperand().setReg(0U);
    }

    // Physical registers and those that are not live-out of the block are
    // killed/dead at their last use/def within this block.
    if (isPhysReg || !usedOutsideBlock || BBEndsInReturn) {
      if (MO.isUse()) {
        // Don't mark uses that are tied to defs as kills.
        if (!MI->isRegTiedToDefOperand(idx))
          MO.setIsKill(true);
      } else {
        MO.setIsDead(true);
      }
    }
  }
}
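// The flags this computes, shown on a small single-block example (virtual
// registers, illustrative opcodes):
//   %v1<def,dead> = MOVri 0            ; a def followed by a def is dead
//   %v1<def>      = MOVri 1
//   %v2<def>      = ADDri %v1<kill>, 4 ; last use of %v1 in the block: kill
// %v2's def is additionally marked <dead> only if %v2 is never referenced
// outside this block (usedOutsideBlock stays false) or the block ends in a
// return.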
bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
                                                  const CallContext &Context) {
  // Ok, we can in fact do the transformation for this call.
  // Do not remove the FrameSetup instruction, but adjust the parameters.
  // PEI will end up finalizing the handling of this.
  MachineBasicBlock::iterator FrameSetup = Context.FrameSetup;
  MachineBasicBlock &MBB = *(FrameSetup->getParent());
  FrameSetup->getOperand(1).setImm(Context.ExpectedDist);

  DebugLoc DL = FrameSetup->getDebugLoc();
  // Now, iterate through the vector in reverse order, and replace the MOVs
  // with pushes. MOVmi/MOVmr have no defs, so no need to
  // replace uses.
  for (int Idx = (Context.ExpectedDist / 4) - 1; Idx >= 0; --Idx) {
    MachineBasicBlock::iterator MOV = *Context.MovVector[Idx];
    MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands);
    MachineBasicBlock::iterator Push = nullptr;
    if (MOV->getOpcode() == X86::MOV32mi) {
      unsigned PushOpcode = X86::PUSHi32;
      // If the operand is a small (8-bit) immediate, we can use a
      // PUSH instruction with a shorter encoding.
      // Note that isImm() may fail even though this is a MOVmi, because
      // the operand can also be a symbol.
      if (PushOp.isImm()) {
        int64_t Val = PushOp.getImm();
        if (isInt<8>(Val))
          PushOpcode = X86::PUSH32i8;
      }
      Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode))
                 .addOperand(PushOp);
    } else {
      unsigned int Reg = PushOp.getReg();

      // If PUSHrmm is not slow on this target, try to fold the source of the
      // push into the instruction.
      bool SlowPUSHrmm = STI->isAtom() || STI->isSLM();

      // Check that this is legal to fold. Right now, we're extremely
      // conservative about that.
      MachineInstr *DefMov = nullptr;
      if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) {
        Push = BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32rmm));

        unsigned NumOps = DefMov->getDesc().getNumOperands();
        for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
          Push->addOperand(DefMov->getOperand(i));

        DefMov->eraseFromParent();
      } else {
        Push = BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32r))
                   .addReg(Reg)
                   .getInstr();
      }
    }

    // For debugging, when using SP-based CFA, we need to adjust the CFA
    // offset after each push.
    // TODO: This is needed only if we require precise CFA.
    if (!TFL->hasFP(MF))
      TFL->BuildCFI(MBB, std::next(Push), DL,
                    MCCFIInstruction::createAdjustCfaOffset(nullptr, 4));

    MBB.erase(MOV);
  }

  // The stack-pointer copy is no longer used in the call sequences.
  // There should not be any other users, but we can't commit to that, so:
  if (MRI->use_empty(Context.SPCopy->getOperand(0).getReg()))
    Context.SPCopy->eraseFromParent();

  // Once we've done this, we need to make sure PEI doesn't assume a reserved
  // frame.
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  FuncInfo->setHasPushSequences(true);
  return true;
}
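// The canFoldIntoRegPush path above folds a preceding load into the push,
// e.g. (illustrative):
//   MOV32rm %reg, [mem]   ; DefMov
//   PUSH32r %reg
// becomes a single
//   PUSH32rmm [mem]
// by copying DefMov's trailing X86::AddrNumOperands address operands onto
// the new push and erasing the load.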