void MipsInstrInfo::BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                                const DebugLoc &DL,
                                ArrayRef<MachineOperand> Cond) const {
  unsigned Opc = Cond[0].getImm();
  const MCInstrDesc &MCID = get(Opc);
  MachineInstrBuilder MIB = BuildMI(&MBB, DL, MCID);

  for (unsigned i = 1; i < Cond.size(); ++i) {
    assert((Cond[i].isImm() || Cond[i].isReg()) &&
           "Cannot copy operand for conditional branch!");
    MIB.add(Cond[i]);
  }
  MIB.addMBB(TBB);
}
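// Illustrative sketch (not from the original source): how a caller might
// supply Cond to BuildCondBr. The layout below - the branch opcode as an
// immediate in Cond[0], followed by the branch's own operands - is inferred
// from how this function consumes Cond; the register choices are hypothetical.
//
//   SmallVector<MachineOperand, 3> Cond;
//   Cond.push_back(MachineOperand::CreateImm(Mips::BEQ));
//   Cond.push_back(MachineOperand::CreateReg(Mips::A0, /*isDef=*/false));
//   Cond.push_back(MachineOperand::CreateReg(Mips::A1, /*isDef=*/false));
//   TII->BuildCondBr(MBB, TargetBB, DL, Cond); // beq $a0, $a1, TargetBB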
void MipsSEInstrInfo::expandRetRA(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator I) const {
  MachineInstrBuilder MIB;

  if (Subtarget.isGP64bit())
    MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Mips::PseudoReturn64))
              .addReg(Mips::RA_64, RegState::Undef);
  else
    MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Mips::PseudoReturn))
              .addReg(Mips::RA, RegState::Undef);

  // Retain any imp-use flags.
  for (auto &MO : I->operands()) {
    if (MO.isImplicit())
      MIB.add(MO);
  }
}
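// For illustration (assumed output, not from the original source): on a
// 64-bit subtarget the RetRA pseudo becomes
//   PseudoReturn64 undef $ra_64 [, <copied imp-use operands>]
// which is in turn lowered to a jump through $ra (or the equivalent) at
// emission time.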
void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
  DEBUG(dbgs() << "Merging BB#" << CmpBB->getNumber() << " into BB#"
               << Head->getNumber() << ":\n" << *CmpBB);

  // All CmpBB instructions are moved into Head, and CmpBB is deleted.
  // Update the CFG first.
  updateTailPHIs();
  Head->removeSuccessor(CmpBB, true);
  CmpBB->removeSuccessor(Tail, true);
  Head->transferSuccessorsAndUpdatePHIs(CmpBB);
  DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc();
  TII->removeBranch(*Head);

  // If the Head terminator was one of the cbz / cbnz branches with built-in
  // compare, we need to insert an explicit compare instruction in its place.
  if (HeadCond[0].getImm() == -1) {
    ++NumCompBranches;
    unsigned Opc = 0;
    switch (HeadCond[1].getImm()) {
    case AArch64::CBZW:
    case AArch64::CBNZW:
      Opc = AArch64::SUBSWri;
      break;
    case AArch64::CBZX:
    case AArch64::CBNZX:
      Opc = AArch64::SUBSXri;
      break;
    default:
      llvm_unreachable("Cannot convert Head branch");
    }
    const MCInstrDesc &MCID = TII->get(Opc);
    // Create a dummy virtual register for the SUBS def.
    unsigned DestReg =
        MRI->createVirtualRegister(TII->getRegClass(MCID, 0, TRI, *MF));
    // Insert a SUBS Rn, #0 instruction instead of the cbz / cbnz.
    BuildMI(*Head, Head->end(), TermDL, MCID)
        .addReg(DestReg, RegState::Define | RegState::Dead)
        .add(HeadCond[2])
        .addImm(0)
        .addImm(0);
    // SUBS uses the GPR*sp register classes.
    MRI->constrainRegClass(HeadCond[2].getReg(),
                           TII->getRegClass(MCID, 1, TRI, *MF));
  }

  Head->splice(Head->end(), CmpBB, CmpBB->begin(), CmpBB->end());

  // Now replace CmpMI with a ccmp instruction that also considers the incoming
  // flags.
  unsigned Opc = 0;
  unsigned FirstOp = 1;   // First CmpMI operand to copy.
  bool isZBranch = false; // CmpMI is a cbz/cbnz instruction.
  switch (CmpMI->getOpcode()) {
  default:
    llvm_unreachable("Unknown compare opcode");
  case AArch64::SUBSWri:  Opc = AArch64::CCMPWi; break;
  case AArch64::SUBSWrr:  Opc = AArch64::CCMPWr; break;
  case AArch64::SUBSXri:  Opc = AArch64::CCMPXi; break;
  case AArch64::SUBSXrr:  Opc = AArch64::CCMPXr; break;
  case AArch64::ADDSWri:  Opc = AArch64::CCMNWi; break;
  case AArch64::ADDSWrr:  Opc = AArch64::CCMNWr; break;
  case AArch64::ADDSXri:  Opc = AArch64::CCMNXi; break;
  case AArch64::ADDSXrr:  Opc = AArch64::CCMNXr; break;
  case AArch64::FCMPSrr:  Opc = AArch64::FCCMPSrr; FirstOp = 0; break;
  case AArch64::FCMPDrr:  Opc = AArch64::FCCMPDrr; FirstOp = 0; break;
  case AArch64::FCMPESrr: Opc = AArch64::FCCMPESrr; FirstOp = 0; break;
  case AArch64::FCMPEDrr: Opc = AArch64::FCCMPEDrr; FirstOp = 0; break;
  case AArch64::CBZW:
  case AArch64::CBNZW:
    Opc = AArch64::CCMPWi;
    FirstOp = 0;
    isZBranch = true;
    break;
  case AArch64::CBZX:
  case AArch64::CBNZX:
    Opc = AArch64::CCMPXi;
    FirstOp = 0;
    isZBranch = true;
    break;
  }

  // The ccmp instruction should set the flags according to the comparison when
  // Head would have branched to CmpBB.
  // The NZCV immediate operand should provide flags for the case where Head
  // would have branched to Tail. These flags should cause the new Head
  // terminator to branch to Tail.
  unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(CmpBBTailCC);
  const MCInstrDesc &MCID = TII->get(Opc);
  MRI->constrainRegClass(CmpMI->getOperand(FirstOp).getReg(),
                         TII->getRegClass(MCID, 0, TRI, *MF));
  if (CmpMI->getOperand(FirstOp + 1).isReg())
    MRI->constrainRegClass(CmpMI->getOperand(FirstOp + 1).getReg(),
                           TII->getRegClass(MCID, 1, TRI, *MF));
  MachineInstrBuilder MIB = BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), MCID)
                                .add(CmpMI->getOperand(FirstOp)); // Register Rn
  if (isZBranch)
    MIB.addImm(0); // cbz/cbnz Rn -> ccmp Rn, #0
  else
    MIB.add(CmpMI->getOperand(FirstOp + 1)); // Register Rm / Immediate
  MIB.addImm(NZCV).addImm(HeadCmpBBCC);

  // If CmpMI was a terminator, we need a new conditional branch to replace it.
  // This now becomes a Head terminator.
  if (isZBranch) {
    bool isNZ = CmpMI->getOpcode() == AArch64::CBNZW ||
                CmpMI->getOpcode() == AArch64::CBNZX;
    BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), TII->get(AArch64::Bcc))
        .addImm(isNZ ? AArch64CC::NE : AArch64CC::EQ)
        .add(CmpMI->getOperand(1)); // Branch target.
  }
  CmpMI->eraseFromParent();
  Head->updateTerminator();

  RemovedBlocks.push_back(CmpBB);
  CmpBB->eraseFromParent();
  DEBUG(dbgs() << "Result:\n" << *Head);
  ++NumConverted;
}
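// For reference, the conditional-compare semantics this conversion relies on:
// "ccmp Rn, Op2, #nzcv, cond" sets the flags from "cmp Rn, Op2" when `cond`
// holds, and to the literal #nzcv immediate otherwise. A hypothetical
// before/after (registers and conditions are illustrative only):
//
//   Head:  subs wzr, w0, #5        Head:  subs wzr, w0, #5
//          b.lt %CmpBB        =>          ccmp w1, #7, #<nzcv>, lt
//   CmpBB: subs wzr, w1, #7              b.<cc> %Tail
//          b.ge %Tail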
MachineInstrBuilder
MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc,
                                  MachineBasicBlock::iterator I) const {
  MachineInstrBuilder MIB;

  // Certain branches have two forms: e.g. beq $1, $zero, dest vs beqz $1, dest
  // Pick the zero form of the branch for readable assembly and for greater
  // branch distance in non-microMIPS mode.
  // Additionally, MIPSR6 does not permit the use of register $zero for compact
  // branches.
  // FIXME: Certain atomic sequences on mips64 generate 32bit references to
  // Mips::ZERO, which is incorrect. This test should be updated to use
  // Subtarget.getABI().GetZeroReg() when those atomic sequences and others
  // are fixed.
  int ZeroOperandPosition = -1;
  bool BranchWithZeroOperand = false;
  if (I->isBranch() && !I->isPseudo()) {
    auto TRI = I->getParent()->getParent()->getSubtarget().getRegisterInfo();
    ZeroOperandPosition = I->findRegisterUseOperandIdx(Mips::ZERO, false, TRI);
    BranchWithZeroOperand = ZeroOperandPosition != -1;
  }

  if (BranchWithZeroOperand) {
    switch (NewOpc) {
    case Mips::BEQC:
      NewOpc = Mips::BEQZC;
      break;
    case Mips::BNEC:
      NewOpc = Mips::BNEZC;
      break;
    case Mips::BGEC:
      NewOpc = Mips::BGEZC;
      break;
    case Mips::BLTC:
      NewOpc = Mips::BLTZC;
      break;
    case Mips::BEQC64:
      NewOpc = Mips::BEQZC64;
      break;
    case Mips::BNEC64:
      NewOpc = Mips::BNEZC64;
      break;
    }
  }

  MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), get(NewOpc));

  // For MIPSR6, JI*C requires an immediate 0 as an operand; JIALC(64) also
  // requires the removal of its %RA<imp-def> implicit operand, as copying the
  // implicit operands of the instruction we're looking at will give us the
  // correct flags.
  if (NewOpc == Mips::JIC || NewOpc == Mips::JIALC || NewOpc == Mips::JIC64 ||
      NewOpc == Mips::JIALC64) {

    if (NewOpc == Mips::JIALC || NewOpc == Mips::JIALC64)
      MIB->RemoveOperand(0);

    for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) {
      MIB.add(I->getOperand(J));
    }

    MIB.addImm(0);
  } else {
    for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) {
      if (BranchWithZeroOperand && (unsigned)ZeroOperandPosition == J)
        continue;

      MIB.add(I->getOperand(J));
    }
  }

  MIB.copyImplicitOps(*I);
  MIB.setMemRefs(I->memoperands_begin(), I->memoperands_end());
  return MIB;
}
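// Illustrative example of the zero-form rewrite above (hypothetical operands):
// a compact branch whose second source is $zero, e.g.
//   BEQC $4, $zero, %bb.2
// is re-emitted with the zero form of the opcode and the $zero operand
// skipped in the copy loop:
//   BEQZC $4, %bb.2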
bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
                                              bool DoIt) const {
  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
  const TargetInstrInfo &TII = *STI.getInstrInfo();
  const ThumbRegisterInfo *RegInfo =
      static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());

  // If MBBI is a return instruction, or is a tPOP followed by a return
  // instruction in the successor BB, we may be able to directly restore
  // LR in the PC.
  // This is only possible with v5T ops (v4T can't change the Thumb bit via
  // a POP PC instruction), and only if we do not need to emit any SP update.
  // Otherwise, we need a temporary register to pop the value
  // and copy that value into LR.
  auto MBBI = MBB.getFirstTerminator();
  bool CanRestoreDirectly = STI.hasV5TOps() && !ArgRegsSaveSize;
  if (CanRestoreDirectly) {
    if (MBBI != MBB.end() && MBBI->getOpcode() != ARM::tB)
      CanRestoreDirectly = (MBBI->getOpcode() == ARM::tBX_RET ||
                            MBBI->getOpcode() == ARM::tPOP_RET);
    else {
      auto MBBI_prev = MBBI;
      MBBI_prev--;
      assert(MBBI_prev->getOpcode() == ARM::tPOP);
      assert(MBB.succ_size() == 1);
      if ((*MBB.succ_begin())->begin()->getOpcode() == ARM::tBX_RET)
        MBBI = MBBI_prev; // Replace the final tPOP with a tPOP_RET.
      else
        CanRestoreDirectly = false;
    }
  }

  if (CanRestoreDirectly) {
    if (!DoIt || MBBI->getOpcode() == ARM::tPOP_RET)
      return true;
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET))
            .add(predOps(ARMCC::AL));
    // Copy implicit ops and popped registers, if any.
    for (auto MO : MBBI->operands())
      if (MO.isReg() && (MO.isImplicit() || MO.isDef()))
        MIB.add(MO);
    MIB.addReg(ARM::PC, RegState::Define);
    // Erase the old instruction (tBX_RET or tPOP).
    MBB.erase(MBBI);
    return true;
  }

  // Look for a temporary register to use.
  // First, compute the liveness information.
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
  LivePhysRegs UsedRegs(TRI);
  UsedRegs.addLiveOuts(MBB);
  // The semantics of pristines changed recently and now,
  // the callee-saved registers that are touched in the function
  // are not part of the pristines set anymore.
  // Add those callee-saved now.
  const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF);
  for (unsigned i = 0; CSRegs[i]; ++i)
    UsedRegs.addReg(CSRegs[i]);

  DebugLoc dl = DebugLoc();
  if (MBBI != MBB.end()) {
    dl = MBBI->getDebugLoc();
    auto InstUpToMBBI = MBB.end();
    while (InstUpToMBBI != MBBI)
      // The pre-decrement is on purpose here.
      // We want to have the liveness right before MBBI.
      UsedRegs.stepBackward(*--InstUpToMBBI);
  }

  // Look for a register that can be directly used in the POP.
  unsigned PopReg = 0;
  // And some temporary register, just in case.
  unsigned TemporaryReg = 0;
  BitVector PopFriendly =
      TRI.getAllocatableSet(MF, TRI.getRegClass(ARM::tGPRRegClassID));
  assert(PopFriendly.any() && "No allocatable pop-friendly register?!");
  // Rebuild the GPRs from the high registers because they are removed
  // from the GPR reg class for thumb1.
  BitVector GPRsNoLRSP =
      TRI.getAllocatableSet(MF, TRI.getRegClass(ARM::hGPRRegClassID));
  GPRsNoLRSP |= PopFriendly;
  GPRsNoLRSP.reset(ARM::LR);
  GPRsNoLRSP.reset(ARM::SP);
  GPRsNoLRSP.reset(ARM::PC);
  findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, TemporaryReg);

  // If we couldn't find a pop-friendly register, restore LR before popping the
  // other callee-saved registers, so we can use one of them as a temporary.
  bool UseLDRSP = false;
  if (!PopReg && MBBI != MBB.begin()) {
    auto PrevMBBI = MBBI;
    PrevMBBI--;
    if (PrevMBBI->getOpcode() == ARM::tPOP) {
      MBBI = PrevMBBI;
      UsedRegs.stepBackward(*MBBI);
      findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg,
                           TemporaryReg);
      UseLDRSP = true;
    }
  }

  if (!DoIt && !PopReg && !TemporaryReg)
    return false;

  assert((PopReg || TemporaryReg) && "Cannot get LR");

  if (UseLDRSP) {
    assert(PopReg && "Do not know how to get LR");
    // Load the LR via LDR tmp, [SP, #off]
    BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRspi))
        .addReg(PopReg, RegState::Define)
        .addReg(ARM::SP)
        .addImm(MBBI->getNumExplicitOperands() - 2)
        .add(predOps(ARMCC::AL));
    // Move from the temporary register to the LR.
    BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
        .addReg(ARM::LR, RegState::Define)
        .addReg(PopReg, RegState::Kill)
        .add(predOps(ARMCC::AL));
    // Advance past the pop instruction.
    MBBI++;
    // Increment the SP.
    emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize + 4);
    return true;
  }

  if (TemporaryReg) {
    assert(!PopReg && "Unnecessary MOV is about to be inserted");
    PopReg = PopFriendly.find_first();
    BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
        .addReg(TemporaryReg, RegState::Define)
        .addReg(PopReg, RegState::Kill)
        .add(predOps(ARMCC::AL));
  }

  if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPOP_RET) {
    // We couldn't use the direct restoration above, so
    // perform the opposite conversion: tPOP_RET to tPOP.
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP))
            .add(predOps(ARMCC::AL));
    bool Popped = false;
    for (auto MO : MBBI->operands())
      if (MO.isReg() && (MO.isImplicit() || MO.isDef()) &&
          MO.getReg() != ARM::PC) {
        MIB.add(MO);
        if (!MO.isImplicit())
          Popped = true;
      }
    // Is there anything left to pop?
    if (!Popped)
      MBB.erase(MIB.getInstr());
    // Erase the old instruction.
    MBB.erase(MBBI);
    MBBI = BuildMI(MBB, MBB.end(), dl, TII.get(ARM::tBX_RET))
               .add(predOps(ARMCC::AL));
  }

  assert(PopReg && "Do not know how to get LR");
  BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))
      .add(predOps(ARMCC::AL))
      .addReg(PopReg, RegState::Define);
  emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
      .addReg(ARM::LR, RegState::Define)
      .addReg(PopReg, RegState::Kill)
      .add(predOps(ARMCC::AL));
  if (TemporaryReg)
    BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
        .addReg(PopReg, RegState::Define)
        .addReg(TemporaryReg, RegState::Kill)
        .add(predOps(ARMCC::AL));

  return true;
}
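// Illustrative end state of the temporary-register path above (register
// choices hypothetical): assuming r3 was found pop-friendly and free,
//   pop {r3}                  ; PopReg receives the saved LR value
//   add sp, #<ArgRegsSaveSize>
//   mov lr, r3
//   bx  lr
// with an extra "mov r3, <TemporaryReg>" restoring r3 afterwards if its
// original value had to be parked in a high register first.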
bool Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
                                      const ReduceEntry &Entry,
                                      bool LiveCPSR, bool IsSelfLoop) {
  if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
    return false;

  if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
    // Don't issue movs with shifter operand for some CPUs unless we
    // are optimizing for size.
    return false;

  unsigned Limit = ~0U;
  if (Entry.Imm1Limit)
    Limit = (1 << Entry.Imm1Limit) - 1;

  const MCInstrDesc &MCID = MI->getDesc();
  for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
    if (MCID.OpInfo[i].isPredicate())
      continue;
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg()) {
      unsigned Reg = MO.getReg();
      if (!Reg || Reg == ARM::CPSR)
        continue;
      if (Entry.LowRegs1 && !isARMLowRegister(Reg))
        return false;
    } else if (MO.isImm() && !MCID.OpInfo[i].isPredicate()) {
      if (((unsigned)MO.getImm()) > Limit)
        return false;
    }
  }

  // Check if it's possible / necessary to transfer the predicate.
  const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  bool SkipPred = false;
  if (Pred != ARMCC::AL) {
    if (!NewMCID.isPredicable())
      // Can't transfer predicate, fail.
      return false;
  } else {
    SkipPred = !NewMCID.isPredicable();
  }

  bool HasCC = false;
  bool CCDead = false;
  if (MCID.hasOptionalDef()) {
    unsigned NumOps = MCID.getNumOperands();
    HasCC = (MI->getOperand(NumOps - 1).getReg() == ARM::CPSR);
    if (HasCC && MI->getOperand(NumOps - 1).isDead())
      CCDead = true;
  }
  if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
    return false;

  // Avoid adding a false dependency on partial flag update by some 16-bit
  // instructions which have the 's' bit set.
  if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
      canAddPseudoFlagDep(MI, IsSelfLoop))
    return false;

  // Add the 16-bit instruction.
  DebugLoc dl = MI->getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
  MIB.add(MI->getOperand(0));
  if (NewMCID.hasOptionalDef())
    MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());

  // Transfer the rest of operands.
  unsigned NumOps = MCID.getNumOperands();
  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
    if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
      continue;
    if ((MCID.getOpcode() == ARM::t2RSBSri ||
         MCID.getOpcode() == ARM::t2RSBri ||
         MCID.getOpcode() == ARM::t2SXTB ||
         MCID.getOpcode() == ARM::t2SXTH ||
         MCID.getOpcode() == ARM::t2UXTB ||
         MCID.getOpcode() == ARM::t2UXTH) && i == 2)
      // Skip the zero immediate operand, it's now implicit.
      continue;
    bool isPred = (i < NumOps && MCID.OpInfo[i].isPredicate());
    if (SkipPred && isPred)
      continue;
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR)
      // Skip implicit def of CPSR. Either it's modeled as an optional
      // def now or it's already an implicit def on the new instruction.
      continue;
    MIB.add(MO);
  }
  if (!MCID.isPredicable() && NewMCID.isPredicable())
    MIB.add(predOps(ARMCC::AL));

  // Transfer MI flags.
  MIB.setMIFlags(MI->getFlags());

  LLVM_DEBUG(errs() << "Converted 32-bit: " << *MI
                    << " to 16-bit: " << *MIB);

  MBB.erase_instr(MI);
  ++NumNarrows;
  return true;
}
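// A sketch of the narrowing this performs (opcodes and operands hypothetical):
// a 32-bit instruction such as
//   t2MOVi r0, #42, pred:al
// whose immediate fits Entry.Imm1Limit and whose registers are low can be
// re-emitted as the 16-bit Entry.NarrowOpc1 form, with the optional CPSR def
// supplied via t1CondCodeOp()/condCodeOp().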
bool Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
                                     const ReduceEntry &Entry,
                                     bool LiveCPSR, bool IsSelfLoop) {
  if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
    return false;

  if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
    // Don't issue movs with shifter operand for some CPUs unless we
    // are optimizing for size.
    return false;

  unsigned Reg0 = MI->getOperand(0).getReg();
  unsigned Reg1 = MI->getOperand(1).getReg();
  // t2MUL is "special". The tied source operand is second, not first.
  if (MI->getOpcode() == ARM::t2MUL) {
    unsigned Reg2 = MI->getOperand(2).getReg();
    // Early exit if the regs aren't all low regs.
    if (!isARMLowRegister(Reg0) || !isARMLowRegister(Reg1) ||
        !isARMLowRegister(Reg2))
      return false;
    if (Reg0 != Reg2) {
      // If the other operand also isn't the same as the destination, we
      // can't reduce.
      if (Reg1 != Reg0)
        return false;
      // Try to commute the operands to make it a 2-address instruction.
      MachineInstr *CommutedMI = TII->commuteInstruction(*MI);
      if (!CommutedMI)
        return false;
    }
  } else if (Reg0 != Reg1) {
    // Try to commute the operands to make it a 2-address instruction.
    unsigned CommOpIdx1 = 1;
    unsigned CommOpIdx2 = TargetInstrInfo::CommuteAnyOperandIndex;
    if (!TII->findCommutedOpIndices(*MI, CommOpIdx1, CommOpIdx2) ||
        MI->getOperand(CommOpIdx2).getReg() != Reg0)
      return false;
    MachineInstr *CommutedMI =
        TII->commuteInstruction(*MI, false, CommOpIdx1, CommOpIdx2);
    if (!CommutedMI)
      return false;
  }
  if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
    return false;
  if (Entry.Imm2Limit) {
    unsigned Imm = MI->getOperand(2).getImm();
    unsigned Limit = (1 << Entry.Imm2Limit) - 1;
    if (Imm > Limit)
      return false;
  } else {
    unsigned Reg2 = MI->getOperand(2).getReg();
    if (Entry.LowRegs2 && !isARMLowRegister(Reg2))
      return false;
  }

  // Check if it's possible / necessary to transfer the predicate.
  const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  bool SkipPred = false;
  if (Pred != ARMCC::AL) {
    if (!NewMCID.isPredicable())
      // Can't transfer predicate, fail.
      return false;
  } else {
    SkipPred = !NewMCID.isPredicable();
  }

  bool HasCC = false;
  bool CCDead = false;
  const MCInstrDesc &MCID = MI->getDesc();
  if (MCID.hasOptionalDef()) {
    unsigned NumOps = MCID.getNumOperands();
    HasCC = (MI->getOperand(NumOps - 1).getReg() == ARM::CPSR);
    if (HasCC && MI->getOperand(NumOps - 1).isDead())
      CCDead = true;
  }
  if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
    return false;

  // Avoid adding a false dependency on partial flag update by some 16-bit
  // instructions which have the 's' bit set.
  if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
      canAddPseudoFlagDep(MI, IsSelfLoop))
    return false;

  // Add the 16-bit instruction.
  DebugLoc dl = MI->getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
  MIB.add(MI->getOperand(0));
  if (NewMCID.hasOptionalDef())
    MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());

  // Transfer the rest of operands.
  unsigned NumOps = MCID.getNumOperands();
  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
    if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
      continue;
    if (SkipPred && MCID.OpInfo[i].isPredicate())
      continue;
    MIB.add(MI->getOperand(i));
  }

  // Transfer MI flags.
  MIB.setMIFlags(MI->getFlags());

  LLVM_DEBUG(errs() << "Converted 32-bit: " << *MI
                    << " to 16-bit: " << *MIB);

  MBB.erase_instr(MI);
  ++Num2Addrs;
  return true;
}
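// A sketch of the two-address reduction (hypothetical operands): after the
// commutation checks ensure the destination matches a source,
//   t2ADDrr r0, r0, r1
// can be re-emitted via Entry.NarrowOpc2 as the 16-bit tied form, effectively
// "adds r0, r1", with the same predicate/CPSR handling as ReduceToNarrow.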
bool Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
                                       const ReduceEntry &Entry) {
  if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt))
    return false;

  unsigned Scale = 1;
  bool HasImmOffset = false;
  bool HasShift = false;
  bool HasOffReg = true;
  bool isLdStMul = false;
  unsigned Opc = Entry.NarrowOpc1;
  unsigned OpNum = 3; // First 'rest' of operands.
  uint8_t ImmLimit = Entry.Imm1Limit;

  switch (Entry.WideOpc) {
  default:
    llvm_unreachable("Unexpected Thumb2 load / store opcode!");
  case ARM::t2LDRi12:
  case ARM::t2STRi12:
    if (MI->getOperand(1).getReg() == ARM::SP) {
      Opc = Entry.NarrowOpc2;
      ImmLimit = Entry.Imm2Limit;
    }
    Scale = 4;
    HasImmOffset = true;
    HasOffReg = false;
    break;
  case ARM::t2LDRBi12:
  case ARM::t2STRBi12:
    HasImmOffset = true;
    HasOffReg = false;
    break;
  case ARM::t2LDRHi12:
  case ARM::t2STRHi12:
    Scale = 2;
    HasImmOffset = true;
    HasOffReg = false;
    break;
  case ARM::t2LDRs:
  case ARM::t2LDRBs:
  case ARM::t2LDRHs:
  case ARM::t2LDRSBs:
  case ARM::t2LDRSHs:
  case ARM::t2STRs:
  case ARM::t2STRBs:
  case ARM::t2STRHs:
    HasShift = true;
    OpNum = 4;
    break;
  case ARM::t2LDR_POST:
  case ARM::t2STR_POST: {
    if (!MBB.getParent()->getFunction().optForMinSize())
      return false;

    if (!MI->hasOneMemOperand() ||
        (*MI->memoperands_begin())->getAlignment() < 4)
      return false;

    // We're creating a completely different type of load/store - LDM from LDR.
    // For this reason we can't reuse the logic at the end of this function; we
    // have to implement the MI building here.
    bool IsStore = Entry.WideOpc == ARM::t2STR_POST;
    unsigned Rt = MI->getOperand(IsStore ? 1 : 0).getReg();
    unsigned Rn = MI->getOperand(IsStore ? 0 : 1).getReg();
    unsigned Offset = MI->getOperand(3).getImm();
    unsigned PredImm = MI->getOperand(4).getImm();
    unsigned PredReg = MI->getOperand(5).getReg();
    assert(isARMLowRegister(Rt));
    assert(isARMLowRegister(Rn));

    if (Offset != 4)
      return false;

    // Add the 16-bit load / store instruction.
    DebugLoc dl = MI->getDebugLoc();
    auto MIB = BuildMI(MBB, MI, dl, TII->get(Entry.NarrowOpc1))
                   .addReg(Rn, RegState::Define)
                   .addReg(Rn)
                   .addImm(PredImm)
                   .addReg(PredReg)
                   .addReg(Rt, IsStore ? 0 : RegState::Define);

    // Transfer memoperands.
    MIB.setMemRefs(MI->memoperands());

    // Transfer MI flags.
    MIB.setMIFlags(MI->getFlags());

    // Kill the old instruction.
    MI->eraseFromBundle();
    ++NumLdSts;
    return true;
  }
  case ARM::t2LDMIA: {
    unsigned BaseReg = MI->getOperand(0).getReg();
    assert(isARMLowRegister(BaseReg));

    // For the non-writeback version (this one), the base register must be
    // one of the registers being loaded.
    bool isOK = false;
    for (unsigned i = 3; i < MI->getNumOperands(); ++i) {
      if (MI->getOperand(i).getReg() == BaseReg) {
        isOK = true;
        break;
      }
    }

    if (!isOK)
      return false;

    OpNum = 0;
    isLdStMul = true;
    break;
  }
  case ARM::t2STMIA:
    // If the base register is killed, we don't care what its value is after
    // the instruction, so we can use an updating STMIA.
    if (!MI->getOperand(0).isKill())
      return false;

    break;
  case ARM::t2LDMIA_RET: {
    unsigned BaseReg = MI->getOperand(1).getReg();
    if (BaseReg != ARM::SP)
      return false;
    Opc = Entry.NarrowOpc2; // tPOP_RET
    OpNum = 2;
    isLdStMul = true;
    break;
  }
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA_UPD:
  case ARM::t2STMDB_UPD: {
    OpNum = 0;

    unsigned BaseReg = MI->getOperand(1).getReg();
    if (BaseReg == ARM::SP &&
        (Entry.WideOpc == ARM::t2LDMIA_UPD ||
         Entry.WideOpc == ARM::t2STMDB_UPD)) {
      Opc = Entry.NarrowOpc2; // tPOP or tPUSH
      OpNum = 2;
    } else if (!isARMLowRegister(BaseReg) ||
               (Entry.WideOpc != ARM::t2LDMIA_UPD &&
                Entry.WideOpc != ARM::t2STMIA_UPD)) {
      return false;
    }

    isLdStMul = true;
    break;
  }
  }

  unsigned OffsetReg = 0;
  bool OffsetKill = false;
  bool OffsetInternal = false;
  if (HasShift) {
    OffsetReg = MI->getOperand(2).getReg();
    OffsetKill = MI->getOperand(2).isKill();
    OffsetInternal = MI->getOperand(2).isInternalRead();

    if (MI->getOperand(3).getImm())
      // Thumb1 addressing mode doesn't support shift.
      return false;
  }

  unsigned OffsetImm = 0;
  if (HasImmOffset) {
    OffsetImm = MI->getOperand(2).getImm();
    unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale;

    if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset)
      // Make sure the immediate field fits.
      return false;
  }

  // Add the 16-bit load / store instruction.
  DebugLoc dl = MI->getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc));

  // tSTMIA_UPD takes a defining register operand. We've already checked that
  // the register is killed, so mark it as dead here.
  if (Entry.WideOpc == ARM::t2STMIA)
    MIB.addReg(MI->getOperand(0).getReg(), RegState::Define | RegState::Dead);

  if (!isLdStMul) {
    MIB.add(MI->getOperand(0));
    MIB.add(MI->getOperand(1));

    if (HasImmOffset)
      MIB.addImm(OffsetImm / Scale);

    assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");

    if (HasOffReg)
      MIB.addReg(OffsetReg, getKillRegState(OffsetKill) |
                                getInternalReadRegState(OffsetInternal));
  }

  // Transfer the rest of operands.
  for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.add(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB.setMemRefs(MI->memoperands());

  // Transfer MI flags.
  MIB.setMIFlags(MI->getFlags());

  LLVM_DEBUG(errs() << "Converted 32-bit: " << *MI
                    << " to 16-bit: " << *MIB);

  MBB.erase_instr(MI);
  ++NumLdSts;
  return true;
}
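// A sketch of the immediate-offset path (hypothetical operands): for
//   t2LDRi12 r0, [r1, #16]
// the checks above give Scale = 4 and HasImmOffset, so the 16-bit form is
// emitted via Entry.NarrowOpc1 with the re-scaled field OffsetImm / Scale:
//   tLDRi r0, [r1, #16]   ; the encoded 5-bit offset field holds 4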
/// emitThumbRegPlusImmediate - Emits a series of instructions to materialize
/// a destreg = basereg + immediate in Thumb code. Tries a series of ADDs or
/// SUBs first, and uses a constant pool value if the instruction sequence
/// would be too long. This is allowed to modify the condition flags.
void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator &MBBI,
                                     const DebugLoc &dl, unsigned DestReg,
                                     unsigned BaseReg, int NumBytes,
                                     const TargetInstrInfo &TII,
                                     const ARMBaseRegisterInfo &MRI,
                                     unsigned MIFlags) {
  bool isSub = NumBytes < 0;
  unsigned Bytes = (unsigned)NumBytes;
  if (isSub)
    Bytes = -NumBytes;

  int CopyOpc = 0;
  unsigned CopyBits = 0;
  unsigned CopyScale = 1;
  bool CopyNeedsCC = false;
  int ExtraOpc = 0;
  unsigned ExtraBits = 0;
  unsigned ExtraScale = 1;
  bool ExtraNeedsCC = false;

  // Strategy:
  // We need to select two types of instruction, maximizing the available
  // immediate range of each. The instructions we use will depend on whether
  // DestReg and BaseReg are low, high or the stack pointer.
  // * CopyOpc  - DestReg = BaseReg + imm
  //              This will be emitted once if DestReg != BaseReg, and never if
  //              DestReg == BaseReg.
  // * ExtraOpc - DestReg = DestReg + imm
  //              This will be emitted as many times as necessary to add the
  //              full immediate.
  // If the immediate ranges of these instructions are not large enough to
  // cover NumBytes with a reasonable number of instructions, we fall back to
  // using a value loaded from a constant pool.
  if (DestReg == ARM::SP) {
    if (BaseReg == ARM::SP) {
      // sp -> sp
      // Already in right reg, no copy needed
    } else {
      // low -> sp or high -> sp
      CopyOpc = ARM::tMOVr;
      CopyBits = 0;
    }
    ExtraOpc = isSub ? ARM::tSUBspi : ARM::tADDspi;
    ExtraBits = 7;
    ExtraScale = 4;
  } else if (isARMLowRegister(DestReg)) {
    if (BaseReg == ARM::SP) {
      // sp -> low
      assert(!isSub && "Thumb1 does not have tSUBrSPi");
      CopyOpc = ARM::tADDrSPi;
      CopyBits = 8;
      CopyScale = 4;
    } else if (DestReg == BaseReg) {
      // low -> same low
      // Already in right reg, no copy needed
    } else if (isARMLowRegister(BaseReg)) {
      // low -> different low
      CopyOpc = isSub ? ARM::tSUBi3 : ARM::tADDi3;
      CopyBits = 3;
      CopyNeedsCC = true;
    } else {
      // high -> low
      CopyOpc = ARM::tMOVr;
      CopyBits = 0;
    }
    ExtraOpc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
    ExtraBits = 8;
    ExtraNeedsCC = true;
  } else /* DestReg is high */ {
    if (DestReg == BaseReg) {
      // high -> same high
      // Already in right reg, no copy needed
    } else {
      // {low,high,sp} -> high
      CopyOpc = ARM::tMOVr;
      CopyBits = 0;
    }
    ExtraOpc = 0;
  }

  // We could handle an unaligned immediate with an unaligned copy instruction
  // and an aligned extra instruction, but this case is not currently needed.
  assert(((Bytes & 3) == 0 || ExtraScale == 1) &&
         "Unaligned offset, but all instructions require alignment");

  unsigned CopyRange = ((1 << CopyBits) - 1) * CopyScale;
  // If we would emit the copy with an immediate of 0, just use tMOVr.
  if (CopyOpc && Bytes < CopyScale) {
    CopyOpc = ARM::tMOVr;
    CopyScale = 1;
    CopyNeedsCC = false;
    CopyRange = 0;
  }
  unsigned ExtraRange = ((1 << ExtraBits) - 1) * ExtraScale; // per instruction
  unsigned RequiredCopyInstrs = CopyOpc ? 1 : 0;
  unsigned RangeAfterCopy = (CopyRange > Bytes) ? 0 : (Bytes - CopyRange);

  // We could handle this case when the copy instruction does not require an
  // aligned immediate, but we do not currently do this.
  assert(RangeAfterCopy % ExtraScale == 0 &&
         "Extra instruction requires immediate to be aligned");

  unsigned RequiredExtraInstrs;
  if (ExtraRange)
    RequiredExtraInstrs = alignTo(RangeAfterCopy, ExtraRange) / ExtraRange;
  else if (RangeAfterCopy > 0)
    // We need an extra instruction but none is available
    RequiredExtraInstrs = 1000000;
  else
    RequiredExtraInstrs = 0;
  unsigned RequiredInstrs = RequiredCopyInstrs + RequiredExtraInstrs;
  unsigned Threshold = (DestReg == ARM::SP) ? 3 : 2;

  // Use a constant pool, if the sequence of ADDs/SUBs is too expensive.
  if (RequiredInstrs > Threshold) {
    emitThumbRegPlusImmInReg(MBB, MBBI, dl,
                             DestReg, BaseReg, NumBytes, true,
                             TII, MRI, MIFlags);
    return;
  }

  // Emit zero or one copy instructions
  if (CopyOpc) {
    unsigned CopyImm = std::min(Bytes, CopyRange) / CopyScale;
    Bytes -= CopyImm * CopyScale;

    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(CopyOpc), DestReg);
    if (CopyNeedsCC)
      MIB = MIB.add(t1CondCodeOp());
    MIB.addReg(BaseReg, RegState::Kill);
    if (CopyOpc != ARM::tMOVr) {
      MIB.addImm(CopyImm);
    }
    MIB.setMIFlags(MIFlags).add(predOps(ARMCC::AL));

    BaseReg = DestReg;
  }

  // Emit zero or more in-place add/sub instructions
  while (Bytes) {
    unsigned ExtraImm = std::min(Bytes, ExtraRange) / ExtraScale;
    Bytes -= ExtraImm * ExtraScale;

    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, dl, TII.get(ExtraOpc), DestReg);
    if (ExtraNeedsCC)
      MIB = MIB.add(t1CondCodeOp());
    MIB.addReg(BaseReg)
        .addImm(ExtraImm)
        .add(predOps(ARMCC::AL))
        .setMIFlags(MIFlags);
  }
}
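// Worked example (inputs assumed for illustration): DestReg = r0 (low),
// BaseReg = SP, NumBytes = 1024. CopyOpc = tADDrSPi with CopyRange =
// 255 * 4 = 1020, ExtraOpc = tADDi8 with ExtraRange = 255, so:
//   add  r0, sp, #1020     ; CopyImm = 255, leaves Bytes = 4
//   adds r0, #4            ; one ExtraOpc covers the remainder
// RequiredInstrs = 2, which does not exceed the threshold of 2, so the
// constant-pool fallback is not taken.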
/// If \p MBBI is a pseudo instruction, this method expands
/// it to the corresponding (sequence of) actual instruction(s).
/// \returns true if \p MBBI has been expanded.
bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();
  DebugLoc DL = MBBI->getDebugLoc();
  switch (Opcode) {
  default:
    return false;
  case X86::TCRETURNdi:
  case X86::TCRETURNdicc:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNdi64cc:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64: {
    bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64;
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
    assert(StackAdjust.isImm() && "Expecting immediate value.");

    // Adjust stack pointer.
    int StackAdj = StackAdjust.getImm();
    int MaxTCDelta = X86FI->getTCReturnAddrDelta();
    int Offset = 0;
    assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");

    // Incorporate the retaddr area.
    Offset = StackAdj - MaxTCDelta;
    assert(Offset >= 0 && "Offset should never be negative");

    if (Opcode == X86::TCRETURNdicc || Opcode == X86::TCRETURNdi64cc) {
      assert(Offset == 0 && "Conditional tail call cannot adjust the stack.");
    }

    if (Offset) {
      // Check for possible merge with preceding ADD instruction.
      Offset += X86FL->mergeSPUpdates(MBB, MBBI, true);
      X86FL->emitSPUpdate(MBB, MBBI, Offset, /*InEpilogue=*/true);
    }

    // Jump to label or value in register.
    bool IsWin64 = STI->isTargetWin64();
    if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdicc ||
        Opcode == X86::TCRETURNdi64 || Opcode == X86::TCRETURNdi64cc) {
      unsigned Op;
      switch (Opcode) {
      case X86::TCRETURNdi:
        Op = X86::TAILJMPd;
        break;
      case X86::TCRETURNdicc:
        Op = X86::TAILJMPd_CC;
        break;
      case X86::TCRETURNdi64cc:
        assert(!IsWin64 &&
               "Conditional tail calls confuse the Win64 unwinder.");
        // TODO: We could do it for Win64 "leaf" functions though; PR30337.
        Op = X86::TAILJMPd64_CC;
        break;
      default:
        // Note: Win64 uses REX prefixes for indirect jumps out of functions,
        // but not for direct ones.
        Op = X86::TAILJMPd64;
        break;
      }
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op));
      if (JumpTarget.isGlobal()) {
        MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                             JumpTarget.getTargetFlags());
      } else {
        assert(JumpTarget.isSymbol());
        MIB.addExternalSymbol(JumpTarget.getSymbolName(),
                              JumpTarget.getTargetFlags());
      }
      if (Op == X86::TAILJMPd_CC || Op == X86::TAILJMPd64_CC) {
        MIB.addImm(MBBI->getOperand(2).getImm());
      }
    } else if (Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64) {
      unsigned Op = (Opcode == X86::TCRETURNmi)
                        ? X86::TAILJMPm
                        : (IsWin64 ? X86::TAILJMPm64_REX : X86::TAILJMPm64);
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op));
      for (unsigned i = 0; i != 5; ++i)
        MIB.add(MBBI->getOperand(i));
    } else if (Opcode == X86::TCRETURNri64) {
      BuildMI(MBB, MBBI, DL,
              TII->get(IsWin64 ? X86::TAILJMPr64_REX : X86::TAILJMPr64))
          .addReg(JumpTarget.getReg(), RegState::Kill);
    } else {
      BuildMI(MBB, MBBI, DL, TII->get(X86::TAILJMPr))
          .addReg(JumpTarget.getReg(), RegState::Kill);
    }

    MachineInstr &NewMI = *std::prev(MBBI);
    NewMI.copyImplicitOps(*MBBI->getParent()->getParent(), *MBBI);

    // Delete the pseudo instruction TCRETURN.
    MBB.erase(MBBI);

    return true;
  }
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    MachineOperand &DestAddr = MBBI->getOperand(0);
    assert(DestAddr.isReg() && "Offset should be in register!");
    const bool Uses64BitFramePtr =
        STI->isTarget64BitLP64() || STI->isTargetNaCl64();
    unsigned StackPtr = TRI->getStackRegister();
    BuildMI(MBB, MBBI, DL,
            TII->get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
            StackPtr)
        .addReg(DestAddr.getReg());
    // The EH_RETURN pseudo is really removed during the MC Lowering.
    return true;
  }
  case X86::IRET: {
    // Adjust stack to erase error code
    int64_t StackAdj = MBBI->getOperand(0).getImm();
    X86FL->emitSPUpdate(MBB, MBBI, StackAdj, true);
    // Replace pseudo with machine iret
    BuildMI(MBB, MBBI, DL,
            TII->get(STI->is64Bit() ? X86::IRET64 : X86::IRET32));
    MBB.erase(MBBI);
    return true;
  }
  case X86::RET: {
    // Adjust stack to erase error code
    int64_t StackAdj = MBBI->getOperand(0).getImm();
    MachineInstrBuilder MIB;
    if (StackAdj == 0) {
      MIB = BuildMI(MBB, MBBI, DL,
                    TII->get(STI->is64Bit() ? X86::RETQ : X86::RETL));
    } else if (isUInt<16>(StackAdj)) {
      MIB = BuildMI(MBB, MBBI, DL,
                    TII->get(STI->is64Bit() ? X86::RETIQ : X86::RETIL))
                .addImm(StackAdj);
    } else {
      assert(!STI->is64Bit() &&
             "shouldn't need to do this for x86_64 targets!");
      // A ret can only handle immediates as big as 2**16-1.  If we need to pop
      // off bytes before the return address, we must do it manually.
      BuildMI(MBB, MBBI, DL, TII->get(X86::POP32r))
          .addReg(X86::ECX, RegState::Define);
      X86FL->emitSPUpdate(MBB, MBBI, StackAdj, /*InEpilogue=*/true);
      BuildMI(MBB, MBBI, DL, TII->get(X86::PUSH32r)).addReg(X86::ECX);
      MIB = BuildMI(MBB, MBBI, DL, TII->get(X86::RETL));
    }
    for (unsigned I = 1, E = MBBI->getNumOperands(); I != E; ++I)
      MIB.add(MBBI->getOperand(I));
    MBB.erase(MBBI);
    return true;
  }
  case X86::EH_RESTORE: {
    // Restore ESP and EBP, and optionally ESI if required.
    bool IsSEH = isAsynchronousEHPersonality(classifyEHPersonality(
        MBB.getParent()->getFunction()->getPersonalityFn()));
    X86FL->restoreWin32EHStackPointers(MBB, MBBI, DL, /*RestoreSP=*/IsSEH);
    MBBI->eraseFromParent();
    return true;
  }
  case X86::LCMPXCHG8B_SAVE_EBX:
  case X86::LCMPXCHG16B_SAVE_RBX: {
    // Perform the following transformation.
    // SaveRbx = pseudocmpxchg Addr, <4 opds for the address>, InArg, SaveRbx
    // =>
    // [E|R]BX = InArg
    // actualcmpxchg Addr
    // [E|R]BX = SaveRbx
    const MachineOperand &InArg = MBBI->getOperand(6);
    unsigned SaveRbx = MBBI->getOperand(7).getReg();

    unsigned ActualInArg =
        Opcode == X86::LCMPXCHG8B_SAVE_EBX ? X86::EBX : X86::RBX;
    // Copy the input argument of the pseudo into the argument of the
    // actual instruction.
    TII->copyPhysReg(MBB, MBBI, DL, ActualInArg, InArg.getReg(),
                     InArg.isKill());
    // Create the actual instruction.
    unsigned ActualOpc =
        Opcode == X86::LCMPXCHG8B_SAVE_EBX ? X86::LCMPXCHG8B
                                           : X86::LCMPXCHG16B;
    MachineInstr *NewInstr = BuildMI(MBB, MBBI, DL, TII->get(ActualOpc));
    // Copy the operands related to the address.
    for (unsigned Idx = 1; Idx < 6; ++Idx)
      NewInstr->addOperand(MBBI->getOperand(Idx));
    // Finally, restore the value of RBX.
    TII->copyPhysReg(MBB, MBBI, DL, ActualInArg, SaveRbx,
                     /*SrcIsKill*/ true);

    // Delete the pseudo.
    MBBI->eraseFromParent();
    return true;
  }
  }
  llvm_unreachable("Previous switch has a fallthrough?");
}
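// Illustrative expansions of the RET pseudo above (assumed operand values):
//   RET 0   ->  RETL / RETQ          ; no stack adjustment needed
//   RET 8   ->  RETIL 8 / RETIQ 8    ; pops 8 extra bytes via the imm form
// Only when the adjustment exceeds 16 bits on a 32-bit target is the
// POP/PUSH-around-ECX sequence emitted instead.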
// Convert callee-save register save/restore instruction to do stack pointer
// decrement/increment to allocate/deallocate the callee-save stack area by
// converting store/load to use pre/post increment version.
static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc) {

  unsigned NewOpc;
  bool NewIsUnscaled = false;
  switch (MBBI->getOpcode()) {
  default:
    llvm_unreachable("Unexpected callee-save save/restore opcode!");
  case AArch64::STPXi:
    NewOpc = AArch64::STPXpre;
    break;
  case AArch64::STPDi:
    NewOpc = AArch64::STPDpre;
    break;
  case AArch64::STRXui:
    NewOpc = AArch64::STRXpre;
    NewIsUnscaled = true;
    break;
  case AArch64::STRDui:
    NewOpc = AArch64::STRDpre;
    NewIsUnscaled = true;
    break;
  case AArch64::LDPXi:
    NewOpc = AArch64::LDPXpost;
    break;
  case AArch64::LDPDi:
    NewOpc = AArch64::LDPDpost;
    break;
  case AArch64::LDRXui:
    NewOpc = AArch64::LDRXpost;
    NewIsUnscaled = true;
    break;
  case AArch64::LDRDui:
    NewOpc = AArch64::LDRDpost;
    NewIsUnscaled = true;
    break;
  }

  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
  MIB.addReg(AArch64::SP, RegState::Define);

  // Copy all operands other than the immediate offset.
  unsigned OpndIdx = 0;
  for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
       ++OpndIdx)
    MIB.add(MBBI->getOperand(OpndIdx));

  assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
         "Unexpected immediate offset in first/last callee-save save/restore "
         "instruction!");
  assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");
  // Last operand is immediate offset that needs fixing.
  assert(CSStackSizeInc % 8 == 0);
  int64_t CSStackSizeIncImm = CSStackSizeInc;
  if (!NewIsUnscaled)
    CSStackSizeIncImm /= 8;
  MIB.addImm(CSStackSizeIncImm);

  MIB.setMIFlags(MBBI->getFlags());
  MIB.setMemRefs(MBBI->memoperands_begin(), MBBI->memoperands_end());

  return std::prev(MBB.erase(MBBI));
}
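// A sketch of the conversion (assumed prologue case, CSStackSizeInc = -16):
//   stp x29, x30, [sp]         // STPXi with immediate offset 0
// becomes the pre-increment form with the scaled immediate -16 / 8 = -2:
//   stp x29, x30, [sp, #-16]!  // STPXpre; the separate SP decrement is folded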