/// Spill every callee-saved register in \p CSI with a single tPUSH inserted
/// before \p MI.
///
/// Registers are appended to the push in reverse CSI order. Each one is added
/// to the live-in set of \p MBB and marked killed by the push, except LR when
/// the function takes its return address and LR is already a function live-in.
///
/// \p TRI is not used by this implementation.
/// \returns false when \p CSI is empty (nothing emitted), true otherwise.
bool Thumb1FrameLowering::
spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MI,
                          const std::vector<CalleeSavedInfo> &CSI,
                          const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  DebugLoc DL;
  const TargetInstrInfo &TII = *STI.getInstrInfo();
  MachineFunction &MF = *MBB.getParent();

  MachineInstrBuilder Push = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH));
  AddDefaultPred(Push);

  for (auto It = CSI.rbegin(), End = CSI.rend(); It != End; ++It) {
    const unsigned Reg = It->getReg();

    // LR is the one exception to "live-in and killed": if
    // @llvm.returnaddress is used and LR is already in the function's live-in
    // set, it has been registered elsewhere and must stay alive past the push.
    const bool LRStaysLive = Reg == ARM::LR &&
                             MF.getFrameInfo()->isReturnAddressTaken() &&
                             MF.getRegInfo().isLiveIn(Reg);

    if (!LRStaysLive)
      MBB.addLiveIn(Reg);
    Push.addReg(Reg, getKillRegState(!LRStaysLive));
  }

  Push.setMIFlags(MachineInstr::FrameSetup);
  return true;
}
void AArch64FrameLowering::emitFrameMemOps(bool isPrologue, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const std::vector<CalleeSavedInfo> &CSI, const TargetRegisterInfo *TRI, const LoadStoreMethod PossClasses[], unsigned NumClasses) const { DebugLoc DL = MBB.findDebugLoc(MBBI); MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); // A certain amount of implicit contract is present here. The actual stack // offsets haven't been allocated officially yet, so for strictly correct code // we rely on the fact that the elements of CSI are allocated in order // starting at SP, purely as dictated by size and alignment. In practice since // this function handles the only accesses to those slots it's not quite so // important. // // We have also ordered the Callee-saved register list in AArch64CallingConv // so that the above scheme puts registers in order: in particular we want // &X30 to be &X29+8 for an ABI-correct frame record (PCS 5.2.2) for (unsigned i = 0, e = CSI.size(); i < e; ++i) { unsigned Reg = CSI[i].getReg(); // First we need to find out which register class the register belongs to so // that we can use the correct load/store instrucitons. unsigned ClassIdx; for (ClassIdx = 0; ClassIdx < NumClasses; ++ClassIdx) { if (PossClasses[ClassIdx].RegClass->contains(Reg)) break; } assert(ClassIdx != NumClasses && "Asked to store register in unexpected class"); const TargetRegisterClass &TheClass = *PossClasses[ClassIdx].RegClass; // Now we need to decide whether it's possible to emit a paired instruction: // for this we want the next register to be in the same class. 
MachineInstrBuilder NewMI; bool Pair = false; if (i + 1 < CSI.size() && TheClass.contains(CSI[i+1].getReg())) { Pair = true; unsigned StLow = 0, StHigh = 0; if (isPrologue) { // Most of these registers will be live-in to the MBB and killed by our // store, though there are exceptions (see determinePrologueDeath). StLow = getKillRegState(determinePrologueDeath(MBB, CSI[i+1].getReg())); StHigh = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg())); } else { StLow = RegState::Define; StHigh = RegState::Define; } NewMI = BuildMI(MBB, MBBI, DL, TII.get(PossClasses[ClassIdx].PairOpcode)) .addReg(CSI[i+1].getReg(), StLow) .addReg(CSI[i].getReg(), StHigh); // If it's a paired op, we've consumed two registers ++i; } else { unsigned State; if (isPrologue) { State = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg())); } else { State = RegState::Define; } NewMI = BuildMI(MBB, MBBI, DL, TII.get(PossClasses[ClassIdx].SingleOpcode)) .addReg(CSI[i].getReg(), State); } // Note that the FrameIdx refers to the second register in a pair: it will // be allocated the smaller numeric address and so is the one an LDP/STP // address must use. int FrameIdx = CSI[i].getFrameIdx(); MachineMemOperand::MemOperandFlags Flags; Flags = isPrologue ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad; MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), Flags, Pair ? TheClass.getSize() * 2 : TheClass.getSize(), MFI.getObjectAlignment(FrameIdx)); NewMI.addFrameIndex(FrameIdx) .addImm(0) // address-register offset .addMemOperand(MMO); if (isPrologue) NewMI.setMIFlags(MachineInstr::FrameSetup); // For aesthetic reasons, during an epilogue we want to emit complementary // operations to the prologue, but in the opposite order. So we still // iterate through the CalleeSavedInfo list in order, but we put the // instructions successively earlier in the MBB. if (!isPrologue) --MBBI; } }
/// emitThumbRegPlusImmediate - Emits a series of instructions to materialize /// a destreg = basereg + immediate in Thumb code. void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl, unsigned DestReg, unsigned BaseReg, int NumBytes, const TargetInstrInfo &TII, const ARMBaseRegisterInfo& MRI, unsigned MIFlags) { bool isSub = NumBytes < 0; unsigned Bytes = (unsigned)NumBytes; if (isSub) Bytes = -NumBytes; bool isMul4 = (Bytes & 3) == 0; bool isTwoAddr = false; bool DstNotEqBase = false; unsigned NumBits = 1; unsigned Scale = 1; int Opc = 0; int ExtraOpc = 0; bool NeedCC = false; if (DestReg == BaseReg && BaseReg == ARM::SP) { assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!"); NumBits = 7; Scale = 4; Opc = isSub ? ARM::tSUBspi : ARM::tADDspi; isTwoAddr = true; } else if (!isSub && BaseReg == ARM::SP) { // r1 = add sp, 403 // => // r1 = add sp, 100 * 4 // r1 = add r1, 3 if (!isMul4) { Bytes &= ~3; ExtraOpc = ARM::tADDi3; } DstNotEqBase = true; NumBits = 8; Scale = 4; Opc = ARM::tADDrSPi; } else { // sp = sub sp, c // r1 = sub sp, c // r8 = sub sp, c if (DestReg != BaseReg) DstNotEqBase = true; if (DestReg == ARM::SP) { Opc = isSub ? ARM::tSUBspi : ARM::tADDspi; assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!"); NumBits = 7; Scale = 4; } else { Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8; NumBits = 8; NeedCC = true; } isTwoAddr = true; } unsigned NumMIs = calcNumMI(Opc, ExtraOpc, Bytes, NumBits, Scale); unsigned Threshold = (DestReg == ARM::SP) ? 3 : 2; if (NumMIs > Threshold) { // This will expand into too many instructions. Load the immediate from a // constpool entry. 
emitThumbRegPlusImmInReg(MBB, MBBI, dl, DestReg, BaseReg, NumBytes, true, TII, MRI, MIFlags); return; } if (DstNotEqBase) { if (isARMLowRegister(DestReg) && isARMLowRegister(BaseReg)) { // If both are low registers, emit DestReg = add BaseReg, max(Imm, 7) unsigned Chunk = (1 << 3) - 1; unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; Bytes -= ThisVal; const MCInstrDesc &MCID = TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3); const MachineInstrBuilder MIB = AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg) .setMIFlags(MIFlags)); AddDefaultPred(MIB.addReg(BaseReg, RegState::Kill).addImm(ThisVal)); } else if (isARMLowRegister(DestReg) && BaseReg == ARM::SP && Bytes > 0) { unsigned ThisVal = std::min(1020U, Bytes / 4 * 4); Bytes -= ThisVal; AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), DestReg) .addReg(BaseReg, RegState::Kill).addImm(ThisVal / 4)) .setMIFlags(MIFlags); } else { AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg) .addReg(BaseReg, RegState::Kill)) .setMIFlags(MIFlags); } BaseReg = DestReg; } unsigned Chunk = ((1 << NumBits) - 1) * Scale; while (Bytes) { unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; Bytes -= ThisVal; ThisVal /= Scale; // Build the new tADD / tSUB. if (isTwoAddr) { MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg); if (NeedCC) MIB = AddDefaultT1CC(MIB); MIB.addReg(DestReg).addImm(ThisVal); MIB = AddDefaultPred(MIB); MIB.setMIFlags(MIFlags); } else { bool isKill = BaseReg != ARM::SP; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg); if (NeedCC) MIB = AddDefaultT1CC(MIB); MIB.addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal); MIB = AddDefaultPred(MIB); MIB.setMIFlags(MIFlags); BaseReg = DestReg; if (Opc == ARM::tADDrSPi) { // r4 = add sp, imm // r4 = add r4, imm // ... NumBits = 8; Scale = 1; Chunk = ((1 << NumBits) - 1) * Scale; Opc = isSub ? 
ARM::tSUBi8 : ARM::tADDi8; NeedCC = isTwoAddr = true; } } } if (ExtraOpc) { const MCInstrDesc &MCID = TII.get(ExtraOpc); AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg)) .addReg(DestReg, RegState::Kill) .addImm(((unsigned)NumBytes) & 3) .setMIFlags(MIFlags)); } }
bool Thumb1FrameLowering:: spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, const TargetRegisterInfo *TRI) const { if (CSI.empty()) return false; DebugLoc DL; const TargetInstrInfo &TII = *STI.getInstrInfo(); MachineFunction &MF = *MBB.getParent(); const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( MF.getSubtarget().getRegisterInfo()); ARMRegSet LoRegsToSave; // r0-r7, lr ARMRegSet HiRegsToSave; // r8-r11 ARMRegSet CopyRegs; // Registers which can be used after pushing // LoRegs for saving HiRegs. for (unsigned i = CSI.size(); i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) { LoRegsToSave[Reg] = true; } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) { HiRegsToSave[Reg] = true; } else { llvm_unreachable("callee-saved register of unexpected class"); } if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) && !MF.getRegInfo().isLiveIn(Reg) && !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF))) CopyRegs[Reg] = true; } // Unused argument registers can be used for the high register saving. for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) if (!MF.getRegInfo().isLiveIn(ArgReg)) CopyRegs[ArgReg] = true; // Push the low registers and lr const MachineRegisterInfo &MRI = MF.getRegInfo(); if (!LoRegsToSave.none()) { MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL)); for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6, ARM::R7, ARM::LR}) { if (LoRegsToSave[Reg]) { bool isKill = !MRI.isLiveIn(Reg); if (isKill && !MRI.isReserved(Reg)) MBB.addLiveIn(Reg); MIB.addReg(Reg, getKillRegState(isKill)); } } MIB.setMIFlags(MachineInstr::FrameSetup); } // Push the high registers. There are no store instructions that can access // these registers directly, so we have to move them to low registers, and // push them. 
This might take multiple pushes, as it is possible for there to // be fewer low registers available than high registers which need saving. // These are in reverse order so that in the case where we need to use // multiple PUSH instructions, the order of the registers on the stack still // matches the unwind info. They need to be swicthed back to ascending order // before adding to the PUSH instruction. static const unsigned AllCopyRegs[] = {ARM::LR, ARM::R7, ARM::R6, ARM::R5, ARM::R4, ARM::R3, ARM::R2, ARM::R1, ARM::R0}; static const unsigned AllHighRegs[] = {ARM::R11, ARM::R10, ARM::R9, ARM::R8}; const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs); const unsigned *AllHighRegsEnd = std::end(AllHighRegs); // Find the first register to save. const unsigned *HiRegToSave = findNextOrderedReg( std::begin(AllHighRegs), HiRegsToSave, AllHighRegsEnd); while (HiRegToSave != AllHighRegsEnd) { // Find the first low register to use. const unsigned *CopyReg = findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd); // Create the PUSH, but don't insert it yet (the MOVs need to come first). MachineInstrBuilder PushMIB = BuildMI(MF, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL)); SmallVector<unsigned, 4> RegsToPush; while (HiRegToSave != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) { if (HiRegsToSave[*HiRegToSave]) { bool isKill = !MRI.isLiveIn(*HiRegToSave); if (isKill && !MRI.isReserved(*HiRegToSave)) MBB.addLiveIn(*HiRegToSave); // Emit a MOV from the high reg to the low reg. BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) .addReg(*CopyReg, RegState::Define) .addReg(*HiRegToSave, getKillRegState(isKill)) .add(predOps(ARMCC::AL)); // Record the register that must be added to the PUSH. RegsToPush.push_back(*CopyReg); CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd); HiRegToSave = findNextOrderedReg(++HiRegToSave, HiRegsToSave, AllHighRegsEnd); } } // Add the low registers to the PUSH, in ascending order. 
for (unsigned Reg : llvm::reverse(RegsToPush)) PushMIB.addReg(Reg, RegState::Kill); // Insert the PUSH instruction after the MOVs. MBB.insert(MI, PushMIB); } return true; }
bool Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, bool LiveCPSR, bool IsSelfLoop) { if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr)) return false; if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand()) // Don't issue movs with shifter operand for some CPUs unless we // are optimizing / minimizing for size. return false; unsigned Reg0 = MI->getOperand(0).getReg(); unsigned Reg1 = MI->getOperand(1).getReg(); // t2MUL is "special". The tied source operand is second, not first. if (MI->getOpcode() == ARM::t2MUL) { unsigned Reg2 = MI->getOperand(2).getReg(); // Early exit if the regs aren't all low regs. if (!isARMLowRegister(Reg0) || !isARMLowRegister(Reg1) || !isARMLowRegister(Reg2)) return false; if (Reg0 != Reg2) { // If the other operand also isn't the same as the destination, we // can't reduce. if (Reg1 != Reg0) return false; // Try to commute the operands to make it a 2-address instruction. MachineInstr *CommutedMI = TII->commuteInstruction(MI); if (!CommutedMI) return false; } } else if (Reg0 != Reg1) { // Try to commute the operands to make it a 2-address instruction. unsigned CommOpIdx1, CommOpIdx2; if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) || CommOpIdx1 != 1 || MI->getOperand(CommOpIdx2).getReg() != Reg0) return false; MachineInstr *CommutedMI = TII->commuteInstruction(MI); if (!CommutedMI) return false; } if (Entry.LowRegs2 && !isARMLowRegister(Reg0)) return false; if (Entry.Imm2Limit) { unsigned Imm = MI->getOperand(2).getImm(); unsigned Limit = (1 << Entry.Imm2Limit) - 1; if (Imm > Limit) return false; } else { unsigned Reg2 = MI->getOperand(2).getReg(); if (Entry.LowRegs2 && !isARMLowRegister(Reg2)) return false; } // Check if it's possible / necessary to transfer the predicate. 
const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2); unsigned PredReg = 0; ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); bool SkipPred = false; if (Pred != ARMCC::AL) { if (!NewMCID.isPredicable()) // Can't transfer predicate, fail. return false; } else { SkipPred = !NewMCID.isPredicable(); } bool HasCC = false; bool CCDead = false; const MCInstrDesc &MCID = MI->getDesc(); if (MCID.hasOptionalDef()) { unsigned NumOps = MCID.getNumOperands(); HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR); if (HasCC && MI->getOperand(NumOps-1).isDead()) CCDead = true; } if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead)) return false; // Avoid adding a false dependency on partial flag update by some 16-bit // instructions which has the 's' bit set. if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC && canAddPseudoFlagDep(MI, IsSelfLoop)) return false; // Add the 16-bit instruction. DebugLoc dl = MI->getDebugLoc(); MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID); MIB.addOperand(MI->getOperand(0)); if (NewMCID.hasOptionalDef()) { if (HasCC) AddDefaultT1CC(MIB, CCDead); else AddNoT1CC(MIB); } // Transfer the rest of operands. unsigned NumOps = MCID.getNumOperands(); for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { if (i < NumOps && MCID.OpInfo[i].isOptionalDef()) continue; if (SkipPred && MCID.OpInfo[i].isPredicate()) continue; MIB.addOperand(MI->getOperand(i)); } // Transfer MI flags. MIB.setMIFlags(MI->getFlags()); DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); MBB.erase_instr(MI); ++Num2Addrs; return true; }
bool Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, bool LiveCPSR, bool IsSelfLoop) { if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit)) return false; if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand()) // Don't issue movs with shifter operand for some CPUs unless we // are optimizing / minimizing for size. return false; unsigned Limit = ~0U; if (Entry.Imm1Limit) Limit = (1 << Entry.Imm1Limit) - 1; const MCInstrDesc &MCID = MI->getDesc(); for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) { if (MCID.OpInfo[i].isPredicate()) continue; const MachineOperand &MO = MI->getOperand(i); if (MO.isReg()) { unsigned Reg = MO.getReg(); if (!Reg || Reg == ARM::CPSR) continue; if (Entry.LowRegs1 && !isARMLowRegister(Reg)) return false; } else if (MO.isImm() && !MCID.OpInfo[i].isPredicate()) { if (((unsigned)MO.getImm()) > Limit) return false; } } // Check if it's possible / necessary to transfer the predicate. const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1); unsigned PredReg = 0; ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); bool SkipPred = false; if (Pred != ARMCC::AL) { if (!NewMCID.isPredicable()) // Can't transfer predicate, fail. return false; } else { SkipPred = !NewMCID.isPredicable(); } bool HasCC = false; bool CCDead = false; if (MCID.hasOptionalDef()) { unsigned NumOps = MCID.getNumOperands(); HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR); if (HasCC && MI->getOperand(NumOps-1).isDead()) CCDead = true; } if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead)) return false; // Avoid adding a false dependency on partial flag update by some 16-bit // instructions which has the 's' bit set. if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC && canAddPseudoFlagDep(MI, IsSelfLoop)) return false; // Add the 16-bit instruction. 
DebugLoc dl = MI->getDebugLoc(); MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID); MIB.addOperand(MI->getOperand(0)); if (NewMCID.hasOptionalDef()) { if (HasCC) AddDefaultT1CC(MIB, CCDead); else AddNoT1CC(MIB); } // Transfer the rest of operands. unsigned NumOps = MCID.getNumOperands(); for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { if (i < NumOps && MCID.OpInfo[i].isOptionalDef()) continue; if ((MCID.getOpcode() == ARM::t2RSBSri || MCID.getOpcode() == ARM::t2RSBri || MCID.getOpcode() == ARM::t2SXTB || MCID.getOpcode() == ARM::t2SXTH || MCID.getOpcode() == ARM::t2UXTB || MCID.getOpcode() == ARM::t2UXTH) && i == 2) // Skip the zero immediate operand, it's now implicit. continue; bool isPred = (i < NumOps && MCID.OpInfo[i].isPredicate()); if (SkipPred && isPred) continue; const MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR) // Skip implicit def of CPSR. Either it's modeled as an optional // def now or it's already an implicit def on the new instruction. continue; MIB.addOperand(MO); } if (!MCID.isPredicable() && NewMCID.isPredicable()) AddDefaultPred(MIB); // Transfer MI flags. MIB.setMIFlags(MI->getFlags()); DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); MBB.erase_instr(MI); ++NumNarrows; return true; }
bool Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry) { if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt)) return false; unsigned Scale = 1; bool HasImmOffset = false; bool HasShift = false; bool HasOffReg = true; bool isLdStMul = false; unsigned Opc = Entry.NarrowOpc1; unsigned OpNum = 3; // First 'rest' of operands. uint8_t ImmLimit = Entry.Imm1Limit; switch (Entry.WideOpc) { default: llvm_unreachable("Unexpected Thumb2 load / store opcode!"); case ARM::t2LDRi12: case ARM::t2STRi12: if (MI->getOperand(1).getReg() == ARM::SP) { Opc = Entry.NarrowOpc2; ImmLimit = Entry.Imm2Limit; HasOffReg = false; } Scale = 4; HasImmOffset = true; HasOffReg = false; break; case ARM::t2LDRBi12: case ARM::t2STRBi12: HasImmOffset = true; HasOffReg = false; break; case ARM::t2LDRHi12: case ARM::t2STRHi12: Scale = 2; HasImmOffset = true; HasOffReg = false; break; case ARM::t2LDRs: case ARM::t2LDRBs: case ARM::t2LDRHs: case ARM::t2LDRSBs: case ARM::t2LDRSHs: case ARM::t2STRs: case ARM::t2STRBs: case ARM::t2STRHs: HasShift = true; OpNum = 4; break; case ARM::t2LDMIA: case ARM::t2LDMDB: { unsigned BaseReg = MI->getOperand(0).getReg(); if (!isARMLowRegister(BaseReg) || Entry.WideOpc != ARM::t2LDMIA) return false; // For the non-writeback version (this one), the base register must be // one of the registers being loaded. 
bool isOK = false; for (unsigned i = 4; i < MI->getNumOperands(); ++i) { if (MI->getOperand(i).getReg() == BaseReg) { isOK = true; break; } } if (!isOK) return false; OpNum = 0; isLdStMul = true; break; } case ARM::t2LDMIA_RET: { unsigned BaseReg = MI->getOperand(1).getReg(); if (BaseReg != ARM::SP) return false; Opc = Entry.NarrowOpc2; // tPOP_RET OpNum = 2; isLdStMul = true; break; } case ARM::t2LDMIA_UPD: case ARM::t2LDMDB_UPD: case ARM::t2STMIA_UPD: case ARM::t2STMDB_UPD: { OpNum = 0; unsigned BaseReg = MI->getOperand(1).getReg(); if (BaseReg == ARM::SP && (Entry.WideOpc == ARM::t2LDMIA_UPD || Entry.WideOpc == ARM::t2STMDB_UPD)) { Opc = Entry.NarrowOpc2; // tPOP or tPUSH OpNum = 2; } else if (!isARMLowRegister(BaseReg) || (Entry.WideOpc != ARM::t2LDMIA_UPD && Entry.WideOpc != ARM::t2STMIA_UPD)) { return false; } isLdStMul = true; break; } } unsigned OffsetReg = 0; bool OffsetKill = false; if (HasShift) { OffsetReg = MI->getOperand(2).getReg(); OffsetKill = MI->getOperand(2).isKill(); if (MI->getOperand(3).getImm()) // Thumb1 addressing mode doesn't support shift. return false; } unsigned OffsetImm = 0; if (HasImmOffset) { OffsetImm = MI->getOperand(2).getImm(); unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale; if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset) // Make sure the immediate field fits. return false; } // Add the 16-bit load / store instruction. DebugLoc dl = MI->getDebugLoc(); MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc)); if (!isLdStMul) { MIB.addOperand(MI->getOperand(0)); MIB.addOperand(MI->getOperand(1)); if (HasImmOffset) MIB.addImm(OffsetImm / Scale); assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!"); if (HasOffReg) MIB.addReg(OffsetReg, getKillRegState(OffsetKill)); } // Transfer the rest of operands. for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum) MIB.addOperand(MI->getOperand(OpNum)); // Transfer memoperands. 
MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); // Transfer MI flags. MIB.setMIFlags(MI->getFlags()); DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); MBB.erase_instr(MI); ++NumLdSts; return true; }
bool Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, bool LiveCPSR, bool IsSelfLoop) { unsigned Opc = MI->getOpcode(); if (Opc == ARM::t2ADDri) { // If the source register is SP, try to reduce to tADDrSPi, otherwise // it's a normal reduce. if (MI->getOperand(1).getReg() != ARM::SP) { if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop)) return true; return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop); } // Try to reduce to tADDrSPi. unsigned Imm = MI->getOperand(2).getImm(); // The immediate must be in range, the destination register must be a low // reg, the predicate must be "always" and the condition flags must not // be being set. if (Imm & 3 || Imm > 1020) return false; if (!isARMLowRegister(MI->getOperand(0).getReg())) return false; if (MI->getOperand(3).getImm() != ARMCC::AL) return false; const MCInstrDesc &MCID = MI->getDesc(); if (MCID.hasOptionalDef() && MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR) return false; MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(ARM::tADDrSPi)) .addOperand(MI->getOperand(0)) .addOperand(MI->getOperand(1)) .addImm(Imm / 4); // The tADDrSPi has an implied scale by four. AddDefaultPred(MIB); // Transfer MI flags. 
MIB.setMIFlags(MI->getFlags()); DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " <<*MIB); MBB.erase_instr(MI); ++NumNarrows; return true; } if (Entry.LowRegs1 && !VerifyLowRegs(MI)) return false; if (MI->mayLoad() || MI->mayStore()) return ReduceLoadStore(MBB, MI, Entry); switch (Opc) { default: break; case ARM::t2ADDSri: case ARM::t2ADDSrr: { unsigned PredReg = 0; if (getInstrPredicate(MI, PredReg) == ARMCC::AL) { switch (Opc) { default: break; case ARM::t2ADDSri: { if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop)) return true; // fallthrough } case ARM::t2ADDSrr: return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop); } } break; } case ARM::t2RSBri: case ARM::t2RSBSri: case ARM::t2SXTB: case ARM::t2SXTH: case ARM::t2UXTB: case ARM::t2UXTH: if (MI->getOperand(2).getImm() == 0) return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop); break; case ARM::t2MOVi16: // Can convert only 'pure' immediate operands, not immediates obtained as // globals' addresses. if (MI->getOperand(1).isImm()) return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop); break; case ARM::t2CMPrr: { // Try to reduce to the lo-reg only version first. Why there are two // versions of the instruction is a mystery. // It would be nice to just have two entries in the master table that // are prioritized, but the table assumes a unique entry for each // source insn opcode. So for now, we hack a local entry record to use. static const ReduceEntry NarrowEntry = { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1,0 }; if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, IsSelfLoop)) return true; return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop); } } return false; }
bool Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry) { if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt)) return false; unsigned Scale = 1; bool HasImmOffset = false; bool HasShift = false; bool HasOffReg = true; bool isLdStMul = false; unsigned Opc = Entry.NarrowOpc1; unsigned OpNum = 3; // First 'rest' of operands. uint8_t ImmLimit = Entry.Imm1Limit; switch (Entry.WideOpc) { default: llvm_unreachable("Unexpected Thumb2 load / store opcode!"); case ARM::t2LDRi12: case ARM::t2STRi12: if (MI->getOperand(1).getReg() == ARM::SP) { Opc = Entry.NarrowOpc2; ImmLimit = Entry.Imm2Limit; } Scale = 4; HasImmOffset = true; HasOffReg = false; break; case ARM::t2LDRBi12: case ARM::t2STRBi12: HasImmOffset = true; HasOffReg = false; break; case ARM::t2LDRHi12: case ARM::t2STRHi12: Scale = 2; HasImmOffset = true; HasOffReg = false; break; case ARM::t2LDRs: case ARM::t2LDRBs: case ARM::t2LDRHs: case ARM::t2LDRSBs: case ARM::t2LDRSHs: case ARM::t2STRs: case ARM::t2STRBs: case ARM::t2STRHs: HasShift = true; OpNum = 4; break; case ARM::t2LDR_POST: case ARM::t2STR_POST: { if (!MBB.getParent()->getFunction().optForMinSize()) return false; if (!MI->hasOneMemOperand() || (*MI->memoperands_begin())->getAlignment() < 4) return false; // We're creating a completely different type of load/store - LDM from LDR. // For this reason we can't reuse the logic at the end of this function; we // have to implement the MI building here. bool IsStore = Entry.WideOpc == ARM::t2STR_POST; unsigned Rt = MI->getOperand(IsStore ? 1 : 0).getReg(); unsigned Rn = MI->getOperand(IsStore ? 0 : 1).getReg(); unsigned Offset = MI->getOperand(3).getImm(); unsigned PredImm = MI->getOperand(4).getImm(); unsigned PredReg = MI->getOperand(5).getReg(); assert(isARMLowRegister(Rt)); assert(isARMLowRegister(Rn)); if (Offset != 4) return false; // Add the 16-bit load / store instruction. 
DebugLoc dl = MI->getDebugLoc(); auto MIB = BuildMI(MBB, MI, dl, TII->get(Entry.NarrowOpc1)) .addReg(Rn, RegState::Define) .addReg(Rn) .addImm(PredImm) .addReg(PredReg) .addReg(Rt, IsStore ? 0 : RegState::Define); // Transfer memoperands. MIB.setMemRefs(MI->memoperands()); // Transfer MI flags. MIB.setMIFlags(MI->getFlags()); // Kill the old instruction. MI->eraseFromBundle(); ++NumLdSts; return true; } case ARM::t2LDMIA: { unsigned BaseReg = MI->getOperand(0).getReg(); assert(isARMLowRegister(BaseReg)); // For the non-writeback version (this one), the base register must be // one of the registers being loaded. bool isOK = false; for (unsigned i = 3; i < MI->getNumOperands(); ++i) { if (MI->getOperand(i).getReg() == BaseReg) { isOK = true; break; } } if (!isOK) return false; OpNum = 0; isLdStMul = true; break; } case ARM::t2STMIA: // If the base register is killed, we don't care what its value is after the // instruction, so we can use an updating STMIA. if (!MI->getOperand(0).isKill()) return false; break; case ARM::t2LDMIA_RET: { unsigned BaseReg = MI->getOperand(1).getReg(); if (BaseReg != ARM::SP) return false; Opc = Entry.NarrowOpc2; // tPOP_RET OpNum = 2; isLdStMul = true; break; } case ARM::t2LDMIA_UPD: case ARM::t2STMIA_UPD: case ARM::t2STMDB_UPD: { OpNum = 0; unsigned BaseReg = MI->getOperand(1).getReg(); if (BaseReg == ARM::SP && (Entry.WideOpc == ARM::t2LDMIA_UPD || Entry.WideOpc == ARM::t2STMDB_UPD)) { Opc = Entry.NarrowOpc2; // tPOP or tPUSH OpNum = 2; } else if (!isARMLowRegister(BaseReg) || (Entry.WideOpc != ARM::t2LDMIA_UPD && Entry.WideOpc != ARM::t2STMIA_UPD)) { return false; } isLdStMul = true; break; } } unsigned OffsetReg = 0; bool OffsetKill = false; bool OffsetInternal = false; if (HasShift) { OffsetReg = MI->getOperand(2).getReg(); OffsetKill = MI->getOperand(2).isKill(); OffsetInternal = MI->getOperand(2).isInternalRead(); if (MI->getOperand(3).getImm()) // Thumb1 addressing mode doesn't support shift. 
return false; } unsigned OffsetImm = 0; if (HasImmOffset) { OffsetImm = MI->getOperand(2).getImm(); unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale; if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset) // Make sure the immediate field fits. return false; } // Add the 16-bit load / store instruction. DebugLoc dl = MI->getDebugLoc(); MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc)); // tSTMIA_UPD takes a defining register operand. We've already checked that // the register is killed, so mark it as dead here. if (Entry.WideOpc == ARM::t2STMIA) MIB.addReg(MI->getOperand(0).getReg(), RegState::Define | RegState::Dead); if (!isLdStMul) { MIB.add(MI->getOperand(0)); MIB.add(MI->getOperand(1)); if (HasImmOffset) MIB.addImm(OffsetImm / Scale); assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!"); if (HasOffReg) MIB.addReg(OffsetReg, getKillRegState(OffsetKill) | getInternalReadRegState(OffsetInternal)); } // Transfer the rest of operands. for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum) MIB.add(MI->getOperand(OpNum)); // Transfer memoperands. MIB.setMemRefs(MI->memoperands()); // Transfer MI flags. MIB.setMIFlags(MI->getFlags()); LLVM_DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); MBB.erase_instr(MI); ++NumLdSts; return true; }
/// emitThumbRegPlusImmediate - Emits a series of instructions to materialize /// a destreg = basereg + immediate in Thumb code. Tries a series of ADDs or /// SUBs first, and uses a constant pool value if the instruction sequence would /// be too long. This is allowed to modify the condition flags. void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, unsigned DestReg, unsigned BaseReg, int NumBytes, const TargetInstrInfo &TII, const ARMBaseRegisterInfo &MRI, unsigned MIFlags) { bool isSub = NumBytes < 0; unsigned Bytes = (unsigned)NumBytes; if (isSub) Bytes = -NumBytes; int CopyOpc = 0; unsigned CopyBits = 0; unsigned CopyScale = 1; bool CopyNeedsCC = false; int ExtraOpc = 0; unsigned ExtraBits = 0; unsigned ExtraScale = 1; bool ExtraNeedsCC = false; // Strategy: // We need to select two types of instruction, maximizing the available // immediate range of each. The instructions we use will depend on whether // DestReg and BaseReg are low, high or the stack pointer. // * CopyOpc - DestReg = BaseReg + imm // This will be emitted once if DestReg != BaseReg, and never if // DestReg == BaseReg. // * ExtraOpc - DestReg = DestReg + imm // This will be emitted as many times as necessary to add the // full immediate. // If the immediate ranges of these instructions are not large enough to cover // NumBytes with a reasonable number of instructions, we fall back to using a // value loaded from a constant pool. if (DestReg == ARM::SP) { if (BaseReg == ARM::SP) { // sp -> sp // Already in right reg, no copy needed } else { // low -> sp or high -> sp CopyOpc = ARM::tMOVr; CopyBits = 0; } ExtraOpc = isSub ? 
ARM::tSUBspi : ARM::tADDspi; ExtraBits = 7; ExtraScale = 4; } else if (isARMLowRegister(DestReg)) { if (BaseReg == ARM::SP) { // sp -> low assert(!isSub && "Thumb1 does not have tSUBrSPi"); CopyOpc = ARM::tADDrSPi; CopyBits = 8; CopyScale = 4; } else if (DestReg == BaseReg) { // low -> same low // Already in right reg, no copy needed } else if (isARMLowRegister(BaseReg)) { // low -> different low CopyOpc = isSub ? ARM::tSUBi3 : ARM::tADDi3; CopyBits = 3; CopyNeedsCC = true; } else { // high -> low CopyOpc = ARM::tMOVr; CopyBits = 0; } ExtraOpc = isSub ? ARM::tSUBi8 : ARM::tADDi8; ExtraBits = 8; ExtraNeedsCC = true; } else /* DestReg is high */ { if (DestReg == BaseReg) { // high -> same high // Already in right reg, no copy needed } else { // {low,high,sp} -> high CopyOpc = ARM::tMOVr; CopyBits = 0; } ExtraOpc = 0; } // We could handle an unaligned immediate with an unaligned copy instruction // and an aligned extra instruction, but this case is not currently needed. assert(((Bytes & 3) == 0 || ExtraScale == 1) && "Unaligned offset, but all instructions require alignment"); unsigned CopyRange = ((1 << CopyBits) - 1) * CopyScale; // If we would emit the copy with an immediate of 0, just use tMOVr. if (CopyOpc && Bytes < CopyScale) { CopyOpc = ARM::tMOVr; CopyScale = 1; CopyNeedsCC = false; CopyRange = 0; } unsigned ExtraRange = ((1 << ExtraBits) - 1) * ExtraScale; // per instruction unsigned RequiredCopyInstrs = CopyOpc ? 1 : 0; unsigned RangeAfterCopy = (CopyRange > Bytes) ? 0 : (Bytes - CopyRange); // We could handle this case when the copy instruction does not require an // aligned immediate, but we do not currently do this. 
assert(RangeAfterCopy % ExtraScale == 0 && "Extra instruction requires immediate to be aligned"); unsigned RequiredExtraInstrs; if (ExtraRange) RequiredExtraInstrs = alignTo(RangeAfterCopy, ExtraRange) / ExtraRange; else if (RangeAfterCopy > 0) // We need an extra instruction but none is available RequiredExtraInstrs = 1000000; else RequiredExtraInstrs = 0; unsigned RequiredInstrs = RequiredCopyInstrs + RequiredExtraInstrs; unsigned Threshold = (DestReg == ARM::SP) ? 3 : 2; // Use a constant pool, if the sequence of ADDs/SUBs is too expensive. if (RequiredInstrs > Threshold) { emitThumbRegPlusImmInReg(MBB, MBBI, dl, DestReg, BaseReg, NumBytes, true, TII, MRI, MIFlags); return; } // Emit zero or one copy instructions if (CopyOpc) { unsigned CopyImm = std::min(Bytes, CopyRange) / CopyScale; Bytes -= CopyImm * CopyScale; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(CopyOpc), DestReg); if (CopyNeedsCC) MIB = MIB.add(t1CondCodeOp()); MIB.addReg(BaseReg, RegState::Kill); if (CopyOpc != ARM::tMOVr) { MIB.addImm(CopyImm); } MIB.setMIFlags(MIFlags).add(predOps(ARMCC::AL)); BaseReg = DestReg; } // Emit zero or more in-place add/sub instructions while (Bytes) { unsigned ExtraImm = std::min(Bytes, ExtraRange) / ExtraScale; Bytes -= ExtraImm * ExtraScale; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ExtraOpc), DestReg); if (ExtraNeedsCC) MIB = MIB.add(t1CondCodeOp()); MIB.addReg(BaseReg) .addImm(ExtraImm) .add(predOps(ARMCC::AL)) .setMIFlags(MIFlags); } }
bool Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, bool LiveCPSR, MachineInstr *CPSRDef) { if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr)) return false; unsigned Reg0 = MI->getOperand(0).getReg(); unsigned Reg1 = MI->getOperand(1).getReg(); if (Reg0 != Reg1) { // Try to commute the operands to make it a 2-address instruction. unsigned CommOpIdx1, CommOpIdx2; if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) || CommOpIdx1 != 1 || MI->getOperand(CommOpIdx2).getReg() != Reg0) return false; MachineInstr *CommutedMI = TII->commuteInstruction(MI); if (!CommutedMI) return false; } if (Entry.LowRegs2 && !isARMLowRegister(Reg0)) return false; if (Entry.Imm2Limit) { unsigned Imm = MI->getOperand(2).getImm(); unsigned Limit = (1 << Entry.Imm2Limit) - 1; if (Imm > Limit) return false; } else { unsigned Reg2 = MI->getOperand(2).getReg(); if (Entry.LowRegs2 && !isARMLowRegister(Reg2)) return false; } // Check if it's possible / necessary to transfer the predicate. const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2); unsigned PredReg = 0; ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); bool SkipPred = false; if (Pred != ARMCC::AL) { if (!NewMCID.isPredicable()) // Can't transfer predicate, fail. return false; } else { SkipPred = !NewMCID.isPredicable(); } bool HasCC = false; bool CCDead = false; const MCInstrDesc &MCID = MI->getDesc(); if (MCID.hasOptionalDef()) { unsigned NumOps = MCID.getNumOperands(); HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR); if (HasCC && MI->getOperand(NumOps-1).isDead()) CCDead = true; } if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead)) return false; // Avoid adding a false dependency on partial flag update by some 16-bit // instructions which has the 's' bit set. if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC && canAddPseudoFlagDep(CPSRDef, MI)) return false; // Add the 16-bit instruction. 
DebugLoc dl = MI->getDebugLoc(); MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID); MIB.addOperand(MI->getOperand(0)); if (NewMCID.hasOptionalDef()) { if (HasCC) AddDefaultT1CC(MIB, CCDead); else AddNoT1CC(MIB); } // Transfer the rest of operands. unsigned NumOps = MCID.getNumOperands(); for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { if (i < NumOps && MCID.OpInfo[i].isOptionalDef()) continue; if (SkipPred && MCID.OpInfo[i].isPredicate()) continue; MIB.addOperand(MI->getOperand(i)); } // Transfer MI flags. MIB.setMIFlags(MI->getFlags()); DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); MBB.erase(MI); ++Num2Addrs; return true; }
bool Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, bool LiveCPSR, MachineInstr *CPSRDef) { if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit)) return false; unsigned Limit = ~0U; unsigned Scale = (Entry.WideOpc == ARM::t2ADDrSPi) ? 4 : 1; if (Entry.Imm1Limit) Limit = ((1 << Entry.Imm1Limit) - 1) * Scale; const TargetInstrDesc &TID = MI->getDesc(); for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) { if (TID.OpInfo[i].isPredicate()) continue; const MachineOperand &MO = MI->getOperand(i); if (MO.isReg()) { unsigned Reg = MO.getReg(); if (!Reg || Reg == ARM::CPSR) continue; if (Entry.WideOpc == ARM::t2ADDrSPi && Reg == ARM::SP) continue; if (Entry.LowRegs1 && !isARMLowRegister(Reg)) return false; } else if (MO.isImm() && !TID.OpInfo[i].isPredicate()) { if (((unsigned)MO.getImm()) > Limit || (MO.getImm() & (Scale-1)) != 0) return false; } } // Check if it's possible / necessary to transfer the predicate. const TargetInstrDesc &NewTID = TII->get(Entry.NarrowOpc1); unsigned PredReg = 0; ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); bool SkipPred = false; if (Pred != ARMCC::AL) { if (!NewTID.isPredicable()) // Can't transfer predicate, fail. return false; } else { SkipPred = !NewTID.isPredicable(); } bool HasCC = false; bool CCDead = false; if (TID.hasOptionalDef()) { unsigned NumOps = TID.getNumOperands(); HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR); if (HasCC && MI->getOperand(NumOps-1).isDead()) CCDead = true; } if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead)) return false; // Avoid adding a false dependency on partial flag update by some 16-bit // instructions which has the 's' bit set. if (Entry.PartFlag && NewTID.hasOptionalDef() && HasCC && canAddPseudoFlagDep(CPSRDef, MI)) return false; // Add the 16-bit instruction. 
DebugLoc dl = MI->getDebugLoc(); MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID); MIB.addOperand(MI->getOperand(0)); if (NewTID.hasOptionalDef()) { if (HasCC) AddDefaultT1CC(MIB, CCDead); else AddNoT1CC(MIB); } // Transfer the rest of operands. unsigned NumOps = TID.getNumOperands(); for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { if (i < NumOps && TID.OpInfo[i].isOptionalDef()) continue; if ((TID.getOpcode() == ARM::t2RSBSri || TID.getOpcode() == ARM::t2RSBri) && i == 2) // Skip the zero immediate operand, it's now implicit. continue; bool isPred = (i < NumOps && TID.OpInfo[i].isPredicate()); if (SkipPred && isPred) continue; const MachineOperand &MO = MI->getOperand(i); if (Scale > 1 && !isPred && MO.isImm()) MIB.addImm(MO.getImm() / Scale); else { if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR) // Skip implicit def of CPSR. Either it's modeled as an optional // def now or it's already an implicit def on the new instruction. continue; MIB.addOperand(MO); } } if (!TID.isPredicable() && NewTID.isPredicable()) AddDefaultPred(MIB); // Transfer MI flags. MIB.setMIFlags(MI->getFlags()); DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); MBB.erase(MI); ++NumNarrows; return true; }
// Convert callee-save register save/restore instruction to do stack pointer // decrement/increment to allocate/deallocate the callee-save stack area by // converting store/load to use pre/post increment version. static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc) { unsigned NewOpc; bool NewIsUnscaled = false; switch (MBBI->getOpcode()) { default: llvm_unreachable("Unexpected callee-save save/restore opcode!"); case AArch64::STPXi: NewOpc = AArch64::STPXpre; break; case AArch64::STPDi: NewOpc = AArch64::STPDpre; break; case AArch64::STRXui: NewOpc = AArch64::STRXpre; NewIsUnscaled = true; break; case AArch64::STRDui: NewOpc = AArch64::STRDpre; NewIsUnscaled = true; break; case AArch64::LDPXi: NewOpc = AArch64::LDPXpost; break; case AArch64::LDPDi: NewOpc = AArch64::LDPDpost; break; case AArch64::LDRXui: NewOpc = AArch64::LDRXpost; NewIsUnscaled = true; break; case AArch64::LDRDui: NewOpc = AArch64::LDRDpost; NewIsUnscaled = true; break; } MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc)); MIB.addReg(AArch64::SP, RegState::Define); // Copy all operands other than the immediate offset. unsigned OpndIdx = 0; for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd; ++OpndIdx) MIB.add(MBBI->getOperand(OpndIdx)); assert(MBBI->getOperand(OpndIdx).getImm() == 0 && "Unexpected immediate offset in first/last callee-save save/restore " "instruction!"); assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP && "Unexpected base register in callee-save save/restore instruction!"); // Last operand is immediate offset that needs fixing. 
assert(CSStackSizeInc % 8 == 0); int64_t CSStackSizeIncImm = CSStackSizeInc; if (!NewIsUnscaled) CSStackSizeIncImm /= 8; MIB.addImm(CSStackSizeIncImm); MIB.setMIFlags(MBBI->getFlags()); MIB.setMemRefs(MBBI->memoperands_begin(), MBBI->memoperands_end()); return std::prev(MBB.erase(MBBI)); }