void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  assert(MBBI->getDesc().isReturn() &&
         "Can only insert epilog into returning blocks");
  unsigned RetOpcode = MBBI->getOpcode();
  DebugLoc dl = MBBI->getDebugLoc();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
  const ARMBaseInstrInfo &TII =
    *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
  assert(!AFI->isThumb1OnlyFunction() &&
         "This emitEpilogue does not support Thumb1!");
  bool isARM = !AFI->isThumbFunction();

  unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
  int NumBytes = (int)MFI->getStackSize();
  unsigned FramePtr = RegInfo->getFrameRegister(MF);

  if (!AFI->hasStackFrame()) {
    if (NumBytes != 0)
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
  } else {
    // Unwind MBBI to point to first LDR / VLDRD.
    const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
    if (MBBI != MBB.begin()) {
      do
        --MBBI;
      while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs));
      if (!isCSRestore(MBBI, TII, CSRegs))
        ++MBBI;
    }

    // Move SP to start of FP callee save spill area.
    NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
                 AFI->getGPRCalleeSavedArea2Size() +
                 AFI->getDPRCalleeSavedAreaSize());

    // Reset SP based on frame pointer only if the stack frame extends beyond
    // frame pointer stack slot or target is ELF and the function has FP.
    if (AFI->shouldRestoreSPFromFP()) {
      NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
      if (NumBytes) {
        if (isARM)
          emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
                                  ARMCC::AL, 0, TII);
        else {
          // It's not possible to restore SP from FP in a single instruction.
          // For Darwin, this looks like:
          //   mov sp, r7
          //   sub sp, #24
          // This is bad, if an interrupt is taken after the mov, sp is in an
          // inconsistent state.
          // Use the first callee-saved register as a scratch register.
          assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) &&
                 "No scratch register to restore SP from FP!");
          emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
                                 ARMCC::AL, 0, TII);
          AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
                           .addReg(ARM::R4));
        }
      } else {
        // Thumb2 or ARM.
        if (isARM)
          BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
            .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
        else
          AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
                           .addReg(FramePtr));
      }
    } else if (NumBytes)
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);

    // Increment past our save areas.
    if (AFI->getDPRCalleeSavedAreaSize()) {
      MBBI++;
      // Since vpop register list cannot have gaps, there may be multiple vpop
      // instructions in the epilogue.
      while (MBBI->getOpcode() == ARM::VLDMDIA_UPD)
        MBBI++;
    }
    if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
    if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
  }

  if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND ||
      RetOpcode == ARM::TCRETURNri || RetOpcode == ARM::TCRETURNriND) {
    // Tail call return: adjust the stack pointer and jump to callee.
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);

    // Jump to label or value in register.
    if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND) {
      unsigned TCOpcode = (RetOpcode == ARM::TCRETURNdi)
        ? (STI.isThumb() ? ARM::tTAILJMPd : ARM::TAILJMPd)
        : (STI.isThumb() ? ARM::tTAILJMPdND : ARM::TAILJMPdND);
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
      if (JumpTarget.isGlobal())
        MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                             JumpTarget.getTargetFlags());
      else {
        assert(JumpTarget.isSymbol());
        MIB.addExternalSymbol(JumpTarget.getSymbolName(),
                              JumpTarget.getTargetFlags());
      }
      // Add the default predicate in Thumb mode.
      if (STI.isThumb())
        MIB.addImm(ARMCC::AL).addReg(0);
    } else if (RetOpcode == ARM::TCRETURNri) {
      BuildMI(MBB, MBBI, dl,
              TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr))
        .addReg(JumpTarget.getReg(), RegState::Kill);
    } else if (RetOpcode == ARM::TCRETURNriND) {
      BuildMI(MBB, MBBI, dl,
              TII.get(STI.isThumb() ? ARM::tTAILJMPrND : ARM::TAILJMPrND))
        .addReg(JumpTarget.getReg(), RegState::Kill);
    }

    MachineInstr *NewMI = prior(MBBI);
    for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
      NewMI->addOperand(MBBI->getOperand(i));

    // Delete the pseudo instruction TCRETURN.
    MBB.erase(MBBI);
    MBBI = NewMI;
  }

  if (VARegSaveSize)
    emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize);
}

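/// emitEpilogue - Emit the XCore function epilogue: handle EH_RETURN, restore
/// LR/FP from their spill slots, and unwind the remaining stack adjustment,
/// folding it into a RETSP when the LR slot sits at offset 0.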
void XCoreFrameLowering::emitEpilogue(MachineFunction &MF,
                                      MachineBasicBlock &MBB) const {
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  const XCoreInstrInfo &TII = *MF.getSubtarget<XCoreSubtarget>().getInstrInfo();
  XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
  DebugLoc dl = MBBI->getDebugLoc();
  unsigned RetOpcode = MBBI->getOpcode();

  // Work out frame sizes.
  // We will adjust the SP in stages towards the final FrameSize.
  int RemainingAdj = MFI->getStackSize();
  assert(RemainingAdj % 4 == 0 && "Misaligned frame size");
  RemainingAdj /= 4;

  if (RetOpcode == XCore::EH_RETURN) {
    // 'Restore' the exception info the unwinder has placed into the stack
    // slots.
    SmallVector<StackSlotInfo, 2> SpillList;
    GetEHSpillList(SpillList, MFI, XFI, MF.getSubtarget().getTargetLowering());
    RestoreSpillList(MBB, MBBI, dl, TII, RemainingAdj, SpillList);

    // Return to the landing pad.
    unsigned EhStackReg = MBBI->getOperand(0).getReg();
    unsigned EhHandlerReg = MBBI->getOperand(1).getReg();
    BuildMI(MBB, MBBI, dl, TII.get(XCore::SETSP_1r)).addReg(EhStackReg);
    BuildMI(MBB, MBBI, dl, TII.get(XCore::BAU_1r)).addReg(EhHandlerReg);
    MBB.erase(MBBI);  // Erase the previous return instruction.
    return;
  }

  bool restoreLR = XFI->hasLRSpillSlot();
  bool UseRETSP = restoreLR && RemainingAdj &&
                  (MFI->getObjectOffset(XFI->getLRSpillSlot()) == 0);
  if (UseRETSP)
    restoreLR = false;
  bool FP = hasFP(MF);

  if (FP) // Restore the stack pointer.
    BuildMI(MBB, MBBI, dl, TII.get(XCore::SETSP_1r)).addReg(FramePtr);

  // If necessary, restore LR and FP from the stack, as we EXTSP.
  SmallVector<StackSlotInfo, 2> SpillList;
  GetSpillList(SpillList, MFI, XFI, restoreLR, FP);
  RestoreSpillList(MBB, MBBI, dl, TII, RemainingAdj, SpillList);

  if (RemainingAdj) {
    // Complete all but one of the remaining stack adjustments.
    IfNeededLDAWSP(MBB, MBBI, dl, TII, 0, RemainingAdj);
    if (UseRETSP) {
      // Fold the epilogue into the return instruction.
      assert(RetOpcode == XCore::RETSP_u6 || RetOpcode == XCore::RETSP_lu6);
      int Opcode = isImmU6(RemainingAdj) ? XCore::RETSP_u6 : XCore::RETSP_lu6;
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opcode))
                                  .addImm(RemainingAdj);
      for (unsigned i = 3, e = MBBI->getNumOperands(); i < e; ++i)
        MIB->addOperand(MBBI->getOperand(i)); // copy any variadic operands
      MBB.erase(MBBI);  // Erase the previous return instruction.
    } else {
      int Opcode = isImmU6(RemainingAdj) ? XCore::LDAWSP_ru6
                                         : XCore::LDAWSP_lru6;
      BuildMI(MBB, MBBI, dl, TII.get(Opcode), XCore::SP).addImm(RemainingAdj);
      // Don't erase the return instruction.
    }
  } // else Don't erase the return instruction.
}

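/// restoreCalleeSavedRegisters - Reload callee-saved register pairs with LDP
/// instructions; the final post-indexed LDP also de-allocates the stack.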
bool AArch64FrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    const std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  unsigned Count = CSI.size();
  DebugLoc DL;
  assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");

  if (MI != MBB.end())
    DL = MI->getDebugLoc();

  for (unsigned i = 0; i < Count; i += 2) {
    unsigned Reg1 = CSI[i].getReg();
    unsigned Reg2 = CSI[i + 1].getReg();
    // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
    // list to come in sorted by frame index so that we can issue the store
    // pair instructions directly. Assert if we see anything otherwise.
    assert(CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx() &&
           "Out of order callee saved regs!");
    // Issue sequence of non-sp increment and sp-pi restores for cs regs. Only
    // the last load is sp-pi post-increment and de-allocates the stack:
    // For example:
    //    ldp fp, lr, [sp, #32]       // addImm(+4)
    //    ldp x20, x19, [sp, #16]     // addImm(+2)
    //    ldp x22, x21, [sp], #48     // addImm(+6)
    // Note: see comment in spillCalleeSavedRegisters()
    unsigned LdrOpc;

    assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
    assert((i & 1) == 0 && "Odd index for callee-saved reg spill!");
    if (AArch64::GPR64RegClass.contains(Reg1)) {
      assert(AArch64::GPR64RegClass.contains(Reg2) &&
             "Expected GPR64 callee-saved register pair!");
      if (i == Count - 2)
        LdrOpc = AArch64::LDPXpost;
      else
        LdrOpc = AArch64::LDPXi;
    } else if (AArch64::FPR64RegClass.contains(Reg1)) {
      assert(AArch64::FPR64RegClass.contains(Reg2) &&
             "Expected FPR64 callee-saved register pair!");
      if (i == Count - 2)
        LdrOpc = AArch64::LDPDpost;
      else
        LdrOpc = AArch64::LDPDi;
    } else
      llvm_unreachable("Unexpected callee saved register!");
    DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1) << ", "
                 << TRI->getName(Reg2) << ") -> fi#(" << CSI[i].getFrameIdx()
                 << ", " << CSI[i + 1].getFrameIdx() << ")\n");

    // Compute offset: i = 0 => offset = Count - 2; i = 2 => offset = Count - 4;
    // etc.
    const int Offset = (i == Count - 2) ? Count : Count - i - 2;
    assert((Offset >= -64 && Offset <= 63) &&
           "Offset out of bounds for LDP immediate");
    MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
    if (LdrOpc == AArch64::LDPXpost || LdrOpc == AArch64::LDPDpost)
      MIB.addReg(AArch64::SP, RegState::Define);

    MIB.addReg(Reg2, getDefRegState(true))
      .addReg(Reg1, getDefRegState(true))
      .addReg(AArch64::SP)
      .addImm(Offset); // [sp], #offset * 8  or [sp, #offset * 8]
                       // where the factor * 8 is implicit
  }
  return true;
}

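/// spillCalleeSavedRegisters - Save the call-saved GPRs (and GPR varargs)
/// with a single STMG instruction, and spill call-saved FPRs to their frame
/// slots in the normal TargetInstrInfo way.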
bool SystemZFrameLowering::
spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          const std::vector<CalleeSavedInfo> &CSI,
                          const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
  SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
  bool IsVarArg = MF.getFunction()->isVarArg();
  DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();

  // Scan the call-saved GPRs and find the bounds of the register spill area.
  unsigned SavedGPRFrameSize = 0;
  unsigned LowGPR = 0;
  unsigned HighGPR = SystemZ::R15D;
  unsigned StartOffset = -1U;
  for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
    unsigned Reg = CSI[I].getReg();
    if (SystemZ::GR64BitRegClass.contains(Reg)) {
      SavedGPRFrameSize += 8;
      unsigned Offset = RegSpillOffsets[Reg];
      assert(Offset && "Unexpected GPR save");
      if (StartOffset > Offset) {
        LowGPR = Reg;
        StartOffset = Offset;
      }
    }
  }

  // Save information about the range and location of the call-saved
  // registers, for use by the epilogue inserter.
  ZFI->setSavedGPRFrameSize(SavedGPRFrameSize);
  ZFI->setLowSavedGPR(LowGPR);
  ZFI->setHighSavedGPR(HighGPR);

  // Include the GPR varargs, if any.  R6D is call-saved, so would
  // be included by the loop above, but we also need to handle the
  // call-clobbered argument registers.
  if (IsVarArg) {
    unsigned FirstGPR = ZFI->getVarArgsFirstGPR();
    if (FirstGPR < SystemZ::NumArgGPRs) {
      unsigned Reg = SystemZ::ArgGPRs[FirstGPR];
      unsigned Offset = RegSpillOffsets[Reg];
      if (StartOffset > Offset) {
        LowGPR = Reg;
        StartOffset = Offset;
      }
    }
  }

  // Save GPRs
  if (LowGPR) {
    assert(LowGPR != HighGPR && "Should be saving %r15 and something else");

    // Build an STMG instruction.
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::STMG));

    // Add the explicit register operands.
    addSavedGPR(MBB, MIB, TM, LowGPR, false);
    addSavedGPR(MBB, MIB, TM, HighGPR, false);

    // Add the address.
    MIB.addReg(SystemZ::R15D).addImm(StartOffset);

    // Make sure all call-saved GPRs are included as operands and are
    // marked as live on entry.
    for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
      unsigned Reg = CSI[I].getReg();
      if (SystemZ::GR64BitRegClass.contains(Reg))
        addSavedGPR(MBB, MIB, TM, Reg, true);
    }

    // ...likewise GPR varargs.
    if (IsVarArg)
      for (unsigned I = ZFI->getVarArgsFirstGPR(); I < SystemZ::NumArgGPRs; ++I)
        addSavedGPR(MBB, MIB, TM, SystemZ::ArgGPRs[I], true);
  }

  // Save FPRs in the normal TargetInstrInfo way.
  for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
    unsigned Reg = CSI[I].getReg();
    if (SystemZ::FP64BitRegClass.contains(Reg)) {
      MBB.addLiveIn(Reg);
      TII->storeRegToStackSlot(MBB, MBBI, Reg, true, CSI[I].getFrameIdx(),
                               &SystemZ::FP64BitRegClass, TRI);
    }
  }

  return true;
}

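/// emitFrameMemOps - Emit the callee-save stores (prologue) or loads
/// (epilogue) for the registers in CSI, pairing adjacent registers of the
/// same class into a single paired instruction where possible.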
void AArch64FrameLowering::emitFrameMemOps(bool isPrologue,
                                           MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator MBBI,
                                           const std::vector<CalleeSavedInfo> &CSI,
                                           const TargetRegisterInfo *TRI,
                                           const LoadStoreMethod PossClasses[],
                                           unsigned NumClasses) const {
  DebugLoc DL = MBB.findDebugLoc(MBBI);
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();

  // A certain amount of implicit contract is present here. The actual stack
  // offsets haven't been allocated officially yet, so for strictly correct
  // code we rely on the fact that the elements of CSI are allocated in order
  // starting at SP, purely as dictated by size and alignment. In practice
  // since this function handles the only accesses to those slots it's not
  // quite so important.
  //
  // We have also ordered the Callee-saved register list in AArch64CallingConv
  // so that the above scheme puts registers in order: in particular we want
  // &X30 to be &X29+8 for an ABI-correct frame record (PCS 5.2.2)
  for (unsigned i = 0, e = CSI.size(); i < e; ++i) {
    unsigned Reg = CSI[i].getReg();

    // First we need to find out which register class the register belongs to
    // so that we can use the correct load/store instructions.
    unsigned ClassIdx;
    for (ClassIdx = 0; ClassIdx < NumClasses; ++ClassIdx) {
      if (PossClasses[ClassIdx].RegClass->contains(Reg))
        break;
    }
    assert(ClassIdx != NumClasses &&
           "Asked to store register in unexpected class");
    const TargetRegisterClass &TheClass = *PossClasses[ClassIdx].RegClass;

    // Now we need to decide whether it's possible to emit a paired
    // instruction: for this we want the next register to be in the same
    // class.
    MachineInstrBuilder NewMI;
    bool Pair = false;
    if (i + 1 < CSI.size() && TheClass.contains(CSI[i+1].getReg())) {
      Pair = true;
      unsigned StLow = 0, StHigh = 0;
      if (isPrologue) {
        // Most of these registers will be live-in to the MBB and killed by
        // our store, though there are exceptions (see
        // determinePrologueDeath).
        StLow = getKillRegState(determinePrologueDeath(MBB, CSI[i+1].getReg()));
        StHigh = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg()));
      } else {
        StLow = RegState::Define;
        StHigh = RegState::Define;
      }

      NewMI = BuildMI(MBB, MBBI, DL, TII.get(PossClasses[ClassIdx].PairOpcode))
                .addReg(CSI[i+1].getReg(), StLow)
                .addReg(CSI[i].getReg(), StHigh);

      // If it's a paired op, we've consumed two registers
      ++i;
    } else {
      unsigned State;
      if (isPrologue) {
        State = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg()));
      } else {
        State = RegState::Define;
      }

      NewMI = BuildMI(MBB, MBBI, DL,
                      TII.get(PossClasses[ClassIdx].SingleOpcode))
                .addReg(CSI[i].getReg(), State);
    }

    // Note that the FrameIdx refers to the second register in a pair: it
    // will be allocated the smaller numeric address and so is the one an
    // LDP/STP address must use.
    int FrameIdx = CSI[i].getFrameIdx();
    MachineMemOperand::MemOperandFlags Flags;
    Flags = isPrologue ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
    MachineMemOperand *MMO =
      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
                              Flags,
                              Pair ? TheClass.getSize() * 2 : TheClass.getSize(),
                              MFI.getObjectAlignment(FrameIdx));

    NewMI.addFrameIndex(FrameIdx)
      .addImm(0)                 // address-register offset
      .addMemOperand(MMO);

    if (isPrologue)
      NewMI.setMIFlags(MachineInstr::FrameSetup);

    // For aesthetic reasons, during an epilogue we want to emit complementary
    // operations to the prologue, but in the opposite order. So we still
    // iterate through the CalleeSavedInfo list in order, but we put the
    // instructions successively earlier in the MBB.
    if (!isPrologue)
      --MBBI;
  }
}

/// emitThumbRegPlusImmediate - Emits a series of instructions to materialize
/// a destreg = basereg + immediate in Thumb code.
void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator &MBBI,
                                     DebugLoc dl,
                                     unsigned DestReg, unsigned BaseReg,
                                     int NumBytes, const TargetInstrInfo &TII,
                                     const ARMBaseRegisterInfo& MRI,
                                     unsigned MIFlags) {
  bool isSub = NumBytes < 0;
  unsigned Bytes = (unsigned)NumBytes;
  if (isSub) Bytes = -NumBytes;
  bool isMul4 = (Bytes & 3) == 0;
  bool isTwoAddr = false;
  bool DstNotEqBase = false;
  unsigned NumBits = 1;
  unsigned Scale = 1;
  int Opc = 0;
  int ExtraOpc = 0;
  bool NeedCC = false;

  if (DestReg == BaseReg && BaseReg == ARM::SP) {
    assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!");
    NumBits = 7;
    Scale = 4;
    Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
    isTwoAddr = true;
  } else if (!isSub && BaseReg == ARM::SP) {
    // r1 = add sp, 403
    // =>
    // r1 = add sp, 100 * 4
    // r1 = add r1, 3
    if (!isMul4) {
      Bytes &= ~3;
      ExtraOpc = ARM::tADDi3;
    }
    NumBits = 8;
    Scale = 4;
    Opc = ARM::tADDrSPi;
  } else {
    // sp = sub sp, c
    // r1 = sub sp, c
    // r8 = sub sp, c
    if (DestReg != BaseReg)
      DstNotEqBase = true;
    NumBits = 8;
    if (DestReg == ARM::SP) {
      Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
      assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!");
      NumBits = 7;
      Scale = 4;
    } else {
      Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
      NumBits = 8;
      NeedCC = true;
    }
    isTwoAddr = true;
  }

  unsigned NumMIs = calcNumMI(Opc, ExtraOpc, Bytes, NumBits, Scale);
  unsigned Threshold = (DestReg == ARM::SP) ? 3 : 2;
  if (NumMIs > Threshold) {
    // This will expand into too many instructions. Load the immediate from a
    // constpool entry.
    emitThumbRegPlusImmInReg(MBB, MBBI, dl,
                             DestReg, BaseReg, NumBytes, true,
                             TII, MRI, MIFlags);
    return;
  }

  if (DstNotEqBase) {
    if (isARMLowRegister(DestReg) && isARMLowRegister(BaseReg)) {
      // If both are low registers, emit DestReg = add BaseReg, min(Imm, 7)
      unsigned Chunk = (1 << 3) - 1;
      unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
      Bytes -= ThisVal;
      const MCInstrDesc &MCID = TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3);
      const MachineInstrBuilder MIB =
        AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg)
                         .setMIFlags(MIFlags));
      AddDefaultPred(MIB.addReg(BaseReg, RegState::Kill).addImm(ThisVal));
    } else {
      AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
                       .addReg(BaseReg, RegState::Kill))
        .setMIFlags(MIFlags);
    }
    BaseReg = DestReg;
  }

  unsigned Chunk = ((1 << NumBits) - 1) * Scale;
  while (Bytes) {
    unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
    Bytes -= ThisVal;
    ThisVal /= Scale;
    // Build the new tADD / tSUB.
    if (isTwoAddr) {
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
      if (NeedCC)
        MIB = AddDefaultT1CC(MIB);
      MIB.addReg(DestReg).addImm(ThisVal);
      MIB = AddDefaultPred(MIB);
      MIB.setMIFlags(MIFlags);
    } else {
      bool isKill = BaseReg != ARM::SP;
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
      if (NeedCC)
        MIB = AddDefaultT1CC(MIB);
      MIB.addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal);
      MIB = AddDefaultPred(MIB);
      MIB.setMIFlags(MIFlags);

      BaseReg = DestReg;
      if (Opc == ARM::tADDrSPi) {
        // r4 = add sp, imm
        // r4 = add r4, imm
        // ...
        NumBits = 8;
        Scale = 1;
        Chunk = ((1 << NumBits) - 1) * Scale;
        Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
        NeedCC = isTwoAddr = true;
      }
    }
  }

  if (ExtraOpc) {
    const MCInstrDesc &MCID = TII.get(ExtraOpc);
    AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg))
                     .addReg(DestReg, RegState::Kill)
                     .addImm(((unsigned)NumBytes) & 3)
                     .setMIFlags(MIFlags));
  }
}

// Because of how EmitCmp is called with fast-isel, you can
// end up with redundant "andi" instructions after the sequences emitted below.
// We should try and solve this issue in the future.
//
bool MipsFastISel::emitCmp(unsigned ResultReg, const CmpInst *CI) {
  const Value *Left = CI->getOperand(0), *Right = CI->getOperand(1);
  bool IsUnsigned = CI->isUnsigned();
  unsigned LeftReg = getRegEnsuringSimpleIntegerWidening(Left, IsUnsigned);
  if (LeftReg == 0)
    return false;
  unsigned RightReg = getRegEnsuringSimpleIntegerWidening(Right, IsUnsigned);
  if (RightReg == 0)
    return false;
  CmpInst::Predicate P = CI->getPredicate();

  switch (P) {
  default:
    return false;
  case CmpInst::ICMP_EQ: {
    unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
    emitInst(Mips::XOR, TempReg).addReg(LeftReg).addReg(RightReg);
    emitInst(Mips::SLTiu, ResultReg).addReg(TempReg).addImm(1);
    break;
  }
  case CmpInst::ICMP_NE: {
    unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
    emitInst(Mips::XOR, TempReg).addReg(LeftReg).addReg(RightReg);
    emitInst(Mips::SLTu, ResultReg).addReg(Mips::ZERO).addReg(TempReg);
    break;
  }
  case CmpInst::ICMP_UGT: {
    emitInst(Mips::SLTu, ResultReg).addReg(RightReg).addReg(LeftReg);
    break;
  }
  case CmpInst::ICMP_ULT: {
    emitInst(Mips::SLTu, ResultReg).addReg(LeftReg).addReg(RightReg);
    break;
  }
  case CmpInst::ICMP_UGE: {
    unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
    emitInst(Mips::SLTu, TempReg).addReg(LeftReg).addReg(RightReg);
    emitInst(Mips::XORi, ResultReg).addReg(TempReg).addImm(1);
    break;
  }
  case CmpInst::ICMP_ULE: {
    unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
    emitInst(Mips::SLTu, TempReg).addReg(RightReg).addReg(LeftReg);
    emitInst(Mips::XORi, ResultReg).addReg(TempReg).addImm(1);
    break;
  }
  case CmpInst::ICMP_SGT: {
    emitInst(Mips::SLT, ResultReg).addReg(RightReg).addReg(LeftReg);
    break;
  }
  case CmpInst::ICMP_SLT: {
    emitInst(Mips::SLT, ResultReg).addReg(LeftReg).addReg(RightReg);
    break;
  }
  case CmpInst::ICMP_SGE: {
    unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
    emitInst(Mips::SLT, TempReg).addReg(LeftReg).addReg(RightReg);
    emitInst(Mips::XORi, ResultReg).addReg(TempReg).addImm(1);
    break;
  }
  case CmpInst::ICMP_SLE: {
    unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
    emitInst(Mips::SLT, TempReg).addReg(RightReg).addReg(LeftReg);
    emitInst(Mips::XORi, ResultReg).addReg(TempReg).addImm(1);
    break;
  }
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UNE:
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_OGE: {
    if (UnsupportedFPMode)
      return false;
    bool IsFloat = Left->getType()->isFloatTy();
    bool IsDouble = Left->getType()->isDoubleTy();
    if (!IsFloat && !IsDouble)
      return false;
    unsigned Opc, CondMovOpc;
    switch (P) {
    case CmpInst::FCMP_OEQ:
      Opc = IsFloat ? Mips::C_EQ_S : Mips::C_EQ_D32;
      CondMovOpc = Mips::MOVT_I;
      break;
    case CmpInst::FCMP_UNE:
      Opc = IsFloat ? Mips::C_EQ_S : Mips::C_EQ_D32;
      CondMovOpc = Mips::MOVF_I;
      break;
    case CmpInst::FCMP_OLT:
      Opc = IsFloat ? Mips::C_OLT_S : Mips::C_OLT_D32;
      CondMovOpc = Mips::MOVT_I;
      break;
    case CmpInst::FCMP_OLE:
      Opc = IsFloat ? Mips::C_OLE_S : Mips::C_OLE_D32;
      CondMovOpc = Mips::MOVT_I;
      break;
    case CmpInst::FCMP_OGT:
      Opc = IsFloat ? Mips::C_ULE_S : Mips::C_ULE_D32;
      CondMovOpc = Mips::MOVF_I;
      break;
    case CmpInst::FCMP_OGE:
      Opc = IsFloat ? Mips::C_ULT_S : Mips::C_ULT_D32;
      CondMovOpc = Mips::MOVF_I;
      break;
    default:
      llvm_unreachable("Only switching of a subset of CCs.");
    }
    unsigned RegWithZero = createResultReg(&Mips::GPR32RegClass);
    unsigned RegWithOne = createResultReg(&Mips::GPR32RegClass);
    emitInst(Mips::ADDiu, RegWithZero).addReg(Mips::ZERO).addImm(0);
    emitInst(Mips::ADDiu, RegWithOne).addReg(Mips::ZERO).addImm(1);
    emitInst(Opc).addReg(LeftReg).addReg(RightReg).addReg(
        Mips::FCC0, RegState::ImplicitDefine);
    MachineInstrBuilder MI = emitInst(CondMovOpc, ResultReg)
                                 .addReg(RegWithOne)
                                 .addReg(Mips::FCC0)
                                 .addReg(RegWithZero, RegState::Implicit);
    MI->tieOperands(0, 3);
    break;
  }
  }
  return true;
}

/// EmitDbgValue - Generate machine instruction for a dbg_value node.
///
MachineInstr *
InstrEmitter::EmitDbgValue(SDDbgValue *SD,
                           DenseMap<SDValue, unsigned> &VRBaseMap) {
  uint64_t Offset = SD->getOffset();
  MDNode *Var = SD->getVariable();
  MDNode *Expr = SD->getExpression();
  DebugLoc DL = SD->getDebugLoc();
  assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
         "Expected inlined-at fields to agree");

  if (SD->getKind() == SDDbgValue::FRAMEIX) {
    // Stack address; this needs to be lowered in target-dependent fashion.
    // EmitTargetCodeForFrameDebugValue is responsible for allocation.
    return BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE))
        .addFrameIndex(SD->getFrameIx())
        .addImm(Offset)
        .addMetadata(Var)
        .addMetadata(Expr);
  }
  // Otherwise, we're going to create an instruction here.
  const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);
  MachineInstrBuilder MIB = BuildMI(*MF, DL, II);
  if (SD->getKind() == SDDbgValue::SDNODE) {
    SDNode *Node = SD->getSDNode();
    SDValue Op = SDValue(Node, SD->getResNo());
    // It's possible we replaced this SDNode with other(s) and therefore
    // didn't generate code for it. It's better to catch these cases where
    // they happen and transfer the debug info, but trying to guarantee that
    // in all cases would be very fragile; this is a safeguard for any
    // that were missed.
    DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op);
    if (I == VRBaseMap.end())
      MIB.addReg(0U);       // undef
    else
      AddOperand(MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap,
                 /*IsDebug=*/true, /*IsClone=*/false, /*IsCloned=*/false);
  } else if (SD->getKind() == SDDbgValue::CONST) {
    const Value *V = SD->getConst();
    if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
      if (CI->getBitWidth() > 64)
        MIB.addCImm(CI);
      else
        MIB.addImm(CI->getSExtValue());
    } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
      MIB.addFPImm(CF);
    } else {
      // Could be an Undef. In any case insert an Undef so we can see what we
      // dropped.
      MIB.addReg(0U);
    }
  } else {
    // Insert an Undef so we can see what we dropped.
    MIB.addReg(0U);
  }

  // Indirect addressing is indicated by an Imm as the second parameter.
  if (SD->isIndirect())
    MIB.addImm(Offset);
  else {
    assert(Offset == 0 && "direct value cannot have an offset");
    MIB.addReg(0U, RegState::Debug);
  }

  MIB.addMetadata(Var);
  MIB.addMetadata(Expr);

  return &*MIB;
}

/// EmitMachineNode - Generate machine code for a target-specific node and
/// needed dependencies.
///
void InstrEmitter::
EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
                DenseMap<SDValue, unsigned> &VRBaseMap) {
  unsigned Opc = Node->getMachineOpcode();

  // Handle subreg insert/extract specially
  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
      Opc == TargetOpcode::INSERT_SUBREG ||
      Opc == TargetOpcode::SUBREG_TO_REG) {
    EmitSubregNode(Node, VRBaseMap, IsClone, IsCloned);
    return;
  }

  // Handle COPY_TO_REGCLASS specially.
  if (Opc == TargetOpcode::COPY_TO_REGCLASS) {
    EmitCopyToRegClassNode(Node, VRBaseMap);
    return;
  }

  // Handle REG_SEQUENCE specially.
  if (Opc == TargetOpcode::REG_SEQUENCE) {
    EmitRegSequence(Node, VRBaseMap, IsClone, IsCloned);
    return;
  }

  if (Opc == TargetOpcode::IMPLICIT_DEF)
    // We want a unique VR for each IMPLICIT_DEF use.
    return;

  const MCInstrDesc &II = TII->get(Opc);
  unsigned NumResults = CountResults(Node);
  unsigned NumDefs = II.getNumDefs();
  const MCPhysReg *ScratchRegs = nullptr;

  // Handle STACKMAP and PATCHPOINT specially and then use the generic code.
  if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
    // Stackmaps do not have arguments and do not preserve their calling
    // convention. However, to simplify runtime support, they clobber the same
    // scratch registers as AnyRegCC.
    unsigned CC = CallingConv::AnyReg;
    if (Opc == TargetOpcode::PATCHPOINT) {
      CC = Node->getConstantOperandVal(PatchPointOpers::CCPos);
      NumDefs = NumResults;
    }
    ScratchRegs = TLI->getScratchRegisters((CallingConv::ID) CC);
  }

  unsigned NumImpUses = 0;
  unsigned NodeOperands =
    countOperands(Node, II.getNumOperands() - NumDefs, NumImpUses);
  bool HasPhysRegOuts = NumResults > NumDefs && II.getImplicitDefs() != nullptr;
#ifndef NDEBUG
  unsigned NumMIOperands = NodeOperands + NumResults;
  if (II.isVariadic())
    assert(NumMIOperands >= II.getNumOperands() &&
           "Too few operands for a variadic node!");
  else
    assert(NumMIOperands >= II.getNumOperands() &&
           NumMIOperands <= II.getNumOperands() + II.getNumImplicitDefs() +
                            NumImpUses &&
           "#operands for dag node doesn't match .td file!");
#endif

  // Create the new machine instruction.
  MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II);

  // Add result register values for things that are defined by this
  // instruction.
  if (NumResults)
    CreateVirtualRegisters(Node, MIB, II, IsClone, IsCloned, VRBaseMap);

  // Emit all of the actual operands of this instruction, adding them to the
  // instruction as appropriate.
  bool HasOptPRefs = NumDefs > NumResults;
  assert((!HasOptPRefs || !HasPhysRegOuts) &&
         "Unable to cope with optional defs and phys regs defs!");
  unsigned NumSkip = HasOptPRefs ? NumDefs - NumResults : 0;
  for (unsigned i = NumSkip; i != NodeOperands; ++i)
    AddOperand(MIB, Node->getOperand(i), i - NumSkip + NumDefs, &II,
               VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned);

  // Add scratch registers as implicit def and early clobber
  if (ScratchRegs)
    for (unsigned i = 0; ScratchRegs[i]; ++i)
      MIB.addReg(ScratchRegs[i], RegState::ImplicitDefine |
                                 RegState::EarlyClobber);

  // Transfer all of the memory reference descriptions of this instruction.
  MIB.setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
                 cast<MachineSDNode>(Node)->memoperands_end());

  // Insert the instruction into position in the block. This needs to
  // happen before any custom inserter hook is called so that the
  // hook knows where in the block to insert the replacement code.
  MBB->insert(InsertPos, MIB);

  // The MachineInstr may also define physregs instead of virtregs.  These
  // physreg values can reach other instructions in different ways:
  //
  // 1. When there is a use of a Node value beyond the explicitly defined
  //    virtual registers, we emit a CopyFromReg for one of the implicitly
  //    defined physregs. This only happens when HasPhysRegOuts is true.
  //
  // 2. A CopyFromReg reading a physreg may be glued to this instruction.
  //
  // 3. A glued instruction may implicitly use a physreg.
  //
  // 4. A glued instruction may use a RegisterSDNode operand.
  //
  // Collect all the used physreg defs, and make sure that any unused physreg
  // defs are marked as dead.
  SmallVector<unsigned, 8> UsedRegs;

  // Additional results must be physical register defs.
  if (HasPhysRegOuts) {
    for (unsigned i = NumDefs; i < NumResults; ++i) {
      unsigned Reg = II.getImplicitDefs()[i - NumDefs];
      if (!Node->hasAnyUseOfValue(i))
        continue;
      // This implicitly defined physreg has a use.
      UsedRegs.push_back(Reg);
      EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
    }
  }

  // Scan the glue chain for any used physregs.
  if (Node->getValueType(Node->getNumValues()-1) == MVT::Glue) {
    for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) {
      if (F->getOpcode() == ISD::CopyFromReg) {
        UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
        continue;
      } else if (F->getOpcode() == ISD::CopyToReg) {
        // Skip CopyToReg nodes that are internal to the glue chain.
        continue;
      }
      // Collect declared implicit uses.
      const MCInstrDesc &MCID = TII->get(F->getMachineOpcode());
      UsedRegs.append(MCID.getImplicitUses(),
                      MCID.getImplicitUses() + MCID.getNumImplicitUses());
      // In addition to declared implicit uses, we must also check for
      // direct RegisterSDNode operands.
      for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
        if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
          unsigned Reg = R->getReg();
          if (TargetRegisterInfo::isPhysicalRegister(Reg))
            UsedRegs.push_back(Reg);
        }
    }
  }

  // Finally mark unused registers as dead.
  if (!UsedRegs.empty() || II.getImplicitDefs())
    MIB->setPhysRegsDeadExcept(UsedRegs, *TRI);

  // Run post-isel target hook to adjust this instruction if needed.
  if (II.hasPostISelHook())
    TLI->AdjustInstrPostInstrSelection(MIB, Node);
}

/// AddOperand - Add the specified operand to the specified machine instr.  II
/// specifies the instruction information for the node, and IIOpNum is the
/// operand number (in the II) that we are adding.
void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
                              SDValue Op,
                              unsigned IIOpNum,
                              const MCInstrDesc *II,
                              DenseMap<SDValue, unsigned> &VRBaseMap,
                              bool IsDebug, bool IsClone, bool IsCloned) {
  if (Op.isMachineOpcode()) {
    AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap,
                       IsDebug, IsClone, IsCloned);
  } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
    MIB.addImm(C->getSExtValue());
  } else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) {
    MIB.addFPImm(F->getConstantFPValue());
  } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
    // Turn additional physreg operands into implicit uses on non-variadic
    // instructions. This is used by call and return instructions passing
    // arguments in registers.
    bool Imp = II && (IIOpNum >= II->getNumOperands() && !II->isVariadic());
    MIB.addReg(R->getReg(), getImplRegState(Imp));
  } else if (RegisterMaskSDNode *RM = dyn_cast<RegisterMaskSDNode>(Op)) {
    MIB.addRegMask(RM->getRegMask());
  } else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) {
    MIB.addGlobalAddress(TGA->getGlobal(), TGA->getOffset(),
                         TGA->getTargetFlags());
  } else if (BasicBlockSDNode *BBNode = dyn_cast<BasicBlockSDNode>(Op)) {
    MIB.addMBB(BBNode->getBasicBlock());
  } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) {
    MIB.addFrameIndex(FI->getIndex());
  } else if (JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op)) {
    MIB.addJumpTableIndex(JT->getIndex(), JT->getTargetFlags());
  } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) {
    int Offset = CP->getOffset();
    unsigned Align = CP->getAlignment();
    Type *Type = CP->getType();
    // MachineConstantPool wants an explicit alignment.
    if (Align == 0) {
      Align = MF->getDataLayout().getPrefTypeAlignment(Type);
      if (Align == 0) {
        // Alignment of vector types.  FIXME!
        Align = MF->getDataLayout().getTypeAllocSize(Type);
      }
    }

    unsigned Idx;
    MachineConstantPool *MCP = MF->getConstantPool();
    if (CP->isMachineConstantPoolEntry())
      Idx = MCP->getConstantPoolIndex(CP->getMachineCPVal(), Align);
    else
      Idx = MCP->getConstantPoolIndex(CP->getConstVal(), Align);
    MIB.addConstantPoolIndex(Idx, Offset, CP->getTargetFlags());
  } else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
    MIB.addExternalSymbol(ES->getSymbol(), ES->getTargetFlags());
  } else if (auto *SymNode = dyn_cast<MCSymbolSDNode>(Op)) {
    MIB.addSym(SymNode->getMCSymbol());
  } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) {
    MIB.addBlockAddress(BA->getBlockAddress(),
                        BA->getOffset(),
                        BA->getTargetFlags());
  } else if (TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(Op)) {
    MIB.addTargetIndex(TI->getIndex(), TI->getOffset(), TI->getTargetFlags());
  } else {
    assert(Op.getValueType() != MVT::Other &&
           Op.getValueType() != MVT::Glue &&
           "Chain and glue operands should occur at end of operand list!");
    AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap,
                       IsDebug, IsClone, IsCloned);
  }
}

/// EmitSubregNode - Generate machine code for subreg nodes.
///
void InstrEmitter::EmitSubregNode(SDNode *Node,
                                  DenseMap<SDValue, unsigned> &VRBaseMap,
                                  bool IsClone, bool IsCloned) {
  unsigned VRBase = 0;
  unsigned Opc = Node->getMachineOpcode();

  // If the node is only used by a CopyToReg and the dest reg is a vreg, use
  // the CopyToReg'd destination register instead of creating a new vreg.
  for (SDNode *User : Node->uses()) {
    if (User->getOpcode() == ISD::CopyToReg &&
        User->getOperand(2).getNode() == Node) {
      unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
        VRBase = DestReg;
        break;
      }
    }
  }

  if (Opc == TargetOpcode::EXTRACT_SUBREG) {
    // EXTRACT_SUBREG is lowered as %dst = COPY %src:sub.  There are no
    // constraints on the %dst register, COPY can target all legal register
    // classes.
    unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
    const TargetRegisterClass *TRC =
      TLI->getRegClassFor(Node->getSimpleValueType(0));

    unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
    MachineInstr *DefMI = MRI->getVRegDef(VReg);
    unsigned SrcReg, DstReg, DefSubIdx;
    if (DefMI &&
        TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) &&
        SubIdx == DefSubIdx &&
        TRC == MRI->getRegClass(SrcReg)) {
      // Optimize these:
      // r1025 = s/zext r1024, 4
      // r1026 = extract_subreg r1025, 4
      // to a copy
      // r1026 = copy r1024
      VRBase = MRI->createVirtualRegister(TRC);
      BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
              TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg);
      MRI->clearKillFlags(SrcReg);
    } else {
      // VReg may not support a SubIdx sub-register, and we may need to
      // constrain its register class or issue a COPY to a compatible register
      // class.
      VReg = ConstrainForSubReg(VReg, SubIdx,
                                Node->getOperand(0).getSimpleValueType(),
                                Node->getDebugLoc());

      // Create the destreg if it is missing.
      if (VRBase == 0)
        VRBase = MRI->createVirtualRegister(TRC);

      // Create the extract_subreg machine instruction.
      BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
              TII->get(TargetOpcode::COPY), VRBase).addReg(VReg, 0, SubIdx);
    }
  } else if (Opc == TargetOpcode::INSERT_SUBREG ||
             Opc == TargetOpcode::SUBREG_TO_REG) {
    SDValue N0 = Node->getOperand(0);
    SDValue N1 = Node->getOperand(1);
    SDValue N2 = Node->getOperand(2);
    unsigned SubIdx = cast<ConstantSDNode>(N2)->getZExtValue();

    // Figure out the register class to create for the destreg.  It should be
    // the largest legal register class supporting SubIdx sub-registers.
    // RegisterCoalescer will constrain it further if it decides to eliminate
    // the INSERT_SUBREG instruction.
    //
    //   %dst = INSERT_SUBREG %src, %sub, SubIdx
    //
    // is lowered by TwoAddressInstructionPass to:
    //
    //   %dst = COPY %src
    //   %dst:SubIdx = COPY %sub
    //
    // There is no constraint on the %src register class.
    //
    const TargetRegisterClass *SRC =
      TLI->getRegClassFor(Node->getSimpleValueType(0));
    SRC = TRI->getSubClassWithSubReg(SRC, SubIdx);
    assert(SRC && "No register class supports VT and SubIdx for INSERT_SUBREG");

    if (VRBase == 0 || !SRC->hasSubClassEq(MRI->getRegClass(VRBase)))
      VRBase = MRI->createVirtualRegister(SRC);

    // Create the insert_subreg or subreg_to_reg machine instruction.
    MachineInstrBuilder MIB =
      BuildMI(*MF, Node->getDebugLoc(), TII->get(Opc), VRBase);

    // If creating a subreg_to_reg, then the first input operand
    // is an implicit value immediate, otherwise it's a register
    if (Opc == TargetOpcode::SUBREG_TO_REG) {
      const ConstantSDNode *SD = cast<ConstantSDNode>(N0);
      MIB.addImm(SD->getZExtValue());
    } else
      AddOperand(MIB, N0, 0, nullptr, VRBaseMap, /*IsDebug=*/false,
                 IsClone, IsCloned);
    // Add the subregister being inserted
    AddOperand(MIB, N1, 0, nullptr, VRBaseMap, /*IsDebug=*/false,
               IsClone, IsCloned);
    MIB.addImm(SubIdx);
    MBB->insert(InsertPos, MIB);
  } else
    llvm_unreachable("Node is not insert_subreg, extract_subreg, or subreg_to_reg");

  SDValue Op(Node, 0);
  bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
  (void)isNew; // Silence compiler warning.
  assert(isNew && "Node emitted out of order - early");
}

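/// CreateVirtualRegisters - Add result-register operands for the defs of
/// Node, reusing a CopyToReg destination vreg of the matching register class
/// when possible and otherwise creating a fresh virtual register.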
void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
                                          MachineInstrBuilder &MIB,
                                          const MCInstrDesc &II,
                                          bool IsClone, bool IsCloned,
                                          DenseMap<SDValue, unsigned> &VRBaseMap) {
  assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF &&
         "IMPLICIT_DEF should have been handled as a special case elsewhere!");

  unsigned NumResults = CountResults(Node);
  for (unsigned i = 0; i < II.getNumDefs(); ++i) {
    // If the specific node value is only used by a CopyToReg and the dest reg
    // is a vreg in the same register class, use the CopyToReg'd destination
    // register instead of creating a new vreg.
    unsigned VRBase = 0;
    const TargetRegisterClass *RC =
      TRI->getAllocatableClass(TII->getRegClass(II, i, TRI, *MF));
    // Always let the value type influence the used register class. The
    // constraints on the instruction may be too lax to represent the value
    // type correctly. For example, a 64-bit float (X86::FR64) can't live in
    // the 32-bit float super-class (X86::FR32).
    if (i < NumResults && TLI->isTypeLegal(Node->getSimpleValueType(i))) {
      const TargetRegisterClass *VTRC =
        TLI->getRegClassFor(Node->getSimpleValueType(i));
      if (RC)
        VTRC = TRI->getCommonSubClass(RC, VTRC);
      if (VTRC)
        RC = VTRC;
    }

    if (II.OpInfo[i].isOptionalDef()) {
      // Optional def must be a physical register.
      unsigned NumResults = CountResults(Node);
      VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg();
      assert(TargetRegisterInfo::isPhysicalRegister(VRBase));
      MIB.addReg(VRBase, RegState::Define);
    }

    if (!VRBase && !IsClone && !IsCloned)
      for (SDNode *User : Node->uses()) {
        if (User->getOpcode() == ISD::CopyToReg &&
            User->getOperand(2).getNode() == Node &&
            User->getOperand(2).getResNo() == i) {
          unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
          if (TargetRegisterInfo::isVirtualRegister(Reg)) {
            const TargetRegisterClass *RegRC = MRI->getRegClass(Reg);
            if (RegRC == RC) {
              VRBase = Reg;
              MIB.addReg(VRBase, RegState::Define);
              break;
            }
          }
        }
      }

    // Create the result registers for this node and add the result regs to
    // the machine instruction.
    if (VRBase == 0) {
      assert(RC && "Isn't a register operand!");
      VRBase = MRI->createVirtualRegister(RC);
      MIB.addReg(VRBase, RegState::Define);
    }

    // If this def corresponds to a result of the SDNode insert the VRBase into
    // the lookup map.
    if (i < NumResults) {
      SDValue Op(Node, i);
      if (IsClone)
        VRBaseMap.erase(Op);
      bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
      (void)isNew; // Silence compiler warning.
      assert(isNew && "Node emitted out of order - early");
    }
  }
}

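/// emitPopInst - Emit the pop instructions that restore the registers in CSI
/// accepted by Func, using LDM-style multi-register pops where possible and
/// folding the return into the pop when LR can be popped directly into PC.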
void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MI,
                                   const std::vector<CalleeSavedInfo> &CSI,
                                   unsigned LdmOpc, unsigned LdrOpc,
                                   bool isVarArg, bool NoGap,
                                   bool(*Func)(unsigned, bool)) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL = MI->getDebugLoc();
  unsigned RetOpcode = MI->getOpcode();
  bool isTailCall = (RetOpcode == ARM::TCRETURNdi ||
                     RetOpcode == ARM::TCRETURNdiND ||
                     RetOpcode == ARM::TCRETURNri ||
                     RetOpcode == ARM::TCRETURNriND);

  SmallVector<unsigned, 4> Regs;
  unsigned i = CSI.size();
  while (i != 0) {
    unsigned LastReg = 0;
    bool DeleteRet = false;
    for (; i != 0; --i) {
      unsigned Reg = CSI[i-1].getReg();
      if (!(Func)(Reg, STI.isTargetDarwin())) continue;

      if (Reg == ARM::LR && !isTailCall && !isVarArg && STI.hasV5TOps()) {
        Reg = ARM::PC;
        LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
        // Fold the return instruction into the LDM.
        DeleteRet = true;
      }

      // If NoGap is true, pop consecutive registers and then leave the rest
      // for other instructions. e.g.
      // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
      if (NoGap && LastReg && LastReg != Reg-1)
        break;

      LastReg = Reg;
      Regs.push_back(Reg);
    }

    if (Regs.empty())
      continue;
    if (Regs.size() > 1 || LdrOpc == 0) {
      MachineInstrBuilder MIB =
        AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
                         .addReg(ARM::SP));
      for (unsigned i = 0, e = Regs.size(); i < e; ++i)
        MIB.addReg(Regs[i], getDefRegState(true));
      if (DeleteRet) {
        MIB->copyImplicitOps(&*MI);
        MI->eraseFromParent();
      }
      MI = MIB;
    } else if (Regs.size() == 1) {
      // If we adjusted the reg to PC from LR above, switch it back here. We
      // only do that for LDM.
      if (Regs[0] == ARM::PC)
        Regs[0] = ARM::LR;
      MachineInstrBuilder MIB =
        BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
          .addReg(ARM::SP, RegState::Define)
          .addReg(ARM::SP);
      // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
      // that refactoring is complete (eventually).
      if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
        MIB.addReg(0);
        MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
      } else
        MIB.addImm(4);
      AddDefaultPred(MIB);
    }
    Regs.clear();
  }
}

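/// emitPushInst - Emit the push instructions that save the registers in CSI
/// accepted by Func, using STM-style multi-register pushes where possible.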
void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    const std::vector<CalleeSavedInfo> &CSI,
                                    unsigned StmOpc, unsigned StrOpc,
                                    bool NoGap,
                                    bool(*Func)(unsigned, bool),
                                    unsigned MIFlags) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
  DebugLoc DL;
  if (MI != MBB.end()) DL = MI->getDebugLoc();

  SmallVector<std::pair<unsigned,bool>, 4> Regs;
  unsigned i = CSI.size();
  while (i != 0) {
    unsigned LastReg = 0;
    for (; i != 0; --i) {
      unsigned Reg = CSI[i-1].getReg();
      if (!(Func)(Reg, STI.isTargetDarwin())) continue;

      // Add the callee-saved register as live-in unless it's LR and
      // @llvm.returnaddress is called. If LR is returned for
      // @llvm.returnaddress then it's already added to the function and
      // entry block live-in sets.
      bool isKill = true;
      if (Reg == ARM::LR) {
        if (MF.getFrameInfo()->isReturnAddressTaken() &&
            MF.getRegInfo().isLiveIn(Reg))
          isKill = false;
      }

      if (isKill)
        MBB.addLiveIn(Reg);

      // If NoGap is true, push consecutive registers and then leave the rest
      // for other instructions. e.g.
      // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
      if (NoGap && LastReg && LastReg != Reg-1)
        break;

      LastReg = Reg;
      Regs.push_back(std::make_pair(Reg, isKill));
    }

    if (Regs.empty())
      continue;
    if (Regs.size() > 1 || StrOpc == 0) {
      MachineInstrBuilder MIB =
        AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
                         .addReg(ARM::SP).setMIFlags(MIFlags));
      for (unsigned i = 0, e = Regs.size(); i < e; ++i)
        MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
    } else if (Regs.size() == 1) {
      MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP)
        .addReg(Regs[0].first, getKillRegState(Regs[0].second))
        .addReg(ARM::SP).setMIFlags(MIFlags)
        .addImm(-4);
      AddDefaultPred(MIB);
    }
    Regs.clear();
  }
}

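/// spillCalleeSavedRegisters - Store the callee-saved register pairs; the
/// first store is pre-indexed and allocates the stack for the whole area.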
bool AArch64FrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    const std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  DebugLoc DL;
  SmallVector<RegPairInfo, 8> RegPairs;

  computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);

  for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;
       ++RPII) {
    RegPairInfo RPI = *RPII;
    unsigned Reg1 = RPI.Reg1;
    unsigned Reg2 = RPI.Reg2;
    unsigned StrOpc;

    // Issue sequence of non-sp increment and pi sp spills for cs regs. The
    // first spill is a pre-increment that allocates the stack.
    // For example:
    //    stp x22, x21, [sp, #-48]!   // addImm(-6)
    //    stp x20, x19, [sp, #16]     // addImm(+2)
    //    stp fp, lr, [sp, #32]       // addImm(+4)
    // Rationale: This sequence saves uop updates compared to a sequence of
    // pre-increment spills like stp xi,xj,[sp,#-16]!
    // Note: Similar rationale and sequence for restores in epilog.
    bool BumpSP = RPII == RegPairs.rbegin();
    if (RPI.IsGPR) {
      // For first spill use pre-increment store.
      if (BumpSP)
        StrOpc = RPI.isPaired() ? AArch64::STPXpre : AArch64::STRXpre;
      else
        StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
    } else {
      // For first spill use pre-increment store.
      if (BumpSP)
        StrOpc = RPI.isPaired() ? AArch64::STPDpre : AArch64::STRDpre;
      else
        StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
    }
    DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1);
          if (RPI.isPaired())
            dbgs() << ", " << TRI->getName(Reg2);
          dbgs() << ") -> fi#(" << RPI.FrameIdx;
          if (RPI.isPaired())
            dbgs() << ", " << RPI.FrameIdx+1;
          dbgs() << ")\n");

    const int Offset = BumpSP ? -RPI.Offset : RPI.Offset;
    MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
    if (BumpSP)
      MIB.addReg(AArch64::SP, RegState::Define);

    if (RPI.isPaired()) {
      MBB.addLiveIn(Reg1);
      MBB.addLiveIn(Reg2);
      MIB.addReg(Reg2, getPrologueDeath(MF, Reg2))
        .addReg(Reg1, getPrologueDeath(MF, Reg1))
        .addReg(AArch64::SP)
        .addImm(Offset) // [sp, #offset * 8], where factor * 8 is implicit
        .setMIFlag(MachineInstr::FrameSetup);
    } else {
      MBB.addLiveIn(Reg1);
      MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
        .addReg(AArch64::SP)
        .addImm(BumpSP ? Offset * 8 : Offset) // pre-inc version is unscaled
        .setMIFlag(MachineInstr::FrameSetup);
    }
  }
  return true;
}

/// EmitSpecialNode - Generate machine code for a target-independent node and
/// needed dependencies.
void InstrEmitter::
EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
                DenseMap<SDValue, unsigned> &VRBaseMap) {
  switch (Node->getOpcode()) {
  default:
#ifndef NDEBUG
    Node->dump();
#endif
    llvm_unreachable("This target-independent node should have been selected!");
  case ISD::EntryToken:
    llvm_unreachable("EntryToken should have been excluded from the schedule!");
  case ISD::MERGE_VALUES:
  case ISD::TokenFactor: // fall thru
    break;
  case ISD::CopyToReg: {
    unsigned SrcReg;
    SDValue SrcVal = Node->getOperand(2);
    if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(SrcVal))
      SrcReg = R->getReg();
    else
      SrcReg = getVR(SrcVal, VRBaseMap);

    unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
    if (SrcReg == DestReg) // Coalesced away the copy? Ignore.
      break;

    BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
            DestReg).addReg(SrcReg);
    break;
  }
  case ISD::CopyFromReg: {
    unsigned SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
    EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap);
    break;
  }
  case ISD::EH_LABEL: {
    MCSymbol *S = cast<EHLabelSDNode>(Node)->getLabel();
    BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
            TII->get(TargetOpcode::EH_LABEL)).addSym(S);
    break;
  }
  case ISD::LIFETIME_START:
  case ISD::LIFETIME_END: {
    unsigned TarOp = (Node->getOpcode() == ISD::LIFETIME_START) ?
      TargetOpcode::LIFETIME_START : TargetOpcode::LIFETIME_END;

    FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Node->getOperand(1));
    BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TarOp))
      .addFrameIndex(FI->getIndex());
    break;
  }
  case ISD::INLINEASM: {
    unsigned NumOps = Node->getNumOperands();
    if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
      --NumOps;  // Ignore the glue operand.

    // Create the inline asm machine instruction.
    MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(),
                                      TII->get(TargetOpcode::INLINEASM));

    // Add the asm string as an external symbol operand.
    SDValue AsmStrV = Node->getOperand(InlineAsm::Op_AsmString);
    const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol();
    MIB.addExternalSymbol(AsmStr);

    // Add the HasSideEffect, isAlignStack, AsmDialect, MayLoad and MayStore
    // bits.
    int64_t ExtraInfo =
      cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_ExtraInfo))->
                          getZExtValue();
    MIB.addImm(ExtraInfo);

    // Remember the operand index of the group flags.
    SmallVector<unsigned, 8> GroupIdx;

    // Remember registers that are part of early-clobber defs.
    SmallVector<unsigned, 8> ECRegs;

    // Add all of the operand registers to the instruction.
    for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
      unsigned Flags =
        cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
      const unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);

      GroupIdx.push_back(MIB->getNumOperands());
      MIB.addImm(Flags);
      ++i;  // Skip the ID value.

      switch (InlineAsm::getKind(Flags)) {
      default: llvm_unreachable("Bad flags!");
      case InlineAsm::Kind_RegDef:
        for (unsigned j = 0; j != NumVals; ++j, ++i) {
          unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
          // FIXME: Add dead flags for physical and virtual registers defined.
          // For now, mark physical register defs as implicit to help fast
          // regalloc. This makes inline asm look a lot like calls.
          MIB.addReg(Reg, RegState::Define |
                  getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg)));
        }
        break;
      case InlineAsm::Kind_RegDefEarlyClobber:
      case InlineAsm::Kind_Clobber:
        for (unsigned j = 0; j != NumVals; ++j, ++i) {
          unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
          MIB.addReg(Reg, RegState::Define | RegState::EarlyClobber |
                  getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg)));
          ECRegs.push_back(Reg);
        }
        break;
      case InlineAsm::Kind_RegUse:  // Use of register.
      case InlineAsm::Kind_Imm:     // Immediate.
      case InlineAsm::Kind_Mem:     // Addressing mode.
        // The addressing mode has been selected, just add all of the
        // operands to the machine instruction.
        for (unsigned j = 0; j != NumVals; ++j, ++i)
          AddOperand(MIB, Node->getOperand(i), 0, nullptr, VRBaseMap,
                     /*IsDebug=*/false, IsClone, IsCloned);

        // Manually set isTied bits.
        if (InlineAsm::getKind(Flags) == InlineAsm::Kind_RegUse) {
          unsigned DefGroup = 0;
          if (InlineAsm::isUseOperandTiedToDef(Flags, DefGroup)) {
            unsigned DefIdx = GroupIdx[DefGroup] + 1;
            unsigned UseIdx = GroupIdx.back() + 1;
            for (unsigned j = 0; j != NumVals; ++j)
              MIB->tieOperands(DefIdx + j, UseIdx + j);
          }
        }
        break;
      }
    }

    // GCC inline assembly allows input operands to also be early-clobber
    // output operands (so long as the operand is written only after it's
    // used), but this does not match the semantics of our early-clobber flag.
    // If an early-clobber operand register is also an input operand register,
    // then remove the early-clobber flag.
    for (unsigned Reg : ECRegs) {
      if (MIB->readsRegister(Reg, TRI)) {
        MachineOperand *MO = MIB->findRegisterDefOperand(Reg, false, TRI);
        assert(MO && "No def operand for clobbered register?");
        MO->setIsEarlyClobber(false);
      }
    }

    // Get the mdnode from the asm if it exists and add it to the instruction.
    SDValue MDV = Node->getOperand(InlineAsm::Op_MDNode);
    const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD();
    if (MD)
      MIB.addMetadata(MD);

    MBB->insert(InsertPos, MIB);
    break;
  }
  }
}

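/// restoreCalleeSavedRegisters - Reload the callee-saved register pairs; the
/// last load is post-indexed and de-allocates the stack.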
bool AArch64FrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    const std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  DebugLoc DL;
  SmallVector<RegPairInfo, 8> RegPairs;

  if (MI != MBB.end())
    DL = MI->getDebugLoc();

  computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);

  for (auto RPII = RegPairs.begin(), RPIE = RegPairs.end(); RPII != RPIE;
       ++RPII) {
    RegPairInfo RPI = *RPII;
    unsigned Reg1 = RPI.Reg1;
    unsigned Reg2 = RPI.Reg2;

    // Issue sequence of non-sp increment and sp-pi restores for cs regs. Only
    // the last load is sp-pi post-increment and de-allocates the stack:
    // For example:
    //    ldp fp, lr, [sp, #32]       // addImm(+4)
    //    ldp x20, x19, [sp, #16]     // addImm(+2)
    //    ldp x22, x21, [sp], #48     // addImm(+6)
    // Note: see comment in spillCalleeSavedRegisters()
    unsigned LdrOpc;
    bool BumpSP = RPII == std::prev(RegPairs.end());
    if (RPI.IsGPR) {
      if (BumpSP)
        LdrOpc = RPI.isPaired() ? AArch64::LDPXpost : AArch64::LDRXpost;
      else
        LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
    } else {
      if (BumpSP)
        LdrOpc = RPI.isPaired() ? AArch64::LDPDpost : AArch64::LDRDpost;
      else
        LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
    }
    DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1);
          if (RPI.isPaired())
            dbgs() << ", " << TRI->getName(Reg2);
          dbgs() << ") -> fi#(" << RPI.FrameIdx;
          if (RPI.isPaired())
            dbgs() << ", " << RPI.FrameIdx+1;
          dbgs() << ")\n");

    const int Offset = RPI.Offset;
    MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
    if (BumpSP)
      MIB.addReg(AArch64::SP, RegState::Define);

    if (RPI.isPaired())
      MIB.addReg(Reg2, getDefRegState(true))
        .addReg(Reg1, getDefRegState(true))
        .addReg(AArch64::SP)
        .addImm(Offset) // [sp], #offset * 8  or [sp, #offset * 8]
                        // where the factor * 8 is implicit
        .setMIFlag(MachineInstr::FrameDestroy);
    else
      MIB.addReg(Reg1, getDefRegState(true))
        .addReg(AArch64::SP)
        .addImm(BumpSP ? Offset * 8 : Offset) // post-dec version is unscaled
        .setMIFlag(MachineInstr::FrameDestroy);
  }
  return true;
}

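/// copyPhysReg - Copy SrcReg to DestReg, splitting double/quad FP copies into
/// multiple FMOVS/FMOVD instructions when the subtarget lacks a single
/// instruction for the register class.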
void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator I, DebugLoc DL,
                                 unsigned DestReg, unsigned SrcReg,
                                 bool KillSrc) const {
  unsigned numSubRegs = 0;
  unsigned movOpc = 0;
  const unsigned *subRegIdx = nullptr;
  bool ExtraG0 = false;

  const unsigned DW_SubRegsIdx[]      = { SP::sub_even, SP::sub_odd };
  const unsigned DFP_FP_SubRegsIdx[]  = { SP::sub_even, SP::sub_odd };
  const unsigned QFP_DFP_SubRegsIdx[] = { SP::sub_even64, SP::sub_odd64 };
  const unsigned QFP_FP_SubRegsIdx[]  = { SP::sub_even, SP::sub_odd,
                                          SP::sub_odd64_then_sub_even,
                                          SP::sub_odd64_then_sub_odd };

  if (SP::IntRegsRegClass.contains(DestReg, SrcReg))
    BuildMI(MBB, I, DL, get(SP::ORrr), DestReg).addReg(SP::G0)
      .addReg(SrcReg, getKillRegState(KillSrc));
  else if (SP::IntPairRegClass.contains(DestReg, SrcReg)) {
    subRegIdx  = DW_SubRegsIdx;
    numSubRegs = 2;
    movOpc     = SP::ORrr;
    ExtraG0    = true;
  } else if (SP::FPRegsRegClass.contains(DestReg, SrcReg))
    BuildMI(MBB, I, DL, get(SP::FMOVS), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
  else if (SP::DFPRegsRegClass.contains(DestReg, SrcReg)) {
    if (Subtarget.isV9()) {
      BuildMI(MBB, I, DL, get(SP::FMOVD), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      // Use two FMOVS instructions.
      subRegIdx  = DFP_FP_SubRegsIdx;
      numSubRegs = 2;
      movOpc     = SP::FMOVS;
    }
  } else if (SP::QFPRegsRegClass.contains(DestReg, SrcReg)) {
    if (Subtarget.isV9()) {
      if (Subtarget.hasHardQuad()) {
        BuildMI(MBB, I, DL, get(SP::FMOVQ), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
      } else {
        // Use two FMOVD instructions.
        subRegIdx  = QFP_DFP_SubRegsIdx;
        numSubRegs = 2;
        movOpc     = SP::FMOVD;
      }
    } else {
      // Use four FMOVS instructions.
      subRegIdx  = QFP_FP_SubRegsIdx;
      numSubRegs = 4;
      movOpc     = SP::FMOVS;
    }
  } else if (SP::ASRRegsRegClass.contains(DestReg) &&
             SP::IntRegsRegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(SP::WRASRrr), DestReg)
      .addReg(SP::G0)
      .addReg(SrcReg, getKillRegState(KillSrc));
  } else if (SP::IntRegsRegClass.contains(DestReg) &&
             SP::ASRRegsRegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(SP::RDASR), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
  } else
    llvm_unreachable("Impossible reg-to-reg copy");

  if (numSubRegs == 0 || subRegIdx == nullptr || movOpc == 0)
    return;

  const TargetRegisterInfo *TRI = &getRegisterInfo();
  MachineInstr *MovMI = nullptr;

  for (unsigned i = 0; i != numSubRegs; ++i) {
    unsigned Dst = TRI->getSubReg(DestReg, subRegIdx[i]);
    unsigned Src = TRI->getSubReg(SrcReg, subRegIdx[i]);
    assert(Dst && Src && "Bad sub-register");

    MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(movOpc), Dst);
    if (ExtraG0)
      MIB.addReg(SP::G0);
    MIB.addReg(Src);
    MovMI = MIB.getInstr();
  }
  // Add implicit super-register defs and kills to the last MovMI.
  MovMI->addRegisterDefined(DestReg, TRI);
  if (KillSrc)
    MovMI->addRegisterKilled(SrcReg, TRI);
}

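/// InsertITInstructions - Insert an IT instruction in front of each run of
/// predicated Thumb2 instructions, compute the IT mask, and finalize the
/// resulting bundle.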
bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
  bool Modified = false;

  SmallSet<unsigned, 4> Defs;
  SmallSet<unsigned, 4> Uses;
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineInstr *MI = &*MBBI;
    DebugLoc dl = MI->getDebugLoc();
    unsigned PredReg = 0;
    ARMCC::CondCodes CC = getITInstrPredicate(MI, PredReg);
    if (CC == ARMCC::AL) {
      ++MBBI;
      continue;
    }

    Defs.clear();
    Uses.clear();
    TrackDefUses(MI, Defs, Uses, TRI);

    // Insert an IT instruction.
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(ARM::t2IT))
      .addImm(CC);

    // Add implicit use of ITSTATE to IT block instructions.
    MI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*isDef*/,
                                             true/*isImp*/, false/*isKill*/));

    MachineInstr *LastITMI = MI;
    MachineBasicBlock::iterator InsertPos = MIB.getInstr();
    ++MBBI;

    // Form IT block.
    ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
    unsigned Mask = 0, Pos = 3;

    // v8 IT blocks are limited to one conditional op unless
    // -arm-no-restrict-it is set: skip the loop.
    if (!restrictIT) {
      // Branches, including tricky ones like LDM_RET, need to end an IT
      // block so check the instruction we just put in the block.
      for (; MBBI != E && Pos &&
             (!MI->isBranch() && !MI->isReturn()); ++MBBI) {
        if (MBBI->isDebugValue())
          continue;

        MachineInstr *NMI = &*MBBI;
        MI = NMI;

        unsigned NPredReg = 0;
        ARMCC::CondCodes NCC = getITInstrPredicate(NMI, NPredReg);
        if (NCC == CC || NCC == OCC) {
          Mask |= (NCC & 1) << Pos;
          // Add implicit use of ITSTATE.
          NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE,
                                                    false/*isDef*/,
                                                    true/*isImp*/,
                                                    false/*isKill*/));
          LastITMI = NMI;
        } else {
          if (NCC == ARMCC::AL &&
              MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses)) {
            --MBBI;
            MBB.remove(NMI);
            MBB.insert(InsertPos, NMI);
            ++NumMovedInsts;
            continue;
          }
          break;
        }
        TrackDefUses(NMI, Defs, Uses, TRI);
        --Pos;
      }
    }

    // Finalize IT mask.
    Mask |= (1 << Pos);
    // Tag along (firstcond[0] << 4) with the mask.
    Mask |= (CC & 1) << 4;
    MIB.addImm(Mask);

    // Last instruction in IT block kills ITSTATE.
    LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill();

    // Finalize the bundle.
    MachineBasicBlock::instr_iterator LI = LastITMI;
    finalizeBundle(MBB, InsertPos.getInstrIterator(), std::next(LI));

    Modified = true;
    ++NumITs;
  }

  return Modified;
}
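// Standalone sketch (hypothetical helper, not the pass itself) of the mask
// encoding built above: each trailing slot contributes its condition's low
// bit (equal to firstcond[0] for a "then" slot, flipped for an "else" slot,
// since getOppositeCondition flips bit 0), a marker bit is left at the last
// unused position, and firstcond[0] is tagged along in bit 4.
static unsigned encodeITMask(unsigned FirstCondLowBit, const bool *SlotIsThen,
                             unsigned NumSlots) {
  unsigned Mask = 0, Pos = 3;
  for (unsigned s = 0; s != NumSlots; ++s, --Pos)
    Mask |= (SlotIsThen[s] ? FirstCondLowBit : FirstCondLowBit ^ 1) << Pos;
  Mask |= 1u << Pos;              // position marker
  Mask |= FirstCondLowBit << 4;   // tag along firstcond[0]
  return Mask;
}
// e.g. an EQ-headed block (firstcond[0] == 0) with slots {then, else},
// i.e. "ITTE EQ", yields Mask == 0b00110.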
bool Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
                                       const ReduceEntry &Entry) {
  if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt))
    return false;

  unsigned Scale = 1;
  bool HasImmOffset = false;
  bool HasShift = false;
  bool HasOffReg = true;
  bool isLdStMul = false;
  unsigned Opc = Entry.NarrowOpc1;
  unsigned OpNum = 3; // First 'rest' of operands.
  uint8_t ImmLimit = Entry.Imm1Limit;

  switch (Entry.WideOpc) {
  default:
    llvm_unreachable("Unexpected Thumb2 load / store opcode!");
  case ARM::t2LDRi12:
  case ARM::t2STRi12:
    if (MI->getOperand(1).getReg() == ARM::SP) {
      Opc = Entry.NarrowOpc2;
      ImmLimit = Entry.Imm2Limit;
      HasOffReg = false;
    }
    Scale = 4;
    HasImmOffset = true;
    HasOffReg = false;
    break;
  case ARM::t2LDRBi12:
  case ARM::t2STRBi12:
    HasImmOffset = true;
    HasOffReg = false;
    break;
  case ARM::t2LDRHi12:
  case ARM::t2STRHi12:
    Scale = 2;
    HasImmOffset = true;
    HasOffReg = false;
    break;
  case ARM::t2LDRs:
  case ARM::t2LDRBs:
  case ARM::t2LDRHs:
  case ARM::t2LDRSBs:
  case ARM::t2LDRSHs:
  case ARM::t2STRs:
  case ARM::t2STRBs:
  case ARM::t2STRHs:
    HasShift = true;
    OpNum = 4;
    break;
  case ARM::t2LDMIA:
  case ARM::t2LDMDB: {
    unsigned BaseReg = MI->getOperand(0).getReg();
    if (!isARMLowRegister(BaseReg) || Entry.WideOpc != ARM::t2LDMIA)
      return false;

    // For the non-writeback version (this one), the base register must be
    // one of the registers being loaded.
    bool isOK = false;
    for (unsigned i = 4; i < MI->getNumOperands(); ++i) {
      if (MI->getOperand(i).getReg() == BaseReg) {
        isOK = true;
        break;
      }
    }
    if (!isOK)
      return false;

    OpNum = 0;
    isLdStMul = true;
    break;
  }
  case ARM::t2LDMIA_RET: {
    unsigned BaseReg = MI->getOperand(1).getReg();
    if (BaseReg != ARM::SP)
      return false;
    Opc = Entry.NarrowOpc2; // tPOP_RET
    OpNum = 2;
    isLdStMul = true;
    break;
  }
  case ARM::t2LDMIA_UPD:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMIA_UPD:
  case ARM::t2STMDB_UPD: {
    OpNum = 0;

    unsigned BaseReg = MI->getOperand(1).getReg();
    if (BaseReg == ARM::SP &&
        (Entry.WideOpc == ARM::t2LDMIA_UPD ||
         Entry.WideOpc == ARM::t2STMDB_UPD)) {
      Opc = Entry.NarrowOpc2; // tPOP or tPUSH
      OpNum = 2;
    } else if (!isARMLowRegister(BaseReg) ||
               (Entry.WideOpc != ARM::t2LDMIA_UPD &&
                Entry.WideOpc != ARM::t2STMIA_UPD)) {
      return false;
    }

    isLdStMul = true;
    break;
  }
  }

  unsigned OffsetReg = 0;
  bool OffsetKill = false;
  if (HasShift) {
    OffsetReg = MI->getOperand(2).getReg();
    OffsetKill = MI->getOperand(2).isKill();

    if (MI->getOperand(3).getImm())
      // Thumb1 addressing mode doesn't support shift.
      return false;
  }

  unsigned OffsetImm = 0;
  if (HasImmOffset) {
    OffsetImm = MI->getOperand(2).getImm();
    unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale;

    if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset)
      // Make sure the immediate field fits.
      return false;
  }

  // Add the 16-bit load / store instruction.
  DebugLoc dl = MI->getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc));
  if (!isLdStMul) {
    MIB.addOperand(MI->getOperand(0));
    MIB.addOperand(MI->getOperand(1));

    if (HasImmOffset)
      MIB.addImm(OffsetImm / Scale);

    assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");

    if (HasOffReg)
      MIB.addReg(OffsetReg, getKillRegState(OffsetKill));
  }

  // Transfer the rest of operands.
  for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  // Transfer MI flags.
  MIB.setMIFlags(MI->getFlags());

  DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);

  MBB.erase_instr(MI);
  ++NumLdSts;
  return true;
}
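// Standalone sketch of the immediate legality check above: a narrow Thumb1
// load/store immediate must be a multiple of the access size and, once
// scaled down, fit in the narrow encoding's field. Parameter meanings mirror
// ReduceLoadStore (ImmLimit in bits, Scale in bytes); the helper name is
// hypothetical.
static bool narrowImmFits(unsigned OffsetImm, unsigned ImmLimit,
                          unsigned Scale) {
  unsigned MaxOffset = ((1u << ImmLimit) - 1) * Scale;
  if (OffsetImm & (Scale - 1))   // misaligned for this access size
    return false;
  return OffsetImm <= MaxOffset; // fits once encoded as OffsetImm / Scale
}
// e.g. a word load with a 5-bit scaled field: narrowImmFits(124, 5, 4) is
// true (encoded as 31), while narrowImmFits(126, 5, 4) and
// narrowImmFits(128, 5, 4) are both false.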
bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) {
  CallingConv::ID CC = CLI.CallConv;
  bool IsTailCall = CLI.IsTailCall;
  bool IsVarArg = CLI.IsVarArg;
  const Value *Callee = CLI.Callee;
  // const char *SymName = CLI.SymName;

  // Allow SelectionDAG isel to handle tail calls.
  if (IsTailCall)
    return false;

  // Let SDISel handle vararg functions.
  if (IsVarArg)
    return false;

  // FIXME: Only handle *simple* calls for now.
  MVT RetVT;
  if (CLI.RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(CLI.RetTy, RetVT))
    return false;

  for (auto Flag : CLI.OutFlags)
    if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal())
      return false;

  // Set up the argument vectors.
  SmallVector<MVT, 16> OutVTs;
  OutVTs.reserve(CLI.OutVals.size());

  for (auto *Val : CLI.OutVals) {
    MVT VT;
    if (!isTypeLegal(Val->getType(), VT) &&
        !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
      return false;

    // We don't handle vector parameters yet.
    if (VT.isVector() || VT.getSizeInBits() > 64)
      return false;

    OutVTs.push_back(VT);
  }

  Address Addr;
  if (!computeCallAddress(Callee, Addr))
    return false;

  // Handle the arguments now that we've gotten them.
  unsigned NumBytes;
  if (!processCallArgs(CLI, OutVTs, NumBytes))
    return false;

  // Issue the call.
  unsigned DestAddress = materializeGV(Addr.getGlobalValue(), MVT::i32);
  emitInst(TargetOpcode::COPY, Mips::T9).addReg(DestAddress);
  MachineInstrBuilder MIB =
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::JALR),
              Mips::RA).addReg(Mips::T9);

  // Add implicit physical register uses to the call.
  for (auto Reg : CLI.OutRegs)
    MIB.addReg(Reg, RegState::Implicit);

  // Add a register mask with the call-preserved registers.
  // Proper defs for return values will be added by setPhysRegsDeadExcept().
  MIB.addRegMask(TRI.getCallPreservedMask(CC));

  CLI.Call = MIB;

  // Finish off the call including any return values.
  return finishCall(CLI, RetVT, NumBytes);
}
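// Note on the call sequence above (illustrative; this mirrors the o32 PIC
// convention the code assumes): position-independent callees expect their
// own address in $t9, so the materialized address is copied into T9 and the
// call is emitted as "jalr $t9" with RA as the link register, roughly:
//   <materialize callee address into some GPR>
//   move $t9, <that GPR>
//   jalr $t9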
bool Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
                                     const ReduceEntry &Entry,
                                     bool LiveCPSR, bool IsSelfLoop) {
  unsigned Opc = MI->getOpcode();
  if (Opc == ARM::t2ADDri) {
    // If the source register is SP, try to reduce to tADDrSPi, otherwise
    // it's a normal reduce.
    if (MI->getOperand(1).getReg() != ARM::SP) {
      if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
        return true;
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
    }
    // Try to reduce to tADDrSPi.
    unsigned Imm = MI->getOperand(2).getImm();
    // The immediate must be in range, the destination register must be a low
    // reg, the predicate must be "always" and the condition flags must not
    // be being set.
    if (Imm & 3 || Imm > 1020)
      return false;
    if (!isARMLowRegister(MI->getOperand(0).getReg()))
      return false;
    if (MI->getOperand(3).getImm() != ARMCC::AL)
      return false;
    const MCInstrDesc &MCID = MI->getDesc();
    if (MCID.hasOptionalDef() &&
        MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR)
      return false;

    MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(),
                                      TII->get(ARM::tADDrSPi))
      .addOperand(MI->getOperand(0))
      .addOperand(MI->getOperand(1))
      .addImm(Imm / 4); // The tADDrSPi has an implied scale by four.
    AddDefaultPred(MIB);

    // Transfer MI flags.
    MIB.setMIFlags(MI->getFlags());

    DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);

    MBB.erase_instr(MI);
    ++NumNarrows;
    return true;
  }

  if (Entry.LowRegs1 && !VerifyLowRegs(MI))
    return false;

  if (MI->mayLoad() || MI->mayStore())
    return ReduceLoadStore(MBB, MI, Entry);

  switch (Opc) {
  default: break;
  case ARM::t2ADDSri:
  case ARM::t2ADDSrr: {
    unsigned PredReg = 0;
    if (getInstrPredicate(MI, PredReg) == ARMCC::AL) {
      switch (Opc) {
      default: break;
      case ARM::t2ADDSri: {
        if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
          return true;
        // fallthrough
      }
      case ARM::t2ADDSrr:
        return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
      }
    }
    break;
  }
  case ARM::t2RSBri:
  case ARM::t2RSBSri:
  case ARM::t2SXTB:
  case ARM::t2SXTH:
  case ARM::t2UXTB:
  case ARM::t2UXTH:
    if (MI->getOperand(2).getImm() == 0)
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
    break;
  case ARM::t2MOVi16:
    // Can convert only 'pure' immediate operands, not immediates obtained as
    // globals' addresses.
    if (MI->getOperand(1).isImm())
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
    break;
  case ARM::t2CMPrr: {
    // Try to reduce to the lo-reg only version first. Why there are two
    // versions of the instruction is a mystery.
    // It would be nice to just have two entries in the master table that
    // are prioritized, but the table assumes a unique entry for each
    // source insn opcode. So for now, we hack a local entry record to use.
    static const ReduceEntry NarrowEntry =
      { ARM::t2CMPrr, ARM::tCMPr, 0, 0, 0, 1, 1, 2, 0, 0, 1, 0 };
    if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, IsSelfLoop))
      return true;
    return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
  }
  }
  return false;
}
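// Standalone sketch of the tADDrSPi legality test above: the 16-bit
// "add rd, sp, #imm" form encodes an 8-bit immediate with an implied scale
// of four, so the 32-bit immediate must be word-aligned and at most
// 255 * 4 = 1020. Hypothetical helper name.
static bool fitsTADDrSPi(unsigned Imm) {
  return (Imm & 3) == 0 && Imm <= 1020; // emitted later as addImm(Imm / 4)
}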
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  AArch64MachineFunctionInfo *FuncInfo =
    MF.getInfo<AArch64MachineFunctionInfo>();

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  DebugLoc DL = MBBI->getDebugLoc();
  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned RetOpcode = MBBI->getOpcode();

  // Initial and residual are named for consistency with the prologue. Note
  // that in the epilogue, the residual adjustment is executed first.
  uint64_t NumInitialBytes = FuncInfo->getInitialStackAdjust();
  uint64_t NumResidualBytes = MFI.getStackSize() - NumInitialBytes;
  uint64_t ArgumentPopSize = 0;
  if (RetOpcode == AArch64::TC_RETURNdi ||
      RetOpcode == AArch64::TC_RETURNxi) {
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    MachineOperand &StackAdjust = MBBI->getOperand(1);

    MachineInstrBuilder MIB;
    if (RetOpcode == AArch64::TC_RETURNdi) {
      MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_Bimm));
      if (JumpTarget.isGlobal()) {
        MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                             JumpTarget.getTargetFlags());
      } else {
        assert(JumpTarget.isSymbol() && "unexpected tail call destination");
        MIB.addExternalSymbol(JumpTarget.getSymbolName(),
                              JumpTarget.getTargetFlags());
      }
    } else {
      assert(RetOpcode == AArch64::TC_RETURNxi && JumpTarget.isReg()
             && "Unexpected tail call");

      MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_BRx));
      MIB.addReg(JumpTarget.getReg(), RegState::Kill);
    }

    // Add the extra operands onto the new tail call instruction even though
    // they're not used directly (so that liveness is tracked properly etc).
    for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
      MIB->addOperand(MBBI->getOperand(i));

    // Delete the pseudo instruction TC_RETURN.
    MachineInstr *NewMI = prior(MBBI);
    MBB.erase(MBBI);
    MBBI = NewMI;

    // For a tail-call in a callee-pops-arguments environment, some or all of
    // the stack may actually be in use for the call's arguments, this is
    // calculated during LowerCall and consumed here...
    ArgumentPopSize = StackAdjust.getImm();
  } else {
    // ... otherwise the amount to pop is *all* of the argument space,
    // conveniently stored in the MachineFunctionInfo by
    // LowerFormalArguments. This will, of course, be zero for the C calling
    // convention.
    ArgumentPopSize = FuncInfo->getArgumentStackToRestore();
  }

  assert(NumInitialBytes % 16 == 0 && NumResidualBytes % 16 == 0
         && "refusing to adjust stack by misaligned amt");

  // We may need to address callee-saved registers differently, so find out
  // the bound on the frame indices.
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  int MinCSFI = 0;
  int MaxCSFI = -1;

  if (CSI.size()) {
    MinCSFI = CSI[0].getFrameIdx();
    MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
  }

  // The "residual" stack update comes first from this direction and
  // guarantees that SP is NumInitialBytes below its value on function entry,
  // either by a direct update or restoring it from the frame pointer.
  if (NumInitialBytes + ArgumentPopSize != 0) {
    emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16,
                 NumInitialBytes + ArgumentPopSize);
    --MBBI;
  }

  // MBBI now points to the instruction just past the last callee-saved
  // restoration (either RET/B if NumInitialBytes == 0, or the "ADD sp, sp"
  // otherwise).

  // Now we need to find out where to put the bulk of the stack adjustment.
  MachineBasicBlock::iterator FirstEpilogue = MBBI;
  while (MBBI != MBB.begin()) {
    --MBBI;

    unsigned FrameOp;
    for (FrameOp = 0; FrameOp < MBBI->getNumOperands(); ++FrameOp) {
      if (MBBI->getOperand(FrameOp).isFI())
        break;
    }

    // If this instruction doesn't have a frame index we've reached the end
    // of the callee-save restoration.
    if (FrameOp == MBBI->getNumOperands())
      break;

    // Likewise if it *is* a local reference, but not to a callee-saved
    // object.
    int FrameIdx = MBBI->getOperand(FrameOp).getIndex();
    if (FrameIdx < MinCSFI || FrameIdx > MaxCSFI)
      break;

    FirstEpilogue = MBBI;
  }

  if (MF.getFrameInfo()->hasVarSizedObjects()) {
    int64_t StaticFrameBase;
    StaticFrameBase = -(NumInitialBytes + FuncInfo->getFramePointerOffset());
    emitRegUpdate(MBB, FirstEpilogue, DL, TII,
                  AArch64::XSP, AArch64::X29, AArch64::NoRegister,
                  StaticFrameBase);
  } else {
    emitSPUpdate(MBB, FirstEpilogue, DL, TII, AArch64::X16, NumResidualBytes);
  }
}
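// Illustrative epilogue shape produced by the logic above (hypothetical
// sizes; note the runtime order, residual first):
//   add sp, sp, #NumResidualBytes        ; inserted before FirstEpilogue
//   ldp x29, x30, [sp, #16]              ; callee-save reloads
//   add sp, sp, #NumInitialBytes + pop   ; initial adjust + argument pop
//   ret
// With variable-sized objects, the residual update is instead computed from
// the frame pointer (x29) via emitRegUpdate, since SP's value is not known
// statically at that point.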
bool Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
                                     const ReduceEntry &Entry,
                                     bool LiveCPSR, bool IsSelfLoop) {
  if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
    return false;

  if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs &&
      STI->avoidMOVsShifterOperand())
    // Don't issue movs with shifter operand for some CPUs unless we
    // are optimizing / minimizing for size.
    return false;

  unsigned Reg0 = MI->getOperand(0).getReg();
  unsigned Reg1 = MI->getOperand(1).getReg();
  // t2MUL is "special". The tied source operand is second, not first.
  if (MI->getOpcode() == ARM::t2MUL) {
    unsigned Reg2 = MI->getOperand(2).getReg();
    // Early exit if the regs aren't all low regs.
    if (!isARMLowRegister(Reg0) || !isARMLowRegister(Reg1)
        || !isARMLowRegister(Reg2))
      return false;
    if (Reg0 != Reg2) {
      // If the other operand also isn't the same as the destination, we
      // can't reduce.
      if (Reg1 != Reg0)
        return false;
      // Try to commute the operands to make it a 2-address instruction.
      MachineInstr *CommutedMI = TII->commuteInstruction(MI);
      if (!CommutedMI)
        return false;
    }
  } else if (Reg0 != Reg1) {
    // Try to commute the operands to make it a 2-address instruction.
    unsigned CommOpIdx1, CommOpIdx2;
    if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) ||
        CommOpIdx1 != 1 || MI->getOperand(CommOpIdx2).getReg() != Reg0)
      return false;
    MachineInstr *CommutedMI = TII->commuteInstruction(MI);
    if (!CommutedMI)
      return false;
  }
  if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
    return false;
  if (Entry.Imm2Limit) {
    unsigned Imm = MI->getOperand(2).getImm();
    unsigned Limit = (1 << Entry.Imm2Limit) - 1;
    if (Imm > Limit)
      return false;
  } else {
    unsigned Reg2 = MI->getOperand(2).getReg();
    if (Entry.LowRegs2 && !isARMLowRegister(Reg2))
      return false;
  }

  // Check if it's possible / necessary to transfer the predicate.
  const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool SkipPred = false;
  if (Pred != ARMCC::AL) {
    if (!NewMCID.isPredicable())
      // Can't transfer predicate, fail.
      return false;
  } else {
    SkipPred = !NewMCID.isPredicable();
  }

  bool HasCC = false;
  bool CCDead = false;
  const MCInstrDesc &MCID = MI->getDesc();
  if (MCID.hasOptionalDef()) {
    unsigned NumOps = MCID.getNumOperands();
    HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
    if (HasCC && MI->getOperand(NumOps-1).isDead())
      CCDead = true;
  }
  if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
    return false;

  // Avoid adding a false dependency on the partial flag update by some 16-bit
  // instructions which have the 's' bit set.
  if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
      canAddPseudoFlagDep(MI, IsSelfLoop))
    return false;

  // Add the 16-bit instruction.
  DebugLoc dl = MI->getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
  MIB.addOperand(MI->getOperand(0));
  if (NewMCID.hasOptionalDef()) {
    if (HasCC)
      AddDefaultT1CC(MIB, CCDead);
    else
      AddNoT1CC(MIB);
  }

  // Transfer the rest of operands.
  unsigned NumOps = MCID.getNumOperands();
  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
    if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
      continue;
    if (SkipPred && MCID.OpInfo[i].isPredicate())
      continue;
    MIB.addOperand(MI->getOperand(i));
  }

  // Transfer MI flags.
  MIB.setMIFlags(MI->getFlags());

  DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);

  MBB.erase_instr(MI);
  ++Num2Addrs;
  return true;
}
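// Worked example (hypothetical registers) of the commutation above: the
// 16-bit ANDS form is two-address (Rdn := Rdn & Rm), so
//   t2ANDrr r0, r1, r0    ; Reg0 != Reg1, but the second source matches Reg0
// is first commuted to t2ANDrr r0, r0, r1, after which it can shrink to the
// narrow "ands r0, r1".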
void XCoreFrameLowering::emitPrologue(MachineFunction &MF,
                                      MachineBasicBlock &MBB) const {
  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineModuleInfo *MMI = &MF.getMMI();
  const MCRegisterInfo *MRI = MMI->getContext().getRegisterInfo();
  const XCoreInstrInfo &TII = *MF.getSubtarget<XCoreSubtarget>().getInstrInfo();
  XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc dl;

  if (MFI->getMaxAlignment() > getStackAlignment())
    report_fatal_error("emitPrologue unsupported alignment: "
                       + Twine(MFI->getMaxAlignment()));

  const AttributeSet &PAL = MF.getFunction()->getAttributes();
  if (PAL.hasAttrSomewhere(Attribute::Nest))
    BuildMI(MBB, MBBI, dl, TII.get(XCore::LDWSP_ru6), XCore::R11).addImm(0);
    // FIX: Needs addMemOperand() but can't use getFixedStack() or getStack().

  // Work out frame sizes.
  // We will adjust the SP in stages towards the final FrameSize.
  assert(MFI->getStackSize()%4 == 0 && "Misaligned frame size");
  const int FrameSize = MFI->getStackSize() / 4;
  int Adjusted = 0;

  bool saveLR = XFI->hasLRSpillSlot();
  bool UseENTSP = saveLR && FrameSize
                  && (MFI->getObjectOffset(XFI->getLRSpillSlot()) == 0);
  if (UseENTSP)
    saveLR = false;
  bool FP = hasFP(MF);
  bool emitFrameMoves = XCoreRegisterInfo::needsFrameMoves(MF);

  if (UseENTSP) {
    // Allocate space on the stack at the same time as saving LR.
    Adjusted = (FrameSize > MaxImmU16) ? MaxImmU16 : FrameSize;
    int Opcode = isImmU6(Adjusted) ? XCore::ENTSP_u6 : XCore::ENTSP_lu6;
    MBB.addLiveIn(XCore::LR);
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opcode));
    MIB.addImm(Adjusted);
    MIB->addRegisterKilled(XCore::LR, MF.getSubtarget().getRegisterInfo(),
                           true);
    if (emitFrameMoves) {
      EmitDefCfaOffset(MBB, MBBI, dl, TII, MMI, Adjusted*4);
      unsigned DRegNum = MRI->getDwarfRegNum(XCore::LR, true);
      EmitCfiOffset(MBB, MBBI, dl, TII, MMI, DRegNum, 0);
    }
  }

  // If necessary, save LR and FP to the stack, as we EXTSP.
  SmallVector<StackSlotInfo,2> SpillList;
  GetSpillList(SpillList, MFI, XFI, saveLR, FP);
  // We want the nearest (negative) offsets first, so reverse list.
  std::reverse(SpillList.begin(), SpillList.end());
  for (unsigned i = 0, e = SpillList.size(); i != e; ++i) {
    assert(SpillList[i].Offset % 4 == 0 && "Misaligned stack offset");
    assert(SpillList[i].Offset <= 0 && "Unexpected positive stack offset");
    int OffsetFromTop = - SpillList[i].Offset/4;
    IfNeededExtSP(MBB, MBBI, dl, TII, MMI, OffsetFromTop, Adjusted, FrameSize,
                  emitFrameMoves);
    int Offset = Adjusted - OffsetFromTop;
    int Opcode = isImmU6(Offset) ? XCore::STWSP_ru6 : XCore::STWSP_lru6;
    MBB.addLiveIn(SpillList[i].Reg);
    BuildMI(MBB, MBBI, dl, TII.get(Opcode))
      .addReg(SpillList[i].Reg, RegState::Kill)
      .addImm(Offset)
      .addMemOperand(getFrameIndexMMO(MBB, SpillList[i].FI,
                                      MachineMemOperand::MOStore));
    if (emitFrameMoves) {
      unsigned DRegNum = MRI->getDwarfRegNum(SpillList[i].Reg, true);
      EmitCfiOffset(MBB, MBBI, dl, TII, MMI, DRegNum, SpillList[i].Offset);
    }
  }

  // Complete any remaining stack adjustment.
  IfNeededExtSP(MBB, MBBI, dl, TII, MMI, FrameSize, Adjusted, FrameSize,
                emitFrameMoves);
  assert(Adjusted==FrameSize && "IfNeededExtSP has not completed adjustment");

  if (FP) {
    // Set the FP from the SP.
    BuildMI(MBB, MBBI, dl, TII.get(XCore::LDAWSP_ru6), FramePtr).addImm(0);
    if (emitFrameMoves)
      EmitDefCfaRegister(MBB, MBBI, dl, TII, MMI,
                         MRI->getDwarfRegNum(FramePtr, true));
  }

  if (emitFrameMoves) {
    // Frame moves for callee saved.
    for (const auto &SpillLabel : XFI->getSpillLabels()) {
      MachineBasicBlock::iterator Pos = SpillLabel.first;
      ++Pos;
      const CalleeSavedInfo &CSI = SpillLabel.second;
      int Offset = MFI->getObjectOffset(CSI.getFrameIdx());
      unsigned DRegNum = MRI->getDwarfRegNum(CSI.getReg(), true);
      EmitCfiOffset(MBB, Pos, dl, TII, MMI, DRegNum, Offset);
    }
    if (XFI->hasEHSpillSlot()) {
      // The unwinder requires stack slot & CFI offsets for the exception
      // info. We do not save/spill these registers.
      SmallVector<StackSlotInfo,2> SpillList;
      GetEHSpillList(SpillList, MFI, XFI,
                     MF.getSubtarget().getTargetLowering());
      assert(SpillList.size()==2 && "Unexpected SpillList size");
      EmitCfiOffset(MBB, MBBI, dl, TII, MMI,
                    MRI->getDwarfRegNum(SpillList[0].Reg, true),
                    SpillList[0].Offset);
      EmitCfiOffset(MBB, MBBI, dl, TII, MMI,
                    MRI->getDwarfRegNum(SpillList[1].Reg, true),
                    SpillList[1].Offset);
    }
  }
}
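// Illustrative prologue shapes from the staging above (hypothetical sizes,
// in words): when LR is spilled at offset 0 and the frame is small, the save
// and the allocation fold into a single instruction,
//   entsp 10        ; ENTSP_u6: save lr and extend SP by 10 words
// while a frame larger than MaxImmU16 words is first extended by MaxImmU16
// via ENTSP and then topped up by the extsp instructions that IfNeededExtSP
// emits on demand.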
bool Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
                                      const ReduceEntry &Entry,
                                      bool LiveCPSR, bool IsSelfLoop) {
  if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
    return false;

  if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs &&
      STI->avoidMOVsShifterOperand())
    // Don't issue movs with shifter operand for some CPUs unless we
    // are optimizing / minimizing for size.
    return false;

  unsigned Limit = ~0U;
  if (Entry.Imm1Limit)
    Limit = (1 << Entry.Imm1Limit) - 1;

  const MCInstrDesc &MCID = MI->getDesc();
  for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
    if (MCID.OpInfo[i].isPredicate())
      continue;
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg()) {
      unsigned Reg = MO.getReg();
      if (!Reg || Reg == ARM::CPSR)
        continue;
      if (Entry.LowRegs1 && !isARMLowRegister(Reg))
        return false;
    } else if (MO.isImm() && !MCID.OpInfo[i].isPredicate()) {
      if (((unsigned)MO.getImm()) > Limit)
        return false;
    }
  }

  // Check if it's possible / necessary to transfer the predicate.
  const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool SkipPred = false;
  if (Pred != ARMCC::AL) {
    if (!NewMCID.isPredicable())
      // Can't transfer predicate, fail.
      return false;
  } else {
    SkipPred = !NewMCID.isPredicable();
  }

  bool HasCC = false;
  bool CCDead = false;
  if (MCID.hasOptionalDef()) {
    unsigned NumOps = MCID.getNumOperands();
    HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
    if (HasCC && MI->getOperand(NumOps-1).isDead())
      CCDead = true;
  }
  if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
    return false;

  // Avoid adding a false dependency on the partial flag update by some 16-bit
  // instructions which have the 's' bit set.
  if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
      canAddPseudoFlagDep(MI, IsSelfLoop))
    return false;

  // Add the 16-bit instruction.
  DebugLoc dl = MI->getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
  MIB.addOperand(MI->getOperand(0));
  if (NewMCID.hasOptionalDef()) {
    if (HasCC)
      AddDefaultT1CC(MIB, CCDead);
    else
      AddNoT1CC(MIB);
  }

  // Transfer the rest of operands.
  unsigned NumOps = MCID.getNumOperands();
  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
    if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
      continue;
    if ((MCID.getOpcode() == ARM::t2RSBSri ||
         MCID.getOpcode() == ARM::t2RSBri ||
         MCID.getOpcode() == ARM::t2SXTB ||
         MCID.getOpcode() == ARM::t2SXTH ||
         MCID.getOpcode() == ARM::t2UXTB ||
         MCID.getOpcode() == ARM::t2UXTH) && i == 2)
      // Skip the zero immediate operand, it's now implicit.
      continue;
    bool isPred = (i < NumOps && MCID.OpInfo[i].isPredicate());
    if (SkipPred && isPred)
      continue;
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR)
      // Skip implicit def of CPSR. Either it's modeled as an optional
      // def now or it's already an implicit def on the new instruction.
      continue;
    MIB.addOperand(MO);
  }
  if (!MCID.isPredicable() && NewMCID.isPredicable())
    AddDefaultPred(MIB);

  // Transfer MI flags.
  MIB.setMIFlags(MI->getFlags());

  DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);

  MBB.erase_instr(MI);
  ++NumNarrows;
  return true;
}
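// Standalone sketch of the predicate-transfer rule used above (and in
// ReduceTo2Addr): a predicated wide instruction may only narrow when the
// 16-bit opcode can carry the condition, while an unpredicated (AL) one just
// drops its predicate operands if the narrow form has none. Hypothetical
// helper.
static bool predicateTransferOK(bool WideIsPredicated,
                                bool NarrowIsPredicable, bool &SkipPred) {
  SkipPred = false;
  if (WideIsPredicated)
    return NarrowIsPredicable;    // must transfer the condition, else fail
  SkipPred = !NarrowIsPredicable; // skip copying AL predicate operands
  return true;
}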
bool AArch64FrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    const std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  unsigned Count = CSI.size();
  DebugLoc DL;
  assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");

  if (MI != MBB.end())
    DL = MI->getDebugLoc();

  for (unsigned i = 0; i < Count; i += 2) {
    unsigned idx = Count - i - 2;
    unsigned Reg1 = CSI[idx].getReg();
    unsigned Reg2 = CSI[idx + 1].getReg();
    // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
    // list to come in sorted by frame index so that we can issue the store
    // pair instructions directly. Assert if we see anything otherwise.
    //
    // The order of the registers in the list is controlled by
    // getCalleeSavedRegs(), so they will always be in-order, as well.
    assert(CSI[idx].getFrameIdx() + 1 == CSI[idx + 1].getFrameIdx() &&
           "Out of order callee saved regs!");
    unsigned StrOpc;
    assert((i & 1) == 0 && "Odd index for callee-saved reg spill!");
    // Issue sequence of non-sp increment and pi sp spills for cs regs. The
    // first spill is a pre-increment that allocates the stack.
    // For example:
    //    stp x22, x21, [sp, #-48]!   // addImm(-6)
    //    stp x20, x19, [sp, #16]     // addImm(+2)
    //    stp fp, lr, [sp, #32]       // addImm(+4)
    // Rationale: This sequence saves uop updates compared to a sequence of
    // pre-increment spills like stp xi,xj,[sp,#-16]!
    // Note: Similar rationale and sequence for restores in epilog.
    if (AArch64::GPR64RegClass.contains(Reg1)) {
      assert(AArch64::GPR64RegClass.contains(Reg2) &&
             "Expected GPR64 callee-saved register pair!");
      // For first spill use pre-increment store.
      if (i == 0)
        StrOpc = AArch64::STPXpre;
      else
        StrOpc = AArch64::STPXi;
    } else if (AArch64::FPR64RegClass.contains(Reg1)) {
      assert(AArch64::FPR64RegClass.contains(Reg2) &&
             "Expected FPR64 callee-saved register pair!");
      // For first spill use pre-increment store.
      if (i == 0)
        StrOpc = AArch64::STPDpre;
      else
        StrOpc = AArch64::STPDi;
    } else
      llvm_unreachable("Unexpected callee saved register!");
    DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1) << ", "
                 << TRI->getName(Reg2) << ") -> fi#(" << CSI[idx].getFrameIdx()
                 << ", " << CSI[idx + 1].getFrameIdx() << ")\n");
    // Compute offset: i = 0 => offset = -Count;
    // i = 2 => offset = -(Count - 2) + Count = 2 = i; etc.
    const int Offset = (i == 0) ? -Count : i;
    assert((Offset >= -64 && Offset <= 63) &&
           "Offset out of bounds for STP immediate");
    MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
    if (StrOpc == AArch64::STPDpre || StrOpc == AArch64::STPXpre)
      MIB.addReg(AArch64::SP, RegState::Define);

    MBB.addLiveIn(Reg1);
    MBB.addLiveIn(Reg2);
    MIB.addReg(Reg2, getPrologueDeath(MF, Reg2))
        .addReg(Reg1, getPrologueDeath(MF, Reg1))
        .addReg(AArch64::SP)
        .addImm(Offset) // [sp, #offset * 8], where factor * 8 is implicit
        .setMIFlag(MachineInstr::FrameSetup);
  }
  return true;
}
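// Worked example of the offset formula above (hypothetical frame with
// Count == 6 callee-saved registers; immediates are in units of 8 bytes,
// scaled implicitly by STP):
//   i == 0: Offset = -Count = -6  ->  stp x22, x21, [sp, #-48]!
//   i == 2: Offset = i      =  2  ->  stp x20, x19, [sp, #16]
//   i == 4: Offset = i      =  4  ->  stp fp, lr,   [sp, #32]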
void XCoreFrameLowering::emitEpilogue(MachineFunction &MF,
                                      MachineBasicBlock &MBB) const {
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  const XCoreInstrInfo &TII =
    *static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo());
  XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
  DebugLoc dl = MBBI->getDebugLoc();

  // Work out frame sizes.
  // We will adjust the SP in stages towards the final FrameSize.
  int RemainingAdj = MFI->getStackSize();
  assert(RemainingAdj%4 == 0 && "Misaligned frame size");
  RemainingAdj /= 4;

  bool restoreLR = XFI->getUsesLR();
  bool UseRETSP = restoreLR && RemainingAdj
                  && (MFI->getObjectOffset(XFI->getLRSpillSlot()) == 0);
  if (UseRETSP)
    restoreLR = false;
  bool FP = hasFP(MF);

  if (FP) // Restore the stack pointer.
    BuildMI(MBB, MBBI, dl, TII.get(XCore::SETSP_1r)).addReg(FramePtr);

  // If necessary, restore LR and FP from the stack, as we EXTSP.
  SmallVector<std::pair<unsigned,int>,2> SpillList;
  GetSpillList(SpillList, MFI, XFI, restoreLR, FP);
  unsigned i = SpillList.size();
  while (i--) {
    unsigned SpilledReg = SpillList[i].first;
    int SpillOffset = SpillList[i].second;
    assert(SpillOffset % 4 == 0 && "Misaligned stack offset");
    assert(SpillOffset <= 0 && "Unexpected positive stack offset");
    int OffsetFromTop = - SpillOffset/4;
    IfNeededLDAWSP(MBB, MBBI, dl, TII, OffsetFromTop, RemainingAdj);
    int Offset = RemainingAdj - OffsetFromTop;
    int Opcode = isImmU6(Offset) ? XCore::LDWSP_ru6 : XCore::LDWSP_lru6;
    BuildMI(MBB, MBBI, dl, TII.get(Opcode), SpilledReg).addImm(Offset);
  }

  if (RemainingAdj) {
    // Complete all but one of the remaining stack adjustments.
    IfNeededLDAWSP(MBB, MBBI, dl, TII, 0, RemainingAdj);
    if (UseRETSP) {
      // Fold the epilogue's final SP adjustment into the return instruction.
      assert(MBBI->getOpcode() == XCore::RETSP_u6
             || MBBI->getOpcode() == XCore::RETSP_lu6);
      int Opcode = isImmU6(RemainingAdj) ? XCore::RETSP_u6 : XCore::RETSP_lu6;
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opcode))
        .addImm(RemainingAdj);
      for (unsigned i = 3, e = MBBI->getNumOperands(); i < e; ++i)
        MIB->addOperand(MBBI->getOperand(i)); // copy any variadic operands
      MBB.erase(MBBI); // Erase the previous return instruction.
    } else {
      int Opcode = isImmU6(RemainingAdj) ? XCore::LDAWSP_ru6
                                         : XCore::LDAWSP_lru6;
      BuildMI(MBB, MBBI, dl, TII.get(Opcode), XCore::SP).addImm(RemainingAdj);
      // Don't erase the return instruction.
    }
  } // else Don't erase the return instruction.
}
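// Standalone sketch of the encoding choice made repeatedly above: the short
// XCore forms (_u6/_ru6) carry a 6-bit immediate and the long-prefixed forms
// (_lu6/_lru6) a 16-bit one, so each emission picks the short opcode exactly
// when the word count fits in six bits. Hypothetical helper mirroring
// isImmU6.
static bool fitsU6(int WordCount) {
  return WordCount >= 0 && WordCount < 64; // 6-bit unsigned immediate
}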
void HexagonEarlyIfConversion::convert(const FlowPattern &FP) {
  MachineBasicBlock *TSB = nullptr, *FSB = nullptr;
  MachineBasicBlock::iterator OldTI = FP.SplitB->getFirstTerminator();
  assert(OldTI != FP.SplitB->end());
  DebugLoc DL = OldTI->getDebugLoc();

  if (FP.TrueB) {
    TSB = *FP.TrueB->succ_begin();
    predicateBlockNB(FP.SplitB, OldTI, FP.TrueB, FP.PredR, true);
  }
  if (FP.FalseB) {
    FSB = *FP.FalseB->succ_begin();
    MachineBasicBlock::iterator At = FP.SplitB->getFirstTerminator();
    predicateBlockNB(FP.SplitB, At, FP.FalseB, FP.PredR, false);
  }

  // Regenerate new terminators in the split block and update the successors.
  // First, remember any information that may be needed later and remove the
  // existing terminators/successors from the split block.
  MachineBasicBlock *SSB = nullptr;
  FP.SplitB->erase(OldTI, FP.SplitB->end());
  while (FP.SplitB->succ_size() > 0) {
    MachineBasicBlock *T = *FP.SplitB->succ_begin();
    // It's possible that the split block had a successor that is not a
    // predicated block. This could only happen if there was only one block
    // to be predicated. Example:
    //   split_b:
    //     if (p) jump true_b
    //     jump unrelated2_b
    //   unrelated1_b:
    //     ...
    //   unrelated2_b:  ; can have other predecessors, so it's not "false_b"
    //     jump other_b
    //   true_b:        ; only reachable from split_b, can be predicated
    //     ...
    //
    // Find this successor (SSB) if it exists.
    if (T != FP.TrueB && T != FP.FalseB) {
      assert(!SSB);
      SSB = T;
    }
    FP.SplitB->removeSuccessor(FP.SplitB->succ_begin());
  }

  // Insert new branches and update the successors of the split block. This
  // may create unconditional branches to the layout successor, etc., but
  // that will be cleaned up later. For now, make sure that correct code is
  // generated.
  if (FP.JoinB) {
    assert(!SSB || SSB == FP.JoinB);
    BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jump))
      .addMBB(FP.JoinB);
    FP.SplitB->addSuccessor(FP.JoinB);
  } else {
    bool HasBranch = false;
    if (TSB) {
      BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jumpt))
        .addReg(FP.PredR)
        .addMBB(TSB);
      FP.SplitB->addSuccessor(TSB);
      HasBranch = true;
    }
    if (FSB) {
      const MCInstrDesc &D = HasBranch ? TII->get(Hexagon::J2_jump)
                                       : TII->get(Hexagon::J2_jumpf);
      MachineInstrBuilder MIB = BuildMI(*FP.SplitB, FP.SplitB->end(), DL, D);
      if (!HasBranch)
        MIB.addReg(FP.PredR);
      MIB.addMBB(FSB);
      FP.SplitB->addSuccessor(FSB);
    }
    if (SSB) {
      // This cannot happen if both TSB and FSB are set. [TF]SB are the
      // successor blocks of the TrueB and FalseB (or null if the TrueB
      // or FalseB block is null). SSB is the potential successor block
      // of the SplitB that is neither TrueB nor FalseB.
      BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jump))
        .addMBB(SSB);
      FP.SplitB->addSuccessor(SSB);
    }
  }

  // What is left to do is to update the PHI nodes that could have entries
  // referring to predicated blocks.
  if (FP.JoinB) {
    updatePhiNodes(FP.JoinB, FP);
  } else {
    if (TSB)
      updatePhiNodes(TSB, FP);
    if (FSB)
      updatePhiNodes(FSB, FP);
    // Nothing to update in SSB, since SSB's predecessors haven't changed.
  }
}
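// Resulting terminator shapes for the split block (illustrative; p0 stands
// for FP.PredR): with both sides present the code above emits
//   if (p0) jump true_succ_b     ; J2_jumpt
//   jump false_succ_b            ; J2_jump, since HasBranch is already set
// and with only a false side it emits the single conditional
//   if (!p0) jump false_succ_b   ; J2_jumpf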
void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
                               int Index, RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = MI->getDebugLoc();

  unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
  unsigned SuperReg = MI->getOperand(0).getReg();
  bool IsKill = MI->getOperand(0).isKill();

  // SubReg carries the "Kill" flag when SubReg == SuperReg.
  unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    unsigned SubReg = NumSubRegs == 1 ?
      SuperReg : getSubReg(SuperReg, getSubRegFromChannel(i));

    struct SIMachineFunctionInfo::SpilledReg Spill =
      MFI->getSpilledReg(MF, Index, i);
    if (Spill.hasReg()) {
      if (SuperReg == AMDGPU::M0) {
        assert(NumSubRegs == 1);
        unsigned CopyM0 =
          MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), CopyM0)
          .addReg(SuperReg, getKillRegState(IsKill));

        // The real spill now kills the temp copy.
        SubReg = SuperReg = CopyM0;
        IsKill = true;
      }

      BuildMI(*MBB, MI, DL,
              TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
              Spill.VGPR)
        .addReg(SubReg, getKillRegState(IsKill))
        .addImm(Spill.Lane);

      // FIXME: Since this spills to another register instead of an actual
      // frame index, we should delete the frame index when all references to
      // it are fixed.
    } else {
      // Spill SGPR to a frame index.
      // FIXME we should use S_STORE_DWORD here for VI.
      MachineInstrBuilder Mov =
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
          .addReg(SubReg, SubKillState);

      // There could be undef components of a spilled super register.
      // TODO: Can we detect this and skip the spill?
      if (NumSubRegs > 1) {
        // The last implicit use of the SuperReg carries the "Kill" flag.
        unsigned SuperKillState = 0;
        if (i + 1 == e)
          SuperKillState |= getKillRegState(IsKill);
        Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
      }

      unsigned Size = FrameInfo.getObjectSize(Index);
      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo =
        MachinePointerInfo::getFixedStack(*MF, Index);
      MachineMemOperand *MMO =
        MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                 Size, Align);
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
        .addReg(TmpReg, RegState::Kill)         // src
        .addFrameIndex(Index)                   // vaddr
        .addReg(MFI->getScratchRSrcReg())       // srrsrc
        .addReg(MFI->getScratchWaveOffsetReg()) // soffset
        .addImm(i * 4)                          // offset
        .addMemOperand(MMO);
    }
  }
  MI->eraseFromParent();
  MFI->addToSpilledSGPRs(NumSubRegs);
}
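// Note on the two spill paths above (illustrative): when a VGPR lane was
// reserved for this SGPR, each 32-bit piece is parked with
//   v_writelane_b32 vN, sM, lane
// and no memory traffic is generated; otherwise each piece takes a round
// trip through a temporary VGPR and a scratch-buffer store at byte offset
// i * 4, so a 64-bit SGPR pair produces two stores at offsets 0 and 4.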