/// EmitSpecialNode - Generate machine code for a target-independent node and /// needed dependencies. void InstrEmitter:: EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, DenseMap<SDValue, unsigned> &VRBaseMap) { switch (Node->getOpcode()) { default: #ifndef NDEBUG Node->dump(); #endif llvm_unreachable("This target-independent node should have been selected!"); case ISD::EntryToken: llvm_unreachable("EntryToken should have been excluded from the schedule!"); case ISD::MERGE_VALUES: case ISD::TokenFactor: // fall thru break; case ISD::CopyToReg: { unsigned SrcReg; SDValue SrcVal = Node->getOperand(2); if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(SrcVal)) SrcReg = R->getReg(); else SrcReg = getVR(SrcVal, VRBaseMap); unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); if (SrcReg == DestReg) // Coalesced away the copy? Ignore. break; BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), DestReg).addReg(SrcReg); break; } case ISD::CopyFromReg: { unsigned SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap); break; } case ISD::EH_LABEL: case ISD::ANNOTATION_LABEL: { unsigned Opc = (Node->getOpcode() == ISD::EH_LABEL) ? TargetOpcode::EH_LABEL : TargetOpcode::ANNOTATION_LABEL; MCSymbol *S = cast<LabelSDNode>(Node)->getLabel(); BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(Opc)).addSym(S); break; } case ISD::LIFETIME_START: case ISD::LIFETIME_END: { unsigned TarOp = (Node->getOpcode() == ISD::LIFETIME_START) ? TargetOpcode::LIFETIME_START : TargetOpcode::LIFETIME_END; FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Node->getOperand(1)); BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TarOp)) .addFrameIndex(FI->getIndex()); break; } case ISD::INLINEASM: { unsigned NumOps = Node->getNumOperands(); if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) --NumOps; // Ignore the glue operand. // Create the inline asm machine instruction. MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), TII->get(TargetOpcode::INLINEASM)); // Add the asm string as an external symbol operand. SDValue AsmStrV = Node->getOperand(InlineAsm::Op_AsmString); const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol(); MIB.addExternalSymbol(AsmStr); // Add the HasSideEffect, isAlignStack, AsmDialect, MayLoad and MayStore // bits. int64_t ExtraInfo = cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_ExtraInfo))-> getZExtValue(); MIB.addImm(ExtraInfo); // Remember to operand index of the group flags. SmallVector<unsigned, 8> GroupIdx; // Remember registers that are part of early-clobber defs. SmallVector<unsigned, 8> ECRegs; // Add all of the operand registers to the instruction. for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { unsigned Flags = cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); const unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); GroupIdx.push_back(MIB->getNumOperands()); MIB.addImm(Flags); ++i; // Skip the ID value. switch (InlineAsm::getKind(Flags)) { default: llvm_unreachable("Bad flags!"); case InlineAsm::Kind_RegDef: for (unsigned j = 0; j != NumVals; ++j, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); // FIXME: Add dead flags for physical and virtual registers defined. // For now, mark physical register defs as implicit to help fast // regalloc. This makes inline asm look a lot like calls. MIB.addReg(Reg, RegState::Define | getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg))); } break; case InlineAsm::Kind_RegDefEarlyClobber: case InlineAsm::Kind_Clobber: for (unsigned j = 0; j != NumVals; ++j, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); MIB.addReg(Reg, RegState::Define | RegState::EarlyClobber | getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg))); ECRegs.push_back(Reg); } break; case InlineAsm::Kind_RegUse: // Use of register. case InlineAsm::Kind_Imm: // Immediate. case InlineAsm::Kind_Mem: // Addressing mode. // The addressing mode has been selected, just add all of the // operands to the machine instruction. for (unsigned j = 0; j != NumVals; ++j, ++i) AddOperand(MIB, Node->getOperand(i), 0, nullptr, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); // Manually set isTied bits. if (InlineAsm::getKind(Flags) == InlineAsm::Kind_RegUse) { unsigned DefGroup = 0; if (InlineAsm::isUseOperandTiedToDef(Flags, DefGroup)) { unsigned DefIdx = GroupIdx[DefGroup] + 1; unsigned UseIdx = GroupIdx.back() + 1; for (unsigned j = 0; j != NumVals; ++j) MIB->tieOperands(DefIdx + j, UseIdx + j); } } break; } } // GCC inline assembly allows input operands to also be early-clobber // output operands (so long as the operand is written only after it's // used), but this does not match the semantics of our early-clobber flag. // If an early-clobber operand register is also an input operand register, // then remove the early-clobber flag. for (unsigned Reg : ECRegs) { if (MIB->readsRegister(Reg, TRI)) { MachineOperand *MO = MIB->findRegisterDefOperand(Reg, false, TRI); assert(MO && "No def operand for clobbered register?"); MO->setIsEarlyClobber(false); } } // Get the mdnode from the asm if it exists and add it to the instruction. SDValue MDV = Node->getOperand(InlineAsm::Op_MDNode); const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD(); if (MD) MIB.addMetadata(MD); MBB->insert(InsertPos, MIB); break; } } }
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { AArch64MachineFunctionInfo *FuncInfo = MF.getInfo<AArch64MachineFunctionInfo>(); MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); DebugLoc DL = MBBI->getDebugLoc(); const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); MachineFrameInfo &MFI = *MF.getFrameInfo(); unsigned RetOpcode = MBBI->getOpcode(); // Initial and residual are named for consitency with the prologue. Note that // in the epilogue, the residual adjustment is executed first. uint64_t NumInitialBytes = FuncInfo->getInitialStackAdjust(); uint64_t NumResidualBytes = MFI.getStackSize() - NumInitialBytes; uint64_t ArgumentPopSize = 0; if (RetOpcode == AArch64::TC_RETURNdi || RetOpcode == AArch64::TC_RETURNxi) { MachineOperand &JumpTarget = MBBI->getOperand(0); MachineOperand &StackAdjust = MBBI->getOperand(1); MachineInstrBuilder MIB; if (RetOpcode == AArch64::TC_RETURNdi) { MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_Bimm)); if (JumpTarget.isGlobal()) { MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), JumpTarget.getTargetFlags()); } else { assert(JumpTarget.isSymbol() && "unexpected tail call destination"); MIB.addExternalSymbol(JumpTarget.getSymbolName(), JumpTarget.getTargetFlags()); } } else { assert(RetOpcode == AArch64::TC_RETURNxi && JumpTarget.isReg() && "Unexpected tail call"); MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_BRx)); MIB.addReg(JumpTarget.getReg(), RegState::Kill); } // Add the extra operands onto the new tail call instruction even though // they're not used directly (so that liveness is tracked properly etc). for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i) MIB->addOperand(MBBI->getOperand(i)); // Delete the pseudo instruction TC_RETURN. MachineInstr *NewMI = std::prev(MBBI); MBB.erase(MBBI); MBBI = NewMI; // For a tail-call in a callee-pops-arguments environment, some or all of // the stack may actually be in use for the call's arguments, this is // calculated during LowerCall and consumed here... ArgumentPopSize = StackAdjust.getImm(); } else { // ... otherwise the amount to pop is *all* of the argument space, // conveniently stored in the MachineFunctionInfo by // LowerFormalArguments. This will, of course, be zero for the C calling // convention. ArgumentPopSize = FuncInfo->getArgumentStackToRestore(); } assert(NumInitialBytes % 16 == 0 && NumResidualBytes % 16 == 0 && "refusing to adjust stack by misaligned amt"); // We may need to address callee-saved registers differently, so find out the // bound on the frame indices. const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); int MinCSFI = 0; int MaxCSFI = -1; if (CSI.size()) { MinCSFI = CSI[0].getFrameIdx(); MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); } // The "residual" stack update comes first from this direction and guarantees // that SP is NumInitialBytes below its value on function entry, either by a // direct update or restoring it from the frame pointer. if (NumInitialBytes + ArgumentPopSize != 0) { emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, NumInitialBytes + ArgumentPopSize); --MBBI; } // MBBI now points to the instruction just past the last callee-saved // restoration (either RET/B if NumInitialBytes == 0, or the "ADD sp, sp" // otherwise). // Now we need to find out where to put the bulk of the stack adjustment MachineBasicBlock::iterator FirstEpilogue = MBBI; while (MBBI != MBB.begin()) { --MBBI; unsigned FrameOp; for (FrameOp = 0; FrameOp < MBBI->getNumOperands(); ++FrameOp) { if (MBBI->getOperand(FrameOp).isFI()) break; } // If this instruction doesn't have a frame index we've reached the end of // the callee-save restoration. if (FrameOp == MBBI->getNumOperands()) break; // Likewise if it *is* a local reference, but not to a callee-saved object. int FrameIdx = MBBI->getOperand(FrameOp).getIndex(); if (FrameIdx < MinCSFI || FrameIdx > MaxCSFI) break; FirstEpilogue = MBBI; } if (MF.getFrameInfo()->hasVarSizedObjects()) { int64_t StaticFrameBase; StaticFrameBase = -(NumInitialBytes + FuncInfo->getFramePointerOffset()); emitRegUpdate(MBB, FirstEpilogue, DL, TII, AArch64::XSP, AArch64::X29, AArch64::NoRegister, StaticFrameBase); } else { emitSPUpdate(MBB, FirstEpilogue, DL,TII, AArch64::X16, NumResidualBytes); } }
/// AddOperand - Add the specified operand to the specified machine instr. II /// specifies the instruction information for the node, and IIOpNum is the /// operand number (in the II) that we are adding. void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, SDValue Op, unsigned IIOpNum, const MCInstrDesc *II, DenseMap<SDValue, unsigned> &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned) { if (Op.isMachineOpcode()) { AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap, IsDebug, IsClone, IsCloned); } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { MIB.addImm(C->getSExtValue()); } else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) { MIB.addFPImm(F->getConstantFPValue()); } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) { unsigned VReg = R->getReg(); MVT OpVT = Op.getSimpleValueType(); const TargetRegisterClass *OpRC = TLI->isTypeLegal(OpVT) ? TLI->getRegClassFor(OpVT) : nullptr; const TargetRegisterClass *IIRC = II ? TRI->getAllocatableClass(TII->getRegClass(*II, IIOpNum, TRI, *MF)) : nullptr; if (OpRC && IIRC && OpRC != IIRC && TargetRegisterInfo::isVirtualRegister(VReg)) { unsigned NewVReg = MRI->createVirtualRegister(IIRC); BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); VReg = NewVReg; } // Turn additional physreg operands into implicit uses on non-variadic // instructions. This is used by call and return instructions passing // arguments in registers. bool Imp = II && (IIOpNum >= II->getNumOperands() && !II->isVariadic()); MIB.addReg(VReg, getImplRegState(Imp)); } else if (RegisterMaskSDNode *RM = dyn_cast<RegisterMaskSDNode>(Op)) { MIB.addRegMask(RM->getRegMask()); } else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) { MIB.addGlobalAddress(TGA->getGlobal(), TGA->getOffset(), TGA->getTargetFlags()); } else if (BasicBlockSDNode *BBNode = dyn_cast<BasicBlockSDNode>(Op)) { MIB.addMBB(BBNode->getBasicBlock()); } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) { MIB.addFrameIndex(FI->getIndex()); } else if (JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op)) { MIB.addJumpTableIndex(JT->getIndex(), JT->getTargetFlags()); } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) { int Offset = CP->getOffset(); unsigned Align = CP->getAlignment(); Type *Type = CP->getType(); // MachineConstantPool wants an explicit alignment. if (Align == 0) { Align = MF->getDataLayout().getPrefTypeAlignment(Type); if (Align == 0) { // Alignment of vector types. FIXME! Align = MF->getDataLayout().getTypeAllocSize(Type); } } unsigned Idx; MachineConstantPool *MCP = MF->getConstantPool(); if (CP->isMachineConstantPoolEntry()) Idx = MCP->getConstantPoolIndex(CP->getMachineCPVal(), Align); else Idx = MCP->getConstantPoolIndex(CP->getConstVal(), Align); MIB.addConstantPoolIndex(Idx, Offset, CP->getTargetFlags()); } else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) { MIB.addExternalSymbol(ES->getSymbol(), ES->getTargetFlags()); } else if (auto *SymNode = dyn_cast<MCSymbolSDNode>(Op)) { MIB.addSym(SymNode->getMCSymbol()); } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) { MIB.addBlockAddress(BA->getBlockAddress(), BA->getOffset(), BA->getTargetFlags()); } else if (TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(Op)) { MIB.addTargetIndex(TI->getIndex(), TI->getOffset(), TI->getTargetFlags()); } else { assert(Op.getValueType() != MVT::Other && Op.getValueType() != MVT::Glue && "Chain and glue operands should occur at end of operand list!"); AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap, IsDebug, IsClone, IsCloned); } }
/// If \p MBBI is a pseudo instruction, this method expands /// it to the corresponding (sequence of) actual instruction(s). /// \returns true if \p MBBI has been expanded. bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { MachineInstr &MI = *MBBI; unsigned Opcode = MI.getOpcode(); DebugLoc DL = MBBI->getDebugLoc(); switch (Opcode) { default: return false; case X86::TCRETURNdi: case X86::TCRETURNri: case X86::TCRETURNmi: case X86::TCRETURNdi64: case X86::TCRETURNri64: case X86::TCRETURNmi64: { bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64; MachineOperand &JumpTarget = MBBI->getOperand(0); MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1); assert(StackAdjust.isImm() && "Expecting immediate value."); // Adjust stack pointer. int StackAdj = StackAdjust.getImm(); if (StackAdj) { // Check for possible merge with preceding ADD instruction. StackAdj += X86FL->mergeSPUpdates(MBB, MBBI, true); X86FL->emitSPUpdate(MBB, MBBI, StackAdj, /*InEpilogue=*/true); } // Jump to label or value in register. bool IsWin64 = STI->isTargetWin64(); if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdi64) { unsigned Op = (Opcode == X86::TCRETURNdi) ? X86::TAILJMPd : (IsWin64 ? X86::TAILJMPd64_REX : X86::TAILJMPd64); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op)); if (JumpTarget.isGlobal()) MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), JumpTarget.getTargetFlags()); else { assert(JumpTarget.isSymbol()); MIB.addExternalSymbol(JumpTarget.getSymbolName(), JumpTarget.getTargetFlags()); } } else if (Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64) { unsigned Op = (Opcode == X86::TCRETURNmi) ? X86::TAILJMPm : (IsWin64 ? X86::TAILJMPm64_REX : X86::TAILJMPm64); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op)); for (unsigned i = 0; i != 5; ++i) MIB.addOperand(MBBI->getOperand(i)); } else if (Opcode == X86::TCRETURNri64) { BuildMI(MBB, MBBI, DL, TII->get(IsWin64 ? X86::TAILJMPr64_REX : X86::TAILJMPr64)) .addReg(JumpTarget.getReg(), RegState::Kill); } else { BuildMI(MBB, MBBI, DL, TII->get(X86::TAILJMPr)) .addReg(JumpTarget.getReg(), RegState::Kill); } MachineInstr *NewMI = std::prev(MBBI); NewMI->copyImplicitOps(*MBBI->getParent()->getParent(), *MBBI); // Delete the pseudo instruction TCRETURN. MBB.erase(MBBI); return true; } case X86::EH_RETURN: case X86::EH_RETURN64: { MachineOperand &DestAddr = MBBI->getOperand(0); assert(DestAddr.isReg() && "Offset should be in register!"); const bool Uses64BitFramePtr = STI->isTarget64BitLP64() || STI->isTargetNaCl64(); unsigned StackPtr = TRI->getStackRegister(); BuildMI(MBB, MBBI, DL, TII->get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), StackPtr) .addReg(DestAddr.getReg()); // The EH_RETURN pseudo is really removed during the MC Lowering. return true; } case X86::IRET: { // Adjust stack to erase error code int64_t StackAdj = MBBI->getOperand(0).getImm(); X86FL->emitSPUpdate(MBB, MBBI, StackAdj, true); // Replace pseudo with machine iret BuildMI(MBB, MBBI, DL, TII->get(STI->is64Bit() ? X86::IRET64 : X86::IRET32)); MBB.erase(MBBI); return true; } case X86::RET: { // Adjust stack to erase error code int64_t StackAdj = MBBI->getOperand(0).getImm(); MachineInstrBuilder MIB; if (StackAdj == 0) { MIB = BuildMI(MBB, MBBI, DL, TII->get(STI->is64Bit() ? X86::RETQ : X86::RETL)); } else if (isUInt<16>(StackAdj)) { MIB = BuildMI(MBB, MBBI, DL, TII->get(STI->is64Bit() ? X86::RETIQ : X86::RETIL)) .addImm(StackAdj); } else { assert(!STI->is64Bit() && "shouldn't need to do this for x86_64 targets!"); // A ret can only handle immediates as big as 2**16-1. If we need to pop // off bytes before the return address, we must do it manually. BuildMI(MBB, MBBI, DL, TII->get(X86::POP32r)).addReg(X86::ECX, RegState::Define); X86FL->emitSPUpdate(MBB, MBBI, StackAdj, /*InEpilogue=*/true); BuildMI(MBB, MBBI, DL, TII->get(X86::PUSH32r)).addReg(X86::ECX); MIB = BuildMI(MBB, MBBI, DL, TII->get(X86::RETL)); } for (unsigned I = 1, E = MBBI->getNumOperands(); I != E; ++I) MIB.addOperand(MBBI->getOperand(I)); MBB.erase(MBBI); return true; } case X86::EH_RESTORE: { // Restore ESP and EBP, and optionally ESI if required. bool IsSEH = isAsynchronousEHPersonality(classifyEHPersonality( MBB.getParent()->getFunction()->getPersonalityFn())); X86FL->restoreWin32EHStackPointers(MBB, MBBI, DL, /*RestoreSP=*/IsSEH); MBBI->eraseFromParent(); return true; } } llvm_unreachable("Previous switch has a fallthrough?"); }
void ARMFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); assert(MBBI->getDesc().isReturn() && "Can only insert epilog into returning blocks"); unsigned RetOpcode = MBBI->getOpcode(); DebugLoc dl = MBBI->getDebugLoc(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); assert(!AFI->isThumb1OnlyFunction() && "This emitEpilogue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); int NumBytes = (int)MFI->getStackSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); if (!AFI->hasStackFrame()) { if (NumBytes != 0) emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); } else { // Unwind MBBI to point to first LDR / VLDRD. const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(); if (MBBI != MBB.begin()) { do --MBBI; while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs)); if (!isCSRestore(MBBI, TII, CSRegs)) ++MBBI; } // Move SP to start of FP callee save spill area. NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() + AFI->getDPRCalleeSavedAreaSize()); // Reset SP based on frame pointer only if the stack frame extends beyond // frame pointer stack slot or target is ELF and the function has FP. if (AFI->shouldRestoreSPFromFP()) { NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; if (NumBytes) { if (isARM) emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes, ARMCC::AL, 0, TII); else { // It's not possible to restore SP from FP in a single instruction. // For Darwin, this looks like: // mov sp, r7 // sub sp, #24 // This is bad, if an interrupt is taken after the mov, sp is in an // inconsistent state. // Use the first callee-saved register as a scratch register. assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) && "No scratch register to restore SP from FP!"); emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, ARMCC::AL, 0, TII); AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) .addReg(ARM::R4)); } } else { // Thumb2 or ARM. if (isARM) BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP) .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); else AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) .addReg(FramePtr)); } } else if (NumBytes) emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); // Increment past our save areas. if (AFI->getDPRCalleeSavedAreaSize()) { MBBI++; // Since vpop register list cannot have gaps, there may be multiple vpop // instructions in the epilogue. while (MBBI->getOpcode() == ARM::VLDMDIA_UPD) MBBI++; } if (AFI->getGPRCalleeSavedArea2Size()) MBBI++; if (AFI->getGPRCalleeSavedArea1Size()) MBBI++; } if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND || RetOpcode == ARM::TCRETURNri || RetOpcode == ARM::TCRETURNriND) { // Tail call return: adjust the stack pointer and jump to callee. MBBI = MBB.getLastNonDebugInstr(); MachineOperand &JumpTarget = MBBI->getOperand(0); // Jump to label or value in register. if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND) { unsigned TCOpcode = (RetOpcode == ARM::TCRETURNdi) ? (STI.isThumb() ? ARM::tTAILJMPd : ARM::TAILJMPd) : (STI.isThumb() ? ARM::tTAILJMPdND : ARM::TAILJMPdND); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode)); if (JumpTarget.isGlobal()) MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), JumpTarget.getTargetFlags()); else { assert(JumpTarget.isSymbol()); MIB.addExternalSymbol(JumpTarget.getSymbolName(), JumpTarget.getTargetFlags()); } // Add the default predicate in Thumb mode. if (STI.isThumb()) MIB.addImm(ARMCC::AL).addReg(0); } else if (RetOpcode == ARM::TCRETURNri) { BuildMI(MBB, MBBI, dl, TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)). addReg(JumpTarget.getReg(), RegState::Kill); } else if (RetOpcode == ARM::TCRETURNriND) { BuildMI(MBB, MBBI, dl, TII.get(STI.isThumb() ? ARM::tTAILJMPrND : ARM::TAILJMPrND)). addReg(JumpTarget.getReg(), RegState::Kill); } MachineInstr *NewMI = prior(MBBI); for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i) NewMI->addOperand(MBBI->getOperand(i)); // Delete the pseudo instruction TCRETURN. MBB.erase(MBBI); MBBI = NewMI; } if (VARegSaveSize) emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize); }
void X86FrameInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); const X86RegisterInfo *RegInfo = TM.getRegisterInfo(); const X86InstrInfo &TII = *TM.getInstrInfo(); MachineBasicBlock::iterator MBBI = prior(MBB.end()); unsigned RetOpcode = MBBI->getOpcode(); DebugLoc DL = MBBI->getDebugLoc(); bool Is64Bit = STI.is64Bit(); unsigned StackAlign = getStackAlignment(); unsigned SlotSize = RegInfo->getSlotSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); unsigned StackPtr = RegInfo->getStackRegister(); switch (RetOpcode) { default: llvm_unreachable("Can only insert epilog into returning blocks"); case X86::RET: case X86::RETI: case X86::TCRETURNdi: case X86::TCRETURNri: case X86::TCRETURNmi: case X86::TCRETURNdi64: case X86::TCRETURNri64: case X86::TCRETURNmi64: case X86::EH_RETURN: case X86::EH_RETURN64: break; // These are ok } // Get the number of bytes to allocate from the FrameInfo. uint64_t StackSize = MFI->getStackSize(); uint64_t MaxAlign = MFI->getMaxAlignment(); unsigned CSSize = X86FI->getCalleeSavedFrameSize(); uint64_t NumBytes = 0; // If we're forcing a stack realignment we can't rely on just the frame // info, we need to know the ABI stack alignment as well in case we // have a call out. Otherwise just make sure we have some alignment - we'll // go with the minimum. if (ForceStackAlign) { if (MFI->hasCalls()) MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; else MaxAlign = MaxAlign ? MaxAlign : 4; } if (hasFP(MF)) { // Calculate required stack adjustment. uint64_t FrameSize = StackSize - SlotSize; if (RegInfo->needsStackRealignment(MF)) FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign; NumBytes = FrameSize - CSSize; // Pop EBP. BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr); } else { NumBytes = StackSize - CSSize; } // Skip the callee-saved pop instructions. MachineBasicBlock::iterator LastCSPop = MBBI; while (MBBI != MBB.begin()) { MachineBasicBlock::iterator PI = prior(MBBI); unsigned Opc = PI->getOpcode(); if (Opc != X86::POP32r && Opc != X86::POP64r && !PI->getDesc().isTerminator()) break; --MBBI; } DL = MBBI->getDebugLoc(); // If there is an ADD32ri or SUB32ri of ESP immediately before this // instruction, merge the two instructions. if (NumBytes || MFI->hasVarSizedObjects()) mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes); // If dynamic alloca is used, then reset esp to point to the last callee-saved // slot before popping them off! Same applies for the case, when stack was // realigned. if (RegInfo->needsStackRealignment(MF)) { // We cannot use LEA here, because stack pointer was realigned. We need to // deallocate local frame back. if (CSSize) { emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII); MBBI = prior(LastCSPop); } BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr).addReg(FramePtr); } else if (MFI->hasVarSizedObjects()) { if (CSSize) { unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r; MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr), FramePtr, false, -CSSize); MBB.insert(MBBI, MI); } else { BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr) .addReg(FramePtr); } } else if (NumBytes) { // Adjust stack pointer back: ESP += numbytes. emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII); } // We're returning from function via eh_return. if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) { MBBI = prior(MBB.end()); MachineOperand &DestAddr = MBBI->getOperand(0); assert(DestAddr.isReg() && "Offset should be in register!"); BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr).addReg(DestAddr.getReg()); } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 || RetOpcode == X86::TCRETURNmi64) { bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64; // Tail call return: adjust the stack pointer and jump to callee. MBBI = prior(MBB.end()); MachineOperand &JumpTarget = MBBI->getOperand(0); MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1); assert(StackAdjust.isImm() && "Expecting immediate value."); // Adjust stack pointer. int StackAdj = StackAdjust.getImm(); int MaxTCDelta = X86FI->getTCReturnAddrDelta(); int Offset = 0; assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive"); // Incoporate the retaddr area. Offset = StackAdj-MaxTCDelta; assert(Offset >= 0 && "Offset should never be negative"); if (Offset) { // Check for possible merge with preceeding ADD instruction. Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true); emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII); } // Jump to label or value in register. if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) { MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi) ? X86::TAILJMPd : X86::TAILJMPd64)); if (JumpTarget.isGlobal()) MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), JumpTarget.getTargetFlags()); else { assert(JumpTarget.isSymbol()); MIB.addExternalSymbol(JumpTarget.getSymbolName(), JumpTarget.getTargetFlags()); } } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) { MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi) ? X86::TAILJMPm : X86::TAILJMPm64)); for (unsigned i = 0; i != 5; ++i) MIB.addOperand(MBBI->getOperand(i)); } else if (RetOpcode == X86::TCRETURNri64) { BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)). addReg(JumpTarget.getReg(), RegState::Kill); } else { BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)). addReg(JumpTarget.getReg(), RegState::Kill); } MachineInstr *NewMI = prior(MBBI); for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i) NewMI->addOperand(MBBI->getOperand(i)); // Delete the pseudo instruction TCRETURN. MBB.erase(MBBI); } else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) && (X86FI->getTCReturnAddrDelta() < 0)) { // Add the return addr area delta back since we are not tail calling. int delta = -1*X86FI->getTCReturnAddrDelta(); MBBI = prior(MBB.end()); // Check for possible merge with preceeding ADD instruction. delta += mergeSPUpdates(MBB, MBBI, StackPtr, true); emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII); } }
/// If \p MBBI is a pseudo instruction, this method expands /// it to the corresponding (sequence of) actual instruction(s). /// \returns true if \p MBBI has been expanded. bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { MachineInstr &MI = *MBBI; unsigned Opcode = MI.getOpcode(); DebugLoc DL = MBBI->getDebugLoc(); switch (Opcode) { default: return false; case X86::TCRETURNdi: case X86::TCRETURNdicc: case X86::TCRETURNri: case X86::TCRETURNmi: case X86::TCRETURNdi64: case X86::TCRETURNdi64cc: case X86::TCRETURNri64: case X86::TCRETURNmi64: { bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64; MachineOperand &JumpTarget = MBBI->getOperand(0); MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1); assert(StackAdjust.isImm() && "Expecting immediate value."); // Adjust stack pointer. int StackAdj = StackAdjust.getImm(); int MaxTCDelta = X86FI->getTCReturnAddrDelta(); int Offset = 0; assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive"); // Incoporate the retaddr area. Offset = StackAdj - MaxTCDelta; assert(Offset >= 0 && "Offset should never be negative"); if (Opcode == X86::TCRETURNdicc || Opcode == X86::TCRETURNdi64cc) { assert(Offset == 0 && "Conditional tail call cannot adjust the stack."); } if (Offset) { // Check for possible merge with preceding ADD instruction. Offset += X86FL->mergeSPUpdates(MBB, MBBI, true); X86FL->emitSPUpdate(MBB, MBBI, Offset, /*InEpilogue=*/true); } // Jump to label or value in register. bool IsWin64 = STI->isTargetWin64(); if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdicc || Opcode == X86::TCRETURNdi64 || Opcode == X86::TCRETURNdi64cc) { unsigned Op; switch (Opcode) { case X86::TCRETURNdi: Op = X86::TAILJMPd; break; case X86::TCRETURNdicc: Op = X86::TAILJMPd_CC; break; case X86::TCRETURNdi64cc: assert(!IsWin64 && "Conditional tail calls confuse the Win64 unwinder."); // TODO: We could do it for Win64 "leaf" functions though; PR30337. Op = X86::TAILJMPd64_CC; break; default: // Note: Win64 uses REX prefixes indirect jumps out of functions, but // not direct ones. Op = X86::TAILJMPd64; break; } MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op)); if (JumpTarget.isGlobal()) { MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), JumpTarget.getTargetFlags()); } else { assert(JumpTarget.isSymbol()); MIB.addExternalSymbol(JumpTarget.getSymbolName(), JumpTarget.getTargetFlags()); } if (Op == X86::TAILJMPd_CC || Op == X86::TAILJMPd64_CC) { MIB.addImm(MBBI->getOperand(2).getImm()); } } else if (Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64) { unsigned Op = (Opcode == X86::TCRETURNmi) ? X86::TAILJMPm : (IsWin64 ? X86::TAILJMPm64_REX : X86::TAILJMPm64); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op)); for (unsigned i = 0; i != 5; ++i) MIB.add(MBBI->getOperand(i)); } else if (Opcode == X86::TCRETURNri64) { BuildMI(MBB, MBBI, DL, TII->get(IsWin64 ? X86::TAILJMPr64_REX : X86::TAILJMPr64)) .addReg(JumpTarget.getReg(), RegState::Kill); } else { BuildMI(MBB, MBBI, DL, TII->get(X86::TAILJMPr)) .addReg(JumpTarget.getReg(), RegState::Kill); } MachineInstr &NewMI = *std::prev(MBBI); NewMI.copyImplicitOps(*MBBI->getParent()->getParent(), *MBBI); // Delete the pseudo instruction TCRETURN. MBB.erase(MBBI); return true; } case X86::EH_RETURN: case X86::EH_RETURN64: { MachineOperand &DestAddr = MBBI->getOperand(0); assert(DestAddr.isReg() && "Offset should be in register!"); const bool Uses64BitFramePtr = STI->isTarget64BitLP64() || STI->isTargetNaCl64(); unsigned StackPtr = TRI->getStackRegister(); BuildMI(MBB, MBBI, DL, TII->get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), StackPtr) .addReg(DestAddr.getReg()); // The EH_RETURN pseudo is really removed during the MC Lowering. return true; } case X86::IRET: { // Adjust stack to erase error code int64_t StackAdj = MBBI->getOperand(0).getImm(); X86FL->emitSPUpdate(MBB, MBBI, StackAdj, true); // Replace pseudo with machine iret BuildMI(MBB, MBBI, DL, TII->get(STI->is64Bit() ? X86::IRET64 : X86::IRET32)); MBB.erase(MBBI); return true; } case X86::RET: { // Adjust stack to erase error code int64_t StackAdj = MBBI->getOperand(0).getImm(); MachineInstrBuilder MIB; if (StackAdj == 0) { MIB = BuildMI(MBB, MBBI, DL, TII->get(STI->is64Bit() ? X86::RETQ : X86::RETL)); } else if (isUInt<16>(StackAdj)) { MIB = BuildMI(MBB, MBBI, DL, TII->get(STI->is64Bit() ? X86::RETIQ : X86::RETIL)) .addImm(StackAdj); } else { assert(!STI->is64Bit() && "shouldn't need to do this for x86_64 targets!"); // A ret can only handle immediates as big as 2**16-1. If we need to pop // off bytes before the return address, we must do it manually. BuildMI(MBB, MBBI, DL, TII->get(X86::POP32r)).addReg(X86::ECX, RegState::Define); X86FL->emitSPUpdate(MBB, MBBI, StackAdj, /*InEpilogue=*/true); BuildMI(MBB, MBBI, DL, TII->get(X86::PUSH32r)).addReg(X86::ECX); MIB = BuildMI(MBB, MBBI, DL, TII->get(X86::RETL)); } for (unsigned I = 1, E = MBBI->getNumOperands(); I != E; ++I) MIB.add(MBBI->getOperand(I)); MBB.erase(MBBI); return true; } case X86::EH_RESTORE: { // Restore ESP and EBP, and optionally ESI if required. bool IsSEH = isAsynchronousEHPersonality(classifyEHPersonality( MBB.getParent()->getFunction()->getPersonalityFn())); X86FL->restoreWin32EHStackPointers(MBB, MBBI, DL, /*RestoreSP=*/IsSEH); MBBI->eraseFromParent(); return true; } case X86::LCMPXCHG8B_SAVE_EBX: case X86::LCMPXCHG16B_SAVE_RBX: { // Perform the following transformation. // SaveRbx = pseudocmpxchg Addr, <4 opds for the address>, InArg, SaveRbx // => // [E|R]BX = InArg // actualcmpxchg Addr // [E|R]BX = SaveRbx const MachineOperand &InArg = MBBI->getOperand(6); unsigned SaveRbx = MBBI->getOperand(7).getReg(); unsigned ActualInArg = Opcode == X86::LCMPXCHG8B_SAVE_EBX ? X86::EBX : X86::RBX; // Copy the input argument of the pseudo into the argument of the // actual instruction. TII->copyPhysReg(MBB, MBBI, DL, ActualInArg, InArg.getReg(), InArg.isKill()); // Create the actual instruction. unsigned ActualOpc = Opcode == X86::LCMPXCHG8B_SAVE_EBX ? X86::LCMPXCHG8B : X86::LCMPXCHG16B; MachineInstr *NewInstr = BuildMI(MBB, MBBI, DL, TII->get(ActualOpc)); // Copy the operands related to the address. for (unsigned Idx = 1; Idx < 6; ++Idx) NewInstr->addOperand(MBBI->getOperand(Idx)); // Finally, restore the value of RBX. TII->copyPhysReg(MBB, MBBI, DL, ActualInArg, SaveRbx, /*SrcIsKill*/ true); // Delete the pseudo. MBBI->eraseFromParent(); return true; } } llvm_unreachable("Previous switch has a fallthrough?"); }