bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
  MachineBasicBlock &MBB = *MI->getParent();

  switch (MI->getDesc().getOpcode()) {
  default:
    return false;
  case Mips::RetRA:
    expandRetRA(MBB, MI, Mips::RET);
    break;
  case Mips::PseudoCVT_S_W:
    expandCvtFPInt(MBB, MI, Mips::CVT_S_W, Mips::MTC1, false);
    break;
  case Mips::PseudoCVT_D32_W:
    expandCvtFPInt(MBB, MI, Mips::CVT_D32_W, Mips::MTC1, false);
    break;
  case Mips::PseudoCVT_S_L:
    expandCvtFPInt(MBB, MI, Mips::CVT_S_L, Mips::DMTC1, true);
    break;
  case Mips::PseudoCVT_D64_W:
    expandCvtFPInt(MBB, MI, Mips::CVT_D64_W, Mips::MTC1, true);
    break;
  case Mips::PseudoCVT_D64_L:
    expandCvtFPInt(MBB, MI, Mips::CVT_D64_L, Mips::DMTC1, true);
    break;
  case Mips::BuildPairF64:
    expandBuildPairF64(MBB, MI, false);
    break;
  case Mips::BuildPairF64_64:
    expandBuildPairF64(MBB, MI, true);
    break;
  case Mips::ExtractElementF64:
    expandExtractElementF64(MBB, MI, false);
    break;
  case Mips::ExtractElementF64_64:
    expandExtractElementF64(MBB, MI, true);
    break;
  case Mips::PseudoLDC1:
    expandDPLoadStore(MBB, MI, Mips::LDC1, Mips::LWC1);
    break;
  case Mips::PseudoSDC1:
    expandDPLoadStore(MBB, MI, Mips::SDC1, Mips::SWC1);
    break;
  case Mips::MIPSeh_return32:
  case Mips::MIPSeh_return64:
    expandEhReturn(MBB, MI);
    break;
  }

  MBB.erase(MI);
  return true;
}
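// The expand helpers referenced above are not shown in this excerpt. As a
// rough sketch of the pattern, a RetRA-style expander just materializes the
// real return instruction in front of the pseudo and lets expandPostRAPseudo
// erase the pseudo afterwards. Illustrative only, assuming the Mips signature
// used above; not a verbatim copy of the in-tree helper.
void MipsSEInstrInfo::expandRetRA(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator I,
                                  unsigned Opc) const {
  // Emit "RET $ra"; the caller removes the RetRA pseudo.
  BuildMI(MBB, I, I->getDebugLoc(), get(Opc)).addReg(Mips::RA);
}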
/// calculateCallsInformation - Calculate the MaxCallFrameSize and AdjustsStack
/// variables for the function's frame information and eliminate call frame
/// pseudo instructions.
void PEI::calculateCallsInformation(MachineFunction &Fn) {
  const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
  const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
  const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
  MachineFrameInfo *MFI = Fn.getFrameInfo();

  unsigned MaxCallFrameSize = 0;
  bool AdjustsStack = MFI->adjustsStack();

  // Get the function call frame set-up and tear-down instruction opcode
  int FrameSetupOpcode = TII.getCallFrameSetupOpcode();
  int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();

  // Early exit for targets which have no call frame setup/destroy pseudo
  // instructions.
  if (FrameSetupOpcode == -1 && FrameDestroyOpcode == -1)
    return;

  std::vector<MachineBasicBlock::iterator> FrameSDOps;
  for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB)
    for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
      if (I->getOpcode() == FrameSetupOpcode ||
          I->getOpcode() == FrameDestroyOpcode) {
        assert(I->getNumOperands() >= 1 && "Call Frame Setup/Destroy Pseudo"
               " instructions should have a single immediate argument!");
        unsigned Size = I->getOperand(0).getImm();
        if (Size > MaxCallFrameSize) MaxCallFrameSize = Size;
        AdjustsStack = true;
        FrameSDOps.push_back(I);
      } else if (I->isInlineAsm()) {
        // Some inline asm's need a stack frame, as indicated by operand 1.
        unsigned ExtraInfo = I->getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
        if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
          AdjustsStack = true;
      }

  MFI->setAdjustsStack(AdjustsStack);
  MFI->setMaxCallFrameSize(MaxCallFrameSize);

  for (std::vector<MachineBasicBlock::iterator>::iterator
         i = FrameSDOps.begin(), e = FrameSDOps.end(); i != e; ++i) {
    MachineBasicBlock::iterator I = *i;

    // If call frames are not being included as part of the stack frame, and
    // the target doesn't indicate otherwise, remove the call frame pseudos
    // here. The sub/add sp instruction pairs are still inserted, but we don't
    // need to track the SP adjustment for frame index elimination.
    if (TFI->canSimplifyCallFramePseudos(Fn))
      RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I);
  }
}
// Cpu0InstrInfo::expandPostRAPseudo
/// Expand Pseudo instructions into real backend instructions
bool Cpu0InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
  MachineBasicBlock &MBB = *MI->getParent();

  switch (MI->getDesc().getOpcode()) {
  default:
    return false;
  case Cpu0::RetLR:
    ExpandRetLR(MBB, MI, Cpu0::RET);
    break;
  }

  MBB.erase(MI);
  return true;
}
void Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
                                              MachineBasicBlock *NewDest) const {
  MachineBasicBlock *MBB = Tail->getParent();
  ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>();
  if (!AFI->hasITBlocks()) {
    TargetInstrInfo::ReplaceTailWithBranchTo(Tail, NewDest);
    return;
  }

  // If the first instruction of Tail is predicated, we may have to update
  // the IT instruction.
  unsigned PredReg = 0;
  ARMCC::CondCodes CC = getInstrPredicate(Tail, PredReg);
  MachineBasicBlock::iterator MBBI = Tail;
  if (CC != ARMCC::AL)
    // Expecting at least the t2IT instruction before it.
    --MBBI;

  // Actually replace the tail.
  TargetInstrInfo::ReplaceTailWithBranchTo(Tail, NewDest);

  // Fix up IT.
  if (CC != ARMCC::AL) {
    MachineBasicBlock::iterator E = MBB->begin();
    unsigned Count = 4; // At most 4 instructions in an IT block.
    while (Count && MBBI != E) {
      if (MBBI->isDebugValue()) {
        --MBBI;
        continue;
      }
      if (MBBI->getOpcode() == ARM::t2IT) {
        unsigned Mask = MBBI->getOperand(1).getImm();
        if (Count == 4)
          MBBI->eraseFromParent();
        else {
          unsigned MaskOn = 1 << Count;
          unsigned MaskOff = ~(MaskOn - 1);
          MBBI->getOperand(1).setImm((Mask & MaskOff) | MaskOn);
        }
        return;
      }
      --MBBI;
      --Count;
    }

    // Ctrl flow can reach here if branch folding is run before IT block
    // formation pass.
  }
}
bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
  MachineBasicBlock &MBB = *MI->getParent();

  switch (MI->getDesc().getOpcode()) {
  default:
    return false;
  case Mips::RetRA16:
    ExpandRetRA16(MBB, MI, Mips::JrRa16);
    break;
  }

  MBB.erase(MI);
  return true;
}
MachineBasicBlock::iterator insertLiterals(
    MachineBasicBlock::iterator InsertPos,
    const std::vector<unsigned> &Literals) const {
  MachineBasicBlock *MBB = InsertPos->getParent();
  for (unsigned i = 0, e = Literals.size(); i < e; i += 2) {
    unsigned LiteralPair0 = Literals[i];
    unsigned LiteralPair1 = (i + 1 < e) ? Literals[i + 1] : 0;
    InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),
                        TII->get(AMDGPU::LITERALS))
        .addImm(LiteralPair0)
        .addImm(LiteralPair1);
  }
  return InsertPos;
}
// Split ADJDYNALLOC instruction MI.
void SystemZInstrInfo::splitAdjDynAlloc(MachineBasicBlock::iterator MI) const {
  MachineBasicBlock *MBB = MI->getParent();
  MachineFunction &MF = *MBB->getParent();
  MachineFrameInfo *MFFrame = MF.getFrameInfo();
  MachineOperand &OffsetMO = MI->getOperand(2);

  uint64_t Offset = (MFFrame->getMaxCallFrameSize() +
                     SystemZMC::CallFrameSize +
                     OffsetMO.getImm());
  unsigned NewOpcode = getOpcodeForOffset(SystemZ::LA, Offset);
  assert(NewOpcode && "No support for huge argument lists yet");
  MI->setDesc(get(NewOpcode));
  OffsetMO.setImm(Offset);
}
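// getOpcodeForOffset is not shown above. Conceptually it picks the addressing
// form whose displacement field can hold the computed offset. Rough sketch
// only: the in-tree version derives the ranges from the instruction
// descriptions instead of hard-coding LA/LAY as done here.
unsigned SystemZInstrInfo::getOpcodeForOffset(unsigned Opcode,
                                              int64_t Offset) const {
  if (isUInt<12>(Offset))
    return Opcode;                  // short unsigned displacement (e.g. LA)
  if (Opcode == SystemZ::LA && isInt<20>(Offset))
    return SystemZ::LAY;            // long signed displacement form
  return 0;                         // caller must handle "no usable opcode"
}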
/// insertSpill - Insert a spill of NewVReg after MI.
void InlineSpiller::insertSpill(unsigned NewVReg, bool isKill,
                                MachineBasicBlock::iterator MI) {
  MachineBasicBlock &MBB = *MI->getParent();

  MachineInstrSpan MIS(MI);
  TII.storeRegToStackSlot(MBB, std::next(MI), NewVReg, isKill, StackSlot,
                          MRI.getRegClass(NewVReg), &TRI);

  LIS.InsertMachineInstrRangeInMaps(std::next(MI), MIS.end());

  DEBUG(dumpMachineInstrRangeWithSlotIndex(std::next(MI), MIS.end(), LIS,
                                           "spill"));
  ++NumSpills;
}
/// Calculate the MaxCallFrameSize and AdjustsStack
/// variables for the function's frame information and eliminate call frame
/// pseudo instructions.
void PEI::calculateCallFrameInfo(MachineFunction &MF) {
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  unsigned MaxCallFrameSize = 0;
  bool AdjustsStack = MFI.adjustsStack();

  // Get the function call frame set-up and tear-down instruction opcode
  unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode();
  unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();

  // Early exit for targets which have no call frame setup/destroy pseudo
  // instructions.
  if (FrameSetupOpcode == ~0u && FrameDestroyOpcode == ~0u)
    return;

  std::vector<MachineBasicBlock::iterator> FrameSDOps;
  for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
    for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
      if (TII.isFrameInstr(*I)) {
        unsigned Size = TII.getFrameSize(*I);
        if (Size > MaxCallFrameSize) MaxCallFrameSize = Size;
        AdjustsStack = true;
        FrameSDOps.push_back(I);
      } else if (I->isInlineAsm()) {
        // Some inline asm's need a stack frame, as indicated by operand 1.
        unsigned ExtraInfo = I->getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
        if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
          AdjustsStack = true;
      }

  assert(!MFI.isMaxCallFrameSizeComputed() ||
         (MFI.getMaxCallFrameSize() == MaxCallFrameSize &&
          MFI.adjustsStack() == AdjustsStack));
  MFI.setAdjustsStack(AdjustsStack);
  MFI.setMaxCallFrameSize(MaxCallFrameSize);

  for (std::vector<MachineBasicBlock::iterator>::iterator
         i = FrameSDOps.begin(), e = FrameSDOps.end(); i != e; ++i) {
    MachineBasicBlock::iterator I = *i;

    // If call frames are not being included as part of the stack frame, and
    // the target doesn't indicate otherwise, remove the call frame pseudos
    // here. The sub/add sp instruction pairs are still inserted, but we don't
    // need to track the SP adjustment for frame index elimination.
    if (TFI->canSimplifyCallFramePseudos(MF))
      TFI->eliminateCallFramePseudoInstr(MF, *I->getParent(), I);
  }
}
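// For context, the eliminateCallFramePseudoInstr hook called at the end
// typically rewrites ADJCALLSTACKDOWN/UP into a real stack-pointer adjustment
// (or simply deletes the pseudo when the call frame is reserved). Hedged
// sketch for a hypothetical target: the class name and the emitSPAdjustment
// helper are assumptions, not an in-tree API.
MachineBasicBlock::iterator
MyTargetFrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  if (!hasReservedCallFrame(MF)) {
    // Turn the setup/destroy pseudo into an explicit SP adjustment.
    int64_t Amount = TII.getFrameSize(*I);
    if (I->getOpcode() == TII.getCallFrameSetupOpcode())
      Amount = -Amount;                 // setup grows the stack downwards
    if (Amount)
      emitSPAdjustment(MBB, I, Amount); // assumed target-local helper
  }
  return MBB.erase(I);                  // the pseudo itself always goes away
}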
void InlineSpiller::insertReload(unsigned NewVReg,
                                 SlotIndex Idx,
                                 MachineBasicBlock::iterator MI) {
  MachineBasicBlock &MBB = *MI->getParent();

  MachineInstrSpan MIS(MI);
  TII.loadRegFromStackSlot(MBB, MI, NewVReg, StackSlot,
                           MRI.getRegClass(NewVReg), &TRI);

  LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MI);

  DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MI, LIS, "reload",
                                           NewVReg));
  ++NumReloads;
}
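// Both spill helpers delegate the actual memory access to the target's
// storeRegToStackSlot / loadRegFromStackSlot hooks. A sketch of the store side
// for a simple RISC-style backend; MyTarget::SW and the operand order are
// assumptions for illustration only.
void MyTargetInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator I,
                                            unsigned SrcReg, bool isKill,
                                            int FrameIndex,
                                            const TargetRegisterClass *RC,
                                            const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (I != MBB.end())
    DL = I->getDebugLoc();
  // Store SrcReg to the frame slot; frame-index elimination later rewrites
  // the FrameIndex operand into a concrete base register plus offset.
  BuildMI(MBB, I, DL, get(MyTarget::SW))
      .addReg(SrcReg, getKillRegState(isKill))
      .addFrameIndex(FrameIndex)
      .addImm(0);
}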
void HexagonCopyToCombine::emitCombineRI(MachineBasicBlock::iterator &InsertPt,
                                         unsigned DoubleDestReg,
                                         MachineOperand &HiOperand,
                                         MachineOperand &LoOperand) {
  unsigned HiRegKillFlag = getKillRegState(HiOperand.isKill());
  unsigned HiReg = HiOperand.getReg();

  DebugLoc DL = InsertPt->getDebugLoc();
  MachineBasicBlock *BB = InsertPt->getParent();

  // Handle global.
  if (LoOperand.isGlobal()) {
    BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineri), DoubleDestReg)
        .addReg(HiReg, HiRegKillFlag)
        .addGlobalAddress(LoOperand.getGlobal(), LoOperand.getOffset(),
                          LoOperand.getTargetFlags());
    return;
  }
  // Handle block addresses.
  if (LoOperand.isBlockAddress()) {
    BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineri), DoubleDestReg)
        .addReg(HiReg, HiRegKillFlag)
        .addBlockAddress(LoOperand.getBlockAddress(), LoOperand.getOffset(),
                         LoOperand.getTargetFlags());
    return;
  }
  // Handle jump tables.
  if (LoOperand.isJTI()) {
    BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineri), DoubleDestReg)
        .addReg(HiOperand.getReg(), HiRegKillFlag)
        .addJumpTableIndex(LoOperand.getIndex(), LoOperand.getTargetFlags());
    return;
  }
  // Handle constant pools.
  if (LoOperand.isCPI()) {
    BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineri), DoubleDestReg)
        .addReg(HiOperand.getReg(), HiRegKillFlag)
        .addConstantPoolIndex(LoOperand.getIndex(), LoOperand.getOffset(),
                              LoOperand.getTargetFlags());
    return;
  }

  // Insert new combine instruction.
  //   DoubleRegDest = combine HiReg, #LoImm
  BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineri), DoubleDestReg)
      .addReg(HiReg, HiRegKillFlag)
      .addImm(LoOperand.getImm());
}
void HexagonCopyToCombine::emitConst64(MachineBasicBlock::iterator &InsertPt,
                                       unsigned DoubleDestReg,
                                       MachineOperand &HiOperand,
                                       MachineOperand &LoOperand) {
  DEBUG(dbgs() << "Found a CONST64\n");

  DebugLoc DL = InsertPt->getDebugLoc();
  MachineBasicBlock *BB = InsertPt->getParent();
  assert(LoOperand.isImm() && HiOperand.isImm() &&
         "Both operands must be immediate");

  int64_t V = HiOperand.getImm();
  V = (V << 32) | (0x0ffffffffLL & LoOperand.getImm());
  BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::CONST64), DoubleDestReg)
      .addImm(V);
}
bool AMDGPUInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
  MachineBasicBlock *MBB = MI->getParent();
  int OffsetOpIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                               AMDGPU::OpName::addr);
  // addr is a custom operand with multiple MI operands, and only the
  // first MI operand is given a name.
  int RegOpIdx = OffsetOpIdx + 1;
  int ChanOpIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                             AMDGPU::OpName::chan);

  if (isRegisterLoad(*MI)) {
    int DstOpIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                              AMDGPU::OpName::dst);
    unsigned RegIndex = MI->getOperand(RegOpIdx).getImm();
    unsigned Channel = MI->getOperand(ChanOpIdx).getImm();
    unsigned Address = calculateIndirectAddress(RegIndex, Channel);
    unsigned OffsetReg = MI->getOperand(OffsetOpIdx).getReg();
    if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
      buildMovInstr(MBB, MI, MI->getOperand(DstOpIdx).getReg(),
                    getIndirectAddrRegClass()->getRegister(Address));
    } else {
      buildIndirectRead(MBB, MI, MI->getOperand(DstOpIdx).getReg(),
                        Address, OffsetReg);
    }
  } else if (isRegisterStore(*MI)) {
    int ValOpIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                              AMDGPU::OpName::val);
    unsigned RegIndex = MI->getOperand(RegOpIdx).getImm();
    unsigned Channel = MI->getOperand(ChanOpIdx).getImm();
    unsigned Address = calculateIndirectAddress(RegIndex, Channel);
    unsigned OffsetReg = MI->getOperand(OffsetOpIdx).getReg();
    if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
      buildMovInstr(MBB, MI, getIndirectAddrRegClass()->getRegister(Address),
                    MI->getOperand(ValOpIdx).getReg());
    } else {
      buildIndirectWrite(MBB, MI, MI->getOperand(ValOpIdx).getReg(),
                         calculateIndirectAddress(RegIndex, Channel),
                         OffsetReg);
    }
  } else {
    return false;
  }

  MBB->erase(MI);
  return true;
}
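// buildMovInstr is target-defined; on R600 it essentially wraps the generic
// default-instruction builder around a MOV. Sketch only, not a verbatim copy
// of the in-tree implementation.
MachineInstr *R600InstrInfo::buildMovInstr(MachineBasicBlock *MBB,
                                           MachineBasicBlock::iterator I,
                                           unsigned DstReg,
                                           unsigned SrcReg) const {
  return buildDefaultInstruction(*MBB, I, AMDGPU::MOV, DstReg, SrcReg);
}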
/// calculateCallsInformation - Calculate the MaxCallFrameSize and HasCalls
/// variables for the function's frame information and eliminate call frame
/// pseudo instructions.
void PEI::calculateCallsInformation(MachineFunction &Fn) {
  const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();

  unsigned MaxCallFrameSize = 0;
  bool HasCalls = false;

  // Get the function call frame set-up and tear-down instruction opcode
  int FrameSetupOpcode = RegInfo->getCallFrameSetupOpcode();
  int FrameDestroyOpcode = RegInfo->getCallFrameDestroyOpcode();

  // Early exit for targets which have no call frame setup/destroy pseudo
  // instructions.
  if (FrameSetupOpcode == -1 && FrameDestroyOpcode == -1)
    return;

  std::vector<MachineBasicBlock::iterator> FrameSDOps;
  for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB)
    for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
      if (I->getOpcode() == FrameSetupOpcode ||
          I->getOpcode() == FrameDestroyOpcode) {
        assert(I->getNumOperands() >= 1 && "Call Frame Setup/Destroy Pseudo"
               " instructions should have a single immediate argument!");
        unsigned Size = I->getOperand(0).getImm();
        if (Size > MaxCallFrameSize) MaxCallFrameSize = Size;
        HasCalls = true;
        FrameSDOps.push_back(I);
      } else if (I->getOpcode() == TargetInstrInfo::INLINEASM) {
        // An InlineAsm might be a call; assume it is to get the stack frame
        // aligned correctly for calls.
        HasCalls = true;
      }

  MachineFrameInfo *FFI = Fn.getFrameInfo();
  FFI->setHasCalls(HasCalls);
  FFI->setMaxCallFrameSize(MaxCallFrameSize);

  for (std::vector<MachineBasicBlock::iterator>::iterator
         i = FrameSDOps.begin(), e = FrameSDOps.end(); i != e; ++i) {
    MachineBasicBlock::iterator I = *i;

    // If call frames are not being included as part of the stack frame, and
    // there is no dynamic allocation (therefore referencing frame slots off
    // sp), leave the pseudo ops alone. We'll eliminate them later.
    if (RegInfo->hasReservedCallFrame(Fn) || RegInfo->hasFP(Fn))
      RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I);
  }
}
bool MipsInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
  MachineBasicBlock &MBB = *MI->getParent();

  switch (MI->getDesc().getOpcode()) {
  default:
    return false;
  case Mips::BuildPairF64:
    ExpandBuildPairF64(MBB, MI);
    break;
  case Mips::ExtractElementF64:
    ExpandExtractElementF64(MBB, MI);
    break;
  }

  MBB.erase(MI);
  return true;
}
/// insertReload - Insert a reload of NewLI.reg before MI.
void InlineSpiller::insertReload(LiveInterval &NewLI,
                                 SlotIndex Idx,
                                 MachineBasicBlock::iterator MI) {
  MachineBasicBlock &MBB = *MI->getParent();
  TII.loadRegFromStackSlot(MBB, MI, NewLI.reg, StackSlot,
                           MRI.getRegClass(NewLI.reg), &TRI);
  --MI; // Point to load instruction.
  SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot();

  // Some (out-of-tree) targets have EC reload instructions.
  if (MachineOperand *MO = MI->findRegisterDefOperand(NewLI.reg))
    if (MO->isEarlyClobber())
      LoadIdx = LoadIdx.getRegSlot(true);

  DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI);
  VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, LIS.getVNInfoAllocator());
  NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI));
  ++NumReloads;
}
void HexagonCopyToCombine::emitCombineII(MachineBasicBlock::iterator &InsertPt,
                                         unsigned DoubleDestReg,
                                         MachineOperand &HiOperand,
                                         MachineOperand &LoOperand) {
  DebugLoc DL = InsertPt->getDebugLoc();
  MachineBasicBlock *BB = InsertPt->getParent();

  // Handle globals.
  if (HiOperand.isGlobal()) {
    BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg)
        .addGlobalAddress(HiOperand.getGlobal(), HiOperand.getOffset(),
                          HiOperand.getTargetFlags())
        .addImm(LoOperand.getImm());
    return;
  }
  if (LoOperand.isGlobal()) {
    BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineii), DoubleDestReg)
        .addImm(HiOperand.getImm())
        .addGlobalAddress(LoOperand.getGlobal(), LoOperand.getOffset(),
                          LoOperand.getTargetFlags());
    return;
  }

  // Handle constant extended immediates.
  if (!isInt<8>(HiOperand.getImm())) {
    assert(isInt<8>(LoOperand.getImm()));
    BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg)
        .addImm(HiOperand.getImm())
        .addImm(LoOperand.getImm());
    return;
  }

  if (!isUInt<6>(LoOperand.getImm())) {
    assert(isInt<8>(HiOperand.getImm()));
    BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineii), DoubleDestReg)
        .addImm(HiOperand.getImm())
        .addImm(LoOperand.getImm());
    return;
  }

  // Insert new combine instruction.
  //   DoubleRegDest = combine #HiImm, #LoImm
  BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg)
      .addImm(HiOperand.getImm())
      .addImm(LoOperand.getImm());
}
/// ReplaceTailWithBranchTo - Delete the instruction Tail and everything
/// after it, replacing it with an unconditional branch to NewDest.
void
TargetInstrInfoImpl::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
                                             MachineBasicBlock *NewDest) const {
  MachineBasicBlock *MBB = Tail->getParent();

  // Remove all the old successors of MBB from the CFG.
  while (!MBB->succ_empty())
    MBB->removeSuccessor(MBB->succ_begin());

  // Remove all the dead instructions from the end of MBB.
  MBB->erase(Tail, MBB->end());

  // If NewDest isn't the block immediately after MBB, insert a branch to it.
  if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(NewDest))
    InsertBranch(*MBB, NewDest, 0, SmallVector<MachineOperand, 0>(),
                 Tail->getDebugLoc());
  MBB->addSuccessor(NewDest);
}
// DSPSEInstrInfo::expandPostRAPseudo
/// Expand Pseudo instructions into real backend instructions
bool DSPSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
  MachineBasicBlock &MBB = *MI->getParent();

  switch (MI->getDesc().getOpcode()) {
  default:
    return false;
  case DSP::RetLR:
    ExpandRetLR(MBB, MI, DSP::Ret);
    break;
  case DSP::MovVR:
    ExpandMovVR(MBB, MI, DSP::MovG2V40);
    break;
  case DSP::MovGR:
    ExpandMovGR(MBB, MI, DSP::MovIGH, DSP::MovIGL);
    break;
  }

  MBB.erase(MI);
  return true;
}
void HexagonCopyToCombine::emitCombineRR(MachineBasicBlock::iterator &InsertPt,
                                         unsigned DoubleDestReg,
                                         MachineOperand &HiOperand,
                                         MachineOperand &LoOperand) {
  unsigned LoRegKillFlag = getKillRegState(LoOperand.isKill());
  unsigned HiRegKillFlag = getKillRegState(HiOperand.isKill());
  unsigned LoReg = LoOperand.getReg();
  unsigned HiReg = HiOperand.getReg();

  DebugLoc DL = InsertPt->getDebugLoc();
  MachineBasicBlock *BB = InsertPt->getParent();

  // Insert new combine instruction.
  //   DoubleRegDest = combine HiReg, LoReg
  BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combinew), DoubleDestReg)
      .addReg(HiReg, HiRegKillFlag)
      .addReg(LoReg, LoRegKillFlag);
}
MachineInstr *X86CallFrameOptimization::canFoldIntoRegPush(
    MachineBasicBlock::iterator FrameSetup, unsigned Reg) {
  // Do an extremely restricted form of load folding.
  // ISel will often create patterns like:
  // movl 4(%edi), %eax
  // movl 8(%edi), %ecx
  // movl 12(%edi), %edx
  // movl %edx, 8(%esp)
  // movl %ecx, 4(%esp)
  // movl %eax, (%esp)
  // call
  // Get rid of those with prejudice.
  if (!TargetRegisterInfo::isVirtualRegister(Reg))
    return nullptr;

  // Make sure this is the only use of Reg.
  if (!MRI->hasOneNonDBGUse(Reg))
    return nullptr;

  MachineBasicBlock::iterator DefMI = MRI->getVRegDef(Reg);

  // Make sure the def is a MOV from memory.
  // If the def is in another block, give up.
  if (DefMI->getOpcode() != X86::MOV32rm ||
      DefMI->getParent() != FrameSetup->getParent())
    return nullptr;

  // Be careful with movs that load from a stack slot, since it may get
  // resolved incorrectly.
  // TODO: Again, we already have the infrastructure, so this should work.
  if (!DefMI->getOperand(1).isReg())
    return nullptr;

  // Now, make sure everything else up until the ADJCALLSTACK is a sequence
  // of MOVs. To be less conservative would require duplicating a lot of the
  // logic from PeepholeOptimizer.
  // FIXME: A possibly better approach would be to teach the PeepholeOptimizer
  // to be smarter about folding into pushes.
  for (auto I = DefMI; I != FrameSetup; ++I)
    if (I->getOpcode() != X86::MOV32rm)
      return nullptr;

  return DefMI;
}
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePreIdxUpdateInsn(MachineBasicBlock::iterator I,
                                           MachineBasicBlock::iterator Update) {
  assert((Update->getOpcode() == AArch64::ADDXri ||
          Update->getOpcode() == AArch64::SUBXri) &&
         "Unexpected base register update instruction to merge!");
  MachineBasicBlock::iterator NextI = I;
  // Return the instruction following the merged instruction, which is
  // the instruction following our unmerged load. Unless that's the add/sub
  // instruction we're merging, in which case it's the one after that.
  if (++NextI == Update)
    ++NextI;

  int Value = Update->getOperand(2).getImm();
  assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
         "Can't merge 1 << 12 offset into pre-indexed load / store");
  if (Update->getOpcode() == AArch64::SUBXri)
    Value = -Value;

  unsigned NewOpc = getPreIndexedOpcode(I->getOpcode());
  MachineInstrBuilder MIB =
      BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
          .addOperand(Update->getOperand(0))
          .addOperand(I->getOperand(0))
          .addOperand(I->getOperand(1))
          .addImm(Value);
  (void)MIB;

  DEBUG(dbgs() << "Creating pre-indexed load/store.");
  DEBUG(dbgs() << " Replacing instructions:\n ");
  DEBUG(I->print(dbgs()));
  DEBUG(dbgs() << " ");
  DEBUG(Update->print(dbgs()));
  DEBUG(dbgs() << " with instruction:\n ");
  DEBUG(((MachineInstr *)MIB)->print(dbgs()));
  DEBUG(dbgs() << "\n");

  // Erase the old instructions for the block.
  I->eraseFromParent();
  Update->eraseFromParent();

  return NextI;
}
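// getPreIndexedOpcode maps an unscaled-offset load/store opcode onto its
// pre-indexed (writeback) counterpart. Abbreviated sketch showing only two of
// the many entries the real table covers.
static unsigned getPreIndexedOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pre-indexed equivalent!");
  case AArch64::LDRXui:
    return AArch64::LDRXpre;
  case AArch64::STRXui:
    return AArch64::STRXpre;
  }
}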
bool ErlangGC::findCustomSafePoints(GCFunctionInfo &FI, MachineFunction &MF) {
  for (MachineFunction::iterator BBI = MF.begin(), BBE = MF.end(); BBI != BBE;
       ++BBI)
    for (MachineBasicBlock::iterator MI = BBI->begin(), ME = BBI->end();
         MI != ME; ++MI)
      if (MI->getDesc().isCall()) {
        // Do not treat tail call sites as safe points.
        if (MI->getDesc().isTerminator())
          continue;

        /* Code copied from VisitCallPoint(...) */
        MachineBasicBlock::iterator RAI = MI;
        ++RAI;
        MCSymbol *Label = InsertLabel(*MI->getParent(), RAI, MI->getDebugLoc());
        FI.addSafePoint(GC::PostCall, Label, MI->getDebugLoc());
      }

  return false;
}
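// InsertLabel is a small helper that drops a GC label at the given point
// (here, right after each call). Sketched from the surrounding usage rather
// than copied verbatim.
MCSymbol *ErlangGC::InsertLabel(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MI,
                                DebugLoc DL) const {
  const TargetInstrInfo *TII = MBB.getParent()->getTarget().getInstrInfo();
  MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol();
  BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label);
  return Label;
}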
bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
                                               SIAtomicScope Scope,
                                               SIAtomicAddrSpace AddrSpace,
                                               Position Pos) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBINVL1_VOL));
      Changed = true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to invalidate.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  /// The scratch address space does not need the global memory cache
  /// to be flushed as all memory operations by the same thread are
  /// sequentially consistent, and no other thread can access scratch
  /// memory.

  /// Other address spaces do not have a cache.

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
bool AMDGPUInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
  MachineBasicBlock *MBB = MI->getParent();

  switch (MI->getOpcode()) {
  default:
    if (isRegisterLoad(*MI)) {
      unsigned RegIndex = MI->getOperand(2).getImm();
      unsigned Channel = MI->getOperand(3).getImm();
      unsigned Address = calculateIndirectAddress(RegIndex, Channel);
      unsigned OffsetReg = MI->getOperand(1).getReg();
      if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
        buildMovInstr(MBB, MI, MI->getOperand(0).getReg(),
                      getIndirectAddrRegClass()->getRegister(Address));
      } else {
        buildIndirectRead(MBB, MI, MI->getOperand(0).getReg(),
                          Address, OffsetReg);
      }
    } else if (isRegisterStore(*MI)) {
      unsigned RegIndex = MI->getOperand(2).getImm();
      unsigned Channel = MI->getOperand(3).getImm();
      unsigned Address = calculateIndirectAddress(RegIndex, Channel);
      unsigned OffsetReg = MI->getOperand(1).getReg();
      if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
        buildMovInstr(MBB, MI, getIndirectAddrRegClass()->getRegister(Address),
                      MI->getOperand(0).getReg());
      } else {
        buildIndirectWrite(MBB, MI, MI->getOperand(0).getReg(),
                           calculateIndirectAddress(RegIndex, Channel),
                           OffsetReg);
      }
    } else {
      return false;
    }
  }

  MBB->erase(MI);
  return true;
}
MachineBasicBlock::iterator
X86InstrInfo::reverseBranchCondition(MachineBasicBlock::iterator MI) const {
  unsigned Opcode = MI->getOpcode();
  assert(isBranch(Opcode) && "MachineInstr must be a branch");
  unsigned ROpcode;
  switch (Opcode) {
  default: assert(0 && "Cannot reverse unconditional branches!");
  case X86::JB:  ROpcode = X86::JAE; break;
  case X86::JAE: ROpcode = X86::JB;  break;
  case X86::JE:  ROpcode = X86::JNE; break;
  case X86::JNE: ROpcode = X86::JE;  break;
  case X86::JBE: ROpcode = X86::JA;  break;
  case X86::JA:  ROpcode = X86::JBE; break;
  case X86::JS:  ROpcode = X86::JNS; break;
  case X86::JNS: ROpcode = X86::JS;  break;
  case X86::JL:  ROpcode = X86::JGE; break;
  case X86::JGE: ROpcode = X86::JL;  break;
  case X86::JLE: ROpcode = X86::JG;  break;
  case X86::JG:  ROpcode = X86::JLE; break;
  }
  MachineBasicBlock *MBB = MI->getParent();
  MachineBasicBlock *TMBB = MI->getOperand(0).getMachineBasicBlock();
  return BuildMI(*MBB, MBB->erase(MI), ROpcode, 1).addMBB(TMBB);
}
MachineInstr *X86CallFrameOptimization::canFoldIntoRegPush(
    MachineBasicBlock::iterator FrameSetup, unsigned Reg) {
  // Do an extremely restricted form of load folding.
  // ISel will often create patterns like:
  // movl 4(%edi), %eax
  // movl 8(%edi), %ecx
  // movl 12(%edi), %edx
  // movl %edx, 8(%esp)
  // movl %ecx, 4(%esp)
  // movl %eax, (%esp)
  // call
  // Get rid of those with prejudice.
  if (!TargetRegisterInfo::isVirtualRegister(Reg))
    return nullptr;

  // Make sure this is the only use of Reg.
  if (!MRI->hasOneNonDBGUse(Reg))
    return nullptr;

  MachineBasicBlock::iterator DefMI = MRI->getVRegDef(Reg);

  // Make sure the def is a MOV from memory.
  // If the def is in another block, give up.
  if ((DefMI->getOpcode() != X86::MOV32rm &&
       DefMI->getOpcode() != X86::MOV64rm) ||
      DefMI->getParent() != FrameSetup->getParent())
    return nullptr;

  // Make sure we don't have any instructions between DefMI and the
  // push that make folding the load illegal.
  for (auto I = DefMI; I != FrameSetup; ++I)
    if (I->isLoadFoldBarrier())
      return nullptr;

  return DefMI;
}
void HexagonCopyToCombine::emitCombineIR(MachineBasicBlock::iterator &InsertPt,
                                         unsigned DoubleDestReg,
                                         MachineOperand &HiOperand,
                                         MachineOperand &LoOperand) {
  unsigned LoReg = LoOperand.getReg();
  unsigned LoRegKillFlag = getKillRegState(LoOperand.isKill());

  DebugLoc DL = InsertPt->getDebugLoc();
  MachineBasicBlock *BB = InsertPt->getParent();

  // Handle global.
  if (HiOperand.isGlobal()) {
    BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineir), DoubleDestReg)
        .addGlobalAddress(HiOperand.getGlobal(), HiOperand.getOffset(),
                          HiOperand.getTargetFlags())
        .addReg(LoReg, LoRegKillFlag);
    return;
  }

  // Insert new combine instruction.
  //   DoubleRegDest = combine #HiImm, LoReg
  BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineir), DoubleDestReg)
      .addImm(HiOperand.getImm())
      .addReg(LoReg, LoRegKillFlag);
}
/// reMaterializeFor - Attempt to rematerialize before MI instead of reloading.
bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
                                     MachineBasicBlock::iterator MI) {
  SlotIndex UseIdx = LIS.getInstructionIndex(MI).getRegSlot(true);
  VNInfo *ParentVNI = VirtReg.getVNInfoAt(UseIdx.getBaseIndex());

  if (!ParentVNI) {
    DEBUG(dbgs() << "\tadding <undef> flags: ");
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg)
        MO.setIsUndef();
    }
    DEBUG(dbgs() << UseIdx << '\t' << *MI);
    return true;
  }

  if (SnippetCopies.count(MI))
    return false;

  // Use an OrigVNI from traceSiblingValue when ParentVNI is a sibling copy.
  LiveRangeEdit::Remat RM(ParentVNI);
  SibValueMap::const_iterator SibI = SibValues.find(ParentVNI);
  if (SibI != SibValues.end())
    RM.OrigMI = SibI->second.DefMI;
  if (!Edit->canRematerializeAt(RM, UseIdx, false)) {
    markValueUsed(&VirtReg, ParentVNI);
    DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << *MI);
    return false;
  }

  // If the instruction also writes VirtReg.reg, it had better not require the
  // same register for uses and defs.
  SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops;
  MIBundleOperands::VirtRegInfo RI =
    MIBundleOperands(MI).analyzeVirtReg(VirtReg.reg, &Ops);
  if (RI.Tied) {
    markValueUsed(&VirtReg, ParentVNI);
    DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI);
    return false;
  }

  // Before rematerializing into a register for a single instruction, try to
  // fold a load into the instruction. That avoids allocating a new register.
  if (RM.OrigMI->canFoldAsLoad() &&
      foldMemoryOperand(Ops, RM.OrigMI)) {
    Edit->markRematerialized(RM.ParentVNI);
    ++NumFoldedLoads;
    return true;
  }

  // Allocate a new register for the remat.
  LiveInterval &NewLI = Edit->createFrom(Original);
  NewLI.markNotSpillable();

  // Finally we can rematerialize OrigMI before MI.
  SlotIndex DefIdx = Edit->rematerializeAt(*MI->getParent(), MI, NewLI.reg, RM,
                                           TRI);
  DEBUG(dbgs() << "\tremat: " << DefIdx << '\t'
               << *LIS.getInstructionFromIndex(DefIdx));

  // Replace operands
  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(Ops[i].second);
    if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) {
      MO.setReg(NewLI.reg);
      MO.setIsKill();
    }
  }
  DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI);

  VNInfo *DefVNI = NewLI.getNextValue(DefIdx, LIS.getVNInfoAllocator());
  NewLI.addRange(LiveRange(DefIdx, UseIdx.getRegSlot(), DefVNI));
  DEBUG(dbgs() << "\tinterval: " << NewLI << '\n');
  ++NumRemats;
  return true;
}
void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
                                                  const CallContext &Context) {
  // Ok, we can in fact do the transformation for this call.
  // Do not remove the FrameSetup instruction, but adjust the parameters.
  // PEI will end up finalizing the handling of this.
  MachineBasicBlock::iterator FrameSetup = Context.FrameSetup;
  MachineBasicBlock &MBB = *(FrameSetup->getParent());
  TII->setFrameAdjustment(*FrameSetup, Context.ExpectedDist);

  DebugLoc DL = FrameSetup->getDebugLoc();
  bool Is64Bit = STI->is64Bit();
  // Now, iterate through the vector in reverse order, and replace the store to
  // stack with pushes. MOVmi/MOVmr doesn't have any defs, so no need to
  // replace uses.
  for (int Idx = (Context.ExpectedDist >> Log2SlotSize) - 1; Idx >= 0; --Idx) {
    MachineBasicBlock::iterator Store = *Context.ArgStoreVector[Idx];
    MachineOperand PushOp = Store->getOperand(X86::AddrNumOperands);
    MachineBasicBlock::iterator Push = nullptr;
    unsigned PushOpcode;
    switch (Store->getOpcode()) {
    default:
      llvm_unreachable("Unexpected Opcode!");
    case X86::AND16mi8:
    case X86::AND32mi8:
    case X86::AND64mi8:
    case X86::OR16mi8:
    case X86::OR32mi8:
    case X86::OR64mi8:
    case X86::MOV32mi:
    case X86::MOV64mi32:
      PushOpcode = Is64Bit ? X86::PUSH64i32 : X86::PUSHi32;
      // If the operand is a small (8-bit) immediate, we can use a
      // PUSH instruction with a shorter encoding.
      // Note that isImm() may fail even though this is a MOVmi, because
      // the operand can also be a symbol.
      if (PushOp.isImm()) {
        int64_t Val = PushOp.getImm();
        if (isInt<8>(Val))
          PushOpcode = Is64Bit ? X86::PUSH64i8 : X86::PUSH32i8;
      }
      Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode)).add(PushOp);
      break;
    case X86::MOV32mr:
    case X86::MOV64mr: {
      unsigned int Reg = PushOp.getReg();

      // If storing a 32-bit vreg on 64-bit targets, extend to a 64-bit vreg
      // in preparation for the PUSH64. The upper 32 bits can be undef.
      if (Is64Bit && Store->getOpcode() == X86::MOV32mr) {
        unsigned UndefReg = MRI->createVirtualRegister(&X86::GR64RegClass);
        Reg = MRI->createVirtualRegister(&X86::GR64RegClass);
        BuildMI(MBB, Context.Call, DL, TII->get(X86::IMPLICIT_DEF), UndefReg);
        BuildMI(MBB, Context.Call, DL, TII->get(X86::INSERT_SUBREG), Reg)
            .addReg(UndefReg)
            .add(PushOp)
            .addImm(X86::sub_32bit);
      }

      // If PUSHrmm is not slow on this target, try to fold the source of the
      // push into the instruction.
      bool SlowPUSHrmm = STI->isAtom() || STI->isSLM();

      // Check that this is legal to fold. Right now, we're extremely
      // conservative about that.
      MachineInstr *DefMov = nullptr;
      if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) {
        PushOpcode = Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm;
        Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode));

        unsigned NumOps = DefMov->getDesc().getNumOperands();
        for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
          Push->addOperand(DefMov->getOperand(i));

        DefMov->eraseFromParent();
      } else {
        PushOpcode = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
        Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode))
                   .addReg(Reg)
                   .getInstr();
      }
      break;
    }
    }

    // For debugging, when using SP-based CFA, we need to adjust the CFA
    // offset after each push.
    // TODO: This is needed only if we require precise CFA.
    if (!TFL->hasFP(MF))
      TFL->BuildCFI(
          MBB, std::next(Push), DL,
          MCCFIInstruction::createAdjustCfaOffset(nullptr, SlotSize));

    MBB.erase(Store);
  }

  // The stack-pointer copy is no longer used in the call sequences.
  // There should not be any other users, but we can't commit to that, so:
  if (Context.SPCopy && MRI->use_empty(Context.SPCopy->getOperand(0).getReg()))
    Context.SPCopy->eraseFromParent();

  // Once we've done this, we need to make sure PEI doesn't assume a reserved
  // frame.
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  FuncInfo->setHasPushSequences(true);
}