/// Return the corresponding compact (no delay slot) form of a branch.
unsigned MipsInstrInfo::getEquivalentCompactForm(
    const MachineBasicBlock::iterator I) const {
  unsigned Opcode = I->getOpcode();
  bool canUseShortMicroMipsCTI = false;

  if (Subtarget.inMicroMipsMode()) {
    switch (Opcode) {
    case Mips::BNE:
    case Mips::BNE_MM:
    case Mips::BEQ:
    case Mips::BEQ_MM:
      // microMIPS has NE,EQ branches that do not have delay slots provided one
      // of the operands is zero.
      if (I->getOperand(1).getReg() == Subtarget.getABI().GetZeroReg())
        canUseShortMicroMipsCTI = true;
      break;
    // For microMIPS the PseudoReturn and PseudoIndirectBranch are always
    // expanded to JR_MM, so they can be replaced with JRC16_MM.
    case Mips::JR:
    case Mips::PseudoReturn:
    case Mips::PseudoIndirectBranch:
      canUseShortMicroMipsCTI = true;
      break;
    }
  }

  // MIPSR6 forbids both operands being the zero register.
  if (Subtarget.hasMips32r6() && (I->getNumOperands() > 1) &&
      (I->getOperand(0).isReg() &&
       (I->getOperand(0).getReg() == Mips::ZERO ||
        I->getOperand(0).getReg() == Mips::ZERO_64)) &&
      (I->getOperand(1).isReg() &&
       (I->getOperand(1).getReg() == Mips::ZERO ||
        I->getOperand(1).getReg() == Mips::ZERO_64)))
    return 0;

  if (Subtarget.hasMips32r6() || canUseShortMicroMipsCTI) {
    switch (Opcode) {
    case Mips::B:
      return Mips::BC;
    case Mips::BAL:
      return Mips::BALC;
    case Mips::BEQ:
    case Mips::BEQ_MM:
      if (canUseShortMicroMipsCTI)
        return Mips::BEQZC_MM;
      else if (I->getOperand(0).getReg() == I->getOperand(1).getReg())
        return 0;
      return Mips::BEQC;
    case Mips::BNE:
    case Mips::BNE_MM:
      if (canUseShortMicroMipsCTI)
        return Mips::BNEZC_MM;
      else if (I->getOperand(0).getReg() == I->getOperand(1).getReg())
        return 0;
      return Mips::BNEC;
    case Mips::BGE:
      if (I->getOperand(0).getReg() == I->getOperand(1).getReg())
        return 0;
      return Mips::BGEC;
    case Mips::BGEU:
      if (I->getOperand(0).getReg() == I->getOperand(1).getReg())
        return 0;
      return Mips::BGEUC;
    case Mips::BGEZ:
      return Mips::BGEZC;
    case Mips::BGTZ:
      return Mips::BGTZC;
    case Mips::BLEZ:
      return Mips::BLEZC;
    case Mips::BLT:
      if (I->getOperand(0).getReg() == I->getOperand(1).getReg())
        return 0;
      return Mips::BLTC;
    case Mips::BLTU:
      if (I->getOperand(0).getReg() == I->getOperand(1).getReg())
        return 0;
      return Mips::BLTUC;
    case Mips::BLTZ:
      return Mips::BLTZC;
    case Mips::BEQ64:
      if (I->getOperand(0).getReg() == I->getOperand(1).getReg())
        return 0;
      return Mips::BEQC64;
    case Mips::BNE64:
      if (I->getOperand(0).getReg() == I->getOperand(1).getReg())
        return 0;
      return Mips::BNEC64;
    case Mips::BGTZ64:
      return Mips::BGTZC64;
    case Mips::BGEZ64:
      return Mips::BGEZC64;
    case Mips::BLTZ64:
      return Mips::BLTZC64;
    case Mips::BLEZ64:
      return Mips::BLEZC64;
    // For MIPSR6, the instruction 'jic' can be used for these cases. Some
    // tools will accept 'jrc reg' as an alias for 'jic 0, $reg'.
    case Mips::JR:
    case Mips::PseudoIndirectBranchR6:
    case Mips::PseudoReturn:
    case Mips::TAILCALLR6REG:
      if (canUseShortMicroMipsCTI)
        return Mips::JRC16_MM;
      return Mips::JIC;
    case Mips::JALRPseudo:
      return Mips::JIALC;
    case Mips::JR64:
    case Mips::PseudoIndirectBranch64R6:
    case Mips::PseudoReturn64:
    case Mips::TAILCALL64R6REG:
      return Mips::JIC64;
    case Mips::JALR64Pseudo:
      return Mips::JIALC64;
    default:
      return 0;
    }
  }

  return 0;
}
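// Editorial sketch (not from the original sources): how a caller such as the
// Mips delay-slot filler might combine getEquivalentCompactForm() with
// genInstrWithNewOpc() (defined later in this section) to swap a delay-slot
// branch for its compact form.  'TII' and 'Branch' are assumed to be a
// MipsInstrInfo pointer and an iterator to the delay-slot branch.
//
//   if (unsigned CompactOpc = TII->getEquivalentCompactForm(Branch)) {
//     TII->genInstrWithNewOpc(CompactOpc, Branch); // insert the compact form
//     Branch->eraseFromParent();                   // drop the delay-slot form
//   }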
bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
  MachineBasicBlock &MBB = *MI->getParent();
  bool isMicroMips = Subtarget.inMicroMipsMode();
  unsigned Opc;

  switch (MI->getDesc().getOpcode()) {
  default:
    return false;
  case Mips::RetRA:
    expandRetRA(MBB, MI);
    break;
  case Mips::PseudoMFHI:
    Opc = isMicroMips ? Mips::MFHI16_MM : Mips::MFHI;
    expandPseudoMFHiLo(MBB, MI, Opc);
    break;
  case Mips::PseudoMFLO:
    Opc = isMicroMips ? Mips::MFLO16_MM : Mips::MFLO;
    expandPseudoMFHiLo(MBB, MI, Opc);
    break;
  case Mips::PseudoMFHI64:
    expandPseudoMFHiLo(MBB, MI, Mips::MFHI64);
    break;
  case Mips::PseudoMFLO64:
    expandPseudoMFHiLo(MBB, MI, Mips::MFLO64);
    break;
  case Mips::PseudoMTLOHI:
    expandPseudoMTLoHi(MBB, MI, Mips::MTLO, Mips::MTHI, false);
    break;
  case Mips::PseudoMTLOHI64:
    expandPseudoMTLoHi(MBB, MI, Mips::MTLO64, Mips::MTHI64, false);
    break;
  case Mips::PseudoMTLOHI_DSP:
    expandPseudoMTLoHi(MBB, MI, Mips::MTLO_DSP, Mips::MTHI_DSP, true);
    break;
  case Mips::PseudoCVT_S_W:
    expandCvtFPInt(MBB, MI, Mips::CVT_S_W, Mips::MTC1, false);
    break;
  case Mips::PseudoCVT_D32_W:
    expandCvtFPInt(MBB, MI, Mips::CVT_D32_W, Mips::MTC1, false);
    break;
  case Mips::PseudoCVT_S_L:
    expandCvtFPInt(MBB, MI, Mips::CVT_S_L, Mips::DMTC1, true);
    break;
  case Mips::PseudoCVT_D64_W:
    expandCvtFPInt(MBB, MI, Mips::CVT_D64_W, Mips::MTC1, true);
    break;
  case Mips::PseudoCVT_D64_L:
    expandCvtFPInt(MBB, MI, Mips::CVT_D64_L, Mips::DMTC1, true);
    break;
  case Mips::BuildPairF64:
    expandBuildPairF64(MBB, MI, false);
    break;
  case Mips::BuildPairF64_64:
    expandBuildPairF64(MBB, MI, true);
    break;
  case Mips::ExtractElementF64:
    expandExtractElementF64(MBB, MI, false);
    break;
  case Mips::ExtractElementF64_64:
    expandExtractElementF64(MBB, MI, true);
    break;
  case Mips::MIPSeh_return32:
  case Mips::MIPSeh_return64:
    expandEhReturn(MBB, MI);
    break;
  }

  MBB.erase(MI);
  return true;
}
bool AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  if (!isUnpredicatedTerminator(I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
    if (LastOpc == AArch64::Bimm) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranch(LastOpc)) {
      classifyCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && LastOpc == AArch64::Bimm) {
    while (SecondLastOpc == AArch64::Bimm) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
        // Return now; the only terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (LastOpc == AArch64::Bimm) {
    if (SecondLastOpc == AArch64::Bcc) {
      TBB = SecondLastInst->getOperand(1).getMBB();
      Cond.push_back(MachineOperand::CreateImm(AArch64::Bcc));
      Cond.push_back(SecondLastInst->getOperand(0));
      FBB = LastInst->getOperand(0).getMBB();
      return false;
    } else if (isCondBranch(SecondLastOpc)) {
      classifyCondBranch(SecondLastInst, TBB, Cond);
      FBB = LastInst->getOperand(0).getMBB();
      return false;
    }
  }

  // If the block ends with two unconditional branches, handle it. The second
  // one is not executed, so remove it.
  if (SecondLastOpc == AArch64::Bimm && LastOpc == AArch64::Bimm) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}
bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                   MachineBasicBlock *&TBB,
                                   MachineBasicBlock *&FBB,
                                   SmallVectorImpl<MachineOperand> &Cond,
                                   bool AllowModify) const {
  MachineBasicBlock::iterator I = MBB.end();
  MachineBasicBlock::iterator UnCondBrIter = MBB.end();
  while (I != MBB.begin()) {
    --I;

    if (I->isDebugValue())
      continue;

    // When we see a non-terminator, we are done.
    if (!isUnpredicatedTerminator(I))
      break;

    // Terminator is not a branch.
    if (!I->getDesc().isBranch())
      return true;

    // Handle unconditional branches.
    if (I->getOpcode() == SP::BA) {
      UnCondBrIter = I;

      if (!AllowModify) {
        TBB = I->getOperand(0).getMBB();
        continue;
      }

      while (llvm::next(I) != MBB.end())
        llvm::next(I)->eraseFromParent();

      Cond.clear();
      FBB = 0;

      if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
        TBB = 0;
        I->eraseFromParent();
        I = MBB.end();
        UnCondBrIter = MBB.end();
        continue;
      }

      TBB = I->getOperand(0).getMBB();
      continue;
    }

    unsigned Opcode = I->getOpcode();
    if (Opcode != SP::BCOND && Opcode != SP::FBCOND)
      return true; // Unknown opcode.

    SPCC::CondCodes BranchCode = (SPCC::CondCodes)I->getOperand(1).getImm();

    if (Cond.empty()) {
      MachineBasicBlock *TargetBB = I->getOperand(0).getMBB();
      if (AllowModify && UnCondBrIter != MBB.end() &&
          MBB.isLayoutSuccessor(TargetBB)) {
        // Transform the code
        //
        //    brCC L1
        //    ba L2
        // L1:
        //    ..
        // L2:
        //
        // into
        //
        //    brnCC L2
        // L1:
        //    ...
        // L2:
        //
        BranchCode = GetOppositeBranchCondition(BranchCode);
        MachineBasicBlock::iterator OldInst = I;
        BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(Opcode))
          .addMBB(UnCondBrIter->getOperand(0).getMBB()).addImm(BranchCode);
        BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(SP::BA))
          .addMBB(TargetBB);
        MBB.addSuccessor(TargetBB);
        OldInst->eraseFromParent();
        UnCondBrIter->eraseFromParent();

        UnCondBrIter = MBB.end();
        I = MBB.end();
        continue;
      }
      FBB = TBB;
      TBB = I->getOperand(0).getMBB();
      Cond.push_back(MachineOperand::CreateImm(BranchCode));
      continue;
    }
    // FIXME: Handle subsequent conditional branches.
    // For now, we can't handle multiple conditional branches.
    return true;
  }
  return false;
}
unsigned LembergInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return 0;
  --I;
  if (I->getOpcode() != Lemberg::JUMP &&
      I->getOpcode() != Lemberg::JUMPtrue &&
      I->getOpcode() != Lemberg::JUMPfalse &&
      I->getOpcode() != Lemberg::JUMPpred &&
      I->getOpcode() != Lemberg::JUMPeqz &&
      I->getOpcode() != Lemberg::JUMPnez &&
      I->getOpcode() != Lemberg::JUMPltz &&
      I->getOpcode() != Lemberg::JUMPgez &&
      I->getOpcode() != Lemberg::JUMPgtz &&
      I->getOpcode() != Lemberg::JUMPlez)
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();
  if (I == MBB.begin())
    return 1;
  --I;
  if (I->getOpcode() != Lemberg::JUMPtrue &&
      I->getOpcode() != Lemberg::JUMPfalse &&
      I->getOpcode() != Lemberg::JUMPpred &&
      I->getOpcode() != Lemberg::JUMPeqz &&
      I->getOpcode() != Lemberg::JUMPnez &&
      I->getOpcode() != Lemberg::JUMPltz &&
      I->getOpcode() != Lemberg::JUMPgez &&
      I->getOpcode() != Lemberg::JUMPgtz &&
      I->getOpcode() != Lemberg::JUMPlez)
    return 1;

  // Remove the branch.
  I->eraseFromParent();
  return 2;
}
MachineBasicBlock::iterator
Filler::findDelayInstr(MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator slot) {
  SmallSet<unsigned, 32> RegDefs;
  SmallSet<unsigned, 32> RegUses;
  bool sawLoad = false;
  bool sawStore = false;

  if (slot == MBB.begin())
    return MBB.end();

  if (slot->getOpcode() == SP::RET || slot->getOpcode() == SP::TLS_CALL)
    return MBB.end();

  if (slot->getOpcode() == SP::RETL) {
    MachineBasicBlock::iterator J = slot;
    --J;

    if (J->getOpcode() == SP::RESTORErr
        || J->getOpcode() == SP::RESTOREri) {
      // change retl to ret.
      slot->setDesc(TM.getInstrInfo()->get(SP::RET));
      return J;
    }
  }

  // Call's delay filler can def some of call's uses.
  if (slot->isCall())
    insertCallDefsUses(slot, RegDefs, RegUses);
  else
    insertDefsUses(slot, RegDefs, RegUses);

  bool done = false;

  MachineBasicBlock::iterator I = slot;

  while (!done) {
    done = (I == MBB.begin());

    if (!done)
      --I;

    // skip debug value
    if (I->isDebugValue())
      continue;

    if (I->hasUnmodeledSideEffects() || I->isInlineAsm() || I->isPosition() ||
        I->hasDelaySlot() || I->isBundledWithSucc())
      break;

    if (delayHasHazard(I, sawLoad, sawStore, RegDefs, RegUses)) {
      insertDefsUses(I, RegDefs, RegUses);
      continue;
    }

    return I;
  }
  return MBB.end();
}
bool PatmosDelaySlotKiller::killDelaySlots(MachineBasicBlock &MBB) {
  bool Changed = false;

  DEBUG(dbgs() << "Killing slots in BB#" << MBB.getNumber()
               << " (" << MBB.getFullName() << ")\n");

  // consider the basic block from top to bottom
  for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
    // Control-flow instructions ("proper" delay slots)
    if (I->hasDelaySlot()) {
      assert((I->isCall() || I->isReturn() || I->isBranch()) &&
             "Unexpected instruction with delay slot.");

      MachineBasicBlock::instr_iterator MI = *I;
      if (I->isBundle()) { ++MI; }

      unsigned Opcode = MI->getOpcode();

      if (Opcode == Patmos::BR || Opcode == Patmos::BRu ||
          Opcode == Patmos::BRR || Opcode == Patmos::BRRu ||
          Opcode == Patmos::BRT || Opcode == Patmos::BRTu ||
          Opcode == Patmos::BRCF || Opcode == Patmos::BRCFu ||
          Opcode == Patmos::BRCFR || Opcode == Patmos::BRCFRu ||
          Opcode == Patmos::BRCFT || Opcode == Patmos::BRCFTu ||
          Opcode == Patmos::CALL || Opcode == Patmos::CALLR ||
          Opcode == Patmos::RET || Opcode == Patmos::XRET) {

        bool onlyNops = true;
        unsigned maxCount = TM.getSubtargetImpl()->getDelaySlotCycles(&*I);
        unsigned count = 0;
        for (MachineBasicBlock::iterator K = llvm::next(I), E = MBB.end();
             K != E && count < maxCount; ++K, ++count) {
          if (K->getOpcode() != Patmos::NOP) {
            onlyNops = false;
          }
        }
        if (onlyNops) {
          unsigned NewOpcode = 0;
          switch (Opcode) {
          case Patmos::BR:     NewOpcode = Patmos::BRND; break;
          case Patmos::BRu:    NewOpcode = Patmos::BRNDu; break;
          case Patmos::BRR:    NewOpcode = Patmos::BRRND; break;
          case Patmos::BRRu:   NewOpcode = Patmos::BRRNDu; break;
          case Patmos::BRT:    NewOpcode = Patmos::BRTND; break;
          case Patmos::BRTu:   NewOpcode = Patmos::BRTNDu; break;
          case Patmos::BRCF:   NewOpcode = Patmos::BRCFND; break;
          case Patmos::BRCFu:  NewOpcode = Patmos::BRCFNDu; break;
          case Patmos::BRCFR:  NewOpcode = Patmos::BRCFRND; break;
          case Patmos::BRCFRu: NewOpcode = Patmos::BRCFRNDu; break;
          case Patmos::BRCFT:  NewOpcode = Patmos::BRCFTND; break;
          case Patmos::BRCFTu: NewOpcode = Patmos::BRCFTNDu; break;
          case Patmos::CALL:   NewOpcode = Patmos::CALLND; break;
          case Patmos::CALLR:  NewOpcode = Patmos::CALLRND; break;
          case Patmos::RET:    NewOpcode = Patmos::RETND; break;
          case Patmos::XRET:   NewOpcode = Patmos::XRETND; break;
          }
          const MCInstrDesc &nonDelayed = TII->get(NewOpcode);
          MI->setDesc(nonDelayed);

          unsigned killCount = 0;
          MachineBasicBlock::iterator K = llvm::next(I);
          for (MachineBasicBlock::iterator E = MBB.end();
               K != E && killCount < count; ++K, ++killCount) {
            KilledSlots++;
          }
          MBB.erase(llvm::next(I), K);
        }
      }
      Changed = true; // pass result
    }
  }
  return Changed;
}
/// runOnMachineFunction - Reduce two-address instructions to two operands.
///
bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
  DEBUG(errs() << "Machine Function\n");
  const TargetMachine &TM = MF.getTarget();
  MRI = &MF.getRegInfo();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  LV = getAnalysisIfAvailable<LiveVariables>();
  AA = &getAnalysis<AliasAnalysis>();

  bool MadeChange = false;

  DEBUG(errs() << "********** REWRITING TWO-ADDR INSTRS **********\n");
  DEBUG(errs() << "********** Function: "
               << MF.getFunction()->getName() << '\n');

  // ReMatRegs - Keep track of the registers whose def's are remat'ed.
  BitVector ReMatRegs;
  ReMatRegs.resize(MRI->getLastVirtReg()+1);

  typedef DenseMap<unsigned, SmallVector<std::pair<unsigned, unsigned>, 4> >
    TiedOperandMap;
  TiedOperandMap TiedOperands(4);

  SmallPtrSet<MachineInstr*, 8> Processed;
  for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end();
       mbbi != mbbe; ++mbbi) {
    unsigned Dist = 0;
    DistanceMap.clear();
    SrcRegMap.clear();
    DstRegMap.clear();
    Processed.clear();
    for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end();
         mi != me; ) {
      MachineBasicBlock::iterator nmi = next(mi);
      const TargetInstrDesc &TID = mi->getDesc();
      bool FirstTied = true;

      DistanceMap.insert(std::make_pair(mi, ++Dist));

      ProcessCopy(&*mi, &*mbbi, Processed);

      // First scan through all the tied register uses in this instruction
      // and record a list of pairs of tied operands for each register.
      unsigned NumOps = (mi->getOpcode() == TargetInstrInfo::INLINEASM)
        ? mi->getNumOperands() : TID.getNumOperands();
      for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) {
        unsigned DstIdx = 0;
        if (!mi->isRegTiedToDefOperand(SrcIdx, &DstIdx))
          continue;

        if (FirstTied) {
          FirstTied = false;
          ++NumTwoAddressInstrs;
          DEBUG(errs() << '\t' << *mi);
        }

        assert(mi->getOperand(SrcIdx).isReg() &&
               mi->getOperand(SrcIdx).getReg() &&
               mi->getOperand(SrcIdx).isUse() &&
               "two address instruction invalid");

        unsigned regB = mi->getOperand(SrcIdx).getReg();
        TiedOperandMap::iterator OI = TiedOperands.find(regB);
        if (OI == TiedOperands.end()) {
          SmallVector<std::pair<unsigned, unsigned>, 4> TiedPair;
          OI = TiedOperands.insert(std::make_pair(regB, TiedPair)).first;
        }
        OI->second.push_back(std::make_pair(SrcIdx, DstIdx));
      }

      // Now iterate over the information collected above.
      for (TiedOperandMap::iterator OI = TiedOperands.begin(),
             OE = TiedOperands.end(); OI != OE; ++OI) {
        SmallVector<std::pair<unsigned, unsigned>, 4> &TiedPairs = OI->second;

        // If the instruction has a single pair of tied operands, try some
        // transformations that may either eliminate the tied operands or
        // improve the opportunities for coalescing away the register copy.
        if (TiedOperands.size() == 1 && TiedPairs.size() == 1) {
          unsigned SrcIdx = TiedPairs[0].first;
          unsigned DstIdx = TiedPairs[0].second;

          // If the registers are already equal, nothing needs to be done.
          if (mi->getOperand(SrcIdx).getReg() ==
              mi->getOperand(DstIdx).getReg())
            break; // Done with this instruction.

          if (TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist))
            break; // The tied operands have been eliminated.
        }

        bool RemovedKillFlag = false;
        bool AllUsesCopied = true;
        unsigned LastCopiedReg = 0;
        unsigned regB = OI->first;
        for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) {
          unsigned SrcIdx = TiedPairs[tpi].first;
          unsigned DstIdx = TiedPairs[tpi].second;
          unsigned regA = mi->getOperand(DstIdx).getReg();
          // Grab regB from the instruction because it may have changed if the
          // instruction was commuted.
          regB = mi->getOperand(SrcIdx).getReg();

          if (regA == regB) {
            // The register is tied to multiple destinations (or else we would
            // not have continued this far), but this use of the register
            // already matches the tied destination.  Leave it.
            AllUsesCopied = false;
            continue;
          }
          LastCopiedReg = regA;

          assert(TargetRegisterInfo::isVirtualRegister(regB) &&
                 "cannot make instruction into two-address form");

#ifndef NDEBUG
          // First, verify that we don't have a use of "a" in the instruction
          // (a = b + a for example) because our transformation will not
          // work. This should never occur because we are in SSA form.
          for (unsigned i = 0; i != mi->getNumOperands(); ++i)
            assert(i == DstIdx ||
                   !mi->getOperand(i).isReg() ||
                   mi->getOperand(i).getReg() != regA);
#endif

          // Emit a copy or rematerialize the definition.
          const TargetRegisterClass *rc = MRI->getRegClass(regB);
          MachineInstr *DefMI = MRI->getVRegDef(regB);
          // If it's safe and profitable, remat the definition instead of
          // copying it.
          if (DefMI &&
              DefMI->getDesc().isAsCheapAsAMove() &&
              DefMI->isSafeToReMat(TII, regB, AA) &&
              isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)) {
            DEBUG(errs() << "2addr: REMATTING : " << *DefMI << "\n");
            unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg();
            TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, TRI);
            ReMatRegs.set(regB);
            ++NumReMats;
          } else {
            bool Emitted = TII->copyRegToReg(*mbbi, mi, regA, regB, rc, rc);
            (void)Emitted;
            assert(Emitted && "Unable to issue a copy instruction!\n");
          }

          MachineBasicBlock::iterator prevMI = prior(mi);
          // Update DistanceMap.
          DistanceMap.insert(std::make_pair(prevMI, Dist));
          DistanceMap[mi] = ++Dist;

          DEBUG(errs() << "\t\tprepend:\t" << *prevMI);

          MachineOperand &MO = mi->getOperand(SrcIdx);
          assert(MO.isReg() && MO.getReg() == regB && MO.isUse() &&
                 "inconsistent operand info for 2-reg pass");
          if (MO.isKill()) {
            MO.setIsKill(false);
            RemovedKillFlag = true;
          }
          MO.setReg(regA);
        }

        if (AllUsesCopied) {
          // Replace other (un-tied) uses of regB with LastCopiedReg.
          for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
            MachineOperand &MO = mi->getOperand(i);
            if (MO.isReg() && MO.getReg() == regB && MO.isUse()) {
              if (MO.isKill()) {
                MO.setIsKill(false);
                RemovedKillFlag = true;
              }
              MO.setReg(LastCopiedReg);
            }
          }

          // Update live variables for regB.
          if (RemovedKillFlag && LV && LV->getVarInfo(regB).removeKill(mi))
            LV->addVirtualRegisterKilled(regB, prior(mi));

        } else if (RemovedKillFlag) {
          // Some tied uses of regB matched their destination registers, so
          // regB is still used in this instruction, but a kill flag was
          // removed from a different tied use of regB, so now we need to add
          // a kill flag to one of the remaining uses of regB.
          for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
            MachineOperand &MO = mi->getOperand(i);
            if (MO.isReg() && MO.getReg() == regB && MO.isUse()) {
              MO.setIsKill(true);
              break;
            }
          }
        }

        MadeChange = true;

        DEBUG(errs() << "\t\trewrite to:\t" << *mi);
      }

      // Clear TiedOperands here instead of at the top of the loop
      // since most instructions do not have tied operands.
      TiedOperands.clear();
      mi = nmi;
    }
  }

  // Some remat'ed instructions are dead.
  int VReg = ReMatRegs.find_first();
  while (VReg != -1) {
    if (MRI->use_empty(VReg)) {
      MachineInstr *DefMI = MRI->getVRegDef(VReg);
      DefMI->eraseFromParent();
    }
    VReg = ReMatRegs.find_next(VReg);
  }

  return MadeChange;
}
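// Editorial illustration (not from the pass itself): the effect of the copy
// insertion above on a single tied pair, shown as pseudo-MIR with
// hypothetical virtual registers.
//
//   Before:  %reg1027<def> = ADD %reg1025<kill>, %reg1026
//            (operand 0 is tied to operand 1)
//
//   After:   %reg1027<def> = COPY %reg1025<kill>   ; via copyRegToReg()
//            %reg1027<def> = ADD %reg1027, %reg1026
//
// Once the tied use reads the same virtual register as the def, the
// two-address constraint is satisfied and the allocator can give both
// operands one physical register.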
void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const {
  MachineBasicBlock &MBB = MF.front();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const MipsRegisterInfo *RegInfo =
    static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo());
  const MipsSEInstrInfo &TII =
    *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo());
  MachineBasicBlock::iterator MBBI = MBB.begin();
  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
  unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
  unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
  unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
  unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;

  // First, compute final stack size.
  uint64_t StackSize = MFI->getStackSize();

  // No need to allocate space on the stack.
  if (StackSize == 0 && !MFI->adjustsStack()) return;

  MachineModuleInfo &MMI = MF.getMMI();
  std::vector<MachineMove> &Moves = MMI.getFrameMoves();
  MachineLocation DstML, SrcML;

  // Adjust stack.
  TII.adjustStackPtr(SP, -StackSize, MBB, MBBI);

  // emit ".cfi_def_cfa_offset StackSize"
  MCSymbol *AdjustSPLabel = MMI.getContext().CreateTempSymbol();
  BuildMI(MBB, MBBI, dl,
          TII.get(TargetOpcode::PROLOG_LABEL)).addSym(AdjustSPLabel);
  DstML = MachineLocation(MachineLocation::VirtualFP);
  SrcML = MachineLocation(MachineLocation::VirtualFP, -StackSize);
  Moves.push_back(MachineMove(AdjustSPLabel, DstML, SrcML));

  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();

  if (CSI.size()) {
    // Find the instruction past the last instruction that saves a callee-saved
    // register to the stack.
    for (unsigned i = 0; i < CSI.size(); ++i)
      ++MBBI;

    // Iterate over list of callee-saved registers and emit .cfi_offset
    // directives.
    MCSymbol *CSLabel = MMI.getContext().CreateTempSymbol();
    BuildMI(MBB, MBBI, dl,
            TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel);

    for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
           E = CSI.end(); I != E; ++I) {
      int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
      unsigned Reg = I->getReg();

      // If Reg is a double precision register, emit two cfa_offsets,
      // one for each of the paired single precision registers.
      if (Mips::AFGR64RegClass.contains(Reg)) {
        MachineLocation DstML0(MachineLocation::VirtualFP, Offset);
        MachineLocation DstML1(MachineLocation::VirtualFP, Offset + 4);
        MachineLocation SrcML0(RegInfo->getSubReg(Reg, Mips::sub_fpeven));
        MachineLocation SrcML1(RegInfo->getSubReg(Reg, Mips::sub_fpodd));

        if (!STI.isLittle())
          std::swap(SrcML0, SrcML1);

        Moves.push_back(MachineMove(CSLabel, DstML0, SrcML0));
        Moves.push_back(MachineMove(CSLabel, DstML1, SrcML1));
      } else {
        // Reg is either in CPURegs or FGR32.
        DstML = MachineLocation(MachineLocation::VirtualFP, Offset);
        SrcML = MachineLocation(Reg);
        Moves.push_back(MachineMove(CSLabel, DstML, SrcML));
      }
    }
  }

  // if framepointer enabled, set it to point to the stack pointer.
  if (hasFP(MF)) {
    // Insert instruction "move $fp, $sp" at this location.
    BuildMI(MBB, MBBI, dl, TII.get(ADDu), FP).addReg(SP).addReg(ZERO);

    // emit ".cfi_def_cfa_register $fp"
    MCSymbol *SetFPLabel = MMI.getContext().CreateTempSymbol();
    BuildMI(MBB, MBBI, dl,
            TII.get(TargetOpcode::PROLOG_LABEL)).addSym(SetFPLabel);
    DstML = MachineLocation(FP);
    SrcML = MachineLocation(MachineLocation::VirtualFP);
    Moves.push_back(MachineMove(SetFPLabel, DstML, SrcML));
  }
}
void MSP430FrameLowering::emitPrologue(MachineFunction &MF) const {
  MachineBasicBlock &MBB = MF.front();   // Prolog goes in entry BB.
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MSP430MachineFunctionInfo *MSP430FI = MF.getInfo<MSP430MachineFunctionInfo>();
  const MSP430InstrInfo &TII =
    *static_cast<const MSP430InstrInfo*>(MF.getTarget().getInstrInfo());

  MachineBasicBlock::iterator MBBI = MBB.begin();
  DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();

  // Get the number of bytes to allocate from the FrameInfo.
  uint64_t StackSize = MFI->getStackSize();

  uint64_t NumBytes = 0;
  if (hasFP(MF)) {
    // Calculate required stack adjustment.
    uint64_t FrameSize = StackSize - 2;
    NumBytes = FrameSize - MSP430FI->getCalleeSavedFrameSize();

    // Get the offset of the stack slot for the FPW register, which is
    // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
    // Update the frame offset adjustment.
    MFI->setOffsetAdjustment(-NumBytes);

    // Save FPW into the appropriate stack slot...
    BuildMI(MBB, MBBI, DL, TII.get(MSP430::PUSH16r))
      .addReg(MSP430::FPW, RegState::Kill);

    // Update FPW with the new base value...
    BuildMI(MBB, MBBI, DL, TII.get(MSP430::MOV16rr), MSP430::FPW)
      .addReg(MSP430::SPW);

    // Mark the FramePtr as live-in in every block except the entry.
    for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
         I != E; ++I)
      I->addLiveIn(MSP430::FPW);

  } else
    NumBytes = StackSize - MSP430FI->getCalleeSavedFrameSize();

  // Skip the callee-saved push instructions.
  while (MBBI != MBB.end() && (MBBI->getOpcode() == MSP430::PUSH16r))
    ++MBBI;

  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();

  if (NumBytes) { // adjust stack pointer: SPW -= numbytes
    // If there is an SUB16ri of SPW immediately before this instruction, merge
    // the two.
    //NumBytes -= mergeSPUpdates(MBB, MBBI, true);
    // If there is an ADD16ri or SUB16ri of SPW immediately after this
    // instruction, merge the two instructions.
    // mergeSPUpdatesDown(MBB, MBBI, &NumBytes);

    if (NumBytes) {
      MachineInstr *MI =
        BuildMI(MBB, MBBI, DL, TII.get(MSP430::SUB16ri), MSP430::SPW)
        .addReg(MSP430::SPW).addImm(NumBytes);
      // The SRW implicit def is dead.
      MI->getOperand(3).setIsDead();
    }
  }
}
/// TryInstructionTransform - For the case where an instruction has a single
/// pair of tied register operands, attempt some transformations that may
/// either eliminate the tied operands or improve the opportunities for
/// coalescing away the register copy.  Returns true if the tied operands
/// are eliminated altogether.
bool TwoAddressInstructionPass::
TryInstructionTransform(MachineBasicBlock::iterator &mi,
                        MachineBasicBlock::iterator &nmi,
                        MachineFunction::iterator &mbbi,
                        unsigned SrcIdx, unsigned DstIdx, unsigned Dist) {
  const TargetInstrDesc &TID = mi->getDesc();
  unsigned regA = mi->getOperand(DstIdx).getReg();
  unsigned regB = mi->getOperand(SrcIdx).getReg();

  assert(TargetRegisterInfo::isVirtualRegister(regB) &&
         "cannot make instruction into two-address form");

  // If regA is dead and the instruction can be deleted, just delete
  // it so it doesn't clobber regB.
  bool regBKilled = isKilled(*mi, regB, MRI, TII);
  if (!regBKilled && mi->getOperand(DstIdx).isDead() &&
      DeleteUnusedInstr(mi, nmi, mbbi, Dist)) {
    ++NumDeletes;
    return true; // Done with this instruction.
  }

  // Check if it is profitable to commute the operands.
  unsigned SrcOp1, SrcOp2;
  unsigned regC = 0;
  unsigned regCIdx = ~0U;
  bool TryCommute = false;
  bool AggressiveCommute = false;
  if (TID.isCommutable() && mi->getNumOperands() >= 3 &&
      TII->findCommutedOpIndices(mi, SrcOp1, SrcOp2)) {
    if (SrcIdx == SrcOp1)
      regCIdx = SrcOp2;
    else if (SrcIdx == SrcOp2)
      regCIdx = SrcOp1;

    if (regCIdx != ~0U) {
      regC = mi->getOperand(regCIdx).getReg();
      if (!regBKilled && isKilled(*mi, regC, MRI, TII))
        // If C dies but B does not, swap the B and C operands.
        // This makes the live ranges of A and C joinable.
        TryCommute = true;
      else if (isProfitableToCommute(regB, regC, mi, mbbi, Dist)) {
        TryCommute = true;
        AggressiveCommute = true;
      }
    }
  }

  // If it's profitable to commute, try to do so.
  if (TryCommute && CommuteInstruction(mi, mbbi, regB, regC, Dist)) {
    ++NumCommuted;
    if (AggressiveCommute)
      ++NumAggrCommuted;
    return false;
  }

  if (TID.isConvertibleTo3Addr()) {
    // This instruction is potentially convertible to a true
    // three-address instruction.  Check if it is profitable.
    if (!regBKilled || isProfitableToConv3Addr(regA)) {
      // Try to convert it.
      if (ConvertInstTo3Addr(mi, nmi, mbbi, regB, Dist)) {
        ++NumConvertedTo3Addr;
        return true; // Done with this instruction.
      }
    }
  }
  return false;
}
void MSP430FrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I) const {
  const MSP430InstrInfo &TII =
    *static_cast<const MSP430InstrInfo*>(MF.getTarget().getInstrInfo());
  unsigned StackAlign = getStackAlignment();

  if (!hasReservedCallFrame(MF)) {
    // If the stack pointer can be changed after prologue, turn the
    // adjcallstackup instruction into a 'sub SPW, <amt>' and the
    // adjcallstackdown instruction into 'add SPW, <amt>'
    // TODO: consider using push / pop instead of sub + store / add
    MachineInstr *Old = I;
    uint64_t Amount = Old->getOperand(0).getImm();
    if (Amount != 0) {
      // We need to keep the stack aligned properly.  To do this, we round the
      // amount of space needed for the outgoing arguments up to the next
      // alignment boundary.
      Amount = (Amount+StackAlign-1)/StackAlign*StackAlign;

      MachineInstr *New = 0;
      if (Old->getOpcode() == TII.getCallFrameSetupOpcode()) {
        New = BuildMI(MF, Old->getDebugLoc(),
                      TII.get(MSP430::SUB16ri), MSP430::SPW)
          .addReg(MSP430::SPW).addImm(Amount);
      } else {
        assert(Old->getOpcode() == TII.getCallFrameDestroyOpcode());
        // factor out the amount the callee already popped.
        uint64_t CalleeAmt = Old->getOperand(1).getImm();
        Amount -= CalleeAmt;
        if (Amount)
          New = BuildMI(MF, Old->getDebugLoc(),
                        TII.get(MSP430::ADD16ri), MSP430::SPW)
            .addReg(MSP430::SPW).addImm(Amount);
      }

      if (New) {
        // The SRW implicit def is dead.
        New->getOperand(3).setIsDead();

        // Replace the pseudo instruction with a new instruction...
        MBB.insert(I, New);
      }
    }
  } else if (I->getOpcode() == TII.getCallFrameDestroyOpcode()) {
    // If we are performing frame pointer elimination and if the callee pops
    // something off the stack pointer, add it back.
    if (uint64_t CalleeAmt = I->getOperand(1).getImm()) {
      MachineInstr *Old = I;
      MachineInstr *New =
        BuildMI(MF, Old->getDebugLoc(), TII.get(MSP430::SUB16ri),
                MSP430::SPW).addReg(MSP430::SPW).addImm(CalleeAmt);
      // The SRW implicit def is dead.
      New->getOperand(3).setIsDead();

      MBB.insert(I, New);
    }
  }

  MBB.erase(I);
}
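// Editorial illustration (hypothetical amounts, not from the sources): with
// no reserved call frame and a 16-byte outgoing-argument area, the call
// frame pseudos above lower roughly to stack-pointer arithmetic on SPW (r1):
//
//   ADJCALLSTACKDOWN 16    ->   SUB16ri SPW, SPW, 16   ; sub #16, r1
//   ADJCALLSTACKUP 16, 0   ->   ADD16ri SPW, SPW, 16   ; add #16, r1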
void MSP430FrameLowering::emitEpilogue(MachineFunction &MF,
                                       MachineBasicBlock &MBB) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  MSP430MachineFunctionInfo *MSP430FI = MF.getInfo<MSP430MachineFunctionInfo>();
  const MSP430InstrInfo &TII =
    *static_cast<const MSP430InstrInfo*>(MF.getTarget().getInstrInfo());

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  unsigned RetOpcode = MBBI->getOpcode();
  DebugLoc DL = MBBI->getDebugLoc();

  switch (RetOpcode) {
  case MSP430::RET:
  case MSP430::RETI: break;  // These are ok
  default:
    llvm_unreachable("Can only insert epilog into returning blocks");
  }

  // Get the number of bytes to allocate from the FrameInfo.
  uint64_t StackSize = MFI->getStackSize();
  unsigned CSSize = MSP430FI->getCalleeSavedFrameSize();
  uint64_t NumBytes = 0;

  if (hasFP(MF)) {
    // Calculate required stack adjustment.
    uint64_t FrameSize = StackSize - 2;
    NumBytes = FrameSize - CSSize;

    // pop FPW.
    BuildMI(MBB, MBBI, DL, TII.get(MSP430::POP16r), MSP430::FPW);
  } else
    NumBytes = StackSize - CSSize;

  // Skip the callee-saved pop instructions.
  while (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PI = prior(MBBI);
    unsigned Opc = PI->getOpcode();
    if (Opc != MSP430::POP16r && !PI->isTerminator())
      break;
    --MBBI;
  }

  DL = MBBI->getDebugLoc();

  // If there is an ADD16ri or SUB16ri of SPW immediately before this
  // instruction, merge the two instructions.
  //if (NumBytes || MFI->hasVarSizedObjects())
  //  mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);

  if (MFI->hasVarSizedObjects()) {
    BuildMI(MBB, MBBI, DL,
            TII.get(MSP430::MOV16rr), MSP430::SPW).addReg(MSP430::FPW);
    if (CSSize) {
      MachineInstr *MI =
        BuildMI(MBB, MBBI, DL,
                TII.get(MSP430::SUB16ri), MSP430::SPW)
        .addReg(MSP430::SPW).addImm(CSSize);
      // The SRW implicit def is dead.
      MI->getOperand(3).setIsDead();
    }
  } else {
    // adjust stack pointer back: SPW += numbytes
    if (NumBytes) {
      MachineInstr *MI =
        BuildMI(MBB, MBBI, DL, TII.get(MSP430::ADD16ri), MSP430::SPW)
        .addReg(MSP430::SPW).addImm(NumBytes);
      // The SRW implicit def is dead.
      MI->getOperand(3).setIsDead();
    }
  }
}
MachineInstrBuilder
MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc,
                                  MachineBasicBlock::iterator I) const {
  MachineInstrBuilder MIB;

  // Certain branches have two forms: e.g. beq $1, $zero, dest vs beqz $1, dest
  // Pick the zero form of the branch for readable assembly and for greater
  // branch distance in non-microMIPS mode.
  // Additionally, MIPSR6 does not permit the use of register $zero for compact
  // branches.
  // FIXME: Certain atomic sequences on mips64 generate 32bit references to
  // Mips::ZERO, which is incorrect. This test should be updated to use
  // Subtarget.getABI().GetZeroReg() when those atomic sequences and others
  // are fixed.
  int ZeroOperandPosition = -1;
  bool BranchWithZeroOperand = false;
  if (I->isBranch() && !I->isPseudo()) {
    auto TRI = I->getParent()->getParent()->getSubtarget().getRegisterInfo();
    ZeroOperandPosition = I->findRegisterUseOperandIdx(Mips::ZERO, false, TRI);
    BranchWithZeroOperand = ZeroOperandPosition != -1;
  }

  if (BranchWithZeroOperand) {
    switch (NewOpc) {
    case Mips::BEQC:
      NewOpc = Mips::BEQZC;
      break;
    case Mips::BNEC:
      NewOpc = Mips::BNEZC;
      break;
    case Mips::BGEC:
      NewOpc = Mips::BGEZC;
      break;
    case Mips::BLTC:
      NewOpc = Mips::BLTZC;
      break;
    case Mips::BEQC64:
      NewOpc = Mips::BEQZC64;
      break;
    case Mips::BNEC64:
      NewOpc = Mips::BNEZC64;
      break;
    }
  }

  MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), get(NewOpc));

  // For MIPSR6, JI*C requires an immediate 0 as an operand; JIALC(64)
  // additionally requires the removal of its implicit-def %ra operand, as
  // copying the implicit operands of the instruction we're looking at will
  // give us the correct flags.
  if (NewOpc == Mips::JIC || NewOpc == Mips::JIALC || NewOpc == Mips::JIC64 ||
      NewOpc == Mips::JIALC64) {

    if (NewOpc == Mips::JIALC || NewOpc == Mips::JIALC64)
      MIB->RemoveOperand(0);

    for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) {
      MIB.add(I->getOperand(J));
    }

    MIB.addImm(0);

  } else {
    for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) {
      if (BranchWithZeroOperand && (unsigned)ZeroOperandPosition == J)
        continue;

      MIB.add(I->getOperand(J));
    }
  }

  MIB.copyImplicitOps(*I);
  MIB.setMemRefs(I->memoperands_begin(), I->memoperands_end());
  return MIB;
}
void MipsSEInstrInfo::
loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                 unsigned DestReg, int FI, const TargetRegisterClass *RC,
                 const TargetRegisterInfo *TRI, int64_t Offset) const {
  DebugLoc DL;
  if (I != MBB.end()) DL = I->getDebugLoc();
  MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad);
  unsigned Opc = 0;

  const Function *Func = MBB.getParent()->getFunction();
  bool ReqIndirectLoad = Func->hasFnAttribute("interrupt") &&
                         (DestReg == Mips::LO0 || DestReg == Mips::LO0_64 ||
                          DestReg == Mips::HI0 || DestReg == Mips::HI0_64);

  if (Mips::GPR32RegClass.hasSubClassEq(RC))
    Opc = Mips::LW;
  else if (Mips::GPR64RegClass.hasSubClassEq(RC))
    Opc = Mips::LD;
  else if (Mips::ACC64RegClass.hasSubClassEq(RC))
    Opc = Mips::LOAD_ACC64;
  else if (Mips::ACC64DSPRegClass.hasSubClassEq(RC))
    Opc = Mips::LOAD_ACC64DSP;
  else if (Mips::ACC128RegClass.hasSubClassEq(RC))
    Opc = Mips::LOAD_ACC128;
  else if (Mips::DSPCCRegClass.hasSubClassEq(RC))
    Opc = Mips::LOAD_CCOND_DSP;
  else if (Mips::FGR32RegClass.hasSubClassEq(RC))
    Opc = Mips::LWC1;
  else if (Mips::AFGR64RegClass.hasSubClassEq(RC))
    Opc = Mips::LDC1;
  else if (Mips::FGR64RegClass.hasSubClassEq(RC))
    Opc = Mips::LDC164;
  else if (TRI->isTypeLegalForClass(*RC, MVT::v16i8))
    Opc = Mips::LD_B;
  else if (TRI->isTypeLegalForClass(*RC, MVT::v8i16) ||
           TRI->isTypeLegalForClass(*RC, MVT::v8f16))
    Opc = Mips::LD_H;
  else if (TRI->isTypeLegalForClass(*RC, MVT::v4i32) ||
           TRI->isTypeLegalForClass(*RC, MVT::v4f32))
    Opc = Mips::LD_W;
  else if (TRI->isTypeLegalForClass(*RC, MVT::v2i64) ||
           TRI->isTypeLegalForClass(*RC, MVT::v2f64))
    Opc = Mips::LD_D;
  else if (Mips::HI32RegClass.hasSubClassEq(RC))
    Opc = Mips::LW;
  else if (Mips::HI64RegClass.hasSubClassEq(RC))
    Opc = Mips::LD;
  else if (Mips::LO32RegClass.hasSubClassEq(RC))
    Opc = Mips::LW;
  else if (Mips::LO64RegClass.hasSubClassEq(RC))
    Opc = Mips::LD;

  assert(Opc && "Register class not handled!");

  if (!ReqIndirectLoad)
    BuildMI(MBB, I, DL, get(Opc), DestReg)
        .addFrameIndex(FI)
        .addImm(Offset)
        .addMemOperand(MMO);
  else {
    // Load HI/LO through K0. Notably the DestReg is encoded into the
    // instruction itself.
    unsigned Reg = Mips::K0;
    unsigned LdOp = Mips::MTLO;
    if (DestReg == Mips::HI0)
      LdOp = Mips::MTHI;

    if (Subtarget.getABI().ArePtrs64bit()) {
      Reg = Mips::K0_64;
      if (DestReg == Mips::HI0_64)
        LdOp = Mips::MTHI64;
      else
        LdOp = Mips::MTLO64;
    }

    BuildMI(MBB, I, DL, get(Opc), Reg)
        .addFrameIndex(FI)
        .addImm(Offset)
        .addMemOperand(MMO);
    BuildMI(MBB, I, DL, get(LdOp)).addReg(Reg);
  }
}
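// Editorial illustration (hypothetical frame offset, not from the sources):
// for an "interrupt" function reloading LO0 on a 32-bit ABI, the indirect
// path above emits a load into $k0 followed by a move into the LO register:
//
//   lw   $k0, 16($sp)   ; LW K0, <FI + Offset>
//   mtlo $k0            ; MTLO K0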
bool GCMachineCodeFixup::runOnMachineFunction(MachineFunction &MF) {
  // Quick exit for functions that do not use GC.
  if (!MF.getFunction()->hasGC())
    return false;

  const TargetMachine &TM = MF.getTarget();
  const TargetInstrInfo *TII = TM.getInstrInfo();
  GCModuleInfo &GMI = getAnalysis<GCModuleInfo>();
  GCFunctionInfo &GCFI = GMI.getFunctionInfo(*MF.getFunction());

  for (MachineFunction::iterator MBBI = MF.begin(),
                                 MBBE = MF.end(); MBBI != MBBE; ++MBBI) {
    for (MachineBasicBlock::iterator MII = MBBI->begin(),
                                     MIE = MBBI->end(); MII != MIE;) {
      if (!MII->isGCRegRoot() || !MII->getOperand(0).isReg()) {
        ++MII;
        continue;
      }

      // Trace the register back to its location at the site of the call
      // (either a physical reg or a frame index).
      bool TracingReg = true;
      unsigned TracedReg = MII->getOperand(0).getReg();
      int FrameIndex;

      MachineBasicBlock::iterator PrevII = MII;
      for (--PrevII;; --PrevII) {
        if (PrevII->isGCRegRoot() && PrevII->getOperand(0).isReg())
          break;
        if (PrevII->isCall())
          break;

        int FI;

        // Trace back through register reloads.
        unsigned Reg =
          TM.getInstrInfo()->isLoadFromStackSlotPostFE(&*PrevII, FI);
        if (Reg) {
          // This is a reload. If we're tracing this register, start tracing
          // the frame index instead.
          if (TracingReg && TracedReg == Reg) {
            TracingReg = false;
            FrameIndex = FI;
          }
          continue;
        }

        // Trace back through spills.
        if (TM.getInstrInfo()->isStoreToStackSlotPostFE(&*PrevII, FI))
          continue;

        // Trace back through register-to-register copies.
        if (PrevII->isCopy()) {
          if (TracingReg && TracedReg == PrevII->getOperand(0).getReg())
            TracedReg = PrevII->getOperand(1).getReg();
          continue;
        }

        // Trace back through non-register GC_REG_ROOT instructions.
        if (PrevII->isGCRegRoot() && !PrevII->getOperand(0).isReg())
          continue;

        DEBUG(dbgs() << "Bad instruction: " << *PrevII);
        llvm_unreachable("GC_REG_ROOT found in an unexpected location!");
      }

      // Now we've reached either a call or another GC_REG_ROOT instruction.
      // Move the GC_REG_ROOT instruction we're considering to the right place,
      // and rewrite it if necessary.
      //
      // Also, tell the GCFunctionInfo about the frame index, since this is
      // our only chance -- the frame indices will be deleted by the time
      // GCMachineCodeAnalysis runs.
      ++PrevII;
      unsigned RootIndex = MII->getOperand(1).getImm();
      MachineInstr *NewMI;
      if (TracingReg) {
        MachineInstrBuilder MIB = BuildMI(MF, MII->getDebugLoc(),
                                          TII->get(TargetOpcode::GC_REG_ROOT));
        MIB.addReg(TracedReg).addImm(RootIndex);
        NewMI = MIB;
      } else {
        NewMI = TII->emitFrameIndexGCRegRoot(MF, FrameIndex, RootIndex,
                                             MII->getDebugLoc());
        GCFI.spillRegRoot(RootIndex, FrameIndex);
      }

      MBBI->insert(PrevII, NewMI);

      MachineBasicBlock::iterator NextII = MII;
      ++NextII;
      MII->eraseFromParent();
      MII = NextII;
    }
  }

  return true;
}
void MipsSEInstrInfo::expandERet(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator I) const {
  BuildMI(MBB, I, I->getDebugLoc(), get(Mips::ERET));
}
void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
                              int &SPAdj) {
  assert(Fn.getSubtarget().getRegisterInfo() &&
         "getRegisterInfo() must be implemented!");
  const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
  const TargetRegisterInfo &TRI = *Fn.getSubtarget().getRegisterInfo();
  const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
  unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode();
  unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();

  if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(*BB);

  bool InsideCallSequence = false;

  for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {

    if (I->getOpcode() == FrameSetupOpcode ||
        I->getOpcode() == FrameDestroyOpcode) {
      InsideCallSequence = (I->getOpcode() == FrameSetupOpcode);
      SPAdj += TII.getSPAdjust(I);

      I = TFI->eliminateCallFramePseudoInstr(Fn, *BB, I);
      continue;
    }

    MachineInstr *MI = I;
    bool DoIncr = true;
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      if (!MI->getOperand(i).isFI())
        continue;

      // Frame indices in debug values are encoded in a target independent
      // way with simply the frame index and offset rather than any
      // target-specific addressing mode.
      if (MI->isDebugValue()) {
        assert(i == 0 && "Frame indices can only appear as the first "
                         "operand of a DBG_VALUE machine instruction");
        unsigned Reg;
        MachineOperand &Offset = MI->getOperand(1);
        Offset.setImm(Offset.getImm() +
                      TFI->getFrameIndexReference(
                          Fn, MI->getOperand(0).getIndex(), Reg));
        MI->getOperand(0).ChangeToRegister(Reg, false /*isDef*/);
        continue;
      }

      // TODO: This code should be commoned with the code for
      // PATCHPOINT. There's no good reason for the difference in
      // implementation other than historical accident.  The only
      // remaining difference is the unconditional use of the stack
      // pointer as the base register.
      if (MI->getOpcode() == TargetOpcode::STATEPOINT) {
        assert((!MI->isDebugValue() || i == 0) &&
               "Frame indices can only appear as the first operand of a "
               "DBG_VALUE machine instruction");
        unsigned Reg;
        MachineOperand &Offset = MI->getOperand(i + 1);
        const unsigned refOffset =
          TFI->getFrameIndexReferenceFromSP(Fn, MI->getOperand(i).getIndex(),
                                            Reg);

        Offset.setImm(Offset.getImm() + refOffset);
        MI->getOperand(i).ChangeToRegister(Reg, false /*isDef*/);
        continue;
      }

      // Some instructions (e.g. inline asm instructions) can have
      // multiple frame indices and/or cause eliminateFrameIndex
      // to insert more than one instruction. We need the register
      // scavenger to go through all of these instructions so that
      // it can update its register information. We keep the
      // iterator at the point before insertion so that we can
      // revisit them in full.
      bool AtBeginning = (I == BB->begin());
      if (!AtBeginning) --I;

      // If this instruction has a FrameIndex operand, we need to
      // use that target machine register info object to eliminate
      // it.
      TRI.eliminateFrameIndex(MI, SPAdj, i,
                              FrameIndexVirtualScavenging ? nullptr : RS);

      // Reset the iterator if we were at the beginning of the BB.
      if (AtBeginning) {
        I = BB->begin();
        DoIncr = false;
      }

      MI = nullptr;
      break;
    }

    // If we are looking at a call sequence, we need to keep track of
    // the SP adjustment made by each instruction in the sequence.
    // This includes both the frame setup/destroy pseudos (handled above),
    // as well as other instructions that have side effects w.r.t the SP.
    // Note that this must come after eliminateFrameIndex, because
    // if I itself referred to a frame index, we shouldn't count its own
    // adjustment.
    if (MI && InsideCallSequence)
      SPAdj += TII.getSPAdjust(MI);

    if (DoIncr && I != BB->end()) ++I;

    // Update register states.
    if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI);
  }
}
static bool combineRestoreOR(MachineBasicBlock::iterator RestoreMI,
                             MachineBasicBlock::iterator OrMI,
                             const TargetInstrInfo *TII) {
  // Before:  or <op0>, <op1>, %i[0-7]
  //          restore %g0, %g0, %i[0-7]
  // and <op0> or <op1> is zero,
  //
  // After :  restore <op0>, <op1>, %o[0-7]

  unsigned reg = OrMI->getOperand(0).getReg();
  if (reg < SP::I0 || reg > SP::I7)
    return false;

  // check whether it is a copy.
  if (OrMI->getOpcode() == SP::ORrr
      && OrMI->getOperand(1).getReg() != SP::G0
      && OrMI->getOperand(2).getReg() != SP::G0)
    return false;

  if (OrMI->getOpcode() == SP::ORri
      && OrMI->getOperand(1).getReg() != SP::G0
      && (!OrMI->getOperand(2).isImm() || OrMI->getOperand(2).getImm() != 0))
    return false;

  // Erase RESTORE.
  RestoreMI->eraseFromParent();

  // Change OR to RESTORE.
  OrMI->setDesc(TII->get((OrMI->getOpcode() == SP::ORrr)
                         ? SP::RESTORErr
                         : SP::RESTOREri));

  // Map the destination register.
  OrMI->getOperand(0).setReg(reg - SP::I0 + SP::O0);

  return true;
}
/// AnalyzeBranch - Analyze the branching code at the end of MBB, returning
/// true if it cannot be understood (e.g. it's a switch dispatch or isn't
/// implemented for a target).  Upon success, this returns false and returns
/// with the following information in various cases:
///
/// 1. If this block ends with no branches (it just falls through to its succ)
///    just return false, leaving TBB/FBB null.
/// 2. If this block ends with only an unconditional branch, it sets TBB to be
///    the destination block.
/// 3. If this block ends with a conditional branch and it falls through to
///    a successor block, it sets TBB to be the branch destination block and a
///    list of operands that evaluate the condition. These
///    operands can be passed to other TargetInstrInfo methods to create new
///    branches.
/// 4. If this block ends with a conditional branch and an unconditional
///    block, it returns the 'true' destination in TBB, the 'false' destination
///    in FBB, and a list of operands that evaluate the condition. These
///    operands can be passed to other TargetInstrInfo methods to create new
///    branches.
///
/// Note that RemoveBranch and InsertBranch must be implemented to support
/// cases where this method returns success.
///
bool XCoreInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                   MachineBasicBlock *&TBB,
                                   MachineBasicBlock *&FBB,
                                   SmallVectorImpl<MachineOperand> &Cond,
                                   bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  if (!isUnpredicatedTerminator(I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
    if (IsBRU(LastInst->getOpcode())) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }

    XCore::CondCode BranchCode = GetCondFromBranchOpc(LastInst->getOpcode());
    if (BranchCode == XCore::COND_INVALID)
      return true; // Can't handle indirect branch.

    // Conditional branch
    // Block ends with fall-through condbranch.
    TBB = LastInst->getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(BranchCode));
    Cond.push_back(LastInst->getOperand(0));
    return false;
  }

  // Get the instruction before it if it's a terminator.
  MachineInstr *SecondLastInst = I;

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
    return true;

  unsigned SecondLastOpc = SecondLastInst->getOpcode();
  XCore::CondCode BranchCode = GetCondFromBranchOpc(SecondLastOpc);

  // If the block ends with conditional branch followed by unconditional,
  // handle it.
  if (BranchCode != XCore::COND_INVALID && IsBRU(LastInst->getOpcode())) {
    TBB = SecondLastInst->getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(BranchCode));
    Cond.push_back(SecondLastInst->getOperand(0));
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it. The second
  // one is not executed, so remove it.
  if (IsBRU(SecondLastInst->getOpcode()) && IsBRU(LastInst->getOpcode())) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // Likewise if it ends with a branch table followed by an unconditional
  // branch: the dead branch can be removed, but the block itself still
  // can't be analyzed, so return true.
  if (IsBR_JT(SecondLastInst->getOpcode()) && IsBRU(LastInst->getOpcode())) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return true;
  }

  // Otherwise, can't handle this.
  return true;
}
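// Editorial sketch (not from the XCore sources): the round trip a generic
// client such as the branch folder performs against the AnalyzeBranch
// contract documented above.  'TII' and 'DL' are assumed to be in scope.
//
//   MachineBasicBlock *TBB = 0, *FBB = 0;
//   SmallVector<MachineOperand, 4> Cond;
//   if (!TII->AnalyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/true)) {
//     TII->RemoveBranch(MBB);                       // erase old terminators
//     if (TBB)                                      // re-create as needed
//       TII->InsertBranch(MBB, TBB, FBB, Cond, DL);
//   }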
void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
                                       MachineBasicBlock &MBB) const {
  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();

  const MipsSEInstrInfo &TII =
      *static_cast<const MipsSEInstrInfo *>(STI.getInstrInfo());
  const MipsRegisterInfo &RegInfo =
      *static_cast<const MipsRegisterInfo *>(STI.getRegisterInfo());

  MachineBasicBlock::iterator MBBI = MBB.begin();
  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
  MipsABIInfo ABI = STI.getABI();
  unsigned SP = ABI.GetStackPtr();
  unsigned FP = ABI.GetFramePtr();
  unsigned ZERO = ABI.GetNullPtr();
  unsigned ADDu = ABI.GetPtrAdduOp();
  unsigned ADDiu = ABI.GetPtrAddiuOp();
  unsigned AND = ABI.IsN64() ? Mips::AND64 : Mips::AND;

  const TargetRegisterClass *RC = ABI.ArePtrs64bit() ?
        &Mips::GPR64RegClass : &Mips::GPR32RegClass;

  // First, compute final stack size.
  uint64_t StackSize = MFI->getStackSize();

  // No need to allocate space on the stack.
  if (StackSize == 0 && !MFI->adjustsStack()) return;

  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  MachineLocation DstML, SrcML;

  // Adjust stack.
  TII.adjustStackPtr(SP, -StackSize, MBB, MBBI);

  // emit ".cfi_def_cfa_offset StackSize"
  unsigned CFIIndex = MMI.addFrameInst(
      MCCFIInstruction::createDefCfaOffset(nullptr, -StackSize));
  BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);

  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();

  if (CSI.size()) {
    // Find the instruction past the last instruction that saves a callee-saved
    // register to the stack.
    for (unsigned i = 0; i < CSI.size(); ++i)
      ++MBBI;

    // Iterate over list of callee-saved registers and emit .cfi_offset
    // directives.
    for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
           E = CSI.end(); I != E; ++I) {
      int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
      unsigned Reg = I->getReg();

      // If Reg is a double precision register, emit two cfa_offsets,
      // one for each of the paired single precision registers.
      if (Mips::AFGR64RegClass.contains(Reg)) {
        unsigned Reg0 =
            MRI->getDwarfRegNum(RegInfo.getSubReg(Reg, Mips::sub_lo), true);
        unsigned Reg1 =
            MRI->getDwarfRegNum(RegInfo.getSubReg(Reg, Mips::sub_hi), true);

        if (!STI.isLittle())
          std::swap(Reg0, Reg1);

        unsigned CFIIndex = MMI.addFrameInst(
            MCCFIInstruction::createOffset(nullptr, Reg0, Offset));
        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex);

        CFIIndex = MMI.addFrameInst(
            MCCFIInstruction::createOffset(nullptr, Reg1, Offset + 4));
        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex);
      } else if (Mips::FGR64RegClass.contains(Reg)) {
        unsigned Reg0 = MRI->getDwarfRegNum(Reg, true);
        unsigned Reg1 = MRI->getDwarfRegNum(Reg, true) + 1;

        if (!STI.isLittle())
          std::swap(Reg0, Reg1);

        unsigned CFIIndex = MMI.addFrameInst(
            MCCFIInstruction::createOffset(nullptr, Reg0, Offset));
        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex);

        CFIIndex = MMI.addFrameInst(
            MCCFIInstruction::createOffset(nullptr, Reg1, Offset + 4));
        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex);
      } else {
        // Reg is either in GPR32 or FGR32.
        unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
            nullptr, MRI->getDwarfRegNum(Reg, 1), Offset));
        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex);
      }
    }
  }

  if (MipsFI->callsEhReturn()) {
    // Insert instructions that spill eh data registers.
    for (int I = 0; I < 4; ++I) {
      if (!MBB.isLiveIn(ABI.GetEhDataReg(I)))
        MBB.addLiveIn(ABI.GetEhDataReg(I));
      TII.storeRegToStackSlot(MBB, MBBI, ABI.GetEhDataReg(I), false,
                              MipsFI->getEhDataRegFI(I), RC, &RegInfo);
    }

    // Emit .cfi_offset directives for eh data registers.
    for (int I = 0; I < 4; ++I) {
      int64_t Offset = MFI->getObjectOffset(MipsFI->getEhDataRegFI(I));
      unsigned Reg = MRI->getDwarfRegNum(ABI.GetEhDataReg(I), true);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, Reg, Offset));
      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }
  }

  // if framepointer enabled, set it to point to the stack pointer.
  if (hasFP(MF)) {
    // Insert instruction "move $fp, $sp" at this location.
    BuildMI(MBB, MBBI, dl, TII.get(ADDu), FP).addReg(SP).addReg(ZERO)
      .setMIFlag(MachineInstr::FrameSetup);

    // emit ".cfi_def_cfa_register $fp"
    unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(
        nullptr, MRI->getDwarfRegNum(FP, true)));
    BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);

    if (RegInfo.needsStackRealignment(MF)) {
      // addiu $Reg, $zero, -MaxAlignment
      // andi $sp, $sp, $Reg
      unsigned VR = MF.getRegInfo().createVirtualRegister(RC);
      assert(isInt<16>(MFI->getMaxAlignment()) &&
             "Function's alignment size requirement is not supported.");
      int MaxAlign = -(signed)MFI->getMaxAlignment();

      BuildMI(MBB, MBBI, dl, TII.get(ADDiu), VR).addReg(ZERO).addImm(MaxAlign);
      BuildMI(MBB, MBBI, dl, TII.get(AND), SP).addReg(SP).addReg(VR);

      if (hasBP(MF)) {
        // move $s7, $sp
        unsigned BP = STI.isABI_N64() ? Mips::S7_64 : Mips::S7;
        BuildMI(MBB, MBBI, dl, TII.get(ADDu), BP)
          .addReg(SP)
          .addReg(ZERO);
      }
    }
  }
}
/// converToHardwareLoop - check if the loop is a candidate for /// converting to a hardware loop. If so, then perform the /// transformation. /// /// This function works on innermost loops first. A loop can /// be converted if it is a counting loop; either a register /// value or an immediate. /// /// The code makes several assumptions about the representation /// of the loop in llvm. bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) { bool Changed = false; // Process nested loops first. for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) { Changed |= convertToHardwareLoop(*I); } // If a nested loop has been converted, then we can't convert this loop. if (Changed) { return Changed; } // Are we able to determine the trip count for the loop? CountValue *TripCount = getTripCount(L); if (TripCount == 0) { return false; } // Does the loop contain any invalid instructions? if (containsInvalidInstruction(L)) { return false; } MachineBasicBlock *Preheader = L->getLoopPreheader(); // No preheader means there's not place for the loop instr. if (Preheader == 0) { return false; } MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator(); MachineBasicBlock *LastMBB = L->getExitingBlock(); // Don't generate hw loop if the loop has more than one exit. if (LastMBB == 0) { return false; } MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator(); // Determine the loop start. MachineBasicBlock *LoopStart = L->getTopBlock(); if (L->getLoopLatch() != LastMBB) { // When the exit and latch are not the same, use the latch block as the // start. // The loop start address is used only after the 1st iteration, and the loop // latch may contains instrs. that need to be executed after the 1st iter. LoopStart = L->getLoopLatch(); // Make sure the latch is a successor of the exit, otherwise it won't work. if (!LastMBB->isSuccessor(LoopStart)) { return false; } } // Convert the loop to a hardware loop DEBUG(dbgs() << "Change to hardware loop at "; L->dump()); if (TripCount->isReg()) { // Create a copy of the loop count register. MachineFunction *MF = LastMBB->getParent(); const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(TripCount->getReg()); unsigned CountReg = MF->getRegInfo().createVirtualRegister(RC); BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(), TII->get(TargetOpcode::COPY), CountReg).addReg(TripCount->getReg()); if (TripCount->isNeg()) { unsigned CountReg1 = CountReg; CountReg = MF->getRegInfo().createVirtualRegister(RC); BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(), TII->get(Hexagon::NEG), CountReg).addReg(CountReg1); } // Add the Loop instruction to the begining of the loop. BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(), TII->get(Hexagon::LOOP0_r)).addMBB(LoopStart).addReg(CountReg); } else { assert(TripCount->isImm() && "Expecting immedate vaule for trip count"); // Add the Loop immediate instruction to the beginning of the loop. int64_t CountImm = TripCount->getImm(); BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(), TII->get(Hexagon::LOOP0_i)).addMBB(LoopStart).addImm(CountImm); } // Make sure the loop start always has a reference in the CFG. We need to // create a BlockAddress operand to get this mechanism to work both the // MachineBasicBlock and BasicBlock objects need the flag set. 
LoopStart->setHasAddressTaken(); // This call is needed to set the hasAddressTaken flag on the BasicBlock object. BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock())); // Replace the loop branch with an endloop instruction. DebugLoc dl = LastI->getDebugLoc(); BuildMI(*LastMBB, LastI, dl, TII->get(Hexagon::ENDLOOP0)).addMBB(LoopStart); // The loop ends with either: // - a conditional branch followed by an unconditional branch, or // - a conditional branch to the loop start. if (LastI->getOpcode() == Hexagon::JMP_c || LastI->getOpcode() == Hexagon::JMP_cNot) { // Delete the conditional branch and, if it exited the loop, replace any // remaining branch with an unconditional branch out of the loop. MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB(); LastI = LastMBB->erase(LastI); if (!L->contains(BranchTarget)) { if (LastI != LastMBB->end()) { TII->RemoveBranch(*LastMBB); } SmallVector<MachineOperand, 0> Cond; TII->InsertBranch(*LastMBB, BranchTarget, 0, Cond, dl); } } else { // Conditional branch to loop start; just delete it. LastMBB->erase(LastI); } delete TripCount; ++NumHWLoops; return true; }
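A compact model (not from the original source) of the hardware-loop semantics the pass targets: LOOP0_r seeds a hidden loop count in the preheader, and ENDLOOP0 in the exiting block decrements it and branches back while iterations remain, replacing the compare-and-branch that was just deleted. This is a simplification and assumes a trip count of at least one.

#include <cstdint>

struct HWLoopModel {
  uint32_t LC0 = 0;                             // hidden loop-count register
  void loop0_r(uint32_t Count) { LC0 = Count; } // LOOP0_r start, count
  bool endloop0() { return LC0-- > 1; }         // true => branch back to start
};

uint32_t sumFirstN(uint32_t N) { // assumes N >= 1
  HWLoopModel HW;
  uint32_t Sum = 0, I = 0;
  HW.loop0_r(N);           // emitted at the preheader's insert point
  do {                     // loop body needs no compare or branch
    Sum += ++I;
  } while (HW.endloop0()); // ENDLOOP0 at the old loop branch
  return Sum;              // sumFirstN(4) == 10
}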
// Branch analysis. bool LembergInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const { // If the block has no terminators, it just falls into the block after it. MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) return false; // Get the last instruction in the block. MachineInstr *LastInst = I; // If there is only one terminator instruction, process it. unsigned LastOpc = LastInst->getOpcode(); if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { if (LastOpc == Lemberg::JUMP) { TBB = LastInst->getOperand(0).getMBB(); return false; } if (LastOpc == Lemberg::JUMPtrue) { TBB = LastInst->getOperand(1).getMBB(); Cond.push_back(LastInst->getOperand(0)); Cond.push_back(MachineOperand::CreateImm(LembergCC::TRUE)); return false; } if (LastOpc == Lemberg::JUMPfalse) { TBB = LastInst->getOperand(1).getMBB(); Cond.push_back(LastInst->getOperand(0)); Cond.push_back(MachineOperand::CreateImm(LembergCC::FALSE)); return false; } if (LastOpc == Lemberg::JUMPpred) { TBB = LastInst->getOperand(2).getMBB(); Cond.push_back(LastInst->getOperand(1)); Cond.push_back(LastInst->getOperand(0)); return false; } if (LastOpc == Lemberg::JUMPeqz || LastOpc == Lemberg::JUMPnez || LastOpc == Lemberg::JUMPltz || LastOpc == Lemberg::JUMPgez || LastOpc == Lemberg::JUMPgtz || LastOpc == Lemberg::JUMPlez) { TBB = LastInst->getOperand(1).getMBB(); LembergCC::CondCode Code; switch (LastOpc) { case Lemberg::JUMPeqz: Code = LembergCC::EQZ; break; case Lemberg::JUMPnez: Code = LembergCC::NEZ; break; case Lemberg::JUMPltz: Code = LembergCC::LTZ; break; case Lemberg::JUMPgez: Code = LembergCC::GEZ; break; case Lemberg::JUMPgtz: Code = LembergCC::GTZ; break; case Lemberg::JUMPlez: Code = LembergCC::LEZ; break; } Cond.push_back(LastInst->getOperand(0)); Cond.push_back(MachineOperand::CreateImm(Code)); return false; } return true; // Can't handle indirect branch. } // Get the instruction before it if it's a terminator. MachineInstr *SecondLastInst = I; // If there are three terminators, we don't know what sort of block this is. if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) { return true; } // If the block ends with a conditional jump (JUMPtrue, JUMPfalse, JUMPpred, // or a compare-with-zero jump) followed by an unconditional JUMP, handle it.
if (LastInst->getOpcode() == Lemberg::JUMP) { if (SecondLastInst->getOpcode() == Lemberg::JUMPtrue) { TBB = SecondLastInst->getOperand(1).getMBB(); Cond.push_back(SecondLastInst->getOperand(0)); Cond.push_back(MachineOperand::CreateImm(LembergCC::TRUE)); FBB = LastInst->getOperand(0).getMBB(); return false; } if (SecondLastInst->getOpcode() == Lemberg::JUMPfalse) { TBB = SecondLastInst->getOperand(1).getMBB(); Cond.push_back(SecondLastInst->getOperand(0)); Cond.push_back(MachineOperand::CreateImm(LembergCC::FALSE)); FBB = LastInst->getOperand(0).getMBB(); return false; } if (SecondLastInst->getOpcode() == Lemberg::JUMPpred) { TBB = SecondLastInst->getOperand(2).getMBB(); Cond.push_back(SecondLastInst->getOperand(1)); Cond.push_back(SecondLastInst->getOperand(0)); FBB = LastInst->getOperand(0).getMBB(); return false; } if (SecondLastInst->getOpcode() == Lemberg::JUMPeqz || SecondLastInst->getOpcode() == Lemberg::JUMPnez || SecondLastInst->getOpcode() == Lemberg::JUMPltz || SecondLastInst->getOpcode() == Lemberg::JUMPgez || SecondLastInst->getOpcode() == Lemberg::JUMPgtz || SecondLastInst->getOpcode() == Lemberg::JUMPlez) { TBB = SecondLastInst->getOperand(1).getMBB(); LembergCC::CondCode Code; switch (SecondLastInst->getOpcode()) { case Lemberg::JUMPeqz: Code = LembergCC::EQZ; break; case Lemberg::JUMPnez: Code = LembergCC::NEZ; break; case Lemberg::JUMPltz: Code = LembergCC::LTZ; break; case Lemberg::JUMPgez: Code = LembergCC::GEZ; break; case Lemberg::JUMPgtz: Code = LembergCC::GTZ; break; case Lemberg::JUMPlez: Code = LembergCC::LEZ; break; } Cond.push_back(SecondLastInst->getOperand(0)); Cond.push_back(MachineOperand::CreateImm(Code)); FBB = LastInst->getOperand(0).getMBB(); return false; } } // If the block ends with two JUMPs, handle it. The second one is // not executed, so remove it. if (SecondLastInst->getOpcode() == Lemberg::JUMP && LastInst->getOpcode() == Lemberg::JUMP) { TBB = SecondLastInst->getOperand(0).getMBB(); I = LastInst; if (AllowModify) I->eraseFromParent(); return false; } // Otherwise, can't handle this. return true; }
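A brief sketch (not from the original source) of the AnalyzeBranch contract the function above implements. Returning true means the terminators could not be analyzed; on false, the TBB/FBB/Cond outputs encode one of four block shapes. The stand-in types here are hypothetical.

#include <cstdio>
#include <vector>

struct Block;      // stand-in for MachineBasicBlock
struct Operand {}; // stand-in for MachineOperand

void describeBranch(bool Failed, const Block *TBB, const Block *FBB,
                    const std::vector<Operand> &Cond) {
  if (Failed)
    std::puts("unanalyzable terminator (e.g. an indirect branch)");
  else if (!TBB)
    std::puts("no terminators: falls through to the next block");
  else if (Cond.empty())
    std::puts("unconditional branch to TBB");
  else if (!FBB)
    std::puts("conditional branch to TBB, falls through otherwise");
  else
    std::puts("conditional branch to TBB, then unconditional branch to FBB");
}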
MachineBasicBlock::iterator AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Paired, const LdStPairFlags &Flags) { MachineBasicBlock::iterator NextI = I; ++NextI; // If NextI is the second of the two instructions to be merged, we need // to skip one further. Either way we merge, the iterator is invalidated, // and we don't need to scan the new instruction, as it's a pairwise // instruction, which we're not considering for further action anyway. if (NextI == Paired) ++NextI; int SExtIdx = Flags.getSExtIdx(); unsigned Opc = SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode()); bool IsUnscaled = isUnscaledLdSt(Opc); int OffsetStride = IsUnscaled ? getMemScale(I) : 1; bool MergeForward = Flags.getMergeForward(); unsigned NewOpc = getMatchingPairOpcode(Opc); // Insert our new paired instruction after whichever of the paired // instructions MergeForward indicates. MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I; // MergeForward also determines which instruction we copy the base register // operand from, so that the flags stay compatible with the input code. const MachineOperand &BaseRegOp = MergeForward ? getLdStBaseOp(Paired) : getLdStBaseOp(I); // Which register is Rt and which is Rt2 depends on the offset order. MachineInstr *RtMI, *Rt2MI; if (getLdStOffsetOp(I).getImm() == getLdStOffsetOp(Paired).getImm() + OffsetStride) { RtMI = Paired; Rt2MI = I; // Here we have swapped the assumption made for SExtIdx; i.e., we turn // ldp I, Paired into ldp Paired, I. Update the index accordingly. if (SExtIdx != -1) SExtIdx = (SExtIdx + 1) % 2; } else { RtMI = I; Rt2MI = Paired; } int OffsetImm = getLdStOffsetOp(RtMI).getImm(); if (isSmallTypeLdMerge(Opc)) { // Change the scaled offset from small to large type. if (!IsUnscaled) OffsetImm /= 2; MachineInstr *RtNewDest = MergeForward ? I : Paired; // Construct the new load instruction. // FIXME: Currently we support only unsigned halfword loads. We need to // handle byte, signed, and store instructions as well. MachineInstr *NewMemMI, *BitExtMI1, *BitExtMI2; NewMemMI = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) .addOperand(getLdStRegOp(RtNewDest)) .addOperand(BaseRegOp) .addImm(OffsetImm); // Copy MachineMemOperands from the original loads. concatenateMemOperands(NewMemMI, I, Paired); DEBUG(dbgs() << "Creating the new load and extract. Replacing instructions:\n "); DEBUG(I->print(dbgs())); DEBUG(dbgs() << " "); DEBUG(Paired->print(dbgs())); DEBUG(dbgs() << " with instructions:\n "); DEBUG((NewMemMI)->print(dbgs())); MachineInstr *ExtDestMI = MergeForward ? Paired : I; if (ExtDestMI == Rt2MI) { // Create the bitfield extract for high half. BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), TII->get(AArch64::UBFMWri)) .addOperand(getLdStRegOp(Rt2MI)) .addReg(getLdStRegOp(RtNewDest).getReg()) .addImm(16) .addImm(31); // Create the bitfield extract for low half. BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), TII->get(AArch64::ANDWri)) .addOperand(getLdStRegOp(RtMI)) .addReg(getLdStRegOp(RtNewDest).getReg()) .addImm(15); } else { // Create the bitfield extract for low half. BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), TII->get(AArch64::ANDWri)) .addOperand(getLdStRegOp(RtMI)) .addReg(getLdStRegOp(RtNewDest).getReg()) .addImm(15); // Create the bitfield extract for high half.
BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), TII->get(AArch64::UBFMWri)) .addOperand(getLdStRegOp(Rt2MI)) .addReg(getLdStRegOp(RtNewDest).getReg()) .addImm(16) .addImm(31); } DEBUG(dbgs() << " "); DEBUG((BitExtMI1)->print(dbgs())); DEBUG(dbgs() << " "); DEBUG((BitExtMI2)->print(dbgs())); DEBUG(dbgs() << "\n"); // Erase the old instructions. I->eraseFromParent(); Paired->eraseFromParent(); return NextI; } // Handle Unscaled if (IsUnscaled) OffsetImm /= OffsetStride; // Construct the new instruction. MachineInstrBuilder MIB = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), TII->get(NewOpc)) .addOperand(getLdStRegOp(RtMI)) .addOperand(getLdStRegOp(Rt2MI)) .addOperand(BaseRegOp) .addImm(OffsetImm); (void)MIB; // FIXME: Do we need/want to copy the mem operands from the source // instructions? Probably. What uses them after this? DEBUG(dbgs() << "Creating pair load/store. Replacing instructions:\n "); DEBUG(I->print(dbgs())); DEBUG(dbgs() << " "); DEBUG(Paired->print(dbgs())); DEBUG(dbgs() << " with instruction:\n "); if (SExtIdx != -1) { // Generate the sign extension for the proper result of the ldp. // I.e., with X1, that would be: // %W1<def> = KILL %W1, %X1<imp-def> // %X1<def> = SBFMXri %X1<kill>, 0, 31 MachineOperand &DstMO = MIB->getOperand(SExtIdx); // Right now, DstMO has the extended register, since it comes from an // extended opcode. unsigned DstRegX = DstMO.getReg(); // Get the W variant of that register. unsigned DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32); // Update the result of LDP to use the W instead of the X variant. DstMO.setReg(DstRegW); DEBUG(((MachineInstr *)MIB)->print(dbgs())); DEBUG(dbgs() << "\n"); // Make the machine verifier happy by providing a definition for // the X register. // Insert this definition right after the generated LDP, i.e., before // InsertionPoint. MachineInstrBuilder MIBKill = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), TII->get(TargetOpcode::KILL), DstRegW) .addReg(DstRegW) .addReg(DstRegX, RegState::Define); MIBKill->getOperand(2).setImplicit(); // Create the sign extension. MachineInstrBuilder MIBSXTW = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), TII->get(AArch64::SBFMXri), DstRegX) .addReg(DstRegX) .addImm(0) .addImm(31); (void)MIBSXTW; DEBUG(dbgs() << " Extend operand:\n "); DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs())); DEBUG(dbgs() << "\n"); } else { DEBUG(((MachineInstr *)MIB)->print(dbgs())); DEBUG(dbgs() << "\n"); } // Erase the old instructions. I->eraseFromParent(); Paired->eraseFromParent(); return NextI; }
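A standalone sketch (not from the original source) of what the small-type merge above computes: two adjacent unsigned halfword loads become one 32-bit load, with UBFMWri #16, #31 acting as a logical shift right by 16 to recover the high half and ANDWri (the encoded mask for 0xFFFF) recovering the low half. It assumes the little-endian layout AArch64 normally runs with.

#include <cstdint>
#include <cstring>

void mergedHalfwordLoads(const uint16_t *P, uint32_t &Lo, uint32_t &Hi) {
  uint32_t W;
  std::memcpy(&W, P, sizeof(W)); // one 32-bit load replaces two LDRHHui
  Hi = W >> 16;                  // UBFMWri Rt2, Rn, #16, #31
  Lo = W & 0xFFFFu;              // ANDWri  Rt,  Rn, <encoding of 0xFFFF>
}

// For uint16_t Buf[2] = {0x1111, 0x2222}: Lo == 0x1111 and Hi == 0x2222.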
/// SinkInstruction - Determine whether it is safe to sink the specified machine /// instruction out of its current block into a successor. bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // Don't sink insert_subreg, subreg_to_reg, reg_sequence. These are meant to // be close to the source to make it easier to coalesce. if (AvoidsSinking(MI, MRI)) return false; // Check if it's safe to move the instruction. if (!MI->isSafeToMove(TII, AA, SawStore)) return false; // FIXME: This should include support for sinking instructions within the // block they are currently in to shorten the live ranges. We often get // instructions sunk into the top of a large block, but it would be better to // also sink them down before their first use in the block. This xform has to // be careful not to *increase* register pressure though, e.g. sinking // "x = y + z" down if it kills y and z would increase the live ranges of y // and z and only shrink the live range of x. bool BreakPHIEdge = false; MachineBasicBlock *ParentBlock = MI->getParent(); MachineBasicBlock *SuccToSinkTo = FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge); // If there are no outputs, it must have side-effects. if (!SuccToSinkTo) return false; // If the instruction to move defines a dead physical register which is live // when leaving the basic block, don't move it because it could turn into a // "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>) for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { const MachineOperand &MO = MI->getOperand(I); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; if (SuccToSinkTo->isLiveIn(Reg)) return false; } DEBUG(dbgs() << "Sink instr " << *MI << "\tinto block " << *SuccToSinkTo); // If the block has multiple predecessors, this is a critical edge. // Decide if we can sink along it or need to break the edge. if (SuccToSinkTo->pred_size() > 1) { // We cannot sink a load across a critical edge - there may be stores in // other code paths. bool TryBreak = false; bool store = true; if (!MI->isSafeToMove(TII, AA, store)) { DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n"); TryBreak = true; } // We don't want to sink across a critical edge if we don't dominate the // successor. We could be introducing calculations to new code paths. if (!TryBreak && !DT->dominates(ParentBlock, SuccToSinkTo)) { DEBUG(dbgs() << " *** NOTE: Critical edge found\n"); TryBreak = true; } // Don't sink instructions into a loop. if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) { DEBUG(dbgs() << " *** NOTE: Loop header found\n"); TryBreak = true; } // Otherwise we are OK with sinking along a critical edge. if (!TryBreak) DEBUG(dbgs() << "Sinking along critical edge.\n"); else { // Mark this edge as to be split. // If the edge can actually be split, the next iteration of the main loop // will sink MI in the newly created block. bool Status = PostponeSplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge); if (!Status) DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to " "break critical edge\n"); // The instruction will not be sunk this time. return false; } } if (BreakPHIEdge) { // BreakPHIEdge is true if all the uses are in the successor MBB being // sunken into and they are all PHI nodes. In this case, machine-sink must // break the critical edge first. 
bool Status = PostponeSplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge); if (!Status) DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to " "break critical edge\n"); // The instruction will not be sunk this time. return false; } // Determine where to insert. Skip PHI nodes. MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin(); while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI()) ++InsertPos; // Collect matching debug values. SmallVector<MachineInstr *, 2> DbgValuesToSink; collectDebugValues(MI, DbgValuesToSink); // Move the instruction. SuccToSinkTo->splice(InsertPos, ParentBlock, MI, ++MachineBasicBlock::iterator(MI)); // Move debug values. for (SmallVectorImpl<MachineInstr *>::iterator DBI = DbgValuesToSink.begin(), DBE = DbgValuesToSink.end(); DBI != DBE; ++DBI) { MachineInstr *DbgMI = *DBI; SuccToSinkTo->splice(InsertPos, ParentBlock, DbgMI, ++MachineBasicBlock::iterator(DbgMI)); } // Conservatively, clear any kill flags, since it's possible that they are no // longer correct. MI->clearKillInfo(); return true; }
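The critical-edge tests above, condensed into a standalone predicate (not from the original source; the struct and its field names are illustrative). Sinking to a successor with several predecessors proceeds directly only when the instruction can be reordered past stores, the parent block dominates the successor, and the successor is not a loop header; otherwise the pass postpones and asks for the edge to be split.

struct SinkQuery {
  bool SafeAcrossStores;     // may the instruction move past stores?
  bool ParentDominatesSucc;  // no new computations on other paths
  bool SuccIsLoopHeader;     // never sink into a loop
  bool SuccHasMultiplePreds; // multiple preds => critical edge
};

bool sinkDirectly(const SinkQuery &Q) {
  if (!Q.SuccHasMultiplePreds)
    return true; // not a critical edge; sinking is always acceptable
  return Q.SafeAcrossStores && Q.ParentDominatesSucc && !Q.SuccIsLoopHeader;
}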
/// findMatchingInsn - Scan the instructions looking for a load/store that can /// be combined with the current instruction into a load/store pair. MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, LdStPairFlags &Flags, unsigned Limit) { MachineBasicBlock::iterator E = I->getParent()->end(); MachineBasicBlock::iterator MBBI = I; MachineInstr *FirstMI = I; ++MBBI; unsigned Opc = FirstMI->getOpcode(); bool MayLoad = FirstMI->mayLoad(); bool IsUnscaled = isUnscaledLdSt(FirstMI); unsigned Reg = getLdStRegOp(FirstMI).getReg(); unsigned BaseReg = getLdStBaseOp(FirstMI).getReg(); int Offset = getLdStOffsetOp(FirstMI).getImm(); // Early exit if the first instruction modifies the base register. // e.g., ldr x0, [x0] if (FirstMI->modifiesRegister(BaseReg, TRI)) return E; // Early exit if the offset is not possible to match. (6 bits of positive // range, plus allow an extra one in case we find a later insn that matches // with Offset-1.) int OffsetStride = IsUnscaled ? getMemScale(FirstMI) : 1; if (!isSmallTypeLdMerge(Opc) && !inBoundsForPair(IsUnscaled, Offset, OffsetStride)) return E; // Track which registers have been modified and used between the first insn // (inclusive) and the second insn. BitVector ModifiedRegs, UsedRegs; ModifiedRegs.resize(TRI->getNumRegs()); UsedRegs.resize(TRI->getNumRegs()); // Remember any instructions that read/write memory between FirstMI and MI. SmallVector<MachineInstr *, 4> MemInsns; for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) { MachineInstr *MI = MBBI; // Skip DBG_VALUE instructions. Otherwise debug info can affect the // optimization by changing how far we scan. if (MI->isDebugValue()) continue; // Now that we know this is a real instruction, count it. ++Count; bool CanMergeOpc = Opc == MI->getOpcode(); Flags.setSExtIdx(-1); if (!CanMergeOpc) { bool IsValidLdStrOpc; unsigned NonSExtOpc = getMatchingNonSExtOpcode(Opc, &IsValidLdStrOpc); assert(IsValidLdStrOpc && "Given Opc should be a Load or Store with an immediate"); // Opc will be the first instruction in the pair. Flags.setSExtIdx(NonSExtOpc == (unsigned)Opc ? 1 : 0); CanMergeOpc = NonSExtOpc == getMatchingNonSExtOpcode(MI->getOpcode()); } if (CanMergeOpc && getLdStOffsetOp(MI).isImm()) { assert(MI->mayLoadOrStore() && "Expected memory operation."); // If we've found another instruction with the same opcode, check to see // if the base and offset are compatible with our starting instruction. // These instructions all have scaled immediate operands, so we just // check for +1/-1. Make sure to check the new instruction offset is // actually an immediate and not a symbolic reference destined for // a relocation. // // Pairwise instructions have a 7-bit signed offset field. Single insns // have a 12-bit unsigned offset field. To be a valid combine, the // final offset must be in range. unsigned MIBaseReg = getLdStBaseOp(MI).getReg(); int MIOffset = getLdStOffsetOp(MI).getImm(); if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) || (Offset + OffsetStride == MIOffset))) { int MinOffset = Offset < MIOffset ? Offset : MIOffset; // If this is a volatile load/store that otherwise matched, stop looking, // as something is going on that we don't have enough information to // safely transform. Similarly, stop if we see a hint to avoid pairs. if (MI->hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI)) return E; // If the resultant immediate offset of merging these instructions // is out of range for a pairwise instruction, bail and keep looking.
bool MIIsUnscaled = isUnscaledLdSt(MI); bool IsSmallTypeLd = isSmallTypeLdMerge(MI->getOpcode()); if (!IsSmallTypeLd && !inBoundsForPair(MIIsUnscaled, MinOffset, OffsetStride)) { trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); MemInsns.push_back(MI); continue; } if (IsSmallTypeLd) { // If the alignment requirements of the larger type scaled load // instruction can't express the scaled offset of the smaller type // input, bail and keep looking. if (!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) { trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); MemInsns.push_back(MI); continue; } } else { // If the alignment requirements of the paired (scaled) instruction // can't express the offset of the unscaled input, bail and keep // looking. if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) { trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); MemInsns.push_back(MI); continue; } } // If the destination register of the loads is the same register, bail // and keep looking. A load-pair instruction with both destination // registers the same is UNPREDICTABLE and will result in an exception. if (MayLoad && Reg == getLdStRegOp(MI).getReg()) { trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); MemInsns.push_back(MI); continue; } // If the Rt of the second instruction was not modified or used between // the two instructions and none of the instructions between the second // and first alias with the second, we can combine the second into the // first. if (!ModifiedRegs[getLdStRegOp(MI).getReg()] && !(MI->mayLoad() && UsedRegs[getLdStRegOp(MI).getReg()]) && !mayAlias(MI, MemInsns, TII)) { Flags.setMergeForward(false); return MBBI; } // Likewise, if the Rt of the first instruction is not modified or used // between the two instructions and none of the instructions between the // first and the second alias with the first, we can combine the first // into the second. if (!ModifiedRegs[getLdStRegOp(FirstMI).getReg()] && !(MayLoad && UsedRegs[getLdStRegOp(FirstMI).getReg()]) && !mayAlias(FirstMI, MemInsns, TII)) { Flags.setMergeForward(true); return MBBI; } // Unable to combine these instructions due to interference in between. // Keep looking. } } // The instruction wasn't a matching load or store. Stop searching if we // encounter a call instruction that might modify memory. if (MI->isCall()) return E; // Update the modified / used register lists. trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); // Otherwise, if the base register is modified, we have no match, so // return early. if (ModifiedRegs[BaseReg]) return E; // Update list of instructions that read/write memory. if (MI->mayLoadOrStore()) MemInsns.push_back(MI); } return E; }
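A sketch (not from the original source) of the bounds test the scan keeps applying: paired opcodes carry a 7-bit signed, scale-multiplied immediate, so the smaller of the two candidate offsets must land in [-64, 63] once expressed in units of the access size. Alignment of unscaled inputs is checked separately, as in the alignTo tests above.

bool fitsPairedOffset(bool IsUnscaled, int Offset, int OffsetStride) {
  if (IsUnscaled)
    Offset /= OffsetStride; // convert a byte offset to access-size units
  return Offset >= -64 && Offset <= 63; // 7-bit signed immediate field
}

// fitsPairedOffset(false, 63, 1) is true; fitsPairedOffset(false, 64, 1) is not.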
void MipsSEInstrInfo::expandPseudoMFHiLo(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned NewOpc) const { BuildMI(MBB, I, I->getDebugLoc(), get(NewOpc), I->getOperand(0).getReg()); }
MachineBasicBlock::iterator AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Update, bool IsPreIdx) { assert((Update->getOpcode() == AArch64::ADDXri || Update->getOpcode() == AArch64::SUBXri) && "Unexpected base register update instruction to merge!"); MachineBasicBlock::iterator NextI = I; // Return the instruction following the merged instruction, which is // the instruction following our unmerged load. Unless that's the add/sub // instruction we're merging, in which case it's the one after that. if (++NextI == Update) ++NextI; int Value = Update->getOperand(2).getImm(); assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 && "Can't merge 1 << 12 offset into pre-/post-indexed load / store"); if (Update->getOpcode() == AArch64::SUBXri) Value = -Value; unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode()) : getPostIndexedOpcode(I->getOpcode()); MachineInstrBuilder MIB; if (!isPairedLdSt(I)) { // Non-paired instruction. MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) .addOperand(getLdStRegOp(Update)) .addOperand(getLdStRegOp(I)) .addOperand(getLdStBaseOp(I)) .addImm(Value); } else { // Paired instruction. int Scale = getMemScale(I); MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) .addOperand(getLdStRegOp(Update)) .addOperand(getLdStRegOp(I, 0)) .addOperand(getLdStRegOp(I, 1)) .addOperand(getLdStBaseOp(I)) .addImm(Value / Scale); } (void)MIB; if (IsPreIdx) DEBUG(dbgs() << "Creating pre-indexed load/store."); else DEBUG(dbgs() << "Creating post-indexed load/store."); DEBUG(dbgs() << " Replacing instructions:\n "); DEBUG(I->print(dbgs())); DEBUG(dbgs() << " "); DEBUG(Update->print(dbgs())); DEBUG(dbgs() << " with instruction:\n "); DEBUG(((MachineInstr *)MIB)->print(dbgs())); DEBUG(dbgs() << "\n"); // Erase the old instructions for the block. I->eraseFromParent(); Update->eraseFromParent(); return NextI; }
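An illustrative model (not from the original source) of the two addressing modes the merge above produces: a pre-indexed access ("ldr x0, [x1, #8]!") updates the base register before the access, while a post-indexed one ("ldr x0, [x1], #8") updates it after. The merge folds the separate ADDXri/SUBXri into that writeback. Offsets here are in elements, not bytes, for simplicity.

#include <cstdint>

uint64_t loadPreIndexed(const uint64_t *&Base, int64_t Off) {
  Base += Off;  // writeback happens first...
  return *Base; // ...and the access uses the updated base
}

uint64_t loadPostIndexed(const uint64_t *&Base, int64_t Off) {
  uint64_t V = *Base; // access uses the old base...
  Base += Off;        // ...then the base register is updated
  return V;
}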
bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) { bool Modified = false; SmallSet<unsigned, 4> Defs; SmallSet<unsigned, 4> Uses; MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); while (MBBI != E) { MachineInstr *MI = &*MBBI; DebugLoc dl = MI->getDebugLoc(); unsigned PredReg = 0; ARMCC::CondCodes CC = getITInstrPredicate(*MI, PredReg); if (CC == ARMCC::AL) { ++MBBI; continue; } Defs.clear(); Uses.clear(); TrackDefUses(MI, Defs, Uses, TRI); // Insert an IT instruction. MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(ARM::t2IT)) .addImm(CC); // Add implicit use of ITSTATE to IT block instructions. MI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*isDef*/, true/*isImp*/, false/*isKill*/)); MachineInstr *LastITMI = MI; MachineBasicBlock::iterator InsertPos = MIB.getInstr(); ++MBBI; // Form IT block. ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC); unsigned Mask = 0, Pos = 3; // v8 IT blocks are limited to one conditional op unless -arm-no-restrict-it // is set; when IT blocks are restricted, skip the loop that would extend // the block. if (!restrictIT) { // Branches, including tricky ones like LDM_RET, need to end an IT // block so check the instruction we just put in the block. for (; MBBI != E && Pos && (!MI->isBranch() && !MI->isReturn()); ++MBBI) { if (MBBI->isDebugValue()) continue; MachineInstr *NMI = &*MBBI; MI = NMI; unsigned NPredReg = 0; ARMCC::CondCodes NCC = getITInstrPredicate(*NMI, NPredReg); if (NCC == CC || NCC == OCC) { Mask |= (NCC & 1) << Pos; // Add implicit use of ITSTATE. NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*isDef*/, true/*isImp*/, false/*isKill*/)); LastITMI = NMI; } else { if (NCC == ARMCC::AL && MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses)) { --MBBI; MBB.remove(NMI); MBB.insert(InsertPos, NMI); ClearKillFlags(MI, Uses); ++NumMovedInsts; continue; } break; } TrackDefUses(NMI, Defs, Uses, TRI); --Pos; } } // Finalize IT mask. Mask |= (1 << Pos); // Tag along (firstcond[0] << 4) with the mask. Mask |= (CC & 1) << 4; MIB.addImm(Mask); // Last instruction in IT block kills ITSTATE. LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill(); // Finalize the bundle. finalizeBundle(MBB, InsertPos.getInstrIterator(), ++LastITMI->getIterator()); Modified = true; ++NumITs; } return Modified; }
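The mask computation from the loop above, lifted into a standalone helper (not from the original source). Conds holds the low condition-code bit of each trailing instruction in the block: equal to the first condition's low bit for a "then" slot, its complement for an "else" slot. The trailing 1 encodes the block length, and firstcond[0] rides along in bit 4, exactly as in the pass.

#include <vector>

unsigned buildITMask(unsigned CCLowBit, const std::vector<unsigned> &Conds) {
  unsigned Mask = 0, Pos = 3;  // room for up to three trailing instructions
  for (unsigned Bit : Conds) { // one bit per extra conditional instruction
    Mask |= (Bit & 1) << Pos;
    --Pos;
  }
  Mask |= 1u << Pos;           // trailing 1 marks the block length
  Mask |= (CCLowBit & 1) << 4; // tag along firstcond[0]
  return Mask;
}

// A one-instruction "IT EQ" block: buildITMask(0, {}) == 0b01000.
// An "ITTE EQ" block (then, then, else): buildITMask(0, {0, 0, 1}) == 0b00011.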
bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) { MachineBasicBlock *MBB = CI.I->getParent(); MachineBasicBlock::iterator E = MBB->end(); MachineBasicBlock::iterator MBBI = CI.I; const unsigned Opc = CI.I->getOpcode(); const InstClassEnum InstClass = getInstClass(Opc); if (InstClass == UNKNOWN) { return false; } const unsigned Regs = getRegs(Opc); unsigned AddrOpName[5] = {0}; int AddrIdx[5]; const MachineOperand *AddrReg[5]; unsigned NumAddresses = 0; if (Regs & ADDR) { AddrOpName[NumAddresses++] = AMDGPU::OpName::addr; } if (Regs & SBASE) { AddrOpName[NumAddresses++] = AMDGPU::OpName::sbase; } if (Regs & SRSRC) { AddrOpName[NumAddresses++] = AMDGPU::OpName::srsrc; } if (Regs & SOFFSET) { AddrOpName[NumAddresses++] = AMDGPU::OpName::soffset; } if (Regs & VADDR) { AddrOpName[NumAddresses++] = AMDGPU::OpName::vaddr; } for (unsigned i = 0; i < NumAddresses; i++) { AddrIdx[i] = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AddrOpName[i]); AddrReg[i] = &CI.I->getOperand(AddrIdx[i]); // We only ever merge operations with the same base address register, so // don't bother scanning forward if there are no other uses. if (AddrReg[i]->isReg() && (TargetRegisterInfo::isPhysicalRegister(AddrReg[i]->getReg()) || MRI->hasOneNonDBGUse(AddrReg[i]->getReg()))) return false; } ++MBBI; DenseSet<unsigned> RegDefsToMove; DenseSet<unsigned> PhysRegUsesToMove; addDefsUsesToList(*CI.I, RegDefsToMove, PhysRegUsesToMove); for (; MBBI != E; ++MBBI) { const bool IsDS = (InstClass == DS_READ) || (InstClass == DS_WRITE); if ((getInstClass(MBBI->getOpcode()) != InstClass) || (IsDS && (MBBI->getOpcode() != Opc))) { // This is not a matching DS instruction, but we can keep looking as // long as one of these conditions is met: // 1. It is safe to move I down past MBBI. // 2. It is safe to move MBBI down past the instruction that I will // be merged into. if (MBBI->hasUnmodeledSideEffects()) { // We can't re-order this instruction with respect to other memory // operations, so we fail both conditions mentioned above. return false; } if (MBBI->mayLoadOrStore() && (!memAccessesCanBeReordered(*CI.I, *MBBI, AA) || !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, AA))) { // We fail condition #1, but we may still be able to satisfy condition // #2. Add this instruction to the move list, and then we will check // if condition #2 holds once we have selected the matching instruction. CI.InstsToMove.push_back(&*MBBI); addDefsUsesToList(*MBBI, RegDefsToMove, PhysRegUsesToMove); continue; } // When we match I with another DS instruction we will be moving I down // to the location of the matched instruction; any uses of I will need // to be moved down as well. addToListsIfDependent(*MBBI, RegDefsToMove, PhysRegUsesToMove, CI.InstsToMove); continue; } // Don't merge volatiles. if (MBBI->hasOrderedMemoryRef()) return false; // Handle a case like // DS_WRITE_B32 addr, v, idx0 // w = DS_READ_B32 addr, idx0 // DS_WRITE_B32 addr, f(w), idx1 // where the DS_READ_B32 ends up in InstsToMove and therefore prevents // merging of the two writes. if (addToListsIfDependent(*MBBI, RegDefsToMove, PhysRegUsesToMove, CI.InstsToMove)) continue; bool Match = true; for (unsigned i = 0; i < NumAddresses; i++) { const MachineOperand &AddrRegNext = MBBI->getOperand(AddrIdx[i]); if (AddrReg[i]->isImm() || AddrRegNext.isImm()) { if (AddrReg[i]->isImm() != AddrRegNext.isImm() || AddrReg[i]->getImm() != AddrRegNext.getImm()) { Match = false; break; } continue; } // Check same base pointer.
// Be careful of subregisters, which can occur with vectors of pointers. if (AddrReg[i]->getReg() != AddrRegNext.getReg() || AddrReg[i]->getSubReg() != AddrRegNext.getSubReg()) { Match = false; break; } } if (Match) { int OffsetIdx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AMDGPU::OpName::offset); CI.Offset0 = CI.I->getOperand(OffsetIdx).getImm(); CI.Width0 = getOpcodeWidth(*CI.I); CI.Offset1 = MBBI->getOperand(OffsetIdx).getImm(); CI.Width1 = getOpcodeWidth(*MBBI); CI.Paired = MBBI; if ((CI.InstClass == DS_READ) || (CI.InstClass == DS_WRITE)) { CI.Offset0 &= 0xffff; CI.Offset1 &= 0xffff; } else { CI.GLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::glc)->getImm(); CI.GLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::glc)->getImm(); if (CI.InstClass != S_BUFFER_LOAD_IMM) { CI.SLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::slc)->getImm(); CI.SLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::slc)->getImm(); } } // Check that both offsets fit in the reduced range. // We also need to go through the list of instructions that we plan to // move and make sure they are all safe to move down past the merged // instruction. if (widthsFit(*STM, CI) && offsetsCanBeCombined(CI)) if (canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, AA)) return true; } // We've found a load/store that we couldn't merge for some reason. // We could potentially keep looking, but we'd need to make sure that // it was safe to move I and also all the instructions in InstsToMove // down past this instruction. // Check if we can move I across MBBI and if we can move all of I's users. if (!memAccessesCanBeReordered(*CI.I, *MBBI, AA) || !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, AA)) break; } return false; }
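A loose model (not from the original source) of the dependence bookkeeping behind InstsToMove: merging moves I down to the paired instruction, so any intervening instruction that reads something I (or an already-collected mover) defines must travel down too. A set intersection captures the essence of the addToListsIfDependent check; the register sets here are hypothetical stand-ins for the DenseSets above.

#include <set>

using RegSet = std::set<unsigned>;

// True if Inst reads any register defined by the instructions being moved,
// in which case Inst must join InstsToMove as well.
bool mustMoveWith(const RegSet &MovedDefs, const RegSet &InstUses) {
  for (unsigned R : InstUses)
    if (MovedDefs.count(R))
      return true;
  return false;
}

// If I defines {v0} and an intervening add uses {v0, v1}, the add joins the
// move list; an instruction touching only {v2} can stay where it is.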