DenseMap<SectionBase *, int> elf::buildSectionOrder() { DenseMap<SectionBase *, int> SectionOrder; if (Config->SymbolOrderingFile.empty()) return SectionOrder; // Build a map from symbols to their priorities. Symbols that didn't // appear in the symbol ordering file have the lowest priority 0. // All explicitly mentioned symbols have negative (higher) priorities. DenseMap<StringRef, int> SymbolOrder; int Priority = -Config->SymbolOrderingFile.size(); for (StringRef S : Config->SymbolOrderingFile) SymbolOrder.insert({S, Priority++}); // Build a map from sections to their priorities. for (InputFile *File : ObjectFiles) { for (Symbol *Sym : File->getSymbols()) { auto *D = dyn_cast<Defined>(Sym); if (!D || !D->Section) continue; int &Priority = SectionOrder[D->Section]; Priority = std::min(Priority, SymbolOrder.lookup(D->getName())); } } return SectionOrder; }
void MCMachOStreamer::FinishImpl() { EmitFrames(&getAssembler().getBackend(), true); // We have to set the fragment atom associations so we can relax properly for // Mach-O. // First, scan the symbol table to build a lookup table from fragments to // defining symbols. DenseMap<const MCFragment*, MCSymbolData*> DefiningSymbolMap; for (MCSymbolData &SD : getAssembler().symbols()) { if (getAssembler().isSymbolLinkerVisible(SD.getSymbol()) && SD.getFragment()) { // An atom defining symbol should never be internal to a fragment. assert(SD.getOffset() == 0 && "Invalid offset in atom defining symbol!"); DefiningSymbolMap[SD.getFragment()] = &SD; } } // Set the fragment atom associations by tracking the last seen atom defining // symbol. for (MCAssembler::iterator it = getAssembler().begin(), ie = getAssembler().end(); it != ie; ++it) { MCSymbolData *CurrentAtom = nullptr; for (MCSectionData::iterator it2 = it->begin(), ie2 = it->end(); it2 != ie2; ++it2) { if (MCSymbolData *SD = DefiningSymbolMap.lookup(it2)) CurrentAtom = SD; it2->setAtom(CurrentAtom); } } this->MCObjectStreamer::FinishImpl(); }
bool llvm::stripDebugInfo(Function &F) { bool Changed = false; if (F.getSubprogram()) { Changed = true; F.setSubprogram(nullptr); } DenseMap<MDNode*, MDNode*> LoopIDsMap; for (BasicBlock &BB : F) { for (auto II = BB.begin(), End = BB.end(); II != End;) { Instruction &I = *II++; // We may delete the instruction, increment now. if (isa<DbgInfoIntrinsic>(&I)) { I.eraseFromParent(); Changed = true; continue; } if (I.getDebugLoc()) { Changed = true; I.setDebugLoc(DebugLoc()); } } auto *TermInst = BB.getTerminator(); if (auto *LoopID = TermInst->getMetadata(LLVMContext::MD_loop)) { auto *NewLoopID = LoopIDsMap.lookup(LoopID); if (!NewLoopID) NewLoopID = LoopIDsMap[LoopID] = stripDebugLocFromLoopID(LoopID); if (NewLoopID != LoopID) TermInst->setMetadata(LLVMContext::MD_loop, NewLoopID); } } return Changed; }
void MCMachOStreamer::FinishImpl() { EmitFrames(&getAssembler().getBackend()); // We have to set the fragment atom associations so we can relax properly for // Mach-O. // First, scan the symbol table to build a lookup table from fragments to // defining symbols. DenseMap<const MCFragment *, const MCSymbol *> DefiningSymbolMap; for (const MCSymbol &Symbol : getAssembler().symbols()) { if (getAssembler().isSymbolLinkerVisible(Symbol) && Symbol.isInSection() && !Symbol.isVariable()) { // An atom defining symbol should never be internal to a fragment. assert(Symbol.getOffset() == 0 && "Invalid offset in atom defining symbol!"); DefiningSymbolMap[Symbol.getFragment()] = &Symbol; } } // Set the fragment atom associations by tracking the last seen atom defining // symbol. for (MCSection &Sec : getAssembler()) { const MCSymbol *CurrentAtom = nullptr; for (MCFragment &Frag : Sec) { if (const MCSymbol *Symbol = DefiningSymbolMap.lookup(&Frag)) CurrentAtom = Symbol; Frag.setAtom(CurrentAtom); } } this->MCObjectStreamer::FinishImpl(); }
// TODO: We should also visit ICmp, FCmp, GetElementPtr, Trunc, ZExt, SExt, // FPTrunc, FPExt, FPToUI, FPToSI, UIToFP, SIToFP, BitCast, Select, // ExtractElement, InsertElement, ShuffleVector, ExtractValue, InsertValue. // // Probaly it's worth to hoist the code for estimating the simplifications // effects to a separate class, since we have a very similar code in // InlineCost already. bool visitBinaryOperator(BinaryOperator &I) { Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); if (!isa<Constant>(LHS)) if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) LHS = SimpleLHS; if (!isa<Constant>(RHS)) if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) RHS = SimpleRHS; Value *SimpleV = nullptr; if (auto FI = dyn_cast<FPMathOperator>(&I)) SimpleV = SimplifyFPBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags()); else SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS); if (SimpleV && CountedInsns.insert(&I).second) NumberOfOptimizedInstructions += TTI.getUserCost(&I); if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) { SimplifiedValues[&I] = C; return true; } return false; }
/// assignInstructionRanges - Find ranges of instructions covered by each /// lexical scope. void LexicalScopes::assignInstructionRanges( SmallVectorImpl<InsnRange> &MIRanges, DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) { LexicalScope *PrevLexicalScope = nullptr; for (const auto &R : MIRanges) { LexicalScope *S = MI2ScopeMap.lookup(R.first); assert(S && "Lost LexicalScope for a machine instruction!"); if (PrevLexicalScope && !PrevLexicalScope->dominates(S)) PrevLexicalScope->closeInsnRange(S); S->openInsnRange(R.first); S->extendInsnRange(R.second); PrevLexicalScope = S; } if (PrevLexicalScope) PrevLexicalScope->closeInsnRange(); }
/// assignInstructionRanges - Find ranges of instructions covered by each /// lexical scope. void LexicalScopes:: assignInstructionRanges(SmallVectorImpl<InsnRange> &MIRanges, DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) { LexicalScope *PrevLexicalScope = NULL; for (SmallVectorImpl<InsnRange>::const_iterator RI = MIRanges.begin(), RE = MIRanges.end(); RI != RE; ++RI) { const InsnRange &R = *RI; LexicalScope *S = MI2ScopeMap.lookup(R.first); assert (S && "Lost LexicalScope for a machine instruction!"); if (PrevLexicalScope && !PrevLexicalScope->dominates(S)) PrevLexicalScope->closeInsnRange(S); S->openInsnRange(R.first); S->extendInsnRange(R.second); PrevLexicalScope = S; } if (PrevLexicalScope) PrevLexicalScope->closeInsnRange(); }
/// computeSymbolTable - Compute the symbol table data void MachObjectWriter::computeSymbolTable( MCAssembler &Asm, std::vector<MachSymbolData> &LocalSymbolData, std::vector<MachSymbolData> &ExternalSymbolData, std::vector<MachSymbolData> &UndefinedSymbolData) { // Build section lookup table. DenseMap<const MCSection*, uint8_t> SectionIndexMap; unsigned Index = 1; for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it, ++Index) SectionIndexMap[&*it] = Index; assert(Index <= 256 && "Too many sections!"); // Build the string table. for (const MCSymbol &Symbol : Asm.symbols()) { if (!Asm.isSymbolLinkerVisible(Symbol)) continue; StringTable.add(Symbol.getName()); } StringTable.finalize(); // Build the symbol arrays but only for non-local symbols. // // The particular order that we collect and then sort the symbols is chosen to // match 'as'. Even though it doesn't matter for correctness, this is // important for letting us diff .o files. for (const MCSymbol &Symbol : Asm.symbols()) { // Ignore non-linker visible symbols. if (!Asm.isSymbolLinkerVisible(Symbol)) continue; if (!Symbol.isExternal() && !Symbol.isUndefined()) continue; MachSymbolData MSD; MSD.Symbol = &Symbol; MSD.StringIndex = StringTable.getOffset(Symbol.getName()); if (Symbol.isUndefined()) { MSD.SectionIndex = 0; UndefinedSymbolData.push_back(MSD); } else if (Symbol.isAbsolute()) { MSD.SectionIndex = 0; ExternalSymbolData.push_back(MSD); } else { MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); assert(MSD.SectionIndex && "Invalid section index!"); ExternalSymbolData.push_back(MSD); } } // Now add the data for local symbols. for (const MCSymbol &Symbol : Asm.symbols()) { // Ignore non-linker visible symbols. if (!Asm.isSymbolLinkerVisible(Symbol)) continue; if (Symbol.isExternal() || Symbol.isUndefined()) continue; MachSymbolData MSD; MSD.Symbol = &Symbol; MSD.StringIndex = StringTable.getOffset(Symbol.getName()); if (Symbol.isAbsolute()) { MSD.SectionIndex = 0; LocalSymbolData.push_back(MSD); } else { MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); assert(MSD.SectionIndex && "Invalid section index!"); LocalSymbolData.push_back(MSD); } } // External and undefined symbols are required to be in lexicographic order. std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end()); std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end()); // Set the symbol indices. Index = 0; for (auto *SymbolData : {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData}) for (MachSymbolData &Entry : *SymbolData) Entry.Symbol->setIndex(Index++); for (const MCSection &Section : Asm) { for (RelAndSymbol &Rel : Relocations[&Section]) { if (!Rel.Sym) continue; // Set the Index and the IsExtern bit. unsigned Index = Rel.Sym->getIndex(); assert(isInt<24>(Index)); if (IsLittleEndian) Rel.MRE.r_word1 = (Rel.MRE.r_word1 & (~0U << 24)) | Index | (1 << 27); else Rel.MRE.r_word1 = (Rel.MRE.r_word1 & 0xff) | Index << 8 | (1 << 4); } } }
/// ComputeSymbolTable - Compute the symbol table data /// /// \param StringTable [out] - The string table data. /// \param StringIndexMap [out] - Map from symbol names to offsets in the /// string table. void MachObjectWriter:: ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable, std::vector<MachSymbolData> &LocalSymbolData, std::vector<MachSymbolData> &ExternalSymbolData, std::vector<MachSymbolData> &UndefinedSymbolData) { // Build section lookup table. DenseMap<const MCSection*, uint8_t> SectionIndexMap; unsigned Index = 1; for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it, ++Index) SectionIndexMap[&it->getSection()] = Index; assert(Index <= 256 && "Too many sections!"); // Index 0 is always the empty string. StringMap<uint64_t> StringIndexMap; StringTable += '\x00'; // Build the symbol arrays and the string table, but only for non-local // symbols. // // The particular order that we collect the symbols and create the string // table, then sort the symbols is chosen to match 'as'. Even though it // doesn't matter for correctness, this is important for letting us diff .o // files. for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), ie = Asm.symbol_end(); it != ie; ++it) { const MCSymbol &Symbol = it->getSymbol(); // Ignore non-linker visible symbols. if (!Asm.isSymbolLinkerVisible(it->getSymbol())) continue; if (!it->isExternal() && !Symbol.isUndefined()) continue; uint64_t &Entry = StringIndexMap[Symbol.getName()]; if (!Entry) { Entry = StringTable.size(); StringTable += Symbol.getName(); StringTable += '\x00'; } MachSymbolData MSD; MSD.SymbolData = it; MSD.StringIndex = Entry; if (Symbol.isUndefined()) { MSD.SectionIndex = 0; UndefinedSymbolData.push_back(MSD); } else if (Symbol.isAbsolute()) { MSD.SectionIndex = 0; ExternalSymbolData.push_back(MSD); } else { MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); assert(MSD.SectionIndex && "Invalid section index!"); ExternalSymbolData.push_back(MSD); } } // Now add the data for local symbols. for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), ie = Asm.symbol_end(); it != ie; ++it) { const MCSymbol &Symbol = it->getSymbol(); // Ignore non-linker visible symbols. if (!Asm.isSymbolLinkerVisible(it->getSymbol())) continue; if (it->isExternal() || Symbol.isUndefined()) continue; uint64_t &Entry = StringIndexMap[Symbol.getName()]; if (!Entry) { Entry = StringTable.size(); StringTable += Symbol.getName(); StringTable += '\x00'; } MachSymbolData MSD; MSD.SymbolData = it; MSD.StringIndex = Entry; if (Symbol.isAbsolute()) { MSD.SectionIndex = 0; LocalSymbolData.push_back(MSD); } else { MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); assert(MSD.SectionIndex && "Invalid section index!"); LocalSymbolData.push_back(MSD); } } // External and undefined symbols are required to be in lexicographic order. std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end()); std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end()); // Set the symbol indices. Index = 0; for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) LocalSymbolData[i].SymbolData->setIndex(Index++); for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) ExternalSymbolData[i].SymbolData->setIndex(Index++); for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) UndefinedSymbolData[i].SymbolData->setIndex(Index++); // The string table is padded to a multiple of 4. while (StringTable.size() % 4) StringTable += '\x00'; }
bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { QII = static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo()); QRI = MF.getTarget().getSubtarget<HexagonSubtarget>().getRegisterInfo(); MRI = &MF.getRegInfo(); DenseMap<unsigned, unsigned> PeepholeMap; DenseMap<unsigned, std::pair<unsigned, unsigned> > PeepholeDoubleRegsMap; if (DisableHexagonPeephole) return false; // Loop over all of the basic blocks. for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end(); MBBb != MBBe; ++MBBb) { MachineBasicBlock* MBB = MBBb; PeepholeMap.clear(); PeepholeDoubleRegsMap.clear(); // Traverse the basic block. for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end(); ++MII) { MachineInstr *MI = MII; // Look for sign extends: // %vreg170<def> = SXTW %vreg166 if (!DisableOptSZExt && MI->getOpcode() == Hexagon::A2_sxtw) { assert (MI->getNumOperands() == 2); MachineOperand &Dst = MI->getOperand(0); MachineOperand &Src = MI->getOperand(1); unsigned DstReg = Dst.getReg(); unsigned SrcReg = Src.getReg(); // Just handle virtual registers. if (TargetRegisterInfo::isVirtualRegister(DstReg) && TargetRegisterInfo::isVirtualRegister(SrcReg)) { // Map the following: // %vreg170<def> = SXTW %vreg166 // PeepholeMap[170] = vreg166 PeepholeMap[DstReg] = SrcReg; } } // Look for %vreg170<def> = COMBINE_ir_V4 (0, %vreg169) // %vreg170:DoublRegs, %vreg169:IntRegs if (!DisableOptExtTo64 && MI->getOpcode () == Hexagon::COMBINE_Ir_V4) { assert (MI->getNumOperands() == 3); MachineOperand &Dst = MI->getOperand(0); MachineOperand &Src1 = MI->getOperand(1); MachineOperand &Src2 = MI->getOperand(2); if (Src1.getImm() != 0) continue; unsigned DstReg = Dst.getReg(); unsigned SrcReg = Src2.getReg(); PeepholeMap[DstReg] = SrcReg; } // Look for this sequence below // %vregDoubleReg1 = LSRd_ri %vregDoubleReg0, 32 // %vregIntReg = COPY %vregDoubleReg1:subreg_loreg. // and convert into // %vregIntReg = COPY %vregDoubleReg0:subreg_hireg. if (MI->getOpcode() == Hexagon::LSRd_ri) { assert(MI->getNumOperands() == 3); MachineOperand &Dst = MI->getOperand(0); MachineOperand &Src1 = MI->getOperand(1); MachineOperand &Src2 = MI->getOperand(2); if (Src2.getImm() != 32) continue; unsigned DstReg = Dst.getReg(); unsigned SrcReg = Src1.getReg(); PeepholeDoubleRegsMap[DstReg] = std::make_pair(*&SrcReg, 1/*Hexagon::subreg_hireg*/); } // Look for P=NOT(P). if (!DisablePNotP && (MI->getOpcode() == Hexagon::C2_not)) { assert (MI->getNumOperands() == 2); MachineOperand &Dst = MI->getOperand(0); MachineOperand &Src = MI->getOperand(1); unsigned DstReg = Dst.getReg(); unsigned SrcReg = Src.getReg(); // Just handle virtual registers. if (TargetRegisterInfo::isVirtualRegister(DstReg) && TargetRegisterInfo::isVirtualRegister(SrcReg)) { // Map the following: // %vreg170<def> = NOT_xx %vreg166 // PeepholeMap[170] = vreg166 PeepholeMap[DstReg] = SrcReg; } } // Look for copy: // %vreg176<def> = COPY %vreg170:subreg_loreg if (!DisableOptSZExt && MI->isCopy()) { assert (MI->getNumOperands() == 2); MachineOperand &Dst = MI->getOperand(0); MachineOperand &Src = MI->getOperand(1); // Make sure we are copying the lower 32 bits. if (Src.getSubReg() != Hexagon::subreg_loreg) continue; unsigned DstReg = Dst.getReg(); unsigned SrcReg = Src.getReg(); if (TargetRegisterInfo::isVirtualRegister(DstReg) && TargetRegisterInfo::isVirtualRegister(SrcReg)) { // Try to find in the map. if (unsigned PeepholeSrc = PeepholeMap.lookup(SrcReg)) { // Change the 1st operand. MI->RemoveOperand(1); MI->addOperand(MachineOperand::CreateReg(PeepholeSrc, false)); } else { DenseMap<unsigned, std::pair<unsigned, unsigned> >::iterator DI = PeepholeDoubleRegsMap.find(SrcReg); if (DI != PeepholeDoubleRegsMap.end()) { std::pair<unsigned,unsigned> PeepholeSrc = DI->second; MI->RemoveOperand(1); MI->addOperand(MachineOperand::CreateReg(PeepholeSrc.first, false /*isDef*/, false /*isImp*/, false /*isKill*/, false /*isDead*/, false /*isUndef*/, false /*isEarlyClobber*/, PeepholeSrc.second)); } } } } // Look for Predicated instructions. if (!DisablePNotP) { bool Done = false; if (QII->isPredicated(MI)) { MachineOperand &Op0 = MI->getOperand(0); unsigned Reg0 = Op0.getReg(); const TargetRegisterClass *RC0 = MRI->getRegClass(Reg0); if (RC0->getID() == Hexagon::PredRegsRegClassID) { // Handle instructions that have a prediate register in op0 // (most cases of predicable instructions). if (TargetRegisterInfo::isVirtualRegister(Reg0)) { // Try to find in the map. if (unsigned PeepholeSrc = PeepholeMap.lookup(Reg0)) { // Change the 1st operand and, flip the opcode. MI->getOperand(0).setReg(PeepholeSrc); int NewOp = QII->getInvertedPredicatedOpcode(MI->getOpcode()); MI->setDesc(QII->get(NewOp)); Done = true; } } } } if (!Done) { // Handle special instructions. unsigned Op = MI->getOpcode(); unsigned NewOp = 0; unsigned PR = 1, S1 = 2, S2 = 3; // Operand indices. switch (Op) { case Hexagon::C2_mux: case Hexagon::C2_muxii: case Hexagon::TFR_condset_ii: NewOp = Op; break; case Hexagon::TFR_condset_ri: NewOp = Hexagon::TFR_condset_ir; break; case Hexagon::TFR_condset_ir: NewOp = Hexagon::TFR_condset_ri; break; case Hexagon::C2_muxri: NewOp = Hexagon::C2_muxir; break; case Hexagon::C2_muxir: NewOp = Hexagon::C2_muxri; break; } if (NewOp) { unsigned PSrc = MI->getOperand(PR).getReg(); if (unsigned POrig = PeepholeMap.lookup(PSrc)) { MI->getOperand(PR).setReg(POrig); MI->setDesc(QII->get(NewOp)); // Swap operands S1 and S2. MachineOperand Op1 = MI->getOperand(S1); MachineOperand Op2 = MI->getOperand(S2); ChangeOpInto(MI->getOperand(S1), Op2); ChangeOpInto(MI->getOperand(S2), Op1); } } // if (NewOp) } // if (!Done) } // if (!DisablePNotP) } // Instruction } // Basic Block return true; }
/// ComputeSymbolTable - Compute the symbol table data void MachObjectWriter::ComputeSymbolTable( MCAssembler &Asm, std::vector<MachSymbolData> &LocalSymbolData, std::vector<MachSymbolData> &ExternalSymbolData, std::vector<MachSymbolData> &UndefinedSymbolData) { // Build section lookup table. DenseMap<const MCSection*, uint8_t> SectionIndexMap; unsigned Index = 1; for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it, ++Index) SectionIndexMap[&it->getSection()] = Index; assert(Index <= 256 && "Too many sections!"); // Build the string table. for (MCSymbolData &SD : Asm.symbols()) { const MCSymbol &Symbol = SD.getSymbol(); if (!Asm.isSymbolLinkerVisible(Symbol)) continue; StringTable.add(Symbol.getName()); } StringTable.finalize(StringTableBuilder::MachO); // Build the symbol arrays but only for non-local symbols. // // The particular order that we collect and then sort the symbols is chosen to // match 'as'. Even though it doesn't matter for correctness, this is // important for letting us diff .o files. for (MCSymbolData &SD : Asm.symbols()) { const MCSymbol &Symbol = SD.getSymbol(); // Ignore non-linker visible symbols. if (!Asm.isSymbolLinkerVisible(Symbol)) continue; if (!SD.isExternal() && !Symbol.isUndefined()) continue; MachSymbolData MSD; MSD.SymbolData = &SD; MSD.StringIndex = StringTable.getOffset(Symbol.getName()); if (Symbol.isUndefined()) { MSD.SectionIndex = 0; UndefinedSymbolData.push_back(MSD); } else if (Symbol.isAbsolute()) { MSD.SectionIndex = 0; ExternalSymbolData.push_back(MSD); } else { MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); assert(MSD.SectionIndex && "Invalid section index!"); ExternalSymbolData.push_back(MSD); } } // Now add the data for local symbols. for (MCSymbolData &SD : Asm.symbols()) { const MCSymbol &Symbol = SD.getSymbol(); // Ignore non-linker visible symbols. if (!Asm.isSymbolLinkerVisible(Symbol)) continue; if (SD.isExternal() || Symbol.isUndefined()) continue; MachSymbolData MSD; MSD.SymbolData = &SD; MSD.StringIndex = StringTable.getOffset(Symbol.getName()); if (Symbol.isAbsolute()) { MSD.SectionIndex = 0; LocalSymbolData.push_back(MSD); } else { MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); assert(MSD.SectionIndex && "Invalid section index!"); LocalSymbolData.push_back(MSD); } } // External and undefined symbols are required to be in lexicographic order. std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end()); std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end()); // Set the symbol indices. Index = 0; for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) LocalSymbolData[i].SymbolData->setIndex(Index++); for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) ExternalSymbolData[i].SymbolData->setIndex(Index++); for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) UndefinedSymbolData[i].SymbolData->setIndex(Index++); }
/// \brief Figure out if the loop is worth full unrolling. /// /// Complete loop unrolling can make some loads constant, and we need to know /// if that would expose any further optimization opportunities. This routine /// estimates this optimization. It computes cost of unrolled loop /// (UnrolledCost) and dynamic cost of the original loop (RolledDynamicCost). By /// dynamic cost we mean that we won't count costs of blocks that are known not /// to be executed (i.e. if we have a branch in the loop and we know that at the /// given iteration its condition would be resolved to true, we won't add up the /// cost of the 'false'-block). /// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If /// the analysis failed (no benefits expected from the unrolling, or the loop is /// too big to analyze), the returned value is None. static Optional<EstimatedUnrollCost> analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, const TargetTransformInfo &TTI, int MaxUnrolledLoopSize) { // We want to be able to scale offsets by the trip count and add more offsets // to them without checking for overflows, and we already don't want to // analyze *massive* trip counts, so we force the max to be reasonably small. assert(UnrollMaxIterationsCountToAnalyze < (INT_MAX / 2) && "The unroll iterations max is too large!"); // Only analyze inner loops. We can't properly estimate cost of nested loops // and we won't visit inner loops again anyway. if (!L->empty()) return None; // Don't simulate loops with a big or unknown tripcount if (!UnrollMaxIterationsCountToAnalyze || !TripCount || TripCount > UnrollMaxIterationsCountToAnalyze) return None; SmallSetVector<BasicBlock *, 16> BBWorklist; SmallSetVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitWorklist; DenseMap<Value *, Constant *> SimplifiedValues; SmallVector<std::pair<Value *, Constant *>, 4> SimplifiedInputValues; // The estimated cost of the unrolled form of the loop. We try to estimate // this by simplifying as much as we can while computing the estimate. int UnrolledCost = 0; // We also track the estimated dynamic (that is, actually executed) cost in // the rolled form. This helps identify cases when the savings from unrolling // aren't just exposing dead control flows, but actual reduced dynamic // instructions due to the simplifications which we expect to occur after // unrolling. int RolledDynamicCost = 0; // We track the simplification of each instruction in each iteration. We use // this to recursively merge costs into the unrolled cost on-demand so that // we don't count the cost of any dead code. This is essentially a map from // <instruction, int> to <bool, bool>, but stored as a densely packed struct. DenseSet<UnrolledInstState, UnrolledInstStateKeyInfo> InstCostMap; // A small worklist used to accumulate cost of instructions from each // observable and reached root in the loop. SmallVector<Instruction *, 16> CostWorklist; // PHI-used worklist used between iterations while accumulating cost. SmallVector<Instruction *, 4> PHIUsedList; // Helper function to accumulate cost for instructions in the loop. auto AddCostRecursively = [&](Instruction &RootI, int Iteration) { assert(Iteration >= 0 && "Cannot have a negative iteration!"); assert(CostWorklist.empty() && "Must start with an empty cost list"); assert(PHIUsedList.empty() && "Must start with an empty phi used list"); CostWorklist.push_back(&RootI); for (;; --Iteration) { do { Instruction *I = CostWorklist.pop_back_val(); // InstCostMap only uses I and Iteration as a key, the other two values // don't matter here. auto CostIter = InstCostMap.find({I, Iteration, 0, 0}); if (CostIter == InstCostMap.end()) // If an input to a PHI node comes from a dead path through the loop // we may have no cost data for it here. What that actually means is // that it is free. continue; auto &Cost = *CostIter; if (Cost.IsCounted) // Already counted this instruction. continue; // Mark that we are counting the cost of this instruction now. Cost.IsCounted = true; // If this is a PHI node in the loop header, just add it to the PHI set. if (auto *PhiI = dyn_cast<PHINode>(I)) if (PhiI->getParent() == L->getHeader()) { assert(Cost.IsFree && "Loop PHIs shouldn't be evaluated as they " "inherently simplify during unrolling."); if (Iteration == 0) continue; // Push the incoming value from the backedge into the PHI used list // if it is an in-loop instruction. We'll use this to populate the // cost worklist for the next iteration (as we count backwards). if (auto *OpI = dyn_cast<Instruction>( PhiI->getIncomingValueForBlock(L->getLoopLatch()))) if (L->contains(OpI)) PHIUsedList.push_back(OpI); continue; } // First accumulate the cost of this instruction. if (!Cost.IsFree) { UnrolledCost += TTI.getUserCost(I); DEBUG(dbgs() << "Adding cost of instruction (iteration " << Iteration << "): "); DEBUG(I->dump()); } // We must count the cost of every operand which is not free, // recursively. If we reach a loop PHI node, simply add it to the set // to be considered on the next iteration (backwards!). for (Value *Op : I->operands()) { // Check whether this operand is free due to being a constant or // outside the loop. auto *OpI = dyn_cast<Instruction>(Op); if (!OpI || !L->contains(OpI)) continue; // Otherwise accumulate its cost. CostWorklist.push_back(OpI); } } while (!CostWorklist.empty()); if (PHIUsedList.empty()) // We've exhausted the search. break; assert(Iteration > 0 && "Cannot track PHI-used values past the first iteration!"); CostWorklist.append(PHIUsedList.begin(), PHIUsedList.end()); PHIUsedList.clear(); } }; // Ensure that we don't violate the loop structure invariants relied on by // this analysis. assert(L->isLoopSimplifyForm() && "Must put loop into normal form first."); assert(L->isLCSSAForm(DT) && "Must have loops in LCSSA form to track live-out values."); DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n"); // Simulate execution of each iteration of the loop counting instructions, // which would be simplified. // Since the same load will take different values on different iterations, // we literally have to go through all loop's iterations. for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) { DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n"); // Prepare for the iteration by collecting any simplified entry or backedge // inputs. for (Instruction &I : *L->getHeader()) { auto *PHI = dyn_cast<PHINode>(&I); if (!PHI) break; // The loop header PHI nodes must have exactly two input: one from the // loop preheader and one from the loop latch. assert( PHI->getNumIncomingValues() == 2 && "Must have an incoming value only for the preheader and the latch."); Value *V = PHI->getIncomingValueForBlock( Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch()); Constant *C = dyn_cast<Constant>(V); if (Iteration != 0 && !C) C = SimplifiedValues.lookup(V); if (C) SimplifiedInputValues.push_back({PHI, C}); } // Now clear and re-populate the map for the next iteration. SimplifiedValues.clear(); while (!SimplifiedInputValues.empty()) SimplifiedValues.insert(SimplifiedInputValues.pop_back_val()); UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE, L); BBWorklist.clear(); BBWorklist.insert(L->getHeader()); // Note that we *must not* cache the size, this loop grows the worklist. for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { BasicBlock *BB = BBWorklist[Idx]; // Visit all instructions in the given basic block and try to simplify // it. We don't change the actual IR, just count optimization // opportunities. for (Instruction &I : *BB) { // Track this instruction's expected baseline cost when executing the // rolled loop form. RolledDynamicCost += TTI.getUserCost(&I); // Visit the instruction to analyze its loop cost after unrolling, // and if the visitor returns true, mark the instruction as free after // unrolling and continue. bool IsFree = Analyzer.visit(I); bool Inserted = InstCostMap.insert({&I, (int)Iteration, (unsigned)IsFree, /*IsCounted*/ false}).second; (void)Inserted; assert(Inserted && "Cannot have a state for an unvisited instruction!"); if (IsFree) continue; // If the instruction might have a side-effect recursively account for // the cost of it and all the instructions leading up to it. if (I.mayHaveSideEffects()) AddCostRecursively(I, Iteration); // Can't properly model a cost of a call. // FIXME: With a proper cost model we should be able to do it. if(isa<CallInst>(&I)) return None; // If unrolled body turns out to be too big, bail out. if (UnrolledCost > MaxUnrolledLoopSize) { DEBUG(dbgs() << " Exceeded threshold.. exiting.\n" << " UnrolledCost: " << UnrolledCost << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize << "\n"); return None; } } TerminatorInst *TI = BB->getTerminator(); // Add in the live successors by first checking whether we have terminator // that may be simplified based on the values simplified by this call. BasicBlock *KnownSucc = nullptr; if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { if (BI->isConditional()) { if (Constant *SimpleCond = SimplifiedValues.lookup(BI->getCondition())) { // Just take the first successor if condition is undef if (isa<UndefValue>(SimpleCond)) KnownSucc = BI->getSuccessor(0); else if (ConstantInt *SimpleCondVal = dyn_cast<ConstantInt>(SimpleCond)) KnownSucc = BI->getSuccessor(SimpleCondVal->isZero() ? 1 : 0); } } } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { if (Constant *SimpleCond = SimplifiedValues.lookup(SI->getCondition())) { // Just take the first successor if condition is undef if (isa<UndefValue>(SimpleCond)) KnownSucc = SI->getSuccessor(0); else if (ConstantInt *SimpleCondVal = dyn_cast<ConstantInt>(SimpleCond)) KnownSucc = SI->findCaseValue(SimpleCondVal).getCaseSuccessor(); } } if (KnownSucc) { if (L->contains(KnownSucc)) BBWorklist.insert(KnownSucc); else ExitWorklist.insert({BB, KnownSucc}); continue; } // Add BB's successors to the worklist. for (BasicBlock *Succ : successors(BB)) if (L->contains(Succ)) BBWorklist.insert(Succ); else ExitWorklist.insert({BB, Succ}); AddCostRecursively(*TI, Iteration); } // If we found no optimization opportunities on the first iteration, we // won't find them on later ones too. if (UnrolledCost == RolledDynamicCost) { DEBUG(dbgs() << " No opportunities found.. exiting.\n" << " UnrolledCost: " << UnrolledCost << "\n"); return None; } } while (!ExitWorklist.empty()) { BasicBlock *ExitingBB, *ExitBB; std::tie(ExitingBB, ExitBB) = ExitWorklist.pop_back_val(); for (Instruction &I : *ExitBB) { auto *PN = dyn_cast<PHINode>(&I); if (!PN) break; Value *Op = PN->getIncomingValueForBlock(ExitingBB); if (auto *OpI = dyn_cast<Instruction>(Op)) if (L->contains(OpI)) AddCostRecursively(*OpI, TripCount - 1); } } DEBUG(dbgs() << "Analysis finished:\n" << "UnrolledCost: " << UnrolledCost << ", " << "RolledDynamicCost: " << RolledDynamicCost << "\n"); return {{UnrolledCost, RolledDynamicCost}}; }
bool X86CmovConverterPass::checkForProfitableCmovCandidates( ArrayRef<MachineBasicBlock *> Blocks, CmovGroups &CmovInstGroups) { struct DepthInfo { /// Depth of original loop. unsigned Depth; /// Depth of optimized loop. unsigned OptDepth; }; /// Number of loop iterations to calculate depth for ?! static const unsigned LoopIterations = 2; DenseMap<MachineInstr *, DepthInfo> DepthMap; DepthInfo LoopDepth[LoopIterations] = {{0, 0}, {0, 0}}; enum { PhyRegType = 0, VirRegType = 1, RegTypeNum = 2 }; /// For each register type maps the register to its last def instruction. DenseMap<unsigned, MachineInstr *> RegDefMaps[RegTypeNum]; /// Maps register operand to its def instruction, which can be nullptr if it /// is unknown (e.g., operand is defined outside the loop). DenseMap<MachineOperand *, MachineInstr *> OperandToDefMap; // Set depth of unknown instruction (i.e., nullptr) to zero. DepthMap[nullptr] = {0, 0}; SmallPtrSet<MachineInstr *, 4> CmovInstructions; for (auto &Group : CmovInstGroups) CmovInstructions.insert(Group.begin(), Group.end()); //===--------------------------------------------------------------------===// // Step 1: Calculate instruction depth and loop depth. // Optimized-Loop: // loop with CMOV-group-candidates converted into branches. // // Instruction-Depth: // instruction latency + max operand depth. // * For CMOV instruction in optimized loop the depth is calculated as: // CMOV latency + getDepthOfOptCmov(True-Op-Depth, False-Op-depth) // TODO: Find a better way to estimate the latency of the branch instruction // rather than using the CMOV latency. // // Loop-Depth: // max instruction depth of all instructions in the loop. // Note: instruction with max depth represents the critical-path in the loop. // // Loop-Depth[i]: // Loop-Depth calculated for first `i` iterations. // Note: it is enough to calculate depth for up to two iterations. // // Depth-Diff[i]: // Number of cycles saved in first 'i` iterations by optimizing the loop. //===--------------------------------------------------------------------===// for (unsigned I = 0; I < LoopIterations; ++I) { DepthInfo &MaxDepth = LoopDepth[I]; for (auto *MBB : Blocks) { // Clear physical registers Def map. RegDefMaps[PhyRegType].clear(); for (MachineInstr &MI : *MBB) { // Skip debug instructions. if (MI.isDebugInstr()) continue; unsigned MIDepth = 0; unsigned MIDepthOpt = 0; bool IsCMOV = CmovInstructions.count(&MI); for (auto &MO : MI.uses()) { // Checks for "isUse()" as "uses()" returns also implicit definitions. if (!MO.isReg() || !MO.isUse()) continue; unsigned Reg = MO.getReg(); auto &RDM = RegDefMaps[TargetRegisterInfo::isVirtualRegister(Reg)]; if (MachineInstr *DefMI = RDM.lookup(Reg)) { OperandToDefMap[&MO] = DefMI; DepthInfo Info = DepthMap.lookup(DefMI); MIDepth = std::max(MIDepth, Info.Depth); if (!IsCMOV) MIDepthOpt = std::max(MIDepthOpt, Info.OptDepth); } } if (IsCMOV) MIDepthOpt = getDepthOfOptCmov( DepthMap[OperandToDefMap.lookup(&MI.getOperand(1))].OptDepth, DepthMap[OperandToDefMap.lookup(&MI.getOperand(2))].OptDepth); // Iterates over all operands to handle implicit definitions as well. for (auto &MO : MI.operands()) { if (!MO.isReg() || !MO.isDef()) continue; unsigned Reg = MO.getReg(); RegDefMaps[TargetRegisterInfo::isVirtualRegister(Reg)][Reg] = &MI; } unsigned Latency = TSchedModel.computeInstrLatency(&MI); DepthMap[&MI] = {MIDepth += Latency, MIDepthOpt += Latency}; MaxDepth.Depth = std::max(MaxDepth.Depth, MIDepth); MaxDepth.OptDepth = std::max(MaxDepth.OptDepth, MIDepthOpt); } } } unsigned Diff[LoopIterations] = {LoopDepth[0].Depth - LoopDepth[0].OptDepth, LoopDepth[1].Depth - LoopDepth[1].OptDepth}; //===--------------------------------------------------------------------===// // Step 2: Check if Loop worth to be optimized. // Worth-Optimize-Loop: // case 1: Diff[1] == Diff[0] // Critical-path is iteration independent - there is no dependency // of critical-path instructions on critical-path instructions of // previous iteration. // Thus, it is enough to check gain percent of 1st iteration - // To be conservative, the optimized loop need to have a depth of // 12.5% cycles less than original loop, per iteration. // // case 2: Diff[1] > Diff[0] // Critical-path is iteration dependent - there is dependency of // critical-path instructions on critical-path instructions of // previous iteration. // Thus, check the gain percent of the 2nd iteration (similar to the // previous case), but it is also required to check the gradient of // the gain - the change in Depth-Diff compared to the change in // Loop-Depth between 1st and 2nd iterations. // To be conservative, the gradient need to be at least 50%. // // In addition, In order not to optimize loops with very small gain, the // gain (in cycles) after 2nd iteration should not be less than a given // threshold. Thus, the check (Diff[1] >= GainCycleThreshold) must apply. // // If loop is not worth optimizing, remove all CMOV-group-candidates. //===--------------------------------------------------------------------===// if (Diff[1] < GainCycleThreshold) return false; bool WorthOptLoop = false; if (Diff[1] == Diff[0]) WorthOptLoop = Diff[0] * 8 >= LoopDepth[0].Depth; else if (Diff[1] > Diff[0]) WorthOptLoop = (Diff[1] - Diff[0]) * 2 >= (LoopDepth[1].Depth - LoopDepth[0].Depth) && (Diff[1] * 8 >= LoopDepth[1].Depth); if (!WorthOptLoop) return false; ++NumOfLoopCandidate; //===--------------------------------------------------------------------===// // Step 3: Check for each CMOV-group-candidate if it worth to be optimized. // Worth-Optimize-Group: // Iff it worths to optimize all CMOV instructions in the group. // // Worth-Optimize-CMOV: // Predicted branch is faster than CMOV by the difference between depth of // condition operand and depth of taken (predicted) value operand. // To be conservative, the gain of such CMOV transformation should cover at // at least 25% of branch-misprediction-penalty. //===--------------------------------------------------------------------===// unsigned MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty; CmovGroups TempGroups; std::swap(TempGroups, CmovInstGroups); for (auto &Group : TempGroups) { bool WorthOpGroup = true; for (auto *MI : Group) { // Avoid CMOV instruction which value is used as a pointer to load from. // This is another conservative check to avoid converting CMOV instruction // used with tree-search like algorithm, where the branch is unpredicted. auto UIs = MRI->use_instructions(MI->defs().begin()->getReg()); if (UIs.begin() != UIs.end() && ++UIs.begin() == UIs.end()) { unsigned Op = UIs.begin()->getOpcode(); if (Op == X86::MOV64rm || Op == X86::MOV32rm) { WorthOpGroup = false; break; } } unsigned CondCost = DepthMap[OperandToDefMap.lookup(&MI->getOperand(3))].Depth; unsigned ValCost = getDepthOfOptCmov( DepthMap[OperandToDefMap.lookup(&MI->getOperand(1))].Depth, DepthMap[OperandToDefMap.lookup(&MI->getOperand(2))].Depth); if (ValCost > CondCost || (CondCost - ValCost) * 4 < MispredictPenalty) { WorthOpGroup = false; break; } } if (WorthOpGroup) CmovInstGroups.push_back(Group); } return !CmovInstGroups.empty(); }
/// Merge the linker flags in Src into the Dest module. Error IRLinker::linkModuleFlagsMetadata() { // If the source module has no module flags, we are done. const NamedMDNode *SrcModFlags = SrcM->getModuleFlagsMetadata(); if (!SrcModFlags) return Error::success(); // If the destination module doesn't have module flags yet, then just copy // over the source module's flags. NamedMDNode *DstModFlags = DstM.getOrInsertModuleFlagsMetadata(); if (DstModFlags->getNumOperands() == 0) { for (unsigned I = 0, E = SrcModFlags->getNumOperands(); I != E; ++I) DstModFlags->addOperand(SrcModFlags->getOperand(I)); return Error::success(); } // First build a map of the existing module flags and requirements. DenseMap<MDString *, std::pair<MDNode *, unsigned>> Flags; SmallSetVector<MDNode *, 16> Requirements; for (unsigned I = 0, E = DstModFlags->getNumOperands(); I != E; ++I) { MDNode *Op = DstModFlags->getOperand(I); ConstantInt *Behavior = mdconst::extract<ConstantInt>(Op->getOperand(0)); MDString *ID = cast<MDString>(Op->getOperand(1)); if (Behavior->getZExtValue() == Module::Require) { Requirements.insert(cast<MDNode>(Op->getOperand(2))); } else { Flags[ID] = std::make_pair(Op, I); } } // Merge in the flags from the source module, and also collect its set of // requirements. for (unsigned I = 0, E = SrcModFlags->getNumOperands(); I != E; ++I) { MDNode *SrcOp = SrcModFlags->getOperand(I); ConstantInt *SrcBehavior = mdconst::extract<ConstantInt>(SrcOp->getOperand(0)); MDString *ID = cast<MDString>(SrcOp->getOperand(1)); MDNode *DstOp; unsigned DstIndex; std::tie(DstOp, DstIndex) = Flags.lookup(ID); unsigned SrcBehaviorValue = SrcBehavior->getZExtValue(); // If this is a requirement, add it and continue. if (SrcBehaviorValue == Module::Require) { // If the destination module does not already have this requirement, add // it. if (Requirements.insert(cast<MDNode>(SrcOp->getOperand(2)))) { DstModFlags->addOperand(SrcOp); } continue; } // If there is no existing flag with this ID, just add it. if (!DstOp) { Flags[ID] = std::make_pair(SrcOp, DstModFlags->getNumOperands()); DstModFlags->addOperand(SrcOp); continue; } // Otherwise, perform a merge. ConstantInt *DstBehavior = mdconst::extract<ConstantInt>(DstOp->getOperand(0)); unsigned DstBehaviorValue = DstBehavior->getZExtValue(); // If either flag has override behavior, handle it first. if (DstBehaviorValue == Module::Override) { // Diagnose inconsistent flags which both have override behavior. if (SrcBehaviorValue == Module::Override && SrcOp->getOperand(2) != DstOp->getOperand(2)) return stringErr("linking module flags '" + ID->getString() + "': IDs have conflicting override values"); continue; } else if (SrcBehaviorValue == Module::Override) { // Update the destination flag to that of the source. DstModFlags->setOperand(DstIndex, SrcOp); Flags[ID].first = SrcOp; continue; } // Diagnose inconsistent merge behavior types. if (SrcBehaviorValue != DstBehaviorValue) return stringErr("linking module flags '" + ID->getString() + "': IDs have conflicting behaviors"); auto replaceDstValue = [&](MDNode *New) { Metadata *FlagOps[] = {DstOp->getOperand(0), ID, New}; MDNode *Flag = MDNode::get(DstM.getContext(), FlagOps); DstModFlags->setOperand(DstIndex, Flag); Flags[ID].first = Flag; }; // Perform the merge for standard behavior types. switch (SrcBehaviorValue) { case Module::Require: case Module::Override: llvm_unreachable("not possible"); case Module::Error: { // Emit an error if the values differ. if (SrcOp->getOperand(2) != DstOp->getOperand(2)) return stringErr("linking module flags '" + ID->getString() + "': IDs have conflicting values"); continue; } case Module::Warning: { // Emit a warning if the values differ. if (SrcOp->getOperand(2) != DstOp->getOperand(2)) { emitWarning("linking module flags '" + ID->getString() + "': IDs have conflicting values"); } continue; } case Module::Append: { MDNode *DstValue = cast<MDNode>(DstOp->getOperand(2)); MDNode *SrcValue = cast<MDNode>(SrcOp->getOperand(2)); SmallVector<Metadata *, 8> MDs; MDs.reserve(DstValue->getNumOperands() + SrcValue->getNumOperands()); MDs.append(DstValue->op_begin(), DstValue->op_end()); MDs.append(SrcValue->op_begin(), SrcValue->op_end()); replaceDstValue(MDNode::get(DstM.getContext(), MDs)); break; } case Module::AppendUnique: { SmallSetVector<Metadata *, 16> Elts; MDNode *DstValue = cast<MDNode>(DstOp->getOperand(2)); MDNode *SrcValue = cast<MDNode>(SrcOp->getOperand(2)); Elts.insert(DstValue->op_begin(), DstValue->op_end()); Elts.insert(SrcValue->op_begin(), SrcValue->op_end()); replaceDstValue(MDNode::get(DstM.getContext(), makeArrayRef(Elts.begin(), Elts.end()))); break; } } } // Check all of the requirements. for (unsigned I = 0, E = Requirements.size(); I != E; ++I) { MDNode *Requirement = Requirements[I]; MDString *Flag = cast<MDString>(Requirement->getOperand(0)); Metadata *ReqValue = Requirement->getOperand(1); MDNode *Op = Flags[Flag].first; if (!Op || Op->getOperand(2) != ReqValue) return stringErr("linking module flags '" + Flag->getString() + "': does not have the required value"); } return Error::success(); }
/// \brief Figure out if the loop is worth full unrolling. /// /// Complete loop unrolling can make some loads constant, and we need to know /// if that would expose any further optimization opportunities. This routine /// estimates this optimization. It computes cost of unrolled loop /// (UnrolledCost) and dynamic cost of the original loop (RolledDynamicCost). By /// dynamic cost we mean that we won't count costs of blocks that are known not /// to be executed (i.e. if we have a branch in the loop and we know that at the /// given iteration its condition would be resolved to true, we won't add up the /// cost of the 'false'-block). /// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If /// the analysis failed (no benefits expected from the unrolling, or the loop is /// too big to analyze), the returned value is None. static Optional<EstimatedUnrollCost> analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, const TargetTransformInfo &TTI, int MaxUnrolledLoopSize) { // We want to be able to scale offsets by the trip count and add more offsets // to them without checking for overflows, and we already don't want to // analyze *massive* trip counts, so we force the max to be reasonably small. assert(UnrollMaxIterationsCountToAnalyze < (INT_MAX / 2) && "The unroll iterations max is too large!"); // Don't simulate loops with a big or unknown tripcount if (!UnrollMaxIterationsCountToAnalyze || !TripCount || TripCount > UnrollMaxIterationsCountToAnalyze) return None; SmallSetVector<BasicBlock *, 16> BBWorklist; DenseMap<Value *, Constant *> SimplifiedValues; SmallVector<std::pair<Value *, Constant *>, 4> SimplifiedInputValues; // The estimated cost of the unrolled form of the loop. We try to estimate // this by simplifying as much as we can while computing the estimate. int UnrolledCost = 0; // We also track the estimated dynamic (that is, actually executed) cost in // the rolled form. This helps identify cases when the savings from unrolling // aren't just exposing dead control flows, but actual reduced dynamic // instructions due to the simplifications which we expect to occur after // unrolling. int RolledDynamicCost = 0; // Ensure that we don't violate the loop structure invariants relied on by // this analysis. assert(L->isLoopSimplifyForm() && "Must put loop into normal form first."); assert(L->isLCSSAForm(DT) && "Must have loops in LCSSA form to track live-out values."); DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n"); // Simulate execution of each iteration of the loop counting instructions, // which would be simplified. // Since the same load will take different values on different iterations, // we literally have to go through all loop's iterations. for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) { DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n"); // Prepare for the iteration by collecting any simplified entry or backedge // inputs. for (Instruction &I : *L->getHeader()) { auto *PHI = dyn_cast<PHINode>(&I); if (!PHI) break; // The loop header PHI nodes must have exactly two input: one from the // loop preheader and one from the loop latch. assert( PHI->getNumIncomingValues() == 2 && "Must have an incoming value only for the preheader and the latch."); Value *V = PHI->getIncomingValueForBlock( Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch()); Constant *C = dyn_cast<Constant>(V); if (Iteration != 0 && !C) C = SimplifiedValues.lookup(V); if (C) SimplifiedInputValues.push_back({PHI, C}); } // Now clear and re-populate the map for the next iteration. SimplifiedValues.clear(); while (!SimplifiedInputValues.empty()) SimplifiedValues.insert(SimplifiedInputValues.pop_back_val()); UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE); BBWorklist.clear(); BBWorklist.insert(L->getHeader()); // Note that we *must not* cache the size, this loop grows the worklist. for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { BasicBlock *BB = BBWorklist[Idx]; // Visit all instructions in the given basic block and try to simplify // it. We don't change the actual IR, just count optimization // opportunities. for (Instruction &I : *BB) { int InstCost = TTI.getUserCost(&I); // Visit the instruction to analyze its loop cost after unrolling, // and if the visitor returns false, include this instruction in the // unrolled cost. if (!Analyzer.visit(I)) UnrolledCost += InstCost; else { DEBUG(dbgs() << " " << I << " would be simplified if loop is unrolled.\n"); (void)0; } // Also track this instructions expected cost when executing the rolled // loop form. RolledDynamicCost += InstCost; // If unrolled body turns out to be too big, bail out. if (UnrolledCost > MaxUnrolledLoopSize) { DEBUG(dbgs() << " Exceeded threshold.. exiting.\n" << " UnrolledCost: " << UnrolledCost << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize << "\n"); return None; } } TerminatorInst *TI = BB->getTerminator(); // Add in the live successors by first checking whether we have terminator // that may be simplified based on the values simplified by this call. if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { if (BI->isConditional()) { if (Constant *SimpleCond = SimplifiedValues.lookup(BI->getCondition())) { BasicBlock *Succ = nullptr; // Just take the first successor if condition is undef if (isa<UndefValue>(SimpleCond)) Succ = BI->getSuccessor(0); else Succ = BI->getSuccessor( cast<ConstantInt>(SimpleCond)->isZero() ? 1 : 0); if (L->contains(Succ)) BBWorklist.insert(Succ); continue; } } } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { if (Constant *SimpleCond = SimplifiedValues.lookup(SI->getCondition())) { BasicBlock *Succ = nullptr; // Just take the first successor if condition is undef if (isa<UndefValue>(SimpleCond)) Succ = SI->getSuccessor(0); else Succ = SI->findCaseValue(cast<ConstantInt>(SimpleCond)) .getCaseSuccessor(); if (L->contains(Succ)) BBWorklist.insert(Succ); continue; } } // Add BB's successors to the worklist. for (BasicBlock *Succ : successors(BB)) if (L->contains(Succ)) BBWorklist.insert(Succ); } // If we found no optimization opportunities on the first iteration, we // won't find them on later ones too. if (UnrolledCost == RolledDynamicCost) { DEBUG(dbgs() << " No opportunities found.. exiting.\n" << " UnrolledCost: " << UnrolledCost << "\n"); return None; } } DEBUG(dbgs() << "Analysis finished:\n" << "UnrolledCost: " << UnrolledCost << ", " << "RolledDynamicCost: " << RolledDynamicCost << "\n"); return {{UnrolledCost, RolledDynamicCost}}; }