Example #1
DenseMap<SectionBase *, int> elf::buildSectionOrder() {
  DenseMap<SectionBase *, int> SectionOrder;
  if (Config->SymbolOrderingFile.empty())
    return SectionOrder;

  // Build a map from symbols to their priorities. Symbols that didn't
  // appear in the symbol ordering file have the lowest priority 0.
  // All explicitly mentioned symbols have negative (higher) priorities.
  DenseMap<StringRef, int> SymbolOrder;
  int Priority = -Config->SymbolOrderingFile.size();
  for (StringRef S : Config->SymbolOrderingFile)
    SymbolOrder.insert({S, Priority++});

  // Build a map from sections to their priorities.
  for (InputFile *File : ObjectFiles) {
    for (Symbol *Sym : File->getSymbols()) {
      auto *D = dyn_cast<Defined>(Sym);
      if (!D || !D->Section)
        continue;
      int &Priority = SectionOrder[D->Section];
      Priority = std::min(Priority, SymbolOrder.lookup(D->getName()));
    }
  }
  return SectionOrder;
}
Example #2
void MCMachOStreamer::FinishImpl() {
  EmitFrames(&getAssembler().getBackend(), true);

  // We have to set the fragment atom associations so we can relax properly for
  // Mach-O.

  // First, scan the symbol table to build a lookup table from fragments to
  // defining symbols.
  DenseMap<const MCFragment*, MCSymbolData*> DefiningSymbolMap;
  for (MCSymbolData &SD : getAssembler().symbols()) {
    if (getAssembler().isSymbolLinkerVisible(SD.getSymbol()) &&
        SD.getFragment()) {
      // An atom defining symbol should never be internal to a fragment.
      assert(SD.getOffset() == 0 && "Invalid offset in atom defining symbol!");
      DefiningSymbolMap[SD.getFragment()] = &SD;
    }
  }

  // Set the fragment atom associations by tracking the last seen atom defining
  // symbol.
  for (MCAssembler::iterator it = getAssembler().begin(),
         ie = getAssembler().end(); it != ie; ++it) {
    MCSymbolData *CurrentAtom = nullptr;
    for (MCSectionData::iterator it2 = it->begin(),
           ie2 = it->end(); it2 != ie2; ++it2) {
      if (MCSymbolData *SD = DefiningSymbolMap.lookup(it2))
        CurrentAtom = SD;
      it2->setAtom(CurrentAtom);
    }
  }

  this->MCObjectStreamer::FinishImpl();
}
Example #3
bool llvm::stripDebugInfo(Function &F) {
  bool Changed = false;
  if (F.getSubprogram()) {
    Changed = true;
    F.setSubprogram(nullptr);
  }

  DenseMap<MDNode*, MDNode*> LoopIDsMap;
  for (BasicBlock &BB : F) {
    for (auto II = BB.begin(), End = BB.end(); II != End;) {
      Instruction &I = *II++; // We may delete the instruction, increment now.
      if (isa<DbgInfoIntrinsic>(&I)) {
        I.eraseFromParent();
        Changed = true;
        continue;
      }
      if (I.getDebugLoc()) {
        Changed = true;
        I.setDebugLoc(DebugLoc());
      }
    }

    auto *TermInst = BB.getTerminator();
    if (auto *LoopID = TermInst->getMetadata(LLVMContext::MD_loop)) {
      auto *NewLoopID = LoopIDsMap.lookup(LoopID);
      if (!NewLoopID)
        NewLoopID = LoopIDsMap[LoopID] = stripDebugLocFromLoopID(LoopID);
      if (NewLoopID != LoopID)
        TermInst->setMetadata(LLVMContext::MD_loop, NewLoopID);
    }
  }
  return Changed;
}
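
A note on why the LoopIDsMap code above can test the result of lookup() directly: for a missing key, DenseMap::lookup returns a value-initialized mapped value, which is nullptr for pointer types, so a single call both probes the cache and yields the cached result. A minimal standalone sketch of that lookup-then-cache idiom, assuming only the LLVM ADT headers; transform() is a hypothetical stand-in for stripDebugLocFromLoopID:

#include "llvm/ADT/DenseMap.h"

// Hypothetical stand-in for an expensive transform such as
// stripDebugLocFromLoopID; here it just returns its input.
static int *transform(int *Key) { return Key; }

static int *lookupOrCompute(llvm::DenseMap<int *, int *> &Cache, int *Key) {
  if (int *Cached = Cache.lookup(Key)) // nullptr when Key is absent
    return Cached;
  return Cache[Key] = transform(Key);  // insert and return in one step
}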
Example #4
void MCMachOStreamer::FinishImpl() {
  EmitFrames(&getAssembler().getBackend());

  // We have to set the fragment atom associations so we can relax properly for
  // Mach-O.

  // First, scan the symbol table to build a lookup table from fragments to
  // defining symbols.
  DenseMap<const MCFragment *, const MCSymbol *> DefiningSymbolMap;
  for (const MCSymbol &Symbol : getAssembler().symbols()) {
    if (getAssembler().isSymbolLinkerVisible(Symbol) && Symbol.isInSection() &&
        !Symbol.isVariable()) {
      // An atom defining symbol should never be internal to a fragment.
      assert(Symbol.getOffset() == 0 &&
             "Invalid offset in atom defining symbol!");
      DefiningSymbolMap[Symbol.getFragment()] = &Symbol;
    }
  }

  // Set the fragment atom associations by tracking the last seen atom defining
  // symbol.
  for (MCSection &Sec : getAssembler()) {
    const MCSymbol *CurrentAtom = nullptr;
    for (MCFragment &Frag : Sec) {
      if (const MCSymbol *Symbol = DefiningSymbolMap.lookup(&Frag))
        CurrentAtom = Symbol;
      Frag.setAtom(CurrentAtom);
    }
  }

  this->MCObjectStreamer::FinishImpl();
}
Example #5
  // TODO: We should also visit ICmp, FCmp, GetElementPtr, Trunc, ZExt, SExt,
  // FPTrunc, FPExt, FPToUI, FPToSI, UIToFP, SIToFP, BitCast, Select,
  // ExtractElement, InsertElement, ShuffleVector, ExtractValue, InsertValue.
  //
  // Probably it's worth hoisting the code for estimating the simplification
  // effects into a separate class, since we already have very similar code in
  // InlineCost.
  bool visitBinaryOperator(BinaryOperator &I) {
    Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
    if (!isa<Constant>(LHS))
      if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
        LHS = SimpleLHS;
    if (!isa<Constant>(RHS))
      if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
        RHS = SimpleRHS;
    Value *SimpleV = nullptr;
    if (auto FI = dyn_cast<FPMathOperator>(&I))
      SimpleV =
          SimplifyFPBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags());
    else
      SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS);

    if (SimpleV && CountedInsns.insert(&I).second)
      NumberOfOptimizedInstructions += TTI.getUserCost(&I);

    if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) {
      SimplifiedValues[&I] = C;
      return true;
    }
    return false;
  }
Example #6
/// assignInstructionRanges - Find ranges of instructions covered by each
/// lexical scope.
void LexicalScopes::assignInstructionRanges(
    SmallVectorImpl<InsnRange> &MIRanges,
    DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) {
  LexicalScope *PrevLexicalScope = nullptr;
  for (const auto &R : MIRanges) {
    LexicalScope *S = MI2ScopeMap.lookup(R.first);
    assert(S && "Lost LexicalScope for a machine instruction!");
    if (PrevLexicalScope && !PrevLexicalScope->dominates(S))
      PrevLexicalScope->closeInsnRange(S);
    S->openInsnRange(R.first);
    S->extendInsnRange(R.second);
    PrevLexicalScope = S;
  }

  if (PrevLexicalScope)
    PrevLexicalScope->closeInsnRange();
}
Example #7
/// assignInstructionRanges - Find ranges of instructions covered by each
/// lexical scope.
void LexicalScopes::
assignInstructionRanges(SmallVectorImpl<InsnRange> &MIRanges,
                    DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap)
{
  LexicalScope *PrevLexicalScope = NULL;
  for (SmallVectorImpl<InsnRange>::const_iterator RI = MIRanges.begin(),
         RE = MIRanges.end(); RI != RE; ++RI) {
    const InsnRange &R = *RI;
    LexicalScope *S = MI2ScopeMap.lookup(R.first);
    assert (S && "Lost LexicalScope for a machine instruction!");
    if (PrevLexicalScope && !PrevLexicalScope->dominates(S))
      PrevLexicalScope->closeInsnRange(S);
    S->openInsnRange(R.first);
    S->extendInsnRange(R.second);
    PrevLexicalScope = S;
  }

  if (PrevLexicalScope)
    PrevLexicalScope->closeInsnRange();
}
Example #8
/// computeSymbolTable - Compute the symbol table data
void MachObjectWriter::computeSymbolTable(
    MCAssembler &Asm, std::vector<MachSymbolData> &LocalSymbolData,
    std::vector<MachSymbolData> &ExternalSymbolData,
    std::vector<MachSymbolData> &UndefinedSymbolData) {
  // Build section lookup table.
  DenseMap<const MCSection*, uint8_t> SectionIndexMap;
  unsigned Index = 1;
  for (MCAssembler::iterator it = Asm.begin(),
         ie = Asm.end(); it != ie; ++it, ++Index)
    SectionIndexMap[&*it] = Index;
  assert(Index <= 256 && "Too many sections!");

  // Build the string table.
  for (const MCSymbol &Symbol : Asm.symbols()) {
    if (!Asm.isSymbolLinkerVisible(Symbol))
      continue;

    StringTable.add(Symbol.getName());
  }
  StringTable.finalize();

  // Build the symbol arrays but only for non-local symbols.
  //
  // The particular order that we collect and then sort the symbols is chosen to
  // match 'as'. Even though it doesn't matter for correctness, this is
  // important for letting us diff .o files.
  for (const MCSymbol &Symbol : Asm.symbols()) {
    // Ignore non-linker visible symbols.
    if (!Asm.isSymbolLinkerVisible(Symbol))
      continue;

    if (!Symbol.isExternal() && !Symbol.isUndefined())
      continue;

    MachSymbolData MSD;
    MSD.Symbol = &Symbol;
    MSD.StringIndex = StringTable.getOffset(Symbol.getName());

    if (Symbol.isUndefined()) {
      MSD.SectionIndex = 0;
      UndefinedSymbolData.push_back(MSD);
    } else if (Symbol.isAbsolute()) {
      MSD.SectionIndex = 0;
      ExternalSymbolData.push_back(MSD);
    } else {
      MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
      assert(MSD.SectionIndex && "Invalid section index!");
      ExternalSymbolData.push_back(MSD);
    }
  }

  // Now add the data for local symbols.
  for (const MCSymbol &Symbol : Asm.symbols()) {
    // Ignore non-linker visible symbols.
    if (!Asm.isSymbolLinkerVisible(Symbol))
      continue;

    if (Symbol.isExternal() || Symbol.isUndefined())
      continue;

    MachSymbolData MSD;
    MSD.Symbol = &Symbol;
    MSD.StringIndex = StringTable.getOffset(Symbol.getName());

    if (Symbol.isAbsolute()) {
      MSD.SectionIndex = 0;
      LocalSymbolData.push_back(MSD);
    } else {
      MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
      assert(MSD.SectionIndex && "Invalid section index!");
      LocalSymbolData.push_back(MSD);
    }
  }

  // External and undefined symbols are required to be in lexicographic order.
  std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end());
  std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end());

  // Set the symbol indices.
  Index = 0;
  for (auto *SymbolData :
       {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData})
    for (MachSymbolData &Entry : *SymbolData)
      Entry.Symbol->setIndex(Index++);

  for (const MCSection &Section : Asm) {
    for (RelAndSymbol &Rel : Relocations[&Section]) {
      if (!Rel.Sym)
        continue;

      // Set the Index and the IsExtern bit.
      unsigned Index = Rel.Sym->getIndex();
      assert(isInt<24>(Index));
      if (IsLittleEndian)
        Rel.MRE.r_word1 = (Rel.MRE.r_word1 & (~0U << 24)) | Index | (1 << 27);
      else
        Rel.MRE.r_word1 = (Rel.MRE.r_word1 & 0xff) | Index << 8 | (1 << 4);
    }
  }
}
Example #9
/// ComputeSymbolTable - Compute the symbol table data
///
/// \param StringTable [out] - The string table data.
void MachObjectWriter::
ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable,
                   std::vector<MachSymbolData> &LocalSymbolData,
                   std::vector<MachSymbolData> &ExternalSymbolData,
                   std::vector<MachSymbolData> &UndefinedSymbolData) {
  // Build section lookup table.
  DenseMap<const MCSection*, uint8_t> SectionIndexMap;
  unsigned Index = 1;
  for (MCAssembler::iterator it = Asm.begin(),
         ie = Asm.end(); it != ie; ++it, ++Index)
    SectionIndexMap[&it->getSection()] = Index;
  assert(Index <= 256 && "Too many sections!");

  // Index 0 is always the empty string.
  StringMap<uint64_t> StringIndexMap;
  StringTable += '\x00';

  // Build the symbol arrays and the string table, but only for non-local
  // symbols.
  //
  // The particular order that we collect the symbols and create the string
  // table, then sort the symbols is chosen to match 'as'. Even though it
  // doesn't matter for correctness, this is important for letting us diff .o
  // files.
  for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
         ie = Asm.symbol_end(); it != ie; ++it) {
    const MCSymbol &Symbol = it->getSymbol();

    // Ignore non-linker visible symbols.
    if (!Asm.isSymbolLinkerVisible(it->getSymbol()))
      continue;

    if (!it->isExternal() && !Symbol.isUndefined())
      continue;

    uint64_t &Entry = StringIndexMap[Symbol.getName()];
    if (!Entry) {
      Entry = StringTable.size();
      StringTable += Symbol.getName();
      StringTable += '\x00';
    }

    MachSymbolData MSD;
    MSD.SymbolData = it;
    MSD.StringIndex = Entry;

    if (Symbol.isUndefined()) {
      MSD.SectionIndex = 0;
      UndefinedSymbolData.push_back(MSD);
    } else if (Symbol.isAbsolute()) {
      MSD.SectionIndex = 0;
      ExternalSymbolData.push_back(MSD);
    } else {
      MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
      assert(MSD.SectionIndex && "Invalid section index!");
      ExternalSymbolData.push_back(MSD);
    }
  }

  // Now add the data for local symbols.
  for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
         ie = Asm.symbol_end(); it != ie; ++it) {
    const MCSymbol &Symbol = it->getSymbol();

    // Ignore non-linker visible symbols.
    if (!Asm.isSymbolLinkerVisible(it->getSymbol()))
      continue;

    if (it->isExternal() || Symbol.isUndefined())
      continue;

    uint64_t &Entry = StringIndexMap[Symbol.getName()];
    if (!Entry) {
      Entry = StringTable.size();
      StringTable += Symbol.getName();
      StringTable += '\x00';
    }

    MachSymbolData MSD;
    MSD.SymbolData = it;
    MSD.StringIndex = Entry;

    if (Symbol.isAbsolute()) {
      MSD.SectionIndex = 0;
      LocalSymbolData.push_back(MSD);
    } else {
      MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
      assert(MSD.SectionIndex && "Invalid section index!");
      LocalSymbolData.push_back(MSD);
    }
  }

  // External and undefined symbols are required to be in lexicographic order.
  std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end());
  std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end());

  // Set the symbol indices.
  Index = 0;
  for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
    LocalSymbolData[i].SymbolData->setIndex(Index++);
  for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
    ExternalSymbolData[i].SymbolData->setIndex(Index++);
  for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
    UndefinedSymbolData[i].SymbolData->setIndex(Index++);

  // The string table is padded to a multiple of 4.
  while (StringTable.size() % 4)
    StringTable += '\x00';
}
Example #10
bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
  QII = static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo());
  QRI = MF.getTarget().getSubtarget<HexagonSubtarget>().getRegisterInfo();
  MRI = &MF.getRegInfo();

  DenseMap<unsigned, unsigned> PeepholeMap;
  DenseMap<unsigned, std::pair<unsigned, unsigned> > PeepholeDoubleRegsMap;

  if (DisableHexagonPeephole) return false;

  // Loop over all of the basic blocks.
  for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end();
       MBBb != MBBe; ++MBBb) {
    MachineBasicBlock* MBB = MBBb;
    PeepholeMap.clear();
    PeepholeDoubleRegsMap.clear();

    // Traverse the basic block.
    for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
                                     ++MII) {
      MachineInstr *MI = MII;
      // Look for sign extends:
      // %vreg170<def> = SXTW %vreg166
      if (!DisableOptSZExt && MI->getOpcode() == Hexagon::A2_sxtw) {
        assert (MI->getNumOperands() == 2);
        MachineOperand &Dst = MI->getOperand(0);
        MachineOperand &Src  = MI->getOperand(1);
        unsigned DstReg = Dst.getReg();
        unsigned SrcReg = Src.getReg();
        // Just handle virtual registers.
        if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
            TargetRegisterInfo::isVirtualRegister(SrcReg)) {
          // Map the following:
          // %vreg170<def> = SXTW %vreg166
          // PeepholeMap[170] = vreg166
          PeepholeMap[DstReg] = SrcReg;
        }
      }

      // Look for %vreg170<def> = COMBINE_ir_V4 (0, %vreg169)
      // %vreg170:DoubleRegs, %vreg169:IntRegs
      if (!DisableOptExtTo64 &&
          MI->getOpcode() == Hexagon::COMBINE_Ir_V4) {
        assert (MI->getNumOperands() == 3);
        MachineOperand &Dst = MI->getOperand(0);
        MachineOperand &Src1 = MI->getOperand(1);
        MachineOperand &Src2 = MI->getOperand(2);
        if (Src1.getImm() != 0)
          continue;
        unsigned DstReg = Dst.getReg();
        unsigned SrcReg = Src2.getReg();
        PeepholeMap[DstReg] = SrcReg;
      }

      // Look for this sequence below
      // %vregDoubleReg1 = LSRd_ri %vregDoubleReg0, 32
      // %vregIntReg = COPY %vregDoubleReg1:subreg_loreg.
      // and convert into
      // %vregIntReg = COPY %vregDoubleReg0:subreg_hireg.
      if (MI->getOpcode() == Hexagon::LSRd_ri) {
        assert(MI->getNumOperands() == 3);
        MachineOperand &Dst = MI->getOperand(0);
        MachineOperand &Src1 = MI->getOperand(1);
        MachineOperand &Src2 = MI->getOperand(2);
        if (Src2.getImm() != 32)
          continue;
        unsigned DstReg = Dst.getReg();
        unsigned SrcReg = Src1.getReg();
        PeepholeDoubleRegsMap[DstReg] =
          std::make_pair(SrcReg, 1 /*Hexagon::subreg_hireg*/);
      }

      // Look for P=NOT(P).
      if (!DisablePNotP &&
          (MI->getOpcode() == Hexagon::C2_not)) {
        assert (MI->getNumOperands() == 2);
        MachineOperand &Dst = MI->getOperand(0);
        MachineOperand &Src  = MI->getOperand(1);
        unsigned DstReg = Dst.getReg();
        unsigned SrcReg = Src.getReg();
        // Just handle virtual registers.
        if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
            TargetRegisterInfo::isVirtualRegister(SrcReg)) {
          // Map the following:
          // %vreg170<def> = NOT_xx %vreg166
          // PeepholeMap[170] = vreg166
          PeepholeMap[DstReg] = SrcReg;
        }
      }

      // Look for copy:
      // %vreg176<def> = COPY %vreg170:subreg_loreg
      if (!DisableOptSZExt && MI->isCopy()) {
        assert (MI->getNumOperands() == 2);
        MachineOperand &Dst = MI->getOperand(0);
        MachineOperand &Src  = MI->getOperand(1);

        // Make sure we are copying the lower 32 bits.
        if (Src.getSubReg() != Hexagon::subreg_loreg)
          continue;

        unsigned DstReg = Dst.getReg();
        unsigned SrcReg = Src.getReg();
        if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
            TargetRegisterInfo::isVirtualRegister(SrcReg)) {
          // Try to find in the map.
          if (unsigned PeepholeSrc = PeepholeMap.lookup(SrcReg)) {
            // Change the 1st operand.
            MI->RemoveOperand(1);
            MI->addOperand(MachineOperand::CreateReg(PeepholeSrc, false));
          } else  {
            DenseMap<unsigned, std::pair<unsigned, unsigned> >::iterator DI =
              PeepholeDoubleRegsMap.find(SrcReg);
            if (DI != PeepholeDoubleRegsMap.end()) {
              std::pair<unsigned,unsigned> PeepholeSrc = DI->second;
              MI->RemoveOperand(1);
              MI->addOperand(MachineOperand::CreateReg(PeepholeSrc.first,
                                                       false /*isDef*/,
                                                       false /*isImp*/,
                                                       false /*isKill*/,
                                                       false /*isDead*/,
                                                       false /*isUndef*/,
                                                       false /*isEarlyClobber*/,
                                                       PeepholeSrc.second));
            }
          }
        }
      }

      // Look for Predicated instructions.
      if (!DisablePNotP) {
        bool Done = false;
        if (QII->isPredicated(MI)) {
          MachineOperand &Op0 = MI->getOperand(0);
          unsigned Reg0 = Op0.getReg();
          const TargetRegisterClass *RC0 = MRI->getRegClass(Reg0);
          if (RC0->getID() == Hexagon::PredRegsRegClassID) {
            // Handle instructions that have a predicate register in op0
            // (most cases of predicable instructions).
            if (TargetRegisterInfo::isVirtualRegister(Reg0)) {
              // Try to find in the map.
              if (unsigned PeepholeSrc = PeepholeMap.lookup(Reg0)) {
                // Change the 1st operand and flip the opcode.
                MI->getOperand(0).setReg(PeepholeSrc);
                int NewOp = QII->getInvertedPredicatedOpcode(MI->getOpcode());
                MI->setDesc(QII->get(NewOp));
                Done = true;
              }
            }
          }
        }

        if (!Done) {
          // Handle special instructions.
          unsigned Op = MI->getOpcode();
          unsigned NewOp = 0;
          unsigned PR = 1, S1 = 2, S2 = 3;   // Operand indices.

          switch (Op) {
            case Hexagon::C2_mux:
            case Hexagon::C2_muxii:
            case Hexagon::TFR_condset_ii:
              NewOp = Op;
              break;
            case Hexagon::TFR_condset_ri:
              NewOp = Hexagon::TFR_condset_ir;
              break;
            case Hexagon::TFR_condset_ir:
              NewOp = Hexagon::TFR_condset_ri;
              break;
            case Hexagon::C2_muxri:
              NewOp = Hexagon::C2_muxir;
              break;
            case Hexagon::C2_muxir:
              NewOp = Hexagon::C2_muxri;
              break;
          }
          if (NewOp) {
            unsigned PSrc = MI->getOperand(PR).getReg();
            if (unsigned POrig = PeepholeMap.lookup(PSrc)) {
              MI->getOperand(PR).setReg(POrig);
              MI->setDesc(QII->get(NewOp));
              // Swap operands S1 and S2.
              MachineOperand Op1 = MI->getOperand(S1);
              MachineOperand Op2 = MI->getOperand(S2);
              ChangeOpInto(MI->getOperand(S1), Op2);
              ChangeOpInto(MI->getOperand(S2), Op1);
            }
          } // if (NewOp)
        } // if (!Done)

      } // if (!DisablePNotP)

    } // Instruction
  } // Basic Block
  return true;
}
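
Note how the copy-propagation code above uses `if (unsigned PeepholeSrc = PeepholeMap.lookup(SrcReg))` as an existence test: for an integral mapped type, DenseMap::lookup returns 0 when the key is absent, and 0 is never a valid virtual register number, so the default value doubles as a sentinel. A minimal sketch of that behavior, assuming the LLVM ADT headers are available:

#include "llvm/ADT/DenseMap.h"
#include <cassert>

int main() {
  llvm::DenseMap<unsigned, unsigned> PeepholeMap;
  PeepholeMap[170] = 166;                 // models %vreg170 = SXTW %vreg166
  assert(PeepholeMap.lookup(170) == 166); // present key: the mapped value
  assert(PeepholeMap.lookup(171) == 0);   // absent key: value-initialized 0
}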
Example #11
/// ComputeSymbolTable - Compute the symbol table data
void MachObjectWriter::ComputeSymbolTable(
    MCAssembler &Asm, std::vector<MachSymbolData> &LocalSymbolData,
    std::vector<MachSymbolData> &ExternalSymbolData,
    std::vector<MachSymbolData> &UndefinedSymbolData) {
  // Build section lookup table.
  DenseMap<const MCSection*, uint8_t> SectionIndexMap;
  unsigned Index = 1;
  for (MCAssembler::iterator it = Asm.begin(),
         ie = Asm.end(); it != ie; ++it, ++Index)
    SectionIndexMap[&it->getSection()] = Index;
  assert(Index <= 256 && "Too many sections!");

  // Build the string table.
  for (MCSymbolData &SD : Asm.symbols()) {
    const MCSymbol &Symbol = SD.getSymbol();
    if (!Asm.isSymbolLinkerVisible(Symbol))
      continue;

    StringTable.add(Symbol.getName());
  }
  StringTable.finalize(StringTableBuilder::MachO);

  // Build the symbol arrays but only for non-local symbols.
  //
  // The particular order that we collect and then sort the symbols is chosen to
  // match 'as'. Even though it doesn't matter for correctness, this is
  // important for letting us diff .o files.
  for (MCSymbolData &SD : Asm.symbols()) {
    const MCSymbol &Symbol = SD.getSymbol();

    // Ignore non-linker visible symbols.
    if (!Asm.isSymbolLinkerVisible(Symbol))
      continue;

    if (!SD.isExternal() && !Symbol.isUndefined())
      continue;

    MachSymbolData MSD;
    MSD.SymbolData = &SD;
    MSD.StringIndex = StringTable.getOffset(Symbol.getName());

    if (Symbol.isUndefined()) {
      MSD.SectionIndex = 0;
      UndefinedSymbolData.push_back(MSD);
    } else if (Symbol.isAbsolute()) {
      MSD.SectionIndex = 0;
      ExternalSymbolData.push_back(MSD);
    } else {
      MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
      assert(MSD.SectionIndex && "Invalid section index!");
      ExternalSymbolData.push_back(MSD);
    }
  }

  // Now add the data for local symbols.
  for (MCSymbolData &SD : Asm.symbols()) {
    const MCSymbol &Symbol = SD.getSymbol();

    // Ignore non-linker visible symbols.
    if (!Asm.isSymbolLinkerVisible(Symbol))
      continue;

    if (SD.isExternal() || Symbol.isUndefined())
      continue;

    MachSymbolData MSD;
    MSD.SymbolData = &SD;
    MSD.StringIndex = StringTable.getOffset(Symbol.getName());

    if (Symbol.isAbsolute()) {
      MSD.SectionIndex = 0;
      LocalSymbolData.push_back(MSD);
    } else {
      MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
      assert(MSD.SectionIndex && "Invalid section index!");
      LocalSymbolData.push_back(MSD);
    }
  }

  // External and undefined symbols are required to be in lexicographic order.
  std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end());
  std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end());

  // Set the symbol indices.
  Index = 0;
  for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
    LocalSymbolData[i].SymbolData->setIndex(Index++);
  for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
    ExternalSymbolData[i].SymbolData->setIndex(Index++);
  for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
    UndefinedSymbolData[i].SymbolData->setIndex(Index++);
}
Example #12
/// \brief Figure out if the loop is worth full unrolling.
///
/// Complete loop unrolling can make some loads constant, and we need to know
/// if that would expose any further optimization opportunities.  This routine
/// estimates this optimization.  It computes cost of unrolled loop
/// (UnrolledCost) and dynamic cost of the original loop (RolledDynamicCost). By
/// dynamic cost we mean that we won't count costs of blocks that are known not
/// to be executed (i.e. if we have a branch in the loop and we know that at the
/// given iteration its condition would be resolved to true, we won't add up the
/// cost of the 'false'-block).
/// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
/// the analysis failed (no benefits expected from the unrolling, or the loop is
/// too big to analyze), the returned value is None.
static Optional<EstimatedUnrollCost>
analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
                      ScalarEvolution &SE, const TargetTransformInfo &TTI,
                      int MaxUnrolledLoopSize) {
  // We want to be able to scale offsets by the trip count and add more offsets
  // to them without checking for overflows, and we already don't want to
  // analyze *massive* trip counts, so we force the max to be reasonably small.
  assert(UnrollMaxIterationsCountToAnalyze < (INT_MAX / 2) &&
         "The unroll iterations max is too large!");

  // Only analyze inner loops. We can't properly estimate cost of nested loops
  // and we won't visit inner loops again anyway.
  if (!L->empty())
    return None;

  // Don't simulate loops with a big or unknown trip count.
  if (!UnrollMaxIterationsCountToAnalyze || !TripCount ||
      TripCount > UnrollMaxIterationsCountToAnalyze)
    return None;

  SmallSetVector<BasicBlock *, 16> BBWorklist;
  SmallSetVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitWorklist;
  DenseMap<Value *, Constant *> SimplifiedValues;
  SmallVector<std::pair<Value *, Constant *>, 4> SimplifiedInputValues;

  // The estimated cost of the unrolled form of the loop. We try to estimate
  // this by simplifying as much as we can while computing the estimate.
  int UnrolledCost = 0;

  // We also track the estimated dynamic (that is, actually executed) cost in
  // the rolled form. This helps identify cases when the savings from unrolling
  // aren't just exposing dead control flows, but actual reduced dynamic
  // instructions due to the simplifications which we expect to occur after
  // unrolling.
  int RolledDynamicCost = 0;

  // We track the simplification of each instruction in each iteration. We use
  // this to recursively merge costs into the unrolled cost on-demand so that
  // we don't count the cost of any dead code. This is essentially a map from
  // <instruction, int> to <bool, bool>, but stored as a densely packed struct.
  DenseSet<UnrolledInstState, UnrolledInstStateKeyInfo> InstCostMap;

  // A small worklist used to accumulate cost of instructions from each
  // observable and reached root in the loop.
  SmallVector<Instruction *, 16> CostWorklist;

  // PHI-used worklist used between iterations while accumulating cost.
  SmallVector<Instruction *, 4> PHIUsedList;

  // Helper function to accumulate cost for instructions in the loop.
  auto AddCostRecursively = [&](Instruction &RootI, int Iteration) {
    assert(Iteration >= 0 && "Cannot have a negative iteration!");
    assert(CostWorklist.empty() && "Must start with an empty cost list");
    assert(PHIUsedList.empty() && "Must start with an empty phi used list");
    CostWorklist.push_back(&RootI);
    for (;; --Iteration) {
      do {
        Instruction *I = CostWorklist.pop_back_val();

        // InstCostMap only uses I and Iteration as a key, the other two values
        // don't matter here.
        auto CostIter = InstCostMap.find({I, Iteration, 0, 0});
        if (CostIter == InstCostMap.end())
          // If an input to a PHI node comes from a dead path through the loop
          // we may have no cost data for it here. What that actually means is
          // that it is free.
          continue;
        auto &Cost = *CostIter;
        if (Cost.IsCounted)
          // Already counted this instruction.
          continue;

        // Mark that we are counting the cost of this instruction now.
        Cost.IsCounted = true;

        // If this is a PHI node in the loop header, just add it to the PHI set.
        if (auto *PhiI = dyn_cast<PHINode>(I))
          if (PhiI->getParent() == L->getHeader()) {
            assert(Cost.IsFree && "Loop PHIs shouldn't be evaluated as they "
                                  "inherently simplify during unrolling.");
            if (Iteration == 0)
              continue;

            // Push the incoming value from the backedge into the PHI used list
            // if it is an in-loop instruction. We'll use this to populate the
            // cost worklist for the next iteration (as we count backwards).
            if (auto *OpI = dyn_cast<Instruction>(
                    PhiI->getIncomingValueForBlock(L->getLoopLatch())))
              if (L->contains(OpI))
                PHIUsedList.push_back(OpI);
            continue;
          }

        // First accumulate the cost of this instruction.
        if (!Cost.IsFree) {
          UnrolledCost += TTI.getUserCost(I);
          DEBUG(dbgs() << "Adding cost of instruction (iteration " << Iteration
                       << "): ");
          DEBUG(I->dump());
        }

        // We must count the cost of every operand which is not free,
        // recursively. If we reach a loop PHI node, simply add it to the set
        // to be considered on the next iteration (backwards!).
        for (Value *Op : I->operands()) {
          // Check whether this operand is free due to being a constant or
          // outside the loop.
          auto *OpI = dyn_cast<Instruction>(Op);
          if (!OpI || !L->contains(OpI))
            continue;

          // Otherwise accumulate its cost.
          CostWorklist.push_back(OpI);
        }
      } while (!CostWorklist.empty());

      if (PHIUsedList.empty())
        // We've exhausted the search.
        break;

      assert(Iteration > 0 &&
             "Cannot track PHI-used values past the first iteration!");
      CostWorklist.append(PHIUsedList.begin(), PHIUsedList.end());
      PHIUsedList.clear();
    }
  };

  // Ensure that we don't violate the loop structure invariants relied on by
  // this analysis.
  assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.");
  assert(L->isLCSSAForm(DT) &&
         "Must have loops in LCSSA form to track live-out values.");

  DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");

  // Simulate execution of each iteration of the loop counting instructions,
  // which would be simplified.
  // Since the same load will take different values on different iterations,
  // we literally have to go through all loop's iterations.
  for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
    DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");

    // Prepare for the iteration by collecting any simplified entry or backedge
    // inputs.
    for (Instruction &I : *L->getHeader()) {
      auto *PHI = dyn_cast<PHINode>(&I);
      if (!PHI)
        break;

      // The loop header PHI nodes must have exactly two inputs: one from the
      // loop preheader and one from the loop latch.
      assert(
          PHI->getNumIncomingValues() == 2 &&
          "Must have an incoming value only for the preheader and the latch.");

      Value *V = PHI->getIncomingValueForBlock(
          Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());
      Constant *C = dyn_cast<Constant>(V);
      if (Iteration != 0 && !C)
        C = SimplifiedValues.lookup(V);
      if (C)
        SimplifiedInputValues.push_back({PHI, C});
    }

    // Now clear and re-populate the map for the next iteration.
    SimplifiedValues.clear();
    while (!SimplifiedInputValues.empty())
      SimplifiedValues.insert(SimplifiedInputValues.pop_back_val());

    UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE, L);

    BBWorklist.clear();
    BBWorklist.insert(L->getHeader());
    // Note that we *must not* cache the size, this loop grows the worklist.
    for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
      BasicBlock *BB = BBWorklist[Idx];

      // Visit all instructions in the given basic block and try to simplify
      // it.  We don't change the actual IR, just count optimization
      // opportunities.
      for (Instruction &I : *BB) {
        // Track this instruction's expected baseline cost when executing the
        // rolled loop form.
        RolledDynamicCost += TTI.getUserCost(&I);

        // Visit the instruction to analyze its loop cost after unrolling,
        // and if the visitor returns true, mark the instruction as free after
        // unrolling and continue.
        bool IsFree = Analyzer.visit(I);
        bool Inserted = InstCostMap.insert({&I, (int)Iteration,
                                           (unsigned)IsFree,
                                           /*IsCounted*/ false}).second;
        (void)Inserted;
        assert(Inserted && "Cannot have a state for an unvisited instruction!");

        if (IsFree)
          continue;

        // If the instruction might have a side-effect recursively account for
        // the cost of it and all the instructions leading up to it.
        if (I.mayHaveSideEffects())
          AddCostRecursively(I, Iteration);

        // Can't properly model a cost of a call.
        // FIXME: With a proper cost model we should be able to do it.
        if (isa<CallInst>(&I))
          return None;

        // If unrolled body turns out to be too big, bail out.
        if (UnrolledCost > MaxUnrolledLoopSize) {
          DEBUG(dbgs() << "  Exceeded threshold.. exiting.\n"
                       << "  UnrolledCost: " << UnrolledCost
                       << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
                       << "\n");
          return None;
        }
      }

      TerminatorInst *TI = BB->getTerminator();

      // Add in the live successors by first checking whether we have terminator
      // that may be simplified based on the values simplified by this call.
      BasicBlock *KnownSucc = nullptr;
      if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
        if (BI->isConditional()) {
          if (Constant *SimpleCond =
                  SimplifiedValues.lookup(BI->getCondition())) {
            // Just take the first successor if condition is undef
            if (isa<UndefValue>(SimpleCond))
              KnownSucc = BI->getSuccessor(0);
            else if (ConstantInt *SimpleCondVal =
                         dyn_cast<ConstantInt>(SimpleCond))
              KnownSucc = BI->getSuccessor(SimpleCondVal->isZero() ? 1 : 0);
          }
        }
      } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
        if (Constant *SimpleCond =
                SimplifiedValues.lookup(SI->getCondition())) {
          // Just take the first successor if condition is undef
          if (isa<UndefValue>(SimpleCond))
            KnownSucc = SI->getSuccessor(0);
          else if (ConstantInt *SimpleCondVal =
                       dyn_cast<ConstantInt>(SimpleCond))
            KnownSucc = SI->findCaseValue(SimpleCondVal).getCaseSuccessor();
        }
      }
      if (KnownSucc) {
        if (L->contains(KnownSucc))
          BBWorklist.insert(KnownSucc);
        else
          ExitWorklist.insert({BB, KnownSucc});
        continue;
      }

      // Add BB's successors to the worklist.
      for (BasicBlock *Succ : successors(BB))
        if (L->contains(Succ))
          BBWorklist.insert(Succ);
        else
          ExitWorklist.insert({BB, Succ});
      AddCostRecursively(*TI, Iteration);
    }

    // If we found no optimization opportunities on the first iteration, we
    // won't find them on later ones either.
    if (UnrolledCost == RolledDynamicCost) {
      DEBUG(dbgs() << "  No opportunities found.. exiting.\n"
                   << "  UnrolledCost: " << UnrolledCost << "\n");
      return None;
    }
  }

  while (!ExitWorklist.empty()) {
    BasicBlock *ExitingBB, *ExitBB;
    std::tie(ExitingBB, ExitBB) = ExitWorklist.pop_back_val();

    for (Instruction &I : *ExitBB) {
      auto *PN = dyn_cast<PHINode>(&I);
      if (!PN)
        break;

      Value *Op = PN->getIncomingValueForBlock(ExitingBB);
      if (auto *OpI = dyn_cast<Instruction>(Op))
        if (L->contains(OpI))
          AddCostRecursively(*OpI, TripCount - 1);
    }
  }

  DEBUG(dbgs() << "Analysis finished:\n"
               << "UnrolledCost: " << UnrolledCost << ", "
               << "RolledDynamicCost: " << RolledDynamicCost << "\n");
  return {{UnrolledCost, RolledDynamicCost}};
}
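
InstCostMap above is a DenseSet whose key is only the <instruction, iteration> part of UnrolledInstState; the IsFree and IsCounted bits are payload that hashing and equality ignore, which is what makes the probe `InstCostMap.find({I, Iteration, 0, 0})` valid. A self-contained sketch of such a custom key-info, with illustrative names modeled on (not copied from) UnrolledInstStateKeyInfo:

#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"

struct State {
  int *I;                 // key part 1
  int Iteration;          // key part 2
  unsigned IsFree : 1;    // payload, ignored by hash/equality
  unsigned IsCounted : 1; // payload, ignored by hash/equality
};

struct StateKeyInfo {
  using PtrInfo = llvm::DenseMapInfo<int *>;
  static State getEmptyKey() { return {PtrInfo::getEmptyKey(), 0, 0, 0}; }
  static State getTombstoneKey() {
    return {PtrInfo::getTombstoneKey(), 0, 0, 0};
  }
  static unsigned getHashValue(const State &S) {
    return llvm::hash_combine(S.I, S.Iteration);
  }
  static bool isEqual(const State &L, const State &R) {
    return L.I == R.I && L.Iteration == R.Iteration;
  }
};

int main() {
  llvm::DenseSet<State, StateKeyInfo> InstCostMap;
  int X = 0;
  InstCostMap.insert({&X, 0, /*IsFree=*/1, /*IsCounted=*/0});
  // Payload fields are not part of the key, so {.., 0, 0} still finds it.
  return InstCostMap.find({&X, 0, 0, 0}) != InstCostMap.end() ? 0 : 1;
}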
Example #13
bool X86CmovConverterPass::checkForProfitableCmovCandidates(
    ArrayRef<MachineBasicBlock *> Blocks, CmovGroups &CmovInstGroups) {
  struct DepthInfo {
    /// Depth of original loop.
    unsigned Depth;
    /// Depth of optimized loop.
    unsigned OptDepth;
  };
  /// Number of loop iterations to calculate depth for.
  static const unsigned LoopIterations = 2;
  DenseMap<MachineInstr *, DepthInfo> DepthMap;
  DepthInfo LoopDepth[LoopIterations] = {{0, 0}, {0, 0}};
  enum { PhyRegType = 0, VirRegType = 1, RegTypeNum = 2 };
  /// For each register type maps the register to its last def instruction.
  DenseMap<unsigned, MachineInstr *> RegDefMaps[RegTypeNum];
  /// Maps register operand to its def instruction, which can be nullptr if it
  /// is unknown (e.g., operand is defined outside the loop).
  DenseMap<MachineOperand *, MachineInstr *> OperandToDefMap;

  // Set depth of unknown instruction (i.e., nullptr) to zero.
  DepthMap[nullptr] = {0, 0};

  SmallPtrSet<MachineInstr *, 4> CmovInstructions;
  for (auto &Group : CmovInstGroups)
    CmovInstructions.insert(Group.begin(), Group.end());

  //===--------------------------------------------------------------------===//
  // Step 1: Calculate instruction depth and loop depth.
  // Optimized-Loop:
  //   loop with CMOV-group-candidates converted into branches.
  //
  // Instruction-Depth:
  //   instruction latency + max operand depth.
  //     * For CMOV instruction in optimized loop the depth is calculated as:
  //       CMOV latency + getDepthOfOptCmov(True-Op-Depth, False-Op-depth)
  // TODO: Find a better way to estimate the latency of the branch instruction
  //       rather than using the CMOV latency.
  //
  // Loop-Depth:
  //   max instruction depth of all instructions in the loop.
  // Note: instruction with max depth represents the critical-path in the loop.
  //
  // Loop-Depth[i]:
  //   Loop-Depth calculated for first `i` iterations.
  //   Note: it is enough to calculate depth for up to two iterations.
  //
  // Depth-Diff[i]:
  //   Number of cycles saved in the first `i` iterations by optimizing the
  //   loop.
  //===--------------------------------------------------------------------===//
  for (unsigned I = 0; I < LoopIterations; ++I) {
    DepthInfo &MaxDepth = LoopDepth[I];
    for (auto *MBB : Blocks) {
      // Clear physical registers Def map.
      RegDefMaps[PhyRegType].clear();
      for (MachineInstr &MI : *MBB) {
        // Skip debug instructions.
        if (MI.isDebugInstr())
          continue;
        unsigned MIDepth = 0;
        unsigned MIDepthOpt = 0;
        bool IsCMOV = CmovInstructions.count(&MI);
        for (auto &MO : MI.uses()) {
          // Check for isUse() because uses() also returns implicit
          // definitions.
          if (!MO.isReg() || !MO.isUse())
            continue;
          unsigned Reg = MO.getReg();
          auto &RDM = RegDefMaps[TargetRegisterInfo::isVirtualRegister(Reg)];
          if (MachineInstr *DefMI = RDM.lookup(Reg)) {
            OperandToDefMap[&MO] = DefMI;
            DepthInfo Info = DepthMap.lookup(DefMI);
            MIDepth = std::max(MIDepth, Info.Depth);
            if (!IsCMOV)
              MIDepthOpt = std::max(MIDepthOpt, Info.OptDepth);
          }
        }

        if (IsCMOV)
          MIDepthOpt = getDepthOfOptCmov(
              DepthMap[OperandToDefMap.lookup(&MI.getOperand(1))].OptDepth,
              DepthMap[OperandToDefMap.lookup(&MI.getOperand(2))].OptDepth);

        // Iterates over all operands to handle implicit definitions as well.
        for (auto &MO : MI.operands()) {
          if (!MO.isReg() || !MO.isDef())
            continue;
          unsigned Reg = MO.getReg();
          RegDefMaps[TargetRegisterInfo::isVirtualRegister(Reg)][Reg] = &MI;
        }

        unsigned Latency = TSchedModel.computeInstrLatency(&MI);
        DepthMap[&MI] = {MIDepth += Latency, MIDepthOpt += Latency};
        MaxDepth.Depth = std::max(MaxDepth.Depth, MIDepth);
        MaxDepth.OptDepth = std::max(MaxDepth.OptDepth, MIDepthOpt);
      }
    }
  }

  unsigned Diff[LoopIterations] = {LoopDepth[0].Depth - LoopDepth[0].OptDepth,
                                   LoopDepth[1].Depth - LoopDepth[1].OptDepth};

  //===--------------------------------------------------------------------===//
  // Step 2: Check if the loop is worth optimizing.
  // Worth-Optimize-Loop:
  //   case 1: Diff[1] == Diff[0]
  //           Critical-path is iteration independent - there is no dependency
  //           of critical-path instructions on critical-path instructions of
  //           previous iteration.
  //           Thus, it is enough to check the gain percent of the 1st
  //           iteration. To be conservative, the optimized loop needs to have
  //           a depth 12.5% (in cycles) smaller than the original loop, per
  //           iteration.
  //
  //   case 2: Diff[1] > Diff[0]
  //           Critical-path is iteration dependent - there is dependency of
  //           critical-path instructions on critical-path instructions of
  //           previous iteration.
  //           Thus, check the gain percent of the 2nd iteration (similar to the
  //           previous case), but it is also required to check the gradient of
  //           the gain - the change in Depth-Diff compared to the change in
  //           Loop-Depth between 1st and 2nd iterations.
  //           To be conservative, the gradient needs to be at least 50%.
  //
  //   In addition, in order not to optimize loops with a very small gain, the
  //   gain (in cycles) after 2nd iteration should not be less than a given
  //   threshold. Thus, the check (Diff[1] >= GainCycleThreshold) must apply.
  //
  // If loop is not worth optimizing, remove all CMOV-group-candidates.
  //===--------------------------------------------------------------------===//
  if (Diff[1] < GainCycleThreshold)
    return false;

  bool WorthOptLoop = false;
  if (Diff[1] == Diff[0])
    WorthOptLoop = Diff[0] * 8 >= LoopDepth[0].Depth;
  else if (Diff[1] > Diff[0])
    WorthOptLoop =
        (Diff[1] - Diff[0]) * 2 >= (LoopDepth[1].Depth - LoopDepth[0].Depth) &&
        (Diff[1] * 8 >= LoopDepth[1].Depth);

  if (!WorthOptLoop)
    return false;

  ++NumOfLoopCandidate;

  //===--------------------------------------------------------------------===//
  // Step 3: Check for each CMOV-group-candidate if it is worth optimizing.
  // Worth-Optimize-Group:
  //   Iff it is worth optimizing all CMOV instructions in the group.
  //
  // Worth-Optimize-CMOV:
  //   Predicted branch is faster than CMOV by the difference between depth of
  //   condition operand and depth of taken (predicted) value operand.
  //   To be conservative, the gain of such a CMOV transformation should cover
  //   at least 25% of the branch-misprediction penalty.
  //===--------------------------------------------------------------------===//
  unsigned MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;
  CmovGroups TempGroups;
  std::swap(TempGroups, CmovInstGroups);
  for (auto &Group : TempGroups) {
    bool WorthOpGroup = true;
    for (auto *MI : Group) {
      // Avoid CMOV instructions whose value is used as a pointer to load from.
      // This is another conservative check to avoid converting a CMOV
      // instruction used with a tree-search-like algorithm, where the branch
      // is unpredictable.
      auto UIs = MRI->use_instructions(MI->defs().begin()->getReg());
      if (UIs.begin() != UIs.end() && ++UIs.begin() == UIs.end()) {
        unsigned Op = UIs.begin()->getOpcode();
        if (Op == X86::MOV64rm || Op == X86::MOV32rm) {
          WorthOpGroup = false;
          break;
        }
      }

      unsigned CondCost =
          DepthMap[OperandToDefMap.lookup(&MI->getOperand(3))].Depth;
      unsigned ValCost = getDepthOfOptCmov(
          DepthMap[OperandToDefMap.lookup(&MI->getOperand(1))].Depth,
          DepthMap[OperandToDefMap.lookup(&MI->getOperand(2))].Depth);
      if (ValCost > CondCost || (CondCost - ValCost) * 4 < MispredictPenalty) {
        WorthOpGroup = false;
        break;
      }
    }

    if (WorthOpGroup)
      CmovInstGroups.push_back(Group);
  }

  return !CmovInstGroups.empty();
}
Example #14
/// Merge the linker flags in Src into the Dest module.
Error IRLinker::linkModuleFlagsMetadata() {
  // If the source module has no module flags, we are done.
  const NamedMDNode *SrcModFlags = SrcM->getModuleFlagsMetadata();
  if (!SrcModFlags)
    return Error::success();

  // If the destination module doesn't have module flags yet, then just copy
  // over the source module's flags.
  NamedMDNode *DstModFlags = DstM.getOrInsertModuleFlagsMetadata();
  if (DstModFlags->getNumOperands() == 0) {
    for (unsigned I = 0, E = SrcModFlags->getNumOperands(); I != E; ++I)
      DstModFlags->addOperand(SrcModFlags->getOperand(I));

    return Error::success();
  }

  // First build a map of the existing module flags and requirements.
  DenseMap<MDString *, std::pair<MDNode *, unsigned>> Flags;
  SmallSetVector<MDNode *, 16> Requirements;
  for (unsigned I = 0, E = DstModFlags->getNumOperands(); I != E; ++I) {
    MDNode *Op = DstModFlags->getOperand(I);
    ConstantInt *Behavior = mdconst::extract<ConstantInt>(Op->getOperand(0));
    MDString *ID = cast<MDString>(Op->getOperand(1));

    if (Behavior->getZExtValue() == Module::Require) {
      Requirements.insert(cast<MDNode>(Op->getOperand(2)));
    } else {
      Flags[ID] = std::make_pair(Op, I);
    }
  }

  // Merge in the flags from the source module, and also collect its set of
  // requirements.
  for (unsigned I = 0, E = SrcModFlags->getNumOperands(); I != E; ++I) {
    MDNode *SrcOp = SrcModFlags->getOperand(I);
    ConstantInt *SrcBehavior =
        mdconst::extract<ConstantInt>(SrcOp->getOperand(0));
    MDString *ID = cast<MDString>(SrcOp->getOperand(1));
    MDNode *DstOp;
    unsigned DstIndex;
    std::tie(DstOp, DstIndex) = Flags.lookup(ID);
    unsigned SrcBehaviorValue = SrcBehavior->getZExtValue();

    // If this is a requirement, add it and continue.
    if (SrcBehaviorValue == Module::Require) {
      // If the destination module does not already have this requirement, add
      // it.
      if (Requirements.insert(cast<MDNode>(SrcOp->getOperand(2)))) {
        DstModFlags->addOperand(SrcOp);
      }
      continue;
    }

    // If there is no existing flag with this ID, just add it.
    if (!DstOp) {
      Flags[ID] = std::make_pair(SrcOp, DstModFlags->getNumOperands());
      DstModFlags->addOperand(SrcOp);
      continue;
    }

    // Otherwise, perform a merge.
    ConstantInt *DstBehavior =
        mdconst::extract<ConstantInt>(DstOp->getOperand(0));
    unsigned DstBehaviorValue = DstBehavior->getZExtValue();

    // If either flag has override behavior, handle it first.
    if (DstBehaviorValue == Module::Override) {
      // Diagnose inconsistent flags which both have override behavior.
      if (SrcBehaviorValue == Module::Override &&
          SrcOp->getOperand(2) != DstOp->getOperand(2))
        return stringErr("linking module flags '" + ID->getString() +
                         "': IDs have conflicting override values");
      continue;
    } else if (SrcBehaviorValue == Module::Override) {
      // Update the destination flag to that of the source.
      DstModFlags->setOperand(DstIndex, SrcOp);
      Flags[ID].first = SrcOp;
      continue;
    }

    // Diagnose inconsistent merge behavior types.
    if (SrcBehaviorValue != DstBehaviorValue)
      return stringErr("linking module flags '" + ID->getString() +
                       "': IDs have conflicting behaviors");

    auto replaceDstValue = [&](MDNode *New) {
      Metadata *FlagOps[] = {DstOp->getOperand(0), ID, New};
      MDNode *Flag = MDNode::get(DstM.getContext(), FlagOps);
      DstModFlags->setOperand(DstIndex, Flag);
      Flags[ID].first = Flag;
    };

    // Perform the merge for standard behavior types.
    switch (SrcBehaviorValue) {
    case Module::Require:
    case Module::Override:
      llvm_unreachable("not possible");
    case Module::Error: {
      // Emit an error if the values differ.
      if (SrcOp->getOperand(2) != DstOp->getOperand(2))
        return stringErr("linking module flags '" + ID->getString() +
                         "': IDs have conflicting values");
      continue;
    }
    case Module::Warning: {
      // Emit a warning if the values differ.
      if (SrcOp->getOperand(2) != DstOp->getOperand(2)) {
        emitWarning("linking module flags '" + ID->getString() +
                    "': IDs have conflicting values");
      }
      continue;
    }
    case Module::Append: {
      MDNode *DstValue = cast<MDNode>(DstOp->getOperand(2));
      MDNode *SrcValue = cast<MDNode>(SrcOp->getOperand(2));
      SmallVector<Metadata *, 8> MDs;
      MDs.reserve(DstValue->getNumOperands() + SrcValue->getNumOperands());
      MDs.append(DstValue->op_begin(), DstValue->op_end());
      MDs.append(SrcValue->op_begin(), SrcValue->op_end());

      replaceDstValue(MDNode::get(DstM.getContext(), MDs));
      break;
    }
    case Module::AppendUnique: {
      SmallSetVector<Metadata *, 16> Elts;
      MDNode *DstValue = cast<MDNode>(DstOp->getOperand(2));
      MDNode *SrcValue = cast<MDNode>(SrcOp->getOperand(2));
      Elts.insert(DstValue->op_begin(), DstValue->op_end());
      Elts.insert(SrcValue->op_begin(), SrcValue->op_end());

      replaceDstValue(MDNode::get(DstM.getContext(),
                                  makeArrayRef(Elts.begin(), Elts.end())));
      break;
    }
    }
  }

  // Check all of the requirements.
  for (unsigned I = 0, E = Requirements.size(); I != E; ++I) {
    MDNode *Requirement = Requirements[I];
    MDString *Flag = cast<MDString>(Requirement->getOperand(0));
    Metadata *ReqValue = Requirement->getOperand(1);

    MDNode *Op = Flags[Flag].first;
    if (!Op || Op->getOperand(2) != ReqValue)
      return stringErr("linking module flags '" + Flag->getString() +
                       "': does not have the required value");
  }
  return Error::success();
}
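
The `std::tie(DstOp, DstIndex) = Flags.lookup(ID)` line above leans on the same default-value behavior: for an absent ID, lookup returns a value-initialized std::pair<MDNode *, unsigned>, i.e. {nullptr, 0}, so DstOp can serve directly as the "no existing flag" test a few lines later. A minimal sketch with a hypothetical stand-in node type:

#include "llvm/ADT/DenseMap.h"
#include <tuple>
#include <utility>

struct Node {}; // hypothetical stand-in for MDNode

int main() {
  llvm::DenseMap<int, std::pair<Node *, unsigned>> Flags;
  Node *Op;
  unsigned Index;
  std::tie(Op, Index) = Flags.lookup(42);       // absent key
  return (Op == nullptr && Index == 0) ? 0 : 1; // both value-initialized
}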
Example #15
/// \brief Figure out if the loop is worth full unrolling.
///
/// Complete loop unrolling can make some loads constant, and we need to know
/// if that would expose any further optimization opportunities.  This routine
/// estimates this optimization.  It computes cost of unrolled loop
/// (UnrolledCost) and dynamic cost of the original loop (RolledDynamicCost). By
/// dynamic cost we mean that we won't count costs of blocks that are known not
/// to be executed (i.e. if we have a branch in the loop and we know that at the
/// given iteration its condition would be resolved to true, we won't add up the
/// cost of the 'false'-block).
/// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
/// the analysis failed (no benefits expected from the unrolling, or the loop is
/// too big to analyze), the returned value is None.
static Optional<EstimatedUnrollCost>
analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
                      ScalarEvolution &SE, const TargetTransformInfo &TTI,
                      int MaxUnrolledLoopSize) {
  // We want to be able to scale offsets by the trip count and add more offsets
  // to them without checking for overflows, and we already don't want to
  // analyze *massive* trip counts, so we force the max to be reasonably small.
  assert(UnrollMaxIterationsCountToAnalyze < (INT_MAX / 2) &&
         "The unroll iterations max is too large!");

  // Don't simulate loops with a big or unknown trip count.
  if (!UnrollMaxIterationsCountToAnalyze || !TripCount ||
      TripCount > UnrollMaxIterationsCountToAnalyze)
    return None;

  SmallSetVector<BasicBlock *, 16> BBWorklist;
  DenseMap<Value *, Constant *> SimplifiedValues;
  SmallVector<std::pair<Value *, Constant *>, 4> SimplifiedInputValues;

  // The estimated cost of the unrolled form of the loop. We try to estimate
  // this by simplifying as much as we can while computing the estimate.
  int UnrolledCost = 0;
  // We also track the estimated dynamic (that is, actually executed) cost in
  // the rolled form. This helps identify cases when the savings from unrolling
  // aren't just exposing dead control flows, but actual reduced dynamic
  // instructions due to the simplifications which we expect to occur after
  // unrolling.
  int RolledDynamicCost = 0;

  // Ensure that we don't violate the loop structure invariants relied on by
  // this analysis.
  assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.");
  assert(L->isLCSSAForm(DT) &&
         "Must have loops in LCSSA form to track live-out values.");

  DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");

  // Simulate execution of each iteration of the loop counting instructions,
  // which would be simplified.
  // Since the same load will take different values on different iterations,
  // we literally have to go through all loop's iterations.
  for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
    DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");

    // Prepare for the iteration by collecting any simplified entry or backedge
    // inputs.
    for (Instruction &I : *L->getHeader()) {
      auto *PHI = dyn_cast<PHINode>(&I);
      if (!PHI)
        break;

      // The loop header PHI nodes must have exactly two inputs: one from the
      // loop preheader and one from the loop latch.
      assert(
          PHI->getNumIncomingValues() == 2 &&
          "Must have an incoming value only for the preheader and the latch.");

      Value *V = PHI->getIncomingValueForBlock(
          Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());
      Constant *C = dyn_cast<Constant>(V);
      if (Iteration != 0 && !C)
        C = SimplifiedValues.lookup(V);
      if (C)
        SimplifiedInputValues.push_back({PHI, C});
    }

    // Now clear and re-populate the map for the next iteration.
    SimplifiedValues.clear();
    while (!SimplifiedInputValues.empty())
      SimplifiedValues.insert(SimplifiedInputValues.pop_back_val());

    UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE);

    BBWorklist.clear();
    BBWorklist.insert(L->getHeader());
    // Note that we *must not* cache the size, this loop grows the worklist.
    for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
      BasicBlock *BB = BBWorklist[Idx];

      // Visit all instructions in the given basic block and try to simplify
      // it.  We don't change the actual IR, just count optimization
      // opportunities.
      for (Instruction &I : *BB) {
        int InstCost = TTI.getUserCost(&I);

        // Visit the instruction to analyze its loop cost after unrolling,
        // and if the visitor returns false, include this instruction in the
        // unrolled cost.
        if (!Analyzer.visit(I))
          UnrolledCost += InstCost;
        else {
          DEBUG(dbgs() << "  " << I
                       << " would be simplified if loop is unrolled.\n");
          (void)0;
        }

        // Also track this instruction's expected cost when executing the
        // rolled loop form.
        RolledDynamicCost += InstCost;

        // If unrolled body turns out to be too big, bail out.
        if (UnrolledCost > MaxUnrolledLoopSize) {
          DEBUG(dbgs() << "  Exceeded threshold.. exiting.\n"
                       << "  UnrolledCost: " << UnrolledCost
                       << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
                       << "\n");
          return None;
        }
      }

      TerminatorInst *TI = BB->getTerminator();

      // Add in the live successors by first checking whether we have terminator
      // that may be simplified based on the values simplified by this call.
      if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
        if (BI->isConditional()) {
          if (Constant *SimpleCond =
                  SimplifiedValues.lookup(BI->getCondition())) {
            BasicBlock *Succ = nullptr;
            // Just take the first successor if condition is undef
            if (isa<UndefValue>(SimpleCond))
              Succ = BI->getSuccessor(0);
            else
              Succ = BI->getSuccessor(
                  cast<ConstantInt>(SimpleCond)->isZero() ? 1 : 0);
            if (L->contains(Succ))
              BBWorklist.insert(Succ);
            continue;
          }
        }
      } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
        if (Constant *SimpleCond =
                SimplifiedValues.lookup(SI->getCondition())) {
          BasicBlock *Succ = nullptr;
          // Just take the first successor if condition is undef
          if (isa<UndefValue>(SimpleCond))
            Succ = SI->getSuccessor(0);
          else
            Succ = SI->findCaseValue(cast<ConstantInt>(SimpleCond))
                       .getCaseSuccessor();
          if (L->contains(Succ))
            BBWorklist.insert(Succ);
          continue;
        }
      }

      // Add BB's successors to the worklist.
      for (BasicBlock *Succ : successors(BB))
        if (L->contains(Succ))
          BBWorklist.insert(Succ);
    }

    // If we found no optimization opportunities on the first iteration, we
    // won't find them on later ones either.
    if (UnrolledCost == RolledDynamicCost) {
      DEBUG(dbgs() << "  No opportunities found.. exiting.\n"
                   << "  UnrolledCost: " << UnrolledCost << "\n");
      return None;
    }
  }
  DEBUG(dbgs() << "Analysis finished:\n"
               << "UnrolledCost: " << UnrolledCost << ", "
               << "RolledDynamicCost: " << RolledDynamicCost << "\n");
  return {{UnrolledCost, RolledDynamicCost}};
}