Example #1
bool PropagateJuliaAddrspaces::runOnFunction(Function &F) {
    visit(F);
    for (auto it : ToInsert)
        it.first->insertBefore(it.second);
    for (Instruction *I : ToDelete)
        I->eraseFromParent();
    ToInsert.clear();
    ToDelete.clear();
    LiftingMap.clear();
    Visited.clear();
    return true;
}
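The pass above queues its edits instead of applying them mid-traversal: erasing or inserting instructions while visit(F) is still walking the function would invalidate iterators. A minimal sketch of how a visit method might feed ToInsert and ToDelete, assuming a hypothetical buildReplacement helper:

void visitSomeInst(Instruction &I) {
    // Decide on the edit, but do not touch the IR during the traversal.
    Instruction *Repl = buildReplacement(I);       // hypothetical helper
    ToInsert.push_back(std::make_pair(Repl, &I));  // Repl->insertBefore(&I) afterwards
    ToDelete.push_back(&I);                        // I.eraseFromParent() afterwards
}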
Example #2
/*
 *  When phis are created, only the sigma and phi operands are inserted into
 *  them.  Thus, we need to insert V, the value for which the sigmas and phis
 *  were created, as the incoming value on every incoming edge that does not
 *  yet have an operand associated with it.
 */
void vSSA::populatePhis(SmallVector<PHINode*, 25> &vssaphi_created, Value *V)
{
	// If any vSSA_PHI was created, iterate over the predecessors of vSSA_PHIs to insert V as an operand from the branches where sigma was not created
	for (SmallVectorImpl<PHINode*>::iterator vit = vssaphi_created.begin(), vend = vssaphi_created.end(); vit != vend; ++vit) {
		PHINode *vssaphi = *vit;
		BasicBlock *BB_parent = vssaphi->getParent();
		
		DenseMap<BasicBlock*, unsigned> howManyTimesIsPred;
		
		// Get how many times each basic block is a predecessor of BB_parent
		for (pred_iterator PI = pred_begin(BB_parent), PE = pred_end(BB_parent); PI != PE; ++PI) {
			BasicBlock *predBB = *PI;
			
			DenseMap<BasicBlock*, unsigned>::iterator mit = howManyTimesIsPred.find(predBB);
			
			if (mit == howManyTimesIsPred.end()) {
				howManyTimesIsPred.insert(std::make_pair(predBB, 1));
			}
			else {
				++mit->second;
			}
		}
		
		unsigned i, e;
		
		// If a predecessor already has incoming values in the vSSA_phi, we don't count them
		for (i = 0, e = vssaphi->getNumIncomingValues(); i < e; ++i) {
			--howManyTimesIsPred[vssaphi->getIncomingBlock(i)];
		}
		
		// Finally, add V as an incoming value from predBB as many times as necessary
		for (DenseMap<BasicBlock*, unsigned>::iterator mit = howManyTimesIsPred.begin(), mend = howManyTimesIsPred.end(); mit != mend; ++mit) {
			unsigned count;
			BasicBlock *predBB = mit->first;
			
			for (count = mit->second; count > 0; --count) {
				vssaphi->addIncoming(V, predBB);
			}
		}
		
		howManyTimesIsPred.clear();
	}
}
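Since DenseMap::operator[] value-initializes a missing entry (to 0 for an unsigned mapped type), the explicit find/insert dance in the counting loop above can be collapsed; a behaviorally equivalent sketch:

for (pred_iterator PI = pred_begin(BB_parent), PE = pred_end(BB_parent);
     PI != PE; ++PI)
	++howManyTimesIsPred[*PI];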
Example #3
  // Given a list of loads that could be constant-folded (LoadBaseAddresses),
  // estimate the number of optimized instructions after substituting the
  // concrete values for the given Iteration.
  // Fill in SimplifiedInsns map for future use in DCE-estimation.
  unsigned EstimateNumberOfSimplifiedInsns(unsigned Iteration) {
    SmallVector<Instruction *, 8> Worklist;
    SimplifiedValues.clear();
    CountedInsns.clear();

    NumberOfOptimizedInstructions = 0;
    // We start by adding all loads to the worklist.
    for (auto LoadDescr : LoadBaseAddresses) {
      LoadInst *LI = LoadDescr.first;
      SimplifiedValues[LI] = computeLoadValue(LI, Iteration);
      if (CountedInsns.insert(LI).second)
        NumberOfOptimizedInstructions += TTI.getUserCost(LI);

      for (auto U : LI->users()) {
        Instruction *UI = dyn_cast<Instruction>(U);
        if (!UI)
          continue;
        if (!L->contains(UI))
          continue;
        Worklist.push_back(UI);
      }
    }

    // And then we try to simplify every user of every instruction from the
    // worklist. If we do simplify a user, add it to the worklist to process
    // its users as well.
    while (!Worklist.empty()) {
      Instruction *I = Worklist.pop_back_val();
      if (!visit(I))
        continue;
      for (auto U : I->users()) {
        Instruction *UI = dyn_cast<Instruction>(U);
        if (!UI)
          continue;
        if (!L->contains(UI))
          continue;
        Worklist.push_back(UI);
      }
    }
    return NumberOfOptimizedInstructions;
  }
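The loop that pushes in-loop users onto the worklist appears twice verbatim; if this were refactored, a small lambda could carry the shared logic. A sketch under that assumption:

auto AddInLoopUsers = [&](Instruction *I) {
  // Queue every user of I that is an instruction inside the loop.
  for (auto U : I->users())
    if (Instruction *UI = dyn_cast<Instruction>(U))
      if (L->contains(UI))
        Worklist.push_back(UI);
};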
Example #4
  std::string getNodeLabel(const MachineBasicBlock *Node,
                           const MachineBlockFrequencyInfo *Graph) {
    int layout_order = -1;
    // Attach additional ordering information if 'isSimple' is false.
    if (!isSimple()) {
      const MachineFunction *F = Node->getParent();
      if (!CurFunc || F != CurFunc) {
        if (CurFunc)
          LayoutOrderMap.clear();

        CurFunc = F;
        int O = 0;
        for (auto MBI = F->begin(); MBI != F->end(); ++MBI, ++O) {
          LayoutOrderMap[&*MBI] = O;
        }
      }
      layout_order = LayoutOrderMap[Node];
    }
    return MBFIDOTGraphTraitsBase::getNodeLabel(Node, Graph, getGVDT(),
                                                layout_order);
  }
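Note that LayoutOrderMap[Node] uses DenseMap::operator[], which would quietly insert a zero entry if Node were ever missing from the map. If that mattered, a non-mutating lookup is a safer alternative; a sketch (a judgment call, not in the original):

auto It = LayoutOrderMap.find(Node);
layout_order = (It != LayoutOrderMap.end()) ? It->second : -1;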
Example #5
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
  if (DisablePeephole)
    return false;

  TM  = &MF.getTarget();
  TII = TM->getInstrInfo();
  MRI = &MF.getRegInfo();
  DT  = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0;

  bool Changed = false;

  SmallPtrSet<MachineInstr*, 8> LocalMIs;
  SmallSet<unsigned, 4> ImmDefRegs;
  DenseMap<unsigned, MachineInstr*> ImmDefMIs;
  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
    MachineBasicBlock *MBB = &*I;

    bool SeenMoveImm = false;
    LocalMIs.clear();
    ImmDefRegs.clear();
    ImmDefMIs.clear();

    bool First = true;
    MachineBasicBlock::iterator PMII;
    for (MachineBasicBlock::iterator
           MII = I->begin(), MIE = I->end(); MII != MIE; ) {
      MachineInstr *MI = &*MII;
      LocalMIs.insert(MI);

      if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
          MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() ||
          MI->hasUnmodeledSideEffects()) {
        ++MII;
        continue;
      }

      if (MI->isBitcast()) {
        if (optimizeBitcastInstr(MI, MBB)) {
          // MI is deleted.
          LocalMIs.erase(MI);
          Changed = true;
          MII = First ? I->begin() : llvm::next(PMII);
          continue;
        }
      } else if (MI->isCompare()) {
        if (optimizeCmpInstr(MI, MBB)) {
          // MI is deleted.
          LocalMIs.erase(MI);
          Changed = true;
          MII = First ? I->begin() : llvm::next(PMII);
          continue;
        }
      }

      if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
        SeenMoveImm = true;
      } else {
        Changed |= optimizeExtInstr(MI, MBB, LocalMIs);
        if (SeenMoveImm)
          Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
      }

      First = false;
      PMII = MII;
      ++MII;
    }
  }

  return Changed;
}
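The First/PMII bookkeeping above exists only so the loop can resume correctly after optimizeBitcastInstr or optimizeCmpInstr deletes MI. Example #12 below uses the simpler erase-safe idiom of advancing the iterator before processing; a sketch with a hypothetical shouldErase predicate:

for (MachineBasicBlock::iterator MII = MBB->begin(), MIE = MBB->end();
     MII != MIE; ) {
  MachineInstr *MI = &*MII;
  ++MII;  // advance first, so erasing MI does not invalidate MII
  if (shouldErase(MI))  // hypothetical predicate
    MI->eraseFromParent();
}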
Example #6
/// \brief Figure out if the loop is worth full unrolling.
///
/// Complete loop unrolling can make some loads constant, and we need to know
/// if that would expose any further optimization opportunities.  This routine
/// estimates this optimization.  It computes cost of unrolled loop
/// (UnrolledCost) and dynamic cost of the original loop (RolledDynamicCost). By
/// dynamic cost we mean that we won't count costs of blocks that are known not
/// to be executed (i.e. if we have a branch in the loop and we know that at the
/// given iteration its condition would be resolved to true, we won't add up the
/// cost of the 'false'-block).
/// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
/// the analysis failed (no benefits expected from the unrolling, or the loop is
/// too big to analyze), the returned value is None.
Optional<EstimatedUnrollCost>
analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE,
                      const TargetTransformInfo &TTI,
                      unsigned MaxUnrolledLoopSize) {
  // We want to be able to scale offsets by the trip count and add more offsets
  // to them without checking for overflows, and we already don't want to
  // analyze *massive* trip counts, so we force the max to be reasonably small.
  assert(UnrollMaxIterationsCountToAnalyze < (INT_MAX / 2) &&
         "The unroll iterations max is too large!");

  // Don't simulate loops with a big or unknown trip count.
  if (!UnrollMaxIterationsCountToAnalyze || !TripCount ||
      TripCount > UnrollMaxIterationsCountToAnalyze)
    return None;

  SmallSetVector<BasicBlock *, 16> BBWorklist;
  DenseMap<Value *, Constant *> SimplifiedValues;

  // The estimated cost of the unrolled form of the loop. We try to estimate
  // this by simplifying as much as we can while computing the estimate.
  unsigned UnrolledCost = 0;
  // We also track the estimated dynamic (that is, actually executed) cost in
  // the rolled form. This helps identify cases when the savings from unrolling
  // aren't just exposing dead control flows, but actual reduced dynamic
  // instructions due to the simplifications which we expect to occur after
  // unrolling.
  unsigned RolledDynamicCost = 0;

  // Simulate execution of each iteration of the loop, counting the
  // instructions that would be simplified.
  // Since the same load will take different values on different iterations,
  // we literally have to go through all of the loop's iterations.
  for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
    SimplifiedValues.clear();
    UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, L, SE);

    BBWorklist.clear();
    BBWorklist.insert(L->getHeader());
    // Note that we *must not* cache the size; this loop grows the worklist.
    for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
      BasicBlock *BB = BBWorklist[Idx];

      // Visit all instructions in the given basic block and try to simplify
      // them.  We don't change the actual IR, just count optimization
      // opportunities.
      for (Instruction &I : *BB) {
        unsigned InstCost = TTI.getUserCost(&I);

        // Visit the instruction to analyze its loop cost after unrolling,
        // and if the visitor returns false, include this instruction in the
        // unrolled cost.
        if (!Analyzer.visit(I))
          UnrolledCost += InstCost;

        // Also track this instruction's expected cost when executing the
        // rolled loop form.
        RolledDynamicCost += InstCost;

        // If the unrolled body turns out to be too big, bail out.
        if (UnrolledCost > MaxUnrolledLoopSize)
          return None;
      }

      // Add BB's successors to the worklist.
      for (BasicBlock *Succ : successors(BB))
        if (L->contains(Succ))
          BBWorklist.insert(Succ);
    }

    // If we found no optimization opportunities on the first iteration, we
    // won't find them on later ones either.
    if (UnrolledCost == RolledDynamicCost)
      return None;
  }
  return {{UnrolledCost, RolledDynamicCost}};
}
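A hedged sketch of how a caller might consume the Optional result; the member names follow the initializer order of the return statement, and the threshold logic is an assumption rather than code from this file:

if (Optional<EstimatedUnrollCost> Cost =
        analyzeLoopUnrollCost(L, TripCount, SE, TTI, MaxSize)) {
  // Unrolling is attractive when the simplified, unrolled cost is well
  // below the dynamic cost of the rolled loop.
  if (Cost->UnrolledCost < Cost->RolledDynamicCost)
    tryToUnroll(L, TripCount);  // hypothetical driver
}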
Example #7
bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
  QII = static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo());
  QRI = MF.getTarget().getSubtarget<HexagonSubtarget>().getRegisterInfo();
  MRI = &MF.getRegInfo();

  DenseMap<unsigned, unsigned> PeepholeMap;
  DenseMap<unsigned, std::pair<unsigned, unsigned> > PeepholeDoubleRegsMap;

  if (DisableHexagonPeephole) return false;

  // Loop over all of the basic blocks.
  for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end();
       MBBb != MBBe; ++MBBb) {
    MachineBasicBlock* MBB = MBBb;
    PeepholeMap.clear();
    PeepholeDoubleRegsMap.clear();

    // Traverse the basic block.
    for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
                                     ++MII) {
      MachineInstr *MI = MII;
      // Look for sign extends:
      // %vreg170<def> = SXTW %vreg166
      if (!DisableOptSZExt && MI->getOpcode() == Hexagon::A2_sxtw) {
        assert (MI->getNumOperands() == 2);
        MachineOperand &Dst = MI->getOperand(0);
        MachineOperand &Src  = MI->getOperand(1);
        unsigned DstReg = Dst.getReg();
        unsigned SrcReg = Src.getReg();
        // Just handle virtual registers.
        if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
            TargetRegisterInfo::isVirtualRegister(SrcReg)) {
          // Map the following:
          // %vreg170<def> = SXTW %vreg166
          // PeepholeMap[170] = vreg166
          PeepholeMap[DstReg] = SrcReg;
        }
      }

      // Look for %vreg170<def> = COMBINE_Ir_V4 (0, %vreg169)
      // %vreg170:DoubleRegs, %vreg169:IntRegs
      if (!DisableOptExtTo64 &&
          MI->getOpcode () == Hexagon::COMBINE_Ir_V4) {
        assert (MI->getNumOperands() == 3);
        MachineOperand &Dst = MI->getOperand(0);
        MachineOperand &Src1 = MI->getOperand(1);
        MachineOperand &Src2 = MI->getOperand(2);
        if (Src1.getImm() != 0)
          continue;
        unsigned DstReg = Dst.getReg();
        unsigned SrcReg = Src2.getReg();
        PeepholeMap[DstReg] = SrcReg;
      }

      // Look for this sequence below
      // %vregDoubleReg1 = LSRd_ri %vregDoubleReg0, 32
      // %vregIntReg = COPY %vregDoubleReg1:subreg_loreg.
      // and convert into
      // %vregIntReg = COPY %vregDoubleReg0:subreg_hireg.
      if (MI->getOpcode() == Hexagon::LSRd_ri) {
        assert(MI->getNumOperands() == 3);
        MachineOperand &Dst = MI->getOperand(0);
        MachineOperand &Src1 = MI->getOperand(1);
        MachineOperand &Src2 = MI->getOperand(2);
        if (Src2.getImm() != 32)
          continue;
        unsigned DstReg = Dst.getReg();
        unsigned SrcReg = Src1.getReg();
        PeepholeDoubleRegsMap[DstReg] =
          std::make_pair(SrcReg, 1/*Hexagon::subreg_hireg*/);
      }

      // Look for P=NOT(P).
      if (!DisablePNotP &&
          (MI->getOpcode() == Hexagon::C2_not)) {
        assert (MI->getNumOperands() == 2);
        MachineOperand &Dst = MI->getOperand(0);
        MachineOperand &Src  = MI->getOperand(1);
        unsigned DstReg = Dst.getReg();
        unsigned SrcReg = Src.getReg();
        // Just handle virtual registers.
        if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
            TargetRegisterInfo::isVirtualRegister(SrcReg)) {
          // Map the following:
          // %vreg170<def> = NOT_xx %vreg166
          // PeepholeMap[170] = vreg166
          PeepholeMap[DstReg] = SrcReg;
        }
      }

      // Look for copy:
      // %vreg176<def> = COPY %vreg170:subreg_loreg
      if (!DisableOptSZExt && MI->isCopy()) {
        assert (MI->getNumOperands() == 2);
        MachineOperand &Dst = MI->getOperand(0);
        MachineOperand &Src  = MI->getOperand(1);

        // Make sure we are copying the lower 32 bits.
        if (Src.getSubReg() != Hexagon::subreg_loreg)
          continue;

        unsigned DstReg = Dst.getReg();
        unsigned SrcReg = Src.getReg();
        if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
            TargetRegisterInfo::isVirtualRegister(SrcReg)) {
          // Try to find in the map.
          if (unsigned PeepholeSrc = PeepholeMap.lookup(SrcReg)) {
            // Change the 1st operand.
            MI->RemoveOperand(1);
            MI->addOperand(MachineOperand::CreateReg(PeepholeSrc, false));
          } else  {
            DenseMap<unsigned, std::pair<unsigned, unsigned> >::iterator DI =
              PeepholeDoubleRegsMap.find(SrcReg);
            if (DI != PeepholeDoubleRegsMap.end()) {
              std::pair<unsigned,unsigned> PeepholeSrc = DI->second;
              MI->RemoveOperand(1);
              MI->addOperand(MachineOperand::CreateReg(PeepholeSrc.first,
                                                       false /*isDef*/,
                                                       false /*isImp*/,
                                                       false /*isKill*/,
                                                       false /*isDead*/,
                                                       false /*isUndef*/,
                                                       false /*isEarlyClobber*/,
                                                       PeepholeSrc.second));
            }
          }
        }
      }

      // Look for Predicated instructions.
      if (!DisablePNotP) {
        bool Done = false;
        if (QII->isPredicated(MI)) {
          MachineOperand &Op0 = MI->getOperand(0);
          unsigned Reg0 = Op0.getReg();
          const TargetRegisterClass *RC0 = MRI->getRegClass(Reg0);
          if (RC0->getID() == Hexagon::PredRegsRegClassID) {
            // Handle instructions that have a predicate register in op0
            // (most cases of predicable instructions).
            if (TargetRegisterInfo::isVirtualRegister(Reg0)) {
              // Try to find in the map.
              if (unsigned PeepholeSrc = PeepholeMap.lookup(Reg0)) {
                // Change the 1st operand and flip the opcode.
                MI->getOperand(0).setReg(PeepholeSrc);
                int NewOp = QII->getInvertedPredicatedOpcode(MI->getOpcode());
                MI->setDesc(QII->get(NewOp));
                Done = true;
              }
            }
          }
        }

        if (!Done) {
          // Handle special instructions.
          unsigned Op = MI->getOpcode();
          unsigned NewOp = 0;
          unsigned PR = 1, S1 = 2, S2 = 3;   // Operand indices.

          switch (Op) {
            case Hexagon::C2_mux:
            case Hexagon::C2_muxii:
            case Hexagon::TFR_condset_ii:
              NewOp = Op;
              break;
            case Hexagon::TFR_condset_ri:
              NewOp = Hexagon::TFR_condset_ir;
              break;
            case Hexagon::TFR_condset_ir:
              NewOp = Hexagon::TFR_condset_ri;
              break;
            case Hexagon::C2_muxri:
              NewOp = Hexagon::C2_muxir;
              break;
            case Hexagon::C2_muxir:
              NewOp = Hexagon::C2_muxri;
              break;
          }
          if (NewOp) {
            unsigned PSrc = MI->getOperand(PR).getReg();
            if (unsigned POrig = PeepholeMap.lookup(PSrc)) {
              MI->getOperand(PR).setReg(POrig);
              MI->setDesc(QII->get(NewOp));
              // Swap operands S1 and S2.
              MachineOperand Op1 = MI->getOperand(S1);
              MachineOperand Op2 = MI->getOperand(S2);
              ChangeOpInto(MI->getOperand(S1), Op2);
              ChangeOpInto(MI->getOperand(S2), Op1);
            }
          } // if (NewOp)
        } // if (!Done)

      } // if (!DisablePNotP)

    } // Instruction
  } // Basic Block
  return true;
}
Example #8
bool ConstantMerge::runOnModule(Module &M) {
  TD = getAnalysisIfAvailable<TargetData>();

  // Find all the globals that are marked "used".  These cannot be merged.
  SmallPtrSet<const GlobalValue*, 8> UsedGlobals;
  FindUsedValues(M.getGlobalVariable("llvm.used"), UsedGlobals);
  FindUsedValues(M.getGlobalVariable("llvm.compiler.used"), UsedGlobals);
  
  // Map unique <constants, has-unknown-alignment> pairs to globals.  We don't
  // want to merge globals of unknown alignment with those of explicit
  // alignment.  If we have TargetData, we always know the alignment.
  DenseMap<PointerIntPair<Constant*, 1, bool>, GlobalVariable*> CMap;

  // Replacements - This vector contains a list of replacements to perform.
  SmallVector<std::pair<GlobalVariable*, GlobalVariable*>, 32> Replacements;

  bool MadeChange = false;

  // Iterate constant merging while we are still making progress.  Merging two
  // constants together may allow us to merge other constants together if the
  // second level constants have initializers which point to the globals that
  // were just merged.
  while (1) {

    // First: Find the canonical constants others will be merged with.
    for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
         GVI != E; ) {
      GlobalVariable *GV = GVI++;

      // If this GV is dead, remove it.
      GV->removeDeadConstantUsers();
      if (GV->use_empty() && GV->hasLocalLinkage()) {
        GV->eraseFromParent();
        continue;
      }

      // Only process constants with initializers in the default address space.
      if (!GV->isConstant() || !GV->hasDefinitiveInitializer() ||
          GV->getType()->getAddressSpace() != 0 || GV->hasSection() ||
          // Don't touch values marked with attribute(used).
          UsedGlobals.count(GV))
        continue;

      // This transformation is legal for weak ODR globals in the sense it
      // doesn't change semantics, but we really don't want to perform it
      // anyway; it's likely to pessimize code generation, and some tools
      // (like the Darwin linker in cases involving CFString) don't expect it.
      if (GV->isWeakForLinker())
        continue;

      Constant *Init = GV->getInitializer();

      // Check to see if the initializer is already known.
      PointerIntPair<Constant*, 1, bool> Pair(Init, hasKnownAlignment(GV));
      GlobalVariable *&Slot = CMap[Pair];

      // If this is the first constant we find, or if the old one is local,
      // replace it with the current one.  If the current one is externally
      // visible it cannot be replaced, but it can be the canonical constant
      // we merge with.
      if (Slot == 0 || IsBetterCannonical(*GV, *Slot))
        Slot = GV;
    }

    // Second: identify all globals that can be merged together, filling in
    // the Replacements vector.  We cannot do the replacement in this pass
    // because doing so may cause initializers of other globals to be rewritten,
    // invalidating the Constant* pointers in CMap.
    for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
         GVI != E; ) {
      GlobalVariable *GV = GVI++;

      // Only process constants with initializers in the default address space.
      if (!GV->isConstant() || !GV->hasDefinitiveInitializer() ||
          GV->getType()->getAddressSpace() != 0 || GV->hasSection() ||
          // Don't touch values marked with attribute(used).
          UsedGlobals.count(GV))
        continue;

      // We can only replace constant with local linkage.
      if (!GV->hasLocalLinkage())
        continue;

      Constant *Init = GV->getInitializer();

      // Check to see if the initializer is already known.
      PointerIntPair<Constant*, 1, bool> Pair(Init, hasKnownAlignment(GV));
      GlobalVariable *Slot = CMap[Pair];

      if (!Slot || Slot == GV)
        continue;

      if (!Slot->hasUnnamedAddr() && !GV->hasUnnamedAddr())
        continue;

      if (!GV->hasUnnamedAddr())
        Slot->setUnnamedAddr(false);

      // Make all uses of the duplicate constant use the canonical version.
      Replacements.push_back(std::make_pair(GV, Slot));
    }

    if (Replacements.empty())
      return MadeChange;
    CMap.clear();

    // Now that we have figured out which replacements must be made, do them
    // all now.  This avoids invalidating the pointers in CMap, which are no
    // longer needed.
    for (unsigned i = 0, e = Replacements.size(); i != e; ++i) {
      // Bump the alignment if necessary.
      if (Replacements[i].first->getAlignment() ||
          Replacements[i].second->getAlignment()) {
        Replacements[i].second->setAlignment(std::max(
            Replacements[i].first->getAlignment(),
            Replacements[i].second->getAlignment()));
      }

      // Eliminate any uses of the dead global.
      Replacements[i].first->replaceAllUsesWith(Replacements[i].second);

      // Delete the global value from the module.
      assert(Replacements[i].first->hasLocalLinkage() &&
             "Refusing to delete an externally visible global variable.");
      Replacements[i].first->eraseFromParent();
    }

    NumMerged += Replacements.size();
    Replacements.clear();
  }
}
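The two-phase structure is load-bearing: replaceAllUsesWith can rewrite the initializers of other globals, which would invalidate the Constant* keys in CMap if replacements were applied during the scan. An illustration of the net effect on an assumed IR shape:

// Before the pass:
//   @a = private unnamed_addr constant i32 42
//   @b = private unnamed_addr constant i32 42
// After the pass: uses of @b are redirected to @a, and @b is erased.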
Example #9
/// \brief Figure out if the loop is worth full unrolling.
///
/// Complete loop unrolling can make some loads constant, and we need to know
/// if that would expose any further optimization opportunities.  This routine
/// estimates this optimization.  It computes cost of unrolled loop
/// (UnrolledCost) and dynamic cost of the original loop (RolledDynamicCost). By
/// dynamic cost we mean that we won't count costs of blocks that are known not
/// to be executed (i.e. if we have a branch in the loop and we know that at the
/// given iteration its condition would be resolved to true, we won't add up the
/// cost of the 'false'-block).
/// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
/// the analysis failed (no benefits expected from the unrolling, or the loop is
/// too big to analyze), the returned value is None.
static Optional<EstimatedUnrollCost>
analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
                      ScalarEvolution &SE, const TargetTransformInfo &TTI,
                      int MaxUnrolledLoopSize) {
  // We want to be able to scale offsets by the trip count and add more offsets
  // to them without checking for overflows, and we already don't want to
  // analyze *massive* trip counts, so we force the max to be reasonably small.
  assert(UnrollMaxIterationsCountToAnalyze < (INT_MAX / 2) &&
         "The unroll iterations max is too large!");

  // Only analyze inner loops. We can't properly estimate cost of nested loops
  // and we won't visit inner loops again anyway.
  if (!L->empty())
    return None;

  // Don't simulate loops with a big or unknown trip count.
  if (!UnrollMaxIterationsCountToAnalyze || !TripCount ||
      TripCount > UnrollMaxIterationsCountToAnalyze)
    return None;

  SmallSetVector<BasicBlock *, 16> BBWorklist;
  SmallSetVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitWorklist;
  DenseMap<Value *, Constant *> SimplifiedValues;
  SmallVector<std::pair<Value *, Constant *>, 4> SimplifiedInputValues;

  // The estimated cost of the unrolled form of the loop. We try to estimate
  // this by simplifying as much as we can while computing the estimate.
  int UnrolledCost = 0;

  // We also track the estimated dynamic (that is, actually executed) cost in
  // the rolled form. This helps identify cases when the savings from unrolling
  // aren't just exposing dead control flows, but actual reduced dynamic
  // instructions due to the simplifications which we expect to occur after
  // unrolling.
  int RolledDynamicCost = 0;

  // We track the simplification of each instruction in each iteration. We use
  // this to recursively merge costs into the unrolled cost on-demand so that
  // we don't count the cost of any dead code. This is essentially a map from
  // <instruction, int> to <bool, bool>, but stored as a densely packed struct.
  DenseSet<UnrolledInstState, UnrolledInstStateKeyInfo> InstCostMap;

  // A small worklist used to accumulate cost of instructions from each
  // observable and reached root in the loop.
  SmallVector<Instruction *, 16> CostWorklist;

  // PHI-used worklist used between iterations while accumulating cost.
  SmallVector<Instruction *, 4> PHIUsedList;

  // Helper function to accumulate cost for instructions in the loop.
  auto AddCostRecursively = [&](Instruction &RootI, int Iteration) {
    assert(Iteration >= 0 && "Cannot have a negative iteration!");
    assert(CostWorklist.empty() && "Must start with an empty cost list");
    assert(PHIUsedList.empty() && "Must start with an empty phi used list");
    CostWorklist.push_back(&RootI);
    for (;; --Iteration) {
      do {
        Instruction *I = CostWorklist.pop_back_val();

        // InstCostMap only uses I and Iteration as a key, the other two values
        // don't matter here.
        auto CostIter = InstCostMap.find({I, Iteration, 0, 0});
        if (CostIter == InstCostMap.end())
          // If an input to a PHI node comes from a dead path through the loop
          // we may have no cost data for it here. What that actually means is
          // that it is free.
          continue;
        auto &Cost = *CostIter;
        if (Cost.IsCounted)
          // Already counted this instruction.
          continue;

        // Mark that we are counting the cost of this instruction now.
        Cost.IsCounted = true;

        // If this is a PHI node in the loop header, just add it to the PHI set.
        if (auto *PhiI = dyn_cast<PHINode>(I))
          if (PhiI->getParent() == L->getHeader()) {
            assert(Cost.IsFree && "Loop PHIs shouldn't be evaluated as they "
                                  "inherently simplify during unrolling.");
            if (Iteration == 0)
              continue;

            // Push the incoming value from the backedge into the PHI used list
            // if it is an in-loop instruction. We'll use this to populate the
            // cost worklist for the next iteration (as we count backwards).
            if (auto *OpI = dyn_cast<Instruction>(
                    PhiI->getIncomingValueForBlock(L->getLoopLatch())))
              if (L->contains(OpI))
                PHIUsedList.push_back(OpI);
            continue;
          }

        // First accumulate the cost of this instruction.
        if (!Cost.IsFree) {
          UnrolledCost += TTI.getUserCost(I);
          DEBUG(dbgs() << "Adding cost of instruction (iteration " << Iteration
                       << "): ");
          DEBUG(I->dump());
        }

        // We must count the cost of every operand which is not free,
        // recursively. If we reach a loop PHI node, simply add it to the set
        // to be considered on the next iteration (backwards!).
        for (Value *Op : I->operands()) {
          // Check whether this operand is free due to being a constant or
          // outside the loop.
          auto *OpI = dyn_cast<Instruction>(Op);
          if (!OpI || !L->contains(OpI))
            continue;

          // Otherwise accumulate its cost.
          CostWorklist.push_back(OpI);
        }
      } while (!CostWorklist.empty());

      if (PHIUsedList.empty())
        // We've exhausted the search.
        break;

      assert(Iteration > 0 &&
             "Cannot track PHI-used values past the first iteration!");
      CostWorklist.append(PHIUsedList.begin(), PHIUsedList.end());
      PHIUsedList.clear();
    }
  };

  // Ensure that we don't violate the loop structure invariants relied on by
  // this analysis.
  assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.");
  assert(L->isLCSSAForm(DT) &&
         "Must have loops in LCSSA form to track live-out values.");

  DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");

  // Simulate execution of each iteration of the loop, counting the
  // instructions that would be simplified.
  // Since the same load will take different values on different iterations,
  // we literally have to go through all of the loop's iterations.
  for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
    DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");

    // Prepare for the iteration by collecting any simplified entry or backedge
    // inputs.
    for (Instruction &I : *L->getHeader()) {
      auto *PHI = dyn_cast<PHINode>(&I);
      if (!PHI)
        break;

      // The loop header PHI nodes must have exactly two inputs: one from the
      // loop preheader and one from the loop latch.
      assert(
          PHI->getNumIncomingValues() == 2 &&
          "Must have an incoming value only for the preheader and the latch.");

      Value *V = PHI->getIncomingValueForBlock(
          Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());
      Constant *C = dyn_cast<Constant>(V);
      if (Iteration != 0 && !C)
        C = SimplifiedValues.lookup(V);
      if (C)
        SimplifiedInputValues.push_back({PHI, C});
    }

    // Now clear and re-populate the map for the next iteration.
    SimplifiedValues.clear();
    while (!SimplifiedInputValues.empty())
      SimplifiedValues.insert(SimplifiedInputValues.pop_back_val());

    UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE, L);

    BBWorklist.clear();
    BBWorklist.insert(L->getHeader());
    // Note that we *must not* cache the size; this loop grows the worklist.
    for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
      BasicBlock *BB = BBWorklist[Idx];

      // Visit all instructions in the given basic block and try to simplify
      // them.  We don't change the actual IR, just count optimization
      // opportunities.
      for (Instruction &I : *BB) {
        // Track this instruction's expected baseline cost when executing the
        // rolled loop form.
        RolledDynamicCost += TTI.getUserCost(&I);

        // Visit the instruction to analyze its loop cost after unrolling,
        // and if the visitor returns true, mark the instruction as free after
        // unrolling and continue.
        bool IsFree = Analyzer.visit(I);
        bool Inserted = InstCostMap.insert({&I, (int)Iteration,
                                           (unsigned)IsFree,
                                           /*IsCounted*/ false}).second;
        (void)Inserted;
        assert(Inserted && "Cannot have a state for an unvisited instruction!");

        if (IsFree)
          continue;

        // If the instruction might have a side-effect recursively account for
        // the cost of it and all the instructions leading up to it.
        if (I.mayHaveSideEffects())
          AddCostRecursively(I, Iteration);

        // Can't properly model a cost of a call.
        // FIXME: With a proper cost model we should be able to do it.
        if(isa<CallInst>(&I))
          return None;

        // If the unrolled body turns out to be too big, bail out.
        if (UnrolledCost > MaxUnrolledLoopSize) {
          DEBUG(dbgs() << "  Exceeded threshold.. exiting.\n"
                       << "  UnrolledCost: " << UnrolledCost
                       << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
                       << "\n");
          return None;
        }
      }

      TerminatorInst *TI = BB->getTerminator();

      // Add in the live successors by first checking whether we have a
      // terminator that may be simplified based on the values simplified by
      // this call.
      BasicBlock *KnownSucc = nullptr;
      if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
        if (BI->isConditional()) {
          if (Constant *SimpleCond =
                  SimplifiedValues.lookup(BI->getCondition())) {
            // Just take the first successor if condition is undef
            if (isa<UndefValue>(SimpleCond))
              KnownSucc = BI->getSuccessor(0);
            else if (ConstantInt *SimpleCondVal =
                         dyn_cast<ConstantInt>(SimpleCond))
              KnownSucc = BI->getSuccessor(SimpleCondVal->isZero() ? 1 : 0);
          }
        }
      } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
        if (Constant *SimpleCond =
                SimplifiedValues.lookup(SI->getCondition())) {
          // Just take the first successor if condition is undef
          if (isa<UndefValue>(SimpleCond))
            KnownSucc = SI->getSuccessor(0);
          else if (ConstantInt *SimpleCondVal =
                       dyn_cast<ConstantInt>(SimpleCond))
            KnownSucc = SI->findCaseValue(SimpleCondVal).getCaseSuccessor();
        }
      }
      if (KnownSucc) {
        if (L->contains(KnownSucc))
          BBWorklist.insert(KnownSucc);
        else
          ExitWorklist.insert({BB, KnownSucc});
        continue;
      }

      // Add BB's successors to the worklist.
      for (BasicBlock *Succ : successors(BB))
        if (L->contains(Succ))
          BBWorklist.insert(Succ);
        else
          ExitWorklist.insert({BB, Succ});
      AddCostRecursively(*TI, Iteration);
    }

    // If we found no optimization opportunities on the first iteration, we
    // won't find them on later ones either.
    if (UnrolledCost == RolledDynamicCost) {
      DEBUG(dbgs() << "  No opportunities found.. exiting.\n"
                   << "  UnrolledCost: " << UnrolledCost << "\n");
      return None;
    }
  }

  while (!ExitWorklist.empty()) {
    BasicBlock *ExitingBB, *ExitBB;
    std::tie(ExitingBB, ExitBB) = ExitWorklist.pop_back_val();

    for (Instruction &I : *ExitBB) {
      auto *PN = dyn_cast<PHINode>(&I);
      if (!PN)
        break;

      Value *Op = PN->getIncomingValueForBlock(ExitingBB);
      if (auto *OpI = dyn_cast<Instruction>(Op))
        if (L->contains(OpI))
          AddCostRecursively(*OpI, TripCount - 1);
    }
  }

  DEBUG(dbgs() << "Analysis finished:\n"
               << "UnrolledCost: " << UnrolledCost << ", "
               << "RolledDynamicCost: " << RolledDynamicCost << "\n");
  return {{UnrolledCost, RolledDynamicCost}};
}
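For reference, the densely packed record mentioned in the InstCostMap comment would look roughly like this; a sketch with assumed field widths (the real definition lives elsewhere in the file), consistent with the aggregate initializers {I, Iteration, IsFree, IsCounted} used above:

struct UnrolledInstState {
  Instruction *I;
  int Iteration : 30;
  unsigned IsFree : 1;
  unsigned IsCounted : 1;
};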
Example #10
bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
  SmallSetVector<MachineInstr*, 8> MaybeDeadCopies;  // Candidates for deletion
  DenseMap<unsigned, MachineInstr*> AvailCopyMap;    // Def -> available copies map
  DenseMap<unsigned, MachineInstr*> CopyMap;         // Def -> copies map
  SourceMap SrcMap; // Src -> Def map

  bool Changed = false;
  for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) {
    MachineInstr *MI = &*I;
    ++I;

    if (MI->isCopy()) {
      unsigned Def = MI->getOperand(0).getReg();
      unsigned Src = MI->getOperand(1).getReg();

      if (TargetRegisterInfo::isVirtualRegister(Def) ||
          TargetRegisterInfo::isVirtualRegister(Src))
        report_fatal_error("MachineCopyPropagation should be run after"
                           " register allocation!");

      DenseMap<unsigned, MachineInstr*>::iterator CI = AvailCopyMap.find(Src);
      if (CI != AvailCopyMap.end()) {
        MachineInstr *CopyMI = CI->second;
        if (!MRI->isReserved(Def) &&
            (!MRI->isReserved(Src) || NoInterveningSideEffect(CopyMI, MI)) &&
            isNopCopy(CopyMI, Def, Src, TRI)) {
          // The two copies cancel out and the source of the first copy
          // hasn't been overridden, eliminate the second one. e.g.
          //  %ECX<def> = COPY %EAX<kill>
          //  ... nothing clobbered EAX.
          //  %EAX<def> = COPY %ECX
          // =>
          //  %ECX<def> = COPY %EAX
          //
          // Also avoid eliminating a copy from reserved registers unless the
          // definition is proven not clobbered. e.g.
          // %RSP<def> = COPY %RAX
          // CALL
          // %RAX<def> = COPY %RSP

          // Clear any kills of Def between CopyMI and MI. This extends the
          // live range.
          for (MachineBasicBlock::iterator I = CopyMI, E = MI; I != E; ++I)
            I->clearRegisterKills(Def, TRI);

          removeCopy(MI);
          Changed = true;
          ++NumDeletes;
          continue;
        }
      }

      // If Src is defined by a previous copy, that copy cannot be eliminated.
      for (MCRegAliasIterator AI(Src, TRI, true); AI.isValid(); ++AI) {
        CI = CopyMap.find(*AI);
        if (CI != CopyMap.end())
          MaybeDeadCopies.remove(CI->second);
      }

      // Copy is now a candidate for deletion.
      MaybeDeadCopies.insert(MI);

      // If 'Def' was previously the source of another copy, then that earlier
      // copy's source is no longer available. e.g.
      // %xmm9<def> = copy %xmm2
      // ...
      // %xmm2<def> = copy %xmm0
      // ...
      // %xmm2<def> = copy %xmm9
      SourceNoLongerAvailable(Def, SrcMap, AvailCopyMap);

      // Remember Def is defined by the copy.
      // ... Make sure to clear the def maps of aliases first.
      for (MCRegAliasIterator AI(Def, TRI, false); AI.isValid(); ++AI) {
        CopyMap.erase(*AI);
        AvailCopyMap.erase(*AI);
      }
      CopyMap[Def] = MI;
      AvailCopyMap[Def] = MI;
      for (MCSubRegIterator SR(Def, TRI); SR.isValid(); ++SR) {
        CopyMap[*SR] = MI;
        AvailCopyMap[*SR] = MI;
      }

      // Remember source that's copied to Def. Once it's clobbered, then
      // it's no longer available for copy propagation.
      if (std::find(SrcMap[Src].begin(), SrcMap[Src].end(), Def) ==
          SrcMap[Src].end()) {
        SrcMap[Src].push_back(Def);
      }

      continue;
    }

    // Not a copy.
    SmallVector<unsigned, 2> Defs;
    int RegMaskOpNum = -1;
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (MO.isRegMask())
        RegMaskOpNum = i;
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (!Reg)
        continue;

      if (TargetRegisterInfo::isVirtualRegister(Reg))
        report_fatal_error("MachineCopyPropagation should be run after"
                           " register allocation!");

      if (MO.isDef()) {
        Defs.push_back(Reg);
        continue;
      }

      // If 'Reg' is defined by a copy, the copy is no longer a candidate
      // for elimination.
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
        DenseMap<unsigned, MachineInstr*>::iterator CI = CopyMap.find(*AI);
        if (CI != CopyMap.end())
          MaybeDeadCopies.remove(CI->second);
      }
    }

    // The instruction has a register mask operand which means that it clobbers
    // a large set of registers.  It is possible to use the register mask to
    // prune the available copies, but treat it like a basic block boundary for
    // now.
    if (RegMaskOpNum >= 0) {
      // Erase any MaybeDeadCopies whose destination register is clobbered.
      const MachineOperand &MaskMO = MI->getOperand(RegMaskOpNum);
      for (SmallSetVector<MachineInstr*, 8>::iterator
           DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
           DI != DE; ++DI) {
        unsigned Reg = (*DI)->getOperand(0).getReg();
        if (MRI->isReserved(Reg) || !MaskMO.clobbersPhysReg(Reg))
          continue;
        removeCopy(*DI);
        Changed = true;
        ++NumDeletes;
      }

      // Clear all data structures as if we were beginning a new basic block.
      MaybeDeadCopies.clear();
      AvailCopyMap.clear();
      CopyMap.clear();
      SrcMap.clear();
      continue;
    }

    for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
      unsigned Reg = Defs[i];

      // No longer defined by a copy.
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
        CopyMap.erase(*AI);
        AvailCopyMap.erase(*AI);
      }

      // If 'Reg' is previously source of a copy, it is no longer available for
      // copy propagation.
      SourceNoLongerAvailable(Reg, SrcMap, AvailCopyMap);
    }
  }

  // If MBB doesn't have successors, delete the copies whose defs are not used.
  // If MBB does have successors, conservatively assume the defs are live-out,
  // since we don't want to trust live-in lists.
  if (MBB.succ_empty()) {
    for (SmallSetVector<MachineInstr*, 8>::iterator
           DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
         DI != DE; ++DI) {
      if (!MRI->isReserved((*DI)->getOperand(0).getReg())) {
        removeCopy(*DI);
        Changed = true;
        ++NumDeletes;
      }
    }
  }

  return Changed;
}
Example #11
void MyCSE::cleanupGlobalSets() {
	VN.clear();
	LeaderTable.clear();
	TableAllocator.Reset();
}
Example #12
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
    if (DisablePeephole)
        return false;

    TM  = &MF.getTarget();
    TII = TM->getInstrInfo();
    MRI = &MF.getRegInfo();
    DT  = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0;

    bool Changed = false;

    SmallPtrSet<MachineInstr*, 8> LocalMIs;
    SmallSet<unsigned, 4> ImmDefRegs;
    DenseMap<unsigned, MachineInstr*> ImmDefMIs;
    unsigned FoldAsLoadDefReg;
    for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
        MachineBasicBlock *MBB = &*I;

        bool SeenMoveImm = false;
        LocalMIs.clear();
        ImmDefRegs.clear();
        ImmDefMIs.clear();
        FoldAsLoadDefReg = 0;

        for (MachineBasicBlock::iterator
                MII = I->begin(), MIE = I->end(); MII != MIE; ) {
            MachineInstr *MI = &*MII;
            // We may be erasing MI below, increment MII now.
            ++MII;
            LocalMIs.insert(MI);

            // If MI belongs to one of the following categories, we discard
            // the load candidate.
            if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
                    MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() ||
                    MI->hasUnmodeledSideEffects()) {
                FoldAsLoadDefReg = 0;
                continue;
            }
            if (MI->mayStore() || MI->isCall())
                FoldAsLoadDefReg = 0;

            if ((MI->isBitcast() && optimizeBitcastInstr(MI, MBB)) ||
                    (MI->isCompare() && optimizeCmpInstr(MI, MBB)) ||
                    (MI->isSelect() && optimizeSelect(MI))) {
                // MI is deleted.
                LocalMIs.erase(MI);
                Changed = true;
                continue;
            }

            if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
                SeenMoveImm = true;
            } else {
                Changed |= optimizeExtInstr(MI, MBB, LocalMIs);
                // optimizeExtInstr might have created new instructions after MI
                // and before the already incremented MII. Adjust MII so that the
                // next iteration sees the new instructions.
                MII = MI;
                ++MII;
                if (SeenMoveImm)
                    Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
            }

            // Check whether MI is a load candidate for folding into a later
            // instruction. If MI is not a candidate, check whether we can fold an
            // earlier load into MI.
            if (!isLoadFoldable(MI, FoldAsLoadDefReg) && FoldAsLoadDefReg) {
                // We need to fold load after optimizeCmpInstr, since optimizeCmpInstr
                // can enable folding by converting SUB to CMP.
                MachineInstr *DefMI = 0;
                MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
                                       FoldAsLoadDefReg, DefMI);
                if (FoldMI) {
                    // Update LocalMIs since we replaced MI with FoldMI and deleted DefMI.
                    LocalMIs.erase(MI);
                    LocalMIs.erase(DefMI);
                    LocalMIs.insert(FoldMI);
                    MI->eraseFromParent();
                    DefMI->eraseFromParent();
                    ++NumLoadFold;

                    // MI is replaced with FoldMI.
                    Changed = true;
                    continue;
                }
            }
        }
    }

    return Changed;
}
Example #13
// In this pass we look for GEP and cast instructions that are used
// across basic blocks and rewrite them to improve basic-block-at-a-time
// selection.
bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
  bool MadeChange = false;

  // Split all critical edges where the dest block has a PHI.
  if (CriticalEdgeSplit) {
    TerminatorInst *BBTI = BB.getTerminator();
    if (BBTI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BBTI)) {
      for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) {
        BasicBlock *SuccBB = BBTI->getSuccessor(i);
        if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true))
          SplitEdgeNicely(BBTI, i, BackEdges, this);
      }
    }
  }

  // Keep track of non-local addresses that have been sunk into this block.
  // This allows us to avoid inserting duplicate code for blocks with multiple
  // load/stores of the same address.
  DenseMap<Value*, Value*> SunkAddrs;

  for (BasicBlock::iterator BBI = BB.begin(), E = BB.end(); BBI != E; ) {
    Instruction *I = BBI++;

    if (CastInst *CI = dyn_cast<CastInst>(I)) {
      // If the source of the cast is a constant, then this should have
      // already been constant folded.  The only reason NOT to constant fold
      // it is if something (e.g. LSR) was careful to place the constant
      // evaluation in a block other than the one that uses it (e.g. to hoist
      // the address of globals out of a loop).  If this is the case, we don't
      // want to forward-subst the cast.
      if (isa<Constant>(CI->getOperand(0)))
        continue;

      bool Change = false;
      if (TLI) {
        Change = OptimizeNoopCopyExpression(CI, *TLI);
        MadeChange |= Change;
      }

      if (!Change && (isa<ZExtInst>(I) || isa<SExtInst>(I))) {
        MadeChange |= MoveExtToFormExtLoad(I);
        MadeChange |= OptimizeExtUses(I);
      }
    } else if (CmpInst *CI = dyn_cast<CmpInst>(I)) {
      MadeChange |= OptimizeCmpExpression(CI);
    } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      if (TLI)
        MadeChange |= OptimizeMemoryInst(I, I->getOperand(0), LI->getType(),
                                         SunkAddrs);
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      if (TLI)
        MadeChange |= OptimizeMemoryInst(I, SI->getOperand(1),
                                         SI->getOperand(0)->getType(),
                                         SunkAddrs);
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
      if (GEPI->hasAllZeroIndices()) {
        /// The GEP operand must be a pointer, so must its result -> BitCast
        Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
                                          GEPI->getName(), GEPI);
        GEPI->replaceAllUsesWith(NC);
        GEPI->eraseFromParent();
        MadeChange = true;
        BBI = NC;
      }
    } else if (CallInst *CI = dyn_cast<CallInst>(I)) {
      // If we found an inline asm expression, and if the target knows how to
      // lower it to normal LLVM code, do so now.
      if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
        if (TLI->ExpandInlineAsm(CI)) {
          BBI = BB.begin();
          // Avoid processing instructions out of order, which could cause
          // reuse before a value is defined.
          SunkAddrs.clear();
        } else
          // Sink address computing for memory operands into the block.
          MadeChange |= OptimizeInlineAsmInst(I, &(*CI), SunkAddrs);
      } else {
        // Other CallInst optimizations that don't need to muck with the
        // enclosing iterator here.
        MadeChange |= OptimizeCallInst(CI);
      }
    }
  }

  return MadeChange;
}
Example #14
/// canonicalizeInputFunction - Functions like swift_retain return an
/// argument as a low-level performance optimization.  This makes it difficult
/// to reason about pointer equality though, so undo it as an initial
/// canonicalization step.  After this step, no retain call has any remaining
/// uses of its result.
///
/// This also does some trivial peep-hole optimizations as we go.
static bool canonicalizeInputFunction(Function &F, ARCEntryPointBuilder &B,
                                      SwiftRCIdentity *RC) {
  bool Changed = false;
  DenseSet<Value *> NativeRefs;
  DenseMap<Value *, TinyPtrVector<Instruction *>> UnknownRetains;
  DenseMap<Value *, TinyPtrVector<Instruction *>> UnknownReleases;
  for (auto &BB : F) {
    UnknownRetains.clear();
    UnknownReleases.clear();
    NativeRefs.clear();
    for (auto I = BB.begin(); I != BB.end(); ) {
      Instruction &Inst = *I++;

      switch (classifyInstruction(Inst)) {
      // These instructions should not reach here based on the pass ordering.
      // i.e. LLVMARCOpt -> LLVMContractOpt.
      case RT_RetainN:
      case RT_UnknownRetainN:
      case RT_BridgeRetainN:
      case RT_ReleaseN:
      case RT_UnknownReleaseN:
      case RT_BridgeReleaseN:
        llvm_unreachable("These are only created by LLVMARCContract !");
      case RT_Unknown:
      case RT_BridgeRelease:
      case RT_AllocObject:
      case RT_FixLifetime:
      case RT_NoMemoryAccessed:
      case RT_RetainUnowned:
      case RT_CheckUnowned:
        break;
      case RT_Retain: {
        CallInst &CI = cast<CallInst>(Inst);
        Value *ArgVal = RC->getSwiftRCIdentityRoot(CI.getArgOperand(0));
        // retain(null) is a no-op.
        if (isa<ConstantPointerNull>(ArgVal)) {
          CI.eraseFromParent();
          Changed = true;
          ++NumNoopDeleted;
          continue;
        }
        // Rewrite unknown retains into swift_retains.
        NativeRefs.insert(ArgVal);
        for (auto &X : UnknownRetains[ArgVal]) {
          B.setInsertPoint(X);
          B.createRetain(ArgVal, cast<CallInst>(X));
          X->eraseFromParent();
          ++NumUnknownRetainReleaseSRed;
          Changed = true;
        }
        UnknownRetains[ArgVal].clear();
        break;
      }
      case RT_UnknownRetain: {
        CallInst &CI = cast<CallInst>(Inst);
        Value *ArgVal = RC->getSwiftRCIdentityRoot(CI.getArgOperand(0));
        // unknownRetain(null) is a no-op.
        if (isa<ConstantPointerNull>(ArgVal)) {
          CI.eraseFromParent();
          Changed = true;
          ++NumNoopDeleted;
          continue;
        }

        // We have not encountered a strong retain/release yet; keep it in the
        // unknown retain/release list for now.  It might get replaced later.
        if (NativeRefs.find(ArgVal) == NativeRefs.end()) {
           UnknownRetains[ArgVal].push_back(&CI);
        } else {
          B.setInsertPoint(&CI);
          B.createRetain(ArgVal, &CI);
          CI.eraseFromParent();
          ++NumUnknownRetainReleaseSRed;
          Changed = true;
        }
        break;
      }
      case RT_Release: {
        CallInst &CI = cast<CallInst>(Inst);
        Value *ArgVal = RC->getSwiftRCIdentityRoot(CI.getArgOperand(0));
        // release(null) is a no-op.
        if (isa<ConstantPointerNull>(ArgVal)) {
          CI.eraseFromParent();
          Changed = true;
          ++NumNoopDeleted;
          continue;
        }
        // Rewrite unknown releases into swift_releases.
        NativeRefs.insert(ArgVal);
        for (auto &X : UnknownReleases[ArgVal]) {
          B.setInsertPoint(X);
          B.createRelease(ArgVal, cast<CallInst>(X));
          X->eraseFromParent();
          ++NumUnknownRetainReleaseSRed;
          Changed = true;
        }
        UnknownReleases[ArgVal].clear();
        break;
      }
      case RT_UnknownRelease: {
        CallInst &CI = cast<CallInst>(Inst);
        Value *ArgVal = RC->getSwiftRCIdentityRoot(CI.getArgOperand(0));
        // unknownRelease(null) is a no-op.
        if (isa<ConstantPointerNull>(ArgVal)) {
          CI.eraseFromParent();
          Changed = true;
          ++NumNoopDeleted;
          continue;
        }

        // We have not encountered a strong retain/release yet; keep it in the
        // unknown retain/release list for now.  It might get replaced later.
        if (NativeRefs.find(ArgVal) == NativeRefs.end()) {
          UnknownReleases[ArgVal].push_back(&CI);
        } else {
          B.setInsertPoint(&CI);
          B.createRelease(ArgVal, &CI);
          CI.eraseFromParent();
          ++NumUnknownRetainReleaseSRed;
          Changed = true;
        }
        break;
      }
      case RT_ObjCRelease: {
        CallInst &CI = cast<CallInst>(Inst);
        Value *ArgVal = RC->getSwiftRCIdentityRoot(CI.getArgOperand(0));
        // objc_release(null) is a noop, zap it.
        if (isa<ConstantPointerNull>(ArgVal)) {
          CI.eraseFromParent();
          Changed = true;
          ++NumNoopDeleted;
          continue;
        }
        break;
      }

      // These retain instructions return their argument so must be processed
      // specially.
      case RT_BridgeRetain:
      case RT_ObjCRetain: {
        // Canonicalize the retain so that nothing uses its result.
        CallInst &CI = cast<CallInst>(Inst);
        // Do not fetch the RC-identical value here; we could end up with a
        // crash in replaceAllUsesWith, as the types may be different.
        Value *ArgVal = CI.getArgOperand(0);
        if (!CI.use_empty()) {
          CI.replaceAllUsesWith(ArgVal);
          Changed = true;
        }

        // {objc_retain,swift_unknownRetain}(null) is a noop, delete it.
        if (isa<ConstantPointerNull>(ArgVal)) {
          CI.eraseFromParent();
          Changed = true;
          ++NumNoopDeleted;
          continue;
        }

        break;
      }
      }
    }
  }
  return Changed;
}
Example #15
bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  MachineRegisterInfo &MRI = MF.getRegInfo();

  LLVM_DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');

  InstOrderMap IOM;
  // Map from register to the instruction order (value of IOM) at which the
  // register is last used.  When moving an instruction up, we need to make
  // sure none of its defs (including dead defs) cross its last use.
  DenseMap<unsigned, std::pair<unsigned, MachineInstr *>> UseMap;

  for (MachineBasicBlock &MBB : MF) {
    if (MBB.empty())
      continue;
    bool SawStore = false;
    BuildInstOrderMap(MBB.begin(), IOM);
    UseMap.clear();

    for (MachineBasicBlock::iterator Next = MBB.begin(); Next != MBB.end();) {
      MachineInstr &MI = *Next;
      ++Next;
      if (MI.isPHI() || MI.isDebugInstr())
        continue;
      if (MI.mayStore())
        SawStore = true;

      unsigned CurrentOrder = IOM[&MI];
      unsigned Barrier = 0;
      MachineInstr *BarrierMI = nullptr;
      for (const MachineOperand &MO : MI.operands()) {
        if (!MO.isReg() || MO.isDebug())
          continue;
        if (MO.isUse())
          UseMap[MO.getReg()] = std::make_pair(CurrentOrder, &MI);
        else if (MO.isDead() && UseMap.count(MO.getReg()))
          // Barrier is the last instruction where MO gets used. MI should not
          // be moved above Barrier.
          if (Barrier < UseMap[MO.getReg()].first) {
            Barrier = UseMap[MO.getReg()].first;
            BarrierMI = UseMap[MO.getReg()].second;
          }
      }

      if (!MI.isSafeToMove(nullptr, SawStore)) {
        // If MI has side effects, it should become a barrier for code motion.
        // IOM is rebuilt from the next instruction to prevent later
        // instructions from being moved before this MI.
        if (MI.hasUnmodeledSideEffects() && Next != MBB.end()) {
          BuildInstOrderMap(Next, IOM);
          SawStore = false;
        }
        continue;
      }

      const MachineOperand *DefMO = nullptr;
      MachineInstr *Insert = nullptr;

      // Number of live ranges that will be shortened. We do not count
      // live ranges that are defined by a COPY, as those could be coalesced
      // later.
      unsigned NumEligibleUse = 0;

      for (const MachineOperand &MO : MI.operands()) {
        if (!MO.isReg() || MO.isDead() || MO.isDebug())
          continue;
        unsigned Reg = MO.getReg();
        // Do not move the instruction if it defines or uses a physical
        // register, unless it is a constant physical register or NoRegister.
        if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
          if (!Reg || MRI.isConstantPhysReg(Reg))
            continue;
          Insert = nullptr;
          break;
        }
        if (MO.isDef()) {
          // Do not move if there is more than one def.
          if (DefMO) {
            Insert = nullptr;
            break;
          }
          DefMO = &MO;
        } else if (MRI.hasOneNonDBGUse(Reg) && MRI.hasOneDef(Reg) && DefMO &&
                   MRI.getRegClass(DefMO->getReg()) ==
                       MRI.getRegClass(MO.getReg())) {
          // The heuristic does not handle different register classes yet
          // (registers of different sizes, looser/tighter constraints). This
          // is because it needs a more accurate model to handle register
          // pressure correctly.
          MachineInstr &DefInstr = *MRI.def_instr_begin(Reg);
          if (!DefInstr.isCopy())
            NumEligibleUse++;
          Insert = FindDominatedInstruction(DefInstr, Insert, IOM);
        } else {
          Insert = nullptr;
          break;
        }
      }

      // If Barrier equals IOM[I], traverse forward to check whether BarrierMI
      // comes after Insert; if so, we should not hoist.
      for (MachineInstr *I = Insert; I && IOM[I] == Barrier;
           I = I->getNextNode())
        if (I == BarrierMI) {
          Insert = nullptr;
          break;
        }
      // Move the instruction when the number of shrunk live ranges is > 1.
      if (DefMO && Insert && NumEligibleUse > 1 && Barrier <= IOM[Insert]) {
        MachineBasicBlock::iterator I = std::next(Insert->getIterator());
        // Skip all the PHI and debug instructions.
        while (I != MBB.end() && (I->isPHI() || I->isDebugInstr()))
          I = std::next(I);
        if (I == MI.getIterator())
          continue;

        // Update the dominator order to be the same as the insertion point.
        // We do this to maintain a non-decreasing order without needing to
        // update all instruction orders after the insertion point.
        unsigned NewOrder = IOM[&*I];
        IOM[&MI] = NewOrder;
        NumInstrsHoistedToShrinkLiveRange++;

        // Find MI's debug value following MI.
        MachineBasicBlock::iterator EndIter = std::next(MI.getIterator());
        if (MI.getOperand(0).isReg())
          for (; EndIter != MBB.end() && EndIter->isDebugValue() &&
                 EndIter->getOperand(0).isReg() &&
                 EndIter->getOperand(0).getReg() == MI.getOperand(0).getReg();
               ++EndIter, ++Next)
            IOM[&*EndIter] = NewOrder;
        MBB.splice(I, &MBB, MI.getIterator(), EndIter);
      }
    }
  }
  return false;
}
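
The UseMap bookkeeping above exists to stop a dead def from being hoisted across the last use of the register it defines. The following is a minimal sketch of that barrier computation on a toy instruction list; the Inst struct and the integer registers are hypothetical stand-ins for MachineInstrs and MachineOperands.

// Hypothetical sketch of the last-use barrier computation.
#include <cstdio>
#include <unordered_map>
#include <vector>

struct Inst {
  std::vector<int> Uses;     // registers read by this instruction
  std::vector<int> DeadDefs; // registers defined here but never read later
};

int main() {
  // r1 is used at order 1 and dead-defined at order 2: that dead def must
  // not be hoisted above order 1, or it would clobber the live value of r1.
  std::vector<Inst> Block = {{{2}, {}}, {{1}, {}}, {{}, {1}}};
  std::unordered_map<int, unsigned> LastUse; // reg -> order of its last use

  for (unsigned Order = 0; Order < Block.size(); ++Order) {
    unsigned Barrier = 0; // this instruction must not move above Barrier
    for (int R : Block[Order].Uses)
      LastUse[R] = Order;
    for (int R : Block[Order].DeadDefs)
      if (LastUse.count(R) && Barrier < LastUse[R])
        Barrier = LastUse[R];
    std::printf("inst %u: barrier = %u\n", Order, Barrier);
  }
}
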
Example #16
bool ConstantMerge::runOnModule(Module &M) {
  // Find all the globals that are marked "used".  These cannot be merged.
  SmallPtrSet<const GlobalValue*, 8> UsedGlobals;
  FindUsedValues(M.getGlobalVariable("llvm.used"), UsedGlobals);
  FindUsedValues(M.getGlobalVariable("llvm.compiler.used"), UsedGlobals);
  
  // Map unique constant/section pairs to globals.  We don't want to merge
  // globals in different sections.
  DenseMap<Constant*, GlobalVariable*> CMap;

  // Replacements - This vector contains a list of replacements to perform.
  SmallVector<std::pair<GlobalVariable*, GlobalVariable*>, 32> Replacements;

  bool MadeChange = false;

  // Iterate constant merging while we are still making progress.  Merging two
  // constants together may allow us to merge other constants together if the
  // second level constants have initializers which point to the globals that
  // were just merged.
  while (1) {
    // First pass: identify all globals that can be merged together, filling in
    // the Replacements vector.  We cannot do the replacement in this pass
    // because doing so may cause initializers of other globals to be rewritten,
    // invalidating the Constant* pointers in CMap.
    //
    for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
         GVI != E; ) {
      GlobalVariable *GV = GVI++;
      
      // If this GV is dead, remove it.
      GV->removeDeadConstantUsers();
      if (GV->use_empty() && GV->hasLocalLinkage()) {
        GV->eraseFromParent();
        continue;
      }
      
      // Only process constants with initializers in the default address space.
      if (!GV->isConstant() || !GV->hasDefinitiveInitializer() ||
          GV->getType()->getAddressSpace() != 0 || !GV->getSection().empty() ||
          // Don't touch values marked with attribute(used).
          UsedGlobals.count(GV))
        continue;

      Constant *Init = GV->getInitializer();

      // Check to see if the initializer is already known.
      GlobalVariable *&Slot = CMap[Init];

      if (Slot == 0) {    // Nope, add it to the map.
        Slot = GV;
      } else if (GV->hasLocalLinkage()) {    // Yup, this is a duplicate!
        // Make all uses of the duplicate constant use the canonical version.
        Replacements.push_back(std::make_pair(GV, Slot));
      }
    }

    if (Replacements.empty())
      return MadeChange;
    CMap.clear();

    // Now that we have figured out which replacements must be made, do them all
    // now.  This avoids invalidating the pointers in CMap, which are unneeded
    // now.
    for (unsigned i = 0, e = Replacements.size(); i != e; ++i) {
      // Eliminate any uses of the dead global.
      Replacements[i].first->replaceAllUsesWith(Replacements[i].second);

      // Delete the global value from the module.
      Replacements[i].first->eraseFromParent();
    }

    NumMerged += Replacements.size();
    Replacements.clear();
  }
}
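
The two-pass structure above — record replacements first, mutate afterwards — is what keeps the Constant* keys in CMap valid. Here is a minimal sketch of the same pattern, assuming plain strings stand in for initializers and globals (all names are hypothetical).

// Hypothetical sketch of the collect-then-replace pattern (C++17).
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

int main() {
  // Global name -> initializer; duplicates share a textual initializer.
  std::vector<std::pair<std::string, std::string>> Globals = {
      {"gA", "\"hello\""}, {"gB", "\"hello\""}, {"gC", "\"world\""}};

  std::map<std::string, std::string> CMap; // initializer -> canonical global
  std::vector<std::pair<std::string, std::string>> Replacements;

  // Pass 1: record duplicates without mutating anything, so CMap stays valid.
  for (const auto &[Name, Init] : Globals) {
    auto [It, Inserted] = CMap.emplace(Init, Name);
    if (!Inserted)
      Replacements.emplace_back(Name, It->second);
  }

  // Pass 2: apply the recorded replacements; CMap is no longer needed.
  for (const auto &[Dup, Canon] : Replacements)
    std::cout << "replace all uses of " << Dup << " with " << Canon << "\n";
}
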
Example #17
/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGFast::ListScheduleBottomUp() {
  unsigned CurCycle = 0;

  // Release any predecessors of the special Exit node.
  ReleasePredecessors(&ExitSU, CurCycle);

  // Add root to Available queue.
  if (!SUnits.empty()) {
    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
    RootSU->isAvailable = true;
    AvailableQueue.push(RootSU);
  }

  // While the Available queue is not empty, grab the node with the highest
  // priority. If it is not ready, put it back. Schedule the node.
  SmallVector<SUnit*, 4> NotReady;
  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue.empty()) {
    bool Delayed = false;
    LRegsMap.clear();
    SUnit *CurSU = AvailableQueue.pop();
    while (CurSU) {
      SmallVector<unsigned, 4> LRegs;
      if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
        break;
      Delayed = true;
      LRegsMap.insert(std::make_pair(CurSU, LRegs));

      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
      NotReady.push_back(CurSU);
      CurSU = AvailableQueue.pop();
    }

    // All candidates are delayed due to live physical reg dependencies.
    // Try code duplication or inserting cross class copies
    // to resolve it.
    if (Delayed && !CurSU) {
      if (!CurSU) {
        // Try duplicating the nodes that produce these
        // "expensive to copy" values to break the dependency. In case even
        // that doesn't work, insert cross class copies.
        SUnit *TrySU = NotReady[0];
        SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
        assert(LRegs.size() == 1 && "Can't handle this yet!");
        unsigned Reg = LRegs[0];
        SUnit *LRDef = LiveRegDefs[Reg];
        MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
        const TargetRegisterClass *RC =
          TRI->getMinimalPhysRegClass(Reg, VT);
        const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);

        // If the cross-copy register class is the same as RC, then it must be
        // possible to copy the value directly. Do not try to duplicate the
        // def.
        // If the cross-copy register class is not the same as RC, then it is
        // possible to copy the value, but it requires cross-register-class
        // copies and is expensive.
        // If the cross-copy register class is null, then it is not possible
        // to copy the value at all.
        SUnit *NewDef = nullptr;
        if (DestRC != RC) {
          NewDef = CopyAndMoveSuccessors(LRDef);
          if (!DestRC && !NewDef)
            report_fatal_error("Can't handle live physical "
                               "register dependency!");
        }
        if (!NewDef) {
          // Issue copies, these can be expensive cross register class copies.
          SmallVector<SUnit*, 2> Copies;
          InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
          LLVM_DEBUG(dbgs() << "Adding an edge from SU #" << TrySU->NodeNum
                            << " to SU #" << Copies.front()->NodeNum << "\n");
          AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
          NewDef = Copies.back();
        }

        LLVM_DEBUG(dbgs() << "Adding an edge from SU #" << NewDef->NodeNum
                          << " to SU #" << TrySU->NodeNum << "\n");
        LiveRegDefs[Reg] = NewDef;
        AddPred(NewDef, SDep(TrySU, SDep::Artificial));
        TrySU->isAvailable = false;
        CurSU = NewDef;
      }

      if (!CurSU) {
        llvm_unreachable("Unable to resolve live physical register dependencies!");
      }
    }

    // Add the nodes that aren't ready back onto the available list.
    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
      NotReady[i]->isPending = false;
      // May no longer be available due to backtracking.
      if (NotReady[i]->isAvailable)
        AvailableQueue.push(NotReady[i]);
    }
    NotReady.clear();

    if (CurSU)
      ScheduleNodeBottomUp(CurSU, CurCycle);
    ++CurCycle;
  }

  // Reverse the order since it is bottom up.
  std::reverse(Sequence.begin(), Sequence.end());

#ifndef NDEBUG
  VerifyScheduledSequence(/*isBottomUp=*/true);
#endif
}
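
Stripped of the live-physical-register backtracking, the loop above is the classic bottom-up list-scheduling skeleton: pop an available node, schedule it, and "release" its predecessors as their remaining successor counts hit zero. A minimal sketch on a toy dependence graph follows; the Node struct and the graph are hypothetical, and a real scheduler would pop by priority rather than stack order.

// Hypothetical sketch of bottom-up list scheduling.
#include <algorithm>
#include <iostream>
#include <vector>

struct Node {
  std::vector<int> Preds;   // nodes this one depends on (its operands)
  int UnscheduledSuccs = 0; // successors not yet scheduled
};

int main() {
  // Nodes 0 and 1 feed node 2; node 2 feeds node 3 (the root).
  std::vector<Node> G = {{{}}, {{}}, {{0, 1}}, {{2}}};
  for (const Node &N : G)
    for (int P : N.Preds)
      ++G[P].UnscheduledSuccs;

  std::vector<int> Available = {3}, Sequence; // bottom-up: start at the root
  while (!Available.empty()) {
    int Cur = Available.back(); // a real scheduler pops by priority instead
    Available.pop_back();
    Sequence.push_back(Cur);
    for (int P : G[Cur].Preds)  // "release" the predecessors of Cur
      if (--G[P].UnscheduledSuccs == 0)
        Available.push_back(P);
  }
  std::reverse(Sequence.begin(), Sequence.end()); // undo the bottom-up order
  for (int Id : Sequence)
    std::cout << Id << ' '; // prints: 0 1 2 3
  std::cout << '\n';
}
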
Example #18
/// \brief Figure out if the loop is worth full unrolling.
///
/// Complete loop unrolling can make some loads constant, and we need to know
/// if that would expose any further optimization opportunities.  This routine
/// estimates this optimization.  It computes the cost of the unrolled loop
/// (UnrolledCost) and the dynamic cost of the original loop
/// (RolledDynamicCost). By
/// dynamic cost we mean that we won't count costs of blocks that are known not
/// to be executed (i.e. if we have a branch in the loop and we know that at the
/// given iteration its condition would be resolved to true, we won't add up the
/// cost of the 'false'-block).
/// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
/// the analysis failed (no benefits expected from the unrolling, or the loop is
/// too big to analyze), the returned value is None.
static Optional<EstimatedUnrollCost>
analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
                      ScalarEvolution &SE, const TargetTransformInfo &TTI,
                      int MaxUnrolledLoopSize) {
  // We want to be able to scale offsets by the trip count and add more offsets
  // to them without checking for overflows, and we already don't want to
  // analyze *massive* trip counts, so we force the max to be reasonably small.
  assert(UnrollMaxIterationsCountToAnalyze < (INT_MAX / 2) &&
         "The unroll iterations max is too large!");

  // Don't simulate loops with a big or unknown trip count.
  if (!UnrollMaxIterationsCountToAnalyze || !TripCount ||
      TripCount > UnrollMaxIterationsCountToAnalyze)
    return None;

  SmallSetVector<BasicBlock *, 16> BBWorklist;
  DenseMap<Value *, Constant *> SimplifiedValues;
  SmallVector<std::pair<Value *, Constant *>, 4> SimplifiedInputValues;

  // The estimated cost of the unrolled form of the loop. We try to estimate
  // this by simplifying as much as we can while computing the estimate.
  int UnrolledCost = 0;
  // We also track the estimated dynamic (that is, actually executed) cost in
  // the rolled form. This helps identify cases when the savings from unrolling
  // aren't just exposing dead control flows, but actual reduced dynamic
  // instructions due to the simplifications which we expect to occur after
  // unrolling.
  int RolledDynamicCost = 0;

  // Ensure that we don't violate the loop structure invariants relied on by
  // this analysis.
  assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.");
  assert(L->isLCSSAForm(DT) &&
         "Must have loops in LCSSA form to track live-out values.");

  DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");

  // Simulate execution of each iteration of the loop, counting the
  // instructions that would be simplified. Since the same load can take
  // different values on different iterations, we literally have to go
  // through all of the loop's iterations.
  for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
    DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");

    // Prepare for the iteration by collecting any simplified entry or backedge
    // inputs.
    for (Instruction &I : *L->getHeader()) {
      auto *PHI = dyn_cast<PHINode>(&I);
      if (!PHI)
        break;

      // The loop header PHI nodes must have exactly two inputs: one from the
      // loop preheader and one from the loop latch.
      assert(
          PHI->getNumIncomingValues() == 2 &&
          "Must have an incoming value only for the preheader and the latch.");

      Value *V = PHI->getIncomingValueForBlock(
          Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());
      Constant *C = dyn_cast<Constant>(V);
      if (Iteration != 0 && !C)
        C = SimplifiedValues.lookup(V);
      if (C)
        SimplifiedInputValues.push_back({PHI, C});
    }

    // Now clear and re-populate the map for the next iteration.
    SimplifiedValues.clear();
    while (!SimplifiedInputValues.empty())
      SimplifiedValues.insert(SimplifiedInputValues.pop_back_val());

    UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE);

    BBWorklist.clear();
    BBWorklist.insert(L->getHeader());
    // Note that we *must not* cache the size, as this loop grows the worklist.
    for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
      BasicBlock *BB = BBWorklist[Idx];

      // Visit all instructions in the given basic block and try to simplify
      // them. We don't change the actual IR; we just count optimization
      // opportunities.
      for (Instruction &I : *BB) {
        int InstCost = TTI.getUserCost(&I);

        // Visit the instruction to analyze its loop cost after unrolling,
        // and if the visitor returns false, include this instruction in the
        // unrolled cost.
        if (!Analyzer.visit(I))
          UnrolledCost += InstCost;
        else
          DEBUG(dbgs() << "  " << I
                       << " would be simplified if loop is unrolled.\n");

        // Also track this instruction's expected cost when executing the rolled
        // loop form.
        RolledDynamicCost += InstCost;

        // If unrolled body turns out to be too big, bail out.
        if (UnrolledCost > MaxUnrolledLoopSize) {
          DEBUG(dbgs() << "  Exceeded threshold.. exiting.\n"
                       << "  UnrolledCost: " << UnrolledCost
                       << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
                       << "\n");
          return None;
        }
      }

      TerminatorInst *TI = BB->getTerminator();

      // Add in the live successors by first checking whether we have a
      // terminator that may be simplified based on the values simplified by
      // this call.
      if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
        if (BI->isConditional()) {
          if (Constant *SimpleCond =
                  SimplifiedValues.lookup(BI->getCondition())) {
            BasicBlock *Succ = nullptr;
            // Just take the first successor if condition is undef
            if (isa<UndefValue>(SimpleCond))
              Succ = BI->getSuccessor(0);
            else
              Succ = BI->getSuccessor(
                  cast<ConstantInt>(SimpleCond)->isZero() ? 1 : 0);
            if (L->contains(Succ))
              BBWorklist.insert(Succ);
            continue;
          }
        }
      } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
        if (Constant *SimpleCond =
                SimplifiedValues.lookup(SI->getCondition())) {
          BasicBlock *Succ = nullptr;
          // Just take the first successor if condition is undef
          if (isa<UndefValue>(SimpleCond))
            Succ = SI->getSuccessor(0);
          else
            Succ = SI->findCaseValue(cast<ConstantInt>(SimpleCond))
                       .getCaseSuccessor();
          if (L->contains(Succ))
            BBWorklist.insert(Succ);
          continue;
        }
      }

      // Add BB's successors to the worklist.
      for (BasicBlock *Succ : successors(BB))
        if (L->contains(Succ))
          BBWorklist.insert(Succ);
    }

    // If we found no optimization opportunities on the first iteration, we
    // won't find them on later ones either.
    if (UnrolledCost == RolledDynamicCost) {
      DEBUG(dbgs() << "  No opportunities found.. exiting.\n"
                   << "  UnrolledCost: " << UnrolledCost << "\n");
      return None;
    }
  }
  DEBUG(dbgs() << "Analysis finished:\n"
               << "UnrolledCost: " << UnrolledCost << ", "
               << "RolledDynamicCost: " << RolledDynamicCost << "\n");
  return {{UnrolledCost, RolledDynamicCost}};
}
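
A caller of analyzeLoopUnrollCost still has to turn the (UnrolledCost, RolledDynamicCost) pair into an unroll/no-unroll decision. The sketch below shows one plausible policy, assuming a size threshold plus a percent-of-dynamic-cost-saved threshold (similar in spirit to the thresholds in LLVM's unroll heuristics); the shouldFullyUnroll helper and the numbers are hypothetical.

// Hypothetical sketch of a consumer of the cost pair (C++17).
#include <iostream>
#include <optional>

struct EstimatedUnrollCost { int UnrolledCost, RolledDynamicCost; };

static bool shouldFullyUnroll(std::optional<EstimatedUnrollCost> Cost,
                              int Threshold, int PercentSavedThreshold) {
  if (!Cost)
    return false; // analysis failed or loop too big: fall back to heuristics
  if (Cost->UnrolledCost >= Threshold)
    return false; // the unrolled body itself is too large
  // Require that unrolling saves a meaningful fraction of the dynamic cost.
  int Saved = Cost->RolledDynamicCost - Cost->UnrolledCost;
  return Saved * 100 >= PercentSavedThreshold * Cost->RolledDynamicCost;
}

int main() {
  EstimatedUnrollCost C{80, 120}; // ~33% of the dynamic cost simplified away
  std::cout << shouldFullyUnroll(C, 100, 20) << '\n'; // prints 1
}
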