/// RemoveAccessedObjects - Check to see if the specified location may alias any
/// of the stack objects in the DeadStackObjects set.  If so, they become live
/// because the location is being loaded.
void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
                                SmallPtrSet<Value*, 16> &DeadStackObjects) {
  const Value *UnderlyingPointer = LoadedLoc.Ptr->getUnderlyingObject();

  // A constant can't be in the dead pointer set.
  if (isa<Constant>(UnderlyingPointer))
    return;
  
  // If the kill pointer can be easily reduced to an alloca, don't bother doing
  // extraneous AA queries.
  if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) {
    DeadStackObjects.erase(const_cast<Value*>(UnderlyingPointer));
    return;
  }
  
  SmallVector<Value*, 16> NowLive;
  for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(),
       E = DeadStackObjects.end(); I != E; ++I) {
    // See if the loaded location could alias the stack location.
    AliasAnalysis::Location StackLoc(*I, getPointerSize(*I, *AA));
    if (!AA->isNoAlias(StackLoc, LoadedLoc))
      NowLive.push_back(*I);
  }

  for (SmallVector<Value*, 16>::iterator I = NowLive.begin(), E = NowLive.end();
       I != E; ++I)
    DeadStackObjects.erase(*I);
}
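A note on the idiom above: erasing from a SmallPtrSet invalidates its iterators, so the elements to drop are first collected into a SmallVector (NowLive) and erased in a second pass. A minimal standalone sketch of the same pattern, with an illustrative helper name and predicate that are not part of the pass:

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

// Erase all elements of Set matching Pred. Erasing inside the first loop
// would invalidate the iterators, so collect first, then erase.
template <typename T, typename PredT>
void erasePointersIf(SmallPtrSet<T *, 16> &Set, PredT Pred) {
  SmallVector<T *, 16> ToErase;
  for (T *V : Set)
    if (Pred(V))
      ToErase.push_back(V);
  for (T *V : ToErase)
    Set.erase(V);
}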
Example #2
TEST(SmallPtrSetTest, GrowthTest) {
  int i;
  int buf[8];
  for(i=0; i<8; ++i) buf[i]=0;

  SmallPtrSet<int *, 4> s;
  typedef SmallPtrSet<int *, 4>::iterator iter;
  
  s.insert(&buf[0]);
  s.insert(&buf[1]);
  s.insert(&buf[2]);
  s.insert(&buf[3]);
  EXPECT_EQ(4U, s.size());

  i = 0;
  for(iter I=s.begin(), E=s.end(); I!=E; ++I, ++i)
      (**I)++;
  EXPECT_EQ(4, i);
  for(i=0; i<8; ++i)
      EXPECT_EQ(i<4?1:0,buf[i]);

  s.insert(&buf[4]);
  s.insert(&buf[5]);
  s.insert(&buf[6]);
  s.insert(&buf[7]);

  i = 0;
  for(iter I=s.begin(), E=s.end(); I!=E; ++I, ++i)
      (**I)++;
  EXPECT_EQ(8, i);
  s.erase(&buf[4]);
  s.erase(&buf[5]);
  s.erase(&buf[6]);
  s.erase(&buf[7]);
  EXPECT_EQ(4U, s.size());

  i = 0;
  for(iter I=s.begin(), E=s.end(); I!=E; ++I, ++i)
      (**I)++;
  EXPECT_EQ(4, i);
  for(i=0; i<8; ++i)
      EXPECT_EQ(i<4?3:1,buf[i]);

  s.clear();
  for(i=0; i<8; ++i) buf[i]=0;
  for(i=0; i<128; ++i) s.insert(&buf[i%8]); // test repeated entries
  EXPECT_EQ(8U, s.size());
  for(iter I=s.begin(), E=s.end(); I!=E; ++I, ++i)
      (**I)++;
  for(i=0; i<8; ++i)
      EXPECT_EQ(1,buf[i]);
}
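The test above exercises the small-to-large transition: the second template parameter of SmallPtrSet only sets the inline capacity, and inserting a fifth pointer moves the set to a heap-allocated table with identical semantics. A tiny standalone sketch of that behavior (not part of the test file):

#include "llvm/ADT/SmallPtrSet.h"
using namespace llvm;

int main() {
  int Vals[8];
  SmallPtrSet<int *, 4> S; // up to 4 pointers stored inline
  for (int I = 0; I < 8; ++I)
    S.insert(&Vals[I]);    // the 5th insert triggers reallocation
  return S.size() == 8 ? 0 : 1;
}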
Example #3
/// \brief Find an insertion point that dominates all uses.
Instruction *ConstantHoisting::
FindConstantInsertionPoint(Function &F, const ConstantInfo &CI) const {
  BasicBlock *Entry = &F.getEntryBlock();

  // Collect all basic blocks.
  SmallPtrSet<BasicBlock *, 4> BBs;
  ConstantInfo::RebasedConstantListType::const_iterator RCI, RCE;
  for (RCI = CI.RebasedConstants.begin(), RCE = CI.RebasedConstants.end();
       RCI != RCE; ++RCI)
    for (SmallVectorImpl<User *>::const_iterator U = RCI->Uses.begin(),
         E = RCI->Uses.end(); U != E; ++U)
        CollectBasicBlocks(BBs, F, *U);

  if (BBs.count(Entry))
    return Entry->getFirstInsertionPt();

  while (BBs.size() >= 2) {
    BasicBlock *BB, *BB1, *BB2;
    BB1 = *BBs.begin();
    BB2 = *llvm::next(BBs.begin());
    BB = DT->findNearestCommonDominator(BB1, BB2);
    if (BB == Entry)
      return Entry->getFirstInsertionPt();
    BBs.erase(BB1);
    BBs.erase(BB2);
    BBs.insert(BB);
  }
  assert((BBs.size() == 1) && "Expected only one element.");
  return (*BBs.begin())->getFirstInsertionPt();
}
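The while-loop above is a generic reduction that recurs in several examples below (#6, #8, #12): repeatedly replace two blocks in the set with their nearest common dominator until one block remains. Isolated as a helper, with a name that is ours rather than LLVM's:

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
using namespace llvm;

// Collapse BBs to the single block dominating all of them. Note that
// findNearestCommonDominator may return BB1 or BB2 itself, in which case the
// insert is a no-op and the set still shrinks by one per iteration.
static BasicBlock *commonDominator(SmallPtrSet<BasicBlock *, 8> &BBs,
                                   DominatorTree &DT) {
  while (BBs.size() >= 2) {
    BasicBlock *BB1 = *BBs.begin();
    BasicBlock *BB2 = *std::next(BBs.begin());
    BasicBlock *Dom = DT.findNearestCommonDominator(BB1, BB2);
    BBs.erase(BB1);
    BBs.erase(BB2);
    BBs.insert(Dom);
  }
  return BBs.empty() ? nullptr : *BBs.begin();
}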
// Calculate the largest possible vregsPassed sets. These are the registers that
// can pass through an MBB live, but may not be live every time. It is assumed
// that all vregsPassed sets are empty before the call.
void MachineVerifier::calcRegsPassed() {
  // First push live-out regs to successors' vregsPassed. Remember the MBBs that
  // have any vregsPassed.
  SmallPtrSet<const MachineBasicBlock*, 8> todo;
  for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
       MFI != MFE; ++MFI) {
    const MachineBasicBlock &MBB(*MFI);
    BBInfo &MInfo = MBBInfoMap[&MBB];
    if (!MInfo.reachable)
      continue;
    for (MachineBasicBlock::const_succ_iterator SuI = MBB.succ_begin(),
           SuE = MBB.succ_end(); SuI != SuE; ++SuI) {
      BBInfo &SInfo = MBBInfoMap[*SuI];
      if (SInfo.addPassed(MInfo.regsLiveOut))
        todo.insert(*SuI);
    }
  }

  // Iteratively push vregsPassed to successors. This will converge to the same
  // final state regardless of set iteration order.
  while (!todo.empty()) {
    const MachineBasicBlock *MBB = *todo.begin();
    todo.erase(MBB);
    BBInfo &MInfo = MBBInfoMap[MBB];
    for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(),
           SuE = MBB->succ_end(); SuI != SuE; ++SuI) {
      if (*SuI == MBB)
        continue;
      BBInfo &SInfo = MBBInfoMap[*SuI];
      if (SInfo.addPassed(MInfo.vregsPassed))
        todo.insert(*SuI);
    }
  }
}
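calcRegsPassed (and calcRegsRequired below) is an instance of the classic worklist fixed point: propagate facts along edges, and re-queue a node only when its set actually grew. Termination follows because the sets only grow and are bounded. A self-contained sketch of the idiom on a toy graph, with illustrative types and names:

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"

struct Node {
  llvm::SmallVector<Node *, 4> Succs;
  llvm::SmallPtrSet<void *, 8> Facts; // facts known to reach this node
  // Returns true only if the set actually grew.
  bool addFacts(const llvm::SmallPtrSet<void *, 8> &In) {
    bool Changed = false;
    for (void *F : In)
      Changed |= Facts.insert(F).second;
    return Changed;
  }
};

// Propagate to a fixed point; converges because Facts sets only grow.
static void propagate(llvm::SmallVectorImpl<Node *> &AllNodes) {
  llvm::SmallPtrSet<Node *, 8> Todo(AllNodes.begin(), AllNodes.end());
  while (!Todo.empty()) {
    Node *N = *Todo.begin();
    Todo.erase(N);
    for (Node *S : N->Succs)
      if (S->addFacts(N->Facts))
        Todo.insert(S);
  }
}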
// Calculate the set of virtual registers that must be passed through each basic
// block in order to satisfy the requirements of successor blocks. This is very
// similar to calcRegsPassed, only backwards.
void MachineVerifier::calcRegsRequired() {
  // First push live-in regs to predecessors' vregsRequired.
  SmallPtrSet<const MachineBasicBlock*, 8> todo;
  for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
       MFI != MFE; ++MFI) {
    const MachineBasicBlock &MBB(*MFI);
    BBInfo &MInfo = MBBInfoMap[&MBB];
    for (MachineBasicBlock::const_pred_iterator PrI = MBB.pred_begin(),
           PrE = MBB.pred_end(); PrI != PrE; ++PrI) {
      BBInfo &PInfo = MBBInfoMap[*PrI];
      if (PInfo.addRequired(MInfo.vregsLiveIn))
        todo.insert(*PrI);
    }
  }

  // Iteratively push vregsRequired to predecessors. This will converge to the
  // same final state regardless of set iteration order.
  while (!todo.empty()) {
    const MachineBasicBlock *MBB = *todo.begin();
    todo.erase(MBB);
    BBInfo &MInfo = MBBInfoMap[MBB];
    for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(),
           PrE = MBB->pred_end(); PrI != PrE; ++PrI) {
      if (*PrI == MBB)
        continue;
      BBInfo &PInfo = MBBInfoMap[*PrI];
      if (PInfo.addRequired(MInfo.vregsRequired))
        todo.insert(*PrI);
    }
  }
}
Example #6
/// \brief Find an insertion point that dominates all uses.
Instruction *ConstantHoistingPass::findConstantInsertionPoint(
    const ConstantInfo &ConstInfo) const {
  assert(!ConstInfo.RebasedConstants.empty() && "Invalid constant info entry.");
  // Collect all basic blocks.
  SmallPtrSet<BasicBlock *, 8> BBs;
  for (auto const &RCI : ConstInfo.RebasedConstants)
    for (auto const &U : RCI.Uses)
      BBs.insert(findMatInsertPt(U.Inst, U.OpndIdx)->getParent());

  if (BBs.count(Entry))
    return &Entry->front();

  while (BBs.size() >= 2) {
    BasicBlock *BB, *BB1, *BB2;
    BB1 = *BBs.begin();
    BB2 = *std::next(BBs.begin());
    BB = DT->findNearestCommonDominator(BB1, BB2);
    if (BB == Entry)
      return &Entry->front();
    BBs.erase(BB1);
    BBs.erase(BB2);
    BBs.insert(BB);
  }
  assert((BBs.size() == 1) && "Expected only one element.");
  Instruction &FirstInst = (*BBs.begin())->front();
  return findMatInsertPt(&FirstInst);
}
Example #7
void GlobalDCEPass::UpdateGVDependencies(GlobalValue &GV) {
  SmallPtrSet<GlobalValue *, 8> Deps;
  for (User *User : GV.users())
    ComputeDependencies(User, Deps);
  Deps.erase(&GV); // Remove self-reference.
  for (GlobalValue *GVU : Deps) {
    GVDependencies.insert(std::make_pair(GVU, &GV));
  }
}
Example #8
/// Find an insertion point that dominates all uses.
SmallPtrSet<Instruction *, 8> ConstantHoistingPass::findConstantInsertionPoint(
    const ConstantInfo &ConstInfo) const {
  assert(!ConstInfo.RebasedConstants.empty() && "Invalid constant info entry.");
  // Collect all basic blocks.
  SmallPtrSet<BasicBlock *, 8> BBs;
  SmallPtrSet<Instruction *, 8> InsertPts;
  for (auto const &RCI : ConstInfo.RebasedConstants)
    for (auto const &U : RCI.Uses)
      BBs.insert(findMatInsertPt(U.Inst, U.OpndIdx)->getParent());

  if (BBs.count(Entry)) {
    InsertPts.insert(&Entry->front());
    return InsertPts;
  }

  if (BFI) {
    findBestInsertionSet(*DT, *BFI, Entry, BBs);
    for (auto BB : BBs) {
      BasicBlock::iterator InsertPt = BB->begin();
      for (; isa<PHINode>(InsertPt) || InsertPt->isEHPad(); ++InsertPt)
        ;
      InsertPts.insert(&*InsertPt);
    }
    return InsertPts;
  }

  while (BBs.size() >= 2) {
    BasicBlock *BB, *BB1, *BB2;
    BB1 = *BBs.begin();
    BB2 = *std::next(BBs.begin());
    BB = DT->findNearestCommonDominator(BB1, BB2);
    if (BB == Entry) {
      InsertPts.insert(&Entry->front());
      return InsertPts;
    }
    BBs.erase(BB1);
    BBs.erase(BB2);
    BBs.insert(BB);
  }
  assert((BBs.size() == 1) && "Expected only one element.");
  Instruction &FirstInst = (*BBs.begin())->front();
  InsertPts.insert(findMatInsertPt(&FirstInst));
  return InsertPts;
}
Example #9
/// FindFunctionBackedges - Analyze the specified function to find all of the
/// loop backedges in the function and return them.  This is a relatively cheap
/// (compared to computing dominators and loop info) analysis.
///
/// The output is added to Result, as pairs of <from,to> edge info.
void llvm::FindFunctionBackedges(const Function &F,
     SmallVectorImpl<std::pair<const BasicBlock*,const BasicBlock*> > &Result) {
  const BasicBlock *BB = &F.getEntryBlock();
  if (succ_begin(BB) == succ_end(BB))
    return;
  
  SmallPtrSet<const BasicBlock*, 8> Visited;
  SmallVector<std::pair<const BasicBlock*, succ_const_iterator>, 8> VisitStack;
  SmallPtrSet<const BasicBlock*, 8> InStack;
  
  Visited.insert(BB);
  VisitStack.push_back(std::make_pair(BB, succ_begin(BB)));
  InStack.insert(BB);
  do {
    std::pair<const BasicBlock*, succ_const_iterator> &Top = VisitStack.back();
    const BasicBlock *ParentBB = Top.first;
    succ_const_iterator &I = Top.second;
    
    bool FoundNew = false;
    while (I != succ_end(ParentBB)) {
      BB = *I++;
      if (Visited.insert(BB)) {
        FoundNew = true;
        break;
      }
      // Successor is in VisitStack, it's a back edge.
      if (InStack.count(BB))
        Result.push_back(std::make_pair(ParentBB, BB));
    }
    
    if (FoundNew) {
      // Go down one level if there is an unvisited successor.
      InStack.insert(BB);
      VisitStack.push_back(std::make_pair(BB, succ_begin(BB)));
    } else {
      // Go up one level.
      InStack.erase(VisitStack.pop_back_val().first);
    }
  } while (!VisitStack.empty());
}
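A typical use of FindFunctionBackedges, hedged as a sketch (the helper name is ours): a cheap loop-existence test that avoids computing dominators or LoopInfo, which is exactly the trade-off the doc comment above advertises.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/CFG.h" // declares FindFunctionBackedges in modern trees
#include "llvm/IR/Function.h"
using namespace llvm;

// Returns true if F contains at least one loop backedge.
static bool hasLoops(const Function &F) {
  SmallVector<std::pair<const BasicBlock *, const BasicBlock *>, 8> Edges;
  FindFunctionBackedges(F, Edges);
  return !Edges.empty();
}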
Example #10
/// Return the set of basic blocks into which instructions should be sunk.
///
/// The returned set of basic blocks (BBsToSinkInto) should satisfy:
///
/// * Inside the loop \p L
/// * For each UseBB in \p UseBBs, there is at least one BB in BBsToSinkInto
///   that dominates the UseBB
/// * Has minimum total frequency that is no greater than preheader frequency
///
/// The purpose of the function is to find the optimal sinking points to
/// minimize execution cost, which is defined as "sum of frequency of
/// BBsToSinkInto".
/// As a result, the returned BBsToSinkInto needs to have minimum total
/// frequency.
/// Additionally, if the total frequency of BBsToSinkInto exceeds preheader
/// frequency, the optimal solution is not sinking (return empty set).
///
/// \p ColdLoopBBs is used to help find the optimal sinking locations.
/// It stores a list of BBs that is:
///
/// * Inside the loop \p L
/// * Has a frequency no larger than the loop's preheader
/// * Sorted by BB frequency
///
/// The complexity of the function is O(UseBBs.size() * ColdLoopBBs.size()).
/// To avoid expensive computation, we cap the maximum UseBBs.size() in its
/// caller.
static SmallPtrSet<BasicBlock *, 2>
findBBsToSinkInto(const Loop &L, const SmallPtrSetImpl<BasicBlock *> &UseBBs,
                  const SmallVectorImpl<BasicBlock *> &ColdLoopBBs,
                  DominatorTree &DT, BlockFrequencyInfo &BFI) {
  SmallPtrSet<BasicBlock *, 2> BBsToSinkInto;
  if (UseBBs.size() == 0)
    return BBsToSinkInto;

  BBsToSinkInto.insert(UseBBs.begin(), UseBBs.end());
  SmallPtrSet<BasicBlock *, 2> BBsDominatedByColdestBB;

  // For every iteration:
  //   * Pick the ColdestBB from ColdLoopBBs
  //   * Find the set BBsDominatedByColdestBB that satisfy:
  //     - BBsDominatedByColdestBB is a subset of BBsToSinkInto
  //     - Every BB in BBsDominatedByColdestBB is dominated by ColdestBB
  //   * If Freq(ColdestBB) < Freq(BBsDominatedByColdestBB), remove
  //     BBsDominatedByColdestBB from BBsToSinkInto, add ColdestBB to
  //     BBsToSinkInto
  for (BasicBlock *ColdestBB : ColdLoopBBs) {
    BBsDominatedByColdestBB.clear();
    for (BasicBlock *SinkedBB : BBsToSinkInto)
      if (DT.dominates(ColdestBB, SinkedBB))
        BBsDominatedByColdestBB.insert(SinkedBB);
    if (BBsDominatedByColdestBB.size() == 0)
      continue;
    if (adjustedSumFreq(BBsDominatedByColdestBB, BFI) >
        BFI.getBlockFreq(ColdestBB)) {
      for (BasicBlock *DominatedBB : BBsDominatedByColdestBB) {
        BBsToSinkInto.erase(DominatedBB);
      }
      BBsToSinkInto.insert(ColdestBB);
    }
  }

  // If the total frequency of BBsToSinkInto is larger than preheader frequency,
  // do not sink.
  if (adjustedSumFreq(BBsToSinkInto, BFI) >
      BFI.getBlockFreq(L.getLoopPreheader()))
    BBsToSinkInto.clear();
  return BBsToSinkInto;
}
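findBBsToSinkInto relies on an adjustedSumFreq helper that is not shown here. A simplified, hypothetical stand-in is sketched below; the real helper in LLVM's LoopSink.cpp additionally scales the sum when the set contains more than one block, so treat this purely as an assumption-laden sketch:

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
using namespace llvm;

// Hypothetical: plain sum of block frequencies over a set (no multi-block
// adjustment, unlike the real LoopSink helper).
static BlockFrequency adjustedSumFreq(const SmallPtrSetImpl<BasicBlock *> &BBs,
                                      BlockFrequencyInfo &BFI) {
  BlockFrequency Total(0);
  for (BasicBlock *BB : BBs)
    Total += BFI.getBlockFreq(BB);
  return Total;
}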
/// computeDFS - Computes the DFS-in and DFS-out numbers of the dominator tree
/// of the given MachineFunction.  These numbers are then used in other parts
/// of the PHI elimination process.
void StrongPHIElimination::computeDFS(MachineFunction& MF) {
  SmallPtrSet<MachineDomTreeNode*, 8> frontier;
  SmallPtrSet<MachineDomTreeNode*, 8> visited;
  
  unsigned time = 0;
  
  MachineDominatorTree& DT = getAnalysis<MachineDominatorTree>();
  
  MachineDomTreeNode* node = DT.getRootNode();
  
  std::vector<MachineDomTreeNode*> worklist;
  worklist.push_back(node);
  
  while (!worklist.empty()) {
    MachineDomTreeNode* currNode = worklist.back();
    
    if (!frontier.count(currNode)) {
      frontier.insert(currNode);
      ++time;
      preorder.insert(std::make_pair(currNode->getBlock(), time));
    }
    
    bool inserted = false;
    for (MachineDomTreeNode::iterator I = currNode->begin(), E = currNode->end();
         I != E; ++I)
      if (!frontier.count(*I) && !visited.count(*I)) {
        worklist.push_back(*I);
        inserted = true;
        break;
      }
    
    if (!inserted) {
      frontier.erase(currNode);
      visited.insert(currNode);
      maxpreorder.insert(std::make_pair(currNode->getBlock(), time));
      
      worklist.pop_back();
    }
  }
}
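Once preorder and maxpreorder are filled in, dominator-tree ancestry can be tested in O(1): A dominates B exactly when A's DFS interval encloses B's. A hedged sketch, assuming both maps are DenseMap<MachineBasicBlock *, unsigned> as the inserts above suggest:

#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
using namespace llvm;

// A's interval [Pre[A], MaxPre[A]] must contain B's entry number.
static bool dominatesDFS(const DenseMap<MachineBasicBlock *, unsigned> &Pre,
                         const DenseMap<MachineBasicBlock *, unsigned> &MaxPre,
                         MachineBasicBlock *A, MachineBasicBlock *B) {
  return Pre.lookup(A) <= Pre.lookup(B) &&
         MaxPre.lookup(B) <= MaxPre.lookup(A);
}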
Example #12
/// \brief Find nearest common dominator of all uses.
/// FIXME: Replace this with NearestCommonDominator once it is in common code.
BasicBlock *
ConstantHoisting::findIDomOfAllUses(const ConstantUseListType &Uses) const {
  // Collect all basic blocks.
  SmallPtrSet<BasicBlock *, 8> BBs;
  for (auto const &U : Uses)
    BBs.insert(findMatInsertPt(U.Inst, U.OpndIdx)->getParent());

  if (BBs.count(Entry))
    return Entry;

  while (BBs.size() >= 2) {
    BasicBlock *BB, *BB1, *BB2;
    BB1 = *BBs.begin();
    BB2 = *std::next(BBs.begin());
    BB = DT->findNearestCommonDominator(BB1, BB2);
    if (BB == Entry)
      return Entry;
    BBs.erase(BB1);
    BBs.erase(BB2);
    BBs.insert(BB);
  }
  assert((BBs.size() == 1) && "Expected only one element.");
  return *BBs.begin();
}
Example #13
const CodeGenRegister::SubRegMap &
CodeGenRegister::getSubRegs(CodeGenRegBank &RegBank) {
  // Only compute this map once.
  if (SubRegsComplete)
    return SubRegs;
  SubRegsComplete = true;

  std::vector<Record*> SubList = TheDef->getValueAsListOfDefs("SubRegs");
  std::vector<Record*> IdxList = TheDef->getValueAsListOfDefs("SubRegIndices");
  if (SubList.size() != IdxList.size())
    throw TGError(TheDef->getLoc(), "Register " + getName() +
                  " SubRegIndices doesn't match SubRegs");

  // First insert the direct subregs and make sure they are fully indexed.
  SmallVector<CodeGenSubRegIndex*, 8> Indices;
  for (unsigned i = 0, e = SubList.size(); i != e; ++i) {
    CodeGenRegister *SR = RegBank.getReg(SubList[i]);
    CodeGenSubRegIndex *Idx = RegBank.getSubRegIdx(IdxList[i]);
    Indices.push_back(Idx);
    if (!SubRegs.insert(std::make_pair(Idx, SR)).second)
      throw TGError(TheDef->getLoc(), "SubRegIndex " + Idx->getName() +
                    " appears twice in Register " + getName());
  }

  // Keep track of inherited subregs and how they can be reached.
  SmallPtrSet<CodeGenRegister*, 8> Orphans;

  // Clone inherited subregs and place duplicate entries in Orphans.
  // Here the order is important - earlier subregs take precedence.
  for (unsigned i = 0, e = SubList.size(); i != e; ++i) {
    CodeGenRegister *SR = RegBank.getReg(SubList[i]);
    const SubRegMap &Map = SR->getSubRegs(RegBank);

    // Add this as a super-register of SR now all sub-registers are in the list.
    // This creates a topological ordering, the exact order depends on the
    // order getSubRegs is called on all registers.
    SR->SuperRegs.push_back(this);

    for (SubRegMap::const_iterator SI = Map.begin(), SE = Map.end(); SI != SE;
         ++SI) {
      if (!SubRegs.insert(*SI).second)
        Orphans.insert(SI->second);

      // Noop sub-register indexes are possible, so avoid duplicates.
      if (SI->second != SR)
        SI->second->SuperRegs.push_back(this);
    }
  }

  // Expand any composed subreg indices.
  // If dsub_2 has ComposedOf = [qsub_1, dsub_0], and this register has a
  // qsub_1 subreg, add a dsub_2 subreg.  Keep growing Indices and process
  // expanded subreg indices recursively.
  for (unsigned i = 0; i != Indices.size(); ++i) {
    CodeGenSubRegIndex *Idx = Indices[i];
    const CodeGenSubRegIndex::CompMap &Comps = Idx->getComposites();
    CodeGenRegister *SR = SubRegs[Idx];
    const SubRegMap &Map = SR->getSubRegs(RegBank);

    // Look at the possible compositions of Idx.
    // They may not all be supported by SR.
    for (CodeGenSubRegIndex::CompMap::const_iterator I = Comps.begin(),
           E = Comps.end(); I != E; ++I) {
      SubRegMap::const_iterator SRI = Map.find(I->first);
      if (SRI == Map.end())
        continue; // Idx + I->first doesn't exist in SR.
      // Add I->second as a name for the subreg SRI->second, assuming it is
      // orphaned, and the name isn't already used for something else.
      if (SubRegs.count(I->second) || !Orphans.erase(SRI->second))
        continue;
      // We found a new name for the orphaned sub-register.
      SubRegs.insert(std::make_pair(I->second, SRI->second));
      Indices.push_back(I->second);
    }
  }

  // Process the composites.
  ListInit *Comps = TheDef->getValueAsListInit("CompositeIndices");
  for (unsigned i = 0, e = Comps->size(); i != e; ++i) {
    DagInit *Pat = dynamic_cast<DagInit*>(Comps->getElement(i));
    if (!Pat)
      throw TGError(TheDef->getLoc(), "Invalid dag '" +
                    Comps->getElement(i)->getAsString() +
                    "' in CompositeIndices");
    DefInit *BaseIdxInit = dynamic_cast<DefInit*>(Pat->getOperator());
    if (!BaseIdxInit || !BaseIdxInit->getDef()->isSubClassOf("SubRegIndex"))
      throw TGError(TheDef->getLoc(), "Invalid SubClassIndex in " +
                    Pat->getAsString());
    CodeGenSubRegIndex *BaseIdx = RegBank.getSubRegIdx(BaseIdxInit->getDef());

    // Resolve list of subreg indices into R2.
    CodeGenRegister *R2 = this;
    for (DagInit::const_arg_iterator di = Pat->arg_begin(),
         de = Pat->arg_end(); di != de; ++di) {
      DefInit *IdxInit = dynamic_cast<DefInit*>(*di);
      if (!IdxInit || !IdxInit->getDef()->isSubClassOf("SubRegIndex"))
        throw TGError(TheDef->getLoc(), "Invalid SubClassIndex in " +
                      Pat->getAsString());
      CodeGenSubRegIndex *Idx = RegBank.getSubRegIdx(IdxInit->getDef());
      const SubRegMap &R2Subs = R2->getSubRegs(RegBank);
      SubRegMap::const_iterator ni = R2Subs.find(Idx);
      if (ni == R2Subs.end())
        throw TGError(TheDef->getLoc(), "Composite " + Pat->getAsString() +
                      " refers to bad index in " + R2->getName());
      R2 = ni->second;
    }

    // Insert composite index. Allow overriding inherited indices etc.
    SubRegs[BaseIdx] = R2;

    // R2 is no longer an orphan.
    Orphans.erase(R2);
  }

  // Now Orphans contains the inherited subregisters without a direct index.
  // Create inferred indexes for all missing entries.
  // Work backwards in the Indices vector in order to compose subregs bottom-up.
  // Consider this subreg sequence:
  //
  //   qsub_1 -> dsub_0 -> ssub_0
  //
  // The qsub_1 -> dsub_0 composition becomes dsub_2, so the ssub_0 register
  // can be reached in two different ways:
  //
  //   qsub_1 -> ssub_0
  //   dsub_2 -> ssub_0
  //
  // We pick the latter composition because another register may have [dsub_0,
  // dsub_1, dsub_2] subregs without necessarily having a qsub_1 subreg.  The
  // dsub_2 -> ssub_0 composition can be shared.
  while (!Indices.empty() && !Orphans.empty()) {
    CodeGenSubRegIndex *Idx = Indices.pop_back_val();
    CodeGenRegister *SR = SubRegs[Idx];
    const SubRegMap &Map = SR->getSubRegs(RegBank);
    for (SubRegMap::const_iterator SI = Map.begin(), SE = Map.end(); SI != SE;
         ++SI)
      if (Orphans.erase(SI->second))
        SubRegs[RegBank.getCompositeSubRegIndex(Idx, SI->first)] = SI->second;
  }

  // Initialize RegUnitList. A register with no subregisters creates its own
  // unit. Otherwise, it inherits all its subregister's units. Because
  // getSubRegs is called recursively, this processes the register hierarchy in
  // postorder.
  //
  // TODO: We currently assume all register units correspond to a named "leaf"
  // register. We should also unify register units for ad-hoc register
  // aliases. This can be done by iteratively merging units for aliasing
  // registers using a worklist.
  assert(RegUnits.empty() && "Should only initialize RegUnits once");
  if (SubRegs.empty()) {
    RegUnits.push_back(RegBank.newRegUnit());
  } else {
    for (SubRegMap::const_iterator I = SubRegs.begin(), E = SubRegs.end();
         I != E; ++I) {
      // Strangely a register may have itself as a subreg (self-cycle) e.g. XMM.
      CodeGenRegister *SR = I->second;
      if (SR == this) {
        if (RegUnits.empty())
          RegUnits.push_back(RegBank.newRegUnit());
        continue;
      }
      // Merge the subregister's units into this register's RegUnits.
      mergeRegUnits(RegUnits, SR->RegUnits);
    }
  }
  return SubRegs;
}
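getSubRegs calls a mergeRegUnits helper that is not shown. Assuming RegUnits is maintained as a sorted, duplicate-free list of unit numbers (an assumption; the real helper's representation may differ), a minimal sketch would be:

#include "llvm/ADT/SmallVector.h"
#include <algorithm>

// Hypothetical: union Src into Dst, keeping Dst sorted and free of duplicates.
static void mergeRegUnits(llvm::SmallVectorImpl<unsigned> &Dst,
                          const llvm::SmallVectorImpl<unsigned> &Src) {
  for (unsigned U : Src)
    if (!std::binary_search(Dst.begin(), Dst.end(), U))
      Dst.insert(std::upper_bound(Dst.begin(), Dst.end(), U), U);
}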
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
  if (DisablePeephole)
    return false;

  TM  = &MF.getTarget();
  TII = TM->getInstrInfo();
  MRI = &MF.getRegInfo();
  DT  = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0;

  bool Changed = false;

  SmallPtrSet<MachineInstr*, 8> LocalMIs;
  SmallSet<unsigned, 4> ImmDefRegs;
  DenseMap<unsigned, MachineInstr*> ImmDefMIs;
  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
    MachineBasicBlock *MBB = &*I;

    bool SeenMoveImm = false;
    LocalMIs.clear();
    ImmDefRegs.clear();
    ImmDefMIs.clear();

    bool First = true;
    MachineBasicBlock::iterator PMII;
    for (MachineBasicBlock::iterator
           MII = I->begin(), MIE = I->end(); MII != MIE; ) {
      MachineInstr *MI = &*MII;
      LocalMIs.insert(MI);

      if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
          MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() ||
          MI->hasUnmodeledSideEffects()) {
        ++MII;
        continue;
      }

      if (MI->isBitcast()) {
        if (optimizeBitcastInstr(MI, MBB)) {
          // MI is deleted.
          LocalMIs.erase(MI);
          Changed = true;
          MII = First ? I->begin() : llvm::next(PMII);
          continue;
        }
      } else if (MI->isCompare()) {
        if (optimizeCmpInstr(MI, MBB)) {
          // MI is deleted.
          LocalMIs.erase(MI);
          Changed = true;
          MII = First ? I->begin() : llvm::next(PMII);
          continue;
        }
      }

      if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
        SeenMoveImm = true;
      } else {
        Changed |= optimizeExtInstr(MI, MBB, LocalMIs);
        if (SeenMoveImm)
          Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
      }

      First = false;
      PMII = MII;
      ++MII;
    }
  }

  return Changed;
}
/// FindBackAndExitEdges - Search for back and exit edges for all blocks
/// within the function loops, calculated using loop information.
void BranchPredictionInfo::FindBackAndExitEdges(Function &F) {
  SmallPtrSet<const BasicBlock *, 64> LoopsVisited;
  SmallPtrSet<const BasicBlock *, 64> BlocksVisited;

  for (LoopInfo::iterator LIT = LI->begin(), LIE = LI->end();
       LIT != LIE; ++LIT) {
    Loop *rootLoop = *LIT;
    BasicBlock *rootHeader = rootLoop->getHeader();

    // Check if we already visited this loop.
    if (LoopsVisited.count(rootHeader))
      continue;

    // Create a stack to hold loops (innermost on top).
    SmallVector<Loop *, 8> Stack;
    SmallPtrSet<const BasicBlock *, 8> InStack;

    // Put the current loop into the Stack.
    Stack.push_back(rootLoop);
    InStack.insert(rootHeader);

    do {
      Loop *loop = Stack.back();

      // Search for new inner loops.
      bool foundNew = false;
      for (Loop::iterator I = loop->begin(), E = loop->end(); I != E; ++I) {
        Loop *innerLoop = *I;
        BasicBlock *innerHeader = innerLoop->getHeader();

        // Skip visited inner loops.
        if (!LoopsVisited.count(innerHeader)) {
          Stack.push_back(innerLoop);
          InStack.insert(innerHeader);
          foundNew = true;
          break;
        }
      }

      // If a new loop is found, continue.
      // Otherwise, it is time to process it, because it is the innermost
      // loop not yet handled.
      if (foundNew)
        continue;

      // The variable "loop" is now the unvisited inner most loop.
      BasicBlock *header = loop->getHeader();

      // Walk all basic blocks in the loop.
      for (Loop::block_iterator LBI = loop->block_begin(),
           LBE = loop->block_end(); LBI != LBE; ++LBI) {
        BasicBlock *lpBB = *LBI;
        if (!BlocksVisited.insert(lpBB))
          continue;

        // Set the number of back edges to this loop head (lpBB) as zero.
        BackEdgesCount[lpBB] = 0;

        // For each successor of this loop block, check whether the edge
        // leaves the loop.
        TerminatorInst *TI = lpBB->getTerminator();
        for (unsigned s = 0; s < TI->getNumSuccessors(); ++s) {
          BasicBlock *successor = TI->getSuccessor(s);
          Edge edge = std::make_pair(lpBB, successor);

          // If the successor matches any loop header on the stack,
          // then it is a backedge.
          if (InStack.count(successor)) {
            listBackEdges.insert(edge);
            ++BackEdgesCount[lpBB];
          }

          // If the successor is not present in the loop block list, then it is
          // an exit edge.
          if (!loop->contains(successor))
            listExitEdges.insert(edge);
        }
      }

      // Mark this loop as visited and pop it off the stack.
      LoopsVisited.insert(header);
      Stack.pop_back();
      InStack.erase(header);
    } while (!InStack.empty());
  }
}
bool WebAssemblyFixIrreducibleControlFlow::VisitLoop(MachineFunction &MF,
                                                     MachineLoopInfo &MLI,
                                                     MachineLoop *Loop) {
  MachineBasicBlock *Header = Loop ? Loop->getHeader() : &*MF.begin();
  SetVector<MachineBasicBlock *> RewriteSuccs;

  // DFS through Loop's body, looking for irreducible control flow. Loop is
  // natural, we stay in its body, and we treat any nested loops
  // monolithically, so any cycles we encounter indicate irreducibility.
  SmallPtrSet<MachineBasicBlock *, 8> OnStack;
  SmallPtrSet<MachineBasicBlock *, 8> Visited;
  SmallVector<SuccessorList, 4> LoopWorklist;
  LoopWorklist.push_back(SuccessorList(Header));
  OnStack.insert(Header);
  Visited.insert(Header);
  while (!LoopWorklist.empty()) {
    SuccessorList &Top = LoopWorklist.back();
    if (Top.HasNext()) {
      MachineBasicBlock *Next = Top.Next();
      if (Next == Header || (Loop && !Loop->contains(Next)))
        continue;
      if (LLVM_LIKELY(OnStack.insert(Next).second)) {
        if (!Visited.insert(Next).second) {
          OnStack.erase(Next);
          continue;
        }
        MachineLoop *InnerLoop = MLI.getLoopFor(Next);
        if (InnerLoop != Loop)
          LoopWorklist.push_back(SuccessorList(InnerLoop));
        else
          LoopWorklist.push_back(SuccessorList(Next));
      } else {
        RewriteSuccs.insert(Top.getBlock());
      }
      continue;
    }
    OnStack.erase(Top.getBlock());
    LoopWorklist.pop_back();
  }

  // Most likely, we didn't find any irreducible control flow.
  if (LLVM_LIKELY(RewriteSuccs.empty()))
    return false;

  DEBUG(dbgs() << "Irreducible control flow detected!\n");

  // OK. We have irreducible control flow! Create a dispatch block which will
  // contain a jump table to any block in the problematic set of blocks.
  MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock();
  MF.insert(MF.end(), Dispatch);
  MLI.changeLoopFor(Dispatch, Loop);

  // Add the jump table.
  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
  MachineInstrBuilder MIB = BuildMI(*Dispatch, Dispatch->end(), DebugLoc(),
                                    TII.get(WebAssembly::BR_TABLE_I32));

  // Add the register which will be used to tell the jump table which block to
  // jump to.
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
  MIB.addReg(Reg);

  // Collect all the blocks which need to have their successors rewritten,
  // add the successors to the jump table, and remember their index.
  DenseMap<MachineBasicBlock *, unsigned> Indices;
  SmallVector<MachineBasicBlock *, 4> SuccWorklist(RewriteSuccs.begin(),
                                                   RewriteSuccs.end());
  while (!SuccWorklist.empty()) {
    MachineBasicBlock *MBB = SuccWorklist.pop_back_val();
    auto Pair = Indices.insert(std::make_pair(MBB, 0));
    if (!Pair.second)
      continue;

    unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1;
    DEBUG(dbgs() << printMBBReference(*MBB) << " has index " << Index << "\n");

    Pair.first->second = Index;
    for (auto Pred : MBB->predecessors())
      RewriteSuccs.insert(Pred);

    MIB.addMBB(MBB);
    Dispatch->addSuccessor(MBB);

    MetaBlock Meta(MBB);
    for (auto *Succ : Meta.successors())
      if (Succ != Header && (!Loop || Loop->contains(Succ)))
        SuccWorklist.push_back(Succ);
  }

  // Rewrite the problematic successors for every block in RewriteSuccs.
  // For simplicity, we just introduce a new block for every edge we need to
  // rewrite. Fancier things are possible.
  for (MachineBasicBlock *MBB : RewriteSuccs) {
    DenseMap<MachineBasicBlock *, MachineBasicBlock *> Map;
    for (auto *Succ : MBB->successors()) {
      if (!Indices.count(Succ))
        continue;

      MachineBasicBlock *Split = MF.CreateMachineBasicBlock();
      MF.insert(MBB->isLayoutSuccessor(Succ) ? MachineFunction::iterator(Succ)
                                             : MF.end(),
                Split);
      MLI.changeLoopFor(Split, Loop);

      // Set the jump table's register to the index of the block we wish to
      // jump to, then branch to the jump table.
      BuildMI(*Split, Split->end(), DebugLoc(), TII.get(WebAssembly::CONST_I32),
              Reg)
          .addImm(Indices[Succ]);
      BuildMI(*Split, Split->end(), DebugLoc(), TII.get(WebAssembly::BR))
          .addMBB(Dispatch);
      Split->addSuccessor(Dispatch);
      Map[Succ] = Split;
    }
    // Remap the terminator operands and the successor list.
    for (MachineInstr &Term : MBB->terminators())
      for (auto &Op : Term.explicit_uses())
        if (Op.isMBB() && Indices.count(Op.getMBB()))
          Op.setMBB(Map[Op.getMBB()]);
    for (auto Rewrite : Map)
      MBB->replaceSuccessor(Rewrite.first, Rewrite.second);
  }

  // Create a fake default label, because br_table requires one.
  MIB.addMBB(MIB.getInstr()
                 ->getOperand(MIB.getInstr()->getNumExplicitOperands() - 1)
                 .getMBB());

  return true;
}
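VisitLoop uses a SuccessorList helper that is not shown. The following is a hypothetical reconstruction derived only from its call sites (HasNext/Next/getBlock, plus construction from either a block or an inner loop); in particular, the loop-taking constructor is assumed to treat the nested loop as one node whose successors are the loop's exit blocks:

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
using namespace llvm;

struct SuccessorList {
  MachineBasicBlock *Block;
  SmallVector<MachineBasicBlock *, 4> Succs;
  unsigned Index = 0;
  explicit SuccessorList(MachineBasicBlock *MBB)
      : Block(MBB), Succs(MBB->succ_begin(), MBB->succ_end()) {}
  // Treat a nested loop as a single node; we assume its successors are the
  // loop's exit blocks (an assumption, not confirmed by the snippet).
  explicit SuccessorList(MachineLoop *L) : Block(L->getHeader()) {
    L->getExitBlocks(Succs);
  }
  MachineBasicBlock *getBlock() { return Block; }
  bool HasNext() const { return Index != Succs.size(); }
  MachineBasicBlock *Next() { return Succs[Index++]; }
};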
/*
  This method performs Unroll and Jam. For a simple loop like:
  for (i = ..)
    Fore(i)
    for (j = ..)
      SubLoop(i, j)
    Aft(i)

  Instead of doing normal inner or outer unrolling, we do:
  for (i = .., i+=2)
    Fore(i)
    Fore(i+1)
    for (j = ..)
      SubLoop(i, j)
      SubLoop(i+1, j)
    Aft(i)
    Aft(i+1)

  So the outer loop is essentially unrolled and then the inner loops are fused
  ("jammed") together into a single loop. This can increase speed when there
  are loads in SubLoop that are invariant to i, as they become shared between
  the now jammed inner loops.

  We do this by splitting the blocks in the loop into Fore, Subloop and Aft.
  Fore blocks are those before the inner loop, Aft are those after. Normal
  Unroll code is used to copy each of these sets of blocks and the results are
  combined together into the final form above.

  isSafeToUnrollAndJam should be used prior to calling this to make sure the
  unrolling will be valid. Checking profitability is also advisable.
*/
LoopUnrollResult
llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
                       unsigned TripMultiple, bool UnrollRemainder,
                       LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                       AssumptionCache *AC, OptimizationRemarkEmitter *ORE) {

  // When we enter here we should have already checked that unrolling is safe.
  BasicBlock *Header = L->getHeader();
  assert(L->getSubLoops().size() == 1);
  Loop *SubLoop = *L->begin();

  // Don't enter the unroll code if there is nothing to do.
  if (TripCount == 0 && Count < 2) {
    LLVM_DEBUG(dbgs() << "Won't unroll; almost nothing to do\n");
    return LoopUnrollResult::Unmodified;
  }

  assert(Count > 0);
  assert(TripMultiple > 0);
  assert(TripCount == 0 || TripCount % TripMultiple == 0);

  // Are we eliminating the loop control altogether?
  bool CompletelyUnroll = (Count == TripCount);

  // We use the runtime remainder in cases where we don't know the trip multiple.
  if (TripMultiple == 1 || TripMultiple % Count != 0) {
    if (!UnrollRuntimeLoopRemainder(L, Count, /*AllowExpensiveTripCount*/ false,
                                    /*UseEpilogRemainder*/ true,
                                    UnrollRemainder, LI, SE, DT, AC, true)) {
      LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; remainder loop could not be "
                           "generated when assuming runtime trip count\n");
      return LoopUnrollResult::Unmodified;
    }
  }

  // Notify ScalarEvolution that the loop will be substantially changed,
  // if not outright eliminated.
  if (SE) {
    SE->forgetLoop(L);
    SE->forgetLoop(SubLoop);
  }

  using namespace ore;
  // Report the unrolling decision.
  if (CompletelyUnroll) {
    LLVM_DEBUG(dbgs() << "COMPLETELY UNROLL AND JAMMING loop %"
                      << Header->getName() << " with trip count " << TripCount
                      << "!\n");
    ORE->emit(OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(),
                                 L->getHeader())
              << "completely unroll and jammed loop with "
              << NV("UnrollCount", TripCount) << " iterations");
  } else {
    auto DiagBuilder = [&]() {
      OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(),
                              L->getHeader());
      return Diag << "unroll and jammed loop by a factor of "
                  << NV("UnrollCount", Count);
    };

    LLVM_DEBUG(dbgs() << "UNROLL AND JAMMING loop %" << Header->getName()
                      << " by " << Count);
    if (TripMultiple != 1) {
      LLVM_DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
      ORE->emit([&]() {
        return DiagBuilder() << " with " << NV("TripMultiple", TripMultiple)
                             << " trips per branch";
      });
    } else {
      LLVM_DEBUG(dbgs() << " with run-time trip count");
      ORE->emit([&]() { return DiagBuilder() << " with run-time trip count"; });
    }
    LLVM_DEBUG(dbgs() << "!\n");
  }

  BasicBlock *Preheader = L->getLoopPreheader();
  BasicBlock *LatchBlock = L->getLoopLatch();
  BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
  assert(Preheader && LatchBlock && Header);
  assert(BI && !BI->isUnconditional());
  bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
  BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);
  bool SubLoopContinueOnTrue = SubLoop->contains(
      SubLoop->getLoopLatch()->getTerminator()->getSuccessor(0));

  // Partition blocks in an outer/inner loop pair into blocks before and after
  // the loop
  BasicBlockSet SubLoopBlocks;
  BasicBlockSet ForeBlocks;
  BasicBlockSet AftBlocks;
  partitionOuterLoopBlocks(L, SubLoop, ForeBlocks, SubLoopBlocks, AftBlocks,
                           DT);

  // We keep track of the entering/first and exiting/last block of each of
  // Fore/SubLoop/Aft in each iteration. This helps make the stapling up of
  // blocks easier.
  std::vector<BasicBlock *> ForeBlocksFirst;
  std::vector<BasicBlock *> ForeBlocksLast;
  std::vector<BasicBlock *> SubLoopBlocksFirst;
  std::vector<BasicBlock *> SubLoopBlocksLast;
  std::vector<BasicBlock *> AftBlocksFirst;
  std::vector<BasicBlock *> AftBlocksLast;
  ForeBlocksFirst.push_back(Header);
  ForeBlocksLast.push_back(SubLoop->getLoopPreheader());
  SubLoopBlocksFirst.push_back(SubLoop->getHeader());
  SubLoopBlocksLast.push_back(SubLoop->getExitingBlock());
  AftBlocksFirst.push_back(SubLoop->getExitBlock());
  AftBlocksLast.push_back(L->getExitingBlock());
  // Maps Blocks[0] -> Blocks[It]
  ValueToValueMapTy LastValueMap;

  // Move any instructions from fore phi operands from AftBlocks into Fore.
  moveHeaderPhiOperandsToForeBlocks(
      Header, LatchBlock, SubLoop->getLoopPreheader()->getTerminator(),
      AftBlocks);

  // The current on-the-fly SSA update requires blocks to be processed in
  // reverse postorder so that LastValueMap contains the correct value at each
  // exit.
  LoopBlocksDFS DFS(L);
  DFS.perform(LI);
  // Stash the DFS iterators before adding blocks to the loop.
  LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
  LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();

  if (Header->getParent()->isDebugInfoForProfiling())
    for (BasicBlock *BB : L->getBlocks())
      for (Instruction &I : *BB)
        if (!isa<DbgInfoIntrinsic>(&I))
          if (const DILocation *DIL = I.getDebugLoc())
            I.setDebugLoc(DIL->cloneWithDuplicationFactor(Count));

  // Copy all blocks
  for (unsigned It = 1; It != Count; ++It) {
    std::vector<BasicBlock *> NewBlocks;
    // Maps Blocks[It] -> Blocks[It-1]
    DenseMap<Value *, Value *> PrevItValueMap;

    for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
      ValueToValueMapTy VMap;
      BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
      Header->getParent()->getBasicBlockList().push_back(New);

      if (ForeBlocks.count(*BB)) {
        L->addBasicBlockToLoop(New, *LI);

        if (*BB == ForeBlocksFirst[0])
          ForeBlocksFirst.push_back(New);
        if (*BB == ForeBlocksLast[0])
          ForeBlocksLast.push_back(New);
      } else if (SubLoopBlocks.count(*BB)) {
        SubLoop->addBasicBlockToLoop(New, *LI);

        if (*BB == SubLoopBlocksFirst[0])
          SubLoopBlocksFirst.push_back(New);
        if (*BB == SubLoopBlocksLast[0])
          SubLoopBlocksLast.push_back(New);
      } else if (AftBlocks.count(*BB)) {
        L->addBasicBlockToLoop(New, *LI);

        if (*BB == AftBlocksFirst[0])
          AftBlocksFirst.push_back(New);
        if (*BB == AftBlocksLast[0])
          AftBlocksLast.push_back(New);
      } else {
        llvm_unreachable("BB being cloned should be in Fore/Sub/Aft");
      }

      // Update our running maps of newest clones
      PrevItValueMap[New] = (It == 1 ? *BB : LastValueMap[*BB]);
      LastValueMap[*BB] = New;
      for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
           VI != VE; ++VI) {
        PrevItValueMap[VI->second] =
            const_cast<Value *>(It == 1 ? VI->first : LastValueMap[VI->first]);
        LastValueMap[VI->first] = VI->second;
      }

      NewBlocks.push_back(New);

      // Update DomTree:
      if (*BB == ForeBlocksFirst[0])
        DT->addNewBlock(New, ForeBlocksLast[It - 1]);
      else if (*BB == SubLoopBlocksFirst[0])
        DT->addNewBlock(New, SubLoopBlocksLast[It - 1]);
      else if (*BB == AftBlocksFirst[0])
        DT->addNewBlock(New, AftBlocksLast[It - 1]);
      else {
        // Each set of blocks (Fore/Sub/Aft) will have the same internal domtree
        // structure.
        auto BBDomNode = DT->getNode(*BB);
        auto BBIDom = BBDomNode->getIDom();
        BasicBlock *OriginalBBIDom = BBIDom->getBlock();
        assert(OriginalBBIDom);
        assert(LastValueMap[cast<Value>(OriginalBBIDom)]);
        DT->addNewBlock(
            New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)]));
      }
    }

    // Remap all instructions in the most recent iteration
    for (BasicBlock *NewBlock : NewBlocks) {
      for (Instruction &I : *NewBlock) {
        ::remapInstruction(&I, LastValueMap);
        if (auto *II = dyn_cast<IntrinsicInst>(&I))
          if (II->getIntrinsicID() == Intrinsic::assume)
            AC->registerAssumption(II);
      }
    }

    // Alter the ForeBlocks phi's, pointing them at the latest version of the
    // value from the previous iteration's phis
    for (PHINode &Phi : ForeBlocksFirst[It]->phis()) {
      Value *OldValue = Phi.getIncomingValueForBlock(AftBlocksLast[It]);
      assert(OldValue && "should have incoming edge from Aft[It]");
      Value *NewValue = OldValue;
      if (Value *PrevValue = PrevItValueMap[OldValue])
        NewValue = PrevValue;

      assert(Phi.getNumOperands() == 2);
      Phi.setIncomingBlock(0, ForeBlocksLast[It - 1]);
      Phi.setIncomingValue(0, NewValue);
      Phi.removeIncomingValue(1);
    }
  }

  // Now that all the basic blocks for the unrolled iterations are in place,
  // finish up connecting the blocks and phi nodes. At this point LastValueMap
  // holds the values from the last unrolled iteration.

  // Update Phis in BB from OldBB to point to NewBB
  auto updatePHIBlocks = [](BasicBlock *BB, BasicBlock *OldBB,
                            BasicBlock *NewBB) {
    for (PHINode &Phi : BB->phis()) {
      int I = Phi.getBasicBlockIndex(OldBB);
      Phi.setIncomingBlock(I, NewBB);
    }
  };
  // Update Phis in BB from OldBB to point to NewBB and use the latest value
  // from LastValueMap
  auto updatePHIBlocksAndValues = [](BasicBlock *BB, BasicBlock *OldBB,
                                     BasicBlock *NewBB,
                                     ValueToValueMapTy &LastValueMap) {
    for (PHINode &Phi : BB->phis()) {
      for (unsigned b = 0; b < Phi.getNumIncomingValues(); ++b) {
        if (Phi.getIncomingBlock(b) == OldBB) {
          Value *OldValue = Phi.getIncomingValue(b);
          if (Value *LastValue = LastValueMap[OldValue])
            Phi.setIncomingValue(b, LastValue);
          Phi.setIncomingBlock(b, NewBB);
          break;
        }
      }
    }
  };
  // Move all the phis from Src into Dest
  auto movePHIs = [](BasicBlock *Src, BasicBlock *Dest) {
    Instruction *insertPoint = Dest->getFirstNonPHI();
    while (PHINode *Phi = dyn_cast<PHINode>(Src->begin()))
      Phi->moveBefore(insertPoint);
  };

  // Update the PHI values outside the loop to point to the last block
  updatePHIBlocksAndValues(LoopExit, AftBlocksLast[0], AftBlocksLast.back(),
                           LastValueMap);

  // Update ForeBlocks successors and phi nodes
  BranchInst *ForeTerm =
      cast<BranchInst>(ForeBlocksLast.back()->getTerminator());
  BasicBlock *Dest = SubLoopBlocksFirst[0];
  ForeTerm->setSuccessor(0, Dest);

  if (CompletelyUnroll) {
    while (PHINode *Phi = dyn_cast<PHINode>(ForeBlocksFirst[0]->begin())) {
      Phi->replaceAllUsesWith(Phi->getIncomingValueForBlock(Preheader));
      Phi->getParent()->getInstList().erase(Phi);
    }
  } else {
    // Update the PHI values to point to the last aft block
    updatePHIBlocksAndValues(ForeBlocksFirst[0], AftBlocksLast[0],
                             AftBlocksLast.back(), LastValueMap);
  }

  for (unsigned It = 1; It != Count; It++) {
    // Remap ForeBlock successors from previous iteration to this
    BranchInst *ForeTerm =
        cast<BranchInst>(ForeBlocksLast[It - 1]->getTerminator());
    BasicBlock *Dest = ForeBlocksFirst[It];
    ForeTerm->setSuccessor(0, Dest);
  }

  // Subloop successors and phis
  BranchInst *SubTerm =
      cast<BranchInst>(SubLoopBlocksLast.back()->getTerminator());
  SubTerm->setSuccessor(!SubLoopContinueOnTrue, SubLoopBlocksFirst[0]);
  SubTerm->setSuccessor(SubLoopContinueOnTrue, AftBlocksFirst[0]);
  updatePHIBlocks(SubLoopBlocksFirst[0], ForeBlocksLast[0],
                  ForeBlocksLast.back());
  updatePHIBlocks(SubLoopBlocksFirst[0], SubLoopBlocksLast[0],
                  SubLoopBlocksLast.back());

  for (unsigned It = 1; It != Count; It++) {
    // Replace the conditional branch of the previous iteration subloop with an
    // unconditional one to this one
    BranchInst *SubTerm =
        cast<BranchInst>(SubLoopBlocksLast[It - 1]->getTerminator());
    BranchInst::Create(SubLoopBlocksFirst[It], SubTerm);
    SubTerm->eraseFromParent();

    updatePHIBlocks(SubLoopBlocksFirst[It], ForeBlocksLast[It],
                    ForeBlocksLast.back());
    updatePHIBlocks(SubLoopBlocksFirst[It], SubLoopBlocksLast[It],
                    SubLoopBlocksLast.back());
    movePHIs(SubLoopBlocksFirst[It], SubLoopBlocksFirst[0]);
  }

  // Aft blocks successors and phis
  BranchInst *Term = cast<BranchInst>(AftBlocksLast.back()->getTerminator());
  if (CompletelyUnroll) {
    BranchInst::Create(LoopExit, Term);
    Term->eraseFromParent();
  } else {
    Term->setSuccessor(!ContinueOnTrue, ForeBlocksFirst[0]);
  }
  updatePHIBlocks(AftBlocksFirst[0], SubLoopBlocksLast[0],
                  SubLoopBlocksLast.back());

  for (unsigned It = 1; It != Count; It++) {
    // Replace the conditional branch of the previous iteration subloop with an
    // unconditional one to this one
    BranchInst *AftTerm =
        cast<BranchInst>(AftBlocksLast[It - 1]->getTerminator());
    BranchInst::Create(AftBlocksFirst[It], AftTerm);
    AftTerm->eraseFromParent();

    updatePHIBlocks(AftBlocksFirst[It], SubLoopBlocksLast[It],
                    SubLoopBlocksLast.back());
    movePHIs(AftBlocksFirst[It], AftBlocksFirst[0]);
  }

  // Dominator Tree. Remove the old links between Fore, Sub and Aft, adding the
  // new ones required.
  if (Count != 1) {
    SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
    DTUpdates.emplace_back(DominatorTree::UpdateKind::Delete, ForeBlocksLast[0],
                           SubLoopBlocksFirst[0]);
    DTUpdates.emplace_back(DominatorTree::UpdateKind::Delete,
                           SubLoopBlocksLast[0], AftBlocksFirst[0]);

    DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert,
                           ForeBlocksLast.back(), SubLoopBlocksFirst[0]);
    DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert,
                           SubLoopBlocksLast.back(), AftBlocksFirst[0]);
    DT->applyUpdates(DTUpdates);
  }

  // Merge adjacent basic blocks, if possible.
  SmallPtrSet<BasicBlock *, 16> MergeBlocks;
  MergeBlocks.insert(ForeBlocksLast.begin(), ForeBlocksLast.end());
  MergeBlocks.insert(SubLoopBlocksLast.begin(), SubLoopBlocksLast.end());
  MergeBlocks.insert(AftBlocksLast.begin(), AftBlocksLast.end());
  while (!MergeBlocks.empty()) {
    BasicBlock *BB = *MergeBlocks.begin();
    BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator());
    if (Term && Term->isUnconditional() && L->contains(Term->getSuccessor(0))) {
      BasicBlock *Dest = Term->getSuccessor(0);
      if (BasicBlock *Fold = foldBlockIntoPredecessor(Dest, LI, SE, DT)) {
        // Don't remove BB and add Fold as they are the same BB
        assert(Fold == BB);
        (void)Fold;
        MergeBlocks.erase(Dest);
      } else
        MergeBlocks.erase(BB);
    } else
      MergeBlocks.erase(BB);
  }

  // At this point, the code is well formed.  We now do a quick sweep over the
  // inserted code, doing constant propagation and dead code elimination as we
  // go.
  simplifyLoopAfterUnroll(SubLoop, true, LI, SE, DT, AC);
  simplifyLoopAfterUnroll(L, !CompletelyUnroll && Count > 1, LI, SE, DT, AC);

  NumCompletelyUnrolledAndJammed += CompletelyUnroll;
  ++NumUnrolledAndJammed;

#ifndef NDEBUG
  // We shouldn't have done anything to break loop simplify form or LCSSA.
  Loop *OuterL = L->getParentLoop();
  Loop *OutestLoop = OuterL ? OuterL : (!CompletelyUnroll ? L : SubLoop);
  assert(OutestLoop->isRecursivelyLCSSAForm(*DT, *LI));
  if (!CompletelyUnroll)
    assert(L->isLoopSimplifyForm());
  assert(SubLoop->isLoopSimplifyForm());
  assert(DT->verify());
#endif

  // Update LoopInfo if the loop is completely removed.
  if (CompletelyUnroll)
    LI->erase(L);

  return CompletelyUnroll ? LoopUnrollResult::FullyUnrolled
                          : LoopUnrollResult::PartiallyUnrolled;
}
Example #18
void fixTerminators(MachineBasicBlock *MBB) {
  SmallPtrSet<MachineBasicBlock*, 2> MissedSuccs;
  MissedSuccs.insert(MBB->succ_begin(), MBB->succ_end());
  MachineInstr *FirstTerminator = 0;

  for (MachineBasicBlock::iterator II = MBB->getFirstTerminator(),
       IE = MBB->end(); II != IE; ++II) {
    MachineInstr *Inst = II;
    if (!VInstrInfo::isBrCndLike(Inst->getOpcode())) continue;

    MachineBasicBlock *TargetBB = Inst->getOperand(1).getMBB();
    MachineOperand Cnd = Inst->getOperand(0);
    //bool inserted;
    //jt_it at;
    //tie(at, inserted) = Table.insert(std::make_pair(TargetBB, Cnd));
    // BranchFolding may generate code that jumps to the same BB with multiple
    // instructions; merge the conditions.
    //if (!inserted) {
    //  at->second = VInstrInfo::MergePred(Cnd, at->second, *MBB,
    //                                     MBB->getFirstTerminator(), &MRI,
    //                                     TII, VTM::VOpOr);
    //}

    // Change an unconditional branch that follows a conditional branch into
    // a conditional branch.
    if (FirstTerminator && VInstrInfo::isUnConditionalBranch(Inst)) {
      MachineOperand &TrueCnd = FirstTerminator->getOperand(0);
      MachineOperand &FalseCnd = Inst->getOperand(0);
      TrueCnd.setIsKill(false);
      FalseCnd.setReg(TrueCnd.getReg());
      FalseCnd.setTargetFlags(TrueCnd.getTargetFlags());
      VInstrInfo::ReversePredicateCondition(FalseCnd);
    }

    FirstTerminator = Inst;
    MissedSuccs.erase(TargetBB);
  }

  // Make sure each basic block has a terminator.
  if (!MissedSuccs.empty()) {
    assert(MissedSuccs.size() == 1 && "Fall through to multiple blocks?");
    ++UnconditionalBranches;
    MachineOperand Cnd = VInstrInfo::CreatePredicate();
    if (FirstTerminator) {
      MachineOperand &TrueCnd = FirstTerminator->getOperand(0);
      assert(TrueCnd.getReg() != 0 && "Two unconditional branches?");
      // We will use the register somewhere else.
      TrueCnd.setIsKill(false);
      Cnd = TrueCnd;
      VInstrInfo::ReversePredicateCondition(Cnd);
    }
    BuildMI(MBB, DebugLoc(), VInstrInfo::getDesc(VTM::VOpToStateb))
      .addOperand(Cnd).addMBB(*MissedSuccs.begin())
      .addOperand(VInstrInfo::CreatePredicate())
      .addOperand(VInstrInfo::CreateTrace());
  }
  //else if (Table.size() != MBB->succ_size()) {
  //  // Also fix the CFG.
  //  while (!MBB->succ_empty())
  //    MBB->removeSuccessor(MBB->succ_end() - 1);
  //  for (jt_it JI = Table.begin(), JE = Table.end(); JI != JE; ++JI)
  //    MBB->addSuccessor(JI->first);

  //  // Try to correct the CFG.
  //  TII->RemoveBranch(*MBB);
  //  VInstrInfo::insertJumpTable(*MBB, Table, DebugLoc());
  //}

  //Table.clear();

  if (MBB->succ_size() == 0 && MBB->getFirstTerminator() == MBB->end()) {
    ++Unreachables;
    BuildMI(MBB, DebugLoc(), VInstrInfo::getDesc(VTM::VOpUnreachable))
      .addOperand(VInstrInfo::CreatePredicate())
      .addOperand(VInstrInfo::CreateTrace());
  }
}
/// handleEndBlock - Remove dead stores to stack-allocated locations in the
/// function end block.  Ex:
/// %A = alloca i32
/// ...
/// store i32 1, i32* %A
/// ret void
bool DSE::handleEndBlock(BasicBlock &BB) {
  bool MadeChange = false;
  
  // Keep track of all of the stack objects that are dead at the end of the
  // function.
  SmallPtrSet<Value*, 16> DeadStackObjects;
  
  // Find all of the alloca'd pointers in the entry block.
  BasicBlock *Entry = BB.getParent()->begin();
  for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I)
    if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
      DeadStackObjects.insert(AI);
  
  // Treat byval arguments the same, stores to them are dead at the end of the
  // function.
  for (Function::arg_iterator AI = BB.getParent()->arg_begin(),
       AE = BB.getParent()->arg_end(); AI != AE; ++AI)
    if (AI->hasByValAttr())
      DeadStackObjects.insert(AI);
  
  // Scan the basic block backwards
  for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){
    --BBI;
    
    // If we find a store, check to see if it points into a dead stack value.
    if (hasMemoryWrite(BBI) && isRemovable(BBI)) {
      // See through pointer-to-pointer bitcasts
      Value *Pointer = getStoredPointerOperand(BBI)->getUnderlyingObject();

      // Stores to stack values are valid candidates for removal.
      if (DeadStackObjects.count(Pointer)) {
        Instruction *Dead = BBI++;
        
        DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n  DEAD: "
                     << *Dead << "\n  Object: " << *Pointer << '\n');
        
        // DCE instructions only used to calculate that store.
        DeleteDeadInstruction(Dead, *MD, &DeadStackObjects);
        ++NumFastStores;
        MadeChange = true;
        continue;
      }
    }
    
    // Remove any dead non-memory-mutating instructions.
    if (isInstructionTriviallyDead(BBI)) {
      Instruction *Inst = BBI++;
      DeleteDeadInstruction(Inst, *MD, &DeadStackObjects);
      ++NumFastOther;
      MadeChange = true;
      continue;
    }
    
    if (AllocaInst *A = dyn_cast<AllocaInst>(BBI)) {
      DeadStackObjects.erase(A);
      continue;
    }
    
    if (CallSite CS = cast<Value>(BBI)) {
      // If this call does not access memory, it can't be loading any of our
      // pointers.
      if (AA->doesNotAccessMemory(CS))
        continue;
      
      unsigned NumModRef = 0, NumOther = 0;
      
      // If the call might load from any of our allocas, then any store above
      // the call is live.
      SmallVector<Value*, 8> LiveAllocas;
      for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(),
           E = DeadStackObjects.end(); I != E; ++I) {
        // If we detect that our AA is imprecise, it's not worth it to scan the
        // rest of the DeadPointers set.  Just assume that the AA will return
        // ModRef for everything, and go ahead and bail out.
        if (NumModRef >= 16 && NumOther == 0)
          return MadeChange;

        // See if the call site touches it.
        AliasAnalysis::ModRefResult A = 
          AA->getModRefInfo(CS, *I, getPointerSize(*I, *AA));
        
        if (A == AliasAnalysis::ModRef)
          ++NumModRef;
        else
          ++NumOther;
        
        if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref)
          LiveAllocas.push_back(*I);
      }
      
      for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(),
           E = LiveAllocas.end(); I != E; ++I)
        DeadStackObjects.erase(*I);
      
      // If all of the allocas were clobbered by the call then we're not going
      // to find anything else to process.
      if (DeadStackObjects.empty())
        return MadeChange;
      
      continue;
    }
    
    AliasAnalysis::Location LoadedLoc;
    
    // If we encounter a use of the pointer, it is no longer considered dead
    if (LoadInst *L = dyn_cast<LoadInst>(BBI)) {
      LoadedLoc = AA->getLocation(L);
    } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
      LoadedLoc = AA->getLocation(V);
    } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(BBI)) {
      LoadedLoc = AA->getLocationForSource(MTI);
    } else {
      // Not a loading instruction.
      continue;
    }

    // Remove any allocas from the DeadStackObjects set that are loaded, as
    // this makes any stores above the access live.
    RemoveAccessedObjects(LoadedLoc, DeadStackObjects);

    // If all of the allocas were clobbered by the access then we're not going
    // to find anything else to process.
    if (DeadStackObjects.empty())
      break;
  }
  
  return MadeChange;
}
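// The algorithm above is independent of the LLVM APIs. A minimal standalone
// sketch of the same idea, using plain C++ and invented toy types in place of
// Instructions and AliasAnalysis: walk the block backwards, keep a set of
// objects still dead at function end, report stores into dead objects, and
// revive an object as soon as something reads it.
#include <cstdio>
#include <set>
#include <string>
#include <vector>

struct Inst {
  enum Kind { Alloca, Store, Load, Ret } kind;
  std::string obj; // the stack object this instruction touches
};

int main() {
  std::vector<Inst> block = {
      {Inst::Alloca, "A"}, {Inst::Alloca, "B"},
      {Inst::Store, "A"}, // dead: nothing reads A below this point
      {Inst::Store, "B"}, // live: B is loaded afterwards
      {Inst::Load, "B"},  {Inst::Ret, ""},
  };

  std::set<std::string> deadAtEnd = {"A", "B"};
  for (auto it = block.rbegin(); it != block.rend(); ++it) {
    if (it->kind == Inst::Load)
      deadAtEnd.erase(it->obj); // stores above this load are live
    else if (it->kind == Inst::Store && deadAtEnd.count(it->obj))
      std::printf("dead store to %s\n", it->obj.c_str()); // would be deleted
    else if (it->kind == Inst::Alloca)
      deadAtEnd.erase(it->obj); // reached the definition itself
  }
}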
Example #20
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
  if (skipOptnoneFunction(*MF.getFunction()))
    return false;

  DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
  DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n');

  if (DisablePeephole)
    return false;

  TM  = &MF.getTarget();
  TII = TM->getInstrInfo();
  MRI = &MF.getRegInfo();
  DT  = Aggressive ? &getAnalysis<MachineDominatorTree>() : nullptr;

  bool Changed = false;

  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
    MachineBasicBlock *MBB = &*I;

    bool SeenMoveImm = false;
    SmallPtrSet<MachineInstr*, 8> LocalMIs;
    SmallSet<unsigned, 4> ImmDefRegs;
    DenseMap<unsigned, MachineInstr*> ImmDefMIs;
    SmallSet<unsigned, 16> FoldAsLoadDefCandidates;

    for (MachineBasicBlock::iterator
           MII = I->begin(), MIE = I->end(); MII != MIE; ) {
      MachineInstr *MI = &*MII;
      // We may be erasing MI below, increment MII now.
      ++MII;
      LocalMIs.insert(MI);

      // Skip debug values. They should not affect this peephole optimization.
      if (MI->isDebugValue())
        continue;

      // If there exists an instruction which belongs to the following
      // categories, we will discard the load candidates.
      if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() ||
          MI->isKill() || MI->isInlineAsm() ||
          MI->hasUnmodeledSideEffects()) {
        FoldAsLoadDefCandidates.clear();
        continue;
      }
      if (MI->mayStore() || MI->isCall())
        FoldAsLoadDefCandidates.clear();

      if (((MI->isBitcast() || MI->isCopy()) && optimizeCopyOrBitcast(MI)) ||
          (MI->isCompare() && optimizeCmpInstr(MI, MBB)) ||
          (MI->isSelect() && optimizeSelect(MI))) {
        // MI is deleted.
        LocalMIs.erase(MI);
        Changed = true;
        continue;
      }

      if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
        SeenMoveImm = true;
      } else {
        Changed |= optimizeExtInstr(MI, MBB, LocalMIs);
        // optimizeExtInstr might have created new instructions after MI
        // and before the already incremented MII. Adjust MII so that the
        // next iteration sees the new instructions.
        MII = MI;
        ++MII;
        if (SeenMoveImm)
          Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
      }

      // Check whether MI is a load candidate for folding into a later
      // instruction. If MI is not a candidate, check whether we can fold an
      // earlier load into MI.
      if (!isLoadFoldable(MI, FoldAsLoadDefCandidates) &&
          !FoldAsLoadDefCandidates.empty()) {
        const MCInstrDesc &MIDesc = MI->getDesc();
        for (unsigned i = MIDesc.getNumDefs(); i != MIDesc.getNumOperands();
             ++i) {
          const MachineOperand &MOp = MI->getOperand(i);
          if (!MOp.isReg())
            continue;
          unsigned FoldAsLoadDefReg = MOp.getReg();
          if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) {
            // We need to fold load after optimizeCmpInstr, since
            // optimizeCmpInstr can enable folding by converting SUB to CMP.
            // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and
            // we need it for markUsesInDebugValueAsUndef().
            unsigned FoldedReg = FoldAsLoadDefReg;
            MachineInstr *DefMI = nullptr;
            MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
                                                          FoldAsLoadDefReg,
                                                          DefMI);
            if (FoldMI) {
              // Update LocalMIs since we replaced MI with FoldMI and deleted
              // DefMI.
              DEBUG(dbgs() << "Replacing: " << *MI);
              DEBUG(dbgs() << "     With: " << *FoldMI);
              LocalMIs.erase(MI);
              LocalMIs.erase(DefMI);
              LocalMIs.insert(FoldMI);
              MI->eraseFromParent();
              DefMI->eraseFromParent();
              MRI->markUsesInDebugValueAsUndef(FoldedReg);
              FoldAsLoadDefCandidates.erase(FoldedReg);
              ++NumLoadFold;
              // MI is replaced with FoldMI.
              Changed = true;
              break;
            }
          }
        }
      }
    }
  }

  return Changed;
}
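// The '++MII' before any transformation above is the classic erase-safe
// iteration idiom: advance the iterator first, so deleting the current
// instruction cannot invalidate the loop position. Below is a minimal sketch
// of the same idiom in plain C++ over std::list, whose iterators (like ilist
// iterators) stay valid when other elements are erased; all names here are
// illustrative, not part of any LLVM API.
#include <cstdio>
#include <list>

int main() {
  std::list<int> insts = {1, 2, 3, 4, 5};
  for (auto it = insts.begin(); it != insts.end();) {
    auto cur = it++;       // 'cur' plays the role of MI, 'it' of MII
    if (*cur % 2 == 0)
      insts.erase(cur);    // safe: 'it' already points past 'cur'
  }
  for (int v : insts)
    std::printf("%d ", v); // prints: 1 3 5
  std::printf("\n");
}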
Example #21
int Compilation::performJobsImpl() {
  // Create a TaskQueue for execution.
  std::unique_ptr<TaskQueue> TQ;
  if (SkipTaskExecution)
    TQ.reset(new DummyTaskQueue(NumberOfParallelCommands));
  else
    TQ.reset(new TaskQueue(NumberOfParallelCommands));

  PerformJobsState State;

  // Shadow the DependencyGraph class template with a local alias that fixes
  // the node type to const Job *.
  using DependencyGraph = DependencyGraph<const Job *>;
  DependencyGraph DepGraph;
  SmallPtrSet<const Job *, 16> DeferredCommands;
  SmallVector<const Job *, 16> InitialOutOfDateCommands;

  DependencyGraph::MarkTracer ActualIncrementalTracer;
  DependencyGraph::MarkTracer *IncrementalTracer = nullptr;
  if (ShowIncrementalBuildDecisions)
    IncrementalTracer = &ActualIncrementalTracer;

  auto noteBuilding = [&] (const Job *cmd, StringRef reason) {
    if (!ShowIncrementalBuildDecisions)
      return;
    if (State.ScheduledCommands.count(cmd))
      return;
    llvm::outs() << "Queuing "
                 << llvm::sys::path::filename(cmd->getOutput().getBaseInput(0))
                 << " " << reason << "\n";
    IncrementalTracer->printPath(llvm::outs(), cmd,
                                 [](raw_ostream &out, const Job *base) {
      out << llvm::sys::path::filename(base->getOutput().getBaseInput(0));
    });
  };

  // Set up scheduleCommandIfNecessaryAndPossible.
  // This will only schedule the given command if it has not been scheduled
  // yet and if all of its inputs have finished; otherwise the command is
  // recorded as blocked on the first unfinished input.
  auto scheduleCommandIfNecessaryAndPossible = [&] (const Job *Cmd) {
    if (State.ScheduledCommands.count(Cmd))
      return;

    if (auto Blocking = findUnfinishedJob(Cmd->getInputs(),
                                          State.FinishedCommands)) {
      State.BlockingCommands[Blocking].push_back(Cmd);
      return;
    }

    // FIXME: Failing here should not take down the whole process.
    bool success = writeFilelistIfNecessary(Cmd, Diags);
    assert(success && "failed to write filelist");
    (void)success;

    assert(Cmd->getExtraEnvironment().empty() &&
           "not implemented for compilations with multiple jobs");
    State.ScheduledCommands.insert(Cmd);
    TQ->addTask(Cmd->getExecutable(), Cmd->getArguments(), llvm::None,
                (void *)Cmd);
  };

  // When a task finishes, we need to reevaluate the other commands that
  // might have been blocked.
  auto markFinished = [&] (const Job *Cmd) {
    State.FinishedCommands.insert(Cmd);

    auto BlockedIter = State.BlockingCommands.find(Cmd);
    if (BlockedIter != State.BlockingCommands.end()) {
      auto AllBlocked = std::move(BlockedIter->second);
      State.BlockingCommands.erase(BlockedIter);
      for (auto *Blocked : AllBlocked)
        scheduleCommandIfNecessaryAndPossible(Blocked);
    }
  };

  // Schedule all jobs we can.
  for (const Job *Cmd : getJobs()) {
    if (!getIncrementalBuildEnabled()) {
      scheduleCommandIfNecessaryAndPossible(Cmd);
      continue;
    }

    // Try to load the dependencies file for this job. If there isn't one, we
    // always have to run the job, but it doesn't affect any other jobs. If
    // there should be one but it's not present or can't be loaded, we have to
    // run all the jobs.
    // FIXME: We can probably do better here!
    Job::Condition Condition = Job::Condition::Always;
    StringRef DependenciesFile =
      Cmd->getOutput().getAdditionalOutputForType(types::TY_SwiftDeps);
    if (!DependenciesFile.empty()) {
      if (Cmd->getCondition() == Job::Condition::NewlyAdded) {
        DepGraph.addIndependentNode(Cmd);
      } else {
        switch (DepGraph.loadFromPath(Cmd, DependenciesFile)) {
        case DependencyGraphImpl::LoadResult::HadError:
          disableIncrementalBuild();
          for (const Job *Cmd : DeferredCommands)
            scheduleCommandIfNecessaryAndPossible(Cmd);
          DeferredCommands.clear();
          break;
        case DependencyGraphImpl::LoadResult::UpToDate:
          Condition = Cmd->getCondition();
          break;
        case DependencyGraphImpl::LoadResult::AffectsDownstream:
          llvm_unreachable("we haven't marked anything in this graph yet");
        }
      }
    }

    switch (Condition) {
    case Job::Condition::Always:
      if (getIncrementalBuildEnabled() && !DependenciesFile.empty()) {
        InitialOutOfDateCommands.push_back(Cmd);
        DepGraph.markIntransitive(Cmd);
      }
      SWIFT_FALLTHROUGH;
    case Job::Condition::RunWithoutCascading:
      noteBuilding(Cmd, "(initial)");
      scheduleCommandIfNecessaryAndPossible(Cmd);
      break;
    case Job::Condition::CheckDependencies:
      DeferredCommands.insert(Cmd);
      break;
    case Job::Condition::NewlyAdded:
      llvm_unreachable("handled above");
    }
  }

  if (getIncrementalBuildEnabled()) {
    SmallVector<const Job *, 16> AdditionalOutOfDateCommands;

    // We scheduled all of the files that have actually changed. Now add the
    // files that haven't changed, so that they'll get built in parallel if
    // possible and after the first set of files if it's not.
    for (auto *Cmd : InitialOutOfDateCommands) {
      DepGraph.markTransitive(AdditionalOutOfDateCommands, Cmd,
                              IncrementalTracer);
    }

    for (auto *transitiveCmd : AdditionalOutOfDateCommands)
      noteBuilding(transitiveCmd, "because of the initial set:");
    size_t firstSize = AdditionalOutOfDateCommands.size();

    // Check all cross-module dependencies as well.
    for (StringRef dependency : DepGraph.getExternalDependencies()) {
      llvm::sys::fs::file_status depStatus;
      if (!llvm::sys::fs::status(dependency, depStatus))
        if (depStatus.getLastModificationTime() < LastBuildTime)
          continue;

      // If the dependency has been modified since the oldest built file,
      // or if we can't stat it for some reason (perhaps it's been deleted?),
      // trigger rebuilds through the dependency graph.
      DepGraph.markExternal(AdditionalOutOfDateCommands, dependency);
    }

    for (auto *externalCmd :
            llvm::makeArrayRef(AdditionalOutOfDateCommands).slice(firstSize)) {
      noteBuilding(externalCmd, "because of external dependencies");
    }

    for (auto *AdditionalCmd : AdditionalOutOfDateCommands) {
      if (!DeferredCommands.count(AdditionalCmd))
        continue;
      scheduleCommandIfNecessaryAndPossible(AdditionalCmd);
      DeferredCommands.erase(AdditionalCmd);
    }
  }

  int Result = EXIT_SUCCESS;

  // Set up a callback which will be called immediately after a task has
  // started. This callback may be used to provide output indicating that the
  // task began.
  auto taskBegan = [this] (ProcessId Pid, void *Context) {
    // TODO: properly handle task began.
    const Job *BeganCmd = (const Job *)Context;

    // For verbose output, print out each command as it begins execution.
    if (Level == OutputLevel::Verbose)
      BeganCmd->printCommandLine(llvm::errs());
    else if (Level == OutputLevel::Parseable)
      parseable_output::emitBeganMessage(llvm::errs(), *BeganCmd, Pid);
  };

  // Set up a callback which will be called immediately after a task has
  // finished execution. This callback determines whether execution should
  // continue (returning TaskFinishedResponse::StopExecution halts the build),
  // and it also schedules any additional commands which we now know need
  // to run.
  auto taskFinished = [&] (ProcessId Pid, int ReturnCode, StringRef Output,
                           void *Context) -> TaskFinishedResponse {
    const Job *FinishedCmd = (const Job *)Context;

    if (Level == OutputLevel::Parseable) {
      // Parseable output was requested.
      parseable_output::emitFinishedMessage(llvm::errs(), *FinishedCmd, Pid,
                                            ReturnCode, Output);
    } else {
      // Otherwise, send the buffered output to stderr, though only if we
      // support getting buffered output.
      if (TaskQueue::supportsBufferingOutput())
        llvm::errs() << Output;
    }

    if (ReturnCode != EXIT_SUCCESS) {
      // The task failed, so return true without performing any further
      // dependency analysis.

      // Store this task's ReturnCode as our Result if we haven't stored
      // anything yet.
      if (Result == EXIT_SUCCESS)
        Result = ReturnCode;

      // Compile jobs that exit with a plain EXIT_FAILURE have already emitted
      // their own diagnostics; only report other kinds of failures here.
      if (!isa<CompileJobAction>(FinishedCmd->getSource()) ||
          ReturnCode != EXIT_FAILURE) {
        Diags.diagnose(SourceLoc(), diag::error_command_failed,
                       FinishedCmd->getSource().getClassName(),
                       ReturnCode);
      }

      return ContinueBuildingAfterErrors ?
          TaskFinishedResponse::ContinueExecution :
          TaskFinishedResponse::StopExecution;
    }

    // When a task finishes, we need to reevaluate the other commands that
    // might have been blocked.
    markFinished(FinishedCmd);

    // In order to handle both old dependencies that have disappeared and new
    // dependencies that have arisen, we need to reload the dependency file.
    if (getIncrementalBuildEnabled()) {
      const CommandOutput &Output = FinishedCmd->getOutput();
      StringRef DependenciesFile =
        Output.getAdditionalOutputForType(types::TY_SwiftDeps);
      if (!DependenciesFile.empty()) {
        SmallVector<const Job *, 16> Dependents;
        bool wasCascading = DepGraph.isMarked(FinishedCmd);

        switch (DepGraph.loadFromPath(FinishedCmd, DependenciesFile)) {
        case DependencyGraphImpl::LoadResult::HadError:
          disableIncrementalBuild();
          for (const Job *Cmd : DeferredCommands)
            scheduleCommandIfNecessaryAndPossible(Cmd);
          DeferredCommands.clear();
          Dependents.clear();
          break;
        case DependencyGraphImpl::LoadResult::UpToDate:
          if (!wasCascading)
            break;
          SWIFT_FALLTHROUGH;
        case DependencyGraphImpl::LoadResult::AffectsDownstream:
          DepGraph.markTransitive(Dependents, FinishedCmd);
          break;
        }

        for (const Job *Cmd : Dependents) {
          DeferredCommands.erase(Cmd);
          noteBuilding(Cmd, "because of dependencies discovered later");
          scheduleCommandIfNecessaryAndPossible(Cmd);
        }
      }
    }

    return TaskFinishedResponse::ContinueExecution;
  };

  auto taskSignalled = [&] (ProcessId Pid, StringRef ErrorMsg, StringRef Output,
                            void *Context) -> TaskFinishedResponse {
    const Job *SignalledCmd = (const Job *)Context;

    if (Level == OutputLevel::Parseable) {
      // Parseable output was requested.
      parseable_output::emitSignalledMessage(llvm::errs(), *SignalledCmd, Pid,
                                             ErrorMsg, Output);
    } else {
      // Otherwise, send the buffered output to stderr, though only if we
      // support getting buffered output.
      if (TaskQueue::supportsBufferingOutput())
        llvm::errs() << Output;
    }

    if (!ErrorMsg.empty())
      Diags.diagnose(SourceLoc(), diag::error_unable_to_execute_command,
                     ErrorMsg);

    Diags.diagnose(SourceLoc(), diag::error_command_signalled,
                   SignalledCmd->getSource().getClassName());

    // Since the task signalled, unconditionally set result to -2.
    Result = -2;

    return TaskFinishedResponse::StopExecution;
  };

  do {
    // Ask the TaskQueue to execute.
    TQ->execute(taskBegan, taskFinished, taskSignalled);

    // Mark all remaining deferred commands as skipped.
    for (const Job *Cmd : DeferredCommands) {
      if (Level == OutputLevel::Parseable) {
        // Provide output indicating this command was skipped if parseable output
        // was requested.
        parseable_output::emitSkippedMessage(llvm::errs(), *Cmd);
      }

      State.ScheduledCommands.insert(Cmd);
      markFinished(Cmd);
    }

    // ...which may allow us to go on and do later tasks.
  } while (Result == 0 && TQ->hasRemainingTasks());

  if (Result == 0) {
    assert(State.BlockingCommands.empty() &&
           "some blocking commands never finished properly");
  } else {
    // Make sure we record any files that still need to be rebuilt.
    for (const Job *Cmd : getJobs()) {
      // Skip files that don't use dependency analysis.
      StringRef DependenciesFile =
          Cmd->getOutput().getAdditionalOutputForType(types::TY_SwiftDeps);
      if (DependenciesFile.empty())
        continue;

      // Don't worry about commands that finished or weren't going to run.
      if (State.FinishedCommands.count(Cmd))
        continue;
      if (!State.ScheduledCommands.count(Cmd))
        continue;

      bool isCascading = true;
      if (getIncrementalBuildEnabled())
        isCascading = DepGraph.isMarked(Cmd);
      State.UnfinishedCommands.insert({Cmd, isCascading});
    }
  }

  if (!CompilationRecordPath.empty() && !SkipTaskExecution) {
    InputInfoMap InputInfo;
    populateInputInfoMap(InputInfo, State);
    checkForOutOfDateInputs(Diags, InputInfo);
    writeCompilationRecord(CompilationRecordPath, ArgsHash, BuildStartTime,
                           InputInfo);
  }

  if (Result == 0)
    Result = Diags.hadAnyError();
  return Result;
}
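// Stripped of the driver specifics, the scheduling core above is a small
// dependency-release pattern: a job either starts immediately or is parked
// under the first unfinished input, and finishing a job releases everything
// parked under it. A condensed sketch with invented types, not the
// swift::driver API:
#include <cstdio>
#include <map>
#include <set>
#include <string>
#include <vector>

struct Job {
  std::string name;
  std::vector<const Job *> inputs;
};

static std::set<const Job *> finished, scheduled;
static std::map<const Job *, std::vector<const Job *>> blockedOn;

static void schedule(const Job *J) {
  if (scheduled.count(J))
    return;
  for (const Job *In : J->inputs)
    if (!finished.count(In)) {
      blockedOn[In].push_back(J); // park J until In finishes
      return;
    }
  scheduled.insert(J);
  std::printf("running %s\n", J->name.c_str());
}

static void markFinished(const Job *J) {
  finished.insert(J);
  auto It = blockedOn.find(J);
  if (It == blockedOn.end())
    return;
  auto Waiters = std::move(It->second);
  blockedOn.erase(It);
  for (const Job *W : Waiters)
    schedule(W); // re-evaluate everything that was waiting on J
}

int main() {
  Job A{"a", {}}, B{"b", {&A}};
  schedule(&B);     // parked: waits on a
  schedule(&A);     // runs a
  markFinished(&A); // releases and runs b
}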
Example #22
/// handleEndBlock - Remove dead stores to stack-allocated locations in the
/// function end block.  Ex:
/// %A = alloca i32
/// ...
/// store i32 1, i32* %A
/// ret void
bool DSE::handleEndBlock(BasicBlock &BB) {
  bool MadeChange = false;

  // Keep track of all of the stack objects that are dead at the end of the
  // function.
  SmallPtrSet<Value*, 16> DeadStackObjects;

  // Find all of the alloca'd pointers in the entry block.
  BasicBlock *Entry = BB.getParent()->begin();
  for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I) {
    if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
      DeadStackObjects.insert(AI);

    // These are heap objects rather than stack objects, but if the pointer
    // never escapes then the memory is leaked when the function returns, so
    // stores into it are just as dead at the end of the function.
    CallInst *CI = extractMallocCall(I);
    if (!CI)
      CI = extractCallocCall(I);
    if (CI && !PointerMayBeCaptured(CI, /*ReturnCaptures=*/true,
                                    /*StoreCaptures=*/true))
      DeadStackObjects.insert(CI);
  }

  // Treat byval arguments the same, stores to them are dead at the end of the
  // function.
  for (Function::arg_iterator AI = BB.getParent()->arg_begin(),
       AE = BB.getParent()->arg_end(); AI != AE; ++AI)
    if (AI->hasByValAttr())
      DeadStackObjects.insert(AI);

  // Scan the basic block backwards
  for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){
    --BBI;

    // If we find a store, check to see if it points into a dead stack value.
    if (hasMemoryWrite(BBI) && isRemovable(BBI)) {
      // See through pointer-to-pointer bitcasts
      SmallVector<Value *, 4> Pointers;
      GetUnderlyingObjects(getStoredPointerOperand(BBI), Pointers);

      // Stores to stack values are valid candidates for removal.
      bool AllDead = true;
      for (SmallVectorImpl<Value *>::iterator I = Pointers.begin(),
           E = Pointers.end(); I != E; ++I)
        if (!DeadStackObjects.count(*I)) {
          AllDead = false;
          break;
        }

      if (AllDead) {
        Instruction *Dead = BBI++;

        DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n  DEAD: "
                     << *Dead << "\n  Objects: ";
              for (SmallVectorImpl<Value *>::iterator I = Pointers.begin(),
                   E = Pointers.end(); I != E; ++I) {
                dbgs() << **I;
                if (llvm::next(I) != E)
                  dbgs() << ", ";
              }
              dbgs() << '\n');

        // DCE instructions only used to calculate that store.
        DeleteDeadInstruction(Dead, *MD, &DeadStackObjects);
        ++NumFastStores;
        MadeChange = true;
        continue;
      }
    }

    // Remove any dead non-memory-mutating instructions.
    if (isInstructionTriviallyDead(BBI)) {
      Instruction *Inst = BBI++;
      DeleteDeadInstruction(Inst, *MD, &DeadStackObjects);
      ++NumFastOther;
      MadeChange = true;
      continue;
    }

    if (AllocaInst *A = dyn_cast<AllocaInst>(BBI)) {
      DeadStackObjects.erase(A);
      continue;
    }

    if (CallInst *CI = extractMallocCall(BBI)) {
      DeadStackObjects.erase(CI);
      continue;
    }

    if (CallInst *CI = extractCallocCall(BBI)) {
      DeadStackObjects.erase(CI);
      continue;
    }

    if (CallSite CS = cast<Value>(BBI)) {
      // If this call does not access memory, it can't be loading any of our
      // pointers.
      if (AA->doesNotAccessMemory(CS))
        continue;

      // If the call might load from any of our allocas, then any store above
      // the call is live.
      SmallVector<Value*, 8> LiveAllocas;
      for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(),
           E = DeadStackObjects.end(); I != E; ++I) {
        // See if the call site touches it.
        AliasAnalysis::ModRefResult A =
          AA->getModRefInfo(CS, *I, getPointerSize(*I, *AA));

        if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref)
          LiveAllocas.push_back(*I);
      }

      for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(),
           E = LiveAllocas.end(); I != E; ++I)
        DeadStackObjects.erase(*I);

      // If all of the allocas were clobbered by the call then we're not going
      // to find anything else to process.
      if (DeadStackObjects.empty())
        return MadeChange;

      continue;
    }

    AliasAnalysis::Location LoadedLoc;

    // If we encounter a use of the pointer, it is no longer considered dead
    if (LoadInst *L = dyn_cast<LoadInst>(BBI)) {
      if (!L->isUnordered()) // Be conservative with atomic/volatile load
        break;
      LoadedLoc = AA->getLocation(L);
    } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
      LoadedLoc = AA->getLocation(V);
    } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(BBI)) {
      LoadedLoc = AA->getLocationForSource(MTI);
    } else if (!BBI->mayReadFromMemory()) {
      // Instruction doesn't read memory.  Note that stores that weren't removed
      // above will hit this case.
      continue;
    } else {
      // Unknown inst; assume it clobbers everything.
      break;
    }

    // Remove any allocas from the DeadStackObjects set that are loaded, as
    // this makes any stores above the access live.
    RemoveAccessedObjects(LoadedLoc, DeadStackObjects);

    // If all of the allocas were clobbered by the access then we're not going
    // to find anything else to process.
    if (DeadStackObjects.empty())
      break;
  }

  return MadeChange;
}
Example #23
void SanityCheckInstructionsPass::findInstructions(Function *F) {

    // A list of instructions that are used by sanity checks. They become sanity
    // check instructions if it turns out they're not used by anything else.
    SmallPtrSet<Instruction*, 128> Worklist;
    
    // A list of basic blocks that contain sanity check instructions. They
    // become sanity check blocks if it turns out they don't contain anything
    // else.
    SmallPtrSet<BasicBlock*, 64>   BlockWorklist;
    
    // A map from instructions to the checks that use them.
    std::map<Instruction*, SmallPtrSet<Instruction*, 4> > ChecksByInstruction;

    for (BasicBlock &BB: *F) {
        if (findSanityCheckCall(&BB)) {
            SanityCheckBlocks[F].insert(&BB);

            // All instructions inside sanity check blocks are sanity check instructions
            for (Instruction &I: BB) {
                Worklist.insert(&I);
            }

            // All branches to sanity check blocks are sanity check branches
            for (User *U: BB.users()) {
                if (Instruction *Inst = dyn_cast<Instruction>(U)) {
                    Worklist.insert(Inst);
                }
                BranchInst *BI = dyn_cast<BranchInst>(U);
                if (BI && BI->isConditional()) {
                    SanityCheckBranches[F].insert(BI);
                    ChecksByInstruction[BI].insert(BI);
                }
            }
        }
    }

    while (!Worklist.empty()) {
        // Alternate between emptying the worklist...
        while (!Worklist.empty()) {
            Instruction *Inst = *Worklist.begin();
            Worklist.erase(Inst);
            if (onlyUsedInSanityChecks(Inst)) {
                if (SanityCheckInstructions[F].insert(Inst)) {
                    for (Use &U: Inst->operands()) {
                        if (Instruction *Op = dyn_cast<Instruction>(U.get())) {
                            Worklist.insert(Op);
                            
                            // Copy ChecksByInstruction from Inst to Op
                            auto CBI = ChecksByInstruction.find(Inst);
                            if (CBI != ChecksByInstruction.end()) {
                                ChecksByInstruction[Op].insert(CBI->second.begin(), CBI->second.end());
                            }
                        }
                    }

                    BlockWorklist.insert(Inst->getParent());

                    // Fill InstructionsBySanityCheck from the inverse ChecksByInstruction
                    auto CBI = ChecksByInstruction.find(Inst);
                    if (CBI != ChecksByInstruction.end()) {
                        for (Instruction *CI : CBI->second) {
                            InstructionsBySanityCheck[CI].insert(Inst);
                        }
                    }
                }
            }
        }

        // ... and checking whether this causes basic blocks to contain only
        // sanity checks. This would in turn cause terminators to be added to
        // the worklist.
        while (!BlockWorklist.empty()) {
            BasicBlock *BB = *BlockWorklist.begin();
            BlockWorklist.erase(BB);
            
            bool allInstructionsAreSanityChecks = true;
            for (Instruction &I: *BB) {
                if (!SanityCheckInstructions.at(BB->getParent()).count(&I)) {
                    allInstructionsAreSanityChecks = false;
                    break;
                }
            }
            
            if (allInstructionsAreSanityChecks) {
                for (User *U: BB->users()) {
                    if (Instruction *Inst = dyn_cast<Instruction>(U)) {
                        Worklist.insert(Inst);
                    }
                }
            }
        }
    }
}
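// The loop structure above is a two-worklist fixed point: draining the
// instruction worklist can add blocks to the block worklist, and processing
// a block can push new instructions back onto the first, so the outer loop
// repeats until both are empty. A skeleton of that shape, with toy step
// functions standing in for the real transfer logic:
#include <cstdio>
#include <set>

// Alternate between the two worklists until neither can feed the other.
template <typename A, typename B, typename FA, typename FB>
void fixedPoint(std::set<A> &WA, std::set<B> &WB, FA StepA, FB StepB) {
  while (!WA.empty() || !WB.empty()) {
    while (!WA.empty()) {
      A Item = *WA.begin();
      WA.erase(WA.begin());
      StepA(Item, WB); // may insert into the second worklist
    }
    while (!WB.empty()) {
      B Item = *WB.begin();
      WB.erase(WB.begin());
      StepB(Item, WA); // may insert into the first worklist
    }
  }
}

int main() {
  std::set<int> Insts = {8};
  std::set<int> Blocks;
  fixedPoint(Insts, Blocks,
             [](int I, std::set<int> &BB) { if (I > 1) BB.insert(I / 2); },
             [](int B, std::set<int> &II) { if (B > 1) II.insert(B - 1); });
  std::printf("reached the fixed point\n");
}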
Example #24
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
    if (DisablePeephole)
        return false;

    TM  = &MF.getTarget();
    TII = TM->getInstrInfo();
    MRI = &MF.getRegInfo();
    DT  = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0;

    bool Changed = false;

    SmallPtrSet<MachineInstr*, 8> LocalMIs;
    SmallSet<unsigned, 4> ImmDefRegs;
    DenseMap<unsigned, MachineInstr*> ImmDefMIs;
    // Unlike the newer revision of this pass above, only one load-folding
    // candidate is tracked at a time; 0 means "no candidate".
    unsigned FoldAsLoadDefReg;
    for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
        MachineBasicBlock *MBB = &*I;

        bool SeenMoveImm = false;
        LocalMIs.clear();
        ImmDefRegs.clear();
        ImmDefMIs.clear();
        FoldAsLoadDefReg = 0;

        for (MachineBasicBlock::iterator
                MII = I->begin(), MIE = I->end(); MII != MIE; ) {
            MachineInstr *MI = &*MII;
            // We may be erasing MI below, increment MII now.
            ++MII;
            LocalMIs.insert(MI);

            // If there exists an instruction which belongs to the following
            // categories, we will discard the load candidate.
            if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
                    MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() ||
                    MI->hasUnmodeledSideEffects()) {
                FoldAsLoadDefReg = 0;
                continue;
            }
            if (MI->mayStore() || MI->isCall())
                FoldAsLoadDefReg = 0;

            if ((MI->isBitcast() && optimizeBitcastInstr(MI, MBB)) ||
                    (MI->isCompare() && optimizeCmpInstr(MI, MBB)) ||
                    (MI->isSelect() && optimizeSelect(MI))) {
                // MI is deleted.
                LocalMIs.erase(MI);
                Changed = true;
                continue;
            }

            if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
                SeenMoveImm = true;
            } else {
                Changed |= optimizeExtInstr(MI, MBB, LocalMIs);
                // optimizeExtInstr might have created new instructions after MI
                // and before the already incremented MII. Adjust MII so that the
                // next iteration sees the new instructions.
                MII = MI;
                ++MII;
                if (SeenMoveImm)
                    Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
            }

            // Check whether MI is a load candidate for folding into a later
            // instruction. If MI is not a candidate, check whether we can fold an
            // earlier load into MI.
            if (!isLoadFoldable(MI, FoldAsLoadDefReg) && FoldAsLoadDefReg) {
                // We need to fold load after optimizeCmpInstr, since optimizeCmpInstr
                // can enable folding by converting SUB to CMP.
                MachineInstr *DefMI = 0;
                MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
                                       FoldAsLoadDefReg, DefMI);
                if (FoldMI) {
                    // Update LocalMIs since we replaced MI with FoldMI and deleted DefMI.
                    LocalMIs.erase(MI);
                    LocalMIs.erase(DefMI);
                    LocalMIs.insert(FoldMI);
                    MI->eraseFromParent();
                    DefMI->eraseFromParent();
                    ++NumLoadFold;

                    // MI is replaced with FoldMI.
                    Changed = true;
                    continue;
                }
            }
        }
    }

    return Changed;
}
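// A toy model of the single-candidate scheme in this older revision, with
// invented names rather than the MachineInstr API: remember the destination
// register of the most recent foldable load, clear it at barriers (stores,
// calls, labels, and the like), and fold the load into the next instruction
// that uses that register.
#include <cstdio>
#include <vector>

struct Inst {
  enum Kind { Load, Use, Barrier } kind;
  unsigned reg; // destination (Load) or consumed register (Use)
};

int main() {
  std::vector<Inst> block = {
      {Inst::Load, 5}, {Inst::Barrier, 0}, // the barrier kills candidate r5
      {Inst::Load, 7}, {Inst::Use, 7},     // r7 survives and gets folded
  };
  unsigned candidate = 0; // 0 means "no candidate", as in the pass
  for (const Inst &I : block) {
    if (I.kind == Inst::Barrier)
      candidate = 0;
    else if (I.kind == Inst::Load)
      candidate = I.reg;
    else if (I.kind == Inst::Use && I.reg == candidate) {
      std::printf("folding load of r%u into its use\n", I.reg);
      candidate = 0; // the candidate is consumed by the fold
    }
  }
}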