Example #1
/// Merge an autorelease with a retain into a fused call.
bool
ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease,
                                     InstructionClass Class,
                                     SmallPtrSet<Instruction *, 4>
                                       &DependingInstructions,
                                     SmallPtrSet<const BasicBlock *, 4>
                                       &Visited) {
  const Value *Arg = GetObjCArg(Autorelease);

  // Check that there are no instructions between the retain and the autorelease
  // (such as an autorelease_pop) which may change the count.
  CallInst *Retain = 0;
  if (Class == IC_AutoreleaseRV)
    FindDependencies(RetainAutoreleaseRVDep, Arg,
                     Autorelease->getParent(), Autorelease,
                     DependingInstructions, Visited, PA);
  else
    FindDependencies(RetainAutoreleaseDep, Arg,
                     Autorelease->getParent(), Autorelease,
                     DependingInstructions, Visited, PA);

  Visited.clear();
  if (DependingInstructions.size() != 1) {
    DependingInstructions.clear();
    return false;
  }

  Retain = dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
  DependingInstructions.clear();

  if (!Retain ||
      GetBasicInstructionClass(Retain) != IC_Retain ||
      GetObjCArg(Retain) != Arg)
    return false;

  Changed = true;
  ++NumPeeps;

  DEBUG(dbgs() << "ObjCARCContract::ContractAutorelease: Fusing "
                  "retain/autorelease. Erasing: " << *Autorelease << "\n"
                  "                                      Old Retain: "
               << *Retain << "\n");

  if (Class == IC_AutoreleaseRV)
    Retain->setCalledFunction(getRetainAutoreleaseRVCallee(F.getParent()));
  else
    Retain->setCalledFunction(getRetainAutoreleaseCallee(F.getParent()));

  DEBUG(dbgs() << "                                      New Retain: "
               << *Retain << "\n");

  EraseInstruction(Autorelease);
  return true;
}
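Several of these examples share the pattern just shown: fill a scratch set from a query, require exactly one result, and clear the set so the caller can reuse it. A minimal sketch of that contract in isolation (takeSingleResult is a hypothetical helper, not ObjCARC API):

#include "llvm/ADT/SmallPtrSet.h"

// Consume a scratch result set: return its sole element, or null when the
// query produced zero or several results. Always clears the set so it can
// be reused for the next query.
template <typename T, unsigned N>
static T *takeSingleResult(llvm::SmallPtrSet<T *, N> &Results) {
  T *Only = Results.size() == 1 ? *Results.begin() : nullptr;
  Results.clear();
  return Only;
}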
// Check PHI instructions at the beginning of MBB. It is assumed that
// calcRegsPassed has been run so BBInfo::isLiveOut is valid.
void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) {
  SmallPtrSet<const MachineBasicBlock*, 8> seen;
  for (MachineBasicBlock::const_iterator BBI = MBB->begin(), BBE = MBB->end();
       BBI != BBE && BBI->isPHI(); ++BBI) {
    seen.clear();

    for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
      unsigned Reg = BBI->getOperand(i).getReg();
      const MachineBasicBlock *Pre = BBI->getOperand(i + 1).getMBB();
      if (!Pre->isSuccessor(MBB))
        continue;
      seen.insert(Pre);
      BBInfo &PrInfo = MBBInfoMap[Pre];
      if (PrInfo.reachable && !PrInfo.isLiveOut(Reg))
        report("PHI operand is not live-out from predecessor",
               &BBI->getOperand(i), i);
    }

    // Did we see all predecessors?
    for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(),
           PrE = MBB->pred_end(); PrI != PrE; ++PrI) {
      if (!seen.count(*PrI)) {
        report("Missing PHI operand", BBI);
        *OS << "BB#" << (*PrI)->getNumber()
            << " is a predecessor according to the CFG.\n";
      }
    }
  }
}
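The same coverage check can be phrased at the IR level. A small sketch, assuming current LLVM headers (llvm::predecessors from llvm/IR/CFG.h), not MachineVerifier API:

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Instructions.h"

// True if every CFG predecessor of PN's block appears as one of PN's
// incoming blocks -- the IR analogue of the verifier check above.
static bool phiCoversAllPreds(const llvm::PHINode &PN) {
  llvm::SmallPtrSet<const llvm::BasicBlock *, 8> Seen;
  for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
    Seen.insert(PN.getIncomingBlock(i));
  for (const llvm::BasicBlock *Pred : llvm::predecessors(PN.getParent()))
    if (!Seen.count(Pred))
      return false;
  return true;
}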
Example #3
/// FindSelectorAndURoR - Find the eh.selector call associated with the
/// eh.exception call, and indicate whether there is a URoR "invoke" associated
/// with the eh.exception call. This recursively looks past instructions which
/// don't change the EH pointer value, like casts or PHI nodes.
bool
DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke,
                                    SmallPtrSet<IntrinsicInst*, 8> &SelCalls) {
  SmallPtrSet<PHINode*, 32> SeenPHIs;
  bool Changed = false;

 restart:
  for (Value::use_iterator
         I = Inst->use_begin(), E = Inst->use_end(); I != E; ++I) {
    Instruction *II = dyn_cast<Instruction>(*I);
    if (!II || II->getParent()->getParent() != F) continue;
    
    if (IntrinsicInst *Sel = dyn_cast<IntrinsicInst>(II)) {
      if (Sel->getIntrinsicID() == Intrinsic::eh_selector)
        SelCalls.insert(Sel);
    } else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(II)) {
      if (Invoke->getCalledFunction() == URoR)
        URoRInvoke = true;
    } else if (CastInst *CI = dyn_cast<CastInst>(II)) {
      Changed |= FindSelectorAndURoR(CI, URoRInvoke, SelCalls);
    } else if (StoreInst *SI = dyn_cast<StoreInst>(II)) {
      if (!PromoteStoreInst(SI)) continue;
      Changed = true;
      SeenPHIs.clear();
      goto restart;             // Uses may have changed, restart loop.
    } else if (PHINode *PN = dyn_cast<PHINode>(II)) {
      if (SeenPHIs.insert(PN))
        // Don't process a PHI node more than once.
        Changed |= FindSelectorAndURoR(PN, URoRInvoke, SelCalls);
    }
  }

  return Changed;
}
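The goto restart above is the usual answer to use-list invalidation: if a transformation may rewrite uses, rescan from the top instead of continuing with stale iterators. Stripped to its skeleton (Transform is a placeholder for whatever mutation the pass performs):

#include "llvm/IR/Instruction.h"

// Re-walk Inst's uses from the beginning whenever Transform reports that it
// may have changed the use list.
template <typename Fn>
static void forEachUseWithRestart(llvm::Instruction *Inst, Fn Transform) {
restart:
  for (llvm::Use &U : Inst->uses())
    if (Transform(U)) // true => use list may have changed; iterators stale.
      goto restart;
}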
static void EliminateMultipleEntryLoops(MachineFunction &MF,
                                        const MachineLoopInfo &MLI) {
  SmallPtrSet<MachineBasicBlock *, 8> InSet;
  for (scc_iterator<MachineFunction *> I = scc_begin(&MF), E = scc_end(&MF);
       I != E; ++I) {
    const std::vector<MachineBasicBlock *> &CurrentSCC = *I;

    // Skip trivial SCCs.
    if (CurrentSCC.size() == 1)
      continue;

    InSet.insert(CurrentSCC.begin(), CurrentSCC.end());
    MachineBasicBlock *Header = nullptr;
    for (MachineBasicBlock *MBB : CurrentSCC) {
      for (MachineBasicBlock *Pred : MBB->predecessors()) {
        if (InSet.count(Pred))
          continue;
        if (!Header) {
          Header = MBB;
          break;
        }
        // TODO: Implement multiple-entry loops.
        report_fatal_error("multiple-entry loops are not supported yet");
      }
    }
    assert(MLI.isLoopHeader(Header));

    InSet.clear();
  }
}
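scc_iterator is what makes the entry-counting approach work: it enumerates the strongly connected components of the CFG in post-order, so every loop appears as one component. A self-contained sketch over an IR Function (counting rather than transforming):

#include "llvm/ADT/SCCIterator.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Function.h"

// Count nontrivial SCCs of F's CFG: components with more than one block, or
// a single block that branches back to itself (hasCycle()).
static unsigned countNontrivialSCCs(llvm::Function &F) {
  unsigned N = 0;
  for (auto I = llvm::scc_begin(&F); !I.isAtEnd(); ++I)
    if (I->size() > 1 || I.hasCycle())
      ++N;
  return N;
}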
Example #5
/// Return a set of basic blocks in which to insert sunk instructions.
///
/// The returned set of basic blocks (BBsToSinkInto) should satisfy:
///
/// * Inside the loop \p L
/// * For each UseBB in \p UseBBs, there is at least one BB in BBsToSinkInto
///   that dominates the UseBB
/// * Has minimum total frequency that is no greater than preheader frequency
///
/// The purpose of the function is to find the optimal sinking points to
/// minimize execution cost, which is defined as "sum of frequency of
/// BBsToSinkInto".
/// As a result, the returned BBsToSinkInto needs to have minimum total
/// frequency.
/// Additionally, if the total frequency of BBsToSinkInto exceeds preheader
/// frequency, the optimal solution is not sinking (return empty set).
///
/// \p ColdLoopBBs is used to help find the optimal sinking locations.
/// It stores a list of BBs, sorted by BB frequency, where each BB:
///
/// * Is inside the loop \p L
/// * Has a frequency no larger than the loop's preheader
///
/// The complexity of the function is O(UseBBs.size() * ColdLoopBBs.size()).
/// To avoid expensive computation, we cap the maximum UseBBs.size() in its
/// caller.
static SmallPtrSet<BasicBlock *, 2>
findBBsToSinkInto(const Loop &L, const SmallPtrSetImpl<BasicBlock *> &UseBBs,
                  const SmallVectorImpl<BasicBlock *> &ColdLoopBBs,
                  DominatorTree &DT, BlockFrequencyInfo &BFI) {
  SmallPtrSet<BasicBlock *, 2> BBsToSinkInto;
  if (UseBBs.size() == 0)
    return BBsToSinkInto;

  BBsToSinkInto.insert(UseBBs.begin(), UseBBs.end());
  SmallPtrSet<BasicBlock *, 2> BBsDominatedByColdestBB;

  // For every iteration:
  //   * Pick the ColdestBB from ColdLoopBBs
  //   * Find the set BBsDominatedByColdestBB that satisfy:
  //     - BBsDominatedByColdestBB is a subset of BBsToSinkInto
  //     - Every BB in BBsDominatedByColdestBB is dominated by ColdestBB
  //   * If Freq(ColdestBB) < Freq(BBsDominatedByColdestBB), remove
  //     BBsDominatedByColdestBB from BBsToSinkInto, add ColdestBB to
  //     BBsToSinkInto
  for (BasicBlock *ColdestBB : ColdLoopBBs) {
    BBsDominatedByColdestBB.clear();
    for (BasicBlock *SinkedBB : BBsToSinkInto)
      if (DT.dominates(ColdestBB, SinkedBB))
        BBsDominatedByColdestBB.insert(SinkedBB);
    if (BBsDominatedByColdestBB.size() == 0)
      continue;
    if (adjustedSumFreq(BBsDominatedByColdestBB, BFI) >
        BFI.getBlockFreq(ColdestBB)) {
      for (BasicBlock *DominatedBB : BBsDominatedByColdestBB) {
        BBsToSinkInto.erase(DominatedBB);
      }
      BBsToSinkInto.insert(ColdestBB);
    }
  }

  // If the total frequency of BBsToSinkInto is larger than preheader frequency,
  // do not sink.
  if (adjustedSumFreq(BBsToSinkInto, BFI) >
      BFI.getBlockFreq(L.getLoopPreheader()))
    BBsToSinkInto.clear();
  return BBsToSinkInto;
}
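adjustedSumFreq is used above but not shown. A plausible minimal version simply totals the block frequencies of the set; the real helper in LLVM's LoopSink also discounts multi-block sets by a small percentage, so treat this as a sketch of the shape only:

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/IR/BasicBlock.h"

// Sum of the frequencies of all blocks in BBs.
static llvm::BlockFrequency
adjustedSumFreq(const llvm::SmallPtrSetImpl<llvm::BasicBlock *> &BBs,
                llvm::BlockFrequencyInfo &BFI) {
  llvm::BlockFrequency Sum(0);
  for (llvm::BasicBlock *BB : BBs)
    Sum += BFI.getBlockFreq(BB);
  return Sum;
}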
Example #6
TEST(SmallPtrSetTest, GrowthTest) {
  int i;
  int buf[8];
  for(i=0; i<8; ++i) buf[i]=0;


  SmallPtrSet<int *, 4> s;
  typedef SmallPtrSet<int *, 4>::iterator iter;
  
  s.insert(&buf[0]);
  s.insert(&buf[1]);
  s.insert(&buf[2]);
  s.insert(&buf[3]);
  EXPECT_EQ(4U, s.size());

  i = 0;
  for(iter I=s.begin(), E=s.end(); I!=E; ++I, ++i)
      (**I)++;
  EXPECT_EQ(4, i);
  for(i=0; i<8; ++i)
      EXPECT_EQ(i<4?1:0,buf[i]);

  s.insert(&buf[4]);
  s.insert(&buf[5]);
  s.insert(&buf[6]);
  s.insert(&buf[7]);

  i = 0;
  for(iter I=s.begin(), E=s.end(); I!=E; ++I, ++i)
      (**I)++;
  EXPECT_EQ(8, i);
  s.erase(&buf[4]);
  s.erase(&buf[5]);
  s.erase(&buf[6]);
  s.erase(&buf[7]);
  EXPECT_EQ(4U, s.size());

  i = 0;
  for(iter I=s.begin(), E=s.end(); I!=E; ++I, ++i)
      (**I)++;
  EXPECT_EQ(4, i);
  for(i=0; i<8; ++i)
      EXPECT_EQ(i<4?3:1,buf[i]);

  s.clear();
  for(i=0; i<8; ++i) buf[i]=0;
  for(i=0; i<128; ++i) s.insert(&buf[i%8]); // test repeated entries
  EXPECT_EQ(8U, s.size());
  for(iter I=s.begin(), E=s.end(); I!=E; ++I, ++i)
      (**I)++;
  for(i=0; i<8; ++i)
      EXPECT_EQ(1,buf[i]);
}
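The properties the test exercises, in compressed form: insert() deduplicates, the set transparently spills from its inline buffer of 4 to the heap on the fifth element, and erase() shrinks size(). A minimal standalone sketch (current LLVM's insert() returns a pair<iterator, bool>; very old releases returned plain bool, as some examples below still assume):

#include "llvm/ADT/SmallPtrSet.h"
#include <cassert>

void smallPtrSetBasics() {
  int a = 0, b = 0;
  llvm::SmallPtrSet<int *, 4> S;
  assert(S.insert(&a).second);  // newly inserted
  assert(!S.insert(&a).second); // duplicate: set unchanged
  S.insert(&b);
  assert(S.size() == 2u);
  S.erase(&a);
  assert(!S.count(&a) && S.count(&b));
}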
bool PropagateJuliaAddrspaces::runOnFunction(Function &F) {
    visit(F);
    for (auto it : ToInsert)
        it.first->insertBefore(it.second);
    for (Instruction *I : ToDelete)
        I->eraseFromParent();
    ToInsert.clear();
    ToDelete.clear();
    LiftingMap.clear();
    Visited.clear();
    return true;
}
Example #8
static void determineMissingVNIs(const SlotIndexes &Indexes, LiveInterval &LI) {
  SmallPtrSet<const MachineBasicBlock*, 5> Visited;

  LiveRange::iterator OutIt;
  VNInfo *PrevValNo = nullptr;
  for (LiveRange::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
    LiveRange::Segment &S = *I;
    // Determine final VNI if necessary.
    if (S.valno == nullptr) {
      // This can only happen at the beginning of a basic block.
      assert(S.start.isBlock() && "valno should only be missing at block begin");

      Visited.clear();
      const MachineBasicBlock *MBB = Indexes.getMBBFromIndex(S.start);
      for (const MachineBasicBlock *Pred : MBB->predecessors()) {
        VNInfo *VNI = searchForVNI(Indexes, LI, Pred, Visited);
        if (VNI != nullptr) {
          S.valno = VNI;
          break;
        }
      }
      assert(S.valno != nullptr && "could not determine valno");
    }
    // Merge with previous segment if it has the same VNI.
    if (PrevValNo == S.valno && OutIt->end == S.start) {
      OutIt->end = S.end;
    } else {
      // Didn't merge. Move OutIt to next segment.
      if (PrevValNo == nullptr)
        OutIt = LI.begin();
      else
        ++OutIt;

      if (OutIt != I)
        *OutIt = *I;
      PrevValNo = S.valno;
    }
  }
  // If we merged some segments chop off the end.
  ++OutIt;
  LI.segments.erase(OutIt, LI.end());
}
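The merge half of this function is an in-place coalescing pass: OutIt is a write cursor trailing the read iterator, extending the previous segment whenever value and boundary match. The same shape on a plain vector, as a sketch:

#include <iterator>
#include <vector>

struct Seg {
  int Start, End, Val;
};

// Coalesce adjacent segments that carry the same value and touch exactly,
// compacting the vector in place through a trailing write cursor.
static void coalesce(std::vector<Seg> &Segs) {
  if (Segs.empty())
    return;
  auto Out = Segs.begin();
  for (auto It = std::next(Out), E = Segs.end(); It != E; ++It) {
    if (Out->Val == It->Val && Out->End == It->Start)
      Out->End = It->End; // Merge into the previous segment.
    else
      *++Out = *It;       // Keep this segment; advance the write cursor.
  }
  Segs.erase(std::next(Out), Segs.end());
}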
Example #9
static void determineMissingVNIs(const SlotIndexes &Indexes, LiveInterval &LI) {
  SmallPtrSet<const MachineBasicBlock*, 5> Visited;
  for (LiveRange::Segment &S : LI.segments) {
    if (S.valno != nullptr)
      continue;
    // This can only happen at the beginning of a basic block.
    assert(S.start.isBlock() && "valno should only be missing at block begin");

    Visited.clear();
    const MachineBasicBlock *MBB = Indexes.getMBBFromIndex(S.start);
    for (const MachineBasicBlock *Pred : MBB->predecessors()) {
      VNInfo *VNI = searchForVNI(Indexes, LI, Pred, Visited);
      if (VNI != nullptr) {
        S.valno = VNI;
        break;
      }
    }
    assert(S.valno != nullptr && "could not determine valno");
  }
}
Example #10
  // Given a list of loads that could be constant-folded (LoadBaseAddresses),
  // estimate the number of optimized instructions after substituting the
  // concrete values for the given Iteration.
  // Fill in SimplifiedInsns map for future use in DCE-estimation.
  unsigned EstimateNumberOfSimplifiedInsns(unsigned Iteration) {
    SmallVector<Instruction *, 8> Worklist;
    SimplifiedValues.clear();
    CountedInsns.clear();

    NumberOfOptimizedInstructions = 0;
    // We start by adding all loads to the worklist.
    for (auto LoadDescr : LoadBaseAddresses) {
      LoadInst *LI = LoadDescr.first;
      SimplifiedValues[LI] = computeLoadValue(LI, Iteration);
      if (CountedInsns.insert(LI).second)
        NumberOfOptimizedInstructions += TTI.getUserCost(LI);

      for (auto U : LI->users()) {
        Instruction *UI = dyn_cast<Instruction>(U);
        if (!UI)
          continue;
        if (!L->contains(UI))
          continue;
        Worklist.push_back(UI);
      }
    }

    // And then we try to simplify every user of every instruction from the
    // worklist. If we do simplify a user, add it to the worklist to process
    // its users as well.
    while (!Worklist.empty()) {
      Instruction *I = Worklist.pop_back_val();
      if (!visit(I))
        continue;
      for (auto U : I->users()) {
        Instruction *UI = dyn_cast<Instruction>(U);
        if (!UI)
          continue;
        if (!L->contains(UI))
          continue;
        Worklist.push_back(UI);
      }
    }
    return NumberOfOptimizedInstructions;
  }
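Both loops above are the standard def-to-users worklist propagation. Note that the original can push (and visit) the same instruction more than once; a common variant guards with a visited set, sketched here:

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Support/Casting.h"

// Walk the transitive users of Root, visiting each instruction at most once
// even when it is reachable through several operands.
static void propagateToUsers(llvm::Instruction *Root) {
  llvm::SmallVector<llvm::Instruction *, 8> Worklist;
  llvm::SmallPtrSet<llvm::Instruction *, 16> Visited;
  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    llvm::Instruction *I = Worklist.pop_back_val();
    if (!Visited.insert(I).second)
      continue; // Already processed.
    // ... simplify or count I here ...
    for (llvm::User *U : I->users())
      if (auto *UI = llvm::dyn_cast<llvm::Instruction>(U))
        Worklist.push_back(UI);
  }
}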
Example #11
bool OptimizeExts::runOnMachineFunction(MachineFunction &MF) {
  TM = &MF.getTarget();
  TII = TM->getInstrInfo();
  MRI = &MF.getRegInfo();
  DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0;

  bool Changed = false;

  SmallPtrSet<MachineInstr*, 8> LocalMIs;
  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
    MachineBasicBlock *MBB = &*I;
    LocalMIs.clear();
    for (MachineBasicBlock::iterator MII = I->begin(), ME = I->end(); MII != ME;
         ++MII) {
      MachineInstr *MI = &*MII;
      Changed |= OptimizeInstr(MI, MBB, LocalMIs);
    }
  }

  return Changed;
}
Example #12
/// getMachineBasicBlocks - Populate given set using machine basic blocks which
/// have machine instructions that belong to the lexical scope identified by
/// DebugLoc.
void LexicalScopes::
getMachineBasicBlocks(DebugLoc DL, 
                      SmallPtrSet<const MachineBasicBlock*, 4> &MBBs) {
  MBBs.clear();
  LexicalScope *Scope = getOrCreateLexicalScope(DL);
  if (!Scope)
    return;
  
  if (Scope == CurrentFnLexicalScope) {
    for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
         I != E; ++I)
      MBBs.insert(I);
    return;
  }

  SmallVector<InsnRange, 4> &InsnRanges = Scope->getRanges();
  for (SmallVector<InsnRange, 4>::iterator I = InsnRanges.begin(),
         E = InsnRanges.end(); I != E; ++I) {
    InsnRange &R = *I;
    MBBs.insert(R.first->getParent());
  }
}
Example #13
vector<const TargetRegisterInfo*> ipaFindUsedReturns(ParameterRegistry& registry, Function& function, const vector<const TargetRegisterInfo*>& returns)
{
	// Excuse entry points from not having callers; use every return.
	if (function.use_empty())
	if (auto address = md::getVirtualAddress(function))
	if (isEntryPoint(address->getLimitedValue()))
	{
		return returns;
	}
	
	// Otherwise, loop through callers and see which registers are used after the function call.
	TargetInfo& targetInfo = registry.getTargetInfo();
	SmallPtrSet<MemoryPhi*, 4> visited;
	vector<const TargetRegisterInfo*> result;
	for (auto& use : function.uses())
	{
		if (auto call = dyn_cast<CallInst>(use.getUser()))
		{
			auto parentFunction = call->getParent()->getParent();
			if (parentFunction == &function)
			{
				// TODO: This isn't impossible to compute, just somewhat inconvenient.
				continue;
			}
			
			auto parentArgs = static_cast<Argument*>(parentFunction->arg_begin());
			auto pointerType = dyn_cast<PointerType>(parentArgs->getType());
			assert(pointerType != nullptr && pointerType->getTypeAtIndex(int(0))->getStructName() == "struct.x86_regs");
			(void) pointerType;
			
			visited.clear();
			MemorySSA& mssa = *registry.getMemorySSA(*parentFunction);
			findUsedReturns(returns, targetInfo, mssa, visited, *mssa.getMemoryAccess(call), result);
		}
	}
	return result;
}
bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
  if (skipOptnoneFunction(L))
    return false;

  DominatorTreeWrapperPass *DTWP =
      getAnalysisIfAvailable<DominatorTreeWrapperPass>();
  DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
  LoopInfo *LI = &getAnalysis<LoopInfo>();
  DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
  const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
  const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
  AssumptionTracker *AT = &getAnalysis<AssumptionTracker>();

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);
  array_pod_sort(ExitBlocks.begin(), ExitBlocks.end());

  SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;

  // The bit we are stealing from the pointer represents whether this basic
  // block is the header of a subloop, in which case we only process its phis.
  typedef PointerIntPair<BasicBlock*, 1> WorklistItem;
  SmallVector<WorklistItem, 16> VisitStack;
  SmallPtrSet<BasicBlock*, 32> Visited;

  bool Changed = false;
  bool LocalChanged;
  do {
    LocalChanged = false;

    VisitStack.clear();
    Visited.clear();

    VisitStack.push_back(WorklistItem(L->getHeader(), false));

    while (!VisitStack.empty()) {
      WorklistItem Item = VisitStack.pop_back_val();
      BasicBlock *BB = Item.getPointer();
      bool IsSubloopHeader = Item.getInt();

      // Simplify instructions in the current basic block.
      for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
        Instruction *I = BI++;

        // The first time through the loop ToSimplify is empty and we try to
        // simplify all instructions. On later iterations ToSimplify is not
        // empty and we only bother simplifying instructions that are in it.
        if (!ToSimplify->empty() && !ToSimplify->count(I))
          continue;

        // Don't bother simplifying unused instructions.
        if (!I->use_empty()) {
          Value *V = SimplifyInstruction(I, DL, TLI, DT, AT);
          if (V && LI->replacementPreservesLCSSAForm(I, V)) {
            // Mark all uses for resimplification next time round the loop.
            for (User *U : I->users())
              Next->insert(cast<Instruction>(U));

            I->replaceAllUsesWith(V);
            LocalChanged = true;
            ++NumSimplified;
          }
        }
        bool res = RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
        if (res) {
          // RecursivelyDeleteTriviallyDeadInstruction can remove
          // more than one instruction, so simply incrementing the
          // iterator does not work. When instructions get deleted
          // re-iterate instead.
          BI = BB->begin(); BE = BB->end();
          LocalChanged |= res;
        }

        if (IsSubloopHeader && !isa<PHINode>(I))
          break;
      }

      // Add all successors to the worklist, except for loop exit blocks and the
      // bodies of subloops. We visit the headers of loops so that we can process
      // their phis, but we contract the rest of the subloop body and only follow
      // edges leading back to the original loop.
      for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
           ++SI) {
        BasicBlock *SuccBB = *SI;
        if (!Visited.insert(SuccBB).second)
          continue;

        const Loop *SuccLoop = LI->getLoopFor(SuccBB);
        if (SuccLoop && SuccLoop->getHeader() == SuccBB
                     && L->contains(SuccLoop)) {
          VisitStack.push_back(WorklistItem(SuccBB, true));

          SmallVector<BasicBlock*, 8> SubLoopExitBlocks;
          SuccLoop->getExitBlocks(SubLoopExitBlocks);

          for (unsigned i = 0; i < SubLoopExitBlocks.size(); ++i) {
            BasicBlock *ExitBB = SubLoopExitBlocks[i];
            if (LI->getLoopFor(ExitBB) == L && Visited.insert(ExitBB).second)
              VisitStack.push_back(WorklistItem(ExitBB, false));
          }

          continue;
        }

        bool IsExitBlock = std::binary_search(ExitBlocks.begin(),
                                              ExitBlocks.end(), SuccBB);
        if (IsExitBlock)
          continue;

        VisitStack.push_back(WorklistItem(SuccBB, false));
      }
    }

    // Place the list of instructions to simplify on the next loop iteration
    // into ToSimplify.
    std::swap(ToSimplify, Next);
    Next->clear();

    Changed |= LocalChanged;
  } while (LocalChanged);

  return Changed;
}
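The S1/S2 pointer swap is worth isolating: two stably allocated sets are flipped between passes, so inserting next-pass work never mutates the set being drained. A generic sketch of the driver loop (Process is a placeholder that may seed the next pass):

#include "llvm/ADT/SmallPtrSet.h"
#include <utility>

// Run Process over the seed items; Process may schedule follow-up items
// into the "next" set. Swap and repeat until a pass schedules nothing.
template <typename T, typename Fn>
static void fixpoint(const llvm::SmallPtrSetImpl<T *> &Seed, Fn Process) {
  llvm::SmallPtrSet<T *, 8> S1(Seed.begin(), Seed.end()), S2;
  auto *Cur = &S1, *Next = &S2;
  while (!Cur->empty()) {
    for (T *Item : *Cur)
      Process(Item, *Next); // Inserts go into *Next, never into *Cur.
    std::swap(Cur, Next);
    Next->clear();
  }
}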
Example #15
/// processImplicitDefs - Process IMPLICIT_DEF instructions and make sure
/// there is one implicit_def for each use. Add isUndef marker to
/// implicit_def defs and their uses.
bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {

  DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n"
               << "********** Function: "
               << ((Value*)fn.getFunction())->getName() << '\n');

  bool Changed = false;

  TII = fn.getTarget().getInstrInfo();
  TRI = fn.getTarget().getRegisterInfo();
  MRI = &fn.getRegInfo();
  LV = &getAnalysis<LiveVariables>();

  SmallSet<unsigned, 8> ImpDefRegs;
  SmallVector<MachineInstr*, 8> ImpDefMIs;
  SmallVector<MachineInstr*, 4> RUses;
  SmallPtrSet<MachineBasicBlock*,16> Visited;
  SmallPtrSet<MachineInstr*, 8> ModInsts;

  MachineBasicBlock *Entry = fn.begin();
  for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
         DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
       DFI != E; ++DFI) {
    MachineBasicBlock *MBB = *DFI;
    for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
         I != E; ) {
      MachineInstr *MI = &*I;
      ++I;
      if (MI->isImplicitDef()) {
        ImpDefMIs.push_back(MI);
        // Is this a sub-register read-modify-write?
        if (MI->getOperand(0).readsReg())
          continue;
        unsigned Reg = MI->getOperand(0).getReg();
        ImpDefRegs.insert(Reg);
        if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
          for (const unsigned *SS = TRI->getSubRegisters(Reg); *SS; ++SS)
            ImpDefRegs.insert(*SS);
        }
        continue;
      }

      // Eliminate %reg1032:sub<def> = COPY undef.
      if (MI->isCopy() && MI->getOperand(0).readsReg()) {
        MachineOperand &MO = MI->getOperand(1);
        if (MO.isUndef() || ImpDefRegs.count(MO.getReg())) {
          if (MO.isKill()) {
            LiveVariables::VarInfo& vi = LV->getVarInfo(MO.getReg());
            vi.removeKill(MI);
          }
          unsigned Reg = MI->getOperand(0).getReg();
          MI->eraseFromParent();
          Changed = true;

          // A REG_SEQUENCE may have been expanded into partial definitions.
          // If this was the last one, mark Reg as implicitly defined.
          if (TargetRegisterInfo::isVirtualRegister(Reg) && MRI->def_empty(Reg))
            ImpDefRegs.insert(Reg);
          continue;
        }
      }

      bool ChangedToImpDef = false;
      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
        MachineOperand& MO = MI->getOperand(i);
        if (!MO.isReg() || !MO.readsReg())
          continue;
        unsigned Reg = MO.getReg();
        if (!Reg)
          continue;
        if (!ImpDefRegs.count(Reg))
          continue;
        // Use is a copy, just turn it into an implicit_def.
        if (CanTurnIntoImplicitDef(MI, Reg, i, ImpDefRegs)) {
          bool isKill = MO.isKill();
          MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
          for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j)
            MI->RemoveOperand(j);
          if (isKill) {
            ImpDefRegs.erase(Reg);
            LiveVariables::VarInfo& vi = LV->getVarInfo(Reg);
            vi.removeKill(MI);
          }
          ChangedToImpDef = true;
          Changed = true;
          break;
        }

        Changed = true;
        MO.setIsUndef();
        // This is a partial register redef of an implicit def.
        // Make sure the whole register is defined by the instruction.
        if (MO.isDef()) {
          MI->addRegisterDefined(Reg);
          continue;
        }
        if (MO.isKill() || MI->isRegTiedToDefOperand(i)) {
          // Make sure other reads of Reg are also marked <undef>.
          for (unsigned j = i+1; j != e; ++j) {
            MachineOperand &MOJ = MI->getOperand(j);
            if (MOJ.isReg() && MOJ.getReg() == Reg && MOJ.readsReg())
              MOJ.setIsUndef();
          }
          ImpDefRegs.erase(Reg);
        }
      }

      if (ChangedToImpDef) {
        // Backtrack to process this new implicit_def.
        --I;
      } else {
        for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
          MachineOperand& MO = MI->getOperand(i);
          if (!MO.isReg() || !MO.isDef())
            continue;
          ImpDefRegs.erase(MO.getReg());
        }
      }
    }

    // Any outstanding liveout implicit_def's?
    for (unsigned i = 0, e = ImpDefMIs.size(); i != e; ++i) {
      MachineInstr *MI = ImpDefMIs[i];
      unsigned Reg = MI->getOperand(0).getReg();
      if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
          !ImpDefRegs.count(Reg)) {
        // Delete all "local" implicit_def's. That includes those which define
        // physical registers since they cannot be liveout.
        MI->eraseFromParent();
        Changed = true;
        continue;
      }

      // If there are multiple defs of the same register and at least one
      // is not an implicit_def, do not insert implicit_def's before the
      // uses.
      bool Skip = false;
      SmallVector<MachineInstr*, 4> DeadImpDefs;
      for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg),
             DE = MRI->def_end(); DI != DE; ++DI) {
        MachineInstr *DeadImpDef = &*DI;
        if (!DeadImpDef->isImplicitDef()) {
          Skip = true;
          break;
        }
        DeadImpDefs.push_back(DeadImpDef);
      }
      if (Skip)
        continue;

      // The only implicit_defs we want to keep are those that are live
      // out of their blocks.
      for (unsigned j = 0, ee = DeadImpDefs.size(); j != ee; ++j)
        DeadImpDefs[j]->eraseFromParent();
      Changed = true;

      // Process each use instruction once.
      for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
             UE = MRI->use_end(); UI != UE; ++UI) {
        if (UI.getOperand().isUndef())
          continue;
        MachineInstr *RMI = &*UI;
        if (ModInsts.insert(RMI))
          RUses.push_back(RMI);
      }

      for (unsigned i = 0, e = RUses.size(); i != e; ++i) {
        MachineInstr *RMI = RUses[i];

        // Turn a copy use into an implicit_def.
        if (isUndefCopy(RMI, Reg, ImpDefRegs)) {
          RMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));

          bool isKill = false;
          SmallVector<unsigned, 4> Ops;
          for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) {
            MachineOperand &RRMO = RMI->getOperand(j);
            if (RRMO.isReg() && RRMO.getReg() == Reg) {
              Ops.push_back(j);
              if (RRMO.isKill())
                isKill = true;
            }
          }
          // Leave the other operands alone.
          for (unsigned j = 0, ee = Ops.size(); j != ee; ++j) {
            unsigned OpIdx = Ops[j];
            RMI->RemoveOperand(OpIdx-j);
          }

          // Update LiveVariables varinfo if the instruction is a kill.
          if (isKill) {
            LiveVariables::VarInfo& vi = LV->getVarInfo(Reg);
            vi.removeKill(RMI);
          }
          continue;
        }

        // Replace Reg with a new vreg that's marked implicit.
        const TargetRegisterClass* RC = MRI->getRegClass(Reg);
        unsigned NewVReg = MRI->createVirtualRegister(RC);
        bool isKill = true;
        for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) {
          MachineOperand &RRMO = RMI->getOperand(j);
          if (RRMO.isReg() && RRMO.getReg() == Reg) {
            RRMO.setReg(NewVReg);
            RRMO.setIsUndef();
            if (isKill) {
              // Only the first operand of NewVReg is marked kill.
              RRMO.setIsKill();
              isKill = false;
            }
          }
        }
      }
      RUses.clear();
      ModInsts.clear();
    }
    ImpDefRegs.clear();
    ImpDefMIs.clear();
  }

  return Changed;
}
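A detail that carries the traversal above: df_ext_iterator borrows the visited set by reference instead of owning one, so the caller can size it, reuse it, or inspect it after the walk. The IR-level equivalent, as a sketch:

#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Function.h"

// Depth-first walk from the entry block, recording visited blocks in an
// externally owned set that stays valid after the loop.
static unsigned countReachableBlocks(llvm::Function &F) {
  llvm::SmallPtrSet<llvm::BasicBlock *, 16> Visited;
  unsigned N = 0;
  for (auto I = llvm::df_ext_begin(&F.getEntryBlock(), Visited),
            E = llvm::df_ext_end(&F.getEntryBlock(), Visited);
       I != E; ++I)
    ++N;
  return N;
}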
Example #16
int Compilation::performJobsImpl() {
  // Create a TaskQueue for execution.
  std::unique_ptr<TaskQueue> TQ;
  if (SkipTaskExecution)
    TQ.reset(new DummyTaskQueue(NumberOfParallelCommands));
  else
    TQ.reset(new TaskQueue(NumberOfParallelCommands));

  PerformJobsState State;

  using DependencyGraph = DependencyGraph<const Job *>;
  DependencyGraph DepGraph;
  SmallPtrSet<const Job *, 16> DeferredCommands;
  SmallVector<const Job *, 16> InitialOutOfDateCommands;

  DependencyGraph::MarkTracer ActualIncrementalTracer;
  DependencyGraph::MarkTracer *IncrementalTracer = nullptr;
  if (ShowIncrementalBuildDecisions)
    IncrementalTracer = &ActualIncrementalTracer;

  auto noteBuilding = [&] (const Job *cmd, StringRef reason) {
    if (!ShowIncrementalBuildDecisions)
      return;
    if (State.ScheduledCommands.count(cmd))
      return;
    llvm::outs() << "Queuing "
                 << llvm::sys::path::filename(cmd->getOutput().getBaseInput(0))
                 << " " << reason << "\n";
    IncrementalTracer->printPath(llvm::outs(), cmd,
                                 [](raw_ostream &out, const Job *base) {
      out << llvm::sys::path::filename(base->getOutput().getBaseInput(0));
    });
  };

  // Set up scheduleCommandIfNecessaryAndPossible.
  // This will only schedule the given command if it has not been scheduled
  // and if all of its inputs are in FinishedCommands.
  auto scheduleCommandIfNecessaryAndPossible = [&] (const Job *Cmd) {
    if (State.ScheduledCommands.count(Cmd))
      return;

    if (auto Blocking = findUnfinishedJob(Cmd->getInputs(),
                                          State.FinishedCommands)) {
      State.BlockingCommands[Blocking].push_back(Cmd);
      return;
    }

    // FIXME: Failing here should not take down the whole process.
    bool success = writeFilelistIfNecessary(Cmd, Diags);
    assert(success && "failed to write filelist");
    (void)success;

    assert(Cmd->getExtraEnvironment().empty() &&
           "not implemented for compilations with multiple jobs");
    State.ScheduledCommands.insert(Cmd);
    TQ->addTask(Cmd->getExecutable(), Cmd->getArguments(), llvm::None,
                (void *)Cmd);
  };

  // When a task finishes, we need to reevaluate the other commands that
  // might have been blocked.
  auto markFinished = [&] (const Job *Cmd) {
    State.FinishedCommands.insert(Cmd);

    auto BlockedIter = State.BlockingCommands.find(Cmd);
    if (BlockedIter != State.BlockingCommands.end()) {
      auto AllBlocked = std::move(BlockedIter->second);
      State.BlockingCommands.erase(BlockedIter);
      for (auto *Blocked : AllBlocked)
        scheduleCommandIfNecessaryAndPossible(Blocked);
    }
  };

  // Schedule all jobs we can.
  for (const Job *Cmd : getJobs()) {
    if (!getIncrementalBuildEnabled()) {
      scheduleCommandIfNecessaryAndPossible(Cmd);
      continue;
    }

    // Try to load the dependencies file for this job. If there isn't one, we
    // always have to run the job, but it doesn't affect any other jobs. If
    // there should be one but it's not present or can't be loaded, we have to
    // run all the jobs.
    // FIXME: We can probably do better here!
    Job::Condition Condition = Job::Condition::Always;
    StringRef DependenciesFile =
      Cmd->getOutput().getAdditionalOutputForType(types::TY_SwiftDeps);
    if (!DependenciesFile.empty()) {
      if (Cmd->getCondition() == Job::Condition::NewlyAdded) {
        DepGraph.addIndependentNode(Cmd);
      } else {
        switch (DepGraph.loadFromPath(Cmd, DependenciesFile)) {
        case DependencyGraphImpl::LoadResult::HadError:
          disableIncrementalBuild();
          for (const Job *Cmd : DeferredCommands)
            scheduleCommandIfNecessaryAndPossible(Cmd);
          DeferredCommands.clear();
          break;
        case DependencyGraphImpl::LoadResult::UpToDate:
          Condition = Cmd->getCondition();
          break;
        case DependencyGraphImpl::LoadResult::AffectsDownstream:
          llvm_unreachable("we haven't marked anything in this graph yet");
        }
      }
    }

    switch (Condition) {
    case Job::Condition::Always:
      if (getIncrementalBuildEnabled() && !DependenciesFile.empty()) {
        InitialOutOfDateCommands.push_back(Cmd);
        DepGraph.markIntransitive(Cmd);
      }
      SWIFT_FALLTHROUGH;
    case Job::Condition::RunWithoutCascading:
      noteBuilding(Cmd, "(initial)");
      scheduleCommandIfNecessaryAndPossible(Cmd);
      break;
    case Job::Condition::CheckDependencies:
      DeferredCommands.insert(Cmd);
      break;
    case Job::Condition::NewlyAdded:
      llvm_unreachable("handled above");
    }
  }

  if (getIncrementalBuildEnabled()) {
    SmallVector<const Job *, 16> AdditionalOutOfDateCommands;

    // We scheduled all of the files that have actually changed. Now add the
    // files that haven't changed, so that they'll get built in parallel if
    // possible and after the first set of files if it's not.
    for (auto *Cmd : InitialOutOfDateCommands) {
      DepGraph.markTransitive(AdditionalOutOfDateCommands, Cmd,
                              IncrementalTracer);
    }

    for (auto *transitiveCmd : AdditionalOutOfDateCommands)
      noteBuilding(transitiveCmd, "because of the initial set:");
    size_t firstSize = AdditionalOutOfDateCommands.size();

    // Check all cross-module dependencies as well.
    for (StringRef dependency : DepGraph.getExternalDependencies()) {
      llvm::sys::fs::file_status depStatus;
      if (!llvm::sys::fs::status(dependency, depStatus))
        if (depStatus.getLastModificationTime() < LastBuildTime)
          continue;

      // If the dependency has been modified since the oldest built file,
      // or if we can't stat it for some reason (perhaps it's been deleted?),
      // trigger rebuilds through the dependency graph.
      DepGraph.markExternal(AdditionalOutOfDateCommands, dependency);
    }

    for (auto *externalCmd :
            llvm::makeArrayRef(AdditionalOutOfDateCommands).slice(firstSize)) {
      noteBuilding(externalCmd, "because of external dependencies");
    }

    for (auto *AdditionalCmd : AdditionalOutOfDateCommands) {
      if (!DeferredCommands.count(AdditionalCmd))
        continue;
      scheduleCommandIfNecessaryAndPossible(AdditionalCmd);
      DeferredCommands.erase(AdditionalCmd);
    }
  }

  int Result = EXIT_SUCCESS;

  // Set up a callback which will be called immediately after a task has
  // started. This callback may be used to provide output indicating that the
  // task began.
  auto taskBegan = [this] (ProcessId Pid, void *Context) {
    // TODO: properly handle task began.
    const Job *BeganCmd = (const Job *)Context;

    // For verbose output, print out each command as it begins execution.
    if (Level == OutputLevel::Verbose)
      BeganCmd->printCommandLine(llvm::errs());
    else if (Level == OutputLevel::Parseable)
      parseable_output::emitBeganMessage(llvm::errs(), *BeganCmd, Pid);
  };

  // Set up a callback which will be called immediately after a task has
  // finished execution. This callback should determine whether execution
  // should continue (returning TaskFinishedResponse::StopExecution to stop),
  // and it should also schedule any additional commands which we now know
  // need to run.
  auto taskFinished = [&] (ProcessId Pid, int ReturnCode, StringRef Output,
                           void *Context) -> TaskFinishedResponse {
    const Job *FinishedCmd = (const Job *)Context;

    if (Level == OutputLevel::Parseable) {
      // Parseable output was requested.
      parseable_output::emitFinishedMessage(llvm::errs(), *FinishedCmd, Pid,
                                            ReturnCode, Output);
    } else {
      // Otherwise, send the buffered output to stderr, though only if we
      // support getting buffered output.
      if (TaskQueue::supportsBufferingOutput())
        llvm::errs() << Output;
    }

    if (ReturnCode != EXIT_SUCCESS) {
      // The task failed, so return true without performing any further
      // dependency analysis.

      // Store this task's ReturnCode as our Result if we haven't stored
      // anything yet.
      if (Result == EXIT_SUCCESS)
        Result = ReturnCode;

      if (!isa<CompileJobAction>(FinishedCmd->getSource()) ||
          ReturnCode != EXIT_FAILURE) {
        Diags.diagnose(SourceLoc(), diag::error_command_failed,
                       FinishedCmd->getSource().getClassName(),
                       ReturnCode);
      }

      return ContinueBuildingAfterErrors ?
          TaskFinishedResponse::ContinueExecution :
          TaskFinishedResponse::StopExecution;
    }

    // When a task finishes, we need to reevaluate the other commands that
    // might have been blocked.
    markFinished(FinishedCmd);

    // In order to handle both old dependencies that have disappeared and new
    // dependencies that have arisen, we need to reload the dependency file.
    if (getIncrementalBuildEnabled()) {
      const CommandOutput &Output = FinishedCmd->getOutput();
      StringRef DependenciesFile =
        Output.getAdditionalOutputForType(types::TY_SwiftDeps);
      if (!DependenciesFile.empty()) {
        SmallVector<const Job *, 16> Dependents;
        bool wasCascading = DepGraph.isMarked(FinishedCmd);

        switch (DepGraph.loadFromPath(FinishedCmd, DependenciesFile)) {
        case DependencyGraphImpl::LoadResult::HadError:
          disableIncrementalBuild();
          for (const Job *Cmd : DeferredCommands)
            scheduleCommandIfNecessaryAndPossible(Cmd);
          DeferredCommands.clear();
          Dependents.clear();
          break;
        case DependencyGraphImpl::LoadResult::UpToDate:
          if (!wasCascading)
            break;
          SWIFT_FALLTHROUGH;
        case DependencyGraphImpl::LoadResult::AffectsDownstream:
          DepGraph.markTransitive(Dependents, FinishedCmd);
          break;
        }

        for (const Job *Cmd : Dependents) {
          DeferredCommands.erase(Cmd);
          noteBuilding(Cmd, "because of dependencies discovered later");
          scheduleCommandIfNecessaryAndPossible(Cmd);
        }
      }
    }

    return TaskFinishedResponse::ContinueExecution;
  };

  auto taskSignalled = [&] (ProcessId Pid, StringRef ErrorMsg, StringRef Output,
                            void *Context) -> TaskFinishedResponse {
    const Job *SignalledCmd = (const Job *)Context;

    if (Level == OutputLevel::Parseable) {
      // Parseable output was requested.
      parseable_output::emitSignalledMessage(llvm::errs(), *SignalledCmd, Pid,
                                             ErrorMsg, Output);
    } else {
      // Otherwise, send the buffered output to stderr, though only if we
      // support getting buffered output.
      if (TaskQueue::supportsBufferingOutput())
        llvm::errs() << Output;
    }

    if (!ErrorMsg.empty())
      Diags.diagnose(SourceLoc(), diag::error_unable_to_execute_command,
                     ErrorMsg);

    Diags.diagnose(SourceLoc(), diag::error_command_signalled,
                   SignalledCmd->getSource().getClassName());

    // Since the task signalled, unconditionally set result to -2.
    Result = -2;

    return TaskFinishedResponse::StopExecution;
  };

  do {
    // Ask the TaskQueue to execute.
    TQ->execute(taskBegan, taskFinished, taskSignalled);

    // Mark all remaining deferred commands as skipped.
    for (const Job *Cmd : DeferredCommands) {
      if (Level == OutputLevel::Parseable) {
        // Provide output indicating this command was skipped if parseable output
        // was requested.
        parseable_output::emitSkippedMessage(llvm::errs(), *Cmd);
      }

      State.ScheduledCommands.insert(Cmd);
      markFinished(Cmd);
    }

    // ...which may allow us to go on and do later tasks.
  } while (Result == 0 && TQ->hasRemainingTasks());

  if (Result == 0) {
    assert(State.BlockingCommands.empty() &&
           "some blocking commands never finished properly");
  } else {
    // Make sure we record any files that still need to be rebuilt.
    for (const Job *Cmd : getJobs()) {
      // Skip files that don't use dependency analysis.
      StringRef DependenciesFile =
          Cmd->getOutput().getAdditionalOutputForType(types::TY_SwiftDeps);
      if (DependenciesFile.empty())
        continue;

      // Don't worry about commands that finished or weren't going to run.
      if (State.FinishedCommands.count(Cmd))
        continue;
      if (!State.ScheduledCommands.count(Cmd))
        continue;

      bool isCascading = true;
      if (getIncrementalBuildEnabled())
        isCascading = DepGraph.isMarked(Cmd);
      State.UnfinishedCommands.insert({Cmd, isCascading});
    }
  }

  if (!CompilationRecordPath.empty() && !SkipTaskExecution) {
    InputInfoMap InputInfo;
    populateInputInfoMap(InputInfo, State);
    checkForOutOfDateInputs(Diags, InputInfo);
    writeCompilationRecord(CompilationRecordPath, ArgsHash, BuildStartTime,
                           InputInfo);
  }

  if (Result == 0)
    Result = Diags.hadAnyError();
  return Result;
}
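The scheduling core is the pair scheduleCommandIfNecessaryAndPossible / markFinished: a job that cannot run yet is parked under the job blocking it, and finishing a job releases everything parked under it. Sketched independently of the Swift driver (Job is a stand-in placeholder type here):

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include <functional>

struct Job {}; // Placeholder for the driver's Job.

using BlockingMap =
    llvm::DenseMap<const Job *, llvm::SmallVector<const Job *, 4>>;

// Mark Cmd finished, then re-attempt every job that was parked on it.
static void markFinished(const Job *Cmd,
                         llvm::SmallPtrSet<const Job *, 16> &Finished,
                         BlockingMap &Blocking,
                         const std::function<void(const Job *)> &TrySchedule) {
  Finished.insert(Cmd);
  auto It = Blocking.find(Cmd);
  if (It == Blocking.end())
    return;
  auto Unblocked = std::move(It->second);
  Blocking.erase(It); // Erase first: TrySchedule may park jobs again.
  for (const Job *B : Unblocked)
    TrySchedule(B);
}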
Example #17
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
    if (DisablePeephole)
        return false;

    TM  = &MF.getTarget();
    TII = TM->getInstrInfo();
    MRI = &MF.getRegInfo();
    DT  = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0;

    bool Changed = false;

    SmallPtrSet<MachineInstr*, 8> LocalMIs;
    SmallSet<unsigned, 4> ImmDefRegs;
    DenseMap<unsigned, MachineInstr*> ImmDefMIs;
    unsigned FoldAsLoadDefReg;
    for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
        MachineBasicBlock *MBB = &*I;

        bool SeenMoveImm = false;
        LocalMIs.clear();
        ImmDefRegs.clear();
        ImmDefMIs.clear();
        FoldAsLoadDefReg = 0;

        for (MachineBasicBlock::iterator
                MII = I->begin(), MIE = I->end(); MII != MIE; ) {
            MachineInstr *MI = &*MII;
            // We may be erasing MI below, increment MII now.
            ++MII;
            LocalMIs.insert(MI);

            // If there exists an instruction which belongs to the following
            // categories, we will discard the load candidate.
            if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
                    MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() ||
                    MI->hasUnmodeledSideEffects()) {
                FoldAsLoadDefReg = 0;
                continue;
            }
            if (MI->mayStore() || MI->isCall())
                FoldAsLoadDefReg = 0;

            if ((MI->isBitcast() && optimizeBitcastInstr(MI, MBB)) ||
                    (MI->isCompare() && optimizeCmpInstr(MI, MBB)) ||
                    (MI->isSelect() && optimizeSelect(MI))) {
                // MI is deleted.
                LocalMIs.erase(MI);
                Changed = true;
                continue;
            }

            if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
                SeenMoveImm = true;
            } else {
                Changed |= optimizeExtInstr(MI, MBB, LocalMIs);
                // optimizeExtInstr might have created new instructions after MI
                // and before the already incremented MII. Adjust MII so that the
                // next iteration sees the new instructions.
                MII = MI;
                ++MII;
                if (SeenMoveImm)
                    Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
            }

            // Check whether MI is a load candidate for folding into a later
            // instruction. If MI is not a candidate, check whether we can fold an
            // earlier load into MI.
            if (!isLoadFoldable(MI, FoldAsLoadDefReg) && FoldAsLoadDefReg) {
                // We need to fold load after optimizeCmpInstr, since optimizeCmpInstr
                // can enable folding by converting SUB to CMP.
                MachineInstr *DefMI = 0;
                MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
                                       FoldAsLoadDefReg, DefMI);
                if (FoldMI) {
                    // Update LocalMIs since we replaced MI with FoldMI and deleted DefMI.
                    LocalMIs.erase(MI);
                    LocalMIs.erase(DefMI);
                    LocalMIs.insert(FoldMI);
                    MI->eraseFromParent();
                    DefMI->eraseFromParent();
                    ++NumLoadFold;

                    // MI is replaced with FoldMI.
                    Changed = true;
                    continue;
                }
            }
        }
    }

    return Changed;
}
Example #18
/// ClusterNeighboringLoads - Force nearby loads together by "flagging" them.
/// This function finds loads of the same base and different offsets. If the
/// offsets are not far apart (target specific), it adds MVT::Flag inputs and
/// outputs to ensure they are scheduled together and in order. This
/// optimization may benefit some targets by improving cache locality.
void ScheduleDAGSDNodes::ClusterNeighboringLoads() {
  SmallPtrSet<SDNode*, 16> Visited;
  SmallVector<int64_t, 4> Offsets;
  DenseMap<long long, SDNode*> O2SMap;  // Map from offset to SDNode.
  for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
       E = DAG->allnodes_end(); NI != E; ++NI) {
    SDNode *Node = &*NI;
    if (!Node || !Node->isMachineOpcode())
      continue;

    unsigned Opc = Node->getMachineOpcode();
    const TargetInstrDesc &TID = TII->get(Opc);
    if (!TID.mayLoad())
      continue;

    SDNode *Chain = 0;
    unsigned NumOps = Node->getNumOperands();
    if (Node->getOperand(NumOps-1).getValueType() == MVT::Other)
      Chain = Node->getOperand(NumOps-1).getNode();
    if (!Chain)
      continue;

    // Look for other loads of the same chain. Find loads that are loading from
    // the same base pointer and different offsets.
    Visited.clear();
    Offsets.clear();
    O2SMap.clear();
    bool Cluster = false;
    SDNode *Base = Node;
    int64_t BaseOffset;
    for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
         I != E; ++I) {
      SDNode *User = *I;
      if (User == Node || !Visited.insert(User))
        continue;
      int64_t Offset1, Offset2;
      if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
          Offset1 == Offset2)
        // FIXME: Should be ok if their addresses are identical. But earlier
        // optimizations really should have eliminated one of the loads.
        continue;
      if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
        Offsets.push_back(Offset1);
      O2SMap.insert(std::make_pair(Offset2, User));
      Offsets.push_back(Offset2);
      if (Offset2 < Offset1) {
        Base = User;
        BaseOffset = Offset2;
      } else {
        BaseOffset = Offset1;
      }
      Cluster = true;
    }

    if (!Cluster)
      continue;

    // Sort them in increasing order.
    std::sort(Offsets.begin(), Offsets.end());

    // Check if the loads are close enough.
    SmallVector<SDNode*, 4> Loads;
    unsigned NumLoads = 0;
    int64_t BaseOff = Offsets[0];
    SDNode *BaseLoad = O2SMap[BaseOff];
    Loads.push_back(BaseLoad);
    for (unsigned i = 1, e = Offsets.size(); i != e; ++i) {
      int64_t Offset = Offsets[i];
      SDNode *Load = O2SMap[Offset];
      if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset,
                                        NumLoads))
        break; // Stop right here. Ignore loads that are further away.
      Loads.push_back(Load);
      ++NumLoads;
    }

    if (NumLoads == 0)
      continue;

    // Cluster loads by adding MVT::Flag outputs and inputs. This also
    // ensures they are scheduled in order of increasing addresses.
    SDNode *Lead = Loads[0];
    AddFlags(Lead, SDValue(0,0), true, DAG);
    SDValue InFlag = SDValue(Lead, Lead->getNumValues()-1);
    for (unsigned i = 1, e = Loads.size(); i != e; ++i) {
      bool OutFlag = i < e-1;
      SDNode *Load = Loads[i];
      AddFlags(Load, InFlag, OutFlag, DAG);
      if (OutFlag)
        InFlag = SDValue(Load, Load->getNumValues()-1);
      ++LoadsClustered;
    }
  }
}
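The offset bookkeeping reduces to: record each distinct offset once, then sort so the loads are visited in address order. Isolated as a sketch (NodeT standing in for SDNode):

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include <algorithm>
#include <cstdint>

// Collect the distinct keys of an offset-to-node map in increasing order.
template <typename NodeT>
static void sortedOffsets(const llvm::DenseMap<int64_t, NodeT *> &O2S,
                          llvm::SmallVectorImpl<int64_t> &Offsets) {
  Offsets.clear();
  for (const auto &KV : O2S)
    Offsets.push_back(KV.first);
  std::sort(Offsets.begin(), Offsets.end());
}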
Example #19
/// Given \p BBs as input, find another set of BBs which collectively
/// dominates \p BBs and have the minimal sum of frequencies. Return the BB
/// set found in \p BBs.
static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI,
                                 BasicBlock *Entry,
                                 SmallPtrSet<BasicBlock *, 8> &BBs) {
  assert(!BBs.count(Entry) && "Assume Entry is not in BBs");
  // Nodes on the current path to the root.
  SmallPtrSet<BasicBlock *, 8> Path;
  // Candidates includes any block 'BB' in set 'BBs' that is not strictly
  // dominated by any other blocks in set 'BBs', and all nodes in the path
  // in the dominator tree from Entry to 'BB'.
  SmallPtrSet<BasicBlock *, 16> Candidates;
  for (auto BB : BBs) {
    // Ignore unreachable basic blocks.
    if (!DT.isReachableFromEntry(BB))
      continue;
    Path.clear();
    // Walk up the dominator tree until Entry or another BB in BBs
    // is reached. Insert the nodes along the way into Path.
    BasicBlock *Node = BB;
    // The "Path" is a candidate path to be added into Candidates set.
    bool isCandidate = false;
    do {
      Path.insert(Node);
      if (Node == Entry || Candidates.count(Node)) {
        isCandidate = true;
        break;
      }
      assert(DT.getNode(Node)->getIDom() &&
             "Entry doens't dominate current Node");
      Node = DT.getNode(Node)->getIDom()->getBlock();
    } while (!BBs.count(Node));

    // If isCandidate is false, Node is another Block in BBs dominating
    // current 'BB'. Drop the nodes on the Path.
    if (!isCandidate)
      continue;

    // Add nodes on the Path into Candidates.
    Candidates.insert(Path.begin(), Path.end());
  }

  // Sort the nodes in Candidates in top-down order and save the nodes
  // in Orders.
  unsigned Idx = 0;
  SmallVector<BasicBlock *, 16> Orders;
  Orders.push_back(Entry);
  while (Idx != Orders.size()) {
    BasicBlock *Node = Orders[Idx++];
    for (auto ChildDomNode : DT.getNode(Node)->getChildren()) {
      if (Candidates.count(ChildDomNode->getBlock()))
        Orders.push_back(ChildDomNode->getBlock());
    }
  }

  // Visit Orders in bottom-up order.
  using InsertPtsCostPair =
      std::pair<SmallPtrSet<BasicBlock *, 16>, BlockFrequency>;

  // InsertPtsMap is a map from a BB to the best insertion points for the
  // subtree of BB (subtree not including the BB itself).
  DenseMap<BasicBlock *, InsertPtsCostPair> InsertPtsMap;
  InsertPtsMap.reserve(Orders.size() + 1);
  for (auto RIt = Orders.rbegin(); RIt != Orders.rend(); RIt++) {
    BasicBlock *Node = *RIt;
    bool NodeInBBs = BBs.count(Node);
    SmallPtrSet<BasicBlock *, 16> &InsertPts = InsertPtsMap[Node].first;
    BlockFrequency &InsertPtsFreq = InsertPtsMap[Node].second;

    // Return the optimal insert points in BBs.
    if (Node == Entry) {
      BBs.clear();
      if (InsertPtsFreq > BFI.getBlockFreq(Node) ||
          (InsertPtsFreq == BFI.getBlockFreq(Node) && InsertPts.size() > 1))
        BBs.insert(Entry);
      else
        BBs.insert(InsertPts.begin(), InsertPts.end());
      break;
    }

    BasicBlock *Parent = DT.getNode(Node)->getIDom()->getBlock();
    // Initially, ParentInsertPts is empty and ParentPtsFreq is 0. Every child
    // will update its parent's ParentInsertPts and ParentPtsFreq.
    SmallPtrSet<BasicBlock *, 16> &ParentInsertPts = InsertPtsMap[Parent].first;
    BlockFrequency &ParentPtsFreq = InsertPtsMap[Parent].second;
    // Choose to insert in Node or in subtree of Node.
    // Don't hoist to EHPad because we may not find a proper place to insert
    // in EHPad.
    // If the total frequency of InsertPts is the same as the frequency of the
    // target Node, and InsertPts contains more than one node, choose hoisting
    // to reduce code size.
    if (NodeInBBs ||
        (!Node->isEHPad() &&
         (InsertPtsFreq > BFI.getBlockFreq(Node) ||
          (InsertPtsFreq == BFI.getBlockFreq(Node) && InsertPts.size() > 1)))) {
      ParentInsertPts.insert(Node);
      ParentPtsFreq += BFI.getBlockFreq(Node);
    } else {
      ParentInsertPts.insert(InsertPts.begin(), InsertPts.end());
      ParentPtsFreq += InsertPtsFreq;
    }
  }
}
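The decision inside the bottom-up walk compresses to one comparison: hoist into the dominating node when the subtree's combined frequency is strictly higher, or when it ties while needing more than one insertion point (hoisting then wins on code size). As a sketch:

#include "llvm/Support/BlockFrequency.h"

// True if one copy in the dominating node is no worse than NumSubtreePts
// copies spread over a subtree with combined frequency SubtreeFreq.
static bool shouldHoistToNode(llvm::BlockFrequency NodeFreq,
                              llvm::BlockFrequency SubtreeFreq,
                              unsigned NumSubtreePts) {
  return SubtreeFreq > NodeFreq ||
         (SubtreeFreq == NodeFreq && NumSubtreePts > 1);
}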
static bool simplifyLoopInst(Loop &L, DominatorTree &DT, LoopInfo &LI,
                             AssumptionCache &AC, const TargetLibraryInfo &TLI,
                             MemorySSAUpdater *MSSAU) {
  const DataLayout &DL = L.getHeader()->getModule()->getDataLayout();
  SimplifyQuery SQ(DL, &TLI, &DT, &AC);

  // On the first pass over the loop body we try to simplify every instruction.
  // On subsequent passes, we can restrict this to only simplifying instructions
  // where the inputs have been updated. We end up needing two sets: one
  // containing the instructions we are simplifying in *this* pass, and one for
  // the instructions we will want to simplify in the *next* pass. We use
  // pointers so we can swap between two stably allocated sets.
  SmallPtrSet<const Instruction *, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;

  // Track the PHI nodes that have already been visited during each iteration so
  // that we can identify when it is necessary to iterate.
  SmallPtrSet<PHINode *, 4> VisitedPHIs;

  // While simplifying we may discover dead code or cause code to become dead.
  // Keep track of all such instructions and we will delete them at the end.
  SmallVector<Instruction *, 8> DeadInsts;

  // First we want to create an RPO traversal of the loop body. By processing in
  // RPO we can ensure that definitions are processed prior to uses (for non PHI
  // uses) in all cases. This ensures we maximize the simplifications in each
  // iteration over the loop and minimizes the possible causes for continuing to
  // iterate.
  LoopBlocksRPO RPOT(&L);
  RPOT.perform(&LI);
  MemorySSA *MSSA = MSSAU ? MSSAU->getMemorySSA() : nullptr;

  bool Changed = false;
  for (;;) {
    if (MSSAU && VerifyMemorySSA)
      MSSA->verifyMemorySSA();
    for (BasicBlock *BB : RPOT) {
      for (Instruction &I : *BB) {
        if (auto *PI = dyn_cast<PHINode>(&I))
          VisitedPHIs.insert(PI);

        if (I.use_empty()) {
          if (isInstructionTriviallyDead(&I, &TLI))
            DeadInsts.push_back(&I);
          continue;
        }

        // We special case the first iteration which we can detect due to the
        // empty `ToSimplify` set.
        bool IsFirstIteration = ToSimplify->empty();

        if (!IsFirstIteration && !ToSimplify->count(&I))
          continue;

        Value *V = SimplifyInstruction(&I, SQ.getWithInstruction(&I));
        if (!V || !LI.replacementPreservesLCSSAForm(&I, V))
          continue;

        for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
             UI != UE;) {
          Use &U = *UI++;
          auto *UserI = cast<Instruction>(U.getUser());
          U.set(V);

          // If the instruction is used by a PHI node we have already processed
          // we'll need to iterate on the loop body to converge, so add it to
          // the next set.
          if (auto *UserPI = dyn_cast<PHINode>(UserI))
            if (VisitedPHIs.count(UserPI)) {
              Next->insert(UserPI);
              continue;
            }

          // If we are only simplifying targeted instructions and the user is an
          // instruction in the loop body, add it to our set of targeted
          // instructions. Because we process defs before uses (outside of PHIs)
          // we won't have visited it yet.
          //
          // We also skip any uses outside of the loop being simplified. Those
          // should always be PHI nodes due to LCSSA form, and we don't want to
          // try to simplify those away.
          assert((L.contains(UserI) || isa<PHINode>(UserI)) &&
                 "Uses outside the loop should be PHI nodes due to LCSSA!");
          if (!IsFirstIteration && L.contains(UserI))
            ToSimplify->insert(UserI);
        }

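        // If MemorySSA is being preserved and the replacement value is itself
        // a memory-accessing instruction, redirect users of I's memory access
        // to the replacement's access.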
        if (MSSAU)
          if (Instruction *SimpleI = dyn_cast_or_null<Instruction>(V))
            if (MemoryAccess *MA = MSSA->getMemoryAccess(&I))
              if (MemoryAccess *ReplacementMA = MSSA->getMemoryAccess(SimpleI))
                MA->replaceAllUsesWith(ReplacementMA);

        assert(I.use_empty() && "Should always have replaced all uses!");
        if (isInstructionTriviallyDead(&I, &TLI))
          DeadInsts.push_back(&I);
        ++NumSimplified;
        Changed = true;
      }
    }

    // Delete any dead instructions found thus far now that we've finished an
    // iteration over all instructions in all the loop blocks.
    if (!DeadInsts.empty()) {
      Changed = true;
      RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, &TLI, MSSAU);
    }

    if (MSSAU && VerifyMemorySSA)
      MSSA->verifyMemorySSA();

    // If we never found a PHI that needs to be simplified in the next
    // iteration, we're done.
    if (Next->empty())
      break;

    // Otherwise, swap the next set into place for the coming iteration and
    // clear the per-iteration state: the new Next set, the visited PHIs, and
    // the dead-instruction list.
    std::swap(Next, ToSimplify);
    Next->clear();
    VisitedPHIs.clear();
    DeadInsts.clear();
  }

  return Changed;
}
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
  if (DisablePeephole)
    return false;

  TM  = &MF.getTarget();
  TII = TM->getInstrInfo();
  MRI = &MF.getRegInfo();
  DT  = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0;

  bool Changed = false;

  SmallPtrSet<MachineInstr*, 8> LocalMIs;
  SmallSet<unsigned, 4> ImmDefRegs;
  DenseMap<unsigned, MachineInstr*> ImmDefMIs;
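  // LocalMIs tracks the instructions already visited in the current block for
  // the extension peephole; ImmDefRegs and ImmDefMIs record registers defined
  // by move-immediates (and their defining instructions) so later users in the
  // block can fold the immediate.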
  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
    MachineBasicBlock *MBB = &*I;

    bool SeenMoveImm = false;
    LocalMIs.clear();
    ImmDefRegs.clear();
    ImmDefMIs.clear();

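    // PMII trails MII by one instruction so that when a peephole deletes the
    // current instruction we can resume scanning just after its predecessor
    // (or at the start of the block if it was the first instruction).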
    bool First = true;
    MachineBasicBlock::iterator PMII;
    for (MachineBasicBlock::iterator
           MII = I->begin(), MIE = I->end(); MII != MIE; ) {
      MachineInstr *MI = &*MII;
      LocalMIs.insert(MI);

      if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
          MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() ||
          MI->hasUnmodeledSideEffects()) {
        ++MII;
        continue;
      }

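      // Try peepholes that may delete MI outright; on success, restart the
      // scan after the previous instruction since MII is now invalid.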
      if (MI->isBitcast()) {
        if (optimizeBitcastInstr(MI, MBB)) {
          // MI is deleted.
          LocalMIs.erase(MI);
          Changed = true;
          MII = First ? I->begin() : llvm::next(PMII);
          continue;
        }
      } else if (MI->isCompare()) {
        if (optimizeCmpInstr(MI, MBB)) {
          // MI is deleted.
          LocalMIs.erase(MI);
          Changed = true;
          MII = First ? I->begin() : llvm::next(PMII);
          continue;
        }
      }

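      // Record move-immediate definitions for later folding; for everything
      // else, try the extension peephole and, once an immediate definition
      // has been seen, immediate folding.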
      if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
        SeenMoveImm = true;
      } else {
        Changed |= optimizeExtInstr(MI, MBB, LocalMIs);
        if (SeenMoveImm)
          Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
      }

      First = false;
      PMII = MII;
      ++MII;
    }
  }

  return Changed;
}
Example #22
0
/// runOnMachineFunction - Reduce two-address instructions to two operands.
///
bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
  DEBUG(errs() << "Machine Function\n");
  const TargetMachine &TM = MF.getTarget();
  MRI = &MF.getRegInfo();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  LV = getAnalysisIfAvailable<LiveVariables>();
  AA = &getAnalysis<AliasAnalysis>();

  bool MadeChange = false;

  DEBUG(errs() << "********** REWRITING TWO-ADDR INSTRS **********\n");
  DEBUG(errs() << "********** Function: " 
        << MF.getFunction()->getName() << '\n');

  // ReMatRegs - Keep track of the registers whose defs are rematerialized.
  BitVector ReMatRegs;
  ReMatRegs.resize(MRI->getLastVirtReg()+1);

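  // For each register appearing as a tied use in the current instruction, map
  // it to the list of (use operand index, def operand index) pairs tying it.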
  typedef DenseMap<unsigned, SmallVector<std::pair<unsigned, unsigned>, 4> >
    TiedOperandMap;
  TiedOperandMap TiedOperands(4);

  SmallPtrSet<MachineInstr*, 8> Processed;
  for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end();
       mbbi != mbbe; ++mbbi) {
    unsigned Dist = 0;
    DistanceMap.clear();
    SrcRegMap.clear();
    DstRegMap.clear();
    Processed.clear();
    for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end();
         mi != me; ) {
      MachineBasicBlock::iterator nmi = next(mi);
      const TargetInstrDesc &TID = mi->getDesc();
      bool FirstTied = true;

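      // Number the instruction within the block; DistanceMap provides the
      // distance metric consumed by the rematerialization profitability check.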
      DistanceMap.insert(std::make_pair(mi, ++Dist));

      ProcessCopy(&*mi, &*mbbi, Processed);

      // First scan through all the tied register uses in this instruction
      // and record a list of pairs of tied operands for each register.
      unsigned NumOps = (mi->getOpcode() == TargetInstrInfo::INLINEASM)
        ? mi->getNumOperands() : TID.getNumOperands();
      for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) {
        unsigned DstIdx = 0;
        if (!mi->isRegTiedToDefOperand(SrcIdx, &DstIdx))
          continue;

        if (FirstTied) {
          FirstTied = false;
          ++NumTwoAddressInstrs;
          DEBUG(errs() << '\t' << *mi);
        }

        assert(mi->getOperand(SrcIdx).isReg() &&
               mi->getOperand(SrcIdx).getReg() &&
               mi->getOperand(SrcIdx).isUse() &&
               "two address instruction invalid");

        unsigned regB = mi->getOperand(SrcIdx).getReg();
        TiedOperandMap::iterator OI = TiedOperands.find(regB);
        if (OI == TiedOperands.end()) {
          SmallVector<std::pair<unsigned, unsigned>, 4> TiedPair;
          OI = TiedOperands.insert(std::make_pair(regB, TiedPair)).first;
        }
        OI->second.push_back(std::make_pair(SrcIdx, DstIdx));
      }

      // Now iterate over the information collected above.
      for (TiedOperandMap::iterator OI = TiedOperands.begin(),
             OE = TiedOperands.end(); OI != OE; ++OI) {
        SmallVector<std::pair<unsigned, unsigned>, 4> &TiedPairs = OI->second;

        // If the instruction has a single pair of tied operands, try some
        // transformations that may either eliminate the tied operands or
        // improve the opportunities for coalescing away the register copy.
        if (TiedOperands.size() == 1 && TiedPairs.size() == 1) {
          unsigned SrcIdx = TiedPairs[0].first;
          unsigned DstIdx = TiedPairs[0].second;

          // If the registers are already equal, nothing needs to be done.
          if (mi->getOperand(SrcIdx).getReg() ==
              mi->getOperand(DstIdx).getReg())
            break; // Done with this instruction.

          if (TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist))
            break; // The tied operands have been eliminated.
        }

        bool RemovedKillFlag = false;
        bool AllUsesCopied = true;
        unsigned LastCopiedReg = 0;
        unsigned regB = OI->first;
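        // For each tied (use, def) pair whose registers differ, insert a copy
        // (or rematerialize the definition) of regB into the destination
        // register regA, then rewrite this use to regA.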
        for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) {
          unsigned SrcIdx = TiedPairs[tpi].first;
          unsigned DstIdx = TiedPairs[tpi].second;
          unsigned regA = mi->getOperand(DstIdx).getReg();
          // Grab regB from the instruction because it may have changed if the
          // instruction was commuted.
          regB = mi->getOperand(SrcIdx).getReg();

          if (regA == regB) {
            // The register is tied to multiple destinations (or else we would
            // not have continued this far), but this use of the register
            // already matches the tied destination.  Leave it.
            AllUsesCopied = false;
            continue;
          }
          LastCopiedReg = regA;

          assert(TargetRegisterInfo::isVirtualRegister(regB) &&
                 "cannot make instruction into two-address form");

#ifndef NDEBUG
          // First, verify that we don't have a use of "a" in the instruction
          // (a = b + a for example) because our transformation will not
          // work. This should never occur because we are in SSA form.
          for (unsigned i = 0; i != mi->getNumOperands(); ++i)
            assert(i == DstIdx ||
                   !mi->getOperand(i).isReg() ||
                   mi->getOperand(i).getReg() != regA);
#endif

          // Emit a copy or rematerialize the definition.
          const TargetRegisterClass *rc = MRI->getRegClass(regB);
          MachineInstr *DefMI = MRI->getVRegDef(regB);
          // If it's safe and profitable, remat the definition instead of
          // copying it.
          if (DefMI &&
              DefMI->getDesc().isAsCheapAsAMove() &&
              DefMI->isSafeToReMat(TII, regB, AA) &&
              isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){
            DEBUG(errs() << "2addr: REMATTING : " << *DefMI << "\n");
            unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg();
            TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, TRI);
            ReMatRegs.set(regB);
            ++NumReMats;
          } else {
            bool Emitted = TII->copyRegToReg(*mbbi, mi, regA, regB, rc, rc);
            (void)Emitted;
            assert(Emitted && "Unable to issue a copy instruction!\n");
          }

          MachineBasicBlock::iterator prevMI = prior(mi);
          // Update DistanceMap.
          DistanceMap.insert(std::make_pair(prevMI, Dist));
          DistanceMap[mi] = ++Dist;

          DEBUG(errs() << "\t\tprepend:\t" << *prevMI);

          MachineOperand &MO = mi->getOperand(SrcIdx);
          assert(MO.isReg() && MO.getReg() == regB && MO.isUse() &&
                 "inconsistent operand info for 2-reg pass");
          if (MO.isKill()) {
            MO.setIsKill(false);
            RemovedKillFlag = true;
          }
          MO.setReg(regA);
        }

        if (AllUsesCopied) {
          // Replace other (un-tied) uses of regB with LastCopiedReg.
          for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
            MachineOperand &MO = mi->getOperand(i);
            if (MO.isReg() && MO.getReg() == regB && MO.isUse()) {
              if (MO.isKill()) {
                MO.setIsKill(false);
                RemovedKillFlag = true;
              }
              MO.setReg(LastCopiedReg);
            }
          }

          // Update live variables for regB.
          if (RemovedKillFlag && LV && LV->getVarInfo(regB).removeKill(mi))
            LV->addVirtualRegisterKilled(regB, prior(mi));

        } else if (RemovedKillFlag) {
          // Some tied uses of regB matched their destination registers, so
          // regB is still used in this instruction, but a kill flag was
          // removed from a different tied use of regB, so now we need to add
          // a kill flag to one of the remaining uses of regB.
          for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
            MachineOperand &MO = mi->getOperand(i);
            if (MO.isReg() && MO.getReg() == regB && MO.isUse()) {
              MO.setIsKill(true);
              break;
            }
          }
        }
          
        MadeChange = true;

        DEBUG(errs() << "\t\trewrite to:\t" << *mi);
      }

      // Clear TiedOperands here instead of at the top of the loop
      // since most instructions do not have tied operands.
      TiedOperands.clear();
      mi = nmi;
    }
  }

  // Some rematerialized definitions are now dead; erase any whose virtual
  // register has no remaining uses.
  int VReg = ReMatRegs.find_first();
  while (VReg != -1) {
    if (MRI->use_empty(VReg)) {
      MachineInstr *DefMI = MRI->getVRegDef(VReg);
      DefMI->eraseFromParent();
    }
    VReg = ReMatRegs.find_next(VReg);
  }

  return MadeChange;
}