Example #1
0
void DominatorTree::verifyDomTree() const {
  if (!VerifyDomInfo)
    return;

  Function &F = *getRoot()->getParent();

  DominatorTree OtherDT;
  OtherDT.recalculate(F);
  if (compare(OtherDT)) {
    errs() << "DominatorTree is not up to date!\nComputed:\n";
    print(errs());
    errs() << "\nActual:\n";
    OtherDT.print(errs());
    abort();
  }
}
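A minimal sketch of the recalculate-and-compare idiom above, kept outside the tree for clarity (the helper name is hypothetical; only "llvm/IR/Dominators.h" is assumed):

#include "llvm/IR/Dominators.h"
using namespace llvm;

// Rebuild a dominator tree from scratch and report whether a cached tree has
// gone stale. DominatorTree::compare() returns true when the trees differ.
static bool domTreeIsStale(const DominatorTree &Cached, Function &F) {
  DominatorTree Fresh;
  Fresh.recalculate(F); // full recomputation, independent of Cached
  return Cached.compare(Fresh);
}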
Example #2
0
Function *PartialInlinerImpl::FunctionCloner::doFunctionOutlining() {
  // Returns true if the block is to be partially inlined into the caller
  // (i.e. not to be extracted to the out-of-line function).
  auto ToBeInlined = [&, this](BasicBlock *BB) {
    return BB == ClonedOI->ReturnBlock ||
           (std::find(ClonedOI->Entries.begin(), ClonedOI->Entries.end(), BB) !=
            ClonedOI->Entries.end());
  };

  // Gather up the blocks that we're going to extract.
  std::vector<BasicBlock *> ToExtract;
  ToExtract.push_back(ClonedOI->NonReturnBlock);
  OutlinedRegionCost +=
      PartialInlinerImpl::computeBBInlineCost(ClonedOI->NonReturnBlock);
  for (BasicBlock &BB : *ClonedFunc)
    if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
      ToExtract.push_back(&BB);
      // FIXME: the code extractor may hoist/sink more code
      // into the outlined function which may make the outlining
      // overhead (the difference of the outlined function cost
      // and OutlinedRegionCost) look larger.
      OutlinedRegionCost += computeBBInlineCost(&BB);
    }

  // The CodeExtractor needs a dominator tree.
  DominatorTree DT;
  DT.recalculate(*ClonedFunc);

  // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
  LoopInfo LI(DT);
  BranchProbabilityInfo BPI(*ClonedFunc, LI);
  ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));

  // Extract the body of the if.
  OutlinedFunc = CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
                               ClonedFuncBFI.get(), &BPI)
                     .extractCodeRegion();

  if (OutlinedFunc) {
    OutliningCallBB = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc)
                          .getInstruction()
                          ->getParent();
    assert(OutliningCallBB->getParent() == ClonedFunc);
  }

  return OutlinedFunc;
}
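The hand-built analysis chain above (dominator tree, then LoopInfo, then BranchProbabilityInfo, then BlockFrequencyInfo) is the usual ordering when no pass manager is available: each analysis consumes the previous one. A condensed sketch of just that dependency chain (the helper name is hypothetical; BFI copies what it needs in its constructor, so the stack-allocated inputs may die afterwards):

#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"
#include <memory>
using namespace llvm;

static std::unique_ptr<BlockFrequencyInfo> computeBFI(Function &F) {
  DominatorTree DT;
  DT.recalculate(F);                // dominators come first
  LoopInfo LI(DT);                  // loop structure derives from dominators
  BranchProbabilityInfo BPI(F, LI); // edge probabilities need loop info
  return llvm::make_unique<BlockFrequencyInfo>(F, BPI, LI);
}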
Example #3
0
bool SanitizerCoverageModule::runOnFunction(Function &F) {
  if (F.empty()) return false;
  if (F.getName().find(".module_ctor") != std::string::npos)
    return false;  // Should not instrument sanitizer init functions.
  // Don't instrument functions using SEH for now. Splitting basic blocks like
  // we do for coverage breaks WinEHPrepare.
  // FIXME: Remove this when SEH no longer uses landingpad pattern matching.
  if (F.hasPersonalityFn() &&
      isAsynchronousEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
    return false;
  if (Options.CoverageType >= SanitizerCoverageOptions::SCK_Edge)
    SplitAllCriticalEdges(F);
  SmallVector<Instruction*, 8> IndirCalls;
  SmallVector<BasicBlock *, 16> BlocksToInstrument;
  SmallVector<Instruction*, 8> CmpTraceTargets;
  SmallVector<Instruction*, 8> SwitchTraceTargets;

  DominatorTree DT;
  DT.recalculate(F);
  for (auto &BB : F) {
    if (shouldInstrumentBlock(&BB, &DT))
      BlocksToInstrument.push_back(&BB);
    for (auto &Inst : BB) {
      if (Options.IndirectCalls) {
        CallSite CS(&Inst);
        if (CS && !CS.getCalledFunction())
          IndirCalls.push_back(&Inst);
      }
      if (Options.TraceCmp) {
        if (isa<ICmpInst>(&Inst))
          CmpTraceTargets.push_back(&Inst);
        if (isa<SwitchInst>(&Inst))
          SwitchTraceTargets.push_back(&Inst);
      }
    }
  }

  InjectCoverage(F, BlocksToInstrument);
  InjectCoverageForIndirectCalls(F, IndirCalls);
  InjectTraceForCmp(F, CmpTraceTargets);
  InjectTraceForSwitch(F, SwitchTraceTargets);
  return true;
}
Example #4
0
OptimizationRemarkEmitter::OptimizationRemarkEmitter(const Function *F)
    : F(F), BFI(nullptr) {
  if (!F->getContext().getDiagnosticHotnessRequested())
    return;

  // First create a dominator tree.
  DominatorTree DT;
  DT.recalculate(*const_cast<Function *>(F));

  // Generate LoopInfo from it.
  LoopInfo LI;
  LI.analyze(DT);

  // Then compute BranchProbabilityInfo.
  BranchProbabilityInfo BPI;
  BPI.calculate(*F, LI);

  // Finally compute BFI.
  OwnedBFI = llvm::make_unique<BlockFrequencyInfo>(*F, BPI, LI);
  BFI = OwnedBFI.get();
}
Example #5
0
bool PlaceSafepoints::runOnFunction(Function &F) {
  if (F.isDeclaration() || F.empty()) {
    // This is a declaration, nothing to do.  Must exit early to avoid crash in
    // dom tree calculation
    return false;
  }

  if (isGCSafepointPoll(F)) {
    // Given we're inlining this inside of safepoint poll insertion, this
    // doesn't make any sense.  Note that we do make any contained calls
    // parseable after we inline a poll.
    return false;
  }

  if (!shouldRewriteFunction(F))
    return false;

  bool modified = false;

  // In various bits below, we rely on the fact that uses are reachable from
  // defs.  When there are basic blocks unreachable from the entry, dominance
  // and reachability queries return nonsensical results.  Thus, we preprocess
  // the function to ensure these properties hold.
  modified |= removeUnreachableBlocks(F);

  // STEP 1 - Insert the safepoint polling locations.  We do not need to
  // actually insert parse points yet.  That will be done for all polls and
  // calls in a single pass.

  DominatorTree DT;
  DT.recalculate(F);

  SmallVector<Instruction *, 16> PollsNeeded;
  std::vector<CallSite> ParsePointNeeded;

  if (enableBackedgeSafepoints(F)) {
    // Construct a pass manager to run the LoopPass backedge logic.  We
    // need the pass manager to handle scheduling all the loop passes
    // appropriately.  Doing this by hand is painful and just not worth messing
    // with for the moment.
    legacy::FunctionPassManager FPM(F.getParent());
    bool CanAssumeCallSafepoints = enableCallSafepoints(F);
    PlaceBackedgeSafepointsImpl *PBS =
      new PlaceBackedgeSafepointsImpl(CanAssumeCallSafepoints);
    FPM.add(PBS);
    FPM.run(F);

    // We preserve dominance information when inserting the poll, otherwise
    // we'd have to recalculate this on every insert
    DT.recalculate(F);

    auto &PollLocations = PBS->PollLocations;

    auto OrderByBBName = [](Instruction *a, Instruction *b) {
      return a->getParent()->getName() < b->getParent()->getName();
    };
    // We need the order of list to be stable so that naming ends up stable
    // when we split edges.  This makes test cases much easier to write.
    std::sort(PollLocations.begin(), PollLocations.end(), OrderByBBName);

    // We can sometimes end up with duplicate poll locations.  This happens if
    // a single loop is visited more than once.   The fact this happens seems
    // wrong, but it does happen for the split-backedge.ll test case.
    PollLocations.erase(std::unique(PollLocations.begin(),
                                    PollLocations.end()),
                        PollLocations.end());

    // Insert a poll at each point the analysis pass identified
    // The poll location must be the terminator of a loop latch block.
    for (TerminatorInst *Term : PollLocations) {
      // We are inserting a poll, the function is modified
      modified = true;

      if (SplitBackedge) {
        // Split the backedge of the loop and insert the poll within that new
        // basic block.  This creates a loop with two latches per original
        // latch (which is non-ideal), but this appears to be easier to
        // optimize in practice than inserting the poll immediately before the
        // latch test.

        // Since this is a latch, at least one of the successors must dominate
        // it. It's possible that we have a) duplicate edges to the same header
        // and b) edges to distinct loop headers.  We need to insert polls on
        // each.
        SetVector<BasicBlock *> Headers;
        for (unsigned i = 0; i < Term->getNumSuccessors(); i++) {
          BasicBlock *Succ = Term->getSuccessor(i);
          if (DT.dominates(Succ, Term->getParent())) {
            Headers.insert(Succ);
          }
        }
        assert(!Headers.empty() && "poll location is not a loop latch?");

        // The split loop structure here is so that we only need to recalculate
        // the dominator tree once.  Alternatively, we could just keep it up to
        // date and use a more natural merged loop.
        SetVector<BasicBlock *> SplitBackedges;
        for (BasicBlock *Header : Headers) {
          BasicBlock *NewBB = SplitEdge(Term->getParent(), Header, &DT);
          PollsNeeded.push_back(NewBB->getTerminator());
          NumBackedgeSafepoints++;
        }
      } else {
        // Split the latch block itself, right before the terminator.
        PollsNeeded.push_back(Term);
        NumBackedgeSafepoints++;
      }
    }
  }

  if (enableEntrySafepoints(F)) {
    Instruction *Location = findLocationForEntrySafepoint(F, DT);
    if (!Location) {
      // policy choice not to insert?
    } else {
      PollsNeeded.push_back(Location);
      modified = true;
      NumEntrySafepoints++;
    }
  }

  // Now that we've identified all the needed safepoint poll locations, insert
  // safepoint polls themselves.
  for (Instruction *PollLocation : PollsNeeded) {
    std::vector<CallSite> RuntimeCalls;
    InsertSafepointPoll(PollLocation, RuntimeCalls);
    ParsePointNeeded.insert(ParsePointNeeded.end(), RuntimeCalls.begin(),
                            RuntimeCalls.end());
  }

  // If we've been asked to not wrap the calls with gc.statepoint, then we're
  // done.  In the near future, this option will be "constant folded" to true,
  // and the code below that deals with insert gc.statepoint calls will be
  // removed.  Wrapping potentially safepointing calls in gc.statepoint will
  // then become the responsibility of the RewriteStatepointsForGC pass.
  if (NoStatepoints)
    return modified;

  PollsNeeded.clear(); // make sure we don't accidentally use these again
  // The dominator tree has been invalidated by the inlining performed in the
  // above loop.  TODO: Teach the inliner how to update the dom tree?
  DT.recalculate(F);

  if (enableCallSafepoints(F)) {
    std::vector<CallSite> Calls;
    findCallSafepoints(F, Calls);
    NumCallSafepoints += Calls.size();
    ParsePointNeeded.insert(ParsePointNeeded.end(), Calls.begin(), Calls.end());
  }

  // Unique the vectors since we can end up with duplicates if we scan the call
  // site for call safepoints after we add it for entry or backedge.  The
  // only reason we need tracking at all is that some functions might have
  // polls but not call safepoints and thus we might miss marking the runtime
  // calls for the polls. (This is useful in test cases!)
  unique_unsorted(ParsePointNeeded);

  // Any parse point (no matter what source) will be handled here

  // We're about to start modifying the function
  if (!ParsePointNeeded.empty())
    modified = true;

  // Now run through and insert the safepoints, but do _NOT_ update or remove
  // any existing uses.  We have references to live variables that need to
  // survive to the last iteration of this loop.
  std::vector<Value *> Results;
  Results.reserve(ParsePointNeeded.size());
  for (size_t i = 0; i < ParsePointNeeded.size(); i++) {
    CallSite &CS = ParsePointNeeded[i];

    // For invoke statepoints we need to remove all phi nodes at the normal
    // destination block.  The reason is that we can place the gc_result only
    // after the last phi node in the basic block, and RAUW on the gc_result
    // would produce malformed code if one of these phi nodes used the result
    // of the invoke.
    if (InvokeInst *Invoke = dyn_cast<InvokeInst>(CS.getInstruction())) {
      normalizeForInvokeSafepoint(Invoke->getNormalDest(),
                                  Invoke->getParent());
    }

    Value *GCResult = ReplaceWithStatepoint(CS);
    Results.push_back(GCResult);
  }
  assert(Results.size() == ParsePointNeeded.size());

  // Adjust all users of the old call sites to use the new ones instead
  for (size_t i = 0; i < ParsePointNeeded.size(); i++) {
    CallSite &CS = ParsePointNeeded[i];
    Value *GCResult = Results[i];
    if (GCResult) {
      // Cannot RAUW the invoke's gc result when phi nodes are present.
      assert(CS.isCall() ||
             !isa<PHINode>(cast<Instruction>(GCResult)->getParent()->begin()));

      // Replace all uses with the new call
      CS.getInstruction()->replaceAllUsesWith(GCResult);
    }

    // Now that we've handled all uses, remove the original call itself
    // Note: The insert point can't be the deleted instruction!
    CS.getInstruction()->eraseFromParent();
  }
  return modified;
}
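unique_unsorted() above removes duplicates from ParsePointNeeded without sorting it first, since the surrounding code relies on a stable order for naming. A hedged, order-preserving sketch in the same spirit (the real helper in PlaceSafepoints.cpp may differ in detail):

#include <algorithm>
#include <set>
#include <vector>

// Keep only the first occurrence of each element, without reordering.
template <typename T> static void unique_unsorted(std::vector<T> &Vec) {
  std::set<T> Seen;
  Vec.erase(std::remove_if(Vec.begin(), Vec.end(),
                           [&Seen](const T &V) {
                             return !Seen.insert(V).second;
                           }),
            Vec.end());
}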
Example #6
0
/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true
/// if unrolling was successful, or false if the loop was unmodified. Unrolling
/// can only fail when the loop's latch block is not terminated by a conditional
/// branch instruction. However, if the trip count (and multiple) are not known,
/// loop unrolling will mostly produce more code that is no faster.
///
/// TripCount is generally defined as the number of times the loop header
/// executes. UnrollLoop relaxes the definition to permit early exits: here
/// TripCount is the iteration on which control exits LatchBlock if no early
/// exits were taken. Note that UnrollLoop assumes that the loop counter test
/// terminates LatchBlock in order to remove unnecessary instances of the
/// test. In other words, control may exit the loop prior to TripCount
/// iterations via an early branch, but control may not exit the loop from the
/// LatchBlock's terminator prior to TripCount iterations.
///
/// Similarly, TripMultiple divides the number of times that the LatchBlock may
/// execute without exiting the loop.
///
/// The LoopInfo Analysis that is passed will be kept consistent.
///
/// If a LoopPassManager is passed in, and the loop is fully removed, it will be
/// removed from the LoopPassManager as well. LPM can also be NULL.
///
/// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are
/// available from the Pass it must also preserve those analyses.
bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
                      bool AllowRuntime, unsigned TripMultiple,
                      LoopInfo *LI, Pass *PP, LPPassManager *LPM) {
  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) {
    DEBUG(dbgs() << "  Can't unroll; loop preheader-insertion failed.\n");
    return false;
  }

  BasicBlock *LatchBlock = L->getLoopLatch();
  if (!LatchBlock) {
    DEBUG(dbgs() << "  Can't unroll; loop exit-block-insertion failed.\n");
    return false;
  }

  // Loops with indirectbr cannot be cloned.
  if (!L->isSafeToClone()) {
    DEBUG(dbgs() << "  Can't unroll; Loop body cannot be cloned.\n");
    return false;
  }

  BasicBlock *Header = L->getHeader();
  BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());

  if (!BI || BI->isUnconditional()) {
    // The loop-rotate pass can be helpful to avoid this in many cases.
    DEBUG(dbgs() <<
             "  Can't unroll; loop not terminated by a conditional branch.\n");
    return false;
  }

  if (Header->hasAddressTaken()) {
    // The loop-rotate pass can be helpful to avoid this in many cases.
    DEBUG(dbgs() <<
          "  Won't unroll loop: address of header block is taken.\n");
    return false;
  }

  if (TripCount != 0)
    DEBUG(dbgs() << "  Trip Count = " << TripCount << "\n");
  if (TripMultiple != 1)
    DEBUG(dbgs() << "  Trip Multiple = " << TripMultiple << "\n");

  // Effectively "DCE" unrolled iterations that are beyond the tripcount
  // and will never be executed.
  if (TripCount != 0 && Count > TripCount)
    Count = TripCount;

  // Don't enter the unroll code if there is nothing to do. This way we don't
  // need to support "partial unrolling by 1".
  if (TripCount == 0 && Count < 2)
    return false;

  assert(Count > 0);
  assert(TripMultiple > 0);
  assert(TripCount == 0 || TripCount % TripMultiple == 0);

  // Are we eliminating the loop control altogether?
  bool CompletelyUnroll = Count == TripCount;

  // We assume a run-time trip count if the compiler cannot
  // figure out the loop trip count and the unroll-runtime
  // flag is specified.
  bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);

  if (RuntimeTripCount && !UnrollRuntimeLoopProlog(L, Count, LI, LPM))
    return false;

  // Notify ScalarEvolution that the loop will be substantially changed,
  // if not outright eliminated.
  if (PP) {
    ScalarEvolution *SE = PP->getAnalysisIfAvailable<ScalarEvolution>();
    if (SE)
      SE->forgetLoop(L);
  }

  // If we know the trip count, we know the multiple...
  unsigned BreakoutTrip = 0;
  if (TripCount != 0) {
    BreakoutTrip = TripCount % Count;
    TripMultiple = 0;
  } else {
    // Figure out what multiple to use.
    BreakoutTrip = TripMultiple =
      (unsigned)GreatestCommonDivisor64(Count, TripMultiple);
  }

  // Report the unrolling decision.
  DebugLoc LoopLoc = L->getStartLoc();
  Function *F = Header->getParent();
  LLVMContext &Ctx = F->getContext();

  if (CompletelyUnroll) {
    DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
          << " with trip count " << TripCount << "!\n");
    emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc,
                           Twine("completely unrolled loop with ") +
                               Twine(TripCount) + " iterations");
  } else {
    DEBUG(dbgs() << "UNROLLING loop %" << Header->getName()
          << " by " << Count);
    // Twine::concat() returns a new Twine instead of appending in place, and
    // a stored Twine must not outlive its temporaries, so accumulate the
    // remark message in a std::string.
    std::string DiagMsg =
        ("unrolled loop by a factor of " + Twine(Count)).str();
    if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
      DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
      DiagMsg += (" with a breakout at trip " + Twine(BreakoutTrip)).str();
    } else if (TripMultiple != 1) {
      DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
      DiagMsg += (" with " + Twine(TripMultiple) + " trips per branch").str();
    } else if (RuntimeTripCount) {
      DEBUG(dbgs() << " with run-time trip count");
      DiagMsg += " with run-time trip count";
    }
    }
    DEBUG(dbgs() << "!\n");
    emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc, DiagMsg);
  }

  bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
  BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);

  // For the first iteration of the loop, we should use the precloned values for
  // PHI nodes.  Insert associations now.
  ValueToValueMapTy LastValueMap;
  std::vector<PHINode*> OrigPHINode;
  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
    OrigPHINode.push_back(cast<PHINode>(I));
  }

  std::vector<BasicBlock*> Headers;
  std::vector<BasicBlock*> Latches;
  Headers.push_back(Header);
  Latches.push_back(LatchBlock);

  // The current on-the-fly SSA update requires blocks to be processed in
  // reverse postorder so that LastValueMap contains the correct value at each
  // exit.
  LoopBlocksDFS DFS(L);
  DFS.perform(LI);

  // Stash the DFS iterators before adding blocks to the loop.
  LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
  LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();

  for (unsigned It = 1; It != Count; ++It) {
    std::vector<BasicBlock*> NewBlocks;

    for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
      ValueToValueMapTy VMap;
      BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
      Header->getParent()->getBasicBlockList().push_back(New);

      // Loop over all of the PHI nodes in the block, changing them to use the
      // incoming values from the previous block.
      if (*BB == Header)
        for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
          PHINode *NewPHI = cast<PHINode>(VMap[OrigPHINode[i]]);
          Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
          if (Instruction *InValI = dyn_cast<Instruction>(InVal))
            if (It > 1 && L->contains(InValI))
              InVal = LastValueMap[InValI];
          VMap[OrigPHINode[i]] = InVal;
          New->getInstList().erase(NewPHI);
        }

      // Update our running map of newest clones
      LastValueMap[*BB] = New;
      for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
           VI != VE; ++VI)
        LastValueMap[VI->first] = VI->second;

      L->addBasicBlockToLoop(New, LI->getBase());

      // Add phi entries for newly created values to all exit blocks.
      for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB);
           SI != SE; ++SI) {
        if (L->contains(*SI))
          continue;
        for (BasicBlock::iterator BBI = (*SI)->begin();
             PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) {
          Value *Incoming = phi->getIncomingValueForBlock(*BB);
          ValueToValueMapTy::iterator It = LastValueMap.find(Incoming);
          if (It != LastValueMap.end())
            Incoming = It->second;
          phi->addIncoming(Incoming, New);
        }
      }
      // Keep track of new headers and latches as we create them, so that
      // we can insert the proper branches later.
      if (*BB == Header)
        Headers.push_back(New);
      if (*BB == LatchBlock)
        Latches.push_back(New);

      NewBlocks.push_back(New);
    }

    // Remap all instructions in the most recent iteration
    for (unsigned i = 0; i < NewBlocks.size(); ++i)
      for (BasicBlock::iterator I = NewBlocks[i]->begin(),
           E = NewBlocks[i]->end(); I != E; ++I)
        ::RemapInstruction(I, LastValueMap);
  }

  // Loop over the PHI nodes in the original block, setting incoming values.
  for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
    PHINode *PN = OrigPHINode[i];
    if (CompletelyUnroll) {
      PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
      Header->getInstList().erase(PN);
    }
    else if (Count > 1) {
      Value *InVal = PN->removeIncomingValue(LatchBlock, false);
      // If this value was defined in the loop, take the value defined by the
      // last iteration of the loop.
      if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
        if (L->contains(InValI))
          InVal = LastValueMap[InVal];
      }
      assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch");
      PN->addIncoming(InVal, Latches.back());
    }
  }

  // Now that all the basic blocks for the unrolled iterations are in place,
  // set up the branches to connect them.
  for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
    // The original branch was replicated in each unrolled iteration.
    BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());

    // The branch destination.
    unsigned j = (i + 1) % e;
    BasicBlock *Dest = Headers[j];
    bool NeedConditional = true;

    if (RuntimeTripCount && j != 0) {
      NeedConditional = false;
    }

    // For a complete unroll, make the last iteration end with a branch
    // to the exit block.
    if (CompletelyUnroll && j == 0) {
      Dest = LoopExit;
      NeedConditional = false;
    }

    // If we know the trip count or a multiple of it, we can safely use an
    // unconditional branch for some iterations.
    if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) {
      NeedConditional = false;
    }

    if (NeedConditional) {
      // Update the conditional branch's successor for the following
      // iteration.
      Term->setSuccessor(!ContinueOnTrue, Dest);
    } else {
      // Remove phi operands at this loop exit
      if (Dest != LoopExit) {
        BasicBlock *BB = Latches[i];
        for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
             SI != SE; ++SI) {
          if (*SI == Headers[i])
            continue;
          for (BasicBlock::iterator BBI = (*SI)->begin();
               PHINode *Phi = dyn_cast<PHINode>(BBI); ++BBI) {
            Phi->removeIncomingValue(BB, false);
          }
        }
      }
      // Replace the conditional branch with an unconditional one.
      BranchInst::Create(Dest, Term);
      Term->eraseFromParent();
    }
  }

  // Merge adjacent basic blocks, if possible.
  for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
    BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
    if (Term->isUnconditional()) {
      BasicBlock *Dest = Term->getSuccessor(0);
      if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, LPM))
        std::replace(Latches.begin(), Latches.end(), Dest, Fold);
    }
  }

  DominatorTree *DT = nullptr;
  if (PP) {
    // FIXME: Reconstruct dom info, because it is not preserved properly.
    // Incrementally updating domtree after loop unrolling would be easy.
    if (DominatorTreeWrapperPass *DTWP =
            PP->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
      DT = &DTWP->getDomTree();
      DT->recalculate(*L->getHeader()->getParent());
    }

    // Simplify any new induction variables in the partially unrolled loop.
    ScalarEvolution *SE = PP->getAnalysisIfAvailable<ScalarEvolution>();
    if (SE && !CompletelyUnroll) {
      SmallVector<WeakVH, 16> DeadInsts;
      simplifyLoopIVs(L, SE, LPM, DeadInsts);

      // Aggressively clean up dead instructions that simplifyLoopIVs already
      // identified. Any remaining should be cleaned up below.
      while (!DeadInsts.empty())
        if (Instruction *Inst =
            dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
          RecursivelyDeleteTriviallyDeadInstructions(Inst);
    }
  }
  // At this point, the code is well formed.  We now do a quick sweep over the
  // inserted code, doing constant propagation and dead code elimination as we
  // go.
  const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks();
  for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(),
       BBE = NewLoopBlocks.end(); BB != BBE; ++BB)
    for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) {
      Instruction *Inst = I++;

      if (isInstructionTriviallyDead(Inst))
        (*BB)->getInstList().erase(Inst);
      else if (Value *V = SimplifyInstruction(Inst))
        if (LI->replacementPreservesLCSSAForm(Inst, V)) {
          Inst->replaceAllUsesWith(V);
          (*BB)->getInstList().erase(Inst);
        }
    }

  NumCompletelyUnrolled += CompletelyUnroll;
  ++NumUnrolled;

  Loop *OuterL = L->getParentLoop();
  // Remove the loop from the LoopPassManager if it's completely removed.
  if (CompletelyUnroll && LPM != nullptr)
    LPM->deleteLoopFromQueue(L);

  // If we have a pass and a DominatorTree we should re-simplify impacted loops
  // to ensure subsequent analyses can rely on this form. We want to simplify
  // at least one layer outside of the loop that was unrolled so that any
  // changes to the parent loop exposed by the unrolling are considered.
  if (PP && DT) {
    if (!OuterL && !CompletelyUnroll)
      OuterL = L;
    if (OuterL) {
      ScalarEvolution *SE = PP->getAnalysisIfAvailable<ScalarEvolution>();
      simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE);

      // LCSSA must be performed on the outermost affected loop. The unrolled
      // loop's last loop latch is guaranteed to be in the outermost loop after
      // deleteLoopFromQueue updates LoopInfo.
      Loop *LatchLoop = LI->getLoopFor(Latches.back());
      if (!OuterL->contains(LatchLoop))
        while (OuterL->getParentLoop() != LatchLoop)
          OuterL = OuterL->getParentLoop();

      formLCSSARecursively(*OuterL, *DT, SE);
    }
  }

  return true;
}
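A worked instance of the BreakoutTrip arithmetic above: with TripCount = 10 and Count = 4, BreakoutTrip = 10 % 4 = 2 and TripMultiple is cleared, so only the iteration copy with index j == 2 keeps its conditional exit. With TripCount = 0, Count = 4 and TripMultiple = 6, both become gcd(4, 6) = 2, so every second copy keeps its conditional exit. Illustration only, with no LLVM dependencies:

#include <cassert>

// Mirrors the "if we know the trip count, we know the multiple" logic above.
static unsigned breakoutTrip(unsigned TripCount, unsigned Count,
                             unsigned &TripMultiple) {
  if (TripCount != 0) {
    TripMultiple = 0;           // known trip count: a single breakout copy
    return TripCount % Count;
  }
  unsigned G = 1;               // gcd(Count, TripMultiple), computed naively
  for (unsigned d = 2; d <= Count && d <= TripMultiple; ++d)
    if (Count % d == 0 && TripMultiple % d == 0)
      G = d;
  TripMultiple = G;
  return G;                     // unknown trip count: breakout every G copies
}

int main() {
  unsigned TM = 1;
  assert(breakoutTrip(10, 4, TM) == 2 && TM == 0);
  TM = 6;
  assert(breakoutTrip(0, 4, TM) == 2 && TM == 2);
  return 0;
}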
Example #7
0
bool PlaceSafepoints::runOnFunction(Function &F) {
  if (F.isDeclaration() || F.empty()) {
    // This is a declaration, nothing to do.  Must exit early to avoid crash in
    // dom tree calculation
    return false;
  }

  if (isGCSafepointPoll(F)) {
    // Given we're inlining this inside of safepoint poll insertion, this
    // doesn't make any sense.  Note that we do make any contained calls
    // parseable after we inline a poll.
    return false;
  }

  if (!shouldRewriteFunction(F))
    return false;

  const TargetLibraryInfo &TLI =
      getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();

  bool Modified = false;

  // In various bits below, we rely on the fact that uses are reachable from
  // defs.  When there are basic blocks unreachable from the entry, dominance
  // and reachability queries return nonsensical results.  Thus, we preprocess
  // the function to ensure these properties hold.
  Modified |= removeUnreachableBlocks(F);

  // STEP 1 - Insert the safepoint polling locations.  We do not need to
  // actually insert parse points yet.  That will be done for all polls and
  // calls in a single pass.

  DominatorTree DT;
  DT.recalculate(F);

  SmallVector<Instruction *, 16> PollsNeeded;
  std::vector<CallSite> ParsePointNeeded;

  if (enableBackedgeSafepoints(F)) {
    // Construct a pass manager to run the LoopPass backedge logic.  We
    // need the pass manager to handle scheduling all the loop passes
    // appropriately.  Doing this by hand is painful and just not worth messing
    // with for the moment.
    legacy::FunctionPassManager FPM(F.getParent());
    bool CanAssumeCallSafepoints = enableCallSafepoints(F);
    auto *PBS = new PlaceBackedgeSafepointsImpl(CanAssumeCallSafepoints);
    FPM.add(PBS);
    FPM.run(F);

    // We preserve dominance information when inserting the poll, otherwise
    // we'd have to recalculate this on every insert
    DT.recalculate(F);

    auto &PollLocations = PBS->PollLocations;

    auto OrderByBBName = [](Instruction *a, Instruction *b) {
      return a->getParent()->getName() < b->getParent()->getName();
    };
    // We need the order of list to be stable so that naming ends up stable
    // when we split edges.  This makes test cases much easier to write.
    llvm::sort(PollLocations.begin(), PollLocations.end(), OrderByBBName);

    // We can sometimes end up with duplicate poll locations.  This happens if
    // a single loop is visited more than once.   The fact this happens seems
    // wrong, but it does happen for the split-backedge.ll test case.
    PollLocations.erase(std::unique(PollLocations.begin(),
                                    PollLocations.end()),
                        PollLocations.end());

    // Insert a poll at each point the analysis pass identified
    // The poll location must be the terminator of a loop latch block.
    for (TerminatorInst *Term : PollLocations) {
      // We are inserting a poll, the function is modified
      Modified = true;

      if (SplitBackedge) {
        // Split the backedge of the loop and insert the poll within that new
        // basic block.  This creates a loop with two latches per original
        // latch (which is non-ideal), but this appears to be easier to
        // optimize in practice than inserting the poll immediately before the
        // latch test.

        // Since this is a latch, at least one of the successors must dominate
        // it. It's possible that we have a) duplicate edges to the same header
        // and b) edges to distinct loop headers.  We need to insert polls on
        // each.
        SetVector<BasicBlock *> Headers;
        for (unsigned i = 0; i < Term->getNumSuccessors(); i++) {
          BasicBlock *Succ = Term->getSuccessor(i);
          if (DT.dominates(Succ, Term->getParent())) {
            Headers.insert(Succ);
          }
        }
        assert(!Headers.empty() && "poll location is not a loop latch?");

        // The split loop structure here is so that we only need to recalculate
        // the dominator tree once.  Alternatively, we could just keep it up to
        // date and use a more natural merged loop.
        SetVector<BasicBlock *> SplitBackedges;
        for (BasicBlock *Header : Headers) {
          BasicBlock *NewBB = SplitEdge(Term->getParent(), Header, &DT);
          PollsNeeded.push_back(NewBB->getTerminator());
          NumBackedgeSafepoints++;
        }
      } else {
        // Split the latch block itself, right before the terminator.
        PollsNeeded.push_back(Term);
        NumBackedgeSafepoints++;
      }
    }
  }

  if (enableEntrySafepoints(F)) {
    if (Instruction *Location = findLocationForEntrySafepoint(F, DT)) {
      PollsNeeded.push_back(Location);
      Modified = true;
      NumEntrySafepoints++;
    }
    // TODO: else we should assert that there was, in fact, a policy choice to
    // not insert an entry safepoint poll.
  }

  // Now that we've identified all the needed safepoint poll locations, insert
  // safepoint polls themselves.
  for (Instruction *PollLocation : PollsNeeded) {
    std::vector<CallSite> RuntimeCalls;
    InsertSafepointPoll(PollLocation, RuntimeCalls, TLI);
    ParsePointNeeded.insert(ParsePointNeeded.end(), RuntimeCalls.begin(),
                            RuntimeCalls.end());
  }

  return Modified;
}
Example #8
0
Function *PartialInlinerImpl::unswitchFunction(Function *F) {
  // First, verify that this function is an unswitching candidate...
  BasicBlock *EntryBlock = &F->front();
  BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
  if (!BR || BR->isUnconditional())
    return nullptr;

  BasicBlock *ReturnBlock = nullptr;
  BasicBlock *NonReturnBlock = nullptr;
  unsigned ReturnCount = 0;
  for (BasicBlock *BB : successors(EntryBlock)) {
    if (isa<ReturnInst>(BB->getTerminator())) {
      ReturnBlock = BB;
      ReturnCount++;
    } else
      NonReturnBlock = BB;
  }

  if (ReturnCount != 1)
    return nullptr;

  // Clone the function, so that we can hack away on it.
  ValueToValueMapTy VMap;
  Function *DuplicateFunction = CloneFunction(F, VMap);
  DuplicateFunction->setLinkage(GlobalValue::InternalLinkage);
  BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[EntryBlock]);
  BasicBlock *NewReturnBlock = cast<BasicBlock>(VMap[ReturnBlock]);
  BasicBlock *NewNonReturnBlock = cast<BasicBlock>(VMap[NonReturnBlock]);

  // Go ahead and update all uses to the duplicate, so that we can just
  // use the inliner functionality when we're done hacking.
  F->replaceAllUsesWith(DuplicateFunction);

  // Special hackery is needed with PHI nodes that have inputs from more than
  // one extracted block.  For simplicity, just split the PHIs into a two-level
  // sequence of PHIs, some of which will go in the extracted region, and some
  // of which will go outside.
  BasicBlock *PreReturn = NewReturnBlock;
  NewReturnBlock = NewReturnBlock->splitBasicBlock(
      NewReturnBlock->getFirstNonPHI()->getIterator());
  BasicBlock::iterator I = PreReturn->begin();
  Instruction *Ins = &NewReturnBlock->front();
  while (I != PreReturn->end()) {
    PHINode *OldPhi = dyn_cast<PHINode>(I);
    if (!OldPhi)
      break;

    PHINode *RetPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins);
    OldPhi->replaceAllUsesWith(RetPhi);
    Ins = NewReturnBlock->getFirstNonPHI();

    RetPhi->addIncoming(&*I, PreReturn);
    RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewEntryBlock),
                        NewEntryBlock);
    OldPhi->removeIncomingValue(NewEntryBlock);

    ++I;
  }
  NewEntryBlock->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock);

  // Gather up the blocks that we're going to extract.
  std::vector<BasicBlock *> ToExtract;
  ToExtract.push_back(NewNonReturnBlock);
  for (BasicBlock &BB : *DuplicateFunction)
    if (&BB != NewEntryBlock && &BB != NewReturnBlock &&
        &BB != NewNonReturnBlock)
      ToExtract.push_back(&BB);

  // The CodeExtractor needs a dominator tree.
  DominatorTree DT;
  DT.recalculate(*DuplicateFunction);

  // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
  LoopInfo LI(DT);
  BranchProbabilityInfo BPI(*DuplicateFunction, LI);
  BlockFrequencyInfo BFI(*DuplicateFunction, BPI, LI);

  // Extract the body of the if.
  Function *ExtractedFunction =
      CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, &BFI, &BPI)
          .extractCodeRegion();

  // Inline the top-level if test into all callers.
  std::vector<User *> Users(DuplicateFunction->user_begin(),
                            DuplicateFunction->user_end());
  for (User *User : Users)
    if (CallInst *CI = dyn_cast<CallInst>(User))
      InlineFunction(CI, IFI);
    else if (InvokeInst *II = dyn_cast<InvokeInst>(User))
      InlineFunction(II, IFI);

  // Ditch the duplicate, since we're done with it, and rewrite all remaining
  // users (function pointers, etc.) back to the original function.
  DuplicateFunction->replaceAllUsesWith(F);
  DuplicateFunction->eraseFromParent();

  ++NumPartialInlined;

  return ExtractedFunction;
}
Example #9
0
DominatorTree DominatorTreeAnalysis::run(Function &F,
                                         FunctionAnalysisManager &) {
  DominatorTree DT;
  DT.recalculate(F);
  return DT;
}
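Under the new pass manager shown above, a consumer never calls recalculate() itself; it asks the analysis manager for the cached tree. A hedged usage sketch (the pass is hypothetical):

#include "llvm/IR/Dominators.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;

struct UsesDomTreePass : PassInfoMixin<UsesDomTreePass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
    // Computed once by DominatorTreeAnalysis::run() and cached; reused until
    // some transformation invalidates it.
    DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
    (void)DT; // dominance queries would go here
    return PreservedAnalyses::all(); // F was not modified
  }
};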
Example #10
0
static void
InsertSafepointPoll(DominatorTree &DT, Instruction *term,
                    std::vector<CallSite> &ParsePointsNeeded /*rval*/) {
  Module *M = term->getParent()->getParent()->getParent();
  assert(M);

  // Inline the safepoint poll implementation - this will get all the branch,
  // control flow, etc.  Most importantly, it will introduce the actual slow
  // path call - where we need to insert a safepoint (parsepoint).
  FunctionType *ftype =
      FunctionType::get(Type::getVoidTy(M->getContext()), false);
  assert(ftype && "null?");
  // Note: This cast can fail if there's a function of the same name with a
  // different type inserted previously
  Function *F =
      dyn_cast<Function>(M->getOrInsertFunction("gc.safepoint_poll", ftype));
  assert(F && !F->empty() && "definition must exist");
  CallInst *poll = CallInst::Create(F, "", term);

  // Record some information about the call site we're replacing
  BasicBlock *OrigBB = term->getParent();
  BasicBlock::iterator before(poll), after(poll);
  bool isBegin(false);
  if (before == term->getParent()->begin()) {
    isBegin = true;
  } else {
    before--;
  }
  after++;
  assert(after != poll->getParent()->end() && "must have successor");
  assert(DT.dominates(before, after) && "trivially true");

  // do the actual inlining
  InlineFunctionInfo IFI;
  bool inlineStatus = InlineFunction(poll, IFI);
  assert(inlineStatus && "inline must succeed");
  (void)inlineStatus; // suppress warning in release-asserts

  // Check post conditions
  assert(IFI.StaticAllocas.empty() && "can't have allocs");

  std::vector<CallInst *> calls; // new calls
  std::set<BasicBlock *> BBs;    // new BBs + insertee
  // Include only the newly inserted instructions.  Note: begin may not be
  // valid if we inserted at the beginning of the basic block.
  BasicBlock::iterator start;
  if (isBegin) {
    start = OrigBB->begin();
  } else {
    start = before;
    start++;
  }

  // If your poll function includes an unreachable at the end, that's not
  // valid.  Bugpoint likes to create this, so check for it.
  assert(isPotentiallyReachable(&*start, &*after, nullptr, nullptr) &&
         "malformed poll function");

  scanInlinedCode(&*(start), &*(after), calls, BBs);

  // Recompute since we've invalidated cached data.  Conceptually we
  // shouldn't need to do this, but implementation wise we appear to.  Needed
  // so we can insert safepoints correctly.
  // TODO: update more cheaply
  DT.recalculate(*after->getParent()->getParent());

  assert(!calls.empty() && "slow path not found for safepoint poll");

  // Record the fact we need a parsable state at the runtime call contained in
  // the poll function.  This is required so that the runtime knows how to
  // parse the last frame when we actually take  the safepoint (i.e. execute
  // the slow path)
  assert(ParsePointsNeeded.empty());
  for (size_t i = 0; i < calls.size(); i++) {

    // No safepoint needed or wanted
    if (!needsStatepoint(calls[i])) {
      continue;
    }

    // These are likely runtime calls.  Should we assert that via calling
    // convention or something?
    ParsePointsNeeded.push_back(CallSite(calls[i]));
  }
  assert(ParsePointsNeeded.size() <= calls.size());
}
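The dyn_cast guard around getOrInsertFunction() above is load-bearing: when the module already declares "gc.safepoint_poll" with a different type, getOrInsertFunction() returns a bitcasted constant rather than a Function, and the cast yields null. A hedged sketch of that lookup pattern (helper name hypothetical, using the era's Constant*-returning API):

#include "llvm/ADT/StringRef.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Returns the void() function named Name, or null if the module already
// declares Name with an incompatible type.
static Function *lookupVoidFunction(Module &M, StringRef Name) {
  FunctionType *FTy =
      FunctionType::get(Type::getVoidTy(M.getContext()), /*isVarArg=*/false);
  return dyn_cast<Function>(M.getOrInsertFunction(Name, FTy));
}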
Example #11
0
bool PlaceSafepoints::runOnFunction(Function &F) {
  if (F.isDeclaration() || F.empty()) {
    // This is a declaration, nothing to do.  Must exit early to avoid crash in
    // dom tree calculation
    return false;
  }

  bool modified = false;

  // In various bits below, we rely on the fact that uses are reachable from
  // defs.  When there are basic blocks unreachable from the entry, dominance
  // and reachability queries return nonsensical results.  Thus, we preprocess
  // the function to ensure these properties hold.
  modified |= removeUnreachableBlocks(F);

  // STEP 1 - Insert the safepoint polling locations.  We do not need to
  // actually insert parse points yet.  That will be done for all polls and
  // calls in a single pass.

  // Note: With the migration, we need to recompute this for each 'pass'.  Once
  // we merge these, we'll do it once before the analysis
  DominatorTree DT;

  std::vector<CallSite> ParsePointNeeded;

  if (EnableBackedgeSafepoints) {
    // Construct a pass manager to run the LoopPass backedge logic.  We
    // need the pass manager to handle scheduling all the loop passes
    // appropriately.  Doing this by hand is painful and just not worth messing
    // with for the moment.
    FunctionPassManager FPM(F.getParent());
    PlaceBackedgeSafepointsImpl *PBS =
        new PlaceBackedgeSafepointsImpl(EnableCallSafepoints);
    FPM.add(PBS);
    // Note: While the analysis pass itself won't modify the IR, LoopSimplify
    // (which it depends on) may.  i.e. analysis must be recalculated after run
    FPM.run(F);

    // We preserve dominance information when inserting the poll, otherwise
    // we'd have to recalculate this on every insert
    DT.recalculate(F);

    // Insert a poll at each point the analysis pass identified
    for (size_t i = 0; i < PBS->PollLocations.size(); i++) {
      // We are inserting a poll, the function is modified
      modified = true;

      // The poll location must be the terminator of a loop latch block.
      TerminatorInst *Term = PBS->PollLocations[i];

      std::vector<CallSite> ParsePoints;
      if (SplitBackedge) {
        // Split the backedge of the loop and insert the poll within that new
        // basic block.  This creates a loop with two latches per original
        // latch (which is non-ideal), but this appears to be easier to
        // optimize in practice than inserting the poll immediately before the
        // latch test.

        // Since this is a latch, at least one of the successors must dominate
        // it. It's possible that we have a) duplicate edges to the same header
        // and b) edges to distinct loop headers.  We need to insert polls on
        // each. (Note: This still relies on LoopSimplify.)
        DenseSet<BasicBlock *> Headers;
        for (unsigned i = 0; i < Term->getNumSuccessors(); i++) {
          BasicBlock *Succ = Term->getSuccessor(i);
          if (DT.dominates(Succ, Term->getParent())) {
            Headers.insert(Succ);
          }
        }
        assert(!Headers.empty() && "poll location is not a loop latch?");

        // The split loop structure here is so that we only need to recalculate
        // the dominator tree once.  Alternatively, we could just keep it up to
        // date and use a more natural merged loop.
        DenseSet<BasicBlock *> SplitBackedges;
        for (BasicBlock *Header : Headers) {
          BasicBlock *NewBB = SplitEdge(Term->getParent(), Header, nullptr);
          SplitBackedges.insert(NewBB);
        }
        DT.recalculate(F);
        for (BasicBlock *NewBB : SplitBackedges) {
          InsertSafepointPoll(DT, NewBB->getTerminator(), ParsePoints);
          NumBackedgeSafepoints++;
        }

      } else {
        // Split the latch block itself, right before the terminator.
        InsertSafepointPoll(DT, Term, ParsePoints);
        NumBackedgeSafepoints++;
      }

      // Record the parse points for later use
      ParsePointNeeded.insert(ParsePointNeeded.end(), ParsePoints.begin(),
                              ParsePoints.end());
    }
  }

  if (EnableEntrySafepoints) {
    DT.recalculate(F);
    Instruction *term = findLocationForEntrySafepoint(F, DT);
    if (!term) {
      // policy choice not to insert?
    } else {
      std::vector<CallSite> RuntimeCalls;
      InsertSafepointPoll(DT, term, RuntimeCalls);
      modified = true;
      NumEntrySafepoints++;
      ParsePointNeeded.insert(ParsePointNeeded.end(), RuntimeCalls.begin(),
                              RuntimeCalls.end());
    }
  }

  if (EnableCallSafepoints) {
    DT.recalculate(F);
    std::vector<CallSite> Calls;
    findCallSafepoints(F, Calls);
    NumCallSafepoints += Calls.size();
    ParsePointNeeded.insert(ParsePointNeeded.end(), Calls.begin(), Calls.end());
  }

  // Unique the vectors since we can end up with duplicates if we scan the call
  // site for call safepoints after we add it for entry or backedge.  The
  // only reason we need tracking at all is that some functions might have
  // polls but not call safepoints and thus we might miss marking the runtime
  // calls for the polls. (This is useful in test cases!)
  unique_unsorted(ParsePointNeeded);

  // Any parse point (no matter what source) will be handled here
  DT.recalculate(F); // Needed?

  // We're about to start modifying the function
  if (!ParsePointNeeded.empty())
    modified = true;

  // Now run through and insert the safepoints, but do _NOT_ update or remove
  // any existing uses.  We have references to live variables that need to
  // survive to the last iteration of this loop.
  std::vector<Value *> Results;
  Results.reserve(ParsePointNeeded.size());
  for (size_t i = 0; i < ParsePointNeeded.size(); i++) {
    CallSite &CS = ParsePointNeeded[i];
    Value *GCResult = ReplaceWithStatepoint(CS, nullptr);
    Results.push_back(GCResult);
  }
  assert(Results.size() == ParsePointNeeded.size());

  // Adjust all users of the old call sites to use the new ones instead
  for (size_t i = 0; i < ParsePointNeeded.size(); i++) {
    CallSite &CS = ParsePointNeeded[i];
    Value *GCResult = Results[i];
    if (GCResult) {
      // We may have inserted the result in a different basic block than the
      // original safepoint (this can happen for invokes). We need to be sure
      // that the original result value is not used in any of the phi nodes at
      // the beginning of the basic block holding the gc result. Because we
      // know that all such blocks have a single predecessor, we can safely
      // assume that all phi nodes have a single entry (because of
      // normalizeBBForInvokeSafepoint); just remove them all here.
      if (CS.isInvoke()) {
        FoldSingleEntryPHINodes(cast<Instruction>(GCResult)->getParent(),
                                nullptr);
        assert(
            !isa<PHINode>(cast<Instruction>(GCResult)->getParent()->begin()));
      }

      // Replace all uses with the new call
      CS.getInstruction()->replaceAllUsesWith(GCResult);
    }

    // Now that we've handled all uses, remove the original call itself
    // Note: The insert point can't be the deleted instruction!
    CS.getInstruction()->eraseFromParent();
  }
  return modified;
}
Example #12
0
bool PlaceBackedgeSafepointsImpl::runOnLoop(Loop *L, LPPassManager &LPM) {
  ScalarEvolution *SE = &getAnalysis<ScalarEvolution>();

  // Loop through all predecessors of the loop header and identify all
  // backedges.  We need to place a safepoint on every backedge (potentially).
  // Note: Due to LoopSimplify there should only be one.  Assert?  Or can we
  // relax this?
  BasicBlock *header = L->getHeader();

  // TODO: Use the analysis pass infrastructure for this.  There is no reason
  // to recalculate this here.
  DominatorTree DT;
  DT.recalculate(*header->getParent());

  bool modified = false;
  for (pred_iterator PI = pred_begin(header), E = pred_end(header); PI != E;
       PI++) {
    BasicBlock *pred = *PI;
    if (!L->contains(pred)) {
      // This is not a backedge, it's coming from outside the loop
      continue;
    }

    // Make a policy decision about whether this loop needs a safepoint or
    // not.  Note that this is about unburdening the optimizer in loops, not
    // avoiding the runtime cost of the actual safepoint.
    if (!AllBackedges) {
      if (mustBeFiniteCountedLoop(L, SE, pred)) {
        if (TraceLSP)
          errs() << "skipping safepoint placement in finite loop\n";
        FiniteExecution++;
        continue;
      }
      if (CallSafepointsEnabled &&
          containsUnconditionalCallSafepoint(L, header, pred, DT)) {
        // Note: This is only semantically legal since we won't do any further
        // IPO or inlining before the actual call insertion.  Otherwise, we
        // might later lose this call safepoint.
        if (TraceLSP)
          errs() << "skipping safepoint placement due to unconditional call\n";
        CallInLoop++;
        continue;
      }
    }

    // TODO: We can create an inner loop which runs a finite number of
    // iterations with an outer loop which contains a safepoint.  This would
    // not help runtime performance that much, but it might help our ability to
    // optimize the inner loop.

    // We're unconditionally going to modify this loop.
    modified = true;

    // Safepoint insertion would involve creating a new basic block (as the
    // target of the current backedge) which does the safepoint (of all live
    // variables) and branches to the true header
    TerminatorInst *term = pred->getTerminator();

    if (TraceLSP) {
      errs() << "[LSP] terminator instruction: ";
      term->dump();
    }

    PollLocations.push_back(term);
  }

  return modified;
}
Example #13
0
Function* PartialInliner::unswitchFunction(Function* F) {
  // First, verify that this function is an unswitching candidate...
  BasicBlock* entryBlock = F->begin();
  BranchInst *BR = dyn_cast<BranchInst>(entryBlock->getTerminator());
  if (!BR || BR->isUnconditional())
    return nullptr;
  
  BasicBlock* returnBlock = nullptr;
  BasicBlock* nonReturnBlock = nullptr;
  unsigned returnCount = 0;
  for (BasicBlock *BB : successors(entryBlock)) {
    if (isa<ReturnInst>(BB->getTerminator())) {
      returnBlock = BB;
      returnCount++;
    } else
      nonReturnBlock = BB;
  }
  
  if (returnCount != 1)
    return nullptr;
  
  // Clone the function, so that we can hack away on it.
  ValueToValueMapTy VMap;
  Function* duplicateFunction = CloneFunction(F, VMap,
                                              /*ModuleLevelChanges=*/false);
  duplicateFunction->setLinkage(GlobalValue::InternalLinkage);
  F->getParent()->getFunctionList().push_back(duplicateFunction);
  BasicBlock* newEntryBlock = cast<BasicBlock>(VMap[entryBlock]);
  BasicBlock* newReturnBlock = cast<BasicBlock>(VMap[returnBlock]);
  BasicBlock* newNonReturnBlock = cast<BasicBlock>(VMap[nonReturnBlock]);
  
  // Go ahead and update all uses to the duplicate, so that we can just
  // use the inliner functionality when we're done hacking.
  F->replaceAllUsesWith(duplicateFunction);
  
  // Special hackery is needed with PHI nodes that have inputs from more than
  // one extracted block.  For simplicity, just split the PHIs into a two-level
  // sequence of PHIs, some of which will go in the extracted region, and some
  // of which will go outside.
  BasicBlock* preReturn = newReturnBlock;
  newReturnBlock = newReturnBlock->splitBasicBlock(
                                              newReturnBlock->getFirstNonPHI());
  BasicBlock::iterator I = preReturn->begin();
  BasicBlock::iterator Ins = newReturnBlock->begin();
  while (I != preReturn->end()) {
    PHINode* OldPhi = dyn_cast<PHINode>(I);
    if (!OldPhi) break;
    
    PHINode* retPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins);
    OldPhi->replaceAllUsesWith(retPhi);
    Ins = newReturnBlock->getFirstNonPHI();
    
    retPhi->addIncoming(I, preReturn);
    retPhi->addIncoming(OldPhi->getIncomingValueForBlock(newEntryBlock),
                        newEntryBlock);
    OldPhi->removeIncomingValue(newEntryBlock);
    
    ++I;
  }
  newEntryBlock->getTerminator()->replaceUsesOfWith(preReturn, newReturnBlock);
  
  // Gather up the blocks that we're going to extract.
  std::vector<BasicBlock*> toExtract;
  toExtract.push_back(newNonReturnBlock);
  for (Function::iterator FI = duplicateFunction->begin(),
       FE = duplicateFunction->end(); FI != FE; ++FI)
    if (&*FI != newEntryBlock && &*FI != newReturnBlock &&
        &*FI != newNonReturnBlock)
      toExtract.push_back(FI);
      
  // The CodeExtractor needs a dominator tree.
  DominatorTree DT;
  DT.recalculate(*duplicateFunction);

  // Extract the body of the if.
  Function* extractedFunction
    = CodeExtractor(toExtract, &DT).extractCodeRegion();
  
  InlineFunctionInfo IFI;
  
  // Inline the top-level if test into all callers.
  std::vector<User *> Users(duplicateFunction->user_begin(),
                            duplicateFunction->user_end());
  for (std::vector<User*>::iterator UI = Users.begin(), UE = Users.end();
       UI != UE; ++UI)
    if (CallInst *CI = dyn_cast<CallInst>(*UI))
      InlineFunction(CI, IFI);
    else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI))
      InlineFunction(II, IFI);
  
  // Ditch the duplicate, since we're done with it, and rewrite all remaining
  // users (function pointers, etc.) back to the original function.
  duplicateFunction->replaceAllUsesWith(F);
  duplicateFunction->eraseFromParent();
  
  ++NumPartialInlined;
  
  return extractedFunction;
}
Example #14
0
bool LLPEAnalysisPass::runOnModule(Module& M) {

  if(!mkdtemp(ihp_workdir)) {
    errs() << "Failed to create " << ihp_workdir << "\n";
    exit(1);
  }

  TD = &getAnalysisIfAvailable<DataLayoutPass>()->getDataLayout();
  GlobalTD = TD;
  AA = &getAnalysis<AliasAnalysis>();
  GlobalAA = AA;
  GlobalTLI = getAnalysisIfAvailable<TargetLibraryInfo>();
  GlobalIHP = this;
  GInt8Ptr = Type::getInt8PtrTy(M.getContext());
  GInt8 = Type::getInt8Ty(M.getContext());
  GInt16 = Type::getInt16Ty(M.getContext());
  GInt32 = Type::getInt32Ty(M.getContext());
  GInt64 = Type::getInt64Ty(M.getContext());

  persistPrinter = getPersistPrinter(&M);

  initMRInfo(&M);
  
  for(Module::iterator MI = M.begin(), ME = M.end(); MI != ME; MI++) {

    if(!MI->isDeclaration()) {
      DominatorTree* NewDT = new DominatorTree();
      NewDT->recalculate(*MI);
      DTs[MI] = NewDT;
    }

  }

  Function* FoundF = M.getFunction(RootFunctionName);
  if((!FoundF) || FoundF->isDeclaration()) {

    errs() << "Function " << RootFunctionName << " not found or not defined in this module\n";
    return false;

  }

  Function& F = *FoundF;

  // Mark realloc as an identified object if the function is defined:
  if(Function* Realloc = M.getFunction("realloc")) {

    Realloc->setDoesNotAlias(0);

  }

  DEBUG(dbgs() << "Considering inlining starting at " << F.getName() << ":\n");

  std::vector<Constant*> argConstants(F.arg_size(), 0);
  uint32_t argvIdx = 0xffffffff;
  parseArgs(F, argConstants, argvIdx);

  populateGVCaches(&M);
  initSpecialFunctionsMap(M);
  // Last parameter: reserve extra GV slots for the constants that path condition parsing will produce.
  initShadowGlobals(M, getStringPathConditionCount());
  initBlacklistedFunctions(M);

  InlineAttempt* IA = new InlineAttempt(this, F, 0, 0);
  if(targetCallStack.size()) {

    IA->setTargetCall(targetCallStack[0], 0);

  }

  // Note ignored blocks and path conditions:
  parseArgsPostCreation(IA);

  // Now that all globals have grabbed heap slots, insert extra locations per special function.
  createSpecialLocations();

  argStores = new ArgStore[F.arg_size()];
  
  for(unsigned i = 0; i < F.arg_size(); ++i) {

    if(argConstants[i])
      setParam(IA, i, argConstants[i]);
    else {
      ImprovedValSetSingle* IVS = newIVS();
      IVS->SetType = ValSetTypeOldOverdef;
      IA->argShadows[i].i.PB = IVS;
    }

  }

  if(argvIdx != 0xffffffff) {

    ImprovedValSetSingle* NewIVS = newIVS();
    NewIVS->set(ImprovedVal(ShadowValue(&IA->argShadows[argvIdx]), 0), ValSetTypePB);
    IA->argShadows[argvIdx].i.PB = NewIVS;
    argStores[argvIdx] = ArgStore(heap.size());
    heap.push_back(AllocData());
    heap.back().allocIdx = heap.size() - 1;
    heap.back().isCommitted = false;
    heap.back().allocValue = ShadowValue(&IA->argShadows[argvIdx]);
    heap.back().allocType = IA->argShadows[argvIdx].getType();

  }

  createPointerArguments(IA);
  initGlobalFDStore();

  RootIA = IA;

  errs() << "Interpreting";
  IA->analyse();
  IA->finaliseAndCommit(false);
  fixNonLocalUses();
  errs() << "\n";
  
  if(IHPSaveDOTFiles) {

    // Function sharing is now decided, and hence the graph structure, so create
    // graph tags for the GUI.
    rootTag = RootIA->createTag(0);

  }
    
  return false;

}
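The loop above that populates DTs stores raw owning DominatorTree pointers, which must later be freed by hand. A hedged sketch of the same per-function cache with explicit ownership (llvm::make_unique was the era's stand-in for std::make_unique):

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Module.h"
#include <map>
#include <memory>
using namespace llvm;

// One dominator tree per defined function, freed automatically.
static std::map<Function *, std::unique_ptr<DominatorTree>>
buildDomTrees(Module &M) {
  std::map<Function *, std::unique_ptr<DominatorTree>> DTs;
  for (Function &Fn : M) {
    if (Fn.isDeclaration())
      continue; // declarations have no blocks to dominate
    auto DT = llvm::make_unique<DominatorTree>();
    DT->recalculate(Fn);
    DTs[&Fn] = std::move(DT);
  }
  return DTs;
}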
Example #15
0
bool runOnFunction(Function &F) override {
  DT.recalculate(F);
  Verify(F, DT);
  return false; // no modifications
}