Example no. 1
/// If \param [in] BB has more than one predecessor that ends with a
/// conditional branch, attempt to combine the branch conditions with a
/// parallel and/or. \returns true on success.
///
/// Before:
///   ......
///   %cmp10 = fcmp une float %tmp1, %tmp2
///   br i1 %cmp10, label %if.then, label %lor.rhs
///
/// lor.rhs:
///   ......
///   %cmp11 = fcmp une float %tmp3, %tmp4
///   br i1 %cmp11, label %if.then, label %if.end
///
/// if.end:  // the merge block
///   ......
///
/// if.then: // has two predecessors, both of which end with a conditional branch.
///   ......
///   br label %if.end;
///
/// After:
///  ......
///  %cmp10 = fcmp une float %tmp1, %tmp2
///  ......
///  %cmp11 = fcmp une float %tmp3, %tmp4
///  %cmp12 = or i1 %cmp10, %cmp11    // parallel-or mode.
///  br i1 %cmp12, label %if.then, label %if.end
///
///  if.end:
///    ......
///
///  if.then:
///    ......
///    br label %if.end;
///
///  The current implementation handles two cases.
///  Case 1: \param BB is on the else-path.
///
///          BB1
///        /     |
///       BB2    |
///      /   \   |
///     BB3   \  |     where BB1 and BB2 contain conditional branches,
///      \    |  /     BB3 contains an unconditional branch, and
///       \   | /      BB4 corresponds to \param BB, which is also the merge block.
///  BB => BB4
///
///
///  Corresponding source code:
///
///  if (a == b && c == d)
///    statement; // BB3
///
///  Case 2: \param BB is on the then-path.
///
///             BB1
///          /      |
///         |      BB2
///         \    /    |  where BB1 and BB2 contain conditional branches.
///  BB =>   BB3      |  BB3 contains an unconditional branch and corresponds
///           \     /    to \param BB.  BB4 is the merge block.
///             BB4
///
///  Corresponding source code:
///
///  if (a == b || c == d)
///    statement;  // BB3
///
///  In both cases, \param BB is the common successor of the conditional
///  branches.  In Case 1, \param BB (BB4) has a predecessor (BB3) that ends
///  with an unconditional branch.  In Case 2, \param BB (BB3) has only
///  conditional branches as its predecessors.
///
bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder,
                                         Pass *P) {
  PHINode *PHI = dyn_cast<PHINode>(BB->begin());
  if (PHI)
    return false; // For simplicity, avoid cases containing PHI nodes.

  BasicBlock *LastCondBlock = NULL;
  BasicBlock *FirstCondBlock = NULL;
  BasicBlock *UnCondBlock = NULL;
  int Idx = -1;
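  // FirstCondBlock is the topmost condition block that the others are merged
  // into (BB1 in both diagrams above); LastCondBlock is the conditional
  // predecessor whose branch is merged last (BB2); UnCondBlock is the
  // unconditional predecessor (BB3 in Case 1), if any.  Idx records which
  // successor slot (0 = true edge, 1 = false edge) of the conditional
  // branches leads to \param BB; it must be the same for all of them.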

  // Check predecessors of \param BB.
  SmallPtrSet<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
  for (SmallPtrSetIterator<BasicBlock *> PI = Preds.begin(), PE = Preds.end();
       PI != PE; ++PI) {
    BasicBlock *Pred = *PI;
    BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator());

    // All predecessors should terminate with a branch.
    if (!PBI)
      return false;

    BasicBlock *PP = Pred->getSinglePredecessor();

    if (PBI->isUnconditional()) {
      // Case 1: Pred (BB3) ends with an unconditional branch.  It should
      // have a single predecessor (BB2) that is also a predecessor
      // of \param BB (BB4), and its address should not be taken.
      // There must be exactly one such unconditional predecessor.
      if (UnCondBlock || !PP || (Preds.count(PP) == 0) ||
          Pred->hasAddressTaken())
        return false;

      UnCondBlock = Pred;
      continue;
    }

    // Only conditional branches are allowed beyond this point.
    assert(PBI->isConditional());

    // The condition's only use must be the branch instruction.
    Value *PC = PBI->getCondition();
    if (!PC || !PC->hasOneUse())
      return false;

    if (PP && Preds.count(PP)) {
      // These are internal condition blocks to be merged from, e.g.,
      // BB2 in both cases.
      // Their address must not be taken.
      if (Pred->hasAddressTaken())
        return false;

      // Instructions in the internal condition blocks should be safe
      // to hoist up.
      for (BasicBlock::iterator BI = Pred->begin(), BE = PBI; BI != BE;) {
        Instruction *CI = BI++;
        if (isa<PHINode>(CI) || !isSafeToSpeculativelyExecute(CI))
          return false;
      }
    } else {
      // This is the condition block to be merged into, e.g. BB1 in
      // both cases.
      if (FirstCondBlock)
        return false;
      FirstCondBlock = Pred;
    }

    // Find whether BB is uniformly on the true (or false) path
    // for all of its predecessors.
    BasicBlock *PS1 = PBI->getSuccessor(0);
    BasicBlock *PS2 = PBI->getSuccessor(1);
    BasicBlock *PS = (PS1 == BB) ? PS2 : PS1;
    int CIdx = (PS1 == BB) ? 0 : 1;

    if (Idx == -1)
      Idx = CIdx;
    else if (CIdx != Idx)
      return false;

    // PS is the successor which is not BB. Check successors to identify
    // the last conditional branch.
    if (Preds.count(PS) == 0) {
      // Case 2.
      LastCondBlock = Pred;
    } else {
      // Case 1
      BranchInst *BPS = dyn_cast<BranchInst>(PS->getTerminator());
      if (BPS && BPS->isUnconditional()) {
        // Case 1: PS (BB3) ends with an unconditional branch.
        LastCondBlock = Pred;
      }
    }
  }

  if (!FirstCondBlock || !LastCondBlock || (FirstCondBlock == LastCondBlock))
    return false;

  TerminatorInst *TBB = LastCondBlock->getTerminator();
  BasicBlock *PS1 = TBB->getSuccessor(0);
  BasicBlock *PS2 = TBB->getSuccessor(1);
  BranchInst *PBI1 = dyn_cast<BranchInst>(PS1->getTerminator());
  BranchInst *PBI2 = dyn_cast<BranchInst>(PS2->getTerminator());

  // If PS1 does not unconditionally jump to PS2, check whether PS2
  // unconditionally jumps to PS1; if so, the conditions can still be merged
  // after the branches are inverted.
  if (!PBI1 || !PBI1->isUnconditional() ||
      (PS1->getTerminator()->getSuccessor(0) != PS2)) {
    // Check whether PS2 jumps into PS1.
    if (!PBI2 || !PBI2->isUnconditional() ||
        (PS2->getTerminator()->getSuccessor(0) != PS1))
      return false;

    // Do branch inversion.
    BasicBlock *CurrBlock = LastCondBlock;
    bool EverChanged = false;
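    // Walk from LastCondBlock up through the single-predecessor chain to
    // FirstCondBlock, inverting icmp ne / fcmp one comparisons and swapping
    // their branch successors to expose the canonical form expected above.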
    while (1) {
      BranchInst *BI = dyn_cast<BranchInst>(CurrBlock->getTerminator());
      CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
      CmpInst::Predicate Predicate = CI->getPredicate();
      // Canonicalize icmp_ne -> icmp_eq, fcmp_one -> fcmp_oeq
      if ((Predicate == CmpInst::ICMP_NE) || (Predicate == CmpInst::FCMP_ONE)) {
        CI->setPredicate(ICmpInst::getInversePredicate(Predicate));
        BI->swapSuccessors();
        EverChanged = true;
      }
      if (CurrBlock == FirstCondBlock)
        break;
      CurrBlock = CurrBlock->getSinglePredecessor();
    }
    return EverChanged;
  }

  // PS1 must end with an unconditional branch.
  if (!PBI1 || !PBI1->isUnconditional())
    return false;

  // PS2 should not contain a PHI node.
  PHI = dyn_cast<PHINode>(PS2->begin());
  if (PHI)
    return false;

  // Do the transformation.
  BasicBlock *CB;
  BranchInst *PBI = dyn_cast<BranchInst>(FirstCondBlock->getTerminator());
  bool Iteration = true;
  BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
  BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
  Value *PC = PBI->getCondition();
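
  // Starting from FirstCondBlock, repeatedly splice the next condition block
  // (the successor of the current branch that does not lead to \param BB)
  // into FirstCondBlock and fold its condition into the accumulated condition
  // PC with 'or' (Case 2) or 'and' (Case 1), until LastCondBlock has been
  // merged.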

  do {
    CB = PBI->getSuccessor(1 - Idx);
    // Delete the conditional branch.
    FirstCondBlock->getInstList().pop_back();
    FirstCondBlock->getInstList()
        .splice(FirstCondBlock->end(), CB->getInstList());
    PBI = cast<BranchInst>(FirstCondBlock->getTerminator());
    Value *CC = PBI->getCondition();
    // Merge conditions.
    Builder.SetInsertPoint(PBI);
    Value *NC;
    if (Idx == 0)
      // Case 2, use parallel or.
      NC = Builder.CreateOr(PC, CC);
    else
      // Case 1, use parallel and.
      NC = Builder.CreateAnd(PC, CC);

    PBI->replaceUsesOfWith(CC, NC);
    PC = NC;
    if (CB == LastCondBlock)
      Iteration = false;
    // Remove internal conditional branches.
    CB->dropAllReferences();
    // Make CB unreachable and let a later pass delete the block.
    new UnreachableInst(CB->getContext(), CB);
  } while (Iteration);

  Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
  DEBUG(dbgs() << "Use parallel and/or in:\n" << *FirstCondBlock);
  return true;
}
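
For context, here is a minimal driver sketch (not part of the listing above) of
how such a routine is typically exercised: a pass walks every block of a
function and restarts the scan after each successful flatten, since merging
blocks invalidates the iteration. The FlattenCFGOpt::run entry point and its
AliasAnalysis* constructor argument are assumptions for this sketch; the real
interface of the surrounding pass may differ.

static bool iterateOnFunction(Function &F) {
  bool Changed = false;
  bool LocalChange = true;
  while (LocalChange) {
    LocalChange = false;
    // Scan the blocks; restart after any change because the block list may
    // have been modified by the transformation.
    for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
      if (FlattenCFGOpt(/*AA=*/NULL).run(&*FI)) { // hypothetical entry point
        LocalChange = true;
        break;
      }
    }
    Changed |= LocalChange;
  }
  return Changed;
}
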
Example no. 2
bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
  bool MadeChange = false;

  Triple TT = Triple(L->getHeader()->getParent()->getParent()->
                     getTargetTriple());
  if (!TT.isArch32Bit() && !TT.isArch64Bit())
    return MadeChange; // Unknown arch. type.

  // Process nested loops first.
  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
    MadeChange |= convertToCTRLoop(*I);
  }

  // If a nested loop has been converted, then we can't convert this loop.
  if (MadeChange)
    return MadeChange;

#ifndef NDEBUG
  // Stop trying after reaching the limit (if any).
  int Limit = CTRLoopLimit;
  if (Limit >= 0) {
    if (Counter >= CTRLoopLimit)
      return false;
    Counter++;
  }
#endif

  // We don't want to spill/restore the counter register, and so we don't
  // want to use the counter register if the loop contains calls.
  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
       I != IE; ++I)
    if (mightUseCTR(TT, *I))
      return MadeChange;

  SmallVector<BasicBlock*, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  BasicBlock *CountedExitBlock = 0;
  const SCEV *ExitCount = 0;
  BranchInst *CountedExitBranch = 0;
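  // Look for an exiting block whose exit count is computable, non-zero, and
  // loop-invariant, and whose branch executes on every iteration; that branch
  // is the one that will be rewritten to test the decremented CTR.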
  for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
       IE = ExitingBlocks.end(); I != IE; ++I) {
    const SCEV *EC = SE->getExitCount(L, *I);
    DEBUG(dbgs() << "Exit Count for " << *L << " from block " <<
                    (*I)->getName() << ": " << *EC << "\n");
    if (isa<SCEVCouldNotCompute>(EC))
      continue;
    if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
      if (ConstEC->getValue()->isZero())
        continue;
    } else if (!SE->isLoopInvariant(EC, L))
      continue;

    if (SE->getTypeSizeInBits(EC->getType()) > (TT.isArch64Bit() ? 64 : 32))
      continue;

    // We now have a loop-invariant count of loop iterations (which is not the
    // constant zero) for which we know that this loop will not exit via this
    // exiting block.

    // We need to make sure that this block will run on every loop iteration.
    // For this to be true, we must dominate all blocks with backedges. Such
    // blocks are in-loop predecessors to the header block.
    bool NotAlways = false;
    for (pred_iterator PI = pred_begin(L->getHeader()),
         PIE = pred_end(L->getHeader()); PI != PIE; ++PI) {
      if (!L->contains(*PI))
        continue;

      if (!DT->dominates(*I, *PI)) {
        NotAlways = true;
        break;
      }
    }

    if (NotAlways)
      continue;

    // Make sure this block ends with a conditional branch.
    Instruction *TI = (*I)->getTerminator();
    if (!TI)
      continue;

    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      if (!BI->isConditional())
        continue;

      CountedExitBranch = BI;
    } else
      continue;

    // Note that this block may not be the loop latch block, even if the loop
    // has a latch block.
    CountedExitBlock = *I;
    ExitCount = EC;
    break;
  }

  if (!CountedExitBlock)
    return MadeChange;

  BasicBlock *Preheader = L->getLoopPreheader();

  // If we don't have a preheader, then insert one. If we already have a
  // preheader, then we can use it (except if the preheader contains a use of
  // the CTR register because some such uses might be reordered by the
  // selection DAG after the mtctr instruction).
  if (!Preheader || mightUseCTR(TT, Preheader))
    Preheader = InsertPreheaderForLoop(L, this);
  if (!Preheader)
    return MadeChange;

  DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName() << "\n");

  // Insert the count into the preheader and replace the condition used by the
  // selected branch.
  MadeChange = true;

  SCEVExpander SCEVE(*SE, "loopcnt");
  LLVMContext &C = SE->getContext();
  Type *CountType = TT.isArch64Bit() ? Type::getInt64Ty(C) :
                                       Type::getInt32Ty(C);
  if (!ExitCount->getType()->isPointerTy() &&
      ExitCount->getType() != CountType)
    ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
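  // getExitCount returns the number of times the backedge is taken before the
  // loop exits through this block; add one to get the trip count that is
  // loaded into the CTR.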
  ExitCount = SE->getAddExpr(ExitCount,
                             SE->getConstant(CountType, 1)); 
  Value *ECValue = SCEVE.expandCodeFor(ExitCount, CountType,
                                       Preheader->getTerminator());

  IRBuilder<> CountBuilder(Preheader->getTerminator());
  Module *M = Preheader->getParent()->getParent();
  Value *MTCTRFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr,
                                               CountType);
  CountBuilder.CreateCall(MTCTRFunc, ECValue);

  IRBuilder<> CondBuilder(CountedExitBranch);
  Value *DecFunc =
    Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero);
  Value *NewCond = CondBuilder.CreateCall(DecFunc);
  Value *OldCond = CountedExitBranch->getCondition();
  CountedExitBranch->setCondition(NewCond);
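  // The intrinsic decrements the CTR and returns true while it is still
  // non-zero, so the rewritten branch must keep looping on its true edge.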

  // The false branch must exit the loop.
  if (!L->contains(CountedExitBranch->getSuccessor(0)))
    CountedExitBranch->swapSuccessors();

  // The old condition may be dead now, and may have even created a dead PHI
  // (the original induction variable).
  RecursivelyDeleteTriviallyDeadInstructions(OldCond);
  DeleteDeadPHIs(CountedExitBlock);

  ++NumCTRLoops;
  return MadeChange;
}
Example no. 3
bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
  bool MadeChange = false;

  // Do not convert small, short loops to a CTR loop.
  unsigned ConstTripCount = SE->getSmallConstantTripCount(L);
  if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) {
    SmallPtrSet<const Value *, 32> EphValues;
    auto AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
        *L->getHeader()->getParent());
    CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
    CodeMetrics Metrics;
    for (BasicBlock *BB : L->blocks())
      Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
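    // If the whole loop is no larger than what the core can issue while the
    // mtctr latency is hidden, the CTR setup cost is unlikely to pay off.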
    // 6 is an approximate latency for the mtctr instruction.
    if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth()))
      return false;
  }

  // Process nested loops first.
  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
    MadeChange |= convertToCTRLoop(*I);
    LLVM_DEBUG(dbgs() << "Nested loop converted\n");
  }

  // If a nested loop has been converted, then we can't convert this loop.
  if (MadeChange)
    return MadeChange;

  // Bail out if the loop has irreducible control flow.
  LoopBlocksRPO RPOT(L);
  RPOT.perform(LI);
  if (containsIrreducibleCFG<const BasicBlock *>(RPOT, *LI))
    return false;

#ifndef NDEBUG
  // Stop trying after reaching the limit (if any).
  int Limit = CTRLoopLimit;
  if (Limit >= 0) {
    if (Counter >= CTRLoopLimit)
      return false;
    Counter++;
  }
#endif

  // We don't want to spill/restore the counter register, and so we don't
  // want to use the counter register if the loop contains calls.
  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
       I != IE; ++I)
    if (mightUseCTR(*I))
      return MadeChange;

  SmallVector<BasicBlock*, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // If there is an exit edge known to be frequently taken,
  // we should not transform this loop.
  for (auto &BB : ExitingBlocks) {
    Instruction *TI = BB->getTerminator();
    if (!TI) continue;

    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      uint64_t TrueWeight = 0, FalseWeight = 0;
      if (!BI->isConditional() ||
          !BI->extractProfMetadata(TrueWeight, FalseWeight))
        continue;

      // If the exit path is more frequent than the loop path,
      // we return here without further analysis for this loop.
      bool TrueIsExit = !L->contains(BI->getSuccessor(0));
      if (( TrueIsExit && FalseWeight < TrueWeight) ||
          (!TrueIsExit && FalseWeight > TrueWeight))
        return MadeChange;
    }
  }

  BasicBlock *CountedExitBlock = nullptr;
  const SCEV *ExitCount = nullptr;
  BranchInst *CountedExitBranch = nullptr;
  for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
       IE = ExitingBlocks.end(); I != IE; ++I) {
    const SCEV *EC = SE->getExitCount(L, *I);
    LLVM_DEBUG(dbgs() << "Exit Count for " << *L << " from block "
                      << (*I)->getName() << ": " << *EC << "\n");
    if (isa<SCEVCouldNotCompute>(EC))
      continue;
    if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
      if (ConstEC->getValue()->isZero())
        continue;
    } else if (!SE->isLoopInvariant(EC, L))
      continue;

    if (SE->getTypeSizeInBits(EC->getType()) > (TM->isPPC64() ? 64 : 32))
      continue;

    // If this exiting block is contained in a nested loop, it is not eligible
    // for insertion of the branch-and-decrement since the inner loop would
    // end up messing up the value in the CTR.
    if (LI->getLoopFor(*I) != L)
      continue;

    // We now have a loop-invariant count of loop iterations (which is not the
    // constant zero) for which we know that this loop will not exit via this
    // exiting block.

    // We need to make sure that this block will run on every loop iteration.
    // For this to be true, we must dominate all blocks with backedges. Such
    // blocks are in-loop predecessors to the header block.
    bool NotAlways = false;
    for (pred_iterator PI = pred_begin(L->getHeader()),
         PIE = pred_end(L->getHeader()); PI != PIE; ++PI) {
      if (!L->contains(*PI))
        continue;

      if (!DT->dominates(*I, *PI)) {
        NotAlways = true;
        break;
      }
    }

    if (NotAlways)
      continue;

    // Make sure this block ends with a conditional branch.
    Instruction *TI = (*I)->getTerminator();
    if (!TI)
      continue;

    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      if (!BI->isConditional())
        continue;

      CountedExitBranch = BI;
    } else
      continue;

    // Note that this block may not be the loop latch block, even if the loop
    // has a latch block.
    CountedExitBlock = *I;
    ExitCount = EC;
    break;
  }

  if (!CountedExitBlock)
    return MadeChange;

  BasicBlock *Preheader = L->getLoopPreheader();

  // If we don't have a preheader, then insert one. If we already have a
  // preheader, then we can use it (except if the preheader contains a use of
  // the CTR register because some such uses might be reordered by the
  // selection DAG after the mtctr instruction).
  if (!Preheader || mightUseCTR(Preheader))
    Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
  if (!Preheader)
    return MadeChange;

  LLVM_DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName()
                    << "\n");

  // Insert the count into the preheader and replace the condition used by the
  // selected branch.
  MadeChange = true;

  SCEVExpander SCEVE(*SE, *DL, "loopcnt");
  LLVMContext &C = SE->getContext();
  Type *CountType = TM->isPPC64() ? Type::getInt64Ty(C) : Type::getInt32Ty(C);
  if (!ExitCount->getType()->isPointerTy() &&
      ExitCount->getType() != CountType)
    ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
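  // Convert the backedge-taken count from getExitCount into a trip count by
  // adding one before it is loaded into the CTR.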
  ExitCount = SE->getAddExpr(ExitCount, SE->getOne(CountType));
  Value *ECValue =
      SCEVE.expandCodeFor(ExitCount, CountType, Preheader->getTerminator());

  IRBuilder<> CountBuilder(Preheader->getTerminator());
  Module *M = Preheader->getParent()->getParent();
  Function *MTCTRFunc =
      Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr, CountType);
  CountBuilder.CreateCall(MTCTRFunc, ECValue);

  IRBuilder<> CondBuilder(CountedExitBranch);
  Function *DecFunc =
      Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero);
  Value *NewCond = CondBuilder.CreateCall(DecFunc, {});
  Value *OldCond = CountedExitBranch->getCondition();
  CountedExitBranch->setCondition(NewCond);
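  // ppc_is_decremented_ctr_nonzero decrements the CTR and tests it; the
  // backend later matches this pattern into a counter-based branch (bdnz).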

  // The false branch must exit the loop.
  if (!L->contains(CountedExitBranch->getSuccessor(0)))
    CountedExitBranch->swapSuccessors();

  // The old condition may be dead now, and may have even created a dead PHI
  // (the original induction variable).
  RecursivelyDeleteTriviallyDeadInstructions(OldCond);
  // Run through the basic blocks of the loop and see if any of them have dead
  // PHIs that can be removed.
  for (auto I : L->blocks())
    DeleteDeadPHIs(I);

  ++NumCTRLoops;
  return MadeChange;
}
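
For illustration, after convertToCTRLoop succeeds the counted loop roughly
takes the shape sketched below. The block and value names are made up for this
sketch, the intrinsic mangling (i32 vs. i64 count) depends on the target, and
the exact IR depends entirely on the input loop.

  preheader:
    %tripcount = add i64 %exit.count, 1
    call void @llvm.ppc.mtctr.i64(i64 %tripcount)
    br label %body

  body:
    ......
    %keep.going = call i1 @llvm.ppc.is.decremented.ctr.nonzero()
    br i1 %keep.going, label %body, label %exit

The PPC backend later lowers this pattern into a hardware counted loop (mtctr
plus a bdnz-style branch).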