Example no. 1
0
void vSSA::createSigmasIfNeeded(BasicBlock *BB)
{
	TerminatorInst *ti = BB->getTerminator();
	// If the condition used in the terminator instruction is a comparison instruction,
	// create sigmas for each operand of the CmpInst, depending on some conditions.
	/*
	if(isa<BranchInst>(ti)){
		BranchInst * bc = cast<BranchInst>(ti);
		if(bc->isConditional()){
			Value * cond = bc->getCondition();
			CmpInst *comparison = dyn_cast<CmpInst>(cond);
			for (User::const_op_iterator it = comparison->op_begin(), e = comparison->op_end(); it != e; ++it) {
				Value *operand = *it;
				if (isa<Instruction>(operand) || isa<Argument>(operand)) {
					insertSigmas(ti, operand);
				}
			}
		}
	}
	*/
	
	// CASE 1: Branch Instruction
	BranchInst *bi = NULL;
	SwitchInst *si = NULL;
	if ((bi = dyn_cast<BranchInst>(ti))) {
		if (bi->isConditional()) {
			Value *condition = bi->getCondition();
			
			ICmpInst *comparison = dyn_cast<ICmpInst>(condition);
			
			if (comparison) {
				// Create sigmas for ICmp operands
				for (User::const_op_iterator opit = comparison->op_begin(), opend = comparison->op_end(); opit != opend; ++opit) {
					Value *operand = *opit;
					
					if (isa<Instruction>(operand) || isa<Argument>(operand)) {
						insertSigmas(ti, operand);
						
						// If the operand is the result of a cast instruction (e.g. ZExt, SExt, Trunc),
						// create sigmas for the cast's operand as well.
						CastInst *cinst = NULL;
						if ((cinst = dyn_cast<CastInst>(operand))) {
							insertSigmas(ti, cinst->getOperand(0));
						}
					}
				}
			}
		}
	}
	// CASE 2: Switch Instruction
	
	else if ((si = dyn_cast<SwitchInst>(ti))) {
		Value *condition = si->getCondition();
		
		if (isa<Instruction>(condition) || isa<Argument>(condition)) {
			insertSigmas(ti, condition);
			
			// If the operand is the result of a cast instruction (e.g. ZExt, SExt, Trunc),
			// create sigmas for the cast's operand as well.
			CastInst *cinst = NULL;
			if ((cinst = dyn_cast<CastInst>(condition))) {
				insertSigmas(ti, cinst->getOperand(0));
			}
		}
	}
	
}
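
A minimal, hypothetical C++ sketch (not part of the pass above) of the situation the sigma-insertion code targets: a conditional branch on an integer comparison, where each successor learns a different constraint on the compared value, which is why vSSA places a sigma copy of the operand in each outgoing block.

#include <cstdio>

// Hypothetical source-level analogue of the IR handled by createSigmasIfNeeded.
// The branch condition is an ICmpInst on 'x'; in e-SSA the pass would insert
//   x.sigma.true  in the taken block      (where x < 10 is known)
//   x.sigma.false in the fall-through     (where x >= 10 is known)
// so later range analyses can attach those constraints to distinct names.
int clamp_example(int x) {
  if (x < 10) {     // ICmpInst feeding a conditional BranchInst
    return x + 1;   // uses of x here would be rewritten to x.sigma.true
  }
  return x - 10;    // uses of x here would be rewritten to x.sigma.false
}

int main() {
  std::printf("%d %d\n", clamp_example(3), clamp_example(42));
  return 0;
}
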
Example no. 2
0
void CPFlowFunction::visitBranchInst(BranchInst &BI) {
  CPLatticePoint* result = new CPLatticePoint(*(info_in_casted.back()));
  info_in_casted.pop_back();
  BranchInst* current = &BI;

  if (BI.isConditional()) {
    Value* cond = BI.getCondition();
    if (isa<ICmpInst>(cond)) {
      std::pair<Use*, Use *> branches = helper::getOps(BI);
      Use* true_branch = branches.first;
      Use* false_branch = branches.second;

      ICmpInst* cmp = dyn_cast<ICmpInst>(cond);
      std::pair<Use*, Use *> operands = helper::getOps(*cmp);
      Use* rhs = operands.second;
      Use* lhs = operands.first;

      ConstantInt* rhs_const = NULL;
      ConstantInt* lhs_const = NULL;
      // get the rhs/lhs as a constant int
      if (isa<ConstantInt>(rhs->get())) {
        rhs_const = dyn_cast<ConstantInt>(rhs->get());
      } else if (result->representation.count(rhs->get()) > 0) {
        rhs_const = result->representation[rhs->get()];
      } else {
        rhs_const = ConstantInt::get(context, llvm::APInt(32, 0, true));
      } 
      if (isa<ConstantInt>(lhs->get())) {
        lhs_const = dyn_cast<ConstantInt>(lhs->get());
      } else if (result->representation.count(lhs->get()) > 0) {
        lhs_const = result->representation[lhs->get()];
      } else {
        lhs_const = ConstantInt::get(context, llvm::APInt(32, 0, true));
      }

      // Create successors
      CPLatticePoint* true_branchCLP = new CPLatticePoint(false, false, std::map<Value*,ConstantInt*>(result->representation));
      CPLatticePoint* false_branchCLP = new CPLatticePoint(false, false, std::map<Value*,ConstantInt*>(result->representation));

      // get the predicate
      int predicate = 0;
      predicate = cmp->isSigned() ? cmp->getSignedPredicate() : cmp->getUnsignedPredicate();
      if (predicate == CmpInst::ICMP_EQ) {
        if (isa<ConstantInt>(lhs->get())) {
           true_branchCLP->representation[rhs->get()] = lhs_const;
        } else if (isa<ConstantInt>(rhs->get())) {
           true_branchCLP->representation[lhs->get()] = rhs_const;
        }
        out_map[true_branch->get()] = true_branchCLP;
        out_map[false_branch->get()] = false_branchCLP;
      } else if (predicate == CmpInst::ICMP_NE) {
        if (isa<ConstantInt>(lhs->get())) {
           false_branchCLP->representation[rhs->get()] = lhs_const;
        } else if (isa<ConstantInt>(rhs->get())) {
           false_branchCLP->representation[lhs->get()] = rhs_const;
        }
        out_map[true_branch->get()] = true_branchCLP;
        out_map[false_branch->get()] = false_branchCLP;
      } else {
        for (std::map<Value *, LatticePoint *>::iterator it=out_map.begin(); it != out_map.end(); ++it){
          Value* elm = it->first;
          out_map[elm] = new CPLatticePoint(*result);
        }
      }
    } else {
      for (std::map<Value *, LatticePoint *>::iterator it=out_map.begin(); it != out_map.end(); ++it){
        Value* elm = it->first;
        out_map[elm] = new CPLatticePoint(*result);
      }
    }
  } else {
    for (std::map<Value *, LatticePoint *>::iterator it=out_map.begin(); it != out_map.end(); ++it){
        Value* elm = it->first;
        out_map[elm] = new CPLatticePoint(*result);
    }
  }
}
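
As a hypothetical source-level illustration (not code from the flow function above), the ICMP_EQ / ICMP_NE handling corresponds to learning a constant binding for one operand on one side of the branch.

#include <cstdio>

// On the true edge of an equality test, the lattice above records lhs -> rhs
// (or rhs -> lhs) when one side is a ConstantInt; for an inequality test the
// binding is recorded on the false edge instead.
int demo(int x) {
  if (x == 5) {
    // True branch: the analysis may treat x as the constant 5 here,
    return x * 2;  // so this is foldable to 10.
  }
  // False branch of '==': nothing new is known about x.
  return x;
}

int main() {
  std::printf("%d %d\n", demo(5), demo(7));  // 10 7
  return 0;
}
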
Example no. 3
0
std::pair<Optional<SILValue>, SILLocation>
SILGenFunction::emitEpilogBB(SILLocation TopLevel) {
  assert(ReturnDest.getBlock() && "no epilog bb prepared?!");
  SILBasicBlock *epilogBB = ReturnDest.getBlock();
  SILLocation ImplicitReturnFromTopLevel =
    ImplicitReturnLocation::getImplicitReturnLoc(TopLevel);
  SmallVector<SILValue, 4> directResults;
  Optional<SILLocation> returnLoc = None;

  // If the current BB isn't terminated, and we require a return, then we
  // are not allowed to fall off the end of the function and can't reach here.
  if (NeedsReturn && B.hasValidInsertionPoint())
    B.createUnreachable(ImplicitReturnFromTopLevel);

  if (epilogBB->pred_empty()) {
    // If the epilog was not branched to at all, kill the BB and
    // just emit the epilog into the current BB.
    while (!epilogBB->empty())
      epilogBB->back().eraseFromParent();
    eraseBasicBlock(epilogBB);

    // If the current bb is terminated then the epilog is just unreachable.
    if (!B.hasValidInsertionPoint())
      return { None, TopLevel };

    // We emit the epilog at the current insertion point.
    returnLoc = ImplicitReturnFromTopLevel;

  } else if (std::next(epilogBB->pred_begin()) == epilogBB->pred_end()
             && !B.hasValidInsertionPoint()) {
    // If the epilog has a single predecessor and there's no current insertion
    // point to fall through from, then we can weld the epilog to that
    // predecessor BB.

    // Steal the branch argument as the return value if present.
    SILBasicBlock *pred = *epilogBB->pred_begin();
    BranchInst *predBranch = cast<BranchInst>(pred->getTerminator());
    assert(predBranch->getArgs().size() == epilogBB->bbarg_size() &&
           "epilog predecessor arguments do not match block params");

    for (auto index : indices(predBranch->getArgs())) {
      SILValue result = predBranch->getArgs()[index];
      directResults.push_back(result);
      epilogBB->getBBArg(index)->replaceAllUsesWith(result);
    }

    // If we are optimizing, we should use the return location from the single,
    // previously processed, return statement if any.
    if (predBranch->getLoc().is<ReturnLocation>()) {
      returnLoc = predBranch->getLoc();
    } else {
      returnLoc = ImplicitReturnFromTopLevel;
    }
    
    // Kill the branch to the now-dead epilog BB.
    pred->erase(predBranch);

    // Move any instructions from the EpilogBB to the end of the 'pred' block.
    pred->spliceAtEnd(epilogBB);

    // Finally we can erase the epilog BB.
    eraseBasicBlock(epilogBB);

    // Emit the epilog into its former predecessor.
    B.setInsertionPoint(pred);
  } else {
    // Move the epilog block to the end of the ordinary section.
    auto endOfOrdinarySection = StartOfPostmatter;
    B.moveBlockTo(epilogBB, endOfOrdinarySection);

    // Emit the epilog into the epilog bb. Its arguments are the
    // direct results.
    directResults.append(epilogBB->bbarg_begin(), epilogBB->bbarg_end());

    // If we are falling through from the current block, the return is implicit.
    B.emitBlock(epilogBB, ImplicitReturnFromTopLevel);
  }
  
  // Emit top-level cleanups into the epilog block.
  assert(!Cleanups.hasAnyActiveCleanups(getCleanupsDepth(),
                                        ReturnDest.getDepth()) &&
         "emitting epilog in wrong scope");

  auto cleanupLoc = CleanupLocation::get(TopLevel);
  Cleanups.emitCleanupsForReturn(cleanupLoc);

  // If the return location is known to be that of an already
  // processed return, use it. (This will get triggered when the
  // epilog logic is simplified.)
  //
  // Otherwise make the ret instruction part of the cleanups.
  if (!returnLoc) returnLoc = cleanupLoc;

  // Build the return value.  We don't do this if there are no direct
  // results; this can happen for void functions, but also happens when
  // prepareEpilog was asked to not add result arguments to the epilog
  // block.
  SILValue returnValue;
  if (!directResults.empty()) {
    assert(directResults.size()
             == F.getLoweredFunctionType()->getNumDirectResults());
    returnValue = buildReturnValue(*this, TopLevel, directResults);
  }

  return { returnValue, *returnLoc };
}
Example no. 4
0
/// Insert code in the prolog code when unrolling a loop with a
/// run-time trip-count.
///
/// This method assumes that the loop unroll factor is the total number
/// of loop bodies in the loop after unrolling. (Some folks refer
/// to the unroll factor as the number of *extra* copies added).
/// We assume also that the loop unroll factor is a power-of-two. So, after
/// unrolling the loop, the number of loop bodies executed is 2,
/// 4, 8, etc.  Note - LLVM converts the if-then-sequence to a switch
/// instruction in SimplifyCFG.cpp.  Then, the backend decides how code for
/// the switch instruction is generated.
///
///    extraiters = tripcount % loopfactor
///    if (extraiters == 0) jump Loop:
///    if (extraiters == loopfactor) jump L1
///    if (extraiters == loopfactor-1) jump L2
///    ...
///    L1:  LoopBody;
///    L2:  LoopBody;
///    ...
///    if tripcount < loopfactor jump End
///    Loop:
///    ...
///    End:
///
bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
                                   LPPassManager *LPM) {
  // for now, only unroll loops that contain a single exit
  if (!L->getExitingBlock())
    return false;

  // Make sure the loop is in canonical form, and there is a single
  // exit block only.
  if (!L->isLoopSimplifyForm() || L->getUniqueExitBlock() == 0)
    return false;

  // Use Scalar Evolution to compute the trip count.  This allows more
  // loops to be unrolled than relying on induction var simplification
  if (!LPM)
    return false;
  ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
  if (SE == 0)
    return false;

  // Only unroll loops with a computable trip count; the trip count needs
  // to be an int value (allowing a pointer type is a TODO item)
  const SCEV *BECount = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BECount) || !BECount->getType()->isIntegerTy())
    return false;

  // Add 1 since the backedge count doesn't include the first loop iteration
  const SCEV *TripCountSC =
    SE->getAddExpr(BECount, SE->getConstant(BECount->getType(), 1));
  if (isa<SCEVCouldNotCompute>(TripCountSC))
    return false;

  // We only handle cases when the unroll factor is a power of 2.
  // Count is the loop unroll factor, the number of extra copies added + 1.
  if ((Count & (Count-1)) != 0)
    return false;

  // If this loop is nested, then the loop unroller changes the code in
  // parent loop, so the Scalar Evolution pass needs to be run again
  if (Loop *ParentLoop = L->getParentLoop())
    SE->forgetLoop(ParentLoop);

  BasicBlock *PH = L->getLoopPreheader();
  BasicBlock *Header = L->getHeader();
  BasicBlock *Latch = L->getLoopLatch();
  // It helps to split the original preheader twice: once for the end of the
  // prolog code and once for a new loop preheader
  BasicBlock *PEnd = SplitEdge(PH, Header, LPM->getAsPass());
  BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), LPM->getAsPass());
  BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());

  // Compute the number of extra iterations required, which is:
  //  extra iterations = run-time trip count % loop unroll factor
  SCEVExpander Expander(*SE, "loop-unroll");
  Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
                                            PreHeaderBR);
  Type *CountTy = TripCount->getType();
  BinaryOperator *ModVal =
    BinaryOperator::CreateURem(TripCount,
                               ConstantInt::get(CountTy, Count),
                               "xtraiter");
  ModVal->insertBefore(PreHeaderBR);

  // If there are no extra iterations, jump to the unrolled loop
  Value *BranchVal = new ICmpInst(PreHeaderBR,
                                  ICmpInst::ICMP_NE, ModVal,
                                  ConstantInt::get(CountTy, 0), "lcmp");
  // Branch to either the extra iterations or the unrolled loop
  // We will fix up the true branch label when adding loop body copies
  BranchInst::Create(PEnd, PEnd, BranchVal, PreHeaderBR);
  assert(PreHeaderBR->isUnconditional() &&
         PreHeaderBR->getSuccessor(0) == PEnd &&
         "CFG edges in Preheader are not correct");
  PreHeaderBR->eraseFromParent();

  ValueToValueMapTy LVMap;
  Function *F = Header->getParent();
  // These variables are used to update the CFG links in each iteration
  BasicBlock *CompareBB = 0;
  BasicBlock *LastLoopBB = PH;
  // Get an ordered list of blocks in the loop to help with the ordering of the
  // cloned blocks in the prolog code
  LoopBlocksDFS LoopBlocks(L);
  LoopBlocks.perform(LI);

  //
  // For each extra loop iteration, create a copy of the loop's basic blocks
  // and generate a condition that branches to the copy depending on the
  // number of 'left over' iterations.
  //
  for (unsigned leftOverIters = Count-1; leftOverIters > 0; --leftOverIters) {
    std::vector<BasicBlock*> NewBlocks;
    ValueToValueMapTy VMap;

    // Clone all the basic blocks in the loop (the clones do not form a new loop).
    // This function adds the appropriate CFG connections.
    CloneLoopBlocks(L, (leftOverIters == Count-1), LastLoopBB, PEnd, NewBlocks,
                    LoopBlocks, VMap, LVMap, LI);
    LastLoopBB = cast<BasicBlock>(VMap[Latch]);

    // Insert the cloned blocks into function just before the original loop
    F->getBasicBlockList().splice(PEnd, F->getBasicBlockList(),
                                  NewBlocks[0], F->end());

    // Generate the code for the comparison which determines if the loop
    // prolog code needs to be executed.
    if (leftOverIters == Count-1) {
      // There is no compare block for the fall-through case of the last
      // left-over iteration
      CompareBB = NewBlocks[0];
    } else {
      // Create a new block for the comparison
      BasicBlock *NewBB = BasicBlock::Create(CompareBB->getContext(), "unr.cmp",
                                             F, CompareBB);
      if (Loop *ParentLoop = L->getParentLoop()) {
        // Add the new block to the parent loop, if needed
        ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase());
      }

      // The comparison w/ the extra iteration value and branch
      Value *BranchVal = new ICmpInst(*NewBB, ICmpInst::ICMP_EQ, ModVal,
                                      ConstantInt::get(CountTy, leftOverIters),
                                      "un.tmp");
      // Branch to either the extra iterations or the unrolled loop
      BranchInst::Create(NewBlocks[0], CompareBB,
                         BranchVal, NewBB);
      CompareBB = NewBB;
      PH->getTerminator()->setSuccessor(0, NewBB);
      VMap[NewPH] = CompareBB;
    }

    // Rewrite the cloned instruction operands to use the values
    // created when the clone is created.
    for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) {
      for (BasicBlock::iterator I = NewBlocks[i]->begin(),
             E = NewBlocks[i]->end(); I != E; ++I) {
        RemapInstruction(I, VMap,
                         RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
      }
    }
  }

  // Connect the prolog code to the original loop and update the
  // PHI functions.
  ConnectProlog(L, TripCount, Count, LastLoopBB, PEnd, PH, NewPH, LVMap,
                LPM->getAsPass());
  NumRuntimeUnrolled++;
  return true;
}
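
A rough, hypothetical source-level sketch of the control structure UnrollRuntimeLoopProlog arranges for an unroll factor of 4; it is shown only to illustrate the prolog / fall-through shape described in the comment above, not the actual IR the pass emits.

#include <cstdio>

// 'body' stands in for one copy of the original loop body.
static void body(int i) { std::printf("%d ", i); }

void unrolled_by_4(int tripcount) {
  int xtraiter = tripcount % 4;   // extraiters = tripcount % loopfactor
  int i = 0;
  // Prolog: run the 1..3 left-over iterations (the cloned blocks built above).
  switch (xtraiter) {             // SimplifyCFG may turn the if-chain into a switch
  case 3: body(i++);              // fall through
  case 2: body(i++);              // fall through
  case 1: body(i++);
  default: break;
  }
  // Unrolled loop: executes a multiple of 4 iterations.
  for (; i + 3 < tripcount; i += 4) {
    body(i); body(i + 1); body(i + 2); body(i + 3);
  }
}

int main() {
  unrolled_by_4(10);              // prints 0..9
  std::printf("\n");
  return 0;
}
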
Example no. 5
0
bool LoopInterchangeTransform::adjustLoopBranches() {

  DEBUG(dbgs() << "adjustLoopBranches called\n");
  // Adjust the loop preheader
  BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
  BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
  BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
  BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
  BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor();
  BasicBlock *InnerLoopLatchPredecessor =
      InnerLoopLatch->getUniquePredecessor();
  BasicBlock *InnerLoopLatchSuccessor;
  BasicBlock *OuterLoopLatchSuccessor;

  BranchInst *OuterLoopLatchBI =
      dyn_cast<BranchInst>(OuterLoopLatch->getTerminator());
  BranchInst *InnerLoopLatchBI =
      dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
  BranchInst *OuterLoopHeaderBI =
      dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
  BranchInst *InnerLoopHeaderBI =
      dyn_cast<BranchInst>(InnerLoopHeader->getTerminator());

  if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
      !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
      !InnerLoopHeaderBI)
    return false;

  BranchInst *InnerLoopLatchPredecessorBI =
      dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator());
  BranchInst *OuterLoopPredecessorBI =
      dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator());

  if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
    return false;
  BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor();
  if (!InnerLoopHeaderSuccessor)
    return false;

  // Adjust Loop Preheader and headers

  unsigned NumSucc = OuterLoopPredecessorBI->getNumSuccessors();
  for (unsigned i = 0; i < NumSucc; ++i) {
    if (OuterLoopPredecessorBI->getSuccessor(i) == OuterLoopPreHeader)
      OuterLoopPredecessorBI->setSuccessor(i, InnerLoopPreHeader);
  }

  NumSucc = OuterLoopHeaderBI->getNumSuccessors();
  for (unsigned i = 0; i < NumSucc; ++i) {
    if (OuterLoopHeaderBI->getSuccessor(i) == OuterLoopLatch)
      OuterLoopHeaderBI->setSuccessor(i, LoopExit);
    else if (OuterLoopHeaderBI->getSuccessor(i) == InnerLoopPreHeader)
      OuterLoopHeaderBI->setSuccessor(i, InnerLoopHeaderSuccessor);
  }

  // Adjust reduction PHI's now that the incoming block has changed.
  updateIncomingBlock(InnerLoopHeaderSuccessor, InnerLoopHeader,
                      OuterLoopHeader);

  BranchInst::Create(OuterLoopPreHeader, InnerLoopHeaderBI);
  InnerLoopHeaderBI->eraseFromParent();

  // -------------Adjust loop latches-----------
  if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader)
    InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1);
  else
    InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0);

  NumSucc = InnerLoopLatchPredecessorBI->getNumSuccessors();
  for (unsigned i = 0; i < NumSucc; ++i) {
    if (InnerLoopLatchPredecessorBI->getSuccessor(i) == InnerLoopLatch)
      InnerLoopLatchPredecessorBI->setSuccessor(i, InnerLoopLatchSuccessor);
  }

  // Adjust PHI nodes in InnerLoopLatchSuccessor. Update all uses of PHI with
  // the value and remove this PHI node from inner loop.
  SmallVector<PHINode *, 8> LcssaVec;
  for (auto I = InnerLoopLatchSuccessor->begin(); isa<PHINode>(I); ++I) {
    PHINode *LcssaPhi = cast<PHINode>(I);
    LcssaVec.push_back(LcssaPhi);
  }
  for (auto I = LcssaVec.begin(), E = LcssaVec.end(); I != E; ++I) {
    PHINode *P = *I;
    Value *Incoming = P->getIncomingValueForBlock(InnerLoopLatch);
    P->replaceAllUsesWith(Incoming);
    P->eraseFromParent();
  }

  if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)
    OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);
  else
    OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0);

  if (InnerLoopLatchBI->getSuccessor(1) == InnerLoopLatchSuccessor)
    InnerLoopLatchBI->setSuccessor(1, OuterLoopLatchSuccessor);
  else
    InnerLoopLatchBI->setSuccessor(0, OuterLoopLatchSuccessor);

  updateIncomingBlock(OuterLoopLatchSuccessor, OuterLoopLatch, InnerLoopLatch);

  if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopLatchSuccessor) {
    OuterLoopLatchBI->setSuccessor(0, InnerLoopLatch);
  } else {
    OuterLoopLatchBI->setSuccessor(1, InnerLoopLatch);
  }

  return true;
}
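
For reference, the CFG surgery above implements the classic loop-interchange transformation. A hypothetical source-level before/after (not taken from the pass) is:

#include <cstdio>

enum { N = 4, M = 3 };
static int A[N][M];

// Before interchange: the inner loop varies the first index, so consecutive
// inner iterations touch A[j][i] with a stride of M elements.
void before() {
  for (int i = 0; i < M; ++i)      // outer loop (OuterLoop above)
    for (int j = 0; j < N; ++j)    // inner loop (InnerLoop above)
      A[j][i] += 1;
}

// After interchange: the two loops are swapped, so the innermost loop now
// varies the last index and walks contiguous memory.
void after() {
  for (int j = 0; j < N; ++j)      // former inner loop is now outermost
    for (int i = 0; i < M; ++i)
      A[j][i] += 1;
}

int main() {
  before();
  after();
  std::printf("%d\n", A[0][0]);    // 2: both versions touch every element once
  return 0;
}
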
Example no. 6
0
/// \brief Simplify one loop and queue further loops for simplification.
///
/// FIXME: Currently this accepts both lots of analyses that it uses and a raw
/// Pass pointer. The Pass pointer is used by numerous utilities to update
/// specific analyses. Rather than a pass it would be much cleaner and more
/// explicit if they accepted the analysis directly and then updated it.
static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
                            AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI,
                            ScalarEvolution *SE, Pass *PP,
                            AssumptionCache *AC) {
  bool Changed = false;
ReprocessLoop:

  // Check to see that no blocks (other than the header) in this loop have
  // predecessors that are not in the loop.  This is not valid for natural
  // loops, but can occur if the blocks are unreachable.  Since they are
  // unreachable we can just shamelessly delete those CFG edges!
  for (Loop::block_iterator BB = L->block_begin(), E = L->block_end();
       BB != E; ++BB) {
    if (*BB == L->getHeader()) continue;

    SmallPtrSet<BasicBlock*, 4> BadPreds;
    for (pred_iterator PI = pred_begin(*BB),
         PE = pred_end(*BB); PI != PE; ++PI) {
      BasicBlock *P = *PI;
      if (!L->contains(P))
        BadPreds.insert(P);
    }

    // Delete each unique out-of-loop (and thus dead) predecessor.
    for (BasicBlock *P : BadPreds) {

      DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor "
                   << P->getName() << "\n");

      // Inform each successor of each dead pred.
      for (succ_iterator SI = succ_begin(P), SE = succ_end(P); SI != SE; ++SI)
        (*SI)->removePredecessor(P);
      // Zap the dead pred's terminator and replace it with unreachable.
      TerminatorInst *TI = P->getTerminator();
      TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
      P->getTerminator()->eraseFromParent();
      new UnreachableInst(P->getContext(), P);
      Changed = true;
    }
  }

  // If there are exiting blocks with branches on undef, resolve the undef in
  // the direction which will exit the loop. This will help simplify loop
  // trip count computations.
  SmallVector<BasicBlock*, 8> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
       E = ExitingBlocks.end(); I != E; ++I)
    if (BranchInst *BI = dyn_cast<BranchInst>((*I)->getTerminator()))
      if (BI->isConditional()) {
        if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) {

          DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in "
                       << (*I)->getName() << "\n");

          BI->setCondition(ConstantInt::get(Cond->getType(),
                                            !L->contains(BI->getSuccessor(0))));

          // This may make the loop analyzable, force SCEV recomputation.
          if (SE)
            SE->forgetLoop(L);

          Changed = true;
        }
      }

  // Does the loop already have a preheader?  If so, don't insert one.
  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) {
    Preheader = InsertPreheaderForLoop(L, PP);
    if (Preheader) {
      ++NumInserted;
      Changed = true;
    }
  }

  // Next, check to make sure that all exit nodes of the loop only have
  // predecessors that are inside of the loop.  This check guarantees that the
  // loop preheader/header will dominate the exit blocks.  If the exit block has
  // predecessors from outside of the loop, split the edge now.
  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getExitBlocks(ExitBlocks);

  SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(),
                                               ExitBlocks.end());
  for (SmallSetVector<BasicBlock *, 8>::iterator I = ExitBlockSet.begin(),
         E = ExitBlockSet.end(); I != E; ++I) {
    BasicBlock *ExitBlock = *I;
    for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock);
         PI != PE; ++PI)
      // Must be exactly this loop: no subloops, parent loops, or non-loop preds
      // allowed.
      if (!L->contains(*PI)) {
        if (rewriteLoopExitBlock(L, ExitBlock, AA, DT, LI, PP)) {
          ++NumInserted;
          Changed = true;
        }
        break;
      }
  }

  // If the header has more than two predecessors at this point (from the
  // preheader and from multiple backedges), we must adjust the loop.
  BasicBlock *LoopLatch = L->getLoopLatch();
  if (!LoopLatch) {
    // If this is really a nested loop, rip it out into a child loop.  Don't do
    // this for loops with a giant number of backedges, just factor them into a
    // common backedge instead.
    if (L->getNumBackEdges() < 8) {
      if (Loop *OuterL =
              separateNestedLoop(L, Preheader, AA, DT, LI, SE, PP, AC)) {
        ++NumNested;
        // Enqueue the outer loop as it should be processed next in our
        // depth-first nest walk.
        Worklist.push_back(OuterL);

        // This is a big restructuring change, reprocess the whole loop.
        Changed = true;
        // GCC doesn't tail recursion eliminate this.
        // FIXME: It isn't clear we can't rely on LLVM to TRE this.
        goto ReprocessLoop;
      }
    }

    // If we either couldn't, or didn't want to, identify nesting of the loops,
    // insert a new block that all backedges target, then make it jump to the
    // loop header.
    LoopLatch = insertUniqueBackedgeBlock(L, Preheader, AA, DT, LI);
    if (LoopLatch) {
      ++NumInserted;
      Changed = true;
    }
  }

  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  // Scan over the PHI nodes in the loop header.  Since they now have only two
  // incoming values (the loop is canonicalized), we may have simplified the PHI
  // down to 'X = phi [X, Y]', which should be replaced with 'Y'.
  PHINode *PN;
  for (BasicBlock::iterator I = L->getHeader()->begin();
       (PN = dyn_cast<PHINode>(I++)); )
    if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) {
      if (AA) AA->deleteValue(PN);
      if (SE) SE->forgetValue(PN);
      PN->replaceAllUsesWith(V);
      PN->eraseFromParent();
    }

  // If this loop has multiple exits and the exits all go to the same
  // block, attempt to merge the exits. This helps several passes, such
  // as LoopRotation, which do not support loops with multiple exits.
  // SimplifyCFG also does this (and this code uses the same utility
  // function), however this code is loop-aware, where SimplifyCFG is
  // not. That gives it the advantage of being able to hoist
  // loop-invariant instructions out of the way to open up more
  // opportunities, and the disadvantage of having the responsibility
  // to preserve dominator information.
  bool UniqueExit = true;
  if (!ExitBlocks.empty())
    for (unsigned i = 1, e = ExitBlocks.size(); i != e; ++i)
      if (ExitBlocks[i] != ExitBlocks[0]) {
        UniqueExit = false;
        break;
      }
  if (UniqueExit) {
    for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
      BasicBlock *ExitingBlock = ExitingBlocks[i];
      if (!ExitingBlock->getSinglePredecessor()) continue;
      BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
      if (!BI || !BI->isConditional()) continue;
      CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
      if (!CI || CI->getParent() != ExitingBlock) continue;

      // Attempt to hoist out all instructions except for the
      // comparison and the branch.
      bool AllInvariant = true;
      bool AnyInvariant = false;
      for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) {
        Instruction *Inst = I++;
        // Skip debug info intrinsics.
        if (isa<DbgInfoIntrinsic>(Inst))
          continue;
        if (Inst == CI)
          continue;
        if (!L->makeLoopInvariant(Inst, AnyInvariant,
                                  Preheader ? Preheader->getTerminator()
                                            : nullptr)) {
          AllInvariant = false;
          break;
        }
      }
      if (AnyInvariant) {
        Changed = true;
        // The loop disposition of all SCEV expressions that depend on any
        // hoisted values have also changed.
        if (SE)
          SE->forgetLoopDispositions(L);
      }
      if (!AllInvariant) continue;

      // The block has now been cleared of all instructions except for
      // a comparison and a conditional branch. SimplifyCFG may be able
      // to fold it now.
      if (!FoldBranchToCommonDest(BI))
        continue;

      // Success. The block is now dead, so remove it from the loop,
      // update the dominator tree and delete it.
      DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "
                   << ExitingBlock->getName() << "\n");

      // Notify ScalarEvolution before deleting this block. Currently assume the
      // parent loop doesn't change (splitting edges doesn't count). If blocks,
      // CFG edges, or other values in the parent loop change, then we need to
      // call forgetLoop() for the parent instead.
      if (SE)
        SE->forgetLoop(L);

      assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
      Changed = true;
      LI->removeBlock(ExitingBlock);

      DomTreeNode *Node = DT->getNode(ExitingBlock);
      const std::vector<DomTreeNodeBase<BasicBlock> *> &Children =
        Node->getChildren();
      while (!Children.empty()) {
        DomTreeNode *Child = Children.front();
        DT->changeImmediateDominator(Child, Node->getIDom());
      }
      DT->eraseNode(ExitingBlock);

      BI->getSuccessor(0)->removePredecessor(ExitingBlock);
      BI->getSuccessor(1)->removePredecessor(ExitingBlock);
      ExitingBlock->eraseFromParent();
    }
  }

  return Changed;
}
Example no. 7
0
/// This works like CloneAndPruneFunctionInto, except that it does not clone the
/// entire function. Instead it starts at an instruction provided by the caller
/// and copies (and prunes) only the code reachable from that instruction.
void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
                                     const Instruction *StartingInst,
                                     ValueToValueMapTy &VMap,
                                     bool ModuleLevelChanges,
                                     SmallVectorImpl<ReturnInst *> &Returns,
                                     const char *NameSuffix,
                                     ClonedCodeInfo *CodeInfo) {
  assert(NameSuffix && "NameSuffix cannot be null!");

  ValueMapTypeRemapper *TypeMapper = nullptr;
  ValueMaterializer *Materializer = nullptr;

#ifndef NDEBUG
  // If the cloning starts at the beginning of the function, verify that
  // the function arguments are mapped.
  if (!StartingInst)
    for (const Argument &II : OldFunc->args())
      assert(VMap.count(&II) && "No mapping from source argument specified!");
#endif

  PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges,
                            NameSuffix, CodeInfo);
  const BasicBlock *StartingBB;
  if (StartingInst)
    StartingBB = StartingInst->getParent();
  else {
    StartingBB = &OldFunc->getEntryBlock();
    StartingInst = &StartingBB->front();
  }

  // Clone the entry block, and anything recursively reachable from it.
  std::vector<const BasicBlock*> CloneWorklist;
  PFC.CloneBlock(StartingBB, StartingInst->getIterator(), CloneWorklist);
  while (!CloneWorklist.empty()) {
    const BasicBlock *BB = CloneWorklist.back();
    CloneWorklist.pop_back();
    PFC.CloneBlock(BB, BB->begin(), CloneWorklist);
  }
  
  // Loop over all of the basic blocks in the old function.  If the block was
  // reachable, we have cloned it and the old block is now in the value map:
  // insert it into the new function in the right order.  If not, ignore it.
  //
  // Defer PHI resolution until rest of function is resolved.
  SmallVector<const PHINode*, 16> PHIToResolve;
  for (const BasicBlock &BI : *OldFunc) {
    Value *V = VMap[&BI];
    BasicBlock *NewBB = cast_or_null<BasicBlock>(V);
    if (!NewBB) continue;  // Dead block.

    // Add the new block to the new function.
    NewFunc->getBasicBlockList().push_back(NewBB);

    // Handle PHI nodes specially, as we have to remove references to dead
    // blocks.
    for (BasicBlock::const_iterator I = BI.begin(), E = BI.end(); I != E; ++I) {
      // PHI nodes may have been remapped to non-PHI nodes by the caller or
      // during the cloning process.
      if (const PHINode *PN = dyn_cast<PHINode>(I)) {
        if (isa<PHINode>(VMap[PN]))
          PHIToResolve.push_back(PN);
        else
          break;
      } else {
        break;
      }
    }

    // Finally, remap the terminator instructions, as those can't be remapped
    // until all BBs are mapped.
    RemapInstruction(NewBB->getTerminator(), VMap,
                     ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
                     TypeMapper, Materializer);
  }
  
  // Defer PHI resolution until rest of function is resolved, PHI resolution
  // requires the CFG to be up-to-date.
  for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) {
    const PHINode *OPN = PHIToResolve[phino];
    unsigned NumPreds = OPN->getNumIncomingValues();
    const BasicBlock *OldBB = OPN->getParent();
    BasicBlock *NewBB = cast<BasicBlock>(VMap[OldBB]);

    // Map operands for blocks that are live and remove operands for blocks
    // that are dead.
    for (; phino != PHIToResolve.size() &&
         PHIToResolve[phino]->getParent() == OldBB; ++phino) {
      OPN = PHIToResolve[phino];
      PHINode *PN = cast<PHINode>(VMap[OPN]);
      for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) {
        Value *V = VMap[PN->getIncomingBlock(pred)];
        if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) {
          Value *InVal = MapValue(PN->getIncomingValue(pred),
                                  VMap, 
                        ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
          assert(InVal && "Unknown input value?");
          PN->setIncomingValue(pred, InVal);
          PN->setIncomingBlock(pred, MappedBlock);
        } else {
          PN->removeIncomingValue(pred, false);
          --pred, --e;  // Revisit the next entry.
        }
      } 
    }
    
    // The loop above has removed PHI entries for those blocks that are dead
    // and has updated others.  However, if a block is live (i.e. copied over)
    // but its terminator has been changed to not go to this block, then our
    // phi nodes will have invalid entries.  Update the PHI nodes in this
    // case.
    PHINode *PN = cast<PHINode>(NewBB->begin());
    NumPreds = std::distance(pred_begin(NewBB), pred_end(NewBB));
    if (NumPreds != PN->getNumIncomingValues()) {
      assert(NumPreds < PN->getNumIncomingValues());
      // Count how many times each predecessor comes to this block.
      std::map<BasicBlock*, unsigned> PredCount;
      for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB);
           PI != E; ++PI)
        --PredCount[*PI];
      
      // Figure out how many entries to remove from each PHI.
      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
        ++PredCount[PN->getIncomingBlock(i)];
      
      // At this point, the excess predecessor entries are positive in the
      // map.  Loop over all of the PHIs and remove excess predecessor
      // entries.
      BasicBlock::iterator I = NewBB->begin();
      for (; (PN = dyn_cast<PHINode>(I)); ++I) {
        for (std::map<BasicBlock*, unsigned>::iterator PCI =PredCount.begin(),
             E = PredCount.end(); PCI != E; ++PCI) {
          BasicBlock *Pred     = PCI->first;
          for (unsigned NumToRemove = PCI->second; NumToRemove; --NumToRemove)
            PN->removeIncomingValue(Pred, false);
        }
      }
    }
    
    // If the loops above have made these phi nodes have 0 or 1 operand,
    // replace them with undef or the input value.  We must do this for
    // correctness, because 0-operand phis are not valid.
    PN = cast<PHINode>(NewBB->begin());
    if (PN->getNumIncomingValues() == 0) {
      BasicBlock::iterator I = NewBB->begin();
      BasicBlock::const_iterator OldI = OldBB->begin();
      while ((PN = dyn_cast<PHINode>(I++))) {
        Value *NV = UndefValue::get(PN->getType());
        PN->replaceAllUsesWith(NV);
        assert(VMap[&*OldI] == PN && "VMap mismatch");
        VMap[&*OldI] = NV;
        PN->eraseFromParent();
        ++OldI;
      }
    }
  }

  // Make a second pass over the PHINodes now that all of them have been
  // remapped into the new function, simplifying the PHINode and performing any
  // recursive simplifications exposed. This will transparently update the
  // WeakVH in the VMap. Notably, we rely on that so that if we coalesce
  // two PHINodes, the iteration over the old PHIs remains valid, and the
  // mapping will just map us to the new node (which may not even be a PHI
  // node).
  for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx)
    if (PHINode *PN = dyn_cast<PHINode>(VMap[PHIToResolve[Idx]]))
      recursivelySimplifyInstruction(PN);

  // Now that the inlined function body has been fully constructed, go through
  // and zap unconditional fall-through branches. This happens all the time when
  // specializing code: code specialization turns conditional branches into
  // uncond branches, and this code folds them.
  Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator();
  Function::iterator I = Begin;
  while (I != NewFunc->end()) {
    // Check if this block has become dead during inlining or other
    // simplifications. Note that the first block will appear dead, as it has
    // not yet been wired up properly.
    if (I != Begin && (pred_begin(&*I) == pred_end(&*I) ||
                       I->getSinglePredecessor() == &*I)) {
      BasicBlock *DeadBB = &*I++;
      DeleteDeadBlock(DeadBB);
      continue;
    }

    // We need to simplify conditional branches and switches with a constant
    // operand. We try to prune these out when cloning, but if the
    // simplification required looking through PHI nodes, those are only
    // available after forming the full basic block. That may leave some here,
    // and we still want to prune the dead code as early as possible.
    ConstantFoldTerminator(&*I);

    BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
    if (!BI || BI->isConditional()) { ++I; continue; }
    
    BasicBlock *Dest = BI->getSuccessor(0);
    if (!Dest->getSinglePredecessor()) {
      ++I; continue;
    }

    // We shouldn't be able to get single-entry PHI nodes here, as instsimplify
    // above should have zapped all of them.
    assert(!isa<PHINode>(Dest->begin()));

    // We know all single-entry PHI nodes in the inlined function have been
    // removed, so we just need to splice the blocks.
    BI->eraseFromParent();
    
    // Make all PHI nodes that referred to Dest now refer to I as their source.
    Dest->replaceAllUsesWith(&*I);

    // Move all the instructions in the succ to the pred.
    I->getInstList().splice(I->end(), Dest->getInstList());
    
    // Remove the dest block.
    Dest->eraseFromParent();
    
    // Do not increment I, iteratively merge all things this block branches to.
  }

  // Make a final pass over the basic blocks from the old function to gather
  // any return instructions which survived folding. We have to do this here
  // because we can iteratively remove and merge returns above.
  for (Function::iterator I = cast<BasicBlock>(VMap[StartingBB])->getIterator(),
                          E = NewFunc->end();
       I != E; ++I)
    if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator()))
      Returns.push_back(RI);
}
Example no. 8
0
/// If \param [in] BB has more than one predecessor that is a conditional
/// branch, attempt to use parallel and/or for the branch condition. \returns
/// true on success.
///
/// Before:
///   ......
///   %cmp10 = fcmp une float %tmp1, %tmp2
///   br i1 %cmp10, label %if.then, label %lor.rhs
///
/// lor.rhs:
///   ......
///   %cmp11 = fcmp une float %tmp3, %tmp4
///   br i1 %cmp11, label %if.then, label %if.end
///
/// if.end:  // the merge block
///   ......
///
/// if.then: // has two predecessors, both of which contain a conditional branch.
///   ......
///   br label %if.end;
///
/// After:
///  ......
///  %cmp10 = fcmp une float %tmp1, %tmp2
///  ......
///  %cmp11 = fcmp une float %tmp3, %tmp4
///  %cmp12 = or i1 %cmp10, %cmp11    // parallel-or mode.
///  br i1 %cmp12, label %if.then, label %if.end
///
///  if.end:
///    ......
///
///  if.then:
///    ......
///    br label %if.end;
///
///  Current implementation handles two cases.
///  Case 1: \param BB is on the else-path.
///
///          BB1
///        /     |
///       BB2    |
///      /   \   |
///     BB3   \  |     where, BB1, BB2 contain conditional branches.
///      \    |  /     BB3 contains unconditional branch.
///       \   | /      BB4 corresponds to \param BB which is also the merge.
///  BB => BB4
///
///
///  Corresponding source code:
///
///  if (a == b && c == d)
///    statement; // BB3
///
///  Case 2: \param BB BB is on the then-path.
///
///             BB1
///          /      |
///         |      BB2
///         \    /    |  where BB1, BB2 contain conditional branches.
///  BB =>   BB3      |  BB3 contains an unconditional branch and corresponds
///           \     /    to \param BB.  BB4 is the merge.
///             BB4
///
///  Corresponding source code:
///
///  if (a == b || c == d)
///    statement;  // BB3
///
///  In both cases,  \param BB is the common successor of conditional branches.
///  In Case 1, \param BB (BB4) has an unconditional branch (BB3) as
///  its predecessor.  In Case 2, \param BB (BB3) only has conditional branches
///  as its predecessors.
///
bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder,
                                         Pass *P) {
  PHINode *PHI = dyn_cast<PHINode>(BB->begin());
  if (PHI)
    return false; // For simplicity, avoid cases containing PHI nodes.

  BasicBlock *LastCondBlock = NULL;
  BasicBlock *FirstCondBlock = NULL;
  BasicBlock *UnCondBlock = NULL;
  int Idx = -1;

  // Check predecessors of \param BB.
  SmallPtrSet<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
  for (SmallPtrSetIterator<BasicBlock *> PI = Preds.begin(), PE = Preds.end();
       PI != PE; ++PI) {
    BasicBlock *Pred = *PI;
    BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator());

    // All predecessors should terminate with a branch.
    if (!PBI)
      return false;

    BasicBlock *PP = Pred->getSinglePredecessor();

    if (PBI->isUnconditional()) {
      // Case 1: Pred (BB3) is an unconditional block; it should
      // have a single predecessor (BB2) that is also a predecessor
      // of \param BB (BB4) and should not have its address taken.
      // There should exist only one such unconditional
      // branch among the predecessors.
      if (UnCondBlock || !PP || (Preds.count(PP) == 0) ||
          Pred->hasAddressTaken())
        return false;

      UnCondBlock = Pred;
      continue;
    }

    // Only conditional branches are allowed beyond this point.
    assert(PBI->isConditional());

    // Condition's unique use should be the branch instruction.
    Value *PC = PBI->getCondition();
    if (!PC || !PC->hasOneUse())
      return false;

    if (PP && Preds.count(PP)) {
      // These are internal condition blocks to be merged from, e.g.,
      // BB2 in both cases.
      // Should not be address-taken.
      if (Pred->hasAddressTaken())
        return false;

      // Instructions in the internal condition blocks should be safe
      // to hoist up.
      for (BasicBlock::iterator BI = Pred->begin(), BE = PBI; BI != BE;) {
        Instruction *CI = BI++;
        if (isa<PHINode>(CI) || !isSafeToSpeculativelyExecute(CI))
          return false;
      }
    } else {
      // This is the condition block to be merged into, e.g. BB1 in
      // both cases.
      if (FirstCondBlock)
        return false;
      FirstCondBlock = Pred;
    }

    // Find whether BB is uniformly on the true (or false) path
    // for all of its predecessors.
    BasicBlock *PS1 = PBI->getSuccessor(0);
    BasicBlock *PS2 = PBI->getSuccessor(1);
    BasicBlock *PS = (PS1 == BB) ? PS2 : PS1;
    int CIdx = (PS1 == BB) ? 0 : 1;

    if (Idx == -1)
      Idx = CIdx;
    else if (CIdx != Idx)
      return false;

    // PS is the successor which is not BB. Check successors to identify
    // the last conditional branch.
    if (Preds.count(PS) == 0) {
      // Case 2.
      LastCondBlock = Pred;
    } else {
      // Case 1
      BranchInst *BPS = dyn_cast<BranchInst>(PS->getTerminator());
      if (BPS && BPS->isUnconditional()) {
        // Case 1: PS(BB3) should be an unconditional branch.
        LastCondBlock = Pred;
      }
    }
  }

  if (!FirstCondBlock || !LastCondBlock || (FirstCondBlock == LastCondBlock))
    return false;

  TerminatorInst *TBB = LastCondBlock->getTerminator();
  BasicBlock *PS1 = TBB->getSuccessor(0);
  BasicBlock *PS2 = TBB->getSuccessor(1);
  BranchInst *PBI1 = dyn_cast<BranchInst>(PS1->getTerminator());
  BranchInst *PBI2 = dyn_cast<BranchInst>(PS2->getTerminator());

  // If PS1 does not jump into PS2, but PS2 jumps into PS1,
  // attempt branch inversion.
  if (!PBI1 || !PBI1->isUnconditional() ||
      (PS1->getTerminator()->getSuccessor(0) != PS2)) {
    // Check whether PS2 jumps into PS1.
    if (!PBI2 || !PBI2->isUnconditional() ||
        (PS2->getTerminator()->getSuccessor(0) != PS1))
      return false;

    // Do branch inversion.
    BasicBlock *CurrBlock = LastCondBlock;
    bool EverChanged = false;
    while (1) {
      BranchInst *BI = dyn_cast<BranchInst>(CurrBlock->getTerminator());
      CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
      CmpInst::Predicate Predicate = CI->getPredicate();
      // Canonicalize icmp_ne -> icmp_eq, fcmp_one -> fcmp_oeq
      if ((Predicate == CmpInst::ICMP_NE) || (Predicate == CmpInst::FCMP_ONE)) {
        CI->setPredicate(ICmpInst::getInversePredicate(Predicate));
        BI->swapSuccessors();
        EverChanged = true;
      }
      if (CurrBlock == FirstCondBlock)
        break;
      CurrBlock = CurrBlock->getSinglePredecessor();
    }
    return EverChanged;
  }

  // PS1 must end with an unconditional branch.
  if (!PBI1 || !PBI1->isUnconditional())
    return false;

  // PS2 should not contain a PHI node.
  PHI = dyn_cast<PHINode>(PS2->begin());
  if (PHI)
    return false;

  // Do the transformation.
  BasicBlock *CB;
  BranchInst *PBI = dyn_cast<BranchInst>(FirstCondBlock->getTerminator());
  bool Iteration = true;
  BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
  BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
  Value *PC = PBI->getCondition();

  do {
    CB = PBI->getSuccessor(1 - Idx);
    // Delete the conditional branch.
    FirstCondBlock->getInstList().pop_back();
    FirstCondBlock->getInstList()
        .splice(FirstCondBlock->end(), CB->getInstList());
    PBI = cast<BranchInst>(FirstCondBlock->getTerminator());
    Value *CC = PBI->getCondition();
    // Merge conditions.
    Builder.SetInsertPoint(PBI);
    Value *NC;
    if (Idx == 0)
      // Case 2, use parallel or.
      NC = Builder.CreateOr(PC, CC);
    else
      // Case 1, use parallel and.
      NC = Builder.CreateAnd(PC, CC);

    PBI->replaceUsesOfWith(CC, NC);
    PC = NC;
    if (CB == LastCondBlock)
      Iteration = false;
    // Remove internal conditional branches.
    CB->dropAllReferences();
    // Make CB unreachable and let a downstream pass delete the block.
    new UnreachableInst(CB->getContext(), CB);
  } while (Iteration);

  Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
  DEBUG(dbgs() << "Use parallel and/or in:\n" << *FirstCondBlock);
  return true;
}
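
A hypothetical source-level analogue of the parallel-and case handled above (Case 1): two nested conditions are combined into a single non-short-circuiting test so only one conditional branch remains. This is only legal when the second condition is safe to evaluate unconditionally, which is what the isSafeToSpeculativelyExecute checks guard.

#include <cstdio>

// Before: two conditional branches (BB1 and BB2 in the sketch above).
int before(int a, int b, int c, int d) {
  int r = 0;
  if (a == b)      // BB1
    if (c == d)    // BB2
      r = 1;       // BB3
  return r;        // BB4, the merge block passed in as \param BB
}

// After: one branch on the merged condition; '&' evaluates both comparisons
// unconditionally, mirroring the CreateAnd() of the two i1 values above.
int after(int a, int b, int c, int d) {
  int r = 0;
  if ((a == b) & (c == d))
    r = 1;
  return r;
}

int main() {
  std::printf("%d %d\n", before(1, 1, 2, 2), after(1, 1, 2, 3));  // 1 0
  return 0;
}
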
Example no. 9
0
/// Check whether \param BB is the merge block of an if-region.  If yes, check
/// whether there exists an adjacent if-region upstream, whether the two
/// if-regions contain identical instructions, and whether they can be legally
/// merged.  \returns true if the two if-regions are merged.
///
/// From:
/// if (a)
///   statement;
/// if (b)
///   statement;
///
/// To:
/// if (a || b)
///   statement;
///
bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder,
                                  Pass *P) {
  BasicBlock *IfTrue2, *IfFalse2;
  Value *IfCond2 = GetIfCondition(BB, IfTrue2, IfFalse2);
  Instruction *CInst2 = dyn_cast_or_null<Instruction>(IfCond2);
  if (!CInst2)
    return false;

  BasicBlock *SecondEntryBlock = CInst2->getParent();
  if (SecondEntryBlock->hasAddressTaken())
    return false;

  BasicBlock *IfTrue1, *IfFalse1;
  Value *IfCond1 = GetIfCondition(SecondEntryBlock, IfTrue1, IfFalse1);
  Instruction *CInst1 = dyn_cast_or_null<Instruction>(IfCond1);
  if (!CInst1)
    return false;

  BasicBlock *FirstEntryBlock = CInst1->getParent();

  // Either then-path or else-path should be empty.
  if ((IfTrue1 != FirstEntryBlock) && (IfFalse1 != FirstEntryBlock))
    return false;
  if ((IfTrue2 != SecondEntryBlock) && (IfFalse2 != SecondEntryBlock))
    return false;

  TerminatorInst *PTI2 = SecondEntryBlock->getTerminator();
  Instruction *PBI2 = SecondEntryBlock->begin();

  if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfTrue1,
                            IfTrue2))
    return false;

  if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfFalse1,
                            IfFalse2))
    return false;

  // Check whether \param SecondEntryBlock is free of side effects and safe to
  // speculate.
  for (BasicBlock::iterator BI = PBI2, BE = PTI2; BI != BE; ++BI) {
    Instruction *CI = BI;
    if (isa<PHINode>(CI) || CI->mayHaveSideEffects() ||
        !isSafeToSpeculativelyExecute(CI))
      return false;
  }

  // Merge \param SecondEntryBlock into \param FirstEntryBlock.
  FirstEntryBlock->getInstList().pop_back();
  FirstEntryBlock->getInstList()
      .splice(FirstEntryBlock->end(), SecondEntryBlock->getInstList());
  BranchInst *PBI = dyn_cast<BranchInst>(FirstEntryBlock->getTerminator());
  Value *CC = PBI->getCondition();
  BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
  BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
  Builder.SetInsertPoint(PBI);
  Value *NC = Builder.CreateOr(CInst1, CC);
  PBI->replaceUsesOfWith(CC, NC);
  Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);

  // Remove IfTrue1
  if (IfTrue1 != FirstEntryBlock) {
    IfTrue1->dropAllReferences();
    IfTrue1->eraseFromParent();
  }

  // Remove IfFalse1
  if (IfFalse1 != FirstEntryBlock) {
    IfFalse1->dropAllReferences();
    IfFalse1->eraseFromParent();
  }

  // Remove \param SecondEntryBlock
  SecondEntryBlock->dropAllReferences();
  SecondEntryBlock->eraseFromParent();
  DEBUG(dbgs() << "If conditions merged into:\n" << *FirstEntryBlock);
  return true;
}
Example no. 10
0
void TripCountGenerator::generateVectorEstimatedTripCounts(Function &F){

	LoopInfoEx& li = getAnalysis<LoopInfoEx>();
	LoopNormalizerAnalysis& ln = getAnalysis<LoopNormalizerAnalysis>();

	for(LoopInfoEx::iterator lit = li.begin(); lit != li.end(); lit++){

		// Indicates that we have no way to determine the trip count
		bool unknownTC = false;

		Loop* loop = *lit;

		BasicBlock* header = loop->getHeader();
		BasicBlock* entryBlock = ln.entryBlocks[header];

		LoopControllersDepGraph& lcd = getAnalysis<LoopControllersDepGraph>();
		lcd.setPerspective(header);

		/*
		 * Here we are looking for the predicate that stops the loop.
		 *
		 * At this moment, we are only considering loops that are controlled by
		 * integer comparisons.
		 */
		BasicBlock* exitBlock = findLoopControllerBlock(loop);
		assert(exitBlock && "Exiting Block not found!");

		TerminatorInst* T = exitBlock->getTerminator();
		BranchInst* BI = dyn_cast<BranchInst>(T);
		ICmpInst* CI = BI ? dyn_cast<ICmpInst>(BI->getCondition()) : NULL;

		Value* Op1 = NULL;
		Value* Op2 = NULL;

		if (!CI) unknownTC = true;
		else {

			int LoopClass;
			if (isIntervalComparison(CI)) {
				LoopClass = 0;
				NumIntervalLoops++;
			} else {
				LoopClass = 1;
				NumEqualityLoops++;
			}

			Op1 = getValueAtEntryPoint(CI->getOperand(0), header);
			Op2 = getValueAtEntryPoint(CI->getOperand(1), header);


			if((!Op1) || (!Op2) ) {

				if (!LoopClass) NumUnknownConditionsIL++;
				else 			NumUnknownConditionsEL++;

				unknownTC = true;
			} else {


				if (!(Op1->getType()->isIntegerTy() && Op2->getType()->isIntegerTy())) {
					// We only handle loop conditions that compare integer variables
					NumIncompatibleOperandTypes++;
					unknownTC = true;
				}

			}

		}

		ProgressVector* V1 = NULL;
		ProgressVector* V2 = NULL;


		if (!unknownTC) {
			V1 = generateConstantProgressVector(CI->getOperand(0), header);
			V2 = generateConstantProgressVector(CI->getOperand(1), header);

			if ((!V1) || (!V2)) {

				//TODO: Increment a statistic here
				unknownTC = true;
			}

		}

		if(!unknownTC) {
			generateVectorEstimatedTripCount(header, entryBlock, Op1, Op2, V1, V2, CI);
			NumVectorEstimatedTCs++;
		}


	}

}
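
A small, hypothetical illustration of the kind of loop this estimator targets: the loop is controlled by a single integer comparison, both operands are available at the entry block, and each advances by a constant amount per iteration, so the trip count can be estimated from the operand values and their strides (the Op1/Op2 and V1/V2 pairs above).

#include <cstdio>

// For a loop controlled by 'i < n', where i starts at 'start' and advances by
// 'step' per iteration while n is loop-invariant (progress 0), one estimate is
//   tripcount ~= (n - start) / (step - 0), rounded up.
int estimated_trip_count(int start, int n, int step) {
  if (step <= 0 || n <= start)
    return 0;                        // degenerate case: no useful estimate
  return (n - start + step - 1) / step;
}

int main() {
  int count = 0;
  for (int i = 3; i < 20; i += 4)    // actual loop: i = 3, 7, 11, 15, 19
    ++count;
  std::printf("actual=%d estimated=%d\n",
              count, estimated_trip_count(3, 20, 4));  // both are 5
  return 0;
}
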
Example no. 11
0
std::unique_ptr<FunctionOutliningInfo>
PartialInlinerImpl::computeOutliningInfo(Function *F) {
  BasicBlock *EntryBlock = &F->front();
  BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
  if (!BR || BR->isUnconditional())
    return std::unique_ptr<FunctionOutliningInfo>();

  // Returns true if Succ is BB's successor
  auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) {
    return is_contained(successors(BB), Succ);
  };

  auto SuccSize = [](BasicBlock *BB) {
    return std::distance(succ_begin(BB), succ_end(BB));
  };

  auto IsReturnBlock = [](BasicBlock *BB) {
    TerminatorInst *TI = BB->getTerminator();
    return isa<ReturnInst>(TI);
  };

  auto GetReturnBlock = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
    if (IsReturnBlock(Succ1))
      return std::make_tuple(Succ1, Succ2);
    if (IsReturnBlock(Succ2))
      return std::make_tuple(Succ2, Succ1);

    return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
  };

  // Detect a triangular shape:
  auto GetCommonSucc = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
    if (IsSuccessor(Succ1, Succ2))
      return std::make_tuple(Succ1, Succ2);
    if (IsSuccessor(Succ2, Succ1))
      return std::make_tuple(Succ2, Succ1);

    return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
  };

  std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
      llvm::make_unique<FunctionOutliningInfo>();

  BasicBlock *CurrEntry = EntryBlock;
  bool CandidateFound = false;
  do {
    // The number of blocks to be inlined has already reached
    // the limit. When MaxNumInlineBlocks is set to 0 or 1, this
    // disables partial inlining for the function.
    if (OutliningInfo->GetNumInlinedBlocks() >= MaxNumInlineBlocks)
      break;

    if (SuccSize(CurrEntry) != 2)
      break;

    BasicBlock *Succ1 = *succ_begin(CurrEntry);
    BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1);

    BasicBlock *ReturnBlock, *NonReturnBlock;
    std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);

    if (ReturnBlock) {
      OutliningInfo->Entries.push_back(CurrEntry);
      OutliningInfo->ReturnBlock = ReturnBlock;
      OutliningInfo->NonReturnBlock = NonReturnBlock;
      CandidateFound = true;
      break;
    }

    BasicBlock *CommSucc;
    BasicBlock *OtherSucc;
    std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);

    if (!CommSucc)
      break;

    OutliningInfo->Entries.push_back(CurrEntry);
    CurrEntry = OtherSucc;

  } while (true);

  if (!CandidateFound)
    return std::unique_ptr<FunctionOutliningInfo>();

  // Do a sanity check of the entries: there should not be any successors
  // (not in the entry set) other than {ReturnBlock, NonReturnBlock}.
  assert(OutliningInfo->Entries[0] == &F->front() &&
         "Function Entry must be the first in Entries vector");
  DenseSet<BasicBlock *> Entries;
  for (BasicBlock *E : OutliningInfo->Entries)
    Entries.insert(E);

  // Returns true if BB has a predecessor that is not
  // in the Entries set.
  auto HasNonEntryPred = [Entries](BasicBlock *BB) {
    for (auto Pred : predecessors(BB)) {
      if (!Entries.count(Pred))
        return true;
    }
    return false;
  };
  auto CheckAndNormalizeCandidate =
      [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
        for (BasicBlock *E : OutliningInfo->Entries) {
          for (auto Succ : successors(E)) {
            if (Entries.count(Succ))
              continue;
            if (Succ == OutliningInfo->ReturnBlock)
              OutliningInfo->ReturnBlockPreds.push_back(E);
            else if (Succ != OutliningInfo->NonReturnBlock)
              return false;
          }
          // There should not be any outside incoming edges either:
          if (HasNonEntryPred(E))
            return false;
        }
        return true;
      };

  if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
    return std::unique_ptr<FunctionOutliningInfo>();

  // Now grow the candidate's inlining region further by peeling off
  // dominating blocks from the outlining region:
  while (OutliningInfo->GetNumInlinedBlocks() < MaxNumInlineBlocks) {
    BasicBlock *Cand = OutliningInfo->NonReturnBlock;
    if (SuccSize(Cand) != 2)
      break;

    if (HasNonEntryPred(Cand))
      break;

    BasicBlock *Succ1 = *succ_begin(Cand);
    BasicBlock *Succ2 = *(succ_begin(Cand) + 1);

    BasicBlock *ReturnBlock, *NonReturnBlock;
    std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
    if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
      break;

    if (NonReturnBlock->getSinglePredecessor() != Cand)
      break;

    // Now grow and update OutliningInfo:
    OutliningInfo->Entries.push_back(Cand);
    OutliningInfo->NonReturnBlock = NonReturnBlock;
    OutliningInfo->ReturnBlockPreds.push_back(Cand);
    Entries.insert(Cand);
  }

  return OutliningInfo;
}
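
As a rough, source-level illustration (not a test case from the pass) of the shape computeOutliningInfo looks for: a cheap entry block whose conditional branch has one successor that returns early and another that starts the expensive region to be outlined.

// Hedged illustration only; the function and variable names are invented.
int process(int x) {
  if (x <= 0)           // the entry block: one successor is a return block,
    return 0;           // which becomes OutliningInfo->ReturnBlock
  // The other successor (OutliningInfo->NonReturnBlock) begins the cold,
  // expensive region that partial inlining would outline into a new function
  // and call from the inlined entry blocks.
  int acc = 0;
  for (int i = 1; i <= x; ++i)
    acc += i * x;
  return acc;
}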
Esempio n. 12
0
/// Rotate loop LP. Return true if the loop is rotated.
bool LoopRotate::rotateLoop(Loop *L) {
  // If the loop has only one block then there is not much to rotate.
  if (L->getBlocks().size() == 1)
    return false;
  
  BasicBlock *OrigHeader = L->getHeader();
  
  BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
  if (BI == 0 || BI->isUnconditional())
    return false;
  
  // If the loop header is not one of the loop exiting blocks then
  // either this loop is already rotated or it is not
  // suitable for loop rotation transformations.
  if (!L->isLoopExiting(OrigHeader))
    return false;

  // Updating PHI nodes in loops with multiple exits adds complexity.
  // Keep it simple, and restrict loop rotation to loops with one exit only.
  // In the future, this restriction can be lifted to support multiple exits
  // if required.
  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getExitBlocks(ExitBlocks);
  if (ExitBlocks.size() > 1)
    return false;

  // Check size of original header and reject loop if it is very big.
  {
    CodeMetrics Metrics;
    Metrics.analyzeBasicBlock(OrigHeader);
    if (Metrics.NumInsts > MAX_HEADER_SIZE)
      return false;
  }

  // Now, this loop is suitable for rotation.
  BasicBlock *OrigPreheader = L->getLoopPreheader();
  BasicBlock *OrigLatch = L->getLoopLatch();
  
  // If the loop could not be converted to canonical form, it must have an
  // indirectbr in it, just give up.
  if (OrigPreheader == 0 || OrigLatch == 0)
    return false;

  // Anything ScalarEvolution may know about this loop or the PHI nodes
  // in its header will soon be invalidated.
  if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>())
    SE->forgetLoop(L);

  // Find the new loop header. NewHeader is the header's one and only successor
  // that is inside the loop; the header's other successor is outside the
  // loop. Otherwise the loop is not suitable for rotation.
  BasicBlock *Exit = BI->getSuccessor(0);
  BasicBlock *NewHeader = BI->getSuccessor(1);
  if (L->contains(Exit))
    std::swap(Exit, NewHeader);
  assert(NewHeader && "Unable to determine new loop header");
  assert(L->contains(NewHeader) && !L->contains(Exit) && 
         "Unable to determine loop header and exit blocks");
  
  // This code assumes that the new header has exactly one predecessor.
  // Remove any single-entry PHI nodes in it.
  assert(NewHeader->getSinglePredecessor() &&
         "New header doesn't have one pred!");
  FoldSingleEntryPHINodes(NewHeader);

  // Begin by walking OrigHeader and populating ValueMap with an entry for
  // each Instruction.
  BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end();
  ValueToValueMapTy ValueMap;

  // For PHI nodes, the value available in OldPreHeader is just the
  // incoming value from OldPreHeader.
  for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
    ValueMap[PN] = PN->getIncomingValue(PN->getBasicBlockIndex(OrigPreheader));

  // For the rest of the instructions, either hoist to the OrigPreheader if
  // possible or create a clone in the OldPreHeader if not.
  TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator();
  while (I != E) {
    Instruction *Inst = I++;
    
    // If the instruction's operands are invariant and it doesn't read or write
    // memory, then it is safe to hoist.  Doing this doesn't change the order of
    // execution in the preheader, but does prevent the instruction from
    // executing in each iteration of the loop.  This means it is safe to hoist
    // something that might trap, but isn't safe to hoist something that reads
    // memory (without proving that the loop doesn't write).
    if (L->hasLoopInvariantOperands(Inst) &&
        !Inst->mayReadFromMemory() && !Inst->mayWriteToMemory() &&
        !isa<TerminatorInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst)) {
      Inst->moveBefore(LoopEntryBranch);
      continue;
    }
    
    // Otherwise, create a duplicate of the instruction.
    Instruction *C = Inst->clone();
    
    // Eagerly remap the operands of the instruction.
    RemapInstruction(C, ValueMap,
                     RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
    
    // With the operands remapped, see if the instruction constant folds or is
    // otherwise simplifiable.  This commonly occurs because the entry from PHI
    // nodes allows icmps and other instructions to fold.
    Value *V = SimplifyInstruction(C);
    if (V && LI->replacementPreservesLCSSAForm(C, V)) {
      // If so, then delete the temporary instruction and stick the folded value
      // in the map.
      delete C;
      ValueMap[Inst] = V;
    } else {
      // Otherwise, stick the new instruction into the new block!
      C->setName(Inst->getName());
      C->insertBefore(LoopEntryBranch);
      ValueMap[Inst] = C;
    }
  }

  // Along with all the other instructions, we just cloned OrigHeader's
  // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's
  // successors by duplicating their incoming values for OrigHeader.
  TerminatorInst *TI = OrigHeader->getTerminator();
  for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
    for (BasicBlock::iterator BI = TI->getSuccessor(i)->begin();
         PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
      PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader);

  // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove
  // OrigPreHeader's old terminator (the original branch into the loop), and
  // remove the corresponding incoming values from the PHI nodes in OrigHeader.
  LoopEntryBranch->eraseFromParent();

  // If there were any uses of instructions in the duplicated block outside the
  // loop, update them, inserting PHI nodes as required
  RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap);

  // NewHeader is now the header of the loop.
  L->moveToHeader(NewHeader);
  assert(L->getHeader() == NewHeader && "Latch block is our new header");

  
  // At this point, we've finished our major CFG changes.  As part of cloning
  // the loop into the preheader we've simplified instructions and the
  // duplicated conditional branch may now be branching on a constant.  If it is
  // branching on a constant and if that constant means that we enter the loop,
  // then we fold away the cond branch to an uncond branch.  This simplifies the
  // loop in cases important for nested loops, and it also means we don't have
  // to split as many edges.
  BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator());
  assert(PHBI->isConditional() && "Should be clone of BI condbr!");
  if (!isa<ConstantInt>(PHBI->getCondition()) ||
      PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero())
          != NewHeader) {
    // The conditional branch can't be folded, handle the general case.
    // Update DominatorTree to reflect the CFG change we just made.  Then split
    // edges as necessary to preserve LoopSimplify form.
    if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
      // Since OrigPreheader now has the conditional branch to Exit block, it is
      // the dominator of Exit.
      DT->changeImmediateDominator(Exit, OrigPreheader);
      DT->changeImmediateDominator(NewHeader, OrigPreheader);
      
      // Update OrigHeader to be dominated by the new header block.
      DT->changeImmediateDominator(OrigHeader, OrigLatch);
    }
    
    // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and
    // thus is not a preheader anymore.  Split the edge to form a real preheader.
    BasicBlock *NewPH = SplitCriticalEdge(OrigPreheader, NewHeader, this);
    NewPH->setName(NewHeader->getName() + ".lr.ph");
    
    // Preserve canonical loop form, which means that 'Exit' should have only one
    // predecessor.
    BasicBlock *ExitSplit = SplitCriticalEdge(L->getLoopLatch(), Exit, this);
    ExitSplit->moveBefore(Exit);
  } else {
    // We can fold the conditional branch in the preheader, this makes things
    // simpler. The first step is to remove the extra edge to the Exit block.
    Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/);
    BranchInst::Create(NewHeader, PHBI);
    PHBI->eraseFromParent();
    
    // With our CFG finalized, update DomTree if it is available.
    if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
      // Update OrigHeader to be dominated by the new header block.
      DT->changeImmediateDominator(NewHeader, OrigPreheader);
      DT->changeImmediateDominator(OrigHeader, OrigLatch);
    }
  }
  
  assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation");
  assert(L->getLoopLatch() && "Invalid loop latch after loop rotation");

  // Now that the CFG and DomTree are in a consistent state again, try to merge
  // the OrigHeader block into OrigLatch.  This will succeed if they are
  // connected by an unconditional branch.  This is just a cleanup so the
  // emitted code isn't too gross in this common case.
  MergeBlockIntoPredecessor(OrigHeader, this);
  
  ++NumRotated;
  return true;
}
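
A hedged, source-level picture of what rotateLoop() achieves (this is not code from the pass): the header test is cloned into the preheader as a guard and the loop becomes bottom-tested, so the new header no longer exits the loop.

void work(int i);                 // hypothetical loop body

void beforeRotation(int n) {
  for (int i = 0; i < n; ++i)     // header tests "i < n" and exits the loop
    work(i);                      // latch jumps back to the header
}

void afterRotation(int n) {
  int i = 0;
  if (i < n) {                    // cloned header test now guards loop entry
    do {
      work(i);
      ++i;
    } while (i < n);              // the latch is now the exiting block
  }
}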
Esempio n. 13
0
bool BranchProbabilityInfo::calcZeroHeuristics(BasicBlock *BB) {
  BranchInst * BI = dyn_cast<BranchInst>(BB->getTerminator());
  if (!BI || !BI->isConditional())
    return false;

  Value *Cond = BI->getCondition();
  ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
  if (!CI)
    return false;

  Value *RHS = CI->getOperand(1);
  ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
  if (!CV)
    return false;

  bool isProb;
  if (CV->isZero()) {
    switch (CI->getPredicate()) {
    case CmpInst::ICMP_EQ:
      // X == 0   ->  Unlikely
      isProb = false;
      break;
    case CmpInst::ICMP_NE:
      // X != 0   ->  Likely
      isProb = true;
      break;
    case CmpInst::ICMP_SLT:
      // X < 0   ->  Unlikely
      isProb = false;
      break;
    case CmpInst::ICMP_SGT:
      // X > 0   ->  Likely
      isProb = true;
      break;
    default:
      return false;
    }
  } else if (CV->isOne() && CI->getPredicate() == CmpInst::ICMP_SLT) {
    // InstCombine canonicalizes X <= 0 into X < 1.
    // X <= 0   ->  Unlikely
    isProb = false;
  } else if (CV->isAllOnesValue()) {
    switch (CI->getPredicate()) {
    case CmpInst::ICMP_EQ:
      // X == -1  ->  Unlikely
      isProb = false;
      break;
    case CmpInst::ICMP_NE:
      // X != -1  ->  Likely
      isProb = true;
      break;
    case CmpInst::ICMP_SGT:
      // InstCombine canonicalizes X >= 0 into X > -1.
      // X >= 0   ->  Likely
      isProb = true;
      break;
    default:
      return false;
    }
  } else {
    return false;
  }

  unsigned TakenIdx = 0, NonTakenIdx = 1;

  if (!isProb)
    std::swap(TakenIdx, NonTakenIdx);

  setEdgeWeight(BB, TakenIdx, ZH_TAKEN_WEIGHT);
  setEdgeWeight(BB, NonTakenIdx, ZH_NONTAKEN_WEIGHT);

  return true;
}
Esempio n. 14
0
bool BranchProbabilityInfo::calcZeroHeuristics(BasicBlock *BB) {
    BranchInst * BI = dyn_cast<BranchInst>(BB->getTerminator());
    if (!BI || !BI->isConditional())
        return false;

    Value *Cond = BI->getCondition();
    ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
    if (!CI)
        return false;

    Value *RHS = CI->getOperand(1);
    ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
    if (!CV)
        return false;

    // If the LHS is the result of AND'ing a value with a single bit bitmask,
    // we don't have information about probabilities.
    if (Instruction *LHS = dyn_cast<Instruction>(CI->getOperand(0)))
        if (LHS->getOpcode() == Instruction::And)
            if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(LHS->getOperand(1)))
                if (AndRHS->getUniqueInteger().isPowerOf2())
                    return false;

    bool isProb;
    if (CV->isZero()) {
        switch (CI->getPredicate()) {
        case CmpInst::ICMP_EQ:
            // X == 0   ->  Unlikely
            isProb = false;
            break;
        case CmpInst::ICMP_NE:
            // X != 0   ->  Likely
            isProb = true;
            break;
        case CmpInst::ICMP_SLT:
            // X < 0   ->  Unlikely
            isProb = false;
            break;
        case CmpInst::ICMP_SGT:
            // X > 0   ->  Likely
            isProb = true;
            break;
        default:
            return false;
        }
    } else if (CV->isOne() && CI->getPredicate() == CmpInst::ICMP_SLT) {
        // InstCombine canonicalizes X <= 0 into X < 1.
        // X <= 0   ->  Unlikely
        isProb = false;
    } else if (CV->isAllOnesValue()) {
        switch (CI->getPredicate()) {
        case CmpInst::ICMP_EQ:
            // X == -1  ->  Unlikely
            isProb = false;
            break;
        case CmpInst::ICMP_NE:
            // X != -1  ->  Likely
            isProb = true;
            break;
        case CmpInst::ICMP_SGT:
            // InstCombine canonicalizes X >= 0 into X > -1.
            // X >= 0   ->  Likely
            isProb = true;
            break;
        default:
            return false;
        }
    } else {
        return false;
    }

    unsigned TakenIdx = 0, NonTakenIdx = 1;

    if (!isProb)
        std::swap(TakenIdx, NonTakenIdx);

    setEdgeWeight(BB, TakenIdx, ZH_TAKEN_WEIGHT);
    setEdgeWeight(BB, NonTakenIdx, ZH_NONTAKEN_WEIGHT);

    return true;
}
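
The two variants of calcZeroHeuristics above only decide which edge is taken and which is not; the weights themselves belong to the pass. A hedged sketch of how two such weights map to a probability, and what that means for a branch on x == 0:

// ZH_TAKEN_WEIGHT / ZH_NONTAKEN_WEIGHT are constants owned by the pass and are
// treated here as opaque inputs; only their ratio matters.
static double takenProbability(unsigned TakenWeight, unsigned NonTakenWeight) {
  return double(TakenWeight) / double(TakenWeight + NonTakenWeight);
}
// For a branch on "x == 0", isProb is false above, so TakenIdx/NonTakenIdx are
// swapped and the successor reached when x == 0 receives ZH_NONTAKEN_WEIGHT:
// the "== 0" path is predicted to be the unlikely edge.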
Esempio n. 15
0
/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
/// instructions to the predecessor to enable tail call optimizations. The
/// case it is currently looking for is:
/// @code
/// bb0:
///   %tmp0 = tail call i32 @f0()
///   br label %return
/// bb1:
///   %tmp1 = tail call i32 @f1()
///   br label %return
/// bb2:
///   %tmp2 = tail call i32 @f2()
///   br label %return
/// return:
///   %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
///   ret i32 %retval
/// @endcode
///
/// =>
///
/// @code
/// bb0:
///   %tmp0 = tail call i32 @f0()
///   ret i32 %tmp0
/// bb1:
///   %tmp1 = tail call i32 @f1()
///   ret i32 %tmp1
/// bb2:
///   %tmp2 = tail call i32 @f2()
///   ret i32 %tmp2
/// @endcode
bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
  if (!TLI)
    return false;

  PHINode *PN = 0;
  BitCastInst *BCI = 0;
  Value *V = RI->getReturnValue();
  if (V) {
    BCI = dyn_cast<BitCastInst>(V);
    if (BCI)
      V = BCI->getOperand(0);

    PN = dyn_cast<PHINode>(V);
    if (!PN)
      return false;
  }

  BasicBlock *BB = RI->getParent();
  if (PN && PN->getParent() != BB)
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  // See llvm::isInTailCallPosition().
  const Function *F = BB->getParent();
  Attributes CallerRetAttr = F->getAttributes().getRetAttributes();
  if (CallerRetAttr.hasAttribute(Attributes::ZExt) ||
      CallerRetAttr.hasAttribute(Attributes::SExt))
    return false;

  // Make sure there are no instructions between the PHI and return, or that the
  // return is the first instruction in the block.
  if (PN) {
    BasicBlock::iterator BI = BB->begin();
    do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
    if (&*BI == BCI)
      // Also skip over the bitcast.
      ++BI;
    if (&*BI != RI)
      return false;
  } else {
    BasicBlock::iterator BI = BB->begin();
    while (isa<DbgInfoIntrinsic>(BI)) ++BI;
    if (&*BI != RI)
      return false;
  }

  /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
  /// call.
  SmallVector<CallInst*, 4> TailCalls;
  if (PN) {
    for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
      CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
      // Make sure the phi value is indeed produced by the tail call.
      if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
          TLI->mayBeEmittedAsTailCall(CI))
        TailCalls.push_back(CI);
    }
  } else {
    SmallPtrSet<BasicBlock*, 4> VisitedBBs;
    for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
      if (!VisitedBBs.insert(*PI))
        continue;

      BasicBlock::InstListType &InstList = (*PI)->getInstList();
      BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
      BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
      do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
      if (RI == RE)
        continue;

      CallInst *CI = dyn_cast<CallInst>(&*RI);
      if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI))
        TailCalls.push_back(CI);
    }
  }

  bool Changed = false;
  for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
    CallInst *CI = TailCalls[i];
    CallSite CS(CI);

    // Conservatively require the attributes of the call to match those of the
    // return. Ignore noalias because it doesn't affect the call sequence.
    Attributes CalleeRetAttr = CS.getAttributes().getRetAttributes();
    if (AttrBuilder(CalleeRetAttr).
          removeAttribute(Attributes::NoAlias) !=
        AttrBuilder(CallerRetAttr).
          removeAttribute(Attributes::NoAlias))
      continue;

    // Make sure the call instruction is followed by an unconditional branch to
    // the return block.
    BasicBlock *CallBB = CI->getParent();
    BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
    if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
      continue;

    // Duplicate the return into CallBB.
    (void)FoldReturnIntoUncondBranch(RI, BB, CallBB);
    ModifiedDT = Changed = true;
    ++NumRetsDup;
  }

  // If we eliminated all predecessors of the block, delete the block now.
  if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
    BB->eraseFromParent();

  return Changed;
}
Esempio n. 16
0
bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
  bool MadeChange = false;

  Triple TT = Triple(L->getHeader()->getParent()->getParent()->
                     getTargetTriple());
  if (!TT.isArch32Bit() && !TT.isArch64Bit())
    return MadeChange; // Unknown arch. type.

  // Process nested loops first.
  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
    MadeChange |= convertToCTRLoop(*I);
  }

  // If a nested loop has been converted, then we can't convert this loop.
  if (MadeChange)
    return MadeChange;

#ifndef NDEBUG
  // Stop trying after reaching the limit (if any).
  int Limit = CTRLoopLimit;
  if (Limit >= 0) {
    if (Counter >= CTRLoopLimit)
      return false;
    Counter++;
  }
#endif

  // We don't want to spill/restore the counter register, and so we don't
  // want to use the counter register if the loop contains calls.
  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
       I != IE; ++I)
    if (mightUseCTR(TT, *I))
      return MadeChange;

  SmallVector<BasicBlock*, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  BasicBlock *CountedExitBlock = 0;
  const SCEV *ExitCount = 0;
  BranchInst *CountedExitBranch = 0;
  for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
       IE = ExitingBlocks.end(); I != IE; ++I) {
    const SCEV *EC = SE->getExitCount(L, *I);
    DEBUG(dbgs() << "Exit Count for " << *L << " from block " <<
                    (*I)->getName() << ": " << *EC << "\n");
    if (isa<SCEVCouldNotCompute>(EC))
      continue;
    if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
      if (ConstEC->getValue()->isZero())
        continue;
    } else if (!SE->isLoopInvariant(EC, L))
      continue;

    if (SE->getTypeSizeInBits(EC->getType()) > (TT.isArch64Bit() ? 64 : 32))
      continue;

    // We now have a loop-invariant count of loop iterations (which is not the
    // constant zero) for which we know that this loop will not exit via this
    // exiting block.

    // We need to make sure that this block will run on every loop iteration.
    // For this to be true, we must dominate all blocks with backedges. Such
    // blocks are in-loop predecessors to the header block.
    bool NotAlways = false;
    for (pred_iterator PI = pred_begin(L->getHeader()),
         PIE = pred_end(L->getHeader()); PI != PIE; ++PI) {
      if (!L->contains(*PI))
        continue;

      if (!DT->dominates(*I, *PI)) {
        NotAlways = true;
        break;
      }
    }

    if (NotAlways)
      continue;

    // Make sure this block ends with a conditional branch.
    Instruction *TI = (*I)->getTerminator();
    if (!TI)
      continue;

    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      if (!BI->isConditional())
        continue;

      CountedExitBranch = BI;
    } else
      continue;

    // Note that this block may not be the loop latch block, even if the loop
    // has a latch block.
    CountedExitBlock = *I;
    ExitCount = EC;
    break;
  }

  if (!CountedExitBlock)
    return MadeChange;

  BasicBlock *Preheader = L->getLoopPreheader();

  // If we don't have a preheader, then insert one. If we already have a
  // preheader, then we can use it (except if the preheader contains a use of
  // the CTR register because some such uses might be reordered by the
  // selection DAG after the mtctr instruction).
  if (!Preheader || mightUseCTR(TT, Preheader))
    Preheader = InsertPreheaderForLoop(L, this);
  if (!Preheader)
    return MadeChange;

  DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName() << "\n");

  // Insert the count into the preheader and replace the condition used by the
  // selected branch.
  MadeChange = true;

  SCEVExpander SCEVE(*SE, "loopcnt");
  LLVMContext &C = SE->getContext();
  Type *CountType = TT.isArch64Bit() ? Type::getInt64Ty(C) :
                                       Type::getInt32Ty(C);
  if (!ExitCount->getType()->isPointerTy() &&
      ExitCount->getType() != CountType)
    ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
  ExitCount = SE->getAddExpr(ExitCount,
                             SE->getConstant(CountType, 1)); 
  Value *ECValue = SCEVE.expandCodeFor(ExitCount, CountType,
                                       Preheader->getTerminator());

  IRBuilder<> CountBuilder(Preheader->getTerminator());
  Module *M = Preheader->getParent()->getParent();
  Value *MTCTRFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr,
                                               CountType);
  CountBuilder.CreateCall(MTCTRFunc, ECValue);

  IRBuilder<> CondBuilder(CountedExitBranch);
  Value *DecFunc =
    Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero);
  Value *NewCond = CondBuilder.CreateCall(DecFunc);
  Value *OldCond = CountedExitBranch->getCondition();
  CountedExitBranch->setCondition(NewCond);

  // The false branch must exit the loop.
  if (!L->contains(CountedExitBranch->getSuccessor(0)))
    CountedExitBranch->swapSuccessors();

  // The old condition may be dead now, and may have even created a dead PHI
  // (the original induction variable).
  RecursivelyDeleteTriviallyDeadInstructions(OldCond);
  DeleteDeadPHIs(CountedExitBlock);

  ++NumCTRLoops;
  return MadeChange;
}
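
One detail worth spelling out is the SE->getAddExpr(ExitCount, 1) step above: ScalarEvolution's exit count is the number of times the backedge is taken, while the CTR register needs the full trip count. A small hedged sketch of that arithmetic:

#include <cassert>

// Hedged sketch, not code from the pass: CTR is decremented once per
// iteration and the loop exits when it reaches zero, so it must start at
// backedge-taken-count + 1.
static void ctrCountExample() {
  unsigned BackedgeTaken = 7;            // e.g. for "for (i = 0; i < 8; ++i)"
  unsigned CTRValue = BackedgeTaken + 1; // what the mtctr call above receives
  assert(CTRValue == 8 && "CTR holds the trip count, not the backedge count");
}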
Esempio n. 17
0
// Insert a basic block with appropriate code
// along a given edge.
void insertBB(Edge ed,
	      getEdgeCode *edgeCode, 
	      Instruction *rInst, 
	      Value *countInst, 
	      int numPaths, int Methno, Value *threshold){

  BasicBlock* BB1=ed.getFirst()->getElement();
  BasicBlock* BB2=ed.getSecond()->getElement();
  
#ifdef DEBUG_PATH_PROFILES
  //debugging info
  cerr<<"Edges with codes ######################\n";
  cerr<<BB1->getName()<<"->"<<BB2->getName()<<"\n";
  cerr<<"########################\n";
#endif
  
  //We need to insert a BB between BB1 and BB2 
  TerminatorInst *TI=BB1->getTerminator();
  BasicBlock *newBB=new BasicBlock("counter", BB1->getParent());

  //get code for the new BB
  vector<Value *> retVec;

  edgeCode->getCode(rInst, countInst, BB1->getParent(), newBB, retVec);

  BranchInst *BI =  cast<BranchInst>(TI);

  // The terminator is a branch instruction, so we need to change its
  // branch destinations to point to the new BB.

  if(BI->isUnconditional()){
    BI->setUnconditionalDest(newBB);
  }
  else{
    if(BI->getSuccessor(0)==BB2)
      BI->setSuccessor(0, newBB);

    if(BI->getSuccessor(1)==BB2)
      BI->setSuccessor(1, newBB);
  }

  BasicBlock *triggerBB = NULL;
  if(retVec.size()>0){
    triggerBB = new BasicBlock("trigger", BB1->getParent());
    getTriggerCode(BB1->getParent()->getParent(), triggerBB, Methno, 
                   retVec[1], countInst, rInst);//retVec[0]);

    //Instruction *castInst = new CastInst(retVec[0], Type::IntTy, "");
    Instruction *etr = new LoadInst(threshold, "threshold");
    
    //std::cerr<<"type1: "<<etr->getType()<<" type2: "<<retVec[0]->getType()<<"\n"; 
    Instruction *cmpInst = new SetCondInst(Instruction::SetLE, etr, 
                                           retVec[0], "");
    Instruction *newBI2 = new BranchInst(triggerBB, BB2, cmpInst);
    //newBB->getInstList().push_back(castInst);
    newBB->getInstList().push_back(etr);
    newBB->getInstList().push_back(cmpInst);
    newBB->getInstList().push_back(newBI2);
    
    //triggerBB->getInstList().push_back(triggerInst);
    new BranchInst(BB2, 0, 0, triggerBB);
  }
  else{
    new BranchInst(BB2, 0, 0, newBB);
  }

  //now iterate over BB2, and set its Phi nodes right
  for(BasicBlock::iterator BB2Inst = BB2->begin(), BBend = BB2->end(); 
      BB2Inst != BBend; ++BB2Inst){
   
    if(PHINode *phiInst=dyn_cast<PHINode>(BB2Inst)){
      int bbIndex=phiInst->getBasicBlockIndex(BB1);
      assert(bbIndex>=0);
      phiInst->setIncomingBlock(bbIndex, newBB);

      // If the trigger block exists, add the corresponding incoming value for it too.
      if(retVec.size()>0){
        assert(triggerBB && "BasicBlock with trigger should not be null!");
        Value *vl = phiInst->getIncomingValue((unsigned int)bbIndex);
        phiInst->addIncoming(vl, triggerBB);
      }
    }
  }
}
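
As an aside, more recent LLVM trees provide a utility that performs the edge split that insertBB() does by hand above; the counter and threshold code would then be emitted into the returned block. The snippet below is an assumption about the newer BasicBlockUtils API, not part of the original path-profiling code.

#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;

BasicBlock *splitEdgeForCounter(BasicBlock *BB1, BasicBlock *BB2) {
  // SplitEdge rewires BB1's terminator and BB2's PHI nodes, which is exactly
  // the bookkeeping insertBB() performs manually above.
  BasicBlock *CounterBB = SplitEdge(BB1, BB2);
  // ... insert the path counter update and threshold check here ...
  return CounterBB;
}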
Esempio n. 18
0
void DSWP::buildPDG(Loop *L) {
    //Initialize PDG
	for (Loop::block_iterator bi = L->getBlocks().begin(); bi != L->getBlocks().end(); bi++) {
		BasicBlock *BB = *bi;
		for (BasicBlock::iterator ui = BB->begin(); ui != BB->end(); ui++) {
			Instruction *inst = &(*ui);

			// standardize the name for all expressions
			if (util.hasNewDef(inst)) {
				inst->setName(util.genId());
				dname[inst] = inst->getNameStr();
			} else {
				dname[inst] = util.genId();
			}

			pdg[inst] = new vector<Edge>();
			rev[inst] = new vector<Edge>();
		}
	}

	//LoopInfo &li = getAnalysis<LoopInfo>();

	/*
	 * Memory dependency analysis
	 */
	MemoryDependenceAnalysis &mda = getAnalysis<MemoryDependenceAnalysis>();

	for (Loop::block_iterator bi = L->getBlocks().begin(); bi != L->getBlocks().end(); bi++) {
		BasicBlock *BB = *bi;
		for (BasicBlock::iterator ii = BB->begin(); ii != BB->end(); ii++) {
			Instruction *inst = &(*ii);

			//data dependence = register dependence + memory dependence

			//begin register dependence
			for (Value::use_iterator ui = ii->use_begin(); ui != ii->use_end(); ui++) {
				if (Instruction *user = dyn_cast<Instruction>(*ui)) {
					addEdge(inst, user, REG);
				}
			}
			//finish register dependence

			//begin memory dependence
			MemDepResult mdr = mda.getDependency(inst);
			// TODO: not sure what clobbers mean!!

			if (mdr.isDef()) {
				Instruction *dep = mdr.getInst();

				if (isa<LoadInst>(inst)) {
					if (isa<StoreInst>(dep)) {
						addEdge(dep, inst, DTRUE);	//READ AFTER WRITE
					}
				}
				if (isa<StoreInst>(inst)) {
					if (isa<LoadInst>(dep)) {
						addEdge(dep, inst, DANTI);	//WRITE AFTER READ
					}
					if (isa<StoreInst>(dep)) {
						addEdge(dep, inst, DOUT);	//WRITE AFTER WRITE
					}
				}
				// READ AFTER READ edges are inserted after the PDG is built
			}
			//end memory dependence
		}//for ii
	}//for bi

	/*
	 * begin control dependence
	 */
	PostDominatorTree &pdt = getAnalysis<PostDominatorTree>();
	//cout << pdt.getRootNode()->getBlock()->getNameStr() << endl;

	/*
	 * alien code part 1
	 */
	LoopInfo *LI = &getAnalysis<LoopInfo>();
	std::set<BranchInst*> backedgeParents;
	for (Loop::block_iterator bi = L->getBlocks().begin(); bi
			!= L->getBlocks().end(); bi++) {
		BasicBlock *BB = *bi;
		for (BasicBlock::iterator ii = BB->begin(); ii != BB->end(); ii++) {
			Instruction *inst = ii;
			if (BranchInst *brInst = dyn_cast<BranchInst>(inst)) {
				// get the loop this instruction (and therefore basic block) belongs to
				Loop *instLoop = LI->getLoopFor(BB);
				bool branchesToHeader = false;
				for (int i = brInst->getNumSuccessors() - 1; i >= 0
						&& !branchesToHeader; i--) {
					// if the branch could exit, store it
					if (LI->getLoopFor(brInst->getSuccessor(i)) != instLoop) {
						branchesToHeader = true;
					}
				}
				if (branchesToHeader) {
					backedgeParents.insert(brInst);
				}
			}
		}
	}

	//build information for predecessor of blocks in post dominator tree
	for (Function::iterator bi = func->begin(); bi != func->end(); bi++) {
		BasicBlock *BB = bi;
		DomTreeNode *dn = pdt.getNode(BB);

		for (DomTreeNode::iterator di = dn->begin(); di != dn->end(); di++) {
			BasicBlock *CB = (*di)->getBlock();
			pre[CB] = BB;
		}
	}
//
//	//add dependency within a basicblock
//	for (Loop::block_iterator bi = L->getBlocks().begin(); bi != L->getBlocks().end(); bi++) {
//		BasicBlock *BB = *bi;
//		Instruction *pre = NULL;
//		for (BasicBlock::iterator ui = BB->begin(); ui != BB->end(); ui++) {
//			Instruction *inst = &(*ui);
//			if (pre != NULL) {
//				addEdge(pre, inst, CONTROL);
//			}
//			pre = inst;
//		}
//	}

//	//the special kind of dependence need loop peeling ? I don't know whether this is needed
//	for (Loop::block_iterator bi = L->getBlocks().begin(); bi != L->getBlocks().end(); bi++) {
//		BasicBlock *BB = *bi;
//		for (succ_iterator PI = succ_begin(BB); PI != succ_end(BB); ++PI) {
//			BasicBlock *succ = *PI;
//
//			checkControlDependence(BB, succ, pdt);
//		}
//	}


	/*
	 * alien code part 2
	 */
	// add normal control dependencies
	// loop through each instruction
	for (Loop::block_iterator bbIter = L->block_begin(); bbIter
			!= L->block_end(); ++bbIter) {
		BasicBlock *bb = *bbIter;
		// check the successors of this basic block
		if (BranchInst *branchInst = dyn_cast<BranchInst>(bb->getTerminator())) {
			if (branchInst->getNumSuccessors() > 1) {
				BasicBlock * succ = branchInst->getSuccessor(0);
				// if the successor is nested shallower than the current basic block, continue
				if (LI->getLoopDepth(bb) < LI->getLoopDepth(succ)) {
					continue;
				}
				// otherwise, add all instructions to graph as control dependence
				while (succ != NULL && succ != bb && LI->getLoopDepth(succ)
						>= LI->getLoopDepth(bb)) {
					Instruction *terminator = bb->getTerminator();
					for (BasicBlock::iterator succInstIter = succ->begin(); &(*succInstIter)
							!= succ->getTerminator(); ++succInstIter) {
						addEdge(terminator, &(*succInstIter), CONTROL);
					}
					if (BranchInst *succBrInst = dyn_cast<BranchInst>(succ->getTerminator())) {
						if (succBrInst->getNumSuccessors() > 1) {
							addEdge(terminator, succ->getTerminator(),
									CONTROL);
						}
					}
					if (BranchInst *br = dyn_cast<BranchInst>(succ->getTerminator())) {
						if (br->getNumSuccessors() == 1) {
							succ = br->getSuccessor(0);
						} else {
							succ = NULL;
						}
					} else {
						succ = NULL;
					}
				}
			}
		}
	}


	/*
	 * alien code part 3
	 */
    for (std::set<BranchInst*>::iterator exitIter = backedgeParents.begin(); exitIter != backedgeParents.end(); ++exitIter) {
        BranchInst *exitBranch = *exitIter;
        if (exitBranch->isConditional()) {
            BasicBlock *header = LI->getLoopFor(exitBranch->getParent())->getHeader();
            for (BasicBlock::iterator ctrlIter = header->begin(); ctrlIter != header->end(); ++ctrlIter) {
                addEdge(exitBranch, &(*ctrlIter), CONTROL);
            }
        }
    }

	//end control dependence
}
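
For reference, a hedged illustration (not output of buildPDG) of the three memory-dependence kinds distinguished above, on a tiny straight-line IR fragment:

// Hedged illustration of the dependence kinds used above (DTRUE/DANTI/DOUT):
//
//   store i32 %a, i32* %p        ; S1
//   %x = load i32, i32* %p       ; L1    S1 -> L1 : DTRUE (read after write)
//   store i32 %b, i32* %p        ; S2    L1 -> S2 : DANTI (write after read)
//                                ;       S1 -> S2 : DOUT  (write after write)
//
// Register dependences (REG) are simply def -> use edges over the SSA
// use-lists, which is what the use_iterator loop above records.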
Esempio n. 19
0
/// GetIfCondition - Given a basic block (BB) with two predecessors,
/// check to see if the merge at this block is due
/// to an "if condition".  If so, return the boolean condition that determines
/// which entry into BB will be taken.  Also, return by references the block
/// that will be entered from if the condition is true, and the block that will
/// be entered if the condition is false.
///
/// This does no checking to see if the true/false blocks have large or unsavory
/// instructions in them.
Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
                             BasicBlock *&IfFalse) {
  PHINode *SomePHI = dyn_cast<PHINode>(BB->begin());
  BasicBlock *Pred1 = NULL;
  BasicBlock *Pred2 = NULL;

  if (SomePHI) {
    if (SomePHI->getNumIncomingValues() != 2)
      return NULL;
    Pred1 = SomePHI->getIncomingBlock(0);
    Pred2 = SomePHI->getIncomingBlock(1);
  } else {
    pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
    if (PI == PE) // No predecessor
      return NULL;
    Pred1 = *PI++;
    if (PI == PE) // Only one predecessor
      return NULL;
    Pred2 = *PI++;
    if (PI != PE) // More than two predecessors
      return NULL;
  }

  // We can only handle branches.  Other control flow will be lowered to
  // branches if possible anyway.
  BranchInst *Pred1Br = dyn_cast<BranchInst>(Pred1->getTerminator());
  BranchInst *Pred2Br = dyn_cast<BranchInst>(Pred2->getTerminator());
  if (Pred1Br == 0 || Pred2Br == 0)
    return 0;

  // Eliminate code duplication by ensuring that Pred1Br is conditional if
  // either are.
  if (Pred2Br->isConditional()) {
    // If both branches are conditional, we don't have an "if statement".  In
    // reality, we could transform this case, but since the condition will be
    // required anyway, we stand no chance of eliminating it, so the xform is
    // probably not profitable.
    if (Pred1Br->isConditional())
      return 0;

    std::swap(Pred1, Pred2);
    std::swap(Pred1Br, Pred2Br);
  }

  if (Pred1Br->isConditional()) {
    // The only thing we have to watch out for here is to make sure that Pred2
    // doesn't have incoming edges from other blocks.  If it does, the condition
    // doesn't dominate BB.
    if (Pred2->getSinglePredecessor() == 0)
      return 0;

    // If we found a conditional branch predecessor, make sure that it branches
    // to BB and Pred2Br.  If it doesn't, this isn't an "if statement".
    if (Pred1Br->getSuccessor(0) == BB &&
        Pred1Br->getSuccessor(1) == Pred2) {
      IfTrue = Pred1;
      IfFalse = Pred2;
    } else if (Pred1Br->getSuccessor(0) == Pred2 &&
               Pred1Br->getSuccessor(1) == BB) {
      IfTrue = Pred2;
      IfFalse = Pred1;
    } else {
      // We know that one arm of the conditional goes to BB, so the other must
      // go somewhere unrelated, and this must not be an "if statement".
      return 0;
    }

    return Pred1Br->getCondition();
  }

  // Ok, if we got here, both predecessors end with an unconditional branch to
  // BB.  Don't panic!  If both blocks only have a single (identical)
  // predecessor, and THAT is a conditional branch, then we're all ok!
  BasicBlock *CommonPred = Pred1->getSinglePredecessor();
  if (CommonPred == 0 || CommonPred != Pred2->getSinglePredecessor())
    return 0;

  // Otherwise, if this is a conditional branch, then we can use it!
  BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator());
  if (BI == 0) return 0;

  assert(BI->isConditional() && "Two successors but not conditional?");
  if (BI->getSuccessor(0) == Pred1) {
    IfTrue = Pred1;
    IfFalse = Pred2;
  } else {
    IfTrue = Pred2;
    IfFalse = Pred1;
  }
  return BI->getCondition();
}
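
A hedged, source-level illustration (not from the utility itself) of the two merge shapes GetIfCondition() accepts; BB is the block containing the PHI node.

int triangleShape(bool c, int a, int b) {
  int v = a;        // one predecessor holds the conditional branch: one arm
  if (c)            // goes through the side block, the other falls straight
    v = b;          // to the merge block
  return v;         // BB: PHI over {a, b}; the condition returned is "c"
}

int diamondShape(bool c, int a, int b) {
  int v;
  if (c) v = a;     // both predecessors of the merge end in unconditional
  else   v = b;     // branches; their single common predecessor holds the
  return v;         // conditional branch on "c" (the second case in the code)
}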
Esempio n. 20
0
/// \brief Analyze the predecessors of each block and build up predicates
void StructurizeCFG::gatherPredicates(RegionNode *N) {
  RegionInfo *RI = ParentRegion->getRegionInfo();
  BasicBlock *BB = N->getEntry();
  BBPredicates &Pred = Predicates[BB];
  BBPredicates &LPred = LoopPreds[BB];

  for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
       PI != PE; ++PI) {

    // Ignore it if it's a branch from outside into our region entry
    if (!ParentRegion->contains(*PI))
      continue;

    Region *R = RI->getRegionFor(*PI);
    if (R == ParentRegion) {

      // It's a top level block in our region
      BranchInst *Term = cast<BranchInst>((*PI)->getTerminator());
      for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
        BasicBlock *Succ = Term->getSuccessor(i);
        if (Succ != BB)
          continue;

        if (Visited.count(*PI)) {
          // Normal forward edge
          if (Term->isConditional()) {
            // Try to treat it like an ELSE block
            BasicBlock *Other = Term->getSuccessor(!i);
            if (Visited.count(Other) && !Loops.count(Other) &&
                !Pred.count(Other) && !Pred.count(*PI)) {

              Pred[Other] = BoolFalse;
              Pred[*PI] = BoolTrue;
              continue;
            }
          }
          Pred[*PI] = buildCondition(Term, i, false);

        } else {
          // Back edge
          LPred[*PI] = buildCondition(Term, i, true);
        }
      }

    } else {

      // It's an exit from a sub region
      while (R->getParent() != ParentRegion)
        R = R->getParent();

      // Edge from inside a subregion to its entry, ignore it
      if (*R == *N)
        continue;

      BasicBlock *Entry = R->getEntry();
      if (Visited.count(Entry))
        Pred[Entry] = BoolTrue;
      else
        LPred[Entry] = BoolFalse;
    }
  }
}
Esempio n. 21
0
bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
                                       BasicBlock *&OldEntry,
                                       bool &TailCallsAreMarkedTail,
                                       SmallVectorImpl<PHINode *> &ArgumentPHIs,
                                       bool CannotTailCallElimCallsMarkedTail) {
  // If we are introducing accumulator recursion to eliminate operations after
  // the call instruction that are both associative and commutative, the initial
  // value for the accumulator is placed in this variable.  If this value is set
  // then we actually perform accumulator recursion elimination instead of
  // simple tail recursion elimination.  If the operation is an LLVM instruction
  // (eg: "add") then it is recorded in AccumulatorRecursionInstr.  If not, then
  // we are handling the case when the return instruction returns a constant C
  // which is different to the constant returned by other return instructions
  // (which is recorded in AccumulatorRecursionEliminationInitVal).  This is a
  // special case of accumulator recursion, the operation being "return C".
  Value *AccumulatorRecursionEliminationInitVal = 0;
  Instruction *AccumulatorRecursionInstr = 0;

  // Ok, we found a potential tail call.  We can currently only transform the
  // tail call if all of the instructions between the call and the return are
  // movable to above the call itself, leaving the call next to the return.
  // Check that this is the case now.
  BasicBlock::iterator BBI = CI;
  for (++BBI; &*BBI != Ret; ++BBI) {
    if (CanMoveAboveCall(BBI, CI)) continue;

    // If we can't move the instruction above the call, it might be because it
    // is an associative and commutative operation that could be transformed
    // using accumulator recursion elimination.  Check to see if this is the
    // case, and if so, remember the initial accumulator value for later.
    if ((AccumulatorRecursionEliminationInitVal =
                           CanTransformAccumulatorRecursion(BBI, CI))) {
      // Yes, this is accumulator recursion.  Remember which instruction
      // accumulates.
      AccumulatorRecursionInstr = BBI;
    } else {
      return false;   // Otherwise, we cannot eliminate the tail recursion!
    }
  }

  // We can only transform call/return pairs that either ignore the return value
  // of the call and return void, ignore the value of the call and return a
  // constant, return the value returned by the tail call, or that are being
  // accumulator recursion variable eliminated.
  if (Ret->getNumOperands() == 1 && Ret->getReturnValue() != CI &&
      !isa<UndefValue>(Ret->getReturnValue()) &&
      AccumulatorRecursionEliminationInitVal == 0 &&
      !getCommonReturnValue(0, CI)) {
    // One case remains that we are able to handle: the current return
    // instruction returns a constant, and all other return instructions
    // return a different constant.
    if (!isDynamicConstant(Ret->getReturnValue(), CI, Ret))
      return false; // Current return instruction does not return a constant.
    // Check that all other return instructions return a common constant.  If
    // so, record it in AccumulatorRecursionEliminationInitVal.
    AccumulatorRecursionEliminationInitVal = getCommonReturnValue(Ret, CI);
    if (!AccumulatorRecursionEliminationInitVal)
      return false;
  }

  BasicBlock *BB = Ret->getParent();
  Function *F = BB->getParent();

  // OK! We can transform this tail call.  If this is the first one found,
  // create the new entry block, allowing us to branch back to the old entry.
  if (OldEntry == 0) {
    OldEntry = &F->getEntryBlock();
    BasicBlock *NewEntry = BasicBlock::Create(F->getContext(), "", F, OldEntry);
    NewEntry->takeName(OldEntry);
    OldEntry->setName("tailrecurse");
    BranchInst::Create(OldEntry, NewEntry);

    // If this tail call is marked 'tail' and if there are any allocas in the
    // entry block, move them up to the new entry block.
    TailCallsAreMarkedTail = CI->isTailCall();
    if (TailCallsAreMarkedTail)
      // Move all fixed sized allocas from OldEntry to NewEntry.
      for (BasicBlock::iterator OEBI = OldEntry->begin(), E = OldEntry->end(),
             NEBI = NewEntry->begin(); OEBI != E; )
        if (AllocaInst *AI = dyn_cast<AllocaInst>(OEBI++))
          if (isa<ConstantInt>(AI->getArraySize()))
            AI->moveBefore(NEBI);

    // Now that we have created a new block, which jumps to the entry
    // block, insert a PHI node for each argument of the function.
    // For now, we initialize each PHI to only have the real arguments
    // which are passed in.
    Instruction *InsertPos = OldEntry->begin();
    for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
         I != E; ++I) {
      PHINode *PN = PHINode::Create(I->getType(), 2,
                                    I->getName() + ".tr", InsertPos);
      I->replaceAllUsesWith(PN); // Everyone use the PHI node now!
      PN->addIncoming(I, NewEntry);
      ArgumentPHIs.push_back(PN);
    }
  }

  // If this function has self recursive calls in the tail position where some
  // are marked tail and some are not, only transform one flavor or another.  We
  // have to choose whether we move allocas in the entry block to the new entry
  // block or not, so we can't make a good choice for both.  NOTE: We could do
  // slightly better here in the case that the function has no entry block
  // allocas.
  if (TailCallsAreMarkedTail && !CI->isTailCall())
    return false;

  // Ok, now that we know we have a pseudo-entry block WITH all of the
  // required PHI nodes, add entries into the PHI node for the actual
  // parameters passed into the tail-recursive call.
  for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
    ArgumentPHIs[i]->addIncoming(CI->getArgOperand(i), BB);

  // If we are introducing an accumulator variable to eliminate the recursion,
  // do so now.  Note that we _know_ that no subsequent tail recursion
  // eliminations will happen on this function because of the way the
  // accumulator recursion predicate is set up.
  //
  if (AccumulatorRecursionEliminationInitVal) {
    Instruction *AccRecInstr = AccumulatorRecursionInstr;
    // Start by inserting a new PHI node for the accumulator.
    pred_iterator PB = pred_begin(OldEntry), PE = pred_end(OldEntry);
    PHINode *AccPN =
      PHINode::Create(AccumulatorRecursionEliminationInitVal->getType(),
                      std::distance(PB, PE) + 1,
                      "accumulator.tr", OldEntry->begin());

    // Loop over all of the predecessors of the tail recursion block.  For the
    // real entry into the function we seed the PHI with the initial value,
    // computed earlier.  For any other existing branches to this block (due to
    // other tail recursions eliminated) the accumulator is not modified.
    // Because we haven't added the branch in the current block to OldEntry yet,
    // it will not show up as a predecessor.
    for (pred_iterator PI = PB; PI != PE; ++PI) {
      BasicBlock *P = *PI;
      if (P == &F->getEntryBlock())
        AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, P);
      else
        AccPN->addIncoming(AccPN, P);
    }

    if (AccRecInstr) {
      // Add an incoming argument for the current block, which is computed by
      // our associative and commutative accumulator instruction.
      AccPN->addIncoming(AccRecInstr, BB);

      // Next, rewrite the accumulator recursion instruction so that it does not
      // use the result of the call anymore, instead, use the PHI node we just
      // inserted.
      AccRecInstr->setOperand(AccRecInstr->getOperand(0) != CI, AccPN);
    } else {
      // Add an incoming argument for the current block, which is just the
      // constant returned by the current return instruction.
      AccPN->addIncoming(Ret->getReturnValue(), BB);
    }

    // Finally, rewrite any return instructions in the program to return the PHI
    // node instead of the "initval" that they do currently.  This loop will
    // actually rewrite the return value we are destroying, but that's ok.
    for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI)
      if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator()))
        RI->setOperand(0, AccPN);
    ++NumAccumAdded;
  }

  // Now that all of the PHI nodes are in place, remove the call and
  // ret instructions, replacing them with an unconditional branch.
  BranchInst *NewBI = BranchInst::Create(OldEntry, Ret);
  NewBI->setDebugLoc(CI->getDebugLoc());

  BB->getInstList().erase(Ret);  // Remove return.
  BB->getInstList().erase(CI);   // Remove call.
  ++NumEliminated;
  return true;
}
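
A hedged, source-level picture of the accumulator case handled above (not taken from the pass): the multiply after the self-recursive call prevents a plain tail call, and the transformation introduces an accumulator that plays the role of the accumulator.tr PHI.

// Before: the pending multiply keeps this from being a tail call.
int fact(int n) {
  if (n <= 1)
    return 1;
  return n * fact(n - 1);   // AccumulatorRecursionInstr would be this multiply
}

// After, expressed at the source level: the recursion becomes a branch back
// to the entry and the multiply feeds an accumulator instead.
int factAccum(int n) {
  int acc = 1;               // AccumulatorRecursionEliminationInitVal
  while (n > 1) {
    acc *= n;                // the accumulator.tr PHI updated each iteration
    --n;
  }
  return acc;                // every return now yields the accumulator
}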
Esempio n. 22
0
/// buildCFICheck - emits __cfi_check for the current module.
void CrossDSOCFI::buildCFICheck(Module &M) {
  // FIXME: verify that __cfi_check ends up near the end of the code section,
  // but before the jump slots created in LowerTypeTests.
  llvm::DenseSet<uint64_t> TypeIds;
  SmallVector<MDNode *, 2> Types;
  for (GlobalObject &GO : M.global_objects()) {
    Types.clear();
    GO.getMetadata(LLVMContext::MD_type, Types);
    for (MDNode *Type : Types) {
      // Sanity check. GO must not be a function declaration.
      assert(!isa<Function>(&GO) || !cast<Function>(&GO)->isDeclaration());

      if (ConstantInt *TypeId = extractNumericTypeId(Type))
        TypeIds.insert(TypeId->getZExtValue());
    }
  }

  LLVMContext &Ctx = M.getContext();
  Constant *C = M.getOrInsertFunction(
      "__cfi_check", Type::getVoidTy(Ctx), Type::getInt64Ty(Ctx),
      Type::getInt8PtrTy(Ctx), Type::getInt8PtrTy(Ctx), nullptr);
  Function *F = dyn_cast<Function>(C);
  F->setAlignment(4096);
  auto args = F->arg_begin();
  Value &CallSiteTypeId = *(args++);
  CallSiteTypeId.setName("CallSiteTypeId");
  Value &Addr = *(args++);
  Addr.setName("Addr");
  Value &CFICheckFailData = *(args++);
  CFICheckFailData.setName("CFICheckFailData");
  assert(args == F->arg_end());

  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
  BasicBlock *ExitBB = BasicBlock::Create(Ctx, "exit", F);

  BasicBlock *TrapBB = BasicBlock::Create(Ctx, "fail", F);
  IRBuilder<> IRBFail(TrapBB);
  Constant *CFICheckFailFn = M.getOrInsertFunction(
      "__cfi_check_fail", Type::getVoidTy(Ctx), Type::getInt8PtrTy(Ctx),
      Type::getInt8PtrTy(Ctx), nullptr);
  IRBFail.CreateCall(CFICheckFailFn, {&CFICheckFailData, &Addr});
  IRBFail.CreateBr(ExitBB);

  IRBuilder<> IRBExit(ExitBB);
  IRBExit.CreateRetVoid();

  IRBuilder<> IRB(BB);
  SwitchInst *SI = IRB.CreateSwitch(&CallSiteTypeId, TrapBB, TypeIds.size());
  for (uint64_t TypeId : TypeIds) {
    ConstantInt *CaseTypeId = ConstantInt::get(Type::getInt64Ty(Ctx), TypeId);
    BasicBlock *TestBB = BasicBlock::Create(Ctx, "test", F);
    IRBuilder<> IRBTest(TestBB);
    Function *BitsetTestFn = Intrinsic::getDeclaration(&M, Intrinsic::type_test);

    Value *Test = IRBTest.CreateCall(
        BitsetTestFn, {&Addr, MetadataAsValue::get(
                                  Ctx, ConstantAsMetadata::get(CaseTypeId))});
    BranchInst *BI = IRBTest.CreateCondBr(Test, ExitBB, TrapBB);
    BI->setMetadata(LLVMContext::MD_prof, VeryLikelyWeights);

    SI->addCase(CaseTypeId, TestBB);
    ++NumTypeIds;
  }
}
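
A hedged C-level sketch of the control flow that buildCFICheck() emits; the real __cfi_check works on i8* arguments and the llvm.type.test intrinsic. The two helpers below are hypothetical stand-ins used only to make the shape clear.

bool typeTestStandIn(void *Addr, unsigned long TypeId);   // ~ llvm.type.test
void cfiCheckFailStandIn(void *Data, void *Addr);         // ~ __cfi_check_fail

void cfiCheckShape(unsigned long CallSiteTypeId, void *Addr, void *Data) {
  switch (CallSiteTypeId) {          // the SwitchInst; default edge -> fail
  case 0x1234:                       // one "test" block per collected TypeId
    if (typeTestStandIn(Addr, 0x1234))
      return;                        // conditional branch to the exit block
    break;                           // otherwise fall through to the fail path
  case 0x5678:
    if (typeTestStandIn(Addr, 0x5678))
      return;
    break;
  default:
    break;
  }
  cfiCheckFailStandIn(Data, Addr);   // "fail" block, then branch to exit
}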
Esempio n. 23
0
/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true
/// if unrolling was successful, or false if the loop was unmodified. Unrolling
/// can only fail when the loop's latch block is not terminated by a conditional
/// branch instruction. However, if the trip count (and multiple) are not known,
/// loop unrolling will mostly produce more code that is no faster.
///
/// The LoopInfo Analysis that is passed will be kept consistent.
///
/// If a LoopPassManager is passed in, and the loop is fully removed, it will be
/// removed from the LoopPassManager as well. LPM can also be NULL.
bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) {
  assert(L->isLCSSAForm());

  BasicBlock *Header = L->getHeader();
  BasicBlock *LatchBlock = L->getLoopLatch();
  BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
  
  if (!BI || BI->isUnconditional()) {
    // The loop-rotate pass can be helpful to avoid this in many cases.
    DOUT << "  Can't unroll; loop not terminated by a conditional branch.\n";
    return false;
  }

  // Find trip count
  unsigned TripCount = L->getSmallConstantTripCount();
  // Find trip multiple if count is not available
  unsigned TripMultiple = 1;
  if (TripCount == 0)
    TripMultiple = L->getSmallConstantTripMultiple();

  if (TripCount != 0)
    DOUT << "  Trip Count = " << TripCount << "\n";
  if (TripMultiple != 1)
    DOUT << "  Trip Multiple = " << TripMultiple << "\n";

  // Effectively "DCE" unrolled iterations that are beyond the tripcount
  // and will never be executed.
  if (TripCount != 0 && Count > TripCount)
    Count = TripCount;

  assert(Count > 0);
  assert(TripMultiple > 0);
  assert(TripCount == 0 || TripCount % TripMultiple == 0);

  // Are we eliminating the loop control altogether?
  bool CompletelyUnroll = Count == TripCount;

  // If we know the trip count, we know the multiple...
  unsigned BreakoutTrip = 0;
  if (TripCount != 0) {
    BreakoutTrip = TripCount % Count;
    TripMultiple = 0;
  } else {
    // Figure out what multiple to use.
    BreakoutTrip = TripMultiple =
      (unsigned)GreatestCommonDivisor64(Count, TripMultiple);
  }

  if (CompletelyUnroll) {
    DEBUG(errs() << "COMPLETELY UNROLLING loop %" << Header->getName()
          << " with trip count " << TripCount << "!\n");
  } else {
    DEBUG(errs() << "UNROLLING loop %" << Header->getName()
          << " by " << Count);
    if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
      DOUT << " with a breakout at trip " << BreakoutTrip;
    } else if (TripMultiple != 1) {
      DOUT << " with " << TripMultiple << " trips per branch";
    }
    DOUT << "!\n";
  }

  std::vector<BasicBlock*> LoopBlocks = L->getBlocks();

  bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
  BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);

  // For the first iteration of the loop, we should use the precloned values for
  // PHI nodes.  Insert associations now.
  typedef DenseMap<const Value*, Value*> ValueMapTy;
  ValueMapTy LastValueMap;
  std::vector<PHINode*> OrigPHINode;
  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
    PHINode *PN = cast<PHINode>(I);
    OrigPHINode.push_back(PN);
    if (Instruction *I = 
                dyn_cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock)))
      if (L->contains(I->getParent()))
        LastValueMap[I] = I;
  }

  std::vector<BasicBlock*> Headers;
  std::vector<BasicBlock*> Latches;
  Headers.push_back(Header);
  Latches.push_back(LatchBlock);

  for (unsigned It = 1; It != Count; ++It) {
    char SuffixBuffer[100];
    sprintf(SuffixBuffer, ".%d", It);
    
    std::vector<BasicBlock*> NewBlocks;
    
    for (std::vector<BasicBlock*>::iterator BB = LoopBlocks.begin(),
         E = LoopBlocks.end(); BB != E; ++BB) {
      ValueMapTy ValueMap;
      BasicBlock *New = CloneBasicBlock(*BB, ValueMap, SuffixBuffer);
      Header->getParent()->getBasicBlockList().push_back(New);

      // Loop over all of the PHI nodes in the block, changing them to use the
      // incoming values from the previous block.
      if (*BB == Header)
        for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
          PHINode *NewPHI = cast<PHINode>(ValueMap[OrigPHINode[i]]);
          Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
          if (Instruction *InValI = dyn_cast<Instruction>(InVal))
            if (It > 1 && L->contains(InValI->getParent()))
              InVal = LastValueMap[InValI];
          ValueMap[OrigPHINode[i]] = InVal;
          New->getInstList().erase(NewPHI);
        }

      // Update our running map of newest clones
      LastValueMap[*BB] = New;
      for (ValueMapTy::iterator VI = ValueMap.begin(), VE = ValueMap.end();
           VI != VE; ++VI)
        LastValueMap[VI->first] = VI->second;

      L->addBasicBlockToLoop(New, LI->getBase());

      // Add phi entries for newly created values to all exit blocks except
      // the successor of the latch block.  The successor of the exit block will
      // be updated specially after unrolling all the way.
      if (*BB != LatchBlock)
        for (Value::use_iterator UI = (*BB)->use_begin(), UE = (*BB)->use_end();
             UI != UE;) {
          Instruction *UseInst = cast<Instruction>(*UI);
          ++UI;
          if (isa<PHINode>(UseInst) && !L->contains(UseInst->getParent())) {
            PHINode *phi = cast<PHINode>(UseInst);
            Value *Incoming = phi->getIncomingValueForBlock(*BB);
            phi->addIncoming(Incoming, New);
          }
        }

      // Keep track of new headers and latches as we create them, so that
      // we can insert the proper branches later.
      if (*BB == Header)
        Headers.push_back(New);
      if (*BB == LatchBlock) {
        Latches.push_back(New);

        // Also, clear out the new latch's back edge so that it doesn't look
        // like a new loop and is amenable to being merged with adjacent
        // blocks later on.
        TerminatorInst *Term = New->getTerminator();
        assert(L->contains(Term->getSuccessor(!ContinueOnTrue)));
        assert(Term->getSuccessor(ContinueOnTrue) == LoopExit);
        Term->setSuccessor(!ContinueOnTrue, NULL);
      }

      NewBlocks.push_back(New);
    }
    
    // Remap all instructions in the most recent iteration
    for (unsigned i = 0; i < NewBlocks.size(); ++i)
      for (BasicBlock::iterator I = NewBlocks[i]->begin(),
           E = NewBlocks[i]->end(); I != E; ++I)
        RemapInstruction(I, LastValueMap);
  }
  
  // The latch block exits the loop.  If there are any PHI nodes in the
  // successor blocks, update them to use the appropriate values computed as the
  // last iteration of the loop.
  if (Count != 1) {
    SmallPtrSet<PHINode*, 8> Users;
    for (Value::use_iterator UI = LatchBlock->use_begin(),
         UE = LatchBlock->use_end(); UI != UE; ++UI)
      if (PHINode *phi = dyn_cast<PHINode>(*UI))
        Users.insert(phi);
    
    BasicBlock *LastIterationBB = cast<BasicBlock>(LastValueMap[LatchBlock]);
    for (SmallPtrSet<PHINode*,8>::iterator SI = Users.begin(), SE = Users.end();
         SI != SE; ++SI) {
      PHINode *PN = *SI;
      Value *InVal = PN->removeIncomingValue(LatchBlock, false);
      // If this value was defined in the loop, take the value defined by the
      // last iteration of the loop.
      if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
        if (L->contains(InValI->getParent()))
          InVal = LastValueMap[InVal];
      }
      PN->addIncoming(InVal, LastIterationBB);
    }
  }

  // Now, if we're doing complete unrolling, loop over the PHI nodes in the
  // original block, setting them to their incoming values.
  if (CompletelyUnroll) {
    BasicBlock *Preheader = L->getLoopPreheader();
    for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
      PHINode *PN = OrigPHINode[i];
      PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
      Header->getInstList().erase(PN);
    }
  }

  // Now that all the basic blocks for the unrolled iterations are in place,
  // set up the branches to connect them.
  for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
    // The original branch was replicated in each unrolled iteration.
    BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());

    // The branch destination.
    unsigned j = (i + 1) % e;
    BasicBlock *Dest = Headers[j];
    bool NeedConditional = true;

    // For a complete unroll, make the last iteration end with a branch
    // to the exit block.
    if (CompletelyUnroll && j == 0) {
      Dest = LoopExit;
      NeedConditional = false;
    }

    // If we know the trip count or a multiple of it, we can safely use an
    // unconditional branch for some iterations.
    if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) {
      NeedConditional = false;
    }

    if (NeedConditional) {
      // Update the conditional branch's successor for the following
      // iteration.
      Term->setSuccessor(!ContinueOnTrue, Dest);
    } else {
      Term->setUnconditionalDest(Dest);
      // Merge adjacent basic blocks, if possible.
      if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI)) {
        std::replace(Latches.begin(), Latches.end(), Dest, Fold);
        std::replace(Headers.begin(), Headers.end(), Dest, Fold);
      }
    }
  }
  
  // At this point, the code is well formed.  We now do a quick sweep over the
  // inserted code, doing constant propagation and dead code elimination as we
  // go.
  const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks();
  for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(),
       BBE = NewLoopBlocks.end(); BB != BBE; ++BB)
    for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) {
      Instruction *Inst = I++;

      if (isInstructionTriviallyDead(Inst))
        (*BB)->getInstList().erase(Inst);
      else if (Constant *C = ConstantFoldInstruction(Inst, 
                                                     Header->getContext())) {
        Inst->replaceAllUsesWith(C);
        (*BB)->getInstList().erase(Inst);
      }
    }

  NumCompletelyUnrolled += CompletelyUnroll;
  ++NumUnrolled;
  // Remove the loop from the LoopPassManager if it's completely removed.
  if (CompletelyUnroll && LPM != NULL)
    LPM->deleteLoopFromQueue(L);

  // If we didn't completely unroll the loop, it should still be in LCSSA form.
  if (!CompletelyUnroll)
    assert(L->isLCSSAForm());

  return true;
}
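// A minimal, self-contained restatement in plain C++ of the breakout logic
// used when wiring the unrolled latches above: given Count, TripCount and
// TripMultiple it reports whether the branch into copy `j` must stay
// conditional. The helper name needsConditionalLatch is invented for this
// sketch, and std::gcd stands in for GreatestCommonDivisor64.
#include <cstdint>
#include <cstdio>
#include <numeric>

static bool needsConditionalLatch(unsigned j, unsigned Count, unsigned TripCount,
                                  unsigned TripMultiple, bool CompletelyUnroll) {
  unsigned BreakoutTrip;
  if (TripCount != 0) {
    BreakoutTrip = TripCount % Count;   // copy index on which the loop can exit
    TripMultiple = 0;
  } else {
    BreakoutTrip = TripMultiple =
        (unsigned)std::gcd((uint64_t)Count, (uint64_t)TripMultiple);
  }
  if (CompletelyUnroll && j == 0)
    return false;  // the last copy of a full unroll jumps straight to the exit
  if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0))
    return false;  // control provably cannot leave the loop on this branch
  return true;
}

int main() {
  // Unroll by 4 with a known trip count of 10: only the branch whose
  // destination index equals 10 % 4 == 2 has to remain conditional.
  for (unsigned i = 0; i != 4; ++i) {
    unsigned j = (i + 1) % 4;
    std::printf("latch %u -> header %u, conditional: %d\n", i, j,
                (int)needsConditionalLatch(j, 4, 10, 1, /*CompletelyUnroll=*/false));
  }
  return 0;
}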
Esempio n. 24
0
bool ScopDetection::isValidCFG(BasicBlock &BB,
                               DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  TerminatorInst *TI = BB.getTerminator();

  // Return instructions are only valid if the region is the top level region.
  if (isa<ReturnInst>(TI) && !CurRegion.getExit() && TI->getNumOperands() == 0)
    return true;

  BranchInst *Br = dyn_cast<BranchInst>(TI);

  if (!Br)
    return invalid<ReportNonBranchTerminator>(Context, /*Assert=*/true, &BB);

  if (Br->isUnconditional())
    return true;

  Value *Condition = Br->getCondition();

  // UndefValue is not allowed as condition.
  if (isa<UndefValue>(Condition))
    return invalid<ReportUndefCond>(Context, /*Assert=*/true, Br, &BB);

  // Only Constant and ICmpInst are allowed as condition.
  if (!(isa<Constant>(Condition) || isa<ICmpInst>(Condition))) {
    if (AllowNonAffineSubRegions && !containsLoop(RI->getRegionFor(&BB), LI))
      Context.NonAffineSubRegionSet.insert(RI->getRegionFor(&BB));
    else
      return invalid<ReportInvalidCond>(Context, /*Assert=*/true, Br, &BB);
  }

  // Allow perfectly nested conditions.
  assert(Br->getNumSuccessors() == 2 && "Unexpected number of successors");

  if (ICmpInst *ICmp = dyn_cast<ICmpInst>(Condition)) {
    // Unsigned comparisons are not allowed. They trigger overflow problems
    // in the code generation.
    //
    // TODO: This is not sufficient and just hides bugs. However it does pretty
    // well.
    if (ICmp->isUnsigned() && !AllowUnsigned)
      return false;

    // Are both operands of the ICmp affine?
    if (isa<UndefValue>(ICmp->getOperand(0)) ||
        isa<UndefValue>(ICmp->getOperand(1)))
      return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp);

    Loop *L = LI->getLoopFor(ICmp->getParent());
    const SCEV *LHS = SE->getSCEVAtScope(ICmp->getOperand(0), L);
    const SCEV *RHS = SE->getSCEVAtScope(ICmp->getOperand(1), L);

    if (!isAffineExpr(&CurRegion, LHS, *SE) ||
        !isAffineExpr(&CurRegion, RHS, *SE)) {
      if (AllowNonAffineSubRegions && !containsLoop(RI->getRegionFor(&BB), LI))
        Context.NonAffineSubRegionSet.insert(RI->getRegionFor(&BB));
      else
        return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS,
                                           RHS, ICmp);
    }
  }

  // Allow loop exit conditions.
  Loop *L = LI->getLoopFor(&BB);
  if (L && L->getExitingBlock() == &BB)
    return true;

  // Allow perfectly nested conditions.
  Region *R = RI->getRegionFor(&BB);
  if (R->getEntry() != &BB)
    return invalid<ReportCondition>(Context, /*Assert=*/true, &BB);

  return true;
}
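// An LLVM-free condensation of the branch-condition checks above, using a toy
// descriptor instead of llvm::Value. It only illustrates the order of the
// rejections (undef -> condition kind -> unsigned predicate -> affine
// operands) and deliberately omits the non-affine sub-region fallback and the
// loop-exit / region-entry cases; all names here are invented for the sketch.
#include <cstdio>

struct CondInfo {
  bool IsUndef;          // condition is UndefValue
  bool IsConstant;       // condition is a Constant
  bool IsICmp;           // condition is an ICmpInst
  bool IsUnsignedCmp;    // icmp uses an unsigned predicate
  bool OperandsAffine;   // both operands are affine SCEV expressions
};

enum class Verdict { Valid, UndefCond, InvalidCond, UnsignedCmp, NonAffine };

static Verdict classifyCondition(const CondInfo &C, bool AllowUnsigned) {
  if (C.IsUndef)
    return Verdict::UndefCond;
  if (!C.IsConstant && !C.IsICmp)
    return Verdict::InvalidCond;
  if (C.IsICmp) {
    if (C.IsUnsignedCmp && !AllowUnsigned)
      return Verdict::UnsignedCmp;     // may overflow during code generation
    if (!C.OperandsAffine)
      return Verdict::NonAffine;
  }
  return Verdict::Valid;
}

int main() {
  CondInfo SignedAffine{false, false, true, false, true};
  CondInfo UnsignedAffine{false, false, true, true, true};
  std::printf("%d %d\n", (int)classifyCondition(SignedAffine, false),
              (int)classifyCondition(UnsignedAffine, false));
  return 0;
}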
/// SplitCriticalEdge - If this edge is a critical edge, insert a new node to
/// split the critical edge.  This will update DominatorTree information if it
/// is available, thus calling this pass will not invalidate either of them.
/// This returns the new block if the edge was split, null otherwise.
///
/// If MergeIdenticalEdges is true (not the default), *all* edges from TI to the
/// specified successor will be merged into the same critical edge block.  
/// This is most commonly interesting with switch instructions, which may 
/// have many edges to any one destination.  This ensures that all edges to that
/// dest go to one block instead of each going to a different block, but isn't 
/// the standard definition of a "critical edge".
///
/// It is invalid to call this function on a critical edge that starts at an
/// IndirectBrInst.  Splitting these edges will almost always create an invalid
/// program because the address of the new block won't be the one that is jumped
/// to.
///
BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
                                    Pass *P, bool MergeIdenticalEdges) {
  if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return 0;
  
  assert(!isa<IndirectBrInst>(TI) &&
         "Cannot split critical edge from IndirectBrInst");
  
  BasicBlock *TIBB = TI->getParent();
  BasicBlock *DestBB = TI->getSuccessor(SuccNum);

  // Splitting the critical edge to a landing pad block is non-trivial. Don't do
  // it in this generic function.
  assert(!DestBB->isLandingPad() &&
         "Cannot split critical edge to a landing pad block!");

  // Create a new basic block, linking it into the CFG.
  BasicBlock *NewBB = BasicBlock::Create(TI->getContext(),
                      TIBB->getName() + "." + DestBB->getName() + "_crit_edge");
  // Create our unconditional branch.
  BranchInst *NewBI = BranchInst::Create(DestBB, NewBB);
  NewBI->setDebugLoc(TI->getDebugLoc());

  // Branch to the new block, breaking the edge.
  TI->setSuccessor(SuccNum, NewBB);

  // Insert the block into the function... right after the block TI lives in.
  Function &F = *TIBB->getParent();
  Function::iterator FBBI = TIBB;
  F.getBasicBlockList().insert(++FBBI, NewBB);
  
  // If there are any PHI nodes in DestBB, we need to update them so that they
  // merge incoming values from NewBB instead of from TIBB.
  {
    unsigned BBIdx = 0;
    for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
      // We no longer enter through TIBB, now we come in through NewBB.
      // Revector exactly one entry in the PHI node that used to come from
      // TIBB to come from NewBB.
      PHINode *PN = cast<PHINode>(I);

      // Reuse the previous value of BBIdx if it lines up.  In cases where we
      // have multiple phi nodes with *lots* of predecessors, this is a speed
      // win because we don't have to scan the PHI looking for TIBB.  This
      // happens because the BB list of PHI nodes are usually in the same
      // order.
      if (PN->getIncomingBlock(BBIdx) != TIBB)
        BBIdx = PN->getBasicBlockIndex(TIBB);
      PN->setIncomingBlock(BBIdx, NewBB);
    }
  }
   
  // If there are any other edges from TIBB to DestBB, update those to go
  // through the split block, making those edges non-critical as well (and
  // reducing the number of phi entries in the DestBB if relevant).
  if (MergeIdenticalEdges) {
    for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) {
      if (TI->getSuccessor(i) != DestBB) continue;
      
      // Remove an entry for TIBB from DestBB phi nodes.
      DestBB->removePredecessor(TIBB);
      
      // We found another edge to DestBB, go to NewBB instead.
      TI->setSuccessor(i, NewBB);
    }
  }
  
  

  // If we don't have a pass object, we can't update anything...
  if (P == 0) return NewBB;
  
  DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
  LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>();
  ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>();
  
  // If we have nothing to update, just return.
  if (DT == 0 && LI == 0 && PI == 0)
    return NewBB;

  // Now update analysis information.  Since the only predecessor of NewBB is
  // the TIBB, TIBB clearly dominates NewBB.  TIBB usually doesn't dominate
  // anything, as there are other successors of DestBB.  However, if all other
  // predecessors of DestBB are already dominated by DestBB (e.g. DestBB is a
  // loop header) then NewBB dominates DestBB.
  SmallVector<BasicBlock*, 8> OtherPreds;

  // If there is a PHI in the block, loop over predecessors with it, which is
  // faster than iterating pred_begin/end.
  if (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      if (PN->getIncomingBlock(i) != NewBB)
        OtherPreds.push_back(PN->getIncomingBlock(i));
  } else {
    for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB);
         I != E; ++I) {
      BasicBlock *P = *I;
      if (P != NewBB)
        OtherPreds.push_back(P);
    }
  }

  bool NewBBDominatesDestBB = true;
  
  // Should we update DominatorTree information?
  if (DT) {
    DomTreeNode *TINode = DT->getNode(TIBB);

    // The new block is not the immediate dominator for any other nodes, but
    // TINode is the immediate dominator for the new node.
    //
    if (TINode) {       // Don't break unreachable code!
      DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB);
      DomTreeNode *DestBBNode = 0;
     
      // If NewBBDominatesDestBB hasn't been computed yet, do so with DT.
      if (!OtherPreds.empty()) {
        DestBBNode = DT->getNode(DestBB);
        while (!OtherPreds.empty() && NewBBDominatesDestBB) {
          if (DomTreeNode *OPNode = DT->getNode(OtherPreds.back()))
            NewBBDominatesDestBB = DT->dominates(DestBBNode, OPNode);
          OtherPreds.pop_back();
        }
        OtherPreds.clear();
      }
      
      // If NewBBDominatesDestBB, then NewBB dominates DestBB, otherwise it
      // doesn't dominate anything.
      if (NewBBDominatesDestBB) {
        if (!DestBBNode) DestBBNode = DT->getNode(DestBB);
        DT->changeImmediateDominator(DestBBNode, NewBBNode);
      }
    }
  }

  // Update LoopInfo if it is around.
  if (LI) {
    if (Loop *TIL = LI->getLoopFor(TIBB)) {
      // If one or the other blocks were not in a loop, the new block is not
      // either, and thus LI doesn't need to be updated.
      if (Loop *DestLoop = LI->getLoopFor(DestBB)) {
        if (TIL == DestLoop) {
          // Both in the same loop, the NewBB joins loop.
          DestLoop->addBasicBlockToLoop(NewBB, LI->getBase());
        } else if (TIL->contains(DestLoop)) {
          // Edge from an outer loop to an inner loop.  Add to the outer loop.
          TIL->addBasicBlockToLoop(NewBB, LI->getBase());
        } else if (DestLoop->contains(TIL)) {
          // Edge from an inner loop to an outer loop.  Add to the outer loop.
          DestLoop->addBasicBlockToLoop(NewBB, LI->getBase());
        } else {
          // Edge from two loops with no containment relation.  Because these
          // are natural loops, we know that the destination block must be the
          // header of its loop (adding a branch into a loop elsewhere would
          // create an irreducible loop).
          assert(DestLoop->getHeader() == DestBB &&
                 "Should not create irreducible loops!");
          if (Loop *P = DestLoop->getParentLoop())
            P->addBasicBlockToLoop(NewBB, LI->getBase());
        }
      }
      // If TIBB is in a loop and DestBB is outside of that loop, split the
      // other exit blocks of the loop that also have predecessors outside
      // the loop, to maintain a LoopSimplify guarantee.
      if (!TIL->contains(DestBB) &&
          P->mustPreserveAnalysisID(LoopSimplifyID)) {
        assert(!TIL->contains(NewBB) &&
               "Split point for loop exit is contained in loop!");

        // Update LCSSA form in the newly created exit block.
        if (P->mustPreserveAnalysisID(LCSSAID)) {
          SmallVector<BasicBlock *, 1> OrigPred;
          OrigPred.push_back(TIBB);
          CreatePHIsForSplitLoopExit(OrigPred, NewBB, DestBB);
        }

        // For each unique exit block...
        // FIXME: This code is functionally equivalent to the corresponding
        // loop in LoopSimplify.
        SmallVector<BasicBlock *, 4> ExitBlocks;
        TIL->getExitBlocks(ExitBlocks);
        for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
          // Collect all the preds that are inside the loop, and note
          // whether there are any preds outside the loop.
          SmallVector<BasicBlock *, 4> Preds;
          bool HasPredOutsideOfLoop = false;
          BasicBlock *Exit = ExitBlocks[i];
          for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit);
               I != E; ++I) {
            BasicBlock *P = *I;
            if (TIL->contains(P)) {
              if (isa<IndirectBrInst>(P->getTerminator())) {
                Preds.clear();
                break;
              }
              Preds.push_back(P);
            } else {
              HasPredOutsideOfLoop = true;
            }
          }
          // If there are any preds not in the loop, we'll need to split
          // the edges. The Preds.empty() check is needed because a block
          // may appear multiple times in the list. We can't use
          // getUniqueExitBlocks above because that depends on LoopSimplify
          // form, which we're in the process of restoring!
          if (!Preds.empty() && HasPredOutsideOfLoop) {
            BasicBlock *NewExitBB =
              SplitBlockPredecessors(Exit, Preds.data(), Preds.size(),
                                     "split", P);
            if (P->mustPreserveAnalysisID(LCSSAID))
              CreatePHIsForSplitLoopExit(Preds, NewExitBB, Exit);
          }
        }
      }
      // LCSSA form was updated above for the case where LoopSimplify is
      // available, which means that all predecessors of loop exit blocks
      // are within the loop. Without LoopSimplify form, it would be
      // necessary to insert a new phi.
      assert((!P->mustPreserveAnalysisID(LCSSAID) ||
              P->mustPreserveAnalysisID(LoopSimplifyID)) &&
             "SplitCriticalEdge doesn't know how to update LCCSA form "
             "without LoopSimplify!");
    }
  }

  // Update ProfileInfo if it is around.
  if (PI)
    PI->splitEdge(TIBB, DestBB, NewBB, MergeIdenticalEdges);

  return NewBB;
}
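// A hedged usage sketch for the TerminatorInst/Pass-based SplitCriticalEdge
// defined above: split every critical edge leaving a function. The helper
// name splitAllCriticalEdges is invented for this sketch, and the include
// paths follow the pre-3.3 tree layout this snippet appears to come from, so
// they may differ between LLVM versions.
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

using namespace llvm;

static unsigned splitAllCriticalEdges(Function &F, Pass *P) {
  unsigned NumSplit = 0;
  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
    TerminatorInst *TI = BB->getTerminator();
    // Critical edges leaving an indirectbr cannot be split (see the comment
    // on SplitCriticalEdge above).
    if (isa<IndirectBrInst>(TI))
      continue;
    // SplitCriticalEdge returns 0 when the edge is not critical, so it is
    // safe to try every successor index.
    for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
      if (SplitCriticalEdge(TI, i, P, /*MergeIdenticalEdges=*/false))
        ++NumSplit;
  }
  return NumSplit;
}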
Esempio n. 26
0
/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true
/// if unrolling was successful, or false if the loop was unmodified. Unrolling
/// can only fail when the loop's latch block is not terminated by a conditional
/// branch instruction. However, if the trip count (and multiple) are not known,
/// loop unrolling will mostly produce more code that is no faster.
///
/// TripCount is the upper bound of the iteration on which control exits
/// LatchBlock. Control may exit the loop prior to TripCount iterations either
/// via an early branch in another loop block or via the LatchBlock terminator. This
/// is relaxed from the general definition of trip count which is the number of
/// times the loop header executes. Note that UnrollLoop assumes that the loop
/// counter test is in LatchBlock in order to remove unnecessary instances of
/// the test.  If control can exit the loop from the LatchBlock's terminator
/// prior to TripCount iterations, flag PreserveCondBr needs to be set.
///
/// PreserveCondBr indicates whether the conditional branch of the LatchBlock
/// needs to be preserved.  It is needed when we use trip count upper bound to
/// fully unroll the loop. If PreserveOnlyFirst is also set then only the first
/// conditional branch needs to be preserved.
///
/// Similarly, TripMultiple divides the number of times that the LatchBlock may
/// execute without exiting the loop.
///
/// If AllowRuntime is true then UnrollLoop will consider unrolling loops that
/// have a runtime (i.e. not compile time constant) trip count.  Unrolling these
/// loops requires an unroll "prologue" that runs "RuntimeTripCount % Count"
/// iterations before branching into the unrolled loop.  UnrollLoop will not
/// runtime-unroll the loop if computing RuntimeTripCount will be expensive and
/// AllowExpensiveTripCount is false.
///
/// If we want to perform PGO-based loop peeling, PeelCount is set to the 
/// number of iterations we want to peel off.
///
/// The LoopInfo Analysis that is passed will be kept consistent.
///
/// This utility preserves LoopInfo. It will also preserve ScalarEvolution and
/// DominatorTree if they are non-null.
bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
                      bool AllowRuntime, bool AllowExpensiveTripCount,
                      bool PreserveCondBr, bool PreserveOnlyFirst,
                      unsigned TripMultiple, unsigned PeelCount, LoopInfo *LI,
                      ScalarEvolution *SE, DominatorTree *DT,
                      AssumptionCache *AC, OptimizationRemarkEmitter *ORE,
                      bool PreserveLCSSA) {

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) {
    DEBUG(dbgs() << "  Can't unroll; loop preheader-insertion failed.\n");
    return false;
  }

  BasicBlock *LatchBlock = L->getLoopLatch();
  if (!LatchBlock) {
    DEBUG(dbgs() << "  Can't unroll; loop exit-block-insertion failed.\n");
    return false;
  }

  // Loops with indirectbr cannot be cloned.
  if (!L->isSafeToClone()) {
    DEBUG(dbgs() << "  Can't unroll; Loop body cannot be cloned.\n");
    return false;
  }

  BasicBlock *Header = L->getHeader();
  BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());

  if (!BI || BI->isUnconditional()) {
    // The loop-rotate pass can be helpful to avoid this in many cases.
    DEBUG(dbgs() <<
             "  Can't unroll; loop not terminated by a conditional branch.\n");
    return false;
  }

  if (Header->hasAddressTaken()) {
    // The loop-rotate pass can be helpful to avoid this in many cases.
    DEBUG(dbgs() <<
          "  Won't unroll loop: address of header block is taken.\n");
    return false;
  }

  if (TripCount != 0)
    DEBUG(dbgs() << "  Trip Count = " << TripCount << "\n");
  if (TripMultiple != 1)
    DEBUG(dbgs() << "  Trip Multiple = " << TripMultiple << "\n");

  // Effectively "DCE" unrolled iterations that are beyond the tripcount
  // and will never be executed.
  if (TripCount != 0 && Count > TripCount)
    Count = TripCount;

  // Don't enter the unroll code if there is nothing to do.
  if (TripCount == 0 && Count < 2 && PeelCount == 0)
    return false;

  assert(Count > 0);
  assert(TripMultiple > 0);
  assert(TripCount == 0 || TripCount % TripMultiple == 0);

  // Are we eliminating the loop control altogether?
  bool CompletelyUnroll = Count == TripCount;
  SmallVector<BasicBlock *, 4> ExitBlocks;
  L->getExitBlocks(ExitBlocks);
  std::vector<BasicBlock*> OriginalLoopBlocks = L->getBlocks();

  // Go through all exits of L and see if there are any phi-nodes there. We just
  // conservatively assume that they're inserted to preserve LCSSA form, which
  // means that complete unrolling might break this form. We need to either fix
  // it in-place after the transformation, or entirely rebuild LCSSA. TODO: For
  // now we just recompute LCSSA for the outer loop, but it should be possible
  // to fix it in-place.
  bool NeedToFixLCSSA = PreserveLCSSA && CompletelyUnroll &&
                        any_of(ExitBlocks, [](const BasicBlock *BB) {
                          return isa<PHINode>(BB->begin());
                        });

  // We assume a run-time trip count if the compiler cannot
  // figure out the loop trip count and the unroll-runtime
  // flag is specified.
  bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);

  assert((!RuntimeTripCount || !PeelCount) &&
         "Did not expect runtime trip-count unrolling "
         "and peeling for the same loop");

  if (PeelCount)
    peelLoop(L, PeelCount, LI, SE, DT, PreserveLCSSA);

  // Loops containing convergent instructions must have a count that divides
  // their TripMultiple.
  DEBUG(
      {
        bool HasConvergent = false;
        for (auto &BB : L->blocks())
          for (auto &I : *BB)
            if (auto CS = CallSite(&I))
              HasConvergent |= CS.isConvergent();
        assert((!HasConvergent || TripMultiple % Count == 0) &&
               "Unroll count must divide trip multiple if loop contains a "
               "convergent operation.");
      });
/// SimplifyStoreAtEndOfBlock - Turn things like:
///   if () { *P = v1; } else { *P = v2 }
/// into a phi node with a store in the successor.
///
/// Simplify things like:
///   *P = v1; if () { *P = v2; }
/// into a phi node with a store in the successor.
///
bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
  BasicBlock *StoreBB = SI.getParent();

  // Check to see if the successor block has exactly two incoming edges.  If
  // so, see if the other predecessor contains a store to the same location.
  // if so, insert a PHI node (if needed) and move the stores down.
  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);

  // Determine whether Dest has exactly two predecessors and, if so, compute
  // the other predecessor.
  pred_iterator PI = pred_begin(DestBB);
  BasicBlock *P = *PI;
  BasicBlock *OtherBB = nullptr;

  if (P != StoreBB)
    OtherBB = P;

  if (++PI == pred_end(DestBB))
    return false;

  P = *PI;
  if (P != StoreBB) {
    if (OtherBB)
      return false;
    OtherBB = P;
  }
  if (++PI != pred_end(DestBB))
    return false;

  // Bail out if all the relevant blocks aren't distinct (this can happen,
  // for example, if SI is in an infinite loop)
  if (StoreBB == DestBB || OtherBB == DestBB)
    return false;

  // Verify that the other block ends in a branch and is not otherwise empty.
  BasicBlock::iterator BBI = OtherBB->getTerminator();
  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
  if (!OtherBr || BBI == OtherBB->begin())
    return false;

  // If the other block ends in an unconditional branch, check for the 'if then
  // else' case: there is an instruction before the branch.
  StoreInst *OtherStore = nullptr;
  if (OtherBr->isUnconditional()) {
    --BBI;
    // Skip over debugging info.
    while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      if (BBI==OtherBB->begin())
        return false;
      --BBI;
    }
    // If this isn't a store, isn't a store to the same location, or is not the
    // right kind of store, bail out.
    OtherStore = dyn_cast<StoreInst>(BBI);
    if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
        !SI.isSameOperationAs(OtherStore))
      return false;
  } else {
    // Otherwise, the other block ended with a conditional branch. If one of the
    // destinations is StoreBB, then we have the if/then case.
    if (OtherBr->getSuccessor(0) != StoreBB &&
        OtherBr->getSuccessor(1) != StoreBB)
      return false;

    // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
    // if/then triangle.  See if there is a store to the same ptr as SI that
    // lives in OtherBB.
    for (;; --BBI) {
      // Check to see if we find the matching store.
      if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
        if (OtherStore->getOperand(1) != SI.getOperand(1) ||
            !SI.isSameOperationAs(OtherStore))
          return false;
        break;
      }
      // If we find something that may be using or overwriting the stored
      // value, or if we run out of instructions, we can't do the xform.
      if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
          BBI == OtherBB->begin())
        return false;
    }

    // In order to eliminate the store in OtherBr, we have to
    // make sure nothing reads or overwrites the stored value in
    // StoreBB.
    for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
      // FIXME: This should really be AA driven.
      if (I->mayReadFromMemory() || I->mayWriteToMemory())
        return false;
    }
  }

  // Insert a PHI node now if we need it.
  Value *MergedVal = OtherStore->getOperand(0);
  if (MergedVal != SI.getOperand(0)) {
    PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
    PN->addIncoming(SI.getOperand(0), SI.getParent());
    PN->addIncoming(OtherStore->getOperand(0), OtherBB);
    MergedVal = InsertNewInstBefore(PN, DestBB->front());
  }

  // Advance to a place where it is safe to insert the new store and
  // insert it.
  BBI = DestBB->getFirstInsertionPt();
  StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
                                   SI.isVolatile(),
                                   SI.getAlignment(),
                                   SI.getOrdering(),
                                   SI.getSynchScope());
  InsertNewInstBefore(NewSI, *BBI);
  NewSI->setDebugLoc(OtherStore->getDebugLoc());

  // If the two stores had the same TBAA tag, preserve it.
  if (MDNode *TBAATag = SI.getMetadata(LLVMContext::MD_tbaa))
    if ((TBAATag = MDNode::getMostGenericTBAA(TBAATag,
                               OtherStore->getMetadata(LLVMContext::MD_tbaa))))
      NewSI->setMetadata(LLVMContext::MD_tbaa, TBAATag);


  // Nuke the old stores.
  EraseInstFromFunction(SI);
  EraseInstFromFunction(*OtherStore);
  return true;
}
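// A source-level picture of the rewrite performed above, using a plain C++
// stand-in rather than IR (the function names before/after are invented for
// illustration): the two stores into the same location are replaced by a
// single store of a merged value ("storemerge") in the common successor.
#include <cstdio>

static void before(bool c, int *p, int a, int b) {
  if (c) { *p = a; } else { *p = b; }   // two stores, one per predecessor
}

static void after(bool c, int *p, int a, int b) {
  *p = c ? a : b;   // the PHI ("storemerge") feeding one sunk store
}

int main() {
  int x = 0, y = 0;
  before(true, &x, 1, 2);
  after(true, &y, 1, 2);
  std::printf("%d %d\n", x, y);   // both print 1
  return 0;
}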
Esempio n. 28
0
/// EliminateMostlyEmptyBlock - Eliminate a basic block that has only phi's and
/// an unconditional branch in it.
void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
  BranchInst *BI = cast<BranchInst>(BB->getTerminator());
  BasicBlock *DestBB = BI->getSuccessor(0);

  DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);

  // If the destination block has a single pred, then this is a trivial edge,
  // just collapse it.
  if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
    if (SinglePred != DestBB) {
      // Remember if SinglePred was the entry block of the function.  If so, we
      // will need to move BB back to the entry position.
      bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
      MergeBasicBlockIntoOnlyPred(DestBB, this);

      if (isEntry && BB != &BB->getParent()->getEntryBlock())
        BB->moveBefore(&BB->getParent()->getEntryBlock());

      DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
      return;
    }
  }

  // Otherwise, we have multiple predecessors of BB.  Update the PHIs in DestBB
  // to handle the new incoming edges it is about to have.
  PHINode *PN;
  for (BasicBlock::iterator BBI = DestBB->begin();
       (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
    // Remove the incoming value for BB, and remember it.
    Value *InVal = PN->removeIncomingValue(BB, false);

    // Two options: either the InVal is a phi node defined in BB or it is some
    // value that dominates BB.
    PHINode *InValPhi = dyn_cast<PHINode>(InVal);
    if (InValPhi && InValPhi->getParent() == BB) {
      // Add all of the input values of the input PHI as inputs of this phi.
      for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
        PN->addIncoming(InValPhi->getIncomingValue(i),
                        InValPhi->getIncomingBlock(i));
    } else {
      // Otherwise, add one instance of the dominating value for each edge that
      // we will be adding.
      if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
        for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
          PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
      } else {
        for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
          PN->addIncoming(InVal, *PI);
      }
    }
  }

  // The PHIs are now updated, change everything that refers to BB to use
  // DestBB and remove BB.
  BB->replaceAllUsesWith(DestBB);
  if (DT && !ModifiedDT) {
    BasicBlock *BBIDom  = DT->getNode(BB)->getIDom()->getBlock();
    BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock();
    BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom);
    DT->changeImmediateDominator(DestBB, NewIDom);
    DT->eraseNode(BB);
  }
  if (PFI) {
    PFI->replaceAllUses(BB, DestBB);
    PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB));
  }
  BB->eraseFromParent();
  ++NumBlocksElim;

  DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
}
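// An LLVM-free sketch of the PHI rewiring above for the "dominating value"
// case, using a toy representation of a phi node as a map from predecessor
// name to incoming value: the entry for the vanishing block BB is re-expressed
// as one entry per predecessor of BB. The case where the incoming value is
// itself a PHI defined in BB is omitted; all names are invented for the sketch.
#include <cstdio>
#include <map>
#include <string>
#include <vector>

using Phi = std::map<std::string, int>;   // predecessor name -> incoming value

static void redirectThroughEmptyBlock(Phi &DestPhi, const std::string &BB,
                                      const std::vector<std::string> &BBPreds) {
  int InVal = DestPhi.at(BB);  // value that used to flow through BB
  DestPhi.erase(BB);           // BB is about to be erased from the CFG
  for (const std::string &Pred : BBPreds)
    DestPhi[Pred] = InVal;     // the dominating value now arrives from each pred
}

int main() {
  Phi DestPhi{{"bb.empty", 7}, {"other", 3}};
  redirectThroughEmptyBlock(DestPhi, "bb.empty", {"pred1", "pred2"});
  for (auto &KV : DestPhi)
    std::printf("%s -> %d\n", KV.first.c_str(), KV.second);
  return 0;
}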
Esempio n. 29
0
/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true
/// if unrolling was successful, or false if the loop was unmodified. Unrolling
/// can only fail when the loop's latch block is not terminated by a conditional
/// branch instruction. However, if the trip count (and multiple) are not known,
/// loop unrolling will mostly produce more code that is no faster.
///
/// TripCount is generally defined as the number of times the loop header
/// executes. UnrollLoop relaxes the definition to permit early exits: here
/// TripCount is the iteration on which control exits LatchBlock if no early
/// exits were taken. Note that UnrollLoop assumes that the loop counter test
/// terminates LatchBlock in order to remove unnecessary instances of the
/// test. In other words, control may exit the loop prior to TripCount
/// iterations via an early branch, but control may not exit the loop from the
/// LatchBlock's terminator prior to TripCount iterations.
///
/// Similarly, TripMultiple divides the number of times that the LatchBlock may
/// execute without exiting the loop.
///
/// The LoopInfo Analysis that is passed will be kept consistent.
///
/// If a LoopPassManager is passed in, and the loop is fully removed, it will be
/// removed from the LoopPassManager as well. LPM can also be NULL.
///
/// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are
/// available from the Pass it must also preserve those analyses.
bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
                      bool AllowRuntime, unsigned TripMultiple, LoopInfo *LI,
                      Pass *PP, LPPassManager *LPM, AssumptionCache *AC) {
  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) {
    DEBUG(dbgs() << "  Can't unroll; loop preheader-insertion failed.\n");
    return false;
  }

  BasicBlock *LatchBlock = L->getLoopLatch();
  if (!LatchBlock) {
    DEBUG(dbgs() << "  Can't unroll; loop exit-block-insertion failed.\n");
    return false;
  }

  // Loops with indirectbr cannot be cloned.
  if (!L->isSafeToClone()) {
    DEBUG(dbgs() << "  Can't unroll; Loop body cannot be cloned.\n");
    return false;
  }

  BasicBlock *Header = L->getHeader();
  BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());

  if (!BI || BI->isUnconditional()) {
    // The loop-rotate pass can be helpful to avoid this in many cases.
    DEBUG(dbgs() <<
             "  Can't unroll; loop not terminated by a conditional branch.\n");
    return false;
  }

  if (Header->hasAddressTaken()) {
    // The loop-rotate pass can be helpful to avoid this in many cases.
    DEBUG(dbgs() <<
          "  Won't unroll loop: address of header block is taken.\n");
    return false;
  }

  if (TripCount != 0)
    DEBUG(dbgs() << "  Trip Count = " << TripCount << "\n");
  if (TripMultiple != 1)
    DEBUG(dbgs() << "  Trip Multiple = " << TripMultiple << "\n");

  // Effectively "DCE" unrolled iterations that are beyond the tripcount
  // and will never be executed.
  if (TripCount != 0 && Count > TripCount)
    Count = TripCount;

  // Don't enter the unroll code if there is nothing to do. This way we don't
  // need to support "partial unrolling by 1".
  if (TripCount == 0 && Count < 2)
    return false;

  assert(Count > 0);
  assert(TripMultiple > 0);
  assert(TripCount == 0 || TripCount % TripMultiple == 0);

  // Are we eliminating the loop control altogether?
  bool CompletelyUnroll = Count == TripCount;

  // We assume a run-time trip count if the compiler cannot
  // figure out the loop trip count and the unroll-runtime
  // flag is specified.
  bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);

  if (RuntimeTripCount && !UnrollRuntimeLoopProlog(L, Count, LI, LPM))
    return false;

  // Notify ScalarEvolution that the loop will be substantially changed,
  // if not outright eliminated.
  ScalarEvolution *SE =
      PP ? PP->getAnalysisIfAvailable<ScalarEvolution>() : nullptr;
  if (SE)
    SE->forgetLoop(L);

  // If we know the trip count, we know the multiple...
  unsigned BreakoutTrip = 0;
  if (TripCount != 0) {
    BreakoutTrip = TripCount % Count;
    TripMultiple = 0;
  } else {
    // Figure out what multiple to use.
    BreakoutTrip = TripMultiple =
      (unsigned)GreatestCommonDivisor64(Count, TripMultiple);
  }

  // Report the unrolling decision.
  DebugLoc LoopLoc = L->getStartLoc();
  Function *F = Header->getParent();
  LLVMContext &Ctx = F->getContext();

  if (CompletelyUnroll) {
    DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
          << " with trip count " << TripCount << "!\n");
    emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc,
                           Twine("completely unrolled loop with ") +
                               Twine(TripCount) + " iterations");
  } else {
    auto EmitDiag = [&](const Twine &T) {
      emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc,
                             "unrolled loop by a factor of " + Twine(Count) +
                                 T);
    };

    DEBUG(dbgs() << "UNROLLING loop %" << Header->getName()
          << " by " << Count);
    if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
      DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
      EmitDiag(" with a breakout at trip " + Twine(BreakoutTrip));
    } else if (TripMultiple != 1) {
      DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
      EmitDiag(" with " + Twine(TripMultiple) + " trips per branch");
    } else if (RuntimeTripCount) {
      DEBUG(dbgs() << " with run-time trip count");
      EmitDiag(" with run-time trip count");
    }
    DEBUG(dbgs() << "!\n");
  }

  bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
  BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);

  // For the first iteration of the loop, we should use the precloned values for
  // PHI nodes.  Insert associations now.
  ValueToValueMapTy LastValueMap;
  std::vector<PHINode*> OrigPHINode;
  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
    OrigPHINode.push_back(cast<PHINode>(I));
  }

  std::vector<BasicBlock*> Headers;
  std::vector<BasicBlock*> Latches;
  Headers.push_back(Header);
  Latches.push_back(LatchBlock);

  // The current on-the-fly SSA update requires blocks to be processed in
  // reverse postorder so that LastValueMap contains the correct value at each
  // exit.
  LoopBlocksDFS DFS(L);
  DFS.perform(LI);

  // Stash the DFS iterators before adding blocks to the loop.
  LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
  LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();

  for (unsigned It = 1; It != Count; ++It) {
    std::vector<BasicBlock*> NewBlocks;
    SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
    NewLoops[L] = L;

    for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
      ValueToValueMapTy VMap;
      BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
      Header->getParent()->getBasicBlockList().push_back(New);

      // Tell LI about New.
      if (*BB == Header) {
        assert(LI->getLoopFor(*BB) == L && "Header should not be in a sub-loop");
        L->addBasicBlockToLoop(New, *LI);
      } else {
        // Figure out which loop New is in.
        const Loop *OldLoop = LI->getLoopFor(*BB);
        assert(OldLoop && "Should (at least) be in the loop being unrolled!");

        Loop *&NewLoop = NewLoops[OldLoop];
        if (!NewLoop) {
          // Found a new sub-loop.
          assert(*BB == OldLoop->getHeader() &&
                 "Header should be first in RPO");

          Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop());
          assert(NewLoopParent &&
                 "Expected parent loop before sub-loop in RPO");
          NewLoop = new Loop;
          NewLoopParent->addChildLoop(NewLoop);

          // Forget the old loop, since its inputs may have changed.
          if (SE)
            SE->forgetLoop(OldLoop);
        }
        NewLoop->addBasicBlockToLoop(New, *LI);
      }

      if (*BB == Header)
        // Loop over all of the PHI nodes in the block, changing them to use
        // the incoming values from the previous block.
        for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
          PHINode *NewPHI = cast<PHINode>(VMap[OrigPHINode[i]]);
          Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
          if (Instruction *InValI = dyn_cast<Instruction>(InVal))
            if (It > 1 && L->contains(InValI))
              InVal = LastValueMap[InValI];
          VMap[OrigPHINode[i]] = InVal;
          New->getInstList().erase(NewPHI);
        }

      // Update our running map of newest clones
      LastValueMap[*BB] = New;
      for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
           VI != VE; ++VI)
        LastValueMap[VI->first] = VI->second;

      // Add phi entries for newly created values to all exit blocks.
      for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB);
           SI != SE; ++SI) {
        if (L->contains(*SI))
          continue;
        for (BasicBlock::iterator BBI = (*SI)->begin();
             PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) {
          Value *Incoming = phi->getIncomingValueForBlock(*BB);
          ValueToValueMapTy::iterator It = LastValueMap.find(Incoming);
          if (It != LastValueMap.end())
            Incoming = It->second;
          phi->addIncoming(Incoming, New);
        }
      }
      // Keep track of new headers and latches as we create them, so that
      // we can insert the proper branches later.
      if (*BB == Header)
        Headers.push_back(New);
      if (*BB == LatchBlock)
        Latches.push_back(New);

      NewBlocks.push_back(New);
    }

    // Remap all instructions in the most recent iteration
    for (unsigned i = 0; i < NewBlocks.size(); ++i)
      for (BasicBlock::iterator I = NewBlocks[i]->begin(),
           E = NewBlocks[i]->end(); I != E; ++I)
        ::RemapInstruction(I, LastValueMap);
  }

  // Loop over the PHI nodes in the original block, setting incoming values.
  for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
    PHINode *PN = OrigPHINode[i];
    if (CompletelyUnroll) {
      PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
      Header->getInstList().erase(PN);
    }
    else if (Count > 1) {
      Value *InVal = PN->removeIncomingValue(LatchBlock, false);
      // If this value was defined in the loop, take the value defined by the
      // last iteration of the loop.
      if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
        if (L->contains(InValI))
          InVal = LastValueMap[InVal];
      }
      assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch");
      PN->addIncoming(InVal, Latches.back());
    }
  }

  // Now that all the basic blocks for the unrolled iterations are in place,
  // set up the branches to connect them.
  for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
    // The original branch was replicated in each unrolled iteration.
    BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());

    // The branch destination.
    unsigned j = (i + 1) % e;
    BasicBlock *Dest = Headers[j];
    bool NeedConditional = true;

    if (RuntimeTripCount && j != 0) {
      NeedConditional = false;
    }

    // For a complete unroll, make the last iteration end with a branch
    // to the exit block.
    if (CompletelyUnroll && j == 0) {
      Dest = LoopExit;
      NeedConditional = false;
    }

    // If we know the trip count or a multiple of it, we can safely use an
    // unconditional branch for some iterations.
    if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) {
      NeedConditional = false;
    }

    if (NeedConditional) {
      // Update the conditional branch's successor for the following
      // iteration.
      Term->setSuccessor(!ContinueOnTrue, Dest);
    } else {
      // Remove phi operands at this loop exit
      if (Dest != LoopExit) {
        BasicBlock *BB = Latches[i];
        for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
             SI != SE; ++SI) {
          if (*SI == Headers[i])
            continue;
          for (BasicBlock::iterator BBI = (*SI)->begin();
               PHINode *Phi = dyn_cast<PHINode>(BBI); ++BBI) {
            Phi->removeIncomingValue(BB, false);
          }
        }
      }
      // Replace the conditional branch with an unconditional one.
      BranchInst::Create(Dest, Term);
      Term->eraseFromParent();
    }
  }

  // Merge adjacent basic blocks, if possible.
  SmallPtrSet<Loop *, 4> ForgottenLoops;
  for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
    BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
    if (Term->isUnconditional()) {
      BasicBlock *Dest = Term->getSuccessor(0);
      if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, LPM,
                                                      ForgottenLoops))
        std::replace(Latches.begin(), Latches.end(), Dest, Fold);
    }
  }

  // FIXME: We could register any cloned assumptions instead of clearing the
  // whole function's cache.
  AC->clear();

  DominatorTree *DT = nullptr;
  if (PP) {
    // FIXME: Reconstruct dom info, because it is not preserved properly.
    // Incrementally updating domtree after loop unrolling would be easy.
    if (DominatorTreeWrapperPass *DTWP =
            PP->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
      DT = &DTWP->getDomTree();
      DT->recalculate(*L->getHeader()->getParent());
    }

    // Simplify any new induction variables in the partially unrolled loop.
    if (SE && !CompletelyUnroll) {
      SmallVector<WeakVH, 16> DeadInsts;
      simplifyLoopIVs(L, SE, LPM, DeadInsts);

      // Aggressively clean up dead instructions that simplifyLoopIVs already
      // identified. Any remaining should be cleaned up below.
      while (!DeadInsts.empty())
        if (Instruction *Inst =
            dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
          RecursivelyDeleteTriviallyDeadInstructions(Inst);
    }
  }
  // At this point, the code is well formed.  We now do a quick sweep over the
  // inserted code, doing constant propagation and dead code elimination as we
  // go.
  const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks();
  for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(),
       BBE = NewLoopBlocks.end(); BB != BBE; ++BB)
    for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) {
      Instruction *Inst = I++;

      if (isInstructionTriviallyDead(Inst))
        (*BB)->getInstList().erase(Inst);
      else if (Value *V = SimplifyInstruction(Inst))
        if (LI->replacementPreservesLCSSAForm(Inst, V)) {
          Inst->replaceAllUsesWith(V);
          (*BB)->getInstList().erase(Inst);
        }
    }

  NumCompletelyUnrolled += CompletelyUnroll;
  ++NumUnrolled;

  Loop *OuterL = L->getParentLoop();
  // Remove the loop from the LoopPassManager if it's completely removed.
  if (CompletelyUnroll && LPM != nullptr)
    LPM->deleteLoopFromQueue(L);

  // If we have a pass and a DominatorTree we should re-simplify impacted loops
  // to ensure subsequent analyses can rely on this form. We want to simplify
  // at least one layer outside of the loop that was unrolled so that any
  // changes to the parent loop exposed by the unrolling are considered.
  if (PP && DT) {
    if (!OuterL && !CompletelyUnroll)
      OuterL = L;
    if (OuterL) {
      DataLayoutPass *DLP = PP->getAnalysisIfAvailable<DataLayoutPass>();
      const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
      simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE, DL, AC);

      // LCSSA must be performed on the outermost affected loop. The unrolled
      // loop's last loop latch is guaranteed to be in the outermost loop after
      // deleteLoopFromQueue updates LoopInfo.
      Loop *LatchLoop = LI->getLoopFor(Latches.back());
      if (!OuterL->contains(LatchLoop))
        while (OuterL->getParentLoop() != LatchLoop)
          OuterL = OuterL->getParentLoop();

      formLCSSARecursively(*OuterL, *DT, LI, SE);
    }
  }

  return true;
}
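// A hedged caller sketch for the UnrollLoop overload defined above (the
// Pass/LPPassManager variant). It assumes the enclosing LoopPass has already
// obtained LoopInfo, the constant trip count/multiple and an AssumptionCache;
// the helper name tryFullUnroll, the threshold of 16 and the exact header
// paths are assumptions made for illustration, not taken from the snippet.
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"

using namespace llvm;

static bool tryFullUnroll(Loop *L, unsigned TripCount, unsigned TripMultiple,
                          LoopInfo *LI, Pass *P, LPPassManager *LPM,
                          AssumptionCache *AC) {
  // Only fully unroll loops with a small, compile-time-constant trip count.
  if (TripCount == 0 || TripCount > 16)
    return false;
  // Count == TripCount makes CompletelyUnroll true inside UnrollLoop, so the
  // loop control is removed and the loop is deleted from the LPM queue.
  return UnrollLoop(L, /*Count=*/TripCount, TripCount,
                    /*AllowRuntime=*/false, TripMultiple, LI, P, LPM, AC);
}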
Esempio n. 30
0
Function* PartialInliner::unswitchFunction(Function* F) {
  // First, verify that this function is an unswitching candidate...
  BasicBlock* entryBlock = F->begin();
  BranchInst *BR = dyn_cast<BranchInst>(entryBlock->getTerminator());
  if (!BR || BR->isUnconditional())
    return nullptr;
  
  BasicBlock* returnBlock = nullptr;
  BasicBlock* nonReturnBlock = nullptr;
  unsigned returnCount = 0;
  for (BasicBlock *BB : successors(entryBlock)) {
    if (isa<ReturnInst>(BB->getTerminator())) {
      returnBlock = BB;
      returnCount++;
    } else
      nonReturnBlock = BB;
  }
  
  if (returnCount != 1)
    return nullptr;
  
  // Clone the function, so that we can hack away on it.
  ValueToValueMapTy VMap;
  Function* duplicateFunction = CloneFunction(F, VMap,
                                              /*ModuleLevelChanges=*/false);
  duplicateFunction->setLinkage(GlobalValue::InternalLinkage);
  F->getParent()->getFunctionList().push_back(duplicateFunction);
  BasicBlock* newEntryBlock = cast<BasicBlock>(VMap[entryBlock]);
  BasicBlock* newReturnBlock = cast<BasicBlock>(VMap[returnBlock]);
  BasicBlock* newNonReturnBlock = cast<BasicBlock>(VMap[nonReturnBlock]);
  
  // Go ahead and update all uses to the duplicate, so that we can just
  // use the inliner functionality when we're done hacking.
  F->replaceAllUsesWith(duplicateFunction);
  
  // Special hackery is needed with PHI nodes that have inputs from more than
  // one extracted block.  For simplicity, just split the PHIs into a two-level
  // sequence of PHIs, some of which will go in the extracted region, and some
  // of which will go outside.
  BasicBlock* preReturn = newReturnBlock;
  newReturnBlock = newReturnBlock->splitBasicBlock(
                                              newReturnBlock->getFirstNonPHI());
  BasicBlock::iterator I = preReturn->begin();
  BasicBlock::iterator Ins = newReturnBlock->begin();
  while (I != preReturn->end()) {
    PHINode* OldPhi = dyn_cast<PHINode>(I);
    if (!OldPhi) break;
    
    PHINode* retPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins);
    OldPhi->replaceAllUsesWith(retPhi);
    Ins = newReturnBlock->getFirstNonPHI();
    
    retPhi->addIncoming(I, preReturn);
    retPhi->addIncoming(OldPhi->getIncomingValueForBlock(newEntryBlock),
                        newEntryBlock);
    OldPhi->removeIncomingValue(newEntryBlock);
    
    ++I;
  }
  newEntryBlock->getTerminator()->replaceUsesOfWith(preReturn, newReturnBlock);
  
  // Gather up the blocks that we're going to extract.
  std::vector<BasicBlock*> toExtract;
  toExtract.push_back(newNonReturnBlock);
  for (Function::iterator FI = duplicateFunction->begin(),
       FE = duplicateFunction->end(); FI != FE; ++FI)
    if (&*FI != newEntryBlock && &*FI != newReturnBlock &&
        &*FI != newNonReturnBlock)
      toExtract.push_back(FI);
      
  // The CodeExtractor needs a dominator tree.
  DominatorTree DT;
  DT.recalculate(*duplicateFunction);

  // Extract the body of the if.
  Function* extractedFunction
    = CodeExtractor(toExtract, &DT).extractCodeRegion();
  
  InlineFunctionInfo IFI;
  
  // Inline the top-level if test into all callers.
  std::vector<User *> Users(duplicateFunction->user_begin(),
                            duplicateFunction->user_end());
  for (std::vector<User*>::iterator UI = Users.begin(), UE = Users.end();
       UI != UE; ++UI)
    if (CallInst *CI = dyn_cast<CallInst>(*UI))
      InlineFunction(CI, IFI);
    else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI))
      InlineFunction(II, IFI);
  
  // Ditch the duplicate, since we're done with it, and rewrite all remaining
  // users (function pointers, etc.) back to the original function.
  duplicateFunction->replaceAllUsesWith(F);
  duplicateFunction->eraseFromParent();
  
  ++NumPartialInlined;
  
  return extractedFunction;
}
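// A source-level intuition for the transformation above, written as a plain
// C++ analogue rather than IR (the shapes and names big_body/callee are
// invented for illustration): the cheap entry test ends up inlined at every
// call site, while the expensive non-return path is outlined into a helper.
#include <cstdio>

static int big_body(int x) {             // corresponds to extractedFunction
  int s = 0;
  for (int i = 0; i < x; ++i) s += i;    // stand-in for the expensive region
  return s;
}

static int callee(int x) {               // original: entry test + early return
  if (x <= 0) return 0;                  // returnBlock path
  return big_body(x);                    // nonReturnBlock path, now outlined
}

int main() {
  // After partial inlining, a caller effectively contains the test itself:
  int x = 5;
  int r = (x <= 0) ? 0 : big_body(x);    // inlined entry test + outlined call
  std::printf("%d %d\n", callee(x), r);  // both print 10
  return 0;
}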